Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index b223769..3da9799 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -1266,12 +1266,14 @@
 					pr_cont("Forcing 3c5x9b full-duplex mode");
 					break;
 				}
+				/* fall through */
 			case 8:
 				/* set full-duplex mode based on eeprom config setting */
 				if ((sw_info & 0x000f) && (sw_info & 0x8000)) {
 					pr_cont("Setting 3c5x9b full-duplex mode (from EEPROM configuration bit)");
 					break;
 				}
+				/* fall through */
 			default:
 				/* xcvr=(0 || 4) OR user has an old 3c5x9 non "B" model */
 				pr_cont("Setting 3c5x9/3c5x9B half-duplex mode");
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index b648e3f..b157522 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -1177,7 +1177,7 @@
 				if (inl(ioaddr + DownListPtr) == isa_virt_to_bus(&lp->tx_ring[entry]))
 					break;	/* It still hasn't been processed. */
 				if (lp->tx_skbuff[entry]) {
-					dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+					dev_consume_skb_irq(lp->tx_skbuff[entry]);
 					lp->tx_skbuff[entry] = NULL;
 				}
 				dirty_tx++;
@@ -1192,7 +1192,7 @@
 #ifdef VORTEX_BUS_MASTER
 		if (status & DMADone) {
 			outw(0x1000, ioaddr + Wn7_MasterStatus);	/* Ack the event. */
-			dev_kfree_skb_irq(lp->tx_skb);	/* Release the transferred buffer */
+			dev_consume_skb_irq(lp->tx_skb);	/* Release the transferred buffer */
 			netif_wake_queue(dev);
 		}
 #endif
@@ -1521,7 +1521,7 @@
 static void set_rx_mode(struct net_device *dev)
 {
 	int ioaddr = dev->base_addr;
-	short new_mode;
+	unsigned short new_mode;
 
 	if (dev->flags & IFF_PROMISC) {
 		if (corkscrew_debug > 3)
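
Two things change in 3c515.c. The skb frees on the TX-completion paths move
from dev_kfree_skb_irq() to dev_consume_skb_irq(): both are safe in hardirq
context, but the consume variant records the skb as successfully delivered
(the consume_skb tracepoint) rather than dropped (the kfree_skb tracepoint),
so drop monitors stop flagging every completed transmit. And new_mode becomes
unsigned short because it holds a register bit pattern for outw(), not a
signed quantity. The free convention, as a hedged sketch (kernel context
assumed):

    /* On TX completion: the packet made it out, so it is not a drop. */
    if (transmitted_ok)
            dev_consume_skb_irq(skb);   /* success: consume_skb trace */
    else
            dev_kfree_skb_irq(skb);     /* real loss: kfree_skb trace */
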
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 5bc1683..8785c2f 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -847,8 +847,7 @@
 
 static int vortex_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *ndev = pci_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
 	if (!ndev || !netif_running(ndev))
 		return 0;
@@ -861,8 +860,7 @@
 
 static int vortex_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *ndev = pci_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	int err;
 
 	if (!ndev || !netif_running(ndev))
@@ -1151,7 +1149,7 @@
 
 	print_info = (vortex_debug > 1);
 	if (print_info)
-		pr_info("See Documentation/networking/vortex.txt\n");
+		pr_info("See Documentation/networking/device_drivers/3com/vortex.txt\n");
 
 	pr_info("%s: 3Com %s %s at %p.\n",
 	       print_name,
@@ -1956,7 +1954,7 @@
 				   dev->name, tx_status);
 			if (tx_status == 0x82) {
 				pr_err("Probably a duplex mismatch.  See "
-						"Documentation/networking/vortex.txt\n");
+						"Documentation/networking/device_drivers/3com/vortex.txt\n");
 			}
 			dump_tx_ring(dev);
 		}
@@ -2175,7 +2173,7 @@
 
 			dma_addr = skb_frag_dma_map(vp->gendev, frag,
 						    0,
-						    frag->size,
+						    skb_frag_size(frag),
 						    DMA_TO_DEVICE);
 			if (dma_mapping_error(vp->gendev, dma_addr)) {
 				for(i = i-1; i >= 0; i--)
@@ -2307,7 +2305,7 @@
 				dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE);
 				pkts_compl++;
 				bytes_compl += vp->tx_skb->len;
-				dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
+				dev_consume_skb_irq(vp->tx_skb); /* Release the transferred buffer */
 				if (ioread16(ioaddr + TxFree) > 1536) {
 					/*
 					 * AKPM: FIXME: I don't think we need this.  If the queue was stopped due to
@@ -2449,7 +2447,7 @@
 #endif
 					pkts_compl++;
 					bytes_compl += skb->len;
-					dev_kfree_skb_irq(skb);
+					dev_consume_skb_irq(skb);
 					vp->tx_skbuff[entry] = NULL;
 				} else {
 					pr_debug("boomerang_interrupt: no skb!\n");
diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index 5c3ef9f..3a6fc99 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # 3Com Ethernet device configuration
 #
@@ -75,8 +76,9 @@
 	  "Hurricane" (3c555/3cSOHO)                           PCI
 
 	  If you have such a card, say Y here.  More specific information is in
-	  <file:Documentation/networking/vortex.txt> and in the comments at
-	  the beginning of <file:drivers/net/ethernet/3com/3c59x.c>.
+	  <file:Documentation/networking/device_drivers/3com/vortex.txt> and
+	  in the comments at the beginning of
+	  <file:drivers/net/ethernet/3com/3c59x.c>.
 
 	  To compile this support as a module, choose M here.
 
diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c
index a43544a..78f3e53 100644
--- a/drivers/net/ethernet/8390/8390.c
+++ b/drivers/net/ethernet/8390/8390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 8390 core for usual drivers */
 
 static const char version[] =
diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c
index 46d2257..6cf3699 100644
--- a/drivers/net/ethernet/8390/8390p.c
+++ b/drivers/net/ethernet/8390/8390p.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 8390 core for ISA devices needing bus delays */
 
 static const char version[] =
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index f2f0264..a947857 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # 8390 device configuration
 #
@@ -11,8 +12,8 @@
 
 	  Note that the answer to this question doesn't directly affect the
 	  kernel: saying N will just cause the configurator to skip all
-	  the questions about Western Digital cards. If you say Y, you will be
-	  asked for your specific card in the following questions.
+	  the questions about National Semiconductor 8390 cards. If you say Y,
+	  you will be asked for your specific card in the following questions.
 
 if NET_VENDOR_8390
 
@@ -49,7 +50,7 @@
 	tristate "Amiga XSurf 100 AX88796/NE2000 clone support"
 	depends on ZORRO
 	select AX88796
-	select ASIX_PHY
+	select AX88796B_PHY
 	help
 	  This driver is for the Individual Computers X-Surf 100 Ethernet
 	  card (based on the Asix AX88796 chip). If you have such a card,
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 2a0ddec..172947f 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* drivers/net/ethernet/8390/ax88796.c
  *
  * Copyright 2005,2007 Simtec Electronics
@@ -5,10 +6,6 @@
  *
  * Asix AX88796 10/100 Ethernet controller support
  *	Based on ne.c, by Donald Becker, et-al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -377,9 +374,7 @@
 		return ret;
 	}
 
-	/* mask with MAC supported features */
-	phy_dev->supported &= PHY_BASIC_FEATURES;
-	phy_dev->advertising = phy_dev->supported;
+	phy_set_max_speed(phy_dev, SPEED_100);
 
 	netdev_info(dev, "PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n",
 		    phy_dev->drv->name, phydev_name(phy_dev), phy_dev->irq);
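
The ax88796.c PHY setup stops masking phydev->supported with
PHY_BASIC_FEATURES by hand. Since the phylib conversion to linkmode bitmaps,
supported/advertising are no longer plain u32 masks, and
phy_set_max_speed(phydev, SPEED_100) is the helper that prunes every link
mode above the given speed and keeps the advertised set consistent. Typical
probe-time use, sketched with hypothetical names (kernel context assumed):

    phy_dev = phy_connect(dev, bus_id, &ax_handle_link_change, iface);
    if (IS_ERR(phy_dev))
            return PTR_ERR(phy_dev);

    /* MAC is 10/100 only: drop 1000BASE-T and faster modes. */
    phy_set_max_speed(phy_dev, SPEED_100);
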
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index 32e9627..bd22a53 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/drivers/acorn/net/etherh.c
  *
  *  Copyright (C) 2000-2002 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * NS8390 I-cubed EtherH and ANT EtherM specific driver
  * Thanks to I-Cubed for information on their cards.
  * EtherM conversion (C) 1999 Chris Kemp and Tim Watterton
@@ -564,26 +561,29 @@
 		sizeof(info->bus_info));
 }
 
-static int etherh_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int etherh_get_link_ksettings(struct net_device *dev,
+				     struct ethtool_link_ksettings *cmd)
 {
-	cmd->supported	= etherh_priv(dev)->supported;
-	ethtool_cmd_speed_set(cmd, SPEED_10);
-	cmd->duplex	= DUPLEX_HALF;
-	cmd->port	= dev->if_port == IF_PORT_10BASET ? PORT_TP : PORT_BNC;
-	cmd->autoneg	= (dev->flags & IFF_AUTOMEDIA ?
-			   AUTONEG_ENABLE : AUTONEG_DISABLE);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						etherh_priv(dev)->supported);
+	cmd->base.speed = SPEED_10;
+	cmd->base.duplex = DUPLEX_HALF;
+	cmd->base.port = dev->if_port == IF_PORT_10BASET ? PORT_TP : PORT_BNC;
+	cmd->base.autoneg = (dev->flags & IFF_AUTOMEDIA ? AUTONEG_ENABLE :
+							  AUTONEG_DISABLE);
 	return 0;
 }
 
-static int etherh_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int etherh_set_link_ksettings(struct net_device *dev,
+				     const struct ethtool_link_ksettings *cmd)
 {
-	switch (cmd->autoneg) {
+	switch (cmd->base.autoneg) {
 	case AUTONEG_ENABLE:
 		dev->flags |= IFF_AUTOMEDIA;
 		break;
 
 	case AUTONEG_DISABLE:
-		switch (cmd->port) {
+		switch (cmd->base.port) {
 		case PORT_TP:
 			dev->if_port = IF_PORT_10BASET;
 			break;
@@ -622,12 +622,12 @@
 }
 
 static const struct ethtool_ops etherh_ethtool_ops = {
-	.get_settings	= etherh_get_settings,
-	.set_settings	= etherh_set_settings,
-	.get_drvinfo	= etherh_get_drvinfo,
-	.get_ts_info	= ethtool_op_get_ts_info,
-	.get_msglevel	= etherh_get_msglevel,
-	.set_msglevel	= etherh_set_msglevel,
+	.get_drvinfo		= etherh_get_drvinfo,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_msglevel		= etherh_get_msglevel,
+	.set_msglevel		= etherh_set_msglevel,
+	.get_link_ksettings	= etherh_get_link_ksettings,
+	.set_link_ksettings	= etherh_set_link_ksettings,
 };
 
 static const struct net_device_ops etherh_netdev_ops = {
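
etherh.c is converted from the long-deprecated get_settings/set_settings
ethtool hooks (struct ethtool_cmd with u32 mode masks) to
get_link_ksettings/set_link_ksettings, which separate scalar link parameters
(cmd->base.*) from arbitrarily wide link-mode bitmaps.
ethtool_convert_legacy_u32_to_link_mode() bridges drivers like this one that
still keep their supported modes as a legacy u32. The minimal shape of such a
handler, assuming a fixed 10 Mb/s half-duplex device (demo name, not driver
code):

    static int demo_get_link_ksettings(struct net_device *dev,
                                       struct ethtool_link_ksettings *cmd)
    {
            u32 legacy = SUPPORTED_10baseT_Half | SUPPORTED_BNC;

            ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
                                                    legacy);
            cmd->base.speed  = SPEED_10;
            cmd->base.duplex = DUPLEX_HALF;
            return 0;
    }
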
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 342ae08..d60a86a 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -153,8 +153,6 @@
 static void dayna_block_output(struct net_device *dev, int count,
 			       const unsigned char *buf, int start_page);
 
-#define memcmp_withio(a, b, c)	memcmp((a), (void *)(b), (c))
-
 /* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */
 static void slow_sane_get_8390_hdr(struct net_device *dev,
 				   struct e8390_pkt_hdr *hdr, int ring_page);
@@ -233,19 +231,26 @@
 
 static enum mac8390_access mac8390_testio(unsigned long membase)
 {
-	unsigned long outdata = 0xA5A0B5B0;
-	unsigned long indata =  0x00000000;
+	u32 outdata = 0xA5A0B5B0;
+	u32 indata = 0;
+
 	/* Try writing 32 bits */
-	memcpy_toio((void __iomem *)membase, &outdata, 4);
-	/* Now compare them */
-	if (memcmp_withio(&outdata, membase, 4) == 0)
+	nubus_writel(outdata, membase);
+	/* Now read it back */
+	indata = nubus_readl(membase);
+	if (outdata == indata)
 		return ACCESS_32;
+
+	outdata = 0xC5C0D5D0;
+	indata = 0;
+
 	/* Write 16 bit output */
 	word_memcpy_tocard(membase, &outdata, 4);
 	/* Now read it back */
 	word_memcpy_fromcard(&indata, membase, 4);
 	if (outdata == indata)
 		return ACCESS_16;
+
 	return ACCESS_UNKNOWN;
 }
 
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 61e4380..645efac 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -289,6 +289,11 @@
 
     virt = ioremap(link->resource[2]->start,
 	    resource_size(link->resource[2]));
+    if (unlikely(!virt)) {
+	    pcmcia_release_window(link, link->resource[2]);
+	    return NULL;
+    }
+
     for (i = 0; i < NR_INFO; i++) {
 	pcmcia_map_mem_page(link, link->resource[2],
 		hw_info[i].offset & ~(resource_size(link->resource[2])-1));
@@ -1423,6 +1428,11 @@
     /* Try scribbling on the buffer */
     info->base = ioremap(link->resource[3]->start,
 			resource_size(link->resource[3]));
+    if (unlikely(!info->base)) {
+	    ret = -ENOMEM;
+	    goto failed;
+    }
+
     for (i = 0; i < (TX_PAGES<<8); i += 2)
 	__raw_writew((i>>1), info->base+offset+i);
     udelay(100);
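
Both pcnet_cs.c hunks add the missing failure check on ioremap(), which
returns NULL when the mapping cannot be established; without the check, the
loops that immediately scribble on the mapping would dereference a NULL
__iomem pointer. The canonical guard (the error policy is the caller's;
returning -ENOMEM or releasing the window first are just the choices made
above):

    void __iomem *virt;

    virt = ioremap(res->start, resource_size(res));
    if (!virt)
            return -ENOMEM;         /* or undo earlier claims first */

    /* ... access only through readw()/writew()/__raw_writew() ... */
    iounmap(virt);
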
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 6fde68a..e8e9c16 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Ethernet LAN device configuration
 #
@@ -75,6 +76,7 @@
 source "drivers/net/ethernet/faraday/Kconfig"
 source "drivers/net/ethernet/freescale/Kconfig"
 source "drivers/net/ethernet/fujitsu/Kconfig"
+source "drivers/net/ethernet/google/Kconfig"
 source "drivers/net/ethernet/hisilicon/Kconfig"
 source "drivers/net/ethernet/hp/Kconfig"
 source "drivers/net/ethernet/huawei/Kconfig"
@@ -108,6 +110,13 @@
 	---help---
 	  Support for the MII0 inside the Lantiq SoC
 
+config LANTIQ_XRX200
+	tristate "Lantiq / Intel xRX200 PMAC network driver"
+	depends on SOC_TYPE_XWAY
+	---help---
+	  Support for the PMAC of the Gigabit switch (GSWIP) inside the
+	  Lantiq / Intel VRX200 VDSL SoC
+
 source "drivers/net/ethernet/marvell/Kconfig"
 source "drivers/net/ethernet/mediatek/Kconfig"
 source "drivers/net/ethernet/mellanox/Kconfig"
@@ -131,18 +140,6 @@
 source "drivers/net/ethernet/netronome/Kconfig"
 source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
-
-config NET_NETX
-	tristate "NetX Ethernet support"
-	select MII
-	depends on ARCH_NETX
-	---help---
-	  This is support for the Hilscher netX builtin Ethernet ports
-
-	  To compile this driver as a module, choose M here. The module
-	  will be called netx-eth.
-
-source "drivers/net/ethernet/nuvoton/Kconfig"
 source "drivers/net/ethernet/nvidia/Kconfig"
 source "drivers/net/ethernet/nxp/Kconfig"
 source "drivers/net/ethernet/oki-semi/Kconfig"
@@ -159,6 +156,7 @@
 
 source "drivers/net/ethernet/packetengines/Kconfig"
 source "drivers/net/ethernet/pasemi/Kconfig"
+source "drivers/net/ethernet/pensando/Kconfig"
 source "drivers/net/ethernet/qlogic/Kconfig"
 source "drivers/net/ethernet/qualcomm/Kconfig"
 source "drivers/net/ethernet/rdc/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index b45d5f6..05abebc 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -39,6 +39,7 @@
 obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/
 obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
 obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
+obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/
 obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/
 obj-$(CONFIG_NET_VENDOR_HP) += hp/
 obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
@@ -49,6 +50,7 @@
 obj-$(CONFIG_JME) += jme.o
 obj-$(CONFIG_KORINA) += korina.o
 obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
+obj-$(CONFIG_LANTIQ_XRX200) += lantiq_xrx200.o
 obj-$(CONFIG_NET_VENDOR_MARVELL) += marvell/
 obj-$(CONFIG_NET_VENDOR_MEDIATEK) += mediatek/
 obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/
@@ -62,8 +64,6 @@
 obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
 obj-$(CONFIG_NET_VENDOR_NI) += ni/
-obj-$(CONFIG_NET_NETX) += netx-eth.o
-obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/
 obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
 obj-$(CONFIG_LPC_ENET) += nxp/
 obj-$(CONFIG_NET_VENDOR_OKI) += oki-semi/
@@ -95,3 +95,4 @@
 obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/
 obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/
 obj-$(CONFIG_NET_VENDOR_SYNOPSYS) += synopsys/
+obj-$(CONFIG_NET_VENDOR_PENSANDO) += pensando/
diff --git a/drivers/net/ethernet/adaptec/Kconfig b/drivers/net/ethernet/adaptec/Kconfig
index 822cffb..86e02da 100644
--- a/drivers/net/ethernet/adaptec/Kconfig
+++ b/drivers/net/ethernet/adaptec/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Adaptec network device configuration
 #
diff --git a/drivers/net/ethernet/adaptec/Makefile b/drivers/net/ethernet/adaptec/Makefile
index 6c07b75..d84138c 100644
--- a/drivers/net/ethernet/adaptec/Makefile
+++ b/drivers/net/ethernet/adaptec/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Adaptec network device drivers.
 #
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 097467f..816540e 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -1390,7 +1390,7 @@
 					}
 				}
 
-				dev_kfree_skb_irq(skb);
+				dev_consume_skb_irq(skb);
 			}
 			np->tx_done_q[np->tx_done].status = 0;
 			np->tx_done = (np->tx_done + 1) % DONE_Q_SIZE;
diff --git a/drivers/net/ethernet/aeroflex/Kconfig b/drivers/net/ethernet/aeroflex/Kconfig
index 4f4a8d7..2fa0a31 100644
--- a/drivers/net/ethernet/aeroflex/Kconfig
+++ b/drivers/net/ethernet/aeroflex/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Aeroflex Gaisler network device configuration
 #
diff --git a/drivers/net/ethernet/aeroflex/Makefile b/drivers/net/ethernet/aeroflex/Makefile
index 6e62a67..1b18ef0 100644
--- a/drivers/net/ethernet/aeroflex/Makefile
+++ b/drivers/net/ethernet/aeroflex/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Aeroflex Gaisler network device drivers.
 #
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 4309be3..2a9f864 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Aeroflex Gaisler GRETH 10/100/1G Ethernet MAC.
  *
@@ -12,11 +13,6 @@
  * The Gigabit version supports scatter/gather DMA, any alignment of
  * buffers and checksum offloading.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
  * Contributors: Kristoffer Glembo
  *               Daniel Hellstrom
  *               Marko Isomaki
@@ -114,7 +110,7 @@
 
 		print_hex_dump(KERN_DEBUG, "TX: ", DUMP_PREFIX_OFFSET, 16, 1,
 			       skb_frag_address(&skb_shinfo(skb)->frags[i]),
-			       skb_shinfo(skb)->frags[i].size, true);
+			       skb_frag_size(&skb_shinfo(skb)->frags[i]), true);
 	}
 }
 
@@ -613,7 +609,6 @@
 		napi_schedule(&greth->napi);
 	}
 
-	mmiowb();
 	spin_unlock(&greth->devlock);
 
 	return retval;
@@ -1279,11 +1274,11 @@
 	}
 
 	if (greth->gbit_mac)
-		phy->supported &= PHY_GBIT_FEATURES;
+		phy_set_max_speed(phy, SPEED_1000);
 	else
-		phy->supported &= PHY_BASIC_FEATURES;
+		phy_set_max_speed(phy, SPEED_100);
 
-	phy->advertising = phy->supported;
+	linkmode_copy(phy->advertising, phy->supported);
 
 	greth->link = 0;
 	greth->speed = 0;
@@ -1433,18 +1428,18 @@
 	}
 
 	/* Allocate TX descriptor ring in coherent memory */
-	greth->tx_bd_base = dma_zalloc_coherent(greth->dev, 1024,
-						&greth->tx_bd_base_phys,
-						GFP_KERNEL);
+	greth->tx_bd_base = dma_alloc_coherent(greth->dev, 1024,
+					       &greth->tx_bd_base_phys,
+					       GFP_KERNEL);
 	if (!greth->tx_bd_base) {
 		err = -ENOMEM;
 		goto error3;
 	}
 
 	/* Allocate RX descriptor ring in coherent memory */
-	greth->rx_bd_base = dma_zalloc_coherent(greth->dev, 1024,
-						&greth->rx_bd_base_phys,
-						GFP_KERNEL);
+	greth->rx_bd_base = dma_alloc_coherent(greth->dev, 1024,
+					       &greth->rx_bd_base_phys,
+					       GFP_KERNEL);
 	if (!greth->rx_bd_base) {
 		err = -ENOMEM;
 		goto error4;
@@ -1459,7 +1454,7 @@
 		const u8 *addr;
 
 		addr = of_get_mac_address(ofdev->dev.of_node);
-		if (addr) {
+		if (!IS_ERR(addr)) {
 			for (i = 0; i < 6; i++)
 				macaddr[i] = (unsigned int) addr[i];
 		} else {
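
greth.c picks up several tree-wide conversions at once. dma_zalloc_coherent()
is gone because dma_alloc_coherent() now always returns zeroed memory, making
the plain call equivalent. mmiowb() was removed tree-wide once spin unlock
began providing that ordering implicitly on the architectures that need it.
The phylib masks become phy_set_max_speed() plus linkmode_copy(), matching
the bitmap representation. And of_get_mac_address() now returns an ERR_PTR()
on failure instead of NULL, so a NULL test would wrongly accept an error
pointer. The updated pattern, sketched (the random fallback is one example
policy; greth itself falls back to reading the hardware registers):

    const u8 *addr = of_get_mac_address(np);

    if (!IS_ERR(addr))
            ether_addr_copy(ndev->dev_addr, addr);
    else
            eth_hw_addr_random(ndev);   /* hypothetical fallback policy */
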
diff --git a/drivers/net/ethernet/agere/Kconfig b/drivers/net/ethernet/agere/Kconfig
index b6fe920..084c719 100644
--- a/drivers/net/ethernet/agere/Kconfig
+++ b/drivers/net/ethernet/agere/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Agere device configuration
 #
diff --git a/drivers/net/ethernet/agere/Makefile b/drivers/net/ethernet/agere/Makefile
index 027ff94..8dbdf66 100644
--- a/drivers/net/ethernet/agere/Makefile
+++ b/drivers/net/ethernet/agere/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Agere ET-131x ethernet driver
 #
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 48220b6..174344c 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -2362,7 +2362,7 @@
 
 	/* Allocate memory for the TCB's (Transmit Control Block) */
 	tx_ring->tcb_ring = kcalloc(NUM_TCB, sizeof(struct tcb),
-				    GFP_ATOMIC | GFP_DMA);
+				    GFP_KERNEL | GFP_DMA);
 	if (!tx_ring->tcb_ring)
 		return -ENOMEM;
 
@@ -2426,7 +2426,7 @@
 	u32 thiscopy, remainder;
 	struct sk_buff *skb = tcb->skb;
 	u32 nr_frags = skb_shinfo(skb)->nr_frags + 1;
-	struct skb_frag_struct *frags = &skb_shinfo(skb)->frags[0];
+	skb_frag_t *frags = &skb_shinfo(skb)->frags[0];
 	struct phy_device *phydev = adapter->netdev->phydev;
 	dma_addr_t dma_addr;
 	struct tx_ring *tx_ring = &adapter->tx_ring;
@@ -2488,11 +2488,11 @@
 				frag++;
 			}
 		} else {
-			desc[frag].len_vlan = frags[i - 1].size;
+			desc[frag].len_vlan = skb_frag_size(&frags[i - 1]);
 			dma_addr = skb_frag_dma_map(&adapter->pdev->dev,
 						    &frags[i - 1],
 						    0,
-						    frags[i - 1].size,
+						    desc[frag].len_vlan,
 						    DMA_TO_DEVICE);
 			desc[frag].addr_lo = lower_32_bits(dma_addr);
 			desc[frag].addr_hi = upper_32_bits(dma_addr);
@@ -3258,19 +3258,11 @@
 		return PTR_ERR(phydev);
 	}
 
-	phydev->supported &= (SUPPORTED_10baseT_Half |
-			      SUPPORTED_10baseT_Full |
-			      SUPPORTED_100baseT_Half |
-			      SUPPORTED_100baseT_Full |
-			      SUPPORTED_Autoneg |
-			      SUPPORTED_MII |
-			      SUPPORTED_TP);
+	phy_set_max_speed(phydev, SPEED_100);
 
 	if (adapter->pdev->device != ET131X_PCI_DEVICE_ID_FAST)
-		phydev->supported |= SUPPORTED_1000baseT_Half |
-				     SUPPORTED_1000baseT_Full;
+		phy_set_max_speed(phydev, SPEED_1000);
 
-	phydev->advertising = phydev->supported;
 	phydev->autoneg = AUTONEG_ENABLE;
 
 	phy_attached_info(phydev);
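
The TCB ring in et131x.c is allocated from a path that runs in process
context and may sleep, so GFP_ATOMIC (the scarce, never-sleep reserve for
interrupt context or code holding a spinlock) is relaxed to GFP_KERNEL, which
can block and reclaim and is therefore far more likely to succeed. GFP_DMA
stays because the hardware constrains where the buffer may live. Rule of
thumb, sketched:

    /* GFP_ATOMIC: cannot sleep (IRQ, softirq, spinlock held).
     * GFP_KERNEL: process context, may sleep - the default choice.
     */
    tx_ring->tcb_ring = kcalloc(NUM_TCB, sizeof(struct tcb),
                                GFP_KERNEL | GFP_DMA);
    if (!tx_ring->tcb_ring)
            return -ENOMEM;
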
diff --git a/drivers/net/ethernet/alacritech/Kconfig b/drivers/net/ethernet/alacritech/Kconfig
index 09496e1..212f92c 100644
--- a/drivers/net/ethernet/alacritech/Kconfig
+++ b/drivers/net/ethernet/alacritech/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_VENDOR_ALACRITECH
 	bool "Alacritech devices"
 	default y
diff --git a/drivers/net/ethernet/alacritech/Makefile b/drivers/net/ethernet/alacritech/Makefile
index 8790e9e..4378aad 100644
--- a/drivers/net/ethernet/alacritech/Makefile
+++ b/drivers/net/ethernet/alacritech/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Alacritech Slicoss driver
 #
diff --git a/drivers/net/ethernet/alacritech/slic.h b/drivers/net/ethernet/alacritech/slic.h
index d0c388c..3add305 100644
--- a/drivers/net/ethernet/alacritech/slic.h
+++ b/drivers/net/ethernet/alacritech/slic.h
@@ -8,7 +8,6 @@
 #include <linux/spinlock_types.h>
 #include <linux/dma-mapping.h>
 #include <linux/pci.h>
-#include <linux/netdevice.h>
 #include <linux/list.h>
 #include <linux/u64_stats_sync.h>
 
diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
index 0b60921..80ef3e1 100644
--- a/drivers/net/ethernet/alacritech/slicoss.c
+++ b/drivers/net/ethernet/alacritech/slicoss.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for Gigabit Ethernet adapters based on the Session Layer
  * Interface (SLIC) technology by Alacritech. The driver does not
  * support the hardware acceleration features provided by these cards.
  *
  * Copyright (C) 2016 Lino Sanfilippo <LinoSanfilippo@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -345,8 +336,6 @@
 	if (sdev->promisc != set_promisc) {
 		sdev->promisc = set_promisc;
 		slic_configure_rcv(sdev);
-		/* make sure writes to receiver cant leak out of the lock */
-		mmiowb();
 	}
 	spin_unlock_bh(&sdev->link_lock);
 }
@@ -795,8 +784,8 @@
 	size = stq->len * sizeof(*descs) + DESC_ALIGN_MASK;
 
 	for (i = 0; i < SLIC_NUM_STAT_DESC_ARRAYS; i++) {
-		descs = dma_zalloc_coherent(&sdev->pdev->dev, size, &paddr,
-					    GFP_KERNEL);
+		descs = dma_alloc_coherent(&sdev->pdev->dev, size, &paddr,
+					   GFP_KERNEL);
 		if (!descs) {
 			netdev_err(sdev->netdev,
 				   "failed to allocate status descriptors\n");
@@ -1240,8 +1229,8 @@
 	struct slic_shmem_data *sm_data;
 	dma_addr_t paddr;
 
-	sm_data = dma_zalloc_coherent(&sdev->pdev->dev, sizeof(*sm_data),
-				      &paddr, GFP_KERNEL);
+	sm_data = dma_alloc_coherent(&sdev->pdev->dev, sizeof(*sm_data),
+				     &paddr, GFP_KERNEL);
 	if (!sm_data) {
 		dev_err(&sdev->pdev->dev, "failed to allocate shared memory\n");
 		return -ENOMEM;
@@ -1461,8 +1450,6 @@
 
 	if (slic_get_free_tx_descs(txq) < SLIC_MAX_REQ_TX_DESCS)
 		netif_stop_queue(dev);
-	/* make sure writes to io-memory cant leak out of tx queue lock */
-	mmiowb();
 
 	return NETDEV_TX_OK;
 drop_skb:
@@ -1621,8 +1608,8 @@
 	int err = 0;
 	u8 *mac[2];
 
-	eeprom = dma_zalloc_coherent(&sdev->pdev->dev, SLIC_EEPROM_SIZE,
-				     &paddr, GFP_KERNEL);
+	eeprom = dma_alloc_coherent(&sdev->pdev->dev, SLIC_EEPROM_SIZE,
+				    &paddr, GFP_KERNEL);
 	if (!eeprom)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/allwinner/Kconfig b/drivers/net/ethernet/allwinner/Kconfig
index 47da7e7..264a482 100644
--- a/drivers/net/ethernet/allwinner/Kconfig
+++ b/drivers/net/ethernet/allwinner/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Allwinner device configuration
 #
@@ -20,17 +21,17 @@
 if NET_VENDOR_ALLWINNER
 
 config SUN4I_EMAC
-        tristate "Allwinner A10 EMAC support"
+	tristate "Allwinner A10 EMAC support"
 	depends on ARCH_SUNXI
 	depends on OF
 	select CRC32
 	select MII
 	select PHYLIB
 	select MDIO_SUN4I
-        ---help---
-          Support for Allwinner A10 EMAC ethernet driver.
+	---help---
+	  Support for Allwinner A10 EMAC ethernet driver.
 
-          To compile this driver as a module, choose M here.  The module
-          will be called sun4i-emac.
+	  To compile this driver as a module, choose M here.  The module
+	  will be called sun4i-emac.
 
 endif # NET_VENDOR_ALLWINNER
diff --git a/drivers/net/ethernet/allwinner/Makefile b/drivers/net/ethernet/allwinner/Makefile
index 03129f7..ddd5a50 100644
--- a/drivers/net/ethernet/allwinner/Makefile
+++ b/drivers/net/ethernet/allwinner/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Allwinner device drivers.
 #
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 3143de4..0537df0 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -172,8 +172,7 @@
 	}
 
 	/* mask with MAC supported features */
-	phydev->supported &= PHY_BASIC_FEATURES;
-	phydev->advertising = phydev->supported;
+	phy_set_max_speed(phydev, SPEED_100);
 
 	db->link = 0;
 	db->speed = 0;
@@ -225,8 +224,8 @@
 static void emac_get_drvinfo(struct net_device *dev,
 			      struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, DRV_NAME, sizeof(DRV_NAME));
-	strlcpy(info->version, DRV_VERSION, sizeof(DRV_VERSION));
+	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 	strlcpy(info->bus_info, dev_name(&dev->dev), sizeof(info->bus_info));
 }
 
@@ -819,7 +818,6 @@
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	db = netdev_priv(ndev);
-	memset(db, 0, sizeof(*db));
 
 	db->dev = &pdev->dev;
 	db->ndev = ndev;
@@ -862,7 +860,9 @@
 		goto out_clk_disable_unprepare;
 	}
 
-	db->phy_node = of_parse_phandle(np, "phy", 0);
+	db->phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!db->phy_node)
+		db->phy_node = of_parse_phandle(np, "phy", 0);
 	if (!db->phy_node) {
 		dev_err(&pdev->dev, "no associated PHY\n");
 		ret = -ENODEV;
@@ -871,8 +871,8 @@
 
 	/* Read MAC-address from DT */
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
-		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(ndev->dev_addr, mac_addr);
 
 	/* Check if the MAC address is valid, if not get a random one */
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
diff --git a/drivers/net/ethernet/alteon/Kconfig b/drivers/net/ethernet/alteon/Kconfig
index e06ccab..c3f7067 100644
--- a/drivers/net/ethernet/alteon/Kconfig
+++ b/drivers/net/ethernet/alteon/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Alteon network device configuration
 #
diff --git a/drivers/net/ethernet/alteon/Makefile b/drivers/net/ethernet/alteon/Makefile
index a2ca173..be52255 100644
--- a/drivers/net/ethernet/alteon/Makefile
+++ b/drivers/net/ethernet/alteon/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Alteon network device drivers.
 #
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 4f11f98..46b4207 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * acenic.c: Linux driver for the Alteon AceNIC Gigabit Ethernet card
  *           and other Tigon based cards.
@@ -12,11 +13,6 @@
  * about the driver. Send mail to linux-acenic-help@sunsite.auc.dk to
  * see how to subscribe.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
  * Additional credits:
  *   Pete Wyckoff <wyckoff@ca.sandia.gov>: Initial Linux/Alpha and trace
  *       dump support. The trace dump support has not been
@@ -2059,7 +2055,7 @@
 		if (skb) {
 			dev->stats.tx_packets++;
 			dev->stats.tx_bytes += skb->len;
-			dev_kfree_skb_irq(skb);
+			dev_consume_skb_irq(skb);
 			info->skb = NULL;
 		}
 
diff --git a/drivers/net/ethernet/altera/Kconfig b/drivers/net/ethernet/altera/Kconfig
index fdddba5..2690c39 100644
--- a/drivers/net/ethernet/altera/Kconfig
+++ b/drivers/net/ethernet/altera/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config ALTERA_TSE
 	tristate "Altera Triple-Speed Ethernet MAC support"
 	depends on HAS_DMA
diff --git a/drivers/net/ethernet/altera/Makefile b/drivers/net/ethernet/altera/Makefile
index d4a187e..a52db80 100644
--- a/drivers/net/ethernet/altera/Makefile
+++ b/drivers/net/ethernet/altera/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Altera device drivers.
 #
diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c
index 0fb986b..ac1efd0 100644
--- a/drivers/net/ethernet/altera/altera_msgdma.c
+++ b/drivers/net/ethernet/altera/altera_msgdma.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/netdevice.h>
@@ -145,7 +134,8 @@
 			& 0xffff;
 
 	if (inuse) { /* Tx FIFO is not empty */
-		ready = priv->tx_prod - priv->tx_cons - inuse - 1;
+		ready = max_t(int,
+			      priv->tx_prod - priv->tx_cons - inuse - 1, 0);
 	} else {
 		/* Check for buffered last packet */
 		status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status));
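
The msgdma fix clamps the computed number of ready TX slots at zero: the
producer/consumer indexes and the in-use count are unsigned, so a transiently
inconsistent snapshot can make tx_prod - tx_cons - inuse - 1 go "negative",
which in unsigned arithmetic wraps to a huge count. max_t(int, ..., 0) forces
a signed comparison first. A standalone demonstration (max_t simplified from
the kernel macro):

    #include <stdio.h>

    #define max_t(type, x, y) \
            ((type)(x) > (type)(y) ? (type)(x) : (type)(y))

    int main(void)
    {
            unsigned int tx_prod = 5, tx_cons = 3, inuse = 4;

            unsigned int raw = tx_prod - tx_cons - inuse - 1;  /* wraps */
            int clamped = max_t(int, tx_prod - tx_cons - inuse - 1, 0);

            printf("raw=%u clamped=%d\n", raw, clamped);
            /* prints: raw=4294967293 clamped=0 */
            return 0;
    }
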
diff --git a/drivers/net/ethernet/altera/altera_msgdma.h b/drivers/net/ethernet/altera/altera_msgdma.h
index 42cf61c..9813fbf 100644
--- a/drivers/net/ethernet/altera/altera_msgdma.h
+++ b/drivers/net/ethernet/altera/altera_msgdma.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ALTERA_MSGDMA_H__
diff --git a/drivers/net/ethernet/altera/altera_msgdmahw.h b/drivers/net/ethernet/altera/altera_msgdmahw.h
index 89cd11d..019f5a1 100644
--- a/drivers/net/ethernet/altera/altera_msgdmahw.h
+++ b/drivers/net/ethernet/altera/altera_msgdmahw.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ALTERA_MSGDMAHW_H__
diff --git a/drivers/net/ethernet/altera/altera_sgdma.c b/drivers/net/ethernet/altera/altera_sgdma.c
index 88ef67a..db97170 100644
--- a/drivers/net/ethernet/altera/altera_sgdma.c
+++ b/drivers/net/ethernet/altera/altera_sgdma.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/list.h>
diff --git a/drivers/net/ethernet/altera/altera_sgdma.h b/drivers/net/ethernet/altera/altera_sgdma.h
index 584977e..08afe1c 100644
--- a/drivers/net/ethernet/altera/altera_sgdma.h
+++ b/drivers/net/ethernet/altera/altera_sgdma.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ALTERA_SGDMA_H__
diff --git a/drivers/net/ethernet/altera/altera_sgdmahw.h b/drivers/net/ethernet/altera/altera_sgdmahw.h
index bbd52f0..3304518 100644
--- a/drivers/net/ethernet/altera/altera_sgdmahw.h
+++ b/drivers/net/ethernet/altera/altera_sgdmahw.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ALTERA_SGDMAHW_H__
diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h
index e2feee8..f17acfb 100644
--- a/drivers/net/ethernet/altera/altera_tse.h
+++ b/drivers/net/ethernet/altera/altera_tse.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Altera Triple-Speed Ethernet MAC driver
  * Copyright (C) 2008-2014 Altera Corporation. All rights reserved
  *
@@ -14,18 +15,6 @@
  *
  * Original driver contributed by SLS.
  * Major updates contributed by GlobalLogic
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ALTERA_TSE_H__
diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c
index 7c36771..2382346 100644
--- a/drivers/net/ethernet/altera/altera_tse_ethtool.c
+++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Ethtool support for Altera Triple-Speed Ethernet MAC driver
  * Copyright (C) 2008-2014 Altera Corporation. All rights reserved
  *
@@ -13,18 +14,6 @@
  *
  * Original driver contributed by SLS.
  * Major updates contributed by GlobalLogic
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/ethtool.h>
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index baca8f7..bb032be 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Altera Triple-Speed Ethernet MAC driver
  * Copyright (C) 2008-2014 Altera Corporation. All rights reserved
  *
@@ -14,18 +15,6 @@
  *
  * Original driver contributed by SLS.
  * Major updates contributed by GlobalLogic
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/atomic.h>
@@ -714,8 +703,10 @@
 
 		phydev = phy_connect(dev, phy_id_fmt, &altera_tse_adjust_link,
 				     priv->phy_iface);
-		if (IS_ERR(phydev))
+		if (IS_ERR(phydev)) {
 			netdev_err(dev, "Could not attach to PHY\n");
+			phydev = NULL;
+		}
 
 	} else {
 		int ret;
@@ -835,13 +826,10 @@
 	}
 
 	/* Stop Advertising 1000BASE Capability if interface is not GMII
-	 * Note: Checkpatch throws CHECKs for the camel case defines below,
-	 * it's ok to ignore.
 	 */
 	if ((priv->phy_iface == PHY_INTERFACE_MODE_MII) ||
 	    (priv->phy_iface == PHY_INTERFACE_MODE_RMII))
-		phydev->advertising &= ~(SUPPORTED_1000baseT_Half |
-					 SUPPORTED_1000baseT_Full);
+		phy_set_max_speed(phydev, SPEED_100);
 
 	/* Broken HW is sometimes missing the pull-up resistor on the
 	 * MDIO line, which results in reads to non-existent devices returning
@@ -1538,7 +1526,7 @@
 
 	/* get default MAC address from device tree */
 	macaddr = of_get_mac_address(pdev->dev.of_node);
-	if (macaddr)
+	if (!IS_ERR(macaddr))
 		ether_addr_copy(ndev->dev_addr, macaddr);
 	else
 		eth_hw_addr_random(ndev);
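
altera_tse_main.c carries two fixes beside the API conversions. phy_connect()
reports failure as an ERR_PTR(), but this driver's later code tests the
result against NULL, so the error pointer is normalized to NULL to keep those
checks honest:

    phydev = phy_connect(dev, phy_id_fmt, &altera_tse_adjust_link,
                         priv->phy_iface);
    if (IS_ERR(phydev)) {
            netdev_err(dev, "Could not attach to PHY\n");
            phydev = NULL;          /* callers check for NULL, not IS_ERR */
    }

And as in the other drivers, of_get_mac_address() now hands back an ERR_PTR()
on failure, so the result is screened with IS_ERR() before ether_addr_copy().
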
diff --git a/drivers/net/ethernet/altera/altera_utils.c b/drivers/net/ethernet/altera/altera_utils.c
index d7eeb17..e6a7fc9 100644
--- a/drivers/net/ethernet/altera/altera_utils.c
+++ b/drivers/net/ethernet/altera/altera_utils.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "altera_tse.h"
diff --git a/drivers/net/ethernet/altera/altera_utils.h b/drivers/net/ethernet/altera/altera_utils.h
index baf100c..b7d772f 100644
--- a/drivers/net/ethernet/altera/altera_utils.h
+++ b/drivers/net/ethernet/altera/altera_utils.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Altera TSE SGDMA and MSGDMA Linux driver
  * Copyright (C) 2014 Altera Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig
index 99b3035..cca72a7 100644
--- a/drivers/net/ethernet/amazon/Kconfig
+++ b/drivers/net/ethernet/amazon/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Amazon network device configuration
 #
@@ -17,7 +18,8 @@
 
 config ENA_ETHERNET
 	tristate "Elastic Network Adapter (ENA) support"
-	depends on (PCI_MSI && X86)
+	depends on PCI_MSI && !CPU_BIG_ENDIAN
+	select DIMLIB
 	---help---
 	  This driver supports Elastic Network Adapter (ENA)"
 
diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile
index 8e0b73f..f614f23 100644
--- a/drivers/net/ethernet/amazon/Makefile
+++ b/drivers/net/ethernet/amazon/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Amazon network device drivers.
 #
diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
index eaeeae0..f1f752a 100644
--- a/drivers/net/ethernet/amazon/ena/Makefile
+++ b/drivers/net/ethernet/amazon/ena/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Elastic Network Adapter (ENA) device drivers.
 #
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 4532e57..8baf847 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -32,115 +32,82 @@
 #ifndef _ENA_ADMIN_H_
 #define _ENA_ADMIN_H_
 
+
 enum ena_admin_aq_opcode {
-	ENA_ADMIN_CREATE_SQ	= 1,
-
-	ENA_ADMIN_DESTROY_SQ	= 2,
-
-	ENA_ADMIN_CREATE_CQ	= 3,
-
-	ENA_ADMIN_DESTROY_CQ	= 4,
-
-	ENA_ADMIN_GET_FEATURE	= 8,
-
-	ENA_ADMIN_SET_FEATURE	= 9,
-
-	ENA_ADMIN_GET_STATS	= 11,
+	ENA_ADMIN_CREATE_SQ                         = 1,
+	ENA_ADMIN_DESTROY_SQ                        = 2,
+	ENA_ADMIN_CREATE_CQ                         = 3,
+	ENA_ADMIN_DESTROY_CQ                        = 4,
+	ENA_ADMIN_GET_FEATURE                       = 8,
+	ENA_ADMIN_SET_FEATURE                       = 9,
+	ENA_ADMIN_GET_STATS                         = 11,
 };
 
 enum ena_admin_aq_completion_status {
-	ENA_ADMIN_SUCCESS			= 0,
-
-	ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE	= 1,
-
-	ENA_ADMIN_BAD_OPCODE			= 2,
-
-	ENA_ADMIN_UNSUPPORTED_OPCODE		= 3,
-
-	ENA_ADMIN_MALFORMED_REQUEST		= 4,
-
+	ENA_ADMIN_SUCCESS                           = 0,
+	ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE       = 1,
+	ENA_ADMIN_BAD_OPCODE                        = 2,
+	ENA_ADMIN_UNSUPPORTED_OPCODE                = 3,
+	ENA_ADMIN_MALFORMED_REQUEST                 = 4,
 	/* Additional status is provided in ACQ entry extended_status */
-	ENA_ADMIN_ILLEGAL_PARAMETER		= 5,
-
-	ENA_ADMIN_UNKNOWN_ERROR			= 6,
+	ENA_ADMIN_ILLEGAL_PARAMETER                 = 5,
+	ENA_ADMIN_UNKNOWN_ERROR                     = 6,
+	ENA_ADMIN_RESOURCE_BUSY                     = 7,
 };
 
 enum ena_admin_aq_feature_id {
-	ENA_ADMIN_DEVICE_ATTRIBUTES		= 1,
-
-	ENA_ADMIN_MAX_QUEUES_NUM		= 2,
-
-	ENA_ADMIN_HW_HINTS			= 3,
-
-	ENA_ADMIN_RSS_HASH_FUNCTION		= 10,
-
-	ENA_ADMIN_STATELESS_OFFLOAD_CONFIG	= 11,
-
-	ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG	= 12,
-
-	ENA_ADMIN_MTU				= 14,
-
-	ENA_ADMIN_RSS_HASH_INPUT		= 18,
-
-	ENA_ADMIN_INTERRUPT_MODERATION		= 20,
-
-	ENA_ADMIN_AENQ_CONFIG			= 26,
-
-	ENA_ADMIN_LINK_CONFIG			= 27,
-
-	ENA_ADMIN_HOST_ATTR_CONFIG		= 28,
-
-	ENA_ADMIN_FEATURES_OPCODE_NUM		= 32,
+	ENA_ADMIN_DEVICE_ATTRIBUTES                 = 1,
+	ENA_ADMIN_MAX_QUEUES_NUM                    = 2,
+	ENA_ADMIN_HW_HINTS                          = 3,
+	ENA_ADMIN_LLQ                               = 4,
+	ENA_ADMIN_MAX_QUEUES_EXT                    = 7,
+	ENA_ADMIN_RSS_HASH_FUNCTION                 = 10,
+	ENA_ADMIN_STATELESS_OFFLOAD_CONFIG          = 11,
+	ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG      = 12,
+	ENA_ADMIN_MTU                               = 14,
+	ENA_ADMIN_RSS_HASH_INPUT                    = 18,
+	ENA_ADMIN_INTERRUPT_MODERATION              = 20,
+	ENA_ADMIN_AENQ_CONFIG                       = 26,
+	ENA_ADMIN_LINK_CONFIG                       = 27,
+	ENA_ADMIN_HOST_ATTR_CONFIG                  = 28,
+	ENA_ADMIN_FEATURES_OPCODE_NUM               = 32,
 };
 
 enum ena_admin_placement_policy_type {
 	/* descriptors and headers are in host memory */
-	ENA_ADMIN_PLACEMENT_POLICY_HOST	= 1,
-
+	ENA_ADMIN_PLACEMENT_POLICY_HOST             = 1,
 	/* descriptors and headers are in device memory (a.k.a Low Latency
 	 * Queue)
 	 */
-	ENA_ADMIN_PLACEMENT_POLICY_DEV	= 3,
+	ENA_ADMIN_PLACEMENT_POLICY_DEV              = 3,
 };
 
 enum ena_admin_link_types {
-	ENA_ADMIN_LINK_SPEED_1G		= 0x1,
-
-	ENA_ADMIN_LINK_SPEED_2_HALF_G	= 0x2,
-
-	ENA_ADMIN_LINK_SPEED_5G		= 0x4,
-
-	ENA_ADMIN_LINK_SPEED_10G	= 0x8,
-
-	ENA_ADMIN_LINK_SPEED_25G	= 0x10,
-
-	ENA_ADMIN_LINK_SPEED_40G	= 0x20,
-
-	ENA_ADMIN_LINK_SPEED_50G	= 0x40,
-
-	ENA_ADMIN_LINK_SPEED_100G	= 0x80,
-
-	ENA_ADMIN_LINK_SPEED_200G	= 0x100,
-
-	ENA_ADMIN_LINK_SPEED_400G	= 0x200,
+	ENA_ADMIN_LINK_SPEED_1G                     = 0x1,
+	ENA_ADMIN_LINK_SPEED_2_HALF_G               = 0x2,
+	ENA_ADMIN_LINK_SPEED_5G                     = 0x4,
+	ENA_ADMIN_LINK_SPEED_10G                    = 0x8,
+	ENA_ADMIN_LINK_SPEED_25G                    = 0x10,
+	ENA_ADMIN_LINK_SPEED_40G                    = 0x20,
+	ENA_ADMIN_LINK_SPEED_50G                    = 0x40,
+	ENA_ADMIN_LINK_SPEED_100G                   = 0x80,
+	ENA_ADMIN_LINK_SPEED_200G                   = 0x100,
+	ENA_ADMIN_LINK_SPEED_400G                   = 0x200,
 };
 
 enum ena_admin_completion_policy_type {
 	/* completion queue entry for each sq descriptor */
-	ENA_ADMIN_COMPLETION_POLICY_DESC		= 0,
-
+	ENA_ADMIN_COMPLETION_POLICY_DESC            = 0,
 	/* completion queue entry upon request in sq descriptor */
-	ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND	= 1,
-
+	ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND  = 1,
 	/* current queue head pointer is updated in OS memory upon sq
 	 * descriptor request
 	 */
-	ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND	= 2,
-
+	ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND  = 2,
 	/* current queue head pointer is updated in OS memory for each sq
 	 * descriptor
 	 */
-	ENA_ADMIN_COMPLETION_POLICY_HEAD		= 3,
+	ENA_ADMIN_COMPLETION_POLICY_HEAD            = 3,
 };
 
 /* basic stats return ena_admin_basic_stats while extanded stats return a
@@ -148,15 +115,13 @@
  * device id
  */
 enum ena_admin_get_stats_type {
-	ENA_ADMIN_GET_STATS_TYPE_BASIC		= 0,
-
-	ENA_ADMIN_GET_STATS_TYPE_EXTENDED	= 1,
+	ENA_ADMIN_GET_STATS_TYPE_BASIC              = 0,
+	ENA_ADMIN_GET_STATS_TYPE_EXTENDED           = 1,
 };
 
 enum ena_admin_get_stats_scope {
-	ENA_ADMIN_SPECIFIC_QUEUE	= 0,
-
-	ENA_ADMIN_ETH_TRAFFIC		= 1,
+	ENA_ADMIN_SPECIFIC_QUEUE                    = 0,
+	ENA_ADMIN_ETH_TRAFFIC                       = 1,
 };
 
 struct ena_admin_aq_common_desc {
@@ -227,7 +192,9 @@
 
 	u16 extended_status;
 
-	/* serves as a hint what AQ entries can be revoked */
+	/* indicates to the driver which AQ entry has been consumed by the
+	 *    device and could be reused
+	 */
 	u16 sq_head_indx;
 };
 
@@ -296,9 +263,8 @@
 };
 
 enum ena_admin_sq_direction {
-	ENA_ADMIN_SQ_DIRECTION_TX	= 1,
-
-	ENA_ADMIN_SQ_DIRECTION_RX	= 2,
+	ENA_ADMIN_SQ_DIRECTION_TX                   = 1,
+	ENA_ADMIN_SQ_DIRECTION_RX                   = 2,
 };
 
 struct ena_admin_acq_create_sq_resp_desc {
@@ -456,7 +422,13 @@
 	/* as appears in ena_admin_aq_feature_id */
 	u8 feature_id;
 
-	u16 reserved16;
+	/* The driver specifies the max feature version it supports and the
+	 * device responds with the currently supported feature version. The
+	 * field is zero based
+	 */
+	u8 feature_version;
+
+	u8 reserved8;
 };
 
 struct ena_admin_device_attr_feature_desc {
@@ -483,8 +455,118 @@
 	u32 max_mtu;
 };
 
+enum ena_admin_llq_header_location {
+	/* header is in descriptor list */
+	ENA_ADMIN_INLINE_HEADER                     = 1,
+	/* header in a separate ring, implies 16B descriptor list entry */
+	ENA_ADMIN_HEADER_RING                       = 2,
+};
+
+enum ena_admin_llq_ring_entry_size {
+	ENA_ADMIN_LIST_ENTRY_SIZE_128B              = 1,
+	ENA_ADMIN_LIST_ENTRY_SIZE_192B              = 2,
+	ENA_ADMIN_LIST_ENTRY_SIZE_256B              = 4,
+};
+
+enum ena_admin_llq_num_descs_before_header {
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0     = 0,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1     = 1,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2     = 2,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4     = 4,
+	ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8     = 8,
+};
+
+/* packet descriptor list entry always starts with one or more descriptors,
+ * followed by a header. The rest of the descriptors are located in the
+ * beginning of the subsequent entry. Stride refers to how the rest of the
+ * descriptors are placed. This field is relevant only for inline header
+ * mode
+ */
+enum ena_admin_llq_stride_ctrl {
+	ENA_ADMIN_SINGLE_DESC_PER_ENTRY             = 1,
+	ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY          = 2,
+};
+
+struct ena_admin_feature_llq_desc {
+	u32 max_llq_num;
+
+	u32 max_llq_depth;
+
+	/*  specify the header locations the device supports. bitfield of
+	 *    enum ena_admin_llq_header_location.
+	 */
+	u16 header_location_ctrl_supported;
+
+	/* the header location the driver selected to use. */
+	u16 header_location_ctrl_enabled;
+
+	/* if inline header is specified - this is the size of descriptor
+	 *    list entry. If header in a separate ring is specified - this is
+	 *    the size of header ring entry. bitfield of enum
+	 *    ena_admin_llq_ring_entry_size. specify the entry sizes the device
+	 *    supports
+	 */
+	u16 entry_size_ctrl_supported;
+
+	/* the entry size the driver selected to use. */
+	u16 entry_size_ctrl_enabled;
+
+	/* valid only if inline header is specified. First entry associated
+	 *    with the packet includes descriptors and header. Rest of the
+	 *    entries occupied by descriptors. This parameter defines the max
+	 *    number of descriptors preceding the header in the first entry.
+	 *    The field is bitfield of enum
+	 *    ena_admin_llq_num_descs_before_header and specify the values the
+	 *    device supports
+	 */
+	u16 desc_num_before_header_supported;
+
+	/* the desired value the driver selected to use */
+	u16 desc_num_before_header_enabled;
+
+	/* valid only if inline was chosen. bitfield of enum
+	 *    ena_admin_llq_stride_ctrl
+	 */
+	u16 descriptors_stride_ctrl_supported;
+
+	/* the stride control the driver selected to use */
+	u16 descriptors_stride_ctrl_enabled;
+
+	/* Maximum size in bytes taken by llq entries in a single tx burst.
+	 * Set to 0 when there is no such limit.
+	 */
+	u32 max_tx_burst_size;
+};
+
+struct ena_admin_queue_ext_feature_fields {
+	u32 max_tx_sq_num;
+
+	u32 max_tx_cq_num;
+
+	u32 max_rx_sq_num;
+
+	u32 max_rx_cq_num;
+
+	u32 max_tx_sq_depth;
+
+	u32 max_tx_cq_depth;
+
+	u32 max_rx_sq_depth;
+
+	u32 max_rx_cq_depth;
+
+	u32 max_tx_header_size;
+
+	/* Maximum Descriptors number, including meta descriptor, allowed for
+	 * a single Tx packet
+	 */
+	u16 max_per_packet_tx_descs;
+
+	/* Maximum Descriptors number allowed for a single Rx packet */
+	u16 max_per_packet_rx_descs;
+};
+
 struct ena_admin_queue_feature_desc {
-	/* including LLQs */
 	u32 max_sq_num;
 
 	u32 max_sq_depth;
@@ -493,9 +575,9 @@
 
 	u32 max_cq_depth;
 
-	u32 max_llq_num;
+	u32 max_legacy_llq_num;
 
-	u32 max_llq_depth;
+	u32 max_legacy_llq_depth;
 
 	u32 max_header_size;
 
@@ -583,9 +665,8 @@
 };
 
 enum ena_admin_hash_functions {
-	ENA_ADMIN_TOEPLITZ	= 1,
-
-	ENA_ADMIN_CRC32		= 2,
+	ENA_ADMIN_TOEPLITZ                          = 1,
+	ENA_ADMIN_CRC32                             = 2,
 };
 
 struct ena_admin_feature_rss_flow_hash_control {
@@ -611,50 +692,35 @@
 
 /* RSS flow hash protocols */
 enum ena_admin_flow_hash_proto {
-	ENA_ADMIN_RSS_TCP4	= 0,
-
-	ENA_ADMIN_RSS_UDP4	= 1,
-
-	ENA_ADMIN_RSS_TCP6	= 2,
-
-	ENA_ADMIN_RSS_UDP6	= 3,
-
-	ENA_ADMIN_RSS_IP4	= 4,
-
-	ENA_ADMIN_RSS_IP6	= 5,
-
-	ENA_ADMIN_RSS_IP4_FRAG	= 6,
-
-	ENA_ADMIN_RSS_NOT_IP	= 7,
-
+	ENA_ADMIN_RSS_TCP4                          = 0,
+	ENA_ADMIN_RSS_UDP4                          = 1,
+	ENA_ADMIN_RSS_TCP6                          = 2,
+	ENA_ADMIN_RSS_UDP6                          = 3,
+	ENA_ADMIN_RSS_IP4                           = 4,
+	ENA_ADMIN_RSS_IP6                           = 5,
+	ENA_ADMIN_RSS_IP4_FRAG                      = 6,
+	ENA_ADMIN_RSS_NOT_IP                        = 7,
 	/* TCPv6 with extension header */
-	ENA_ADMIN_RSS_TCP6_EX	= 8,
-
+	ENA_ADMIN_RSS_TCP6_EX                       = 8,
 	/* IPv6 with extension header */
-	ENA_ADMIN_RSS_IP6_EX	= 9,
-
-	ENA_ADMIN_RSS_PROTO_NUM	= 16,
+	ENA_ADMIN_RSS_IP6_EX                        = 9,
+	ENA_ADMIN_RSS_PROTO_NUM                     = 16,
 };
 
 /* RSS flow hash fields */
 enum ena_admin_flow_hash_fields {
 	/* Ethernet Dest Addr */
-	ENA_ADMIN_RSS_L2_DA	= BIT(0),
-
+	ENA_ADMIN_RSS_L2_DA                         = BIT(0),
 	/* Ethernet Src Addr */
-	ENA_ADMIN_RSS_L2_SA	= BIT(1),
-
+	ENA_ADMIN_RSS_L2_SA                         = BIT(1),
 	/* ipv4/6 Dest Addr */
-	ENA_ADMIN_RSS_L3_DA	= BIT(2),
-
+	ENA_ADMIN_RSS_L3_DA                         = BIT(2),
 	/* ipv4/6 Src Addr */
-	ENA_ADMIN_RSS_L3_SA	= BIT(3),
-
+	ENA_ADMIN_RSS_L3_SA                         = BIT(3),
 	/* tcp/udp Dest Port */
-	ENA_ADMIN_RSS_L4_DP	= BIT(4),
-
+	ENA_ADMIN_RSS_L4_DP                         = BIT(4),
 	/* tcp/udp Src Port */
-	ENA_ADMIN_RSS_L4_SP	= BIT(5),
+	ENA_ADMIN_RSS_L4_SP                         = BIT(5),
 };
 
 struct ena_admin_proto_input {
@@ -693,15 +759,13 @@
 };
 
 enum ena_admin_os_type {
-	ENA_ADMIN_OS_LINUX	= 1,
-
-	ENA_ADMIN_OS_WIN	= 2,
-
-	ENA_ADMIN_OS_DPDK	= 3,
-
-	ENA_ADMIN_OS_FREEBSD	= 4,
-
-	ENA_ADMIN_OS_IPXE	= 5,
+	ENA_ADMIN_OS_LINUX                          = 1,
+	ENA_ADMIN_OS_WIN                            = 2,
+	ENA_ADMIN_OS_DPDK                           = 3,
+	ENA_ADMIN_OS_FREEBSD                        = 4,
+	ENA_ADMIN_OS_IPXE                           = 5,
+	ENA_ADMIN_OS_ESXI                           = 6,
+	ENA_ADMIN_OS_GROUPS_NUM                     = 6,
 };
 
 struct ena_admin_host_info {
@@ -723,11 +787,33 @@
 	/* 7:0 : major
 	 * 15:8 : minor
 	 * 23:16 : sub_minor
+	 * 31:24 : module_type
 	 */
 	u32 driver_version;
 
 	/* features bitmap */
-	u32 supported_network_features[4];
+	u32 supported_network_features[2];
+
+	/* ENA spec version of driver */
+	u16 ena_spec_version;
+
+	/* ENA device's Bus, Device and Function
+	 * 2:0 : function
+	 * 7:3 : device
+	 * 15:8 : bus
+	 */
+	u16 bdf;
+
+	/* Number of CPUs */
+	u16 num_cpus;
+
+	u16 reserved;
+
+	/* 1:0 : reserved
+	 * 2 : interrupt_moderation
+	 * 31:3 : reserved
+	 */
+	u32 driver_supported_features;
 };
 
 struct ena_admin_rss_ind_table_entry {
@@ -792,6 +878,19 @@
 	u32 raw[11];
 };
 
+struct ena_admin_queue_ext_feature_desc {
+	/* version */
+	u8 version;
+
+	u8 reserved1[3];
+
+	union {
+		struct ena_admin_queue_ext_feature_fields max_queue_ext;
+
+		u32 raw[10];
+	};
+};
+
 struct ena_admin_get_feat_resp {
 	struct ena_admin_acq_common_desc acq_common_desc;
 
@@ -800,8 +899,12 @@
 
 		struct ena_admin_device_attr_feature_desc dev_attr;
 
+		struct ena_admin_feature_llq_desc llq;
+
 		struct ena_admin_queue_feature_desc max_queue;
 
+		struct ena_admin_queue_ext_feature_desc max_queue_ext;
+
 		struct ena_admin_feature_aenq_desc aenq;
 
 		struct ena_admin_get_feature_link_desc link;
@@ -847,6 +950,9 @@
 
 		/* rss indirection table */
 		struct ena_admin_feature_rss_ind_table ind_table;
+
+		/* LLQ configuration */
+		struct ena_admin_feature_llq_desc llq;
 	} u;
 };
 
@@ -863,7 +969,9 @@
 
 	u16 syndrom;
 
-	/* 0 : phase */
+	/* 0 : phase
+	 * 7:1 : reserved - MBZ
+	 */
 	u8 flags;
 
 	u8 reserved1[3];
@@ -875,25 +983,18 @@
 
 /* asynchronous event notification groups */
 enum ena_admin_aenq_group {
-	ENA_ADMIN_LINK_CHANGE		= 0,
-
-	ENA_ADMIN_FATAL_ERROR		= 1,
-
-	ENA_ADMIN_WARNING		= 2,
-
-	ENA_ADMIN_NOTIFICATION		= 3,
-
-	ENA_ADMIN_KEEP_ALIVE		= 4,
-
-	ENA_ADMIN_AENQ_GROUPS_NUM	= 5,
+	ENA_ADMIN_LINK_CHANGE                       = 0,
+	ENA_ADMIN_FATAL_ERROR                       = 1,
+	ENA_ADMIN_WARNING                           = 2,
+	ENA_ADMIN_NOTIFICATION                      = 3,
+	ENA_ADMIN_KEEP_ALIVE                        = 4,
+	ENA_ADMIN_AENQ_GROUPS_NUM                   = 5,
 };
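
/* A minimal sketch of how a driver might build its requested AENQ groups
 * bitmask from this enum (aenq_groups is an illustrative local; the mask
 * is what the AENQ configuration admin command consumes):
 */
u32 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
		  BIT(ENA_ADMIN_FATAL_ERROR) |
		  BIT(ENA_ADMIN_WARNING) |
		  BIT(ENA_ADMIN_NOTIFICATION) |
		  BIT(ENA_ADMIN_KEEP_ALIVE);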
 
 enum ena_admin_aenq_notification_syndrom {
-	ENA_ADMIN_SUSPEND	= 0,
-
-	ENA_ADMIN_RESUME	= 1,
-
-	ENA_ADMIN_UPDATE_HINTS	= 2,
+	ENA_ADMIN_SUSPEND                           = 0,
+	ENA_ADMIN_RESUME                            = 1,
+	ENA_ADMIN_UPDATE_HINTS                      = 2,
 };
 
 struct ena_admin_aenq_entry {
@@ -928,27 +1029,27 @@
 };
 
 /* aq_common_desc */
-#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
-#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK            GENMASK(11, 0)
+#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK                 BIT(0)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT            1
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK             BIT(1)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT   2
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK    BIT(2)
 
 /* sq */
-#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
-#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT                     5
+#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK                      GENMASK(7, 5)
 
 /* acq_common_desc */
-#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
-#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK           GENMASK(11, 0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK                BIT(0)
 
 /* aq_create_sq_cmd */
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT       5
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK        GENMASK(7, 5)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK    GENMASK(3, 0)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT  4
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK   GENMASK(6, 4)
 #define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0)
 
 /* aq_create_cq_cmd */
@@ -957,12 +1058,12 @@
 #define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
 
 /* get_set_feature_common_desc */
-#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK   GENMASK(1, 0)
 
 /* get_feature_link_desc */
-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK        BIT(0)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT        1
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK         BIT(1)
 
 /* feature_offload_desc */
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0)
@@ -974,19 +1075,19 @@
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT       5
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK        BIT(5)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT       6
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK        BIT(6)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT        7
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK         BIT(7)
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0)
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1)
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2
 #define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT        3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK         BIT(3)
 
 /* feature_rss_flow_hash_function */
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0)
@@ -994,25 +1095,34 @@
 
 /* feature_rss_flow_hash_input */
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1
-#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK  BIT(1)
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2
-#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK  BIT(2)
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1)
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2
 #define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2)
 
 /* host_info */
-#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
-#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
-#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
-#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
-#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+#define ENA_ADMIN_HOST_INFO_MAJOR_MASK                      GENMASK(7, 0)
+#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT                     8
+#define ENA_ADMIN_HOST_INFO_MINOR_MASK                      GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT                 16
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK                  GENMASK(23, 16)
+#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT               24
+#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK                GENMASK(31, 24)
+#define ENA_ADMIN_HOST_INFO_FUNCTION_MASK                   GENMASK(2, 0)
+#define ENA_ADMIN_HOST_INFO_DEVICE_SHIFT                    3
+#define ENA_ADMIN_HOST_INFO_DEVICE_MASK                     GENMASK(7, 3)
+#define ENA_ADMIN_HOST_INFO_BUS_SHIFT                       8
+#define ENA_ADMIN_HOST_INFO_BUS_MASK                        GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT      2
+#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK       BIT(2)
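
/* A sketch of filling the new host_info BDF field with the masks above;
 * pdev is an illustrative struct pci_dev pointer. devfn already packs
 * device (bits 7:3) and function (bits 2:0), matching the field layout:
 */
host_info->bdf = (pdev->bus->number << ENA_ADMIN_HOST_INFO_BUS_SHIFT) |
		 pdev->devfn;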
 
 /* aenq_common_desc */
-#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK               BIT(0)
 
 /* aenq_link_change_desc */
-#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
+#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK    BIT(0)
 
 #endif /*_ENA_ADMIN_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 7635c38..ea62604 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -41,9 +41,6 @@
 #define ENA_ASYNC_QUEUE_DEPTH 16
 #define ENA_ADMIN_QUEUE_DEPTH 32
 
-#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
-		ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \
-		| (ENA_COMMON_SPEC_VERSION_MINOR))
 
 #define ENA_CTRL_MAJOR		0
 #define ENA_CTRL_MINOR		0
@@ -61,6 +58,8 @@
 
 #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
 
+#define ENA_COM_BOUNCE_BUFFER_CNTRL_CNT	4
+
 #define ENA_REGS_ADMIN_INTR_MASK 1
 
 #define ENA_POLL_MS	5
@@ -92,7 +91,7 @@
 	struct ena_admin_acq_get_stats_resp get_resp;
 };
 
-static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
+static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
 				       struct ena_common_mem_addr *ena_addr,
 				       dma_addr_t addr)
 {
@@ -112,11 +111,11 @@
 	struct ena_com_admin_sq *sq = &queue->sq;
 	u16 size = ADMIN_SQ_SIZE(queue->q_depth);
 
-	sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr,
-					  GFP_KERNEL);
+	sq->entries = dma_alloc_coherent(queue->q_dmadev, size, &sq->dma_addr,
+					 GFP_KERNEL);
 
 	if (!sq->entries) {
-		pr_err("memory allocation failed");
+		pr_err("memory allocation failed\n");
 		return -ENOMEM;
 	}
 
@@ -134,11 +133,11 @@
 	struct ena_com_admin_cq *cq = &queue->cq;
 	u16 size = ADMIN_CQ_SIZE(queue->q_depth);
 
-	cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr,
-					  GFP_KERNEL);
+	cq->entries = dma_alloc_coherent(queue->q_dmadev, size, &cq->dma_addr,
+					 GFP_KERNEL);
 
 	if (!cq->entries) {
-		pr_err("memory allocation failed");
+		pr_err("memory allocation failed\n");
 		return -ENOMEM;
 	}
 
@@ -157,11 +156,11 @@
 
 	dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
 	size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
-	aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr,
-					    GFP_KERNEL);
+	aenq->entries = dma_alloc_coherent(dev->dmadev, size, &aenq->dma_addr,
+					   GFP_KERNEL);
 
 	if (!aenq->entries) {
-		pr_err("memory allocation failed");
+		pr_err("memory allocation failed\n");
 		return -ENOMEM;
 	}
 
@@ -191,7 +190,7 @@
 	return 0;
 }
 
-static inline void comp_ctxt_release(struct ena_com_admin_queue *queue,
+static void comp_ctxt_release(struct ena_com_admin_queue *queue,
 				     struct ena_comp_ctx *comp_ctx)
 {
 	comp_ctx->occupied = false;
@@ -236,7 +235,7 @@
 	tail_masked = admin_queue->sq.tail & queue_size_mask;
 
 	/* In case of queue FULL */
-	cnt = atomic_read(&admin_queue->outstanding_cmds);
+	cnt = (u16)atomic_read(&admin_queue->outstanding_cmds);
 	if (cnt >= admin_queue->q_depth) {
 		pr_debug("admin queue is full.\n");
 		admin_queue->stats.out_of_space++;
@@ -278,7 +277,7 @@
 	return comp_ctx;
 }
 
-static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
+static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
 {
 	size_t size = queue->q_depth * sizeof(struct ena_comp_ctx);
 	struct ena_comp_ctx *comp_ctx;
@@ -286,7 +285,7 @@
 
 	queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL);
 	if (unlikely(!queue->comp_ctx)) {
-		pr_err("memory allocation failed");
+		pr_err("memory allocation failed\n");
 		return -ENOMEM;
 	}
 
@@ -305,7 +304,7 @@
 						     struct ena_admin_acq_entry *comp,
 						     size_t comp_size_in_bytes)
 {
-	unsigned long flags;
+	unsigned long flags = 0;
 	struct ena_comp_ctx *comp_ctx;
 
 	spin_lock_irqsave(&admin_queue->q_lock, flags);
@@ -333,7 +332,7 @@
 
 	memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
 
-	io_sq->dma_addr_bits = ena_dev->dma_addr_bits;
+	io_sq->dma_addr_bits = (u8)ena_dev->dma_addr_bits;
 	io_sq->desc_entry_size =
 		(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
 		sizeof(struct ena_eth_io_tx_desc) :
@@ -345,31 +344,62 @@
 		dev_node = dev_to_node(ena_dev->dmadev);
 		set_dev_node(ena_dev->dmadev, ctx->numa_node);
 		io_sq->desc_addr.virt_addr =
-			dma_zalloc_coherent(ena_dev->dmadev, size,
-					    &io_sq->desc_addr.phys_addr,
-					    GFP_KERNEL);
+			dma_alloc_coherent(ena_dev->dmadev, size,
+					   &io_sq->desc_addr.phys_addr,
+					   GFP_KERNEL);
 		set_dev_node(ena_dev->dmadev, dev_node);
 		if (!io_sq->desc_addr.virt_addr) {
 			io_sq->desc_addr.virt_addr =
-				dma_zalloc_coherent(ena_dev->dmadev, size,
-						    &io_sq->desc_addr.phys_addr,
-						    GFP_KERNEL);
+				dma_alloc_coherent(ena_dev->dmadev, size,
+						   &io_sq->desc_addr.phys_addr,
+						   GFP_KERNEL);
 		}
-	} else {
-		dev_node = dev_to_node(ena_dev->dmadev);
-		set_dev_node(ena_dev->dmadev, ctx->numa_node);
-		io_sq->desc_addr.virt_addr =
-			devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
-		set_dev_node(ena_dev->dmadev, dev_node);
+
 		if (!io_sq->desc_addr.virt_addr) {
-			io_sq->desc_addr.virt_addr =
-				devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+			pr_err("memory allocation failed\n");
+			return -ENOMEM;
 		}
 	}
 
-	if (!io_sq->desc_addr.virt_addr) {
-		pr_err("memory allocation failed");
-		return -ENOMEM;
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		/* Allocate bounce buffers */
+		io_sq->bounce_buf_ctrl.buffer_size =
+			ena_dev->llq_info.desc_list_entry_size;
+		io_sq->bounce_buf_ctrl.buffers_num =
+			ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
+		io_sq->bounce_buf_ctrl.next_to_use = 0;
+
+		size = io_sq->bounce_buf_ctrl.buffer_size *
+			 io_sq->bounce_buf_ctrl.buffers_num;
+
+		dev_node = dev_to_node(ena_dev->dmadev);
+		set_dev_node(ena_dev->dmadev, ctx->numa_node);
+		io_sq->bounce_buf_ctrl.base_buffer =
+			devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+		set_dev_node(ena_dev->dmadev, dev_node);
+		if (!io_sq->bounce_buf_ctrl.base_buffer)
+			io_sq->bounce_buf_ctrl.base_buffer =
+				devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+
+		if (!io_sq->bounce_buf_ctrl.base_buffer) {
+			pr_err("bounce buffer memory allocation failed\n");
+			return -ENOMEM;
+		}
+
+		memcpy(&io_sq->llq_info, &ena_dev->llq_info,
+		       sizeof(io_sq->llq_info));
+
+		/* Initiate the first bounce buffer */
+		io_sq->llq_buf_ctrl.curr_bounce_buf =
+			ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+		memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+		       0x0, io_sq->llq_info.desc_list_entry_size);
+		io_sq->llq_buf_ctrl.descs_left_in_line =
+			io_sq->llq_info.descs_num_before_header;
+
+		if (io_sq->llq_info.max_entries_in_tx_burst > 0)
+			io_sq->entries_in_tx_burst_left =
+				io_sq->llq_info.max_entries_in_tx_burst;
 	}
 
 	io_sq->tail = 0;
@@ -399,18 +429,18 @@
 	prev_node = dev_to_node(ena_dev->dmadev);
 	set_dev_node(ena_dev->dmadev, ctx->numa_node);
 	io_cq->cdesc_addr.virt_addr =
-		dma_zalloc_coherent(ena_dev->dmadev, size,
-				    &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev, size,
+				   &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
 	set_dev_node(ena_dev->dmadev, prev_node);
 	if (!io_cq->cdesc_addr.virt_addr) {
 		io_cq->cdesc_addr.virt_addr =
-			dma_zalloc_coherent(ena_dev->dmadev, size,
-					    &io_cq->cdesc_addr.phys_addr,
-					    GFP_KERNEL);
+			dma_alloc_coherent(ena_dev->dmadev, size,
+					   &io_cq->cdesc_addr.phys_addr,
+					   GFP_KERNEL);
 	}
 
 	if (!io_cq->cdesc_addr.virt_addr) {
-		pr_err("memory allocation failed");
+		pr_err("memory allocation failed\n");
 		return -ENOMEM;
 	}
 
@@ -460,7 +490,7 @@
 
 	/* Go over all the completions */
 	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
-			ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+		ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
 		/* Do not read the rest of the completion entry before the
 		 * phase bit was validated
 		 */
@@ -511,7 +541,8 @@
 static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
 						     struct ena_com_admin_queue *admin_queue)
 {
-	unsigned long flags, timeout;
+	unsigned long flags = 0;
+	unsigned long timeout;
 	int ret;
 
 	timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout);
@@ -557,10 +588,163 @@
 	return ret;
 }
 
+/* ena_com_set_llq - Set the LLQ configurations of the firmware
+ *
+ * The driver provides only the enabled feature values to the device,
+ * which, in turn, checks if they are supported.
+ */
+static int ena_com_set_llq(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+	int ret;
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.feat_common.feature_id = ENA_ADMIN_LLQ;
+
+	cmd.u.llq.header_location_ctrl_enabled = llq_info->header_location_ctrl;
+	cmd.u.llq.entry_size_ctrl_enabled = llq_info->desc_list_entry_size_ctrl;
+	cmd.u.llq.desc_num_before_header_enabled = llq_info->descs_num_before_header;
+	cmd.u.llq.descriptors_stride_ctrl_enabled = llq_info->desc_stride_ctrl;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set LLQ configurations: %d\n", ret);
+
+	return ret;
+}
+
+static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
+				   struct ena_admin_feature_llq_desc *llq_features,
+				   struct ena_llq_configurations *llq_default_cfg)
+{
+	struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+	u16 supported_feat;
+	int rc;
+
+	memset(llq_info, 0, sizeof(*llq_info));
+
+	supported_feat = llq_features->header_location_ctrl_supported;
+
+	if (likely(supported_feat & llq_default_cfg->llq_header_location)) {
+		llq_info->header_location_ctrl =
+			llq_default_cfg->llq_header_location;
+	} else {
+		pr_err("Invalid header location control, supported: 0x%x\n",
+		       supported_feat);
+		return -EINVAL;
+	}
+
+	if (likely(llq_info->header_location_ctrl == ENA_ADMIN_INLINE_HEADER)) {
+		supported_feat = llq_features->descriptors_stride_ctrl_supported;
+		if (likely(supported_feat & llq_default_cfg->llq_stride_ctrl)) {
+			llq_info->desc_stride_ctrl = llq_default_cfg->llq_stride_ctrl;
+		} else	{
+			if (supported_feat & ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY) {
+				llq_info->desc_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+			} else if (supported_feat & ENA_ADMIN_SINGLE_DESC_PER_ENTRY) {
+				llq_info->desc_stride_ctrl = ENA_ADMIN_SINGLE_DESC_PER_ENTRY;
+			} else {
+				pr_err("Invalid desc_stride_ctrl, supported: 0x%x\n",
+				       supported_feat);
+				return -EINVAL;
+			}
+
+			pr_err("Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+			       llq_default_cfg->llq_stride_ctrl, supported_feat,
+			       llq_info->desc_stride_ctrl);
+		}
+	} else {
+		llq_info->desc_stride_ctrl = 0;
+	}
+
+	supported_feat = llq_features->entry_size_ctrl_supported;
+	if (likely(supported_feat & llq_default_cfg->llq_ring_entry_size)) {
+		llq_info->desc_list_entry_size_ctrl = llq_default_cfg->llq_ring_entry_size;
+		llq_info->desc_list_entry_size = llq_default_cfg->llq_ring_entry_size_value;
+	} else {
+		if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_128B) {
+			llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+			llq_info->desc_list_entry_size = 128;
+		} else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_192B) {
+			llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_192B;
+			llq_info->desc_list_entry_size = 192;
+		} else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
+			llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
+			llq_info->desc_list_entry_size = 256;
+		} else {
+			pr_err("Invalid entry_size_ctrl, supported: 0x%x\n",
+			       supported_feat);
+			return -EINVAL;
+		}
+
+		pr_err("Default llq ring entry size is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+		       llq_default_cfg->llq_ring_entry_size, supported_feat,
+		       llq_info->desc_list_entry_size);
+	}
+	if (unlikely(llq_info->desc_list_entry_size & 0x7)) {
+		/* The desc list entry size should be a whole multiple of 8.
+		 * This requirement comes from __iowrite64_copy()
+		 */
+		pr_err("illegal entry size %d\n",
+		       llq_info->desc_list_entry_size);
+		return -EINVAL;
+	}
+
+	if (llq_info->desc_stride_ctrl == ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY)
+		llq_info->descs_per_entry = llq_info->desc_list_entry_size /
+			sizeof(struct ena_eth_io_tx_desc);
+	else
+		llq_info->descs_per_entry = 1;
+
+	supported_feat = llq_features->desc_num_before_header_supported;
+	if (likely(supported_feat & llq_default_cfg->llq_num_decs_before_header)) {
+		llq_info->descs_num_before_header = llq_default_cfg->llq_num_decs_before_header;
+	} else {
+		if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2) {
+			llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+		} else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1) {
+			llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1;
+		} else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4) {
+			llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4;
+		} else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8) {
+			llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8;
+		} else {
+			pr_err("Invalid descs_num_before_header, supported: 0x%x\n",
+			       supported_feat);
+			return -EINVAL;
+		}
+
+		pr_err("Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+		       llq_default_cfg->llq_num_decs_before_header,
+		       supported_feat, llq_info->descs_num_before_header);
+	}
+
+	llq_info->max_entries_in_tx_burst =
+		(u16)(llq_features->max_tx_burst_size / llq_default_cfg->llq_ring_entry_size_value);
+
+	rc = ena_com_set_llq(ena_dev);
+	if (rc)
+		pr_err("Cannot set LLQ configuration: %d\n", rc);
+
+	return rc;
+}
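
/* A sketch of defaults a driver could pass in as llq_default_cfg; the
 * helper name is illustrative, and the 128 byte size value must match
 * the chosen llq_ring_entry_size enum member:
 */
static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
{
	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
	llq_config->llq_num_decs_before_header =
		ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
	llq_config->llq_ring_entry_size_value = 128;
}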
+
 static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx,
 							struct ena_com_admin_queue *admin_queue)
 {
-	unsigned long flags;
+	unsigned long flags = 0;
 	int ret;
 
 	wait_for_completion_timeout(&comp_ctx->wait_event,
@@ -578,16 +762,26 @@
 		admin_queue->stats.no_completion++;
 		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
 
-		if (comp_ctx->status == ENA_CMD_COMPLETED)
-			pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n",
-			       comp_ctx->cmd_opcode);
-		else
-			pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n",
+		if (comp_ctx->status == ENA_CMD_COMPLETED) {
+			pr_err("The ena device sent a completion but the driver didn't receive an MSI-X interrupt (cmd %d), autopolling mode is %s\n",
+			       comp_ctx->cmd_opcode,
+			       admin_queue->auto_polling ? "ON" : "OFF");
+			/* Check if fallback to polling is enabled */
+			if (admin_queue->auto_polling)
+				admin_queue->polling = true;
+		} else {
+			pr_err("The ena device doesn't send a completion for the admin cmd %d status %d\n",
 			       comp_ctx->cmd_opcode, comp_ctx->status);
-
-		admin_queue->running_state = false;
-		ret = -ETIME;
-		goto err;
+		}
+		/* Check if the admin queue shifted to polling mode.
+		 * This happens when a completion arrives without an interrupt
+		 * and autopolling mode is enabled; continue normal execution
+		 * in such a case.
+		 */
+		if (!admin_queue->polling) {
+			admin_queue->running_state = false;
+			ret = -ETIME;
+			goto err;
+		}
 	}
 
 	ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
@@ -606,7 +800,7 @@
 	volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
 		mmio_read->read_resp;
 	u32 mmio_read_reg, ret, i;
-	unsigned long flags;
+	unsigned long flags = 0;
 	u32 timeout = mmio_read->reg_read_to;
 
 	might_sleep();
@@ -645,7 +839,7 @@
 	}
 
 	if (read_resp->reg_off != offset) {
-		pr_err("Read failure: wrong offset provided");
+		pr_err("Read failure: wrong offset provided\n");
 		ret = ENA_MMIO_READ_TIMEOUT;
 	} else {
 		ret = read_resp->reg_val;
@@ -728,15 +922,17 @@
 	if (io_sq->desc_addr.virt_addr) {
 		size = io_sq->desc_entry_size * io_sq->q_depth;
 
-		if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
-			dma_free_coherent(ena_dev->dmadev, size,
-					  io_sq->desc_addr.virt_addr,
-					  io_sq->desc_addr.phys_addr);
-		else
-			devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr);
+		dma_free_coherent(ena_dev->dmadev, size,
+				  io_sq->desc_addr.virt_addr,
+				  io_sq->desc_addr.phys_addr);
 
 		io_sq->desc_addr.virt_addr = NULL;
 	}
+
+	if (io_sq->bounce_buf_ctrl.base_buffer) {
+		devm_kfree(ena_dev->dmadev, io_sq->bounce_buf_ctrl.base_buffer);
+		io_sq->bounce_buf_ctrl.base_buffer = NULL;
+	}
 }
 
 static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
@@ -782,7 +978,8 @@
 				  struct ena_admin_get_feat_resp *get_resp,
 				  enum ena_admin_aq_feature_id feature_id,
 				  dma_addr_t control_buf_dma_addr,
-				  u32 control_buff_size)
+				  u32 control_buff_size,
+				  u8 feature_ver)
 {
 	struct ena_com_admin_queue *admin_queue;
 	struct ena_admin_get_feat_cmd get_cmd;
@@ -813,7 +1010,7 @@
 	}
 
 	get_cmd.control_buffer.length = control_buff_size;
-
+	get_cmd.feat_common.feature_version = feature_ver;
 	get_cmd.feat_common.feature_id = feature_id;
 
 	ret = ena_com_execute_admin_command(admin_queue,
@@ -833,13 +1030,15 @@
 
 static int ena_com_get_feature(struct ena_com_dev *ena_dev,
 			       struct ena_admin_get_feat_resp *get_resp,
-			       enum ena_admin_aq_feature_id feature_id)
+			       enum ena_admin_aq_feature_id feature_id,
+			       u8 feature_ver)
 {
 	return ena_com_get_feature_ex(ena_dev,
 				      get_resp,
 				      feature_id,
 				      0,
-				      0);
+				      0,
+				      feature_ver);
 }
 
 static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
@@ -847,8 +1046,8 @@
 	struct ena_rss *rss = &ena_dev->rss;
 
 	rss->hash_key =
-		dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
-				    &rss->hash_key_dma_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+				   &rss->hash_key_dma_addr, GFP_KERNEL);
 
 	if (unlikely(!rss->hash_key))
 		return -ENOMEM;
@@ -871,8 +1070,8 @@
 	struct ena_rss *rss = &ena_dev->rss;
 
 	rss->hash_ctrl =
-		dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
-				    &rss->hash_ctrl_dma_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+				   &rss->hash_ctrl_dma_addr, GFP_KERNEL);
 
 	if (unlikely(!rss->hash_ctrl))
 		return -ENOMEM;
@@ -899,7 +1098,7 @@
 	int ret;
 
 	ret = ena_com_get_feature(ena_dev, &get_resp,
-				  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+				  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0);
 	if (unlikely(ret))
 		return ret;
 
@@ -915,8 +1114,8 @@
 		sizeof(struct ena_admin_rss_ind_table_entry);
 
 	rss->rss_ind_tbl =
-		dma_zalloc_coherent(ena_dev->dmadev, tbl_size,
-				    &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev, tbl_size,
+				   &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
 	if (unlikely(!rss->rss_ind_tbl))
 		goto mem_err1;
 
@@ -1079,40 +1278,33 @@
 	return 0;
 }
 
-static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev)
-{
-	size_t size;
-
-	size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS;
-
-	ena_dev->intr_moder_tbl =
-		devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
-	if (!ena_dev->intr_moder_tbl)
-		return -ENOMEM;
-
-	ena_com_config_default_interrupt_moderation_table(ena_dev);
-
-	return 0;
-}
-
 static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
 						 u16 intr_delay_resolution)
 {
-	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-	unsigned int i;
+	/* Initial value of intr_delay_resolution might be 0 */
+	u16 prev_intr_delay_resolution =
+		ena_dev->intr_delay_resolution ?
+		ena_dev->intr_delay_resolution :
+		ENA_DEFAULT_INTR_DELAY_RESOLUTION;
 
 	if (!intr_delay_resolution) {
 		pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
-		intr_delay_resolution = 1;
+		intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
 	}
-	ena_dev->intr_delay_resolution = intr_delay_resolution;
 
 	/* update Rx */
-	for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++)
-		intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution;
+	ena_dev->intr_moder_rx_interval =
+		ena_dev->intr_moder_rx_interval *
+		prev_intr_delay_resolution /
+		intr_delay_resolution;
 
 	/* update Tx */
-	ena_dev->intr_moder_tx_interval /= intr_delay_resolution;
+	ena_dev->intr_moder_tx_interval =
+		ena_dev->intr_moder_tx_interval *
+		prev_intr_delay_resolution /
+		intr_delay_resolution;
+
+	ena_dev->intr_delay_resolution = intr_delay_resolution;
 }
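
/* Worked example of the rescaling above, under assumed values: if
 * intr_moder_rx_interval was programmed as 64 units while the resolution
 * was still the default of 1 usec, and the device then reports a 4 usec
 * resolution, the interval becomes 64 * 1 / 4 = 16 units -- the same
 * 64 usec of real time. Tracking prev_intr_delay_resolution is what
 * keeps repeated updates from compounding.
 */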
 
 /*****************************************************************************/
@@ -1248,7 +1440,7 @@
 void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
 {
 	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
-	unsigned long flags;
+	unsigned long flags = 0;
 
 	spin_lock_irqsave(&admin_queue->q_lock, flags);
 	while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
@@ -1292,7 +1484,7 @@
 void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state)
 {
 	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
-	unsigned long flags;
+	unsigned long flags = 0;
 
 	spin_lock_irqsave(&admin_queue->q_lock, flags);
 	ena_dev->admin_queue.running_state = state;
@@ -1319,14 +1511,14 @@
 	struct ena_admin_get_feat_resp get_resp;
 	int ret;
 
-	ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG);
+	ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG, 0);
 	if (ret) {
 		pr_info("Can't get aenq configuration\n");
 		return ret;
 	}
 
 	if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) {
-		pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n",
+		pr_warn("Trying to set unsupported aenq events. supported flag: 0x%x asked flag: 0x%x\n",
 			get_resp.u.aenq.supported_groups, groups_flag);
 		return -EOPNOTSUPP;
 	}
@@ -1400,11 +1592,6 @@
 			ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
 		ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
 
-	if (ver < MIN_ENA_VER) {
-		pr_err("ENA version is lower than the minimal version the driver supports\n");
-		return -1;
-	}
-
 	pr_info("ena controller version: %d.%d.%d implementation version %d\n",
 		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
 			ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
@@ -1469,17 +1656,23 @@
 	ena_dev->admin_queue.polling = polling;
 }
 
+void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
+					 bool polling)
+{
+	ena_dev->admin_queue.auto_polling = polling;
+}
+
 int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
 {
 	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
 
 	spin_lock_init(&mmio_read->lock);
 	mmio_read->read_resp =
-		dma_zalloc_coherent(ena_dev->dmadev,
-				    sizeof(*mmio_read->read_resp),
-				    &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev,
+				   sizeof(*mmio_read->read_resp),
+				   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
 	if (unlikely(!mmio_read->read_resp))
-		return -ENOMEM;
+		goto err;
 
 	ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
 
@@ -1488,6 +1681,10 @@
 	mmio_read->readless_supported = true;
 
 	return 0;
+
+err:
+
+	return -ENOMEM;
 }
 
 void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported)
@@ -1523,8 +1720,7 @@
 }
 
 int ena_com_admin_init(struct ena_com_dev *ena_dev,
-		       struct ena_aenq_handlers *aenq_handlers,
-		       bool init_spinlock)
+		       struct ena_aenq_handlers *aenq_handlers)
 {
 	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
 	u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high;
@@ -1550,8 +1746,7 @@
 
 	atomic_set(&admin_queue->outstanding_cmds, 0);
 
-	if (init_spinlock)
-		spin_lock_init(&admin_queue->q_lock);
+	spin_lock_init(&admin_queue->q_lock);
 
 	ret = ena_com_init_comp_ctxt(admin_queue);
 	if (ret)
@@ -1691,7 +1886,7 @@
 int ena_com_get_link_params(struct ena_com_dev *ena_dev,
 			    struct ena_admin_get_feat_resp *resp)
 {
-	return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG);
+	return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0);
 }
 
 int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
@@ -1701,7 +1896,7 @@
 	int rc;
 
 	rc = ena_com_get_feature(ena_dev, &get_resp,
-				 ENA_ADMIN_DEVICE_ATTRIBUTES);
+				 ENA_ADMIN_DEVICE_ATTRIBUTES, 0);
 	if (rc)
 		return rc;
 
@@ -1709,17 +1904,34 @@
 	       sizeof(get_resp.u.dev_attr));
 	ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
 
-	rc = ena_com_get_feature(ena_dev, &get_resp,
-				 ENA_ADMIN_MAX_QUEUES_NUM);
-	if (rc)
-		return rc;
+	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+		rc = ena_com_get_feature(ena_dev, &get_resp,
+					 ENA_ADMIN_MAX_QUEUES_EXT,
+					 ENA_FEATURE_MAX_QUEUE_EXT_VER);
+		if (rc)
+			return rc;
 
-	memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
-	       sizeof(get_resp.u.max_queue));
-	ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size;
+		if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER)
+			return -EINVAL;
+
+		memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext,
+		       sizeof(get_resp.u.max_queue_ext));
+		ena_dev->tx_max_header_size =
+			get_resp.u.max_queue_ext.max_queue_ext.max_tx_header_size;
+	} else {
+		rc = ena_com_get_feature(ena_dev, &get_resp,
+					 ENA_ADMIN_MAX_QUEUES_NUM, 0);
+		memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
+		       sizeof(get_resp.u.max_queue));
+		ena_dev->tx_max_header_size =
+			get_resp.u.max_queue.max_header_size;
+
+		if (rc)
+			return rc;
+	}
 
 	rc = ena_com_get_feature(ena_dev, &get_resp,
-				 ENA_ADMIN_AENQ_CONFIG);
+				 ENA_ADMIN_AENQ_CONFIG, 0);
 	if (rc)
 		return rc;
 
@@ -1727,7 +1939,7 @@
 	       sizeof(get_resp.u.aenq));
 
 	rc = ena_com_get_feature(ena_dev, &get_resp,
-				 ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+				 ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
 	if (rc)
 		return rc;
 
@@ -1737,7 +1949,7 @@
 	/* Driver hints isn't mandatory admin command. So in case the
 	 * command isn't supported set driver hints to 0
 	 */
-	rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS);
+	rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0);
 
 	if (!rc)
 		memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints,
@@ -1748,6 +1960,15 @@
 	else
 		return rc;
 
+	rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0);
+	if (!rc)
+		memcpy(&get_feat_ctx->llq, &get_resp.u.llq,
+		       sizeof(get_resp.u.llq));
+	else if (rc == -EOPNOTSUPP)
+		memset(&get_feat_ctx->llq, 0x0, sizeof(get_feat_ctx->llq));
+	else
+		return rc;
+
 	return 0;
 }
 
@@ -1779,6 +2000,7 @@
 	struct ena_admin_aenq_entry *aenq_e;
 	struct ena_admin_aenq_common_desc *aenq_common;
 	struct ena_com_aenq *aenq  = &dev->aenq;
+	unsigned long long timestamp;
 	ena_aenq_handler handler_cb;
 	u16 masked_head, processed = 0;
 	u8 phase;
@@ -1796,10 +2018,11 @@
 		 */
 		dma_rmb();
 
+		timestamp =
+			(unsigned long long)aenq_common->timestamp_low |
+			((unsigned long long)aenq_common->timestamp_high << 32);
 		pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
-			 aenq_common->group, aenq_common->syndrom,
-			 (u64)aenq_common->timestamp_low +
-				 ((u64)aenq_common->timestamp_high << 32));
+			 aenq_common->group, aenq_common->syndrom, timestamp);
 
 		/* Handle specific event*/
 		handler_cb = ena_com_get_specific_aenq_cb(dev,
@@ -1829,7 +2052,6 @@
 	mb();
 	writel_relaxed((u32)aenq->head,
 		       dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
-	mmiowb();
 }
 
 int ena_com_dev_reset(struct ena_com_dev *ena_dev,
@@ -1975,7 +2197,7 @@
 	struct ena_admin_get_feat_resp resp;
 
 	ret = ena_com_get_feature(ena_dev, &resp,
-				  ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+				  ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
 	if (unlikely(ret)) {
 		pr_err("Failed to get offload capabilities %d\n", ret);
 		return ret;
@@ -2004,11 +2226,11 @@
 
 	/* Validate hash function is supported */
 	ret = ena_com_get_feature(ena_dev, &get_resp,
-				  ENA_ADMIN_RSS_HASH_FUNCTION);
+				  ENA_ADMIN_RSS_HASH_FUNCTION, 0);
 	if (unlikely(ret))
 		return ret;
 
-	if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) {
+	if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) {
 		pr_err("Func hash %d isn't supported by device, abort\n",
 		       rss->hash_func);
 		return -EOPNOTSUPP;
@@ -2064,7 +2286,7 @@
 	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
 				    ENA_ADMIN_RSS_HASH_FUNCTION,
 				    rss->hash_key_dma_addr,
-				    sizeof(*rss->hash_key));
+				    sizeof(*rss->hash_key), 0);
 	if (unlikely(rc))
 		return rc;
 
@@ -2093,6 +2315,7 @@
 		return -EINVAL;
 	}
 
+	rss->hash_func = func;
 	rc = ena_com_set_hash_function(ena_dev);
 
 	/* Restore the old function */
@@ -2115,7 +2338,7 @@
 	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
 				    ENA_ADMIN_RSS_HASH_FUNCTION,
 				    rss->hash_key_dma_addr,
-				    sizeof(*rss->hash_key));
+				    sizeof(*rss->hash_key), 0);
 	if (unlikely(rc))
 		return rc;
 
@@ -2140,7 +2363,7 @@
 	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
 				    ENA_ADMIN_RSS_HASH_INPUT,
 				    rss->hash_ctrl_dma_addr,
-				    sizeof(*rss->hash_ctrl));
+				    sizeof(*rss->hash_ctrl), 0);
 	if (unlikely(rc))
 		return rc;
 
@@ -2376,7 +2599,7 @@
 	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
 				    ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG,
 				    rss->rss_ind_tbl_dma_addr,
-				    tbl_size);
+				    tbl_size, 0);
 	if (unlikely(rc))
 		return rc;
 
@@ -2436,11 +2659,15 @@
 	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
 
 	host_attr->host_info =
-		dma_zalloc_coherent(ena_dev->dmadev, SZ_4K,
-				    &host_attr->host_info_dma_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev, SZ_4K,
+				   &host_attr->host_info_dma_addr, GFP_KERNEL);
 	if (unlikely(!host_attr->host_info))
 		return -ENOMEM;
 
+	host_attr->host_info->ena_spec_version = ((ENA_COMMON_SPEC_VERSION_MAJOR <<
+		ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) |
+		(ENA_COMMON_SPEC_VERSION_MINOR));
+
 	return 0;
 }
 
@@ -2450,8 +2677,9 @@
 	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
 
 	host_attr->debug_area_virt_addr =
-		dma_zalloc_coherent(ena_dev->dmadev, debug_area_size,
-				    &host_attr->debug_area_dma_addr, GFP_KERNEL);
+		dma_alloc_coherent(ena_dev->dmadev, debug_area_size,
+				   &host_attr->debug_area_dma_addr,
+				   GFP_KERNEL);
 	if (unlikely(!host_attr->debug_area_virt_addr)) {
 		host_attr->debug_area_size = 0;
 		return -ENOMEM;
@@ -2541,42 +2769,34 @@
 						  ENA_ADMIN_INTERRUPT_MODERATION);
 }
 
-int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
-						      u32 tx_coalesce_usecs)
+static int ena_com_update_nonadaptive_moderation_interval(u32 coalesce_usecs,
+							  u32 intr_delay_resolution,
+							  u32 *intr_moder_interval)
 {
-	if (!ena_dev->intr_delay_resolution) {
+	if (!intr_delay_resolution) {
 		pr_err("Illegal interrupt delay granularity value\n");
 		return -EFAULT;
 	}
 
-	ena_dev->intr_moder_tx_interval = tx_coalesce_usecs /
-		ena_dev->intr_delay_resolution;
+	*intr_moder_interval = coalesce_usecs / intr_delay_resolution;
 
 	return 0;
 }
 
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+						      u32 tx_coalesce_usecs)
+{
+	return ena_com_update_nonadaptive_moderation_interval(tx_coalesce_usecs,
+							      ena_dev->intr_delay_resolution,
+							      &ena_dev->intr_moder_tx_interval);
+}
+
 int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
 						      u32 rx_coalesce_usecs)
 {
-	if (!ena_dev->intr_delay_resolution) {
-		pr_err("Illegal interrupt delay granularity value\n");
-		return -EFAULT;
-	}
-
-	/* We use LOWEST entry of moderation table for storing
-	 * nonadaptive interrupt coalescing values
-	 */
-	ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
-		rx_coalesce_usecs / ena_dev->intr_delay_resolution;
-
-	return 0;
-}
-
-void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev)
-{
-	if (ena_dev->intr_moder_tbl)
-		devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl);
-	ena_dev->intr_moder_tbl = NULL;
+	return ena_com_update_nonadaptive_moderation_interval(rx_coalesce_usecs,
+							      ena_dev->intr_delay_resolution,
+							      &ena_dev->intr_moder_rx_interval);
 }
 
 int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
@@ -2586,7 +2806,7 @@
 	int rc;
 
 	rc = ena_com_get_feature(ena_dev, &get_resp,
-				 ENA_ADMIN_INTERRUPT_MODERATION);
+				 ENA_ADMIN_INTERRUPT_MODERATION, 0);
 
 	if (rc) {
 		if (rc == -EOPNOTSUPP) {
@@ -2603,62 +2823,14 @@
 		return rc;
 	}
 
-	rc = ena_com_init_interrupt_moderation_table(ena_dev);
-	if (rc)
-		goto err;
-
 	/* if moderation is supported by device we set adaptive moderation */
 	delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution;
 	ena_com_update_intr_delay_resolution(ena_dev, delay_resolution);
-	ena_com_enable_adaptive_moderation(ena_dev);
+
+	/* Disable adaptive moderation by default - can be enabled later */
+	ena_com_disable_adaptive_moderation(ena_dev);
 
 	return 0;
-err:
-	ena_com_destroy_interrupt_moderation(ena_dev);
-	return rc;
-}
-
-void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev)
-{
-	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
-	if (!intr_moder_tbl)
-		return;
-
-	intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
-		ENA_INTR_LOWEST_USECS;
-	intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval =
-		ENA_INTR_LOWEST_PKTS;
-	intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval =
-		ENA_INTR_LOWEST_BYTES;
-
-	intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval =
-		ENA_INTR_LOW_USECS;
-	intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval =
-		ENA_INTR_LOW_PKTS;
-	intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval =
-		ENA_INTR_LOW_BYTES;
-
-	intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval =
-		ENA_INTR_MID_USECS;
-	intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval =
-		ENA_INTR_MID_PKTS;
-	intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval =
-		ENA_INTR_MID_BYTES;
-
-	intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval =
-		ENA_INTR_HIGH_USECS;
-	intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval =
-		ENA_INTR_HIGH_PKTS;
-	intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval =
-		ENA_INTR_HIGH_BYTES;
-
-	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval =
-		ENA_INTR_HIGHEST_USECS;
-	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval =
-		ENA_INTR_HIGHEST_PKTS;
-	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval =
-		ENA_INTR_HIGHEST_BYTES;
 }
 
 unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev)
@@ -2668,47 +2840,34 @@
 
 unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev)
 {
-	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+	return ena_dev->intr_moder_rx_interval;
+}
 
-	if (intr_moder_tbl)
-		return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval;
+int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
+			    struct ena_admin_feature_llq_desc *llq_features,
+			    struct ena_llq_configurations *llq_default_cfg)
+{
+	struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+	int rc;
+
+	if (!llq_features->max_llq_num) {
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+		return 0;
+	}
+
+	rc = ena_com_config_llq_info(ena_dev, llq_features, llq_default_cfg);
+	if (rc)
+		return rc;
+
+	ena_dev->tx_max_header_size = llq_info->desc_list_entry_size -
+		(llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc));
+
+	if (unlikely(ena_dev->tx_max_header_size == 0)) {
+		pr_err("the size of the LLQ entry is smaller than needed\n");
+		return -EINVAL;
+	}
+
+	ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
 
 	return 0;
 }
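
/* A sketch of the expected probe-time sequence (llq_config and
 * get_feat_ctx are illustrative locals; error handling elided), using
 * the default-configuration helper sketched above:
 */
set_default_llq_configurations(&llq_config);
rc = ena_com_config_dev_mode(ena_dev, &get_feat_ctx.llq, &llq_config);
if (rc)
	return rc;	/* or fall back to host mode, per driver policy */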
-
-void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
-					enum ena_intr_moder_level level,
-					struct ena_intr_moder_entry *entry)
-{
-	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
-	if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
-		return;
-
-	intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval;
-	if (ena_dev->intr_delay_resolution)
-		intr_moder_tbl[level].intr_moder_interval /=
-			ena_dev->intr_delay_resolution;
-	intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval;
-
-	/* use hardcoded value until ethtool supports bytecount parameter */
-	if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED)
-		intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval;
-}
-
-void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
-				       enum ena_intr_moder_level level,
-				       struct ena_intr_moder_entry *entry)
-{
-	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
-	if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
-		return;
-
-	entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval;
-	if (ena_dev->intr_delay_resolution)
-		entry->intr_moder_interval *= ena_dev->intr_delay_resolution;
-	entry->pkts_per_interval =
-	intr_moder_tbl[level].pkts_per_interval;
-	entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval;
-}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 7b784f8..7c941eb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -37,6 +37,8 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/prefetch.h>
 #include <linux/sched.h>
 #include <linux/sizes.h>
 #include <linux/spinlock.h>
@@ -70,48 +72,20 @@
 /*****************************************************************************/
 /* ENA adaptive interrupt moderation settings */
 
-#define ENA_INTR_LOWEST_USECS           (0)
-#define ENA_INTR_LOWEST_PKTS            (3)
-#define ENA_INTR_LOWEST_BYTES           (2 * 1524)
-
-#define ENA_INTR_LOW_USECS              (32)
-#define ENA_INTR_LOW_PKTS               (12)
-#define ENA_INTR_LOW_BYTES              (16 * 1024)
-
-#define ENA_INTR_MID_USECS              (80)
-#define ENA_INTR_MID_PKTS               (48)
-#define ENA_INTR_MID_BYTES              (64 * 1024)
-
-#define ENA_INTR_HIGH_USECS             (128)
-#define ENA_INTR_HIGH_PKTS              (96)
-#define ENA_INTR_HIGH_BYTES             (128 * 1024)
-
-#define ENA_INTR_HIGHEST_USECS          (192)
-#define ENA_INTR_HIGHEST_PKTS           (128)
-#define ENA_INTR_HIGHEST_BYTES          (192 * 1024)
-
 #define ENA_INTR_INITIAL_TX_INTERVAL_USECS		196
-#define ENA_INTR_INITIAL_RX_INTERVAL_USECS		4
-#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT			6
-#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT			4
-#define ENA_INTR_MODER_LEVEL_STRIDE			2
-#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED		0xFFFFFF
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS		0
+#define ENA_DEFAULT_INTR_DELAY_RESOLUTION		1
 
 #define ENA_HW_HINTS_NO_TIMEOUT				0xFFFF
 
-enum ena_intr_moder_level {
-	ENA_INTR_MODER_LOWEST = 0,
-	ENA_INTR_MODER_LOW,
-	ENA_INTR_MODER_MID,
-	ENA_INTR_MODER_HIGH,
-	ENA_INTR_MODER_HIGHEST,
-	ENA_INTR_MAX_NUM_OF_LEVELS,
-};
+#define ENA_FEATURE_MAX_QUEUE_EXT_VER	1
 
-struct ena_intr_moder_entry {
-	unsigned int intr_moder_interval;
-	unsigned int pkts_per_interval;
-	unsigned int bytes_per_interval;
+struct ena_llq_configurations {
+	enum ena_admin_llq_header_location llq_header_location;
+	enum ena_admin_llq_ring_entry_size llq_ring_entry_size;
+	enum ena_admin_llq_stride_ctrl  llq_stride_ctrl;
+	enum ena_admin_llq_num_descs_before_header llq_num_decs_before_header;
+	u16 llq_ring_entry_size_value;
 };
 
 enum queue_direction {
@@ -142,6 +116,16 @@
 	u16 l4_hdr_len; /* In words */
 };
 
+struct ena_com_llq_info {
+	u16 header_location_ctrl;
+	u16 desc_stride_ctrl;
+	u16 desc_list_entry_size_ctrl;
+	u16 desc_list_entry_size;
+	u16 descs_num_before_header;
+	u16 descs_per_entry;
+	u16 max_entries_in_tx_burst;
+};
+
 struct ena_com_io_cq {
 	struct ena_com_io_desc_addr cdesc_addr;
 
@@ -179,6 +163,20 @@
 
 } ____cacheline_aligned;
 
+struct ena_com_io_bounce_buffer_control {
+	u8 *base_buffer;
+	u16 next_to_use;
+	u16 buffer_size;
+	u16 buffers_num;  /* Must be a power of 2 */
+};
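
/* The power-of-2 requirement on buffers_num lets the next buffer be
 * selected with a mask rather than a modulo. A minimal sketch of that
 * selection (the in-tree helper used from ena_com.c is
 * ena_com_get_next_bounce_buffer(); this local name is illustrative):
 */
static inline u8 *next_bounce_buffer(struct ena_com_io_bounce_buffer_control *ctrl)
{
	/* Wrap around the ring of bounce buffers using a cheap mask. */
	return ctrl->base_buffer +
	       (ctrl->next_to_use++ & (ctrl->buffers_num - 1)) *
	       ctrl->buffer_size;
}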
+
+/* This struct keeps track of the current location of the next llq entry */
+struct ena_com_llq_pkt_ctrl {
+	u8 *curr_bounce_buf;
+	u16 idx;
+	u16 descs_left_in_line;
+};
+
 struct ena_com_io_sq {
 	struct ena_com_io_desc_addr desc_addr;
 
@@ -190,6 +188,9 @@
 
 	u32 msix_vector;
 	struct ena_com_tx_meta cached_tx_meta;
+	struct ena_com_llq_info llq_info;
+	struct ena_com_llq_pkt_ctrl llq_buf_ctrl;
+	struct ena_com_io_bounce_buffer_control bounce_buf_ctrl;
 
 	u16 q_depth;
 	u16 qid;
@@ -197,10 +198,12 @@
 	u16 idx;
 	u16 tail;
 	u16 next_to_comp;
+	u16 llq_last_copy_tail;
 	u32 tx_max_header_size;
 	u8 phase;
 	u8 desc_entry_size;
 	u8 dma_addr_bits;
+	u16 entries_in_tx_burst_left;
 } ____cacheline_aligned;
 
 struct ena_com_admin_cq {
@@ -244,6 +247,9 @@
 	/* Indicate if the admin queue should poll for completion */
 	bool polling;
 
+	/* Define if fallback to polling mode should occur */
+	bool auto_polling;
+
 	u16 curr_cmd_id;
 
 	/* Indicate that the ena was initialized and can
@@ -332,16 +338,26 @@
 	struct ena_host_attribute host_attr;
 	bool adaptive_coalescing;
 	u16 intr_delay_resolution;
+
+	/* Interrupt moderation intervals are stored in usec divided by
+	 * intr_delay_resolution, which is supplied by the device.
+	 */
 	u32 intr_moder_tx_interval;
+	u32 intr_moder_rx_interval;
+
 	struct ena_intr_moder_entry *intr_moder_tbl;
+
+	struct ena_com_llq_info llq_info;
 };
 
 struct ena_com_dev_get_features_ctx {
 	struct ena_admin_queue_feature_desc max_queues;
+	struct ena_admin_queue_ext_feature_desc max_queue_ext;
 	struct ena_admin_device_attr_feature_desc dev_attr;
 	struct ena_admin_feature_aenq_desc aenq;
 	struct ena_admin_feature_offload_desc offload;
 	struct ena_admin_ena_hw_hints hw_hints;
+	struct ena_admin_feature_llq_desc llq;
 };
 
 struct ena_com_create_io_ctx {
@@ -397,8 +413,6 @@
 /* ena_com_admin_init - Init the admin and the async queues
  * @ena_dev: ENA communication layer struct
  * @aenq_handlers: Those handlers to be called upon event.
- * @init_spinlock: Indicate if this method should init the admin spinlock or
- * the spinlock was init before (for example, in a case of FLR).
  *
  * Initialize the admin submission and completion queues.
  * Initialize the asynchronous events notification queues.
@@ -406,8 +420,7 @@
  * @return - 0 on success, negative value on failure.
  */
 int ena_com_admin_init(struct ena_com_dev *ena_dev,
-		       struct ena_aenq_handlers *aenq_handlers,
-		       bool init_spinlock);
+		       struct ena_aenq_handlers *aenq_handlers);
 
 /* ena_com_admin_destroy - Destroy the admin and the async events queues.
  * @ena_dev: ENA communication layer struct
@@ -499,6 +512,17 @@
  */
 bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev);
 
+/* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the autopolling mode.
+ * If autopolling is on, a missing interrupt while a completion is
+ * available causes a switch to polling mode.
+ */
+void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
+					 bool polling);
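
/* Illustrative usage: a driver that wants the fallback enables it once
 * after admin queue init:
 *
 *	ena_com_set_admin_auto_polling_mode(ena_dev, true);
 */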
+
 /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
  * @ena_dev: ENA communication layer struct
  *
@@ -858,11 +882,6 @@
  */
 int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev);
 
-/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources
- * @ena_dev: ENA communication layer struct
- */
-void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev);
-
 /* ena_com_interrupt_moderation_supported - Return if interrupt moderation
  * capability is supported by the device.
  *
@@ -870,12 +889,6 @@
  */
 bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev);
 
-/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt
- * moderation table back to the default parameters.
- * @ena_dev: ENA communication layer struct
- */
-void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev);
-
 /* ena_com_update_nonadaptive_moderation_interval_tx - Update the
  * non-adaptive interval in Tx direction.
  * @ena_dev: ENA communication layer struct
@@ -912,28 +925,15 @@
  */
 unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev);
 
-/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt
- * moderation table.
+/* ena_com_config_dev_mode - Configure the placement policy of the device.
  * @ena_dev: ENA communication layer struct
- * @level: Interrupt moderation table level
- * @entry: Entry value
- *
- * Update a single entry in the interrupt moderation table.
+ * @llq_features: LLQ feature descriptor, retrieved via
+ *                ena_com_get_dev_attr_feat.
+ * @llq_default_config: The driver's default LLQ configuration parameters
  */
-void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
-					enum ena_intr_moder_level level,
-					struct ena_intr_moder_entry *entry);
-
-/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry.
- * @ena_dev: ENA communication layer struct
- * @level: Interrupt moderation table level
- * @entry: Entry to fill.
- *
- * Initialize the entry according to the adaptive interrupt moderation table.
- */
-void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
-				       enum ena_intr_moder_level level,
-				       struct ena_intr_moder_entry *entry);
+int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
+			    struct ena_admin_feature_llq_desc *llq_features,
+			    struct ena_llq_configurations *llq_default_config);
 
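
A sketch of the intended call sequence, assuming the ena_llq_configurations
field names below (chosen to mirror the driver's default LLQ setup; treat
them as illustrative) and a get_feat_ctx already filled by
ena_com_get_dev_attr_feat():

	struct ena_llq_configurations llq_config = {};
	int rc;

	/* Assumed defaults: headers pushed inline in 128-byte descriptor
	 * lines, with up to two descriptors placed before the header.
	 */
	llq_config.llq_header_location = ENA_ADMIN_INLINE_HEADER;
	llq_config.llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
	llq_config.llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128;
	llq_config.llq_ring_entry_size_value = 128;
	llq_config.llq_num_decs_before_header =
		ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;

	rc = ena_com_config_dev_mode(ena_dev, &get_feat_ctx.llq, &llq_config);
	if (rc)
		pr_err("Failed to configure the LLQ placement policy\n");
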
 static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev)
 {
@@ -950,75 +950,6 @@
 	ena_dev->adaptive_coalescing = false;
 }
 
-/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay
- * @ena_dev: ENA communication layer struct
- * @pkts: Number of packets since the last update
- * @bytes: Number of bytes received since the last update.
- * @smoothed_interval: Returned interval
- * @moder_tbl_idx: Current table level as input update new level as return
- * value.
- */
-static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev,
-						     unsigned int pkts,
-						     unsigned int bytes,
-						     unsigned int *smoothed_interval,
-						     unsigned int *moder_tbl_idx)
-{
-	enum ena_intr_moder_level curr_moder_idx, new_moder_idx;
-	struct ena_intr_moder_entry *curr_moder_entry;
-	struct ena_intr_moder_entry *pred_moder_entry;
-	struct ena_intr_moder_entry *new_moder_entry;
-	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-	unsigned int interval;
-
-	/* We apply adaptive moderation on Rx path only.
-	 * Tx uses static interrupt moderation.
-	 */
-	if (!pkts || !bytes)
-		/* Tx interrupt, or spurious interrupt,
-		 * in both cases we just use same delay values
-		 */
-		return;
-
-	curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx);
-	if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) {
-		pr_err("Wrong moderation index %u\n", curr_moder_idx);
-		return;
-	}
-
-	curr_moder_entry = &intr_moder_tbl[curr_moder_idx];
-	new_moder_idx = curr_moder_idx;
-
-	if (curr_moder_idx == ENA_INTR_MODER_LOWEST) {
-		if ((pkts > curr_moder_entry->pkts_per_interval) ||
-		    (bytes > curr_moder_entry->bytes_per_interval))
-			new_moder_idx =
-				(enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
-	} else {
-		pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE];
-
-		if ((pkts <= pred_moder_entry->pkts_per_interval) ||
-		    (bytes <= pred_moder_entry->bytes_per_interval))
-			new_moder_idx =
-				(enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE);
-		else if ((pkts > curr_moder_entry->pkts_per_interval) ||
-			 (bytes > curr_moder_entry->bytes_per_interval)) {
-			if (curr_moder_idx != ENA_INTR_MODER_HIGHEST)
-				new_moder_idx =
-					(enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
-		}
-	}
-	new_moder_entry = &intr_moder_tbl[new_moder_idx];
-
-	interval = new_moder_entry->intr_moder_interval;
-	*smoothed_interval = (
-		(interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT +
-		ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) /
-		10;
-
-	*moder_tbl_idx = new_moder_idx;
-}
-
 /* ena_com_update_intr_reg - Prepare interrupt register
  * @intr_reg: interrupt register to update.
  * @rx_delay_interval: Rx interval in usecs
@@ -1044,4 +975,21 @@
 		intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK;
 }
 
+static inline u8 *ena_com_get_next_bounce_buffer(struct ena_com_io_bounce_buffer_control *bounce_buf_ctrl)
+{
+	u16 size, buffers_num;
+	u8 *buf;
+
+	size = bounce_buf_ctrl->buffer_size;
+	buffers_num = bounce_buf_ctrl->buffers_num;
+
+	buf = bounce_buf_ctrl->base_buffer +
+		(bounce_buf_ctrl->next_to_use++ & (buffers_num - 1)) * size;
+
+	prefetchw(bounce_buf_ctrl->base_buffer +
+		(bounce_buf_ctrl->next_to_use & (buffers_num - 1)) * size);
+
+	return buf;
+}
+
 #endif /* !(ENA_COM) */
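
The bounce buffers above form a small ring indexed with & (buffers_num - 1),
which only behaves like a modulo when buffers_num is a power of two (an
assumption the helper relies on). A standalone sketch of the trick,
including the u16 wrap-around:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint16_t next_to_use = 65534;	/* near the u16 wrap point */
		const uint16_t buffers_num = 8;	/* must be a power of two */
		int i;

		/* The counter runs free; masking maps it onto a slot even
		 * across the wrap from 65535 back to 0.
		 */
		for (i = 0; i < 4; i++, next_to_use++)
			printf("counter %u -> slot %u\n", next_to_use,
			       next_to_use & (buffers_num - 1));
		return 0;
	}
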
diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
index bb8d736..23beb7e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_common_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
@@ -32,8 +32,8 @@
 #ifndef _ENA_COMMON_H_
 #define _ENA_COMMON_H_
 
-#define ENA_COMMON_SPEC_VERSION_MAJOR	0 /*  */
-#define ENA_COMMON_SPEC_VERSION_MINOR	10 /*  */
+#define ENA_COMMON_SPEC_VERSION_MAJOR        2
+#define ENA_COMMON_SPEC_VERSION_MINOR        0
 
 /* ENA operates with 48-bit memory addresses. ena_mem_addr_t */
 struct ena_common_mem_addr {
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index 2b3ff0c..2845ac2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -32,7 +32,7 @@
 
 #include "ena_eth_com.h"
 
-static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
+static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
 	struct ena_com_io_cq *io_cq)
 {
 	struct ena_eth_io_rx_cdesc_base *cdesc;
@@ -59,16 +59,7 @@
 	return cdesc;
 }
 
-static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
-{
-	io_cq->head++;
-
-	/* Switch phase bit in case of wrap around */
-	if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
-		io_cq->phase ^= 1;
-}
-
-static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+static void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq)
 {
 	u16 tail_masked;
 	u32 offset;
@@ -80,50 +71,175 @@
 	return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
 }
 
-static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq)
+static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
+					      u8 *bounce_buffer)
 {
-	u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
-	u32 offset = tail_masked * io_sq->desc_entry_size;
+	struct ena_com_llq_info *llq_info = &io_sq->llq_info;
 
-	/* In case this queue isn't a LLQ */
-	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
-		return;
+	u16 dst_tail_mask;
+	u32 dst_offset;
 
-	memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset,
-		    io_sq->desc_addr.virt_addr + offset,
-		    io_sq->desc_entry_size);
-}
+	dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1);
+	dst_offset = dst_tail_mask * llq_info->desc_list_entry_size;
 
-static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
-{
+	if (is_llq_max_tx_burst_exists(io_sq)) {
+		if (unlikely(!io_sq->entries_in_tx_burst_left)) {
+			pr_err("Error: trying to send more packets than tx burst allows\n");
+			return -ENOSPC;
+		}
+
+		io_sq->entries_in_tx_burst_left--;
+		pr_debug("decreasing entries_in_tx_burst_left of queue %d to %d\n",
+			 io_sq->qid, io_sq->entries_in_tx_burst_left);
+	}
+
+	/* Make sure everything was written into the bounce buffer before
+	 * writing the bounce buffer to the device
+	 */
+	wmb();
+
+	/* The line is completed. Copy it to dev */
+	__iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset,
+			 bounce_buffer, (llq_info->desc_list_entry_size) / 8);
+
 	io_sq->tail++;
 
 	/* Switch phase bit in case of wrap around */
 	if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
 		io_sq->phase ^= 1;
-}
-
-static inline int ena_com_write_header(struct ena_com_io_sq *io_sq,
-				       u8 *head_src, u16 header_len)
-{
-	u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
-	u8 __iomem *dev_head_addr =
-		io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size);
-
-	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
-		return 0;
-
-	if (unlikely(!io_sq->header_addr)) {
-		pr_err("Push buffer header ptr is NULL\n");
-		return -EINVAL;
-	}
-
-	memcpy_toio(dev_head_addr, head_src, header_len);
 
 	return 0;
 }
 
-static inline struct ena_eth_io_rx_cdesc_base *
+static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
+					  u8 *header_src,
+					  u16 header_len)
+{
+	struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+	struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+	u8 *bounce_buffer = pkt_ctrl->curr_bounce_buf;
+	u16 header_offset;
+
+	if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
+		return 0;
+
+	header_offset =
+		llq_info->descs_num_before_header * io_sq->desc_entry_size;
+
+	if (unlikely((header_offset + header_len) >
+		     llq_info->desc_list_entry_size)) {
+		pr_err("trying to write header larger than llq entry can accommodate\n");
+		return -EFAULT;
+	}
+
+	if (unlikely(!bounce_buffer)) {
+		pr_err("bounce buffer is NULL\n");
+		return -EFAULT;
+	}
+
+	memcpy(bounce_buffer + header_offset, header_src, header_len);
+
+	return 0;
+}
+
+static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
+{
+	struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+	u8 *bounce_buffer;
+	void *sq_desc;
+
+	bounce_buffer = pkt_ctrl->curr_bounce_buf;
+
+	if (unlikely(!bounce_buffer)) {
+		pr_err("bounce buffer is NULL\n");
+		return NULL;
+	}
+
+	sq_desc = bounce_buffer + pkt_ctrl->idx * io_sq->desc_entry_size;
+	pkt_ctrl->idx++;
+	pkt_ctrl->descs_left_in_line--;
+
+	return sq_desc;
+}
+
+static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
+{
+	struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+	struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+	int rc;
+
+	if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
+		return 0;
+
+	/* bounce buffer was used, so write it and get a new one */
+	if (pkt_ctrl->idx) {
+		rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+							pkt_ctrl->curr_bounce_buf);
+		if (unlikely(rc))
+			return rc;
+
+		pkt_ctrl->curr_bounce_buf =
+			ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+		memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+		       0x0, llq_info->desc_list_entry_size);
+	}
+
+	pkt_ctrl->idx = 0;
+	pkt_ctrl->descs_left_in_line = llq_info->descs_num_before_header;
+	return 0;
+}
+
+static void *get_sq_desc(struct ena_com_io_sq *io_sq)
+{
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+		return get_sq_desc_llq(io_sq);
+
+	return get_sq_desc_regular_queue(io_sq);
+}
+
+static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
+{
+	struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
+	struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+	int rc;
+
+	if (!pkt_ctrl->descs_left_in_line) {
+		rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+							pkt_ctrl->curr_bounce_buf);
+		if (unlikely(rc))
+			return rc;
+
+		pkt_ctrl->curr_bounce_buf =
+			ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
+		memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+		       0x0, llq_info->desc_list_entry_size);
+
+		pkt_ctrl->idx = 0;
+		if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY))
+			pkt_ctrl->descs_left_in_line = 1;
+		else
+			pkt_ctrl->descs_left_in_line =
+			llq_info->desc_list_entry_size / io_sq->desc_entry_size;
+	}
+
+	return 0;
+}
+
+static int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+{
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+		return ena_com_sq_update_llq_tail(io_sq);
+
+	io_sq->tail++;
+
+	/* Switch phase bit in case of wrap around */
+	if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+		io_sq->phase ^= 1;
+
+	return 0;
+}
+
+static struct ena_eth_io_rx_cdesc_base *
 	ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx)
 {
 	idx &= (io_cq->q_depth - 1);
@@ -132,7 +248,7 @@
 		idx * io_cq->cdesc_entry_size_in_bytes);
 }
 
-static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
+static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
 					   u16 *first_cdesc_idx)
 {
 	struct ena_eth_io_rx_cdesc_base *cdesc;
@@ -169,25 +285,8 @@
 	return count;
 }
 
-static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
-					     struct ena_com_tx_ctx *ena_tx_ctx)
-{
-	int rc;
-
-	if (ena_tx_ctx->meta_valid) {
-		rc = memcmp(&io_sq->cached_tx_meta,
-			    &ena_tx_ctx->ena_meta,
-			    sizeof(struct ena_com_tx_meta));
-
-		if (unlikely(rc != 0))
-			return true;
-	}
-
-	return false;
-}
-
-static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
-							 struct ena_com_tx_ctx *ena_tx_ctx)
+static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+						  struct ena_com_tx_ctx *ena_tx_ctx)
 {
 	struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
 	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
@@ -232,11 +331,10 @@
 	memcpy(&io_sq->cached_tx_meta, ena_meta,
 	       sizeof(struct ena_com_tx_meta));
 
-	ena_com_copy_curr_sq_desc_to_dev(io_sq);
-	ena_com_sq_update_tail(io_sq);
+	return ena_com_sq_update_tail(io_sq);
 }
 
-static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
+static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
 					struct ena_eth_io_rx_cdesc_base *cdesc)
 {
 	ena_rx_ctx->l3_proto = cdesc->status &
@@ -250,6 +348,9 @@
 	ena_rx_ctx->l4_csum_err =
 		!!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
 		ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT);
+	ena_rx_ctx->l4_csum_checked =
+		!!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT);
 	ena_rx_ctx->hash = cdesc->hash;
 	ena_rx_ctx->frag =
 		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
@@ -271,18 +372,19 @@
 {
 	struct ena_eth_io_tx_desc *desc = NULL;
 	struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs;
-	void *push_header = ena_tx_ctx->push_header;
+	void *buffer_to_push = ena_tx_ctx->push_header;
 	u16 header_len = ena_tx_ctx->header_len;
 	u16 num_bufs = ena_tx_ctx->num_bufs;
-	int total_desc, i, rc;
+	u16 start_tail = io_sq->tail;
+	int i, rc;
 	bool have_meta;
 	u64 addr_hi;
 
 	WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type");
 
 	/* num_bufs +1 for potential meta desc */
-	if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) {
-		pr_err("Not enough space in the tx queue\n");
+	if (unlikely(!ena_com_sq_have_enough_space(io_sq, num_bufs + 1))) {
+		pr_debug("Not enough space in the tx queue\n");
 		return -ENOMEM;
 	}
 
@@ -292,23 +394,32 @@
 		return -EINVAL;
 	}
 
-	/* start with pushing the header (if needed) */
-	rc = ena_com_write_header(io_sq, push_header, header_len);
+	if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
+		     !buffer_to_push))
+		return -EINVAL;
+
+	rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len);
 	if (unlikely(rc))
 		return rc;
 
 	have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
 			ena_tx_ctx);
-	if (have_meta)
-		ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+	if (have_meta) {
+		rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+		if (unlikely(rc))
+			return rc;
+	}
 
-	/* If the caller doesn't want send packets */
+	/* If the caller doesn't want to send packets */
 	if (unlikely(!num_bufs && !header_len)) {
-		*nb_hw_desc = have_meta ? 0 : 1;
-		return 0;
+		rc = ena_com_close_bounce_buffer(io_sq);
+		*nb_hw_desc = io_sq->tail - start_tail;
+		return rc;
 	}
 
 	desc = get_sq_desc(io_sq);
+	if (unlikely(!desc))
+		return -EFAULT;
 	memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
 
 	/* Set first desc when we don't have meta descriptor */
@@ -360,10 +471,14 @@
 	for (i = 0; i < num_bufs; i++) {
 		/* The first desc share the same desc as the header */
 		if (likely(i != 0)) {
-			ena_com_copy_curr_sq_desc_to_dev(io_sq);
-			ena_com_sq_update_tail(io_sq);
+			rc = ena_com_sq_update_tail(io_sq);
+			if (unlikely(rc))
+				return rc;
 
 			desc = get_sq_desc(io_sq);
+			if (unlikely(!desc))
+				return -EFAULT;
+
 			memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
 
 			desc->len_ctrl |= (io_sq->phase <<
@@ -386,15 +501,14 @@
 	/* set the last desc indicator */
 	desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
 
-	ena_com_copy_curr_sq_desc_to_dev(io_sq);
+	rc = ena_com_sq_update_tail(io_sq);
+	if (unlikely(rc))
+		return rc;
 
-	ena_com_sq_update_tail(io_sq);
+	rc = ena_com_close_bounce_buffer(io_sq);
 
-	total_desc = max_t(u16, num_bufs, 1);
-	total_desc += have_meta ? 1 : 0;
-
-	*nb_hw_desc = total_desc;
-	return 0;
+	*nb_hw_desc = io_sq->tail - start_tail;
+	return rc;
 }
 
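Deriving *nb_hw_desc from the tail delta replaces the old num_bufs/have_meta
arithmetic: the delta automatically accounts for the meta descriptor and, in
LLQ mode, for every descriptor-list line actually flushed to the device.
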
 int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
@@ -453,15 +567,18 @@
 
 	WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
 
-	if (unlikely(ena_com_sq_empty_space(io_sq) == 0))
+	if (unlikely(!ena_com_sq_have_enough_space(io_sq, 1)))
 		return -ENOSPC;
 
 	desc = get_sq_desc(io_sq);
+	if (unlikely(!desc))
+		return -EFAULT;
+
 	memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc));
 
 	desc->length = ena_buf->len;
 
-	desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK;
+	desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK;
 	desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
 	desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
 	desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
@@ -472,43 +589,7 @@
 	desc->buff_addr_hi =
 		((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
 
-	ena_com_sq_update_tail(io_sq);
-
-	return 0;
-}
-
-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
-{
-	u8 expected_phase, cdesc_phase;
-	struct ena_eth_io_tx_cdesc *cdesc;
-	u16 masked_head;
-
-	masked_head = io_cq->head & (io_cq->q_depth - 1);
-	expected_phase = io_cq->phase;
-
-	cdesc = (struct ena_eth_io_tx_cdesc *)
-		((uintptr_t)io_cq->cdesc_addr.virt_addr +
-		(masked_head * io_cq->cdesc_entry_size_in_bytes));
-
-	/* When the current completion descriptor phase isn't the same as the
-	 * expected, it mean that the device still didn't update
-	 * this completion.
-	 */
-	cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
-	if (cdesc_phase != expected_phase)
-		return -EAGAIN;
-
-	dma_rmb();
-	if (unlikely(cdesc->req_id >= io_cq->q_depth)) {
-		pr_err("Invalid req id %d\n", cdesc->req_id);
-		return -EINVAL;
-	}
-
-	ena_com_cq_inc_head(io_cq);
-
-	*req_id = READ_ONCE(cdesc->req_id);
-
-	return 0;
+	return ena_com_sq_update_tail(io_sq);
 }
 
 bool ena_com_cq_empty(struct ena_com_io_cq *io_cq)
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 2f76572..77986c0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -67,6 +67,7 @@
 	enum ena_eth_io_l4_proto_index l4_proto;
 	bool l3_csum_err;
 	bool l4_csum_err;
+	u8 l4_csum_checked;
 	/* fragmented packet */
 	bool frag;
 	u32 hash;
@@ -86,8 +87,6 @@
 			       struct ena_com_buf *ena_buf,
 			       u16 req_id);
 
-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id);
-
 bool ena_com_cq_empty(struct ena_com_io_cq *io_cq);
 
 static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
@@ -96,7 +95,7 @@
 	writel(intr_reg->intr_control, io_cq->unmask_reg);
 }
 
-static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
+static inline int ena_com_free_desc(struct ena_com_io_sq *io_sq)
 {
 	u16 tail, next_to_comp, cnt;
 
@@ -107,17 +106,87 @@
 	return io_sq->q_depth - 1 - cnt;
 }
 
+/* Check if the submission queue has enough space to hold required_buffers */
+static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq,
+						u16 required_buffers)
+{
+	int temp;
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		return ena_com_free_desc(io_sq) >= required_buffers;
+
+	/* This calculation doesn't need to be 100% accurate. To reduce
+	 * the calculation overhead, just subtract 2 lines from the free
+	 * descs: one for the header line and one to compensate for the
+	 * rounded-down division.
+	 */
+	temp = required_buffers / io_sq->llq_info.descs_per_entry + 2;
+
+	return ena_com_free_desc(io_sq) > temp;
+}
+
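
For example, with descs_per_entry = 4, a packet needing 6 descriptors gives
temp = 6/4 + 2 = 3, so the queue only reports space while more than three
LLQ lines are free; the two spare lines cover the header line and the
rounded-down division.
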
+static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
+					     struct ena_com_tx_ctx *ena_tx_ctx)
+{
+	if (!ena_tx_ctx->meta_valid)
+		return false;
+
+	return !!memcmp(&io_sq->cached_tx_meta,
+			&ena_tx_ctx->ena_meta,
+			sizeof(struct ena_com_tx_meta));
+}
+
+static inline bool is_llq_max_tx_burst_exists(struct ena_com_io_sq *io_sq)
+{
+	return (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) &&
+	       io_sq->llq_info.max_entries_in_tx_burst > 0;
+}
+
+static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq,
+					      struct ena_com_tx_ctx *ena_tx_ctx)
+{
+	struct ena_com_llq_info *llq_info;
+	int descs_after_first_entry;
+	int num_entries_needed = 1;
+	u16 num_descs;
+
+	if (!is_llq_max_tx_burst_exists(io_sq))
+		return false;
+
+	llq_info = &io_sq->llq_info;
+	num_descs = ena_tx_ctx->num_bufs;
+
+	if (unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx)))
+		++num_descs;
+
+	if (num_descs > llq_info->descs_num_before_header) {
+		descs_after_first_entry = num_descs - llq_info->descs_num_before_header;
+		num_entries_needed += DIV_ROUND_UP(descs_after_first_entry,
+						   llq_info->descs_per_entry);
+	}
+
+	pr_debug("queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid,
+		 num_descs, num_entries_needed);
+
+	return num_entries_needed > io_sq->entries_in_tx_burst_left;
+}
+
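
For example, with descs_num_before_header = 2 and descs_per_entry = 4, a
packet carrying 7 descriptors (meta included) needs
1 + DIV_ROUND_UP(7 - 2, 4) = 3 burst entries, so the doorbell is forced
whenever fewer than three entries remain in the current burst.
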
 static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
 {
-	u16 tail;
-
-	tail = io_sq->tail;
+	u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst;
+	u16 tail = io_sq->tail;
 
 	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
 		 io_sq->qid, tail);
 
 	writel(tail, io_sq->db_addr);
 
+	if (is_llq_max_tx_burst_exists(io_sq)) {
+		pr_debug("reset available entries in tx burst for queue %d to %d\n",
+			 io_sq->qid, max_entries_in_tx_burst);
+		io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst;
+	}
+
 	return 0;
 }
 
@@ -126,15 +195,17 @@
 	u16 unreported_comp, head;
 	bool need_update;
 
-	head = io_cq->head;
-	unreported_comp = head - io_cq->last_head_update;
-	need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
+	if (unlikely(io_cq->cq_head_db_reg)) {
+		head = io_cq->head;
+		unreported_comp = head - io_cq->last_head_update;
+		need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
 
-	if (io_cq->cq_head_db_reg && need_update) {
-		pr_debug("Write completion queue doorbell for queue %d: head: %d\n",
-			 io_cq->qid, head);
-		writel(head, io_cq->cq_head_db_reg);
-		io_cq->last_head_update = head;
+		if (unlikely(need_update)) {
+			pr_debug("Write completion queue doorbell for queue %d: head: %d\n",
+				 io_cq->qid, head);
+			writel(head, io_cq->cq_head_db_reg);
+			io_cq->last_head_update = head;
+		}
 	}
 
 	return 0;
@@ -159,4 +230,48 @@
 	io_sq->next_to_comp += elem;
 }
 
+static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
+{
+	io_cq->head++;
+
+	/* Switch phase bit in case of wrap around */
+	if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
+		io_cq->phase ^= 1;
+}
+
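
A standalone sketch of the phase-bit scheme this helper maintains: the head
index runs free, the slot is the masked index, and the expected phase flips
on every wrap so stale completions are never mistaken for fresh ones
(power-of-two depth assumed):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint16_t q_depth = 4;	/* must be a power of two */
		uint16_t head = 0;
		uint8_t phase = 1;
		int i;

		for (i = 0; i < 10; i++) {
			printf("head %u slot %u expected phase %u\n",
			       head, head & (q_depth - 1), phase);
			head++;
			if ((head & (q_depth - 1)) == 0)
				phase ^= 1;	/* flip on wrap-around */
		}
		return 0;
	}
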
+static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq,
+					     u16 *req_id)
+{
+	u8 expected_phase, cdesc_phase;
+	struct ena_eth_io_tx_cdesc *cdesc;
+	u16 masked_head;
+
+	masked_head = io_cq->head & (io_cq->q_depth - 1);
+	expected_phase = io_cq->phase;
+
+	cdesc = (struct ena_eth_io_tx_cdesc *)
+		((uintptr_t)io_cq->cdesc_addr.virt_addr +
+		(masked_head * io_cq->cdesc_entry_size_in_bytes));
+
+	/* When the current completion descriptor phase isn't the same as the
+	 * expected one, it means that the device hasn't updated this
+	 * completion yet.
+	 */
+	cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+	if (cdesc_phase != expected_phase)
+		return -EAGAIN;
+
+	dma_rmb();
+
+	*req_id = READ_ONCE(cdesc->req_id);
+	if (unlikely(*req_id >= io_cq->q_depth)) {
+		pr_err("Invalid req id %d\n", cdesc->req_id);
+		return -EINVAL;
+	}
+
+	ena_com_cq_inc_head(io_cq);
+
+	return 0;
+}
+
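
A sketch of the completion-reaping loop this inline was moved here to serve;
reclaim_tx_buffers() is a hypothetical stand-in for the driver's per-req-id
cleanup:

	u16 req_id;

	/* Drain every completion the device has published so far */
	while (ena_com_tx_comp_req_id_get(io_cq, &req_id) == 0)
		reclaim_tx_buffers(req_id);	/* hypothetical helper */
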
 #endif /* ENA_ETH_COM_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
index f320c58..00e0f05 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
@@ -33,25 +33,18 @@
 #define _ENA_ETH_IO_H_
 
 enum ena_eth_io_l3_proto_index {
-	ENA_ETH_IO_L3_PROTO_UNKNOWN	= 0,
-
-	ENA_ETH_IO_L3_PROTO_IPV4	= 8,
-
-	ENA_ETH_IO_L3_PROTO_IPV6	= 11,
-
-	ENA_ETH_IO_L3_PROTO_FCOE	= 21,
-
-	ENA_ETH_IO_L3_PROTO_ROCE	= 22,
+	ENA_ETH_IO_L3_PROTO_UNKNOWN                 = 0,
+	ENA_ETH_IO_L3_PROTO_IPV4                    = 8,
+	ENA_ETH_IO_L3_PROTO_IPV6                    = 11,
+	ENA_ETH_IO_L3_PROTO_FCOE                    = 21,
+	ENA_ETH_IO_L3_PROTO_ROCE                    = 22,
 };
 
 enum ena_eth_io_l4_proto_index {
-	ENA_ETH_IO_L4_PROTO_UNKNOWN		= 0,
-
-	ENA_ETH_IO_L4_PROTO_TCP			= 12,
-
-	ENA_ETH_IO_L4_PROTO_UDP			= 13,
-
-	ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE	= 23,
+	ENA_ETH_IO_L4_PROTO_UNKNOWN                 = 0,
+	ENA_ETH_IO_L4_PROTO_TCP                     = 12,
+	ENA_ETH_IO_L4_PROTO_UDP                     = 13,
+	ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE          = 23,
 };
 
 struct ena_eth_io_tx_desc {
@@ -242,9 +235,13 @@
 	 *    checksum error detected, or, the controller didn't
 	 *    validate the checksum. This bit is valid only when
 	 *    l4_proto_idx indicates TCP/UDP packet, and,
-	 *    ipv4_frag is not set
+	 *    ipv4_frag is not set. This bit is valid only when
+	 *    l4_csum_checked below is set.
 	 * 15 : ipv4_frag - Indicates IPv4 fragmented packet
-	 * 23:16 : reserved16
+	 * 16 : l4_csum_checked - L4 checksum was verified
+	 *    (could be OK or error), when cleared the status of
+	 *    checksum is unknown
+	 * 23:17 : reserved17 - MBZ
 	 * 24 : phase
 	 * 25 : l3_csum2 - second checksum engine result
 	 * 26 : first - Indicates first descriptor in
@@ -303,114 +300,116 @@
 };
 
 /* tx_desc */
-#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
-#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
-#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
-#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
-#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
-#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
-#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
-#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
-#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
-#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
-#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
-#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
-#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
-#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
-#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
-#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
-#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
-#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
-#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
-#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
-#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
-#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
-#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
-#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
-#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
-#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
-#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
-#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
-#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
+#define ENA_ETH_IO_TX_DESC_LENGTH_MASK                      GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT                  16
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK                   GENMASK(21, 16)
+#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT                  23
+#define ENA_ETH_IO_TX_DESC_META_DESC_MASK                   BIT(23)
+#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT                      24
+#define ENA_ETH_IO_TX_DESC_PHASE_MASK                       BIT(24)
+#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT                      26
+#define ENA_ETH_IO_TX_DESC_FIRST_MASK                       BIT(26)
+#define ENA_ETH_IO_TX_DESC_LAST_SHIFT                       27
+#define ENA_ETH_IO_TX_DESC_LAST_MASK                        BIT(27)
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT                   28
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK                    BIT(28)
+#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK                GENMASK(3, 0)
+#define ENA_ETH_IO_TX_DESC_DF_SHIFT                         4
+#define ENA_ETH_IO_TX_DESC_DF_MASK                          BIT(4)
+#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT                     7
+#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK                      BIT(7)
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT               8
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK                GENMASK(12, 8)
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT                 13
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK                  BIT(13)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT                 14
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK                  BIT(14)
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT           15
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK            BIT(15)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT            17
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK             BIT(17)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT                  22
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK                   GENMASK(31, 22)
+#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK                     GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT              24
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK               GENMASK(31, 24)
 
 /* tx_meta_desc */
-#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
-#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
-#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
-#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
-#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
-#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
-#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
-#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
-#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
-#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
-#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
-#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
-#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
-#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
-#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
-#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
-#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
-#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
-#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
-#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
-#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
-#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
-#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
-#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK              GENMASK(9, 0)
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT             14
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK              BIT(14)
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT                16
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK                 GENMASK(19, 16)
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT         20
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK          BIT(20)
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT            21
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK             BIT(21)
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT             23
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK              BIT(23)
+#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT                 24
+#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK                  BIT(24)
+#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT                 26
+#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK                  BIT(26)
+#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT                  27
+#define ENA_ETH_IO_TX_META_DESC_LAST_MASK                   BIT(27)
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT              28
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK               BIT(28)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK              GENMASK(5, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK             GENMASK(7, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT            8
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK             GENMASK(15, 8)
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT   16
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK    GENMASK(21, 16)
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT                22
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK                 GENMASK(31, 22)
 
 /* tx_cdesc */
-#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_TX_CDESC_PHASE_MASK                      BIT(0)
 
 /* rx_desc */
-#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
-#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
-#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
-#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
-#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
-#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
-#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
+#define ENA_ETH_IO_RX_DESC_PHASE_MASK                       BIT(0)
+#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT                      2
+#define ENA_ETH_IO_RX_DESC_FIRST_MASK                       BIT(2)
+#define ENA_ETH_IO_RX_DESC_LAST_SHIFT                       3
+#define ENA_ETH_IO_RX_DESC_LAST_MASK                        BIT(3)
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT                   4
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK                    BIT(4)
 
 /* rx_cdesc_base */
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
-#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
-#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
-#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
-#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
-#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
-#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
-#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
-#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
-#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
-#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
-#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
-#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK          GENMASK(4, 0)
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT         5
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK          GENMASK(6, 5)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT         8
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK          GENMASK(12, 8)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT          13
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK           BIT(13)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT          14
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK           BIT(14)
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT            15
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK             BIT(15)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT      16
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK       BIT(16)
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT                24
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK                 BIT(24)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT             25
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK              BIT(25)
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT                26
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK                 BIT(26)
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT                 27
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK                  BIT(27)
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT               30
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK                BIT(30)
 
 /* intr_reg */
-#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
-#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
-#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
-#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
-#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
+#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK              GENMASK(14, 0)
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT             15
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK              GENMASK(29, 15)
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT               30
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK                BIT(30)
 
 /* numa_node_cfg_reg */
-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK              GENMASK(7, 0)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT          31
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK           BIT(31)
 
 #endif /*_ENA_ETH_IO_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 521607b..16553d9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -81,21 +81,24 @@
 	ENA_STAT_TX_ENTRY(doorbells),
 	ENA_STAT_TX_ENTRY(prepare_ctx_err),
 	ENA_STAT_TX_ENTRY(bad_req_id),
+	ENA_STAT_TX_ENTRY(llq_buffer_copy),
 	ENA_STAT_TX_ENTRY(missed_tx),
 };
 
 static const struct ena_stats ena_stats_rx_strings[] = {
 	ENA_STAT_RX_ENTRY(cnt),
 	ENA_STAT_RX_ENTRY(bytes),
+	ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+	ENA_STAT_RX_ENTRY(csum_good),
 	ENA_STAT_RX_ENTRY(refil_partial),
 	ENA_STAT_RX_ENTRY(bad_csum),
 	ENA_STAT_RX_ENTRY(page_alloc_fail),
 	ENA_STAT_RX_ENTRY(skb_alloc_fail),
 	ENA_STAT_RX_ENTRY(dma_mapping_err),
 	ENA_STAT_RX_ENTRY(bad_desc_num),
-	ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
 	ENA_STAT_RX_ENTRY(bad_req_id),
 	ENA_STAT_RX_ENTRY(empty_rx_ring),
+	ENA_STAT_RX_ENTRY(csum_unchecked),
 };
 
 static const struct ena_stats ena_stats_ena_com_strings[] = {
@@ -302,32 +305,21 @@
 {
 	struct ena_adapter *adapter = netdev_priv(net_dev);
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
-	struct ena_intr_moder_entry intr_moder_entry;
 
 	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
 		/* the device doesn't support interrupt moderation */
 		return -EOPNOTSUPP;
 	}
+
 	coalesce->tx_coalesce_usecs =
-		ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) /
+		ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) *
 			ena_dev->intr_delay_resolution;
-	if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+
+	if (!ena_com_get_adaptive_moderation_enabled(ena_dev))
 		coalesce->rx_coalesce_usecs =
 			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
-			/ ena_dev->intr_delay_resolution;
-	} else {
-		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
-		coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval;
-		coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval;
+			* ena_dev->intr_delay_resolution;
 
-		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
-		coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval;
-		coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval;
-
-		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
-		coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval;
-		coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval;
-	}
 	coalesce->use_adaptive_rx_coalesce =
 		ena_com_get_adaptive_moderation_enabled(ena_dev);
 
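
To make the unit handling concrete: with intr_delay_resolution = 2 usec per
unit and a stored non-adaptive TX interval of 32 units, ethtool now
correctly reports 32 * 2 = 64 usec, where the old division would have
reported 16.
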
@@ -345,12 +337,22 @@
 		adapter->tx_ring[i].smoothed_interval = val;
 }
 
+static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter)
+{
+	unsigned int val;
+	int i;
+
+	val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev);
+
+	for (i = 0; i < adapter->num_queues; i++)
+		adapter->rx_ring[i].smoothed_interval = val;
+}
+
 static int ena_set_coalesce(struct net_device *net_dev,
 			    struct ethtool_coalesce *coalesce)
 {
 	struct ena_adapter *adapter = netdev_priv(net_dev);
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
-	struct ena_intr_moder_entry intr_moder_entry;
 	int rc;
 
 	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
@@ -358,22 +360,6 @@
 		return -EOPNOTSUPP;
 	}
 
-	if (coalesce->rx_coalesce_usecs_irq ||
-	    coalesce->rx_max_coalesced_frames_irq ||
-	    coalesce->tx_coalesce_usecs_irq ||
-	    coalesce->tx_max_coalesced_frames ||
-	    coalesce->tx_max_coalesced_frames_irq ||
-	    coalesce->stats_block_coalesce_usecs ||
-	    coalesce->use_adaptive_tx_coalesce ||
-	    coalesce->pkt_rate_low ||
-	    coalesce->tx_coalesce_usecs_low ||
-	    coalesce->tx_max_coalesced_frames_low ||
-	    coalesce->pkt_rate_high ||
-	    coalesce->tx_coalesce_usecs_high ||
-	    coalesce->tx_max_coalesced_frames_high ||
-	    coalesce->rate_sample_interval)
-		return -EINVAL;
-
 	rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
 							       coalesce->tx_coalesce_usecs);
 	if (rc)
@@ -381,37 +367,23 @@
 
 	ena_update_tx_rings_intr_moderation(adapter);
 
-	if (ena_com_get_adaptive_moderation_enabled(ena_dev)) {
-		if (!coalesce->use_adaptive_rx_coalesce) {
-			ena_com_disable_adaptive_moderation(ena_dev);
-			rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
-									       coalesce->rx_coalesce_usecs);
-			return rc;
-		}
-	} else { /* was in non-adaptive mode */
-		if (coalesce->use_adaptive_rx_coalesce) {
+	if (coalesce->use_adaptive_rx_coalesce) {
+		if (!ena_com_get_adaptive_moderation_enabled(ena_dev))
 			ena_com_enable_adaptive_moderation(ena_dev);
-		} else {
-			rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
-									       coalesce->rx_coalesce_usecs);
-			return rc;
-		}
+		return 0;
 	}
 
-	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low;
-	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low;
-	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
-	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+	rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+							       coalesce->rx_coalesce_usecs);
+	if (rc)
+		return rc;
 
-	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs;
-	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames;
-	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
-	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+	ena_update_rx_rings_intr_moderation(adapter);
 
-	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high;
-	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high;
-	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
-	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+	if (!coalesce->use_adaptive_rx_coalesce) {
+		if (ena_com_get_adaptive_moderation_enabled(ena_dev))
+			ena_com_disable_adaptive_moderation(ena_dev);
+	}
 
 	return 0;
 }
@@ -445,13 +417,32 @@
 			      struct ethtool_ringparam *ring)
 {
 	struct ena_adapter *adapter = netdev_priv(netdev);
-	struct ena_ring *tx_ring = &adapter->tx_ring[0];
-	struct ena_ring *rx_ring = &adapter->rx_ring[0];
 
-	ring->rx_max_pending = rx_ring->ring_size;
-	ring->tx_max_pending = tx_ring->ring_size;
-	ring->rx_pending = rx_ring->ring_size;
-	ring->tx_pending = tx_ring->ring_size;
+	ring->tx_max_pending = adapter->max_tx_ring_size;
+	ring->rx_max_pending = adapter->max_rx_ring_size;
+	ring->tx_pending = adapter->tx_ring[0].ring_size;
+	ring->rx_pending = adapter->rx_ring[0].ring_size;
+}
+
+static int ena_set_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	u32 new_tx_size, new_rx_size;
+
+	new_tx_size = ring->tx_pending < ENA_MIN_RING_SIZE ?
+			ENA_MIN_RING_SIZE : ring->tx_pending;
+	new_tx_size = rounddown_pow_of_two(new_tx_size);
+
+	new_rx_size = ring->rx_pending < ENA_MIN_RING_SIZE ?
+			ENA_MIN_RING_SIZE : ring->rx_pending;
+	new_rx_size = rounddown_pow_of_two(new_rx_size);
+
+	if (new_tx_size == adapter->requested_tx_ring_size &&
+	    new_rx_size == adapter->requested_rx_ring_size)
+		return 0;
+
+	return ena_update_queue_sizes(adapter, new_tx_size, new_rx_size);
 }
 
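
For example, a request of ethtool -G <iface> rx 1000 is first clamped up to
ENA_MIN_RING_SIZE if it falls below the minimum and then rounded down to
the nearest power of two, so the ring is reconfigured with 512 entries.
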
 static u32 ena_flow_hash_to_flow_type(u16 hash_fields)
@@ -695,8 +686,8 @@
 	if (indir) {
 		for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
 			rc = ena_com_indirect_table_fill_entry(ena_dev,
-							       ENA_IO_RXQ_IDX(indir[i]),
-							       i);
+							       i,
+							       ENA_IO_RXQ_IDX(indir[i]));
 			if (unlikely(rc)) {
 				netif_err(adapter, drv, netdev,
 					  "Cannot fill indirect table (index is too large)\n");
@@ -805,6 +796,7 @@
 	.get_coalesce		= ena_get_coalesce,
 	.set_coalesce		= ena_set_coalesce,
 	.get_ringparam		= ena_get_ringparam,
+	.set_ringparam		= ena_set_ringparam,
 	.get_sset_count         = ena_get_sset_count,
 	.get_strings		= ena_get_strings,
 	.get_ethtool_stats      = ena_get_ethtool_stats,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 4b73131..c487d2a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -39,7 +39,6 @@
 #include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/numa.h>
 #include <linux/pci.h>
 #include <linux/utsname.h>
@@ -159,7 +158,6 @@
 	ring->adapter = adapter;
 	ring->ena_dev = adapter->ena_dev;
 	ring->per_napi_packets = 0;
-	ring->per_napi_bytes = 0;
 	ring->cpu = 0;
 	ring->first_interrupt = false;
 	ring->no_interrupt_event_cnt = 0;
@@ -183,7 +181,7 @@
 		ena_init_io_rings_common(adapter, rxr, i);
 
 		/* TX specific ring state */
-		txr->ring_size = adapter->tx_ring_size;
+		txr->ring_size = adapter->requested_tx_ring_size;
 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
 		txr->sgl_size = adapter->max_tx_sgl_size;
@@ -191,12 +189,13 @@
 			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
 
 		/* RX specific ring state */
-		rxr->ring_size = adapter->rx_ring_size;
+		rxr->ring_size = adapter->requested_rx_ring_size;
 		rxr->rx_copybreak = adapter->rx_copybreak;
 		rxr->sgl_size = adapter->max_rx_sgl_size;
 		rxr->smoothed_interval =
 			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 		rxr->empty_rx_queue = 0;
+		adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	}
 }
 
@@ -225,22 +224,28 @@
 	if (!tx_ring->tx_buffer_info) {
 		tx_ring->tx_buffer_info = vzalloc(size);
 		if (!tx_ring->tx_buffer_info)
-			return -ENOMEM;
+			goto err_tx_buffer_info;
 	}
 
 	size = sizeof(u16) * tx_ring->ring_size;
-	tx_ring->free_tx_ids = vzalloc_node(size, node);
-	if (!tx_ring->free_tx_ids) {
-		tx_ring->free_tx_ids = vzalloc(size);
-		if (!tx_ring->free_tx_ids) {
-			vfree(tx_ring->tx_buffer_info);
-			return -ENOMEM;
-		}
+	tx_ring->free_ids = vzalloc_node(size, node);
+	if (!tx_ring->free_ids) {
+		tx_ring->free_ids = vzalloc(size);
+		if (!tx_ring->free_ids)
+			goto err_tx_free_ids;
+	}
+
+	size = tx_ring->tx_max_header_size;
+	tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
+	if (!tx_ring->push_buf_intermediate_buf) {
+		tx_ring->push_buf_intermediate_buf = vzalloc(size);
+		if (!tx_ring->push_buf_intermediate_buf)
+			goto err_push_buf_intermediate_buf;
 	}
 
 	/* Req id ring for TX out of order completions */
 	for (i = 0; i < tx_ring->ring_size; i++)
-		tx_ring->free_tx_ids[i] = i;
+		tx_ring->free_ids[i] = i;
 
 	/* Reset tx statistics */
 	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
@@ -249,6 +254,15 @@
 	tx_ring->next_to_clean = 0;
 	tx_ring->cpu = ena_irq->cpu;
 	return 0;
+
+err_push_buf_intermediate_buf:
+	vfree(tx_ring->free_ids);
+	tx_ring->free_ids = NULL;
+err_tx_free_ids:
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+err_tx_buffer_info:
+	return -ENOMEM;
 }
 
 /* ena_free_tx_resources - Free I/O Tx Resources per Queue
@@ -264,8 +278,11 @@
 	vfree(tx_ring->tx_buffer_info);
 	tx_ring->tx_buffer_info = NULL;
 
-	vfree(tx_ring->free_tx_ids);
-	tx_ring->free_tx_ids = NULL;
+	vfree(tx_ring->free_ids);
+	tx_ring->free_ids = NULL;
+
+	vfree(tx_ring->push_buf_intermediate_buf);
+	tx_ring->push_buf_intermediate_buf = NULL;
 }
 
 /* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
@@ -309,7 +326,7 @@
 		ena_free_tx_resources(adapter, i);
 }
 
-static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
+static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
 {
 	if (likely(req_id < rx_ring->ring_size))
 		return 0;
@@ -360,18 +377,19 @@
 	}
 
 	size = sizeof(u16) * rx_ring->ring_size;
-	rx_ring->free_rx_ids = vzalloc_node(size, node);
-	if (!rx_ring->free_rx_ids) {
-		rx_ring->free_rx_ids = vzalloc(size);
-		if (!rx_ring->free_rx_ids) {
+	rx_ring->free_ids = vzalloc_node(size, node);
+	if (!rx_ring->free_ids) {
+		rx_ring->free_ids = vzalloc(size);
+		if (!rx_ring->free_ids) {
 			vfree(rx_ring->rx_buffer_info);
+			rx_ring->rx_buffer_info = NULL;
 			return -ENOMEM;
 		}
 	}
 
 	/* Req id ring for receiving RX pkts out of order */
 	for (i = 0; i < rx_ring->ring_size; i++)
-		rx_ring->free_rx_ids[i] = i;
+		rx_ring->free_ids[i] = i;
 
 	/* Reset rx statistics */
 	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
@@ -397,8 +415,8 @@
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 
-	vfree(rx_ring->free_rx_ids);
-	rx_ring->free_rx_ids = NULL;
+	vfree(rx_ring->free_ids);
+	rx_ring->free_ids = NULL;
 }
 
 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
@@ -442,7 +460,7 @@
 		ena_free_rx_resources(adapter, i);
 }
 
-static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
+static int ena_alloc_rx_page(struct ena_ring *rx_ring,
 				    struct ena_rx_buffer *rx_info, gfp_t gfp)
 {
 	struct ena_com_buf *ena_buf;
@@ -513,7 +531,7 @@
 	for (i = 0; i < num; i++) {
 		struct ena_rx_buffer *rx_info;
 
-		req_id = rx_ring->free_rx_ids[next_to_use];
+		req_id = rx_ring->free_ids[next_to_use];
 		rc = validate_rx_req_id(rx_ring, req_id);
 		if (unlikely(rc < 0))
 			break;
@@ -576,7 +594,6 @@
 
 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers
  * @adapter: board private structure
- *
  */
 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
 {
@@ -603,6 +620,36 @@
 		ena_free_rx_bufs(adapter, i);
 }
 
+static void ena_unmap_tx_skb(struct ena_ring *tx_ring,
+			     struct ena_tx_buffer *tx_info)
+{
+	struct ena_com_buf *ena_buf;
+	u32 cnt;
+	int i;
+
+	ena_buf = tx_info->bufs;
+	cnt = tx_info->num_of_bufs;
+
+	if (unlikely(!cnt))
+		return;
+
+	if (tx_info->map_linear_data) {
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(ena_buf, paddr),
+				 dma_unmap_len(ena_buf, len),
+				 DMA_TO_DEVICE);
+		ena_buf++;
+		cnt--;
+	}
+
+	/* unmap remaining mapped pages */
+	for (i = 0; i < cnt; i++) {
+		dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+			       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+		ena_buf++;
+	}
+}
+
 /* ena_free_tx_bufs - Free Tx Buffers per Queue
  * @tx_ring: TX ring for which buffers be freed
  */
@@ -613,9 +660,6 @@
 
 	for (i = 0; i < tx_ring->ring_size; i++) {
 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
-		struct ena_com_buf *ena_buf;
-		int nr_frags;
-		int j;
 
 		if (!tx_info->skb)
 			continue;
@@ -631,21 +675,7 @@
 				   tx_ring->qid, i);
 		}
 
-		ena_buf = tx_info->bufs;
-		dma_unmap_single(tx_ring->dev,
-				 ena_buf->paddr,
-				 ena_buf->len,
-				 DMA_TO_DEVICE);
-
-		/* unmap remaining mapped pages */
-		nr_frags = tx_info->num_of_bufs - 1;
-		for (j = 0; j < nr_frags; j++) {
-			ena_buf++;
-			dma_unmap_page(tx_ring->dev,
-				       ena_buf->paddr,
-				       ena_buf->len,
-				       DMA_TO_DEVICE);
-		}
+		ena_unmap_tx_skb(tx_ring, tx_info);
 
 		dev_kfree_skb_any(tx_info->skb);
 	}
@@ -682,6 +712,7 @@
 
 	for (i = 0; i < adapter->num_queues; i++) {
 		ena_qid = ENA_IO_RXQ_IDX(i);
+		cancel_work_sync(&adapter->ena_napi[i].dim.work);
 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 	}
 }
@@ -736,8 +767,6 @@
 	while (tx_pkts < budget) {
 		struct ena_tx_buffer *tx_info;
 		struct sk_buff *skb;
-		struct ena_com_buf *ena_buf;
-		int i, nr_frags;
 
 		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
 						&req_id);
@@ -757,24 +786,7 @@
 		tx_info->skb = NULL;
 		tx_info->last_jiffies = 0;
 
-		if (likely(tx_info->num_of_bufs != 0)) {
-			ena_buf = tx_info->bufs;
-
-			dma_unmap_single(tx_ring->dev,
-					 dma_unmap_addr(ena_buf, paddr),
-					 dma_unmap_len(ena_buf, len),
-					 DMA_TO_DEVICE);
-
-			/* unmap remaining mapped pages */
-			nr_frags = tx_info->num_of_bufs - 1;
-			for (i = 0; i < nr_frags; i++) {
-				ena_buf++;
-				dma_unmap_page(tx_ring->dev,
-					       dma_unmap_addr(ena_buf, paddr),
-					       dma_unmap_len(ena_buf, len),
-					       DMA_TO_DEVICE);
-			}
-		}
+		ena_unmap_tx_skb(tx_ring, tx_info);
 
 		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
@@ -785,7 +797,7 @@
 		tx_pkts++;
 		total_done += tx_info->tx_descs;
 
-		tx_ring->free_tx_ids[next_to_clean] = req_id;
+		tx_ring->free_ids[next_to_clean] = req_id;
 		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
 						     tx_ring->ring_size);
 	}
@@ -805,13 +817,15 @@
 	 */
 	smp_mb();
 
-	above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
-		ENA_TX_WAKEUP_THRESH;
+	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+						    ENA_TX_WAKEUP_THRESH);
 	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
 		__netif_tx_lock(txq, smp_processor_id());
-		above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
-			ENA_TX_WAKEUP_THRESH;
-		if (netif_tx_queue_stopped(txq) && above_thresh) {
+		above_thresh =
+			ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+						     ENA_TX_WAKEUP_THRESH);
+		if (netif_tx_queue_stopped(txq) && above_thresh &&
+		    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
 			netif_tx_wake_queue(txq);
 			u64_stats_update_begin(&tx_ring->syncp);
 			tx_ring->tx_stats.queue_wakeup++;
@@ -820,9 +834,6 @@
 		__netif_tx_unlock(txq);
 	}
 
-	tx_ring->per_napi_bytes += tx_bytes;
-	tx_ring->per_napi_packets += tx_pkts;
-
 	return tx_pkts;
 }
 
@@ -898,7 +909,7 @@
 
 		skb_put(skb, len);
 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-		rx_ring->free_rx_ids[*next_to_clean] = req_id;
+		rx_ring->free_ids[*next_to_clean] = req_id;
 		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
 						     rx_ring->ring_size);
 		return skb;
@@ -922,7 +933,7 @@
 
 		rx_info->page = NULL;
 
-		rx_ring->free_rx_ids[*next_to_clean] = req_id;
+		rx_ring->free_ids[*next_to_clean] = req_id;
 		*next_to_clean =
 			ENA_RX_RING_IDX_NEXT(*next_to_clean,
 					     rx_ring->ring_size);
@@ -943,7 +954,7 @@
  * @ena_rx_ctx: received packet context/metadata
  * @skb: skb currently being received and modified
  */
-static inline void ena_rx_checksum(struct ena_ring *rx_ring,
+static void ena_rx_checksum(struct ena_ring *rx_ring,
 				   struct ena_com_rx_ctx *ena_rx_ctx,
 				   struct sk_buff *skb)
 {
@@ -986,8 +997,22 @@
 			return;
 		}
 
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (likely(ena_rx_ctx->l4_csum_checked)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			u64_stats_update_begin(&rx_ring->syncp);
+			rx_ring->rx_stats.csum_good++;
+			u64_stats_update_end(&rx_ring->syncp);
+		} else {
+			u64_stats_update_begin(&rx_ring->syncp);
+			rx_ring->rx_stats.csum_unchecked++;
+			u64_stats_update_end(&rx_ring->syncp);
+			skb->ip_summed = CHECKSUM_NONE;
+		}
+	} else {
+		skb->ip_summed = CHECKSUM_NONE;
+		return;
 	}
+
 }
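The restructured function above yields three outcomes: a checksum error (bad_csum), a checksum the device actually verified (the new csum_good counter, CHECKSUM_UNNECESSARY), and a packet the device skipped (the new csum_unchecked counter, CHECKSUM_NONE). A condensed standalone model of that verdict table; the names are illustrative, not the driver's fields.

#include <stdio.h>

enum toy_verdict { TOY_CSUM_NONE, TOY_CSUM_UNNECESSARY, TOY_CSUM_BAD };

/* l4_checked distinguishes "device validated the L4 checksum" from
 * "device skipped validation", which csum_unchecked tracks.
 */
static enum toy_verdict rx_csum_verdict(int l3_err, int l4_err,
					int l4_checked)
{
	if (l3_err || (l4_checked && l4_err))
		return TOY_CSUM_BAD;
	return l4_checked ? TOY_CSUM_UNNECESSARY : TOY_CSUM_NONE;
}

int main(void)
{
	printf("skipped:     %d (CHECKSUM_NONE)\n",
	       rx_csum_verdict(0, 0, 0));
	printf("verified ok: %d (CHECKSUM_UNNECESSARY)\n",
	       rx_csum_verdict(0, 0, 1));
	printf("bad L4:      %d (bad_csum)\n",
	       rx_csum_verdict(0, 1, 1));
	return 0;
}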
 
 static void ena_set_rx_hash(struct ena_ring *rx_ring,
@@ -1064,7 +1089,7 @@
 		/* exit if we failed to retrieve a buffer */
 		if (unlikely(!skb)) {
 			for (i = 0; i < ena_rx_ctx.descs; i++) {
-				rx_ring->free_tx_ids[next_to_clean] =
+				rx_ring->free_ids[next_to_clean] =
 					rx_ring->ena_bufs[i].req_id;
 				next_to_clean =
 					ENA_RX_RING_IDX_NEXT(next_to_clean,
@@ -1092,7 +1117,6 @@
 	} while (likely(res_budget));
 
 	work_done = budget - res_budget;
-	rx_ring->per_napi_bytes += total_len;
 	rx_ring->per_napi_packets += work_done;
 	u64_stats_update_begin(&rx_ring->syncp);
 	rx_ring->rx_stats.bytes += total_len;
@@ -1102,8 +1126,10 @@
 
 	rx_ring->next_to_clean = next_to_clean;
 
-	refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
-	refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
+	refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
+	refill_threshold =
+		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
+		      ENA_RX_REFILL_THRESH_PACKET);
 
 	/* Optimization, try to batch new rx buffers */
 	if (refill_required > refill_threshold) {
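The threshold above is now the smaller of one eighth of the ring and a fixed 256-descriptor cap (ENA_RX_REFILL_THRESH_PACKET), so large rings no longer accumulate thousands of empty slots before a refill. A small arithmetic sketch, with toy_* stand-ins for the driver's constants:

#include <stdio.h>

#define TOY_REFILL_THRESH_DIVIDER 8
#define TOY_REFILL_THRESH_PACKET  256

static int refill_threshold(int ring_size)
{
	int by_ratio = ring_size / TOY_REFILL_THRESH_DIVIDER;

	return by_ratio < TOY_REFILL_THRESH_PACKET ?
	       by_ratio : TOY_REFILL_THRESH_PACKET;
}

int main(void)
{
	/* 1024-entry ring: 128; 8192-entry ring: capped at 256 */
	printf("%d %d\n", refill_threshold(1024), refill_threshold(8192));
	return 0;
}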
@@ -1127,35 +1153,50 @@
 	return 0;
 }
 
-inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
-				       struct ena_ring *tx_ring)
+static void ena_dim_work(struct work_struct *w)
 {
-	/* We apply adaptive moderation on Rx path only.
-	 * Tx uses static interrupt moderation.
-	 */
-	ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
-					  rx_ring->per_napi_packets,
-					  rx_ring->per_napi_bytes,
-					  &rx_ring->smoothed_interval,
-					  &rx_ring->moder_tbl_idx);
+	struct dim *dim = container_of(w, struct dim, work);
+	struct dim_cq_moder cur_moder =
+		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+	struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
 
-	/* Reset per napi packets/bytes */
-	tx_ring->per_napi_packets = 0;
-	tx_ring->per_napi_bytes = 0;
-	rx_ring->per_napi_packets = 0;
-	rx_ring->per_napi_bytes = 0;
+	ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
+	dim->state = DIM_START_MEASURE;
 }
 
-static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
+{
+	struct dim_sample dim_sample;
+	struct ena_ring *rx_ring = ena_napi->rx_ring;
+
+	if (!rx_ring->per_napi_packets)
+		return;
+
+	rx_ring->non_empty_napi_events++;
+
+	dim_update_sample(rx_ring->non_empty_napi_events,
+			  rx_ring->rx_stats.cnt,
+			  rx_ring->rx_stats.bytes,
+			  &dim_sample);
+
+	net_dim(&ena_napi->dim, dim_sample);
+
+	rx_ring->per_napi_packets = 0;
+}
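With the private moderation table gone, the driver only keeps sample bookkeeping and delegates profile selection to the generic net_dim library. A standalone model of that bookkeeping, with a stub in place of dim_update_sample()/net_dim(); only the skip-empty-polls and counter-reset behaviour mirror the function above.

#include <stdio.h>

struct toy_rx_ring {
	unsigned long per_napi_packets;
	unsigned long non_empty_napi_events;
	unsigned long long pkts, bytes;		/* lifetime rx stats */
};

/* stand-in for dim_update_sample() + net_dim() */
static void toy_dim_feed(unsigned long events,
			 unsigned long long pkts, unsigned long long bytes)
{
	printf("sample: events=%lu pkts=%llu bytes=%llu\n",
	       events, pkts, bytes);
}

static void toy_adjust_moderation(struct toy_rx_ring *r)
{
	if (!r->per_napi_packets)
		return;			/* empty polls carry no signal */

	r->non_empty_napi_events++;
	toy_dim_feed(r->non_empty_napi_events, r->pkts, r->bytes);
	r->per_napi_packets = 0;
}

int main(void)
{
	struct toy_rx_ring r = { 0, 0, 0, 0 };

	toy_adjust_moderation(&r);	/* empty poll: no sample */
	r.per_napi_packets = 37;
	r.pkts = 1000;
	r.bytes = 64000;
	toy_adjust_moderation(&r);	/* emits one sample */
	return 0;
}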
+
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
 					struct ena_ring *rx_ring)
 {
 	struct ena_eth_io_intr_reg intr_reg;
+	u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
+		rx_ring->smoothed_interval :
+		ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
 
 	/* Update intr register: rx intr delay,
 	 * tx intr delay and interrupt unmask
 	 */
 	ena_com_update_intr_reg(&intr_reg,
-				rx_ring->smoothed_interval,
+				rx_interval,
 				tx_ring->smoothed_interval,
 				true);
 
@@ -1166,7 +1207,7 @@
 	ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
 }
 
-static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
 					     struct ena_ring *rx_ring)
 {
 	int cpu = get_cpu();
@@ -1232,9 +1273,11 @@
 		 * from the interrupt context (vs from sk_busy_loop)
 		 */
 		if (napi_complete_done(napi, rx_work_done)) {
-			/* Tx and Rx share the same interrupt vector */
+			/* We apply adaptive moderation on the Rx path only.
+			 * Tx uses static interrupt moderation.
+			 */
 			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
-				ena_adjust_intr_moderation(rx_ring, tx_ring);
+				ena_adjust_adaptive_rx_intr_moderation(ena_napi);
 
 			ena_unmask_interrupt(tx_ring, rx_ring);
 		}
@@ -1300,7 +1343,6 @@
 
 	/* Reserved the max msix vectors we might need */
 	msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
-
 	netif_dbg(adapter, probe, adapter->netdev,
 		  "trying to enable MSI-X, vectors %d\n", msix_vecs);
 
@@ -1525,14 +1567,6 @@
 		napi_enable(&adapter->ena_napi[i].napi);
 }
 
-static void ena_restore_ethtool_params(struct ena_adapter *adapter)
-{
-	adapter->tx_usecs = 0;
-	adapter->rx_usecs = 0;
-	adapter->tx_frames = 1;
-	adapter->rx_frames = 1;
-}
-
 /* Configure the Rx forwarding */
 static int ena_rss_configure(struct ena_adapter *adapter)
 {
@@ -1582,8 +1616,6 @@
 	/* enable transmits */
 	netif_tx_start_all_queues(adapter->netdev);
 
-	ena_restore_ethtool_params(adapter);
-
 	ena_napi_enable_all(adapter);
 
 	return 0;
@@ -1591,7 +1623,7 @@
 
 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
 {
-	struct ena_com_create_io_ctx ctx = { 0 };
+	struct ena_com_create_io_ctx ctx;
 	struct ena_com_dev *ena_dev;
 	struct ena_ring *tx_ring;
 	u32 msix_vector;
@@ -1604,11 +1636,13 @@
 	msix_vector = ENA_IO_IRQ_IDX(qid);
 	ena_qid = ENA_IO_TXQ_IDX(qid);
 
+	memset(&ctx, 0x0, sizeof(ctx));
+
 	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
 	ctx.qid = ena_qid;
 	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
 	ctx.msix_vector = msix_vector;
-	ctx.queue_size = adapter->tx_ring_size;
+	ctx.queue_size = tx_ring->ring_size;
 	ctx.numa_node = cpu_to_node(tx_ring->cpu);
 
 	rc = ena_com_create_io_queue(ena_dev, &ctx);
@@ -1657,7 +1691,7 @@
 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
 {
 	struct ena_com_dev *ena_dev;
-	struct ena_com_create_io_ctx ctx = { 0 };
+	struct ena_com_create_io_ctx ctx;
 	struct ena_ring *rx_ring;
 	u32 msix_vector;
 	u16 ena_qid;
@@ -1669,11 +1703,13 @@
 	msix_vector = ENA_IO_IRQ_IDX(qid);
 	ena_qid = ENA_IO_RXQ_IDX(qid);
 
+	memset(&ctx, 0x0, sizeof(ctx));
+
 	ctx.qid = ena_qid;
 	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
 	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
 	ctx.msix_vector = msix_vector;
-	ctx.queue_size = adapter->rx_ring_size;
+	ctx.queue_size = rx_ring->ring_size;
 	ctx.numa_node = cpu_to_node(rx_ring->cpu);
 
 	rc = ena_com_create_io_queue(ena_dev, &ctx);
@@ -1709,17 +1745,126 @@
 		rc = ena_create_io_rx_queue(adapter, i);
 		if (rc)
 			goto create_err;
+		INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
 	}
 
 	return 0;
 
 create_err:
-	while (i--)
+	while (i--) {
+		cancel_work_sync(&adapter->ena_napi[i].dim.work);
 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
+	}
 
 	return rc;
 }
 
+static void set_io_rings_size(struct ena_adapter *adapter,
+				     int new_tx_size, int new_rx_size)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		adapter->tx_ring[i].ring_size = new_tx_size;
+		adapter->rx_ring[i].ring_size = new_rx_size;
+	}
+}
+
+/* This function allows queue allocation to back off when the system is
+ * low on memory. If there is not enough memory to allocate IO queues,
+ * the driver retries the allocation with smaller queues.
+ *
+ * The backoff algorithm is as follows:
+ *  1. Try to allocate the TX and RX queues.
+ *  1.1. If successful, return success.
+ *
+ *  2. Halve the size of the larger of the RX and TX queues (or of both,
+ *     if they are the same size).
+ *
+ *  3. If either new size is smaller than ENA_MIN_RING_SIZE,
+ *  3.1. return failure.
+ *  4. Otherwise,
+ *  4.1. go back to 1.
+ *
+ * A sketch of the resulting retry sequence appears after this function.
+ */
+static int create_queues_with_size_backoff(struct ena_adapter *adapter)
+{
+	int rc, cur_rx_ring_size, cur_tx_ring_size;
+	int new_rx_ring_size, new_tx_ring_size;
+
+	/* The current queue sizes might be smaller than the requested
+	 * ones due to past queue allocation failures.
+	 */
+	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
+			  adapter->requested_rx_ring_size);
+
+	while (1) {
+		rc = ena_setup_all_tx_resources(adapter);
+		if (rc)
+			goto err_setup_tx;
+
+		rc = ena_create_all_io_tx_queues(adapter);
+		if (rc)
+			goto err_create_tx_queues;
+
+		rc = ena_setup_all_rx_resources(adapter);
+		if (rc)
+			goto err_setup_rx;
+
+		rc = ena_create_all_io_rx_queues(adapter);
+		if (rc)
+			goto err_create_rx_queues;
+
+		return 0;
+
+err_create_rx_queues:
+		ena_free_all_io_rx_resources(adapter);
+err_setup_rx:
+		ena_destroy_all_tx_queues(adapter);
+err_create_tx_queues:
+		ena_free_all_io_tx_resources(adapter);
+err_setup_tx:
+		if (rc != -ENOMEM) {
+			netif_err(adapter, ifup, adapter->netdev,
+				  "Queue creation failed with error code %d\n",
+				  rc);
+			return rc;
+		}
+
+		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
+		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
+
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
+			  cur_tx_ring_size, cur_rx_ring_size);
+
+		new_tx_ring_size = cur_tx_ring_size;
+		new_rx_ring_size = cur_rx_ring_size;
+
+		/* Decrease the size of the larger queue, or
+		 * decrease both if they are the same size.
+		 */
+		if (cur_rx_ring_size <= cur_tx_ring_size)
+			new_tx_ring_size = cur_tx_ring_size / 2;
+		if (cur_rx_ring_size >= cur_tx_ring_size)
+			new_rx_ring_size = cur_rx_ring_size / 2;
+
+		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
+		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
+			netif_err(adapter, ifup, adapter->netdev,
+				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
+				  ENA_MIN_RING_SIZE);
+			return rc;
+		}
+
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Retrying queue creation with sizes TX=%d, RX=%d\n",
+			  new_tx_ring_size,
+			  new_rx_ring_size);
+
+		set_io_rings_size(adapter, new_tx_ring_size,
+				  new_rx_ring_size);
+	}
+}
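Assuming an initial request of TX=4096, RX=1024 and an allocator that always fails with -ENOMEM, this standalone sketch prints the sizes the loop above would try before giving up: (4096,1024), (2048,1024), (1024,1024), (512,512), (256,256).

#include <stdio.h>

#define TOY_MIN_RING_SIZE 256

int main(void)
{
	int tx = 4096, rx = 1024;

	for (;;) {
		int cur_tx = tx, cur_rx = rx;

		printf("try TX=%d RX=%d\n", cur_tx, cur_rx);
		/* pretend the attempt failed with -ENOMEM */
		if (cur_rx <= cur_tx)		/* halve the larger queue, */
			tx = cur_tx / 2;
		if (cur_rx >= cur_tx)		/* or both if equal */
			rx = cur_rx / 2;
		if (tx < TOY_MIN_RING_SIZE || rx < TOY_MIN_RING_SIZE) {
			printf("give up: below %d\n", TOY_MIN_RING_SIZE);
			return 0;
		}
	}
}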
+
 static int ena_up(struct ena_adapter *adapter)
 {
 	int rc, i;
@@ -1739,25 +1884,9 @@
 	if (rc)
 		goto err_req_irq;
 
-	/* allocate transmit descriptors */
-	rc = ena_setup_all_tx_resources(adapter);
+	rc = create_queues_with_size_backoff(adapter);
 	if (rc)
-		goto err_setup_tx;
-
-	/* allocate receive descriptors */
-	rc = ena_setup_all_rx_resources(adapter);
-	if (rc)
-		goto err_setup_rx;
-
-	/* Create TX queues */
-	rc = ena_create_all_io_tx_queues(adapter);
-	if (rc)
-		goto err_create_tx_queues;
-
-	/* Create RX queues */
-	rc = ena_create_all_io_rx_queues(adapter);
-	if (rc)
-		goto err_create_rx_queues;
+		goto err_create_queues_with_backoff;
 
 	rc = ena_up_complete(adapter);
 	if (rc)
@@ -1786,16 +1915,14 @@
 	return rc;
 
 err_up:
-	ena_destroy_all_rx_queues(adapter);
-err_create_rx_queues:
 	ena_destroy_all_tx_queues(adapter);
-err_create_tx_queues:
-	ena_free_all_io_rx_resources(adapter);
-err_setup_rx:
 	ena_free_all_io_tx_resources(adapter);
-err_setup_tx:
+	ena_destroy_all_rx_queues(adapter);
+	ena_free_all_io_rx_resources(adapter);
+err_create_queues_with_backoff:
 	ena_free_io_irq(adapter);
 err_req_irq:
+	ena_del_napi(adapter);
 
 	return rc;
 }
@@ -1824,6 +1951,8 @@
 		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
 		if (rc)
 			dev_err(&adapter->pdev->dev, "Device reset failed\n");
+		/* stop submitting admin commands on a device that was reset */
+		ena_com_set_admin_running_state(adapter->ena_dev, false);
 	}
 
 	ena_destroy_all_io_queues(adapter);
@@ -1890,6 +2019,9 @@
 
 	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
 
+	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+		return 0;
+
 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		ena_down(adapter);
 
@@ -1907,6 +2039,20 @@
 	return 0;
 }
 
+int ena_update_queue_sizes(struct ena_adapter *adapter,
+			   u32 new_tx_size,
+			   u32 new_rx_size)
+{
+	bool dev_up;
+
+	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	ena_close(adapter->netdev);
+	adapter->requested_tx_ring_size = new_tx_size;
+	adapter->requested_rx_ring_size = new_rx_size;
+	ena_init_io_rings(adapter);
+	return dev_up ? ena_up(adapter) : 0;
+}
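The helper above is exported through ena_netdev.h (see the header hunk below), so a ring-resize path outside this file can reach it. A hedged standalone model of the validation such a caller would do first against the new max_*_ring_size bounds; the limits and the power-of-two rule here are illustrative.

#include <stdio.h>

#define TOY_MIN_RING_SIZE 256

static int valid_ring_size(unsigned int req, unsigned int max)
{
	return req >= TOY_MIN_RING_SIZE && req <= max &&
	       (req & (req - 1)) == 0;		/* power of two */
}

int main(void)
{
	printf("%d %d %d\n",
	       valid_ring_size(512, 8192),	/* ok */
	       valid_ring_size(300, 8192),	/* not a power of two */
	       valid_ring_size(128, 8192));	/* below minimum */
	return 0;
}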
+
 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
 {
 	u32 mss = skb_shinfo(skb)->gso_size;
@@ -1986,6 +2132,112 @@
 	return rc;
 }
 
+static int ena_tx_map_skb(struct ena_ring *tx_ring,
+			  struct ena_tx_buffer *tx_info,
+			  struct sk_buff *skb,
+			  void **push_hdr,
+			  u16 *header_len)
+{
+	struct ena_adapter *adapter = tx_ring->adapter;
+	struct ena_com_buf *ena_buf;
+	dma_addr_t dma;
+	u32 skb_head_len, frag_len, last_frag;
+	u16 push_len = 0;
+	u16 delta = 0;
+	int i = 0;
+
+	skb_head_len = skb_headlen(skb);
+	tx_info->skb = skb;
+	ena_buf = tx_info->bufs;
+
+	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		/* When the device is in LLQ mode, the driver copies
+		 * the header into the device memory space.
+		 * The ena_com layer assumes the header is in a linear
+		 * memory space.
+		 * This assumption might be wrong since part of the header
+		 * can be in the fragmented buffers.
+		 * Use skb_header_pointer to make sure the header is in a
+		 * linear memory space.
+		 */
+
+		push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
+		*push_hdr = skb_header_pointer(skb, 0, push_len,
+					       tx_ring->push_buf_intermediate_buf);
+		*header_len = push_len;
+		if (unlikely(skb->data != *push_hdr)) {
+			u64_stats_update_begin(&tx_ring->syncp);
+			tx_ring->tx_stats.llq_buffer_copy++;
+			u64_stats_update_end(&tx_ring->syncp);
+
+			delta = push_len - skb_head_len;
+		}
+	} else {
+		*push_hdr = NULL;
+		*header_len = min_t(u32, skb_head_len,
+				    tx_ring->tx_max_header_size);
+	}
+
+	netif_dbg(adapter, tx_queued, adapter->netdev,
+		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
+		  *push_hdr, push_len);
+
+	if (skb_head_len > push_len) {
+		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
+				     skb_head_len - push_len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+			goto error_report_dma_error;
+
+		ena_buf->paddr = dma;
+		ena_buf->len = skb_head_len - push_len;
+
+		ena_buf++;
+		tx_info->num_of_bufs++;
+		tx_info->map_linear_data = 1;
+	} else {
+		tx_info->map_linear_data = 0;
+	}
+
+	last_frag = skb_shinfo(skb)->nr_frags;
+
+	for (i = 0; i < last_frag; i++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		frag_len = skb_frag_size(frag);
+
+		if (unlikely(delta >= frag_len)) {
+			delta -= frag_len;
+			continue;
+		}
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
+				       frag_len - delta, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+			goto error_report_dma_error;
+
+		ena_buf->paddr = dma;
+		ena_buf->len = frag_len - delta;
+		ena_buf++;
+		tx_info->num_of_bufs++;
+		delta = 0;
+	}
+
+	return 0;
+
+error_report_dma_error:
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.dma_mapping_err++;
+	u64_stats_update_end(&tx_ring->syncp);
+	netdev_warn(adapter->netdev, "failed to map skb\n");
+
+	tx_info->skb = NULL;
+
+	tx_info->num_of_bufs += i;
+	ena_unmap_tx_skb(tx_ring, tx_info);
+
+	return -EINVAL;
+}
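The subtle part of the function above is delta: when the pushed header extends past the linear data (push_len > skb_head_len), the first delta bytes of the fragments were already copied into the push buffer and must be skipped when mapping. A standalone model of that offset bookkeeping, with made-up lengths (64-byte linear part, 96-byte push, fragments of 16/32/1400 bytes):

#include <stdio.h>

int main(void)
{
	unsigned int push_len = 96, head_len = 64;
	unsigned int frags[] = { 16, 32, 1400 };
	unsigned int delta = push_len > head_len ? push_len - head_len : 0;
	unsigned int i;

	/* map the linear data beyond the push, if any */
	if (head_len > push_len)
		printf("map linear: off=%u len=%u\n",
		       push_len, head_len - push_len);

	for (i = 0; i < sizeof(frags) / sizeof(frags[0]); i++) {
		if (delta >= frags[i]) {	/* fully inside the push */
			delta -= frags[i];
			continue;
		}
		printf("map frag %u: off=%u len=%u\n",
		       i, delta, frags[i] - delta);
		delta = 0;
	}
	return 0;
}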
+
 /* Called with netif_tx_lock. */
 static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -1994,16 +2246,9 @@
 	struct ena_com_tx_ctx ena_tx_ctx;
 	struct ena_ring *tx_ring;
 	struct netdev_queue *txq;
-	struct ena_com_buf *ena_buf;
 	void *push_hdr;
-	u32 len, last_frag;
-	u16 next_to_use;
-	u16 req_id;
-	u16 push_len;
-	u16 header_len;
-	dma_addr_t dma;
+	u16 next_to_use, req_id, header_len;
 	int qid, rc, nb_hw_desc;
-	int i = -1;
 
 	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
 	/*  Determine which tx ring we will be placed on */
@@ -2016,62 +2261,17 @@
 		goto error_drop_packet;
 
 	skb_tx_timestamp(skb);
-	len = skb_headlen(skb);
 
 	next_to_use = tx_ring->next_to_use;
-	req_id = tx_ring->free_tx_ids[next_to_use];
+	req_id = tx_ring->free_ids[next_to_use];
 	tx_info = &tx_ring->tx_buffer_info[req_id];
 	tx_info->num_of_bufs = 0;
 
 	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
-	ena_buf = tx_info->bufs;
-	tx_info->skb = skb;
 
-	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-		/* prepared the push buffer */
-		push_len = min_t(u32, len, tx_ring->tx_max_header_size);
-		header_len = push_len;
-		push_hdr = skb->data;
-	} else {
-		push_len = 0;
-		header_len = min_t(u32, len, tx_ring->tx_max_header_size);
-		push_hdr = NULL;
-	}
-
-	netif_dbg(adapter, tx_queued, dev,
-		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
-		  push_hdr, push_len);
-
-	if (len > push_len) {
-		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
-				     len - push_len, DMA_TO_DEVICE);
-		if (dma_mapping_error(tx_ring->dev, dma))
-			goto error_report_dma_error;
-
-		ena_buf->paddr = dma;
-		ena_buf->len = len - push_len;
-
-		ena_buf++;
-		tx_info->num_of_bufs++;
-	}
-
-	last_frag = skb_shinfo(skb)->nr_frags;
-
-	for (i = 0; i < last_frag; i++) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		len = skb_frag_size(frag);
-		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
-				       DMA_TO_DEVICE);
-		if (dma_mapping_error(tx_ring->dev, dma))
-			goto error_report_dma_error;
-
-		ena_buf->paddr = dma;
-		ena_buf->len = len;
-		ena_buf++;
-	}
-
-	tx_info->num_of_bufs += last_frag;
+	rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
+	if (unlikely(rc))
+		goto error_drop_packet;
 
 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
 	ena_tx_ctx.ena_bufs = tx_info->bufs;
@@ -2083,18 +2283,33 @@
 	/* set flags and meta data */
 	ena_tx_csum(&ena_tx_ctx, skb);
 
+	if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) {
+		netif_dbg(adapter, tx_queued, dev,
+			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+			  qid);
+		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+	}
+
 	/* prepare the packet's descriptors to dma engine */
 	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
 				&nb_hw_desc);
 
+	/* ena_com_prepare_tx() can't fail due to overflow of the tx queue,
+	 * since the number of free descriptors in the queue is checked
+	 * after sending the previous packet. In case there isn't enough
+	 * space for the next packet, the queue is stopped until enough
+	 * space is again available.
+	 * All other failure reasons of ena_com_prepare_tx() are fatal
+	 * and therefore require a device reset.
+	 */
 	if (unlikely(rc)) {
 		netif_err(adapter, tx_queued, dev,
 			  "failed to prepare tx bufs\n");
 		u64_stats_update_begin(&tx_ring->syncp);
-		tx_ring->tx_stats.queue_stop++;
 		tx_ring->tx_stats.prepare_ctx_err++;
 		u64_stats_update_end(&tx_ring->syncp);
-		netif_tx_stop_queue(txq);
+		adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE;
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 		goto error_unmap_dma;
 	}
 
@@ -2116,8 +2331,8 @@
 	 * to sgl_size + 2: one for the meta descriptor and one for the header
 	 * (if the header is larger than tx_max_header_size).
 	 */
-	if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
-		     (tx_ring->sgl_size + 2))) {
+	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+						   tx_ring->sgl_size + 2))) {
 		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
 			  __func__, qid);
 
@@ -2136,8 +2351,8 @@
 		 */
 		smp_mb();
 
-		if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
-				> ENA_TX_WAKEUP_THRESH) {
+		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+						 ENA_TX_WAKEUP_THRESH)) {
 			netif_tx_wake_queue(txq);
 			u64_stats_update_begin(&tx_ring->syncp);
 			tx_ring->tx_stats.queue_wakeup++;
@@ -2145,7 +2360,7 @@
 		}
 	}
 
-	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
 		/* trigger the dma engine. ena_com_write_sq_doorbell()
 		 * has a mb
 		 */
@@ -2157,42 +2372,17 @@
 
 	return NETDEV_TX_OK;
 
-error_report_dma_error:
-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->tx_stats.dma_mapping_err++;
-	u64_stats_update_end(&tx_ring->syncp);
-	netdev_warn(adapter->netdev, "failed to map skb\n");
-
+error_unmap_dma:
+	ena_unmap_tx_skb(tx_ring, tx_info);
 	tx_info->skb = NULL;
 
-error_unmap_dma:
-	if (i >= 0) {
-		/* save value of frag that failed */
-		last_frag = i;
-
-		/* start back at beginning and unmap skb */
-		tx_info->skb = NULL;
-		ena_buf = tx_info->bufs;
-		dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
-				 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
-
-		/* unmap remaining mapped pages */
-		for (i = 0; i < last_frag; i++) {
-			ena_buf++;
-			dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
-				       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
-		}
-	}
-
 error_drop_packet:
-
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    struct net_device *sb_dev,
-			    select_queue_fallback_t fallback)
+			    struct net_device *sb_dev)
 {
 	u16 qid;
 	/* we suspect that this is good for in-kernel network services that
@@ -2202,12 +2392,13 @@
 	if (skb_rx_queue_recorded(skb))
 		qid = skb_get_rx_queue(skb);
 	else
-		qid = fallback(dev, skb, NULL);
+		qid = netdev_pick_tx(dev, skb, NULL);
 
 	return qid;
 }
 
-static void ena_config_host_info(struct ena_com_dev *ena_dev)
+static void ena_config_host_info(struct ena_com_dev *ena_dev,
+				 struct pci_dev *pdev)
 {
 	struct ena_admin_host_info *host_info;
 	int rc;
@@ -2221,9 +2412,10 @@
 
 	host_info = ena_dev->host_attr.host_info;
 
+	host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
 	host_info->os_type = ENA_ADMIN_OS_LINUX;
 	host_info->kernel_ver = LINUX_VERSION_CODE;
-	strncpy(host_info->kernel_ver_str, utsname()->version,
+	strlcpy(host_info->kernel_ver_str, utsname()->version,
 		sizeof(host_info->kernel_ver_str) - 1);
 	host_info->os_dist = 0;
 	strncpy(host_info->os_dist_str, utsname()->release,
@@ -2231,7 +2423,12 @@
 	host_info->driver_version =
 		(DRV_MODULE_VER_MAJOR) |
 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
-		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
+		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
+		("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
+	host_info->num_cpus = num_online_cpus();
+
+	host_info->driver_supported_features =
+		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK;
 
 	rc = ena_com_set_host_attributes(ena_dev);
 	if (rc) {
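Both new host-info fields are plain bit packings: the BDF is (bus << 8) | devfn as in the hunk above, and the driver version packs major/minor/subminor plus a module-type byte ('K', presumably for "kernel"). A standalone check of the encodings; the 8/16/24 shift widths are an assumption standing in for the ENA_ADMIN_HOST_INFO_* constants.

#include <stdio.h>

#define TOY_MINOR_SHIFT        8	/* assumed shift values */
#define TOY_SUB_MINOR_SHIFT   16
#define TOY_MODULE_TYPE_SHIFT 24

int main(void)
{
	unsigned int ver = 2 | (1 << TOY_MINOR_SHIFT) |
			   (0 << TOY_SUB_MINOR_SHIFT) |
			   ((unsigned int)'K' << TOY_MODULE_TYPE_SHIFT);
	unsigned int bus = 0x3b, devfn = 0x00;	/* e.g. 3b:00.0 */

	printf("driver_version=0x%08x bdf=0x%04x\n",
	       ver, (bus << 8) | devfn);
	return 0;
}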
@@ -2371,13 +2568,6 @@
 		return -EINVAL;
 	}
 
-	if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
-	    (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
-		netif_err(adapter, drv, netdev,
-			  "Error, device doesn't support enough queues\n");
-		return -EINVAL;
-	}
-
 	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
 		netif_err(adapter, drv, netdev,
 			  "Error, device max mtu is smaller than netdev MTU\n");
@@ -2442,7 +2632,7 @@
 	}
 
 	/* ENA admin level init */
-	rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
 	if (rc) {
 		dev_err(dev,
 			"Can not initialize ena admin queue with device\n");
@@ -2455,7 +2645,7 @@
 	 */
 	ena_com_set_admin_polling_mode(ena_dev, true);
 
-	ena_config_host_info(ena_dev);
+	ena_config_host_info(ena_dev, pdev);
 
 	/* Get Device Attributes*/
 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
@@ -2540,17 +2730,14 @@
 
 	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 	adapter->dev_up_before_reset = dev_up;
-
 	if (!graceful)
 		ena_com_set_admin_running_state(ena_dev, false);
 
 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		ena_down(adapter);
 
-	/* Before releasing the ENA resources, a device reset is required.
-	 * (to prevent the device from accessing them).
-	 * In case the reset flag is set and the device is up, ena_down()
-	 * already perform the reset, so it can be skipped.
+	/* Stop the device from sending AENQ events (if the reset flag is set
+	 * and the device is up, ena_down() has already reset the device,
+	 * so the reset here can be skipped).
 	 */
 	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
 		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
@@ -2595,11 +2782,6 @@
 		goto err_device_destroy;
 	}
 
-	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
-	/* Make sure we don't have a race with AENQ Links state handler */
-	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
-		netif_carrier_on(adapter->netdev);
-
 	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
 						      adapter->num_queues);
 	if (rc) {
@@ -2616,8 +2798,15 @@
 	}
 
 	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
+	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+		netif_carrier_on(adapter->netdev);
+
 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
-	dev_err(&pdev->dev, "Device reset completed successfully\n");
+	dev_err(&pdev->dev,
+		"Device reset completed successfully, Driver info: %s\n",
+		version);
 
 	return rc;
 err_disable_msix:
@@ -2810,7 +2999,7 @@
 		rx_ring = &adapter->rx_ring[i];
 
 		refill_required =
-			ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+			ena_com_free_desc(rx_ring->ena_com_io_sq);
 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
 			rx_ring->empty_rx_queue++;
 
@@ -2952,28 +3141,32 @@
 				 struct ena_com_dev *ena_dev,
 				 struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
-	int io_sq_num, io_queue_num;
+	int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num;
 
-	/* In case of LLQ use the llq number in the get feature cmd */
-	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-		io_sq_num = get_feat_ctx->max_queues.max_llq_num;
+	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
+			&get_feat_ctx->max_queue_ext.max_queue_ext;
+		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
+				  max_queue_ext->max_rx_cq_num);
 
-		if (io_sq_num == 0) {
-			dev_err(&pdev->dev,
-				"Trying to use LLQ but llq_num is 0. Fall back into regular queues\n");
-
-			ena_dev->tx_mem_queue_type =
-				ENA_ADMIN_PLACEMENT_POLICY_HOST;
-			io_sq_num = get_feat_ctx->max_queues.max_sq_num;
-		}
+		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
+		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
 	} else {
-		io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+		struct ena_admin_queue_feature_desc *max_queues =
+			&get_feat_ctx->max_queues;
+		io_tx_sq_num = max_queues->max_sq_num;
+		io_tx_cq_num = max_queues->max_cq_num;
+		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
 	}
 
+	/* In case of LLQ use the llq fields for the tx SQ/CQ */
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
+
 	io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
-	io_queue_num = min_t(int, io_queue_num, io_sq_num);
-	io_queue_num = min_t(int, io_queue_num,
-			     get_feat_ctx->max_queues.max_cq_num);
+	io_queue_num = min_t(int, io_queue_num, io_rx_num);
+	io_queue_num = min_t(int, io_queue_num, io_tx_sq_num);
+	io_queue_num = min_t(int, io_queue_num, io_tx_cq_num);
 	/* 1 IRQ for mgmnt and 1 IRQ for each IO direction */
 	io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
 	if (unlikely(!io_queue_num)) {
@@ -2984,18 +3177,52 @@
 	return io_queue_num;
 }
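The queue count above is simply the minimum over the CPU count, the device's Rx and Tx SQ/CQ limits, and the MSI-X vectors left after reserving one for management. A sketch with illustrative numbers, where the Tx CQ limit happens to bind:

#include <stdio.h>

static int min_i(int a, int b) { return a < b ? a : b; }

int main(void)
{
	int cpus = 16, io_rx = 32, io_tx_sq = 32, io_tx_cq = 8;
	int msix = 12;			/* 1 mgmnt + 11 IO vectors */
	int n = cpus;

	n = min_i(n, io_rx);
	n = min_i(n, io_tx_sq);
	n = min_i(n, io_tx_cq);		/* binds here: 8 */
	n = min_i(n, msix - 1);
	printf("io_queue_num = %d\n", n);
	return 0;
}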
 
-static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
-			      struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static int ena_set_queues_placement_policy(struct pci_dev *pdev,
+					   struct ena_com_dev *ena_dev,
+					   struct ena_admin_feature_llq_desc *llq,
+					   struct ena_llq_configurations *llq_default_configurations)
 {
 	bool has_mem_bar;
+	int rc;
+	u32 llq_feature_mask;
+
+	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
+	if (!(ena_dev->supported_features & llq_feature_mask)) {
+		dev_err(&pdev->dev,
+			"LLQ is not supported. Fallback to host mode policy.\n");
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+		return 0;
+	}
 
 	has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
 
-	/* Enable push mode if device supports LLQ */
-	if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
-		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
-	else
+	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
+	if (unlikely(rc)) {
+		dev_err(&pdev->dev,
+			"Failed to configure the device mode. Fallback to host mode policy.\n");
 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+		return 0;
+	}
+
+	/* Nothing to config, exit */
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		return 0;
+
+	if (!has_mem_bar) {
+		dev_err(&pdev->dev,
+			"ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+		return 0;
+	}
+
+	ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+					   pci_resource_start(pdev, ENA_MEM_BAR),
+					   pci_resource_len(pdev, ENA_MEM_BAR));
+
+	if (!ena_dev->mem_bar)
+		return -EFAULT;
+
+	return 0;
 }
 
 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
@@ -3113,36 +3340,79 @@
 	pci_release_selected_regions(pdev, release_bars);
 }
 
-static int ena_calc_queue_size(struct pci_dev *pdev,
-			       struct ena_com_dev *ena_dev,
-			       u16 *max_tx_sgl_size,
-			       u16 *max_rx_sgl_size,
-			       struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
 {
-	u32 queue_size = ENA_DEFAULT_RING_SIZE;
+	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
+	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+	llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+	llq_config->llq_ring_entry_size_value = 128;
+}
 
-	queue_size = min_t(u32, queue_size,
-			   get_feat_ctx->max_queues.max_cq_depth);
-	queue_size = min_t(u32, queue_size,
-			   get_feat_ctx->max_queues.max_sq_depth);
+static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx)
+{
+	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
+	struct ena_com_dev *ena_dev = ctx->ena_dev;
+	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
+	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
+	u32 max_tx_queue_size;
+	u32 max_rx_queue_size;
 
-	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
-		queue_size = min_t(u32, queue_size,
-				   get_feat_ctx->max_queues.max_llq_depth);
+	if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
+			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
+		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
+					  max_queue_ext->max_rx_sq_depth);
+		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
 
-	queue_size = rounddown_pow_of_two(queue_size);
+		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+			max_tx_queue_size = min_t(u32, max_tx_queue_size,
+						  llq->max_llq_depth);
+		else
+			max_tx_queue_size = min_t(u32, max_tx_queue_size,
+						  max_queue_ext->max_tx_sq_depth);
 
-	if (unlikely(!queue_size)) {
-		dev_err(&pdev->dev, "Invalid queue size\n");
-		return -EFAULT;
+		ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+					     max_queue_ext->max_per_packet_tx_descs);
+		ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+					     max_queue_ext->max_per_packet_rx_descs);
+	} else {
+		struct ena_admin_queue_feature_desc *max_queues =
+			&ctx->get_feat_ctx->max_queues;
+		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
+					  max_queues->max_sq_depth);
+		max_tx_queue_size = max_queues->max_cq_depth;
+
+		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+			max_tx_queue_size = min_t(u32, max_tx_queue_size,
+						  llq->max_llq_depth);
+		else
+			max_tx_queue_size = min_t(u32, max_tx_queue_size,
+						  max_queues->max_sq_depth);
+
+		ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+					     max_queues->max_packet_tx_descs);
+		ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+					     max_queues->max_packet_rx_descs);
 	}
 
-	*max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
-				 get_feat_ctx->max_queues.max_packet_tx_descs);
-	*max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
-				 get_feat_ctx->max_queues.max_packet_rx_descs);
+	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
+	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
 
-	return queue_size;
+	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
+				  max_tx_queue_size);
+	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
+				  max_rx_queue_size);
+
+	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
+	rx_queue_size = rounddown_pow_of_two(rx_queue_size);
+
+	ctx->max_tx_queue_size = max_tx_queue_size;
+	ctx->max_rx_queue_size = max_rx_queue_size;
+	ctx->tx_queue_size = tx_queue_size;
+	ctx->rx_queue_size = rx_queue_size;
+
+	return 0;
 }
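The sizing above rounds the device maxima down to powers of two, clamps the 1024-entry default into [ENA_MIN_RING_SIZE, max], and rounds once more. A standalone arithmetic sketch of one branch, assuming a hypothetical device cap of 700 descriptors:

#include <stdio.h>

#define TOY_DEFAULT_RING_SIZE 1024
#define TOY_MIN_RING_SIZE      256

static unsigned int rounddown_pow2(unsigned int v)
{
	unsigned int p = 1;

	while (p * 2 <= v)
		p *= 2;
	return p;
}

static unsigned int pick_queue_size(unsigned int dev_max)
{
	unsigned int max = rounddown_pow2(dev_max);
	unsigned int q = TOY_DEFAULT_RING_SIZE;

	if (q < TOY_MIN_RING_SIZE)
		q = TOY_MIN_RING_SIZE;
	if (q > max)
		q = max;
	return rounddown_pow2(q);
}

int main(void)
{
	/* device cap 700 -> max 512 -> default 1024 clamps to 512 */
	printf("%u\n", pick_queue_size(700));
	return 0;
}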
 
 /* ena_probe - Device Initialization Routine
@@ -3158,21 +3428,19 @@
 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct ena_com_dev_get_features_ctx get_feat_ctx;
-	static int version_printed;
-	struct net_device *netdev;
-	struct ena_adapter *adapter;
+	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
+	struct ena_llq_configurations llq_config;
 	struct ena_com_dev *ena_dev = NULL;
-	static int adapters_found;
+	struct ena_adapter *adapter;
 	int io_queue_num, bars, rc;
-	int queue_size;
-	u16 tx_sgl_size = 0;
-	u16 rx_sgl_size = 0;
+	struct net_device *netdev;
+	static int adapters_found;
+	char *queue_type_str;
 	bool wd_state;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
 
-	if (version_printed++ == 0)
-		dev_info(&pdev->dev, "%s", version);
+	dev_info_once(&pdev->dev, "%s", version);
 
 	rc = pci_enable_device_mem(pdev);
 	if (rc) {
@@ -3215,32 +3483,38 @@
 		goto err_free_region;
 	}
 
-	ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
+	set_default_llq_configurations(&llq_config);
 
-	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-		ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
-						   pci_resource_start(pdev, ENA_MEM_BAR),
-						   pci_resource_len(pdev, ENA_MEM_BAR));
-		if (!ena_dev->mem_bar) {
-			rc = -EFAULT;
-			goto err_device_destroy;
-		}
+	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
+					     &llq_config);
+	if (rc) {
+		dev_err(&pdev->dev, "ena device init failed\n");
+		goto err_device_destroy;
 	}
 
-	/* initial Tx interrupt delay, Assumes 1 usec granularity.
+	calc_queue_ctx.ena_dev = ena_dev;
+	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
+	calc_queue_ctx.pdev = pdev;
+
+	/* Initial Tx and Rx interrupt delay. Assumes 1 usec granularity.
 	 * Updated during device initialization with the real granularity.
 	 */
 	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
+	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
+	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
 	io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
-	queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
-					 &rx_sgl_size, &get_feat_ctx);
-	if ((queue_size <= 0) || (io_queue_num <= 0)) {
+	rc = ena_calc_queue_size(&calc_queue_ctx);
+	if (rc || io_queue_num <= 0) {
 		rc = -EFAULT;
 		goto err_device_destroy;
 	}
 
-	dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
-		 io_queue_num, queue_size);
+	dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size: %d LLQ is %s\n",
+		 io_queue_num,
+		 calc_queue_ctx.rx_queue_size,
+		 calc_queue_ctx.tx_queue_size,
+		 (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ?
+		 "ENABLED" : "DISABLED");
 
 	/* dev zeroed in init_etherdev */
 	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
@@ -3264,11 +3538,12 @@
 	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
 
-	adapter->tx_ring_size = queue_size;
-	adapter->rx_ring_size = queue_size;
-
-	adapter->max_tx_sgl_size = tx_sgl_size;
-	adapter->max_rx_sgl_size = rx_sgl_size;
+	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
+	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
+	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
+	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
+	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
+	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
 
 	adapter->num_queues = io_queue_num;
 	adapter->last_monitored_tx_qid = 0;
@@ -3330,9 +3605,15 @@
 	timer_setup(&adapter->timer_service, ena_timer_service, 0);
 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
 
-	dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		queue_type_str = "Regular";
+	else
+		queue_type_str = "Low Latency";
+
+	dev_info(&pdev->dev,
+		 "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n",
 		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
-		 netdev->dev_addr, io_queue_num);
+		 netdev->dev_addr, io_queue_num, queue_type_str);
 
 	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 
@@ -3345,10 +3626,11 @@
 	ena_com_rss_destroy(ena_dev);
 err_free_msix:
 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
+	/* stop submitting admin commands on a device that was reset */
+	ena_com_set_admin_running_state(ena_dev, false);
 	ena_free_mgmnt_irq(adapter);
 	ena_disable_msix(adapter);
 err_worker_destroy:
-	ena_com_destroy_interrupt_moderation(ena_dev);
 	del_timer(&adapter->timer_service);
 err_netdev_destroy:
 	free_netdev(netdev);
@@ -3391,18 +3673,12 @@
 
 	cancel_work_sync(&adapter->reset_task);
 
-	unregister_netdev(netdev);
-
-	/* If the device is running then we want to make sure the device will be
-	 * reset to make sure no more events will be issued by the device.
-	 */
-	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
-		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-
 	rtnl_lock();
 	ena_destroy_device(adapter, true);
 	rtnl_unlock();
 
+	unregister_netdev(netdev);
+
 	free_netdev(netdev);
 
 	ena_com_rss_destroy(ena_dev);
@@ -3415,8 +3691,6 @@
 
 	pci_disable_device(pdev);
 
-	ena_com_destroy_interrupt_moderation(ena_dev);
-
 	vfree(ena_dev);
 }
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 7c7ae56..72ee51a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -34,6 +34,7 @@
 #define ENA_H
 
 #include <linux/bitops.h>
+#include <linux/dim.h>
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/interrupt.h>
@@ -43,8 +44,8 @@
 #include "ena_com.h"
 #include "ena_eth_com.h"
 
-#define DRV_MODULE_VER_MAJOR	1
-#define DRV_MODULE_VER_MINOR	5
+#define DRV_MODULE_VER_MAJOR	2
+#define DRV_MODULE_VER_MINOR	1
 #define DRV_MODULE_VER_SUBMINOR 0
 
 #define DRV_MODULE_NAME		"ena"
@@ -61,6 +62,17 @@
 #define ENA_ADMIN_MSIX_VEC		1
 #define ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))
 
+/* The ENA buffer length field is 16 bits long. So when PAGE_SIZE == 64kB the
+ * driver would pass a length of 0.
+ * Since the max packet size the ENA handles is ~9kB, limit the buffer length
+ * to 16kB.
+ */
+#if PAGE_SIZE > SZ_16K
+#define ENA_PAGE_SIZE SZ_16K
+#else
+#define ENA_PAGE_SIZE PAGE_SIZE
+#endif
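The cap exists because a 64kB length literally wraps to 0 in a 16-bit field, as a two-line check shows:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t len = (uint16_t)(64 * 1024);	/* 0x10000 truncates to 0 */

	printf("64kB as u16 = %u, 16kB as u16 = %u\n",
	       len, (uint16_t)(16 * 1024));
	return 0;
}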
+
 #define ENA_MIN_MSIX_VEC		2
 
 #define ENA_REG_BAR			0
@@ -68,9 +80,10 @@
 #define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR))
 
 #define ENA_DEFAULT_RING_SIZE	(1024)
+#define ENA_MIN_RING_SIZE	(256)
 
 #define ENA_TX_WAKEUP_THRESH		(MAX_SKB_FRAGS + 2)
-#define ENA_DEFAULT_RX_COPYBREAK	(128 - NET_IP_ALIGN)
+#define ENA_DEFAULT_RX_COPYBREAK	(256 - NET_IP_ALIGN)
 
 /* limit the buffer size to 600 bytes to handle MTU changes from very
  * small to very large, in which case the number of buffers per packet
@@ -95,10 +108,11 @@
  */
 #define ENA_TX_POLL_BUDGET_DIVIDER	4
 
-/* Refill Rx queue when number of available descriptors is below
- * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER
+/* Refill the Rx queue when the number of required descriptors is above
+ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
  */
 #define ENA_RX_REFILL_THRESH_DIVIDER	8
+#define ENA_RX_REFILL_THRESH_PACKET	256
 
 /* Number of queues to check for missing queues per timer service */
 #define ENA_MONITORED_TX_QUEUES	4
@@ -140,6 +154,19 @@
 	struct ena_ring *tx_ring;
 	struct ena_ring *rx_ring;
 	u32 qid;
+	struct dim dim;
+};
+
+struct ena_calc_queue_size_ctx {
+	struct ena_com_dev_get_features_ctx *get_feat_ctx;
+	struct ena_com_dev *ena_dev;
+	struct pci_dev *pdev;
+	u16 tx_queue_size;
+	u16 rx_queue_size;
+	u16 max_tx_queue_size;
+	u16 max_rx_queue_size;
+	u16 max_tx_sgl_size;
+	u16 max_rx_sgl_size;
 };
 
 struct ena_tx_buffer {
@@ -151,6 +178,9 @@
 	/* num of buffers used by this skb */
 	u32 num_of_bufs;
 
+	/* Indicate if bufs[0] maps the linear data of the skb. */
+	u8 map_linear_data;
+
 	/* Used for detect missing tx packets to limit the number of prints */
 	u32 print_once;
 	/* Save the last jiffies to detect missing tx packets
@@ -186,31 +216,31 @@
 	u64 tx_poll;
 	u64 doorbells;
 	u64 bad_req_id;
+	u64 llq_buffer_copy;
 	u64 missed_tx;
 };
 
 struct ena_stats_rx {
 	u64 cnt;
 	u64 bytes;
+	u64 rx_copybreak_pkt;
+	u64 csum_good;
 	u64 refil_partial;
 	u64 bad_csum;
 	u64 page_alloc_fail;
 	u64 skb_alloc_fail;
 	u64 dma_mapping_err;
 	u64 bad_desc_num;
-	u64 rx_copybreak_pkt;
 	u64 bad_req_id;
 	u64 empty_rx_ring;
+	u64 csum_unchecked;
 };
 
 struct ena_ring {
-	union {
-		/* Holds the empty requests for TX/RX
-		 * out of order completions
-		 */
-		u16 *free_tx_ids;
-		u16 *free_rx_ids;
-	};
+	/* Holds the empty requests for TX/RX
+	 * out of order completions
+	 */
+	u16 *free_ids;
 
 	union {
 		struct ena_tx_buffer *tx_buffer_info;
@@ -250,13 +280,14 @@
 	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
 	u32  smoothed_interval;
 	u32  per_napi_packets;
-	u32  per_napi_bytes;
-	enum ena_intr_moder_level moder_tbl_idx;
+	u16 non_empty_napi_events;
 	struct u64_stats_sync syncp;
 	union {
 		struct ena_stats_tx tx_stats;
 		struct ena_stats_rx rx_stats;
 	};
+
+	u8 *push_buf_intermediate_buf;
 	int empty_rx_queue;
 } ____cacheline_aligned;
 
@@ -299,11 +330,11 @@
 
 	u32 missing_tx_completion_threshold;
 
-	u32 tx_usecs, rx_usecs; /* interrupt moderation */
-	u32 tx_frames, rx_frames; /* interrupt moderation */
+	u32 requested_tx_ring_size;
+	u32 requested_rx_ring_size;
 
-	u32 tx_ring_size;
-	u32 rx_ring_size;
+	u32 max_tx_ring_size;
+	u32 max_rx_ring_size;
 
 	u32 msg_enable;
 
@@ -353,17 +384,10 @@
 
 void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
 
-int ena_get_sset_count(struct net_device *netdev, int sset);
+int ena_update_queue_sizes(struct ena_adapter *adapter,
+			   u32 new_tx_size,
+			   u32 new_rx_size);
 
-/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the
- * driver passas 0.
- * Since the max packet size the ENA handles is ~9kB limit the buffer length to
- * 16kB.
- */
-#if PAGE_SIZE > SZ_16K
-#define ENA_PAGE_SIZE SZ_16K
-#else
-#define ENA_PAGE_SIZE PAGE_SIZE
-#endif
+int ena_get_sset_count(struct net_device *netdev, int sset);
 
 #endif /* !(ENA_H) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
index 48ca97f..04fcafc 100644
--- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -33,137 +33,125 @@
 #define _ENA_REGS_H_
 
 enum ena_regs_reset_reason_types {
-	ENA_REGS_RESET_NORMAL			= 0,
-
-	ENA_REGS_RESET_KEEP_ALIVE_TO		= 1,
-
-	ENA_REGS_RESET_ADMIN_TO			= 2,
-
-	ENA_REGS_RESET_MISS_TX_CMPL		= 3,
-
-	ENA_REGS_RESET_INV_RX_REQ_ID		= 4,
-
-	ENA_REGS_RESET_INV_TX_REQ_ID		= 5,
-
-	ENA_REGS_RESET_TOO_MANY_RX_DESCS	= 6,
-
-	ENA_REGS_RESET_INIT_ERR			= 7,
-
-	ENA_REGS_RESET_DRIVER_INVALID_STATE	= 8,
-
-	ENA_REGS_RESET_OS_TRIGGER		= 9,
-
-	ENA_REGS_RESET_OS_NETDEV_WD		= 10,
-
-	ENA_REGS_RESET_SHUTDOWN			= 11,
-
-	ENA_REGS_RESET_USER_TRIGGER		= 12,
-
-	ENA_REGS_RESET_GENERIC			= 13,
-
-	ENA_REGS_RESET_MISS_INTERRUPT		= 14,
+	ENA_REGS_RESET_NORMAL                       = 0,
+	ENA_REGS_RESET_KEEP_ALIVE_TO                = 1,
+	ENA_REGS_RESET_ADMIN_TO                     = 2,
+	ENA_REGS_RESET_MISS_TX_CMPL                 = 3,
+	ENA_REGS_RESET_INV_RX_REQ_ID                = 4,
+	ENA_REGS_RESET_INV_TX_REQ_ID                = 5,
+	ENA_REGS_RESET_TOO_MANY_RX_DESCS            = 6,
+	ENA_REGS_RESET_INIT_ERR                     = 7,
+	ENA_REGS_RESET_DRIVER_INVALID_STATE         = 8,
+	ENA_REGS_RESET_OS_TRIGGER                   = 9,
+	ENA_REGS_RESET_OS_NETDEV_WD                 = 10,
+	ENA_REGS_RESET_SHUTDOWN                     = 11,
+	ENA_REGS_RESET_USER_TRIGGER                 = 12,
+	ENA_REGS_RESET_GENERIC                      = 13,
+	ENA_REGS_RESET_MISS_INTERRUPT               = 14,
 };
 
 /* ena_registers offsets */
-#define ENA_REGS_VERSION_OFF		0x0
-#define ENA_REGS_CONTROLLER_VERSION_OFF		0x4
-#define ENA_REGS_CAPS_OFF		0x8
-#define ENA_REGS_CAPS_EXT_OFF		0xc
-#define ENA_REGS_AQ_BASE_LO_OFF		0x10
-#define ENA_REGS_AQ_BASE_HI_OFF		0x14
-#define ENA_REGS_AQ_CAPS_OFF		0x18
-#define ENA_REGS_ACQ_BASE_LO_OFF		0x20
-#define ENA_REGS_ACQ_BASE_HI_OFF		0x24
-#define ENA_REGS_ACQ_CAPS_OFF		0x28
-#define ENA_REGS_AQ_DB_OFF		0x2c
-#define ENA_REGS_ACQ_TAIL_OFF		0x30
-#define ENA_REGS_AENQ_CAPS_OFF		0x34
-#define ENA_REGS_AENQ_BASE_LO_OFF		0x38
-#define ENA_REGS_AENQ_BASE_HI_OFF		0x3c
-#define ENA_REGS_AENQ_HEAD_DB_OFF		0x40
-#define ENA_REGS_AENQ_TAIL_OFF		0x44
-#define ENA_REGS_INTR_MASK_OFF		0x4c
-#define ENA_REGS_DEV_CTL_OFF		0x54
-#define ENA_REGS_DEV_STS_OFF		0x58
-#define ENA_REGS_MMIO_REG_READ_OFF		0x5c
-#define ENA_REGS_MMIO_RESP_LO_OFF		0x60
-#define ENA_REGS_MMIO_RESP_HI_OFF		0x64
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF		0x68
+
+/* 0 base */
+#define ENA_REGS_VERSION_OFF                                0x0
+#define ENA_REGS_CONTROLLER_VERSION_OFF                     0x4
+#define ENA_REGS_CAPS_OFF                                   0x8
+#define ENA_REGS_CAPS_EXT_OFF                               0xc
+#define ENA_REGS_AQ_BASE_LO_OFF                             0x10
+#define ENA_REGS_AQ_BASE_HI_OFF                             0x14
+#define ENA_REGS_AQ_CAPS_OFF                                0x18
+#define ENA_REGS_ACQ_BASE_LO_OFF                            0x20
+#define ENA_REGS_ACQ_BASE_HI_OFF                            0x24
+#define ENA_REGS_ACQ_CAPS_OFF                               0x28
+#define ENA_REGS_AQ_DB_OFF                                  0x2c
+#define ENA_REGS_ACQ_TAIL_OFF                               0x30
+#define ENA_REGS_AENQ_CAPS_OFF                              0x34
+#define ENA_REGS_AENQ_BASE_LO_OFF                           0x38
+#define ENA_REGS_AENQ_BASE_HI_OFF                           0x3c
+#define ENA_REGS_AENQ_HEAD_DB_OFF                           0x40
+#define ENA_REGS_AENQ_TAIL_OFF                              0x44
+#define ENA_REGS_INTR_MASK_OFF                              0x4c
+#define ENA_REGS_DEV_CTL_OFF                                0x54
+#define ENA_REGS_DEV_STS_OFF                                0x58
+#define ENA_REGS_MMIO_REG_READ_OFF                          0x5c
+#define ENA_REGS_MMIO_RESP_LO_OFF                           0x60
+#define ENA_REGS_MMIO_RESP_HI_OFF                           0x64
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF                   0x68
 
 /* version register */
-#define ENA_REGS_VERSION_MINOR_VERSION_MASK		0xff
-#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT		8
-#define ENA_REGS_VERSION_MAJOR_VERSION_MASK		0xff00
+#define ENA_REGS_VERSION_MINOR_VERSION_MASK                 0xff
+#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT                8
+#define ENA_REGS_VERSION_MAJOR_VERSION_MASK                 0xff00
 
 /* controller_version register */
-#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK		0xff
-#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT		8
-#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK		0xff00
-#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT		16
-#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK		0xff0000
-#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT		24
-#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK		0xff000000
+#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK   0xff
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT     8
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK      0xff00
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT     16
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK      0xff0000
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT           24
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK            0xff000000
 
 /* caps register */
-#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK		0x1
-#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT		1
-#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK		0x3e
-#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT		8
-#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK		0xff00
-#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT		16
-#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK		0xf0000
+#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK        0x1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT                   1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK                    0x3e
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT                  8
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK                   0xff00
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT                    16
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK                     0xf0000
 
 /* aq_caps register */
-#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK		0xffff
-#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT		16
-#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK		0xffff0000
+#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK                      0xffff
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT                16
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK                 0xffff0000
 
 /* acq_caps register */
-#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK		0xffff
-#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT		16
-#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK		0xffff0000
+#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK                    0xffff
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT              16
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK               0xffff0000
 
 /* aenq_caps register */
-#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK		0xffff
-#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT		16
-#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK		0xffff0000
+#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK                  0xffff
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT            16
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK             0xffff0000
 
 /* dev_ctl register */
-#define ENA_REGS_DEV_CTL_DEV_RESET_MASK		0x1
-#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT		1
-#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK		0x2
-#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT		2
-#define ENA_REGS_DEV_CTL_QUIESCENT_MASK		0x4
-#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT		3
-#define ENA_REGS_DEV_CTL_IO_RESUME_MASK		0x8
-#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT		28
-#define ENA_REGS_DEV_CTL_RESET_REASON_MASK		0xf0000000
+#define ENA_REGS_DEV_CTL_DEV_RESET_MASK                     0x1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT                   1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK                    0x2
+#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT                    2
+#define ENA_REGS_DEV_CTL_QUIESCENT_MASK                     0x4
+#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT                    3
+#define ENA_REGS_DEV_CTL_IO_RESUME_MASK                     0x8
+#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT                 28
+#define ENA_REGS_DEV_CTL_RESET_REASON_MASK                  0xf0000000
 
 /* dev_sts register */
-#define ENA_REGS_DEV_STS_READY_MASK		0x1
-#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT		1
-#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK		0x2
-#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT		2
-#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK		0x4
-#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT		3
-#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK		0x8
-#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT		4
-#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK		0x10
-#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT		5
-#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK		0x20
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT		6
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK		0x40
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT		7
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK		0x80
+#define ENA_REGS_DEV_STS_READY_MASK                         0x1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT       1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK        0x2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT          2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK           0x4
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT            3
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK             0x8
+#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT               4
+#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK                0x10
+#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT                  5
+#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK                   0x20
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT  6
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK   0x40
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT     7
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK      0x80
 
 /* mmio_reg_read register */
-#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK		0xffff
-#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT		16
-#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK		0xffff0000
+#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK                  0xffff
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT                16
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK                 0xffff0000
 
 /* rss_ind_entry_update register */
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK		0xffff
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT		16
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK		0xffff0000
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK            0xffff
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT          16
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK           0xffff0000
 
 #endif /*_ENA_REGS_H_ */
diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c
index dc57f27..ab30761 100644
--- a/drivers/net/ethernet/amd/7990.c
+++ b/drivers/net/ethernet/amd/7990.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * 7990.c -- LANCE ethernet IC generic routines.
  * This is an attempt to separate out the bits of various ethernet
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 9e5cf55..9f965cd 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # AMD network device configuration
 #
@@ -13,7 +14,7 @@
 	  say Y.
 
 	  Note that the answer to this question does not directly affect
-	  the kernel: saying N will just case the configurator to skip all
+	  the kernel: saying N will just cause the configurator to skip all
 	  the questions regarding AMD chipsets. If you say Y, you will be asked
 	  for your specific chipset/driver in the following questions.
 
diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index 01d132c..0842da4 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/drivers/net/ethernet/amd/am79c961a.c
  *
  *  by Russell King <rmk@arm.linux.org.uk> 1995-2001.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Derived from various things including skeleton.c
  *
  * This is a special driver for the am79c961A Lance chip used in the
@@ -440,7 +437,7 @@
 /*
  * Transmit a packet
  */
-static int
+static netdev_tx_t
 am79c961_sendpacket(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dev_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/amd/am79c961a.h b/drivers/net/ethernet/amd/am79c961a.h
index fc5088c..73679e0 100644
--- a/drivers/net/ethernet/amd/am79c961a.h
+++ b/drivers/net/ethernet/amd/am79c961a.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * linux/drivers/net/ethernet/amd/am79c961a.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _LINUX_am79c961a_H
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index a90080f..573e88f 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 
 /* Advanced  Micro Devices Inc. AMD8111E Linux Network Driver
  * Copyright (C) 2004 Advanced Micro Devices
  *
- *
  * Copyright 2001,2002 Jeff Garzik <jgarzik@mandrakesoft.com> [ 8139cp.c,tg3.c ]
  * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com)[ tg3.c]
  * Copyright 1996-1999 Thomas Bogendoerfer [ pcnet32.c ]
@@ -12,19 +12,6 @@
  * Carsten Langgaard, carstenl@mips.com [ pcnet32.c ]
  * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
  *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 Module Name:
 
@@ -435,7 +422,7 @@
 	int i,reg_val;
 
 	/* stop the chip */
-	 writel(RUN, mmio + CMD0);
+	writel(RUN, mmio + CMD0);
 
 	if(amd8111e_init_ring(dev))
 		return -ENOMEM;
@@ -666,7 +653,7 @@
 			pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[tx_index],
 				  	lp->tx_skbuff[tx_index]->len,
 					PCI_DMA_TODEVICE);
-			dev_kfree_skb_irq (lp->tx_skbuff[tx_index]);
+			dev_consume_skb_irq(lp->tx_skbuff[tx_index]);
 			lp->tx_skbuff[tx_index] = NULL;
 			lp->tx_dma_addr[tx_index] = 0;
 		}
@@ -1720,7 +1707,7 @@
 		writew((u32)tmp_ipg, mmio + IPG);
 		writew((u32)(tmp_ipg - IFS1_DELTA), mmio + IFS1);
 	}
-	 mod_timer(&lp->ipg_data.ipg_timer, jiffies + IPG_CONVERGE_JIFFIES);
+	mod_timer(&lp->ipg_data.ipg_timer, jiffies + IPG_CONVERGE_JIFFIES);
 	return;
 
 }
diff --git a/drivers/net/ethernet/amd/amd8111e.h b/drivers/net/ethernet/amd/amd8111e.h
index 2a57b46..493f154 100644
--- a/drivers/net/ethernet/amd/amd8111e.h
+++ b/drivers/net/ethernet/amd/amd8111e.h
@@ -1,19 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Advanced  Micro Devices Inc. AMD8111E Linux Network Driver
  * Copyright (C) 2003 Advanced Micro Devices
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 Module Name:
 
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index c5b8126..d3d44e0 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -339,7 +339,8 @@
                                    *init_rec );
 static int lance_open( struct net_device *dev );
 static void lance_init_ring( struct net_device *dev );
-static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev );
+static netdev_tx_t lance_start_xmit(struct sk_buff *skb,
+				    struct net_device *dev);
 static irqreturn_t lance_interrupt( int irq, void *dev_id );
 static int lance_rx( struct net_device *dev );
 static int lance_close( struct net_device *dev );
@@ -769,7 +770,8 @@
 
 /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
 
-static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev )
+static netdev_tx_t
+lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	struct lance_ioreg	 *IO = lp->iobase;
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index 73ca887..1793950 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *
  * Alchemy Au1x00 ethernet driver
@@ -14,24 +15,6 @@
  *
  * Author: MontaVista Software, Inc.
  *		ppopov@mvista.com or source@mvista.com
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * ########################################################################
- *
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -564,17 +547,7 @@
 		return PTR_ERR(phydev);
 	}
 
-	/* mask with MAC supported features */
-	phydev->supported &= (SUPPORTED_10baseT_Half
-			      | SUPPORTED_10baseT_Full
-			      | SUPPORTED_100baseT_Half
-			      | SUPPORTED_100baseT_Full
-			      | SUPPORTED_Autoneg
-			      /* | SUPPORTED_Pause | SUPPORTED_Asym_Pause */
-			      | SUPPORTED_MII
-			      | SUPPORTED_TP);
-
-	phydev->advertising = phydev->supported;
+	phy_set_max_speed(phydev, SPEED_100);
 
 	aup->old_link = 0;
 	aup->old_speed = 0;
@@ -950,11 +923,8 @@
 		return retval;
 	}
 
-	if (dev->phydev) {
-		/* cause the PHY state machine to schedule a link state check */
-		dev->phydev->state = PHY_CHANGELINK;
+	if (dev->phydev)
 		phy_start(dev->phydev);
-	}
 
 	netif_start_queue(dev);
 
@@ -1130,7 +1100,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to retrieve IRQ\n");
 		err = -ENODEV;
 		goto out;
 	}
@@ -1180,7 +1149,7 @@
 	/* Allocate the data buffers
 	 * Snooping works fine with eth on all au1xxx
 	 */
-	aup->vaddr = (u32)dma_alloc_attrs(NULL, MAX_BUF_SIZE *
+	aup->vaddr = (u32)dma_alloc_attrs(&pdev->dev, MAX_BUF_SIZE *
 					  (NUM_TX_BUFFS + NUM_RX_BUFFS),
 					  &aup->dma_addr, 0,
 					  DMA_ATTR_NON_CONSISTENT);
@@ -1362,7 +1331,7 @@
 err_remap2:
 	iounmap(aup->mac);
 err_remap1:
-	dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
+	dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
 			(void *)aup->vaddr, aup->dma_addr,
 			DMA_ATTR_NON_CONSISTENT);
 err_vaddr:
@@ -1396,7 +1365,7 @@
 		if (aup->tx_db_inuse[i])
 			au1000_ReleaseDB(aup, aup->tx_db_inuse[i]);
 
-	dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
+	dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
 			(void *)aup->vaddr, aup->dma_addr,
 			DMA_ATTR_NON_CONSISTENT);
 
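Note: the au1000_eth.c changes above replace a hand-edited phydev->supported mask with phy_set_max_speed(), which drops every link mode faster than the given speed and lets phylib keep the advertising mask consistent. The same diff also passes the real &pdev->dev to dma_alloc_attrs()/dma_free_attrs(), since a NULL device is no longer accepted by the DMA API. A minimal sketch of the speed cap, assuming an already-connected phydev (the function name is illustrative):

#include <linux/phy.h>

/* Cap a PHY at 100 Mbit/s; phylib trims supported/advertising. */
static void example_limit_to_fast_ethernet(struct phy_device *phydev)
{
	phy_set_max_speed(phydev, SPEED_100);
}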
diff --git a/drivers/net/ethernet/amd/au1000_eth.h b/drivers/net/ethernet/amd/au1000_eth.h
index 4c47c23..e3a3ed2 100644
--- a/drivers/net/ethernet/amd/au1000_eth.h
+++ b/drivers/net/ethernet/amd/au1000_eth.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *
  * Alchemy Au1x00 ethernet driver include file
@@ -5,24 +6,6 @@
  * Author: Pete Popov <ppopov@mvista.com>
  *
  * Copyright 2001 MontaVista Software Inc.
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * ########################################################################
- *
- *
  */
 
 
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 00332a1..dac4a2f 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *    Lance ethernet driver for the MIPS processor based
  *      DECstation family
@@ -894,7 +895,7 @@
 	netif_wake_queue(dev);
 }
 
-static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	volatile struct lance_regs *ll = lp->ll;
diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c
index c3dbf1c..1381a47 100644
--- a/drivers/net/ethernet/amd/hplance.c
+++ b/drivers/net/ethernet/amd/hplance.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* hplance.c  : the  Linux/hp300/lance ethernet driver
  *
  * Copyright (C) 05/1998 Peter Maydell <pmaydell@chiark.greenend.org.uk>
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index b56d84c..f90b454 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -1084,7 +1084,7 @@
 				/* We must free the original skb if it's not a data-only copy
 				   in the bounce buffer. */
 				if (lp->tx_skbuff[entry]) {
-					dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+					dev_consume_skb_irq(lp->tx_skbuff[entry]);
 					lp->tx_skbuff[entry] = NULL;
 				}
 				dirty_tx++;
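Note: the dev_kfree_skb_irq() to dev_consume_skb_irq() conversions in this series encode TX-completion intent: a successfully transmitted skb is consumed, so drop monitors do not count it, while a genuinely discarded skb is still freed with the kfree variant. A minimal sketch of the convention (the helper and flag are hypothetical):

#include <linux/netdevice.h>

/* Free a TX skb from hard-IRQ context with the right semantics. */
static void example_tx_complete(struct sk_buff *skb, bool tx_ok)
{
	if (tx_ok)
		dev_consume_skb_irq(skb);	/* normal completion */
	else
		dev_kfree_skb_irq(skb);		/* counted as a drop */
}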
diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c
index 0a92044..72abd3f 100644
--- a/drivers/net/ethernet/amd/mvme147.c
+++ b/drivers/net/ethernet/amd/mvme147.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* mvme147.c  : the  Linux/mvme147/lance ethernet driver
  *
  * Copyright (C) 05/1998 Peter Maydell <pmaydell@chiark.greenend.org.uk>
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index e248d1a..c6c2a54 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -435,10 +435,8 @@
 		}
 		if(cards[i].vendor_id) {
 			for(j=0;j<3;j++)
-				if(inb(ioaddr+cards[i].addr_offset+j) != cards[i].vendor_id[j]) {
+				if(inb(ioaddr+cards[i].addr_offset+j) != cards[i].vendor_id[j])
 					release_region(ioaddr, cards[i].total_size);
-					continue;
-			  }
 		}
 		break;
 	}
@@ -699,16 +697,14 @@
 	for(i=0;i<TMDNUM;i++) {
 		kfree(p->tmdbounce[i]);
 #ifdef XMT_VIA_SKB
-		if(p->tmd_skb[i])
-			dev_kfree_skb(p->tmd_skb[i]);
+		dev_kfree_skb(p->tmd_skb[i]);
 #endif
 	}
 
 	for(i=0;i<RMDNUM;i++)
 	{
 #ifdef RCV_VIA_SKB
-		if(p->recv_skb[i])
-			dev_kfree_skb(p->recv_skb[i]);
+		dev_kfree_skb(p->recv_skb[i]);
 #else
 		kfree(p->recvbounce[i]);
 #endif
@@ -1030,7 +1026,7 @@
 
 #ifdef XMT_VIA_SKB
 		if(p->tmd_skb[p->tmdlast]) {
-			 dev_kfree_skb_irq(p->tmd_skb[p->tmdlast]);
+			 dev_consume_skb_irq(p->tmd_skb[p->tmdlast]);
 			 p->tmd_skb[p->tmdlast] = NULL;
 		}
 #endif
diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c
index 77b1db2..da7e3d4 100644
--- a/drivers/net/ethernet/amd/sun3lance.c
+++ b/drivers/net/ethernet/amd/sun3lance.c
@@ -236,7 +236,8 @@
 static int lance_probe( struct net_device *dev);
 static int lance_open( struct net_device *dev );
 static void lance_init_ring( struct net_device *dev );
-static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev );
+static netdev_tx_t lance_start_xmit(struct sk_buff *skb,
+				    struct net_device *dev);
 static irqreturn_t lance_interrupt( int irq, void *dev_id);
 static int lance_rx( struct net_device *dev );
 static int lance_close( struct net_device *dev );
@@ -511,7 +512,8 @@
 }
 
 
-static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev )
+static netdev_tx_t
+lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	int entry, len;
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index 19f89d9..ebcbf8c 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* $Id: sunlance.c,v 1.112 2002/01/15 06:48:55 davem Exp $
  * lance.c: Linux/Sparc/Lance driver
  *
@@ -1106,7 +1107,7 @@
 	netif_wake_queue(dev);
 }
 
-static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	int entry, skblen, len;
@@ -1488,9 +1489,9 @@
 	struct device_node *parent_dp = parent->dev.of_node;
 	int err;
 
-	if (!strcmp(parent_dp->name, "ledma")) {
+	if (of_node_name_eq(parent_dp, "ledma")) {
 		err = sparc_lance_probe_one(op, parent, NULL);
-	} else if (!strcmp(parent_dp->name, "lebuffer")) {
+	} else if (of_node_name_eq(parent_dp, "lebuffer")) {
 		err = sparc_lance_probe_one(op, NULL, parent);
 	} else
 		err = sparc_lance_probe_one(op, NULL, NULL);
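Note: the lance_start_xmit() conversions above switch the return type from int to netdev_tx_t, the dedicated type for .ndo_start_xmit handlers. A minimal sketch of the expected shape (the body is a placeholder):

#include <linux/netdevice.h>

static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	/* ...map the skb and post it to the hardware ring here... */

	return NETDEV_TX_OK;	/* or NETDEV_TX_BUSY to have it retried */
}

The sunlance.c hunk above also moves from strcmp() on node->name to of_node_name_eq(), which tolerates a NULL node and ignores any unit-address suffix.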
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index d272dc6..b40d437 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -431,8 +431,6 @@
 #define MAC_MDIOSCAR_PA_WIDTH		5
 #define MAC_MDIOSCAR_RA_INDEX		0
 #define MAC_MDIOSCAR_RA_WIDTH		16
-#define MAC_MDIOSCAR_REG_INDEX		0
-#define MAC_MDIOSCAR_REG_WIDTH		21
 #define MAC_MDIOSCCDR_BUSY_INDEX	22
 #define MAC_MDIOSCCDR_BUSY_WIDTH	1
 #define MAC_MDIOSCCDR_CMD_INDEX		16
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
index b911439..b0a6c96 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
@@ -438,7 +438,6 @@
 
 void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
 {
-	struct dentry *pfile;
 	char *buf;
 
 	/* Set defaults */
@@ -451,88 +450,48 @@
 		return;
 
 	pdata->xgbe_debugfs = debugfs_create_dir(buf, NULL);
-	if (!pdata->xgbe_debugfs) {
-		netdev_err(pdata->netdev, "debugfs_create_dir failed\n");
-		kfree(buf);
-		return;
-	}
 
-	pfile = debugfs_create_file("xgmac_register", 0600,
-				    pdata->xgbe_debugfs, pdata,
-				    &xgmac_reg_addr_fops);
-	if (!pfile)
-		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+	debugfs_create_file("xgmac_register", 0600, pdata->xgbe_debugfs, pdata,
+			    &xgmac_reg_addr_fops);
 
-	pfile = debugfs_create_file("xgmac_register_value", 0600,
-				    pdata->xgbe_debugfs, pdata,
-				    &xgmac_reg_value_fops);
-	if (!pfile)
-		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+	debugfs_create_file("xgmac_register_value", 0600, pdata->xgbe_debugfs,
+			    pdata, &xgmac_reg_value_fops);
 
-	pfile = debugfs_create_file("xpcs_mmd", 0600,
-				    pdata->xgbe_debugfs, pdata,
-				    &xpcs_mmd_fops);
-	if (!pfile)
-		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+	debugfs_create_file("xpcs_mmd", 0600, pdata->xgbe_debugfs, pdata,
+			    &xpcs_mmd_fops);
 
-	pfile = debugfs_create_file("xpcs_register", 0600,
-				    pdata->xgbe_debugfs, pdata,
-				    &xpcs_reg_addr_fops);
-	if (!pfile)
-		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+	debugfs_create_file("xpcs_register", 0600, pdata->xgbe_debugfs, pdata,
+			    &xpcs_reg_addr_fops);
 
-	pfile = debugfs_create_file("xpcs_register_value", 0600,
-				    pdata->xgbe_debugfs, pdata,
-				    &xpcs_reg_value_fops);
-	if (!pfile)
-		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
+	debugfs_create_file("xpcs_register_value", 0600, pdata->xgbe_debugfs,
+			    pdata, &xpcs_reg_value_fops);
 
 	if (pdata->xprop_regs) {
-		pfile = debugfs_create_file("xprop_register", 0600,
-					    pdata->xgbe_debugfs, pdata,
-					    &xprop_reg_addr_fops);
-		if (!pfile)
-			netdev_err(pdata->netdev,
-				   "debugfs_create_file failed\n");
+		debugfs_create_file("xprop_register", 0600, pdata->xgbe_debugfs,
+				    pdata, &xprop_reg_addr_fops);
 
-		pfile = debugfs_create_file("xprop_register_value", 0600,
-					    pdata->xgbe_debugfs, pdata,
-					    &xprop_reg_value_fops);
-		if (!pfile)
-			netdev_err(pdata->netdev,
-				   "debugfs_create_file failed\n");
+		debugfs_create_file("xprop_register_value", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xprop_reg_value_fops);
 	}
 
 	if (pdata->xi2c_regs) {
-		pfile = debugfs_create_file("xi2c_register", 0600,
-					    pdata->xgbe_debugfs, pdata,
-					    &xi2c_reg_addr_fops);
-		if (!pfile)
-			netdev_err(pdata->netdev,
-				   "debugfs_create_file failed\n");
+		debugfs_create_file("xi2c_register", 0600, pdata->xgbe_debugfs,
+				    pdata, &xi2c_reg_addr_fops);
 
-		pfile = debugfs_create_file("xi2c_register_value", 0600,
-					    pdata->xgbe_debugfs, pdata,
-					    &xi2c_reg_value_fops);
-		if (!pfile)
-			netdev_err(pdata->netdev,
-				   "debugfs_create_file failed\n");
+		debugfs_create_file("xi2c_register_value", 0600,
+				    pdata->xgbe_debugfs, pdata,
+				    &xi2c_reg_value_fops);
 	}
 
 	if (pdata->vdata->an_cdr_workaround) {
-		pfile = debugfs_create_bool("an_cdr_workaround", 0600,
-					    pdata->xgbe_debugfs,
-					    &pdata->debugfs_an_cdr_workaround);
-		if (!pfile)
-			netdev_err(pdata->netdev,
-				   "debugfs_create_bool failed\n");
+		debugfs_create_bool("an_cdr_workaround", 0600,
+				    pdata->xgbe_debugfs,
+				    &pdata->debugfs_an_cdr_workaround);
 
-		pfile = debugfs_create_bool("an_cdr_track_early", 0600,
-					    pdata->xgbe_debugfs,
-					    &pdata->debugfs_an_cdr_track_early);
-		if (!pfile)
-			netdev_err(pdata->netdev,
-				   "debugfs_create_bool failed\n");
+		debugfs_create_bool("an_cdr_track_early", 0600,
+				    pdata->xgbe_debugfs,
+				    &pdata->debugfs_an_cdr_track_early);
 	}
 
 	kfree(buf);
@@ -546,7 +505,6 @@
 
 void xgbe_debugfs_rename(struct xgbe_prv_data *pdata)
 {
-	struct dentry *pfile;
 	char *buf;
 
 	if (!pdata->xgbe_debugfs)
@@ -559,11 +517,8 @@
 	if (!strcmp(pdata->xgbe_debugfs->d_name.name, buf))
 		goto out;
 
-	pfile = debugfs_rename(pdata->xgbe_debugfs->d_parent,
-			       pdata->xgbe_debugfs,
-			       pdata->xgbe_debugfs->d_parent, buf);
-	if (!pfile)
-		netdev_err(pdata->netdev, "debugfs_rename failed\n");
+	debugfs_rename(pdata->xgbe_debugfs->d_parent, pdata->xgbe_debugfs,
+		       pdata->xgbe_debugfs->d_parent, buf);
 
 out:
 	kfree(buf);
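Note: the xgbe-debugfs.c cleanup above follows the current convention that debugfs creation is best-effort: the create helpers handle and report their own failures, so callers neither check nor log the returned dentry. A minimal sketch (the names are placeholders):

#include <linux/debugfs.h>

/* Create one debugfs file under "dir"; no return-value check. */
static void example_debugfs_setup(struct dentry *dir, void *data,
				  const struct file_operations *fops)
{
	debugfs_create_file("example", 0600, dir, data, fops);
}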
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 5330942..230726d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -526,7 +526,7 @@
 	struct xgbe_ring *ring = channel->tx_ring;
 	struct xgbe_ring_data *rdata;
 	struct xgbe_packet_data *packet;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	dma_addr_t skb_dma;
 	unsigned int start_index, cur_index;
 	unsigned int offset, tso, vlan, datalen, len;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 1e929a1..d5fd49d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1284,6 +1284,20 @@
 	}
 }
 
+static unsigned int xgbe_create_mdio_sca(int port, int reg)
+{
+	unsigned int mdio_sca, da;
+
+	da = (reg & MII_ADDR_C45) ? reg >> 16 : 0;
+
+	mdio_sca = 0;
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg);
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port);
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da);
+
+	return mdio_sca;
+}
+
 static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr,
 				   int reg, u16 val)
 {
@@ -1291,9 +1305,7 @@
 
 	reinit_completion(&pdata->mdio_complete);
 
-	mdio_sca = 0;
-	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
-	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+	mdio_sca = xgbe_create_mdio_sca(addr, reg);
 	XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
 
 	mdio_sccd = 0;
@@ -1317,9 +1329,7 @@
 
 	reinit_completion(&pdata->mdio_complete);
 
-	mdio_sca = 0;
-	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
-	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+	mdio_sca = xgbe_create_mdio_sca(addr, reg);
 	XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
 
 	mdio_sccd = 0;
@@ -1877,7 +1887,7 @@
 	smp_wmb();
 
 	ring->cur = cur_index + 1;
-	if (!packet->skb->xmit_more ||
+	if (!netdev_xmit_more() ||
 	    netif_xmit_stopped(netdev_get_tx_queue(pdata->netdev,
 						   channel->queue_index)))
 		xgbe_tx_start_xmit(channel, ring);
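Note: two things change in xgbe-dev.c above. Clause-45 MDIO addressing is centralized in xgbe_create_mdio_sca(), which pulls the device address out of the upper register bits when MII_ADDR_C45 is set, and the TX doorbell test moves from skb->xmit_more to netdev_xmit_more(), a per-CPU flag. A minimal sketch of the doorbell idiom (example_ring_doorbell() is hypothetical):

#include <linux/netdevice.h>

static void example_ring_doorbell(void)
{
	/* write the producer index to the NIC here */
}

/* Batch doorbells while the stack promises more packets. */
static void example_maybe_kick(struct netdev_queue *txq)
{
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		example_ring_doorbell();
}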
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 24f1053..98f8f20 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -119,7 +119,6 @@
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
 #include <linux/interrupt.h>
-#include <net/busy_poll.h>
 #include <linux/clk.h>
 #include <linux/if_ether.h>
 #include <linux/net_tstamp.h>
@@ -1613,7 +1612,7 @@
 	/* PTP v2, UDP, any kind of event packet */
 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1);
-	/* PTP v1, UDP, any kind of event packet */
+		/* Fall through - to PTP v1, UDP, any kind of event packet */
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1);
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1);
@@ -1624,7 +1623,7 @@
 	/* PTP v2, UDP, Sync packet */
 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1);
-	/* PTP v1, UDP, Sync packet */
+		/* Fall through - to PTP v1, UDP, Sync packet */
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1);
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1);
@@ -1635,7 +1634,7 @@
 	/* PTP v2, UDP, Delay_req packet */
 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSVER2ENA, 1);
-	/* PTP v1, UDP, Delay_req packet */
+		/* Fall through - to PTP v1, UDP, Delay_req packet */
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV4ENA, 1);
 		XGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSIPV6ENA, 1);
@@ -1834,7 +1833,7 @@
 			     struct xgbe_ring *ring, struct sk_buff *skb,
 			     struct xgbe_packet_data *packet)
 {
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	unsigned int context_desc;
 	unsigned int len;
 	unsigned int i;
@@ -2009,7 +2008,7 @@
 	return 0;
 }
 
-static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
@@ -2018,7 +2017,7 @@
 	struct xgbe_ring *ring;
 	struct xgbe_packet_data *packet;
 	struct netdev_queue *txq;
-	int ret;
+	netdev_tx_t ret;
 
 	DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len);
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index b41f236..7ce9c69 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -469,13 +469,19 @@
 
 	ret = xgbe_platform_init();
 	if (ret)
-		return ret;
+		goto err_platform_init;
 
 	ret = xgbe_pci_init();
 	if (ret)
-		return ret;
+		goto err_pci_init;
 
 	return 0;
+
+err_pci_init:
+	xgbe_platform_exit();
+err_platform_init:
+	unregister_netdevice_notifier(&xgbe_netdev_notifier);
+	return ret;
 }
 
 static void __exit xgbe_mod_exit(void)
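Note: xgbe_mod_init() above gains proper unwinding, so a failure in a later step now undoes every earlier step instead of leaking the notifier or platform registration. A generic sketch of the goto-ladder idiom (all step functions are placeholders):

static int step_a(void) { return 0; }
static int step_b(void) { return 0; }
static void undo_step_a(void) { }

static int example_init(void)
{
	int ret;

	ret = step_a();
	if (ret)
		goto err_a;

	ret = step_b();
	if (ret)
		goto err_b;

	return 0;

err_b:
	undo_step_a();
err_a:
	return ret;
}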
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 3ceb4f9..128cd64 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -857,6 +857,7 @@
 
 static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
 	unsigned int phy_id = phy_data->phydev->phy_id;
 
@@ -878,9 +879,16 @@
 	phy_write(phy_data->phydev, 0x04, 0x0d01);
 	phy_write(phy_data->phydev, 0x00, 0x9140);
 
-	phy_data->phydev->supported = PHY_GBIT_FEATURES;
-	phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-	phy_data->phydev->advertising = phy_data->phydev->supported;
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       supported);
+	linkmode_set_bit_array(phy_gbit_features_array,
+			       ARRAY_SIZE(phy_gbit_features_array),
+			       supported);
+
+	linkmode_copy(phy_data->phydev->supported, supported);
+
+	phy_support_asym_pause(phy_data->phydev);
 
 	netif_dbg(pdata, drv, pdata->netdev,
 		  "Finisar PHY quirk in place\n");
@@ -890,6 +898,7 @@
 
 static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
 	struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom;
 	unsigned int phy_id = phy_data->phydev->phy_id;
@@ -950,9 +959,14 @@
 	reg = phy_read(phy_data->phydev, 0x00);
 	phy_write(phy_data->phydev, 0x00, reg & ~0x00800);
 
-	phy_data->phydev->supported = PHY_GBIT_FEATURES;
-	phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-	phy_data->phydev->advertising = phy_data->phydev->supported;
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       supported);
+	linkmode_set_bit_array(phy_gbit_features_array,
+			       ARRAY_SIZE(phy_gbit_features_array),
+			       supported);
+	linkmode_copy(phy_data->phydev->supported, supported);
+	phy_support_asym_pause(phy_data->phydev);
 
 	netif_dbg(pdata, drv, pdata->netdev,
 		  "BelFuse PHY quirk in place\n");
@@ -974,7 +988,6 @@
 	struct ethtool_link_ksettings *lks = &pdata->phy.lks;
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
 	struct phy_device *phydev;
-	u32 advertising;
 	int ret;
 
 	/* If we already have a PHY, just return */
@@ -1034,9 +1047,8 @@
 
 	xgbe_phy_external_phy_quirks(pdata);
 
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						lks->link_modes.advertising);
-	phydev->advertising &= advertising;
+	linkmode_and(phydev->advertising, phydev->advertising,
+		     lks->link_modes.advertising);
 
 	phy_start_aneg(phy_data->phydev);
 
@@ -1495,10 +1507,7 @@
 	if (!phy_data->phydev)
 		return;
 
-	if (phy_data->phydev->advertising & ADVERTISED_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_CAP;
-	if (phy_data->phydev->advertising & ADVERTISED_Asym_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_ASYM;
+	lcl_adv = linkmode_adv_to_lcl_adv_t(phy_data->phydev->advertising);
 
 	if (phy_data->phydev->pause) {
 		XGBE_SET_LP_ADV(lks, Pause);
@@ -1816,7 +1825,6 @@
 {
 	struct ethtool_link_ksettings *lks = &pdata->phy.lks;
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	u32 advertising;
 	int ret;
 
 	ret = xgbe_phy_find_phy_device(pdata);
@@ -1826,12 +1834,10 @@
 	if (!phy_data->phydev)
 		return 0;
 
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						lks->link_modes.advertising);
-
 	phy_data->phydev->autoneg = pdata->phy.autoneg;
-	phy_data->phydev->advertising = phy_data->phydev->supported &
-					advertising;
+	linkmode_and(phy_data->phydev->advertising,
+		     phy_data->phydev->supported,
+		     lks->link_modes.advertising);
 
 	if (pdata->phy.autoneg != AUTONEG_ENABLE) {
 		phy_data->phydev->speed = pdata->phy.speed;
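Note: the xgbe-phy-v2.c hunks above complete the move from legacy u32 advertising masks to ethtool link-mode bitmaps: masks are declared with __ETHTOOL_DECLARE_LINK_MODE_MASK() and combined with linkmode_*() helpers instead of C bitwise operators. A minimal sketch (the function name is illustrative):

#include <linux/linkmode.h>
#include <linux/phy.h>

/* Advertise only the modes in "wanted" that the PHY supports. */
static void example_restrict_advertising(struct phy_device *phydev,
					 const unsigned long *wanted)
{
	linkmode_and(phydev->advertising, phydev->supported, wanted);
}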
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index d0f3dfb..4ebd241 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -301,7 +301,6 @@
 	struct xgbe_prv_data *pdata;
 	struct device *dev = &pdev->dev;
 	struct platform_device *phy_pdev;
-	struct resource *res;
 	const char *phy_mode;
 	unsigned int phy_memnum, phy_irqnum;
 	unsigned int dma_irqnum, dma_irqend;
@@ -353,8 +352,7 @@
 	}
 
 	/* Obtain the mmio areas for the device */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pdata->xgmac_regs = devm_ioremap_resource(dev, res);
+	pdata->xgmac_regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pdata->xgmac_regs)) {
 		dev_err(dev, "xgmac ioremap failed\n");
 		ret = PTR_ERR(pdata->xgmac_regs);
@@ -363,8 +361,7 @@
 	if (netif_msg_probe(pdata))
 		dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	pdata->xpcs_regs = devm_ioremap_resource(dev, res);
+	pdata->xpcs_regs = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(pdata->xpcs_regs)) {
 		dev_err(dev, "xpcs ioremap failed\n");
 		ret = PTR_ERR(pdata->xpcs_regs);
@@ -373,8 +370,8 @@
 	if (netif_msg_probe(pdata))
 		dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
 
-	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-	pdata->rxtx_regs = devm_ioremap_resource(dev, res);
+	pdata->rxtx_regs = devm_platform_ioremap_resource(phy_pdev,
+							  phy_memnum++);
 	if (IS_ERR(pdata->rxtx_regs)) {
 		dev_err(dev, "rxtx ioremap failed\n");
 		ret = PTR_ERR(pdata->rxtx_regs);
@@ -383,8 +380,8 @@
 	if (netif_msg_probe(pdata))
 		dev_dbg(dev, "rxtx_regs  = %p\n", pdata->rxtx_regs);
 
-	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-	pdata->sir0_regs = devm_ioremap_resource(dev, res);
+	pdata->sir0_regs = devm_platform_ioremap_resource(phy_pdev,
+							  phy_memnum++);
 	if (IS_ERR(pdata->sir0_regs)) {
 		dev_err(dev, "sir0 ioremap failed\n");
 		ret = PTR_ERR(pdata->sir0_regs);
@@ -393,8 +390,8 @@
 	if (netif_msg_probe(pdata))
 		dev_dbg(dev, "sir0_regs  = %p\n", pdata->sir0_regs);
 
-	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-	pdata->sir1_regs = devm_ioremap_resource(dev, res);
+	pdata->sir1_regs = devm_platform_ioremap_resource(phy_pdev,
+							  phy_memnum++);
 	if (IS_ERR(pdata->sir1_regs)) {
 		dev_err(dev, "sir1 ioremap failed\n");
 		ret = PTR_ERR(pdata->sir1_regs);
@@ -467,10 +464,8 @@
 
 	/* Get the device interrupt */
 	ret = platform_get_irq(pdev, 0);
-	if (ret < 0) {
-		dev_err(dev, "platform_get_irq 0 failed\n");
+	if (ret < 0)
 		goto err_io;
-	}
 	pdata->dev_irq = ret;
 
 	/* Get the per channel DMA interrupts */
@@ -479,12 +474,8 @@
 
 		for (i = 0; (i < max) && (dma_irqnum < dma_irqend); i++) {
 			ret = platform_get_irq(pdata->platdev, dma_irqnum++);
-			if (ret < 0) {
-				netdev_err(pdata->netdev,
-					   "platform_get_irq %u failed\n",
-					   dma_irqnum - 1);
+			if (ret < 0)
 				goto err_io;
-			}
 
 			pdata->channel_irq[i] = ret;
 		}
@@ -496,10 +487,8 @@
 
 	/* Get the auto-negotiation interrupt */
 	ret = platform_get_irq(phy_pdev, phy_irqnum++);
-	if (ret < 0) {
-		dev_err(dev, "platform_get_irq phy 0 failed\n");
+	if (ret < 0)
 		goto err_io;
-	}
 	pdata->an_irq = ret;
 
 	/* Configure the netdev resource */
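Note: xgbe-platform.c above adopts two helpers. devm_platform_ioremap_resource() folds platform_get_resource() plus devm_ioremap_resource() into one call, and platform_get_irq() now logs its own failure, which makes the removed dev_err() calls redundant. A minimal sketch of the mapping pattern:

#include <linux/err.h>
#include <linux/platform_device.h>

/* Map MEM resource 0 of a platform device, device-managed. */
static int example_map_regs(struct platform_device *pdev,
			    void __iomem **regs)
{
	*regs = devm_platform_ioremap_resource(pdev, 0);

	return PTR_ERR_OR_ZERO(*regs);
}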
diff --git a/drivers/net/ethernet/apm/Kconfig b/drivers/net/ethernet/apm/Kconfig
index 59efe5b..a893ef0 100644
--- a/drivers/net/ethernet/apm/Kconfig
+++ b/drivers/net/ethernet/apm/Kconfig
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 source "drivers/net/ethernet/apm/xgene/Kconfig"
 source "drivers/net/ethernet/apm/xgene-v2/Kconfig"
diff --git a/drivers/net/ethernet/apm/Makefile b/drivers/net/ethernet/apm/Makefile
index 946b2a4..cc8af97 100644
--- a/drivers/net/ethernet/apm/Makefile
+++ b/drivers/net/ethernet/apm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for APM X-GENE Ethernet driver.
 #
diff --git a/drivers/net/ethernet/apm/xgene-v2/Kconfig b/drivers/net/ethernet/apm/xgene-v2/Kconfig
index eedd3f3..2274af9 100644
--- a/drivers/net/ethernet/apm/xgene-v2/Kconfig
+++ b/drivers/net/ethernet/apm/xgene-v2/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_XGENE_V2
 	tristate "APM X-Gene SoC Ethernet-v2 Driver"
 	depends on ARCH_XGENE || COMPILE_TEST
diff --git a/drivers/net/ethernet/apm/xgene-v2/Makefile b/drivers/net/ethernet/apm/xgene-v2/Makefile
index f16a2b3..fdde3b6 100644
--- a/drivers/net/ethernet/apm/xgene-v2/Makefile
+++ b/drivers/net/ethernet/apm/xgene-v2/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for APM X-Gene Ethernet v2 driver
 #
diff --git a/drivers/net/ethernet/apm/xgene-v2/enet.c b/drivers/net/ethernet/apm/xgene-v2/enet.c
index 5998da0..a8c6b37 100644
--- a/drivers/net/ethernet/apm/xgene-v2/enet.c
+++ b/drivers/net/ethernet/apm/xgene-v2/enet.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/drivers/net/ethernet/apm/xgene-v2/enet.h b/drivers/net/ethernet/apm/xgene-v2/enet.h
index 3fd36dc..15cbd0c 100644
--- a/drivers/net/ethernet/apm/xgene-v2/enet.h
+++ b/drivers/net/ethernet/apm/xgene-v2/enet.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_V2_ENET_H__
diff --git a/drivers/net/ethernet/apm/xgene-v2/ethtool.c b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
index d31ad82..a58250c 100644
--- a/drivers/net/ethernet/apm/xgene-v2/ethtool.c
+++ b/drivers/net/ethernet/apm/xgene-v2/ethtool.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/drivers/net/ethernet/apm/xgene-v2/ethtool.h b/drivers/net/ethernet/apm/xgene-v2/ethtool.h
index 54b48d5..8263b4a 100644
--- a/drivers/net/ethernet/apm/xgene-v2/ethtool.h
+++ b/drivers/net/ethernet/apm/xgene-v2/ethtool.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_V2_ETHTOOL_H__
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.c b/drivers/net/ethernet/apm/xgene-v2/mac.c
index ee431e3..2da979e 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mac.c
+++ b/drivers/net/ethernet/apm/xgene-v2/mac.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.h b/drivers/net/ethernet/apm/xgene-v2/mac.h
index 3c83fa6..7392f60 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mac.h
+++ b/drivers/net/ethernet/apm/xgene-v2/mac.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_V2_MAC_H__
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index 0f2ad50..02b4f3a 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
@@ -66,10 +54,8 @@
 	}
 
 	ret = platform_get_irq(pdev, 0);
-	if (ret < 0) {
-		dev_err(dev, "Unable to get irq\n");
+	if (ret < 0)
 		return ret;
-	}
 	pdata->resources.irq = ret;
 
 	return 0;
@@ -206,8 +192,8 @@
 	}
 
 	/* Packet buffers should be 64B aligned */
-	pkt_buf = dma_zalloc_coherent(dev, XGENE_ENET_STD_MTU, &dma_addr,
-				      GFP_ATOMIC);
+	pkt_buf = dma_alloc_coherent(dev, XGENE_ENET_STD_MTU, &dma_addr,
+				     GFP_ATOMIC);
 	if (unlikely(!pkt_buf)) {
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
@@ -428,8 +414,8 @@
 	ring->ndev = ndev;
 
 	size = XGENE_ENET_DESC_SIZE * XGENE_ENET_NUM_DESC;
-	ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma_addr,
-					      GFP_KERNEL);
+	ring->desc_addr = dma_alloc_coherent(dev, size, &ring->dma_addr,
+					     GFP_KERNEL);
 	if (!ring->desc_addr)
 		goto err;
 
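Note: the dma_zalloc_coherent() calls above become dma_alloc_coherent() because the DMA core now zeroes coherent allocations unconditionally, so the zeroing wrapper was removed. A minimal sketch:

#include <linux/dma-mapping.h>

/* Coherent DMA memory; the returned buffer is already zeroed. */
static void *example_alloc_ring(struct device *dev, size_t size,
				dma_addr_t *dma)
{
	return dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
}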
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.h b/drivers/net/ethernet/apm/xgene-v2/main.h
index 969b258..d41439d 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.h
+++ b/drivers/net/ethernet/apm/xgene-v2/main.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_V2_MAIN_H__
diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c b/drivers/net/ethernet/apm/xgene-v2/mdio.c
index f5fe3bb..eba0683 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mdio.c
+++ b/drivers/net/ethernet/apm/xgene-v2/mdio.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
@@ -109,6 +97,7 @@
 
 int xge_mdio_config(struct net_device *ndev)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 	struct xge_pdata *pdata = netdev_priv(ndev);
 	struct device *dev = &pdata->pdev->dev;
 	struct mii_bus *mdio_bus;
@@ -148,16 +137,17 @@
 		goto err;
 	}
 
-	phydev->supported &= ~(SUPPORTED_10baseT_Half |
-			       SUPPORTED_10baseT_Full |
-			       SUPPORTED_100baseT_Half |
-			       SUPPORTED_100baseT_Full |
-			       SUPPORTED_1000baseT_Half |
-			       SUPPORTED_AUI |
-			       SUPPORTED_MII |
-			       SUPPORTED_FIBRE |
-			       SUPPORTED_BNC);
-	phydev->advertising = phydev->supported;
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_AUI_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_BNC_BIT, mask);
+
+	linkmode_andnot(phydev->supported, phydev->supported, mask);
+	linkmode_copy(phydev->advertising, phydev->supported);
 	pdata->phy_speed = SPEED_UNKNOWN;
 
 	return 0;
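Note: xge_mdio_config() above builds the set of modes to drop as a link-mode bitmap and strips it with linkmode_andnot(), the bitmap analogue of the old supported &= ~(...) expression. A minimal sketch (the chosen bit is illustrative):

#include <linux/linkmode.h>
#include <linux/phy.h>

/* Remove half-duplex gigabit from a PHY and re-sync advertising. */
static void example_strip_modes(struct phy_device *phydev)
{
	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };

	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mask);
	linkmode_andnot(phydev->supported, phydev->supported, mask);
	linkmode_copy(phydev->advertising, phydev->supported);
}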
diff --git a/drivers/net/ethernet/apm/xgene-v2/ring.c b/drivers/net/ethernet/apm/xgene-v2/ring.c
index 3881082..fbea4bc 100644
--- a/drivers/net/ethernet/apm/xgene-v2/ring.c
+++ b/drivers/net/ethernet/apm/xgene-v2/ring.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/drivers/net/ethernet/apm/xgene-v2/ring.h b/drivers/net/ethernet/apm/xgene-v2/ring.h
index abc8c9a..2fd2555 100644
--- a/drivers/net/ethernet/apm/xgene-v2/ring.h
+++ b/drivers/net/ethernet/apm/xgene-v2/ring.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Applied Micro X-Gene SoC Ethernet v2 Driver
  *
  * Copyright (c) 2017, Applied Micro Circuits Corporation
  * Author(s): Iyappan Subramanian <isubramanian@apm.com>
  *	      Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_V2_RING_H__
diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig
index e4e33c9..7bdfe78 100644
--- a/drivers/net/ethernet/apm/xgene/Kconfig
+++ b/drivers/net/ethernet/apm/xgene/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_XGENE
 	tristate "APM X-Gene SoC Ethernet Driver"
 	depends on ARCH_XGENE || COMPILE_TEST
diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile
index f46321f..6d13147 100644
--- a/drivers/net/ethernet/apm/xgene/Makefile
+++ b/drivers/net/ethernet/apm/xgene/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for APM X-Gene Ethernet Driver.
 #
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
index e1a51d8..de54643 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Classifier structures
  *
  * Copyright (c) 2016, Applied Micro Circuits Corporation
  * Authors: Khuong Dinh <kdinh@apm.com>
  *          Tanmay Inamdar <tinamdar@apm.com>
  *          Iyappan Subramanian <isubramanian@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "xgene_enet_main.h"
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
index 18fe8d5..bc05cbc 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Applied Micro X-Gene SoC Ethernet Classifier structures
  *
  * Copyright (c) 2016, Applied Micro Circuits Corporation
  * Authors: Khuong Dinh <kdinh@apm.com>
  *          Tanmay Inamdar <tinamdar@apm.com>
  *          Iyappan Subramanian <isubramanian@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_CLE_H__
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
index 4f50f11..246dec2 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/ethtool.h>
@@ -306,45 +294,25 @@
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
 	struct phy_device *phydev = ndev->phydev;
-	u32 oldadv, newadv;
 
 	if (phy_interface_mode_is_rgmii(pdata->phy_mode) ||
 	    pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
 		if (!phydev)
 			return -EINVAL;
 
-		if (!(phydev->supported & SUPPORTED_Pause) ||
-		    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-		     pp->rx_pause != pp->tx_pause))
+		if (!phy_validate_pause(phydev, pp))
 			return -EINVAL;
 
 		pdata->pause_autoneg = pp->autoneg;
 		pdata->tx_pause = pp->tx_pause;
 		pdata->rx_pause = pp->rx_pause;
 
-		oldadv = phydev->advertising;
-		newadv = oldadv & ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+		phy_set_asym_pause(phydev, pp->rx_pause,  pp->tx_pause);
 
-		if (pp->rx_pause)
-			newadv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-
-		if (pp->tx_pause)
-			newadv ^= ADVERTISED_Asym_Pause;
-
-		if (oldadv ^ newadv) {
-			phydev->advertising = newadv;
-
-			if (phydev->autoneg)
-				return phy_start_aneg(phydev);
-
-			if (!pp->autoneg) {
-				pdata->mac_ops->flowctl_tx(pdata,
-							   pdata->tx_pause);
-				pdata->mac_ops->flowctl_rx(pdata,
-							   pdata->rx_pause);
-			}
+		if (!pp->autoneg) {
+			pdata->mac_ops->flowctl_tx(pdata, pdata->tx_pause);
+			pdata->mac_ops->flowctl_rx(pdata, pdata->rx_pause);
 		}
-
 	} else {
 		if (pp->autoneg)
 			return -EINVAL;
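Note: the set_pauseparam path above delegates the capability check and advertisement update to phylib: phy_validate_pause() rejects pause combinations the PHY cannot advertise, and phy_set_asym_pause() rewrites the Pause/Asym_Pause bits and restarts autonegotiation when needed. A minimal sketch:

#include <linux/ethtool.h>
#include <linux/phy.h>

static int example_set_pause(struct phy_device *phydev,
			     struct ethtool_pauseparam *pp)
{
	if (!phy_validate_pause(phydev, pp))
		return -EINVAL;

	phy_set_asym_pause(phydev, pp->rx_pause, pp->tx_pause);

	return 0;
}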
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 078a04d..5f65787 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Ravi Patel <rapatel@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "xgene_enet_main.h"
@@ -724,11 +712,11 @@
 		udelay(5);
 	} else {
 #ifdef CONFIG_ACPI
-		if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev), "_RST")) {
-			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
-					     "_RST", NULL, NULL);
-		} else if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev),
-					 "_INI")) {
+		acpi_status status;
+
+		status = acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
+					      "_RST", NULL, NULL);
+		if (ACPI_FAILURE(status)) {
 			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
 					     "_INI", NULL, NULL);
 		}
@@ -895,12 +883,10 @@
 	}
 
 	pdata->phy_speed = SPEED_UNKNOWN;
-	phy_dev->supported &= ~SUPPORTED_10baseT_Half &
-			      ~SUPPORTED_100baseT_Half &
-			      ~SUPPORTED_1000baseT_Half;
-	phy_dev->supported |= SUPPORTED_Pause |
-			      SUPPORTED_Asym_Pause;
-	phy_dev->advertising = phy_dev->supported;
+	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+	phy_support_asym_pause(phy_dev);
 
 	return 0;
 }
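
[Editor's note: the _RST hunk above, repeated below for the SGMAC and XGMAC
variants, drops the acpi_has_method() probe: acpi_evaluate_object() already
returns a failure status such as AE_NOT_FOUND when the method is absent, so
the _INI fallback can key off the status alone. A minimal sketch, with
my_acpi_reset() as a hypothetical helper:

	#ifdef CONFIG_ACPI
	#include <linux/acpi.h>
	#include <linux/device.h>

	static void my_acpi_reset(struct device *dev)
	{
		acpi_status status;

		status = acpi_evaluate_object(ACPI_HANDLE(dev), "_RST",
					      NULL, NULL);
		if (ACPI_FAILURE(status))
			acpi_evaluate_object(ACPI_HANDLE(dev), "_INI",
					     NULL, NULL);
	}
	#endif
]
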
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index 5d3e18d..2f534f9 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Ravi Patel <rapatel@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_HW_H__
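
[Editor's note: the SPDX conversions in this series follow the convention from
Documentation/process/license-rules.rst: C sources take the C99 comment form on
the first line, while headers keep the classic C comment form:

	// SPDX-License-Identifier: GPL-2.0-or-later      <-- .c files
	/* SPDX-License-Identifier: GPL-2.0-or-later */   <-- .h files
]
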
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 3b889ef..d861213 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Ravi Patel <rapatel@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/gpio.h>
@@ -29,9 +17,6 @@
 #define RES_RING_CSR	1
 #define RES_RING_CMD	2
 
-static const struct of_device_id xgene_enet_of_match[];
-static const struct acpi_device_id xgene_enet_acpi_match[];
-
 static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool)
 {
 	struct xgene_enet_raw_desc16 *raw_desc;
@@ -355,7 +340,8 @@
 				nr_frags = skb_shinfo(skb)->nr_frags;
 
 				for (i = 0; i < 2 && i < nr_frags; i++)
-					len += skb_shinfo(skb)->frags[i].size;
+					len += skb_frag_size(
+						&skb_shinfo(skb)->frags[i]);
 
 				/* HW requires the header to reside in the first 3 buffers */
 				if (unlikely(hdr_len > len)) {
@@ -1631,7 +1617,6 @@
 static int xgene_enet_get_irqs(struct xgene_enet_pdata *pdata)
 {
 	struct platform_device *pdev = pdata->pdev;
-	struct device *dev = &pdev->dev;
 	int i, ret, max_irqs;
 
 	if (phy_interface_mode_is_rgmii(pdata->phy_mode))
@@ -1651,9 +1636,7 @@
 				pdata->cq_cnt = max_irqs / 2;
 				break;
 			}
-			dev_err(dev, "Unable to get ENET IRQ\n");
-			ret = ret ? : -ENXIO;
-			return ret;
+			return ret ? : -ENXIO;
 		}
 		pdata->irqs[i] = ret;
 	}
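
[Editor's note: "return ret ? : -ENXIO;" above uses the GNU
conditional-with-omitted-middle-operand extension: "a ? : b" yields a when a
is nonzero, otherwise b, and evaluates a only once. The same shape as a tiny
self-contained sketch; my_get_irq() is illustrative:

	#include <linux/platform_device.h>

	static int my_get_irq(struct platform_device *pdev, unsigned int num)
	{
		int ret = platform_get_irq(pdev, num);

		/* A negative errno passes through unchanged; a zero result
		 * is mapped to -ENXIO ("no such IRQ"). */
		return ret ? : -ENXIO;
	}
]
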
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index 9857685..18f4923 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Ravi Patel <rapatel@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_MAIN_H__
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
index 4ff4055..02892ef 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2015, Applied Micro Circuits Corporation
  * Author: Iyappan Subramanian <isubramanian@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "xgene_enet_main.h"
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h
index 8b235db..4e2edee 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2015, Applied Micro Circuits Corporation
  * Author: Iyappan Subramanian <isubramanian@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_RING2_H__
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index b1a83fd..f482ced 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "xgene_enet_main.h"
@@ -472,12 +460,14 @@
 		}
 	} else {
 #ifdef CONFIG_ACPI
-		if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_RST"))
-			acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
-					     "_RST", NULL, NULL);
-		else if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_INI"))
+		acpi_status status;
+
+		status = acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
+					      "_RST", NULL, NULL);
+		if (ACPI_FAILURE(status)) {
 			acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
 					     "_INI", NULL, NULL);
+		}
 #endif
 	}
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
index 3d0ba37..3bba0ce 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_SGMAC_H__
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index b7d75d0..304b5d4 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/of_gpio.h>
@@ -405,11 +393,11 @@
 		udelay(5);
 	} else {
 #ifdef CONFIG_ACPI
-		if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev), "_RST")) {
-			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
-					     "_RST", NULL, NULL);
-		} else if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev),
-					   "_INI")) {
+		acpi_status status;
+
+		status = acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
+					      "_RST", NULL, NULL);
+		if (ACPI_FAILURE(status)) {
 			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
 					     "_INI", NULL, NULL);
 		}
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
index a3b4551..98622dc 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Applied Micro X-Gene SoC Ethernet Driver
  *
  * Copyright (c) 2014, Applied Micro Circuits Corporation
  * Authors: Iyappan Subramanian <isubramanian@apm.com>
  *	    Keyur Chudgar <kchudgar@apm.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __XGENE_ENET_XGMAC_H__
diff --git a/drivers/net/ethernet/apple/Kconfig b/drivers/net/ethernet/apple/Kconfig
index 3107129..f78b9c8 100644
--- a/drivers/net/ethernet/apple/Kconfig
+++ b/drivers/net/ethernet/apple/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Apple device configuration
 #
@@ -10,8 +11,8 @@
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about IBM devices. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about Apple devices. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if NET_VENDOR_APPLE
diff --git a/drivers/net/ethernet/apple/Makefile b/drivers/net/ethernet/apple/Makefile
index 86eaa17..3224570 100644
--- a/drivers/net/ethernet/apple/Makefile
+++ b/drivers/net/ethernet/apple/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Apple network device drivers.
 #
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 6a8e256..a58185b 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Network device driver for the BMAC ethernet controller on
  * Apple Powermacs.  Assumes it's under a DBDMA controller.
@@ -777,7 +778,7 @@
 
 		if (bp->tx_bufs[bp->tx_empty]) {
 			++dev->stats.tx_packets;
-			dev_kfree_skb_irq(bp->tx_bufs[bp->tx_empty]);
+			dev_consume_skb_irq(bp->tx_bufs[bp->tx_empty]);
 		}
 		bp->tx_bufs[bp->tx_empty] = NULL;
 		bp->tx_fullup = 0;
@@ -814,8 +815,8 @@
 static unsigned int
 crc416(unsigned int curval, unsigned short nxtval)
 {
-	register unsigned int counter, cur = curval, next = nxtval;
-	register int high_crc_set, low_data_set;
+	unsigned int counter, cur = curval, next = nxtval;
+	int high_crc_set, low_data_set;
 
 	/* Swap bytes */
 	next = ((next & 0x00FF) << 8) | (next >> 8);
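
[Editor's note: bmac.c (above) and mace.c (below) switch their tx-completion
paths from dev_kfree_skb_irq() to dev_consume_skb_irq(). Both free an skb from
interrupt context; the difference is semantic: consume means the packet was
delivered successfully, while kfree registers as a drop for tracepoints and
drop monitoring. A hedged sketch of the resulting idiom, with my_tx_complete()
as a hypothetical helper:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	static void my_tx_complete(struct net_device *dev, struct sk_buff *skb,
				   bool sent_ok)
	{
		if (sent_ok) {
			dev->stats.tx_packets++;
			dev->stats.tx_bytes += skb->len;
			dev_consume_skb_irq(skb);	/* not a drop */
		} else {
			dev->stats.tx_dropped++;
			dev_kfree_skb_irq(skb);		/* counted as a drop */
		}
	}
]
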
diff --git a/drivers/net/ethernet/apple/bmac.h b/drivers/net/ethernet/apple/bmac.h
index a1d19d8..f8826c4 100644
--- a/drivers/net/ethernet/apple/bmac.h
+++ b/drivers/net/ethernet/apple/bmac.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * mace.h - definitions for the registers in the "Big Mac"
  *  Ethernet controller found in PowerMac G3 models.
  *
  * Copyright (C) 1998 Randy Gobbel.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 /* The "Big MAC" appears to have some parts in common with the Sun "Happy Meal"
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 68b9ee4..b8ba2ab 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Network device driver for the MACE ethernet controller on
  * Apple Powermacs.  Assumes it's under a DBDMA controller.
@@ -764,7 +765,7 @@
 	    dev->stats.tx_bytes += mp->tx_bufs[i]->len;
 	    ++dev->stats.tx_packets;
 	}
-	dev_kfree_skb_irq(mp->tx_bufs[i]);
+	dev_consume_skb_irq(mp->tx_bufs[i]);
 	--mp->tx_active;
 	if (++i >= N_TX_RING)
 	    i = 0;
diff --git a/drivers/net/ethernet/apple/mace.h b/drivers/net/ethernet/apple/mace.h
index 30b7ec0..697f71c 100644
--- a/drivers/net/ethernet/apple/mace.h
+++ b/drivers/net/ethernet/apple/mace.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * mace.h - definitions for the registers in the Am79C940 MACE
  * (Medium Access Control for Ethernet) controller.
  *
  * Copyright (C) 1996 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define REG(x)	volatile unsigned char x; char x ## _pad[15]
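
[Editor's note: the REG() macro kept at the end of mace.h pairs each byte-wide
register with 15 pad bytes, so consecutive struct members land 16 bytes apart,
matching the MACE's register spacing on the bus. A sketch of how such a header
applies it; the struct and field names here are illustrative:

	struct my_mace_regs {
		REG(rcvfifo);	/* byte register at offset 0x00 */
		REG(xmtfifo);	/* byte register at offset 0x10 */
		REG(xmtfc);	/* byte register at offset 0x20 */
		/* ... one REG() entry per 16-byte register slot ... */
	};
]
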
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index 376f2c2..8d03578 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *	Driver for the Macintosh 68K onboard MACE controller with PSC
  *	driven DMA. The MACE driver code is derived from mace.c. The
  *	Mac68k theory of operation is courtesy of the MacBSD wizards.
  *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
  *	Copyright (C) 1996 Paul Mackerras.
  *	Copyright (C) 1998 Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig
index 7d623e9..350a48e 100644
--- a/drivers/net/ethernet/aquantia/Kconfig
+++ b/drivers/net/ethernet/aquantia/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # aQuantia device configuration
 #
@@ -17,7 +18,8 @@
 
 config AQTION
 	tristate "aQuantia AQtion(tm) Support"
-	depends on PCI && X86_64
+	depends on PCI
+	depends on X86_64 || ARM64 || COMPILE_TEST
 	---help---
 	  This enables the support for the aQuantia AQtion(tm) Ethernet card.
 
diff --git a/drivers/net/ethernet/aquantia/Makefile b/drivers/net/ethernet/aquantia/Makefile
index 4f4897b..c4e7d01 100644
--- a/drivers/net/ethernet/aquantia/Makefile
+++ b/drivers/net/ethernet/aquantia/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the aQuantia device drivers.
 #
diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index 686f6d8..131cab8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -1,23 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ################################################################################
 #
 # aQuantia Ethernet Controller AQtion Linux Driver
 # Copyright(c) 2014-2017 aQuantia Corporation.
 #
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
 # Contact Information: <rdc-drv@aquantia.com>
 # aQuantia Corporation, 105 E. Tasman Dr. San Jose, CA 95134, USA
 #
@@ -36,6 +22,8 @@
 	aq_ring.o \
 	aq_hw_utils.o \
 	aq_ethtool.o \
+	aq_drvinfo.o \
+	aq_filters.o \
 	hw_atl/hw_atl_a0.o \
 	hw_atl/hw_atl_b0.o \
 	hw_atl/hw_atl_utils.o \
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 91eb891..02f1b70 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_cfg.h: Definition of configuration parameters and constants. */
@@ -12,11 +9,13 @@
 #ifndef AQ_CFG_H
 #define AQ_CFG_H
 
-#define AQ_CFG_VECS_DEF   4U
+#include <generated/utsrelease.h>
+
+#define AQ_CFG_VECS_DEF   8U
 #define AQ_CFG_TCS_DEF    1U
 
 #define AQ_CFG_TXDS_DEF    4096U
-#define AQ_CFG_RXDS_DEF    1024U
+#define AQ_CFG_RXDS_DEF    2048U
 
 #define AQ_CFG_IS_POLLING_DEF 0U
 
@@ -34,16 +33,22 @@
 #define AQ_CFG_TCS_MAX    8U
 
 #define AQ_CFG_TX_FRAME_MAX  (16U * 1024U)
-#define AQ_CFG_RX_FRAME_MAX  (4U * 1024U)
+#define AQ_CFG_RX_FRAME_MAX  (2U * 1024U)
 
 #define AQ_CFG_TX_CLEAN_BUDGET 256U
 
+#define AQ_CFG_RX_REFILL_THRES 32U
+
+#define AQ_CFG_RX_HDR_SIZE 256U
+
+#define AQ_CFG_RX_PAGEORDER 0U
+
 /* LRO */
 #define AQ_CFG_IS_LRO_DEF           1U
 
 /* RSS */
-#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX  128U
-#define AQ_CFG_RSS_HASHKEY_SIZE           320U
+#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX  64U
+#define AQ_CFG_RSS_HASHKEY_SIZE           40U
 
 #define AQ_CFG_IS_RSS_DEF           1U
 #define AQ_CFG_NUM_RSS_QUEUES_DEF   AQ_CFG_VECS_DEF
@@ -83,10 +88,7 @@
 #define AQ_CFG_DRV_AUTHOR      "aQuantia"
 #define AQ_CFG_DRV_DESC        "aQuantia Corporation(R) Network Driver"
 #define AQ_CFG_DRV_NAME        "atlantic"
-#define AQ_CFG_DRV_VERSION	__stringify(NIC_MAJOR_DRIVER_VERSION)"."\
-				__stringify(NIC_MINOR_DRIVER_VERSION)"."\
-				__stringify(NIC_BUILD_DRIVER_VERSION)"."\
-				__stringify(NIC_REVISION_DRIVER_VERSION) \
+#define AQ_CFG_DRV_VERSION	UTS_RELEASE \
 				AQ_CFG_DRV_VERSION_SUFFIX
 
 #endif /* AQ_CFG_H */
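
[Editor's note: the aq_cfg.h hunk retires the private NIC_*_DRIVER_VERSION
numbers in favour of the kernel's release string: <generated/utsrelease.h>
defines UTS_RELEASE as a string literal such as "5.4.2", and adjacent string
literals concatenate at compile time. Reduced to a sketch with illustrative
names:

	#include <generated/utsrelease.h>

	#define MY_DRV_VERSION_SUFFIX	""	/* placeholder suffix */
	#define MY_DRV_VERSION		UTS_RELEASE MY_DRV_VERSION_SUFFIX
]
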
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
index d52b088..42ea8d8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_common.h: Basic includes for all files in project. */
@@ -14,7 +11,7 @@
 
 #include <linux/etherdevice.h>
 #include <linux/pci.h>
-
+#include <linux/if_vlan.h>
 #include "ver.h"
 #include "aq_cfg.h"
 #include "aq_utils.h"
@@ -41,9 +38,6 @@
 #define AQ_DEVICE_ID_AQC111S	0x91B1
 #define AQ_DEVICE_ID_AQC112S	0x92B1
 
-#define AQ_DEVICE_ID_AQC111E	0x51B1
-#define AQ_DEVICE_ID_AQC112E	0x52B1
-
 #define HW_ATL_NIC_NAME "aQuantia AQtion 10Gbit Network Adapter"
 
 #define AQ_HWREV_ANY	0
@@ -57,4 +51,9 @@
 #define AQ_NIC_RATE_1G         BIT(4)
 #define AQ_NIC_RATE_100M       BIT(5)
 
+#define AQ_NIC_RATE_EEE_10G	BIT(6)
+#define AQ_NIC_RATE_EEE_5G	BIT(7)
+#define AQ_NIC_RATE_EEE_2GS	BIT(8)
+#define AQ_NIC_RATE_EEE_1G	BIT(9)
+
 #endif /* AQ_COMMON_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
new file mode 100644
index 0000000..6da6509
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2014-2019 aQuantia Corporation. */
+
+/* File aq_drvinfo.c: Definition of common code for firmware info in sysfs. */
+
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/hwmon.h>
+#include <linux/uaccess.h>
+
+#include "aq_drvinfo.h"
+
+#if IS_REACHABLE(CONFIG_HWMON)
+static int aq_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long *value)
+{
+	struct aq_nic_s *aq_nic = dev_get_drvdata(dev);
+	int temp;
+	int err;
+
+	if (!aq_nic)
+		return -EIO;
+
+	if (type != hwmon_temp)
+		return -EOPNOTSUPP;
+
+	if (!aq_nic->aq_fw_ops->get_phy_temp)
+		return -EOPNOTSUPP;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		err = aq_nic->aq_fw_ops->get_phy_temp(aq_nic->aq_hw, &temp);
+		*value = temp;
+		return err;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int aq_hwmon_read_string(struct device *dev,
+				enum hwmon_sensor_types type,
+				u32 attr, int channel, const char **str)
+{
+	struct aq_nic_s *aq_nic = dev_get_drvdata(dev);
+
+	if (!aq_nic)
+		return -EIO;
+
+	if (type != hwmon_temp)
+		return -EOPNOTSUPP;
+
+	if (!aq_nic->aq_fw_ops->get_phy_temp)
+		return -EOPNOTSUPP;
+
+	switch (attr) {
+	case hwmon_temp_label:
+		*str = "PHY Temperature";
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t aq_hwmon_is_visible(const void *data,
+				   enum hwmon_sensor_types type,
+				   u32 attr, int channel)
+{
+	if (type != hwmon_temp)
+		return 0;
+
+	switch (attr) {
+	case hwmon_temp_input:
+	case hwmon_temp_label:
+		return 0444;
+	default:
+		return 0;
+	}
+}
+
+static const struct hwmon_ops aq_hwmon_ops = {
+	.is_visible = aq_hwmon_is_visible,
+	.read = aq_hwmon_read,
+	.read_string = aq_hwmon_read_string,
+};
+
+static u32 aq_hwmon_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_LABEL,
+	0,
+};
+
+static const struct hwmon_channel_info aq_hwmon_temp = {
+	.type = hwmon_temp,
+	.config = aq_hwmon_temp_config,
+};
+
+static const struct hwmon_channel_info *aq_hwmon_info[] = {
+	&aq_hwmon_temp,
+	NULL,
+};
+
+static const struct hwmon_chip_info aq_hwmon_chip_info = {
+	.ops = &aq_hwmon_ops,
+	.info = aq_hwmon_info,
+};
+
+int aq_drvinfo_init(struct net_device *ndev)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct device *dev = &aq_nic->pdev->dev;
+	struct device *hwmon_dev;
+	int err = 0;
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev,
+							 ndev->name,
+							 aq_nic,
+							 &aq_hwmon_chip_info,
+							 NULL);
+
+	if (IS_ERR(hwmon_dev))
+		err = PTR_ERR(hwmon_dev);
+
+	return err;
+}
+
+#else
+int aq_drvinfo_init(struct net_device *ndev) { return 0; }
+#endif
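
[Editor's note: aq_drvinfo.c wraps its whole hwmon implementation in
IS_REACHABLE(CONFIG_HWMON) rather than IS_ENABLED(): IS_REACHABLE() is true
only when the hwmon symbols can actually be linked against, i.e. hwmon is
built in, or hwmon is a module and this code is modular too. That avoids link
failures when the driver is built in while hwmon is =m. The stub pattern in
isolation, with a hypothetical function name:

	#include <linux/kconfig.h>
	#include <linux/netdevice.h>

	#if IS_REACHABLE(CONFIG_HWMON)
	int my_hwmon_init(struct net_device *ndev);	/* real registration */
	#else
	static inline int my_hwmon_init(struct net_device *ndev) { return 0; }
	#endif
]
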
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h
new file mode 100644
index 0000000..23a0487
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File aq_drvinfo.h: Declaration of common code for firmware info in sysfs. */
+
+#ifndef AQ_DRVINFO_H
+#define AQ_DRVINFO_H
+
+#include "aq_nic.h"
+#include "aq_hw.h"
+#include "hw_atl/hw_atl_utils.h"
+
+int aq_drvinfo_init(struct net_device *ndev);
+
+#endif /* AQ_DRVINFO_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 08c9fa6..24df132 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_ethtool.c: Definition of ethtool related functions. */
@@ -12,6 +9,7 @@
 #include "aq_ethtool.h"
 #include "aq_nic.h"
 #include "aq_vec.h"
+#include "aq_filters.h"
 
 static void aq_ethtool_get_regs(struct net_device *ndev,
 				struct ethtool_regs *regs, void *p)
@@ -98,8 +96,8 @@
 	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
 
 	memset(data, 0, (ARRAY_SIZE(aq_ethtool_stat_names) +
-				ARRAY_SIZE(aq_ethtool_queue_stat_names) *
-				cfg->vecs) * sizeof(u64));
+			 ARRAY_SIZE(aq_ethtool_queue_stat_names) *
+			 cfg->vecs) * sizeof(u64));
 	aq_nic_get_stats(aq_nic, data);
 }
 
@@ -137,7 +135,7 @@
 	u8 *p = data;
 
 	if (stringset == ETH_SS_STATS) {
-		memcpy(p, *aq_ethtool_stat_names,
+		memcpy(p, aq_ethtool_stat_names,
 		       sizeof(aq_ethtool_stat_names));
 		p = p + sizeof(aq_ethtool_stat_names);
 		for (i = 0; i < cfg->vecs; i++) {
@@ -201,6 +199,41 @@
 	return 0;
 }
 
+static int aq_ethtool_set_rss(struct net_device *netdev, const u32 *indir,
+			      const u8 *key, const u8 hfunc)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(netdev);
+	struct aq_nic_cfg_s *cfg;
+	unsigned int i = 0U;
+	u32 rss_entries;
+	int err = 0;
+
+	cfg = aq_nic_get_cfg(aq_nic);
+	rss_entries = cfg->aq_rss.indirection_table_size;
+
+	/* We do not allow change in unsupported parameters */
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+	/* Fill out the redirection table */
+	if (indir)
+		for (i = 0; i < rss_entries; i++)
+			cfg->aq_rss.indirection_table[i] = indir[i];
+
+	/* Fill out the rss hash key */
+	if (key) {
+		memcpy(cfg->aq_rss.hash_secret_key, key,
+		       sizeof(cfg->aq_rss.hash_secret_key));
+		err = aq_nic->aq_hw_ops->hw_rss_hash_set(aq_nic->aq_hw,
+			&cfg->aq_rss);
+		if (err)
+			return err;
+	}
+
+	err = aq_nic->aq_hw_ops->hw_rss_set(aq_nic->aq_hw, &cfg->aq_rss);
+
+	return err;
+}
+
 static int aq_ethtool_get_rxnfc(struct net_device *ndev,
 				struct ethtool_rxnfc *cmd,
 				u32 *rule_locs)
@@ -213,7 +246,36 @@
 	case ETHTOOL_GRXRINGS:
 		cmd->data = cfg->vecs;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = aq_get_rxnfc_count_all_rules(aq_nic);
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		err = aq_get_rxnfc_rule(aq_nic, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		err = aq_get_rxnfc_all_rules(aq_nic, cmd, rule_locs);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
 
+	return err;
+}
+
+static int aq_ethtool_set_rxnfc(struct net_device *ndev,
+				struct ethtool_rxnfc *cmd)
+{
+	int err = 0;
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		err = aq_add_rxnfc_rule(aq_nic, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		err = aq_del_rxnfc_rule(aq_nic, cmd);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -285,30 +347,147 @@
 	return aq_nic_update_interrupt_moderation_settings(aq_nic);
 }
 
+static void aq_ethtool_get_wol(struct net_device *ndev,
+			       struct ethtool_wolinfo *wol)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+
+	wol->supported = WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	if (cfg->wol)
+		wol->wolopts |= WAKE_MAGIC;
+}
+
+static int aq_ethtool_set_wol(struct net_device *ndev,
+			      struct ethtool_wolinfo *wol)
+{
+	struct pci_dev *pdev = to_pci_dev(ndev->dev.parent);
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	int err = 0;
+
+	if (wol->wolopts & WAKE_MAGIC)
+		cfg->wol |= AQ_NIC_WOL_ENABLED;
+	else
+		cfg->wol &= ~AQ_NIC_WOL_ENABLED;
+	err = device_set_wakeup_enable(&pdev->dev, wol->wolopts);
+
+	return err;
+}
+
+static enum hw_atl_fw2x_rate eee_mask_to_ethtool_mask(u32 speed)
+{
+	u32 rate = 0;
+
+	if (speed & AQ_NIC_RATE_EEE_10G)
+		rate |= SUPPORTED_10000baseT_Full;
+
+	if (speed & AQ_NIC_RATE_EEE_2GS)
+		rate |= SUPPORTED_2500baseX_Full;
+
+	if (speed & AQ_NIC_RATE_EEE_1G)
+		rate |= SUPPORTED_1000baseT_Full;
+
+	return rate;
+}
+
+static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_eee *eee)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	u32 rate, supported_rates;
+	int err = 0;
+
+	if (!aq_nic->aq_fw_ops->get_eee_rate)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&aq_nic->fwreq_mutex);
+	err = aq_nic->aq_fw_ops->get_eee_rate(aq_nic->aq_hw, &rate,
+					      &supported_rates);
+	mutex_unlock(&aq_nic->fwreq_mutex);
+	if (err < 0)
+		return err;
+
+	eee->supported = eee_mask_to_ethtool_mask(supported_rates);
+
+	if (aq_nic->aq_nic_cfg.eee_speeds)
+		eee->advertised = eee->supported;
+
+	eee->lp_advertised = eee_mask_to_ethtool_mask(rate);
+
+	eee->eee_enabled = !!eee->advertised;
+
+	eee->tx_lpi_enabled = eee->eee_enabled;
+	if (eee->advertised & eee->lp_advertised)
+		eee->eee_active = true;
+
+	return 0;
+}
+
+static int aq_ethtool_set_eee(struct net_device *ndev, struct ethtool_eee *eee)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	u32 rate, supported_rates;
+	struct aq_nic_cfg_s *cfg;
+	int err = 0;
+
+	cfg = aq_nic_get_cfg(aq_nic);
+
+	if (unlikely(!aq_nic->aq_fw_ops->get_eee_rate ||
+		     !aq_nic->aq_fw_ops->set_eee_rate))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&aq_nic->fwreq_mutex);
+	err = aq_nic->aq_fw_ops->get_eee_rate(aq_nic->aq_hw, &rate,
+					      &supported_rates);
+	mutex_unlock(&aq_nic->fwreq_mutex);
+	if (err < 0)
+		return err;
+
+	if (eee->eee_enabled) {
+		rate = supported_rates;
+		cfg->eee_speeds = rate;
+	} else {
+		rate = 0;
+		cfg->eee_speeds = 0;
+	}
+
+	mutex_lock(&aq_nic->fwreq_mutex);
+	err = aq_nic->aq_fw_ops->set_eee_rate(aq_nic->aq_hw, rate);
+	mutex_unlock(&aq_nic->fwreq_mutex);
+
+	return err;
+}
+
 static int aq_ethtool_nway_reset(struct net_device *ndev)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	int err = 0;
 
 	if (unlikely(!aq_nic->aq_fw_ops->renegotiate))
 		return -EOPNOTSUPP;
 
-	if (netif_running(ndev))
-		return aq_nic->aq_fw_ops->renegotiate(aq_nic->aq_hw);
+	if (netif_running(ndev)) {
+		mutex_lock(&aq_nic->fwreq_mutex);
+		err = aq_nic->aq_fw_ops->renegotiate(aq_nic->aq_hw);
+		mutex_unlock(&aq_nic->fwreq_mutex);
+	}
 
-	return 0;
+	return err;
 }
 
 static void aq_ethtool_get_pauseparam(struct net_device *ndev,
 				      struct ethtool_pauseparam *pause)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	u32 fc = aq_nic->aq_nic_cfg.flow_control;
 
 	pause->autoneg = 0;
 
-	if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
-		pause->rx_pause = 1;
-	if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
-		pause->tx_pause = 1;
+	pause->rx_pause = !!(fc & AQ_NIC_FC_RX);
+	pause->tx_pause = !!(fc & AQ_NIC_FC_TX);
 }
 
 static int aq_ethtool_set_pauseparam(struct net_device *ndev,
@@ -333,7 +512,9 @@
 	else
 		aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_TX;
 
+	mutex_lock(&aq_nic->fwreq_mutex);
 	err = aq_nic->aq_fw_ops->set_flow_control(aq_nic->aq_hw);
+	mutex_unlock(&aq_nic->fwreq_mutex);
 
 	return err;
 }
@@ -390,7 +571,7 @@
 		}
 	}
 	if (ndev_running)
-		err = dev_open(ndev);
+		err = dev_open(ndev, NULL);
 
 err_exit:
 	return err;
@@ -403,14 +584,20 @@
 	.get_drvinfo         = aq_ethtool_get_drvinfo,
 	.get_strings         = aq_ethtool_get_strings,
 	.get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
+	.get_wol             = aq_ethtool_get_wol,
+	.set_wol             = aq_ethtool_set_wol,
 	.nway_reset          = aq_ethtool_nway_reset,
 	.get_ringparam       = aq_get_ringparam,
 	.set_ringparam       = aq_set_ringparam,
+	.get_eee             = aq_ethtool_get_eee,
+	.set_eee             = aq_ethtool_set_eee,
 	.get_pauseparam      = aq_ethtool_get_pauseparam,
 	.set_pauseparam      = aq_ethtool_set_pauseparam,
 	.get_rxfh_key_size   = aq_ethtool_get_rss_key_size,
 	.get_rxfh            = aq_ethtool_get_rss,
+	.set_rxfh            = aq_ethtool_set_rss,
 	.get_rxnfc           = aq_ethtool_get_rxnfc,
+	.set_rxnfc           = aq_ethtool_set_rxnfc,
 	.get_sset_count      = aq_ethtool_get_sset_count,
 	.get_ethtool_stats   = aq_ethtool_stats,
 	.get_link_ksettings  = aq_ethtool_get_link_ksettings,
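
[Editor's note: a pattern repeated through the aq_ethtool.c hunk above: every
firmware mailbox op (renegotiate, set_flow_control, get_eee_rate/set_eee_rate)
is now serialized behind aq_nic->fwreq_mutex. Each call site reduces to this
shape; the wrapper name is illustrative and the aq_nic_s layout comes from the
driver's aq_nic.h:

	#include <linux/mutex.h>

	static int my_fw_renegotiate(struct aq_nic_s *aq_nic)
	{
		int err;

		mutex_lock(&aq_nic->fwreq_mutex);
		err = aq_nic->aq_fw_ops->renegotiate(aq_nic->aq_hw);
		mutex_unlock(&aq_nic->fwreq_mutex);

		return err;
	}
]
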
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
index 21c126e..632b553 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_ethtool.h: Declaration of ethtool related functions. */
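
[Editor's note: aq_filters.c below keeps its rx rules in an hlist sorted by
location; aq_update_table_filters() finds the predecessor and inserts with
hlist_add_behind(), falling back to hlist_add_head() for an empty list or a
front insertion. The bookkeeping in isolation, as a hedged sketch with
placeholder types:

	#include <linux/list.h>
	#include <linux/types.h>

	struct my_rule {		/* stand-in for struct aq_rx_filter */
		struct hlist_node node;
		u32 location;
	};

	static void my_insert_sorted(struct hlist_head *head, struct my_rule *new)
	{
		struct my_rule *rule, *parent = NULL;
		struct hlist_node *tmp;

		hlist_for_each_entry_safe(rule, tmp, head, node) {
			if (rule->location >= new->location)
				break;
			parent = rule;	/* last entry before the slot */
		}

		if (parent)
			hlist_add_behind(&new->node, &parent->node);
		else
			hlist_add_head(&new->node, head);
	}
]
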
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
new file mode 100644
index 0000000..aee827f
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File aq_filters.c: RX filters related functions. */
+
+#include "aq_filters.h"
+
+static bool __must_check
+aq_rule_is_approve(struct ethtool_rx_flow_spec *fsp)
+{
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return false;
+
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case ETHER_FLOW:
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		return true;
+	case IP_USER_FLOW:
+		switch (fsp->h_u.usr_ip4_spec.proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+		case IPPROTO_SCTP:
+		case IPPROTO_IP:
+			return true;
+		default:
+			return false;
+		}
+	case IPV6_USER_FLOW:
+		switch (fsp->h_u.usr_ip6_spec.l4_proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+		case IPPROTO_SCTP:
+		case IPPROTO_IP:
+			return true;
+		default:
+			return false;
+		}
+	default:
+		return false;
+	}
+
+	return false;
+}
+
+static bool __must_check
+aq_match_filter(struct ethtool_rx_flow_spec *fsp1,
+		struct ethtool_rx_flow_spec *fsp2)
+{
+	if (fsp1->flow_type != fsp2->flow_type ||
+	    memcmp(&fsp1->h_u, &fsp2->h_u, sizeof(fsp2->h_u)) ||
+	    memcmp(&fsp1->h_ext, &fsp2->h_ext, sizeof(fsp2->h_ext)) ||
+	    memcmp(&fsp1->m_u, &fsp2->m_u, sizeof(fsp2->m_u)) ||
+	    memcmp(&fsp1->m_ext, &fsp2->m_ext, sizeof(fsp2->m_ext)))
+		return false;
+
+	return true;
+}
+
+static bool __must_check
+aq_rule_already_exists(struct aq_nic_s *aq_nic,
+		       struct ethtool_rx_flow_spec *fsp)
+{
+	struct aq_rx_filter *rule;
+	struct hlist_node *aq_node2;
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		if (rule->aq_fsp.location == fsp->location)
+			continue;
+		if (aq_match_filter(&rule->aq_fsp, fsp)) {
+			netdev_err(aq_nic->ndev,
+				   "ethtool: This filter is already set\n");
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static int aq_check_approve_fl3l4(struct aq_nic_s *aq_nic,
+				  struct aq_hw_rx_fltrs_s *rx_fltrs,
+				  struct ethtool_rx_flow_spec *fsp)
+{
+	if (fsp->location < AQ_RX_FIRST_LOC_FL3L4 ||
+	    fsp->location > AQ_RX_LAST_LOC_FL3L4) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: location must be in range [%d, %d]",
+			   AQ_RX_FIRST_LOC_FL3L4,
+			   AQ_RX_LAST_LOC_FL3L4);
+		return -EINVAL;
+	}
+	if (rx_fltrs->fl3l4.is_ipv6 && rx_fltrs->fl3l4.active_ipv4) {
+		rx_fltrs->fl3l4.is_ipv6 = false;
+		netdev_err(aq_nic->ndev,
+			   "ethtool: mixing ipv4 and ipv6 is not allowed");
+		return -EINVAL;
+	} else if (!rx_fltrs->fl3l4.is_ipv6 && rx_fltrs->fl3l4.active_ipv6) {
+		rx_fltrs->fl3l4.is_ipv6 = true;
+		netdev_err(aq_nic->ndev,
+			   "ethtool: mixing ipv4 and ipv6 is not allowed");
+		return -EINVAL;
+	} else if (rx_fltrs->fl3l4.is_ipv6		      &&
+		   fsp->location != AQ_RX_FIRST_LOC_FL3L4 + 4 &&
+		   fsp->location != AQ_RX_FIRST_LOC_FL3L4) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: The specified location for ipv6 must be %d or %d",
+			   AQ_RX_FIRST_LOC_FL3L4, AQ_RX_FIRST_LOC_FL3L4 + 4);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __must_check
+aq_check_approve_fl2(struct aq_nic_s *aq_nic,
+		     struct aq_hw_rx_fltrs_s *rx_fltrs,
+		     struct ethtool_rx_flow_spec *fsp)
+{
+	if (fsp->location < AQ_RX_FIRST_LOC_FETHERT ||
+	    fsp->location > AQ_RX_LAST_LOC_FETHERT) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: location must be in range [%d, %d]",
+			   AQ_RX_FIRST_LOC_FETHERT,
+			   AQ_RX_LAST_LOC_FETHERT);
+		return -EINVAL;
+	}
+
+	if (be16_to_cpu(fsp->m_ext.vlan_tci) == VLAN_PRIO_MASK &&
+	    fsp->m_u.ether_spec.h_proto == 0U) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: proto (ether_type) parameter must be specified");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __must_check
+aq_check_approve_fvlan(struct aq_nic_s *aq_nic,
+		       struct aq_hw_rx_fltrs_s *rx_fltrs,
+		       struct ethtool_rx_flow_spec *fsp)
+{
+	if (fsp->location < AQ_RX_FIRST_LOC_FVLANID ||
+	    fsp->location > AQ_RX_LAST_LOC_FVLANID) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: location must be in range [%d, %d]",
+			   AQ_RX_FIRST_LOC_FVLANID,
+			   AQ_RX_LAST_LOC_FVLANID);
+		return -EINVAL;
+	}
+
+	if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+	    (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci),
+		       aq_nic->active_vlans))) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: unknown vlan-id specified");
+		return -EINVAL;
+	}
+
+	if (fsp->ring_cookie > aq_nic->aq_nic_cfg.num_rss_queues) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: queue number must be in range [0, %d]",
+			   aq_nic->aq_nic_cfg.num_rss_queues - 1);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int __must_check
+aq_check_filter(struct aq_nic_s *aq_nic,
+		struct ethtool_rx_flow_spec *fsp)
+{
+	int err = 0;
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+
+	if (fsp->flow_type & FLOW_EXT) {
+		if (be16_to_cpu(fsp->m_ext.vlan_tci) == VLAN_VID_MASK) {
+			err = aq_check_approve_fvlan(aq_nic, rx_fltrs, fsp);
+		} else if (be16_to_cpu(fsp->m_ext.vlan_tci) == VLAN_PRIO_MASK) {
+			err = aq_check_approve_fl2(aq_nic, rx_fltrs, fsp);
+		} else {
+			netdev_err(aq_nic->ndev,
+				   "ethtool: invalid vlan mask 0x%x specified",
+				   be16_to_cpu(fsp->m_ext.vlan_tci));
+			err = -EINVAL;
+		}
+	} else {
+		switch (fsp->flow_type & ~FLOW_EXT) {
+		case ETHER_FLOW:
+			err = aq_check_approve_fl2(aq_nic, rx_fltrs, fsp);
+			break;
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+		case IPV4_FLOW:
+		case IP_USER_FLOW:
+			rx_fltrs->fl3l4.is_ipv6 = false;
+			err = aq_check_approve_fl3l4(aq_nic, rx_fltrs, fsp);
+			break;
+		case TCP_V6_FLOW:
+		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
+		case IPV6_FLOW:
+		case IPV6_USER_FLOW:
+			rx_fltrs->fl3l4.is_ipv6 = true;
+			err = aq_check_approve_fl3l4(aq_nic, rx_fltrs, fsp);
+			break;
+		default:
+			netdev_err(aq_nic->ndev,
+				   "ethtool: unknown flow-type specified");
+			err = -EINVAL;
+		}
+	}
+
+	return err;
+}
+
+static bool __must_check
+aq_rule_is_not_support(struct aq_nic_s *aq_nic,
+		       struct ethtool_rx_flow_spec *fsp)
+{
+	bool rule_is_not_support = false;
+
+	if (!(aq_nic->ndev->features & NETIF_F_NTUPLE)) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: please enable the ntuple feature first:\n"
+			   "ethtool -K %s ntuple on\n", aq_nic->ndev->name);
+		rule_is_not_support = true;
+	} else if (!aq_rule_is_approve(fsp)) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: The specified flow type is not supported\n");
+		rule_is_not_support = true;
+	} else if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW &&
+		   (fsp->h_u.tcp_ip4_spec.tos ||
+		    fsp->h_u.tcp_ip6_spec.tclass)) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: The specified tos/tclass values are not supported\n");
+		rule_is_not_support = true;
+	} else if (fsp->flow_type & FLOW_MAC_EXT) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: MAC_EXT is not supported");
+		rule_is_not_support = true;
+	}
+
+	return rule_is_not_support;
+}
+
+static bool __must_check
+aq_rule_is_not_correct(struct aq_nic_s *aq_nic,
+		       struct ethtool_rx_flow_spec *fsp)
+{
+	bool rule_is_not_correct = false;
+
+	if (!aq_nic) {
+		rule_is_not_correct = true;
+	} else if (fsp->location > AQ_RX_MAX_RXNFC_LOC) {
+		netdev_err(aq_nic->ndev,
+			   "ethtool: The specified rule number %u is invalid\n",
+			   fsp->location);
+		rule_is_not_correct = true;
+	} else if (aq_check_filter(aq_nic, fsp)) {
+		rule_is_not_correct = true;
+	} else if (fsp->ring_cookie != RX_CLS_FLOW_DISC) {
+		if (fsp->ring_cookie >= aq_nic->aq_nic_cfg.num_rss_queues) {
+			netdev_err(aq_nic->ndev,
+				   "ethtool: The specified action is invalid.\n"
+				   "Maximum allowable action value is %u.\n",
+				   aq_nic->aq_nic_cfg.num_rss_queues - 1);
+			rule_is_not_correct = true;
+		}
+	}
+
+	return rule_is_not_correct;
+}
+
+static int __must_check
+aq_check_rule(struct aq_nic_s *aq_nic,
+	      struct ethtool_rx_flow_spec *fsp)
+{
+	int err = 0;
+
+	if (aq_rule_is_not_correct(aq_nic, fsp))
+		err = -EINVAL;
+	else if (aq_rule_is_not_support(aq_nic, fsp))
+		err = -EOPNOTSUPP;
+	else if (aq_rule_already_exists(aq_nic, fsp))
+		err = -EEXIST;
+
+	return err;
+}
+
+static void aq_set_data_fl2(struct aq_nic_s *aq_nic,
+			    struct aq_rx_filter *aq_rx_fltr,
+			    struct aq_rx_filter_l2 *data, bool add)
+{
+	const struct ethtool_rx_flow_spec *fsp = &aq_rx_fltr->aq_fsp;
+
+	memset(data, 0, sizeof(*data));
+
+	data->location = fsp->location - AQ_RX_FIRST_LOC_FETHERT;
+
+	if (fsp->ring_cookie != RX_CLS_FLOW_DISC)
+		data->queue = fsp->ring_cookie;
+	else
+		data->queue = -1;
+
+	data->ethertype = be16_to_cpu(fsp->h_u.ether_spec.h_proto);
+	data->user_priority_en = be16_to_cpu(fsp->m_ext.vlan_tci)
+				 == VLAN_PRIO_MASK;
+	data->user_priority = (be16_to_cpu(fsp->h_ext.vlan_tci)
+			       & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+static int aq_add_del_fether(struct aq_nic_s *aq_nic,
+			     struct aq_rx_filter *aq_rx_fltr, bool add)
+{
+	struct aq_rx_filter_l2 data;
+	struct aq_hw_s *aq_hw = aq_nic->aq_hw;
+	const struct aq_hw_ops *aq_hw_ops = aq_nic->aq_hw_ops;
+
+	aq_set_data_fl2(aq_nic, aq_rx_fltr, &data, add);
+
+	if (unlikely(!aq_hw_ops->hw_filter_l2_set))
+		return -EOPNOTSUPP;
+	if (unlikely(!aq_hw_ops->hw_filter_l2_clear))
+		return -EOPNOTSUPP;
+
+	if (add)
+		return aq_hw_ops->hw_filter_l2_set(aq_hw, &data);
+	else
+		return aq_hw_ops->hw_filter_l2_clear(aq_hw, &data);
+}
+
+static bool aq_fvlan_is_busy(struct aq_rx_filter_vlan *aq_vlans, int vlan)
+{
+	int i;
+
+	for (i = 0; i < AQ_VLAN_MAX_FILTERS; ++i) {
+		if (aq_vlans[i].enable &&
+		    aq_vlans[i].queue != AQ_RX_QUEUE_NOT_ASSIGNED &&
+		    aq_vlans[i].vlan_id == vlan) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Rebuild the array of vlan filters so that filters with an assigned
+ * queue take precedence over plain vlans configured on the interface.
+ */
+static void aq_fvlan_rebuild(struct aq_nic_s *aq_nic,
+			     unsigned long *active_vlans,
+			     struct aq_rx_filter_vlan *aq_vlans)
+{
+	bool vlan_busy = false;
+	int vlan = -1;
+	int i;
+
+	for (i = 0; i < AQ_VLAN_MAX_FILTERS; ++i) {
+		if (aq_vlans[i].enable &&
+		    aq_vlans[i].queue != AQ_RX_QUEUE_NOT_ASSIGNED)
+			continue;
+		do {
+			vlan = find_next_bit(active_vlans,
+					     VLAN_N_VID,
+					     vlan + 1);
+			if (vlan == VLAN_N_VID) {
+				aq_vlans[i].enable = 0U;
+				aq_vlans[i].queue = AQ_RX_QUEUE_NOT_ASSIGNED;
+				aq_vlans[i].vlan_id = 0;
+				continue;
+			}
+
+			vlan_busy = aq_fvlan_is_busy(aq_vlans, vlan);
+			if (!vlan_busy) {
+				aq_vlans[i].enable = 1U;
+				aq_vlans[i].queue = AQ_RX_QUEUE_NOT_ASSIGNED;
+				aq_vlans[i].vlan_id = vlan;
+			}
+		} while (vlan_busy && vlan != VLAN_N_VID);
+	}
+}
+
+static int aq_set_data_fvlan(struct aq_nic_s *aq_nic,
+			     struct aq_rx_filter *aq_rx_fltr,
+			     struct aq_rx_filter_vlan *aq_vlans, bool add)
+{
+	const struct ethtool_rx_flow_spec *fsp = &aq_rx_fltr->aq_fsp;
+	int location = fsp->location - AQ_RX_FIRST_LOC_FVLANID;
+	int i;
+
+	memset(&aq_vlans[location], 0, sizeof(aq_vlans[location]));
+
+	if (!add)
+		return 0;
+
+	/* remove vlan if it was in table without queue assignment */
+	for (i = 0; i < AQ_VLAN_MAX_FILTERS; ++i) {
+		if (aq_vlans[i].vlan_id ==
+		   (be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK)) {
+			aq_vlans[i].enable = false;
+		}
+	}
+
+	aq_vlans[location].location = location;
+	aq_vlans[location].vlan_id = be16_to_cpu(fsp->h_ext.vlan_tci)
+				     & VLAN_VID_MASK;
+	aq_vlans[location].queue = fsp->ring_cookie & 0x1FU;
+	aq_vlans[location].enable = 1U;
+
+	return 0;
+}
+
+int aq_del_fvlan_by_vlan(struct aq_nic_s *aq_nic, u16 vlan_id)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct aq_rx_filter *rule = NULL;
+	struct hlist_node *aq_node2;
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		if (be16_to_cpu(rule->aq_fsp.h_ext.vlan_tci) == vlan_id)
+			break;
+	}
+	if (rule && rule->type == aq_rx_filter_vlan &&
+	    be16_to_cpu(rule->aq_fsp.h_ext.vlan_tci) == vlan_id) {
+		struct ethtool_rxnfc cmd;
+
+		cmd.fs.location = rule->aq_fsp.location;
+		return aq_del_rxnfc_rule(aq_nic, &cmd);
+	}
+
+	return -ENOENT;
+}
+
+static int aq_add_del_fvlan(struct aq_nic_s *aq_nic,
+			    struct aq_rx_filter *aq_rx_fltr, bool add)
+{
+	const struct aq_hw_ops *aq_hw_ops = aq_nic->aq_hw_ops;
+
+	if (unlikely(!aq_hw_ops->hw_filter_vlan_set))
+		return -EOPNOTSUPP;
+
+	aq_set_data_fvlan(aq_nic,
+			  aq_rx_fltr,
+			  aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans,
+			  add);
+
+	return aq_filters_vlans_update(aq_nic);
+}
+
+static int aq_set_data_fl3l4(struct aq_nic_s *aq_nic,
+			     struct aq_rx_filter *aq_rx_fltr,
+			     struct aq_rx_filter_l3l4 *data, bool add)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	const struct ethtool_rx_flow_spec *fsp = &aq_rx_fltr->aq_fsp;
+
+	memset(data, 0, sizeof(*data));
+
+	data->is_ipv6 = rx_fltrs->fl3l4.is_ipv6;
+	data->location = HW_ATL_GET_REG_LOCATION_FL3L4(fsp->location);
+
+	if (!add) {
+		if (!data->is_ipv6)
+			rx_fltrs->fl3l4.active_ipv4 &= ~BIT(data->location);
+		else
+			rx_fltrs->fl3l4.active_ipv6 &=
+				~BIT((data->location) / 4);
+
+		return 0;
+	}
+
+	data->cmd |= HW_ATL_RX_ENABLE_FLTR_L3L4;
+
+	switch (fsp->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_PROT_L4;
+		break;
+	case UDP_V4_FLOW:
+	case UDP_V6_FLOW:
+		data->cmd |= HW_ATL_RX_UDP;
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_PROT_L4;
+		break;
+	case SCTP_V4_FLOW:
+	case SCTP_V6_FLOW:
+		data->cmd |= HW_ATL_RX_SCTP;
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_PROT_L4;
+		break;
+	default:
+		break;
+	}
+
+	if (!data->is_ipv6) {
+		data->ip_src[0] =
+			ntohl(fsp->h_u.tcp_ip4_spec.ip4src);
+		data->ip_dst[0] =
+			ntohl(fsp->h_u.tcp_ip4_spec.ip4dst);
+		rx_fltrs->fl3l4.active_ipv4 |= BIT(data->location);
+	} else {
+		int i;
+
+		rx_fltrs->fl3l4.active_ipv6 |= BIT((data->location) / 4);
+		for (i = 0; i < HW_ATL_RX_CNT_REG_ADDR_IPV6; ++i) {
+			data->ip_dst[i] =
+				ntohl(fsp->h_u.tcp_ip6_spec.ip6dst[i]);
+			data->ip_src[i] =
+				ntohl(fsp->h_u.tcp_ip6_spec.ip6src[i]);
+		}
+		data->cmd |= HW_ATL_RX_ENABLE_L3_IPV6;
+	}
+	if (fsp->flow_type != IP_USER_FLOW &&
+	    fsp->flow_type != IPV6_USER_FLOW) {
+		if (!data->is_ipv6) {
+			data->p_dst =
+				ntohs(fsp->h_u.tcp_ip4_spec.pdst);
+			data->p_src =
+				ntohs(fsp->h_u.tcp_ip4_spec.psrc);
+		} else {
+			data->p_dst =
+				ntohs(fsp->h_u.tcp_ip6_spec.pdst);
+			data->p_src =
+				ntohs(fsp->h_u.tcp_ip6_spec.psrc);
+		}
+	}
+	if (data->ip_src[0] && !data->is_ipv6)
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_SRC_ADDR_L3;
+	if (data->ip_dst[0] && !data->is_ipv6)
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_DEST_ADDR_L3;
+	if (data->p_dst)
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4;
+	if (data->p_src)
+		data->cmd |= HW_ATL_RX_ENABLE_CMP_SRC_PORT_L4;
+	if (fsp->ring_cookie != RX_CLS_FLOW_DISC) {
+		data->cmd |= HW_ATL_RX_HOST << HW_ATL_RX_ACTION_FL3F4_SHIFT;
+		data->cmd |= fsp->ring_cookie << HW_ATL_RX_QUEUE_FL3L4_SHIFT;
+		data->cmd |= HW_ATL_RX_ENABLE_QUEUE_L3L4;
+	} else {
+		data->cmd |= HW_ATL_RX_DISCARD << HW_ATL_RX_ACTION_FL3F4_SHIFT;
+	}
+
+	return 0;
+}
+
+static int aq_set_fl3l4(struct aq_hw_s *aq_hw,
+			const struct aq_hw_ops *aq_hw_ops,
+			struct aq_rx_filter_l3l4 *data)
+{
+	if (unlikely(!aq_hw_ops->hw_filter_l3l4_set))
+		return -EOPNOTSUPP;
+
+	return aq_hw_ops->hw_filter_l3l4_set(aq_hw, data);
+}
+
+static int aq_add_del_fl3l4(struct aq_nic_s *aq_nic,
+			    struct aq_rx_filter *aq_rx_fltr, bool add)
+{
+	const struct aq_hw_ops *aq_hw_ops = aq_nic->aq_hw_ops;
+	struct aq_hw_s *aq_hw = aq_nic->aq_hw;
+	struct aq_rx_filter_l3l4 data;
+
+	if (unlikely(aq_rx_fltr->aq_fsp.location < AQ_RX_FIRST_LOC_FL3L4 ||
+		     aq_rx_fltr->aq_fsp.location > AQ_RX_LAST_LOC_FL3L4  ||
+		     aq_set_data_fl3l4(aq_nic, aq_rx_fltr, &data, add)))
+		return -EINVAL;
+
+	return aq_set_fl3l4(aq_hw, aq_hw_ops, &data);
+}
+
+static int aq_add_del_rule(struct aq_nic_s *aq_nic,
+			   struct aq_rx_filter *aq_rx_fltr, bool add)
+{
+	int err = -EINVAL;
+
+	if (aq_rx_fltr->aq_fsp.flow_type & FLOW_EXT) {
+		if (be16_to_cpu(aq_rx_fltr->aq_fsp.m_ext.vlan_tci)
+		    == VLAN_VID_MASK) {
+			aq_rx_fltr->type = aq_rx_filter_vlan;
+			err = aq_add_del_fvlan(aq_nic, aq_rx_fltr, add);
+		} else if (be16_to_cpu(aq_rx_fltr->aq_fsp.m_ext.vlan_tci)
+			== VLAN_PRIO_MASK) {
+			aq_rx_fltr->type = aq_rx_filter_ethertype;
+			err = aq_add_del_fether(aq_nic, aq_rx_fltr, add);
+		}
+	} else {
+		switch (aq_rx_fltr->aq_fsp.flow_type & ~FLOW_EXT) {
+		case ETHER_FLOW:
+			aq_rx_fltr->type = aq_rx_filter_ethertype;
+			err = aq_add_del_fether(aq_nic, aq_rx_fltr, add);
+			break;
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+		case IP_USER_FLOW:
+		case TCP_V6_FLOW:
+		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
+		case IPV6_USER_FLOW:
+			aq_rx_fltr->type = aq_rx_filter_l3l4;
+			err = aq_add_del_fl3l4(aq_nic, aq_rx_fltr, add);
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int aq_update_table_filters(struct aq_nic_s *aq_nic,
+				   struct aq_rx_filter *aq_rx_fltr, u16 index,
+				   struct ethtool_rxnfc *cmd)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct aq_rx_filter *rule = NULL, *parent = NULL;
+	struct hlist_node *aq_node2;
+	int err = -EINVAL;
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		if (rule->aq_fsp.location >= index)
+			break;
+		parent = rule;
+	}
+
+	if (rule && rule->aq_fsp.location == index) {
+		err = aq_add_del_rule(aq_nic, rule, false);
+		hlist_del(&rule->aq_node);
+		kfree(rule);
+		--rx_fltrs->active_filters;
+	}
+
+	if (unlikely(!aq_rx_fltr))
+		return err;
+
+	INIT_HLIST_NODE(&aq_rx_fltr->aq_node);
+
+	if (parent)
+		hlist_add_behind(&aq_rx_fltr->aq_node, &parent->aq_node);
+	else
+		hlist_add_head(&aq_rx_fltr->aq_node, &rx_fltrs->filter_list);
+
+	++rx_fltrs->active_filters;
+
+	return 0;
+}
+
+u16 aq_get_rxnfc_count_all_rules(struct aq_nic_s *aq_nic)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+
+	return rx_fltrs->active_filters;
+}
+
+struct aq_hw_rx_fltrs_s *aq_get_hw_rx_fltrs(struct aq_nic_s *aq_nic)
+{
+	return &aq_nic->aq_hw_rx_fltrs;
+}
+
+int aq_add_rxnfc_rule(struct aq_nic_s *aq_nic, const struct ethtool_rxnfc *cmd)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct aq_rx_filter *aq_rx_fltr;
+	int err = 0;
+
+	err = aq_check_rule(aq_nic, fsp);
+	if (err)
+		goto err_exit;
+
+	aq_rx_fltr = kzalloc(sizeof(*aq_rx_fltr), GFP_KERNEL);
+	if (unlikely(!aq_rx_fltr)) {
+		err = -ENOMEM;
+		goto err_exit;
+	}
+
+	memcpy(&aq_rx_fltr->aq_fsp, fsp, sizeof(*fsp));
+
+	err = aq_update_table_filters(aq_nic, aq_rx_fltr, fsp->location, NULL);
+	if (unlikely(err))
+		goto err_free;
+
+	err = aq_add_del_rule(aq_nic, aq_rx_fltr, true);
+	if (unlikely(err)) {
+		hlist_del(&aq_rx_fltr->aq_node);
+		--rx_fltrs->active_filters;
+		goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	kfree(aq_rx_fltr);
+err_exit:
+	return err;
+}
+
+int aq_del_rxnfc_rule(struct aq_nic_s *aq_nic, const struct ethtool_rxnfc *cmd)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct aq_rx_filter *rule = NULL;
+	struct hlist_node *aq_node2;
+	int err = -EINVAL;
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		if (rule->aq_fsp.location == cmd->fs.location)
+			break;
+	}
+
+	if (rule && rule->aq_fsp.location == cmd->fs.location) {
+		err = aq_add_del_rule(aq_nic, rule, false);
+		hlist_del(&rule->aq_node);
+		kfree(rule);
+		--rx_fltrs->active_filters;
+	}
+	return err;
+}
+
+int aq_get_rxnfc_rule(struct aq_nic_s *aq_nic, struct ethtool_rxnfc *cmd)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct ethtool_rx_flow_spec *fsp =
+			(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct aq_rx_filter *rule = NULL;
+	struct hlist_node *aq_node2;
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node)
+		if (fsp->location <= rule->aq_fsp.location)
+			break;
+
+	if (unlikely(!rule || fsp->location != rule->aq_fsp.location))
+		return -EINVAL;
+
+	memcpy(fsp, &rule->aq_fsp, sizeof(*fsp));
+
+	return 0;
+}
+
+int aq_get_rxnfc_all_rules(struct aq_nic_s *aq_nic, struct ethtool_rxnfc *cmd,
+			   u32 *rule_locs)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct hlist_node *aq_node2;
+	struct aq_rx_filter *rule;
+	int count = 0;
+
+	cmd->data = aq_get_rxnfc_count_all_rules(aq_nic);
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		if (unlikely(count == cmd->rule_cnt))
+			return -EMSGSIZE;
+
+		rule_locs[count++] = rule->aq_fsp.location;
+	}
+
+	cmd->rule_cnt = count;
+
+	return 0;
+}
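
These getters implement the read side of the ethtool rxnfc interface; for ETHTOOL_GRXCLSRLALL the core sizes rule_locs from a preceding count query, which is why overflowing it must fail with -EMSGSIZE rather than truncate silently. The typical dispatch from a .get_rxnfc callback looks like this (a sketch; the driver's real wiring lives in aq_ethtool.c, outside this excerpt):

	switch (cmd->cmd) {
	case ETHTOOL_GRXCLSRLCNT:
		cmd->rule_cnt = aq_get_rxnfc_count_all_rules(aq_nic);
		return 0;
	case ETHTOOL_GRXCLSRULE:
		return aq_get_rxnfc_rule(aq_nic, cmd);
	case ETHTOOL_GRXCLSRLALL:
		return aq_get_rxnfc_all_rules(aq_nic, cmd, rule_locs);
	}
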
+
+int aq_clear_rxnfc_all_rules(struct aq_nic_s *aq_nic)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct hlist_node *aq_node2;
+	struct aq_rx_filter *rule;
+	int err = 0;
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		err = aq_add_del_rule(aq_nic, rule, false);
+		if (err)
+			goto err_exit;
+		hlist_del(&rule->aq_node);
+		kfree(rule);
+		--rx_fltrs->active_filters;
+	}
+
+err_exit:
+	return err;
+}
+
+int aq_reapply_rxnfc_all_rules(struct aq_nic_s *aq_nic)
+{
+	struct aq_hw_rx_fltrs_s *rx_fltrs = aq_get_hw_rx_fltrs(aq_nic);
+	struct hlist_node *aq_node2;
+	struct aq_rx_filter *rule;
+	int err = 0;
+
+	hlist_for_each_entry_safe(rule, aq_node2,
+				  &rx_fltrs->filter_list, aq_node) {
+		err = aq_add_del_rule(aq_nic, rule, true);
+		if (err)
+			goto err_exit;
+	}
+
+err_exit:
+	return err;
+}
+
+int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
+{
+	const struct aq_hw_ops *aq_hw_ops = aq_nic->aq_hw_ops;
+	struct aq_hw_s *aq_hw = aq_nic->aq_hw;
+	int hweight = 0;
+	int err = 0;
+	int i;
+
+	if (unlikely(!aq_hw_ops->hw_filter_vlan_set))
+		return -EOPNOTSUPP;
+	if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl))
+		return -EOPNOTSUPP;
+
+	aq_fvlan_rebuild(aq_nic, aq_nic->active_vlans,
+			 aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
+
+	if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+		for (i = 0; i < BITS_TO_LONGS(VLAN_N_VID); i++)
+			hweight += hweight_long(aq_nic->active_vlans[i]);
+
+		err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
+		if (err)
+			return err;
+	}
+
+	err = aq_hw_ops->hw_filter_vlan_set(aq_hw,
+					    aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
+	if (err)
+		return err;
+
+	if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+		if (hweight <= AQ_VLAN_MAX_FILTERS && hweight > 0) {
+			err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw,
+				!(aq_nic->packet_filter & IFF_PROMISC));
+			aq_nic->aq_nic_cfg.is_vlan_force_promisc = false;
+		} else {
+			/* otherwise the NIC is left in VLAN promiscuous mode */
+			aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
+		}
+	}
+
+	return err;
+}
+
+int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic)
+{
+	const struct aq_hw_ops *aq_hw_ops = aq_nic->aq_hw_ops;
+	struct aq_hw_s *aq_hw = aq_nic->aq_hw;
+	int err = 0;
+
+	memset(aq_nic->active_vlans, 0, sizeof(aq_nic->active_vlans));
+	aq_fvlan_rebuild(aq_nic, aq_nic->active_vlans,
+			 aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
+
+	if (unlikely(!aq_hw_ops->hw_filter_vlan_set))
+		return -EOPNOTSUPP;
+	if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl))
+		return -EOPNOTSUPP;
+
+	aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
+	err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
+	if (err)
+		return err;
+	err = aq_hw_ops->hw_filter_vlan_set(aq_hw,
+					    aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans);
+	return err;
+}
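
aq_filters_vlans_update() encodes the hardware limit directly: only AQ_VLAN_MAX_FILTERS (16) VLAN slots exist, so when the set of joined VIDs is empty or exceeds the table, the device stays VLAN-promiscuous and filtering is left to the stack. The gating computation is equivalent to (a sketch of the hweight_long loop above):

	/* Weight of the VID bitmap = number of VLANs the host joined. */
	int active = bitmap_weight(aq_nic->active_vlans, VLAN_N_VID);
	bool use_hw_filtering = active > 0 && active <= AQ_VLAN_MAX_FILTERS;
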
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h
new file mode 100644
index 0000000..122e06c
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File aq_filters.h: RX filters related functions. */
+
+#ifndef AQ_FILTERS_H
+#define AQ_FILTERS_H
+
+#include "aq_nic.h"
+
+enum aq_rx_filter_type {
+	aq_rx_filter_ethertype,
+	aq_rx_filter_vlan,
+	aq_rx_filter_l3l4
+};
+
+struct aq_rx_filter {
+	struct hlist_node aq_node;
+	enum aq_rx_filter_type type;
+	struct ethtool_rx_flow_spec aq_fsp;
+};
+
+u16 aq_get_rxnfc_count_all_rules(struct aq_nic_s *aq_nic);
+struct aq_hw_rx_fltrs_s *aq_get_hw_rx_fltrs(struct aq_nic_s *aq_nic);
+int aq_add_rxnfc_rule(struct aq_nic_s *aq_nic, const struct ethtool_rxnfc *cmd);
+int aq_del_rxnfc_rule(struct aq_nic_s *aq_nic, const struct ethtool_rxnfc *cmd);
+int aq_get_rxnfc_rule(struct aq_nic_s *aq_nic, struct ethtool_rxnfc *cmd);
+int aq_get_rxnfc_all_rules(struct aq_nic_s *aq_nic, struct ethtool_rxnfc *cmd,
+			   u32 *rule_locs);
+int aq_del_fvlan_by_vlan(struct aq_nic_s *aq_nic, u16 vlan_id);
+int aq_clear_rxnfc_all_rules(struct aq_nic_s *aq_nic);
+int aq_reapply_rxnfc_all_rules(struct aq_nic_s *aq_nic);
+int aq_filters_vlans_update(struct aq_nic_s *aq_nic);
+int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic);
+
+#endif /* AQ_FILTERS_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 5c00671..53d7478 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_hw.h: Declaration of abstract interface for NIC hardware specific
@@ -18,6 +15,17 @@
 #include "aq_rss.h"
 #include "hw_atl/hw_atl_utils.h"
 
+#define AQ_RX_FIRST_LOC_FVLANID     0U
+#define AQ_RX_LAST_LOC_FVLANID	   15U
+#define AQ_RX_FIRST_LOC_FETHERT    16U
+#define AQ_RX_LAST_LOC_FETHERT	   31U
+#define AQ_RX_FIRST_LOC_FL3L4	   32U
+#define AQ_RX_LAST_LOC_FL3L4	   39U
+#define AQ_RX_MAX_RXNFC_LOC	   AQ_RX_LAST_LOC_FL3L4
+#define AQ_VLAN_MAX_FILTERS   \
+			(AQ_RX_LAST_LOC_FVLANID - AQ_RX_FIRST_LOC_FVLANID + 1U)
+#define AQ_RX_QUEUE_NOT_ASSIGNED   0xFFU
+
 /* NIC H/W capabilities */
 struct aq_hw_caps_s {
 	u64 hw_features;
@@ -77,6 +85,8 @@
 #define AQ_HW_IRQ_MSI     2U
 #define AQ_HW_IRQ_MSIX    3U
 
+#define AQ_HW_SERVICE_IRQS   1U
+
 #define AQ_HW_POWER_STATE_D0   0U
 #define AQ_HW_POWER_STATE_D3   3U
 
@@ -112,7 +122,7 @@
 	const struct aq_fw_ops *aq_fw_ops;
 	void __iomem *mmio;
 	struct aq_hw_link_status_s aq_link_status;
-	struct hw_aq_atl_utils_mbox mbox;
+	struct hw_atl_utils_mbox mbox;
 	struct hw_atl_stats_s last_stats;
 	struct aq_stats_s curr_stats;
 	u64 speed;
@@ -124,12 +134,13 @@
 	u32 mbox_addr;
 	u32 rpc_addr;
 	u32 rpc_tid;
-	struct hw_aq_atl_utils_fw_rpc rpc;
+	struct hw_atl_utils_fw_rpc rpc;
 };
 
 struct aq_ring_s;
 struct aq_ring_param_s;
 struct sk_buff;
+struct aq_rx_filter_l3l4;
 
 struct aq_hw_ops {
 
@@ -183,6 +194,23 @@
 	int (*hw_packet_filter_set)(struct aq_hw_s *self,
 				    unsigned int packet_filter);
 
+	int (*hw_filter_l3l4_set)(struct aq_hw_s *self,
+				  struct aq_rx_filter_l3l4 *data);
+
+	int (*hw_filter_l3l4_clear)(struct aq_hw_s *self,
+				    struct aq_rx_filter_l3l4 *data);
+
+	int (*hw_filter_l2_set)(struct aq_hw_s *self,
+				struct aq_rx_filter_l2 *data);
+
+	int (*hw_filter_l2_clear)(struct aq_hw_s *self,
+				  struct aq_rx_filter_l2 *data);
+
+	int (*hw_filter_vlan_set)(struct aq_hw_s *self,
+				  struct aq_rx_filter_vlan *aq_vlans);
+
+	int (*hw_filter_vlan_ctrl)(struct aq_hw_s *self, bool enable);
+
 	int (*hw_multicast_list_set)(struct aq_hw_s *self,
 				     u8 ar_mac[AQ_HW_MULTICAST_ADDRESS_MAX]
 				     [ETH_ALEN],
@@ -204,7 +232,10 @@
 
 	int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
-	int (*hw_set_power)(struct aq_hw_s *self, unsigned int power_state);
+	int (*hw_set_offload)(struct aq_hw_s *self,
+			      struct aq_nic_cfg_s *aq_nic_cfg);
+
+	int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc);
 };
 
 struct aq_fw_ops {
@@ -227,7 +258,19 @@
 
 	int (*update_stats)(struct aq_hw_s *self);
 
+	int (*get_phy_temp)(struct aq_hw_s *self, int *temp);
+
+	u32 (*get_flow_control)(struct aq_hw_s *self, u32 *fcmode);
+
 	int (*set_flow_control)(struct aq_hw_s *self);
+
+	int (*set_power)(struct aq_hw_s *self, unsigned int power_state,
+			 u8 *mac);
+
+	int (*set_eee_rate)(struct aq_hw_s *self, u32 speed);
+
+	int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
+			    u32 *supported_rates);
 };
 
 #endif /* AQ_HW_H */
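
The location ranges added above partition the 40 ethtool rule slots by filter class: 0-15 VLAN, 16-31 EtherType, 32-39 L3/L4. A rule's fs.location therefore selects both the hardware table and the index within it; a classifier over those ranges would reduce to (an illustrative helper, not present in the driver):

	static enum aq_rx_filter_type loc_to_type(u32 loc)
	{
		if (loc <= AQ_RX_LAST_LOC_FVLANID)
			return aq_rx_filter_vlan;	/* slots 0..15  */
		if (loc <= AQ_RX_LAST_LOC_FETHERT)
			return aq_rx_filter_ethertype;	/* slots 16..31 */
		return aq_rx_filter_l3l4;		/* slots 32..39 */
	}
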
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
index d526c4f..9c7a226 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_hw_utils.c: Definitions of helper functions used across
@@ -53,6 +50,18 @@
 	writel(value, hw->mmio + reg);
 }
 
+/* Most 64-bit registers are in LSW/MSW form.
+ * Counters are normally implemented by HW as latched pairs:
+ * reading the LSW first locks the MSW, to overcome LSW overflow.
+ */
+u64 aq_hw_read_reg64(struct aq_hw_s *hw, u32 reg)
+{
+	u64 value = aq_hw_read_reg(hw, reg);
+
+	value |= (u64)aq_hw_read_reg(hw, reg + 4) << 32;
+	return value;
+}
+
 int aq_hw_err_from_flags(struct aq_hw_s *hw)
 {
 	int err = 0;
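
aq_hw_read_reg64() above depends on read order for correctness, not just convenience: reading the LSW latches the MSW, so the pair is sampled atomically. For contrast, an unlatched, wrong-order read of a free-running counter risks tearing (an anti-example, not driver code):

	static u64 torn_read64(struct aq_hw_s *hw, u32 reg)
	{
		u32 msw = aq_hw_read_reg(hw, reg + 4);	/* high word first */
		u32 lsw = aq_hw_read_reg(hw, reg);	/* may have wrapped */

		/* A rollover between the two reads pairs a stale MSW
		 * with a post-wrap LSW, yielding a value ~2^32 off. */
		return ((u64)msw << 32) | lsw;
	}
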
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
index dc88a12..9ef82d4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_hw_utils.h: Declaration of helper functions used across hardware
@@ -14,6 +11,8 @@
 #ifndef AQ_HW_UTILS_H
 #define AQ_HW_UTILS_H
 
+#include <linux/iopoll.h>
+
 #include "aq_common.h"
 
 #ifndef HIDWORD
@@ -23,18 +22,6 @@
 
 #define AQ_HW_SLEEP(_US_) mdelay(_US_)
 
-#define AQ_HW_WAIT_FOR(_B_, _US_, _N_) \
-do { \
-	unsigned int AQ_HW_WAIT_FOR_i; \
-	for (AQ_HW_WAIT_FOR_i = _N_; (!(_B_)) && (AQ_HW_WAIT_FOR_i);\
-	--AQ_HW_WAIT_FOR_i) {\
-		udelay(_US_); \
-	} \
-	if (!AQ_HW_WAIT_FOR_i) {\
-		err = -ETIME; \
-	} \
-} while (0)
-
 #define aq_pr_err(...) pr_err(AQ_CFG_DRV_NAME ": " __VA_ARGS__)
 #define aq_pr_trace(...) pr_info(AQ_CFG_DRV_NAME ": " __VA_ARGS__)
 
@@ -45,6 +32,7 @@
 u32 aq_hw_read_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk, u32 shift);
 u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg);
 void aq_hw_write_reg(struct aq_hw_s *hw, u32 reg, u32 value);
+u64 aq_hw_read_reg64(struct aq_hw_s *hw, u32 reg);
 int aq_hw_err_from_flags(struct aq_hw_s *hw);
 
 #endif /* AQ_HW_UTILS_H */
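
Dropping AQ_HW_WAIT_FOR in favour of <linux/iopoll.h> retires a macro that signalled timeout by assigning -ETIME to an err variable it assumed existed in the caller's scope. The standard replacement idiom looks like this (a sketch; the status register and ready bit are invented for illustration):

	static u32 example_status_read(struct aq_hw_s *hw)
	{
		return aq_hw_read_reg(hw, HW_ATL_EXAMPLE_STATUS);
	}

	static int example_wait_ready(struct aq_hw_s *hw)
	{
		u32 val;

		/* Poll every 10 us, up to 100 ms; -ETIMEDOUT on expiry. */
		return readx_poll_timeout_atomic(example_status_read, hw,
						 val, val & BIT(0),
						 10, 100000);
	}
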
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index e3ae29e..bb65dd3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_main.c: Main file for aQuantia Linux driver. */
@@ -13,6 +10,7 @@
 #include "aq_nic.h"
 #include "aq_pci_func.h"
 #include "aq_ethtool.h"
+#include "aq_filters.h"
 
 #include <linux/netdevice.h>
 #include <linux/module.h>
@@ -22,8 +20,17 @@
 MODULE_AUTHOR(AQ_CFG_DRV_AUTHOR);
 MODULE_DESCRIPTION(AQ_CFG_DRV_DESC);
 
+static const char aq_ndev_driver_name[] = AQ_CFG_DRV_NAME;
+
 static const struct net_device_ops aq_ndev_ops;
 
+static struct workqueue_struct *aq_ndev_wq;
+
+void aq_ndev_schedule_work(struct work_struct *work)
+{
+	queue_work(aq_ndev_wq, work);
+}
+
 struct net_device *aq_ndev_alloc(void)
 {
 	struct net_device *ndev = NULL;
@@ -49,6 +56,15 @@
 	err = aq_nic_init(aq_nic);
 	if (err < 0)
 		goto err_exit;
+
+	err = aq_reapply_rxnfc_all_rules(aq_nic);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_filters_vlans_update(aq_nic);
+	if (err < 0)
+		goto err_exit;
+
 	err = aq_nic_start(aq_nic);
 	if (err < 0)
 		goto err_exit;
@@ -96,24 +112,66 @@
 static int aq_ndev_set_features(struct net_device *ndev,
 				netdev_features_t features)
 {
+	bool is_vlan_rx_strip = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+	bool is_vlan_tx_insert = !!(features & NETIF_F_HW_VLAN_CTAG_TX);
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic);
+	bool need_ndev_restart = false;
+	struct aq_nic_cfg_s *aq_cfg;
 	bool is_lro = false;
+	int err = 0;
 
-	if (aq_cfg->hw_features & NETIF_F_LRO) {
+	aq_cfg = aq_nic_get_cfg(aq_nic);
+
+	if (!(features & NETIF_F_NTUPLE)) {
+		if (aq_nic->ndev->features & NETIF_F_NTUPLE) {
+			err = aq_clear_rxnfc_all_rules(aq_nic);
+			if (unlikely(err))
+				goto err_exit;
+		}
+	}
+	if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
+		if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+			err = aq_filters_vlan_offload_off(aq_nic);
+			if (unlikely(err))
+				goto err_exit;
+		}
+	}
+
+	aq_cfg->features = features;
+
+	if (aq_cfg->aq_hw_caps->hw_features & NETIF_F_LRO) {
 		is_lro = features & NETIF_F_LRO;
 
 		if (aq_cfg->is_lro != is_lro) {
 			aq_cfg->is_lro = is_lro;
-
-			if (netif_running(ndev)) {
-				aq_ndev_close(ndev);
-				aq_ndev_open(ndev);
-			}
+			need_ndev_restart = true;
 		}
 	}
 
-	return 0;
+	if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) {
+		err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw,
+							aq_cfg);
+
+		if (unlikely(err))
+			goto err_exit;
+	}
+
+	if (aq_cfg->is_vlan_rx_strip != is_vlan_rx_strip) {
+		aq_cfg->is_vlan_rx_strip = is_vlan_rx_strip;
+		need_ndev_restart = true;
+	}
+	if (aq_cfg->is_vlan_tx_insert != is_vlan_tx_insert) {
+		aq_cfg->is_vlan_tx_insert = is_vlan_tx_insert;
+		need_ndev_restart = true;
+	}
+
+	if (need_ndev_restart && netif_running(ndev)) {
+		aq_ndev_close(ndev);
+		aq_ndev_open(ndev);
+	}
+
+err_exit:
+	return err;
 }
 
 static int aq_ndev_set_mac_address(struct net_device *ndev, void *addr)
@@ -136,9 +194,36 @@
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
 
-	aq_nic_set_packet_filter(aq_nic, ndev->flags);
+	(void)aq_nic_set_multicast_list(aq_nic, ndev);
+}
 
-	aq_nic_set_multicast_list(aq_nic, ndev);
+static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto,
+				  u16 vid)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	if (!aq_nic->aq_hw_ops->hw_filter_vlan_set)
+		return -EOPNOTSUPP;
+
+	set_bit(vid, aq_nic->active_vlans);
+
+	return aq_filters_vlans_update(aq_nic);
+}
+
+static int aq_ndo_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto,
+				   u16 vid)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	if (!aq_nic->aq_hw_ops->hw_filter_vlan_set)
+		return -EOPNOTSUPP;
+
+	clear_bit(vid, aq_nic->active_vlans);
+
+	if (aq_del_fvlan_by_vlan(aq_nic, vid) == -ENOENT)
+		return aq_filters_vlans_update(aq_nic);
+
+	return 0;
 }
 
 static const struct net_device_ops aq_ndev_ops = {
@@ -148,5 +233,39 @@
 	.ndo_set_rx_mode = aq_ndev_set_multicast_settings,
 	.ndo_change_mtu = aq_ndev_change_mtu,
 	.ndo_set_mac_address = aq_ndev_set_mac_address,
-	.ndo_set_features = aq_ndev_set_features
+	.ndo_set_features = aq_ndev_set_features,
+	.ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid,
 };
+
+static int __init aq_ndev_init_module(void)
+{
+	int ret;
+
+	aq_ndev_wq = create_singlethread_workqueue(aq_ndev_driver_name);
+	if (!aq_ndev_wq) {
+		pr_err("Failed to create workqueue\n");
+		return -ENOMEM;
+	}
+
+	ret = aq_pci_func_register_driver();
+	if (ret) {
+		destroy_workqueue(aq_ndev_wq);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit aq_ndev_exit_module(void)
+{
+	aq_pci_func_unregister_driver();
+
+	if (aq_ndev_wq) {
+		destroy_workqueue(aq_ndev_wq);
+		aq_ndev_wq = NULL;
+	}
+}
+
+module_init(aq_ndev_init_module);
+module_exit(aq_ndev_exit_module);
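
Switching from module_pci_driver() to explicit init/exit hooks is what makes the ordering above possible: the workqueue must exist before the PCI driver registers, since probe can bring the interface up and the service timer immediately queues work onto it. For reference, the replaced one-liner in aq_pci_func.c expanded to roughly:

	static int __init aq_pci_ops_init(void)
	{
		return pci_register_driver(&aq_pci_ops);
	}
	module_init(aq_pci_ops_init);

	static void __exit aq_pci_ops_exit(void)
	{
		pci_unregister_driver(&aq_pci_ops);
	}
	module_exit(aq_pci_ops_exit);
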
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h
index ce92152..a5a624b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_main.h: Main file for aQuantia Linux driver. */
@@ -13,7 +10,9 @@
 #define AQ_MAIN_H
 
 #include "aq_common.h"
+#include "aq_nic.h"
 
+void aq_ndev_schedule_work(struct work_struct *work);
 struct net_device *aq_ndev_alloc(void);
 
 #endif /* AQ_MAIN_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 4f34808..137c1de 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_nic.c: Definition of common code for NIC. */
@@ -14,6 +11,7 @@
 #include "aq_vec.h"
 #include "aq_hw.h"
 #include "aq_pci_func.h"
+#include "aq_main.h"
 
 #include <linux/moduleparam.h>
 #include <linux/netdevice.h>
@@ -44,7 +42,7 @@
 	struct aq_rss_parameters *rss_params = &cfg->aq_rss;
 	int i = 0;
 
-	static u8 rss_key[40] = {
+	static u8 rss_key[AQ_CFG_RSS_HASHKEY_SIZE] = {
 		0x1e, 0xad, 0x71, 0x87, 0x65, 0xfc, 0x26, 0x7d,
 		0x0d, 0x45, 0x67, 0x74, 0xcd, 0x06, 0x1a, 0x18,
 		0xb6, 0xc1, 0xf0, 0xc7, 0xbb, 0x18, 0xbe, 0xf8,
@@ -73,6 +71,7 @@
 	cfg->tx_itr = aq_itr_tx;
 	cfg->rx_itr = aq_itr_rx;
 
+	cfg->rxpageorder = AQ_CFG_RX_PAGEORDER;
 	cfg->is_rss = AQ_CFG_IS_RSS_DEF;
 	cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF;
 	cfg->aq_rss.base_cpu_number = AQ_CFG_RSS_BASE_CPU_NUM_DEF;
@@ -84,10 +83,6 @@
 
 	cfg->is_lro = AQ_CFG_IS_LRO_DEF;
 
-	cfg->vlan_id = 0U;
-
-	aq_nic_rss_init(self, cfg->num_rss_queues);
-
 	/* descriptors */
 	cfg->rxds = min(cfg->aq_hw_caps->rxds_max, AQ_CFG_RXDS_DEF);
 	cfg->txds = min(cfg->aq_hw_caps->txds_max, AQ_CFG_TXDS_DEF);
@@ -95,7 +90,8 @@
 	/* RSS rings */
 	cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
 	cfg->vecs = min(cfg->vecs, num_online_cpus());
-	cfg->vecs = min(cfg->vecs, self->irqvecs);
+	if (self->irqvecs > AQ_HW_SERVICE_IRQS)
+		cfg->vecs = min(cfg->vecs, self->irqvecs - AQ_HW_SERVICE_IRQS);
 	/* cfg->vecs should be a power of 2 for RSS */
 	if (cfg->vecs >= 8U)
 		cfg->vecs = 8U;
@@ -108,6 +104,8 @@
 
 	cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF);
 
+	aq_nic_rss_init(self, cfg->num_rss_queues);
+
 	cfg->irq_type = aq_pci_func_get_irq_type(self);
 
 	if ((cfg->irq_type == AQ_HW_IRQ_LEGACY) ||
@@ -117,13 +115,26 @@
 		cfg->vecs = 1U;
 	}
 
+	/* Check if we have enough vectors allocated for
+	 * the link status IRQ. If not, we'll learn the link
+	 * state from the slower service task.
+	 */
+	if (AQ_HW_SERVICE_IRQS > 0 && cfg->vecs + 1 <= self->irqvecs)
+		cfg->link_irq_vec = cfg->vecs;
+	else
+		cfg->link_irq_vec = 0;
+
 	cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk;
-	cfg->hw_features = cfg->aq_hw_caps->hw_features;
+	cfg->features = cfg->aq_hw_caps->hw_features;
+	cfg->is_vlan_rx_strip = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_RX);
+	cfg->is_vlan_tx_insert = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_TX);
+	cfg->is_vlan_force_promisc = true;
 }
 
 static int aq_nic_update_link_status(struct aq_nic_s *self)
 {
 	int err = self->aq_fw_ops->update_link_status(self->aq_hw);
+	u32 fc = 0;
 
 	if (err)
 		return err;
@@ -133,6 +144,15 @@
 			AQ_CFG_DRV_NAME, self->link_status.mbps,
 			self->aq_hw->aq_link_status.mbps);
 		aq_nic_update_interrupt_moderation_settings(self);
+
+		/* The driver has to update flow control settings on the
+		 * RX block on any link event, so query the FW for the
+		 * flow control mode it negotiated.
+		 */
+		if (self->aq_fw_ops->get_flow_control)
+			self->aq_fw_ops->get_flow_control(self->aq_hw, &fc);
+		if (self->aq_hw_ops->hw_set_fc)
+			self->aq_hw_ops->hw_set_fc(self->aq_hw, fc, 0);
 	}
 
 	self->link_status = self->aq_hw->aq_link_status;
@@ -152,30 +172,48 @@
 	return 0;
 }
 
-static void aq_nic_service_timer_cb(struct timer_list *t)
+static irqreturn_t aq_linkstate_threaded_isr(int irq, void *private)
 {
-	struct aq_nic_s *self = from_timer(self, t, service_timer);
-	int ctimer = AQ_CFG_SERVICE_TIMER_INTERVAL;
-	int err = 0;
+	struct aq_nic_s *self = private;
+
+	if (!self)
+		return IRQ_NONE;
+
+	aq_nic_update_link_status(self);
+
+	self->aq_hw_ops->hw_irq_enable(self->aq_hw,
+				       BIT(self->aq_nic_cfg.link_irq_vec));
+	return IRQ_HANDLED;
+}
+
+static void aq_nic_service_task(struct work_struct *work)
+{
+	struct aq_nic_s *self = container_of(work, struct aq_nic_s,
+					     service_task);
+	int err;
 
 	if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAGS_IS_NOT_READY))
-		goto err_exit;
+		return;
 
 	err = aq_nic_update_link_status(self);
 	if (err)
-		goto err_exit;
+		return;
 
+	mutex_lock(&self->fwreq_mutex);
 	if (self->aq_fw_ops->update_stats)
 		self->aq_fw_ops->update_stats(self->aq_hw);
+	mutex_unlock(&self->fwreq_mutex);
 
 	aq_nic_update_ndev_stats(self);
+}
 
-	/* If no link - use faster timer rate to detect link up asap */
-	if (!netif_carrier_ok(self->ndev))
-		ctimer = max(ctimer / 2, 1);
+static void aq_nic_service_timer_cb(struct timer_list *t)
+{
+	struct aq_nic_s *self = from_timer(self, t, service_timer);
 
-err_exit:
-	mod_timer(&self->service_timer, jiffies + ctimer);
+	mod_timer(&self->service_timer, jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
+
+	aq_ndev_schedule_work(&self->service_task);
 }
 
 static void aq_nic_polling_timer_cb(struct timer_list *t)
@@ -189,7 +227,7 @@
 		aq_vec_isr(i, (void *)aq_vec);
 
 	mod_timer(&self->polling_timer, jiffies +
-		AQ_CFG_POLLING_TIMER_INTERVAL);
+		  AQ_CFG_POLLING_TIMER_INTERVAL);
 }
 
 int aq_nic_ndev_register(struct aq_nic_s *self)
@@ -205,8 +243,10 @@
 	if (err)
 		goto err_exit;
 
+	mutex_lock(&self->fwreq_mutex);
 	err = self->aq_fw_ops->get_mac_permanent(self->aq_hw,
 			    self->ndev->dev_addr);
+	mutex_unlock(&self->fwreq_mutex);
 	if (err)
 		goto err_exit;
 
@@ -248,7 +288,8 @@
 	self->ndev->hw_features |= aq_hw_caps->hw_features;
 	self->ndev->features = aq_hw_caps->hw_features;
 	self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
-				     NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO;
+				     NETIF_F_RXHASH | NETIF_F_SG |
+				     NETIF_F_LRO | NETIF_F_TSO;
 	self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
 	self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
@@ -275,7 +316,9 @@
 	unsigned int i = 0U;
 
 	self->power_state = AQ_HW_POWER_STATE_D0;
+	mutex_lock(&self->fwreq_mutex);
 	err = self->aq_hw_ops->hw_reset(self->aq_hw);
+	mutex_unlock(&self->fwreq_mutex);
 	if (err < 0)
 		goto err_exit;
 
@@ -301,13 +344,13 @@
 	unsigned int i = 0U;
 
 	err = self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
-						    self->mc_list.ar,
-						    self->mc_list.count);
+						     self->mc_list.ar,
+						     self->mc_list.count);
 	if (err < 0)
 		goto err_exit;
 
 	err = self->aq_hw_ops->hw_packet_filter_set(self->aq_hw,
-						   self->packet_filter);
+						    self->packet_filter);
 	if (err < 0)
 		goto err_exit;
 
@@ -325,9 +368,11 @@
 	err = aq_nic_update_interrupt_moderation_settings(self);
 	if (err)
 		goto err_exit;
+
+	INIT_WORK(&self->service_task, aq_nic_service_task);
+
 	timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0);
-	mod_timer(&self->service_timer, jiffies +
-			AQ_CFG_SERVICE_TIMER_INTERVAL);
+	aq_nic_service_timer_cb(&self->service_timer);
 
 	if (self->aq_nic_cfg.is_polling) {
 		timer_setup(&self->polling_timer, aq_nic_polling_timer_cb, 0);
@@ -336,15 +381,27 @@
 	} else {
 		for (i = 0U, aq_vec = self->aq_vec[0];
 			self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
-			err = aq_pci_func_alloc_irq(self, i,
-						    self->ndev->name, aq_vec,
+			err = aq_pci_func_alloc_irq(self, i, self->ndev->name,
+						    aq_vec_isr, aq_vec,
 						    aq_vec_get_affinity_mask(aq_vec));
 			if (err < 0)
 				goto err_exit;
 		}
 
+		if (self->aq_nic_cfg.link_irq_vec) {
+			int irqvec = pci_irq_vector(self->pdev,
+						   self->aq_nic_cfg.link_irq_vec);
+			err = request_threaded_irq(irqvec, NULL,
+						   aq_linkstate_threaded_isr,
+						   IRQF_SHARED | IRQF_ONESHOT,
+						   self->ndev->name, self);
+			if (err < 0)
+				goto err_exit;
+			self->msix_entry_mask |= (1 << self->aq_nic_cfg.link_irq_vec);
+		}
+
 		err = self->aq_hw_ops->hw_irq_enable(self->aq_hw,
-				    AQ_CFG_IRQ_MASK);
+						     AQ_CFG_IRQ_MASK);
 		if (err < 0)
 			goto err_exit;
 	}
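
The link vector registration above uses the two-stage threaded-IRQ model: a NULL hard handler lets the IRQ core simply wake a handler thread, and IRQF_ONESHOT keeps the vector masked until that thread returns, which is why aq_linkstate_threaded_isr() can safely perform blocking firmware reads. The general shape of the pattern (a sketch with placeholder names):

	/* "my_dev" and "my_thread_fn" are placeholders, not driver code. */
	err = request_threaded_irq(irq, NULL /* no hard handler */,
				   my_thread_fn /* runs in process ctx */,
				   IRQF_SHARED | IRQF_ONESHOT,
				   "my-link", my_dev);
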
@@ -373,26 +430,37 @@
 	unsigned int dx = ring->sw_tail;
 	struct aq_ring_buff_s *first = NULL;
 	struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx];
+	bool need_context_tag = false;
+
+	dx_buff->flags = 0U;
 
 	if (unlikely(skb_is_gso(skb))) {
-		dx_buff->flags = 0U;
+		dx_buff->mss = skb_shinfo(skb)->gso_size;
+		dx_buff->is_gso = 1U;
 		dx_buff->len_pkt = skb->len;
 		dx_buff->len_l2 = ETH_HLEN;
 		dx_buff->len_l3 = ip_hdrlen(skb);
 		dx_buff->len_l4 = tcp_hdrlen(skb);
-		dx_buff->mss = skb_shinfo(skb)->gso_size;
-		dx_buff->is_txc = 1U;
 		dx_buff->eop_index = 0xffffU;
-
 		dx_buff->is_ipv6 =
 			(ip_hdr(skb)->version == 6) ? 1U : 0U;
+		need_context_tag = true;
+	}
 
+	if (self->aq_nic_cfg.is_vlan_tx_insert && skb_vlan_tag_present(skb)) {
+		dx_buff->vlan_tx_tag = skb_vlan_tag_get(skb);
+		dx_buff->len_pkt = skb->len;
+		dx_buff->is_vlan = 1U;
+		need_context_tag = true;
+	}
+
+	if (need_context_tag) {
 		dx = aq_ring_next_dx(ring, dx);
 		dx_buff = &ring->buff_ring[dx];
+		dx_buff->flags = 0U;
 		++ret;
 	}
 
-	dx_buff->flags = 0U;
 	dx_buff->len = skb_headlen(skb);
 	dx_buff->pa = dma_map_single(aq_nic_get_dev(self),
 				     skb->data,
@@ -481,7 +549,7 @@
 	     --ret, dx = aq_ring_next_dx(ring, dx)) {
 		dx_buff = &ring->buff_ring[dx];
 
-		if (!dx_buff->is_txc && dx_buff->pa) {
+		if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) {
 			if (unlikely(dx_buff->is_sop)) {
 				dma_unmap_single(aq_nic_get_dev(self),
 						 dx_buff->pa,
@@ -563,9 +631,12 @@
 
 int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
 {
-	unsigned int packet_filter = self->packet_filter;
+	const struct aq_hw_ops *hw_ops = self->aq_hw_ops;
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+	unsigned int packet_filter = ndev->flags;
 	struct netdev_hw_addr *ha = NULL;
 	unsigned int i = 0U;
+	int err = 0;
 
 	self->mc_list.count = 0;
 	if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
@@ -573,29 +644,28 @@
 	} else {
 		netdev_for_each_uc_addr(ha, ndev) {
 			ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
-			if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
-				break;
 		}
 	}
 
-	if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
-		packet_filter |= IFF_ALLMULTI;
-	} else {
-		netdev_for_each_mc_addr(ha, ndev) {
-			ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
-			if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
-				break;
+	cfg->is_mc_list_enabled = !!(packet_filter & IFF_MULTICAST);
+	if (cfg->is_mc_list_enabled) {
+		if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
+			packet_filter |= IFF_ALLMULTI;
+		} else {
+			netdev_for_each_mc_addr(ha, ndev) {
+				ether_addr_copy(self->mc_list.ar[i++],
+						ha->addr);
+			}
 		}
 	}
 
 	if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) {
-		packet_filter |= IFF_MULTICAST;
 		self->mc_list.count = i;
-		self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
-						       self->mc_list.ar,
-						       self->mc_list.count);
+		err = hw_ops->hw_multicast_list_set(self->aq_hw,
+						    self->mc_list.ar,
+						    self->mc_list.count);
+		if (err < 0)
+			return err;
 	}
 	return aq_nic_set_packet_filter(self, packet_filter);
 }
@@ -644,7 +714,14 @@
 	unsigned int i = 0U;
 	unsigned int count = 0U;
 	struct aq_vec_s *aq_vec = NULL;
-	struct aq_stats_s *stats = self->aq_hw_ops->hw_get_hw_stats(self->aq_hw);
+	struct aq_stats_s *stats;
+
+	if (self->aq_fw_ops->update_stats) {
+		mutex_lock(&self->fwreq_mutex);
+		self->aq_fw_ops->update_stats(self->aq_hw);
+		mutex_unlock(&self->fwreq_mutex);
+	}
+	stats = self->aq_hw_ops->hw_get_hw_stats(self->aq_hw);
 
 	if (!stats)
 		goto err_exit;
@@ -690,11 +767,12 @@
 	struct net_device *ndev = self->ndev;
 	struct aq_stats_s *stats = self->aq_hw_ops->hw_get_hw_stats(self->aq_hw);
 
-	ndev->stats.rx_packets = stats->uprc + stats->mprc + stats->bprc;
-	ndev->stats.rx_bytes = stats->ubrc + stats->mbrc + stats->bbrc;
+	ndev->stats.rx_packets = stats->dma_pkt_rc;
+	ndev->stats.rx_bytes = stats->dma_oct_rc;
 	ndev->stats.rx_errors = stats->erpr;
-	ndev->stats.tx_packets = stats->uptc + stats->mptc + stats->bptc;
-	ndev->stats.tx_bytes = stats->ubtc + stats->mbtc + stats->bbtc;
+	ndev->stats.rx_dropped = stats->dpc;
+	ndev->stats.tx_packets = stats->dma_pkt_tc;
+	ndev->stats.tx_bytes = stats->dma_oct_tc;
 	ndev->stats.tx_errors = stats->erpt;
 	ndev->stats.multicast = stats->mprc;
 }
@@ -772,7 +850,9 @@
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Pause);
 
-	if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX)
+	/* Asym is when either RX or TX, but not both */
+	if (!!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) ^
+	    !!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX))
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Asym_Pause);
 
@@ -829,7 +909,9 @@
 		self->aq_nic_cfg.is_autoneg = false;
 	}
 
+	mutex_lock(&self->fwreq_mutex);
 	err = self->aq_fw_ops->set_link_speed(self->aq_hw, rate);
+	mutex_unlock(&self->fwreq_mutex);
 	if (err < 0)
 		goto err_exit;
 
@@ -862,6 +944,7 @@
 	netif_carrier_off(self->ndev);
 
 	del_timer_sync(&self->service_timer);
+	cancel_work_sync(&self->service_task);
 
 	self->aq_hw_ops->hw_irq_disable(self->aq_hw, AQ_CFG_IRQ_MASK);
 
@@ -889,13 +972,23 @@
 		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
 		aq_vec_deinit(aq_vec);
 
-	if (self->power_state == AQ_HW_POWER_STATE_D0) {
-		(void)self->aq_fw_ops->deinit(self->aq_hw);
-	} else {
-		(void)self->aq_hw_ops->hw_set_power(self->aq_hw,
-						   self->power_state);
+	if (likely(self->aq_fw_ops->deinit)) {
+		mutex_lock(&self->fwreq_mutex);
+		self->aq_fw_ops->deinit(self->aq_hw);
+		mutex_unlock(&self->fwreq_mutex);
 	}
 
+	if (self->power_state != AQ_HW_POWER_STATE_D0 ||
+	    self->aq_hw->aq_nic_cfg->wol)
+		if (likely(self->aq_fw_ops->set_power)) {
+			mutex_lock(&self->fwreq_mutex);
+			self->aq_fw_ops->set_power(self->aq_hw,
+						   self->power_state,
+						   self->ndev->dev_addr);
+			mutex_unlock(&self->fwreq_mutex);
+		}
+
+
 }
 
@@ -974,4 +1067,4 @@
 
 err_exit:
 	rtnl_unlock();
-}
\ No newline at end of file
+}
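
The service-timer rework follows a common kernel pattern: timer callbacks run in softirq context and must not sleep, but the stats update now takes fwreq_mutex, so the callback only re-arms itself and defers the real work to the driver's workqueue. Reduced to its essentials (a sketch, not the literal driver code):

	static void tick(struct timer_list *t)
	{
		struct aq_nic_s *self = from_timer(self, t, service_timer);

		/* Re-arm first, then hand off to sleepable context. */
		mod_timer(t, jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
		aq_ndev_schedule_work(&self->service_task);
	}
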
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index fecfc40..255b54a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_nic.h: Declaration of common code for NIC. */
@@ -23,19 +20,24 @@
 
 struct aq_nic_cfg_s {
 	const struct aq_hw_caps_s *aq_hw_caps;
-	u64 hw_features;
+	u64 features;
 	u32 rxds;		/* rx ring size, descriptors # */
 	u32 txds;		/* tx ring size, descriptors # */
-	u32 vecs;		/* vecs==allocated irqs */
+	u32 vecs;		/* allocated rx/tx vectors */
+	u32 link_irq_vec;
 	u32 irq_type;
 	u32 itr;
 	u16 rx_itr;
 	u16 tx_itr;
+	u32 rxpageorder;
 	u32 num_rss_queues;
 	u32 mtu;
 	u32 flow_control;
 	u32 link_speed_msk;
-	u32 vlan_id;
+	u32 wol;
+	u8 is_vlan_rx_strip;
+	u8 is_vlan_tx_insert;
+	bool is_vlan_force_promisc;
 	u16 is_mc_list_enabled;
 	u16 mc_list_count;
 	bool is_autoneg;
@@ -44,6 +46,7 @@
 	bool is_lro;
 	u8  tcs;
 	struct aq_rss_parameters aq_rss;
+	u32 eee_speeds;
 };
 
 #define AQ_NIC_FLAG_STARTED     0x00000004U
@@ -54,9 +57,28 @@
 #define AQ_NIC_FLAG_ERR_UNPLUG  0x40000000U
 #define AQ_NIC_FLAG_ERR_HW      0x80000000U
 
+#define AQ_NIC_WOL_ENABLED	BIT(0)
+
 #define AQ_NIC_TCVEC2RING(_NIC_, _TC_, _VEC_) \
 	((_TC_) * AQ_CFG_TCS_MAX + (_VEC_))
 
+struct aq_hw_rx_fl2 {
+	struct aq_rx_filter_vlan aq_vlans[AQ_VLAN_MAX_FILTERS];
+};
+
+struct aq_hw_rx_fl3l4 {
+	u8   active_ipv4;
+	u8   active_ipv6:2;
+	u8   is_ipv6;
+};
+
+struct aq_hw_rx_fltrs_s {
+	struct hlist_head     filter_list;
+	u16                   active_filters;
+	struct aq_hw_rx_fl2   fl2;
+	struct aq_hw_rx_fl3l4 fl3l4;
+};
+
 struct aq_nic_s {
 	atomic_t flags;
 	struct aq_vec_s *aq_vec[AQ_CFG_VECS_MAX];
@@ -71,16 +93,22 @@
 	const struct aq_fw_ops *aq_fw_ops;
 	struct aq_nic_cfg_s aq_nic_cfg;
 	struct timer_list service_timer;
+	struct work_struct service_task;
 	struct timer_list polling_timer;
 	struct aq_hw_link_status_s link_status;
 	struct {
 		u32 count;
 		u8 ar[AQ_HW_MULTICAST_ADDRESS_MAX][ETH_ALEN];
 	} mc_list;
+	/* Bitmask of currently assigned VLANs from Linux */
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 
 	struct pci_dev *pdev;
 	unsigned int msix_entry_mask;
 	u32 irqvecs;
+	/* mutex to serialize FW interface access operations */
+	struct mutex fwreq_mutex;
+	struct aq_hw_rx_fltrs_s aq_hw_rx_fltrs;
 };
 
 static inline struct device *aq_nic_get_dev(struct aq_nic_s *self)
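
The two-bit active_ipv6 field above follows from slot geometry, assuming (as the hardware filter layout suggests) that one IPv6 L3/L4 rule consumes four consecutive slots: eight slots at locations 32-39 then admit at most two IPv6 filters. The arithmetic, under that assumption:

	/* Assumed geometry: each IPv6 rule takes 4 of the 8 L3/L4 slots. */
	#define L3L4_SLOTS	(AQ_RX_LAST_LOC_FL3L4 - AQ_RX_FIRST_LOC_FL3L4 + 1U)
	#define IPV6_SLOT_COST	4U
	#define MAX_IPV6_RULES	(L3L4_SLOTS / IPV6_SLOT_COST)	/* == 2 */
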
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 7500075..74b9f3f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_pci_func.c: Definition of PCI functions. */
@@ -19,6 +16,8 @@
 #include "aq_pci_func.h"
 #include "hw_atl/hw_atl_a0.h"
 #include "hw_atl/hw_atl_b0.h"
+#include "aq_filters.h"
+#include "aq_drvinfo.h"
 
 static const struct pci_device_id aq_pci_tbl[] = {
 	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_0001), },
@@ -41,9 +40,6 @@
 	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC111S), },
 	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC112S), },
 
-	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC111E), },
-	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC112E), },
-
 	{}
 };
 
@@ -73,9 +69,6 @@
 	{ AQ_DEVICE_ID_AQC109S,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc109s, },
 	{ AQ_DEVICE_ID_AQC111S,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc111s, },
 	{ AQ_DEVICE_ID_AQC112S,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc112s, },
-
-	{ AQ_DEVICE_ID_AQC111E,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc111e, },
-	{ AQ_DEVICE_ID_AQC112E,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc112e, },
 };
 
 MODULE_DEVICE_TABLE(pci, aq_pci_tbl);
@@ -84,7 +77,7 @@
 				     const struct aq_hw_ops **ops,
 				     const struct aq_hw_caps_s **caps)
 {
-	int i = 0;
+	int i;
 
 	if (pdev->vendor != PCI_VENDOR_ID_AQUANTIA)
 		return -EINVAL;
@@ -107,7 +100,7 @@
 
 int aq_pci_func_init(struct pci_dev *pdev)
 {
-	int err = 0;
+	int err;
 
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (!err) {
@@ -138,41 +131,50 @@
 }
 
 int aq_pci_func_alloc_irq(struct aq_nic_s *self, unsigned int i,
-			  char *name, void *aq_vec, cpumask_t *affinity_mask)
+			  char *name, irq_handler_t irq_handler,
+			  void *irq_arg, cpumask_t *affinity_mask)
 {
 	struct pci_dev *pdev = self->pdev;
-	int err = 0;
+	int err;
 
 	if (pdev->msix_enabled || pdev->msi_enabled)
-		err = request_irq(pci_irq_vector(pdev, i), aq_vec_isr, 0,
-				  name, aq_vec);
+		err = request_irq(pci_irq_vector(pdev, i), irq_handler, 0,
+				  name, irq_arg);
 	else
 		err = request_irq(pci_irq_vector(pdev, i), aq_vec_isr_legacy,
-				  IRQF_SHARED, name, aq_vec);
+				  IRQF_SHARED, name, irq_arg);
 
 	if (err >= 0) {
 		self->msix_entry_mask |= (1 << i);
-		self->aq_vec[i] = aq_vec;
 
-		if (pdev->msix_enabled)
+		if (pdev->msix_enabled && affinity_mask)
 			irq_set_affinity_hint(pci_irq_vector(pdev, i),
 					      affinity_mask);
 	}
+
 	return err;
 }
 
 void aq_pci_func_free_irqs(struct aq_nic_s *self)
 {
 	struct pci_dev *pdev = self->pdev;
-	unsigned int i = 0U;
+	unsigned int i;
+	void *irq_data;
 
 	for (i = 32U; i--;) {
 		if (!((1U << i) & self->msix_entry_mask))
 			continue;
+		if (self->aq_nic_cfg.link_irq_vec &&
+		    i == self->aq_nic_cfg.link_irq_vec)
+			irq_data = self;
+		else if (i < AQ_CFG_VECS_MAX)
+			irq_data = self->aq_vec[i];
+		else
+			continue;
 
 		if (pdev->msix_enabled)
 			irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
-		free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
+		free_irq(pci_irq_vector(pdev, i), irq_data);
 		self->msix_entry_mask &= ~(1U << i);
 	}
 }
@@ -182,7 +184,7 @@
 	if (self->pdev->msix_enabled)
 		return AQ_HW_IRQ_MSIX;
 	if (self->pdev->msi_enabled)
-		return AQ_HW_IRQ_MSIX;
+		return AQ_HW_IRQ_MSI;
 	return AQ_HW_IRQ_LEGACY;
 }
 
@@ -194,8 +196,8 @@
 static int aq_pci_probe(struct pci_dev *pdev,
 			const struct pci_device_id *pci_id)
 {
-	struct aq_nic_s *self = NULL;
-	int err = 0;
+	struct aq_nic_s *self;
+	int err;
 	struct net_device *ndev;
 	resource_size_t mmio_pa;
 	u32 bar;
@@ -220,6 +222,8 @@
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 	pci_set_drvdata(pdev, self);
 
+	mutex_init(&self->fwreq_mutex);
+
 	err = aq_pci_probe_get_hw_by_id(pdev, &self->aq_hw_ops,
 					&aq_nic_get_cfg(self)->aq_hw_caps);
 	if (err)
@@ -265,6 +269,7 @@
 	numvecs = min((u8)AQ_CFG_VECS_DEF,
 		      aq_nic_get_cfg(self)->aq_hw_caps->msix_irqs);
 	numvecs = min(numvecs, num_online_cpus());
+	numvecs += AQ_HW_SERVICE_IRQS;
 	/*enable interrupts */
 #if !AQ_CFG_FORCE_LEGACY_INT
 	err = pci_alloc_irq_vectors(self->pdev, 1, numvecs,
@@ -286,6 +291,8 @@
 	if (err < 0)
 		goto err_register;
 
+	aq_drvinfo_init(ndev);
+
 	return 0;
 
 err_register:
@@ -309,6 +316,7 @@
 	struct aq_nic_s *self = pci_get_drvdata(pdev);
 
 	if (self->ndev) {
+		aq_clear_rxnfc_all_rules(self);
 		if (self->ndev->reg_state == NETREG_REGISTERED)
 			unregister_netdev(self->ndev);
 		aq_nic_free_vectors(self);
@@ -361,4 +369,13 @@
 	.shutdown = aq_pci_shutdown,
 };
 
-module_pci_driver(aq_pci_ops);
+int aq_pci_func_register_driver(void)
+{
+	return pci_register_driver(&aq_pci_ops);
+}
+
+void aq_pci_func_unregister_driver(void)
+{
+	pci_unregister_driver(&aq_pci_ops);
+}
+
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h
index aeee67b..77be7ee 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_pci_func.h: Declaration of PCI functions. */
@@ -24,9 +21,12 @@
 
 int aq_pci_func_init(struct pci_dev *pdev);
 int aq_pci_func_alloc_irq(struct aq_nic_s *self, unsigned int i,
-			  char *name, void *aq_vec,
-			  cpumask_t *affinity_mask);
+			  char *name, irq_handler_t irq_handler,
+			  void *irq_arg, cpumask_t *affinity_mask);
 void aq_pci_func_free_irqs(struct aq_nic_s *self);
 unsigned int aq_pci_func_get_irq_type(struct aq_nic_s *self);
 
+int aq_pci_func_register_driver(void);
+void aq_pci_func_unregister_driver(void);
+
 #endif /* AQ_PCI_FUNC_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 7134d0d..76bdbe1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_ring.c: Definition of functions for Rx/Tx rings. */
@@ -12,10 +9,89 @@
 #include "aq_ring.h"
 #include "aq_nic.h"
 #include "aq_hw.h"
+#include "aq_hw_utils.h"
 
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 
+static inline void aq_free_rxpage(struct aq_rxpage *rxpage, struct device *dev)
+{
+	unsigned int len = PAGE_SIZE << rxpage->order;
+
+	dma_unmap_page(dev, rxpage->daddr, len, DMA_FROM_DEVICE);
+
+	/* Drop the ref for being in the ring. */
+	__free_pages(rxpage->page, rxpage->order);
+	rxpage->page = NULL;
+}
+
+static int aq_get_rxpage(struct aq_rxpage *rxpage, unsigned int order,
+			 struct device *dev)
+{
+	struct page *page;
+	dma_addr_t daddr;
+	int ret = -ENOMEM;
+
+	page = dev_alloc_pages(order);
+	if (unlikely(!page))
+		goto err_exit;
+
+	daddr = dma_map_page(dev, page, 0, PAGE_SIZE << order,
+			     DMA_FROM_DEVICE);
+
+	if (unlikely(dma_mapping_error(dev, daddr)))
+		goto free_page;
+
+	rxpage->page = page;
+	rxpage->daddr = daddr;
+	rxpage->order = order;
+	rxpage->pg_off = 0;
+
+	return 0;
+
+free_page:
+	__free_pages(page, order);
+
+err_exit:
+	return ret;
+}
+
+static int aq_get_rxpages(struct aq_ring_s *self, struct aq_ring_buff_s *rxbuf,
+			  int order)
+{
+	int ret;
+
+	if (rxbuf->rxdata.page) {
+		/* Refcount == 1 means the ring is the sole user and may reuse */
+		if (page_ref_count(rxbuf->rxdata.page) > 1) {
+			/* Try to reuse the buffer */
+			rxbuf->rxdata.pg_off += AQ_CFG_RX_FRAME_MAX;
+			if (rxbuf->rxdata.pg_off + AQ_CFG_RX_FRAME_MAX <=
+				(PAGE_SIZE << order)) {
+				self->stats.rx.pg_flips++;
+			} else {
+				/* Buffer exhausted. We have other users and
+				 * should release this page and realloc
+				 */
+				aq_free_rxpage(&rxbuf->rxdata,
+					       aq_nic_get_dev(self->aq_nic));
+				self->stats.rx.pg_losts++;
+			}
+		} else {
+			rxbuf->rxdata.pg_off = 0;
+			self->stats.rx.pg_reuses++;
+		}
+	}
+
+	if (!rxbuf->rxdata.page) {
+		ret = aq_get_rxpage(&rxbuf->rxdata, order,
+				    aq_nic_get_dev(self->aq_nic));
+		return ret;
+	}
+
+	return 0;
+}
+
 static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self,
 				       struct aq_nic_s *aq_nic)
 {
@@ -29,8 +105,8 @@
 		goto err_exit;
 	}
 	self->dx_ring = dma_alloc_coherent(aq_nic_get_dev(aq_nic),
-						self->size * self->dx_size,
-						&self->dx_ring_pa, GFP_KERNEL);
+					   self->size * self->dx_size,
+					   &self->dx_ring_pa, GFP_KERNEL);
 	if (!self->dx_ring) {
 		err = -ENOMEM;
 		goto err_exit;
@@ -81,6 +157,11 @@
 	self->idx = idx;
 	self->size = aq_nic_cfg->rxds;
 	self->dx_size = aq_nic_cfg->aq_hw_caps->rxd_size;
+	self->page_order = fls(AQ_CFG_RX_FRAME_MAX / PAGE_SIZE +
+			       (AQ_CFG_RX_FRAME_MAX % PAGE_SIZE ? 1 : 0)) - 1;
+
+	if (aq_nic_cfg->rxpageorder > self->page_order)
+		self->page_order = aq_nic_cfg->rxpageorder;
 
 	self = aq_ring_alloc(self, aq_nic);
 	if (!self) {
@@ -139,10 +220,10 @@
 bool aq_ring_tx_clean(struct aq_ring_s *self)
 {
 	struct device *dev = aq_nic_get_dev(self->aq_nic);
-	unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET;
+	unsigned int budget;
 
-	for (; self->sw_head != self->hw_head && budget--;
-		self->sw_head = aq_ring_next_dx(self, self->sw_head)) {
+	for (budget = AQ_CFG_TX_CLEAN_BUDGET;
+	     budget && self->sw_head != self->hw_head; budget--) {
 		struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
 
 		if (likely(buff->is_mapped)) {
@@ -167,6 +248,7 @@
 
 		buff->pa = 0U;
 		buff->eop_index = 0xffffU;
+		self->sw_head = aq_ring_next_dx(self, self->sw_head);
 	}
 
 	return !!budget;
@@ -186,11 +268,12 @@
 	}
 	if (buff->is_ip_cso) {
 		__skb_incr_checksum_unnecessary(skb);
-		if (buff->is_udp_cso || buff->is_tcp_cso)
-			__skb_incr_checksum_unnecessary(skb);
 	} else {
 		skb->ip_summed = CHECKSUM_NONE;
 	}
+
+	if (buff->is_udp_cso || buff->is_tcp_cso)
+		__skb_incr_checksum_unnecessary(skb);
 }
 
 #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
@@ -200,93 +283,137 @@
 		     int budget)
 {
 	struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
-	int err = 0;
 	bool is_rsc_completed = true;
+	int err = 0;
 
 	for (; (self->sw_head != self->hw_head) && budget;
 		self->sw_head = aq_ring_next_dx(self, self->sw_head),
 		--budget, ++(*work_done)) {
 		struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
+		struct aq_ring_buff_s *buff_ = NULL;
 		struct sk_buff *skb = NULL;
 		unsigned int next_ = 0U;
 		unsigned int i = 0U;
-		struct aq_ring_buff_s *buff_ = NULL;
-
-		if (buff->is_error) {
-			__free_pages(buff->page, 0);
-			continue;
-		}
+		u16 hdr_len;
 
 		if (buff->is_cleaned)
 			continue;
 
 		if (!buff->is_eop) {
-			for (next_ = buff->next,
-			     buff_ = &self->buff_ring[next_]; true;
-			     next_ = buff_->next,
-			     buff_ = &self->buff_ring[next_]) {
+			buff_ = buff;
+			do {
+				next_ = buff_->next;
+				buff_ = &self->buff_ring[next_];
 				is_rsc_completed =
 					aq_ring_dx_in_range(self->sw_head,
 							    next_,
 							    self->hw_head);
 
-				if (unlikely(!is_rsc_completed)) {
-					is_rsc_completed = false;
+				if (unlikely(!is_rsc_completed))
 					break;
-				}
 
-				if (buff_->is_eop)
-					break;
-			}
+				buff->is_error |= buff_->is_error;
+				buff->is_cso_err |= buff_->is_cso_err;
+
+			} while (!buff_->is_eop);
 
 			if (!is_rsc_completed) {
 				err = 0;
 				goto err_exit;
 			}
+			if (buff->is_error || buff->is_cso_err) {
+				buff_ = buff;
+				do {
+					next_ = buff_->next;
+					buff_ = &self->buff_ring[next_];
+
+					buff_->is_cleaned = true;
+				} while (!buff_->is_eop);
+
+				++self->stats.rx.errors;
+				continue;
+			}
 		}
 
+		if (buff->is_error) {
+			++self->stats.rx.errors;
+			continue;
+		}
+
+		dma_sync_single_range_for_cpu(aq_nic_get_dev(self->aq_nic),
+					      buff->rxdata.daddr,
+					      buff->rxdata.pg_off,
+					      buff->len, DMA_FROM_DEVICE);
+
 		/* for single fragment packets use build_skb() */
 		if (buff->is_eop &&
 		    buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) {
-			skb = build_skb(page_address(buff->page),
+			skb = build_skb(aq_buf_vaddr(&buff->rxdata),
 					AQ_CFG_RX_FRAME_MAX);
 			if (unlikely(!skb)) {
 				err = -ENOMEM;
 				goto err_exit;
 			}
-
 			skb_put(skb, buff->len);
+			page_ref_inc(buff->rxdata.page);
 		} else {
-			skb = netdev_alloc_skb(ndev, ETH_HLEN);
+			skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
 			if (unlikely(!skb)) {
 				err = -ENOMEM;
 				goto err_exit;
 			}
-			skb_put(skb, ETH_HLEN);
-			memcpy(skb->data, page_address(buff->page), ETH_HLEN);
 
-			skb_add_rx_frag(skb, 0, buff->page, ETH_HLEN,
-					buff->len - ETH_HLEN,
-					SKB_TRUESIZE(buff->len - ETH_HLEN));
+			hdr_len = buff->len;
+			if (hdr_len > AQ_CFG_RX_HDR_SIZE)
+				hdr_len = eth_get_headlen(skb->dev,
+							  aq_buf_vaddr(&buff->rxdata),
+							  AQ_CFG_RX_HDR_SIZE);
+
+			memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
+			       ALIGN(hdr_len, sizeof(long)));
+
+			if (buff->len - hdr_len > 0) {
+				skb_add_rx_frag(skb, 0, buff->rxdata.page,
+						buff->rxdata.pg_off + hdr_len,
+						buff->len - hdr_len,
+						AQ_CFG_RX_FRAME_MAX);
+				page_ref_inc(buff->rxdata.page);
+			}
 
 			if (!buff->is_eop) {
-				for (i = 1U, next_ = buff->next,
-				     buff_ = &self->buff_ring[next_];
-				     true; next_ = buff_->next,
-				     buff_ = &self->buff_ring[next_], ++i) {
-					skb_add_rx_frag(skb, i,
-							buff_->page, 0,
+				buff_ = buff;
+				i = 1U;
+				do {
+					next_ = buff_->next;
+					buff_ = &self->buff_ring[next_];
+
+					dma_sync_single_range_for_cpu(
+							aq_nic_get_dev(self->aq_nic),
+							buff_->rxdata.daddr,
+							buff_->rxdata.pg_off,
 							buff_->len,
-							SKB_TRUESIZE(buff->len -
-							ETH_HLEN));
+							DMA_FROM_DEVICE);
+					skb_add_rx_frag(skb, i++,
+							buff_->rxdata.page,
+							buff_->rxdata.pg_off,
+							buff_->len,
+							AQ_CFG_RX_FRAME_MAX);
+					page_ref_inc(buff_->rxdata.page);
 					buff_->is_cleaned = 1;
 
-					if (buff_->is_eop)
-						break;
-				}
+					buff->is_ip_cso &= buff_->is_ip_cso;
+					buff->is_udp_cso &= buff_->is_udp_cso;
+					buff->is_tcp_cso &= buff_->is_tcp_cso;
+					buff->is_cso_err |= buff_->is_cso_err;
+
+				} while (!buff_->is_eop);
 			}
 		}
 
+		if (buff->is_vlan)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       buff->vlan_rx_tag);
+
 		skb->protocol = eth_type_trans(skb, ndev);
 
 		aq_rx_checksum(self, buff, skb);
@@ -309,12 +436,15 @@
 
 int aq_ring_rx_fill(struct aq_ring_s *self)
 {
-	unsigned int pages_order = fls(AQ_CFG_RX_FRAME_MAX / PAGE_SIZE +
-		(AQ_CFG_RX_FRAME_MAX % PAGE_SIZE ? 1 : 0)) - 1;
+	unsigned int page_order = self->page_order;
 	struct aq_ring_buff_s *buff = NULL;
 	int err = 0;
 	int i = 0;
 
+	if (aq_ring_avail_dx(self) < min_t(unsigned int, AQ_CFG_RX_REFILL_THRES,
+					   self->size / 2))
+		return err;
+
 	for (i = aq_ring_avail_dx(self); i--;
 		self->sw_tail = aq_ring_next_dx(self, self->sw_tail)) {
 		buff = &self->buff_ring[self->sw_tail];
@@ -322,30 +452,15 @@
 		buff->flags = 0U;
 		buff->len = AQ_CFG_RX_FRAME_MAX;
 
-		buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order);
-		if (!buff->page) {
-			err = -ENOMEM;
+		err = aq_get_rxpages(self, buff, page_order);
+		if (err)
 			goto err_exit;
-		}
 
-		buff->pa = dma_map_page(aq_nic_get_dev(self->aq_nic),
-					buff->page, 0,
-					AQ_CFG_RX_FRAME_MAX, DMA_FROM_DEVICE);
-
-		if (dma_mapping_error(aq_nic_get_dev(self->aq_nic), buff->pa)) {
-			err = -ENOMEM;
-			goto err_exit;
-		}
-
+		buff->pa = aq_buf_daddr(&buff->rxdata);
 		buff = NULL;
 	}
 
 err_exit:
-	if (err < 0) {
-		if (buff && buff->page)
-			__free_pages(buff->page, 0);
-	}
-
 	return err;
 }
 
@@ -358,10 +473,7 @@
 		self->sw_head = aq_ring_next_dx(self, self->sw_head)) {
 		struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
 
-		dma_unmap_page(aq_nic_get_dev(self->aq_nic), buff->pa,
-			       AQ_CFG_RX_FRAME_MAX, DMA_FROM_DEVICE);
-
-		__free_pages(buff->page, 0);
+		aq_free_rxpage(&buff->rxdata, aq_nic_get_dev(self->aq_nic));
 	}
 
 err_exit:;
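
The refill path is built around page reuse: the ring keeps exactly one reference to each RX page, and every buffer handed to the stack via build_skb() or skb_add_rx_frag() takes an extra reference with page_ref_inc(). At refill time the reference count alone decides the page's fate, as the pg_reuses/pg_flips/pg_losts counters record. The decision in aq_get_rxpages(), isolated (a sketch):

	if (page_ref_count(page) == 1) {
		/* Sole owner: rewind the offset and reuse in place. */
		pg_off = 0;
	} else if (pg_off + 2 * AQ_CFG_RX_FRAME_MAX <= (PAGE_SIZE << order)) {
		/* Stack still holds a ref: flip to the next free chunk. */
		pg_off += AQ_CFG_RX_FRAME_MAX;
	} else {
		/* Page exhausted: drop the ring's ref and allocate anew. */
	}
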
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index ac1329f..47abd09 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_ring.h: Declaration of functions for Rx/Tx rings. */
@@ -17,13 +14,20 @@
 struct page;
 struct aq_nic_cfg_s;
 
+struct aq_rxpage {
+	struct page *page;
+	dma_addr_t daddr;
+	unsigned int order;
+	unsigned int pg_off;
+};
+
 /*           TxC       SOP        DX         EOP
  *         +----------+----------+----------+-----------
  *   8bytes|len l3,l4 | pa       | pa       | pa
  *         +----------+----------+----------+-----------
  * 4/8bytes|len pkt   |len pkt   |          | skb
  *         +----------+----------+----------+-----------
- * 4/8bytes|is_txc    |len,flags |len       |len,is_eop
+ * 4/8bytes|is_gso    |len,flags |len       |len,is_eop
  *         +----------+----------+----------+-----------
  *
  *  This aq_ring_buff_s doesn't have endianness dependency.
@@ -31,28 +35,22 @@
  */
 struct __packed aq_ring_buff_s {
 	union {
+		/* RX/TX */
+		dma_addr_t pa;
 		/* RX */
 		struct {
 			u32 rss_hash;
 			u16 next;
 			u8 is_hash_l4;
 			u8 rsvd1;
-			struct page *page;
+			struct aq_rxpage rxdata;
+			u16 vlan_rx_tag;
 		};
 		/* EOP */
 		struct {
 			dma_addr_t pa_eop;
 			struct sk_buff *skb;
 		};
-		/* DX */
-		struct {
-			dma_addr_t pa;
-		};
-		/* SOP */
-		struct {
-			dma_addr_t pa_sop;
-			u32 len_pkt_sop;
-		};
 		/* TxC */
 		struct {
 			u32 mss;
@@ -62,6 +60,7 @@
 			u8 is_ipv6:1;
 			u8 rsvd2:7;
 			u32 len_pkt;
+			u16 vlan_tx_tag;
 		};
 	};
 	union {
@@ -73,11 +72,12 @@
 			u32 is_cso_err:1;
 			u32 is_sop:1;
 			u32 is_eop:1;
-			u32 is_txc:1;
+			u32 is_gso:1;
 			u32 is_mapped:1;
 			u32 is_cleaned:1;
 			u32 is_error:1;
-			u32 rsvd3:6;
+			u32 is_vlan:1;
+			u32 rsvd3:5;
 			u16 eop_index;
 			u16 rsvd4;
 		};
@@ -91,6 +91,9 @@
 	u64 bytes;
 	u64 lro_packets;
 	u64 jumbo_packets;
+	u64 pg_losts;
+	u64 pg_flips;
+	u64 pg_reuses;
 };
 
 struct aq_ring_stats_tx_s {
@@ -116,6 +119,7 @@
 	unsigned int size;	/* number of descriptors */
 	unsigned int dx_size;	/* TX or RX descriptor size, */
 				/* stored here for faster math */
+	unsigned int page_order;
 	union aq_ring_stats_s stats;
 	dma_addr_t dx_ring_pa;
 };
@@ -126,6 +130,16 @@
 	cpumask_t affinity_mask;
 };
 
+static inline void *aq_buf_vaddr(struct aq_rxpage *rxpage)
+{
+	return page_to_virt(rxpage->page) + rxpage->pg_off;
+}
+
+static inline dma_addr_t aq_buf_daddr(struct aq_rxpage *rxpage)
+{
+	return rxpage->daddr + rxpage->pg_off;
+}
+
 static inline unsigned int aq_ring_next_dx(struct aq_ring_s *self,
 					   unsigned int dx)
 {
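aq_buf_vaddr() and aq_buf_daddr() resolve the CPU and DMA addresses of the slice currently selected by pg_off; that is what lets one DMA-mapped, possibly higher-order page back several receive buffers. A minimal sketch of the slice advance this enables, assuming AQ_CFG_RX_FRAME_MAX-sized slices (the real reuse logic is in aq_ring.c):

    /* Hedged sketch: step pg_off to the next frame-sized slice,
     * wrapping once the page is exhausted.
     */
    static void aq_rxpage_flip_sketch(struct aq_rxpage *rxpage)
    {
    	unsigned int page_len = PAGE_SIZE << rxpage->order;

    	rxpage->pg_off += AQ_CFG_RX_FRAME_MAX;
    	if (rxpage->pg_off + AQ_CFG_RX_FRAME_MAX > page_len)
    		rxpage->pg_off = 0;	/* back to the first slice */
    }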
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_rss.h b/drivers/net/ethernet/aquantia/atlantic/aq_rss.h
index 1db6eb2..39b1f43 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_rss.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_rss.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_rss.h: Receive Side Scaling definitions. */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
index 786ea81..d3a0b2e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_utils.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_utils.h: Useful macro and structures used in all layers of driver. */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index d335c33..a95c263 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_vec.c: Definition of common structure for vector of Rx and Tx rings.
@@ -89,6 +86,7 @@
 			}
 		}
 
+err_exit:
 		if (!was_tx_cleaned)
 			work_done = budget;
 
@@ -98,7 +96,7 @@
 					1U << self->aq_ring_param.vec_idx);
 		}
 	}
-err_exit:
+
 	return work_done;
 }
 
@@ -308,15 +306,13 @@
 {
 	struct aq_vec_s *self = private;
 	u64 irq_mask = 0U;
-	irqreturn_t err = 0;
+	int err;
 
-	if (!self) {
-		err = -EINVAL;
-		goto err_exit;
-	}
+	if (!self)
+		return IRQ_NONE;
 	err = self->aq_hw_ops->hw_irq_read(self->aq_hw, &irq_mask);
 	if (err < 0)
-		goto err_exit;
+		return IRQ_NONE;
 
 	if (irq_mask) {
 		self->aq_hw_ops->hw_irq_disable(self->aq_hw,
@@ -324,11 +320,10 @@
 		napi_schedule(&self->napi);
 	} else {
 		self->aq_hw_ops->hw_irq_enable(self->aq_hw, 1U);
-		err = IRQ_NONE;
+		return IRQ_NONE;
 	}
 
-err_exit:
-	return err >= 0 ? IRQ_HANDLED : IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
 cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self)
@@ -353,6 +348,9 @@
 		stats_rx->errors += rx->errors;
 		stats_rx->jumbo_packets += rx->jumbo_packets;
 		stats_rx->lro_packets += rx->lro_packets;
+		stats_rx->pg_losts += rx->pg_losts;
+		stats_rx->pg_flips += rx->pg_flips;
+		stats_rx->pg_reuses += rx->pg_reuses;
 
 		stats_tx->packets += tx->packets;
 		stats_tx->bytes += tx->bytes;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
index 8bdf60b..0fe8e09 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File aq_vec.h: Definition of common structures for vector of Rx and Tx rings.
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
deleted file mode 100644
index 805fa28..0000000
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 97addfa..359a4d3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_a0.c: Definition of Atlantic hardware specific functions. */
@@ -49,42 +46,43 @@
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc100 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_FIBRE,
-	.link_speed_msk = HW_ATL_A0_RATE_5G  |
-			  HW_ATL_A0_RATE_2G5 |
-			  HW_ATL_A0_RATE_1G  |
-			  HW_ATL_A0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_5G |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc107 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = HW_ATL_A0_RATE_10G |
-			  HW_ATL_A0_RATE_5G  |
-			  HW_ATL_A0_RATE_2G5 |
-			  HW_ATL_A0_RATE_1G  |
-			  HW_ATL_A0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_10G |
+			  AQ_NIC_RATE_5G |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc108 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = HW_ATL_A0_RATE_5G  |
-			  HW_ATL_A0_RATE_2G5 |
-			  HW_ATL_A0_RATE_1G  |
-			  HW_ATL_A0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_5G |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc109 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = HW_ATL_A0_RATE_2G5 |
-			  HW_ATL_A0_RATE_1G  |
-			  HW_ATL_A0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 static int hw_atl_a0_hw_reset(struct aq_hw_s *self)
 {
 	int err = 0;
+	u32 val;
 
 	hw_atl_glb_glb_reg_res_dis_set(self, 1U);
 	hw_atl_pci_pci_reg_res_dis_set(self, 0U);
@@ -95,7 +93,9 @@
 	hw_atl_glb_soft_res_set(self, 1);
 
 	/* check 10 times by 1ms */
-	AQ_HW_WAIT_FOR(hw_atl_glb_soft_res_get(self) == 0, 1000U, 10U);
+	err = readx_poll_timeout_atomic(hw_atl_glb_soft_res_get,
+					self, val, val == 0,
+					1000U, 10000U);
 	if (err < 0)
 		goto err_exit;
 
@@ -103,7 +103,9 @@
 	hw_atl_itr_res_irq_set(self, 1U);
 
 	/* check 10 times by 1ms */
-	AQ_HW_WAIT_FOR(hw_atl_itr_res_irq_get(self) == 0, 1000U, 10U);
+	err = readx_poll_timeout_atomic(hw_atl_itr_res_irq_get,
+					self, val, val == 0,
+					1000U, 10000U);
 	if (err < 0)
 		goto err_exit;
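readx_poll_timeout_atomic() is the <linux/iopoll.h> helper that replaces the driver-private AQ_HW_WAIT_FOR macro here: it repeatedly evaluates val = op(addr) with delay_us between reads and returns 0 once cond holds, or -ETIMEDOUT after timeout_us. The converted call keeps the old cadence (ten checks, 1 ms apart):

    u32 val;
    int err;

    /* poll hw_atl_glb_soft_res_get(self) until the register reads 0 */
    err = readx_poll_timeout_atomic(hw_atl_glb_soft_res_get, self,
    				    val, val == 0,
    				    1000U,   /* delay between reads, us */
    				    10000U); /* total budget, us */
    if (err < 0)
    	return err;	/* -ETIMEDOUT */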
 
@@ -181,6 +183,7 @@
 	int err = 0;
 	unsigned int i = 0U;
 	unsigned int addr = 0U;
+	u32 val;
 
 	for (i = 10, addr = 0U; i--; ++addr) {
 		u32 key_data = cfg->is_rss ?
@@ -188,8 +191,9 @@
 		hw_atl_rpf_rss_key_wr_data_set(self, key_data);
 		hw_atl_rpf_rss_key_addr_set(self, addr);
 		hw_atl_rpf_rss_key_wr_en_set(self, 1U);
-		AQ_HW_WAIT_FOR(hw_atl_rpf_rss_key_wr_en_get(self) == 0,
-			       1000U, 10U);
+		err = readx_poll_timeout_atomic(hw_atl_rpf_rss_key_wr_en_get,
+						self, val, val == 0,
+						1000U, 10000U);
 		if (err < 0)
 			goto err_exit;
 	}
@@ -207,8 +211,9 @@
 	u32 i = 0U;
 	u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
 	int err = 0;
-	u16 bitary[(HW_ATL_A0_RSS_REDIRECTION_MAX *
-					HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)];
+	u16 bitary[1 + (HW_ATL_A0_RSS_REDIRECTION_MAX *
+		   HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)];
+	u32 val;
 
 	memset(bitary, 0, sizeof(bitary));
 
@@ -222,8 +227,9 @@
 		hw_atl_rpf_rss_redir_tbl_wr_data_set(self, bitary[i]);
 		hw_atl_rpf_rss_redir_tbl_addr_set(self, i);
 		hw_atl_rpf_rss_redir_wr_en_set(self, 1U);
-		AQ_HW_WAIT_FOR(hw_atl_rpf_rss_redir_wr_en_get(self) == 0,
-			       1000U, 10U);
+		err = readx_poll_timeout_atomic(hw_atl_rpf_rss_redir_wr_en_get,
+						self, val, val == 0,
+						1000U, 10000U);
 		if (err < 0)
 			goto err_exit;
 	}
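The extra leading element in bitary[] guards the word-straddling OR in the packing loop (elided from this hunk). A hedged sketch of that packing, assuming 3-bit entries as HW_ATL_A0_RSS_REDIRECTION_BITS suggests:

    /* Entry i occupies bits (3*i)..(3*i + 2), which can straddle
     * bitary[(3*i)/16] and bitary[(3*i)/16 + 1]; the "1 +" in the
     * array size keeps the second index in bounds.
     */
    for (i = 0U; i < HW_ATL_A0_RSS_REDIRECTION_MAX; i++) {
    	unsigned int bit = 3U * i;
    	u16 q = i % num_rss_queues;

    	bitary[bit / 16U] |= q << (bit % 16U);
    	if ((bit % 16U) > 13U)	/* value spills into the next word */
    		bitary[bit / 16U + 1U] |= q >> (16U - bit % 16U);
    }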
@@ -284,7 +290,7 @@
 
 	/* RSS Ring selection */
 	hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ?
-					0xB3333333U : 0x00000000U);
+					  0xB3333333U : 0x00000000U);
 
 	/* Multicast filters */
 	for (i = HW_ATL_A0_MAC_MAX; i--;) {
@@ -325,7 +331,7 @@
 	}
 	h = (mac_addr[0] << 8) | (mac_addr[1]);
 	l = (mac_addr[2] << 24) | (mac_addr[3] << 16) |
-		(mac_addr[4] << 8) | mac_addr[5];
+	    (mac_addr[4] << 8) | mac_addr[5];
 
 	hw_atl_rpfl2_uc_flr_en_set(self, 0U, HW_ATL_A0_MAC);
 	hw_atl_rpfl2unicast_dest_addresslsw_set(self, l, HW_ATL_A0_MAC);
@@ -341,10 +347,10 @@
 static int hw_atl_a0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 {
 	static u32 aq_hw_atl_igcr_table_[4][2] = {
-		{ 0x20000000U, 0x20000000U }, /* AQ_IRQ_INVALID */
-		{ 0x20000080U, 0x20000080U }, /* AQ_IRQ_LEGACY */
-		{ 0x20000021U, 0x20000025U }, /* AQ_IRQ_MSI */
-		{ 0x20000022U, 0x20000026U }  /* AQ_IRQ_MSIX */
+		[AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U },
+		[AQ_HW_IRQ_LEGACY]  = { 0x20000080U, 0x20000080U },
+		[AQ_HW_IRQ_MSI]     = { 0x20000021U, 0x20000025U },
+		[AQ_HW_IRQ_MSIX]    = { 0x20000022U, 0x20000026U },
 	};
 
 	int err = 0;
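Switching the interrupt-control table to designated initializers pins each row to its AQ_HW_IRQ_* index, so reordering the enum can no longer silently misalign the rows. The general shape, as a standalone sketch:

    enum { MODE_INVALID, MODE_LEGACY };

    static const u32 table[][2] = {
    	[MODE_INVALID] = { 0x20000000U, 0x20000000U },
    	[MODE_LEGACY]  = { 0x20000080U, 0x20000080U },
    };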
@@ -445,7 +451,7 @@
 
 		buff = &ring->buff_ring[ring->sw_tail];
 
-		if (buff->is_txc) {
+		if (buff->is_gso) {
 			txd->ctl |= (buff->len_l3 << 31) |
 				(buff->len_l2 << 24) |
 				HW_ATL_A0_TXD_CTL_CMD_TCP |
@@ -519,7 +525,7 @@
 
 	hw_atl_rdm_rx_desc_data_buff_size_set(self,
 					      AQ_CFG_RX_FRAME_MAX / 1024U,
-				       aq_ring->idx);
+					      aq_ring->idx);
 
 	hw_atl_rdm_rx_desc_head_buff_size_set(self, 0U, aq_ring->idx);
 	hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, aq_ring->idx);
@@ -610,8 +616,6 @@
 static int hw_atl_a0_hw_ring_rx_receive(struct aq_hw_s *self,
 					struct aq_ring_s *ring)
 {
-	struct device *ndev = aq_nic_get_dev(ring->aq_nic);
-
 	for (; ring->hw_head != ring->sw_tail;
 		ring->hw_head = aq_ring_next_dx(ring, ring->hw_head)) {
 		struct aq_ring_buff_s *buff = NULL;
@@ -678,8 +682,6 @@
 		is_err &= ~0x18U;
 		is_err &= ~0x04U;
 
-		dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE);
-
 		if (is_err || rxd_wb->type & 0x1000U) {
 			/* status error or DMA error */
 			buff->is_error = 1U;
@@ -758,7 +760,7 @@
 		hw_atl_rpfl2_uc_flr_en_set(self,
 					   (self->aq_nic_cfg->is_mc_list_enabled &&
 					   (i <= self->aq_nic_cfg->mc_list_count)) ?
-					    1U : 0U, i);
+					   1U : 0U, i);
 
 	return aq_hw_err_from_flags(self);
 }
@@ -877,7 +879,6 @@
 const struct aq_hw_ops hw_atl_ops_a0 = {
 	.hw_set_mac_address   = hw_atl_a0_hw_mac_addr_set,
 	.hw_init              = hw_atl_a0_hw_init,
-	.hw_set_power         = hw_atl_utils_hw_set_power,
 	.hw_reset             = hw_atl_a0_hw_reset,
 	.hw_start             = hw_atl_a0_hw_start,
 	.hw_ring_tx_start     = hw_atl_a0_hw_ring_tx_start,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h
index 25fe954..1a294ad 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_a0.h: Declaration of abstract interface for Atlantic hardware
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
index 3c94cff..1b9b2e5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_a0_internal.h: Definition of Atlantic A0 chip specific
@@ -62,12 +59,6 @@
 #define HW_ATL_A0_MPI_SPEED_MSK       0xFFFFU
 #define HW_ATL_A0_MPI_SPEED_SHIFT     16U
 
-#define HW_ATL_A0_RATE_10G            BIT(0)
-#define HW_ATL_A0_RATE_5G             BIT(1)
-#define HW_ATL_A0_RATE_2G5            BIT(3)
-#define HW_ATL_A0_RATE_1G             BIT(4)
-#define HW_ATL_A0_RATE_100M           BIT(5)
-
 #define HW_ATL_A0_TXBUF_MAX 160U
 #define HW_ATL_A0_RXBUF_MAX 320U
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 56363ff..2ad3fa6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_b0.c: Definition of Atlantic hardware specific functions. */
@@ -21,7 +18,7 @@
 
 #define DEFAULT_B0_BOARD_BASIC_CAPABILITIES \
 	.is_64_dma = true,		  \
-	.msix_irqs = 4U,		  \
+	.msix_irqs = 8U,		  \
 	.irq_mask = ~0U,		  \
 	.vecs = HW_ATL_B0_RSS_MAX,	  \
 	.tcs = HW_ATL_B0_TC_MAX,	  \
@@ -41,7 +38,11 @@
 			NETIF_F_RXHASH |  \
 			NETIF_F_SG |      \
 			NETIF_F_TSO |     \
-			NETIF_F_LRO,      \
+			NETIF_F_LRO |     \
+			NETIF_F_NTUPLE |  \
+			NETIF_F_HW_VLAN_CTAG_FILTER | \
+			NETIF_F_HW_VLAN_CTAG_RX |     \
+			NETIF_F_HW_VLAN_CTAG_TX,      \
 	.hw_priv_flags = IFF_UNICAST_FLT, \
 	.flow_control = true,		  \
 	.mtu = HW_ATL_B0_MTU_JUMBO,	  \
@@ -51,38 +52,38 @@
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_FIBRE,
-	.link_speed_msk = HW_ATL_B0_RATE_10G |
-			  HW_ATL_B0_RATE_5G  |
-			  HW_ATL_B0_RATE_2G5 |
-			  HW_ATL_B0_RATE_1G  |
-			  HW_ATL_B0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_10G |
+			  AQ_NIC_RATE_5G |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc107 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = HW_ATL_B0_RATE_10G |
-			  HW_ATL_B0_RATE_5G  |
-			  HW_ATL_B0_RATE_2G5 |
-			  HW_ATL_B0_RATE_1G  |
-			  HW_ATL_B0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_10G |
+			  AQ_NIC_RATE_5G |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc108 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = HW_ATL_B0_RATE_5G  |
-			  HW_ATL_B0_RATE_2G5 |
-			  HW_ATL_B0_RATE_1G  |
-			  HW_ATL_B0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_5G |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc109 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = HW_ATL_B0_RATE_2G5 |
-			  HW_ATL_B0_RATE_1G  |
-			  HW_ATL_B0_RATE_100M,
+	.link_speed_msk = AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G |
+			  AQ_NIC_RATE_100M,
 };
 
 static int hw_atl_b0_hw_reset(struct aq_hw_s *self)
@@ -100,12 +101,17 @@
 	return err;
 }
 
+static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc)
+{
+	hw_atl_rpb_rx_xoff_en_per_tc_set(self, !!(fc & AQ_NIC_FC_RX), tc);
+	return 0;
+}
+
 static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 {
 	u32 tc = 0U;
 	u32 buff_size = 0U;
 	unsigned int i_priority = 0U;
-	bool is_rx_flow_control = false;
 
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
@@ -138,7 +144,6 @@
 
 	/* QoS Rx buf size per TC */
 	tc = 0;
-	is_rx_flow_control = (AQ_NIC_FC_RX & self->aq_nic_cfg->flow_control);
 	buff_size = HW_ATL_B0_RXBUF_MAX;
 
 	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
@@ -150,7 +155,8 @@
 						   (buff_size *
 						   (1024U / 32U) * 50U) /
 						   100U, tc);
-	hw_atl_rpb_rx_xoff_en_per_tc_set(self, is_rx_flow_control ? 1U : 0U, tc);
+
+	hw_atl_b0_set_fc(self, self->aq_nic_cfg->flow_control, tc);
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (i_priority = 8U; i_priority--;)
@@ -166,6 +172,7 @@
 	int err = 0;
 	unsigned int i = 0U;
 	unsigned int addr = 0U;
+	u32 val;
 
 	for (i = 10, addr = 0U; i--; ++addr) {
 		u32 key_data = cfg->is_rss ?
@@ -173,8 +180,9 @@
 		hw_atl_rpf_rss_key_wr_data_set(self, key_data);
 		hw_atl_rpf_rss_key_addr_set(self, addr);
 		hw_atl_rpf_rss_key_wr_en_set(self, 1U);
-		AQ_HW_WAIT_FOR(hw_atl_rpf_rss_key_wr_en_get(self) == 0,
-			       1000U, 10U);
+		err = readx_poll_timeout_atomic(hw_atl_rpf_rss_key_wr_en_get,
+						self, val, val == 0,
+						1000U, 10000U);
 		if (err < 0)
 			goto err_exit;
 	}
@@ -192,8 +200,9 @@
 	u32 i = 0U;
 	u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
 	int err = 0;
-	u16 bitary[(HW_ATL_B0_RSS_REDIRECTION_MAX *
-					HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)];
+	u16 bitary[1 + (HW_ATL_B0_RSS_REDIRECTION_MAX *
+		   HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)];
+	u32 val;
 
 	memset(bitary, 0, sizeof(bitary));
 
@@ -207,8 +216,9 @@
 		hw_atl_rpf_rss_redir_tbl_wr_data_set(self, bitary[i]);
 		hw_atl_rpf_rss_redir_tbl_addr_set(self, i);
 		hw_atl_rpf_rss_redir_wr_en_set(self, 1U);
-		AQ_HW_WAIT_FOR(hw_atl_rpf_rss_redir_wr_en_get(self) == 0,
-			       1000U, 10U);
+		err = readx_poll_timeout_atomic(hw_atl_rpf_rss_redir_wr_en_get,
+						self, val, val == 0,
+						1000U, 10000U);
 		if (err < 0)
 			goto err_exit;
 	}
@@ -229,12 +239,17 @@
 	hw_atl_tpo_tcp_udp_crc_offload_en_set(self, 1);
 
 	/* RX checksums offloads*/
-	hw_atl_rpo_ipv4header_crc_offload_en_set(self, 1);
-	hw_atl_rpo_tcp_udp_crc_offload_en_set(self, 1);
+	hw_atl_rpo_ipv4header_crc_offload_en_set(self, !!(aq_nic_cfg->features &
+						 NETIF_F_RXCSUM));
+	hw_atl_rpo_tcp_udp_crc_offload_en_set(self, !!(aq_nic_cfg->features &
+					      NETIF_F_RXCSUM));
 
 	/* LSO offloads*/
 	hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
 
+	/* Outer VLAN tag offload */
+	hw_atl_rpo_outer_vlan_tag_mode_set(self, 1U);
+
 /* LRO offloads */
 	{
 		unsigned int val = (8U < HW_ATL_B0_LRO_RXD_MAX) ? 0x3U :
@@ -246,13 +261,18 @@
 
 		hw_atl_rpo_lro_time_base_divider_set(self, 0x61AU);
 		hw_atl_rpo_lro_inactive_interval_set(self, 0);
-		hw_atl_rpo_lro_max_coalescing_interval_set(self, 2);
+		/* The LRO timebase divider is 5 us (0x61a), which is
+		 * multiplied by 50 (0x32) to get the default maximum
+		 * coalescing interval of 250 us.
+		 */
+		hw_atl_rpo_lro_max_coalescing_interval_set(self, 50);
 
 		hw_atl_rpo_lro_qsessions_lim_set(self, 1U);
 
 		hw_atl_rpo_lro_total_desc_lim_set(self, 2U);
 
-		hw_atl_rpo_lro_patch_optimization_en_set(self, 0U);
+		hw_atl_rpo_lro_patch_optimization_en_set(self, 1U);
 
 		hw_atl_rpo_lro_min_pay_of_first_pkt_set(self, 10U);
 
@@ -260,12 +280,19 @@
 
 		hw_atl_rpo_lro_en_set(self,
 				      aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U);
+		hw_atl_itr_rsc_en_set(self,
+				      aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U);
+
+		hw_atl_itr_rsc_delay_set(self, 1U);
 	}
 	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 {
+	/* Tx TC/Queue number config */
+	hw_atl_rpb_tps_tx_tc_mode_set(self, 1U);
+
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_last_pkt_set(self, 0x0F7FU);
@@ -312,20 +339,11 @@
 	hw_atl_rpf_vlan_outer_etht_set(self, 0x88A8U);
 	hw_atl_rpf_vlan_inner_etht_set(self, 0x8100U);
 
-	if (cfg->vlan_id) {
-		hw_atl_rpf_vlan_flr_act_set(self, 1U, 0U);
-		hw_atl_rpf_vlan_id_flr_set(self, 0U, 0U);
-		hw_atl_rpf_vlan_flr_en_set(self, 0U, 0U);
+	hw_atl_rpf_vlan_prom_mode_en_set(self, 1);
 
-		hw_atl_rpf_vlan_accept_untagged_packets_set(self, 1U);
-		hw_atl_rpf_vlan_untagged_act_set(self, 1U);
-
-		hw_atl_rpf_vlan_flr_act_set(self, 1U, 1U);
-		hw_atl_rpf_vlan_id_flr_set(self, cfg->vlan_id, 0U);
-		hw_atl_rpf_vlan_flr_en_set(self, 1U, 1U);
-	} else {
-		hw_atl_rpf_vlan_prom_mode_en_set(self, 1);
-	}
+	/* Always accept untagged packets */
+	hw_atl_rpf_vlan_accept_untagged_packets_set(self, 1U);
+	hw_atl_rpf_vlan_untagged_act_set(self, 1U);
 
 	/* Rx Interrupts */
 	hw_atl_rdm_rx_desc_wr_wb_irq_en_set(self, 1U);
@@ -371,10 +389,10 @@
 static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 {
 	static u32 aq_hw_atl_igcr_table_[4][2] = {
-		{ 0x20000000U, 0x20000000U }, /* AQ_IRQ_INVALID */
-		{ 0x20000080U, 0x20000080U }, /* AQ_IRQ_LEGACY */
-		{ 0x20000021U, 0x20000025U }, /* AQ_IRQ_MSI */
-		{ 0x20000022U, 0x20000026U }  /* AQ_IRQ_MSIX */
+		[AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U },
+		[AQ_HW_IRQ_LEGACY]  = { 0x20000080U, 0x20000080U },
+		[AQ_HW_IRQ_MSI]     = { 0x20000021U, 0x20000025U },
+		[AQ_HW_IRQ_MSIX]    = { 0x20000022U, 0x20000026U },
 	};
 
 	int err = 0;
@@ -426,6 +444,11 @@
 				   ((HW_ATL_B0_ERR_INT << 0x18) | (1U << 0x1F)) |
 			    ((HW_ATL_B0_ERR_INT << 0x10) | (1U << 0x17)), 0U);
 
+	/* Enable link interrupt */
+	if (aq_nic_cfg->link_irq_vec)
+		hw_atl_reg_gen_irq_map_set(self, BIT(7) |
+					   aq_nic_cfg->link_irq_vec, 3U);
+
 	hw_atl_b0_hw_offload_set(self, aq_nic_cfg);
 
 err_exit:
@@ -469,6 +492,7 @@
 	unsigned int buff_pa_len = 0U;
 	unsigned int pkt_len = 0U;
 	unsigned int frag_count = 0U;
+	bool is_vlan = false;
 	bool is_gso = false;
 
 	buff = &ring->buff_ring[ring->sw_tail];
@@ -483,36 +507,44 @@
 
 		buff = &ring->buff_ring[ring->sw_tail];
 
-		if (buff->is_txc) {
+		if (buff->is_gso) {
+			txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP;
+			txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
 			txd->ctl |= (buff->len_l3 << 31) |
-				(buff->len_l2 << 24) |
-				HW_ATL_B0_TXD_CTL_CMD_TCP |
-				HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
-			txd->ctl2 |= (buff->mss << 16) |
-				(buff->len_l4 << 8) |
-				(buff->len_l3 >> 1);
+				    (buff->len_l2 << 24);
+			txd->ctl2 |= (buff->mss << 16);
+			is_gso = true;
 
 			pkt_len -= (buff->len_l4 +
 				    buff->len_l3 +
 				    buff->len_l2);
-			is_gso = true;
-
 			if (buff->is_ipv6)
 				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6;
-		} else {
+			txd->ctl2 |= (buff->len_l4 << 8) |
+				     (buff->len_l3 >> 1);
+		}
+		if (buff->is_vlan) {
+			txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
+			txd->ctl |= buff->vlan_tx_tag << 4;
+			is_vlan = true;
+		}
+		if (!buff->is_gso && !buff->is_vlan) {
 			buff_pa_len = buff->len;
 
 			txd->buf_addr = buff->pa;
 			txd->ctl |= (HW_ATL_B0_TXD_CTL_BLEN &
 						((u32)buff_pa_len << 4));
 			txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXD;
+
 			/* PAY_LEN */
 			txd->ctl2 |= HW_ATL_B0_TXD_CTL2_LEN & (pkt_len << 14);
 
-			if (is_gso) {
-				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO;
+			if (is_gso || is_vlan) {
+				/* enable tx context */
 				txd->ctl2 |= HW_ATL_B0_TXD_CTL2_CTX_EN;
 			}
+			if (is_gso)
+				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO;
 
 			/* Tx checksum offloads */
 			if (buff->is_ip_cso)
@@ -521,13 +553,16 @@
 			if (buff->is_udp_cso || buff->is_tcp_cso)
 				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TUCSO;
 
+			if (is_vlan)
+				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_VLAN;
+
 			if (unlikely(buff->is_eop)) {
 				txd->ctl |= HW_ATL_B0_TXD_CTL_EOP;
 				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB;
 				is_gso = false;
+				is_vlan = false;
 			}
 		}
-
 		ring->sw_tail = aq_ring_next_dx(ring, ring->sw_tail);
 	}
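A buffer can now request a context descriptor for LSO, VLAN insertion, or both, and the is_gso/is_vlan flags persist across the packet's fragments until EOP clears them. A hedged outline of the descriptor stream for one GSO packet that also carries a VLAN tag:

    /* Illustrative descriptor sequence for a single packet:
     *   TXC: DESC_TYPE_TXC | CMD_TCP, mss << 16, vlan_tx_tag << 4
     *   TXD: DESC_TYPE_TXD | CMD_LSO | CMD_VLAN, CTX_EN, buffer 0
     *   TXD: ...                                         buffer N
     *   TXD: TXD_CTL_EOP | CMD_WB  ->  is_gso/is_vlan reset for
     *        the next packet
     */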
 
@@ -541,6 +576,7 @@
 {
 	u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa;
 	u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
+	u32 vlan_rx_stripping = self->aq_nic_cfg->is_vlan_rx_strip;
 
 	hw_atl_rdm_rx_desc_en_set(self, false, aq_ring->idx);
 
@@ -560,7 +596,8 @@
 
 	hw_atl_rdm_rx_desc_head_buff_size_set(self, 0U, aq_ring->idx);
 	hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, aq_ring->idx);
-	hw_atl_rpo_rx_desc_vlan_stripping_set(self, 0U, aq_ring->idx);
+	hw_atl_rpo_rx_desc_vlan_stripping_set(self, !!vlan_rx_stripping,
+					      aq_ring->idx);
 
 	/* Rx ring set mode */
 
@@ -647,8 +684,6 @@
 static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 					struct aq_ring_s *ring)
 {
-	struct device *ndev = aq_nic_get_dev(ring->aq_nic);
-
 	for (; ring->hw_head != ring->sw_tail;
 		ring->hw_head = aq_ring_next_dx(ring, ring->hw_head)) {
 		struct aq_ring_buff_s *buff = NULL;
@@ -665,11 +700,15 @@
 
 		buff = &ring->buff_ring[ring->hw_head];
 
+		buff->flags = 0U;
+		buff->is_hash_l4 = 0U;
+
 		rx_stat = (0x0000003CU & rxd_wb->status) >> 2;
 
 		is_rx_check_sum_enabled = (rxd_wb->type >> 19) & 0x3U;
 
-		pkt_type = 0xFFU & (rxd_wb->type >> 4);
+		pkt_type = (rxd_wb->type & HW_ATL_B0_RXD_WB_STAT_PKTTYPE) >>
+			   HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT;
 
 		if (is_rx_check_sum_enabled & BIT(0) &&
 		    (0x0U == (pkt_type & 0x3U)))
@@ -690,43 +729,51 @@
 			buff->is_cso_err = 0U;
 		}
 
-		dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE);
+		if (self->aq_nic_cfg->is_vlan_rx_strip &&
+		    ((pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN) ||
+		     (pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE))) {
+			buff->is_vlan = 1;
+			buff->vlan_rx_tag = le16_to_cpu(rxd_wb->vlan);
+		}
 
 		if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) {
 			/* MAC error or DMA error */
 			buff->is_error = 1U;
-		} else {
-			if (self->aq_nic_cfg->is_rss) {
-				/* last 4 byte */
-				u16 rss_type = rxd_wb->type & 0xFU;
+		}
+		if (self->aq_nic_cfg->is_rss) {
+			/* last 4 byte */
+			u16 rss_type = rxd_wb->type & 0xFU;
 
-				if (rss_type && rss_type < 0x8U) {
-					buff->is_hash_l4 = (rss_type == 0x4 ||
-					rss_type == 0x5);
-					buff->rss_hash = rxd_wb->rss_hash;
-				}
+			if (rss_type && rss_type < 0x8U) {
+				buff->is_hash_l4 = (rss_type == 0x4 ||
+				rss_type == 0x5);
+				buff->rss_hash = rxd_wb->rss_hash;
 			}
+		}
 
-			if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) {
-				buff->len = rxd_wb->pkt_len %
-					AQ_CFG_RX_FRAME_MAX;
-				buff->len = buff->len ?
-					buff->len : AQ_CFG_RX_FRAME_MAX;
-				buff->next = 0U;
-				buff->is_eop = 1U;
+		if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) {
+			buff->len = rxd_wb->pkt_len %
+				AQ_CFG_RX_FRAME_MAX;
+			buff->len = buff->len ?
+				buff->len : AQ_CFG_RX_FRAME_MAX;
+			buff->next = 0U;
+			buff->is_eop = 1U;
+		} else {
+			buff->len =
+				rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ?
+				AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len;
+
+			if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT &
+				rxd_wb->status) {
+				/* LRO */
+				buff->next = rxd_wb->next_desc_ptr;
+				++ring->stats.rx.lro_packets;
 			} else {
-				if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT &
-					rxd_wb->status) {
-					/* LRO */
-					buff->next = rxd_wb->next_desc_ptr;
-					++ring->stats.rx.lro_packets;
-				} else {
-					/* jumbo */
-					buff->next =
-						aq_ring_next_dx(ring,
-								ring->hw_head);
-					++ring->stats.rx.jumbo_packets;
-				}
+				/* jumbo */
+				buff->next =
+					aq_ring_next_dx(ring,
+							ring->hw_head);
+				++ring->stats.rx.jumbo_packets;
 			}
 		}
 	}
@@ -761,23 +808,31 @@
 					  unsigned int packet_filter)
 {
 	unsigned int i = 0U;
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 
-	hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
+	hw_atl_rpfl2promiscuous_mode_en_set(self,
+					    IS_FILTER_ENABLED(IFF_PROMISC));
+
+	hw_atl_rpf_vlan_prom_mode_en_set(self,
+				     IS_FILTER_ENABLED(IFF_PROMISC) ||
+				     cfg->is_vlan_force_promisc);
+
 	hw_atl_rpfl2multicast_flr_en_set(self,
-					 IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
+					 IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+					 IS_FILTER_ENABLED(IFF_MULTICAST), 0);
 
 	hw_atl_rpfl2_accept_all_mc_packets_set(self,
-					       IS_FILTER_ENABLED(IFF_ALLMULTI));
+					      IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+					      IS_FILTER_ENABLED(IFF_MULTICAST));
 
 	hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
 
-	self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
 
 	for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
 		hw_atl_rpfl2_uc_flr_en_set(self,
-					   (self->aq_nic_cfg->is_mc_list_enabled &&
-				    (i <= self->aq_nic_cfg->mc_list_count)) ?
-				    1U : 0U, i);
+					   (cfg->is_mc_list_enabled &&
+					    (i <= cfg->mc_list_count)) ?
+					   1U : 0U, i);
 
 	return aq_hw_err_from_flags(self);
 }
@@ -914,14 +969,26 @@
 
 static int hw_atl_b0_hw_stop(struct aq_hw_s *self)
 {
+	int err;
+	u32 val;
+
 	hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK);
 
 	/* Invalidate Descriptor Cache to prevent writing to the cached
 	 * descriptors and to the data pointer of those descriptors
 	 */
-	hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1);
+	hw_atl_rdm_rx_dma_desc_cache_init_tgl(self);
 
-	return aq_hw_err_from_flags(self);
+	err = aq_hw_err_from_flags(self);
+
+	if (err)
+		goto err_exit;
+
+	readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get,
+				  self, val, val == 1, 1000U, 10000U);
+
+err_exit:
+	return err;
 }
 
 static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self,
@@ -938,10 +1005,145 @@
 	return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl_b0_hw_fl3l4_clear(struct aq_hw_s *self,
+				    struct aq_rx_filter_l3l4 *data)
+{
+	u8 location = data->location;
+
+	if (!data->is_ipv6) {
+		hw_atl_rpfl3l4_cmd_clear(self, location);
+		hw_atl_rpf_l4_spd_set(self, 0U, location);
+		hw_atl_rpf_l4_dpd_set(self, 0U, location);
+		hw_atl_rpfl3l4_ipv4_src_addr_clear(self, location);
+		hw_atl_rpfl3l4_ipv4_dest_addr_clear(self, location);
+	} else {
+		int i;
+
+		for (i = 0; i < HW_ATL_RX_CNT_REG_ADDR_IPV6; ++i) {
+			hw_atl_rpfl3l4_cmd_clear(self, location + i);
+			hw_atl_rpf_l4_spd_set(self, 0U, location + i);
+			hw_atl_rpf_l4_dpd_set(self, 0U, location + i);
+		}
+		hw_atl_rpfl3l4_ipv6_src_addr_clear(self, location);
+		hw_atl_rpfl3l4_ipv6_dest_addr_clear(self, location);
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_hw_fl3l4_set(struct aq_hw_s *self,
+				  struct aq_rx_filter_l3l4 *data)
+{
+	u8 location = data->location;
+
+	hw_atl_b0_hw_fl3l4_clear(self, data);
+
+	if (data->cmd) {
+		if (!data->is_ipv6) {
+			hw_atl_rpfl3l4_ipv4_dest_addr_set(self,
+							  location,
+							  data->ip_dst[0]);
+			hw_atl_rpfl3l4_ipv4_src_addr_set(self,
+							 location,
+							 data->ip_src[0]);
+		} else {
+			hw_atl_rpfl3l4_ipv6_dest_addr_set(self,
+							  location,
+							  data->ip_dst);
+			hw_atl_rpfl3l4_ipv6_src_addr_set(self,
+							 location,
+							 data->ip_src);
+		}
+	}
+	hw_atl_rpf_l4_dpd_set(self, data->p_dst, location);
+	hw_atl_rpf_l4_spd_set(self, data->p_src, location);
+	hw_atl_rpfl3l4_cmd_set(self, location, data->cmd);
+
+	return aq_hw_err_from_flags(self);
+}
+
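For context, a hedged example of programming one IPv4 entry through this path; the field names come from struct aq_rx_filter_l3l4 as used above, while the address byte order and the cmd encoding are assumptions left symbolic here:

    struct aq_rx_filter_l3l4 data = {
    	.location = 0U,
    	.is_ipv6  = false,
    	.ip_src   = { 0xC0A80001U },	/* 192.168.0.1, byte order assumed */
    	.ip_dst   = { 0xC0A80002U },	/* 192.168.0.2 */
    	.p_src    = 0U,
    	.p_dst    = 443U,
    	.cmd      = l3l4_cmd,	/* enable/action bits composed elsewhere */
    };

    hw_atl_b0_hw_fl3l4_set(self, &data);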
+static int hw_atl_b0_hw_fl2_set(struct aq_hw_s *self,
+				struct aq_rx_filter_l2 *data)
+{
+	hw_atl_rpf_etht_flr_en_set(self, 1U, data->location);
+	hw_atl_rpf_etht_flr_set(self, data->ethertype, data->location);
+	hw_atl_rpf_etht_user_priority_en_set(self,
+					     !!data->user_priority_en,
+					     data->location);
+	if (data->user_priority_en)
+		hw_atl_rpf_etht_user_priority_set(self,
+						  data->user_priority,
+						  data->location);
+
+	if (data->queue < 0) {
+		hw_atl_rpf_etht_flr_act_set(self, 0U, data->location);
+		hw_atl_rpf_etht_rx_queue_en_set(self, 0U, data->location);
+	} else {
+		hw_atl_rpf_etht_flr_act_set(self, 1U, data->location);
+		hw_atl_rpf_etht_rx_queue_en_set(self, 1U, data->location);
+		hw_atl_rpf_etht_rx_queue_set(self, data->queue, data->location);
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_hw_fl2_clear(struct aq_hw_s *self,
+				  struct aq_rx_filter_l2 *data)
+{
+	hw_atl_rpf_etht_flr_en_set(self, 0U, data->location);
+	hw_atl_rpf_etht_flr_set(self, 0U, data->location);
+	hw_atl_rpf_etht_user_priority_en_set(self, 0U, data->location);
+
+	return aq_hw_err_from_flags(self);
+}
+
+/**
+ * @brief Set the VLAN filter table
+ * @details Configure the VLAN filter table to accept (and assign an RX
+ *  queue to) traffic for particular VLAN IDs.
+ * Note: use this function while VLAN promiscuous mode is enabled, to
+ * avoid losing traffic.
+ *
+ * @param self AQ HW structure
+ * @param aq_vlans VLAN filter configuration array
+ * @return 0 - OK, <0 - error
+ */
+static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self,
+				 struct aq_rx_filter_vlan *aq_vlans)
+{
+	int i;
+
+	for (i = 0; i < AQ_VLAN_MAX_FILTERS; i++) {
+		hw_atl_rpf_vlan_flr_en_set(self, 0U, i);
+		hw_atl_rpf_vlan_rxq_en_flr_set(self, 0U, i);
+		if (aq_vlans[i].enable) {
+			hw_atl_rpf_vlan_id_flr_set(self,
+						   aq_vlans[i].vlan_id,
+						   i);
+			hw_atl_rpf_vlan_flr_act_set(self, 1U, i);
+			hw_atl_rpf_vlan_flr_en_set(self, 1U, i);
+			if (aq_vlans[i].queue != 0xFF) {
+				hw_atl_rpf_vlan_rxq_flr_set(self,
+							    aq_vlans[i].queue,
+							    i);
+				hw_atl_rpf_vlan_rxq_en_flr_set(self, 1U, i);
+			}
+		}
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
+{
+	/* set promiscuous mode in case of disabling the VLAN filter */
+	hw_atl_rpf_vlan_prom_mode_en_set(self, !enable);
+
+	return aq_hw_err_from_flags(self);
+}
+
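A hedged sketch of a caller filling the table; the aq_rx_filter_vlan fields (enable, vlan_id, queue) and the 0xFF "no queue steering" convention are taken from the function above, the surrounding setup is illustrative:

    struct aq_rx_filter_vlan aq_vlans[AQ_VLAN_MAX_FILTERS] = { };

    aq_vlans[0].enable = 1U;
    aq_vlans[0].vlan_id = 100U;
    aq_vlans[0].queue = 0xFF;	/* accept, but do not steer to a queue */

    hw_atl_b0_hw_vlan_set(self, aq_vlans);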
 const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_set_mac_address   = hw_atl_b0_hw_mac_addr_set,
 	.hw_init              = hw_atl_b0_hw_init,
-	.hw_set_power         = hw_atl_utils_hw_set_power,
 	.hw_reset             = hw_atl_b0_hw_reset,
 	.hw_start             = hw_atl_b0_hw_start,
 	.hw_ring_tx_start     = hw_atl_b0_hw_ring_tx_start,
@@ -963,6 +1165,11 @@
 	.hw_ring_rx_init             = hw_atl_b0_hw_ring_rx_init,
 	.hw_ring_tx_init             = hw_atl_b0_hw_ring_tx_init,
 	.hw_packet_filter_set        = hw_atl_b0_hw_packet_filter_set,
+	.hw_filter_l2_set            = hw_atl_b0_hw_fl2_set,
+	.hw_filter_l2_clear          = hw_atl_b0_hw_fl2_clear,
+	.hw_filter_l3l4_set          = hw_atl_b0_hw_fl3l4_set,
+	.hw_filter_vlan_set          = hw_atl_b0_hw_vlan_set,
+	.hw_filter_vlan_ctrl         = hw_atl_b0_hw_vlan_ctrl,
 	.hw_multicast_list_set       = hw_atl_b0_hw_multicast_list_set,
 	.hw_interrupt_moderation_set = hw_atl_b0_hw_interrupt_moderation_set,
 	.hw_rss_set                  = hw_atl_b0_hw_rss_set,
@@ -970,4 +1177,6 @@
 	.hw_get_regs                 = hw_atl_utils_hw_get_regs,
 	.hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl_utils_get_fw_version,
+	.hw_set_offload              = hw_atl_b0_hw_offload_set,
+	.hw_set_fc                   = hw_atl_b0_set_fc,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index 2cc8dac..09af168 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_b0.h: Declaration of abstract interface for Atlantic hardware
@@ -32,9 +29,6 @@
 #define hw_atl_b0_caps_aqc111s hw_atl_b0_caps_aqc108
 #define hw_atl_b0_caps_aqc112s hw_atl_b0_caps_aqc109
 
-#define hw_atl_b0_caps_aqc111e hw_atl_b0_caps_aqc108
-#define hw_atl_b0_caps_aqc112e hw_atl_b0_caps_aqc109
-
 extern const struct aq_hw_ops hw_atl_ops_b0;
 
 #define hw_atl_ops_b1 hw_atl_ops_b0
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index 28568f5..808d8cd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_b0_internal.h: Definition of Atlantic B0 chip specific
@@ -67,12 +64,6 @@
 #define HW_ATL_B0_MPI_SPEED_MSK         0xFFFFU
 #define HW_ATL_B0_MPI_SPEED_SHIFT       16U
 
-#define HW_ATL_B0_RATE_10G              BIT(0)
-#define HW_ATL_B0_RATE_5G               BIT(1)
-#define HW_ATL_B0_RATE_2G5              BIT(3)
-#define HW_ATL_B0_RATE_1G               BIT(4)
-#define HW_ATL_B0_RATE_100M             BIT(5)
-
 #define HW_ATL_B0_TXBUF_MAX  160U
 #define HW_ATL_B0_RXBUF_MAX  320U
 
@@ -84,7 +75,7 @@
 #define HW_ATL_B0_TC_MAX 1U
 #define HW_ATL_B0_RSS_MAX 8U
 
-#define HW_ATL_B0_LRO_RXD_MAX 2U
+#define HW_ATL_B0_LRO_RXD_MAX 16U
 #define HW_ATL_B0_RS_SLIP_ENABLED  0U
 
 /* (256k -1(max pay_len) - 54(header)) */
@@ -116,10 +107,17 @@
 #define HW_ATL_B0_RXD_NCEA0 (0x1)
 
 #define HW_ATL_B0_RXD_WB_STAT_RSSTYPE (0x0000000F)
+#define HW_ATL_B0_RXD_WB_STAT_RSSTYPE_SHIFT (0x0)
 #define HW_ATL_B0_RXD_WB_STAT_PKTTYPE (0x00000FF0)
+#define HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT (0x4)
 #define HW_ATL_B0_RXD_WB_STAT_RXCTRL  (0x00180000)
+#define HW_ATL_B0_RXD_WB_STAT_RXCTRL_SHIFT (0x13)
 #define HW_ATL_B0_RXD_WB_STAT_SPLHDR  (0x00200000)
 #define HW_ATL_B0_RXD_WB_STAT_HDRLEN  (0xFFC00000)
+#define HW_ATL_B0_RXD_WB_STAT_HDRLEN_SHIFT (0x16)
+
+#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN          BIT(5)
+#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE   BIT(6)
 
 #define HW_ATL_B0_RXD_WB_STAT2_DD      (0x0001)
 #define HW_ATL_B0_RXD_WB_STAT2_EOP     (0x0002)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 10ec5dc..6f34069 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_llh.c: Definitions of bitfield and register access functions for
@@ -49,11 +46,6 @@
 				  HW_ATL_GLB_SOFT_RES_SHIFT);
 }
 
-u32 hw_atl_reg_rx_dma_stat_counter7get(struct aq_hw_s *aq_hw)
-{
-	return aq_hw_read_reg(aq_hw, HW_ATL_RX_DMA_STAT_COUNTER7_ADR);
-}
-
 u32 hw_atl_reg_glb_mif_id_get(struct aq_hw_s *aq_hw)
 {
 	return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MIF_ID_ADR);
@@ -65,44 +57,24 @@
 	return aq_hw_read_reg(aq_hw, HW_ATL_RPB_RX_DMA_DROP_PKT_CNT_ADR);
 }
 
-u32 hw_atl_stats_rx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw)
+u64 hw_atl_stats_rx_dma_good_octet_counter_get(struct aq_hw_s *aq_hw)
 {
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERLSW);
+	return aq_hw_read_reg64(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERLSW);
 }
 
-u32 hw_atl_stats_rx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw)
+u64 hw_atl_stats_rx_dma_good_pkt_counter_get(struct aq_hw_s *aq_hw)
 {
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERLSW);
+	return aq_hw_read_reg64(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERLSW);
 }
 
-u32 hw_atl_stats_tx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw)
+u64 hw_atl_stats_tx_dma_good_octet_counter_get(struct aq_hw_s *aq_hw)
 {
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERLSW);
+	return aq_hw_read_reg64(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERLSW);
 }
 
-u32 hw_atl_stats_tx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw)
+u64 hw_atl_stats_tx_dma_good_pkt_counter_get(struct aq_hw_s *aq_hw)
 {
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERLSW);
-}
-
-u32 hw_atl_stats_rx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw)
-{
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERMSW);
-}
-
-u32 hw_atl_stats_rx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw)
-{
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERMSW);
-}
-
-u32 hw_atl_stats_tx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw)
-{
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERMSW);
-}
-
-u32 hw_atl_stats_tx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw)
-{
-	return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERMSW);
+	return aq_hw_read_reg64(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERLSW);
 }
 
 /* interrupt */
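aq_hw_read_reg64() folds the old LSW/MSW pair into one call. A sketch of what such a read boils down to, assuming the MSW register sits 4 bytes above the LSW (the real helper lives in the driver's aq_hw utility code):

    static u64 read_reg64_sketch(struct aq_hw_s *hw, u32 reg)
    {
    	u64 value = aq_hw_read_reg(hw, reg);	/* LSW */

    	value |= (u64)aq_hw_read_reg(hw, reg + 4) << 32;	/* MSW */
    	return value;
    }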
@@ -315,6 +287,21 @@
 			    HW_ATL_ITR_RES_SHIFT, res_irq);
 }
 
+/* set RSC interrupt */
+void hw_atl_itr_rsc_en_set(struct aq_hw_s *aq_hw, u32 enable)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_ITR_RSC_EN_ADR, enable);
+}
+
+/* set RSC delay */
+void hw_atl_itr_rsc_delay_set(struct aq_hw_s *aq_hw, u32 delay)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_ITR_RSC_DELAY_ADR,
+			    HW_ATL_ITR_RSC_DELAY_MSK,
+			    HW_ATL_ITR_RSC_DELAY_SHIFT,
+			    delay);
+}
+
 /* rdm */
 void hw_atl_rdm_cpu_id_set(struct aq_hw_s *aq_hw, u32 cpuid, u32 dca)
 {
@@ -619,12 +606,25 @@
 			    HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode);
 }
 
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init)
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw)
 {
+	u32 val;
+
+	val = aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
+				 HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
+				 HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT);
+
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
 			    HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
 			    HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT,
-			    init);
+			    val ^ 1);
+}
+
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR,
+				  RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK,
+				  RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT);
 }
 
 void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
@@ -898,6 +898,24 @@
 			    vlan_id_flr);
 }
 
+void hw_atl_rpf_vlan_rxq_en_flr_set(struct aq_hw_s *aq_hw, u32 vlan_rxq_en,
+				    u32 filter)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_RXQ_EN_F_ADR(filter),
+			    HW_ATL_RPF_VL_RXQ_EN_F_MSK,
+			    HW_ATL_RPF_VL_RXQ_EN_F_SHIFT,
+			    vlan_rxq_en);
+}
+
+void hw_atl_rpf_vlan_rxq_flr_set(struct aq_hw_s *aq_hw, u32 vlan_rxq,
+				 u32 filter)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_RXQ_F_ADR(filter),
+			    HW_ATL_RPF_VL_RXQ_F_MSK,
+			    HW_ATL_RPF_VL_RXQ_F_SHIFT,
+			    vlan_rxq);
+}
+
 void hw_atl_rpf_etht_flr_en_set(struct aq_hw_s *aq_hw, u32 etht_flr_en,
 				u32 filter)
 {
@@ -965,6 +983,20 @@
 			    HW_ATL_RPF_ET_VALF_SHIFT, etht_flr);
 }
 
+void hw_atl_rpf_l4_spd_set(struct aq_hw_s *aq_hw, u32 val, u32 filter)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_L4_SPD_ADR(filter),
+			    HW_ATL_RPF_L4_SPD_MSK,
+			    HW_ATL_RPF_L4_SPD_SHIFT, val);
+}
+
+void hw_atl_rpf_l4_dpd_set(struct aq_hw_s *aq_hw, u32 val, u32 filter)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_L4_DPD_ADR(filter),
+			    HW_ATL_RPF_L4_DPD_MSK,
+			    HW_ATL_RPF_L4_DPD_SHIFT, val);
+}
+
 /* RPO: rx packet offload */
 void hw_atl_rpo_ipv4header_crc_offload_en_set(struct aq_hw_s *aq_hw,
 					      u32 ipv4header_crc_offload_en)
@@ -985,6 +1017,22 @@
 			    rx_desc_vlan_stripping);
 }
 
+void hw_atl_rpo_outer_vlan_tag_mode_set(void *context,
+					u32 outervlantagmode)
+{
+	aq_hw_write_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR,
+			    HW_ATL_RPO_OUTER_VL_INS_MODE_MSK,
+			    HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT,
+			    outervlantagmode);
+}
+
+u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context)
+{
+	return aq_hw_read_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR,
+				  HW_ATL_RPO_OUTER_VL_INS_MODE_MSK,
+				  HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT);
+}
+
 void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
 					   u32 tcp_udp_crc_offload_en)
 {
@@ -1242,6 +1290,15 @@
 			    HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en);
 }
 
+void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+				   u32 tx_traf_class_mode)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
+			    HW_ATL_TPB_TX_TC_MODE_MSK,
+			    HW_ATL_TPB_TX_TC_MODE_SHIFT,
+			    tx_traf_class_mode);
+}
+
 void hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw,
 						u32 tx_buff_hi_threshold_per_tc,
 					 u32 buffer)
@@ -1468,3 +1525,109 @@
 	aq_hw_write_reg(aq_hw, HW_ATL_GLB_CPU_SCRATCH_SCP_ADR(scratch_scp),
 			glb_cpu_scratch_scp);
 }
+
+void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_MCP_UP_FORCE_INTERRUPT_ADR,
+			    HW_ATL_MCP_UP_FORCE_INTERRUPT_MSK,
+			    HW_ATL_MCP_UP_FORCE_INTERRUPT_SHIFT,
+			    up_force_intr);
+}
+
+void hw_atl_rpfl3l4_ipv4_dest_addr_clear(struct aq_hw_s *aq_hw, u8 location)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_DSTA_ADR(location), 0U);
+}
+
+void hw_atl_rpfl3l4_ipv4_src_addr_clear(struct aq_hw_s *aq_hw, u8 location)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_SRCA_ADR(location), 0U);
+}
+
+void hw_atl_rpfl3l4_cmd_clear(struct aq_hw_s *aq_hw, u8 location)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_REG_CTRL_ADR(location), 0U);
+}
+
+void hw_atl_rpfl3l4_ipv6_dest_addr_clear(struct aq_hw_s *aq_hw, u8 location)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i)
+		aq_hw_write_reg(aq_hw,
+				HW_ATL_RPF_L3_DSTA_ADR(location + i),
+				0U);
+}
+
+void hw_atl_rpfl3l4_ipv6_src_addr_clear(struct aq_hw_s *aq_hw, u8 location)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i)
+		aq_hw_write_reg(aq_hw,
+				HW_ATL_RPF_L3_SRCA_ADR(location + i),
+				0U);
+}
+
+void hw_atl_rpfl3l4_ipv4_dest_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				       u32 ipv4_dest)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_DSTA_ADR(location),
+			ipv4_dest);
+}
+
+void hw_atl_rpfl3l4_ipv4_src_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				      u32 ipv4_src)
+{
+	aq_hw_write_reg(aq_hw,
+			HW_ATL_RPF_L3_SRCA_ADR(location),
+			ipv4_src);
+}
+
+void hw_atl_rpfl3l4_cmd_set(struct aq_hw_s *aq_hw, u8 location, u32 cmd)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_RPF_L3_REG_CTRL_ADR(location), cmd);
+}
+
+void hw_atl_rpfl3l4_ipv6_src_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				      u32 *ipv6_src)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i)
+		aq_hw_write_reg(aq_hw,
+				HW_ATL_RPF_L3_SRCA_ADR(location + i),
+				ipv6_src[i]);
+}
+
+void hw_atl_rpfl3l4_ipv6_dest_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				       u32 *ipv6_dest)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i)
+		aq_hw_write_reg(aq_hw,
+				HW_ATL_RPF_L3_DSTA_ADR(location + i),
+				ipv6_dest[i]);
+}
+
+u32 hw_atl_sem_ram_get(struct aq_hw_s *self)
+{
+	return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
+}
+
+u32 hw_atl_scrpad_get(struct aq_hw_s *aq_hw, u32 scratch_scp)
+{
+	return aq_hw_read_reg(aq_hw,
+			      HW_ATL_GLB_CPU_SCRATCH_SCP_ADR(scratch_scp));
+}
+
+u32 hw_atl_scrpad12_get(struct aq_hw_s *self)
+{
+	return hw_atl_scrpad_get(self, 0xB);
+}
+
+u32 hw_atl_scrpad25_get(struct aq_hw_s *self)
+{
+	return hw_atl_scrpad_get(self, 0x18);
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index b3bf64b..c3ee278 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_llh.h: Declarations of bitfield and register access functions for
@@ -40,29 +37,17 @@
 
 u32 hw_atl_rpb_rx_dma_drop_pkt_cnt_get(struct aq_hw_s *aq_hw);
 
-/* get rx dma good octet counter lsw */
-u32 hw_atl_stats_rx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw);
+/* get rx dma good octet counter */
+u64 hw_atl_stats_rx_dma_good_octet_counter_get(struct aq_hw_s *aq_hw);
 
-/* get rx dma good packet counter lsw */
-u32 hw_atl_stats_rx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw);
+/* get rx dma good packet counter */
+u64 hw_atl_stats_rx_dma_good_pkt_counter_get(struct aq_hw_s *aq_hw);
 
-/* get tx dma good octet counter lsw */
-u32 hw_atl_stats_tx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw);
+/* get tx dma good octet counter */
+u64 hw_atl_stats_tx_dma_good_octet_counter_get(struct aq_hw_s *aq_hw);
 
-/* get tx dma good packet counter lsw */
-u32 hw_atl_stats_tx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw);
-
-/* get rx dma good octet counter msw */
-u32 hw_atl_stats_rx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw);
-
-/* get rx dma good packet counter msw */
-u32 hw_atl_stats_rx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw);
-
-/* get tx dma good octet counter msw */
-u32 hw_atl_stats_tx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw);
-
-/* get tx dma good packet counter msw */
-u32 hw_atl_stats_tx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw);
+/* get tx dma good packet counter */
+u64 hw_atl_stats_tx_dma_good_pkt_counter_get(struct aq_hw_s *aq_hw);
 
 /* get msm rx errors counter register */
 u32 hw_atl_reg_mac_msm_rx_errs_cnt_get(struct aq_hw_s *aq_hw);
@@ -82,9 +67,6 @@
 /* get msm rx unicast octets counter register 0 */
 u32 hw_atl_reg_mac_msm_rx_ucst_octets_counter0get(struct aq_hw_s *aq_hw);
 
-/* get rx dma statistics counter 7 */
-u32 hw_atl_reg_rx_dma_stat_counter7get(struct aq_hw_s *aq_hw);
-
 /* get msm tx errors counter register */
 u32 hw_atl_reg_mac_msm_tx_errs_cnt_get(struct aq_hw_s *aq_hw);
 
@@ -152,6 +134,12 @@
 /* set reset interrupt */
 void hw_atl_itr_res_irq_set(struct aq_hw_s *aq_hw, u32 res_irq);
 
+/* set RSC interrupt */
+void hw_atl_itr_rsc_en_set(struct aq_hw_s *aq_hw, u32 enable);
+
+/* set RSC delay */
+void hw_atl_itr_rsc_delay_set(struct aq_hw_s *aq_hw, u32 delay);
+
 /* rdm */
 
 /* set cpu id */
@@ -325,8 +313,11 @@
 					    u32 rx_pkt_buff_size_per_tc,
 					    u32 buffer);
 
-/* set rdm rx dma descriptor cache init */
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init);
+/* toggle rdm rx dma descriptor cache init */
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw);
+
+/* get rdm rx dma descriptor cache init done */
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw);
 
 /* set rx xoff enable (per tc) */
 void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
@@ -441,6 +432,14 @@
 void hw_atl_rpf_vlan_id_flr_set(struct aq_hw_s *aq_hw, u32 vlan_id_flr,
 				u32 filter);
 
+/* Set VLAN RX queue assignment enable */
+void hw_atl_rpf_vlan_rxq_en_flr_set(struct aq_hw_s *aq_hw, u32 vlan_rxq_en,
+				    u32 filter);
+
+/* Set VLAN RX queue */
+void hw_atl_rpf_vlan_rxq_flr_set(struct aq_hw_s *aq_hw, u32 vlan_rxq,
+				 u32 filter);
+
 /* set ethertype filter enable */
 void hw_atl_rpf_etht_flr_en_set(struct aq_hw_s *aq_hw, u32 etht_flr_en,
 				u32 filter);
@@ -475,6 +474,12 @@
 /* set ethertype filter */
 void hw_atl_rpf_etht_flr_set(struct aq_hw_s *aq_hw, u32 etht_flr, u32 filter);
 
+/* set L4 source port */
+void hw_atl_rpf_l4_spd_set(struct aq_hw_s *aq_hw, u32 val, u32 filter);
+
+/* set L4 destination port */
+void hw_atl_rpf_l4_dpd_set(struct aq_hw_s *aq_hw, u32 val, u32 filter);
+
 /* rpo */
 
 /* set ipv4 header checksum offload enable */
@@ -486,6 +491,11 @@
 					   u32 rx_desc_vlan_stripping,
 					   u32 descriptor);
 
+void hw_atl_rpo_outer_vlan_tag_mode_set(void *context,
+					u32 outervlantagmode);
+
+u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context);
+
 /* set tcp/udp checksum offload enable */
 void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
 					   u32 tcp_udp_crc_offload_en);
@@ -591,6 +601,10 @@
 
 /* tpb */
 
+/* set TX Traffic Class Mode */
+void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+				   u32 tx_traf_class_mode);
+
 /* set tx buffer enable */
 void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en);
 
@@ -701,4 +715,53 @@
 /* set pci register reset disable */
 void hw_atl_pci_pci_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 pci_reg_res_dis);
 
+/* set uP Force Interrupt */
+void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr);
+
+/* clear ipv4 filter destination address */
+void hw_atl_rpfl3l4_ipv4_dest_addr_clear(struct aq_hw_s *aq_hw, u8 location);
+
+/* clear ipv4 filter source address */
+void hw_atl_rpfl3l4_ipv4_src_addr_clear(struct aq_hw_s *aq_hw, u8 location);
+
+/* clear command for filter l3-l4 */
+void hw_atl_rpfl3l4_cmd_clear(struct aq_hw_s *aq_hw, u8 location);
+
+/* clear ipv6 filter destination address */
+void hw_atl_rpfl3l4_ipv6_dest_addr_clear(struct aq_hw_s *aq_hw, u8 location);
+
+/* clear ipv6 filter source address */
+void hw_atl_rpfl3l4_ipv6_src_addr_clear(struct aq_hw_s *aq_hw, u8 location);
+
+/* set ipv4 filter destination address */
+void hw_atl_rpfl3l4_ipv4_dest_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				       u32 ipv4_dest);
+
+/* set ipv4 filter source address */
+void hw_atl_rpfl3l4_ipv4_src_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				      u32 ipv4_src);
+
+/* set command for filter l3-l4 */
+void hw_atl_rpfl3l4_cmd_set(struct aq_hw_s *aq_hw, u8 location, u32 cmd);
+
+/* set ipv6 filter source address */
+void hw_atl_rpfl3l4_ipv6_src_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				      u32 *ipv6_src);
+
+/* set ipv6 filter destination address */
+void hw_atl_rpfl3l4_ipv6_dest_addr_set(struct aq_hw_s *aq_hw, u8 location,
+				       u32 *ipv6_dest);
+
+/* get global microprocessor ram semaphore */
+u32 hw_atl_sem_ram_get(struct aq_hw_s *self);
+
+/* get global microprocessor scratch pad register */
+u32 hw_atl_scrpad_get(struct aq_hw_s *aq_hw, u32 scratch_scp);
+
+/* get global microprocessor scratch pad 12 register */
+u32 hw_atl_scrpad12_get(struct aq_hw_s *self);
+
+/* get global microprocessor scratch pad 25 register */
+u32 hw_atl_scrpad25_get(struct aq_hw_s *self);
+
 #endif /* HW_ATL_LLH_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index e2ecdb1..35887ad 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_llh_internal.h: Preprocessor definitions
@@ -58,9 +55,6 @@
 /* preprocessor definitions for msm rx unicast octets counter register 0 */
 #define HW_ATL_MAC_MSM_RX_UCST_OCTETS_COUNTER0_ADR 0x000001b8u
 
-/* preprocessor definitions for rx dma statistics counter 7 */
-#define HW_ATL_RX_DMA_STAT_COUNTER7_ADR 0x00006818u
-
 /* preprocessor definitions for msm tx unicast frames counter register */
 #define HW_ATL_MAC_MSM_TX_UCST_FRM_CNT_ADR 0x00000108u
 
@@ -95,6 +89,19 @@
 #define HW_ATL_ITR_RES_MSK 0x80000000
 /* lower bit position of bitfield itr_reset */
 #define HW_ATL_ITR_RES_SHIFT 31
+
+/* register address for bitfield  rsc_en */
+#define HW_ATL_ITR_RSC_EN_ADR 0x00002200
+
+/* register address for bitfield  rsc_delay */
+#define HW_ATL_ITR_RSC_DELAY_ADR 0x00002204
+/* bitmask for bitfield  rsc_delay */
+#define HW_ATL_ITR_RSC_DELAY_MSK 0x0000000f
+/* width of bitfield  rsc_delay */
+#define HW_ATL_ITR_RSC_DELAY_WIDTH 4
+/* lower bit position of bitfield  rsc_delay */
+#define HW_ATL_ITR_RSC_DELAY_SHIFT 0
+
 /* register address for bitfield dca{d}_cpuid[7:0] */
 #define HW_ATL_RDM_DCADCPUID_ADR(dca) (0x00006100 + (dca) * 0x4)
 /* bitmask for bitfield dca{d}_cpuid[7:0] */
@@ -311,6 +318,25 @@
 /* default value of bitfield rdm_desc_init_i */
 #define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0
 
+/* rdm_desc_init_done_i bitfield definitions
+ * preprocessor definitions for the bitfield rdm_desc_init_done_i.
+ * port="pif_rdm_desc_init_done_i"
+ */
+
+/* register address for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR 0x00005a10
+/* bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK 0x00000001U
+/* inverted bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSKN 0xfffffffe
+/* lower bit position of bitfield  rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT 0U
+/* width of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_WIDTH 1
+/* default value of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_DEFAULT 0x0
+
+
 /* rx int_desc_wrb_en bitfield definitions
  * preprocessor definitions for the bitfield "int_desc_wrb_en".
  * port="pif_rdm_int_desc_wrb_en_i"
@@ -1092,24 +1118,43 @@
 /* Default value of bitfield vl_id{F}[B:0] */
 #define HW_ATL_RPF_VL_ID_F_DEFAULT 0x0
 
-/* RX et_en{F} Bitfield Definitions
- * Preprocessor definitions for the bitfield "et_en{F}".
+/* RX vl_rxq_en{F} Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_rxq_en{F}".
  * Parameter: filter {F} | stride size 0x4 | range [0, 15]
- * PORT="pif_rpf_et_en_i[0]"
+ * PORT="pif_rpf_vl_rxq_en_i"
  */
 
-/* Register address for bitfield et_en{F} */
-#define HW_ATL_RPF_ET_EN_F_ADR(filter) (0x00005300 + (filter) * 0x4)
-/* Bitmask for bitfield et_en{F} */
-#define HW_ATL_RPF_ET_EN_F_MSK 0x80000000
-/* Inverted bitmask for bitfield et_en{F} */
-#define HW_ATL_RPF_ET_EN_F_MSKN 0x7FFFFFFF
-/* Lower bit position of bitfield et_en{F} */
-#define HW_ATL_RPF_ET_EN_F_SHIFT 31
-/* Width of bitfield et_en{F} */
-#define HW_ATL_RPF_ET_EN_F_WIDTH 1
-/* Default value of bitfield et_en{F} */
-#define HW_ATL_RPF_ET_EN_F_DEFAULT 0x0
+/* Register address for bitfield vl_rxq_en{F} */
+#define HW_ATL_RPF_VL_RXQ_EN_F_ADR(filter) (0x00005290 + (filter) * 0x4)
+/* Bitmask for bitfield vl_rxq_en{F} */
+#define HW_ATL_RPF_VL_RXQ_EN_F_MSK 0x10000000
+/* Inverted bitmask for bitfield vl_rxq_en{F} */
+#define HW_ATL_RPF_VL_RXQ_EN_F_MSKN 0xEFFFFFFF
+/* Lower bit position of bitfield vl_rxq_en{F} */
+#define HW_ATL_RPF_VL_RXQ_EN_F_SHIFT 28
+/* Width of bitfield vl_rxq_en{F} */
+#define HW_ATL_RPF_VL_RXQ_EN_F_WIDTH 1
+/* Default value of bitfield vl_rxq_en{F} */
+#define HW_ATL_RPF_VL_RXQ_EN_F_DEFAULT 0x0
+
+/* RX vl_rxq{F}[4:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_rxq{F}[4:0]".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 15]
+ * PORT="pif_rpf_vl_rxq0_i[4:0]"
+ */
+
+/* Register address for bitfield vl_rxq{F}[4:0] */
+#define HW_ATL_RPF_VL_RXQ_F_ADR(filter) (0x00005290 + (filter) * 0x4)
+/* Bitmask for bitfield vl_rxq{F}[4:0] */
+#define HW_ATL_RPF_VL_RXQ_F_MSK 0x01F00000
+/* Inverted bitmask for bitfield vl_rxq{F}[4:0] */
+#define HW_ATL_RPF_VL_RXQ_F_MSKN 0xFE0FFFFF
+/* Lower bit position of bitfield vl_rxq{F}[4:0] */
+#define HW_ATL_RPF_VL_RXQ_F_SHIFT 20
+/* Width of bitfield vl_rxq{F}[4:0] */
+#define HW_ATL_RPF_VL_RXQ_F_WIDTH 5
+/* Default value of bitfield vl_rxq{F}[4:0] */
+#define HW_ATL_RPF_VL_RXQ_F_DEFAULT 0x0
 
 /* rx et_en{f} bitfield definitions
  * preprocessor definitions for the bitfield "et_en{f}".
@@ -1263,6 +1308,44 @@
 /* default value of bitfield et_val{f}[f:0] */
 #define HW_ATL_RPF_ET_VALF_DEFAULT 0x0
 
+/* RX l4_sp{D}[F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l4_sp{D}[F:0]".
+ * Parameter: srcport {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l4_sp0_i[15:0]"
+ */
+
+/* Register address for bitfield l4_sp{D}[F:0] */
+#define HW_ATL_RPF_L4_SPD_ADR(srcport) (0x00005400u + (srcport) * 0x4)
+/* Bitmask for bitfield l4_sp{D}[F:0] */
+#define HW_ATL_RPF_L4_SPD_MSK 0x0000FFFFu
+/* Inverted bitmask for bitfield l4_sp{D}[F:0] */
+#define HW_ATL_RPF_L4_SPD_MSKN 0xFFFF0000u
+/* Lower bit position of bitfield l4_sp{D}[F:0] */
+#define HW_ATL_RPF_L4_SPD_SHIFT 0
+/* Width of bitfield l4_sp{D}[F:0] */
+#define HW_ATL_RPF_L4_SPD_WIDTH 16
+/* Default value of bitfield l4_sp{D}[F:0] */
+#define HW_ATL_RPF_L4_SPD_DEFAULT 0x0
+
+/* RX l4_dp{D}[F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l4_dp{D}[F:0]".
+ * Parameter: destport {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l4_dp0_i[15:0]"
+ */
+
+/* Register address for bitfield l4_dp{D}[F:0] */
+#define HW_ATL_RPF_L4_DPD_ADR(destport) (0x00005420u + (destport) * 0x4)
+/* Bitmask for bitfield l4_dp{D}[F:0] */
+#define HW_ATL_RPF_L4_DPD_MSK 0x0000FFFFu
+/* Inverted bitmask for bitfield l4_dp{D}[F:0] */
+#define HW_ATL_RPF_L4_DPD_MSKN 0xFFFF0000u
+/* Lower bit position of bitfield l4_dp{D}[F:0] */
+#define HW_ATL_RPF_L4_DPD_SHIFT 0
+/* Width of bitfield l4_dp{D}[F:0] */
+#define HW_ATL_RPF_L4_DPD_WIDTH 16
+/* Default value of bitfield l4_dp{D}[F:0] */
+#define HW_ATL_RPF_L4_DPD_DEFAULT 0x0
+
 /* rx ipv4_chk_en bitfield definitions
  * preprocessor definitions for the bitfield "ipv4_chk_en".
  * port="pif_rpo_ipv4_chk_en_i"
@@ -1319,6 +1402,24 @@
 /* default value of bitfield l4_chk_en */
 #define HW_ATL_RPOL4CHK_EN_DEFAULT 0x0
 
+/* RX outer_vl_ins_mode Bitfield Definitions
+ * Preprocessor definitions for the bitfield "outer_vl_ins_mode".
+ * PORT="pif_rpo_outer_vl_mode_i"
+ */
+
+/* Register address for bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_ADR 0x00005580
+/* Bitmask for bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSK 0x00000004
+/* Inverted bitmask for bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSKN 0xFFFFFFFB
+/* Lower bit position of bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT 2
+/* Width of bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_WIDTH 1
+/* Default value of bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_DEFAULT 0x0
+
 /* rx reg_res_dsbl bitfield definitions
  * preprocessor definitions for the bitfield "reg_res_dsbl".
  * port="pif_rx_reg_res_dsbl_i"
@@ -1891,6 +1992,19 @@
 /* default value of bitfield tx_buf_en */
 #define HW_ATL_TPB_TX_BUF_EN_DEFAULT 0x0
 
+/* register address for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900
+/* bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100
+/* inverted bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF
+/* lower bit position of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8
+/* width of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1
+/* default value of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0
+
 /* tx tx{b}_hi_thresh[c:0] bitfield definitions
  * preprocessor definitions for the bitfield "tx{b}_hi_thresh[c:0]".
  * parameter: buffer {b} | stride size 0x10 | range [0, 7]
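As a hedged aside, the ADR/MSK/SHIFT macros added in the hunk above are consumed through the driver's masked read-modify-write helper; a setter for the new tx_tc_mode field would look roughly like the sketch below, which assumes the aq_hw_write_reg_bit() helper from aq_hw_utils.h (the function name here is illustrative):

/* Sketch only: a bitfield setter in the style of hw_atl_llh.c.
 * aq_hw_write_reg_bit() performs the masked read-modify-write at the
 * given address/shift.
 */
void example_tpb_tx_tc_mode_set(struct aq_hw_s *aq_hw, u32 tx_traf_class_mode)
{
	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
			    HW_ATL_TPB_TX_TC_MODE_MSK,
			    HW_ATL_TPB_TX_TC_MODE_SHIFT,
			    tx_traf_class_mode);
}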
@@ -2405,4 +2519,63 @@
 #define HW_ATL_GLB_CPU_SCRATCH_SCP_ADR(scratch_scp) \
 	(0x00000300u + (scratch_scp) * 0x4)
 
+/* register address for bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_ADR 0x00000404
+/* bitmask for bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_MSK 0x00000002
+/* inverted bitmask for bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_MSKN 0xFFFFFFFD
+/* lower bit position of bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_SHIFT 1
+/* width of bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_WIDTH 1
+/* default value of bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_DEFAULT 0x0
+
+#define HW_ATL_RX_CTRL_ADDR_BEGIN_FL3L4   0x00005380
+#define HW_ATL_RX_SRCA_ADDR_BEGIN_FL3L4   0x000053B0
+#define HW_ATL_RX_DESTA_ADDR_BEGIN_FL3L4  0x000053D0
+
+#define HW_ATL_RPF_L3_REG_CTRL_ADR(location) (0x00005380 + (location) * 0x4)
+
+/* RX rpf_l3_sa{D}[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_sa{D}[1F:0]".
+ * Parameter: location {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_sa0_i[31:0]"
+ */
+
+/* Register address for bitfield pif_rpf_l3_sa0_i[31:0] */
+#define HW_ATL_RPF_L3_SRCA_ADR(location) (0x000053B0 + (location) * 0x4)
+/* Bitmask for bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_MSKN 0x00000000u
+/* Lower bit position of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_SHIFT 0
+/* Width of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_WIDTH 32
+/* Default value of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_DEFAULT 0x0
+
+/* RX rpf_l3_da{D}[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_da{D}[1F:0]".
+ * Parameter: location {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_da0_i[31:0]"
+ */
+
+/* Register address for bitfield pif_rpf_l3_da0_i[31:0] */
+#define HW_ATL_RPF_L3_DSTA_ADR(location) (0x000053D0 + (location) * 0x4)
+/* Bitmask for bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_MSKN 0x00000000u
+/* Lower bit position of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_SHIFT 0
+/* Width of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_WIDTH 32
+/* Default value of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_DEFAULT 0x0
+
+#define HW_ATL_FW_SM_RAM        0x2U
+
 #endif /* HW_ATL_LLH_INTERNAL_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index c965e65..5264685 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_utils.c: Definition of common functions for Atlantic hardware
@@ -25,7 +22,9 @@
 #define HW_ATL_MIF_ADDR         0x0208U
 #define HW_ATL_MIF_VAL          0x020CU
 
-#define HW_ATL_FW_SM_RAM        0x2U
+#define HW_ATL_RPC_CONTROL_ADR  0x0338U
+#define HW_ATL_RPC_STATE_ADR    0x033CU
+
 #define HW_ATL_MPI_FW_VERSION	0x18
 #define HW_ATL_MPI_CONTROL_ADR  0x0368U
 #define HW_ATL_MPI_STATE_ADR    0x036CU
@@ -49,9 +48,16 @@
 #define FORCE_FLASHLESS 0
 
 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
+
 static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
 				      enum hal_atl_utils_fw_state_e state);
 
+static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self);
+static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self);
+static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self);
+static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self);
+static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self);
+
 int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 {
 	int err = 0;
@@ -69,10 +75,10 @@
 				   self->fw_ver_actual) == 0) {
 		*fw_ops = &aq_fw_1x_ops;
 	} else if (hw_atl_utils_ver_match(HW_ATL_FW_VER_2X,
-					self->fw_ver_actual) == 0) {
+					  self->fw_ver_actual) == 0) {
 		*fw_ops = &aq_fw_2x_ops;
 	} else if (hw_atl_utils_ver_match(HW_ATL_FW_VER_3X,
-					self->fw_ver_actual) == 0) {
+					  self->fw_ver_actual) == 0) {
 		*fw_ops = &aq_fw_2x_ops;
 	} else {
 		aq_pr_err("Bad FW version detected: %x\n",
@@ -233,6 +239,7 @@
 {
 	int k;
 	u32 boot_exit_code = 0;
+	u32 val;
 
 	for (k = 0; k < 1000; ++k) {
 		u32 flb_status = aq_hw_read_reg(self,
@@ -259,9 +266,13 @@
 		int err = 0;
 
 		hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
-		AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR) &
-			       HW_ATL_MPI_STATE_MSK) == MPI_DEINIT,
-			       10, 1000U);
+		err = readx_poll_timeout_atomic(hw_atl_utils_mpi_get_state,
+						self, val,
+						(val & HW_ATL_MPI_STATE_MSK) ==
+						 MPI_DEINIT,
+						10, 10000U);
+		if (err)
+			return err;
 	}
 
 	if (self->rbl_enabled)
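A recurring change in this series replaces the driver-private AQ_HW_WAIT_FOR busy-wait macro with readx_poll_timeout_atomic() from <linux/iopoll.h>, which needs a single-argument getter function. A minimal sketch of the pattern, with illustrative names:

#include <linux/iopoll.h>

/* readx_poll_timeout_atomic() re-invokes the getter until the condition
 * holds or the timeout expires; it returns 0 on success or -ETIMEDOUT,
 * and the last value read is left in val either way.
 */
static u32 example_mpi_state_get(struct aq_hw_s *self)
{
	return aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR);
}

static int example_wait_mpi_deinit(struct aq_hw_s *self)
{
	u32 val;

	/* poll every 10us, give up after 10000us */
	return readx_poll_timeout_atomic(example_mpi_state_get, self, val,
					 (val & HW_ATL_MPI_STATE_MSK) ==
					 MPI_DEINIT,
					 10U, 10000U);
}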
@@ -274,16 +285,17 @@
 				  u32 *p, u32 cnt)
 {
 	int err = 0;
+	u32 val;
 
-	AQ_HW_WAIT_FOR(hw_atl_reg_glb_cpu_sem_get(self,
-						  HW_ATL_FW_SM_RAM) == 1U,
-						  1U, 10000U);
+	err = readx_poll_timeout_atomic(hw_atl_sem_ram_get,
+					self, val, val == 1U,
+					1U, 10000U);
 
 	if (err < 0) {
 		bool is_locked;
 
 		hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
-		is_locked = hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
+		is_locked = hw_atl_sem_ram_get(self);
 		if (!is_locked) {
 			err = -ETIME;
 			goto err_exit;
@@ -296,13 +308,14 @@
 		aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U);
 
 		if (IS_CHIP_FEATURE(REVISION_B1))
-			AQ_HW_WAIT_FOR(a != aq_hw_read_reg(self,
-							   HW_ATL_MIF_ADDR),
-				       1, 1000U);
+			err = readx_poll_timeout_atomic(hw_atl_utils_mif_addr_get,
+							self, val, val != a,
+							1U, 1000U);
 		else
-			AQ_HW_WAIT_FOR(!(0x100 & aq_hw_read_reg(self,
-							   HW_ATL_MIF_CMD)),
-				       1, 1000U);
+			err = readx_poll_timeout_atomic(hw_atl_utils_mif_cmd_get,
+							self, val,
+							!(val & 0x100),
+							1U, 1000U);
 
 		*(p++) = aq_hw_read_reg(self, HW_ATL_MIF_VAL);
 		a += 4;
@@ -317,25 +330,43 @@
 static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
 					 u32 cnt)
 {
+	u32 val;
 	int err = 0;
-	bool is_locked;
 
-	is_locked = hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
-	if (!is_locked) {
-		err = -ETIME;
+	err = readx_poll_timeout_atomic(hw_atl_sem_ram_get, self,
+					val, val == 1U,
+					10U, 100000U);
+	if (err < 0)
 		goto err_exit;
-	}
 
-	aq_hw_write_reg(self, 0x00000208U, a);
+	if (IS_CHIP_FEATURE(REVISION_B1)) {
+		u32 offset = 0;
 
-	for (++cnt; --cnt;) {
-		u32 i = 0U;
+		for (; offset < cnt; ++offset) {
+			aq_hw_write_reg(self, 0x328, p[offset]);
+			aq_hw_write_reg(self, 0x32C,
+					(0x80000000 | (0xFFFF & (offset * 4))));
+			hw_atl_mcp_up_force_intr_set(self, 1);
+			/* 1000 times by 10us = 10ms */
+			err = readx_poll_timeout_atomic(hw_atl_scrpad12_get,
+							self, val,
+							(val & 0xF0000000) !=
+							0x80000000,
+							10U, 10000U);
+		}
+	} else {
+		u32 offset = 0;
 
-		aq_hw_write_reg(self, 0x0000020CU, *(p++));
-		aq_hw_write_reg(self, 0x00000200U, 0xC000U);
+		aq_hw_write_reg(self, 0x208, a);
 
-		for (i = 1024U;
-			(0x100U & aq_hw_read_reg(self, 0x00000200U)) && --i;) {
+		for (; offset < cnt; ++offset) {
+			aq_hw_write_reg(self, 0x20C, p[offset]);
+			aq_hw_write_reg(self, 0x200, 0xC000);
+
+			err = readx_poll_timeout_atomic(hw_atl_utils_mif_cmd_get,
+							self, val,
+							(val & 0x100) == 0,
+							1000U, 10000U);
 		}
 	}
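/* For reference, a summary of the upload path above (sketch wording,
 * not patch content): on REVISION_B1 parts each dword is written to
 * scratch register 0x328, its byte offset plus a valid flag (bit 31)
 * to 0x32C, the firmware is kicked via hw_atl_mcp_up_force_intr_set(),
 * and scratchpad 12 is polled until the top nibble changes from 0x8,
 * acknowledging consumption.  Older parts keep the auto-incrementing
 * MIF write (0x208/0x20C/0x200) and poll the MIF command busy bit
 * (0x100) instead.
 */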
 
@@ -378,15 +409,14 @@
 	hw_atl_reg_glb_cpu_scratch_scp_set(self, 0x00000000U, 25U);
 
 	/* check 10 times by 1ms */
-	AQ_HW_WAIT_FOR(0U != (self->mbox_addr =
-			aq_hw_read_reg(self, 0x360U)), 1000U, 10U);
+	err = readx_poll_timeout_atomic(hw_atl_scrpad25_get,
+					self, self->mbox_addr,
+					self->mbox_addr != 0U,
+					1000U, 10000U);
 
 	return err;
 }
 
-#define HW_ATL_RPC_CONTROL_ADR 0x0338U
-#define HW_ATL_RPC_STATE_ADR   0x033CU
-
 struct aq_hw_atl_utils_fw_rpc_tid_s {
 	union {
 		u32 val;
@@ -399,7 +429,7 @@
 
 #define hw_atl_utils_fw_rpc_init(_H_) hw_atl_utils_fw_rpc_wait(_H_, NULL)
 
-static int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size)
+int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size)
 {
 	int err = 0;
 	struct aq_hw_atl_utils_fw_rpc_tid_s sw;
@@ -411,7 +441,7 @@
 	err = hw_atl_utils_fw_upload_dwords(self, self->rpc_addr,
 					    (u32 *)(void *)&self->rpc,
 					    (rpc_size + sizeof(u32) -
-					    sizeof(u8)) / sizeof(u32));
+					     sizeof(u8)) / sizeof(u32));
 	if (err < 0)
 		goto err_exit;
 
@@ -423,8 +453,8 @@
 	return err;
 }
 
-static int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
-				    struct hw_aq_atl_utils_fw_rpc **rpc)
+int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
+			     struct hw_atl_utils_fw_rpc **rpc)
 {
 	int err = 0;
 	struct aq_hw_atl_utils_fw_rpc_tid_s sw;
@@ -435,12 +465,10 @@
 
 		self->rpc_tid = sw.tid;
 
-		AQ_HW_WAIT_FOR(sw.tid ==
-				(fw.val =
-				aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR),
-				fw.tid), 1000U, 100U);
-		if (err < 0)
-			goto err_exit;
+		err = readx_poll_timeout_atomic(hw_atl_utils_rpc_state_get,
+						self, fw.val,
+						sw.tid == fw.tid,
+						1000U, 100000U);
 
 		if (fw.len == 0xFFFFU) {
 			err = hw_atl_utils_fw_rpc_call(self, sw.len);
@@ -448,8 +476,6 @@
 				goto err_exit;
 		}
 	} while (sw.tid != fw.tid || 0xFFFFU == fw.len);
-	if (err < 0)
-		goto err_exit;
 
 	if (rpc) {
 		if (fw.len) {
@@ -459,7 +485,7 @@
 						      (u32 *)(void *)
 						      &self->rpc,
 						      (fw.len + sizeof(u32) -
-						      sizeof(u8)) /
+						       sizeof(u8)) /
 						      sizeof(u32));
 			if (err < 0)
 				goto err_exit;
@@ -489,16 +515,16 @@
 }
 
 int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self,
-			       struct hw_aq_atl_utils_mbox_header *pmbox)
+			       struct hw_atl_utils_mbox_header *pmbox)
 {
 	return hw_atl_utils_fw_downld_dwords(self,
-				      self->mbox_addr,
-				      (u32 *)(void *)pmbox,
-				      sizeof(*pmbox) / sizeof(u32));
+					     self->mbox_addr,
+					     (u32 *)(void *)pmbox,
+					     sizeof(*pmbox) / sizeof(u32));
 }
 
 void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
-				 struct hw_aq_atl_utils_mbox *pmbox)
+				 struct hw_atl_utils_mbox *pmbox)
 {
 	int err = 0;
 
@@ -516,7 +542,7 @@
 		pmbox->stats.ubtc = pmbox->stats.uptc * mtu;
 		pmbox->stats.dpc = atomic_read(&self->dpc);
 	} else {
-		pmbox->stats.dpc = hw_atl_reg_rx_dma_stat_counter7get(self);
+		pmbox->stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
 	}
 
 err_exit:;
@@ -538,7 +564,7 @@
 {
 	int err = 0;
 	u32 transaction_id = 0;
-	struct hw_aq_atl_utils_mbox_header mbox;
+	struct hw_atl_utils_mbox_header mbox;
 	u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
 
 	if (state == MPI_RESET) {
@@ -546,10 +572,11 @@
 
 		transaction_id = mbox.transaction_id;
 
-		AQ_HW_WAIT_FOR(transaction_id !=
-				(hw_atl_utils_mpi_read_mbox(self, &mbox),
-				 mbox.transaction_id),
-			       1000U, 100U);
+		err = readx_poll_timeout_atomic(hw_atl_utils_get_mpi_mbox_tid,
+						self, mbox.transaction_id,
+						transaction_id !=
+						mbox.transaction_id,
+						1000U, 100000U);
 		if (err < 0)
 			goto err_exit;
 	}
@@ -572,7 +599,7 @@
 
 int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
 {
-	u32 cp0x036C = aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR);
+	u32 cp0x036C = hw_atl_utils_mpi_get_state(self);
 	u32 link_speed_mask = cp0x036C >> HW_ATL_MPI_SPEED_SHIFT;
 	struct aq_hw_link_status_s *link_status = &self->aq_link_status;
 
@@ -645,9 +672,9 @@
 
 	if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) {
 		/* chip revision */
-		l = 0xE3000000U
-			| (0xFFFFU & aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG))
-			| (0x00 << 16);
+		l = 0xE3000000U |
+		    (0xFFFFU & aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG)) |
+		    (0x00 << 16);
 		h = 0x8001300EU;
 
 		mac[5] = (u8)(0xFFU & l);
@@ -730,17 +757,10 @@
 	return 0;
 }
 
-int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
-			      unsigned int power_state)
-{
-	hw_atl_utils_mpi_set_speed(self, 0);
-	hw_atl_utils_mpi_set_state(self, MPI_POWER);
-	return 0;
-}
-
 int hw_atl_utils_update_stats(struct aq_hw_s *self)
 {
-	struct hw_aq_atl_utils_mbox mbox;
+	struct hw_atl_utils_mbox mbox;
+	struct aq_stats_s *cs = &self->curr_stats;
 
 	hw_atl_utils_mpi_read_stats(self, &mbox);
 
@@ -767,10 +787,11 @@
 		AQ_SDELTA(dpc);
 	}
 #undef AQ_SDELTA
-	self->curr_stats.dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counterlsw_get(self);
-	self->curr_stats.dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counterlsw_get(self);
-	self->curr_stats.dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counterlsw_get(self);
-	self->curr_stats.dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counterlsw_get(self);
+
+	cs->dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counter_get(self);
+	cs->dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counter_get(self);
+	cs->dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counter_get(self);
+	cs->dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counter_get(self);
 
 	memcpy(&self->last_stats, &mbox.stats, sizeof(mbox.stats));
 
@@ -825,6 +846,110 @@
 	return 0;
 }
 
+static int aq_fw1x_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac)
+{
+	struct hw_atl_utils_fw_rpc *prpc = NULL;
+	unsigned int rpc_size = 0U;
+	int err = 0;
+
+	err = hw_atl_utils_fw_rpc_wait(self, &prpc);
+	if (err < 0)
+		goto err_exit;
+
+	memset(prpc, 0, sizeof(*prpc));
+
+	if (wol_enabled) {
+		rpc_size = sizeof(prpc->msg_id) + sizeof(prpc->msg_wol);
+
+		prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_WOL_ADD;
+		prpc->msg_wol.priority =
+				HAL_ATLANTIC_UTILS_FW_MSG_WOL_PRIOR;
+		prpc->msg_wol.pattern_id =
+				HAL_ATLANTIC_UTILS_FW_MSG_WOL_PATTERN;
+		prpc->msg_wol.wol_packet_type =
+				HAL_ATLANTIC_UTILS_FW_MSG_WOL_MAG_PKT;
+
+		ether_addr_copy((u8 *)&prpc->msg_wol.wol_pattern, mac);
+	} else {
+		rpc_size = sizeof(prpc->msg_id) + sizeof(prpc->msg_del_id);
+
+		prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_WOL_DEL;
+		prpc->msg_wol.pattern_id =
+				HAL_ATLANTIC_UTILS_FW_MSG_WOL_PATTERN;
+	}
+
+	err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+
+err_exit:
+	return err;
+}
+
+static int aq_fw1x_set_power(struct aq_hw_s *self, unsigned int power_state,
+			     u8 *mac)
+{
+	struct hw_atl_utils_fw_rpc *prpc = NULL;
+	unsigned int rpc_size = 0U;
+	int err = 0;
+
+	if (self->aq_nic_cfg->wol & AQ_NIC_WOL_ENABLED) {
+		err = aq_fw1x_set_wol(self, 1, mac);
+
+		if (err < 0)
+			goto err_exit;
+
+		rpc_size = sizeof(prpc->msg_id) +
+			   sizeof(prpc->msg_enable_wakeup);
+
+		err = hw_atl_utils_fw_rpc_wait(self, &prpc);
+
+		if (err < 0)
+			goto err_exit;
+
+		memset(prpc, 0, rpc_size);
+
+		prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_ENABLE_WAKEUP;
+		prpc->msg_enable_wakeup.pattern_mask = 0x00000002;
+
+		err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+		if (err < 0)
+			goto err_exit;
+	}
+	hw_atl_utils_mpi_set_speed(self, 0);
+	hw_atl_utils_mpi_set_state(self, MPI_POWER);
+
+err_exit:
+	return err;
+}
+
+static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self)
+{
+	struct hw_atl_utils_mbox_header mbox;
+
+	hw_atl_utils_mpi_read_mbox(self, &mbox);
+
+	return mbox.transaction_id;
+}
+
+static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR);
+}
+
+static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_MIF_CMD);
+}
+
+static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_MIF_ADDR);
+}
+
+static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR);
+}
+
 const struct aq_fw_ops aq_fw_1x_ops = {
 	.init = hw_atl_utils_mpi_create,
 	.deinit = hw_atl_fw1x_deinit,
@@ -834,5 +959,9 @@
 	.set_state = hw_atl_utils_mpi_set_state,
 	.update_link_status = hw_atl_utils_mpi_get_link_status,
 	.update_stats = hw_atl_utils_update_stats,
+	.get_phy_temp = NULL,
+	.set_power = aq_fw1x_set_power,
+	.set_eee_rate = NULL,
+	.get_eee_rate = NULL,
 	.set_flow_control = NULL,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index b875590..692bed7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_utils.h: Declaration of common functions for Atlantic hardware
@@ -75,7 +72,7 @@
 	} v4;
 };
 
-struct __packed hw_aq_atl_utils_fw_rpc {
+struct __packed hw_atl_utils_fw_rpc {
 	u32 msg_id;
 
 	union {
@@ -101,8 +98,6 @@
 		struct {
 			u32 priority;
 			u32 wol_packet_type;
-			u16 friendly_name_len;
-			u16 friendly_name[65];
 			u32 pattern_id;
 			u32 next_wol_pattern_offset;
 
@@ -134,27 +129,172 @@
 					u32 pattern_offset;
 					u32 pattern_size;
 				} wol_bit_map_pattern;
+
+				struct {
+					u8 mac_addr[ETH_ALEN];
+				} wol_magic_packet_patter;
 			} wol_pattern;
 		} msg_wol;
 
 		struct {
-			u32 is_wake_on_link_down;
-			u32 is_wake_on_link_up;
-		} msg_wolink;
+			union {
+				u32 pattern_mask;
+
+				struct {
+					u32 reason_arp_v4_pkt : 1;
+					u32 reason_ipv4_ping_pkt : 1;
+					u32 reason_ipv6_ns_pkt : 1;
+					u32 reason_ipv6_ping_pkt : 1;
+					u32 reason_link_up : 1;
+					u32 reason_link_down : 1;
+					u32 reason_maximum : 1;
+				};
+			};
+
+			union {
+				u32 offload_mask;
+			};
+		} msg_enable_wakeup;
+
+		struct {
+			u32 id;
+		} msg_del_id;
 	};
 };
 
-struct __packed hw_aq_atl_utils_mbox_header {
+struct __packed hw_atl_utils_mbox_header {
 	u32 version;
 	u32 transaction_id;
 	u32 error;
 };
 
-struct __packed hw_aq_atl_utils_mbox {
-	struct hw_aq_atl_utils_mbox_header header;
-	struct hw_atl_stats_s stats;
+struct __packed hw_aq_info {
+	u8 reserved[6];
+	u16 phy_fault_code;
+	u16 phy_temperature;
+	u8 cable_len;
+	u8 reserved1;
+	u32 cable_diag_data[4];
+	u8 reserved2[32];
+	u32 caps_lo;
+	u32 caps_hi;
 };
 
+struct __packed hw_atl_utils_mbox {
+	struct hw_atl_utils_mbox_header header;
+	struct hw_atl_stats_s stats;
+	struct hw_aq_info info;
+};
+
+/* fw2x */
+typedef u32	fw_offset_t;
+
+struct __packed offload_ip_info {
+	u8 v4_local_addr_count;
+	u8 v4_addr_count;
+	u8 v6_local_addr_count;
+	u8 v6_addr_count;
+	fw_offset_t v4_addr;
+	fw_offset_t v4_prefix;
+	fw_offset_t v6_addr;
+	fw_offset_t v6_prefix;
+};
+
+struct __packed offload_port_info {
+	u16 udp_port_count;
+	u16 tcp_port_count;
+	fw_offset_t udp_port;
+	fw_offset_t tcp_port;
+};
+
+struct __packed offload_ka_info {
+	u16 v4_ka_count;
+	u16 v6_ka_count;
+	u32 retry_count;
+	u32 retry_interval;
+	fw_offset_t v4_ka;
+	fw_offset_t v6_ka;
+};
+
+struct __packed offload_rr_info {
+	u32 rr_count;
+	u32 rr_buf_len;
+	fw_offset_t rr_id_x;
+	fw_offset_t rr_buf;
+};
+
+struct __packed offload_info {
+	u32 version;
+	u32 len;
+	u8 mac_addr[ETH_ALEN];
+
+	u8 reserved[2];
+
+	struct offload_ip_info ips;
+	struct offload_port_info ports;
+	struct offload_ka_info kas;
+	struct offload_rr_info rrs;
+	u8 buf[0];
+};
+
+enum hw_atl_rx_action_with_traffic {
+	HW_ATL_RX_DISCARD,
+	HW_ATL_RX_HOST,
+};
+
+struct aq_rx_filter_vlan {
+	u8 enable;
+	u8 location;
+	u16 vlan_id;
+	u8 queue;
+};
+
+struct aq_rx_filter_l2 {
+	s8 queue;
+	u8 location;
+	u8 user_priority_en;
+	u8 user_priority;
+	u16 ethertype;
+};
+
+struct aq_rx_filter_l3l4 {
+	u32 cmd;
+	u8 location;
+	u32 ip_dst[4];
+	u32 ip_src[4];
+	u16 p_dst;
+	u16 p_src;
+	u8 is_ipv6;
+};
+
+enum hw_atl_rx_protocol_value_l3l4 {
+	HW_ATL_RX_TCP,
+	HW_ATL_RX_UDP,
+	HW_ATL_RX_SCTP,
+	HW_ATL_RX_ICMP
+};
+
+enum hw_atl_rx_ctrl_registers_l3l4 {
+	HW_ATL_RX_ENABLE_MNGMNT_QUEUE_L3L4 = BIT(22),
+	HW_ATL_RX_ENABLE_QUEUE_L3L4        = BIT(23),
+	HW_ATL_RX_ENABLE_ARP_FLTR_L3       = BIT(24),
+	HW_ATL_RX_ENABLE_CMP_PROT_L4       = BIT(25),
+	HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4  = BIT(26),
+	HW_ATL_RX_ENABLE_CMP_SRC_PORT_L4   = BIT(27),
+	HW_ATL_RX_ENABLE_CMP_DEST_ADDR_L3  = BIT(28),
+	HW_ATL_RX_ENABLE_CMP_SRC_ADDR_L3   = BIT(29),
+	HW_ATL_RX_ENABLE_L3_IPV6           = BIT(30),
+	HW_ATL_RX_ENABLE_FLTR_L3L4         = BIT(31)
+};
+
+#define HW_ATL_RX_QUEUE_FL3L4_SHIFT       8U
+#define HW_ATL_RX_ACTION_FL3F4_SHIFT      16U
+
+#define HW_ATL_RX_CNT_REG_ADDR_IPV6       4U
+
+#define HW_ATL_GET_REG_LOCATION_FL3L4(location) \
+	((location) - AQ_RX_FIRST_LOC_FL3L4)
+
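/* Illustrative sketch (not part of the patch): composing a filter
 * command word from the enum and shifts above, here for a TCP
 * destination-port filter steered to an RX queue.  Only the HW_ATL_*
 * names come from this header; the function name is hypothetical.
 */
static inline u32 example_l3l4_cmd(u8 queue)
{
	return HW_ATL_RX_ENABLE_FLTR_L3L4 |
	       HW_ATL_RX_ENABLE_CMP_PROT_L4 |
	       HW_ATL_RX_TCP |	/* protocol value in the low bits */
	       HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4 |
	       HW_ATL_RX_ENABLE_QUEUE_L3L4 |
	       (queue << HW_ATL_RX_QUEUE_FL3L4_SHIFT) |
	       (HW_ATL_RX_HOST << HW_ATL_RX_ACTION_FL3F4_SHIFT);
}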
 #define HAL_ATLANTIC_UTILS_CHIP_MIPS         0x00000001U
 #define HAL_ATLANTIC_UTILS_CHIP_TPO2         0x00000002U
 #define HAL_ATLANTIC_UTILS_CHIP_RPF2         0x00000004U
@@ -181,6 +321,21 @@
 #define HAL_ATLANTIC_RATE_100M       BIT(5)
 #define HAL_ATLANTIC_RATE_INVALID    BIT(6)
 
+#define HAL_ATLANTIC_UTILS_FW_MSG_PING          0x1U
+#define HAL_ATLANTIC_UTILS_FW_MSG_ARP           0x2U
+#define HAL_ATLANTIC_UTILS_FW_MSG_INJECT        0x3U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_ADD       0x4U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_PRIOR     0x10000000U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_PATTERN   0x1U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_MAG_PKT   0x2U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_DEL       0x5U
+#define HAL_ATLANTIC_UTILS_FW_MSG_ENABLE_WAKEUP 0x6U
+#define HAL_ATLANTIC_UTILS_FW_MSG_MSM_PFC       0x7U
+#define HAL_ATLANTIC_UTILS_FW_MSG_PROVISIONING  0x8U
+#define HAL_ATLANTIC_UTILS_FW_MSG_OFFLOAD_ADD   0x9U
+#define HAL_ATLANTIC_UTILS_FW_MSG_OFFLOAD_DEL   0xAU
+#define HAL_ATLANTIC_UTILS_FW_MSG_CABLE_DIAG    0xDU
+
 enum hw_atl_fw2x_rate {
 	FW2X_RATE_100M    = 0x20,
 	FW2X_RATE_1G      = 0x100,
@@ -286,10 +441,10 @@
 void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p);
 
 int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self,
-			       struct hw_aq_atl_utils_mbox_header *pmbox);
+			       struct hw_atl_utils_mbox_header *pmbox);
 
 void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
-				 struct hw_aq_atl_utils_mbox *pmbox);
+				 struct hw_atl_utils_mbox *pmbox);
 
 void hw_atl_utils_mpi_set(struct aq_hw_s *self,
 			  enum hal_atl_utils_fw_state_e state,
@@ -316,9 +471,17 @@
 int hw_atl_utils_update_stats(struct aq_hw_s *self);
 
 struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
+
 int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
 				  u32 *p, u32 cnt);
 
+int hw_atl_utils_fw_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac);
+
+int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size);
+
+int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
+			     struct hw_atl_utils_fw_rpc **rpc);
+
 extern const struct aq_fw_ops aq_fw_1x_ops;
 extern const struct aq_fw_ops aq_fw_2x_ops;
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index e379437..7bc51f8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 /* File hw_atl_utils_fw2x.c: Definition of firmware 2.x functions for
@@ -16,30 +13,81 @@
 #include "../aq_pci_func.h"
 #include "../aq_ring.h"
 #include "../aq_vec.h"
+#include "../aq_nic.h"
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
 
-#define HW_ATL_FW2X_MPI_EFUSE_ADDR	0x364
-#define HW_ATL_FW2X_MPI_MBOX_ADDR	0x360
+#define HW_ATL_FW2X_MPI_RPC_ADDR        0x334
 
+#define HW_ATL_FW2X_MPI_MBOX_ADDR       0x360
+#define HW_ATL_FW2X_MPI_EFUSE_ADDR	0x364
 #define HW_ATL_FW2X_MPI_CONTROL_ADDR	0x368
 #define HW_ATL_FW2X_MPI_CONTROL2_ADDR	0x36C
-
 #define HW_ATL_FW2X_MPI_STATE_ADDR	0x370
-#define HW_ATL_FW2X_MPI_STATE2_ADDR	0x374
+#define HW_ATL_FW2X_MPI_STATE2_ADDR     0x374
+
+#define HW_ATL_FW2X_CAP_PAUSE            BIT(CAPS_HI_PAUSE)
+#define HW_ATL_FW2X_CAP_ASYM_PAUSE       BIT(CAPS_HI_ASYMMETRIC_PAUSE)
+#define HW_ATL_FW2X_CAP_SLEEP_PROXY      BIT(CAPS_HI_SLEEP_PROXY)
+#define HW_ATL_FW2X_CAP_WOL              BIT(CAPS_HI_WOL)
+
+#define HW_ATL_FW2X_CTRL_SLEEP_PROXY      BIT(CTRL_SLEEP_PROXY)
+#define HW_ATL_FW2X_CTRL_WOL              BIT(CTRL_WOL)
+#define HW_ATL_FW2X_CTRL_LINK_DROP        BIT(CTRL_LINK_DROP)
+#define HW_ATL_FW2X_CTRL_PAUSE            BIT(CTRL_PAUSE)
+#define HW_ATL_FW2X_CTRL_TEMPERATURE      BIT(CTRL_TEMPERATURE)
+#define HW_ATL_FW2X_CTRL_ASYMMETRIC_PAUSE BIT(CTRL_ASYMMETRIC_PAUSE)
+#define HW_ATL_FW2X_CTRL_FORCE_RECONNECT  BIT(CTRL_FORCE_RECONNECT)
+
+#define HW_ATL_FW2X_CAP_EEE_1G_MASK      BIT(CAPS_HI_1000BASET_FD_EEE)
+#define HW_ATL_FW2X_CAP_EEE_2G5_MASK     BIT(CAPS_HI_2P5GBASET_FD_EEE)
+#define HW_ATL_FW2X_CAP_EEE_5G_MASK      BIT(CAPS_HI_5GBASET_FD_EEE)
+#define HW_ATL_FW2X_CAP_EEE_10G_MASK     BIT(CAPS_HI_10GBASET_FD_EEE)
+
+#define HAL_ATLANTIC_WOL_FILTERS_COUNT   8
+#define HAL_ATLANTIC_UTILS_FW2X_MSG_WOL  0x0E
+
+struct __packed fw2x_msg_wol_pattern {
+	u8 mask[16];
+	u32 crc;
+};
+
+struct __packed fw2x_msg_wol {
+	u32 msg_id;
+	u8 hw_addr[ETH_ALEN];
+	u8 magic_packet_enabled;
+	u8 filter_count;
+	struct fw2x_msg_wol_pattern filter[HAL_ATLANTIC_WOL_FILTERS_COUNT];
+	u8 link_up_enabled;
+	u8 link_down_enabled;
+	u16 reserved;
+	u32 link_up_timeout;
+	u32 link_down_timeout;
+};
 
 static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed);
 static int aq_fw2x_set_state(struct aq_hw_s *self,
 			     enum hal_atl_utils_fw_state_e state);
 
+static u32 aq_fw2x_mbox_get(struct aq_hw_s *self);
+static u32 aq_fw2x_rpc_get(struct aq_hw_s *self);
+static u32 aq_fw2x_state2_get(struct aq_hw_s *self);
+
 static int aq_fw2x_init(struct aq_hw_s *self)
 {
 	int err = 0;
 
 	/* check 10 times by 1ms */
-	AQ_HW_WAIT_FOR(0U != (self->mbox_addr =
-			aq_hw_read_reg(self, HW_ATL_FW2X_MPI_MBOX_ADDR)),
-		       1000U, 10U);
+	err = readx_poll_timeout_atomic(aq_fw2x_mbox_get,
+					self, self->mbox_addr,
+					self->mbox_addr != 0U,
+					1000U, 10000U);
+
+	err = readx_poll_timeout_atomic(aq_fw2x_rpc_get,
+					self, self->rpc_addr,
+					self->rpc_addr != 0U,
+					1000U, 100000U);
+
 	return err;
 }
 
@@ -78,6 +126,38 @@
 	return rate;
 }
 
+static u32 fw2x_to_eee_mask(u32 speed)
+{
+	u32 rate = 0;
+
+	if (speed & HW_ATL_FW2X_CAP_EEE_10G_MASK)
+		rate |= AQ_NIC_RATE_EEE_10G;
+	if (speed & HW_ATL_FW2X_CAP_EEE_5G_MASK)
+		rate |= AQ_NIC_RATE_EEE_5G;
+	if (speed & HW_ATL_FW2X_CAP_EEE_2G5_MASK)
+		rate |= AQ_NIC_RATE_EEE_2GS;
+	if (speed & HW_ATL_FW2X_CAP_EEE_1G_MASK)
+		rate |= AQ_NIC_RATE_EEE_1G;
+
+	return rate;
+}
+
+static u32 eee_mask_to_fw2x(u32 speed)
+{
+	u32 rate = 0;
+
+	if (speed & AQ_NIC_RATE_EEE_10G)
+		rate |= HW_ATL_FW2X_CAP_EEE_10G_MASK;
+	if (speed & AQ_NIC_RATE_EEE_5G)
+		rate |= HW_ATL_FW2X_CAP_EEE_5G_MASK;
+	if (speed & AQ_NIC_RATE_EEE_2GS)
+		rate |= HW_ATL_FW2X_CAP_EEE_2G5_MASK;
+	if (speed & AQ_NIC_RATE_EEE_1G)
+		rate |= HW_ATL_FW2X_CAP_EEE_1G_MASK;
+
+	return rate;
+}
+
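/* Sketch of the relationship (not patch content): the two helpers
 * above are inverse mappings over the EEE-capable rates, translating
 * between firmware capability bits (CAPS_HI_*_FD_EEE) and the driver's
 * AQ_NIC_RATE_EEE_* mask, e.g.:
 *
 *	u32 fw_bits = eee_mask_to_fw2x(AQ_NIC_RATE_EEE_1G |
 *				       AQ_NIC_RATE_EEE_10G);
 *	// fw2x_to_eee_mask(fw_bits) yields the original rate mask
 */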
 static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed)
 {
 	u32 val = link_speed_mask_2fw2x_ratemask(speed);
@@ -100,14 +180,27 @@
 		*mpi_state &= ~BIT(CAPS_HI_ASYMMETRIC_PAUSE);
 }
 
+static void aq_fw2x_upd_eee_rate_bits(struct aq_hw_s *self, u32 *mpi_opts,
+				      u32 eee_speeds)
+{
+	*mpi_opts &= ~(HW_ATL_FW2X_CAP_EEE_1G_MASK |
+		       HW_ATL_FW2X_CAP_EEE_2G5_MASK |
+		       HW_ATL_FW2X_CAP_EEE_5G_MASK |
+		       HW_ATL_FW2X_CAP_EEE_10G_MASK);
+
+	*mpi_opts |= eee_mask_to_fw2x(eee_speeds);
+}
+
 static int aq_fw2x_set_state(struct aq_hw_s *self,
 			     enum hal_atl_utils_fw_state_e state)
 {
 	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 
 	switch (state) {
 	case MPI_INIT:
 		mpi_state &= ~BIT(CAPS_HI_LINK_DROP);
+		aq_fw2x_upd_eee_rate_bits(self, &mpi_state, cfg->eee_speeds);
 		aq_fw2x_set_mpi_flow_control(self, &mpi_state);
 		break;
 	case MPI_DEINIT:
@@ -126,7 +219,7 @@
 {
 	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR);
 	u32 speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G |
-				FW2X_RATE_2G5 | FW2X_RATE_5G | FW2X_RATE_10G);
+				 FW2X_RATE_2G5 | FW2X_RATE_5G | FW2X_RATE_10G);
 	struct aq_hw_link_status_s *link_status = &self->aq_link_status;
 
 	if (speed) {
@@ -175,9 +268,7 @@
 
 		get_random_bytes(&rnd, sizeof(unsigned int));
 
-		l = 0xE3000000U
-			| (0xFFFFU & rnd)
-			| (0x00 << 16);
+		l = 0xE3000000U | (0xFFFFU & rnd) | (0x00 << 16);
 		h = 0x8001300EU;
 
 		mac[5] = (u8)(0xFFU & l);
@@ -199,22 +290,194 @@
 	int err = 0;
 	u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
 	u32 orig_stats_val = mpi_opts & BIT(CAPS_HI_STATISTICS);
+	u32 stats_val;
 
 	/* Toggle statistics bit for FW to update */
 	mpi_opts = mpi_opts ^ BIT(CAPS_HI_STATISTICS);
 	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
 
 	/* Wait for FW to report back */
-	AQ_HW_WAIT_FOR(orig_stats_val !=
-		       (aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
-				       BIT(CAPS_HI_STATISTICS)),
-		       1U, 10000U);
+	err = readx_poll_timeout_atomic(aq_fw2x_state2_get,
+					self, stats_val,
+					orig_stats_val != (stats_val &
+					BIT(CAPS_HI_STATISTICS)),
+					1U, 10000U);
 	if (err)
 		return err;
 
 	return hw_atl_utils_update_stats(self);
 }
 
+static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp)
+{
+	u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+	u32 temp_val = mpi_opts & HW_ATL_FW2X_CTRL_TEMPERATURE;
+	u32 phy_temp_offset;
+	u32 temp_res;
+	int err = 0;
+	u32 val;
+
+	phy_temp_offset = self->mbox_addr +
+			  offsetof(struct hw_atl_utils_mbox, info) +
+			  offsetof(struct hw_aq_info, phy_temperature);
+	/* Toggle 0x36C.18 (CTRL_TEMPERATURE) bit for FW to update the PHY temperature */
+	mpi_opts = mpi_opts ^ HW_ATL_FW2X_CTRL_TEMPERATURE;
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+	/* Wait for FW to report back */
+	err = readx_poll_timeout_atomic(aq_fw2x_state2_get, self, val,
+					temp_val !=
+					(val & HW_ATL_FW2X_CTRL_TEMPERATURE),
+					1U, 10000U);
+	err = hw_atl_utils_fw_downld_dwords(self, phy_temp_offset,
+					    &temp_res, 1);
+
+	if (err)
+		return err;
+
+	/* Convert PHY temperature from 1/256 degree Celsius
+	 * to 1/1000 degree Celsius.
+	 */
+	*temp = (temp_res & 0xFFFF) * 1000 / 256;
+
+	return 0;
+}
+
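/* Worked example for the conversion above (sketch, not patch content):
 * the firmware reports the PHY temperature in units of 1/256 deg C, so
 * a raw reading of 0x2C00 (11264) gives 11264 * 1000 / 256 = 44000,
 * i.e. 44.000 deg C in the millidegree units returned to the caller.
 */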
+static int aq_fw2x_set_sleep_proxy(struct aq_hw_s *self, u8 *mac)
+{
+	struct hw_atl_utils_fw_rpc *rpc = NULL;
+	struct offload_info *cfg = NULL;
+	unsigned int rpc_size = 0U;
+	u32 mpi_opts;
+	int err = 0;
+	u32 val;
+
+	rpc_size = sizeof(rpc->msg_id) + sizeof(*cfg);
+
+	err = hw_atl_utils_fw_rpc_wait(self, &rpc);
+	if (err < 0)
+		goto err_exit;
+
+	memset(rpc, 0, rpc_size);
+	cfg = (struct offload_info *)(&rpc->msg_id + 1);
+
+	memcpy(cfg->mac_addr, mac, ETH_ALEN);
+	cfg->len = sizeof(*cfg);
+
+	/* Clear bits 0x36C.23 and 0x36C.22 */
+	mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+	mpi_opts &= ~HW_ATL_FW2X_CTRL_SLEEP_PROXY;
+	mpi_opts &= ~HW_ATL_FW2X_CTRL_LINK_DROP;
+
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+	err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+	if (err < 0)
+		goto err_exit;
+
+	/* Set bit 0x36C.23 */
+	mpi_opts |= HW_ATL_FW2X_CTRL_SLEEP_PROXY;
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+	err = readx_poll_timeout_atomic(aq_fw2x_state2_get,
+					self, val,
+					val & HW_ATL_FW2X_CTRL_SLEEP_PROXY,
+					1U, 100000U);
+
+err_exit:
+	return err;
+}
+
+static int aq_fw2x_set_wol_params(struct aq_hw_s *self, u8 *mac)
+{
+	struct hw_atl_utils_fw_rpc *rpc = NULL;
+	struct fw2x_msg_wol *msg = NULL;
+	u32 mpi_opts;
+	int err = 0;
+	u32 val;
+
+	err = hw_atl_utils_fw_rpc_wait(self, &rpc);
+	if (err < 0)
+		goto err_exit;
+
+	msg = (struct fw2x_msg_wol *)rpc;
+
+	memset(msg, 0, sizeof(*msg));
+
+	msg->msg_id = HAL_ATLANTIC_UTILS_FW2X_MSG_WOL;
+	msg->magic_packet_enabled = true;
+	memcpy(msg->hw_addr, mac, ETH_ALEN);
+
+	mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+	mpi_opts &= ~(HW_ATL_FW2X_CTRL_SLEEP_PROXY | HW_ATL_FW2X_CTRL_WOL);
+
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+	err = hw_atl_utils_fw_rpc_call(self, sizeof(*msg));
+	if (err < 0)
+		goto err_exit;
+
+	/* Set bit 0x36C.24 */
+	mpi_opts |= HW_ATL_FW2X_CTRL_WOL;
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+	err = readx_poll_timeout_atomic(aq_fw2x_state2_get,
+					self, val, val & HW_ATL_FW2X_CTRL_WOL,
+					1U, 10000U);
+
+err_exit:
+	return err;
+}
+
+static int aq_fw2x_set_power(struct aq_hw_s *self, unsigned int power_state,
+			     u8 *mac)
+{
+	int err = 0;
+
+	if (self->aq_nic_cfg->wol & AQ_NIC_WOL_ENABLED) {
+		err = aq_fw2x_set_sleep_proxy(self, mac);
+		if (err < 0)
+			goto err_exit;
+		err = aq_fw2x_set_wol_params(self, mac);
+	}
+
+err_exit:
+	return err;
+}
+
+static int aq_fw2x_set_eee_rate(struct aq_hw_s *self, u32 speed)
+{
+	u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+	aq_fw2x_upd_eee_rate_bits(self, &mpi_opts, speed);
+
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+	return 0;
+}
+
+static int aq_fw2x_get_eee_rate(struct aq_hw_s *self, u32 *rate,
+				u32 *supported_rates)
+{
+	u32 mpi_state;
+	u32 caps_hi;
+	int err = 0;
+	u32 addr = self->mbox_addr + offsetof(struct hw_atl_utils_mbox, info) +
+		   offsetof(struct hw_aq_info, caps_hi);
+
+	err = hw_atl_utils_fw_downld_dwords(self, addr, &caps_hi,
+					    sizeof(caps_hi) / sizeof(u32));
+
+	if (err)
+		return err;
+
+	*supported_rates = fw2x_to_eee_mask(caps_hi);
+
+	mpi_state = aq_fw2x_state2_get(self);
+	*rate = fw2x_to_eee_mask(mpi_state);
+
+	return err;
+}
+
 static int aq_fw2x_renegotiate(struct aq_hw_s *self)
 {
 	u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
@@ -237,6 +500,39 @@
 	return 0;
 }
 
+static u32 aq_fw2x_get_flow_control(struct aq_hw_s *self, u32 *fcmode)
+{
+	u32 mpi_state = aq_fw2x_state2_get(self);
+
+	if (mpi_state & HW_ATL_FW2X_CAP_PAUSE)
+		if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
+			*fcmode = AQ_NIC_FC_RX;
+		else
+			*fcmode = AQ_NIC_FC_RX | AQ_NIC_FC_TX;
+	else
+		if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
+			*fcmode = AQ_NIC_FC_TX;
+		else
+			*fcmode = 0;
+
+	return 0;
+}
+
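/* Reference decode table for aq_fw2x_get_flow_control() above,
 * consistent with the 802.3 PAUSE/ASM_DIR resolution:
 *
 *	CAP_PAUSE  CAP_ASYM_PAUSE      *fcmode
 *	    0            0             0 (none)
 *	    0            1             AQ_NIC_FC_TX
 *	    1            0             AQ_NIC_FC_RX | AQ_NIC_FC_TX
 *	    1            1             AQ_NIC_FC_RX
 */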
+static u32 aq_fw2x_mbox_get(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_FW2X_MPI_MBOX_ADDR);
+}
+
+static u32 aq_fw2x_rpc_get(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_FW2X_MPI_RPC_ADDR);
+}
+
+static u32 aq_fw2x_state2_get(struct aq_hw_s *self)
+{
+	return aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR);
+}
+
 const struct aq_fw_ops aq_fw_2x_ops = {
 	.init = aq_fw2x_init,
 	.deinit = aq_fw2x_deinit,
@@ -247,5 +543,10 @@
 	.set_state = aq_fw2x_set_state,
 	.update_link_status = aq_fw2x_update_link_status,
 	.update_stats = aq_fw2x_update_stats,
-	.set_flow_control   = aq_fw2x_set_flow_control,
+	.get_phy_temp = aq_fw2x_get_phy_temp,
+	.set_power = aq_fw2x_set_power,
+	.set_eee_rate = aq_fw2x_set_eee_rate,
+	.get_eee_rate = aq_fw2x_get_eee_rate,
+	.set_flow_control = aq_fw2x_set_flow_control,
+	.get_flow_control = aq_fw2x_get_flow_control
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h
index 94efc64..597654b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/ver.h
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -1,20 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
  * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
  */
 
 #ifndef VER_H
 #define VER_H
 
-#define NIC_MAJOR_DRIVER_VERSION           2
-#define NIC_MINOR_DRIVER_VERSION           0
-#define NIC_BUILD_DRIVER_VERSION           3
-#define NIC_REVISION_DRIVER_VERSION        0
-
 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
 
 #endif /* VER_H */
diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig
index 5d0ab8e..45c663d 100644
--- a/drivers/net/ethernet/arc/Kconfig
+++ b/drivers/net/ethernet/arc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # ARC EMAC network device configuration
 #
diff --git a/drivers/net/ethernet/arc/Makefile b/drivers/net/ethernet/arc/Makefile
index 79108af..d63ada5 100644
--- a/drivers/net/ethernet/arc/Makefile
+++ b/drivers/net/ethernet/arc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the ARC network device drivers.
 #
diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c
index ffd1805..78e52d2 100644
--- a/drivers/net/ethernet/arc/emac_arc.c
+++ b/drivers/net/ethernet/arc/emac_arc.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /**
  * emac_arc.c - ARC EMAC specific glue layer
  *
  * Copyright (C) 2014 Romain Perier
  *
  * Romain Perier  <romain.perier@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/etherdevice.h>
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index bd277b0..6f2c867 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2004-2013 Synopsys, Inc. (www.synopsys.com)
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Driver for the ARC EMAC 10100 (hardware revision 5)
  *
  * Contributors:
@@ -148,7 +145,7 @@
 				 dma_unmap_len(tx_buff, len), DMA_TO_DEVICE);
 
 		/* return the sk_buff to system */
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 
 		txbd->data = 0;
 		txbd->info = 0;
@@ -432,7 +429,8 @@
 	phy_dev->autoneg = AUTONEG_ENABLE;
 	phy_dev->speed = 0;
 	phy_dev->duplex = 0;
-	phy_dev->advertising &= phy_dev->supported;
+	linkmode_and(phy_dev->advertising, phy_dev->advertising,
+		     phy_dev->supported);
 
 	priv->last_rx_bd = 0;
 
@@ -959,8 +957,8 @@
 	/* Get MAC address from device tree */
 	mac_addr = of_get_mac_address(dev->of_node);
 
-	if (mac_addr)
-		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(ndev->dev_addr, mac_addr);
 	else
 		eth_hw_addr_random(ndev);
 
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 0f65768..664d664 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /**
  * emac-rockchip.c - Rockchip EMAC specific glue layer
  *
  * Copyright (C) 2014 Romain Perier <romain.perier@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/etherdevice.h>
@@ -265,6 +256,9 @@
 	if (priv->regulator)
 		regulator_disable(priv->regulator);
 
+	if (priv->soc_data->need_div_macclk)
+		clk_disable_unprepare(priv->macclk);
+
 	free_netdev(ndev);
 	return err;
 }
diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig
index e05b256..0058051 100644
--- a/drivers/net/ethernet/atheros/Kconfig
+++ b/drivers/net/ethernet/atheros/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Atheros device configuration
 #
@@ -5,7 +6,7 @@
 config NET_VENDOR_ATHEROS
 	bool "Atheros devices"
 	default y
-	depends on PCI
+	depends on (PCI || ATH79)
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -16,6 +17,14 @@
 
 if NET_VENDOR_ATHEROS
 
+config AG71XX
+	tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support"
+	depends on ATH79
+	select PHYLIB
+	help
+	  If you wish to compile a kernel for AR7XXX/AR9XXX and enable
+	  ethernet support, then you should always answer Y to this.
+
 config ATL2
 	tristate "Atheros L2 Fast Ethernet support"
 	depends on PCI
diff --git a/drivers/net/ethernet/atheros/Makefile b/drivers/net/ethernet/atheros/Makefile
index aa3d394..aca696c 100644
--- a/drivers/net/ethernet/atheros/Makefile
+++ b/drivers/net/ethernet/atheros/Makefile
@@ -3,6 +3,7 @@
 # Makefile for the Atheros network device drivers.
 #
 
+obj-$(CONFIG_AG71XX) += ag71xx.o
 obj-$(CONFIG_ATL1) += atlx/
 obj-$(CONFIG_ATL2) += atlx/
 obj-$(CONFIG_ATL1E) += atl1e/
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
new file mode 100644
index 0000000..1b1a090
--- /dev/null
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -0,0 +1,1903 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Atheros AR71xx built-in ethernet mac driver
+ *
+ *  Copyright (C) 2019 Oleksij Rempel <o.rempel@pengutronix.de>
+ *
+ *  List of authors contributed to this driver before mainlining:
+ *  Alexander Couzens <lynxis@fe80.eu>
+ *  Christian Lamparter <chunkeey@gmail.com>
+ *  Chuanhong Guo <gch981213@gmail.com>
+ *  Daniel F. Dickinson <cshored@thecshore.com>
+ *  David Bauer <mail@david-bauer.net>
+ *  Felix Fietkau <nbd@nbd.name>
+ *  Gabor Juhos <juhosg@freemail.hu>
+ *  Hauke Mehrtens <hauke@hauke-m.de>
+ *  Johann Neuhauser <johann@it-neuhauser.de>
+ *  John Crispin <john@phrozen.org>
+ *  Jo-Philipp Wich <jo@mein.io>
+ *  Koen Vandeputte <koen.vandeputte@ncentric.com>
+ *  Lucian Cristian <lucian.cristian@gmail.com>
+ *  Matt Merhar <mattmerhar@protonmail.com>
+ *  Milan Krstic <milan.krstic@gmail.com>
+ *  Petr Štetiar <ynezz@true.cz>
+ *  Rosen Penev <rosenp@gmail.com>
+ *  Stephen Walker <stephendwalker+github@gmail.com>
+ *  Vittorio Gambaletta <openwrt@vittgam.net>
+ *  Weijie Gao <hackpascal@gmail.com>
+ *  Imre Kaloz <kaloz@openwrt.org>
+ */
+
+#include <linux/if_vlan.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+
+/* For our NAPI weight bigger does *NOT* mean better - it means more
+ * D-cache misses and lots more wasted cycles than we'll ever
+ * possibly gain from saving instructions.
+ */
+#define AG71XX_NAPI_WEIGHT	32
+#define AG71XX_OOM_REFILL	(1 + HZ / 10)
+
+#define AG71XX_INT_ERR	(AG71XX_INT_RX_BE | AG71XX_INT_TX_BE)
+#define AG71XX_INT_TX	(AG71XX_INT_TX_PS)
+#define AG71XX_INT_RX	(AG71XX_INT_RX_PR | AG71XX_INT_RX_OF)
+
+#define AG71XX_INT_POLL	(AG71XX_INT_RX | AG71XX_INT_TX)
+#define AG71XX_INT_INIT	(AG71XX_INT_ERR | AG71XX_INT_POLL)
+
+#define AG71XX_TX_MTU_LEN	1540
+
+#define AG71XX_TX_RING_SPLIT		512
+#define AG71XX_TX_RING_DS_PER_PKT	DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
+						     AG71XX_TX_RING_SPLIT)
+#define AG71XX_TX_RING_SIZE_DEFAULT	128
+#define AG71XX_RX_RING_SIZE_DEFAULT	256
+
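/* With the values above, AG71XX_TX_RING_DS_PER_PKT evaluates to
 * DIV_ROUND_UP(1540, 512) = 4: a worst-case MTU-sized frame can occupy
 * up to four descriptors when the TX ring runs in split mode.
 */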
+#define AG71XX_MDIO_RETRY	1000
+#define AG71XX_MDIO_DELAY	5
+#define AG71XX_MDIO_MAX_CLK	5000000
+
+/* Register offsets */
+#define AG71XX_REG_MAC_CFG1	0x0000
+#define MAC_CFG1_TXE		BIT(0)	/* Tx Enable */
+#define MAC_CFG1_STX		BIT(1)	/* Synchronize Tx Enable */
+#define MAC_CFG1_RXE		BIT(2)	/* Rx Enable */
+#define MAC_CFG1_SRX		BIT(3)	/* Synchronize Rx Enable */
+#define MAC_CFG1_TFC		BIT(4)	/* Tx Flow Control Enable */
+#define MAC_CFG1_RFC		BIT(5)	/* Rx Flow Control Enable */
+#define MAC_CFG1_SR		BIT(31)	/* Soft Reset */
+#define MAC_CFG1_INIT	(MAC_CFG1_RXE | MAC_CFG1_TXE | \
+			 MAC_CFG1_SRX | MAC_CFG1_STX)
+
+#define AG71XX_REG_MAC_CFG2	0x0004
+#define MAC_CFG2_FDX		BIT(0)
+#define MAC_CFG2_PAD_CRC_EN	BIT(2)
+#define MAC_CFG2_LEN_CHECK	BIT(4)
+#define MAC_CFG2_IF_1000	BIT(9)
+#define MAC_CFG2_IF_10_100	BIT(8)
+
+#define AG71XX_REG_MAC_MFL	0x0010
+
+#define AG71XX_REG_MII_CFG	0x0020
+#define MII_CFG_CLK_DIV_4	0
+#define MII_CFG_CLK_DIV_6	2
+#define MII_CFG_CLK_DIV_8	3
+#define MII_CFG_CLK_DIV_10	4
+#define MII_CFG_CLK_DIV_14	5
+#define MII_CFG_CLK_DIV_20	6
+#define MII_CFG_CLK_DIV_28	7
+#define MII_CFG_CLK_DIV_34	8
+#define MII_CFG_CLK_DIV_42	9
+#define MII_CFG_CLK_DIV_50	10
+#define MII_CFG_CLK_DIV_58	11
+#define MII_CFG_CLK_DIV_66	12
+#define MII_CFG_CLK_DIV_74	13
+#define MII_CFG_CLK_DIV_82	14
+#define MII_CFG_CLK_DIV_98	15
+#define MII_CFG_RESET		BIT(31)
+
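/* Illustrative sketch (not part of the patch): the MII_CFG_CLK_DIV_*
 * encodings above derive the MDIO clock from the reference clock.
 * Selection picks the smallest divider keeping the bus at or below
 * AG71XX_MDIO_MAX_CLK (5 MHz); the table and function name here are
 * assumptions for illustration.
 */
static int example_mdio_get_divider(unsigned long ref_clock)
{
	static const unsigned long div_table[] = {
		4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(div_table); i++)
		if (ref_clock / div_table[i] <= AG71XX_MDIO_MAX_CLK)
			return i;	/* MII_CFG_CLK_DIV_* register value */

	return -ENOENT;
}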
+#define AG71XX_REG_MII_CMD	0x0024
+#define MII_CMD_READ		BIT(0)
+
+#define AG71XX_REG_MII_ADDR	0x0028
+#define MII_ADDR_SHIFT		8
+
+#define AG71XX_REG_MII_CTRL	0x002c
+#define AG71XX_REG_MII_STATUS	0x0030
+#define AG71XX_REG_MII_IND	0x0034
+#define MII_IND_BUSY		BIT(0)
+#define MII_IND_INVALID		BIT(2)
+
+#define AG71XX_REG_MAC_IFCTL	0x0038
+#define MAC_IFCTL_SPEED		BIT(16)
+
+#define AG71XX_REG_MAC_ADDR1	0x0040
+#define AG71XX_REG_MAC_ADDR2	0x0044
+#define AG71XX_REG_FIFO_CFG0	0x0048
+#define FIFO_CFG0_WTM		BIT(0)	/* Watermark Module */
+#define FIFO_CFG0_RXS		BIT(1)	/* Rx System Module */
+#define FIFO_CFG0_RXF		BIT(2)	/* Rx Fabric Module */
+#define FIFO_CFG0_TXS		BIT(3)	/* Tx System Module */
+#define FIFO_CFG0_TXF		BIT(4)	/* Tx Fabric Module */
+#define FIFO_CFG0_ALL	(FIFO_CFG0_WTM | FIFO_CFG0_RXS | FIFO_CFG0_RXF \
+			| FIFO_CFG0_TXS | FIFO_CFG0_TXF)
+#define FIFO_CFG0_INIT	(FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT)
+
+#define FIFO_CFG0_ENABLE_SHIFT	8
+
+#define AG71XX_REG_FIFO_CFG1	0x004c
+#define AG71XX_REG_FIFO_CFG2	0x0050
+#define AG71XX_REG_FIFO_CFG3	0x0054
+#define AG71XX_REG_FIFO_CFG4	0x0058
+#define FIFO_CFG4_DE		BIT(0)	/* Drop Event */
+#define FIFO_CFG4_DV		BIT(1)	/* RX_DV Event */
+#define FIFO_CFG4_FC		BIT(2)	/* False Carrier */
+#define FIFO_CFG4_CE		BIT(3)	/* Code Error */
+#define FIFO_CFG4_CR		BIT(4)	/* CRC error */
+#define FIFO_CFG4_LM		BIT(5)	/* Length Mismatch */
+#define FIFO_CFG4_LO		BIT(6)	/* Length out of range */
+#define FIFO_CFG4_OK		BIT(7)	/* Packet is OK */
+#define FIFO_CFG4_MC		BIT(8)	/* Multicast Packet */
+#define FIFO_CFG4_BC		BIT(9)	/* Broadcast Packet */
+#define FIFO_CFG4_DR		BIT(10)	/* Dribble */
+#define FIFO_CFG4_LE		BIT(11)	/* Long Event */
+#define FIFO_CFG4_CF		BIT(12)	/* Control Frame */
+#define FIFO_CFG4_PF		BIT(13)	/* Pause Frame */
+#define FIFO_CFG4_UO		BIT(14)	/* Unsupported Opcode */
+#define FIFO_CFG4_VT		BIT(15)	/* VLAN tag detected */
+#define FIFO_CFG4_FT		BIT(16)	/* Frame Truncated */
+#define FIFO_CFG4_UC		BIT(17)	/* Unicast Packet */
+#define FIFO_CFG4_INIT	(FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \
+			 FIFO_CFG4_CE | FIFO_CFG4_CR | FIFO_CFG4_LM | \
+			 FIFO_CFG4_LO | FIFO_CFG4_OK | FIFO_CFG4_MC | \
+			 FIFO_CFG4_BC | FIFO_CFG4_DR | FIFO_CFG4_LE | \
+			 FIFO_CFG4_CF | FIFO_CFG4_PF | FIFO_CFG4_UO | \
+			 FIFO_CFG4_VT)
+
+#define AG71XX_REG_FIFO_CFG5	0x005c
+#define FIFO_CFG5_DE		BIT(0)	/* Drop Event */
+#define FIFO_CFG5_DV		BIT(1)	/* RX_DV Event */
+#define FIFO_CFG5_FC		BIT(2)	/* False Carrier */
+#define FIFO_CFG5_CE		BIT(3)	/* Code Error */
+#define FIFO_CFG5_LM		BIT(4)	/* Length Mismatch */
+#define FIFO_CFG5_LO		BIT(5)	/* Length Out of Range */
+#define FIFO_CFG5_OK		BIT(6)	/* Packet is OK */
+#define FIFO_CFG5_MC		BIT(7)	/* Multicast Packet */
+#define FIFO_CFG5_BC		BIT(8)	/* Broadcast Packet */
+#define FIFO_CFG5_DR		BIT(9)	/* Dribble */
+#define FIFO_CFG5_CF		BIT(10)	/* Control Frame */
+#define FIFO_CFG5_PF		BIT(11)	/* Pause Frame */
+#define FIFO_CFG5_UO		BIT(12)	/* Unsupported Opcode */
+#define FIFO_CFG5_VT		BIT(13)	/* VLAN tag detected */
+#define FIFO_CFG5_LE		BIT(14)	/* Long Event */
+#define FIFO_CFG5_FT		BIT(15)	/* Frame Truncated */
+#define FIFO_CFG5_16		BIT(16)	/* unknown */
+#define FIFO_CFG5_17		BIT(17)	/* unknown */
+#define FIFO_CFG5_SF		BIT(18)	/* Short Frame */
+#define FIFO_CFG5_BM		BIT(19)	/* Byte Mode */
+#define FIFO_CFG5_INIT	(FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \
+			 FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \
+			 FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \
+			 FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \
+			 FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \
+			 FIFO_CFG5_17 | FIFO_CFG5_SF)
+
+#define AG71XX_REG_TX_CTRL	0x0180
+#define TX_CTRL_TXE		BIT(0)	/* Tx Enable */
+
+#define AG71XX_REG_TX_DESC	0x0184
+#define AG71XX_REG_TX_STATUS	0x0188
+#define TX_STATUS_PS		BIT(0)	/* Packet Sent */
+#define TX_STATUS_UR		BIT(1)	/* Tx Underrun */
+#define TX_STATUS_BE		BIT(3)	/* Bus Error */
+
+#define AG71XX_REG_RX_CTRL	0x018c
+#define RX_CTRL_RXE		BIT(0)	/* Rx Enable */
+
+#define AG71XX_DMA_RETRY	10
+#define AG71XX_DMA_DELAY	1
+
+#define AG71XX_REG_RX_DESC	0x0190
+#define AG71XX_REG_RX_STATUS	0x0194
+#define RX_STATUS_PR		BIT(0)	/* Packet Received */
+#define RX_STATUS_OF		BIT(2)	/* Rx Overflow */
+#define RX_STATUS_BE		BIT(3)	/* Bus Error */
+
+#define AG71XX_REG_INT_ENABLE	0x0198
+#define AG71XX_REG_INT_STATUS	0x019c
+#define AG71XX_INT_TX_PS	BIT(0)
+#define AG71XX_INT_TX_UR	BIT(1)
+#define AG71XX_INT_TX_BE	BIT(3)
+#define AG71XX_INT_RX_PR	BIT(4)
+#define AG71XX_INT_RX_OF	BIT(6)
+#define AG71XX_INT_RX_BE	BIT(7)
+
+#define AG71XX_REG_FIFO_DEPTH	0x01a8
+#define AG71XX_REG_RX_SM	0x01b0
+#define AG71XX_REG_TX_SM	0x01b4
+
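+/* bytes reserved for the header the built-in switch may prepend to frames */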
+#define ETH_SWITCH_HEADER_LEN	2
+
+#define AG71XX_DEFAULT_MSG_ENABLE	\
+	(NETIF_MSG_DRV			\
+	| NETIF_MSG_PROBE		\
+	| NETIF_MSG_LINK		\
+	| NETIF_MSG_TIMER		\
+	| NETIF_MSG_IFDOWN		\
+	| NETIF_MSG_IFUP		\
+	| NETIF_MSG_RX_ERR		\
+	| NETIF_MSG_TX_ERR)
+
+#define DESC_EMPTY		BIT(31)
+#define DESC_MORE		BIT(24)
+#define DESC_PKTLEN_M		0xfff
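+/* hardware DMA descriptor; the low bits of 'ctrl' hold the buffer length
+ * (masked by the per-SoC desc_pktlen_mask), DESC_MORE chains the pieces of
+ * a split packet, and DESC_EMPTY flags a descriptor that holds no packet
+ * (a completed TX slot or a not-yet-filled RX slot).
+ */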
+struct ag71xx_desc {
+	u32 data;
+	u32 ctrl;
+	u32 next;
+	u32 pad;
+} __aligned(4);
+
+#define AG71XX_DESC_SIZE	roundup(sizeof(struct ag71xx_desc), \
+					L1_CACHE_BYTES)
+
+struct ag71xx_buf {
+	union {
+		struct {
+			struct sk_buff *skb;
+			unsigned int len;
+		} tx;
+		struct {
+			dma_addr_t dma_addr;
+			void *rx_buf;
+		} rx;
+	};
+};
+
+struct ag71xx_ring {
+	/* "Hot" fields in the data path. */
+	unsigned int curr;
+	unsigned int dirty;
+
+	/* "Cold" fields - not used in the data path. */
+	struct ag71xx_buf *buf;
+	u16 order;
+	u16 desc_split;
+	dma_addr_t descs_dma;
+	u8 *descs_cpu;
+};
+
+enum ag71xx_type {
+	AR7100,
+	AR7240,
+	AR9130,
+	AR9330,
+	AR9340,
+	QCA9530,
+	QCA9550,
+};
+
+struct ag71xx_dcfg {
+	u32 max_frame_len;
+	const u32 *fifodata;
+	u16 desc_pktlen_mask;
+	bool tx_hang_workaround;
+	enum ag71xx_type type;
+};
+
+struct ag71xx {
+	/* Critical data related to the per-packet data path are clustered
+	 * early in this structure to help improve the D-cache footprint.
+	 */
+	struct ag71xx_ring rx_ring ____cacheline_aligned;
+	struct ag71xx_ring tx_ring ____cacheline_aligned;
+
+	u16 rx_buf_size;
+	u8 rx_buf_offset;
+
+	struct net_device *ndev;
+	struct platform_device *pdev;
+	struct napi_struct napi;
+	u32 msg_enable;
+	const struct ag71xx_dcfg *dcfg;
+
+	/* From this point onwards we're not looking at per-packet fields. */
+	void __iomem *mac_base;
+
+	struct ag71xx_desc *stop_desc;
+	dma_addr_t stop_desc_dma;
+
+	int phy_if_mode;
+
+	struct delayed_work restart_work;
+	struct timer_list oom_timer;
+
+	struct reset_control *mac_reset;
+
+	u32 fifodata[3];
+	int mac_idx;
+
+	struct reset_control *mdio_reset;
+	struct mii_bus *mii_bus;
+	struct clk *clk_mdio;
+	struct clk *clk_eth;
+};
+
+static int ag71xx_desc_empty(struct ag71xx_desc *desc)
+{
+	return (desc->ctrl & DESC_EMPTY) != 0;
+}
+
+static struct ag71xx_desc *ag71xx_ring_desc(struct ag71xx_ring *ring, int idx)
+{
+	return (struct ag71xx_desc *)&ring->descs_cpu[idx * AG71XX_DESC_SIZE];
+}
+
+static int ag71xx_ring_size_order(int size)
+{
+	return fls(size - 1);
+}
+
+static bool ag71xx_is(struct ag71xx *ag, enum ag71xx_type type)
+{
+	return ag->dcfg->type == type;
+}
+
+static void ag71xx_wr(struct ag71xx *ag, unsigned int reg, u32 value)
+{
+	iowrite32(value, ag->mac_base + reg);
+	/* flush write */
+	(void)ioread32(ag->mac_base + reg);
+}
+
+static u32 ag71xx_rr(struct ag71xx *ag, unsigned int reg)
+{
+	return ioread32(ag->mac_base + reg);
+}
+
+static void ag71xx_sb(struct ag71xx *ag, unsigned int reg, u32 mask)
+{
+	void __iomem *r;
+
+	r = ag->mac_base + reg;
+	iowrite32(ioread32(r) | mask, r);
+	/* flush write */
+	(void)ioread32(r);
+}
+
+static void ag71xx_cb(struct ag71xx *ag, unsigned int reg, u32 mask)
+{
+	void __iomem *r;
+
+	r = ag->mac_base + reg;
+	iowrite32(ioread32(r) & ~mask, r);
+	/* flush write */
+	(void)ioread32(r);
+}
+
+static void ag71xx_int_enable(struct ag71xx *ag, u32 ints)
+{
+	ag71xx_sb(ag, AG71XX_REG_INT_ENABLE, ints);
+}
+
+static void ag71xx_int_disable(struct ag71xx *ag, u32 ints)
+{
+	ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints);
+}
+
+static int ag71xx_mdio_wait_busy(struct ag71xx *ag)
+{
+	struct net_device *ndev = ag->ndev;
+	int i;
+
+	for (i = 0; i < AG71XX_MDIO_RETRY; i++) {
+		u32 busy;
+
+		udelay(AG71XX_MDIO_DELAY);
+
+		busy = ag71xx_rr(ag, AG71XX_REG_MII_IND);
+		if (!busy)
+			return 0;
+
+		udelay(AG71XX_MDIO_DELAY);
+	}
+
+	netif_err(ag, link, ndev, "MDIO operation timed out\n");
+
+	return -ETIMEDOUT;
+}
+
+static int ag71xx_mdio_mii_read(struct mii_bus *bus, int addr, int reg)
+{
+	struct ag71xx *ag = bus->priv;
+	int err, val;
+
+	err = ag71xx_mdio_wait_busy(ag);
+	if (err)
+		return err;
+
+	ag71xx_wr(ag, AG71XX_REG_MII_ADDR,
+		  ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff));
+	/* enable read mode */
+	ag71xx_wr(ag, AG71XX_REG_MII_CMD, MII_CMD_READ);
+
+	err = ag71xx_mdio_wait_busy(ag);
+	if (err)
+		return err;
+
+	val = ag71xx_rr(ag, AG71XX_REG_MII_STATUS);
+	/* disable read mode */
+	ag71xx_wr(ag, AG71XX_REG_MII_CMD, 0);
+
+	netif_dbg(ag, link, ag->ndev, "mii_read: addr=%04x, reg=%04x, value=%04x\n",
+		  addr, reg, val);
+
+	return val;
+}
+
+static int ag71xx_mdio_mii_write(struct mii_bus *bus, int addr, int reg,
+				 u16 val)
+{
+	struct ag71xx *ag = bus->priv;
+
+	netif_dbg(ag, link, ag->ndev, "mii_write: addr=%04x, reg=%04x, value=%04x\n",
+		  addr, reg, val);
+
+	ag71xx_wr(ag, AG71XX_REG_MII_ADDR,
+		  ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff));
+	ag71xx_wr(ag, AG71XX_REG_MII_CTRL, val);
+
+	return ag71xx_mdio_wait_busy(ag);
+}
+
+static const u32 ar71xx_mdio_div_table[] = {
+	4, 4, 6, 8, 10, 14, 20, 28,
+};
+
+static const u32 ar7240_mdio_div_table[] = {
+	2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96,
+};
+
+static const u32 ar933x_mdio_div_table[] = {
+	4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98,
+};
+
+static int ag71xx_mdio_get_divider(struct ag71xx *ag, u32 *div)
+{
+	unsigned long ref_clock;
+	const u32 *table;
+	int ndivs, i;
+
+	ref_clock = clk_get_rate(ag->clk_mdio);
+	if (!ref_clock)
+		return -EINVAL;
+
+	if (ag71xx_is(ag, AR9330) || ag71xx_is(ag, AR9340)) {
+		table = ar933x_mdio_div_table;
+		ndivs = ARRAY_SIZE(ar933x_mdio_div_table);
+	} else if (ag71xx_is(ag, AR7240)) {
+		table = ar7240_mdio_div_table;
+		ndivs = ARRAY_SIZE(ar7240_mdio_div_table);
+	} else {
+		table = ar71xx_mdio_div_table;
+		ndivs = ARRAY_SIZE(ar71xx_mdio_div_table);
+	}
+
+	for (i = 0; i < ndivs; i++) {
+		unsigned long t;
+
+		t = ref_clock / table[i];
+		if (t <= AG71XX_MDIO_MAX_CLK) {
+			*div = i;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int ag71xx_mdio_reset(struct mii_bus *bus)
+{
+	struct ag71xx *ag = bus->priv;
+	int err;
+	u32 t;
+
+	err = ag71xx_mdio_get_divider(ag, &t);
+	if (err)
+		return err;
+
+	ag71xx_wr(ag, AG71XX_REG_MII_CFG, t | MII_CFG_RESET);
+	usleep_range(100, 200);
+
+	ag71xx_wr(ag, AG71XX_REG_MII_CFG, t);
+	usleep_range(100, 200);
+
+	return 0;
+}
+
+static int ag71xx_mdio_probe(struct ag71xx *ag)
+{
+	struct device *dev = &ag->pdev->dev;
+	struct net_device *ndev = ag->ndev;
+	struct mii_bus *mii_bus;
+	struct device_node *np, *mnp;
+	int err;
+
+	np = dev->of_node;
+	ag->mii_bus = NULL;
+
+	ag->clk_mdio = devm_clk_get(dev, "mdio");
+	if (IS_ERR(ag->clk_mdio)) {
+		netif_err(ag, probe, ndev, "Failed to get mdio clk.\n");
+		return PTR_ERR(ag->clk_mdio);
+	}
+
+	err = clk_prepare_enable(ag->clk_mdio);
+	if (err) {
+		netif_err(ag, probe, ndev, "Failed to enable mdio clk.\n");
+		return err;
+	}
+
+	mii_bus = devm_mdiobus_alloc(dev);
+	if (!mii_bus) {
+		err = -ENOMEM;
+		goto mdio_err_put_clk;
+	}
+
+	ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio");
+	if (IS_ERR(ag->mdio_reset)) {
+		netif_err(ag, probe, ndev, "Failed to get reset mdio.\n");
+		err = PTR_ERR(ag->mdio_reset);
+		goto mdio_err_put_clk;
+	}
+
+	mii_bus->name = "ag71xx_mdio";
+	mii_bus->read = ag71xx_mdio_mii_read;
+	mii_bus->write = ag71xx_mdio_mii_write;
+	mii_bus->reset = ag71xx_mdio_reset;
+	mii_bus->priv = ag;
+	mii_bus->parent = dev;
+	snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx);
+
+	reset_control_assert(ag->mdio_reset);
+	msleep(100);
+	reset_control_deassert(ag->mdio_reset);
+	msleep(200);
+
+	mnp = of_get_child_by_name(np, "mdio");
+	err = of_mdiobus_register(mii_bus, mnp);
+	of_node_put(mnp);
+	if (err)
+		goto mdio_err_put_clk;
+
+	ag->mii_bus = mii_bus;
+
+	return 0;
+
+mdio_err_put_clk:
+	clk_disable_unprepare(ag->clk_mdio);
+	return err;
+}
+
+static void ag71xx_mdio_remove(struct ag71xx *ag)
+{
+	if (ag->mii_bus)
+		mdiobus_unregister(ag->mii_bus);
+	clk_disable_unprepare(ag->clk_mdio);
+}
+
+static void ag71xx_hw_stop(struct ag71xx *ag)
+{
+	/* disable all interrupts and stop the rx/tx engine */
+	ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, 0);
+	ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0);
+	ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0);
+}
+
+static bool ag71xx_check_dma_stuck(struct ag71xx *ag)
+{
+	unsigned long timestamp;
+	u32 rx_sm, tx_sm, rx_fd;
+
+	timestamp = netdev_get_tx_queue(ag->ndev, 0)->trans_start;
+	if (likely(time_before(jiffies, timestamp + HZ / 10)))
+		return false;
+
+	if (!netif_carrier_ok(ag->ndev))
+		return false;
+
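+	/* compare the RX/TX DMA state machines against signatures that are
+	 * treated as a stuck engine on the affected SoCs
+	 */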
+	rx_sm = ag71xx_rr(ag, AG71XX_REG_RX_SM);
+	if ((rx_sm & 0x7) == 0x3 && ((rx_sm >> 4) & 0x7) == 0x6)
+		return true;
+
+	tx_sm = ag71xx_rr(ag, AG71XX_REG_TX_SM);
+	rx_fd = ag71xx_rr(ag, AG71XX_REG_FIFO_DEPTH);
+	if (((tx_sm >> 4) & 0x7) == 0 && ((rx_sm & 0x7) == 0) &&
+	    ((rx_sm >> 4) & 0x7) == 0 && rx_fd == 0)
+		return true;
+
+	return false;
+}
+
+static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
+{
+	struct ag71xx_ring *ring = &ag->tx_ring;
+	int sent = 0, bytes_compl = 0, n = 0;
+	struct net_device *ndev = ag->ndev;
+	int ring_mask, ring_size;
+	bool dma_stuck = false;
+
+	ring_mask = BIT(ring->order) - 1;
+	ring_size = BIT(ring->order);
+
+	netif_dbg(ag, tx_queued, ndev, "processing TX ring\n");
+
+	while (ring->dirty + n != ring->curr) {
+		struct ag71xx_desc *desc;
+		struct sk_buff *skb;
+		unsigned int i;
+
+		i = (ring->dirty + n) & ring_mask;
+		desc = ag71xx_ring_desc(ring, i);
+		skb = ring->buf[i].tx.skb;
+
+		if (!flush && !ag71xx_desc_empty(desc)) {
+			if (ag->dcfg->tx_hang_workaround &&
+			    ag71xx_check_dma_stuck(ag)) {
+				schedule_delayed_work(&ag->restart_work,
+						      HZ / 2);
+				dma_stuck = true;
+			}
+			break;
+		}
+
+		if (flush)
+			desc->ctrl |= DESC_EMPTY;
+
+		n++;
+		if (!skb)
+			continue;
+
+		dev_kfree_skb_any(skb);
+		ring->buf[i].tx.skb = NULL;
+
+		bytes_compl += ring->buf[i].tx.len;
+
+		sent++;
+		ring->dirty += n;
+
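+		/* ack one Packet Sent status per descriptor just completed */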
+		while (n > 0) {
+			ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+			n--;
+		}
+	}
+
+	netif_dbg(ag, tx_done, ndev, "%d packets sent out\n", sent);
+
+	if (!sent)
+		return 0;
+
+	ag->ndev->stats.tx_bytes += bytes_compl;
+	ag->ndev->stats.tx_packets += sent;
+
+	netdev_completed_queue(ag->ndev, sent, bytes_compl);
+	if ((ring->curr - ring->dirty) < (ring_size * 3) / 4)
+		netif_wake_queue(ag->ndev);
+
+	if (!dma_stuck)
+		cancel_delayed_work(&ag->restart_work);
+
+	return sent;
+}
+
+static void ag71xx_dma_wait_stop(struct ag71xx *ag)
+{
+	struct net_device *ndev = ag->ndev;
+	int i;
+
+	for (i = 0; i < AG71XX_DMA_RETRY; i++) {
+		u32 rx, tx;
+
+		mdelay(AG71XX_DMA_DELAY);
+
+		rx = ag71xx_rr(ag, AG71XX_REG_RX_CTRL) & RX_CTRL_RXE;
+		tx = ag71xx_rr(ag, AG71XX_REG_TX_CTRL) & TX_CTRL_TXE;
+		if (!rx && !tx)
+			return;
+	}
+
+	netif_err(ag, hw, ndev, "DMA stop operation timed out\n");
+}
+
+static void ag71xx_dma_reset(struct ag71xx *ag)
+{
+	struct net_device *ndev = ag->ndev;
+	u32 val;
+	int i;
+
+	/* stop RX and TX */
+	ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0);
+	ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0);
+
+	/* give the hardware some time to really stop all rx/tx activity;
+	 * clearing the descriptors too early causes random memory corruption
+	 */
+	ag71xx_dma_wait_stop(ag);
+
+	/* clear descriptor addresses */
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->stop_desc_dma);
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->stop_desc_dma);
+
+	/* clear pending RX/TX interrupts */
+	for (i = 0; i < 256; i++) {
+		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
+		ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+	}
+
+	/* clear pending errors */
+	ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE | RX_STATUS_OF);
+	ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE | TX_STATUS_UR);
+
+	val = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);
+	if (val)
+		netif_err(ag, hw, ndev, "unable to clear DMA Rx status: %08x\n",
+			  val);
+
+	val = ag71xx_rr(ag, AG71XX_REG_TX_STATUS);
+
+	/* mask out reserved bits */
+	val &= ~0xff000000;
+
+	if (val)
+		netif_err(ag, hw, ndev, "unable to clear DMA Tx status: %08x\n",
+			  val);
+}
+
+static void ag71xx_hw_setup(struct ag71xx *ag)
+{
+	u32 init = MAC_CFG1_INIT;
+
+	/* setup MAC configuration registers */
+	ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, init);
+
+	ag71xx_sb(ag, AG71XX_REG_MAC_CFG2,
+		  MAC_CFG2_PAD_CRC_EN | MAC_CFG2_LEN_CHECK);
+
+	/* clear the max frame length; the real value is programmed when the
+	 * interface is brought up (see ag71xx_open)
+	 */
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL, 0);
+
+	/* setup FIFO configuration registers */
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG0, FIFO_CFG0_INIT);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG1, ag->fifodata[0]);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG2, ag->fifodata[1]);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG4, FIFO_CFG4_INIT);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, FIFO_CFG5_INIT);
+}
+
+static unsigned int ag71xx_max_frame_len(unsigned int mtu)
+{
+	return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
+}
+
+static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac)
+{
+	u32 t;
+
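+	/* bytes 2-5 of the address go into ADDR1, bytes 0-1 into the
+	 * upper half of ADDR2
+	 */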
+	t = (((u32)mac[5]) << 24) | (((u32)mac[4]) << 16)
+	  | (((u32)mac[3]) << 8) | ((u32)mac[2]);
+
+	ag71xx_wr(ag, AG71XX_REG_MAC_ADDR1, t);
+
+	t = (((u32)mac[1]) << 24) | (((u32)mac[0]) << 16);
+	ag71xx_wr(ag, AG71XX_REG_MAC_ADDR2, t);
+}
+
+static void ag71xx_fast_reset(struct ag71xx *ag)
+{
+	struct net_device *dev = ag->ndev;
+	u32 rx_ds;
+	u32 mii_reg;
+
+	ag71xx_hw_stop(ag);
+
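+	/* save the MII clock configuration and the current RX descriptor
+	 * pointer so both can be restored after the reset below
+	 */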
+	mii_reg = ag71xx_rr(ag, AG71XX_REG_MII_CFG);
+	rx_ds = ag71xx_rr(ag, AG71XX_REG_RX_DESC);
+
+	ag71xx_tx_packets(ag, true);
+
+	reset_control_assert(ag->mac_reset);
+	usleep_range(10, 20);
+	reset_control_deassert(ag->mac_reset);
+	usleep_range(10, 20);
+
+	ag71xx_dma_reset(ag);
+	ag71xx_hw_setup(ag);
+	ag->tx_ring.curr = 0;
+	ag->tx_ring.dirty = 0;
+	netdev_reset_queue(ag->ndev);
+
+	/* setup max frame length */
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL,
+		  ag71xx_max_frame_len(ag->ndev->mtu));
+
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, rx_ds);
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+	ag71xx_wr(ag, AG71XX_REG_MII_CFG, mii_reg);
+
+	ag71xx_hw_set_macaddr(ag, dev->dev_addr);
+}
+
+static void ag71xx_hw_start(struct ag71xx *ag)
+{
+	/* start RX engine */
+	ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE);
+
+	/* enable interrupts */
+	ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, AG71XX_INT_INIT);
+
+	netif_wake_queue(ag->ndev);
+}
+
+static void ag71xx_link_adjust(struct ag71xx *ag, bool update)
+{
+	struct phy_device *phydev = ag->ndev->phydev;
+	u32 cfg2;
+	u32 ifctl;
+	u32 fifo5;
+
+	if (!phydev->link && update) {
+		ag71xx_hw_stop(ag);
+		return;
+	}
+
+	if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130))
+		ag71xx_fast_reset(ag);
+
+	cfg2 = ag71xx_rr(ag, AG71XX_REG_MAC_CFG2);
+	cfg2 &= ~(MAC_CFG2_IF_1000 | MAC_CFG2_IF_10_100 | MAC_CFG2_FDX);
+	cfg2 |= (phydev->duplex) ? MAC_CFG2_FDX : 0;
+
+	ifctl = ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL);
+	ifctl &= ~(MAC_IFCTL_SPEED);
+
+	fifo5 = ag71xx_rr(ag, AG71XX_REG_FIFO_CFG5);
+	fifo5 &= ~FIFO_CFG5_BM;
+
+	switch (phydev->speed) {
+	case SPEED_1000:
+		cfg2 |= MAC_CFG2_IF_1000;
+		fifo5 |= FIFO_CFG5_BM;
+		break;
+	case SPEED_100:
+		cfg2 |= MAC_CFG2_IF_10_100;
+		ifctl |= MAC_IFCTL_SPEED;
+		break;
+	case SPEED_10:
+		cfg2 |= MAC_CFG2_IF_10_100;
+		break;
+	default:
+		WARN(1, "unsupported speed %i\n", phydev->speed);
+		return;
+	}
+
+	if (ag->tx_ring.desc_split) {
+		ag->fifodata[2] &= 0xffff;
+		ag->fifodata[2] |= ((2048 - ag->tx_ring.desc_split) / 4) << 16;
+	}
+
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, ag->fifodata[2]);
+
+	ag71xx_wr(ag, AG71XX_REG_MAC_CFG2, cfg2);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, fifo5);
+	ag71xx_wr(ag, AG71XX_REG_MAC_IFCTL, ifctl);
+
+	ag71xx_hw_start(ag);
+
+	if (update)
+		phy_print_status(phydev);
+}
+
+static void ag71xx_phy_link_adjust(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	ag71xx_link_adjust(ag, true);
+}
+
+static int ag71xx_phy_connect(struct ag71xx *ag)
+{
+	struct device_node *np = ag->pdev->dev.of_node;
+	struct net_device *ndev = ag->ndev;
+	struct device_node *phy_node;
+	struct phy_device *phydev;
+	int ret;
+
+	if (of_phy_is_fixed_link(np)) {
+		ret = of_phy_register_fixed_link(np);
+		if (ret < 0) {
+			netif_err(ag, probe, ndev, "Failed to register fixed PHY link: %d\n",
+				  ret);
+			return ret;
+		}
+
+		phy_node = of_node_get(np);
+	} else {
+		phy_node = of_parse_phandle(np, "phy-handle", 0);
+	}
+
+	if (!phy_node) {
+		netif_err(ag, probe, ndev, "Could not find valid phy node\n");
+		return -ENODEV;
+	}
+
+	phydev = of_phy_connect(ag->ndev, phy_node, ag71xx_phy_link_adjust,
+				0, ag->phy_if_mode);
+
+	of_node_put(phy_node);
+
+	if (!phydev) {
+		netif_err(ag, probe, ndev, "Could not connect to PHY device\n");
+		return -ENODEV;
+	}
+
+	phy_attached_info(phydev);
+
+	return 0;
+}
+
+static void ag71xx_ring_tx_clean(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->tx_ring;
+	int ring_mask = BIT(ring->order) - 1;
+	u32 bytes_compl = 0, pkts_compl = 0;
+	struct net_device *ndev = ag->ndev;
+
+	while (ring->curr != ring->dirty) {
+		struct ag71xx_desc *desc;
+		u32 i = ring->dirty & ring_mask;
+
+		desc = ag71xx_ring_desc(ring, i);
+		if (!ag71xx_desc_empty(desc)) {
+			desc->ctrl = 0;
+			ndev->stats.tx_errors++;
+		}
+
+		if (ring->buf[i].tx.skb) {
+			bytes_compl += ring->buf[i].tx.len;
+			pkts_compl++;
+			dev_kfree_skb_any(ring->buf[i].tx.skb);
+		}
+		ring->buf[i].tx.skb = NULL;
+		ring->dirty++;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+}
+
+static void ag71xx_ring_tx_init(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->tx_ring;
+	int ring_size = BIT(ring->order);
+	int ring_mask = ring_size - 1;
+	int i;
+
+	for (i = 0; i < ring_size; i++) {
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+		desc->next = (u32)(ring->descs_dma +
+			AG71XX_DESC_SIZE * ((i + 1) & ring_mask));
+
+		desc->ctrl = DESC_EMPTY;
+		ring->buf[i].tx.skb = NULL;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	ring->curr = 0;
+	ring->dirty = 0;
+	netdev_reset_queue(ag->ndev);
+}
+
+static void ag71xx_ring_rx_clean(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	int ring_size = BIT(ring->order);
+	int i;
+
+	if (!ring->buf)
+		return;
+
+	for (i = 0; i < ring_size; i++)
+		if (ring->buf[i].rx.rx_buf) {
+			dma_unmap_single(&ag->pdev->dev,
+					 ring->buf[i].rx.dma_addr,
+					 ag->rx_buf_size, DMA_FROM_DEVICE);
+			skb_free_frag(ring->buf[i].rx.rx_buf);
+		}
+}
+
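+/* total size of one RX fragment: the payload area plus the skb_shared_info
+ * that build_skb() expects at the end of the buffer
+ */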
+static int ag71xx_buffer_size(struct ag71xx *ag)
+{
+	return ag->rx_buf_size +
+	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
+			       int offset,
+			       void *(*alloc)(unsigned int size))
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	struct ag71xx_desc *desc;
+	void *data;
+
+	desc = ag71xx_ring_desc(ring, buf - &ring->buf[0]);
+
+	data = alloc(ag71xx_buffer_size(ag));
+	if (!data)
+		return false;
+
+	buf->rx.rx_buf = data;
+	buf->rx.dma_addr = dma_map_single(&ag->pdev->dev, data, ag->rx_buf_size,
+					  DMA_FROM_DEVICE);
+	desc->data = (u32)buf->rx.dma_addr + offset;
+	return true;
+}
+
+static int ag71xx_ring_rx_init(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	struct net_device *ndev = ag->ndev;
+	int ring_mask = BIT(ring->order) - 1;
+	int ring_size = BIT(ring->order);
+	unsigned int i;
+	int ret;
+
+	ret = 0;
+	for (i = 0; i < ring_size; i++) {
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+		desc->next = (u32)(ring->descs_dma +
+			AG71XX_DESC_SIZE * ((i + 1) & ring_mask));
+
+		netif_dbg(ag, rx_status, ndev, "RX desc at %p, next is %08x\n",
+			  desc, desc->next);
+	}
+
+	for (i = 0; i < ring_size; i++) {
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+		if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], ag->rx_buf_offset,
+					netdev_alloc_frag)) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		desc->ctrl = DESC_EMPTY;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	ring->curr = 0;
+	ring->dirty = 0;
+
+	return ret;
+}
+
+static int ag71xx_ring_rx_refill(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	int ring_mask = BIT(ring->order) - 1;
+	int offset = ag->rx_buf_offset;
+	unsigned int count;
+
+	count = 0;
+	for (; ring->curr - ring->dirty > 0; ring->dirty++) {
+		struct ag71xx_desc *desc;
+		unsigned int i;
+
+		i = ring->dirty & ring_mask;
+		desc = ag71xx_ring_desc(ring, i);
+
+		if (!ring->buf[i].rx.rx_buf &&
+		    !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset,
+					napi_alloc_frag))
+			break;
+
+		desc->ctrl = DESC_EMPTY;
+		count++;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	netif_dbg(ag, rx_status, ag->ndev, "%u rx descriptors refilled\n",
+		  count);
+
+	return count;
+}
+
+static int ag71xx_rings_init(struct ag71xx *ag)
+{
+	struct ag71xx_ring *tx = &ag->tx_ring;
+	struct ag71xx_ring *rx = &ag->rx_ring;
+	int ring_size, tx_size;
+
+	ring_size = BIT(tx->order) + BIT(rx->order);
+	tx_size = BIT(tx->order);
+
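+	/* TX and RX share one buf array and one coherent descriptor block;
+	 * the RX halves start right after the TX halves
+	 */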
+	tx->buf = kcalloc(ring_size, sizeof(*tx->buf), GFP_KERNEL);
+	if (!tx->buf)
+		return -ENOMEM;
+
+	tx->descs_cpu = dma_alloc_coherent(&ag->pdev->dev,
+					   ring_size * AG71XX_DESC_SIZE,
+					   &tx->descs_dma, GFP_KERNEL);
+	if (!tx->descs_cpu) {
+		kfree(tx->buf);
+		tx->buf = NULL;
+		return -ENOMEM;
+	}
+
+	rx->buf = &tx->buf[tx_size];
+	rx->descs_cpu = ((void *)tx->descs_cpu) + tx_size * AG71XX_DESC_SIZE;
+	rx->descs_dma = tx->descs_dma + tx_size * AG71XX_DESC_SIZE;
+
+	ag71xx_ring_tx_init(ag);
+	return ag71xx_ring_rx_init(ag);
+}
+
+static void ag71xx_rings_free(struct ag71xx *ag)
+{
+	struct ag71xx_ring *tx = &ag->tx_ring;
+	struct ag71xx_ring *rx = &ag->rx_ring;
+	int ring_size;
+
+	ring_size = BIT(tx->order) + BIT(rx->order);
+
+	if (tx->descs_cpu)
+		dma_free_coherent(&ag->pdev->dev, ring_size * AG71XX_DESC_SIZE,
+				  tx->descs_cpu, tx->descs_dma);
+
+	kfree(tx->buf);
+
+	tx->descs_cpu = NULL;
+	rx->descs_cpu = NULL;
+	tx->buf = NULL;
+	rx->buf = NULL;
+}
+
+static void ag71xx_rings_cleanup(struct ag71xx *ag)
+{
+	ag71xx_ring_rx_clean(ag);
+	ag71xx_ring_tx_clean(ag);
+	ag71xx_rings_free(ag);
+
+	netdev_reset_queue(ag->ndev);
+}
+
+static void ag71xx_hw_init(struct ag71xx *ag)
+{
+	ag71xx_hw_stop(ag);
+
+	ag71xx_sb(ag, AG71XX_REG_MAC_CFG1, MAC_CFG1_SR);
+	usleep_range(20, 30);
+
+	reset_control_assert(ag->mac_reset);
+	msleep(100);
+	reset_control_deassert(ag->mac_reset);
+	msleep(200);
+
+	ag71xx_hw_setup(ag);
+
+	ag71xx_dma_reset(ag);
+}
+
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+	int ret;
+
+	ret = ag71xx_rings_init(ag);
+	if (ret)
+		return ret;
+
+	napi_enable(&ag->napi);
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+	netif_start_queue(ag->ndev);
+
+	return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+	netif_stop_queue(ag->ndev);
+
+	ag71xx_hw_stop(ag);
+	ag71xx_dma_reset(ag);
+
+	napi_disable(&ag->napi);
+	del_timer_sync(&ag->oom_timer);
+
+	ag71xx_rings_cleanup(ag);
+}
+
+static int ag71xx_open(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+	unsigned int max_frame_len;
+	int ret;
+
+	max_frame_len = ag71xx_max_frame_len(ndev->mtu);
+	ag->rx_buf_size =
+		SKB_DATA_ALIGN(max_frame_len + NET_SKB_PAD + NET_IP_ALIGN);
+
+	/* setup max frame length */
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+	ag71xx_hw_set_macaddr(ag, ndev->dev_addr);
+
+	ret = ag71xx_hw_enable(ag);
+	if (ret)
+		goto err;
+
+	ret = ag71xx_phy_connect(ag);
+	if (ret)
+		goto err;
+
+	phy_start(ndev->phydev);
+
+	return 0;
+
+err:
+	ag71xx_rings_cleanup(ag);
+	return ret;
+}
+
+static int ag71xx_stop(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	phy_stop(ndev->phydev);
+	phy_disconnect(ndev->phydev);
+	ag71xx_hw_disable(ag);
+
+	return 0;
+}
+
+static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len)
+{
+	int i, ring_mask, ndesc, split;
+	struct ag71xx_desc *desc;
+
+	ring_mask = BIT(ring->order) - 1;
+	ndesc = 0;
+	split = ring->desc_split;
+
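+	/* with descriptor splitting enabled, chop the buffer into chunks of
+	 * at most 'split' bytes; otherwise use a single chunk
+	 */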
+	if (!split)
+		split = len;
+
+	while (len > 0) {
+		unsigned int cur_len = len;
+
+		i = (ring->curr + ndesc) & ring_mask;
+		desc = ag71xx_ring_desc(ring, i);
+
+		if (!ag71xx_desc_empty(desc))
+			return -1;
+
+		if (cur_len > split) {
+			cur_len = split;
+
+			/* TX will hang if a DMA transfer is <= 4 bytes;
+			 * make sure the next segment is more than 4 bytes long.
+			 */
+			if (len <= split + 4)
+				cur_len -= 4;
+		}
+
+		desc->data = addr;
+		addr += cur_len;
+		len -= cur_len;
+
+		if (len > 0)
+			cur_len |= DESC_MORE;
+
+		/* prevent early tx attempt of this descriptor */
+		if (!ndesc)
+			cur_len |= DESC_EMPTY;
+
+		desc->ctrl = cur_len;
+		ndesc++;
+	}
+
+	return ndesc;
+}
+
+static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
+					  struct net_device *ndev)
+{
+	int i, n, ring_min, ring_mask, ring_size;
+	struct ag71xx *ag = netdev_priv(ndev);
+	struct ag71xx_ring *ring;
+	struct ag71xx_desc *desc;
+	dma_addr_t dma_addr;
+
+	ring = &ag->tx_ring;
+	ring_mask = BIT(ring->order) - 1;
+	ring_size = BIT(ring->order);
+
+	if (skb->len <= 4) {
+		netif_dbg(ag, tx_err, ndev, "packet len is too small\n");
+		goto err_drop;
+	}
+
+	dma_addr = dma_map_single(&ag->pdev->dev, skb->data, skb->len,
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(&ag->pdev->dev, dma_addr))
+		goto err_drop;
+
+	i = ring->curr & ring_mask;
+	desc = ag71xx_ring_desc(ring, i);
+
+	/* setup descriptor fields */
+	n = ag71xx_fill_dma_desc(ring, (u32)dma_addr,
+				 skb->len & ag->dcfg->desc_pktlen_mask);
+	if (n < 0)
+		goto err_drop_unmap;
+
+	i = (ring->curr + n - 1) & ring_mask;
+	ring->buf[i].tx.len = skb->len;
+	ring->buf[i].tx.skb = skb;
+
+	netdev_sent_queue(ndev, skb->len);
+
+	skb_tx_timestamp(skb);
+
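+	/* all descriptors are set up, clear the EMPTY flag on the first one
+	 * to hand the chain over to the hardware
+	 */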
+	desc->ctrl &= ~DESC_EMPTY;
+	ring->curr += n;
+
+	/* flush descriptor */
+	wmb();
+
+	ring_min = 2;
+	if (ring->desc_split)
+		ring_min *= AG71XX_TX_RING_DS_PER_PKT;
+
+	if (ring->curr - ring->dirty >= ring_size - ring_min) {
+		netif_dbg(ag, tx_err, ndev, "tx queue full\n");
+		netif_stop_queue(ndev);
+	}
+
+	netif_dbg(ag, tx_queued, ndev, "packet injected into TX queue\n");
+
+	/* enable TX engine */
+	ag71xx_wr(ag, AG71XX_REG_TX_CTRL, TX_CTRL_TXE);
+
+	return NETDEV_TX_OK;
+
+err_drop_unmap:
+	dma_unmap_single(&ag->pdev->dev, dma_addr, skb->len, DMA_TO_DEVICE);
+
+err_drop:
+	ndev->stats.tx_dropped++;
+
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
+{
+	if (!ndev->phydev)
+		return -EINVAL;
+
+	return phy_mii_ioctl(ndev->phydev, ifr, cmd);
+}
+
+static void ag71xx_oom_timer_handler(struct timer_list *t)
+{
+	struct ag71xx *ag = from_timer(ag, t, oom_timer);
+
+	napi_schedule(&ag->napi);
+}
+
+static void ag71xx_tx_timeout(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	netif_err(ag, tx_err, ndev, "tx timeout\n");
+
+	schedule_delayed_work(&ag->restart_work, 1);
+}
+
+static void ag71xx_restart_work_func(struct work_struct *work)
+{
+	struct ag71xx *ag = container_of(work, struct ag71xx,
+					 restart_work.work);
+	struct net_device *ndev = ag->ndev;
+
+	rtnl_lock();
+	ag71xx_hw_disable(ag);
+	ag71xx_hw_enable(ag);
+	if (ndev->phydev->link)
+		ag71xx_link_adjust(ag, false);
+	rtnl_unlock();
+}
+
+static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
+{
+	struct net_device *ndev = ag->ndev;
+	int ring_mask, ring_size, done = 0;
+	unsigned int pktlen_mask, offset;
+	struct sk_buff *next, *skb;
+	struct ag71xx_ring *ring;
+	struct list_head rx_list;
+
+	ring = &ag->rx_ring;
+	pktlen_mask = ag->dcfg->desc_pktlen_mask;
+	offset = ag->rx_buf_offset;
+	ring_mask = BIT(ring->order) - 1;
+	ring_size = BIT(ring->order);
+
+	netif_dbg(ag, rx_status, ndev, "rx packets, limit=%d, curr=%u, dirty=%u\n",
+		  limit, ring->curr, ring->dirty);
+
+	INIT_LIST_HEAD(&rx_list);
+
+	while (done < limit) {
+		unsigned int i = ring->curr & ring_mask;
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+		int pktlen;
+		int err = 0;
+
+		if (ag71xx_desc_empty(desc))
+			break;
+
+		if ((ring->dirty + ring_size) == ring->curr) {
+			WARN_ONCE(1, "RX out of ring");
+			break;
+		}
+
+		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
+
+		pktlen = desc->ctrl & pktlen_mask;
+		pktlen -= ETH_FCS_LEN;
+
+		dma_unmap_single(&ag->pdev->dev, ring->buf[i].rx.dma_addr,
+				 ag->rx_buf_size, DMA_FROM_DEVICE);
+
+		ndev->stats.rx_packets++;
+		ndev->stats.rx_bytes += pktlen;
+
+		skb = build_skb(ring->buf[i].rx.rx_buf, ag71xx_buffer_size(ag));
+		if (!skb) {
+			skb_free_frag(ring->buf[i].rx.rx_buf);
+			goto next;
+		}
+
+		skb_reserve(skb, offset);
+		skb_put(skb, pktlen);
+
+		if (err) {
+			ndev->stats.rx_dropped++;
+			kfree_skb(skb);
+		} else {
+			skb->dev = ndev;
+			skb->ip_summed = CHECKSUM_NONE;
+			list_add_tail(&skb->list, &rx_list);
+		}
+
+next:
+		ring->buf[i].rx.rx_buf = NULL;
+		done++;
+
+		ring->curr++;
+	}
+
+	ag71xx_ring_rx_refill(ag);
+
+	list_for_each_entry_safe(skb, next, &rx_list, list)
+		skb->protocol = eth_type_trans(skb, ndev);
+	netif_receive_skb_list(&rx_list);
+
+	netif_dbg(ag, rx_status, ndev, "rx finish, curr=%u, dirty=%u, done=%d\n",
+		  ring->curr, ring->dirty, done);
+
+	return done;
+}
+
+static int ag71xx_poll(struct napi_struct *napi, int limit)
+{
+	struct ag71xx *ag = container_of(napi, struct ag71xx, napi);
+	struct ag71xx_ring *rx_ring = &ag->rx_ring;
+	int rx_ring_size = BIT(rx_ring->order);
+	struct net_device *ndev = ag->ndev;
+	int tx_done, rx_done;
+	u32 status;
+
+	tx_done = ag71xx_tx_packets(ag, false);
+
+	netif_dbg(ag, rx_status, ndev, "processing RX ring\n");
+	rx_done = ag71xx_rx_packets(ag, limit);
+
+	if (!rx_ring->buf[rx_ring->dirty % rx_ring_size].rx.rx_buf)
+		goto oom;
+
+	status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);
+	if (unlikely(status & RX_STATUS_OF)) {
+		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_OF);
+		ndev->stats.rx_fifo_errors++;
+
+		/* restart RX */
+		ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE);
+	}
+
+	if (rx_done < limit) {
+		if (status & RX_STATUS_PR)
+			goto more;
+
+		status = ag71xx_rr(ag, AG71XX_REG_TX_STATUS);
+		if (status & TX_STATUS_PS)
+			goto more;
+
+		netif_dbg(ag, rx_status, ndev, "disable polling mode, rx=%d, tx=%d, limit=%d\n",
+			  rx_done, tx_done, limit);
+
+		napi_complete(napi);
+
+		/* enable interrupts */
+		ag71xx_int_enable(ag, AG71XX_INT_POLL);
+		return rx_done;
+	}
+
+more:
+	netif_dbg(ag, rx_status, ndev, "stay in polling mode, rx=%d, tx=%d, limit=%d\n",
+		  rx_done, tx_done, limit);
+	return limit;
+
+oom:
+	netif_err(ag, rx_err, ndev, "out of memory\n");
+
+	mod_timer(&ag->oom_timer, jiffies + AG71XX_OOM_REFILL);
+	napi_complete(napi);
+	return 0;
+}
+
+static irqreturn_t ag71xx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct ag71xx *ag;
+	u32 status;
+
+	ag = netdev_priv(ndev);
+	status = ag71xx_rr(ag, AG71XX_REG_INT_STATUS);
+
+	if (unlikely(!status))
+		return IRQ_NONE;
+
+	if (unlikely(status & AG71XX_INT_ERR)) {
+		if (status & AG71XX_INT_TX_BE) {
+			ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE);
+			netif_err(ag, intr, ndev, "TX BUS error\n");
+		}
+		if (status & AG71XX_INT_RX_BE) {
+			ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE);
+			netif_err(ag, intr, ndev, "RX BUS error\n");
+		}
+	}
+
+	if (likely(status & AG71XX_INT_POLL)) {
+		ag71xx_int_disable(ag, AG71XX_INT_POLL);
+		netif_dbg(ag, intr, ndev, "enable polling mode\n");
+		napi_schedule(&ag->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int ag71xx_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	ndev->mtu = new_mtu;
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL,
+		  ag71xx_max_frame_len(ndev->mtu));
+
+	return 0;
+}
+
+static const struct net_device_ops ag71xx_netdev_ops = {
+	.ndo_open		= ag71xx_open,
+	.ndo_stop		= ag71xx_stop,
+	.ndo_start_xmit		= ag71xx_hard_start_xmit,
+	.ndo_do_ioctl		= ag71xx_do_ioctl,
+	.ndo_tx_timeout		= ag71xx_tx_timeout,
+	.ndo_change_mtu		= ag71xx_change_mtu,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static const u32 ar71xx_addr_ar7100[] = {
+	0x19000000, 0x1a000000,
+};
+
+static int ag71xx_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const struct ag71xx_dcfg *dcfg;
+	struct net_device *ndev;
+	struct resource *res;
+	const void *mac_addr;
+	int tx_size, err, i;
+	struct ag71xx *ag;
+
+	if (!np)
+		return -ENODEV;
+
+	ndev = devm_alloc_etherdev(&pdev->dev, sizeof(*ag));
+	if (!ndev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	dcfg = of_device_get_match_data(&pdev->dev);
+	if (!dcfg)
+		return -EINVAL;
+
+	ag = netdev_priv(ndev);
+	ag->mac_idx = -1;
+	for (i = 0; i < ARRAY_SIZE(ar71xx_addr_ar7100); i++) {
+		if (ar71xx_addr_ar7100[i] == res->start)
+			ag->mac_idx = i;
+	}
+
+	if (ag->mac_idx < 0) {
+		netif_err(ag, probe, ndev, "unknown mac idx\n");
+		return -EINVAL;
+	}
+
+	ag->clk_eth = devm_clk_get(&pdev->dev, "eth");
+	if (IS_ERR(ag->clk_eth)) {
+		netif_err(ag, probe, ndev, "Failed to get eth clk.\n");
+		return PTR_ERR(ag->clk_eth);
+	}
+
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	ag->pdev = pdev;
+	ag->ndev = ndev;
+	ag->dcfg = dcfg;
+	ag->msg_enable = netif_msg_init(-1, AG71XX_DEFAULT_MSG_ENABLE);
+	memcpy(ag->fifodata, dcfg->fifodata, sizeof(ag->fifodata));
+
+	ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
+	if (IS_ERR(ag->mac_reset)) {
+		netif_err(ag, probe, ndev, "missing mac reset\n");
+		err = PTR_ERR(ag->mac_reset);
+		goto err_free;
+	}
+
+	ag->mac_base = devm_ioremap_nocache(&pdev->dev, res->start,
+					    resource_size(res));
+	if (!ag->mac_base) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ndev->irq = platform_get_irq(pdev, 0);
+	err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
+			       0x0, dev_name(&pdev->dev), ndev);
+	if (err) {
+		netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
+			  ndev->irq);
+		goto err_free;
+	}
+
+	ndev->netdev_ops = &ag71xx_netdev_ops;
+
+	INIT_DELAYED_WORK(&ag->restart_work, ag71xx_restart_work_func);
+	timer_setup(&ag->oom_timer, ag71xx_oom_timer_handler, 0);
+
+	tx_size = AG71XX_TX_RING_SIZE_DEFAULT;
+	ag->rx_ring.order = ag71xx_ring_size_order(AG71XX_RX_RING_SIZE_DEFAULT);
+
+	ndev->min_mtu = 68;
+	ndev->max_mtu = dcfg->max_frame_len - ag71xx_max_frame_len(0);
+
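+	/* the older SoCs reportedly cannot handle unaligned RX DMA, so only
+	 * the newer ones get the extra NET_IP_ALIGN receive offset
+	 */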
+	ag->rx_buf_offset = NET_SKB_PAD;
+	if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130))
+		ag->rx_buf_offset += NET_IP_ALIGN;
+
+	if (ag71xx_is(ag, AR7100)) {
+		ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT;
+		tx_size *= AG71XX_TX_RING_DS_PER_PKT;
+	}
+	ag->tx_ring.order = ag71xx_ring_size_order(tx_size);
+
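+	/* allocate a self-linked dummy descriptor used to park the DMA
+	 * engines in ag71xx_dma_reset()
+	 */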
+	ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
+					    sizeof(struct ag71xx_desc),
+					    &ag->stop_desc_dma, GFP_KERNEL);
+	if (!ag->stop_desc) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ag->stop_desc->data = 0;
+	ag->stop_desc->ctrl = 0;
+	ag->stop_desc->next = (u32)ag->stop_desc_dma;
+
+	mac_addr = of_get_mac_address(np);
+	if (!IS_ERR(mac_addr))
+		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	if (IS_ERR(mac_addr) || !is_valid_ether_addr(ndev->dev_addr)) {
+		netif_err(ag, probe, ndev, "invalid MAC address, using random address\n");
+		eth_random_addr(ndev->dev_addr);
+	}
+
+	ag->phy_if_mode = of_get_phy_mode(np);
+	if (ag->phy_if_mode < 0) {
+		netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
+		err = ag->phy_if_mode;
+		goto err_free;
+	}
+
+	netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
+
+	err = clk_prepare_enable(ag->clk_eth);
+	if (err) {
+		netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
+		goto err_free;
+	}
+
+	ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
+
+	ag71xx_hw_init(ag);
+
+	err = ag71xx_mdio_probe(ag);
+	if (err)
+		goto err_put_clk;
+
+	platform_set_drvdata(pdev, ndev);
+
+	err = register_netdev(ndev);
+	if (err) {
+		netif_err(ag, probe, ndev, "unable to register net device\n");
+		platform_set_drvdata(pdev, NULL);
+		goto err_mdio_remove;
+	}
+
+	netif_info(ag, probe, ndev, "Atheros AG71xx at 0x%08lx, irq %d, mode: %s\n",
+		   (unsigned long)ag->mac_base, ndev->irq,
+		   phy_modes(ag->phy_if_mode));
+
+	return 0;
+
+err_mdio_remove:
+	ag71xx_mdio_remove(ag);
+err_put_clk:
+	clk_disable_unprepare(ag->clk_eth);
+err_free:
+	/* ndev was allocated with devm_alloc_etherdev(), so it is released
+	 * automatically; an explicit free_netdev() here would double free
+	 */
+	return err;
+}
+
+static int ag71xx_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct ag71xx *ag;
+
+	if (!ndev)
+		return 0;
+
+	ag = netdev_priv(ndev);
+	unregister_netdev(ndev);
+	ag71xx_mdio_remove(ag);
+	clk_disable_unprepare(ag->clk_eth);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static const u32 ar71xx_fifo_ar7100[] = {
+	0x0fff0000, 0x00001fff, 0x00780fff,
+};
+
+static const u32 ar71xx_fifo_ar9130[] = {
+	0x0fff0000, 0x00001fff, 0x008001ff,
+};
+
+static const u32 ar71xx_fifo_ar9330[] = {
+	0x0010ffff, 0x015500aa, 0x01f00140,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar7100 = {
+	.type = AR7100,
+	.fifodata = ar71xx_fifo_ar7100,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = false,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar7240 = {
+	.type = AR7240,
+	.fifodata = ar71xx_fifo_ar7100,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9130 = {
+	.type = AR9130,
+	.fifodata = ar71xx_fifo_ar9130,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = false,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9330 = {
+	.type = AR9330,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9340 = {
+	.type = AR9340,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = SZ_16K - 1,
+	.desc_pktlen_mask = SZ_16K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_qca9530 = {
+	.type = QCA9530,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = SZ_16K - 1,
+	.desc_pktlen_mask = SZ_16K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_qca9550 = {
+	.type = QCA9550,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_16K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct of_device_id ag71xx_match[] = {
+	{ .compatible = "qca,ar7100-eth", .data = &ag71xx_dcfg_ar7100 },
+	{ .compatible = "qca,ar7240-eth", .data = &ag71xx_dcfg_ar7240 },
+	{ .compatible = "qca,ar7241-eth", .data = &ag71xx_dcfg_ar7240 },
+	{ .compatible = "qca,ar7242-eth", .data = &ag71xx_dcfg_ar7240 },
+	{ .compatible = "qca,ar9130-eth", .data = &ag71xx_dcfg_ar9130 },
+	{ .compatible = "qca,ar9330-eth", .data = &ag71xx_dcfg_ar9330 },
+	{ .compatible = "qca,ar9340-eth", .data = &ag71xx_dcfg_ar9340 },
+	{ .compatible = "qca,qca9530-eth", .data = &ag71xx_dcfg_qca9530 },
+	{ .compatible = "qca,qca9550-eth", .data = &ag71xx_dcfg_qca9550 },
+	{ .compatible = "qca,qca9560-eth", .data = &ag71xx_dcfg_qca9550 },
+	{}
+};
+
+static struct platform_driver ag71xx_driver = {
+	.probe		= ag71xx_probe,
+	.remove		= ag71xx_remove,
+	.driver = {
+		.name	= "ag71xx",
+		.of_match_table = ag71xx_match,
+	}
+};
+
+module_platform_driver(ag71xx_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/atheros/alx/Makefile b/drivers/net/ethernet/atheros/alx/Makefile
index ed4a605..fec7885 100644
--- a/drivers/net/ethernet/atheros/alx/Makefile
+++ b/drivers/net/ethernet/atheros/alx/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ALX) += alx.o
 alx-objs := main.o ethtool.o hw.o
diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 78c5de4..9d0e74f 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -140,6 +140,5 @@
 };
 
 extern const struct ethtool_ops alx_ethtool_ops;
-extern const char alx_drv_name[];
 
 #endif
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 6d32211..d4bbcdf 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -49,7 +49,7 @@
 #include "hw.h"
 #include "reg.h"
 
-const char alx_drv_name[] = "alx";
+static const char alx_drv_name[] = "alx";
 
 static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
@@ -660,10 +660,9 @@
 			    alx->num_txq +
 			    sizeof(struct alx_rrd) * alx->rx_ringsz +
 			    sizeof(struct alx_rfd) * alx->rx_ringsz;
-	alx->descmem.virt = dma_zalloc_coherent(&alx->hw.pdev->dev,
-						alx->descmem.size,
-						&alx->descmem.dma,
-						GFP_KERNEL);
+	alx->descmem.virt = dma_alloc_coherent(&alx->hw.pdev->dev,
+					       alx->descmem.size,
+					       &alx->descmem.dma, GFP_KERNEL);
 	if (!alx->descmem.virt)
 		return -ENOMEM;
 
@@ -1466,9 +1465,7 @@
 	tpd->len = cpu_to_le16(maplen);
 
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
-		struct skb_frag_struct *frag;
-
-		frag = &skb_shinfo(skb)->frags[f];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 
 		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
@@ -1880,8 +1877,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int alx_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct alx_priv *alx = pci_get_drvdata(pdev);
+	struct alx_priv *alx = dev_get_drvdata(dev);
 
 	if (!netif_running(alx->dev))
 		return 0;
@@ -1892,8 +1888,7 @@
 
 static int alx_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct alx_priv *alx = pci_get_drvdata(pdev);
+	struct alx_priv *alx = dev_get_drvdata(dev);
 	struct alx_hw *hw = &alx->hw;
 	int err;
 
@@ -1964,8 +1959,6 @@
 	if (!alx_reset_mac(hw))
 		rc = PCI_ERS_RESULT_RECOVERED;
 out:
-	pci_cleanup_aer_uncorrect_error_status(pdev);
-
 	rtnl_unlock();
 
 	return rc;
diff --git a/drivers/net/ethernet/atheros/atl1c/Makefile b/drivers/net/ethernet/atheros/atl1c/Makefile
index c37d966..02d0250 100644
--- a/drivers/net/ethernet/atheros/atl1c/Makefile
+++ b/drivers/net/ethernet/atheros/atl1c/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ATL1C) += atl1c.o
 atl1c-objs := atl1c_main.o atl1c_hw.o atl1c_ethtool.o
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index c46b489..60b2feb 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright(c) 2008 - 2009 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef _ATL1C_H_
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
index 28e9ae1..b5a70a3 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
@@ -1,23 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2009 - 2009 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
index 73efdb0..140358d 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/pci.h>
 #include <linux/delay.h>
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
index 21d8c4d..ce1a123 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright(c) 2008 - 2009 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef _ATL1C_HW_H_
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 7087b88..2b239ec 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2008 - 2009 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #include "atl1c.h"
@@ -1019,8 +1006,8 @@
 		sizeof(struct atl1c_recv_ret_status) * rx_desc_count +
 		8 * 4;
 
-	ring_header->desc = dma_zalloc_coherent(&pdev->dev, ring_header->size,
-						&ring_header->dma, GFP_KERNEL);
+	ring_header->desc = dma_alloc_coherent(&pdev->dev, ring_header->size,
+					       &ring_header->dma, GFP_KERNEL);
 	if (unlikely(!ring_header->desc)) {
 		dev_err(&pdev->dev, "could not get memory for DMA buffer\n");
 		goto err_nomem;
@@ -1833,10 +1820,10 @@
 		atl1c_clean_rrd(rrd_ring, rrs, rfd_num);
 		if (rrs->word3 & (RRS_RX_ERR_SUM | RRS_802_3_LEN_ERR)) {
 			atl1c_clean_rfd(rfd_ring, rrs, rfd_num);
-				if (netif_msg_rx_err(adapter))
-					dev_warn(&pdev->dev,
-						"wrong packet! rrs word3 is %x\n",
-						rrs->word3);
+			if (netif_msg_rx_err(adapter))
+				dev_warn(&pdev->dev,
+					 "wrong packet! rrs word3 is %x\n",
+					 rrs->word3);
 			continue;
 		}
 
@@ -2163,9 +2150,7 @@
 	}
 
 	for (f = 0; f < nr_frags; f++) {
-		struct skb_frag_struct *frag;
-
-		frag = &skb_shinfo(skb)->frags[f];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 
 		use_tpd = atl1c_get_tpd(adapter, type);
 		memcpy(use_tpd, tpd, sizeof(struct atl1c_tpd_desc));
@@ -2214,7 +2199,7 @@
 					  struct net_device *netdev)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
-	u16 tpd_req = 1;
+	u16 tpd_req;
 	struct atl1c_tpd_desc *tpd;
 	enum atl1c_trans_queue type = atl1c_trans_normal;
 
@@ -2435,8 +2420,7 @@
 
 static int atl1c_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
 	u32 wufc = adapter->wol;
@@ -2450,7 +2434,7 @@
 
 	if (wufc)
 		if (atl1c_phy_to_ps_link(hw) != 0)
-			dev_dbg(&pdev->dev, "phy power saving failed");
+			dev_dbg(dev, "phy power saving failed");
 
 	atl1c_power_saving(hw, wufc);
 
@@ -2460,8 +2444,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int atl1c_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 
 	AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0);
diff --git a/drivers/net/ethernet/atheros/atl1e/Makefile b/drivers/net/ethernet/atheros/atl1e/Makefile
index bc11be8..8506694 100644
--- a/drivers/net/ethernet/atheros/atl1e/Makefile
+++ b/drivers/net/ethernet/atheros/atl1e/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ATL1E)	+= atl1e.o
 atl1e-objs		+= atl1e_main.o atl1e_hw.o atl1e_ethtool.o atl1e_param.o
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e.h b/drivers/net/ethernet/atheros/atl1e/atl1e.h
index 632bb84..e9893da 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e.h
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  * Copyright(c) 2007 xiong huang <xiong.huang@atheros.com>
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef _ATL1E_H_
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
index 282ebdd..c6b9e7e 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
@@ -1,23 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_hw.c b/drivers/net/ethernet/atheros/atl1e/atl1e_hw.c
index 113565d..fa89bc2 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_hw.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_hw.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/pci.h>
 #include <linux/delay.h>
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h b/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h
index 88a6271..3193f7d 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_hw.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef _ATHL1E_HW_H_
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 9dc6da0..4f7b658 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #include "atl1e.h"
@@ -473,7 +460,9 @@
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 
-	atl1e_write_phy_reg(&adapter->hw, reg_num & MDIO_REG_ADDR_MASK, val);
+	if (atl1e_write_phy_reg(&adapter->hw,
+				reg_num & MDIO_REG_ADDR_MASK, val))
+		netdev_err(netdev, "write phy register failed\n");
 }
 
 static int atl1e_mii_ioctl(struct net_device *netdev,
@@ -1257,7 +1246,7 @@
 		}
 
 		if (tx_buffer->skb) {
-			dev_kfree_skb_irq(tx_buffer->skb);
+			dev_consume_skb_irq(tx_buffer->skb);
 			tx_buffer->skb = NULL;
 		}
 
@@ -1781,11 +1770,10 @@
 	}
 
 	for (f = 0; f < nr_frags; f++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 		u16 i;
 		u16 seg_num;
 
-		frag = &skb_shinfo(skb)->frags[f];
 		buf_len = skb_frag_size(frag);
 
 		seg_num = (buf_len + MAX_TX_BUF_LEN - 1) / MAX_TX_BUF_LEN;
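
Background: the dev_kfree_skb_irq() to dev_consume_skb_irq() switches in this series are semantic, not cosmetic. Both free an skb from hardirq context, but the consume variant marks a successfully transmitted packet, so drop monitors and the skb:kfree_skb tracepoint stop flagging normal TX completion as packet loss. A minimal reclaim sketch; the ring layout here is hypothetical:

    #include <linux/skbuff.h>

    struct my_ring {                        /* hypothetical ring state */
        struct sk_buff **tx_skbuff;
        unsigned int dirty, size;
    };

    static void tx_reclaim(struct my_ring *ring, unsigned int hw_done)
    {
        while (ring->dirty != hw_done) {
            unsigned int entry = ring->dirty % ring->size;

            if (ring->tx_skbuff[entry]) {
                /* Sent successfully: consume, do not report a drop */
                dev_consume_skb_irq(ring->tx_skbuff[entry]);
                ring->tx_skbuff[entry] = NULL;
            }
            ring->dirty++;
        }
    }
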
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_param.c b/drivers/net/ethernet/atheros/atl1e/atl1e_param.c
index fa31428..6b1d6df 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_param.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_param.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/atheros/atlx/Makefile b/drivers/net/ethernet/atheros/atlx/Makefile
index e4f6022..df030e4 100644
--- a/drivers/net/ethernet/atheros/atlx/Makefile
+++ b/drivers/net/ethernet/atheros/atlx/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ATL1)	+= atl1.o
 obj-$(CONFIG_ATL2)	+= atl2.o
 
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index b81fbf1..b498fd6 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
  * Copyright(c) 2006 - 2007 Chris Snook <csnook@redhat.com>
@@ -6,23 +7,6 @@
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- *
  * Contact Information:
  * Xiong Huang <xiong.huang@atheros.com>
  * Jie Yang <jie.yang@atheros.com>
@@ -63,7 +47,6 @@
 #include <linux/jiffies.h>
 #include <linux/mii.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
@@ -1077,8 +1060,6 @@
 		goto err_nomem;
 	}
 
-	memset(ring_header->desc, 0, ring_header->size);
-
 	/* init TPD ring */
 	tpd_ring->dma = ring_header->dma;
 	offset = (tpd_ring->dma & 0x7) ? (8 - (ring_header->dma & 0x7)) : 0;
@@ -1722,7 +1703,7 @@
 	adapter->soft_stats.scc += smb->tx_1_col;
 	adapter->soft_stats.mcc += smb->tx_2_col;
 	adapter->soft_stats.latecol += smb->tx_late_col;
-	adapter->soft_stats.tx_underun += smb->tx_underrun;
+	adapter->soft_stats.tx_underrun += smb->tx_underrun;
 	adapter->soft_stats.tx_trunc += smb->tx_trunc;
 	adapter->soft_stats.tx_pause += smb->tx_pause;
 
@@ -2089,7 +2070,7 @@
 		}
 
 		if (buffer_info->skb) {
-			dev_kfree_skb_irq(buffer_info->skb);
+			dev_consume_skb_irq(buffer_info->skb);
 			buffer_info->skb = NULL;
 		}
 
@@ -2275,10 +2256,9 @@
 	}
 
 	for (f = 0; f < nr_frags; f++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 		u16 i, nseg;
 
-		frag = &skb_shinfo(skb)->frags[f];
 		buf_len = skb_frag_size(frag);
 
 		nseg = (buf_len + ATL1_MAX_TX_BUF_LEN - 1) /
@@ -2440,7 +2420,6 @@
 	atl1_tx_map(adapter, skb, ptpd);
 	atl1_tx_queue(adapter, count, ptpd);
 	atl1_update_mailbox(adapter);
-	mmiowb();
 	return NETDEV_TX_OK;
 }
 
@@ -2774,8 +2753,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int atl1_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1_adapter *adapter = netdev_priv(netdev);
 	struct atl1_hw *hw = &adapter->hw;
 	u32 ctrl = 0;
@@ -2800,7 +2778,7 @@
 		val = atl1_get_speed_and_duplex(hw, &speed, &duplex);
 		if (val) {
 			if (netif_msg_ifdown(adapter))
-				dev_printk(KERN_DEBUG, &pdev->dev,
+				dev_printk(KERN_DEBUG, dev,
 					"error getting speed/duplex\n");
 			goto disable_wol;
 		}
@@ -2857,8 +2835,7 @@
 
 static int atl1_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1_adapter *adapter = netdev_priv(netdev);
 
 	iowrite32(0, adapter->hw.hw_addr + REG_WOL_CTRL);
@@ -3180,7 +3157,7 @@
 	{"tx_deferred_ok", ATL1_STAT(soft_stats.deffer)},
 	{"tx_single_coll_ok", ATL1_STAT(soft_stats.scc)},
 	{"tx_multi_coll_ok", ATL1_STAT(soft_stats.mcc)},
-	{"tx_underun", ATL1_STAT(soft_stats.tx_underun)},
+	{"tx_underrun", ATL1_STAT(soft_stats.tx_underrun)},
 	{"tx_trunc", ATL1_STAT(soft_stats.tx_trunc)},
 	{"tx_pause", ATL1_STAT(soft_stats.tx_pause)},
 	{"rx_pause", ATL1_STAT(soft_stats.rx_pause)},
@@ -3278,7 +3255,6 @@
 	u16 phy_data;
 	int ret_val = 0;
 	u16 old_media_type = hw->media_type;
-	u32 advertising;
 
 	if (netif_running(adapter->netdev)) {
 		if (netif_msg_link(adapter))
@@ -3312,25 +3288,7 @@
 				hw->media_type = MEDIA_TYPE_10M_HALF;
 		}
 	}
-	switch (hw->media_type) {
-	case MEDIA_TYPE_AUTO_SENSOR:
-		advertising =
-		    ADVERTISED_10baseT_Half |
-		    ADVERTISED_10baseT_Full |
-		    ADVERTISED_100baseT_Half |
-		    ADVERTISED_100baseT_Full |
-		    ADVERTISED_1000baseT_Full |
-		    ADVERTISED_Autoneg | ADVERTISED_TP;
-		break;
-	case MEDIA_TYPE_1000M_FULL:
-		advertising =
-		    ADVERTISED_1000baseT_Full |
-		    ADVERTISED_Autoneg | ADVERTISED_TP;
-		break;
-	default:
-		advertising = 0;
-		break;
-	}
+
 	if (atl1_phy_setup_autoneg_adv(hw)) {
 		ret_val = -EINVAL;
 		if (netif_msg_link(adapter))
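
Background: the mmiowb() deletions in this series (here and in bnx2 below) follow the v5.2 I/O-barrier rework, in which spin_unlock() took over issuing mmiowb() on the architectures that need it, so portable drivers dropped the explicit calls. A doorbell sketch with hypothetical lock and register names:

    #include <linux/io.h>
    #include <linux/spinlock.h>

    static void ring_doorbell(void __iomem *db_reg, spinlock_t *lock, u32 prod)
    {
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        iowrite32(prod, db_reg);
        /* No mmiowb() here: since v5.2 the unlock orders the MMIO write */
        spin_unlock_irqrestore(lock, flags);
    }
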
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.h b/drivers/net/ethernet/atheros/atlx/atl1.h
index 34a58cd..6b264c5 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.h
+++ b/drivers/net/ethernet/atheros/atlx/atl1.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
  * Copyright(c) 2006 - 2007 Chris Snook <csnook@redhat.com>
@@ -5,20 +6,6 @@
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef ATL1_H
@@ -681,7 +668,7 @@
 	u64 scc;		/* packets TX after a single collision */
 	u64 mcc;		/* packets TX after multiple collisions */
 	u64 latecol;		/* TX packets w/ late collisions */
-	u64 tx_underun;		/* TX packets aborted due to TX FIFO underrun
+	u64 tx_underrun;	/* TX packets aborted due to TX FIFO underrun
 				 * or TRD FIFO underrun */
 	u64 tx_trunc;		/* TX packets truncated due to size > MTU */
 	u64 rx_pause;		/* num Pause packets received. */
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index bb41bec..3aba383 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright(c) 2006 - 2007 Atheros Corporation. All rights reserved.
  * Copyright(c) 2007 - 2008 Chris Snook <csnook@redhat.com>
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #include <linux/atomic.h>
@@ -304,7 +291,6 @@
 		&adapter->ring_dma);
 	if (!adapter->ring_vir_addr)
 		return -ENOMEM;
-	memset(adapter->ring_vir_addr, 0, adapter->ring_size);
 
 	/* Init TXD Ring */
 	adapter->txd_dma = adapter->ring_dma ;
@@ -553,7 +539,7 @@
 			netdev->stats.tx_aborted_errors++;
 		if (txs->late_col)
 			netdev->stats.tx_window_errors++;
-		if (txs->underun)
+		if (txs->underrun)
 			netdev->stats.tx_fifo_errors++;
 	} while (1);
 
@@ -908,8 +894,7 @@
 	ATL2_WRITE_REGW(&adapter->hw, REG_MB_TXD_WR_IDX,
 		(adapter->txd_write_ptr >> 2));
 
-	mmiowb();
-	dev_kfree_skb_any(skb);
+	dev_consume_skb_any(skb);
 	return NETDEV_TX_OK;
 }
 
@@ -1335,13 +1320,11 @@
 {
 	struct net_device *netdev;
 	struct atl2_adapter *adapter;
-	static int cards_found;
+	static int cards_found = 0;
 	unsigned long mmio_start;
 	int mmio_len;
 	int err;
 
-	cards_found = 0;
-
 	err = pci_enable_device(pdev);
 	if (err)
 		return err;
@@ -2946,7 +2929,7 @@
 			if (*value == ent->i) {
 				if (ent->str[0] != '\0')
 					printk(KERN_INFO "%s\n", ent->str);
-			return 0;
+				return 0;
 			}
 		}
 		break;
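
Background: the memset() removed after dma_alloc_coherent() above (and in bcm63xx_enet below) and the tree-wide dma_zalloc_coherent() conversion rest on the same v5.0 guarantee: dma_alloc_coherent() already returns zeroed memory, so clearing it again is redundant. Sketch:

    #include <linux/dma-mapping.h>

    static void *alloc_ring(struct device *dev, size_t size, dma_addr_t *dma)
    {
        /* Zeroed on return since v5.0; no follow-up memset() needed */
        return dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
    }
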
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h
index c64a6bd..d97613b 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.h
+++ b/drivers/net/ethernet/atheros/atlx/atl2.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* atl2.h -- atl2 driver definitions
  *
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
@@ -6,20 +7,6 @@
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef _ATL2_H_
@@ -260,7 +247,7 @@
 	unsigned multi_col:1;
 	unsigned late_col:1;
 	unsigned abort_col:1;
-	unsigned underun:1;	/* current packet is aborted
+	unsigned underrun:1;	/* current packet is aborted
 				 * due to txram underrun */
 	unsigned:3;		/* reserved */
 	unsigned update:1;	/* always 1'b1 in tx_status_buf */
diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c
index 46a622c..505a22c 100644
--- a/drivers/net/ethernet/atheros/atlx/atlx.c
+++ b/drivers/net/ethernet/atheros/atlx/atlx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* atlx.c -- common functions for Attansic network drivers
  *
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
@@ -7,20 +8,6 @@
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 /* Including this file like a header is a temporary hack, I promise. -- CHS */
diff --git a/drivers/net/ethernet/atheros/atlx/atlx.h b/drivers/net/ethernet/atheros/atlx/atlx.h
index 448f5dc..7f5d4e2 100644
--- a/drivers/net/ethernet/atheros/atlx/atlx.h
+++ b/drivers/net/ethernet/atheros/atlx/atlx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* atlx_hw.h -- common hardware definitions for Attansic network drivers
  *
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
@@ -7,20 +8,6 @@
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
 #ifndef ATLX_H
diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig
index 392f564..9ee30ea 100644
--- a/drivers/net/ethernet/aurora/Kconfig
+++ b/drivers/net/ethernet/aurora/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_VENDOR_AURORA
 	bool "Aurora VLSI devices"
 	default y
diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile
index 6cb528a..f3d5998 100644
--- a/drivers/net/ethernet/aurora/Makefile
+++ b/drivers/net/ethernet/aurora/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_AURORA_NB8800) += nb8800.o
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index c8d1f8f..37752d9 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1,23 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
  *
  * Mostly rewritten, based on driver from Sigma Designs.  Original
  * copyright notice below.
  *
- *
  * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac.
  *
  * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -404,6 +394,7 @@
 	unsigned int dma_len;
 	unsigned int align;
 	unsigned int next;
+	bool xmit_more;
 
 	if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) {
 		netif_stop_queue(dev);
@@ -423,9 +414,10 @@
 		return NETDEV_TX_OK;
 	}
 
+	xmit_more = netdev_xmit_more();
 	if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) {
 		netif_stop_queue(dev);
-		skb->xmit_more = 0;
+		xmit_more = false;
 	}
 
 	next = priv->tx_next;
@@ -450,7 +442,7 @@
 	desc->n_addr = priv->tx_bufs[next].dma_desc;
 	desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len;
 
-	if (!skb->xmit_more)
+	if (!xmit_more)
 		desc->config |= DESC_EOC;
 
 	txb->skb = skb;
@@ -468,7 +460,7 @@
 
 	priv->tx_next = next;
 
-	if (!skb->xmit_more) {
+	if (!xmit_more) {
 		smp_wmb();
 		priv->tx_chain->ready = true;
 		priv->tx_chain = NULL;
@@ -935,18 +927,11 @@
 {
 	struct nb8800_priv *priv = netdev_priv(dev);
 	struct phy_device *phydev = dev->phydev;
-	u32 adv = 0;
 
 	if (!phydev)
 		return;
 
-	if (priv->pause_rx)
-		adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	if (priv->pause_tx)
-		adv ^= ADVERTISED_Asym_Pause;
-
-	phydev->supported |= adv;
-	phydev->advertising |= adv;
+	phy_set_asym_pause(phydev, priv->pause_rx, priv->pause_tx);
 }
 
 static int nb8800_open(struct net_device *dev)
@@ -1366,10 +1351,8 @@
 		ops = match->data;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
-		dev_err(&pdev->dev, "No IRQ\n");
+	if (irq <= 0)
 		return -EINVAL;
-	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
@@ -1468,7 +1451,7 @@
 	dev->irq = irq;
 
 	mac = of_get_mac_address(pdev->dev.of_node);
-	if (mac)
+	if (!IS_ERR(mac))
 		ether_addr_copy(dev->dev_addr, mac);
 
 	if (!is_valid_ether_addr(dev->dev_addr))
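
Background: skb->xmit_more went away in v5.2; the batching hint moved into per-CPU softnet state and is read with netdev_xmit_more(), which is only meaningful inside ndo_start_xmit(). The nb8800 hunks latch it once and force it off when stopping the queue, since no later packet would come along to flush the chain. A condensed sketch with hypothetical ring helpers:

    #include <linux/netdevice.h>

    /* Hypothetical stand-ins for real ring management */
    static bool my_ring_full(struct net_device *dev) { return false; }
    static void my_ring_doorbell(struct net_device *dev) { }

    static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
        bool xmit_more = netdev_xmit_more();    /* read once, up front */

        /* ... map skb, fill TX descriptor ... */

        if (my_ring_full(dev)) {
            netif_stop_queue(dev);
            xmit_more = false;    /* nothing later will ring the doorbell */
        }

        if (!xmit_more)
            my_ring_doorbell(dev);

        return NETDEV_TX_OK;
    }
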
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index c1d3ee9..53055ce 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Broadcom device configuration
 #
@@ -12,9 +13,9 @@
 	  say Y.
 
 	  Note that the answer to this question does not directly affect
-	  the kernel: saying N will just case the configurator to skip all
-	  the questions regarding AMD chipsets. If you say Y, you will be asked
-	  for your specific chipset/driver in the following questions.
+	  the kernel: saying N will just cause the configurator to skip all
+	  the questions regarding Broadcom chipsets. If you say Y, you will
+	  be asked for your specific chipset/driver in the following questions.
 
 if NET_VENDOR_BROADCOM
 
@@ -67,6 +68,7 @@
 	select FIXED_PHY
 	select BCM7XXX_PHY
 	select MDIO_BCM_UNIMAC
+	select DIMLIB
 	help
 	  This driver supports the built-in Ethernet MACs found in the
 	  Broadcom BCM7xxx Set Top Box family chipset.
@@ -186,6 +188,7 @@
 	select MII
 	select PHYLIB
 	select FIXED_PHY
+	select DIMLIB
 	help
 	  This driver supports the built-in Ethernet MACs found in the
 	  Broadcom BCM7xxx Set Top Box family chipset using an internal
@@ -194,9 +197,11 @@
 config BNXT
 	tristate "Broadcom NetXtreme-C/E support"
 	depends on PCI
-	depends on MAY_USE_DEVLINK
 	select FW_LOADER
 	select LIBCRC32C
+	select NET_DEVLINK
+	select PAGE_POOL
+	select DIMLIB
 	---help---
 	  This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit
 	  Ethernet cards.  To compile this driver as a module, choose M here:
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index e445ab7..97ab0dd 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -638,7 +638,7 @@
 		bytes_compl += skb->len;
 		pkts_compl++;
 
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 	}
 
 	netdev_completed_queue(bp->dev, pkts_compl, bytes_compl);
@@ -1012,7 +1012,7 @@
 		}
 
 		skb_copy_from_linear_data(skb, skb_put(bounce_skb, len), len);
-		dev_kfree_skb_any(skb);
+		dev_consume_skb_any(skb);
 		skb = bounce_skb;
 	}
 
@@ -2248,6 +2248,7 @@
 
 static int b44_register_phy_one(struct b44 *bp)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 	struct mii_bus *mii_bus;
 	struct ssb_device *sdev = bp->sdev;
 	struct phy_device *phydev;
@@ -2303,11 +2304,12 @@
 	}
 
 	/* mask with MAC supported features */
-	phydev->supported &= (SUPPORTED_100baseT_Half |
-			      SUPPORTED_100baseT_Full |
-			      SUPPORTED_Autoneg |
-			      SUPPORTED_MII);
-	phydev->advertising = phydev->supported;
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
+	linkmode_and(phydev->supported, phydev->supported, mask);
+	linkmode_copy(phydev->advertising, phydev->supported);
 
 	bp->old_link = 0;
 	bp->phy_addr = phydev->mdio.addr;
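
Background: since phydev->supported/advertising became ETHTOOL_LINK_MODE bitmaps (v5.0), the old u32 SUPPORTED_* masking no longer compiles, hence the linkmode helpers above. When a driver only needs a speed ceiling rather than an exact bit set, phy_set_max_speed() expresses the same intent more compactly, as the bcm63xx_enet hunk below does. A sketch:

    #include <linux/phy.h>

    /* Cap the PHY at 100 Mb/s; trims the supported/advertising bitmaps */
    static void limit_phy_to_100(struct phy_device *phydev)
    {
        phy_set_max_speed(phydev, SPEED_100);
    }
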
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 897302a..620cd3f 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Driver for BCM963xx builtin Ethernet mac
  *
  * Copyright (C) 2008 Maxime Bizon <mbizon@freebox.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -568,12 +555,13 @@
 /*
  * tx request callback
  */
-static int bcm_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+bcm_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bcm_enet_priv *priv;
 	struct bcm_enet_desc *desc;
 	u32 len_stat;
-	int ret;
+	netdev_tx_t ret;
 
 	priv = netdev_priv(dev);
 
@@ -890,19 +878,10 @@
 		}
 
 		/* mask with MAC supported features */
-		phydev->supported &= (SUPPORTED_10baseT_Half |
-				      SUPPORTED_10baseT_Full |
-				      SUPPORTED_100baseT_Half |
-				      SUPPORTED_100baseT_Full |
-				      SUPPORTED_Autoneg |
-				      SUPPORTED_Pause |
-				      SUPPORTED_MII);
-		phydev->advertising = phydev->supported;
-
-		if (priv->pause_auto && priv->pause_rx && priv->pause_tx)
-			phydev->advertising |= SUPPORTED_Pause;
-		else
-			phydev->advertising &= ~SUPPORTED_Pause;
+		phy_support_sym_pause(phydev);
+		phy_set_max_speed(phydev, SPEED_100);
+		phy_set_sym_pause(phydev, priv->pause_rx, priv->pause_rx,
+				  priv->pause_auto);
 
 		phy_attached_info(phydev);
 
@@ -944,7 +923,7 @@
 
 	/* allocate rx dma ring */
 	size = priv->rx_ring_size * sizeof(struct bcm_enet_desc);
-	p = dma_zalloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL);
+	p = dma_alloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL);
 	if (!p) {
 		ret = -ENOMEM;
 		goto out_freeirq_tx;
@@ -955,7 +934,7 @@
 
 	/* allocate tx dma ring */
 	size = priv->tx_ring_size * sizeof(struct bcm_enet_desc);
-	p = dma_zalloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL);
+	p = dma_alloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL);
 	if (!p) {
 		ret = -ENOMEM;
 		goto out_free_rx_ring;
@@ -1714,7 +1693,7 @@
 	struct bcm_enet_priv *priv;
 	struct net_device *dev;
 	struct bcm63xx_enet_platform_data *pd;
-	struct resource *res_mem, *res_irq, *res_irq_rx, *res_irq_tx;
+	struct resource *res_irq, *res_irq_rx, *res_irq_tx;
 	struct mii_bus *bus;
 	int i, ret;
 
@@ -1740,8 +1719,7 @@
 	if (ret)
 		goto out;
 
-	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(&pdev->dev, res_mem);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto out;
@@ -2128,7 +2106,7 @@
 
 	/* allocate rx dma ring */
 	size = priv->rx_ring_size * sizeof(struct bcm_enet_desc);
-	p = dma_zalloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL);
+	p = dma_alloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL);
 	if (!p) {
 		dev_err(kdev, "cannot allocate rx ring %u\n", size);
 		ret = -ENOMEM;
@@ -2140,7 +2118,7 @@
 
 	/* allocate tx dma ring */
 	size = priv->tx_ring_size * sizeof(struct bcm_enet_desc);
-	p = dma_zalloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL);
+	p = dma_alloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL);
 	if (!p) {
 		dev_err(kdev, "cannot allocate tx ring\n");
 		ret = -ENOMEM;
@@ -2680,7 +2658,6 @@
 	if (!dev)
 		return -ENOMEM;
 	priv = netdev_priv(dev);
-	memset(priv, 0, sizeof(*priv));
 
 	/* initialize default and fetch platform data */
 	priv->enet_is_sw = true;
@@ -2784,15 +2761,13 @@
 /* reserve & remap memory space shared between all macs */
 static int bcm_enet_shared_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	void __iomem *p[3];
 	unsigned int i;
 
 	memset(bcm_enet_shared_base, 0, sizeof(bcm_enet_shared_base));
 
 	for (i = 0; i < 3; i++) {
-		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
-		p[i] = devm_ioremap_resource(&pdev->dev, res);
+		p[i] = devm_platform_ioremap_resource(pdev, i);
 		if (IS_ERR(p[i]))
 			return PTR_ERR(p[i]);
 	}
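
Background: devm_platform_ioremap_resource() (added in v5.1) is shorthand for the platform_get_resource() plus devm_ioremap_resource() pair this series keeps deleting. Probe sketch:

    #include <linux/platform_device.h>

    static int my_probe(struct platform_device *pdev)
    {
        void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

        if (IS_ERR(base))
            return PTR_ERR(base);
        /* ... */
        return 0;
    }
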
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 7b6859e..a977a45 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Broadcom BCM7xxx System Port Ethernet MAC driver
  *
  * Copyright (C) 2014 Broadcom Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
@@ -116,24 +113,19 @@
 	writel_relaxed(lower_32_bits(addr), d + DESC_ADDR_LO);
 }
 
-static inline void tdma_port_write_desc_addr(struct bcm_sysport_priv *priv,
-					     struct dma_desc *desc,
-					     unsigned int port)
-{
-	/* Ports are latched, so write upper address first */
-	tdma_writel(priv, desc->addr_status_len, TDMA_WRITE_PORT_HI(port));
-	tdma_writel(priv, desc->addr_lo, TDMA_WRITE_PORT_LO(port));
-}
-
 /* Ethtool operations */
-static int bcm_sysport_set_rx_csum(struct net_device *dev,
-				   netdev_features_t wanted)
+static void bcm_sysport_set_rx_csum(struct net_device *dev,
+				    netdev_features_t wanted)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	u32 reg;
 
 	priv->rx_chk_en = !!(wanted & NETIF_F_RXCSUM);
 	reg = rxchk_readl(priv, RXCHK_CONTROL);
+	/* Clear L2 header checks, which would prevent BPDUs
+	 * from being received.
+	 */
+	reg &= ~RXCHK_L2_HDR_DIS;
 	if (priv->rx_chk_en)
 		reg |= RXCHK_EN;
 	else
@@ -157,12 +149,10 @@
 		reg &= ~RXCHK_BRCM_TAG_EN;
 
 	rxchk_writel(priv, reg, RXCHK_CONTROL);
-
-	return 0;
 }
 
-static int bcm_sysport_set_tx_csum(struct net_device *dev,
-				   netdev_features_t wanted)
+static void bcm_sysport_set_tx_csum(struct net_device *dev,
+				    netdev_features_t wanted)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	u32 reg;
@@ -177,23 +167,24 @@
 	else
 		reg &= ~tdma_control_bit(priv, TSB_EN);
 	tdma_writel(priv, reg, TDMA_CONTROL);
-
-	return 0;
 }
 
 static int bcm_sysport_set_features(struct net_device *dev,
 				    netdev_features_t features)
 {
-	netdev_features_t changed = features ^ dev->features;
-	netdev_features_t wanted = dev->wanted_features;
-	int ret = 0;
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
 
-	if (changed & NETIF_F_RXCSUM)
-		ret = bcm_sysport_set_rx_csum(dev, wanted);
-	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
-		ret = bcm_sysport_set_tx_csum(dev, wanted);
+	/* Read CRC forward */
+	if (!priv->is_lite)
+		priv->crc_fwd = !!(umac_readl(priv, UMAC_CMD) & CMD_CRC_FWD);
+	else
+		priv->crc_fwd = !((gib_readl(priv, GIB_CONTROL) &
+				  GIB_FCS_STRIP) >> GIB_FCS_STRIP_SHIFT);
 
-	return ret;
+	bcm_sysport_set_rx_csum(dev, features);
+	bcm_sysport_set_tx_csum(dev, features);
+
+	return 0;
 }
 
 /* Hardware counters must be kept in sync because the order/offset
@@ -285,6 +276,8 @@
 	STAT_MIB_SOFT("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
 	STAT_MIB_SOFT("rx_dma_failed", mib.rx_dma_failed),
 	STAT_MIB_SOFT("tx_dma_failed", mib.tx_dma_failed),
+	STAT_MIB_SOFT("tx_realloc_tsb", mib.tx_realloc_tsb),
+	STAT_MIB_SOFT("tx_realloc_tsb_failed", mib.tx_realloc_tsb_failed),
 	/* Per TX-queue statistics are dynamically appended */
 };
 
@@ -519,7 +512,6 @@
 				struct ethtool_wolinfo *wol)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
-	u32 reg;
 
 	wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
 	wol->wolopts = priv->wolopts;
@@ -527,11 +519,7 @@
 	if (!(priv->wolopts & WAKE_MAGICSECURE))
 		return;
 
-	/* Return the programmed SecureOn password */
-	reg = umac_readl(priv, UMAC_PSW_MS);
-	put_unaligned_be16(reg, &wol->sopass[0]);
-	reg = umac_readl(priv, UMAC_PSW_LS);
-	put_unaligned_be32(reg, &wol->sopass[2]);
+	memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));
 }
 
 static int bcm_sysport_set_wol(struct net_device *dev,
@@ -547,13 +535,8 @@
 	if (wol->wolopts & ~supported)
 		return -EINVAL;
 
-	/* Program the SecureOn password */
-	if (wol->wolopts & WAKE_MAGICSECURE) {
-		umac_writel(priv, get_unaligned_be16(&wol->sopass[0]),
-			    UMAC_PSW_MS);
-		umac_writel(priv, get_unaligned_be32(&wol->sopass[2]),
-			    UMAC_PSW_LS);
-	}
+	if (wol->wolopts & WAKE_MAGICSECURE)
+		memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass));
 
 	/* Flag the device and relevant IRQ as wakeup capable */
 	if (wol->wolopts) {
@@ -626,7 +609,7 @@
 				    struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
-	struct net_dim_cq_moder moder;
+	struct dim_cq_moder moder;
 	u32 usecs, pkts;
 	unsigned int i;
 
@@ -725,8 +708,7 @@
 	for (i = 0; i < priv->num_rx_bds; i++) {
 		cb = &priv->rx_cbs[i];
 		skb = bcm_sysport_rx_refill(priv, cb);
-		if (skb)
-			dev_kfree_skb(skb);
+		dev_kfree_skb(skb);
 		if (!cb->skb)
 			return -ENOMEM;
 	}
@@ -1009,7 +991,7 @@
 {
 	struct bcm_sysport_priv *priv =
 		container_of(napi, struct bcm_sysport_priv, napi);
-	struct net_dim_sample dim_sample;
+	struct dim_sample dim_sample = {};
 	unsigned int work_done = 0;
 
 	work_done = bcm_sysport_desc_rx(priv, budget);
@@ -1033,8 +1015,8 @@
 	}
 
 	if (priv->dim.use_dim) {
-		net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
-			       priv->dim.bytes, &dim_sample);
+		dim_update_sample(priv->dim.event_ctr, priv->dim.packets,
+				  priv->dim.bytes, &dim_sample);
 		net_dim(&priv->dim.dim, dim_sample);
 	}
 
@@ -1067,6 +1049,7 @@
 
 static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
 {
+	unsigned int index;
 	u32 reg;
 
 	/* Disable RXCHK, active filters and Broadcom tag matching */
@@ -1075,6 +1058,15 @@
 		 RXCHK_BRCM_TAG_MATCH_SHIFT | RXCHK_EN | RXCHK_BRCM_TAG_EN);
 	rxchk_writel(priv, reg, RXCHK_CONTROL);
 
+	/* Make sure we restore correct CID index in case HW lost
+	 * its context during deep idle state
+	 */
+	for_each_set_bit(index, priv->filters, RXCHK_BRCM_TAG_MAX) {
+		rxchk_writel(priv, priv->filters_loc[index] <<
+			     RXCHK_BRCM_TAG_CID_SHIFT, RXCHK_BRCM_TAG(index));
+		rxchk_writel(priv, 0xff00ffff, RXCHK_BRCM_TAG_MASK(index));
+	}
+
 	/* Clear the MagicPacket detection logic */
 	mpd_enable_set(priv, false);
 
@@ -1094,16 +1086,16 @@
 
 static void bcm_sysport_dim_work(struct work_struct *work)
 {
-	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct dim *dim = container_of(work, struct dim, work);
 	struct bcm_sysport_net_dim *ndim =
 			container_of(dim, struct bcm_sysport_net_dim, dim);
 	struct bcm_sysport_priv *priv =
 			container_of(ndim, struct bcm_sysport_priv, dim);
-	struct net_dim_cq_moder cur_profile =
-			net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+	struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode,
+								    dim->profile_ix);
 
 	bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts);
-	dim->state = NET_DIM_START_MEASURE;
+	dim->state = DIM_START_MEASURE;
 }
 
 /* RX and misc interrupt routine */
@@ -1218,6 +1210,7 @@
 static struct sk_buff *bcm_sysport_insert_tsb(struct sk_buff *skb,
 					      struct net_device *dev)
 {
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	struct sk_buff *nskb;
 	struct bcm_tsb *tsb;
 	u32 csum_info;
@@ -1228,13 +1221,16 @@
 	/* Re-allocate SKB if needed */
 	if (unlikely(skb_headroom(skb) < sizeof(*tsb))) {
 		nskb = skb_realloc_headroom(skb, sizeof(*tsb));
-		dev_kfree_skb(skb);
 		if (!nskb) {
+			dev_kfree_skb_any(skb);
+			priv->mib.tx_realloc_tsb_failed++;
 			dev->stats.tx_errors++;
 			dev->stats.tx_dropped++;
 			return NULL;
 		}
+		dev_consume_skb_any(skb);
 		skb = nskb;
+		priv->mib.tx_realloc_tsb++;
 	}
 
 	tsb = skb_push(skb, sizeof(*tsb));
@@ -1282,11 +1278,10 @@
 	struct bcm_sysport_tx_ring *ring;
 	struct bcm_sysport_cb *cb;
 	struct netdev_queue *txq;
-	struct dma_desc *desc;
+	u32 len_status, addr_lo;
 	unsigned int skb_len;
 	unsigned long flags;
 	dma_addr_t mapping;
-	u32 len_status;
 	u16 queue;
 	int ret;
 
@@ -1329,10 +1324,7 @@
 	dma_unmap_addr_set(cb, dma_addr, mapping);
 	dma_unmap_len_set(cb, dma_len, skb_len);
 
-	/* Fetch a descriptor entry from our pool */
-	desc = ring->desc_cpu;
-
-	desc->addr_lo = lower_32_bits(mapping);
+	addr_lo = lower_32_bits(mapping);
 	len_status = upper_32_bits(mapping) & DESC_ADDR_HI_MASK;
 	len_status |= (skb_len << DESC_LEN_SHIFT);
 	len_status |= (DESC_SOP | DESC_EOP | TX_STATUS_APP_CRC) <<
@@ -1345,16 +1337,9 @@
 		ring->curr_desc = 0;
 	ring->desc_count--;
 
-	/* Ensure write completion of the descriptor status/length
-	 * in DRAM before the System Port WRITE_PORT register latches
-	 * the value
-	 */
-	wmb();
-	desc->addr_status_len = len_status;
-	wmb();
-
-	/* Write this descriptor address to the RING write port */
-	tdma_port_write_desc_addr(priv, desc, ring->index);
+	/* Ports are latched, so write upper address first */
+	tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index));
+	tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index));
 
 	/* Check ring space and update SW control flow */
 	if (ring->desc_count == 0)
@@ -1451,7 +1436,7 @@
 	struct bcm_sysport_net_dim *dim = &priv->dim;
 
 	INIT_WORK(&dim->dim.work, cb);
-	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	dim->event_ctr = 0;
 	dim->packets = 0;
 	dim->bytes = 0;
@@ -1460,7 +1445,7 @@
 static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv)
 {
 	struct bcm_sysport_net_dim *dim = &priv->dim;
-	struct net_dim_cq_moder moder;
+	struct dim_cq_moder moder;
 	u32 usecs, pkts;
 
 	usecs = priv->rx_coalesce_usecs;
@@ -1480,28 +1465,14 @@
 				    unsigned int index)
 {
 	struct bcm_sysport_tx_ring *ring = &priv->tx_rings[index];
-	struct device *kdev = &priv->pdev->dev;
 	size_t size;
-	void *p;
 	u32 reg;
 
 	/* Simple descriptors partitioning for now */
 	size = 256;
 
-	/* We just need one DMA descriptor which is DMA-able, since writing to
-	 * the port will allocate a new descriptor in its internal linked-list
-	 */
-	p = dma_zalloc_coherent(kdev, sizeof(struct dma_desc), &ring->desc_dma,
-				GFP_KERNEL);
-	if (!p) {
-		netif_err(priv, hw, priv->netdev, "DMA alloc failed\n");
-		return -ENOMEM;
-	}
-
 	ring->cbs = kcalloc(size, sizeof(struct bcm_sysport_cb), GFP_KERNEL);
 	if (!ring->cbs) {
-		dma_free_coherent(kdev, sizeof(struct dma_desc),
-				  ring->desc_cpu, ring->desc_dma);
 		netif_err(priv, hw, priv->netdev, "CB allocation failed\n");
 		return -ENOMEM;
 	}
@@ -1514,7 +1485,6 @@
 	ring->size = size;
 	ring->clean_index = 0;
 	ring->alloc_size = ring->size;
-	ring->desc_cpu = p;
 	ring->desc_count = ring->size;
 	ring->curr_desc = 0;
 
@@ -1569,8 +1539,8 @@
 	napi_enable(&ring->napi);
 
 	netif_dbg(priv, hw, priv->netdev,
-		  "TDMA cfg, size=%d, desc_cpu=%p switch q=%d,port=%d\n",
-		  ring->size, ring->desc_cpu, ring->switch_queue,
+		  "TDMA cfg, size=%d, switch q=%d,port=%d\n",
+		  ring->size, ring->switch_queue,
 		  ring->switch_port);
 
 	return 0;
@@ -1580,7 +1550,6 @@
 				     unsigned int index)
 {
 	struct bcm_sysport_tx_ring *ring = &priv->tx_rings[index];
-	struct device *kdev = &priv->pdev->dev;
 	u32 reg;
 
 	/* Caller should stop the TDMA engine */
@@ -1602,12 +1571,6 @@
 
 	kfree(ring->cbs);
 	ring->cbs = NULL;
-
-	if (ring->desc_dma) {
-		dma_free_coherent(kdev, sizeof(struct dma_desc),
-				  ring->desc_cpu, ring->desc_dma);
-		ring->desc_dma = 0;
-	}
 	ring->size = 0;
 	ring->alloc_size = 0;
 
@@ -1967,16 +1930,14 @@
 	else
 		gib_set_pad_extension(priv);
 
+	/* Apply features again in case we changed them while interface was
+	 * down
+	 */
+	bcm_sysport_set_features(dev, dev->features);
+
 	/* Set MAC address */
 	umac_set_hw_addr(priv, dev->dev_addr);
 
-	/* Read CRC forward */
-	if (!priv->is_lite)
-		priv->crc_fwd = !!(umac_readl(priv, UMAC_CMD) & CMD_CRC_FWD);
-	else
-		priv->crc_fwd = !((gib_readl(priv, GIB_CONTROL) &
-				  GIB_FCS_STRIP) >> GIB_FCS_STRIP_SHIFT);
-
 	phydev = of_phy_connect(dev, priv->phy_dn, bcm_sysport_adj_link,
 				0, priv->phy_interface);
 	if (!phydev) {
@@ -2186,6 +2147,7 @@
 	rxchk_writel(priv, reg, RXCHK_BRCM_TAG(index));
 	rxchk_writel(priv, 0xff00ffff, RXCHK_BRCM_TAG_MASK(index));
 
+	priv->filters_loc[index] = nfc->fs.location;
 	set_bit(index, priv->filters);
 
 	return 0;
@@ -2205,6 +2167,7 @@
 	 * be taken care of during suspend time by bcm_sysport_suspend_to_wol
 	 */
 	clear_bit(index, priv->filters);
+	priv->filters_loc[index] = 0;
 
 	return 0;
 }
@@ -2265,8 +2228,7 @@
 };
 
 static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
-				    struct net_device *sb_dev,
-				    select_queue_fallback_t fallback)
+				    struct net_device *sb_dev)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	u16 queue = skb_get_queue_mapping(skb);
@@ -2274,7 +2236,7 @@
 	unsigned int q, port;
 
 	if (!netdev_uses_dsa(dev))
-		return fallback(dev, skb, NULL);
+		return netdev_pick_tx(dev, skb, NULL);
 
 	/* DSA tagging layer will have configured the correct queue */
 	q = BRCM_TAG_GET_QUEUE(queue);
@@ -2282,7 +2244,7 @@
 	tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
 
 	if (unlikely(!tx_ring))
-		return fallback(dev, skb, NULL);
+		return netdev_pick_tx(dev, skb, NULL);
 
 	return tx_ring->index;
 }
@@ -2309,7 +2271,7 @@
 	struct bcm_sysport_priv *priv;
 	struct net_device *slave_dev;
 	unsigned int num_tx_queues;
-	unsigned int q, start, port;
+	unsigned int q, qp, port;
 	struct net_device *dev;
 
 	priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier);
@@ -2348,20 +2310,61 @@
 
 	priv->per_port_num_tx_queues = num_tx_queues;
 
-	start = find_first_zero_bit(&priv->queue_bitmap, dev->num_tx_queues);
-	for (q = 0; q < num_tx_queues; q++) {
-		ring = &priv->tx_rings[q + start];
+	for (q = 0, qp = 0; q < dev->num_tx_queues && qp < num_tx_queues;
+	     q++) {
+		ring = &priv->tx_rings[q];
+
+		if (ring->inspect)
+			continue;
 
 		/* Just remember the mapping actual programming done
 		 * during bcm_sysport_init_tx_ring
 		 */
-		ring->switch_queue = q;
+		ring->switch_queue = qp;
 		ring->switch_port = port;
 		ring->inspect = true;
 		priv->ring_map[q + port * num_tx_queues] = ring;
+		qp++;
+	}
 
-		/* Set all queues as being used now */
-		set_bit(q + start, &priv->queue_bitmap);
+	return 0;
+}
+
+static int bcm_sysport_unmap_queues(struct notifier_block *nb,
+				    struct dsa_notifier_register_info *info)
+{
+	struct bcm_sysport_tx_ring *ring;
+	struct bcm_sysport_priv *priv;
+	struct net_device *slave_dev;
+	unsigned int num_tx_queues;
+	struct net_device *dev;
+	unsigned int q, port;
+
+	priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier);
+	if (priv->netdev != info->master)
+		return 0;
+
+	dev = info->master;
+
+	if (dev->netdev_ops != &bcm_sysport_netdev_ops)
+		return 0;
+
+	port = info->port_number;
+	slave_dev = info->info.dev;
+
+	num_tx_queues = slave_dev->real_num_tx_queues;
+
+	for (q = 0; q < dev->num_tx_queues; q++) {
+		ring = &priv->tx_rings[q];
+
+		if (ring->switch_port != port)
+			continue;
+
+		if (!ring->inspect)
+			continue;
+
+		ring->inspect = false;
+		priv->ring_map[q + port * num_tx_queues] = NULL;
 	}
 
 	return 0;
@@ -2370,14 +2373,18 @@
 static int bcm_sysport_dsa_notifier(struct notifier_block *nb,
 				    unsigned long event, void *ptr)
 {
-	struct dsa_notifier_register_info *info;
+	int ret = NOTIFY_DONE;
 
-	if (event != DSA_PORT_REGISTER)
-		return NOTIFY_DONE;
+	switch (event) {
+	case DSA_PORT_REGISTER:
+		ret = bcm_sysport_map_queues(nb, ptr);
+		break;
+	case DSA_PORT_UNREGISTER:
+		ret = bcm_sysport_unmap_queues(nb, ptr);
+		break;
+	}
 
-	info = ptr;
-
-	return notifier_from_errno(bcm_sysport_map_queues(nb, info));
+	return notifier_from_errno(ret);
 }
 
 #define REV_FMT	"v%2x.%02x"
@@ -2412,12 +2419,10 @@
 	struct device_node *dn;
 	struct net_device *dev;
 	const void *macaddr;
-	struct resource *r;
 	u32 txq, rxq;
 	int ret;
 
 	dn = pdev->dev.of_node;
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	of_id = of_match_node(bcm_sysport_of_match, dn);
 	if (!of_id || !of_id->data)
 		return -EINVAL;
@@ -2465,7 +2470,7 @@
 		goto err_free_netdev;
 	}
 
-	priv->base = devm_ioremap_resource(&pdev->dev, r);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto err_free_netdev;
@@ -2476,7 +2481,7 @@
 
 	priv->phy_interface = of_get_phy_mode(dn);
 	/* Default to GMII interface mode */
-	if (priv->phy_interface < 0)
+	if ((int)priv->phy_interface < 0)
 		priv->phy_interface = PHY_INTERFACE_MODE_GMII;
 
 	/* In the case of a fixed PHY, the DT node associated
@@ -2494,7 +2499,7 @@
 
 	/* Initialize netdevice members */
 	macaddr = of_get_mac_address(dn);
-	if (!macaddr || !is_valid_ether_addr(macaddr)) {
+	if (IS_ERR(macaddr)) {
 		dev_warn(&pdev->dev, "using random Ethernet MAC\n");
 		eth_hw_addr_random(dev);
 	} else {
@@ -2507,9 +2512,10 @@
 	dev->netdev_ops = &bcm_sysport_netdev_ops;
 	netif_napi_add(dev, &priv->napi, bcm_sysport_poll, 64);
 
-	/* HW supported features, none enabled by default */
-	dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_HIGHDMA |
-				NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	dev->features |= NETIF_F_RXCSUM | NETIF_F_HIGHDMA |
+			 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	dev->hw_features |= dev->features;
+	dev->vlan_features |= dev->features;
 
 	/* Request the WOL interrupt and advertise suspend if available */
 	priv->wol_irq_disabled = 1;
@@ -2544,11 +2550,11 @@
 
 	priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK;
 	dev_info(&pdev->dev,
-		 "Broadcom SYSTEMPORT%s" REV_FMT
-		 " at 0x%p (irqs: %d, %d, TXQs: %d, RXQs: %d)\n",
+		 "Broadcom SYSTEMPORT%s " REV_FMT
+		 " (irqs: %d, %d, TXQs: %d, RXQs: %d)\n",
 		 priv->is_lite ? " Lite" : "",
 		 (priv->rev >> 8) & 0xff, priv->rev & 0xff,
-		 priv->base, priv->irq0, priv->irq1, txq, rxq);
+		 priv->irq0, priv->irq1, txq, rxq);
 
 	return 0;
 
@@ -2588,13 +2594,18 @@
 	unsigned int index, i = 0;
 	u32 reg;
 
-	/* Password has already been programmed */
 	reg = umac_readl(priv, UMAC_MPD_CTRL);
 	if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE))
 		reg |= MPD_EN;
 	reg &= ~PSW_EN;
-	if (priv->wolopts & WAKE_MAGICSECURE)
+	if (priv->wolopts & WAKE_MAGICSECURE) {
+		/* Program the SecureOn password */
+		umac_writel(priv, get_unaligned_be16(&priv->sopass[0]),
+			    UMAC_PSW_MS);
+		umac_writel(priv, get_unaligned_be32(&priv->sopass[2]),
+			    UMAC_PSW_LS);
 		reg |= PSW_EN;
+	}
 	umac_writel(priv, reg, UMAC_MPD_CTRL);
 
 	if (priv->wolopts & WAKE_FILTER) {
@@ -2709,7 +2720,6 @@
 	struct net_device *dev = dev_get_drvdata(d);
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	unsigned int i;
-	u32 reg;
 	int ret;
 
 	if (!netif_running(dev))
@@ -2751,12 +2761,8 @@
 		goto out_free_rx_ring;
 	}
 
-	/* Enable rxhck */
-	if (priv->rx_chk_en) {
-		reg = rxchk_readl(priv, RXCHK_CONTROL);
-		reg |= RXCHK_EN;
-		rxchk_writel(priv, reg, RXCHK_CONTROL);
-	}
+	/* Restore enabled features */
+	bcm_sysport_set_features(dev, dev->features);
 
 	rbuf_init(priv);
 
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 046c6c1..6d80735 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -1,19 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Broadcom BCM7xxx System Port Ethernet MAC driver
  *
  * Copyright (C) 2014 Broadcom Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __BCM_SYSPORT_H
 #define __BCM_SYSPORT_H
 
 #include <linux/bitmap.h>
+#include <linux/ethtool.h>
 #include <linux/if_vlan.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
 
 /* Receive/transmit descriptor format */
 #define DESC_ADDR_HI_STATUS_LEN	0x00
@@ -515,12 +513,6 @@
 
 #define TDMA_DEBUG			0x64c
 
-/* Transmit/Receive descriptor */
-struct dma_desc {
-	u32	addr_status_len;
-	u32	addr_lo;
-};
-
 /* Number of Receive hardware descriptor words */
 #define SP_NUM_HW_RX_DESC_WORDS		1024
 #define SP_LT_NUM_HW_RX_DESC_WORDS	256
@@ -529,7 +521,7 @@
 #define SP_NUM_TX_DESC			1536
 #define SP_LT_NUM_TX_DESC		256
 
-#define WORDS_PER_DESC			(sizeof(struct dma_desc) / sizeof(u32))
+#define WORDS_PER_DESC			2
 
 /* Rx/Tx common counter group.*/
 struct bcm_sysport_pkt_counters {
@@ -607,6 +599,8 @@
 	u32 alloc_rx_buff_failed;
 	u32 rx_dma_failed;
 	u32 tx_dma_failed;
+	u32 tx_realloc_tsb;
+	u32 tx_realloc_tsb_failed;
 };
 
 /* HW maintains a large list of counters */
@@ -708,14 +702,13 @@
 	u16			event_ctr;
 	unsigned long		packets;
 	unsigned long		bytes;
-	struct net_dim		dim;
+	struct dim		dim;
 };
 
 /* Software view of the TX ring */
 struct bcm_sysport_tx_ring {
 	spinlock_t	lock;		/* Ring lock for tx reclaim/xmit */
 	struct napi_struct napi;	/* NAPI per tx queue */
-	dma_addr_t	desc_dma;	/* DMA cookie */
 	unsigned int	index;		/* Ring index */
 	unsigned int	size;		/* Ring current size */
 	unsigned int	alloc_size;	/* Ring one-time allocated size */
@@ -724,7 +717,6 @@
 	unsigned int	c_index;	/* Last consumer index */
 	unsigned int	clean_index;	/* Current clean index */
 	struct bcm_sysport_cb *cbs;	/* Transmit control blocks */
-	struct dma_desc	*desc_cpu;	/* CPU view of the descriptor */
 	struct bcm_sysport_priv *priv;	/* private context backpointer */
 	unsigned long	packets;	/* packets statistics */
 	unsigned long	bytes;		/* bytes statistics */
@@ -776,6 +768,7 @@
 	unsigned int		crc_fwd:1;
 	u16			rev;
 	u32			wolopts;
+	u8			sopass[SOPASS_MAX];
 	unsigned int		wol_irq_disabled:1;
 
 	/* MIB related fields */
@@ -784,6 +777,7 @@
 	/* Ethtool */
 	u32			msg_enable;
 	DECLARE_BITMAP(filters, RXCHK_BRCM_TAG_MAX);
+	u32			filters_loc[RXCHK_BRCM_TAG_MAX];
 
 	struct bcm_sysport_stats64	stats64;
 
@@ -793,7 +787,6 @@
 	/* map information between switch port queues and local queues */
 	struct notifier_block	dsa_notifier;
 	unsigned int		per_port_num_tx_queues;
-	unsigned long		queue_bitmap;
 	struct bcm_sysport_tx_ring *ring_map[DSA_MAX_PORTS * 8];
 
 };
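
Background: the net_dim to dim churn in bcmsysport tracks the v5.2 move of the interrupt-moderation code to lib/dim: linux/net_dim.h became linux/dim.h, struct net_dim{_sample,_cq_moder} became struct dim{_sample,_cq_moder}, the NET_DIM_* states became DIM_*, and samples are now built with dim_update_sample(). A NAPI-side sketch, assuming the driver tracks its own event/packet/byte counters the way bcmsysport does:

    #include <linux/dim.h>

    static void feed_dim(struct dim *dim, u16 events, u64 packets, u64 bytes)
    {
        struct dim_sample sample = {};

        dim_update_sample(events, packets, bytes, &sample);
        net_dim(dim, sample);    /* may queue dim->work to retune coalescing */
    }
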
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 6fe074c..34d1830 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -132,7 +132,7 @@
 		mac = of_get_mac_address(bgmac->dev->of_node);
 
 	/* If no MAC address assigned via device tree, check SPROM */
-	if (!mac) {
+	if (IS_ERR_OR_NULL(mac)) {
 		switch (core->core_unit) {
 		case 0:
 			mac = sprom->et0mac;
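
Background: of_get_mac_address() stopped returning NULL on failure in v5.2 and returns an ERR_PTR instead (including -EPROBE_DEFER when an nvmem provider is not ready yet), which is why these callers moved to IS_ERR()/IS_ERR_OR_NULL(). Typical caller sketch; the random-MAC fallback is one common choice, not the only one:

    #include <linux/err.h>
    #include <linux/etherdevice.h>
    #include <linux/of_net.h>

    static void set_mac_from_dt(struct net_device *ndev, struct device_node *np)
    {
        const void *mac = of_get_mac_address(np);

        if (!IS_ERR(mac))
            ether_addr_copy(ndev->dev_addr, mac);
        else
            eth_hw_addr_random(ndev);    /* fall back to a random MAC */
    }
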
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 894eda5..c46c1b1 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -193,16 +193,14 @@
 	bgmac->dma_dev = &pdev->dev;
 
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
+	if (!IS_ERR(mac_addr))
 		ether_addr_copy(bgmac->net_dev->dev_addr, mac_addr);
 	else
 		dev_warn(&pdev->dev, "MAC address not present in device tree\n");
 
 	bgmac->irq = platform_get_irq(pdev, 0);
-	if (bgmac->irq < 0) {
-		dev_err(&pdev->dev, "Unable to obtain IRQ\n");
+	if (bgmac->irq < 0)
 		return bgmac->irq;
-	}
 
 	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "amac_base");
 	if (!regs) {
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 4c94d92..148734b 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -172,7 +172,7 @@
 	flags = 0;
 
 	for (i = 0; i < nr_frags; i++) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		int len = skb_frag_size(frag);
 
 		index = (index + 1) % BGMAC_TX_RING_SLOTS;
@@ -616,7 +616,6 @@
 	static const u16 ring_base[] = { BGMAC_DMA_BASE0, BGMAC_DMA_BASE1,
 					 BGMAC_DMA_BASE2, BGMAC_DMA_BASE3, };
 	int size; /* ring size: different for Tx and Rx */
-	int err;
 	int i;
 
 	BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base));
@@ -635,9 +634,9 @@
 
 		/* Alloc ring of descriptors */
 		size = BGMAC_TX_RING_SLOTS * sizeof(struct bgmac_dma_desc);
-		ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
-						     &ring->dma_base,
-						     GFP_KERNEL);
+		ring->cpu_base = dma_alloc_coherent(dma_dev, size,
+						    &ring->dma_base,
+						    GFP_KERNEL);
 		if (!ring->cpu_base) {
 			dev_err(bgmac->dev, "Allocation of TX ring 0x%X failed\n",
 				ring->mmio_base);
@@ -660,13 +659,12 @@
 
 		/* Alloc ring of descriptors */
 		size = BGMAC_RX_RING_SLOTS * sizeof(struct bgmac_dma_desc);
-		ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
-						     &ring->dma_base,
-						     GFP_KERNEL);
+		ring->cpu_base = dma_alloc_coherent(dma_dev, size,
+						    &ring->dma_base,
+						    GFP_KERNEL);
 		if (!ring->cpu_base) {
 			dev_err(bgmac->dev, "Allocation of RX ring 0x%X failed\n",
 				ring->mmio_base);
-			err = -ENOMEM;
 			goto err_dma_free;
 		}
 
@@ -1448,7 +1446,7 @@
 	struct phy_device *phy_dev;
 	int err;
 
-	phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);
+	phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
 	if (!phy_dev || IS_ERR(phy_dev)) {
 		dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
 		return -ENODEV;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 122fdb8..fbc196b 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -844,8 +844,8 @@
 						 BNX2_SBLK_MSIX_ALIGN_SIZE);
 	bp->status_stats_size = status_blk_size +
 				sizeof(struct statistics_block);
-	status_blk = dma_zalloc_coherent(&bp->pdev->dev, bp->status_stats_size,
-					 &bp->status_blk_mapping, GFP_KERNEL);
+	status_blk = dma_alloc_coherent(&bp->pdev->dev, bp->status_stats_size,
+					&bp->status_blk_mapping, GFP_KERNEL);
 	if (!status_blk)
 		return -ENOMEM;
 
@@ -3305,8 +3305,6 @@
 
 	BNX2_WR(bp, rxr->rx_bseq_addr, rxr->rx_prod_bseq);
 
-	mmiowb();
-
 	return rx_pkt;
 
 }
@@ -6723,8 +6721,6 @@
 	BNX2_WR16(bp, txr->tx_bidx_addr, prod);
 	BNX2_WR(bp, txr->tx_bseq_addr, txr->tx_prod_bseq);
 
-	mmiowb();
-
 	txr->tx_prod = prod;
 
 	if (unlikely(bnx2_tx_avail(bp, txr) <= MAX_SKB_FRAGS)) {
@@ -8677,8 +8673,7 @@
 static int
 bnx2_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnx2 *bp = netdev_priv(dev);
 
 	if (netif_running(dev)) {
@@ -8697,8 +8692,7 @@
 static int
 bnx2_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnx2 *bp = netdev_priv(dev);
 
 	if (!netif_running(dev))
@@ -8793,13 +8787,6 @@
 	if (!(bp->flags & BNX2_FLAG_AER_ENABLED))
 		return result;
 
-	err = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (err) {
-		dev_err(&pdev->dev,
-			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
-			 err); /* non-fatal, continue */
-	}
-
 	return result;
 }
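
The suspend/resume cleanups above work because pci_set_drvdata() stores its
pointer in the embedded struct device, so a dev_pm_ops callback can read it
back with dev_get_drvdata() and skip the to_pci_dev()/pci_get_drvdata()
round trip. Sketch (demo_suspend() is hypothetical):

	static int demo_suspend(struct device *device)
	{
		/* Same pointer pci_get_drvdata(to_pci_dev(device)) returns */
		struct net_device *dev = dev_get_drvdata(device);

		if (!dev || !netif_running(dev))
			return 0;

		netif_device_detach(dev);
		/* ... quiesce the hardware ... */
		return 0;
	}
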
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/Makefile b/drivers/net/ethernet/broadcom/bnx2x/Makefile
index 116762d..9fdfaa2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/Makefile
+++ b/drivers/net/ethernet/broadcom/bnx2x/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for Broadcom 10-Gigabit ethernet driver
 #
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 0de487a..6026b53 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -32,7 +32,7 @@
  * (you will need to reboot afterwards) */
 /* #define BNX2X_STOP_ON_ERROR */
 
-#define DRV_MODULE_VERSION      "1.712.30-0"
+#define DRV_MODULE_VERSION      "1.713.36-0"
 #define DRV_MODULE_RELDATE      "2014/02/10"
 #define BNX2X_BC_VER            0x040200
 
@@ -1282,6 +1282,7 @@
 	BNX2X_SP_RTNL_TX_STOP,
 	BNX2X_SP_RTNL_GET_DRV_VERSION,
 	BNX2X_SP_RTNL_CHANGE_UDP_PORT,
+	BNX2X_SP_RTNL_UPDATE_SVID,
 };
 
 enum bnx2x_iov_flag {
@@ -2080,7 +2081,7 @@
 			    bool is_pf);
 
 #define BNX2X_ILT_ZALLOC(x, y, size)					\
-	x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL)
+	x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL)
 
 #define BNX2X_ILT_FREE(x, y, size) \
 	do { \
@@ -2520,6 +2521,7 @@
 void bnx2x_init_ptp(struct bnx2x *bp);
 int bnx2x_configure_ptp_filters(struct bnx2x *bp);
 void bnx2x_set_rx_ts(struct bnx2x *bp, struct sk_buff *skb);
+void bnx2x_register_phc(struct bnx2x *bp);
 
 #define BNX2X_MAX_PHC_DRIFT 31000000
 #define BNX2X_PTP_TX_TIMEOUT
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 5a727d4..d10b421 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -27,7 +27,6 @@
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <net/ip6_checksum.h>
-#include <net/busy_poll.h>
 #include <linux/prefetch.h>
 #include "bnx2x_cmn.h"
 #include "bnx2x_init.h"
@@ -286,6 +285,9 @@
 	hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
 	sw_cons = txdata->tx_pkt_cons;
 
+	/* Ensure subsequent loads occur after hw_cons */
+	smp_rmb();
+
 	while (sw_cons != hw_cons) {
 		u16 pkt_cons;
 
@@ -685,7 +687,7 @@
 		if (unlikely(gfpflags_allow_blocking(gfp_mask)))
 			return (void *)__get_free_page(gfp_mask);
 
-		return netdev_alloc_frag(fp->rx_frag_size);
+		return napi_alloc_frag(fp->rx_frag_size);
 	}
 
 	return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask);
@@ -1910,8 +1912,7 @@
 }
 
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       struct net_device *sb_dev,
-		       select_queue_fallback_t fallback)
+		       struct net_device *sb_dev)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
@@ -1933,8 +1934,7 @@
 	}
 
 	/* select a non-FCoE queue */
-	return fallback(dev, skb, NULL) %
-	       (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
+	return netdev_pick_tx(dev, skb, NULL) % (BNX2X_NUM_ETH_QUEUES(bp));
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
@@ -2843,6 +2843,7 @@
 	bnx2x_set_rx_mode_inner(bp);
 
 	if (bp->flags & PTP_SUPPORTED) {
+		bnx2x_register_phc(bp);
 		bnx2x_init_ptp(bp);
 		bnx2x_configure_ptp_filters(bp);
 	}
@@ -3056,12 +3057,13 @@
 	/* if VF indicate to PF this function is going down (PF will delete sp
 	 * elements and clear initializations
 	 */
-	if (IS_VF(bp))
+	if (IS_VF(bp)) {
+		bnx2x_clear_vlan_info(bp);
 		bnx2x_vfpf_close_vf(bp);
-	else if (unload_mode != UNLOAD_RECOVERY)
+	} else if (unload_mode != UNLOAD_RECOVERY) {
 		/* if this is a normal/close unload need to clean up chip*/
 		bnx2x_chip_cleanup(bp, unload_mode, keep_link);
-	else {
+	} else {
 		/* Send the UNLOAD_REQUEST to the MCP */
 		bnx2x_send_unload_req(bp, unload_mode);
 
@@ -3858,9 +3860,12 @@
 
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
 		if (!(bp->flags & TX_TIMESTAMPING_EN)) {
+			bp->eth_stats.ptp_skip_tx_ts++;
 			BNX2X_ERR("Tx timestamping was not enabled, this packet will not be timestamped\n");
 		} else if (bp->ptp_tx_skb) {
-			BNX2X_ERR("The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+			bp->eth_stats.ptp_skip_tx_ts++;
+			netdev_err_once(bp->dev,
+					"Device supports only a single outstanding packet to timestamp, this packet won't be timestamped\n");
 		} else {
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 			/* schedule check for Tx timestamp */
@@ -4166,8 +4171,6 @@
 
 	DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);
 
-	mmiowb();
-
 	txdata->tx_bd_prod += nbd;
 
 	if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_DESC_PER_TX_PKT)) {
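
The bnx2x_select_queue() change tracks the v5.2 core API: the
select_queue_fallback_t argument was removed from .ndo_select_queue, and
drivers now call netdev_pick_tx() directly when they have nothing special to
force. A sketch of the minimal shape such a callback takes (demo_select_queue
is hypothetical):

	static u16 demo_select_queue(struct net_device *dev, struct sk_buff *skb,
				     struct net_device *sb_dev)
	{
		/* Use the core's hash-based pick, clamped to the range of
		 * ordinary (non-special-purpose) transmit queues. */
		return netdev_pick_tx(dev, skb, sb_dev) %
		       dev->real_num_tx_queues;
	}
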
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 0e508e5..8b08cb1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -52,7 +52,7 @@
 
 #define BNX2X_PCI_ALLOC(y, size)					\
 ({									\
-	void *x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \
+	void *x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \
 	if (x)								\
 		DP(NETIF_MSG_HW,					\
 		   "BNX2X_PCI_ALLOC: Physical %Lx Virtual %p\n",	\
@@ -425,6 +425,8 @@
 void bnx2x_disable_close_the_gate(struct bnx2x *bp);
 int bnx2x_init_hw_func_cnic(struct bnx2x *bp);
 
+void bnx2x_clear_vlan_info(struct bnx2x *bp);
+
 /**
  * bnx2x_sp_event - handle ramrods completion.
  *
@@ -494,11 +496,11 @@
 int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
 int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 		      __be16 vlan_proto);
+int bnx2x_set_vf_spoofchk(struct net_device *dev, int idx, bool val);
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       struct net_device *sb_dev,
-		       select_queue_fallback_t fallback);
+		       struct net_device *sb_dev);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 					struct bnx2x_fastpath *fp,
@@ -526,8 +528,6 @@
 		REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4,
 			       ((u32 *)&rx_prods)[i]);
 
-	mmiowb(); /* keep prod updates ordered */
-
 	DP(NETIF_MSG_RX_STATUS,
 	   "queue[%d]:  wrote  bd_prod %u  cqe_prod %u  sge_prod %u\n",
 	   fp->index, bd_prod, rx_comp_prod, rx_sge_prod);
@@ -652,7 +652,6 @@
 	REG_WR(bp, igu_addr, cmd_data.sb_id_and_flags);
 
 	/* Make sure that ACK is written */
-	mmiowb();
 	barrier();
 }
 
@@ -673,7 +672,6 @@
 	REG_WR(bp, hc_addr, (*(u32 *)&igu_ack));
 
 	/* Make sure that ACK is written */
-	mmiowb();
 	barrier();
 }
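
Every mmiowb() deletion in this patch follows from the v5.2 I/O-ordering
rework: on the architectures that needed it, spin_unlock() now performs the
mmiowb ordering implicitly, and the macro itself is gone. Where a comment
such as the one above insists the ACK be written, the ordered writel() plus
a compiler barrier() is sufficient; a hedged sketch:

	static void demo_ack_interrupt(void __iomem *reg, u32 val)
	{
		writel(val, reg);	/* MMIO write, ordered by writel() */
		barrier();		/* stop the compiler reordering past it */
	}
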
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index a4a90b6..4a0ba68 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -182,7 +182,9 @@
 	{ STATS_OFFSET32(driver_filtered_tx_pkt),
 				4, false, "driver_filtered_tx_pkt" },
 	{ STATS_OFFSET32(eee_tx_lpi),
-				4, true, "Tx LPI entry count"}
+				4, true, "Tx LPI entry count"},
+	{ STATS_OFFSET32(ptp_skip_tx_ts),
+				4, false, "ptp_skipped_tx_tstamp" },
 };
 
 #define BNX2X_NUM_STATS		ARRAY_SIZE(bnx2x_stats_arr)
@@ -1105,11 +1107,39 @@
 			      struct ethtool_drvinfo *info)
 {
 	struct bnx2x *bp = netdev_priv(dev);
+	char version[ETHTOOL_FWVERS_LEN];
+	int ext_dev_info_offset;
+	u32 mbi;
 
 	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
 	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
 
-	bnx2x_fill_fw_str(bp, info->fw_version, sizeof(info->fw_version));
+	memset(version, 0, sizeof(version));
+	snprintf(version, ETHTOOL_FWVERS_LEN, " storm %d.%d.%d.%d",
+		 BCM_5710_FW_MAJOR_VERSION, BCM_5710_FW_MINOR_VERSION,
+		 BCM_5710_FW_REVISION_VERSION, BCM_5710_FW_ENGINEERING_VERSION);
+	strlcat(info->version, version, sizeof(info->version));
+
+	if (SHMEM2_HAS(bp, extended_dev_info_shared_addr)) {
+		ext_dev_info_offset = SHMEM2_RD(bp,
+						extended_dev_info_shared_addr);
+		mbi = REG_RD(bp, ext_dev_info_offset +
+			     offsetof(struct extended_dev_info_shared_cfg,
+				      mbi_version));
+		if (mbi) {
+			memset(version, 0, sizeof(version));
+			snprintf(version, ETHTOOL_FWVERS_LEN, "mbi %d.%d.%d ",
+				 (mbi & 0xff000000) >> 24,
+				 (mbi & 0x00ff0000) >> 16,
+				 (mbi & 0x0000ff00) >> 8);
+			strlcpy(info->fw_version, version,
+				sizeof(info->fw_version));
+		}
+	}
+
+	memset(version, 0, sizeof(version));
+	bnx2x_fill_fw_str(bp, version, ETHTOOL_FWVERS_LEN);
+	strlcat(info->fw_version, version, sizeof(info->fw_version));
 
 	strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info));
 }
@@ -1581,7 +1611,8 @@
 	}
 
 	if (!sff8472_comp ||
-	    (diag_type & SFP_EEPROM_DIAG_ADDR_CHANGE_REQ)) {
+	    (diag_type & SFP_EEPROM_DIAG_ADDR_CHANGE_REQ) ||
+	    !(diag_type & SFP_EEPROM_DDM_IMPLEMENTED)) {
 		modinfo->type = ETH_MODULE_SFF_8079;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
 	} else {
@@ -2595,7 +2626,6 @@
 	wmb();
 	DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);
 
-	mmiowb();
 	barrier();
 
 	num_pkts++;
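
The reworked get_drvinfo above concatenates up to three version sources
(storm firmware, optional MBI bootcode, PHY firmware) into ethtool's
fixed-size buffers, which is why every step goes through bounded
snprintf()/strlcpy()/strlcat() calls. The composition pattern, condensed
(demo_fill_fw_version() and the literal storm string are illustrative):

	static void demo_fill_fw_version(char *dst, size_t len, u32 mbi)
	{
		char part[32];

		/* strlcat() keeps dst NUL-terminated even when truncating */
		snprintf(part, sizeof(part), "mbi %d.%d.%d ",
			 (mbi >> 24) & 0xff, (mbi >> 16) & 0xff,
			 (mbi >> 8) & 0xff);
		strlcpy(dst, part, len);
		strlcat(dst, "storm 7.13.11.0", len);
	}
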
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index f8b8103..78326a6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -1140,6 +1140,11 @@
 
 };
 
+struct extended_dev_info_shared_cfg {
+	u32 reserved[18];
+	u32 mbi_version;
+	u32 mbi_date;
+};
 
 #if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
 	#error "Missing either LITTLE_ENDIAN or BIG_ENDIAN definition."
@@ -3019,7 +3024,7 @@
 
 #define BCM_5710_FW_MAJOR_VERSION			7
 #define BCM_5710_FW_MINOR_VERSION			13
-#define BCM_5710_FW_REVISION_VERSION		1
+#define BCM_5710_FW_REVISION_VERSION		11
 #define BCM_5710_FW_ENGINEERING_VERSION		0
 #define BCM_5710_FW_COMPILE_FLAGS			1
 
@@ -3634,8 +3639,10 @@
 #define CLIENT_INIT_RX_DATA_TPA_EN_IPV6_SHIFT 1
 #define CLIENT_INIT_RX_DATA_TPA_MODE (0x1<<2)
 #define CLIENT_INIT_RX_DATA_TPA_MODE_SHIFT 2
-#define CLIENT_INIT_RX_DATA_RESERVED5 (0x1F<<3)
-#define CLIENT_INIT_RX_DATA_RESERVED5_SHIFT 3
+#define CLIENT_INIT_RX_DATA_TPA_OVER_VLAN_DISABLE (0x1<<3)
+#define CLIENT_INIT_RX_DATA_TPA_OVER_VLAN_DISABLE_SHIFT 3
+#define CLIENT_INIT_RX_DATA_RESERVED5 (0xF<<4)
+#define CLIENT_INIT_RX_DATA_RESERVED5_SHIFT 4
 	u8 vmqueue_mode_en_flg;
 	u8 extra_data_over_sgl_en_flg;
 	u8 cache_line_alignment_log_size;
@@ -3826,7 +3833,7 @@
  */
 struct eth_classify_header {
 	u8 rule_cnt;
-	u8 reserved0;
+	u8 warning_on_error;
 	__le16 reserved1;
 	__le32 echo;
 };
@@ -4747,6 +4754,8 @@
 	__le32 sge_page_base_hi;
 	__le16 sge_pause_thr_low;
 	__le16 sge_pause_thr_high;
+	u8 tpa_over_vlan_disable;
+	u8 reserved[7];
 };
 
 
@@ -4941,7 +4950,7 @@
 	u32 upper_bound;
 	u32 fair_threshold;
 	u32 fairness_timeout;
-	u32 reserved0;
+	u32 size_thr;
 };
 
 /*
@@ -5410,7 +5419,9 @@
 	u8 sd_vlan_force_pri_val;
 	u8 c2s_pri_tt_valid;
 	u8 c2s_pri_default;
-	u8 reserved2[6];
+	u8 tx_vlan_filtering_enable;
+	u8 tx_vlan_filtering_use_pvid;
+	u8 reserved2[4];
 	struct c2s_pri_trans_table_entry c2s_pri_trans_table;
 };
 
@@ -5443,7 +5454,8 @@
 	u8 reserved1;
 	__le16 sd_vlan_tag;
 	__le16 sd_vlan_eth_type;
-	__le16 reserved0;
+	u8 tx_vlan_filtering_pvid_change_flg;
+	u8 reserved0;
 	__le32 reserved2;
 };
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
index 46ee2c0..066765f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
@@ -449,7 +449,7 @@
 				ccd[cos] =
 				    (u32)input_data->cos_min_rate[cos] * 100 *
 				    (T_FAIR_COEF / (8 * 100 * cosWeightSum));
-				 if (ccd[cos] < pdata->fair_vars.fair_threshold
+				if (ccd[cos] < pdata->fair_vars.fair_threshold
 						+ MIN_ABOVE_THRESH) {
 					ccd[cos] =
 					    pdata->fair_vars.fair_threshold +
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 98d4c5a..d581d0a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -837,49 +837,45 @@
 
 	switch (cos_entry) {
 	case 0:
-	    nig_reg_adress_crd_weight =
-		 (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 :
-		     NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0;
-	     pbf_reg_adress_crd_weight = (port) ?
-		 PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0;
-	     break;
+		nig_reg_adress_crd_weight =
+			(port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 :
+			NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0;
+		pbf_reg_adress_crd_weight = (port) ?
+		    PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0;
+		break;
 	case 1:
-	     nig_reg_adress_crd_weight = (port) ?
-		 NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 :
-		 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1;
-	     pbf_reg_adress_crd_weight = (port) ?
-		 PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0;
-	     break;
+		nig_reg_adress_crd_weight = (port) ?
+			NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 :
+			NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1;
+		pbf_reg_adress_crd_weight = (port) ?
+			PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0;
+		break;
 	case 2:
-	     nig_reg_adress_crd_weight = (port) ?
-		 NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 :
-		 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2;
+		nig_reg_adress_crd_weight = (port) ?
+			NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 :
+			NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2;
 
-		 pbf_reg_adress_crd_weight = (port) ?
-		     PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0;
-	     break;
+		pbf_reg_adress_crd_weight = (port) ?
+			PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0;
+		break;
 	case 3:
-	    if (port)
+		if (port)
 			return -EINVAL;
-	     nig_reg_adress_crd_weight =
-		 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3;
-	     pbf_reg_adress_crd_weight =
-		 PBF_REG_COS3_WEIGHT_P0;
-	     break;
+		nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3;
+		pbf_reg_adress_crd_weight = PBF_REG_COS3_WEIGHT_P0;
+		break;
 	case 4:
-	    if (port)
-		return -EINVAL;
-	     nig_reg_adress_crd_weight =
-		 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4;
-	     pbf_reg_adress_crd_weight = PBF_REG_COS4_WEIGHT_P0;
-	     break;
+		if (port)
+			return -EINVAL;
+		nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4;
+		pbf_reg_adress_crd_weight = PBF_REG_COS4_WEIGHT_P0;
+		break;
 	case 5:
-	    if (port)
-		return -EINVAL;
-	     nig_reg_adress_crd_weight =
-		 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5;
-	     pbf_reg_adress_crd_weight = PBF_REG_COS5_WEIGHT_P0;
-	     break;
+		if (port)
+			return -EINVAL;
+		nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5;
+		pbf_reg_adress_crd_weight = PBF_REG_COS5_WEIGHT_P0;
+		break;
 	}
 
 	REG_WR(bp, nig_reg_adress_crd_weight, cos_bw_nig);
@@ -966,7 +962,7 @@
 	if (pri >= max_num_of_cos) {
 		DP(NETIF_MSG_LINK, "bnx2x_ets_e3b0_sp_pri_to_cos_set invalid "
 		   "parameter Illegal strict priority\n");
-	    return -EINVAL;
+		return -EINVAL;
 	}
 
 	if (sp_pri_to_cos[pri] != DCBX_INVALID_COS) {
@@ -1845,28 +1841,28 @@
 	bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
 		      EMAC_TX_MODE_RESET);
 
-		/* pause enable/disable */
-		bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE,
-			       EMAC_RX_MODE_FLOW_EN);
+	/* pause enable/disable */
+	bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE,
+		       EMAC_RX_MODE_FLOW_EN);
 
-		bnx2x_bits_dis(bp,  emac_base + EMAC_REG_EMAC_TX_MODE,
-			       (EMAC_TX_MODE_EXT_PAUSE_EN |
-				EMAC_TX_MODE_FLOW_EN));
-		if (!(params->feature_config_flags &
-		      FEATURE_CONFIG_PFC_ENABLED)) {
-			if (vars->flow_ctrl & BNX2X_FLOW_CTRL_RX)
-				bnx2x_bits_en(bp, emac_base +
-					      EMAC_REG_EMAC_RX_MODE,
-					      EMAC_RX_MODE_FLOW_EN);
+	bnx2x_bits_dis(bp,  emac_base + EMAC_REG_EMAC_TX_MODE,
+		       (EMAC_TX_MODE_EXT_PAUSE_EN |
+			EMAC_TX_MODE_FLOW_EN));
+	if (!(params->feature_config_flags &
+	      FEATURE_CONFIG_PFC_ENABLED)) {
+		if (vars->flow_ctrl & BNX2X_FLOW_CTRL_RX)
+			bnx2x_bits_en(bp, emac_base +
+				      EMAC_REG_EMAC_RX_MODE,
+				      EMAC_RX_MODE_FLOW_EN);
 
-			if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX)
-				bnx2x_bits_en(bp, emac_base +
-					      EMAC_REG_EMAC_TX_MODE,
-					      (EMAC_TX_MODE_EXT_PAUSE_EN |
-					       EMAC_TX_MODE_FLOW_EN));
-		} else
-			bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
-				      EMAC_TX_MODE_FLOW_EN);
+		if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX)
+			bnx2x_bits_en(bp, emac_base +
+				      EMAC_REG_EMAC_TX_MODE,
+				      (EMAC_TX_MODE_EXT_PAUSE_EN |
+				       EMAC_TX_MODE_FLOW_EN));
+	} else
+		bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
+			      EMAC_TX_MODE_FLOW_EN);
 
 	/* KEEP_VLAN_TAG, promiscuous */
 	val = REG_RD(bp, emac_base + EMAC_REG_EMAC_RX_MODE);
@@ -6339,7 +6335,7 @@
 		 */
 		if (!vars->link_up)
 			break;
-		/* else: fall through */
+		/* fall through */
 	case LED_MODE_ON:
 		if (((params->phy[EXT_PHY1].type ==
 			  PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727) ||
@@ -6478,9 +6474,9 @@
 			  MDIO_REG_BANK_GP_STATUS,
 			  MDIO_GP_STATUS_TOP_AN_STATUS1,
 			  &gp_status);
-	/* Link is up only if both local phy and external phy are up */
-	if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS))
-		return -ESRCH;
+		/* Link is up only if both local phy and external phy are up */
+		if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS))
+			return -ESRCH;
 	}
 	/* In XGXS loopback mode, do not check external PHY */
 	if (params->loopback_mode == LOOPBACK_XGXS)
@@ -7293,8 +7289,8 @@
 					DP(NETIF_MSG_LINK,
 					  "XAUI workaround has completed\n");
 					return 0;
-				 }
-				 usleep_range(3000, 6000);
+				}
+				usleep_range(3000, 6000);
 			}
 			break;
 		}
@@ -12675,39 +12671,39 @@
 				     struct link_vars *vars)
 {
 	struct bnx2x *bp = params->bp;
-		vars->link_up = 1;
-		vars->line_speed = SPEED_10000;
-		vars->duplex = DUPLEX_FULL;
-		vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
-		vars->mac_type = MAC_TYPE_BMAC;
+	vars->link_up = 1;
+	vars->line_speed = SPEED_10000;
+	vars->duplex = DUPLEX_FULL;
+	vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
+	vars->mac_type = MAC_TYPE_BMAC;
 
-		vars->phy_flags = PHY_XGXS_FLAG;
+	vars->phy_flags = PHY_XGXS_FLAG;
 
-		bnx2x_xgxs_deassert(params);
+	bnx2x_xgxs_deassert(params);
 
-		/* Set bmac loopback */
-		bnx2x_bmac_enable(params, vars, 1, 1);
+	/* Set bmac loopback */
+	bnx2x_bmac_enable(params, vars, 1, 1);
 
-		REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0);
+	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
 }
 
 static void bnx2x_init_emac_loopback(struct link_params *params,
 				     struct link_vars *vars)
 {
 	struct bnx2x *bp = params->bp;
-		vars->link_up = 1;
-		vars->line_speed = SPEED_1000;
-		vars->duplex = DUPLEX_FULL;
-		vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
-		vars->mac_type = MAC_TYPE_EMAC;
+	vars->link_up = 1;
+	vars->line_speed = SPEED_1000;
+	vars->duplex = DUPLEX_FULL;
+	vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
+	vars->mac_type = MAC_TYPE_EMAC;
 
-		vars->phy_flags = PHY_XGXS_FLAG;
+	vars->phy_flags = PHY_XGXS_FLAG;
 
-		bnx2x_xgxs_deassert(params);
-		/* Set bmac loopback */
-		bnx2x_emac_enable(params, vars, 1);
-		bnx2x_emac_program(params, vars);
-		REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0);
+	bnx2x_xgxs_deassert(params);
+	/* Set bmac loopback */
+	bnx2x_emac_enable(params, vars, 1);
+	bnx2x_emac_program(params, vars);
+	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
 }
 
 static void bnx2x_init_xmac_loopback(struct link_params *params,
@@ -13073,12 +13069,12 @@
 		REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0);
 	}
 
-		if (!CHIP_IS_E3(bp)) {
-			bnx2x_set_bmac_rx(bp, params->chip_id, port, 0);
-		} else {
-			bnx2x_set_xmac_rxtx(params, 0);
-			bnx2x_set_umac_rxtx(params, 0);
-		}
+	if (!CHIP_IS_E3(bp)) {
+		bnx2x_set_bmac_rx(bp, params->chip_id, port, 0);
+	} else {
+		bnx2x_set_xmac_rxtx(params, 0);
+		bnx2x_set_umac_rxtx(params, 0);
+	}
 	/* Disable emac */
 	if (!CHIP_IS_E3(bp))
 		REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
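
Apart from the re-indentation, note the comment change from
"/* else: fall through */" to "/* fall through */": GCC's
-Wimplicit-fallthrough=3 level only recognizes a small set of exact phrases,
so annotations have to be spelled plainly for the warning to stay silent. A
sketch of the accepted form (the demo enum and helpers are hypothetical):

	static void demo_set_led(enum demo_led_mode mode, bool link_up)
	{
		switch (mode) {
		case DEMO_LED_OPER:
			if (!link_up)
				break;
			/* fall through */
		case DEMO_LED_ON:
			demo_led_on();
			break;
		default:
			demo_led_off();
			break;
		}
	}
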
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index b7d2511..7115f50 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -62,6 +62,7 @@
 #define SFP_EEPROM_DIAG_TYPE_ADDR		0x5c
 #define SFP_EEPROM_DIAG_TYPE_SIZE		1
 #define SFP_EEPROM_DIAG_ADDR_CHANGE_REQ		(1<<2)
+#define SFP_EEPROM_DDM_IMPLEMENTED		(1<<6)
 #define SFP_EEPROM_SFF_8472_COMP_ADDR		0x5e
 #define SFP_EEPROM_SFF_8472_COMP_SIZE		1
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index fcc2328..192ff8d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -869,9 +869,6 @@
 	   "write %x to HC %d (addr 0x%x)\n",
 	   val, port, addr);
 
-	/* flush all outstanding writes */
-	mmiowb();
-
 	REG_WR(bp, addr, val);
 	if (REG_RD(bp, addr) != val)
 		BNX2X_ERR("BUG! Proper val not read from IGU!\n");
@@ -887,9 +884,6 @@
 
 	DP(NETIF_MSG_IFDOWN, "write %x to IGU\n", val);
 
-	/* flush all outstanding writes */
-	mmiowb();
-
 	REG_WR(bp, IGU_REG_PF_CONFIGURATION, val);
 	if (REG_RD(bp, IGU_REG_PF_CONFIGURATION) != val)
 		BNX2X_ERR("BUG! Proper val not read from IGU!\n");
@@ -1595,7 +1589,6 @@
 	/*
 	 * Ensure that HC_CONFIG is written before leading/trailing edge config
 	 */
-	mmiowb();
 	barrier();
 
 	if (!CHIP_IS_E1(bp)) {
@@ -1611,9 +1604,6 @@
 		REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
 		REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
 	}
-
-	/* Make sure that interrupts are indeed enabled from here on */
-	mmiowb();
 }
 
 static void bnx2x_igu_int_enable(struct bnx2x *bp)
@@ -1674,9 +1664,6 @@
 
 	REG_WR(bp, IGU_REG_TRAILING_EDGE_LATCH, val);
 	REG_WR(bp, IGU_REG_LEADING_EDGE_LATCH, val);
-
-	/* Make sure that interrupts are indeed enabled from here on */
-	mmiowb();
 }
 
 void bnx2x_int_enable(struct bnx2x *bp)
@@ -2925,6 +2912,10 @@
 	func_params.f_obj = &bp->func_obj;
 	func_params.cmd = BNX2X_F_CMD_SWITCH_UPDATE;
 
+	/* Prepare parameters for function state transitions */
+	__set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	__set_bit(RAMROD_RETRY, &func_params.ramrod_flags);
+
 	if (IS_MF_UFP(bp) || IS_MF_BD(bp)) {
 		int func = BP_ABS_FUNC(bp);
 		u32 val;
@@ -3536,6 +3527,16 @@
  */
 static void bnx2x_config_mf_bw(struct bnx2x *bp)
 {
+	/* Workaround for MFW bug.
+	 * MFW is not supposed to generate BW attention in
+	 * single function mode.
+	 */
+	if (!IS_MF(bp)) {
+		DP(BNX2X_MSG_MCP,
+		   "Ignoring MF BW config in single function mode\n");
+		return;
+	}
+
 	if (bp->link_vars.link_up) {
 		bnx2x_cmng_fns_init(bp, true, CMNG_FNS_MINMAX);
 		bnx2x_link_sync_notify(bp);
@@ -3819,7 +3820,6 @@
 
 	REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
 			 bp->spq_prod_idx);
-	mmiowb();
 }
 
 /**
@@ -4301,7 +4301,8 @@
 				bnx2x_handle_eee_event(bp);
 
 			if (val & DRV_STATUS_OEM_UPDATE_SVID)
-				bnx2x_handle_update_svid_cmd(bp);
+				bnx2x_schedule_sp_rtnl(bp,
+					BNX2X_SP_RTNL_UPDATE_SVID, 0);
 
 			if (bp->link_vars.periodic_flags &
 			    PERIODIC_FLAGS_LINK_EVENT) {
@@ -5229,7 +5230,6 @@
 {
 	/* No memory barriers */
 	storm_memset_eq_prod(bp, prod, BP_FUNC(bp));
-	mmiowb(); /* keep prod updates ordered */
 }
 
 static int  bnx2x_cnic_handle_cfc_del(struct bnx2x *bp, u32 cid,
@@ -6498,7 +6498,6 @@
 
 	/* flush all */
 	mb();
-	mmiowb();
 }
 
 void bnx2x_pre_irq_nic_init(struct bnx2x *bp)
@@ -6538,7 +6537,6 @@
 
 	/* flush all before enabling interrupts */
 	mb();
-	mmiowb();
 
 	bnx2x_int_enable(bp);
 
@@ -7713,6 +7711,9 @@
 		REG_WR(bp, reg_addr, val);
 	}
 
+	if (CHIP_IS_E3B0(bp))
+		bp->flags |= PTP_SUPPORTED;
+
 	return 0;
 }
 
@@ -7757,12 +7758,10 @@
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 			 data, igu_addr_data);
 	REG_WR(bp, igu_addr_data, data);
-	mmiowb();
 	barrier();
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 			  ctl, igu_addr_ctl);
 	REG_WR(bp, igu_addr_ctl, ctl);
-	mmiowb();
 	barrier();
 
 	/* wait for clean up to finish */
@@ -8462,6 +8461,7 @@
 	/* Fill a user request section if needed */
 	if (!test_bit(RAMROD_CONT, ramrod_flags)) {
 		ramrod_param.user_req.u.vlan.vlan = vlan;
+		__set_bit(BNX2X_VLAN, &ramrod_param.user_req.vlan_mac_flags);
 		/* Set the command: ADD or DEL */
 		if (set)
 			ramrod_param.user_req.cmd = BNX2X_VLAN_MAC_ADD;
@@ -8482,6 +8482,34 @@
 	return rc;
 }
 
+void bnx2x_clear_vlan_info(struct bnx2x *bp)
+{
+	struct bnx2x_vlan_entry *vlan;
+
+	/* Mark that hw forgot all entries */
+	list_for_each_entry(vlan, &bp->vlan_reg, link)
+		vlan->hw = false;
+
+	bp->vlan_cnt = 0;
+}
+
+static int bnx2x_del_all_vlans(struct bnx2x *bp)
+{
+	struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj;
+	unsigned long ramrod_flags = 0, vlan_flags = 0;
+	int rc;
+
+	__set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
+	__set_bit(BNX2X_VLAN, &vlan_flags);
+	rc = vlan_obj->delete_all(bp, vlan_obj, &vlan_flags, &ramrod_flags);
+	if (rc)
+		return rc;
+
+	bnx2x_clear_vlan_info(bp);
+
+	return 0;
+}
+
 int bnx2x_del_all_macs(struct bnx2x *bp,
 		       struct bnx2x_vlan_mac_obj *mac_obj,
 		       int mac_type, bool wait_for_comp)
@@ -9320,6 +9348,17 @@
 		BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: %d\n",
 			  rc);
 
+	/* The whole *vlan_obj structure may not be initialized if VLAN
+	 * filtering offload is not supported by hardware. Currently this is
+	 * true for all hardware covered by CHIP_IS_E1x().
+	 */
+	if (!CHIP_IS_E1x(bp)) {
+		/* Remove all currently configured VLANs */
+		rc = bnx2x_del_all_vlans(bp);
+		if (rc < 0)
+			BNX2X_ERR("Failed to delete all VLANs\n");
+	}
+
 	/* Disable LLH */
 	if (!CHIP_IS_E1(bp))
 		REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 0);
@@ -9407,8 +9446,13 @@
 	 * function stop ramrod is sent, since as part of this ramrod FW access
 	 * PTP registers.
 	 */
-	if (bp->flags & PTP_SUPPORTED)
+	if (bp->flags & PTP_SUPPORTED) {
 		bnx2x_stop_ptp(bp);
+		if (bp->ptp_clock) {
+			ptp_clock_unregister(bp->ptp_clock);
+			bp->ptp_clock = NULL;
+		}
+	}
 
 	/* Disable HW interrupts, NAPI */
 	bnx2x_netif_stop(bp, 1);
@@ -9494,7 +9538,6 @@
 
 	DP(NETIF_MSG_HW | NETIF_MSG_IFUP, "%s gates #2, #3 and #4\n",
 		close ? "closing" : "opening");
-	mmiowb();
 }
 
 #define SHARED_MF_CLP_MAGIC  0x80000000 /* `magic' bit */
@@ -9618,7 +9661,6 @@
 	if (!CHIP_IS_E1(bp)) {
 		REG_WR(bp, PXP2_REG_RD_START_INIT, 0);
 		REG_WR(bp, PXP2_REG_RQ_RBC_DONE, 0);
-		mmiowb();
 	}
 }
 
@@ -9718,16 +9760,13 @@
 	       reset_mask1 & (~not_reset_mask1));
 
 	barrier();
-	mmiowb();
 
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
 	       reset_mask2 & (~stay_reset2));
 
 	barrier();
-	mmiowb();
 
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1);
-	mmiowb();
 }
 
 /**
@@ -9811,9 +9850,6 @@
 	REG_WR(bp, MISC_REG_UNPREPARED, 0);
 	barrier();
 
-	/* Make sure all is written to the chip before the reset */
-	mmiowb();
-
 	/* Wait for 1ms to empty GLUE and PCI-E core queues,
 	 * PSWHST, GRC and PSWRD Tetris buffer.
 	 */
@@ -10349,6 +10385,9 @@
 			       &bp->sp_rtnl_state))
 		bnx2x_update_mng_version(bp);
 
+	if (test_and_clear_bit(BNX2X_SP_RTNL_UPDATE_SVID, &bp->sp_rtnl_state))
+		bnx2x_handle_update_svid_cmd(bp);
+
 	if (test_and_clear_bit(BNX2X_SP_RTNL_CHANGE_UDP_PORT,
 			       &bp->sp_rtnl_state)) {
 		if (bnx2x_udp_port_update(bp)) {
@@ -11239,7 +11278,7 @@
 			   dev_info.port_hw_config[port].external_phy_config),
 			   SHMEM_RD(bp,
 			   dev_info.port_hw_config[port].external_phy_config2));
-			return;
+		return;
 	}
 
 	if (CHIP_IS_E3(bp))
@@ -11740,8 +11779,10 @@
 	 * If maximum allowed number of connections is zero -
 	 * disable the feature.
 	 */
-	if (!bp->cnic_eth_dev.max_fcoe_conn)
+	if (!bp->cnic_eth_dev.max_fcoe_conn) {
 		bp->flags |= NO_FCOE_FLAG;
+		eth_zero_addr(bp->fip_mac);
+	}
 }
 
 static void bnx2x_get_cnic_info(struct bnx2x *bp)
@@ -11937,7 +11978,7 @@
 static int bnx2x_get_hwinfo(struct bnx2x *bp)
 {
 	int /*abs*/func = BP_ABS_FUNC(bp);
-	int vn, mfw_vn;
+	int vn;
 	u32 val = 0, val2 = 0;
 	int rc = 0;
 
@@ -12022,12 +12063,10 @@
 	/*
 	 * Initialize MF configuration
 	 */
-
 	bp->mf_ov = 0;
 	bp->mf_mode = 0;
 	bp->mf_sub_mode = 0;
 	vn = BP_VN(bp);
-	mfw_vn = BP_FW_MB_IDX(bp);
 
 	if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) {
 		BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n",
@@ -12484,9 +12523,6 @@
 
 	bp->dump_preset_idx = 1;
 
-	if (CHIP_IS_E3B0(bp))
-		bp->flags |= PTP_SUPPORTED;
-
 	return rc;
 }
 
@@ -13014,13 +13050,6 @@
 
 int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp)
 {
-	struct bnx2x_vlan_entry *vlan;
-
-	/* The hw forgot all entries after reload */
-	list_for_each_entry(vlan, &bp->vlan_reg, link)
-		vlan->hw = false;
-	bp->vlan_cnt = 0;
-
 	/* Don't set rx mode here. Our caller will do it. */
 	bnx2x_vlan_configure(bp, false);
 
@@ -13105,6 +13134,7 @@
 	.ndo_set_vf_mac		= bnx2x_set_vf_mac,
 	.ndo_set_vf_vlan	= bnx2x_set_vf_vlan,
 	.ndo_get_vf_config	= bnx2x_get_vf_config,
+	.ndo_set_vf_spoofchk	= bnx2x_set_vf_spoofchk,
 #endif
 #ifdef NETDEV_FCOE_WWNN
 	.ndo_fcoe_get_wwn	= bnx2x_fcoe_get_wwn,
@@ -13884,7 +13914,7 @@
 	return -ENOTSUPP;
 }
 
-static void bnx2x_register_phc(struct bnx2x *bp)
+void bnx2x_register_phc(struct bnx2x *bp)
 {
 	/* Fill the ptp_clock_info struct and register PTP clock*/
 	bp->ptp_clock_info.owner = THIS_MODULE;
@@ -14086,8 +14116,6 @@
 	       dev->base_addr, bp->pdev->irq, dev->dev_addr);
 	pcie_print_link_status(bp->pdev);
 
-	bnx2x_register_phc(bp);
-
 	if (!IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp))
 		bnx2x_set_os_driver_state(bp, OS_DRIVER_STATE_DISABLED);
 
@@ -14120,11 +14148,6 @@
 			   struct bnx2x *bp,
 			   bool remove_netdev)
 {
-	if (bp->ptp_clock) {
-		ptp_clock_unregister(bp->ptp_clock);
-		bp->ptp_clock = NULL;
-	}
-
 	/* Delete storage MAC address */
 	if (!NO_FCOE(bp)) {
 		rtnl_lock();
@@ -14369,14 +14392,6 @@
 
 	rtnl_unlock();
 
-	/* If AER, perform cleanup of the PCIe registers */
-	if (bp->flags & AER_ENABLED) {
-		if (pci_cleanup_aer_uncorrect_error_status(pdev))
-			BNX2X_ERR("pci_cleanup_aer_uncorrect_error_status failed\n");
-		else
-			DP(NETIF_MSG_HW, "pci_cleanup_aer_uncorrect_error_status succeeded\n");
-	}
-
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
@@ -14793,7 +14808,6 @@
 		if (rc)
 			break;
 
-		mmiowb();
 		barrier();
 
 		/* Start accepting on iSCSI L2 ring */
@@ -14828,7 +14842,6 @@
 		if (!bnx2x_wait_sp_comp(bp, sp_bits))
 			BNX2X_ERR("rx_mode completion timed out!\n");
 
-		mmiowb();
 		barrier();
 
 		/* Unset iSCSI L2 MAC */
@@ -15208,11 +15221,24 @@
 	u32 val_seq;
 	u64 timestamp, ns;
 	struct skb_shared_hwtstamps shhwtstamps;
+	bool bail = true;
+	int i;
 
-	/* Read Tx timestamp registers */
-	val_seq = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_SEQID :
-			 NIG_REG_P0_TLLH_PTP_BUF_SEQID);
-	if (val_seq & 0x10000) {
+	/* FW may take a while to complete timestamping; retry a few times and,
+	 * if it still hasn't completed, assume an error state and bail out.
+	 */
+	for (i = 0; i < 10; i++) {
+		/* Read Tx timestamp registers */
+		val_seq = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_SEQID :
+				 NIG_REG_P0_TLLH_PTP_BUF_SEQID);
+		if (val_seq & 0x10000) {
+			bail = false;
+			break;
+		}
+		msleep(1 << i);
+	}
+
+	if (!bail) {
 		/* There is a valid timestamp value */
 		timestamp = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_TS_MSB :
 				   NIG_REG_P0_TLLH_PTP_BUF_TS_MSB);
@@ -15227,16 +15253,18 @@
 		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 		shhwtstamps.hwtstamp = ns_to_ktime(ns);
 		skb_tstamp_tx(bp->ptp_tx_skb, &shhwtstamps);
-		dev_kfree_skb_any(bp->ptp_tx_skb);
-		bp->ptp_tx_skb = NULL;
 
 		DP(BNX2X_MSG_PTP, "Tx timestamp, timestamp cycles = %llu, ns = %llu\n",
 		   timestamp, ns);
 	} else {
-		DP(BNX2X_MSG_PTP, "There is no valid Tx timestamp yet\n");
-		/* Reschedule to keep checking for a valid timestamp value */
-		schedule_work(&bp->ptp_task);
+		DP(BNX2X_MSG_PTP,
+		   "Tx timestamp is not recorded (register read=%u)\n",
+		   val_seq);
+		bp->eth_stats.ptp_skip_tx_ts++;
 	}
+
+	dev_kfree_skb_any(bp->ptp_tx_skb);
+	bp->ptp_tx_skb = NULL;
 }
 
 void bnx2x_set_rx_ts(struct bnx2x *bp, struct sk_buff *skb)
@@ -15341,27 +15369,47 @@
 	return 0;
 }
 
+#define BNX2X_P2P_DETECT_PARAM_MASK 0x5F5
+#define BNX2X_P2P_DETECT_RULE_MASK 0x3DBB
+#define BNX2X_PTP_TX_ON_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x6AA)
+#define BNX2X_PTP_TX_ON_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3EEE)
+#define BNX2X_PTP_V1_L4_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x7EE)
+#define BNX2X_PTP_V1_L4_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3FFE)
+#define BNX2X_PTP_V2_L4_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x7EA)
+#define BNX2X_PTP_V2_L4_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3FEE)
+#define BNX2X_PTP_V2_L2_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x6BF)
+#define BNX2X_PTP_V2_L2_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3EFF)
+#define BNX2X_PTP_V2_PARAM_MASK (BNX2X_P2P_DETECT_PARAM_MASK & 0x6AA)
+#define BNX2X_PTP_V2_RULE_MASK (BNX2X_P2P_DETECT_RULE_MASK & 0x3EEE)
+
 int bnx2x_configure_ptp_filters(struct bnx2x *bp)
 {
 	int port = BP_PORT(bp);
+	u32 param, rule;
 	int rc;
 
 	if (!bp->hwtstamp_ioctl_called)
 		return 0;
 
+	param = port ? NIG_REG_P1_TLLH_PTP_PARAM_MASK :
+		NIG_REG_P0_TLLH_PTP_PARAM_MASK;
+	rule = port ? NIG_REG_P1_TLLH_PTP_RULE_MASK :
+		NIG_REG_P0_TLLH_PTP_RULE_MASK;
 	switch (bp->tx_type) {
 	case HWTSTAMP_TX_ON:
 		bp->flags |= TX_TIMESTAMPING_EN;
-		REG_WR(bp, port ? NIG_REG_P1_TLLH_PTP_PARAM_MASK :
-		       NIG_REG_P0_TLLH_PTP_PARAM_MASK, 0x6AA);
-		REG_WR(bp, port ? NIG_REG_P1_TLLH_PTP_RULE_MASK :
-		       NIG_REG_P0_TLLH_PTP_RULE_MASK, 0x3EEE);
+		REG_WR(bp, param, BNX2X_PTP_TX_ON_PARAM_MASK);
+		REG_WR(bp, rule, BNX2X_PTP_TX_ON_RULE_MASK);
 		break;
 	case HWTSTAMP_TX_ONESTEP_SYNC:
 		BNX2X_ERR("One-step timestamping is not supported\n");
 		return -ERANGE;
 	}
 
+	param = port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
+		NIG_REG_P0_LLH_PTP_PARAM_MASK;
+	rule = port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
+		NIG_REG_P0_LLH_PTP_RULE_MASK;
 	switch (bp->rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
 		break;
@@ -15375,30 +15423,24 @@
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
 		bp->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 		/* Initialize PTP detection for UDP/IPv4 events */
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
-		       NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x7EE);
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
-		       NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3FFE);
+		REG_WR(bp, param, BNX2X_PTP_V1_L4_PARAM_MASK);
+		REG_WR(bp, rule, BNX2X_PTP_V1_L4_RULE_MASK);
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
 		bp->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
 		/* Initialize PTP detection for UDP/IPv4 or UDP/IPv6 events */
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
-		       NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x7EA);
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
-		       NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3FEE);
+		REG_WR(bp, param, BNX2X_PTP_V2_L4_PARAM_MASK);
+		REG_WR(bp, rule, BNX2X_PTP_V2_L4_RULE_MASK);
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
 		bp->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
 		/* Initialize PTP detection L2 events */
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
-		       NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x6BF);
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
-		       NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3EFF);
+		REG_WR(bp, param, BNX2X_PTP_V2_L2_PARAM_MASK);
+		REG_WR(bp, rule, BNX2X_PTP_V2_L2_RULE_MASK);
 
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
@@ -15406,10 +15448,8 @@
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
 		bp->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		/* Initialize PTP detection L2, UDP/IPv4 or UDP/IPv6 events */
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_PARAM_MASK :
-		       NIG_REG_P0_LLH_PTP_PARAM_MASK, 0x6AA);
-		REG_WR(bp, port ? NIG_REG_P1_LLH_PTP_RULE_MASK :
-		       NIG_REG_P0_LLH_PTP_RULE_MASK, 0x3EEE);
+		REG_WR(bp, param, BNX2X_PTP_V2_PARAM_MASK);
+		REG_WR(bp, rule, BNX2X_PTP_V2_RULE_MASK);
 		break;
 	}
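
The reworked Tx-timestamp path above replaces open-ended work rescheduling
with a bounded poll: ten register reads spaced by msleep(1 << i) add up to
roughly one second (1 + 2 + ... + 512 ms) before the timestamp is declared
lost, the skb is freed, and ptp_skip_tx_ts is incremented. The backoff
skeleton, reduced to its essentials (demo_read_seqid() is hypothetical):

	static bool demo_wait_for_tx_ts(void)
	{
		int i;

		for (i = 0; i < 10; i++) {
			if (demo_read_seqid() & 0x10000)  /* valid bit set */
				return true;
			msleep(1 << i);		/* 1, 2, 4, ... 512 ms */
		}
		return false;	/* ~1s elapsed: treat the timestamp as lost */
	}
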
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index a9eaaf3..80d250a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -2977,8 +2977,8 @@
 
 		cmd_pos->data.macs_num--;
 
-		  DP(BNX2X_MSG_SP, "Deleting MAC. %d left,cnt is %d\n",
-				   cmd_pos->data.macs_num, cnt);
+		DP(BNX2X_MSG_SP, "Deleting MAC. %d left,cnt is %d\n",
+		   cmd_pos->data.macs_num, cnt);
 
 		/* Break if we reached the maximum
 		 * number of rules.
@@ -3597,8 +3597,8 @@
 	/* RESTORE command will restore the entire multicast configuration */
 	case BNX2X_MCAST_CMD_RESTORE:
 		p->mcast_list_len = reg_sz;
-		  DP(BNX2X_MSG_SP, "Command %d, p->mcast_list_len=%d\n",
-				   cmd, p->mcast_list_len);
+		DP(BNX2X_MSG_SP, "Command %d, p->mcast_list_len=%d\n",
+		   cmd, p->mcast_list_len);
 		break;
 
 	case BNX2X_MCAST_CMD_ADD:
@@ -3735,8 +3735,8 @@
 
 		i++;
 
-		  DP(BNX2X_MSG_SP, "About to configure %pM mcast MAC\n",
-		     cfg_data.mac);
+		DP(BNX2X_MSG_SP, "About to configure %pM mcast MAC\n",
+		   cfg_data.mac);
 	}
 
 	*rdata_idx = i;
@@ -5039,7 +5039,6 @@
 	/* As no ramrod is sent, complete the command immediately  */
 	o->complete_cmd(bp, o, BNX2X_Q_CMD_INIT);
 
-	mmiowb();
 	smp_mb();
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 0bf2fd4..7a6e82d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -265,6 +265,7 @@
 	BNX2X_ETH_MAC,
 	BNX2X_ISCSI_ETH_MAC,
 	BNX2X_NETQ_ETH_MAC,
+	BNX2X_VLAN,
 	BNX2X_DONT_CONSUME_CAM_CREDIT,
 	BNX2X_DONT_CONSUME_CAM_CREDIT_DEST,
 };
@@ -272,7 +273,8 @@
 #define BNX2X_VLAN_MAC_CMP_MASK	(1 << BNX2X_UC_LIST_MAC | \
 				 1 << BNX2X_ETH_MAC | \
 				 1 << BNX2X_ISCSI_ETH_MAC | \
-				 1 << BNX2X_NETQ_ETH_MAC)
+				 1 << BNX2X_NETQ_ETH_MAC | \
+				 1 << BNX2X_VLAN)
 #define BNX2X_VLAN_MAC_CMP_FLAGS(flags) \
 	((flags) & BNX2X_VLAN_MAC_CMP_MASK)
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 62da465..0edbb0a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -100,13 +100,11 @@
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 	   cmd_data.sb_id_and_flags, igu_addr_data);
 	REG_WR(bp, igu_addr_data, cmd_data.sb_id_and_flags);
-	mmiowb();
 	barrier();
 
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 	   ctl, igu_addr_ctl);
 	REG_WR(bp, igu_addr_ctl, ctl);
-	mmiowb();
 	barrier();
 }
 
@@ -209,7 +207,10 @@
 	 */
 	__set_bit(BNX2X_Q_FLG_TX_SWITCH, &setup_p->flags);
 	__set_bit(BNX2X_Q_FLG_TX_SEC, &setup_p->flags);
-	__set_bit(BNX2X_Q_FLG_ANTI_SPOOF, &setup_p->flags);
+	if (vf->spoofchk)
+		__set_bit(BNX2X_Q_FLG_ANTI_SPOOF, &setup_p->flags);
+	else
+		__clear_bit(BNX2X_Q_FLG_ANTI_SPOOF, &setup_p->flags);
 
 	/* Setup-op rx parameters */
 	if (test_bit(BNX2X_Q_TYPE_HAS_RX, &q_type)) {
@@ -1269,6 +1270,8 @@
 		bnx2x_vf(bp, i, state) = VF_FREE;
 		mutex_init(&bnx2x_vf(bp, i, op_mutex));
 		bnx2x_vf(bp, i, op_current) = CHANNEL_TLV_NONE;
+		/* enable spoofchk by default */
+		bnx2x_vf(bp, i, spoofchk) = 1;
 	}
 
 	/* re-read the IGU CAM for VFs - index and abs_vfid must be set */
@@ -2225,7 +2228,7 @@
 		rc = bnx2x_vf_close(bp, vf);
 		if (rc)
 			goto op_err;
-		/* Fallthrough to release resources */
+		/* Fall through - to release resources */
 	case VF_ACQUIRED:
 		DP(BNX2X_MSG_IOV, "about to free resources\n");
 		bnx2x_vf_free_resc(bp, vf);
@@ -2632,7 +2635,8 @@
 	ivi->qos = 0;
 	ivi->max_tx_rate = 10000; /* always 10G. TBA take from link struct */
 	ivi->min_tx_rate = 0;
-	ivi->spoofchk = 1; /*always enabled */
+	ivi->spoofchk = vf->spoofchk ? 1 : 0;
+	ivi->linkstate = vf->link_cfg;
 	if (vf->state == VF_ENABLED) {
 		/* mac and vlan are in vlan_mac objects */
 		if (bnx2x_validate_vf_sp_objs(bp, vf, false)) {
@@ -2950,6 +2954,77 @@
 	return rc;
 }
 
+int bnx2x_set_vf_spoofchk(struct net_device *dev, int idx, bool val)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+	struct bnx2x_virtf *vf;
+	int i, rc = 0;
+
+	vf = BP_VF(bp, idx);
+	if (!vf)
+		return -EINVAL;
+
+	/* nothing to do */
+	if (vf->spoofchk == val)
+		return 0;
+
+	vf->spoofchk = val ? 1 : 0;
+
+	DP(BNX2X_MSG_IOV, "%s spoofchk for VF %d\n",
+	   val ? "enabling" : "disabling", idx);
+
+	/* is vf initialized and queue set up? */
+	if (vf->state != VF_ENABLED ||
+	    bnx2x_get_q_logical_state(bp, &bnx2x_leading_vfq(vf, sp_obj)) !=
+	    BNX2X_Q_LOGICAL_STATE_ACTIVE)
+		return rc;
+
+	/* User should be able to see error in system logs */
+	if (!bnx2x_validate_vf_sp_objs(bp, vf, true))
+		return -EINVAL;
+
+	/* send queue update ramrods to configure spoofchk */
+	for_each_vfq(vf, i) {
+		struct bnx2x_queue_state_params q_params = {NULL};
+		struct bnx2x_queue_update_params *update_params;
+
+		q_params.q_obj = &bnx2x_vfq(vf, i, sp_obj);
+
+		/* validate the Q is UP */
+		if (bnx2x_get_q_logical_state(bp, q_params.q_obj) !=
+		    BNX2X_Q_LOGICAL_STATE_ACTIVE)
+			continue;
+
+		__set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);
+		q_params.cmd = BNX2X_Q_CMD_UPDATE;
+		update_params = &q_params.params.update;
+		__set_bit(BNX2X_Q_UPDATE_ANTI_SPOOF_CHNG,
+			  &update_params->update_flags);
+		if (val) {
+			__set_bit(BNX2X_Q_UPDATE_ANTI_SPOOF,
+				  &update_params->update_flags);
+		} else {
+			__clear_bit(BNX2X_Q_UPDATE_ANTI_SPOOF,
+				    &update_params->update_flags);
+		}
+
+		/* Update the Queue state */
+		rc = bnx2x_queue_state_change(bp, &q_params);
+		if (rc) {
+			BNX2X_ERR("Failed to %s spoofchk on VF %d - vfq %d\n",
+				  val ? "enable" : "disable", idx, i);
+			goto out;
+		}
+	}
+out:
+	if (!rc)
+		DP(BNX2X_MSG_IOV,
+		   "%s spoofchk for VF[%d]\n", val ? "Enabled" : "Disabled",
+		   idx);
+
+	return rc;
+}
+
 /* crc is the first field in the bulletin board. Compute the crc over the
  * entire bulletin board excluding the crc field itself. Use the length field
  * as the Bulletin Board was posted by a PF with possibly a different version
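
bnx2x_set_vf_spoofchk() above wires up the standard .ndo_set_vf_spoofchk
hook: the requested state is always recorded in the VF structure, and if the
VF is already running, each active queue gets a BNX2X_Q_CMD_UPDATE ramrod
that sets the ANTI_SPOOF_CHNG bit (marking the field as valid) together with
the new ANTI_SPOOF value. From the hypervisor it is driven by iproute2 in
the usual way, for example:

	ip link set dev <pf-netdev> vf 0 spoofchk off
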
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index eb814c6..b6ebd92 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -142,6 +142,8 @@
 
 	bool flr_clnup_stage;	/* true during flr cleanup */
 	bool malicious;		/* true if FW indicated so, until FLR */
+	/* 1 (true) if spoof check is enabled */
+	u8 spoofchk;
 
 	/* dma */
 	dma_addr_t fw_stat_map;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
index b2644ed..d55e636 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
@@ -207,6 +207,9 @@
 	u32 driver_filtered_tx_pkt;
 	/* src: Clear-on-Read register; Will not survive PMF Migration */
 	u32 eee_tx_lpi;
+
+	/* PTP */
+	u32 ptp_skip_tx_ts;
 };
 
 struct bnx2x_eth_q_stats {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 8e0a317..0752b7f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -172,8 +172,6 @@
 	/* Trigger the PF FW */
 	writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid);
 
-	mmiowb();
-
 	/* Wait for PF to complete */
 	while ((tout >= 0) && (!*done)) {
 		msleep(interval);
@@ -957,7 +955,7 @@
 	bnx2x_sample_bulletin(bp);
 
 	if (bp->shadow_bulletin.content.valid_bitmap & 1 << VLAN_VALID) {
-		BNX2X_ERR("Hypervisor will dicline the request, avoiding\n");
+		BNX2X_ERR("Hypervisor will decline the request, avoiding\n");
 		rc = -EINVAL;
 		goto out;
 	}
@@ -1179,7 +1177,6 @@
 
 	/* ack the FW */
 	storm_memset_vf_mbx_ack(bp, vf->abs_vfid);
-	mmiowb();
 
 	/* copy the response header including status-done field,
 	 * must be last dmae, must be after FW is acked
@@ -1654,13 +1651,9 @@
 {
 	int i, j;
 	struct bnx2x_vf_mac_vlan_filters *fl = NULL;
-	size_t fsz;
 
-	fsz = tlv->n_mac_vlan_filters *
-	      sizeof(struct bnx2x_vf_mac_vlan_filter) +
-	      sizeof(struct bnx2x_vf_mac_vlan_filters);
-
-	fl = kzalloc(fsz, GFP_KERNEL);
+	fl = kzalloc(struct_size(fl, filters, tlv->n_mac_vlan_filters),
+		     GFP_KERNEL);
 	if (!fl)
 		return -ENOMEM;
 
@@ -2178,7 +2171,6 @@
 		 */
 		storm_memset_vf_mbx_ack(bp, vf->abs_vfid);
 		/* Firmware ack should be written before unlocking channel */
-		mmiowb();
 		bnx2x_unlock_vf_pf_channel(bp, vf, mbx->first_tlv.tl.type);
 	}
 }
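
The kzalloc() conversion above uses struct_size() from <linux/overflow.h>:
for a struct ending in a flexible array it computes
sizeof(*fl) + count * sizeof(fl->filters[0]) and saturates to SIZE_MAX on
overflow, so the allocation simply fails instead of coming back undersized.
A self-contained sketch (the demo structs are hypothetical):

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct demo_filter {
		u8 mac[6];
		u16 vid;
	};

	struct demo_filters {
		int count;
		struct demo_filter filters[];	/* flexible array member */
	};

	static struct demo_filters *demo_alloc(int n)
	{
		struct demo_filters *fl;

		fl = kzalloc(struct_size(fl, filters, n), GFP_KERNEL);
		if (fl)
			fl->count = n;
		return fl;
	}
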
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 5a779b1..cb97ec5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
 bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e2d9254..04ec909 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1,7 +1,7 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
- * Copyright (c) 2016-2018 Broadcom Limited
+ * Copyright (c) 2016-2019 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -31,6 +31,7 @@
 #include <asm/page.h>
 #include <linux/time.h>
 #include <linux/mii.h>
+#include <linux/mdio.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
 #include <linux/if_bridge.h>
@@ -53,6 +54,7 @@
 #include <net/pkt_cls.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
+#include <net/page_pool.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -111,12 +113,19 @@
 	BCM57452,
 	BCM57454,
 	BCM5745x_NPAR,
+	BCM57508,
+	BCM57504,
+	BCM57502,
+	BCM57508_NPAR,
+	BCM57504_NPAR,
+	BCM57502_NPAR,
 	BCM58802,
 	BCM58804,
 	BCM58808,
 	NETXTREME_E_VF,
 	NETXTREME_C_VF,
 	NETXTREME_S_VF,
+	NETXTREME_E_P5_VF,
 };
 
 /* indexed by enum above */
@@ -152,12 +161,19 @@
 	[BCM57452] = { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" },
 	[BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 	[BCM5745x_NPAR] = { "Broadcom BCM5745x NetXtreme-E Ethernet Partition" },
+	[BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
+	[BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" },
+	[BCM57502] = { "Broadcom BCM57502 NetXtreme-E 10Gb/25Gb/50Gb Ethernet" },
+	[BCM57508_NPAR] = { "Broadcom BCM57508 NetXtreme-E Ethernet Partition" },
+	[BCM57504_NPAR] = { "Broadcom BCM57504 NetXtreme-E Ethernet Partition" },
+	[BCM57502_NPAR] = { "Broadcom BCM57502 NetXtreme-E Ethernet Partition" },
 	[BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
 	[BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 	[BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 	[NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
 	[NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
 	[NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" },
+	[NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" },
 };
 
 static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -196,6 +212,15 @@
 	{ PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 },
 	{ PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 },
+	{ PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 },
+	{ PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 },
+	{ PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 },
+	{ PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
 	{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
 #ifdef CONFIG_BNXT_SRIOV
@@ -207,6 +232,8 @@
 	{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x1806), .driver_data = NETXTREME_E_P5_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF },
 	{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
 #endif
 	{ 0 }
@@ -227,6 +254,8 @@
 	ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED,
 	ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE,
 	ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE,
+	ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY,
+	ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY,
 };
 
 static struct workqueue_struct *bnxt_pf_wq;
@@ -234,22 +263,53 @@
 static bool bnxt_vf_pciid(enum board_idx idx)
 {
 	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
-		idx == NETXTREME_S_VF);
+		idx == NETXTREME_S_VF || idx == NETXTREME_E_P5_VF);
 }
 
 #define DB_CP_REARM_FLAGS	(DB_KEY_CP | DB_IDX_VALID)
 #define DB_CP_FLAGS		(DB_KEY_CP | DB_IDX_VALID | DB_IRQ_DIS)
 #define DB_CP_IRQ_DIS_FLAGS	(DB_KEY_CP | DB_IRQ_DIS)
 
-#define BNXT_CP_DB_REARM(db, raw_cons)					\
-		writel(DB_CP_REARM_FLAGS | RING_CMP(raw_cons), db)
-
-#define BNXT_CP_DB(db, raw_cons)					\
-		writel(DB_CP_FLAGS | RING_CMP(raw_cons), db)
-
 #define BNXT_CP_DB_IRQ_DIS(db)						\
 		writel(DB_CP_IRQ_DIS_FLAGS, db)
 
+#define BNXT_DB_CQ(db, idx)						\
+	writel(DB_CP_FLAGS | RING_CMP(idx), (db)->doorbell)
+
+#define BNXT_DB_NQ_P5(db, idx)						\
+	writeq((db)->db_key64 | DBR_TYPE_NQ | RING_CMP(idx), (db)->doorbell)
+
+#define BNXT_DB_CQ_ARM(db, idx)						\
+	writel(DB_CP_REARM_FLAGS | RING_CMP(idx), (db)->doorbell)
+
+#define BNXT_DB_NQ_ARM_P5(db, idx)					\
+	writeq((db)->db_key64 | DBR_TYPE_NQ_ARM | RING_CMP(idx), (db)->doorbell)
+
+static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		BNXT_DB_NQ_P5(db, idx);
+	else
+		BNXT_DB_CQ(db, idx);
+}
+
+static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		BNXT_DB_NQ_ARM_P5(db, idx);
+	else
+		BNXT_DB_CQ_ARM(db, idx);
+}
+
+static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		writeq(db->db_key64 | DBR_TYPE_CQ_ARMALL | RING_CMP(idx),
+		       db->doorbell);
+	else
+		BNXT_DB_CQ(db, idx);
+}
+
 const u16 bnxt_lhint_arr[] = {
 	TX_BD_FLAGS_LHINT_512_AND_SMALLER,
 	TX_BD_FLAGS_LHINT_512_TO_1023,
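
The BNXT_DB_* rework above introduces struct bnxt_db_info so that a single
helper can ring either doorbell generation: legacy chips take a 32-bit
writel() with the DB_CP_* flag words, while the new P5 (57500-series) chips
take a 64-bit writeq() whose high bits (db_key64, plus a DBR_TYPE_* code)
identify the ring. The dispatch shape, reduced (ring-type bits folded into
db_key64 here for brevity):

	static void demo_db_ring(struct bnxt *bp, struct bnxt_db_info *db,
				 u32 idx)
	{
		if (bp->flags & BNXT_FLAG_CHIP_P5)	/* 57500 series */
			writeq(db->db_key64 | RING_CMP(idx), db->doorbell);
		else
			writel(DB_CP_FLAGS | RING_CMP(idx), db->doorbell);
	}
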
@@ -341,6 +401,7 @@
 		struct tx_push_buffer *tx_push_buf = txr->tx_push;
 		struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
 		struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
+		void __iomem *db = txr->tx_db.doorbell;
 		void *pdata = tx_push_buf->data;
 		u64 *end;
 		int j, push_len;
@@ -398,12 +459,11 @@
 
 		push_len = (length + sizeof(*tx_push) + 7) / 8;
 		if (push_len > 16) {
-			__iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
-			__iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+			__iowrite64_copy(db, tx_push_buf, 16);
+			__iowrite32_copy(db + 4, tx_push_buf + 1,
 					 (push_len - 16) << 1);
 		} else {
-			__iowrite64_copy(txr->tx_doorbell, tx_push_buf,
-					 push_len);
+			__iowrite64_copy(db, tx_push_buf, push_len);
 		}
 
 		goto tx_done;
@@ -463,6 +523,12 @@
 	}
 
 	length >>= 9;
+	if (unlikely(length >= ARRAY_SIZE(bnxt_lhint_arr))) {
+		dev_warn_ratelimited(&pdev->dev, "Dropped oversize TX packet, %d bytes.\n",
+				     skb->len);
+		i = 0;
+		goto tx_dma_error;
+	}
 	flags |= bnxt_lhint_arr[length];
 	txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
 
@@ -504,16 +570,14 @@
 	prod = NEXT_TX(prod);
 	txr->tx_prod = prod;
 
-	if (!skb->xmit_more || netif_xmit_stopped(txq))
-		bnxt_db_write(bp, txr->tx_doorbell, DB_KEY_TX | prod);
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
+		bnxt_db_write(bp, &txr->tx_db, prod);
 
 tx_done:
 
-	mmiowb();
-
 	if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) {
-		if (skb->xmit_more && !tx_buf->is_push)
-			bnxt_db_write(bp, txr->tx_doorbell, DB_KEY_TX | prod);
+		if (netdev_xmit_more() && !tx_buf->is_push)
+			bnxt_db_write(bp, &txr->tx_db, prod);
 
 		netif_tx_stop_queue(txq);
 
@@ -619,19 +683,20 @@
 }
 
 static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+					 struct bnxt_rx_ring_info *rxr,
 					 gfp_t gfp)
 {
 	struct device *dev = &bp->pdev->dev;
 	struct page *page;
 
-	page = alloc_page(gfp);
+	page = page_pool_dev_alloc_pages(rxr->page_pool);
 	if (!page)
 		return NULL;
 
 	*mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
 				      DMA_ATTR_WEAK_ORDERING);
 	if (dma_mapping_error(dev, *mapping)) {
-		__free_page(page);
+		page_pool_recycle_direct(rxr->page_pool, page);
 		return NULL;
 	}
 	*mapping += bp->rx_dma_offset;
@@ -667,7 +732,8 @@
 	dma_addr_t mapping;
 
 	if (BNXT_RX_PAGE_MODE(bp)) {
-		struct page *page = __bnxt_alloc_rx_page(bp, &mapping, gfp);
+		struct page *page =
+			__bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
 
 		if (!page)
 			return -ENOMEM;
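
__bnxt_alloc_rx_page() now draws pages from a per-ring page_pool (hence the
new <net/page_pool.h> include): allocation goes through
page_pool_dev_alloc_pages(), and the DMA-mapping error path returns the page
with page_pool_recycle_direct() instead of __free_page(), so hot pages are
reused without round-tripping through the page allocator. A hedged sketch of
the pool setup this implies elsewhere in the driver (sizing is illustrative):

	#include <net/page_pool.h>

	static int demo_alloc_rx_page_pool(struct bnxt *bp,
					   struct bnxt_rx_ring_info *rxr)
	{
		struct page_pool_params pp = { 0 };

		pp.pool_size = bp->rx_ring_size;	/* one page per slot */
		pp.nid = dev_to_node(&bp->pdev->dev);
		pp.dev = &bp->pdev->dev;
		pp.dma_dir = DMA_BIDIRECTIONAL;

		rxr->page_pool = page_pool_create(&pp);
		if (IS_ERR(rxr->page_pool)) {
			int err = PTR_ERR(rxr->page_pool);

			rxr->page_pool = NULL;
			return err;
		}
		return 0;
	}
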
@@ -776,16 +842,41 @@
 	return 0;
 }
 
-static void bnxt_reuse_rx_agg_bufs(struct bnxt_napi *bnapi, u16 cp_cons,
-				   u32 agg_bufs)
+static struct rx_agg_cmp *bnxt_get_agg(struct bnxt *bp,
+				       struct bnxt_cp_ring_info *cpr,
+				       u16 cp_cons, u16 curr)
 {
+	struct rx_agg_cmp *agg;
+
+	cp_cons = RING_CMP(ADV_RAW_CMP(cp_cons, curr));
+	agg = (struct rx_agg_cmp *)
+		&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+	return agg;
+}
+
+static struct rx_agg_cmp *bnxt_get_tpa_agg_p5(struct bnxt *bp,
+					      struct bnxt_rx_ring_info *rxr,
+					      u16 agg_id, u16 curr)
+{
+	struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[agg_id];
+
+	return &tpa_info->agg_arr[curr];
+}
+
+static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
+				   u16 start, u32 agg_bufs, bool tpa)
+{
+	struct bnxt_napi *bnapi = cpr->bnapi;
 	struct bnxt *bp = bnapi->bp;
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 	u16 prod = rxr->rx_agg_prod;
 	u16 sw_prod = rxr->rx_sw_agg_prod;
+	bool p5_tpa = false;
 	u32 i;
 
+	if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
+		p5_tpa = true;
+
 	for (i = 0; i < agg_bufs; i++) {
 		u16 cons;
 		struct rx_agg_cmp *agg;
@@ -793,8 +884,10 @@
 		struct rx_bd *prod_bd;
 		struct page *page;
 
-		agg = (struct rx_agg_cmp *)
-			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+		if (p5_tpa)
+			agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i);
+		else
+			agg = bnxt_get_agg(bp, cpr, idx, start + i);
 		cons = agg->rx_agg_cmp_opaque;
 		__clear_bit(cons, rxr->rx_agg_bmap);
 
@@ -822,7 +915,6 @@
 
 		prod = NEXT_RX_AGG(prod);
 		sw_prod = NEXT_RX_AGG(sw_prod);
-		cp_cons = NEXT_CMP(cp_cons);
 	}
 	rxr->rx_agg_prod = prod;
 	rxr->rx_sw_agg_prod = sw_prod;
@@ -836,7 +928,7 @@
 {
 	unsigned int payload = offset_and_len >> 16;
 	unsigned int len = offset_and_len & 0xffff;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	struct page *page = data;
 	u16 prod = rxr->rx_prod;
 	struct sk_buff *skb;
@@ -852,7 +944,7 @@
 			     DMA_ATTR_WEAK_ORDERING);
 
 	if (unlikely(!payload))
-		payload = eth_get_headlen(data_ptr, len);
+		payload = eth_get_headlen(bp->dev, data_ptr, len);
 
 	skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
 	if (!skb) {
@@ -867,7 +959,7 @@
 
 	frag = &skb_shinfo(skb)->frags[0];
 	skb_frag_size_sub(frag, payload);
-	frag->page_offset += payload;
+	skb_frag_off_add(frag, payload);
 	skb->data_len -= payload;
 	skb->tail += payload;
 
@@ -903,16 +995,21 @@
 	return skb;
 }
 
-static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_napi *bnapi,
-				     struct sk_buff *skb, u16 cp_cons,
-				     u32 agg_bufs)
+static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
+				     struct bnxt_cp_ring_info *cpr,
+				     struct sk_buff *skb, u16 idx,
+				     u32 agg_bufs, bool tpa)
 {
+	struct bnxt_napi *bnapi = cpr->bnapi;
 	struct pci_dev *pdev = bp->pdev;
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 	u16 prod = rxr->rx_agg_prod;
+	bool p5_tpa = false;
 	u32 i;
 
+	if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
+		p5_tpa = true;
+
 	for (i = 0; i < agg_bufs; i++) {
 		u16 cons, frag_len;
 		struct rx_agg_cmp *agg;
@@ -920,8 +1017,10 @@
 		struct page *page;
 		dma_addr_t mapping;
 
-		agg = (struct rx_agg_cmp *)
-			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+		if (p5_tpa)
+			agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i);
+		else
+			agg = bnxt_get_agg(bp, cpr, idx, i);
 		cons = agg->rx_agg_cmp_opaque;
 		frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) &
 			    RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
@@ -955,7 +1054,7 @@
 			 * allocated already.
 			 */
 			rxr->rx_agg_prod = prod;
-			bnxt_reuse_rx_agg_bufs(bnapi, cp_cons, agg_bufs - i);
+			bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa);
 			return NULL;
 		}
 
@@ -968,7 +1067,6 @@
 		skb->truesize += PAGE_SIZE;
 
 		prod = NEXT_RX_AGG(prod);
-		cp_cons = NEXT_CMP(cp_cons);
 	}
 	rxr->rx_agg_prod = prod;
 	return skb;
@@ -1012,10 +1110,9 @@
 	return skb;
 }
 
-static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_napi *bnapi,
+static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 			   u32 *raw_cons, void *cmp)
 {
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	struct rx_cmp *rxcmp = cmp;
 	u32 tmp_raw_cons = *raw_cons;
 	u8 cmp_type, agg_bufs = 0;
@@ -1029,9 +1126,10 @@
 	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
 		struct rx_tpa_end_cmp *tpa_end = cmp;
 
-		agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
-			    RX_TPA_END_CMP_AGG_BUFS) >>
-			   RX_TPA_END_CMP_AGG_BUFS_SHIFT;
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			return 0;
+
+		agg_bufs = TPA_END_AGG_BUFS(tpa_end);
 	}
 
 	if (agg_bufs) {
@@ -1042,6 +1140,14 @@
 	return 0;
 }
 
+static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
+{
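+	/* The PF has a dedicated workqueue; VFs use the system workqueue */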
+	if (BNXT_PF(bp))
+		queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
+	else
+		schedule_delayed_work(&bp->fw_reset_task, delay);
+}
+
 static void bnxt_queue_sp_work(struct bnxt *bp)
 {
 	if (BNXT_PF(bp))
@@ -1068,24 +1174,60 @@
 	rxr->rx_next_cons = 0xffff;
 }
 
+static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
+{
+	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
+	u16 idx = agg_id & MAX_TPA_P5_MASK;
+
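+	/* On a collision, fall back to the first free slot in the bitmap */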
+	if (test_bit(idx, map->agg_idx_bmap))
+		idx = find_first_zero_bit(map->agg_idx_bmap,
+					  BNXT_AGG_IDX_BMAP_SIZE);
+	__set_bit(idx, map->agg_idx_bmap);
+	map->agg_id_tbl[agg_id] = idx;
+	return idx;
+}
+
+static void bnxt_free_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx)
+{
+	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
+
+	__clear_bit(idx, map->agg_idx_bmap);
+}
+
+static u16 bnxt_lookup_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
+{
+	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
+
+	return map->agg_id_tbl[agg_id];
+}
+
 static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 			   struct rx_tpa_start_cmp *tpa_start,
 			   struct rx_tpa_start_cmp_ext *tpa_start1)
 {
-	u8 agg_id = TPA_START_AGG_ID(tpa_start);
-	u16 cons, prod;
-	struct bnxt_tpa_info *tpa_info;
 	struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
+	struct bnxt_tpa_info *tpa_info;
+	u16 cons, prod, agg_id;
 	struct rx_bd *prod_bd;
 	dma_addr_t mapping;
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
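+		/* P5 chips assign agg IDs in hardware; map them to driver slots */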
+		agg_id = TPA_START_AGG_ID_P5(tpa_start);
+		agg_id = bnxt_alloc_agg_idx(rxr, agg_id);
+	} else {
+		agg_id = TPA_START_AGG_ID(tpa_start);
+	}
 	cons = tpa_start->rx_tpa_start_cmp_opaque;
 	prod = rxr->rx_prod;
 	cons_rx_buf = &rxr->rx_buf_ring[cons];
 	prod_rx_buf = &rxr->rx_buf_ring[prod];
 	tpa_info = &rxr->rx_tpa[agg_id];
 
-	if (unlikely(cons != rxr->rx_next_cons)) {
+	if (unlikely(cons != rxr->rx_next_cons ||
+		     TPA_START_ERROR(tpa_start))) {
+		netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
+			    cons, rxr->rx_next_cons,
+			    TPA_START_ERROR_CODE(tpa_start1));
 		bnxt_sched_reset(bp, rxr);
 		return;
 	}
@@ -1130,6 +1272,7 @@
 	tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2);
 	tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata);
 	tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info);
+	tpa_info->agg_count = 0;
 
 	rxr->rx_prod = NEXT_RX(prod);
 	cons = NEXT_RX(cons);
@@ -1141,13 +1284,37 @@
 	cons_rx_buf->data = NULL;
 }
 
-static void bnxt_abort_tpa(struct bnxt *bp, struct bnxt_napi *bnapi,
-			   u16 cp_cons, u32 agg_bufs)
+static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 idx, u32 agg_bufs)
 {
 	if (agg_bufs)
-		bnxt_reuse_rx_agg_bufs(bnapi, cp_cons, agg_bufs);
+		bnxt_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true);
 }
 
+#ifdef CONFIG_INET
+static void bnxt_gro_tunnel(struct sk_buff *skb, __be16 ip_proto)
+{
+	struct udphdr *uh = NULL;
+
+	if (ip_proto == htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)skb->data;
+
+		if (iph->protocol == IPPROTO_UDP)
+			uh = (struct udphdr *)(iph + 1);
+	} else {
+		struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+
+		if (iph->nexthdr == IPPROTO_UDP)
+			uh = (struct udphdr *)(iph + 1);
+	}
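+	/* The outer UDP checksum determines which GSO tunnel type to report */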
+	if (uh) {
+		if (uh->check)
+			skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+		else
+			skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+	}
+}
+#endif
+
 static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info,
 					   int payload_off, int tcp_ts,
 					   struct sk_buff *skb)
@@ -1205,28 +1372,39 @@
 	}
 
 	if (inner_mac_off) { /* tunnel */
-		struct udphdr *uh = NULL;
 		__be16 proto = *((__be16 *)(skb->data + outer_ip_off -
 					    ETH_HLEN - 2));
 
-		if (proto == htons(ETH_P_IP)) {
-			struct iphdr *iph = (struct iphdr *)skb->data;
+		bnxt_gro_tunnel(skb, proto);
+	}
+#endif
+	return skb;
+}
 
-			if (iph->protocol == IPPROTO_UDP)
-				uh = (struct udphdr *)(iph + 1);
-		} else {
-			struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+static struct sk_buff *bnxt_gro_func_5750x(struct bnxt_tpa_info *tpa_info,
+					   int payload_off, int tcp_ts,
+					   struct sk_buff *skb)
+{
+#ifdef CONFIG_INET
+	u16 outer_ip_off, inner_ip_off, inner_mac_off;
+	u32 hdr_info = tpa_info->hdr_info;
+	int iphdr_len, nw_off;
 
-			if (iph->nexthdr == IPPROTO_UDP)
-				uh = (struct udphdr *)(iph + 1);
-		}
-		if (uh) {
-			if (uh->check)
-				skb_shinfo(skb)->gso_type |=
-					SKB_GSO_UDP_TUNNEL_CSUM;
-			else
-				skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-		}
+	inner_ip_off = BNXT_TPA_INNER_L3_OFF(hdr_info);
+	inner_mac_off = BNXT_TPA_INNER_L2_OFF(hdr_info);
+	outer_ip_off = BNXT_TPA_OUTER_L3_OFF(hdr_info);
+
+	nw_off = inner_ip_off - ETH_HLEN;
+	skb_set_network_header(skb, nw_off);
+	iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ?
+		     sizeof(struct ipv6hdr) : sizeof(struct iphdr);
+	skb_set_transport_header(skb, nw_off + iphdr_len);
+
+	if (inner_mac_off) { /* tunnel */
+		__be16 proto = *((__be16 *)(skb->data + outer_ip_off -
+					    ETH_HLEN - 2));
+
+		bnxt_gro_tunnel(skb, proto);
 	}
 #endif
 	return skb;
@@ -1273,28 +1451,8 @@
 		return NULL;
 	}
 
-	if (nw_off) { /* tunnel */
-		struct udphdr *uh = NULL;
-
-		if (skb->protocol == htons(ETH_P_IP)) {
-			struct iphdr *iph = (struct iphdr *)skb->data;
-
-			if (iph->protocol == IPPROTO_UDP)
-				uh = (struct udphdr *)(iph + 1);
-		} else {
-			struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
-
-			if (iph->nexthdr == IPPROTO_UDP)
-				uh = (struct udphdr *)(iph + 1);
-		}
-		if (uh) {
-			if (uh->check)
-				skb_shinfo(skb)->gso_type |=
-					SKB_GSO_UDP_TUNNEL_CSUM;
-			else
-				skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-		}
-	}
+	if (nw_off) /* tunnel */
+		bnxt_gro_tunnel(skb, skb->protocol);
 #endif
 	return skb;
 }
@@ -1317,9 +1475,10 @@
 	skb_shinfo(skb)->gso_size =
 		le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
 	skb_shinfo(skb)->gso_type = tpa_info->gso_type;
-	payload_off = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
-		       RX_TPA_END_CMP_PAYLOAD_OFFSET) >>
-		      RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT;
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1);
+	else
+		payload_off = TPA_END_PAYLOAD_OFF(tpa_end);
 	skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb);
 	if (likely(skb))
 		tcp_gro_complete(skb);
@@ -1339,51 +1498,68 @@
 }
 
 static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
-					   struct bnxt_napi *bnapi,
+					   struct bnxt_cp_ring_info *cpr,
 					   u32 *raw_cons,
 					   struct rx_tpa_end_cmp *tpa_end,
 					   struct rx_tpa_end_cmp_ext *tpa_end1,
 					   u8 *event)
 {
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	struct bnxt_napi *bnapi = cpr->bnapi;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
-	u8 agg_id = TPA_END_AGG_ID(tpa_end);
 	u8 *data_ptr, agg_bufs;
-	u16 cp_cons = RING_CMP(*raw_cons);
 	unsigned int len;
 	struct bnxt_tpa_info *tpa_info;
 	dma_addr_t mapping;
 	struct sk_buff *skb;
+	u16 idx = 0, agg_id;
 	void *data;
+	bool gro;
 
 	if (unlikely(bnapi->in_reset)) {
-		int rc = bnxt_discard_rx(bp, bnapi, raw_cons, tpa_end);
+		int rc = bnxt_discard_rx(bp, cpr, raw_cons, tpa_end);
 
 		if (rc < 0)
 			return ERR_PTR(-EBUSY);
 		return NULL;
 	}
 
-	tpa_info = &rxr->rx_tpa[agg_id];
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		agg_id = TPA_END_AGG_ID_P5(tpa_end);
+		agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
+		agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1);
+		tpa_info = &rxr->rx_tpa[agg_id];
+		if (unlikely(agg_bufs != tpa_info->agg_count)) {
+			netdev_warn(bp->dev, "TPA end agg_bufs %d != expected agg_bufs %d\n",
+				    agg_bufs, tpa_info->agg_count);
+			agg_bufs = tpa_info->agg_count;
+		}
+		tpa_info->agg_count = 0;
+		*event |= BNXT_AGG_EVENT;
+		bnxt_free_agg_idx(rxr, agg_id);
+		idx = agg_id;
+		gro = !!(bp->flags & BNXT_FLAG_GRO);
+	} else {
+		agg_id = TPA_END_AGG_ID(tpa_end);
+		agg_bufs = TPA_END_AGG_BUFS(tpa_end);
+		tpa_info = &rxr->rx_tpa[agg_id];
+		idx = RING_CMP(*raw_cons);
+		if (agg_bufs) {
+			if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
+				return ERR_PTR(-EBUSY);
+
+			*event |= BNXT_AGG_EVENT;
+			idx = NEXT_CMP(idx);
+		}
+		gro = !!TPA_END_GRO(tpa_end);
+	}
 	data = tpa_info->data;
 	data_ptr = tpa_info->data_ptr;
 	prefetch(data_ptr);
 	len = tpa_info->len;
 	mapping = tpa_info->mapping;
 
-	agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
-		    RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT;
-
-	if (agg_bufs) {
-		if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
-			return ERR_PTR(-EBUSY);
-
-		*event |= BNXT_AGG_EVENT;
-		cp_cons = NEXT_CMP(cp_cons);
-	}
-
 	if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
-		bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
+		bnxt_abort_tpa(cpr, idx, agg_bufs);
 		if (agg_bufs > MAX_SKB_FRAGS)
 			netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
 				    agg_bufs, (int)MAX_SKB_FRAGS);
@@ -1393,7 +1569,7 @@
 	if (len <= bp->rx_copy_thresh) {
 		skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
 		if (!skb) {
-			bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
+			bnxt_abort_tpa(cpr, idx, agg_bufs);
 			return NULL;
 		}
 	} else {
@@ -1402,7 +1578,7 @@
 
 		new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC);
 		if (!new_data) {
-			bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
+			bnxt_abort_tpa(cpr, idx, agg_bufs);
 			return NULL;
 		}
 
@@ -1417,7 +1593,7 @@
 
 		if (!skb) {
 			kfree(data);
-			bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
+			bnxt_abort_tpa(cpr, idx, agg_bufs);
 			return NULL;
 		}
 		skb_reserve(skb, bp->rx_offset);
@@ -1425,7 +1601,7 @@
 	}
 
 	if (agg_bufs) {
-		skb = bnxt_rx_pages(bp, bnapi, skb, cp_cons, agg_bufs);
+		skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
 		if (!skb) {
 			/* Page reuse already handled by bnxt_rx_pages(). */
 			return NULL;
@@ -1454,12 +1630,24 @@
 			(tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3;
 	}
 
-	if (TPA_END_GRO(tpa_end))
+	if (gro)
 		skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb);
 
 	return skb;
 }
 
+static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+			 struct rx_agg_cmp *rx_agg)
+{
+	u16 agg_id = TPA_AGG_AGG_ID(rx_agg);
+	struct bnxt_tpa_info *tpa_info;
+
+	agg_id = bnxt_lookup_agg_idx(rxr, agg_id);
+	tpa_info = &rxr->rx_tpa[agg_id];
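+	/* Buffer this agg completion until the TPA end completion arrives */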
+	BUG_ON(tpa_info->agg_count >= MAX_SKB_FRAGS);
+	tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg;
+}
+
 static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
 			     struct sk_buff *skb)
 {
@@ -1479,10 +1667,10 @@
  * -ENOMEM - packet aborted due to out of memory
  * -EIO    - packet aborted due to hw error indicated in BD
  */
-static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
-		       u8 *event)
+static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+		       u32 *raw_cons, u8 *event)
 {
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	struct bnxt_napi *bnapi = cpr->bnapi;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 	struct net_device *dev = bp->dev;
 	struct rx_cmp *rxcmp;
@@ -1501,6 +1689,13 @@
 	rxcmp = (struct rx_cmp *)
 			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
 
+	cmp_type = RX_CMP_TYPE(rxcmp);
+
+	if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) {
+		bnxt_tpa_agg(bp, rxr, (struct rx_agg_cmp *)rxcmp);
+		goto next_rx_no_prod_no_len;
+	}
+
 	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
 	cp_cons = RING_CMP(tmp_raw_cons);
 	rxcmp1 = (struct rx_cmp_ext *)
@@ -1509,8 +1704,6 @@
 	if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
 		return -EBUSY;
 
-	cmp_type = RX_CMP_TYPE(rxcmp);
-
 	prod = rxr->rx_prod;
 
 	if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) {
@@ -1521,7 +1714,7 @@
 		goto next_rx_no_prod_no_len;
 
 	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
-		skb = bnxt_tpa_end(bp, bnapi, &tmp_raw_cons,
+		skb = bnxt_tpa_end(bp, cpr, &tmp_raw_cons,
 				   (struct rx_tpa_end_cmp *)rxcmp,
 				   (struct rx_tpa_end_cmp_ext *)rxcmp1, event);
 
@@ -1538,15 +1731,17 @@
 	}
 
 	cons = rxcmp->rx_cmp_opaque;
-	rx_buf = &rxr->rx_buf_ring[cons];
-	data = rx_buf->data;
-	data_ptr = rx_buf->data_ptr;
 	if (unlikely(cons != rxr->rx_next_cons)) {
-		int rc1 = bnxt_discard_rx(bp, bnapi, raw_cons, rxcmp);
+		int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
 
+		netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
+			    cons, rxr->rx_next_cons);
 		bnxt_sched_reset(bp, rxr);
 		return rc1;
 	}
+	rx_buf = &rxr->rx_buf_ring[cons];
+	data = rx_buf->data;
+	data_ptr = rx_buf->data_ptr;
 	prefetch(data_ptr);
 
 	misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1);
@@ -1563,12 +1758,19 @@
 
 	rx_buf->data = NULL;
 	if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) {
+		u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2);
+
 		bnxt_reuse_rx_data(rxr, cons, data);
 		if (agg_bufs)
-			bnxt_reuse_rx_agg_bufs(bnapi, cp_cons, agg_bufs);
+			bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs,
+					       false);
 
 		rc = -EIO;
-		goto next_rx;
+		if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
+			netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
+			bnxt_sched_reset(bp, rxr);
+		}
+		goto next_rx_no_len;
 	}
 
 	len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT;
@@ -1583,6 +1785,9 @@
 		skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr);
 		bnxt_reuse_rx_data(rxr, cons, data);
 		if (!skb) {
+			if (agg_bufs)
+				bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
+						       agg_bufs, false);
 			rc = -ENOMEM;
 			goto next_rx;
 		}
@@ -1602,7 +1807,7 @@
 	}
 
 	if (agg_bufs) {
-		skb = bnxt_rx_pages(bp, bnapi, skb, cp_cons, agg_bufs);
+		skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
 		if (!skb) {
 			rc = -ENOMEM;
 			goto next_rx;
@@ -1641,7 +1846,7 @@
 	} else {
 		if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L4_CS_ERR_BITS) {
 			if (dev->features & NETIF_F_RXCSUM)
-				cpr->rx_l4_csum_errors++;
+				bnapi->cp_ring.rx_l4_csum_errors++;
 		}
 	}
 
@@ -1649,12 +1854,13 @@
 	rc = 1;
 
 next_rx:
-	rxr->rx_prod = NEXT_RX(prod);
-	rxr->rx_next_cons = NEXT_RX(cons);
-
 	cpr->rx_packets += 1;
 	cpr->rx_bytes += len;
 
+next_rx_no_len:
+	rxr->rx_prod = NEXT_RX(prod);
+	rxr->rx_next_cons = NEXT_RX(cons);
+
 next_rx_no_prod_no_len:
 	*raw_cons = tmp_raw_cons;
 
@@ -1664,10 +1870,10 @@
 /* In netpoll mode, if we are using a combined completion ring, we need to
  * discard the rx packets and recycle the buffers.
  */
-static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+static int bnxt_force_rx_discard(struct bnxt *bp,
+				 struct bnxt_cp_ring_info *cpr,
 				 u32 *raw_cons, u8 *event)
 {
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	u32 tmp_raw_cons = *raw_cons;
 	struct rx_cmp_ext *rxcmp1;
 	struct rx_cmp *rxcmp;
@@ -1697,7 +1903,34 @@
 		tpa_end1->rx_tpa_end_cmp_errors_v2 |=
 			cpu_to_le32(RX_TPA_END_CMP_ERRORS);
 	}
-	return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+	return bnxt_rx_pkt(bp, cpr, raw_cons, event);
+}
+
+u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 reg = fw_health->regs[reg_idx];
+	u32 reg_type, reg_off, val = 0;
+
+	reg_type = BNXT_FW_HEALTH_REG_TYPE(reg);
+	reg_off = BNXT_FW_HEALTH_REG_OFF(reg);
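+	/* Health registers may live in PCI config space, the GRC window, or a BAR */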
+	switch (reg_type) {
+	case BNXT_FW_HEALTH_REG_TYPE_CFG:
+		pci_read_config_dword(bp->pdev, reg_off, &val);
+		break;
+	case BNXT_FW_HEALTH_REG_TYPE_GRC:
+		reg_off = fw_health->mapped_regs[reg_idx];
+		/* fall through */
+	case BNXT_FW_HEALTH_REG_TYPE_BAR0:
+		val = readl(bp->bar0 + reg_off);
+		break;
+	case BNXT_FW_HEALTH_REG_TYPE_BAR1:
+		val = readl(bp->bar1 + reg_off);
+		break;
+	}
+	if (reg_idx == BNXT_FW_RESET_INPROG_REG)
+		val &= fw_health->fw_reset_inprog_reg_mask;
+	return val;
 }
 
 #define BNXT_GET_EVENT_PORT(data)	\
@@ -1755,6 +1988,55 @@
 			goto async_event_process_exit;
 		set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event);
 		break;
+	case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
+		u32 data1 = le32_to_cpu(cmpl->event_data1);
+
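+		/* Firmware reports min/max reset wait times in 100 ms units */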
+		bp->fw_reset_timestamp = jiffies;
+		bp->fw_reset_min_dsecs = cmpl->timestamp_lo;
+		if (!bp->fw_reset_min_dsecs)
+			bp->fw_reset_min_dsecs = BNXT_DFLT_FW_RST_MIN_DSECS;
+		bp->fw_reset_max_dsecs = le16_to_cpu(cmpl->timestamp_hi);
+		if (!bp->fw_reset_max_dsecs)
+			bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS;
+		if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) {
+			netdev_warn(bp->dev, "Firmware fatal reset event received\n");
+			set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
+		} else {
+			netdev_warn(bp->dev, "Firmware non-fatal reset event received, max wait time %d msec\n",
+				    bp->fw_reset_max_dsecs * 100);
+		}
+		set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event);
+		break;
+	}
+	case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: {
+		struct bnxt_fw_health *fw_health = bp->fw_health;
+		u32 data1 = le32_to_cpu(cmpl->event_data1);
+
+		if (!fw_health)
+			goto async_event_process_exit;
+
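+		/* Record whether recovery is enabled and if this function is master */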
+		fw_health->enabled = EVENT_DATA1_RECOVERY_ENABLED(data1);
+		fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
+		if (!fw_health->enabled)
+			break;
+
+		if (netif_msg_drv(bp))
+			netdev_info(bp->dev, "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n",
+				    fw_health->enabled, fw_health->master,
+				    bnxt_fw_health_readl(bp,
+							 BNXT_FW_RESET_CNT_REG),
+				    bnxt_fw_health_readl(bp,
+							 BNXT_FW_HEALTH_REG));
+		fw_health->tmr_multiplier =
+			DIV_ROUND_UP(fw_health->polling_dsecs * HZ,
+				     bp->current_interval * 10);
+		fw_health->tmr_counter = fw_health->tmr_multiplier;
+		fw_health->last_fw_heartbeat =
+			bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
+		fw_health->last_fw_reset_cnt =
+			bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+		goto async_event_process_exit;
+	}
 	default:
 		goto async_event_process_exit;
 	}
@@ -1775,7 +2057,7 @@
 	case CMPL_BASE_TYPE_HWRM_DONE:
 		seq_id = le16_to_cpu(h_cmpl->sequence_id);
 		if (seq_id == bp->hwrm_intr_seq_id)
-			bp->hwrm_intr_seq_id = HWRM_SEQ_ID_INVALID;
+			bp->hwrm_intr_seq_id = (u16)~bp->hwrm_intr_seq_id;
 		else
 			netdev_err(bp->dev, "Invalid hwrm seq id %d\n", seq_id);
 		break;
@@ -1848,7 +2130,7 @@
 	}
 
 	/* disable ring IRQ */
-	BNXT_CP_DB_IRQ_DIS(cpr->cp_doorbell);
+	BNXT_CP_DB_IRQ_DIS(cpr->cp_db.doorbell);
 
 	/* Return here if interrupt is shared and is disabled. */
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
@@ -1858,9 +2140,10 @@
 	return IRQ_HANDLED;
 }
 
-static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+			    int budget)
 {
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	struct bnxt_napi *bnapi = cpr->bnapi;
 	u32 raw_cons = cpr->cp_raw_cons;
 	u32 cons;
 	int tx_pkts = 0;
@@ -1868,6 +2151,7 @@
 	u8 event = 0;
 	struct tx_cmp *txcmp;
 
+	cpr->has_more_work = 0;
 	while (1) {
 		int rc;
 
@@ -1881,19 +2165,22 @@
 		 * reading any further.
 		 */
 		dma_rmb();
+		cpr->had_work_done = 1;
 		if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) {
 			tx_pkts++;
 			/* return full budget so NAPI will complete. */
 			if (unlikely(tx_pkts > bp->tx_wake_thresh)) {
 				rx_pkts = budget;
 				raw_cons = NEXT_RAW_CMP(raw_cons);
+				if (budget)
+					cpr->has_more_work = 1;
 				break;
 			}
 		} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
 			if (likely(budget))
-				rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+				rc = bnxt_rx_pkt(bp, cpr, &raw_cons, &event);
 			else
-				rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+				rc = bnxt_force_rx_discard(bp, cpr, &raw_cons,
 							   &event);
 			if (likely(rc >= 0))
 				rx_pkts += rc;
@@ -1916,39 +2203,63 @@
 		}
 		raw_cons = NEXT_RAW_CMP(raw_cons);
 
-		if (rx_pkts && rx_pkts == budget)
+		if (rx_pkts && rx_pkts == budget) {
+			cpr->has_more_work = 1;
 			break;
+		}
 	}
 
+	if (event & BNXT_REDIRECT_EVENT)
+		xdp_do_flush_map();
+
 	if (event & BNXT_TX_EVENT) {
 		struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
-		void __iomem *db = txr->tx_doorbell;
 		u16 prod = txr->tx_prod;
 
 		/* Sync BD data before updating doorbell */
 		wmb();
 
-		bnxt_db_write_relaxed(bp, db, DB_KEY_TX | prod);
+		bnxt_db_write_relaxed(bp, &txr->tx_db, prod);
 	}
 
 	cpr->cp_raw_cons = raw_cons;
+	bnapi->tx_pkts += tx_pkts;
+	bnapi->events |= event;
+	return rx_pkts;
+}
+
+static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
+{
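+	/* Flush deferred TX completions and ring the RX/agg doorbells */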
+	if (bnapi->tx_pkts) {
+		bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
+		bnapi->tx_pkts = 0;
+	}
+
+	if (bnapi->events & BNXT_RX_EVENT) {
+		struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+
+		if (bnapi->events & BNXT_AGG_EVENT)
+			bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+		bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
+	}
+	bnapi->events = 0;
+}
+
+static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+			  int budget)
+{
+	struct bnxt_napi *bnapi = cpr->bnapi;
+	int rx_pkts;
+
+	rx_pkts = __bnxt_poll_work(bp, cpr, budget);
+
 	/* ACK completion ring before freeing tx ring and producing new
 	 * buffers in rx/agg rings to prevent overflowing the completion
 	 * ring.
 	 */
-	BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
+	bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons);
 
-	if (tx_pkts)
-		bnapi->tx_int(bp, bnapi, tx_pkts);
-
-	if (event & BNXT_RX_EVENT) {
-		struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
-
-		bnxt_db_write(bp, rxr->rx_doorbell, DB_KEY_RX | rxr->rx_prod);
-		if (event & BNXT_AGG_EVENT)
-			bnxt_db_write(bp, rxr->rx_agg_doorbell,
-				      DB_KEY_RX | rxr->rx_agg_prod);
-	}
+	__bnxt_poll_work_done(bp, bnapi);
 	return rx_pkts;
 }
 
@@ -1987,7 +2298,7 @@
 			rxcmp1->rx_cmp_cfa_code_errors_v2 |=
 				cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
 
-			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			rc = bnxt_rx_pkt(bp, cpr, &raw_cons, &event);
 			if (likely(rc == -EIO) && budget)
 				rx_pkts++;
 			else if (rc == -EBUSY)	/* partial completion */
@@ -2006,16 +2317,15 @@
 	}
 
 	cpr->cp_raw_cons = raw_cons;
-	BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
-	bnxt_db_write(bp, rxr->rx_doorbell, DB_KEY_RX | rxr->rx_prod);
+	BNXT_DB_CQ(&cpr->cp_db, cpr->cp_raw_cons);
+	bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
 
 	if (event & BNXT_AGG_EVENT)
-		bnxt_db_write(bp, rxr->rx_agg_doorbell,
-			      DB_KEY_RX | rxr->rx_agg_prod);
+		bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
 
 	if (!bnxt_has_work(bp, cpr) && rx_pkts < budget) {
 		napi_complete_done(napi, rx_pkts);
-		BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons);
+		BNXT_DB_CQ_ARM(&cpr->cp_db, cpr->cp_raw_cons);
 	}
 	return rx_pkts;
 }
@@ -2028,32 +2338,127 @@
 	int work_done = 0;
 
 	while (1) {
-		work_done += bnxt_poll_work(bp, bnapi, budget - work_done);
+		work_done += bnxt_poll_work(bp, cpr, budget - work_done);
 
 		if (work_done >= budget) {
 			if (!budget)
-				BNXT_CP_DB_REARM(cpr->cp_doorbell,
-						 cpr->cp_raw_cons);
+				BNXT_DB_CQ_ARM(&cpr->cp_db, cpr->cp_raw_cons);
 			break;
 		}
 
 		if (!bnxt_has_work(bp, cpr)) {
 			if (napi_complete_done(napi, work_done))
-				BNXT_CP_DB_REARM(cpr->cp_doorbell,
-						 cpr->cp_raw_cons);
+				BNXT_DB_CQ_ARM(&cpr->cp_db, cpr->cp_raw_cons);
 			break;
 		}
 	}
 	if (bp->flags & BNXT_FLAG_DIM) {
-		struct net_dim_sample dim_sample;
+		struct dim_sample dim_sample = {};
 
-		net_dim_sample(cpr->event_ctr,
-			       cpr->rx_packets,
-			       cpr->rx_bytes,
-			       &dim_sample);
+		dim_update_sample(cpr->event_ctr,
+				  cpr->rx_packets,
+				  cpr->rx_bytes,
+				  &dim_sample);
 		net_dim(&cpr->dim, dim_sample);
 	}
-	mmiowb();
+	return work_done;
+}
+
+static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+{
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	int i, work_done = 0;
+
+	for (i = 0; i < 2; i++) {
+		struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i];
+
+		if (cpr2) {
+			work_done += __bnxt_poll_work(bp, cpr2,
+						      budget - work_done);
+			cpr->has_more_work |= cpr2->has_more_work;
+		}
+	}
+	return work_done;
+}
+
+static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
+				 u64 dbr_type, bool all)
+{
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i];
+		struct bnxt_db_info *db;
+
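+		/* Ack the CQ with the requested doorbell type (with or without arm) */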
+		if (cpr2 && (all || cpr2->had_work_done)) {
+			db = &cpr2->cp_db;
+			writeq(db->db_key64 | dbr_type |
+			       RING_CMP(cpr2->cp_raw_cons), db->doorbell);
+			cpr2->had_work_done = 0;
+		}
+	}
+	__bnxt_poll_work_done(bp, bnapi);
+}
+
+static int bnxt_poll_p5(struct napi_struct *napi, int budget)
+{
+	struct bnxt_napi *bnapi = container_of(napi, struct bnxt_napi, napi);
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	u32 raw_cons = cpr->cp_raw_cons;
+	struct bnxt *bp = bnapi->bp;
+	struct nqe_cn *nqcmp;
+	int work_done = 0;
+	u32 cons;
+
+	if (cpr->has_more_work) {
+		cpr->has_more_work = 0;
+		work_done = __bnxt_poll_cqs(bp, bnapi, budget);
+		if (cpr->has_more_work) {
+			__bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, false);
+			return work_done;
+		}
+		__bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL, true);
+		if (napi_complete_done(napi, work_done))
+			BNXT_DB_NQ_ARM_P5(&cpr->cp_db, cpr->cp_raw_cons);
+		return work_done;
+	}
+	while (1) {
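+		/* Drain the NQ; each notification points at a completion ring */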
+		cons = RING_CMP(raw_cons);
+		nqcmp = &cpr->nq_desc_ring[CP_RING(cons)][CP_IDX(cons)];
+
+		if (!NQ_CMP_VALID(nqcmp, raw_cons)) {
+			__bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL,
+					     false);
+			cpr->cp_raw_cons = raw_cons;
+			if (napi_complete_done(napi, work_done))
+				BNXT_DB_NQ_ARM_P5(&cpr->cp_db,
+						  cpr->cp_raw_cons);
+			return work_done;
+		}
+
+		/* The valid test of the entry must be done first before
+		 * reading any further.
+		 */
+		dma_rmb();
+
+		if (nqcmp->type == cpu_to_le16(NQ_CN_TYPE_CQ_NOTIFICATION)) {
+			u32 idx = le32_to_cpu(nqcmp->cq_handle_low);
+			struct bnxt_cp_ring_info *cpr2;
+
+			cpr2 = cpr->cp_ring_arr[idx];
+			work_done += __bnxt_poll_work(bp, cpr2,
+						      budget - work_done);
+			cpr->has_more_work = cpr2->has_more_work;
+		} else {
+			bnxt_hwrm_handler(bp, (struct tx_cmp *)nqcmp);
+		}
+		raw_cons = NEXT_RAW_CMP(raw_cons);
+		if (cpr->has_more_work)
+			break;
+	}
+	__bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, true);
+	cpr->cp_raw_cons = raw_cons;
 	return work_done;
 }
 
@@ -2072,9 +2477,23 @@
 
 		for (j = 0; j < max_idx;) {
 			struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j];
-			struct sk_buff *skb = tx_buf->skb;
+			struct sk_buff *skb;
 			int k, last;
 
+			if (i < bp->tx_nr_rings_xdp &&
+			    tx_buf->action == XDP_REDIRECT) {
+				dma_unmap_single(&pdev->dev,
+					dma_unmap_addr(tx_buf, mapping),
+					dma_unmap_len(tx_buf, len),
+					PCI_DMA_TODEVICE);
+				xdp_return_frame(tx_buf->xdpf);
+				tx_buf->action = 0;
+				tx_buf->xdpf = NULL;
+				j++;
+				continue;
+			}
+
+			skb = tx_buf->skb;
 			if (!skb) {
 				j++;
 				continue;
@@ -2123,10 +2542,11 @@
 	max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT;
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+		struct bnxt_tpa_idx_map *map;
 		int j;
 
 		if (rxr->rx_tpa) {
-			for (j = 0; j < MAX_TPA; j++) {
+			for (j = 0; j < bp->max_tpa; j++) {
 				struct bnxt_tpa_info *tpa_info =
 							&rxr->rx_tpa[j];
 				u8 *data = tpa_info->data;
@@ -2161,7 +2581,7 @@
 				dma_unmap_page_attrs(&pdev->dev, mapping,
 						     PAGE_SIZE, bp->rx_dir,
 						     DMA_ATTR_WEAK_ORDERING);
-				__free_page(data);
+				page_pool_recycle_direct(rxr->page_pool, data);
 			} else {
 				dma_unmap_single_attrs(&pdev->dev, mapping,
 						       bp->rx_buf_use_size,
@@ -2193,6 +2613,9 @@
 			__free_page(rxr->rx_page);
 			rxr->rx_page = NULL;
 		}
+		map = rxr->rx_tpa_idx_map;
+		if (map)
+			memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap));
 	}
 }
 
@@ -2202,60 +2625,135 @@
 	bnxt_free_rx_skbs(bp);
 }
 
-static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_struct *ring)
+static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
 {
 	struct pci_dev *pdev = bp->pdev;
 	int i;
 
-	for (i = 0; i < ring->nr_pages; i++) {
-		if (!ring->pg_arr[i])
+	for (i = 0; i < rmem->nr_pages; i++) {
+		if (!rmem->pg_arr[i])
 			continue;
 
-		dma_free_coherent(&pdev->dev, ring->page_size,
-				  ring->pg_arr[i], ring->dma_arr[i]);
+		dma_free_coherent(&pdev->dev, rmem->page_size,
+				  rmem->pg_arr[i], rmem->dma_arr[i]);
 
-		ring->pg_arr[i] = NULL;
+		rmem->pg_arr[i] = NULL;
 	}
-	if (ring->pg_tbl) {
-		dma_free_coherent(&pdev->dev, ring->nr_pages * 8,
-				  ring->pg_tbl, ring->pg_tbl_map);
-		ring->pg_tbl = NULL;
+	if (rmem->pg_tbl) {
+		size_t pg_tbl_size = rmem->nr_pages * 8;
+
+		if (rmem->flags & BNXT_RMEM_USE_FULL_PAGE_FLAG)
+			pg_tbl_size = rmem->page_size;
+		dma_free_coherent(&pdev->dev, pg_tbl_size,
+				  rmem->pg_tbl, rmem->pg_tbl_map);
+		rmem->pg_tbl = NULL;
 	}
-	if (ring->vmem_size && *ring->vmem) {
-		vfree(*ring->vmem);
-		*ring->vmem = NULL;
+	if (rmem->vmem_size && *rmem->vmem) {
+		vfree(*rmem->vmem);
+		*rmem->vmem = NULL;
 	}
 }
 
-static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_struct *ring)
+static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
+{
+	struct pci_dev *pdev = bp->pdev;
+	u64 valid_bit = 0;
+	int i;
+
+	if (rmem->flags & (BNXT_RMEM_VALID_PTE_FLAG | BNXT_RMEM_RING_PTE_FLAG))
+		valid_bit = PTU_PTE_VALID;
+	if ((rmem->nr_pages > 1 || rmem->depth > 0) && !rmem->pg_tbl) {
+		size_t pg_tbl_size = rmem->nr_pages * 8;
+
+		if (rmem->flags & BNXT_RMEM_USE_FULL_PAGE_FLAG)
+			pg_tbl_size = rmem->page_size;
+		rmem->pg_tbl = dma_alloc_coherent(&pdev->dev, pg_tbl_size,
+						  &rmem->pg_tbl_map,
+						  GFP_KERNEL);
+		if (!rmem->pg_tbl)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < rmem->nr_pages; i++) {
+		u64 extra_bits = valid_bit;
+
+		rmem->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+						     rmem->page_size,
+						     &rmem->dma_arr[i],
+						     GFP_KERNEL);
+		if (!rmem->pg_arr[i])
+			return -ENOMEM;
+
+		if (rmem->nr_pages > 1 || rmem->depth > 0) {
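+			/* For ring PTEs, tag the last two entries to mark the wrap point */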
+			if (i == rmem->nr_pages - 2 &&
+			    (rmem->flags & BNXT_RMEM_RING_PTE_FLAG))
+				extra_bits |= PTU_PTE_NEXT_TO_LAST;
+			else if (i == rmem->nr_pages - 1 &&
+				 (rmem->flags & BNXT_RMEM_RING_PTE_FLAG))
+				extra_bits |= PTU_PTE_LAST;
+			rmem->pg_tbl[i] =
+				cpu_to_le64(rmem->dma_arr[i] | extra_bits);
+		}
+	}
+
+	if (rmem->vmem_size) {
+		*rmem->vmem = vzalloc(rmem->vmem_size);
+		if (!(*rmem->vmem))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static void bnxt_free_tpa_info(struct bnxt *bp)
 {
 	int i;
-	struct pci_dev *pdev = bp->pdev;
 
-	if (ring->nr_pages > 1) {
-		ring->pg_tbl = dma_alloc_coherent(&pdev->dev,
-						  ring->nr_pages * 8,
-						  &ring->pg_tbl_map,
-						  GFP_KERNEL);
-		if (!ring->pg_tbl)
-			return -ENOMEM;
+	for (i = 0; i < bp->rx_nr_rings; i++) {
+		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+
+		kfree(rxr->rx_tpa_idx_map);
+		rxr->rx_tpa_idx_map = NULL;
+		if (rxr->rx_tpa) {
+			kfree(rxr->rx_tpa[0].agg_arr);
+			rxr->rx_tpa[0].agg_arr = NULL;
+		}
+		kfree(rxr->rx_tpa);
+		rxr->rx_tpa = NULL;
+	}
+}
+
+static int bnxt_alloc_tpa_info(struct bnxt *bp)
+{
+	int i, j, total_aggs = 0;
+
+	bp->max_tpa = MAX_TPA;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		if (!bp->max_tpa_v2)
+			return 0;
+		bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+		total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
 	}
 
-	for (i = 0; i < ring->nr_pages; i++) {
-		ring->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
-						     ring->page_size,
-						     &ring->dma_arr[i],
-						     GFP_KERNEL);
-		if (!ring->pg_arr[i])
+	for (i = 0; i < bp->rx_nr_rings; i++) {
+		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+		struct rx_agg_cmp *agg;
+
+		rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info),
+				      GFP_KERNEL);
+		if (!rxr->rx_tpa)
 			return -ENOMEM;
 
-		if (ring->nr_pages > 1)
-			ring->pg_tbl[i] = cpu_to_le64(ring->dma_arr[i]);
-	}
-
-	if (ring->vmem_size) {
-		*ring->vmem = vzalloc(ring->vmem_size);
-		if (!(*ring->vmem))
+		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+			continue;
+		agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
+		rxr->rx_tpa[0].agg_arr = agg;
+		if (!agg)
+			return -ENOMEM;
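+		/* Carve the shared array into per-TPA slices of MAX_SKB_FRAGS entries */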
+		for (j = 1; j < bp->max_tpa; j++)
+			rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
+		rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
+					      GFP_KERNEL);
+		if (!rxr->rx_tpa_idx_map)
 			return -ENOMEM;
 	}
 	return 0;
@@ -2268,6 +2766,7 @@
 	if (!bp->rx_ring)
 		return;
 
+	bnxt_free_tpa_info(bp);
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 		struct bnxt_ring_struct *ring;
@@ -2278,23 +2777,43 @@
 		if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
 			xdp_rxq_info_unreg(&rxr->xdp_rxq);
 
-		kfree(rxr->rx_tpa);
-		rxr->rx_tpa = NULL;
+		page_pool_destroy(rxr->page_pool);
+		rxr->page_pool = NULL;
 
 		kfree(rxr->rx_agg_bmap);
 		rxr->rx_agg_bmap = NULL;
 
 		ring = &rxr->rx_ring_struct;
-		bnxt_free_ring(bp, ring);
+		bnxt_free_ring(bp, &ring->ring_mem);
 
 		ring = &rxr->rx_agg_ring_struct;
-		bnxt_free_ring(bp, ring);
+		bnxt_free_ring(bp, &ring->ring_mem);
 	}
 }
 
+static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
+				   struct bnxt_rx_ring_info *rxr)
+{
+	struct page_pool_params pp = { 0 };
+
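+	/* One pool per RX ring, sized to the RX ring so recycled pages cover refills */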
+	pp.pool_size = bp->rx_ring_size;
+	pp.nid = dev_to_node(&bp->pdev->dev);
+	pp.dev = &bp->pdev->dev;
+	pp.dma_dir = DMA_BIDIRECTIONAL;
+
+	rxr->page_pool = page_pool_create(&pp);
+	if (IS_ERR(rxr->page_pool)) {
+		int err = PTR_ERR(rxr->page_pool);
+
+		rxr->page_pool = NULL;
+		return err;
+	}
+	return 0;
+}
+
 static int bnxt_alloc_rx_rings(struct bnxt *bp)
 {
-	int i, rc, agg_rings = 0, tpa_rings = 0;
+	int i, rc = 0, agg_rings = 0;
 
 	if (!bp->rx_ring)
 		return -ENOMEM;
@@ -2302,28 +2821,38 @@
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		agg_rings = 1;
 
-	if (bp->flags & BNXT_FLAG_TPA)
-		tpa_rings = 1;
-
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 		struct bnxt_ring_struct *ring;
 
 		ring = &rxr->rx_ring_struct;
 
+		rc = bnxt_alloc_rx_page_pool(bp, rxr);
+		if (rc)
+			return rc;
+
 		rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
 		if (rc < 0)
 			return rc;
 
-		rc = bnxt_alloc_ring(bp, ring);
+		rc = xdp_rxq_info_reg_mem_model(&rxr->xdp_rxq,
+						MEM_TYPE_PAGE_POOL,
+						rxr->page_pool);
+		if (rc) {
+			xdp_rxq_info_unreg(&rxr->xdp_rxq);
+			return rc;
+		}
+
+		rc = bnxt_alloc_ring(bp, &ring->ring_mem);
 		if (rc)
 			return rc;
 
+		ring->grp_idx = i;
 		if (agg_rings) {
 			u16 mem_size;
 
 			ring = &rxr->rx_agg_ring_struct;
-			rc = bnxt_alloc_ring(bp, ring);
+			rc = bnxt_alloc_ring(bp, &ring->ring_mem);
 			if (rc)
 				return rc;
 
@@ -2333,17 +2862,11 @@
 			rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL);
 			if (!rxr->rx_agg_bmap)
 				return -ENOMEM;
-
-			if (tpa_rings) {
-				rxr->rx_tpa = kcalloc(MAX_TPA,
-						sizeof(struct bnxt_tpa_info),
-						GFP_KERNEL);
-				if (!rxr->rx_tpa)
-					return -ENOMEM;
-			}
 		}
 	}
-	return 0;
+	if (bp->flags & BNXT_FLAG_TPA)
+		rc = bnxt_alloc_tpa_info(bp);
+	return rc;
 }
 
 static void bnxt_free_tx_rings(struct bnxt *bp)
@@ -2366,7 +2889,7 @@
 
 		ring = &txr->tx_ring_struct;
 
-		bnxt_free_ring(bp, ring);
+		bnxt_free_ring(bp, &ring->ring_mem);
 	}
 }
 
@@ -2397,7 +2920,7 @@
 
 		ring = &txr->tx_ring_struct;
 
-		rc = bnxt_alloc_ring(bp, ring);
+		rc = bnxt_alloc_ring(bp, &ring->ring_mem);
 		if (rc)
 			return rc;
 
@@ -2419,8 +2942,6 @@
 			mapping = txr->tx_push_mapping +
 				sizeof(struct tx_push_bd);
 			txr->data_mapping = cpu_to_le64(mapping);
-
-			memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
 		}
 		qidx = bp->tc_to_qidx[j];
 		ring->queue_id = bp->q_info[qidx].queue_id;
@@ -2443,6 +2964,7 @@
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr;
 		struct bnxt_ring_struct *ring;
+		int j;
 
 		if (!bnapi)
 			continue;
@@ -2450,12 +2972,51 @@
 		cpr = &bnapi->cp_ring;
 		ring = &cpr->cp_ring_struct;
 
-		bnxt_free_ring(bp, ring);
+		bnxt_free_ring(bp, &ring->ring_mem);
+
+		for (j = 0; j < 2; j++) {
+			struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+
+			if (cpr2) {
+				ring = &cpr2->cp_ring_struct;
+				bnxt_free_ring(bp, &ring->ring_mem);
+				kfree(cpr2);
+				cpr->cp_ring_arr[j] = NULL;
+			}
+		}
 	}
 }
 
+static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
+{
+	struct bnxt_ring_mem_info *rmem;
+	struct bnxt_ring_struct *ring;
+	struct bnxt_cp_ring_info *cpr;
+	int rc;
+
+	cpr = kzalloc(sizeof(*cpr), GFP_KERNEL);
+	if (!cpr)
+		return NULL;
+
+	ring = &cpr->cp_ring_struct;
+	rmem = &ring->ring_mem;
+	rmem->nr_pages = bp->cp_nr_pages;
+	rmem->page_size = HW_CMPD_RING_SIZE;
+	rmem->pg_arr = (void **)cpr->cp_desc_ring;
+	rmem->dma_arr = cpr->cp_desc_mapping;
+	rmem->flags = BNXT_RMEM_RING_PTE_FLAG;
+	rc = bnxt_alloc_ring(bp, rmem);
+	if (rc) {
+		bnxt_free_ring(bp, rmem);
+		kfree(cpr);
+		cpr = NULL;
+	}
+	return cpr;
+}
+
 static int bnxt_alloc_cp_rings(struct bnxt *bp)
 {
+	bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS);
 	int i, rc, ulp_base_vec, ulp_msix;
 
 	ulp_msix = bnxt_get_ulp_msix_num(bp);
@@ -2469,9 +3030,10 @@
 			continue;
 
 		cpr = &bnapi->cp_ring;
+		cpr->bnapi = bnapi;
 		ring = &cpr->cp_ring_struct;
 
-		rc = bnxt_alloc_ring(bp, ring);
+		rc = bnxt_alloc_ring(bp, &ring->ring_mem);
 		if (rc)
 			return rc;
 
@@ -2479,6 +3041,29 @@
 			ring->map_idx = i + ulp_msix;
 		else
 			ring->map_idx = i;
+
+		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+			continue;
+
+		if (i < bp->rx_nr_rings) {
+			struct bnxt_cp_ring_info *cpr2 =
+				bnxt_alloc_cp_sub_ring(bp);
+
+			cpr->cp_ring_arr[BNXT_RX_HDL] = cpr2;
+			if (!cpr2)
+				return -ENOMEM;
+			cpr2->bnapi = bnapi;
+		}
+		if ((sh && i < bp->tx_nr_rings) ||
+		    (!sh && i >= bp->rx_nr_rings)) {
+			struct bnxt_cp_ring_info *cpr2 =
+				bnxt_alloc_cp_sub_ring(bp);
+
+			cpr->cp_ring_arr[BNXT_TX_HDL] = cpr2;
+			if (!cpr2)
+				return -ENOMEM;
+			cpr2->bnapi = bnapi;
+		}
 	}
 	return 0;
 }
@@ -2489,6 +3074,7 @@
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
+		struct bnxt_ring_mem_info *rmem;
 		struct bnxt_cp_ring_info *cpr;
 		struct bnxt_rx_ring_info *rxr;
 		struct bnxt_tx_ring_info *txr;
@@ -2499,31 +3085,34 @@
 
 		cpr = &bnapi->cp_ring;
 		ring = &cpr->cp_ring_struct;
-		ring->nr_pages = bp->cp_nr_pages;
-		ring->page_size = HW_CMPD_RING_SIZE;
-		ring->pg_arr = (void **)cpr->cp_desc_ring;
-		ring->dma_arr = cpr->cp_desc_mapping;
-		ring->vmem_size = 0;
+		rmem = &ring->ring_mem;
+		rmem->nr_pages = bp->cp_nr_pages;
+		rmem->page_size = HW_CMPD_RING_SIZE;
+		rmem->pg_arr = (void **)cpr->cp_desc_ring;
+		rmem->dma_arr = cpr->cp_desc_mapping;
+		rmem->vmem_size = 0;
 
 		rxr = bnapi->rx_ring;
 		if (!rxr)
 			goto skip_rx;
 
 		ring = &rxr->rx_ring_struct;
-		ring->nr_pages = bp->rx_nr_pages;
-		ring->page_size = HW_RXBD_RING_SIZE;
-		ring->pg_arr = (void **)rxr->rx_desc_ring;
-		ring->dma_arr = rxr->rx_desc_mapping;
-		ring->vmem_size = SW_RXBD_RING_SIZE * bp->rx_nr_pages;
-		ring->vmem = (void **)&rxr->rx_buf_ring;
+		rmem = &ring->ring_mem;
+		rmem->nr_pages = bp->rx_nr_pages;
+		rmem->page_size = HW_RXBD_RING_SIZE;
+		rmem->pg_arr = (void **)rxr->rx_desc_ring;
+		rmem->dma_arr = rxr->rx_desc_mapping;
+		rmem->vmem_size = SW_RXBD_RING_SIZE * bp->rx_nr_pages;
+		rmem->vmem = (void **)&rxr->rx_buf_ring;
 
 		ring = &rxr->rx_agg_ring_struct;
-		ring->nr_pages = bp->rx_agg_nr_pages;
-		ring->page_size = HW_RXBD_RING_SIZE;
-		ring->pg_arr = (void **)rxr->rx_agg_desc_ring;
-		ring->dma_arr = rxr->rx_agg_desc_mapping;
-		ring->vmem_size = SW_RXBD_AGG_RING_SIZE * bp->rx_agg_nr_pages;
-		ring->vmem = (void **)&rxr->rx_agg_ring;
+		rmem = &ring->ring_mem;
+		rmem->nr_pages = bp->rx_agg_nr_pages;
+		rmem->page_size = HW_RXBD_RING_SIZE;
+		rmem->pg_arr = (void **)rxr->rx_agg_desc_ring;
+		rmem->dma_arr = rxr->rx_agg_desc_mapping;
+		rmem->vmem_size = SW_RXBD_AGG_RING_SIZE * bp->rx_agg_nr_pages;
+		rmem->vmem = (void **)&rxr->rx_agg_ring;
 
 skip_rx:
 		txr = bnapi->tx_ring;
@@ -2531,12 +3120,13 @@
 			continue;
 
 		ring = &txr->tx_ring_struct;
-		ring->nr_pages = bp->tx_nr_pages;
-		ring->page_size = HW_RXBD_RING_SIZE;
-		ring->pg_arr = (void **)txr->tx_desc_ring;
-		ring->dma_arr = txr->tx_desc_mapping;
-		ring->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages;
-		ring->vmem = (void **)&txr->tx_buf_ring;
+		rmem = &ring->ring_mem;
+		rmem->nr_pages = bp->tx_nr_pages;
+		rmem->page_size = HW_RXBD_RING_SIZE;
+		rmem->pg_arr = (void **)txr->tx_desc_ring;
+		rmem->dma_arr = txr->tx_desc_mapping;
+		rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages;
+		rmem->vmem = (void **)&txr->tx_buf_ring;
 	}
 }
 
@@ -2546,8 +3136,8 @@
 	u32 prod;
 	struct rx_bd **rx_buf_ring;
 
-	rx_buf_ring = (struct rx_bd **)ring->pg_arr;
-	for (i = 0, prod = 0; i < ring->nr_pages; i++) {
+	rx_buf_ring = (struct rx_bd **)ring->ring_mem.pg_arr;
+	for (i = 0, prod = 0; i < ring->ring_mem.nr_pages; i++) {
 		int j;
 		struct rx_bd *rxbd;
 
@@ -2628,7 +3218,7 @@
 			u8 *data;
 			dma_addr_t mapping;
 
-			for (i = 0; i < MAX_TPA; i++) {
+			for (i = 0; i < bp->max_tpa; i++) {
 				data = __bnxt_alloc_rx_data(bp, &mapping,
 							    GFP_KERNEL);
 				if (!data)
@@ -2649,7 +3239,7 @@
 
 static void bnxt_init_cp_rings(struct bnxt *bp)
 {
-	int i;
+	int i, j;
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
@@ -2658,6 +3248,17 @@
 		ring->fw_ring_id = INVALID_HW_RING_ID;
 		cpr->rx_ring_coal.coal_ticks = bp->rx_coal.coal_ticks;
 		cpr->rx_ring_coal.coal_bufs = bp->rx_coal.coal_bufs;
+		for (j = 0; j < 2; j++) {
+			struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+
+			if (!cpr2)
+				continue;
+
+			ring = &cpr2->cp_ring_struct;
+			ring->fw_ring_id = INVALID_HW_RING_ID;
+			cpr2->rx_ring_coal.coal_ticks = bp->rx_coal.coal_ticks;
+			cpr2->rx_ring_coal.coal_bufs = bp->rx_coal.coal_bufs;
+		}
 	}
 }
 
@@ -2739,7 +3340,7 @@
 	int num_vnics = 1;
 
 #ifdef CONFIG_RFS_ACCEL
-	if (bp->flags & BNXT_FLAG_RFS)
+	if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS)
 		num_vnics += bp->rx_nr_rings;
 #endif
 
@@ -2761,10 +3362,12 @@
 
 	for (i = 0; i < bp->nr_vnics; i++) {
 		struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
+		int j;
 
 		vnic->fw_vnic_id = INVALID_HW_RING_ID;
-		vnic->fw_rss_cos_lb_ctx[0] = INVALID_HW_RING_ID;
-		vnic->fw_rss_cos_lb_ctx[1] = INVALID_HW_RING_ID;
+		for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++)
+			vnic->fw_rss_cos_lb_ctx[j] = INVALID_HW_RING_ID;
+
 		vnic->fw_l2_ctx_id = INVALID_HW_RING_ID;
 
 		if (bp->vnic_info[i].rss_hash_key) {
@@ -2978,6 +3581,9 @@
 			}
 		}
 
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			goto vnic_skip_grps;
+
 		if (vnic->flags & BNXT_VNIC_RSS_FLAG)
 			max_rings = bp->rx_nr_rings;
 		else
@@ -2988,7 +3594,7 @@
 			rc = -ENOMEM;
 			goto out;
 		}
-
+vnic_skip_grps:
 		if ((bp->flags & BNXT_FLAG_NEW_RSS_CAP) &&
 		    !(vnic->flags & BNXT_VNIC_RSS_FLAG))
 			continue;
@@ -3022,6 +3628,30 @@
 				  bp->hwrm_cmd_resp_dma_addr);
 		bp->hwrm_cmd_resp_addr = NULL;
 	}
+
+	if (bp->hwrm_cmd_kong_resp_addr) {
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  bp->hwrm_cmd_kong_resp_addr,
+				  bp->hwrm_cmd_kong_resp_dma_addr);
+		bp->hwrm_cmd_kong_resp_addr = NULL;
+	}
+}
+
+static int bnxt_alloc_kong_hwrm_resources(struct bnxt *bp)
+{
+	struct pci_dev *pdev = bp->pdev;
+
+	if (bp->hwrm_cmd_kong_resp_addr)
+		return 0;
+
+	bp->hwrm_cmd_kong_resp_addr =
+		dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+				   &bp->hwrm_cmd_kong_resp_dma_addr,
+				   GFP_KERNEL);
+	if (!bp->hwrm_cmd_kong_resp_addr)
+		return -ENOMEM;
+
+	return 0;
 }
 
 static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
@@ -3042,7 +3672,7 @@
 	if (bp->hwrm_short_cmd_req_addr) {
 		struct pci_dev *pdev = bp->pdev;
 
-		dma_free_coherent(&pdev->dev, BNXT_HWRM_MAX_REQ_LEN,
+		dma_free_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
 				  bp->hwrm_short_cmd_req_addr,
 				  bp->hwrm_short_cmd_req_dma_addr);
 		bp->hwrm_short_cmd_req_addr = NULL;
@@ -3053,8 +3683,11 @@
 {
 	struct pci_dev *pdev = bp->pdev;
 
+	if (bp->hwrm_short_cmd_req_addr)
+		return 0;
+
 	bp->hwrm_short_cmd_req_addr =
-		dma_alloc_coherent(&pdev->dev, BNXT_HWRM_MAX_REQ_LEN,
+		dma_alloc_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
 				   &bp->hwrm_short_cmd_req_dma_addr,
 				   GFP_KERNEL);
 	if (!bp->hwrm_short_cmd_req_addr)
@@ -3063,9 +3696,8 @@
 	return 0;
 }
 
-static void bnxt_free_stats(struct bnxt *bp)
+static void bnxt_free_port_stats(struct bnxt *bp)
 {
-	u32 size, i;
 	struct pci_dev *pdev = bp->pdev;
 
 	bp->flags &= ~BNXT_FLAG_PORT_STATS;
@@ -3078,6 +3710,13 @@
 		bp->hw_rx_port_stats = NULL;
 	}
 
+	if (bp->hw_tx_port_stats_ext) {
+		dma_free_coherent(&pdev->dev, sizeof(struct tx_port_stats_ext),
+				  bp->hw_tx_port_stats_ext,
+				  bp->hw_tx_port_stats_ext_map);
+		bp->hw_tx_port_stats_ext = NULL;
+	}
+
 	if (bp->hw_rx_port_stats_ext) {
 		dma_free_coherent(&pdev->dev, sizeof(struct rx_port_stats_ext),
 				  bp->hw_rx_port_stats_ext,
@@ -3085,10 +3724,22 @@
 		bp->hw_rx_port_stats_ext = NULL;
 	}
 
+	if (bp->hw_pcie_stats) {
+		dma_free_coherent(&pdev->dev, sizeof(struct pcie_ctx_hw_stats),
+				  bp->hw_pcie_stats, bp->hw_pcie_stats_map);
+		bp->hw_pcie_stats = NULL;
+	}
+}
+
+static void bnxt_free_ring_stats(struct bnxt *bp)
+{
+	struct pci_dev *pdev = bp->pdev;
+	int size, i;
+
 	if (!bp->bnapi)
 		return;
 
-	size = sizeof(struct ctx_hw_stats);
+	size = bp->hw_ring_stats_size;
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
@@ -3107,7 +3758,7 @@
 	u32 size, i;
 	struct pci_dev *pdev = bp->pdev;
 
-	size = sizeof(struct ctx_hw_stats);
+	size = bp->hw_ring_stats_size;
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
@@ -3122,38 +3773,68 @@
 		cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
 	}
 
-	if (BNXT_PF(bp) && bp->chip_num != CHIP_NUM_58700) {
-		bp->hw_port_stats_size = sizeof(struct rx_port_stats) +
-					 sizeof(struct tx_port_stats) + 1024;
+	if (BNXT_VF(bp) || bp->chip_num == CHIP_NUM_58700)
+		return 0;
 
-		bp->hw_rx_port_stats =
-			dma_alloc_coherent(&pdev->dev, bp->hw_port_stats_size,
-					   &bp->hw_rx_port_stats_map,
+	if (bp->hw_rx_port_stats)
+		goto alloc_ext_stats;
+
+	bp->hw_port_stats_size = sizeof(struct rx_port_stats) +
+				 sizeof(struct tx_port_stats) + 1024;
+
+	bp->hw_rx_port_stats =
+		dma_alloc_coherent(&pdev->dev, bp->hw_port_stats_size,
+				   &bp->hw_rx_port_stats_map,
+				   GFP_KERNEL);
+	if (!bp->hw_rx_port_stats)
+		return -ENOMEM;
+
+	bp->hw_tx_port_stats = (void *)(bp->hw_rx_port_stats + 1) + 512;
+	bp->hw_tx_port_stats_map = bp->hw_rx_port_stats_map +
+				   sizeof(struct rx_port_stats) + 512;
+	bp->flags |= BNXT_FLAG_PORT_STATS;
+
+alloc_ext_stats:
+	/* Display extended statistics only if FW supports it */
+	if (bp->hwrm_spec_code < 0x10804 || bp->hwrm_spec_code == 0x10900)
+		if (!(bp->fw_cap & BNXT_FW_CAP_EXT_STATS_SUPPORTED))
+			return 0;
+
+	if (bp->hw_rx_port_stats_ext)
+		goto alloc_tx_ext_stats;
+
+	bp->hw_rx_port_stats_ext =
+		dma_alloc_coherent(&pdev->dev, sizeof(struct rx_port_stats_ext),
+				   &bp->hw_rx_port_stats_ext_map, GFP_KERNEL);
+	if (!bp->hw_rx_port_stats_ext)
+		return 0;
+
+alloc_tx_ext_stats:
+	if (bp->hw_tx_port_stats_ext)
+		goto alloc_pcie_stats;
+
+	if (bp->hwrm_spec_code >= 0x10902 ||
+	    (bp->fw_cap & BNXT_FW_CAP_EXT_STATS_SUPPORTED)) {
+		bp->hw_tx_port_stats_ext =
+			dma_alloc_coherent(&pdev->dev,
+					   sizeof(struct tx_port_stats_ext),
+					   &bp->hw_tx_port_stats_ext_map,
 					   GFP_KERNEL);
-		if (!bp->hw_rx_port_stats)
-			return -ENOMEM;
-
-		bp->hw_tx_port_stats = (void *)(bp->hw_rx_port_stats + 1) +
-				       512;
-		bp->hw_tx_port_stats_map = bp->hw_rx_port_stats_map +
-					   sizeof(struct rx_port_stats) + 512;
-		bp->flags |= BNXT_FLAG_PORT_STATS;
-
-		/* Display extended statistics only if FW supports it */
-		if (bp->hwrm_spec_code < 0x10804 ||
-		    bp->hwrm_spec_code == 0x10900)
-			return 0;
-
-		bp->hw_rx_port_stats_ext =
-			dma_zalloc_coherent(&pdev->dev,
-					    sizeof(struct rx_port_stats_ext),
-					    &bp->hw_rx_port_stats_ext_map,
-					    GFP_KERNEL);
-		if (!bp->hw_rx_port_stats_ext)
-			return 0;
-
-		bp->flags |= BNXT_FLAG_PORT_STATS_EXT;
 	}
+	bp->flags |= BNXT_FLAG_PORT_STATS_EXT;
+
+alloc_pcie_stats:
+	if (bp->hw_pcie_stats ||
+	    !(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED))
+		return 0;
+
+	bp->hw_pcie_stats =
+		dma_alloc_coherent(&pdev->dev, sizeof(struct pcie_ctx_hw_stats),
+				   &bp->hw_pcie_stats_map, GFP_KERNEL);
+	if (!bp->hw_pcie_stats)
+		return 0;
+
+	bp->flags |= BNXT_FLAG_PCIE_STATS;
 	return 0;
 }
 
@@ -3252,7 +3933,7 @@
 	bnxt_free_cp_rings(bp);
 	bnxt_free_ntp_fltrs(bp, irq_re_init);
 	if (irq_re_init) {
-		bnxt_free_stats(bp);
+		bnxt_free_ring_stats(bp);
 		bnxt_free_ring_grps(bp);
 		bnxt_free_vnics(bp);
 		kfree(bp->tx_ring_map);
@@ -3290,6 +3971,13 @@
 			bp->bnapi[i] = bnapi;
 			bp->bnapi[i]->index = i;
 			bp->bnapi[i]->bp = bp;
+			if (bp->flags & BNXT_FLAG_CHIP_P5) {
+				struct bnxt_cp_ring_info *cpr =
+					&bp->bnapi[i]->cp_ring;
+
+				cpr->cp_ring_struct.ring_mem.flags =
+					BNXT_RMEM_RING_PTE_FLAG;
+			}
 		}
 
 		bp->rx_ring = kcalloc(bp->rx_nr_rings,
@@ -3299,7 +3987,15 @@
 			return -ENOMEM;
 
 		for (i = 0; i < bp->rx_nr_rings; i++) {
-			bp->rx_ring[i].bnapi = bp->bnapi[i];
+			struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+
+			if (bp->flags & BNXT_FLAG_CHIP_P5) {
+				rxr->rx_ring_struct.ring_mem.flags =
+					BNXT_RMEM_RING_PTE_FLAG;
+				rxr->rx_agg_ring_struct.ring_mem.flags =
+					BNXT_RMEM_RING_PTE_FLAG;
+			}
+			rxr->bnapi = bp->bnapi[i];
 			bp->bnapi[i]->rx_ring = &bp->rx_ring[i];
 		}
 
@@ -3321,12 +4017,16 @@
 			j = bp->rx_nr_rings;
 
 		for (i = 0; i < bp->tx_nr_rings; i++, j++) {
-			bp->tx_ring[i].bnapi = bp->bnapi[j];
-			bp->bnapi[j]->tx_ring = &bp->tx_ring[i];
+			struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
+
+			if (bp->flags & BNXT_FLAG_CHIP_P5)
+				txr->tx_ring_struct.ring_mem.flags =
+					BNXT_RMEM_RING_PTE_FLAG;
+			txr->bnapi = bp->bnapi[j];
+			bp->bnapi[j]->tx_ring = txr;
 			bp->tx_ring_map[i] = bp->tx_nr_rings_xdp + i;
 			if (i >= bp->tx_nr_rings_xdp) {
-				bp->tx_ring[i].txq_index = i -
-					bp->tx_nr_rings_xdp;
+				txr->txq_index = i - bp->tx_nr_rings_xdp;
 				bp->bnapi[j]->tx_int = bnxt_tx_int;
 			} else {
 				bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP;
@@ -3386,7 +4086,7 @@
 		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
 
 		if (ring->fw_ring_id != INVALID_HW_RING_ID)
-			BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
+			bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons);
 	}
 }
 
@@ -3422,7 +4122,7 @@
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 
-		BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons);
+		bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
 	}
 }
 
@@ -3434,7 +4134,36 @@
 	req->req_type = cpu_to_le16(req_type);
 	req->cmpl_ring = cpu_to_le16(cmpl_ring);
 	req->target_id = cpu_to_le16(target_id);
-	req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
+	if (bnxt_kong_hwrm_message(bp, req))
+		req->resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
+	else
+		req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
+}
+
+static int bnxt_hwrm_to_stderr(u32 hwrm_err)
+{
+	switch (hwrm_err) {
+	case HWRM_ERR_CODE_SUCCESS:
+		return 0;
+	case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED:
+		return -EACCES;
+	case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR:
+		return -ENOSPC;
+	case HWRM_ERR_CODE_INVALID_PARAMS:
+	case HWRM_ERR_CODE_INVALID_FLAGS:
+	case HWRM_ERR_CODE_INVALID_ENABLES:
+	case HWRM_ERR_CODE_UNSUPPORTED_TLV:
+	case HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR:
+		return -EINVAL;
+	case HWRM_ERR_CODE_NO_BUFFER:
+		return -ENOMEM;
+	case HWRM_ERR_CODE_HOT_RESET_PROGRESS:
+		return -EAGAIN;
+	case HWRM_ERR_CODE_CMD_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	default:
+		return -EIO;
+	}
 }
 
 static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
@@ -3449,18 +4178,52 @@
 	struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
 	struct hwrm_short_input short_input = {0};
+	u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
+	u8 *resp_addr = (u8 *)bp->hwrm_cmd_resp_addr;
+	u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
+	u16 dst = BNXT_HWRM_CHNL_CHIMP;
 
-	req->seq_id = cpu_to_le16(bp->hwrm_cmd_seq++);
+	if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+		return -EBUSY;
+
+	if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
+		if (msg_len > bp->hwrm_max_ext_req_len ||
+		    !bp->hwrm_short_cmd_req_addr)
+			return -EINVAL;
+	}
+
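+	/* KONG-bound requests use separate registers and a separate response buffer */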
+	if (bnxt_hwrm_kong_chnl(bp, req)) {
+		dst = BNXT_HWRM_CHNL_KONG;
+		bar_offset = BNXT_GRCPF_REG_KONG_COMM;
+		doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
+		resp = bp->hwrm_cmd_kong_resp_addr;
+		resp_addr = (u8 *)bp->hwrm_cmd_kong_resp_addr;
+	}
+
 	memset(resp, 0, PAGE_SIZE);
 	cp_ring_id = le16_to_cpu(req->cmpl_ring);
 	intr_process = (cp_ring_id == INVALID_HW_RING_ID) ? 0 : 1;
 
-	if (bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) {
+	req->seq_id = cpu_to_le16(bnxt_get_hwrm_seq_id(bp, dst));
+	/* currently supports only one outstanding message */
+	if (intr_process)
+		bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
+
+	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
+	    msg_len > BNXT_HWRM_MAX_REQ_LEN) {
 		void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
+		u16 max_msg_len;
+
+		/* Set the boundary for the maximum extended request length
+		 * for the short cmd format. If passed up from the device,
+		 * use the max supported internal req length.
+		 */
+		max_msg_len = bp->hwrm_max_ext_req_len;
 
 		memcpy(short_cmd_req, req, msg_len);
-		memset(short_cmd_req + msg_len, 0, BNXT_HWRM_MAX_REQ_LEN -
-						   msg_len);
+		if (msg_len < max_msg_len)
+			memset(short_cmd_req + msg_len, 0,
+			       max_msg_len - msg_len);
 
 		short_input.req_type = req->req_type;
 		short_input.signature =
@@ -3479,17 +4242,16 @@
 	}
 
 	/* Write request msg to hwrm channel */
-	__iowrite32_copy(bp->bar0, data, msg_len / 4);
+	__iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);
 
 	for (i = msg_len; i < max_req_len; i += 4)
-		writel(0, bp->bar0 + i);
-
-	/* currently supports only one outstanding message */
-	if (intr_process)
-		bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
+		writel(0, bp->bar0 + bar_offset + i);
 
 	/* Ring channel doorbell */
-	writel(1, bp->bar0 + 0x100);
+	writel(1, bp->bar0 + doorbell_offset);
+
+	if (!pci_is_enabled(bp->pdev))
+		return 0;
 
 	if (!timeout)
 		timeout = DFLT_HWRM_CMD_TIMEOUT;
@@ -3504,10 +4266,13 @@
 	tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
 	timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
 	tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
-	resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
+	resp_len = (__le32 *)(resp_addr + HWRM_RESP_LEN_OFFSET);
+
 	if (intr_process) {
+		u16 seq_id = bp->hwrm_intr_seq_id;
+
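+		/* The completion handler stores the one's complement of the
+		 * sequence ID in hwrm_intr_seq_id when the response arrives,
+		 * so spin until that inverted value shows up or the timeout
+		 * expires.
+		 */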
 		/* Wait until hwrm response cmpl interrupt is processed */
-		while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID &&
+		while (bp->hwrm_intr_seq_id != (u16)~seq_id &&
 		       i++ < tmo_count) {
 			/* on first few passes, just barely sleep */
 			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
@@ -3518,14 +4283,15 @@
 					     HWRM_MAX_TIMEOUT);
 		}
 
-		if (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID) {
-			netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
-				   le16_to_cpu(req->req_type));
-			return -1;
+		if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
+			if (!silent)
+				netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
+					   le16_to_cpu(req->req_type));
+			return -EBUSY;
 		}
 		len = (le32_to_cpu(*resp_len) & HWRM_RESP_LEN_MASK) >>
 		      HWRM_RESP_LEN_SFT;
-		valid = bp->hwrm_cmd_resp_addr + len - 1;
+		valid = resp_addr + len - 1;
 	} else {
 		int j;
 
@@ -3536,7 +4302,7 @@
 			if (len)
 				break;
 			/* on first few passes, just barely sleep */
-			if (i < DFLT_HWRM_CMD_TIMEOUT)
+			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
 			else
@@ -3545,29 +4311,32 @@
 		}
 
 		if (i >= tmo_count) {
-			netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
-				   HWRM_TOTAL_TIMEOUT(i),
-				   le16_to_cpu(req->req_type),
-				   le16_to_cpu(req->seq_id), len);
-			return -1;
+			if (!silent)
+				netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
+					   HWRM_TOTAL_TIMEOUT(i),
+					   le16_to_cpu(req->req_type),
+					   le16_to_cpu(req->seq_id), len);
+			return -EBUSY;
 		}
 
 		/* Last byte of resp contains valid bit */
-		valid = bp->hwrm_cmd_resp_addr + len - 1;
+		valid = resp_addr + len - 1;
 		for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
 			/* make sure we read from updated DMA memory */
 			dma_rmb();
 			if (*valid)
 				break;
-			udelay(1);
+			usleep_range(1, 5);
 		}
 
 		if (j >= HWRM_VALID_BIT_DELAY_USEC) {
-			netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d v:%d\n",
-				   HWRM_TOTAL_TIMEOUT(i),
-				   le16_to_cpu(req->req_type),
-				   le16_to_cpu(req->seq_id), len, *valid);
-			return -1;
+			if (!silent)
+				netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d v:%d\n",
+					   HWRM_TOTAL_TIMEOUT(i),
+					   le16_to_cpu(req->req_type),
+					   le16_to_cpu(req->seq_id), len,
+					   *valid);
+			return -EBUSY;
 		}
 	}
 
@@ -3581,7 +4350,7 @@
 		netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
 			   le16_to_cpu(resp->req_type),
 			   le16_to_cpu(resp->seq_id), rc);
-	return rc;
+	return bnxt_hwrm_to_stderr(rc);
 }
 
 int _hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout)
@@ -3630,9 +4399,14 @@
 		cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
 
 	memset(async_events_bmap, 0, sizeof(async_events_bmap));
-	for (i = 0; i < ARRAY_SIZE(bnxt_async_events_arr); i++)
-		__set_bit(bnxt_async_events_arr[i], async_events_bmap);
+	for (i = 0; i < ARRAY_SIZE(bnxt_async_events_arr); i++) {
+		u16 event_id = bnxt_async_events_arr[i];
 
+		if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY &&
+		    !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
+			continue;
+		__set_bit(event_id, async_events_bmap);
+	}
 	if (bmap && bmap_size) {
 		for (i = 0; i < bmap_size; i++) {
 			if (test_bit(i, bmap))
@@ -3650,6 +4424,7 @@
 {
 	struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_func_drv_rgtr_input req = {0};
+	u32 flags;
 	int rc;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_RGTR, -1, -1);
@@ -3659,7 +4434,11 @@
 			    FUNC_DRV_RGTR_REQ_ENABLES_VER);
 
 	req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
-	req.flags = cpu_to_le32(FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE);
+	flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE |
+		FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
+	if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
+		flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT;
+	req.flags = cpu_to_le32(flags);
 	req.ver_maj_8b = DRV_VER_MAJ;
 	req.ver_min_8b = DRV_VER_MIN;
 	req.ver_upd_8b = DRV_VER_UPD;
@@ -3688,12 +4467,14 @@
 			cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD);
 	}
 
+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+		req.flags |= cpu_to_le32(
+			FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE);
+
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		rc = -EIO;
-	else if (resp->flags &
-		 cpu_to_le32(FUNC_DRV_RGTR_RESP_FLAGS_IF_CHANGE_SUPPORTED))
+	if (!rc && (resp->flags &
+		    cpu_to_le32(FUNC_DRV_RGTR_RESP_FLAGS_IF_CHANGE_SUPPORTED)))
 		bp->fw_cap |= BNXT_FW_CAP_IF_CHANGE;
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -3816,17 +4597,25 @@
 static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
 					     struct bnxt_ntuple_filter *fltr)
 {
-	int rc = 0;
 	struct hwrm_cfa_ntuple_filter_alloc_input req = {0};
-	struct hwrm_cfa_ntuple_filter_alloc_output *resp =
-		bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_ntuple_filter_alloc_output *resp;
 	struct flow_keys *keys = &fltr->fkeys;
-	struct bnxt_vnic_info *vnic = &bp->vnic_info[fltr->rxq + 1];
+	struct bnxt_vnic_info *vnic;
+	u32 dst_ena = 0;
+	int rc = 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_NTUPLE_FILTER_ALLOC, -1, -1);
 	req.l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx];
 
-	req.enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
+	if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX) {
+		dst_ena = CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_RFS_RING_TBL_IDX;
+		req.rfs_ring_tbl_idx = cpu_to_le16(fltr->rxq);
+		vnic = &bp->vnic_info[0];
+	} else {
+		vnic = &bp->vnic_info[fltr->rxq + 1];
+	}
+	req.dst_id = cpu_to_le16(vnic->fw_vnic_id);
+	req.enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS | dst_ena);
 
 	req.ethertype = htons(ETH_P_IP);
 	memcpy(req.src_macaddr, fltr->src_mac_addr, ETH_ALEN);
@@ -3864,11 +4653,12 @@
 	req.dst_port = keys->ports.dst;
 	req.dst_port_mask = cpu_to_be16(0xffff);
 
-	req.dst_id = cpu_to_le16(vnic->fw_vnic_id);
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
 		fltr->filter_id = resp->ntuple_filter_id;
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
@@ -3939,6 +4729,7 @@
 static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
 {
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+	u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX;
 	struct hwrm_vnic_tpa_cfg_input req = {0};
 
 	if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
@@ -3978,9 +4769,14 @@
 			nsegs = (MAX_SKB_FRAGS - n) / n;
 		}
 
-		segs = ilog2(nsegs);
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			segs = MAX_TPA_SEGS_P5;
+			max_aggs = bp->max_tpa;
+		} else {
+			segs = ilog2(nsegs);
+		}
 		req.max_agg_segs = cpu_to_le16(segs);
-		req.max_aggs = cpu_to_le16(VNIC_TPA_CFG_REQ_MAX_AGGS_MAX);
+		req.max_aggs = cpu_to_le16(max_aggs);
 
 		req.min_agg_len = cpu_to_le32(512);
 	}
@@ -3989,13 +4785,48 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
+static u16 bnxt_cp_ring_from_grp(struct bnxt *bp, struct bnxt_ring_struct *ring)
+{
+	struct bnxt_ring_grp_info *grp_info;
+
+	grp_info = &bp->grp_info[ring->grp_idx];
+	return grp_info->cp_fw_ring_id;
+}
+
+static u16 bnxt_cp_ring_for_rx(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		struct bnxt_napi *bnapi = rxr->bnapi;
+		struct bnxt_cp_ring_info *cpr;
+
+		cpr = bnapi->cp_ring.cp_ring_arr[BNXT_RX_HDL];
+		return cpr->cp_ring_struct.fw_ring_id;
+	} else {
+		return bnxt_cp_ring_from_grp(bp, &rxr->rx_ring_struct);
+	}
+}
+
+static u16 bnxt_cp_ring_for_tx(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		struct bnxt_napi *bnapi = txr->bnapi;
+		struct bnxt_cp_ring_info *cpr;
+
+		cpr = bnapi->cp_ring.cp_ring_arr[BNXT_TX_HDL];
+		return cpr->cp_ring_struct.fw_ring_id;
+	} else {
+		return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct);
+	}
+}
+
 static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss)
 {
 	u32 i, j, max_rings;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
 	struct hwrm_vnic_rss_cfg_input req = {0};
 
-	if (vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID)
+	if ((bp->flags & BNXT_FLAG_CHIP_P5) ||
+	    vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID)
 		return 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
@@ -4026,6 +4857,51 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
+static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, u16 vnic_id, bool set_rss)
+{
+	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+	u32 i, j, k, nr_ctxs, max_rings = bp->rx_nr_rings;
+	struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0];
+	struct hwrm_vnic_rss_cfg_input req = {0};
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
+	req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+	if (!set_rss) {
+		hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+		return 0;
+	}
+	req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
+	req.hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
+	req.ring_grp_tbl_addr = cpu_to_le64(vnic->rss_table_dma_addr);
+	req.hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr);
+	nr_ctxs = DIV_ROUND_UP(bp->rx_nr_rings, 64);
+	for (i = 0, k = 0; i < nr_ctxs; i++) {
+		__le16 *ring_tbl = vnic->rss_table;
+		int rc;
+
+		req.ring_table_pair_index = i;
+		req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[i]);
+		for (j = 0; j < 64; j++) {
+			u16 ring_id;
+
+			ring_id = rxr->rx_ring_struct.fw_ring_id;
+			*ring_tbl++ = cpu_to_le16(ring_id);
+			ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+			*ring_tbl++ = cpu_to_le16(ring_id);
+			rxr++;
+			k++;
+			if (k == max_rings) {
+				k = 0;
+				rxr = &bp->rx_ring[0];
+			}
+		}
+		rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
 static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id)
 {
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
@@ -4109,6 +4985,18 @@
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_CFG, -1, -1);
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0];
+
+		req.default_rx_ring_id =
+			cpu_to_le16(rxr->rx_ring_struct.fw_ring_id);
+		req.default_cmpl_ring_id =
+			cpu_to_le16(bnxt_cp_ring_for_rx(bp, rxr));
+		req.enables =
+			cpu_to_le32(VNIC_CFG_REQ_ENABLES_DEFAULT_RX_RING_ID |
+				    VNIC_CFG_REQ_ENABLES_DEFAULT_CMPL_RING_ID);
+		goto vnic_mru;
+	}
 	req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP);
 	/* Only RSS support for now TBD: COS & LB */
 	if (vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID) {
@@ -4141,13 +5029,13 @@
 		ring = bp->rx_nr_rings - 1;
 
 	grp_idx = bp->rx_ring[ring].bnapi->index;
-	req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
 	req.dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id);
-
 	req.lb_rule = cpu_to_le16(0xffff);
+vnic_mru:
 	req.mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + ETH_FCS_LEN +
 			      VLAN_HLEN);
 
+	req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
 #ifdef CONFIG_BNXT_SRIOV
 	if (BNXT_VF(bp))
 		def_vlan = bp->vf.vlan;
@@ -4172,8 +5060,6 @@
 			cpu_to_le32(bp->vnic_info[vnic_id].fw_vnic_id);
 
 		rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-		if (rc)
-			return rc;
 		bp->vnic_info[vnic_id].fw_vnic_id = INVALID_HW_RING_ID;
 	}
 	return rc;
@@ -4195,6 +5081,10 @@
 	unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings;
 	struct hwrm_vnic_alloc_input req = {0};
 	struct hwrm_vnic_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		goto vnic_no_ring_grps;
 
 	/* map ring groups to this vnic */
 	for (i = start_rx_ring_idx, j = 0; i < end_idx; i++, j++) {
@@ -4204,12 +5094,12 @@
 				   j, nr_rings);
 			break;
 		}
-		bp->vnic_info[vnic_id].fw_grp_ids[j] =
-					bp->grp_info[grp_idx].fw_grp_id;
+		vnic->fw_grp_ids[j] = bp->grp_info[grp_idx].fw_grp_id;
 	}
 
-	bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[0] = INVALID_HW_RING_ID;
-	bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[1] = INVALID_HW_RING_ID;
+vnic_no_ring_grps:
+	for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++)
+		vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID;
 	if (vnic_id == 0)
 		req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
 
@@ -4218,7 +5108,7 @@
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (!rc)
-		bp->vnic_info[vnic_id].fw_vnic_id = le32_to_cpu(resp->vnic_id);
+		vnic->fw_vnic_id = le32_to_cpu(resp->vnic_id);
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
@@ -4229,6 +5119,8 @@
 	struct hwrm_vnic_qcaps_input req = {0};
 	int rc;
 
+	bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats);
+	bp->flags &= ~(BNXT_FLAG_NEW_RSS_CAP | BNXT_FLAG_ROCE_MIRROR_CAP);
 	if (bp->hwrm_spec_code < 0x10600)
 		return 0;
 
@@ -4238,11 +5130,16 @@
 	if (!rc) {
 		u32 flags = le32_to_cpu(resp->flags);
 
-		if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP)
+		if (!(bp->flags & BNXT_FLAG_CHIP_P5) &&
+		    (flags & VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP))
 			bp->flags |= BNXT_FLAG_NEW_RSS_CAP;
 		if (flags &
 		    VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP)
 			bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP;
+		bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported);
+		if (bp->max_tpa_v2)
+			bp->hw_ring_stats_size =
+				sizeof(struct ctx_hw_stats_ext);
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -4253,6 +5150,9 @@
 	u16 i;
 	u32 rc = 0;
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		return 0;
+
 	mutex_lock(&bp->hwrm_cmd_lock);
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct hwrm_ring_grp_alloc_input req = {0};
@@ -4285,7 +5185,7 @@
 	u32 rc = 0;
 	struct hwrm_ring_grp_free_input req = {0};
 
-	if (!bp->grp_info)
+	if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5))
 		return 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_GRP_FREE, -1, -1);
@@ -4299,8 +5199,6 @@
 
 		rc = _hwrm_send_message(bp, &req, sizeof(req),
 					HWRM_CMD_TIMEOUT);
-		if (rc)
-			break;
 		bp->grp_info[i].fw_grp_id = INVALID_HW_RING_ID;
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
@@ -4314,45 +5212,90 @@
 	int rc = 0, err = 0;
 	struct hwrm_ring_alloc_input req = {0};
 	struct hwrm_ring_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_ring_mem_info *rmem = &ring->ring_mem;
 	struct bnxt_ring_grp_info *grp_info;
 	u16 ring_id;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_ALLOC, -1, -1);
 
 	req.enables = 0;
-	if (ring->nr_pages > 1) {
-		req.page_tbl_addr = cpu_to_le64(ring->pg_tbl_map);
+	if (rmem->nr_pages > 1) {
+		req.page_tbl_addr = cpu_to_le64(rmem->pg_tbl_map);
 		/* Page size is in log2 units */
 		req.page_size = BNXT_PAGE_SHIFT;
 		req.page_tbl_depth = 1;
 	} else {
-		req.page_tbl_addr =  cpu_to_le64(ring->dma_arr[0]);
+		req.page_tbl_addr =  cpu_to_le64(rmem->dma_arr[0]);
 	}
 	req.fbo = 0;
 	/* Association of ring index with doorbell index and MSIX number */
 	req.logical_id = cpu_to_le16(map_index);
 
 	switch (ring_type) {
-	case HWRM_RING_ALLOC_TX:
+	case HWRM_RING_ALLOC_TX: {
+		struct bnxt_tx_ring_info *txr;
+
+		txr = container_of(ring, struct bnxt_tx_ring_info,
+				   tx_ring_struct);
 		req.ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
 		/* Association of transmit ring with completion ring */
 		grp_info = &bp->grp_info[ring->grp_idx];
-		req.cmpl_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
+		req.cmpl_ring_id = cpu_to_le16(bnxt_cp_ring_for_tx(bp, txr));
 		req.length = cpu_to_le32(bp->tx_ring_mask + 1);
 		req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
 		req.queue_id = cpu_to_le16(ring->queue_id);
 		break;
+	}
 	case HWRM_RING_ALLOC_RX:
 		req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
 		req.length = cpu_to_le32(bp->rx_ring_mask + 1);
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			u16 flags = 0;
+
+			/* Association of rx ring with stats context */
+			grp_info = &bp->grp_info[ring->grp_idx];
+			req.rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
+			req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+			req.enables |= cpu_to_le32(
+				RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
+			if (NET_IP_ALIGN == 2)
+				flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD;
+			req.flags = cpu_to_le16(flags);
+		}
 		break;
 	case HWRM_RING_ALLOC_AGG:
-		req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
+			/* Association of agg ring with rx ring */
+			grp_info = &bp->grp_info[ring->grp_idx];
+			req.rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
+			req.rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
+			req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+			req.enables |= cpu_to_le32(
+				RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID |
+				RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
+		} else {
+			req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+		}
 		req.length = cpu_to_le32(bp->rx_agg_ring_mask + 1);
 		break;
 	case HWRM_RING_ALLOC_CMPL:
 		req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
 		req.length = cpu_to_le32(bp->cp_ring_mask + 1);
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			/* Association of cp ring with nq */
+			grp_info = &bp->grp_info[map_index];
+			req.nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
+			req.cq_handle = cpu_to_le64(ring->handle);
+			req.enables |= cpu_to_le32(
+				RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID);
+		} else if (bp->flags & BNXT_FLAG_USING_MSIX) {
+			req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+		}
+		break;
+	case HWRM_RING_ALLOC_NQ:
+		req.ring_type = RING_ALLOC_REQ_RING_TYPE_NQ;
+		req.length = cpu_to_le32(bp->cp_ring_mask + 1);
 		if (bp->flags & BNXT_FLAG_USING_MSIX)
 			req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
 		break;
@@ -4401,22 +5344,74 @@
 	return rc;
 }
 
+static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type,
+			u32 map_idx, u32 xid)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		if (BNXT_PF(bp))
+			db->doorbell = bp->bar1 + 0x10000;
+		else
+			db->doorbell = bp->bar1 + 0x4000;
+		switch (ring_type) {
+		case HWRM_RING_ALLOC_TX:
+			db->db_key64 = DBR_PATH_L2 | DBR_TYPE_SQ;
+			break;
+		case HWRM_RING_ALLOC_RX:
+		case HWRM_RING_ALLOC_AGG:
+			db->db_key64 = DBR_PATH_L2 | DBR_TYPE_SRQ;
+			break;
+		case HWRM_RING_ALLOC_CMPL:
+			db->db_key64 = DBR_PATH_L2;
+			break;
+		case HWRM_RING_ALLOC_NQ:
+			db->db_key64 = DBR_PATH_L2;
+			break;
+		}
+		db->db_key64 |= (u64)xid << DBR_XID_SFT;
+	} else {
+		db->doorbell = bp->bar1 + map_idx * 0x80;
+		switch (ring_type) {
+		case HWRM_RING_ALLOC_TX:
+			db->db_key32 = DB_KEY_TX;
+			break;
+		case HWRM_RING_ALLOC_RX:
+		case HWRM_RING_ALLOC_AGG:
+			db->db_key32 = DB_KEY_RX;
+			break;
+		case HWRM_RING_ALLOC_CMPL:
+			db->db_key32 = DB_KEY_CP;
+			break;
+		}
+	}
+}
+
 static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
 {
+	bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS);
 	int i, rc = 0;
+	u32 type;
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		type = HWRM_RING_ALLOC_NQ;
+	else
+		type = HWRM_RING_ALLOC_CMPL;
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
 		u32 map_idx = ring->map_idx;
+		unsigned int vector;
 
-		cpr->cp_doorbell = bp->bar1 + map_idx * 0x80;
-		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_CMPL,
-					      map_idx);
-		if (rc)
+		vector = bp->irq_tbl[map_idx].vector;
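+		/* Keep the vector masked while the ring is instantiated so
+		 * a stray interrupt can't run against a doorbell that isn't
+		 * set up yet; it is re-enabled once cp_db is initialized.
+		 */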
+		disable_irq_nosync(vector);
+		rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
+		if (rc) {
+			enable_irq(vector);
 			goto err_out;
-		BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
+		}
+		bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id);
+		bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons);
+		enable_irq(vector);
 		bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id;
 
 		if (!i) {
@@ -4426,33 +5421,71 @@
 		}
 	}
 
+	type = HWRM_RING_ALLOC_TX;
 	for (i = 0; i < bp->tx_nr_rings; i++) {
 		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
-		struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
-		u32 map_idx = i;
+		struct bnxt_ring_struct *ring;
+		u32 map_idx;
 
-		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_TX,
-					      map_idx);
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			struct bnxt_napi *bnapi = txr->bnapi;
+			struct bnxt_cp_ring_info *cpr, *cpr2;
+			u32 type2 = HWRM_RING_ALLOC_CMPL;
+
+			cpr = &bnapi->cp_ring;
+			cpr2 = cpr->cp_ring_arr[BNXT_TX_HDL];
+			ring = &cpr2->cp_ring_struct;
+			ring->handle = BNXT_TX_HDL;
+			map_idx = bnapi->index;
+			rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx);
+			if (rc)
+				goto err_out;
+			bnxt_set_db(bp, &cpr2->cp_db, type2, map_idx,
+				    ring->fw_ring_id);
+			bnxt_db_cq(bp, &cpr2->cp_db, cpr2->cp_raw_cons);
+		}
+		ring = &txr->tx_ring_struct;
+		map_idx = i;
+		rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
 		if (rc)
 			goto err_out;
-		txr->tx_doorbell = bp->bar1 + map_idx * 0x80;
+		bnxt_set_db(bp, &txr->tx_db, type, map_idx, ring->fw_ring_id);
 	}
 
+	type = HWRM_RING_ALLOC_RX;
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 		struct bnxt_ring_struct *ring = &rxr->rx_ring_struct;
-		u32 map_idx = rxr->bnapi->index;
+		struct bnxt_napi *bnapi = rxr->bnapi;
+		u32 map_idx = bnapi->index;
 
-		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_RX,
-					      map_idx);
+		rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
 		if (rc)
 			goto err_out;
-		rxr->rx_doorbell = bp->bar1 + map_idx * 0x80;
-		writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell);
+		bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id);
+		/* If we have agg rings, post agg buffers first. */
+		if (!agg_rings)
+			bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
 		bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id;
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+			u32 type2 = HWRM_RING_ALLOC_CMPL;
+			struct bnxt_cp_ring_info *cpr2;
+
+			cpr2 = cpr->cp_ring_arr[BNXT_RX_HDL];
+			ring = &cpr2->cp_ring_struct;
+			ring->handle = BNXT_RX_HDL;
+			rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx);
+			if (rc)
+				goto err_out;
+			bnxt_set_db(bp, &cpr2->cp_db, type2, map_idx,
+				    ring->fw_ring_id);
+			bnxt_db_cq(bp, &cpr2->cp_db, cpr2->cp_raw_cons);
+		}
 	}
 
-	if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+	if (agg_rings) {
+		type = HWRM_RING_ALLOC_AGG;
 		for (i = 0; i < bp->rx_nr_rings; i++) {
 			struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 			struct bnxt_ring_struct *ring =
@@ -4460,15 +5493,14 @@
 			u32 grp_idx = ring->grp_idx;
 			u32 map_idx = grp_idx + bp->rx_nr_rings;
 
-			rc = hwrm_ring_alloc_send_msg(bp, ring,
-						      HWRM_RING_ALLOC_AGG,
-						      map_idx);
+			rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
 			if (rc)
 				goto err_out;
 
-			rxr->rx_agg_doorbell = bp->bar1 + map_idx * 0x80;
-			writel(DB_KEY_RX | rxr->rx_agg_prod,
-			       rxr->rx_agg_doorbell);
+			bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx,
+				    ring->fw_ring_id);
+			bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+			bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
 			bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id;
 		}
 	}
@@ -4485,6 +5517,9 @@
 	struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 error_code;
 
+	if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+		return 0;
+
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
 	req.ring_type = ring_type;
 	req.ring_id = cpu_to_le16(ring->fw_ring_id);
@@ -4504,6 +5539,7 @@
 
 static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
 {
+	u32 type;
 	int i;
 
 	if (!bp->bnapi)
@@ -4512,10 +5548,10 @@
 	for (i = 0; i < bp->tx_nr_rings; i++) {
 		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 		struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
-		u32 grp_idx = txr->bnapi->index;
-		u32 cmpl_ring_id = bp->grp_info[grp_idx].cp_fw_ring_id;
 
 		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+			u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr);
+
 			hwrm_ring_free_send_msg(bp, ring,
 						RING_FREE_REQ_RING_TYPE_TX,
 						close_path ? cmpl_ring_id :
@@ -4528,9 +5564,10 @@
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 		struct bnxt_ring_struct *ring = &rxr->rx_ring_struct;
 		u32 grp_idx = rxr->bnapi->index;
-		u32 cmpl_ring_id = bp->grp_info[grp_idx].cp_fw_ring_id;
 
 		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+			u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+
 			hwrm_ring_free_send_msg(bp, ring,
 						RING_FREE_REQ_RING_TYPE_RX,
 						close_path ? cmpl_ring_id :
@@ -4541,15 +5578,19 @@
 		}
 	}
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		type = RING_FREE_REQ_RING_TYPE_RX_AGG;
+	else
+		type = RING_FREE_REQ_RING_TYPE_RX;
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 		struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct;
 		u32 grp_idx = rxr->bnapi->index;
-		u32 cmpl_ring_id = bp->grp_info[grp_idx].cp_fw_ring_id;
 
 		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
-			hwrm_ring_free_send_msg(bp, ring,
-						RING_FREE_REQ_RING_TYPE_RX,
+			u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+
+			hwrm_ring_free_send_msg(bp, ring, type,
 						close_path ? cmpl_ring_id :
 						INVALID_HW_RING_ID);
 			ring->fw_ring_id = INVALID_HW_RING_ID;
@@ -4564,14 +5605,32 @@
 	 */
 	bnxt_disable_int_sync(bp);
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		type = RING_FREE_REQ_RING_TYPE_NQ;
+	else
+		type = RING_FREE_REQ_RING_TYPE_L2_CMPL;
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
-		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+		struct bnxt_ring_struct *ring;
+		int j;
 
+		for (j = 0; j < 2; j++) {
+			struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+
+			if (cpr2) {
+				ring = &cpr2->cp_ring_struct;
+				if (ring->fw_ring_id == INVALID_HW_RING_ID)
+					continue;
+				hwrm_ring_free_send_msg(bp, ring,
+					RING_FREE_REQ_RING_TYPE_L2_CMPL,
+					INVALID_HW_RING_ID);
+				ring->fw_ring_id = INVALID_HW_RING_ID;
+			}
+		}
+		ring = &cpr->cp_ring_struct;
 		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
-			hwrm_ring_free_send_msg(bp, ring,
-						RING_FREE_REQ_RING_TYPE_L2_CMPL,
+			hwrm_ring_free_send_msg(bp, ring, type,
 						INVALID_HW_RING_ID);
 			ring->fw_ring_id = INVALID_HW_RING_ID;
 			bp->grp_info[i].cp_fw_ring_id = INVALID_HW_RING_ID;
@@ -4579,6 +5638,9 @@
 	}
 }
 
+static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
+			   bool shared);
+
 static int bnxt_hwrm_get_rings(struct bnxt *bp)
 {
 	struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
@@ -4595,7 +5657,7 @@
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc) {
 		mutex_unlock(&bp->hwrm_cmd_lock);
-		return -EIO;
+		return rc;
 	}
 
 	hw_resc->resv_tx_rings = le16_to_cpu(resp->alloc_tx_rings);
@@ -4608,8 +5670,25 @@
 		hw_resc->resv_vnics = le16_to_cpu(resp->alloc_vnics);
 		cp = le16_to_cpu(resp->alloc_cmpl_rings);
 		stats = le16_to_cpu(resp->alloc_stat_ctx);
-		cp = min_t(u16, cp, stats);
+		hw_resc->resv_irqs = cp;
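+		/* On P5 every ring needs its own CQ, so if the firmware
+		 * granted fewer completion rings than rx + tx rings, trim
+		 * both sides to fit within the grant.
+		 */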
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			int rx = hw_resc->resv_rx_rings;
+			int tx = hw_resc->resv_tx_rings;
+
+			if (bp->flags & BNXT_FLAG_AGG_RINGS)
+				rx >>= 1;
+			if (cp < (rx + tx)) {
+				bnxt_trim_rings(bp, &rx, &tx, cp, false);
+				if (bp->flags & BNXT_FLAG_AGG_RINGS)
+					rx <<= 1;
+				hw_resc->resv_rx_rings = rx;
+				hw_resc->resv_tx_rings = tx;
+			}
+			hw_resc->resv_irqs = le16_to_cpu(resp->alloc_msix);
+			hw_resc->resv_hw_ring_grps = rx;
+		}
 		hw_resc->resv_cp_rings = cp;
+		hw_resc->resv_stat_ctxs = stats;
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return 0;
@@ -4634,10 +5713,12 @@
 	return rc;
 }
 
+static bool bnxt_rfs_supported(struct bnxt *bp);
+
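+/* On P5 chips the "completion rings" reserved with the firmware are CQs
+ * (one per TX and RX ring), MSI-X vectors map to NQs, and one RSS context
+ * covers 64 ring-table entries; legacy chips reserve ring groups instead.
+ */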
 static void
 __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
 			     int tx_rings, int rx_rings, int ring_grps,
-			     int cp_rings, int vnics)
+			     int cp_rings, int stats, int vnics)
 {
 	u32 enables = 0;
 
@@ -4647,16 +5728,38 @@
 	req->num_tx_rings = cpu_to_le16(tx_rings);
 	if (BNXT_NEW_RM(bp)) {
 		enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
-		enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
-				      FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
-		enables |= ring_grps ?
-			   FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+		enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0;
+			enables |= tx_rings + ring_grps ?
+				   FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
+			enables |= rx_rings ?
+				FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
+		} else {
+			enables |= cp_rings ?
+				   FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
+			enables |= ring_grps ?
+				   FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
+				   FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
+		}
 		enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
 
 		req->num_rx_rings = cpu_to_le16(rx_rings);
-		req->num_hw_ring_grps = cpu_to_le16(ring_grps);
-		req->num_cmpl_rings = cpu_to_le16(cp_rings);
-		req->num_stat_ctxs = req->num_cmpl_rings;
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
+			req->num_msix = cpu_to_le16(cp_rings);
+			req->num_rsscos_ctxs =
+				cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
+		} else {
+			req->num_cmpl_rings = cpu_to_le16(cp_rings);
+			req->num_hw_ring_grps = cpu_to_le16(ring_grps);
+			req->num_rsscos_ctxs = cpu_to_le16(1);
+			if (!(bp->flags & BNXT_FLAG_NEW_RSS_CAP) &&
+			    bnxt_rfs_supported(bp))
+				req->num_rsscos_ctxs =
+					cpu_to_le16(ring_grps + 1);
+		}
+		req->num_stat_ctxs = cpu_to_le16(stats);
 		req->num_vnics = cpu_to_le16(vnics);
 	}
 	req->enables = cpu_to_le32(enables);
@@ -4666,23 +5769,39 @@
 __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
 			     struct hwrm_func_vf_cfg_input *req, int tx_rings,
 			     int rx_rings, int ring_grps, int cp_rings,
-			     int vnics)
+			     int stats, int vnics)
 {
 	u32 enables = 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1);
 	enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
-	enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
-	enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
-			      FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
-	enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+	enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
+			      FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
+	enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		enables |= tx_rings + ring_grps ?
+			   FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
+	} else {
+		enables |= cp_rings ?
+			   FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
+		enables |= ring_grps ?
+			   FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+	}
 	enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
+	enables |= FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS;
 
+	req->num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 	req->num_tx_rings = cpu_to_le16(tx_rings);
 	req->num_rx_rings = cpu_to_le16(rx_rings);
-	req->num_hw_ring_grps = cpu_to_le16(ring_grps);
-	req->num_cmpl_rings = cpu_to_le16(cp_rings);
-	req->num_stat_ctxs = req->num_cmpl_rings;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
+		req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
+	} else {
+		req->num_cmpl_rings = cpu_to_le16(cp_rings);
+		req->num_hw_ring_grps = cpu_to_le16(ring_grps);
+		req->num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
+	}
+	req->num_stat_ctxs = cpu_to_le16(stats);
 	req->num_vnics = cpu_to_le16(vnics);
 
 	req->enables = cpu_to_le32(enables);
@@ -4690,19 +5809,19 @@
 
 static int
 bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-			   int ring_grps, int cp_rings, int vnics)
+			   int ring_grps, int cp_rings, int stats, int vnics)
 {
 	struct hwrm_func_cfg_input req = {0};
 	int rc;
 
 	__bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-				     cp_rings, vnics);
+				     cp_rings, stats, vnics);
 	if (!req.enables)
 		return 0;
 
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
-		return -ENOMEM;
+		return rc;
 
 	if (bp->hwrm_spec_code < 0x10601)
 		bp->hw_resc.resv_tx_rings = tx_rings;
@@ -4713,7 +5832,7 @@
 
 static int
 bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-			   int ring_grps, int cp_rings, int vnics)
+			   int ring_grps, int cp_rings, int stats, int vnics)
 {
 	struct hwrm_func_vf_cfg_input req = {0};
 	int rc;
@@ -4724,29 +5843,27 @@
 	}
 
 	__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-				     cp_rings, vnics);
-	req.enables |= cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
-				   FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS);
-	req.num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
-	req.num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
+				     cp_rings, stats, vnics);
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
-		return -ENOMEM;
+		return rc;
 
 	rc = bnxt_hwrm_get_rings(bp);
 	return rc;
 }
 
 static int bnxt_hwrm_reserve_rings(struct bnxt *bp, int tx, int rx, int grp,
-				   int cp, int vnic)
+				   int cp, int stat, int vnic)
 {
 	if (BNXT_PF(bp))
-		return bnxt_hwrm_reserve_pf_rings(bp, tx, rx, grp, cp, vnic);
+		return bnxt_hwrm_reserve_pf_rings(bp, tx, rx, grp, cp, stat,
+						  vnic);
 	else
-		return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, vnic);
+		return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, stat,
+						  vnic);
 }
 
-static int bnxt_cp_rings_in_use(struct bnxt *bp)
+int bnxt_nq_rings_in_use(struct bnxt *bp)
 {
 	int cp = bp->cp_nr_rings;
 	int ulp_msix, ulp_base;
@@ -4761,11 +5878,37 @@
 	return cp;
 }
 
+static int bnxt_cp_rings_in_use(struct bnxt *bp)
+{
+	int cp;
+
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		return bnxt_nq_rings_in_use(bp);
+
+	cp = bp->tx_nr_rings + bp->rx_nr_rings;
+	return cp;
+}
+
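+/* Stat contexts needed by this function: one per NAPI completion ring
+ * plus any claimed by the RDMA (ULP) driver; when the ULP has its own
+ * MSI-X range reserved, its stat contexts are counted from the ULP base
+ * instead.
+ */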
+static int bnxt_get_func_stat_ctxs(struct bnxt *bp)
+{
+	int ulp_stat = bnxt_get_ulp_stat_ctxs(bp);
+	int cp = bp->cp_nr_rings;
+
+	if (!ulp_stat)
+		return cp;
+
+	if (bnxt_nq_rings_in_use(bp) > cp + bnxt_get_ulp_msix_num(bp))
+		return bnxt_get_ulp_msix_base(bp) + ulp_stat;
+
+	return cp + ulp_stat;
+}
+
 static bool bnxt_need_reserve_rings(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 	int cp = bnxt_cp_rings_in_use(bp);
-	int rx = bp->rx_nr_rings;
+	int nq = bnxt_nq_rings_in_use(bp);
+	int rx = bp->rx_nr_rings, stat;
 	int vnic = 1, grp = rx;
 
 	if (bp->hwrm_spec_code < 0x10601)
@@ -4774,51 +5917,56 @@
 	if (hw_resc->resv_tx_rings != bp->tx_nr_rings)
 		return true;
 
-	if (bp->flags & BNXT_FLAG_RFS)
+	if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5))
 		vnic = rx + 1;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		rx <<= 1;
+	stat = bnxt_get_func_stat_ctxs(bp);
 	if (BNXT_NEW_RM(bp) &&
 	    (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp ||
-	     hw_resc->resv_hw_ring_grps != grp || hw_resc->resv_vnics != vnic))
+	     hw_resc->resv_vnics != vnic || hw_resc->resv_stat_ctxs != stat ||
+	     (hw_resc->resv_hw_ring_grps != grp &&
+	      !(bp->flags & BNXT_FLAG_CHIP_P5))))
+		return true;
+	if ((bp->flags & BNXT_FLAG_CHIP_P5) && BNXT_PF(bp) &&
+	    hw_resc->resv_irqs != nq)
 		return true;
 	return false;
 }
 
-static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
-			   bool shared);
-
 static int __bnxt_reserve_rings(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	int cp = bnxt_cp_rings_in_use(bp);
+	int cp = bnxt_nq_rings_in_use(bp);
 	int tx = bp->tx_nr_rings;
 	int rx = bp->rx_nr_rings;
 	int grp, rx_rings, rc;
+	int vnic = 1, stat;
 	bool sh = false;
-	int vnic = 1;
 
 	if (!bnxt_need_reserve_rings(bp))
 		return 0;
 
 	if (bp->flags & BNXT_FLAG_SHARED_RINGS)
 		sh = true;
-	if (bp->flags & BNXT_FLAG_RFS)
+	if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5))
 		vnic = rx + 1;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		rx <<= 1;
 	grp = bp->rx_nr_rings;
+	stat = bnxt_get_func_stat_ctxs(bp);
 
-	rc = bnxt_hwrm_reserve_rings(bp, tx, rx, grp, cp, vnic);
+	rc = bnxt_hwrm_reserve_rings(bp, tx, rx, grp, cp, stat, vnic);
 	if (rc)
 		return rc;
 
 	tx = hw_resc->resv_tx_rings;
 	if (BNXT_NEW_RM(bp)) {
 		rx = hw_resc->resv_rx_rings;
-		cp = hw_resc->resv_cp_rings;
+		cp = hw_resc->resv_irqs;
 		grp = hw_resc->resv_hw_ring_grps;
 		vnic = hw_resc->resv_vnics;
+		stat = hw_resc->resv_stat_ctxs;
 	}
 
 	rx_rings = rx;
@@ -4837,6 +5985,10 @@
 		}
 	}
 	rx_rings = min_t(int, rx_rings, grp);
+	cp = min_t(int, cp, bp->cp_nr_rings);
+	if (stat > bnxt_get_ulp_stat_ctxs(bp))
+		stat -= bnxt_get_ulp_stat_ctxs(bp);
+	cp = min_t(int, cp, stat);
 	rc = bnxt_trim_rings(bp, &rx_rings, &tx, cp, sh);
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		rx = rx_rings << 1;
@@ -4845,14 +5997,15 @@
 	bp->rx_nr_rings = rx_rings;
 	bp->cp_nr_rings = cp;
 
-	if (!tx || !rx || !cp || !grp || !vnic)
+	if (!tx || !rx || !cp || !grp || !vnic || !stat)
 		return -ENOMEM;
 
 	return rc;
 }
 
 static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-				    int ring_grps, int cp_rings, int vnics)
+				    int ring_grps, int cp_rings, int stats,
+				    int vnics)
 {
 	struct hwrm_func_vf_cfg_input req = {0};
 	u32 flags;
@@ -4862,99 +6015,199 @@
 		return 0;
 
 	__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-				     cp_rings, vnics);
+				     cp_rings, stats, vnics);
 	flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
 		FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
 		FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
-		FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST |
 		FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
-		FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
+		FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST |
+		FUNC_VF_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST;
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		flags |= FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
 
 	req.flags = cpu_to_le32(flags);
 	rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		return -ENOMEM;
-	return 0;
+	return rc;
 }
 
 static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-				    int ring_grps, int cp_rings, int vnics)
+				    int ring_grps, int cp_rings, int stats,
+				    int vnics)
 {
 	struct hwrm_func_cfg_input req = {0};
 	u32 flags;
 	int rc;
 
 	__bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-				     cp_rings, vnics);
+				     cp_rings, stats, vnics);
 	flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
-	if (BNXT_NEW_RM(bp))
+	if (BNXT_NEW_RM(bp)) {
 		flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
 			 FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
-			 FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST |
 			 FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
 			 FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			flags |= FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST |
+				 FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST;
+		else
+			flags |= FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
+	}
 
 	req.flags = cpu_to_le32(flags);
 	rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		return -ENOMEM;
-	return 0;
+	return rc;
 }
 
 static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-				 int ring_grps, int cp_rings, int vnics)
+				 int ring_grps, int cp_rings, int stats,
+				 int vnics)
 {
 	if (bp->hwrm_spec_code < 0x10801)
 		return 0;
 
 	if (BNXT_PF(bp))
 		return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings,
-						ring_grps, cp_rings, vnics);
+						ring_grps, cp_rings, stats,
+						vnics);
 
 	return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps,
-					cp_rings, vnics);
+					cp_rings, stats, vnics);
 }
 
-static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal,
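+/* Query RING_AGGINT_QCAPS for the device's interrupt coalescing limits;
+ * firmware older than spec 1.9.2 doesn't support the query, so the
+ * legacy hard-coded maximums seeded first are kept instead.
+ */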
+static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
+{
+	struct hwrm_ring_aggint_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
+	struct hwrm_ring_aggint_qcaps_input req = {0};
+	int rc;
+
+	coal_cap->cmpl_params = BNXT_LEGACY_COAL_CMPL_PARAMS;
+	coal_cap->num_cmpl_dma_aggr_max = 63;
+	coal_cap->num_cmpl_dma_aggr_during_int_max = 63;
+	coal_cap->cmpl_aggr_dma_tmr_max = 65535;
+	coal_cap->cmpl_aggr_dma_tmr_during_int_max = 65535;
+	coal_cap->int_lat_tmr_min_max = 65535;
+	coal_cap->int_lat_tmr_max_max = 65535;
+	coal_cap->num_cmpl_aggr_int_max = 65535;
+	coal_cap->timer_units = 80;
+
+	if (bp->hwrm_spec_code < 0x10902)
+		return;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_AGGINT_QCAPS, -1, -1);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		coal_cap->cmpl_params = le32_to_cpu(resp->cmpl_params);
+		coal_cap->nq_params = le32_to_cpu(resp->nq_params);
+		coal_cap->num_cmpl_dma_aggr_max =
+			le16_to_cpu(resp->num_cmpl_dma_aggr_max);
+		coal_cap->num_cmpl_dma_aggr_during_int_max =
+			le16_to_cpu(resp->num_cmpl_dma_aggr_during_int_max);
+		coal_cap->cmpl_aggr_dma_tmr_max =
+			le16_to_cpu(resp->cmpl_aggr_dma_tmr_max);
+		coal_cap->cmpl_aggr_dma_tmr_during_int_max =
+			le16_to_cpu(resp->cmpl_aggr_dma_tmr_during_int_max);
+		coal_cap->int_lat_tmr_min_max =
+			le16_to_cpu(resp->int_lat_tmr_min_max);
+		coal_cap->int_lat_tmr_max_max =
+			le16_to_cpu(resp->int_lat_tmr_max_max);
+		coal_cap->num_cmpl_aggr_int_max =
+			le16_to_cpu(resp->num_cmpl_aggr_int_max);
+		coal_cap->timer_units = le16_to_cpu(resp->timer_units);
+	}
+	mutex_unlock(&bp->hwrm_cmd_lock);
+}
+
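+/* Convert microseconds to device timer ticks.  timer_units is the tick
+ * period in nanoseconds reported by RING_AGGINT_QCAPS (80 ns when the
+ * query is unsupported), so e.g. 12 usec / 80 ns = 150 ticks.
+ */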
+static u16 bnxt_usec_to_coal_tmr(struct bnxt *bp, u16 usec)
+{
+	struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
+
+	return usec * 1000 / coal_cap->timer_units;
+}
+
+static void bnxt_hwrm_set_coal_params(struct bnxt *bp,
+	struct bnxt_coal *hw_coal,
 	struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req)
 {
-	u16 val, tmr, max, flags;
+	struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
+	u32 cmpl_params = coal_cap->cmpl_params;
+	u16 val, tmr, max, flags = 0;
 
 	max = hw_coal->bufs_per_record * 128;
 	if (hw_coal->budget)
 		max = hw_coal->bufs_per_record * hw_coal->budget;
+	max = min_t(u16, max, coal_cap->num_cmpl_aggr_int_max);
 
 	val = clamp_t(u16, hw_coal->coal_bufs, 1, max);
 	req->num_cmpl_aggr_int = cpu_to_le16(val);
 
-	/* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */
-	val = min_t(u16, val, 63);
+	val = min_t(u16, val, coal_cap->num_cmpl_dma_aggr_max);
 	req->num_cmpl_dma_aggr = cpu_to_le16(val);
 
-	/* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */
-	val = clamp_t(u16, hw_coal->coal_bufs_irq, 1, 63);
+	val = clamp_t(u16, hw_coal->coal_bufs_irq, 1,
+		      coal_cap->num_cmpl_dma_aggr_during_int_max);
 	req->num_cmpl_dma_aggr_during_int = cpu_to_le16(val);
 
-	tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks);
-	tmr = max_t(u16, tmr, 1);
+	tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks);
+	tmr = clamp_t(u16, tmr, 1, coal_cap->int_lat_tmr_max_max);
 	req->int_lat_tmr_max = cpu_to_le16(tmr);
 
 	/* min timer set to 1/2 of interrupt timer */
-	val = tmr / 2;
-	req->int_lat_tmr_min = cpu_to_le16(val);
+	if (cmpl_params & RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_INT_LAT_TMR_MIN) {
+		val = tmr / 2;
+		val = clamp_t(u16, val, 1, coal_cap->int_lat_tmr_min_max);
+		req->int_lat_tmr_min = cpu_to_le16(val);
+		req->enables |= cpu_to_le16(BNXT_COAL_CMPL_MIN_TMR_ENABLE);
+	}
 
 	/* buf timer set to 1/4 of interrupt timer */
-	val = max_t(u16, tmr / 4, 1);
+	val = clamp_t(u16, tmr / 4, 1, coal_cap->cmpl_aggr_dma_tmr_max);
 	req->cmpl_aggr_dma_tmr = cpu_to_le16(val);
 
-	tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks_irq);
-	tmr = max_t(u16, tmr, 1);
-	req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr);
+	if (cmpl_params &
+	    RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_DMA_AGGR_DURING_INT) {
+		tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks_irq);
+		val = clamp_t(u16, tmr, 1,
+			      coal_cap->cmpl_aggr_dma_tmr_during_int_max);
+		req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(val);
+		req->enables |=
+			cpu_to_le16(BNXT_COAL_CMPL_AGGR_TMR_DURING_INT_ENABLE);
+	}
 
-	flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET;
-	if (hw_coal->idle_thresh && hw_coal->coal_ticks < hw_coal->idle_thresh)
+	if (cmpl_params & RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_TIMER_RESET)
+		flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET;
+	if ((cmpl_params & RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_RING_IDLE) &&
+	    hw_coal->idle_thresh && hw_coal->coal_ticks < hw_coal->idle_thresh)
 		flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE;
 	req->flags = cpu_to_le16(flags);
+	req->enables |= cpu_to_le16(BNXT_COAL_CMPL_ENABLES);
+}
+
+/* Caller holds bp->hwrm_cmd_lock */
+static int __bnxt_hwrm_set_coal_nq(struct bnxt *bp, struct bnxt_napi *bnapi,
+				   struct bnxt_coal *hw_coal)
+{
+	struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req = {0};
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
+	u32 nq_params = coal_cap->nq_params;
+	u16 tmr;
+
+	if (!(nq_params & RING_AGGINT_QCAPS_RESP_NQ_PARAMS_INT_LAT_TMR_MIN))
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS,
+			       -1, -1);
+	req.ring_id = cpu_to_le16(cpr->cp_ring_struct.fw_ring_id);
+	req.flags =
+		cpu_to_le16(RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_IS_NQ);
+
+	tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks) / 2;
+	tmr = clamp_t(u16, tmr, 1, coal_cap->int_lat_tmr_min_max);
+	req.int_lat_tmr_min = cpu_to_le16(tmr);
+	req.enables |= cpu_to_le16(BNXT_COAL_CMPL_MIN_TMR_ENABLE);
+	return _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
 int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi)
@@ -4962,7 +6215,6 @@
 	struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0};
 	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	struct bnxt_coal coal;
-	unsigned int grp_idx;
 
 	/* Tick values in micro seconds.
 	 * 1 coal_buf x bufs_per_record = 1 completion record.
@@ -4978,10 +6230,9 @@
 	bnxt_hwrm_cmd_hdr_init(bp, &req_rx,
 			       HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
 
-	bnxt_hwrm_set_coal_params(&coal, &req_rx);
+	bnxt_hwrm_set_coal_params(bp, &coal, &req_rx);
 
-	grp_idx = bnapi->index;
-	req_rx.ring_id = cpu_to_le16(bp->grp_info[grp_idx].cp_fw_ring_id);
+	req_rx.ring_id = cpu_to_le16(bnxt_cp_ring_for_rx(bp, bnapi->rx_ring));
 
 	return hwrm_send_message(bp, &req_rx, sizeof(req_rx),
 				 HWRM_CMD_TIMEOUT);
@@ -4998,22 +6249,46 @@
 	bnxt_hwrm_cmd_hdr_init(bp, &req_tx,
 			       HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
 
-	bnxt_hwrm_set_coal_params(&bp->rx_coal, &req_rx);
-	bnxt_hwrm_set_coal_params(&bp->tx_coal, &req_tx);
+	bnxt_hwrm_set_coal_params(bp, &bp->rx_coal, &req_rx);
+	bnxt_hwrm_set_coal_params(bp, &bp->tx_coal, &req_tx);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
+		struct bnxt_coal *hw_coal;
+		u16 ring_id;
 
 		req = &req_rx;
-		if (!bnapi->rx_ring)
+		if (!bnapi->rx_ring) {
+			ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
 			req = &req_tx;
-		req->ring_id = cpu_to_le16(bp->grp_info[i].cp_fw_ring_id);
+		} else {
+			ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring);
+		}
+		req->ring_id = cpu_to_le16(ring_id);
 
 		rc = _hwrm_send_message(bp, req, sizeof(*req),
 					HWRM_CMD_TIMEOUT);
 		if (rc)
 			break;
+
+		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+			continue;
+
+		if (bnapi->rx_ring && bnapi->tx_ring) {
+			req = &req_tx;
+			ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
+			req->ring_id = cpu_to_le16(ring_id);
+			rc = _hwrm_send_message(bp, req, sizeof(*req),
+						HWRM_CMD_TIMEOUT);
+			if (rc)
+				break;
+		}
+		if (bnapi->rx_ring)
+			hw_coal = &bp->rx_coal;
+		else
+			hw_coal = &bp->tx_coal;
+		__bnxt_hwrm_set_coal_nq(bp, bnapi, hw_coal);
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -5042,8 +6317,6 @@
 
 			rc = _hwrm_send_message(bp, &req, sizeof(req),
 						HWRM_CMD_TIMEOUT);
-			if (rc)
-				break;
 
 			cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
 		}
@@ -5063,6 +6336,7 @@
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
 
+	req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
 	req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
@@ -5104,6 +6378,8 @@
 		struct bnxt_vf_info *vf = &bp->vf;
 
 		vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
+	} else {
+		bp->pf.registered_vfs = le16_to_cpu(resp->registered_vfs);
 	}
 #endif
 	flags = le16_to_cpu(resp->flags);
@@ -5140,6 +6416,453 @@
 	return rc;
 }
 
+static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
+{
+	struct hwrm_func_backing_store_qcaps_input req = {0};
+	struct hwrm_func_backing_store_qcaps_output *resp =
+		bp->hwrm_cmd_resp_addr;
+	int rc;
+
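+	/* Ask the firmware how much host backing-store context memory (QP,
+	 * SRQ, CQ, VNIC, stats, TQM, MRAV, TIM) it wants, and cache the
+	 * reported limits in bp->ctx for the allocation pass that follows.
+	 */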
+	if (bp->hwrm_spec_code < 0x10902 || BNXT_VF(bp) || bp->ctx)
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_QCAPS, -1, -1);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		struct bnxt_ctx_pg_info *ctx_pg;
+		struct bnxt_ctx_mem_info *ctx;
+		int i;
+
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx) {
+			rc = -ENOMEM;
+			goto ctx_err;
+		}
+		ctx_pg = kcalloc(bp->max_q + 1, sizeof(*ctx_pg), GFP_KERNEL);
+		if (!ctx_pg) {
+			kfree(ctx);
+			rc = -ENOMEM;
+			goto ctx_err;
+		}
+		for (i = 0; i < bp->max_q + 1; i++, ctx_pg++)
+			ctx->tqm_mem[i] = ctx_pg;
+
+		bp->ctx = ctx;
+		ctx->qp_max_entries = le32_to_cpu(resp->qp_max_entries);
+		ctx->qp_min_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries);
+		ctx->qp_max_l2_entries = le16_to_cpu(resp->qp_max_l2_entries);
+		ctx->qp_entry_size = le16_to_cpu(resp->qp_entry_size);
+		ctx->srq_max_l2_entries = le16_to_cpu(resp->srq_max_l2_entries);
+		ctx->srq_max_entries = le32_to_cpu(resp->srq_max_entries);
+		ctx->srq_entry_size = le16_to_cpu(resp->srq_entry_size);
+		ctx->cq_max_l2_entries = le16_to_cpu(resp->cq_max_l2_entries);
+		ctx->cq_max_entries = le32_to_cpu(resp->cq_max_entries);
+		ctx->cq_entry_size = le16_to_cpu(resp->cq_entry_size);
+		ctx->vnic_max_vnic_entries =
+			le16_to_cpu(resp->vnic_max_vnic_entries);
+		ctx->vnic_max_ring_table_entries =
+			le16_to_cpu(resp->vnic_max_ring_table_entries);
+		ctx->vnic_entry_size = le16_to_cpu(resp->vnic_entry_size);
+		ctx->stat_max_entries = le32_to_cpu(resp->stat_max_entries);
+		ctx->stat_entry_size = le16_to_cpu(resp->stat_entry_size);
+		ctx->tqm_entry_size = le16_to_cpu(resp->tqm_entry_size);
+		ctx->tqm_min_entries_per_ring =
+			le32_to_cpu(resp->tqm_min_entries_per_ring);
+		ctx->tqm_max_entries_per_ring =
+			le32_to_cpu(resp->tqm_max_entries_per_ring);
+		ctx->tqm_entries_multiple = resp->tqm_entries_multiple;
+		if (!ctx->tqm_entries_multiple)
+			ctx->tqm_entries_multiple = 1;
+		ctx->mrav_max_entries = le32_to_cpu(resp->mrav_max_entries);
+		ctx->mrav_entry_size = le16_to_cpu(resp->mrav_entry_size);
+		ctx->mrav_num_entries_units =
+			le16_to_cpu(resp->mrav_num_entries_units);
+		ctx->tim_entry_size = le16_to_cpu(resp->tim_entry_size);
+		ctx->tim_max_entries = le32_to_cpu(resp->tim_max_entries);
+	} else {
+		rc = 0;
+	}
+ctx_err:
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
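+/* Encode a backing-store area's page size (upper nibble: 0 for the
+ * default 4K, 1 for 8K, 2 for 64K) and indirection depth (low bits 0-2)
+ * for the firmware, along with the page-directory or single-page DMA
+ * address.
+ */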
+static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr,
+				  __le64 *pg_dir)
+{
+	u8 pg_size = 0;
+
+	if (BNXT_PAGE_SHIFT == 13)
+		pg_size = 1 << 4;
+	else if (BNXT_PAGE_SHIFT == 16)
+		pg_size = 2 << 4;
+
+	*pg_attr = pg_size;
+	if (rmem->depth >= 1) {
+		if (rmem->depth == 2)
+			*pg_attr |= 2;
+		else
+			*pg_attr |= 1;
+		*pg_dir = cpu_to_le64(rmem->pg_tbl_map);
+	} else {
+		*pg_dir = cpu_to_le64(rmem->dma_arr[0]);
+	}
+}
+
+#define FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES			\
+	(FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP |		\
+	 FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ |		\
+	 FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ |		\
+	 FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC |		\
+	 FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT)
+
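+/* Hand the host-resident context memory layout to the firmware in one
+ * FUNC_BACKING_STORE_CFG request, covering whichever of the QP, SRQ, CQ,
+ * VNIC, stats, MRAV, TIM and per-queue TQM areas the caller enabled.
+ */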
+static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
+{
+	struct hwrm_func_backing_store_cfg_input req = {0};
+	struct bnxt_ctx_mem_info *ctx = bp->ctx;
+	struct bnxt_ctx_pg_info *ctx_pg;
+	__le32 *num_entries;
+	__le64 *pg_dir;
+	u32 flags = 0;
+	u8 *pg_attr;
+	int i, rc;
+	u32 ena;
+
+	if (!ctx)
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1);
+	req.enables = cpu_to_le32(enables);
+
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP) {
+		ctx_pg = &ctx->qp_mem;
+		req.qp_num_entries = cpu_to_le32(ctx_pg->entries);
+		req.qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries);
+		req.qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries);
+		req.qp_entry_size = cpu_to_le16(ctx->qp_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.qpc_pg_size_qpc_lvl,
+				      &req.qpc_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ) {
+		ctx_pg = &ctx->srq_mem;
+		req.srq_num_entries = cpu_to_le32(ctx_pg->entries);
+		req.srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries);
+		req.srq_entry_size = cpu_to_le16(ctx->srq_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.srq_pg_size_srq_lvl,
+				      &req.srq_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ) {
+		ctx_pg = &ctx->cq_mem;
+		req.cq_num_entries = cpu_to_le32(ctx_pg->entries);
+		req.cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries);
+		req.cq_entry_size = cpu_to_le16(ctx->cq_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req.cq_pg_size_cq_lvl,
+				      &req.cq_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC) {
+		ctx_pg = &ctx->vnic_mem;
+		req.vnic_num_vnic_entries =
+			cpu_to_le16(ctx->vnic_max_vnic_entries);
+		req.vnic_num_ring_table_entries =
+			cpu_to_le16(ctx->vnic_max_ring_table_entries);
+		req.vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.vnic_pg_size_vnic_lvl,
+				      &req.vnic_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) {
+		ctx_pg = &ctx->stat_mem;
+		req.stat_num_entries = cpu_to_le32(ctx->stat_max_entries);
+		req.stat_entry_size = cpu_to_le16(ctx->stat_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.stat_pg_size_stat_lvl,
+				      &req.stat_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) {
+		ctx_pg = &ctx->mrav_mem;
+		req.mrav_num_entries = cpu_to_le32(ctx_pg->entries);
+		if (ctx->mrav_num_entries_units)
+			flags |=
+			FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT;
+		req.mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.mrav_pg_size_mrav_lvl,
+				      &req.mrav_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) {
+		ctx_pg = &ctx->tim_mem;
+		req.tim_num_entries = cpu_to_le32(ctx_pg->entries);
+		req.tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.tim_pg_size_tim_lvl,
+				      &req.tim_page_dir);
+	}
+	for (i = 0, num_entries = &req.tqm_sp_num_entries,
+	     pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl,
+	     pg_dir = &req.tqm_sp_page_dir,
+	     ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP;
+	     i < 9; i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) {
+		if (!(enables & ena))
+			continue;
+
+		req.tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size);
+		ctx_pg = ctx->tqm_mem[i];
+		*num_entries = cpu_to_le32(ctx_pg->entries);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir);
+	}
+	req.flags = cpu_to_le32(flags);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	return rc;
+}
+
+static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
+				  struct bnxt_ctx_pg_info *ctx_pg)
+{
+	struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
+
+	rmem->page_size = BNXT_PAGE_SIZE;
+	rmem->pg_arr = ctx_pg->ctx_pg_arr;
+	rmem->dma_arr = ctx_pg->ctx_dma_arr;
+	rmem->flags = BNXT_RMEM_VALID_PTE_FLAG;
+	if (rmem->depth >= 1)
+		rmem->flags |= BNXT_RMEM_USE_FULL_PAGE_FLAG;
+	return bnxt_alloc_ring(bp, rmem);
+}
+
+static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp,
+				  struct bnxt_ctx_pg_info *ctx_pg, u32 mem_size,
+				  u8 depth)
+{
+	struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
+	int rc;
+
+	if (!mem_size)
+		return 0;
+
+	ctx_pg->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
+	if (ctx_pg->nr_pages > MAX_CTX_TOTAL_PAGES) {
+		ctx_pg->nr_pages = 0;
+		return -EINVAL;
+	}
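+	/* Use a two-level page directory when the context spans more
+	 * pages than a single level can map (or the caller asked for a
+	 * deeper table): level 0 is an array of page-table pointers
+	 * (hence the pointer-sized sizeof(ctx_pg) elements below), each
+	 * table mapping up to MAX_CTX_PAGES data pages.
+	 */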
+	if (ctx_pg->nr_pages > MAX_CTX_PAGES || depth > 1) {
+		int nr_tbls, i;
+
+		rmem->depth = 2;
+		ctx_pg->ctx_pg_tbl = kcalloc(MAX_CTX_PAGES, sizeof(ctx_pg),
+					     GFP_KERNEL);
+		if (!ctx_pg->ctx_pg_tbl)
+			return -ENOMEM;
+		nr_tbls = DIV_ROUND_UP(ctx_pg->nr_pages, MAX_CTX_PAGES);
+		rmem->nr_pages = nr_tbls;
+		rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg);
+		if (rc)
+			return rc;
+		for (i = 0; i < nr_tbls; i++) {
+			struct bnxt_ctx_pg_info *pg_tbl;
+
+			pg_tbl = kzalloc(sizeof(*pg_tbl), GFP_KERNEL);
+			if (!pg_tbl)
+				return -ENOMEM;
+			ctx_pg->ctx_pg_tbl[i] = pg_tbl;
+			rmem = &pg_tbl->ring_mem;
+			rmem->pg_tbl = ctx_pg->ctx_pg_arr[i];
+			rmem->pg_tbl_map = ctx_pg->ctx_dma_arr[i];
+			rmem->depth = 1;
+			rmem->nr_pages = MAX_CTX_PAGES;
+			if (i == (nr_tbls - 1)) {
+				int rem = ctx_pg->nr_pages % MAX_CTX_PAGES;
+
+				if (rem)
+					rmem->nr_pages = rem;
+			}
+			rc = bnxt_alloc_ctx_mem_blk(bp, pg_tbl);
+			if (rc)
+				break;
+		}
+	} else {
+		rmem->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
+		if (rmem->nr_pages > 1 || depth)
+			rmem->depth = 1;
+		rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg);
+	}
+	return rc;
+}
+
+static void bnxt_free_ctx_pg_tbls(struct bnxt *bp,
+				  struct bnxt_ctx_pg_info *ctx_pg)
+{
+	struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
+
+	if (rmem->depth > 1 || ctx_pg->nr_pages > MAX_CTX_PAGES ||
+	    ctx_pg->ctx_pg_tbl) {
+		int i, nr_tbls = rmem->nr_pages;
+
+		for (i = 0; i < nr_tbls; i++) {
+			struct bnxt_ctx_pg_info *pg_tbl;
+			struct bnxt_ring_mem_info *rmem2;
+
+			pg_tbl = ctx_pg->ctx_pg_tbl[i];
+			if (!pg_tbl)
+				continue;
+			rmem2 = &pg_tbl->ring_mem;
+			bnxt_free_ring(bp, rmem2);
+			ctx_pg->ctx_pg_arr[i] = NULL;
+			kfree(pg_tbl);
+			ctx_pg->ctx_pg_tbl[i] = NULL;
+		}
+		kfree(ctx_pg->ctx_pg_tbl);
+		ctx_pg->ctx_pg_tbl = NULL;
+	}
+	bnxt_free_ring(bp, rmem);
+	ctx_pg->nr_pages = 0;
+}
+
+static void bnxt_free_ctx_mem(struct bnxt *bp)
+{
+	struct bnxt_ctx_mem_info *ctx = bp->ctx;
+	int i;
+
+	if (!ctx)
+		return;
+
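+	/* The tqm_mem[] pointers are carved from a single allocation
+	 * anchored at tqm_mem[0], so freeing that one element releases
+	 * the whole array.
+	 */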
+	if (ctx->tqm_mem[0]) {
+		for (i = 0; i < bp->max_q + 1; i++)
+			bnxt_free_ctx_pg_tbls(bp, ctx->tqm_mem[i]);
+		kfree(ctx->tqm_mem[0]);
+		ctx->tqm_mem[0] = NULL;
+	}
+
+	bnxt_free_ctx_pg_tbls(bp, &ctx->tim_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->mrav_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->stat_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->vnic_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->cq_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->srq_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->qp_mem);
+	ctx->flags &= ~BNXT_CTX_FLAG_INITED;
+}
+
+static int bnxt_alloc_ctx_mem(struct bnxt *bp)
+{
+	struct bnxt_ctx_pg_info *ctx_pg;
+	struct bnxt_ctx_mem_info *ctx;
+	u32 mem_size, ena, entries;
+	u32 num_mr, num_ah;
+	u32 extra_srqs = 0;
+	u32 extra_qps = 0;
+	u8 pg_lvl = 1;
+	int i, rc;
+
+	rc = bnxt_hwrm_func_backing_store_qcaps(bp);
+	if (rc) {
+		netdev_err(bp->dev, "Failed querying context mem capability, rc = %d.\n",
+			   rc);
+		return rc;
+	}
+	ctx = bp->ctx;
+	if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED))
+		return 0;
+
+	if ((bp->flags & BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) {
+		pg_lvl = 2;
+		extra_qps = 65536;
+		extra_srqs = 8192;
+	}
+
+	ctx_pg = &ctx->qp_mem;
+	ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries +
+			  extra_qps;
+	mem_size = ctx->qp_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl);
+	if (rc)
+		return rc;
+
+	ctx_pg = &ctx->srq_mem;
+	ctx_pg->entries = ctx->srq_max_l2_entries + extra_srqs;
+	mem_size = ctx->srq_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl);
+	if (rc)
+		return rc;
+
+	ctx_pg = &ctx->cq_mem;
+	ctx_pg->entries = ctx->cq_max_l2_entries + extra_qps * 2;
+	mem_size = ctx->cq_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl);
+	if (rc)
+		return rc;
+
+	ctx_pg = &ctx->vnic_mem;
+	ctx_pg->entries = ctx->vnic_max_vnic_entries +
+			  ctx->vnic_max_ring_table_entries;
+	mem_size = ctx->vnic_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
+	if (rc)
+		return rc;
+
+	ctx_pg = &ctx->stat_mem;
+	ctx_pg->entries = ctx->stat_max_entries;
+	mem_size = ctx->stat_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
+	if (rc)
+		return rc;
+
+	ena = 0;
+	if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
+		goto skip_rdma;
+
+	ctx_pg = &ctx->mrav_mem;
+	/* 128K extra is needed to accommodate static AH context
+	 * allocation by f/w.
+	 */
+	num_mr = 1024 * 256;
+	num_ah = 1024 * 128;
+	ctx_pg->entries = num_mr + num_ah;
+	mem_size = ctx->mrav_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 2);
+	if (rc)
+		return rc;
+	ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV;
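+	/* When firmware reports an entries unit, the MRAV entries field
+	 * is split: the high 16 bits carry the MR count and the low 16
+	 * bits the AH count, both scaled down by the unit size.
+	 */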
+	if (ctx->mrav_num_entries_units)
+		ctx_pg->entries =
+			((num_mr / ctx->mrav_num_entries_units) << 16) |
+			 (num_ah / ctx->mrav_num_entries_units);
+
+	ctx_pg = &ctx->tim_mem;
+	ctx_pg->entries = ctx->qp_mem.entries;
+	mem_size = ctx->tim_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
+	if (rc)
+		return rc;
+	ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM;
+
+skip_rdma:
+	entries = ctx->qp_max_l2_entries + extra_qps;
+	entries = roundup(entries, ctx->tqm_entries_multiple);
+	entries = clamp_t(u32, entries, ctx->tqm_min_entries_per_ring,
+			  ctx->tqm_max_entries_per_ring);
+	for (i = 0; i < bp->max_q + 1; i++) {
+		ctx_pg = ctx->tqm_mem[i];
+		ctx_pg->entries = entries;
+		mem_size = ctx->tqm_entry_size * entries;
+		rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
+		if (rc)
+			return rc;
+		ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i;
+	}
+	ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES;
+	rc = bnxt_hwrm_func_backing_store_cfg(bp, ena);
+	if (rc)
+		netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n",
+			   rc);
+	else
+		ctx->flags |= BNXT_CTX_FLAG_INITED;
+
+	return 0;
+}
+
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
 {
 	struct hwrm_func_resource_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
@@ -5151,11 +6874,10 @@
 	req.fid = cpu_to_le16(0xffff);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc) {
-		rc = -EIO;
+	rc = _hwrm_send_message_silent(bp, &req, sizeof(req),
+				       HWRM_CMD_TIMEOUT);
+	if (rc)
 		goto hwrm_func_resc_qcaps_exit;
-	}
 
 	hw_resc->max_tx_sch_inputs = le16_to_cpu(resp->max_tx_scheduler_inputs);
 	if (!all)
@@ -5178,6 +6900,13 @@
 	hw_resc->min_stat_ctxs = le16_to_cpu(resp->min_stat_ctx);
 	hw_resc->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx);
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		u16 max_msix = le16_to_cpu(resp->max_msix);
+
+		hw_resc->max_nqs = max_msix;
+		hw_resc->max_hw_ring_grps = hw_resc->max_rx_rings;
+	}
+
 	if (BNXT_PF(bp)) {
 		struct bnxt_pf_info *pf = &bp->pf;
 
@@ -5212,6 +6941,14 @@
 		bp->flags |= BNXT_FLAG_ROCEV1_CAP;
 	if (flags & FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED)
 		bp->flags |= BNXT_FLAG_ROCEV2_CAP;
+	if (flags & FUNC_QCAPS_RESP_FLAGS_PCIE_STATS_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_PCIE_STATS_SUPPORTED;
+	if (flags & FUNC_QCAPS_RESP_FLAGS_EXT_STATS_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_EXT_STATS_SUPPORTED;
+	if (flags & FUNC_QCAPS_RESP_FLAGS_ERROR_RECOVERY_CAPABLE)
+		bp->fw_cap |= BNXT_FW_CAP_ERROR_RECOVERY;
+	if (flags & FUNC_QCAPS_RESP_FLAGS_ERR_RECOVER_RELOAD)
+		bp->fw_cap |= BNXT_FW_CAP_ERR_RECOVER_RELOAD;
 
 	bp->tx_push_thresh = 0;
 	if (flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)
@@ -5243,6 +6980,7 @@
 		pf->max_tx_wm_flows = le32_to_cpu(resp->max_tx_wm_flows);
 		pf->max_rx_em_flows = le32_to_cpu(resp->max_rx_em_flows);
 		pf->max_rx_wm_flows = le32_to_cpu(resp->max_rx_wm_flows);
+		bp->flags &= ~BNXT_FLAG_WOL_CAP;
 		if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED)
 			bp->flags |= BNXT_FLAG_WOL_CAP;
 	} else {
@@ -5259,6 +6997,8 @@
 	return rc;
 }
 
+static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp);
+
 static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 {
 	int rc;
@@ -5266,7 +7006,15 @@
 	rc = __bnxt_hwrm_func_qcaps(bp);
 	if (rc)
 		return rc;
+	rc = bnxt_hwrm_queue_qportcfg(bp);
+	if (rc) {
+		netdev_err(bp->dev, "hwrm query qportcfg failure rc: %d\n", rc);
+		return rc;
+	}
 	if (bp->hwrm_spec_code >= 0x10803) {
+		rc = bnxt_alloc_ctx_mem(bp);
+		if (rc)
+			return rc;
 		rc = bnxt_hwrm_func_resc_qcaps(bp, true);
 		if (!rc)
 			bp->fw_cap |= BNXT_FW_CAP_NEW_RM;
@@ -5274,6 +7022,131 @@
 	return 0;
 }
 
+static int bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(struct bnxt *bp)
+{
+	struct hwrm_cfa_adv_flow_mgnt_qcaps_input req = {0};
+	struct hwrm_cfa_adv_flow_mgnt_qcaps_output *resp;
+	int rc = 0;
+	u32 flags;
+
+	if (!(bp->fw_cap & BNXT_FW_CAP_CFA_ADV_FLOW))
+		return 0;
+
+	resp = bp->hwrm_cmd_resp_addr;
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ADV_FLOW_MGNT_QCAPS, -1, -1);
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		goto hwrm_cfa_adv_qcaps_exit;
+
+	flags = le32_to_cpu(resp->flags);
+	if (flags &
+	    CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX;
+
+hwrm_cfa_adv_qcaps_exit:
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
+static int bnxt_map_fw_health_regs(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 reg_base = 0xffffffff;
+	int i;
+
+	/* Only pre-map the monitoring GRC registers using window 3 */
+	for (i = 0; i < 4; i++) {
+		u32 reg = fw_health->regs[i];
+
+		if (BNXT_FW_HEALTH_REG_TYPE(reg) != BNXT_FW_HEALTH_REG_TYPE_GRC)
+			continue;
+		if (reg_base == 0xffffffff)
+			reg_base = reg & BNXT_GRC_BASE_MASK;
+		if ((reg & BNXT_GRC_BASE_MASK) != reg_base)
+			return -ERANGE;
+		fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_BASE +
+					    (reg & BNXT_GRC_OFFSET_MASK);
+	}
+	if (reg_base == 0xffffffff)
+		return 0;
+
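+	/* Point the health register window at the common GRC base so the
+	 * offsets computed above become directly readable through BAR0.
+	 */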
+	writel(reg_base, bp->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT +
+			 BNXT_FW_HEALTH_WIN_MAP_OFF);
+	return 0;
+}
+
+static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
+{
+	struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	struct hwrm_error_recovery_qcfg_input req = {0};
+	int rc, i;
+
+	if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_ERROR_RECOVERY_QCFG, -1, -1);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		goto err_recovery_out;
+	if (!fw_health) {
+		fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
+		bp->fw_health = fw_health;
+		if (!fw_health) {
+			rc = -ENOMEM;
+			goto err_recovery_out;
+		}
+	}
+	fw_health->flags = le32_to_cpu(resp->flags);
+	if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
+	    !(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
+		rc = -EINVAL;
+		goto err_recovery_out;
+	}
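+	/* All wait periods below are reported by firmware in 100 msec
+	 * units, hence the _dsecs naming on the driver side.
+	 */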
+	fw_health->polling_dsecs = le32_to_cpu(resp->driver_polling_freq);
+	fw_health->master_func_wait_dsecs =
+		le32_to_cpu(resp->master_func_wait_period);
+	fw_health->normal_func_wait_dsecs =
+		le32_to_cpu(resp->normal_func_wait_period);
+	fw_health->post_reset_wait_dsecs =
+		le32_to_cpu(resp->master_func_wait_period_after_reset);
+	fw_health->post_reset_max_wait_dsecs =
+		le32_to_cpu(resp->max_bailout_time_after_reset);
+	fw_health->regs[BNXT_FW_HEALTH_REG] =
+		le32_to_cpu(resp->fw_health_status_reg);
+	fw_health->regs[BNXT_FW_HEARTBEAT_REG] =
+		le32_to_cpu(resp->fw_heartbeat_reg);
+	fw_health->regs[BNXT_FW_RESET_CNT_REG] =
+		le32_to_cpu(resp->fw_reset_cnt_reg);
+	fw_health->regs[BNXT_FW_RESET_INPROG_REG] =
+		le32_to_cpu(resp->reset_inprogress_reg);
+	fw_health->fw_reset_inprog_reg_mask =
+		le32_to_cpu(resp->reset_inprogress_reg_mask);
+	fw_health->fw_reset_seq_cnt = resp->reg_array_cnt;
+	if (fw_health->fw_reset_seq_cnt >= 16) {
+		rc = -EINVAL;
+		goto err_recovery_out;
+	}
+	for (i = 0; i < fw_health->fw_reset_seq_cnt; i++) {
+		fw_health->fw_reset_seq_regs[i] =
+			le32_to_cpu(resp->reset_reg[i]);
+		fw_health->fw_reset_seq_vals[i] =
+			le32_to_cpu(resp->reset_reg_val[i]);
+		fw_health->fw_reset_seq_delay_msec[i] =
+			resp->delay_after_reset[i];
+	}
+err_recovery_out:
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	if (!rc)
+		rc = bnxt_map_fw_health_regs(bp);
+	if (rc)
+		bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
+	return rc;
+}
+
 static int bnxt_hwrm_func_reset(struct bnxt *bp)
 {
 	struct hwrm_func_reset_input req = {0};
@@ -5311,13 +7184,15 @@
 	no_rdma = !(bp->flags & BNXT_FLAG_ROCE_CAP);
 	qptr = &resp->queue_id0;
 	for (i = 0, j = 0; i < bp->max_tc; i++) {
-		bp->q_info[j].queue_id = *qptr++;
+		bp->q_info[j].queue_id = *qptr;
+		bp->q_ids[i] = *qptr++;
 		bp->q_info[j].queue_profile = *qptr++;
 		bp->tc_to_qidx[j] = j;
 		if (!BNXT_CNPQ(bp->q_info[j].queue_profile) ||
 		    (no_rdma && BNXT_PF(bp)))
 			j++;
 	}
+	bp->max_q = bp->max_tc;
 	bp->max_tc = max_t(u8, j, 1);
 
 	if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG)
@@ -5331,20 +7206,30 @@
 	return rc;
 }
 
-static int bnxt_hwrm_ver_get(struct bnxt *bp)
+static int __bnxt_hwrm_ver_get(struct bnxt *bp, bool silent)
 {
-	int rc;
 	struct hwrm_ver_get_input req = {0};
-	struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr;
-	u32 dev_caps_cfg;
+	int rc;
 
-	bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN;
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VER_GET, -1, -1);
 	req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
 	req.hwrm_intf_min = HWRM_VERSION_MINOR;
 	req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+
+	rc = bnxt_hwrm_do_send_msg(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT,
+				   silent);
+	return rc;
+}
+
+static int bnxt_hwrm_ver_get(struct bnxt *bp)
+{
+	struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr;
+	u32 dev_caps_cfg;
+	int rc;
+
+	bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN;
 	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	rc = __bnxt_hwrm_ver_get(bp, false);
 	if (rc)
 		goto hwrm_ver_get_exit;
 
@@ -5363,12 +7248,25 @@
 		 resp->hwrm_fw_maj_8b, resp->hwrm_fw_min_8b,
 		 resp->hwrm_fw_bld_8b, resp->hwrm_fw_rsvd_8b);
 
+	if (strlen(resp->active_pkg_name)) {
+		int fw_ver_len = strlen(bp->fw_ver_str);
+
+		snprintf(bp->fw_ver_str + fw_ver_len,
+			 FW_VER_STR_LEN - fw_ver_len - 1, "/pkg %s",
+			 resp->active_pkg_name);
+		bp->fw_cap |= BNXT_FW_CAP_PKG_VER;
+	}
+
 	bp->hwrm_cmd_timeout = le16_to_cpu(resp->def_req_timeout);
 	if (!bp->hwrm_cmd_timeout)
 		bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT;
 
-	if (resp->hwrm_intf_maj_8b >= 1)
+	if (resp->hwrm_intf_maj_8b >= 1) {
 		bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len);
+		bp->hwrm_max_ext_req_len = le16_to_cpu(resp->max_ext_req_len);
+	}
+	if (bp->hwrm_max_ext_req_len < HWRM_MAX_REQ_LEN)
+		bp->hwrm_max_ext_req_len = HWRM_MAX_REQ_LEN;
 
 	bp->chip_num = le16_to_cpu(resp->chip_num);
 	if (bp->chip_num == CHIP_NUM_58700 && !resp->chip_rev &&
@@ -5380,6 +7278,21 @@
 	    (dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_REQUIRED))
 		bp->fw_cap |= BNXT_FW_CAP_SHORT_CMD;
 
+	if (dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_KONG_MB_CHNL;
+
+	if (dev_caps_cfg &
+	    VER_GET_RESP_DEV_CAPS_CFG_FLOW_HANDLE_64BIT_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_OVS_64BIT_HANDLE;
+
+	if (dev_caps_cfg &
+	    VER_GET_RESP_DEV_CAPS_CFG_TRUSTED_VF_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_TRUSTED_VF;
+
+	if (dev_caps_cfg &
+	    VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_CFA_ADV_FLOW;
+
 hwrm_ver_get_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -5425,8 +7338,12 @@
 
 static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
 {
+	struct hwrm_port_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_queue_pri2cos_qcfg_input req2 = {0};
 	struct hwrm_port_qstats_ext_input req = {0};
 	struct bnxt_pf_info *pf = &bp->pf;
+	u32 tx_stat_size;
+	int rc;
 
 	if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT))
 		return 0;
@@ -5435,6 +7352,62 @@
 	req.port_id = cpu_to_le16(pf->port_id);
 	req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
 	req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map);
+	tx_stat_size = bp->hw_tx_port_stats_ext ?
+		       sizeof(*bp->hw_tx_port_stats_ext) : 0;
+	req.tx_stat_size = cpu_to_le16(tx_stat_size);
+	req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8;
+		bp->fw_tx_stats_ext_size = tx_stat_size ?
+			le16_to_cpu(resp->tx_stat_size) / 8 : 0;
+	} else {
+		bp->fw_rx_stats_ext_size = 0;
+		bp->fw_tx_stats_ext_size = 0;
+	}
+	if (bp->fw_tx_stats_ext_size <=
+	    offsetof(struct tx_port_stats_ext, pfc_pri0_tx_duration_us) / 8) {
+		mutex_unlock(&bp->hwrm_cmd_lock);
+		bp->pri2cos_valid = 0;
+		return rc;
+	}
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req2, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+	req2.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+
+	rc = _hwrm_send_message(bp, &req2, sizeof(req2), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		struct hwrm_queue_pri2cos_qcfg_output *resp2;
+		u8 *pri2cos;
+		int i, j;
+
+		resp2 = bp->hwrm_cmd_resp_addr;
+		pri2cos = &resp2->pri0_cos_queue_id;
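+		/* Map each priority's hardware CoS queue ID back to the
+		 * driver's queue index so extended statistics can be
+		 * reported per priority.
+		 */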
+		for (i = 0; i < 8; i++) {
+			u8 queue_id = pri2cos[i];
+
+			for (j = 0; j < bp->max_q; j++) {
+				if (bp->q_ids[j] == queue_id)
+					bp->pri2cos[i] = j;
+			}
+		}
+		bp->pri2cos_valid = 1;
+	}
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
+static int bnxt_hwrm_pcie_qstats(struct bnxt *bp)
+{
+	struct hwrm_pcie_qstats_input req = {0};
+
+	if (!(bp->flags & BNXT_FLAG_PCIE_STATS))
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PCIE_QSTATS, -1, -1);
+	req.pcie_stat_size = cpu_to_le16(sizeof(struct pcie_ctx_hw_stats));
+	req.pcie_stat_host_addr = cpu_to_le64(bp->hw_pcie_stats_map);
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
@@ -5459,6 +7432,8 @@
 
 	if (set_tpa)
 		tpa_flags = bp->flags & BNXT_FLAG_TPA;
+	else if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+		return 0;
 	for (i = 0; i < bp->nr_vnics; i++) {
 		rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags);
 		if (rc) {
@@ -5478,19 +7453,29 @@
 		bnxt_hwrm_vnic_set_rss(bp, i, false);
 }
 
-static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
-				    bool irq_re_init)
+static void bnxt_clear_vnic(struct bnxt *bp)
 {
-	if (bp->vnic_info) {
-		bnxt_hwrm_clear_vnic_filter(bp);
+	if (!bp->vnic_info)
+		return;
+
+	bnxt_hwrm_clear_vnic_filter(bp);
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5)) {
 		/* clear all RSS setting before free vnic ctx */
 		bnxt_hwrm_clear_vnic_rss(bp);
 		bnxt_hwrm_vnic_ctx_free(bp);
-		/* before free the vnic, undo the vnic tpa settings */
-		if (bp->flags & BNXT_FLAG_TPA)
-			bnxt_set_tpa(bp, false);
-		bnxt_hwrm_vnic_free(bp);
 	}
+	/* before freeing the VNIC, undo the VNIC TPA settings */
+	if (bp->flags & BNXT_FLAG_TPA)
+		bnxt_set_tpa(bp, false);
+	bnxt_hwrm_vnic_free(bp);
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		bnxt_hwrm_vnic_ctx_free(bp);
+}
+
+static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
+				    bool irq_re_init)
+{
+	bnxt_clear_vnic(bp);
 	bnxt_hwrm_ring_free(bp, close_path);
 	bnxt_hwrm_ring_grp_free(bp);
 	if (irq_re_init) {
@@ -5514,8 +7499,6 @@
 	else
 		return -EINVAL;
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -5535,12 +7518,10 @@
 		req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128;
 
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
-static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
+static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
 {
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
 	int rc;
@@ -5596,11 +7577,61 @@
 	return rc;
 }
 
+static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id)
+{
+	int rc, i, nr_ctxs;
+
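+	/* Each RSS context on P5 chips covers up to 64 RX rings, so one
+	 * context is allocated per group of 64 rings.
+	 */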
+	nr_ctxs = DIV_ROUND_UP(bp->rx_nr_rings, 64);
+	for (i = 0; i < nr_ctxs; i++) {
+		rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, i);
+		if (rc) {
+			netdev_err(bp->dev, "hwrm vnic %d ctx %d alloc failure rc: %x\n",
+				   vnic_id, i, rc);
+			break;
+		}
+		bp->rsscos_nr_ctxs++;
+	}
+	if (i < nr_ctxs)
+		return -ENOMEM;
+
+	rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic_id, true);
+	if (rc) {
+		netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n",
+			   vnic_id, rc);
+		return rc;
+	}
+	rc = bnxt_hwrm_vnic_cfg(bp, vnic_id);
+	if (rc) {
+		netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n",
+			   vnic_id, rc);
+		return rc;
+	}
+	if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+		rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id);
+		if (rc) {
+			netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n",
+				   vnic_id, rc);
+		}
+	}
+	return rc;
+}
+
+static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
+{
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		return __bnxt_setup_vnic_p5(bp, vnic_id);
+	else
+		return __bnxt_setup_vnic(bp, vnic_id);
+}
+
 static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
 {
 #ifdef CONFIG_RFS_ACCEL
 	int i, rc = 0;
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		return 0;
+
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_vnic_info *vnic;
 		u16 vnic_id = i + 1;
@@ -5911,25 +7942,28 @@
 	return bp->hw_resc.max_stat_ctxs;
 }
 
-void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max)
-{
-	bp->hw_resc.max_stat_ctxs = max;
-}
-
 unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp)
 {
 	return bp->hw_resc.max_cp_rings;
 }
 
-unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
+static unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
 {
-	return bp->hw_resc.max_cp_rings - bnxt_get_ulp_msix_num(bp);
+	unsigned int cp = bp->hw_resc.max_cp_rings;
+
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		cp -= bnxt_get_ulp_msix_num(bp);
+
+	return cp;
 }
 
 static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_nqs);
+
 	return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_cp_rings);
 }
 
@@ -5938,6 +7972,22 @@
 	bp->hw_resc.max_irqs = max_irqs;
 }
 
+unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp)
+{
+	unsigned int cp;
+
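+	/* P5 chips pair a completion ring with every RX and TX ring, so
+	 * count availability against those instead of cp_nr_rings as on
+	 * older chips.
+	 */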
+	cp = bnxt_get_max_func_cp_rings_for_en(bp);
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		return cp - bp->rx_nr_rings - bp->tx_nr_rings;
+	else
+		return cp - bp->cp_nr_rings;
+}
+
+unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp)
+{
+	return bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_func_stat_ctxs(bp);
+}
+
 int bnxt_get_avail_msix(struct bnxt *bp, int num)
 {
 	int max_cp = bnxt_get_max_func_cp_rings(bp);
@@ -5945,7 +7995,9 @@
 	int total_req = bp->cp_nr_rings + num;
 	int max_idx, avail_msix;
 
-	max_idx = min_t(int, bp->total_irqs, max_cp);
+	max_idx = bp->total_irqs;
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		max_idx = min_t(int, bp->total_irqs, max_cp);
 	avail_msix = max_idx - bp->cp_nr_rings;
 	if (!BNXT_NEW_RM(bp) || avail_msix >= num)
 		return avail_msix;
@@ -5963,7 +8015,7 @@
 	if (!BNXT_NEW_RM(bp))
 		return bnxt_get_max_func_irqs(bp);
 
-	return bnxt_cp_rings_in_use(bp);
+	return bnxt_nq_rings_in_use(bp);
 }
 
 static int bnxt_init_msix(struct bnxt *bp)
@@ -6070,26 +8122,30 @@
 	bp->flags &= ~BNXT_FLAG_USING_MSIX;
 }
 
-int bnxt_reserve_rings(struct bnxt *bp)
+int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 {
 	int tcs = netdev_get_num_tc(bp->dev);
+	bool irq_cleared = false;
 	int rc;
 
 	if (!bnxt_need_reserve_rings(bp))
 		return 0;
 
-	rc = __bnxt_reserve_rings(bp);
-	if (rc) {
-		netdev_err(bp->dev, "ring reservation failure rc: %d\n", rc);
-		return rc;
-	}
-	if (BNXT_NEW_RM(bp) && (bnxt_get_num_msix(bp) != bp->total_irqs)) {
+	if (irq_re_init && BNXT_NEW_RM(bp) &&
+	    bnxt_get_num_msix(bp) != bp->total_irqs) {
 		bnxt_ulp_irq_stop(bp);
 		bnxt_clear_int_mode(bp);
-		rc = bnxt_init_int_mode(bp);
+		irq_cleared = true;
+	}
+	rc = __bnxt_reserve_rings(bp);
+	if (irq_cleared) {
+		if (!rc)
+			rc = bnxt_init_int_mode(bp);
 		bnxt_ulp_irq_restart(bp, rc);
-		if (rc)
-			return rc;
+	}
+	if (rc) {
+		netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
+		return rc;
 	}
 	if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) {
 		netdev_err(bp->dev, "tx ring reservation failure\n");
@@ -6097,7 +8153,6 @@
 		bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
 		return -ENOMEM;
 	}
-	bp->num_stat_ctxs = bp->cp_nr_rings;
 	return 0;
 }
 
@@ -6214,12 +8269,15 @@
 	struct bnxt_napi *bnapi;
 
 	if (bp->flags & BNXT_FLAG_USING_MSIX) {
-		if (BNXT_CHIP_TYPE_NITRO_A0(bp))
+		int (*poll_fn)(struct napi_struct *, int) = bnxt_poll;
+
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			poll_fn = bnxt_poll_p5;
+		else if (BNXT_CHIP_TYPE_NITRO_A0(bp))
 			cp_nr_rings--;
 		for (i = 0; i < cp_nr_rings; i++) {
 			bnapi = bp->bnapi[i];
-			netif_napi_add(bp->dev, &bnapi->napi,
-				       bnxt_poll, 64);
+			netif_napi_add(bp->dev, &bnapi->napi, poll_fn, 64);
 		}
 		if (BNXT_CHIP_TYPE_NITRO_A0(bp)) {
 			bnapi = bp->bnapi[cp_nr_rings];
@@ -6259,7 +8317,7 @@
 
 		if (bp->bnapi[i]->rx_ring) {
 			INIT_WORK(&cpr->dim.work, bnxt_dim_work);
-			cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+			cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 		}
 		napi_enable(&bp->bnapi[i]->napi);
 	}
@@ -6342,6 +8400,9 @@
 	struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
 	struct bnxt_link_info *link_info = &bp->link_info;
 
+	bp->flags &= ~BNXT_FLAG_EEE_CAP;
+	if (bp->test_info)
+		bp->test_info->flags &= ~BNXT_TEST_FL_EXT_LPBK;
 	if (bp->hwrm_spec_code < 0x10201)
 		return 0;
 
@@ -6663,11 +8724,14 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
+static int bnxt_fw_init_one(struct bnxt *bp);
+
 static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 {
 	struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_func_drv_if_change_input req = {0};
-	bool resc_reinit = false;
+	bool resc_reinit = false, fw_reset = false;
+	u32 flags = 0;
 	int rc;
 
 	if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE))
@@ -6678,24 +8742,57 @@
 		req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP);
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc && (resp->flags &
-		    cpu_to_le32(FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)))
-		resc_reinit = true;
+	if (!rc)
+		flags = le32_to_cpu(resp->flags);
 	mutex_unlock(&bp->hwrm_cmd_lock);
+	if (rc)
+		return rc;
 
-	if (up && resc_reinit && BNXT_NEW_RM(bp)) {
-		struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+	if (!up)
+		return 0;
 
-		rc = bnxt_hwrm_func_resc_qcaps(bp, true);
-		hw_resc->resv_cp_rings = 0;
-		hw_resc->resv_tx_rings = 0;
-		hw_resc->resv_rx_rings = 0;
-		hw_resc->resv_hw_ring_grps = 0;
-		hw_resc->resv_vnics = 0;
-		bp->tx_nr_rings = 0;
-		bp->rx_nr_rings = 0;
+	if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)
+		resc_reinit = true;
+	if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
+		fw_reset = true;
+
+	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) {
+		netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
+		return -ENODEV;
 	}
-	return rc;
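+	/* If firmware completed a reset while the interface was down,
+	 * redo the one-time init and the IRQ setup, and clear all
+	 * resource reservations so they get re-queried from scratch.
+	 */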
+	if (resc_reinit || fw_reset) {
+		if (fw_reset) {
+			rc = bnxt_fw_init_one(bp);
+			if (rc) {
+				set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
+				return rc;
+			}
+			bnxt_clear_int_mode(bp);
+			rc = bnxt_init_int_mode(bp);
+			if (rc) {
+				netdev_err(bp->dev, "init int mode failed\n");
+				return rc;
+			}
+			set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
+		}
+		if (BNXT_NEW_RM(bp)) {
+			struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+
+			rc = bnxt_hwrm_func_resc_qcaps(bp, true);
+			hw_resc->resv_cp_rings = 0;
+			hw_resc->resv_stat_ctxs = 0;
+			hw_resc->resv_irqs = 0;
+			hw_resc->resv_tx_rings = 0;
+			hw_resc->resv_rx_rings = 0;
+			hw_resc->resv_hw_ring_grps = 0;
+			hw_resc->resv_vnics = 0;
+			if (!fw_reset) {
+				bp->tx_nr_rings = 0;
+				bp->rx_nr_rings = 0;
+			}
+		}
+	}
+	return 0;
 }
 
 static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
@@ -6705,6 +8802,7 @@
 	struct bnxt_pf_info *pf = &bp->pf;
 	int rc;
 
+	bp->num_leds = 0;
 	if (BNXT_VF(bp) || bp->hwrm_spec_code < 0x10601)
 		return 0;
 
@@ -6799,6 +8897,7 @@
 {
 	u16 handle = 0;
 
+	bp->wol = 0;
 	if (!BNXT_PF(bp) || !(bp->flags & BNXT_FLAG_WOL_CAP))
 		return;
 
@@ -6845,6 +8944,9 @@
 {
 	struct pci_dev *pdev = bp->pdev;
 
+	if (bp->hwmon_dev)
+		return;
+
 	bp->hwmon_dev = hwmon_device_register_with_groups(&pdev->dev,
 							  DRV_MODULE_NAME, bp,
 							  bnxt_groups);
@@ -6976,10 +9078,10 @@
 			netdev_err(bp->dev, "Failed to reserve default rings at open\n");
 			return rc;
 		}
-		rc = bnxt_reserve_rings(bp);
-		if (rc)
-			return rc;
 	}
+	rc = bnxt_reserve_rings(bp, irq_re_init);
+	if (rc)
+		return rc;
 	if ((bp->flags & BNXT_FLAG_RFS) &&
 	    !(bp->flags & BNXT_FLAG_USING_MSIX)) {
 		/* disable RFS if falling back to INTA */
@@ -7110,12 +9212,28 @@
 	struct bnxt *bp = netdev_priv(dev);
 	int rc;
 
-	bnxt_hwrm_if_change(bp, true);
-	rc = __bnxt_open_nic(bp, true, true);
-	if (rc)
-		bnxt_hwrm_if_change(bp, false);
+	if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) {
+		netdev_err(bp->dev, "A previous firmware reset did not complete, aborting\n");
+		return -ENODEV;
+	}
 
-	bnxt_hwmon_open(bp);
+	rc = bnxt_hwrm_if_change(bp, true);
+	if (rc)
+		return rc;
+	rc = __bnxt_open_nic(bp, true, true);
+	if (rc) {
+		bnxt_hwrm_if_change(bp, false);
+	} else {
+		if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state) &&
+		    BNXT_PF(bp)) {
+			struct bnxt_pf_info *pf = &bp->pf;
+			int n = pf->active_vfs;
+
+			if (n)
+				bnxt_cfg_hw_sriov(bp, &n, true);
+		}
+		bnxt_hwmon_open(bp);
+	}
 
 	return rc;
 }
@@ -7126,6 +9244,9 @@
 		test_bit(BNXT_STATE_READ_STATS, &bp->state));
 }
 
+static void bnxt_get_ring_stats(struct bnxt *bp,
+				struct rtnl_link_stats64 *stats);
+
 static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
 			     bool link_re_init)
 {
@@ -7149,8 +9270,15 @@
 	bnxt_debug_dev_exit(bp);
 	bnxt_disable_napi(bp);
 	del_timer_sync(&bp->timer);
+	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) &&
+	    pci_is_enabled(bp->pdev))
+		pci_disable_device(bp->pdev);
+
 	bnxt_free_skbs(bp);
 
+	/* Save ring stats before shutdown */
+	if (bp->bnapi)
+		bnxt_get_ring_stats(bp, &bp->net_stats_prev);
 	if (irq_re_init) {
 		bnxt_free_irq(bp);
 		bnxt_del_napi(bp);
@@ -7162,6 +9290,18 @@
 {
 	int rc = 0;
 
+	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+		/* If we get here, it means firmware reset is in progress
+		 * while we are trying to close.  We can safely proceed with
+		 * the close because we are holding rtnl_lock().  Some firmware
+		 * messages may fail as we proceed to close.  We set the
+		 * ABORT_ERR flag here so that the FW reset thread will later
+		 * abort when it gets the rtnl_lock() and sees the flag.
+		 */
+		netdev_warn(bp->dev, "FW reset in progress during close, FW reset will be aborted\n");
+		set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
+	}
+
 #ifdef CONFIG_BNXT_SRIOV
 	if (bp->sriov_cfg) {
 		rc = wait_event_interruptible_timeout(bp->sriov_cfg_wait,
@@ -7186,24 +9326,88 @@
 	return 0;
 }
 
+static int bnxt_hwrm_port_phy_read(struct bnxt *bp, u16 phy_addr, u16 reg,
+				   u16 *val)
+{
+	struct hwrm_port_phy_mdio_read_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_port_phy_mdio_read_input req = {0};
+	int rc;
+
+	if (bp->hwrm_spec_code < 0x10a00)
+		return -EOPNOTSUPP;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_READ, -1, -1);
+	req.port_id = cpu_to_le16(bp->pf.port_id);
+	req.phy_addr = phy_addr;
+	req.reg_addr = cpu_to_le16(reg & 0x1f);
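+	/* Clause 45 PHY IDs pack the port and device addresses together;
+	 * unpack them and use the full 16-bit register address instead
+	 * of the 5-bit Clause 22 one.
+	 */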
+	if (mdio_phy_id_is_c45(phy_addr)) {
+		req.cl45_mdio = 1;
+		req.phy_addr = mdio_phy_id_prtad(phy_addr);
+		req.dev_addr = mdio_phy_id_devad(phy_addr);
+		req.reg_addr = cpu_to_le16(reg);
+	}
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc)
+		*val = le16_to_cpu(resp->reg_data);
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
+static int bnxt_hwrm_port_phy_write(struct bnxt *bp, u16 phy_addr, u16 reg,
+				    u16 val)
+{
+	struct hwrm_port_phy_mdio_write_input req = {0};
+
+	if (bp->hwrm_spec_code < 0x10a00)
+		return -EOPNOTSUPP;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_WRITE, -1, -1);
+	req.port_id = cpu_to_le16(bp->pf.port_id);
+	req.phy_addr = phy_addr;
+	req.reg_addr = cpu_to_le16(reg & 0x1f);
+	if (mdio_phy_id_is_c45(phy_addr)) {
+		req.cl45_mdio = 1;
+		req.phy_addr = mdio_phy_id_prtad(phy_addr);
+		req.dev_addr = mdio_phy_id_devad(phy_addr);
+		req.reg_addr = cpu_to_le16(reg);
+	}
+	req.reg_data = cpu_to_le16(val);
+
+	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+}
+
 /* rtnl_lock held */
 static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+	struct mii_ioctl_data *mdio = if_mii(ifr);
+	struct bnxt *bp = netdev_priv(dev);
+	int rc;
+
 	switch (cmd) {
 	case SIOCGMIIPHY:
+		mdio->phy_id = bp->link_info.phy_addr;
+
 		/* fallthru */
 	case SIOCGMIIREG: {
+		u16 mii_regval = 0;
+
 		if (!netif_running(dev))
 			return -EAGAIN;
 
-		return 0;
+		rc = bnxt_hwrm_port_phy_read(bp, mdio->phy_id, mdio->reg_num,
+					     &mii_regval);
+		mdio->val_out = mii_regval;
+		return rc;
 	}
 
 	case SIOCSMIIREG:
 		if (!netif_running(dev))
 			return -EAGAIN;
 
-		return 0;
+		return bnxt_hwrm_port_phy_write(bp, mdio->phy_id, mdio->reg_num,
+						mdio->val_in);
 
 	default:
 		/* do nothing */
@@ -7212,23 +9416,12 @@
 	return -EOPNOTSUPP;
 }
 
-static void
-bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+static void bnxt_get_ring_stats(struct bnxt *bp,
+				struct rtnl_link_stats64 *stats)
 {
-	u32 i;
-	struct bnxt *bp = netdev_priv(dev);
+	int i;
 
-	set_bit(BNXT_STATE_READ_STATS, &bp->state);
-	/* Make sure bnxt_close_nic() sees that we are reading stats before
-	 * we check the BNXT_STATE_OPEN flag.
-	 */
-	smp_mb__after_atomic();
-	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
-		clear_bit(BNXT_STATE_READ_STATS, &bp->state);
-		return;
-	}
-
-	/* TODO check if we need to synchronize with bnxt_close path */
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -7257,6 +9450,40 @@
 
 		stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
 	}
+}
+
+static void bnxt_add_prev_stats(struct bnxt *bp,
+				struct rtnl_link_stats64 *stats)
+{
+	struct rtnl_link_stats64 *prev_stats = &bp->net_stats_prev;
+
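+	/* Ring counters restart from zero when the rings are recreated;
+	 * add the totals saved at the last shutdown so the reported
+	 * stats stay monotonic.
+	 */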
+	stats->rx_packets += prev_stats->rx_packets;
+	stats->tx_packets += prev_stats->tx_packets;
+	stats->rx_bytes += prev_stats->rx_bytes;
+	stats->tx_bytes += prev_stats->tx_bytes;
+	stats->rx_missed_errors += prev_stats->rx_missed_errors;
+	stats->multicast += prev_stats->multicast;
+	stats->tx_dropped += prev_stats->tx_dropped;
+}
+
+static void
+bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct bnxt *bp = netdev_priv(dev);
+
+	set_bit(BNXT_STATE_READ_STATS, &bp->state);
+	/* Make sure bnxt_close_nic() sees that we are reading stats before
+	 * we check the BNXT_STATE_OPEN flag.
+	 */
+	smp_mb__after_atomic();
+	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+		clear_bit(BNXT_STATE_READ_STATS, &bp->state);
+		*stats = bp->net_stats_prev;
+		return;
+	}
+
+	bnxt_get_ring_stats(bp, stats);
+	bnxt_add_prev_stats(bp, stats);
 
 	if (bp->flags & BNXT_FLAG_PORT_STATS) {
 		struct rx_port_stats *rx = bp->hw_rx_port_stats;
@@ -7332,14 +9559,16 @@
 static void bnxt_set_rx_mode(struct net_device *dev)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
-	u32 mask = vnic->rx_mask;
+	struct bnxt_vnic_info *vnic;
 	bool mc_update = false;
 	bool uc_update;
+	u32 mask;
 
-	if (!netif_running(dev))
+	if (!test_bit(BNXT_STATE_OPEN, &bp->state))
 		return;
 
+	vnic = &bp->vnic_info[0];
+	mask = vnic->rx_mask;
 	mask &= ~(CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS |
 		  CFA_L2_SET_RX_MASK_REQ_MASK_MCAST |
 		  CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST |
@@ -7422,8 +9651,15 @@
 
 skip_uc:
 	rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+	if (rc && vnic->mc_list_count) {
+		netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
+			    rc);
+		vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
+		vnic->mc_list_count = 0;
+		rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+	}
 	if (rc)
-		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n",
+		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n",
 			   rc);
 
 	return rc;
@@ -7451,6 +9687,11 @@
 /* If the chip and firmware supports RFS */
 static bool bnxt_rfs_supported(struct bnxt *bp)
 {
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX)
+			return true;
+		return false;
+	}
 	if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp))
 		return true;
 	if (bp->flags & BNXT_FLAG_NEW_RSS_CAP)
@@ -7464,6 +9705,8 @@
 #ifdef CONFIG_RFS_ACCEL
 	int vnics, max_vnics, max_rss_ctxs;
 
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		return bnxt_rfs_supported(bp);
 	if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
 		return false;
 
@@ -7488,12 +9731,12 @@
 	if (vnics == bp->hw_resc.resv_vnics)
 		return true;
 
-	bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, vnics);
+	bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, vnics);
 	if (vnics <= bp->hw_resc.resv_vnics)
 		return true;
 
 	netdev_warn(bp->dev, "Unable to reserve resources to support NTUPLE filters.\n");
-	bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 1);
+	bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, 1);
 	return false;
 #else
 	return false;
@@ -7568,7 +9811,8 @@
 	if (changes & BNXT_FLAG_TPA) {
 		update_tpa = true;
 		if ((bp->flags & BNXT_FLAG_TPA) == 0 ||
-		    (flags & BNXT_FLAG_TPA) == 0)
+		    (flags & BNXT_FLAG_TPA) == 0 ||
+		    (bp->flags & BNXT_FLAG_CHIP_P5))
 			re_init = true;
 	}
 
@@ -7578,9 +9822,8 @@
 	if (flags != bp->flags) {
 		u32 old_flags = bp->flags;
 
-		bp->flags = flags;
-
 		if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+			bp->flags = flags;
 			if (update_tpa)
 				bnxt_set_ring_params(bp);
 			return rc;
@@ -7588,12 +9831,14 @@
 
 		if (re_init) {
 			bnxt_close_nic(bp, false, false);
+			bp->flags = flags;
 			if (update_tpa)
 				bnxt_set_ring_params(bp);
 
 			return bnxt_open_nic(bp, false, false);
 		}
 		if (update_tpa) {
+			bp->flags = flags;
 			rc = bnxt_set_tpa(bp,
 					  (flags & BNXT_FLAG_TPA) ?
 					  true : false);
@@ -7604,6 +9849,26 @@
 	return rc;
 }
 
+static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type,
+				       u32 ring_id, u32 *prod, u32 *cons)
+{
+	struct hwrm_dbg_ring_info_get_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_dbg_ring_info_get_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_RING_INFO_GET, -1, -1);
+	req.ring_type = ring_type;
+	req.fw_ring_id = cpu_to_le32(ring_id);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		*prod = le32_to_cpu(resp->producer_index);
+		*cons = le32_to_cpu(resp->consumer_index);
+	}
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
 static void bnxt_dump_tx_sw_state(struct bnxt_napi *bnapi)
 {
 	struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
@@ -7680,6 +9945,38 @@
 	bnxt_queue_sp_work(bp);
 }
 
+static void bnxt_fw_health_check(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 val;
+
+	if (!fw_health || !fw_health->enabled ||
+	    test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+		return;
+
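+	/* Only sample the health registers once every tmr_multiplier
+	 * timer ticks; a stalled heartbeat or an unexpected change in
+	 * the reset counter triggers the exception path below.
+	 */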
+	if (fw_health->tmr_counter) {
+		fw_health->tmr_counter--;
+		return;
+	}
+
+	val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
+	if (val == fw_health->last_fw_heartbeat)
+		goto fw_reset;
+
+	fw_health->last_fw_heartbeat = val;
+
+	val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+	if (val != fw_health->last_fw_reset_cnt)
+		goto fw_reset;
+
+	fw_health->tmr_counter = fw_health->tmr_multiplier;
+	return;
+
+fw_reset:
+	set_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event);
+	bnxt_queue_sp_work(bp);
+}
+
 static void bnxt_timer(struct timer_list *t)
 {
 	struct bnxt *bp = from_timer(bp, t, timer);
@@ -7691,6 +9988,9 @@
 	if (atomic_read(&bp->intr_sem) != 0)
 		goto bnxt_restart_timer;
 
+	if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
+		bnxt_fw_health_check(bp);
+
 	if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) &&
 	    bp->stats_coal_ticks) {
 		set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
@@ -7711,6 +10011,11 @@
 			bnxt_queue_sp_work(bp);
 		}
 	}
+
+	if ((bp->flags & BNXT_FLAG_CHIP_P5) && netif_carrier_ok(dev)) {
+		set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event);
+		bnxt_queue_sp_work(bp);
+	}
 bnxt_restart_timer:
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
@@ -7741,6 +10046,176 @@
 	bnxt_rtnl_unlock_sp(bp);
 }
 
+static void bnxt_fw_reset_close(struct bnxt *bp)
+{
+	__bnxt_close_nic(bp, true, false);
+	bnxt_ulp_irq_stop(bp);
+	bnxt_clear_int_mode(bp);
+	bnxt_hwrm_func_drv_unrgtr(bp);
+	bnxt_free_ctx_mem(bp);
+	kfree(bp->ctx);
+	bp->ctx = NULL;
+}
+
+static bool is_bnxt_fw_ok(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	bool no_heartbeat = false, has_reset = false;
+	u32 val;
+
+	val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
+	if (val == fw_health->last_fw_heartbeat)
+		no_heartbeat = true;
+
+	val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+	if (val != fw_health->last_fw_reset_cnt)
+		has_reset = true;
+
+	if (!no_heartbeat && has_reset)
+		return true;
+
+	return false;
+}
+
+/* rtnl_lock is acquired before calling this function */
+static void bnxt_force_fw_reset(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 wait_dsecs;
+
+	if (!test_bit(BNXT_STATE_OPEN, &bp->state) ||
+	    test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+		return;
+
+	set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+	bnxt_fw_reset_close(bp);
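+	/* The master function performs the actual reset (immediately if
+	 * the firmware co-processor drives recovery); other functions
+	 * just wait out the advertised grace periods before re-enabling
+	 * the device.
+	 */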
+	wait_dsecs = fw_health->master_func_wait_dsecs;
+	if (fw_health->master) {
+		if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU)
+			wait_dsecs = 0;
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW;
+	} else {
+		bp->fw_reset_timestamp = jiffies + wait_dsecs * HZ / 10;
+		wait_dsecs = fw_health->normal_func_wait_dsecs;
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
+	}
+
+	bp->fw_reset_min_dsecs = fw_health->post_reset_wait_dsecs;
+	bp->fw_reset_max_dsecs = fw_health->post_reset_max_wait_dsecs;
+	bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10);
+}
+
+void bnxt_fw_exception(struct bnxt *bp)
+{
+	set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
+	bnxt_rtnl_lock_sp(bp);
+	bnxt_force_fw_reset(bp);
+	bnxt_rtnl_unlock_sp(bp);
+}
+
+/* Returns the number of registered VFs, or 1 if VF configuration is pending, or
+ * < 0 on error.
+ */
+static int bnxt_get_registered_vfs(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+	int rc;
+
+	if (!BNXT_PF(bp))
+		return 0;
+
+	rc = bnxt_hwrm_func_qcfg(bp);
+	if (rc) {
+		netdev_err(bp->dev, "func_qcfg cmd failed, rc = %d\n", rc);
+		return rc;
+	}
+	if (bp->pf.registered_vfs)
+		return bp->pf.registered_vfs;
+	if (bp->sriov_cfg)
+		return 1;
+#endif
+	return 0;
+}
+
+void bnxt_fw_reset(struct bnxt *bp)
+{
+	bnxt_rtnl_lock_sp(bp);
+	if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
+	    !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+		int n = 0, tmo;
+
+		set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+		if (bp->pf.active_vfs &&
+		    !test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+			n = bnxt_get_registered_vfs(bp);
+		if (n < 0) {
+			netdev_err(bp->dev, "Firmware reset aborted, rc = %d\n",
+				   n);
+			clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+			dev_close(bp->dev);
+			goto fw_reset_exit;
+		} else if (n > 0) {
+			u16 vf_tmo_dsecs = n * 10;
+
+			if (bp->fw_reset_max_dsecs < vf_tmo_dsecs)
+				bp->fw_reset_max_dsecs = vf_tmo_dsecs;
+			bp->fw_reset_state =
+				BNXT_FW_RESET_STATE_POLL_VF;
+			bnxt_queue_fw_reset_work(bp, HZ / 10);
+			goto fw_reset_exit;
+		}
+		bnxt_fw_reset_close(bp);
+		if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) {
+			bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN;
+			tmo = HZ / 10;
+		} else {
+			bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
+			tmo = bp->fw_reset_min_dsecs * HZ / 10;
+		}
+		bnxt_queue_fw_reset_work(bp, tmo);
+	}
+fw_reset_exit:
+	bnxt_rtnl_unlock_sp(bp);
+}
+
+static void bnxt_chk_missed_irq(struct bnxt *bp)
+{
+	int i;
+
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		return;
+
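+	/* A P5 completion ring that has pending work but whose raw
+	 * consumer index has not moved since the last check likely
+	 * missed an interrupt; query its state from firmware and count
+	 * the event.
+	 */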
+	for (i = 0; i < bp->cp_nr_rings; i++) {
+		struct bnxt_napi *bnapi = bp->bnapi[i];
+		struct bnxt_cp_ring_info *cpr;
+		u32 fw_ring_id;
+		int j;
+
+		if (!bnapi)
+			continue;
+
+		cpr = &bnapi->cp_ring;
+		for (j = 0; j < 2; j++) {
+			struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+			u32 val[2];
+
+			if (!cpr2 || cpr2->has_more_work ||
+			    !bnxt_has_work(bp, cpr2))
+				continue;
+
+			if (cpr2->cp_raw_cons != cpr2->last_cp_raw_cons) {
+				cpr2->last_cp_raw_cons = cpr2->cp_raw_cons;
+				continue;
+			}
+			fw_ring_id = cpr2->cp_ring_struct.fw_ring_id;
+			bnxt_dbg_hwrm_ring_info_get(bp,
+				DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL,
+				fw_ring_id, &val[0], &val[1]);
+			cpr->missed_irqs++;
+		}
+	}
+}
+
 static void bnxt_cfg_ntp_filters(struct bnxt *);
 
 static void bnxt_sp_task(struct work_struct *work)
@@ -7782,6 +10257,7 @@
 	if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) {
 		bnxt_hwrm_port_qstats(bp);
 		bnxt_hwrm_port_qstats_ext(bp);
+		bnxt_hwrm_pcie_qstats(bp);
 	}
 
 	if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) {
@@ -7820,6 +10296,9 @@
 	if (test_and_clear_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event))
 		bnxt_tc_flow_stats_work(bp);
 
+	if (test_and_clear_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event))
+		bnxt_chk_missed_irq(bp);
+
 	/* These functions below will clear BNXT_STATE_IN_SP_TASK.  They
 	 * must be the last functions to be called before exiting.
 	 */
@@ -7829,6 +10308,15 @@
 	if (test_and_clear_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event))
 		bnxt_reset(bp, true);
 
+	if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event))
+		bnxt_devlink_health_report(bp, BNXT_FW_RESET_NOTIFY_SP_EVENT);
+
+	if (test_and_clear_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event)) {
+		if (!is_bnxt_fw_ok(bp))
+			bnxt_devlink_health_report(bp,
+						   BNXT_FW_EXCEPTION_SP_EVENT);
+	}
+
 	smp_mb__before_atomic();
 	clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
 }
@@ -7838,7 +10326,7 @@
 		     int tx_xdp)
 {
 	int max_rx, max_tx, tx_sets = 1;
-	int tx_rings_needed;
+	int tx_rings_needed, stats;
 	int rx_rings = rx;
 	int cp, vnics, rc;
 
@@ -7857,16 +10345,19 @@
 		return -ENOMEM;
 
 	vnics = 1;
-	if (bp->flags & BNXT_FLAG_RFS)
+	if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS)
 		vnics += rx_rings;
 
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		rx_rings <<= 1;
 	cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
-	if (BNXT_NEW_RM(bp))
+	stats = cp;
+	if (BNXT_NEW_RM(bp)) {
 		cp += bnxt_get_ulp_msix_num(bp);
+		stats += bnxt_get_ulp_stat_ctxs(bp);
+	}
 	return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
-				     vnics);
+				     stats, vnics);
 }
 
 static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@ -7891,7 +10382,8 @@
 {
 	bnxt_unmap_bars(bp, bp->pdev);
 	pci_release_regions(bp->pdev);
-	pci_disable_device(bp->pdev);
+	if (pci_is_enabled(bp->pdev))
+		pci_disable_device(bp->pdev);
 }
 
 static void bnxt_init_dflt_coal(struct bnxt *bp)
@@ -7902,7 +10394,7 @@
 	 * 1 coal_buf x bufs_per_record = 1 completion record.
 	 */
 	coal = &bp->rx_coal;
-	coal->coal_ticks = 14;
+	coal->coal_ticks = 10;
 	coal->coal_bufs = 30;
 	coal->coal_ticks_irq = 1;
 	coal->coal_bufs_irq = 2;
@@ -7920,6 +10412,333 @@
 	bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
 }
 
+static int bnxt_fw_init_one_p1(struct bnxt *bp)
+{
+	int rc;
+
+	bp->fw_cap = 0;
+	rc = bnxt_hwrm_ver_get(bp);
+	if (rc)
+		return rc;
+
+	if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
+		rc = bnxt_alloc_kong_hwrm_resources(bp);
+		if (rc)
+			bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
+	}
+
+	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
+	    bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
+		rc = bnxt_alloc_hwrm_short_cmd_req(bp);
+		if (rc)
+			return rc;
+	}
+	rc = bnxt_hwrm_func_reset(bp);
+	if (rc)
+		return -ENODEV;
+
+	bnxt_hwrm_fw_set_time(bp);
+	return 0;
+}
+
+static int bnxt_fw_init_one_p2(struct bnxt *bp)
+{
+	int rc;
+
+	/* Get the MAX capabilities for this function */
+	rc = bnxt_hwrm_func_qcaps(bp);
+	if (rc) {
+		netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
+			   rc);
+		return -ENODEV;
+	}
+
+	rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp);
+	if (rc)
+		netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
+			    rc);
+
+	rc = bnxt_hwrm_error_recovery_qcfg(bp);
+	if (rc)
+		netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
+			    rc);
+
+	rc = bnxt_hwrm_func_drv_rgtr(bp);
+	if (rc)
+		return -ENODEV;
+
+	rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
+	if (rc)
+		return -ENODEV;
+
+	bnxt_hwrm_func_qcfg(bp);
+	bnxt_hwrm_vnic_qcaps(bp);
+	bnxt_hwrm_port_led_qcaps(bp);
+	bnxt_ethtool_init(bp);
+	bnxt_dcb_init(bp);
+	return 0;
+}
+
+static void bnxt_set_dflt_rss_hash_type(struct bnxt *bp)
+{
+	bp->flags &= ~BNXT_FLAG_UDP_RSS_CAP;
+	bp->rss_hash_cfg = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
+			   VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
+			   VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
+			   VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+	if (BNXT_CHIP_P4(bp) && bp->hwrm_spec_code >= 0x10501) {
+		bp->flags |= BNXT_FLAG_UDP_RSS_CAP;
+		bp->rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 |
+				    VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+	}
+}
+
+static void bnxt_set_dflt_rfs(struct bnxt *bp)
+{
+	struct net_device *dev = bp->dev;
+
+	dev->hw_features &= ~NETIF_F_NTUPLE;
+	dev->features &= ~NETIF_F_NTUPLE;
+	bp->flags &= ~BNXT_FLAG_RFS;
+	if (bnxt_rfs_supported(bp)) {
+		dev->hw_features |= NETIF_F_NTUPLE;
+		if (bnxt_rfs_capable(bp)) {
+			bp->flags |= BNXT_FLAG_RFS;
+			dev->features |= NETIF_F_NTUPLE;
+		}
+	}
+}
+
+static void bnxt_fw_init_one_p3(struct bnxt *bp)
+{
+	struct pci_dev *pdev = bp->pdev;
+
+	bnxt_set_dflt_rss_hash_type(bp);
+	bnxt_set_dflt_rfs(bp);
+
+	bnxt_get_wol_settings(bp);
+	if (bp->flags & BNXT_FLAG_WOL_CAP)
+		device_set_wakeup_enable(&pdev->dev, bp->wol);
+	else
+		device_set_wakeup_capable(&pdev->dev, false);
+
+	bnxt_hwrm_set_cache_line_size(bp, cache_line_size());
+	bnxt_hwrm_coal_params_qcaps(bp);
+}
+
+static int bnxt_fw_init_one(struct bnxt *bp)
+{
+	int rc;
+
+	rc = bnxt_fw_init_one_p1(bp);
+	if (rc) {
+		netdev_err(bp->dev, "Firmware init phase 1 failed\n");
+		return rc;
+	}
+	rc = bnxt_fw_init_one_p2(bp);
+	if (rc) {
+		netdev_err(bp->dev, "Firmware init phase 2 failed\n");
+		return rc;
+	}
+	rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
+	if (rc)
+		return rc;
+	bnxt_fw_init_one_p3(bp);
+	return 0;
+}
+
+static void bnxt_fw_reset_writel(struct bnxt *bp, int reg_idx)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 reg = fw_health->fw_reset_seq_regs[reg_idx];
+	u32 val = fw_health->fw_reset_seq_vals[reg_idx];
+	u32 reg_type, reg_off, delay_msecs;
+
+	delay_msecs = fw_health->fw_reset_seq_delay_msec[reg_idx];
+	reg_type = BNXT_FW_HEALTH_REG_TYPE(reg);
+	reg_off = BNXT_FW_HEALTH_REG_OFF(reg);
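+	/* CFG registers are written through PCI config space; GRC
+	 * registers first remap their window into BAR0 and then fall
+	 * through to the plain BAR0 write.
+	 */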
+	switch (reg_type) {
+	case BNXT_FW_HEALTH_REG_TYPE_CFG:
+		pci_write_config_dword(bp->pdev, reg_off, val);
+		break;
+	case BNXT_FW_HEALTH_REG_TYPE_GRC:
+		writel(reg_off & BNXT_GRC_BASE_MASK,
+		       bp->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+		reg_off = (reg_off & BNXT_GRC_OFFSET_MASK) + 0x2000;
+		/* fall through */
+	case BNXT_FW_HEALTH_REG_TYPE_BAR0:
+		writel(val, bp->bar0 + reg_off);
+		break;
+	case BNXT_FW_HEALTH_REG_TYPE_BAR1:
+		writel(val, bp->bar1 + reg_off);
+		break;
+	}
+	if (delay_msecs) {
+		pci_read_config_dword(bp->pdev, 0, &val);
+		msleep(delay_msecs);
+	}
+}
+
+static void bnxt_reset_all(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	int i;
+
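+	/* Host-driven recovery replays the register write sequence that
+	 * firmware advertised; otherwise ask the firmware co-processor
+	 * to perform a graceful self-reset.
+	 */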
+	if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_HOST) {
+		for (i = 0; i < fw_health->fw_reset_seq_cnt; i++)
+			bnxt_fw_reset_writel(bp, i);
+	} else if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) {
+		struct hwrm_fw_reset_input req = {0};
+		int rc;
+
+		bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
+		req.resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
+		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
+		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
+		req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
+		rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+		if (rc)
+			netdev_warn(bp->dev, "Unable to reset FW rc=%d\n", rc);
+	}
+	bp->fw_reset_timestamp = jiffies;
+}
+
+static void bnxt_fw_reset_task(struct work_struct *work)
+{
+	struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work);
+	int rc;
+
+	if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+		netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n");
+		return;
+	}
+
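+	/* Walk the reset state machine: poll until VFs unregister, wait
+	 * for firmware to signal shutdown, issue the reset, then poll
+	 * HWRM_VER_GET until firmware responds before reopening.  Some
+	 * states intentionally fall through to the next one.
+	 */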
+	switch (bp->fw_reset_state) {
+	case BNXT_FW_RESET_STATE_POLL_VF: {
+		int n = bnxt_get_registered_vfs(bp);
+		int tmo;
+
+		if (n < 0) {
+			netdev_err(bp->dev, "Firmware reset aborted, subsequent func_qcfg cmd failed, rc = %d, %d msecs since reset timestamp\n",
+				   n, jiffies_to_msecs(jiffies -
+				   bp->fw_reset_timestamp));
+			goto fw_reset_abort;
+		} else if (n > 0) {
+			if (time_after(jiffies, bp->fw_reset_timestamp +
+				       (bp->fw_reset_max_dsecs * HZ / 10))) {
+				clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+				bp->fw_reset_state = 0;
+				netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n",
+					   n);
+				return;
+			}
+			bnxt_queue_fw_reset_work(bp, HZ / 10);
+			return;
+		}
+		bp->fw_reset_timestamp = jiffies;
+		rtnl_lock();
+		bnxt_fw_reset_close(bp);
+		if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) {
+			bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN;
+			tmo = HZ / 10;
+		} else {
+			bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
+			tmo = bp->fw_reset_min_dsecs * HZ / 10;
+		}
+		rtnl_unlock();
+		bnxt_queue_fw_reset_work(bp, tmo);
+		return;
+	}
+	case BNXT_FW_RESET_STATE_POLL_FW_DOWN: {
+		u32 val;
+
+		val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+		if (!(val & BNXT_FW_STATUS_SHUTDOWN) &&
+		    !time_after(jiffies, bp->fw_reset_timestamp +
+		    (bp->fw_reset_max_dsecs * HZ / 10))) {
+			bnxt_queue_fw_reset_work(bp, HZ / 5);
+			return;
+		}
+
+		if (!bp->fw_health->master) {
+			u32 wait_dsecs = bp->fw_health->normal_func_wait_dsecs;
+
+			bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
+			bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10);
+			return;
+		}
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW;
+	}
+	/* fall through */
+	case BNXT_FW_RESET_STATE_RESET_FW:
+		bnxt_reset_all(bp);
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
+		bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
+		return;
+	case BNXT_FW_RESET_STATE_ENABLE_DEV:
+		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
+		    bp->fw_health) {
+			u32 val;
+
+			val = bnxt_fw_health_readl(bp,
+						   BNXT_FW_RESET_INPROG_REG);
+			if (val)
+				netdev_warn(bp->dev, "FW reset inprog %x after min wait time.\n",
+					    val);
+		}
+		clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
+		if (pci_enable_device(bp->pdev)) {
+			netdev_err(bp->dev, "Cannot re-enable PCI device\n");
+			goto fw_reset_abort;
+		}
+		pci_set_master(bp->pdev);
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW;
+		/* fall through */
+	case BNXT_FW_RESET_STATE_POLL_FW:
+		bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT;
+		rc = __bnxt_hwrm_ver_get(bp, true);
+		if (rc) {
+			if (time_after(jiffies, bp->fw_reset_timestamp +
+				       (bp->fw_reset_max_dsecs * HZ / 10))) {
+				netdev_err(bp->dev, "Firmware reset aborted\n");
+				goto fw_reset_abort;
+			}
+			bnxt_queue_fw_reset_work(bp, HZ / 5);
+			return;
+		}
+		bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT;
+		bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
+		/* fall through */
+	case BNXT_FW_RESET_STATE_OPENING:
+		while (!rtnl_trylock()) {
+			bnxt_queue_fw_reset_work(bp, HZ / 10);
+			return;
+		}
+		rc = bnxt_open(bp->dev);
+		if (rc) {
+			netdev_err(bp->dev, "bnxt_open() failed\n");
+			clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+			dev_close(bp->dev);
+		}
+		bnxt_ulp_irq_restart(bp, rc);
+		rtnl_unlock();
+
+		bp->fw_reset_state = 0;
+		/* Make sure fw_reset_state is 0 before clearing the flag */
+		smp_mb__before_atomic();
+		clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+		break;
+	}
+	return;
+
+fw_reset_abort:
+	clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+	bp->fw_reset_state = 0;
+	rtnl_lock();
+	dev_close(bp->dev);
+	rtnl_unlock();
+}
+
 static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 {
 	int rc;
@@ -7982,8 +10801,12 @@
 	pci_enable_pcie_error_reporting(pdev);
 
 	INIT_WORK(&bp->sp_task, bnxt_sp_task);
+	INIT_DELAYED_WORK(&bp->fw_reset_task, bnxt_fw_reset_task);
 
 	spin_lock_init(&bp->ntp_fltr_lock);
+#if BITS_PER_LONG == 32
+	spin_lock_init(&bp->db_lock);
+#endif
 
 	bp->rx_ring_size = BNXT_DEFAULT_RX_RING_SIZE;
 	bp->tx_ring_size = BNXT_DEFAULT_TX_RING_SIZE;
@@ -8087,7 +10910,6 @@
 	bp->tx_nr_rings += bp->tx_nr_rings_xdp;
 	bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
 			       bp->tx_nr_rings + bp->rx_nr_rings;
-	bp->num_stat_ctxs = bp->cp_nr_rings;
 
 	if (netif_running(bp->dev))
 		return bnxt_open_nic(bp, true, false);
@@ -8112,32 +10934,19 @@
 	}
 }
 
-static int bnxt_setup_tc_block(struct net_device *dev,
-			       struct tc_block_offload *f)
-{
-	struct bnxt *bp = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb,
-					     bp, bp, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(bnxt_block_cb_list);
 
 static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			 void *type_data)
 {
+	struct bnxt *bp = netdev_priv(dev);
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return bnxt_setup_tc_block(dev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &bnxt_block_cb_list,
+						  bnxt_setup_tc_block_cb,
+						  bp, bp, true);
 	case TC_SETUP_QDISC_MQPRIO: {
 		struct tc_mqprio_qopt *mqprio = type_data;
 
@@ -8410,7 +11219,7 @@
 }
 
 static int bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
-			       u16 flags)
+			       u16 flags, struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	struct nlattr *attr, *br_spec;
@@ -8444,25 +11253,11 @@
 	return rc;
 }
 
-static int bnxt_get_phys_port_name(struct net_device *dev, char *buf,
-				   size_t len)
+int bnxt_get_port_parent_id(struct net_device *dev,
+			    struct netdev_phys_item_id *ppid)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	int rc;
 
-	/* The PF and it's VF-reps only support the switchdev framework */
-	if (!BNXT_PF(bp))
-		return -EOPNOTSUPP;
-
-	rc = snprintf(buf, len, "p%d", bp->pf.port_id);
-
-	if (rc >= len)
-		return -EOPNOTSUPP;
-	return 0;
-}
-
-int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr)
-{
 	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
 		return -EOPNOTSUPP;
 
@@ -8470,26 +11265,18 @@
 	if (!BNXT_PF(bp))
 		return -EOPNOTSUPP;
 
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(bp->switch_id);
-		memcpy(attr->u.ppid.id, bp->switch_id, attr->u.ppid.id_len);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
+	ppid->id_len = sizeof(bp->switch_id);
+	memcpy(ppid->id, bp->switch_id, ppid->id_len);
+
 	return 0;
 }
 
-static int bnxt_swdev_port_attr_get(struct net_device *dev,
-				    struct switchdev_attr *attr)
+static struct devlink_port *bnxt_get_devlink_port(struct net_device *dev)
 {
-	return bnxt_port_attr_get(netdev_priv(dev), attr);
-}
+	struct bnxt *bp = netdev_priv(dev);
 
-static const struct switchdev_ops bnxt_switchdev_ops = {
-	.switchdev_port_attr_get	= bnxt_swdev_port_attr_get
-};
+	return &bp->dl_port;
+}
 
 static const struct net_device_ops bnxt_netdev_ops = {
 	.ndo_open		= bnxt_open,
@@ -8520,9 +11307,10 @@
 	.ndo_udp_tunnel_add	= bnxt_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= bnxt_udp_tunnel_del,
 	.ndo_bpf		= bnxt_xdp,
+	.ndo_xdp_xmit		= bnxt_xdp_xmit,
 	.ndo_bridge_getlink	= bnxt_bridge_getlink,
 	.ndo_bridge_setlink	= bnxt_bridge_setlink,
-	.ndo_get_phys_port_name = bnxt_get_phys_port_name
+	.ndo_get_devlink_port	= bnxt_get_devlink_port,
 };
 
 static void bnxt_remove_one(struct pci_dev *pdev)
@@ -8550,10 +11338,14 @@
 	kfree(bp->edev);
 	bp->edev = NULL;
 	bnxt_cleanup_pci(bp);
+	bnxt_free_ctx_mem(bp);
+	kfree(bp->ctx);
+	bp->ctx = NULL;
+	bnxt_free_port_stats(bp);
 	free_netdev(dev);
 }
 
-static int bnxt_probe_phy(struct bnxt *bp)
+static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
 {
 	int rc = 0;
 	struct bnxt_link_info *link_info = &bp->link_info;
@@ -8564,8 +11356,6 @@
 			   rc);
 		return rc;
 	}
-	mutex_init(&bp->link_lock);
-
 	rc = bnxt_update_link(bp, false);
 	if (rc) {
 		netdev_err(bp->dev, "Probe phy can't update link (rc: %x)\n",
@@ -8579,6 +11369,9 @@
 	if (link_info->auto_link_speeds && !link_info->support_auto_speeds)
 		link_info->support_auto_speeds = link_info->support_speeds;
 
+	if (!fw_dflt)
+		return 0;
+
 	/* Initialize the ethtool settings copy with NVM settings */
 	if (BNXT_AUTO_MODE(link_info->auto_mode)) {
 		link_info->autoneg = BNXT_AUTONEG_SPEED;
@@ -8599,7 +11392,7 @@
 			link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH;
 	else
 		link_info->req_flow_ctrl = link_info->force_pause_setting;
-	return rc;
+	return 0;
 }
 
 static int bnxt_get_max_irq(struct pci_dev *pdev)
@@ -8617,13 +11410,16 @@
 				int *max_cp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	int max_ring_grps = 0;
+	int max_ring_grps = 0, max_irq;
 
 	*max_tx = hw_resc->max_tx_rings;
 	*max_rx = hw_resc->max_rx_rings;
-	*max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp),
-			hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp));
-	*max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs);
+	*max_cp = bnxt_get_max_func_cp_rings_for_en(bp);
+	max_irq = min_t(int, bnxt_get_max_func_irqs(bp) -
+			bnxt_get_ulp_msix_num(bp),
+			hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp));
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		*max_cp = min_t(int, *max_cp, max_irq);
 	max_ring_grps = hw_resc->max_hw_ring_grps;
 	if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {
 		*max_cp -= 1;
@@ -8631,6 +11427,11 @@
 	}
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		*max_rx >>= 1;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		bnxt_trim_rings(bp, max_rx, max_tx, *max_cp, false);
+		/* On P5 chips, max_cp output param should be available NQs */
+		*max_cp = max_irq;
+	}
 	*max_rx = min_t(int, *max_rx, max_ring_grps);
 }
 
@@ -8712,7 +11513,7 @@
 
 	if (sh)
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
-	dflt_rings = netif_get_num_default_rss_queues();
+	dflt_rings = is_kdump_kernel() ? 1 : netif_get_num_default_rss_queues();
 	/* Reduce default rings on multi-port cards so that total default
 	 * rings do not exceed CPU count.
 	 */
@@ -8747,7 +11548,6 @@
 			netdev_warn(bp->dev, "2nd rings reservation failed.\n");
 		bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
 	}
-	bp->num_stat_ctxs = bp->cp_nr_rings;
 	if (BNXT_CHIP_TYPE_NITRO_A0(bp)) {
 		bp->rx_nr_rings++;
 		bp->cp_nr_rings++;
@@ -8835,6 +11635,26 @@
 	return rc;
 }
 
+static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
+{
+	struct pci_dev *pdev = bp->pdev;
+	int pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
+	u32 dw;
+
+	if (!pos) {
+		netdev_info(bp->dev, "Unable to read adapter's DSN\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* DSN (two dw) is at an offset of 4 from the cap pos */
+	pos += 4;
+	pci_read_config_dword(pdev, pos, &dw);
+	put_unaligned_le32(dw, &dsn[0]);
+	pci_read_config_dword(pdev, pos + 4, &dw);
+	put_unaligned_le32(dw, &dsn[4]);
+	return 0;
+}
+
 static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int version_printed;
@@ -8854,6 +11674,7 @@
 		return -ENOMEM;
 
 	bp = netdev_priv(dev);
+	bnxt_set_max_func_irqs(bp, max_irqs);
 
 	if (bnxt_vf_pciid(ent->driver_data))
 		bp->flags |= BNXT_FLAG_VF;
@@ -8868,7 +11689,6 @@
 	dev->netdev_ops = &bnxt_netdev_ops;
 	dev->watchdog_timeo = BNXT_TX_TIMEOUT;
 	dev->ethtool_ops = &bnxt_ethtool_ops;
-	SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops);
 	pci_set_drvdata(pdev, dev);
 
 	rc = bnxt_alloc_hwrm_resources(bp);
@@ -8876,22 +11696,19 @@
 		goto init_err_pci_clean;
 
 	mutex_init(&bp->hwrm_cmd_lock);
-	rc = bnxt_hwrm_ver_get(bp);
+	mutex_init(&bp->link_lock);
+
+	rc = bnxt_fw_init_one_p1(bp);
 	if (rc)
 		goto init_err_pci_clean;
 
-	if (bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) {
-		rc = bnxt_alloc_hwrm_short_cmd_req(bp);
-		if (rc)
-			goto init_err_pci_clean;
-	}
+	if (BNXT_CHIP_P5(bp))
+		bp->flags |= BNXT_FLAG_CHIP_P5;
 
-	rc = bnxt_hwrm_func_reset(bp);
+	rc = bnxt_fw_init_one_p2(bp);
 	if (rc)
 		goto init_err_pci_clean;
 
-	bnxt_hwrm_fw_set_time(bp);
-
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
 			   NETIF_F_TSO | NETIF_F_TSO6 |
 			   NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
@@ -8900,7 +11717,7 @@
 			   NETIF_F_GSO_PARTIAL | NETIF_F_RXHASH |
 			   NETIF_F_RXCSUM | NETIF_F_GRO;
 
-	if (!BNXT_CHIP_TYPE_NITRO_A0(bp))
+	if (BNXT_SUPPORTS_TPA(bp))
 		dev->hw_features |= NETIF_F_LRO;
 
 	dev->hw_enc_features =
@@ -8914,7 +11731,7 @@
 	dev->vlan_features = dev->hw_features | NETIF_F_HIGHDMA;
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX |
 			    NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX;
-	if (!BNXT_CHIP_TYPE_NITRO_A0(bp))
+	if (BNXT_SUPPORTS_TPA(bp))
 		dev->hw_features |= NETIF_F_GRO_HW;
 	dev->features |= dev->hw_features | NETIF_F_HIGHDMA;
 	if (dev->features & NETIF_F_GRO_HW)
@@ -8925,61 +11742,43 @@
 	init_waitqueue_head(&bp->sriov_cfg_wait);
 	mutex_init(&bp->sriov_lock);
 #endif
-	bp->gro_func = bnxt_gro_func_5730x;
-	if (BNXT_CHIP_P4_PLUS(bp))
-		bp->gro_func = bnxt_gro_func_5731x;
-	else
+	if (BNXT_SUPPORTS_TPA(bp)) {
+		bp->gro_func = bnxt_gro_func_5730x;
+		if (BNXT_CHIP_P4(bp))
+			bp->gro_func = bnxt_gro_func_5731x;
+		else if (BNXT_CHIP_P5(bp))
+			bp->gro_func = bnxt_gro_func_5750x;
+	}
+	if (!BNXT_CHIP_P4_PLUS(bp))
 		bp->flags |= BNXT_FLAG_DOUBLE_DB;
 
-	rc = bnxt_hwrm_func_drv_rgtr(bp);
-	if (rc)
-		goto init_err_pci_clean;
-
-	rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
-	if (rc)
-		goto init_err_pci_clean;
-
 	bp->ulp_probe = bnxt_ulp_probe;
 
-	/* Get the MAX capabilities for this function */
-	rc = bnxt_hwrm_func_qcaps(bp);
-	if (rc) {
-		netdev_err(bp->dev, "hwrm query capability failure rc: %x\n",
-			   rc);
-		rc = -1;
-		goto init_err_pci_clean;
-	}
 	rc = bnxt_init_mac_addr(bp);
 	if (rc) {
 		dev_err(&pdev->dev, "Unable to initialize mac address.\n");
 		rc = -EADDRNOTAVAIL;
 		goto init_err_pci_clean;
 	}
-	rc = bnxt_hwrm_queue_qportcfg(bp);
-	if (rc) {
-		netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n",
-			   rc);
-		rc = -1;
-		goto init_err_pci_clean;
-	}
 
-	bnxt_hwrm_func_qcfg(bp);
-	bnxt_hwrm_port_led_qcaps(bp);
-	bnxt_ethtool_init(bp);
-	bnxt_dcb_init(bp);
+	if (BNXT_PF(bp)) {
+		/* Read the adapter's DSN to use as the eswitch switch_id */
+		rc = bnxt_pcie_dsn_get(bp, bp->switch_id);
+		if (rc)
+			goto init_err_pci_clean;
+	}
 
 	/* MTU range: 60 - FW defined max */
 	dev->min_mtu = ETH_ZLEN;
 	dev->max_mtu = bp->max_mtu;
 
-	rc = bnxt_probe_phy(bp);
+	rc = bnxt_probe_phy(bp, true);
 	if (rc)
 		goto init_err_pci_clean;
 
 	bnxt_set_rx_skb_mode(bp, false);
 	bnxt_set_tpa_flags(bp);
 	bnxt_set_ring_params(bp);
-	bnxt_set_max_func_irqs(bp, max_irqs);
 	rc = bnxt_set_dflt_rings(bp, true);
 	if (rc) {
 		netdev_err(bp->dev, "Not enough rings available.\n");
@@ -8987,25 +11786,7 @@
 		goto init_err_pci_clean;
 	}
 
-	/* Default RSS hash cfg. */
-	bp->rss_hash_cfg = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
-			   VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
-			   VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
-			   VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
-	if (BNXT_CHIP_P4_PLUS(bp) && bp->hwrm_spec_code >= 0x10501) {
-		bp->flags |= BNXT_FLAG_UDP_RSS_CAP;
-		bp->rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 |
-				    VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
-	}
-
-	bnxt_hwrm_vnic_qcaps(bp);
-	if (bnxt_rfs_supported(bp)) {
-		dev->hw_features |= NETIF_F_NTUPLE;
-		if (bnxt_rfs_capable(bp)) {
-			bp->flags |= BNXT_FLAG_RFS;
-			dev->features |= NETIF_F_NTUPLE;
-		}
-	}
+	bnxt_fw_init_one_p3(bp);
 
 	if (dev->hw_features & NETIF_F_HW_VLAN_CTAG_RX)
 		bp->flags |= BNXT_FLAG_STRIP_VLAN;
@@ -9019,14 +11800,6 @@
 	 */
 	bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
 
-	bnxt_get_wol_settings(bp);
-	if (bp->flags & BNXT_FLAG_WOL_CAP)
-		device_set_wakeup_enable(&pdev->dev, bp->wol);
-	else
-		device_set_wakeup_capable(&pdev->dev, false);
-
-	bnxt_hwrm_set_cache_line_size(bp, cache_line_size());
-
 	if (BNXT_PF(bp)) {
 		if (!bnxt_pf_wq) {
 			bnxt_pf_wq =
@@ -9058,7 +11831,13 @@
 	bnxt_clear_int_mode(bp);
 
 init_err_pci_clean:
+	bnxt_free_hwrm_short_cmd_req(bp);
 	bnxt_free_hwrm_resources(bp);
+	bnxt_free_ctx_mem(bp);
+	kfree(bp->ctx);
+	bp->ctx = NULL;
+	kfree(bp->fw_health);
+	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);
 
 init_err_free:
@@ -9086,6 +11865,7 @@
 
 	if (system_state == SYSTEM_POWER_OFF) {
 		bnxt_clear_int_mode(bp);
+		pci_disable_device(pdev);
 		pci_wake_from_d3(pdev, bp->wol);
 		pci_set_power_state(pdev, PCI_D3hot);
 	}
@@ -9097,8 +11877,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int bnxt_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
 
@@ -9114,8 +11893,7 @@
 
 static int bnxt_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
 
@@ -9227,13 +12005,6 @@
 
 	rtnl_unlock();
 
-	err = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (err) {
-		dev_err(&pdev->dev,
-			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
-			 err); /* non-fatal, continue */
-	}
-
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index bde3846..d333589 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -12,19 +12,21 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME		"bnxt_en"
-#define DRV_MODULE_VERSION	"1.9.2"
+#define DRV_MODULE_VERSION	"1.10.0"
 
 #define DRV_VER_MAJ	1
-#define DRV_VER_MIN	9
-#define DRV_VER_UPD	2
+#define DRV_VER_MIN	10
+#define DRV_VER_UPD	0
 
 #include <linux/interrupt.h>
 #include <linux/rhashtable.h>
+#include <linux/crash_dump.h>
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
-#include <net/switchdev.h>
 #include <net/xdp.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
+
+struct page_pool;
 
 struct tx_bd {
 	__le32 tx_bd_len_flags_type;
@@ -111,6 +113,7 @@
 	 #define CMP_TYPE_RX_AGG_CMP				 18
 	 #define CMP_TYPE_RX_L2_TPA_START_CMP			 19
 	 #define CMP_TYPE_RX_L2_TPA_END_CMP			 21
+	 #define CMP_TYPE_RX_TPA_AGG_CMP			 22
 	 #define CMP_TYPE_STATUS_CMP				 32
 	 #define CMP_TYPE_REMOTE_DRIVER_REQ			 34
 	 #define CMP_TYPE_REMOTE_DRIVER_RESP			 36
@@ -261,14 +264,21 @@
 	u32 rx_agg_cmp_opaque;
 	__le32 rx_agg_cmp_v;
 	#define RX_AGG_CMP_V					(1 << 0)
+	#define RX_AGG_CMP_AGG_ID				(0xffff << 16)
+	 #define RX_AGG_CMP_AGG_ID_SHIFT			 16
 	__le32 rx_agg_cmp_unused;
 };
 
+#define TPA_AGG_AGG_ID(rx_agg)				\
+	((le32_to_cpu((rx_agg)->rx_agg_cmp_v) &		\
+	 RX_AGG_CMP_AGG_ID) >> RX_AGG_CMP_AGG_ID_SHIFT)
+
 struct rx_tpa_start_cmp {
 	__le32 rx_tpa_start_cmp_len_flags_type;
 	#define RX_TPA_START_CMP_TYPE				(0x3f << 0)
 	#define RX_TPA_START_CMP_FLAGS				(0x3ff << 6)
 	 #define RX_TPA_START_CMP_FLAGS_SHIFT			 6
+	#define RX_TPA_START_CMP_FLAGS_ERROR			(0x1 << 6)
 	#define RX_TPA_START_CMP_FLAGS_PLACEMENT		(0x7 << 7)
 	 #define RX_TPA_START_CMP_FLAGS_PLACEMENT_SHIFT		 7
 	 #define RX_TPA_START_CMP_FLAGS_PLACEMENT_JUMBO		 (0x1 << 7)
@@ -276,6 +286,7 @@
 	 #define RX_TPA_START_CMP_FLAGS_PLACEMENT_GRO_JUMBO	 (0x5 << 7)
 	 #define RX_TPA_START_CMP_FLAGS_PLACEMENT_GRO_HDS	 (0x6 << 7)
 	#define RX_TPA_START_CMP_FLAGS_RSS_VALID		(0x1 << 10)
+	#define RX_TPA_START_CMP_FLAGS_TIMESTAMP		(0x1 << 11)
 	#define RX_TPA_START_CMP_FLAGS_ITYPES			(0xf << 12)
 	 #define RX_TPA_START_CMP_FLAGS_ITYPES_SHIFT		 12
 	 #define RX_TPA_START_CMP_FLAGS_ITYPE_TCP		 (0x2 << 12)
@@ -289,6 +300,8 @@
 	 #define RX_TPA_START_CMP_RSS_HASH_TYPE_SHIFT		 9
 	#define RX_TPA_START_CMP_AGG_ID				(0x7f << 25)
 	 #define RX_TPA_START_CMP_AGG_ID_SHIFT			 25
+	#define RX_TPA_START_CMP_AGG_ID_P5			(0xffff << 16)
+	 #define RX_TPA_START_CMP_AGG_ID_SHIFT_P5		 16
 
 	__le32 rx_tpa_start_cmp_rss_hash;
 };
@@ -306,6 +319,14 @@
 	((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_misc_v1) &	\
 	 RX_TPA_START_CMP_AGG_ID) >> RX_TPA_START_CMP_AGG_ID_SHIFT)
 
+#define TPA_START_AGG_ID_P5(rx_tpa_start)				\
+	((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_misc_v1) &	\
+	 RX_TPA_START_CMP_AGG_ID_P5) >> RX_TPA_START_CMP_AGG_ID_SHIFT_P5)
+
+#define TPA_START_ERROR(rx_tpa_start)					\
+	((rx_tpa_start)->rx_tpa_start_cmp_len_flags_type &		\
+	 cpu_to_le32(RX_TPA_START_CMP_FLAGS_ERROR))
+
 struct rx_tpa_start_cmp_ext {
 	__le32 rx_tpa_start_cmp_flags2;
 	#define RX_TPA_START_CMP_FLAGS2_IP_CS_CALC		(0x1 << 0)
@@ -313,10 +334,20 @@
 	#define RX_TPA_START_CMP_FLAGS2_T_IP_CS_CALC		(0x1 << 2)
 	#define RX_TPA_START_CMP_FLAGS2_T_L4_CS_CALC		(0x1 << 3)
 	#define RX_TPA_START_CMP_FLAGS2_IP_TYPE			(0x1 << 8)
+	#define RX_TPA_START_CMP_FLAGS2_CSUM_CMPL_VALID		(0x1 << 9)
+	#define RX_TPA_START_CMP_FLAGS2_EXT_META_FORMAT		(0x3 << 10)
+	 #define RX_TPA_START_CMP_FLAGS2_EXT_META_FORMAT_SHIFT	 10
+	#define RX_TPA_START_CMP_FLAGS2_CSUM_CMPL		(0xffff << 16)
+	 #define RX_TPA_START_CMP_FLAGS2_CSUM_CMPL_SHIFT	 16
 
 	__le32 rx_tpa_start_cmp_metadata;
 	__le32 rx_tpa_start_cmp_cfa_code_v2;
 	#define RX_TPA_START_CMP_V2				(0x1 << 0)
+	#define RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_MASK	(0x7 << 1)
+	 #define RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_SHIFT	 1
+	 #define RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_NO_BUFFER	 (0x0 << 1)
+	 #define RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_BAD_FORMAT (0x3 << 1)
+	 #define RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_FLUSH	 (0x5 << 1)
 	#define RX_TPA_START_CMP_CFA_CODE			(0xffff << 16)
 	 #define RX_TPA_START_CMPL_CFA_CODE_SHIFT		 16
 	__le32 rx_tpa_start_cmp_hdr_info;
@@ -330,6 +361,11 @@
 	(!!((rx_tpa_start)->rx_tpa_start_cmp_flags2 &		\
 	    cpu_to_le32(RX_TPA_START_CMP_FLAGS2_IP_TYPE)))
 
+#define TPA_START_ERROR_CODE(rx_tpa_start)				\
+	((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_cfa_code_v2) &	\
+	  RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_MASK) >>			\
+	 RX_TPA_START_CMP_ERRORS_BUFFER_ERROR_SHIFT)
+
 struct rx_tpa_end_cmp {
 	__le32 rx_tpa_end_cmp_len_flags_type;
 	#define RX_TPA_END_CMP_TYPE				(0x3f << 0)
@@ -359,6 +395,8 @@
 	 #define RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT		 16
 	#define RX_TPA_END_CMP_AGG_ID				(0x7f << 25)
 	 #define RX_TPA_END_CMP_AGG_ID_SHIFT			 25
+	#define RX_TPA_END_CMP_AGG_ID_P5			(0xffff << 16)
+	 #define RX_TPA_END_CMP_AGG_ID_SHIFT_P5			 16
 
 	__le32 rx_tpa_end_cmp_tsdelta;
 	#define RX_TPA_END_GRO_TS				(0x1 << 31)
@@ -368,6 +406,18 @@
 	((le32_to_cpu((rx_tpa_end)->rx_tpa_end_cmp_misc_v1) &		\
 	 RX_TPA_END_CMP_AGG_ID) >> RX_TPA_END_CMP_AGG_ID_SHIFT)
 
+#define TPA_END_AGG_ID_P5(rx_tpa_end)					\
+	((le32_to_cpu((rx_tpa_end)->rx_tpa_end_cmp_misc_v1) &		\
+	 RX_TPA_END_CMP_AGG_ID_P5) >> RX_TPA_END_CMP_AGG_ID_SHIFT_P5)
+
+#define TPA_END_PAYLOAD_OFF(rx_tpa_end)					\
+	((le32_to_cpu((rx_tpa_end)->rx_tpa_end_cmp_misc_v1) &		\
+	 RX_TPA_END_CMP_PAYLOAD_OFFSET) >> RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT)
+
+#define TPA_END_AGG_BUFS(rx_tpa_end)					\
+	((le32_to_cpu((rx_tpa_end)->rx_tpa_end_cmp_misc_v1) &		\
+	 RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT)
+
 #define TPA_END_TPA_SEGS(rx_tpa_end)					\
 	((le32_to_cpu((rx_tpa_end)->rx_tpa_end_cmp_misc_v1) &		\
 	 RX_TPA_END_CMP_TPA_SEGS) >> RX_TPA_END_CMP_TPA_SEGS_SHIFT)
@@ -387,6 +437,10 @@
 struct rx_tpa_end_cmp_ext {
 	__le32 rx_tpa_end_cmp_dup_acks;
 	#define RX_TPA_END_CMP_TPA_DUP_ACKS			(0xf << 0)
+	#define RX_TPA_END_CMP_PAYLOAD_OFFSET_P5		(0xff << 16)
+	 #define RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT_P5		 16
+	#define RX_TPA_END_CMP_AGG_BUFS_P5			(0xff << 24)
+	 #define RX_TPA_END_CMP_AGG_BUFS_SHIFT_P5		 24
 
 	__le32 rx_tpa_end_cmp_seg_len;
 	#define RX_TPA_END_CMP_TPA_SEG_LEN			(0xffff << 0)
@@ -394,7 +448,13 @@
 	__le32 rx_tpa_end_cmp_errors_v2;
 	#define RX_TPA_END_CMP_V2				(0x1 << 0)
 	#define RX_TPA_END_CMP_ERRORS				(0x3 << 1)
+	#define RX_TPA_END_CMP_ERRORS_P5			(0x7 << 1)
 	#define RX_TPA_END_CMPL_ERRORS_SHIFT			 1
+	 #define RX_TPA_END_CMP_ERRORS_BUFFER_ERROR_NO_BUFFER	 (0x0 << 1)
+	 #define RX_TPA_END_CMP_ERRORS_BUFFER_ERROR_NOT_ON_CHIP	 (0x2 << 1)
+	 #define RX_TPA_END_CMP_ERRORS_BUFFER_ERROR_BAD_FORMAT	 (0x3 << 1)
+	 #define RX_TPA_END_CMP_ERRORS_BUFFER_ERROR_RSV_ERROR	 (0x4 << 1)
+	 #define RX_TPA_END_CMP_ERRORS_BUFFER_ERROR_FLUSH	 (0x5 << 1)
 
 	u32 rx_tpa_end_cmp_start_opaque;
 };
@@ -403,6 +463,41 @@
 	((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 &			\
 	 cpu_to_le32(RX_TPA_END_CMP_ERRORS))
 
+#define TPA_END_PAYLOAD_OFF_P5(rx_tpa_end_ext)				\
+	((le32_to_cpu((rx_tpa_end_ext)->rx_tpa_end_cmp_dup_acks) &	\
+	 RX_TPA_END_CMP_PAYLOAD_OFFSET_P5) >>				\
+	RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT_P5)
+
+#define TPA_END_AGG_BUFS_P5(rx_tpa_end_ext)				\
+	((le32_to_cpu((rx_tpa_end_ext)->rx_tpa_end_cmp_dup_acks) &	\
+	 RX_TPA_END_CMP_AGG_BUFS_P5) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT_P5)
+
+#define EVENT_DATA1_RESET_NOTIFY_FATAL(data1)				\
+	(((data1) &							\
+	  ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\
+	 ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL)
+
+#define EVENT_DATA1_RECOVERY_MASTER_FUNC(data1)				\
+	!!((data1) &							\
+	   ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC)
+
+#define EVENT_DATA1_RECOVERY_ENABLED(data1)				\
+	!!((data1) &							\
+	   ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED)
+
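+/* Completion notification entry on a P5 notification queue (NQ); it
+ * carries the handle of the completion queue that has new entries.
+ */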
+struct nqe_cn {
+	__le16	type;
+	#define NQ_CN_TYPE_MASK           0x3fUL
+	#define NQ_CN_TYPE_SFT            0
+	#define NQ_CN_TYPE_CQ_NOTIFICATION  0x30UL
+	#define NQ_CN_TYPE_LAST            NQ_CN_TYPE_CQ_NOTIFICATION
+	__le16	reserved16;
+	__le32	cq_handle_low;
+	__le32	v;
+	#define NQ_CN_V     0x1UL
+	__le32	cq_handle_high;
+};
+
 #define DB_IDX_MASK						0xffffff
 #define DB_IDX_VALID						(0x1 << 26)
 #define DB_IRQ_DIS						(0x1 << 27)
@@ -416,6 +511,25 @@
 #define BNXT_MIN_ROCE_CP_RINGS	2
 #define BNXT_MIN_ROCE_STAT_CTXS	1
 
+/* 64-bit doorbell */
+#define DBR_INDEX_MASK					0x0000000000ffffffULL
+#define DBR_XID_MASK					0x000fffff00000000ULL
+#define DBR_XID_SFT					32
+#define DBR_PATH_L2					(0x1ULL << 56)
+#define DBR_TYPE_SQ					(0x0ULL << 60)
+#define DBR_TYPE_RQ					(0x1ULL << 60)
+#define DBR_TYPE_SRQ					(0x2ULL << 60)
+#define DBR_TYPE_SRQ_ARM				(0x3ULL << 60)
+#define DBR_TYPE_CQ					(0x4ULL << 60)
+#define DBR_TYPE_CQ_ARMSE				(0x5ULL << 60)
+#define DBR_TYPE_CQ_ARMALL				(0x6ULL << 60)
+#define DBR_TYPE_CQ_ARMENA				(0x7ULL << 60)
+#define DBR_TYPE_SRQ_ARMENA				(0x8ULL << 60)
+#define DBR_TYPE_CQ_CUTOFF_ACK				(0x9ULL << 60)
+#define DBR_TYPE_NQ					(0xaULL << 60)
+#define DBR_TYPE_NQ_ARM					(0xbULL << 60)
+#define DBR_TYPE_NULL					(0xfULL << 60)
+
 #define INVALID_HW_RING_ID	((u16)-1)
 
 /* The hardware supports certain page sizes.  Use the supported page sizes
@@ -453,6 +567,9 @@
 #define BNXT_DEFAULT_TX_RING_SIZE	511
 
 #define MAX_TPA		64
+#define MAX_TPA_P5	256
+#define MAX_TPA_P5_MASK	(MAX_TPA_P5 - 1)
+#define MAX_TPA_SEGS_P5	0x3f
 
 #if (BNXT_PAGE_SHIFT == 16)
 #define MAX_RX_PAGES	1
@@ -505,6 +622,9 @@
 	(!!((agg)->rx_agg_cmp_v & cpu_to_le32(RX_AGG_CMP_V)) ==	\
 	 !((raw_cons) & bp->cp_bit))
 
+#define NQ_CMP_VALID(nqcmp, raw_cons)				\
+	(!!((nqcmp)->v & cpu_to_le32(NQ_CN_V)) == !((raw_cons) & bp->cp_bit))
+
 #define TX_CMP_TYPE(txcmp)					\
 	(le32_to_cpu((txcmp)->tx_cmp_flags_type) & CMP_TYPE)
 
@@ -525,14 +645,15 @@
 #define BNXT_HWRM_MAX_REQ_LEN		(bp->hwrm_max_req_len)
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 #define DFLT_HWRM_CMD_TIMEOUT		500
+#define SHORT_HWRM_CMD_TIMEOUT		20
 #define HWRM_CMD_TIMEOUT		(bp->hwrm_cmd_timeout)
 #define HWRM_RESET_TIMEOUT		((HWRM_CMD_TIMEOUT) * 4)
+#define HWRM_COREDUMP_TIMEOUT		((HWRM_CMD_TIMEOUT) * 12)
 #define HWRM_RESP_ERR_CODE_MASK		0xffff
 #define HWRM_RESP_LEN_OFFSET		4
 #define HWRM_RESP_LEN_MASK		0xffff0000
 #define HWRM_RESP_LEN_SFT		16
 #define HWRM_RESP_VALID_MASK		0xff000000
-#define HWRM_SEQ_ID_INVALID		-1
 #define BNXT_HWRM_REQ_MAX_SIZE		128
 #define BNXT_HWRM_REQS_PER_PAGE		(BNXT_PAGE_SIZE /	\
 					 BNXT_HWRM_REQ_MAX_SIZE)
@@ -548,17 +669,26 @@
 	(HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +		\
 	 ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
 
-#define HWRM_VALID_BIT_DELAY_USEC	20
+#define HWRM_VALID_BIT_DELAY_USEC	150
 
-#define BNXT_RX_EVENT	1
-#define BNXT_AGG_EVENT	2
-#define BNXT_TX_EVENT	4
+#define BNXT_HWRM_CHNL_CHIMP	0
+#define BNXT_HWRM_CHNL_KONG	1
+
+#define BNXT_RX_EVENT		1
+#define BNXT_AGG_EVENT		2
+#define BNXT_TX_EVENT		4
+#define BNXT_REDIRECT_EVENT	8
 
 struct bnxt_sw_tx_bd {
-	struct sk_buff		*skb;
+	union {
+		struct sk_buff		*skb;
+		struct xdp_frame	*xdpf;
+	};
 	DEFINE_DMA_UNMAP_ADDR(mapping);
+	DEFINE_DMA_UNMAP_LEN(len);
 	u8			is_gso;
 	u8			is_push;
+	u8			action;
 	union {
 		unsigned short		nr_frags;
 		u16			rx_prod;
@@ -577,9 +707,16 @@
 	dma_addr_t		mapping;
 };
 
-struct bnxt_ring_struct {
+struct bnxt_ring_mem_info {
 	int			nr_pages;
 	int			page_size;
+	u16			flags;
+#define BNXT_RMEM_VALID_PTE_FLAG	1
+#define BNXT_RMEM_RING_PTE_FLAG		2
+#define BNXT_RMEM_USE_FULL_PAGE_FLAG	4
+
+	u16			depth;
+
 	void			**pg_arr;
 	dma_addr_t		*dma_arr;
 
@@ -588,12 +725,17 @@
 
 	int			vmem_size;
 	void			**vmem;
+};
+
+struct bnxt_ring_struct {
+	struct bnxt_ring_mem_info	ring_mem;
 
 	u16			fw_ring_id; /* Ring id filled by Chimp FW */
 	union {
 		u16		grp_idx;
 		u16		map_idx; /* Used by cmpl rings */
 	};
+	u32			handle;
 	u8			queue_id;
 };
 
@@ -609,12 +751,20 @@
 	u32			data[25];
 };
 
+struct bnxt_db_info {
+	void __iomem		*doorbell;
+	union {
+		u64		db_key64;
+		u32		db_key32;
+	};
+};
+
 struct bnxt_tx_ring_info {
 	struct bnxt_napi	*bnapi;
 	u16			tx_prod;
 	u16			tx_cons;
 	u16			txq_index;
-	void __iomem		*tx_doorbell;
+	struct bnxt_db_info	tx_db;
 
 	struct tx_bd		*tx_desc_ring[MAX_TX_PAGES];
 	struct bnxt_sw_tx_bd	*tx_buf_ring;
@@ -631,6 +781,42 @@
 	struct bnxt_ring_struct	tx_ring_struct;
 };
 
+#define BNXT_LEGACY_COAL_CMPL_PARAMS					\
+	(RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_INT_LAT_TMR_MIN |		\
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_INT_LAT_TMR_MAX |		\
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_TIMER_RESET |		\
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_RING_IDLE |			\
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_DMA_AGGR |		\
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_DMA_AGGR_DURING_INT | \
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_CMPL_AGGR_DMA_TMR |		\
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_CMPL_AGGR_DMA_TMR_DURING_INT | \
+	 RING_AGGINT_QCAPS_RESP_CMPL_PARAMS_NUM_CMPL_AGGR_INT)
+
+#define BNXT_COAL_CMPL_ENABLES						\
+	(RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_NUM_CMPL_DMA_AGGR | \
+	 RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_CMPL_AGGR_DMA_TMR | \
+	 RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_INT_LAT_TMR_MAX | \
+	 RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_NUM_CMPL_AGGR_INT)
+
+#define BNXT_COAL_CMPL_MIN_TMR_ENABLE					\
+	RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_INT_LAT_TMR_MIN
+
+#define BNXT_COAL_CMPL_AGGR_TMR_DURING_INT_ENABLE			\
+	RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_ENABLES_NUM_CMPL_DMA_AGGR_DURING_INT
+
+struct bnxt_coal_cap {
+	u32			cmpl_params;
+	u32			nq_params;
+	u16			num_cmpl_dma_aggr_max;
+	u16			num_cmpl_dma_aggr_during_int_max;
+	u16			cmpl_aggr_dma_tmr_max;
+	u16			cmpl_aggr_dma_tmr_during_int_max;
+	u16			int_lat_tmr_min_max;
+	u16			int_lat_tmr_max_max;
+	u16			num_cmpl_aggr_int_max;
+	u16			timer_units;
+};
+
 struct bnxt_coal {
 	u16			coal_ticks;
 	u16			coal_ticks_irq;
@@ -667,6 +853,15 @@
 	((hdr_info) & 0x1ff)
 
 	u16			cfa_code; /* cfa_code in TPA start compl */
+	u8			agg_count;
+	struct rx_agg_cmp	*agg_arr;
+};
+
+#define BNXT_AGG_IDX_BMAP_SIZE	(MAX_TPA_P5 / BITS_PER_LONG)
+
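+/* Maps the 16-bit hardware aggregation ID to a driver TPA index on P5
+ * chips; agg_idx_bmap tracks which of the MAX_TPA_P5 slots are in use.
+ */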
+struct bnxt_tpa_idx_map {
+	u16		agg_id_tbl[1024];
+	unsigned long	agg_idx_bmap[BNXT_AGG_IDX_BMAP_SIZE];
 };
 
 struct bnxt_rx_ring_info {
@@ -675,8 +870,8 @@
 	u16			rx_agg_prod;
 	u16			rx_sw_agg_prod;
 	u16			rx_next_cons;
-	void __iomem		*rx_doorbell;
-	void __iomem		*rx_agg_doorbell;
+	struct bnxt_db_info	rx_db;
+	struct bnxt_db_info	rx_agg_db;
 
 	struct bpf_prog		*xdp_prog;
 
@@ -696,24 +891,35 @@
 	dma_addr_t		rx_agg_desc_mapping[MAX_RX_AGG_PAGES];
 
 	struct bnxt_tpa_info	*rx_tpa;
+	struct bnxt_tpa_idx_map *rx_tpa_idx_map;
 
 	struct bnxt_ring_struct	rx_ring_struct;
 	struct bnxt_ring_struct	rx_agg_ring_struct;
 	struct xdp_rxq_info	xdp_rxq;
+	struct page_pool	*page_pool;
 };
 
 struct bnxt_cp_ring_info {
+	struct bnxt_napi	*bnapi;
 	u32			cp_raw_cons;
-	void __iomem		*cp_doorbell;
+	struct bnxt_db_info	cp_db;
+
+	u8			had_work_done:1;
+	u8			has_more_work:1;
+
+	u32			last_cp_raw_cons;
 
 	struct bnxt_coal	rx_ring_coal;
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			event_ctr;
 
-	struct net_dim		dim;
+	struct dim		dim;
 
-	struct tx_cmp		*cp_desc_ring[MAX_CP_PAGES];
+	union {
+		struct tx_cmp	*cp_desc_ring[MAX_CP_PAGES];
+		struct nqe_cn	*nq_desc_ring[MAX_CP_PAGES];
+	};
 
 	dma_addr_t		cp_desc_mapping[MAX_CP_PAGES];
 
@@ -721,8 +927,13 @@
 	dma_addr_t		hw_stats_map;
 	u32			hw_stats_ctx_id;
 	u64			rx_l4_csum_errors;
+	u64			missed_irqs;
 
 	struct bnxt_ring_struct	cp_ring_struct;
+
+	struct bnxt_cp_ring_info *cp_ring_arr[2];
+#define BNXT_RX_HDL	0
+#define BNXT_TX_HDL	1
 };
 
 struct bnxt_napi {
@@ -736,6 +947,9 @@
 
 	void			(*tx_int)(struct bnxt *, struct bnxt_napi *,
 					  int);
+	int			tx_pkts;
+	u8			events;
+
 	u32			flags;
 #define BNXT_NAPI_FLAG_XDP	0x1
 
@@ -755,6 +969,7 @@
 #define HWRM_RING_ALLOC_RX	0x2
 #define HWRM_RING_ALLOC_AGG	0x4
 #define HWRM_RING_ALLOC_CMPL	0x8
+#define HWRM_RING_ALLOC_NQ	0x10
 
 #define INVALID_STATS_CTX_ID	-1
 
@@ -768,7 +983,7 @@
 
 struct bnxt_vnic_info {
 	u16		fw_vnic_id; /* returned by Chimp during alloc */
-#define BNXT_MAX_CTX_PER_VNIC	2
+#define BNXT_MAX_CTX_PER_VNIC	8
 	u16		fw_rss_cos_lb_ctx[BNXT_MAX_CTX_PER_VNIC];
 	u16		fw_l2_ctx_id;
 #define BNXT_MAX_UC_ADDRS	4
@@ -821,7 +1036,10 @@
 	u16	resv_vnics;
 	u16	min_stat_ctxs;
 	u16	max_stat_ctxs;
+	u16	resv_stat_ctxs;
+	u16	max_nqs;
 	u16	max_irqs;
+	u16	resv_irqs;
 };
 
 #if defined(CONFIG_BNXT_SRIOV)
@@ -832,6 +1050,7 @@
 					 * stored by PF.
 					 */
 	u16	vlan;
+	u16	func_qcfg_flags;
 	u32	flags;
 #define BNXT_VF_QOS		0x1
 #define BNXT_VF_SPOOFCHK	0x2
@@ -854,6 +1073,7 @@
 	u8	mac_addr[ETH_ALEN];
 	u32	first_vf_id;
 	u16	active_vfs;
+	u16	registered_vfs;
 	u16	max_vfs;
 	u32	max_encap_records;
 	u32	max_decap_records;
@@ -1004,9 +1224,17 @@
 	char string[BNXT_MAX_TEST][ETH_GSTRING_LEN];
 };
 
-#define BNXT_GRCPF_REG_WINDOW_BASE_OUT	0x400
-#define BNXT_CAG_REG_LEGACY_INT_STATUS	0x4014
-#define BNXT_CAG_REG_BASE		0x300000
+#define BNXT_GRCPF_REG_CHIMP_COMM		0x0
+#define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER	0x100
+#define BNXT_GRCPF_REG_WINDOW_BASE_OUT		0x400
+#define BNXT_CAG_REG_LEGACY_INT_STATUS		0x4014
+#define BNXT_CAG_REG_BASE			0x300000
+
+#define BNXT_GRCPF_REG_KONG_COMM		0xA00
+#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER	0xB00
+
+#define BNXT_GRC_BASE_MASK			0xfffff000
+#define BNXT_GRC_OFFSET_MASK			0x00000ffc
 
 struct bnxt_tc_flow_stats {
 	u64		packets;
@@ -1069,6 +1297,109 @@
 	struct bnxt_vf_rep_stats	tx_stats;
 };
 
+#define PTU_PTE_VALID             0x1UL
+#define PTU_PTE_LAST              0x2UL
+#define PTU_PTE_NEXT_TO_LAST      0x4UL
+
+#define MAX_CTX_PAGES	(BNXT_PAGE_SIZE / 8)
+#define MAX_CTX_TOTAL_PAGES	(MAX_CTX_PAGES * MAX_CTX_PAGES)
+
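+/* Host pages backing the on-chip context ("backing store") that P5
+ * firmware requires; described to the firmware via the backing-store
+ * HWRM commands.
+ */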
+struct bnxt_ctx_pg_info {
+	u32		entries;
+	u32		nr_pages;
+	void		*ctx_pg_arr[MAX_CTX_PAGES];
+	dma_addr_t	ctx_dma_arr[MAX_CTX_PAGES];
+	struct bnxt_ring_mem_info ring_mem;
+	struct bnxt_ctx_pg_info **ctx_pg_tbl;
+};
+
+struct bnxt_ctx_mem_info {
+	u32	qp_max_entries;
+	u16	qp_min_qp1_entries;
+	u16	qp_max_l2_entries;
+	u16	qp_entry_size;
+	u16	srq_max_l2_entries;
+	u32	srq_max_entries;
+	u16	srq_entry_size;
+	u16	cq_max_l2_entries;
+	u32	cq_max_entries;
+	u16	cq_entry_size;
+	u16	vnic_max_vnic_entries;
+	u16	vnic_max_ring_table_entries;
+	u16	vnic_entry_size;
+	u32	stat_max_entries;
+	u16	stat_entry_size;
+	u16	tqm_entry_size;
+	u32	tqm_min_entries_per_ring;
+	u32	tqm_max_entries_per_ring;
+	u32	mrav_max_entries;
+	u16	mrav_entry_size;
+	u16	tim_entry_size;
+	u32	tim_max_entries;
+	u16	mrav_num_entries_units;
+	u8	tqm_entries_multiple;
+
+	u32	flags;
+	#define BNXT_CTX_FLAG_INITED	0x01
+
+	struct bnxt_ctx_pg_info qp_mem;
+	struct bnxt_ctx_pg_info srq_mem;
+	struct bnxt_ctx_pg_info cq_mem;
+	struct bnxt_ctx_pg_info vnic_mem;
+	struct bnxt_ctx_pg_info stat_mem;
+	struct bnxt_ctx_pg_info mrav_mem;
+	struct bnxt_ctx_pg_info tim_mem;
+	struct bnxt_ctx_pg_info *tqm_mem[9];
+};
+
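+/* Error-recovery state learned from the firmware; regs[] and
+ * mapped_regs[] are indexed by the BNXT_FW_*_REG defines below.
+ */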
+struct bnxt_fw_health {
+	u32 flags;
+	u32 polling_dsecs;
+	u32 master_func_wait_dsecs;
+	u32 normal_func_wait_dsecs;
+	u32 post_reset_wait_dsecs;
+	u32 post_reset_max_wait_dsecs;
+	u32 regs[4];
+	u32 mapped_regs[4];
+#define BNXT_FW_HEALTH_REG		0
+#define BNXT_FW_HEARTBEAT_REG		1
+#define BNXT_FW_RESET_CNT_REG		2
+#define BNXT_FW_RESET_INPROG_REG	3
+	u32 fw_reset_inprog_reg_mask;
+	u32 last_fw_heartbeat;
+	u32 last_fw_reset_cnt;
+	u8 enabled:1;
+	u8 master:1;
+	u8 tmr_multiplier;
+	u8 tmr_counter;
+	u8 fw_reset_seq_cnt;
+	u32 fw_reset_seq_regs[16];
+	u32 fw_reset_seq_vals[16];
+	u32 fw_reset_seq_delay_msec[16];
+	struct devlink_health_reporter	*fw_reporter;
+	struct devlink_health_reporter *fw_reset_reporter;
+	struct devlink_health_reporter *fw_fatal_reporter;
+};
+
+struct bnxt_fw_reporter_ctx {
+	unsigned long sp_event;
+};
+
+#define BNXT_FW_HEALTH_REG_TYPE_MASK	3
+#define BNXT_FW_HEALTH_REG_TYPE_CFG	0
+#define BNXT_FW_HEALTH_REG_TYPE_GRC	1
+#define BNXT_FW_HEALTH_REG_TYPE_BAR0	2
+#define BNXT_FW_HEALTH_REG_TYPE_BAR1	3
+
+#define BNXT_FW_HEALTH_REG_TYPE(reg)	((reg) & BNXT_FW_HEALTH_REG_TYPE_MASK)
+#define BNXT_FW_HEALTH_REG_OFF(reg)	((reg) & ~BNXT_FW_HEALTH_REG_TYPE_MASK)
+
+#define BNXT_FW_HEALTH_WIN_BASE		0x3000
+#define BNXT_FW_HEALTH_WIN_MAP_OFF	8
+
+#define BNXT_FW_STATUS_HEALTHY		0x8000
+#define BNXT_FW_STATUS_SHUTDOWN		0x100000
+
 struct bnxt {
 	void __iomem		*bar0;
 	void __iomem		*bar1;
@@ -1098,6 +1429,10 @@
 
 #define CHIP_NUM_5745X		0xd730
 
+#define CHIP_NUM_57508		0x1750
+#define CHIP_NUM_57504		0x1751
+#define CHIP_NUM_57502		0x1752
+
 #define CHIP_NUM_58802		0xd802
 #define CHIP_NUM_58804		0xd804
 #define CHIP_NUM_58808		0xd808
@@ -1144,6 +1479,7 @@
 	atomic_t		intr_sem;
 
 	u32			flags;
+	#define BNXT_FLAG_CHIP_P5	0x1
 	#define BNXT_FLAG_VF		0x2
 	#define BNXT_FLAG_LRO		0x4
 #ifdef CONFIG_INET
@@ -1178,6 +1514,7 @@
 	#define BNXT_FLAG_DIM		0x2000000
 	#define BNXT_FLAG_ROCE_MIRROR_CAP	0x4000000
 	#define BNXT_FLAG_PORT_STATS_EXT	0x10000000
+	#define BNXT_FLAG_PCIE_STATS	0x40000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\
 					    BNXT_FLAG_RFS |		\
@@ -1190,15 +1527,27 @@
 #define BNXT_SINGLE_PF(bp)	(BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp))
 #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
 #define BNXT_RX_PAGE_MODE(bp)	((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
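+/* TPA (hardware LRO/GRO) is unsupported on Nitro A0, on P5 chips that
+ * lack TPA v2, and in kdump kernels.
+ */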
+#define BNXT_SUPPORTS_TPA(bp)	(!BNXT_CHIP_TYPE_NITRO_A0(bp) &&	\
+				 (!((bp)->flags & BNXT_FLAG_CHIP_P5) ||	\
+				  (bp)->max_tpa_v2) && !is_kdump_kernel())
 
-/* Chip class phase 4 and later */
-#define BNXT_CHIP_P4_PLUS(bp)			\
+/* Chip class phase 5 */
+#define BNXT_CHIP_P5(bp)			\
+	((bp)->chip_num == CHIP_NUM_57508 ||	\
+	 (bp)->chip_num == CHIP_NUM_57504 ||	\
+	 (bp)->chip_num == CHIP_NUM_57502)
+
+/* Chip class phase 4.x */
+#define BNXT_CHIP_P4(bp)			\
 	(BNXT_CHIP_NUM_57X1X((bp)->chip_num) ||	\
 	 BNXT_CHIP_NUM_5745X((bp)->chip_num) ||	\
 	 BNXT_CHIP_NUM_588XX((bp)->chip_num) ||	\
 	 (BNXT_CHIP_NUM_58700((bp)->chip_num) &&	\
 	  !BNXT_CHIP_TYPE_NITRO_A0(bp)))
 
+#define BNXT_CHIP_P4_PLUS(bp)			\
+	(BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp))
+
 	struct bnxt_en_dev	*edev;
 	struct bnxt_en_dev *	(*ulp_probe)(struct net_device *);
 
@@ -1216,6 +1565,8 @@
 					       u16, void *, u8 *, dma_addr_t,
 					       unsigned int);
 
+	u16			max_tpa_v2;
+	u16			max_tpa;
 	u32			rx_buf_size;
 	u32			rx_buf_use_size;	/* useable size */
 	u16			rx_offset;
@@ -1248,8 +1599,6 @@
 	int			cp_nr_pages;
 	int			cp_nr_rings;
 
-	int			num_stat_ctxs;
-
 	/* grp_info indexed by completion ring index */
 	struct bnxt_ring_grp_info	*grp_info;
 	struct bnxt_vnic_info	*vnic_info;
@@ -1261,6 +1610,8 @@
 	u8			max_lltc;	/* lossless TCs */
 	struct bnxt_queue_info	q_info[BNXT_MAX_QUEUE];
 	u8			tc_to_qidx[BNXT_MAX_QUEUE];
+	u8			q_ids[BNXT_MAX_QUEUE];
+	u8			max_q;
 
 	unsigned int		current_interval;
 #define BNXT_TIMER_INTERVAL	HZ
@@ -1271,6 +1622,10 @@
 #define BNXT_STATE_OPEN		0
 #define BNXT_STATE_IN_SP_TASK	1
 #define BNXT_STATE_READ_STATS	2
+#define BNXT_STATE_FW_RESET_DET 3
+#define BNXT_STATE_IN_FW_RESET	4
+#define BNXT_STATE_ABORT_ERR	5
+#define BNXT_STATE_FW_FATAL_COND	6
 
 	struct bnxt_irq	*irq_tbl;
 	int			total_irqs;
@@ -1287,30 +1642,54 @@
 	u32			msg_enable;
 
 	u32			fw_cap;
-	#define BNXT_FW_CAP_SHORT_CMD	0x00000001
-	#define BNXT_FW_CAP_LLDP_AGENT	0x00000002
-	#define BNXT_FW_CAP_DCBX_AGENT	0x00000004
-	#define BNXT_FW_CAP_NEW_RM	0x00000008
-	#define BNXT_FW_CAP_IF_CHANGE	0x00000010
+	#define BNXT_FW_CAP_SHORT_CMD			0x00000001
+	#define BNXT_FW_CAP_LLDP_AGENT			0x00000002
+	#define BNXT_FW_CAP_DCBX_AGENT			0x00000004
+	#define BNXT_FW_CAP_NEW_RM			0x00000008
+	#define BNXT_FW_CAP_IF_CHANGE			0x00000010
+	#define BNXT_FW_CAP_KONG_MB_CHNL		0x00000080
+	#define BNXT_FW_CAP_OVS_64BIT_HANDLE		0x00000400
+	#define BNXT_FW_CAP_TRUSTED_VF			0x00000800
+	#define BNXT_FW_CAP_ERROR_RECOVERY		0x00002000
+	#define BNXT_FW_CAP_PKG_VER			0x00004000
+	#define BNXT_FW_CAP_CFA_ADV_FLOW		0x00008000
+	#define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX	0x00010000
+	#define BNXT_FW_CAP_PCIE_STATS_SUPPORTED	0x00020000
+	#define BNXT_FW_CAP_EXT_STATS_SUPPORTED		0x00040000
+	#define BNXT_FW_CAP_ERR_RECOVER_RELOAD		0x00100000
 
 #define BNXT_NEW_RM(bp)		((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
 	u32			hwrm_spec_code;
 	u16			hwrm_cmd_seq;
-	u32			hwrm_intr_seq_id;
+	u16                     hwrm_cmd_kong_seq;
+	u16			hwrm_intr_seq_id;
 	void			*hwrm_short_cmd_req_addr;
 	dma_addr_t		hwrm_short_cmd_req_dma_addr;
 	void			*hwrm_cmd_resp_addr;
 	dma_addr_t		hwrm_cmd_resp_dma_addr;
+	void			*hwrm_cmd_kong_resp_addr;
+	dma_addr_t		hwrm_cmd_kong_resp_dma_addr;
 
+	struct rtnl_link_stats64	net_stats_prev;
 	struct rx_port_stats	*hw_rx_port_stats;
 	struct tx_port_stats	*hw_tx_port_stats;
 	struct rx_port_stats_ext	*hw_rx_port_stats_ext;
+	struct tx_port_stats_ext	*hw_tx_port_stats_ext;
+	struct pcie_ctx_hw_stats	*hw_pcie_stats;
 	dma_addr_t		hw_rx_port_stats_map;
 	dma_addr_t		hw_tx_port_stats_map;
 	dma_addr_t		hw_rx_port_stats_ext_map;
+	dma_addr_t		hw_tx_port_stats_ext_map;
+	dma_addr_t		hw_pcie_stats_map;
 	int			hw_port_stats_size;
+	u16			fw_rx_stats_ext_size;
+	u16			fw_tx_stats_ext_size;
+	u16			hw_ring_stats_size;
+	u8			pri2cos[8];
+	u8			pri2cos_valid;
 
 	u16			hwrm_max_req_len;
+	u16			hwrm_max_ext_req_len;
 	int			hwrm_cmd_timeout;
 	struct mutex		hwrm_cmd_lock;	/* serialize hwrm messages */
 	struct hwrm_ver_get_output	ver_resp;
@@ -1328,11 +1707,10 @@
 	u8			port_count;
 	u16			br_mode;
 
+	struct bnxt_coal_cap	coal_cap;
 	struct bnxt_coal	rx_coal;
 	struct bnxt_coal	tx_coal;
 
-#define BNXT_USEC_TO_COAL_TIMER(x)	((x) * 25 / 2)
-
 	u32			stats_coal_ticks;
 #define BNXT_DEF_STATS_COAL_TICKS	 1000000
 #define BNXT_MIN_STATS_COAL_TICKS	  250000
@@ -1357,9 +1735,30 @@
 #define BNXT_LINK_SPEED_CHNG_SP_EVENT	14
 #define BNXT_FLOW_STATS_SP_EVENT	15
 #define BNXT_UPDATE_PHY_SP_EVENT	16
+#define BNXT_RING_COAL_NOW_SP_EVENT	17
+#define BNXT_FW_RESET_NOTIFY_SP_EVENT	18
+#define BNXT_FW_EXCEPTION_SP_EVENT	19
+
+	struct delayed_work	fw_reset_task;
+	int			fw_reset_state;
+#define BNXT_FW_RESET_STATE_POLL_VF	1
+#define BNXT_FW_RESET_STATE_RESET_FW	2
+#define BNXT_FW_RESET_STATE_ENABLE_DEV	3
+#define BNXT_FW_RESET_STATE_POLL_FW	4
+#define BNXT_FW_RESET_STATE_OPENING	5
+#define BNXT_FW_RESET_STATE_POLL_FW_DOWN	6
+
+	u16			fw_reset_min_dsecs;
+#define BNXT_DFLT_FW_RST_MIN_DSECS	20
+	u16			fw_reset_max_dsecs;
+#define BNXT_DFLT_FW_RST_MAX_DSECS	60
+	unsigned long		fw_reset_timestamp;
+
+	struct bnxt_fw_health	*fw_health;
 
 	struct bnxt_hw_resc	hw_resc;
 	struct bnxt_pf_info	pf;
+	struct bnxt_ctx_mem_info	*ctx;
 #ifdef CONFIG_BNXT_SRIOV
 	int			nr_vfs;
 	struct bnxt_vf_info	vf;
@@ -1374,6 +1773,11 @@
 	struct mutex		sriov_lock;
 #endif
 
+#if BITS_PER_LONG == 32
+	/* ensure atomic 64-bit doorbell writes on 32-bit systems. */
+	spinlock_t		db_lock;
+#endif
+
 #define BNXT_NTP_FLTR_MAX_FLTR	4096
 #define BNXT_NTP_FLTR_HASH_SIZE	512
 #define BNXT_NTP_FLTR_HASH_MASK	(BNXT_NTP_FLTR_HASH_SIZE - 1)
@@ -1405,13 +1809,13 @@
 
 	/* devlink interface and vf-rep structs */
 	struct devlink		*dl;
+	struct devlink_port	dl_port;
 	enum devlink_eswitch_mode eswitch_mode;
 	struct bnxt_vf_rep	**vf_reps; /* array of vf-rep ptrs */
 	u16			*cfa_code_map; /* cfa_code -> vf_idx map */
 	u8			switch_id[8];
 	struct bnxt_tc_info	*tc_info;
 	struct dentry		*debugfs_pdev;
-	struct dentry		*debugfs_dim;
 	struct device		*hwmon_dev;
 };
 
@@ -1425,6 +1829,12 @@
 #define BNXT_RX_STATS_EXT_OFFSET(counter)		\
 	(offsetof(struct rx_port_stats_ext, counter) / 8)
 
+#define BNXT_TX_STATS_EXT_OFFSET(counter)		\
+	(offsetof(struct tx_port_stats_ext, counter) / 8)
+
+#define BNXT_PCIE_STATS_OFFSET(counter)			\
+	(offsetof(struct pcie_ctx_hw_stats, counter) / 8)
+
 #define I2C_DEV_ADDR_A0				0xa0
 #define I2C_DEV_ADDR_A2				0xa2
 #define SFF_DIAG_SUPPORT_OFFSET			0x5c
@@ -1443,21 +1853,106 @@
 		((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
 }
 
+#if BITS_PER_LONG == 32
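+/* Emulate a 64-bit doorbell write as two 32-bit writes under db_lock so
+ * the device never sees a torn value.  Note that this macro relies on a
+ * "bp" pointer being in scope at the call site.
+ */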
+#define writeq(val64, db)			\
+do {						\
+	spin_lock(&bp->db_lock);		\
+	writel((val64) & 0xffffffff, db);	\
+	writel((val64) >> 32, (db) + 4);	\
+	spin_unlock(&bp->db_lock);		\
+} while (0)
+
+#define writeq_relaxed writeq
+#endif
+
 /* For TX and RX ring doorbells with no ordering guarantee*/
-static inline void bnxt_db_write_relaxed(struct bnxt *bp, void __iomem *db,
-					 u32 val)
+static inline void bnxt_db_write_relaxed(struct bnxt *bp,
+					 struct bnxt_db_info *db, u32 idx)
 {
-	writel_relaxed(val, db);
-	if (bp->flags & BNXT_FLAG_DOUBLE_DB)
-		writel_relaxed(val, db);
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		writeq_relaxed(db->db_key64 | idx, db->doorbell);
+	} else {
+		u32 db_val = db->db_key32 | idx;
+
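+		/* older chips may need the doorbell written twice */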
+		writel_relaxed(db_val, db->doorbell);
+		if (bp->flags & BNXT_FLAG_DOUBLE_DB)
+			writel_relaxed(db_val, db->doorbell);
+	}
 }
 
 /* For TX and RX ring doorbells */
-static inline void bnxt_db_write(struct bnxt *bp, void __iomem *db, u32 val)
+static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
+				 u32 idx)
 {
-	writel(val, db);
-	if (bp->flags & BNXT_FLAG_DOUBLE_DB)
-		writel(val, db);
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		writeq(db->db_key64 | idx, db->doorbell);
+	} else {
+		u32 db_val = db->db_key32 | idx;
+
+		writel(db_val, db->doorbell);
+		if (bp->flags & BNXT_FLAG_DOUBLE_DB)
+			writel(db_val, db->doorbell);
+	}
+}
+
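+/* CFA (flow offload) commands are steered to the secondary "Kong"
+ * mailbox channel when the firmware supports it; all other commands use
+ * the default ChiMP channel.
+ */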
+static inline bool bnxt_cfa_hwrm_message(u16 req_type)
+{
+	switch (req_type) {
+	case HWRM_CFA_ENCAP_RECORD_ALLOC:
+	case HWRM_CFA_ENCAP_RECORD_FREE:
+	case HWRM_CFA_DECAP_FILTER_ALLOC:
+	case HWRM_CFA_DECAP_FILTER_FREE:
+	case HWRM_CFA_NTUPLE_FILTER_ALLOC:
+	case HWRM_CFA_NTUPLE_FILTER_FREE:
+	case HWRM_CFA_NTUPLE_FILTER_CFG:
+	case HWRM_CFA_EM_FLOW_ALLOC:
+	case HWRM_CFA_EM_FLOW_FREE:
+	case HWRM_CFA_EM_FLOW_CFG:
+	case HWRM_CFA_FLOW_ALLOC:
+	case HWRM_CFA_FLOW_FREE:
+	case HWRM_CFA_FLOW_INFO:
+	case HWRM_CFA_FLOW_FLUSH:
+	case HWRM_CFA_FLOW_STATS:
+	case HWRM_CFA_METER_PROFILE_ALLOC:
+	case HWRM_CFA_METER_PROFILE_FREE:
+	case HWRM_CFA_METER_PROFILE_CFG:
+	case HWRM_CFA_METER_INSTANCE_ALLOC:
+	case HWRM_CFA_METER_INSTANCE_FREE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool bnxt_kong_hwrm_message(struct bnxt *bp, struct input *req)
+{
+	return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
+		bnxt_cfa_hwrm_message(le16_to_cpu(req->req_type)));
+}
+
+static inline bool bnxt_hwrm_kong_chnl(struct bnxt *bp, struct input *req)
+{
+	return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
+		req->resp_addr == cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr));
+}
+
+static inline void *bnxt_get_hwrm_resp_addr(struct bnxt *bp, void *req)
+{
+	if (bnxt_hwrm_kong_chnl(bp, (struct input *)req))
+		return bp->hwrm_cmd_kong_resp_addr;
+	else
+		return bp->hwrm_cmd_resp_addr;
+}
+
+static inline u16 bnxt_get_hwrm_seq_id(struct bnxt *bp, u16 dst)
+{
+	u16 seq_id;
+
+	if (dst == BNXT_HWRM_CHNL_CHIMP)
+		seq_id = bp->hwrm_cmd_seq++;
+	else
+		seq_id = bp->hwrm_cmd_kong_seq++;
+	return seq_id;
 }
 
 extern const u16 bnxt_lhint_arr[];
@@ -1465,6 +1960,7 @@
 int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 		       u16 prod, gfp_t gfp);
 void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data);
+u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
 void bnxt_set_tpa_flags(struct bnxt *bp);
 void bnxt_set_ring_params(struct bnxt *);
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
@@ -1477,13 +1973,14 @@
 				     int bmap_size);
 int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id);
 int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings);
+int bnxt_nq_rings_in_use(struct bnxt *bp);
 int bnxt_hwrm_set_coal(struct bnxt *);
 unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
-void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
+unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp);
 unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
-unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp);
+unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp);
 int bnxt_get_avail_msix(struct bnxt *bp, int num);
-int bnxt_reserve_rings(struct bnxt *bp);
+int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
 void bnxt_tx_disable(struct bnxt *bp);
 void bnxt_tx_enable(struct bnxt *bp);
 int bnxt_hwrm_set_pause(struct bnxt *);
@@ -1496,12 +1993,15 @@
 int bnxt_half_open_nic(struct bnxt *bp);
 void bnxt_half_close_nic(struct bnxt *bp);
 int bnxt_close_nic(struct bnxt *, bool, bool);
+void bnxt_fw_exception(struct bnxt *bp);
+void bnxt_fw_reset(struct bnxt *bp);
 int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
 		     int tx_xdp);
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
 int bnxt_restore_pf_fw_resources(struct bnxt *bp);
-int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr);
+int bnxt_get_port_parent_id(struct net_device *dev,
+			    struct netdev_phys_item_id *ppid);
 void bnxt_dim_work(struct work_struct *work);
 int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index a85d2be..fb6f30d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -316,8 +316,8 @@
 
 	n = IEEE_8021QAZ_MAX_TCS;
 	data_len = sizeof(*data) + sizeof(*fw_app) * n;
-	data = dma_zalloc_coherent(&bp->pdev->dev, data_len, &mapping,
-				   GFP_KERNEL);
+	data = dma_alloc_coherent(&bp->pdev->dev, data_len, &mapping,
+				  GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
@@ -377,8 +377,6 @@
 	set.data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
 	set.hdr_cnt = 1;
 	rc = hwrm_send_message(bp, &set, sizeof(set), HWRM_CMD_TIMEOUT);
-	if (rc)
-		rc = -EIO;
 
 set_app_exit:
 	dma_free_coherent(&bp->pdev->dev, data_len, data, mapping);
@@ -391,12 +389,13 @@
 	struct hwrm_queue_dscp_qcaps_input req = {0};
 	int rc;
 
+	bp->max_dscp_value = 0;
 	if (bp->hwrm_spec_code < 0x10800 || BNXT_VF(bp))
 		return 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP_QCAPS, -1, -1);
 	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (!rc) {
 		bp->max_dscp_value = (1 << resp->num_dscp_bits) - 1;
 		if (bp->max_dscp_value < 0x3f)
@@ -433,8 +432,6 @@
 	dscp2pri->pri = app->priority;
 	req.entry_cnt = cpu_to_le16(1);
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		rc = -EIO;
 	dma_free_coherent(&bp->pdev->dev, sizeof(*dscp2pri), dscp2pri,
 			  mapping);
 	return rc;
@@ -471,7 +468,10 @@
 	if (total_ets_bw > 100)
 		return -EINVAL;
 
-	*tc = max_tc + 1;
+	if (max_tc >= bp->max_tc)
+		*tc = bp->max_tc;
+	else
+		*tc = max_tc + 1;
 	return 0;
 }
 
@@ -719,6 +719,7 @@
 
 void bnxt_dcb_init(struct bnxt *bp)
 {
+	bp->dcbx_cap = 0;
 	if (bp->hwrm_spec_code < 0x10501)
 		return;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
index 94e208e..156c240 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
@@ -11,7 +11,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include "bnxt_hsi.h"
-#include <linux/net_dim.h>
+#include <linux/dim.h>
 #include "bnxt.h"
 #include "bnxt_debugfs.h"
 
@@ -21,7 +21,7 @@
 				char __user *buffer,
 				size_t count, loff_t *ppos)
 {
-	struct net_dim *dim = filep->private_data;
+	struct dim *dim = filep->private_data;
 	int len;
 	char *buf;
 
@@ -61,45 +61,30 @@
 	.read = debugfs_dim_read,
 };
 
-static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx,
-					    struct dentry *dd)
+static void debugfs_dim_ring_init(struct dim *dim, int ring_idx,
+				  struct dentry *dd)
 {
 	static char qname[16];
 
 	snprintf(qname, 10, "%d", ring_idx);
-	return debugfs_create_file(qname, 0600, dd,
-				   dim, &debugfs_dim_fops);
+	debugfs_create_file(qname, 0600, dd, dim, &debugfs_dim_fops);
 }
 
 void bnxt_debug_dev_init(struct bnxt *bp)
 {
 	const char *pname = pci_name(bp->pdev);
-	struct dentry *pdevf;
+	struct dentry *dir;
 	int i;
 
 	bp->debugfs_pdev = debugfs_create_dir(pname, bnxt_debug_mnt);
-	if (bp->debugfs_pdev) {
-		pdevf = debugfs_create_dir("dim", bp->debugfs_pdev);
-		if (!pdevf) {
-			pr_err("failed to create debugfs entry %s/dim\n",
-			       pname);
-			return;
-		}
-		bp->debugfs_dim = pdevf;
-		/* create files for each rx ring */
-		for (i = 0; i < bp->cp_nr_rings; i++) {
-			struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+	dir = debugfs_create_dir("dim", bp->debugfs_pdev);
 
-			if (cpr && bp->bnapi[i]->rx_ring) {
-				pdevf = debugfs_dim_ring_init(&cpr->dim, i,
-							      bp->debugfs_dim);
-				if (!pdevf)
-					pr_err("failed to create debugfs entry %s/dim/%d\n",
-					       pname, i);
-			}
-		}
-	} else {
-		pr_err("failed to create debugfs entry %s\n", pname);
+	/* create files for each rx ring */
+	for (i = 0; i < bp->cp_nr_rings; i++) {
+		struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+
+		if (cpr && bp->bnapi[i]->rx_ring)
+			debugfs_dim_ring_init(&cpr->dim, i, dir);
 	}
 }
 
@@ -114,8 +99,6 @@
 void bnxt_debug_init(void)
 {
 	bnxt_debug_mnt = debugfs_create_dir("bnxt_en", NULL);
-	if (!bnxt_debug_mnt)
-		pr_err("failed to init bnxt_en debugfs\n");
 }
 
 void bnxt_debug_exit(void)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 790c684..7151244 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -9,11 +9,193 @@
 
 #include <linux/pci.h>
 #include <linux/netdevice.h>
+#include <net/devlink.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_vfr.h"
 #include "bnxt_devlink.h"
 
+static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+				     struct devlink_fmsg *fmsg)
+{
+	struct bnxt *bp = devlink_health_reporter_priv(reporter);
+	struct bnxt_fw_health *health = bp->fw_health;
+	u32 val, health_status;
+	int rc;
+
+	if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+		return 0;
+
+	val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+	health_status = val & 0xffff;
+
+	if (health_status < BNXT_FW_STATUS_HEALTHY) {
+		rc = devlink_fmsg_string_pair_put(fmsg, "Description",
+						  "Not yet completed initialization");
+		if (rc)
+			return rc;
+	} else if (health_status > BNXT_FW_STATUS_HEALTHY) {
+		rc = devlink_fmsg_string_pair_put(fmsg, "Description",
+						  "Encountered fatal error and cannot recover");
+		if (rc)
+			return rc;
+	}
+
+	if (val >> 16) {
+		rc = devlink_fmsg_u32_pair_put(fmsg, "Error code", val >> 16);
+		if (rc)
+			return rc;
+	}
+
+	val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+	rc = devlink_fmsg_u32_pair_put(fmsg, "Reset count", val);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
+	.name = "fw",
+	.diagnose = bnxt_fw_reporter_diagnose,
+};
+
+static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
+				 void *priv_ctx)
+{
+	struct bnxt *bp = devlink_health_reporter_priv(reporter);
+
+	if (!priv_ctx)
+		return -EOPNOTSUPP;
+
+	bnxt_fw_reset(bp);
+	return 0;
+}
+
+static const
+struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = {
+	.name = "fw_reset",
+	.recover = bnxt_fw_reset_recover,
+};
+
+static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
+				 void *priv_ctx)
+{
+	struct bnxt *bp = devlink_health_reporter_priv(reporter);
+	struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+	unsigned long event;
+
+	if (!priv_ctx)
+		return -EOPNOTSUPP;
+
+	event = fw_reporter_ctx->sp_event;
+	if (event == BNXT_FW_RESET_NOTIFY_SP_EVENT)
+		bnxt_fw_reset(bp);
+	else if (event == BNXT_FW_EXCEPTION_SP_EVENT)
+		bnxt_fw_exception(bp);
+
+	return 0;
+}
+
+static const
+struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = {
+	.name = "fw_fatal",
+	.recover = bnxt_fw_fatal_recover,
+};
+
+static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
+{
+	struct bnxt_fw_health *health = bp->fw_health;
+
+	if (!health)
+		return;
+
+	health->fw_reporter =
+		devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
+					       0, false, bp);
+	if (IS_ERR(health->fw_reporter)) {
+		netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
+			    PTR_ERR(health->fw_reporter));
+		health->fw_reporter = NULL;
+	}
+
+	health->fw_reset_reporter =
+		devlink_health_reporter_create(bp->dl,
+					       &bnxt_dl_fw_reset_reporter_ops,
+					       0, true, bp);
+	if (IS_ERR(health->fw_reset_reporter)) {
+		netdev_warn(bp->dev, "Failed to create FW reset health reporter, rc = %ld\n",
+			    PTR_ERR(health->fw_reset_reporter));
+		health->fw_reset_reporter = NULL;
+	}
+
+	health->fw_fatal_reporter =
+		devlink_health_reporter_create(bp->dl,
+					       &bnxt_dl_fw_fatal_reporter_ops,
+					       0, true, bp);
+	if (IS_ERR(health->fw_fatal_reporter)) {
+		netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
+			    PTR_ERR(health->fw_fatal_reporter));
+		health->fw_fatal_reporter = NULL;
+	}
+}
+
+static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
+{
+	struct bnxt_fw_health *health = bp->fw_health;
+
+	if (!health)
+		return;
+
+	if (health->fw_reporter)
+		devlink_health_reporter_destroy(health->fw_reporter);
+
+	if (health->fw_reset_reporter)
+		devlink_health_reporter_destroy(health->fw_reset_reporter);
+
+	if (health->fw_fatal_reporter)
+		devlink_health_reporter_destroy(health->fw_fatal_reporter);
+}
+
+void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	struct bnxt_fw_reporter_ctx fw_reporter_ctx;
+
+	if (!fw_health)
+		return;
+
+	fw_reporter_ctx.sp_event = event;
+	switch (event) {
+	case BNXT_FW_RESET_NOTIFY_SP_EVENT:
+		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
+			if (!fw_health->fw_fatal_reporter)
+				return;
+
+			devlink_health_report(fw_health->fw_fatal_reporter,
+					      "FW fatal async event received",
+					      &fw_reporter_ctx);
+			return;
+		}
+		if (!fw_health->fw_reset_reporter)
+			return;
+
+		devlink_health_report(fw_health->fw_reset_reporter,
+				      "FW non-fatal reset event received",
+				      &fw_reporter_ctx);
+		return;
+
+	case BNXT_FW_EXCEPTION_SP_EVENT:
+		if (!fw_health->fw_fatal_reporter)
+			return;
+
+		devlink_health_report(fw_health->fw_fatal_reporter,
+				      "FW fatal error reported",
+				      &fw_reporter_ctx);
+		return;
+	}
+}
+
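The reporters above follow the stock devlink health pattern: a const ops
table names the reporter and supplies the optional diagnose/recover
callbacks, devlink_health_reporter_create() binds it to the devlink
instance (the v5.4 signature takes a graceful period and an auto-recover
flag), and devlink_health_report() later feeds an error plus context into
the recovery machinery. A condensed sketch of the same pattern, with
hypothetical demo_* names standing in for the driver specifics:

	/* Minimal devlink health sketch; demo_* names are placeholders,
	 * the devlink_* calls match the v5.4 API used in this patch. */
	static int demo_recover(struct devlink_health_reporter *reporter,
				void *priv_ctx)
	{
		struct demo_dev *dd = devlink_health_reporter_priv(reporter);

		return demo_reset_hw(dd);	/* 0 re-marks the reporter healthy */
	}

	static const struct devlink_health_reporter_ops demo_reporter_ops = {
		.name = "demo",
		.recover = demo_recover,
	};

	static int demo_health_init(struct demo_dev *dd, struct devlink *dl)
	{
		/* graceful_period 0, auto_recover true, dd handed back as priv */
		dd->reporter = devlink_health_reporter_create(dl,
							      &demo_reporter_ops,
							      0, true, dd);
		return PTR_ERR_OR_ZERO(dd->reporter);
	}

	static void demo_error_irq(struct demo_dev *dd)
	{
		/* a non-NULL context reaches .recover() as priv_ctx */
		devlink_health_report(dd->reporter, "demo hw error", dd);
	}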
 static const struct devlink_ops bnxt_dl_ops = {
 #ifdef CONFIG_BNXT_SRIOV
 	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
@@ -21,19 +203,75 @@
 #endif /* CONFIG_BNXT_SRIOV */
 };
 
+enum bnxt_dl_param_id {
+	BNXT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+	BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
+};
+
 static const struct bnxt_dl_nvm_param nvm_params[] = {
 	{DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, NVM_OFF_ENABLE_SRIOV,
-	 BNXT_NVM_SHARED_CFG, 1},
+	 BNXT_NVM_SHARED_CFG, 1, 1},
+	{DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, NVM_OFF_IGNORE_ARI,
+	 BNXT_NVM_SHARED_CFG, 1, 1},
+	{DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+	 NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10, 4},
+	{DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+	 NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7, 4},
+	{BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK,
+	 BNXT_NVM_SHARED_CFG, 1, 1},
 };
 
+union bnxt_nvm_data {
+	u8	val8;
+	__le32	val32;
+};
+
+static void bnxt_copy_to_nvm_data(union bnxt_nvm_data *dst,
+				  union devlink_param_value *src,
+				  int nvm_num_bits, int dl_num_bytes)
+{
+	u32 val32 = 0;
+
+	if (nvm_num_bits == 1) {
+		dst->val8 = src->vbool;
+		return;
+	}
+	if (dl_num_bytes == 4)
+		val32 = src->vu32;
+	else if (dl_num_bytes == 2)
+		val32 = (u32)src->vu16;
+	else if (dl_num_bytes == 1)
+		val32 = (u32)src->vu8;
+	dst->val32 = cpu_to_le32(val32);
+}
+
+static void bnxt_copy_from_nvm_data(union devlink_param_value *dst,
+				    union bnxt_nvm_data *src,
+				    int nvm_num_bits, int dl_num_bytes)
+{
+	u32 val32;
+
+	if (nvm_num_bits == 1) {
+		dst->vbool = src->val8;
+		return;
+	}
+	val32 = le32_to_cpu(src->val32);
+	if (dl_num_bytes == 4)
+		dst->vu32 = val32;
+	else if (dl_num_bytes == 2)
+		dst->vu16 = (u16)val32;
+	else if (dl_num_bytes == 1)
+		dst->vu8 = (u8)val32;
+}
+
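These two helpers fix the wire convention for NVM-backed parameters: a
field declared with nvm_num_bits == 1 travels as a plain u8 boolean,
anything wider travels as a little-endian 32-bit word, and dl_num_bytes
selects which member of the devlink value union is consulted. A standalone
userspace sketch of the encode side (stdint types in place of the
kernel's, the endian conversion elided, so a little-endian host is
assumed):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	union nvm_data {		/* mirrors union bnxt_nvm_data */
		uint8_t	 val8;
		uint32_t val32;		/* little-endian in the driver proper */
	};

	/* same shape as bnxt_copy_to_nvm_data(), cpu_to_le32() elided */
	static void copy_to_nvm(union nvm_data *dst, uint32_t src,
				int nvm_num_bits, int dl_num_bytes)
	{
		if (nvm_num_bits == 1) {	/* booleans ride in val8 */
			dst->val8 = (uint8_t)src;
			return;
		}
		dst->val32 = src;	/* vu8/vu16/vu32 all widen to 32 bits */
	}

	int main(void)
	{
		union nvm_data d = { 0 };

		copy_to_nvm(&d, 1024, 10, 4);	/* msix_vec_per_pf_max */
		assert(d.val32 == 1024);

		copy_to_nvm(&d, true, 1, 1);	/* enable_sriov */
		assert(d.val8 == 1);
		return 0;
	}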
 static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
 			     int msg_len, union devlink_param_value *val)
 {
 	struct hwrm_nvm_get_variable_input *req = msg;
-	void *data_addr = NULL, *buf = NULL;
 	struct bnxt_dl_nvm_param nvm_param;
-	int bytesize, idx = 0, rc, i;
+	union bnxt_nvm_data *data;
 	dma_addr_t data_dma_addr;
+	int idx = 0, rc, i;
 
 	/* Get/Set NVM CFG parameter is supported only on PFs */
 	if (BNXT_VF(bp))
@@ -54,33 +292,34 @@
 	else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG)
 		idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID;
 
-	bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;
-	if (nvm_param.num_bits == 1)
-		buf = &val->vbool;
-
-	data_addr = dma_zalloc_coherent(&bp->pdev->dev, bytesize,
-					&data_dma_addr, GFP_KERNEL);
-	if (!data_addr)
+	data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
+				  &data_dma_addr, GFP_KERNEL);
+	if (!data)
 		return -ENOMEM;
 
 	req->dest_data_addr = cpu_to_le64(data_dma_addr);
-	req->data_len = cpu_to_le16(nvm_param.num_bits);
+	req->data_len = cpu_to_le16(nvm_param.nvm_num_bits);
 	req->option_num = cpu_to_le16(nvm_param.offset);
 	req->index_0 = cpu_to_le16(idx);
 	if (idx)
 		req->dimensions = cpu_to_le16(1);
 
-	if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE))
-		memcpy(data_addr, buf, bytesize);
-
-	rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
-	if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE))
-		memcpy(buf, data_addr, bytesize);
-
-	dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr);
-	if (rc)
-		return -EIO;
-	return 0;
+	if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) {
+		bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits,
+				      nvm_param.dl_num_bytes);
+		rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
+	} else {
+		rc = hwrm_send_message_silent(bp, msg, msg_len,
+					      HWRM_CMD_TIMEOUT);
+		if (!rc)
+			bnxt_copy_from_nvm_data(val, data,
+						nvm_param.nvm_num_bits,
+						nvm_param.dl_num_bytes);
+	}
+	dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
+	if (rc == -EACCES)
+		netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
+	return rc;
 }
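The rework above is the usual HWRM data-buffer idiom: allocate a small
coherent DMA buffer, point the request's data address at it, send the
message, and copy the payload in or out around the send. Stripped of the
bnxt specifics it looks roughly like this (demo_* names are hypothetical;
the dma_* and endian helpers are the real kernel API):

	/* Sketch of the coherent-buffer request pattern, not bnxt code. */
	static int demo_fw_read(struct demo_dev *dd, struct demo_req *req,
				u32 *out)
	{
		dma_addr_t mapping;
		__le32 *buf;
		int rc;

		buf = dma_alloc_coherent(dd->dev, sizeof(*buf), &mapping,
					 GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		req->dest_data_addr = cpu_to_le64(mapping);	/* device writes here */
		rc = demo_send_msg(dd, req, sizeof(*req));	/* cf. hwrm_send_message() */
		if (!rc)
			*out = le32_to_cpu(*buf);

		dma_free_coherent(dd->dev, sizeof(*buf), buf, mapping);
		return rc;
	}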
 
 static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
@@ -88,9 +327,15 @@
 {
 	struct hwrm_nvm_get_variable_input req = {0};
 	struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+	int rc;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1);
-	return bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
+	rc = bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
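+	/* the NVM bit has the inverted "disable" sense; flip for devlink */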
+	if (!rc)
+		if (id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
+			ctx->val.vbool = !ctx->val.vbool;
+
+	return rc;
 }
 
 static int bnxt_dl_nvm_param_set(struct devlink *dl, u32 id,
@@ -100,14 +345,58 @@
 	struct bnxt *bp = bnxt_get_bp_from_dl(dl);
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_SET_VARIABLE, -1, -1);
+
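+	/* flip back to the NVM's inverted "disable" sense before writing */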
+	if (id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
+		ctx->val.vbool = !ctx->val.vbool;
+
 	return bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
 }
 
+static int bnxt_dl_msix_validate(struct devlink *dl, u32 id,
+				 union devlink_param_value val,
+				 struct netlink_ext_ack *extack)
+{
+	int max_val = -1;
+
+	if (id == DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX)
+		max_val = BNXT_MSIX_VEC_MAX;
+
+	if (id == DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN)
+		max_val = BNXT_MSIX_VEC_MIN_MAX;
+
+	if (val.vu32 > max_val) {
+		NL_SET_ERR_MSG_MOD(extack, "MSIX value exceeds the supported range");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct devlink_param bnxt_dl_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV,
 			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
 			      bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
 			      NULL),
+	DEVLINK_PARAM_GENERIC(IGNORE_ARI,
+			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
+			      NULL),
+	DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MAX,
+			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
+			      bnxt_dl_msix_validate),
+	DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MIN,
+			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
+			      bnxt_dl_msix_validate),
+	DEVLINK_PARAM_DRIVER(BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
+			     "gre_ver_check", DEVLINK_PARAM_TYPE_BOOL,
+			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			     bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
+			     NULL),
+};
+
+static const struct devlink_param bnxt_dl_port_params[] = {
 };
 
 int bnxt_dl_register(struct bnxt *bp)
@@ -147,8 +436,34 @@
 		goto err_dl_unreg;
 	}
 
+	devlink_port_attrs_set(&bp->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+			       bp->pf.port_id, false, 0,
+			       bp->switch_id, sizeof(bp->switch_id));
+	rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id);
+	if (rc) {
+		netdev_err(bp->dev, "devlink_port_register failed");
+		goto err_dl_param_unreg;
+	}
+	devlink_port_type_eth_set(&bp->dl_port, bp->dev);
+
+	rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params,
+					  ARRAY_SIZE(bnxt_dl_port_params));
+	if (rc) {
+		netdev_err(bp->dev, "devlink_port_params_register failed");
+		goto err_dl_port_unreg;
+	}
+
+	devlink_params_publish(dl);
+
+	bnxt_dl_fw_reporters_create(bp);
+
 	return 0;
 
+err_dl_port_unreg:
+	devlink_port_unregister(&bp->dl_port);
+err_dl_param_unreg:
+	devlink_params_unregister(dl, bnxt_dl_params,
+				  ARRAY_SIZE(bnxt_dl_params));
 err_dl_unreg:
 	devlink_unregister(dl);
 err_dl_free:
@@ -164,6 +479,10 @@
 	if (!dl)
 		return;
 
+	bnxt_dl_fw_reporters_destroy(bp);
+	devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
+				       ARRAY_SIZE(bnxt_dl_port_params));
+	devlink_port_unregister(&bp->dl_port);
 	devlink_params_unregister(dl, bnxt_dl_params,
 				  ARRAY_SIZE(bnxt_dl_params));
 	devlink_unregister(dl);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
index 2f68dc0..2f4fd0a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -33,8 +33,15 @@
 	}
 }
 
+#define NVM_OFF_MSIX_VEC_PER_PF_MAX	108
+#define NVM_OFF_MSIX_VEC_PER_PF_MIN	114
+#define NVM_OFF_IGNORE_ARI		164
+#define NVM_OFF_DIS_GRE_VER_CHECK	171
 #define NVM_OFF_ENABLE_SRIOV		401
 
+#define BNXT_MSIX_VEC_MAX	1280
+#define BNXT_MSIX_VEC_MIN_MAX	128
+
 enum bnxt_nvm_dir_type {
 	BNXT_NVM_SHARED_CFG = 40,
 	BNXT_NVM_PORT_CFG,
@@ -45,9 +52,11 @@
 	u16 id;
 	u16 offset;
 	u16 dir_type;
-	u16 num_bits;
+	u16 nvm_num_bits;
+	u8 dl_num_bytes;
 };
 
+void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
 int bnxt_dl_register(struct bnxt *bp);
 void bnxt_dl_unregister(struct bnxt *bp);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
index afa97c8..6f6576d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
@@ -7,26 +7,25 @@
  * the Free Software Foundation.
  */
 
-#include <linux/net_dim.h>
+#include <linux/dim.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 
 void bnxt_dim_work(struct work_struct *work)
 {
-	struct net_dim *dim = container_of(work, struct net_dim,
-					   work);
+	struct dim *dim = container_of(work, struct dim, work);
 	struct bnxt_cp_ring_info *cpr = container_of(dim,
 						     struct bnxt_cp_ring_info,
 						     dim);
 	struct bnxt_napi *bnapi = container_of(cpr,
 					       struct bnxt_napi,
 					       cp_ring);
-	struct net_dim_cq_moder cur_moder =
+	struct dim_cq_moder cur_moder =
 		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
 	cpr->rx_ring_coal.coal_ticks = cur_moder.usec;
 	cpr->rx_ring_coal.coal_bufs = cur_moder.pkts;
 
 	bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi);
-	dim->state = NET_DIM_START_MEASURE;
+	dim->state = DIM_START_MEASURE;
 }
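For context, bnxt_dim_work() is only the consumer half of the renamed DIM
machinery; the producer half runs in NAPI poll, where the driver folds its
event, packet, and byte counters into a struct dim_sample and hands it to
net_dim(), which may in turn queue this work item. A sketch of that
producer call (dim_update_sample() and net_dim() are the real
<linux/dim.h> API; the demo_ring fields stand in for the driver's own
counters):

	/* Producer side: feeding the DIM algorithm from NAPI poll. */
	static void demo_poll_done(struct demo_ring *ring)
	{
		struct dim_sample dim_sample = {};

		dim_update_sample(ring->event_ctr,	/* event count */
				  ring->rx_packets,	/* cumulative packets */
				  ring->rx_bytes,	/* cumulative bytes */
				  &dim_sample);
		/* may schedule dim->work, i.e. bnxt_dim_work() above */
		net_dim(&ring->dim, dim_sample);
	}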
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index da9b876..51c1404 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -137,7 +137,44 @@
 	return rc;
 }
 
-#define BNXT_NUM_STATS	21
+static const char * const bnxt_ring_stats_str[] = {
+	"rx_ucast_packets",
+	"rx_mcast_packets",
+	"rx_bcast_packets",
+	"rx_discards",
+	"rx_drops",
+	"rx_ucast_bytes",
+	"rx_mcast_bytes",
+	"rx_bcast_bytes",
+	"tx_ucast_packets",
+	"tx_mcast_packets",
+	"tx_bcast_packets",
+	"tx_discards",
+	"tx_drops",
+	"tx_ucast_bytes",
+	"tx_mcast_bytes",
+	"tx_bcast_bytes",
+};
+
+static const char * const bnxt_ring_tpa_stats_str[] = {
+	"tpa_packets",
+	"tpa_bytes",
+	"tpa_events",
+	"tpa_aborts",
+};
+
+static const char * const bnxt_ring_tpa2_stats_str[] = {
+	"rx_tpa_eligible_pkt",
+	"rx_tpa_eligible_bytes",
+	"rx_tpa_pkt",
+	"rx_tpa_bytes",
+	"rx_tpa_errors",
+};
+
+static const char * const bnxt_ring_sw_stats_str[] = {
+	"rx_l4_csum_errors",
+	"missed_irqs",
+};
 
 #define BNXT_RX_STATS_ENTRY(counter)	\
 	{ BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
@@ -148,6 +185,110 @@
 #define BNXT_RX_STATS_EXT_ENTRY(counter)	\
 	{ BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) }
 
+#define BNXT_TX_STATS_EXT_ENTRY(counter)	\
+	{ BNXT_TX_STATS_EXT_OFFSET(counter), __stringify(counter) }
+
+#define BNXT_RX_STATS_EXT_PFC_ENTRY(n)				\
+	BNXT_RX_STATS_EXT_ENTRY(pfc_pri##n##_rx_duration_us),	\
+	BNXT_RX_STATS_EXT_ENTRY(pfc_pri##n##_rx_transitions)
+
+#define BNXT_TX_STATS_EXT_PFC_ENTRY(n)				\
+	BNXT_TX_STATS_EXT_ENTRY(pfc_pri##n##_tx_duration_us),	\
+	BNXT_TX_STATS_EXT_ENTRY(pfc_pri##n##_tx_transitions)
+
+#define BNXT_RX_STATS_EXT_PFC_ENTRIES				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(0),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(1),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(2),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(3),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(4),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(5),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(6),				\
+	BNXT_RX_STATS_EXT_PFC_ENTRY(7)
+
+#define BNXT_TX_STATS_EXT_PFC_ENTRIES				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(0),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(1),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(2),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(3),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(4),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(5),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(6),				\
+	BNXT_TX_STATS_EXT_PFC_ENTRY(7)
+
+#define BNXT_RX_STATS_EXT_COS_ENTRY(n)				\
+	BNXT_RX_STATS_EXT_ENTRY(rx_bytes_cos##n),		\
+	BNXT_RX_STATS_EXT_ENTRY(rx_packets_cos##n)
+
+#define BNXT_TX_STATS_EXT_COS_ENTRY(n)				\
+	BNXT_TX_STATS_EXT_ENTRY(tx_bytes_cos##n),		\
+	BNXT_TX_STATS_EXT_ENTRY(tx_packets_cos##n)
+
+#define BNXT_RX_STATS_EXT_COS_ENTRIES				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(0),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(1),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(2),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(3),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(4),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(5),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(6),				\
+	BNXT_RX_STATS_EXT_COS_ENTRY(7)				\
+
+#define BNXT_TX_STATS_EXT_COS_ENTRIES				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(0),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(1),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(2),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(3),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(4),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(5),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(6),				\
+	BNXT_TX_STATS_EXT_COS_ENTRY(7)				\
+
+#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(n)			\
+	BNXT_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n),	\
+	BNXT_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n)
+
+#define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(0),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(1),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(2),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(3),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(4),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(5),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(6),				\
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(7)
+
+#define BNXT_RX_STATS_PRI_ENTRY(counter, n)		\
+	{ BNXT_RX_STATS_EXT_OFFSET(counter##_cos0),	\
+	  __stringify(counter##_pri##n) }
+
+#define BNXT_TX_STATS_PRI_ENTRY(counter, n)		\
+	{ BNXT_TX_STATS_EXT_OFFSET(counter##_cos0),	\
+	  __stringify(counter##_pri##n) }
+
+#define BNXT_RX_STATS_PRI_ENTRIES(counter)		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 0),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 1),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 2),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 3),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 4),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 5),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 6),		\
+	BNXT_RX_STATS_PRI_ENTRY(counter, 7)
+
+#define BNXT_TX_STATS_PRI_ENTRIES(counter)		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 0),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 1),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 2),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 3),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 4),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 5),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 6),		\
+	BNXT_TX_STATS_PRI_ENTRY(counter, 7)
+
+#define BNXT_PCIE_STATS_ENTRY(counter)	\
+	{ BNXT_PCIE_STATS_OFFSET(counter), __stringify(counter) }
+
 enum {
 	RX_TOTAL_DISCARDS,
 	TX_TOTAL_DISCARDS,
@@ -256,23 +397,116 @@
 	BNXT_RX_STATS_EXT_ENTRY(resume_pause_events),
 	BNXT_RX_STATS_EXT_ENTRY(continuous_roce_pause_events),
 	BNXT_RX_STATS_EXT_ENTRY(resume_roce_pause_events),
+	BNXT_RX_STATS_EXT_COS_ENTRIES,
+	BNXT_RX_STATS_EXT_PFC_ENTRIES,
+	BNXT_RX_STATS_EXT_ENTRY(rx_bits),
+	BNXT_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold),
+	BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err),
+	BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits),
+	BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES,
+};
+
+static const struct {
+	long offset;
+	char string[ETH_GSTRING_LEN];
+} bnxt_tx_port_stats_ext_arr[] = {
+	BNXT_TX_STATS_EXT_COS_ENTRIES,
+	BNXT_TX_STATS_EXT_PFC_ENTRIES,
+};
+
+static const struct {
+	long base_off;
+	char string[ETH_GSTRING_LEN];
+} bnxt_rx_bytes_pri_arr[] = {
+	BNXT_RX_STATS_PRI_ENTRIES(rx_bytes),
+};
+
+static const struct {
+	long base_off;
+	char string[ETH_GSTRING_LEN];
+} bnxt_rx_pkts_pri_arr[] = {
+	BNXT_RX_STATS_PRI_ENTRIES(rx_packets),
+};
+
+static const struct {
+	long base_off;
+	char string[ETH_GSTRING_LEN];
+} bnxt_tx_bytes_pri_arr[] = {
+	BNXT_TX_STATS_PRI_ENTRIES(tx_bytes),
+};
+
+static const struct {
+	long base_off;
+	char string[ETH_GSTRING_LEN];
+} bnxt_tx_pkts_pri_arr[] = {
+	BNXT_TX_STATS_PRI_ENTRIES(tx_packets),
+};
+
+static const struct {
+	long offset;
+	char string[ETH_GSTRING_LEN];
+} bnxt_pcie_stats_arr[] = {
+	BNXT_PCIE_STATS_ENTRY(pcie_pl_signal_integrity),
+	BNXT_PCIE_STATS_ENTRY(pcie_dl_signal_integrity),
+	BNXT_PCIE_STATS_ENTRY(pcie_tl_signal_integrity),
+	BNXT_PCIE_STATS_ENTRY(pcie_link_integrity),
+	BNXT_PCIE_STATS_ENTRY(pcie_tx_traffic_rate),
+	BNXT_PCIE_STATS_ENTRY(pcie_rx_traffic_rate),
+	BNXT_PCIE_STATS_ENTRY(pcie_tx_dllp_statistics),
+	BNXT_PCIE_STATS_ENTRY(pcie_rx_dllp_statistics),
+	BNXT_PCIE_STATS_ENTRY(pcie_equalization_time),
+	BNXT_PCIE_STATS_ENTRY(pcie_ltssm_histogram[0]),
+	BNXT_PCIE_STATS_ENTRY(pcie_ltssm_histogram[1]),
+	BNXT_PCIE_STATS_ENTRY(pcie_ltssm_histogram[2]),
+	BNXT_PCIE_STATS_ENTRY(pcie_ltssm_histogram[3]),
+	BNXT_PCIE_STATS_ENTRY(pcie_recovery_histogram),
 };
 
 #define BNXT_NUM_SW_FUNC_STATS	ARRAY_SIZE(bnxt_sw_func_stats)
 #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
-#define BNXT_NUM_PORT_STATS_EXT ARRAY_SIZE(bnxt_port_stats_ext_arr)
+#define BNXT_NUM_STATS_PRI			\
+	(ARRAY_SIZE(bnxt_rx_bytes_pri_arr) +	\
+	 ARRAY_SIZE(bnxt_rx_pkts_pri_arr) +	\
+	 ARRAY_SIZE(bnxt_tx_bytes_pri_arr) +	\
+	 ARRAY_SIZE(bnxt_tx_pkts_pri_arr))
+#define BNXT_NUM_PCIE_STATS ARRAY_SIZE(bnxt_pcie_stats_arr)
+
+static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
+{
+	if (BNXT_SUPPORTS_TPA(bp)) {
+		if (bp->max_tpa_v2)
+			return ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
+		return ARRAY_SIZE(bnxt_ring_tpa_stats_str);
+	}
+	return 0;
+}
+
+static int bnxt_get_num_ring_stats(struct bnxt *bp)
+{
+	int num_stats;
+
+	num_stats = ARRAY_SIZE(bnxt_ring_stats_str) +
+		    ARRAY_SIZE(bnxt_ring_sw_stats_str) +
+		    bnxt_get_num_tpa_ring_stats(bp);
+	return num_stats * bp->cp_nr_rings;
+}
 
 static int bnxt_get_num_stats(struct bnxt *bp)
 {
-	int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+	int num_stats = bnxt_get_num_ring_stats(bp);
 
 	num_stats += BNXT_NUM_SW_FUNC_STATS;
 
 	if (bp->flags & BNXT_FLAG_PORT_STATS)
 		num_stats += BNXT_NUM_PORT_STATS;
 
-	if (bp->flags & BNXT_FLAG_PORT_STATS_EXT)
-		num_stats += BNXT_NUM_PORT_STATS_EXT;
+	if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) {
+		num_stats += bp->fw_rx_stats_ext_size +
+			     bp->fw_tx_stats_ext_size;
+		if (bp->pri2cos_valid)
+			num_stats += BNXT_NUM_STATS_PRI;
+	}
+
+	if (bp->flags & BNXT_FLAG_PCIE_STATS)
+		num_stats += BNXT_NUM_PCIE_STATS;
 
 	return num_stats;
 }
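bnxt_get_num_stats() is the single source of truth for ethtool's
ETH_SS_STATS count, so it must agree exactly with what
bnxt_get_ethtool_stats() writes and bnxt_get_strings() labels. As a worked
example of the ring term alone: on TPA v2 hardware the per-ring block is
16 ring counters + 5 TPA counters + 2 software counters = 23, so a device
with 8 completion rings reports 23 * 8 = 184 ring stats before the port,
priority, and PCIe blocks are added.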
@@ -298,10 +532,13 @@
 {
 	u32 i, j = 0;
 	struct bnxt *bp = netdev_priv(dev);
-	u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
+	u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) +
+			  bnxt_get_num_tpa_ring_stats(bp);
 
-	if (!bp->bnapi)
-		return;
+	if (!bp->bnapi) {
+		j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS;
+		goto skip_ring_stats;
+	}
 
 	for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++)
 		bnxt_sw_func_stats[i].counter = 0;
@@ -315,6 +552,7 @@
 		for (k = 0; k < stat_fields; j++, k++)
 			buf[j] = le64_to_cpu(hw_stats[k]);
 		buf[j++] = cpr->rx_l4_csum_errors;
+		buf[j++] = cpr->missed_irqs;
 
 		bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
 			le64_to_cpu(cpr->hw_stats->rx_discard_pkts);
@@ -325,6 +563,7 @@
 	for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
 		buf[j] = bnxt_sw_func_stats[i].counter;
 
+skip_ring_stats:
 	if (bp->flags & BNXT_FLAG_PORT_STATS) {
 		__le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
 
@@ -334,66 +573,90 @@
 		}
 	}
 	if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) {
-		__le64 *port_stats_ext = (__le64 *)bp->hw_rx_port_stats_ext;
+		__le64 *rx_port_stats_ext = (__le64 *)bp->hw_rx_port_stats_ext;
+		__le64 *tx_port_stats_ext = (__le64 *)bp->hw_tx_port_stats_ext;
 
-		for (i = 0; i < BNXT_NUM_PORT_STATS_EXT; i++, j++) {
-			buf[j] = le64_to_cpu(*(port_stats_ext +
+		for (i = 0; i < bp->fw_rx_stats_ext_size; i++, j++) {
+			buf[j] = le64_to_cpu(*(rx_port_stats_ext +
 					    bnxt_port_stats_ext_arr[i].offset));
 		}
+		for (i = 0; i < bp->fw_tx_stats_ext_size; i++, j++) {
+			buf[j] = le64_to_cpu(*(tx_port_stats_ext +
+					bnxt_tx_port_stats_ext_arr[i].offset));
+		}
+		if (bp->pri2cos_valid) {
+			for (i = 0; i < 8; i++, j++) {
+				long n = bnxt_rx_bytes_pri_arr[i].base_off +
+					 bp->pri2cos[i];
+
+				buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
+			}
+			for (i = 0; i < 8; i++, j++) {
+				long n = bnxt_rx_pkts_pri_arr[i].base_off +
+					 bp->pri2cos[i];
+
+				buf[j] = le64_to_cpu(*(rx_port_stats_ext + n));
+			}
+			for (i = 0; i < 8; i++, j++) {
+				long n = bnxt_tx_bytes_pri_arr[i].base_off +
+					 bp->pri2cos[i];
+
+				buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
+			}
+			for (i = 0; i < 8; i++, j++) {
+				long n = bnxt_tx_pkts_pri_arr[i].base_off +
+					 bp->pri2cos[i];
+
+				buf[j] = le64_to_cpu(*(tx_port_stats_ext + n));
+			}
+		}
+	}
+	if (bp->flags & BNXT_FLAG_PCIE_STATS) {
+		__le64 *pcie_stats = (__le64 *)bp->hw_pcie_stats;
+
+		for (i = 0; i < BNXT_NUM_PCIE_STATS; i++, j++) {
+			buf[j] = le64_to_cpu(*(pcie_stats +
+					       bnxt_pcie_stats_arr[i].offset));
+		}
 	}
 }
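The per-priority loops above lean on a small indexing trick: the *_pri_arr
tables record only the 64-bit-word offset of each counter's cos0 slot, the
eight CoS counters sit consecutively in the firmware stats block, and
bp->pri2cos[] maps priority to CoS queue. Reduced to one line per counter,
the lookup is (userspace sketch; a little-endian firmware block is the
layout the driver assumes):

	#include <endian.h>
	#include <stdint.h>

	/* stats_le: raw firmware stats block as little-endian 64-bit words
	 * base_off: word offset of <counter>_cos0 (the base_off field above)
	 * pri2cos:  priority -> CoS queue map (bp->pri2cos in the driver) */
	static uint64_t pri_counter(const uint64_t *stats_le, long base_off,
				    const uint8_t *pri2cos, int pri)
	{
		return le64toh(stats_le[base_off + pri2cos[pri]]);
	}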
 
 static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	u32 i;
+	static const char * const *str;
+	u32 i, j, num_str;
 
 	switch (stringset) {
-	/* The number of strings must match BNXT_NUM_STATS defined above. */
 	case ETH_SS_STATS:
 		for (i = 0; i < bp->cp_nr_rings; i++) {
-			sprintf(buf, "[%d]: rx_ucast_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_mcast_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_bcast_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_discards", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_drops", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_ucast_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_mcast_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_bcast_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_ucast_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_mcast_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_bcast_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_discards", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_drops", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_ucast_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_mcast_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tx_bcast_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tpa_packets", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tpa_bytes", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tpa_events", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: tpa_aborts", i);
-			buf += ETH_GSTRING_LEN;
-			sprintf(buf, "[%d]: rx_l4_csum_errors", i);
-			buf += ETH_GSTRING_LEN;
+			num_str = ARRAY_SIZE(bnxt_ring_stats_str);
+			for (j = 0; j < num_str; j++) {
+				sprintf(buf, "[%d]: %s", i,
+					bnxt_ring_stats_str[j]);
+				buf += ETH_GSTRING_LEN;
+			}
+			if (!BNXT_SUPPORTS_TPA(bp))
+				goto skip_tpa_stats;
+
+			if (bp->max_tpa_v2) {
+				num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
+				str = bnxt_ring_tpa2_stats_str;
+			} else {
+				num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str);
+				str = bnxt_ring_tpa_stats_str;
+			}
+			for (j = 0; j < num_str; j++) {
+				sprintf(buf, "[%d]: %s", i, str[j]);
+				buf += ETH_GSTRING_LEN;
+			}
+skip_tpa_stats:
+			num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str);
+			for (j = 0; j < num_str; j++) {
+				sprintf(buf, "[%d]: %s", i,
+					bnxt_ring_sw_stats_str[j]);
+				buf += ETH_GSTRING_LEN;
+			}
 		}
 		for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
 			strcpy(buf, bnxt_sw_func_stats[i].string);
@@ -407,10 +670,43 @@
 			}
 		}
 		if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) {
-			for (i = 0; i < BNXT_NUM_PORT_STATS_EXT; i++) {
+			for (i = 0; i < bp->fw_rx_stats_ext_size; i++) {
 				strcpy(buf, bnxt_port_stats_ext_arr[i].string);
 				buf += ETH_GSTRING_LEN;
 			}
+			for (i = 0; i < bp->fw_tx_stats_ext_size; i++) {
+				strcpy(buf,
+				       bnxt_tx_port_stats_ext_arr[i].string);
+				buf += ETH_GSTRING_LEN;
+			}
+			if (bp->pri2cos_valid) {
+				for (i = 0; i < 8; i++) {
+					strcpy(buf,
+					       bnxt_rx_bytes_pri_arr[i].string);
+					buf += ETH_GSTRING_LEN;
+				}
+				for (i = 0; i < 8; i++) {
+					strcpy(buf,
+					       bnxt_rx_pkts_pri_arr[i].string);
+					buf += ETH_GSTRING_LEN;
+				}
+				for (i = 0; i < 8; i++) {
+					strcpy(buf,
+					       bnxt_tx_bytes_pri_arr[i].string);
+					buf += ETH_GSTRING_LEN;
+				}
+				for (i = 0; i < 8; i++) {
+					strcpy(buf,
+					       bnxt_tx_pkts_pri_arr[i].string);
+					buf += ETH_GSTRING_LEN;
+				}
+			}
+		}
+		if (bp->flags & BNXT_FLAG_PCIE_STATS) {
+			for (i = 0; i < BNXT_NUM_PCIE_STATS; i++) {
+				strcpy(buf, bnxt_pcie_stats_arr[i].string);
+				buf += ETH_GSTRING_LEN;
+			}
 		}
 		break;
 	case ETH_SS_TEST:
@@ -581,8 +877,6 @@
 	bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
 			       bp->tx_nr_rings + bp->rx_nr_rings;
 
-	bp->num_stat_ctxs = bp->cp_nr_rings;
-
 	/* After changing number of rx channels, update NTUPLE feature. */
 	netdev_update_features(dev);
 	if (netif_running(dev)) {
@@ -593,7 +887,7 @@
 			 */
 		}
 	} else {
-		rc = bnxt_reserve_rings(bp);
+		rc = bnxt_reserve_rings(bp, true);
 	}
 
 	return rc;
@@ -1405,6 +1699,11 @@
 	return bp->link_info.link_up;
 }
 
+static void bnxt_print_admin_err(struct bnxt *bp)
+{
+	netdev_info(bp->dev, "PF does not have admin privileges to flash or reset the device\n");
+}
+
 static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
 				u16 ext, u16 *index, u32 *item_length,
 				u32 *data_length);
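The rc == -EACCES checks in this file work because the HWRM send helpers
translate firmware status words into standard errno values before
returning, and the firmware's access-denied status surfaces as -EACCES.
In spirit the mapping looks like this (a simplified sketch, not the
driver's actual helper; the HWRM_ERR_CODE_* names are from bnxt_hsi.h):

	/* Simplified sketch of HWRM status -> errno translation. */
	static int demo_hwrm_to_errno(u16 fw_status)
	{
		switch (fw_status) {
		case HWRM_ERR_CODE_SUCCESS:
			return 0;
		case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED:
			return -EACCES;	/* bnxt_print_admin_err() above */
		case HWRM_ERR_CODE_CMD_NOT_SUPPORTED:
			return -EOPNOTSUPP;
		default:
			return -EIO;
		}
	}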
@@ -1444,14 +1743,17 @@
 	rc = hwrm_send_message(bp, &req, sizeof(req), FLASH_NVRAM_TIMEOUT);
 	dma_free_coherent(&bp->pdev->dev, data_len, kmem, dma_handle);
 
+	if (rc == -EACCES)
+		bnxt_print_admin_err(bp);
 	return rc;
 }
 
 static int bnxt_firmware_reset(struct net_device *dev,
 			       u16 dir_type)
 {
-	struct bnxt *bp = netdev_priv(dev);
 	struct hwrm_fw_reset_input req = {0};
+	struct bnxt *bp = netdev_priv(dev);
+	int rc;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
 
@@ -1491,7 +1793,10 @@
 		return -EINVAL;
 	}
 
-	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc == -EACCES)
+		bnxt_print_admin_err(bp);
+	return rc;
 }
 
 static int bnxt_flash_firmware(struct net_device *dev,
@@ -1698,9 +2003,9 @@
 	struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_nvm_install_update_input install = {0};
 	const struct firmware *fw;
+	int rc, hwrm_err = 0;
 	u32 item_len;
 	u16 index;
-	int rc;
 
 	bnxt_hwrm_fw_set_time(bp);
 
@@ -1743,15 +2048,16 @@
 			memcpy(kmem, fw->data, fw->size);
 			modify.host_src_addr = cpu_to_le64(dma_handle);
 
-			rc = hwrm_send_message(bp, &modify, sizeof(modify),
-					       FLASH_PACKAGE_TIMEOUT);
+			hwrm_err = hwrm_send_message(bp, &modify,
+						     sizeof(modify),
+						     FLASH_PACKAGE_TIMEOUT);
 			dma_free_coherent(&bp->pdev->dev, fw->size, kmem,
 					  dma_handle);
 		}
 	}
 	release_firmware(fw);
-	if (rc)
-		return rc;
+	if (rc || hwrm_err)
+		goto err_exit;
 
 	if ((install_type & 0xffff) == 0)
 		install_type >>= 16;
@@ -1759,26 +2065,21 @@
 	install.install_type = cpu_to_le32(install_type);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &install, sizeof(install),
-				INSTALL_PACKAGE_TIMEOUT);
-	if (rc) {
-		rc = -EOPNOTSUPP;
-		goto flash_pkg_exit;
-	}
-
-	if (resp->error_code) {
+	hwrm_err = _hwrm_send_message(bp, &install, sizeof(install),
+				      INSTALL_PACKAGE_TIMEOUT);
+	if (hwrm_err) {
 		u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
 
-		if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
+		if (resp->error_code && error_code ==
+		    NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
 			install.flags |= cpu_to_le16(
 			       NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
-			rc = _hwrm_send_message(bp, &install, sizeof(install),
-						INSTALL_PACKAGE_TIMEOUT);
-			if (rc) {
-				rc = -EOPNOTSUPP;
-				goto flash_pkg_exit;
-			}
+			hwrm_err = _hwrm_send_message(bp, &install,
+						      sizeof(install),
+						      INSTALL_PACKAGE_TIMEOUT);
 		}
+		if (hwrm_err)
+			goto flash_pkg_exit;
 	}
 
 	if (resp->result) {
@@ -1788,6 +2089,9 @@
 	}
 flash_pkg_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
+err_exit:
+	if (hwrm_err == -EACCES)
+		bnxt_print_admin_err(bp);
 	return rc;
 }
 
@@ -2326,8 +2630,6 @@
 		led_cfg->led_group_id = bp->leds[i].led_group_id;
 	}
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -2368,17 +2670,37 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
+static int bnxt_query_force_speeds(struct bnxt *bp, u16 *force_speeds)
+{
+	struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_port_phy_qcaps_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc)
+		*force_speeds = le16_to_cpu(resp->supported_speeds_force_mode);
+
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
 static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
 				    struct hwrm_port_phy_cfg_input *req)
 {
 	struct bnxt_link_info *link_info = &bp->link_info;
-	u16 fw_advertising = link_info->advertising;
+	u16 fw_advertising;
 	u16 fw_speed;
 	int rc;
 
 	if (!link_info->autoneg)
 		return 0;
 
+	rc = bnxt_query_force_speeds(bp, &fw_advertising);
+	if (rc)
+		return rc;
+
 	fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB;
 	if (netif_carrier_ok(bp->dev))
 		fw_speed = bp->link_info.link_speed;
@@ -2419,11 +2741,11 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
-static int bnxt_rx_loopback(struct bnxt *bp, struct bnxt_napi *bnapi,
+static int bnxt_rx_loopback(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 			    u32 raw_cons, int pkt_size)
 {
-	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
-	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+	struct bnxt_napi *bnapi = cpr->bnapi;
+	struct bnxt_rx_ring_info *rxr;
 	struct bnxt_sw_rx_bd *rx_buf;
 	struct rx_cmp *rxcmp;
 	u16 cp_cons, cons;
@@ -2431,6 +2753,7 @@
 	u32 len;
 	int i;
 
+	rxr = bnapi->rx_ring;
 	cp_cons = RING_CMP(raw_cons);
 	rxcmp = (struct rx_cmp *)
 		&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
@@ -2451,17 +2774,15 @@
 	return 0;
 }
 
-static int bnxt_poll_loopback(struct bnxt *bp, int pkt_size)
+static int bnxt_poll_loopback(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+			      int pkt_size)
 {
-	struct bnxt_napi *bnapi = bp->bnapi[0];
-	struct bnxt_cp_ring_info *cpr;
 	struct tx_cmp *txcmp;
 	int rc = -EIO;
 	u32 raw_cons;
 	u32 cons;
 	int i;
 
-	cpr = &bnapi->cp_ring;
 	raw_cons = cpr->cp_raw_cons;
 	for (i = 0; i < 200; i++) {
 		cons = RING_CMP(raw_cons);
@@ -2477,7 +2798,7 @@
 		 */
 		dma_rmb();
 		if (TX_CMP_TYPE(txcmp) == CMP_TYPE_RX_L2_CMP) {
-			rc = bnxt_rx_loopback(bp, bnapi, raw_cons, pkt_size);
+			rc = bnxt_rx_loopback(bp, cpr, raw_cons, pkt_size);
 			raw_cons = NEXT_RAW_CMP(raw_cons);
 			raw_cons = NEXT_RAW_CMP(raw_cons);
 			break;
@@ -2491,12 +2812,17 @@
 static int bnxt_run_loopback(struct bnxt *bp)
 {
 	struct bnxt_tx_ring_info *txr = &bp->tx_ring[0];
+	struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0];
+	struct bnxt_cp_ring_info *cpr;
 	int pkt_size, i = 0;
 	struct sk_buff *skb;
 	dma_addr_t map;
 	u8 *data;
 	int rc;
 
+	cpr = &rxr->bnapi->cp_ring;
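+	/* P5 chips hang the RX completion ring off the main NAPI cp ring */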
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		cpr = cpr->cp_ring_arr[BNXT_RX_HDL];
 	pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh);
 	skb = netdev_alloc_skb(bp->dev, pkt_size);
 	if (!skb)
@@ -2515,13 +2841,13 @@
 		dev_kfree_skb(skb);
 		return -EIO;
 	}
-	bnxt_xmit_xdp(bp, txr, map, pkt_size, 0);
+	bnxt_xmit_bd(bp, txr, map, pkt_size);
 
 	/* Sync BD data before updating doorbell */
 	wmb();
 
-	bnxt_db_write(bp, txr->tx_doorbell, DB_KEY_TX | txr->tx_prod);
-	rc = bnxt_poll_loopback(bp, pkt_size);
+	bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
+	rc = bnxt_poll_loopback(bp, cpr, pkt_size);
 
 	dma_unmap_single(&bp->pdev->dev, map, pkt_size, PCI_DMA_TODEVICE);
 	dev_kfree_skb(skb);
@@ -2558,7 +2884,7 @@
 	bool offline = false;
 	u8 test_results = 0;
 	u8 test_mask = 0;
-	int rc, i;
+	int rc = 0, i;
 
 	if (!bp->num_tests || !BNXT_SINGLE_PF(bp))
 		return;
@@ -2629,9 +2955,9 @@
 		}
 		bnxt_hwrm_phy_loopback(bp, false, false);
 		bnxt_half_close_nic(bp);
-		bnxt_open_nic(bp, false, true);
+		rc = bnxt_open_nic(bp, false, true);
 	}
-	if (bnxt_test_irq(bp)) {
+	if (rc || bnxt_test_irq(bp)) {
 		buf[BNXT_IRQ_TEST_IDX] = 1;
 		etest->flags |= ETH_TEST_FL_FAILED;
 	}
@@ -2786,7 +3112,7 @@
 	req.component_id = cpu_to_le16(component_id);
 	req.segment_id = cpu_to_le16(segment_id);
 
-	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	return hwrm_send_message(bp, &req, sizeof(req), HWRM_COREDUMP_TIMEOUT);
 }
 
 static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
@@ -3021,8 +3347,10 @@
 	struct net_device *dev = bp->dev;
 	int i, rc;
 
-	bnxt_get_pkgver(dev);
+	if (!(bp->fw_cap & BNXT_FW_CAP_PKG_VER))
+		bnxt_get_pkgver(dev);
 
+	bp->num_tests = 0;
 	if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp))
 		return;
 
@@ -3032,7 +3360,9 @@
 	if (rc)
 		goto ethtool_init_exit;
 
-	test_info = kzalloc(sizeof(*bp->test_info), GFP_KERNEL);
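+	/* may be called again (e.g. after FW reset); reuse any prior allocation */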
+	test_info = bp->test_info;
+	if (!test_info)
+		test_info = kzalloc(sizeof(*bp->test_info), GFP_KERNEL);
 	if (!test_info)
 		goto ethtool_init_exit;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 971ace5..03b197e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -1,7 +1,8 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
- * Copyright (c) 2016-2018 Broadcom Limited
+ * Copyright (c) 2014-2018 Broadcom Limited
+ * Copyright (c) 2018-2019 Broadcom Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,15 +38,18 @@
 #define TLV_TYPE_HWRM_REQUEST                    0x1UL
 #define TLV_TYPE_HWRM_RESPONSE                   0x2UL
 #define TLV_TYPE_ROCE_SP_COMMAND                 0x3UL
-#define TLV_TYPE_ENGINE_CKV_DEVICE_SERIAL_NUMBER 0x8001UL
-#define TLV_TYPE_ENGINE_CKV_NONCE                0x8002UL
+#define TLV_TYPE_QUERY_ROCE_CC_GEN1              0x4UL
+#define TLV_TYPE_MODIFY_ROCE_CC_GEN1             0x5UL
+#define TLV_TYPE_ENGINE_CKV_ALIAS_ECC_PUBLIC_KEY 0x8001UL
 #define TLV_TYPE_ENGINE_CKV_IV                   0x8003UL
 #define TLV_TYPE_ENGINE_CKV_AUTH_TAG             0x8004UL
 #define TLV_TYPE_ENGINE_CKV_CIPHERTEXT           0x8005UL
-#define TLV_TYPE_ENGINE_CKV_ALGORITHMS           0x8006UL
-#define TLV_TYPE_ENGINE_CKV_ECC_PUBLIC_KEY       0x8007UL
+#define TLV_TYPE_ENGINE_CKV_HOST_ALGORITHMS      0x8006UL
+#define TLV_TYPE_ENGINE_CKV_HOST_ECC_PUBLIC_KEY  0x8007UL
 #define TLV_TYPE_ENGINE_CKV_ECDSA_SIGNATURE      0x8008UL
-#define TLV_TYPE_LAST                           TLV_TYPE_ENGINE_CKV_ECDSA_SIGNATURE
+#define TLV_TYPE_ENGINE_CKV_FW_ECC_PUBLIC_KEY    0x8009UL
+#define TLV_TYPE_ENGINE_CKV_FW_ALGORITHMS        0x800aUL
+#define TLV_TYPE_LAST                           TLV_TYPE_ENGINE_CKV_FW_ALGORITHMS
 
 
 /* tlv (size:64b/8B) */
@@ -87,7 +91,10 @@
 	__le16	signature;
 	#define SHORT_REQ_SIGNATURE_SHORT_CMD 0x4321UL
 	#define SHORT_REQ_SIGNATURE_LAST     SHORT_REQ_SIGNATURE_SHORT_CMD
-	__le16	unused_0;
+	__le16	target_id;
+	#define SHORT_REQ_TARGET_ID_DEFAULT 0x0UL
+	#define SHORT_REQ_TARGET_ID_TOOLS   0xfffdUL
+	#define SHORT_REQ_TARGET_ID_LAST   SHORT_REQ_TARGET_ID_TOOLS
 	__le16	size;
 	__le64	req_addr;
 };
@@ -96,6 +103,7 @@
 struct cmd_nums {
 	__le16	req_type;
 	#define HWRM_VER_GET                              0x0UL
+	#define HWRM_ERROR_RECOVERY_QCFG                  0xcUL
 	#define HWRM_FUNC_DRV_IF_CHANGE                   0xdUL
 	#define HWRM_FUNC_BUF_UNRGTR                      0xeUL
 	#define HWRM_FUNC_VF_CFG                          0xfUL
@@ -186,15 +194,24 @@
 	#define HWRM_TUNNEL_DST_PORT_QUERY                0xa0UL
 	#define HWRM_TUNNEL_DST_PORT_ALLOC                0xa1UL
 	#define HWRM_TUNNEL_DST_PORT_FREE                 0xa2UL
+	#define HWRM_STAT_CTX_ENG_QUERY                   0xafUL
 	#define HWRM_STAT_CTX_ALLOC                       0xb0UL
 	#define HWRM_STAT_CTX_FREE                        0xb1UL
 	#define HWRM_STAT_CTX_QUERY                       0xb2UL
 	#define HWRM_STAT_CTX_CLR_STATS                   0xb3UL
 	#define HWRM_PORT_QSTATS_EXT                      0xb4UL
+	#define HWRM_PORT_PHY_MDIO_WRITE                  0xb5UL
+	#define HWRM_PORT_PHY_MDIO_READ                   0xb6UL
+	#define HWRM_PORT_PHY_MDIO_BUS_ACQUIRE            0xb7UL
+	#define HWRM_PORT_PHY_MDIO_BUS_RELEASE            0xb8UL
 	#define HWRM_FW_RESET                             0xc0UL
 	#define HWRM_FW_QSTATUS                           0xc1UL
 	#define HWRM_FW_HEALTH_CHECK                      0xc2UL
 	#define HWRM_FW_SYNC                              0xc3UL
+	#define HWRM_FW_STATE_BUFFER_QCAPS                0xc4UL
+	#define HWRM_FW_STATE_QUIESCE                     0xc5UL
+	#define HWRM_FW_STATE_BACKUP                      0xc6UL
+	#define HWRM_FW_STATE_RESTORE                     0xc7UL
 	#define HWRM_FW_SET_TIME                          0xc8UL
 	#define HWRM_FW_GET_TIME                          0xc9UL
 	#define HWRM_FW_SET_STRUCTURED_DATA               0xcaUL
@@ -205,16 +222,22 @@
 	#define HWRM_FWD_RESP                             0xd2UL
 	#define HWRM_FWD_ASYNC_EVENT_CMPL                 0xd3UL
 	#define HWRM_OEM_CMD                              0xd4UL
+	#define HWRM_PORT_PRBS_TEST                       0xd5UL
+	#define HWRM_PORT_SFP_SIDEBAND_CFG                0xd6UL
+	#define HWRM_PORT_SFP_SIDEBAND_QCFG               0xd7UL
 	#define HWRM_TEMP_MONITOR_QUERY                   0xe0UL
+	#define HWRM_REG_POWER_QUERY                      0xe1UL
 	#define HWRM_WOL_FILTER_ALLOC                     0xf0UL
 	#define HWRM_WOL_FILTER_FREE                      0xf1UL
 	#define HWRM_WOL_FILTER_QCFG                      0xf2UL
 	#define HWRM_WOL_REASON_QCFG                      0xf3UL
+	#define HWRM_CFA_METER_QCAPS                      0xf4UL
 	#define HWRM_CFA_METER_PROFILE_ALLOC              0xf5UL
 	#define HWRM_CFA_METER_PROFILE_FREE               0xf6UL
 	#define HWRM_CFA_METER_PROFILE_CFG                0xf7UL
 	#define HWRM_CFA_METER_INSTANCE_ALLOC             0xf8UL
 	#define HWRM_CFA_METER_INSTANCE_FREE              0xf9UL
+	#define HWRM_CFA_METER_INSTANCE_CFG               0xfaUL
 	#define HWRM_CFA_VFR_ALLOC                        0xfdUL
 	#define HWRM_CFA_VFR_FREE                         0xfeUL
 	#define HWRM_CFA_VF_PAIR_ALLOC                    0x100UL
@@ -235,7 +258,26 @@
 	#define HWRM_CFA_PAIR_INFO                        0x10fUL
 	#define HWRM_FW_IPC_MSG                           0x110UL
 	#define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO        0x111UL
-	#define HWRM_ENGINE_CKV_HELLO                     0x12dUL
+	#define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE       0x112UL
+	#define HWRM_CFA_FLOW_AGING_TIMER_RESET           0x113UL
+	#define HWRM_CFA_FLOW_AGING_CFG                   0x114UL
+	#define HWRM_CFA_FLOW_AGING_QCFG                  0x115UL
+	#define HWRM_CFA_FLOW_AGING_QCAPS                 0x116UL
+	#define HWRM_CFA_CTX_MEM_RGTR                     0x117UL
+	#define HWRM_CFA_CTX_MEM_UNRGTR                   0x118UL
+	#define HWRM_CFA_CTX_MEM_QCTX                     0x119UL
+	#define HWRM_CFA_CTX_MEM_QCAPS                    0x11aUL
+	#define HWRM_CFA_COUNTER_QCAPS                    0x11bUL
+	#define HWRM_CFA_COUNTER_CFG                      0x11cUL
+	#define HWRM_CFA_COUNTER_QCFG                     0x11dUL
+	#define HWRM_CFA_COUNTER_QSTATS                   0x11eUL
+	#define HWRM_CFA_TCP_FLAG_PROCESS_QCFG            0x11fUL
+	#define HWRM_CFA_EEM_QCAPS                        0x120UL
+	#define HWRM_CFA_EEM_CFG                          0x121UL
+	#define HWRM_CFA_EEM_QCFG                         0x122UL
+	#define HWRM_CFA_EEM_OP                           0x123UL
+	#define HWRM_CFA_ADV_FLOW_MGNT_QCAPS              0x124UL
+	#define HWRM_CFA_TFLIB                            0x125UL
 	#define HWRM_ENGINE_CKV_STATUS                    0x12eUL
 	#define HWRM_ENGINE_CKV_CKEK_ADD                  0x12fUL
 	#define HWRM_ENGINE_CKV_CKEK_DELETE               0x130UL
@@ -244,6 +286,8 @@
 	#define HWRM_ENGINE_CKV_FLUSH                     0x133UL
 	#define HWRM_ENGINE_CKV_RNG_GET                   0x134UL
 	#define HWRM_ENGINE_CKV_KEY_GEN                   0x135UL
+	#define HWRM_ENGINE_CKV_KEY_LABEL_CFG             0x136UL
+	#define HWRM_ENGINE_CKV_KEY_LABEL_QCFG            0x137UL
 	#define HWRM_ENGINE_QG_CONFIG_QUERY               0x13cUL
 	#define HWRM_ENGINE_QG_QUERY                      0x13dUL
 	#define HWRM_ENGINE_QG_METER_PROFILE_CONFIG_QUERY 0x13eUL
@@ -271,6 +315,7 @@
 	#define HWRM_ENGINE_NQ_ALLOC                      0x162UL
 	#define HWRM_ENGINE_NQ_FREE                       0x163UL
 	#define HWRM_ENGINE_ON_DIE_RQE_CREDITS            0x164UL
+	#define HWRM_ENGINE_FUNC_QCFG                     0x165UL
 	#define HWRM_FUNC_RESOURCE_QCAPS                  0x190UL
 	#define HWRM_FUNC_VF_RESOURCE_CFG                 0x191UL
 	#define HWRM_FUNC_BACKING_STORE_QCAPS             0x192UL
@@ -278,11 +323,17 @@
 	#define HWRM_FUNC_BACKING_STORE_QCFG              0x194UL
 	#define HWRM_FUNC_VF_BW_CFG                       0x195UL
 	#define HWRM_FUNC_VF_BW_QCFG                      0x196UL
+	#define HWRM_FUNC_HOST_PF_IDS_QUERY               0x197UL
 	#define HWRM_SELFTEST_QLIST                       0x200UL
 	#define HWRM_SELFTEST_EXEC                        0x201UL
 	#define HWRM_SELFTEST_IRQ                         0x202UL
 	#define HWRM_SELFTEST_RETRIEVE_SERDES_DATA        0x203UL
 	#define HWRM_PCIE_QSTATS                          0x204UL
+	#define HWRM_MFG_FRU_WRITE_CONTROL                0x205UL
+	#define HWRM_MFG_TIMERS_QUERY                     0x206UL
+	#define HWRM_MFG_OTP_CFG                          0x207UL
+	#define HWRM_MFG_OTP_QCFG                         0x208UL
+	#define HWRM_MFG_HDMA_TEST                        0x209UL
 	#define HWRM_DBG_READ_DIRECT                      0xff10UL
 	#define HWRM_DBG_READ_INDIRECT                    0xff11UL
 	#define HWRM_DBG_WRITE_DIRECT                     0xff12UL
@@ -295,6 +346,9 @@
 	#define HWRM_DBG_COREDUMP_RETRIEVE                0xff19UL
 	#define HWRM_DBG_FW_CLI                           0xff1aUL
 	#define HWRM_DBG_I2C_CMD                          0xff1bUL
+	#define HWRM_DBG_RING_INFO_GET                    0xff1cUL
+	#define HWRM_DBG_CRASHDUMP_HEADER                 0xff1dUL
+	#define HWRM_DBG_CRASHDUMP_ERASE                  0xff1eUL
 	#define HWRM_NVM_FACTORY_DEFAULTS                 0xffeeUL
 	#define HWRM_NVM_VALIDATE_OPTION                  0xffefUL
 	#define HWRM_NVM_FLUSH                            0xfff0UL
@@ -320,20 +374,26 @@
 /* ret_codes (size:64b/8B) */
 struct ret_codes {
 	__le16	error_code;
-	#define HWRM_ERR_CODE_SUCCESS                0x0UL
-	#define HWRM_ERR_CODE_FAIL                   0x1UL
-	#define HWRM_ERR_CODE_INVALID_PARAMS         0x2UL
-	#define HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED 0x3UL
-	#define HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR   0x4UL
-	#define HWRM_ERR_CODE_INVALID_FLAGS          0x5UL
-	#define HWRM_ERR_CODE_INVALID_ENABLES        0x6UL
-	#define HWRM_ERR_CODE_UNSUPPORTED_TLV        0x7UL
-	#define HWRM_ERR_CODE_NO_BUFFER              0x8UL
-	#define HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR 0x9UL
-	#define HWRM_ERR_CODE_HWRM_ERROR             0xfUL
-	#define HWRM_ERR_CODE_UNKNOWN_ERR            0xfffeUL
-	#define HWRM_ERR_CODE_CMD_NOT_SUPPORTED      0xffffUL
-	#define HWRM_ERR_CODE_LAST                  HWRM_ERR_CODE_CMD_NOT_SUPPORTED
+	#define HWRM_ERR_CODE_SUCCESS                      0x0UL
+	#define HWRM_ERR_CODE_FAIL                         0x1UL
+	#define HWRM_ERR_CODE_INVALID_PARAMS               0x2UL
+	#define HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED       0x3UL
+	#define HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR         0x4UL
+	#define HWRM_ERR_CODE_INVALID_FLAGS                0x5UL
+	#define HWRM_ERR_CODE_INVALID_ENABLES              0x6UL
+	#define HWRM_ERR_CODE_UNSUPPORTED_TLV              0x7UL
+	#define HWRM_ERR_CODE_NO_BUFFER                    0x8UL
+	#define HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR       0x9UL
+	#define HWRM_ERR_CODE_HOT_RESET_PROGRESS           0xaUL
+	#define HWRM_ERR_CODE_HOT_RESET_FAIL               0xbUL
+	#define HWRM_ERR_CODE_NO_FLOW_COUNTER_DURING_ALLOC 0xcUL
+	#define HWRM_ERR_CODE_KEY_HASH_COLLISION           0xdUL
+	#define HWRM_ERR_CODE_KEY_ALREADY_EXISTS           0xeUL
+	#define HWRM_ERR_CODE_HWRM_ERROR                   0xfUL
+	#define HWRM_ERR_CODE_TLV_ENCAPSULATED_RESPONSE    0x8000UL
+	#define HWRM_ERR_CODE_UNKNOWN_ERR                  0xfffeUL
+	#define HWRM_ERR_CODE_CMD_NOT_SUPPORTED            0xffffUL
+	#define HWRM_ERR_CODE_LAST                        HWRM_ERR_CODE_CMD_NOT_SUPPORTED
 	__le16	unused_0[3];
 };
 
@@ -350,15 +410,19 @@
 };
 #define HWRM_NA_SIGNATURE ((__le32)(-1))
 #define HWRM_MAX_REQ_LEN 128
-#define HWRM_MAX_RESP_LEN 280
+#define HWRM_MAX_RESP_LEN 704
 #define HW_HASH_INDEX_SIZE 0x80
 #define HW_HASH_KEY_SIZE 40
 #define HWRM_RESP_VALID_KEY 1
+#define HWRM_TARGET_ID_BONO 0xFFF8
+#define HWRM_TARGET_ID_KONG 0xFFF9
+#define HWRM_TARGET_ID_APE 0xFFFA
+#define HWRM_TARGET_ID_TOOLS 0xFFFD
 #define HWRM_VERSION_MAJOR 1
-#define HWRM_VERSION_MINOR 9
-#define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 25
-#define HWRM_VERSION_STR "1.9.2.25"
+#define HWRM_VERSION_MINOR 10
+#define HWRM_VERSION_UPDATE 0
+#define HWRM_VERSION_RSVD 100
+#define HWRM_VERSION_STR "1.10.0.100"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -396,10 +460,20 @@
 	u8	netctrl_fw_bld_8b;
 	u8	netctrl_fw_rsvd_8b;
 	__le32	dev_caps_cfg;
-	#define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED     0x1UL
-	#define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED     0x2UL
-	#define VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_SUPPORTED         0x4UL
-	#define VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_REQUIRED          0x8UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED                  0x1UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED                  0x2UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_SUPPORTED                      0x4UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_REQUIRED                       0x8UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED                   0x10UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_FLOW_HANDLE_64BIT_SUPPORTED              0x20UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_L2_FILTER_TYPES_ROCE_OR_L2_SUPPORTED     0x40UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_VIRTIO_VSWITCH_OFFLOAD_SUPPORTED         0x80UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_TRUSTED_VF_SUPPORTED                     0x100UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_FLOW_AGING_SUPPORTED                     0x200UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_ADV_FLOW_COUNTERS_SUPPORTED              0x400UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_EEM_SUPPORTED                        0x800UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED              0x1000UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED                      0x2000UL
 	u8	roce_fw_maj_8b;
 	u8	roce_fw_min_8b;
 	u8	roce_fw_bld_8b;
@@ -407,7 +481,7 @@
 	char	hwrm_fw_name[16];
 	char	mgmt_fw_name[16];
 	char	netctrl_fw_name[16];
-	u8	reserved2[16];
+	char	active_pkg_name[16];
 	char	roce_fw_name[16];
 	__le16	chip_num;
 	u8	chip_rev;
@@ -454,14 +528,27 @@
 /* eject_cmpl (size:128b/16B) */
 struct eject_cmpl {
 	__le16	type;
-	#define EJECT_CMPL_TYPE_MASK      0x3fUL
-	#define EJECT_CMPL_TYPE_SFT       0
-	#define EJECT_CMPL_TYPE_STAT_EJECT  0x1aUL
-	#define EJECT_CMPL_TYPE_LAST       EJECT_CMPL_TYPE_STAT_EJECT
+	#define EJECT_CMPL_TYPE_MASK       0x3fUL
+	#define EJECT_CMPL_TYPE_SFT        0
+	#define EJECT_CMPL_TYPE_STAT_EJECT   0x1aUL
+	#define EJECT_CMPL_TYPE_LAST        EJECT_CMPL_TYPE_STAT_EJECT
+	#define EJECT_CMPL_FLAGS_MASK      0xffc0UL
+	#define EJECT_CMPL_FLAGS_SFT       6
+	#define EJECT_CMPL_FLAGS_ERROR      0x40UL
 	__le16	len;
 	__le32	opaque;
-	__le32	v;
-	#define EJECT_CMPL_V     0x1UL
+	__le16	v;
+	#define EJECT_CMPL_V                              0x1UL
+	#define EJECT_CMPL_ERRORS_MASK                    0xfffeUL
+	#define EJECT_CMPL_ERRORS_SFT                     1
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_MASK        0xeUL
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_SFT         1
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_NO_BUFFER     (0x0UL << 1)
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_DID_NOT_FIT   (0x1UL << 1)
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_BAD_FORMAT    (0x3UL << 1)
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_FLUSH         (0x5UL << 1)
+	#define EJECT_CMPL_ERRORS_BUFFER_ERROR_LAST         EJECT_CMPL_ERRORS_BUFFER_ERROR_FLUSH
+	__le16	reserved16;
 	__le32	unused_2;
 };
 
@@ -528,6 +615,8 @@
 	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE      0x6UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE        0x7UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY               0x8UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY             0x9UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD           0x10UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD             0x11UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT        0x12UL
@@ -539,6 +628,16 @@
 	#define ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE              0x33UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_LLFC_PFC_CHANGE            0x34UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_DEFAULT_VNIC_CHANGE        0x35UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_HW_FLOW_AGED               0x36UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION         0x37UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CACHE_FLUSH_REQ        0x38UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CACHE_FLUSH_DONE       0x39UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_TCP_FLAG_ACTION_CHANGE     0x3aUL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_EEM_FLOW_ACTIVE            0x3bUL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CFG_CHANGE             0x3cUL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_TFLIB_DEFAULT_VNIC_CHANGE  0x3dUL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_TFLIB_LINK_STATUS_CHANGE   0x3eUL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
 	__le32	event_data2;
@@ -634,6 +733,63 @@
 	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_ILLEGAL_LINK_SPEED_CFG           0x20000UL
 };
 
+/* hwrm_async_event_cmpl_reset_notify (size:128b/16B) */
+struct hwrm_async_event_cmpl_reset_notify {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_LAST             ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY 0x8UL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST        ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_V          0x1UL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_MASK                  0xffUL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_SFT                   0
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_STOP_TX_QUEUE    0x1UL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN           0x2UL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_LAST                   ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK                    0xff00UL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_SFT                     8
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST  (0x1UL << 8)
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL        (0x2UL << 8)
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL    (0x3UL << 8)
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST                     ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK           0xffff0000UL
+	#define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT            16
+};
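A handler pulls the three packed fields back out of event_data1 with the
masks and shifts defined above, for example (sketch only; the macro names
are the ones this header defines, and what a driver does with each field
is driver policy):

	/* Unpack event_data1 of a reset-notify async event. */
	static bool demo_reset_is_fatal(u32 data1, u16 *delay_100ms)
	{
		u32 reason = data1 &
			ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK;

		*delay_100ms = (data1 &
			ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK) >>
			ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT;

		return reason ==
		       ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL;
	}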
+
+/* hwrm_async_event_cmpl_error_recovery (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_recovery {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_ERROR_RECOVERY 0x9UL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_LAST          ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_ERROR_RECOVERY
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_V          0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASK                 0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_SFT                  0
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC           0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED      0x2UL
+};
+
 /* hwrm_async_event_cmpl_vf_cfg_change (size:128b/16B) */
 struct hwrm_async_event_cmpl_vf_cfg_change {
 	__le16	type;
@@ -652,10 +808,110 @@
 	u8	timestamp_lo;
 	__le16	timestamp_hi;
 	__le32	event_data1;
-	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MTU_CHANGE               0x1UL
-	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MRU_CHANGE               0x2UL
-	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_MAC_ADDR_CHANGE     0x4UL
-	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_VLAN_CHANGE         0x8UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MTU_CHANGE                0x1UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MRU_CHANGE                0x2UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_MAC_ADDR_CHANGE      0x4UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_VLAN_CHANGE          0x8UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_TRUSTED_VF_CFG_CHANGE     0x10UL
+};
+
+/* hwrm_async_event_cmpl_default_vnic_change (size:128b/16B) */
+struct hwrm_async_event_cmpl_default_vnic_change {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_LAST             ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_TYPE_HWRM_ASYNC_EVENT
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_UNUSED1_MASK         0xffc0UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_UNUSED1_SFT          6
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_ID_ALLOC_FREE_NOTIFICATION 0x35UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_ID_LAST                   ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_ID_ALLOC_FREE_NOTIFICATION
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_V          0x1UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_MASK          0x3UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_SFT           0
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_DEF_VNIC_ALLOC  0x1UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_DEF_VNIC_FREE   0x2UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_LAST           ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_DEF_VNIC_STATE_DEF_VNIC_FREE
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_PF_ID_MASK                   0x3fcUL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_PF_ID_SFT                    2
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_VF_ID_MASK                   0x3fffc00UL
+	#define ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_VF_ID_SFT                    10
+};
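
/*
 * Illustrative sketch, not part of this commit: event_data1 of the
 * default-VNIC change notification packs state, PF ID and VF ID into one
 * word; a hypothetical helper extracts the VF ID.
 */
static inline u16 bnxt_def_vnic_vf_id(__le32 event_data1)
{
	u32 data1 = le32_to_cpu(event_data1);

	return (data1 & ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_VF_ID_MASK) >>
	       ASYNC_EVENT_CMPL_DEFAULT_VNIC_CHANGE_EVENT_DATA1_VF_ID_SFT;
}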
+
+/* hwrm_async_event_cmpl_hw_flow_aged (size:128b/16B) */
+struct hwrm_async_event_cmpl_hw_flow_aged {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_LAST             ASYNC_EVENT_CMPL_HW_FLOW_AGED_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_ID_HW_FLOW_AGED 0x36UL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_ID_LAST        ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_ID_HW_FLOW_AGED
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_V          0x1UL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_ID_MASK       0x7fffffffUL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_ID_SFT        0
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION     0x80000000UL
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_RX    (0x0UL << 31)
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_TX    (0x1UL << 31)
+	#define ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_LAST ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION_TX
+};
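
/*
 * Illustrative sketch, not part of this commit: event_data1 of the
 * hw-flow-aged event holds the flow ID in bits 30:0 and the direction in
 * bit 31 (set means TX). The helper name is hypothetical.
 */
static inline bool bnxt_aged_flow_is_tx(__le32 event_data1)
{
	return !!(le32_to_cpu(event_data1) &
		  ASYNC_EVENT_CMPL_HW_FLOW_AGED_EVENT_DATA1_FLOW_DIRECTION);
}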
+
+/* hwrm_async_event_cmpl_eem_cache_flush_req (size:128b/16B) */
+struct hwrm_async_event_cmpl_eem_cache_flush_req {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_LAST             ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_EVENT_ID_EEM_CACHE_FLUSH_REQ 0x38UL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_EVENT_ID_LAST               ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_EVENT_ID_EEM_CACHE_FLUSH_REQ
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_V          0x1UL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_REQ_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+};
+
+/* hwrm_async_event_cmpl_eem_cache_flush_done (size:128b/16B) */
+struct hwrm_async_event_cmpl_eem_cache_flush_done {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_LAST             ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_ID_EEM_CACHE_FLUSH_DONE 0x39UL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_ID_LAST                ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_ID_EEM_CACHE_FLUSH_DONE
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_V          0x1UL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_DATA1_FID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_EEM_CACHE_FLUSH_DONE_EVENT_DATA1_FID_SFT 0
 };
 
 /* hwrm_func_reset_input (size:192b/24B) */
@@ -832,26 +1088,33 @@
 	__le16	fid;
 	__le16	port_id;
 	__le32	flags;
-	#define FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED             0x1UL
-	#define FUNC_QCAPS_RESP_FLAGS_GLOBAL_MSIX_AUTOMASKING         0x2UL
-	#define FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED                   0x4UL
-	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED               0x8UL
-	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED               0x10UL
-	#define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED          0x20UL
-	#define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED               0x40UL
-	#define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED            0x80UL
-	#define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED             0x100UL
-	#define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED         0x200UL
-	#define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED             0x400UL
-	#define FUNC_QCAPS_RESP_FLAGS_STD_TX_RING_MODE_SUPPORTED      0x800UL
-	#define FUNC_QCAPS_RESP_FLAGS_GENEVE_TUN_FLAGS_SUPPORTED      0x1000UL
-	#define FUNC_QCAPS_RESP_FLAGS_NVGRE_TUN_FLAGS_SUPPORTED       0x2000UL
-	#define FUNC_QCAPS_RESP_FLAGS_GRE_TUN_FLAGS_SUPPORTED         0x4000UL
-	#define FUNC_QCAPS_RESP_FLAGS_MPLS_TUN_FLAGS_SUPPORTED        0x8000UL
-	#define FUNC_QCAPS_RESP_FLAGS_PCIE_STATS_SUPPORTED            0x10000UL
-	#define FUNC_QCAPS_RESP_FLAGS_ADOPTED_PF_SUPPORTED            0x20000UL
-	#define FUNC_QCAPS_RESP_FLAGS_ADMIN_PF_SUPPORTED              0x40000UL
-	#define FUNC_QCAPS_RESP_FLAGS_LINK_ADMIN_STATUS_SUPPORTED     0x80000UL
+	#define FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED                   0x1UL
+	#define FUNC_QCAPS_RESP_FLAGS_GLOBAL_MSIX_AUTOMASKING               0x2UL
+	#define FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED                         0x4UL
+	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED                     0x8UL
+	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED                     0x10UL
+	#define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED                0x20UL
+	#define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED                     0x40UL
+	#define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED                  0x80UL
+	#define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED                   0x100UL
+	#define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED               0x200UL
+	#define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED                   0x400UL
+	#define FUNC_QCAPS_RESP_FLAGS_STD_TX_RING_MODE_SUPPORTED            0x800UL
+	#define FUNC_QCAPS_RESP_FLAGS_GENEVE_TUN_FLAGS_SUPPORTED            0x1000UL
+	#define FUNC_QCAPS_RESP_FLAGS_NVGRE_TUN_FLAGS_SUPPORTED             0x2000UL
+	#define FUNC_QCAPS_RESP_FLAGS_GRE_TUN_FLAGS_SUPPORTED               0x4000UL
+	#define FUNC_QCAPS_RESP_FLAGS_MPLS_TUN_FLAGS_SUPPORTED              0x8000UL
+	#define FUNC_QCAPS_RESP_FLAGS_PCIE_STATS_SUPPORTED                  0x10000UL
+	#define FUNC_QCAPS_RESP_FLAGS_ADOPTED_PF_SUPPORTED                  0x20000UL
+	#define FUNC_QCAPS_RESP_FLAGS_ADMIN_PF_SUPPORTED                    0x40000UL
+	#define FUNC_QCAPS_RESP_FLAGS_LINK_ADMIN_STATUS_SUPPORTED           0x80000UL
+	#define FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE                         0x100000UL
+	#define FUNC_QCAPS_RESP_FLAGS_DYNAMIC_TX_RING_ALLOC                 0x200000UL
+	#define FUNC_QCAPS_RESP_FLAGS_HOT_RESET_CAPABLE                     0x400000UL
+	#define FUNC_QCAPS_RESP_FLAGS_ERROR_RECOVERY_CAPABLE                0x800000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_STATS_SUPPORTED                   0x1000000UL
+	#define FUNC_QCAPS_RESP_FLAGS_ERR_RECOVER_RELOAD                    0x2000000UL
+	#define FUNC_QCAPS_RESP_FLAGS_NOTIFY_VF_DEF_VNIC_CHNG_SUPPORTED     0x4000000UL
 	u8	mac_address[6];
 	__le16	max_rsscos_ctx;
 	__le16	max_cmpl_rings;
@@ -887,7 +1150,7 @@
 	u8	unused_0[6];
 };
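
/*
 * Illustrative sketch, not part of this commit: testing one of the new
 * capability bits from a completed HWRM_FUNC_QCAPS response. How the
 * response was obtained is elided; the helper name is hypothetical.
 */
static inline bool bnxt_fw_supports_hot_reset(struct hwrm_func_qcaps_output *resp)
{
	return !!(le32_to_cpu(resp->flags) & FUNC_QCAPS_RESP_FLAGS_HOT_RESET_CAPABLE);
}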
 
-/* hwrm_func_qcfg_output (size:640b/80B) */
+/* hwrm_func_qcfg_output (size:704b/88B) */
 struct hwrm_func_qcfg_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -903,6 +1166,9 @@
 	#define FUNC_QCFG_RESP_FLAGS_STD_TX_RING_MODE_ENABLED     0x8UL
 	#define FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED        0x10UL
 	#define FUNC_QCFG_RESP_FLAGS_MULTI_HOST                   0x20UL
+	#define FUNC_QCFG_RESP_FLAGS_TRUSTED_VF                   0x40UL
+	#define FUNC_QCFG_RESP_FLAGS_SECURE_MODE_ENABLED          0x80UL
+	#define FUNC_QCFG_RESP_FLAGS_PREBOOT_LEGACY_L2_RINGS      0x100UL
 	u8	mac_address[6];
 	__le16	pci_id;
 	__le16	alloc_rsscos_ctx;
@@ -984,7 +1250,12 @@
 	__le16	alloc_sp_tx_rings;
 	__le16	alloc_stat_ctx;
 	__le16	alloc_msix;
-	u8	unused_2[5];
+	__le16	registered_vfs;
+	__le16	l2_doorbell_bar_size_kb;
+	u8	unused_1;
+	u8	always_1;
+	__le32	reset_addr_poll;
+	u8	unused_2[3];
 	u8	valid;
 };
 
@@ -1014,6 +1285,11 @@
 	#define FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST           0x40000UL
 	#define FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST               0x80000UL
 	#define FUNC_CFG_REQ_FLAGS_L2_CTX_ASSETS_TEST             0x100000UL
+	#define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE              0x200000UL
+	#define FUNC_CFG_REQ_FLAGS_DYNAMIC_TX_RING_ALLOC          0x400000UL
+	#define FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST                 0x800000UL
+	#define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE             0x1000000UL
+	#define FUNC_CFG_REQ_FLAGS_PREBOOT_LEGACY_L2_RINGS        0x2000000UL
 	__le32	enables;
 	#define FUNC_CFG_REQ_ENABLES_MTU                     0x1UL
 	#define FUNC_CFG_REQ_ENABLES_MRU                     0x2UL
@@ -1131,7 +1407,11 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le16	fid;
-	u8	unused_0[6];
+	u8	flags;
+	#define FUNC_QSTATS_REQ_FLAGS_UNUSED    0x0UL
+	#define FUNC_QSTATS_REQ_FLAGS_ROCE_ONLY 0x1UL
+	#define FUNC_QSTATS_REQ_FLAGS_LAST     FUNC_QSTATS_REQ_FLAGS_ROCE_ONLY
+	u8	unused_0[5];
 };
 
 /* hwrm_func_qstats_output (size:1408b/176B) */
@@ -1214,9 +1494,12 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE       0x1UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE      0x2UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE     0x4UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE               0x1UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE              0x2UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE             0x4UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE     0x8UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT          0x10UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT     0x20UL
 	__le32	enables;
 	#define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE             0x1UL
 	#define FUNC_DRV_RGTR_REQ_ENABLES_VER                 0x2UL
@@ -1416,7 +1699,9 @@
 	__le16	min_hw_ring_grps;
 	__le16	max_hw_ring_grps;
 	__le16	max_tx_scheduler_inputs;
-	u8	unused_0[7];
+	__le16	flags;
+	#define FUNC_RESOURCE_QCAPS_RESP_FLAGS_MIN_GUARANTEED     0x1UL
+	u8	unused_0[5];
 	u8	valid;
 };
 
@@ -1445,7 +1730,9 @@
 	__le16	max_stat_ctx;
 	__le16	min_hw_ring_grps;
 	__le16	max_hw_ring_grps;
-	u8	unused_0[4];
+	__le16	flags;
+	#define FUNC_VF_RESOURCE_CFG_REQ_FLAGS_MIN_GUARANTEED     0x1UL
+	u8	unused_0[2];
 };
 
 /* hwrm_func_vf_resource_cfg_output (size:256b/32B) */
@@ -1503,7 +1790,8 @@
 	__le16	mrav_entry_size;
 	__le16	tim_entry_size;
 	__le32	tim_max_entries;
-	u8	unused_0[3];
+	__le16	mrav_num_entries_units;
+	u8	tqm_entries_multiple;
 	u8	valid;
 };
 
@@ -1515,7 +1803,8 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE     0x1UL
+	#define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE               0x1UL
+	#define FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT     0x2UL
 	__le32	enables;
 	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP            0x1UL
 	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ           0x2UL
@@ -1846,6 +2135,89 @@
 	u8	valid;
 };
 
+/* hwrm_error_recovery_qcfg_input (size:192b/24B) */
+struct hwrm_error_recovery_qcfg_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	u8	unused_0[8];
+};
+
+/* hwrm_error_recovery_qcfg_output (size:1664b/208B) */
+struct hwrm_error_recovery_qcfg_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le32	flags;
+	#define ERROR_RECOVERY_QCFG_RESP_FLAGS_HOST       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU     0x2UL
+	__le32	driver_polling_freq;
+	__le32	master_func_wait_period;
+	__le32	normal_func_wait_period;
+	__le32	master_func_wait_period_after_reset;
+	__le32	max_bailout_time_after_reset;
+	__le32	fw_health_status_reg;
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_MASK    0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_SFT     0
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_PCIE_CFG  0x0UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_GRC       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR0      0x2UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR1      0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_LAST     ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR1
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_MASK          0xfffffffcUL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SFT           2
+	__le32	fw_heartbeat_reg;
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_MASK    0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_SFT     0
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_PCIE_CFG  0x0UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_GRC       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR0      0x2UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR1      0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_LAST     ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR1
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_MASK          0xfffffffcUL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SFT           2
+	__le32	fw_reset_cnt_reg;
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_MASK    0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_SFT     0
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_PCIE_CFG  0x0UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_GRC       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR0      0x2UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR1      0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_LAST     ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR1
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_MASK          0xfffffffcUL
+	#define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SFT           2
+	__le32	reset_inprogress_reg;
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_MASK    0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_SFT     0
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_PCIE_CFG  0x0UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_GRC       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR0      0x2UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR1      0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_LAST     ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR1
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_MASK          0xfffffffcUL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SFT           2
+	__le32	reset_inprogress_reg_mask;
+	u8	unused_0[3];
+	u8	reg_array_cnt;
+	__le32	reset_reg[16];
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_MASK    0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_SFT     0
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_PCIE_CFG  0x0UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_GRC       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR0      0x2UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR1      0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_LAST     ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR1
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_MASK          0xfffffffcUL
+	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SFT           2
+	__le32	reset_reg_val[16];
+	u8	delay_after_reset[16];
+	u8	unused_1[7];
+	u8	valid;
+};
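
/*
 * Illustrative sketch, not part of this commit: each reset_reg[] word packs
 * an address-space selector in bits 1:0 and a dword-aligned register offset
 * above it. A driver walking reg_array_cnt entries might split them so:
 */
static inline void bnxt_decode_reset_reg(__le32 reg, u8 *space, u32 *offset)
{
	u32 val = le32_to_cpu(reg);

	*space = val & ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_MASK;
	*offset = val & ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_MASK;
}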
+
 /* hwrm_func_drv_if_change_input (size:192b/24B) */
 struct hwrm_func_drv_if_change_input {
 	__le16	req_type;
@@ -1865,7 +2237,8 @@
 	__le16	seq_id;
 	__le16	resp_len;
 	__le32	flags;
-	#define FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE     0x1UL
+	#define FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE           0x1UL
+	#define FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE     0x2UL
 	u8	unused_0[3];
 	u8	valid;
 };
@@ -1917,6 +2290,7 @@
 	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB  0x190UL
 	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB  0x1f4UL
 	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB 0x3e8UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_200GB 0x7d0UL
 	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB  0xffffUL
 	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB
 	u8	auto_mode;
@@ -1947,6 +2321,7 @@
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB  0x190UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB  0x1f4UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB 0x3e8UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_200GB 0x7d0UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB  0xffffUL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_LAST PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB
 	__le16	auto_link_speed_mask;
@@ -1964,6 +2339,7 @@
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100GB       0x800UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD      0x1000UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB        0x2000UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_200GB       0x4000UL
 	u8	wirespeed;
 	#define PORT_PHY_CFG_REQ_WIRESPEED_OFF 0x0UL
 	#define PORT_PHY_CFG_REQ_WIRESPEED_ON  0x1UL
@@ -2048,6 +2424,7 @@
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB  0x190UL
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB  0x1f4UL
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_200GB 0x7d0UL
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB  0xffffUL
 	#define PORT_PHY_QCFG_RESP_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_LINK_SPEED_10MB
 	u8	duplex_cfg;
@@ -2072,6 +2449,7 @@
 	#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_100GB       0x800UL
 	#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD      0x1000UL
 	#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB        0x2000UL
+	#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_200GB       0x4000UL
 	__le16	force_link_speed;
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB 0x1UL
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB   0xaUL
@@ -2083,6 +2461,7 @@
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB  0x190UL
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB  0x1f4UL
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB 0x3e8UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_200GB 0x7d0UL
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB  0xffffUL
 	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB
 	u8	auto_mode;
@@ -2107,6 +2486,7 @@
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB  0x190UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB  0x1f4UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB 0x3e8UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_200GB 0x7d0UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB  0xffffUL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB
 	__le16	auto_link_speed_mask;
@@ -2124,6 +2504,7 @@
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100GB       0x800UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD      0x1000UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB        0x2000UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_200GB       0x4000UL
 	u8	wirespeed;
 	#define PORT_PHY_QCFG_RESP_WIRESPEED_OFF 0x0UL
 	#define PORT_PHY_QCFG_RESP_WIRESPEED_ON  0x1UL
@@ -2178,7 +2559,11 @@
 	#define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASET         0x19UL
 	#define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASESX        0x1aUL
 	#define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASECX        0x1bUL
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST            PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASECX
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASECR4     0x1cUL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR4     0x1dUL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR4     0x1eUL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4     0x1fUL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST            PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4
 	u8	media_type;
 	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL
 	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP      0x1UL
@@ -2271,7 +2656,7 @@
 	u8	valid;
 };
 
-/* hwrm_port_mac_cfg_input (size:320b/40B) */
+/* hwrm_port_mac_cfg_input (size:384b/48B) */
 struct hwrm_port_mac_cfg_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -2292,6 +2677,7 @@
 	#define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE          0x400UL
 	#define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE        0x800UL
 	#define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE           0x1000UL
+	#define PORT_MAC_CFG_REQ_FLAGS_PTP_ONE_STEP_TX_TS            0x2000UL
 	__le32	enables;
 	#define PORT_MAC_CFG_REQ_ENABLES_IPG                            0x1UL
 	#define PORT_MAC_CFG_REQ_ENABLES_LPBK                           0x2UL
@@ -2301,6 +2687,7 @@
 	#define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE     0x40UL
 	#define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE     0x80UL
 	#define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG                  0x100UL
+	#define PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB               0x200UL
 	__le16	port_id;
 	u8	ipg;
 	u8	lpbk;
@@ -2333,6 +2720,8 @@
 	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK          0xe0UL
 	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT           5
 	u8	unused_0[3];
+	__s32	ptp_freq_adj_ppb;
+	u8	unused_1[4];
 };
 
 /* hwrm_port_mac_cfg_output (size:128b/16B) */
@@ -2371,8 +2760,9 @@
 	__le16	seq_id;
 	__le16	resp_len;
 	u8	flags;
-	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS     0x1UL
-	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS       0x2UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS      0x1UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS        0x2UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS     0x4UL
 	u8	unused_0[3];
 	__le32	rx_ts_reg_off_lower;
 	__le32	rx_ts_reg_off_upper;
@@ -2579,7 +2969,7 @@
 	__le64	pfc_pri7_tx_transitions;
 };
 
-/* rx_port_stats_ext (size:2368b/296B) */
+/* rx_port_stats_ext (size:3648b/456B) */
 struct rx_port_stats_ext {
 	__le64	link_down_events;
 	__le64	continuous_pause_events;
@@ -2618,6 +3008,26 @@
 	__le64	pfc_pri6_rx_transitions;
 	__le64	pfc_pri7_rx_duration_us;
 	__le64	pfc_pri7_rx_transitions;
+	__le64	rx_bits;
+	__le64	rx_buffer_passed_threshold;
+	__le64	rx_pcs_symbol_err;
+	__le64	rx_corrected_bits;
+	__le64	rx_discard_bytes_cos0;
+	__le64	rx_discard_bytes_cos1;
+	__le64	rx_discard_bytes_cos2;
+	__le64	rx_discard_bytes_cos3;
+	__le64	rx_discard_bytes_cos4;
+	__le64	rx_discard_bytes_cos5;
+	__le64	rx_discard_bytes_cos6;
+	__le64	rx_discard_bytes_cos7;
+	__le64	rx_discard_packets_cos0;
+	__le64	rx_discard_packets_cos1;
+	__le64	rx_discard_packets_cos2;
+	__le64	rx_discard_packets_cos3;
+	__le64	rx_discard_packets_cos4;
+	__le64	rx_discard_packets_cos5;
+	__le64	rx_discard_packets_cos6;
+	__le64	rx_discard_packets_cos7;
 };
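
/*
 * Illustrative sketch, not part of this commit: the extended RX stats now
 * report per-CoS discard counters; a hypothetical helper totals the bytes.
 */
static inline u64 bnxt_total_rx_discard_bytes(struct rx_port_stats_ext *s)
{
	return le64_to_cpu(s->rx_discard_bytes_cos0) +
	       le64_to_cpu(s->rx_discard_bytes_cos1) +
	       le64_to_cpu(s->rx_discard_bytes_cos2) +
	       le64_to_cpu(s->rx_discard_bytes_cos3) +
	       le64_to_cpu(s->rx_discard_bytes_cos4) +
	       le64_to_cpu(s->rx_discard_bytes_cos5) +
	       le64_to_cpu(s->rx_discard_bytes_cos6) +
	       le64_to_cpu(s->rx_discard_bytes_cos7);
}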
 
 /* hwrm_port_qstats_ext_input (size:320b/40B) */
@@ -2644,7 +3054,8 @@
 	__le16	tx_stat_size;
 	__le16	rx_stat_size;
 	__le16	total_active_cos_queues;
-	u8	unused_0;
+	u8	flags;
+	#define PORT_QSTATS_EXT_RESP_FLAGS_CLEAR_ROCE_COUNTERS_SUPPORTED     0x1UL
 	u8	valid;
 };
 
@@ -2685,7 +3096,9 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le16	port_id;
-	u8	unused_0[6];
+	u8	flags;
+	#define PORT_CLR_STATS_REQ_FLAGS_ROCE_COUNTERS     0x1UL
+	u8	unused_0[5];
 };
 
 /* hwrm_port_clr_stats_output (size:128b/16B) */
@@ -2717,6 +3130,35 @@
 	u8	valid;
 };
 
+/* hwrm_port_ts_query_input (size:192b/24B) */
+struct hwrm_port_ts_query_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	flags;
+	#define PORT_TS_QUERY_REQ_FLAGS_PATH             0x1UL
+	#define PORT_TS_QUERY_REQ_FLAGS_PATH_TX            0x0UL
+	#define PORT_TS_QUERY_REQ_FLAGS_PATH_RX            0x1UL
+	#define PORT_TS_QUERY_REQ_FLAGS_PATH_LAST         PORT_TS_QUERY_REQ_FLAGS_PATH_RX
+	#define PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME     0x2UL
+	__le16	port_id;
+	u8	unused_0[2];
+};
+
+/* hwrm_port_ts_query_output (size:192b/24B) */
+struct hwrm_port_ts_query_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le64	ptp_msg_ts;
+	__le16	ptp_msg_seqid;
+	u8	unused_0[5];
+	u8	valid;
+};
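
/*
 * Illustrative sketch, not part of this commit: filling the new
 * HWRM_PORT_TS_QUERY request to fetch the latest RX PTP timestamp. Message
 * setup and transport are elided; the helper name is hypothetical.
 */
static inline void bnxt_fill_ts_query(struct hwrm_port_ts_query_input *req,
				      u16 port_id)
{
	req->flags = cpu_to_le32(PORT_TS_QUERY_REQ_FLAGS_PATH_RX);
	req->port_id = cpu_to_le16(port_id);
}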
+
 /* hwrm_port_phy_qcaps_input (size:192b/24B) */
 struct hwrm_port_phy_qcaps_input {
 	__le16	req_type;
@@ -2761,6 +3203,7 @@
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100GB       0x800UL
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10MBHD      0x1000UL
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10MB        0x2000UL
+	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_200GB       0x4000UL
 	__le16	supported_speeds_auto_mode;
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100MBHD     0x1UL
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100MB       0x2UL
@@ -2776,6 +3219,7 @@
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100GB       0x800UL
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10MBHD      0x1000UL
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10MB        0x2000UL
+	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_200GB       0x4000UL
 	__le16	supported_speeds_eee_mode;
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_RSVD1     0x1UL
 	#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_100MB     0x2UL
@@ -2826,6 +3270,60 @@
 	u8	valid;
 };
 
+/* hwrm_port_phy_mdio_write_input (size:320b/40B) */
+struct hwrm_port_phy_mdio_write_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	unused_0[2];
+	__le16	port_id;
+	u8	phy_addr;
+	u8	dev_addr;
+	__le16	reg_addr;
+	__le16	reg_data;
+	u8	cl45_mdio;
+	u8	unused_1[7];
+};
+
+/* hwrm_port_phy_mdio_write_output (size:128b/16B) */
+struct hwrm_port_phy_mdio_write_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	unused_0[7];
+	u8	valid;
+};
+
+/* hwrm_port_phy_mdio_read_input (size:256b/32B) */
+struct hwrm_port_phy_mdio_read_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	unused_0[2];
+	__le16	port_id;
+	u8	phy_addr;
+	u8	dev_addr;
+	__le16	reg_addr;
+	u8	cl45_mdio;
+	u8	unused_1;
+};
+
+/* hwrm_port_phy_mdio_read_output (size:128b/16B) */
+struct hwrm_port_phy_mdio_read_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le16	reg_data;
+	u8	unused_0[5];
+	u8	valid;
+};
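
/*
 * Illustrative sketch, not part of this commit: packing the new MDIO read
 * request for a clause-45 register. Issuing the message goes through the
 * usual HWRM send path, elided here; the helper name is hypothetical.
 */
static inline void bnxt_fill_mdio_read(struct hwrm_port_phy_mdio_read_input *req,
				       u16 port_id, u8 phy, u8 devad, u16 reg)
{
	req->port_id = cpu_to_le16(port_id);
	req->phy_addr = phy;
	req->dev_addr = devad;
	req->reg_addr = cpu_to_le16(reg);
	req->cl45_mdio = 1;	/* nonzero selects clause-45 addressing */
}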
+
 /* hwrm_port_led_cfg_input (size:512b/64B) */
 struct hwrm_port_led_cfg_input {
 	__le16	req_type;
@@ -4264,7 +4762,7 @@
 	u8	valid;
 };
 
-/* hwrm_vnic_cfg_input (size:320b/40B) */
+/* hwrm_vnic_cfg_input (size:384b/48B) */
 struct hwrm_vnic_cfg_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -4287,6 +4785,7 @@
 	#define VNIC_CFG_REQ_ENABLES_MRU                      0x10UL
 	#define VNIC_CFG_REQ_ENABLES_DEFAULT_RX_RING_ID       0x20UL
 	#define VNIC_CFG_REQ_ENABLES_DEFAULT_CMPL_RING_ID     0x40UL
+	#define VNIC_CFG_REQ_ENABLES_QUEUE_ID                 0x80UL
 	__le16	vnic_id;
 	__le16	dflt_ring_grp;
 	__le16	rss_rule;
@@ -4295,6 +4794,8 @@
 	__le16	mru;
 	__le16	default_rx_ring_id;
 	__le16	default_cmpl_ring_id;
+	__le16	queue_id;
+	u8	unused0[6];
 };
 
 /* hwrm_vnic_cfg_output (size:128b/16B) */
@@ -4335,7 +4836,9 @@
 	#define VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP                     0x20UL
 	#define VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP     0x40UL
 	#define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_CAP                   0x80UL
-	u8	unused_1[7];
+	#define VNIC_QCAPS_RESP_FLAGS_COS_ASSIGNMENT_CAP                  0x100UL
+	__le16	max_aggs_supported;
+	u8	unused_1[5];
 	u8	valid;
 };
 
@@ -4355,6 +4858,7 @@
 	#define VNIC_TPA_CFG_REQ_FLAGS_AGG_WITH_SAME_GRE_SEQ     0x20UL
 	#define VNIC_TPA_CFG_REQ_FLAGS_GRO_IPID_CHECK            0x40UL
 	#define VNIC_TPA_CFG_REQ_FLAGS_GRO_TTL_CHECK             0x80UL
+	#define VNIC_TPA_CFG_REQ_FLAGS_AGG_PACK_AS_GRO           0x100UL
 	__le32	enables;
 	#define VNIC_TPA_CFG_REQ_ENABLES_MAX_AGG_SEGS      0x1UL
 	#define VNIC_TPA_CFG_REQ_ENABLES_MAX_AGGS          0x2UL
@@ -4574,7 +5078,9 @@
 	#define RING_ALLOC_REQ_RING_TYPE_RX_AGG    0x4UL
 	#define RING_ALLOC_REQ_RING_TYPE_NQ        0x5UL
 	#define RING_ALLOC_REQ_RING_TYPE_LAST     RING_ALLOC_REQ_RING_TYPE_NQ
-	u8	unused_0[3];
+	u8	unused_0;
+	__le16	flags;
+	#define RING_ALLOC_REQ_FLAGS_RX_SOP_PAD     0x1UL
 	__le64	page_tbl_addr;
 	__le32	fbo;
 	u8	page_size;
@@ -4669,6 +5175,35 @@
 	u8	valid;
 };
 
+/* hwrm_ring_reset_input (size:192b/24B) */
+struct hwrm_ring_reset_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	u8	ring_type;
+	#define RING_RESET_REQ_RING_TYPE_L2_CMPL   0x0UL
+	#define RING_RESET_REQ_RING_TYPE_TX        0x1UL
+	#define RING_RESET_REQ_RING_TYPE_RX        0x2UL
+	#define RING_RESET_REQ_RING_TYPE_ROCE_CMPL 0x3UL
+	#define RING_RESET_REQ_RING_TYPE_LAST     RING_RESET_REQ_RING_TYPE_ROCE_CMPL
+	u8	unused_0;
+	__le16	ring_id;
+	u8	unused_1[4];
+};
+
+/* hwrm_ring_reset_output (size:128b/16B) */
+struct hwrm_ring_reset_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	unused_0[4];
+	u8	consumer_idx[3];
+	u8	valid;
+};
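
/*
 * Illustrative sketch, not part of this commit: the ring-reset response
 * returns the consumer index as three bytes, assumed little-endian like the
 * rest of this interface; a hypothetical helper widens it to a u32.
 */
static inline u32 bnxt_ring_reset_cons(struct hwrm_ring_reset_output *resp)
{
	return resp->consumer_idx[0] |
	       (resp->consumer_idx[1] << 8) |
	       (resp->consumer_idx[2] << 16);
}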
+
 /* hwrm_ring_aggint_qcaps_input (size:128b/16B) */
 struct hwrm_ring_aggint_qcaps_input {
 	__le16	req_type;
@@ -4829,6 +5364,10 @@
 	u8	unused_0[7];
 	u8	valid;
 };
+#define DEFAULT_FLOW_ID 0xFFFFFFFFUL
+#define ROCEV1_FLOW_ID 0xFFFFFFFEUL
+#define ROCEV2_FLOW_ID 0xFFFFFFFDUL
+#define ROCEV2_CNP_FLOW_ID 0xFFFFFFFCUL
 
 /* hwrm_cfa_l2_filter_alloc_input (size:768b/96B) */
 struct hwrm_cfa_l2_filter_alloc_input {
@@ -4838,13 +5377,21 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH          0x1UL
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_TX         0x0UL
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX         0x1UL
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_LAST      CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_LOOPBACK      0x2UL
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_DROP          0x4UL
-	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST     0x8UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH              0x1UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_TX             0x0UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX             0x1UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_LAST          CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_LOOPBACK          0x2UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_DROP              0x4UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST         0x8UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_MASK      0x30UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_SFT       4
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_NO_ROCE_L2  (0x0UL << 4)
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_L2          (0x1UL << 4)
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_ROCE        (0x2UL << 4)
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_LAST       CFA_L2_FILTER_ALLOC_REQ_FLAGS_TRAFFIC_ROCE
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_XDP_DISABLE       0x40UL
+	#define CFA_L2_FILTER_ALLOC_REQ_FLAGS_SOURCE_VALID      0x80UL
 	__le32	enables;
 	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR             0x1UL
 	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK        0x2UL
@@ -4863,8 +5410,11 @@
 	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE         0x4000UL
 	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID              0x8000UL
 	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID      0x10000UL
+	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_NUM_VLANS           0x20000UL
+	#define CFA_L2_FILTER_ALLOC_REQ_ENABLES_T_NUM_VLANS         0x40000UL
 	u8	l2_addr[6];
-	u8	unused_0[2];
+	u8	num_vlans;
+	u8	t_num_vlans;
 	u8	l2_addr_mask[6];
 	__le16	l2_ovlan;
 	__le16	l2_ovlan_mask;
@@ -4891,18 +5441,21 @@
 	u8	unused_3;
 	__le32	src_id;
 	u8	tunnel_type;
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     0x1UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     0x2UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     0x3UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      0x4UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    0x5UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4  0x9UL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST     CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL    0x0UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE        0x2UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE        0x3UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP         0x4UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS         0x6UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT          0x7UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE        0x8UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL    0xffUL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST        CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
 	u8	unused_4;
 	__le16	dst_id;
 	__le16	mirror_vnic_id;
@@ -4926,6 +5479,16 @@
 	__le16	resp_len;
 	__le64	l2_filter_id;
 	__le32	flow_id;
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_VALUE_SFT 0
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE      0x40000000UL
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_INT    (0x0UL << 30)
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT    (0x1UL << 30)
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_LAST  CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR       0x80000000UL
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_RX      (0x0UL << 31)
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX      (0x1UL << 31)
+	#define CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_LAST   CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX
 	u8	unused_0[3];
 	u8	valid;
 };
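
/*
 * Illustrative sketch, not part of this commit: filter-alloc responses now
 * encode a 30-bit value plus type and direction bits in flow_id. A
 * hypothetical decode for the L2 filter variant:
 */
static inline bool bnxt_l2_flow_id_is_ext(__le32 flow_id)
{
	return !!(le32_to_cpu(flow_id) & CFA_L2_FILTER_ALLOC_RESP_FLOW_ID_TYPE);
}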
@@ -4958,11 +5521,17 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH     0x1UL
-	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX    0x0UL
-	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX    0x1UL
-	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX
-	#define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP     0x2UL
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH              0x1UL
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX             0x0UL
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX             0x1UL
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST          CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP              0x2UL
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK      0xcUL
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT       2
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2  (0x0UL << 2)
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2          (0x1UL << 2)
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE        (0x2UL << 2)
+	#define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST       CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE
 	__le32	enables;
 	#define CFA_L2_FILTER_CFG_REQ_ENABLES_DST_ID                 0x1UL
 	#define CFA_L2_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID     0x2UL
@@ -5054,18 +5623,21 @@
 	u8	l3_addr_type;
 	u8	t_l3_addr_type;
 	u8	tunnel_type;
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     0x1UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     0x2UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     0x3UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      0x4UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    0x5UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4  0x9UL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST     CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL    0x0UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE        0x2UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE        0x3UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP         0x4UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS         0x6UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT          0x7UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE        0x8UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL    0xffUL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST        CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
 	u8	tunnel_flags;
 	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_FLAGS_TUN_FLAGS_OAM_CHECKSUM_EXPLHDR     0x1UL
 	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_FLAGS_TUN_FLAGS_CRITICAL_OPT_S1          0x2UL
@@ -5083,6 +5655,16 @@
 	__le16	resp_len;
 	__le64	tunnel_filter_id;
 	__le32	flow_id;
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_VALUE_SFT 0
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE      0x40000000UL
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_INT    (0x0UL << 30)
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT    (0x1UL << 30)
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_LAST  CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR       0x80000000UL
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_RX      (0x0UL << 31)
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX      (0x1UL << 31)
+	#define CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_LAST   CFA_TUNNEL_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX
 	u8	unused_0[3];
 	u8	valid;
 };
@@ -5140,7 +5722,7 @@
 	__be32	dest_ip_addr[4];
 };
 
-/* hwrm_cfa_encap_data_vxlan (size:576b/72B) */
+/* hwrm_cfa_encap_data_vxlan (size:640b/80B) */
 struct hwrm_cfa_encap_data_vxlan {
 	u8	src_mac_addr[6];
 	__le16	unused_0;
@@ -5159,6 +5741,10 @@
 	__be16	src_port;
 	__be16	dst_port;
 	__be32	vni;
+	u8	hdr_rsvd0[3];
+	u8	hdr_rsvd1;
+	u8	hdr_flags;
+	u8	unused[3];
 };
 
 /* hwrm_cfa_encap_record_alloc_input (size:832b/104B) */
@@ -5170,16 +5756,21 @@
 	__le64	resp_addr;
 	__le32	flags;
 	#define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK     0x1UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_EXTERNAL     0x2UL
 	u8	encap_type;
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN  0x1UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE  0x2UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE  0x3UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP   0x4UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS   0x6UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN   0x7UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE  0x8UL
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_LAST  CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN        0x1UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE        0x2UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE        0x3UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP         0x4UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE       0x5UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS         0x6UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN         0x7UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE        0x8UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_V4     0x9UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE_V1     0xaUL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2_ETYPE     0xbUL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE_V6 0xcUL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_LAST        CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE_V6
 	u8	unused_0[3];
 	__le32	encap_data[20];
 };
@@ -5216,7 +5807,7 @@
 	u8	valid;
 };
 
-/* hwrm_cfa_ntuple_filter_alloc_input (size:1024b/128B) */
+/* hwrm_cfa_ntuple_filter_alloc_input (size:1088b/136B) */
 struct hwrm_cfa_ntuple_filter_alloc_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -5227,6 +5818,7 @@
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_LOOPBACK     0x1UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP         0x2UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_METER        0x4UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_FID     0x8UL
 	__le32	enables;
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID         0x1UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE            0x2UL
@@ -5247,6 +5839,7 @@
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_ID               0x10000UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID       0x20000UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR          0x40000UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_RFS_RING_TBL_IDX     0x80000UL
 	__le64	l2_filter_id;
 	u8	src_macaddr[6];
 	__be16	ethertype;
@@ -5263,18 +5856,21 @@
 	__le16	dst_id;
 	__le16	mirror_vnic_id;
 	u8	tunnel_type;
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     0x1UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     0x2UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     0x3UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      0x4UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    0x5UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4  0x9UL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST     CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL    0x0UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE        0x2UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE        0x3UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP         0x4UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS         0x6UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT          0x7UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE        0x8UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL    0xffUL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST        CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
 	u8	pri_hint;
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE     0x1UL
@@ -5291,6 +5887,8 @@
 	__be16	dst_port;
 	__be16	dst_port_mask;
 	__le64	ntuple_filter_id_hint;
+	__le16	rfs_ring_tbl_idx;
+	u8	unused_0[6];
 };
 
 /* hwrm_cfa_ntuple_filter_alloc_output (size:192b/24B) */
@@ -5301,6 +5899,16 @@
 	__le16	resp_len;
 	__le64	ntuple_filter_id;
 	__le32	flow_id;
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_VALUE_SFT 0
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE      0x40000000UL
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_INT    (0x0UL << 30)
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT    (0x1UL << 30)
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_LAST  CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_TYPE_EXT
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR       0x80000000UL
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_RX      (0x0UL << 31)
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX      (0x1UL << 31)
+	#define CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_LAST   CFA_NTUPLE_FILTER_ALLOC_RESP_FLOW_ID_DIR_TX
 	u8	unused_0[3];
 	u8	valid;
 };
@@ -5345,7 +5953,8 @@
 	#define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_DST_ID                0x1UL
 	#define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID        0x2UL
 	#define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_METER_INSTANCE_ID     0x4UL
-	u8	unused_0[4];
+	__le32	flags;
+	#define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_FID     0x1UL
 	__le64	ntuple_filter_id;
 	__le32	new_dst_id;
 	__le32	new_mirror_vnic_id;
@@ -5394,18 +6003,21 @@
 	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID     0x10000UL
 	__be32	tunnel_id;
 	u8	tunnel_type;
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     0x1UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     0x2UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     0x3UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      0x4UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    0x5UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4  0x9UL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
-	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST     CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL    0x0UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE        0x2UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE        0x3UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP         0x4UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS         0x6UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT          0x7UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE        0x8UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL    0xffUL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_LAST        CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
 	u8	unused_0;
 	__le16	unused_1;
 	u8	src_macaddr[6];
@@ -5476,32 +6088,40 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le16	flags;
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL       0x1UL
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_MASK 0x6UL
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_SFT 1
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_NONE  (0x0UL << 1)
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE   (0x1UL << 1)
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO   (0x2UL << 1)
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_LAST CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_MASK 0x38UL
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_SFT 3
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2    (0x0UL << 3)
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4  (0x1UL << 3)
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6  (0x2UL << 3)
-	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_LAST CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL                 0x1UL
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_MASK          0x6UL
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_SFT           1
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_NONE            (0x0UL << 1)
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE             (0x1UL << 1)
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO             (0x2UL << 1)
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_LAST           CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_MASK          0x38UL
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_SFT           3
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2              (0x0UL << 3)
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4            (0x1UL << 3)
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6            (0x2UL << 3)
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_LAST           CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_PATH_TX                0x40UL
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_PATH_RX                0x80UL
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_MATCH_VXLAN_IP_VNI     0x100UL
+	#define CFA_FLOW_ALLOC_REQ_FLAGS_VHOST_ID_USE_VLAN      0x200UL
 	__le16	src_fid;
 	__le32	tunnel_handle;
 	__le16	action_flags;
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD                   0x1UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_RECYCLE               0x2UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP                  0x4UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_METER                 0x8UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL                0x10UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC               0x20UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST              0x40UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS      0x80UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE     0x100UL
-	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TTL_DECREMENT         0x200UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD                       0x1UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_RECYCLE                   0x2UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP                      0x4UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_METER                     0x8UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL                    0x10UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC                   0x20UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST                  0x40UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS          0x80UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE         0x100UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TTL_DECREMENT             0x200UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL_IP                 0x400UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FLOW_AGING_ENABLED        0x800UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_PRI_HINT                  0x1000UL
+	#define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NO_FLOW_COUNTER_ALLOC     0x2000UL
 	__le16	dst_fid;
 	__be16	l2_rewrite_vlan_tpid;
 	__be16	l2_rewrite_vlan_tci;
@@ -5525,21 +6145,65 @@
 	__be16	nat_port;
 	__be16	l2_rewrite_smac[3];
 	u8	ip_proto;
-	u8	unused_0;
+	u8	tunnel_type;
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL    0x0UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_NVGRE        0x2UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_L2GRE        0x3UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_IPIP         0x4UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_MPLS         0x6UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_STT          0x7UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_IPGRE        0x8UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL    0xffUL
+	#define CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_LAST        CFA_FLOW_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL
 };
 
-/* hwrm_cfa_flow_alloc_output (size:128b/16B) */
+/* hwrm_cfa_flow_alloc_output (size:256b/32B) */
 struct hwrm_cfa_flow_alloc_output {
 	__le16	error_code;
 	__le16	req_type;
 	__le16	seq_id;
 	__le16	resp_len;
 	__le16	flow_handle;
-	u8	unused_0[5];
+	u8	unused_0[2];
+	__le32	flow_id;
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_VALUE_MASK 0x3fffffffUL
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_VALUE_SFT 0
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE      0x40000000UL
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_INT    (0x0UL << 30)
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_EXT    (0x1UL << 30)
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_LAST  CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE_EXT
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR       0x80000000UL
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_RX      (0x0UL << 31)
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_TX      (0x1UL << 31)
+	#define CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_LAST   CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR_TX
+	__le64	ext_flow_handle;
+	__le32	flow_counter_id;
+	u8	unused_1[3];
 	u8	valid;
 };
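
The new flow_id packs three fields into one 32-bit word: a value (the flow counter id on 64-bit-handle firmware), a type bit, and a direction bit. A minimal decode sketch, not part of the patch, assuming `resp` is a response pointer read under hwrm_cmd_lock:

	/* Hypothetical decode of the flow_id bitfield defined above. */
	u32 flow_id = le32_to_cpu(resp->flow_id);
	u32 value = (flow_id & CFA_FLOW_ALLOC_RESP_FLOW_ID_VALUE_MASK) >>
		    CFA_FLOW_ALLOC_RESP_FLOW_ID_VALUE_SFT;
	bool is_ext = flow_id & CFA_FLOW_ALLOC_RESP_FLOW_ID_TYPE; /* INT vs EXT */
	bool is_tx = flow_id & CFA_FLOW_ALLOC_RESP_FLOW_ID_DIR;   /* RX vs TX */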
 
-/* hwrm_cfa_flow_free_input (size:192b/24B) */
+/* hwrm_cfa_flow_alloc_cmd_err (size:64b/8B) */
+struct hwrm_cfa_flow_alloc_cmd_err {
+	u8	code;
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_UNKNOWN         0x0UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_L2_CONTEXT_TCAM 0x1UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_ACTION_RECORD   0x2UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_COUNTER    0x3UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_WILD_CARD_TCAM  0x4UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_HASH_COLLISION  0x5UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_KEY_EXISTS      0x6UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_CTXT_DB    0x7UL
+	#define CFA_FLOW_ALLOC_CMD_ERR_CODE_LAST           CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_CTXT_DB
+	u8	unused_0[7];
+};
+
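The command-specific error struct is new; firmware can now report *why* a flow allocation failed rather than just that it failed. A sketch of how a caller might stringify the codes for logging (bnxt adds no such helper in this patch; the function name is illustrative):

	static const char *cfa_flow_alloc_err_str(u8 code)
	{
		switch (code) {
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_L2_CONTEXT_TCAM:
			return "L2 context TCAM full";
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_ACTION_RECORD:
			return "no action record available";
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_COUNTER:
			return "no flow counter available";
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_WILD_CARD_TCAM:
			return "wildcard TCAM full";
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_HASH_COLLISION:
			return "exact-match hash collision";
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_KEY_EXISTS:
			return "flow key already exists";
		case CFA_FLOW_ALLOC_CMD_ERR_CODE_FLOW_CTXT_DB:
			return "flow context DB error";
		default:
			return "unknown";
		}
	}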
+/* hwrm_cfa_flow_free_input (size:256b/32B) */
 struct hwrm_cfa_flow_free_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -5547,7 +6211,9 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	__le16	flow_handle;
-	u8	unused_0[6];
+	__le16	unused_0;
+	__le32	flow_counter_id;
+	__le64	ext_flow_handle;
 };
 
 /* hwrm_cfa_flow_free_output (size:256b/32B) */
@@ -5562,7 +6228,52 @@
 	u8	valid;
 };
 
-/* hwrm_cfa_flow_stats_input (size:320b/40B) */
+/* hwrm_cfa_flow_info_input (size:256b/32B) */
+struct hwrm_cfa_flow_info_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le16	flow_handle;
+	#define CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK       0xfffUL
+	#define CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_SFT        0
+	#define CFA_FLOW_INFO_REQ_FLOW_HANDLE_CNP_CNT        0x1000UL
+	#define CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV1_CNT     0x2000UL
+	#define CFA_FLOW_INFO_REQ_FLOW_HANDLE_ROCEV2_CNT     0x4000UL
+	#define CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX         0x8000UL
+	u8	unused_0[6];
+	__le64	ext_flow_handle;
+};
+
+/* hwrm_cfa_flow_info_output (size:5632b/704B) */
+struct hwrm_cfa_flow_info_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	flags;
+	#define CFA_FLOW_INFO_RESP_FLAGS_PATH_TX     0x1UL
+	#define CFA_FLOW_INFO_RESP_FLAGS_PATH_RX     0x2UL
+	u8	profile;
+	__le16	src_fid;
+	__le16	dst_fid;
+	__le16	l2_ctxt_id;
+	__le64	em_info;
+	__le64	tcam_info;
+	__le64	vfp_tcam_info;
+	__le16	ar_id;
+	__le16	flow_handle;
+	__le32	tunnel_handle;
+	__le16	flow_timer;
+	u8	unused_0[6];
+	__le32	flow_key_data[130];
+	__le32	flow_action_info[30];
+	u8	unused_1[7];
+	u8	valid;
+};
+
+/* hwrm_cfa_flow_stats_input (size:640b/80B) */
 struct hwrm_cfa_flow_stats_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -5581,6 +6292,16 @@
 	__le16	flow_handle_8;
 	__le16	flow_handle_9;
 	u8	unused_0[2];
+	__le32	flow_id_0;
+	__le32	flow_id_1;
+	__le32	flow_id_2;
+	__le32	flow_id_3;
+	__le32	flow_id_4;
+	__le32	flow_id_5;
+	__le32	flow_id_6;
+	__le32	flow_id_7;
+	__le32	flow_id_8;
+	__le32	flow_id_9;
 };
 
 /* hwrm_cfa_flow_stats_output (size:1408b/176B) */
@@ -5658,6 +6379,177 @@
 	u8	valid;
 };
 
+/* hwrm_cfa_eem_qcaps_input (size:192b/24B) */
+struct hwrm_cfa_eem_qcaps_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	flags;
+	#define CFA_EEM_QCAPS_REQ_FLAGS_PATH_TX               0x1UL
+	#define CFA_EEM_QCAPS_REQ_FLAGS_PATH_RX               0x2UL
+	#define CFA_EEM_QCAPS_REQ_FLAGS_PREFERRED_OFFLOAD     0x4UL
+	__le32	unused_0;
+};
+
+/* hwrm_cfa_eem_qcaps_output (size:320b/40B) */
+struct hwrm_cfa_eem_qcaps_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le32	flags;
+	#define CFA_EEM_QCAPS_RESP_FLAGS_PATH_TX                                         0x1UL
+	#define CFA_EEM_QCAPS_RESP_FLAGS_PATH_RX                                         0x2UL
+	#define CFA_EEM_QCAPS_RESP_FLAGS_CENTRALIZED_MEMORY_MODEL_SUPPORTED              0x4UL
+	#define CFA_EEM_QCAPS_RESP_FLAGS_DETACHED_CENTRALIZED_MEMORY_MODEL_SUPPORTED     0x8UL
+	__le32	unused_0;
+	__le32	supported;
+	#define CFA_EEM_QCAPS_RESP_SUPPORTED_KEY0_TABLE                       0x1UL
+	#define CFA_EEM_QCAPS_RESP_SUPPORTED_KEY1_TABLE                       0x2UL
+	#define CFA_EEM_QCAPS_RESP_SUPPORTED_EXTERNAL_RECORD_TABLE            0x4UL
+	#define CFA_EEM_QCAPS_RESP_SUPPORTED_EXTERNAL_FLOW_COUNTERS_TABLE     0x8UL
+	#define CFA_EEM_QCAPS_RESP_SUPPORTED_FID_TABLE                        0x10UL
+	__le32	max_entries_supported;
+	__le16	key_entry_size;
+	__le16	record_entry_size;
+	__le16	efc_entry_size;
+	__le16	fid_entry_size;
+	u8	unused_1[7];
+	u8	valid;
+};
+
+/* hwrm_cfa_eem_cfg_input (size:384b/48B) */
+struct hwrm_cfa_eem_cfg_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	flags;
+	#define CFA_EEM_CFG_REQ_FLAGS_PATH_TX               0x1UL
+	#define CFA_EEM_CFG_REQ_FLAGS_PATH_RX               0x2UL
+	#define CFA_EEM_CFG_REQ_FLAGS_PREFERRED_OFFLOAD     0x4UL
+	#define CFA_EEM_CFG_REQ_FLAGS_SECONDARY_PF          0x8UL
+	__le16	group_id;
+	__le16	unused_0;
+	__le32	num_entries;
+	__le32	unused_1;
+	__le16	key0_ctx_id;
+	__le16	key1_ctx_id;
+	__le16	record_ctx_id;
+	__le16	efc_ctx_id;
+	__le16	fid_ctx_id;
+	__le16	unused_2;
+	__le32	unused_3;
+};
+
+/* hwrm_cfa_eem_cfg_output (size:128b/16B) */
+struct hwrm_cfa_eem_cfg_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	unused_0[7];
+	u8	valid;
+};
+
+/* hwrm_cfa_eem_qcfg_input (size:192b/24B) */
+struct hwrm_cfa_eem_qcfg_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	flags;
+	#define CFA_EEM_QCFG_REQ_FLAGS_PATH_TX     0x1UL
+	#define CFA_EEM_QCFG_REQ_FLAGS_PATH_RX     0x2UL
+	__le32	unused_0;
+};
+
+/* hwrm_cfa_eem_qcfg_output (size:256b/32B) */
+struct hwrm_cfa_eem_qcfg_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le32	flags;
+	#define CFA_EEM_QCFG_RESP_FLAGS_PATH_TX               0x1UL
+	#define CFA_EEM_QCFG_RESP_FLAGS_PATH_RX               0x2UL
+	#define CFA_EEM_QCFG_RESP_FLAGS_PREFERRED_OFFLOAD     0x4UL
+	__le32	num_entries;
+	__le16	key0_ctx_id;
+	__le16	key1_ctx_id;
+	__le16	record_ctx_id;
+	__le16	efc_ctx_id;
+	__le16	fid_ctx_id;
+	u8	unused_2[5];
+	u8	valid;
+};
+
+/* hwrm_cfa_eem_op_input (size:192b/24B) */
+struct hwrm_cfa_eem_op_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	flags;
+	#define CFA_EEM_OP_REQ_FLAGS_PATH_TX     0x1UL
+	#define CFA_EEM_OP_REQ_FLAGS_PATH_RX     0x2UL
+	__le16	unused_0;
+	__le16	op;
+	#define CFA_EEM_OP_REQ_OP_RESERVED    0x0UL
+	#define CFA_EEM_OP_REQ_OP_EEM_DISABLE 0x1UL
+	#define CFA_EEM_OP_REQ_OP_EEM_ENABLE  0x2UL
+	#define CFA_EEM_OP_REQ_OP_EEM_CLEANUP 0x3UL
+	#define CFA_EEM_OP_REQ_OP_LAST       CFA_EEM_OP_REQ_OP_EEM_CLEANUP
+};
+
+/* hwrm_cfa_eem_op_output (size:128b/16B) */
+struct hwrm_cfa_eem_op_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	unused_0[7];
+	u8	valid;
+};
+
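Taken together, the new EEM messages imply a bring-up order: query capabilities, configure the key/record/counter table contexts, then issue the enable op. A hedged sketch of the final step only; the in-tree driver does not use EEM at this point, and `bp` plus the HWRM helpers follow the conventions of the surrounding code:

	struct hwrm_cfa_eem_op_input req = {0};
	int rc;

	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_EEM_OP, -1, -1);
	req.flags = cpu_to_le32(CFA_EEM_OP_REQ_FLAGS_PATH_TX);
	req.op = cpu_to_le16(CFA_EEM_OP_REQ_OP_EEM_ENABLE);
	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);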
+/* hwrm_cfa_adv_flow_mgnt_qcaps_input (size:256b/32B) */
+struct hwrm_cfa_adv_flow_mgnt_qcaps_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	unused_0[4];
+};
+
+/* hwrm_cfa_adv_flow_mgnt_qcaps_output (size:128b/16B) */
+struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le32	flags;
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED              0x1UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED              0x2UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED           0x4UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED              0x8UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED       0x10UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED                 0x20UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED                 0x40UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED          0x80UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED            0x100UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED               0x200UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED                         0x400UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED     0x800UL
+	u8	unused_0[3];
+	u8	valid;
+};
+
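This is the capability query behind the BNXT_FW_CAP_OVS_64BIT_HANDLE checks added further down in bnxt_tc.c. A sketch of the probe, assuming the usual locked request/response pattern (the actual probe lives in bnxt.c, outside this diff):

	struct hwrm_cfa_adv_flow_mgnt_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
	struct hwrm_cfa_adv_flow_mgnt_qcaps_input req = {0};
	int rc;

	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ADV_FLOW_MGNT_QCAPS, -1, -1);
	mutex_lock(&bp->hwrm_cmd_lock);
	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
	if (!rc && (le32_to_cpu(resp->flags) &
		    CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED))
		bp->fw_cap |= BNXT_FW_CAP_OVS_64BIT_HANDLE;
	mutex_unlock(&bp->hwrm_cmd_lock);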
 /* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
 struct hwrm_tunnel_dst_port_query_input {
 	__le16	req_type;
@@ -5666,11 +6558,13 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	u8	tunnel_type;
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN    0x1UL
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE   0x5UL
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_LAST    TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_LAST        TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6
 	u8	unused_0[7];
 };
 
@@ -5694,11 +6588,13 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	u8	tunnel_type;
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN    0x1UL
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE   0x5UL
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_LAST    TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_LAST        TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6
 	u8	unused_0;
 	__be16	tunnel_dst_port_val;
 	u8	unused_1[4];
@@ -5723,11 +6619,13 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	u8	tunnel_type;
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN    0x1UL
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE   0x5UL
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_LAST    TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN        0x1UL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1     0xaUL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE     0xbUL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_LAST        TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6
 	u8	unused_0;
 	__le16	tunnel_dst_port_id;
 	u8	unused_1[4];
@@ -5767,6 +6665,31 @@
 	__le64	tpa_aborts;
 };
 
+/* ctx_hw_stats_ext (size:1344b/168B) */
+struct ctx_hw_stats_ext {
+	__le64	rx_ucast_pkts;
+	__le64	rx_mcast_pkts;
+	__le64	rx_bcast_pkts;
+	__le64	rx_discard_pkts;
+	__le64	rx_drop_pkts;
+	__le64	rx_ucast_bytes;
+	__le64	rx_mcast_bytes;
+	__le64	rx_bcast_bytes;
+	__le64	tx_ucast_pkts;
+	__le64	tx_mcast_pkts;
+	__le64	tx_bcast_pkts;
+	__le64	tx_discard_pkts;
+	__le64	tx_drop_pkts;
+	__le64	tx_ucast_bytes;
+	__le64	tx_mcast_bytes;
+	__le64	tx_bcast_bytes;
+	__le64	rx_tpa_eligible_pkt;
+	__le64	rx_tpa_eligible_bytes;
+	__le64	rx_tpa_pkt;
+	__le64	rx_tpa_bytes;
+	__le64	rx_tpa_errors;
+};
+
 /* hwrm_stat_ctx_alloc_input (size:256b/32B) */
 struct hwrm_stat_ctx_alloc_input {
 	__le16	req_type;
@@ -5778,7 +6701,8 @@
 	__le32	update_period_ms;
 	u8	stat_ctx_flags;
 	#define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE     0x1UL
-	u8	unused_0[3];
+	u8	unused_0;
+	__le16	stats_dma_length;
 };
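
The new stats_dma_length field lets the driver tell firmware how many bytes of statistics to DMA per context, which is what makes the larger ctx_hw_stats_ext block above usable. A hedged sketch of the wiring; the real hookup is in bnxt.c, not in this hunk:

	/* Assumption: 57500-series (P5) chips use the extended block. */
	if (bp->flags & BNXT_FLAG_CHIP_P5)
		req.stats_dma_length =
			cpu_to_le16(sizeof(struct ctx_hw_stats_ext));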
 
 /* hwrm_stat_ctx_alloc_output (size:128b/16B) */
@@ -5922,22 +6846,26 @@
 	__le16	target_id;
 	__le64	resp_addr;
 	u8	embedded_proc_type;
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT                 0x0UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT                 0x1UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL              0x2UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE                 0x3UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST                 0x4UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP                   0x5UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP                 0x6UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT 0x7UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_LAST                FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT                  0x0UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT                  0x1UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL               0x2UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE                  0x3UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST                  0x4UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP                    0x5UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP                  0x6UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT  0x7UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_IMPACTLESS_ACTIVATION 0x8UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_LAST                 FW_RESET_REQ_EMBEDDED_PROC_TYPE_IMPACTLESS_ACTIVATION
 	u8	selfrst_status;
-	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE    0x0UL
-	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP    0x1UL
-	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL
-	#define FW_RESET_REQ_SELFRST_STATUS_LAST          FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE      0x0UL
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP      0x1UL
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST   0x2UL
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTIMMEDIATE 0x3UL
+	#define FW_RESET_REQ_SELFRST_STATUS_LAST            FW_RESET_REQ_SELFRST_STATUS_SELFRSTIMMEDIATE
 	u8	host_idx;
-	u8	unused_0[5];
+	u8	flags;
+	#define FW_RESET_REQ_FLAGS_RESET_GRACEFUL     0x1UL
+	u8	unused_0[4];
 };
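
With the new flags byte a requester can ask for a graceful reset, giving firmware a chance to quiesce before going down. Illustrative request setup only; the driver-side user of this flag is outside this diff:

	struct hwrm_fw_reset_input req = {0};
	int rc;

	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
	req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
	req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
	req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);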
 
 /* hwrm_fw_reset_output (size:128b/16B) */
@@ -5947,10 +6875,11 @@
 	__le16	seq_id;
 	__le16	resp_len;
 	u8	selfrst_status;
-	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE    0x0UL
-	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP    0x1UL
-	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL
-	#define FW_RESET_RESP_SELFRST_STATUS_LAST          FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE      0x0UL
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP      0x1UL
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST   0x2UL
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTIMMEDIATE 0x3UL
+	#define FW_RESET_RESP_SELFRST_STATUS_LAST            FW_RESET_RESP_SELFRST_STATUS_SELFRSTIMMEDIATE
 	u8	unused_0[6];
 	u8	valid;
 };
@@ -5984,7 +6913,8 @@
 	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE    0x0UL
 	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP    0x1UL
 	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL
-	#define FW_QSTATUS_RESP_SELFRST_STATUS_LAST          FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST
+	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPOWER   0x3UL
+	#define FW_QSTATUS_RESP_SELFRST_STATUS_LAST          FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPOWER
 	u8	unused_0[6];
 	u8	valid;
 };
@@ -6007,8 +6937,8 @@
 	u8	unused_0;
 	__le16	millisecond;
 	__le16	zone;
-	#define FW_SET_TIME_REQ_ZONE_UTC     0x0UL
-	#define FW_SET_TIME_REQ_ZONE_UNKNOWN 0xffffUL
+	#define FW_SET_TIME_REQ_ZONE_UTC     0
+	#define FW_SET_TIME_REQ_ZONE_UNKNOWN 65535
 	#define FW_SET_TIME_REQ_ZONE_LAST   FW_SET_TIME_REQ_ZONE_UNKNOWN
 	u8	unused_1[4];
 };
@@ -6033,6 +6963,7 @@
 	#define STRUCT_HDR_STRUCT_ID_DCBX_FEATURE_STATE 0x422UL
 	#define STRUCT_HDR_STRUCT_ID_LLDP_GENERIC       0x424UL
 	#define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE        0x426UL
+	#define STRUCT_HDR_STRUCT_ID_POWER_BKUP         0x427UL
 	#define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE         0x1UL
 	#define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION   0xaUL
 	#define STRUCT_HDR_STRUCT_ID_RSS_V2             0x64UL
@@ -6247,7 +7178,14 @@
 	__le16	seq_id;
 	__le16	resp_len;
 	u8	temp;
-	u8	unused_0[6];
+	u8	phy_temp;
+	u8	om_temp;
+	u8	flags;
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_TEMP_NOT_AVAILABLE         0x1UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_PHY_TEMP_NOT_AVAILABLE     0x2UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_NOT_PRESENT             0x4UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_TEMP_NOT_AVAILABLE      0x8UL
+	u8	unused_0[3];
 	u8	valid;
 };
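
The response now carries PHY and optical-module temperatures plus per-reading validity flags, so a consumer must check the flags before trusting the values. A minimal sketch, assuming a hwmon-style helper with an out-parameter `temp`:

	if (resp->flags & TEMP_MONITOR_QUERY_RESP_FLAGS_TEMP_NOT_AVAILABLE)
		return -EIO;
	*temp = resp->temp;	/* whole degrees Celsius */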
 
@@ -6398,7 +7336,9 @@
 	u8	version_hi;
 	u8	version_low;
 	u8	seg_flags;
-	u8	unused_0[7];
+	u8	compress_flags;
+	#define SFLAG_COMPRESSED_ZLIB     0x1UL
+	u8	unused_0[6];
 };
 
 /* hwrm_dbg_coredump_list_input (size:256b/32B) */
@@ -6411,7 +7351,9 @@
 	__le64	host_dest_addr;
 	__le32	host_buf_len;
 	__le16	seq_no;
-	u8	unused_0[2];
+	u8	flags;
+	#define DBG_COREDUMP_LIST_REQ_FLAGS_CRASHDUMP     0x1UL
+	u8	unused_0[1];
 };
 
 /* hwrm_dbg_coredump_list_output (size:128b/16B) */
@@ -6498,6 +7440,34 @@
 	u8	valid;
 };
 
+/* hwrm_dbg_ring_info_get_input (size:192b/24B) */
+struct hwrm_dbg_ring_info_get_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	u8	ring_type;
+	#define DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL 0x0UL
+	#define DBG_RING_INFO_GET_REQ_RING_TYPE_TX      0x1UL
+	#define DBG_RING_INFO_GET_REQ_RING_TYPE_RX      0x2UL
+	#define DBG_RING_INFO_GET_REQ_RING_TYPE_LAST   DBG_RING_INFO_GET_REQ_RING_TYPE_RX
+	u8	unused_0[3];
+	__le32	fw_ring_id;
+};
+
+/* hwrm_dbg_ring_info_get_output (size:192b/24B) */
+struct hwrm_dbg_ring_info_get_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le32	producer_index;
+	__le32	consumer_index;
+	u8	unused_0[7];
+	u8	valid;
+};
+
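hwrm_dbg_ring_info_get exposes firmware's view of a ring's producer/consumer indices, useful when diagnosing stalled rings. A sketch following the locked request/response pattern used throughout bnxt; the bnxt.c caller is not part of this hunk, and `ring_type`, `fw_ring_id`, `prod` and `cons` are assumed parameters:

	struct hwrm_dbg_ring_info_get_output *resp = bp->hwrm_cmd_resp_addr;
	struct hwrm_dbg_ring_info_get_input req = {0};
	int rc;

	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_RING_INFO_GET, -1, -1);
	req.ring_type = ring_type;
	req.fw_ring_id = cpu_to_le32(fw_ring_id);
	mutex_lock(&bp->hwrm_cmd_lock);
	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
	if (!rc) {
		*prod = le32_to_cpu(resp->producer_index);
		*cons = le32_to_cpu(resp->consumer_index);
	}
	mutex_unlock(&bp->hwrm_cmd_lock);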
 /* hwrm_nvm_read_input (size:320b/40B) */
 struct hwrm_nvm_read_input {
 	__le16	req_type;
@@ -6711,7 +7681,9 @@
 	__le32	nvram_size;
 	__le32	reserved_size;
 	__le32	available_size;
-	u8	unused_0[3];
+	u8	nvm_cfg_ver_maj;
+	u8	nvm_cfg_ver_min;
+	u8	nvm_cfg_ver_upd;
 	u8	valid;
 };
 
@@ -6891,6 +7863,9 @@
 	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_AES256          (0x2UL << 1)
 	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1_AUTH  (0x3UL << 1)
 	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_LAST           NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1_AUTH
+	#define NVM_SET_VARIABLE_REQ_FLAGS_FLAGS_UNUSED_0_MASK        0x70UL
+	#define NVM_SET_VARIABLE_REQ_FLAGS_FLAGS_UNUSED_0_SFT         4
+	#define NVM_SET_VARIABLE_REQ_FLAGS_FACTORY_DEFAULT            0x80UL
 	u8	unused_0;
 };
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 3962f6f..f6f3454 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -25,7 +25,6 @@
 static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
 					  struct bnxt_vf_info *vf, u16 event_id)
 {
-	struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_fwd_async_event_cmpl_input req = {0};
 	struct hwrm_async_event_cmpl *async_cmpl;
 	int rc = 0;
@@ -40,23 +39,10 @@
 	async_cmpl->type = cpu_to_le16(ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
 	async_cmpl->event_id = cpu_to_le16(event_id);
 
-	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-
-	if (rc) {
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
 		netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n",
 			   rc);
-		goto fwd_async_event_cmpl_exit;
-	}
-
-	if (resp->error_code) {
-		netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n",
-			   resp->error_code);
-		rc = -1;
-	}
-
-fwd_async_event_cmpl_exit:
-	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
 
@@ -121,6 +107,52 @@
 	return rc;
 }
 
+static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
+{
+	struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_func_qcfg_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
+	req.fid = cpu_to_le16(vf->fw_fid);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc) {
+		mutex_unlock(&bp->hwrm_cmd_lock);
+		return rc;
+	}
+	vf->func_qcfg_flags = le16_to_cpu(resp->flags);
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return 0;
+}
+
+static bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
+{
+	if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
+		return !!(vf->flags & BNXT_VF_TRUST);
+
+	bnxt_hwrm_func_qcfg_flags(bp, vf);
+	return !!(vf->func_qcfg_flags & FUNC_QCFG_RESP_FLAGS_TRUSTED_VF);
+}
+
+static int bnxt_hwrm_set_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
+{
+	struct hwrm_func_cfg_input req = {0};
+	int rc;
+
+	if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+	req.fid = cpu_to_le16(vf->fw_fid);
+	if (vf->flags & BNXT_VF_TRUST)
+		req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
+	else
+		req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	return rc;
+}
+
 int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trusted)
 {
 	struct bnxt *bp = netdev_priv(dev);
@@ -135,6 +167,7 @@
 	else
 		vf->flags &= ~BNXT_VF_TRUST;
 
+	bnxt_hwrm_set_trusted_vf(bp, vf);
 	return 0;
 }
 
@@ -164,7 +197,7 @@
 	else
 		ivi->qos = 0;
 	ivi->spoofchk = !!(vf->flags & BNXT_VF_SPOOFCHK);
-	ivi->trusted = !!(vf->flags & BNXT_VF_TRUST);
+	ivi->trusted = bnxt_is_trusted_vf(bp, vf);
 	if (!(vf->flags & BNXT_VF_LINK_FORCED))
 		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
 	else if (vf->flags & BNXT_VF_LINK_UP)
@@ -437,10 +470,43 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
+/* Caller must hold bp->hwrm_cmd_lock */
+static void __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
+{
+	struct hwrm_func_cfg_input req = {0};
+	struct bnxt_vf_info *vf;
+
+	vf = &bp->pf.vf[vf_id];
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+	req.fid = cpu_to_le16(vf->fw_fid);
+	req.flags = cpu_to_le32(vf->func_flags);
+
+	if (is_valid_ether_addr(vf->mac_addr)) {
+		req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+		memcpy(req.dflt_mac_addr, vf->mac_addr, ETH_ALEN);
+	}
+	if (vf->vlan) {
+		req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
+		req.dflt_vlan = cpu_to_le16(vf->vlan);
+	}
+	if (vf->max_tx_rate) {
+		req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW);
+		req.max_bw = cpu_to_le32(vf->max_tx_rate);
+#ifdef HAVE_IFLA_TX_RATE
+		req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MIN_BW);
+		req.min_bw = cpu_to_le32(vf->min_tx_rate);
+#endif
+	}
+	if (vf->flags & BNXT_VF_TRUST)
+		req.flags |= cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
+
+	_hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+}
+
 /* Only called by PF to reserve resources for VFs, returns actual number of
  * VFs configured, or < 0 on error.
  */
-static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
+static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
 {
 	struct hwrm_func_vf_resource_cfg_input req = {0};
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
@@ -448,16 +514,22 @@
 	u16 vf_stat_ctx, vf_vnics, vf_ring_grps;
 	struct bnxt_pf_info *pf = &bp->pf;
 	int i, rc = 0, min = 1;
+	u16 vf_msix = 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESOURCE_CFG, -1, -1);
 
-	vf_cp_rings = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings;
-	vf_stat_ctx = hw_resc->max_stat_ctxs - bp->num_stat_ctxs;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp);
+		vf_ring_grps = 0;
+	} else {
+		vf_ring_grps = hw_resc->max_hw_ring_grps - bp->rx_nr_rings;
+	}
+	vf_cp_rings = bnxt_get_avail_cp_rings_for_en(bp);
+	vf_stat_ctx = bnxt_get_avail_stat_ctxs_for_en(bp);
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		vf_rx_rings = hw_resc->max_rx_rings - bp->rx_nr_rings * 2;
 	else
 		vf_rx_rings = hw_resc->max_rx_rings - bp->rx_nr_rings;
-	vf_ring_grps = hw_resc->max_hw_ring_grps - bp->rx_nr_rings;
 	vf_tx_rings = hw_resc->max_tx_rings - bp->tx_nr_rings;
 	vf_vnics = hw_resc->max_vnics - bp->nr_vnics;
 	vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
@@ -476,7 +548,8 @@
 		req.min_l2_ctxs = cpu_to_le16(min);
 		req.min_vnics = cpu_to_le16(min);
 		req.min_stat_ctx = cpu_to_le16(min);
-		req.min_hw_ring_grps = cpu_to_le16(min);
+		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+			req.min_hw_ring_grps = cpu_to_le16(min);
 	} else {
 		vf_cp_rings /= num_vfs;
 		vf_tx_rings /= num_vfs;
@@ -500,16 +573,19 @@
 	req.max_vnics = cpu_to_le16(vf_vnics);
 	req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
 	req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+	if (bp->flags & BNXT_FLAG_CHIP_P5)
+		req.max_msix = cpu_to_le16(vf_msix / num_vfs);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	for (i = 0; i < num_vfs; i++) {
+		if (reset)
+			__bnxt_set_vf_params(bp, i);
+
 		req.vf_id = cpu_to_le16(pf->first_vf_id + i);
 		rc = _hwrm_send_message(bp, &req, sizeof(req),
 					HWRM_CMD_TIMEOUT);
-		if (rc) {
-			rc = -ENOMEM;
+		if (rc)
 			break;
-		}
 		pf->active_vfs = i + 1;
 		pf->vf[i].fw_fid = pf->first_vf_id + i;
 	}
@@ -525,6 +601,8 @@
 		hw_resc->max_rsscos_ctxs -= pf->active_vfs;
 		hw_resc->max_stat_ctxs -= le16_to_cpu(req.min_stat_ctx) * n;
 		hw_resc->max_vnics -= le16_to_cpu(req.min_vnics) * n;
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			hw_resc->max_irqs -= vf_msix * n;
 
 		rc = pf->active_vfs;
 	}
@@ -539,19 +617,16 @@
 	u32 rc = 0, mtu, i;
 	u16 vf_tx_rings, vf_rx_rings, vf_cp_rings, vf_stat_ctx, vf_vnics;
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	u16 vf_ring_grps, max_stat_ctxs;
 	struct hwrm_func_cfg_input req = {0};
 	struct bnxt_pf_info *pf = &bp->pf;
 	int total_vf_tx_rings = 0;
+	u16 vf_ring_grps;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
 
-	max_stat_ctxs = hw_resc->max_stat_ctxs;
-
 	/* Remaining rings are distributed equally among VFs for now */
-	vf_cp_rings = (bnxt_get_max_func_cp_rings_for_en(bp) -
-		       bp->cp_nr_rings) / num_vfs;
-	vf_stat_ctx = (max_stat_ctxs - bp->num_stat_ctxs) / num_vfs;
+	vf_cp_rings = bnxt_get_avail_cp_rings_for_en(bp) / num_vfs;
+	vf_stat_ctx = bnxt_get_avail_stat_ctxs_for_en(bp) / num_vfs;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		vf_rx_rings = (hw_resc->max_rx_rings - bp->rx_nr_rings * 2) /
 			      num_vfs;
@@ -607,8 +682,6 @@
 		total_vf_tx_rings += vf_tx_rsvd;
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
-	if (rc)
-		rc = -ENOMEM;
 	if (pf->active_vfs) {
 		hw_resc->max_tx_rings -= total_vf_tx_rings;
 		hw_resc->max_rx_rings -= vf_rx_rings * num_vfs;
@@ -622,14 +695,40 @@
 	return rc;
 }
 
-static int bnxt_func_cfg(struct bnxt *bp, int num_vfs)
+static int bnxt_func_cfg(struct bnxt *bp, int num_vfs, bool reset)
 {
 	if (BNXT_NEW_RM(bp))
-		return bnxt_hwrm_func_vf_resc_cfg(bp, num_vfs);
+		return bnxt_hwrm_func_vf_resc_cfg(bp, num_vfs, reset);
 	else
 		return bnxt_hwrm_func_cfg(bp, num_vfs);
 }
 
+int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset)
+{
+	int rc;
+
+	/* Register buffers for VFs */
+	rc = bnxt_hwrm_func_buf_rgtr(bp);
+	if (rc)
+		return rc;
+
+	/* Reserve resources for VFs */
+	rc = bnxt_func_cfg(bp, *num_vfs, reset);
+	if (rc != *num_vfs) {
+		if (rc <= 0) {
+			netdev_warn(bp->dev, "Unable to reserve resources for SRIOV.\n");
+			*num_vfs = 0;
+			return rc;
+		}
+		netdev_warn(bp->dev, "Only able to reserve resources for %d VFs.\n",
+			    rc);
+		*num_vfs = rc;
+	}
+
+	bnxt_ulp_sriov_cfg(bp, *num_vfs);
+	return 0;
+}
+
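Factoring buffer registration and VF resource reservation into bnxt_cfg_hw_sriov() gives the firmware-reset recovery path a way to re-apply SRIOV config with reset == true, so __bnxt_set_vf_params() restores per-VF settings as resources are reserved. A hedged sketch of that call site (the recovery code itself lands in bnxt.c):

	int n = bp->pf.active_vfs;

	if (n)
		rc = bnxt_cfg_hw_sriov(bp, &n, true);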
 static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs)
 {
 	int rc = 0, vfs_supported;
@@ -644,8 +743,8 @@
 	 */
 	vfs_supported = *num_vfs;
 
-	avail_cp = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings;
-	avail_stat = hw_resc->max_stat_ctxs - bp->num_stat_ctxs;
+	avail_cp = bnxt_get_avail_cp_rings_for_en(bp);
+	avail_stat = bnxt_get_avail_stat_ctxs_for_en(bp);
 	avail_cp = min_t(int, avail_cp, avail_stat);
 
 	while (vfs_supported) {
@@ -695,25 +794,10 @@
 	if (rc)
 		goto err_out1;
 
-	/* Reserve resources for VFs */
-	rc = bnxt_func_cfg(bp, *num_vfs);
-	if (rc != *num_vfs) {
-		if (rc <= 0) {
-			netdev_warn(bp->dev, "Unable to reserve resources for SRIOV.\n");
-			*num_vfs = 0;
-			goto err_out2;
-		}
-		netdev_warn(bp->dev, "Only able to reserve resources for %d VFs.\n", rc);
-		*num_vfs = rc;
-	}
-
-	/* Register buffers for VFs */
-	rc = bnxt_hwrm_func_buf_rgtr(bp);
+	rc = bnxt_cfg_hw_sriov(bp, num_vfs, false);
 	if (rc)
 		goto err_out2;
 
-	bnxt_ulp_sriov_cfg(bp, *num_vfs);
-
 	rc = pci_enable_sriov(bp->pdev, *num_vfs);
 	if (rc)
 		goto err_out2;
@@ -780,6 +864,11 @@
 		rtnl_unlock();
 		return 0;
 	}
+	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+		netdev_warn(dev, "Reject SRIOV config request when FW reset is in progress\n");
+		rtnl_unlock();
+		return 0;
+	}
 	bp->sriov_cfg = true;
 	rtnl_unlock();
 
@@ -813,7 +902,6 @@
 {
 	int rc = 0;
 	struct hwrm_fwd_resp_input req = {0};
-	struct hwrm_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (BNXT_FWD_RESP_SIZE_ERR(msg_size))
 		return -EINVAL;
@@ -828,22 +916,9 @@
 	req.encap_resp_cmpl_ring = encap_resp_cpr;
 	memcpy(req.encap_resp, encap_resp, msg_size);
 
-	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-
-	if (rc) {
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
 		netdev_err(bp->dev, "hwrm_fwd_resp failed. rc:%d\n", rc);
-		goto fwd_resp_exit;
-	}
-
-	if (resp->error_code) {
-		netdev_err(bp->dev, "hwrm_fwd_resp error %d\n",
-			   resp->error_code);
-		rc = -1;
-	}
-
-fwd_resp_exit:
-	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
 
@@ -852,7 +927,6 @@
 {
 	int rc = 0;
 	struct hwrm_reject_fwd_resp_input req = {0};
-	struct hwrm_reject_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size))
 		return -EINVAL;
@@ -863,22 +937,9 @@
 	req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
 	memcpy(req.encap_request, vf->hwrm_cmd_req_addr, msg_size);
 
-	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-
-	if (rc) {
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
 		netdev_err(bp->dev, "hwrm_fwd_err_resp failed. rc:%d\n", rc);
-		goto fwd_err_resp_exit;
-	}
-
-	if (resp->error_code) {
-		netdev_err(bp->dev, "hwrm_fwd_err_resp error %d\n",
-			   resp->error_code);
-		rc = -1;
-	}
-
-fwd_err_resp_exit:
-	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
 
@@ -887,7 +948,6 @@
 {
 	int rc = 0;
 	struct hwrm_exec_fwd_resp_input req = {0};
-	struct hwrm_exec_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size))
 		return -EINVAL;
@@ -898,22 +958,9 @@
 	req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
 	memcpy(req.encap_request, vf->hwrm_cmd_req_addr, msg_size);
 
-	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-
-	if (rc) {
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
 		netdev_err(bp->dev, "hwrm_exec_fw_resp failed. rc:%d\n", rc);
-		goto exec_fwd_resp_exit;
-	}
-
-	if (resp->error_code) {
-		netdev_err(bp->dev, "hwrm_exec_fw_resp error %d\n",
-			   resp->error_code);
-		rc = -1;
-	}
-
-exec_fwd_resp_exit:
-	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
 
@@ -927,9 +974,10 @@
 	 * if the PF assigned MAC address is zero
 	 */
 	if (req->enables & cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR)) {
+		bool trust = bnxt_is_trusted_vf(bp, vf);
+
 		if (is_valid_ether_addr(req->dflt_mac_addr) &&
-		    ((vf->flags & BNXT_VF_TRUST) ||
-		     !is_valid_ether_addr(vf->mac_addr) ||
+		    (trust || !is_valid_ether_addr(vf->mac_addr) ||
 		     ether_addr_equal(req->dflt_mac_addr, vf->mac_addr))) {
 			ether_addr_copy(vf->vf_mac_addr, req->dflt_mac_addr);
 			return bnxt_hwrm_exec_fwd_resp(bp, vf, msg_size);
@@ -954,7 +1002,7 @@
 	 * Otherwise, it must match the VF MAC address if firmware spec >=
 	 * 1.2.2
 	 */
-	if (vf->flags & BNXT_VF_TRUST) {
+	if (bnxt_is_trusted_vf(bp, vf)) {
 		mac_ok = true;
 	} else if (is_valid_ether_addr(vf->mac_addr)) {
 		if (ether_addr_equal((const u8 *)req->l2_addr, vf->mac_addr))
@@ -1132,6 +1180,13 @@
 }
 #else
 
+int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset)
+{
+	if (*num_vfs)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
 void bnxt_sriov_disable(struct bnxt *bp)
 {
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 2eed9ed..629641b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -36,6 +36,7 @@
 int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
 int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust);
 int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs);
+int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset);
 void bnxt_sriov_disable(struct bnxt *);
 void bnxt_hwrm_exec_fwd_req(struct bnxt *);
 void bnxt_update_vf_mac(struct bnxt *);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index e1594c9..c8062d0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -45,7 +45,7 @@
 	struct bnxt *bp;
 
 	/* check if dev belongs to the same switch */
-	if (!switchdev_port_same_parent_id(pf_bp->dev, dev)) {
+	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
 		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
 			    dev->ifindex);
 		return BNXT_FID_INVALID;
@@ -61,9 +61,9 @@
 
 static int bnxt_tc_parse_redir(struct bnxt *bp,
 			       struct bnxt_tc_actions *actions,
-			       const struct tc_action *tc_act)
+			       const struct flow_action_entry *act)
 {
-	struct net_device *dev = tcf_mirred_dev(tc_act);
+	struct net_device *dev = act->dev;
 
 	if (!dev) {
 		netdev_info(bp->dev, "no dev in mirred action");
@@ -77,16 +77,16 @@
 
 static int bnxt_tc_parse_vlan(struct bnxt *bp,
 			      struct bnxt_tc_actions *actions,
-			      const struct tc_action *tc_act)
+			      const struct flow_action_entry *act)
 {
-	switch (tcf_vlan_action(tc_act)) {
-	case TCA_VLAN_ACT_POP:
+	switch (act->id) {
+	case FLOW_ACTION_VLAN_POP:
 		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
 		break;
-	case TCA_VLAN_ACT_PUSH:
+	case FLOW_ACTION_VLAN_PUSH:
 		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
-		actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act));
-		actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act);
+		actions->push_vlan_tci = htons(act->vlan.vid);
+		actions->push_vlan_tpid = act->vlan.proto;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -96,10 +96,10 @@
 
 static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
 				    struct bnxt_tc_actions *actions,
-				    const struct tc_action *tc_act)
+				    const struct flow_action_entry *act)
 {
-	struct ip_tunnel_info *tun_info = tcf_tunnel_info(tc_act);
-	struct ip_tunnel_key *tun_key = &tun_info->key;
+	const struct ip_tunnel_info *tun_info = act->tunnel;
+	const struct ip_tunnel_key *tun_key = &tun_info->key;
 
 	if (ip_tunnel_info_af(tun_info) != AF_INET) {
 		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported");
@@ -113,51 +113,43 @@
 
 static int bnxt_tc_parse_actions(struct bnxt *bp,
 				 struct bnxt_tc_actions *actions,
-				 struct tcf_exts *tc_exts)
+				 struct flow_action *flow_action)
 {
-	const struct tc_action *tc_act;
+	struct flow_action_entry *act;
 	int i, rc;
 
-	if (!tcf_exts_has_actions(tc_exts)) {
+	if (!flow_action_has_entries(flow_action)) {
 		netdev_info(bp->dev, "no actions");
 		return -EINVAL;
 	}
 
-	tcf_exts_for_each_action(i, tc_act, tc_exts) {
-		/* Drop action */
-		if (is_tcf_gact_shot(tc_act)) {
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
 			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
 			return 0; /* don't bother with other actions */
-		}
-
-		/* Redirect action */
-		if (is_tcf_mirred_egress_redirect(tc_act)) {
-			rc = bnxt_tc_parse_redir(bp, actions, tc_act);
+		case FLOW_ACTION_REDIRECT:
+			rc = bnxt_tc_parse_redir(bp, actions, act);
 			if (rc)
 				return rc;
-			continue;
-		}
-
-		/* Push/pop VLAN */
-		if (is_tcf_vlan(tc_act)) {
-			rc = bnxt_tc_parse_vlan(bp, actions, tc_act);
+			break;
+		case FLOW_ACTION_VLAN_POP:
+		case FLOW_ACTION_VLAN_PUSH:
+		case FLOW_ACTION_VLAN_MANGLE:
+			rc = bnxt_tc_parse_vlan(bp, actions, act);
 			if (rc)
 				return rc;
-			continue;
-		}
-
-		/* Tunnel encap */
-		if (is_tcf_tunnel_set(tc_act)) {
-			rc = bnxt_tc_parse_tunnel_set(bp, actions, tc_act);
+			break;
+		case FLOW_ACTION_TUNNEL_ENCAP:
+			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
 			if (rc)
 				return rc;
-			continue;
-		}
-
-		/* Tunnel decap */
-		if (is_tcf_tunnel_release(tc_act)) {
+			break;
+		case FLOW_ACTION_TUNNEL_DECAP:
 			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
-			continue;
+			break;
+		default:
+			break;
 		}
 	}
 
@@ -177,19 +169,12 @@
 	return 0;
 }
 
-#define GET_KEY(flow_cmd, key_type)					\
-		skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
-					  (flow_cmd)->key)
-#define GET_MASK(flow_cmd, key_type)					\
-		skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
-					  (flow_cmd)->mask)
-
 static int bnxt_tc_parse_flow(struct bnxt *bp,
-			      struct tc_cls_flower_offload *tc_flow_cmd,
+			      struct flow_cls_offload *tc_flow_cmd,
 			      struct bnxt_tc_flow *flow)
 {
-	struct flow_dissector *dissector = tc_flow_cmd->dissector;
-	u16 addr_type = 0;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
+	struct flow_dissector *dissector = rule->match.dissector;
 
 	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
 	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
@@ -199,174 +184,141 @@
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_CONTROL);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		addr_type = key->addr_type;
-	}
+		flow_rule_match_basic(rule, &match);
+		flow->l2_key.ether_type = match.key->n_proto;
+		flow->l2_mask.ether_type = match.mask->n_proto;
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
-		struct flow_dissector_key_basic *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
-
-		flow->l2_key.ether_type = key->n_proto;
-		flow->l2_mask.ether_type = mask->n_proto;
-
-		if (key->n_proto == htons(ETH_P_IP) ||
-		    key->n_proto == htons(ETH_P_IPV6)) {
-			flow->l4_key.ip_proto = key->ip_proto;
-			flow->l4_mask.ip_proto = mask->ip_proto;
+		if (match.key->n_proto == htons(ETH_P_IP) ||
+		    match.key->n_proto == htons(ETH_P_IPV6)) {
+			flow->l4_key.ip_proto = match.key->ip_proto;
+			flow->l4_mask.ip_proto = match.mask->ip_proto;
 		}
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
-		struct flow_dissector_key_eth_addrs *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
+		flow_rule_match_eth_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
-		ether_addr_copy(flow->l2_key.dmac, key->dst);
-		ether_addr_copy(flow->l2_mask.dmac, mask->dst);
-		ether_addr_copy(flow->l2_key.smac, key->src);
-		ether_addr_copy(flow->l2_mask.smac, mask->src);
+		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
+		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
+		ether_addr_copy(flow->l2_key.smac, match.key->src);
+		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
-		struct flow_dissector_key_vlan *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
+		flow_rule_match_vlan(rule, &match);
 		flow->l2_key.inner_vlan_tci =
-		   cpu_to_be16(VLAN_TCI(key->vlan_id, key->vlan_priority));
+			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
+					     match.key->vlan_priority));
 		flow->l2_mask.inner_vlan_tci =
-		   cpu_to_be16((VLAN_TCI(mask->vlan_id, mask->vlan_priority)));
+			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
+					      match.mask->vlan_priority)));
 		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
 		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
 		flow->l2_key.num_vlans = 1;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
+		flow_rule_match_ipv4_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
-		flow->l3_key.ipv4.daddr.s_addr = key->dst;
-		flow->l3_mask.ipv4.daddr.s_addr = mask->dst;
-		flow->l3_key.ipv4.saddr.s_addr = key->src;
-		flow->l3_mask.ipv4.saddr.s_addr = mask->src;
-	} else if (dissector_uses_key(dissector,
-				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
+		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
+		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
+		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
+	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
 
+		flow_rule_match_ipv6_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
-		flow->l3_key.ipv6.daddr = key->dst;
-		flow->l3_mask.ipv6.daddr = mask->dst;
-		flow->l3_key.ipv6.saddr = key->src;
-		flow->l3_mask.ipv6.saddr = mask->src;
+		flow->l3_key.ipv6.daddr = match.key->dst;
+		flow->l3_mask.ipv6.daddr = match.mask->dst;
+		flow->l3_key.ipv6.saddr = match.key->src;
+		flow->l3_mask.ipv6.saddr = match.mask->src;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
-		struct flow_dissector_key_ports *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
 
+		flow_rule_match_ports(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
-		flow->l4_key.ports.dport = key->dst;
-		flow->l4_mask.ports.dport = mask->dst;
-		flow->l4_key.ports.sport = key->src;
-		flow->l4_mask.ports.sport = mask->src;
+		flow->l4_key.ports.dport = match.key->dst;
+		flow->l4_mask.ports.dport = match.mask->dst;
+		flow->l4_key.ports.sport = match.key->src;
+		flow->l4_mask.ports.sport = match.mask->src;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ICMP)) {
-		struct flow_dissector_key_icmp *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
-		struct flow_dissector_key_icmp *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_match_icmp match;
 
+		flow_rule_match_icmp(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
-		flow->l4_key.icmp.type = key->type;
-		flow->l4_key.icmp.code = key->code;
-		flow->l4_mask.icmp.type = mask->type;
-		flow->l4_mask.icmp.code = mask->code;
+		flow->l4_key.icmp.type = match.key->type;
+		flow->l4_key.icmp.code = match.key->code;
+		flow->l4_mask.icmp.type = match.mask->type;
+		flow->l4_mask.icmp.code = match.mask->code;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_CONTROL);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
-		addr_type = key->addr_type;
-	}
-
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
-		struct flow_dissector_key_ipv4_addrs *mask =
-				GET_MASK(tc_flow_cmd,
-					 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
-
+		flow_rule_match_enc_ipv4_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
-		flow->tun_key.u.ipv4.dst = key->dst;
-		flow->tun_mask.u.ipv4.dst = mask->dst;
-		flow->tun_key.u.ipv4.src = key->src;
-		flow->tun_mask.u.ipv4.src = mask->src;
-	} else if (dissector_uses_key(dissector,
+		flow->tun_key.u.ipv4.dst = match.key->dst;
+		flow->tun_mask.u.ipv4.dst = match.mask->dst;
+		flow->tun_key.u.ipv4.src = match.key->src;
+		flow->tun_mask.u.ipv4.src = match.mask->src;
+	} else if (flow_rule_match_key(rule,
 				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID);
-		struct flow_dissector_key_keyid *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 
+		flow_rule_match_enc_keyid(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
-		flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid);
-		flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid);
+		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
+		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS);
-		struct flow_dissector_key_ports *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+		struct flow_match_ports match;
 
+		flow_rule_match_enc_ports(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
-		flow->tun_key.tp_dst = key->dst;
-		flow->tun_mask.tp_dst = mask->dst;
-		flow->tun_key.tp_src = key->src;
-		flow->tun_mask.tp_src = mask->src;
+		flow->tun_key.tp_dst = match.key->dst;
+		flow->tun_mask.tp_dst = match.mask->dst;
+		flow->tun_key.tp_src = match.key->src;
+		flow->tun_mask.tp_src = match.mask->src;
 	}
 
-	return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts);
+	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action);
 }
 
-static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
+static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
+				   struct bnxt_tc_flow_node *flow_node)
 {
 	struct hwrm_cfa_flow_free_input req = { 0 };
 	int rc;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
-	req.flow_handle = flow_handle;
+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+		req.ext_flow_handle = flow_node->ext_flow_handle;
+	else
+		req.flow_handle = flow_node->flow_handle;
 
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
-		netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d",
-			    __func__, flow_handle, rc);
+		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
 
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -433,13 +385,14 @@
 
 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 				    __le16 ref_flow_handle,
-				    __le32 tunnel_handle, __le16 *flow_handle)
+				    __le32 tunnel_handle,
+				    struct bnxt_tc_flow_node *flow_node)
 {
-	struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr;
 	struct bnxt_tc_actions *actions = &flow->actions;
 	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
 	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
 	struct hwrm_cfa_flow_alloc_input req = { 0 };
+	struct hwrm_cfa_flow_alloc_output *resp;
 	u16 flow_flags = 0, action_flags = 0;
 	int rc;
 
@@ -542,14 +495,24 @@
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
-		*flow_handle = resp->flow_handle;
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
+		/* CFA_FLOW_ALLOC response interpretation:
+		 *		    fw with	     fw with
+		 *		    16-bit	     64-bit
+		 *		    flow handle      flow handle
+		 *		    ===========	     ===========
+		 * flow_handle      flow handle      flow context id
+		 * ext_flow_handle  INVALID	     flow handle
+		 * flow_id	    INVALID	     flow counter id
+		 */
+		flow_node->flow_handle = resp->flow_handle;
+		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
+			flow_node->ext_flow_handle = resp->ext_flow_handle;
+			flow_node->flow_id = resp->flow_id;
+		}
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
-
-	if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR)
-		rc = -ENOSPC;
-	else if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -559,9 +522,8 @@
 				       __le32 ref_decap_handle,
 				       __le32 *decap_filter_handle)
 {
-	struct hwrm_cfa_decap_filter_alloc_output *resp =
-						bp->hwrm_cmd_resp_addr;
 	struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
+	struct hwrm_cfa_decap_filter_alloc_output *resp;
 	struct ip_tunnel_key *tun_key = &flow->tun_key;
 	u32 enables = 0;
 	int rc;
@@ -614,14 +576,14 @@
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
 		*decap_filter_handle = resp->decap_filter_id;
-	else
+	} else {
 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -638,8 +600,6 @@
 	if (rc)
 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
 
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -648,9 +608,8 @@
 				       struct bnxt_tc_l2_key *l2_info,
 				       __le32 *encap_record_handle)
 {
-	struct hwrm_cfa_encap_record_alloc_output *resp =
-						bp->hwrm_cmd_resp_addr;
 	struct hwrm_cfa_encap_record_alloc_input req = { 0 };
+	struct hwrm_cfa_encap_record_alloc_output *resp;
 	struct hwrm_cfa_encap_data_vxlan *encap =
 			(struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
 	struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
@@ -682,14 +641,14 @@
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
 		*encap_record_handle = resp->encap_record_id;
-	else
+	} else {
 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -706,8 +665,6 @@
 	if (rc)
 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
 
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -1239,7 +1196,7 @@
 	int rc;
 
 	/* send HWRM cmd to free the flow-id */
-	bnxt_hwrm_cfa_flow_free(bp, flow_node->flow_handle);
+	bnxt_hwrm_cfa_flow_free(bp, flow_node);
 
 	mutex_lock(&tc_info->lock);
 
@@ -1261,6 +1218,12 @@
 	return 0;
 }
 
+static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				 u16 src_fid)
+{
+	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
+}
+
 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
 				u16 src_fid)
 {
@@ -1284,7 +1247,7 @@
  * The hash-tables are already protected by the rhashtable API.
  */
 static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
-			    struct tc_cls_flower_offload *tc_flow_cmd)
+			    struct flow_cls_offload *tc_flow_cmd)
 {
 	struct bnxt_tc_flow_node *new_node, *old_node;
 	struct bnxt_tc_info *tc_info = bp->tc_info;
@@ -1307,9 +1270,10 @@
 		goto free_node;
 
 	bnxt_tc_set_src_fid(bp, flow, src_fid);
+	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
 
 	if (!bnxt_tc_can_offload(bp, flow)) {
-		rc = -ENOSPC;
+		rc = -EOPNOTSUPP;
 		goto free_node;
 	}
 
@@ -1335,7 +1299,7 @@
 
 	/* send HWRM cmd to alloc the flow */
 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
-				      tunnel_handle, &new_node->flow_handle);
+				      tunnel_handle, new_node);
 	if (rc)
 		goto put_tunnel;
 
@@ -1351,7 +1315,7 @@
 	return 0;
 
 hwrm_flow_free:
-	bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle);
+	bnxt_hwrm_cfa_flow_free(bp, new_node);
 put_tunnel:
 	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
 put_l2:
@@ -1367,7 +1331,7 @@
 }
 
 static int bnxt_tc_del_flow(struct bnxt *bp,
-			    struct tc_cls_flower_offload *tc_flow_cmd)
+			    struct flow_cls_offload *tc_flow_cmd)
 {
 	struct bnxt_tc_info *tc_info = bp->tc_info;
 	struct bnxt_tc_flow_node *flow_node;
@@ -1382,7 +1346,7 @@
 }
 
 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
-				  struct tc_cls_flower_offload *tc_flow_cmd)
+				  struct flow_cls_offload *tc_flow_cmd)
 {
 	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
 	struct bnxt_tc_info *tc_info = bp->tc_info;
@@ -1407,18 +1371,45 @@
 	lastused = flow->lastused;
 	spin_unlock(&flow->stats_lock);
 
-	tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets,
-			      lastused);
+	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets,
+			  lastused);
 	return 0;
 }
 
+static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
+				    struct bnxt_tc_flow_node *flow_node,
+				    __le16 *flow_handle, __le32 *flow_id)
+{
+	u16 handle;
+
+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
+		*flow_id = flow_node->flow_id;
+
+		/* If flow_id is used to fetch flow stats then:
+		 * 1. lower 12 bits of flow_handle must be set to all 1s.
+		 * 2. 15th bit of flow_handle must specify the flow
+		 *    direction (TX/RX).
+		 */
+		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
+			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
+				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
+		else
+			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
+
+		*flow_handle = cpu_to_le16(handle);
+	} else {
+		*flow_handle = flow_node->flow_handle;
+	}
+}
+
 static int
 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
 			     struct bnxt_tc_stats_batch stats_batch[])
 {
-	struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_cfa_flow_stats_input req = { 0 };
+	struct hwrm_cfa_flow_stats_output *resp;
 	__le16 *req_flow_handles = &req.flow_handle_0;
+	__le32 *req_flow_ids = &req.flow_id_0;
 	int rc, i;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
@@ -1426,14 +1417,19 @@
 	for (i = 0; i < num_flows; i++) {
 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
 
-		req_flow_handles[i] = flow_node->flow_handle;
+		bnxt_fill_cfa_stats_req(bp, flow_node,
+					&req_flow_handles[i], &req_flow_ids[i]);
 	}
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (!rc) {
-		__le64 *resp_packets = &resp->packet_0;
-		__le64 *resp_bytes = &resp->byte_0;
+		__le64 *resp_packets;
+		__le64 *resp_bytes;
+
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
+		resp_packets = &resp->packet_0;
+		resp_bytes = &resp->byte_0;
 
 		for (i = 0; i < num_flows; i++) {
 			stats_batch[i].hw_stats.packets =
@@ -1446,8 +1442,6 @@
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 
-	if (rc)
-		rc = -EIO;
 	return rc;
 }
 
@@ -1572,14 +1566,14 @@
 }
 
 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
-			 struct tc_cls_flower_offload *cls_flower)
+			 struct flow_cls_offload *cls_flower)
 {
 	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
+	case FLOW_CLS_REPLACE:
 		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
-	case TC_CLSFLOWER_DESTROY:
+	case FLOW_CLS_DESTROY:
 		return bnxt_tc_del_flow(bp, cls_flower);
-	case TC_CLSFLOWER_STATS:
+	case FLOW_CLS_STATS:
 		return bnxt_tc_get_flow_stats(bp, cls_flower);
 	default:
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
index 97e09a8..4f05305 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -23,6 +23,9 @@
 	__be16		inner_vlan_tci;
 	__be16		ether_type;
 	u8		num_vlans;
+	u8		dir;
+#define BNXT_DIR_RX	1
+#define BNXT_DIR_TX	0
 };
 
 struct bnxt_tc_l3_key {
@@ -170,7 +173,9 @@
 
 	struct bnxt_tc_flow		flow;
 
+	__le64				ext_flow_handle;
 	__le16				flow_handle;
+	__le32				flow_id;
 
 	/* L2 node in l2 hashtable that shares flow's l2 key */
 	struct bnxt_tc_l2_node		*l2_node;
@@ -191,7 +196,7 @@
 };
 
 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
-			 struct tc_cls_flower_offload *cls_flower);
+			 struct flow_cls_offload *cls_flower);
 int bnxt_init_tc(struct bnxt *bp);
 void bnxt_shutdown_tc(struct bnxt *bp);
 void bnxt_tc_flow_stats_work(struct bnxt *bp);
@@ -204,7 +209,7 @@
 #else /* CONFIG_BNXT_FLOWER_OFFLOAD */
 
 static inline int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
-				       struct tc_cls_flower_offload *cls_flower)
+				       struct flow_cls_offload *cls_flower)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index beee612..b2c1609 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -45,10 +45,8 @@
 
 		max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
 		if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS ||
-		    bp->num_stat_ctxs == max_stat_ctxs)
+		    bp->cp_nr_rings == max_stat_ctxs)
 			return -ENOMEM;
-		bnxt_set_max_func_stat_ctxs(bp, max_stat_ctxs -
-					    BNXT_MIN_ROCE_STAT_CTXS);
 	}
 
 	atomic_set(&ulp->ref_count, 0);
@@ -79,14 +77,9 @@
 		netdev_err(bp->dev, "ulp id %d not registered\n", ulp_id);
 		return -EINVAL;
 	}
-	if (ulp_id == BNXT_ROCE_ULP) {
-		unsigned int max_stat_ctxs;
+	if (ulp_id == BNXT_ROCE_ULP && ulp->msix_requested)
+		edev->en_ops->bnxt_free_msix(edev, ulp_id);
 
-		max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
-		bnxt_set_max_func_stat_ctxs(bp, max_stat_ctxs + 1);
-		if (ulp->msix_requested)
-			edev->en_ops->bnxt_free_msix(edev, ulp_id);
-	}
 	if (ulp->max_async_event_id)
 		bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
 
@@ -154,7 +147,7 @@
 			bnxt_close_nic(bp, true, false);
 			rc = bnxt_open_nic(bp, true, false);
 		} else {
-			rc = bnxt_reserve_rings(bp);
+			rc = bnxt_reserve_rings(bp, true);
 		}
 	}
 	if (rc) {
@@ -164,8 +157,10 @@
 
 	if (BNXT_NEW_RM(bp)) {
 		struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+		int resv_msix;
 
-		avail_msix = hw_resc->resv_cp_rings - bp->cp_nr_rings;
+		resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings;
+		avail_msix = min_t(int, resv_msix, avail_msix);
 		edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
 	}
 	bnxt_fill_msix_vecs(bp, ent);
@@ -215,6 +210,14 @@
 	return 0;
 }
 
+int bnxt_get_ulp_stat_ctxs(struct bnxt *bp)
+{
+	if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
+		return BNXT_MIN_ROCE_STAT_CTXS;
+
+	return 0;
+}
+
 static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id,
 			 struct bnxt_fw_msg *fw_msg)
 {
@@ -223,6 +226,9 @@
 	struct input *req;
 	int rc;
 
+	if (ulp_id != BNXT_ROCE_ULP && bp->fw_reset_state)
+		return -EBUSY;
+
 	mutex_lock(&bp->hwrm_cmd_lock);
 	req = fw_msg->msg;
 	req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index d9bea37..cd78453 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -90,6 +90,7 @@
 
 int bnxt_get_ulp_msix_num(struct bnxt *bp);
 int bnxt_get_ulp_msix_base(struct bnxt *bp);
+int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
 void bnxt_ulp_stop(struct bnxt *bp);
 void bnxt_ulp_start(struct bnxt *bp);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index e31f5d8..f9bf7d7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -161,34 +161,19 @@
 	}
 }
 
-static int bnxt_vf_rep_setup_tc_block(struct net_device *dev,
-				      struct tc_block_offload *f)
-{
-	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block,
-					     bnxt_vf_rep_setup_tc_block_cb,
-					     vf_rep, vf_rep, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block,
-					bnxt_vf_rep_setup_tc_block_cb, vf_rep);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(bnxt_vf_block_cb_list);
 
 static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 				void *type_data)
 {
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return bnxt_vf_rep_setup_tc_block(dev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &bnxt_vf_block_cb_list,
+						  bnxt_vf_rep_setup_tc_block_cb,
+						  vf_rep, vf_rep, true);
 	default:
 		return -EOPNOTSUPP;
 	}
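flow_block_cb_setup_simple() folds the old open-coded bind/unbind switch into a single helper. A rough userspace approximation of its control flow (the real kernel helper also allocates a flow_block_cb, tracks it on the driver's block list, and, with the final ingress_only argument set, rejects non-ingress binders):

    #include <stdio.h>

    enum flow_block_command { FLOW_BLOCK_BIND, FLOW_BLOCK_UNBIND };

    /* Very rough sketch of what the helper replaces above; error code
     * -95 stands in for -EOPNOTSUPP.
     */
    static int block_setup_simple(enum flow_block_command cmd, int ingress,
    			      int ingress_only)
    {
    	if (ingress_only && !ingress)
    		return -95;

    	switch (cmd) {
    	case FLOW_BLOCK_BIND:
    		printf("register classifier callback\n");
    		return 0;
    	case FLOW_BLOCK_UNBIND:
    		printf("unregister classifier callback\n");
    		return 0;
    	}
    	return -95;
    }

    int main(void)
    {
    	block_setup_simple(FLOW_BLOCK_BIND, 1, 1);
    	block_setup_simple(FLOW_BLOCK_UNBIND, 1, 1);
    	return 0;
    }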
@@ -209,9 +194,7 @@
 void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
 {
 	struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev);
-	struct bnxt_vf_rep_stats *rx_stats;
 
-	rx_stats = &vf_rep->rx_stats;
 	vf_rep->rx_stats.bytes += skb->len;
 	vf_rep->rx_stats.packets++;
 
@@ -239,21 +222,17 @@
 	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
 }
 
-static int bnxt_vf_rep_port_attr_get(struct net_device *dev,
-				     struct switchdev_attr *attr)
+static int bnxt_vf_rep_get_port_parent_id(struct net_device *dev,
+					  struct netdev_phys_item_id *ppid)
 {
 	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
 
 	/* as only PORT_PARENT_ID is supported currently use common code
 	 * between PF and VF-rep for now.
 	 */
-	return bnxt_port_attr_get(vf_rep->bp, attr);
+	return bnxt_get_port_parent_id(vf_rep->bp->dev, ppid);
 }
 
-static const struct switchdev_ops bnxt_vf_rep_switchdev_ops = {
-	.switchdev_port_attr_get	= bnxt_vf_rep_port_attr_get
-};
-
 static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = {
 	.get_drvinfo		= bnxt_vf_rep_get_drvinfo
 };
@@ -264,6 +243,7 @@
 	.ndo_start_xmit		= bnxt_vf_rep_xmit,
 	.ndo_get_stats64	= bnxt_vf_rep_get_stats64,
 	.ndo_setup_tc		= bnxt_vf_rep_setup_tc,
+	.ndo_get_port_parent_id	= bnxt_vf_rep_get_port_parent_id,
 	.ndo_get_phys_port_name = bnxt_vf_rep_get_phys_port_name
 };
 
@@ -394,7 +374,6 @@
 
 	dev->netdev_ops = &bnxt_vf_rep_netdev_ops;
 	dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops;
-	SWITCHDEV_SET_OPS(dev, &bnxt_vf_rep_switchdev_ops);
 	/* Just inherit all the features of the parent PF as the VF-R
 	 * uses the RX/TX rings of the parent PF
 	 */
@@ -412,26 +391,6 @@
 	dev->min_mtu = ETH_ZLEN;
 }
 
-static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
-{
-	struct pci_dev *pdev = bp->pdev;
-	int pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
-	u32 dw;
-
-	if (!pos) {
-		netdev_info(bp->dev, "Unable do read adapter's DSN");
-		return -EOPNOTSUPP;
-	}
-
-	/* DSN (two dw) is at an offset of 4 from the cap pos */
-	pos += 4;
-	pci_read_config_dword(pdev, pos, &dw);
-	put_unaligned_le32(dw, &dsn[0]);
-	pci_read_config_dword(pdev, pos + 4, &dw);
-	put_unaligned_le32(dw, &dsn[4]);
-	return 0;
-}
-
 static int bnxt_vf_reps_create(struct bnxt *bp)
 {
 	u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev);
@@ -496,11 +455,6 @@
 		}
 	}
 
-	/* Read the adapter's DSN to use as the eswitch switch_id */
-	rc = bnxt_pcie_dsn_get(bp, bp->switch_id);
-	if (rc)
-		goto err;
-
 	/* publish cfa_code_map only after all VF-reps have been initialized */
 	bp->cfa_code_map = cfa_code_map;
 	bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
@@ -523,7 +477,8 @@
 	return 0;
 }
 
-int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
+int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
+			     struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
 	int rc = 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
index 38b9a75..d728765 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -30,7 +30,8 @@
 
 bool bnxt_dev_is_vf_rep(struct net_device *dev);
 int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode);
-int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
+			     struct netlink_ext_ack *extack);
 
 #else
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 0584d07..c6f6f20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -15,12 +15,14 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/filter.h>
+#include <net/page_pool.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_xdp.h"
 
-void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
-		   dma_addr_t mapping, u32 len, u16 rx_prod)
+struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
+				   struct bnxt_tx_ring_info *txr,
+				   dma_addr_t mapping, u32 len)
 {
 	struct bnxt_sw_tx_bd *tx_buf;
 	struct tx_bd *txbd;
@@ -29,7 +31,6 @@
 
 	prod = txr->tx_prod;
 	tx_buf = &txr->tx_buf_ring[prod];
-	tx_buf->rx_prod = rx_prod;
 
 	txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
 	flags = (len << TX_BD_LEN_SHIFT) | (1 << TX_BD_FLAGS_BD_CNT_SHIFT) |
@@ -40,30 +41,67 @@
 
 	prod = NEXT_TX(prod);
 	txr->tx_prod = prod;
+	return tx_buf;
+}
+
+static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+			    dma_addr_t mapping, u32 len, u16 rx_prod)
+{
+	struct bnxt_sw_tx_bd *tx_buf;
+
+	tx_buf = bnxt_xmit_bd(bp, txr, mapping, len);
+	tx_buf->rx_prod = rx_prod;
+	tx_buf->action = XDP_TX;
+}
+
+static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
+				     struct bnxt_tx_ring_info *txr,
+				     dma_addr_t mapping, u32 len,
+				     struct xdp_frame *xdpf)
+{
+	struct bnxt_sw_tx_bd *tx_buf;
+
+	tx_buf = bnxt_xmit_bd(bp, txr, mapping, len);
+	tx_buf->action = XDP_REDIRECT;
+	tx_buf->xdpf = xdpf;
+	dma_unmap_addr_set(tx_buf, mapping, mapping);
+	dma_unmap_len_set(tx_buf, len, 0);
 }
 
 void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+	bool rx_doorbell_needed = false;
 	struct bnxt_sw_tx_bd *tx_buf;
 	u16 tx_cons = txr->tx_cons;
 	u16 last_tx_cons = tx_cons;
-	u16 rx_prod;
 	int i;
 
 	for (i = 0; i < nr_pkts; i++) {
-		last_tx_cons = tx_cons;
+		tx_buf = &txr->tx_buf_ring[tx_cons];
+
+		if (tx_buf->action == XDP_REDIRECT) {
+			struct pci_dev *pdev = bp->pdev;
+
+			dma_unmap_single(&pdev->dev,
+					 dma_unmap_addr(tx_buf, mapping),
+					 dma_unmap_len(tx_buf, len),
+					 PCI_DMA_TODEVICE);
+			xdp_return_frame(tx_buf->xdpf);
+			tx_buf->action = 0;
+			tx_buf->xdpf = NULL;
+		} else if (tx_buf->action == XDP_TX) {
+			rx_doorbell_needed = true;
+			last_tx_cons = tx_cons;
+		}
 		tx_cons = NEXT_TX(tx_cons);
 	}
 	txr->tx_cons = tx_cons;
-	if (bnxt_tx_avail(bp, txr) == bp->tx_ring_size) {
-		rx_prod = rxr->rx_prod;
-	} else {
+	if (rx_doorbell_needed) {
 		tx_buf = &txr->tx_buf_ring[last_tx_cons];
-		rx_prod = tx_buf->rx_prod;
+		bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod);
 	}
-	bnxt_db_write(bp, rxr->rx_doorbell, DB_KEY_RX | rx_prod);
 }
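The completion path now has to tell the two producers apart: XDP_REDIRECT entries own their DMA mapping and frame and are freed immediately, while XDP_TX entries only require the RX doorbell to be rung once at the end. A minimal userspace model of that dispatch (field names mirror the driver loosely):

    #include <stdio.h>

    enum tx_action { ACT_NONE, ACT_XDP_TX, ACT_XDP_REDIRECT };

    struct sw_tx_bd { enum tx_action action; };

    /* Walk completed descriptors the way bnxt_tx_int_xdp() does:
     * redirects release their frame; XDP_TX completions just flag
     * that the RX producer index must be published once at the end.
     */
    static void tx_int_xdp(struct sw_tx_bd *ring, int nr_pkts)
    {
    	int rx_doorbell_needed = 0;
    	int i;

    	for (i = 0; i < nr_pkts; i++) {
    		if (ring[i].action == ACT_XDP_REDIRECT) {
    			printf("unmap + return frame %d\n", i);
    			ring[i].action = ACT_NONE;
    		} else if (ring[i].action == ACT_XDP_TX) {
    			rx_doorbell_needed = 1;
    		}
    	}
    	if (rx_doorbell_needed)
    		printf("write RX doorbell\n");
    }

    int main(void)
    {
    	struct sw_tx_bd ring[3] = {
    		{ ACT_XDP_TX }, { ACT_XDP_REDIRECT }, { ACT_XDP_TX },
    	};

    	tx_int_xdp(ring, 3);
    	return 0;
    }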
 
 /* returns the following:
@@ -88,19 +126,19 @@
 		return false;
 
 	pdev = bp->pdev;
-	txr = rxr->bnapi->tx_ring;
 	rx_buf = &rxr->rx_buf_ring[cons];
 	offset = bp->rx_offset;
 
+	mapping = rx_buf->mapping - bp->rx_dma_offset;
+	dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
+
+	txr = rxr->bnapi->tx_ring;
 	xdp.data_hard_start = *data_ptr - offset;
 	xdp.data = *data_ptr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = *data_ptr + *len;
 	xdp.rxq = &rxr->xdp_rxq;
 	orig_data = xdp.data;
-	mapping = rx_buf->mapping - bp->rx_dma_offset;
-
-	dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
 
 	rcu_read_lock();
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -132,10 +170,34 @@
 		*event = BNXT_TX_EVENT;
 		dma_sync_single_for_device(&pdev->dev, mapping + offset, *len,
 					   bp->rx_dir);
-		bnxt_xmit_xdp(bp, txr, mapping + offset, *len,
-			      NEXT_RX(rxr->rx_prod));
+		__bnxt_xmit_xdp(bp, txr, mapping + offset, *len,
+				NEXT_RX(rxr->rx_prod));
 		bnxt_reuse_rx_data(rxr, cons, page);
 		return true;
+	case XDP_REDIRECT:
+		/* If we get here, the redirect is for a frame that was
+		 * received by the bnxt_en driver itself.
+		 */

+		dma_unmap_page_attrs(&pdev->dev, mapping,
+				     PAGE_SIZE, bp->rx_dir,
+				     DMA_ATTR_WEAK_ORDERING);
+
+		/* if we are unable to allocate a new buffer, abort and reuse */
+		if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) {
+			trace_xdp_exception(bp->dev, xdp_prog, act);
+			bnxt_reuse_rx_data(rxr, cons, page);
+			return true;
+		}
+
+		if (xdp_do_redirect(bp->dev, &xdp, xdp_prog)) {
+			trace_xdp_exception(bp->dev, xdp_prog, act);
+			page_pool_recycle_direct(rxr->page_pool, page);
+			return true;
+		}
+
+		*event |= BNXT_REDIRECT_EVENT;
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		/* Fall thru */
@@ -149,6 +211,56 @@
 	return true;
 }
 
+int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
+		  struct xdp_frame **frames, u32 flags)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct bpf_prog *xdp_prog = READ_ONCE(bp->xdp_prog);
+	struct pci_dev *pdev = bp->pdev;
+	struct bnxt_tx_ring_info *txr;
+	dma_addr_t mapping;
+	int drops = 0;
+	int ring;
+	int i;
+
+	if (!test_bit(BNXT_STATE_OPEN, &bp->state) ||
+	    !bp->tx_nr_rings_xdp ||
+	    !xdp_prog)
+		return -EINVAL;
+
+	ring = smp_processor_id() % bp->tx_nr_rings_xdp;
+	txr = &bp->tx_ring[ring];
+
+	for (i = 0; i < num_frames; i++) {
+		struct xdp_frame *xdp = frames[i];
+
+		if (!txr || !bnxt_tx_avail(bp, txr) ||
+		    !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) {
+			xdp_return_frame_rx_napi(xdp);
+			drops++;
+			continue;
+		}
+
+		mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len,
+					 DMA_TO_DEVICE);
+
+		if (dma_mapping_error(&pdev->dev, mapping)) {
+			xdp_return_frame_rx_napi(xdp);
+			drops++;
+			continue;
+		}
+		__bnxt_xmit_xdp_redirect(bp, txr, mapping, xdp->len, xdp);
+	}
+
+	if (flags & XDP_XMIT_FLUSH) {
+		/* Sync BD data before updating doorbell */
+		wmb();
+		bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
+	}
+
+	return num_frames - drops;
+}
+
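The .ndo_xdp_xmit contract this implements: the driver may drop individual frames, must return the number it actually accepted, and only rings the doorbell when XDP_XMIT_FLUSH is set. A toy model of that contract ('ring_space' stands in for bnxt_tx_avail()):

    #include <stdio.h>

    #define XDP_XMIT_FLUSH 1u

    /* Queue what fits, drop the rest, return frames actually queued,
     * and kick the hardware only on an explicit flush.
     */
    static int xdp_xmit(int num_frames, int ring_space, unsigned int flags)
    {
    	int drops = 0, i;

    	for (i = 0; i < num_frames; i++) {
    		if (ring_space == 0) {
    			drops++;	/* frame returned to its pool */
    			continue;
    		}
    		ring_space--;		/* descriptor consumed */
    	}

    	if (flags & XDP_XMIT_FLUSH)
    		printf("doorbell: publish producer index\n");

    	return num_frames - drops;
    }

    int main(void)
    {
    	printf("sent %d of 8\n", xdp_xmit(8, 5, XDP_XMIT_FLUSH));
    	return 0;
    }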
 /* Under rtnl_lock */
 static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
 {
@@ -199,7 +311,6 @@
 	bp->tx_nr_rings_xdp = tx_xdp;
 	bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
 	bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
-	bp->num_stat_ctxs = bp->cp_nr_rings;
 	bnxt_set_tpa_flags(bp);
 	bnxt_set_ring_params(bp);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
index 414b748..0df40c3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -10,12 +10,15 @@
 #ifndef BNXT_XDP_H
 #define BNXT_XDP_H
 
-void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
-		   dma_addr_t mapping, u32 len, u16 rx_prod);
+struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
+				   struct bnxt_tx_ring_info *txr,
+				   dma_addr_t mapping, u32 len);
 void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
 bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 		 struct page *page, u8 **data_ptr, unsigned int *len,
 		 u8 *event);
 int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
+		  struct xdp_frame **frames, u32 flags);
 
 #endif
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index d83233a..155599d 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -4038,15 +4038,14 @@
 	case L5CM_RAMROD_CMD_ID_CLOSE: {
 		struct iscsi_kcqe *l5kcqe = (struct iscsi_kcqe *) kcqe;
 
-		if (l4kcqe->status != 0 || l5kcqe->completion_status != 0) {
-			netdev_warn(dev->netdev, "RAMROD CLOSE compl with status 0x%x completion status 0x%x\n",
-				    l4kcqe->status, l5kcqe->completion_status);
-			opcode = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
-			/* Fall through */
-		} else {
+		if (l4kcqe->status == 0 && l5kcqe->completion_status == 0)
 			break;
-		}
+
+		netdev_warn(dev->netdev, "RAMROD CLOSE compl with status 0x%x completion status 0x%x\n",
+			    l4kcqe->status, l5kcqe->completion_status);
+		opcode = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
 	}
+		/* Fall through */
 	case L4_KCQE_OPCODE_VALUE_RESET_RECEIVED:
 	case L4_KCQE_OPCODE_VALUE_CLOSE_COMP:
 	case L4_KCQE_OPCODE_VALUE_RESET_COMP:
@@ -4097,12 +4096,16 @@
 {
 	struct cnic_local *cp = dev->cnic_priv;
 	u32 port_id;
+	int i;
 
 	cp->csk_tbl = kvcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
 			       GFP_KERNEL);
 	if (!cp->csk_tbl)
 		return -ENOMEM;
 
+	for (i = 0; i < MAX_CM_SK_TBL_SZ; i++)
+		atomic_set(&cp->csk_tbl[i].ref_count, 0);
+
 	port_id = prandom_u32();
 	port_id %= CNIC_LOCAL_PORT_RANGE;
 	if (cnic_init_id_tbl(&cp->csk_port_tbl, CNIC_LOCAL_PORT_RANGE,
@@ -5481,6 +5484,7 @@
 	cdev->unregister_device = cnic_unregister_device;
 	cdev->iscsi_nl_msg_recv = cnic_iscsi_nl_msg_recv;
 	cdev->get_fc_npiv_tbl = cnic_get_fc_npiv_tbl;
+	atomic_set(&cdev->ref_count, 0);
 
 	cp = cdev->cnic_priv;
 	cp->dev = cdev;
@@ -5731,7 +5735,7 @@
 		if (realdev) {
 			dev = cnic_from_netdev(realdev);
 			if (dev) {
-				vid |= VLAN_TAG_PRESENT;
+				vid |= VLAN_CFI_MASK;	/* make non-zero */
 				cnic_rcv_netevent(dev->cnic_priv, event, vid);
 				cnic_put(dev);
 			}
diff --git a/drivers/net/ethernet/broadcom/genet/Makefile b/drivers/net/ethernet/broadcom/genet/Makefile
index 9b6885e..edfc26a 100644
--- a/drivers/net/ethernet/broadcom/genet/Makefile
+++ b/drivers/net/ethernet/broadcom/genet/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BCMGENET) += genet.o
 genet-objs := bcmgenet.o bcmmii.o bcmgenet_wol.o
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 2d6f090..1de5181 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Broadcom GENET (Gigabit Ethernet) controller driver
  *
  * Copyright (c) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt)				"bcmgenet: " fmt
@@ -643,7 +640,7 @@
 static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
 					  struct ethtool_coalesce *ec)
 {
-	struct net_dim_cq_moder moder;
+	struct dim_cq_moder moder;
 	u32 usecs, pkts;
 
 	ring->rx_coalesce_usecs = ec->rx_coalesce_usecs;
@@ -1127,6 +1124,7 @@
 	.set_coalesce		= bcmgenet_set_coalesce,
 	.get_link_ksettings	= bcmgenet_get_link_ksettings,
 	.set_link_ksettings	= bcmgenet_set_link_ksettings,
+	.get_ts_info		= ethtool_op_get_ts_info,
 };
 
 /* Power down the unimac, based on mode. */
@@ -1169,7 +1167,7 @@
 		break;
 	}
 
-	return 0;
+	return ret;
 }
 
 static void bcmgenet_power_up(struct bcmgenet_priv *priv,
@@ -1665,7 +1663,7 @@
 	if (ring->free_bds <= (MAX_SKB_FRAGS + 1))
 		netif_tx_stop_queue(txq);
 
-	if (!skb->xmit_more || netif_xmit_stopped(txq))
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
 		/* Packets are ready, update producer index */
 		bcmgenet_tdma_ring_writel(priv, ring->index,
 					  ring->prod_index, TDMA_PROD_INDEX);
@@ -1898,7 +1896,7 @@
 {
 	struct bcmgenet_rx_ring *ring = container_of(napi,
 			struct bcmgenet_rx_ring, napi);
-	struct net_dim_sample dim_sample;
+	struct dim_sample dim_sample = {};
 	unsigned int work_done;
 
 	work_done = bcmgenet_desc_rx(ring, budget);
@@ -1909,8 +1907,8 @@
 	}
 
 	if (ring->dim.use_dim) {
-		net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
-			       ring->dim.bytes, &dim_sample);
+		dim_update_sample(ring->dim.event_ctr, ring->dim.packets,
+				  ring->dim.bytes, &dim_sample);
 		net_dim(&ring->dim.dim, dim_sample);
 	}
 
@@ -1919,16 +1917,16 @@
 
 static void bcmgenet_dim_work(struct work_struct *work)
 {
-	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct dim *dim = container_of(work, struct dim, work);
 	struct bcmgenet_net_dim *ndim =
 			container_of(dim, struct bcmgenet_net_dim, dim);
 	struct bcmgenet_rx_ring *ring =
 			container_of(ndim, struct bcmgenet_rx_ring, dim);
-	struct net_dim_cq_moder cur_profile =
+	struct dim_cq_moder cur_profile =
 			net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
 	bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts);
-	dim->state = NET_DIM_START_MEASURE;
+	dim->state = DIM_START_MEASURE;
 }
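The net_dim → dim rename keeps the usage pattern intact: the NAPI poll loop records a dim_sample, and the DIM worker maps the algorithm's current profile index to a coalescing setting before rearming. A toy model of that apply step (profile values here are illustrative only; the real tables live in lib/dim/net_dim.c):

    #include <stdio.h>

    struct cq_moder { unsigned int usec, pkts; };

    static const struct cq_moder rx_profile[] = {
    	{ 1, 256 }, { 8, 128 }, { 64, 64 },	/* illustrative values */
    };

    /* Stand-in for bcmgenet_set_rx_coalesce() applying the profile
     * chosen by the DIM algorithm.
     */
    static void apply_profile(unsigned int ix)
    {
    	printf("coalesce: %u usec / %u pkts\n",
    	       rx_profile[ix].usec, rx_profile[ix].pkts);
    }

    int main(void)
    {
    	/* pretend the DIM algorithm stepped through profiles 0..2 */
    	for (unsigned int ix = 0; ix < 3; ix++)
    		apply_profile(ix);
    	return 0;
    }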
 
 /* Assign skb to RX DMA descriptor. */
@@ -1998,8 +1996,6 @@
 
 	/* issue soft reset with (rg)mii loopback to ensure a stable rxclk */
 	bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD);
-	udelay(2);
-	bcmgenet_umac_writel(priv, 0, UMAC_CMD);
 }
 
 static void bcmgenet_intr_disable(struct bcmgenet_priv *priv)
@@ -2020,6 +2016,8 @@
 	 */
 	if (priv->internal_phy) {
 		int0_enable |= UMAC_IRQ_LINK_EVENT;
+		if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+			int0_enable |= UMAC_IRQ_PHY_DET_R;
 	} else if (priv->ext_phy) {
 		int0_enable |= UMAC_IRQ_LINK_EVENT;
 	} else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
@@ -2085,7 +2083,7 @@
 	struct bcmgenet_net_dim *dim = &ring->dim;
 
 	INIT_WORK(&dim->dim.work, cb);
-	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	dim->event_ctr = 0;
 	dim->packets = 0;
 	dim->bytes = 0;
@@ -2094,7 +2092,7 @@
 static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
 {
 	struct bcmgenet_net_dim *dim = &ring->dim;
-	struct net_dim_cq_moder moder;
+	struct dim_cq_moder moder;
 	u32 usecs, pkts;
 
 	usecs = ring->rx_coalesce_usecs;
@@ -2518,19 +2516,14 @@
 static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
 {
 	struct netdev_queue *txq;
-	struct sk_buff *skb;
-	struct enet_cb *cb;
 	int i;
 
 	bcmgenet_fini_rx_napi(priv);
 	bcmgenet_fini_tx_napi(priv);
 
-	for (i = 0; i < priv->num_tx_bds; i++) {
-		cb = priv->tx_cbs + i;
-		skb = bcmgenet_free_tx_cb(&priv->pdev->dev, cb);
-		if (skb)
-			dev_kfree_skb(skb);
-	}
+	for (i = 0; i < priv->num_tx_bds; i++)
+		dev_kfree_skb(bcmgenet_free_tx_cb(&priv->pdev->dev,
+						  priv->tx_cbs + i));
 
 	for (i = 0; i < priv->hw_params->tx_queues; i++) {
 		txq = netdev_get_tx_queue(priv->dev, priv->tx_rings[i].queue);
@@ -2618,11 +2611,16 @@
 	priv->irq0_stat = 0;
 	spin_unlock_irq(&priv->lock);
 
-	/* Link UP/DOWN event */
-	if (status & UMAC_IRQ_LINK_EVENT) {
-		priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP);
-		phy_mac_interrupt(priv->dev->phydev);
+	if (status & UMAC_IRQ_PHY_DET_R &&
+	    priv->dev->phydev->autoneg != AUTONEG_ENABLE) {
+		phy_init_hw(priv->dev->phydev);
+		genphy_config_aneg(priv->dev->phydev);
 	}
+
+	/* Link UP/DOWN event */
+	if (status & UMAC_IRQ_LINK_EVENT)
+		phy_mac_interrupt(priv->dev->phydev);
 }
 
 /* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2717,7 +2715,7 @@
 	}
 
 	/* all other interested interrupts handled in bottom half */
-	status &= UMAC_IRQ_LINK_EVENT;
+	status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R);
 	if (status) {
 		/* Save irq status for bottom-half processing. */
 		spin_lock_irqsave(&priv->lock, flags);
@@ -3086,39 +3084,42 @@
 	netif_tx_wake_all_queues(dev);
 }
 
-#define MAX_MC_COUNT	16
+#define MAX_MDF_FILTER	17
 
 static inline void bcmgenet_set_mdf_addr(struct bcmgenet_priv *priv,
 					 unsigned char *addr,
-					 int *i,
-					 int *mc)
+					 int *i)
 {
-	u32 reg;
-
 	bcmgenet_umac_writel(priv, addr[0] << 8 | addr[1],
 			     UMAC_MDF_ADDR + (*i * 4));
 	bcmgenet_umac_writel(priv, addr[2] << 24 | addr[3] << 16 |
 			     addr[4] << 8 | addr[5],
 			     UMAC_MDF_ADDR + ((*i + 1) * 4));
-	reg = bcmgenet_umac_readl(priv, UMAC_MDF_CTRL);
-	reg |= (1 << (MAX_MC_COUNT - *mc));
-	bcmgenet_umac_writel(priv, reg, UMAC_MDF_CTRL);
 	*i += 2;
-	(*mc)++;
 }
 
 static void bcmgenet_set_rx_mode(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct netdev_hw_addr *ha;
-	int i, mc;
+	int i, nfilter;
 	u32 reg;
 
 	netif_dbg(priv, hw, dev, "%s: %08X\n", __func__, dev->flags);
 
-	/* Promiscuous mode */
+	/* Number of filters needed */
+	nfilter = netdev_uc_count(dev) + netdev_mc_count(dev) + 2;
+
+	/* Turn on promiscuous mode for three scenarios:
+	 * 1. IFF_PROMISC flag is set
+	 * 2. IFF_ALLMULTI flag is set
+	 * 3. The number of filters needed exceeds the number of filters
+	 *    supported by the hardware.
+	 */
 	reg = bcmgenet_umac_readl(priv, UMAC_CMD);
-	if (dev->flags & IFF_PROMISC) {
+	if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) ||
+	    (nfilter > MAX_MDF_FILTER)) {
 		reg |= CMD_PROMISC;
 		bcmgenet_umac_writel(priv, reg, UMAC_CMD);
 		bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL);
@@ -3128,32 +3129,24 @@
 		bcmgenet_umac_writel(priv, reg, UMAC_CMD);
 	}
 
-	/* UniMac doesn't support ALLMULTI */
-	if (dev->flags & IFF_ALLMULTI) {
-		netdev_warn(dev, "ALLMULTI is not supported\n");
-		return;
-	}
-
 	/* update MDF filter */
 	i = 0;
-	mc = 0;
 	/* Broadcast */
-	bcmgenet_set_mdf_addr(priv, dev->broadcast, &i, &mc);
+	bcmgenet_set_mdf_addr(priv, dev->broadcast, &i);
 	/* my own address.*/
-	bcmgenet_set_mdf_addr(priv, dev->dev_addr, &i, &mc);
-	/* Unicast list*/
-	if (netdev_uc_count(dev) > (MAX_MC_COUNT - mc))
-		return;
+	bcmgenet_set_mdf_addr(priv, dev->dev_addr, &i);
 
-	if (!netdev_uc_empty(dev))
-		netdev_for_each_uc_addr(ha, dev)
-			bcmgenet_set_mdf_addr(priv, ha->addr, &i, &mc);
+	/* Unicast */
+	netdev_for_each_uc_addr(ha, dev)
+		bcmgenet_set_mdf_addr(priv, ha->addr, &i);
+
 	/* Multicast */
-	if (netdev_mc_empty(dev) || netdev_mc_count(dev) >= (MAX_MC_COUNT - mc))
-		return;
-
 	netdev_for_each_mc_addr(ha, dev)
-		bcmgenet_set_mdf_addr(priv, ha->addr, &i, &mc);
+		bcmgenet_set_mdf_addr(priv, ha->addr, &i);
+
+	/* Enable filters */
+	reg = GENMASK(MAX_MDF_FILTER - 1, MAX_MDF_FILTER - nfilter);
+	bcmgenet_umac_writel(priv, reg, UMAC_MDF_CTRL);
 }
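The filter-enable write packs the nfilter occupied slots into the top of a 17-bit field via GENMASK(MAX_MDF_FILTER - 1, MAX_MDF_FILTER - nfilter). A standalone check of that arithmetic (GENMASK reimplemented for 32-bit values here; the kernel macro lives in <linux/bits.h>):

    #include <stdio.h>

    #define MAX_MDF_FILTER	17

    /* Userspace stand-in for the kernel's GENMASK(h, l). */
    #define GENMASK_U32(h, l) \
    	((0xffffffffu >> (31 - (h))) & (0xffffffffu << (l)))

    int main(void)
    {
    	/* broadcast + own address + 1 unicast + 2 multicast addrs */
    	unsigned int nfilter = 5;
    	unsigned int reg = GENMASK_U32(MAX_MDF_FILTER - 1,
    				       MAX_MDF_FILTER - nfilter);

    	/* bits 16..12 set -> 0x1f000 */
    	printf("UMAC_MDF_CTRL = 0x%05x\n", reg);
    	return 0;
    }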
 
 /* Set the hardware MAC address. */
@@ -3445,7 +3438,6 @@
 	struct bcmgenet_priv *priv;
 	struct net_device *dev;
 	const void *macaddr;
-	struct resource *r;
 	unsigned int i;
 	int err = -EIO;
 	const char *phy_mode_str;
@@ -3476,7 +3468,7 @@
 
 	if (dn) {
 		macaddr = of_get_mac_address(dn);
-		if (!macaddr) {
+		if (IS_ERR(macaddr)) {
 			dev_err(&pdev->dev, "can't find MAC address\n");
 			err = -EINVAL;
 			goto err;
@@ -3485,8 +3477,7 @@
 		macaddr = pd->mac_address;
 	}
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(&pdev->dev, r);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		err = PTR_ERR(priv->base);
 		goto err;
@@ -3612,36 +3603,6 @@
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int bcmgenet_suspend(struct device *d)
-{
-	struct net_device *dev = dev_get_drvdata(d);
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	int ret = 0;
-
-	if (!netif_running(dev))
-		return 0;
-
-	netif_device_detach(dev);
-
-	bcmgenet_netif_stop(dev);
-
-	if (!device_may_wakeup(d))
-		phy_suspend(dev->phydev);
-
-	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
-	if (device_may_wakeup(d) && priv->wolopts) {
-		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
-		clk_prepare_enable(priv->clk_wol);
-	} else if (priv->internal_phy) {
-		ret = bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
-	}
-
-	/* Turn off the clocks */
-	clk_disable_unprepare(priv->clk);
-
-	return ret;
-}
-
 static int bcmgenet_resume(struct device *d)
 {
 	struct net_device *dev = dev_get_drvdata(d);
@@ -3675,6 +3636,7 @@
 	phy_init_hw(dev->phydev);
 
 	/* Speed settings must be restored */
+	genphy_config_aneg(dev->phydev);
 	bcmgenet_mii_config(priv->dev, false);
 
 	bcmgenet_set_hw_addr(priv, dev->dev_addr);
@@ -3719,6 +3681,39 @@
 	clk_disable_unprepare(priv->clk);
 	return ret;
 }
+
+static int bcmgenet_suspend(struct device *d)
+{
+	struct net_device *dev = dev_get_drvdata(d);
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	int ret = 0;
+
+	if (!netif_running(dev))
+		return 0;
+
+	netif_device_detach(dev);
+
+	bcmgenet_netif_stop(dev);
+
+	if (!device_may_wakeup(d))
+		phy_suspend(dev->phydev);
+
+	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
+	if (device_may_wakeup(d) && priv->wolopts) {
+		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
+		clk_prepare_enable(priv->clk_wol);
+	} else if (priv->internal_phy) {
+		ret = bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
+	}
+
+	/* Turn off the clocks */
+	clk_disable_unprepare(priv->clk);
+
+	if (ret)
+		bcmgenet_resume(d);
+
+	return ret;
+}
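A behavioral fix rides along with the move: if bcmgenet_power_down() fails mid-suspend, the error path now replays bcmgenet_resume() so the interface is not left half-suspended. A toy model of that unwind-by-resume pattern (function names mock the driver's; -5 stands in for -EIO):

    #include <stdio.h>

    static int power_down(void)  { return -5; /* pretend failure */ }
    static void resume(void)     { printf("resume: device re-opened\n"); }

    /* On a failed power-down, replay the resume path rather than
     * returning with the device in an intermediate state.
     */
    static int suspend(void)
    {
    	int ret;

    	printf("suspend: netif stopped, clocks gated\n");
    	ret = power_down();
    	if (ret)
    		resume();	/* undo the partial suspend */
    	return ret;
    }

    int main(void)
    {
    	printf("suspend returned %d\n", suspend());
    	return 0;
    }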
 #endif /* CONFIG_PM_SLEEP */
 
 static SIMPLE_DEV_PM_OPS(bcmgenet_pm_ops, bcmgenet_suspend, bcmgenet_resume);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 14b4961..dbc69d8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __BCMGENET_H__
@@ -16,7 +13,7 @@
 #include <linux/mii.h>
 #include <linux/if_vlan.h>
 #include <linux/phy.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
 
 /* total number of Buffer Descriptors, same for Rx/Tx */
 #define TOTAL_DESC				256
@@ -369,6 +366,7 @@
 #define  EXT_PWR_DOWN_PHY_EN		(1 << 20)
 
 #define EXT_RGMII_OOB_CTRL		0x0C
+#define  RGMII_MODE_EN_V123		(1 << 0)
 #define  RGMII_LINK			(1 << 4)
 #define  OOB_DISABLE			(1 << 5)
 #define  RGMII_MODE_EN			(1 << 6)
@@ -581,7 +579,7 @@
 	u16		event_ctr;
 	unsigned long	packets;
 	unsigned long	bytes;
-	struct net_dim	dim;
+	struct dim	dim;
 };
 
 struct bcmgenet_rx_ring {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index 2fbd027..ea20d94 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support
  *
  * Copyright (c) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt)				"bcmgenet_wol: " fmt
@@ -186,6 +183,8 @@
 	}
 
 	reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
+	if (!(reg & MPD_EN))
+		return;	/* already powered up so skip the rest */
 	reg &= ~MPD_EN;
 	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index de0e24d..dbe18cd 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Broadcom GENET MDIO routines
  *
  * Copyright (c) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 
@@ -184,8 +181,38 @@
 	const char *phy_name = NULL;
 	u32 id_mode_dis = 0;
 	u32 port_ctrl;
+	int bmcr = -1;
+	int ret;
 	u32 reg;
 
+	/* MAC clocking workaround during reset of umac state machines */
+	reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+	if (reg & CMD_SW_RESET) {
+		/* An MII PHY must be isolated to prevent TXC contention */
+		if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
+			ret = phy_read(phydev, MII_BMCR);
+			if (ret >= 0) {
+				bmcr = ret;
+				ret = phy_write(phydev, MII_BMCR,
+						bmcr | BMCR_ISOLATE);
+			}
+			if (ret) {
+				netdev_err(dev, "failed to isolate PHY\n");
+				return ret;
+			}
+		}
+		/* Switch MAC clocking to RGMII generated clock */
+		bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
+		/* Ensure 5 clks with Rx disabled
+		 * followed by 5 clks with Reset asserted
+		 */
+		udelay(4);
+		reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN);
+		bcmgenet_umac_writel(priv, reg, UMAC_CMD);
+		/* Ensure 5 more clocks before Rx is enabled */
+		udelay(2);
+	}
+
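BMCR_ISOLATE electrically detaches an MII PHY from the xMII bus so its TXC cannot contend with the MAC while the UniMAC is held in reset; the saved BMCR is written back once the MII port mode is programmed (see the MII case below). A sketch of that save/isolate/restore sequence (BMCR_ISOLATE is the clause-22 bit 0x0400; PHY register I/O is mocked here):

    #include <stdio.h>

    #define MII_BMCR	0x00
    #define BMCR_ISOLATE	0x0400	/* IEEE 802.3 clause-22 isolate bit */

    static int fake_bmcr = 0x1140;	/* pretend PHY register */

    static int phy_read_mock(int reg)
    {
    	(void)reg;
    	return fake_bmcr;
    }

    static void phy_write_mock(int reg, int val)
    {
    	(void)reg;
    	fake_bmcr = val;
    }

    int main(void)
    {
    	/* Save, then isolate while the MAC is reset... */
    	int saved = phy_read_mock(MII_BMCR);

    	phy_write_mock(MII_BMCR, saved | BMCR_ISOLATE);
    	printf("isolated: 0x%04x\n", fake_bmcr);

    	/* ...and restore the original value afterwards. */
    	phy_write_mock(MII_BMCR, saved);
    	printf("restored: 0x%04x\n", fake_bmcr);
    	return 0;
    }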
 	priv->ext_phy = !priv->internal_phy &&
 			(priv->phy_interface != PHY_INTERFACE_MODE_MOCA);
 
@@ -214,9 +241,12 @@
 
 	case PHY_INTERFACE_MODE_MII:
 		phy_name = "external MII";
-		phydev->supported &= PHY_BASIC_FEATURES;
+		phy_set_max_speed(phydev, SPEED_100);
 		bcmgenet_sys_writel(priv,
 				    PORT_MODE_EXT_EPHY, SYS_PORT_CTRL);
+		/* Restore the MII PHY after isolation */
+		if (bmcr >= 0)
+			phy_write(phydev, MII_BMCR, bmcr);
 		break;
 
 	case PHY_INTERFACE_MODE_REVMII:
@@ -226,11 +256,11 @@
 		 * capabilities, use that knowledge to also configure the
 		 * Reverse MII interface correctly.
 		 */
-		if ((dev->phydev->supported & PHY_BASIC_FEATURES) ==
-				PHY_BASIC_FEATURES)
-			port_ctrl = PORT_MODE_EXT_RVMII_25;
-		else
+		if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				      dev->phydev->supported))
 			port_ctrl = PORT_MODE_EXT_RVMII_50;
+		else
+			port_ctrl = PORT_MODE_EXT_RVMII_25;
 		bcmgenet_sys_writel(priv, port_ctrl, SYS_PORT_CTRL);
 		break;
 
@@ -261,7 +291,11 @@
 	 */
 	if (priv->ext_phy) {
 		reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
-		reg |= RGMII_MODE_EN | id_mode_dis;
+		reg |= id_mode_dis;
+		if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+			reg |= RGMII_MODE_EN_V123;
+		else
+			reg |= RGMII_MODE_EN;
 		bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
 	}
 
@@ -276,11 +310,12 @@
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct device_node *dn = priv->pdev->dev.of_node;
 	struct phy_device *phydev;
-	u32 phy_flags;
+	u32 phy_flags = 0;
 	int ret;
 
 	/* Communicate the integrated PHY revision */
-	phy_flags = priv->gphy_rev;
+	if (priv->internal_phy)
+		phy_flags = priv->gphy_rev;
 
 	/* Initialize link state variables that bcmgenet_mii_setup() uses */
 	priv->old_link = -1;
@@ -318,7 +353,7 @@
 		return ret;
 	}
 
-	phydev->advertising = phydev->supported;
+	linkmode_copy(phydev->advertising, phydev->supported);
 
 	/* The internal PHY has its link interrupts routed to the
 	 * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
@@ -525,7 +560,7 @@
 			.asym_pause = 0,
 		};
 
-		phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);
+		phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
 		if (!phydev || IS_ERR(phydev)) {
 			dev_err(kdev, "failed to register fixed PHY device\n");
 			return -ENODEV;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index ef4a0c3..1604ad3 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2001,2002,2003,2004 Broadcom Corporation
  * Copyright (c) 2006, 2007  Maciej W. Rozycki
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- *
  * This driver is designed for the Broadcom SiByte SOC built-in
  * Ethernet controllers. Written by Mitch Lichtenberg at Broadcom Corp.
  *
@@ -156,7 +143,7 @@
 			  (d)->sbdma_dscrtable : (d)->f+1)
 
 
-#define NUMCACHEBLKS(x) (((x)+SMP_CACHE_BYTES-1)/SMP_CACHE_BYTES)
+#define NUMCACHEBLKS(x) DIV_ROUND_UP(x, SMP_CACHE_BYTES)
 
 #define SBMAC_MAX_TXDESCR	256
 #define SBMAC_MAX_RXDESCR	256
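DIV_ROUND_UP(x, n) computes the same ceiling division the macro previously open-coded as ((x)+SMP_CACHE_BYTES-1)/SMP_CACHE_BYTES; a quick check:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

    int main(void)
    {
    	/* e.g. a 100-byte buffer with 32-byte cache lines -> 4 blocks */
    	printf("%d\n", DIV_ROUND_UP(100, 32));	/* prints 4 */
    	printf("%d\n", DIV_ROUND_UP(96, 32));	/* exact: prints 3 */
    	return 0;
    }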
@@ -299,7 +286,7 @@
 static void sbmac_promiscuous_mode(struct sbmac_softc *sc, int onoff);
 static uint64_t sbmac_addr2reg(unsigned char *ptr);
 static irqreturn_t sbmac_intr(int irq, void *dev_instance);
-static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev);
+static netdev_tx_t sbmac_start_tx(struct sk_buff *skb, struct net_device *dev);
 static void sbmac_setmulti(struct sbmac_softc *sc);
 static int sbmac_init(struct platform_device *pldev, long long base);
 static int sbmac_set_speed(struct sbmac_softc *s, enum sbmac_speed speed);
@@ -1288,7 +1275,7 @@
 		 * for transmits, we just free buffers.
 		 */
 
-		dev_kfree_skb_irq(sb);
+		dev_consume_skb_irq(sb);
 
 		/*
 		 * .. and advance to the next buffer.
@@ -2028,7 +2015,7 @@
  *  Return value:
  *  	   nothing
  ********************************************************************* */
-static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sbmac_start_tx(struct sk_buff *skb, struct net_device *dev)
 {
 	struct sbmac_softc *sc = netdev_priv(dev);
 	unsigned long flags;
@@ -2357,21 +2344,11 @@
 	}
 
 	/* Remove any features not supported by the controller */
-	phy_dev->supported &= SUPPORTED_10baseT_Half |
-			      SUPPORTED_10baseT_Full |
-			      SUPPORTED_100baseT_Half |
-			      SUPPORTED_100baseT_Full |
-			      SUPPORTED_1000baseT_Half |
-			      SUPPORTED_1000baseT_Full |
-			      SUPPORTED_Autoneg |
-			      SUPPORTED_MII |
-			      SUPPORTED_Pause |
-			      SUPPORTED_Asym_Pause;
+	phy_set_max_speed(phy_dev, SPEED_1000);
+	phy_support_asym_pause(phy_dev);
 
 	phy_attached_info(phy_dev);
 
-	phy_dev->advertising = phy_dev->supported;
-
 	sc->phy_dev = phy_dev;
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a129627..ca3aa12 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -66,11 +66,6 @@
 #include <uapi/linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
 
-#ifdef CONFIG_SPARC
-#include <asm/idprom.h>
-#include <asm/prom.h>
-#endif
-
 #define BAR_0	0
 #define BAR_2	2
 
@@ -726,7 +721,7 @@
 	case TG3_APE_LOCK_GPIO:
 		if (tg3_asic_rev(tp) == ASIC_REV_5761)
 			return 0;
-		/* else: fall through */
+		/* fall through */
 	case TG3_APE_LOCK_GRC:
 	case TG3_APE_LOCK_MEM:
 		if (!tp->pci_fn)
@@ -787,7 +782,7 @@
 	case TG3_APE_LOCK_GPIO:
 		if (tg3_asic_rev(tp) == ASIC_REV_5761)
 			return;
-		/* else: fall through */
+		/* fall through */
 	case TG3_APE_LOCK_GRC:
 	case TG3_APE_LOCK_MEM:
 		if (!tp->pci_fn)
@@ -1078,7 +1073,6 @@
 	struct tg3 *tp = tnapi->tp;
 
 	tw32_mailbox(tnapi->int_mbox, tnapi->last_tag << 24);
-	mmiowb();
 
 	/* When doing tagged status, this work check is unnecessary.
 	 * The last_tag we write above tells the chip which piece of
@@ -1598,7 +1592,7 @@
 			phydev->dev_flags |= PHY_BRCM_EXT_IBND_RX_ENABLE;
 		if (tg3_flag(tp, RGMII_EXT_IBND_TX_EN))
 			phydev->dev_flags |= PHY_BRCM_EXT_IBND_TX_ENABLE;
-		/* fallthru */
+		/* fall through */
 	case PHY_ID_RTL8211C:
 		phydev->interface = PHY_INTERFACE_MODE_RGMII;
 		break;
@@ -2122,16 +2116,14 @@
 	case PHY_INTERFACE_MODE_GMII:
 	case PHY_INTERFACE_MODE_RGMII:
 		if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) {
-			phydev->supported &= (PHY_GBIT_FEATURES |
-					      SUPPORTED_Pause |
-					      SUPPORTED_Asym_Pause);
+			phy_set_max_speed(phydev, SPEED_1000);
+			phy_support_asym_pause(phydev);
 			break;
 		}
-		/* fallthru */
+		/* fall through */
 	case PHY_INTERFACE_MODE_MII:
-		phydev->supported &= (PHY_BASIC_FEATURES |
-				      SUPPORTED_Pause |
-				      SUPPORTED_Asym_Pause);
+		phy_set_max_speed(phydev, SPEED_100);
+		phy_support_asym_pause(phydev);
 		break;
 	default:
 		phy_disconnect(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr));
@@ -2140,8 +2132,6 @@
 
 	tp->phy_flags |= TG3_PHYFLG_IS_CONNECTED;
 
-	phydev->advertising = phydev->supported;
-
 	phy_attached_info(phydev);
 
 	return 0;
@@ -2161,7 +2151,8 @@
 		phydev->speed = tp->link_config.speed;
 		phydev->duplex = tp->link_config.duplex;
 		phydev->autoneg = tp->link_config.autoneg;
-		phydev->advertising = tp->link_config.advertising;
+		ethtool_convert_legacy_u32_to_link_mode(
+			phydev->advertising, tp->link_config.advertising);
 	}
 
 	phy_start(phydev);
@@ -4061,8 +4052,9 @@
 		do_low_power = false;
 		if ((tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) &&
 		    !(tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) {
+			__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising) = { 0, };
 			struct phy_device *phydev;
-			u32 phyid, advertising;
+			u32 phyid;
 
 			phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
 
@@ -4071,25 +4063,33 @@
 			tp->link_config.speed = phydev->speed;
 			tp->link_config.duplex = phydev->duplex;
 			tp->link_config.autoneg = phydev->autoneg;
-			tp->link_config.advertising = phydev->advertising;
+			ethtool_convert_link_mode_to_legacy_u32(
+				&tp->link_config.advertising,
+				phydev->advertising);
 
-			advertising = ADVERTISED_TP |
-				      ADVERTISED_Pause |
-				      ADVERTISED_Autoneg |
-				      ADVERTISED_10baseT_Half;
+			linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, advertising);
+			linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+					 advertising);
+			linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+					 advertising);
+			linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+					 advertising);
 
 			if (tg3_flag(tp, ENABLE_ASF) || device_should_wake) {
-				if (tg3_flag(tp, WOL_SPEED_100MB))
-					advertising |=
-						ADVERTISED_100baseT_Half |
-						ADVERTISED_100baseT_Full |
-						ADVERTISED_10baseT_Full;
-				else
-					advertising |= ADVERTISED_10baseT_Full;
+				if (tg3_flag(tp, WOL_SPEED_100MB)) {
+					linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+							 advertising);
+					linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+							 advertising);
+					linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+							 advertising);
+				} else {
+					linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+							 advertising);
+				}
 			}
 
-			phydev->advertising = advertising;
-
+			linkmode_copy(phydev->advertising, advertising);
 			phy_start_aneg(phydev);
 
 			phyid = phydev->drv->phy_id & phydev->drv->phy_id_mask;
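The u32 advertising masks ran out of bits, so phylib moved to linkmode bitmaps: declare a mask on the stack, set individual ETHTOOL_LINK_MODE_* bits, then copy it in. A userspace bitmap sketch of the same operations (kernel bitmaps are unsigned-long arrays sized by __ETHTOOL_LINK_MODE_MASK_NBITS; two words suffice for this illustration):

    #include <stdio.h>
    #include <string.h>

    #define LINKMODE_WORDS 2
    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    typedef unsigned long linkmode_t[LINKMODE_WORDS];

    static void linkmode_set_bit(int nr, linkmode_t m)
    {
    	m[nr / BITS_PER_LONG] |= 1ul << (nr % BITS_PER_LONG);
    }

    static int linkmode_test_bit(int nr, const linkmode_t m)
    {
    	return !!(m[nr / BITS_PER_LONG] & (1ul << (nr % BITS_PER_LONG)));
    }

    int main(void)
    {
    	/* illustrative bit numbers for 10baseT/Half, Autoneg, Pause */
    	enum { MODE_10HALF = 0, MODE_AUTONEG = 6, MODE_PAUSE = 13 };
    	linkmode_t adv;

    	memset(adv, 0, sizeof(adv));	/* the { 0, } initializer above */
    	linkmode_set_bit(MODE_10HALF, adv);
    	linkmode_set_bit(MODE_AUTONEG, adv);
    	linkmode_set_bit(MODE_PAUSE, adv);
    	printf("autoneg advertised: %d\n",
    	       linkmode_test_bit(MODE_AUTONEG, adv));
    	return 0;
    }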
@@ -4282,7 +4282,7 @@
 	pci_set_power_state(tp->pdev, PCI_D3hot);
 }
 
-static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex)
+static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex)
 {
 	switch (val & MII_TG3_AUX_STAT_SPDMASK) {
 	case MII_TG3_AUX_STAT_10HALF:
@@ -4786,7 +4786,7 @@
 	bool current_link_up;
 	u32 bmsr, val;
 	u32 lcl_adv, rmt_adv;
-	u16 current_speed;
+	u32 current_speed;
 	u8 current_duplex;
 	int i, err;
 
@@ -5215,7 +5215,7 @@
 		if (ap->flags & (MR_AN_ENABLE | MR_RESTART_AN))
 			ap->state = ANEG_STATE_AN_ENABLE;
 
-		/* fallthru */
+		/* fall through */
 	case ANEG_STATE_AN_ENABLE:
 		ap->flags &= ~(MR_AN_COMPLETE | MR_PAGE_RX);
 		if (ap->flags & MR_AN_ENABLE) {
@@ -5245,7 +5245,7 @@
 		ret = ANEG_TIMER_ENAB;
 		ap->state = ANEG_STATE_RESTART;
 
-		/* fallthru */
+		/* fall through */
 	case ANEG_STATE_RESTART:
 		delta = ap->cur_time - ap->link_time;
 		if (delta > ANEG_STATE_SETTLE_TIME)
@@ -5288,7 +5288,7 @@
 
 		ap->state = ANEG_STATE_ACK_DETECT;
 
-		/* fallthru */
+		/* fall through */
 	case ANEG_STATE_ACK_DETECT:
 		if (ap->ack_match != 0) {
 			if ((ap->rxconfig & ~ANEG_CFG_ACK) ==
@@ -5718,7 +5718,7 @@
 static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset)
 {
 	u32 orig_pause_cfg;
-	u16 orig_active_speed;
+	u32 orig_active_speed;
 	u8 orig_active_duplex;
 	u32 mac_status;
 	bool current_link_up;
@@ -5822,7 +5822,7 @@
 {
 	int err = 0;
 	u32 bmsr, bmcr;
-	u16 current_speed = SPEED_UNKNOWN;
+	u32 current_speed = SPEED_UNKNOWN;
 	u8 current_duplex = DUPLEX_UNKNOWN;
 	bool current_link_up = false;
 	u32 local_adv, remote_adv, sgsr;
@@ -6139,10 +6139,16 @@
 }
 
 /* tp->lock must be held */
-static u64 tg3_refclk_read(struct tg3 *tp)
+static u64 tg3_refclk_read(struct tg3 *tp, struct ptp_system_timestamp *sts)
 {
-	u64 stamp = tr32(TG3_EAV_REF_CLCK_LSB);
-	return stamp | (u64)tr32(TG3_EAV_REF_CLCK_MSB) << 32;
+	u64 stamp;
+
+	ptp_read_system_prets(sts);
+	stamp = tr32(TG3_EAV_REF_CLCK_LSB);
+	ptp_read_system_postts(sts);
+	stamp |= (u64)tr32(TG3_EAV_REF_CLCK_MSB) << 32;
+
+	return stamp;
 }
 
 /* tp->lock must be held */
@@ -6233,13 +6239,14 @@
 	return 0;
 }
 
-static int tg3_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int tg3_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			    struct ptp_system_timestamp *sts)
 {
 	u64 ns;
 	struct tg3 *tp = container_of(ptp, struct tg3, ptp_info);
 
 	tg3_full_lock(tp, 0);
-	ns = tg3_refclk_read(tp);
+	ns = tg3_refclk_read(tp, sts);
 	ns += tp->ptp_adjust;
 	tg3_full_unlock(tp);
 
@@ -6273,6 +6280,10 @@
 
 	switch (rq->type) {
 	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
 		if (rq->perout.index != 0)
 			return -EINVAL;
 
@@ -6334,7 +6345,7 @@
 	.pps		= 0,
 	.adjfreq	= tg3_ptp_adjfreq,
 	.adjtime	= tg3_ptp_adjtime,
-	.gettime64	= tg3_ptp_gettime,
+	.gettimex64	= tg3_ptp_gettimex,
 	.settime64	= tg3_ptp_settime,
 	.enable		= tg3_ptp_enable,
 };
@@ -6703,7 +6714,7 @@
 	skb_size = SKB_DATA_ALIGN(data_size + TG3_RX_OFFSET(tp)) +
 		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	if (skb_size <= PAGE_SIZE) {
-		data = netdev_alloc_frag(skb_size);
+		data = napi_alloc_frag(skb_size);
 		*frag_size = skb_size;
 	} else {
 		data = kmalloc(skb_size, GFP_ATOMIC);
@@ -6991,7 +7002,6 @@
 			tw32_rx_mbox(TG3_RX_JMB_PROD_IDX_REG,
 				     tpr->rx_jmb_prod_idx);
 		}
-		mmiowb();
 	} else if (work_mask) {
 		/* rx_std_buffers[] and rx_jmb_buffers[] entries must be
 		 * updated before the producer indices can be updated.
@@ -7202,8 +7212,6 @@
 			tw32_rx_mbox(TG3_RX_JMB_PROD_IDX_REG,
 				     dpr->rx_jmb_prod_idx);
 
-		mmiowb();
-
 		if (err)
 			tw32_f(HOSTCC_MODE, tp->coal_now);
 	}
@@ -7270,7 +7278,6 @@
 						  HOSTCC_MODE_ENABLE |
 						  tnapi->coal_now);
 			}
-			mmiowb();
 			break;
 		}
 	}
@@ -8148,10 +8155,9 @@
 			netif_tx_wake_queue(txq);
 	}
 
-	if (!skb->xmit_more || netif_xmit_stopped(txq)) {
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq)) {
 		/* Packets are ready, update Tx producer idx on card. */
 		tw32_tx_mbox(tnapi->prodmbox, entry);
-		mmiowb();
 	}
 
 	return NETDEV_TX_OK;
@@ -8704,10 +8710,10 @@
 		if (!i && tg3_flag(tp, ENABLE_RSS))
 			continue;
 
-		tnapi->rx_rcb = dma_zalloc_coherent(&tp->pdev->dev,
-						    TG3_RX_RCB_RING_BYTES(tp),
-						    &tnapi->rx_rcb_mapping,
-						    GFP_KERNEL);
+		tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev,
+						   TG3_RX_RCB_RING_BYTES(tp),
+						   &tnapi->rx_rcb_mapping,
+						   GFP_KERNEL);
 		if (!tnapi->rx_rcb)
 			goto err_out;
 	}
@@ -8760,9 +8766,9 @@
 {
 	int i;
 
-	tp->hw_stats = dma_zalloc_coherent(&tp->pdev->dev,
-					   sizeof(struct tg3_hw_stats),
-					   &tp->stats_mapping, GFP_KERNEL);
+	tp->hw_stats = dma_alloc_coherent(&tp->pdev->dev,
+					  sizeof(struct tg3_hw_stats),
+					  &tp->stats_mapping, GFP_KERNEL);
 	if (!tp->hw_stats)
 		goto err_out;
 
@@ -8770,10 +8776,10 @@
 		struct tg3_napi *tnapi = &tp->napi[i];
 		struct tg3_hw_status *sblk;
 
-		tnapi->hw_status = dma_zalloc_coherent(&tp->pdev->dev,
-						       TG3_HW_STATUS_SIZE,
-						       &tnapi->status_mapping,
-						       GFP_KERNEL);
+		tnapi->hw_status = dma_alloc_coherent(&tp->pdev->dev,
+						      TG3_HW_STATUS_SIZE,
+						      &tnapi->status_mapping,
+						      GFP_KERNEL);
 		if (!tnapi->hw_status)
 			goto err_out;
 
@@ -12504,31 +12510,24 @@
 		tg3_warn_mgmt_link_flap(tp);
 
 	if (tg3_flag(tp, USE_PHYLIB)) {
-		u32 newadv;
 		struct phy_device *phydev;
 
 		phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
 
-		if (!(phydev->supported & SUPPORTED_Pause) ||
-		    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-		     (epause->rx_pause != epause->tx_pause)))
+		if (!phy_validate_pause(phydev, epause))
 			return -EINVAL;
 
 		tp->link_config.flowctrl = 0;
+		phy_set_asym_pause(phydev, epause->rx_pause, epause->tx_pause);
 		if (epause->rx_pause) {
 			tp->link_config.flowctrl |= FLOW_CTRL_RX;
 
 			if (epause->tx_pause) {
 				tp->link_config.flowctrl |= FLOW_CTRL_TX;
-				newadv = ADVERTISED_Pause;
-			} else
-				newadv = ADVERTISED_Pause |
-					 ADVERTISED_Asym_Pause;
+			}
 		} else if (epause->tx_pause) {
 			tp->link_config.flowctrl |= FLOW_CTRL_TX;
-			newadv = ADVERTISED_Asym_Pause;
-		} else
-			newadv = 0;
+		}
 
 		if (epause->autoneg)
 			tg3_flag_set(tp, PAUSE_AUTONEG);
@@ -12536,33 +12535,19 @@
 			tg3_flag_clear(tp, PAUSE_AUTONEG);
 
 		if (tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) {
-			u32 oldadv = phydev->advertising &
-				     (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-			if (oldadv != newadv) {
-				phydev->advertising &=
-					~(ADVERTISED_Pause |
-					  ADVERTISED_Asym_Pause);
-				phydev->advertising |= newadv;
-				if (phydev->autoneg) {
-					/*
-					 * Always renegotiate the link to
-					 * inform our link partner of our
-					 * flow control settings, even if the
-					 * flow control is forced.  Let
-					 * tg3_adjust_link() do the final
-					 * flow control setup.
-					 */
-					return phy_start_aneg(phydev);
-				}
+			if (phydev->autoneg) {
+				/* phy_set_asym_pause() will
+				 * renegotiate the link to inform our
+				 * link partner of our flow control
+				 * settings, even if the flow control
+				 * is forced.  Let tg3_adjust_link()
+				 * do the final flow control setup.
+				 */
+				return 0;
 			}
 
 			if (!epause->autoneg)
 				tg3_setup_flow_control(tp, 0, 0);
-		} else {
-			tp->link_config.advertising &=
-					~(ADVERTISED_Pause |
-					  ADVERTISED_Asym_Pause);
-			tp->link_config.advertising |= newadv;
 		}
 	} else {
 		int irq_sync = 0;
@@ -12776,9 +12761,6 @@
 {
 	struct tg3 *tp = netdev_priv(dev);
 
-	if (!netif_running(tp->dev))
-		return -EAGAIN;
-
 	switch (state) {
 	case ETHTOOL_ID_ACTIVE:
 		return 1;	/* cycle on/off once per second */
@@ -14027,7 +14009,7 @@
 	case SIOCGMIIPHY:
 		data->phy_id = tp->phy_addr;
 
-		/* fallthru */
+		/* fall through */
 	case SIOCGMIIREG: {
 		u32 mii_regval;
 
@@ -16998,32 +16980,6 @@
 	return err;
 }
 
-#ifdef CONFIG_SPARC
-static int tg3_get_macaddr_sparc(struct tg3 *tp)
-{
-	struct net_device *dev = tp->dev;
-	struct pci_dev *pdev = tp->pdev;
-	struct device_node *dp = pci_device_to_OF_node(pdev);
-	const unsigned char *addr;
-	int len;
-
-	addr = of_get_property(dp, "local-mac-address", &len);
-	if (addr && len == ETH_ALEN) {
-		memcpy(dev->dev_addr, addr, ETH_ALEN);
-		return 0;
-	}
-	return -ENODEV;
-}
-
-static int tg3_get_default_macaddr_sparc(struct tg3 *tp)
-{
-	struct net_device *dev = tp->dev;
-
-	memcpy(dev->dev_addr, idprom->id_ethaddr, ETH_ALEN);
-	return 0;
-}
-#endif
-
 static int tg3_get_device_address(struct tg3 *tp)
 {
 	struct net_device *dev = tp->dev;
@@ -17031,10 +16987,8 @@
 	int addr_ok = 0;
 	int err;
 
-#ifdef CONFIG_SPARC
-	if (!tg3_get_macaddr_sparc(tp))
+	if (!eth_platform_get_mac_address(&tp->pdev->dev, dev->dev_addr))
 		return 0;
-#endif
 
 	if (tg3_flag(tp, IS_SSB_CORE)) {
 		err = ssb_gige_get_macaddr(tp->pdev, &dev->dev_addr[0]);
@@ -17096,13 +17050,8 @@
 		}
 	}
 
-	if (!is_valid_ether_addr(&dev->dev_addr[0])) {
-#ifdef CONFIG_SPARC
-		if (!tg3_get_default_macaddr_sparc(tp))
-			return 0;
-#endif
+	if (!is_valid_ether_addr(&dev->dev_addr[0]))
 		return -EINVAL;
-	}
 	return 0;
 }
 
@@ -18096,8 +18045,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int tg3_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct tg3 *tp = netdev_priv(dev);
 	int err = 0;
 
@@ -18153,8 +18101,7 @@
 
 static int tg3_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct tg3 *tp = netdev_priv(dev);
 	int err = 0;
 
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index a772a33..6953d05 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2873,7 +2873,7 @@
 struct tg3_link_config {
 	/* Describes what we're trying to get. */
 	u32				advertising;
-	u16				speed;
+	u32				speed;
 	u8				duplex;
 	u8				autoneg;
 	u8				flowctrl;
@@ -2882,7 +2882,7 @@
 	u8				active_flowctrl;
 
 	u8				active_duplex;
-	u16				active_speed;
+	u32				active_speed;
 	u32				rmt_adv;
 };
 
diff --git a/drivers/net/ethernet/brocade/Kconfig b/drivers/net/ethernet/brocade/Kconfig
index c4bbe54..d4564c7 100644
--- a/drivers/net/ethernet/brocade/Kconfig
+++ b/drivers/net/ethernet/brocade/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # QLogic BR-series device configuration
 #
diff --git a/drivers/net/ethernet/brocade/Makefile b/drivers/net/ethernet/brocade/Makefile
index fec10f9..88b2f40 100644
--- a/drivers/net/ethernet/brocade/Makefile
+++ b/drivers/net/ethernet/brocade/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the QLogic BR-series device drivers.
 #
diff --git a/drivers/net/ethernet/brocade/bna/Kconfig b/drivers/net/ethernet/brocade/bna/Kconfig
index fe01279..b124a62 100644
--- a/drivers/net/ethernet/brocade/bna/Kconfig
+++ b/drivers/net/ethernet/brocade/bna/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # QLogic BR-series network device configuration
 #
diff --git a/drivers/net/ethernet/brocade/bna/Makefile b/drivers/net/ethernet/brocade/bna/Makefile
index 8584abc..d804b30 100644
--- a/drivers/net/ethernet/brocade/bna/Makefile
+++ b/drivers/net/ethernet/brocade/bna/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
 # Copyright (c) 2014-2015 QLogic Corporation.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_cee.c b/drivers/net/ethernet/brocade/bna/bfa_cee.c
index 95bc8b6..09fb931 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_cee.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_cee.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_cee.h b/drivers/net/ethernet/brocade/bna/bfa_cee.h
index d04eef5..8e628bb 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_cee.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_cee.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_cs.h b/drivers/net/ethernet/brocade/bna/bfa_cs.h
index 1d11d66..8f0ac7b 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_cs.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_cs.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_defs.h b/drivers/net/ethernet/brocade/bna/bfa_defs.h
index d152b3f..b08b168 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_defs.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_defs.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_defs_cna.h b/drivers/net/ethernet/brocade/bna/bfa_defs_cna.h
index f048887..50d3562 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_defs_cna.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_defs_cna.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_defs_mfg_comm.h b/drivers/net/ethernet/brocade/bna/bfa_defs_mfg_comm.h
index 7e17451..0478f35 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_defs_mfg_comm.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_defs_mfg_comm.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_defs_status.h b/drivers/net/ethernet/brocade/bna/bfa_defs_status.h
index a43b560..0ed9ec2 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_defs_status.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_defs_status.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index a36e386..4042c21 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
@@ -743,7 +735,7 @@
 
 	case IOCPF_E_TIMEOUT:
 		bfa_nw_ioc_hw_sem_release(ioc);
-			bfa_ioc_pf_failed(ioc);
+		bfa_ioc_pf_failed(ioc);
 		bfa_fsm_set_state(iocpf, bfa_iocpf_sm_initfail_sync);
 		break;
 
@@ -788,9 +780,8 @@
 
 	case IOCPF_E_INITFAIL:
 		del_timer(&ioc->iocpf_timer);
-		/*
-		 * !!! fall through !!!
-		 */
+		/* fall through */
+
 	case IOCPF_E_TIMEOUT:
 		bfa_nw_ioc_hw_sem_release(ioc);
 		if (event == IOCPF_E_TIMEOUT)
@@ -858,9 +849,7 @@
 
 	case IOCPF_E_FAIL:
 		del_timer(&ioc->iocpf_timer);
-		/*
-		 * !!! fall through !!!
-		 */
+		/* fall through */
 
 	case IOCPF_E_TIMEOUT:
 		bfa_ioc_set_cur_ioc_fwstate(ioc, BFI_IOC_FAIL);
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.h b/drivers/net/ethernet/brocade/bna/bfa_ioc.h
index 2c0b4c0..edd0ed5 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc_ct.c b/drivers/net/ethernet/brocade/bna/bfa_ioc_ct.c
index 74e5ed5..bd643d8 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc_ct.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc_ct.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_msgq.c b/drivers/net/ethernet/brocade/bna/bfa_msgq.c
index 9c5bb24..47125f4 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_msgq.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_msgq.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfa_msgq.h b/drivers/net/ethernet/brocade/bna/bfa_msgq.h
index 66bc8b5..75343b5 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_msgq.h
+++ b/drivers/net/ethernet/brocade/bna/bfa_msgq.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfi.h b/drivers/net/ethernet/brocade/bna/bfi.h
index 81e59ea..09c912e 100644
--- a/drivers/net/ethernet/brocade/bna/bfi.h
+++ b/drivers/net/ethernet/brocade/bna/bfi.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfi_cna.h b/drivers/net/ethernet/brocade/bna/bfi_cna.h
index fad6511..fb78bfd 100644
--- a/drivers/net/ethernet/brocade/bna/bfi_cna.h
+++ b/drivers/net/ethernet/brocade/bna/bfi_cna.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfi_enet.h b/drivers/net/ethernet/brocade/bna/bfi_enet.h
index d7be7ea..112aadf 100644
--- a/drivers/net/ethernet/brocade/bna/bfi_enet.h
+++ b/drivers/net/ethernet/brocade/bna/bfi_enet.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bfi_reg.h b/drivers/net/ethernet/brocade/bna/bfi_reg.h
index 2835b51..b15ae9e 100644
--- a/drivers/net/ethernet/brocade/bna/bfi_reg.h
+++ b/drivers/net/ethernet/brocade/bna/bfi_reg.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bna.h b/drivers/net/ethernet/brocade/bna/bna.h
index 006dcad..50de153 100644
--- a/drivers/net/ethernet/brocade/bna/bna.h
+++ b/drivers/net/ethernet/brocade/bna/bna.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bna_enet.c b/drivers/net/ethernet/brocade/bna/bna_enet.c
index bba8173..40107a9 100644
--- a/drivers/net/ethernet/brocade/bna/bna_enet.c
+++ b/drivers/net/ethernet/brocade/bna/bna_enet.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
@@ -1797,7 +1789,7 @@
 
 	/* A separate queue to allow synchronous setting of a list of MACs */
 	INIT_LIST_HEAD(&ucam_mod->del_q);
-	for (i = i; i < (bna->ioceth.attr.num_ucmac * 2); i++)
+	for (; i < (bna->ioceth.attr.num_ucmac * 2); i++)
 		list_add_tail(&ucam_mod->ucmac[i].qe, &ucam_mod->del_q);
 
 	ucam_mod->bna = bna;
@@ -1832,7 +1824,7 @@
 
 	/* A separate queue to allow synchronous setting of a list of MACs */
 	INIT_LIST_HEAD(&mcam_mod->del_q);
-	for (i = i; i < (bna->ioceth.attr.num_mcmac * 2); i++)
+	for (; i < (bna->ioceth.attr.num_mcmac * 2); i++)
 		list_add_tail(&mcam_mod->mcmac[i].qe, &mcam_mod->del_q);
 
 	mcam_mod->bna = bna;
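
The dropped "i = i" was a self-assignment: i deliberately carries over its final value from the loop that filled free_q, and the second loop continues from there to populate del_q. The empty init clause states the same intent without tripping self-assignment warnings. A standalone sketch of the idiom (hypothetical names):

	int i;

	for (i = 0; i < used; i++)
		list_add_tail(&mac[i].qe, &free_q);
	for (; i < total; i++)		/* resume from i's final value */
		list_add_tail(&mac[i].qe, &del_q);
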
diff --git a/drivers/net/ethernet/brocade/bna/bna_hw_defs.h b/drivers/net/ethernet/brocade/bna/bna_hw_defs.h
index 52b45c9..f335b71 100644
--- a/drivers/net/ethernet/brocade/bna/bna_hw_defs.h
+++ b/drivers/net/ethernet/brocade/bna/bna_hw_defs.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
index 95bc470..b5ecbfe 100644
--- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
+++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bna_types.h b/drivers/net/ethernet/brocade/bna/bna_types.h
index c438d03..666b692 100644
--- a/drivers/net/ethernet/brocade/bna/bna_types.h
+++ b/drivers/net/ethernet/brocade/bna/bna_types.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index ea5f32e..e338272 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
@@ -3040,7 +3032,7 @@
 	head_unmap->nvecs++;
 
 	for (i = 0, vect_id = 0; i < vectors - 1; i++) {
-		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		u32		size = skb_frag_size(frag);
 
 		if (unlikely(size == 0)) {
diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h
index 46f7b84..492a02d 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.h
+++ b/drivers/net/ethernet/brocade/bna/bnad.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 933799b..04ad0f2 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 31032de..b764c9f 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/cna.h b/drivers/net/ethernet/brocade/bna/cna.h
index 75f8f1a..28d89d0 100644
--- a/drivers/net/ethernet/brocade/bna/cna.h
+++ b/drivers/net/ethernet/brocade/bna/cna.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2006-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/brocade/bna/cna_fwimg.c b/drivers/net/ethernet/brocade/bna/cna_fwimg.c
index 2e7fb97..824eaef 100644
--- a/drivers/net/ethernet/brocade/bna/cna_fwimg.c
+++ b/drivers/net/ethernet/brocade/bna/cna_fwimg.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Linux network driver for QLogic BR-series Converged Network Adapter.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License (GPL) Version 2 as
- * published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 /*
  * Copyright (c) 2005-2014 Brocade Communications Systems, Inc.
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index b998401..f4b3bd8 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -1,5 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
-# Atmel device configuration
+# Cadence device configuration
 #
 
 config NET_VENDOR_CADENCE
@@ -12,15 +13,15 @@
 	  If unsure, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the remaining Atmel network card questions. If you say Y, you will be
+	  kernel: saying N will just cause the configurator to skip all the
+	  remaining Cadence network card questions. If you say Y, you will be
 	  asked for your specific card in the following questions.
 
 if NET_VENDOR_CADENCE
 
 config MACB
 	tristate "Cadence MACB/GEM support"
-	depends on HAS_DMA
+	depends on HAS_DMA && COMMON_CLK
 	select PHYLIB
 	---help---
 	  The Cadence MACB ethernet interface is found on many Atmel AT32 and
@@ -41,7 +42,7 @@
 
 config MACB_PCI
 	tristate "Cadence PCI MACB/GEM support"
-	depends on MACB && PCI && COMMON_CLK
+	depends on MACB && PCI
 	---help---
 	  This is a PCI wrapper for the MACB driver.
 
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 3d45f4c..03983bd 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Atmel MACB Ethernet Controller driver
  *
  * Copyright (C) 2004-2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _MACB_H
 #define _MACB_H
@@ -499,7 +496,11 @@
 
 /* Bitfields in TISUBN */
 #define GEM_SUBNSINCR_OFFSET			0
-#define GEM_SUBNSINCR_SIZE			16
+#define GEM_SUBNSINCRL_OFFSET			24
+#define GEM_SUBNSINCRL_SIZE			8
+#define GEM_SUBNSINCRH_OFFSET			0
+#define GEM_SUBNSINCRH_SIZE			16
+#define GEM_SUBNSINCR_SIZE			24
 
 /* Bitfields in TI */
 #define GEM_NSINCR_OFFSET			0
@@ -643,6 +644,7 @@
 #define MACB_CAPS_JUMBO				0x00000020
 #define MACB_CAPS_GEM_HAS_PTP			0x00000040
 #define MACB_CAPS_BD_RD_PREFETCH		0x00000080
+#define MACB_CAPS_NEEDS_RSTONUBR		0x00000100
 #define MACB_CAPS_FIFO_MODE			0x10000000
 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE	0x20000000
 #define MACB_CAPS_SG_DISABLED			0x40000000
@@ -714,6 +716,8 @@
 		__v; \
 	})
 
+#define MACB_READ_NSR(bp)	macb_readl(bp, NSR)
+
 /* struct macb_dma_desc - Hardware DMA descriptor
  * @addr: DMA address of data buffer
  * @ctrl: Control and status bits
@@ -834,6 +838,9 @@
 /* limit RX checksum offload to TCP and UDP packets */
 #define GEM_RX_CSUM_CHECKED_MASK		2
 
+/* Scaled PPM fraction */
+#define PPM_FRACTION	16
+
 /* struct macb_tx_skb - data about an skb which is being transmitted
  * @skb: skb currently being transmitted, only set for the last buffer
  *       of the frame
@@ -1060,7 +1067,8 @@
 	int	(*mog_alloc_rx_buffers)(struct macb *bp);
 	void	(*mog_free_rx_buffers)(struct macb *bp);
 	void	(*mog_init_rings)(struct macb *bp);
-	int	(*mog_rx)(struct macb_queue *queue, int budget);
+	int	(*mog_rx)(struct macb_queue *queue, struct napi_struct *napi,
+			  int budget);
 };
 
 /* MACB-PTP interface: adapt to platform needs. */
@@ -1077,12 +1085,17 @@
 			 struct ifreq *ifr, int cmd);
 };
 
+struct macb_pm_data {
+	u32 scrt2;
+	u32 usrio;
+};
+
 struct macb_config {
 	u32			caps;
 	unsigned int		dma_burst_length;
 	int	(*clk_init)(struct platform_device *pdev, struct clk **pclk,
 			    struct clk **hclk, struct clk **tx_clk,
-			    struct clk **rx_clk);
+			    struct clk **rx_clk, struct clk **tsu_clk);
 	int	(*init)(struct platform_device *pdev);
 	int	jumbo_max_len;
 };
@@ -1162,6 +1175,7 @@
 	struct clk		*hclk;
 	struct clk		*tx_clk;
 	struct clk		*rx_clk;
+	struct clk		*tsu_clk;
 	struct net_device	*dev;
 	union {
 		struct macb_stats	macb;
@@ -1214,6 +1228,10 @@
 
 	int	rx_bd_rd_prefetch;
 	int	tx_bd_rd_prefetch;
+
+	u32	rx_intr_mask;
+
+	struct macb_pm_data pm_data;
 };
 
 #ifdef CONFIG_MACB_USE_HWSTAMP
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 58b9744..0f10a27 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1,15 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Cadence MACB/GEM Ethernet Controller driver
  *
  * Copyright (C) 2004-2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -36,8 +34,17 @@
 #include <linux/ip.h>
 #include <linux/udp.h>
 #include <linux/tcp.h>
+#include <linux/iopoll.h>
+#include <linux/pm_runtime.h>
 #include "macb.h"
 
+/* This structure is only used for MACB on SiFive FU540 devices */
+struct sifive_fu540_macb_mgmt {
+	void __iomem *reg;
+	unsigned long rate;
+	struct clk_hw hw;
+};
+
 #define MACB_RX_BUFFER_SIZE	128
 #define RX_BUFFER_MULTIPLE	64  /* bytes */
 
@@ -56,12 +63,12 @@
 /* level of occupied TX descriptors under which we wake up TX process */
 #define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->tx_ring_size / 4)
 
-#define MACB_RX_INT_FLAGS	(MACB_BIT(RCOMP) | MACB_BIT(RXUBR)	\
-				 | MACB_BIT(ISR_ROVR))
+#define MACB_RX_INT_FLAGS	(MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR))
 #define MACB_TX_ERR_FLAGS	(MACB_BIT(ISR_TUND)			\
 					| MACB_BIT(ISR_RLE)		\
 					| MACB_BIT(TXERR))
-#define MACB_TX_INT_FLAGS	(MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP))
+#define MACB_TX_INT_FLAGS	(MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP)	\
+					| MACB_BIT(TXUBR))
 
 /* Max length of transmit frame must be a multiple of 8 bytes */
 #define MACB_TX_LEN_ALIGN	8
@@ -79,6 +86,10 @@
  */
 #define MACB_HALT_TIMEOUT	1230
 
+#define MACB_PM_TIMEOUT  100 /* ms */
+
+#define MACB_MDIO_TIMEOUT	1000000 /* in usecs */
+
 /* DMA buffer descriptor might be different size
  * depends on hardware configuration:
  *
@@ -154,9 +165,8 @@
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
 {
-	if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-		return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
-	return NULL;
+	return (struct macb_dma_desc_64 *)((void *)desc
+		+ sizeof(struct macb_dma_desc));
 }
 #endif
 
@@ -279,34 +289,22 @@
 
 static void macb_get_hwaddr(struct macb *bp)
 {
-	struct macb_platform_data *pdata;
 	u32 bottom;
 	u16 top;
 	u8 addr[6];
 	int i;
 
-	pdata = dev_get_platdata(&bp->pdev->dev);
-
 	/* Check all 4 address register for valid address */
 	for (i = 0; i < 4; i++) {
 		bottom = macb_or_gem_readl(bp, SA1B + i * 8);
 		top = macb_or_gem_readl(bp, SA1T + i * 8);
 
-		if (pdata && pdata->rev_eth_addr) {
-			addr[5] = bottom & 0xff;
-			addr[4] = (bottom >> 8) & 0xff;
-			addr[3] = (bottom >> 16) & 0xff;
-			addr[2] = (bottom >> 24) & 0xff;
-			addr[1] = top & 0xff;
-			addr[0] = (top & 0xff00) >> 8;
-		} else {
-			addr[0] = bottom & 0xff;
-			addr[1] = (bottom >> 8) & 0xff;
-			addr[2] = (bottom >> 16) & 0xff;
-			addr[3] = (bottom >> 24) & 0xff;
-			addr[4] = top & 0xff;
-			addr[5] = (top >> 8) & 0xff;
-		}
+		addr[0] = bottom & 0xff;
+		addr[1] = (bottom >> 8) & 0xff;
+		addr[2] = (bottom >> 16) & 0xff;
+		addr[3] = (bottom >> 24) & 0xff;
+		addr[4] = top & 0xff;
+		addr[5] = (top >> 8) & 0xff;
 
 		if (is_valid_ether_addr(addr)) {
 			memcpy(bp->dev->dev_addr, addr, sizeof(addr));
@@ -318,10 +316,26 @@
 	eth_hw_addr_random(bp->dev);
 }
 
+static int macb_mdio_wait_for_idle(struct macb *bp)
+{
+	u32 val;
+
+	return readx_poll_timeout(MACB_READ_NSR, bp, val, val & MACB_BIT(IDLE),
+				  1, MACB_MDIO_TIMEOUT);
+}
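
readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) from <linux/iopoll.h> repeatedly evaluates val = op(addr) until cond holds or timeout_us elapses; the MACB_READ_NSR wrapper exists purely so the macro has a single-argument accessor to call. Hedged usage sketch:

	u32 val;
	int ret;

	ret = readx_poll_timeout(MACB_READ_NSR, bp, val,
				 val & MACB_BIT(IDLE),
				 1, MACB_MDIO_TIMEOUT);
	/* ret is 0 once IDLE is set, -ETIMEDOUT if the bus never idles */
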
+
 static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct macb *bp = bus->priv;
-	int value;
+	int status;
+
+	status = pm_runtime_get_sync(&bp->pdev->dev);
+	if (status < 0)
+		goto mdio_pm_exit;
+
+	status = macb_mdio_wait_for_idle(bp);
+	if (status < 0)
+		goto mdio_read_exit;
 
 	macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF)
 			      | MACB_BF(RW, MACB_MAN_READ)
@@ -329,19 +343,32 @@
 			      | MACB_BF(REGA, regnum)
 			      | MACB_BF(CODE, MACB_MAN_CODE)));
 
-	/* wait for end of transfer */
-	while (!MACB_BFEXT(IDLE, macb_readl(bp, NSR)))
-		cpu_relax();
+	status = macb_mdio_wait_for_idle(bp);
+	if (status < 0)
+		goto mdio_read_exit;
 
-	value = MACB_BFEXT(DATA, macb_readl(bp, MAN));
+	status = MACB_BFEXT(DATA, macb_readl(bp, MAN));
 
-	return value;
+mdio_read_exit:
+	pm_runtime_mark_last_busy(&bp->pdev->dev);
+	pm_runtime_put_autosuspend(&bp->pdev->dev);
+mdio_pm_exit:
+	return status;
 }
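
The runtime-PM choreography here follows the stock pattern: take a usage reference (resuming the device if needed) before touching registers, then record activity and drop the reference so autosuspend can fire after the MACB_PM_TIMEOUT delay set up at probe. A generic sketch; note the commonly recommended form also rebalances the refcount when get_sync() fails, which the code above does not do:

	ret = pm_runtime_get_sync(dev);		/* resume if suspended */
	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* rebalance on failure */
		return ret;
	}
	/* ... MMIO accesses ... */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
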
 
 static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 			   u16 value)
 {
 	struct macb *bp = bus->priv;
+	int status;
+
+	status = pm_runtime_get_sync(&bp->pdev->dev);
+	if (status < 0)
+		goto mdio_pm_exit;
+
+	status = macb_mdio_wait_for_idle(bp);
+	if (status < 0)
+		goto mdio_write_exit;
 
 	macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF)
 			      | MACB_BF(RW, MACB_MAN_WRITE)
@@ -350,11 +377,15 @@
 			      | MACB_BF(CODE, MACB_MAN_CODE)
 			      | MACB_BF(DATA, value)));
 
-	/* wait for end of transfer */
-	while (!MACB_BFEXT(IDLE, macb_readl(bp, NSR)))
-		cpu_relax();
+	status = macb_mdio_wait_for_idle(bp);
+	if (status < 0)
+		goto mdio_write_exit;
 
-	return 0;
+mdio_write_exit:
+	pm_runtime_mark_last_busy(&bp->pdev->dev);
+	pm_runtime_put_autosuspend(&bp->pdev->dev);
+mdio_pm_exit:
+	return status;
 }
 
 /**
@@ -471,12 +502,10 @@
 static int macb_mii_probe(struct net_device *dev)
 {
 	struct macb *bp = netdev_priv(dev);
-	struct macb_platform_data *pdata;
 	struct phy_device *phydev;
 	struct device_node *np;
-	int phy_irq, ret, i;
+	int ret, i;
 
-	pdata = dev_get_platdata(&bp->pdev->dev);
 	np = bp->pdev->dev.of_node;
 	ret = 0;
 
@@ -491,8 +520,6 @@
 			 */
 			if (!bp->phy_node && !phy_find_first(bp->mii_bus)) {
 				for (i = 0; i < PHY_MAX_ADDR; i++) {
-					struct phy_device *phydev;
-
 					phydev = mdiobus_scan(bp->mii_bus, i);
 					if (IS_ERR(phydev) &&
 					    PTR_ERR(phydev) != -ENODEV) {
@@ -520,19 +547,6 @@
 			return -ENXIO;
 		}
 
-		if (pdata) {
-			if (gpio_is_valid(pdata->phy_irq_pin)) {
-				ret = devm_gpio_request(&bp->pdev->dev,
-							pdata->phy_irq_pin, "phy int");
-				if (!ret) {
-					phy_irq = gpio_to_irq(pdata->phy_irq_pin);
-					phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq;
-				}
-			} else {
-				phydev->irq = PHY_POLL;
-			}
-		}
-
 		/* attach the mac to the phy */
 		ret = phy_connect_direct(dev, phydev, &macb_handle_link_change,
 					 bp->phy_interface);
@@ -544,14 +558,13 @@
 
 	/* mask with MAC supported features */
 	if (macb_is_gem(bp) && bp->caps & MACB_CAPS_GIGABIT_MODE_AVAILABLE)
-		phydev->supported &= PHY_GBIT_FEATURES;
+		phy_set_max_speed(phydev, SPEED_1000);
 	else
-		phydev->supported &= PHY_BASIC_FEATURES;
+		phy_set_max_speed(phydev, SPEED_100);
 
 	if (bp->caps & MACB_CAPS_NO_GIGABIT_HALF)
-		phydev->supported &= ~SUPPORTED_1000baseT_Half;
-
-	phydev->advertising = phydev->supported;
+		phy_remove_link_mode(phydev,
+				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 
 	bp->link = 0;
 	bp->speed = 0;
@@ -562,7 +575,6 @@
 
 static int macb_mii_init(struct macb *bp)
 {
-	struct macb_platform_data *pdata;
 	struct device_node *np;
 	int err = -ENXIO;
 
@@ -582,7 +594,6 @@
 		 bp->pdev->name, bp->pdev->id);
 	bp->mii_bus->priv = bp;
 	bp->mii_bus->parent = &bp->pdev->dev;
-	pdata = dev_get_platdata(&bp->pdev->dev);
 
 	dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
 
@@ -596,9 +607,6 @@
 
 		err = mdiobus_register(bp->mii_bus);
 	} else {
-		if (pdata)
-			bp->mii_bus->phy_mask = pdata->phy_mask;
-
 		err = of_mdiobus_register(bp->mii_bus, np);
 	}
 
@@ -681,6 +689,11 @@
 	if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
 		desc_64 = macb_64b_desc(bp, desc);
 		desc_64->addrh = upper_32_bits(addr);
+		/* The low bits of RX address contain the RX_USED bit, clearing
+		 * of which allows packet RX. Make sure the high bits are also
+		 * visible to HW at that point.
+		 */
+		dma_wmb();
 	}
 #endif
 	desc->addr = lower_32_bits(addr);
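
The ordering matters because the controller owns the descriptor as soon as the RX_USED bit in the low word clears. Spelled out, under my reading of the comment above:

	desc_64->addrh = upper_32_bits(addr);	/* publish the high half first */
	dma_wmb();				/* fence against reordering */
	desc->addr = lower_32_bits(addr);	/* low half hands ownership to HW */
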
@@ -855,7 +868,9 @@
 
 			/* First, update TX stats if needed */
 			if (skb) {
-				if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+				if (unlikely(skb_shinfo(skb)->tx_flags &
+					     SKBTX_HW_TSTAMP) &&
+				    gem_ptp_do_txstamp(queue, skb, desc) == 0) {
 					/* skb now belongs to timestamp buffer
 					 * and will be removed later
 					 */
@@ -929,14 +944,19 @@
 
 			if (entry == bp->rx_ring_size - 1)
 				paddr |= MACB_BIT(RX_WRAP);
-			macb_set_addr(bp, desc, paddr);
 			desc->ctrl = 0;
+			/* Setting addr clears RX_USED and allows reception,
+			 * make sure ctrl is cleared first to avoid a race.
+			 */
+			dma_wmb();
+			macb_set_addr(bp, desc, paddr);
 
 			/* properly align Ethernet header */
 			skb_reserve(skb, NET_IP_ALIGN);
 		} else {
-			desc->addr &= ~MACB_BIT(RX_USED);
 			desc->ctrl = 0;
+			dma_wmb();
+			desc->addr &= ~MACB_BIT(RX_USED);
 		}
 	}
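
These dma_wmb() calls pair with the dma_rmb() added to gem_rx()/macb_rx() further down. A sketch of the protocol between the refill path and the poll path:

	refill (producer)		poll (consumer)
	desc->ctrl = 0;			if (!(desc->addr & RX_USED))
	dma_wmb();				break;
	desc->addr &= ~RX_USED;		dma_rmb();
					ctrl = desc->ctrl;

The write barrier publishes ctrl before ownership passes to the NIC; the read barrier keeps ctrl from being read ahead of the ownership check.
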
 
@@ -968,7 +988,8 @@
 	 */
 }
 
-static int gem_rx(struct macb_queue *queue, int budget)
+static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
+		  int budget)
 {
 	struct macb *bp = queue->bp;
 	unsigned int		len;
@@ -990,11 +1011,15 @@
 
 		rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
 		addr = macb_get_addr(bp, desc);
-		ctrl = desc->ctrl;
 
 		if (!rxused)
 			break;
 
+		/* Ensure ctrl is at least as up-to-date as rxused */
+		dma_rmb();
+
+		ctrl = desc->ctrl;
+
 		queue->rx_tail++;
 		count++;
 
@@ -1046,7 +1071,7 @@
 			       skb->data, 32, true);
 #endif
 
-		netif_receive_skb(skb);
+		napi_gro_receive(napi, skb);
 	}
 
 	gem_rx_refill(queue);
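
Switching from netif_receive_skb() to napi_gro_receive() lets consecutive TCP segments be coalesced into larger SKBs before they climb the stack, which is why the NAPI context is now threaded through mog_rx()/gem_rx()/macb_rx():

	napi_gro_receive(napi, skb);	/* may merge with prior segments */
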
@@ -1054,8 +1079,8 @@
 	return count;
 }
 
-static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag,
-			 unsigned int last_frag)
+static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
+			 unsigned int first_frag, unsigned int last_frag)
 {
 	unsigned int len;
 	unsigned int frag;
@@ -1131,7 +1156,7 @@
 	bp->dev->stats.rx_bytes += skb->len;
 	netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
 		    skb->len, skb->csum);
-	netif_receive_skb(skb);
+	napi_gro_receive(napi, skb);
 
 	return 0;
 }
@@ -1154,7 +1179,8 @@
 	queue->rx_tail = 0;
 }
 
-static int macb_rx(struct macb_queue *queue, int budget)
+static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
+		   int budget)
 {
 	struct macb *bp = queue->bp;
 	bool reset_rx_queue = false;
@@ -1169,11 +1195,14 @@
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		ctrl = desc->ctrl;
-
 		if (!(desc->addr & MACB_BIT(RX_USED)))
 			break;
 
+		/* Ensure ctrl is at least as up-to-date as addr */
+		dma_rmb();
+
+		ctrl = desc->ctrl;
+
 		if (ctrl & MACB_BIT(RX_SOF)) {
 			if (first_frag != -1)
 				discard_partial_frame(queue, first_frag, tail);
@@ -1188,7 +1217,7 @@
 				continue;
 			}
 
-			dropped = macb_rx_frame(queue, first_frag, tail);
+			dropped = macb_rx_frame(queue, napi, first_frag, tail);
 			first_frag = -1;
 			if (unlikely(dropped < 0)) {
 				reset_rx_queue = true;
@@ -1242,7 +1271,7 @@
 	netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
 		    (unsigned long)status, budget);
 
-	work_done = bp->macbgem_ops.mog_rx(queue, budget);
+	work_done = bp->macbgem_ops.mog_rx(queue, napi, budget);
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
 
@@ -1253,7 +1282,7 @@
 				queue_writel(queue, ISR, MACB_BIT(RCOMP));
 			napi_reschedule(napi);
 		} else {
-			queue_writel(queue, IER, MACB_RX_INT_FLAGS);
+			queue_writel(queue, IER, bp->rx_intr_mask);
 		}
 	}
 
@@ -1271,7 +1300,7 @@
 	u32 ctrl;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, IDR, MACB_RX_INT_FLAGS |
+		queue_writel(queue, IDR, bp->rx_intr_mask |
 					 MACB_TX_INT_FLAGS |
 					 MACB_BIT(HRESP));
 	}
@@ -1301,7 +1330,7 @@
 
 		/* Enable interrupts */
 		queue_writel(queue, IER,
-			     MACB_RX_INT_FLAGS |
+			     bp->rx_intr_mask |
 			     MACB_TX_INT_FLAGS |
 			     MACB_BIT(HRESP));
 	}
@@ -1313,6 +1342,21 @@
 	netif_tx_start_all_queues(dev);
 }
 
+static void macb_tx_restart(struct macb_queue *queue)
+{
+	unsigned int head = queue->tx_head;
+	unsigned int tail = queue->tx_tail;
+	struct macb *bp = queue->bp;
+
+	if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+		queue_writel(queue, ISR, MACB_BIT(TXUBR));
+
+	if (head == tail)
+		return;
+
+	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
+}
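
My reading of the new handler, offered as an assumption rather than from the datasheet: a TXUBR ("TX used bit read") interrupt means the DMA engine fetched a descriptor the CPU still owns and stopped. If head == tail there is genuinely nothing queued, so the early return avoids a spurious kick; otherwise the TSTART write restarts descriptor fetching from where the engine gave up.
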
+
 static irqreturn_t macb_interrupt(int irq, void *dev_id)
 {
 	struct macb_queue *queue = dev_id;
@@ -1340,14 +1384,14 @@
 			    (unsigned int)(queue - bp->queues),
 			    (unsigned long)status);
 
-		if (status & MACB_RX_INT_FLAGS) {
+		if (status & bp->rx_intr_mask) {
 			/* There's no point taking any more interrupts
 			 * until we have processed the buffers. The
 			 * scheduling call may fail if the poll routine
 			 * is already scheduled, so disable interrupts
 			 * now.
 			 */
-			queue_writel(queue, IDR, MACB_RX_INT_FLAGS);
+			queue_writel(queue, IDR, bp->rx_intr_mask);
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
 				queue_writel(queue, ISR, MACB_BIT(RCOMP));
 
@@ -1370,6 +1414,9 @@
 		if (status & MACB_BIT(TCOMP))
 			macb_tx_interrupt(queue);
 
+		if (status & MACB_BIT(TXUBR))
+			macb_tx_restart(queue);
+
 		/* Link change detection isn't possible with RMII, so we'll
 		 * add that if/when we get our hands on a full-blown MII PHY.
 		 */
@@ -1377,8 +1424,9 @@
 		/* There is a hardware issue under heavy load where DMA can
 		 * stop, this causes endless "used buffer descriptor read"
 		 * interrupts but it can be cleared by re-enabling RX. See
-		 * the at91 manual, section 41.3.1 or the Zynq manual
-		 * section 16.7.4 for details.
+		 * the at91rm9200 manual, section 41.3.1 or the Zynq manual
+		 * section 16.7.4 for details. RXUBR is only enabled for
+		 * these two versions.
 		 */
 		if (status & MACB_BIT(RXUBR)) {
 			ctrl = macb_readl(bp, NCR);
@@ -1685,7 +1733,7 @@
 			padlen = 0;
 		/* No room for FCS, need to reallocate skb. */
 		else
-			padlen = ETH_FCS_LEN - tailroom;
+			padlen = ETH_FCS_LEN;
 	} else {
 		/* Add room for FCS. */
 		padlen += ETH_FCS_LEN;
@@ -1699,16 +1747,12 @@
 		if (!nskb)
 			return -ENOMEM;
 
-		dev_kfree_skb_any(*skb);
+		dev_consume_skb_any(*skb);
 		*skb = nskb;
 	}
 
-	if (padlen) {
-		if (padlen >= ETH_FCS_LEN)
-			skb_put_zero(*skb, padlen - ETH_FCS_LEN);
-		else
-			skb_trim(*skb, ETH_FCS_LEN - padlen);
-	}
+	if (padlen > ETH_FCS_LEN)
+		skb_put_zero(*skb, padlen - ETH_FCS_LEN);
 
 add_fcs:
 	/* set FCS to packet */
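
Tracing padlen through the rewritten block, as I read the code above: enough tailroom leaves padlen == 0 and the skb is reused as-is; a short frame needing N pad bytes yields padlen == N + ETH_FCS_LEN; a full frame with no tailroom now reserves the full ETH_FCS_LEN instead of the old under-count of ETH_FCS_LEN - tailroom. After the reallocation, only padlen > ETH_FCS_LEN implies real zero padding; exactly ETH_FCS_LEN means the room is for the checksum alone, so the old skb_trim() branch had no remaining valid case.
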
@@ -2228,7 +2272,7 @@
 
 		/* Enable interrupts */
 		queue_writel(queue, IER,
-			     MACB_RX_INT_FLAGS |
+			     bp->rx_intr_mask |
 			     MACB_TX_INT_FLAGS |
 			     MACB_BIT(HRESP));
 	}
@@ -2366,12 +2410,18 @@
 
 	netdev_dbg(bp->dev, "open\n");
 
+	err = pm_runtime_get_sync(&bp->pdev->dev);
+	if (err < 0)
+		goto pm_exit;
+
 	/* carrier starts down */
 	netif_carrier_off(dev);
 
 	/* if the phy is not yet registered, retry later */
-	if (!dev->phydev)
-		return -EAGAIN;
+	if (!dev->phydev) {
+		err = -EAGAIN;
+		goto pm_exit;
+	}
 
 	/* RX buffers initialization */
 	macb_init_rx_buffer_size(bp, bufsz);
@@ -2380,15 +2430,15 @@
 	if (err) {
 		netdev_err(dev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
-		return err;
+		goto pm_exit;
 	}
 
-	bp->macbgem_ops.mog_init_rings(bp);
-	macb_init_hw(bp);
-
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
 		napi_enable(&queue->napi);
 
+	bp->macbgem_ops.mog_init_rings(bp);
+	macb_init_hw(bp);
+
 	/* schedule a link state check */
 	phy_start(dev->phydev);
 
@@ -2397,6 +2447,11 @@
 	if (bp->ptp_info)
 		bp->ptp_info->ptp_init(dev);
 
+pm_exit:
+	if (err) {
+		pm_runtime_put_sync(&bp->pdev->dev);
+		return err;
+	}
 	return 0;
 }
 
@@ -2425,6 +2480,8 @@
 	if (bp->ptp_info)
 		bp->ptp_info->ptp_remove(dev);
 
+	pm_runtime_put(&bp->pdev->dev);
+
 	return 0;
 }
 
@@ -2798,10 +2855,14 @@
 
 static void gem_enable_flow_filters(struct macb *bp, bool enable)
 {
+	struct net_device *netdev = bp->dev;
 	struct ethtool_rx_fs_item *item;
 	u32 t2_scr;
 	int num_t2_scr;
 
+	if (!(netdev->features & NETIF_F_NTUPLE))
+		return;
+
 	num_t2_scr = GEM_BFEXT(T2SCR, gem_readl(bp, DCFG8));
 
 	list_for_each_entry(item, &bp->rx_fs_list.list, list) {
@@ -2961,8 +3022,7 @@
 	gem_prog_cmp_regs(bp, fs);
 	bp->rx_fs_list.count++;
 	/* enable filtering if NTUPLE on */
-	if (netdev->features & NETIF_F_NTUPLE)
-		gem_enable_flow_filters(bp, 1);
+	gem_enable_flow_filters(bp, 1);
 
 	spin_unlock_irqrestore(&bp->rx_fs_lock, flags);
 	return 0;
@@ -3150,6 +3210,50 @@
 	}
 }
 
+static inline void macb_set_txcsum_feature(struct macb *bp,
+					   netdev_features_t features)
+{
+	u32 val;
+
+	if (!macb_is_gem(bp))
+		return;
+
+	val = gem_readl(bp, DMACFG);
+	if (features & NETIF_F_HW_CSUM)
+		val |= GEM_BIT(TXCOEN);
+	else
+		val &= ~GEM_BIT(TXCOEN);
+
+	gem_writel(bp, DMACFG, val);
+}
+
+static inline void macb_set_rxcsum_feature(struct macb *bp,
+					   netdev_features_t features)
+{
+	struct net_device *netdev = bp->dev;
+	u32 val;
+
+	if (!macb_is_gem(bp))
+		return;
+
+	val = gem_readl(bp, NCFGR);
+	if ((features & NETIF_F_RXCSUM) && !(netdev->flags & IFF_PROMISC))
+		val |= GEM_BIT(RXCOEN);
+	else
+		val &= ~GEM_BIT(RXCOEN);
+
+	gem_writel(bp, NCFGR, val);
+}
+
+static inline void macb_set_rxflow_feature(struct macb *bp,
+					   netdev_features_t features)
+{
+	if (!macb_is_gem(bp))
+		return;
+
+	gem_enable_flow_filters(bp, !!(features & NETIF_F_NTUPLE));
+}
+
 static int macb_set_features(struct net_device *netdev,
 			     netdev_features_t features)
 {
@@ -3157,39 +3261,35 @@
 	netdev_features_t changed = features ^ netdev->features;
 
 	/* TX checksum offload */
-	if ((changed & NETIF_F_HW_CSUM) && macb_is_gem(bp)) {
-		u32 dmacfg;
-
-		dmacfg = gem_readl(bp, DMACFG);
-		if (features & NETIF_F_HW_CSUM)
-			dmacfg |= GEM_BIT(TXCOEN);
-		else
-			dmacfg &= ~GEM_BIT(TXCOEN);
-		gem_writel(bp, DMACFG, dmacfg);
-	}
+	if (changed & NETIF_F_HW_CSUM)
+		macb_set_txcsum_feature(bp, features);
 
 	/* RX checksum offload */
-	if ((changed & NETIF_F_RXCSUM) && macb_is_gem(bp)) {
-		u32 netcfg;
-
-		netcfg = gem_readl(bp, NCFGR);
-		if (features & NETIF_F_RXCSUM &&
-		    !(netdev->flags & IFF_PROMISC))
-			netcfg |= GEM_BIT(RXCOEN);
-		else
-			netcfg &= ~GEM_BIT(RXCOEN);
-		gem_writel(bp, NCFGR, netcfg);
-	}
+	if (changed & NETIF_F_RXCSUM)
+		macb_set_rxcsum_feature(bp, features);
 
 	/* RX Flow Filters */
-	if ((changed & NETIF_F_NTUPLE) && macb_is_gem(bp)) {
-		bool turn_on = features & NETIF_F_NTUPLE;
+	if (changed & NETIF_F_NTUPLE)
+		macb_set_rxflow_feature(bp, features);
 
-		gem_enable_flow_filters(bp, turn_on);
-	}
 	return 0;
 }
 
+static void macb_restore_features(struct macb *bp)
+{
+	struct net_device *netdev = bp->dev;
+	netdev_features_t features = netdev->features;
+
+	/* TX checksum offload */
+	macb_set_txcsum_feature(bp, features);
+
+	/* RX checksum offload */
+	macb_set_rxcsum_feature(bp, features);
+
+	/* RX Flow Filters */
+	macb_set_rxflow_feature(bp, features);
+}
+
 static const struct net_device_ops macb_netdev_ops = {
 	.ndo_open		= macb_open,
 	.ndo_stop		= macb_close,
@@ -3273,7 +3373,7 @@
 
 static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 			 struct clk **hclk, struct clk **tx_clk,
-			 struct clk **rx_clk)
+			 struct clk **rx_clk, struct clk **tsu_clk)
 {
 	struct macb_platform_data *pdata;
 	int err;
@@ -3287,52 +3387,71 @@
 		*hclk = devm_clk_get(&pdev->dev, "hclk");
 	}
 
-	if (IS_ERR(*pclk)) {
+	if (IS_ERR_OR_NULL(*pclk)) {
 		err = PTR_ERR(*pclk);
-		dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err);
+		if (!err)
+			err = -ENODEV;
+
+		dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err);
 		return err;
 	}
 
-	if (IS_ERR(*hclk)) {
+	if (IS_ERR_OR_NULL(*hclk)) {
 		err = PTR_ERR(*hclk);
-		dev_err(&pdev->dev, "failed to get hclk (%u)\n", err);
+		if (!err)
+			err = -ENODEV;
+
+		dev_err(&pdev->dev, "failed to get hclk (%d)\n", err);
 		return err;
 	}
 
-	*tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+	*tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
 	if (IS_ERR(*tx_clk))
-		*tx_clk = NULL;
+		return PTR_ERR(*tx_clk);
 
-	*rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
+	*rx_clk = devm_clk_get_optional(&pdev->dev, "rx_clk");
 	if (IS_ERR(*rx_clk))
-		*rx_clk = NULL;
+		return PTR_ERR(*rx_clk);
+
+	*tsu_clk = devm_clk_get_optional(&pdev->dev, "tsu_clk");
+	if (IS_ERR(*tsu_clk))
+		return PTR_ERR(*tsu_clk);
 
 	err = clk_prepare_enable(*pclk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err);
 		return err;
 	}
 
 	err = clk_prepare_enable(*hclk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable hclk (%d)\n", err);
 		goto err_disable_pclk;
 	}
 
 	err = clk_prepare_enable(*tx_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
 		goto err_disable_hclk;
 	}
 
 	err = clk_prepare_enable(*rx_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
 		goto err_disable_txclk;
 	}
 
+	err = clk_prepare_enable(*tsu_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable tsu_clk (%d)\n", err);
+		goto err_disable_rxclk;
+	}
+
 	return 0;
 
+err_disable_rxclk:
+	clk_disable_unprepare(*rx_clk);
+
 err_disable_txclk:
 	clk_disable_unprepare(*tx_clk);
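
devm_clk_get_optional() returns NULL rather than an error when the clock simply is not described for the device, and the clk API treats a NULL clock as a no-op, so only genuine lookup failures (including -EPROBE_DEFER) propagate. Sketch of the contract:

	struct clk *clk = devm_clk_get_optional(&pdev->dev, "tsu_clk");

	if (IS_ERR(clk))
		return PTR_ERR(clk);	/* real error: fail or defer */
	/* clk may be NULL here; clk_prepare_enable(NULL) returns 0 */
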
 
@@ -3367,7 +3486,7 @@
 
 		queue = &bp->queues[q];
 		queue->bp = bp;
-		netif_napi_add(dev, &queue->napi, macb_poll, 64);
+		netif_napi_add(dev, &queue->napi, macb_poll, NAPI_POLL_WEIGHT);
 		if (hw_q) {
 			queue->ISR  = GEM_ISR(hw_q - 1);
 			queue->IER  = GEM_IER(hw_q - 1);
@@ -3506,6 +3625,8 @@
 /* max number of receive buffers */
 #define AT91ETHER_MAX_RX_DESCR	9
 
+static struct sifive_fu540_macb_mgmt *mgmt;
+
 /* Initialize and start the Receiver and Transmit subsystems */
 static int at91ether_start(struct net_device *dev)
 {
@@ -3642,9 +3763,9 @@
 		/* Store packet information (to free when Tx completed) */
 		lp->skb = skb;
 		lp->skb_length = skb->len;
-		lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len,
-							DMA_TO_DEVICE);
-		if (dma_mapping_error(NULL, lp->skb_physaddr)) {
+		lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data,
+						  skb->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) {
 			dev_kfree_skb_any(skb);
 			dev->stats.tx_dropped++;
 			netdev_err(dev, "%s: DMA mapping error\n", __func__);
@@ -3732,9 +3853,9 @@
 			dev->stats.tx_errors++;
 
 		if (lp->skb) {
-			dev_kfree_skb_irq(lp->skb);
+			dev_consume_skb_irq(lp->skb);
 			lp->skb = NULL;
-			dma_unmap_single(NULL, lp->skb_physaddr,
+			dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr,
 					 lp->skb_length, DMA_TO_DEVICE);
 			dev->stats.tx_packets++;
 			dev->stats.tx_bytes += lp->skb_length;
@@ -3783,13 +3904,14 @@
 
 static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
 			      struct clk **hclk, struct clk **tx_clk,
-			      struct clk **rx_clk)
+			      struct clk **rx_clk, struct clk **tsu_clk)
 {
 	int err;
 
 	*hclk = NULL;
 	*tx_clk = NULL;
 	*rx_clk = NULL;
+	*tsu_clk = NULL;
 
 	*pclk = devm_clk_get(&pdev->dev, "ether_clk");
 	if (IS_ERR(*pclk))
@@ -3797,7 +3919,7 @@
 
 	err = clk_prepare_enable(*pclk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err);
 		return err;
 	}
 
@@ -3832,6 +3954,116 @@
 	return 0;
 }
 
+static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw,
+					       unsigned long parent_rate)
+{
+	return mgmt->rate;
+}
+
+static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long *parent_rate)
+{
+	if (WARN_ON(rate < 2500000))
+		return 2500000;
+	else if (rate == 2500000)
+		return 2500000;
+	else if (WARN_ON(rate < 13750000))
+		return 2500000;
+	else if (WARN_ON(rate < 25000000))
+		return 25000000;
+	else if (rate == 25000000)
+		return 25000000;
+	else if (WARN_ON(rate < 75000000))
+		return 25000000;
+	else if (WARN_ON(rate < 125000000))
+		return 125000000;
+	else if (rate == 125000000)
+		return 125000000;
+
+	WARN_ON(rate > 125000000);
+
+	return 125000000;
+}
+
+static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long parent_rate)
+{
+	rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate);
+	if (rate != 125000000)
+		iowrite32(1, mgmt->reg);
+	else
+		iowrite32(0, mgmt->reg);
+	mgmt->rate = rate;
+
+	return 0;
+}
+
+static const struct clk_ops fu540_c000_ops = {
+	.recalc_rate = fu540_macb_tx_recalc_rate,
+	.round_rate = fu540_macb_tx_round_rate,
+	.set_rate = fu540_macb_tx_set_rate,
+};
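
As I read it, the GEMGXL management register acts as a one-bit clock mux: set_rate() writes 0 to select the 125 MHz gigabit TX clock and 1 for the slower source, so round_rate() snaps every request to 2.5, 25 or 125 MHz, the standard TX clock rates for 10/100/1000 Mb/s links. The whole set_rate() operation reduces to:

	iowrite32(rate == 125000000 ? 0 : 1, mgmt->reg);
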
+
+static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk,
+			       struct clk **hclk, struct clk **tx_clk,
+			       struct clk **rx_clk, struct clk **tsu_clk)
+{
+	struct clk_init_data init;
+	int err = 0;
+
+	err = macb_clk_init(pdev, pclk, hclk, tx_clk, rx_clk, tsu_clk);
+	if (err)
+		return err;
+
+	mgmt = devm_kzalloc(&pdev->dev, sizeof(*mgmt), GFP_KERNEL);
+	if (!mgmt)
+		return -ENOMEM;
+
+	init.name = "sifive-gemgxl-mgmt";
+	init.ops = &fu540_c000_ops;
+	init.flags = 0;
+	init.num_parents = 0;
+
+	mgmt->rate = 0;
+	mgmt->hw.init = &init;
+
+	*tx_clk = clk_register(NULL, &mgmt->hw);
+	if (IS_ERR(*tx_clk))
+		return PTR_ERR(*tx_clk);
+
+	err = clk_prepare_enable(*tx_clk);
+	if (err)
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+	else
+		dev_info(&pdev->dev, "Registered clk switch '%s'\n", init.name);
+
+	return 0;
+}
+
+static int fu540_c000_init(struct platform_device *pdev)
+{
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+
+	mgmt->reg = ioremap(res->start, resource_size(res));
+	if (!mgmt->reg)
+		return -ENOMEM;
+
+	return macb_init(pdev);
+}
+
+static const struct macb_config fu540_c000_config = {
+	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO |
+		MACB_CAPS_GEM_HAS_PTP,
+	.dma_burst_length = 16,
+	.clk_init = fu540_c000_clk_init,
+	.init = fu540_c000_init,
+	.jumbo_max_len = 10240,
+};
+
 static const struct macb_config at91sam9260_config = {
 	.caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII,
 	.clk_init = macb_clk_init,
@@ -3876,6 +4108,7 @@
 };
 
 static const struct macb_config emac_config = {
+	.caps = MACB_CAPS_NEEDS_RSTONUBR,
 	.clk_init = at91ether_clk_init,
 	.init = at91ether_init,
 };
@@ -3897,7 +4130,8 @@
 };
 
 static const struct macb_config zynq_config = {
-	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF,
+	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF |
+		MACB_CAPS_NEEDS_RSTONUBR,
 	.dma_burst_length = 16,
 	.clk_init = macb_clk_init,
 	.init = macb_init,
@@ -3910,6 +4144,7 @@
 	{ .compatible = "cdns,np4-macb", .data = &np4_config },
 	{ .compatible = "cdns,pc302-gem", .data = &pc302gem_config },
 	{ .compatible = "cdns,gem", .data = &pc302gem_config },
+	{ .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config },
 	{ .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config },
 	{ .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config },
 	{ .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config },
@@ -3918,6 +4153,7 @@
 	{ .compatible = "cdns,emac", .data = &emac_config },
 	{ .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config},
 	{ .compatible = "cdns,zynq-gem", .data = &zynq_config },
+	{ .compatible = "sifive,fu540-c000-gem", .data = &fu540_c000_config },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, macb_dt_ids);
@@ -3937,13 +4173,13 @@
 {
 	const struct macb_config *macb_config = &default_gem_config;
 	int (*clk_init)(struct platform_device *, struct clk **,
-			struct clk **, struct clk **,  struct clk **)
-					      = macb_config->clk_init;
+			struct clk **, struct clk **,  struct clk **,
+			struct clk **) = macb_config->clk_init;
 	int (*init)(struct platform_device *) = macb_config->init;
 	struct device_node *np = pdev->dev.of_node;
 	struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL;
+	struct clk *tsu_clk = NULL;
 	unsigned int queue_mask, num_queues;
-	struct macb_platform_data *pdata;
 	bool native_io;
 	struct phy_device *phydev;
 	struct net_device *dev;
@@ -3969,10 +4205,15 @@
 		}
 	}
 
-	err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk);
+	err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk, &tsu_clk);
 	if (err)
 		return err;
 
+	pm_runtime_set_autosuspend_delay(&pdev->dev, MACB_PM_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
 	native_io = hw_is_native_io(mem);
 
 	macb_probe_queues(mem, native_io, &queue_mask, &num_queues);
@@ -4006,6 +4247,7 @@
 	bp->hclk = hclk;
 	bp->tx_clk = tx_clk;
 	bp->rx_clk = rx_clk;
+	bp->tsu_clk = tsu_clk;
 	if (macb_config)
 		bp->jumbo_max_len = macb_config->jumbo_max_len;
 
@@ -4052,28 +4294,26 @@
 						macb_dma_desc_get_size(bp);
 	}
 
+	bp->rx_intr_mask = MACB_RX_INT_FLAGS;
+	if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
+		bp->rx_intr_mask |= MACB_BIT(RXUBR);
+
 	mac = of_get_mac_address(np);
-	if (mac) {
+	if (PTR_ERR(mac) == -EPROBE_DEFER) {
+		err = -EPROBE_DEFER;
+		goto err_out_free_netdev;
+	} else if (!IS_ERR_OR_NULL(mac)) {
 		ether_addr_copy(bp->dev->dev_addr, mac);
 	} else {
-		err = of_get_nvmem_mac_address(np, bp->dev->dev_addr);
-		if (err) {
-			if (err == -EPROBE_DEFER)
-				goto err_out_free_netdev;
-			macb_get_hwaddr(bp);
-		}
+		macb_get_hwaddr(bp);
 	}
 
 	err = of_get_phy_mode(np);
-	if (err < 0) {
-		pdata = dev_get_platdata(&pdev->dev);
-		if (pdata && pdata->is_rmii)
-			bp->phy_interface = PHY_INTERFACE_MODE_RMII;
-		else
-			bp->phy_interface = PHY_INTERFACE_MODE_MII;
-	} else {
+	if (err < 0)
+		/* not found in DT, MII by default */
+		bp->phy_interface = PHY_INTERFACE_MODE_MII;
+	else
 		bp->phy_interface = err;
-	}
 
 	/* IP specific init */
 	err = init(pdev);
@@ -4103,6 +4343,9 @@
 		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
 		    dev->base_addr, dev->irq, dev->dev_addr);
 
+	pm_runtime_mark_last_busy(&bp->pdev->dev);
+	pm_runtime_put_autosuspend(&bp->pdev->dev);
+
 	return 0;
 
 err_out_unregister_mdio:
@@ -4118,9 +4361,14 @@
 
 err_disable_clocks:
 	clk_disable_unprepare(tx_clk);
+	clk_unregister(tx_clk);
 	clk_disable_unprepare(hclk);
 	clk_disable_unprepare(pclk);
 	clk_disable_unprepare(rx_clk);
+	clk_disable_unprepare(tsu_clk);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 
 	return err;
 }
@@ -4144,10 +4392,18 @@
 		mdiobus_free(bp->mii_bus);
 
 		unregister_netdev(dev);
-		clk_disable_unprepare(bp->tx_clk);
-		clk_disable_unprepare(bp->hclk);
-		clk_disable_unprepare(bp->pclk);
-		clk_disable_unprepare(bp->rx_clk);
+		tasklet_kill(&bp->hresp_err_tasklet);
+		pm_runtime_disable(&pdev->dev);
+		pm_runtime_dont_use_autosuspend(&pdev->dev);
+		if (!pm_runtime_suspended(&pdev->dev)) {
+			clk_disable_unprepare(bp->tx_clk);
+			clk_unregister(bp->tx_clk);
+			clk_disable_unprepare(bp->hclk);
+			clk_disable_unprepare(bp->pclk);
+			clk_disable_unprepare(bp->rx_clk);
+			clk_disable_unprepare(bp->tsu_clk);
+			pm_runtime_set_suspended(&pdev->dev);
+		}
 		of_node_put(bp->phy_node);
 		free_netdev(dev);
 	}
@@ -4157,50 +4413,127 @@
 
 static int __maybe_unused macb_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct macb *bp = netdev_priv(netdev);
+	struct macb_queue *queue = bp->queues;
+	unsigned long flags;
+	unsigned int q;
 
-	netif_carrier_off(netdev);
-	netif_device_detach(netdev);
+	if (!netif_running(netdev))
+		return 0;
 
 	if (bp->wol & MACB_WOL_ENABLED) {
 		macb_writel(bp, IER, MACB_BIT(WOL));
 		macb_writel(bp, WOL, MACB_BIT(MAG));
 		enable_irq_wake(bp->queues[0].irq);
+		netif_device_detach(netdev);
 	} else {
-		clk_disable_unprepare(bp->tx_clk);
-		clk_disable_unprepare(bp->hclk);
-		clk_disable_unprepare(bp->pclk);
-		clk_disable_unprepare(bp->rx_clk);
+		netif_device_detach(netdev);
+		for (q = 0, queue = bp->queues; q < bp->num_queues;
+		     ++q, ++queue)
+			napi_disable(&queue->napi);
+		phy_stop(netdev->phydev);
+		phy_suspend(netdev->phydev);
+		spin_lock_irqsave(&bp->lock, flags);
+		macb_reset_hw(bp);
+		spin_unlock_irqrestore(&bp->lock, flags);
+
+		if (!(bp->caps & MACB_CAPS_USRIO_DISABLED))
+			bp->pm_data.usrio = macb_or_gem_readl(bp, USRIO);
+
+		if (netdev->hw_features & NETIF_F_NTUPLE)
+			bp->pm_data.scrt2 = gem_readl_n(bp, ETHT, SCRT2_ETHT);
 	}
 
+	netif_carrier_off(netdev);
+	if (bp->ptp_info)
+		bp->ptp_info->ptp_remove(netdev);
+	pm_runtime_force_suspend(dev);
+
 	return 0;
 }
 
 static int __maybe_unused macb_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct macb *bp = netdev_priv(netdev);
+	struct macb_queue *queue = bp->queues;
+	unsigned int q;
+
+	if (!netif_running(netdev))
+		return 0;
+
+	pm_runtime_force_resume(dev);
 
 	if (bp->wol & MACB_WOL_ENABLED) {
 		macb_writel(bp, IDR, MACB_BIT(WOL));
 		macb_writel(bp, WOL, 0);
 		disable_irq_wake(bp->queues[0].irq);
 	} else {
+		macb_writel(bp, NCR, MACB_BIT(MPE));
+
+		if (netdev->hw_features & NETIF_F_NTUPLE)
+			gem_writel_n(bp, ETHT, SCRT2_ETHT, bp->pm_data.scrt2);
+
+		if (!(bp->caps & MACB_CAPS_USRIO_DISABLED))
+			macb_or_gem_writel(bp, USRIO, bp->pm_data.usrio);
+
+		for (q = 0, queue = bp->queues; q < bp->num_queues;
+		     ++q, ++queue)
+			napi_enable(&queue->napi);
+		phy_resume(netdev->phydev);
+		phy_init_hw(netdev->phydev);
+		phy_start(netdev->phydev);
+	}
+
+	bp->macbgem_ops.mog_init_rings(bp);
+	macb_init_hw(bp);
+	macb_set_rx_mode(netdev);
+	macb_restore_features(bp);
+	netif_device_attach(netdev);
+	if (bp->ptp_info)
+		bp->ptp_info->ptp_init(netdev);
+
+	return 0;
+}
+
+static int __maybe_unused macb_runtime_suspend(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct macb *bp = netdev_priv(netdev);
+
+	if (!(device_may_wakeup(&bp->dev->dev))) {
+		clk_disable_unprepare(bp->tx_clk);
+		clk_disable_unprepare(bp->hclk);
+		clk_disable_unprepare(bp->pclk);
+		clk_disable_unprepare(bp->rx_clk);
+	}
+	clk_disable_unprepare(bp->tsu_clk);
+
+	return 0;
+}
+
+static int __maybe_unused macb_runtime_resume(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct macb *bp = netdev_priv(netdev);
+
+	if (!(device_may_wakeup(&bp->dev->dev))) {
 		clk_prepare_enable(bp->pclk);
 		clk_prepare_enable(bp->hclk);
 		clk_prepare_enable(bp->tx_clk);
 		clk_prepare_enable(bp->rx_clk);
 	}
-
-	netif_device_attach(netdev);
+	clk_prepare_enable(bp->tsu_clk);
 
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(macb_pm_ops, macb_suspend, macb_resume);
+static const struct dev_pm_ops macb_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(macb_suspend, macb_resume)
+	SET_RUNTIME_PM_OPS(macb_runtime_suspend, macb_runtime_resume, NULL)
+};
 
 static struct platform_driver macb_driver = {
 	.probe		= macb_probe,
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index 248a8fc..617b3b7 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /**
  * Cadence GEM PCI wrapper.
  *
@@ -5,18 +6,6 @@
  *
  * Authors: Rafal Ozieblo <rafalo@cadence.com>
  *	    Bartosz Folta <bfolta@cadence.com>
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2  of
- * the License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index cd5296b..43a3f0d 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /**
  * 1588 PTP support for Cadence GEM device.
  *
@@ -5,18 +6,6 @@
  *
  * Authors: Rafal Ozieblo <rafalo@cadence.com>
  *          Bartosz Folta <bfolta@cadence.com>
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2  of
- * the License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/kernel.h>
 #include <linux/types.h>
@@ -115,7 +104,10 @@
 	 * to take effect.
 	 */
 	spin_lock_irqsave(&bp->tsu_clk_lock, flags);
-	gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns));
+	/* RegBit[15:0] = Subns[23:8]; RegBit[31:24] = Subns[7:0] */
+	gem_writel(bp, TISUBN, GEM_BF(SUBNSINCRL, incr_spec->sub_ns) |
+		   GEM_BF(SUBNSINCRH, (incr_spec->sub_ns >>
+			  GEM_SUBNSINCRL_SIZE)));
 	gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns));
 	spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
 
@@ -146,7 +138,7 @@
 	 * (temp / USEC_PER_SEC) + 0.5
 	 */
 	adj += (USEC_PER_SEC >> 1);
-	adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */
+	adj >>= PPM_FRACTION; /* remove fractions */
 	adj = div_u64(adj, USEC_PER_SEC);
 	adj = neg_adj ? (word - adj) : (word + adj);
 
@@ -319,6 +311,8 @@
 	desc_ptp = macb_ptp_desc(queue->bp, desc);
 	tx_timestamp = &queue->tx_timestamps[head];
 	tx_timestamp->skb = skb;
+	/* ensure ts_1/ts_2 is loaded after ctrl (TX_USED check) */
+	dma_rmb();
 	tx_timestamp->desc_ptp.ts_1 = desc_ptp->ts_1;
 	tx_timestamp->desc_ptp.ts_2 = desc_ptp->ts_2;
 	/* move head */
diff --git a/drivers/net/ethernet/calxeda/Kconfig b/drivers/net/ethernet/calxeda/Kconfig
index 9fdd496..ce42157 100644
--- a/drivers/net/ethernet/calxeda/Kconfig
+++ b/drivers/net/ethernet/calxeda/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_CALXEDA_XGMAC
 	tristate "Calxeda 1G/10G XGMAC Ethernet driver"
 	depends on HAS_IOMEM
diff --git a/drivers/net/ethernet/calxeda/Makefile b/drivers/net/ethernet/calxeda/Makefile
index f0ef080..641e5b6 100644
--- a/drivers/net/ethernet/calxeda/Makefile
+++ b/drivers/net/ethernet/calxeda/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_NET_CALXEDA_XGMAC) += xgmac.o
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 13741ee..f96a42a 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2010-2011 Calxeda, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
@@ -1115,7 +1104,7 @@
 	for (i = 0; i < nfrags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-		len = frag->size;
+		len = skb_frag_size(frag);
 
 		paddr = skb_frag_dma_map(priv->device, frag, 0, len,
 					 DMA_TO_DEVICE);
@@ -1866,7 +1855,7 @@
 
 static int xgmac_suspend(struct device *dev)
 {
-	struct net_device *ndev = platform_get_drvdata(to_platform_device(dev));
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct xgmac_priv *priv = netdev_priv(ndev);
 	u32 value;
 
@@ -1892,7 +1881,7 @@
 
 static int xgmac_resume(struct device *dev)
 {
-	struct net_device *ndev = platform_get_drvdata(to_platform_device(dev));
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct xgmac_priv *priv = netdev_priv(ndev);
 	void __iomem *ioaddr = priv->base;
 
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 5f03199..6a700d3 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Cavium ethernet device configuration
 #
@@ -54,7 +55,6 @@
 	tristate "Cavium PTP coprocessor as PTP clock"
 	depends on 64BIT && PCI
 	imply PTP_1588_CLOCK
-	default y
 	---help---
 	  This driver adds support for the Precision Time Protocol Clocks and
 	  Timestamping coprocessor (PTP) found on Cavium processors.
@@ -65,11 +65,11 @@
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT && PCI
-	depends on MAY_USE_DEVLINK
 	depends on PCI
 	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select LIBCRC32C
+	select NET_DEVLINK
 	---help---
 	  This driver supports Cavium LiquidIO Intelligent Server Adapters
 	  based on CN66XX, CN68XX and CN23XX chips.
diff --git a/drivers/net/ethernet/cavium/Makefile b/drivers/net/ethernet/cavium/Makefile
index 946bba8..5d32808 100644
--- a/drivers/net/ethernet/cavium/Makefile
+++ b/drivers/net/ethernet/cavium/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Cavium ethernet device drivers.
 #
diff --git a/drivers/net/ethernet/cavium/common/Makefile b/drivers/net/ethernet/cavium/common/Makefile
index dd8561b..e3f87bd 100644
--- a/drivers/net/ethernet/cavium/common/Makefile
+++ b/drivers/net/ethernet/cavium/common/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_CAVIUM_PTP) += cavium_ptp.o
diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c
index 6aeb104..b821c9e 100644
--- a/drivers/net/ethernet/cavium/common/cavium_ptp.c
+++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c
@@ -10,7 +10,7 @@
 
 #include "cavium_ptp.h"
 
-#define DRV_NAME	"Cavium PTP Driver"
+#define DRV_NAME "cavium_ptp"
 
 #define PCI_DEVICE_ID_CAVIUM_PTP	0xA00C
 #define PCI_DEVICE_ID_CAVIUM_RST	0xA00E
@@ -277,10 +277,6 @@
 	writeq(clock_comp, clock->reg_base + PTP_CLOCK_COMP);
 
 	clock->ptp_clock = ptp_clock_register(&clock->ptp_info, dev);
-	if (!clock->ptp_clock) {
-		err = -ENODEV;
-		goto error_stop;
-	}
 	if (IS_ERR(clock->ptp_clock)) {
 		err = PTR_ERR(clock->ptp_clock);
 		goto error_stop;
diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.h b/drivers/net/ethernet/cavium/common/cavium_ptp.h
index be2bafc..a04eccb 100644
--- a/drivers/net/ethernet/cavium/common/cavium_ptp.h
+++ b/drivers/net/ethernet/cavium/common/cavium_ptp.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /* cavium_ptp.h - PTP 1588 clock on Cavium hardware
  * Copyright (c) 2003-2015, 2017 Cavium, Inc.
  */
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index 9f4f3c1..43d11c3 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -1450,7 +1450,7 @@
 		mbox_cmd.recv_len = 0;
 		mbox_cmd.recv_status = 0;
 		mbox_cmd.fn = NULL;
-		mbox_cmd.fn_arg = 0;
+		mbox_cmd.fn_arg = NULL;
 		ether_addr_copy(mbox_cmd.msg.s.params, mac);
 		mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf;
 		octeon_mbox_write(oct, &mbox_cmd);
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
index 962bb62..fda4940 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
@@ -616,7 +616,7 @@
 int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
 {
 	struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
-	u32 rings_per_vf, ring_flag;
+	u32 rings_per_vf;
 	u64 reg_val;
 
 	if (octeon_map_pci_barx(oct, 0, 0))
@@ -634,8 +634,6 @@
 
 	rings_per_vf = reg_val & CN23XX_PKT_INPUT_CTL_RPVF_MASK;
 
-	ring_flag = 0;
-
 	cn23xx->conf  = oct_get_config_info(oct, LIO_23XX);
 	if (!cn23xx->conf) {
 		dev_err(&oct->pci_dev->dev, "%s No Config found for CN23XX\n",
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
index 2df7440..39643be 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
@@ -38,9 +38,6 @@
 	lio_pci_readq(oct, CN6XXX_CIU_SOFT_RST);
 	lio_pci_writeq(oct, 1, CN6XXX_CIU_SOFT_RST);
 
-	/* make sure that the reset is written before starting timer */
-	mmiowb();
-
 	/* Wait for 10ms as Octeon resets. */
 	mdelay(100);
 
@@ -487,9 +484,6 @@
 
 	/* Disable Interrupts */
 	writeq(0, cn6xxx->intr_enb_reg64);
-
-	/* make sure interrupts are really disabled */
-	mmiowb();
 }
 
 static void lio_cn6xxx_get_pcie_qlmport(struct octeon_device *oct)
@@ -555,10 +549,6 @@
 				value &= ~(1 << oq_no);
 				octeon_write_csr(oct, reg, value);
 
-				/* Ensure that the enable register is written.
-				 */
-				mmiowb();
-
 				spin_unlock(&cn6xxx->lock_for_droq_int_enb_reg);
 			}
 		}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 8093c5e..d7e8057 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -32,38 +32,6 @@
 #define OCTNIC_MAX_SG  MAX_SKB_FRAGS
 
 /**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-void lio_if_cfg_callback(struct octeon_device *oct,
-			 u32 status __attribute__((unused)), void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-	struct liquidio_if_cfg_context *ctx;
-	struct liquidio_if_cfg_resp *resp;
-
-	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-	ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
-	oct = lio_get_device(ctx->octeon_id);
-	if (resp->status)
-		dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
-			CVM_CAST64(resp->status));
-	WRITE_ONCE(ctx->cond, 1);
-
-	snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
-		 resp->cfg_info.liquidio_firmware_version);
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
-}
-
-/**
  * \brief Delete gather lists
  * @param lio per-network private data
  */
@@ -198,14 +166,15 @@
 	nctrl.ncmd.s.cmd = cmd;
 	nctrl.ncmd.s.param1 = param1;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -285,15 +254,7 @@
 	struct octeon_device *oct = lio->oct_dev;
 	u8 *mac;
 
-	if (nctrl->completion && nctrl->response_code) {
-		/* Signal whoever is interested that the response code from the
-		 * firmware has arrived.
-		 */
-		WRITE_ONCE(*nctrl->response_code, nctrl->status);
-		complete(nctrl->completion);
-	}
-
-	if (nctrl->status)
+	if (nctrl->sc_status)
 		return;
 
 	switch (nctrl->ncmd.s.cmd) {
@@ -464,56 +425,73 @@
 	 */
 }
 
+void octeon_schedule_rxq_oom_work(struct octeon_device *oct,
+				  struct octeon_droq *droq)
+{
+	struct net_device *netdev = oct->props[0].netdev;
+	struct lio *lio = GET_LIO(netdev);
+	struct cavium_wq *wq = &lio->rxq_status_wq[droq->q_no];
+
+	queue_delayed_work(wq->wq, &wq->wk.work,
+			   msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS));
+}
+
 static void octnet_poll_check_rxq_oom_status(struct work_struct *work)
 {
 	struct cavium_wk *wk = (struct cavium_wk *)work;
 	struct lio *lio = (struct lio *)wk->ctxptr;
 	struct octeon_device *oct = lio->oct_dev;
-	struct octeon_droq *droq;
-	int q, q_no = 0;
+	int q_no = wk->ctxul;
+	struct octeon_droq *droq = oct->droq[q_no];
 
-	if (ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
-		for (q = 0; q < lio->linfo.num_rxpciq; q++) {
-			q_no = lio->linfo.rxpciq[q].s.q_no;
-			droq = oct->droq[q_no];
-			if (!droq)
-				continue;
-			octeon_droq_check_oom(droq);
-		}
-	}
-	queue_delayed_work(lio->rxq_status_wq.wq,
-			   &lio->rxq_status_wq.wk.work,
-			   msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS));
+	if (!ifstate_check(lio, LIO_IFSTATE_RUNNING) || !droq)
+		return;
+
+	if (octeon_retry_droq_refill(droq))
+		octeon_schedule_rxq_oom_work(oct, droq);
 }
 
 int setup_rx_oom_poll_fn(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct cavium_wq *wq;
+	int q, q_no;
 
-	lio->rxq_status_wq.wq = alloc_workqueue("rxq-oom-status",
-						WQ_MEM_RECLAIM, 0);
-	if (!lio->rxq_status_wq.wq) {
-		dev_err(&oct->pci_dev->dev, "unable to create cavium rxq oom status wq\n");
-		return -ENOMEM;
+	for (q = 0; q < oct->num_oqs; q++) {
+		q_no = lio->linfo.rxpciq[q].s.q_no;
+		wq = &lio->rxq_status_wq[q_no];
+		wq->wq = alloc_workqueue("rxq-oom-status",
+					 WQ_MEM_RECLAIM, 0);
+		if (!wq->wq) {
+			dev_err(&oct->pci_dev->dev, "unable to create cavium rxq oom status wq\n");
+			return -ENOMEM;
+		}
+
+		INIT_DELAYED_WORK(&wq->wk.work,
+				  octnet_poll_check_rxq_oom_status);
+		wq->wk.ctxptr = lio;
+		wq->wk.ctxul = q_no;
 	}
-	INIT_DELAYED_WORK(&lio->rxq_status_wq.wk.work,
-			  octnet_poll_check_rxq_oom_status);
-	lio->rxq_status_wq.wk.ctxptr = lio;
-	queue_delayed_work(lio->rxq_status_wq.wq,
-			   &lio->rxq_status_wq.wk.work,
-			   msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS));
+
 	return 0;
 }
 
 void cleanup_rx_oom_poll_fn(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct cavium_wq *wq;
+	int q_no;
 
-	if (lio->rxq_status_wq.wq) {
-		cancel_delayed_work_sync(&lio->rxq_status_wq.wk.work);
-		flush_workqueue(lio->rxq_status_wq.wq);
-		destroy_workqueue(lio->rxq_status_wq.wq);
+	for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+		wq = &lio->rxq_status_wq[q_no];
+		if (wq->wq) {
+			cancel_delayed_work_sync(&wq->wk.work);
+			flush_workqueue(wq->wq);
+			destroy_workqueue(wq->wq);
+			wq->wq = NULL;
+		}
 	}
 }
 
@@ -683,7 +661,8 @@
 		    (((rh->r_dh.encap_on) &&
 		      (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) ||
 		     (!(rh->r_dh.encap_on) &&
-		      (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))))
+		      ((rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED) ==
+			CNNIC_CSUM_VERIFIED))))
 			/* checksum has already been verified */
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		else
@@ -985,7 +964,7 @@
 
 			if (droq->ops.poll_mode) {
 				droq->ops.napi_fn(droq);
-				oct_priv->napi_mask |= (1 << oq_no);
+				oct_priv->napi_mask |= BIT_ULL(oq_no);
 			} else {
 				tasklet_schedule(&oct_priv->droq_tasklet);
 			}
@@ -1218,30 +1197,6 @@
 	return 0;
 }
 
-static void liquidio_change_mtu_completion(struct octeon_device *oct,
-					   u32 status, void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-	struct liquidio_if_cfg_context *ctx;
-
-	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
-	if (status) {
-		dev_err(&oct->pci_dev->dev, "MTU change failed. Status: %llx\n",
-			CVM_CAST64(status));
-		WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_FAIL);
-	} else {
-		WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_SUCCESS);
-	}
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
-}
-
 /**
  * \brief Net device change_mtu
  * @param netdev network device
@@ -1250,22 +1205,22 @@
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
-	struct liquidio_if_cfg_context *ctx;
 	struct octeon_soft_command *sc;
 	union octnet_cmd *ncmd;
-	int ctx_size;
 	int ret = 0;
 
-	ctx_size = sizeof(struct liquidio_if_cfg_context);
 	sc = (struct octeon_soft_command *)
-		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, ctx_size);
+		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate soft command\n");
+		return -ENOMEM;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
-	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
 
-	WRITE_ONCE(ctx->cond, 0);
-	ctx->octeon_id = lio_get_device_id(oct);
-	init_waitqueue_head(&ctx->wc);
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	ncmd->u64 = 0;
 	ncmd->s.cmd = OCTNET_CMD_CHANGE_MTU;
@@ -1278,28 +1233,28 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_CMD, 0, 0, 0);
 
-	sc->callback = liquidio_change_mtu_completion;
-	sc->callback_arg = sc;
-	sc->wait_time = 100;
-
 	ret = octeon_send_soft_command(oct, sc);
 	if (ret == IQ_SEND_FAILED) {
 		netif_info(lio, rx_err, lio->netdev, "Failed to change MTU\n");
+		octeon_free_soft_command(oct, sc);
 		return -EINVAL;
 	}
 	/* Sleep on a wait queue till the cond flag indicates that the
 	 * response arrived or timed-out.
 	 */
-	if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR ||
-	    ctx->cond == LIO_CHANGE_MTU_FAIL) {
-		octeon_free_soft_command(oct, sc);
+	ret = wait_for_sc_completion_timeout(oct, sc, 0);
+	if (ret)
+		return ret;
+
+	if (sc->sc_status) {
+		WRITE_ONCE(sc->caller_is_done, true);
 		return -EINVAL;
 	}
 
 	netdev->mtu = new_mtu;
 	lio->mtu = new_mtu;
 
-	octeon_free_soft_command(oct, sc);
+	WRITE_ONCE(sc->caller_is_done, true);
 	return 0;
 }
 
@@ -1333,8 +1288,6 @@
 	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
 	struct oct_nic_stats_resp *resp =
 	    (struct oct_nic_stats_resp *)sc->virtrptr;
-	struct oct_nic_stats_ctrl *ctrl =
-	    (struct oct_nic_stats_ctrl *)sc->ctxptr;
 	struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
 	struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
 	struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire;
@@ -1422,93 +1375,148 @@
 
 		resp->status = 1;
 	} else {
+		dev_err(&oct_dev->pci_dev->dev, "sc OPCODE_NIC_PORT_STATS command failed\n");
 		resp->status = -1;
 	}
-	complete(&ctrl->complete);
 }
 
-int octnet_get_link_stats(struct net_device *netdev)
+static int lio_fetch_vf_stats(struct lio *lio)
 {
-	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct_dev = lio->oct_dev;
 	struct octeon_soft_command *sc;
-	struct oct_nic_stats_ctrl *ctrl;
-	struct oct_nic_stats_resp *resp;
+	struct oct_nic_vf_stats_resp *resp;
+
 	int retval;
 
 	/* Alloc soft command */
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct_dev,
 					  0,
-					  sizeof(struct oct_nic_stats_resp),
-					  sizeof(struct octnic_ctrl_pkt));
+					  sizeof(struct oct_nic_vf_stats_resp),
+					  0);
 
-	if (!sc)
-		return -ENOMEM;
+	if (!sc) {
+		dev_err(&oct_dev->pci_dev->dev, "Soft command allocation failed\n");
+		retval = -ENOMEM;
+		goto lio_fetch_vf_stats_exit;
+	}
+
+	resp = (struct oct_nic_vf_stats_resp *)sc->virtrptr;
+	memset(resp, 0, sizeof(struct oct_nic_vf_stats_resp));
+
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
+				    OPCODE_NIC_VF_PORT_STATS, 0, 0, 0);
+
+	retval = octeon_send_soft_command(oct_dev, sc);
+	if (retval == IQ_SEND_FAILED) {
+		octeon_free_soft_command(oct_dev, sc);
+		goto lio_fetch_vf_stats_exit;
+	}
+
+	retval =
+		wait_for_sc_completion_timeout(oct_dev, sc,
+					       (2 * LIO_SC_MAX_TMO_MS));
+	if (retval)  {
+		dev_err(&oct_dev->pci_dev->dev,
+			"sc OPCODE_NIC_VF_PORT_STATS command failed\n");
+		goto lio_fetch_vf_stats_exit;
+	}
+
+	if (sc->sc_status != OCTEON_REQUEST_TIMEOUT && !resp->status) {
+		octeon_swap_8B_data((u64 *)&resp->spoofmac_cnt,
+				    (sizeof(u64)) >> 3);
+
+		if (resp->spoofmac_cnt != 0) {
+			dev_warn(&oct_dev->pci_dev->dev,
+				 "%llu Spoofed packets detected\n",
+				 resp->spoofmac_cnt);
+		}
+	}
+	WRITE_ONCE(sc->caller_is_done, 1);
+
+lio_fetch_vf_stats_exit:
+	return retval;
+}
+
+void lio_fetch_stats(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct lio *lio = wk->ctxptr;
+	struct octeon_device *oct_dev = lio->oct_dev;
+	struct octeon_soft_command *sc;
+	struct oct_nic_stats_resp *resp;
+	unsigned long time_in_jiffies;
+	int retval;
+
+	if (OCTEON_CN23XX_PF(oct_dev)) {
+		/* report spoofchk every 2 seconds */
+		if (!(oct_dev->vfstats_poll % LIO_VFSTATS_POLL) &&
+		    (oct_dev->fw_info.app_cap_flags & LIQUIDIO_SPOOFCHK_CAP) &&
+		    oct_dev->sriov_info.num_vfs_alloced) {
+			lio_fetch_vf_stats(lio);
+		}
+
+		oct_dev->vfstats_poll++;
+	}
+
+	/* Alloc soft command */
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct_dev,
+					  0,
+					  sizeof(struct oct_nic_stats_resp),
+					  0);
+
+	if (!sc) {
+		dev_err(&oct_dev->pci_dev->dev, "Soft command allocation failed\n");
+		goto lio_fetch_stats_exit;
+	}
 
 	resp = (struct oct_nic_stats_resp *)sc->virtrptr;
 	memset(resp, 0, sizeof(struct oct_nic_stats_resp));
 
-	ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr;
-	memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl));
-	ctrl->netdev = netdev;
-	init_completion(&ctrl->complete);
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
 
 	octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
 				    OPCODE_NIC_PORT_STATS, 0, 0, 0);
 
-	sc->callback = octnet_nic_stats_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 500;	/*in milli seconds*/
-
 	retval = octeon_send_soft_command(oct_dev, sc);
 	if (retval == IQ_SEND_FAILED) {
 		octeon_free_soft_command(oct_dev, sc);
-		return -EINVAL;
+		goto lio_fetch_stats_exit;
 	}
 
-	wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000));
-
-	if (resp->status != 1) {
-		octeon_free_soft_command(oct_dev, sc);
-
-		return -EINVAL;
+	retval = wait_for_sc_completion_timeout(oct_dev, sc,
+						(2 * LIO_SC_MAX_TMO_MS));
+	if (retval)  {
+		dev_err(&oct_dev->pci_dev->dev, "sc OPCODE_NIC_PORT_STATS command failed\n");
+		goto lio_fetch_stats_exit;
 	}
 
-	octeon_free_soft_command(oct_dev, sc);
+	octnet_nic_stats_callback(oct_dev, sc->sc_status, sc);
+	WRITE_ONCE(sc->caller_is_done, true);
 
-	return 0;
-}
+lio_fetch_stats_exit:
+	time_in_jiffies = msecs_to_jiffies(LIQUIDIO_NDEV_STATS_POLL_TIME_MS);
+	if (ifstate_check(lio, LIO_IFSTATE_RUNNING))
+		schedule_delayed_work(&lio->stats_wk.work, time_in_jiffies);
 
-static void liquidio_nic_seapi_ctl_callback(struct octeon_device *oct,
-					    u32 status,
-					    void *buf)
-{
-	struct liquidio_nic_seapi_ctl_context *ctx;
-	struct octeon_soft_command *sc = buf;
-
-	ctx = sc->ctxptr;
-
-	oct = lio_get_device(ctx->octeon_id);
-	if (status) {
-		dev_err(&oct->pci_dev->dev, "%s: instruction failed. Status: %llx\n",
-			__func__,
-			CVM_CAST64(status));
-	}
-	ctx->status = status;
-	complete(&ctx->complete);
+	return;
 }
 
 int liquidio_set_speed(struct lio *lio, int speed)
 {
-	struct liquidio_nic_seapi_ctl_context *ctx;
 	struct octeon_device *oct = lio->oct_dev;
 	struct oct_nic_seapi_resp *resp;
 	struct octeon_soft_command *sc;
 	union octnet_cmd *ncmd;
-	u32 ctx_size;
 	int retval;
 	u32 var;
 
@@ -1521,21 +1529,18 @@
 		return -EOPNOTSUPP;
 	}
 
-	ctx_size = sizeof(struct liquidio_nic_seapi_ctl_context);
 	sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 				       sizeof(struct oct_nic_seapi_resp),
-				       ctx_size);
+				       0);
 	if (!sc)
 		return -ENOMEM;
 
 	ncmd = sc->virtdptr;
-	ctx  = sc->ctxptr;
 	resp = sc->virtrptr;
 	memset(resp, 0, sizeof(struct oct_nic_seapi_resp));
 
-	ctx->octeon_id = lio_get_device_id(oct);
-	ctx->status = 0;
-	init_completion(&ctx->complete);
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	ncmd->u64 = 0;
 	ncmd->s.cmd = SEAPI_CMD_SPEED_SET;
@@ -1548,30 +1553,24 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_UBOOT_CTL, 0, 0, 0);
 
-	sc->callback = liquidio_nic_seapi_ctl_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 5000;
-
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
 		dev_info(&oct->pci_dev->dev, "Failed to send soft command\n");
+		octeon_free_soft_command(oct, sc);
 		retval = -EBUSY;
 	} else {
 		/* Wait for response or timeout */
-		if (wait_for_completion_timeout(&ctx->complete,
-						msecs_to_jiffies(10000)) == 0) {
-			dev_err(&oct->pci_dev->dev, "%s: sc timeout\n",
-				__func__);
-			octeon_free_soft_command(oct, sc);
-			return -EINTR;
-		}
+		retval = wait_for_sc_completion_timeout(oct, sc, 0);
+		if (retval)
+			return retval;
 
 		retval = resp->status;
 
 		if (retval) {
 			dev_err(&oct->pci_dev->dev, "%s failed, retval=%d\n",
 				__func__, retval);
-			octeon_free_soft_command(oct, sc);
+			WRITE_ONCE(sc->caller_is_done, true);
+
 			return -EIO;
 		}
 
@@ -1583,38 +1582,32 @@
 		}
 
 		oct->speed_setting = var;
+		WRITE_ONCE(sc->caller_is_done, true);
 	}
 
-	octeon_free_soft_command(oct, sc);
-
 	return retval;
 }
 
 int liquidio_get_speed(struct lio *lio)
 {
-	struct liquidio_nic_seapi_ctl_context *ctx;
 	struct octeon_device *oct = lio->oct_dev;
 	struct oct_nic_seapi_resp *resp;
 	struct octeon_soft_command *sc;
 	union octnet_cmd *ncmd;
-	u32 ctx_size;
 	int retval;
 
-	ctx_size = sizeof(struct liquidio_nic_seapi_ctl_context);
 	sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 				       sizeof(struct oct_nic_seapi_resp),
-				       ctx_size);
+				       0);
 	if (!sc)
 		return -ENOMEM;
 
 	ncmd = sc->virtdptr;
-	ctx  = sc->ctxptr;
 	resp = sc->virtrptr;
 	memset(resp, 0, sizeof(struct oct_nic_seapi_resp));
 
-	ctx->octeon_id = lio_get_device_id(oct);
-	ctx->status = 0;
-	init_completion(&ctx->complete);
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	ncmd->u64 = 0;
 	ncmd->s.cmd = SEAPI_CMD_SPEED_GET;
@@ -1626,37 +1619,20 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_UBOOT_CTL, 0, 0, 0);
 
-	sc->callback = liquidio_nic_seapi_ctl_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 5000;
-
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
 		dev_info(&oct->pci_dev->dev, "Failed to send soft command\n");
-		oct->no_speed_setting = 1;
-		oct->speed_setting = 25;
-
-		retval = -EBUSY;
+		octeon_free_soft_command(oct, sc);
+		retval = -EIO;
 	} else {
-		if (wait_for_completion_timeout(&ctx->complete,
-						msecs_to_jiffies(10000)) == 0) {
-			dev_err(&oct->pci_dev->dev, "%s: sc timeout\n",
-				__func__);
+		retval = wait_for_sc_completion_timeout(oct, sc, 0);
+		if (retval)
+			return retval;
 
-			oct->speed_setting = 25;
-			oct->no_speed_setting = 1;
-
-			octeon_free_soft_command(oct, sc);
-
-			return -EINTR;
-		}
 		retval = resp->status;
 		if (retval) {
 			dev_err(&oct->pci_dev->dev,
 				"%s failed retval=%d\n", __func__, retval);
-			oct->no_speed_setting = 1;
-			oct->speed_setting = 25;
-			octeon_free_soft_command(oct, sc);
 			retval = -EIO;
 		} else {
 			u32 var;
@@ -1664,16 +1640,176 @@
 			var = be32_to_cpu((__force __be32)resp->speed);
 			oct->speed_setting = var;
 			if (var == 0xffff) {
-				oct->no_speed_setting = 1;
 				/* unable to access boot variables
 				 * get the default value based on the NIC type
 				 */
-				oct->speed_setting = 25;
+				if (oct->subsystem_id ==
+						OCTEON_CN2350_25GB_SUBSYS_ID ||
+				    oct->subsystem_id ==
+						OCTEON_CN2360_25GB_SUBSYS_ID) {
+					oct->no_speed_setting = 1;
+					oct->speed_setting = 25;
+				} else {
+					oct->speed_setting = 10;
+				}
 			}
+
 		}
+		WRITE_ONCE(sc->caller_is_done, true);
 	}
 
-	octeon_free_soft_command(oct, sc);
+	return retval;
+}
+
+int liquidio_set_fec(struct lio *lio, int on_off)
+{
+	struct oct_nic_seapi_resp *resp;
+	struct octeon_soft_command *sc;
+	struct octeon_device *oct;
+	union octnet_cmd *ncmd;
+	int retval;
+	u32 var;
+
+	oct = lio->oct_dev;
+
+	if (oct->props[lio->ifidx].fec == on_off)
+		return 0;
+
+	if (!OCTEON_CN23XX_PF(oct)) {
+		dev_err(&oct->pci_dev->dev, "%s: SET FEC only for PF\n",
+			__func__);
+		return -1;
+	}
+
+	if (oct->speed_boot != 25)  {
+		dev_err(&oct->pci_dev->dev,
+			"Set FEC only when link speed is 25G during insmod\n");
+		return -1;
+	}
+
+	sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+				       sizeof(struct oct_nic_seapi_resp), 0);
+	if (!sc) {
+		dev_err(&oct->pci_dev->dev,
+			"Failed to allocate soft command\n");
+		return -ENOMEM;
+	}
+
+	ncmd = sc->virtdptr;
+	resp = sc->virtrptr;
+	memset(resp, 0, sizeof(struct oct_nic_seapi_resp));
+
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = SEAPI_CMD_FEC_SET;
+	ncmd->s.param1 = on_off;
+	/* SEAPI_CMD_FEC_DISABLE(0) or SEAPI_CMD_FEC_RS(1) */
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_UBOOT_CTL, 0, 0, 0);
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		dev_info(&oct->pci_dev->dev, "Failed to send soft command\n");
+		octeon_free_soft_command(oct, sc);
+		return -EIO;
+	}
+
+	retval = wait_for_sc_completion_timeout(oct, sc, 0);
+	if (retval)
+		return (-EIO);
+
+	var = be32_to_cpu(resp->fec_setting);
+	resp->fec_setting = var;
+	if (var != on_off) {
+		dev_err(&oct->pci_dev->dev,
+			"Setting failed fec= %x, expect %x\n",
+			var, on_off);
+		oct->props[lio->ifidx].fec = var;
+		if (resp->fec_setting == SEAPI_CMD_FEC_SET_RS)
+			oct->props[lio->ifidx].fec = 1;
+		else
+			oct->props[lio->ifidx].fec = 0;
+	}
+
+	WRITE_ONCE(sc->caller_is_done, true);
+
+	if (oct->props[lio->ifidx].fec !=
+	    oct->props[lio->ifidx].fec_boot) {
+		dev_dbg(&oct->pci_dev->dev,
+			"Reload driver to change fec to %s\n",
+			oct->props[lio->ifidx].fec ? "on" : "off");
+	}
+
+	return retval;
+}
+
+int liquidio_get_fec(struct lio *lio)
+{
+	struct oct_nic_seapi_resp *resp;
+	struct octeon_soft_command *sc;
+	struct octeon_device *oct;
+	union octnet_cmd *ncmd;
+	int retval;
+	u32 var;
+
+	oct = lio->oct_dev;
+
+	sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+				       sizeof(struct oct_nic_seapi_resp), 0);
+	if (!sc)
+		return -ENOMEM;
+
+	ncmd = sc->virtdptr;
+	resp = sc->virtrptr;
+	memset(resp, 0, sizeof(struct oct_nic_seapi_resp));
+
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = SEAPI_CMD_FEC_GET;
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_UBOOT_CTL, 0, 0, 0);
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		dev_info(&oct->pci_dev->dev,
+			 "%s: Failed to send soft command\n", __func__);
+		octeon_free_soft_command(oct, sc);
+		return -EIO;
+	}
+
+	retval = wait_for_sc_completion_timeout(oct, sc, 0);
+	if (retval)
+		return retval;
+
+	var = be32_to_cpu(resp->fec_setting);
+	resp->fec_setting = var;
+	if (resp->fec_setting == SEAPI_CMD_FEC_SET_RS)
+		oct->props[lio->ifidx].fec = 1;
+	else
+		oct->props[lio->ifidx].fec = 0;
+
+	WRITE_ONCE(sc->caller_is_done, true);
+
+	if (oct->props[lio->ifidx].fec !=
+	    oct->props[lio->ifidx].fec_boot) {
+		dev_dbg(&oct->pci_dev->dev,
+			"Reload driver to change fec to %s\n",
+			oct->props[lio->ifidx].fec ? "on" : "off");
+	}
 
 	return retval;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 8e05afd..abe5d0d 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -33,25 +33,12 @@
 
 static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs);
 
-struct oct_intrmod_context {
-	int octeon_id;
-	wait_queue_head_t wc;
-	int cond;
-	int status;
-};
-
 struct oct_intrmod_resp {
 	u64     rh;
 	struct oct_intrmod_cfg intrmod;
 	u64     status;
 };
 
-struct oct_mdio_cmd_context {
-	int octeon_id;
-	wait_queue_head_t wc;
-	int cond;
-};
-
 struct oct_mdio_cmd_resp {
 	u64 rh;
 	struct oct_mdio_cmd resp;
@@ -124,7 +111,7 @@
 	"mac_tx_one_collision",
 	"mac_tx_multi_collision",
 	"mac_tx_max_collision_fail",
-	"mac_tx_max_deferal_fail",
+	"mac_tx_max_deferral_fail",
 	"mac_tx_fifo_err",
 	"mac_tx_runts",
 
@@ -257,6 +244,7 @@
 		    linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
 		    linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
 			dev_dbg(&oct->pci_dev->dev, "ecmd->base.transceiver is XCVR_EXTERNAL\n");
+			ecmd->base.transceiver = XCVR_EXTERNAL;
 		} else {
 			dev_err(&oct->pci_dev->dev, "Unknown link interface mode: %d\n",
 				linfo->link.s.if_mode);
@@ -290,10 +278,12 @@
 						 10000baseCR_Full);
 				}
 
-				if (oct->no_speed_setting == 0)
+				if (oct->no_speed_setting == 0) {
 					liquidio_get_speed(lio);
-				else
+					liquidio_get_fec(lio);
+				} else {
 					oct->speed_setting = 25;
+				}
 
 				if (oct->speed_setting == 10) {
 					ethtool_link_ksettings_add_link_mode
@@ -317,6 +307,24 @@
 						(ecmd, advertising,
 						 25000baseCR_Full);
 				}
+
+				if (oct->no_speed_setting)
+					break;
+
+				ethtool_link_ksettings_add_link_mode
+					(ecmd, supported, FEC_RS);
+				ethtool_link_ksettings_add_link_mode
+					(ecmd, supported, FEC_NONE);
+					/*FEC_OFF*/
+				if (oct->props[lio->ifidx].fec == 1) {
+					/* ETHTOOL_FEC_RS */
+					ethtool_link_ksettings_add_link_mode
+						(ecmd, advertising, FEC_RS);
+				} else {
+					/* ETHTOOL_FEC_OFF */
+					ethtool_link_ksettings_add_link_mode
+						(ecmd, advertising, FEC_NONE);
+				}
 			} else { /* VF */
 				if (linfo->link.s.speed == 10000) {
 					ethtool_link_ksettings_add_link_mode
@@ -472,12 +480,11 @@
 	nctrl.ncmd.s.param1 = num_queues;
 	nctrl.ncmd.s.param2 = num_queues;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "Failed to send Queue reset command (ret: 0x%x)\n",
 			ret);
 		return -1;
@@ -708,13 +715,13 @@
 	nctrl.ncmd.s.param1 = addr;
 	nctrl.ncmd.s.param2 = val;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
+	if (ret) {
+		dev_err(&oct->pci_dev->dev,
+			"Failed to configure gpio value, ret=%d\n", ret);
 		return -EINVAL;
 	}
 
@@ -734,41 +741,19 @@
 	nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE;
 	nctrl.ncmd.s.param1 = val;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
+	if (ret) {
+		dev_err(&oct->pci_dev->dev,
+			"Failed to configure gpio value, ret=%d\n", ret);
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
-/* Callback for when mdio command response arrives
- */
-static void octnet_mdio_resp_callback(struct octeon_device *oct,
-				      u32 status,
-				      void *buf)
-{
-	struct oct_mdio_cmd_context *mdio_cmd_ctx;
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-
-	mdio_cmd_ctx = (struct oct_mdio_cmd_context *)sc->ctxptr;
-
-	oct = lio_get_device(mdio_cmd_ctx->octeon_id);
-	if (status) {
-		dev_err(&oct->pci_dev->dev, "MIDO instruction failed. Status: %llx\n",
-			CVM_CAST64(status));
-		WRITE_ONCE(mdio_cmd_ctx->cond, -1);
-	} else {
-		WRITE_ONCE(mdio_cmd_ctx->cond, 1);
-	}
-	wake_up_interruptible(&mdio_cmd_ctx->wc);
-}
-
 /* This routine provides PHY access routines for
  * mdio  clause45 .
  */
@@ -778,25 +763,20 @@
 	struct octeon_device *oct_dev = lio->oct_dev;
 	struct octeon_soft_command *sc;
 	struct oct_mdio_cmd_resp *mdio_cmd_rsp;
-	struct oct_mdio_cmd_context *mdio_cmd_ctx;
 	struct oct_mdio_cmd *mdio_cmd;
 	int retval = 0;
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct_dev,
 					  sizeof(struct oct_mdio_cmd),
-					  sizeof(struct oct_mdio_cmd_resp),
-					  sizeof(struct oct_mdio_cmd_context));
+					  sizeof(struct oct_mdio_cmd_resp), 0);
 
 	if (!sc)
 		return -ENOMEM;
 
-	mdio_cmd_ctx = (struct oct_mdio_cmd_context *)sc->ctxptr;
 	mdio_cmd_rsp = (struct oct_mdio_cmd_resp *)sc->virtrptr;
 	mdio_cmd = (struct oct_mdio_cmd *)sc->virtdptr;
 
-	WRITE_ONCE(mdio_cmd_ctx->cond, 0);
-	mdio_cmd_ctx->octeon_id = lio_get_device_id(oct_dev);
 	mdio_cmd->op = op;
 	mdio_cmd->mdio_addr = loc;
 	if (op)
@@ -808,42 +788,40 @@
 	octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC, OPCODE_NIC_MDIO45,
 				    0, 0, 0);
 
-	sc->wait_time = 1000;
-	sc->callback = octnet_mdio_resp_callback;
-	sc->callback_arg = sc;
-
-	init_waitqueue_head(&mdio_cmd_ctx->wc);
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct_dev, sc);
-
 	if (retval == IQ_SEND_FAILED) {
 		dev_err(&oct_dev->pci_dev->dev,
 			"octnet_mdio45_access instruction failed status: %x\n",
 			retval);
-		retval = -EBUSY;
+		octeon_free_soft_command(oct_dev, sc);
+		return -EBUSY;
 	} else {
 		/* Sleep on a wait queue till the cond flag indicates that the
 		 * response arrived
 		 */
-		sleep_cond(&mdio_cmd_ctx->wc, &mdio_cmd_ctx->cond);
+		retval = wait_for_sc_completion_timeout(oct_dev, sc, 0);
+		if (retval)
+			return retval;
+
 		retval = mdio_cmd_rsp->status;
 		if (retval) {
-			dev_err(&oct_dev->pci_dev->dev, "octnet mdio45 access failed\n");
-			retval = -EBUSY;
-		} else {
-			octeon_swap_8B_data((u64 *)(&mdio_cmd_rsp->resp),
-					    sizeof(struct oct_mdio_cmd) / 8);
-
-			if (READ_ONCE(mdio_cmd_ctx->cond) == 1) {
-				if (!op)
-					*value = mdio_cmd_rsp->resp.value1;
-			} else {
-				retval = -EINVAL;
-			}
+			dev_err(&oct_dev->pci_dev->dev,
+				"octnet mdio45 access failed: %x\n", retval);
+			WRITE_ONCE(sc->caller_is_done, true);
+			return -EBUSY;
 		}
-	}
 
-	octeon_free_soft_command(oct_dev, sc);
+		octeon_swap_8B_data((u64 *)(&mdio_cmd_rsp->resp),
+				    sizeof(struct oct_mdio_cmd) / 8);
+
+		if (!op)
+			*value = mdio_cmd_rsp->resp.value1;
+
+		WRITE_ONCE(sc->caller_is_done, true);
+	}
 
 	return retval;
 }
@@ -1007,8 +985,7 @@
 static int lio_23xx_reconfigure_queue_count(struct lio *lio)
 {
 	struct octeon_device *oct = lio->oct_dev;
-	struct liquidio_if_cfg_context *ctx;
-	u32 resp_size, ctx_size, data_size;
+	u32 resp_size, data_size;
 	struct liquidio_if_cfg_resp *resp;
 	struct octeon_soft_command *sc;
 	union oct_nic_if_cfg if_cfg;
@@ -1018,11 +995,10 @@
 	int j;
 
 	resp_size = sizeof(struct liquidio_if_cfg_resp);
-	ctx_size = sizeof(struct liquidio_if_cfg_context);
 	data_size = sizeof(struct lio_version);
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, data_size,
-					  resp_size, ctx_size);
+					  resp_size, 0);
 	if (!sc) {
 		dev_err(&oct->pci_dev->dev, "%s: Failed to allocate soft command\n",
 			__func__);
@@ -1030,7 +1006,6 @@
 	}
 
 	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
 	vdata = (struct lio_version *)sc->virtdptr;
 
 	vdata->major = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
@@ -1038,9 +1013,6 @@
 	vdata->micro = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
 
 	ifidx_or_pfnum = oct->pf_num;
-	WRITE_ONCE(ctx->cond, 0);
-	ctx->octeon_id = lio_get_device_id(oct);
-	init_waitqueue_head(&ctx->wc);
 
 	if_cfg.u64 = 0;
 	if_cfg.s.num_iqueues = oct->sriov_info.num_pf_rings;
@@ -1052,27 +1024,29 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_QCOUNT_UPDATE, 0,
 				    if_cfg.u64, 0);
-	sc->callback = lio_if_cfg_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = LIO_IFCFG_WAIT_TIME;
+
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
 		dev_err(&oct->pci_dev->dev,
-			"iq/oq config failed status: %x\n",
+			"Sending iq/oq config failed status: %x\n",
 			retval);
-		goto qcount_update_fail;
+		octeon_free_soft_command(oct, sc);
+		return -EIO;
 	}
 
-	if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
-		dev_err(&oct->pci_dev->dev, "Wait interrupted\n");
-		return -1;
-	}
+	retval = wait_for_sc_completion_timeout(oct, sc, 0);
+	if (retval)
+		return retval;
 
 	retval = resp->status;
 	if (retval) {
-		dev_err(&oct->pci_dev->dev, "iq/oq config failed\n");
-		goto qcount_update_fail;
+		dev_err(&oct->pci_dev->dev,
+			"iq/oq config failed: %x\n", retval);
+		WRITE_ONCE(sc->caller_is_done, true);
+		return -1;
 	}
 
 	octeon_swap_8B_data((u64 *)(&resp->cfg_info),
@@ -1097,16 +1071,12 @@
 	lio->txq = lio->linfo.txpciq[0].s.q_no;
 	lio->rxq = lio->linfo.rxpciq[0].s.q_no;
 
-	octeon_free_soft_command(oct, sc);
 	dev_info(&oct->pci_dev->dev, "Queue count updated to %d\n",
 		 lio->linfo.num_rxpciq);
 
+	WRITE_ONCE(sc->caller_is_done, true);
+
 	return 0;
-
-qcount_update_fail:
-	octeon_free_soft_command(oct, sc);
-
-	return -1;
 }
 
 static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
@@ -1166,6 +1136,8 @@
 	 * steps like updating sriov_info for the octeon device need to be done.
 	 */
 	if (queue_count_update) {
+		cleanup_rx_oom_poll_fn(netdev);
+
 		lio_delete_glists(lio);
 
 		/* Delete mbox for PF which is SRIOV disabled because sriov_info
@@ -1265,6 +1237,11 @@
 			return -1;
 		}
 
+		if (setup_rx_oom_poll_fn(netdev)) {
+			dev_err(&oct->pci_dev->dev, "lio_setup_rx_oom_poll_fn failed\n");
+			return 1;
+		}
+
 		/* Send firmware the information about new number of queues
 		 * if the interface is a VF or a PF that is SRIOV enabled.
 		 */
@@ -1412,7 +1389,6 @@
 	nctrl.ncmd.u64 = 0;
 	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
@@ -1433,8 +1409,9 @@
 	}
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n");
+	if (ret) {
+		dev_err(&oct->pci_dev->dev,
+			"Failed to set pause parameter, ret=%d\n", ret);
 		return -EINVAL;
 	}
 
@@ -1764,7 +1741,8 @@
 	  */
 	data[i++] = lstats.rx_dropped;
 	/* sum of oct->instr_queue[iq_no]->stats.tx_dropped */
-	data[i++] = lstats.tx_dropped;
+	data[i++] = lstats.tx_dropped +
+		oct_dev->link_stats.fromhost.fw_err_drop;
 
 	data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast;
 	data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent;
@@ -2013,34 +1991,11 @@
 	}
 }
 
-/* Callback function for intrmod */
-static void octnet_intrmod_callback(struct octeon_device *oct_dev,
-				    u32 status,
-				    void *ptr)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
-	struct oct_intrmod_context *ctx;
-
-	ctx  = (struct oct_intrmod_context *)sc->ctxptr;
-
-	ctx->status = status;
-
-	WRITE_ONCE(ctx->cond, 1);
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
-}
-
 /*  get interrupt moderation parameters */
 static int octnet_get_intrmod_cfg(struct lio *lio,
 				  struct oct_intrmod_cfg *intr_cfg)
 {
 	struct octeon_soft_command *sc;
-	struct oct_intrmod_context *ctx;
 	struct oct_intrmod_resp *resp;
 	int retval;
 	struct octeon_device *oct_dev = lio->oct_dev;
@@ -2049,8 +2004,7 @@
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct_dev,
 					  0,
-					  sizeof(struct oct_intrmod_resp),
-					  sizeof(struct oct_intrmod_context));
+					  sizeof(struct oct_intrmod_resp), 0);
 
 	if (!sc)
 		return -ENOMEM;
@@ -2058,20 +2012,13 @@
 	resp = (struct oct_intrmod_resp *)sc->virtrptr;
 	memset(resp, 0, sizeof(struct oct_intrmod_resp));
 
-	ctx = (struct oct_intrmod_context *)sc->ctxptr;
-	memset(ctx, 0, sizeof(struct oct_intrmod_context));
-	WRITE_ONCE(ctx->cond, 0);
-	ctx->octeon_id = lio_get_device_id(oct_dev);
-	init_waitqueue_head(&ctx->wc);
-
 	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
 
 	octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
 				    OPCODE_NIC_INTRMOD_PARAMS, 0, 0, 0);
 
-	sc->callback = octnet_intrmod_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 1000;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct_dev, sc);
 	if (retval == IQ_SEND_FAILED) {
@@ -2082,32 +2029,23 @@
 	/* Sleep on a wait queue till the cond flag indicates that the
 	 * response arrived or timed-out.
 	 */
-	if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
-		dev_err(&oct_dev->pci_dev->dev, "Wait interrupted\n");
-		goto intrmod_info_wait_intr;
-	}
+	retval = wait_for_sc_completion_timeout(oct_dev, sc, 0);
+	if (retval)
+		return -ENODEV;
 
-	retval = ctx->status || resp->status;
-	if (retval) {
+	if (resp->status) {
 		dev_err(&oct_dev->pci_dev->dev,
 			"Get interrupt moderation parameters failed\n");
-		goto intrmod_info_wait_fail;
+		WRITE_ONCE(sc->caller_is_done, true);
+		return -ENODEV;
 	}
 
 	octeon_swap_8B_data((u64 *)&resp->intrmod,
 			    (sizeof(struct oct_intrmod_cfg)) / 8);
 	memcpy(intr_cfg, &resp->intrmod, sizeof(struct oct_intrmod_cfg));
-	octeon_free_soft_command(oct_dev, sc);
+	WRITE_ONCE(sc->caller_is_done, true);
 
 	return 0;
-
-intrmod_info_wait_fail:
-
-	octeon_free_soft_command(oct_dev, sc);
-
-intrmod_info_wait_intr:
-
-	return -ENODEV;
 }
 
 /*  Configure interrupt moderation parameters */
@@ -2115,7 +2053,6 @@
 				  struct oct_intrmod_cfg *intr_cfg)
 {
 	struct octeon_soft_command *sc;
-	struct oct_intrmod_context *ctx;
 	struct oct_intrmod_cfg *cfg;
 	int retval;
 	struct octeon_device *oct_dev = lio->oct_dev;
@@ -2124,18 +2061,11 @@
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct_dev,
 					  sizeof(struct oct_intrmod_cfg),
-					  0,
-					  sizeof(struct oct_intrmod_context));
+					  16, 0);
 
 	if (!sc)
 		return -ENOMEM;
 
-	ctx = (struct oct_intrmod_context *)sc->ctxptr;
-
-	WRITE_ONCE(ctx->cond, 0);
-	ctx->octeon_id = lio_get_device_id(oct_dev);
-	init_waitqueue_head(&ctx->wc);
-
 	cfg = (struct oct_intrmod_cfg *)sc->virtdptr;
 
 	memcpy(cfg, intr_cfg, sizeof(struct oct_intrmod_cfg));
@@ -2146,9 +2076,8 @@
 	octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
 				    OPCODE_NIC_INTRMOD_CFG, 0, 0, 0);
 
-	sc->callback = octnet_intrmod_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 1000;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct_dev, sc);
 	if (retval == IQ_SEND_FAILED) {
@@ -2159,26 +2088,24 @@
 	/* Wait on the completion until the response arrives or the
 	 * request times out.
 	 */
-	if (sleep_cond(&ctx->wc, &ctx->cond) != -EINTR) {
-		retval = ctx->status;
-		if (retval)
-			dev_err(&oct_dev->pci_dev->dev,
-				"intrmod config failed. Status: %llx\n",
-				CVM_CAST64(retval));
-		else
-			dev_info(&oct_dev->pci_dev->dev,
-				 "Rx-Adaptive Interrupt moderation %s\n",
-				 (intr_cfg->rx_enable) ?
-				 "enabled" : "disabled");
+	retval = wait_for_sc_completion_timeout(oct_dev, sc, 0);
+	if (retval)
+		return retval;
 
-		octeon_free_soft_command(oct_dev, sc);
-
-		return ((retval) ? -ENODEV : 0);
+	retval = sc->sc_status;
+	if (retval == 0) {
+		dev_info(&oct_dev->pci_dev->dev,
+			 "Rx-Adaptive Interrupt moderation %s\n",
+			 (intr_cfg->rx_enable) ?
+			 "enabled" : "disabled");
+		WRITE_ONCE(sc->caller_is_done, true);
+		return 0;
 	}
 
-	dev_err(&oct_dev->pci_dev->dev, "iq/oq config failed\n");
-
-	return -EINTR;
+	dev_err(&oct_dev->pci_dev->dev,
+		"intrmod config failed. Status: %x\n", retval);
+	WRITE_ONCE(sc->caller_is_done, true);
+	return -ENODEV;
 }
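
Every converted call site in this patch follows the same shape: the per-command context buffer and its private wait queue are gone, the completion and status live in the soft command itself, and after a successful wait the caller releases the sc by setting caller_is_done rather than freeing it. A caller-side sketch of the flow, distilled from the hunks above (the surrounding function and its error values are illustrative, not driver code):

/* Caller-side sketch of the completion-based soft-command flow. */
static int sketch_soft_cmd(struct octeon_device *oct, struct lio *lio)
{
	struct octeon_soft_command *sc;
	struct oct_intrmod_resp *resp;
	int retval;

	/* No private context buffer any more: the ctxsize argument is 0. */
	sc = (struct octeon_soft_command *)
		octeon_alloc_soft_command(oct, 0,
					  sizeof(struct oct_intrmod_resp), 0);
	if (!sc)
		return -ENOMEM;

	resp = (struct oct_intrmod_resp *)sc->virtrptr;
	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
				    OPCODE_NIC_INTRMOD_PARAMS, 0, 0, 0);

	/* Completion and status now live in the sc itself. */
	init_completion(&sc->complete);
	sc->sc_status = OCTEON_REQUEST_PENDING;

	retval = octeon_send_soft_command(oct, sc);
	if (retval == IQ_SEND_FAILED) {
		/* Never queued: the caller still owns and frees the sc. */
		octeon_free_soft_command(oct, sc);
		return -EIO;
	}

	retval = wait_for_sc_completion_timeout(oct, sc, 0);
	if (retval)
		return retval;	/* timed out: the sc stays with the response
				 * manager (zombie list), do not touch it
				 */

	retval = resp->status ? -EIO : 0;

	/* Hand the sc to the response manager (done list) for freeing. */
	WRITE_ONCE(sc->caller_is_done, true);
	return retval;
}

The ownership rule explains the asymmetric error paths above: the caller frees the sc only when the send itself fails; after a timeout it is parked on the zombie list and reclaimed later by octeon_free_sc_zombie_list().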
 
 static int lio_get_intr_coalesce(struct net_device *netdev,
@@ -3123,9 +3050,60 @@
 	return 0;
 }
 
+static int lio_get_fecparam(struct net_device *netdev,
+			    struct ethtool_fecparam *fec)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	fec->active_fec = ETHTOOL_FEC_NONE;
+	fec->fec = ETHTOOL_FEC_NONE;
+
+	if (oct->subsystem_id == OCTEON_CN2350_25GB_SUBSYS_ID ||
+	    oct->subsystem_id == OCTEON_CN2360_25GB_SUBSYS_ID) {
+		if (oct->no_speed_setting == 1)
+			return 0;
+
+		liquidio_get_fec(lio);
+		fec->fec = (ETHTOOL_FEC_RS | ETHTOOL_FEC_OFF);
+		if (oct->props[lio->ifidx].fec == 1)
+			fec->active_fec = ETHTOOL_FEC_RS;
+		else
+			fec->active_fec = ETHTOOL_FEC_OFF;
+	}
+
+	return 0;
+}
+
+static int lio_set_fecparam(struct net_device *netdev,
+			    struct ethtool_fecparam *fec)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if (oct->subsystem_id == OCTEON_CN2350_25GB_SUBSYS_ID ||
+	    oct->subsystem_id == OCTEON_CN2360_25GB_SUBSYS_ID) {
+		if (oct->no_speed_setting == 1)
+			return -EOPNOTSUPP;
+
+		if (fec->fec & ETHTOOL_FEC_OFF)
+			liquidio_set_fec(lio, 0);
+		else if (fec->fec & ETHTOOL_FEC_RS)
+			liquidio_set_fec(lio, 1);
+		else
+			return -EOPNOTSUPP;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 static const struct ethtool_ops lio_ethtool_ops = {
 	.get_link_ksettings	= lio_get_link_ksettings,
 	.set_link_ksettings	= lio_set_link_ksettings,
+	.get_fecparam		= lio_get_fecparam,
+	.set_fecparam		= lio_set_fecparam,
 	.get_link		= ethtool_op_get_link,
 	.get_drvinfo		= lio_get_drvinfo,
 	.get_ringparam		= lio_ethtool_get_ringparam,
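
The new hooks plug into the standard ethtool FEC interface, so recent ethtool binaries can drive them with `ethtool --show-fec <dev>` and `ethtool --set-fec <dev> encoding rs|off`. For reference, a minimal user-space sketch of the ETHTOOL_GFECPARAM ioctl that lio_get_fecparam ultimately services:

/* Minimal sketch: query FEC state via the ETHTOOL_GFECPARAM ioctl.
 * Build: cc -o getfec getfec.c ; run: ./getfec eth0
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_fecparam fec = { .cmd = ETHTOOL_GFECPARAM };
	struct ifreq ifr;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <ifname>\n", argv[0]);
		return 1;
	}

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&fec;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GFECPARAM");
		close(fd);
		return 1;
	}

	/* fec.fec is the supported/configured mask, active_fec the live mode */
	printf("fec mask 0x%x, active 0x%x\n", fec.fec, fec.active_fec);
	close(fd);
	return 0;
}
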
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 6fb13fa..7f3b2e3 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -21,7 +21,6 @@
 #include <linux/firmware.h>
 #include <net/vxlan.h>
 #include <linux/kthread.h>
-#include <net/switchdev.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -99,14 +98,6 @@
 	int status;
 };
 
-struct liquidio_rx_ctl_context {
-	int octeon_id;
-
-	wait_queue_head_t wc;
-
-	int cond;
-};
-
 struct oct_link_status_resp {
 	u64 rh;
 	struct oct_link_info link_info;
@@ -642,26 +633,6 @@
 }
 
 /**
- * lio_sync_octeon_time_cb - callback that is invoked when soft command
- * sent by lio_sync_octeon_time() has completed successfully or failed
- *
- * @oct - octeon device structure
- * @status - indicates success or failure
- * @buf - pointer to the command that was sent to firmware
- **/
-static void lio_sync_octeon_time_cb(struct octeon_device *oct,
-				    u32 status, void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-
-	if (status)
-		dev_err(&oct->pci_dev->dev,
-			"Failed to sync time to octeon; error=%d\n", status);
-
-	octeon_free_soft_command(oct, sc);
-}
-
-/**
  * lio_sync_octeon_time - send latest localtime to octeon firmware so that
 * firmware will correct its time, in case there is a time skew
  *
@@ -677,7 +648,7 @@
 	struct lio_time *lt;
 	int ret;
 
-	sc = octeon_alloc_soft_command(oct, sizeof(struct lio_time), 0, 0);
+	sc = octeon_alloc_soft_command(oct, sizeof(struct lio_time), 16, 0);
 	if (!sc) {
 		dev_err(&oct->pci_dev->dev,
 			"Failed to sync time to octeon: soft command allocation failed\n");
@@ -696,15 +667,16 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_SYNC_OCTEON_TIME, 0, 0, 0);
 
-	sc->callback = lio_sync_octeon_time_cb;
-	sc->callback_arg = sc;
-	sc->wait_time = 1000;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	ret = octeon_send_soft_command(oct, sc);
 	if (ret == IQ_SEND_FAILED) {
 		dev_err(&oct->pci_dev->dev,
 			"Failed to sync time to octeon: failed to send soft command\n");
 		octeon_free_soft_command(oct, sc);
+	} else {
+		WRITE_ONCE(sc->caller_is_done, true);
 	}
 
 	queue_delayed_work(lio->sync_octeon_time_wq.wq,
@@ -1037,12 +1009,12 @@
 
 		/* fallthrough */
 	case OCT_DEV_IO_QUEUES_DONE:
-		if (wait_for_pending_requests(oct))
-			dev_err(&oct->pci_dev->dev, "There were pending requests\n");
-
 		if (lio_wait_for_instr_fetch(oct))
 			dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
 
+		if (wait_for_pending_requests(oct))
+			dev_err(&oct->pci_dev->dev, "There were pending requests\n");
+
 		/* Disable the input and output queues now. No more packets will
 		 * arrive from Octeon, but we should wait for all packet
 		 * processing to finish.
@@ -1052,6 +1024,31 @@
 		if (lio_wait_for_oq_pkts(oct))
 			dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
 
+		/* Force all requests waiting to be fetched by OCTEON to
+		 * complete.
+		 */
+		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+			struct octeon_instr_queue *iq;
+
+			if (!(oct->io_qmask.iq & BIT_ULL(i)))
+				continue;
+			iq = oct->instr_queue[i];
+
+			if (atomic_read(&iq->instr_pending)) {
+				spin_lock_bh(&iq->lock);
+				iq->fill_cnt = 0;
+				iq->octeon_read_index = iq->host_write_index;
+				iq->stats.instr_processed +=
+					atomic_read(&iq->instr_pending);
+				lio_process_iq_request_list(oct, iq, 0);
+				spin_unlock_bh(&iq->lock);
+			}
+		}
+
+		lio_process_ordered_list(oct, 1);
+		octeon_free_sc_done_list(oct);
+		octeon_free_sc_zombie_list(oct);
+
 	/* fallthrough */
 	case OCT_DEV_INTR_SET_DONE:
 		/* Disable interrupts  */
@@ -1178,34 +1175,6 @@
 }
 
 /**
- * \brief Callback for rx ctrl
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void rx_ctl_callback(struct octeon_device *oct,
-			    u32 status,
-			    void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-	struct liquidio_rx_ctl_context *ctx;
-
-	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
-
-	oct = lio_get_device(ctx->octeon_id);
-	if (status)
-		dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
-			CVM_CAST64(status));
-	WRITE_ONCE(ctx->cond, 1);
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
-}
-
-/**
  * \brief Send Rx control command
  * @param lio per-network private data
  * @param start_stop whether to start or stop
@@ -1213,9 +1182,7 @@
 static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
 	struct octeon_soft_command *sc;
-	struct liquidio_rx_ctl_context *ctx;
 	union octnet_cmd *ncmd;
-	int ctx_size = sizeof(struct liquidio_rx_ctl_context);
 	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
 	int retval;
 
@@ -1224,14 +1191,14 @@
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
-					  16, ctx_size);
+					  16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate octeon_soft_command\n");
+		return;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
-	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
-
-	WRITE_ONCE(ctx->cond, 0);
-	ctx->octeon_id = lio_get_device_id(oct);
-	init_waitqueue_head(&ctx->wc);
 
 	ncmd->u64 = 0;
 	ncmd->s.cmd = OCTNET_CMD_RX_CTL;
@@ -1244,23 +1211,25 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_CMD, 0, 0, 0);
 
-	sc->callback = rx_ctl_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 5000;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
 		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+		octeon_free_soft_command(oct, sc);
+		return;
 	} else {
 		/* Wait on the completion until the response arrives or the
 		 * request times out.
 		 */
-		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+		retval = wait_for_sc_completion_timeout(oct, sc, 0);
+		if (retval)
 			return;
-		oct->props[lio->ifidx].rx_on = start_stop;
-	}
 
-	octeon_free_soft_command(oct, sc);
+		oct->props[lio->ifidx].rx_on = start_stop;
+		WRITE_ONCE(sc->caller_is_done, true);
+	}
 }
 
 /**
@@ -1274,8 +1243,10 @@
 static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 {
 	struct net_device *netdev = oct->props[ifidx].netdev;
-	struct lio *lio;
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	struct lio *lio;
 
 	if (!netdev) {
 		dev_err(&oct->pci_dev->dev, "%s No netdevice ptr for index %d\n",
@@ -1304,6 +1275,8 @@
 	list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
 		netif_napi_del(napi);
 
+	tasklet_enable(&oct_priv->droq_tasklet);
+
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
@@ -1519,11 +1492,11 @@
 
 	i = 1;
 	while (frags--) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		pci_unmap_page((lio->oct_dev)->pci_dev,
 			       g->sg[(i >> 2)].ptr[(i & 3)],
-			       frag->size, DMA_TO_DEVICE);
+			       skb_frag_size(frag), DMA_TO_DEVICE);
 		i++;
 	}
 
@@ -1562,11 +1535,11 @@
 
 	i = 1;
 	while (frags--) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		pci_unmap_page((lio->oct_dev)->pci_dev,
 			       g->sg[(i >> 2)].ptr[(i & 3)],
-			       frag->size, DMA_TO_DEVICE);
+			       skb_frag_size(frag), DMA_TO_DEVICE);
 		i++;
 	}
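
These hunks, and the matching TX-path changes below, replace direct pokes at skb_frag_struct fields (frag->size, frag->page.p, frag->page_offset) with the accessor API: skb_frag_t, skb_frag_size() and skb_frag_dma_map(). v5.4 redefines skb_frag_t as a struct bio_vec, so the accessors keep the driver independent of the new layout. A condensed sketch of the map/unmap pairing now used (illustrative helper, error handling trimmed to the unwind loop):

static int sketch_map_frags(struct device *dev, struct sk_buff *skb,
			    dma_addr_t *daddr)
{
	int f;

	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];

		/* offset is 0 because skb_frag_dma_map() already adds
		 * the frag's own page offset internally.
		 */
		daddr[f] = skb_frag_dma_map(dev, frag, 0,
					    skb_frag_size(frag),
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dev, daddr[f]))
			goto unwind;
	}
	return 0;

unwind:
	while (f--)
		dma_unmap_page(dev, daddr[f],
			       skb_frag_size(&skb_shinfo(skb)->frags[f]),
			       DMA_TO_DEVICE);
	return -ENOMEM;
}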
 
@@ -1840,9 +1813,13 @@
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
 
 	if (oct->props[lio->ifidx].napi_enabled == 0) {
+		tasklet_disable(&oct_priv->droq_tasklet);
+
 		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
 			napi_enable(napi);
 
@@ -1876,6 +1853,12 @@
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
 
+	/* start periodical statistics fetch */
+	INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
+	lio->stats_wk.ctxptr = lio;
+	schedule_delayed_work(&lio->stats_wk.work,
+			      msecs_to_jiffies(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
+
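liquidio_open() now arms this periodic statistics poll, and liquidio_stop() tears it down with cancel_delayed_work_sync(). The body of lio_fetch_stats() is outside this diff; the sketch below shows the usual shape of such a handler, assuming lio->stats_wk is the driver's delayed_work-plus-ctxptr pair, as the two setup lines above suggest:

static void sketch_fetch_stats(struct work_struct *work)
{
	struct cavium_wk *wk = container_of(to_delayed_work(work),
					    struct cavium_wk, work);
	struct lio *lio = wk->ctxptr;

	/* ... pull firmware counters into the netdev stats here ... */

	/* Re-arm; cancel_delayed_work_sync() in the stop path makes
	 * teardown safe without an extra "stopping" flag.
	 */
	schedule_delayed_work(&lio->stats_wk.work,
			      msecs_to_jiffies(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
}
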
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
 		 netdev->name);
 
@@ -1890,6 +1873,8 @@
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
 
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
@@ -1916,6 +1901,8 @@
 		cleanup_tx_poll_fn(netdev);
 	}
 
+	cancel_delayed_work_sync(&lio->stats_wk.work);
+
 	if (lio->ptp_clock) {
 		ptp_clock_unregister(lio->ptp_clock);
 		lio->ptp_clock = NULL;
@@ -1934,6 +1921,8 @@
 
 		if (OCTEON_CN23XX_PF(oct))
 			oct->droq[0]->ops.poll_mode = 0;
+
+		tasklet_enable(&oct_priv->droq_tasklet);
 	}
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
@@ -2014,10 +2003,9 @@
 	/* Apparently, any activity in this call from the kernel has to
 	 * be atomic. So we won't wait for response.
 	 */
-	nctrl.wait_time = 0;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
 			ret);
 	}
@@ -2046,8 +2034,6 @@
 	nctrl.ncmd.s.more = 1;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
 	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-	nctrl.wait_time = 100;
 
 	nctrl.udd[0] = 0;
 	/* The MAC Address is presented in network byte order. */
@@ -2058,6 +2044,14 @@
 		dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
 		return -ENOMEM;
 	}
+
+	if (nctrl.sc_status) {
+		dev_err(&oct->pci_dev->dev,
+			"%s: MAC Address change failed. sc return=%x\n",
+			 __func__, nctrl.sc_status);
+		return -EIO;
+	}
+
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 	memcpy(((u8 *)&lio->linfo.hw_addr) + 2, addr->sa_data, ETH_ALEN);
 
@@ -2111,7 +2105,6 @@
 	lstats->rx_packets = pkts;
 	lstats->rx_dropped = drop;
 
-	octnet_get_link_stats(netdev);
 	lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
 	lstats->collisions = oct->link_stats.fromhost.total_collisions;
 
@@ -2324,7 +2317,7 @@
  * @returns whether the packet was transmitted to the device okay or not
  *             (NETDEV_TX_OK or NETDEV_TX_BUSY)
  */
-static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct lio *lio;
 	struct octnet_buf_free_info *finfo;
@@ -2431,7 +2424,7 @@
 
 	} else {
 		int i, frags;
-		struct skb_frag_struct *frag;
+		skb_frag_t *frag;
 		struct octnic_gather *g;
 
 		spin_lock(&lio->glist_lock[q_idx]);
@@ -2469,11 +2462,9 @@
 			frag = &skb_shinfo(skb)->frags[i - 1];
 
 			g->sg[(i >> 2)].ptr[(i & 3)] =
-				dma_map_page(&oct->pci_dev->dev,
-					     frag->page.p,
-					     frag->page_offset,
-					     frag->size,
-					     DMA_TO_DEVICE);
+				skb_frag_dma_map(&oct->pci_dev->dev,
+						 frag, 0, skb_frag_size(frag),
+						 DMA_TO_DEVICE);
 
 			if (dma_mapping_error(&oct->pci_dev->dev,
 					      g->sg[i >> 2].ptr[i & 3])) {
@@ -2485,7 +2476,7 @@
 					frag = &skb_shinfo(skb)->frags[j - 1];
 					dma_unmap_page(&oct->pci_dev->dev,
 						       g->sg[j >> 2].ptr[j & 3],
-						       frag->size,
+						       skb_frag_size(frag),
 						       DMA_TO_DEVICE);
 				}
 				dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
@@ -2493,7 +2484,8 @@
 				return NETDEV_TX_BUSY;
 			}
 
-			add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
+			add_sg_size(&g->sg[(i >> 2)], skb_frag_size(frag),
+				    (i & 3));
 			i++;
 		}
 
@@ -2529,7 +2521,7 @@
 		irh->vlan = skb_vlan_tag_get(skb) & 0xfff;
 	}
 
-	xmit_more = skb->xmit_more;
+	xmit_more = netdev_xmit_more();
 
 	if (unlikely(cmdsetup.s.timestamp))
 		status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more);
@@ -2598,14 +2590,15 @@
 	nctrl.ncmd.s.cmd = OCTNET_CMD_ADD_VLAN_FILTER;
 	nctrl.ncmd.s.param1 = vid;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "Add VLAN filter failed in core (ret: 0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 
 	return ret;
@@ -2626,14 +2619,15 @@
 	nctrl.ncmd.s.cmd = OCTNET_CMD_DEL_VLAN_FILTER;
 	nctrl.ncmd.s.param1 = vid;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "Del VLAN filter failed in core (ret: 0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -2659,15 +2653,16 @@
 	nctrl.ncmd.s.cmd = command;
 	nctrl.ncmd.s.param1 = rx_cmd;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev,
 			"DEVFLAGS RXCSUM change failed in core(ret:0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -2695,15 +2690,16 @@
 	nctrl.ncmd.s.more = vxlan_cmd_bit;
 	nctrl.ncmd.s.param1 = vxlan_port;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev,
 			"VxLAN port add/delete failed in core (ret:0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -2826,6 +2822,7 @@
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
 	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
 
 	if (!is_valid_ether_addr(mac))
 		return -EINVAL;
@@ -2839,12 +2836,13 @@
 	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
 	/* vfidx is 0 based, but vf_num (param1) is 1 based */
 	nctrl.ncmd.s.param1 = vfidx + 1;
-	nctrl.ncmd.s.param2 = (is_admin_assigned ? 1 : 0);
 	nctrl.ncmd.s.more = 1;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
 	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-	nctrl.wait_time = LIO_CMD_WAIT_TM;
+	if (is_admin_assigned) {
+		nctrl.ncmd.s.param2 = true;
+		nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+	}
 
 	nctrl.udd[0] = 0;
 	/* The MAC Address is presented in network byte order. */
@@ -2852,9 +2850,11 @@
 
 	oct->sriov_info.vf_macaddr[vfidx] = nctrl.udd[0];
 
-	octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	ret = octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	if (ret > 0)
+		ret = -EIO;
 
-	return 0;
+	return ret;
 }
 
 static int liquidio_set_vf_mac(struct net_device *netdev, int vfidx, u8 *mac)
@@ -2873,6 +2873,62 @@
 	return retval;
 }
 
+static int liquidio_set_vf_spoofchk(struct net_device *netdev, int vfidx,
+				    bool enable)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int retval;
+
+	if (!(oct->fw_info.app_cap_flags & LIQUIDIO_SPOOFCHK_CAP)) {
+		netif_info(lio, drv, lio->netdev,
+			   "firmware does not support spoofchk\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) {
+		netif_info(lio, drv, lio->netdev, "Invalid vfidx %d\n", vfidx);
+		return -EINVAL;
+	}
+
+	if (enable) {
+		if (oct->sriov_info.vf_spoofchk[vfidx])
+			return 0;
+	} else {
+		/* Clear */
+		if (!oct->sriov_info.vf_spoofchk[vfidx])
+			return 0;
+	}
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	nctrl.ncmd.s.cmdgroup = OCTNET_CMD_GROUP1;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_VF_SPOOFCHK;
+	/* vfidx is 0 based, but vf_num (param1) is 1 based */
+	nctrl.ncmd.s.param1 = vfidx + 1;
+	nctrl.ncmd.s.param2 = enable;
+	nctrl.ncmd.s.more = 0;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.cb_fn = NULL;
+
+	retval = octnet_send_nic_ctrl_pkt(oct, &nctrl);
+
+	if (retval) {
+		netif_info(lio, drv, lio->netdev,
+			   "Failed to set VF %d spoofchk %s\n", vfidx,
+			   enable ? "on" : "off");
+		return -1;
+	}
+
+	oct->sriov_info.vf_spoofchk[vfidx] = enable;
+	netif_info(lio, drv, lio->netdev, "VF %u spoofchk is %s\n", vfidx,
+		   enable ? "on" : "off");
+
+	return 0;
+}
+
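The new hook is reached through the standard rtnetlink VF attribute (IFLA_VF_SPOOFCHK), i.e. `ip link set <pf-dev> vf <n> spoofchk on|off`. It is honoured only when the firmware advertises the LIQUIDIO_SPOOFCHK_CAP flag defined later in this patch, and the early returns above avoid resending a setting the firmware already holds.
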
 static int liquidio_set_vf_vlan(struct net_device *netdev, int vfidx,
 				u16 vlan, u8 qos, __be16 vlan_proto)
 {
@@ -2880,6 +2936,7 @@
 	struct octeon_device *oct = lio->oct_dev;
 	struct octnic_ctrl_pkt nctrl;
 	u16 vlantci;
+	int ret = 0;
 
 	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
 		return -EINVAL;
@@ -2911,13 +2968,17 @@
 	nctrl.ncmd.s.more = 0;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
 	nctrl.cb_fn = NULL;
-	nctrl.wait_time = LIO_CMD_WAIT_TM;
 
-	octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	ret = octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	if (ret) {
+		if (ret > 0)
+			ret = -EIO;
+		return ret;
+	}
 
 	oct->sriov_info.vf_vlantci[vfidx] = vlantci;
 
-	return 0;
+	return ret;
 }
 
 static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
@@ -2930,6 +2991,8 @@
 	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
 		return -EINVAL;
 
+	memset(ivi, 0, sizeof(struct ifla_vf_info));
+
 	ivi->vf = vfidx;
 	macaddr = 2 + (u8 *)&oct->sriov_info.vf_macaddr[vfidx];
 	ether_addr_copy(&ivi->mac[0], macaddr);
@@ -2941,33 +3004,22 @@
 	else
 		ivi->trusted = false;
 	ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
+	ivi->spoofchk = oct->sriov_info.vf_spoofchk[vfidx];
+	ivi->max_tx_rate = lio->linfo.link.s.speed;
+	ivi->min_tx_rate = 0;
+
 	return 0;
 }
 
-static void trusted_vf_callback(struct octeon_device *oct_dev,
-				u32 status, void *ptr)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
-	struct lio_trusted_vf_ctx *ctx;
-
-	ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
-	ctx->status = status;
-
-	complete(&ctx->complete);
-}
-
 static int liquidio_send_vf_trust_cmd(struct lio *lio, int vfidx, bool trusted)
 {
 	struct octeon_device *oct = lio->oct_dev;
-	struct lio_trusted_vf_ctx *ctx;
 	struct octeon_soft_command *sc;
-	int ctx_size, retval;
+	int retval;
 
-	ctx_size = sizeof(struct lio_trusted_vf_ctx);
-	sc = octeon_alloc_soft_command(oct, 0, 0, ctx_size);
-
-	ctx  = (struct lio_trusted_vf_ctx *)sc->ctxptr;
-	init_completion(&ctx->complete);
+	sc = octeon_alloc_soft_command(oct, 0, 16, 0);
+	if (!sc)
+		return -ENOMEM;
 
 	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
 
@@ -2976,23 +3028,21 @@
 				    OPCODE_NIC_SET_TRUSTED_VF, 0, vfidx + 1,
 				    trusted);
 
-	sc->callback = trusted_vf_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 1000;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
+		octeon_free_soft_command(oct, sc);
 		retval = -1;
 	} else {
 		/* Wait for response or timeout */
-		if (wait_for_completion_timeout(&ctx->complete,
-						msecs_to_jiffies(2000)))
-			retval = ctx->status;
-		else
-			retval = -1;
-	}
+		retval = wait_for_sc_completion_timeout(oct, sc, 0);
+		if (retval)
+			return retval;
 
-	octeon_free_soft_command(oct, sc);
+		WRITE_ONCE(sc->caller_is_done, true);
+	}
 
 	return retval;
 }
@@ -3055,6 +3105,7 @@
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
 	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
 
 	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
 		return -EINVAL;
@@ -3070,13 +3121,15 @@
 	nctrl.ncmd.s.more = 0;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
 	nctrl.cb_fn = NULL;
-	nctrl.wait_time = LIO_CMD_WAIT_TM;
 
-	octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	ret = octnet_send_nic_ctrl_pkt(oct, &nctrl);
 
-	oct->sriov_info.vf_linkstate[vfidx] = linkstate;
+	if (!ret)
+		oct->sriov_info.vf_linkstate[vfidx] = linkstate;
+	else if (ret > 0)
+		ret = -EIO;
 
-	return 0;
+	return ret;
 }
 
 static int
@@ -3094,7 +3147,8 @@
 }
 
 static int
-liquidio_eswitch_mode_set(struct devlink *devlink, u16 mode)
+liquidio_eswitch_mode_set(struct devlink *devlink, u16 mode,
+			  struct netlink_ext_ack *extack)
 {
 	struct lio_devlink_priv *priv;
 	struct octeon_device *oct;
@@ -3133,7 +3187,8 @@
 };
 
 static int
-lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+liquidio_get_port_parent_id(struct net_device *dev,
+			    struct netdev_phys_item_id *ppid)
 {
 	struct lio *lio = GET_LIO(dev);
 	struct octeon_device *oct = lio->oct_dev;
@@ -3141,24 +3196,12 @@
 	if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
 		return -EOPNOTSUPP;
 
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = ETH_ALEN;
-		ether_addr_copy(attr->u.ppid.id,
-				(void *)&lio->linfo.hw_addr + 2);
-		break;
-
-	default:
-		return -EOPNOTSUPP;
-	}
+	ppid->id_len = ETH_ALEN;
+	ether_addr_copy(ppid->id, (void *)&lio->linfo.hw_addr + 2);
 
 	return 0;
 }
 
-static const struct switchdev_ops lio_pf_switchdev_ops = {
-	.switchdev_port_attr_get = lio_pf_switchdev_attr_get,
-};
-
 static int liquidio_get_vf_stats(struct net_device *netdev, int vfidx,
 				 struct ifla_vf_stats *vf_stats)
 {
@@ -3204,9 +3247,11 @@
 	.ndo_set_vf_mac		= liquidio_set_vf_mac,
 	.ndo_set_vf_vlan	= liquidio_set_vf_vlan,
 	.ndo_get_vf_config	= liquidio_get_vf_config,
+	.ndo_set_vf_spoofchk	= liquidio_set_vf_spoofchk,
 	.ndo_set_vf_trust	= liquidio_set_vf_trust,
 	.ndo_set_vf_link_state  = liquidio_set_vf_link_state,
 	.ndo_get_vf_stats	= liquidio_get_vf_stats,
+	.ndo_get_port_parent_id	= liquidio_get_port_parent_id,
 };
 
 /** \brief Entry point for the liquidio module
@@ -3307,7 +3352,6 @@
 	unsigned long micro;
 	u32 cur_ver;
 	struct octeon_soft_command *sc;
-	struct liquidio_if_cfg_context *ctx;
 	struct liquidio_if_cfg_resp *resp;
 	struct octdev_props *props;
 	int retval, num_iqueues, num_oqueues;
@@ -3315,7 +3359,7 @@
 	union oct_nic_if_cfg if_cfg;
 	unsigned int base_queue;
 	unsigned int gmx_port_id;
-	u32 resp_size, ctx_size, data_size;
+	u32 resp_size, data_size;
 	u32 ifidx_or_pfnum;
 	struct lio_version *vdata;
 	struct devlink *devlink;
@@ -3340,13 +3384,11 @@
 
 	for (i = 0; i < octeon_dev->ifcount; i++) {
 		resp_size = sizeof(struct liquidio_if_cfg_resp);
-		ctx_size = sizeof(struct liquidio_if_cfg_context);
 		data_size = sizeof(struct lio_version);
 		sc = (struct octeon_soft_command *)
 			octeon_alloc_soft_command(octeon_dev, data_size,
-						  resp_size, ctx_size);
+						  resp_size, 0);
 		resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-		ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
 		vdata = (struct lio_version *)sc->virtdptr;
 
 		*((u64 *)vdata) = 0;
@@ -3376,9 +3418,6 @@
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"requesting config for interface %d, iqs %d, oqs %d\n",
 			ifidx_or_pfnum, num_iqueues, num_oqueues);
-		WRITE_ONCE(ctx->cond, 0);
-		ctx->octeon_id = lio_get_device_id(octeon_dev);
-		init_waitqueue_head(&ctx->wc);
 
 		if_cfg.u64 = 0;
 		if_cfg.s.num_iqueues = num_iqueues;
@@ -3392,9 +3431,8 @@
 					    OPCODE_NIC_IF_CFG, 0,
 					    if_cfg.u64, 0);
 
-		sc->callback = lio_if_cfg_callback;
-		sc->callback_arg = sc;
-		sc->wait_time = LIO_IFCFG_WAIT_TIME;
+		init_completion(&sc->complete);
+		sc->sc_status = OCTEON_REQUEST_PENDING;
 
 		retval = octeon_send_soft_command(octeon_dev, sc);
 		if (retval == IQ_SEND_FAILED) {
@@ -3402,22 +3440,26 @@
 				"iq/oq config failed status: %x\n",
 				retval);
 			/* Soft instr is freed by driver in case of failure. */
-			goto setup_nic_dev_fail;
+			octeon_free_soft_command(octeon_dev, sc);
+			return -EIO;
 		}
 
 		/* Wait on the completion until the response arrives or the
 		 * request times out.
 		 */
-		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
-			dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
-			goto setup_nic_wait_intr;
-		}
+		retval = wait_for_sc_completion_timeout(octeon_dev, sc, 0);
+		if (retval)
+			return retval;
 
 		retval = resp->status;
 		if (retval) {
 			dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_done;
 		}
+		snprintf(octeon_dev->fw_info.liquidio_firmware_version,
+			 32, "%s",
+			 resp->cfg_info.liquidio_firmware_version);
 
 		/* Verify f/w version (in case of 'auto' loading from flash) */
 		fw_ver = octeon_dev->fw_info.liquidio_firmware_version;
@@ -3427,7 +3469,8 @@
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Unmatched firmware version. Expected %s.x, got %s.\n",
 				LIQUIDIO_BASE_VERSION, fw_ver);
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_done;
 		} else if (atomic_read(octeon_dev->adapter_fw_state) ==
 			   FW_IS_PRELOADED) {
 			dev_info(&octeon_dev->pci_dev->dev,
@@ -3454,7 +3497,8 @@
 				"Got bad iqueues (%016llx) or oqueues (%016llx) from firmware.\n",
 				resp->cfg_info.iqmask,
 				resp->cfg_info.oqmask);
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_done;
 		}
 
 		if (OCTEON_CN6XXX(octeon_dev)) {
@@ -3473,7 +3517,8 @@
 
 		if (!netdev) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_done;
 		}
 
 		SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
@@ -3482,20 +3527,21 @@
 		 * netdev tasks.
 		 */
 		netdev->netdev_ops = &lionetdevops;
-		SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops);
 
 		retval = netif_set_real_num_rx_queues(netdev, num_oqueues);
 		if (retval) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"setting real number rx failed\n");
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_free;
 		}
 
 		retval = netif_set_real_num_tx_queues(netdev, num_iqueues);
 		if (retval) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"setting real number tx failed\n");
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_free;
 		}
 
 		lio = GET_LIO(netdev);
@@ -3522,6 +3568,8 @@
 		lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
 		lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
 
+		WRITE_ONCE(sc->caller_is_done, true);
+
 		lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
 		if (OCTEON_CN23XX_PF(octeon_dev) ||
@@ -3588,7 +3636,7 @@
 				dev_err(&octeon_dev->pci_dev->dev,
 					"Error setting VF%d MAC address\n",
 					j);
-				goto setup_nic_dev_fail;
+				goto setup_nic_dev_free;
 			}
 		}
 
@@ -3610,7 +3658,7 @@
 					     lio->linfo.num_txpciq,
 					     lio->linfo.num_rxpciq)) {
 			dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 		}
 
 		ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
@@ -3621,7 +3669,7 @@
 		if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Gather list allocation failed\n");
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 		}
 
 		/* Register ethtool support */
@@ -3643,20 +3691,20 @@
 					     OCTNET_CMD_VERBOSE_ENABLE, 0);
 
 		if (setup_link_status_change_wq(netdev))
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 
 		if ((octeon_dev->fw_info.app_cap_flags &
 		     LIQUIDIO_TIME_SYNC_CAP) &&
 		    setup_sync_octeon_time_wq(netdev))
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 
 		if (setup_rx_oom_poll_fn(netdev))
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 
 		/* Register the network device with the OS */
 		if (register_netdev(netdev)) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 		}
 
 		dev_dbg(&octeon_dev->pci_dev->dev,
@@ -3679,8 +3727,6 @@
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"NIC ifidx:%d Setup successful\n", i);
 
-		octeon_free_soft_command(octeon_dev, sc);
-
 		if (octeon_dev->subsystem_id ==
 			OCTEON_CN2350_25GB_SUBSYS_ID ||
 		    octeon_dev->subsystem_id ==
@@ -3709,13 +3755,20 @@
 		}
 		octeon_dev->speed_boot = octeon_dev->speed_setting;
 
+		/* don't read FEC setting if unsupported by f/w (see above) */
+		if (octeon_dev->speed_boot == 25 &&
+		    !octeon_dev->no_speed_setting) {
+			liquidio_get_fec(lio);
+			octeon_dev->props[lio->ifidx].fec_boot =
+				octeon_dev->props[lio->ifidx].fec;
+		}
 	}
 
 	devlink = devlink_alloc(&liquidio_devlink_ops,
 				sizeof(struct lio_devlink_priv));
 	if (!devlink) {
 		dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
-		goto setup_nic_wait_intr;
+		goto setup_nic_dev_free;
 	}
 
 	lio_devlink = devlink_priv(devlink);
@@ -3725,7 +3778,7 @@
 		devlink_free(devlink);
 		dev_err(&octeon_dev->pci_dev->dev,
 			"devlink registration failed\n");
-		goto setup_nic_wait_intr;
+		goto setup_nic_dev_free;
 	}
 
 	octeon_dev->devlink = devlink;
@@ -3733,17 +3786,16 @@
 
 	return 0;
 
-setup_nic_dev_fail:
-
-	octeon_free_soft_command(octeon_dev, sc);
-
-setup_nic_wait_intr:
+setup_nic_dev_free:
 
 	while (i--) {
 		dev_err(&octeon_dev->pci_dev->dev,
 			"NIC ifidx:%d Setup failed\n", i);
 		liquidio_destroy_nic_device(octeon_dev, i);
 	}
+
+setup_nic_dev_done:
+
 	return -ENODEV;
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index b778357..370d768 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -40,14 +40,6 @@
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-struct liquidio_rx_ctl_context {
-	int octeon_id;
-
-	wait_queue_head_t wc;
-
-	int cond;
-};
-
 struct oct_timestamp_resp {
 	u64 rh;
 	u64 timestamp;
@@ -452,6 +444,8 @@
  */
 static void octeon_destroy_resources(struct octeon_device *oct)
 {
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct msix_entry *msix_entries;
 	int i;
 
@@ -471,12 +465,12 @@
 	case OCT_DEV_HOST_OK:
 		/* fallthrough */
 	case OCT_DEV_IO_QUEUES_DONE:
-		if (wait_for_pending_requests(oct))
-			dev_err(&oct->pci_dev->dev, "There were pending requests\n");
-
 		if (lio_wait_for_instr_fetch(oct))
 			dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
 
+		if (wait_for_pending_requests(oct))
+			dev_err(&oct->pci_dev->dev, "There were pending requests\n");
+
 		/* Disable the input and output queues now. No more packets will
 		 * arrive from Octeon, but we should wait for all packet
 		 * processing to finish.
@@ -485,7 +479,33 @@
 
 		if (lio_wait_for_oq_pkts(oct))
 			dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
-		/* fall through */
+
+		/* Force all requests waiting to be fetched by OCTEON to
+		 * complete.
+		 */
+		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+			struct octeon_instr_queue *iq;
+
+			if (!(oct->io_qmask.iq & BIT_ULL(i)))
+				continue;
+			iq = oct->instr_queue[i];
+
+			if (atomic_read(&iq->instr_pending)) {
+				spin_lock_bh(&iq->lock);
+				iq->fill_cnt = 0;
+				iq->octeon_read_index = iq->host_write_index;
+				iq->stats.instr_processed +=
+					atomic_read(&iq->instr_pending);
+				lio_process_iq_request_list(oct, iq, 0);
+				spin_unlock_bh(&iq->lock);
+			}
+		}
+
+		lio_process_ordered_list(oct, 1);
+		octeon_free_sc_done_list(oct);
+		octeon_free_sc_zombie_list(oct);
+
+	/* fall through */
 	case OCT_DEV_INTR_SET_DONE:
 		/* Disable interrupts  */
 		oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
@@ -569,33 +589,8 @@
 		/* Nothing to be done here either */
 		break;
 	}
-}
 
-/**
- * \brief Callback for rx ctrl
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void rx_ctl_callback(struct octeon_device *oct,
-			    u32 status, void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-	struct liquidio_rx_ctl_context *ctx;
-
-	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
-
-	oct = lio_get_device(ctx->octeon_id);
-	if (status)
-		dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
-			CVM_CAST64(status));
-	WRITE_ONCE(ctx->cond, 1);
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
+	tasklet_kill(&oct_priv->droq_tasklet);
 }
 
 /**
@@ -606,8 +601,6 @@
 static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
 	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
-	int ctx_size = sizeof(struct liquidio_rx_ctl_context);
-	struct liquidio_rx_ctl_context *ctx;
 	struct octeon_soft_command *sc;
 	union octnet_cmd *ncmd;
 	int retval;
@@ -617,14 +610,9 @@
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
-					  16, ctx_size);
+					  16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate octeon_soft_command\n");
+		return;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
-	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
-
-	WRITE_ONCE(ctx->cond, 0);
-	ctx->octeon_id = lio_get_device_id(oct);
-	init_waitqueue_head(&ctx->wc);
 
 	ncmd->u64 = 0;
 	ncmd->s.cmd = OCTNET_CMD_RX_CTL;
@@ -637,23 +625,24 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_CMD, 0, 0, 0);
 
-	sc->callback = rx_ctl_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = 5000;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
 		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+		octeon_free_soft_command(oct, sc);
 	} else {
 		/* Wait on the completion until the response arrives or the
 		 * request times out.
 		 */
-		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+		retval = wait_for_sc_completion_timeout(oct, sc, 0);
+		if (retval)
 			return;
-		oct->props[lio->ifidx].rx_on = start_stop;
-	}
 
-	octeon_free_soft_command(oct, sc);
+		oct->props[lio->ifidx].rx_on = start_stop;
+		WRITE_ONCE(sc->caller_is_done, true);
+	}
 }
 
 /**
@@ -667,6 +656,8 @@
 static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 {
 	struct net_device *netdev = oct->props[ifidx].netdev;
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
 	struct lio *lio;
 
@@ -696,6 +687,8 @@
 	list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
 		netif_napi_del(napi);
 
+	tasklet_enable(&oct_priv->droq_tasklet);
+
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
@@ -844,11 +837,11 @@
 
 	i = 1;
 	while (frags--) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		pci_unmap_page((lio->oct_dev)->pci_dev,
 			       g->sg[(i >> 2)].ptr[(i & 3)],
-			       frag->size, DMA_TO_DEVICE);
+			       skb_frag_size(frag), DMA_TO_DEVICE);
 		i++;
 	}
 
@@ -888,11 +881,11 @@
 
 	i = 1;
 	while (frags--) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		pci_unmap_page((lio->oct_dev)->pci_dev,
 			       g->sg[(i >> 2)].ptr[(i & 3)],
-			       frag->size, DMA_TO_DEVICE);
+			       skb_frag_size(frag), DMA_TO_DEVICE);
 		i++;
 	}
 
@@ -913,9 +906,13 @@
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
 
 	if (!oct->props[lio->ifidx].napi_enabled) {
+		tasklet_disable(&oct_priv->droq_tasklet);
+
 		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
 			napi_enable(napi);
 
@@ -932,6 +929,11 @@
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 	start_txqs(netdev);
 
+	INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
+	lio->stats_wk.ctxptr = lio;
+	schedule_delayed_work(&lio->stats_wk.work, msecs_to_jiffies
+					(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
+
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
 
@@ -948,6 +950,8 @@
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct octeon_device_priv *oct_priv =
+		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
 
 	/* tell Octeon to stop forwarding packets to host */
@@ -977,8 +981,12 @@
 		oct->props[lio->ifidx].napi_enabled = 0;
 
 		oct->droq[0]->ops.poll_mode = 0;
+
+		tasklet_enable(&oct_priv->droq_tasklet);
 	}
 
+	cancel_delayed_work_sync(&lio->stats_wk.work);
+
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
 	return 0;
@@ -1093,10 +1101,9 @@
 	/* Apparently, any activity in this call from the kernel has to
 	 * be atomic. So we won't wait for response.
 	 */
-	nctrl.wait_time = 0;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
 			ret);
 	}
@@ -1133,8 +1140,6 @@
 	nctrl.ncmd.s.more = 1;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
 	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-	nctrl.wait_time = 100;
 
 	nctrl.udd[0] = 0;
 	/* The MAC Address is presented in network byte order. */
@@ -1145,6 +1150,13 @@
 		dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
 		return -ENOMEM;
 	}
+
+	if (nctrl.sc_status ==
+	    FIRMWARE_STATUS_CODE(OCTEON_REQUEST_NO_PERMISSION)) {
+		dev_err(&oct->pci_dev->dev, "MAC Address change failed: no permission\n");
+		return -EPERM;
+	}
+
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 	ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, addr->sa_data);
 
@@ -1198,7 +1210,6 @@
 	lstats->rx_packets = pkts;
 	lstats->rx_dropped = drop;
 
-	octnet_get_link_stats(netdev);
 	lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
 
 	/* detailed rx_errors: */
@@ -1390,7 +1401,7 @@
  * @returns whether the packet was transmitted to the device okay or not
  *             (NETDEV_TX_OK or NETDEV_TX_BUSY)
  */
-static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct octnet_buf_free_info *finfo;
 	union octnic_cmd_setup cmdsetup;
@@ -1486,7 +1497,7 @@
 		ndata.reqtype = REQTYPE_NORESP_NET;
 
 	} else {
-		struct skb_frag_struct *frag;
+		skb_frag_t *frag;
 		struct octnic_gather *g;
 		int i, frags;
 
@@ -1524,11 +1535,9 @@
 			frag = &skb_shinfo(skb)->frags[i - 1];
 
 			g->sg[(i >> 2)].ptr[(i & 3)] =
-				dma_map_page(&oct->pci_dev->dev,
-					     frag->page.p,
-					     frag->page_offset,
-					     frag->size,
-					     DMA_TO_DEVICE);
+				skb_frag_dma_map(&oct->pci_dev->dev,
+						 frag, 0, skb_frag_size(frag),
+						 DMA_TO_DEVICE);
 			if (dma_mapping_error(&oct->pci_dev->dev,
 					      g->sg[i >> 2].ptr[i & 3])) {
 				dma_unmap_single(&oct->pci_dev->dev,
@@ -1539,7 +1548,7 @@
 					frag = &skb_shinfo(skb)->frags[j - 1];
 					dma_unmap_page(&oct->pci_dev->dev,
 						       g->sg[j >> 2].ptr[j & 3],
-						       frag->size,
+						       skb_frag_size(frag),
 						       DMA_TO_DEVICE);
 				}
 				dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
@@ -1547,7 +1556,8 @@
 				return NETDEV_TX_BUSY;
 			}
 
-			add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
+			add_sg_size(&g->sg[(i >> 2)], skb_frag_size(frag),
+				    (i & 3));
 			i++;
 		}
 
@@ -1574,7 +1584,7 @@
 		irh->vlan = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
 	}
 
-	xmit_more = skb->xmit_more;
+	xmit_more = netdev_xmit_more();
 
 	if (unlikely(cmdsetup.s.timestamp))
 		status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more);
@@ -1638,8 +1648,6 @@
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
 	struct octnic_ctrl_pkt nctrl;
-	struct completion compl;
-	u16 response_code;
 	int ret = 0;
 
 	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
@@ -1648,27 +1656,16 @@
 	nctrl.ncmd.s.cmd = OCTNET_CMD_ADD_VLAN_FILTER;
 	nctrl.ncmd.s.param1 = vid;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-	init_completion(&compl);
-	nctrl.completion = &compl;
-	nctrl.response_code = &response_code;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "Add VLAN filter failed in core (ret: 0x%x)\n",
 			ret);
-		return -EIO;
+		return -EPERM;
 	}
 
-	if (!wait_for_completion_timeout(&compl,
-					 msecs_to_jiffies(nctrl.wait_time)))
-		return -EPERM;
-
-	if (READ_ONCE(response_code))
-		return -EPERM;
-
 	return 0;
 }
 
@@ -1687,14 +1684,15 @@
 	nctrl.ncmd.s.cmd = OCTNET_CMD_DEL_VLAN_FILTER;
 	nctrl.ncmd.s.param1 = vid;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "Del VLAN filter failed in core (ret: 0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -1720,14 +1718,15 @@
 	nctrl.ncmd.s.cmd = command;
 	nctrl.ncmd.s.param1 = rx_cmd;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev, "DEVFLAGS RXCSUM change failed in core (ret:0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -1755,15 +1754,16 @@
 	nctrl.ncmd.s.more = vxlan_cmd_bit;
 	nctrl.ncmd.s.param1 = vxlan_port;
 	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
 	nctrl.netpndev = (u64)netdev;
 	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
 
 	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
+	if (ret) {
 		dev_err(&oct->pci_dev->dev,
 			"DEVFLAGS VxLAN port add/delete failed in core (ret : 0x%x)\n",
 			ret);
+		if (ret > 0)
+			ret = -EIO;
 	}
 	return ret;
 }
@@ -1924,8 +1924,7 @@
 static int setup_nic_devices(struct octeon_device *octeon_dev)
 {
 	int retval, num_iqueues, num_oqueues;
-	struct liquidio_if_cfg_context *ctx;
-	u32 resp_size, ctx_size, data_size;
+	u32 resp_size, data_size;
 	struct liquidio_if_cfg_resp *resp;
 	struct octeon_soft_command *sc;
 	union oct_nic_if_cfg if_cfg;
@@ -1956,13 +1955,11 @@
 
 	for (i = 0; i < octeon_dev->ifcount; i++) {
 		resp_size = sizeof(struct liquidio_if_cfg_resp);
-		ctx_size = sizeof(struct liquidio_if_cfg_context);
 		data_size = sizeof(struct lio_version);
 		sc = (struct octeon_soft_command *)
 			octeon_alloc_soft_command(octeon_dev, data_size,
-						  resp_size, ctx_size);
+						  resp_size, 0);
 		resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-		ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
 		vdata = (struct lio_version *)sc->virtdptr;
 
 		*((u64 *)vdata) = 0;
@@ -1970,10 +1967,6 @@
 		vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
 		vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
 
-		WRITE_ONCE(ctx->cond, 0);
-		ctx->octeon_id = lio_get_device_id(octeon_dev);
-		init_waitqueue_head(&ctx->wc);
-
 		if_cfg.u64 = 0;
 
 		if_cfg.s.num_iqueues = octeon_dev->sriov_info.rings_per_vf;
@@ -1986,32 +1979,37 @@
 					    OPCODE_NIC_IF_CFG, 0, if_cfg.u64,
 					    0);
 
-		sc->callback = lio_if_cfg_callback;
-		sc->callback_arg = sc;
-		sc->wait_time = 5000;
+		init_completion(&sc->complete);
+		sc->sc_status = OCTEON_REQUEST_PENDING;
 
 		retval = octeon_send_soft_command(octeon_dev, sc);
 		if (retval == IQ_SEND_FAILED) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"iq/oq config failed status: %x\n", retval);
 			/* Soft instr is freed by driver in case of failure. */
-			goto setup_nic_dev_fail;
+			octeon_free_soft_command(octeon_dev, sc);
+			return -EIO;
 		}
 
 		/* Wait on the completion until the response arrives or the
 		 * request times out.
 		 */
-		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
-			dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
-			goto setup_nic_wait_intr;
-		}
+		retval = wait_for_sc_completion_timeout(octeon_dev, sc, 0);
+		if (retval)
+			return retval;
 
 		retval = resp->status;
 		if (retval) {
-			dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
-			goto setup_nic_dev_fail;
+			dev_err(&octeon_dev->pci_dev->dev,
+				"iq/oq config failed, retval = %d\n", retval);
+			WRITE_ONCE(sc->caller_is_done, true);
+			return -EIO;
 		}
 
+		snprintf(octeon_dev->fw_info.liquidio_firmware_version,
+			 32, "%s",
+			 resp->cfg_info.liquidio_firmware_version);
+
 		octeon_swap_8B_data((u64 *)(&resp->cfg_info),
 				    (sizeof(struct liquidio_if_cfg_info)) >> 3);
 
@@ -2022,7 +2020,8 @@
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Got bad iqueues (%016llx) or oqueues (%016llx) from firmware.\n",
 				resp->cfg_info.iqmask, resp->cfg_info.oqmask);
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_done;
 		}
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n",
@@ -2033,7 +2032,8 @@
 
 		if (!netdev) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
-			goto setup_nic_dev_fail;
+			WRITE_ONCE(sc->caller_is_done, true);
+			goto setup_nic_dev_done;
 		}
 
 		SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
@@ -2070,6 +2070,8 @@
 		lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
 		lio->linfo.macaddr_is_admin_asgnd =
 			resp->cfg_info.linfo.macaddr_is_admin_asgnd;
+		lio->linfo.macaddr_spoofchk =
+			resp->cfg_info.linfo.macaddr_spoofchk;
 
 		lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
@@ -2109,6 +2111,8 @@
 		netdev->min_mtu = LIO_MIN_MTU_SIZE;
 		netdev->max_mtu = LIO_MAX_MTU_SIZE;
 
+		WRITE_ONCE(sc->caller_is_done, true);
+
 		/* Point to the  properties for octeon device to which this
 		 * interface belongs.
 		 */
@@ -2132,7 +2136,7 @@
 					     lio->linfo.num_txpciq,
 					     lio->linfo.num_rxpciq)) {
 			dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 		}
 
 		ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
@@ -2155,7 +2159,7 @@
 		if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Gather list allocation failed\n");
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 		}
 
 		/* Register ethtool support */
@@ -2170,15 +2174,15 @@
 					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
 
 		if (setup_link_status_change_wq(netdev))
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 
 		if (setup_rx_oom_poll_fn(netdev))
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 
 		/* Register the network device with the OS */
 		if (register_netdev(netdev)) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
-			goto setup_nic_dev_fail;
+			goto setup_nic_dev_free;
 		}
 
 		dev_dbg(&octeon_dev->pci_dev->dev,
@@ -2201,24 +2205,21 @@
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"NIC ifidx:%d Setup successful\n", i);
 
-		octeon_free_soft_command(octeon_dev, sc);
-
 		octeon_dev->no_speed_setting = 1;
 	}
 
 	return 0;
 
-setup_nic_dev_fail:
-
-	octeon_free_soft_command(octeon_dev, sc);
-
-setup_nic_wait_intr:
+setup_nic_dev_free:
 
 	while (i--) {
 		dev_err(&octeon_dev->pci_dev->dev,
 			"NIC ifidx:%d Setup failed\n", i);
 		liquidio_destroy_nic_device(octeon_dev, i);
 	}
+
+setup_nic_dev_done:
+
 	return -ENODEV;
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
index c99b59f..f3f2e71 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
@@ -25,19 +25,20 @@
 #include "octeon_nic.h"
 #include "octeon_main.h"
 #include "octeon_network.h"
-#include <net/switchdev.h>
 #include "lio_vf_rep.h"
-#include "octeon_network.h"
 
 static int lio_vf_rep_open(struct net_device *ndev);
 static int lio_vf_rep_stop(struct net_device *ndev);
-static int lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev);
+static netdev_tx_t lio_vf_rep_pkt_xmit(struct sk_buff *skb,
+				       struct net_device *ndev);
 static void lio_vf_rep_tx_timeout(struct net_device *netdev);
 static int lio_vf_rep_phys_port_name(struct net_device *dev,
 				     char *buf, size_t len);
 static void lio_vf_rep_get_stats64(struct net_device *dev,
 				   struct rtnl_link_stats64 *stats64);
 static int lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu);
+static int lio_vf_get_port_parent_id(struct net_device *dev,
+				     struct netdev_phys_item_id *ppid);
 
 static const struct net_device_ops lio_vf_rep_ndev_ops = {
 	.ndo_open = lio_vf_rep_open,
@@ -47,46 +48,28 @@
 	.ndo_get_phys_port_name = lio_vf_rep_phys_port_name,
 	.ndo_get_stats64 = lio_vf_rep_get_stats64,
 	.ndo_change_mtu = lio_vf_rep_change_mtu,
+	.ndo_get_port_parent_id = lio_vf_get_port_parent_id,
 };
 
-static void
-lio_vf_rep_send_sc_complete(struct octeon_device *oct,
-			    u32 status, void *ptr)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
-	struct lio_vf_rep_sc_ctx *ctx =
-		(struct lio_vf_rep_sc_ctx *)sc->ctxptr;
-	struct lio_vf_rep_resp *resp =
-		(struct lio_vf_rep_resp *)sc->virtrptr;
-
-	if (status != OCTEON_REQUEST_TIMEOUT && READ_ONCE(resp->status))
-		WRITE_ONCE(resp->status, 0);
-
-	complete(&ctx->complete);
-}
-
 static int
 lio_vf_rep_send_soft_command(struct octeon_device *oct,
 			     void *req, int req_size,
 			     void *resp, int resp_size)
 {
 	int tot_resp_size = sizeof(struct lio_vf_rep_resp) + resp_size;
-	int ctx_size = sizeof(struct lio_vf_rep_sc_ctx);
 	struct octeon_soft_command *sc = NULL;
 	struct lio_vf_rep_resp *rep_resp;
-	struct lio_vf_rep_sc_ctx *ctx;
 	void *sc_req;
 	int err;
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, req_size,
-					  tot_resp_size, ctx_size);
+					  tot_resp_size, 0);
 	if (!sc)
 		return -ENOMEM;
 
-	ctx = (struct lio_vf_rep_sc_ctx *)sc->ctxptr;
-	memset(ctx, 0, ctx_size);
-	init_completion(&ctx->complete);
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	sc_req = (struct lio_vf_rep_req *)sc->virtdptr;
 	memcpy(sc_req, req, req_size);
@@ -98,23 +81,24 @@
 	sc->iq_no = 0;
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
 				    OPCODE_NIC_VF_REP_CMD, 0, 0, 0);
-	sc->callback = lio_vf_rep_send_sc_complete;
-	sc->callback_arg = sc;
-	sc->wait_time = LIO_VF_REP_REQ_TMO_MS;
 
 	err = octeon_send_soft_command(oct, sc);
 	if (err == IQ_SEND_FAILED)
 		goto free_buff;
 
-	wait_for_completion_timeout(&ctx->complete,
-				    msecs_to_jiffies
-				    (2 * LIO_VF_REP_REQ_TMO_MS));
+	err = wait_for_sc_completion_timeout(oct, sc, 0);
+	if (err)
+		return err;
+
 	err = READ_ONCE(rep_resp->status) ? -EBUSY : 0;
 	if (err)
 		dev_err(&oct->pci_dev->dev, "VF rep send config failed\n");
-
-	if (resp)
+	else if (resp)
 		memcpy(resp, (rep_resp + 1), resp_size);
+
+	WRITE_ONCE(sc->caller_is_done, true);
+	return err;
+
 free_buff:
 	octeon_free_soft_command(oct, sc);
 
@@ -382,7 +366,7 @@
 		netif_wake_queue(ndev);
 }
 
-static int
+static netdev_tx_t
 lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev);
@@ -406,7 +390,7 @@
 	}
 
 	sc = (struct octeon_soft_command *)
-		octeon_alloc_soft_command(oct, 0, 0, 0);
+		octeon_alloc_soft_command(oct, 0, 16, 0);
 	if (!sc) {
 		dev_err(&oct->pci_dev->dev, "VF rep: Soft command alloc failed\n");
 		goto xmit_failed;
@@ -415,6 +399,7 @@
 	/* Multiple buffers are not used for vf_rep packets. */
 	if (skb_shinfo(skb)->nr_frags != 0) {
 		dev_err(&oct->pci_dev->dev, "VF rep: nr_frags != 0. Dropping packet\n");
+		octeon_free_soft_command(oct, sc);
 		goto xmit_failed;
 	}
 
@@ -422,6 +407,7 @@
 				     skb->data, skb->len, DMA_TO_DEVICE);
 	if (dma_mapping_error(&oct->pci_dev->dev, sc->dmadptr)) {
 		dev_err(&oct->pci_dev->dev, "VF rep: DMA mapping failed\n");
+		octeon_free_soft_command(oct, sc);
 		goto xmit_failed;
 	}
 
@@ -442,6 +428,7 @@
 	if (status == IQ_SEND_FAILED) {
 		dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr,
 				 sc->datasize, DMA_TO_DEVICE);
+		octeon_free_soft_command(oct, sc);
 		goto xmit_failed;
 	}
 
@@ -458,31 +445,19 @@
 	return NETDEV_TX_OK;
 }
 
-static int
-lio_vf_rep_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static int lio_vf_get_port_parent_id(struct net_device *dev,
+				     struct netdev_phys_item_id *ppid)
 {
 	struct lio_vf_rep_desc *vf_rep = netdev_priv(dev);
 	struct net_device *parent_ndev = vf_rep->parent_ndev;
 	struct lio *lio = GET_LIO(parent_ndev);
 
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = ETH_ALEN;
-		ether_addr_copy(attr->u.ppid.id,
-				(void *)&lio->linfo.hw_addr + 2);
-		break;
-
-	default:
-		return -EOPNOTSUPP;
-	}
+	ppid->id_len = ETH_ALEN;
+	ether_addr_copy(ppid->id, (void *)&lio->linfo.hw_addr + 2);
 
 	return 0;
 }
 
-static const struct switchdev_ops lio_vf_rep_switchdev_ops = {
-	.switchdev_port_attr_get        = lio_vf_rep_attr_get,
-};
-
 static void
 lio_vf_rep_fetch_stats(struct work_struct *work)
 {
@@ -539,7 +514,6 @@
 		ndev->min_mtu = LIO_MIN_MTU_SIZE;
 		ndev->max_mtu = LIO_MAX_MTU_SIZE;
 		ndev->netdev_ops = &lio_vf_rep_ndev_ops;
-		SWITCHDEV_SET_OPS(ndev, &lio_vf_rep_switchdev_ops);
 
 		vf_rep = netdev_priv(ndev);
 		memset(vf_rep, 0, sizeof(*vf_rep));
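
The hunks above replace the per-request context struct and completion callback with a completion embedded in the soft command itself. Below is a minimal sketch of the synchronous pattern this converges on, assuming only the helpers added elsewhere in this patch (not a drop-in function):

	static int send_sc_and_wait(struct octeon_device *oct,
				    struct octeon_soft_command *sc)
	{
		int err;

		init_completion(&sc->complete);
		sc->sc_status = OCTEON_REQUEST_PENDING;

		err = octeon_send_soft_command(oct, sc);
		if (err == IQ_SEND_FAILED) {
			/* never queued: the caller still owns sc */
			octeon_free_soft_command(oct, sc);
			return -EIO;
		}

		/* timeout 0: wait up to LIO_SC_MAX_TMO_MS; on error the
		 * response lists own sc, so it must not be freed here
		 */
		err = wait_for_sc_completion_timeout(oct, sc, 0);
		if (err)
			return err;

		/* ... examine the response in sc->virtrptr here ... */

		/* hand sc back to lio_process_ordered_list() to free */
		WRITE_ONCE(sc->caller_is_done, true);
		return 0;
	}
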
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 7407fcd..a5e0e9f 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -118,6 +118,10 @@
 /* App specific capabilities from firmware to pf driver */
 #define LIQUIDIO_TIME_SYNC_CAP 0x1
 #define LIQUIDIO_SWITCHDEV_CAP 0x2
+#define LIQUIDIO_SPOOFCHK_CAP  0x4
+
+/* error status return from firmware */
+#define OCTEON_REQUEST_NO_PERMISSION 0xc
 
 static inline u32 incr_index(u32 index, u32 count, u32 max)
 {
@@ -241,6 +245,10 @@
 
 #define   OCTNET_CMD_QUEUE_COUNT_CTL	0x1f
 
+#define   OCTNET_CMD_GROUP1             1
+#define   OCTNET_CMD_SET_VF_SPOOFCHK    0x1
+#define   OCTNET_GROUP1_LAST_CMD        OCTNET_CMD_SET_VF_SPOOFCHK
+
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
 #define   OCTNET_CMD_RXCSUM_ENABLE     0x0
@@ -250,9 +258,18 @@
 #define   OCTNET_CMD_VLAN_FILTER_ENABLE 0x1
 #define   OCTNET_CMD_VLAN_FILTER_DISABLE 0x0
 
+#define   OCTNET_CMD_FAIL 0x1
+
+#define   SEAPI_CMD_FEC_SET             0x0
+#define   SEAPI_CMD_FEC_SET_DISABLE       0x0
+#define   SEAPI_CMD_FEC_SET_RS            0x1
+#define   SEAPI_CMD_FEC_GET             0x1
+
 #define   SEAPI_CMD_SPEED_SET           0x2
 #define   SEAPI_CMD_SPEED_GET           0x3
 
+#define OPCODE_NIC_VF_PORT_STATS        0x22
+
 #define   LIO_CMD_WAIT_TM 100
 
 /* RX(packets coming from wire) Checksum verification flags */
@@ -301,7 +318,8 @@
 
 		u64 more:6; /* How many udd words follow the command */
 
-		u64 reserved:29;
+		u64 cmdgroup:8;
+		u64 reserved:21;
 
 		u64 param1:16;
 
@@ -313,7 +331,8 @@
 
 		u64 param1:16;
 
-		u64 reserved:29;
+		u64 reserved:21;
+		u64 cmdgroup:8;
 
 		u64 more:6;
 
@@ -757,13 +776,17 @@
 #ifdef __BIG_ENDIAN_BITFIELD
 	u64 gmxport:16;
 	u64 macaddr_is_admin_asgnd:1;
-	u64 rsvd:31;
+	u64 rsvd:13;
+	u64 macaddr_spoofchk:1;
+	u64 rsvd1:17;
 	u64 num_txpciq:8;
 	u64 num_rxpciq:8;
 #else
 	u64 num_rxpciq:8;
 	u64 num_txpciq:8;
-	u64 rsvd:31;
+	u64 rsvd1:17;
+	u64 macaddr_spoofchk:1;
+	u64 rsvd:13;
 	u64 macaddr_is_admin_asgnd:1;
 	u64 gmxport:16;
 #endif
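
The 8-bit cmdgroup field is carved out of the former reserved bits, mirrored for both bit-field orders so the 64-bit command word keeps the same wire layout. A minimal sketch of composing a group-1 command, assuming the union octnet_cmd wrapper this header declares around these bit-fields (parameter usage is illustrative):

	union octnet_cmd ncmd;

	ncmd.u64 = 0;
	ncmd.s.cmd = OCTNET_CMD_SET_VF_SPOOFCHK;	/* group-1 command */
	ncmd.s.cmdgroup = OCTNET_CMD_GROUP1;
	ncmd.s.param1 = 1;				/* e.g. enable */
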
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index ceac743..24c2120 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -438,9 +438,10 @@
 #define  MAX_BAR1_IOREMAP_SIZE  (16 * OCTEON_BAR1_ENTRY_SIZE)
 
 /* Response lists - 1 ordered, 1 unordered-blocking, 1 unordered-nonblocking
+ *                  1 process-done list, 1 zombie list (timed-out sc list)
  * NoResponse Lists are now maintained with each IQ. (Dec' 2007).
  */
-#define MAX_RESPONSE_LISTS           4
+#define MAX_RESPONSE_LISTS           6
 
 /* Opcode hash bits. The opcode is hashed on the lower 6-bits to lookup the
  * dispatch table.
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index f878a55..934115d 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1044,8 +1044,7 @@
 		dispatch = &oct->dispatch.dlist[i].list;
 		while (dispatch->next != dispatch) {
 			temp = dispatch->next;
-			list_del(temp);
-			list_add_tail(temp, &freelist);
+			list_move_tail(temp, &freelist);
 		}
 
 		oct->dispatch.dlist[i].opcode = 0;
@@ -1440,20 +1439,16 @@
 	/* the whole thing needs to be atomic, ideally */
 	if (droq) {
 		pkts_pend = (u32)atomic_read(&droq->pkts_pending);
-		spin_lock_bh(&droq->lock);
 		writel(droq->pkt_count - pkts_pend, droq->pkts_sent_reg);
 		droq->pkt_count = pkts_pend;
-		/* this write needs to be flushed before we release the lock */
-		mmiowb();
-		spin_unlock_bh(&droq->lock);
 		oct = droq->oct_dev;
 	}
 	if (iq) {
 		spin_lock_bh(&iq->lock);
-		writel(iq->pkt_in_done, iq->inst_cnt_reg);
-		iq->pkt_in_done = 0;
+		writel(iq->pkts_processed, iq->inst_cnt_reg);
+		iq->pkt_in_done -= iq->pkts_processed;
+		iq->pkts_processed = 0;
 		/* this write needs to be flushed before we release the lock */
-		mmiowb();
 		spin_unlock_bh(&iq->lock);
 		oct = iq->oct_dev;
 	}
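
The acknowledgment above now writes back only iq->pkts_processed (accumulated by octeon_flush_iq() later in this patch) instead of the full pkt_in_done count. A toy model of the bookkeeping, with illustrative names and values:

	/* Toy model of the split accounting above: the hardware count is
	 * decremented only by what the driver actually reaped, so the
	 * remainder is still visible on the next pass.
	 */
	static u32 ack_reaped(u32 *pkt_in_done, u32 *pkts_processed)
	{
		u32 acked = *pkts_processed;	/* e.g. 25 of 40 done */

		*pkt_in_done -= acked;		/* 15 remain pending */
		*pkts_processed = 0;
		return acked;			/* written to inst_cnt_reg */
	}
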
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index d99ca6b..3d01d36 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -316,6 +316,8 @@
 	 * device pointer (used for OS specific calls).
 	 */
 	int    rx_on;
+	int    fec;
+	int    fec_boot;
 	int    napi_enabled;
 	int    gmxport;
 	struct net_device *netdev;
@@ -397,6 +399,8 @@
 
 	int	vf_linkstate[MAX_POSSIBLE_VFS];
 
+	bool    vf_spoofchk[MAX_POSSIBLE_VFS];
+
 	u64	vf_drv_loaded_mask;
 };
 
@@ -607,6 +611,9 @@
 	u8  speed_boot;
 	u8  speed_setting;
 	u8  no_speed_setting;
+
+	u32    vfstats_poll;
+#define LIO_VFSTATS_POLL 10
 };
 
 #define  OCT_DRV_ONLINE 1
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index a71dbb7..0171690 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -301,8 +301,6 @@
 	dev_dbg(&oct->pci_dev->dev, "DROQ INIT: max_empty_descs: %d\n",
 		droq->max_empty_descs);
 
-	spin_lock_init(&droq->lock);
-
 	INIT_LIST_HEAD(&droq->dispatch_list);
 
 	/* For 56xx Pass1, this function won't be called, so no checks. */
@@ -333,8 +331,6 @@
  * Returns:
  *  Success: Pointer to recv_info_t
  *  Failure: NULL.
- * Locks:
- *  The droq->lock is held when this routine is called.
  */
 static inline struct octeon_recv_info *octeon_create_recv_info(
 		struct octeon_device *octeon_dev,
@@ -433,8 +429,6 @@
  *  up buffers (that were not dispatched) to form a contiguous ring.
  * Returns:
  *  No of descriptors refilled.
- * Locks:
- *  This routine is called with droq->lock held.
  */
 static u32
 octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
@@ -449,8 +443,7 @@
 
 	while (droq->refill_count && (desc_refilled < droq->max_count)) {
 		/* If a valid buffer exists (happens if there is no dispatch),
-		 * reuse
-		 * the buffer, else allocate.
+		 * reuse the buffer, else allocate.
 		 */
 		if (!droq->recv_buf_list[droq->refill_idx].buffer) {
 			pg_info =
@@ -503,34 +496,35 @@
 
 /** check if we can allocate packets to get out of oom.
  *  @param  droq - Droq being checked.
- *  @return does not return anything
+ *  @return 1 if the refill fails to reach the minimum credit level
  */
-void octeon_droq_check_oom(struct octeon_droq *droq)
+int octeon_retry_droq_refill(struct octeon_droq *droq)
 {
-	int desc_refilled;
 	struct octeon_device *oct = droq->oct_dev;
+	int desc_refilled, reschedule = 1;
+	u32 pkts_credit;
 
-	if (readl(droq->pkts_credit_reg) <= CN23XX_SLI_DEF_BP) {
-		spin_lock_bh(&droq->lock);
-		desc_refilled = octeon_droq_refill(oct, droq);
-		if (desc_refilled) {
-			/* Flush the droq descriptor data to memory to be sure
-			 * that when we update the credits the data in memory
-			 * is accurate.
-			 */
-			wmb();
-			writel(desc_refilled, droq->pkts_credit_reg);
-			/* make sure mmio write completes */
-			mmiowb();
-		}
-		spin_unlock_bh(&droq->lock);
+	pkts_credit = readl(droq->pkts_credit_reg);
+	desc_refilled = octeon_droq_refill(oct, droq);
+	if (desc_refilled) {
+		/* Flush the droq descriptor data to memory to be sure
+		 * that when we update the credits the data in memory
+		 * is accurate.
+		 */
+		wmb();
+		writel(desc_refilled, droq->pkts_credit_reg);
+
+		if (pkts_credit + desc_refilled >= CN23XX_SLI_DEF_BP)
+			reschedule = 0;
 	}
+
+	return reschedule;
 }
 
 static inline u32
 octeon_droq_get_bufcount(u32 buf_size, u32 total_len)
 {
-	return ((total_len + buf_size - 1) / buf_size);
+	return DIV_ROUND_UP(total_len, buf_size);
 }
 
 static int
@@ -603,9 +597,9 @@
 				 struct octeon_droq *droq,
 				 u32 pkts_to_process)
 {
+	u32 pkt, total_len = 0, pkt_count, retval;
 	struct octeon_droq_info *info;
 	union octeon_rh *rh;
-	u32 pkt, total_len = 0, pkt_count;
 
 	pkt_count = pkts_to_process;
 
@@ -709,30 +703,41 @@
 		if (droq->refill_count >= droq->refill_threshold) {
 			int desc_refilled = octeon_droq_refill(oct, droq);
 
-			/* Flush the droq descriptor data to memory to be sure
-			 * that when we update the credits the data in memory
-			 * is accurate.
-			 */
-			wmb();
-			writel((desc_refilled), droq->pkts_credit_reg);
-			/* make sure mmio write completes */
-			mmiowb();
+			if (desc_refilled) {
+				/* Flush the droq descriptor data to memory to
+				 * be sure that when we update the credits the
+				 * data in memory is accurate.
+				 */
+				wmb();
+				writel(desc_refilled, droq->pkts_credit_reg);
+			}
 		}
-
 	}                       /* for (each packet)... */
 
 	/* Increment refill_count by the number of buffers processed. */
 	droq->stats.pkts_received += pkt;
 	droq->stats.bytes_received += total_len;
 
+	retval = pkt;
 	if ((droq->ops.drop_on_max) && (pkts_to_process - pkt)) {
 		octeon_droq_drop_packets(oct, droq, (pkts_to_process - pkt));
 
 		droq->stats.dropped_toomany += (pkts_to_process - pkt);
-		return pkts_to_process;
+		retval = pkts_to_process;
 	}
 
-	return pkt;
+	atomic_sub(retval, &droq->pkts_pending);
+
+	if (droq->refill_count >= droq->refill_threshold &&
+	    readl(droq->pkts_credit_reg) < CN23XX_SLI_DEF_BP) {
+		octeon_droq_check_hw_for_pkts(droq);
+
+		/* Make sure there are no pkts_pending */
+		if (!atomic_read(&droq->pkts_pending))
+			octeon_schedule_rxq_oom_work(oct, droq);
+	}
+
+	return retval;
 }
 
 int
@@ -740,29 +745,19 @@
 			    struct octeon_droq *droq,
 			    u32 budget)
 {
-	u32 pkt_count = 0, pkts_processed = 0;
+	u32 pkt_count = 0;
 	struct list_head *tmp, *tmp2;
 
-	/* Grab the droq lock */
-	spin_lock(&droq->lock);
-
 	octeon_droq_check_hw_for_pkts(droq);
 	pkt_count = atomic_read(&droq->pkts_pending);
 
-	if (!pkt_count) {
-		spin_unlock(&droq->lock);
+	if (!pkt_count)
 		return 0;
-	}
 
 	if (pkt_count > budget)
 		pkt_count = budget;
 
-	pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count);
-
-	atomic_sub(pkts_processed, &droq->pkts_pending);
-
-	/* Release the spin lock */
-	spin_unlock(&droq->lock);
+	octeon_droq_fast_process_packets(oct, droq, pkt_count);
 
 	list_for_each_safe(tmp, tmp2, &droq->dispatch_list) {
 		struct __dispatch *rdisp = (struct __dispatch *)tmp;
@@ -798,8 +793,6 @@
 	if (budget > droq->max_count)
 		budget = droq->max_count;
 
-	spin_lock(&droq->lock);
-
 	while (total_pkts_processed < budget) {
 		octeon_droq_check_hw_for_pkts(droq);
 
@@ -813,13 +806,9 @@
 			octeon_droq_fast_process_packets(oct, droq,
 							 pkts_available);
 
-		atomic_sub(pkts_processed, &droq->pkts_pending);
-
 		total_pkts_processed += pkts_processed;
 	}
 
-	spin_unlock(&droq->lock);
-
 	list_for_each_safe(tmp, tmp2, &droq->dispatch_list) {
 		struct __dispatch *rdisp = (struct __dispatch *)tmp;
 
@@ -879,9 +868,8 @@
 int octeon_register_droq_ops(struct octeon_device *oct, u32 q_no,
 			     struct octeon_droq_ops *ops)
 {
-	struct octeon_droq *droq;
-	unsigned long flags;
 	struct octeon_config *oct_cfg = NULL;
+	struct octeon_droq *droq;
 
 	oct_cfg = octeon_get_conf(oct);
 
@@ -901,21 +889,15 @@
 	}
 
 	droq = oct->droq[q_no];
-
-	spin_lock_irqsave(&droq->lock, flags);
-
 	memcpy(&droq->ops, ops, sizeof(struct octeon_droq_ops));
 
-	spin_unlock_irqrestore(&droq->lock, flags);
-
 	return 0;
 }
 
 int octeon_unregister_droq_ops(struct octeon_device *oct, u32 q_no)
 {
-	unsigned long flags;
-	struct octeon_droq *droq;
 	struct octeon_config *oct_cfg = NULL;
+	struct octeon_droq *droq;
 
 	oct_cfg = octeon_get_conf(oct);
 
@@ -936,14 +918,10 @@
 		return 0;
 	}
 
-	spin_lock_irqsave(&droq->lock, flags);
-
 	droq->ops.fptr = NULL;
 	droq->ops.farg = NULL;
 	droq->ops.drop_on_max = 0;
 
-	spin_unlock_irqrestore(&droq->lock, flags);
-
 	return 0;
 }
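
octeon_droq_check_oom() becomes octeon_retry_droq_refill() and now reports whether credits are still below CN23XX_SLI_DEF_BP, so the caller can reschedule instead of spinning under a lock. A sketch of a self-rearming worker built on it; the wrapper struct and delay are hypothetical (the driver wires this through octeon_schedule_rxq_oom_work() and lio->rxq_status_wq[]):

	struct oom_work_ctx {			/* hypothetical wrapper */
		struct delayed_work work;
		struct octeon_droq *droq;
	};

	static void rxq_oom_work_fn(struct work_struct *work)
	{
		struct oom_work_ctx *ctx =
			container_of(work, struct oom_work_ctx, work.work);

		/* nonzero return: refill still short of the minimum */
		if (octeon_retry_droq_refill(ctx->droq))
			schedule_delayed_work(&ctx->work,
					      msecs_to_jiffies(100));
	}
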
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index f28f262..c9b19e6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -245,9 +245,6 @@
  *  Octeon DROQ.
  */
 struct octeon_droq {
-	/** A spinlock to protect access to this ring. */
-	spinlock_t lock;
-
 	u32 q_no;
 
 	u32 pkt_count;
@@ -414,6 +411,6 @@
 
 int octeon_enable_irq(struct octeon_device *oct, u32 q_no);
 
-void octeon_droq_check_oom(struct octeon_droq *droq);
+int octeon_retry_droq_refill(struct octeon_droq *droq);
 
 #endif	/*__OCTEON_DROQ_H__ */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 2327062..bebf3bd 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -94,6 +94,8 @@
 
 	u32 pkt_in_done;
 
+	u32 pkts_processed;
+
 	/** A spinlock to protect access to the input ring.*/
 	spinlock_t iq_flush_running_lock;
 
@@ -290,13 +292,19 @@
 	u32  ctxsize;
 
 	/** Time out and callback */
-	size_t wait_time;
-	size_t timeout;
+	size_t expiry_time;
 	u32 iq_no;
 	void (*callback)(struct octeon_device *, u32, void *);
 	void *callback_arg;
+
+	int caller_is_done;
+	u32 sc_status;
+	struct completion complete;
 };
 
+/* max timeout (in milliseconds) for a soft request */
+#define LIO_SC_MAX_TMO_MS       60000
+
 /** Maximum number of buffers to allocate into soft command buffer pool
  */
 #define  MAX_SOFT_COMMAND_BUFFERS	256
@@ -317,6 +325,8 @@
 		(((octeon_dev_ptr)->instr_queue[iq_no]->stats.field) += count)
 
 int octeon_setup_sc_buffer_pool(struct octeon_device *oct);
+int octeon_free_sc_done_list(struct octeon_device *oct);
+int octeon_free_sc_zombie_list(struct octeon_device *oct);
 int octeon_free_sc_buffer_pool(struct octeon_device *oct);
 struct octeon_soft_command *
 	octeon_alloc_soft_command(struct octeon_device *oct,
@@ -368,6 +378,9 @@
 			u32 force_db, void *cmd, void *buf,
 			u32 datasize, u32 reqtype);
 
+void octeon_dump_soft_command(struct octeon_device *oct,
+			      struct octeon_soft_command *sc);
+
 void octeon_prepare_soft_command(struct octeon_device *oct,
 				 struct octeon_soft_command *sc,
 				 u8 opcode, u8 subcode,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
index 021d99c..614d07b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -260,9 +260,7 @@
 		dev_info(&oct->pci_dev->dev,
 			 "got a request for FLR from VF that owns DPI ring %u\n",
 			 mbox->q_no);
-		pcie_capability_set_word(
-			oct->sriov_info.dpiring_to_vfpcidev_lut[mbox->q_no],
-			PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
+		pcie_flr(oct->sriov_info.dpiring_to_vfpcidev_lut[mbox->q_no]);
 		break;
 
 	case OCTEON_PF_CHANGED_VF_MACADDR:
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index c846eec..073d064 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -70,6 +70,10 @@
 void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
 					unsigned int bytes_compl);
 void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac);
+
+void octeon_schedule_rxq_oom_work(struct octeon_device *oct,
+				  struct octeon_droq *droq);
+
 /** Swap 8B blocks */
 static inline void octeon_swap_8B_data(u64 *data, u32 blocks)
 {
@@ -146,48 +150,72 @@
 	return 1;
 }
 
+/* input parameters:
+ * sc: pointer to a soft request
+ * timeout: milliseconds the caller wants to wait for the response
+ *          of the request.
+ *          0: the request waits until its response comes back from
+ *             the firmware within LIO_SC_MAX_TMO_MS milliseconds.
+ *             If the response does not return within
+ *             LIO_SC_MAX_TMO_MS milliseconds, lio_process_ordered_list()
+ *             will move the request to the zombie response list.
+ *
+ * return value:
+ * 0: got the response from the firmware for the sc request.
+ * -EINTR: the user aborted the command.
+ * -ETIME: the user-specified timeout expired.
+ * -EBUSY: the response of the request did not return in a
+ *         reasonable time (LIO_SC_MAX_TMO_MS); the sc will be
+ *         moved to the zombie response list by
+ *         lio_process_ordered_list().
+ *
+ * For a request with a non-zero return value, sc->caller_is_done
+ * is marked 1 here.
+ * For a request with a zero return value, the requestor must mark
+ * sc->caller_is_done with 1 after examining the response of the sc.
+ * lio_process_ordered_list() then frees the soft command on behalf
+ * of the requestor.
+ * This avoids the race where both the timeout path and
+ * lio_process_ordered_list()/the callback function try to free the
+ * same sc structure.
+ */
 static inline int
-sleep_cond(wait_queue_head_t *wait_queue, int *condition)
+wait_for_sc_completion_timeout(struct octeon_device *oct_dev,
+			       struct octeon_soft_command *sc,
+			       unsigned long timeout)
 {
 	int errno = 0;
-	wait_queue_entry_t we;
+	long timeout_jiff;
 
-	init_waitqueue_entry(&we, current);
-	add_wait_queue(wait_queue, &we);
-	while (!(READ_ONCE(*condition))) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (signal_pending(current)) {
-			errno = -EINTR;
-			goto out;
-		}
-		schedule();
+	if (timeout)
+		timeout_jiff = msecs_to_jiffies(timeout);
+	else
+		timeout_jiff = MAX_SCHEDULE_TIMEOUT;
+
+	timeout_jiff =
+		wait_for_completion_interruptible_timeout(&sc->complete,
+							  timeout_jiff);
+	if (timeout_jiff == 0) {
+		dev_err(&oct_dev->pci_dev->dev, "%s: sc is timeout\n",
+			__func__);
+		WRITE_ONCE(sc->caller_is_done, true);
+		errno = -ETIME;
+	} else if (timeout_jiff == -ERESTARTSYS) {
+		dev_err(&oct_dev->pci_dev->dev, "%s: sc is interrupted\n",
+			__func__);
+		WRITE_ONCE(sc->caller_is_done, true);
+		errno = -EINTR;
+	} else if (sc->sc_status == OCTEON_REQUEST_TIMEOUT) {
+		dev_err(&oct_dev->pci_dev->dev, "%s: sc has fatal timeout\n",
+			__func__);
+		WRITE_ONCE(sc->caller_is_done, true);
+		errno = -EBUSY;
 	}
-out:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(wait_queue, &we);
+
 	return errno;
 }
 
-/* Gives up the CPU for a timeout period.
- * Check that the condition is not true before we go to sleep for a
- * timeout period.
- */
-static inline void
-sleep_timeout_cond(wait_queue_head_t *wait_queue,
-		   int *condition,
-		   int timeout)
-{
-	wait_queue_entry_t we;
-
-	init_waitqueue_entry(&we, current);
-	add_wait_queue(wait_queue, &we);
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (!(*condition))
-		schedule_timeout(timeout);
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(wait_queue, &we);
-}
-
 #ifndef ROUNDUP4
 #define ROUNDUP4(val) (((val) + 3) & 0xfffffffc)
 #endif
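
For a caller that passes an explicit timeout, the three error paths behave differently from a plain wait_for_completion_timeout(): in every error case caller_is_done has already been set, so ownership of the sc has passed to the response lists. A short sketch of the resulting error handling, assuming an sc prepared as elsewhere in this patch:

	err = wait_for_sc_completion_timeout(oct, sc, 1000 /* ms */);
	switch (err) {
	case 0:
		/* response arrived: inspect it, then release the sc */
		WRITE_ONCE(sc->caller_is_done, true);
		break;
	case -ETIME:	/* our 1000 ms expired; fw may still answer */
	case -EINTR:	/* interrupted by a signal */
	case -EBUSY:	/* fw never answered within LIO_SC_MAX_TMO_MS */
		/* caller_is_done is already set: do not touch sc again;
		 * lio_process_ordered_list() will free (or zombie) it
		 */
		break;
	}
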
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index d7a3916..50201fc 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -35,12 +35,6 @@
 #define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
 #define   LIO_IFSTATE_RESETTING		   0x10
 
-struct liquidio_if_cfg_context {
-	u32 octeon_id;
-	wait_queue_head_t wc;
-	int cond;
-};
-
 struct liquidio_if_cfg_resp {
 	u64 rh;
 	struct liquidio_if_cfg_info cfg_info;
@@ -48,6 +42,7 @@
 };
 
 #define LIO_IFCFG_WAIT_TIME    3000 /* In milli seconds */
+#define LIQUIDIO_NDEV_STATS_POLL_TIME_MS 200
 
 /* Structure of a node in list of gather components maintained by
  * NIC driver for each network device.
@@ -76,6 +71,12 @@
 	u64     status;
 };
 
+struct oct_nic_vf_stats_resp {
+	u64     rh;
+	u64	spoofmac_cnt;
+	u64     status;
+};
+
 struct oct_nic_stats_ctrl {
 	struct completion complete;
 	struct net_device *netdev;
@@ -83,16 +84,13 @@
 
 struct oct_nic_seapi_resp {
 	u64 rh;
-	u32 speed;
+	union {
+		u32 fec_setting;
+		u32 speed;
+	};
 	u64 status;
 };
 
-struct liquidio_nic_seapi_ctl_context {
-	int octeon_id;
-	u32 status;
-	struct completion complete;
-};
-
 /** LiquidIO per-interface network private data */
 struct lio {
 	/** State of the interface. Rx/Tx happens only in the RUNNING state.  */
@@ -178,7 +176,7 @@
 	struct cavium_wq	txq_status_wq;
 
 	/* work queue for  rxq oom status */
-	struct cavium_wq	rxq_status_wq;
+	struct cavium_wq rxq_status_wq[MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES];
 
 	/* work queue for  link status */
 	struct cavium_wq	link_status_wq;
@@ -187,6 +185,7 @@
 	struct cavium_wq	sync_octeon_time_wq;
 
 	int netdev_uc_count;
+	struct cavium_wk stats_wk;
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
@@ -225,7 +224,7 @@
 
 int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
 
-int octnet_get_link_stats(struct net_device *netdev);
+void lio_fetch_stats(struct work_struct *work);
 
 int lio_wait_for_clean_oq(struct octeon_device *oct);
 /**
@@ -234,16 +233,14 @@
  */
 void liquidio_set_ethtool_ops(struct net_device *netdev);
 
-void lio_if_cfg_callback(struct octeon_device *oct,
-			 u32 status __attribute__((unused)),
-			 void *buf);
-
 void lio_delete_glists(struct lio *lio);
 
 int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_qs);
 
 int liquidio_get_speed(struct lio *lio);
 int liquidio_set_speed(struct lio *lio, int speed);
+int liquidio_get_fec(struct lio *lio);
+int liquidio_set_fec(struct lio *lio, int on_off);
 
 /**
  * \brief Net device change_mtu
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 150609b..1a706f8 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -75,8 +75,7 @@
 	else
 		sc->cmd.cmd2.rptr =  sc->dmarptr;
 
-	sc->wait_time = 1000;
-	sc->timeout = jiffies + sc->wait_time;
+	sc->expiry_time = jiffies + msecs_to_jiffies(LIO_SC_MAX_TMO_MS);
 
 	return sc;
 }
@@ -92,29 +91,6 @@
 				   ndata->reqtype);
 }
 
-static void octnet_link_ctrl_callback(struct octeon_device *oct,
-				      u32 status,
-				      void *sc_ptr)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)sc_ptr;
-	struct octnic_ctrl_pkt *nctrl;
-
-	nctrl = (struct octnic_ctrl_pkt *)sc->ctxptr;
-
-	/* Call the callback function if status is zero (meaning OK) or status
-	 * contains a firmware status code bigger than zero (meaning the
-	 * firmware is reporting an error).
-	 * If no response was expected, status is OK if the command was posted
-	 * successfully.
-	 */
-	if ((!status || status > FIRMWARE_STATUS_CODE(0)) && nctrl->cb_fn) {
-		nctrl->status = status;
-		nctrl->cb_fn(nctrl);
-	}
-
-	octeon_free_soft_command(oct, sc);
-}
-
 static inline struct octeon_soft_command
 *octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct,
 			  struct octnic_ctrl_pkt *nctrl)
@@ -127,17 +103,14 @@
 	uddsize = (u32)(nctrl->ncmd.s.more * 8);
 
 	datasize = OCTNET_CMD_SIZE + uddsize;
-	rdatasize = (nctrl->wait_time) ? 16 : 0;
+	rdatasize = 16;
 
 	sc = (struct octeon_soft_command *)
-		octeon_alloc_soft_command(oct, datasize, rdatasize,
-					  sizeof(struct octnic_ctrl_pkt));
+		octeon_alloc_soft_command(oct, datasize, rdatasize, 0);
 
 	if (!sc)
 		return NULL;
 
-	memcpy(sc->ctxptr, nctrl, sizeof(struct octnic_ctrl_pkt));
-
 	data = (u8 *)sc->virtdptr;
 
 	memcpy(data, &nctrl->ncmd, OCTNET_CMD_SIZE);
@@ -154,9 +127,8 @@
 	octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_CMD,
 				    0, 0, 0);
 
-	sc->callback = octnet_link_ctrl_callback;
-	sc->callback_arg = sc;
-	sc->wait_time = nctrl->wait_time;
+	init_completion(&sc->complete);
+	sc->sc_status = OCTEON_REQUEST_PENDING;
 
 	return sc;
 }
@@ -199,5 +171,28 @@
 	}
 
 	spin_unlock_bh(&oct->cmd_resp_wqlock);
+
+	if (nctrl->ncmd.s.cmdgroup == 0) {
+		switch (nctrl->ncmd.s.cmd) {
+			/* caller holds lock, cannot sleep */
+		case OCTNET_CMD_CHANGE_DEVFLAGS:
+		case OCTNET_CMD_SET_MULTI_LIST:
+		case OCTNET_CMD_SET_UC_LIST:
+			WRITE_ONCE(sc->caller_is_done, true);
+			return retval;
+		}
+	}
+
+	retval = wait_for_sc_completion_timeout(oct, sc, 0);
+	if (retval)
+		return retval;
+
+	nctrl->sc_status = sc->sc_status;
+	retval = nctrl->sc_status;
+	if (nctrl->cb_fn)
+		nctrl->cb_fn(nctrl);
+
+	WRITE_ONCE(sc->caller_is_done, true);
+
 	return retval;
 }
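
With the callback path removed, octnet_send_nic_ctrl_pkt() now blocks for every command except the three cannot-sleep cases above, and the firmware result comes back in nctrl->sc_status. A minimal sketch of a caller after this change (command choice and queue index are illustrative):

	struct octnic_ctrl_pkt nctrl;
	int ret;

	memset(&nctrl, 0, sizeof(nctrl));
	nctrl.ncmd.u64 = 0;
	nctrl.ncmd.s.cmd = OCTNET_CMD_VLAN_FILTER_CTL;	/* illustrative */
	nctrl.ncmd.s.param1 = OCTNET_CMD_VLAN_FILTER_ENABLE;
	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
	nctrl.netpndev = (u64)netdev;
	nctrl.cb_fn = NULL;		/* wait_time field no longer exists */

	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
	if (ret)
		/* send failed, wait failed, or firmware reported error */
		return ret;
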
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index de4130d..87dd6f8 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -52,20 +52,13 @@
 	/** Input queue to use to send this command. */
 	u64 iq_no;
 
-	/** Time to wait for Octeon software to respond to this control command.
-	 *  If wait_time is 0, OSI assumes no response is expected.
-	 */
-	size_t wait_time;
-
 	/** The network device that issued the control command. */
 	u64 netpndev;
 
 	/** Callback function called when the command has been fetched */
 	octnic_ctrl_pkt_cb_fn_t cb_fn;
 
-	u32 status;
-	u16 *response_code;
-	struct completion *completion;
+	u32 sc_status;
 };
 
 #define MAX_UDD_SIZE(nctrl) (sizeof((nctrl)->udd))
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 8f746e1..6dd65f9 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -123,6 +123,7 @@
 	iq->do_auto_flush = 1;
 	iq->db_timeout = (u32)conf->db_timeout;
 	atomic_set(&iq->instr_pending, 0);
+	iq->pkts_processed = 0;
 
 	/* Initialize the spinlock for this instruction queue */
 	spin_lock_init(&iq->lock);
@@ -217,15 +218,13 @@
 		return 0;
 	}
 	oct->instr_queue[iq_no] =
-	    vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
+	    vzalloc_node(sizeof(struct octeon_instr_queue), numa_node);
 	if (!oct->instr_queue[iq_no])
 		oct->instr_queue[iq_no] =
-		    vmalloc(sizeof(struct octeon_instr_queue));
+		    vzalloc(sizeof(struct octeon_instr_queue));
 	if (!oct->instr_queue[iq_no])
 		return 1;
 
-	memset(oct->instr_queue[iq_no], 0,
-	       sizeof(struct octeon_instr_queue));
 
 	oct->instr_queue[iq_no]->q_index = q_index;
 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
@@ -238,8 +237,10 @@
 	}
 
 	oct->num_iqs++;
-	if (oct->fn_list.enable_io_queues(oct))
+	if (oct->fn_list.enable_io_queues(oct)) {
+		octeon_delete_instr_queue(oct, iq_no);
 		return 1;
+	}
 
 	return 0;
 }
@@ -277,7 +278,6 @@
 	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
 		writel(iq->fill_cnt, iq->doorbell_reg);
 		/* make sure doorbell write goes through */
-		mmiowb();
 		iq->fill_cnt = 0;
 		iq->last_db_time = jiffies;
 		return;
@@ -379,7 +379,6 @@
 	u32 inst_count = 0;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 	struct octeon_soft_command *sc;
-	struct octeon_instr_irh *irh;
 	unsigned long flags;
 
 	while (old != iq->octeon_read_index) {
@@ -401,40 +400,21 @@
 		case REQTYPE_RESP_NET:
 		case REQTYPE_SOFT_COMMAND:
 			sc = buf;
-
-			if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
-				irh = (struct octeon_instr_irh *)
-					&sc->cmd.cmd3.irh;
-			else
-				irh = (struct octeon_instr_irh *)
-					&sc->cmd.cmd2.irh;
-			if (irh->rflag) {
-				/* We're expecting a response from Octeon.
-				 * It's up to lio_process_ordered_list() to
-				 * process  sc. Add sc to the ordered soft
-				 * command response list because we expect
-				 * a response from Octeon.
-				 */
-				spin_lock_irqsave
-					(&oct->response_list
-					 [OCTEON_ORDERED_SC_LIST].lock,
-					 flags);
-				atomic_inc(&oct->response_list
-					[OCTEON_ORDERED_SC_LIST].
-					pending_req_count);
-				list_add_tail(&sc->node, &oct->response_list
-					[OCTEON_ORDERED_SC_LIST].head);
-				spin_unlock_irqrestore
-					(&oct->response_list
-					 [OCTEON_ORDERED_SC_LIST].lock,
-					 flags);
-			} else {
-				if (sc->callback) {
-					/* This callback must not sleep */
-					sc->callback(oct, OCTEON_REQUEST_DONE,
-						     sc->callback_arg);
-				}
-			}
+			/* We're expecting a response from Octeon.
+			 * It's up to lio_process_ordered_list() to
+			 * process  sc. Add sc to the ordered soft
+			 * command response list because we expect
+			 * a response from Octeon.
+			 */
+			spin_lock_irqsave(&oct->response_list
+					  [OCTEON_ORDERED_SC_LIST].lock, flags);
+			atomic_inc(&oct->response_list
+				   [OCTEON_ORDERED_SC_LIST].pending_req_count);
+			list_add_tail(&sc->node, &oct->response_list
+				[OCTEON_ORDERED_SC_LIST].head);
+			spin_unlock_irqrestore(&oct->response_list
+					       [OCTEON_ORDERED_SC_LIST].lock,
+					       flags);
 			break;
 		default:
 			dev_err(&oct->pci_dev->dev,
@@ -459,7 +439,7 @@
 
 	if (atomic_read(&oct->response_list
 			[OCTEON_ORDERED_SC_LIST].pending_req_count))
-		queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
+		queue_work(cwq->wq, &cwq->wk.work.work);
 
 	return inst_count;
 }
@@ -495,6 +475,7 @@
 				lio_process_iq_request_list(oct, iq, 0);
 
 		if (inst_processed) {
+			iq->pkts_processed += inst_processed;
 			atomic_sub(inst_processed, &iq->instr_pending);
 			iq->stats.instr_processed += inst_processed;
 		}
@@ -753,8 +734,7 @@
 		len = (u32)ih2->dlengsz;
 	}
 
-	if (sc->wait_time)
-		sc->timeout = jiffies + sc->wait_time;
+	sc->expiry_time = jiffies + msecs_to_jiffies(LIO_SC_MAX_TMO_MS);
 
 	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
 				    len, REQTYPE_SOFT_COMMAND));
@@ -789,11 +769,76 @@
 	return 0;
 }
 
+int octeon_free_sc_done_list(struct octeon_device *oct)
+{
+	struct octeon_response_list *done_sc_list, *zombie_sc_list;
+	struct octeon_soft_command *sc;
+	struct list_head *tmp, *tmp2;
+	spinlock_t *sc_lists_lock; /* lock for response_list */
+
+	done_sc_list = &oct->response_list[OCTEON_DONE_SC_LIST];
+	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
+
+	if (!atomic_read(&done_sc_list->pending_req_count))
+		return 0;
+
+	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
+
+	spin_lock_bh(sc_lists_lock);
+
+	list_for_each_safe(tmp, tmp2, &done_sc_list->head) {
+		sc = list_entry(tmp, struct octeon_soft_command, node);
+
+		if (READ_ONCE(sc->caller_is_done)) {
+			list_del(&sc->node);
+			atomic_dec(&done_sc_list->pending_req_count);
+
+			if (*sc->status_word == COMPLETION_WORD_INIT) {
+				/* timeout; move sc to zombie list */
+				list_add_tail(&sc->node, &zombie_sc_list->head);
+				atomic_inc(&zombie_sc_list->pending_req_count);
+			} else {
+				octeon_free_soft_command(oct, sc);
+			}
+		}
+	}
+
+	spin_unlock_bh(sc_lists_lock);
+
+	return 0;
+}
+
+int octeon_free_sc_zombie_list(struct octeon_device *oct)
+{
+	struct octeon_response_list *zombie_sc_list;
+	struct octeon_soft_command *sc;
+	struct list_head *tmp, *tmp2;
+	spinlock_t *sc_lists_lock; /* lock for response_list */
+
+	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
+	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
+
+	spin_lock_bh(sc_lists_lock);
+
+	list_for_each_safe(tmp, tmp2, &zombie_sc_list->head) {
+		list_del(tmp);
+		atomic_dec(&zombie_sc_list->pending_req_count);
+		sc = list_entry(tmp, struct octeon_soft_command, node);
+		octeon_free_soft_command(oct, sc);
+	}
+
+	spin_unlock_bh(sc_lists_lock);
+
+	return 0;
+}
+
 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
 {
 	struct list_head *tmp, *tmp2;
 	struct octeon_soft_command *sc;
 
+	octeon_free_sc_zombie_list(oct);
+
 	spin_lock_bh(&oct->sc_buf_pool.lock);
 
 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
@@ -822,6 +867,9 @@
 	struct octeon_soft_command *sc = NULL;
 	struct list_head *tmp;
 
+	if (!rdatasize)
+		rdatasize = 16;
+
 	WARN_ON((offset + datasize + rdatasize + ctxsize) >
 	       SOFT_COMMAND_BUFFER_SIZE);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index fe5b537..ac7747c 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -69,6 +69,8 @@
 	u32 status;
 	u64 status64;
 
+	octeon_free_sc_done_list(octeon_dev);
+
 	ordered_sc_list = &octeon_dev->response_list[OCTEON_ORDERED_SC_LIST];
 
 	do {
@@ -111,26 +113,88 @@
 					}
 				}
 			}
-		} else if (force_quit || (sc->timeout &&
-			time_after(jiffies, (unsigned long)sc->timeout))) {
-			dev_err(&octeon_dev->pci_dev->dev, "%s: cmd failed, timeout (%ld, %ld)\n",
-				__func__, (long)jiffies, (long)sc->timeout);
+		} else if (unlikely(force_quit) || (sc->expiry_time &&
+			time_after(jiffies, (unsigned long)sc->expiry_time))) {
+			struct octeon_instr_irh *irh =
+				(struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+
+			dev_err(&octeon_dev->pci_dev->dev, "%s: ", __func__);
+			dev_err(&octeon_dev->pci_dev->dev,
+				"cmd %x/%x/%llx/%llx failed, ",
+				irh->opcode, irh->subcode,
+				sc->cmd.cmd3.ossp[0], sc->cmd.cmd3.ossp[1]);
+			dev_err(&octeon_dev->pci_dev->dev,
+				"timeout (%ld, %ld)\n",
+				(long)jiffies, (long)sc->expiry_time);
 			status = OCTEON_REQUEST_TIMEOUT;
 		}
 
 		if (status != OCTEON_REQUEST_PENDING) {
+			sc->sc_status = status;
+
 			/* we have received a response or we have timed out */
 			/* remove node from linked list */
 			list_del(&sc->node);
 			atomic_dec(&octeon_dev->response_list
-					  [OCTEON_ORDERED_SC_LIST].
-					  pending_req_count);
-			spin_unlock_bh
-			    (&ordered_sc_list->lock);
+				   [OCTEON_ORDERED_SC_LIST].
+				   pending_req_count);
 
-			if (sc->callback)
+			if (!sc->callback) {
+				atomic_inc(&octeon_dev->response_list
+					   [OCTEON_DONE_SC_LIST].
+					   pending_req_count);
+				list_add_tail(&sc->node,
+					      &octeon_dev->response_list
+					      [OCTEON_DONE_SC_LIST].head);
+
+				if (unlikely(READ_ONCE(sc->caller_is_done))) {
+					/* caller does not wait for response
+					 * from firmware
+					 */
+					if (status != OCTEON_REQUEST_DONE) {
+						struct octeon_instr_irh *irh;
+
+						irh =
+						    (struct octeon_instr_irh *)
+						    &sc->cmd.cmd3.irh;
+						dev_dbg
+						    (&octeon_dev->pci_dev->dev,
+						    "%s: sc failed: opcode=%x, ",
+						    __func__, irh->opcode);
+						dev_dbg
+						    (&octeon_dev->pci_dev->dev,
+						    "subcode=%x, ossp[0]=%llx, ",
+						    irh->subcode,
+						    sc->cmd.cmd3.ossp[0]);
+						dev_dbg
+						    (&octeon_dev->pci_dev->dev,
+						    "ossp[1]=%llx, status=%d\n",
+						    sc->cmd.cmd3.ossp[1],
+						    status);
+					}
+				} else {
+					complete(&sc->complete);
+				}
+
+				spin_unlock_bh(&ordered_sc_list->lock);
+			} else {
+				/* sc with callback function */
+				if (status == OCTEON_REQUEST_TIMEOUT) {
+					atomic_inc(&octeon_dev->response_list
+						   [OCTEON_ZOMBIE_SC_LIST].
+						   pending_req_count);
+					list_add_tail(&sc->node,
+						      &octeon_dev->response_list
+						      [OCTEON_ZOMBIE_SC_LIST].
+						      head);
+				}
+
+				spin_unlock_bh(&ordered_sc_list->lock);
+
 				sc->callback(octeon_dev, status,
 					     sc->callback_arg);
+				/* sc is freed by caller */
+			}
 
 			request_complete++;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.h b/drivers/net/ethernet/cavium/liquidio/response_manager.h
index 9169c28..ed4020d 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.h
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.h
@@ -53,7 +53,9 @@
 	OCTEON_ORDERED_LIST = 0,
 	OCTEON_UNORDERED_NONBLOCKING_LIST = 1,
 	OCTEON_UNORDERED_BLOCKING_LIST = 2,
-	OCTEON_ORDERED_SC_LIST = 3
+	OCTEON_ORDERED_SC_LIST = 3,
+	OCTEON_DONE_SC_LIST = 4,
+	OCTEON_ZOMBIE_SC_LIST = 5
 };
 
 /** Response Order values for a Octeon Request. */
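
The two added lists complete a small state machine for ordered soft commands; a summary of the transitions implemented by lio_process_ordered_list() and octeon_free_sc_done_list() in this patch:

	/* Lifecycle of an ordered soft command after this patch:
	 *
	 *   ORDERED_SC_LIST  - posted, waiting for response or expiry
	 *        |
	 *        v  (lio_process_ordered_list())
	 *   DONE_SC_LIST     - result recorded in sc->sc_status; freed
	 *        |             once the caller sets caller_is_done
	 *        v  (only if the fw never wrote the completion word)
	 *   ZOMBIE_SC_LIST   - kept until octeon_free_sc_zombie_list(),
	 *                      so a late DMA cannot hit freed memory
	 */
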
diff --git a/drivers/net/ethernet/cavium/octeon/Makefile b/drivers/net/ethernet/cavium/octeon/Makefile
index efa41c1..4f5098f 100644
--- a/drivers/net/ethernet/cavium/octeon/Makefile
+++ b/drivers/net/ethernet/cavium/octeon/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Cavium network device drivers.
 #
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index bb43ddb..cdd7e5d 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1080,8 +1080,11 @@
 	/* Set the mode of the interface, RGMII/MII. */
 	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && netdev->phydev) {
 		union cvmx_agl_prtx_ctl agl_prtx_ctl;
-		int rgmii_mode = (netdev->phydev->supported &
-				  (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) != 0;
+		int rgmii_mode =
+			(linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+					   netdev->phydev->supported) |
+			 linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+					   netdev->phydev->supported)) != 0;
 
 		agl_prtx_ctl.u64 = cvmx_read_csr(p->agl_prt_ctl);
 		agl_prtx_ctl.s.mode = rgmii_mode ? 0 : 1;
@@ -1268,12 +1271,13 @@
 	return 0;
 }
 
-static int octeon_mgmt_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+octeon_mgmt_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct octeon_mgmt *p = netdev_priv(netdev);
 	union mgmt_port_ring_entry re;
 	unsigned long flags;
-	int rv = NETDEV_TX_BUSY;
+	netdev_tx_t rv = NETDEV_TX_BUSY;
 
 	re.d64 = 0;
 	re.s.tstamp = ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) != 0);
@@ -1495,12 +1499,12 @@
 	netdev->ethtool_ops = &octeon_mgmt_ethtool_ops;
 
 	netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM;
-	netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM;
+	netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN;
 
 	mac = of_get_mac_address(pdev->dev.of_node);
 
-	if (mac)
-		memcpy(netdev->dev_addr, mac, ETH_ALEN);
+	if (!IS_ERR(mac))
+		ether_addr_copy(netdev->dev_addr, mac);
 	else
 		eth_hw_addr_random(netdev);
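
The RGMII probe above switches from the legacy 32-bit SUPPORTED_* mask to the linkmode bitmap API. A standalone sketch of the same capability test, assuming only the upstream phylib helpers:

	#include <linux/phy.h>

	/* true if the PHY supports either 1000BASE-T mode (sketch) */
	static bool phy_has_gbit(struct phy_device *phydev)
	{
		return linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
					 phydev->supported) ||
		       linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
					 phydev->supported);
	}
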
 
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index f4d8176..090d6b8 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #ifndef NIC_H
@@ -271,7 +268,7 @@
 };
 
 struct nicvf_work {
-	struct delayed_work    work;
+	struct work_struct     work;
 	u8                     mode;
 	struct xcast_addr_list *mc;
 };
@@ -327,7 +324,11 @@
 	struct nicvf_work       rx_mode_work;
 	/* spinlock to protect workqueue arguments from concurrent access */
 	spinlock_t              rx_mode_wq_lock;
-
+	/* workqueue for handling kernel ndo_set_rx_mode() calls */
+	struct workqueue_struct *nicvf_rx_mode_wq;
+	/* mutex to protect VF's mailbox contents from concurrent access */
+	struct mutex            rx_mode_mtx;
+	struct delayed_work	link_change_work;
 	/* PTP timestamp */
 	struct cavium_ptp	*ptp_clock;
 	/* Inbound timestamping is on */
@@ -575,10 +576,8 @@
 
 struct xcast {
 	u8    msg;
-	union {
-		u8    mode;
-		u64   mac;
-	} data;
+	u8    mode;
+	u64   mac:48;
 };
 
 /* 128 bit shared memory between PF and each VF */
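
Replacing the union with plain fields plus a 48-bit MAC bit-field lets msg, mode and mac travel together in one message, instead of mode and mac being mutually exclusive. An illustrative compile-time check, not part of the driver, that the message still fits the 128-bit mailbox described above (it would sit inside an init function):

	/* msg(8) + mode(8) + mac(48) = 64 bits, half of the 128-bit
	 * mailbox; BUILD_BUG_ON is from <linux/bug.h>
	 */
	BUILD_BUG_ON(sizeof(struct xcast) > 2 * sizeof(u64));
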
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 6c8dcb6..9361f96 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -57,14 +54,8 @@
 #define	NIC_GET_BGX_FROM_VF_LMAC_MAP(map)	((map >> 4) & 0xF)
 #define	NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)	(map & 0xF)
 	u8			*vf_lmac_map;
-	struct delayed_work     dwork;
-	struct workqueue_struct *check_link;
-	u8			*link;
-	u8			*duplex;
-	u32			*speed;
 	u16			cpi_base[MAX_NUM_VFS_SUPPORTED];
 	u16			rssi_base[MAX_NUM_VFS_SUPPORTED];
-	bool			mbx_lock[MAX_NUM_VFS_SUPPORTED];
 
 	/* MSI-X */
 	u8			num_vec;
@@ -929,6 +920,35 @@
 	nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
 }
 
+/* Get BGX LMAC link status and update the corresponding VF
+ * if there is a change. Valid only if the internal L2 switch
+ * is not present; otherwise the VF link is always treated as up.
+ */
+static void nic_link_status_get(struct nicpf *nic, u8 vf)
+{
+	union nic_mbx mbx = {};
+	struct bgx_link_status link;
+	u8 bgx, lmac;
+
+	mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+
+	/* Get BGX, LMAC indices for the VF */
+	bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+	lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+	/* Get interface link status */
+	bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
+
+	/* Send a mbox message to VF with current link status */
+	mbx.link_status.link_up = link.link_up;
+	mbx.link_status.duplex = link.duplex;
+	mbx.link_status.speed = link.speed;
+	mbx.link_status.mac_type = link.mac_type;
+
+	/* reply with link status */
+	nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
 /* Interrupt handler to handle mailbox messages from VFs */
 static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 {
@@ -941,8 +961,6 @@
 	int i;
 	int ret = 0;
 
-	nic->mbx_lock[vf] = true;
-
 	mbx_addr = nic_get_mbx_addr(vf);
 	mbx_data = (u64 *)&mbx;
 
@@ -957,12 +975,7 @@
 	switch (mbx.msg.msg) {
 	case NIC_MBOX_MSG_READY:
 		nic_mbx_send_ready(nic, vf);
-		if (vf < nic->num_vf_en) {
-			nic->link[vf] = 0;
-			nic->duplex[vf] = 0;
-			nic->speed[vf] = 0;
-		}
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_QS_CFG:
 		reg_addr = NIC_PF_QSET_0_127_CFG |
 			   (mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -1031,7 +1044,7 @@
 		break;
 	case NIC_MBOX_MSG_RSS_SIZE:
 		nic_send_rss_size(nic, vf);
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_RSS_CFG:
 	case NIC_MBOX_MSG_RSS_CFG_CONT:
 		nic_config_rss(nic, &mbx.rss_cfg);
@@ -1039,7 +1052,7 @@
 	case NIC_MBOX_MSG_CFG_DONE:
 		/* Last message of VF config msg sequence */
 		nic_enable_vf(nic, vf, true);
-		goto unlock;
+		break;
 	case NIC_MBOX_MSG_SHUTDOWN:
 		/* First msg in VF teardown sequence */
 		if (vf >= nic->num_vf_en)
@@ -1049,19 +1062,19 @@
 		break;
 	case NIC_MBOX_MSG_ALLOC_SQS:
 		nic_alloc_sqs(nic, &mbx.sqs_alloc);
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_NICVF_PTR:
 		nic->nicvf[vf] = mbx.nicvf.nicvf;
 		break;
 	case NIC_MBOX_MSG_PNICVF_PTR:
 		nic_send_pnicvf(nic, vf);
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_SNICVF_PTR:
 		nic_send_snicvf(nic, &mbx.nicvf);
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_BGX_STATS:
 		nic_get_bgx_stats(nic, &mbx.bgx_stats);
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_LOOPBACK:
 		ret = nic_config_loopback(nic, &mbx.lbk);
 		break;
@@ -1070,7 +1083,7 @@
 		break;
 	case NIC_MBOX_MSG_PFC:
 		nic_pause_frame(nic, vf, &mbx.pfc);
-		goto unlock;
+		return;
 	case NIC_MBOX_MSG_PTP_CFG:
 		nic_config_timestamp(nic, vf, &mbx.ptp);
 		break;
@@ -1094,7 +1107,7 @@
 		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 		bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
-					mbx.xcast.data.mac,
+					mbx.xcast.mac,
 					vf < NIC_VF_PER_MBX_REG ? vf :
 					vf - NIC_VF_PER_MBX_REG);
 		break;
@@ -1106,8 +1119,15 @@
 		}
 		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-		bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+		bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
 		break;
+	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
+		if (vf >= nic->num_vf_en) {
+			ret = -1; /* NACK */
+			break;
+		}
+		nic_link_status_get(nic, vf);
+		return;
 	default:
 		dev_err(&nic->pdev->dev,
 			"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1121,8 +1141,6 @@
 			mbx.msg.msg, vf);
 		nic_mbx_send_nack(nic, vf);
 	}
-unlock:
-	nic->mbx_lock[vf] = false;
 }
 
 static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
@@ -1270,52 +1288,6 @@
 	return 0;
 }
 
-/* Poll for BGX LMAC link status and update corresponding VF
- * if there is a change, valid only if internal L2 switch
- * is not present otherwise VF link is always treated as up
- */
-static void nic_poll_for_link(struct work_struct *work)
-{
-	union nic_mbx mbx = {};
-	struct nicpf *nic;
-	struct bgx_link_status link;
-	u8 vf, bgx, lmac;
-
-	nic = container_of(work, struct nicpf, dwork.work);
-
-	mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
-
-	for (vf = 0; vf < nic->num_vf_en; vf++) {
-		/* Poll only if VF is UP */
-		if (!nic->vf_enabled[vf])
-			continue;
-
-		/* Get BGX, LMAC indices for the VF */
-		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-		/* Get interface link status */
-		bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
-
-		/* Inform VF only if link status changed */
-		if (nic->link[vf] == link.link_up)
-			continue;
-
-		if (!nic->mbx_lock[vf]) {
-			nic->link[vf] = link.link_up;
-			nic->duplex[vf] = link.duplex;
-			nic->speed[vf] = link.speed;
-
-			/* Send a mbox message to VF with current link status */
-			mbx.link_status.link_up = link.link_up;
-			mbx.link_status.duplex = link.duplex;
-			mbx.link_status.speed = link.speed;
-			mbx.link_status.mac_type = link.mac_type;
-			nic_send_msg_to_vf(nic, vf, &mbx);
-		}
-	}
-	queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
-}
-
 static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
@@ -1384,18 +1356,6 @@
 	if (!nic->vf_lmac_map)
 		goto err_release_regions;
 
-	nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
-	if (!nic->link)
-		goto err_release_regions;
-
-	nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
-	if (!nic->duplex)
-		goto err_release_regions;
-
-	nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
-	if (!nic->speed)
-		goto err_release_regions;
-
 	/* Initialize hardware */
 	nic_init_hw(nic);
 
@@ -1411,22 +1371,8 @@
 	if (err)
 		goto err_unregister_interrupts;
 
-	/* Register a physical link status poll fn() */
-	nic->check_link = alloc_workqueue("check_link_status",
-					  WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
-	if (!nic->check_link) {
-		err = -ENOMEM;
-		goto err_disable_sriov;
-	}
-
-	INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
-	queue_delayed_work(nic->check_link, &nic->dwork, 0);
-
 	return 0;
 
-err_disable_sriov:
-	if (nic->flags & NIC_SRIOV_ENABLED)
-		pci_disable_sriov(pdev);
 err_unregister_interrupts:
 	nic_unregister_interrupts(nic);
 err_release_regions:
@@ -1447,12 +1393,6 @@
 	if (nic->flags & NIC_SRIOV_ENABLED)
 		pci_disable_sriov(pdev);
 
-	if (nic->check_link) {
-		/* Destroy work Queue */
-		cancel_delayed_work_sync(&nic->dwork);
-		destroy_workqueue(nic->check_link);
-	}
-
 	nic_unregister_interrupts(nic);
 	pci_release_regions(pdev);
 
diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
index a16c48a..b3bd24f 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #ifndef NIC_REG_H
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 92ba958..5e0b16b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 /* ETHTOOL Support for VNIC_VF Device*/
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 88f8a8f..40a44dc 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -32,6 +29,13 @@
 #define DRV_NAME	"nicvf"
 #define DRV_VERSION	"1.0"
 
+/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
+ * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
+ * this value, keeping headroom for the 14 byte Ethernet header and two
+ * VLAN tags (for QinQ)
+ */
+#define MAX_XDP_MTU	(1530 - ETH_HLEN - VLAN_HLEN * 2)
+
 /* Supported devices */
 static const struct pci_device_id nicvf_id_table[] = {
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
@@ -68,9 +72,6 @@
 MODULE_PARM_DESC(cpi_alg,
 		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
 
-/* workqueue for handling kernel ndo_set_rx_mode() calls */
-static struct workqueue_struct *nicvf_rx_mode_wq;
-
 static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
 {
 	if (nic->sqs_mode)
@@ -127,6 +128,9 @@
 {
 	int timeout = NIC_MBOX_MSG_TIMEOUT;
 	int sleep = 10;
+	int ret = 0;
+
+	mutex_lock(&nic->rx_mode_mtx);
 
 	nic->pf_acked = false;
 	nic->pf_nacked = false;
@@ -139,7 +143,8 @@
 			netdev_err(nic->netdev,
 				   "PF NACK to mbox msg 0x%02x from VF%d\n",
 				   (mbx->msg.msg & 0xFF), nic->vf_id);
-			return -EINVAL;
+			ret = -EINVAL;
+			break;
 		}
 		msleep(sleep);
 		if (nic->pf_acked)
@@ -149,10 +154,12 @@
 			netdev_err(nic->netdev,
 				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
 				   (mbx->msg.msg & 0xFF), nic->vf_id);
-			return -EBUSY;
+			ret = -EBUSY;
+			break;
 		}
 	}
-	return 0;
+	mutex_unlock(&nic->rx_mode_mtx);
+	return ret;
 }
 
 /* Checks if VF is able to comminicate with PF
@@ -172,6 +179,17 @@
 	return 1;
 }
 
+static void nicvf_send_cfg_done(struct nicvf *nic)
+{
+	union nic_mbx mbx = {};
+
+	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+	if (nicvf_send_msg_to_pf(nic, &mbx)) {
+		netdev_err(nic->netdev,
+			   "PF didn't respond to CFG DONE msg\n");
+	}
+}
+
 static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
 {
 	if (bgx->rx)
@@ -228,21 +246,24 @@
 		break;
 	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
 		nic->pf_acked = true;
-		nic->link_up = mbx.link_status.link_up;
-		nic->duplex = mbx.link_status.duplex;
-		nic->speed = mbx.link_status.speed;
-		nic->mac_type = mbx.link_status.mac_type;
-		if (nic->link_up) {
-			netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
-				    nic->speed,
-				    nic->duplex == DUPLEX_FULL ?
-				    "Full" : "Half");
-			netif_carrier_on(nic->netdev);
-			netif_tx_start_all_queues(nic->netdev);
-		} else {
-			netdev_info(nic->netdev, "Link is Down\n");
-			netif_carrier_off(nic->netdev);
-			netif_tx_stop_all_queues(nic->netdev);
+		if (nic->link_up != mbx.link_status.link_up) {
+			nic->link_up = mbx.link_status.link_up;
+			nic->duplex = mbx.link_status.duplex;
+			nic->speed = mbx.link_status.speed;
+			nic->mac_type = mbx.link_status.mac_type;
+			if (nic->link_up) {
+				netdev_info(nic->netdev,
+					    "Link is Up %d Mbps %s duplex\n",
+					    nic->speed,
+					    nic->duplex == DUPLEX_FULL ?
+					    "Full" : "Half");
+				netif_carrier_on(nic->netdev);
+				netif_tx_start_all_queues(nic->netdev);
+			} else {
+				netdev_info(nic->netdev, "Link is Down\n");
+				netif_carrier_off(nic->netdev);
+				netif_tx_stop_all_queues(nic->netdev);
+			}
 		}
 		break;
 	case NIC_MBOX_MSG_ALLOC_SQS:
@@ -1311,6 +1332,12 @@
 	struct nicvf_cq_poll *cq_poll = NULL;
 	union nic_mbx mbx = {};
 
+	/* wait till all queued set_rx_mode tasks complete */
+	if (nic->nicvf_rx_mode_wq) {
+		cancel_delayed_work_sync(&nic->link_change_work);
+		drain_workqueue(nic->nicvf_rx_mode_wq);
+	}
+
 	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
 	nicvf_send_msg_to_pf(nic, &mbx);
 
@@ -1410,13 +1437,28 @@
 	return nicvf_send_msg_to_pf(nic, &mbx);
 }
 
+static void nicvf_link_status_check_task(struct work_struct *work_arg)
+{
+	struct nicvf *nic = container_of(work_arg,
+					 struct nicvf,
+					 link_change_work.work);
+	union nic_mbx mbx = {};
+	mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+	nicvf_send_msg_to_pf(nic, &mbx);
+	queue_delayed_work(nic->nicvf_rx_mode_wq,
+			   &nic->link_change_work, 2 * HZ);
+}
+
 int nicvf_open(struct net_device *netdev)
 {
 	int cpu, err, qidx;
 	struct nicvf *nic = netdev_priv(netdev);
 	struct queue_set *qs = nic->qs;
 	struct nicvf_cq_poll *cq_poll = NULL;
-	union nic_mbx mbx = {};
+
+	/* wait till all queued set_rx_mode tasks complete, if any */
+	if (nic->nicvf_rx_mode_wq)
+		drain_workqueue(nic->nicvf_rx_mode_wq);
 
 	netif_carrier_off(netdev);
 
@@ -1512,8 +1554,14 @@
 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
 
 	/* Send VF config done msg to PF */
-	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
-	nicvf_write_to_mbx(nic, &mbx);
+	nicvf_send_cfg_done(nic);
+
+	if (nic->nicvf_rx_mode_wq) {
+		INIT_DELAYED_WORK(&nic->link_change_work,
+				  nicvf_link_status_check_task);
+		queue_delayed_work(nic->nicvf_rx_mode_wq,
+				   &nic->link_change_work, 0);
+	}
 
 	return 0;
 cleanup:
@@ -1538,6 +1586,15 @@
 	struct nicvf *nic = netdev_priv(netdev);
 	int orig_mtu = netdev->mtu;
 
+	/* For now just support only the usual MTU sized frames,
+	 * plus some headroom for VLAN, QinQ.
+	 */
+	if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
+		netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+			    netdev->mtu);
+		return -EINVAL;
+	}
+
 	netdev->mtu = new_mtu;
 
 	if (!netif_running(netdev))
@@ -1786,8 +1843,10 @@
 	bool bpf_attached = false;
 	int ret = 0;
 
-	/* For now just support only the usual MTU sized frames */
-	if (prog && (dev->mtu > 1500)) {
+	/* For now just support only the usual MTU sized frames,
+	 * plus some headroom for VLAN, QinQ.
+	 */
+	if (prog && dev->mtu > MAX_XDP_MTU) {
 		netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
 			    dev->mtu);
 		return -EOPNOTSUPP;
@@ -1941,15 +2000,17 @@
 
 	/* flush DMAC filters and reset RX mode */
 	mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
-	nicvf_send_msg_to_pf(nic, &mbx);
+	if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+		goto free_mc;
 
 	if (mode & BGX_XCAST_MCAST_FILTER) {
 		/* once enabling filtering, we need to signal to PF to add
 		 * its' own LMAC to the filter to accept packets for it.
 		 */
 		mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-		mbx.xcast.data.mac = 0;
-		nicvf_send_msg_to_pf(nic, &mbx);
+		mbx.xcast.mac = 0;
+		if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+			goto free_mc;
 	}
 
 	/* check if we have any specific MACs to be added to PF DMAC filter */
@@ -1957,23 +2018,25 @@
 		/* now go through kernel list of MACs and add them one by one */
 		for (idx = 0; idx < mc_addrs->count; idx++) {
 			mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-			mbx.xcast.data.mac = mc_addrs->mc[idx];
-			nicvf_send_msg_to_pf(nic, &mbx);
+			mbx.xcast.mac = mc_addrs->mc[idx];
+			if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+				goto free_mc;
 		}
-		kfree(mc_addrs);
 	}
 
 	/* and finally set rx mode for PF accordingly */
 	mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
-	mbx.xcast.data.mode = mode;
+	mbx.xcast.mode = mode;
 
 	nicvf_send_msg_to_pf(nic, &mbx);
+free_mc:
+	kfree(mc_addrs);
 }
 
 static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
 {
 	struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
-						  work.work);
+						  work);
 	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
 	u8 mode;
 	struct xcast_addr_list *mc;
@@ -2030,7 +2093,7 @@
 	kfree(nic->rx_mode_work.mc);
 	nic->rx_mode_work.mc = mc_list;
 	nic->rx_mode_work.mode = mode;
-	queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
+	queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
 	spin_unlock(&nic->rx_mode_wq_lock);
 }
 
@@ -2187,8 +2250,18 @@
 
 	INIT_WORK(&nic->reset_task, nicvf_reset_task);
 
-	INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
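+	/* A per-VF ordered workqueue keeps this VF's rx_mode and link-change
+	 * work items serialized with respect to each other.
+	 */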
+	nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
+							WQ_MEM_RECLAIM,
+							nic->vf_id);
+	if (!nic->nicvf_rx_mode_wq) {
+		err = -ENOMEM;
+		dev_err(dev, "Failed to allocate work queue\n");
+		goto err_unregister_interrupts;
+	}
+
+	INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
 	spin_lock_init(&nic->rx_mode_wq_lock);
+	mutex_init(&nic->rx_mode_mtx);
 
 	err = register_netdev(netdev);
 	if (err) {
@@ -2228,13 +2301,15 @@
 	nic = netdev_priv(netdev);
 	pnetdev = nic->pnicvf->netdev;
 
-	cancel_delayed_work_sync(&nic->rx_mode_work.work);
-
 	/* Check if this Qset is assigned to different VF.
 	 * If yes, clean primary and all secondary Qsets.
 	 */
 	if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
 		unregister_netdev(pnetdev);
+	if (nic->nicvf_rx_mode_wq) {
+		destroy_workqueue(nic->nicvf_rx_mode_wq);
+		nic->nicvf_rx_mode_wq = NULL;
+	}
 	nicvf_unregister_interrupts(nic);
 	pci_set_drvdata(pdev, NULL);
 	if (nic->drv_stats)
@@ -2261,17 +2336,11 @@
 static int __init nicvf_init_module(void)
 {
 	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
-	nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
-						   WQ_MEM_RECLAIM);
 	return pci_register_driver(&nicvf_driver);
 }
 
 static void __exit nicvf_cleanup_module(void)
 {
-	if (nicvf_rx_mode_wq) {
-		destroy_workqueue(nicvf_rx_mode_wq);
-		nicvf_rx_mode_wq = NULL;
-	}
 	pci_unregister_driver(&nicvf_driver);
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index fcaf18f..4ab57d3 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #include <linux/pci.h>
@@ -59,7 +56,7 @@
 	dmem->q_len = q_len;
 	dmem->size = (desc_size * q_len) + align_bytes;
 	/* Save address, need it while freeing */
-	dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
+	dmem->unalign_base = dma_alloc_coherent(&nic->pdev->dev, dmem->size,
 						&dmem->dma, GFP_KERNEL);
 	if (!dmem->unalign_base)
 		return -ENOMEM;
@@ -105,20 +102,19 @@
 	/* Check if page can be recycled */
 	if (page) {
 		ref_count = page_ref_count(page);
-		/* Check if this page has been used once i.e 'put_page'
-		 * called after packet transmission i.e internal ref_count
-		 * and page's ref_count are equal i.e page can be recycled.
+		/* This page can be recycled if internal ref_count and page's
+		 * ref_count are equal, indicating that the page has been used
+		 * once for packet transmission. For non-XDP mode, internal
+		 * ref_count is always '1'.
 		 */
-		if (rbdr->is_xdp && (ref_count == pgcache->ref_count))
-			pgcache->ref_count--;
-		else
+		if (rbdr->is_xdp) {
+			if (ref_count == pgcache->ref_count)
+				pgcache->ref_count--;
+			else
+				page = NULL;
+		} else if (ref_count != 1) {
 			page = NULL;
-
-		/* In non-XDP mode, page's ref_count needs to be '1' for it
-		 * to be recycled.
-		 */
-		if (!rbdr->is_xdp && (ref_count != 1))
-			page = NULL;
+		}
 	}
 
 	if (!page) {
@@ -365,11 +361,10 @@
 	while (head < rbdr->pgcnt) {
 		pgcache = &rbdr->pgcache[head];
 		if (pgcache->page && page_ref_count(pgcache->page) != 0) {
-			if (!rbdr->is_xdp) {
-				put_page(pgcache->page);
-				continue;
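+			/* In XDP mode, drop the extra references taken for
+			 * recycling before the final put_page() below.
+			 */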
+			if (rbdr->is_xdp) {
+				page_ref_sub(pgcache->page,
+					     pgcache->ref_count - 1);
 			}
-			page_ref_sub(pgcache->page, pgcache->ref_count - 1);
 			put_page(pgcache->page);
 		}
 		head++;
@@ -1593,15 +1588,13 @@
 		goto doorbell;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		const struct skb_frag_struct *frag;
-
-		frag = &skb_shinfo(skb)->frags[i];
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
 		size = skb_frag_size(frag);
 		dma_addr = dma_map_page_attrs(&nic->pdev->dev,
 					      skb_frag_page(frag),
-					      frag->page_offset, size,
+					      skb_frag_off(frag), size,
 					      DMA_TO_DEVICE,
 					      DMA_ATTR_SKIP_CPU_SYNC);
 		if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 5e9a03c..bc2427c 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #ifndef NICVF_QUEUES_H
diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h
index e47205a..0df115d 100644
--- a/drivers/net/ethernet/cavium/thunder/q_struct.h
+++ b/drivers/net/ethernet/cavium/thunder/q_struct.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * This file contains HW queue descriptor formats, config register
  * structures etc
  *
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #ifndef Q_STRUCT_H
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index e337da6..acb0168 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #include <linux/acpi.h>
@@ -962,13 +959,13 @@
 	lmac->last_duplex = (an_result >> 1) & 0x1;
 	switch (speed) {
 	case 0:
-		lmac->last_speed = 10;
+		lmac->last_speed = SPEED_10;
 		break;
 	case 1:
-		lmac->last_speed = 100;
+		lmac->last_speed = SPEED_100;
 		break;
 	case 2:
-		lmac->last_speed = 1000;
+		lmac->last_speed = SPEED_1000;
 		break;
 	default:
 		lmac->link_up = false;
@@ -1012,10 +1009,10 @@
 	    !(smu_link & SMU_RX_CTL_STATUS)) {
 		lmac->link_up = 1;
 		if (lmac->lmac_type == BGX_MODE_XLAUI)
-			lmac->last_speed = 40000;
+			lmac->last_speed = SPEED_40000;
 		else
-			lmac->last_speed = 10000;
-		lmac->last_duplex = 1;
+			lmac->last_speed = SPEED_10000;
+		lmac->last_duplex = DUPLEX_FULL;
 	} else {
 		lmac->link_up = 0;
 		lmac->last_speed = SPEED_UNKNOWN;
@@ -1105,8 +1102,8 @@
 			} else {
 				/* Default to below link speed and duplex */
 				lmac->link_up = true;
-				lmac->last_speed = 1000;
-				lmac->last_duplex = 1;
+				lmac->last_speed = SPEED_1000;
+				lmac->last_duplex = DUPLEX_FULL;
 				bgx_sgmii_change_link_state(lmac);
 				return 0;
 			}
@@ -1217,7 +1214,7 @@
 
 	/* Disable MAC steering (NCSI traffic) */
 	for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++)
-		bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
+		bgx_reg_write(bgx, 0, BGX_CMR_RX_STEERING + (i * 8), 0x00);
 }
 
 static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
@@ -1384,24 +1381,18 @@
 				u8 *dst)
 {
 	u8 mac[ETH_ALEN];
-	int ret;
+	u8 *addr;
 
-	ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
-					    "mac-address", mac, ETH_ALEN);
-	if (ret)
-		goto out;
-
-	if (!is_valid_ether_addr(mac)) {
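+	/* fwnode_get_mac_address() also validates the address, so a
+	 * non-NULL return means 'mac' now holds a usable address.
+	 */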
+	addr = fwnode_get_mac_address(acpi_fwnode_handle(adev), mac, ETH_ALEN);
+	if (!addr) {
 		dev_err(dev, "MAC address invalid: %pM\n", mac);
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	dev_info(dev, "MAC address set to: %pM\n", mac);
 
-	memcpy(dst, mac, ETH_ALEN);
-out:
-	return ret;
+	ether_addr_copy(dst, mac);
+	return 0;
 }
 
 /* Currently only sets the MAC address. */
@@ -1484,7 +1475,7 @@
 			break;
 
 		mac = of_get_mac_address(node);
-		if (mac)
+		if (!IS_ERR(mac))
 			ether_addr_copy(bgx->lmac[lmac].mac, mac);
 
 		SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index cbdd20b..2588870 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #ifndef THUNDER_BGX_H
@@ -60,7 +57,7 @@
 #define  RX_DMACX_CAM_EN			BIT_ULL(48)
 #define  RX_DMACX_CAM_LMACID(x)			(((u64)x) << 49)
 #define  RX_DMAC_COUNT				32
-#define BGX_CMR_RX_STREERING		0x300
+#define BGX_CMR_RX_STEERING		0x300
 #define  RX_TRAFFIC_STEER_RULE_COUNT		8
 #define BGX_CMR_CHAN_MSK_AND		0x450
 #define BGX_CMR_BIST_STATUS		0x460
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
index 2d5e8da..3ebb937 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2016 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
  */
 
 #include <linux/acpi.h>
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index e2cdfa7..9909bfd 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Chelsio device configuration
 #
@@ -24,7 +25,8 @@
 	---help---
 	  This driver supports Chelsio gigabit and 10-gigabit
 	  Ethernet cards. More information about adapter features and
-	  performance tuning is in <file:Documentation/networking/cxgb.txt>.
+	  performance tuning is in
+	  <file:Documentation/networking/device_drivers/chelsio/cxgb.txt>.
 
 	  For general information about Chelsio and our products, visit
 	  our website at <http://www.chelsio.com>.
diff --git a/drivers/net/ethernet/chelsio/cxgb/Makefile b/drivers/net/ethernet/chelsio/cxgb/Makefile
index 57a4b26..8008282 100644
--- a/drivers/net/ethernet/chelsio/cxgb/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Chelsio T1 driver
 #
diff --git a/drivers/net/ethernet/chelsio/cxgb/my3126.c b/drivers/net/ethernet/chelsio/cxgb/my3126.c
index 20c09cc..60aa45b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/my3126.c
+++ b/drivers/net/ethernet/chelsio/cxgb/my3126.c
@@ -94,7 +94,7 @@
 	return cphy_cause_link_change;
 }
 
-static void my3216_poll(struct work_struct *work)
+static void my3126_poll(struct work_struct *work)
 {
 	struct cphy *cphy = container_of(work, struct cphy, phy_update.work);
 
@@ -177,7 +177,7 @@
 		return NULL;
 
 	cphy_init(cphy, dev, phy_addr, &my3126_ops, mdio_ops);
-	INIT_DELAYED_WORK(&cphy->phy_update, my3216_poll);
+	INIT_DELAYED_WORK(&cphy->phy_update, my3126_poll);
 	cphy->bmsr = 0;
 
 	return cphy;
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 30de26e..47b5c8e 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -585,8 +585,7 @@
 		sizeof(struct cpl_rx_data) +
 		sge->freelQ[!sge->jumbo_fl].dma_offset;
 
-		size = (16 * 1024) -
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	size = (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	sge->freelQ[sge->jumbo_fl].rx_buffer_size = size;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/Makefile b/drivers/net/ethernet/chelsio/cxgb3/Makefile
index 29aff78..f65f0d9 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb3/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Chelsio T3 driver
 #
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index c34ea38..58f89f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -33,7 +33,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
@@ -3270,7 +3269,7 @@
 	if (!adapter->regs) {
 		dev_err(&pdev->dev, "cannot map device registers\n");
 		err = -ENOMEM;
-		goto out_free_adapter;
+		goto out_free_adapter_nofail;
 	}
 
 	adapter->pdev = pdev;
@@ -3398,6 +3397,9 @@
 		if (adapter->port[i])
 			free_netdev(adapter->port[i]);
 
+out_free_adapter_nofail:
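+	/* The nofail skb was allocated earlier in probe, so it must be
+	 * freed on this early error path as well.
+	 */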
+	kfree_skb(adapter->nofail_skb);
+
 out_free_adapter:
 	kfree(adapter);
 
@@ -3440,8 +3442,7 @@
 				free_netdev(adapter->port[i]);
 
 		iounmap(adapter->regs);
-		if (adapter->nofail_skb)
-			kfree_skb(adapter->nofail_skb);
+		kfree_skb(adapter->nofail_skb);
 		kfree(adapter);
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index 50cd660..84604af 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -1302,8 +1302,7 @@
 	rcu_read_unlock();
 	RCU_INIT_POINTER(tdev->l2opt, NULL);
 	call_rcu(&d->rcu_head, clean_l2_data);
-	if (t->nofail_skb)
-		kfree_skb(t->nofail_skb);
+	kfree_skb(t->nofail_skb);
 	kfree(t);
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
index 0e9182d..b3e4118 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
@@ -443,9 +443,9 @@
 struct l2t_data *t3_init_l2t(unsigned int l2t_capacity)
 {
 	struct l2t_data *d;
-	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
+	int i;
 
-	d = kvzalloc(size, GFP_KERNEL);
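+	/* struct_size() computes sizeof(*d) + l2t_capacity * sizeof(*d->l2tab)
+	 * with built-in overflow checking.
+	 */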
+	d = kvzalloc(struct_size(d, l2tab, l2t_capacity), GFP_KERNEL);
 	if (!d)
 		return NULL;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h
index c2fd323..ea75f27 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h
@@ -75,8 +75,8 @@
 	struct l2t_entry *rover;	/* starting point for next allocation */
 	atomic_t nfree;		/* number of free entries */
 	rwlock_t lock;
-	struct l2t_entry l2tab[0];
 	struct rcu_head rcu_head;	/* to handle rcu cleanup */
+	struct l2t_entry l2tab[];
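+	/* flexible array member; must remain the last field */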
 };
 
 typedef void (*arp_failure_handler_func)(struct t3cdev * dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 20b6e1b..6dabbf1 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -620,7 +620,7 @@
 {
 	size_t len = nelem * elem_size;
 	void *s = NULL;
-	void *p = dma_zalloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
+	void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 
 	if (!p)
 		return NULL;
@@ -2132,7 +2132,7 @@
 	struct port_info *pi = netdev_priv(qs->netdev);
 	struct sk_buff *skb = NULL;
 	struct cpl_rx_pkt *cpl;
-	struct skb_frag_struct *rx_frag;
+	skb_frag_t *rx_frag;
 	int nr_frags;
 	int offset = 0;
 
@@ -2182,7 +2182,7 @@
 
 	rx_frag += nr_frags;
 	__skb_frag_set_page(rx_frag, sd->pg_chunk.page);
-	rx_frag->page_offset = sd->pg_chunk.offset + offset;
+	skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset);
 	skb_frag_size_set(rx_frag, len);
 
 	skb->len += len;
@@ -2381,7 +2381,7 @@
 					lro_add_page(adap, qs, fl,
 						     G_RSPD_LEN(len),
 						     flags & F_RSPD_EOP);
-					 goto next_fl;
+					goto next_fl;
 				}
 
 				skb = get_packet_pg(adap, fl, q,
@@ -3214,11 +3214,13 @@
 	for (i = 0; i < SGE_QSETS; ++i) {
 		struct sge_qset *q = &adap->sge.qs[i];
 
-	if (q->tx_reclaim_timer.function)
-		mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
+		if (q->tx_reclaim_timer.function)
+			mod_timer(&q->tx_reclaim_timer,
+				  jiffies + TX_RECLAIM_PERIOD);
 
-	if (q->rx_reclaim_timer.function)
-		mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
+		if (q->rx_reclaim_timer.function)
+			mod_timer(&q->rx_reclaim_timer,
+				  jiffies + RX_RECLAIM_PERIOD);
 	}
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index 080918a..0a9f2c5 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -1082,7 +1082,7 @@
 		CH_WARN(adapter, "found newer FW version(%u.%u), "
 		        "driver compiled for version %u.%u\n", major, minor,
 			FW_VERSION_MAJOR, FW_VERSION_MINOR);
-			return 0;
+		return 0;
 	}
 	return -EINVAL;
 }
@@ -3619,7 +3619,7 @@
 
 static int init_parity(struct adapter *adap)
 {
-		int i, err, addr;
+	int i, err, addr;
 
 	if (t3_read_reg(adap, A_SG_CONTEXT_CMD) & F_CONTEXT_CMD_BUSY)
 		return -EBUSY;
@@ -3806,6 +3806,6 @@
 		p->phy.ops->power_down(&p->phy, 1);
 	}
 
-return 0;
+	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index bea6a05..20390f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -7,8 +7,9 @@
 
 cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
 	      cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
-	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
+	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4_mps.o \
 	      cudbg_common.o cudbg_lib.o cudbg_zlib.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
+cxgb4-$(CONFIG_THERMAL) += cxgb4_thermal.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index 5701272..ce28820 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -289,8 +289,7 @@
 	if (clipt_size < CLIPT_MIN_HASH_BUCKETS)
 		return NULL;
 
-	ctbl = kvzalloc(sizeof(*ctbl) +
-			    clipt_size*sizeof(struct list_head), GFP_KERNEL);
+	ctbl = kvzalloc(struct_size(ctbl, hash_list, clipt_size), GFP_KERNEL);
 	if (!ctbl)
 		return NULL;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
index 8edc498..175e1a6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #include "cxgb4.h"
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index 36d2588..6974669 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CUDBG_ENTITY_H__
@@ -315,6 +303,48 @@
 	u32 pbt_data[CUDBG_PBT_DATA_ENTRIES];
 };
 
+enum cudbg_qdesc_qtype {
+	CUDBG_QTYPE_UNKNOWN = 0,
+	CUDBG_QTYPE_NIC_TXQ,
+	CUDBG_QTYPE_NIC_RXQ,
+	CUDBG_QTYPE_NIC_FLQ,
+	CUDBG_QTYPE_CTRLQ,
+	CUDBG_QTYPE_FWEVTQ,
+	CUDBG_QTYPE_INTRQ,
+	CUDBG_QTYPE_PTP_TXQ,
+	CUDBG_QTYPE_OFLD_TXQ,
+	CUDBG_QTYPE_RDMA_RXQ,
+	CUDBG_QTYPE_RDMA_FLQ,
+	CUDBG_QTYPE_RDMA_CIQ,
+	CUDBG_QTYPE_ISCSI_RXQ,
+	CUDBG_QTYPE_ISCSI_FLQ,
+	CUDBG_QTYPE_ISCSIT_RXQ,
+	CUDBG_QTYPE_ISCSIT_FLQ,
+	CUDBG_QTYPE_CRYPTO_TXQ,
+	CUDBG_QTYPE_CRYPTO_RXQ,
+	CUDBG_QTYPE_CRYPTO_FLQ,
+	CUDBG_QTYPE_TLS_RXQ,
+	CUDBG_QTYPE_TLS_FLQ,
+	CUDBG_QTYPE_MAX,
+};
+
+#define CUDBG_QDESC_REV 1
+
+struct cudbg_qdesc_entry {
+	u32 data_size;
+	u32 qtype;
+	u32 qid;
+	u32 desc_size;
+	u32 num_desc;
+	u8 data[0]; /* Must be last */
+};
+
+struct cudbg_qdesc_info {
+	u32 qdesc_entry_size;
+	u32 num_queues;
+	u8 data[0]; /* Must be last */
+};
+
 #define IREG_NUM_ELEM 4
 
 static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
index 215fe62..fc38130 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CUDBG_IF_H__
@@ -81,7 +69,8 @@
 	CUDBG_MBOX_LOG = 66,
 	CUDBG_HMA_INDIRECT = 67,
 	CUDBG_HMA = 68,
-	CUDBG_MAX_ENTITY = 70,
+	CUDBG_QDESC = 70,
+	CUDBG_MAX_ENTITY = 71,
 };
 
 struct cudbg_init {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index d97e0d7..c2e9278 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1,24 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #include <linux/sort.h>
 
 #include "t4_regs.h"
 #include "cxgb4.h"
+#include "cxgb4_cudbg.h"
 #include "cudbg_if.h"
 #include "cudbg_lib_common.h"
 #include "cudbg_entity.h"
@@ -80,7 +69,7 @@
 {
 	struct adapter *padap = pdbg_init->adap;
 
-	if (!(padap->flags & FW_OK) || padap->use_bd)
+	if (!(padap->flags & CXGB4_FW_OK) || padap->use_bd)
 		return 0;
 
 	return 1;
@@ -1065,14 +1054,12 @@
 	}
 }
 
-static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
-				    struct cudbg_buffer *dbg_buff,
-				    struct cudbg_error *cudbg_err,
-				    u8 mem_type)
+static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init,
+					   struct cudbg_error *cudbg_err,
+					   u8 mem_type)
 {
 	struct adapter *padap = pdbg_init->adap;
 	struct cudbg_meminfo mem_info;
-	unsigned long size;
 	u8 mc_idx;
 	int rc;
 
@@ -1086,7 +1073,16 @@
 	if (rc)
 		return rc;
 
-	size = mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+	return mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+}
+
+static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
+				    struct cudbg_buffer *dbg_buff,
+				    struct cudbg_error *cudbg_err,
+				    u8 mem_type)
+{
+	unsigned long size = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type);
+
 	return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size,
 				 cudbg_err);
 }
@@ -1228,6 +1224,10 @@
 
 	rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_hw_sched),
 			    &temp_buff);
+
+	if (rc)
+		return rc;
+
 	hw_sched_buff = (struct cudbg_hw_sched *)temp_buff.data;
 	hw_sched_buff->map = t4_read_reg(padap, TP_TX_MOD_QUEUE_REQ_MAP_A);
 	hw_sched_buff->mode = TIMERMODE_G(t4_read_reg(padap, TP_MOD_CONFIG_A));
@@ -2890,3 +2890,240 @@
 	}
 	return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
 }
+
+void cudbg_fill_qdesc_num_and_size(const struct adapter *padap,
+				   u32 *num, u32 *size)
+{
+	u32 tot_entries = 0, tot_size = 0;
+
+	/* NIC TXQ, RXQ, FLQ, and CTRLQ */
+	tot_entries += MAX_ETH_QSETS * 3;
+	tot_entries += MAX_CTRL_QUEUES;
+
+	tot_size += MAX_ETH_QSETS * MAX_TXQ_ENTRIES * MAX_TXQ_DESC_SIZE;
+	tot_size += MAX_ETH_QSETS * MAX_RSPQ_ENTRIES * MAX_RXQ_DESC_SIZE;
+	tot_size += MAX_ETH_QSETS * MAX_RX_BUFFERS * MAX_FL_DESC_SIZE;
+	tot_size += MAX_CTRL_QUEUES * MAX_CTRL_TXQ_ENTRIES *
+		    MAX_CTRL_TXQ_DESC_SIZE;
+
+	/* FW_EVTQ and INTRQ */
+	tot_entries += INGQ_EXTRAS;
+	tot_size += INGQ_EXTRAS * MAX_RSPQ_ENTRIES * MAX_RXQ_DESC_SIZE;
+
+	/* PTP_TXQ */
+	tot_entries += 1;
+	tot_size += MAX_TXQ_ENTRIES * MAX_TXQ_DESC_SIZE;
+
+	/* ULD TXQ, RXQ, and FLQ */
+	tot_entries += CXGB4_TX_MAX * MAX_OFLD_QSETS;
+	tot_entries += CXGB4_ULD_MAX * MAX_ULD_QSETS * 2;
+
+	tot_size += CXGB4_TX_MAX * MAX_OFLD_QSETS * MAX_TXQ_ENTRIES *
+		    MAX_TXQ_DESC_SIZE;
+	tot_size += CXGB4_ULD_MAX * MAX_ULD_QSETS * MAX_RSPQ_ENTRIES *
+		    MAX_RXQ_DESC_SIZE;
+	tot_size += CXGB4_ULD_MAX * MAX_ULD_QSETS * MAX_RX_BUFFERS *
+		    MAX_FL_DESC_SIZE;
+
+	/* ULD CIQ */
+	tot_entries += CXGB4_ULD_MAX * MAX_ULD_QSETS;
+	tot_size += CXGB4_ULD_MAX * MAX_ULD_QSETS * SGE_MAX_IQ_SIZE *
+		    MAX_RXQ_DESC_SIZE;
+
+	tot_size += sizeof(struct cudbg_ver_hdr) +
+		    sizeof(struct cudbg_qdesc_info) +
+		    sizeof(struct cudbg_qdesc_entry) * tot_entries;
+
+	if (num)
+		*num = tot_entries;
+
+	if (size)
+		*size = tot_size;
+}
+
+int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
+			struct cudbg_buffer *dbg_buff,
+			struct cudbg_error *cudbg_err)
+{
+	u32 num_queues = 0, tot_entries = 0, size = 0;
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_qdesc_entry *qdesc_entry;
+	struct cudbg_qdesc_info *qdesc_info;
+	struct cudbg_ver_hdr *ver_hdr;
+	struct sge *s = &padap->sge;
+	u32 i, j, cur_off, tot_len;
+	u8 *data;
+	int rc;
+
+	cudbg_fill_qdesc_num_and_size(padap, &tot_entries, &size);
+	size = min_t(u32, size, CUDBG_DUMP_BUFF_SIZE);
+	tot_len = size;
+	data = kvzalloc(size, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ver_hdr = (struct cudbg_ver_hdr *)data;
+	ver_hdr->signature = CUDBG_ENTITY_SIGNATURE;
+	ver_hdr->revision = CUDBG_QDESC_REV;
+	ver_hdr->size = sizeof(struct cudbg_qdesc_info);
+	size -= sizeof(*ver_hdr);
+
+	qdesc_info = (struct cudbg_qdesc_info *)(data +
+						 sizeof(*ver_hdr));
+	size -= sizeof(*qdesc_info);
+	qdesc_entry = (struct cudbg_qdesc_entry *)qdesc_info->data;
+
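+/* QDESC_GET copies one queue's descriptor ring into the next
+ * cudbg_qdesc_entry and advances the cursor; it bails out through
+ * 'label' once the remaining buffer budget in 'size' is used up.
+ */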
+#define QDESC_GET(q, desc, type, label) do { \
+	if (size <= 0) { \
+		goto label; \
+	} \
+	if (desc) { \
+		cudbg_fill_qdesc_##q(q, type, qdesc_entry); \
+		size -= sizeof(*qdesc_entry) + qdesc_entry->data_size; \
+		num_queues++; \
+		qdesc_entry = cudbg_next_qdesc(qdesc_entry); \
+	} \
+} while (0)
+
+#define QDESC_GET_TXQ(q, type, label) do { \
+	struct sge_txq *txq = (struct sge_txq *)q; \
+	QDESC_GET(txq, txq->desc, type, label); \
+} while (0)
+
+#define QDESC_GET_RXQ(q, type, label) do { \
+	struct sge_rspq *rxq = (struct sge_rspq *)q; \
+	QDESC_GET(rxq, rxq->desc, type, label); \
+} while (0)
+
+#define QDESC_GET_FLQ(q, type, label) do { \
+	struct sge_fl *flq = (struct sge_fl *)q; \
+	QDESC_GET(flq, flq->desc, type, label); \
+} while (0)
+
+	/* NIC TXQ */
+	for (i = 0; i < s->ethqsets; i++)
+		QDESC_GET_TXQ(&s->ethtxq[i].q, CUDBG_QTYPE_NIC_TXQ, out);
+
+	/* NIC RXQ */
+	for (i = 0; i < s->ethqsets; i++)
+		QDESC_GET_RXQ(&s->ethrxq[i].rspq, CUDBG_QTYPE_NIC_RXQ, out);
+
+	/* NIC FLQ */
+	for (i = 0; i < s->ethqsets; i++)
+		QDESC_GET_FLQ(&s->ethrxq[i].fl, CUDBG_QTYPE_NIC_FLQ, out);
+
+	/* NIC CTRLQ */
+	for (i = 0; i < padap->params.nports; i++)
+		QDESC_GET_TXQ(&s->ctrlq[i].q, CUDBG_QTYPE_CTRLQ, out);
+
+	/* FW_EVTQ */
+	QDESC_GET_RXQ(&s->fw_evtq, CUDBG_QTYPE_FWEVTQ, out);
+
+	/* INTRQ */
+	QDESC_GET_RXQ(&s->intrq, CUDBG_QTYPE_INTRQ, out);
+
+	/* PTP_TXQ */
+	QDESC_GET_TXQ(&s->ptptxq.q, CUDBG_QTYPE_PTP_TXQ, out);
+
+	/* ULD Queues */
+	mutex_lock(&uld_mutex);
+
+	if (s->uld_txq_info) {
+		struct sge_uld_txq_info *utxq;
+
+		/* ULD TXQ */
+		for (j = 0; j < CXGB4_TX_MAX; j++) {
+			if (!s->uld_txq_info[j])
+				continue;
+
+			utxq = s->uld_txq_info[j];
+			for (i = 0; i < utxq->ntxq; i++)
+				QDESC_GET_TXQ(&utxq->uldtxq[i].q,
+					      cudbg_uld_txq_to_qtype(j),
+					      out_unlock);
+		}
+	}
+
+	if (s->uld_rxq_info) {
+		struct sge_uld_rxq_info *urxq;
+		u32 base;
+
+		/* ULD RXQ */
+		for (j = 0; j < CXGB4_ULD_MAX; j++) {
+			if (!s->uld_rxq_info[j])
+				continue;
+
+			urxq = s->uld_rxq_info[j];
+			for (i = 0; i < urxq->nrxq; i++)
+				QDESC_GET_RXQ(&urxq->uldrxq[i].rspq,
+					      cudbg_uld_rxq_to_qtype(j),
+					      out_unlock);
+		}
+
+		/* ULD FLQ */
+		for (j = 0; j < CXGB4_ULD_MAX; j++) {
+			if (!s->uld_rxq_info[j])
+				continue;
+
+			urxq = s->uld_rxq_info[j];
+			for (i = 0; i < urxq->nrxq; i++)
+				QDESC_GET_FLQ(&urxq->uldrxq[i].fl,
+					      cudbg_uld_flq_to_qtype(j),
+					      out_unlock);
+		}
+
+		/* ULD CIQ */
+		for (j = 0; j < CXGB4_ULD_MAX; j++) {
+			if (!s->uld_rxq_info[j])
+				continue;
+
+			urxq = s->uld_rxq_info[j];
+			base = urxq->nrxq;
+			for (i = 0; i < urxq->nciq; i++)
+				QDESC_GET_RXQ(&urxq->uldrxq[base + i].rspq,
+					      cudbg_uld_ciq_to_qtype(j),
+					      out_unlock);
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&uld_mutex);
+
+out:
+	qdesc_info->qdesc_entry_size = sizeof(*qdesc_entry);
+	qdesc_info->num_queues = num_queues;
+	cur_off = 0;
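+	/* Stream the staged dump out in CUDBG_CHUNK_SIZE pieces, since
+	 * the destination buffer is claimed and released chunk by chunk.
+	 */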
+	while (tot_len) {
+		u32 chunk_size = min_t(u32, tot_len, CUDBG_CHUNK_SIZE);
+
+		rc = cudbg_get_buff(pdbg_init, dbg_buff, chunk_size,
+				    &temp_buff);
+		if (rc) {
+			cudbg_err->sys_warn = CUDBG_STATUS_PARTIAL_DATA;
+			goto out_free;
+		}
+
+		memcpy(temp_buff.data, data + cur_off, chunk_size);
+		tot_len -= chunk_size;
+		cur_off += chunk_size;
+		rc = cudbg_write_and_release_buff(pdbg_init, &temp_buff,
+						  dbg_buff);
+		if (rc) {
+			cudbg_put_buff(pdbg_init, &temp_buff);
+			cudbg_err->sys_warn = CUDBG_STATUS_PARTIAL_DATA;
+			goto out_free;
+		}
+	}
+
+out_free:
+	if (data)
+		kvfree(data);
+
+#undef QDESC_GET_FLQ
+#undef QDESC_GET_RXQ
+#undef QDESC_GET_TXQ
+#undef QDESC_GET
+
+	return rc;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
index eebefe7..10ee6ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CUDBG_LIB_H__
@@ -171,6 +159,9 @@
 int cudbg_collect_hma_meminfo(struct cudbg_init *pdbg_init,
 			      struct cudbg_buffer *dbg_buff,
 			      struct cudbg_error *cudbg_err);
+int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
+			struct cudbg_buffer *dbg_buff,
+			struct cudbg_error *cudbg_err);
 
 struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i);
 void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
@@ -182,4 +173,107 @@
 		       struct cudbg_meminfo *meminfo_buff);
 void cudbg_fill_le_tcam_info(struct adapter *padap,
 			     struct cudbg_tcam *tcam_region);
+void cudbg_fill_qdesc_num_and_size(const struct adapter *padap,
+				   u32 *num, u32 *size);
+
+static inline u32 cudbg_uld_txq_to_qtype(u32 uld)
+{
+	switch (uld) {
+	case CXGB4_TX_OFLD:
+		return CUDBG_QTYPE_OFLD_TXQ;
+	case CXGB4_TX_CRYPTO:
+		return CUDBG_QTYPE_CRYPTO_TXQ;
+	}
+
+	return CUDBG_QTYPE_UNKNOWN;
+}
+
+static inline u32 cudbg_uld_rxq_to_qtype(u32 uld)
+{
+	switch (uld) {
+	case CXGB4_ULD_RDMA:
+		return CUDBG_QTYPE_RDMA_RXQ;
+	case CXGB4_ULD_ISCSI:
+		return CUDBG_QTYPE_ISCSI_RXQ;
+	case CXGB4_ULD_ISCSIT:
+		return CUDBG_QTYPE_ISCSIT_RXQ;
+	case CXGB4_ULD_CRYPTO:
+		return CUDBG_QTYPE_CRYPTO_RXQ;
+	case CXGB4_ULD_TLS:
+		return CUDBG_QTYPE_TLS_RXQ;
+	}
+
+	return CUDBG_QTYPE_UNKNOWN;
+}
+
+static inline u32 cudbg_uld_flq_to_qtype(u32 uld)
+{
+	switch (uld) {
+	case CXGB4_ULD_RDMA:
+		return CUDBG_QTYPE_RDMA_FLQ;
+	case CXGB4_ULD_ISCSI:
+		return CUDBG_QTYPE_ISCSI_FLQ;
+	case CXGB4_ULD_ISCSIT:
+		return CUDBG_QTYPE_ISCSIT_FLQ;
+	case CXGB4_ULD_CRYPTO:
+		return CUDBG_QTYPE_CRYPTO_FLQ;
+	case CXGB4_ULD_TLS:
+		return CUDBG_QTYPE_TLS_FLQ;
+	}
+
+	return CUDBG_QTYPE_UNKNOWN;
+}
+
+static inline u32 cudbg_uld_ciq_to_qtype(u32 uld)
+{
+	switch (uld) {
+	case CXGB4_ULD_RDMA:
+		return CUDBG_QTYPE_RDMA_CIQ;
+	}
+
+	return CUDBG_QTYPE_UNKNOWN;
+}
+
+static inline void cudbg_fill_qdesc_txq(const struct sge_txq *txq,
+					enum cudbg_qdesc_qtype type,
+					struct cudbg_qdesc_entry *entry)
+{
+	entry->qtype = type;
+	entry->qid = txq->cntxt_id;
+	entry->desc_size = sizeof(struct tx_desc);
+	entry->num_desc = txq->size;
+	entry->data_size = txq->size * sizeof(struct tx_desc);
+	memcpy(entry->data, txq->desc, entry->data_size);
+}
+
+static inline void cudbg_fill_qdesc_rxq(const struct sge_rspq *rxq,
+					enum cudbg_qdesc_qtype type,
+					struct cudbg_qdesc_entry *entry)
+{
+	entry->qtype = type;
+	entry->qid = rxq->cntxt_id;
+	entry->desc_size = rxq->iqe_len;
+	entry->num_desc = rxq->size;
+	entry->data_size = rxq->size * rxq->iqe_len;
+	memcpy(entry->data, rxq->desc, entry->data_size);
+}
+
+static inline void cudbg_fill_qdesc_flq(const struct sge_fl *flq,
+					enum cudbg_qdesc_qtype type,
+					struct cudbg_qdesc_entry *entry)
+{
+	entry->qtype = type;
+	entry->qid = flq->cntxt_id;
+	entry->desc_size = sizeof(__be64);
+	entry->num_desc = flq->size;
+	entry->data_size = flq->size * sizeof(__be64);
+	memcpy(entry->data, flq->desc, entry->data_size);
+}
+
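+/* Entries are variable length; step over the header plus its payload. */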
+static inline
+struct cudbg_qdesc_entry *cudbg_next_qdesc(struct cudbg_qdesc_entry *e)
+{
+	return (struct cudbg_qdesc_entry *)
+	       ((u8 *)e + sizeof(*e) + e->data_size);
+}
 #endif /* __CUDBG_LIB_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
index 8150ea8..9fac777 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CUDBG_LIB_COMMON_H__
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
index 25cc06d..aad55fb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (C) 2018 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #include <linux/zlib.h>
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
index 60d2380..f6d8328 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2018 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CUDBG_ZLIB_H__
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 76d1674..1fbb640 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -52,6 +52,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/ptp_classify.h>
 #include <linux/crash_dump.h>
+#include <linux/thermal.h>
 #include <asm/io.h>
 #include "t4_chip_type.h"
 #include "cxgb4_uld.h"
@@ -279,6 +280,7 @@
 	unsigned short tx_modq[NCHAN];	/* channel to modulation queue map */
 
 	u32 vlan_pri_map;               /* cached TP_VLAN_PRI_MAP */
+	u32 filter_mask;
 	u32 ingress_config;             /* cached TP_INGRESS_CONFIG */
 
 	/* cached TP_OUT_CONFIG compressed error vector
@@ -403,6 +405,7 @@
 	bool fr_nsmr_tpte_wr_support;	  /* FW support for FR_NSMR_TPTE_WR */
 	u8 fw_caps_support;		/* 32-bit Port Capabilities */
 	bool filter2_wr_support;	/* FW support for FILTER2_WR */
+	unsigned int viid_smt_extn_support:1; /* FW returns vin and smt index */
 
 	/* MPS Buffer Group Map[per Port].  Bit i is set if buffer group i is
 	 * used by the Port
@@ -533,6 +536,13 @@
 };
 
 enum {
+	MAX_TXQ_DESC_SIZE      = 64,
+	MAX_RXQ_DESC_SIZE      = 128,
+	MAX_FL_DESC_SIZE       = 8,
+	MAX_CTRL_TXQ_DESC_SIZE = 64,
+};
+
+enum {
 	INGQ_EXTRAS = 2,        /* firmware event queue and */
 				/*   forwarded interrupts */
 	MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
@@ -559,7 +569,7 @@
 struct port_info {
 	struct adapter *adapter;
 	u16    viid;
-	s16    xact_addr_filt;        /* index of exact MAC address filter */
+	int    xact_addr_filt;        /* index of exact MAC address filter */
 	u16    rss_size;              /* size of VI's RSS table slice */
 	s8     mdio_addr;
 	enum fw_port_type port_type;
@@ -584,23 +594,32 @@
 	bool ptp_enable;
 	struct sched_table *sched_tbl;
 	u32 eth_flags;
+
+	/* viid and smt fields are either returned by the fw or
+	 * decoded by the driver by parsing the viid.
+	 */
+	u8 vin;
+	u8 vivld;
+	u8 smt_idx;
+	u8 rx_cchan;
 };
 
 struct dentry;
 struct work_struct;
 
 enum {                                 /* adapter flags */
-	FULL_INIT_DONE     = (1 << 0),
-	DEV_ENABLED        = (1 << 1),
-	USING_MSI          = (1 << 2),
-	USING_MSIX         = (1 << 3),
-	FW_OK              = (1 << 4),
-	RSS_TNLALLLOOKUP   = (1 << 5),
-	USING_SOFT_PARAMS  = (1 << 6),
-	MASTER_PF          = (1 << 7),
-	FW_OFLD_CONN       = (1 << 9),
-	ROOT_NO_RELAXED_ORDERING = (1 << 10),
-	SHUTTING_DOWN	   = (1 << 11),
+	CXGB4_FULL_INIT_DONE		= (1 << 0),
+	CXGB4_DEV_ENABLED		= (1 << 1),
+	CXGB4_USING_MSI			= (1 << 2),
+	CXGB4_USING_MSIX		= (1 << 3),
+	CXGB4_FW_OK			= (1 << 4),
+	CXGB4_RSS_TNLALLLOOKUP		= (1 << 5),
+	CXGB4_USING_SOFT_PARAMS		= (1 << 6),
+	CXGB4_MASTER_PF			= (1 << 7),
+	CXGB4_FW_OFLD_CONN		= (1 << 9),
+	CXGB4_ROOT_NO_RELAXED_ORDERING	= (1 << 10),
+	CXGB4_SHUTTING_DOWN		= (1 << 11),
+	CXGB4_SGE_DBQ_TIMER		= (1 << 12),
 };
 
 enum {
@@ -685,6 +704,7 @@
 	unsigned long rx_cso;       /* # of Rx checksum offloads */
 	unsigned long vlan_ex;      /* # of Rx VLAN extractions */
 	unsigned long rx_drops;     /* # of packets dropped due to no mem */
+	unsigned long bad_rx_pkts;  /* # of packets with err_vec!=0 */
 };
 
 struct sge_eth_rxq {                /* SW Ethernet Rx queue */
@@ -739,6 +759,8 @@
 #ifdef CONFIG_CHELSIO_T4_DCB
 	u8 dcb_prio;		    /* DCB Priority bound to queue */
 #endif
+	u8 dbqt;                    /* SGE Doorbell Queue Timer in use */
+	unsigned int dbqtimerix;    /* SGE Doorbell Queue Timer Index */
 	unsigned long tso;          /* # of TSO requests */
 	unsigned long tx_cso;       /* # of Tx checksum offloads */
 	unsigned long vlan_ins;     /* # of Tx VLAN insertions */
@@ -799,6 +821,8 @@
 	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
+	u16 dbqtimer_tick;
+	u16 dbqtimer_val[SGE_NDBQTIMERS];
 	u32 fl_pg_order;            /* large page allocation size */
 	u32 stat_len;               /* length of status page at ring end */
 	u32 pktshift;               /* padding between CPL & packet data */
@@ -843,6 +867,7 @@
 struct hash_mac_addr {
 	struct list_head list;
 	u8 addr[ETH_ALEN];
+	unsigned int iface_mac;
 };
 
 struct uld_msix_bmap {
@@ -855,6 +880,7 @@
 	unsigned short vec;
 	char desc[IFNAMSIZ + 10];
 	unsigned int idx;
+	cpumask_var_t aff_mask;
 };
 
 struct vf_info {
@@ -862,6 +888,7 @@
 	unsigned int tx_rate;
 	bool pf_set_mac;
 	u16 vlan;
+	int link_state;
 };
 
 enum {
@@ -878,8 +905,20 @@
 	struct list_head list;
 };
 
-struct mps_encap_entry {
-	atomic_t refcnt;
+#if IS_ENABLED(CONFIG_THERMAL)
+struct ch_thermal {
+	struct thermal_zone_device *tzdev;
+	int trip_temp;
+	int trip_type;
+};
+#endif
+
+struct mps_entries_ref {
+	struct list_head list;
+	u8 addr[ETH_ALEN];
+	u8 mask[ETH_ALEN];
+	u16 idx;
+	refcount_t refcnt;
 };
 
 struct adapter {
@@ -906,9 +945,10 @@
 	struct cxgb4_virt_res vres;
 	unsigned int swintr;
 
-	struct {
+	struct msix_info {
 		unsigned short vec;
 		char desc[IFNAMSIZ + 10];
+		cpumask_var_t aff_mask;
 	} msix_info[MAX_INGQ + 1];
 	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
 	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
@@ -933,7 +973,6 @@
 	unsigned int rawf_start;
 	unsigned int rawf_cnt;
 	struct smt_data *smt;
-	struct mps_encap_entry *mps_encap;
 	struct cxgb4_uld_info *uld;
 	void *uld_handle[CXGB4_ULD_MAX];
 	unsigned int num_uld;
@@ -941,6 +980,8 @@
 	struct list_head list_node;
 	struct list_head rcu_node;
 	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
+	struct list_head mps_ref;
+	spinlock_t mps_ref_lock; /* lock for syncing mps ref/deref activities */
 
 	void *iscsi_ppm;
 
@@ -1000,6 +1041,9 @@
 
 	/* Dump buffer for collecting logs in kdump kernel */
 	struct vmcoredd_data vmcoredd;
+#if IS_ENABLED(CONFIG_THERMAL)
+	struct ch_thermal ch_thermal;
+#endif
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1373,7 +1417,7 @@
 		     rspq_flush_handler_t flush_handler, int cong);
 int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 			 struct net_device *dev, struct netdev_queue *netdevq,
-			 unsigned int iqid);
+			 unsigned int iqid, u8 dbqt);
 int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 			  struct net_device *dev, unsigned int iqid,
 			  unsigned int cmplqid);
@@ -1386,6 +1430,8 @@
 int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
 void t4_sge_stop(struct adapter *adap);
+int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *q,
+				 int maxreclaim);
 void cxgb4_set_ethtool_ops(struct net_device *netdev);
 int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
 enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb);
@@ -1538,9 +1584,11 @@
 
 int t4_wait_dev_ready(void __iomem *regs);
 
+fw_port_cap32_t t4_link_acaps(struct adapter *adapter, unsigned int port,
+			      struct link_config *lc);
 int t4_link_l1cfg_core(struct adapter *adap, unsigned int mbox,
 		       unsigned int port, struct link_config *lc,
-		       bool sleep_ok, int timeout);
+		       u8 sleep_ok, int timeout);
 
 static inline int t4_link_l1cfg(struct adapter *adapter, unsigned int mbox,
 				unsigned int port, struct link_config *lc)
@@ -1737,7 +1785,7 @@
 		unsigned int nexact, unsigned int rcaps, unsigned int wxcaps);
 int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
 		unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
-		unsigned int *rss_size);
+		unsigned int *rss_size, u8 *vivld, u8 *vin);
 int t4_free_vi(struct adapter *adap, unsigned int mbox,
 	       unsigned int pf, unsigned int vf,
 	       unsigned int viid);
@@ -1763,7 +1811,7 @@
 		     unsigned int viid, unsigned int naddr,
 		     const u8 **addr, bool sleep_ok);
 int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
-		  int idx, const u8 *addr, bool persist, bool add_smt);
+		  int idx, const u8 *addr, bool persist, u8 *smt_idx);
 int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid,
 		     bool ucast, u64 vec, bool sleep_ok);
 int t4_enable_vi_params(struct adapter *adap, unsigned int mbox,
@@ -1792,6 +1840,8 @@
 int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
 		    unsigned int vf, unsigned int eqid);
 int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type);
+int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
+			  u16 *dbqtimers);
 void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl);
 int t4_update_port_info(struct port_info *pi);
 int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
@@ -1854,4 +1904,49 @@
 int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
 		    u16 vlan);
 int cxgb4_dcb_enabled(const struct net_device *dev);
+
+int cxgb4_thermal_init(struct adapter *adap);
+int cxgb4_thermal_remove(struct adapter *adap);
+int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec,
+		       cpumask_var_t *aff_mask, int idx);
+void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask);
+
+int cxgb4_change_mac(struct port_info *pi, unsigned int viid,
+		     int *tcam_idx, const u8 *addr,
+		     bool persistent, u8 *smt_idx);
+
+int cxgb4_alloc_mac_filt(struct adapter *adap, unsigned int viid,
+			 bool free, unsigned int naddr,
+			 const u8 **addr, u16 *idx,
+			 u64 *hash, bool sleep_ok);
+int cxgb4_free_mac_filt(struct adapter *adap, unsigned int viid,
+			unsigned int naddr, const u8 **addr, bool sleep_ok);
+int cxgb4_init_mps_ref_entries(struct adapter *adap);
+void cxgb4_free_mps_ref_entries(struct adapter *adap);
+int cxgb4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid,
+			       const u8 *addr, const u8 *mask,
+			       unsigned int vni, unsigned int vni_mask,
+			       u8 dip_hit, u8 lookup_type, bool sleep_ok);
+int cxgb4_free_encap_mac_filt(struct adapter *adap, unsigned int viid,
+			      int idx, bool sleep_ok);
+int cxgb4_free_raw_mac_filt(struct adapter *adap,
+			    unsigned int viid,
+			    const u8 *addr,
+			    const u8 *mask,
+			    unsigned int idx,
+			    u8 lookup_type,
+			    u8 port_id,
+			    bool sleep_ok);
+int cxgb4_alloc_raw_mac_filt(struct adapter *adap,
+			     unsigned int viid,
+			     const u8 *addr,
+			     const u8 *mask,
+			     unsigned int idx,
+			     u8 lookup_type,
+			     u8 port_id,
+			     bool sleep_ok);
+int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
+			  int *tcam_idx, const u8 *addr,
+			  bool persistent, u8 *smt_idx);
+
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 5f01c0a..e374b41 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #include "t4_regs.h"
@@ -30,6 +18,7 @@
 
 static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = {
 	{ CUDBG_MBOX_LOG, cudbg_collect_mbox_log },
+	{ CUDBG_QDESC, cudbg_collect_qdesc },
 	{ CUDBG_DEV_LOG, cudbg_collect_fw_devlog },
 	{ CUDBG_REG_DUMP, cudbg_collect_reg_dump },
 	{ CUDBG_CIM_LA, cudbg_collect_cim_la },
@@ -311,6 +300,9 @@
 		}
 		len = cudbg_mbytes_to_bytes(len);
 		break;
+	case CUDBG_QDESC:
+		cudbg_fill_qdesc_num_and_size(adap, NULL, &len);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
index ef59ba1..66b805c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CXGB4_CUDBG_H__
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
index b34f0f0..4a872f3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (C) 2013-2014 Chelsio Communications.  All rights reserved.
  *
  *  Written by Anish Bhatt (anish@chelsio.com)
  *	       Casey Leedom (leedom@chelsio.com)
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #include "cxgb4.h"
@@ -114,6 +102,24 @@
 	cxgb4_dcb_state_init(dev);
 }
 
+/* Update the DCB port support: if the negotiated version is IEEE,
+ * set DCB_CAP_DCBX_VER_IEEE and clear DCB_CAP_DCBX_VER_CEE if it was
+ * set; if the version is CEE, set DCB_CAP_DCBX_VER_CEE and clear
+ * DCB_CAP_DCBX_VER_IEEE if it was set.
+ */
+static inline void cxgb4_dcb_update_support(struct port_dcb_info *dcb)
+{
+	if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) {
+		if (dcb->supported & DCB_CAP_DCBX_VER_CEE)
+			dcb->supported &= ~DCB_CAP_DCBX_VER_CEE;
+		dcb->supported |= DCB_CAP_DCBX_VER_IEEE;
+	} else if (dcb->dcb_version == FW_PORT_DCB_VER_CEE1D01) {
+		if (dcb->supported & DCB_CAP_DCBX_VER_IEEE)
+			dcb->supported &= ~DCB_CAP_DCBX_VER_IEEE;
+		dcb->supported |= DCB_CAP_DCBX_VER_CEE;
+	}
+}
+
 /* Finite State machine for Data Center Bridging.
  */
 void cxgb4_dcb_state_fsm(struct net_device *dev,
@@ -165,6 +171,15 @@
 	}
 
 	case CXGB4_DCB_STATE_FW_INCOMPLETE: {
+		if (transition_to != CXGB4_DCB_INPUT_FW_DISABLED) {
+			/* During the CXGB4_DCB_STATE_FW_INCOMPLETE state,
+			 * check if the DCB version has changed (there can
+			 * be a mismatch between the default config and the
+			 * negotiated switch configuration at FW), and
+			 * update the DCB support accordingly.
+			 */
+			cxgb4_dcb_update_support(dcb);
+		}
 		switch (transition_to) {
 		case CXGB4_DCB_INPUT_FW_ENABLED: {
 			/* we're already in firmware DCB mode */
@@ -273,8 +288,8 @@
 		enum cxgb4_dcb_state_input input =
 			((pcmd->u.dcb.control.all_syncd_pkd &
 			  FW_PORT_CMD_ALL_SYNCD_F)
-			 ? CXGB4_DCB_STATE_FW_ALLSYNCED
-			 : CXGB4_DCB_STATE_FW_INCOMPLETE);
+			 ? CXGB4_DCB_INPUT_FW_ALLSYNCED
+			 : CXGB4_DCB_INPUT_FW_INCOMPLETE);
 
 		if (dcb->dcb_version != FW_PORT_DCB_VER_UNKNOWN) {
 			dcb_running_version = FW_PORT_CMD_DCB_VERSION_G(
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
index 02040b9..d3c654b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 2013-2014 Chelsio Communications.  All rights reserved.
  *
  *  Written by Anish Bhatt (anish@chelsio.com)
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #ifndef __CXGB4_DCB_H
@@ -67,7 +55,7 @@
 	do { \
 		if ((__dcb)->dcb_version == FW_PORT_DCB_VER_IEEE) \
 			cxgb4_dcb_state_fsm((__dev), \
-					    CXGB4_DCB_STATE_FW_ALLSYNCED); \
+					    CXGB4_DCB_INPUT_FW_ALLSYNCED); \
 	} while (0)
 
 /* States we can be in for a port's Data Center Bridging.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 0f72f9c..ae6a47d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -378,19 +378,7 @@
 			   QUEREMFLITS_G(p[2]) * 16);
 	return 0;
 }
-
-static int cim_qcfg_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cim_qcfg_show, inode->i_private);
-}
-
-static const struct file_operations cim_qcfg_fops = {
-	.owner   = THIS_MODULE,
-	.open    = cim_qcfg_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(cim_qcfg);
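
DEFINE_SHOW_ATTRIBUTE() comes from <linux/seq_file.h> and generates the same
single_open() boilerplate the driver open-coded before; for a given NAME it
expands to roughly:

	static int NAME_open(struct inode *inode, struct file *file)
	{
		return single_open(file, NAME_show, inode->i_private);
	}

	static const struct file_operations NAME_fops = {
		.owner   = THIS_MODULE,
		.open    = NAME_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = single_release,
	};

The generated structure is always called NAME_fops, which is why the debugfs
registration table further down drops the old *_debugfs_fops spellings.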
 
 static int cimq_show(struct seq_file *seq, void *v, int idx)
 {
@@ -860,8 +848,7 @@
 	}
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tx_rate);
+DEFINE_SHOW_ATTRIBUTE(tx_rate);
 
 static int cctrl_tbl_show(struct seq_file *seq, void *v)
 {
@@ -893,8 +880,7 @@
 	kfree(incr);
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(cctrl_tbl);
+DEFINE_SHOW_ATTRIBUTE(cctrl_tbl);
 
 /* Format a value in a unit that differs from the value's native unit by the
  * given factor.
@@ -955,8 +941,7 @@
 
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(clk);
+DEFINE_SHOW_ATTRIBUTE(clk);
 
 /* Firmware Device Log dump. */
 static const char * const devlog_level_strings[] = {
@@ -1990,22 +1975,10 @@
 
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(sensors);
+DEFINE_SHOW_ATTRIBUTE(sensors);
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int clip_tbl_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, clip_tbl_show, inode->i_private);
-}
-
-static const struct file_operations clip_tbl_debugfs_fops = {
-	.owner   = THIS_MODULE,
-	.open    = clip_tbl_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release
-};
+DEFINE_SHOW_ATTRIBUTE(clip_tbl);
 #endif
 
 /*RSS Table.
@@ -2208,8 +2181,7 @@
 
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(rss_config);
+DEFINE_SHOW_ATTRIBUTE(rss_config);
 
 /* RSS Secret Key.
  */
@@ -2628,19 +2600,7 @@
 
 	return 0;
 }
-
-static int resources_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, resources_show, inode->i_private);
-}
-
-static const struct file_operations resources_debugfs_fops = {
-	.owner   = THIS_MODULE,
-	.open    = resources_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
+DEFINE_SHOW_ATTRIBUTE(resources);
 
 /**
  * ethqset2pinfo - return port_info of an Ethernet Queue Set
@@ -2660,7 +2620,7 @@
 	}
 
 	/* should never happen! */
-	BUG_ON(1);
+	BUG();
 	return NULL;
 }
 
@@ -2784,6 +2744,7 @@
 		RL("LROmerged:", stats.lro_merged);
 		RL("LROpackets:", stats.lro_pkts);
 		RL("RxDrops:", stats.rx_drops);
+		RL("RxBadPkts:", stats.bad_rx_pkts);
 		TL("TSO:", tso);
 		TL("TxCSO:", tx_cso);
 		TL("VLANins:", vlan_ins);
@@ -3182,7 +3143,7 @@
 			seq_printf(seq, ", in use: %u/%u\n",
 				   atomic_read(&t->tids_in_use),
 				   atomic_read(&t->hash_tids_in_use));
-		} else if (adap->flags & FW_OFLD_CONN) {
+		} else if (adap->flags & CXGB4_FW_OFLD_CONN) {
 			seq_printf(seq, "TID range: %u..%u/%u..%u",
 				   t->aftid_base,
 				   t->aftid_end,
@@ -3232,8 +3193,7 @@
 			   t4_read_reg(adap, LE_DB_ACT_CNT_IPV6_A));
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tid_info);
+DEFINE_SHOW_ATTRIBUTE(tid_info);
 
 static void add_debugfs_mem(struct adapter *adap, const char *name,
 			    unsigned int idx, unsigned int size_mb)
@@ -3276,8 +3236,10 @@
 		return -ENOMEM;
 
 	err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz);
-	if (err)
+	if (err) {
+		kvfree(t);
 		return err;
+	}
 
 	bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz);
 	kvfree(t);
@@ -3363,21 +3325,9 @@
 
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(meminfo);
 
-static int meminfo_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, meminfo_show, inode->i_private);
-}
-
-static const struct file_operations meminfo_fops = {
-	.owner   = THIS_MODULE,
-	.open    = meminfo_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
-
-static int chcr_show(struct seq_file *seq, void *v)
+static int chcr_stats_show(struct seq_file *seq, void *v)
 {
 	struct adapter *adap = seq->private;
 
@@ -3398,20 +3348,7 @@
 		   atomic_read(&adap->chcr_stats.ipsec_cnt));
 	return 0;
 }
-
-
-static int chcr_stats_open(struct inode *inode, struct file *file)
-{
-        return single_open(file, chcr_show, inode->i_private);
-}
-
-static const struct file_operations chcr_stats_debugfs_fops = {
-        .owner   = THIS_MODULE,
-        .open    = chcr_stats_open,
-        .read    = seq_read,
-        .llseek  = seq_lseek,
-        .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(chcr_stats);
 
 #define PRINT_ADAP_STATS(string, value) \
 	seq_printf(seq, "%-25s %-20llu\n", (string), \
@@ -3572,8 +3509,7 @@
 
 	return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tp_stats);
+DEFINE_SHOW_ATTRIBUTE(tp_stats);
 
 /* Add an array of Debug FS files.
  */
@@ -3595,14 +3531,13 @@
 {
 	int i;
 	u32 size = 0;
-	struct dentry *de;
 
 	static struct t4_debugfs_entry t4_debugfs_files[] = {
 		{ "cim_la", &cim_la_fops, 0400, 0 },
 		{ "cim_pif_la", &cim_pif_la_fops, 0400, 0 },
 		{ "cim_ma_la", &cim_ma_la_fops, 0400, 0 },
 		{ "cim_qcfg", &cim_qcfg_fops, 0400, 0 },
-		{ "clk", &clk_debugfs_fops, 0400, 0 },
+		{ "clk", &clk_fops, 0400, 0 },
 		{ "devlog", &devlog_fops, 0400, 0 },
 		{ "mboxlog", &mboxlog_fops, 0400, 0 },
 		{ "mbox0", &mbox_debugfs_fops, 0600, 0 },
@@ -3620,11 +3555,11 @@
 		{ "l2t", &t4_l2t_fops, 0400, 0},
 		{ "mps_tcam", &mps_tcam_debugfs_fops, 0400, 0 },
 		{ "rss", &rss_debugfs_fops, 0400, 0 },
-		{ "rss_config", &rss_config_debugfs_fops, 0400, 0 },
+		{ "rss_config", &rss_config_fops, 0400, 0 },
 		{ "rss_key", &rss_key_debugfs_fops, 0400, 0 },
 		{ "rss_pf_config", &rss_pf_config_debugfs_fops, 0400, 0 },
 		{ "rss_vf_config", &rss_vf_config_debugfs_fops, 0400, 0 },
-		{ "resources", &resources_debugfs_fops, 0400, 0 },
+		{ "resources", &resources_fops, 0400, 0 },
 #ifdef CONFIG_CHELSIO_T4_DCB
 		{ "dcb_info", &dcb_info_debugfs_fops, 0400, 0 },
 #endif
@@ -3643,18 +3578,18 @@
 		{ "obq_ncsi", &cim_obq_fops, 0400, 5 },
 		{ "tp_la", &tp_la_fops, 0400, 0 },
 		{ "ulprx_la", &ulprx_la_fops, 0400, 0 },
-		{ "sensors", &sensors_debugfs_fops, 0400, 0 },
+		{ "sensors", &sensors_fops, 0400, 0 },
 		{ "pm_stats", &pm_stats_debugfs_fops, 0400, 0 },
-		{ "tx_rate", &tx_rate_debugfs_fops, 0400, 0 },
-		{ "cctrl", &cctrl_tbl_debugfs_fops, 0400, 0 },
+		{ "tx_rate", &tx_rate_fops, 0400, 0 },
+		{ "cctrl", &cctrl_tbl_fops, 0400, 0 },
 #if IS_ENABLED(CONFIG_IPV6)
-		{ "clip_tbl", &clip_tbl_debugfs_fops, 0400, 0 },
+		{ "clip_tbl", &clip_tbl_fops, 0400, 0 },
 #endif
-		{ "tids", &tid_info_debugfs_fops, 0400, 0},
+		{ "tids", &tid_info_fops, 0400, 0},
 		{ "blocked_fl", &blocked_fl_fops, 0600, 0 },
 		{ "meminfo", &meminfo_fops, 0400, 0 },
-		{ "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
-		{ "tp_stats", &tp_stats_debugfs_fops, 0400, 0 },
+		{ "crypto", &chcr_stats_fops, 0400, 0 },
+		{ "tp_stats", &tp_stats_fops, 0400, 0 },
 	};
 
 	/* Debug FS nodes common to all T5 and later adapters.
@@ -3706,8 +3641,8 @@
 		}
 	}
 
-	de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
-				      &flash_debugfs_fops, adap->params.sf_size);
+	debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
+				 &flash_debugfs_fops, adap->params.sf_size);
 	debugfs_create_bool("use_backdoor", 0600,
 			    adap->debugfs_root, &adap->use_bd);
 	debugfs_create_bool("trace_rss", 0600,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
index 23f43a0..ba95e13 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
@@ -37,19 +37,6 @@
 
 #include <linux/export.h>
 
-#define DEFINE_SIMPLE_DEBUGFS_FILE(name) \
-static int name##_open(struct inode *inode, struct file *file) \
-{ \
-	return single_open(file, name##_show, inode->i_private); \
-} \
-static const struct file_operations name##_debugfs_fops = { \
-	.owner   = THIS_MODULE, \
-	.open    = name##_open, \
-	.read    = seq_read, \
-	.llseek  = seq_lseek, \
-	.release = single_release \
-}
-
 struct t4_debugfs_entry {
 	const char *name;
 	const struct file_operations *ops;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index d07230c..76538f4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Copyright (C) 2013-2015 Chelsio Communications.  All rights reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms and conditions of the GNU General Public License,
- *  version 2, as published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- *  more details.
- *
- *  The full GNU General Public License is included in this distribution in
- *  the file called "COPYING".
- *
  */
 
 #include <linux/firmware.h>
@@ -442,12 +430,14 @@
  *	Link Mode Mask.
  */
 static void fw_caps_to_lmm(enum fw_port_type port_type,
-			   unsigned int fw_caps,
+			   fw_port_cap32_t fw_caps,
 			   unsigned long *link_mode_mask)
 {
 	#define SET_LMM(__lmm_name) \
-		__set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
-			  link_mode_mask)
+		do { \
+			__set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
+				  link_mode_mask); \
+		} while (0)
 
 	#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
 		do { \
@@ -541,7 +531,7 @@
 	case FW_PORT_TYPE_CR4_QSFP:
 		SET_LMM(FIBRE);
 		FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
-		FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
@@ -552,6 +542,13 @@
 		break;
 	}
 
+	if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
+		FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
+		FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
+	} else {
+		SET_LMM(FEC_NONE);
+	}
+
 	FW_CAPS_TO_LMM(ANEG, Autoneg);
 	FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
 	FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
@@ -623,7 +620,10 @@
 
 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
 		       link_ksettings->link_modes.supported);
-	fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
+	fw_caps_to_lmm(pi->port_type,
+		       t4_link_acaps(pi->adapter,
+				     pi->lport,
+				     &pi->link_cfg),
 		       link_ksettings->link_modes.advertising);
 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
 		       link_ksettings->link_modes.lp_advertising);
@@ -633,22 +633,6 @@
 		       : SPEED_UNKNOWN);
 	base->duplex = DUPLEX_FULL;
 
-	if (pi->link_cfg.fc & PAUSE_RX) {
-		if (pi->link_cfg.fc & PAUSE_TX) {
-			ethtool_link_ksettings_add_link_mode(link_ksettings,
-							     advertising,
-							     Pause);
-		} else {
-			ethtool_link_ksettings_add_link_mode(link_ksettings,
-							     advertising,
-							     Asym_Pause);
-		}
-	} else if (pi->link_cfg.fc & PAUSE_TX) {
-		ethtool_link_ksettings_add_link_mode(link_ksettings,
-						     advertising,
-						     Asym_Pause);
-	}
-
 	base->autoneg = pi->link_cfg.autoneg;
 	if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
 		ethtool_link_ksettings_add_link_mode(link_ksettings,
@@ -679,18 +663,15 @@
 	    base->autoneg == AUTONEG_DISABLE) {
 		fw_caps = speed_to_fw_caps(base->speed);
 
-		/* Must only specify a single speed which must be supported
-		 * as part of the Physical Port Capabilities.
-		 */
-		if ((fw_caps & (fw_caps - 1)) != 0 ||
-		    !(lc->pcaps & fw_caps))
+		/* Speed must be supported by Physical Port Capabilities. */
+		if (!(lc->pcaps & fw_caps))
 			return -EINVAL;
 
 		lc->speed_caps = fw_caps;
 		lc->acaps = fw_caps;
 	} else {
 		fw_caps =
-			 lmm_to_fw_caps(link_ksettings->link_modes.advertising);
+			lmm_to_fw_caps(link_ksettings->link_modes.advertising);
 		if (!(lc->pcaps & fw_caps))
 			return -EINVAL;
 		lc->speed_caps = 0;
@@ -869,7 +850,7 @@
 	    e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
 		return -EINVAL;
 
-	if (adapter->flags & FULL_INIT_DONE)
+	if (adapter->flags & CXGB4_FULL_INIT_DONE)
 		return -EBUSY;
 
 	for (i = 0; i < pi->nqsets; ++i) {
@@ -926,11 +907,190 @@
 	return q->rspq.adaptive_rx;
 }
 
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+/* Return the current global Adapter SGE Doorbell Queue Timer Tick for all
+ * Ethernet TX Queues.
+ */
+static int get_dbqtimer_tick(struct net_device *dev)
 {
-	set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce);
-	return set_rx_intr_params(dev, c->rx_coalesce_usecs,
-				  c->rx_max_coalesced_frames);
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+
+	if (!(adap->flags & CXGB4_SGE_DBQ_TIMER))
+		return 0;
+
+	return adap->sge.dbqtimer_tick;
+}
+
+/* Return the SGE Doorbell Queue Timer Value for the Ethernet TX Queues
+ * associated with a Network Device.
+ */
+static int get_dbqtimer(struct net_device *dev)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	struct sge_eth_txq *txq;
+
+	txq = &adap->sge.ethtxq[pi->first_qset];
+
+	if (!(adap->flags & CXGB4_SGE_DBQ_TIMER))
+		return 0;
+
+	/* all of the TX Queues use the same Timer Index */
+	return adap->sge.dbqtimer_val[txq->dbqtimerix];
+}
+
+/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX
+ * Queues.  This is the fundamental "Tick" that sets the scale of values which
+ * can be used.  Individual Ethernet TX Queues index into a relatively small
+ * array of Tick Multipliers.  Changing the base Tick will thus change all of
+ * the resulting Timer Values associated with those multipliers for all
+ * Ethernet TX Queues.
+ */
+static int set_dbqtimer_tick(struct net_device *dev, int usecs)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	struct sge *s = &adap->sge;
+	u32 param, val;
+	int ret;
+
+	if (!(adap->flags & CXGB4_SGE_DBQ_TIMER))
+		return 0;
+
+	/* return early if it's the same Timer Tick we're already using */
+	if (s->dbqtimer_tick == usecs)
+		return 0;
+
+	/* attempt to set the new Timer Tick value */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
+	val = usecs;
+	ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+	if (ret)
+		return ret;
+	s->dbqtimer_tick = usecs;
+
+	/* if successful, reread resulting dependent Timer values */
+	ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(s->dbqtimer_val),
+				    s->dbqtimer_val);
+	return ret;
+}
+
+/* Set the SGE Doorbell Queue Timer Value for the Ethernet TX Queues
+ * associated with a Network Device.  There is a relatively small array of
+ * possible Timer Values so we need to pick the closest value available.
+ */
+static int set_dbqtimer(struct net_device *dev, int usecs)
+{
+	int qix, timerix, min_timerix, delta, min_delta;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	struct sge *s = &adap->sge;
+	struct sge_eth_txq *txq;
+	u32 param, val;
+	int ret;
+
+	if (!(adap->flags & CXGB4_SGE_DBQ_TIMER))
+		return 0;
+
+	/* Find the SGE Doorbell Timer Value that's closest to the requested
+	 * value.
+	 */
+	min_delta = INT_MAX;
+	min_timerix = 0;
+	for (timerix = 0; timerix < ARRAY_SIZE(s->dbqtimer_val); timerix++) {
+		delta = s->dbqtimer_val[timerix] - usecs;
+		if (delta < 0)
+			delta = -delta;
+		if (delta < min_delta) {
+			min_delta = delta;
+			min_timerix = timerix;
+		}
+	}
+
+	/* Return early if it's the same Timer Index we're already using.
+	 * We use the same Timer Index for all of the TX Queues for an
+	 * interface so it's only necessary to check the first one.
+	 */
+	txq = &s->ethtxq[pi->first_qset];
+	if (txq->dbqtimerix == min_timerix)
+		return 0;
+
+	for (qix = 0; qix < pi->nqsets; qix++, txq++) {
+		if (adap->flags & CXGB4_FULL_INIT_DONE) {
+			param =
+			 (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+			  FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) |
+			  FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id));
+			val = min_timerix;
+			ret = t4_set_params(adap, adap->mbox, adap->pf, 0,
+					    1, &param, &val);
+			if (ret)
+				return ret;
+		}
+		txq->dbqtimerix = min_timerix;
+	}
+	return 0;
+}
+
+/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX
+ * Queues and the Timer Value for the Ethernet TX Queues associated with a
+ * Network Device.  Since changing the global Tick changes all of the
+ * available Timer Values, we need to do this first before selecting the
+ * resulting closest Timer Value.  Moreover, since the Tick is global,
+ * changing it affects the Timer Values for all Network Devices on the
+ * adapter.  So, before changing the Tick, we grab all of the current Timer
+ * Values for other Network Devices on this Adapter and then attempt to select
+ * new Timer Values which are close to the old values ...
+ */
+static int set_dbqtimer_tickval(struct net_device *dev,
+				int tick_usecs, int timer_usecs)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	int timer[MAX_NPORTS];
+	unsigned int port;
+	int ret;
+
+	/* Grab the current Timer Values of the adapter's other Network
+	 * Interfaces and fill in the new one for this Network Interface.
+	 */
+	for_each_port(adap, port)
+		if (port == pi->port_id)
+			timer[port] = timer_usecs;
+		else
+			timer[port] = get_dbqtimer(adap->port[port]);
+
+	/* Change the global Tick first ... */
+	ret = set_dbqtimer_tick(dev, tick_usecs);
+	if (ret)
+		return ret;
+
+	/* ... and then set all of the Network Interface Timer Values ... */
+	for_each_port(adap, port) {
+		ret = set_dbqtimer(adap->port[port], timer[port]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int set_coalesce(struct net_device *dev,
+			struct ethtool_coalesce *coalesce)
+{
+	int ret;
+
+	set_adaptive_rx_setting(dev, coalesce->use_adaptive_rx_coalesce);
+
+	ret = set_rx_intr_params(dev, coalesce->rx_coalesce_usecs,
+				 coalesce->rx_max_coalesced_frames);
+	if (ret)
+		return ret;
+
+	return set_dbqtimer_tickval(dev,
+				    coalesce->tx_coalesce_usecs_irq,
+				    coalesce->tx_coalesce_usecs);
 }
 
 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
@@ -943,6 +1103,8 @@
 	c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ?
 		adap->sge.counter_val[rq->pktcnt_idx] : 0;
 	c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev);
+	c->tx_coalesce_usecs_irq = get_dbqtimer_tick(dev);
+	c->tx_coalesce_usecs = get_dbqtimer(dev);
 	return 0;
 }
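
These two new fields hook the SGE Doorbell Queue Timers into the standard
ethtool coalescing interface: tx_coalesce_usecs_irq carries the global Timer
Tick and tx_coalesce_usecs the per-port Timer Value. So, for example,
"ethtool -C eth0 tx-usecs-irq 64 tx-usecs 128" (interface name hypothetical)
first reprograms the global Tick to 64 us and then selects the available
Timer Value closest to 128 us for all TX queues of eth0, re-fitting the
other ports' timers as described in set_dbqtimer_tickval() above.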
 
@@ -1076,7 +1238,7 @@
 	 * firmware image otherwise we'll try to do the entire job from the
 	 * host ... and we always "force" the operation in this path.
 	 */
-	if (adap->flags & FULL_INIT_DONE)
+	if (adap->flags & CXGB4_FULL_INIT_DONE)
 		mbox = adap->mbox;
 
 	ret = t4_fw_upgrade(adap, mbox, fw->data, fw->size, 1);
@@ -1155,7 +1317,7 @@
 		return 0;
 
 	/* Interface must be brought up at least once */
-	if (pi->adapter->flags & FULL_INIT_DONE) {
+	if (pi->adapter->flags & CXGB4_FULL_INIT_DONE) {
 		for (i = 0; i < pi->rss_size; i++)
 			pi->rss[i] = p[i];
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
index 6c8a62e..33b2c0c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
@@ -74,7 +74,7 @@
 	if (is_t4(adap->params.chip))
 		return -EINVAL;
 
-	if (!(adap->flags & FULL_INIT_DONE))
+	if (!(adap->flags & CXGB4_FULL_INIT_DONE))
 		return -EINVAL;
 
 	dev_info(adap->pdev_dev, "Enabling FCoE offload features\n");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 7dddb9e..43b0f8c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -248,8 +248,9 @@
 	u32 fconf, iconf;
 
 	/* Check for unconfigured fields being used. */
-	fconf = adapter->params.tp.vlan_pri_map;
 	iconf = adapter->params.tp.ingress_config;
+	fconf = fs->hash ? adapter->params.tp.filter_mask :
+			   adapter->params.tp.vlan_pri_map;
 
 	if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) ||
 	    unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) ||
@@ -524,8 +525,7 @@
 		return -ENOMEM;
 
 	fwr = __skb_put(skb, len);
-	t4_mk_filtdelwr(f->tid, fwr, (adapter->flags & SHUTTING_DOWN) ? -1
-			: adapter->sge.fw_evtq.abs_id);
+	t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
 
 	/* Mark the filter as "pending" and ship off the Filter Work Request.
 	 * When we get the Work Request Reply we'll clear the pending status.
@@ -727,10 +727,8 @@
 		cxgb4_smt_release(f->smt);
 
 	if (f->fs.val.encap_vld && f->fs.val.ovlan_vld)
-		if (atomic_dec_and_test(&adap->mps_encap[f->fs.val.ovlan &
-							 0x1ff].refcnt))
-			t4_free_encap_mac_filt(adap, pi->viid,
-					       f->fs.val.ovlan & 0x1ff, 0);
+		t4_free_encap_mac_filt(adap, pi->viid,
+				       f->fs.val.ovlan & 0x1ff, 0);
 
 	if ((f->fs.hash || is_t6(adap->params.chip)) && f->fs.type)
 		cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
@@ -744,16 +742,40 @@
 
 void clear_all_filters(struct adapter *adapter)
 {
+	struct net_device *dev = adapter->port[0];
 	unsigned int i;
 
 	if (adapter->tids.ftid_tab) {
 		struct filter_entry *f = &adapter->tids.ftid_tab[0];
 		unsigned int max_ftid = adapter->tids.nftids +
 					adapter->tids.nsftids;
-
+		/* Clear all TCAM filters */
 		for (i = 0; i < max_ftid; i++, f++)
 			if (f->valid || f->pending)
-				clear_filter(adapter, f);
+				cxgb4_del_filter(dev, i, &f->fs);
+	}
+
+	/* Clear all hash filters */
+	if (is_hashfilter(adapter) && adapter->tids.tid_tab) {
+		struct filter_entry *f;
+		unsigned int sb;
+
+		for (i = adapter->tids.hash_base;
+		     i <= adapter->tids.ntids; i++) {
+			f = (struct filter_entry *)
+				adapter->tids.tid_tab[i];
+
+			if (f && (f->valid || f->pending))
+				cxgb4_del_filter(dev, i, &f->fs);
+		}
+
+		sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A);
+		for (i = 0; i < sb; i++) {
+			f = (struct filter_entry *)adapter->tids.tid_tab[i];
+
+			if (f && (f->valid || f->pending))
+				cxgb4_del_filter(dev, i, &f->fs);
+		}
 	}
 }
 
@@ -1018,7 +1040,7 @@
 			    RSS_QUEUE_V(f->fs.iq) |
 			    TX_QUEUE_V(f->fs.nat_mode) |
 			    T5_OPT_2_VALID_F |
-			    RX_CHANNEL_F |
+			    RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
 			    CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
 					 (f->fs.dirsteer << 1)) |
 			    PACE_V((f->fs.maskhash) |
@@ -1058,7 +1080,7 @@
 			    RSS_QUEUE_V(f->fs.iq) |
 			    TX_QUEUE_V(f->fs.nat_mode) |
 			    T5_OPT_2_VALID_F |
-			    RX_CHANNEL_F |
+			    RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
 			    CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
 					 (f->fs.dirsteer << 1)) |
 			    PACE_V((f->fs.maskhash) |
@@ -1153,7 +1175,6 @@
 			if (ret < 0)
 				goto free_atid;
 
-			atomic_inc(&adapter->mps_encap[ret].refcnt);
 			f->fs.val.ovlan = ret;
 			f->fs.mask.ovlan = 0xffff;
 			f->fs.val.ovlan_vld = 1;
@@ -1396,7 +1417,6 @@
 			if (ret < 0)
 				goto free_clip;
 
-			atomic_inc(&adapter->mps_encap[ret].refcnt);
 			f->fs.val.ovlan = ret;
 			f->fs.mask.ovlan = 0x1ff;
 			f->fs.val.ovlan_vld = 1;
@@ -1568,9 +1588,8 @@
 	struct filter_ctx ctx;
 	int ret;
 
-	/* If we are shutting down the adapter do not wait for completion */
-	if (netdev2adap(dev)->flags & SHUTTING_DOWN)
-		return __cxgb4_del_filter(dev, filter_id, fs, NULL);
+	if (netdev2adap(dev)->flags & CXGB4_SHUTTING_DOWN)
+		return 0;
 
 	init_completion(&ctx.completion);
 
@@ -1722,12 +1741,13 @@
 		break;
 
 	default:
-		dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n",
-			__func__, status);
+		if (status != CPL_ERR_TCAM_FULL)
+			dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n",
+				__func__, status);
 
 		if (ctx) {
 			if (status == CPL_ERR_TCAM_FULL)
-				ctx->result = -EAGAIN;
+				ctx->result = -ENOSPC;
 			else
 				ctx->result = -EINVAL;
 		}
@@ -1810,24 +1830,38 @@
 	}
 }
 
-int init_hash_filter(struct adapter *adap)
+void init_hash_filter(struct adapter *adap)
 {
+	u32 reg;
+
 	/* On T6, verify the necessary register configs and warn the user in
 	 * case of improper config
 	 */
 	if (is_t6(adap->params.chip)) {
-		if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4)
-			goto err;
+		if (is_offload(adap)) {
+			if (!(t4_read_reg(adap, TP_GLOBAL_CONFIG_A)
+			   & ACTIVEFILTERCOUNTS_F)) {
+				dev_err(adap->pdev_dev, "Invalid hash filter + ofld config\n");
+				return;
+			}
+		} else {
+			reg = t4_read_reg(adap, LE_DB_RSP_CODE_0_A);
+			if (TCAM_ACTV_HIT_G(reg) != 4) {
+				dev_err(adap->pdev_dev, "Invalid hash filter config\n");
+				return;
+			}
 
-		if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4)
-			goto err;
+			reg = t4_read_reg(adap, LE_DB_RSP_CODE_1_A);
+			if (HASH_ACTV_HIT_G(reg) != 4) {
+				dev_err(adap->pdev_dev, "Invalid hash filter config\n");
+				return;
+			}
+		}
+
 	} else {
 		dev_err(adap->pdev_dev, "Hash filter supported only on T6\n");
-		return -EINVAL;
+		return;
 	}
+
 	adap->params.hash_filter = 1;
-	return 0;
-err:
-	dev_warn(adap->pdev_dev, "Invalid hash filter config!\n");
-	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
index 8db5fca..b0751c0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -50,7 +50,7 @@
 
 int writable_filter(struct filter_entry *f);
 void clear_all_filters(struct adapter *adapter);
-int init_hash_filter(struct adapter *adap);
+void init_hash_filter(struct adapter *adap);
 bool is_filter_exact_match(struct adapter *adap,
 			   struct ch_filter_specification *fs);
 #endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 961e308..3802487 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -62,7 +62,6 @@
 #include <net/netevent.h>
 #include <net/addrconf.h>
 #include <net/bonding.h>
-#include <net/addrconf.h>
 #include <linux/uaccess.h>
 #include <linux/crash_dump.h>
 #include <net/udp_tunnel.h>
@@ -367,13 +366,19 @@
 	int ret;
 	u64 mhash = 0;
 	u64 uhash = 0;
+	/* idx stores the indices of the allocated filters; its size should
+	 * grow with the number of MAC addresses we allocate filters for
+	 */
+	u16 idx[1] = {};
 	bool free = false;
 	bool ucast = is_unicast_ether_addr(mac_addr);
 	const u8 *maclist[1] = {mac_addr};
 	struct hash_mac_addr *new_entry;
 
-	ret = t4_alloc_mac_filt(adap, adap->mbox, pi->viid, free, 1, maclist,
-				NULL, ucast ? &uhash : &mhash, false);
+	ret = cxgb4_alloc_mac_filt(adap, pi->viid, free, 1, maclist,
+				   idx, ucast ? &uhash : &mhash, false);
 	if (ret < 0)
 		goto out;
 	/* if hash != 0, then add the addr to hash addr list
@@ -411,7 +416,7 @@
 		}
 	}
 
-	ret = t4_free_mac_filt(adap, adap->mbox, pi->viid, 1, maclist, false);
+	ret = cxgb4_free_mac_filt(adap, pi->viid, 1, maclist, false);
 	return ret < 0 ? -EINVAL : 0;
 }
 
@@ -434,6 +439,60 @@
 }
 
 /**
+ *	cxgb4_change_mac - Update match filter for a MAC address.
+ *	@pi: the port_info
+ *	@viid: the VI id
+ *	@tcam_idx: pointer to the TCAM index of the existing filter for the
+ *		   old MAC address value, or -1 if there is none
+ *	@addr: the new MAC address value
+ *	@persist: whether a new MAC allocation should be persistent
+ *	@smt_idx: the destination to store the SMT index of the new entry
+ *
+ *	Modifies an MPS filter and sets it to the new MAC address if
+ *	@tcam_idx >= 0, or adds the MAC address to a new filter if
+ *	@tcam_idx < 0. In the latter case the address is added persistently
+ *	if @persist is %true.
+ *	Addresses are programmed into the hash region if the TCAM runs out
+ *	of entries.
+ */
+int cxgb4_change_mac(struct port_info *pi, unsigned int viid,
+		     int *tcam_idx, const u8 *addr, bool persist,
+		     u8 *smt_idx)
+{
+	struct adapter *adapter = pi->adapter;
+	struct hash_mac_addr *entry, *new_entry;
+	int ret;
+
+	ret = t4_change_mac(adapter, adapter->mbox, viid,
+			    *tcam_idx, addr, persist, smt_idx);
+	/* We ran out of TCAM entries. Try programming the hash region. */
+	if (ret == -ENOMEM) {
+		/* If the MAC address to be updated is in the hash addr
+		 * list, update it from the list
+		 */
+		list_for_each_entry(entry, &adapter->mac_hlist, list) {
+			if (entry->iface_mac) {
+				ether_addr_copy(entry->addr, addr);
+				goto set_hash;
+			}
+		}
+		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+		if (!new_entry)
+			return -ENOMEM;
+		ether_addr_copy(new_entry->addr, addr);
+		new_entry->iface_mac = true;
+		list_add_tail(&new_entry->list, &adapter->mac_hlist);
+set_hash:
+		ret = cxgb4_set_addr_hash(pi);
+	} else if (ret >= 0) {
+		*tcam_idx = ret;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
  *	link_start - enable a port
  *	@dev: the port to enable
  *
@@ -451,15 +510,9 @@
 	 */
 	ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
 			    !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true);
-	if (ret == 0) {
-		ret = t4_change_mac(pi->adapter, mb, pi->viid,
-				    pi->xact_addr_filt, dev->dev_addr, true,
-				    true);
-		if (ret >= 0) {
-			pi->xact_addr_filt = ret;
-			ret = 0;
-		}
-	}
+	if (ret == 0)
+		ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt,
+					    dev->dev_addr, true, &pi->smt_idx);
 	if (ret == 0)
 		ret = t4_link_l1cfg(pi->adapter, mb, pi->tx_chan,
 				    &pi->link_cfg);
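
With this change, bring-up and the ndo_set_mac_address path
(cxgb_set_mac_addr(), further down) funnel MAC updates through the same
cxgb4_update_mac_filt() helper, so a runtime address change via
"ip link set dev <port> address ..." exercises the same TCAM-first,
hash-region-fallback logic as link_start() does at bring-up, and both
paths now track the SMT index through pi->smt_idx.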
@@ -528,7 +581,7 @@
 			struct sge_eth_txq *eq;
 
 			eq = container_of(txq, struct sge_eth_txq, q);
-			netif_tx_wake_queue(eq->txq);
+			t4_sge_eth_txq_egress_update(q->adap, eq, -1);
 		} else {
 			struct sge_uld_txq *oq;
 
@@ -604,12 +657,12 @@
 
 static void disable_msi(struct adapter *adapter)
 {
-	if (adapter->flags & USING_MSIX) {
+	if (adapter->flags & CXGB4_USING_MSIX) {
 		pci_disable_msix(adapter->pdev);
-		adapter->flags &= ~USING_MSIX;
-	} else if (adapter->flags & USING_MSI) {
+		adapter->flags &= ~CXGB4_USING_MSIX;
+	} else if (adapter->flags & CXGB4_USING_MSI) {
 		pci_disable_msi(adapter->pdev);
-		adapter->flags &= ~USING_MSI;
+		adapter->flags &= ~CXGB4_USING_MSI;
 	}
 }
 
@@ -625,7 +678,7 @@
 		adap->swintr = 1;
 		t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A), v);
 	}
-	if (adap->flags & MASTER_PF)
+	if (adap->flags & CXGB4_MASTER_PF)
 		t4_slow_intr_handler(adap);
 	return IRQ_HANDLED;
 }
@@ -655,9 +708,38 @@
 	}
 }
 
+int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec,
+		       cpumask_var_t *aff_mask, int idx)
+{
+	int rv;
+
+	if (!zalloc_cpumask_var(aff_mask, GFP_KERNEL)) {
+		dev_err(adap->pdev_dev, "alloc_cpumask_var failed\n");
+		return -ENOMEM;
+	}
+
+	cpumask_set_cpu(cpumask_local_spread(idx, dev_to_node(adap->pdev_dev)),
+			*aff_mask);
+
+	rv = irq_set_affinity_hint(vec, *aff_mask);
+	if (rv)
+		dev_warn(adap->pdev_dev,
+			 "irq_set_affinity_hint %u failed %d\n",
+			 vec, rv);
+
+	return 0;
+}
+
+void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask)
+{
+	irq_set_affinity_hint(vec, NULL);
+	free_cpumask_var(aff_mask);
+}
+
 static int request_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
+	struct msix_info *minfo;
 	int err, ethqidx;
 	int msi_index = 2;
 
@@ -667,32 +749,77 @@
 		return err;
 
 	for_each_ethrxq(s, ethqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
+		minfo = &adap->msix_info[msi_index];
+		err = request_irq(minfo->vec,
 				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
+				  minfo->desc,
 				  &s->ethrxq[ethqidx].rspq);
 		if (err)
 			goto unwind;
+
+		cxgb4_set_msix_aff(adap, minfo->vec,
+				   &minfo->aff_mask, ethqidx);
 		msi_index++;
 	}
 	return 0;
 
 unwind:
-	while (--ethqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->ethrxq[ethqidx].rspq);
+	while (--ethqidx >= 0) {
+		msi_index--;
+		minfo = &adap->msix_info[msi_index];
+		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
+		free_irq(minfo->vec, &s->ethrxq[ethqidx].rspq);
+	}
 	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
 	return err;
 }
 
 static void free_msix_queue_irqs(struct adapter *adap)
 {
-	int i, msi_index = 2;
 	struct sge *s = &adap->sge;
+	struct msix_info *minfo;
+	int i, msi_index = 2;
 
 	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
-	for_each_ethrxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
+	for_each_ethrxq(s, i) {
+		minfo = &adap->msix_info[msi_index++];
+		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
+		free_irq(minfo->vec, &s->ethrxq[i].rspq);
+	}
+}
+
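The two affinity helpers spread the per-queue MSI-X vectors across CPUs close
to the adapter: cpumask_local_spread(idx, node) picks the idx-th CPU with a
preference for the given NUMA node, and irq_set_affinity_hint() publishes the
resulting mask through /proc/irq/<vec>/affinity_hint for irqbalance to consume
(it is a hint, not a hard binding). The hint is cleared again in the unwind
path and in free_msix_queue_irqs(), matching the usual driver convention of
dropping the hint before the vector is released with free_irq().
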
+static int setup_ppod_edram(struct adapter *adap)
+{
+	unsigned int param, val;
+	int ret;
+
+	/* The driver sends a FW_PARAMS_PARAM_DEV_PPOD_EDRAM read command to
+	 * check whether the firmware supports the ppod edram feature. If
+	 * the firmware returns 1, the driver enables the feature by writing
+	 * the same parameter back with the value 1.
+	 */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PPOD_EDRAM));
+
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+	if (ret < 0) {
+		dev_warn(adap->pdev_dev,
+			 "querying PPOD_EDRAM support failed: %d\n",
+			 ret);
+		return -1;
+	}
+
+	if (val != 1)
+		return -1;
+
+	ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+	if (ret < 0) {
+		dev_err(adap->pdev_dev,
+			"setting PPOD_EDRAM failed: %d\n", ret);
+		return -1;
+	}
+	return 0;
 }
 
 /**
@@ -790,9 +917,9 @@
 /* Disable interrupt and napi handler */
 static void disable_interrupts(struct adapter *adap)
 {
-	if (adap->flags & FULL_INIT_DONE) {
+	if (adap->flags & CXGB4_FULL_INIT_DONE) {
 		t4_intr_disable(adap);
-		if (adap->flags & USING_MSIX) {
+		if (adap->flags & CXGB4_USING_MSIX) {
 			free_msix_queue_irqs(adap);
 			free_irq(adap->msix_info[0].vec, adap);
 		} else {
@@ -833,7 +960,7 @@
 	bitmap_zero(s->starving_fl, s->egr_sz);
 	bitmap_zero(s->txq_maperr, s->egr_sz);
 
-	if (adap->flags & USING_MSIX)
+	if (adap->flags & CXGB4_USING_MSIX)
 		adap->msi_idx = 1;         /* vector 0 is for non-queue interrupts */
 	else {
 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
@@ -886,10 +1013,13 @@
 			q->rspq.idx = j;
 			memset(&q->stats, 0, sizeof(q->stats));
 		}
-		for (j = 0; j < pi->nqsets; j++, t++) {
+
+		q = &s->ethrxq[pi->first_qset];
+		for (j = 0; j < pi->nqsets; j++, t++, q++) {
 			err = t4_sge_alloc_eth_txq(adap, t, dev,
 					netdev_get_tx_queue(dev, j),
-					s->fw_evtq.cntxt_id);
+					q->rspq.cntxt_id,
+					!!(adap->flags & CXGB4_SGE_DBQ_TIMER));
 			if (err)
 				goto freeout;
 		}
@@ -911,7 +1041,7 @@
 	if (!is_t4(adap->params.chip)) {
 		err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0],
 					   netdev_get_tx_queue(adap->port[0], 0)
-					   , s->fw_evtq.cntxt_id);
+					   , s->fw_evtq.cntxt_id, false);
 		if (err)
 			goto freeout;
 	}
@@ -929,8 +1059,7 @@
 }
 
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     struct net_device *sb_dev,
-			     select_queue_fallback_t fallback)
+			     struct net_device *sb_dev)
 {
 	int txq;
 
@@ -972,7 +1101,7 @@
 		return txq;
 	}
 
-	return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
+	return netdev_pick_tx(dev, skb, NULL) % dev->real_num_tx_queues;
 }
 
 static int closest_timer(const struct sge *s, int time)
@@ -1586,28 +1715,6 @@
 EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
 
 /**
- *	cxgb4_tp_smt_idx - Get the Source Mac Table index for this VI
- *	@chip: chip type
- *	@viid: VI id of the given port
- *
- *	Return the SMT index for this VI.
- */
-unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid)
-{
-	/* In T4/T5, SMT contains 256 SMAC entries organized in
-	 * 128 rows of 2 entries each.
-	 * In T6, SMT contains 256 SMAC entries in 256 rows.
-	 * TODO: The below code needs to be updated when we add support
-	 * for 256 VFs.
-	 */
-	if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
-		return ((viid & 0x7f) << 1);
-	else
-		return (viid & 0x7f);
-}
-EXPORT_SYMBOL(cxgb4_tp_smt_idx);
-
-/**
  *	cxgb4_port_chan - get the HW channel of a port
  *	@dev: the net device for the port
  *
@@ -1619,6 +1726,18 @@
 }
 EXPORT_SYMBOL(cxgb4_port_chan);
 
+/**
+ *	cxgb4_port_e2cchan - get the HW c-channel of a port
+ *	@dev: the net device for the port
+ *
+ *	Return the HW RX c-channel of the given port.
+ */
+unsigned int cxgb4_port_e2cchan(const struct net_device *dev)
+{
+	return netdev2pinfo(dev)->rx_cchan;
+}
+EXPORT_SYMBOL(cxgb4_port_e2cchan);
+
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
 {
 	struct adapter *adap = netdev2adap(dev);
@@ -2252,7 +2371,7 @@
 	if (err)
 		goto freeq;
 
-	if (adap->flags & USING_MSIX) {
+	if (adap->flags & CXGB4_USING_MSIX) {
 		name_msix_vecs(adap);
 		err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
 				  adap->msix_info[0].desc, adap);
@@ -2265,7 +2384,8 @@
 		}
 	} else {
 		err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
-				  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
+				  (adap->flags & CXGB4_USING_MSI) ? 0
+								  : IRQF_SHARED,
 				  adap->port[0]->name, adap);
 		if (err)
 			goto irq_err;
@@ -2274,15 +2394,13 @@
 	enable_rx(adap);
 	t4_sge_start(adap);
 	t4_intr_enable(adap);
-	adap->flags |= FULL_INIT_DONE;
+	adap->flags |= CXGB4_FULL_INIT_DONE;
 	mutex_unlock(&uld_mutex);
 
 	notify_ulds(adap, CXGB4_STATE_UP);
 #if IS_ENABLED(CONFIG_IPV6)
 	update_clip(adap);
 #endif
-	/* Initialize hash mac addr list*/
-	INIT_LIST_HEAD(&adap->mac_hlist);
 	return err;
 
  irq_err:
@@ -2304,7 +2422,8 @@
 
 	t4_sge_stop(adapter);
 	t4_free_sge_resources(adapter);
-	adapter->flags &= ~FULL_INIT_DONE;
+
+	adapter->flags &= ~CXGB4_FULL_INIT_DONE;
 }
 
 /*
@@ -2318,7 +2437,7 @@
 
 	netif_carrier_off(dev);
 
-	if (!(adapter->flags & FULL_INIT_DONE)) {
+	if (!(adapter->flags & CXGB4_FULL_INIT_DONE)) {
 		err = cxgb_up(adapter);
 		if (err < 0)
 			return err;
@@ -2670,7 +2789,7 @@
 
 	for (vf = 0, nvfs = pci_sriov_get_totalvfs(adap->pdev);
 		vf < nvfs; vf++) {
-		macaddr[5] = adap->pf * 16 + vf;
+		macaddr[5] = adap->pf * nvfs + vf;
 		ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, macaddr);
 	}
 }
@@ -2713,6 +2832,7 @@
 	ivi->min_tx_rate = 0;
 	ether_addr_copy(ivi->mac, vfinfo->vf_mac_addr);
 	ivi->vlan = vfinfo->vlan;
+	ivi->linkstate = vfinfo->link_state;
 	return 0;
 }
 
@@ -2749,6 +2869,27 @@
 		return -EINVAL;
 	}
 
+	if (max_tx_rate == 0) {
+		/* unbind the VF from any Traffic Class */
+		fw_pfvf =
+		    (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
+		     FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH));
+		fw_class = 0xffffffff;
+		ret = t4_set_params(adap, adap->mbox, adap->pf, vf + 1, 1,
+				    &fw_pfvf, &fw_class);
+		if (ret) {
+			dev_err(adap->pdev_dev,
+				"Err %d in unbinding PF %d VF %d from TX Rate Limiting\n",
+				ret, adap->pf, vf);
+			return -EINVAL;
+		}
+		dev_info(adap->pdev_dev,
+			 "PF %d VF %d is unbound from TX Rate Limiting\n",
+			 adap->pf, vf);
+		adap->vfinfo[vf].tx_rate = 0;
+		return 0;
+	}
+
 	ret = t4_get_link_params(pi, &link_ok, &speed, &mtu);
 	if (ret != FW_SUCCESS) {
 		dev_err(adap->pdev_dev,
@@ -2798,8 +2939,8 @@
 			    &fw_class);
 	if (ret) {
 		dev_err(adap->pdev_dev,
-			"Err %d in binding VF %d to Traffic Class %d\n",
-			ret, vf, class_id);
+			"Err %d in binding PF %d VF %d to Traffic Class %d\n",
+			ret, adap->pf, vf, class_id);
 		return -EINVAL;
 	}
 	dev_info(adap->pdev_dev, "PF %d VF %d is bound to Class %d\n",
@@ -2831,6 +2972,49 @@
 		ret, (vlan ? "setting" : "clearing"), adap->pf, vf);
 	return ret;
 }
+
+static int cxgb4_mgmt_set_vf_link_state(struct net_device *dev, int vf,
+					int link)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	u32 param, val;
+	int ret = 0;
+
+	if (vf >= adap->num_vfs)
+		return -EINVAL;
+
+	switch (link) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		val = FW_VF_LINK_STATE_AUTO;
+		break;
+
+	case IFLA_VF_LINK_STATE_ENABLE:
+		val = FW_VF_LINK_STATE_ENABLE;
+		break;
+
+	case IFLA_VF_LINK_STATE_DISABLE:
+		val = FW_VF_LINK_STATE_DISABLE;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_LINK_STATE));
+	ret = t4_set_params(adap, adap->mbox, adap->pf, vf + 1, 1,
+			    &param, &val);
+	if (ret) {
+		dev_err(adap->pdev_dev,
+			"Error %d in setting PF %d VF %d link state\n",
+			ret, adap->pf, vf);
+		return -EINVAL;
+	}
+
+	adap->vfinfo[vf].link_state = link;
+	return ret;
+}
 #endif /* CONFIG_PCI_IOV */
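
Both additions are plumbed through standard ndo_set_vf_* hooks on the
management PF netdev, so they are reachable from iproute2 (device and VF
index hypothetical): "ip link set dev <mgmt-if> vf 0 max_tx_rate 0" now
unbinds the VF from its Traffic Class instead of failing, and
"ip link set dev <mgmt-if> vf 0 state auto" (or enable/disable) maps onto
the FW_VF_LINK_STATE_* firmware parameter. The chosen state is cached in
vfinfo[] so the ivi->linkstate addition in cxgb4_mgmt_get_vf_config()
above can report it back.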
 
 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
@@ -2842,8 +3026,8 @@
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ret = t4_change_mac(pi->adapter, pi->adapter->pf, pi->viid,
-			    pi->xact_addr_filt, addr->sa_data, true, true);
+	ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt,
+				    addr->sa_data, true, &pi->smt_idx);
 	if (ret < 0)
 		return ret;
 
@@ -2858,7 +3042,7 @@
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adap = pi->adapter;
 
-	if (adap->flags & USING_MSIX) {
+	if (adap->flags & CXGB4_USING_MSIX) {
 		int i;
 		struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
 
@@ -2885,7 +3069,7 @@
 	if (index < 0 || index > pi->nqsets - 1)
 		return -EINVAL;
 
-	if (!(adap->flags & FULL_INIT_DONE)) {
+	if (!(adap->flags & CXGB4_FULL_INIT_DONE)) {
 		dev_err(adap->pdev_dev,
 			"Failed to rate limit on queue %d. Link Down?\n",
 			index);
@@ -2951,14 +3135,14 @@
 }
 
 static int cxgb_setup_tc_flower(struct net_device *dev,
-				struct tc_cls_flower_offload *cls_flower)
+				struct flow_cls_offload *cls_flower)
 {
 	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
+	case FLOW_CLS_REPLACE:
 		return cxgb4_tc_flower_replace(dev, cls_flower);
-	case TC_CLSFLOWER_DESTROY:
+	case FLOW_CLS_DESTROY:
 		return cxgb4_tc_flower_destroy(dev, cls_flower);
-	case TC_CLSFLOWER_STATS:
+	case FLOW_CLS_STATS:
 		return cxgb4_tc_flower_stats(dev, cls_flower);
 	default:
 		return -EOPNOTSUPP;
@@ -2986,7 +3170,7 @@
 	struct port_info *pi = netdev2pinfo(dev);
 	struct adapter *adap = netdev2adap(dev);
 
-	if (!(adap->flags & FULL_INIT_DONE)) {
+	if (!(adap->flags & CXGB4_FULL_INIT_DONE)) {
 		dev_err(adap->pdev_dev,
 			"Failed to setup tc on port %d. Link Down?\n",
 			pi->port_id);
@@ -3006,32 +3190,19 @@
 	}
 }
 
-static int cxgb_setup_tc_block(struct net_device *dev,
-			       struct tc_block_offload *f)
-{
-	struct port_info *pi = netdev2pinfo(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb,
-					     pi, dev, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(cxgb_block_cb_list);
 
 static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			 void *type_data)
 {
+	struct port_info *pi = netdev2pinfo(dev);
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return cxgb_setup_tc_block(dev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &cxgb_block_cb_list,
+						  cxgb_setup_tc_block_cb,
+						  pi, dev, true);
 	default:
 		return -EOPNOTSUPP;
 	}
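
flow_block_cb_setup_simple() absorbs the TC_BLOCK_BIND/TC_BLOCK_UNBIND switch
that cxgb_setup_tc_block() used to open-code. Its signature in the v5.3+
flow_offload API is roughly:

	int flow_block_cb_setup_simple(struct flow_block_offload *f,
				       struct list_head *driver_block_list,
				       flow_setup_cb_t *cb,
				       void *cb_ident, void *cb_priv,
				       bool ingress_only);

so the call above keeps pi as the identity cookie and dev as the callback
private data (mirroring the old tcf_block_cb_register() arguments), and the
trailing 'true' preserves the previous clsact-ingress-only restriction.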
@@ -3095,8 +3266,6 @@
 				    i);
 			return;
 		}
-		atomic_dec(&adapter->mps_encap[adapter->rawf_start +
-			   pi->port_id].refcnt);
 	}
 }
 
@@ -3185,7 +3354,6 @@
 			cxgb_del_udp_tunnel(netdev, ti);
 			return;
 		}
-		atomic_inc(&adapter->mps_encap[ret].refcnt);
 	}
 }
 
@@ -3246,12 +3414,13 @@
 
 #ifdef CONFIG_PCI_IOV
 static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
-	.ndo_open             = cxgb4_mgmt_open,
-	.ndo_set_vf_mac       = cxgb4_mgmt_set_vf_mac,
-	.ndo_get_vf_config    = cxgb4_mgmt_get_vf_config,
-	.ndo_set_vf_rate      = cxgb4_mgmt_set_vf_rate,
-	.ndo_get_phys_port_id = cxgb4_mgmt_get_phys_port_id,
-	.ndo_set_vf_vlan      = cxgb4_mgmt_set_vf_vlan,
+	.ndo_open               = cxgb4_mgmt_open,
+	.ndo_set_vf_mac         = cxgb4_mgmt_set_vf_mac,
+	.ndo_get_vf_config      = cxgb4_mgmt_get_vf_config,
+	.ndo_set_vf_rate        = cxgb4_mgmt_set_vf_rate,
+	.ndo_get_phys_port_id   = cxgb4_mgmt_get_phys_port_id,
+	.ndo_set_vf_vlan        = cxgb4_mgmt_set_vf_vlan,
+	.ndo_set_vf_link_state	= cxgb4_mgmt_set_vf_link_state,
 };
 #endif
 
@@ -3812,14 +3981,14 @@
  */
 static int adap_init0_config(struct adapter *adapter, int reset)
 {
-	struct fw_caps_config_cmd caps_cmd;
-	const struct firmware *cf;
-	unsigned long mtype = 0, maddr = 0;
-	u32 finiver, finicsum, cfcsum;
-	int ret;
-	int config_issued = 0;
 	char *fw_config_file, fw_config_file_path[256];
+	u32 finiver, finicsum, cfcsum, param, val;
+	struct fw_caps_config_cmd caps_cmd;
+	unsigned long mtype = 0, maddr = 0;
+	const struct firmware *cf;
 	char *config_name = NULL;
+	int config_issued = 0;
+	int ret;
 
 	/*
 	 * Reset device if necessary.
@@ -3927,6 +4096,24 @@
 			goto bye;
 	}
 
+	val = 0;
+
+	/* Offload + hash filter is supported. Older firmware will fail
+	 * this request, and that is fine.
+	 */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD));
+	ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0,
+			    1, &param, &val);
+
+	/* The FW doesn't know about hash filter + offload support;
+	 * that's not a problem, so don't return an error.
+	 */
+	if (ret < 0) {
+		dev_warn(adapter->pdev_dev,
+			 "Hash filter with ofld is not supported by FW\n");
+	}
+
 	/*
 	 * Issue a Capability Configuration command to the firmware to get it
 	 * to parse the Configuration File.  We don't use t4_fw_config_file()
@@ -4003,6 +4190,13 @@
 		dev_err(adapter->pdev_dev,
 			"HMA configuration failed with error %d\n", ret);
 
+	if (is_t6(adapter->params.chip)) {
+		ret = setup_ppod_edram(adapter);
+		if (!ret)
+			dev_info(adapter->pdev_dev,
+				 "Successfully enabled ppod edram feature\n");
+	}
+
 	/*
 	 * And finally tell the firmware to initialize itself using the
 	 * parameters from the Configuration File.
@@ -4117,7 +4311,7 @@
 		return ret;
 	}
 	if (ret == adap->mbox)
-		adap->flags |= MASTER_PF;
+		adap->flags |= CXGB4_MASTER_PF;
 
 	/*
 	 * If we're the Master PF Driver and the device is uninitialized,
@@ -4132,7 +4326,7 @@
 	/* If firmware is too old (not supported by driver) force an update. */
 	if (ret)
 		state = DEV_STATE_UNINIT;
-	if ((adap->flags & MASTER_PF) && state != DEV_STATE_INIT) {
+	if ((adap->flags & CXGB4_MASTER_PF) && state != DEV_STATE_INIT) {
 		struct fw_info *fw_info;
 		struct fw_hdr *card_fw;
 		const struct firmware *fw;
@@ -4194,7 +4388,7 @@
 				ret);
 		dev_info(adap->pdev_dev, "Coming up as %s: "\
 			 "Adapter already initialized\n",
-			 adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
+			 adap->flags & CXGB4_MASTER_PF ? "MASTER" : "SLAVE");
 	} else {
 		dev_info(adap->pdev_dev, "Coming up as MASTER: "\
 			 "Initializing adapter\n");
@@ -4280,6 +4474,24 @@
 	if (ret < 0)
 		goto bye;
 
+	/* Grab the SGE Doorbell Queue Timer values.  If the query succeeds,
+	 * the Firmware and Hardware support the feature.
+	 */
+	params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		    FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+			      1, params, val);
+
+	if (!ret) {
+		adap->sge.dbqtimer_tick = val[0];
+		ret = t4_read_sge_dbqtimers(adap,
+					    ARRAY_SIZE(adap->sge.dbqtimer_val),
+					    adap->sge.dbqtimer_val);
+	}
+
+	if (!ret)
+		adap->flags |= CXGB4_SGE_DBQ_TIMER;
+
 	if (is_bypass_device(adap->pdev->device))
 		adap->params.bypass = 1;
 
@@ -4402,7 +4614,7 @@
 	 * offload connection through firmware work request
 	 */
 	if ((val[0] != val[1]) && (ret >= 0)) {
-		adap->flags |= FW_OFLD_CONN;
+		adap->flags |= CXGB4_FW_OFLD_CONN;
 		adap->tids.aftid_base = val[0];
 		adap->tids.aftid_end = val[1];
 	}
@@ -4447,6 +4659,15 @@
 		adap->params.filter2_wr_support = (ret == 0 && val[0] != 0);
 	}
 
+	/* Check if the FW supports returning the VIN and SMT index.
+	 * If not, the driver will derive these values from the VIID.
+	 */
+	params[0] = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+			      1, params, val);
+	adap->params.viid_smt_extn_support = (ret == 0 && val[0] != 0);
+
 	/*
 	 * Get device capabilities so we can determine what resources we need
 	 * to manage.
@@ -4460,6 +4681,13 @@
 	if (ret < 0)
 		goto bye;
 
+	/* The hash filter code must verify some mandatory register settings,
+	 * and for that it needs to know whether offload is enabled, hence
+	 * the check and assignment here.
+	 */
+	if (caps_cmd.ofldcaps)
+		adap->params.offload = 1;
+
 	if (caps_cmd.ofldcaps ||
 	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) {
 		/* query offload-related parameters */
@@ -4486,7 +4714,7 @@
 		 * 2. Server filter: This are special filters which are used
 		 * to redirect SYN packets to offload queue.
 		 */
-		if (adap->flags & FW_OFLD_CONN && !is_bypass(adap)) {
+		if (adap->flags & CXGB4_FW_OFLD_CONN && !is_bypass(adap)) {
 			adap->tids.sftid_base = adap->tids.ftid_base +
 					DIV_ROUND_UP(adap->tids.nftids, 3);
 			adap->tids.nsftids = adap->tids.nftids -
@@ -4499,11 +4727,8 @@
 		adap->params.ofldq_wr_cred = val[5];
 
 		if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
-			ret = init_hash_filter(adap);
-			if (ret < 0)
-				goto bye;
+			init_hash_filter(adap);
 		} else {
-			adap->params.offload = 1;
 			adap->num_ofld_uld += 1;
 		}
 	}
@@ -4595,6 +4820,22 @@
 			goto bye;
 		adap->vres.iscsi.start = val[0];
 		adap->vres.iscsi.size = val[1] - val[0] + 1;
+		if (is_t6(adap->params.chip)) {
+			params[0] = FW_PARAM_PFVF(PPOD_EDRAM_START);
+			params[1] = FW_PARAM_PFVF(PPOD_EDRAM_END);
+			ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+					      params, val);
+			if (!ret) {
+				adap->vres.ppod_edram.start = val[0];
+				adap->vres.ppod_edram.size =
+					val[1] - val[0] + 1;
+
+				dev_info(adap->pdev_dev,
+					 "ppod edram start 0x%x end 0x%x size 0x%x\n",
+					 val[0], val[1],
+					 adap->vres.ppod_edram.size);
+			}
+		}
 		/* LIO target and cxgb4i initiator */
 		adap->num_ofld_uld += 2;
 	}
@@ -4665,7 +4906,7 @@
 			     adap->params.b_wnd);
 	}
 	t4_init_sge_params(adap);
-	adap->flags |= FW_OK;
+	adap->flags |= CXGB4_FW_OK;
 	t4_init_tp_params(adap, true);
 	return 0;
 
@@ -4700,7 +4941,7 @@
 		goto out;
 
 	rtnl_lock();
-	adap->flags &= ~FW_OK;
+	adap->flags &= ~CXGB4_FW_OK;
 	notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
 	spin_lock(&adap->stats_lock);
 	for_each_port(adap, i) {
@@ -4712,12 +4953,12 @@
 	}
 	spin_unlock(&adap->stats_lock);
 	disable_interrupts(adap);
-	if (adap->flags & FULL_INIT_DONE)
+	if (adap->flags & CXGB4_FULL_INIT_DONE)
 		cxgb_down(adap);
 	rtnl_unlock();
-	if ((adap->flags & DEV_ENABLED)) {
+	if ((adap->flags & CXGB4_DEV_ENABLED)) {
 		pci_disable_device(pdev);
-		adap->flags &= ~DEV_ENABLED;
+		adap->flags &= ~CXGB4_DEV_ENABLED;
 	}
 out:	return state == pci_channel_io_perm_failure ?
 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -4735,37 +4976,48 @@
 		return PCI_ERS_RESULT_RECOVERED;
 	}
 
-	if (!(adap->flags & DEV_ENABLED)) {
+	if (!(adap->flags & CXGB4_DEV_ENABLED)) {
 		if (pci_enable_device(pdev)) {
 			dev_err(&pdev->dev, "Cannot reenable PCI "
 					    "device after reset\n");
 			return PCI_ERS_RESULT_DISCONNECT;
 		}
-		adap->flags |= DEV_ENABLED;
+		adap->flags |= CXGB4_DEV_ENABLED;
 	}
 
 	pci_set_master(pdev);
 	pci_restore_state(pdev);
 	pci_save_state(pdev);
-	pci_cleanup_aer_uncorrect_error_status(pdev);
 
 	if (t4_wait_dev_ready(adap->regs) < 0)
 		return PCI_ERS_RESULT_DISCONNECT;
 	if (t4_fw_hello(adap, adap->mbox, adap->pf, MASTER_MUST, NULL) < 0)
 		return PCI_ERS_RESULT_DISCONNECT;
-	adap->flags |= FW_OK;
+	adap->flags |= CXGB4_FW_OK;
 	if (adap_init1(adap, &c))
 		return PCI_ERS_RESULT_DISCONNECT;
 
 	for_each_port(adap, i) {
-		struct port_info *p = adap2pinfo(adap, i);
+		struct port_info *pi = adap2pinfo(adap, i);
+		u8 vivld = 0, vin = 0;
 
-		ret = t4_alloc_vi(adap, adap->mbox, p->tx_chan, adap->pf, 0, 1,
-				  NULL, NULL);
+		ret = t4_alloc_vi(adap, adap->mbox, pi->tx_chan, adap->pf, 0, 1,
+				  NULL, NULL, &vivld, &vin);
 		if (ret < 0)
 			return PCI_ERS_RESULT_DISCONNECT;
-		p->viid = ret;
-		p->xact_addr_filt = -1;
+		pi->viid = ret;
+		pi->xact_addr_filt = -1;
+		/* If fw supports returning the VIN as part of FW_VI_CMD,
+		 * save the returned values.
+		 */
+		if (adap->params.viid_smt_extn_support) {
+			pi->vivld = vivld;
+			pi->vin = vin;
+		} else {
+			/* Retrieve the values from VIID */
+			pi->vivld = FW_VIID_VIVLD_G(pi->viid);
+			pi->vin = FW_VIID_VIN_G(pi->viid);
+		}
 	}
 
 	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
@@ -4853,7 +5105,7 @@
 	 * at all is problematic ...
 	 */
 	niqflint = adap->params.pfres.niqflint - 1;
-	if (!(adap->flags & USING_MSIX))
+	if (!(adap->flags & CXGB4_USING_MSIX))
 		niqflint--;
 	neq = adap->params.pfres.neq / 2;
 	avail_eth_qsets = min(niqflint, neq);
@@ -5135,8 +5387,8 @@
 	/* Software/Hardware configuration */
 	dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n",
 		 is_offload(adapter) ? "R" : "",
-		 ((adapter->flags & USING_MSIX) ? "MSI-X" :
-		  (adapter->flags & USING_MSI) ? "MSI" : ""),
+		 ((adapter->flags & CXGB4_USING_MSIX) ? "MSI-X" :
+		  (adapter->flags & CXGB4_USING_MSI) ? "MSI" : ""),
 		 is_offload(adapter) ? "Offload" : "non-Offload");
 }
 
@@ -5184,7 +5436,6 @@
 {
 	unsigned int i;
 
-	kvfree(adapter->mps_encap);
 	kvfree(adapter->smt);
 	kvfree(adapter->l2t);
 	kvfree(adapter->srq);
@@ -5211,13 +5462,13 @@
 			kfree(adap2pinfo(adapter, i)->rss);
 			free_netdev(adapter->port[i]);
 		}
-	if (adapter->flags & FW_OK)
+	if (adapter->flags & CXGB4_FW_OK)
 		t4_fw_bye(adapter, adapter->pf);
 }
 
 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
-		   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
+		   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 #define SEGMENT_SIZE 128
 
 static int t4_get_chip_type(struct adapter *adap, int ver)
@@ -5310,7 +5561,6 @@
 		char name[IFNAMSIZ];
 		u32 devcap2;
 		u16 flags;
-		int pos;
 
 		/* If we want to instantiate Virtual Functions, then our
 		 * parent bridge's PCI-E needs to support Alternative Routing
@@ -5318,9 +5568,8 @@
 		 * and above.
 		 */
 		pbridge = pdev->bus->self;
-		pos = pci_find_capability(pbridge, PCI_CAP_ID_EXP);
-		pci_read_config_word(pbridge, pos + PCI_EXP_FLAGS, &flags);
-		pci_read_config_dword(pbridge, pos + PCI_EXP_DEVCAP2, &devcap2);
+		pcie_capability_read_word(pbridge, PCI_EXP_FLAGS, &flags);
+		pcie_capability_read_dword(pbridge, PCI_EXP_DEVCAP2, &devcap2);
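
pcie_capability_read_word()/_dword() fold the PCIe capability lookup into the
register access, so the old three-step sequence, roughly:

	pos = pci_find_capability(pbridge, PCI_CAP_ID_EXP);
	pci_read_config_word(pbridge, pos + PCI_EXP_FLAGS, &flags);
	pci_read_config_dword(pbridge, pos + PCI_EXP_DEVCAP2, &devcap2);

collapses into the two helper calls above, with the bonus that the helpers
fail gracefully (returning an error and zeroing the output) on devices
without a PCIe capability rather than reading from a bogus offset.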
 
 		if ((flags & PCI_EXP_FLAGS_VERS) < 2 ||
 		    !(devcap2 & PCI_EXP_DEVCAP2_ARI)) {
@@ -5452,7 +5701,7 @@
 	whoami = t4_read_reg(adapter, PL_WHOAMI_A);
 	pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
 	chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id));
-	if (chip < 0) {
+	if ((int)chip < 0) {
 		dev_err(&pdev->dev, "Device %d is not supported\n", device_id);
 		err = chip;
 		goto out_free_adapter;
@@ -5515,7 +5764,7 @@
 	}
 
 	/* PCI device has been enabled */
-	adapter->flags |= DEV_ENABLED;
+	adapter->flags |= CXGB4_DEV_ENABLED;
 	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
 	/* If possible, we use PCIe Relaxed Ordering Attribute to deliver
@@ -5533,7 +5782,7 @@
 	 * using Relaxed Ordering.
 	 */
 	if (!pcie_relaxed_ordering_enabled(pdev))
-		adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+		adapter->flags |= CXGB4_ROOT_NO_RELAXED_ORDERING;
 
 	spin_lock_init(&adapter->stats_lock);
 	spin_lock_init(&adapter->tid_release_lock);
@@ -5602,6 +5851,9 @@
 			     (is_t5(adapter->params.chip) ? STATMODE_V(0) :
 			      T6_STATMODE_V(0)));
 
+	/* Initialize hash mac addr list */
+	INIT_LIST_HEAD(&adapter->mac_hlist);
+
 	for_each_port(adapter, i) {
 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
 					   MAX_ETH_QSETS);
@@ -5621,7 +5873,7 @@
 
 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-			NETIF_F_RXCSUM | NETIF_F_RXHASH |
+			NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_GRO |
 			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 			NETIF_F_HW_TC;
 
@@ -5630,9 +5882,12 @@
 						   NETIF_F_IPV6_CSUM |
 						   NETIF_F_RXCSUM |
 						   NETIF_F_GSO_UDP_TUNNEL |
+						   NETIF_F_GSO_UDP_TUNNEL_CSUM |
 						   NETIF_F_TSO | NETIF_F_TSO6;
 
-			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+					       NETIF_F_GSO_UDP_TUNNEL_CSUM |
+					       NETIF_F_HW_TLS_RECORD;
 		}
 
 		if (highdma)
@@ -5659,7 +5914,7 @@
 
 	pci_set_drvdata(pdev, adapter);
 
-	if (adapter->flags & FW_OK) {
+	if (adapter->flags & CXGB4_FW_OK) {
 		err = t4_port_init(adapter, func, func, 0);
 		if (err)
 			goto out_free_dev;
@@ -5681,7 +5936,7 @@
 		}
 	}
 
-	if (!(adapter->flags & FW_OK))
+	if (!(adapter->flags & CXGB4_FW_OK))
 		goto fw_attach_fail;
 
 	/* Configure queues and allocate tables now, they can be needed as
@@ -5704,12 +5959,6 @@
 		adapter->params.offload = 0;
 	}
 
-	adapter->mps_encap = kvcalloc(adapter->params.arch.mps_tcam_size,
-				      sizeof(struct mps_encap_entry),
-				      GFP_KERNEL);
-	if (!adapter->mps_encap)
-		dev_warn(&pdev->dev, "could not allocate MPS Encap entries, continuing\n");
-
 #if IS_ENABLED(CONFIG_IPV6)
 	if (chip_ver <= CHELSIO_T5 &&
 	    (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) {
@@ -5775,9 +6024,9 @@
 
 	/* See what interrupts we'll be using */
 	if (msi > 1 && enable_msix(adapter) == 0)
-		adapter->flags |= USING_MSIX;
+		adapter->flags |= CXGB4_USING_MSIX;
 	else if (msi > 0 && pci_enable_msi(pdev) == 0) {
-		adapter->flags |= USING_MSI;
+		adapter->flags |= CXGB4_USING_MSI;
 		if (msi > 1)
 			free_msix_info(adapter);
 	}
@@ -5785,6 +6034,8 @@
 	/* check for PCI Express bandwidth capabilities */
 	pcie_print_link_status(pdev);
 
+	cxgb4_init_mps_ref_entries(adapter);
+
 	err = init_rss(adapter);
 	if (err)
 		goto out_free_dev;
@@ -5844,13 +6095,17 @@
 	if (!is_t4(adapter->params.chip))
 		cxgb4_ptp_init(adapter);
 
+	if (IS_REACHABLE(CONFIG_THERMAL) &&
+	    !is_t4(adapter->params.chip) && (adapter->flags & CXGB4_FW_OK))
+		cxgb4_thermal_init(adapter);
+
 	print_adapter_info(adapter);
 	return 0;
 
  out_free_dev:
 	t4_free_sge_resources(adapter);
 	free_some_resources(adapter);
-	if (adapter->flags & USING_MSIX)
+	if (adapter->flags & CXGB4_USING_MSIX)
 		free_msix_info(adapter);
 	if (adapter->num_uld || adapter->num_ofld_uld)
 		t4_uld_mem_free(adapter);
@@ -5876,13 +6131,19 @@
 static void remove_one(struct pci_dev *pdev)
 {
 	struct adapter *adapter = pci_get_drvdata(pdev);
+	struct hash_mac_addr *entry, *tmp;
 
 	if (!adapter) {
 		pci_release_regions(pdev);
 		return;
 	}
 
-	adapter->flags |= SHUTTING_DOWN;
+	/* If we allocated filters, free up state associated with any
+	 * valid filters ...
+	 */
+	clear_all_filters(adapter);
+
+	adapter->flags |= CXGB4_SHUTTING_DOWN;
 
 	if (adapter->pf == 4) {
 		int i;
@@ -5901,6 +6162,8 @@
 
 		disable_interrupts(adapter);
 
+		cxgb4_free_mps_ref_entries(adapter);
+
 		for_each_port(adapter, i)
 			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
 				unregister_netdev(adapter->port[i]);
@@ -5909,20 +6172,23 @@
 
 		if (!is_t4(adapter->params.chip))
 			cxgb4_ptp_stop(adapter);
+		if (IS_REACHABLE(CONFIG_THERMAL))
+			cxgb4_thermal_remove(adapter);
 
-		/* If we allocated filters, free up state associated with any
-		 * valid filters ...
-		 */
-		clear_all_filters(adapter);
-
-		if (adapter->flags & FULL_INIT_DONE)
+		if (adapter->flags & CXGB4_FULL_INIT_DONE)
 			cxgb_down(adapter);
 
-		if (adapter->flags & USING_MSIX)
+		if (adapter->flags & CXGB4_USING_MSIX)
 			free_msix_info(adapter);
 		if (adapter->num_uld || adapter->num_ofld_uld)
 			t4_uld_mem_free(adapter);
 		free_some_resources(adapter);
+		list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
+					 list) {
+			list_del(&entry->list);
+			kfree(entry);
+		}
+
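The teardown added above depends on the _safe list iterator, which caches the next node so the current one can be unlinked and freed mid-walk. A minimal sketch with a hypothetical entry type:

```c
#include <linux/list.h>
#include <linux/slab.h>

struct mac_node {		/* hypothetical stand-in for hash_mac_addr */
	struct list_head list;
	u8 addr[6];
};

static void free_mac_list(struct list_head *head)
{
	struct mac_node *entry, *tmp;

	/* @tmp always points one node ahead, so list_del() + kfree()
	 * on @entry cannot break the traversal.
	 */
	list_for_each_entry_safe(entry, tmp, head, list) {
		list_del(&entry->list);
		kfree(entry);
	}
}
```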
 #if IS_ENABLED(CONFIG_IPV6)
 		t4_cleanup_clip_tbl(adapter);
 #endif
@@ -5936,9 +6202,9 @@
 #endif
 	iounmap(adapter->regs);
 	pci_disable_pcie_error_reporting(pdev);
-	if ((adapter->flags & DEV_ENABLED)) {
+	if ((adapter->flags & CXGB4_DEV_ENABLED)) {
 		pci_disable_device(pdev);
-		adapter->flags &= ~DEV_ENABLED;
+		adapter->flags &= ~CXGB4_DEV_ENABLED;
 	}
 	pci_release_regions(pdev);
 	kfree(adapter->mbox_log);
@@ -5964,7 +6230,7 @@
 		return;
 	}
 
-	adapter->flags |= SHUTTING_DOWN;
+	adapter->flags |= CXGB4_SHUTTING_DOWN;
 
 	if (adapter->pf == 4) {
 		int i;
@@ -5982,7 +6248,7 @@
 		disable_msi(adapter);
 
 		t4_sge_stop(adapter);
-		if (adapter->flags & FW_OK)
+		if (adapter->flags & CXGB4_FW_OK)
 			t4_fw_bye(adapter, adapter->mbox);
 	}
 }
@@ -6003,22 +6269,28 @@
 {
 	int ret;
 
-	/* Debugfs support is optional, just warn if this fails */
 	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	if (!cxgb4_debugfs_root)
-		pr_warn("could not create debugfs entry, continuing\n");
 
 	ret = pci_register_driver(&cxgb4_driver);
 	if (ret < 0)
-		debugfs_remove(cxgb4_debugfs_root);
+		goto err_pci;
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (!inet6addr_registered) {
-		register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
-		inet6addr_registered = true;
+		ret = register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+		if (ret)
+			pci_unregister_driver(&cxgb4_driver);
+		else
+			inet6addr_registered = true;
 	}
 #endif
 
+	if (ret == 0)
+		return ret;
+
+err_pci:
+	debugfs_remove(cxgb4_debugfs_root);
+
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c
new file mode 100644
index 0000000..b1a073e
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Chelsio Communications, Inc. All rights reserved. */
+
+#include "cxgb4.h"
+
+static int cxgb4_mps_ref_dec_by_mac(struct adapter *adap,
+				    const u8 *addr, const u8 *mask)
+{
+	u8 bitmask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	struct mps_entries_ref *mps_entry, *tmp;
+	int ret = -EINVAL;
+
+	spin_lock_bh(&adap->mps_ref_lock);
+	list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) {
+		if (ether_addr_equal(mps_entry->addr, addr) &&
+		    ether_addr_equal(mps_entry->mask, mask ? mask : bitmask)) {
+			if (!refcount_dec_and_test(&mps_entry->refcnt)) {
+				spin_unlock_bh(&adap->mps_ref_lock);
+				return -EBUSY;
+			}
+			list_del(&mps_entry->list);
+			kfree(mps_entry);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_bh(&adap->mps_ref_lock);
+	return ret;
+}
+
+static int cxgb4_mps_ref_dec(struct adapter *adap, u16 idx)
+{
+	struct mps_entries_ref *mps_entry, *tmp;
+	int ret = -EINVAL;
+
+	spin_lock(&adap->mps_ref_lock);
+	list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) {
+		if (mps_entry->idx == idx) {
+			if (!refcount_dec_and_test(&mps_entry->refcnt)) {
+				spin_unlock(&adap->mps_ref_lock);
+				return -EBUSY;
+			}
+			list_del(&mps_entry->list);
+			kfree(mps_entry);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&adap->mps_ref_lock);
+	return ret;
+}
+
+static int cxgb4_mps_ref_inc(struct adapter *adap, const u8 *mac_addr,
+			     u16 idx, const u8 *mask)
+{
+	u8 bitmask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	struct mps_entries_ref *mps_entry;
+	int ret = 0;
+
+	spin_lock_bh(&adap->mps_ref_lock);
+	list_for_each_entry(mps_entry, &adap->mps_ref, list) {
+		if (mps_entry->idx == idx) {
+			refcount_inc(&mps_entry->refcnt);
+			goto unlock;
+		}
+	}
+	mps_entry = kzalloc(sizeof(*mps_entry), GFP_ATOMIC);
+	if (!mps_entry) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+	ether_addr_copy(mps_entry->mask, mask ? mask : bitmask);
+	ether_addr_copy(mps_entry->addr, mac_addr);
+	mps_entry->idx = idx;
+	refcount_set(&mps_entry->refcnt, 1);
+	list_add_tail(&mps_entry->list, &adap->mps_ref);
+unlock:
+	spin_unlock_bh(&adap->mps_ref_lock);
+	return ret;
+}
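Taken together, the dec/inc helpers above implement a look-up-or-insert reference-count cache guarded by a spinlock, allocating with GFP_ATOMIC because the lock is held across the insert. The essence, reduced to a sketch with hypothetical types:

```c
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct ref_node {		/* hypothetical cache entry */
	struct list_head list;
	refcount_t refcnt;
	u16 idx;
};

/* Take a reference on @idx, inserting a new node on first use. */
static int ref_get(spinlock_t *lock, struct list_head *head, u16 idx)
{
	struct ref_node *n;

	spin_lock_bh(lock);
	list_for_each_entry(n, head, list) {
		if (n->idx == idx) {
			refcount_inc(&n->refcnt);
			goto out;
		}
	}
	/* GFP_ATOMIC: we cannot sleep while holding the spinlock */
	n = kzalloc(sizeof(*n), GFP_ATOMIC);
	if (!n) {
		spin_unlock_bh(lock);
		return -ENOMEM;
	}
	n->idx = idx;
	refcount_set(&n->refcnt, 1);
	list_add_tail(&n->list, head);
out:
	spin_unlock_bh(lock);
	return 0;
}
```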
+
+int cxgb4_free_mac_filt(struct adapter *adap, unsigned int viid,
+			unsigned int naddr, const u8 **addr, bool sleep_ok)
+{
+	int ret, i;
+
+	for (i = 0; i < naddr; i++) {
+		if (!cxgb4_mps_ref_dec_by_mac(adap, addr[i], NULL)) {
+			ret = t4_free_mac_filt(adap, adap->mbox, viid,
+					       1, &addr[i], sleep_ok);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	/* return number of filters freed */
+	return naddr;
+}
+
+int cxgb4_alloc_mac_filt(struct adapter *adap, unsigned int viid,
+			 bool free, unsigned int naddr, const u8 **addr,
+			 u16 *idx, u64 *hash, bool sleep_ok)
+{
+	int ret, i;
+
+	ret = t4_alloc_mac_filt(adap, adap->mbox, viid, free,
+				naddr, addr, idx, hash, sleep_ok);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < naddr; i++) {
+		if (idx[i] != 0xffff) {
+			if (cxgb4_mps_ref_inc(adap, addr[i], idx[i], NULL)) {
+				ret = -ENOMEM;
+				goto error;
+			}
+		}
+	}
+
+	goto out;
+error:
+	cxgb4_free_mac_filt(adap, viid, naddr, addr, sleep_ok);
+
+out:
+	/* Returns a negative error number or the number of filters allocated */
+	return ret;
+}
+
+int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
+			  int *tcam_idx, const u8 *addr,
+			  bool persistent, u8 *smt_idx)
+{
+	int ret;
+
+	ret = cxgb4_change_mac(pi, viid, tcam_idx,
+			       addr, persistent, smt_idx);
+	if (ret < 0)
+		return ret;
+
+	cxgb4_mps_ref_inc(pi->adapter, addr, *tcam_idx, NULL);
+	return ret;
+}
+
+int cxgb4_free_raw_mac_filt(struct adapter *adap,
+			    unsigned int viid,
+			    const u8 *addr,
+			    const u8 *mask,
+			    unsigned int idx,
+			    u8 lookup_type,
+			    u8 port_id,
+			    bool sleep_ok)
+{
+	int ret = 0;
+
+	if (!cxgb4_mps_ref_dec(adap, idx))
+		ret = t4_free_raw_mac_filt(adap, viid, addr,
+					   mask, idx, lookup_type,
+					   port_id, sleep_ok);
+
+	return ret;
+}
+
+int cxgb4_alloc_raw_mac_filt(struct adapter *adap,
+			     unsigned int viid,
+			     const u8 *addr,
+			     const u8 *mask,
+			     unsigned int idx,
+			     u8 lookup_type,
+			     u8 port_id,
+			     bool sleep_ok)
+{
+	int ret;
+
+	ret = t4_alloc_raw_mac_filt(adap, viid, addr,
+				    mask, idx, lookup_type,
+				    port_id, sleep_ok);
+	if (ret < 0)
+		return ret;
+
+	if (cxgb4_mps_ref_inc(adap, addr, ret, mask)) {
+		ret = -ENOMEM;
+		t4_free_raw_mac_filt(adap, viid, addr,
+				     mask, idx, lookup_type,
+				     port_id, sleep_ok);
+	}
+
+	return ret;
+}
+
+int cxgb4_free_encap_mac_filt(struct adapter *adap, unsigned int viid,
+			      int idx, bool sleep_ok)
+{
+	int ret = 0;
+
+	if (!cxgb4_mps_ref_dec(adap, idx))
+		ret = t4_free_encap_mac_filt(adap, viid, idx, sleep_ok);
+
+	return ret;
+}
+
+int cxgb4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid,
+			       const u8 *addr, const u8 *mask,
+			       unsigned int vni, unsigned int vni_mask,
+			       u8 dip_hit, u8 lookup_type, bool sleep_ok)
+{
+	int ret;
+
+	ret = t4_alloc_encap_mac_filt(adap, viid, addr, mask, vni, vni_mask,
+				      dip_hit, lookup_type, sleep_ok);
+	if (ret < 0)
+		return ret;
+
+	if (cxgb4_mps_ref_inc(adap, addr, ret, mask)) {
+		ret = -ENOMEM;
+		t4_free_encap_mac_filt(adap, viid, ret, sleep_ok);
+	}
+	return ret;
+}
+
+int cxgb4_init_mps_ref_entries(struct adapter *adap)
+{
+	spin_lock_init(&adap->mps_ref_lock);
+	INIT_LIST_HEAD(&adap->mps_ref);
+
+	return 0;
+}
+
+void cxgb4_free_mps_ref_entries(struct adapter *adap)
+{
+	struct mps_entries_ref *mps_entry, *tmp;
+
+	if (list_empty(&adap->mps_ref))
+		return;
+
+	spin_lock(&adap->mps_ref_lock);
+	list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) {
+		list_del(&mps_entry->list);
+		kfree(mps_entry);
+	}
+	spin_unlock(&adap->mps_ref_lock);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
index 9f9d6ca..58a039c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
@@ -378,10 +378,10 @@
 	int err;
 
 	memset(&c, 0, sizeof(c));
-		c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) |
-					     FW_CMD_REQUEST_F |
-					     FW_CMD_WRITE_F |
-					     FW_PTP_CMD_PORTID_V(0));
+	c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) |
+				     FW_CMD_REQUEST_F |
+				     FW_CMD_WRITE_F |
+				     FW_PTP_CMD_PORTID_V(0));
 	c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16));
 	c.u.scmd.sc = FW_PTP_SC_INIT_TIMER;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index c116f96..e447976 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -67,7 +67,8 @@
 static struct ch_tc_flower_entry *allocate_flower_entry(void)
 {
 	struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
-	spin_lock_init(&new->lock);
+	if (new)
+		spin_lock_init(&new->lock);
 	return new;
 }
 
@@ -80,31 +81,26 @@
 }
 
 static void cxgb4_process_flow_match(struct net_device *dev,
-				     struct tc_cls_flower_offload *cls,
+				     struct flow_cls_offload *cls,
 				     struct ch_filter_specification *fs)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
 	u16 addr_type = 0;
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  cls->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->mask);
-		u16 ethtype_key = ntohs(key->n_proto);
-		u16 ethtype_mask = ntohs(mask->n_proto);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+		u16 ethtype_key, ethtype_mask;
+
+		flow_rule_match_basic(rule, &match);
+		ethtype_key = ntohs(match.key->n_proto);
+		ethtype_mask = ntohs(match.mask->n_proto);
 
 		if (ethtype_key == ETH_P_ALL) {
 			ethtype_key = 0;
@@ -116,118 +112,95 @@
 
 		fs->val.ethtype = ethtype_key;
 		fs->mask.ethtype = ethtype_mask;
-		fs->val.proto = key->ip_proto;
-		fs->mask.proto = mask->ip_proto;
+		fs->val.proto = match.key->ip_proto;
+		fs->mask.proto = match.mask->ip_proto;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  cls->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  cls->mask);
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
 		fs->type = 0;
-		memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst));
-		memcpy(&fs->val.fip[0], &key->src, sizeof(key->src));
-		memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst));
-		memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src));
+		memcpy(&fs->val.lip[0], &match.key->dst, sizeof(match.key->dst));
+		memcpy(&fs->val.fip[0], &match.key->src, sizeof(match.key->src));
+		memcpy(&fs->mask.lip[0], &match.mask->dst, sizeof(match.mask->dst));
+		memcpy(&fs->mask.fip[0], &match.mask->src, sizeof(match.mask->src));
 
 		/* also initialize nat_lip/fip to same values */
-		memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst));
-		memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src));
-
+		memcpy(&fs->nat_lip[0], &match.key->dst, sizeof(match.key->dst));
+		memcpy(&fs->nat_fip[0], &match.key->src, sizeof(match.key->src));
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  cls->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  cls->mask);
+		struct flow_match_ipv6_addrs match;
 
+		flow_rule_match_ipv6_addrs(rule, &match);
 		fs->type = 1;
-		memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst));
-		memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src));
-		memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst));
-		memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src));
+		memcpy(&fs->val.lip[0], match.key->dst.s6_addr,
+		       sizeof(match.key->dst));
+		memcpy(&fs->val.fip[0], match.key->src.s6_addr,
+		       sizeof(match.key->src));
+		memcpy(&fs->mask.lip[0], match.mask->dst.s6_addr,
+		       sizeof(match.mask->dst));
+		memcpy(&fs->mask.fip[0], match.mask->src.s6_addr,
+		       sizeof(match.mask->src));
 
 		/* also initialize nat_lip/fip to same values */
-		memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst));
-		memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src));
+		memcpy(&fs->nat_lip[0], match.key->dst.s6_addr,
+		       sizeof(match.key->dst));
+		memcpy(&fs->nat_fip[0], match.key->src.s6_addr,
+		       sizeof(match.key->src));
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
 
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_PORTS,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_PORTS,
-						 cls->mask);
-		fs->val.lport = cpu_to_be16(key->dst);
-		fs->mask.lport = cpu_to_be16(mask->dst);
-		fs->val.fport = cpu_to_be16(key->src);
-		fs->mask.fport = cpu_to_be16(mask->src);
+		flow_rule_match_ports(rule, &match);
+		fs->val.lport = cpu_to_be16(match.key->dst);
+		fs->mask.lport = cpu_to_be16(match.mask->dst);
+		fs->val.fport = cpu_to_be16(match.key->src);
+		fs->mask.fport = cpu_to_be16(match.mask->src);
 
 		/* also initialize nat_lport/fport to same values */
-		fs->nat_lport = cpu_to_be16(key->dst);
-		fs->nat_fport = cpu_to_be16(key->src);
+		fs->nat_lport = cpu_to_be16(match.key->dst);
+		fs->nat_fport = cpu_to_be16(match.key->src);
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
 
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_IP,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_IP,
-						 cls->mask);
-		fs->val.tos = key->tos;
-		fs->mask.tos = mask->tos;
+		flow_rule_match_ip(rule, &match);
+		fs->val.tos = match.key->tos;
+		fs->mask.tos = match.mask->tos;
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_ENC_KEYID,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_ENC_KEYID,
-						 cls->mask);
-		fs->val.vni = be32_to_cpu(key->keyid);
-		fs->mask.vni = be32_to_cpu(mask->keyid);
+		flow_rule_match_enc_keyid(rule, &match);
+		fs->val.vni = be32_to_cpu(match.key->keyid);
+		fs->mask.vni = be32_to_cpu(match.mask->keyid);
 		if (fs->mask.vni) {
 			fs->val.encap_vld = 1;
 			fs->mask.encap_vld = 1;
 		}
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 		u16 vlan_tci, vlan_tci_mask;
 
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_VLAN,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_VLAN,
-						 cls->mask);
-		vlan_tci = key->vlan_id | (key->vlan_priority <<
-					   VLAN_PRIO_SHIFT);
-		vlan_tci_mask = mask->vlan_id | (mask->vlan_priority <<
-						 VLAN_PRIO_SHIFT);
+		flow_rule_match_vlan(rule, &match);
+		vlan_tci = match.key->vlan_id | (match.key->vlan_priority <<
+						 VLAN_PRIO_SHIFT);
+		vlan_tci_mask = match.mask->vlan_id | (match.mask->vlan_priority <<
+						       VLAN_PRIO_SHIFT);
 		fs->val.ivlan = vlan_tci;
 		fs->mask.ivlan = vlan_tci_mask;
 
+		fs->val.ivlan_vld = 1;
+		fs->mask.ivlan_vld = 1;
+
 		/* Chelsio adapters use ivlan_vld bit to match vlan packets
 		 * as 802.1Q. Also, when vlan tag is present in packets,
 		 * ethtype match is used then to match on ethtype of inner
@@ -238,8 +211,6 @@
 		 * ethtype value with ethtype of inner header.
 		 */
 		if (fs->val.ethtype == ETH_P_8021Q) {
-			fs->val.ivlan_vld = 1;
-			fs->mask.ivlan_vld = 1;
 			fs->val.ethtype = 0;
 			fs->mask.ethtype = 0;
 		}
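The conversions above all follow the same new idiom: a flow_cls_offload carries a flow_rule, and each flow_rule_match_*() call fills a struct holding both key and mask, replacing the paired skb_flow_dissector_target() lookups. A compact sketch (the callback is hypothetical; the match API is real):

```c
#include <linux/printk.h>
#include <net/flow_offload.h>

static void example_parse_basic(struct flow_cls_offload *cls)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
	struct flow_match_basic match;

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC))
		return;

	/* One call populates both key and mask. */
	flow_rule_match_basic(rule, &match);
	pr_info("proto=%04x/%04x ip_proto=%u\n",
		ntohs(match.key->n_proto), ntohs(match.mask->n_proto),
		match.key->ip_proto);
}
```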
@@ -253,12 +224,14 @@
 }
 
 static int cxgb4_validate_flow_match(struct net_device *dev,
-				     struct tc_cls_flower_offload *cls)
+				     struct flow_cls_offload *cls)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 ethtype_mask = 0;
 	u16 ethtype_key = 0;
 
-	if (cls->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
@@ -268,36 +241,29 @@
 	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
 	      BIT(FLOW_DISSECTOR_KEY_IP))) {
 		netdev_warn(dev, "Unsupported key used: 0x%x\n",
-			    cls->dissector->used_keys);
+			    dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->mask);
-		ethtype_key = ntohs(key->n_proto);
-		ethtype_mask = ntohs(mask->n_proto);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		ethtype_key = ntohs(match.key->n_proto);
+		ethtype_mask = ntohs(match.mask->n_proto);
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
 		u16 eth_ip_type = ethtype_key & ethtype_mask;
-		struct flow_dissector_key_ip *mask;
+		struct flow_match_ip match;
 
 		if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) {
 			netdev_err(dev, "IP Key supported only with IPv4/v6");
 			return -EINVAL;
 		}
 
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_IP,
-						 cls->mask);
-		if (mask->ttl) {
+		flow_rule_match_ip(rule, &match);
+		if (match.mask->ttl) {
 			netdev_warn(dev, "ttl match unsupported for offload");
 			return -EOPNOTSUPP;
 		}
@@ -328,7 +294,7 @@
 				u32 mask, u32 offset, u8 htype)
 {
 	switch (htype) {
-	case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
 		switch (offset) {
 		case PEDIT_ETH_DMAC_31_0:
 			fs->newdmac = 1;
@@ -346,7 +312,7 @@
 			offload_pedit(fs, val, mask, ETH_SMAC_47_16);
 		}
 		break;
-	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
 		switch (offset) {
 		case PEDIT_IP4_SRC:
 			offload_pedit(fs, val, mask, IP4_SRC);
@@ -356,7 +322,7 @@
 		}
 		fs->nat_mode = NAT_MODE_ALL;
 		break;
-	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
 		switch (offset) {
 		case PEDIT_IP6_SRC_31_0:
 			offload_pedit(fs, val, mask, IP6_SRC_31_0);
@@ -384,7 +350,7 @@
 		}
 		fs->nat_mode = NAT_MODE_ALL;
 		break;
-	case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
 		switch (offset) {
 		case PEDIT_TCP_SPORT_DPORT:
 			if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
@@ -397,7 +363,7 @@
 		}
 		fs->nat_mode = NAT_MODE_ALL;
 		break;
-	case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
 		switch (offset) {
 		case PEDIT_UDP_SPORT_DPORT:
 			if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
@@ -413,59 +379,66 @@
 }
 
 static void cxgb4_process_flow_actions(struct net_device *in,
-				       struct tc_cls_flower_offload *cls,
+				       struct flow_cls_offload *cls,
 				       struct ch_filter_specification *fs)
 {
-	const struct tc_action *a;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_action_entry *act;
 	int i;
 
-	tcf_exts_for_each_action(i, a, cls->exts) {
-		if (is_tcf_gact_ok(a)) {
+	flow_action_for_each(i, act, &rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_ACCEPT:
 			fs->action = FILTER_PASS;
-		} else if (is_tcf_gact_shot(a)) {
+			break;
+		case FLOW_ACTION_DROP:
 			fs->action = FILTER_DROP;
-		} else if (is_tcf_mirred_egress_redirect(a)) {
-			struct net_device *out = tcf_mirred_dev(a);
+			break;
+		case FLOW_ACTION_REDIRECT: {
+			struct net_device *out = act->dev;
 			struct port_info *pi = netdev_priv(out);
 
 			fs->action = FILTER_SWITCH;
 			fs->eport = pi->port_id;
-		} else if (is_tcf_vlan(a)) {
-			u32 vlan_action = tcf_vlan_action(a);
-			u8 prio = tcf_vlan_push_prio(a);
-			u16 vid = tcf_vlan_push_vid(a);
+			}
+			break;
+		case FLOW_ACTION_VLAN_POP:
+		case FLOW_ACTION_VLAN_PUSH:
+		case FLOW_ACTION_VLAN_MANGLE: {
+			u8 prio = act->vlan.prio;
+			u16 vid = act->vlan.vid;
 			u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid;
-
-			switch (vlan_action) {
-			case TCA_VLAN_ACT_POP:
+			switch (act->id) {
+			case FLOW_ACTION_VLAN_POP:
 				fs->newvlan |= VLAN_REMOVE;
 				break;
-			case TCA_VLAN_ACT_PUSH:
+			case FLOW_ACTION_VLAN_PUSH:
 				fs->newvlan |= VLAN_INSERT;
 				fs->vlan = vlan_tci;
 				break;
-			case TCA_VLAN_ACT_MODIFY:
+			case FLOW_ACTION_VLAN_MANGLE:
 				fs->newvlan |= VLAN_REWRITE;
 				fs->vlan = vlan_tci;
 				break;
 			default:
 				break;
 			}
-		} else if (is_tcf_pedit(a)) {
+			}
+			break;
+		case FLOW_ACTION_MANGLE: {
 			u32 mask, val, offset;
-			int nkeys, i;
 			u8 htype;
 
-			nkeys = tcf_pedit_nkeys(a);
-			for (i = 0; i < nkeys; i++) {
-				htype = tcf_pedit_htype(a, i);
-				mask = tcf_pedit_mask(a, i);
-				val = tcf_pedit_val(a, i);
-				offset = tcf_pedit_offset(a, i);
+			htype = act->mangle.htype;
+			mask = act->mangle.mask;
+			val = act->mangle.val;
+			offset = act->mangle.offset;
 
-				process_pedit_field(fs, val, mask, offset,
-						    htype);
+			process_pedit_field(fs, val, mask, offset, htype);
 			}
+			break;
+		default:
+			break;
 		}
 	}
 }
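Actions get the same treatment: instead of probing each tc_action with is_tcf_*() predicates, the driver now walks a pre-parsed array and switches on act->id. A minimal sketch of that loop (the handler logic is illustrative):

```c
#include <linux/errno.h>
#include <net/flow_offload.h>

static int example_walk_actions(struct flow_rule *rule)
{
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, &rule->action) {
		switch (act->id) {
		case FLOW_ACTION_ACCEPT:
		case FLOW_ACTION_DROP:
			break;		/* terminal verdicts, nothing to parse */
		case FLOW_ACTION_MANGLE:
			/* act->mangle bundles htype/offset/mask/val,
			 * replacing the per-key tcf_pedit_*() getters.
			 */
			break;
		default:
			return -EOPNOTSUPP;	/* reject unknown actions */
		}
	}
	return 0;
}
```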
@@ -484,126 +457,116 @@
 }
 
 static bool valid_pedit_action(struct net_device *dev,
-			       const struct tc_action *a)
+			       const struct flow_action_entry *act)
 {
 	u32 mask, offset;
-	u8 cmd, htype;
-	int nkeys, i;
+	u8 htype;
 
-	nkeys = tcf_pedit_nkeys(a);
-	for (i = 0; i < nkeys; i++) {
-		htype = tcf_pedit_htype(a, i);
-		cmd = tcf_pedit_cmd(a, i);
-		mask = tcf_pedit_mask(a, i);
-		offset = tcf_pedit_offset(a, i);
+	htype = act->mangle.htype;
+	mask = act->mangle.mask;
+	offset = act->mangle.offset;
 
-		if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) {
-			netdev_err(dev, "%s: Unsupported pedit cmd\n",
+	switch (htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+		switch (offset) {
+		case PEDIT_ETH_DMAC_31_0:
+		case PEDIT_ETH_DMAC_47_32_SMAC_15_0:
+		case PEDIT_ETH_SMAC_47_16:
+			break;
+		default:
+			netdev_err(dev, "%s: Unsupported pedit field\n",
 				   __func__);
 			return false;
 		}
-
-		switch (htype) {
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
-			switch (offset) {
-			case PEDIT_ETH_DMAC_31_0:
-			case PEDIT_ETH_DMAC_47_32_SMAC_15_0:
-			case PEDIT_ETH_SMAC_47_16:
-				break;
-			default:
-				netdev_err(dev, "%s: Unsupported pedit field\n",
-					   __func__);
-				return false;
-			}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+		switch (offset) {
+		case PEDIT_IP4_SRC:
+		case PEDIT_IP4_DST:
 			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
-			switch (offset) {
-			case PEDIT_IP4_SRC:
-			case PEDIT_IP4_DST:
-				break;
-			default:
-				netdev_err(dev, "%s: Unsupported pedit field\n",
-					   __func__);
-				return false;
-			}
+		default:
+			netdev_err(dev, "%s: Unsupported pedit field\n",
+				   __func__);
+			return false;
+		}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+		switch (offset) {
+		case PEDIT_IP6_SRC_31_0:
+		case PEDIT_IP6_SRC_63_32:
+		case PEDIT_IP6_SRC_95_64:
+		case PEDIT_IP6_SRC_127_96:
+		case PEDIT_IP6_DST_31_0:
+		case PEDIT_IP6_DST_63_32:
+		case PEDIT_IP6_DST_95_64:
+		case PEDIT_IP6_DST_127_96:
 			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
-			switch (offset) {
-			case PEDIT_IP6_SRC_31_0:
-			case PEDIT_IP6_SRC_63_32:
-			case PEDIT_IP6_SRC_95_64:
-			case PEDIT_IP6_SRC_127_96:
-			case PEDIT_IP6_DST_31_0:
-			case PEDIT_IP6_DST_63_32:
-			case PEDIT_IP6_DST_95_64:
-			case PEDIT_IP6_DST_127_96:
-				break;
-			default:
-				netdev_err(dev, "%s: Unsupported pedit field\n",
-					   __func__);
-				return false;
-			}
-			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
-			switch (offset) {
-			case PEDIT_TCP_SPORT_DPORT:
-				if (!valid_l4_mask(~mask)) {
-					netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n",
-						   __func__);
-					return false;
-				}
-				break;
-			default:
-				netdev_err(dev, "%s: Unsupported pedit field\n",
-					   __func__);
-				return false;
-			}
-			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
-			switch (offset) {
-			case PEDIT_UDP_SPORT_DPORT:
-				if (!valid_l4_mask(~mask)) {
-					netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n",
-						   __func__);
-					return false;
-				}
-				break;
-			default:
-				netdev_err(dev, "%s: Unsupported pedit field\n",
+		default:
+			netdev_err(dev, "%s: Unsupported pedit field\n",
+				   __func__);
+			return false;
+		}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+		switch (offset) {
+		case PEDIT_TCP_SPORT_DPORT:
+			if (!valid_l4_mask(~mask)) {
+				netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n",
 					   __func__);
 				return false;
 			}
 			break;
 		default:
-			netdev_err(dev, "%s: Unsupported pedit type\n",
+			netdev_err(dev, "%s: Unsupported pedit field\n",
 				   __func__);
 			return false;
 		}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+		switch (offset) {
+		case PEDIT_UDP_SPORT_DPORT:
+			if (!valid_l4_mask(~mask)) {
+				netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n",
+					   __func__);
+				return false;
+			}
+			break;
+		default:
+			netdev_err(dev, "%s: Unsupported pedit field\n",
+				   __func__);
+			return false;
+		}
+		break;
+	default:
+		netdev_err(dev, "%s: Unsupported pedit type\n", __func__);
+		return false;
 	}
 	return true;
 }
 
 static int cxgb4_validate_flow_actions(struct net_device *dev,
-				       struct tc_cls_flower_offload *cls)
+				       struct flow_cls_offload *cls)
 {
-	const struct tc_action *a;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_action_entry *act;
 	bool act_redir = false;
 	bool act_pedit = false;
 	bool act_vlan = false;
 	int i;
 
-	tcf_exts_for_each_action(i, a, cls->exts) {
-		if (is_tcf_gact_ok(a)) {
+	flow_action_for_each(i, act, &rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_ACCEPT:
+		case FLOW_ACTION_DROP:
 			/* Do nothing */
-		} else if (is_tcf_gact_shot(a)) {
-			/* Do nothing */
-		} else if (is_tcf_mirred_egress_redirect(a)) {
+			break;
+		case FLOW_ACTION_REDIRECT: {
 			struct adapter *adap = netdev2adap(dev);
 			struct net_device *n_dev, *target_dev;
 			unsigned int i;
 			bool found = false;
 
-			target_dev = tcf_mirred_dev(a);
+			target_dev = act->dev;
 			for_each_port(adap, i) {
 				n_dev = adap->port[i];
 				if (target_dev == n_dev) {
@@ -621,15 +584,18 @@
 				return -EINVAL;
 			}
 			act_redir = true;
-		} else if (is_tcf_vlan(a)) {
-			u16 proto = be16_to_cpu(tcf_vlan_push_proto(a));
-			u32 vlan_action = tcf_vlan_action(a);
+			}
+			break;
+		case FLOW_ACTION_VLAN_POP:
+		case FLOW_ACTION_VLAN_PUSH:
+		case FLOW_ACTION_VLAN_MANGLE: {
+			u16 proto = be16_to_cpu(act->vlan.proto);
 
-			switch (vlan_action) {
-			case TCA_VLAN_ACT_POP:
+			switch (act->id) {
+			case FLOW_ACTION_VLAN_POP:
 				break;
-			case TCA_VLAN_ACT_PUSH:
-			case TCA_VLAN_ACT_MODIFY:
+			case FLOW_ACTION_VLAN_PUSH:
+			case FLOW_ACTION_VLAN_MANGLE:
 				if (proto != ETH_P_8021Q) {
 					netdev_err(dev, "%s: Unsupported vlan proto\n",
 						   __func__);
@@ -642,13 +608,17 @@
 				return -EOPNOTSUPP;
 			}
 			act_vlan = true;
-		} else if (is_tcf_pedit(a)) {
-			bool pedit_valid = valid_pedit_action(dev, a);
+			}
+			break;
+		case FLOW_ACTION_MANGLE: {
+			bool pedit_valid = valid_pedit_action(dev, act);
 
 			if (!pedit_valid)
 				return -EOPNOTSUPP;
 			act_pedit = true;
-		} else {
+			}
+			break;
+		default:
 			netdev_err(dev, "%s: Unsupported action\n", __func__);
 			return -EOPNOTSUPP;
 		}
@@ -664,7 +634,7 @@
 }
 
 int cxgb4_tc_flower_replace(struct net_device *dev,
-			    struct tc_cls_flower_offload *cls)
+			    struct flow_cls_offload *cls)
 {
 	struct adapter *adap = netdev2adap(dev);
 	struct ch_tc_flower_entry *ch_flower;
@@ -719,11 +689,8 @@
 
 	ret = ctx.result;
 	/* Check if hw returned error for filter creation */
-	if (ret) {
-		netdev_err(dev, "%s: filter creation err %d\n",
-			   __func__, ret);
+	if (ret)
 		goto free_entry;
-	}
 
 	ch_flower->tc_flower_cookie = cls->cookie;
 	ch_flower->filter_id = ctx.tid;
@@ -743,7 +710,7 @@
 }
 
 int cxgb4_tc_flower_destroy(struct net_device *dev,
-			    struct tc_cls_flower_offload *cls)
+			    struct flow_cls_offload *cls)
 {
 	struct adapter *adap = netdev2adap(dev);
 	struct ch_tc_flower_entry *ch_flower;
@@ -817,7 +784,7 @@
 }
 
 int cxgb4_tc_flower_stats(struct net_device *dev,
-			  struct tc_cls_flower_offload *cls)
+			  struct flow_cls_offload *cls)
 {
 	struct adapter *adap = netdev2adap(dev);
 	struct ch_tc_flower_stats *ofld_stats;
@@ -843,9 +810,9 @@
 	if (ofld_stats->packet_count != packets) {
 		if (ofld_stats->prev_packet_count != packets)
 			ofld_stats->last_used = jiffies;
-		tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count,
-				      packets - ofld_stats->packet_count,
-				      ofld_stats->last_used);
+		flow_stats_update(&cls->stats, bytes - ofld_stats->byte_count,
+				  packets - ofld_stats->packet_count,
+				  ofld_stats->last_used);
 
 		ofld_stats->packet_count = packets;
 		ofld_stats->byte_count = bytes;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
index 050c8a5..eb4c952 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
@@ -109,11 +109,11 @@
 #define PEDIT_UDP_SPORT_DPORT		0x0
 
 int cxgb4_tc_flower_replace(struct net_device *dev,
-			    struct tc_cls_flower_offload *cls);
+			    struct flow_cls_offload *cls);
 int cxgb4_tc_flower_destroy(struct net_device *dev,
-			    struct tc_cls_flower_offload *cls);
+			    struct flow_cls_offload *cls);
 int cxgb4_tc_flower_stats(struct net_device *dev,
-			  struct tc_cls_flower_offload *cls);
+			  struct flow_cls_offload *cls);
 
 int cxgb4_init_tc_flower(struct adapter *adap);
 void cxgb4_cleanup_tc_flower(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index c7d2b4d..02fc63f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -444,8 +444,7 @@
 	if (!max_tids)
 		return NULL;
 
-	t = kvzalloc(sizeof(*t) +
-			 (max_tids * sizeof(struct cxgb4_link)), GFP_KERNEL);
+	t = kvzalloc(struct_size(t, table, max_tids), GFP_KERNEL);
 	if (!t)
 		return NULL;
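struct_size() replaces the open-coded header-plus-flexible-array arithmetic and, unlike the raw expression, saturates on overflow so an oversized count makes the allocation fail instead of coming back undersized. A standalone illustration of the size it computes:

```c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct table {
	size_t nelem;
	int entries[];		/* flexible array member */
};

static struct table *table_alloc(size_t n)
{
	/* userspace stand-in for kvzalloc(struct_size(t, entries, n), ...);
	 * the kernel macro additionally checks this multiply-add for overflow
	 */
	struct table *t = calloc(1, sizeof(*t) + n * sizeof(t->entries[0]));

	if (t)
		t->nelem = n;
	return t;
}

int main(void)
{
	struct table *t = table_alloc(16);

	printf("allocated %zu entries\n", t ? t->nelem : 0);
	free(t);
	return 0;
}
```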
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c
new file mode 100644
index 0000000..3de8a5e
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  Written by: Ganesh Goudar (ganeshgr@chelsio.com)
+ */
+
+#include "cxgb4.h"
+
+#define CXGB4_NUM_TRIPS 1
+
+static int cxgb4_thermal_get_temp(struct thermal_zone_device *tzdev,
+				  int *temp)
+{
+	struct adapter *adap = tzdev->devdata;
+	u32 param, val;
+	int ret;
+
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DIAG) |
+		 FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_DIAG_TMP));
+
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
+			      &param, &val);
+	if (ret < 0 || val == 0)
+		return -1;
+
+	*temp = val * 1000;
+	return 0;
+}
+
+static int cxgb4_thermal_get_trip_type(struct thermal_zone_device *tzdev,
+				       int trip, enum thermal_trip_type *type)
+{
+	struct adapter *adap = tzdev->devdata;
+
+	if (!adap->ch_thermal.trip_temp)
+		return -EINVAL;
+
+	*type = adap->ch_thermal.trip_type;
+	return 0;
+}
+
+static int cxgb4_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
+				       int trip, int *temp)
+{
+	struct adapter *adap = tzdev->devdata;
+
+	if (!adap->ch_thermal.trip_temp)
+		return -EINVAL;
+
+	*temp = adap->ch_thermal.trip_temp;
+	return 0;
+}
+
+static struct thermal_zone_device_ops cxgb4_thermal_ops = {
+	.get_temp = cxgb4_thermal_get_temp,
+	.get_trip_type = cxgb4_thermal_get_trip_type,
+	.get_trip_temp = cxgb4_thermal_get_trip_temp,
+};
+
+int cxgb4_thermal_init(struct adapter *adap)
+{
+	struct ch_thermal *ch_thermal = &adap->ch_thermal;
+	int num_trip = CXGB4_NUM_TRIPS;
+	u32 param, val;
+	int ret;
+
+	/* On older firmware we may not get the trip temperature;
+	 * set the number of trips to 0.
+	 */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DIAG) |
+		 FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_DIAG_MAXTMPTHRESH));
+
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
+			      &param, &val);
+	if (ret < 0) {
+		num_trip = 0; /* could not get trip temperature */
+	} else {
+		ch_thermal->trip_temp = val * 1000;
+		ch_thermal->trip_type = THERMAL_TRIP_CRITICAL;
+	}
+
+	ch_thermal->tzdev = thermal_zone_device_register("cxgb4", num_trip,
+							 0, adap,
+							 &cxgb4_thermal_ops,
+							 NULL, 0, 0);
+	if (IS_ERR(ch_thermal->tzdev)) {
+		ret = PTR_ERR(ch_thermal->tzdev);
+		dev_err(adap->pdev_dev, "Failed to register thermal zone\n");
+		ch_thermal->tzdev = NULL;
+		return ret;
+	}
+	return 0;
+}
+
+int cxgb4_thermal_remove(struct adapter *adap)
+{
+	if (adap->ch_thermal.tzdev)
+		thermal_zone_device_unregister(adap->ch_thermal.tzdev);
+	return 0;
+}
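The new thermal file follows the standard (pre-5.9) thermal zone pattern: an ops table whose get_temp reports millidegrees Celsius, registered through thermal_zone_device_register(). A minimal sketch with a hypothetical sensor read:

```c
#include <linux/thermal.h>

static int demo_get_temp(struct thermal_zone_device *tzdev, int *temp)
{
	*temp = 42000;		/* hypothetical reading, in millidegrees C */
	return 0;
}

static struct thermal_zone_device_ops demo_ops = {
	.get_temp = demo_get_temp,
};

static struct thermal_zone_device *demo_register(void *drvdata)
{
	/* zero trip points, no writable-trip mask, no params, no polling */
	return thermal_zone_device_register("demo", 0, 0, drvdata,
					    &demo_ops, NULL, 0, 0);
}
```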
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 4bc2110..86b528d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -78,7 +78,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&bmap->lock, flags);
-	 __clear_bit(msix_idx, bmap->msix_bmap);
+	__clear_bit(msix_idx, bmap->msix_bmap);
 	spin_unlock_irqrestore(&bmap->lock, flags);
 }
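free_msix_idx_in_bmap() above is the release half of a spinlock-protected bitmap allocator. The matching allocation side lives elsewhere in the driver, but its likely shape, built from standard bitmap primitives, is roughly the following (an assumption, not the driver's actual code):

```c
#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

struct idx_bmap {		/* hypothetical allocator state */
	unsigned long *map;
	unsigned int size;
	spinlock_t lock;
};

static int idx_alloc(struct idx_bmap *b)
{
	unsigned long flags;
	unsigned int idx;

	spin_lock_irqsave(&b->lock, flags);
	idx = find_first_zero_bit(b->map, b->size);
	if (idx < b->size)
		__set_bit(idx, b->map);	/* non-atomic is fine under the lock */
	spin_unlock_irqrestore(&b->lock, flags);

	return idx < b->size ? (int)idx : -ENOSPC;
}
```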
 
@@ -137,17 +137,16 @@
 static int alloc_uld_rxqs(struct adapter *adap,
 			  struct sge_uld_rxq_info *rxq_info, bool lro)
 {
-	struct sge *s = &adap->sge;
 	unsigned int nq = rxq_info->nrxq + rxq_info->nciq;
+	int i, err, msi_idx, que_idx = 0, bmap_idx = 0;
 	struct sge_ofld_rxq *q = rxq_info->uldrxq;
 	unsigned short *ids = rxq_info->rspq_id;
-	unsigned int bmap_idx = 0;
+	struct sge *s = &adap->sge;
 	unsigned int per_chan;
-	int i, err, msi_idx, que_idx = 0;
 
 	per_chan = rxq_info->nrxq / adap->params.nports;
 
-	if (adap->flags & USING_MSIX)
+	if (adap->flags & CXGB4_USING_MSIX)
 		msi_idx = 1;
 	else
 		msi_idx = -((int)s->intrq.abs_id + 1);
@@ -161,6 +160,10 @@
 
 		if (msi_idx >= 0) {
 			bmap_idx = get_msix_idx_from_bmap(adap);
+			if (bmap_idx < 0) {
+				err = -ENOSPC;
+				goto freeout;
+			}
 			msi_idx = adap->msix_info_ulds[bmap_idx].idx;
 		}
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
@@ -195,7 +198,7 @@
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 	int i, ret = 0;
 
-	if (adap->flags & USING_MSIX) {
+	if (adap->flags & CXGB4_USING_MSIX) {
 		rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq),
 					     sizeof(unsigned short),
 					     GFP_KERNEL);
@@ -206,7 +209,7 @@
 	ret = !(!alloc_uld_rxqs(adap, rxq_info, lro));
 
 	/* Tell uP to route control queue completions to rdma rspq */
-	if (adap->flags & FULL_INIT_DONE &&
+	if (adap->flags & CXGB4_FULL_INIT_DONE &&
 	    !ret && uld_type == CXGB4_ULD_RDMA) {
 		struct sge *s = &adap->sge;
 		unsigned int cmplqid;
@@ -239,7 +242,7 @@
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 
-	if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) {
+	if (adap->flags & CXGB4_FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) {
 		struct sge *s = &adap->sge;
 		u32 param, cmdop, cmplqid = 0;
 		int i;
@@ -258,7 +261,7 @@
 		t4_free_uld_rxqs(adap, rxq_info->nciq,
 				 rxq_info->uldrxq + rxq_info->nrxq);
 	t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq);
-	if (adap->flags & USING_MSIX)
+	if (adap->flags & CXGB4_USING_MSIX)
 		kfree(rxq_info->msix_tbl);
 }
 
@@ -273,7 +276,7 @@
 	if (!rxq_info)
 		return -ENOMEM;
 
-	if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) {
+	if (adap->flags & CXGB4_USING_MSIX && uld_info->nrxq > s->nqs_per_uld) {
 		i = s->nqs_per_uld;
 		rxq_info->nrxq = roundup(i, adap->params.nports);
 	} else {
@@ -284,7 +287,7 @@
 	if (!uld_info->ciq) {
 		rxq_info->nciq = 0;
 	} else  {
-		if (adap->flags & USING_MSIX)
+		if (adap->flags & CXGB4_USING_MSIX)
 			rxq_info->nciq = min_t(int, s->nqs_per_uld,
 					       num_online_cpus());
 		else
@@ -352,25 +355,32 @@
 request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	struct uld_msix_info *minfo;
 	int err = 0;
 	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
 		bmap_idx = rxq_info->msix_tbl[idx];
-		err = request_irq(adap->msix_info_ulds[bmap_idx].vec,
+		minfo = &adap->msix_info_ulds[bmap_idx];
+		err = request_irq(minfo->vec,
 				  t4_sge_intr_msix, 0,
-				  adap->msix_info_ulds[bmap_idx].desc,
+				  minfo->desc,
 				  &rxq_info->uldrxq[idx].rspq);
 		if (err)
 			goto unwind;
+
+		cxgb4_set_msix_aff(adap, minfo->vec,
+				   &minfo->aff_mask, idx);
 	}
 	return 0;
+
 unwind:
 	while (idx-- > 0) {
 		bmap_idx = rxq_info->msix_tbl[idx];
+		minfo = &adap->msix_info_ulds[bmap_idx];
+		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 		free_msix_idx_in_bmap(adap, bmap_idx);
-		free_irq(adap->msix_info_ulds[bmap_idx].vec,
-			 &rxq_info->uldrxq[idx].rspq);
+		free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq);
 	}
 	return err;
 }
@@ -379,14 +389,16 @@
 free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	struct uld_msix_info *minfo;
 	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
 		bmap_idx = rxq_info->msix_tbl[idx];
+		minfo = &adap->msix_info_ulds[bmap_idx];
 
+		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 		free_msix_idx_in_bmap(adap, bmap_idx);
-		free_irq(adap->msix_info_ulds[bmap_idx].vec,
-			 &rxq_info->uldrxq[idx].rspq);
+		free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq);
 	}
 }
 
@@ -520,10 +532,20 @@
 	txq_info = kzalloc(sizeof(*txq_info), GFP_KERNEL);
 	if (!txq_info)
 		return -ENOMEM;
+	if (uld_type == CXGB4_ULD_CRYPTO) {
+		i = min_t(int, adap->vres.ncrypto_fc,
+			  num_online_cpus());
+		txq_info->ntxq = rounddown(i, adap->params.nports);
+		if (txq_info->ntxq <= 0) {
+			dev_warn(adap->pdev_dev, "Crypto Tx Queues can't be zero\n");
+			kfree(txq_info);
+			return -EINVAL;
+		}
 
-	i = min_t(int, uld_info->ntxq, num_online_cpus());
-	txq_info->ntxq = roundup(i, adap->params.nports);
-
+	} else {
+		i = min_t(int, uld_info->ntxq, num_online_cpus());
+		txq_info->ntxq = roundup(i, adap->params.nports);
+	}
 	txq_info->uldtxq = kcalloc(txq_info->ntxq, sizeof(struct sge_uld_txq),
 				   GFP_KERNEL);
 	if (!txq_info->uldtxq) {
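Note the asymmetry introduced above: regular ULDs round the queue count up to a multiple of the port count, while the crypto path rounds down and refuses to continue at zero. A quick standalone demonstration of the difference:

```c
#include <stdio.h>

/* userspace mirrors of the kernel's roundup()/rounddown() macros */
#define ROUNDUP(x, y)   ((((x) + (y) - 1) / (y)) * (y))
#define ROUNDDOWN(x, y) (((x) / (y)) * (y))

int main(void)
{
	int nports = 4, ncpus = 6;

	/* regular ULD gets 8 queues; crypto gets 4 (and 0 would be an error) */
	printf("roundup=%d rounddown=%d\n",
	       ROUNDUP(ncpus, nports), ROUNDDOWN(ncpus, nports));
	return 0;
}
```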
@@ -546,11 +568,14 @@
 			   struct cxgb4_lld_info *lli)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int tx_uld_type = TX_ULD(uld_type);
+	struct sge_uld_txq_info *txq_info = adap->sge.uld_txq_info[tx_uld_type];
 
 	lli->rxq_ids = rxq_info->rspq_id;
 	lli->nrxq = rxq_info->nrxq;
 	lli->ciq_ids = rxq_info->rspq_id + rxq_info->nrxq;
 	lli->nciq = rxq_info->nciq;
+	lli->ntxq = txq_info->ntxq;
 }
 
 int t4_uld_mem_alloc(struct adapter *adap)
@@ -598,10 +623,10 @@
 		adap->uld[type].add = NULL;
 		release_sge_txq_uld(adap, type);
 
-		if (adap->flags & FULL_INIT_DONE)
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
 			quiesce_rx_uld(adap, type);
 
-		if (adap->flags & USING_MSIX)
+		if (adap->flags & CXGB4_USING_MSIX)
 			free_msix_queue_irqs_uld(adap, type);
 
 		free_sge_queues_uld(adap, type);
@@ -634,7 +659,6 @@
 	lld->ports = adap->port;
 	lld->vr = &adap->vres;
 	lld->mtus = adap->params.mtus;
-	lld->ntxq = adap->sge.ofldqsets;
 	lld->nchan = adap->params.nports;
 	lld->nports = adap->params.nports;
 	lld->wr_cred = adap->params.ofldq_wr_cred;
@@ -648,6 +672,7 @@
 	lld->cclk_ps = 1000000000 / adap->params.vpd.cclk;
 	lld->udb_density = 1 << adap->params.sge.eq_qpp;
 	lld->ucq_density = 1 << adap->params.sge.iq_qpp;
+	lld->sge_host_page_size = 1 << (adap->params.sge.hps + 10);
 	lld->filt_mode = adap->params.tp.vlan_pri_map;
 	/* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
 	for (i = 0; i < NCHAN; i++)
@@ -660,7 +685,7 @@
 	lld->sge_egrstatuspagesize = adap->sge.stat_len;
 	lld->sge_pktshift = adap->sge.pktshift;
 	lld->ulp_crypto = adap->params.crypto;
-	lld->enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
+	lld->enable_fw_ofld_conn = adap->flags & CXGB4_FW_OFLD_CONN;
 	lld->max_ordird_qp = adap->params.max_ordird_qp;
 	lld->max_ird_adapter = adap->params.max_ird_adapter;
 	lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
@@ -670,10 +695,10 @@
 	lld->write_cmpl_support = adap->params.write_cmpl_support;
 }
 
-static void uld_attach(struct adapter *adap, unsigned int uld)
+static int uld_attach(struct adapter *adap, unsigned int uld)
 {
-	void *handle;
 	struct cxgb4_lld_info lli;
+	void *handle;
 
 	uld_init(adap, &lli);
 	uld_queue_init(adap, uld, &lli);
@@ -683,34 +708,33 @@
 		dev_warn(adap->pdev_dev,
 			 "could not attach to the %s driver, error %ld\n",
 			 adap->uld[uld].name, PTR_ERR(handle));
-		return;
+		return PTR_ERR(handle);
 	}
 
 	adap->uld[uld].handle = handle;
 	t4_register_netevent_notifier();
 
-	if (adap->flags & FULL_INIT_DONE)
+	if (adap->flags & CXGB4_FULL_INIT_DONE)
 		adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
+
+	return 0;
 }
 
-/**
- *	cxgb4_register_uld - register an upper-layer driver
- *	@type: the ULD type
- *	@p: the ULD methods
+/* cxgb4_register_uld - register an upper-layer driver
+ * @type: the ULD type
+ * @p: the ULD methods
  *
- *	Registers an upper-layer driver with this driver and notifies the ULD
- *	about any presently available devices that support its type.  Returns
- *	%-EBUSY if a ULD of the same type is already registered.
+ * Registers an upper-layer driver with this driver and notifies the ULD
+ * about any presently available devices that support its type.
  */
-int cxgb4_register_uld(enum cxgb4_uld type,
-		       const struct cxgb4_uld_info *p)
+void cxgb4_register_uld(enum cxgb4_uld type,
+			const struct cxgb4_uld_info *p)
 {
-	int ret = 0;
-	unsigned int adap_idx = 0;
 	struct adapter *adap;
+	int ret = 0;
 
 	if (type >= CXGB4_ULD_MAX)
-		return -EINVAL;
+		return;
 
 	mutex_lock(&uld_mutex);
 	list_for_each_entry(adap, &adapter_list, list_node) {
@@ -725,60 +749,41 @@
 		ret = setup_sge_queues_uld(adap, type, p->lro);
 		if (ret)
 			goto free_queues;
-		if (adap->flags & USING_MSIX) {
+		if (adap->flags & CXGB4_USING_MSIX) {
 			name_msix_vecs_uld(adap, type);
 			ret = request_msix_queue_irqs_uld(adap, type);
 			if (ret)
 				goto free_rxq;
 		}
-		if (adap->flags & FULL_INIT_DONE)
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
 			enable_rx_uld(adap, type);
-		if (adap->uld[type].add) {
-			ret = -EBUSY;
+		if (adap->uld[type].add)
 			goto free_irq;
-		}
 		ret = setup_sge_txq_uld(adap, type, p);
 		if (ret)
 			goto free_irq;
 		adap->uld[type] = *p;
-		uld_attach(adap, type);
-		adap_idx++;
-	}
-	mutex_unlock(&uld_mutex);
-	return 0;
-
-free_irq:
-	if (adap->flags & FULL_INIT_DONE)
-		quiesce_rx_uld(adap, type);
-	if (adap->flags & USING_MSIX)
-		free_msix_queue_irqs_uld(adap, type);
-free_rxq:
-	free_sge_queues_uld(adap, type);
-free_queues:
-	free_queues_uld(adap, type);
-out:
-
-	list_for_each_entry(adap, &adapter_list, list_node) {
-		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
-		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
-			continue;
-		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
-			continue;
-		if (!adap_idx)
-			break;
-		adap->uld[type].handle = NULL;
-		adap->uld[type].add = NULL;
+		ret = uld_attach(adap, type);
+		if (ret)
+			goto free_txq;
+		continue;
+free_txq:
 		release_sge_txq_uld(adap, type);
-		if (adap->flags & FULL_INIT_DONE)
+free_irq:
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
 			quiesce_rx_uld(adap, type);
-		if (adap->flags & USING_MSIX)
+		if (adap->flags & CXGB4_USING_MSIX)
 			free_msix_queue_irqs_uld(adap, type);
+free_rxq:
 		free_sge_queues_uld(adap, type);
+free_queues:
 		free_queues_uld(adap, type);
-		adap_idx--;
+out:
+		dev_warn(adap->pdev_dev,
+			 "ULD registration failed for uld type %d\n", type);
 	}
 	mutex_unlock(&uld_mutex);
-	return ret;
+	return;
 }
 EXPORT_SYMBOL(cxgb4_register_uld);
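The reworked registration loop above unwinds per adapter through fall-through labels and then continues to the next adapter rather than aborting the whole registration. Its control-flow skeleton, reduced to hypothetical steps:

```c
#include <linux/printk.h>

struct thing;				/* hypothetical */
int step_a(struct thing *t);		/* hypothetical setup helpers */
int step_b(struct thing *t);
void undo_step_a(struct thing *t);

static void register_all(struct thing **things, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (step_a(things[i]))
			goto warn;
		if (step_b(things[i]))
			goto undo_a;
		continue;		/* success: move on */
undo_a:
		/* each label releases what was acquired after it */
		undo_step_a(things[i]);
warn:
		pr_warn("setup failed for thing %d\n", i);
	}
}
```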
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index de9ad31..cee582e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -292,6 +292,7 @@
 	struct cxgb4_range ocq;
 	struct cxgb4_range key;
 	unsigned int ncrypto_fc;
+	struct cxgb4_range ppod_edram;
 };
 
 struct chcr_stats_debug {
@@ -336,6 +337,7 @@
 	unsigned int cclk_ps;                /* Core clock period in psec */
 	unsigned short udb_density;          /* # of user DB/page */
 	unsigned short ucq_density;          /* # of user CQs/page */
+	unsigned int sge_host_page_size;     /* SGE host page size */
 	unsigned short filt_mode;            /* filter optional components */
 	unsigned short tx_modq[NCHAN];       /* maps each tx channel to a */
 					     /* scheduler queue */
@@ -384,7 +386,7 @@
 	int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
 };
 
-int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
+void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
 int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
@@ -392,6 +394,7 @@
 int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
+unsigned int cxgb4_port_e2cchan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
 unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
 unsigned int cxgb4_port_idx(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 301c4df..1a407d3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -433,10 +433,12 @@
 	else
 		lport = netdev2pinfo(physdev)->lport;
 
-	if (is_vlan_dev(neigh->dev))
+	if (is_vlan_dev(neigh->dev)) {
 		vlan = vlan_dev_vlan_id(neigh->dev);
-	else
+		vlan |= vlan_dev_get_egress_qos_mask(neigh->dev, priority);
+	} else {
 		vlan = VLAN_NONE;
+	}
 
 	write_lock_bh(&d->lock);
 	for (e = d->l2tab[hash].first; e; e = e->next)
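With the change above, the L2T entry's VLAN field becomes a full 802.1Q TCI: vlan_dev_get_egress_qos_mask() returns the priority bits already shifted into PCP position, so a plain OR with the VID suffices. A sketch (the wrapper is hypothetical; both vlan_dev_*() calls are real):

```c
#include <linux/if_vlan.h>

static u16 build_tci(struct net_device *vlan_dev, u32 skprio)
{
	u16 tci = vlan_dev_vlan_id(vlan_dev);	/* VID, bits [11:0] */

	/* egress map entries are stored pre-shifted into bits [15:13] */
	tci |= vlan_dev_get_egress_qos_mask(vlan_dev, skprio);
	return tci;
}
```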
@@ -493,14 +495,11 @@
 		ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift;
 
 	if (tp->vnic_shift >= 0 && (tp->ingress_config & VNIC_F)) {
-		u32 viid = cxgb4_port_viid(dev);
-		u32 vf = FW_VIID_VIN_G(viid);
-		u32 pf = FW_VIID_PFN_G(viid);
-		u32 vld = FW_VIID_VIVLD_G(viid);
+		struct port_info *pi = (struct port_info *)netdev_priv(dev);
 
-		ntuple |= (u64)(FT_VNID_ID_VF_V(vf) |
-				FT_VNID_ID_PF_V(pf) |
-				FT_VNID_ID_VLD_V(vld)) << tp->vnic_shift;
+		ntuple |= (u64)(FT_VNID_ID_VF_V(pi->vin) |
+				FT_VNID_ID_PF_V(adap->pf) |
+				FT_VNID_ID_VLD_V(pi->vivld)) << tp->vnic_shift;
 	}
 
 	return ntuple;
@@ -647,7 +646,7 @@
 	if (l2t_size < L2T_MIN_HASH_BUCKETS)
 		return NULL;
 
-	d = kvzalloc(sizeof(*d) + l2t_size * sizeof(struct l2t_entry), GFP_KERNEL);
+	d = kvzalloc(struct_size(d, l2tab, l2t_size), GFP_KERNEL);
 	if (!d)
 		return NULL;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index 7fc6566..60218dc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -38,7 +38,6 @@
 #include "cxgb4.h"
 #include "sched.h"
 
-/* Spinlock must be held by caller */
 static int t4_sched_class_fw_cmd(struct port_info *pi,
 				 struct ch_sched_params *p,
 				 enum sched_fw_ops op)
@@ -67,7 +66,6 @@
 	return err;
 }
 
-/* Spinlock must be held by caller */
 static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg,
 				   enum sched_bind_type type, bool bind)
 {
@@ -163,7 +161,6 @@
 	if (e && index >= 0) {
 		int i = 0;
 
-		spin_lock(&e->lock);
 		list_for_each_entry(qe, &e->queue_list, list) {
 			if (i == index)
 				break;
@@ -171,10 +168,8 @@
 		}
 		err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE,
 					      false);
-		if (err) {
-			spin_unlock(&e->lock);
-			goto out;
-		}
+		if (err)
+			return err;
 
 		list_del(&qe->list);
 		kvfree(qe);
@@ -182,9 +177,7 @@
 			e->state = SCHED_STATE_UNUSED;
 			memset(&e->info, 0, sizeof(e->info));
 		}
-		spin_unlock(&e->lock);
 	}
-out:
 	return err;
 }
 
@@ -210,29 +203,24 @@
 
 	/* Unbind queue from any existing class */
 	err = t4_sched_queue_unbind(pi, p);
-	if (err) {
-		kvfree(qe);
-		goto out;
-	}
+	if (err)
+		goto out_err;
 
 	/* Bind queue to specified class */
-	memset(qe, 0, sizeof(*qe));
 	qe->cntxt_id = qid;
 	memcpy(&qe->param, p, sizeof(qe->param));
 
 	e = &s->tab[qe->param.class];
-	spin_lock(&e->lock);
 	err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true);
-	if (err) {
-		kvfree(qe);
-		spin_unlock(&e->lock);
-		goto out;
-	}
+	if (err)
+		goto out_err;
 
 	list_add_tail(&qe->list, &e->queue_list);
 	atomic_inc(&e->refcnt);
-	spin_unlock(&e->lock);
-out:
+	return err;
+
+out_err:
+	kvfree(qe);
 	return err;
 }
 
@@ -296,8 +284,6 @@
 			   enum sched_bind_type type)
 {
 	struct port_info *pi = netdev2pinfo(dev);
-	struct sched_table *s;
-	int err = 0;
 	u8 class_id;
 
 	if (!can_sched(dev))
@@ -323,12 +309,8 @@
 	if (class_id == SCHED_CLS_NONE)
 		return -ENOTSUPP;
 
-	s = pi->sched_tbl;
-	write_lock(&s->rw_lock);
-	err = t4_sched_class_bind_unbind_op(pi, arg, type, true);
-	write_unlock(&s->rw_lock);
+	return t4_sched_class_bind_unbind_op(pi, arg, type, true);
 
-	return err;
 }
 
 /**
@@ -343,8 +325,6 @@
 			     enum sched_bind_type type)
 {
 	struct port_info *pi = netdev2pinfo(dev);
-	struct sched_table *s;
-	int err = 0;
 	u8 class_id;
 
 	if (!can_sched(dev))
@@ -367,12 +347,7 @@
 	if (!valid_class_id(dev, class_id))
 		return -EINVAL;
 
-	s = pi->sched_tbl;
-	write_lock(&s->rw_lock);
-	err = t4_sched_class_bind_unbind_op(pi, arg, type, false);
-	write_unlock(&s->rw_lock);
-
-	return err;
+	return t4_sched_class_bind_unbind_op(pi, arg, type, false);
 }
 
 /* If @p is NULL, fetch any available unused class */
@@ -425,7 +400,6 @@
 static struct sched_class *t4_sched_class_alloc(struct port_info *pi,
 						struct ch_sched_params *p)
 {
-	struct sched_table *s = pi->sched_tbl;
 	struct sched_class *e;
 	u8 class_id;
 	int err;
@@ -441,7 +415,6 @@
 	if (class_id != SCHED_CLS_NONE)
 		return NULL;
 
-	write_lock(&s->rw_lock);
 	/* See if there's an exisiting class with same
 	 * requested sched params
 	 */
@@ -452,27 +425,19 @@
 		/* Fetch any available unused class */
 		e = t4_sched_class_lookup(pi, NULL);
 		if (!e)
-			goto out;
+			return NULL;
 
 		memcpy(&np, p, sizeof(np));
 		np.u.params.class = e->idx;
-
-		spin_lock(&e->lock);
 		/* New class */
 		err = t4_sched_class_fw_cmd(pi, &np, SCHED_FW_OP_ADD);
-		if (err) {
-			spin_unlock(&e->lock);
-			e = NULL;
-			goto out;
-		}
+		if (err)
+			return NULL;
 		memcpy(&e->info, &np, sizeof(e->info));
 		atomic_set(&e->refcnt, 0);
 		e->state = SCHED_STATE_ACTIVE;
-		spin_unlock(&e->lock);
 	}
 
-out:
-	write_unlock(&s->rw_lock);
 	return e;
 }
 
@@ -512,19 +477,17 @@
 	struct sched_table *s;
 	unsigned int i;
 
-	s = kvzalloc(sizeof(*s) + sched_size * sizeof(struct sched_class), GFP_KERNEL);
+	s = kvzalloc(struct_size(s, tab, sched_size), GFP_KERNEL);
 	if (!s)
 		return NULL;
 
 	s->sched_size = sched_size;
-	rwlock_init(&s->rw_lock);
 
 	for (i = 0; i < s->sched_size; i++) {
 		memset(&s->tab[i], 0, sizeof(struct sched_class));
 		s->tab[i].idx = i;
 		s->tab[i].state = SCHED_STATE_UNUSED;
 		INIT_LIST_HEAD(&s->tab[i].queue_list);
-		spin_lock_init(&s->tab[i].lock);
 		atomic_set(&s->tab[i].refcnt, 0);
 	}
 	return s;
@@ -545,11 +508,9 @@
 		for (i = 0; i < s->sched_size; i++) {
 			struct sched_class *e;
 
-			write_lock(&s->rw_lock);
 			e = &s->tab[i];
 			if (e->state == SCHED_STATE_ACTIVE)
 				t4_sched_class_free(pi, e);
-			write_unlock(&s->rw_lock);
 		}
 		kvfree(s);
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
index 3a49e00..168fb4c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -69,13 +69,11 @@
 	u8 idx;
 	struct ch_sched_params info;
 	struct list_head queue_list;
-	spinlock_t lock; /* Per class lock */
 	atomic_t refcnt;
 };
 
 struct sched_table {      /* per port scheduling table */
 	u8 sched_size;
-	rwlock_t rw_lock; /* Table lock */
 	struct sched_class tab[0];
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6807bc3..928bfea 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -80,9 +80,10 @@
  * Max number of Tx descriptors we clean up at a time.  Should be modest as
  * freeing skbs isn't cheap and it happens while holding locks.  We just need
  * to free packets faster than they arrive, we eventually catch up and keep
- * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.
+ * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.  It should
+ * also match the CIDX Flush Threshold.
  */
-#define MAX_TX_RECLAIM 16
+#define MAX_TX_RECLAIM 32
 
 /*
  * Max number of Rx buffers we replenish at a time.  Again keep this modest,
@@ -401,6 +402,39 @@
 }
 
 /**
+ *	reclaim_completed_tx - reclaims completed TX Descriptors
+ *	@adap: the adapter
+ *	@q: the Tx queue to reclaim completed descriptors from
+ *	@maxreclaim: the maximum number of TX Descriptors to reclaim or -1
+ *	@unmap: whether the buffers should be unmapped for DMA
+ *
+ *	Reclaims Tx Descriptors that the SGE has indicated it has processed,
+ *	and frees the associated buffers if possible.  If @maxreclaim == -1,
+ *	then we'll use a default maximum.  Called with the TX Queue locked.
+ */
+static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+				       int maxreclaim, bool unmap)
+{
+	int reclaim = reclaimable(q);
+
+	if (reclaim) {
+		/*
+		 * Limit the amount of clean up work we do at a time to keep
+		 * the Tx lock hold time O(1).
+		 */
+		if (maxreclaim < 0)
+			maxreclaim = MAX_TX_RECLAIM;
+		if (reclaim > maxreclaim)
+			reclaim = maxreclaim;
+
+		free_tx_desc(adap, q, reclaim, unmap);
+		q->in_use -= reclaim;
+	}
+
+	return reclaim;
+}
+
+/**
  *	cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
  *	@adap: the adapter
  *	@q: the Tx queue to reclaim completed descriptors from
@@ -410,22 +444,10 @@
  *	and frees the associated buffers if possible.  Called with the Tx
  *	queue locked.
  */
-inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
-					bool unmap)
+void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+				bool unmap)
 {
-	int avail = reclaimable(q);
-
-	if (avail) {
-		/*
-		 * Limit the amount of clean up work we do at a time to keep
-		 * the Tx lock hold time O(1).
-		 */
-		if (avail > MAX_TX_RECLAIM)
-			avail = MAX_TX_RECLAIM;
-
-		free_tx_desc(adap, q, avail, unmap);
-		q->in_use -= avail;
-	}
+	(void)reclaim_completed_tx(adap, q, -1, unmap);
 }
 EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
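A rough sketch of how the two reclaim entry points divide the work after this refactor (fragment only; adap and txq as in the surrounding driver code, and the budget value is hypothetical):

	int budget = 64;	/* hypothetical per-call budget */

	/* Unbudgeted callers keep the exported wrapper, which applies the
	 * MAX_TX_RECLAIM default internally.
	 */
	cxgb4_reclaim_completed_tx(adap, &txq->q, true);

	/* Budgeted callers use the new internal helper and learn how many
	 * descriptors were actually freed.
	 */
	budget -= reclaim_completed_tx(adap, &txq->q, budget, true);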
 
@@ -454,7 +476,7 @@
 		break;
 
 	default:
-		BUG_ON(1);
+		BUG();
 	}
 
 	return buf_size;
@@ -694,7 +716,7 @@
 {
 	size_t len = nelem * elem_size + stat_size;
 	void *s = NULL;
-	void *p = dma_zalloc_coherent(dev, len, phys, GFP_KERNEL);
+	void *p = dma_alloc_coherent(dev, len, phys, GFP_KERNEL);
 
 	if (!p)
 		return NULL;
@@ -1288,6 +1310,44 @@
 }
 
 /**
+ *	t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
+ *	@adap: the adapter
+ *	@eq: the Ethernet TX Queue
+ *	@maxreclaim: the maximum number of TX Descriptors to reclaim or -1
+ *
+ *	We're typically called here to update the state of an Ethernet TX
+ *	Queue with respect to the hardware's progress in consuming the TX
+ *	Work Requests that we've put on that Egress Queue.  This happens
+ *	when we get Egress Queue Update messages and also prophylactically
+ *	in regular timer-based Ethernet TX Queue maintenance.
+ */
+int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
+				 int maxreclaim)
+{
+	struct sge_txq *q = &eq->q;
+	unsigned int reclaimed;
+
+	if (!q->in_use || !__netif_tx_trylock(eq->txq))
+		return 0;
+
+	/* Reclaim pending completed TX Descriptors. */
+	reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
+
+	/* If the TX Queue is currently stopped and there's now more than half
+	 * the queue available, restart it.  Otherwise bail out since the rest
+	 * of what we want to do here concerns the possibility of shipping any
+	 * currently buffered Coalesced TX Work Request.
+	 */
+	if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
+		netif_tx_wake_queue(eq->txq);
+		eq->q.restarts++;
+	}
+
+	__netif_tx_unlock(eq->txq);
+	return reclaimed;
+}
+
+/**
  *	cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *	@skb: the packet
  *	@dev: the egress net device
@@ -1357,7 +1417,7 @@
 	}
 	skb_tx_timestamp(skb);
 
-	cxgb4_reclaim_completed_tx(adap, &q->q, true);
+	reclaim_completed_tx(adap, &q->q, -1, true);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1400,8 +1460,25 @@
 
 	wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
 	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+		/* After we're done injecting the Work Request for this
+		 * packet, we'll be below our "stop threshold" so stop the TX
+		 * Queue now and schedule a request for an SGE Egress Queue
+		 * Update message. The queue will get started later on when
+		 * the firmware processes this Work Request and sends us an
+		 * Egress Queue Status Update message indicating that space
+		 * has opened up.
+		 */
 		eth_txq_stop(q);
-		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+
+		/* If we're using the SGE Doorbell Queue Timer facility, we
+		 * don't need to ask the Firmware to send us Egress Queue CIDX
+		 * Updates: the Hardware will do this automatically.  And
+		 * since we send the Ingress Queue CIDX Updates to the
+		 * corresponding Ethernet Response Queue, we'll get them very
+		 * quickly.
+		 */
+		if (!q->dbqt)
+			wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
 	}
 
 	wr = (void *)&q->q.desc[q->q.pidx];
@@ -1671,7 +1748,7 @@
 	/* Take this opportunity to reclaim any TX Descriptors whose DMA
 	 * transfers have completed.
 	 */
-	cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
+	reclaim_completed_tx(adapter, &txq->q, -1, true);
 
 	/* Calculate the number of flits and TX Descriptors we're going to
 	 * need along with how many TX Descriptors will be left over after
@@ -1715,7 +1792,16 @@
 		 * has opened up.
 		 */
 		eth_txq_stop(txq);
-		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+
+		/* If we're using the SGE Doorbell Queue Timer facility, we
+		 * don't need to ask the Firmware to send us Egress Queue CIDX
+		 * Updates: the Hardware will do this automatically.  And
+		 * since we send the Ingress Queue CIDX Updates to the
+		 * corresponding Ethernet Response Queue, we'll get them very
+		 * quickly.
+		 */
+		if (!txq->dbqt)
+			wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
 	}
 
 	/* Start filling in our Work Request.  Note that we do _not_ handle
@@ -2794,6 +2880,74 @@
 }
 
 /**
+ *	t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages
+ *	@rspq: Ethernet RX Response Queue associated with Ethernet TX Queue
+ *	@rsp: Response Entry pointer into Response Queue
+ *	@gl: Gather List pointer
+ *
+ *	For adapters which support the SGE Doorbell Queue Timer facility,
+ *	we configure the Ethernet TX Queues to send CIDX Updates to the
+ *	Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE
+ *	messages.  This adds a small load to PCIe Link RX bandwidth and,
+ *	potentially, higher CPU Interrupt load, but allows us to respond
+ *	much more quickly to the CIDX Updates.  This is important for
+ *	Upper Layer Software which isn't willing to have a large amount
+ *	of TX Data outstanding before receiving DMA Completions.
+ */
+static void t4_tx_completion_handler(struct sge_rspq *rspq,
+				     const __be64 *rsp,
+				     const struct pkt_gl *gl)
+{
+	u8 opcode = ((const struct rss_header *)rsp)->opcode;
+	struct port_info *pi = netdev_priv(rspq->netdev);
+	struct adapter *adapter = rspq->adap;
+	struct sge *s = &adapter->sge;
+	struct sge_eth_txq *txq;
+
+	/* skip RSS header */
+	rsp++;
+
+	/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
+	 */
+	if (unlikely(opcode == CPL_FW4_MSG &&
+		     ((const struct cpl_fw4_msg *)rsp)->type ==
+							FW_TYPE_RSSCPL)) {
+		rsp++;
+		opcode = ((const struct rss_header *)rsp)->opcode;
+		rsp++;
+	}
+
+	if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) {
+		pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n",
+			__func__, opcode);
+		return;
+	}
+
+	txq = &s->ethtxq[pi->first_qset + rspq->idx];
+
+	/* We've got the Hardware Consumer Index Update in the Egress Update
+	 * message.  If we're using the SGE Doorbell Queue Timer mechanism,
+	 * these Egress Update messages will be our sole CIDX Updates we get
+	 * since we don't want to chew up PCIe bandwidth for both Ingress
+	 * Messages and Status Page writes.  However, the code which manages
+	 * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
+	 * stored in the Status Page at the end of the TX Queue.  It's easiest
+	 * to simply copy the CIDX Update value from the Egress Update message
+	 * to the Status Page.  Also note that no Endian issues need to be
+	 * considered here since both are Big Endian and we're just copying
+	 * bytes consistently ...
+	 */
+	if (txq->dbqt) {
+		struct cpl_sge_egr_update *egr;
+
+		egr = (struct cpl_sge_egr_update *)rsp;
+		WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
+	}
+
+	t4_sge_eth_txq_egress_update(adapter, txq, -1);
+}
+
+/**
  *	t4_ethrx_handler - process an ingress ethernet packet
  *	@q: the response queue that received the packet
  *	@rsp: the response queue descriptor holding the RX_PKT message
@@ -2816,6 +2970,15 @@
 	struct port_info *pi;
 	int ret = 0;
 
+	/* If we're looking at TX Queue CIDX Update, handle that separately
+	 * and return.
+	 */
+	if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) ||
+		     (*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) {
+		t4_tx_completion_handler(q, rsp, si);
+		return 0;
+	}
+
 	if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
 		return handle_trace_pkt(q->adap, si);
 
@@ -2830,6 +2993,10 @@
 
 	csum_ok = pkt->csum_calc && !err_vec &&
 		  (q->netdev->features & NETIF_F_RXCSUM);
+
+	if (err_vec)
+		rxq->stats.bad_rx_pkts++;
+
 	if (((pkt->l2info & htonl(RXF_TCP_F)) ||
 	     tnl_hdr_len) &&
 	    (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
@@ -3208,7 +3375,7 @@
 {
 	struct adapter *adap = cookie;
 
-	if (adap->flags & MASTER_PF)
+	if (adap->flags & CXGB4_MASTER_PF)
 		t4_slow_intr_handler(adap);
 	process_intrq(adap);
 	return IRQ_HANDLED;
@@ -3224,7 +3391,7 @@
 	struct adapter *adap = cookie;
 
 	t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0);
-	if (((adap->flags & MASTER_PF) && t4_slow_intr_handler(adap)) |
+	if (((adap->flags & CXGB4_MASTER_PF) && t4_slow_intr_handler(adap)) |
 	    process_intrq(adap))
 		return IRQ_HANDLED;
 	return IRQ_NONE;             /* probably shared interrupt */
@@ -3239,9 +3406,9 @@
  */
 irq_handler_t t4_intr_handler(struct adapter *adap)
 {
-	if (adap->flags & USING_MSIX)
+	if (adap->flags & CXGB4_USING_MSIX)
 		return t4_sge_intr_msix;
-	if (adap->flags & USING_MSI)
+	if (adap->flags & CXGB4_USING_MSI)
 		return t4_intr_msi;
 	return t4_intr_intx;
 }
@@ -3274,7 +3441,7 @@
 	 * global Master PF activities like checking for chip ingress stalls,
 	 * etc.
 	 */
-	if (!(adap->flags & MASTER_PF))
+	if (!(adap->flags & CXGB4_MASTER_PF))
 		goto done;
 
 	t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);
@@ -3285,10 +3452,10 @@
 
 static void sge_tx_timer_cb(struct timer_list *t)
 {
-	unsigned long m;
-	unsigned int i, budget;
 	struct adapter *adap = from_timer(adap, t, sge.tx_timer);
 	struct sge *s = &adap->sge;
+	unsigned long m, period;
+	unsigned int i, budget;
 
 	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
 		for (m = s->txq_maperr[i]; m; m &= m - 1) {
@@ -3316,29 +3483,29 @@
 	budget = MAX_TIMER_TX_RECLAIM;
 	i = s->ethtxq_rover;
 	do {
-		struct sge_eth_txq *q = &s->ethtxq[i];
-
-		if (q->q.in_use &&
-		    time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
-		    __netif_tx_trylock(q->txq)) {
-			int avail = reclaimable(&q->q);
-
-			if (avail) {
-				if (avail > budget)
-					avail = budget;
-
-				free_tx_desc(adap, &q->q, avail, true);
-				q->q.in_use -= avail;
-				budget -= avail;
-			}
-			__netif_tx_unlock(q->txq);
-		}
+		budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i],
+						       budget);
+		if (!budget)
+			break;
 
 		if (++i >= s->ethqsets)
 			i = 0;
-	} while (budget && i != s->ethtxq_rover);
+	} while (i != s->ethtxq_rover);
 	s->ethtxq_rover = i;
-	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
+
+	if (budget == 0) {
+		/* If we found too many reclaimable packets, schedule a timer
+		 * in the near future to continue where we left off.
+		 */
+		period = 2;
+	} else {
+		/* We reclaimed all reclaimable TX Descriptors, so reschedule
+		 * at the normal period.
+		 */
+		period = TX_QCHECK_PERIOD;
+	}
+
+	mod_timer(&s->tx_timer, jiffies + period);
 }
 
 /**
@@ -3382,7 +3549,7 @@
 	struct fw_iq_cmd c;
 	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
-	int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
+	int relaxed = !(adap->flags & CXGB4_ROOT_NO_RELAXED_ORDERING);
 
 	/* Size needs to be multiple of 16, including status entry. */
 	iq->size = roundup(iq->size, 16);
@@ -3417,7 +3584,8 @@
 							:  FW_IQ_IQTYPE_OFLD));
 
 	if (fl) {
-		enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
+		unsigned int chip_ver =
+			CHELSIO_CHIP_VERSION(adap->params.chip);
 
 		/* Allocate the ring for the hardware free list (with space
 		 * for its status page) along with the associated software
@@ -3455,10 +3623,10 @@
 		 * the smaller 64-byte value there).
 		 */
 		c.fl0dcaen_to_fl0cidxfthresh =
-			htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
+			htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ?
 						   FETCHBURSTMIN_128B_X :
-						   FETCHBURSTMIN_64B_X) |
-			      FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
+						   FETCHBURSTMIN_64B_T6_X) |
+			      FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
 						   FETCHBURSTMAX_512B_X :
 						   FETCHBURSTMAX_256B_X));
 		c.fl0size = htons(flsz);
@@ -3580,14 +3748,24 @@
 	adap->sge.egr_map[id - adap->sge.egr_start] = q;
 }
 
+/**
+ *	t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue
+ *	@adap: the adapter
+ *	@txq: the SGE Ethernet TX Queue to initialize
+ *	@dev: the Linux Network Device
+ *	@netdevq: the corresponding Linux TX Queue
+ *	@iqid: the Ingress Queue to which to deliver CIDX Update messages
+ *	@dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers
+ */
 int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 			 struct net_device *dev, struct netdev_queue *netdevq,
-			 unsigned int iqid)
+			 unsigned int iqid, u8 dbqt)
 {
-	int ret, nentries;
-	struct fw_eq_eth_cmd c;
-	struct sge *s = &adap->sge;
+	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
 	struct port_info *pi = netdev_priv(dev);
+	struct sge *s = &adap->sge;
+	struct fw_eq_eth_cmd c;
+	int ret, nentries;
 
 	/* Add status entries */
 	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -3606,19 +3784,43 @@
 			    FW_EQ_ETH_CMD_VFN_V(0));
 	c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
 				 FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
-	c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
-			   FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
+	/* For TX Ethernet Queues using the SGE Doorbell Queue Timer
+	 * mechanism, we use Ingress Queue messages for Hardware Consumer
+	 * Index Updates on the TX Queue.  Otherwise we have the Hardware
+	 * write the CIDX Updates into the Status Page at the end of the
+	 * TX Queue.
+	 */
+	c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+				     FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
 	c.fetchszm_to_iqid =
 		htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
 		      FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
 		      FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
+
+	/* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */
 	c.dcaen_to_eqsize =
-		htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+		htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+					    ? FETCHBURSTMIN_64B_X
+					    : FETCHBURSTMIN_64B_T6_X) |
 		      FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 		      FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
 		      FW_EQ_ETH_CMD_EQSIZE_V(nentries));
+
 	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
 
+	/* If we're using the SGE Doorbell Queue Timer mechanism, pass in the
+	 * currently configured Timer Index.  This can be changed later via an
+	 * ethtool -C tx-usecs {Timer Val} command.  Note that the SGE
+	 * Doorbell Queue mode is currently automatically enabled in the
+	 * Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ...
+	 */
+	if (dbqt)
+		c.timeren_timerix =
+			cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F |
+				    FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix));
+
 	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
 	if (ret) {
 		kfree(txq->q.sdesc);
@@ -3635,6 +3837,8 @@
 	txq->txq = netdevq;
 	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
 	txq->mapping_err = 0;
+	txq->dbqt = dbqt;
+
 	return 0;
 }
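The "CIDX Flush Threshold should match MAX_TX_RECLAIM" note above relies on the CIDXFTHRESH field encoding a power-of-two descriptor count. A compile-time guard along these lines, dropped into any function in sge.c, would capture the invariant (a sketch, not part of the patch):

	/* CIDXFLUSHTHRESH_32_X == 5 encodes a flush every 2^5 = 32
	 * descriptors, matching the new MAX_TX_RECLAIM value of 32.
	 */
	BUILD_BUG_ON(MAX_TX_RECLAIM != (1 << CIDXFLUSHTHRESH_32_X));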
 
@@ -3642,10 +3846,11 @@
 			  struct net_device *dev, unsigned int iqid,
 			  unsigned int cmplqid)
 {
-	int ret, nentries;
-	struct fw_eq_ctrl_cmd c;
-	struct sge *s = &adap->sge;
+	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
 	struct port_info *pi = netdev_priv(dev);
+	struct sge *s = &adap->sge;
+	struct fw_eq_ctrl_cmd c;
+	int ret, nentries;
 
 	/* Add status entries */
 	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -3669,7 +3874,9 @@
 		      FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
 		      FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid));
 	c.dcaen_to_eqsize =
-		htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+		htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+					     ? FETCHBURSTMIN_64B_X
+					     : FETCHBURSTMIN_64B_T6_X) |
 		      FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 		      FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
 		      FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
@@ -3709,6 +3916,7 @@
 			 struct net_device *dev, unsigned int iqid,
 			 unsigned int uld_type)
 {
+	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
 	int ret, nentries;
 	struct fw_eq_ofld_cmd c;
 	struct sge *s = &adap->sge;
@@ -3739,7 +3947,9 @@
 		      FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
 		      FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
 	c.dcaen_to_eqsize =
-		htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+		htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+					     ? FETCHBURSTMIN_64B_X
+					     : FETCHBURSTMIN_64B_T6_X) |
 		      FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 		      FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
 		      FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c
index 7b2207a..01c65d1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/smt.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c
@@ -47,8 +47,7 @@
 
 	smt_size = SMT_SIZE;
 
-	s = kvzalloc(sizeof(*s) + smt_size * sizeof(struct smt_entry),
-		     GFP_KERNEL);
+	s = kvzalloc(struct_size(s, smtab, smt_size), GFP_KERNEL);
 	if (!s)
 		return NULL;
 	s->smt_size = smt_size;
@@ -58,7 +57,7 @@
 		s->smtab[i].state = SMT_STATE_UNUSED;
 		memset(&s->smtab[i].src_mac, 0, ETH_ALEN);
 		spin_lock_init(&s->smtab[i].lock);
-		atomic_set(&s->smtab[i].refcnt, 0);
+		s->smtab[i].refcnt = 0;
 	}
 	return s;
 }
@@ -69,7 +68,7 @@
 	struct smt_entry *e, *end;
 
 	for (e = &s->smtab[0], end = &s->smtab[s->smt_size]; e != end; ++e) {
-		if (atomic_read(&e->refcnt) == 0) {
+		if (e->refcnt == 0) {
 			if (!first_free)
 				first_free = e;
 		} else {
@@ -98,11 +97,9 @@
 
 static void t4_smte_free(struct smt_entry *e)
 {
-	spin_lock_bh(&e->lock);
-	if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
+	if (e->refcnt == 0) {  /* hasn't been recycled */
 		e->state = SMT_STATE_UNUSED;
 	}
-	spin_unlock_bh(&e->lock);
 }
 
 /**
@@ -112,8 +109,10 @@
  */
 void cxgb4_smt_release(struct smt_entry *e)
 {
-	if (atomic_dec_and_test(&e->refcnt))
+	spin_lock_bh(&e->lock);
+	if ((--e->refcnt) == 0)
 		t4_smte_free(e);
+	spin_unlock_bh(&e->lock);
 }
 EXPORT_SYMBOL(cxgb4_smt_release);
 
@@ -216,14 +215,14 @@
 	e = find_or_alloc_smte(s, smac);
 	if (e) {
 		spin_lock(&e->lock);
-		if (!atomic_read(&e->refcnt)) {
-			atomic_set(&e->refcnt, 1);
+		if (!e->refcnt) {
+			e->refcnt = 1;
 			e->state = SMT_STATE_SWITCHING;
 			e->pfvf = pfvf;
 			memcpy(e->src_mac, smac, ETH_ALEN);
 			write_smt_entry(adap, e);
 		} else {
-			atomic_inc(&e->refcnt);
+			++e->refcnt;
 		}
 		spin_unlock(&e->lock);
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.h b/drivers/net/ethernet/chelsio/cxgb4/smt.h
index d6c2cc2..1268d6e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/smt.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.h
@@ -59,7 +59,7 @@
 	u16 idx;
 	u16 pfvf;
 	u8 src_mac[ETH_ALEN];
-	atomic_t refcnt;
+	int refcnt;
 	spinlock_t lock;	/* protect smt entry add,removal */
 };
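The atomic_t-to-int conversion works because every refcnt access now happens under the per-entry spinlock, so the free decision is made atomically with the decrement. A minimal sketch of the resulting locking rule (smt_put is a hypothetical name; the body mirrors cxgb4_smt_release() above):

	static void smt_put(struct smt_entry *e)
	{
		spin_lock_bh(&e->lock);
		if (--e->refcnt == 0)		/* last reference dropped */
			e->state = SMT_STATE_UNUSED;
		spin_unlock_bh(&e->lock);
	}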
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c
index 82b70a5..9a54302 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/srq.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.c
@@ -77,7 +77,7 @@
 	adap = netdev2adap(dev);
 	s = adap->srq;
 
-	if (!(adap->flags & FULL_INIT_DONE) || !s)
+	if (!(adap->flags & CXGB4_FULL_INIT_DONE) || !s)
 		goto out;
 
 	skb = alloc_skb(sizeof(*req), GFP_KERNEL);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 5fe5d16..f2a7824 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -198,7 +198,7 @@
 	if (pcie_fw & PCIE_FW_ERR_F) {
 		dev_err(adap->pdev_dev, "Firmware reports adapter error: %s\n",
 			reason[PCIE_FW_EVAL_G(pcie_fw)]);
-		adap->flags &= ~FW_OK;
+		adap->flags &= ~CXGB4_FW_OK;
 	}
 }
 
@@ -329,7 +329,7 @@
 	for (i = 0; ; i += ms) {
 		/* If we've waited too long, return a busy indication.  This
 		 * really ought to be based on our initial position in the
-		 * mailbox access list but this is a start.  We very rearely
+		 * mailbox access list but this is a start.  We very rarely
 		 * contend on access to the mailbox ...
 		 */
 		pcie_fw = t4_read_reg(adap, PCIE_FW_A);
@@ -606,7 +606,7 @@
  *
  *	Reads/writes an [almost] arbitrary memory region in the firmware: the
  *	firmware memory address and host buffer must be aligned on 32-bit
- *	boudaries; the length may be arbitrary.  The memory is transferred as
+ *	boundaries; the length may be arbitrary.  The memory is transferred as
  *	a raw byte sequence from/to the firmware's memory.  If this memory
  *	contains data structures which contain multi-byte integers, it's the
  *	caller's responsibility to perform appropriate byte order conversions.
@@ -3774,7 +3774,7 @@
  *	A negative error number will be returned if an error occurs.  If
  *	version number support is available and there's no need to upgrade
  *	the firmware, 0 will be returned.  If firmware is successfully
- *	transferred to the adapter, 1 will be retured.
+ *	transferred to the adapter, 1 will be returned.
  *
  *	NOTE: some adapters only have local RAM to store the PHY firmware.  As
  *	a result, a RESET of the adapter would cause that RAM to lose its
@@ -3794,7 +3794,7 @@
 	/* If we have version number support, then check to see if the adapter
 	 * already has up-to-date PHY firmware loaded.
 	 */
-	 if (phy_fw_version) {
+	if (phy_fw_version) {
 		new_phy_fw_vers = phy_fw_version(phy_fw_data, phy_fw_size);
 		ret = t4_phy_fw_ver(adap, &cur_phy_fw_ver);
 		if (ret < 0)
@@ -3808,7 +3808,7 @@
 	}
 
 	/* Ask the firmware where it wants us to copy the PHY firmware image.
-	 * The size of the file requires a special version of the READ coommand
+	 * The size of the file requires a special version of the READ command
 	 * which will pass the file size via the values field in PARAMS_CMD and
 	 * retrieve the return value from firmware and place it in the same
 	 * buffer values
@@ -3889,7 +3889,7 @@
 	c.param[0].mnem =
 		cpu_to_be32(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
 			    FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FWCACHE));
-	c.param[0].val = (__force __be32)op;
+	c.param[0].val = cpu_to_be32(op);
 
 	return t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), NULL);
 }
@@ -3964,6 +3964,14 @@
 	}
 }
 
+/* The ADVERT_MASK is used to mask out all of the Advertised Firmware Port
+ * Capabilities which we control with separate controls -- see, for instance,
+ * Pause Frames and Forward Error Correction.  In order to determine what the
+ * full set of Advertised Port Capabilities are, the base Advertised Port
+ * Capabilities (masked by ADVERT_MASK) must be combined with the Advertised
+ * Port Capabilities associated with those other controls.  See
+ * t4_link_acaps() for how this is done.
+ */
 #define ADVERT_MASK (FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M) | \
 		     FW_PORT_CAP32_ANEG)
 
@@ -4061,6 +4069,9 @@
 /* Translate Common Code Pause specification into Firmware Port Capabilities */
 static inline fw_port_cap32_t cc_to_fwcap_pause(enum cc_pause cc_pause)
 {
+	/* Translate orthogonal RX/TX Pause Controls for L1 Configure
+	 * commands, etc.
+	 */
 	fw_port_cap32_t fw_pause = 0;
 
 	if (cc_pause & PAUSE_RX)
@@ -4070,6 +4081,19 @@
 	if (!(cc_pause & PAUSE_AUTONEG))
 		fw_pause |= FW_PORT_CAP32_FORCE_PAUSE;
 
+	/* Translate orthogonal Pause controls into IEEE 802.3 Pause,
+	 * Asymmetrical Pause for use in reporting to upper layer OS code, etc.
+	 * Note that these bits are ignored in L1 Configure commands.
+	 */
+	if (cc_pause & PAUSE_RX) {
+		if (cc_pause & PAUSE_TX)
+			fw_pause |= FW_PORT_CAP32_802_3_PAUSE;
+		else
+			fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
+	} else if (cc_pause & PAUSE_TX) {
+		fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
+	}
+
 	return fw_pause;
 }
 
@@ -4100,30 +4124,25 @@
 }
 
 /**
- *	t4_link_l1cfg - apply link configuration to MAC/PHY
+ *	t4_link_acaps - compute Link Advertised Port Capabilities
  *	@adapter: the adapter
- *	@mbox: the Firmware Mailbox to use
  *	@port: the Port ID
  *	@lc: the Port's Link Configuration
  *
- *	Set up a port's MAC and PHY according to a desired link configuration.
- *	- If the PHY can auto-negotiate first decide what to advertise, then
- *	  enable/disable auto-negotiation as desired, and reset.
- *	- If the PHY does not auto-negotiate just reset it.
- *	- If auto-negotiation is off set the MAC to the proper speed/duplex/FC,
- *	  otherwise do it later based on the outcome of auto-negotiation.
+ *	Synthesize the Advertised Port Capabilities we'll be using based on
+ *	the base Advertised Port Capabilities (which have been filtered by
+ *	ADVERT_MASK) plus the individual controls for things like Pause
+ *	Frames, Forward Error Correction, MDI, etc.
  */
-int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox,
-		       unsigned int port, struct link_config *lc,
-		       bool sleep_ok, int timeout)
+fw_port_cap32_t t4_link_acaps(struct adapter *adapter, unsigned int port,
+			      struct link_config *lc)
 {
-	unsigned int fw_caps = adapter->params.fw_caps_support;
-	fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
-	struct fw_port_cmd cmd;
+	fw_port_cap32_t fw_fc, fw_fec, acaps;
 	unsigned int fw_mdi;
-	int ret;
+	char cc_fec;
 
 	fw_mdi = (FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO) & lc->pcaps);
+
 	/* Convert driver coding of Pause Frame Flow Control settings into the
 	 * Firmware's API.
 	 */
@@ -4132,7 +4151,7 @@
 	/* Convert Common Code Forward Error Control settings into the
 	 * Firmware's API.  If the current Requested FEC has "Automatic"
 	 * (IEEE 802.3) specified, then we use whatever the Firmware
-	 * sent us as part of it's IEEE 802.3-based interpratation of
+	 * sent us as part of its IEEE 802.3-based interpretation of
 	 * the Transceiver Module EPROM FEC parameters.  Otherwise we
 	 * use whatever is in the current Requested FEC settings.
 	 */
@@ -4143,32 +4162,73 @@
 	fw_fec = cc_to_fwcap_fec(cc_fec);
 
 	/* Figure out what our Requested Port Capabilities are going to be.
+	 * Note parallel structure in t4_handle_get_port_info() and
+	 * init_link_config().
 	 */
 	if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
-		rcap = lc->acaps | fw_fc | fw_fec;
+		acaps = lc->acaps | fw_fc | fw_fec;
 		lc->fc = lc->requested_fc & ~PAUSE_AUTONEG;
 		lc->fec = cc_fec;
 	} else if (lc->autoneg == AUTONEG_DISABLE) {
-		rcap = lc->speed_caps | fw_fc | fw_fec | fw_mdi;
+		acaps = lc->speed_caps | fw_fc | fw_fec | fw_mdi;
 		lc->fc = lc->requested_fc & ~PAUSE_AUTONEG;
 		lc->fec = cc_fec;
 	} else {
-		rcap = lc->acaps | fw_fc | fw_fec | fw_mdi;
+		acaps = lc->acaps | fw_fc | fw_fec | fw_mdi;
 	}
 
-	/* Note that older Firmware doesn't have FW_PORT_CAP32_FORCE_PAUSE, so
+	/* Some Requested Port Capabilities are trivially wrong if they exceed
+	 * the Physical Port Capabilities.  We can check that here and provide
+	 * moderately useful feedback in the system log.
+	 *
+	 * Note that older Firmware doesn't have FW_PORT_CAP32_FORCE_PAUSE, so
 	 * we need to exclude this from this check in order to maintain
 	 * compatibility ...
 	 */
-	if ((rcap & ~lc->pcaps) & ~FW_PORT_CAP32_FORCE_PAUSE) {
-		dev_err(adapter->pdev_dev,
-			"Requested Port Capabilities %#x exceed Physical Port Capabilities %#x\n",
-			rcap, lc->pcaps);
+	if ((acaps & ~lc->pcaps) & ~FW_PORT_CAP32_FORCE_PAUSE) {
+		dev_err(adapter->pdev_dev, "Requested Port Capabilities %#x exceed Physical Port Capabilities %#x\n",
+			acaps, lc->pcaps);
 		return -EINVAL;
 	}
 
-	/* And send that on to the Firmware ...
+	return acaps;
+}
+
+/**
+ *	t4_link_l1cfg_core - apply link configuration to MAC/PHY
+ *	@adapter: the adapter
+ *	@mbox: the Firmware Mailbox to use
+ *	@port: the Port ID
+ *	@lc: the Port's Link Configuration
+ *	@sleep_ok: if true we may sleep while awaiting command completion
+ *	@timeout: time to wait for command to finish before timing out
+ *		(negative implies @sleep_ok=false)
+ *
+ *	Set up a port's MAC and PHY according to a desired link configuration.
+ *	- If the PHY can auto-negotiate first decide what to advertise, then
+ *	  enable/disable auto-negotiation as desired, and reset.
+ *	- If the PHY does not auto-negotiate just reset it.
+ *	- If auto-negotiation is off set the MAC to the proper speed/duplex/FC,
+ *	  otherwise do it later based on the outcome of auto-negotiation.
+ */
+int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox,
+		       unsigned int port, struct link_config *lc,
+		       u8 sleep_ok, int timeout)
+{
+	unsigned int fw_caps = adapter->params.fw_caps_support;
+	struct fw_port_cmd cmd;
+	fw_port_cap32_t rcap;
+	int ret;
+
+	if (!(lc->pcaps & FW_PORT_CAP32_ANEG) &&
+	    lc->autoneg == AUTONEG_ENABLE) {
+		return -EINVAL;
+	}
+
+	/* Compute our Requested Port Capabilities and send that on to the
+	 * Firmware.
 	 */
+	rcap = t4_link_acaps(adapter, port, lc);
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
 				       FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
@@ -4185,13 +4245,20 @@
 
 	ret = t4_wr_mbox_meat_timeout(adapter, mbox, &cmd, sizeof(cmd), NULL,
 				      sleep_ok, timeout);
+
+	/* Unfortunately, even if the Requested Port Capabilities "fit" within
+	 * the Physical Port Capabilities, some combinations of features may
+	 * still not be legal.  For example, 40Gb/s and Reed-Solomon Forward
+	 * Error Correction.  So if the Firmware rejects the L1 Configure
+	 * request, flag that here.
+	 */
 	if (ret) {
 		dev_err(adapter->pdev_dev,
 			"Requested Port Capabilities %#x rejected, error %d\n",
 			rcap, -ret);
 		return ret;
 	}
-	return ret;
+	return 0;
 }
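For context, a hypothetical caller of the refactored link-configuration path (sketch only, assuming the usual t4_link_l1cfg() sleeping wrapper around t4_link_l1cfg_core() in cxgb4.h; the requested setting is made up for illustration):

	struct link_config *lc = &pi->link_cfg;
	int ret;

	lc->autoneg = AUTONEG_ENABLE;	/* hypothetical request */
	ret = t4_link_l1cfg(adapter, adapter->mbox, pi->tx_chan, lc);
	if (ret)
		dev_err(adapter->pdev_dev,
			"L1 configure failed: %d\n", ret);

With the new up-front check, such a request now fails with -EINVAL on ports that lack FW_PORT_CAP32_ANEG instead of being forwarded to the firmware.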
 
 /**
@@ -4204,6 +4271,7 @@
  */
 int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port)
 {
+	unsigned int fw_caps = adap->params.fw_caps_support;
 	struct fw_port_cmd c;
 
 	memset(&c, 0, sizeof(c));
@@ -4211,9 +4279,14 @@
 				     FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
 				     FW_PORT_CMD_PORTID_V(port));
 	c.action_to_len16 =
-		cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
+		cpu_to_be32(FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
+						 ? FW_PORT_ACTION_L1_CFG
+						 : FW_PORT_ACTION_L1_CFG32) |
 			    FW_LEN16(c));
-	c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP32_ANEG);
+	if (fw_caps == FW_CAPS16)
+		c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP_ANEG);
+	else
+		c.u.l1cfg32.rcap32 = cpu_to_be32(FW_PORT_CAP32_ANEG);
 	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
 }
 
@@ -4936,7 +5009,13 @@
  */
 int t4_slow_intr_handler(struct adapter *adapter)
 {
-	u32 cause = t4_read_reg(adapter, PL_INT_CAUSE_A);
+	/* There are rare cases where a PL_INT_CAUSE bit may end up getting
+	 * set when the corresponding PL_INT_ENABLE bit isn't set.  It's
+	 * easiest just to mask that case here.
+	 */
+	u32 raw_cause = t4_read_reg(adapter, PL_INT_CAUSE_A);
+	u32 enable = t4_read_reg(adapter, PL_INT_ENABLE_A);
+	u32 cause = raw_cause & enable;
 
 	if (!(cause & GLBL_INTR_MASK))
 		return 0;
@@ -4988,7 +5067,7 @@
 		ulptx_intr_handler(adapter);
 
 	/* Clear the interrupts just processed for which we are the master. */
-	t4_write_reg(adapter, PL_INT_CAUSE_A, cause & GLBL_INTR_MASK);
+	t4_write_reg(adapter, PL_INT_CAUSE_A, raw_cause & GLBL_INTR_MASK);
 	(void)t4_read_reg(adapter, PL_INT_CAUSE_A); /* flush */
 	return 1;
 }
@@ -5211,7 +5290,7 @@
 
 static unsigned int t4_use_ldst(struct adapter *adap)
 {
-	return (adap->flags & FW_OK) && !adap->use_bd;
+	return (adap->flags & CXGB4_FW_OK) && !adap->use_bd;
 }
 
 /**
@@ -5874,7 +5953,6 @@
 {
 	int i, ofst = idx * 4;
 	u32 data_reg, mask_reg, cfg;
-	u32 multitrc = TRCMULTIFILTER_F;
 
 	if (!enable) {
 		t4_write_reg(adap, MPS_TRC_FILTER_MATCH_CTL_A_A + ofst, 0);
@@ -5894,7 +5972,6 @@
 		 * maximum packet capture size of 9600 bytes is recommended.
 		 * Also in this mode, only trace0 can be enabled and running.
 		 */
-		multitrc = 0;
 		if (tp->snap_len > 9600 || idx)
 			return -EINVAL;
 	}
@@ -6102,7 +6179,7 @@
 	 *        ( MPSBGMAP[Port 1] <<  8 ) |
 	 *        ( MPSBGMAP[Port 0] <<  0 ))
 	 */
-	if (adapter->flags & FW_OK) {
+	if (adapter->flags & CXGB4_FW_OK) {
 		u32 param, val;
 		int ret;
 
@@ -6132,6 +6209,37 @@
 }
 
 /**
+ *      t4_get_tp_e2c_map - return the E2C channel map associated with a port
+ *      @adapter: the adapter
+ *      @pidx: the port index
+ */
+static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx)
+{
+	unsigned int nports;
+	u32 param, val = 0;
+	int ret;
+
+	nports = 1 << NUMPORTS_G(t4_read_reg(adapter, MPS_CMN_CTL_A));
+	if (pidx >= nports) {
+		CH_WARN(adapter, "TP E2C Channel Port Index %d >= Nports %d\n",
+			pidx, nports);
+		return 0;
+	}
+
+	/* FW version >= 1.16.44.0 can determine E2C channel map using
+	 * FW_PARAMS_PARAM_DEV_TPCHMAP API.
+	 */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_TPCHMAP));
+	ret = t4_query_params_ns(adapter, adapter->mbox, adapter->pf,
+				 0, 1, &param, &val);
+	if (!ret)
+		return (val >> (8 * pidx)) & 0xff;
+
+	return 0;
+}
+
+/**
  *	t4_get_tp_ch_map - return TP ingress channels associated with a port
  *	@adapter: the adapter
  *	@pidx: the port index
@@ -6689,6 +6797,47 @@
 }
 
 /**
+ *	t4_read_sge_dbqtimers - read SGE Doorbell Queue Timer values
+ *	@adap: the adapter
+ *	@ndbqtimers: size of the provided SGE Doorbell Queue Timer table
+ *	@dbqtimers: SGE Doorbell Queue Timer table
+ *
+ *	Reads the SGE Doorbell Queue Timer values into the provided table.
+ *	Returns 0 on success (Firmware and Hardware support this feature),
+ *	an error on failure.
+ */
+int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
+			  u16 *dbqtimers)
+{
+	int ret, dbqtimerix;
+
+	ret = 0;
+	dbqtimerix = 0;
+	while (dbqtimerix < ndbqtimers) {
+		int nparams, param;
+		u32 params[7], vals[7];
+
+		nparams = ndbqtimers - dbqtimerix;
+		if (nparams > ARRAY_SIZE(params))
+			nparams = ARRAY_SIZE(params);
+
+		for (param = 0; param < nparams; param++)
+			params[param] =
+			  (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+			   FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMER) |
+			   FW_PARAMS_PARAM_Y_V(dbqtimerix + param));
+		ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+				      nparams, params, vals);
+		if (ret)
+			break;
+
+		for (param = 0; param < nparams; param++)
+			dbqtimers[dbqtimerix++] = vals[param];
+	}
+	return ret;
+}
+
+/**
  *      t4_fw_hello - establish communication with FW
  *      @adap: the adapter
  *      @mbox: mailbox to use for the FW command
@@ -6776,8 +6925,8 @@
 			waiting -= 50;
 
 			/*
-			 * If neither Error nor Initialialized are indicated
-			 * by the firmware keep waiting till we exaust our
+			 * If neither Error nor Initialized are indicated
+			 * by the firmware keep waiting till we exhaust our
 			 * timeout ... and then retry if we haven't exhausted
 			 * our retries ...
 			 */
@@ -7022,10 +7171,10 @@
 	if (!t4_fw_matches_chip(adap, fw_hdr))
 		return -EINVAL;
 
-	/* Disable FW_OK flag so that mbox commands with FW_OK flag set
-	 * wont be sent when we are flashing FW.
+	/* Disable CXGB4_FW_OK flag so that mbox commands with CXGB4_FW_OK flag
+	 * set won't be sent when we are flashing FW.
 	 */
-	adap->flags &= ~FW_OK;
+	adap->flags &= ~CXGB4_FW_OK;
 
 	ret = t4_fw_halt(adap, mbox, force);
 	if (ret < 0 && !force)
@@ -7064,7 +7213,7 @@
 	 */
 	(void)t4_init_devlog_params(adap);
 out:
-	adap->flags |= FW_OK;
+	adap->flags |= CXGB4_FW_OK;
 	return ret;
 }
 
@@ -7089,7 +7238,7 @@
 	 * separately.  The actual Ingress Packet Data alignment boundary
 	 * within Packed Buffer Mode is the maximum of these two
 	 * specifications.  (Note that it makes no real practical sense to
-	 * have the Pading Boudary be larger than the Packing Boundary but you
+	 * have the Padding Boundary be larger than the Packing Boundary but you
 	 * could set the chip up that way and, in fact, legacy T4 code would
 	 * end doing this because it would initialize the Padding Boundary and
 	 * leave the Packing Boundary initialized to 0 (16 bytes).)
@@ -7160,7 +7309,6 @@
 	} else {
 		unsigned int pack_align;
 		unsigned int ingpad, ingpack;
-		unsigned int pcie_cap;
 
 		/* T5 introduced the separation of the Free List Padding and
 		 * Packing Boundaries.  Thus, we can select a smaller Padding
@@ -7185,8 +7333,7 @@
 		 * multiple of the Maximum Payload Size.
 		 */
 		pack_align = fl_align;
-		pcie_cap = pci_find_capability(adap->pdev, PCI_CAP_ID_EXP);
-		if (pcie_cap) {
+		if (pci_is_pcie(adap->pdev)) {
 			unsigned int mps, mps_log;
 			u16 devctl;
 
@@ -7194,9 +7341,8 @@
 			 * [bits 7:5] encodes sizes as powers of 2 starting at
 			 * 128 bytes.
 			 */
-			pci_read_config_word(adap->pdev,
-					     pcie_cap + PCI_EXP_DEVCTL,
-					     &devctl);
+			pcie_capability_read_word(adap->pdev, PCI_EXP_DEVCTL,
+						  &devctl);
 			mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
 			mps = 1 << mps_log;
 			if (mps > pack_align)
@@ -7482,7 +7628,7 @@
  */
 int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port,
 		unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac,
-		unsigned int *rss_size)
+		unsigned int *rss_size, u8 *vivld, u8 *vin)
 {
 	int ret;
 	struct fw_vi_cmd c;
@@ -7517,6 +7663,13 @@
 	}
 	if (rss_size)
 		*rss_size = FW_VI_CMD_RSSSIZE_G(be16_to_cpu(c.rsssize_pkd));
+
+	if (vivld)
+		*vivld = FW_VI_CMD_VFVLD_G(be32_to_cpu(c.alloc_to_len16));
+
+	if (vin)
+		*vin = FW_VI_CMD_VIN_G(be32_to_cpu(c.alloc_to_len16));
+
 	return FW_VI_CMD_VIID_G(be16_to_cpu(c.type_viid));
 }
 
@@ -7974,7 +8127,7 @@
  *	MAC value.
  */
 int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
-		  int idx, const u8 *addr, bool persist, bool add_smt)
+		  int idx, const u8 *addr, bool persist, u8 *smt_idx)
 {
 	int ret, mode;
 	struct fw_vi_mac_cmd c;
@@ -7983,7 +8136,7 @@
 
 	if (idx < 0)                             /* new allocation */
 		idx = persist ? FW_VI_MAC_ADD_PERSIST_MAC : FW_VI_MAC_ADD_MAC;
-	mode = add_smt ? FW_VI_MAC_SMT_AND_MPSTCAM : FW_VI_MAC_MPS_TCAM_ENTRY;
+	mode = smt_idx ? FW_VI_MAC_SMT_AND_MPSTCAM : FW_VI_MAC_MPS_TCAM_ENTRY;
 
 	memset(&c, 0, sizeof(c));
 	c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
@@ -8000,6 +8153,23 @@
 		ret = FW_VI_MAC_CMD_IDX_G(be16_to_cpu(p->valid_to_idx));
 		if (ret >= max_mac_addr)
 			ret = -ENOMEM;
+		if (smt_idx) {
+			if (adap->params.viid_smt_extn_support) {
+				*smt_idx = FW_VI_MAC_CMD_SMTID_G
+						    (be32_to_cpu(c.op_to_viid));
+			} else {
+				/* In T4/T5, SMT contains 256 SMAC entries
+				 * organized in 128 rows of 2 entries each.
+				 * In T6, SMT contains 256 SMAC entries in
+				 * 256 rows.
+				 */
+				if (CHELSIO_CHIP_VERSION(adap->params.chip) <=
+								     CHELSIO_T5)
+					*smt_idx = (viid & FW_VIID_VIN_M) << 1;
+				else
+					*smt_idx = (viid & FW_VIID_VIN_M);
+			}
+		}
 	}
 	return ret;
 }
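A worked example of the T4/T5 versus T6 SMT index arithmetic in the fallback branch above (the VIID value is hypothetical):

	unsigned int viid = 0x805;	/* hypothetical VIID, VIN == 5 */

	/* T4/T5: 128 rows of 2 SMAC entries, so the index doubles the VIN. */
	u8 smt_idx_t5 = (viid & FW_VIID_VIN_M) << 1;	/* == 10 */
	/* T6: 256 rows of 1 entry, so the VIN is used directly. */
	u8 smt_idx_t6 = (viid & FW_VIID_VIN_M);		/* == 5 */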
@@ -8444,6 +8614,10 @@
 	fc = fwcap_to_cc_pause(linkattr);
 	speed = fwcap_to_speed(linkattr);
 
+	/* Reset state for communicating new Transceiver Module status and
+	 * whether the OS-dependent layer wants us to redo the current
+	 * "sticky" L1 Configure Link Parameters.
+	 */
 	lc->new_module = false;
 	lc->redo_l1cfg = false;
 
@@ -8480,9 +8654,15 @@
 		 */
 		pi->port_type = port_type;
 
+		/* Record new Module Type information.
+		 */
 		pi->mod_type = mod_type;
 
+		/* Let the OS-dependent layer know if we have a new
+		 * Transceiver Module inserted.
+		 */
 		lc->new_module = t4_is_inserted_mod_type(mod_type);
+
 		t4_os_portmod_changed(adapter, pi->port_id);
 	}
 
@@ -8490,8 +8670,10 @@
 	    fc != lc->fc || fec != lc->fec) {	/* something changed */
 		if (!link_ok && lc->link_ok) {
 			lc->link_down_rc = linkdnrc;
-			dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n",
-				 pi->tx_chan, t4_link_down_rc_str(linkdnrc));
+			dev_warn_ratelimited(adapter->pdev_dev,
+					     "Port %d link down, reason: %s\n",
+					     pi->tx_chan,
+					     t4_link_down_rc_str(linkdnrc));
 		}
 		lc->link_ok = link_ok;
 		lc->speed = speed;
@@ -8501,6 +8683,11 @@
 		lc->lpacaps = lpacaps;
 		lc->acaps = acaps & ADVERT_MASK;
 
+		/* If we're not physically capable of Auto-Negotiation, note
+		 * this as Auto-Negotiation disabled.  Otherwise, we track
+		 * what Auto-Negotiation settings we have.  Note parallel
+		 * structure in t4_link_l1cfg_core() and init_link_config().
+		 */
 		if (!(lc->acaps & FW_PORT_CAP32_ANEG)) {
 			lc->autoneg = AUTONEG_DISABLE;
 		} else if (lc->acaps & FW_PORT_CAP32_ANEG) {
@@ -8518,6 +8705,10 @@
 		t4_os_link_changed(adapter, pi->port_id, link_ok);
 	}
 
+	/* If we have a new Transceiver Module and the OS-dependent code has
+	 * told us that it wants us to redo whatever "sticky" L1 Configuration
+	 * Link Parameters are set, do that now.
+	 */
 	if (lc->new_module && lc->redo_l1cfg) {
 		struct link_config old_lc;
 		int ret;
@@ -8587,7 +8778,7 @@
 {
 	unsigned int fw_caps = pi->adapter->params.fw_caps_support;
 	struct fw_port_cmd port_cmd;
-	unsigned int action, link_ok, speed, mtu;
+	unsigned int action, link_ok, mtu;
 	fw_port_cap32_t linkattr;
 	int ret;
 
@@ -8621,7 +8812,6 @@
 		mtu = FW_PORT_CMD_MTU32_G(
 			be32_to_cpu(port_cmd.u.info32.auxlinfo32_mtu32));
 	}
-	speed = fwcap_to_speed(linkattr);
 
 	*link_okp = link_ok;
 	*speedp = fwcap_to_speed(linkattr);
@@ -8783,10 +8973,10 @@
 			goto found;
 		}
 
-	/* Decode Flash part size.  The code below looks repetative with
+	/* Decode Flash part size.  The code below looks repetitive with
 	 * common encodings, but that's not guaranteed in the JEDEC
-	 * specification for the Read JADEC ID command.  The only thing that
-	 * we're guaranteed by the JADEC specification is where the
+	 * specification for the Read JEDEC ID command.  The only thing that
+	 * we're guaranteed by the JEDEC specification is where the
 	 * Manufacturer ID is in the returned result.  After that each
 	 * Manufacturer ~could~ encode things completely differently.
 	 * Note, all Flash parts must have 64KB sectors.
@@ -9127,7 +9317,7 @@
 	struct fw_devlog_cmd devlog_cmd;
 	int ret;
 
-	/* If we're dealing with newer firmware, the Device Log Paramerters
+	/* If we're dealing with newer firmware, the Device Log Parameters
 	 * are stored in a designated register which allows us to access the
 	 * Device Log even if we can't talk to the firmware.
 	 */
@@ -9206,8 +9396,9 @@
  */
 int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
 {
-	int chan;
-	u32 v;
+	u32 param, val, v;
+	int chan, ret;
+
 
 	v = t4_read_reg(adap, TP_TIMER_RESOLUTION_A);
 	adap->params.tp.tre = TIMERRESOLUTION_G(v);
@@ -9217,11 +9408,47 @@
 	for (chan = 0; chan < NCHAN; chan++)
 		adap->params.tp.tx_modq[chan] = chan;
 
-	/* Cache the adapter's Compressed Filter Mode and global Incress
+	/* Cache the adapter's Compressed Filter Mode/Mask and global Ingress
 	 * Configuration.
 	 */
-	t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
-		       TP_VLAN_PRI_MAP_A, sleep_ok);
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FILTER) |
+		 FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_FILTER_MODE_MASK));
+
+	/* Read current value */
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
+			      &param, &val);
+	if (ret == 0) {
+		dev_info(adap->pdev_dev,
+			 "Current filter mode/mask 0x%x:0x%x\n",
+			 FW_PARAMS_PARAM_FILTER_MODE_G(val),
+			 FW_PARAMS_PARAM_FILTER_MASK_G(val));
+		adap->params.tp.vlan_pri_map =
+			FW_PARAMS_PARAM_FILTER_MODE_G(val);
+		adap->params.tp.filter_mask =
+			FW_PARAMS_PARAM_FILTER_MASK_G(val);
+	} else {
+		dev_info(adap->pdev_dev,
+			 "Failed to read filter mode/mask via fw api, using indirect-reg-read\n");
+
+		/* In case of an older fw (which doesn't expose the
+		 * FW_PARAM_DEV_FILTER_MODE_MASK api) and a newer driver (which
+		 * uses the fw api) combination, fall back to the older method
+		 * of reading the filter mode from the indirect register.
+		 */
+		t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
+			       TP_VLAN_PRI_MAP_A, sleep_ok);
+
+		/* With the older-fw and newer-driver combination we might run
+		 * into an issue when the user wants to use the hash filter
+		 * region but the filter_mask is zero; in that case filter_mask
+		 * validation is tough.  To avoid that, we set the filter_mask
+		 * to be the same as the filter mode, which behaves exactly
+		 * like the older way of ignoring the filter mask validation.
+		 */
+		adap->params.tp.filter_mask = adap->params.tp.vlan_pri_map;
+	}
+
 	t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1,
 		       TP_INGRESS_CONFIG_A, sleep_ok);
 
@@ -9368,6 +9595,7 @@
 	enum fw_port_type port_type;
 	int mdio_addr;
 	fw_port_cap32_t pcaps, acaps;
+	u8 vivld = 0, vin = 0;
 	int ret;
 
 	/* If we haven't yet determined whether we're talking to Firmware
@@ -9422,7 +9650,8 @@
 		acaps = be32_to_cpu(cmd.u.info32.acaps32);
 	}
 
-	ret = t4_alloc_vi(pi->adapter, mbox, port, pf, vf, 1, mac, &rss_size);
+	ret = t4_alloc_vi(pi->adapter, mbox, port, pf, vf, 1, mac, &rss_size,
+			  &vivld, &vin);
 	if (ret < 0)
 		return ret;
 
@@ -9430,6 +9659,19 @@
 	pi->tx_chan = port;
 	pi->lport = port;
 	pi->rss_size = rss_size;
+	pi->rx_cchan = t4_get_tp_e2c_map(pi->adapter, port);
+
+	/* If fw supports returning the VIN as part of FW_VI_CMD,
+	 * save the returned values.
+	 */
+	if (adapter->params.viid_smt_extn_support) {
+		pi->vivld = vivld;
+		pi->vin = vin;
+	} else {
+		/* Retrieve the values from VIID */
+		pi->vivld = FW_VIID_VIVLD_G(pi->viid);
+		pi->vin = FW_VIID_VIN_G(pi->viid);
+	}
 
 	pi->port_type = port_type;
 	pi->mdio_addr = mdio_addr;
@@ -10209,7 +10451,9 @@
 					 FW_ACL_VLAN_CMD_VFN_V(vf));
 	vlan_cmd.en_to_len16 = cpu_to_be32(enable | FW_LEN16(vlan_cmd));
 	/* Drop all packets that donot match vlan id */
-	vlan_cmd.dropnovlan_fm = FW_ACL_VLAN_CMD_FM_F;
+	vlan_cmd.dropnovlan_fm = (enable
+				  ? (FW_ACL_VLAN_CMD_DROPNOVLAN_F |
+				     FW_ACL_VLAN_CMD_FM_F) : 0);
 	if (enable != 0) {
 		vlan_cmd.nvlan = 1;
 		vlan_cmd.vlanid[0] = cpu_to_be16(vlan);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index 361d503..002fc62 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -91,6 +91,7 @@
 	SGE_CTXT_SIZE = 24,       /* size of SGE context */
 	SGE_NTIMERS = 6,          /* # of interrupt holdoff timer values */
 	SGE_NCOUNTERS = 4,        /* # of interrupt packet counter values */
+	SGE_NDBQTIMERS = 8,       /* # of Doorbell Queue Timer values */
 	SGE_MAX_IQ_SIZE = 65520,
 
 	SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */
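SGE_NDBQTIMERS sizes the table consumed by t4_read_sge_dbqtimers() from the t4_hw.c hunk above; a hypothetical init-time use might look like this (adap as in the surrounding driver code):

	u16 dbqtimers[SGE_NDBQTIMERS];
	int ret;

	/* The helper batches the firmware parameter reads 7 at a time. */
	ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(dbqtimers), dbqtimers);
	if (ret)
		dev_warn(adap->pdev_dev,
			 "SGE Doorbell Queue Timers unavailable: %d\n", ret);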
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index f152da1..38dd41e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -56,6 +56,7 @@
 	CPL_TX_DATA_ISO	      = 0x1F,
 
 	CPL_CLOSE_LISTSRV_RPL = 0x20,
+	CPL_GET_TCB_RPL       = 0x22,
 	CPL_L2T_WRITE_RPL     = 0x23,
 	CPL_PASS_OPEN_RPL     = 0x24,
 	CPL_ACT_OPEN_RPL      = 0x25,
@@ -688,6 +689,13 @@
 #define NO_REPLY_V(x) ((x) << NO_REPLY_S)
 #define NO_REPLY_F    NO_REPLY_V(1U)
 
+struct cpl_get_tcb_rpl {
+	union opcode_tid ot;
+	__u8 cookie;
+	__u8 status;
+	__be16 len;
+};
+
 struct cpl_set_tcb_field {
 	WR_HDR;
 	union opcode_tid ot;
@@ -1453,6 +1461,9 @@
 #define T6_TX_FORCE_V(x)	((x) << T6_TX_FORCE_S)
 #define T6_TX_FORCE_F		T6_TX_FORCE_V(1U)
 
+#define TX_URG_S    16
+#define TX_URG_V(x) ((x) << TX_URG_S)
+
 #define TX_SHOVE_S    14
 #define TX_SHOVE_V(x) ((x) << TX_SHOVE_S)
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index 60df66f..0c53734 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -217,6 +217,8 @@
 	CH_PCI_ID_TABLE_FENTRY(0x6087), /* Custom T6225-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x6088), /* Custom T62100-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */
+	CH_PCI_ID_TABLE_FENTRY(0x608a), /* Custom T62100-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x608b), /* Custom T6225-CR */
 CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
 
 #endif /* __T4_PCI_ID_TBL_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index eb222d4..a957a6e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -1334,6 +1334,10 @@
 #define TP_OUT_CONFIG_A		0x7d04
 #define TP_GLOBAL_CONFIG_A	0x7d08
 
+#define ACTIVEFILTERCOUNTS_S    22
+#define ACTIVEFILTERCOUNTS_V(x) ((x) << ACTIVEFILTERCOUNTS_S)
+#define ACTIVEFILTERCOUNTS_F    ACTIVEFILTERCOUNTS_V(1U)
+
 #define TP_CMM_TCB_BASE_A 0x7d10
 #define TP_CMM_MM_BASE_A 0x7d14
 #define TP_CMM_TIMER_BASE_A 0x7d18
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
index 3297ce0..1b9afb1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
@@ -41,6 +41,14 @@
 #define TCB_SMAC_SEL_V(x)	((x) << TCB_SMAC_SEL_S)
 
 #define TCB_T_FLAGS_W		1
+#define TCB_T_FLAGS_S		0
+#define TCB_T_FLAGS_M		0xffffffffffffffffULL
+#define TCB_T_FLAGS_V(x)	((__u64)(x) << TCB_T_FLAGS_S)
+
+#define TCB_RQ_START_W		30
+#define TCB_RQ_START_S		0
+#define TCB_RQ_START_M		0x3ffffffULL
+#define TCB_RQ_START_V(x)	((x) << TCB_RQ_START_S)
 
 #define TF_CCTRL_ECE_S		60
 #define TF_CCTRL_CWR_S		61
@@ -66,4 +74,8 @@
 #define TCB_RX_FRAG3_LEN_RAW_W	29
 #define TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W	30
 #define TCB_PDU_HDR_LEN_W	31
+
+#define TF_RX_PDU_OUT_S		49
+#define TF_RX_PDU_OUT_V(x)	((__u64)(x) << TF_RX_PDU_OUT_S)
+
 #endif /* __T4_TCB_H */
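A hypothetical use of the new TCB field macros (sketch only; a real caller would carry these values in a CPL_SET_TCB_FIELD work request):

	/* Clear the RX_PDU_OUT flag in TCB word TCB_T_FLAGS_W: the mask
	 * selects the bit, the value clears it.
	 */
	u16 word = TCB_T_FLAGS_W;
	u64 mask = TF_RX_PDU_OUT_V(1ULL);
	u64 val  = TF_RX_PDU_OUT_V(0ULL);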
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
index f6558cb..eb1aa82 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
@@ -71,12 +71,18 @@
 #define FETCHBURSTMIN_64B_X		2
 #define FETCHBURSTMIN_128B_X		3
 
+/* T6 and later use a single-bit encoding for FetchBurstMin */
+#define FETCHBURSTMIN_64B_T6_X		0
+#define FETCHBURSTMIN_128B_T6_X		1
+
 #define FETCHBURSTMAX_256B_X		2
 #define FETCHBURSTMAX_512B_X		3
 
+#define HOSTFCMODE_INGRESS_QUEUE_X	1
 #define HOSTFCMODE_STATUS_PAGE_X	2
 
 #define CIDXFLUSHTHRESH_32_X		5
+#define CIDXFLUSHTHRESH_128_X		7
 
 #define UPDATEDELIVERY_INTERRUPT_X	1
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 5dc6c41..65313f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1221,6 +1221,23 @@
 /*
  * device parameters
  */
+
+#define FW_PARAMS_PARAM_FILTER_MODE_S 16
+#define FW_PARAMS_PARAM_FILTER_MODE_M 0xffff
+#define FW_PARAMS_PARAM_FILTER_MODE_V(x)          \
+	((x) << FW_PARAMS_PARAM_FILTER_MODE_S)
+#define FW_PARAMS_PARAM_FILTER_MODE_G(x)          \
+	(((x) >> FW_PARAMS_PARAM_FILTER_MODE_S) & \
+	FW_PARAMS_PARAM_FILTER_MODE_M)
+
+#define FW_PARAMS_PARAM_FILTER_MASK_S 0
+#define FW_PARAMS_PARAM_FILTER_MASK_M 0xffff
+#define FW_PARAMS_PARAM_FILTER_MASK_V(x)          \
+	((x) << FW_PARAMS_PARAM_FILTER_MASK_S)
+#define FW_PARAMS_PARAM_FILTER_MASK_G(x)          \
+	(((x) >> FW_PARAMS_PARAM_FILTER_MASK_S) & \
+	FW_PARAMS_PARAM_FILTER_MASK_M)
+
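A quick sketch of the packing these macros implement: the firmware carries the filter mode in the upper 16 bits and the mask in the lower 16 bits of a single 32-bit parameter (values hypothetical):

	u32 val = FW_PARAMS_PARAM_FILTER_MODE_V(0x01ff) |
		  FW_PARAMS_PARAM_FILTER_MASK_V(0x00ff);

	/* FW_PARAMS_PARAM_FILTER_MODE_G(val) == 0x01ff */
	/* FW_PARAMS_PARAM_FILTER_MASK_G(val) == 0x00ff */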
 enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_CCLK	= 0x00, /* chip core clock in khz */
 	FW_PARAMS_PARAM_DEV_PORTVEC	= 0x01, /* the port vector */
@@ -1250,9 +1267,16 @@
 	FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR	= 0x1C,
 	FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
+	FW_PARAMS_PARAM_DEV_TPCHMAP     = 0x1F,
 	FW_PARAMS_PARAM_DEV_HMA_SIZE	= 0x20,
 	FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
+	FW_PARAMS_PARAM_DEV_PPOD_EDRAM  = 0x23,
 	FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR    = 0x24,
+	FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
+	FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28,
+	FW_PARAMS_PARAM_DEV_DBQ_TIMER	= 0x29,
+	FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
+	FW_PARAMS_PARAM_DEV_FILTER = 0x2E,
 };
 
 /*
@@ -1309,6 +1333,16 @@
 	FW_PARAMS_PARAM_PFVF_RAWF_END = 0x37,
 	FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39,
 	FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A,
+	FW_PARAMS_PARAM_PFVF_PPOD_EDRAM_START = 0x3B,
+	FW_PARAMS_PARAM_PFVF_PPOD_EDRAM_END = 0x3C,
+	FW_PARAMS_PARAM_PFVF_LINK_STATE = 0x40,
+};
+
+/* Virtual link state as seen by the specified VF */
+enum vf_link_states {
+	FW_VF_LINK_STATE_AUTO		= 0x00,
+	FW_VF_LINK_STATE_ENABLE		= 0x01,
+	FW_VF_LINK_STATE_DISABLE	= 0x02,
 };
 
 /*
@@ -1321,6 +1355,7 @@
 	FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
 	FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
 	FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13,
+	FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX	= 0x15,
 	FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
 };
 
@@ -1332,6 +1367,12 @@
 enum fw_params_param_dev_diag {
 	FW_PARAM_DEV_DIAG_TMP		= 0x00,
 	FW_PARAM_DEV_DIAG_VDD		= 0x01,
+	FW_PARAM_DEV_DIAG_MAXTMPTHRESH	= 0x02,
+};
+
+enum fw_params_param_dev_filter {
+	FW_PARAM_DEV_FILTER_VNIC_MODE   = 0x00,
+	FW_PARAM_DEV_FILTER_MODE_MASK   = 0x01,
 };
 
 enum fw_params_param_dev_fwcache {
@@ -1749,8 +1790,8 @@
 	__be32 fetchszm_to_iqid;
 	__be32 dcaen_to_eqsize;
 	__be64 eqaddr;
-	__be32 viid_pkd;
-	__be32 r8_lo;
+	__be32 autoequiqe_to_viid;
+	__be32 timeren_timerix;
 	__be64 r9;
 };
 
@@ -1845,6 +1886,10 @@
 #define FW_EQ_ETH_CMD_EQSIZE_S		0
 #define FW_EQ_ETH_CMD_EQSIZE_V(x)	((x) << FW_EQ_ETH_CMD_EQSIZE_S)
 
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_S	31
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_V(x)	((x) << FW_EQ_ETH_CMD_AUTOEQUIQE_S)
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_F	FW_EQ_ETH_CMD_AUTOEQUIQE_V(1U)
+
 #define FW_EQ_ETH_CMD_AUTOEQUEQE_S	30
 #define FW_EQ_ETH_CMD_AUTOEQUEQE_V(x)	((x) << FW_EQ_ETH_CMD_AUTOEQUEQE_S)
 #define FW_EQ_ETH_CMD_AUTOEQUEQE_F	FW_EQ_ETH_CMD_AUTOEQUEQE_V(1U)
@@ -1852,6 +1897,19 @@
 #define FW_EQ_ETH_CMD_VIID_S	16
 #define FW_EQ_ETH_CMD_VIID_V(x)	((x) << FW_EQ_ETH_CMD_VIID_S)
 
+#define FW_EQ_ETH_CMD_TIMEREN_S		3
+#define FW_EQ_ETH_CMD_TIMEREN_M		0x1
+#define FW_EQ_ETH_CMD_TIMEREN_V(x)	((x) << FW_EQ_ETH_CMD_TIMEREN_S)
+#define FW_EQ_ETH_CMD_TIMEREN_G(x)	\
+    (((x) >> FW_EQ_ETH_CMD_TIMEREN_S) & FW_EQ_ETH_CMD_TIMEREN_M)
+#define FW_EQ_ETH_CMD_TIMEREN_F	FW_EQ_ETH_CMD_TIMEREN_V(1U)
+
+#define FW_EQ_ETH_CMD_TIMERIX_S		0
+#define FW_EQ_ETH_CMD_TIMERIX_M		0x7
+#define FW_EQ_ETH_CMD_TIMERIX_V(x)	((x) << FW_EQ_ETH_CMD_TIMERIX_S)
+#define FW_EQ_ETH_CMD_TIMERIX_G(x)	\
+    (((x) >> FW_EQ_ETH_CMD_TIMERIX_S) & FW_EQ_ETH_CMD_TIMERIX_M)
+
 struct fw_eq_ctrl_cmd {
 	__be32 op_to_vfn;
 	__be32 alloc_to_len16;
@@ -2108,6 +2166,19 @@
 #define FW_VI_CMD_FREE_V(x)	((x) << FW_VI_CMD_FREE_S)
 #define FW_VI_CMD_FREE_F	FW_VI_CMD_FREE_V(1U)
 
+#define FW_VI_CMD_VFVLD_S	24
+#define FW_VI_CMD_VFVLD_M	0x1
+#define FW_VI_CMD_VFVLD_V(x)	((x) << FW_VI_CMD_VFVLD_S)
+#define FW_VI_CMD_VFVLD_G(x)	\
+	(((x) >> FW_VI_CMD_VFVLD_S) & FW_VI_CMD_VFVLD_M)
+#define FW_VI_CMD_VFVLD_F	FW_VI_CMD_VFVLD_V(1U)
+
+#define FW_VI_CMD_VIN_S		16
+#define FW_VI_CMD_VIN_M		0xff
+#define FW_VI_CMD_VIN_V(x)	((x) << FW_VI_CMD_VIN_S)
+#define FW_VI_CMD_VIN_G(x)	\
+	(((x) >> FW_VI_CMD_VIN_S) & FW_VI_CMD_VIN_M)
+
 #define FW_VI_CMD_VIID_S	0
 #define FW_VI_CMD_VIID_M	0xfff
 #define FW_VI_CMD_VIID_V(x)	((x) << FW_VI_CMD_VIID_S)
@@ -2181,6 +2252,12 @@
 	} u;
 };
 
+#define FW_VI_MAC_CMD_SMTID_S		12
+#define FW_VI_MAC_CMD_SMTID_M		0xff
+#define FW_VI_MAC_CMD_SMTID_V(x)	((x) << FW_VI_MAC_CMD_SMTID_S)
+#define FW_VI_MAC_CMD_SMTID_G(x)	\
+	(((x) >> FW_VI_MAC_CMD_SMTID_S) & FW_VI_MAC_CMD_SMTID_M)
+
 #define FW_VI_MAC_CMD_VIID_S	0
 #define FW_VI_MAC_CMD_VIID_V(x)	((x) << FW_VI_MAC_CMD_VIID_S)
 
@@ -2464,6 +2541,7 @@
 
 #define FW_ACL_VLAN_CMD_DROPNOVLAN_S	7
 #define FW_ACL_VLAN_CMD_DROPNOVLAN_V(x)	((x) << FW_ACL_VLAN_CMD_DROPNOVLAN_S)
+#define FW_ACL_VLAN_CMD_DROPNOVLAN_F    FW_ACL_VLAN_CMD_DROPNOVLAN_V(1U)
 
 #define FW_ACL_VLAN_CMD_FM_S		6
 #define FW_ACL_VLAN_CMD_FM_M		0x1
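
The new FW_PARAMS_PARAM_FILTER_MODE/MASK accessors split a single 32-bit firmware parameter value into two 16-bit halves. A self-contained sketch of the pack/unpack round trip (user space, macros copied verbatim from the hunk above):

#include <assert.h>
#include <stdint.h>

#define FW_PARAMS_PARAM_FILTER_MODE_S 16
#define FW_PARAMS_PARAM_FILTER_MODE_M 0xffff
#define FW_PARAMS_PARAM_FILTER_MODE_V(x) ((x) << FW_PARAMS_PARAM_FILTER_MODE_S)
#define FW_PARAMS_PARAM_FILTER_MODE_G(x) \
	(((x) >> FW_PARAMS_PARAM_FILTER_MODE_S) & FW_PARAMS_PARAM_FILTER_MODE_M)

#define FW_PARAMS_PARAM_FILTER_MASK_S 0
#define FW_PARAMS_PARAM_FILTER_MASK_M 0xffff
#define FW_PARAMS_PARAM_FILTER_MASK_V(x) ((x) << FW_PARAMS_PARAM_FILTER_MASK_S)
#define FW_PARAMS_PARAM_FILTER_MASK_G(x) \
	(((x) >> FW_PARAMS_PARAM_FILTER_MASK_S) & FW_PARAMS_PARAM_FILTER_MASK_M)

int main(void)
{
	uint32_t val = FW_PARAMS_PARAM_FILTER_MODE_V(0x1234u) |
		       FW_PARAMS_PARAM_FILTER_MASK_V(0x00ffu);

	assert(FW_PARAMS_PARAM_FILTER_MODE_G(val) == 0x1234u);
	assert(FW_PARAMS_PARAM_FILTER_MASK_G(val) == 0x00ffu);
	return 0;
}
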
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index a844296..a02b1df 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -36,8 +36,8 @@
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x14
-#define T4FW_VERSION_MICRO 0x08
+#define T4FW_VERSION_MINOR 0x17
+#define T4FW_VERSION_MICRO 0x03
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
 #define T4FW_MIN_VERSION_MICRO 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x14
-#define T5FW_VERSION_MICRO 0x08
+#define T5FW_VERSION_MINOR 0x17
+#define T5FW_VERSION_MICRO 0x03
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
 #define T5FW_MIN_VERSION_MICRO 0x00
 
 #define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x14
-#define T6FW_VERSION_MICRO 0x08
+#define T6FW_VERSION_MINOR 0x17
+#define T6FW_VERSION_MICRO 0x03
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/Makefile b/drivers/net/ethernet/chelsio/cxgb4vf/Makefile
index d72ee26..f527ab1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Chelsio T4 SR-IOV Virtual Function Driver
 #
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 5883f09..3782e48 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -94,7 +94,7 @@
 	struct adapter *adapter;	/* our adapter */
 	u32 vlan_id;			/* vlan id for VST */
 	u16 viid;			/* virtual interface ID */
-	s16 xact_addr_filt;		/* index of our MAC address filter */
+	int xact_addr_filt;		/* index of our MAC address filter */
 	u16 rss_size;			/* size of VI's RSS table slice */
 	u8 pidx;			/* index into adapter port[] */
 	s8 mdio_addr;
@@ -352,6 +352,7 @@
 struct hash_mac_addr {
 	struct list_head list;
 	u8 addr[ETH_ALEN];
+	unsigned int iface_mac;
 };
 
 struct mbox_list {
@@ -405,11 +406,12 @@
 };
 
 enum { /* adapter flags */
-	FULL_INIT_DONE     = (1UL << 0),
-	USING_MSI          = (1UL << 1),
-	USING_MSIX         = (1UL << 2),
-	QUEUES_BOUND       = (1UL << 3),
-	ROOT_NO_RELAXED_ORDERING = (1UL << 4),
+	CXGB4VF_FULL_INIT_DONE			= (1UL << 0),
+	CXGB4VF_USING_MSI			= (1UL << 1),
+	CXGB4VF_USING_MSIX			= (1UL << 2),
+	CXGB4VF_QUEUES_BOUND			= (1UL << 3),
+	CXGB4VF_ROOT_NO_RELAXED_ORDERING	= (1UL << 4),
+	CXGB4VF_FW_OK				= (1UL << 5),
 };
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ff84791..f6fc087 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -155,6 +155,8 @@
 		const char *fc;
 		const struct port_info *pi = netdev_priv(dev);
 
+		netif_carrier_on(dev);
+
 		switch (pi->link_cfg.speed) {
 		case 100:
 			s = "100Mbps";
@@ -200,6 +202,7 @@
 
 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
 	} else {
+		netif_carrier_off(dev);
 		netdev_info(dev, "link down\n");
 	}
 }
@@ -236,6 +239,73 @@
 			 "inserted\n", dev->name, pi->mod_type);
 }
 
+static int cxgb4vf_set_addr_hash(struct port_info *pi)
+{
+	struct adapter *adapter = pi->adapter;
+	u64 vec = 0;
+	bool ucast = false;
+	struct hash_mac_addr *entry;
+
+	/* Calculate the hash vector for the updated list and program it */
+	list_for_each_entry(entry, &adapter->mac_hlist, list) {
+		ucast |= is_unicast_ether_addr(entry->addr);
+		vec |= (1ULL << hash_mac_addr(entry->addr));
+	}
+	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
+}
+
+/**
+ *	cxgb4vf_change_mac - Update match filter for a MAC address.
+ *	@pi: the port_info
+ *	@viid: the VI id
+ *	@tcam_idx: TCAM index of existing filter for old value of MAC address,
+ *		   or -1
+ *	@addr: the new MAC address value
+ *	@persistent: whether a new MAC allocation should be persistent
+ *
+ *	Modifies an MPS filter and sets it to the new MAC address if
+ *	@tcam_idx >= 0, or adds the MAC address to a new filter if
+ *	@tcam_idx < 0. In the latter case the address is added persistently
+ *	if @persistent is %true.
+ *	If the TCAM runs out of entries, the address is programmed into the
+ *	hash region instead.
+ *
+ */
+static int cxgb4vf_change_mac(struct port_info *pi, unsigned int viid,
+			      int *tcam_idx, const u8 *addr, bool persistent)
+{
+	struct hash_mac_addr *new_entry, *entry;
+	struct adapter *adapter = pi->adapter;
+	int ret;
+
+	ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent);
+	/* We ran out of TCAM entries. Try programming the hash region. */
+	if (ret == -ENOMEM) {
+		/* If the MAC address to be updated is already in the hash
+		 * address list, update the existing entry in place.
+		 */
+		list_for_each_entry(entry, &adapter->mac_hlist, list) {
+			if (entry->iface_mac) {
+				ether_addr_copy(entry->addr, addr);
+				goto set_hash;
+			}
+		}
+		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+		if (!new_entry)
+			return -ENOMEM;
+		ether_addr_copy(new_entry->addr, addr);
+		new_entry->iface_mac = true;
+		list_add_tail(&new_entry->list, &adapter->mac_hlist);
+set_hash:
+		ret = cxgb4vf_set_addr_hash(pi);
+	} else if (ret >= 0) {
+		*tcam_idx = ret;
+		ret = 0;
+	}
+
+	return ret;
+}
+
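
For reference, the hash fallback above ORs one bit per address into a 64-bit vector via hash_mac_addr(). A sketch of that folding hash as it appears in the Chelsio common code (plain C here; treat the exact folding as an assumption of this note rather than something this patch defines):

#include <stdint.h>

/* Fold a 48-bit MAC address down to a 6-bit bucket, i.e. one bit
 * position in the 64-bit hash vector programmed by
 * cxgb4vf_set_addr_hash() above.
 */
static unsigned int hash_mac_addr(const uint8_t *addr)
{
	uint32_t a = ((uint32_t)addr[0] << 16) |
		     ((uint32_t)addr[1] << 8) | addr[2];
	uint32_t b = ((uint32_t)addr[3] << 16) |
		     ((uint32_t)addr[4] << 8) | addr[5];

	a ^= b;		/* fold the two 24-bit halves together */
	a ^= (a >> 12);
	a ^= (a >> 6);	/* ... then down to 6 bits */
	return a & 0x3f;
}
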
 /*
  * Net device operations.
  * ======================
@@ -259,14 +329,10 @@
 	 */
 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
 			      true);
-	if (ret == 0) {
-		ret = t4vf_change_mac(pi->adapter, pi->viid,
-				      pi->xact_addr_filt, dev->dev_addr, true);
-		if (ret >= 0) {
-			pi->xact_addr_filt = ret;
-			ret = 0;
-		}
-	}
+	if (ret == 0)
+		ret = cxgb4vf_change_mac(pi, pi->viid,
+					 &pi->xact_addr_filt,
+					 dev->dev_addr, true);
 
 	/*
 	 * We don't need to actually "start the link" itself since the
@@ -276,16 +342,6 @@
 	if (ret == 0)
 		ret = t4vf_enable_pi(pi->adapter, pi, true, true);
 
-	/* The Virtual Interfaces are connected to an internal switch on the
-	 * chip which allows VIs attached to the same port to talk to each
-	 * other even when the port link is down.  As a result, we generally
-	 * want to always report a VI's link as being "up", provided there are
-	 * no errors in enabling vi.
-	 */
-
-	if (ret == 0)
-		netif_carrier_on(dev);
-
 	return ret;
 }
 
@@ -406,7 +462,7 @@
 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
 	 * its Going To Sleep register here to get it started.
 	 */
-	if (adapter->flags & USING_MSI)
+	if (adapter->flags & CXGB4VF_USING_MSI)
 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 			     CIDXINC_V(0) |
 			     SEINTARM_V(s->intrq.intr_params) |
@@ -462,8 +518,8 @@
 			break;
 		}
 		cpl = (void *)p;
-		/*FALLTHROUGH*/
 	}
+		/* Fall through */
 
 	case CPL_SGE_EGR_UPDATE: {
 		/*
@@ -550,7 +606,7 @@
 	 * the intrq's queue ID as the interrupt forwarding queue for the
 	 * subsequent calls ...
 	 */
-	if (adapter->flags & USING_MSI) {
+	if (adapter->flags & CXGB4VF_USING_MSI) {
 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 					 adapter->port[0], 0, NULL, NULL);
 		if (err)
@@ -710,7 +766,7 @@
 	 * adapter setup.  Once we've done this, many of our adapter
 	 * parameters can no longer be changed ...
 	 */
-	if ((adapter->flags & FULL_INIT_DONE) == 0) {
+	if ((adapter->flags & CXGB4VF_FULL_INIT_DONE) == 0) {
 		err = setup_sge_queues(adapter);
 		if (err)
 			return err;
@@ -720,16 +776,18 @@
 			return err;
 		}
 
-		if (adapter->flags & USING_MSIX)
+		if (adapter->flags & CXGB4VF_USING_MSIX)
 			name_msix_vecs(adapter);
-		adapter->flags |= FULL_INIT_DONE;
+
+		adapter->flags |= CXGB4VF_FULL_INIT_DONE;
 	}
 
 	/*
 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
 	 */
-	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
-	if (adapter->flags & USING_MSIX)
+	BUG_ON((adapter->flags &
+	       (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
+	if (adapter->flags & CXGB4VF_USING_MSIX)
 		err = request_msix_queue_irqs(adapter);
 	else
 		err = request_irq(adapter->pdev->irq,
@@ -747,8 +805,6 @@
 	enable_rx(adapter);
 	t4vf_sge_start(adapter);
 
-	/* Initialize hash mac addr list*/
-	INIT_LIST_HEAD(&adapter->mac_hlist);
 	return 0;
 }
 
@@ -762,7 +818,7 @@
 	/*
 	 * Free interrupt resources.
 	 */
-	if (adapter->flags & USING_MSIX)
+	if (adapter->flags & CXGB4VF_USING_MSIX)
 		free_msix_queue_irqs(adapter);
 	else
 		free_irq(adapter->pdev->irq, adapter);
@@ -783,6 +839,13 @@
 	struct adapter *adapter = pi->adapter;
 
 	/*
+	 * If we don't have a connection to the firmware there's nothing we
+	 * can do.
+	 */
+	if (!(adapter->flags & CXGB4VF_FW_OK))
+		return -ENXIO;
+
+	/*
 	 * If this is the first interface that we're opening on the "adapter",
 	 * bring the "adapter" up now.
 	 */
@@ -792,6 +855,13 @@
 			return err;
 	}
 
+	/* It's possible that the basic port information could have
+	 * changed since we first read it.
+	 */
+	err = t4vf_update_port_info(pi);
+	if (err < 0)
+		return err;
+
 	/*
 	 * Note that this interface is up and start everything up ...
 	 */
@@ -864,21 +934,6 @@
 	return ns;
 }
 
-static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
-{
-	struct adapter *adapter = pi->adapter;
-	u64 vec = 0;
-	bool ucast = false;
-	struct hash_mac_addr *entry;
-
-	/* Calculate the hash vector for the updated list and program it */
-	list_for_each_entry(entry, &adapter->mac_hlist, list) {
-		ucast |= is_unicast_ether_addr(entry->addr);
-		vec |= (1ULL << hash_mac_addr(entry->addr));
-	}
-	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
-}
-
 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
 {
 	struct port_info *pi = netdev_priv(netdev);
@@ -1160,13 +1215,12 @@
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
-			      addr->sa_data, true);
+	ret = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
+				 addr->sa_data, true);
 	if (ret < 0)
 		return ret;
 
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
-	pi->xact_addr_filt = ret;
 	return 0;
 }
 
@@ -1180,7 +1234,7 @@
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
 
-	if (adapter->flags & USING_MSIX) {
+	if (adapter->flags & CXGB4VF_USING_MSIX) {
 		struct sge_eth_rxq *rxq;
 		int nqsets;
 
@@ -1355,7 +1409,7 @@
 	case FW_PORT_TYPE_CR4_QSFP:
 		SET_LMM(FIBRE);
 		FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
-		FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
@@ -1366,6 +1420,13 @@
 		break;
 	}
 
+	if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
+		FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
+		FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
+	} else {
+		SET_LMM(FEC_NONE);
+	}
+
 	FW_CAPS_TO_LMM(ANEG, Autoneg);
 	FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
 	FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
@@ -1418,22 +1479,6 @@
 		base->duplex = DUPLEX_UNKNOWN;
 	}
 
-	if (pi->link_cfg.fc & PAUSE_RX) {
-		if (pi->link_cfg.fc & PAUSE_TX) {
-			ethtool_link_ksettings_add_link_mode(link_ksettings,
-							     advertising,
-							     Pause);
-		} else {
-			ethtool_link_ksettings_add_link_mode(link_ksettings,
-							     advertising,
-							     Asym_Pause);
-		}
-	} else if (pi->link_cfg.fc & PAUSE_TX) {
-		ethtool_link_ksettings_add_link_mode(link_ksettings,
-						     advertising,
-						     Asym_Pause);
-	}
-
 	base->autoneg = pi->link_cfg.autoneg;
 	if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
 		ethtool_link_ksettings_add_link_mode(link_ksettings,
@@ -1588,7 +1633,7 @@
 	    rp->tx_pending < MIN_TXQ_ENTRIES)
 		return -EINVAL;
 
-	if (adapter->flags & FULL_INIT_DONE)
+	if (adapter->flags & CXGB4VF_FULL_INIT_DONE)
 		return -EBUSY;
 
 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
@@ -1872,6 +1917,8 @@
  * TCP Segmentation Offload flags which we support.
  */
 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
+		   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 
 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
 	.get_link_ksettings	= cxgb4vf_get_link_ksettings,
@@ -2103,7 +2150,7 @@
 static int sge_queue_entries(const struct adapter *adapter)
 {
 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
-		((adapter->flags & USING_MSI) != 0);
+		((adapter->flags & CXGB4VF_USING_MSI) != 0);
 }
 
 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
@@ -2249,7 +2296,7 @@
 static int sge_qstats_entries(const struct adapter *adapter)
 {
 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
-		((adapter->flags & USING_MSI) != 0);
+		((adapter->flags & CXGB4VF_USING_MSI) != 0);
 }
 
 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
@@ -2324,19 +2371,7 @@
 
 	return 0;
 }
-
-static int resources_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, resources_show, inode->i_private);
-}
-
-static const struct file_operations resources_proc_fops = {
-	.owner   = THIS_MODULE,
-	.open    = resources_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(resources);
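
DEFINE_SHOW_ATTRIBUTE() (from <linux/seq_file.h>) generates the same single_open() boilerplate the deleted lines spelled out by hand, keyed off the existing resources_show(). Its expansion is approximately:

static int resources_open(struct inode *inode, struct file *file)
{
	return single_open(file, resources_show, inode->i_private);
}

static const struct file_operations resources_fops = {
	.owner   = THIS_MODULE,
	.open    = resources_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
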
 
 /*
  * Show Virtual Interfaces.
@@ -2420,7 +2455,7 @@
 	{ "mboxlog",    0444, &mboxlog_fops },
 	{ "sge_qinfo",  0444, &sge_qinfo_debugfs_fops },
 	{ "sge_qstats", 0444, &sge_qstats_proc_fops },
-	{ "resources",  0444, &resources_proc_fops },
+	{ "resources",  0444, &resources_fops },
 	{ "interfaces", 0444, &interfaces_proc_fops },
 };
 
@@ -2443,11 +2478,10 @@
 	 * Debugfs support is best effort.
 	 */
 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
-		(void)debugfs_create_file(debugfs_files[i].name,
-				  debugfs_files[i].mode,
-				  adapter->debugfs_root,
-				  (void *)adapter,
-				  debugfs_files[i].fops);
+		debugfs_create_file(debugfs_files[i].name,
+				    debugfs_files[i].mode,
+				    adapter->debugfs_root, (void *)adapter,
+				    debugfs_files[i].fops);
 
 	return 0;
 }
@@ -2670,6 +2704,7 @@
 	 */
 	size_nports_qsets(adapter);
 
+	adapter->flags |= CXGB4VF_FW_OK;
 	return 0;
 }
 
@@ -2704,7 +2739,8 @@
 	 * support.  In particular, this means that we need to know what kind
 	 * of interrupts we'll be using ...
 	 */
-	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
+	BUG_ON((adapter->flags &
+	       (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
 
 	/*
 	 * Count the number of 10GbE Virtual Interfaces that we have.
@@ -3030,11 +3066,16 @@
 	 * using Relaxed Ordering.
 	 */
 	if (!pcie_relaxed_ordering_enabled(pdev))
-		adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+		adapter->flags |= CXGB4VF_ROOT_NO_RELAXED_ORDERING;
 
 	err = adap_init0(adapter);
 	if (err)
-		goto err_unmap_bar;
+		dev_err(&pdev->dev,
+			"Adapter initialization failed, error %d. Continuing in debug mode\n",
+			err);
+
+	/* Initialize hash mac addr list */
+	INIT_LIST_HEAD(&adapter->mac_hlist);
 
 	/*
 	 * Allocate our "adapter ports" and stitch everything together.
@@ -3056,13 +3097,6 @@
 			break;
 		port_id = ffs(pmask) - 1;
 		pmask &= ~(1 << port_id);
-		viid = t4vf_alloc_vi(adapter, port_id);
-		if (viid < 0) {
-			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
-				" err=%d\n", port_id, viid);
-			err = viid;
-			goto err_free_dev;
-		}
 
 		/*
 		 * Allocate our network device and stitch things together.
@@ -3070,7 +3104,6 @@
 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
 					   MAX_PORT_QSETS);
 		if (netdev == NULL) {
-			t4vf_free_vi(adapter, viid);
 			err = -ENOMEM;
 			goto err_free_dev;
 		}
@@ -3080,26 +3113,21 @@
 		pi->adapter = adapter;
 		pi->pidx = pidx;
 		pi->port_id = port_id;
-		pi->viid = viid;
 
 		/*
 		 * Initialize the starting state of our "port" and register
 		 * it.
 		 */
 		pi->xact_addr_filt = -1;
-		netif_carrier_off(netdev);
 		netdev->irq = pdev->irq;
 
-		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
-			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
-		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
-			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-			NETIF_F_HIGHDMA;
-		netdev->features = netdev->hw_features |
-				   NETIF_F_HW_VLAN_CTAG_TX;
+		netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_GRO |
+			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
+			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+		netdev->features = netdev->hw_features;
 		if (pci_using_dac)
 			netdev->features |= NETIF_F_HIGHDMA;
+		netdev->vlan_features = netdev->features & VLAN_FEAT;
 
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 		netdev->min_mtu = 81;
@@ -3110,6 +3138,23 @@
 		netdev->dev_port = pi->port_id;
 
 		/*
+		 * If we haven't been able to contact the firmware, there's
+		 * nothing else we can do for this "port" ...
+		 */
+		if (!(adapter->flags & CXGB4VF_FW_OK))
+			continue;
+
+		viid = t4vf_alloc_vi(adapter, port_id);
+		if (viid < 0) {
+			dev_err(&pdev->dev,
+				"cannot allocate VI for port %d: err=%d\n",
+				port_id, viid);
+			err = viid;
+			goto err_free_dev;
+		}
+		pi->viid = viid;
+
+		/*
 		 * Initialize the hardware/software state for the port.
 		 */
 		err = t4vf_port_init(adapter, pidx);
@@ -3146,7 +3191,7 @@
 	 * get MSI interrupts we bail with the error.
 	 */
 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
-		adapter->flags |= USING_MSIX;
+		adapter->flags |= CXGB4VF_USING_MSIX;
 	else {
 		if (msi == MSI_MSIX) {
 			dev_info(adapter->pdev_dev,
@@ -3166,7 +3211,7 @@
 				" err=%d\n", err);
 			goto err_free_dev;
 		}
-		adapter->flags |= USING_MSI;
+		adapter->flags |= CXGB4VF_USING_MSI;
 	}
 
 	/* Now that we know how many "ports" we have and what interrupt
@@ -3196,6 +3241,7 @@
 			continue;
 		}
 
+		netif_carrier_off(netdev);
 		set_bit(pidx, &adapter->registered_device_map);
 	}
 	if (adapter->registered_device_map == 0) {
@@ -3210,11 +3256,7 @@
 		adapter->debugfs_root =
 			debugfs_create_dir(pci_name(pdev),
 					   cxgb4vf_debugfs_root);
-		if (IS_ERR_OR_NULL(adapter->debugfs_root))
-			dev_warn(&pdev->dev, "could not create debugfs"
-				 " directory");
-		else
-			setup_debugfs(adapter);
+		setup_debugfs(adapter);
 	}
 
 	/*
@@ -3224,8 +3266,8 @@
 	for_each_port(adapter, pidx) {
 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
 			 adapter->port[pidx]->name,
-			 (adapter->flags & USING_MSIX) ? "MSI-X" :
-			 (adapter->flags & USING_MSI)  ? "MSI" : "");
+			 (adapter->flags & CXGB4VF_USING_MSIX) ? "MSI-X" :
+			 (adapter->flags & CXGB4VF_USING_MSI)  ? "MSI" : "");
 	}
 
 	/*
@@ -3238,12 +3280,12 @@
 	 * so far and return the error.
 	 */
 err_disable_interrupts:
-	if (adapter->flags & USING_MSIX) {
+	if (adapter->flags & CXGB4VF_USING_MSIX) {
 		pci_disable_msix(adapter->pdev);
-		adapter->flags &= ~USING_MSIX;
-	} else if (adapter->flags & USING_MSI) {
+		adapter->flags &= ~CXGB4VF_USING_MSIX;
+	} else if (adapter->flags & CXGB4VF_USING_MSI) {
 		pci_disable_msi(adapter->pdev);
-		adapter->flags &= ~USING_MSI;
+		adapter->flags &= ~CXGB4VF_USING_MSI;
 	}
 
 err_free_dev:
@@ -3252,13 +3294,13 @@
 		if (netdev == NULL)
 			continue;
 		pi = netdev_priv(netdev);
-		t4vf_free_vi(adapter, pi->viid);
+		if (pi->viid)
+			t4vf_free_vi(adapter, pi->viid);
 		if (test_bit(pidx, &adapter->registered_device_map))
 			unregister_netdev(netdev);
 		free_netdev(netdev);
 	}
 
-err_unmap_bar:
 	if (!is_t4(adapter->params.chip))
 		iounmap(adapter->bar2);
 
@@ -3287,6 +3329,7 @@
 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
 {
 	struct adapter *adapter = pci_get_drvdata(pdev);
+	struct hash_mac_addr *entry, *tmp;
 
 	/*
 	 * Tear down driver state associated with device.
@@ -3302,12 +3345,12 @@
 			if (test_bit(pidx, &adapter->registered_device_map))
 				unregister_netdev(adapter->port[pidx]);
 		t4vf_sge_stop(adapter);
-		if (adapter->flags & USING_MSIX) {
+		if (adapter->flags & CXGB4VF_USING_MSIX) {
 			pci_disable_msix(adapter->pdev);
-			adapter->flags &= ~USING_MSIX;
-		} else if (adapter->flags & USING_MSI) {
+			adapter->flags &= ~CXGB4VF_USING_MSIX;
+		} else if (adapter->flags & CXGB4VF_USING_MSI) {
 			pci_disable_msi(adapter->pdev);
-			adapter->flags &= ~USING_MSI;
+			adapter->flags &= ~CXGB4VF_USING_MSI;
 		}
 
 		/*
@@ -3330,13 +3373,19 @@
 				continue;
 
 			pi = netdev_priv(netdev);
-			t4vf_free_vi(adapter, pi->viid);
+			if (pi->viid)
+				t4vf_free_vi(adapter, pi->viid);
 			free_netdev(netdev);
 		}
 		iounmap(adapter->regs);
 		if (!is_t4(adapter->params.chip))
 			iounmap(adapter->bar2);
 		kfree(adapter->mbox_log);
+		list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
+					 list) {
+			list_del(&entry->list);
+			kfree(entry);
+		}
 		kfree(adapter);
 	}
 
@@ -3373,12 +3422,12 @@
 	 * Interrupts allowing various internal pathways to drain.
 	 */
 	t4vf_sge_stop(adapter);
-	if (adapter->flags & USING_MSIX) {
+	if (adapter->flags & CXGB4VF_USING_MSIX) {
 		pci_disable_msix(adapter->pdev);
-		adapter->flags &= ~USING_MSIX;
-	} else if (adapter->flags & USING_MSI) {
+		adapter->flags &= ~CXGB4VF_USING_MSIX;
+	} else if (adapter->flags & CXGB4VF_USING_MSI) {
 		pci_disable_msi(adapter->pdev);
-		adapter->flags &= ~USING_MSI;
+		adapter->flags &= ~CXGB4VF_USING_MSI;
 	}
 
 	/*
@@ -3432,13 +3481,11 @@
 		return -EINVAL;
 	}
 
-	/* Debugfs support is optional, just warn if this fails */
+	/* Debugfs support is optional, debugfs will warn if this fails */
 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
-		pr_warn("could not create debugfs entry, continuing\n");
 
 	ret = pci_register_driver(&cxgb4vf_driver);
-	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
+	if (ret < 0)
 		debugfs_remove(cxgb4vf_debugfs_root);
 	return ret;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 3007e1a..f71c973 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -756,7 +756,7 @@
 	 * Allocate the hardware ring and PCI DMA bus address space for said.
 	 */
 	size_t hwlen = nelem * hwsize + stat_size;
-	void *hwring = dma_zalloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);
+	void *hwring = dma_alloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);
 
 	if (!hwring)
 		return NULL;
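
dma_zalloc_coherent() could be dropped tree-wide because dma_alloc_coherent() has returned zeroed memory unconditionally since v5.0, so the substitution is one-for-one. Sketch (hypothetical helper, not driver code):

static void *alloc_hw_ring(struct device *dev, size_t hwlen,
			   dma_addr_t *busaddrp)
{
	/* Zero-filled since v5.0; no explicit memset() needed. */
	void *hwring = dma_alloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);

	return hwring;	/* NULL on allocation failure */
}
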
@@ -2044,8 +2044,9 @@
  */
 irq_handler_t t4vf_intr_handler(struct adapter *adapter)
 {
-	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
-	if (adapter->flags & USING_MSIX)
+	BUG_ON((adapter->flags &
+	       (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
+	if (adapter->flags & CXGB4VF_USING_MSIX)
 		return t4vf_sge_intr_msix;
 	else
 		return t4vf_intr_msi;
@@ -2209,7 +2210,7 @@
 	struct port_info *pi = netdev_priv(dev);
 	struct fw_iq_cmd cmd, rpl;
 	int ret, iqandst, flsz = 0;
-	int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING);
+	int relaxed = !(adapter->flags & CXGB4VF_ROOT_NO_RELAXED_ORDERING);
 
 	/*
 	 * If we're using MSI interrupts and we're not initializing the
@@ -2218,7 +2219,8 @@
 	 * the Forwarded Interrupt Queue must be set up before any other
 	 * ingress queue ...
 	 */
-	if ((adapter->flags & USING_MSI) && rspq != &adapter->sge.intrq) {
+	if ((adapter->flags & CXGB4VF_USING_MSI) &&
+	    rspq != &adapter->sge.intrq) {
 		iqandst = SGE_INTRDST_IQ;
 		intr_dest = adapter->sge.intrq.abs_id;
 	} else
@@ -2268,7 +2270,7 @@
 	cmd.iqaddr = cpu_to_be64(rspq->phys_addr);
 
 	if (fl) {
-		enum chip_type chip =
+		unsigned int chip_ver =
 			CHELSIO_CHIP_VERSION(adapter->params.chip);
 		/*
 		 * Allocate the ring for the hardware free list (with space
@@ -2319,10 +2321,10 @@
 		 */
 		cmd.fl0dcaen_to_fl0cidxfthresh =
 			cpu_to_be16(
-				FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
-						     FETCHBURSTMIN_128B_X :
-						     FETCHBURSTMIN_64B_X) |
-				FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
+				FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5
+						     ? FETCHBURSTMIN_128B_X
+						     : FETCHBURSTMIN_64B_T6_X) |
+				FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
 						     FETCHBURSTMAX_512B_X :
 						     FETCHBURSTMAX_256B_X));
 		cmd.fl0size = cpu_to_be16(flsz);
@@ -2411,10 +2413,11 @@
 			   struct net_device *dev, struct netdev_queue *devq,
 			   unsigned int iqid)
 {
+	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
+	struct port_info *pi = netdev_priv(dev);
+	struct fw_eq_eth_cmd cmd, rpl;
 	struct sge *s = &adapter->sge;
 	int ret, nentries;
-	struct fw_eq_eth_cmd cmd, rpl;
-	struct port_info *pi = netdev_priv(dev);
 
 	/*
 	 * Calculate the size of the hardware TX Queue (including the Status
@@ -2448,17 +2451,19 @@
 	cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC_F |
 					 FW_EQ_ETH_CMD_EQSTART_F |
 					 FW_LEN16(cmd));
-	cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
-				   FW_EQ_ETH_CMD_VIID_V(pi->viid));
+	cmd.autoequiqe_to_viid = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+					     FW_EQ_ETH_CMD_VIID_V(pi->viid));
 	cmd.fetchszm_to_iqid =
 		cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE_V(SGE_HOSTFCMODE_STPG) |
 			    FW_EQ_ETH_CMD_PCIECHN_V(pi->port_id) |
 			    FW_EQ_ETH_CMD_IQID_V(iqid));
 	cmd.dcaen_to_eqsize =
-		cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(SGE_FETCHBURSTMIN_64B) |
-			    FW_EQ_ETH_CMD_FBMAX_V(SGE_FETCHBURSTMAX_512B) |
+		cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+						  ? FETCHBURSTMIN_64B_X
+						  : FETCHBURSTMIN_64B_T6_X) |
+			    FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 			    FW_EQ_ETH_CMD_CIDXFTHRESH_V(
-						SGE_CIDXFLUSHTHRESH_32) |
+						CIDXFLUSHTHRESH_32_X) |
 			    FW_EQ_ETH_CMD_EQSIZE_V(nentries));
 	cmd.eqaddr = cpu_to_be64(txq->q.phys_addr);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 5b8c08c..8a389d6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -313,7 +313,17 @@
 	return ret;
 }
 
+/* In the Physical Function Driver Common Code, the ADVERT_MASK is used to
+ * mask out bits in the Advertised Port Capabilities which are managed via
+ * separate controls, like Pause Frames and Forward Error Correction.  In the
+ * Virtual Function Common Code, since we never perform L1 Configuration on
+ * the Link, the only things we really need to filter out are things which
+ * we decode and report separately like Speed.
+ */
 #define ADVERT_MASK (FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M) | \
+		     FW_PORT_CAP32_802_3_PAUSE | \
+		     FW_PORT_CAP32_802_3_ASM_DIR | \
+		     FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M) | \
 		     FW_PORT_CAP32_ANEG)
 
 /**
@@ -2005,8 +2015,10 @@
 	    fc != lc->fc || fec != lc->fec) {	/* something changed */
 		if (!link_ok && lc->link_ok) {
 			lc->link_down_rc = linkdnrc;
-			dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n",
-				 pi->port_id, t4vf_link_down_rc_str(linkdnrc));
+			dev_warn_ratelimited(adapter->pdev_dev,
+					     "Port %d link down, reason: %s\n",
+					     pi->port_id,
+					     t4vf_link_down_rc_str(linkdnrc));
 		}
 		lc->link_ok = link_ok;
 		lc->speed = speed;
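
Switching the link-down message to dev_warn_ratelimited() keeps a flapping link from flooding the kernel log: the _ratelimited printk variants silently drop messages beyond the default rate limit (a burst of 10 within 5 seconds). Usage is otherwise identical to dev_warn(); hypothetical wrapper:

static void report_link_down(struct device *dev, int port, const char *reason)
{
	/* Same format as dev_warn(), but dropped once the default
	 * rate limit is exceeded. */
	dev_warn_ratelimited(dev, "Port %d link down, reason: %s\n",
			     port, reason);
}
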
diff --git a/drivers/net/ethernet/chelsio/libcxgb/Makefile b/drivers/net/ethernet/chelsio/libcxgb/Makefile
index 2534e30..aa79264 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/Makefile
+++ b/drivers/net/ethernet/chelsio/libcxgb/Makefile
@@ -1,4 +1,5 @@
-ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y := -I $(srctree)/$(src)/../cxgb4
 
 obj-$(CONFIG_CHELSIO_LIB) += libcxgb.o
 
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
index 74849be..2103453 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
@@ -123,6 +123,9 @@
 	unsigned int cpu;
 	int i;
 
+	if (!ppm->pool)
+		return -EINVAL;
+
 	cpu = get_cpu();
 	pool = per_cpu_ptr(ppm->pool, cpu);
 	spin_lock_bh(&pool->lock);
@@ -169,7 +172,9 @@
 	}
 
 	ppm->next = i + count;
-	if (ppm->next >= ppm->bmap_index_max)
+	if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram))
+		ppm->next = 0;
+	else if (ppm->next >= ppm->bmap_index_max)
 		ppm->next = 0;
 
 	spin_unlock_bh(&ppm->map_lock);
@@ -354,7 +359,10 @@
 		ppmax = max;
 
 	/* pool size must be multiple of unsigned long */
-	bmap = BITS_TO_LONGS(ppmax);
+	bmap = ppmax / BITS_PER_TYPE(unsigned long);
+	if (!bmap)
+		return NULL;
+
 	ppmax = (bmap * sizeof(unsigned long)) << 3;
 
 	alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
@@ -379,18 +387,36 @@
 
 int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
 		   struct pci_dev *pdev, void *lldev,
-		   struct cxgbi_tag_format *tformat,
-		   unsigned int ppmax,
-		   unsigned int llimit,
-		   unsigned int start,
-		   unsigned int reserve_factor)
+		   struct cxgbi_tag_format *tformat, unsigned int iscsi_size,
+		   unsigned int llimit, unsigned int start,
+		   unsigned int reserve_factor, unsigned int iscsi_edram_start,
+		   unsigned int iscsi_edram_size)
 {
 	struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
 	struct cxgbi_ppm_pool *pool = NULL;
-	unsigned int ppmax_pool = 0;
 	unsigned int pool_index_max = 0;
-	unsigned int alloc_sz;
+	unsigned int ppmax_pool = 0;
 	unsigned int ppod_bmap_size;
+	unsigned int alloc_sz;
+	unsigned int ppmax;
+
+	if (!iscsi_edram_start)
+		iscsi_edram_size = 0;
+
+	if (iscsi_edram_size &&
+	    ((iscsi_edram_start + iscsi_edram_size) != start)) {
+		pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x "
+			"size 0x%x DDR start 0x%x\n",
+			iscsi_edram_start, iscsi_edram_size, start);
+		return -EINVAL;
+	}
+
+	if (iscsi_edram_size) {
+		reserve_factor = 0;
+		start = iscsi_edram_start;
+	}
+
+	ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT;
 
 	if (ppm) {
 		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
@@ -402,6 +428,10 @@
 	if (reserve_factor) {
 		ppmax_pool = ppmax / reserve_factor;
 		pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
+		if (!pool) {
+			ppmax_pool = 0;
+			reserve_factor = 0;
+		}
 
 		pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
 			 ndev->name, ppmax, ppmax_pool, pool_index_max);
@@ -427,6 +457,14 @@
 			__func__, ppmax, ppmax_pool, ppod_bmap_size, start,
 			end);
 	}
+	if (iscsi_edram_size) {
+		unsigned int first_ddr_idx =
+				iscsi_edram_size >> PPOD_SIZE_SHIFT;
+
+		ppm->max_index_in_edram = first_ddr_idx - 1;
+		bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1);
+		pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx);
+	}
 
 	spin_lock_init(&ppm->map_lock);
 	kref_init(&ppm->refcnt);
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h
index a91ad76..7b02c20 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h
@@ -143,6 +143,7 @@
 	spinlock_t map_lock;		/* ppm map lock */
 	unsigned int bmap_index_max;
 	unsigned int next;
+	unsigned int max_index_in_edram;
 	unsigned long *ppod_bmap;
 	struct cxgbi_ppod_data ppod_data[0];
 };
@@ -324,9 +325,9 @@
 			    unsigned long caller_data);
 int cxgbi_ppm_init(void **ppm_pp, struct net_device *, struct pci_dev *,
 		   void *lldev, struct cxgbi_tag_format *,
-		   unsigned int ppmax, unsigned int llimit,
-		   unsigned int start,
-		   unsigned int reserve_factor);
+		   unsigned int iscsi_size, unsigned int llimit,
+		   unsigned int start, unsigned int reserve_factor,
+		   unsigned int edram_start, unsigned int edram_size);
 int cxgbi_ppm_release(struct cxgbi_ppm *ppm);
 void cxgbi_tagmask_check(unsigned int tagmask, struct cxgbi_tag_format *);
 unsigned int cxgbi_tagmask_set(unsigned int ppmax);
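
cxgbi_ppm_init() grows two trailing arguments describing an optional EDRAM-resident ppod region. A ULD without such a region would presumably keep the old behaviour by passing zeros (hypothetical call site, argument names taken from the prototype above):

	err = cxgbi_ppm_init(&ppm_pp, ndev, pdev, lldev, &tformat,
			     iscsi_size, llimit, start, reserve_factor,
			     0 /* edram_start: no EDRAM ppod region */,
			     0 /* edram_size */);
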
diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig
index ec0b545..48f3198 100644
--- a/drivers/net/ethernet/cirrus/Kconfig
+++ b/drivers/net/ethernet/cirrus/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Cirrus network device configuration
 #
@@ -23,7 +24,7 @@
 	---help---
 	  Support for CS89x0 chipset based Ethernet cards. If you have a
 	  network (Ethernet) card of this type, say Y and read the file
-	  <file:Documentation/networking/cs89x0.txt>.
+	  <file:Documentation/networking/device_drivers/cirrus/cs89x0.txt>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called cs89x0.
@@ -41,7 +42,7 @@
 
 config EP93XX_ETH
 	tristate "EP93xx Ethernet support"
-	depends on ARM && ARCH_EP93XX
+	depends on (ARM && ARCH_EP93XX) || COMPILE_TEST
 	select MII
 	help
 	  This is a driver for the ethernet hardware included in EP93xx CPUs.
diff --git a/drivers/net/ethernet/cirrus/Makefile b/drivers/net/ethernet/cirrus/Makefile
index ca245e2..84865e5 100644
--- a/drivers/net/ethernet/cirrus/Makefile
+++ b/drivers/net/ethernet/cirrus/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Cirrus network device drivers.
 #
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index b3e7faf..c9aebcd 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1844,16 +1844,12 @@
 static int __init cs89x0_platform_probe(struct platform_device *pdev)
 {
 	struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
-	struct net_local *lp;
-	struct resource *mem_res;
 	void __iomem *virt_addr;
 	int err;
 
 	if (!dev)
 		return -ENOMEM;
 
-	lp = netdev_priv(dev);
-
 	dev->irq = platform_get_irq(pdev, 0);
 	if (dev->irq <= 0) {
 		dev_warn(&dev->dev, "interrupt resource missing\n");
@@ -1861,8 +1857,7 @@
 		goto free;
 	}
 
-	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	virt_addr = devm_ioremap_resource(&pdev->dev, mem_res);
+	virt_addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(virt_addr)) {
 		err = PTR_ERR(virt_addr);
 		goto free;
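
devm_platform_ioremap_resource() collapses the deleted two-step lookup into one call; it is roughly equivalent to the following (sketch of the helper's internals):

static void __iomem *example_ioremap(struct platform_device *pdev,
				     unsigned int index)
{
	struct resource *res;

	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
	return devm_ioremap_resource(&pdev->dev, res);
}
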
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 13dfdfc..f37c9a0 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * EP93xx ethernet network device driver
  * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
  * Dedicated to Marija Kulikova.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
@@ -25,7 +21,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 
-#include <mach/hardware.h>
+#include <linux/platform_data/eth-ep93xx.h>
 
 #define DRV_MODULE_NAME		"ep93xx-eth"
 #define DRV_MODULE_VERSION	"0.1"
@@ -767,6 +763,7 @@
 {
 	struct net_device *dev;
 	struct ep93xx_priv *ep;
+	struct resource *mem;
 
 	dev = platform_get_drvdata(pdev);
 	if (dev == NULL)
@@ -782,8 +779,8 @@
 		iounmap(ep->base_addr);
 
 	if (ep->res != NULL) {
-		release_resource(ep->res);
-		kfree(ep->res);
+		mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		release_mem_region(mem->start, resource_size(mem));
 	}
 
 	free_netdev(dev);
diff --git a/drivers/net/ethernet/cisco/Kconfig b/drivers/net/ethernet/cisco/Kconfig
index 15b713a..ee5b7b3 100644
--- a/drivers/net/ethernet/cisco/Kconfig
+++ b/drivers/net/ethernet/cisco/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Cisco device configuration
 #
diff --git a/drivers/net/ethernet/cisco/Makefile b/drivers/net/ethernet/cisco/Makefile
index 6c7437b..074635b 100644
--- a/drivers/net/ethernet/cisco/Makefile
+++ b/drivers/net/ethernet/cisco/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Cisco device drivers.
 #
diff --git a/drivers/net/ethernet/cisco/enic/Kconfig b/drivers/net/ethernet/cisco/enic/Kconfig
index b63f8d8..edaae70 100644
--- a/drivers/net/ethernet/cisco/enic/Kconfig
+++ b/drivers/net/ethernet/cisco/enic/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Cisco device configuration
 #
diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile
index aadcaf7..c3b6feb 100644
--- a/drivers/net/ethernet/cisco/enic/Makefile
+++ b/drivers/net/ethernet/cisco/enic/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ENIC) := enic.o
 
 enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c
index 99038df..9900993 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.c
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c
@@ -32,7 +32,8 @@
 		break;
 	default:
 		return -EPROTONOSUPPORT;
-	};
+	}
+
 	data.type = FILTER_IPV4_5TUPLE;
 	data.u.ipv4.src_addr = ntohl(keys->addrs.v4addrs.src);
 	data.u.ipv4.dst_addr = ntohl(keys->addrs.v4addrs.dst);
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index f42f7a6..ebd5c2c 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -241,7 +241,7 @@
 	}
 	enic_init_vnic_resources(enic);
 	if (running) {
-		err = dev_open(netdev);
+		err = dev_open(netdev, NULL);
 		if (err)
 			goto err_out;
 	}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 60641e2..acb2856 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -119,7 +119,7 @@
 
 	for (i = 0; i < enic->intr_count; i++) {
 		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
-		    (enic->msix[i].affinity_mask &&
+		    (cpumask_available(enic->msix[i].affinity_mask) &&
 		     !cpumask_empty(enic->msix[i].affinity_mask)))
 			continue;
 		if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
@@ -148,7 +148,7 @@
 	for (i = 0; i < enic->intr_count; i++) {
 		if (enic_is_err_intr(enic, i)		||
 		    enic_is_notify_intr(enic, i)	||
-		    !enic->msix[i].affinity_mask	||
+		    !cpumask_available(enic->msix[i].affinity_mask) ||
 		    cpumask_empty(enic->msix[i].affinity_mask))
 			continue;
 		err = irq_set_affinity_hint(enic->msix_entry[i].vector,
@@ -161,7 +161,7 @@
 	for (i = 0; i < enic->wq_count; i++) {
 		int wq_intr = enic_msix_wq_intr(enic, i);
 
-		if (enic->msix[wq_intr].affinity_mask &&
+		if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
 		    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
 			netif_set_xps_queue(enic->netdev,
 					    enic->msix[wq_intr].affinity_mask,
@@ -897,7 +897,7 @@
 	if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
 		netif_tx_stop_queue(txq);
 	skb_tx_timestamp(skb);
-	if (!skb->xmit_more || netif_xmit_stopped(txq))
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
 		vnic_wq_doorbell(wq);
 
 	spin_unlock(&enic->wq_lock[txq_map]);
@@ -1434,7 +1434,8 @@
 		 * csum is correct or is zero.
 		 */
 		if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
-		    tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) {
+		    tcp_udp_csum_ok && outer_csum_ok &&
+		    (ipv4_csum_ok || ipv6)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			skb->csum_level = encap;
 		}
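
skb->xmit_more moved out of the skb into per-CPU softnet state, read through netdev_xmit_more(); the doorbell-batching idiom itself is unchanged: kick the hardware only for the last packet of a dispatch burst, or when the queue has been stopped. Sketch of a typical xmit tail (hypothetical helper around the enic calls shown above):

static void example_kick_hw(struct sk_buff *skb, struct netdev_queue *txq,
			    struct vnic_wq *wq)
{
	skb_tx_timestamp(skb);
	/* netdev_xmit_more() replaces the old skb->xmit_more bit. */
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		vnic_wq_doorbell(wq);
}
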
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index dfd1ad0..a8f4c69 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -372,9 +372,8 @@
 		return -ENODEV;
 	netdev->phydev = phy;
 
-	phy->supported &= PHY_GBIT_FEATURES;
-	phy->supported |= SUPPORTED_Asym_Pause | SUPPORTED_Pause;
-	phy->advertising = phy->supported;
+	phy_set_max_speed(phy, SPEED_1000);
+	phy_support_asym_pause(phy);
 
 	/* set PHY interface type */
 	switch (phy->interface) {
@@ -1183,9 +1182,8 @@
 			buflen = skb_headlen(skb);
 		} else {
 			skb_frag = skb_si->frags + frag;
-			buffer = page_address(skb_frag_page(skb_frag)) +
-				 skb_frag->page_offset;
-			buflen = skb_frag->size;
+			buffer = skb_frag_address(skb_frag);
+			buflen = skb_frag_size(skb_frag);
 		}
 
 		if (frag == last_frag) {
@@ -1236,8 +1234,6 @@
 	int txq_num, nfrags;
 	union dma_rwptr rw;
 
-	SKB_FRAG_ASSERT(skb);
-
 	if (skb->len >= 0x10000)
 		goto out_drop_free;
 
@@ -2426,10 +2422,8 @@
 
 	/* Interrupt */
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
-		dev_err(dev, "no IRQ\n");
+	if (irq <= 0)
 		return irq ? irq : -ENODEV;
-	}
 	port->irq = irq;
 
 	/* Clock the port */
@@ -2530,6 +2524,7 @@
 	struct gemini_ethernet_port *port = platform_get_drvdata(pdev);
 
 	gemini_port_remove(port);
+	free_netdev(port->netdev);
 	return 0;
 }
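
Open-coded masking of phy->supported stopped being viable once supported/advertising became linkmode bitmaps, hence the switch to the phylib helpers. Equivalent configuration in a probe path (sketch):

static void example_setup_phy(struct phy_device *phy)
{
	phy_set_max_speed(phy, SPEED_1000);	/* cap at gigabit */
	phy_support_asym_pause(phy);		/* pause + asym pause */
}
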
 
diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h
index 0b12f89..9fdf77d 100644
--- a/drivers/net/ethernet/cortina/gemini.h
+++ b/drivers/net/ethernet/cortina/gemini.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Register definitions for Gemini GMAC Ethernet device driver
  *
  * Copyright (C) 2006 Storlink, Corp.
diff --git a/drivers/net/ethernet/davicom/Kconfig b/drivers/net/ethernet/davicom/Kconfig
index 680a6d9..a321a71 100644
--- a/drivers/net/ethernet/davicom/Kconfig
+++ b/drivers/net/ethernet/davicom/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Davicom device configuration
 #
diff --git a/drivers/net/ethernet/davicom/Makefile b/drivers/net/ethernet/davicom/Makefile
index 74b31f0..173c87d 100644
--- a/drivers/net/ethernet/davicom/Makefile
+++ b/drivers/net/ethernet/davicom/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Davicom device drivers.
 #
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 50222b7..cce90b5 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *      Davicom DM9000 Fast Ethernet driver for Linux.
  * 	Copyright (C) 1997  Sten Wang
  *
- * 	This program is free software; you can redistribute it and/or
- * 	modify it under the terms of the GNU General Public License
- * 	as published by the Free Software Foundation; either version 2
- * 	of the License, or (at your option) any later version.
- *
- * 	This program is distributed in the hope that it will be useful,
- * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
- * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * 	GNU General Public License for more details.
- *
  * (C) Copyright 1997-1998 DAVICOM Semiconductor,Inc. All Rights Reserved.
  *
  * Additional updates, Copyright:
@@ -395,6 +386,7 @@
 
 	case 3:
 		dev_dbg(db->dev, ": 3 byte IO, falling back to 16bit\n");
+		/* fall through */
 	case 2:
 		db->dumpblk = dm9000_dumpblk_16bit;
 		db->outblk  = dm9000_outblk_16bit;
@@ -1411,8 +1403,8 @@
 		pdata->flags |= DM9000_PLATF_NO_EEPROM;
 
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
-		memcpy(pdata->dev_addr, mac_addr, sizeof(pdata->dev_addr));
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(pdata->dev_addr, mac_addr);
 
 	return pdata;
 }
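
of_get_mac_address() now returns an ERR_PTR() on failure (it can even be -EPROBE_DEFER when the address lives in NVMEM) instead of NULL, so callers must test with IS_ERR(). Sketch with a hypothetical random-address fallback:

static void example_set_mac(struct net_device *ndev, struct device_node *np)
{
	const void *mac = of_get_mac_address(np);

	if (!IS_ERR(mac))
		ether_addr_copy(ndev->dev_addr, mac);
	else
		eth_hw_addr_random(ndev);	/* hypothetical fallback */
}
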
@@ -1508,8 +1500,6 @@
 
 	ndev->irq = platform_get_irq(pdev, 0);
 	if (ndev->irq < 0) {
-		dev_err(db->dev, "interrupt resource unavailable: %d\n",
-			ndev->irq);
 		ret = ndev->irq;
 		goto out;
 	}
@@ -1722,8 +1712,7 @@
 static int
 dm9000_drv_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct board_info *db;
 
 	if (ndev) {
@@ -1745,8 +1734,7 @@
 static int
 dm9000_drv_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct board_info *db = netdev_priv(ndev);
 
 	if (ndev) {
diff --git a/drivers/net/ethernet/dec/Kconfig b/drivers/net/ethernet/dec/Kconfig
index 740bbad..df1eeb0 100644
--- a/drivers/net/ethernet/dec/Kconfig
+++ b/drivers/net/ethernet/dec/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Digital Equipment Inc network device configuration
 #
diff --git a/drivers/net/ethernet/dec/Makefile b/drivers/net/ethernet/dec/Makefile
index 32993fc..e8aa12c 100644
--- a/drivers/net/ethernet/dec/Makefile
+++ b/drivers/net/ethernet/dec/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Digital Equipment Inc. network device drivers.
 #
diff --git a/drivers/net/ethernet/dec/tulip/Kconfig b/drivers/net/ethernet/dec/tulip/Kconfig
index 1003201..8ce6888 100644
--- a/drivers/net/ethernet/dec/tulip/Kconfig
+++ b/drivers/net/ethernet/dec/tulip/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Tulip family network device configuration
 #
@@ -113,7 +114,7 @@
 	  These include the DE425, DE434, DE435, DE450 and DE500 models.  If
 	  you have a network card of this type, say Y.  More specific
 	  information is contained in
-	  <file:Documentation/networking/de4x5.txt>.
+	  <file:Documentation/networking/device_drivers/dec/de4x5.txt>.
 
 	  To compile this driver as a module, choose M here. The module will
 	  be called de4x5.
@@ -137,7 +138,7 @@
 	  This driver is for DM9102(A)/DM9132/DM9801 compatible PCI cards from
 	  Davicom (<http://www.davicom.com.tw/>).  If you have such a network
 	  (Ethernet) card, say Y.  Some information is contained in the file
-	  <file:Documentation/networking/dmfe.txt>.
+	  <file:Documentation/networking/device_drivers/dec/dmfe.txt>.
 
 	  To compile this driver as a module, choose M here. The module will
 	  be called dmfe.
diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index 13430f7..f1a2da1 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -585,7 +585,7 @@
 				netif_dbg(de, tx_done, de->dev,
 					  "tx done, slot %d\n", tx_tail);
 			}
-			dev_kfree_skb_irq(skb);
+			dev_consume_skb_irq(skb);
 		}
 
 next:
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 66535d1..f16853c 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -2107,7 +2107,6 @@
 		.remove  = de4x5_eisa_remove,
         }
 };
-MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids);
 #endif
 
 #ifdef CONFIG_PCI
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 17ef7a2..0efdbd1 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
     A Davicom DM9102/DM9102A/DM9102A+DM9801/DM9102A+DM9802 NIC fast
     ethernet driver for Linux.
     Copyright (C) 1997  Sten Wang
 
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU General Public License
-    as published by the Free Software Foundation; either version 2
-    of the License, or (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
 
     DAVICOM Web-Site: www.davicom.com.tw
 
diff --git a/drivers/net/ethernet/dec/tulip/eeprom.c b/drivers/net/ethernet/dec/tulip/eeprom.c
index 1812f49..ba0a69b 100644
--- a/drivers/net/ethernet/dec/tulip/eeprom.c
+++ b/drivers/net/ethernet/dec/tulip/eeprom.c
@@ -224,9 +224,7 @@
 		        return;
 		}
 
-		mtable = kmalloc(sizeof(struct mediatable) +
-				 count * sizeof(struct medialeaf),
-				 GFP_KERNEL);
+		mtable = kmalloc(struct_size(mtable, mleaf, count), GFP_KERNEL);
 		if (mtable == NULL)
 			return;				/* Horrible, impossible failure. */
 		last_mediatable = tp->mtable = mtable;
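
struct_size() from <linux/overflow.h> replaces the open-coded sizeof arithmetic: it computes sizeof(*mtable) + count * sizeof(mtable->mleaf[0]) and saturates to SIZE_MAX on overflow, so an absurd count makes kmalloc() fail instead of under-allocating. Simplified sketch (struct fields abbreviated from tulip.h):

struct medialeaf {
	u8 type;
	u8 media;
};

struct mediatable {
	u16 defaultmedia;
	u16 leafcount;
	struct medialeaf mleaf[];	/* trailing flexible array */
};

/* Overflow-checked equivalent of
 *	sizeof(struct mediatable) + count * sizeof(struct medialeaf)
 */
struct mediatable *mt = kmalloc(struct_size(mt, mleaf, count), GFP_KERNEL);
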
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index 488a744..b1f30b1 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU General Public License
-    as published by the Free Software Foundation; either version 2
-    of the License, or (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
 
 
 */
diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig
index ebdc832..1362658 100644
--- a/drivers/net/ethernet/dlink/Kconfig
+++ b/drivers/net/ethernet/dlink/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # D-Link device configuration
 #
diff --git a/drivers/net/ethernet/dlink/Makefile b/drivers/net/ethernet/dlink/Makefile
index 40085f6..3ff503c 100644
--- a/drivers/net/ethernet/dlink/Makefile
+++ b/drivers/net/ethernet/dlink/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the D-Link network device drivers.
 #
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index f0536b1..55e720d 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*  D-Link DL2000-based Gigabit Ethernet Adapter Linux driver */
 /*
     Copyright (c) 2001, 2002 by D-Link Corporation
     Written by Edward Peng.<edward_peng@dlink.com.tw>
     Created 03-May-2001, base on Linux' sundance.c.
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
 */
 
 #define DRV_NAME	"DL2000/TC902x-based linux driver"
@@ -843,9 +840,9 @@
 				  desc_to_dma(&np->tx_ring[entry]),
 				  skb->len, PCI_DMA_TODEVICE);
 		if (irq)
-			dev_kfree_skb_irq (skb);
+			dev_consume_skb_irq(skb);
 		else
-			dev_kfree_skb (skb);
+			dev_kfree_skb(skb);
 
 		np->tx_skbuff[entry] = NULL;
 		entry = (entry + 1) % TX_RING_SIZE;
@@ -1881,7 +1878,7 @@
 
 gcc -D__KERNEL__ -DMODULE -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -c dl2k.c
 
-Read Documentation/networking/dl2k.txt for details.
+Read Documentation/networking/device_drivers/dlink/dl2k.txt for details.
 
 */
 
diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
index 10e98ba..195dc6c 100644
--- a/drivers/net/ethernet/dlink/dl2k.h
+++ b/drivers/net/ethernet/dlink/dl2k.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*  D-Link DL2000-based Gigabit Ethernet Adapter Linux driver */
 /*
     Copyright (c) 2001, 2002 by D-Link Corporation
     Written by Edward Peng.<edward_peng@dlink.com.tw>
     Created 03-May-2001, base on Linux' sundance.c.
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
 */
 
 #ifndef __DL2K_H__
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 1a27176..4a37a69 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -1193,7 +1193,6 @@
 	int handled = 0;
 	int i;
 
-
 	do {
 		int intr_status = ioread16(ioaddr + IntrStatus);
 		iowrite16(intr_status, ioaddr + IntrStatus);
@@ -1286,7 +1285,7 @@
 				dma_unmap_single(&np->pci_dev->dev,
 					le32_to_cpu(np->tx_ring[entry].frag[0].addr),
 					skb->len, DMA_TO_DEVICE);
-				dev_kfree_skb_irq (np->tx_skbuff[entry]);
+				dev_consume_skb_irq(np->tx_skbuff[entry]);
 				np->tx_skbuff[entry] = NULL;
 				np->tx_ring[entry].frag[0].addr = 0;
 				np->tx_ring[entry].frag[0].length = 0;
@@ -1305,7 +1304,7 @@
 				dma_unmap_single(&np->pci_dev->dev,
 					le32_to_cpu(np->tx_ring[entry].frag[0].addr),
 					skb->len, DMA_TO_DEVICE);
-				dev_kfree_skb_irq (np->tx_skbuff[entry]);
+				dev_consume_skb_irq(np->tx_skbuff[entry]);
 				np->tx_skbuff[entry] = NULL;
 				np->tx_ring[entry].frag[0].addr = 0;
 				np->tx_ring[entry].frag[0].length = 0;
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 5a84794..e249790 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Dave DNET Ethernet Controller driver
  *
  * Copyright (C) 2008 Dave S.r.l. <www.dave.eu>
  * Copyright (C) 2009 Ilya Yanok, Emcraft Systems Ltd, <yanok@emcraft.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/io.h>
 #include <linux/module.h>
@@ -284,13 +281,11 @@
 
 	/* mask with MAC supported features */
 	if (bp->capabilities & DNET_HAS_GIGABIT)
-		phydev->supported &= PHY_GBIT_FEATURES;
+		phy_set_max_speed(phydev, SPEED_1000);
 	else
-		phydev->supported &= PHY_BASIC_FEATURES;
+		phy_set_max_speed(phydev, SPEED_100);
 
-	phydev->supported |= SUPPORTED_Asym_Pause | SUPPORTED_Pause;
-
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	bp->link = 0;
 	bp->speed = 0;
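
The open-coded masking of phydev->supported and phydev->advertising predates phylib's conversion of those fields to link-mode bitmaps; phy_set_max_speed() and phy_support_asym_pause() are the supported helpers and keep working after that conversion. A sketch of the equivalent setup in a generic probe path:

	#include <linux/phy.h>

	static void example_phy_limits(struct phy_device *phydev, bool gigabit)
	{
		/* Cap the PHY at what the MAC can do... */
		phy_set_max_speed(phydev, gigabit ? SPEED_1000 : SPEED_100);
		/* ...and advertise symmetric and asymmetric pause frames. */
		phy_support_asym_pause(phydev);
	}
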
diff --git a/drivers/net/ethernet/dnet.h b/drivers/net/ethernet/dnet.h
index d985080..8af6c07 100644
--- a/drivers/net/ethernet/dnet.h
+++ b/drivers/net/ethernet/dnet.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Dave DNET Ethernet Controller driver
  *
  * Copyright (C) 2008 Dave S.r.l. <www.dave.eu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _DNET_H
 #define _DNET_H
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index d71cba0..46b0dba 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
  /*
  * drivers/net/ethernet/ec_bhf.c
  *
  * Copyright (C) 2014 Darek Marcinkiewicz <reksio@newterm.pl>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 /* This is a driver for EtherCAT master module present on CCAT FPGA.
diff --git a/drivers/net/ethernet/emulex/Kconfig b/drivers/net/ethernet/emulex/Kconfig
index fdbb27c..22c143f 100644
--- a/drivers/net/ethernet/emulex/Kconfig
+++ b/drivers/net/ethernet/emulex/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Emulex driver configuration
 #
diff --git a/drivers/net/ethernet/emulex/Makefile b/drivers/net/ethernet/emulex/Makefile
index ea8ec57..1a7c5ae 100644
--- a/drivers/net/ethernet/emulex/Makefile
+++ b/drivers/net/ethernet/emulex/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Emulex device drivers.
 #
diff --git a/drivers/net/ethernet/emulex/benet/Kconfig b/drivers/net/ethernet/emulex/benet/Kconfig
index 8cf794e..17d300e 100644
--- a/drivers/net/ethernet/emulex/benet/Kconfig
+++ b/drivers/net/ethernet/emulex/benet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config BE2NET
 	tristate "ServerEngines' 10Gbps NIC - BladeEngine"
 	depends on PCI
@@ -47,5 +48,5 @@
 	  chipsets. (e.g. OneConnect OCe14xxx)
 
 comment "WARNING: be2net is useless without any enabled chip"
-        depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
+	depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
 	BE2NET_SKYHAWK=n && BE2NET
diff --git a/drivers/net/ethernet/emulex/benet/Makefile b/drivers/net/ethernet/emulex/benet/Makefile
index 1a91b27..1a238ec 100644
--- a/drivers/net/ethernet/emulex/benet/Makefile
+++ b/drivers/net/ethernet/emulex/benet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile to build the network driver for ServerEngine's BladeEngine.
 #
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index ce041c9..cf3e6f2 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.  The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
@@ -196,7 +192,6 @@
 } ____cacheline_aligned_in_smp;
 
 struct be_aic_obj {		/* Adaptive interrupt coalescing (AIC) info */
-	bool enable;
 	u32 min_eqd;		/* in usecs */
 	u32 max_eqd;		/* in usecs */
 	u32 prev_eqd;		/* in usecs */
@@ -593,6 +588,7 @@
 
 	struct be_drv_stats drv_stats;
 	struct be_aic_obj aic_obj[MAX_EVT_QS];
+	bool aic_enabled;
 	u8 vlan_prio_bmap;	/* Available Priority BitMap */
 	u16 recommended_prio_bits;/* Recommended Priority bits in vlan tag */
 	struct be_dma_mem rx_filter; /* Cmd DMA mem for rx-filter */
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1e9d882..701c12c 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.  The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
@@ -554,7 +550,7 @@
 	int num = 0, status = 0;
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
-	spin_lock(&adapter->mcc_cq_lock);
+	spin_lock_bh(&adapter->mcc_cq_lock);
 
 	while ((compl = be_mcc_compl_get(adapter))) {
 		if (compl->flags & CQE_FLAGS_ASYNC_MASK) {
@@ -570,7 +566,7 @@
 	if (num)
 		be_cq_notify(adapter, mcc_obj->cq.id, mcc_obj->rearm_cq, num);
 
-	spin_unlock(&adapter->mcc_cq_lock);
+	spin_unlock_bh(&adapter->mcc_cq_lock);
 	return status;
 }
 
@@ -585,9 +581,7 @@
 		if (be_check_error(adapter, BE_ERROR_ANY))
 			return -EIO;
 
-		local_bh_disable();
 		status = be_process_mcc(adapter);
-		local_bh_enable();
 
 		if (atomic_read(&mcc_obj->q.used) == 0)
 			break;
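
Switching be_process_mcc() to spin_lock_bh()/spin_unlock_bh() folds the local_bh_disable()/local_bh_enable() bracketing that every caller previously had to remember into the function itself; the hunk above and the matching one in be_main.c drop the now-redundant wrappers. The transformation in isolation, as a sketch with hypothetical names:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_cq_lock);

	static void example_process_cq(void)
	{
		spin_lock_bh(&example_cq_lock);		/* also disables softirqs */
		/* ... drain the completion queue ... */
		spin_unlock_bh(&example_cq_lock);	/* re-enables softirqs */
	}
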
@@ -1808,9 +1802,9 @@
 	total_size = buf_len;
 
 	get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024;
-	get_fat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-					     get_fat_cmd.size,
-					     &get_fat_cmd.dma, GFP_ATOMIC);
+	get_fat_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+					    get_fat_cmd.size,
+					    &get_fat_cmd.dma, GFP_ATOMIC);
 	if (!get_fat_cmd.va)
 		return -ENOMEM;
 
@@ -2302,8 +2296,8 @@
 		return -EINVAL;
 
 	cmd.size = sizeof(struct be_cmd_resp_port_type);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_ATOMIC);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failed\n");
 		return -ENOMEM;
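
All of the dma_zalloc_coherent() to dma_alloc_coherent() hunks in this file (and below in be_ethtool.c and be_main.c) are mechanical: since v5.0, dma_alloc_coherent() always returns zeroed memory, and the zalloc wrapper was removed. A sketch of the equivalence, assuming a valid struct device:

	#include <linux/dma-mapping.h>

	static void *example_coherent_alloc(struct device *dev, size_t size,
					    dma_addr_t *handle)
	{
		/* Returns zeroed memory; no memset() or _zalloc variant needed. */
		return dma_alloc_coherent(dev, size, handle, GFP_KERNEL);
	}
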
@@ -2762,7 +2756,7 @@
 	bool crc_match;
 	const u8 *p;
 
-	struct flash_comp gen3_flash_types[] = {
+	static const struct flash_comp gen3_flash_types[] = {
 		{ BE3_ISCSI_PRIMARY_IMAGE_START, OPTYPE_ISCSI_ACTIVE,
 			BE3_COMP_MAX_SIZE, IMAGE_FIRMWARE_ISCSI},
 		{ BE3_REDBOOT_START, OPTYPE_REDBOOT,
@@ -2785,7 +2779,7 @@
 			BE3_PHY_FW_COMP_MAX_SIZE, IMAGE_FIRMWARE_PHY}
 	};
 
-	struct flash_comp gen2_flash_types[] = {
+	static const struct flash_comp gen2_flash_types[] = {
 		{ BE2_ISCSI_PRIMARY_IMAGE_START, OPTYPE_ISCSI_ACTIVE,
 			BE2_COMP_MAX_SIZE, IMAGE_FIRMWARE_ISCSI},
 		{ BE2_REDBOOT_START, OPTYPE_REDBOOT,
@@ -3066,8 +3060,8 @@
 
 	flash_cmd.size = sizeof(struct lancer_cmd_req_write_object)
 				+ LANCER_FW_DOWNLOAD_CHUNK;
-	flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size,
-					   &flash_cmd.dma, GFP_KERNEL);
+	flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
+					  GFP_KERNEL);
 	if (!flash_cmd.va)
 		return -ENOMEM;
 
@@ -3184,8 +3178,8 @@
 	}
 
 	flash_cmd.size = sizeof(struct be_cmd_write_flashrom);
-	flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
-					   GFP_KERNEL);
+	flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
+					  GFP_KERNEL);
 	if (!flash_cmd.va)
 		return -ENOMEM;
 
@@ -3435,8 +3429,8 @@
 		goto err;
 	}
 	cmd.size = sizeof(struct be_cmd_req_get_phy_info);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_ATOMIC);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -3522,9 +3516,9 @@
 
 	memset(&attribs_cmd, 0, sizeof(struct be_dma_mem));
 	attribs_cmd.size = sizeof(struct be_cmd_resp_cntl_attribs);
-	attribs_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-					     attribs_cmd.size,
-					     &attribs_cmd.dma, GFP_ATOMIC);
+	attribs_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+					    attribs_cmd.size,
+					    &attribs_cmd.dma, GFP_ATOMIC);
 	if (!attribs_cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
@@ -3699,10 +3693,10 @@
 
 	memset(&get_mac_list_cmd, 0, sizeof(struct be_dma_mem));
 	get_mac_list_cmd.size = sizeof(struct be_cmd_resp_get_mac_list);
-	get_mac_list_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-						  get_mac_list_cmd.size,
-						  &get_mac_list_cmd.dma,
-						  GFP_ATOMIC);
+	get_mac_list_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+						 get_mac_list_cmd.size,
+						 &get_mac_list_cmd.dma,
+						 GFP_ATOMIC);
 
 	if (!get_mac_list_cmd.va) {
 		dev_err(&adapter->pdev->dev,
@@ -3829,8 +3823,8 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_mac_list);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_KERNEL);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_KERNEL);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -4035,8 +4029,8 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_ATOMIC);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
@@ -4089,9 +4083,9 @@
 
 	memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
 	extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-	extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-					    extfat_cmd.size, &extfat_cmd.dma,
-					    GFP_ATOMIC);
+	extfat_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+					   extfat_cmd.size, &extfat_cmd.dma,
+					   GFP_ATOMIC);
 	if (!extfat_cmd.va)
 		return -ENOMEM;
 
@@ -4127,9 +4121,9 @@
 
 	memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
 	extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-	extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-					    extfat_cmd.size, &extfat_cmd.dma,
-					    GFP_ATOMIC);
+	extfat_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+					   extfat_cmd.size, &extfat_cmd.dma,
+					   GFP_ATOMIC);
 
 	if (!extfat_cmd.va) {
 		dev_err(&adapter->pdev->dev, "%s: Memory allocation failure\n",
@@ -4354,8 +4348,8 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_func_config);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_ATOMIC);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -4452,8 +4446,8 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_profile_config);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_ATOMIC);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_ATOMIC);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -4539,8 +4533,8 @@
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_profile_config);
-	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-				     GFP_ATOMIC);
+	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				    GFP_ATOMIC);
 	if (!cmd.va)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index e8b43cf..c30d6d6 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.  The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 3f6749f..5bb5abf 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.  The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
@@ -274,8 +270,8 @@
 	int status = 0;
 
 	read_cmd.size = LANCER_READ_FILE_CHUNK;
-	read_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, read_cmd.size,
-					  &read_cmd.dma, GFP_ATOMIC);
+	read_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, read_cmd.size,
+					 &read_cmd.dma, GFP_ATOMIC);
 
 	if (!read_cmd.va) {
 		dev_err(&adapter->pdev->dev,
@@ -333,8 +329,8 @@
 	et->tx_coalesce_usecs_high = aic->max_eqd;
 	et->tx_coalesce_usecs_low = aic->min_eqd;
 
-	et->use_adaptive_rx_coalesce = aic->enable;
-	et->use_adaptive_tx_coalesce = aic->enable;
+	et->use_adaptive_rx_coalesce = adapter->aic_enabled;
+	et->use_adaptive_tx_coalesce = adapter->aic_enabled;
 
 	return 0;
 }
@@ -350,8 +346,9 @@
 	struct be_eq_obj *eqo;
 	int i;
 
+	adapter->aic_enabled = et->use_adaptive_rx_coalesce;
+
 	for_all_evt_queues(adapter, eqo, i) {
-		aic->enable = et->use_adaptive_rx_coalesce;
 		aic->max_eqd = min(et->rx_coalesce_usecs_high, BE_MAX_EQD);
 		aic->min_eqd = min(et->rx_coalesce_usecs_low, aic->max_eqd);
 		aic->et_eqd = min(et->rx_coalesce_usecs, aic->max_eqd);
@@ -815,7 +812,7 @@
 	}
 
 	cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
-	cmd.va = dma_zalloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL);
+	cmd.va = dma_alloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -851,9 +848,9 @@
 	};
 
 	ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test);
-	ddrdma_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-					    ddrdma_cmd.size, &ddrdma_cmd.dma,
-					    GFP_KERNEL);
+	ddrdma_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+					   ddrdma_cmd.size, &ddrdma_cmd.dma,
+					   GFP_KERNEL);
 	if (!ddrdma_cmd.va)
 		return -ENOMEM;
 
@@ -895,7 +892,7 @@
 			 u64 *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
-	int status;
+	int status, cnt;
 	u8 link_status = 0;
 
 	if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
@@ -906,6 +903,9 @@
 
 	memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
 
+	/* check link status before offline tests */
+	link_status = netif_carrier_ok(netdev);
+
 	if (test->flags & ETH_TEST_FL_OFFLINE) {
 		if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0)
 			test->flags |= ETH_TEST_FL_FAILED;
@@ -926,13 +926,26 @@
 		test->flags |= ETH_TEST_FL_FAILED;
 	}
 
-	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
-	if (status) {
-		test->flags |= ETH_TEST_FL_FAILED;
-		data[4] = -1;
-	} else if (!link_status) {
+	/* link status was down prior to test */
+	if (!link_status) {
 		test->flags |= ETH_TEST_FL_FAILED;
 		data[4] = 1;
+		return;
+	}
+
+	for (cnt = 10; cnt; cnt--) {
+		status = be_cmd_link_status_query(adapter, NULL, &link_status,
+						  0);
+		if (status) {
+			test->flags |= ETH_TEST_FL_FAILED;
+			data[4] = -1;
+			break;
+		}
+
+		if (link_status)
+			break;
+
+		msleep_interruptible(500);
 	}
 }
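
The rewritten self-test records carrier state up front, because the offline loopback tests disturb the link; afterwards it polls firmware link status for up to ten 500 ms intervals instead of failing on the first query, giving renegotiation time to complete. The polling idiom on its own, as a sketch with a hypothetical query callback:

	#include <linux/delay.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	static int example_wait_link(int (*query_link)(u8 *up))
	{
		u8 up = 0;
		int cnt, err;

		for (cnt = 10; cnt; cnt--) {
			err = query_link(&up);
			if (err)
				return err;	/* the query itself failed */
			if (up)
				return 0;	/* link came back */
			msleep_interruptible(500);
		}
		return -ETIMEDOUT;		/* still down after ~5 s */
	}
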
 
@@ -1014,9 +1027,9 @@
 
 	memset(&eeprom_cmd, 0, sizeof(struct be_dma_mem));
 	eeprom_cmd.size = sizeof(struct be_cmd_req_seeprom_read);
-	eeprom_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
-					    eeprom_cmd.size, &eeprom_cmd.dma,
-					    GFP_KERNEL);
+	eeprom_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
+					   eeprom_cmd.size, &eeprom_cmd.dma,
+					   GFP_KERNEL);
 
 	if (!eeprom_cmd.va)
 		return -ENOMEM;
@@ -1105,7 +1118,7 @@
 		cmd->data = be_get_rss_hash_opts(adapter, cmd->flow_type);
 		break;
 	case ETHTOOL_GRXRINGS:
-		cmd->data = adapter->num_rx_qs - 1;
+		cmd->data = adapter->num_rx_qs;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h
index db5f92f..3476194 100644
--- a/drivers/net/ethernet/emulex/benet/be_hw.h
+++ b/drivers/net/ethernet/emulex/benet/be_hw.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2005-2016 Broadcom.
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.  The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index bff7475..39eb7d5 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.  The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
@@ -167,8 +163,8 @@
 	q->len = len;
 	q->entry_size = entry_size;
 	mem->size = len * entry_size;
-	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
-				      GFP_KERNEL);
+	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
+				     &mem->dma, GFP_KERNEL);
 	if (!mem->va)
 		return -ENOMEM;
 	return 0;
@@ -796,7 +792,7 @@
 	u16 vlan_tag;
 
 	vlan_tag = skb_vlan_tag_get(skb);
-	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+	vlan_prio = skb_vlan_tag_get_prio(skb);
 	/* If vlan priority provided by OS is NOT in available bmap */
 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
@@ -1018,7 +1014,7 @@
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		len = skb_frag_size(frag);
 
 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
@@ -1049,30 +1045,35 @@
 					     struct be_wrb_params
 					     *wrb_params)
 {
+	bool insert_vlan = false;
 	u16 vlan_tag = 0;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(!skb))
 		return skb;
 
-	if (skb_vlan_tag_present(skb))
+	if (skb_vlan_tag_present(skb)) {
 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
+		insert_vlan = true;
+	}
 
 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
-		if (!vlan_tag)
+		if (!insert_vlan) {
 			vlan_tag = adapter->pvid;
+			insert_vlan = true;
+		}
 		/* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
 		 * skip VLAN insertion
 		 */
 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
 	}
 
-	if (vlan_tag) {
+	if (insert_vlan) {
 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
 						vlan_tag);
 		if (unlikely(!skb))
 			return skb;
-		skb->vlan_tci = 0;
+		__vlan_hwaccel_clear_tag(skb);
 	}
 
 	/* Insert the outer VLAN, if any */
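
The insert_vlan flag fixes a corner case: a present hwaccel tag whose computed value is 0 (VID 0, priority 0) is legitimate, but the old if (vlan_tag) test silently skipped insertion. __vlan_hwaccel_clear_tag() likewise replaces the open-coded skb->vlan_tci = 0, which no longer clears the separate "tag present" bit. A minimal sketch of the pattern:

	#include <linux/if_vlan.h>

	static struct sk_buff *example_push_vlan(struct sk_buff *skb, u16 tag,
						 bool insert)
	{
		if (!insert)
			return skb;

		/* Inserts the tag into the payload; may reallocate or free skb. */
		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q), tag);
		if (skb)
			__vlan_hwaccel_clear_tag(skb);	/* tag now lives in payload */
		return skb;
	}
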
@@ -1265,10 +1266,6 @@
 #define is_arp_allowed_on_bmc(adapter, skb)	\
 	(is_arp(skb) && is_arp_filt_enabled(adapter))
 
-#define is_broadcast_packet(eh, adapter)	\
-		(is_multicast_ether_addr(eh->h_dest) && \
-		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
-
 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
 
 #define is_arp_filt_enabled(adapter)	\
@@ -1375,7 +1372,7 @@
 	u16 q_idx = skb_get_queue_mapping(skb);
 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
 	struct be_wrb_params wrb_params = { 0 };
-	bool flush = !skb->xmit_more;
+	bool flush = !netdev_xmit_more();
 	u16 wrb_cnt;
 
 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
@@ -2150,7 +2147,7 @@
 	int i;
 
 	aic = &adapter->aic_obj[eqo->idx];
-	if (!aic->enable) {
+	if (!adapter->aic_enabled) {
 		if (aic->jiffies)
 			aic->jiffies = 0;
 		eqd = aic->et_eqd;
@@ -2207,7 +2204,7 @@
 	int eqd;
 	u32 mult_enc;
 
-	if (!aic->enable)
+	if (!adapter->aic_enabled)
 		return 0;
 
 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
@@ -2349,8 +2346,8 @@
 		memcpy(skb->data, start, hdr_len);
 		skb_shinfo(skb)->nr_frags = 1;
 		skb_frag_set_page(skb, 0, page_info->page);
-		skb_shinfo(skb)->frags[0].page_offset =
-					page_info->page_offset + hdr_len;
+		skb_frag_off_set(&skb_shinfo(skb)->frags[0],
+				 page_info->page_offset + hdr_len);
 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
 				  curr_frag_len - hdr_len);
 		skb->data_len = curr_frag_len - hdr_len;
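
These frags[] rewrites track skb_frag_t becoming a plain bio_vec in v5.4: direct field access such as frags[j].page_offset gives way to accessors, which also show up as skb_frag_size(frag) replacing frag->size elsewhere in this patch. A sketch of the accessor forms, assuming an skb with room for fragment i:

	#include <linux/skbuff.h>

	static void example_set_frag(struct sk_buff *skb, int i,
				     struct page *page, unsigned int off,
				     unsigned int len)
	{
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		skb_frag_set_page(skb, i, page);
		skb_frag_off_set(frag, off);	/* was: frag->page_offset = off */
		skb_frag_size_set(frag, len);	/* was: frag->size = len */
	}
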
@@ -2375,8 +2372,8 @@
 			/* Fresh page */
 			j++;
 			skb_frag_set_page(skb, j, page_info->page);
-			skb_shinfo(skb)->frags[j].page_offset =
-							page_info->page_offset;
+			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
+					 page_info->page_offset);
 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
 			skb_shinfo(skb)->nr_frags++;
 		} else {
@@ -2457,8 +2454,8 @@
 			/* First frag or Fresh page */
 			j++;
 			skb_frag_set_page(skb, j, page_info->page);
-			skb_shinfo(skb)->frags[j].page_offset =
-							page_info->page_offset;
+			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
+					 page_info->page_offset);
 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
 		} else {
 			put_page(page_info->page);
@@ -2962,6 +2959,8 @@
 				    max(adapter->cfg_num_rx_irqs,
 					adapter->cfg_num_tx_irqs));
 
+	adapter->aic_enabled = true;
+
 	for_all_evt_queues(adapter, eqo, i) {
 		int numa_node = dev_to_node(&adapter->pdev->dev);
 
@@ -2969,7 +2968,6 @@
 		eqo->adapter = adapter;
 		eqo->idx = i;
 		aic->max_eqd = BE_MAX_EQD;
-		aic->enable = true;
 
 		eq = &eqo->q;
 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
@@ -4700,8 +4698,17 @@
 	struct net_device *netdev = adapter->netdev;
 	int status;
 
-	if (netif_running(netdev))
+	if (netif_running(netdev)) {
+		/* be_tx_timeout() must not run concurrently with this
+		 * function; synchronize with an already-running dev_watchdog
+		 */
+		netif_tx_lock_bh(netdev);
+		/* device cannot transmit now, avoid dev_watchdog timeouts */
+		netif_carrier_off(netdev);
+		netif_tx_unlock_bh(netdev);
+
 		be_close(netdev);
+	}
 
 	be_cancel_worker(adapter);
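
Quiescing the watchdog before be_close() avoids spurious TX timeouts during suspend: netif_tx_lock_bh() waits out any dev_watchdog pass already running, and dropping the carrier while holding the lock means later watchdog runs see a carrier-off device and return early. The same idiom in isolation, as a sketch:

	#include <linux/netdevice.h>

	static void example_quiesce(struct net_device *ndev)
	{
		netif_tx_lock_bh(ndev);		/* serializes with a running dev_watchdog */
		netif_carrier_off(ndev);	/* carrier off => no new TX timeouts */
		netif_tx_unlock_bh(ndev);
	}
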
 
@@ -4950,7 +4957,7 @@
 }
 
 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
-				 u16 flags)
+				 u16 flags, struct netlink_ext_ack *extack)
 {
 	struct be_adapter *adapter = netdev_priv(dev);
 	struct nlattr *attr, *br_spec;
@@ -5624,9 +5631,7 @@
 	 * mcc completions
 	 */
 	if (!netif_running(adapter->netdev)) {
-		local_bh_disable();
 		be_process_mcc(adapter);
-		local_bh_enable();
 		goto reschedule;
 	}
 
@@ -5761,9 +5766,9 @@
 	int status = 0;
 
 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
-	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
-						 &mbox_mem_alloc->dma,
-						 GFP_KERNEL);
+	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
+						&mbox_mem_alloc->dma,
+						GFP_KERNEL);
 	if (!mbox_mem_alloc->va)
 		return -ENOMEM;
 
@@ -5772,8 +5777,8 @@
 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
 
 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
-	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
-					    &rx_filter->dma, GFP_KERNEL);
+	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
+					   &rx_filter->dma, GFP_KERNEL);
 	if (!rx_filter->va) {
 		status = -ENOMEM;
 		goto free_mbox;
@@ -5787,8 +5792,8 @@
 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
 	else
 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
-	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
-					    &stats_cmd->dma, GFP_KERNEL);
+	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
+					   &stats_cmd->dma, GFP_KERNEL);
 	if (!stats_cmd->va) {
 		status = -ENOMEM;
 		goto free_rx_filter;
@@ -6146,7 +6151,6 @@
 	if (status)
 		return PCI_ERS_RESULT_DISCONNECT;
 
-	pci_cleanup_aer_uncorrect_error_status(pdev);
 	be_clear_error(adapter, BE_CLEAR_ALL);
 	return PCI_ERS_RESULT_RECOVERED;
 }
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 05989aa..521c4c2 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation. The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index e51719a..801e105 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2005 - 2016 Broadcom
  * All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation. The full GNU General
- * Public License is included in this distribution in the file called COPYING.
- *
  * Contact Information:
  * linux-drivers@emulex.com
  *
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 60da049..ea4f17f 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/drivers/net/ethernet/ethoc.c
  *
  * Copyright (C) 2007-2008 Avionic Design Development GmbH
  * Copyright (C) 2008-2009 Avionic Design GmbH
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Written by Thierry Reding <thierry.reding@avionic-design.de>
  */
 
@@ -721,10 +718,7 @@
 		return err;
 	}
 
-	phy->advertising &= ~(ADVERTISED_1000baseT_Full |
-			      ADVERTISED_1000baseT_Half);
-	phy->supported &= ~(SUPPORTED_1000baseT_Full |
-			    SUPPORTED_1000baseT_Half);
+	phy_set_max_speed(phy, SPEED_100);
 
 	return 0;
 }
@@ -1156,7 +1150,7 @@
 		const void *mac;
 
 		mac = of_get_mac_address(pdev->dev.of_node);
-		if (mac)
+		if (!IS_ERR(mac))
 			ether_addr_copy(netdev->dev_addr, mac);
 		priv->phy_id = -1;
 	}
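
Both this hunk and the nps_enet one below adapt to of_get_mac_address() returning an ERR_PTR() instead of NULL on failure (changed in v5.2); a bare NULL test would now treat the error cookie as a valid address and copy from it. A sketch of the updated pattern:

	#include <linux/err.h>
	#include <linux/etherdevice.h>
	#include <linux/of_net.h>

	static void example_assign_mac(struct net_device *ndev,
				       struct device_node *np)
	{
		const void *mac = of_get_mac_address(np);

		if (!IS_ERR(mac))
			ether_addr_copy(ndev->dev_addr, mac);
		else
			eth_hw_addr_random(ndev);	/* fall back to a random address */
	}
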
diff --git a/drivers/net/ethernet/ezchip/Kconfig b/drivers/net/ethernet/ezchip/Kconfig
index b423ad3..6db75fd 100644
--- a/drivers/net/ethernet/ezchip/Kconfig
+++ b/drivers/net/ethernet/ezchip/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # EZchip network device configuration
 #
diff --git a/drivers/net/ethernet/ezchip/Makefile b/drivers/net/ethernet/ezchip/Makefile
index e490176..444570f 100644
--- a/drivers/net/ethernet/ezchip/Makefile
+++ b/drivers/net/ethernet/ezchip/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_EZCHIP_NPS_MANAGEMENT_ENET) += nps_enet.o
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 659f1ad..815fb62 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright(c) 2015 EZchip Technologies.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
  */
 
 #include <linux/module.h>
@@ -587,7 +576,6 @@
 	struct nps_enet_priv *priv;
 	s32 err = 0;
 	const char *mac_addr;
-	struct resource *res_regs;
 
 	if (!dev->of_node)
 		return -ENODEV;
@@ -606,8 +594,7 @@
 	/* FIXME :: no multicast support yet */
 	ndev->flags &= ~IFF_MULTICAST;
 
-	res_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs_base = devm_ioremap_resource(dev, res_regs);
+	priv->regs_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs_base)) {
 		err = PTR_ERR(priv->regs_base);
 		goto out_netdev;
@@ -616,7 +603,7 @@
 
 	/* set kernel MAC address to dev */
 	mac_addr = of_get_mac_address(dev->of_node);
-	if (mac_addr)
+	if (!IS_ERR(mac_addr))
 		ether_addr_copy(ndev->dev_addr, mac_addr);
 	else
 		eth_hw_addr_random(ndev);
diff --git a/drivers/net/ethernet/ezchip/nps_enet.h b/drivers/net/ethernet/ezchip/nps_enet.h
index 3939ca2..092da2d 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.h
+++ b/drivers/net/ethernet/ezchip/nps_enet.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright(c) 2015 EZchip Technologies.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
  */
 
 #ifndef _NPS_ENET_H
@@ -178,7 +167,7 @@
 };
 
 /**
- * nps_reg_set - Sets ENET register with provided value.
+ * nps_enet_reg_set - Sets ENET register with provided value.
  * @priv:       Pointer to EZchip ENET private data structure.
  * @reg:        Register offset from base address.
  * @value:      Value to set in register.
@@ -190,7 +179,7 @@
 }
 
 /**
- * nps_reg_get - Gets value of specified ENET register.
+ * nps_enet_reg_get - Gets value of specified ENET register.
  * @priv:       Pointer to EZchip ENET private data structure.
  * @reg:        Register offset from base address.
  *
diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig
index 0fb8df6..73e4f26 100644
--- a/drivers/net/ethernet/faraday/Kconfig
+++ b/drivers/net/ethernet/faraday/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Faraday device configuration
 #
@@ -31,6 +32,7 @@
 	depends on ARM || NDS32 || COMPILE_TEST
 	depends on !64BIT || BROKEN
 	select PHYLIB
+	select MDIO_ASPEED if MACH_ASPEED_G6
 	---help---
 	  This driver supports the FTGMAC100 Gigabit Ethernet controller
 	  from Faraday. It is used on Faraday A369, Andes AG102 and some
diff --git a/drivers/net/ethernet/faraday/Makefile b/drivers/net/ethernet/faraday/Makefile
index 408b539..f16f584 100644
--- a/drivers/net/ethernet/faraday/Makefile
+++ b/drivers/net/ethernet/faraday/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Faraday device drivers.
 #
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index ed6c76d..96e9565 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Faraday FTGMAC100 Gigabit Ethernet
  *
  * (C) Copyright 2009-2011 Faraday Technology
  * Po-Yu Chuang <ratbert@faraday-tech.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
@@ -30,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
@@ -712,8 +700,8 @@
 	return skb_checksum_help(skb) == 0;
 }
 
-static int ftgmac100_hard_start_xmit(struct sk_buff *skb,
-				     struct net_device *netdev)
+static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb,
+					     struct net_device *netdev)
 {
 	struct ftgmac100 *priv = netdev_priv(netdev);
 	struct ftgmac100_txdes *txdes, *first;
@@ -739,6 +727,18 @@
 	 */
 	nfrags = skb_shinfo(skb)->nr_frags;
 
+	/* Setup HW checksumming */
+	csum_vlan = 0;
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    !ftgmac100_prep_tx_csum(skb, &csum_vlan))
+		goto drop;
+
+	/* Add VLAN tag */
+	if (skb_vlan_tag_present(skb)) {
+		csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
+		csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
+	}
+
 	/* Get header len */
 	len = skb_headlen(skb);
 
@@ -765,19 +765,6 @@
 	if (nfrags == 0)
 		f_ctl_stat |= FTGMAC100_TXDES0_LTS;
 	txdes->txdes3 = cpu_to_le32(map);
-
-	/* Setup HW checksumming */
-	csum_vlan = 0;
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    !ftgmac100_prep_tx_csum(skb, &csum_vlan))
-		goto drop;
-
-	/* Add VLAN tag */
-	if (skb_vlan_tag_present(skb)) {
-		csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
-		csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
-	}
-
 	txdes->txdes1 = cpu_to_le32(csum_vlan);
 
 	/* Next descriptor */
@@ -787,7 +774,7 @@
 	for (i = 0; i < nfrags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-		len = frag->size;
+		len = skb_frag_size(frag);
 
 		/* Map it */
 		map = skb_frag_dma_map(priv->dev, frag, 0, len,
@@ -935,16 +922,14 @@
 		return -ENOMEM;
 
 	/* Allocate descriptors */
-	priv->rxdes = dma_zalloc_coherent(priv->dev,
-					  MAX_RX_QUEUE_ENTRIES *
-					  sizeof(struct ftgmac100_rxdes),
-					  &priv->rxdes_dma, GFP_KERNEL);
+	priv->rxdes = dma_alloc_coherent(priv->dev,
+					 MAX_RX_QUEUE_ENTRIES * sizeof(struct ftgmac100_rxdes),
+					 &priv->rxdes_dma, GFP_KERNEL);
 	if (!priv->rxdes)
 		return -ENOMEM;
-	priv->txdes = dma_zalloc_coherent(priv->dev,
-					  MAX_TX_QUEUE_ENTRIES *
-					  sizeof(struct ftgmac100_txdes),
-					  &priv->txdes_dma, GFP_KERNEL);
+	priv->txdes = dma_alloc_coherent(priv->dev,
+					 MAX_TX_QUEUE_ENTRIES * sizeof(struct ftgmac100_txdes),
+					 &priv->txdes_dma, GFP_KERNEL);
 	if (!priv->txdes)
 		return -ENOMEM;
 
@@ -1077,10 +1062,9 @@
 	}
 
 	/* Indicate that we support PAUSE frames (see comment in
-	 * Documentation/networking/phy.txt)
+	 * Documentation/networking/phy.rst)
 	 */
-	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	/* Display what we found */
 	phy_attached_info(phydev);
@@ -1220,22 +1204,11 @@
 	priv->tx_pause = pause->tx_pause;
 	priv->rx_pause = pause->rx_pause;
 
-	if (phydev) {
-		phydev->advertising &= ~ADVERTISED_Pause;
-		phydev->advertising &= ~ADVERTISED_Asym_Pause;
+	if (phydev)
+		phy_set_asym_pause(phydev, pause->rx_pause, pause->tx_pause);
 
-		if (pause->rx_pause) {
-			phydev->advertising |= ADVERTISED_Pause;
-			phydev->advertising |= ADVERTISED_Asym_Pause;
-		}
-
-		if (pause->tx_pause)
-			phydev->advertising ^= ADVERTISED_Asym_Pause;
-	}
 	if (netif_running(netdev)) {
-		if (phydev && priv->aneg_pause)
-			phy_start_aneg(phydev);
-		else
+		if (!(phydev && priv->aneg_pause))
 			ftgmac100_config_pause(priv);
 	}
 
@@ -1646,12 +1619,17 @@
 	if (!priv->mii_bus)
 		return -EIO;
 
-	if (priv->is_aspeed) {
-		/* This driver supports the old MDIO interface */
+	if (of_device_is_compatible(np, "aspeed,ast2400-mac") ||
+	    of_device_is_compatible(np, "aspeed,ast2500-mac")) {
+		/* The AST2600 has a separate MDIO controller */
+
+		/* For the AST2400 and AST2500 this driver only supports the
+		 * old MDIO interface
+		 */
 		reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR);
 		reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE;
 		iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR);
-	};
+	}
 
 	/* Get PHY mode from device-tree */
 	if (np) {
@@ -1824,7 +1802,8 @@
 
 	np = pdev->dev.of_node;
 	if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac") ||
-		   of_device_is_compatible(np, "aspeed,ast2500-mac"))) {
+		   of_device_is_compatible(np, "aspeed,ast2500-mac") ||
+		   of_device_is_compatible(np, "aspeed,ast2600-mac"))) {
 		priv->rxdes0_edorr_mask = BIT(30);
 		priv->txdes0_edotr_mask = BIT(30);
 		priv->is_aspeed = true;
@@ -1844,7 +1823,29 @@
 		priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler);
 		if (!priv->ndev)
 			goto err_ncsi_dev;
-	} else {
+	} else if (np && of_get_property(np, "phy-handle", NULL)) {
+		struct phy_device *phy;
+
+		phy = of_phy_get_and_connect(priv->netdev, np,
+					     &ftgmac100_adjust_link);
+		if (!phy) {
+			dev_err(&pdev->dev, "Failed to connect to phy\n");
+			goto err_setup_mdio;
+		}
+
+		/* Indicate that we support PAUSE frames (see comment in
+		 * Documentation/networking/phy.rst)
+		 */
+		phy_support_asym_pause(phy);
+
+		/* Display what we found */
+		phy_attached_info(phy);
+	} else if (np && !of_get_child_by_name(np, "mdio")) {
+		/* Support legacy ASPEED devicetree descriptions that describe a
+		 * MAC with an embedded MDIO controller but have no "mdio"
+		 * child node. Automatically scan the MDIO bus for available
+		 * PHYs.
+		 */
 		priv->use_ncsi = false;
 		err = ftgmac100_setup_mdio(netdev);
 		if (err)
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 0653d81..e5876a3 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Faraday FTGMAC100 Gigabit Ethernet
  *
  * (C) Copyright 2009-2011 Faraday Technology
  * Po-Yu Chuang <ratbert@faraday-tech.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef __FTGMAC100_H
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 9015bd9..6c247cb 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Faraday FTMAC100 10/100 Ethernet
  *
  * (C) Copyright 2009-2011 Faraday Technology
  * Po-Yu Chuang <ratbert@faraday-tech.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
@@ -634,8 +621,8 @@
 		;
 }
 
-static int ftmac100_xmit(struct ftmac100 *priv, struct sk_buff *skb,
-			 dma_addr_t map)
+static netdev_tx_t ftmac100_xmit(struct ftmac100 *priv, struct sk_buff *skb,
+				 dma_addr_t map)
 {
 	struct net_device *netdev = priv->netdev;
 	struct ftmac100_txdes *txdes;
@@ -734,10 +721,9 @@
 {
 	int i;
 
-	priv->descs = dma_zalloc_coherent(priv->dev,
-					  sizeof(struct ftmac100_descs),
-					  &priv->descs_dma_addr,
-					  GFP_KERNEL);
+	priv->descs = dma_alloc_coherent(priv->dev,
+					 sizeof(struct ftmac100_descs),
+					 &priv->descs_dma_addr, GFP_KERNEL);
 	if (!priv->descs)
 		return -ENOMEM;
 
@@ -1015,7 +1001,8 @@
 	return 0;
 }
 
-static int ftmac100_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+ftmac100_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ftmac100 *priv = netdev_priv(netdev);
 	dma_addr_t map;
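
The prototype changes here make the xmit paths return netdev_tx_t rather than int, matching the ndo_start_xmit signature in struct net_device_ops; with an int return the assignment only worked because the enum and int happened to line up, and newer compilers warn on the mismatch. The expected shape, as a sketch:

	#include <linux/netdevice.h>

	static netdev_tx_t example_start_xmit(struct sk_buff *skb,
					      struct net_device *netdev)
	{
		/* ... map and queue the skb to hardware (omitted) ... */
		dev_kfree_skb_any(skb);		/* placeholder so the sketch is complete */
		return NETDEV_TX_OK;
	}
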
diff --git a/drivers/net/ethernet/faraday/ftmac100.h b/drivers/net/ethernet/faraday/ftmac100.h
index 46a0c47..fe986f1 100644
--- a/drivers/net/ethernet/faraday/ftmac100.h
+++ b/drivers/net/ethernet/faraday/ftmac100.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Faraday FTMAC100 10/100 Ethernet
  *
  * (C) Copyright 2009-2011 Faraday Technology
  * Po-Yu Chuang <ratbert@faraday-tech.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef __FTMAC100_H
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index ae55da6..c24fd56 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -1531,7 +1531,7 @@
 			/* Free the original skb. */
 			pci_unmap_single(np->pci_dev, np->cur_tx->buffer,
 				np->cur_tx->skbuff->len, PCI_DMA_TODEVICE);
-			dev_kfree_skb_irq(np->cur_tx->skbuff);
+			dev_consume_skb_irq(np->cur_tx->skbuff);
 			np->cur_tx->skbuff = NULL;
 			--np->really_tx_count;
 			if (np->cur_tx->control & TXLD) {
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index a580a3d..6a7e899 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Freescale device configuration
 #
@@ -96,5 +97,7 @@
 	  on the 8540.
 
 source "drivers/net/ethernet/freescale/dpaa/Kconfig"
+source "drivers/net/ethernet/freescale/dpaa2/Kconfig"
+source "drivers/net/ethernet/freescale/enetc/Kconfig"
 
 endif # NET_VENDOR_FREESCALE
diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index 0914a3e..6a93293 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -21,3 +21,8 @@
 
 obj-$(CONFIG_FSL_FMAN) += fman/
 obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/
+
+obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/
+
+obj-$(CONFIG_FSL_ENETC) += enetc/
+obj-$(CONFIG_FSL_ENETC_VF) += enetc/
diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig
index a654736..3b32573 100644
--- a/drivers/net/ethernet/freescale/dpaa/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 menuconfig FSL_DPAA_ETH
 	tristate "DPAA Ethernet"
 	depends on FSL_DPAA && FSL_FMAN
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 65a22cd..b4b82b9 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -51,9 +51,9 @@
 #include <linux/percpu.h>
 #include <linux/dma-mapping.h>
 #include <linux/sort.h>
+#include <linux/phy_fixed.h>
 #include <soc/fsl/bman.h>
 #include <soc/fsl/qman.h>
-
 #include "fman.h"
 #include "fman_port.h"
 #include "mac.h"
@@ -485,7 +485,7 @@
 static bool dpaa_bpid2pool_use(int bpid)
 {
 	if (dpaa_bpid2pool(bpid)) {
-		atomic_inc(&dpaa_bp_array[bpid]->refs);
+		refcount_inc(&dpaa_bp_array[bpid]->refs);
 		return true;
 	}
 
@@ -496,7 +496,7 @@
 static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp)
 {
 	dpaa_bp_array[bpid] = dpaa_bp;
-	atomic_set(&dpaa_bp->refs, 1);
+	refcount_set(&dpaa_bp->refs, 1);
 }
 
 static int dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp)
@@ -584,7 +584,7 @@
 	if (!bp)
 		return;
 
-	if (!atomic_dec_and_test(&bp->refs))
+	if (!refcount_dec_and_test(&bp->refs))
 		return;
 
 	if (bp->free_buf_cb)
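
Moving the buffer-pool refs from atomic_t to refcount_t is a hardening change: refcount_t saturates instead of wrapping on overflow/underflow and WARNs on misuse, while the get/put API maps one-to-one onto the old atomic calls. A sketch of the pattern used above:

	#include <linux/refcount.h>

	struct example_pool {
		refcount_t refs;
	};

	static void example_pool_create(struct example_pool *p)
	{
		refcount_set(&p->refs, 1);	/* creator holds the first ref */
	}

	static void example_pool_get(struct example_pool *p)
	{
		refcount_inc(&p->refs);
	}

	static bool example_pool_put(struct example_pool *p)
	{
		return refcount_dec_and_test(&p->refs);	/* true: free the pool */
	}
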
@@ -780,7 +780,7 @@
 	struct qman_portal *portal;
 	int cpu;
 
-	for_each_cpu(cpu, cpus) {
+	for_each_cpu_and(cpu, cpus, cpu_online_mask) {
 		portal = qman_get_affine_portal(cpu);
 		qman_p_static_dequeue_add(portal, pool);
 	}
@@ -896,7 +896,7 @@
 	u16 channels[NR_CPUS];
 	struct dpaa_fq *fq;
 
-	for_each_cpu(cpu, affine_cpus)
+	for_each_cpu_and(cpu, affine_cpus, cpu_online_mask)
 		channels[num_portals++] = qman_affine_channel(cpu);
 
 	if (num_portals == 0)
@@ -1280,7 +1280,7 @@
 
 	err = bman_release(dpaa_bp->pool, bmb, cnt);
 	/* Should never occur, address anyway to avoid leaking the buffers */
-	if (unlikely(WARN_ON(err)) && dpaa_bp->free_buf_cb)
+	if (WARN_ON(err) && dpaa_bp->free_buf_cb)
 		while (cnt-- > 0)
 			dpaa_bp->free_buf_cb(dpaa_bp, &bmb[cnt]);
 
@@ -1648,7 +1648,7 @@
 				 qm_sg_entry_get_len(&sgt[0]), dma_dir);
 
 		/* remaining pages were mapped with skb_frag_dma_map() */
-		for (i = 1; i < nr_frags; i++) {
+		for (i = 1; i <= nr_frags; i++) {
 			WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
 
 			dma_unmap_page(dev, qm_sg_addr(&sgt[i]),
@@ -1704,10 +1704,8 @@
 
 	skb = build_skb(vaddr, dpaa_bp->size +
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
-	if (unlikely(!skb)) {
-		WARN_ONCE(1, "Build skb failure on Rx\n");
+	if (WARN_ONCE(!skb, "Build skb failure on Rx\n"))
 		goto free_buffer;
-	}
 	WARN_ON(fd_off != priv->rx_headroom);
 	skb_reserve(skb, fd_off);
 	skb_put(skb, qm_fd_get_length(fd));
@@ -1770,7 +1768,7 @@
 			sz = dpaa_bp->size +
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 			skb = build_skb(sg_vaddr, sz);
-			if (WARN_ON(unlikely(!skb)))
+			if (WARN_ON(!skb))
 				goto free_buffers;
 
 			skb->ip_summed = rx_csum_offload(priv, fd);
@@ -1960,7 +1958,7 @@
 	/* populate the rest of SGT entries */
 	for (i = 0; i < nr_frags; i++) {
 		frag = &skb_shinfo(skb)->frags[i];
-		frag_len = frag->size;
+		frag_len = skb_frag_size(frag);
 		WARN_ON(!skb_frag_page(frag));
 		addr = skb_frag_dma_map(dev, frag, 0,
 					frag_len, dma_dir);
@@ -2046,12 +2044,14 @@
 	return 0;
 }
 
-static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+static netdev_tx_t
+dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 {
 	const int queue_mapping = skb_get_queue_mapping(skb);
 	bool nonlinear = skb_is_nonlinear(skb);
 	struct rtnl_link_stats64 *percpu_stats;
 	struct dpaa_percpu_priv *percpu_priv;
+	struct netdev_queue *txq;
 	struct dpaa_priv *priv;
 	struct qm_fd fd;
 	int offset = 0;
@@ -2101,6 +2101,11 @@
 	if (unlikely(err < 0))
 		goto skb_to_fd_failed;
 
+	txq = netdev_get_tx_queue(net_dev, queue_mapping);
+
+	/* LLTX requires to do our own update of trans_start */
+	txq->trans_start = jiffies;
+
 	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
 		fd.cmd |= cpu_to_be32(FM_FD_CMD_UPD);
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
@@ -2169,7 +2174,6 @@
 	if (cleaned < budget) {
 		napi_complete_done(napi, cleaned);
 		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
-
 	} else if (np->down) {
 		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
 	}
@@ -2443,7 +2447,7 @@
 	struct dpaa_percpu_priv *percpu_priv;
 	int i;
 
-	for_each_possible_cpu(i) {
+	for_each_online_cpu(i) {
 		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
 
 		percpu_priv->np.down = 0;
@@ -2456,7 +2460,7 @@
 	struct dpaa_percpu_priv *percpu_priv;
 	int i;
 
-	for_each_possible_cpu(i) {
+	for_each_online_cpu(i) {
 		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
 
 		percpu_priv->np.down = 1;
@@ -2476,6 +2480,7 @@
 
 static int dpaa_phy_init(struct net_device *net_dev)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 	struct mac_device *mac_dev;
 	struct phy_device *phy_dev;
 	struct dpaa_priv *priv;
@@ -2492,9 +2497,10 @@
 	}
 
 	/* Remove any features not supported by the controller */
-	phy_dev->supported &= mac_dev->if_support;
-	phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-	phy_dev->advertising = phy_dev->supported;
+	ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support);
+	linkmode_and(phy_dev->supported, phy_dev->supported, mask);
+
+	phy_support_asym_pause(phy_dev);
 
 	mac_dev->phy_dev = phy_dev;
 	net_dev->phydev = phy_dev;
@@ -2615,6 +2621,7 @@
 	.ndo_stop = dpaa_eth_stop,
 	.ndo_tx_timeout = dpaa_tx_timeout,
 	.ndo_get_stats64 = dpaa_get_stats64,
+	.ndo_change_carrier = fixed_phy_change_carrier,
 	.ndo_set_mac_address = dpaa_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_set_rx_mode = dpaa_set_rx_mode,
@@ -2733,8 +2740,6 @@
 	return err;
 }
 
-static const struct of_device_id dpaa_match[];
-
 static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
 {
 	u16 headroom;
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
index af320f8..f7e59e8 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
@@ -32,6 +32,7 @@
 #define __DPAA_H
 
 #include <linux/netdevice.h>
+#include <linux/refcount.h>
 #include <soc/fsl/qman.h>
 #include <soc/fsl/bman.h>
 
@@ -99,7 +100,7 @@
 	int (*seed_cb)(struct dpaa_bp *);
 	/* bpool can be emptied before freeing by this cb */
 	void (*free_buf_cb)(const struct dpaa_bp *, struct bm_buffer *);
-	atomic_t refs;
+	refcount_t refs;
 };
 
 struct dpaa_rx_errors {
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 3184c8f..7ce2e99 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -182,7 +182,6 @@
 	struct phy_device *phydev;
 	bool rx_pause, tx_pause;
 	struct dpaa_priv *priv;
-	u32 newadv, oldadv;
 	int err;
 
 	priv = netdev_priv(net_dev);
@@ -194,9 +193,7 @@
 		return -ENODEV;
 	}
 
-	if (!(phydev->supported & SUPPORTED_Pause) ||
-	    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-	    (epause->rx_pause != epause->tx_pause)))
+	if (!phy_validate_pause(phydev, epause))
 		return -EINVAL;
 
 	/* The MAC should know how to handle PAUSE frame autonegotiation before
@@ -210,29 +207,8 @@
 	/* Determine the sym/asym advertised PAUSE capabilities from the desired
 	 * rx/tx pause settings.
 	 */
-	newadv = 0;
-	if (epause->rx_pause)
-		newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	if (epause->tx_pause)
-		newadv ^= ADVERTISED_Asym_Pause;
 
-	oldadv = phydev->advertising &
-			(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
-	/* If there are differences between the old and the new advertised
-	 * values, restart PHY autonegotiation and advertise the new values.
-	 */
-	if (oldadv != newadv) {
-		phydev->advertising &= ~(ADVERTISED_Pause
-				| ADVERTISED_Asym_Pause);
-		phydev->advertising |= newadv;
-		if (phydev->autoneg) {
-			err = phy_start_aneg(phydev);
-			if (err < 0)
-				netdev_err(net_dev, "phy_start_aneg() = %d\n",
-					   err);
-		}
-	}
+	phy_set_asym_pause(phydev, epause->rx_pause, epause->tx_pause);
 
 	fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
 	err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
@@ -525,7 +501,7 @@
 	struct device_node *mac_node = dev->of_node;
 	struct device_node *fman_node = NULL, *ptp_node = NULL;
 	struct platform_device *ptp_dev = NULL;
-	struct qoriq_ptp *ptp = NULL;
+	struct ptp_qoriq *ptp = NULL;
 
 	info->phc_index = -1;
 
@@ -553,6 +529,75 @@
 	return 0;
 }
 
+static int dpaa_get_coalesce(struct net_device *dev,
+			     struct ethtool_coalesce *c)
+{
+	struct qman_portal *portal;
+	u32 period;
+	u8 thresh;
+
+	portal = qman_get_affine_portal(smp_processor_id());
+	qman_portal_get_iperiod(portal, &period);
+	qman_dqrr_get_ithresh(portal, &thresh);
+
+	c->rx_coalesce_usecs = period;
+	c->rx_max_coalesced_frames = thresh;
+	c->use_adaptive_rx_coalesce = false;
+
+	return 0;
+}
+
+static int dpaa_set_coalesce(struct net_device *dev,
+			     struct ethtool_coalesce *c)
+{
+	const cpumask_t *cpus = qman_affine_cpus();
+	bool needs_revert[NR_CPUS] = {false};
+	struct qman_portal *portal;
+	u32 period, prev_period;
+	u8 thresh, prev_thresh;
+	int cpu, res;
+
+	if (c->use_adaptive_rx_coalesce)
+		return -EINVAL;
+
+	period = c->rx_coalesce_usecs;
+	thresh = c->rx_max_coalesced_frames;
+
+	/* save previous values */
+	portal = qman_get_affine_portal(smp_processor_id());
+	qman_portal_get_iperiod(portal, &prev_period);
+	qman_dqrr_get_ithresh(portal, &prev_thresh);
+
+	/* set new values */
+	for_each_cpu_and(cpu, cpus, cpu_online_mask) {
+		portal = qman_get_affine_portal(cpu);
+		res = qman_portal_set_iperiod(portal, period);
+		if (res)
+			goto revert_values;
+		res = qman_dqrr_set_ithresh(portal, thresh);
+		if (res) {
+			qman_portal_set_iperiod(portal, prev_period);
+			goto revert_values;
+		}
+		needs_revert[cpu] = true;
+	}
+
+	return 0;
+
+revert_values:
+	/* restore previous values */
+	for_each_cpu_and(cpu, cpus, cpu_online_mask) {
+		if (!needs_revert[cpu])
+			continue;
+		portal = qman_get_affine_portal(cpu);
+		/* previous values will not fail, ignore return value */
+		qman_portal_set_iperiod(portal, prev_period);
+		qman_dqrr_set_ithresh(portal, prev_thresh);
+	}
+
+	return res;
+}
+
 const struct ethtool_ops dpaa_ethtool_ops = {
 	.get_drvinfo = dpaa_get_drvinfo,
 	.get_msglevel = dpaa_get_msglevel,
@@ -569,4 +614,6 @@
 	.get_rxnfc = dpaa_get_rxnfc,
 	.set_rxnfc = dpaa_set_rxnfc,
 	.get_ts_info = dpaa_get_ts_info,
+	.get_coalesce = dpaa_get_coalesce,
+	.set_coalesce = dpaa_set_coalesce,
 };
diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig
new file mode 100644
index 0000000..fbef282
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config FSL_DPAA2_ETH
+	tristate "Freescale DPAA2 Ethernet"
+	depends on FSL_MC_BUS && FSL_MC_DPIO
+	help
+	  This is the DPAA2 Ethernet driver supporting Freescale SoCs
+	  with DPAA2 (DataPath Acceleration Architecture v2).
+	  The driver manages network objects discovered on the Freescale
+	  MC bus.
+
+config FSL_DPAA2_PTP_CLOCK
+	tristate "Freescale DPAA2 PTP Clock"
+	depends on FSL_DPAA2_ETH && PTP_1588_CLOCK_QORIQ
+	default y
+	help
+	  This driver adds support for using the DPAA2 1588 timer module
+	  as a PTP clock.
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
new file mode 100644
index 0000000..d1e78cd
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Freescale DPAA2 Ethernet controller
+#
+
+obj-$(CONFIG_FSL_DPAA2_ETH)		+= fsl-dpaa2-eth.o
+obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK)	+= fsl-dpaa2-ptp.o
+
+fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o
+fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
+fsl-dpaa2-ptp-objs	:= dpaa2-ptp.o dprtc.o
+
+# Needed by the tracing framework
+CFLAGS_dpaa2-eth.o := -I$(src)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
new file mode 100644
index 0000000..a9afe46
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2015 Freescale Semiconductor Inc.
+ * Copyright 2018-2019 NXP
+ */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include "dpaa2-eth.h"
+#include "dpaa2-eth-debugfs.h"
+
+#define DPAA2_ETH_DBG_ROOT "dpaa2-eth"
+
+static struct dentry *dpaa2_dbg_root;
+
+static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset)
+{
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private;
+	struct rtnl_link_stats64 *stats;
+	struct dpaa2_eth_drv_stats *extras;
+	int i;
+
+	seq_printf(file, "Per-CPU stats for %s\n", priv->net_dev->name);
+	seq_printf(file, "%s%16s%16s%16s%16s%16s%16s%16s%16s%16s\n",
+		   "CPU", "Rx", "Rx Err", "Rx SG", "Tx", "Tx Err", "Tx conf",
+		   "Tx SG", "Tx realloc", "Enq busy");
+
+	for_each_online_cpu(i) {
+		stats = per_cpu_ptr(priv->percpu_stats, i);
+		extras = per_cpu_ptr(priv->percpu_extras, i);
+		seq_printf(file, "%3d%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu\n",
+			   i,
+			   stats->rx_packets,
+			   stats->rx_errors,
+			   extras->rx_sg_frames,
+			   stats->tx_packets,
+			   stats->tx_errors,
+			   extras->tx_conf_frames,
+			   extras->tx_sg_frames,
+			   extras->tx_reallocs,
+			   extras->tx_portal_busy);
+	}
+
+	return 0;
+}
+
+static int dpaa2_dbg_cpu_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
+
+	err = single_open(file, dpaa2_dbg_cpu_show, priv);
+	if (err < 0)
+		netdev_err(priv->net_dev, "single_open() failed\n");
+
+	return err;
+}
+
+static const struct file_operations dpaa2_dbg_cpu_ops = {
+	.open = dpaa2_dbg_cpu_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static char *fq_type_to_str(struct dpaa2_eth_fq *fq)
+{
+	switch (fq->type) {
+	case DPAA2_RX_FQ:
+		return "Rx";
+	case DPAA2_TX_CONF_FQ:
+		return "Tx conf";
+	default:
+		return "N/A";
+	}
+}
+
+static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset)
+{
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private;
+	struct dpaa2_eth_fq *fq;
+	u32 fcnt, bcnt;
+	int i, err;
+
+	seq_printf(file, "FQ stats for %s:\n", priv->net_dev->name);
+	seq_printf(file, "%s%16s%16s%16s%16s\n",
+		   "VFQID", "CPU", "Type", "Frames", "Pending frames");
+
+	for (i = 0; i < priv->num_fqs; i++) {
+		fq = &priv->fq[i];
+		err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
+		if (err)
+			fcnt = 0;
+
+		seq_printf(file, "%5d%16d%16s%16llu%16u\n",
+			   fq->fqid,
+			   fq->target_cpu,
+			   fq_type_to_str(fq),
+			   fq->stats.frames,
+			   fcnt);
+	}
+
+	return 0;
+}
+
+static int dpaa2_dbg_fqs_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
+
+	err = single_open(file, dpaa2_dbg_fqs_show, priv);
+	if (err < 0)
+		netdev_err(priv->net_dev, "single_open() failed\n");
+
+	return err;
+}
+
+static const struct file_operations dpaa2_dbg_fq_ops = {
+	.open = dpaa2_dbg_fqs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset)
+{
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private;
+	struct dpaa2_eth_channel *ch;
+	int i;
+
+	seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name);
+	seq_printf(file, "%s%16s%16s%16s%16s\n",
+		   "CHID", "CPU", "Deq busy", "CDANs", "Buf count");
+
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		seq_printf(file, "%4d%16d%16llu%16llu%16d\n",
+			   ch->ch_id,
+			   ch->nctx.desired_cpu,
+			   ch->stats.dequeue_portal_busy,
+			   ch->stats.cdan,
+			   ch->buf_count);
+	}
+
+	return 0;
+}
+
+static int dpaa2_dbg_ch_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
+
+	err = single_open(file, dpaa2_dbg_ch_show, priv);
+	if (err < 0)
+		netdev_err(priv->net_dev, "single_open() failed\n");
+
+	return err;
+}
+
+static const struct file_operations dpaa2_dbg_ch_ops = {
+	.open = dpaa2_dbg_ch_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void dpaa2_dbg_add(struct dpaa2_eth_priv *priv)
+{
+	struct dentry *dir;
+
+	/* Create a directory for the interface */
+	dir = debugfs_create_dir(priv->net_dev->name, dpaa2_dbg_root);
+	priv->dbg.dir = dir;
+
+	/* per-cpu stats file */
+	debugfs_create_file("cpu_stats", 0444, dir, priv, &dpaa2_dbg_cpu_ops);
+
+	/* per-fq stats file */
+	debugfs_create_file("fq_stats", 0444, dir, priv, &dpaa2_dbg_fq_ops);
+
+	/* per-channel stats file */
+	debugfs_create_file("ch_stats", 0444, dir, priv, &dpaa2_dbg_ch_ops);
+}
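+/* Assuming debugfs is mounted in its usual location, the files created
+ * above appear under /sys/kernel/debug/dpaa2-eth/<netdev name>/ as
+ * cpu_stats, fq_stats and ch_stats, each readable with a plain cat.
+ */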
+
+void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv)
+{
+	debugfs_remove_recursive(priv->dbg.dir);
+}
+
+void dpaa2_eth_dbg_init(void)
+{
+	dpaa2_dbg_root = debugfs_create_dir(DPAA2_ETH_DBG_ROOT, NULL);
+	pr_debug("DPAA2-ETH: debugfs created\n");
+}
+
+void dpaa2_eth_dbg_exit(void)
+{
+	debugfs_remove(dpaa2_dbg_root);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h
new file mode 100644
index 0000000..15598b2
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2015 Freescale Semiconductor Inc.
+ * Copyright 2018-2019 NXP
+ */
+#ifndef DPAA2_ETH_DEBUGFS_H
+#define DPAA2_ETH_DEBUGFS_H
+
+#include <linux/dcache.h>
+
+struct dpaa2_eth_priv;
+
+struct dpaa2_debugfs {
+	struct dentry *dir;
+};
+
+#ifdef CONFIG_DEBUG_FS
+void dpaa2_eth_dbg_init(void);
+void dpaa2_eth_dbg_exit(void);
+void dpaa2_dbg_add(struct dpaa2_eth_priv *priv);
+void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv);
+#else
+static inline void dpaa2_eth_dbg_init(void) {}
+static inline void dpaa2_eth_dbg_exit(void) {}
+static inline void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) {}
+static inline void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv) {}
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* DPAA2_ETH_DEBUGFS_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h
new file mode 100644
index 0000000..9801528
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2014-2015 Freescale Semiconductor Inc.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM	dpaa2_eth
+
+#if !defined(_DPAA2_ETH_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _DPAA2_ETH_TRACE_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "dpaa2-eth.h"
+#include <linux/tracepoint.h>
+
+#define TR_FMT "[%s] fd: addr=0x%llx, len=%u, off=%u"
+/* trace_printk format for raw buffer event class */
+#define TR_BUF_FMT "[%s] vaddr=%p size=%zu dma_addr=%pad map_size=%zu bpid=%d"
+
+/* This is used to declare a class of events.
+ * Individual events of this type will be defined below.
+ */
+
+/* Store details about a frame descriptor */
+DECLARE_EVENT_CLASS(dpaa2_eth_fd,
+		    /* Trace function prototype */
+		    TP_PROTO(struct net_device *netdev,
+			     const struct dpaa2_fd *fd),
+
+		    /* Repeat argument list here */
+		    TP_ARGS(netdev, fd),
+
+		    /* A structure containing the relevant information we want
+		     * to record. Declare name and type for each normal element,
+		     * name, type and size for arrays. Use __string for variable
+		     * length strings.
+		     */
+		    TP_STRUCT__entry(
+				     __field(u64, fd_addr)
+				     __field(u32, fd_len)
+				     __field(u16, fd_offset)
+				     __string(name, netdev->name)
+		    ),
+
+		    /* The function that assigns values to the above declared
+		     * fields
+		     */
+		    TP_fast_assign(
+				   __entry->fd_addr = dpaa2_fd_get_addr(fd);
+				   __entry->fd_len = dpaa2_fd_get_len(fd);
+				   __entry->fd_offset = dpaa2_fd_get_offset(fd);
+				   __assign_str(name, netdev->name);
+		    ),
+
+		    /* This is what gets printed when the trace event is
+		     * triggered.
+		     */
+		    TP_printk(TR_FMT,
+			      __get_str(name),
+			      __entry->fd_addr,
+			      __entry->fd_len,
+			      __entry->fd_offset)
+);
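+/* For reference, a record of this class renders per TR_FMT roughly as
+ * (illustrative values):
+ *   [eth0] fd: addr=0x27f5669840, len=66, off=64
+ */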
+
+/* Now declare events of the above type. Format is:
+ * DEFINE_EVENT(class, name, proto, args), with proto and args same as for class
+ */
+
+/* Tx (egress) fd */
+DEFINE_EVENT(dpaa2_eth_fd, dpaa2_tx_fd,
+	     TP_PROTO(struct net_device *netdev,
+		      const struct dpaa2_fd *fd),
+
+	     TP_ARGS(netdev, fd)
+);
+
+/* Rx fd */
+DEFINE_EVENT(dpaa2_eth_fd, dpaa2_rx_fd,
+	     TP_PROTO(struct net_device *netdev,
+		      const struct dpaa2_fd *fd),
+
+	     TP_ARGS(netdev, fd)
+);
+
+/* Tx confirmation fd */
+DEFINE_EVENT(dpaa2_eth_fd, dpaa2_tx_conf_fd,
+	     TP_PROTO(struct net_device *netdev,
+		      const struct dpaa2_fd *fd),
+
+	     TP_ARGS(netdev, fd)
+);
+
+/* Log data about raw buffers. Useful for tracing DPBP content. */
+TRACE_EVENT(dpaa2_eth_buf_seed,
+	    /* Trace function prototype */
+	    TP_PROTO(struct net_device *netdev,
+		     /* virtual address and size */
+		     void *vaddr,
+		     size_t size,
+		     /* dma map address and size */
+		     dma_addr_t dma_addr,
+		     size_t map_size,
+		     /* buffer pool id, if relevant */
+		     u16 bpid),
+
+	    /* Repeat argument list here */
+	    TP_ARGS(netdev, vaddr, size, dma_addr, map_size, bpid),
+
+	    /* A structure containing the relevant information we want
+	     * to record. Declare name and type for each normal element,
+	     * name, type and size for arrays. Use __string for variable
+	     * length strings.
+	     */
+	    TP_STRUCT__entry(
+			     __field(void *, vaddr)
+			     __field(size_t, size)
+			     __field(dma_addr_t, dma_addr)
+			     __field(size_t, map_size)
+			     __field(u16, bpid)
+			     __string(name, netdev->name)
+	    ),
+
+	    /* The function that assigns values to the above declared
+	     * fields
+	     */
+	    TP_fast_assign(
+			   __entry->vaddr = vaddr;
+			   __entry->size = size;
+			   __entry->dma_addr = dma_addr;
+			   __entry->map_size = map_size;
+			   __entry->bpid = bpid;
+			   __assign_str(name, netdev->name);
+	    ),
+
+	    /* This is what gets printed when the trace event is
+	     * triggered.
+	     */
+	    TP_printk(TR_BUF_FMT,
+		      __get_str(name),
+		      __entry->vaddr,
+		      __entry->size,
+		      &__entry->dma_addr,
+		      __entry->map_size,
+		      __entry->bpid)
+);
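+/* All events in this file can be enabled at runtime through tracefs,
+ * e.g. (assuming the default tracefs mount point):
+ *   echo 1 > /sys/kernel/debug/tracing/events/dpaa2_eth/enable
+ *   cat /sys/kernel/debug/tracing/trace_pipe
+ */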
+
+/* If only one event of a certain type needs to be declared, use TRACE_EVENT().
+ * The syntax is the same as for DECLARE_EVENT_CLASS().
+ */
+
+#endif /* _DPAA2_ETH_TRACE_H */
+
+/* This must be outside ifdef _DPAA2_ETH_TRACE_H */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE	dpaa2-eth-trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
new file mode 100644
index 0000000..bf5add9
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -0,0 +1,3659 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2017 NXP
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/etherdevice.h>
+#include <linux/of_net.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/kthread.h>
+#include <linux/iommu.h>
+#include <linux/net_tstamp.h>
+#include <linux/fsl/mc.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <net/sock.h>
+
+#include "dpaa2-eth.h"
+
+/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa2 files
+ * using trace events only need to #include "dpaa2-eth-trace.h"
+ */
+#define CREATE_TRACE_POINTS
+#include "dpaa2-eth-trace.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Freescale Semiconductor, Inc");
+MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver");
+
+static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
+				dma_addr_t iova_addr)
+{
+	phys_addr_t phys_addr;
+
+	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
+
+	return phys_to_virt(phys_addr);
+}
+
+static void validate_rx_csum(struct dpaa2_eth_priv *priv,
+			     u32 fd_status,
+			     struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* HW checksum validation is disabled, nothing to do here */
+	if (!(priv->net_dev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* Read checksum validation bits */
+	if (!((fd_status & DPAA2_FAS_L3CV) &&
+	      (fd_status & DPAA2_FAS_L4CV)))
+		return;
+
+	/* Inform the stack there's no need to compute L3/L4 csum anymore */
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
+/* Free a received FD.
+ * Not to be used for Tx conf FDs or on any other paths.
+ */
+static void free_rx_fd(struct dpaa2_eth_priv *priv,
+		       const struct dpaa2_fd *fd,
+		       void *vaddr)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	u8 fd_format = dpaa2_fd_get_format(fd);
+	struct dpaa2_sg_entry *sgt;
+	void *sg_vaddr;
+	int i;
+
+	/* If single buffer frame, just free the data buffer */
+	if (fd_format == dpaa2_fd_single)
+		goto free_buf;
+	else if (fd_format != dpaa2_fd_sg)
+		/* We don't support any other format */
+		return;
+
+	/* For S/G frames, we first need to free all SG entries
+	 * except the first one, which was taken care of already
+	 */
+	sgt = vaddr + dpaa2_fd_get_offset(fd);
+	for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
+		addr = dpaa2_sg_get_addr(&sgt[i]);
+		sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
+		dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+			       DMA_BIDIRECTIONAL);
+
+		free_pages((unsigned long)sg_vaddr, 0);
+		if (dpaa2_sg_is_final(&sgt[i]))
+			break;
+	}
+
+free_buf:
+	free_pages((unsigned long)vaddr, 0);
+}
+
+/* Build a linear skb based on a single-buffer frame descriptor */
+static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch,
+					const struct dpaa2_fd *fd,
+					void *fd_vaddr)
+{
+	struct sk_buff *skb = NULL;
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+
+	ch->buf_count--;
+
+	skb = build_skb(fd_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, fd_offset);
+	skb_put(skb, fd_length);
+
+	return skb;
+}
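+/* The resulting buffer layout mirrors the FD fields (sketch):
+ *
+ *   fd_vaddr --> | fd_offset headroom | fd_length frame data | tailroom |
+ *
+ * build_skb() reuses the page in place, so no data is copied on this path;
+ * the tailroom left by DPAA2_ETH_RX_BUF_RAW_SIZE holds the shared info.
+ */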
+
+/* Build a non linear (fragmented) skb based on a S/G table */
+static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
+				      struct dpaa2_eth_channel *ch,
+				      struct dpaa2_sg_entry *sgt)
+{
+	struct sk_buff *skb = NULL;
+	struct device *dev = priv->net_dev->dev.parent;
+	void *sg_vaddr;
+	dma_addr_t sg_addr;
+	u16 sg_offset;
+	u32 sg_length;
+	struct page *page, *head_page;
+	int page_offset;
+	int i;
+
+	for (i = 0; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
+		struct dpaa2_sg_entry *sge = &sgt[i];
+
+		/* NOTE: We only support SG entries in dpaa2_sg_single format,
+		 * but this is the only format we may receive from HW anyway
+		 */
+
+		/* Get the address and length from the S/G entry */
+		sg_addr = dpaa2_sg_get_addr(sge);
+		sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
+		dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
+			       DMA_BIDIRECTIONAL);
+
+		sg_length = dpaa2_sg_get_len(sge);
+
+		if (i == 0) {
+			/* We build the skb around the first data buffer */
+			skb = build_skb(sg_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE);
+			if (unlikely(!skb)) {
+				/* Free the first SG entry now, since we already
+				 * unmapped it and obtained the virtual address
+				 */
+				free_pages((unsigned long)sg_vaddr, 0);
+
+				/* We still need to subtract the buffers used
+				 * by this FD from our software counter
+				 */
+				while (i < DPAA2_ETH_MAX_SG_ENTRIES &&
+				       !dpaa2_sg_is_final(&sgt[i]))
+					i++;
+				break;
+			}
+
+			sg_offset = dpaa2_sg_get_offset(sge);
+			skb_reserve(skb, sg_offset);
+			skb_put(skb, sg_length);
+		} else {
+			/* Rest of the data buffers are stored as skb frags */
+			page = virt_to_page(sg_vaddr);
+			head_page = virt_to_head_page(sg_vaddr);
+
+			/* Offset in page (which may be compound).
+			 * Data in subsequent SG entries is stored from the
+			 * beginning of the buffer, so we don't need to add the
+			 * sg_offset.
+			 */
+			page_offset = ((unsigned long)sg_vaddr &
+				(PAGE_SIZE - 1)) +
+				(page_address(page) - page_address(head_page));
+
+			skb_add_rx_frag(skb, i - 1, head_page, page_offset,
+					sg_length, DPAA2_ETH_RX_BUF_SIZE);
+		}
+
+		if (dpaa2_sg_is_final(sge))
+			break;
+	}
+
+	WARN_ONCE(i == DPAA2_ETH_MAX_SG_ENTRIES, "Final bit not set in SGT");
+
+	/* Count all data buffers (entries 0..i) plus the SG table buffer */
+	ch->buf_count -= i + 2;
+
+	return skb;
+}
+
+/* Free buffers acquired from the buffer pool or which were meant to
+ * be released in the pool
+ */
+static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	void *vaddr;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
+		dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
+			       DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vaddr, 0);
+	}
+}
+
+static void xdp_release_buf(struct dpaa2_eth_priv *priv,
+			    struct dpaa2_eth_channel *ch,
+			    dma_addr_t addr)
+{
+	int err;
+
+	ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr;
+	if (ch->xdp.drop_cnt < DPAA2_ETH_BUFS_PER_CMD)
+		return;
+
+	while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid,
+					       ch->xdp.drop_bufs,
+					       ch->xdp.drop_cnt)) == -EBUSY)
+		cpu_relax();
+
+	if (err) {
+		free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
+		ch->buf_count -= ch->xdp.drop_cnt;
+	}
+
+	ch->xdp.drop_cnt = 0;
+}
+
+static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
+		       void *buf_start, u16 queue_id)
+{
+	struct dpaa2_eth_fq *fq;
+	struct dpaa2_faead *faead;
+	u32 ctrl, frc;
+	int i, err;
+
+	/* Mark the egress frame hardware annotation area as valid */
+	frc = dpaa2_fd_get_frc(fd);
+	dpaa2_fd_set_frc(fd, frc | DPAA2_FD_FRC_FAEADV);
+	dpaa2_fd_set_ctrl(fd, DPAA2_FD_CTRL_ASAL);
+
+	/* Instruct hardware to release the FD buffer directly into
+	 * the buffer pool once transmission is completed, instead of
+	 * sending a Tx confirmation frame to us
+	 */
+	ctrl = DPAA2_FAEAD_A4V | DPAA2_FAEAD_A2V | DPAA2_FAEAD_EBDDV;
+	faead = dpaa2_get_faead(buf_start, false);
+	faead->ctrl = cpu_to_le32(ctrl);
+	faead->conf_fqid = 0;
+
+	fq = &priv->fq[queue_id];
+	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
+		err = priv->enqueue(priv, fq, fd, 0);
+		if (err != -EBUSY)
+			break;
+	}
+
+	return err;
+}
+
+static u32 run_xdp(struct dpaa2_eth_priv *priv,
+		   struct dpaa2_eth_channel *ch,
+		   struct dpaa2_eth_fq *rx_fq,
+		   struct dpaa2_fd *fd, void *vaddr)
+{
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	struct rtnl_link_stats64 *percpu_stats;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 xdp_act = XDP_PASS;
+	int err;
+
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+
+	rcu_read_lock();
+
+	xdp_prog = READ_ONCE(ch->xdp.prog);
+	if (!xdp_prog)
+		goto out;
+
+	xdp.data = vaddr + dpaa2_fd_get_offset(fd);
+	xdp.data_end = xdp.data + dpaa2_fd_get_len(fd);
+	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+	xdp_set_data_meta_invalid(&xdp);
+	xdp.rxq = &ch->xdp_rxq;
+
+	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+	/* xdp.data pointer may have changed */
+	dpaa2_fd_set_offset(fd, xdp.data - vaddr);
+	dpaa2_fd_set_len(fd, xdp.data_end - xdp.data);
+
+	switch (xdp_act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		err = xdp_enqueue(priv, fd, vaddr, rx_fq->flowid);
+		if (err) {
+			xdp_release_buf(priv, ch, addr);
+			percpu_stats->tx_errors++;
+			ch->stats.xdp_tx_err++;
+		} else {
+			percpu_stats->tx_packets++;
+			percpu_stats->tx_bytes += dpaa2_fd_get_len(fd);
+			ch->stats.xdp_tx++;
+		}
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(xdp_act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
+		/* fall through */
+	case XDP_DROP:
+		xdp_release_buf(priv, ch, addr);
+		ch->stats.xdp_drop++;
+		break;
+	case XDP_REDIRECT:
+		dma_unmap_page(priv->net_dev->dev.parent, addr,
+			       DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL);
+		ch->buf_count--;
+		xdp.data_hard_start = vaddr;
+		err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
+		if (unlikely(err))
+			ch->stats.xdp_drop++;
+		else
+			ch->stats.xdp_redirect++;
+		break;
+	}
+
+	ch->xdp.res |= xdp_act;
+out:
+	rcu_read_unlock();
+	return xdp_act;
+}
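+/* A program exercising the verdicts above can be attached with the usual
+ * iproute2 tooling, e.g. (hypothetical object file and interface name):
+ *   ip -force link set dev eth0 xdp obj xdp_prog.o sec xdp
+ */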
+
+/* Main Rx frame processing routine */
+static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
+			 struct dpaa2_eth_channel *ch,
+			 const struct dpaa2_fd *fd,
+			 struct dpaa2_eth_fq *fq)
+{
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	u8 fd_format = dpaa2_fd_get_format(fd);
+	void *vaddr;
+	struct sk_buff *skb;
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpaa2_fas *fas;
+	void *buf_data;
+	u32 status = 0;
+	u32 xdp_act;
+
+	/* Tracing point */
+	trace_dpaa2_rx_fd(priv->net_dev, fd);
+
+	vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
+	dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+				DMA_BIDIRECTIONAL);
+
+	fas = dpaa2_get_fas(vaddr, false);
+	prefetch(fas);
+	buf_data = vaddr + dpaa2_fd_get_offset(fd);
+	prefetch(buf_data);
+
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
+	if (fd_format == dpaa2_fd_single) {
+		xdp_act = run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr);
+		if (xdp_act != XDP_PASS) {
+			percpu_stats->rx_packets++;
+			percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
+			return;
+		}
+
+		dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+			       DMA_BIDIRECTIONAL);
+		skb = build_linear_skb(ch, fd, vaddr);
+	} else if (fd_format == dpaa2_fd_sg) {
+		WARN_ON(priv->xdp_prog);
+
+		dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+			       DMA_BIDIRECTIONAL);
+		skb = build_frag_skb(priv, ch, buf_data);
+		free_pages((unsigned long)vaddr, 0);
+		percpu_extras->rx_sg_frames++;
+		percpu_extras->rx_sg_bytes += dpaa2_fd_get_len(fd);
+	} else {
+		/* We don't support any other format */
+		goto err_frame_format;
+	}
+
+	if (unlikely(!skb))
+		goto err_build_skb;
+
+	prefetch(skb->data);
+
+	/* Get the timestamp value */
+	if (priv->rx_tstamp) {
+		struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+		__le64 *ts = dpaa2_get_ts(vaddr, false);
+		u64 ns;
+
+		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+
+		ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
+		shhwtstamps->hwtstamp = ns_to_ktime(ns);
+	}
+
+	/* Check if we need to validate the L4 csum */
+	if (likely(dpaa2_fd_get_frc(fd) & DPAA2_FD_FRC_FASV)) {
+		status = le32_to_cpu(fas->status);
+		validate_rx_csum(priv, status, skb);
+	}
+
+	skb->protocol = eth_type_trans(skb, priv->net_dev);
+	skb_record_rx_queue(skb, fq->flowid);
+
+	percpu_stats->rx_packets++;
+	percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
+
+	list_add_tail(&skb->list, ch->rx_list);
+
+	return;
+
+err_build_skb:
+	free_rx_fd(priv, fd, vaddr);
+err_frame_format:
+	percpu_stats->rx_dropped++;
+}
+
+/* Consume all frames pull-dequeued into the store. This is the simplest way to
+ * make sure we don't accidentally issue another volatile dequeue which would
+ * overwrite (leak) frames already in the store.
+ *
+ * Observance of NAPI budget is not our concern, leaving that to the caller.
+ */
+static int consume_frames(struct dpaa2_eth_channel *ch,
+			  struct dpaa2_eth_fq **src)
+{
+	struct dpaa2_eth_priv *priv = ch->priv;
+	struct dpaa2_eth_fq *fq = NULL;
+	struct dpaa2_dq *dq;
+	const struct dpaa2_fd *fd;
+	int cleaned = 0;
+	int is_last;
+
+	do {
+		dq = dpaa2_io_store_next(ch->store, &is_last);
+		if (unlikely(!dq)) {
+			/* If we're here, we *must* have placed a
+			 * volatile dequeue command, so keep reading through
+			 * the store until we get some sort of valid response
+			 * token (either a valid frame or an "empty dequeue")
+			 */
+			continue;
+		}
+
+		fd = dpaa2_dq_fd(dq);
+		fq = (struct dpaa2_eth_fq *)(uintptr_t)dpaa2_dq_fqd_ctx(dq);
+
+		fq->consume(priv, ch, fd, fq);
+		cleaned++;
+	} while (!is_last);
+
+	if (!cleaned)
+		return 0;
+
+	fq->stats.frames += cleaned;
+
+	/* A dequeue operation only pulls frames from a single queue
+	 * into the store. Return the frame queue as an out param.
+	 */
+	if (src)
+		*src = fq;
+
+	return cleaned;
+}
+
+/* Configure the egress frame annotation for timestamp update */
+static void enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start)
+{
+	struct dpaa2_faead *faead;
+	u32 ctrl, frc;
+
+	/* Mark the egress frame annotation area as valid */
+	frc = dpaa2_fd_get_frc(fd);
+	dpaa2_fd_set_frc(fd, frc | DPAA2_FD_FRC_FAEADV);
+
+	/* Set hardware annotation size */
+	ctrl = dpaa2_fd_get_ctrl(fd);
+	dpaa2_fd_set_ctrl(fd, ctrl | DPAA2_FD_CTRL_ASAL);
+
+	/* Enable the UPD (update prepended data) bit in the FAEAD field of the
+	 * hardware frame annotation area
+	 */
+	ctrl = DPAA2_FAEAD_A2V | DPAA2_FAEAD_UPDV | DPAA2_FAEAD_UPD;
+	faead = dpaa2_get_faead(buf_start, true);
+	faead->ctrl = cpu_to_le32(ctrl);
+}
+
+/* Create a frame descriptor based on a fragmented skb */
+static int build_sg_fd(struct dpaa2_eth_priv *priv,
+		       struct sk_buff *skb,
+		       struct dpaa2_fd *fd)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	void *sgt_buf = NULL;
+	dma_addr_t addr;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	struct dpaa2_sg_entry *sgt;
+	int i, err;
+	int sgt_buf_size;
+	struct scatterlist *scl, *crt_scl;
+	int num_sg;
+	int num_dma_bufs;
+	struct dpaa2_eth_swa *swa;
+
+	/* Create and map scatterlist.
+	 * We don't advertise NETIF_F_FRAGLIST, so skb_to_sgvec() will not have
+	 * to go beyond nr_frags+1.
+	 * Note: We don't support chained scatterlists
+	 */
+	if (unlikely(PAGE_SIZE / sizeof(struct scatterlist) < nr_frags + 1))
+		return -EINVAL;
+
+	scl = kcalloc(nr_frags + 1, sizeof(struct scatterlist), GFP_ATOMIC);
+	if (unlikely(!scl))
+		return -ENOMEM;
+
+	sg_init_table(scl, nr_frags + 1);
+	num_sg = skb_to_sgvec(skb, scl, 0, skb->len);
+	num_dma_bufs = dma_map_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL);
+	if (unlikely(!num_dma_bufs)) {
+		err = -ENOMEM;
+		goto dma_map_sg_failed;
+	}
+
+	/* Prepare the HW SGT structure */
+	sgt_buf_size = priv->tx_data_offset +
+		       sizeof(struct dpaa2_sg_entry) * num_dma_bufs;
+	sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
+	if (unlikely(!sgt_buf)) {
+		err = -ENOMEM;
+		goto sgt_buf_alloc_failed;
+	}
+	sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);
+	memset(sgt_buf, 0, sgt_buf_size);
+
+	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
+
+	/* Fill in the HW SGT structure.
+	 *
+	 * sgt_buf is zeroed out, so the following fields are implicit
+	 * in all sgt entries:
+	 *   - offset is 0
+	 *   - format is 'dpaa2_sg_single'
+	 */
+	for_each_sg(scl, crt_scl, num_dma_bufs, i) {
+		dpaa2_sg_set_addr(&sgt[i], sg_dma_address(crt_scl));
+		dpaa2_sg_set_len(&sgt[i], sg_dma_len(crt_scl));
+	}
+	dpaa2_sg_set_final(&sgt[i - 1], true);
+
+	/* Store the skb backpointer in the SGT buffer.
+	 * Fit the scatterlist and the number of buffers alongside the
+	 * skb backpointer in the software annotation area. We'll need
+	 * all of them on Tx Conf.
+	 */
+	swa = (struct dpaa2_eth_swa *)sgt_buf;
+	swa->type = DPAA2_ETH_SWA_SG;
+	swa->sg.skb = skb;
+	swa->sg.scl = scl;
+	swa->sg.num_sg = num_sg;
+	swa->sg.sgt_size = sgt_buf_size;
+
+	/* Separately map the SGT buffer */
+	addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(dev, addr))) {
+		err = -ENOMEM;
+		goto dma_map_single_failed;
+	}
+	dpaa2_fd_set_offset(fd, priv->tx_data_offset);
+	dpaa2_fd_set_format(fd, dpaa2_fd_sg);
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_len(fd, skb->len);
+	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
+
+	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+		enable_tx_tstamp(fd, sgt_buf);
+
+	return 0;
+
+dma_map_single_failed:
+	skb_free_frag(sgt_buf);
+sgt_buf_alloc_failed:
+	dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL);
+dma_map_sg_failed:
+	kfree(scl);
+	return err;
+}
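+/* Layout of the separately mapped SGT buffer built above (sketch):
+ *
+ *   sgt_buf: | swa + HW annotation (tx_data_offset bytes) | SG entries |
+ *
+ * with the last of the num_dma_bufs entries carrying the final bit.
+ */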
+
+/* Create a frame descriptor based on a linear skb */
+static int build_single_fd(struct dpaa2_eth_priv *priv,
+			   struct sk_buff *skb,
+			   struct dpaa2_fd *fd)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	u8 *buffer_start, *aligned_start;
+	struct dpaa2_eth_swa *swa;
+	dma_addr_t addr;
+
+	buffer_start = skb->data - dpaa2_eth_needed_headroom(priv, skb);
+
+	/* If there's enough room to align the FD address, do it.
+	 * It will help hardware optimize accesses.
+	 */
+	aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN,
+				  DPAA2_ETH_TX_BUF_ALIGN);
+	if (aligned_start >= skb->head)
+		buffer_start = aligned_start;
+
+	/* Store a backpointer to the skb at the beginning of the buffer
+	 * (in the private data area) such that we can release it
+	 * on Tx confirm
+	 */
+	swa = (struct dpaa2_eth_swa *)buffer_start;
+	swa->type = DPAA2_ETH_SWA_SINGLE;
+	swa->single.skb = skb;
+
+	addr = dma_map_single(dev, buffer_start,
+			      skb_tail_pointer(skb) - buffer_start,
+			      DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(dev, addr)))
+		return -ENOMEM;
+
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start));
+	dpaa2_fd_set_len(fd, skb->len);
+	dpaa2_fd_set_format(fd, dpaa2_fd_single);
+	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
+
+	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+		enable_tx_tstamp(fd, buffer_start);
+
+	return 0;
+}
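+/* On this path the skb headroom doubles as annotation space (sketch):
+ *
+ *   buffer_start: | swa backpointer, HW annotation | skb->len frame data |
+ *
+ * so the FD offset is simply skb->data - buffer_start.
+ */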
+
+/* FD freeing routine on the Tx path
+ *
+ * DMA-unmap and free FD and possibly SGT buffer allocated on Tx. The skb
+ * back-pointed to is also freed.
+ * This can be called either from dpaa2_eth_tx_conf() or on the error path of
+ * dpaa2_eth_tx().
+ */
+static void free_tx_fd(const struct dpaa2_eth_priv *priv,
+		       struct dpaa2_eth_fq *fq,
+		       const struct dpaa2_fd *fd, bool in_napi)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	dma_addr_t fd_addr;
+	struct sk_buff *skb = NULL;
+	unsigned char *buffer_start;
+	struct dpaa2_eth_swa *swa;
+	u8 fd_format = dpaa2_fd_get_format(fd);
+	u32 fd_len = dpaa2_fd_get_len(fd);
+
+	fd_addr = dpaa2_fd_get_addr(fd);
+	buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
+	swa = (struct dpaa2_eth_swa *)buffer_start;
+
+	if (fd_format == dpaa2_fd_single) {
+		if (swa->type == DPAA2_ETH_SWA_SINGLE) {
+			skb = swa->single.skb;
+			/* Accessing the skb buffer is safe before dma unmap,
+			 * because we didn't map the actual skb shell.
+			 */
+			dma_unmap_single(dev, fd_addr,
+					 skb_tail_pointer(skb) - buffer_start,
+					 DMA_BIDIRECTIONAL);
+		} else {
+			WARN_ONCE(swa->type != DPAA2_ETH_SWA_XDP, "Wrong SWA type");
+			dma_unmap_single(dev, fd_addr, swa->xdp.dma_size,
+					 DMA_BIDIRECTIONAL);
+		}
+	} else if (fd_format == dpaa2_fd_sg) {
+		skb = swa->sg.skb;
+
+		/* Unmap the scatterlist */
+		dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
+			     DMA_BIDIRECTIONAL);
+		kfree(swa->sg.scl);
+
+		/* Unmap the SGT buffer */
+		dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
+				 DMA_BIDIRECTIONAL);
+	} else {
+		netdev_dbg(priv->net_dev, "Invalid FD format\n");
+		return;
+	}
+
+	if (swa->type != DPAA2_ETH_SWA_XDP && in_napi) {
+		fq->dq_frames++;
+		fq->dq_bytes += fd_len;
+	}
+
+	if (swa->type == DPAA2_ETH_SWA_XDP) {
+		xdp_return_frame(swa->xdp.xdpf);
+		return;
+	}
+
+	/* Get the timestamp value */
+	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+		struct skb_shared_hwtstamps shhwtstamps;
+		__le64 *ts = dpaa2_get_ts(buffer_start, true);
+		u64 ns;
+
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+
+		ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
+		shhwtstamps.hwtstamp = ns_to_ktime(ns);
+		skb_tstamp_tx(skb, &shhwtstamps);
+	}
+
+	/* Free SGT buffer allocated on tx */
+	if (fd_format != dpaa2_fd_single)
+		skb_free_frag(buffer_start);
+
+	/* Move on with skb release */
+	napi_consume_skb(skb, in_napi);
+}
+
+static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpaa2_fd fd;
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	struct dpaa2_eth_fq *fq;
+	struct netdev_queue *nq;
+	u16 queue_mapping;
+	unsigned int needed_headroom;
+	u32 fd_len;
+	u8 prio = 0;
+	int err, i;
+
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
+	needed_headroom = dpaa2_eth_needed_headroom(priv, skb);
+	if (skb_headroom(skb) < needed_headroom) {
+		struct sk_buff *ns;
+
+		ns = skb_realloc_headroom(skb, needed_headroom);
+		if (unlikely(!ns)) {
+			percpu_stats->tx_dropped++;
+			goto err_alloc_headroom;
+		}
+		percpu_extras->tx_reallocs++;
+
+		if (skb->sk)
+			skb_set_owner_w(ns, skb->sk);
+
+		dev_kfree_skb(skb);
+		skb = ns;
+	}
+
+	/* We'll be holding a back-reference to the skb until Tx Confirmation;
+	 * we don't want that overwritten by a concurrent Tx with a cloned skb.
+	 */
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		/* skb_unshare() has already freed the skb */
+		percpu_stats->tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	/* Setup the FD fields */
+	memset(&fd, 0, sizeof(fd));
+
+	if (skb_is_nonlinear(skb)) {
+		err = build_sg_fd(priv, skb, &fd);
+		percpu_extras->tx_sg_frames++;
+		percpu_extras->tx_sg_bytes += skb->len;
+	} else {
+		err = build_single_fd(priv, skb, &fd);
+	}
+
+	if (unlikely(err)) {
+		percpu_stats->tx_dropped++;
+		goto err_build_fd;
+	}
+
+	/* Tracing point */
+	trace_dpaa2_tx_fd(net_dev, &fd);
+
+	/* TxConf FQ selection relies on queue id from the stack.
+	 * In case of a forwarded frame from another DPNI interface, we choose
+	 * a queue affined to the same core that processed the Rx frame
+	 */
+	queue_mapping = skb_get_queue_mapping(skb);
+
+	if (net_dev->num_tc) {
+		prio = netdev_txq_to_tc(net_dev, queue_mapping);
+		/* Hardware interprets priority level 0 as being the highest,
+		 * so we need to do a reverse mapping to the netdev tc index
+		 */
+		prio = net_dev->num_tc - prio - 1;
+		/* We have only one FQ array entry for all Tx hardware queues
+		 * with the same flow id (but different priority levels)
+		 */
+		queue_mapping %= dpaa2_eth_queue_count(priv);
+	}
+	fq = &priv->fq[queue_mapping];
+
+	fd_len = dpaa2_fd_get_len(&fd);
+	nq = netdev_get_tx_queue(net_dev, queue_mapping);
+	netdev_tx_sent_queue(nq, fd_len);
+
+	/* Everything that happens after this enqueue might race with
+	 * the Tx confirmation callback for this frame
+	 */
+	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
+		err = priv->enqueue(priv, fq, &fd, prio);
+		if (err != -EBUSY)
+			break;
+	}
+	percpu_extras->tx_portal_busy += i;
+	if (unlikely(err < 0)) {
+		percpu_stats->tx_errors++;
+		/* Clean up everything, including freeing the skb */
+		free_tx_fd(priv, fq, &fd, false);
+		netdev_tx_completed_queue(nq, 1, fd_len);
+	} else {
+		percpu_stats->tx_packets++;
+		percpu_stats->tx_bytes += fd_len;
+	}
+
+	return NETDEV_TX_OK;
+
+err_build_fd:
+err_alloc_headroom:
+	dev_kfree_skb(skb);
+
+	return NETDEV_TX_OK;
+}
+
+/* Tx confirmation frame processing routine */
+static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
+			      struct dpaa2_eth_channel *ch __always_unused,
+			      const struct dpaa2_fd *fd,
+			      struct dpaa2_eth_fq *fq)
+{
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	u32 fd_len = dpaa2_fd_get_len(fd);
+	u32 fd_errors;
+
+	/* Tracing point */
+	trace_dpaa2_tx_conf_fd(priv->net_dev, fd);
+
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+	percpu_extras->tx_conf_frames++;
+	percpu_extras->tx_conf_bytes += fd_len;
+
+	/* Check frame errors in the FD field */
+	fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
+	free_tx_fd(priv, fq, fd, true);
+
+	if (likely(!fd_errors))
+		return;
+
+	if (net_ratelimit())
+		netdev_dbg(priv->net_dev, "TX frame FD error: 0x%08x\n",
+			   fd_errors);
+
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+	/* Tx-conf logically pertains to the egress path. */
+	percpu_stats->tx_errors++;
+}
+
+static int set_rx_csum(struct dpaa2_eth_priv *priv, bool enable)
+{
+	int err;
+
+	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
+			       DPNI_OFF_RX_L3_CSUM, enable);
+	if (err) {
+		netdev_err(priv->net_dev,
+			   "dpni_set_offload(RX_L3_CSUM) failed\n");
+		return err;
+	}
+
+	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
+			       DPNI_OFF_RX_L4_CSUM, enable);
+	if (err) {
+		netdev_err(priv->net_dev,
+			   "dpni_set_offload(RX_L4_CSUM) failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
+{
+	int err;
+
+	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
+			       DPNI_OFF_TX_L3_CSUM, enable);
+	if (err) {
+		netdev_err(priv->net_dev, "dpni_set_offload(TX_L3_CSUM) failed\n");
+		return err;
+	}
+
+	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
+			       DPNI_OFF_TX_L4_CSUM, enable);
+	if (err) {
+		netdev_err(priv->net_dev, "dpni_set_offload(TX_L4_CSUM) failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/* Perform a single release command to add buffers
+ * to the specified buffer pool
+ */
+static int add_bufs(struct dpaa2_eth_priv *priv,
+		    struct dpaa2_eth_channel *ch, u16 bpid)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
+	struct page *page;
+	dma_addr_t addr;
+	int i, err;
+
+	for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) {
+		/* Allocate one page for each Rx buffer. WRIOP sees
+		 * the entire page except for a tailroom reserved for
+		 * the skb shared info
+		 */
+		page = dev_alloc_pages(0);
+		if (!page)
+			goto err_alloc;
+
+		addr = dma_map_page(dev, page, 0, DPAA2_ETH_RX_BUF_SIZE,
+				    DMA_BIDIRECTIONAL);
+		if (unlikely(dma_mapping_error(dev, addr)))
+			goto err_map;
+
+		buf_array[i] = addr;
+
+		/* tracing point */
+		trace_dpaa2_eth_buf_seed(priv->net_dev,
+					 page, DPAA2_ETH_RX_BUF_RAW_SIZE,
+					 addr, DPAA2_ETH_RX_BUF_SIZE,
+					 bpid);
+	}
+
+release_bufs:
+	/* In case the portal is busy, retry until successful */
+	while ((err = dpaa2_io_service_release(ch->dpio, bpid,
+					       buf_array, i)) == -EBUSY)
+		cpu_relax();
+
+	/* If release command failed, clean up and bail out;
+	 * not much else we can do about it
+	 */
+	if (err) {
+		free_bufs(priv, buf_array, i);
+		return 0;
+	}
+
+	return i;
+
+err_map:
+	__free_pages(page, 0);
+err_alloc:
+	/* If we managed to allocate at least some buffers,
+	 * release them to hardware
+	 */
+	if (i)
+		goto release_bufs;
+
+	return 0;
+}
+
+static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
+{
+	int i, j;
+	int new_count;
+
+	for (j = 0; j < priv->num_channels; j++) {
+		for (i = 0; i < DPAA2_ETH_NUM_BUFS;
+		     i += DPAA2_ETH_BUFS_PER_CMD) {
+			new_count = add_bufs(priv, priv->channel[j], bpid);
+			priv->channel[j]->buf_count += new_count;
+
+			if (new_count < DPAA2_ETH_BUFS_PER_CMD)
+				return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Drain the specified number of buffers from the DPNI's private buffer pool.
+ * @count must not exceed DPAA2_ETH_BUFS_PER_CMD
+ */
+static void drain_bufs(struct dpaa2_eth_priv *priv, int count)
+{
+	u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
+	int ret;
+
+	do {
+		ret = dpaa2_io_service_acquire(NULL, priv->bpid,
+					       buf_array, count);
+		if (ret < 0) {
+			netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n");
+			return;
+		}
+		free_bufs(priv, buf_array, ret);
+	} while (ret);
+}
+
+static void drain_pool(struct dpaa2_eth_priv *priv)
+{
+	int i;
+
+	drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD);
+	drain_bufs(priv, 1);
+
+	for (i = 0; i < priv->num_channels; i++)
+		priv->channel[i]->buf_count = 0;
+}
+
+/* Function is called from softirq context only, so we don't need to guard
+ * the access to percpu count
+ */
+static int refill_pool(struct dpaa2_eth_priv *priv,
+		       struct dpaa2_eth_channel *ch,
+		       u16 bpid)
+{
+	int new_count;
+
+	if (likely(ch->buf_count >= DPAA2_ETH_REFILL_THRESH))
+		return 0;
+
+	do {
+		new_count = add_bufs(priv, ch, bpid);
+		if (unlikely(!new_count)) {
+			/* Out of memory; abort for now, we'll try later on */
+			break;
+		}
+		ch->buf_count += new_count;
+	} while (ch->buf_count < DPAA2_ETH_NUM_BUFS);
+
+	if (unlikely(ch->buf_count < DPAA2_ETH_NUM_BUFS))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int pull_channel(struct dpaa2_eth_channel *ch)
+{
+	int err;
+	int dequeues = -1;
+
+	/* Retry while portal is busy */
+	do {
+		err = dpaa2_io_service_pull_channel(ch->dpio, ch->ch_id,
+						    ch->store);
+		dequeues++;
+		cpu_relax();
+	} while (err == -EBUSY);
+
+	ch->stats.dequeue_portal_busy += dequeues;
+	if (unlikely(err))
+		ch->stats.pull_err++;
+
+	return err;
+}
+
+/* NAPI poll routine
+ *
+ * Frames are dequeued from the QMan channel associated with this NAPI context.
+ * Rx, Tx confirmation and (if configured) Rx error frames all count
+ * towards the NAPI budget.
+ */
+static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
+{
+	struct dpaa2_eth_channel *ch;
+	struct dpaa2_eth_priv *priv;
+	int rx_cleaned = 0, txconf_cleaned = 0;
+	struct dpaa2_eth_fq *fq, *txc_fq = NULL;
+	struct netdev_queue *nq;
+	int store_cleaned, work_done;
+	struct list_head rx_list;
+	int err;
+
+	ch = container_of(napi, struct dpaa2_eth_channel, napi);
+	ch->xdp.res = 0;
+	priv = ch->priv;
+
+	INIT_LIST_HEAD(&rx_list);
+	ch->rx_list = &rx_list;
+
+	do {
+		err = pull_channel(ch);
+		if (unlikely(err))
+			break;
+
+		/* Refill pool if appropriate */
+		refill_pool(priv, ch, priv->bpid);
+
+		store_cleaned = consume_frames(ch, &fq);
+		if (!store_cleaned)
+			break;
+		if (fq->type == DPAA2_RX_FQ) {
+			rx_cleaned += store_cleaned;
+		} else {
+			txconf_cleaned += store_cleaned;
+			/* We have a single Tx conf FQ on this channel */
+			txc_fq = fq;
+		}
+
+		/* If we either consumed the whole NAPI budget with Rx frames
+		 * or we reached the Tx confirmations threshold, we're done.
+		 */
+		if (rx_cleaned >= budget ||
+		    txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) {
+			work_done = budget;
+			goto out;
+		}
+	} while (store_cleaned);
+
+	/* We didn't consume the entire budget, so finish napi and
+	 * re-enable data availability notifications
+	 */
+	napi_complete_done(napi, rx_cleaned);
+	do {
+		err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx);
+		cpu_relax();
+	} while (err == -EBUSY);
+	WARN_ONCE(err, "CDAN notifications rearm failed on core %d",
+		  ch->nctx.desired_cpu);
+
+	work_done = max(rx_cleaned, 1);
+
+out:
+	netif_receive_skb_list(ch->rx_list);
+
+	if (txc_fq && txc_fq->dq_frames) {
+		nq = netdev_get_tx_queue(priv->net_dev, txc_fq->flowid);
+		netdev_tx_completed_queue(nq, txc_fq->dq_frames,
+					  txc_fq->dq_bytes);
+		txc_fq->dq_frames = 0;
+		txc_fq->dq_bytes = 0;
+	}
+
+	if (ch->xdp.res & XDP_REDIRECT)
+		xdp_do_flush_map();
+
+	return work_done;
+}
+
+static void enable_ch_napi(struct dpaa2_eth_priv *priv)
+{
+	struct dpaa2_eth_channel *ch;
+	int i;
+
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		napi_enable(&ch->napi);
+	}
+}
+
+static void disable_ch_napi(struct dpaa2_eth_priv *priv)
+{
+	struct dpaa2_eth_channel *ch;
+	int i;
+
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		napi_disable(&ch->napi);
+	}
+}
+
+static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
+{
+	struct dpni_taildrop td = {0};
+	int i, err;
+
+	if (priv->rx_td_enabled == enable)
+		return;
+
+	td.enable = enable;
+	td.threshold = DPAA2_ETH_TAILDROP_THRESH;
+
+	for (i = 0; i < priv->num_fqs; i++) {
+		if (priv->fq[i].type != DPAA2_RX_FQ)
+			continue;
+		err = dpni_set_taildrop(priv->mc_io, 0, priv->mc_token,
+					DPNI_CP_QUEUE, DPNI_QUEUE_RX, 0,
+					priv->fq[i].flowid, &td);
+		if (err) {
+			netdev_err(priv->net_dev,
+				   "dpni_set_taildrop() failed\n");
+			break;
+		}
+	}
+
+	priv->rx_td_enabled = enable;
+}
+
+static void update_tx_fqids(struct dpaa2_eth_priv *priv);
+
+static int link_state_update(struct dpaa2_eth_priv *priv)
+{
+	struct dpni_link_state state = {0};
+	bool tx_pause;
+	int err;
+
+	err = dpni_get_link_state(priv->mc_io, 0, priv->mc_token, &state);
+	if (unlikely(err)) {
+		netdev_err(priv->net_dev,
+			   "dpni_get_link_state() failed\n");
+		return err;
+	}
+
+	/* If Tx pause frame settings have changed, we need to update
+	 * Rx FQ taildrop configuration as well. We configure taildrop
+	 * only when pause frame generation is disabled.
+	 */
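+	/* Truth table for the pause option bits, per the DPNI convention:
+	 *   PAUSE=0 ASYM=0: no pause        -> Tx pause disabled
+	 *   PAUSE=1 ASYM=0: symmetric pause -> Tx pause enabled
+	 *   PAUSE=0 ASYM=1: Tx pause only   -> Tx pause enabled
+	 *   PAUSE=1 ASYM=1: Rx pause only   -> Tx pause disabled
+	 * which is exactly the XOR computed below.
+	 */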
+	tx_pause = !!(state.options & DPNI_LINK_OPT_PAUSE) ^
+		   !!(state.options & DPNI_LINK_OPT_ASYM_PAUSE);
+	dpaa2_eth_set_rx_taildrop(priv, !tx_pause);
+
+	/* Check link state; speed / duplex changes are not treated yet */
+	if (priv->link_state.up == state.up)
+		goto out;
+
+	if (state.up) {
+		update_tx_fqids(priv);
+		netif_carrier_on(priv->net_dev);
+		netif_tx_start_all_queues(priv->net_dev);
+	} else {
+		netif_tx_stop_all_queues(priv->net_dev);
+		netif_carrier_off(priv->net_dev);
+	}
+
+	netdev_info(priv->net_dev, "Link Event: state %s\n",
+		    state.up ? "up" : "down");
+
+out:
+	priv->link_state = state;
+
+	return 0;
+}
+
+static int dpaa2_eth_open(struct net_device *net_dev)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int err;
+
+	err = seed_pool(priv, priv->bpid);
+	if (err) {
+		/* Not much to do; the buffer pool, though not filled up,
+		 * may still contain some buffers which would enable us
+		 * to limp on.
+		 */
+		netdev_err(net_dev, "Buffer seeding failed for DPBP %d (bpid=%d)\n",
+			   priv->dpbp_dev->obj_desc.id, priv->bpid);
+	}
+
+	/* We'll only start the txqs when the link is actually ready; make sure
+	 * we don't race against the link up notification, which may come
+	 * immediately after dpni_enable();
+	 */
+	netif_tx_stop_all_queues(net_dev);
+	enable_ch_napi(priv);
+	/* Also, explicitly set carrier off, otherwise netif_carrier_ok() will
+	 * return true and cause 'ip link show' to report the LOWER_UP flag,
+	 * even though the link notification wasn't even received.
+	 */
+	netif_carrier_off(net_dev);
+
+	err = dpni_enable(priv->mc_io, 0, priv->mc_token);
+	if (err < 0) {
+		netdev_err(net_dev, "dpni_enable() failed\n");
+		goto enable_err;
+	}
+
+	/* If the DPMAC object has already processed the link up interrupt,
+	 * we have to learn the link state ourselves.
+	 */
+	err = link_state_update(priv);
+	if (err < 0) {
+		netdev_err(net_dev, "Can't update link state\n");
+		goto link_state_err;
+	}
+
+	return 0;
+
+link_state_err:
+enable_err:
+	disable_ch_napi(priv);
+	drain_pool(priv);
+	return err;
+}
+
+/* Total number of in-flight frames on ingress queues */
+static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
+{
+	struct dpaa2_eth_fq *fq;
+	u32 fcnt = 0, bcnt = 0, total = 0;
+	int i, err;
+
+	for (i = 0; i < priv->num_fqs; i++) {
+		fq = &priv->fq[i];
+		err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
+		if (err) {
+			netdev_warn(priv->net_dev, "query_fq_count failed");
+			break;
+		}
+		total += fcnt;
+	}
+
+	return total;
+}
+
+static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
+{
+	int retries = 10;
+	u32 pending;
+
+	do {
+		pending = ingress_fq_count(priv);
+		if (pending)
+			msleep(100);
+	} while (pending && --retries);
+}
+
+#define DPNI_TX_PENDING_VER_MAJOR	7
+#define DPNI_TX_PENDING_VER_MINOR	13
+static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
+{
+	union dpni_statistics stats;
+	int retries = 10;
+	int err;
+
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_TX_PENDING_VER_MAJOR,
+				   DPNI_TX_PENDING_VER_MINOR) < 0)
+		goto out;
+
+	do {
+		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, 6,
+					  &stats);
+		if (err)
+			goto out;
+		if (stats.page_6.tx_pending_frames == 0)
+			return;
+	} while (--retries);
+
+out:
+	msleep(500);
+}
+
+static int dpaa2_eth_stop(struct net_device *net_dev)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int dpni_enabled = 0;
+	int retries = 10;
+
+	netif_tx_stop_all_queues(net_dev);
+	netif_carrier_off(net_dev);
+
+	/* On dpni_disable(), the MC firmware will:
+	 * - stop MAC Rx and wait for all Rx frames to be enqueued to software
+	 * - cut off WRIOP dequeues from egress FQs and wait until transmission
+	 * of all in flight Tx frames is finished (and corresponding Tx conf
+	 * frames are enqueued back to software)
+	 *
+	 * Before calling dpni_disable(), we wait for all Tx frames to arrive
+	 * on WRIOP. After it finishes, wait until all remaining frames on Rx
+	 * and Tx conf queues are consumed on NAPI poll.
+	 */
+	wait_for_egress_fq_empty(priv);
+
+	do {
+		dpni_disable(priv->mc_io, 0, priv->mc_token);
+		dpni_is_enabled(priv->mc_io, 0, priv->mc_token, &dpni_enabled);
+		if (dpni_enabled)
+			/* Allow the hardware some slack */
+			msleep(100);
+	} while (dpni_enabled && --retries);
+	if (!retries) {
+		netdev_warn(net_dev, "Retry count exceeded disabling DPNI\n");
+		/* Must go on and disable NAPI nonetheless, so we don't crash at
+		 * the next "ifconfig up"
+		 */
+	}
+
+	wait_for_ingress_fq_empty(priv);
+	disable_ch_napi(priv);
+
+	/* Empty the buffer pool */
+	drain_pool(priv);
+
+	return 0;
+}
+
+static int dpaa2_eth_set_addr(struct net_device *net_dev, void *addr)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct device *dev = net_dev->dev.parent;
+	int err;
+
+	err = eth_mac_addr(net_dev, addr);
+	if (err < 0) {
+		dev_err(dev, "eth_mac_addr() failed (%d)\n", err);
+		return err;
+	}
+
+	err = dpni_set_primary_mac_addr(priv->mc_io, 0, priv->mc_token,
+					net_dev->dev_addr);
+	if (err) {
+		dev_err(dev, "dpni_set_primary_mac_addr() failed (%d)\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/** Fill in counters maintained by the GPP driver. These may be different from
+ * the hardware counters obtained by ethtool.
+ */
+static void dpaa2_eth_get_stats(struct net_device *net_dev,
+				struct rtnl_link_stats64 *stats)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct rtnl_link_stats64 *percpu_stats;
+	u64 *cpustats;
+	u64 *netstats = (u64 *)stats;
+	int i, j;
+	int num = sizeof(struct rtnl_link_stats64) / sizeof(u64);
+
+	for_each_possible_cpu(i) {
+		percpu_stats = per_cpu_ptr(priv->percpu_stats, i);
+		cpustats = (u64 *)percpu_stats;
+		for (j = 0; j < num; j++)
+			netstats[j] += cpustats[j];
+	}
+}
+
+/* Copy mac unicast addresses from @net_dev to @priv.
+ * Its sole purpose is to make dpaa2_eth_set_rx_mode() more readable.
+ */
+static void add_uc_hw_addr(const struct net_device *net_dev,
+			   struct dpaa2_eth_priv *priv)
+{
+	struct netdev_hw_addr *ha;
+	int err;
+
+	netdev_for_each_uc_addr(ha, net_dev) {
+		err = dpni_add_mac_addr(priv->mc_io, 0, priv->mc_token,
+					ha->addr);
+		if (err)
+			netdev_warn(priv->net_dev,
+				    "Could not add ucast MAC %pM to the filtering table (err %d)\n",
+				    ha->addr, err);
+	}
+}
+
+/* Copy mac multicast addresses from @net_dev to @priv
+ * Its sole purpose is to make dpaa2_eth_set_rx_mode() more readable.
+ */
+static void add_mc_hw_addr(const struct net_device *net_dev,
+			   struct dpaa2_eth_priv *priv)
+{
+	struct netdev_hw_addr *ha;
+	int err;
+
+	netdev_for_each_mc_addr(ha, net_dev) {
+		err = dpni_add_mac_addr(priv->mc_io, 0, priv->mc_token,
+					ha->addr);
+		if (err)
+			netdev_warn(priv->net_dev,
+				    "Could not add mcast MAC %pM to the filtering table (err %d)\n",
+				    ha->addr, err);
+	}
+}
+
+static void dpaa2_eth_set_rx_mode(struct net_device *net_dev)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int uc_count = netdev_uc_count(net_dev);
+	int mc_count = netdev_mc_count(net_dev);
+	u8 max_mac = priv->dpni_attrs.mac_filter_entries;
+	u32 options = priv->dpni_attrs.options;
+	u16 mc_token = priv->mc_token;
+	struct fsl_mc_io *mc_io = priv->mc_io;
+	int err;
+
+	/* Basic sanity checks; these probably indicate a misconfiguration */
+	if (options & DPNI_OPT_NO_MAC_FILTER && max_mac != 0)
+		netdev_info(net_dev,
+			    "mac_filter_entries=%d, DPNI_OPT_NO_MAC_FILTER option must be disabled\n",
+			    max_mac);
+
+	/* Force promiscuous if the uc or mc counts exceed our capabilities. */
+	if (uc_count > max_mac) {
+		netdev_info(net_dev,
+			    "Unicast addr count reached %d, max allowed is %d; forcing promisc\n",
+			    uc_count, max_mac);
+		goto force_promisc;
+	}
+	if (mc_count + uc_count > max_mac) {
+		netdev_info(net_dev,
+			    "Unicast + multicast addr count reached %d, max allowed is %d; forcing promisc\n",
+			    uc_count + mc_count, max_mac);
+		goto force_mc_promisc;
+	}
+
+	/* Adjust promisc settings due to flag combinations */
+	if (net_dev->flags & IFF_PROMISC)
+		goto force_promisc;
+	if (net_dev->flags & IFF_ALLMULTI) {
+		/* First, rebuild unicast filtering table. This should be done
+		 * in promisc mode, in order to avoid frame loss while we
+		 * progressively add entries to the table.
+		 * We don't know whether we had been in promisc already, and
+		 * making an MC call to find out is expensive; so set uc promisc
+		 * nonetheless.
+		 */
+		err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 1);
+		if (err)
+			netdev_warn(net_dev, "Can't set uc promisc\n");
+
+		/* Actual uc table reconstruction. */
+		err = dpni_clear_mac_filters(mc_io, 0, mc_token, 1, 0);
+		if (err)
+			netdev_warn(net_dev, "Can't clear uc filters\n");
+		add_uc_hw_addr(net_dev, priv);
+
+		/* Finally, clear uc promisc and set mc promisc as requested. */
+		err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 0);
+		if (err)
+			netdev_warn(net_dev, "Can't clear uc promisc\n");
+		goto force_mc_promisc;
+	}
+
+	/* Neither unicast, nor multicast promisc will be on... eventually.
+	 * For now, rebuild mac filtering tables while forcing both of them on.
+	 */
+	err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 1);
+	if (err)
+		netdev_warn(net_dev, "Can't set uc promisc (%d)\n", err);
+	err = dpni_set_multicast_promisc(mc_io, 0, mc_token, 1);
+	if (err)
+		netdev_warn(net_dev, "Can't set mc promisc (%d)\n", err);
+
+	/* Actual mac filtering tables reconstruction */
+	err = dpni_clear_mac_filters(mc_io, 0, mc_token, 1, 1);
+	if (err)
+		netdev_warn(net_dev, "Can't clear mac filters\n");
+	add_mc_hw_addr(net_dev, priv);
+	add_uc_hw_addr(net_dev, priv);
+
+	/* Now we can clear both ucast and mcast promisc, without risking
+	 * to drop legitimate frames anymore.
+	 */
+	err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 0);
+	if (err)
+		netdev_warn(net_dev, "Can't clear ucast promisc\n");
+	err = dpni_set_multicast_promisc(mc_io, 0, mc_token, 0);
+	if (err)
+		netdev_warn(net_dev, "Can't clear mcast promisc\n");
+
+	return;
+
+force_promisc:
+	err = dpni_set_unicast_promisc(mc_io, 0, mc_token, 1);
+	if (err)
+		netdev_warn(net_dev, "Can't set ucast promisc\n");
+force_mc_promisc:
+	err = dpni_set_multicast_promisc(mc_io, 0, mc_token, 1);
+	if (err)
+		netdev_warn(net_dev, "Can't set mcast promisc\n");
+}
+
+static int dpaa2_eth_set_features(struct net_device *net_dev,
+				  netdev_features_t features)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	netdev_features_t changed = features ^ net_dev->features;
+	bool enable;
+	int err;
+
+	if (changed & NETIF_F_RXCSUM) {
+		enable = !!(features & NETIF_F_RXCSUM);
+		err = set_rx_csum(priv, enable);
+		if (err)
+			return err;
+	}
+
+	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+		enable = !!(features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM));
+		err = set_tx_csum(priv, enable);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int dpaa2_eth_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(dev);
+	struct hwtstamp_config config;
+
+	if (copy_from_user(&config, rq->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		priv->tx_tstamp = false;
+		break;
+	case HWTSTAMP_TX_ON:
+		priv->tx_tstamp = true;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (config.rx_filter == HWTSTAMP_FILTER_NONE) {
+		priv->rx_tstamp = false;
+	} else {
+		priv->rx_tstamp = true;
+		/* TS is set for all frame types, not only those requested */
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+	}
+
+	return copy_to_user(rq->ifr_data, &config, sizeof(config)) ?
+			-EFAULT : 0;
+}
+
+static int dpaa2_eth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	if (cmd == SIOCSHWTSTAMP)
+		return dpaa2_eth_ts_ioctl(dev, rq, cmd);
+
+	return -EINVAL;
+}
+
+static bool xdp_mtu_valid(struct dpaa2_eth_priv *priv, int mtu)
+{
+	int mfl, linear_mfl;
+
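+	/* An XDP program needs the whole frame in one linear buffer, so the
+	 * maximum L2 frame length is whatever fits in a single Rx buffer
+	 * after reserving the hardware annotation area and the headroom.
+	 */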
+	mfl = DPAA2_ETH_L2_MAX_FRM(mtu);
+	linear_mfl = DPAA2_ETH_RX_BUF_SIZE - DPAA2_ETH_RX_HWA_SIZE -
+		     dpaa2_eth_rx_head_room(priv) - XDP_PACKET_HEADROOM;
+
+	if (mfl > linear_mfl) {
+		netdev_warn(priv->net_dev, "Maximum MTU for XDP is %d\n",
+			    linear_mfl - VLAN_ETH_HLEN);
+		return false;
+	}
+
+	return true;
+}
+
+static int set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp)
+{
+	int mfl, err;
+
+	/* We enforce a maximum Rx frame length based on MTU only if we have
+	 * an XDP program attached (in order to avoid Rx S/G frames).
+	 * Otherwise, we accept all incoming frames as long as they are not
+	 * larger than the maximum size supported in hardware.
+	 */
+	if (has_xdp)
+		mfl = DPAA2_ETH_L2_MAX_FRM(mtu);
+	else
+		mfl = DPAA2_ETH_MFL;
+
+	err = dpni_set_max_frame_length(priv->mc_io, 0, priv->mc_token, mfl);
+	if (err) {
+		netdev_err(priv->net_dev, "dpni_set_max_frame_length failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(dev);
+	int err;
+
+	if (!priv->xdp_prog)
+		goto out;
+
+	if (!xdp_mtu_valid(priv, new_mtu))
+		return -EINVAL;
+
+	err = set_rx_mfl(priv, new_mtu, true);
+	if (err)
+		return err;
+
+out:
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp)
+{
+	struct dpni_buffer_layout buf_layout = {0};
+	int err;
+
+	err = dpni_get_buffer_layout(priv->mc_io, 0, priv->mc_token,
+				     DPNI_QUEUE_RX, &buf_layout);
+	if (err) {
+		netdev_err(priv->net_dev, "dpni_get_buffer_layout failed\n");
+		return err;
+	}
+
+	/* Reserve extra headroom for XDP header size changes */
+	buf_layout.data_head_room = dpaa2_eth_rx_head_room(priv) +
+				    (has_xdp ? XDP_PACKET_HEADROOM : 0);
+	buf_layout.options = DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM;
+	err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token,
+				     DPNI_QUEUE_RX, &buf_layout);
+	if (err) {
+		netdev_err(priv->net_dev, "dpni_set_buffer_layout failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int setup_xdp(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(dev);
+	struct dpaa2_eth_channel *ch;
+	struct bpf_prog *old;
+	bool up, need_update;
+	int i, err;
+
+	if (prog && !xdp_mtu_valid(priv, dev->mtu))
+		return -EINVAL;
+
+	if (prog) {
+		prog = bpf_prog_add(prog, priv->num_channels);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
+	up = netif_running(dev);
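+	/* Rx frame length and buffer layout only need reconfiguring when XDP
+	 * is turned on or off, not when one program merely replaces another.
+	 */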
+	need_update = (!!priv->xdp_prog != !!prog);
+
+	if (up)
+		dpaa2_eth_stop(dev);
+
+	/* While in xdp mode, enforce a maximum Rx frame size based on MTU.
+	 * Also, when switching between xdp/non-xdp modes we need to reconfigure
+	 * our Rx buffer layout. The buffer pool was drained in dpaa2_eth_stop(),
+	 * so we are sure no old format buffers will be used from now on.
+	 */
+	if (need_update) {
+		err = set_rx_mfl(priv, dev->mtu, !!prog);
+		if (err)
+			goto out_err;
+		err = update_rx_buffer_headroom(priv, !!prog);
+		if (err)
+			goto out_err;
+	}
+
+	old = xchg(&priv->xdp_prog, prog);
+	if (old)
+		bpf_prog_put(old);
+
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		old = xchg(&ch->xdp.prog, prog);
+		if (old)
+			bpf_prog_put(old);
+	}
+
+	if (up) {
+		err = dpaa2_eth_open(dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+
+out_err:
+	if (prog)
+		bpf_prog_sub(prog, priv->num_channels);
+	if (up)
+		dpaa2_eth_open(dev);
+
+	return err;
+}
+
+static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(dev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return setup_xdp(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
+				    struct xdp_frame *xdpf)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct device *dev = net_dev->dev.parent;
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	unsigned int needed_headroom;
+	struct dpaa2_eth_swa *swa;
+	struct dpaa2_eth_fq *fq;
+	struct dpaa2_fd fd;
+	void *buffer_start, *aligned_start;
+	dma_addr_t addr;
+	int err, i;
+
+	/* We require a minimum headroom to be able to transmit the frame.
+	 * Otherwise return an error and let the original net_device handle it
+	 */
+	needed_headroom = dpaa2_eth_needed_headroom(priv, NULL);
+	if (xdpf->headroom < needed_headroom)
+		return -EINVAL;
+
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
+	/* Setup the FD fields */
+	memset(&fd, 0, sizeof(fd));
+
+	/* Align FD address, if possible */
+	buffer_start = xdpf->data - needed_headroom;
+	aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN,
+				  DPAA2_ETH_TX_BUF_ALIGN);
+	if (aligned_start >= xdpf->data - xdpf->headroom)
+		buffer_start = aligned_start;
+
+	swa = (struct dpaa2_eth_swa *)buffer_start;
+	/* Fill in the software annotation; Tx confirmation reads it back in
+	 * order to unmap and free this XDP frame
+	 */
+	swa->type = DPAA2_ETH_SWA_XDP;
+	swa->xdp.dma_size = xdpf->data + xdpf->len - buffer_start;
+	swa->xdp.xdpf = xdpf;
+
+	addr = dma_map_single(dev, buffer_start,
+			      swa->xdp.dma_size,
+			      DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(dev, addr))) {
+		percpu_stats->tx_dropped++;
+		return -ENOMEM;
+	}
+
+	dpaa2_fd_set_addr(&fd, addr);
+	dpaa2_fd_set_offset(&fd, xdpf->data - buffer_start);
+	dpaa2_fd_set_len(&fd, xdpf->len);
+	dpaa2_fd_set_format(&fd, dpaa2_fd_single);
+	dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA);
+
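+	/* Pick the Tx queue affine to the current cpu, so concurrent senders
+	 * spread across the available frame queues.
+	 */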
+	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
+	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
+		err = priv->enqueue(priv, fq, &fd, 0);
+		if (err != -EBUSY)
+			break;
+	}
+	percpu_extras->tx_portal_busy += i;
+	if (unlikely(err < 0)) {
+		percpu_stats->tx_errors++;
+		/* let the Rx device handle the cleanup */
+		return err;
+	}
+
+	percpu_stats->tx_packets++;
+	percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
+
+	return 0;
+}
+
+static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
+			      struct xdp_frame **frames, u32 flags)
+{
+	int drops = 0;
+	int i, err;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	if (!netif_running(net_dev))
+		return -ENETDOWN;
+
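+	/* Enqueue each frame; frames we cannot send are returned to the XDP
+	 * memory allocator and reported as drops via the return value.
+	 */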
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+
+		err = dpaa2_eth_xdp_xmit_frame(net_dev, xdpf);
+		if (err) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
+}
+
+static int update_xps(struct dpaa2_eth_priv *priv)
+{
+	struct net_device *net_dev = priv->net_dev;
+	struct cpumask xps_mask;
+	struct dpaa2_eth_fq *fq;
+	int i, num_queues, netdev_queues;
+	int err = 0;
+
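+	/* Each traffic class gets its own set of <num_queues> netdev queues;
+	 * with no traffic classes configured there is a single such set.
+	 */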
+	num_queues = dpaa2_eth_queue_count(priv);
+	netdev_queues = (net_dev->num_tc ? : 1) * num_queues;
+
+	/* The first <num_queues> entries in priv->fq array are Tx/Tx conf
+	 * queues, so only process those
+	 */
+	for (i = 0; i < netdev_queues; i++) {
+		fq = &priv->fq[i % num_queues];
+
+		cpumask_clear(&xps_mask);
+		cpumask_set_cpu(fq->target_cpu, &xps_mask);
+
+		err = netif_set_xps_queue(net_dev, &xps_mask, i);
+		if (err) {
+			netdev_warn_once(net_dev, "Error setting XPS queue\n");
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int dpaa2_eth_setup_tc(struct net_device *net_dev,
+			      enum tc_setup_type type, void *type_data)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct tc_mqprio_qopt *mqprio = type_data;
+	u8 num_tc, num_queues;
+	int i;
+
+	if (type != TC_SETUP_QDISC_MQPRIO)
+		return -EINVAL;
+
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	num_queues = dpaa2_eth_queue_count(priv);
+	num_tc = mqprio->num_tc;
+
+	if (num_tc == net_dev->num_tc)
+		return 0;
+
+	if (num_tc > dpaa2_eth_tc_count(priv)) {
+		netdev_err(net_dev, "Max %d traffic classes supported\n",
+			   dpaa2_eth_tc_count(priv));
+		return -EINVAL;
+	}
+
+	if (!num_tc) {
+		netdev_reset_tc(net_dev);
+		netif_set_real_num_tx_queues(net_dev, num_queues);
+		goto out;
+	}
+
+	netdev_set_num_tc(net_dev, num_tc);
+	netif_set_real_num_tx_queues(net_dev, num_tc * num_queues);
+
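+	/* Map each traffic class to its own contiguous range of Tx queues */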
+	for (i = 0; i < num_tc; i++)
+		netdev_set_tc_queue(net_dev, i, num_queues, i * num_queues);
+
+out:
+	update_xps(priv);
+
+	return 0;
+}
+
+static const struct net_device_ops dpaa2_eth_ops = {
+	.ndo_open = dpaa2_eth_open,
+	.ndo_start_xmit = dpaa2_eth_tx,
+	.ndo_stop = dpaa2_eth_stop,
+	.ndo_set_mac_address = dpaa2_eth_set_addr,
+	.ndo_get_stats64 = dpaa2_eth_get_stats,
+	.ndo_set_rx_mode = dpaa2_eth_set_rx_mode,
+	.ndo_set_features = dpaa2_eth_set_features,
+	.ndo_do_ioctl = dpaa2_eth_ioctl,
+	.ndo_change_mtu = dpaa2_eth_change_mtu,
+	.ndo_bpf = dpaa2_eth_xdp,
+	.ndo_xdp_xmit = dpaa2_eth_xdp_xmit,
+	.ndo_setup_tc = dpaa2_eth_setup_tc,
+};
+
+static void cdan_cb(struct dpaa2_io_notification_ctx *ctx)
+{
+	struct dpaa2_eth_channel *ch;
+
+	ch = container_of(ctx, struct dpaa2_eth_channel, nctx);
+
+	/* Update NAPI statistics */
+	ch->stats.cdan++;
+
+	napi_schedule_irqoff(&ch->napi);
+}
+
+/* Allocate and configure a DPCON object */
+static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
+{
+	struct fsl_mc_device *dpcon;
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpcon_attr attrs;
+	int err;
+
+	err = fsl_mc_object_allocate(to_fsl_mc_device(dev),
+				     FSL_MC_POOL_DPCON, &dpcon);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_info(dev, "Not enough DPCONs, will go on as-is\n");
+		return ERR_PTR(err);
+	}
+
+	err = dpcon_open(priv->mc_io, 0, dpcon->obj_desc.id, &dpcon->mc_handle);
+	if (err) {
+		dev_err(dev, "dpcon_open() failed\n");
+		goto free;
+	}
+
+	err = dpcon_reset(priv->mc_io, 0, dpcon->mc_handle);
+	if (err) {
+		dev_err(dev, "dpcon_reset() failed\n");
+		goto close;
+	}
+
+	err = dpcon_get_attributes(priv->mc_io, 0, dpcon->mc_handle, &attrs);
+	if (err) {
+		dev_err(dev, "dpcon_get_attributes() failed\n");
+		goto close;
+	}
+
+	err = dpcon_enable(priv->mc_io, 0, dpcon->mc_handle);
+	if (err) {
+		dev_err(dev, "dpcon_enable() failed\n");
+		goto close;
+	}
+
+	return dpcon;
+
+close:
+	dpcon_close(priv->mc_io, 0, dpcon->mc_handle);
+free:
+	fsl_mc_object_free(dpcon);
+
+	return NULL;
+}
+
+static void free_dpcon(struct dpaa2_eth_priv *priv,
+		       struct fsl_mc_device *dpcon)
+{
+	dpcon_disable(priv->mc_io, 0, dpcon->mc_handle);
+	dpcon_close(priv->mc_io, 0, dpcon->mc_handle);
+	fsl_mc_object_free(dpcon);
+}
+
+static struct dpaa2_eth_channel *
+alloc_channel(struct dpaa2_eth_priv *priv)
+{
+	struct dpaa2_eth_channel *channel;
+	struct dpcon_attr attr;
+	struct device *dev = priv->net_dev->dev.parent;
+	int err;
+
+	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
+	if (!channel)
+		return NULL;
+
+	channel->dpcon = setup_dpcon(priv);
+	if (IS_ERR_OR_NULL(channel->dpcon)) {
+		err = PTR_ERR_OR_ZERO(channel->dpcon);
+		goto err_setup;
+	}
+
+	err = dpcon_get_attributes(priv->mc_io, 0, channel->dpcon->mc_handle,
+				   &attr);
+	if (err) {
+		dev_err(dev, "dpcon_get_attributes() failed\n");
+		goto err_get_attr;
+	}
+
+	channel->dpcon_id = attr.id;
+	channel->ch_id = attr.qbman_ch_id;
+	channel->priv = priv;
+
+	return channel;
+
+err_get_attr:
+	free_dpcon(priv, channel->dpcon);
+err_setup:
+	kfree(channel);
+	return ERR_PTR(err);
+}
+
+static void free_channel(struct dpaa2_eth_priv *priv,
+			 struct dpaa2_eth_channel *channel)
+{
+	free_dpcon(priv, channel->dpcon);
+	kfree(channel);
+}
+
+/* DPIO setup: allocate and configure QBMan channels, setup core affinity
+ * and register data availability notifications
+ */
+static int setup_dpio(struct dpaa2_eth_priv *priv)
+{
+	struct dpaa2_io_notification_ctx *nctx;
+	struct dpaa2_eth_channel *channel;
+	struct dpcon_notification_cfg dpcon_notif_cfg;
+	struct device *dev = priv->net_dev->dev.parent;
+	int i, err;
+
+	/* We want the ability to spread ingress traffic (RX, TX conf) to as
+	 * many cores as possible, so we need one channel for each core
+	 * (unless there's fewer queues than cores, in which case the extra
+	 * channels would be wasted).
+	 * Allocate one channel per core and register it to the core's
+	 * affine DPIO. If not enough channels are available for all cores
+	 * or if some cores don't have an affine DPIO, there will be no
+	 * ingress frame processing on those cores.
+	 */
+	cpumask_clear(&priv->dpio_cpumask);
+	for_each_online_cpu(i) {
+		/* Try to allocate a channel */
+		channel = alloc_channel(priv);
+		if (IS_ERR_OR_NULL(channel)) {
+			err = PTR_ERR_OR_ZERO(channel);
+			if (err != -EPROBE_DEFER)
+				dev_info(dev,
+					 "No affine channel for cpu %d and above\n", i);
+			goto err_alloc_ch;
+		}
+
+		priv->channel[priv->num_channels] = channel;
+
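+		/* Set up the notification context: this channel will receive
+		 * a CDAN on the DPIO affine to cpu i
+		 */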
+		nctx = &channel->nctx;
+		nctx->is_cdan = 1;
+		nctx->cb = cdan_cb;
+		nctx->id = channel->ch_id;
+		nctx->desired_cpu = i;
+
+		/* Register the new context */
+		channel->dpio = dpaa2_io_service_select(i);
+		err = dpaa2_io_service_register(channel->dpio, nctx, dev);
+		if (err) {
+			dev_dbg(dev, "No affine DPIO for cpu %d\n", i);
+			/* If no affine DPIO for this core, there's probably
+			 * none available for next cores either. Signal we want
+			 * to retry later, in case the DPIO devices weren't
+			 * probed yet.
+			 */
+			err = -EPROBE_DEFER;
+			goto err_service_reg;
+		}
+
+		/* Register DPCON notification with MC */
+		dpcon_notif_cfg.dpio_id = nctx->dpio_id;
+		dpcon_notif_cfg.priority = 0;
+		dpcon_notif_cfg.user_ctx = nctx->qman64;
+		err = dpcon_set_notification(priv->mc_io, 0,
+					     channel->dpcon->mc_handle,
+					     &dpcon_notif_cfg);
+		if (err) {
+			dev_err(dev, "dpcon_set_notification failed()\n");
+			goto err_set_cdan;
+		}
+
+		/* If we managed to allocate a channel and also found an affine
+		 * DPIO for this core, add it to the final mask
+		 */
+		cpumask_set_cpu(i, &priv->dpio_cpumask);
+		priv->num_channels++;
+
+		/* Stop if we already have enough channels to accommodate all
+		 * RX and TX conf queues
+		 */
+		if (priv->num_channels == priv->dpni_attrs.num_queues)
+			break;
+	}
+
+	return 0;
+
+err_set_cdan:
+	dpaa2_io_service_deregister(channel->dpio, nctx, dev);
+err_service_reg:
+	free_channel(priv, channel);
+err_alloc_ch:
+	if (err == -EPROBE_DEFER) {
+		for (i = 0; i < priv->num_channels; i++) {
+			channel = priv->channel[i];
+			nctx = &channel->nctx;
+			dpaa2_io_service_deregister(channel->dpio, nctx, dev);
+			free_channel(priv, channel);
+		}
+		priv->num_channels = 0;
+		return err;
+	}
+
+	if (cpumask_empty(&priv->dpio_cpumask)) {
+		dev_err(dev, "No cpu with an affine DPIO/DPCON\n");
+		return -ENODEV;
+	}
+
+	dev_info(dev, "Cores %*pbl available for processing ingress traffic\n",
+		 cpumask_pr_args(&priv->dpio_cpumask));
+
+	return 0;
+}
+
+static void free_dpio(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpaa2_eth_channel *ch;
+	int i;
+
+	/* deregister CDAN notifications and free channels */
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		dpaa2_io_service_deregister(ch->dpio, &ch->nctx, dev);
+		free_channel(priv, ch);
+	}
+}
+
+static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv,
+						    int cpu)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	int i;
+
+	for (i = 0; i < priv->num_channels; i++)
+		if (priv->channel[i]->nctx.desired_cpu == cpu)
+			return priv->channel[i];
+
+	/* We should never get here. Issue a warning and return
+	 * the first channel, because it's still better than nothing
+	 */
+	dev_warn(dev, "No affine channel found for cpu %d\n", cpu);
+
+	return priv->channel[0];
+}
+
+static void set_fq_affinity(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpaa2_eth_fq *fq;
+	int rx_cpu, txc_cpu;
+	int i;
+
+	/* For each FQ, pick one channel/CPU to deliver frames to.
+	 * This may well change at runtime, either through irqbalance or
+	 * through direct user intervention.
+	 */
+	rx_cpu = txc_cpu = cpumask_first(&priv->dpio_cpumask);
+
+	for (i = 0; i < priv->num_fqs; i++) {
+		fq = &priv->fq[i];
+		switch (fq->type) {
+		case DPAA2_RX_FQ:
+			fq->target_cpu = rx_cpu;
+			rx_cpu = cpumask_next(rx_cpu, &priv->dpio_cpumask);
+			if (rx_cpu >= nr_cpu_ids)
+				rx_cpu = cpumask_first(&priv->dpio_cpumask);
+			break;
+		case DPAA2_TX_CONF_FQ:
+			fq->target_cpu = txc_cpu;
+			txc_cpu = cpumask_next(txc_cpu, &priv->dpio_cpumask);
+			if (txc_cpu >= nr_cpu_ids)
+				txc_cpu = cpumask_first(&priv->dpio_cpumask);
+			break;
+		default:
+			dev_err(dev, "Unknown FQ type: %d\n", fq->type);
+		}
+		fq->channel = get_affine_channel(priv, fq->target_cpu);
+	}
+
+	update_xps(priv);
+}
+
+static void setup_fqs(struct dpaa2_eth_priv *priv)
+{
+	int i;
+
+	/* We have one TxConf FQ per Tx flow.
+	 * The number of Tx and Rx queues is the same.
+	 * Tx queues come first in the fq array.
+	 */
+	for (i = 0; i < dpaa2_eth_queue_count(priv); i++) {
+		priv->fq[priv->num_fqs].type = DPAA2_TX_CONF_FQ;
+		priv->fq[priv->num_fqs].consume = dpaa2_eth_tx_conf;
+		priv->fq[priv->num_fqs++].flowid = (u16)i;
+	}
+
+	for (i = 0; i < dpaa2_eth_queue_count(priv); i++) {
+		priv->fq[priv->num_fqs].type = DPAA2_RX_FQ;
+		priv->fq[priv->num_fqs].consume = dpaa2_eth_rx;
+		priv->fq[priv->num_fqs++].flowid = (u16)i;
+	}
+
+	/* For each FQ, decide on which core to process incoming frames */
+	set_fq_affinity(priv);
+}
+
+/* Allocate and configure one buffer pool for each interface */
+static int setup_dpbp(struct dpaa2_eth_priv *priv)
+{
+	int err;
+	struct fsl_mc_device *dpbp_dev;
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpbp_attr dpbp_attrs;
+
+	err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP,
+				     &dpbp_dev);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "DPBP device allocation failed\n");
+		return err;
+	}
+
+	priv->dpbp_dev = dpbp_dev;
+
+	err = dpbp_open(priv->mc_io, 0, priv->dpbp_dev->obj_desc.id,
+			&dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_open() failed\n");
+		goto err_open;
+	}
+
+	err = dpbp_reset(priv->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_reset() failed\n");
+		goto err_reset;
+	}
+
+	err = dpbp_enable(priv->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_enable() failed\n");
+		goto err_enable;
+	}
+
+	err = dpbp_get_attributes(priv->mc_io, 0, dpbp_dev->mc_handle,
+				  &dpbp_attrs);
+	if (err) {
+		dev_err(dev, "dpbp_get_attributes() failed\n");
+		goto err_get_attr;
+	}
+	priv->bpid = dpbp_attrs.bpid;
+
+	return 0;
+
+err_get_attr:
+	dpbp_disable(priv->mc_io, 0, dpbp_dev->mc_handle);
+err_enable:
+err_reset:
+	dpbp_close(priv->mc_io, 0, dpbp_dev->mc_handle);
+err_open:
+	fsl_mc_object_free(dpbp_dev);
+
+	return err;
+}
+
+static void free_dpbp(struct dpaa2_eth_priv *priv)
+{
+	drain_pool(priv);
+	dpbp_disable(priv->mc_io, 0, priv->dpbp_dev->mc_handle);
+	dpbp_close(priv->mc_io, 0, priv->dpbp_dev->mc_handle);
+	fsl_mc_object_free(priv->dpbp_dev);
+}
+
+static int set_buffer_layout(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_buffer_layout buf_layout = {0};
+	u16 rx_buf_align;
+	int err;
+
+	/* We need to check for WRIOP version 1.0.0, but depending on the MC
+	 * version, this number is not always provided correctly on rev1.
+	 * We need to check for both alternatives in this situation.
+	 */
+	if (priv->dpni_attrs.wriop_version == DPAA2_WRIOP_VERSION(0, 0, 0) ||
+	    priv->dpni_attrs.wriop_version == DPAA2_WRIOP_VERSION(1, 0, 0))
+		rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN_REV1;
+	else
+		rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN;
+
+	/* tx buffer */
+	buf_layout.private_data_size = DPAA2_ETH_SWA_SIZE;
+	buf_layout.pass_timestamp = true;
+	buf_layout.options = DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE |
+			     DPNI_BUF_LAYOUT_OPT_TIMESTAMP;
+	err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token,
+				     DPNI_QUEUE_TX, &buf_layout);
+	if (err) {
+		dev_err(dev, "dpni_set_buffer_layout(TX) failed\n");
+		return err;
+	}
+
+	/* tx-confirm buffer */
+	buf_layout.options = DPNI_BUF_LAYOUT_OPT_TIMESTAMP;
+	err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token,
+				     DPNI_QUEUE_TX_CONFIRM, &buf_layout);
+	if (err) {
+		dev_err(dev, "dpni_set_buffer_layout(TX_CONF) failed\n");
+		return err;
+	}
+
+	/* Now that we've set our tx buffer layout, retrieve the minimum
+	 * required tx data offset.
+	 */
+	err = dpni_get_tx_data_offset(priv->mc_io, 0, priv->mc_token,
+				      &priv->tx_data_offset);
+	if (err) {
+		dev_err(dev, "dpni_get_tx_data_offset() failed\n");
+		return err;
+	}
+
+	if ((priv->tx_data_offset % 64) != 0)
+		dev_warn(dev, "Tx data offset (%d) not a multiple of 64B\n",
+			 priv->tx_data_offset);
+
+	/* rx buffer */
+	buf_layout.pass_frame_status = true;
+	buf_layout.pass_parser_result = true;
+	buf_layout.data_align = rx_buf_align;
+	buf_layout.data_head_room = dpaa2_eth_rx_head_room(priv);
+	buf_layout.private_data_size = 0;
+	buf_layout.options = DPNI_BUF_LAYOUT_OPT_PARSER_RESULT |
+			     DPNI_BUF_LAYOUT_OPT_FRAME_STATUS |
+			     DPNI_BUF_LAYOUT_OPT_DATA_ALIGN |
+			     DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM |
+			     DPNI_BUF_LAYOUT_OPT_TIMESTAMP;
+	err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token,
+				     DPNI_QUEUE_RX, &buf_layout);
+	if (err) {
+		dev_err(dev, "dpni_set_buffer_layout(RX) failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+#define DPNI_ENQUEUE_FQID_VER_MAJOR	7
+#define DPNI_ENQUEUE_FQID_VER_MINOR	9
+
+static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
+				       struct dpaa2_eth_fq *fq,
+				       struct dpaa2_fd *fd, u8 prio)
+{
+	return dpaa2_io_service_enqueue_qd(fq->channel->dpio,
+					   priv->tx_qdid, prio,
+					   fq->tx_qdbin, fd);
+}
+
+static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv,
+				       struct dpaa2_eth_fq *fq,
+				       struct dpaa2_fd *fd, u8 prio)
+{
+	return dpaa2_io_service_enqueue_fq(fq->channel->dpio,
+					   fq->tx_fqid[prio], fd);
+}
+
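+/* Pick the enqueue primitive based on DPNI version: starting with version
+ * 7.9, per-queue FQIDs are available, allowing direct frame queue enqueues
+ * instead of QDID-based ones.
+ */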
+static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
+{
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
+				   DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
+		priv->enqueue = dpaa2_eth_enqueue_qd;
+	else
+		priv->enqueue = dpaa2_eth_enqueue_fq;
+}
+
+static int set_pause(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_link_cfg link_cfg = {0};
+	int err;
+
+	/* Get the default link options so we don't override other flags */
+	err = dpni_get_link_cfg(priv->mc_io, 0, priv->mc_token, &link_cfg);
+	if (err) {
+		dev_err(dev, "dpni_get_link_cfg() failed\n");
+		return err;
+	}
+
+	/* By default, enable both Rx and Tx pause frames */
+	link_cfg.options |= DPNI_LINK_OPT_PAUSE;
+	link_cfg.options &= ~DPNI_LINK_OPT_ASYM_PAUSE;
+	err = dpni_set_link_cfg(priv->mc_io, 0, priv->mc_token, &link_cfg);
+	if (err) {
+		dev_err(dev, "dpni_set_link_cfg() failed\n");
+		return err;
+	}
+
+	priv->link_state.options = link_cfg.options;
+
+	return 0;
+}
+
+static void update_tx_fqids(struct dpaa2_eth_priv *priv)
+{
+	struct dpni_queue_id qid = {0};
+	struct dpaa2_eth_fq *fq;
+	struct dpni_queue queue;
+	int i, j, err;
+
+	/* We only use Tx FQIDs for FQID-based enqueue, so check
+	 * if DPNI version supports it before updating FQIDs
+	 */
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
+				   DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
+		return;
+
+	for (i = 0; i < priv->num_fqs; i++) {
+		fq = &priv->fq[i];
+		if (fq->type != DPAA2_TX_CONF_FQ)
+			continue;
+		for (j = 0; j < dpaa2_eth_tc_count(priv); j++) {
+			err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+					     DPNI_QUEUE_TX, j, fq->flowid,
+					     &queue, &qid);
+			if (err)
+				goto out_err;
+
+			fq->tx_fqid[j] = qid.fqid;
+			if (fq->tx_fqid[j] == 0)
+				goto out_err;
+		}
+	}
+
+	priv->enqueue = dpaa2_eth_enqueue_fq;
+
+	return;
+
+out_err:
+	netdev_info(priv->net_dev,
+		    "Error reading Tx FQID, fallback to QDID-based enqueue\n");
+	priv->enqueue = dpaa2_eth_enqueue_qd;
+}
+
+/* Configure the DPNI object this interface is associated with */
+static int setup_dpni(struct fsl_mc_device *ls_dev)
+{
+	struct device *dev = &ls_dev->dev;
+	struct dpaa2_eth_priv *priv;
+	struct net_device *net_dev;
+	int err;
+
+	net_dev = dev_get_drvdata(dev);
+	priv = netdev_priv(net_dev);
+
+	/* get a handle for the DPNI object */
+	err = dpni_open(priv->mc_io, 0, ls_dev->obj_desc.id, &priv->mc_token);
+	if (err) {
+		dev_err(dev, "dpni_open() failed\n");
+		return err;
+	}
+
+	/* Check if we can work with this DPNI object */
+	err = dpni_get_api_version(priv->mc_io, 0, &priv->dpni_ver_major,
+				   &priv->dpni_ver_minor);
+	if (err) {
+		dev_err(dev, "dpni_get_api_version() failed\n");
+		goto close;
+	}
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_VER_MAJOR, DPNI_VER_MINOR) < 0) {
+		dev_err(dev, "DPNI version %u.%u not supported, need >= %u.%u\n",
+			priv->dpni_ver_major, priv->dpni_ver_minor,
+			DPNI_VER_MAJOR, DPNI_VER_MINOR);
+		err = -ENOTSUPP;
+		goto close;
+	}
+
+	ls_dev->mc_io = priv->mc_io;
+	ls_dev->mc_handle = priv->mc_token;
+
+	err = dpni_reset(priv->mc_io, 0, priv->mc_token);
+	if (err) {
+		dev_err(dev, "dpni_reset() failed\n");
+		goto close;
+	}
+
+	err = dpni_get_attributes(priv->mc_io, 0, priv->mc_token,
+				  &priv->dpni_attrs);
+	if (err) {
+		dev_err(dev, "dpni_get_attributes() failed (err=%d)\n", err);
+		goto close;
+	}
+
+	err = set_buffer_layout(priv);
+	if (err)
+		goto close;
+
+	set_enqueue_mode(priv);
+
+	/* Enable pause frame support */
+	if (dpaa2_eth_has_pause_support(priv)) {
+		err = set_pause(priv);
+		if (err)
+			goto close;
+	}
+
+	priv->cls_rules = devm_kzalloc(dev, sizeof(struct dpaa2_eth_cls_rule) *
+				       dpaa2_eth_fs_count(priv), GFP_KERNEL);
+	if (!priv->cls_rules) {
+		err = -ENOMEM;
+		goto close;
+	}
+
+	return 0;
+
+close:
+	dpni_close(priv->mc_io, 0, priv->mc_token);
+
+	return err;
+}
+
+static void free_dpni(struct dpaa2_eth_priv *priv)
+{
+	int err;
+
+	err = dpni_reset(priv->mc_io, 0, priv->mc_token);
+	if (err)
+		netdev_warn(priv->net_dev, "dpni_reset() failed (err %d)\n",
+			    err);
+
+	dpni_close(priv->mc_io, 0, priv->mc_token);
+}
+
+static int setup_rx_flow(struct dpaa2_eth_priv *priv,
+			 struct dpaa2_eth_fq *fq)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_queue queue;
+	struct dpni_queue_id qid;
+	int err;
+
+	err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+			     DPNI_QUEUE_RX, 0, fq->flowid, &queue, &qid);
+	if (err) {
+		dev_err(dev, "dpni_get_queue(RX) failed\n");
+		return err;
+	}
+
+	fq->fqid = qid.fqid;
+
+	queue.destination.id = fq->channel->dpcon_id;
+	queue.destination.type = DPNI_DEST_DPCON;
+	queue.destination.priority = 1;
+	queue.user_context = (u64)(uintptr_t)fq;
+	err = dpni_set_queue(priv->mc_io, 0, priv->mc_token,
+			     DPNI_QUEUE_RX, 0, fq->flowid,
+			     DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST,
+			     &queue);
+	if (err) {
+		dev_err(dev, "dpni_set_queue(RX) failed\n");
+		return err;
+	}
+
+	/* xdp_rxq setup */
+	err = xdp_rxq_info_reg(&fq->channel->xdp_rxq, priv->net_dev,
+			       fq->flowid);
+	if (err) {
+		dev_err(dev, "xdp_rxq_info_reg failed\n");
+		return err;
+	}
+
+	err = xdp_rxq_info_reg_mem_model(&fq->channel->xdp_rxq,
+					 MEM_TYPE_PAGE_ORDER0, NULL);
+	if (err) {
+		dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int setup_tx_flow(struct dpaa2_eth_priv *priv,
+			 struct dpaa2_eth_fq *fq)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_queue queue;
+	struct dpni_queue_id qid;
+	int i, err;
+
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+				     DPNI_QUEUE_TX, i, fq->flowid,
+				     &queue, &qid);
+		if (err) {
+			dev_err(dev, "dpni_get_queue(TX) failed\n");
+			return err;
+		}
+		fq->tx_fqid[i] = qid.fqid;
+	}
+
+	/* All Tx queues belonging to the same flowid have the same qdbin */
+	fq->tx_qdbin = qid.qdbin;
+
+	err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+			     DPNI_QUEUE_TX_CONFIRM, 0, fq->flowid,
+			     &queue, &qid);
+	if (err) {
+		dev_err(dev, "dpni_get_queue(TX_CONF) failed\n");
+		return err;
+	}
+
+	fq->fqid = qid.fqid;
+
+	queue.destination.id = fq->channel->dpcon_id;
+	queue.destination.type = DPNI_DEST_DPCON;
+	queue.destination.priority = 0;
+	queue.user_context = (u64)(uintptr_t)fq;
+	err = dpni_set_queue(priv->mc_io, 0, priv->mc_token,
+			     DPNI_QUEUE_TX_CONFIRM, 0, fq->flowid,
+			     DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST,
+			     &queue);
+	if (err) {
+		dev_err(dev, "dpni_set_queue(TX_CONF) failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/* Supported header fields for Rx hash distribution key */
+static const struct dpaa2_eth_dist_fields dist_fields[] = {
+	{
+		/* L2 header */
+		.rxnfc_field = RXH_L2DA,
+		.cls_prot = NET_PROT_ETH,
+		.cls_field = NH_FLD_ETH_DA,
+		.id = DPAA2_ETH_DIST_ETHDST,
+		.size = 6,
+	}, {
+		.cls_prot = NET_PROT_ETH,
+		.cls_field = NH_FLD_ETH_SA,
+		.id = DPAA2_ETH_DIST_ETHSRC,
+		.size = 6,
+	}, {
+		/* This is the last ethertype field parsed:
+		 * depending on frame format, it can be the MAC ethertype
+		 * or the VLAN etype.
+		 */
+		.cls_prot = NET_PROT_ETH,
+		.cls_field = NH_FLD_ETH_TYPE,
+		.id = DPAA2_ETH_DIST_ETHTYPE,
+		.size = 2,
+	}, {
+		/* VLAN header */
+		.rxnfc_field = RXH_VLAN,
+		.cls_prot = NET_PROT_VLAN,
+		.cls_field = NH_FLD_VLAN_TCI,
+		.id = DPAA2_ETH_DIST_VLAN,
+		.size = 2,
+	}, {
+		/* IP header */
+		.rxnfc_field = RXH_IP_SRC,
+		.cls_prot = NET_PROT_IP,
+		.cls_field = NH_FLD_IP_SRC,
+		.id = DPAA2_ETH_DIST_IPSRC,
+		.size = 4,
+	}, {
+		.rxnfc_field = RXH_IP_DST,
+		.cls_prot = NET_PROT_IP,
+		.cls_field = NH_FLD_IP_DST,
+		.id = DPAA2_ETH_DIST_IPDST,
+		.size = 4,
+	}, {
+		.rxnfc_field = RXH_L3_PROTO,
+		.cls_prot = NET_PROT_IP,
+		.cls_field = NH_FLD_IP_PROTO,
+		.id = DPAA2_ETH_DIST_IPPROTO,
+		.size = 1,
+	}, {
+		/* Using UDP ports, this is functionally equivalent to raw
+		 * byte pairs from the L4 header.
+		 */
+		.rxnfc_field = RXH_L4_B_0_1,
+		.cls_prot = NET_PROT_UDP,
+		.cls_field = NH_FLD_UDP_PORT_SRC,
+		.id = DPAA2_ETH_DIST_L4SRC,
+		.size = 2,
+	}, {
+		.rxnfc_field = RXH_L4_B_2_3,
+		.cls_prot = NET_PROT_UDP,
+		.cls_field = NH_FLD_UDP_PORT_DST,
+		.id = DPAA2_ETH_DIST_L4DST,
+		.size = 2,
+	},
+};
+
+/* Configure the Rx hash key using the legacy API */
+static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_rx_tc_dist_cfg dist_cfg;
+	int err;
+
+	memset(&dist_cfg, 0, sizeof(dist_cfg));
+
+	dist_cfg.key_cfg_iova = key;
+	dist_cfg.dist_size = dpaa2_eth_queue_count(priv);
+	dist_cfg.dist_mode = DPNI_DIST_MODE_HASH;
+
+	err = dpni_set_rx_tc_dist(priv->mc_io, 0, priv->mc_token, 0, &dist_cfg);
+	if (err)
+		dev_err(dev, "dpni_set_rx_tc_dist failed\n");
+
+	return err;
+}
+
+/* Configure the Rx hash key using the new API */
+static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_rx_dist_cfg dist_cfg;
+	int err;
+
+	memset(&dist_cfg, 0, sizeof(dist_cfg));
+
+	dist_cfg.key_cfg_iova = key;
+	dist_cfg.dist_size = dpaa2_eth_queue_count(priv);
+	dist_cfg.enable = 1;
+
+	err = dpni_set_rx_hash_dist(priv->mc_io, 0, priv->mc_token, &dist_cfg);
+	if (err)
+		dev_err(dev, "dpni_set_rx_hash_dist failed\n");
+
+	return err;
+}
+
+/* Configure the Rx flow classification key */
+static int config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpni_rx_dist_cfg dist_cfg;
+	int err;
+
+	memset(&dist_cfg, 0, sizeof(dist_cfg));
+
+	dist_cfg.key_cfg_iova = key;
+	dist_cfg.dist_size = dpaa2_eth_queue_count(priv);
+	dist_cfg.enable = 1;
+
+	err = dpni_set_rx_fs_dist(priv->mc_io, 0, priv->mc_token, &dist_cfg);
+	if (err)
+		dev_err(dev, "dpni_set_rx_fs_dist failed\n");
+
+	return err;
+}
+
+/* Size of the Rx flow classification key */
+int dpaa2_eth_cls_key_size(u64 fields)
+{
+	int i, size = 0;
+
+	for (i = 0; i < ARRAY_SIZE(dist_fields); i++) {
+		if (!(fields & dist_fields[i].id))
+			continue;
+		size += dist_fields[i].size;
+	}
+
+	return size;
+}
+
+/* Offset of header field in Rx classification key */
+int dpaa2_eth_cls_fld_off(int prot, int field)
+{
+	int i, off = 0;
+
+	for (i = 0; i < ARRAY_SIZE(dist_fields); i++) {
+		if (dist_fields[i].cls_prot == prot &&
+		    dist_fields[i].cls_field == field)
+			return off;
+		off += dist_fields[i].size;
+	}
+
+	WARN_ONCE(1, "Unsupported header field used for Rx flow cls\n");
+	return 0;
+}
+
+/* Prune unused fields from the classification rule.
+ * Used when masking is not supported
+ */
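+/* For example, for fields == (DPAA2_ETH_DIST_ETHTYPE | DPAA2_ETH_DIST_IPPROTO)
+ * the 2-byte ethertype and 1-byte IP protocol are packed down to offsets 0
+ * and 2 of the key, and all other fields are dropped.
+ */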
+void dpaa2_eth_cls_trim_rule(void *key_mem, u64 fields)
+{
+	int off = 0, new_off = 0;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(dist_fields); i++) {
+		size = dist_fields[i].size;
+		if (dist_fields[i].id & fields) {
+			memcpy(key_mem + new_off, key_mem + off, size);
+			new_off += size;
+		}
+		off += size;
+	}
+}
+
+/* Set Rx distribution (hash or flow classification) key
+ * flags is a combination of RXH_ bits
+ */
+static int dpaa2_eth_set_dist_key(struct net_device *net_dev,
+				  enum dpaa2_eth_rx_dist type, u64 flags)
+{
+	struct device *dev = net_dev->dev.parent;
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpkg_profile_cfg cls_cfg;
+	u32 rx_hash_fields = 0;
+	dma_addr_t key_iova;
+	u8 *dma_mem;
+	int i;
+	int err = 0;
+
+	memset(&cls_cfg, 0, sizeof(cls_cfg));
+
+	for (i = 0; i < ARRAY_SIZE(dist_fields); i++) {
+		struct dpkg_extract *key =
+			&cls_cfg.extracts[cls_cfg.num_extracts];
+
+		/* For both Rx hashing and classification keys
+		 * we set only the selected fields.
+		 */
+		if (!(flags & dist_fields[i].id))
+			continue;
+		if (type == DPAA2_ETH_RX_DIST_HASH)
+			rx_hash_fields |= dist_fields[i].rxnfc_field;
+
+		if (cls_cfg.num_extracts >= DPKG_MAX_NUM_OF_EXTRACTS) {
+			dev_err(dev, "error adding key extraction rule, too many rules?\n");
+			return -E2BIG;
+		}
+
+		key->type = DPKG_EXTRACT_FROM_HDR;
+		key->extract.from_hdr.prot = dist_fields[i].cls_prot;
+		key->extract.from_hdr.type = DPKG_FULL_FIELD;
+		key->extract.from_hdr.field = dist_fields[i].cls_field;
+		cls_cfg.num_extracts++;
+	}
+
+	dma_mem = kzalloc(DPAA2_CLASSIFIER_DMA_SIZE, GFP_KERNEL);
+	if (!dma_mem)
+		return -ENOMEM;
+
+	err = dpni_prepare_key_cfg(&cls_cfg, dma_mem);
+	if (err) {
+		dev_err(dev, "dpni_prepare_key_cfg error %d\n", err);
+		goto free_key;
+	}
+
+	/* Prepare for setting the rx dist */
+	key_iova = dma_map_single(dev, dma_mem, DPAA2_CLASSIFIER_DMA_SIZE,
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, key_iova)) {
+		dev_err(dev, "DMA mapping failed\n");
+		err = -ENOMEM;
+		goto free_key;
+	}
+
+	if (type == DPAA2_ETH_RX_DIST_HASH) {
+		if (dpaa2_eth_has_legacy_dist(priv))
+			err = config_legacy_hash_key(priv, key_iova);
+		else
+			err = config_hash_key(priv, key_iova);
+	} else {
+		err = config_cls_key(priv, key_iova);
+	}
+
+	dma_unmap_single(dev, key_iova, DPAA2_CLASSIFIER_DMA_SIZE,
+			 DMA_TO_DEVICE);
+	if (!err && type == DPAA2_ETH_RX_DIST_HASH)
+		priv->rx_hash_fields = rx_hash_fields;
+
+free_key:
+	kfree(dma_mem);
+	return err;
+}
+
+int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	u64 key = 0;
+	int i;
+
+	if (!dpaa2_eth_hash_enabled(priv))
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < ARRAY_SIZE(dist_fields); i++)
+		if (dist_fields[i].rxnfc_field & flags)
+			key |= dist_fields[i].id;
+
+	return dpaa2_eth_set_dist_key(net_dev, DPAA2_ETH_RX_DIST_HASH, key);
+}
+
+int dpaa2_eth_set_cls(struct net_device *net_dev, u64 flags)
+{
+	return dpaa2_eth_set_dist_key(net_dev, DPAA2_ETH_RX_DIST_CLS, flags);
+}
+
+static int dpaa2_eth_set_default_cls(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	int err;
+
+	/* Check if we actually support Rx flow classification */
+	if (dpaa2_eth_has_legacy_dist(priv)) {
+		dev_dbg(dev, "Rx cls not supported by current MC version\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!dpaa2_eth_fs_enabled(priv)) {
+		dev_dbg(dev, "Rx cls disabled in DPNI options\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!dpaa2_eth_hash_enabled(priv)) {
+		dev_dbg(dev, "Rx cls disabled for single queue DPNIs\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* If there is no support for masking in the classification table,
+	 * we don't set a default key, as it will depend on the rules
+	 * added by the user at runtime.
+	 */
+	if (!dpaa2_eth_fs_mask_enabled(priv))
+		goto out;
+
+	err = dpaa2_eth_set_cls(priv->net_dev, DPAA2_ETH_DIST_ALL);
+	if (err)
+		return err;
+
+out:
+	priv->rx_cls_enabled = 1;
+
+	return 0;
+}
+
+/* Bind the DPNI to its needed objects and resources: buffer pool, DPIOs,
+ * frame queues and channels
+ */
+static int bind_dpni(struct dpaa2_eth_priv *priv)
+{
+	struct net_device *net_dev = priv->net_dev;
+	struct device *dev = net_dev->dev.parent;
+	struct dpni_pools_cfg pools_params;
+	struct dpni_error_cfg err_cfg;
+	int err = 0;
+	int i;
+
+	pools_params.num_dpbp = 1;
+	pools_params.pools[0].dpbp_id = priv->dpbp_dev->obj_desc.id;
+	pools_params.pools[0].backup_pool = 0;
+	pools_params.pools[0].buffer_size = DPAA2_ETH_RX_BUF_SIZE;
+	err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &pools_params);
+	if (err) {
+		dev_err(dev, "dpni_set_pools() failed\n");
+		return err;
+	}
+
+	/* have the interface implicitly distribute traffic based on
+	 * the default hash key
+	 */
+	err = dpaa2_eth_set_hash(net_dev, DPAA2_RXH_DEFAULT);
+	if (err && err != -EOPNOTSUPP)
+		dev_err(dev, "Failed to configure hashing\n");
+
+	/* Configure the flow classification key; it includes all
+	 * supported header fields and cannot be modified at runtime
+	 */
+	err = dpaa2_eth_set_default_cls(priv);
+	if (err && err != -EOPNOTSUPP)
+		dev_err(dev, "Failed to configure Rx classification key\n");
+
+	/* Configure handling of error frames */
+	err_cfg.errors = DPAA2_FAS_RX_ERR_MASK;
+	err_cfg.set_frame_annotation = 1;
+	err_cfg.error_action = DPNI_ERROR_ACTION_DISCARD;
+	err = dpni_set_errors_behavior(priv->mc_io, 0, priv->mc_token,
+				       &err_cfg);
+	if (err) {
+		dev_err(dev, "dpni_set_errors_behavior failed\n");
+		return err;
+	}
+
+	/* Configure Rx and Tx conf queues to generate CDANs */
+	for (i = 0; i < priv->num_fqs; i++) {
+		switch (priv->fq[i].type) {
+		case DPAA2_RX_FQ:
+			err = setup_rx_flow(priv, &priv->fq[i]);
+			break;
+		case DPAA2_TX_CONF_FQ:
+			err = setup_tx_flow(priv, &priv->fq[i]);
+			break;
+		default:
+			dev_err(dev, "Invalid FQ type %d\n", priv->fq[i].type);
+			return -EINVAL;
+		}
+		if (err)
+			return err;
+	}
+
+	err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token,
+			    DPNI_QUEUE_TX, &priv->tx_qdid);
+	if (err) {
+		dev_err(dev, "dpni_get_qdid() failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/* Allocate rings for storing incoming frame descriptors */
+static int alloc_rings(struct dpaa2_eth_priv *priv)
+{
+	struct net_device *net_dev = priv->net_dev;
+	struct device *dev = net_dev->dev.parent;
+	int i;
+
+	for (i = 0; i < priv->num_channels; i++) {
+		priv->channel[i]->store =
+			dpaa2_io_store_create(DPAA2_ETH_STORE_SIZE, dev);
+		if (!priv->channel[i]->store) {
+			netdev_err(net_dev, "dpaa2_io_store_create() failed\n");
+			goto err_ring;
+		}
+	}
+
+	return 0;
+
+err_ring:
+	for (i = 0; i < priv->num_channels; i++) {
+		if (!priv->channel[i]->store)
+			break;
+		dpaa2_io_store_destroy(priv->channel[i]->store);
+	}
+
+	return -ENOMEM;
+}
+
+static void free_rings(struct dpaa2_eth_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_channels; i++)
+		dpaa2_io_store_destroy(priv->channel[i]->store);
+}
+
+static int set_mac_addr(struct dpaa2_eth_priv *priv)
+{
+	struct net_device *net_dev = priv->net_dev;
+	struct device *dev = net_dev->dev.parent;
+	u8 mac_addr[ETH_ALEN], dpni_mac_addr[ETH_ALEN];
+	int err;
+
+	/* Get firmware address, if any */
+	err = dpni_get_port_mac_addr(priv->mc_io, 0, priv->mc_token, mac_addr);
+	if (err) {
+		dev_err(dev, "dpni_get_port_mac_addr() failed\n");
+		return err;
+	}
+
+	/* Get the MAC address set in the DPNI attributes, if any */
+	err = dpni_get_primary_mac_addr(priv->mc_io, 0, priv->mc_token,
+					dpni_mac_addr);
+	if (err) {
+		dev_err(dev, "dpni_get_primary_mac_addr() failed\n");
+		return err;
+	}
+
+	/* First check if firmware has any address configured by bootloader */
+	if (!is_zero_ether_addr(mac_addr)) {
+		/* If the DPMAC addr != DPNI addr, update it */
+		if (!ether_addr_equal(mac_addr, dpni_mac_addr)) {
+			err = dpni_set_primary_mac_addr(priv->mc_io, 0,
+							priv->mc_token,
+							mac_addr);
+			if (err) {
+				dev_err(dev, "dpni_set_primary_mac_addr() failed\n");
+				return err;
+			}
+		}
+		memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
+	} else if (is_zero_ether_addr(dpni_mac_addr)) {
+		/* No MAC address configured, fill in net_dev->dev_addr
+		 * with a random one
+		 */
+		eth_hw_addr_random(net_dev);
+		dev_dbg_once(dev, "device(s) have all-zero hwaddr, replaced with random\n");
+
+		err = dpni_set_primary_mac_addr(priv->mc_io, 0, priv->mc_token,
+						net_dev->dev_addr);
+		if (err) {
+			dev_err(dev, "dpni_set_primary_mac_addr() failed\n");
+			return err;
+		}
+
+		/* Override NET_ADDR_RANDOM set by eth_hw_addr_random(); for all
+		 * practical purposes, this will be our "permanent" mac address,
+		 * at least until the next reboot. This move will also permit
+		 * register_netdevice() to properly fill up net_dev->perm_addr.
+		 */
+		net_dev->addr_assign_type = NET_ADDR_PERM;
+	} else {
+		/* NET_ADDR_PERM is default, all we have to do is
+		 * fill in the device addr.
+		 */
+		memcpy(net_dev->dev_addr, dpni_mac_addr, net_dev->addr_len);
+	}
+
+	return 0;
+}
+
+static int netdev_init(struct net_device *net_dev)
+{
+	struct device *dev = net_dev->dev.parent;
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	u32 options = priv->dpni_attrs.options;
+	u64 supported = 0, not_supported = 0;
+	u8 bcast_addr[ETH_ALEN];
+	u8 num_queues;
+	int err;
+
+	net_dev->netdev_ops = &dpaa2_eth_ops;
+	net_dev->ethtool_ops = &dpaa2_ethtool_ops;
+
+	err = set_mac_addr(priv);
+	if (err)
+		return err;
+
+	/* Explicitly add the broadcast address to the MAC filtering table */
+	eth_broadcast_addr(bcast_addr);
+	err = dpni_add_mac_addr(priv->mc_io, 0, priv->mc_token, bcast_addr);
+	if (err) {
+		dev_err(dev, "dpni_add_mac_addr() failed\n");
+		return err;
+	}
+
+	/* Set MTU upper limit; lower limit is 68B (default value) */
+	net_dev->max_mtu = DPAA2_ETH_MAX_MTU;
+	err = dpni_set_max_frame_length(priv->mc_io, 0, priv->mc_token,
+					DPAA2_ETH_MFL);
+	if (err) {
+		dev_err(dev, "dpni_set_max_frame_length() failed\n");
+		return err;
+	}
+
+	/* Set actual number of queues in the net device */
+	num_queues = dpaa2_eth_queue_count(priv);
+	err = netif_set_real_num_tx_queues(net_dev, num_queues);
+	if (err) {
+		dev_err(dev, "netif_set_real_num_tx_queues() failed\n");
+		return err;
+	}
+	err = netif_set_real_num_rx_queues(net_dev, num_queues);
+	if (err) {
+		dev_err(dev, "netif_set_real_num_rx_queues() failed\n");
+		return err;
+	}
+
+	/* Capabilities listing */
+	supported |= IFF_LIVE_ADDR_CHANGE;
+
+	if (options & DPNI_OPT_NO_MAC_FILTER)
+		not_supported |= IFF_UNICAST_FLT;
+	else
+		supported |= IFF_UNICAST_FLT;
+
+	net_dev->priv_flags |= supported;
+	net_dev->priv_flags &= ~not_supported;
+
+	/* Features */
+	net_dev->features = NETIF_F_RXCSUM |
+			    NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+			    NETIF_F_SG | NETIF_F_HIGHDMA |
+			    NETIF_F_LLTX;
+	net_dev->hw_features = net_dev->features;
+
+	return 0;
+}
+
+static int poll_link_state(void *arg)
+{
+	struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)arg;
+	int err;
+
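+	/* Periodically refresh the link state, mirroring what the interrupt
+	 * handler would do; used when MC interrupts are unavailable.
+	 */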
+	while (!kthread_should_stop()) {
+		err = link_state_update(priv);
+		if (unlikely(err))
+			return err;
+
+		msleep(DPAA2_ETH_LINK_STATE_REFRESH);
+	}
+
+	return 0;
+}
+
+static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
+{
+	u32 status = ~0;
+	struct device *dev = (struct device *)arg;
+	struct fsl_mc_device *dpni_dev = to_fsl_mc_device(dev);
+	struct net_device *net_dev = dev_get_drvdata(dev);
+	int err;
+
+	err = dpni_get_irq_status(dpni_dev->mc_io, 0, dpni_dev->mc_handle,
+				  DPNI_IRQ_INDEX, &status);
+	if (unlikely(err)) {
+		netdev_err(net_dev, "Can't get irq status (err %d)\n", err);
+		return IRQ_HANDLED;
+	}
+
+	if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
+		link_state_update(netdev_priv(net_dev));
+
+	if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED)
+		set_mac_addr(netdev_priv(net_dev));
+
+	return IRQ_HANDLED;
+}
+
+static int setup_irqs(struct fsl_mc_device *ls_dev)
+{
+	int err = 0;
+	struct fsl_mc_device_irq *irq;
+
+	err = fsl_mc_allocate_irqs(ls_dev);
+	if (err) {
+		dev_err(&ls_dev->dev, "MC irqs allocation failed\n");
+		return err;
+	}
+
+	irq = ls_dev->irqs[0];
+	err = devm_request_threaded_irq(&ls_dev->dev, irq->msi_desc->irq,
+					NULL, dpni_irq0_handler_thread,
+					IRQF_NO_SUSPEND | IRQF_ONESHOT,
+					dev_name(&ls_dev->dev), &ls_dev->dev);
+	if (err < 0) {
+		dev_err(&ls_dev->dev, "devm_request_threaded_irq(): %d\n", err);
+		goto free_mc_irq;
+	}
+
+	err = dpni_set_irq_mask(ls_dev->mc_io, 0, ls_dev->mc_handle,
+				DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED |
+				DPNI_IRQ_EVENT_ENDPOINT_CHANGED);
+	if (err < 0) {
+		dev_err(&ls_dev->dev, "dpni_set_irq_mask(): %d\n", err);
+		goto free_irq;
+	}
+
+	err = dpni_set_irq_enable(ls_dev->mc_io, 0, ls_dev->mc_handle,
+				  DPNI_IRQ_INDEX, 1);
+	if (err < 0) {
+		dev_err(&ls_dev->dev, "dpni_set_irq_enable(): %d\n", err);
+		goto free_irq;
+	}
+
+	return 0;
+
+free_irq:
+	devm_free_irq(&ls_dev->dev, irq->msi_desc->irq, &ls_dev->dev);
+free_mc_irq:
+	fsl_mc_free_irqs(ls_dev);
+
+	return err;
+}
+
+static void add_ch_napi(struct dpaa2_eth_priv *priv)
+{
+	int i;
+	struct dpaa2_eth_channel *ch;
+
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		/* NAPI weight *MUST* be a multiple of DPAA2_ETH_STORE_SIZE */
+		netif_napi_add(priv->net_dev, &ch->napi, dpaa2_eth_poll,
+			       NAPI_POLL_WEIGHT);
+	}
+}
+
+static void del_ch_napi(struct dpaa2_eth_priv *priv)
+{
+	int i;
+	struct dpaa2_eth_channel *ch;
+
+	for (i = 0; i < priv->num_channels; i++) {
+		ch = priv->channel[i];
+		netif_napi_del(&ch->napi);
+	}
+}
+
+static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
+{
+	struct device *dev;
+	struct net_device *net_dev = NULL;
+	struct dpaa2_eth_priv *priv = NULL;
+	int err = 0;
+
+	dev = &dpni_dev->dev;
+
+	/* Net device */
+	net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA2_ETH_MAX_NETDEV_QUEUES);
+	if (!net_dev) {
+		dev_err(dev, "alloc_etherdev_mq() failed\n");
+		return -ENOMEM;
+	}
+
+	SET_NETDEV_DEV(net_dev, dev);
+	dev_set_drvdata(dev, net_dev);
+
+	priv = netdev_priv(net_dev);
+	priv->net_dev = net_dev;
+
+	priv->iommu_domain = iommu_get_domain_for_dev(dev);
+
+	/* Obtain a MC portal */
+	err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
+				     &priv->mc_io);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "MC portal allocation failed\n");
+		goto err_portal_alloc;
+	}
+
+	/* MC objects initialization and configuration */
+	err = setup_dpni(dpni_dev);
+	if (err)
+		goto err_dpni_setup;
+
+	err = setup_dpio(priv);
+	if (err)
+		goto err_dpio_setup;
+
+	setup_fqs(priv);
+
+	err = setup_dpbp(priv);
+	if (err)
+		goto err_dpbp_setup;
+
+	err = bind_dpni(priv);
+	if (err)
+		goto err_bind;
+
+	/* Add a NAPI context for each channel */
+	add_ch_napi(priv);
+
+	/* Percpu statistics */
+	priv->percpu_stats = alloc_percpu(*priv->percpu_stats);
+	if (!priv->percpu_stats) {
+		dev_err(dev, "alloc_percpu(percpu_stats) failed\n");
+		err = -ENOMEM;
+		goto err_alloc_percpu_stats;
+	}
+	priv->percpu_extras = alloc_percpu(*priv->percpu_extras);
+	if (!priv->percpu_extras) {
+		dev_err(dev, "alloc_percpu(percpu_extras) failed\n");
+		err = -ENOMEM;
+		goto err_alloc_percpu_extras;
+	}
+
+	err = netdev_init(net_dev);
+	if (err)
+		goto err_netdev_init;
+
+	/* Configure checksum offload based on current interface flags */
+	err = set_rx_csum(priv, !!(net_dev->features & NETIF_F_RXCSUM));
+	if (err)
+		goto err_csum;
+
+	err = set_tx_csum(priv, !!(net_dev->features &
+				   (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)));
+	if (err)
+		goto err_csum;
+
+	err = alloc_rings(priv);
+	if (err)
+		goto err_alloc_rings;
+
+	err = setup_irqs(dpni_dev);
+	if (err) {
+		netdev_warn(net_dev, "Failed to set link interrupt, fall back to polling\n");
+		priv->poll_thread = kthread_run(poll_link_state, priv,
+						"%s_poll_link", net_dev->name);
+		if (IS_ERR(priv->poll_thread)) {
+			dev_err(dev, "Error starting polling thread\n");
+			goto err_poll_thread;
+		}
+		priv->do_link_poll = true;
+	}
+
+	err = register_netdev(net_dev);
+	if (err < 0) {
+		dev_err(dev, "register_netdev() failed\n");
+		goto err_netdev_reg;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	dpaa2_dbg_add(priv);
+#endif
+
+	dev_info(dev, "Probed interface %s\n", net_dev->name);
+	return 0;
+
+err_netdev_reg:
+	if (priv->do_link_poll)
+		kthread_stop(priv->poll_thread);
+	else
+		fsl_mc_free_irqs(dpni_dev);
+err_poll_thread:
+	free_rings(priv);
+err_alloc_rings:
+err_csum:
+err_netdev_init:
+	free_percpu(priv->percpu_extras);
+err_alloc_percpu_extras:
+	free_percpu(priv->percpu_stats);
+err_alloc_percpu_stats:
+	del_ch_napi(priv);
+err_bind:
+	free_dpbp(priv);
+err_dpbp_setup:
+	free_dpio(priv);
+err_dpio_setup:
+	free_dpni(priv);
+err_dpni_setup:
+	fsl_mc_portal_free(priv->mc_io);
+err_portal_alloc:
+	dev_set_drvdata(dev, NULL);
+	free_netdev(net_dev);
+
+	return err;
+}
+
+static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
+{
+	struct device *dev;
+	struct net_device *net_dev;
+	struct dpaa2_eth_priv *priv;
+
+	dev = &ls_dev->dev;
+	net_dev = dev_get_drvdata(dev);
+	priv = netdev_priv(net_dev);
+
+#ifdef CONFIG_DEBUG_FS
+	dpaa2_dbg_remove(priv);
+#endif
+	unregister_netdev(net_dev);
+
+	if (priv->do_link_poll)
+		kthread_stop(priv->poll_thread);
+	else
+		fsl_mc_free_irqs(ls_dev);
+
+	free_rings(priv);
+	free_percpu(priv->percpu_stats);
+	free_percpu(priv->percpu_extras);
+
+	del_ch_napi(priv);
+	free_dpbp(priv);
+	free_dpio(priv);
+	free_dpni(priv);
+
+	fsl_mc_portal_free(priv->mc_io);
+
+	free_netdev(net_dev);
+
+	dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name);
+
+	return 0;
+}
+
+static const struct fsl_mc_device_id dpaa2_eth_match_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dpni",
+	},
+	{ .vendor = 0x0 }
+};
+MODULE_DEVICE_TABLE(fslmc, dpaa2_eth_match_id_table);
+
+static struct fsl_mc_driver dpaa2_eth_driver = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = dpaa2_eth_probe,
+	.remove = dpaa2_eth_remove,
+	.match_id_table = dpaa2_eth_match_id_table
+};
+
+static int __init dpaa2_eth_driver_init(void)
+{
+	int err;
+
+	dpaa2_eth_dbg_init();
+	err = fsl_mc_driver_register(&dpaa2_eth_driver);
+	if (err) {
+		dpaa2_eth_dbg_exit();
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit dpaa2_eth_driver_exit(void)
+{
+	dpaa2_eth_dbg_exit();
+	fsl_mc_driver_unregister(&dpaa2_eth_driver);
+}
+
+module_init(dpaa2_eth_driver_init);
+module_exit(dpaa2_eth_driver_exit);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
new file mode 100644
index 0000000..8a0e65b
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -0,0 +1,525 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ */
+
+#ifndef __DPAA2_ETH_H
+#define __DPAA2_ETH_H
+
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/fsl/mc.h>
+
+#include <soc/fsl/dpaa2-io.h>
+#include <soc/fsl/dpaa2-fd.h>
+#include "dpni.h"
+#include "dpni-cmd.h"
+
+#include "dpaa2-eth-trace.h"
+#include "dpaa2-eth-debugfs.h"
+
+#define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0)
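+/* e.g. DPAA2_WRIOP_VERSION(1, 0, 0) == 0x400 */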
+
+#define DPAA2_ETH_STORE_SIZE		16
+
+/* Maximum number of scatter-gather entries in an ingress frame,
+ * considering the maximum receive frame size is 64K
+ */
+#define DPAA2_ETH_MAX_SG_ENTRIES	((64 * 1024) / DPAA2_ETH_RX_BUF_SIZE)
+
+/* Maximum acceptable MTU value. It is in direct relation with the hardware
+ * enforced Max Frame Length (currently 10k).
+ */
+#define DPAA2_ETH_MFL			(10 * 1024)
+#define DPAA2_ETH_MAX_MTU		(DPAA2_ETH_MFL - VLAN_ETH_HLEN)
+/* Convert L3 MTU to L2 MFL */
+#define DPAA2_ETH_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN)
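+/* e.g. the default 1500B MTU corresponds to a 1518B max frame length
+ * (14B Ethernet header + 4B VLAN tag + 1500B payload)
+ */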
+
+/* Set the taildrop threshold (in bytes) to allow the enqueue of several jumbo
+ * frames in the Rx queues (length of the current frame is not
+ * taken into account when making the taildrop decision)
+ */
+#define DPAA2_ETH_TAILDROP_THRESH	(64 * 1024)
+
+/* Maximum number of Tx confirmation frames to be processed
+ * in a single NAPI call
+ */
+#define DPAA2_ETH_TXCONF_PER_NAPI	256
+
+/* Buffer quota per queue. Must be large enough so that, for minimum-sized
+ * frames, taildrop kicks in before the bpool gets depleted; we compute how
+ * many 64B frames fit inside the taildrop threshold and add a margin to
+ * accommodate the buffer refill delay.
+ */
+#define DPAA2_ETH_MAX_FRAMES_PER_QUEUE	(DPAA2_ETH_TAILDROP_THRESH / 64)
+#define DPAA2_ETH_NUM_BUFS		(DPAA2_ETH_MAX_FRAMES_PER_QUEUE + 256)
+#define DPAA2_ETH_REFILL_THRESH \
+	(DPAA2_ETH_NUM_BUFS - DPAA2_ETH_BUFS_PER_CMD)
+
+/* Maximum number of buffers that can be acquired/released through a single
+ * QBMan command
+ */
+#define DPAA2_ETH_BUFS_PER_CMD		7
+
+/* Hardware requires alignment for ingress/egress buffer addresses */
+#define DPAA2_ETH_TX_BUF_ALIGN		64
+
+#define DPAA2_ETH_RX_BUF_RAW_SIZE	PAGE_SIZE
+#define DPAA2_ETH_RX_BUF_TAILROOM \
+	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+#define DPAA2_ETH_RX_BUF_SIZE \
+	(DPAA2_ETH_RX_BUF_RAW_SIZE - DPAA2_ETH_RX_BUF_TAILROOM)
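+/* With 4 KiB pages, this leaves a bit under 4 KiB of usable frame data per
+ * Rx buffer once the skb_shared_info tailroom is carved out
+ */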
+
+/* Hardware annotation area in RX/TX buffers */
+#define DPAA2_ETH_RX_HWA_SIZE		64
+#define DPAA2_ETH_TX_HWA_SIZE		128
+
+/* PTP nominal frequency 1GHz */
+#define DPAA2_PTP_CLK_PERIOD_NS		1
+
+/* Due to a limitation in WRIOP 1.0.0, the RX buffer data must be aligned
+ * to 256B. For newer revisions, the requirement is only for 64B alignment
+ */
+#define DPAA2_ETH_RX_BUF_ALIGN_REV1	256
+#define DPAA2_ETH_RX_BUF_ALIGN		64
+
+/* We are accommodating a skb backpointer and some S/G info
+ * in the frame's software annotation. The hardware
+ * options are either 0 or 64, so we choose the latter.
+ */
+#define DPAA2_ETH_SWA_SIZE		64
+
+/* We store different information in the software annotation area of a Tx frame
+ * based on what type of frame it is
+ */
+enum dpaa2_eth_swa_type {
+	DPAA2_ETH_SWA_SINGLE,
+	DPAA2_ETH_SWA_SG,
+	DPAA2_ETH_SWA_XDP,
+};
+
+/* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */
+struct dpaa2_eth_swa {
+	enum dpaa2_eth_swa_type type;
+	union {
+		struct {
+			struct sk_buff *skb;
+		} single;
+		struct {
+			struct sk_buff *skb;
+			struct scatterlist *scl;
+			int num_sg;
+			int sgt_size;
+		} sg;
+		struct {
+			int dma_size;
+			struct xdp_frame *xdpf;
+		} xdp;
+	};
+};
+
+/* Annotation valid bits in FD FRC */
+#define DPAA2_FD_FRC_FASV		0x8000
+#define DPAA2_FD_FRC_FAEADV		0x4000
+#define DPAA2_FD_FRC_FAPRV		0x2000
+#define DPAA2_FD_FRC_FAIADV		0x1000
+#define DPAA2_FD_FRC_FASWOV		0x0800
+#define DPAA2_FD_FRC_FAICFDV		0x0400
+
+/* Error bits in FD CTRL */
+#define DPAA2_FD_RX_ERR_MASK		(FD_CTRL_SBE | FD_CTRL_FAERR)
+#define DPAA2_FD_TX_ERR_MASK		(FD_CTRL_UFD	| \
+					 FD_CTRL_SBE	| \
+					 FD_CTRL_FSE	| \
+					 FD_CTRL_FAERR)
+
+/* Annotation bits in FD CTRL */
+#define DPAA2_FD_CTRL_ASAL		0x00020000	/* ASAL = 128B */
+
+/* Frame annotation status */
+struct dpaa2_fas {
+	u8 reserved;
+	u8 ppid;
+	__le16 ifpid;
+	__le32 status;
+};
+
+/* Frame annotation status word is located in the first 8 bytes
+ * of the buffer's hardware annotation area
+ */
+#define DPAA2_FAS_OFFSET		0
+#define DPAA2_FAS_SIZE			(sizeof(struct dpaa2_fas))
+
+/* Timestamp is located in the next 8 bytes of the buffer's
+ * hardware annotation area
+ */
+#define DPAA2_TS_OFFSET			0x8
+
+/* Frame annotation egress action descriptor */
+#define DPAA2_FAEAD_OFFSET		0x58
+
+struct dpaa2_faead {
+	__le32 conf_fqid;
+	__le32 ctrl;
+};
+
+#define DPAA2_FAEAD_A2V			0x20000000
+#define DPAA2_FAEAD_A4V			0x08000000
+#define DPAA2_FAEAD_UPDV		0x00001000
+#define DPAA2_FAEAD_EBDDV		0x00002000
+#define DPAA2_FAEAD_UPD			0x00000010
+
+/* Accessors for the hardware annotation fields that we use */
+static inline void *dpaa2_get_hwa(void *buf_addr, bool swa)
+{
+	return buf_addr + (swa ? DPAA2_ETH_SWA_SIZE : 0);
+}
+
+static inline struct dpaa2_fas *dpaa2_get_fas(void *buf_addr, bool swa)
+{
+	return dpaa2_get_hwa(buf_addr, swa) + DPAA2_FAS_OFFSET;
+}
+
+static inline __le64 *dpaa2_get_ts(void *buf_addr, bool swa)
+{
+	return dpaa2_get_hwa(buf_addr, swa) + DPAA2_TS_OFFSET;
+}
+
+static inline struct dpaa2_faead *dpaa2_get_faead(void *buf_addr, bool swa)
+{
+	return dpaa2_get_hwa(buf_addr, swa) + DPAA2_FAEAD_OFFSET;
+}
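+
+/* Illustrative buffer layout implied by the accessors above, for a frame
+ * that carries a software annotation (swa == true):
+ *
+ *	0x00: software annotation (DPAA2_ETH_SWA_SIZE bytes)
+ *	0x40: frame annotation status (+ DPAA2_FAS_OFFSET)
+ *	0x48: timestamp (+ DPAA2_TS_OFFSET)
+ *	0x98: frame annotation egress action descriptor (+ DPAA2_FAEAD_OFFSET)
+ */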
+
+/* Error and status bits in the frame annotation status word */
+/* Debug frame, otherwise supposed to be discarded */
+#define DPAA2_FAS_DISC			0x80000000
+/* MACSEC frame */
+#define DPAA2_FAS_MS			0x40000000
+#define DPAA2_FAS_PTP			0x08000000
+/* Ethernet multicast frame */
+#define DPAA2_FAS_MC			0x04000000
+/* Ethernet broadcast frame */
+#define DPAA2_FAS_BC			0x02000000
+#define DPAA2_FAS_KSE			0x00040000
+#define DPAA2_FAS_EOFHE			0x00020000
+#define DPAA2_FAS_MNLE			0x00010000
+#define DPAA2_FAS_TIDE			0x00008000
+#define DPAA2_FAS_PIEE			0x00004000
+/* Frame length error */
+#define DPAA2_FAS_FLE			0x00002000
+/* Frame physical error */
+#define DPAA2_FAS_FPE			0x00001000
+#define DPAA2_FAS_PTE			0x00000080
+#define DPAA2_FAS_ISP			0x00000040
+#define DPAA2_FAS_PHE			0x00000020
+#define DPAA2_FAS_BLE			0x00000010
+/* L3 csum validation performed */
+#define DPAA2_FAS_L3CV			0x00000008
+/* L3 csum error */
+#define DPAA2_FAS_L3CE			0x00000004
+/* L4 csum validation performed */
+#define DPAA2_FAS_L4CV			0x00000002
+/* L4 csum error */
+#define DPAA2_FAS_L4CE			0x00000001
+/* Possible errors on the ingress path */
+#define DPAA2_FAS_RX_ERR_MASK		(DPAA2_FAS_KSE		| \
+					 DPAA2_FAS_EOFHE	| \
+					 DPAA2_FAS_MNLE		| \
+					 DPAA2_FAS_TIDE		| \
+					 DPAA2_FAS_PIEE		| \
+					 DPAA2_FAS_FLE		| \
+					 DPAA2_FAS_FPE		| \
+					 DPAA2_FAS_PTE		| \
+					 DPAA2_FAS_ISP		| \
+					 DPAA2_FAS_PHE		| \
+					 DPAA2_FAS_BLE		| \
+					 DPAA2_FAS_L3CE		| \
+					 DPAA2_FAS_L4CE)
+
+/* Time in milliseconds between link state updates */
+#define DPAA2_ETH_LINK_STATE_REFRESH	1000
+
+/* Number of times to retry a frame enqueue before giving up.
+ * Value determined empirically, in order to minimize the number
+ * of frames dropped on Tx
+ */
+#define DPAA2_ETH_ENQUEUE_RETRIES	10
+
+/* Driver statistics, other than those in struct rtnl_link_stats64.
+ * These are usually collected per-CPU and aggregated by ethtool.
+ */
+struct dpaa2_eth_drv_stats {
+	__u64	tx_conf_frames;
+	__u64	tx_conf_bytes;
+	__u64	tx_sg_frames;
+	__u64	tx_sg_bytes;
+	__u64	tx_reallocs;
+	__u64	rx_sg_frames;
+	__u64	rx_sg_bytes;
+	/* Enqueues retried due to portal busy */
+	__u64	tx_portal_busy;
+};
+
+/* Per-FQ statistics */
+struct dpaa2_eth_fq_stats {
+	/* Number of frames received on this queue */
+	__u64 frames;
+};
+
+/* Per-channel statistics */
+struct dpaa2_eth_ch_stats {
+	/* Volatile dequeues retried due to portal busy */
+	__u64 dequeue_portal_busy;
+	/* Pull errors */
+	__u64 pull_err;
+	/* Number of CDANs; useful to estimate avg NAPI len */
+	__u64 cdan;
+	/* XDP counters */
+	__u64 xdp_drop;
+	__u64 xdp_tx;
+	__u64 xdp_tx_err;
+	__u64 xdp_redirect;
+};
+
+/* Maximum number of queues associated with a DPNI */
+#define DPAA2_ETH_MAX_TCS		8
+#define DPAA2_ETH_MAX_RX_QUEUES		16
+#define DPAA2_ETH_MAX_TX_QUEUES		16
+#define DPAA2_ETH_MAX_QUEUES		(DPAA2_ETH_MAX_RX_QUEUES + \
+					DPAA2_ETH_MAX_TX_QUEUES)
+#define DPAA2_ETH_MAX_NETDEV_QUEUES	\
+	(DPAA2_ETH_MAX_TX_QUEUES * DPAA2_ETH_MAX_TCS)
+
+#define DPAA2_ETH_MAX_DPCONS		16
+
+enum dpaa2_eth_fq_type {
+	DPAA2_RX_FQ = 0,
+	DPAA2_TX_CONF_FQ,
+};
+
+struct dpaa2_eth_priv;
+
+struct dpaa2_eth_fq {
+	u32 fqid;
+	u32 tx_qdbin;
+	u32 tx_fqid[DPAA2_ETH_MAX_TCS];
+	u16 flowid;
+	u8 tc;
+	int target_cpu;
+	u32 dq_frames;
+	u32 dq_bytes;
+	struct dpaa2_eth_channel *channel;
+	enum dpaa2_eth_fq_type type;
+
+	void (*consume)(struct dpaa2_eth_priv *priv,
+			struct dpaa2_eth_channel *ch,
+			const struct dpaa2_fd *fd,
+			struct dpaa2_eth_fq *fq);
+	struct dpaa2_eth_fq_stats stats;
+};
+
+struct dpaa2_eth_ch_xdp {
+	struct bpf_prog *prog;
+	u64 drop_bufs[DPAA2_ETH_BUFS_PER_CMD];
+	int drop_cnt;
+	unsigned int res;
+};
+
+struct dpaa2_eth_channel {
+	struct dpaa2_io_notification_ctx nctx;
+	struct fsl_mc_device *dpcon;
+	int dpcon_id;
+	int ch_id;
+	struct napi_struct napi;
+	struct dpaa2_io *dpio;
+	struct dpaa2_io_store *store;
+	struct dpaa2_eth_priv *priv;
+	int buf_count;
+	struct dpaa2_eth_ch_stats stats;
+	struct dpaa2_eth_ch_xdp xdp;
+	struct xdp_rxq_info xdp_rxq;
+	struct list_head *rx_list;
+};
+
+struct dpaa2_eth_dist_fields {
+	u64 rxnfc_field;
+	enum net_prot cls_prot;
+	int cls_field;
+	int size;
+	u64 id;
+};
+
+struct dpaa2_eth_cls_rule {
+	struct ethtool_rx_flow_spec fs;
+	u8 in_use;
+};
+
+/* Driver private data */
+struct dpaa2_eth_priv {
+	struct net_device *net_dev;
+
+	u8 num_fqs;
+	struct dpaa2_eth_fq fq[DPAA2_ETH_MAX_QUEUES];
+	int (*enqueue)(struct dpaa2_eth_priv *priv,
+		       struct dpaa2_eth_fq *fq,
+		       struct dpaa2_fd *fd, u8 prio);
+
+	u8 num_channels;
+	struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS];
+
+	struct dpni_attr dpni_attrs;
+	u16 dpni_ver_major;
+	u16 dpni_ver_minor;
+	u16 tx_data_offset;
+
+	struct fsl_mc_device *dpbp_dev;
+	u16 bpid;
+	struct iommu_domain *iommu_domain;
+
+	bool tx_tstamp; /* Tx timestamping enabled */
+	bool rx_tstamp; /* Rx timestamping enabled */
+
+	u16 tx_qdid;
+	struct fsl_mc_io *mc_io;
+	/* Cores which have an affine DPIO/DPCON.
+	 * This is the cpu set on which Rx and Tx conf frames are processed
+	 */
+	struct cpumask dpio_cpumask;
+
+	/* Standard statistics */
+	struct rtnl_link_stats64 __percpu *percpu_stats;
+	/* Extra stats, in addition to the ones known by the kernel */
+	struct dpaa2_eth_drv_stats __percpu *percpu_extras;
+
+	u16 mc_token;
+	u8 rx_td_enabled;
+
+	struct dpni_link_state link_state;
+	bool do_link_poll;
+	struct task_struct *poll_thread;
+
+	/* enabled ethtool hashing bits */
+	u64 rx_hash_fields;
+	u64 rx_cls_fields;
+	struct dpaa2_eth_cls_rule *cls_rules;
+	u8 rx_cls_enabled;
+	struct bpf_prog *xdp_prog;
+#ifdef CONFIG_DEBUG_FS
+	struct dpaa2_debugfs dbg;
+#endif
+};
+
+#define DPAA2_RXH_SUPPORTED	(RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \
+				| RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 \
+				| RXH_L4_B_2_3)
+
+/* default Rx hash options, set during probing */
+#define DPAA2_RXH_DEFAULT	(RXH_L3_PROTO | RXH_IP_SRC | RXH_IP_DST | \
+				 RXH_L4_B_0_1 | RXH_L4_B_2_3)
+
+#define dpaa2_eth_hash_enabled(priv)	\
+	((priv)->dpni_attrs.num_queues > 1)
+
+/* Required by struct dpni_rx_tc_dist_cfg::key_cfg_iova */
+#define DPAA2_CLASSIFIER_DMA_SIZE 256
+
+extern const struct ethtool_ops dpaa2_ethtool_ops;
+extern int dpaa2_phc_index;
+
+static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv,
+					 u16 ver_major, u16 ver_minor)
+{
+	if (priv->dpni_ver_major == ver_major)
+		return priv->dpni_ver_minor - ver_minor;
+	return priv->dpni_ver_major - ver_major;
+}
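+
+/* For example, dpaa2_eth_cmp_dpni_ver(priv, 7, 5) >= 0 checks that the
+ * DPNI firmware is at least version 7.5; the feature-gating helpers
+ * below follow this pattern.
+ */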
+
+/* Minimum firmware version that supports a more flexible API
+ * for configuring the Rx flow hash key
+ */
+#define DPNI_RX_DIST_KEY_VER_MAJOR	7
+#define DPNI_RX_DIST_KEY_VER_MINOR	5
+
+#define dpaa2_eth_has_legacy_dist(priv)					\
+	(dpaa2_eth_cmp_dpni_ver((priv), DPNI_RX_DIST_KEY_VER_MAJOR,	\
+				DPNI_RX_DIST_KEY_VER_MINOR) < 0)
+
+#define dpaa2_eth_fs_enabled(priv)	\
+	(!((priv)->dpni_attrs.options & DPNI_OPT_NO_FS))
+
+#define dpaa2_eth_fs_mask_enabled(priv)	\
+	((priv)->dpni_attrs.options & DPNI_OPT_HAS_KEY_MASKING)
+
+#define dpaa2_eth_fs_count(priv)        \
+	((priv)->dpni_attrs.fs_entries)
+
+#define dpaa2_eth_tc_count(priv)	\
+	((priv)->dpni_attrs.num_tcs)
+
+/* We have exactly one {Rx, Tx conf} queue per channel */
+#define dpaa2_eth_queue_count(priv)     \
+	((priv)->num_channels)
+
+enum dpaa2_eth_rx_dist {
+	DPAA2_ETH_RX_DIST_HASH,
+	DPAA2_ETH_RX_DIST_CLS
+};
+
+/* Unique IDs for the supported Rx classification header fields */
+#define DPAA2_ETH_DIST_ETHDST		BIT(0)
+#define DPAA2_ETH_DIST_ETHSRC		BIT(1)
+#define DPAA2_ETH_DIST_ETHTYPE		BIT(2)
+#define DPAA2_ETH_DIST_VLAN		BIT(3)
+#define DPAA2_ETH_DIST_IPSRC		BIT(4)
+#define DPAA2_ETH_DIST_IPDST		BIT(5)
+#define DPAA2_ETH_DIST_IPPROTO		BIT(6)
+#define DPAA2_ETH_DIST_L4SRC		BIT(7)
+#define DPAA2_ETH_DIST_L4DST		BIT(8)
+#define DPAA2_ETH_DIST_ALL		(~0ULL)
+
+#define DPNI_PAUSE_VER_MAJOR		7
+#define DPNI_PAUSE_VER_MINOR		13
+#define dpaa2_eth_has_pause_support(priv)			\
+	(dpaa2_eth_cmp_dpni_ver((priv), DPNI_PAUSE_VER_MAJOR,	\
+				DPNI_PAUSE_VER_MINOR) >= 0)
+
+static inline
+unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv,
+				       struct sk_buff *skb)
+{
+	unsigned int headroom = DPAA2_ETH_SWA_SIZE;
+
+	/* If we don't have an skb (e.g. XDP buffer), we only need space for
+	 * the software annotation area
+	 */
+	if (!skb)
+		return headroom;
+
+	/* For non-linear skbs we have no headroom requirement, as we build a
+	 * SG frame with a newly allocated SGT buffer
+	 */
+	if (skb_is_nonlinear(skb))
+		return 0;
+
+	/* If we have Tx timestamping, need 128B hardware annotation */
+	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+		headroom += DPAA2_ETH_TX_HWA_SIZE;
+
+	return headroom;
+}
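+
+/* e.g. a linear skb with hardware Tx timestamping enabled needs
+ * DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_HWA_SIZE = 64 + 128 = 192 bytes of
+ * headroom, while a nonlinear skb needs none.
+ */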
+
+/* Extra headroom space requested to hardware, in order to make sure there's
+ * no realloc'ing in forwarding scenarios
+ */
+static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv)
+{
+	return priv->tx_data_offset - DPAA2_ETH_RX_HWA_SIZE;
+}
+
+int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags);
+int dpaa2_eth_set_cls(struct net_device *net_dev, u64 key);
+int dpaa2_eth_cls_key_size(u64 key);
+int dpaa2_eth_cls_fld_off(int prot, int field);
+void dpaa2_eth_cls_trim_rule(void *key_mem, u64 fields);
+
+#endif	/* __DPAA2_ETH_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
new file mode 100644
index 0000000..0aa1c34
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -0,0 +1,739 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ */
+
+#include <linux/net_tstamp.h>
+#include <linux/nospec.h>
+
+#include "dpni.h"	/* DPNI_LINK_OPT_* */
+#include "dpaa2-eth.h"
+
+/* To be kept in sync with DPNI statistics */
+static char dpaa2_ethtool_stats[][ETH_GSTRING_LEN] = {
+	"[hw] rx frames",
+	"[hw] rx bytes",
+	"[hw] rx mcast frames",
+	"[hw] rx mcast bytes",
+	"[hw] rx bcast frames",
+	"[hw] rx bcast bytes",
+	"[hw] tx frames",
+	"[hw] tx bytes",
+	"[hw] tx mcast frames",
+	"[hw] tx mcast bytes",
+	"[hw] tx bcast frames",
+	"[hw] tx bcast bytes",
+	"[hw] rx filtered frames",
+	"[hw] rx discarded frames",
+	"[hw] rx nobuffer discards",
+	"[hw] tx discarded frames",
+	"[hw] tx confirmed frames",
+	"[hw] tx dequeued bytes",
+	"[hw] tx dequeued frames",
+	"[hw] tx rejected bytes",
+	"[hw] tx rejected frames",
+	"[hw] tx pending frames",
+};
+
+#define DPAA2_ETH_NUM_STATS	ARRAY_SIZE(dpaa2_ethtool_stats)
+
+static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
+	/* per-cpu stats */
+	"[drv] tx conf frames",
+	"[drv] tx conf bytes",
+	"[drv] tx sg frames",
+	"[drv] tx sg bytes",
+	"[drv] tx realloc frames",
+	"[drv] rx sg frames",
+	"[drv] rx sg bytes",
+	"[drv] enqueue portal busy",
+	/* Channel stats */
+	"[drv] dequeue portal busy",
+	"[drv] channel pull errors",
+	"[drv] cdan",
+	"[drv] xdp drop",
+	"[drv] xdp tx",
+	"[drv] xdp tx errors",
+	"[drv] xdp redirect",
+	/* FQ stats */
+	"[qbman] rx pending frames",
+	"[qbman] rx pending bytes",
+	"[qbman] tx conf pending frames",
+	"[qbman] tx conf pending bytes",
+	"[qbman] buffer count",
+};
+
+#define DPAA2_ETH_NUM_EXTRA_STATS	ARRAY_SIZE(dpaa2_ethtool_extras)
+
+static void dpaa2_eth_get_drvinfo(struct net_device *net_dev,
+				  struct ethtool_drvinfo *drvinfo)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%u.%u", priv->dpni_ver_major, priv->dpni_ver_minor);
+
+	strlcpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
+		sizeof(drvinfo->bus_info));
+}
+
+static int
+dpaa2_eth_get_link_ksettings(struct net_device *net_dev,
+			     struct ethtool_link_ksettings *link_settings)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	link_settings->base.autoneg = AUTONEG_DISABLE;
+	if (!(priv->link_state.options & DPNI_LINK_OPT_HALF_DUPLEX))
+		link_settings->base.duplex = DUPLEX_FULL;
+	link_settings->base.speed = priv->link_state.rate;
+
+	return 0;
+}
+
+static void dpaa2_eth_get_pauseparam(struct net_device *net_dev,
+				     struct ethtool_pauseparam *pause)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	u64 link_options = priv->link_state.options;
+
+	pause->rx_pause = !!(link_options & DPNI_LINK_OPT_PAUSE);
+	pause->tx_pause = pause->rx_pause ^
+			  !!(link_options & DPNI_LINK_OPT_ASYM_PAUSE);
+	pause->autoneg = AUTONEG_DISABLE;
+}
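+
+/* Mapping between the DPNI link options and the ethtool pause parameters,
+ * as implemented by the get/set functions around this table:
+ *
+ *	PAUSE  ASYM_PAUSE  =>  rx_pause  tx_pause
+ *	  0        0              0         0
+ *	  0        1              0         1
+ *	  1        0              1         1
+ *	  1        1              1         0
+ */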
+
+static int dpaa2_eth_set_pauseparam(struct net_device *net_dev,
+				    struct ethtool_pauseparam *pause)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpni_link_cfg cfg = {0};
+	int err;
+
+	if (!dpaa2_eth_has_pause_support(priv)) {
+		netdev_info(net_dev, "No pause frame support for DPNI version < %d.%d\n",
+			    DPNI_PAUSE_VER_MAJOR, DPNI_PAUSE_VER_MINOR);
+		return -EOPNOTSUPP;
+	}
+
+	if (pause->autoneg)
+		return -EOPNOTSUPP;
+
+	cfg.rate = priv->link_state.rate;
+	cfg.options = priv->link_state.options;
+	if (pause->rx_pause)
+		cfg.options |= DPNI_LINK_OPT_PAUSE;
+	else
+		cfg.options &= ~DPNI_LINK_OPT_PAUSE;
+	if (!!pause->rx_pause ^ !!pause->tx_pause)
+		cfg.options |= DPNI_LINK_OPT_ASYM_PAUSE;
+	else
+		cfg.options &= ~DPNI_LINK_OPT_ASYM_PAUSE;
+
+	if (cfg.options == priv->link_state.options)
+		return 0;
+
+	err = dpni_set_link_cfg(priv->mc_io, 0, priv->mc_token, &cfg);
+	if (err) {
+		netdev_err(net_dev, "dpni_set_link_cfg failed\n");
+		return err;
+	}
+
+	priv->link_state.options = cfg.options;
+
+	return 0;
+}
+
+static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset,
+				  u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < DPAA2_ETH_NUM_STATS; i++) {
+			strlcpy(p, dpaa2_ethtool_stats[i], ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < DPAA2_ETH_NUM_EXTRA_STATS; i++) {
+			strlcpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	}
+}
+
+static int dpaa2_eth_get_sset_count(struct net_device *net_dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS: /* ethtool_get_stats(), ethtool_get_drvinfo() */
+		return DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Fill in hardware counters, as returned by MC */
+static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
+					struct ethtool_stats *stats,
+					u64 *data)
+{
+	int i = 0;
+	int j, k, err;
+	int num_cnt;
+	union dpni_statistics dpni_stats;
+	u32 fcnt, bcnt;
+	u32 fcnt_rx_total = 0, fcnt_tx_total = 0;
+	u32 bcnt_rx_total = 0, bcnt_tx_total = 0;
+	u32 buf_cnt;
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpaa2_eth_drv_stats *extras;
+	struct dpaa2_eth_ch_stats *ch_stats;
+	int dpni_stats_page_size[DPNI_STATISTICS_CNT] = {
+		sizeof(dpni_stats.page_0),
+		sizeof(dpni_stats.page_1),
+		sizeof(dpni_stats.page_2),
+		sizeof(dpni_stats.page_3),
+		sizeof(dpni_stats.page_4),
+		sizeof(dpni_stats.page_5),
+		sizeof(dpni_stats.page_6),
+	};
+
+	memset(data, 0,
+	       sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
+
+	/* Print standard counters, from DPNI statistics */
+	for (j = 0; j <= 6; j++) {
+		/* We're not interested in pages 4 & 5 for now */
+		if (j == 4 || j == 5)
+			continue;
+		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token,
+					  j, &dpni_stats);
+		if (err == -EINVAL)
+			/* Older firmware versions don't support all pages */
+			memset(&dpni_stats, 0, sizeof(dpni_stats));
+		else if (err)
+			netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
+
+		num_cnt = dpni_stats_page_size[j] / sizeof(u64);
+		for (k = 0; k < num_cnt; k++)
+			*(data + i++) = dpni_stats.raw.counter[k];
+	}
+
+	/* Print per-cpu extra stats */
+	for_each_online_cpu(k) {
+		extras = per_cpu_ptr(priv->percpu_extras, k);
+		for (j = 0; j < sizeof(*extras) / sizeof(__u64); j++)
+			*((__u64 *)data + i + j) += *((__u64 *)extras + j);
+	}
+	i += j;
+
+	/* Per-channel stats */
+	for (k = 0; k < priv->num_channels; k++) {
+		ch_stats = &priv->channel[k]->stats;
+		for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64); j++)
+			*((__u64 *)data + i + j) += *((__u64 *)ch_stats + j);
+	}
+	i += j;
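+
+	/* Note: the two aggregation loops above walk the stats structures
+	 * as flat arrays of __u64; this is only safe because
+	 * dpaa2_eth_drv_stats and dpaa2_eth_ch_stats contain __u64 members
+	 * exclusively.
+	 */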
+
+	for (j = 0; j < priv->num_fqs; j++) {
+		/* Print FQ instantaneous counts */
+		err = dpaa2_io_query_fq_count(NULL, priv->fq[j].fqid,
+					      &fcnt, &bcnt);
+		if (err) {
+			netdev_warn(net_dev, "FQ query error %d", err);
+			return;
+		}
+
+		if (priv->fq[j].type == DPAA2_TX_CONF_FQ) {
+			fcnt_tx_total += fcnt;
+			bcnt_tx_total += bcnt;
+		} else {
+			fcnt_rx_total += fcnt;
+			bcnt_rx_total += bcnt;
+		}
+	}
+
+	*(data + i++) = fcnt_rx_total;
+	*(data + i++) = bcnt_rx_total;
+	*(data + i++) = fcnt_tx_total;
+	*(data + i++) = bcnt_tx_total;
+
+	err = dpaa2_io_query_bp_count(NULL, priv->bpid, &buf_cnt);
+	if (err) {
+		netdev_warn(net_dev, "Buffer count query error %d\n", err);
+		return;
+	}
+	*(data + i++) = buf_cnt;
+}
+
+static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
+			 void *key, void *mask, u64 *fields)
+{
+	int off;
+
+	if (eth_mask->h_proto) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_TYPE);
+		*(__be16 *)(key + off) = eth_value->h_proto;
+		*(__be16 *)(mask + off) = eth_mask->h_proto;
+		*fields |= DPAA2_ETH_DIST_ETHTYPE;
+	}
+
+	if (!is_zero_ether_addr(eth_mask->h_source)) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_SA);
+		ether_addr_copy(key + off, eth_value->h_source);
+		ether_addr_copy(mask + off, eth_mask->h_source);
+		*fields |= DPAA2_ETH_DIST_ETHSRC;
+	}
+
+	if (!is_zero_ether_addr(eth_mask->h_dest)) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_DA);
+		ether_addr_copy(key + off, eth_value->h_dest);
+		ether_addr_copy(mask + off, eth_mask->h_dest);
+		*fields |= DPAA2_ETH_DIST_ETHDST;
+	}
+
+	return 0;
+}
+
+static int prep_uip_rule(struct ethtool_usrip4_spec *uip_value,
+			 struct ethtool_usrip4_spec *uip_mask,
+			 void *key, void *mask, u64 *fields)
+{
+	int off;
+	u32 tmp_value, tmp_mask;
+
+	if (uip_mask->tos || uip_mask->ip_ver)
+		return -EOPNOTSUPP;
+
+	if (uip_mask->ip4src) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_SRC);
+		*(__be32 *)(key + off) = uip_value->ip4src;
+		*(__be32 *)(mask + off) = uip_mask->ip4src;
+		*fields |= DPAA2_ETH_DIST_IPSRC;
+	}
+
+	if (uip_mask->ip4dst) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_DST);
+		*(__be32 *)(key + off) = uip_value->ip4dst;
+		*(__be32 *)(mask + off) = uip_mask->ip4dst;
+		*fields |= DPAA2_ETH_DIST_IPDST;
+	}
+
+	if (uip_mask->proto) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_PROTO);
+		*(u8 *)(key + off) = uip_value->proto;
+		*(u8 *)(mask + off) = uip_mask->proto;
+		*fields |= DPAA2_ETH_DIST_IPPROTO;
+	}
+
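+	/* ethtool packs the L4 source port in the upper 16 bits of the
+	 * big-endian l4_4_bytes word and the destination port in the lower
+	 * 16 bits, hence the byte-order conversion and the two htons()
+	 * splits below.
+	 */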
+	if (uip_mask->l4_4_bytes) {
+		tmp_value = be32_to_cpu(uip_value->l4_4_bytes);
+		tmp_mask = be32_to_cpu(uip_mask->l4_4_bytes);
+
+		off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_SRC);
+		*(__be16 *)(key + off) = htons(tmp_value >> 16);
+		*(__be16 *)(mask + off) = htons(tmp_mask >> 16);
+		*fields |= DPAA2_ETH_DIST_L4SRC;
+
+		off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_DST);
+		*(__be16 *)(key + off) = htons(tmp_value & 0xFFFF);
+		*(__be16 *)(mask + off) = htons(tmp_mask & 0xFFFF);
+		*fields |= DPAA2_ETH_DIST_L4DST;
+	}
+
+	/* Only apply the rule for IPv4 frames */
+	off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_TYPE);
+	*(__be16 *)(key + off) = htons(ETH_P_IP);
+	*(__be16 *)(mask + off) = htons(0xFFFF);
+	*fields |= DPAA2_ETH_DIST_ETHTYPE;
+
+	return 0;
+}
+
+static int prep_l4_rule(struct ethtool_tcpip4_spec *l4_value,
+			struct ethtool_tcpip4_spec *l4_mask,
+			void *key, void *mask, u8 l4_proto, u64 *fields)
+{
+	int off;
+
+	if (l4_mask->tos)
+		return -EOPNOTSUPP;
+
+	if (l4_mask->ip4src) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_SRC);
+		*(__be32 *)(key + off) = l4_value->ip4src;
+		*(__be32 *)(mask + off) = l4_mask->ip4src;
+		*fields |= DPAA2_ETH_DIST_IPSRC;
+	}
+
+	if (l4_mask->ip4dst) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_DST);
+		*(__be32 *)(key + off) = l4_value->ip4dst;
+		*(__be32 *)(mask + off) = l4_mask->ip4dst;
+		*fields |= DPAA2_ETH_DIST_IPDST;
+	}
+
+	if (l4_mask->psrc) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_SRC);
+		*(__be16 *)(key + off) = l4_value->psrc;
+		*(__be16 *)(mask + off) = l4_mask->psrc;
+		*fields |= DPAA2_ETH_DIST_L4SRC;
+	}
+
+	if (l4_mask->pdst) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_UDP, NH_FLD_UDP_PORT_DST);
+		*(__be16 *)(key + off) = l4_value->pdst;
+		*(__be16 *)(mask + off) = l4_mask->pdst;
+		*fields |= DPAA2_ETH_DIST_L4DST;
+	}
+
+	/* Only apply the rule for IPv4 frames with the specified L4 proto */
+	off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_TYPE);
+	*(__be16 *)(key + off) = htons(ETH_P_IP);
+	*(__be16 *)(mask + off) = htons(0xFFFF);
+	*fields |= DPAA2_ETH_DIST_ETHTYPE;
+
+	off = dpaa2_eth_cls_fld_off(NET_PROT_IP, NH_FLD_IP_PROTO);
+	*(u8 *)(key + off) = l4_proto;
+	*(u8 *)(mask + off) = 0xFF;
+	*fields |= DPAA2_ETH_DIST_IPPROTO;
+
+	return 0;
+}
+
+static int prep_ext_rule(struct ethtool_flow_ext *ext_value,
+			 struct ethtool_flow_ext *ext_mask,
+			 void *key, void *mask, u64 *fields)
+{
+	int off;
+
+	if (ext_mask->vlan_etype)
+		return -EOPNOTSUPP;
+
+	if (ext_mask->vlan_tci) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_VLAN, NH_FLD_VLAN_TCI);
+		*(__be16 *)(key + off) = ext_value->vlan_tci;
+		*(__be16 *)(mask + off) = ext_mask->vlan_tci;
+		*fields |= DPAA2_ETH_DIST_VLAN;
+	}
+
+	return 0;
+}
+
+static int prep_mac_ext_rule(struct ethtool_flow_ext *ext_value,
+			     struct ethtool_flow_ext *ext_mask,
+			     void *key, void *mask, u64 *fields)
+{
+	int off;
+
+	if (!is_zero_ether_addr(ext_mask->h_dest)) {
+		off = dpaa2_eth_cls_fld_off(NET_PROT_ETH, NH_FLD_ETH_DA);
+		ether_addr_copy(key + off, ext_value->h_dest);
+		ether_addr_copy(mask + off, ext_mask->h_dest);
+		*fields |= DPAA2_ETH_DIST_ETHDST;
+	}
+
+	return 0;
+}
+
+static int prep_cls_rule(struct ethtool_rx_flow_spec *fs, void *key, void *mask,
+			 u64 *fields)
+{
+	int err;
+
+	switch (fs->flow_type & 0xFF) {
+	case ETHER_FLOW:
+		err = prep_eth_rule(&fs->h_u.ether_spec, &fs->m_u.ether_spec,
+				    key, mask, fields);
+		break;
+	case IP_USER_FLOW:
+		err = prep_uip_rule(&fs->h_u.usr_ip4_spec,
+				    &fs->m_u.usr_ip4_spec, key, mask, fields);
+		break;
+	case TCP_V4_FLOW:
+		err = prep_l4_rule(&fs->h_u.tcp_ip4_spec, &fs->m_u.tcp_ip4_spec,
+				   key, mask, IPPROTO_TCP, fields);
+		break;
+	case UDP_V4_FLOW:
+		err = prep_l4_rule(&fs->h_u.udp_ip4_spec, &fs->m_u.udp_ip4_spec,
+				   key, mask, IPPROTO_UDP, fields);
+		break;
+	case SCTP_V4_FLOW:
+		err = prep_l4_rule(&fs->h_u.sctp_ip4_spec,
+				   &fs->m_u.sctp_ip4_spec, key, mask,
+				   IPPROTO_SCTP, fields);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (err)
+		return err;
+
+	if (fs->flow_type & FLOW_EXT) {
+		err = prep_ext_rule(&fs->h_ext, &fs->m_ext, key, mask, fields);
+		if (err)
+			return err;
+	}
+
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		err = prep_mac_ext_rule(&fs->h_ext, &fs->m_ext, key, mask,
+					fields);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int do_cls_rule(struct net_device *net_dev,
+		       struct ethtool_rx_flow_spec *fs,
+		       bool add)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct device *dev = net_dev->dev.parent;
+	struct dpni_rule_cfg rule_cfg = { 0 };
+	struct dpni_fs_action_cfg fs_act = { 0 };
+	dma_addr_t key_iova;
+	u64 fields = 0;
+	void *key_buf;
+	int err;
+
+	if (fs->ring_cookie != RX_CLS_FLOW_DISC &&
+	    fs->ring_cookie >= dpaa2_eth_queue_count(priv))
+		return -EINVAL;
+
+	rule_cfg.key_size = dpaa2_eth_cls_key_size(DPAA2_ETH_DIST_ALL);
+
+	/* allocate twice the key size, for the actual key and for mask */
+	key_buf = kzalloc(rule_cfg.key_size * 2, GFP_KERNEL);
+	if (!key_buf)
+		return -ENOMEM;
+
+	/* Fill the key and mask memory areas */
+	err = prep_cls_rule(fs, key_buf, key_buf + rule_cfg.key_size, &fields);
+	if (err)
+		goto free_mem;
+
+	if (!dpaa2_eth_fs_mask_enabled(priv)) {
+		/* Masking allows us to configure a maximal key during init and
+		 * use it for all flow steering rules. Without it, we include
+		 * in the key only the fields actually used, so we need to
+		 * extract the others from the final key buffer.
+		 *
+		 * Program the FS key if needed, or return error if previously
+		 * set key can't be used for the current rule. User needs to
+		 * delete existing rules in this case to allow for the new one.
+		 */
+		if (!priv->rx_cls_fields) {
+			err = dpaa2_eth_set_cls(net_dev, fields);
+			if (err)
+				goto free_mem;
+
+			priv->rx_cls_fields = fields;
+		} else if (priv->rx_cls_fields != fields) {
+			netdev_err(net_dev, "No support for multiple FS keys, need to delete existing rules\n");
+			err = -EOPNOTSUPP;
+			goto free_mem;
+		}
+
+		dpaa2_eth_cls_trim_rule(key_buf, fields);
+		rule_cfg.key_size = dpaa2_eth_cls_key_size(fields);
+	}
+
+	key_iova = dma_map_single(dev, key_buf, rule_cfg.key_size * 2,
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, key_iova)) {
+		err = -ENOMEM;
+		goto free_mem;
+	}
+
+	rule_cfg.key_iova = key_iova;
+	if (dpaa2_eth_fs_mask_enabled(priv))
+		rule_cfg.mask_iova = key_iova + rule_cfg.key_size;
+
+	if (add) {
+		if (fs->ring_cookie == RX_CLS_FLOW_DISC)
+			fs_act.options |= DPNI_FS_OPT_DISCARD;
+		else
+			fs_act.flow_id = fs->ring_cookie;
+		err = dpni_add_fs_entry(priv->mc_io, 0, priv->mc_token, 0,
+					fs->location, &rule_cfg, &fs_act);
+	} else {
+		err = dpni_remove_fs_entry(priv->mc_io, 0, priv->mc_token, 0,
+					   &rule_cfg);
+	}
+
+	dma_unmap_single(dev, key_iova, rule_cfg.key_size * 2, DMA_TO_DEVICE);
+
+free_mem:
+	kfree(key_buf);
+
+	return err;
+}
+
+static int num_rules(struct dpaa2_eth_priv *priv)
+{
+	int i, rules = 0;
+
+	for (i = 0; i < dpaa2_eth_fs_count(priv); i++)
+		if (priv->cls_rules[i].in_use)
+			rules++;
+
+	return rules;
+}
+
+static int update_cls_rule(struct net_device *net_dev,
+			   struct ethtool_rx_flow_spec *new_fs,
+			   int location)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpaa2_eth_cls_rule *rule;
+	int err = -EINVAL;
+
+	if (!priv->rx_cls_enabled)
+		return -EOPNOTSUPP;
+
+	if (location >= dpaa2_eth_fs_count(priv))
+		return -EINVAL;
+
+	rule = &priv->cls_rules[location];
+
+	/* If a rule is present at the specified location, delete it. */
+	if (rule->in_use) {
+		err = do_cls_rule(net_dev, &rule->fs, false);
+		if (err)
+			return err;
+
+		rule->in_use = 0;
+
+		if (!dpaa2_eth_fs_mask_enabled(priv) && !num_rules(priv))
+			priv->rx_cls_fields = 0;
+	}
+
+	/* If no new entry to add, return here */
+	if (!new_fs)
+		return err;
+
+	err = do_cls_rule(net_dev, new_fs, true);
+	if (err)
+		return err;
+
+	rule->in_use = 1;
+	rule->fs = *new_fs;
+
+	return 0;
+}
+
+static int dpaa2_eth_get_rxnfc(struct net_device *net_dev,
+			       struct ethtool_rxnfc *rxnfc, u32 *rule_locs)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int max_rules = dpaa2_eth_fs_count(priv);
+	int i, j = 0;
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_GRXFH:
+		/* we purposely ignore cmd->flow_type for now, because the
+		 * classifier only supports a single set of fields for all
+		 * protocols
+		 */
+		rxnfc->data = priv->rx_hash_fields;
+		break;
+	case ETHTOOL_GRXRINGS:
+		rxnfc->data = dpaa2_eth_queue_count(priv);
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		rxnfc->rule_cnt = num_rules(priv);
+		rxnfc->data = max_rules;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		if (rxnfc->fs.location >= max_rules)
+			return -EINVAL;
+		rxnfc->fs.location = array_index_nospec(rxnfc->fs.location,
+							max_rules);
+		if (!priv->cls_rules[rxnfc->fs.location].in_use)
+			return -EINVAL;
+		rxnfc->fs = priv->cls_rules[rxnfc->fs.location].fs;
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		for (i = 0; i < max_rules; i++) {
+			if (!priv->cls_rules[i].in_use)
+				continue;
+			if (j == rxnfc->rule_cnt)
+				return -EMSGSIZE;
+			rule_locs[j++] = i;
+		}
+		rxnfc->rule_cnt = j;
+		rxnfc->data = max_rules;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int dpaa2_eth_set_rxnfc(struct net_device *net_dev,
+			       struct ethtool_rxnfc *rxnfc)
+{
+	int err = 0;
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_SRXFH:
+		if ((rxnfc->data & DPAA2_RXH_SUPPORTED) != rxnfc->data)
+			return -EOPNOTSUPP;
+		err = dpaa2_eth_set_hash(net_dev, rxnfc->data);
+		break;
+	case ETHTOOL_SRXCLSRLINS:
+		err = update_cls_rule(net_dev, &rxnfc->fs, rxnfc->fs.location);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		err = update_cls_rule(net_dev, NULL, rxnfc->fs.location);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+int dpaa2_phc_index = -1;
+EXPORT_SYMBOL(dpaa2_phc_index);
+
+static int dpaa2_eth_get_ts_info(struct net_device *dev,
+				 struct ethtool_ts_info *info)
+{
+	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->phc_index = dpaa2_phc_index;
+
+	info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+			 (1 << HWTSTAMP_TX_ON);
+
+	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			   (1 << HWTSTAMP_FILTER_ALL);
+	return 0;
+}
+
+const struct ethtool_ops dpaa2_ethtool_ops = {
+	.get_drvinfo = dpaa2_eth_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+	.get_link_ksettings = dpaa2_eth_get_link_ksettings,
+	.get_pauseparam = dpaa2_eth_get_pauseparam,
+	.set_pauseparam = dpaa2_eth_set_pauseparam,
+	.get_sset_count = dpaa2_eth_get_sset_count,
+	.get_ethtool_stats = dpaa2_eth_get_ethtool_stats,
+	.get_strings = dpaa2_eth_get_strings,
+	.get_rxnfc = dpaa2_eth_get_rxnfc,
+	.set_rxnfc = dpaa2_eth_set_rxnfc,
+	.get_ts_info = dpaa2_eth_get_ts_info,
+};
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
new file mode 100644
index 0000000..a9503ae
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2018 NXP
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/msi.h>
+#include <linux/fsl/mc.h>
+#include <linux/fsl/ptp_qoriq.h>
+
+#include "dpaa2-ptp.h"
+
+static int dpaa2_ptp_enable(struct ptp_clock_info *ptp,
+			    struct ptp_clock_request *rq, int on)
+{
+	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
+	struct fsl_mc_device *mc_dev;
+	struct device *dev;
+	u32 mask = 0;
+	u32 bit;
+	int err;
+
+	dev = ptp_qoriq->dev;
+	mc_dev = to_fsl_mc_device(dev);
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PPS:
+		bit = DPRTC_EVENT_PPS;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = dprtc_get_irq_mask(mc_dev->mc_io, 0, mc_dev->mc_handle,
+				 DPRTC_IRQ_INDEX, &mask);
+	if (err < 0) {
+		dev_err(dev, "dprtc_get_irq_mask(): %d\n", err);
+		return err;
+	}
+
+	if (on)
+		mask |= bit;
+	else
+		mask &= ~bit;
+
+	err = dprtc_set_irq_mask(mc_dev->mc_io, 0, mc_dev->mc_handle,
+				 DPRTC_IRQ_INDEX, mask);
+	if (err < 0) {
+		dev_err(dev, "dprtc_set_irq_mask(): %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct ptp_clock_info dpaa2_ptp_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "DPAA2 PTP Clock",
+	.max_adj	= 512000,
+	.n_alarm	= 2,
+	.n_ext_ts	= 2,
+	.n_per_out	= 3,
+	.n_pins		= 0,
+	.pps		= 1,
+	.adjfine	= ptp_qoriq_adjfine,
+	.adjtime	= ptp_qoriq_adjtime,
+	.gettime64	= ptp_qoriq_gettime,
+	.settime64	= ptp_qoriq_settime,
+	.enable		= dpaa2_ptp_enable,
+};
+
+static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv)
+{
+	struct ptp_qoriq *ptp_qoriq = priv;
+	struct ptp_clock_event event;
+	struct fsl_mc_device *mc_dev;
+	struct device *dev;
+	u32 status = 0;
+	int err;
+
+	dev = ptp_qoriq->dev;
+	mc_dev = to_fsl_mc_device(dev);
+
+	err = dprtc_get_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle,
+				   DPRTC_IRQ_INDEX, &status);
+	if (unlikely(err)) {
+		dev_err(dev, "dprtc_get_irq_status err %d\n", err);
+		return IRQ_NONE;
+	}
+
+	if (status & DPRTC_EVENT_PPS) {
+		event.type = PTP_CLOCK_PPS;
+		ptp_clock_event(ptp_qoriq->clock, &event);
+	}
+
+	err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle,
+				     DPRTC_IRQ_INDEX, status);
+	if (unlikely(err)) {
+		dev_err(dev, "dprtc_clear_irq_status err %d\n", err);
+		return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
+{
+	struct device *dev = &mc_dev->dev;
+	struct fsl_mc_device_irq *irq;
+	struct ptp_qoriq *ptp_qoriq;
+	struct device_node *node;
+	void __iomem *base;
+	int err;
+
+	ptp_qoriq = devm_kzalloc(dev, sizeof(*ptp_qoriq), GFP_KERNEL);
+	if (!ptp_qoriq)
+		return -ENOMEM;
+
+	err = fsl_mc_portal_allocate(mc_dev, 0, &mc_dev->mc_io);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "fsl_mc_portal_allocate err %d\n", err);
+		goto err_exit;
+	}
+
+	err = dprtc_open(mc_dev->mc_io, 0, mc_dev->obj_desc.id,
+			 &mc_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dprtc_open err %d\n", err);
+		goto err_free_mcp;
+	}
+
+	ptp_qoriq->dev = dev;
+
+	node = of_find_compatible_node(NULL, NULL, "fsl,dpaa2-ptp");
+	if (!node) {
+		err = -ENODEV;
+		goto err_close;
+	}
+
+	dev->of_node = node;
+
+	base = of_iomap(node, 0);
+	if (!base) {
+		err = -ENOMEM;
+		goto err_close;
+	}
+
+	err = fsl_mc_allocate_irqs(mc_dev);
+	if (err) {
+		dev_err(dev, "MC irqs allocation failed\n");
+		goto err_unmap;
+	}
+
+	irq = mc_dev->irqs[0];
+	ptp_qoriq->irq = irq->msi_desc->irq;
+
+	err = devm_request_threaded_irq(dev, ptp_qoriq->irq, NULL,
+					dpaa2_ptp_irq_handler_thread,
+					IRQF_NO_SUSPEND | IRQF_ONESHOT,
+					dev_name(dev), ptp_qoriq);
+	if (err < 0) {
+		dev_err(dev, "devm_request_threaded_irq(): %d\n", err);
+		goto err_free_mc_irq;
+	}
+
+	err = dprtc_set_irq_enable(mc_dev->mc_io, 0, mc_dev->mc_handle,
+				   DPRTC_IRQ_INDEX, 1);
+	if (err < 0) {
+		dev_err(dev, "dprtc_set_irq_enable(): %d\n", err);
+		goto err_free_mc_irq;
+	}
+
+	err = ptp_qoriq_init(ptp_qoriq, base, &dpaa2_ptp_caps);
+	if (err)
+		goto err_free_mc_irq;
+
+	dpaa2_phc_index = ptp_qoriq->phc_index;
+	dev_set_drvdata(dev, ptp_qoriq);
+
+	return 0;
+
+err_free_mc_irq:
+	fsl_mc_free_irqs(mc_dev);
+err_unmap:
+	iounmap(base);
+err_close:
+	dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
+err_free_mcp:
+	fsl_mc_portal_free(mc_dev->mc_io);
+err_exit:
+	return err;
+}
+
+static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev)
+{
+	struct device *dev = &mc_dev->dev;
+	struct ptp_qoriq *ptp_qoriq;
+
+	ptp_qoriq = dev_get_drvdata(dev);
+
+	dpaa2_phc_index = -1;
+	ptp_qoriq_free(ptp_qoriq);
+
+	fsl_mc_free_irqs(mc_dev);
+	dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
+	fsl_mc_portal_free(mc_dev->mc_io);
+
+	return 0;
+}
+
+static const struct fsl_mc_device_id dpaa2_ptp_match_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dprtc",
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(fslmc, dpaa2_ptp_match_id_table);
+
+static struct fsl_mc_driver dpaa2_ptp_drv = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = dpaa2_ptp_probe,
+	.remove = dpaa2_ptp_remove,
+	.match_id_table = dpaa2_ptp_match_id_table,
+};
+
+module_fsl_mc_driver(dpaa2_ptp_drv);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DPAA2 PTP Clock Driver");
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h
new file mode 100644
index 0000000..df2458a
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018 NXP
+ */
+
+#ifndef __RTC_H
+#define __RTC_H
+
+#include "dprtc.h"
+#include "dprtc-cmd.h"
+
+extern int dpaa2_phc_index;
+
+#endif
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpkg.h b/drivers/net/ethernet/freescale/dpaa2/dpkg.h
new file mode 100644
index 0000000..6de613b
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpkg.h
@@ -0,0 +1,480 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2015 Freescale Semiconductor Inc.
+ */
+#ifndef __FSL_DPKG_H_
+#define __FSL_DPKG_H_
+
+#include <linux/types.h>
+
+/* Data Path Key Generator API
+ * Contains initialization APIs and runtime APIs for the Key Generator
+ */
+
+/** Key Generator properties */
+
+/**
+ * Number of masks per key extraction
+ */
+#define DPKG_NUM_OF_MASKS		4
+/**
+ * Number of extractions per key profile
+ */
+#define DPKG_MAX_NUM_OF_EXTRACTS	10
+
+/**
+ * enum dpkg_extract_from_hdr_type - Selecting extraction by header types
+ * @DPKG_FROM_HDR: Extract selected bytes from header, by offset
+ * @DPKG_FROM_FIELD: Extract selected bytes from header, by offset from field
+ * @DPKG_FULL_FIELD: Extract a full field
+ */
+enum dpkg_extract_from_hdr_type {
+	DPKG_FROM_HDR = 0,
+	DPKG_FROM_FIELD = 1,
+	DPKG_FULL_FIELD = 2
+};
+
+/**
+ * enum dpkg_extract_type - Enumeration for selecting extraction type
+ * @DPKG_EXTRACT_FROM_HDR: Extract from the header
+ * @DPKG_EXTRACT_FROM_DATA: Extract from data not in specific header
+ * @DPKG_EXTRACT_FROM_PARSE: Extract from parser-result;
+ *	e.g. can be used to extract header existence;
+ *	please refer to 'Parse Result definition' section in the parser BG
+ */
+enum dpkg_extract_type {
+	DPKG_EXTRACT_FROM_HDR = 0,
+	DPKG_EXTRACT_FROM_DATA = 1,
+	DPKG_EXTRACT_FROM_PARSE = 3
+};
+
+/**
+ * struct dpkg_mask - A structure for defining a single extraction mask
+ * @mask: Byte mask for the extracted content
+ * @offset: Offset within the extracted content
+ */
+struct dpkg_mask {
+	u8 mask;
+	u8 offset;
+};
+
+/* Protocol fields */
+
+/* Ethernet fields */
+#define NH_FLD_ETH_DA				BIT(0)
+#define NH_FLD_ETH_SA				BIT(1)
+#define NH_FLD_ETH_LENGTH			BIT(2)
+#define NH_FLD_ETH_TYPE				BIT(3)
+#define NH_FLD_ETH_FINAL_CKSUM			BIT(4)
+#define NH_FLD_ETH_PADDING			BIT(5)
+#define NH_FLD_ETH_ALL_FIELDS			(BIT(6) - 1)
+
+/* VLAN fields */
+#define NH_FLD_VLAN_VPRI			BIT(0)
+#define NH_FLD_VLAN_CFI				BIT(1)
+#define NH_FLD_VLAN_VID				BIT(2)
+#define NH_FLD_VLAN_LENGTH			BIT(3)
+#define NH_FLD_VLAN_TYPE			BIT(4)
+#define NH_FLD_VLAN_ALL_FIELDS			(BIT(5) - 1)
+
+#define NH_FLD_VLAN_TCI				(NH_FLD_VLAN_VPRI | \
+						 NH_FLD_VLAN_CFI | \
+						 NH_FLD_VLAN_VID)
+
+/* IP (generic) fields */
+#define NH_FLD_IP_VER				BIT(0)
+#define NH_FLD_IP_DSCP				BIT(2)
+#define NH_FLD_IP_ECN				BIT(3)
+#define NH_FLD_IP_PROTO				BIT(4)
+#define NH_FLD_IP_SRC				BIT(5)
+#define NH_FLD_IP_DST				BIT(6)
+#define NH_FLD_IP_TOS_TC			BIT(7)
+#define NH_FLD_IP_ID				BIT(8)
+#define NH_FLD_IP_ALL_FIELDS			(BIT(9) - 1)
+
+/* IPV4 fields */
+#define NH_FLD_IPV4_VER				BIT(0)
+#define NH_FLD_IPV4_HDR_LEN			BIT(1)
+#define NH_FLD_IPV4_TOS				BIT(2)
+#define NH_FLD_IPV4_TOTAL_LEN			BIT(3)
+#define NH_FLD_IPV4_ID				BIT(4)
+#define NH_FLD_IPV4_FLAG_D			BIT(5)
+#define NH_FLD_IPV4_FLAG_M			BIT(6)
+#define NH_FLD_IPV4_OFFSET			BIT(7)
+#define NH_FLD_IPV4_TTL				BIT(8)
+#define NH_FLD_IPV4_PROTO			BIT(9)
+#define NH_FLD_IPV4_CKSUM			BIT(10)
+#define NH_FLD_IPV4_SRC_IP			BIT(11)
+#define NH_FLD_IPV4_DST_IP			BIT(12)
+#define NH_FLD_IPV4_OPTS			BIT(13)
+#define NH_FLD_IPV4_OPTS_COUNT			BIT(14)
+#define NH_FLD_IPV4_ALL_FIELDS			(BIT(15) - 1)
+
+/* IPV6 fields */
+#define NH_FLD_IPV6_VER				BIT(0)
+#define NH_FLD_IPV6_TC				BIT(1)
+#define NH_FLD_IPV6_SRC_IP			BIT(2)
+#define NH_FLD_IPV6_DST_IP			BIT(3)
+#define NH_FLD_IPV6_NEXT_HDR			BIT(4)
+#define NH_FLD_IPV6_FL				BIT(5)
+#define NH_FLD_IPV6_HOP_LIMIT			BIT(6)
+#define NH_FLD_IPV6_ID				BIT(7)
+#define NH_FLD_IPV6_ALL_FIELDS			(BIT(8) - 1)
+
+/* ICMP fields */
+#define NH_FLD_ICMP_TYPE			BIT(0)
+#define NH_FLD_ICMP_CODE			BIT(1)
+#define NH_FLD_ICMP_CKSUM			BIT(2)
+#define NH_FLD_ICMP_ID				BIT(3)
+#define NH_FLD_ICMP_SQ_NUM			BIT(4)
+#define NH_FLD_ICMP_ALL_FIELDS			(BIT(5) - 1)
+
+/* IGMP fields */
+#define NH_FLD_IGMP_VERSION			BIT(0)
+#define NH_FLD_IGMP_TYPE			BIT(1)
+#define NH_FLD_IGMP_CKSUM			BIT(2)
+#define NH_FLD_IGMP_DATA			BIT(3)
+#define NH_FLD_IGMP_ALL_FIELDS			(BIT(4) - 1)
+
+/* TCP fields */
+#define NH_FLD_TCP_PORT_SRC			BIT(0)
+#define NH_FLD_TCP_PORT_DST			BIT(1)
+#define NH_FLD_TCP_SEQ				BIT(2)
+#define NH_FLD_TCP_ACK				BIT(3)
+#define NH_FLD_TCP_OFFSET			BIT(4)
+#define NH_FLD_TCP_FLAGS			BIT(5)
+#define NH_FLD_TCP_WINDOW			BIT(6)
+#define NH_FLD_TCP_CKSUM			BIT(7)
+#define NH_FLD_TCP_URGPTR			BIT(8)
+#define NH_FLD_TCP_OPTS				BIT(9)
+#define NH_FLD_TCP_OPTS_COUNT			BIT(10)
+#define NH_FLD_TCP_ALL_FIELDS			(BIT(11) - 1)
+
+/* UDP fields */
+#define NH_FLD_UDP_PORT_SRC			BIT(0)
+#define NH_FLD_UDP_PORT_DST			BIT(1)
+#define NH_FLD_UDP_LEN				BIT(2)
+#define NH_FLD_UDP_CKSUM			BIT(3)
+#define NH_FLD_UDP_ALL_FIELDS			(BIT(4) - 1)
+
+/* UDP-lite fields */
+#define NH_FLD_UDP_LITE_PORT_SRC		BIT(0)
+#define NH_FLD_UDP_LITE_PORT_DST		BIT(1)
+#define NH_FLD_UDP_LITE_ALL_FIELDS		(BIT(2) - 1)
+
+/* UDP-encap-ESP fields */
+#define NH_FLD_UDP_ENC_ESP_PORT_SRC		BIT(0)
+#define NH_FLD_UDP_ENC_ESP_PORT_DST		BIT(1)
+#define NH_FLD_UDP_ENC_ESP_LEN			BIT(2)
+#define NH_FLD_UDP_ENC_ESP_CKSUM		BIT(3)
+#define NH_FLD_UDP_ENC_ESP_SPI			BIT(4)
+#define NH_FLD_UDP_ENC_ESP_SEQUENCE_NUM		BIT(5)
+#define NH_FLD_UDP_ENC_ESP_ALL_FIELDS		(BIT(6) - 1)
+
+/* SCTP fields */
+#define NH_FLD_SCTP_PORT_SRC			BIT(0)
+#define NH_FLD_SCTP_PORT_DST			BIT(1)
+#define NH_FLD_SCTP_VER_TAG			BIT(2)
+#define NH_FLD_SCTP_CKSUM			BIT(3)
+#define NH_FLD_SCTP_ALL_FIELDS			(BIT(4) - 1)
+
+/* DCCP fields */
+#define NH_FLD_DCCP_PORT_SRC			BIT(0)
+#define NH_FLD_DCCP_PORT_DST			BIT(1)
+#define NH_FLD_DCCP_ALL_FIELDS			(BIT(2) - 1)
+
+/* IPHC fields */
+#define NH_FLD_IPHC_CID				BIT(0)
+#define NH_FLD_IPHC_CID_TYPE			BIT(1)
+#define NH_FLD_IPHC_HCINDEX			BIT(2)
+#define NH_FLD_IPHC_GEN				BIT(3)
+#define NH_FLD_IPHC_D_BIT			BIT(4)
+#define NH_FLD_IPHC_ALL_FIELDS			(BIT(5) - 1)
+
+/* SCTP chunk data fields */
+#define NH_FLD_SCTP_CHUNK_DATA_TYPE		BIT(0)
+#define NH_FLD_SCTP_CHUNK_DATA_FLAGS		BIT(1)
+#define NH_FLD_SCTP_CHUNK_DATA_LENGTH		BIT(2)
+#define NH_FLD_SCTP_CHUNK_DATA_TSN		BIT(3)
+#define NH_FLD_SCTP_CHUNK_DATA_STREAM_ID	BIT(4)
+#define NH_FLD_SCTP_CHUNK_DATA_STREAM_SQN	BIT(5)
+#define NH_FLD_SCTP_CHUNK_DATA_PAYLOAD_PID	BIT(6)
+#define NH_FLD_SCTP_CHUNK_DATA_UNORDERED	BIT(7)
+#define NH_FLD_SCTP_CHUNK_DATA_BEGGINING	BIT(8)
+#define NH_FLD_SCTP_CHUNK_DATA_END		BIT(9)
+#define NH_FLD_SCTP_CHUNK_DATA_ALL_FIELDS	(BIT(10) - 1)
+
+/* L2TPV2 fields */
+#define NH_FLD_L2TPV2_TYPE_BIT			BIT(0)
+#define NH_FLD_L2TPV2_LENGTH_BIT		BIT(1)
+#define NH_FLD_L2TPV2_SEQUENCE_BIT		BIT(2)
+#define NH_FLD_L2TPV2_OFFSET_BIT		BIT(3)
+#define NH_FLD_L2TPV2_PRIORITY_BIT		BIT(4)
+#define NH_FLD_L2TPV2_VERSION			BIT(5)
+#define NH_FLD_L2TPV2_LEN			BIT(6)
+#define NH_FLD_L2TPV2_TUNNEL_ID			BIT(7)
+#define NH_FLD_L2TPV2_SESSION_ID		BIT(8)
+#define NH_FLD_L2TPV2_NS			BIT(9)
+#define NH_FLD_L2TPV2_NR			BIT(10)
+#define NH_FLD_L2TPV2_OFFSET_SIZE		BIT(11)
+#define NH_FLD_L2TPV2_FIRST_BYTE		BIT(12)
+#define NH_FLD_L2TPV2_ALL_FIELDS		(BIT(13) - 1)
+
+/* L2TPV3 fields */
+#define NH_FLD_L2TPV3_CTRL_TYPE_BIT		BIT(0)
+#define NH_FLD_L2TPV3_CTRL_LENGTH_BIT		BIT(1)
+#define NH_FLD_L2TPV3_CTRL_SEQUENCE_BIT		BIT(2)
+#define NH_FLD_L2TPV3_CTRL_VERSION		BIT(3)
+#define NH_FLD_L2TPV3_CTRL_LENGTH		BIT(4)
+#define NH_FLD_L2TPV3_CTRL_CONTROL		BIT(5)
+#define NH_FLD_L2TPV3_CTRL_SENT			BIT(6)
+#define NH_FLD_L2TPV3_CTRL_RECV			BIT(7)
+#define NH_FLD_L2TPV3_CTRL_FIRST_BYTE		BIT(8)
+#define NH_FLD_L2TPV3_CTRL_ALL_FIELDS		(BIT(9) - 1)
+
+#define NH_FLD_L2TPV3_SESS_TYPE_BIT		BIT(0)
+#define NH_FLD_L2TPV3_SESS_VERSION		BIT(1)
+#define NH_FLD_L2TPV3_SESS_ID			BIT(2)
+#define NH_FLD_L2TPV3_SESS_COOKIE		BIT(3)
+#define NH_FLD_L2TPV3_SESS_ALL_FIELDS		(BIT(4) - 1)
+
+/* PPP fields */
+#define NH_FLD_PPP_PID				BIT(0)
+#define NH_FLD_PPP_COMPRESSED			BIT(1)
+#define NH_FLD_PPP_ALL_FIELDS			(BIT(2) - 1)
+
+/* PPPoE fields */
+#define NH_FLD_PPPOE_VER			BIT(0)
+#define NH_FLD_PPPOE_TYPE			BIT(1)
+#define NH_FLD_PPPOE_CODE			BIT(2)
+#define NH_FLD_PPPOE_SID			BIT(3)
+#define NH_FLD_PPPOE_LEN			BIT(4)
+#define NH_FLD_PPPOE_SESSION			BIT(5)
+#define NH_FLD_PPPOE_PID			BIT(6)
+#define NH_FLD_PPPOE_ALL_FIELDS			(BIT(7) - 1)
+
+/* PPP-Mux fields */
+#define NH_FLD_PPPMUX_PID			BIT(0)
+#define NH_FLD_PPPMUX_CKSUM			BIT(1)
+#define NH_FLD_PPPMUX_COMPRESSED		BIT(2)
+#define NH_FLD_PPPMUX_ALL_FIELDS		(BIT(3) - 1)
+
+/* PPP-Mux sub-frame fields */
+#define NH_FLD_PPPMUX_SUBFRM_PFF		BIT(0)
+#define NH_FLD_PPPMUX_SUBFRM_LXT		BIT(1)
+#define NH_FLD_PPPMUX_SUBFRM_LEN		BIT(2)
+#define NH_FLD_PPPMUX_SUBFRM_PID		BIT(3)
+#define NH_FLD_PPPMUX_SUBFRM_USE_PID		BIT(4)
+#define NH_FLD_PPPMUX_SUBFRM_ALL_FIELDS		(BIT(5) - 1)
+
+/* LLC fields */
+#define NH_FLD_LLC_DSAP				BIT(0)
+#define NH_FLD_LLC_SSAP				BIT(1)
+#define NH_FLD_LLC_CTRL				BIT(2)
+#define NH_FLD_LLC_ALL_FIELDS			(BIT(3) - 1)
+
+/* NLPID fields */
+#define NH_FLD_NLPID_NLPID			BIT(0)
+#define NH_FLD_NLPID_ALL_FIELDS			(BIT(1) - 1)
+
+/* SNAP fields */
+#define NH_FLD_SNAP_OUI				BIT(0)
+#define NH_FLD_SNAP_PID				BIT(1)
+#define NH_FLD_SNAP_ALL_FIELDS			(BIT(2) - 1)
+
+/* LLC SNAP fields */
+#define NH_FLD_LLC_SNAP_TYPE			BIT(0)
+#define NH_FLD_LLC_SNAP_ALL_FIELDS		(BIT(1) - 1)
+
+/* ARP fields */
+#define NH_FLD_ARP_HTYPE			BIT(0)
+#define NH_FLD_ARP_PTYPE			BIT(1)
+#define NH_FLD_ARP_HLEN				BIT(2)
+#define NH_FLD_ARP_PLEN				BIT(3)
+#define NH_FLD_ARP_OPER				BIT(4)
+#define NH_FLD_ARP_SHA				BIT(5)
+#define NH_FLD_ARP_SPA				BIT(6)
+#define NH_FLD_ARP_THA				BIT(7)
+#define NH_FLD_ARP_TPA				BIT(8)
+#define NH_FLD_ARP_ALL_FIELDS			(BIT(9) - 1)
+
+/* RFC2684 fields */
+#define NH_FLD_RFC2684_LLC			BIT(0)
+#define NH_FLD_RFC2684_NLPID			BIT(1)
+#define NH_FLD_RFC2684_OUI			BIT(2)
+#define NH_FLD_RFC2684_PID			BIT(3)
+#define NH_FLD_RFC2684_VPN_OUI			BIT(4)
+#define NH_FLD_RFC2684_VPN_IDX			BIT(5)
+#define NH_FLD_RFC2684_ALL_FIELDS		(BIT(6) - 1)
+
+/* User defined fields */
+#define NH_FLD_USER_DEFINED_SRCPORT		BIT(0)
+#define NH_FLD_USER_DEFINED_PCDID		BIT(1)
+#define NH_FLD_USER_DEFINED_ALL_FIELDS		(BIT(2) - 1)
+
+/* Payload fields */
+#define NH_FLD_PAYLOAD_BUFFER			BIT(0)
+#define NH_FLD_PAYLOAD_SIZE			BIT(1)
+#define NH_FLD_MAX_FRM_SIZE			BIT(2)
+#define NH_FLD_MIN_FRM_SIZE			BIT(3)
+#define NH_FLD_PAYLOAD_TYPE			BIT(4)
+#define NH_FLD_FRAME_SIZE			BIT(5)
+#define NH_FLD_PAYLOAD_ALL_FIELDS		(BIT(6) - 1)
+
+/* GRE fields */
+#define NH_FLD_GRE_TYPE				BIT(0)
+#define NH_FLD_GRE_ALL_FIELDS			(BIT(1) - 1)
+
+/* MINENCAP fields */
+#define NH_FLD_MINENCAP_SRC_IP			BIT(0)
+#define NH_FLD_MINENCAP_DST_IP			BIT(1)
+#define NH_FLD_MINENCAP_TYPE			BIT(2)
+#define NH_FLD_MINENCAP_ALL_FIELDS		(BIT(3) - 1)
+
+/* IPSEC AH fields */
+#define NH_FLD_IPSEC_AH_SPI			BIT(0)
+#define NH_FLD_IPSEC_AH_NH			BIT(1)
+#define NH_FLD_IPSEC_AH_ALL_FIELDS		(BIT(2) - 1)
+
+/* IPSEC ESP fields */
+#define NH_FLD_IPSEC_ESP_SPI			BIT(0)
+#define NH_FLD_IPSEC_ESP_SEQUENCE_NUM		BIT(1)
+#define NH_FLD_IPSEC_ESP_ALL_FIELDS		(BIT(2) - 1)
+
+/* MPLS fields */
+#define NH_FLD_MPLS_LABEL_STACK			BIT(0)
+#define NH_FLD_MPLS_LABEL_STACK_ALL_FIELDS	(BIT(1) - 1)
+
+/* MACSEC fields */
+#define NH_FLD_MACSEC_SECTAG			BIT(0)
+#define NH_FLD_MACSEC_ALL_FIELDS		(BIT(1) - 1)
+
+/* GTP fields */
+#define NH_FLD_GTP_TEID				BIT(0)
+
+/* Supported protocols */
+enum net_prot {
+	NET_PROT_NONE = 0,
+	NET_PROT_PAYLOAD,
+	NET_PROT_ETH,
+	NET_PROT_VLAN,
+	NET_PROT_IPV4,
+	NET_PROT_IPV6,
+	NET_PROT_IP,
+	NET_PROT_TCP,
+	NET_PROT_UDP,
+	NET_PROT_UDP_LITE,
+	NET_PROT_IPHC,
+	NET_PROT_SCTP,
+	NET_PROT_SCTP_CHUNK_DATA,
+	NET_PROT_PPPOE,
+	NET_PROT_PPP,
+	NET_PROT_PPPMUX,
+	NET_PROT_PPPMUX_SUBFRM,
+	NET_PROT_L2TPV2,
+	NET_PROT_L2TPV3_CTRL,
+	NET_PROT_L2TPV3_SESS,
+	NET_PROT_LLC,
+	NET_PROT_LLC_SNAP,
+	NET_PROT_NLPID,
+	NET_PROT_SNAP,
+	NET_PROT_MPLS,
+	NET_PROT_IPSEC_AH,
+	NET_PROT_IPSEC_ESP,
+	NET_PROT_UDP_ENC_ESP, /* RFC 3948 */
+	NET_PROT_MACSEC,
+	NET_PROT_GRE,
+	NET_PROT_MINENCAP,
+	NET_PROT_DCCP,
+	NET_PROT_ICMP,
+	NET_PROT_IGMP,
+	NET_PROT_ARP,
+	NET_PROT_CAPWAP_DATA,
+	NET_PROT_CAPWAP_CTRL,
+	NET_PROT_RFC2684,
+	NET_PROT_ICMPV6,
+	NET_PROT_FCOE,
+	NET_PROT_FIP,
+	NET_PROT_ISCSI,
+	NET_PROT_GTP,
+	NET_PROT_USER_DEFINED_L2,
+	NET_PROT_USER_DEFINED_L3,
+	NET_PROT_USER_DEFINED_L4,
+	NET_PROT_USER_DEFINED_L5,
+	NET_PROT_USER_DEFINED_SHIM1,
+	NET_PROT_USER_DEFINED_SHIM2,
+
+	NET_PROT_DUMMY_LAST
+};
+
+/**
+ * struct dpkg_extract - A structure for defining a single extraction
+ * @type: Determines how the union below is interpreted:
+ *	DPKG_EXTRACT_FROM_HDR: selects 'from_hdr';
+ *	DPKG_EXTRACT_FROM_DATA: selects 'from_data';
+ *	DPKG_EXTRACT_FROM_PARSE: selects 'from_parse'
+ * @extract: Selects extraction method
+ * @extract.from_hdr: Used when 'type = DPKG_EXTRACT_FROM_HDR'
+ * @extract.from_data: Used when 'type = DPKG_EXTRACT_FROM_DATA'
+ * @extract.from_parse:  Used when 'type = DPKG_EXTRACT_FROM_PARSE'
+ * @extract.from_hdr.prot: Any of the supported headers
+ * @extract.from_hdr.type: Defines the type of header extraction:
+ *	DPKG_FROM_HDR: use size & offset below;
+ *	DPKG_FROM_FIELD: use field, size and offset below;
+ *	DPKG_FULL_FIELD: use field below
+ * @extract.from_hdr.field: One of the supported fields (NH_FLD_)
+ * @extract.from_hdr.size: Size in bytes
+ * @extract.from_hdr.offset: Byte offset
+ * @extract.from_hdr.hdr_index: Clear for cases not listed below;
+ *	Used for protocols that may have more than a single
+ *	header, 0 indicates an outer header;
+ *	Supported protocols (possible values):
+ *	NET_PROT_VLAN (0, HDR_INDEX_LAST);
+ *	NET_PROT_MPLS (0, 1, HDR_INDEX_LAST);
+ *	NET_PROT_IP(0, HDR_INDEX_LAST);
+ *	NET_PROT_IPv4(0, HDR_INDEX_LAST);
+ *	NET_PROT_IPv6(0, HDR_INDEX_LAST);
+ * @extract.from_data.size: Size in bytes
+ * @extract.from_data.offset: Byte offset
+ * @extract.from_parse.size: Size in bytes
+ * @extract.from_parse.offset: Byte offset
+ * @num_of_byte_masks: Defines the number of valid entries in the array below;
+ *		This is also the number of bytes to be used as masks
+ * @masks: Masks parameters
+ */
+struct dpkg_extract {
+	enum dpkg_extract_type type;
+	union {
+		struct {
+			enum net_prot			prot;
+			enum dpkg_extract_from_hdr_type type;
+			u32			field;
+			u8			size;
+			u8			offset;
+			u8			hdr_index;
+		} from_hdr;
+		struct {
+			u8 size;
+			u8 offset;
+		} from_data;
+		struct {
+			u8 size;
+			u8 offset;
+		} from_parse;
+	} extract;
+
+	u8		num_of_byte_masks;
+	struct dpkg_mask	masks[DPKG_NUM_OF_MASKS];
+};
+
+/**
+ * struct dpkg_profile_cfg - A structure for defining a full Key Generation
+ *				profile (rule)
+ * @num_extracts: Defines the number of valid entries in the array below
+ * @extracts: Array of required extractions
+ */
+struct dpkg_profile_cfg {
+	u8 num_extracts;
+	struct dpkg_extract extracts[DPKG_MAX_NUM_OF_EXTRACTS];
+};
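+
+/* Illustrative only: a minimal profile extracting the full IPv4 source
+ * address field could be built roughly as follows (a sketch, not code
+ * taken from the driver):
+ *
+ *	struct dpkg_profile_cfg cfg = { .num_extracts = 1 };
+ *
+ *	cfg.extracts[0].type = DPKG_EXTRACT_FROM_HDR;
+ *	cfg.extracts[0].extract.from_hdr.prot = NET_PROT_IP;
+ *	cfg.extracts[0].extract.from_hdr.type = DPKG_FULL_FIELD;
+ *	cfg.extracts[0].extract.from_hdr.field = NH_FLD_IP_SRC;
+ */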
+
+#endif /* __FSL_DPKG_H_ */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
new file mode 100644
index 0000000..d9b6918
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ */
+#ifndef _FSL_DPNI_CMD_H
+#define _FSL_DPNI_CMD_H
+
+#include "dpni.h"
+
+/* DPNI Version */
+#define DPNI_VER_MAJOR				7
+#define DPNI_VER_MINOR				0
+#define DPNI_CMD_BASE_VERSION			1
+#define DPNI_CMD_ID_OFFSET			4
+
+#define DPNI_CMD(id)	(((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION)
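+/* e.g. DPNI_CMDID_OPEN below expands to (0x801 << 4) | 1 = 0x8011 */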
+
+#define DPNI_CMDID_OPEN					DPNI_CMD(0x801)
+#define DPNI_CMDID_CLOSE				DPNI_CMD(0x800)
+#define DPNI_CMDID_CREATE				DPNI_CMD(0x901)
+#define DPNI_CMDID_DESTROY				DPNI_CMD(0x900)
+#define DPNI_CMDID_GET_API_VERSION			DPNI_CMD(0xa01)
+
+#define DPNI_CMDID_ENABLE				DPNI_CMD(0x002)
+#define DPNI_CMDID_DISABLE				DPNI_CMD(0x003)
+#define DPNI_CMDID_GET_ATTR				DPNI_CMD(0x004)
+#define DPNI_CMDID_RESET				DPNI_CMD(0x005)
+#define DPNI_CMDID_IS_ENABLED				DPNI_CMD(0x006)
+
+#define DPNI_CMDID_SET_IRQ				DPNI_CMD(0x010)
+#define DPNI_CMDID_GET_IRQ				DPNI_CMD(0x011)
+#define DPNI_CMDID_SET_IRQ_ENABLE			DPNI_CMD(0x012)
+#define DPNI_CMDID_GET_IRQ_ENABLE			DPNI_CMD(0x013)
+#define DPNI_CMDID_SET_IRQ_MASK				DPNI_CMD(0x014)
+#define DPNI_CMDID_GET_IRQ_MASK				DPNI_CMD(0x015)
+#define DPNI_CMDID_GET_IRQ_STATUS			DPNI_CMD(0x016)
+#define DPNI_CMDID_CLEAR_IRQ_STATUS			DPNI_CMD(0x017)
+
+#define DPNI_CMDID_SET_POOLS				DPNI_CMD(0x200)
+#define DPNI_CMDID_SET_ERRORS_BEHAVIOR			DPNI_CMD(0x20B)
+
+#define DPNI_CMDID_GET_QDID				DPNI_CMD(0x210)
+#define DPNI_CMDID_GET_TX_DATA_OFFSET			DPNI_CMD(0x212)
+#define DPNI_CMDID_GET_LINK_STATE			DPNI_CMD(0x215)
+#define DPNI_CMDID_SET_MAX_FRAME_LENGTH			DPNI_CMD(0x216)
+#define DPNI_CMDID_GET_MAX_FRAME_LENGTH			DPNI_CMD(0x217)
+#define DPNI_CMDID_SET_LINK_CFG				DPNI_CMD(0x21A)
+#define DPNI_CMDID_SET_TX_SHAPING			DPNI_CMD(0x21B)
+
+#define DPNI_CMDID_SET_MCAST_PROMISC			DPNI_CMD(0x220)
+#define DPNI_CMDID_GET_MCAST_PROMISC			DPNI_CMD(0x221)
+#define DPNI_CMDID_SET_UNICAST_PROMISC			DPNI_CMD(0x222)
+#define DPNI_CMDID_GET_UNICAST_PROMISC			DPNI_CMD(0x223)
+#define DPNI_CMDID_SET_PRIM_MAC				DPNI_CMD(0x224)
+#define DPNI_CMDID_GET_PRIM_MAC				DPNI_CMD(0x225)
+#define DPNI_CMDID_ADD_MAC_ADDR				DPNI_CMD(0x226)
+#define DPNI_CMDID_REMOVE_MAC_ADDR			DPNI_CMD(0x227)
+#define DPNI_CMDID_CLR_MAC_FILTERS			DPNI_CMD(0x228)
+
+#define DPNI_CMDID_SET_RX_TC_DIST			DPNI_CMD(0x235)
+
+#define DPNI_CMDID_ADD_FS_ENT				DPNI_CMD(0x244)
+#define DPNI_CMDID_REMOVE_FS_ENT			DPNI_CMD(0x245)
+#define DPNI_CMDID_CLR_FS_ENT				DPNI_CMD(0x246)
+
+#define DPNI_CMDID_GET_STATISTICS			DPNI_CMD(0x25D)
+#define DPNI_CMDID_GET_QUEUE				DPNI_CMD(0x25F)
+#define DPNI_CMDID_SET_QUEUE				DPNI_CMD(0x260)
+#define DPNI_CMDID_GET_TAILDROP				DPNI_CMD(0x261)
+#define DPNI_CMDID_SET_TAILDROP				DPNI_CMD(0x262)
+
+#define DPNI_CMDID_GET_PORT_MAC_ADDR			DPNI_CMD(0x263)
+
+#define DPNI_CMDID_GET_BUFFER_LAYOUT			DPNI_CMD(0x264)
+#define DPNI_CMDID_SET_BUFFER_LAYOUT			DPNI_CMD(0x265)
+
+#define DPNI_CMDID_SET_TX_CONFIRMATION_MODE		DPNI_CMD(0x266)
+#define DPNI_CMDID_SET_CONGESTION_NOTIFICATION		DPNI_CMD(0x267)
+#define DPNI_CMDID_GET_CONGESTION_NOTIFICATION		DPNI_CMD(0x268)
+#define DPNI_CMDID_SET_EARLY_DROP			DPNI_CMD(0x269)
+#define DPNI_CMDID_GET_EARLY_DROP			DPNI_CMD(0x26A)
+#define DPNI_CMDID_GET_OFFLOAD				DPNI_CMD(0x26B)
+#define DPNI_CMDID_SET_OFFLOAD				DPNI_CMD(0x26C)
+
+#define DPNI_CMDID_SET_RX_FS_DIST			DPNI_CMD(0x273)
+#define DPNI_CMDID_SET_RX_HASH_DIST			DPNI_CMD(0x274)
+#define DPNI_CMDID_GET_LINK_CFG				DPNI_CMD(0x278)
+
+/* Macros for accessing command fields smaller than 1 byte */
+#define DPNI_MASK(field)	\
+	GENMASK(DPNI_##field##_SHIFT + DPNI_##field##_SIZE - 1, \
+		DPNI_##field##_SHIFT)
+
+#define dpni_set_field(var, field, val)	\
+	((var) |= (((val) << DPNI_##field##_SHIFT) & DPNI_MASK(field)))
+#define dpni_get_field(var, field)	\
+	(((var) & DPNI_MASK(field)) >> DPNI_##field##_SHIFT)
+
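+/*
+ * Illustrative use of the accessors above, assuming a field such as
+ * ENABLE (shift 0, size 1) as defined later in this file:
+ *
+ *	u8 flags = 0;
+ *
+ *	dpni_set_field(flags, ENABLE, 1);	then flags == 0x01
+ *	dpni_get_field(flags, ENABLE);		evaluates to 1
+ */
+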
+struct dpni_cmd_open {
+	__le32 dpni_id;
+};
+
+#define DPNI_BACKUP_POOL(val, order)	(((val) & 0x1) << (order))
+struct dpni_cmd_set_pools {
+	/* cmd word 0 */
+	u8 num_dpbp;
+	u8 backup_pool_mask;
+	__le16 pad;
+	/* cmd word 0..4 */
+	__le32 dpbp_id[DPNI_MAX_DPBP];
+	/* cmd word 4..6 */
+	__le16 buffer_size[DPNI_MAX_DPBP];
+};
+
+/* The enable indication is always the least significant bit */
+#define DPNI_ENABLE_SHIFT		0
+#define DPNI_ENABLE_SIZE		1
+
+struct dpni_rsp_is_enabled {
+	u8 enabled;
+};
+
+struct dpni_rsp_get_irq {
+	/* response word 0 */
+	__le32 irq_val;
+	__le32 pad;
+	/* response word 1 */
+	__le64 irq_addr;
+	/* response word 2 */
+	__le32 irq_num;
+	__le32 type;
+};
+
+struct dpni_cmd_set_irq_enable {
+	u8 enable;
+	u8 pad[3];
+	u8 irq_index;
+};
+
+struct dpni_cmd_get_irq_enable {
+	__le32 pad;
+	u8 irq_index;
+};
+
+struct dpni_rsp_get_irq_enable {
+	u8 enabled;
+};
+
+struct dpni_cmd_set_irq_mask {
+	__le32 mask;
+	u8 irq_index;
+};
+
+struct dpni_cmd_get_irq_mask {
+	__le32 pad;
+	u8 irq_index;
+};
+
+struct dpni_rsp_get_irq_mask {
+	__le32 mask;
+};
+
+struct dpni_cmd_get_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+struct dpni_rsp_get_irq_status {
+	__le32 status;
+};
+
+struct dpni_cmd_clear_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+struct dpni_rsp_get_attr {
+	/* response word 0 */
+	__le32 options;
+	u8 num_queues;
+	u8 num_tcs;
+	u8 mac_filter_entries;
+	u8 pad0;
+	/* response word 1 */
+	u8 vlan_filter_entries;
+	u8 pad1;
+	u8 qos_entries;
+	u8 pad2;
+	__le16 fs_entries;
+	__le16 pad3;
+	/* response word 2 */
+	u8 qos_key_size;
+	u8 fs_key_size;
+	__le16 wriop_version;
+};
+
+#define DPNI_ERROR_ACTION_SHIFT		0
+#define DPNI_ERROR_ACTION_SIZE		4
+#define DPNI_FRAME_ANN_SHIFT		4
+#define DPNI_FRAME_ANN_SIZE		1
+
+struct dpni_cmd_set_errors_behavior {
+	__le32 errors;
+	/* from least significant bit: error_action:4, set_frame_annotation:1 */
+	u8 flags;
+};
+
+/* There are 3 separate commands for configuring Rx, Tx and Tx confirmation
+ * buffer layouts, but they all share the same parameters.
+ * If one of the functions changes, the structure below needs to be split.
+ */
+
+#define DPNI_PASS_TS_SHIFT		0
+#define DPNI_PASS_TS_SIZE		1
+#define DPNI_PASS_PR_SHIFT		1
+#define DPNI_PASS_PR_SIZE		1
+#define DPNI_PASS_FS_SHIFT		2
+#define DPNI_PASS_FS_SIZE		1
+
+struct dpni_cmd_get_buffer_layout {
+	u8 qtype;
+};
+
+struct dpni_rsp_get_buffer_layout {
+	/* response word 0 */
+	u8 pad0[6];
+	/* from LSB: pass_timestamp:1, parser_result:1, frame_status:1 */
+	u8 flags;
+	u8 pad1;
+	/* response word 1 */
+	__le16 private_data_size;
+	__le16 data_align;
+	__le16 head_room;
+	__le16 tail_room;
+};
+
+struct dpni_cmd_set_buffer_layout {
+	/* cmd word 0 */
+	u8 qtype;
+	u8 pad0[3];
+	__le16 options;
+	/* from LSB: pass_timestamp:1, parser_result:1, frame_status:1 */
+	u8 flags;
+	u8 pad1;
+	/* cmd word 1 */
+	__le16 private_data_size;
+	__le16 data_align;
+	__le16 head_room;
+	__le16 tail_room;
+};
+
+struct dpni_cmd_set_offload {
+	u8 pad[3];
+	u8 dpni_offload;
+	__le32 config;
+};
+
+struct dpni_cmd_get_offload {
+	u8 pad[3];
+	u8 dpni_offload;
+};
+
+struct dpni_rsp_get_offload {
+	__le32 pad;
+	__le32 config;
+};
+
+struct dpni_cmd_get_qdid {
+	u8 qtype;
+};
+
+struct dpni_rsp_get_qdid {
+	__le16 qdid;
+};
+
+struct dpni_rsp_get_tx_data_offset {
+	__le16 data_offset;
+};
+
+struct dpni_cmd_get_statistics {
+	u8 page_number;
+};
+
+struct dpni_rsp_get_statistics {
+	__le64 counter[DPNI_STATISTICS_CNT];
+};
+
+struct dpni_cmd_link_cfg {
+	/* cmd word 0 */
+	__le64 pad0;
+	/* cmd word 1 */
+	__le32 rate;
+	__le32 pad1;
+	/* cmd word 2 */
+	__le64 options;
+};
+
+#define DPNI_LINK_STATE_SHIFT		0
+#define DPNI_LINK_STATE_SIZE		1
+
+struct dpni_rsp_get_link_state {
+	/* response word 0 */
+	__le32 pad0;
+	/* from LSB: up:1 */
+	u8 flags;
+	u8 pad1[3];
+	/* response word 1 */
+	__le32 rate;
+	__le32 pad2;
+	/* response word 2 */
+	__le64 options;
+};
+
+struct dpni_cmd_set_max_frame_length {
+	__le16 max_frame_length;
+};
+
+struct dpni_rsp_get_max_frame_length {
+	__le16 max_frame_length;
+};
+
+struct dpni_cmd_set_multicast_promisc {
+	u8 enable;
+};
+
+struct dpni_rsp_get_multicast_promisc {
+	u8 enabled;
+};
+
+struct dpni_cmd_set_unicast_promisc {
+	u8 enable;
+};
+
+struct dpni_rsp_get_unicast_promisc {
+	u8 enabled;
+};
+
+struct dpni_cmd_set_primary_mac_addr {
+	__le16 pad;
+	u8 mac_addr[6];
+};
+
+struct dpni_rsp_get_primary_mac_addr {
+	__le16 pad;
+	u8 mac_addr[6];
+};
+
+struct dpni_rsp_get_port_mac_addr {
+	__le16 pad;
+	u8 mac_addr[6];
+};
+
+struct dpni_cmd_add_mac_addr {
+	__le16 pad;
+	u8 mac_addr[6];
+};
+
+struct dpni_cmd_remove_mac_addr {
+	__le16 pad;
+	u8 mac_addr[6];
+};
+
+#define DPNI_UNICAST_FILTERS_SHIFT	0
+#define DPNI_UNICAST_FILTERS_SIZE	1
+#define DPNI_MULTICAST_FILTERS_SHIFT	1
+#define DPNI_MULTICAST_FILTERS_SIZE	1
+
+struct dpni_cmd_clear_mac_filters {
+	/* from LSB: unicast:1, multicast:1 */
+	u8 flags;
+};
+
+#define DPNI_DIST_MODE_SHIFT		0
+#define DPNI_DIST_MODE_SIZE		4
+#define DPNI_MISS_ACTION_SHIFT		4
+#define DPNI_MISS_ACTION_SIZE		4
+
+struct dpni_cmd_set_rx_tc_dist {
+	/* cmd word 0 */
+	__le16 dist_size;
+	u8 tc_id;
+	/* from LSB: dist_mode:4, miss_action:4 */
+	u8 flags;
+	__le16 pad0;
+	__le16 default_flow_id;
+	/* cmd word 1..5 */
+	__le64 pad1[5];
+	/* cmd word 6 */
+	__le64 key_cfg_iova;
+};
+
+/* dpni_set_rx_tc_dist extension (structure of the DMA-able memory at
+ * key_cfg_iova)
+ */
+struct dpni_mask_cfg {
+	u8 mask;
+	u8 offset;
+};
+
+#define DPNI_EFH_TYPE_SHIFT		0
+#define DPNI_EFH_TYPE_SIZE		4
+#define DPNI_EXTRACT_TYPE_SHIFT		0
+#define DPNI_EXTRACT_TYPE_SIZE		4
+
+struct dpni_dist_extract {
+	/* word 0 */
+	u8 prot;
+	/* EFH type stored in the 4 least significant bits */
+	u8 efh_type;
+	u8 size;
+	u8 offset;
+	__le32 field;
+	/* word 1 */
+	u8 hdr_index;
+	u8 constant;
+	u8 num_of_repeats;
+	u8 num_of_byte_masks;
+	/* Extraction type is stored in the 4 LSBs */
+	u8 extract_type;
+	u8 pad[3];
+	/* word 2 */
+	struct dpni_mask_cfg masks[4];
+};
+
+struct dpni_ext_set_rx_tc_dist {
+	/* extension word 0 */
+	u8 num_extracts;
+	u8 pad[7];
+	/* words 1..25 */
+	struct dpni_dist_extract extracts[DPKG_MAX_NUM_OF_EXTRACTS];
+};
+
+struct dpni_cmd_get_queue {
+	u8 qtype;
+	u8 tc;
+	u8 index;
+};
+
+#define DPNI_DEST_TYPE_SHIFT		0
+#define DPNI_DEST_TYPE_SIZE		4
+#define DPNI_STASH_CTRL_SHIFT		6
+#define DPNI_STASH_CTRL_SIZE		1
+#define DPNI_HOLD_ACTIVE_SHIFT		7
+#define DPNI_HOLD_ACTIVE_SIZE		1
+
+struct dpni_rsp_get_queue {
+	/* response word 0 */
+	__le64 pad0;
+	/* response word 1 */
+	__le32 dest_id;
+	__le16 pad1;
+	u8 dest_prio;
+	/* From LSB: dest_type:4, pad:2, flc_stash_ctrl:1, hold_active:1 */
+	u8 flags;
+	/* response word 2 */
+	__le64 flc;
+	/* response word 3 */
+	__le64 user_context;
+	/* response word 4 */
+	__le32 fqid;
+	__le16 qdbin;
+};
+
+struct dpni_cmd_set_queue {
+	/* cmd word 0 */
+	u8 qtype;
+	u8 tc;
+	u8 index;
+	u8 options;
+	__le32 pad0;
+	/* cmd word 1 */
+	__le32 dest_id;
+	__le16 pad1;
+	u8 dest_prio;
+	u8 flags;
+	/* cmd word 2 */
+	__le64 flc;
+	/* cmd word 3 */
+	__le64 user_context;
+};
+
+struct dpni_cmd_set_taildrop {
+	/* cmd word 0 */
+	u8 congestion_point;
+	u8 qtype;
+	u8 tc;
+	u8 index;
+	__le32 pad0;
+	/* cmd word 1 */
+	/* Only least significant bit is relevant */
+	u8 enable;
+	u8 pad1;
+	u8 units;
+	u8 pad2;
+	__le32 threshold;
+};
+
+struct dpni_cmd_get_taildrop {
+	u8 congestion_point;
+	u8 qtype;
+	u8 tc;
+	u8 index;
+};
+
+struct dpni_rsp_get_taildrop {
+	/* cmd word 0 */
+	__le64 pad0;
+	/* cmd word 1 */
+	/* Only least significant bit is relevant */
+	u8 enable;
+	u8 pad1;
+	u8 units;
+	u8 pad2;
+	__le32 threshold;
+};
+
+struct dpni_rsp_get_api_version {
+	__le16 major;
+	__le16 minor;
+};
+
+#define DPNI_RX_FS_DIST_ENABLE_SHIFT	0
+#define DPNI_RX_FS_DIST_ENABLE_SIZE	1
+struct dpni_cmd_set_rx_fs_dist {
+	__le16 dist_size;
+	u8 enable;
+	u8 tc;
+	__le16 miss_flow_id;
+	__le16 pad;
+	__le64 key_cfg_iova;
+};
+
+#define DPNI_RX_HASH_DIST_ENABLE_SHIFT	0
+#define DPNI_RX_HASH_DIST_ENABLE_SIZE	1
+struct dpni_cmd_set_rx_hash_dist {
+	__le16 dist_size;
+	u8 enable;
+	u8 tc;
+	__le32 pad;
+	__le64 key_cfg_iova;
+};
+
+struct dpni_cmd_add_fs_entry {
+	/* cmd word 0 */
+	__le16 options;
+	u8 tc_id;
+	u8 key_size;
+	__le16 index;
+	__le16 flow_id;
+	/* cmd word 1 */
+	__le64 key_iova;
+	/* cmd word 2 */
+	__le64 mask_iova;
+	/* cmd word 3 */
+	__le64 flc;
+};
+
+struct dpni_cmd_remove_fs_entry {
+	/* cmd word 0 */
+	__le16 pad0;
+	u8 tc_id;
+	u8 key_size;
+	__le32 pad1;
+	/* cmd word 1 */
+	__le64 key_iova;
+	/* cmd word 2 */
+	__le64 mask_iova;
+};
+
+#endif /* _FSL_DPNI_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
new file mode 100644
index 0000000..dd54e69
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -0,0 +1,1788 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fsl/mc.h>
+#include "dpni.h"
+#include "dpni-cmd.h"
+
+/**
+ * dpni_prepare_key_cfg() - Prepare extract parameters
+ * @cfg: Key Generation profile (rule) definition
+ * @key_cfg_buf: Zeroed 256-byte buffer, to be mapped for DMA by the caller
+ *
+ * This function has to be called before the following functions:
+ *	- dpni_set_rx_tc_dist()
+ *	- dpni_set_qos_table()
+ *
+ * Return:	'0' on Success; -EINVAL if the profile holds more than
+ *		DPKG_MAX_NUM_OF_EXTRACTS entries or an unknown extract type.
+ */
+int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, u8 *key_cfg_buf)
+{
+	int i, j;
+	struct dpni_ext_set_rx_tc_dist *dpni_ext;
+	struct dpni_dist_extract *extr;
+
+	if (cfg->num_extracts > DPKG_MAX_NUM_OF_EXTRACTS)
+		return -EINVAL;
+
+	dpni_ext = (struct dpni_ext_set_rx_tc_dist *)key_cfg_buf;
+	dpni_ext->num_extracts = cfg->num_extracts;
+
+	for (i = 0; i < cfg->num_extracts; i++) {
+		extr = &dpni_ext->extracts[i];
+
+		switch (cfg->extracts[i].type) {
+		case DPKG_EXTRACT_FROM_HDR:
+			extr->prot = cfg->extracts[i].extract.from_hdr.prot;
+			dpni_set_field(extr->efh_type, EFH_TYPE,
+				       cfg->extracts[i].extract.from_hdr.type);
+			extr->size = cfg->extracts[i].extract.from_hdr.size;
+			extr->offset = cfg->extracts[i].extract.from_hdr.offset;
+			extr->field = cpu_to_le32(
+				cfg->extracts[i].extract.from_hdr.field);
+			extr->hdr_index =
+				cfg->extracts[i].extract.from_hdr.hdr_index;
+			break;
+		case DPKG_EXTRACT_FROM_DATA:
+			extr->size = cfg->extracts[i].extract.from_data.size;
+			extr->offset =
+				cfg->extracts[i].extract.from_data.offset;
+			break;
+		case DPKG_EXTRACT_FROM_PARSE:
+			extr->size = cfg->extracts[i].extract.from_parse.size;
+			extr->offset =
+				cfg->extracts[i].extract.from_parse.offset;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		extr->num_of_byte_masks = cfg->extracts[i].num_of_byte_masks;
+		dpni_set_field(extr->extract_type, EXTRACT_TYPE,
+			       cfg->extracts[i].type);
+
+		for (j = 0; j < DPKG_NUM_OF_MASKS; j++) {
+			extr->masks[j].mask = cfg->extracts[i].masks[j].mask;
+			extr->masks[j].offset =
+				cfg->extracts[i].masks[j].offset;
+		}
+	}
+
+	return 0;
+}
+
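+/*
+ * Illustrative call sequence (sketch, error handling omitted): prepare
+ * the key profile in a zeroed 256-byte buffer, map it for DMA and pass
+ * the resulting IOVA to dpni_set_rx_tc_dist():
+ *
+ *	u8 *buf = kzalloc(256, GFP_KERNEL);
+ *
+ *	dpni_prepare_key_cfg(&kg_cfg, buf);
+ *	dist_cfg.key_cfg_iova = dma_map_single(dev, buf, 256, DMA_TO_DEVICE);
+ *	dpni_set_rx_tc_dist(mc_io, 0, token, 0, &dist_cfg);
+ */
+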
+/**
+ * dpni_open() - Open a control session for the specified object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpni_id:	DPNI unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpni_create() function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_open(struct fsl_mc_io *mc_io,
+	      u32 cmd_flags,
+	      int dpni_id,
+	      u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_open *cmd_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_OPEN,
+					  cmd_flags,
+					  0);
+	cmd_params = (struct dpni_cmd_open *)cmd.params;
+	cmd_params->dpni_id = cpu_to_le32(dpni_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+
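+/*
+ * Typical session lifetime (sketch; 'dpni_id' normally comes from the
+ * fsl-mc bus object, error handling omitted):
+ *
+ *	u16 token;
+ *
+ *	dpni_open(mc_io, 0, dpni_id, &token);
+ *	... issue dpni_* commands using 'token' ...
+ *	dpni_close(mc_io, 0, token);
+ */
+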
+/**
+ * dpni_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_close(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLOSE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_set_pools() - Set buffer pools configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	Buffer pools configuration
+ *
+ * This configuration is mandatory for DPNI operation.
+ * warning: Allowed only when the DPNI is disabled
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_pools(struct fsl_mc_io *mc_io,
+		   u32 cmd_flags,
+		   u16 token,
+		   const struct dpni_pools_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_pools *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_POOLS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_pools *)cmd.params;
+	cmd_params->num_dpbp = cfg->num_dpbp;
+	for (i = 0; i < DPNI_MAX_DPBP; i++) {
+		cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id);
+		cmd_params->buffer_size[i] =
+			cpu_to_le16(cfg->pools[i].buffer_size);
+		cmd_params->backup_pool_mask |=
+			DPNI_BACKUP_POOL(cfg->pools[i].backup_pool, i);
+	}
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
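+/*
+ * Example configuration (sketch): attach a single buffer pool of
+ * 2048-byte buffers, allowed only while the DPNI is disabled:
+ *
+ *	struct dpni_pools_cfg pools_params = {
+ *		.num_dpbp = 1,
+ *		.pools[0] = { .dpbp_id = dpbp_id, .buffer_size = 2048 },
+ *	};
+ *
+ *	dpni_set_pools(mc_io, 0, token, &pools_params);
+ */
+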
+/**
+ * dpni_enable() - Enable the DPNI, allow sending and receiving frames.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_enable(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_ENABLE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_disable() - Disable the DPNI, stop sending and receiving frames.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_disable(struct fsl_mc_io *mc_io,
+		 u32 cmd_flags,
+		 u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_DISABLE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_is_enabled() - Check if the DPNI is enabled.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @en:		Returns '1' if object is enabled; '0' otherwise
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_is_enabled(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    int *en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_is_enabled *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_IS_ENABLED,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_is_enabled *)cmd.params;
+	*en = dpni_get_field(rsp_params->enabled, ENABLE);
+
+	return 0;
+}
+
+/**
+ * dpni_reset() - Reset the DPNI, returns the object to initial state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_reset(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_RESET,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_set_irq_enable() - Set overall interrupt state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Interrupt state - enable = 1, disable = 0
+ *
+ * Allows GPP software to control when interrupts are generated.
+ * Each interrupt can have up to 32 causes. The enable/disable controls the
+ * overall interrupt state: if the interrupt is disabled, no cause can assert
+ * the interrupt.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_irq_enable(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u8 en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_irq_enable *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ_ENABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_irq_enable *)cmd.params;
+	dpni_set_field(cmd_params->enable, ENABLE, en);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_irq_enable() - Get overall interrupt state
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Returned interrupt state - enable = 1, disable = 0
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_irq_enable(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u8 *en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_irq_enable *cmd_params;
+	struct dpni_rsp_get_irq_enable *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_ENABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_irq_enable *)cmd.params;
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_irq_enable *)cmd.params;
+	*en = dpni_get_field(rsp_params->enabled, ENABLE);
+
+	return 0;
+}
+
+/**
+ * dpni_set_irq_mask() - Set interrupt mask.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @irq_index:	The interrupt index to configure
+ * @mask:	event mask to trigger interrupt;
+ *			each bit:
+ *				0 = ignore event
+ *				1 = consider event for asserting IRQ
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_irq_mask(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      u8 irq_index,
+		      u32 mask)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_irq_mask *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ_MASK,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_irq_mask *)cmd.params;
+	cmd_params->mask = cpu_to_le32(mask);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_irq_mask() - Get interrupt mask.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @irq_index:	The interrupt index to configure
+ * @mask:	Returned event mask to trigger interrupt
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_irq_mask(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      u8 irq_index,
+		      u32 *mask)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_irq_mask *cmd_params;
+	struct dpni_rsp_get_irq_mask *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_MASK,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_irq_mask *)cmd.params;
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_irq_mask *)cmd.params;
+	*mask = le32_to_cpu(rsp_params->mask);
+
+	return 0;
+}
+
+/**
+ * dpni_get_irq_status() - Get the current status of any pending interrupts.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @irq_index:	The interrupt index to configure
+ * @status:	Returned interrupts status - one bit per cause:
+ *			0 = no interrupt pending
+ *			1 = interrupt pending
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_irq_status(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u32 *status)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_irq_status *cmd_params;
+	struct dpni_rsp_get_irq_status *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_STATUS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_irq_status *)cmd.params;
+	cmd_params->status = cpu_to_le32(*status);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_irq_status *)cmd.params;
+	*status = le32_to_cpu(rsp_params->status);
+
+	return 0;
+}
+
+/**
+ * dpni_clear_irq_status() - Clear a pending interrupt's status
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @irq_index:	The interrupt index to configure
+ * @status:	bits to clear (W1C) - one bit per cause:
+ *			0 = don't change
+ *			1 = clear status bit
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_clear_irq_status(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  u8 irq_index,
+			  u32 status)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_clear_irq_status *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLEAR_IRQ_STATUS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_clear_irq_status *)cmd.params;
+	cmd_params->irq_index = irq_index;
+	cmd_params->status = cpu_to_le32(status);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_attributes() - Retrieve DPNI attributes.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @attr:	Object's attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_attributes(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			struct dpni_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_attr *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_ATTR,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_attr *)cmd.params;
+	attr->options = le32_to_cpu(rsp_params->options);
+	attr->num_queues = rsp_params->num_queues;
+	attr->num_tcs = rsp_params->num_tcs;
+	attr->mac_filter_entries = rsp_params->mac_filter_entries;
+	attr->vlan_filter_entries = rsp_params->vlan_filter_entries;
+	attr->qos_entries = rsp_params->qos_entries;
+	attr->fs_entries = le16_to_cpu(rsp_params->fs_entries);
+	attr->qos_key_size = rsp_params->qos_key_size;
+	attr->fs_key_size = rsp_params->fs_key_size;
+	attr->wriop_version = le16_to_cpu(rsp_params->wriop_version);
+
+	return 0;
+}
+
+/**
+ * dpni_set_errors_behavior() - Set errors behavior
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	Errors configuration
+ *
+ * This function may be called numerous times with different
+ * error masks.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
+			     u32 cmd_flags,
+			     u16 token,
+			     struct dpni_error_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_errors_behavior *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_ERRORS_BEHAVIOR,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_errors_behavior *)cmd.params;
+	cmd_params->errors = cpu_to_le32(cfg->errors);
+	dpni_set_field(cmd_params->flags, ERROR_ACTION, cfg->error_action);
+	dpni_set_field(cmd_params->flags, FRAME_ANN, cfg->set_frame_annotation);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_buffer_layout() - Retrieve buffer layout attributes.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue to retrieve configuration for
+ * @layout:	Returns buffer layout attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   enum dpni_queue_type qtype,
+			   struct dpni_buffer_layout *layout)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_buffer_layout *cmd_params;
+	struct dpni_rsp_get_buffer_layout *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_BUFFER_LAYOUT,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_buffer_layout *)cmd.params;
+	cmd_params->qtype = qtype;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_buffer_layout *)cmd.params;
+	layout->pass_timestamp = dpni_get_field(rsp_params->flags, PASS_TS);
+	layout->pass_parser_result = dpni_get_field(rsp_params->flags, PASS_PR);
+	layout->pass_frame_status = dpni_get_field(rsp_params->flags, PASS_FS);
+	layout->private_data_size = le16_to_cpu(rsp_params->private_data_size);
+	layout->data_align = le16_to_cpu(rsp_params->data_align);
+	layout->data_head_room = le16_to_cpu(rsp_params->head_room);
+	layout->data_tail_room = le16_to_cpu(rsp_params->tail_room);
+
+	return 0;
+}
+
+/**
+ * dpni_set_buffer_layout() - Set buffer layout configuration.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue this configuration applies to
+ * @layout:	Buffer layout configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ *
+ * @warning	Allowed only when DPNI is disabled
+ */
+int dpni_set_buffer_layout(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   enum dpni_queue_type qtype,
+			   const struct dpni_buffer_layout *layout)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_buffer_layout *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_BUFFER_LAYOUT,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_buffer_layout *)cmd.params;
+	cmd_params->qtype = qtype;
+	cmd_params->options = cpu_to_le16(layout->options);
+	dpni_set_field(cmd_params->flags, PASS_TS, layout->pass_timestamp);
+	dpni_set_field(cmd_params->flags, PASS_PR, layout->pass_parser_result);
+	dpni_set_field(cmd_params->flags, PASS_FS, layout->pass_frame_status);
+	cmd_params->private_data_size = cpu_to_le16(layout->private_data_size);
+	cmd_params->data_align = cpu_to_le16(layout->data_align);
+	cmd_params->head_room = cpu_to_le16(layout->data_head_room);
+	cmd_params->tail_room = cpu_to_le16(layout->data_tail_room);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
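+/*
+ * Example (sketch): request 64-byte alignment of the Rx frame data; the
+ * DPNI_BUF_LAYOUT_OPT_* flags are declared in dpni.h:
+ *
+ *	struct dpni_buffer_layout layout = {
+ *		.options = DPNI_BUF_LAYOUT_OPT_DATA_ALIGN,
+ *		.data_align = 64,
+ *	};
+ *
+ *	dpni_set_buffer_layout(mc_io, 0, token, DPNI_QUEUE_RX, &layout);
+ */
+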
+/**
+ * dpni_set_offload() - Set DPNI offload configuration.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @type:	Type of DPNI offload
+ * @config:	Offload configuration.
+ *		For checksum offloads, a non-zero value enables the offload
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ *
+ * @warning    Allowed only when DPNI is disabled
+ */
+int dpni_set_offload(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token,
+		     enum dpni_offload type,
+		     u32 config)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_offload *cmd_params;
+
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_OFFLOAD,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_offload *)cmd.params;
+	cmd_params->dpni_offload = type;
+	cmd_params->config = cpu_to_le32(config);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
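+/**
+ * dpni_get_offload() - Get DPNI offload configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @type:	Type of DPNI offload
+ * @config:	Returned offload configuration.
+ *		For checksum offloads, a non-zero value indicates that the
+ *		offload is enabled
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */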
+int dpni_get_offload(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token,
+		     enum dpni_offload type,
+		     u32 *config)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_offload *cmd_params;
+	struct dpni_rsp_get_offload *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_OFFLOAD,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_offload *)cmd.params;
+	cmd_params->dpni_offload = type;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_offload *)cmd.params;
+	*config = le32_to_cpu(rsp_params->config);
+
+	return 0;
+}
+
+/**
+ * dpni_get_qdid() - Get the Queuing Destination ID (QDID) that should be used
+ *			for enqueue operations
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue to receive QDID for
+ * @qdid:	Returned virtual QDID value that should be used as an argument
+ *			in all enqueue operations
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_qdid(struct fsl_mc_io *mc_io,
+		  u32 cmd_flags,
+		  u16 token,
+		  enum dpni_queue_type qtype,
+		  u16 *qdid)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_qdid *cmd_params;
+	struct dpni_rsp_get_qdid *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_QDID,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_qdid *)cmd.params;
+	cmd_params->qtype = qtype;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_qdid *)cmd.params;
+	*qdid = le16_to_cpu(rsp_params->qdid);
+
+	return 0;
+}
+
+/**
+ * dpni_get_tx_data_offset() - Get the Tx data offset (from start of buffer)
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @data_offset: Tx data offset (from start of buffer)
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io,
+			    u32 cmd_flags,
+			    u16 token,
+			    u16 *data_offset)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_tx_data_offset *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_DATA_OFFSET,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_tx_data_offset *)cmd.params;
+	*data_offset = le16_to_cpu(rsp_params->data_offset);
+
+	return 0;
+}
+
+/**
+ * dpni_set_link_cfg() - set the link configuration.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	Link configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      const struct dpni_link_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_link_cfg *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_LINK_CFG,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_link_cfg *)cmd.params;
+	cmd_params->rate = cpu_to_le32(cfg->rate);
+	cmd_params->options = cpu_to_le64(cfg->options);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_link_cfg() - return the link configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	Link configuration from dpni object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_link_cfg(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      struct dpni_link_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_link_cfg *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_LINK_CFG,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_cmd_link_cfg *)cmd.params;
+	cfg->rate = le32_to_cpu(rsp_params->rate);
+	cfg->options = le64_to_cpu(rsp_params->options);
+
+	return 0;
+}
+
+/**
+ * dpni_get_link_state() - Return the link state (either up or down)
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @state:	Returned link state
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_link_state(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			struct dpni_link_state *state)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_link_state *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_LINK_STATE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_link_state *)cmd.params;
+	state->up = dpni_get_field(rsp_params->flags, LINK_STATE);
+	state->rate = le32_to_cpu(rsp_params->rate);
+	state->options = le64_to_cpu(rsp_params->options);
+
+	return 0;
+}
+
+/**
+ * dpni_set_max_frame_length() - Set the maximum received frame length.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @max_frame_length:	Maximum received frame length (in
+ *				bytes); frame is discarded if its
+ *				length exceeds this value
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_max_frame_length(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u16 max_frame_length)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_max_frame_length *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MAX_FRAME_LENGTH,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_max_frame_length *)cmd.params;
+	cmd_params->max_frame_length = cpu_to_le16(max_frame_length);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_max_frame_length() - Get the maximum received frame length.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @max_frame_length:	Maximum received frame length (in
+ *				bytes); frame is discarded if its
+ *				length exceeds this value
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_max_frame_length(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u16 *max_frame_length)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_max_frame_length *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_MAX_FRAME_LENGTH,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_max_frame_length *)cmd.params;
+	*max_frame_length = le16_to_cpu(rsp_params->max_frame_length);
+
+	return 0;
+}
+
+/**
+ * dpni_set_multicast_promisc() - Enable/disable multicast promiscuous mode
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @en:		Set to '1' to enable; '0' to disable
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io,
+			       u32 cmd_flags,
+			       u16 token,
+			       int en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_multicast_promisc *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MCAST_PROMISC,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_multicast_promisc *)cmd.params;
+	dpni_set_field(cmd_params->enable, ENABLE, en);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_multicast_promisc() - Get multicast promiscuous mode
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @en:		Returns '1' if enabled; '0' otherwise
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io,
+			       u32 cmd_flags,
+			       u16 token,
+			       int *en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_multicast_promisc *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_MCAST_PROMISC,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_multicast_promisc *)cmd.params;
+	*en = dpni_get_field(rsp_params->enabled, ENABLE);
+
+	return 0;
+}
+
+/**
+ * dpni_set_unicast_promisc() - Enable/disable unicast promiscuous mode
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @en:		Set to '1' to enable; '0' to disable
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_unicast_promisc(struct fsl_mc_io *mc_io,
+			     u32 cmd_flags,
+			     u16 token,
+			     int en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_unicast_promisc *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_UNICAST_PROMISC,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_unicast_promisc *)cmd.params;
+	dpni_set_field(cmd_params->enable, ENABLE, en);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_unicast_promisc() - Get unicast promiscuous mode
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @en:		Returns '1' if enabled; '0' otherwise
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io,
+			     u32 cmd_flags,
+			     u16 token,
+			     int *en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_unicast_promisc *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_UNICAST_PROMISC,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_unicast_promisc *)cmd.params;
+	*en = dpni_get_field(rsp_params->enabled, ENABLE);
+
+	return 0;
+}
+
+/**
+ * dpni_set_primary_mac_addr() - Set the primary MAC address
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @mac_addr:	MAC address to set as primary address
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      const u8 mac_addr[6])
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_primary_mac_addr *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_PRIM_MAC,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_primary_mac_addr *)cmd.params;
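+	/* the MC expects the MAC address bytes in reverse order */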
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = mac_addr[5 - i];
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_primary_mac_addr() - Get the primary MAC address
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @mac_addr:	Returned MAC address
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u8 mac_addr[6])
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_primary_mac_addr *rsp_params;
+	int i, err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_PRIM_MAC,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_primary_mac_addr *)cmd.params;
+	for (i = 0; i < 6; i++)
+		mac_addr[5 - i] = rsp_params->mac_addr[i];
+
+	return 0;
+}
+
+/**
+ * dpni_get_port_mac_addr() - Retrieve MAC address associated with the physical
+ *			port the DPNI is attached to
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @mac_addr:	MAC address of the physical port, if any, otherwise 0
+ *
+ * The primary MAC address is not cleared by this operation.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_port_mac_addr(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 mac_addr[6])
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_rsp_get_port_mac_addr *rsp_params;
+	int i, err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_PORT_MAC_ADDR,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_port_mac_addr *)cmd.params;
+	for (i = 0; i < 6; i++)
+		mac_addr[5 - i] = rsp_params->mac_addr[i];
+
+	return 0;
+}
+
+/**
+ * dpni_add_mac_addr() - Add MAC address filter
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @mac_addr:	MAC address to add
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_add_mac_addr(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      const u8 mac_addr[6])
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_add_mac_addr *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_MAC_ADDR,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_add_mac_addr *)cmd.params;
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = mac_addr[5 - i];
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_remove_mac_addr() - Remove MAC address filter
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @mac_addr:	MAC address to remove
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_remove_mac_addr(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 const u8 mac_addr[6])
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_remove_mac_addr *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_MAC_ADDR,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_remove_mac_addr *)cmd.params;
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = mac_addr[5 - i];
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_clear_mac_filters() - Clear all unicast and/or multicast MAC filters
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @unicast:	Set to '1' to clear unicast addresses
+ * @multicast:	Set to '1' to clear multicast addresses
+ *
+ * The primary MAC address is not cleared by this operation.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_clear_mac_filters(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   int unicast,
+			   int multicast)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_clear_mac_filters *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_MAC_FILTERS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_clear_mac_filters *)cmd.params;
+	dpni_set_field(cmd_params->flags, UNICAST_FILTERS, unicast);
+	dpni_set_field(cmd_params->flags, MULTICAST_FILTERS, multicast);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_set_rx_tc_dist() - Set Rx traffic class distribution configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @tc_id:	Traffic class selection (0-7)
+ * @cfg:	Traffic class distribution configuration
+ *
+ * warning: if 'dist_mode != DPNI_DIST_MODE_NONE', call dpni_prepare_key_cfg()
+ *			first to prepare the key_cfg_iova parameter
+ *
+ * Return:	'0' on Success; error code otherwise.
+ */
+int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 tc_id,
+			const struct dpni_rx_tc_dist_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_rx_tc_dist *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_TC_DIST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_rx_tc_dist *)cmd.params;
+	cmd_params->dist_size = cpu_to_le16(cfg->dist_size);
+	cmd_params->tc_id = tc_id;
+	dpni_set_field(cmd_params->flags, DIST_MODE, cfg->dist_mode);
+	dpni_set_field(cmd_params->flags, MISS_ACTION, cfg->fs_cfg.miss_action);
+	cmd_params->default_flow_id = cpu_to_le16(cfg->fs_cfg.default_flow_id);
+	cmd_params->key_cfg_iova = cpu_to_le64(cfg->key_cfg_iova);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_set_queue() - Set queue parameters
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue - all queue types are supported, although
+ *		the command is ignored for Tx
+ * @tc:		Traffic class, in range 0 to NUM_TCS - 1
+ * @index:	Selects the specific queue out of the set allocated for the
+ *		same TC. Value must be in range 0 to NUM_QUEUES - 1
+ * @options:	A combination of DPNI_QUEUE_OPT_ values that control what
+ *		configuration options are set on the queue
+ * @queue:	Queue structure
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_queue(struct fsl_mc_io *mc_io,
+		   u32 cmd_flags,
+		   u16 token,
+		   enum dpni_queue_type qtype,
+		   u8 tc,
+		   u8 index,
+		   u8 options,
+		   const struct dpni_queue *queue)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_queue *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_queue *)cmd.params;
+	cmd_params->qtype = qtype;
+	cmd_params->tc = tc;
+	cmd_params->index = index;
+	cmd_params->options = options;
+	cmd_params->dest_id = cpu_to_le32(queue->destination.id);
+	cmd_params->dest_prio = queue->destination.priority;
+	dpni_set_field(cmd_params->flags, DEST_TYPE, queue->destination.type);
+	dpni_set_field(cmd_params->flags, STASH_CTRL, queue->flc.stash_control);
+	dpni_set_field(cmd_params->flags, HOLD_ACTIVE,
+		       queue->destination.hold_active);
+	cmd_params->flc = cpu_to_le64(queue->flc.value);
+	cmd_params->user_context = cpu_to_le64(queue->user_context);
+
+	/* send command to mc */
+	return mc_send_command(mc_io, &cmd);
+}
+
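+/*
+ * Example (sketch): bind an Rx queue to a DPCON channel and tag it with
+ * a driver cookie; DPNI_DEST_DPCON and the DPNI_QUEUE_OPT_* flags are
+ * declared in dpni.h, 'dpcon_id' and 'fq' are caller-provided:
+ *
+ *	struct dpni_queue q = { 0 };
+ *
+ *	q.destination.id = dpcon_id;
+ *	q.destination.type = DPNI_DEST_DPCON;
+ *	q.destination.priority = 1;
+ *	q.user_context = (u64)(uintptr_t)fq;
+ *	dpni_set_queue(mc_io, 0, token, DPNI_QUEUE_RX, 0, 0,
+ *		       DPNI_QUEUE_OPT_DEST | DPNI_QUEUE_OPT_USER_CTX, &q);
+ */
+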
+/**
+ * dpni_get_queue() - Get queue parameters
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue - all queue types are supported
+ * @tc:		Traffic class, in range 0 to NUM_TCS - 1
+ * @index:	Selects the specific queue out of the set allocated for the
+ *		same TC. Value must be in range 0 to NUM_QUEUES - 1
+ * @queue:	Queue configuration structure
+ * @qid:	Queue identification
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_queue(struct fsl_mc_io *mc_io,
+		   u32 cmd_flags,
+		   u16 token,
+		   enum dpni_queue_type qtype,
+		   u8 tc,
+		   u8 index,
+		   struct dpni_queue *queue,
+		   struct dpni_queue_id *qid)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_queue *cmd_params;
+	struct dpni_rsp_get_queue *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_queue *)cmd.params;
+	cmd_params->qtype = qtype;
+	cmd_params->tc = tc;
+	cmd_params->index = index;
+
+	/* send command to mc */
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_queue *)cmd.params;
+	queue->destination.id = le32_to_cpu(rsp_params->dest_id);
+	queue->destination.priority = rsp_params->dest_prio;
+	queue->destination.type = dpni_get_field(rsp_params->flags,
+						 DEST_TYPE);
+	queue->flc.stash_control = dpni_get_field(rsp_params->flags,
+						  STASH_CTRL);
+	queue->destination.hold_active = dpni_get_field(rsp_params->flags,
+							HOLD_ACTIVE);
+	queue->flc.value = le64_to_cpu(rsp_params->flc);
+	queue->user_context = le64_to_cpu(rsp_params->user_context);
+	qid->fqid = le32_to_cpu(rsp_params->fqid);
+	qid->qdbin = le16_to_cpu(rsp_params->qdbin);
+
+	return 0;
+}
+
+/**
+ * dpni_get_statistics() - Get DPNI statistics
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @page:	Selects the statistics page to retrieve, see
+ *		DPNI_GET_STATISTICS output. Pages are numbered 0 to 6.
+ * @stat:	Structure containing the statistics
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_statistics(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 page,
+			union dpni_statistics *stat)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_statistics *cmd_params;
+	struct dpni_rsp_get_statistics *rsp_params;
+	int i, err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_STATISTICS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_statistics *)cmd.params;
+	cmd_params->page_number = page;
+
+	/* send command to mc */
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_statistics *)cmd.params;
+	for (i = 0; i < DPNI_STATISTICS_CNT; i++)
+		stat->raw.counter[i] = le64_to_cpu(rsp_params->counter[i]);
+
+	return 0;
+}
+
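+/*
+ * Illustrative read of one statistics page (sketch; the union layout is
+ * defined in dpni.h):
+ *
+ *	union dpni_statistics stats;
+ *
+ *	dpni_get_statistics(mc_io, 0, token, 0, &stats);
+ *	rx_frames = stats.page_0.ingress_all_frames;
+ */
+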
+/**
+ * dpni_set_taildrop() - Set taildrop per queue or TC
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cg_point:	Congestion point
+ * @qtype:	Queue type on which the taildrop is configured.
+ *		Only Rx queues are supported for now
+ * @tc:		Traffic class to apply this taildrop to
+ * @index:	Index of the queue if the DPNI supports multiple queues for
+ *		traffic distribution. Ignored if @cg_point is not DPNI_CP_QUEUE.
+ * @taildrop:	Taildrop structure
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_taildrop(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      enum dpni_congestion_point cg_point,
+		      enum dpni_queue_type qtype,
+		      u8 tc,
+		      u8 index,
+		      struct dpni_taildrop *taildrop)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_set_taildrop *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TAILDROP,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_taildrop *)cmd.params;
+	cmd_params->congestion_point = cg_point;
+	cmd_params->qtype = qtype;
+	cmd_params->tc = tc;
+	cmd_params->index = index;
+	dpni_set_field(cmd_params->enable, ENABLE, taildrop->enable);
+	cmd_params->units = taildrop->units;
+	cmd_params->threshold = cpu_to_le32(taildrop->threshold);
+
+	/* send command to mc */
+	return mc_send_command(mc_io, &cmd);
+}
+
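+/*
+ * Example (sketch): enable a 64 KiB byte-based taildrop on Rx TC 0,
+ * queue 0; the enum values are declared in dpni.h:
+ *
+ *	struct dpni_taildrop td = {
+ *		.enable = 1,
+ *		.units = DPNI_CONGESTION_UNIT_BYTES,
+ *		.threshold = 64 * 1024,
+ *	};
+ *
+ *	dpni_set_taildrop(mc_io, 0, token, DPNI_CP_QUEUE, DPNI_QUEUE_RX,
+ *			  0, 0, &td);
+ */
+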
+/**
+ * dpni_get_taildrop() - Get taildrop information
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cg_point:	Congestion point
+ * @qtype:	Queue type on which the taildrop is configured.
+ *		Only Rx queues are supported for now
+ * @tc:		Traffic class to apply this taildrop to
+ * @index:	Index of the queue if the DPNI supports multiple queues for
+ *		traffic distribution. Ignored if @cg_point is not DPNI_CP_QUEUE.
+ * @taildrop:	Taildrop structure
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_taildrop(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      enum dpni_congestion_point cg_point,
+		      enum dpni_queue_type qtype,
+		      u8 tc,
+		      u8 index,
+		      struct dpni_taildrop *taildrop)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpni_cmd_get_taildrop *cmd_params;
+	struct dpni_rsp_get_taildrop *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TAILDROP,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_get_taildrop *)cmd.params;
+	cmd_params->congestion_point = cg_point;
+	cmd_params->qtype = qtype;
+	cmd_params->tc = tc;
+	cmd_params->index = index;
+
+	/* send command to mc */
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpni_rsp_get_taildrop *)cmd.params;
+	taildrop->enable = dpni_get_field(rsp_params->enable, ENABLE);
+	taildrop->units = rsp_params->units;
+	taildrop->threshold = le32_to_cpu(rsp_params->threshold);
+
+	return 0;
+}
+
+/**
+ * dpni_get_api_version() - Get Data Path Network Interface API version
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @major_ver:	Major version of data path network interface API
+ * @minor_ver:	Minor version of data path network interface API
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_get_api_version(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 *major_ver,
+			 u16 *minor_ver)
+{
+	struct dpni_rsp_get_api_version *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_API_VERSION,
+					  cmd_flags, 0);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpni_rsp_get_api_version *)cmd.params;
+	*major_ver = le16_to_cpu(rsp_params->major);
+	*minor_ver = le16_to_cpu(rsp_params->minor);
+
+	return 0;
+}
+
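+/*
+ * Illustrative version gate (the threshold and 'dev' are hypothetical; real
+ * drivers compare against the minimum API level they were written for):
+ *
+ *	u16 major, minor;
+ *	int err;
+ *
+ *	err = dpni_get_api_version(mc_io, 0, &major, &minor);
+ *	if (!err && major < 7)
+ *		dev_warn(dev, "DPNI API %u.%u is older than expected\n",
+ *			 major, minor);
+ */
+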
+/**
+ * dpni_set_rx_fs_dist() - Set Rx flow steering distribution
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg: Distribution configuration
+ *
+ * If flow steering is already enabled by a previous call, the classification
+ * key is changed but all existing table rules are kept. If the existing
+ * rules do not match the new key, the results are not predictable; it is
+ * the user's responsibility to keep the key consistent with the rules.
+ * If cfg.enable is set to 1, the command creates a flow steering table and
+ * classifies packets according to it. Packets that miss all table rules
+ * are classified according to the settings made in dpni_set_rx_hash_dist().
+ * If cfg.enable is set to 0, the command clears the flow steering table and
+ * packets are classified according to the settings made in
+ * dpni_set_rx_hash_dist().
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			const struct dpni_rx_dist_cfg *cfg)
+{
+	struct dpni_cmd_set_rx_fs_dist *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_FS_DIST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_rx_fs_dist *)cmd.params;
+	cmd_params->dist_size = cpu_to_le16(cfg->dist_size);
+	dpni_set_field(cmd_params->enable, RX_FS_DIST_ENABLE, cfg->enable);
+	cmd_params->tc = cfg->tc;
+	cmd_params->miss_flow_id = cpu_to_le16(cfg->fs_miss_flow_id);
+	cmd_params->key_cfg_iova = cpu_to_le64(cfg->key_cfg_iova);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_set_rx_hash_dist() - Set Rx hash distribution
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg: Distribution configuration
+ *
+ * If cfg.enable is set to 1, packets are classified using a hash function
+ * based on the key received in the cfg.key_cfg_iova parameter.
+ * If cfg.enable is set to 0, packets are sent to the default queue.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_rx_hash_dist(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  const struct dpni_rx_dist_cfg *cfg)
+{
+	struct dpni_cmd_set_rx_hash_dist *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_HASH_DIST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_rx_hash_dist *)cmd.params;
+	cmd_params->dist_size = cpu_to_le16(cfg->dist_size);
+	dpni_set_field(cmd_params->enable, RX_HASH_DIST_ENABLE, cfg->enable);
+	cmd_params->tc = cfg->tc;
+	cmd_params->key_cfg_iova = cpu_to_le64(cfg->key_cfg_iova);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
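+/*
+ * Illustrative configuration (all values hypothetical; key_iova must point
+ * to DMA-able memory prepared beforehand with dpni_prepare_key_cfg()):
+ *
+ *	struct dpni_rx_dist_cfg dist_cfg = {
+ *		.dist_size = 8,
+ *		.key_cfg_iova = key_iova,
+ *		.enable = 1,
+ *		.tc = 0,
+ *	};
+ *
+ *	err = dpni_set_rx_hash_dist(mc_io, 0, token, &dist_cfg);
+ */
+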
+/**
+ * dpni_add_fs_entry() - Add Flow Steering entry for a specific traffic class
+ *			(to select a flow ID)
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @tc_id:	Traffic class selection (0-7)
+ * @index:	Location in the FS table where to insert the entry.
+ *		Only relevant if MASKING is enabled for FS classification
+ *		on this DPNI; ignored for exact match.
+ * @cfg:	Flow steering rule to add
+ * @action:	Action to be taken as result of a classification hit
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_add_fs_entry(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      u8 tc_id,
+		      u16 index,
+		      const struct dpni_rule_cfg *cfg,
+		      const struct dpni_fs_action_cfg *action)
+{
+	struct dpni_cmd_add_fs_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_FS_ENT,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_add_fs_entry *)cmd.params;
+	cmd_params->tc_id = tc_id;
+	cmd_params->key_size = cfg->key_size;
+	cmd_params->index = cpu_to_le16(index);
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	cmd_params->mask_iova = cpu_to_le64(cfg->mask_iova);
+	cmd_params->options = cpu_to_le16(action->options);
+	cmd_params->flow_id = cpu_to_le16(action->flow_id);
+	cmd_params->flc = cpu_to_le64(action->flc);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_remove_fs_entry() - Remove Flow Steering entry from a specific
+ *			    traffic class
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @tc_id:	Traffic class selection (0-7)
+ * @cfg:	Flow steering rule to remove
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_remove_fs_entry(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 tc_id,
+			 const struct dpni_rule_cfg *cfg)
+{
+	struct dpni_cmd_remove_fs_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_FS_ENT,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_remove_fs_entry *)cmd.params;
+	cmd_params->tc_id = tc_id;
+	cmd_params->key_size = cfg->key_size;
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	cmd_params->mask_iova = cpu_to_le64(cfg->mask_iova);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
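+
+/*
+ * Illustrative pairing of the two calls above (all identifiers and values
+ * hypothetical): an entry steering matching TC 0 traffic to queue 3 is
+ * installed and later removed using the same rule description.
+ *
+ *	struct dpni_rule_cfg rule = {
+ *		.key_iova = key_iova,
+ *		.mask_iova = 0,
+ *		.key_size = key_size,
+ *	};
+ *	struct dpni_fs_action_cfg act = { .flow_id = 3 };
+ *
+ *	err = dpni_add_fs_entry(mc_io, 0, token, 0, 0, &rule, &act);
+ *	...
+ *	err = dpni_remove_fs_entry(mc_io, 0, token, 0, &rule);
+ */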
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
new file mode 100644
index 0000000..ee0711d
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -0,0 +1,969 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ */
+#ifndef __FSL_DPNI_H
+#define __FSL_DPNI_H
+
+#include "dpkg.h"
+
+struct fsl_mc_io;
+
+/**
+ * Data Path Network Interface API
+ * Contains initialization APIs and runtime control APIs for DPNI
+ */
+
+/** General DPNI macros */
+
+/**
+ * Maximum number of traffic classes
+ */
+#define DPNI_MAX_TC				8
+/**
+ * Maximum number of buffer pools per DPNI
+ */
+#define DPNI_MAX_DPBP				8
+
+/**
+ * All traffic classes considered; see dpni_set_queue()
+ */
+#define DPNI_ALL_TCS				(u8)(-1)
+/**
+ * All flows within traffic class considered; see dpni_set_queue()
+ */
+#define DPNI_ALL_TC_FLOWS			(u16)(-1)
+/**
+ * Generate new flow ID; see dpni_set_queue()
+ */
+#define DPNI_NEW_FLOW_ID			(u16)(-1)
+
+/**
+ * Tx traffic is always released to a buffer pool on transmit; there are no
+ * resources allocated to have the frames confirmed back to the source after
+ * transmission.
+ */
+#define DPNI_OPT_TX_FRM_RELEASE			0x000001
+/**
+ * Disables support for MAC address filtering for addresses other than primary
+ * MAC address. This affects both unicast and multicast. Promiscuous mode can
+ * still be enabled/disabled for both unicast and multicast. If promiscuous mode
+ * is disabled, only traffic matching the primary MAC address will be accepted.
+ */
+#define DPNI_OPT_NO_MAC_FILTER			0x000002
+/**
+ * Allocate policers for this DPNI. They can be used to rate-limit traffic on
+ * a per traffic class (TC) basis.
+ */
+#define DPNI_OPT_HAS_POLICING			0x000004
+/**
+ * Congestion can be managed in several ways, allowing the buffer pool to
+ * deplete on ingress, taildrop on each queue or use congestion groups for sets
+ * of queues. If set, it configures a single congestion group across all TCs.
+ * If reset, a congestion group is allocated for each TC. Only relevant if the
+ * DPNI has multiple traffic classes.
+ */
+#define DPNI_OPT_SHARED_CONGESTION		0x000008
+/**
+ * Enables TCAM for Flow Steering and QoS look-ups. If not specified, all
+ * look-ups are exact match. Note that TCAM is not available on LS1088 and its
+ * variants. Setting this bit on these SoCs will trigger an error.
+ */
+#define DPNI_OPT_HAS_KEY_MASKING		0x000010
+/**
+ * Disables the flow steering table.
+ */
+#define DPNI_OPT_NO_FS				0x000020
+
+int dpni_open(struct fsl_mc_io	*mc_io,
+	      u32		cmd_flags,
+	      int		dpni_id,
+	      u16		*token);
+
+int dpni_close(struct fsl_mc_io	*mc_io,
+	       u32		cmd_flags,
+	       u16		token);
+
+/**
+ * struct dpni_pools_cfg - Structure representing buffer pools configuration
+ * @num_dpbp: Number of DPBPs
+ * @pools: Array of buffer pools parameters; the number of valid entries
+ *	must match the 'num_dpbp' value
+ * @pools.dpbp_id: DPBP object ID
+ * @pools.buffer_size: Buffer size
+ * @pools.backup_pool: Backup pool
+ */
+struct dpni_pools_cfg {
+	u8		num_dpbp;
+	struct {
+		int	dpbp_id;
+		u16	buffer_size;
+		int	backup_pool;
+	} pools[DPNI_MAX_DPBP];
+};
+
+int dpni_set_pools(struct fsl_mc_io		*mc_io,
+		   u32				cmd_flags,
+		   u16				token,
+		   const struct dpni_pools_cfg	*cfg);
+
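+/*
+ * Minimal single-pool setup sketch (values are placeholders; dpbp_id is the
+ * ID of an already-created DPBP object):
+ *
+ *	struct dpni_pools_cfg pools_params = {
+ *		.num_dpbp = 1,
+ *		.pools[0] = { .dpbp_id = dpbp_id, .buffer_size = 2048 },
+ *	};
+ *
+ *	err = dpni_set_pools(mc_io, 0, token, &pools_params);
+ */
+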
+int dpni_enable(struct fsl_mc_io	*mc_io,
+		u32			cmd_flags,
+		u16			token);
+
+int dpni_disable(struct fsl_mc_io	*mc_io,
+		 u32			cmd_flags,
+		 u16			token);
+
+int dpni_is_enabled(struct fsl_mc_io	*mc_io,
+		    u32			cmd_flags,
+		    u16			token,
+		    int			*en);
+
+int dpni_reset(struct fsl_mc_io	*mc_io,
+	       u32		cmd_flags,
+	       u16		token);
+
+/**
+ * DPNI IRQ Index and Events
+ */
+
+/**
+ * IRQ index
+ */
+#define DPNI_IRQ_INDEX				0
+/**
+ * IRQ events:
+ *       indicates a change in link state
+ *       indicates a change in endpoint
+ */
+#define DPNI_IRQ_EVENT_LINK_CHANGED		0x00000001
+#define DPNI_IRQ_EVENT_ENDPOINT_CHANGED		0x00000002
+
+int dpni_set_irq_enable(struct fsl_mc_io	*mc_io,
+			u32			cmd_flags,
+			u16			token,
+			u8			irq_index,
+			u8			en);
+
+int dpni_get_irq_enable(struct fsl_mc_io	*mc_io,
+			u32			cmd_flags,
+			u16			token,
+			u8			irq_index,
+			u8			*en);
+
+int dpni_set_irq_mask(struct fsl_mc_io	*mc_io,
+		      u32		cmd_flags,
+		      u16		token,
+		      u8		irq_index,
+		      u32		mask);
+
+int dpni_get_irq_mask(struct fsl_mc_io	*mc_io,
+		      u32		cmd_flags,
+		      u16		token,
+		      u8		irq_index,
+		      u32		*mask);
+
+int dpni_get_irq_status(struct fsl_mc_io	*mc_io,
+			u32			cmd_flags,
+			u16			token,
+			u8			irq_index,
+			u32			*status);
+
+int dpni_clear_irq_status(struct fsl_mc_io	*mc_io,
+			  u32			cmd_flags,
+			  u16			token,
+			  u8			irq_index,
+			  u32			status);
+
+/**
+ * struct dpni_attr - Structure representing DPNI attributes
+ * @options: Any combination of the following options:
+ *		DPNI_OPT_TX_FRM_RELEASE
+ *		DPNI_OPT_NO_MAC_FILTER
+ *		DPNI_OPT_HAS_POLICING
+ *		DPNI_OPT_SHARED_CONGESTION
+ *		DPNI_OPT_HAS_KEY_MASKING
+ *		DPNI_OPT_NO_FS
+ * @num_queues: Number of Tx and Rx queues used for traffic distribution.
+ * @num_tcs: Number of traffic classes (TCs), reserved for the DPNI.
+ * @mac_filter_entries: Number of entries in the MAC address filtering table.
+ * @vlan_filter_entries: Number of entries in the VLAN address filtering table.
+ * @qos_entries: Number of entries in the QoS classification table.
+ * @fs_entries: Number of entries in the flow steering table.
+ * @qos_key_size: Size, in bytes, of the QoS look-up key. Defining a key larger
+ *		than this when adding QoS entries will result in an error.
+ * @fs_key_size: Size, in bytes, of the flow steering look-up key. Defining a
+ *		key larger than this when composing the hash + FS key will
+ *		result in an error.
+ * @wriop_version: Version of WRIOP HW block. The 3 version values are stored
+ *		in 6, 5 and 5 bits respectively.
+ */
+struct dpni_attr {
+	u32 options;
+	u8 num_queues;
+	u8 num_tcs;
+	u8 mac_filter_entries;
+	u8 vlan_filter_entries;
+	u8 qos_entries;
+	u16 fs_entries;
+	u8 qos_key_size;
+	u8 fs_key_size;
+	u16 wriop_version;
+};
+
+int dpni_get_attributes(struct fsl_mc_io	*mc_io,
+			u32			cmd_flags,
+			u16			token,
+			struct dpni_attr	*attr);
+
+/**
+ * DPNI errors
+ */
+
+/**
+ * Extract out of frame header error
+ */
+#define DPNI_ERROR_EOFHE	0x00020000
+/**
+ * Frame length error
+ */
+#define DPNI_ERROR_FLE		0x00002000
+/**
+ * Frame physical error
+ */
+#define DPNI_ERROR_FPE		0x00001000
+/**
+ * Parsing header error
+ */
+#define DPNI_ERROR_PHE		0x00000020
+/**
+ * Parser L3 checksum error
+ */
+#define DPNI_ERROR_L3CE		0x00000004
+/**
+ * Parser L4 checksum error
+ */
+#define DPNI_ERROR_L4CE		0x00000001
+
+/**
+ * enum dpni_error_action - Defines DPNI behavior for errors
+ * @DPNI_ERROR_ACTION_DISCARD: Discard the frame
+ * @DPNI_ERROR_ACTION_CONTINUE: Continue with the normal flow
+ * @DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE: Send the frame to the error queue
+ */
+enum dpni_error_action {
+	DPNI_ERROR_ACTION_DISCARD = 0,
+	DPNI_ERROR_ACTION_CONTINUE = 1,
+	DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE = 2
+};
+
+/**
+ * struct dpni_error_cfg - Structure representing DPNI errors treatment
+ * @errors: Errors mask; use any combination of 'DPNI_ERROR_<X>' values
+ * @error_action: The desired action for the errors mask
+ * @set_frame_annotation: Set to '1' to mark the errors in frame annotation
+ *		status (FAS); relevant only for the non-discard action
+ */
+struct dpni_error_cfg {
+	u32			errors;
+	enum dpni_error_action	error_action;
+	int			set_frame_annotation;
+};
+
+int dpni_set_errors_behavior(struct fsl_mc_io		*mc_io,
+			     u32			cmd_flags,
+			     u16			token,
+			     struct dpni_error_cfg	*cfg);
+
+/**
+ * DPNI buffer layout modification options
+ */
+
+/**
+ * Select to modify the time-stamp setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_TIMESTAMP		0x00000001
+/**
+ * Select to modify the parser-result setting; not applicable for Tx
+ */
+#define DPNI_BUF_LAYOUT_OPT_PARSER_RESULT	0x00000002
+/**
+ * Select to modify the frame-status setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_FRAME_STATUS	0x00000004
+/**
+ * Select to modify the private-data-size setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE	0x00000008
+/**
+ * Select to modify the data-alignment setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_DATA_ALIGN		0x00000010
+/**
+ * Select to modify the data-head-room setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM	0x00000020
+/**
+ * Select to modify the data-tail-room setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM	0x00000040
+
+/**
+ * struct dpni_buffer_layout - Structure representing DPNI buffer layout
+ * @options: Flags representing the suggested modifications to the buffer
+ *		layout; Use any combination of 'DPNI_BUF_LAYOUT_OPT_<X>' flags
+ * @pass_timestamp: Pass timestamp value
+ * @pass_parser_result: Pass parser results
+ * @pass_frame_status: Pass frame status
+ * @private_data_size: Size kept for private data (in bytes)
+ * @data_align: Data alignment
+ * @data_head_room: Data head room
+ * @data_tail_room: Data tail room
+ */
+struct dpni_buffer_layout {
+	u32	options;
+	int	pass_timestamp;
+	int	pass_parser_result;
+	int	pass_frame_status;
+	u16	private_data_size;
+	u16	data_align;
+	u16	data_head_room;
+	u16	data_tail_room;
+};
+
+/**
+ * enum dpni_queue_type - Identifies a type of queue targeted by the command
+ * @DPNI_QUEUE_RX: Rx queue
+ * @DPNI_QUEUE_TX: Tx queue
+ * @DPNI_QUEUE_TX_CONFIRM: Tx confirmation queue
+ * @DPNI_QUEUE_RX_ERR: Rx error queue
+ */
+enum dpni_queue_type {
+	DPNI_QUEUE_RX,
+	DPNI_QUEUE_TX,
+	DPNI_QUEUE_TX_CONFIRM,
+	DPNI_QUEUE_RX_ERR,
+};
+
+int dpni_get_buffer_layout(struct fsl_mc_io		*mc_io,
+			   u32				cmd_flags,
+			   u16				token,
+			   enum dpni_queue_type		qtype,
+			   struct dpni_buffer_layout	*layout);
+
+int dpni_set_buffer_layout(struct fsl_mc_io		   *mc_io,
+			   u32				   cmd_flags,
+			   u16				   token,
+			   enum dpni_queue_type		   qtype,
+			   const struct dpni_buffer_layout *layout);
+
+/**
+ * enum dpni_offload - Identifies a type of offload targeted by the command
+ * @DPNI_OFF_RX_L3_CSUM: Rx L3 checksum validation
+ * @DPNI_OFF_RX_L4_CSUM: Rx L4 checksum validation
+ * @DPNI_OFF_TX_L3_CSUM: Tx L3 checksum generation
+ * @DPNI_OFF_TX_L4_CSUM: Tx L4 checksum generation
+ */
+enum dpni_offload {
+	DPNI_OFF_RX_L3_CSUM,
+	DPNI_OFF_RX_L4_CSUM,
+	DPNI_OFF_TX_L3_CSUM,
+	DPNI_OFF_TX_L4_CSUM,
+};
+
+int dpni_set_offload(struct fsl_mc_io	*mc_io,
+		     u32		cmd_flags,
+		     u16		token,
+		     enum dpni_offload	type,
+		     u32		config);
+
+int dpni_get_offload(struct fsl_mc_io	*mc_io,
+		     u32		cmd_flags,
+		     u16		token,
+		     enum dpni_offload	type,
+		     u32		*config);
+
+int dpni_get_qdid(struct fsl_mc_io	*mc_io,
+		  u32			cmd_flags,
+		  u16			token,
+		  enum dpni_queue_type	qtype,
+		  u16			*qdid);
+
+int dpni_get_tx_data_offset(struct fsl_mc_io	*mc_io,
+			    u32			cmd_flags,
+			    u16			token,
+			    u16			*data_offset);
+
+#define DPNI_STATISTICS_CNT		7
+
+/**
+ * union dpni_statistics - Union describing the DPNI statistics
+ * @page_0: Page_0 statistics structure
+ * @page_0.ingress_all_frames: Ingress frame count
+ * @page_0.ingress_all_bytes: Ingress byte count
+ * @page_0.ingress_multicast_frames: Ingress multicast frame count
+ * @page_0.ingress_multicast_bytes: Ingress multicast byte count
+ * @page_0.ingress_broadcast_frames: Ingress broadcast frame count
+ * @page_0.ingress_broadcast_bytes: Ingress broadcast byte count
+ * @page_1: Page_1 statistics structure
+ * @page_1.egress_all_frames: Egress frame count
+ * @page_1.egress_all_bytes: Egress byte count
+ * @page_1.egress_multicast_frames: Egress multicast frame count
+ * @page_1.egress_multicast_bytes: Egress multicast byte count
+ * @page_1.egress_broadcast_frames: Egress broadcast frame count
+ * @page_1.egress_broadcast_bytes: Egress broadcast byte count
+ * @page_2: Page_2 statistics structure
+ * @page_2.ingress_filtered_frames: Ingress filtered frame count
+ * @page_2.ingress_discarded_frames: Ingress discarded frame count
+ * @page_2.ingress_nobuffer_discards: Ingress discarded frame count due to
+ *	lack of buffers
+ * @page_2.egress_discarded_frames: Egress discarded frame count
+ * @page_2.egress_confirmed_frames: Egress confirmed frame count
+ * @page_3: Page_3 statistics structure
+ * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes
+ *	dequeued from egress FQs
+ * @page_3.egress_dequeue_frames: Cumulative count of the number of frames
+ *	dequeued from egress FQs
+ * @page_3.egress_reject_bytes: Cumulative count of the number of bytes in
+ *	egress frames whose enqueue was rejected
+ * @page_3.egress_reject_frames: Cumulative count of the number of egress
+ *	frames whose enqueue was rejected
+ * @page_4: Page_4 statistics structure: congestion points
+ * @page_4.cgr_reject_frames: number of rejected frames due to congestion point
+ * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point
+ * @page_5: Page_5 statistics structure: policer
+ * @page_5.policer_cnt_red: Number of red colored frames
+ * @page_5.policer_cnt_yellow: number of yellow colored frames
+ * @page_5.policer_cnt_green: number of green colored frames
+ * @page_5.policer_cnt_re_red: number of recolored red frames
+ * @page_5.policer_cnt_re_yellow: number of recolored yellow frames
+ * @page_6: Page_6 statistics structure
+ * @page_6.tx_pending_frames: total number of frames pending in egress FQs
+ * @raw: raw statistics structure, used to index counters
+ */
+union dpni_statistics {
+	struct {
+		u64 ingress_all_frames;
+		u64 ingress_all_bytes;
+		u64 ingress_multicast_frames;
+		u64 ingress_multicast_bytes;
+		u64 ingress_broadcast_frames;
+		u64 ingress_broadcast_bytes;
+	} page_0;
+	struct {
+		u64 egress_all_frames;
+		u64 egress_all_bytes;
+		u64 egress_multicast_frames;
+		u64 egress_multicast_bytes;
+		u64 egress_broadcast_frames;
+		u64 egress_broadcast_bytes;
+	} page_1;
+	struct {
+		u64 ingress_filtered_frames;
+		u64 ingress_discarded_frames;
+		u64 ingress_nobuffer_discards;
+		u64 egress_discarded_frames;
+		u64 egress_confirmed_frames;
+	} page_2;
+	struct {
+		u64 egress_dequeue_bytes;
+		u64 egress_dequeue_frames;
+		u64 egress_reject_bytes;
+		u64 egress_reject_frames;
+	} page_3;
+	struct {
+		u64 cgr_reject_frames;
+		u64 cgr_reject_bytes;
+	} page_4;
+	struct {
+		u64 policer_cnt_red;
+		u64 policer_cnt_yellow;
+		u64 policer_cnt_green;
+		u64 policer_cnt_re_red;
+		u64 policer_cnt_re_yellow;
+	} page_5;
+	struct {
+		u64 tx_pending_frames;
+	} page_6;
+	struct {
+		u64 counter[DPNI_STATISTICS_CNT];
+	} raw;
+};
+
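+/*
+ * Note: the named page views alias the raw view, so after reading page 0
+ * the same 64-bit counter is reachable both as page_0.ingress_all_frames
+ * and as raw.counter[0].
+ */
+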
+int dpni_get_statistics(struct fsl_mc_io	*mc_io,
+			u32			cmd_flags,
+			u16			token,
+			u8			page,
+			union dpni_statistics	*stat);
+
+/**
+ * Enable auto-negotiation
+ */
+#define DPNI_LINK_OPT_AUTONEG		0x0000000000000001ULL
+/**
+ * Enable half-duplex mode
+ */
+#define DPNI_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
+/**
+ * Enable pause frames
+ */
+#define DPNI_LINK_OPT_PAUSE		0x0000000000000004ULL
+/**
+ * Enable a-symmetric pause frames
+ */
+#define DPNI_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
+
+/**
+ * struct dpni_link_cfg - Structure representing DPNI link configuration
+ * @rate: Link rate, in Mbps
+ * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
+ */
+struct dpni_link_cfg {
+	u32 rate;
+	u64 options;
+};
+
+int dpni_set_link_cfg(struct fsl_mc_io			*mc_io,
+		      u32				cmd_flags,
+		      u16				token,
+		      const struct dpni_link_cfg	*cfg);
+
+int dpni_get_link_cfg(struct fsl_mc_io			*mc_io,
+		      u32				cmd_flags,
+		      u16				token,
+		      struct dpni_link_cfg		*cfg);
+
+/**
+ * struct dpni_link_state - Structure representing DPNI link state
+ * @rate: Link rate, in Mbps
+ * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
+ * @up: Link state; '0' for down, '1' for up
+ */
+struct dpni_link_state {
+	u32	rate;
+	u64	options;
+	int	up;
+};
+
+int dpni_get_link_state(struct fsl_mc_io	*mc_io,
+			u32			cmd_flags,
+			u16			token,
+			struct dpni_link_state	*state);
+
+int dpni_set_max_frame_length(struct fsl_mc_io	*mc_io,
+			      u32		cmd_flags,
+			      u16		token,
+			      u16		max_frame_length);
+
+int dpni_get_max_frame_length(struct fsl_mc_io	*mc_io,
+			      u32		cmd_flags,
+			      u16		token,
+			      u16		*max_frame_length);
+
+int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io,
+			       u32		cmd_flags,
+			       u16		token,
+			       int		en);
+
+int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io,
+			       u32		cmd_flags,
+			       u16		token,
+			       int		*en);
+
+int dpni_set_unicast_promisc(struct fsl_mc_io	*mc_io,
+			     u32		cmd_flags,
+			     u16		token,
+			     int		en);
+
+int dpni_get_unicast_promisc(struct fsl_mc_io	*mc_io,
+			     u32		cmd_flags,
+			     u16		token,
+			     int		*en);
+
+int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io,
+			      u32		cmd_flags,
+			      u16		token,
+			      const u8		mac_addr[6]);
+
+int dpni_get_primary_mac_addr(struct fsl_mc_io	*mc_io,
+			      u32		cmd_flags,
+			      u16		token,
+			      u8		mac_addr[6]);
+
+int dpni_get_port_mac_addr(struct fsl_mc_io	*mc_io,
+			   u32			cm_flags,
+			   u16			token,
+			   u8			mac_addr[6]);
+
+int dpni_add_mac_addr(struct fsl_mc_io	*mc_io,
+		      u32		cmd_flags,
+		      u16		token,
+		      const u8		mac_addr[6]);
+
+int dpni_remove_mac_addr(struct fsl_mc_io	*mc_io,
+			 u32			cmd_flags,
+			 u16			token,
+			 const u8		mac_addr[6]);
+
+int dpni_clear_mac_filters(struct fsl_mc_io	*mc_io,
+			   u32			cmd_flags,
+			   u16			token,
+			   int			unicast,
+			   int			multicast);
+
+/**
+ * enum dpni_dist_mode - DPNI distribution mode
+ * @DPNI_DIST_MODE_NONE: No distribution
+ * @DPNI_DIST_MODE_HASH: Use hash distribution; only relevant if
+ *		the 'DPNI_OPT_DIST_HASH' option was set at DPNI creation
+ * @DPNI_DIST_MODE_FS:  Use explicit flow steering; only relevant if
+ *	 the 'DPNI_OPT_DIST_FS' option was set at DPNI creation
+ */
+enum dpni_dist_mode {
+	DPNI_DIST_MODE_NONE = 0,
+	DPNI_DIST_MODE_HASH = 1,
+	DPNI_DIST_MODE_FS = 2
+};
+
+/**
+ * enum dpni_fs_miss_action -   DPNI Flow Steering miss action
+ * @DPNI_FS_MISS_DROP: In case of no-match, drop the frame
+ * @DPNI_FS_MISS_EXPLICIT_FLOWID: In case of no-match, use explicit flow-id
+ * @DPNI_FS_MISS_HASH: In case of no-match, distribute using hash
+ */
+enum dpni_fs_miss_action {
+	DPNI_FS_MISS_DROP = 0,
+	DPNI_FS_MISS_EXPLICIT_FLOWID = 1,
+	DPNI_FS_MISS_HASH = 2
+};
+
+/**
+ * struct dpni_fs_tbl_cfg - Flow Steering table configuration
+ * @miss_action: Miss action selection
+ * @default_flow_id: Used when 'miss_action = DPNI_FS_MISS_EXPLICIT_FLOWID'
+ */
+struct dpni_fs_tbl_cfg {
+	enum dpni_fs_miss_action	miss_action;
+	u16				default_flow_id;
+};
+
+int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg,
+			 u8 *key_cfg_buf);
+
+/**
+ * struct dpni_rx_tc_dist_cfg - Rx traffic class distribution configuration
+ * @dist_size: Set the distribution size;
+ *	supported values: 1,2,3,4,6,7,8,12,14,16,24,28,32,48,56,64,96,
+ *	112,128,192,224,256,384,448,512,768,896,1024
+ * @dist_mode: Distribution mode
+ * @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with
+ *		the extractions to be used for the distribution key by calling
+ *		dpni_prepare_key_cfg() relevant only when
+ *		'dist_mode != DPNI_DIST_MODE_NONE', otherwise it can be '0'
+ * @fs_cfg: Flow Steering table configuration; only relevant if
+ *		'dist_mode = DPNI_DIST_MODE_FS'
+ */
+struct dpni_rx_tc_dist_cfg {
+	u16			dist_size;
+	enum dpni_dist_mode	dist_mode;
+	u64			key_cfg_iova;
+	struct dpni_fs_tbl_cfg	fs_cfg;
+};
+
+int dpni_set_rx_tc_dist(struct fsl_mc_io			*mc_io,
+			u32					cmd_flags,
+			u16					token,
+			u8					tc_id,
+			const struct dpni_rx_tc_dist_cfg	*cfg);
+
+/**
+ * When used for fs_miss_flow_id in struct dpni_rx_dist_cfg (consumed by
+ * dpni_set_rx_fs_dist()), signals the DPNI to drop all unclassified frames
+ */
+#define DPNI_FS_MISS_DROP		((uint16_t)-1)
+
+/**
+ * struct dpni_rx_dist_cfg - Rx distribution configuration
+ * @dist_size:	distribution size
+ * @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with
+ *		the extractions to be used for the distribution key by calling
+ *		dpni_prepare_key_cfg(); relevant only when enable!=0 otherwise
+ *		it can be '0'
+ * @enable: enable/disable the distribution.
+ * @tc: TC id for which distribution is set
+ * @fs_miss_flow_id: when packet misses all rules from flow steering table and
+ *		hash is disabled it will be put into this queue id; use
+ *		DPNI_FS_MISS_DROP to drop frames. The value of this field is
+ *		used only when flow steering distribution is enabled and hash
+ *		distribution is disabled
+ */
+struct dpni_rx_dist_cfg {
+	u16 dist_size;
+	u64 key_cfg_iova;
+	u8 enable;
+	u8 tc;
+	u16 fs_miss_flow_id;
+};
+
+int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			const struct dpni_rx_dist_cfg *cfg);
+
+int dpni_set_rx_hash_dist(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  const struct dpni_rx_dist_cfg *cfg);
+
+/**
+ * enum dpni_dest - DPNI destination types
+ * @DPNI_DEST_NONE: Unassigned destination; The queue is set in parked mode and
+ *		does not generate FQDAN notifications; user is expected to
+ *		dequeue from the queue based on polling or other user-defined
+ *		method
+ * @DPNI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN
+ *		notifications to the specified DPIO; user is expected to dequeue
+ *		from the queue only after notification is received
+ * @DPNI_DEST_DPCON: The queue is set in schedule mode and does not generate
+ *		FQDAN notifications, but is connected to the specified DPCON
+ *		object; user is expected to dequeue from the DPCON channel
+ */
+enum dpni_dest {
+	DPNI_DEST_NONE = 0,
+	DPNI_DEST_DPIO = 1,
+	DPNI_DEST_DPCON = 2
+};
+
+/**
+ * struct dpni_queue - Queue structure
+ * @destination - Destination structure
+ * @destination.id: ID of the destination, only relevant if DEST_TYPE is > 0.
+ *	Identifies either a DPIO or a DPCON object.
+ *	Not relevant for Tx queues.
+ * @destination.type:	May be one of the following:
+ *	0 - No destination, queue can be manually
+ *		queried, but will not push traffic or
+ *		notifications to a DPIO;
+ *	1 - The destination is a DPIO. When traffic
+ *		becomes available in the queue a FQDAN
+ *		(FQ data available notification) will be
+ *		generated to selected DPIO;
+ *	2 - The destination is a DPCON. The queue is
+ *		associated with a DPCON object for the
+ *		purpose of scheduling between multiple
+ *		queues. The DPCON may be independently
+ *		configured to generate notifications.
+ *		Not relevant for Tx queues.
+ * @destination.hold_active: Hold active; keeps the queue scheduled on a DPIO
+ *	for longer during dequeue, to reduce the spreading of traffic.
+ *	Only relevant if queues are not affined to a single DPIO.
+ * @user_context: User data, presented to the user along with any frames
+ *	from this queue. Not relevant for Tx queues.
+ * @flc: FD Flow Context structure
+ * @flc.value: Default FLC value for traffic dequeued from
+ *      this queue.  Please check description of FD
+ *      structure for more information.
+ *      Note that FLC values set using dpni_add_fs_entry,
+ *      if any, take precedence over values per queue.
+ * @flc.stash_control: Boolean, indicates whether the 6 least
+ *      significant bits of FLC are used for stash control.  If set, the 6
+ *      least significant bits in value are interpreted as follows:
+ *      - bits 0-1: indicates the number of 64 byte units of context
+ *      that are stashed.  FLC value is interpreted as a memory address
+ *      in this case, excluding the 6 LS bits.
+ *      - bits 2-3: indicates the number of 64 byte units of frame
+ *      annotation to be stashed.  Annotation is placed at FD[ADDR].
+ *      - bits 4-5: indicates the number of 64 byte units of frame
+ *      data to be stashed.  Frame data is placed at FD[ADDR] +
+ *      FD[OFFSET].
+ *      For more details check the Frame Descriptor section in the
+ *      hardware documentation.
+ */
+struct dpni_queue {
+	struct {
+		u16 id;
+		enum dpni_dest type;
+		char hold_active;
+		u8 priority;
+	} destination;
+	u64 user_context;
+	struct {
+		u64 value;
+		char stash_control;
+	} flc;
+};
+
+/**
+ * struct dpni_queue_id - Queue identification, used for enqueue commands
+ *			or queue control
+ * @fqid: FQID used for enqueueing to and/or configuration of this specific FQ
+ * @qdbin: Queueing bin, used to enqueue using QDID, DQBIN, QPRI. Only relevant
+ *		for Tx queues.
+ */
+struct dpni_queue_id {
+	u32 fqid;
+	u16 qdbin;
+};
+
+/**
+ * Queue modification options; use any combination of 'DPNI_QUEUE_OPT_<X>'
+ * in the options argument of dpni_set_queue()
+ */
+#define DPNI_QUEUE_OPT_USER_CTX		0x00000001
+#define DPNI_QUEUE_OPT_DEST		0x00000002
+#define DPNI_QUEUE_OPT_FLC		0x00000004
+#define DPNI_QUEUE_OPT_HOLD_ACTIVE	0x00000008
+
+int dpni_set_queue(struct fsl_mc_io	*mc_io,
+		   u32			cmd_flags,
+		   u16			token,
+		   enum dpni_queue_type	qtype,
+		   u8			tc,
+		   u8			index,
+		   u8			options,
+		   const struct dpni_queue *queue);
+
+int dpni_get_queue(struct fsl_mc_io	*mc_io,
+		   u32			cmd_flags,
+		   u16			token,
+		   enum dpni_queue_type	qtype,
+		   u8			tc,
+		   u8			index,
+		   struct dpni_queue	*queue,
+		   struct dpni_queue_id	*qid);
+
+/**
+ * enum dpni_congestion_unit - DPNI congestion units
+ * @DPNI_CONGESTION_UNIT_BYTES: bytes units
+ * @DPNI_CONGESTION_UNIT_FRAMES: frames units
+ */
+enum dpni_congestion_unit {
+	DPNI_CONGESTION_UNIT_BYTES = 0,
+	DPNI_CONGESTION_UNIT_FRAMES
+};
+
+/**
+ * enum dpni_congestion_point - Structure representing congestion point
+ * @DPNI_CP_QUEUE: Set taildrop per queue, identified by QUEUE_TYPE, TC and
+ *		QUEUE_INDEX
+ * @DPNI_CP_GROUP: Set taildrop per queue group. Depending on options used to
+ *		define the DPNI this can be either per TC (default) or per
+ *		interface (DPNI_OPT_SHARED_CONGESTION set at DPNI create).
+ *		QUEUE_INDEX is ignored if this type is used.
+ */
+enum dpni_congestion_point {
+	DPNI_CP_QUEUE,
+	DPNI_CP_GROUP,
+};
+
+/**
+ * struct dpni_taildrop - Structure representing the taildrop
+ * @enable:	Indicates whether the taildrop is active or not.
+ * @units:	Indicates the unit of THRESHOLD. Queue taildrop only supports
+ *		byte units, this field is ignored and assumed = 0 if
+ *		CONGESTION_POINT is 0.
+ * @threshold:	Threshold value, in units identified by UNITS field. Value 0
+ *		cannot be used as a valid taildrop threshold, THRESHOLD must
+ *		be > 0 if the taildrop is enabled.
+ */
+struct dpni_taildrop {
+	char enable;
+	enum dpni_congestion_unit units;
+	u32 threshold;
+};
+
+int dpni_set_taildrop(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      enum dpni_congestion_point cg_point,
+		      enum dpni_queue_type q_type,
+		      u8 tc,
+		      u8 q_index,
+		      struct dpni_taildrop *taildrop);
+
+int dpni_get_taildrop(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      enum dpni_congestion_point cg_point,
+		      enum dpni_queue_type q_type,
+		      u8 tc,
+		      u8 q_index,
+		      struct dpni_taildrop *taildrop);
+
+/**
+ * struct dpni_rule_cfg - Rule configuration for table lookup
+ * @key_iova: I/O virtual address of the key (must be in DMA-able memory)
+ * @mask_iova: I/O virtual address of the mask (must be in DMA-able memory)
+ * @key_size: key and mask size (in bytes)
+ */
+struct dpni_rule_cfg {
+	u64	key_iova;
+	u64	mask_iova;
+	u8	key_size;
+};
+
+/**
+ * Discard matching traffic. If set, this takes precedence over any other
+ * configuration and matching traffic is always discarded.
+ */
+#define DPNI_FS_OPT_DISCARD            0x1
+
+/**
+ * Set FLC value. If set, flc member of struct dpni_fs_action_cfg is used to
+ * override the FLC value set per queue.
+ * For more details check the Frame Descriptor section in the hardware
+ * documentation.
+ */
+#define DPNI_FS_OPT_SET_FLC            0x2
+
+/**
+ * Indicates whether the 6 lowest significant bits of FLC are used for stash
+ * control. If set, the 6 least significant bits in value are interpreted as
+ * follows:
+ *     - bits 0-1: indicates the number of 64 byte units of context that are
+ *     stashed. FLC value is interpreted as a memory address in this case,
+ *     excluding the 6 LS bits.
+ *     - bits 2-3: indicates the number of 64 byte units of frame annotation
+ *     to be stashed. Annotation is placed at FD[ADDR].
+ *     - bits 4-5: indicates the number of 64 byte units of frame data to be
+ *     stashed. Frame data is placed at FD[ADDR] + FD[OFFSET].
+ * This flag is ignored if DPNI_FS_OPT_SET_FLC is not specified.
+ */
+#define DPNI_FS_OPT_SET_STASH_CONTROL  0x4
+
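+/*
+ * Worked example of the stash-control encoding above (the address is a
+ * placeholder; 'act' is a struct dpni_fs_action_cfg as defined below): to
+ * stash one 64-byte unit of context and two 64-byte units of frame data,
+ * the 6 LS bits of FLC are (2 << 4) | (0 << 2) | 1 = 0x21:
+ *
+ *	act.flc = (ctx_addr & ~0x3fULL) | 0x21;
+ *	act.options = DPNI_FS_OPT_SET_FLC | DPNI_FS_OPT_SET_STASH_CONTROL;
+ */
+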
+/**
+ * struct dpni_fs_action_cfg - Action configuration for table look-up
+ * @flc:	FLC value for traffic matching this rule. Please check the
+ *		Frame Descriptor section in the hardware documentation for
+ *		more information.
+ * @flow_id:	Identifies the Rx queue used for matching traffic. Supported
+ *		values are in range 0 to num_queue-1.
+ * @options:	Any combination of DPNI_FS_OPT_ values.
+ */
+struct dpni_fs_action_cfg {
+	u64 flc;
+	u16 flow_id;
+	u16 options;
+};
+
+int dpni_add_fs_entry(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      u8 tc_id,
+		      u16 index,
+		      const struct dpni_rule_cfg *cfg,
+		      const struct dpni_fs_action_cfg *action);
+
+int dpni_remove_fs_entry(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 tc_id,
+			 const struct dpni_rule_cfg *cfg);
+
+int dpni_get_api_version(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 *major_ver,
+			 u16 *minor_ver);
+
+#endif /* __FSL_DPNI_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
new file mode 100644
index 0000000..4ac05bf
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2018 NXP
+ */
+
+#ifndef _FSL_DPRTC_CMD_H
+#define _FSL_DPRTC_CMD_H
+
+/* Command versioning */
+#define DPRTC_CMD_BASE_VERSION		1
+#define DPRTC_CMD_ID_OFFSET		4
+
+#define DPRTC_CMD(id)	(((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_BASE_VERSION)
+
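+/*
+ * For example, DPRTC_CMDID_OPEN below expands to (0x810 << 4) | 1 = 0x8101:
+ * the command ID sits above bit 4 and the low nibble carries the command
+ * base version.
+ */
+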
+/* Command IDs */
+#define DPRTC_CMDID_CLOSE			DPRTC_CMD(0x800)
+#define DPRTC_CMDID_OPEN			DPRTC_CMD(0x810)
+
+#define DPRTC_CMDID_SET_IRQ_ENABLE		DPRTC_CMD(0x012)
+#define DPRTC_CMDID_GET_IRQ_ENABLE		DPRTC_CMD(0x013)
+#define DPRTC_CMDID_SET_IRQ_MASK		DPRTC_CMD(0x014)
+#define DPRTC_CMDID_GET_IRQ_MASK		DPRTC_CMD(0x015)
+#define DPRTC_CMDID_GET_IRQ_STATUS		DPRTC_CMD(0x016)
+#define DPRTC_CMDID_CLEAR_IRQ_STATUS		DPRTC_CMD(0x017)
+
+#pragma pack(push, 1)
+struct dprtc_cmd_open {
+	__le32 dprtc_id;
+};
+
+struct dprtc_cmd_get_irq {
+	__le32 pad;
+	u8 irq_index;
+};
+
+struct dprtc_cmd_set_irq_enable {
+	u8 en;
+	u8 pad[3];
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_enable {
+	u8 en;
+};
+
+struct dprtc_cmd_set_irq_mask {
+	__le32 mask;
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_mask {
+	__le32 mask;
+};
+
+struct dprtc_cmd_get_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_status {
+	__le32 status;
+};
+
+struct dprtc_cmd_clear_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+#pragma pack(pop)
+
+#endif /* _FSL_DPRTC_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.c b/drivers/net/ethernet/freescale/dpaa2/dprtc.c
new file mode 100644
index 0000000..ed52a34
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2018 NXP
+ */
+
+#include <linux/fsl/mc.h>
+
+#include "dprtc.h"
+#include "dprtc-cmd.h"
+
+/**
+ * dprtc_open() - Open a control session for the specified object.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dprtc_id:	DPRTC unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dprtc_create function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_open(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       int dprtc_id,
+	       u16 *token)
+{
+	struct dprtc_cmd_open *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_OPEN,
+					  cmd_flags,
+					  0);
+	cmd_params = (struct dprtc_cmd_open *)cmd.params;
+	cmd_params->dprtc_id = cpu_to_le32(dprtc_id);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+
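+/*
+ * Typical session pattern built on this call (sketch; 'dprtc_id' would come
+ * from the fsl-mc bus device in a real driver, and intermediate errors are
+ * not shown):
+ *
+ *	u16 token;
+ *
+ *	err = dprtc_open(mc_io, 0, dprtc_id, &token);
+ *	if (err)
+ *		return err;
+ *	... issue dprtc_*() commands using token ...
+ *	dprtc_close(mc_io, 0, token);
+ */
+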
+/**
+ * dprtc_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_close(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CLOSE, cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dprtc_set_irq_enable() - Set overall interrupt state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Interrupt state - enable = 1, disable = 0
+ *
+ * Allows GPP software to control when interrupts are generated.
+ * Each interrupt can have up to 32 causes. The enable/disable controls the
+ * overall interrupt state: if the interrupt is disabled, no causes will
+ * trigger an interrupt.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_set_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 en)
+{
+	struct dprtc_cmd_set_irq_enable *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_ENABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprtc_cmd_set_irq_enable *)cmd.params;
+	cmd_params->irq_index = irq_index;
+	cmd_params->en = en;
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dprtc_get_irq_enable() - Get overall interrupt state
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Returned interrupt state - enable = 1, disable = 0
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_get_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 *en)
+{
+	struct dprtc_rsp_get_irq_enable *rsp_params;
+	struct dprtc_cmd_get_irq *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_ENABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprtc_cmd_get_irq *)cmd.params;
+	cmd_params->irq_index = irq_index;
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dprtc_rsp_get_irq_enable *)cmd.params;
+	*en = rsp_params->en;
+
+	return 0;
+}
+
+/**
+ * dprtc_set_irq_mask() - Set interrupt mask.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @mask:	Event mask to trigger interrupt;
+ *		each bit:
+ *			0 = ignore event
+ *			1 = consider event for asserting IRQ
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_set_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 mask)
+{
+	struct dprtc_cmd_set_irq_mask *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_MASK,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprtc_cmd_set_irq_mask *)cmd.params;
+	cmd_params->mask = cpu_to_le32(mask);
+	cmd_params->irq_index = irq_index;
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dprtc_get_irq_mask() - Get interrupt mask.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @mask:	Returned event mask to trigger interrupt
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_get_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 *mask)
+{
+	struct dprtc_rsp_get_irq_mask *rsp_params;
+	struct dprtc_cmd_get_irq *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_MASK,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprtc_cmd_get_irq *)cmd.params;
+	cmd_params->irq_index = irq_index;
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dprtc_rsp_get_irq_mask *)cmd.params;
+	*mask = le32_to_cpu(rsp_params->mask);
+
+	return 0;
+}
+
+/**
+ * dprtc_get_irq_status() - Get the current status of any pending interrupts.
+ *
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @status:	Returned interrupts status - one bit per cause:
+ *			0 = no interrupt pending
+ *			1 = interrupt pending
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_get_irq_status(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u32 *status)
+{
+	struct dprtc_cmd_get_irq_status *cmd_params;
+	struct dprtc_rsp_get_irq_status *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_STATUS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprtc_cmd_get_irq_status *)cmd.params;
+	cmd_params->status = cpu_to_le32(*status);
+	cmd_params->irq_index = irq_index;
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dprtc_rsp_get_irq_status *)cmd.params;
+	*status = le32_to_cpu(rsp_params->status);
+
+	return 0;
+}
+
+/**
+ * dprtc_clear_irq_status() - Clear a pending interrupt's status
+ *
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @status:	Bits to clear (W1C) - one bit per cause:
+ *			0 = don't change
+ *			1 = clear status bit
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 irq_index,
+			   u32 status)
+{
+	struct dprtc_cmd_clear_irq_status *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CLEAR_IRQ_STATUS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dprtc_cmd_clear_irq_status *)cmd.params;
+	cmd_params->irq_index = irq_index;
+	cmd_params->status = cpu_to_le32(status);
+
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
new file mode 100644
index 0000000..311c184
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2018 NXP
+ */
+
+#ifndef __FSL_DPRTC_H
+#define __FSL_DPRTC_H
+
+/* Data Path Real Time Counter API
+ * Contains initialization APIs and runtime control APIs for RTC
+ */
+
+struct fsl_mc_io;
+
+/**
+ * Number of IRQs
+ */
+#define DPRTC_MAX_IRQ_NUM	1
+#define DPRTC_IRQ_INDEX		0
+
+#define DPRTC_EVENT_PPS		0x08000000
+
+int dprtc_open(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       int dprtc_id,
+	       u16 *token);
+
+int dprtc_close(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token);
+
+int dprtc_set_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 en);
+
+int dprtc_get_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 *en);
+
+int dprtc_set_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 mask);
+
+int dprtc_get_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 *mask);
+
+int dprtc_get_irq_status(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u32 *status);
+
+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 irq_index,
+			   u32 status);
+
+#endif /* __FSL_DPRTC_H */
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
new file mode 100644
index 0000000..c219587
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+config FSL_ENETC
+	tristate "ENETC PF driver"
+	depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST)
+	select PHYLIB
+	help
+	  This driver supports NXP ENETC gigabit Ethernet controller PCIe
+	  physical function (PF) devices, managing ENETC Ports at a privileged
+	  level.
+
+	  If compiled as module (M), the module name is fsl-enetc.
+
+config FSL_ENETC_VF
+	tristate "ENETC VF driver"
+	depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST)
+	select PHYLIB
+	help
+	  This driver supports NXP ENETC gigabit Ethernet controller PCIe
+	  virtual function (VF) devices enabled by the ENETC PF driver.
+
+	  If compiled as module (M), the module name is fsl-enetc-vf.
+
+config FSL_ENETC_MDIO
+	tristate "ENETC MDIO driver"
+	depends on PCI && (ARCH_LAYERSCAPE || COMPILE_TEST)
+	help
+	  This driver supports NXP ENETC Central MDIO controller as a PCIe
+	  physical function (PF) device.
+
+	  If compiled as module (M), the module name is fsl-enetc-mdio.
+
+config FSL_ENETC_PTP_CLOCK
+	tristate "ENETC PTP clock driver"
+	depends on PTP_1588_CLOCK_QORIQ && (FSL_ENETC || FSL_ENETC_VF)
+	default y
+	help
+	  This driver adds support for using the ENETC 1588 timer
+	  as a PTP clock. This clock is only useful if your PTP
+	  programs are getting hardware time stamps on the PTP Ethernet
+	  packets using the SO_TIMESTAMPING API.
+
+	  If compiled as module (M), the module name is fsl-enetc-ptp.
+
+config FSL_ENETC_HW_TIMESTAMPING
+	bool "ENETC hardware timestamping support"
+	depends on FSL_ENETC || FSL_ENETC_VF
+	help
+	  Enable hardware timestamping support on Ethernet packets using
+	  the SO_TIMESTAMPING API. Dynamic allocation of the RX BD ring is
+	  not yet supported, and extended RX BDs are too expensive to use
+	  when timestamping is not needed, so selecting this option enables
+	  extended RX BDs in order to support hardware timestamping.
diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile
new file mode 100644
index 0000000..d200c27
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+common-objs := enetc.o enetc_cbdr.o enetc_ethtool.o
+
+obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o
+fsl-enetc-y := enetc_pf.o enetc_mdio.o $(common-objs)
+fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o
+
+obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o
+fsl-enetc-vf-y := enetc_vf.o $(common-objs)
+
+obj-$(CONFIG_FSL_ENETC_MDIO) += fsl-enetc-mdio.o
+fsl-enetc-mdio-y := enetc_pci_mdio.o enetc_mdio.o
+
+obj-$(CONFIG_FSL_ENETC_PTP_CLOCK) += fsl-enetc-ptp.o
+fsl-enetc-ptp-y := enetc_ptp.o
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
new file mode 100644
index 0000000..b6ff893
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -0,0 +1,1812 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2017-2019 NXP */
+
+#include "enetc.h"
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/of_mdio.h>
+#include <linux/vmalloc.h>
+
+/* ENETC overhead: optional extension BD + 1 BD gap */
+#define ENETC_TXBDS_NEEDED(val)	((val) + 2)
+/* max # of chained Tx BDs is 15, including head and extension BD */
+#define ENETC_MAX_SKB_FRAGS	13
+#define ENETC_TXBDS_MAX_NEEDED	ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
+
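+/*
+ * Worked out: a maximally fragmented skb needs 1 head BD plus
+ * ENETC_MAX_SKB_FRAGS fragment BDs, and ENETC_TXBDS_NEEDED() adds the
+ * optional extension BD and the 1 BD gap, so ENETC_TXBDS_MAX_NEEDED is
+ * 13 + 1 + 2 = 16 free BDs.
+ */
+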
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+			      int active_offloads);
+
+netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_bdr *tx_ring;
+	int count;
+
+	tx_ring = priv->tx_ring[skb->queue_mapping];
+
+	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
+		if (unlikely(skb_linearize(skb)))
+			goto drop_packet_err;
+
+	count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
+	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
+		netif_stop_subqueue(ndev, tx_ring->index);
+		return NETDEV_TX_BUSY;
+	}
+
+	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
+	if (unlikely(!count))
+		goto drop_packet_err;
+
+	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED)
+		netif_stop_subqueue(ndev, tx_ring->index);
+
+	return NETDEV_TX_OK;
+
+drop_packet_err:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+static bool enetc_tx_csum(struct sk_buff *skb, union enetc_tx_bd *txbd)
+{
+	int l3_start, l3_hsize;
+	u16 l3_flags, l4_flags;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return false;
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		l4_flags = ENETC_TXBD_L4_TCP;
+		break;
+	case offsetof(struct udphdr, check):
+		l4_flags = ENETC_TXBD_L4_UDP;
+		break;
+	default:
+		skb_checksum_help(skb);
+		return false;
+	}
+
+	l3_start = skb_network_offset(skb);
+	l3_hsize = skb_network_header_len(skb);
+
+	l3_flags = 0;
+	if (skb->protocol == htons(ETH_P_IPV6))
+		l3_flags = ENETC_TXBD_L3_IPV6;
+
+	/* write BD fields */
+	txbd->l3_csoff = enetc_txbd_l3_csoff(l3_start, l3_hsize, l3_flags);
+	txbd->l4_csoff = l4_flags;
+
+	return true;
+}
+
+static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
+				struct enetc_tx_swbd *tx_swbd)
+{
+	if (tx_swbd->is_dma_page)
+		dma_unmap_page(tx_ring->dev, tx_swbd->dma,
+			       tx_swbd->len, DMA_TO_DEVICE);
+	else
+		dma_unmap_single(tx_ring->dev, tx_swbd->dma,
+				 tx_swbd->len, DMA_TO_DEVICE);
+	tx_swbd->dma = 0;
+}
+
+static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
+			      struct enetc_tx_swbd *tx_swbd)
+{
+	if (tx_swbd->dma)
+		enetc_unmap_tx_buff(tx_ring, tx_swbd);
+
+	if (tx_swbd->skb) {
+		dev_kfree_skb_any(tx_swbd->skb);
+		tx_swbd->skb = NULL;
+	}
+}
+
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+			      int active_offloads)
+{
+	struct enetc_tx_swbd *tx_swbd;
+	skb_frag_t *frag;
+	int len = skb_headlen(skb);
+	union enetc_tx_bd temp_bd;
+	union enetc_tx_bd *txbd;
+	bool do_vlan, do_tstamp;
+	int i, count = 0;
+	unsigned int f;
+	dma_addr_t dma;
+	u8 flags = 0;
+
+	i = tx_ring->next_to_use;
+	txbd = ENETC_TXBD(*tx_ring, i);
+	prefetchw(txbd);
+
+	dma = dma_map_single(tx_ring->dev, skb->data, len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+		goto dma_err;
+
+	temp_bd.addr = cpu_to_le64(dma);
+	temp_bd.buf_len = cpu_to_le16(len);
+	temp_bd.lstatus = 0;
+
+	tx_swbd = &tx_ring->tx_swbd[i];
+	tx_swbd->dma = dma;
+	tx_swbd->len = len;
+	tx_swbd->is_dma_page = 0;
+	count++;
+
+	do_vlan = skb_vlan_tag_present(skb);
+	do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) &&
+		    (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP);
+	tx_swbd->do_tstamp = do_tstamp;
+	tx_swbd->check_wb = tx_swbd->do_tstamp;
+
+	if (do_vlan || do_tstamp)
+		flags |= ENETC_TXBD_FLAGS_EX;
+
+	if (enetc_tx_csum(skb, &temp_bd))
+		flags |= ENETC_TXBD_FLAGS_CSUM | ENETC_TXBD_FLAGS_L4CS;
+
+	/* first BD needs frm_len and offload flags set */
+	temp_bd.frm_len = cpu_to_le16(skb->len);
+	temp_bd.flags = flags;
+
+	if (flags & ENETC_TXBD_FLAGS_EX) {
+		u8 e_flags = 0;
+
+		*txbd = temp_bd;
+		enetc_clear_tx_bd(&temp_bd);
+
+		/* add extension BD for VLAN and/or timestamping */
+		flags = 0;
+		tx_swbd++;
+		txbd++;
+		i++;
+		if (unlikely(i == tx_ring->bd_count)) {
+			i = 0;
+			tx_swbd = tx_ring->tx_swbd;
+			txbd = ENETC_TXBD(*tx_ring, 0);
+		}
+		prefetchw(txbd);
+
+		if (do_vlan) {
+			temp_bd.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb));
+			temp_bd.ext.tpid = 0; /* 0 selects C-TAG */
+			e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS;
+		}
+
+		if (do_tstamp) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP;
+		}
+
+		temp_bd.ext.e_flags = e_flags;
+		count++;
+	}
+
+	frag = &skb_shinfo(skb)->frags[0];
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) {
+		len = skb_frag_size(frag);
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_err;
+
+		*txbd = temp_bd;
+		enetc_clear_tx_bd(&temp_bd);
+
+		flags = 0;
+		tx_swbd++;
+		txbd++;
+		i++;
+		if (unlikely(i == tx_ring->bd_count)) {
+			i = 0;
+			tx_swbd = tx_ring->tx_swbd;
+			txbd = ENETC_TXBD(*tx_ring, 0);
+		}
+		prefetchw(txbd);
+
+		temp_bd.addr = cpu_to_le64(dma);
+		temp_bd.buf_len = cpu_to_le16(len);
+
+		tx_swbd->dma = dma;
+		tx_swbd->len = len;
+		tx_swbd->is_dma_page = 1;
+		count++;
+	}
+
+	/* last BD needs 'F' bit set */
+	flags |= ENETC_TXBD_FLAGS_F;
+	temp_bd.flags = flags;
+	*txbd = temp_bd;
+
+	tx_ring->tx_swbd[i].skb = skb;
+
+	enetc_bdr_idx_inc(tx_ring, &i);
+	tx_ring->next_to_use = i;
+
+	/* let H/W know BD ring has been updated */
+	enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */
+
+	return count;
+
+dma_err:
+	dev_err(tx_ring->dev, "DMA map error\n");
+
+	do {
+		tx_swbd = &tx_ring->tx_swbd[i];
+		enetc_free_tx_skb(tx_ring, tx_swbd);
+		if (i == 0)
+			i = tx_ring->bd_count;
+		i--;
+	} while (count--);
+
+	return 0;
+}
+
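+/* MSI-X handler: mask this vector's Rx/Tx interrupt sources and defer
+ * the actual work to NAPI.
+ */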
+static irqreturn_t enetc_msix(int irq, void *data)
+{
+	struct enetc_int_vector	*v = data;
+	int i;
+
+	/* disable interrupts */
+	enetc_wr_reg(v->rbier, 0);
+
+	for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings)
+		enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0);
+
+	napi_schedule_irqoff(&v->napi);
+
+	return IRQ_HANDLED;
+}
+
+static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget);
+static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
+			       struct napi_struct *napi, int work_limit);
+
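+/* NAPI poll: clean all Tx rings of this vector and its Rx ring; only
+ * re-enable the interrupt sources once both are fully serviced.
+ */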
+static int enetc_poll(struct napi_struct *napi, int budget)
+{
+	struct enetc_int_vector
+		*v = container_of(napi, struct enetc_int_vector, napi);
+	bool complete = true;
+	int work_done;
+	int i;
+
+	for (i = 0; i < v->count_tx_rings; i++)
+		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
+			complete = false;
+
+	work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
+	if (work_done == budget)
+		complete = false;
+
+	if (!complete)
+		return budget;
+
+	napi_complete_done(napi, work_done);
+
+	/* enable interrupts */
+	enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE);
+
+	for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings)
+		enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i),
+			     ENETC_TBIER_TXTIE);
+
+	return work_done;
+}
+
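+/* Count Tx BDs completed by hardware since sw consumer index @ci,
+ * based on the device-updated consumer index register (TBCIR).
+ */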
+static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
+{
+	int pi = enetc_rd_reg(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
+
+	return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
+}
+
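+/* Reconstruct the full 64-bit Tx timestamp: the BD write-back carries
+ * only the low 32 bits, so pair it with the current time read from
+ * SICTR1:SICTR0 and correct for a possible low-word wraparound.
+ */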
+static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
+				u64 *tstamp)
+{
+	u32 lo, hi, tstamp_lo;
+
+	lo = enetc_rd(hw, ENETC_SICTR0);
+	hi = enetc_rd(hw, ENETC_SICTR1);
+	tstamp_lo = le32_to_cpu(txbd->wb.tstamp);
+	if (lo <= tstamp_lo)
+		hi -= 1;
+	*tstamp = (u64)hi << 32 | tstamp_lo;
+}
+
+static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+
+	if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+		shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+		skb_tstamp_tx(skb, &shhwtstamps);
+	}
+}
+
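+/* Reclaim completed Tx BDs: unmap buffers, deliver any pending Tx
+ * timestamp, free the skbs and re-wake the subqueue once enough BDs
+ * are available again. Returns false if the work budget
+ * (ENETC_DEFAULT_TX_WORK) was exhausted.
+ */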
+static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
+{
+	struct net_device *ndev = tx_ring->ndev;
+	int tx_frm_cnt = 0, tx_byte_cnt = 0;
+	struct enetc_tx_swbd *tx_swbd;
+	int i, bds_to_clean;
+	bool do_tstamp;
+	u64 tstamp = 0;
+
+	i = tx_ring->next_to_clean;
+	tx_swbd = &tx_ring->tx_swbd[i];
+	bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+
+	do_tstamp = false;
+
+	while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
+		bool is_eof = !!tx_swbd->skb;
+
+		if (unlikely(tx_swbd->check_wb)) {
+			struct enetc_ndev_priv *priv = netdev_priv(ndev);
+			union enetc_tx_bd *txbd;
+
+			txbd = ENETC_TXBD(*tx_ring, i);
+
+			if (txbd->flags & ENETC_TXBD_FLAGS_W &&
+			    tx_swbd->do_tstamp) {
+				enetc_get_tx_tstamp(&priv->si->hw, txbd,
+						    &tstamp);
+				do_tstamp = true;
+			}
+		}
+
+		if (likely(tx_swbd->dma))
+			enetc_unmap_tx_buff(tx_ring, tx_swbd);
+
+		if (is_eof) {
+			if (unlikely(do_tstamp)) {
+				enetc_tstamp_tx(tx_swbd->skb, tstamp);
+				do_tstamp = false;
+			}
+			napi_consume_skb(tx_swbd->skb, napi_budget);
+			tx_swbd->skb = NULL;
+		}
+
+		tx_byte_cnt += tx_swbd->len;
+
+		bds_to_clean--;
+		tx_swbd++;
+		i++;
+		if (unlikely(i == tx_ring->bd_count)) {
+			i = 0;
+			tx_swbd = tx_ring->tx_swbd;
+		}
+
+		/* BD iteration loop end */
+		if (is_eof) {
+			tx_frm_cnt++;
+			/* re-arm interrupt source */
+			enetc_wr_reg(tx_ring->idr, BIT(tx_ring->index) |
+				     BIT(16 + tx_ring->index));
+		}
+
+		if (unlikely(!bds_to_clean))
+			bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+	}
+
+	tx_ring->next_to_clean = i;
+	tx_ring->stats.packets += tx_frm_cnt;
+	tx_ring->stats.bytes += tx_byte_cnt;
+
+	if (unlikely(tx_frm_cnt && netif_carrier_ok(ndev) &&
+		     __netif_subqueue_stopped(ndev, tx_ring->index) &&
+		     (enetc_bd_unused(tx_ring) >= ENETC_TXBDS_MAX_NEEDED))) {
+		netif_wake_subqueue(ndev, tx_ring->index);
+	}
+
+	return tx_frm_cnt != ENETC_DEFAULT_TX_WORK;
+}
+
+static bool enetc_new_page(struct enetc_bdr *rx_ring,
+			   struct enetc_rx_swbd *rx_swbd)
+{
+	struct page *page;
+	dma_addr_t addr;
+
+	page = dev_alloc_page();
+	if (unlikely(!page))
+		return false;
+
+	addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(rx_ring->dev, addr))) {
+		__free_page(page);
+
+		return false;
+	}
+
+	rx_swbd->dma = addr;
+	rx_swbd->page = page;
+	rx_swbd->page_offset = ENETC_RXB_PAD;
+
+	return true;
+}
+
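+/* Post up to @buff_cnt fresh Rx buffers, allocating new pages only
+ * where reuse was not possible; returns how many BDs were refilled.
+ */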
+static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
+{
+	struct enetc_rx_swbd *rx_swbd;
+	union enetc_rx_bd *rxbd;
+	int i, j;
+
+	i = rx_ring->next_to_use;
+	rx_swbd = &rx_ring->rx_swbd[i];
+	rxbd = ENETC_RXBD(*rx_ring, i);
+
+	for (j = 0; j < buff_cnt; j++) {
+		/* try reuse page */
+		if (unlikely(!rx_swbd->page)) {
+			if (unlikely(!enetc_new_page(rx_ring, rx_swbd))) {
+				rx_ring->stats.rx_alloc_errs++;
+				break;
+			}
+		}
+
+		/* update RxBD */
+		rxbd->w.addr = cpu_to_le64(rx_swbd->dma +
+					   rx_swbd->page_offset);
+		/* clear 'R" as well */
+		rxbd->r.lstatus = 0;
+
+		rx_swbd++;
+		rxbd++;
+		i++;
+		if (unlikely(i == rx_ring->bd_count)) {
+			i = 0;
+			rx_swbd = rx_ring->rx_swbd;
+			rxbd = ENETC_RXBD(*rx_ring, 0);
+		}
+	}
+
+	if (likely(j)) {
+		rx_ring->next_to_alloc = i; /* keep track for page reuse */
+		rx_ring->next_to_use = i;
+		/* update ENETC's consumer index */
+		enetc_wr_reg(rx_ring->rcir, i);
+	}
+
+	return j;
+}
+
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+static void enetc_get_rx_tstamp(struct net_device *ndev,
+				union enetc_rx_bd *rxbd,
+				struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	u32 lo, hi, tstamp_lo;
+	u64 tstamp;
+
+	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) {
+		lo = enetc_rd(hw, ENETC_SICTR0);
+		hi = enetc_rd(hw, ENETC_SICTR1);
+		tstamp_lo = le32_to_cpu(rxbd->r.tstamp);
+		if (lo <= tstamp_lo)
+			hi -= 1;
+
+		tstamp = (u64)hi << 32 | tstamp_lo;
+		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+		shhwtstamps->hwtstamp = ns_to_ktime(tstamp);
+	}
+}
+#endif
+
+static void enetc_get_offloads(struct enetc_bdr *rx_ring,
+			       union enetc_rx_bd *rxbd, struct sk_buff *skb)
+{
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
+#endif
+	/* TODO: hashing */
+	if (rx_ring->ndev->features & NETIF_F_RXCSUM) {
+		u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum);
+
+		skb->csum = csum_unfold((__force __sum16)~htons(inet_csum));
+		skb->ip_summed = CHECKSUM_COMPLETE;
+	}
+
+	/* if a VLAN tag was extracted, copy it to the skb; for now we
+	 * assume the standard TPID, although the HW also supports
+	 * custom values
+	 */
+	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       le16_to_cpu(rxbd->r.vlan_opt));
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	if (priv->active_offloads & ENETC_F_RX_TSTAMP)
+		enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb);
+#endif
+}
+
+static void enetc_process_skb(struct enetc_bdr *rx_ring,
+			      struct sk_buff *skb)
+{
+	skb_record_rx_queue(skb, rx_ring->index);
+	skb->protocol = eth_type_trans(skb, rx_ring->ndev);
+}
+
+static bool enetc_page_reusable(struct page *page)
+{
+	return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1);
+}
+
+static void enetc_reuse_page(struct enetc_bdr *rx_ring,
+			     struct enetc_rx_swbd *old)
+{
+	struct enetc_rx_swbd *new;
+
+	new = &rx_ring->rx_swbd[rx_ring->next_to_alloc];
+
+	/* next buf that may reuse a page */
+	enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc);
+
+	/* copy page reference */
+	*new = *old;
+}
+
+static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
+					       int i, u16 size)
+{
+	struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i];
+
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma,
+				      rx_swbd->page_offset,
+				      size, DMA_FROM_DEVICE);
+	return rx_swbd;
+}
+
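+/* Try to recycle the Rx buffer: if we hold the only reference to the
+ * page, flip to the other half-page (offset ^ ENETC_RXB_TRUESIZE),
+ * bump the refcount and hand it to the next alloc slot; otherwise
+ * just unmap it, since the skb now owns the page.
+ */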
+static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
+			      struct enetc_rx_swbd *rx_swbd)
+{
+	if (likely(enetc_page_reusable(rx_swbd->page))) {
+		rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE;
+		page_ref_inc(rx_swbd->page);
+
+		enetc_reuse_page(rx_ring, rx_swbd);
+
+		/* sync for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
+						 rx_swbd->page_offset,
+						 ENETC_RXB_DMA_SIZE,
+						 DMA_FROM_DEVICE);
+	} else {
+		dma_unmap_page(rx_ring->dev, rx_swbd->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	rx_swbd->page = NULL;
+}
+
+static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
+						int i, u16 size)
+{
+	struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
+	struct sk_buff *skb;
+	void *ba;
+
+	ba = page_address(rx_swbd->page) + rx_swbd->page_offset;
+	skb = build_skb(ba - ENETC_RXB_PAD, ENETC_RXB_TRUESIZE);
+	if (unlikely(!skb)) {
+		rx_ring->stats.rx_alloc_errs++;
+		return NULL;
+	}
+
+	skb_reserve(skb, ENETC_RXB_PAD);
+	__skb_put(skb, size);
+
+	enetc_put_rx_buff(rx_ring, rx_swbd);
+
+	return skb;
+}
+
+static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
+				     u16 size, struct sk_buff *skb)
+{
+	struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_swbd->page,
+			rx_swbd->page_offset, size, ENETC_RXB_TRUESIZE);
+
+	enetc_put_rx_buff(rx_ring, rx_swbd);
+}
+
+#define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */
+
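+/* Rx NAPI handler: assemble (possibly multi-BD) frames into skbs,
+ * apply offloads and feed them to GRO, refilling the ring in bundles
+ * of ENETC_RXBD_BUNDLE free BDs. Returns the number of frames handled.
+ */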
+static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
+			       struct napi_struct *napi, int work_limit)
+{
+	int rx_frm_cnt = 0, rx_byte_cnt = 0;
+	int cleaned_cnt, i;
+
+	cleaned_cnt = enetc_bd_unused(rx_ring);
+	/* next descriptor to process */
+	i = rx_ring->next_to_clean;
+
+	while (likely(rx_frm_cnt < work_limit)) {
+		union enetc_rx_bd *rxbd;
+		struct sk_buff *skb;
+		u32 bd_status;
+		u16 size;
+
+		if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
+			int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
+
+			cleaned_cnt -= count;
+		}
+
+		rxbd = ENETC_RXBD(*rx_ring, i);
+		bd_status = le32_to_cpu(rxbd->r.lstatus);
+		if (!bd_status)
+			break;
+
+		enetc_wr_reg(rx_ring->idr, BIT(rx_ring->index));
+		dma_rmb(); /* for reading other rxbd fields */
+		size = le16_to_cpu(rxbd->r.buf_len);
+		skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
+		if (!skb)
+			break;
+
+		enetc_get_offloads(rx_ring, rxbd, skb);
+
+		cleaned_cnt++;
+		rxbd++;
+		i++;
+		if (unlikely(i == rx_ring->bd_count)) {
+			i = 0;
+			rxbd = ENETC_RXBD(*rx_ring, 0);
+		}
+
+		if (unlikely(bd_status &
+			     ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
+			dev_kfree_skb(skb);
+			while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
+				dma_rmb();
+				bd_status = le32_to_cpu(rxbd->r.lstatus);
+				rxbd++;
+				i++;
+				if (unlikely(i == rx_ring->bd_count)) {
+					i = 0;
+					rxbd = ENETC_RXBD(*rx_ring, 0);
+				}
+			}
+
+			rx_ring->ndev->stats.rx_dropped++;
+			rx_ring->ndev->stats.rx_errors++;
+
+			break;
+		}
+
+		/* not the last BD in the frame? gather the remaining buffers */
+		while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
+			bd_status = le32_to_cpu(rxbd->r.lstatus);
+			size = ENETC_RXB_DMA_SIZE;
+
+			if (bd_status & ENETC_RXBD_LSTATUS_F) {
+				dma_rmb();
+				size = le16_to_cpu(rxbd->r.buf_len);
+			}
+
+			enetc_add_rx_buff_to_skb(rx_ring, i, size, skb);
+
+			cleaned_cnt++;
+			rxbd++;
+			i++;
+			if (unlikely(i == rx_ring->bd_count)) {
+				i = 0;
+				rxbd = ENETC_RXBD(*rx_ring, 0);
+			}
+		}
+
+		rx_byte_cnt += skb->len;
+
+		enetc_process_skb(rx_ring, skb);
+
+		napi_gro_receive(napi, skb);
+
+		rx_frm_cnt++;
+	}
+
+	rx_ring->next_to_clean = i;
+
+	rx_ring->stats.packets += rx_frm_cnt;
+	rx_ring->stats.bytes += rx_byte_cnt;
+
+	return rx_frm_cnt;
+}
+
+/* Probing and Init */
+#define ENETC_MAX_RFS_SIZE 64
+void enetc_get_si_caps(struct enetc_si *si)
+{
+	struct enetc_hw *hw = &si->hw;
+	u32 val;
+
+	/* find out how many of the various resources we have to work with */
+	val = enetc_rd(hw, ENETC_SICAPR0);
+	si->num_rx_rings = (val >> 16) & 0xff;
+	si->num_tx_rings = val & 0xff;
+
+	val = enetc_rd(hw, ENETC_SIRFSCAPR);
+	si->num_fs_entries = ENETC_SIRFSCAPR_GET_NUM_RFS(val);
+	si->num_fs_entries = min(si->num_fs_entries, ENETC_MAX_RFS_SIZE);
+
+	si->num_rss = 0;
+	val = enetc_rd(hw, ENETC_SIPCAPR0);
+	if (val & ENETC_SIPCAPR0_RSS) {
+		val = enetc_rd(hw, ENETC_SIRSSCAPR);
+		si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(val);
+	}
+}
+
+static int enetc_dma_alloc_bdr(struct enetc_bdr *r, size_t bd_size)
+{
+	r->bd_base = dma_alloc_coherent(r->dev, r->bd_count * bd_size,
+					&r->bd_dma_base, GFP_KERNEL);
+	if (!r->bd_base)
+		return -ENOMEM;
+
+	/* h/w requires 128B alignment */
+	if (!IS_ALIGNED(r->bd_dma_base, 128)) {
+		dma_free_coherent(r->dev, r->bd_count * bd_size, r->bd_base,
+				  r->bd_dma_base);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int enetc_alloc_txbdr(struct enetc_bdr *txr)
+{
+	int err;
+
+	txr->tx_swbd = vzalloc(txr->bd_count * sizeof(struct enetc_tx_swbd));
+	if (!txr->tx_swbd)
+		return -ENOMEM;
+
+	err = enetc_dma_alloc_bdr(txr, sizeof(union enetc_tx_bd));
+	if (err) {
+		vfree(txr->tx_swbd);
+		return err;
+	}
+
+	txr->next_to_clean = 0;
+	txr->next_to_use = 0;
+
+	return 0;
+}
+
+static void enetc_free_txbdr(struct enetc_bdr *txr)
+{
+	int size, i;
+
+	for (i = 0; i < txr->bd_count; i++)
+		enetc_free_tx_skb(txr, &txr->tx_swbd[i]);
+
+	size = txr->bd_count * sizeof(union enetc_tx_bd);
+
+	dma_free_coherent(txr->dev, size, txr->bd_base, txr->bd_dma_base);
+	txr->bd_base = NULL;
+
+	vfree(txr->tx_swbd);
+	txr->tx_swbd = NULL;
+}
+
+static int enetc_alloc_tx_resources(struct enetc_ndev_priv *priv)
+{
+	int i, err;
+
+	for (i = 0; i < priv->num_tx_rings; i++) {
+		err = enetc_alloc_txbdr(priv->tx_ring[i]);
+
+		if (err)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	while (i-- > 0)
+		enetc_free_txbdr(priv->tx_ring[i]);
+
+	return err;
+}
+
+static void enetc_free_tx_resources(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_free_txbdr(priv->tx_ring[i]);
+}
+
+static int enetc_alloc_rxbdr(struct enetc_bdr *rxr)
+{
+	int err;
+
+	rxr->rx_swbd = vzalloc(rxr->bd_count * sizeof(struct enetc_rx_swbd));
+	if (!rxr->rx_swbd)
+		return -ENOMEM;
+
+	err = enetc_dma_alloc_bdr(rxr, sizeof(union enetc_rx_bd));
+	if (err) {
+		vfree(rxr->rx_swbd);
+		return err;
+	}
+
+	rxr->next_to_clean = 0;
+	rxr->next_to_use = 0;
+	rxr->next_to_alloc = 0;
+
+	return 0;
+}
+
+static void enetc_free_rxbdr(struct enetc_bdr *rxr)
+{
+	int size;
+
+	size = rxr->bd_count * sizeof(union enetc_rx_bd);
+
+	dma_free_coherent(rxr->dev, size, rxr->bd_base, rxr->bd_dma_base);
+	rxr->bd_base = NULL;
+
+	vfree(rxr->rx_swbd);
+	rxr->rx_swbd = NULL;
+}
+
+static int enetc_alloc_rx_resources(struct enetc_ndev_priv *priv)
+{
+	int i, err;
+
+	for (i = 0; i < priv->num_rx_rings; i++) {
+		err = enetc_alloc_rxbdr(priv->rx_ring[i]);
+
+		if (err)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	while (i-- > 0)
+		enetc_free_rxbdr(priv->rx_ring[i]);
+
+	return err;
+}
+
+static void enetc_free_rx_resources(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_free_rxbdr(priv->rx_ring[i]);
+}
+
+static void enetc_free_tx_ring(struct enetc_bdr *tx_ring)
+{
+	int i;
+
+	if (!tx_ring->tx_swbd)
+		return;
+
+	for (i = 0; i < tx_ring->bd_count; i++) {
+		struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i];
+
+		enetc_free_tx_skb(tx_ring, tx_swbd);
+	}
+
+	tx_ring->next_to_clean = 0;
+	tx_ring->next_to_use = 0;
+}
+
+static void enetc_free_rx_ring(struct enetc_bdr *rx_ring)
+{
+	int i;
+
+	if (!rx_ring->rx_swbd)
+		return;
+
+	for (i = 0; i < rx_ring->bd_count; i++) {
+		struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i];
+
+		if (!rx_swbd->page)
+			continue;
+
+		dma_unmap_page(rx_ring->dev, rx_swbd->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+		__free_page(rx_swbd->page);
+		rx_swbd->page = NULL;
+	}
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+	rx_ring->next_to_alloc = 0;
+}
+
+static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_free_rx_ring(priv->rx_ring[i]);
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_free_tx_ring(priv->tx_ring[i]);
+}
+
+static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+{
+	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+
+	cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base,
+					   GFP_KERNEL);
+	if (!cbdr->bd_base)
+		return -ENOMEM;
+
+	/* h/w requires 128B alignment */
+	if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) {
+		dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
+		return -EINVAL;
+	}
+
+	cbdr->next_to_clean = 0;
+	cbdr->next_to_use = 0;
+
+	return 0;
+}
+
+static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+{
+	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+
+	dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
+	cbdr->bd_base = NULL;
+}
+
+static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
+{
+	/* set CBDR cache attributes */
+	enetc_wr(hw, ENETC_SICAR2,
+		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
+
+	enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base));
+	enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base));
+	enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count));
+
+	enetc_wr(hw, ENETC_SICBDRPIR, 0);
+	enetc_wr(hw, ENETC_SICBDRCIR, 0);
+
+	/* enable ring */
+	enetc_wr(hw, ENETC_SICBDRMR, BIT(31));
+
+	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
+	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
+}
+
+static void enetc_clear_cbdr(struct enetc_hw *hw)
+{
+	enetc_wr(hw, ENETC_SICBDRMR, 0);
+}
+
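+/* Default RSS indirection: spread the buckets round-robin over the
+ * available Rx rings.
+ */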
+static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
+{
+	int *rss_table;
+	int i;
+
+	rss_table = kmalloc_array(si->num_rss, sizeof(*rss_table), GFP_KERNEL);
+	if (!rss_table)
+		return -ENOMEM;
+
+	/* Set up RSS table defaults */
+	for (i = 0; i < si->num_rss; i++)
+		rss_table[i] = i % num_groups;
+
+	enetc_set_rss_table(si, rss_table, si->num_rss);
+
+	kfree(rss_table);
+
+	return 0;
+}
+
+static int enetc_configure_si(struct enetc_ndev_priv *priv)
+{
+	struct enetc_si *si = priv->si;
+	struct enetc_hw *hw = &si->hw;
+	int err;
+
+	enetc_setup_cbdr(hw, &si->cbd_ring);
+	/* set SI cache attributes */
+	enetc_wr(hw, ENETC_SICAR0,
+		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
+	enetc_wr(hw, ENETC_SICAR1, ENETC_SICAR_MSI);
+	/* enable SI */
+	enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN);
+
+	if (si->num_rss) {
+		err = enetc_setup_default_rss_table(si, priv->num_rx_rings);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+void enetc_init_si_rings_params(struct enetc_ndev_priv *priv)
+{
+	struct enetc_si *si = priv->si;
+	int cpus = num_online_cpus();
+
+	priv->tx_bd_count = ENETC_BDR_DEFAULT_SIZE;
+	priv->rx_bd_count = ENETC_BDR_DEFAULT_SIZE;
+
+	/* Enable all available TX rings in order to configure as many
+	 * priorities as possible, when needed.
+	 * TODO: Make # of TX rings run-time configurable
+	 */
+	priv->num_rx_rings = min_t(int, cpus, si->num_rx_rings);
+	priv->num_tx_rings = si->num_tx_rings;
+	priv->bdr_int_num = cpus;
+
+	/* SI specific */
+	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
+}
+
+int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
+{
+	struct enetc_si *si = priv->si;
+	int err;
+
+	err = enetc_alloc_cbdr(priv->dev, &si->cbd_ring);
+	if (err)
+		return err;
+
+	priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules),
+				  GFP_KERNEL);
+	if (!priv->cls_rules) {
+		err = -ENOMEM;
+		goto err_alloc_cls;
+	}
+
+	err = enetc_configure_si(priv);
+	if (err)
+		goto err_config_si;
+
+	return 0;
+
+err_config_si:
+	kfree(priv->cls_rules);
+err_alloc_cls:
+	enetc_clear_cbdr(&si->hw);
+	enetc_free_cbdr(priv->dev, &si->cbd_ring);
+
+	return err;
+}
+
+void enetc_free_si_resources(struct enetc_ndev_priv *priv)
+{
+	struct enetc_si *si = priv->si;
+
+	enetc_clear_cbdr(&si->hw);
+	enetc_free_cbdr(priv->dev, &si->cbd_ring);
+
+	kfree(priv->cls_rules);
+}
+
+static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+{
+	int idx = tx_ring->index;
+	u32 tbmr;
+
+	enetc_txbdr_wr(hw, idx, ENETC_TBBAR0,
+		       lower_32_bits(tx_ring->bd_dma_base));
+
+	enetc_txbdr_wr(hw, idx, ENETC_TBBAR1,
+		       upper_32_bits(tx_ring->bd_dma_base));
+
+	WARN_ON(!IS_ALIGNED(tx_ring->bd_count, 64)); /* multiple of 64 */
+	enetc_txbdr_wr(hw, idx, ENETC_TBLENR,
+		       ENETC_RTBLENR_LEN(tx_ring->bd_count));
+
+	/* clearing PI/CI registers for Tx not supported, adjust sw indexes */
+	tx_ring->next_to_use = enetc_txbdr_rd(hw, idx, ENETC_TBPIR);
+	tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR);
+
+	/* enable Tx ints by setting pkt thr to 1 */
+	enetc_txbdr_wr(hw, idx, ENETC_TBICIR0, ENETC_TBICIR0_ICEN | 0x1);
+
+	tbmr = ENETC_TBMR_EN;
+	if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX)
+		tbmr |= ENETC_TBMR_VIH;
+
+	/* enable ring */
+	enetc_txbdr_wr(hw, idx, ENETC_TBMR, tbmr);
+
+	tx_ring->tpir = hw->reg + ENETC_BDR(TX, idx, ENETC_TBPIR);
+	tx_ring->tcir = hw->reg + ENETC_BDR(TX, idx, ENETC_TBCIR);
+	tx_ring->idr = hw->reg + ENETC_SITXIDR;
+}
+
+static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+{
+	int idx = rx_ring->index;
+	u32 rbmr;
+
+	enetc_rxbdr_wr(hw, idx, ENETC_RBBAR0,
+		       lower_32_bits(rx_ring->bd_dma_base));
+
+	enetc_rxbdr_wr(hw, idx, ENETC_RBBAR1,
+		       upper_32_bits(rx_ring->bd_dma_base));
+
+	WARN_ON(!IS_ALIGNED(rx_ring->bd_count, 64)); /* multiple of 64 */
+	enetc_rxbdr_wr(hw, idx, ENETC_RBLENR,
+		       ENETC_RTBLENR_LEN(rx_ring->bd_count));
+
+	enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE);
+
+	enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);
+
+	/* enable Rx ints by setting pkt thr to 1 */
+	enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1);
+
+	rbmr = ENETC_RBMR_EN;
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	rbmr |= ENETC_RBMR_BDS;
+#endif
+	if (rx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		rbmr |= ENETC_RBMR_VTE;
+
+	rx_ring->rcir = hw->reg + ENETC_BDR(RX, idx, ENETC_RBCIR);
+	rx_ring->idr = hw->reg + ENETC_SIRXIDR;
+
+	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
+
+	/* enable ring */
+	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
+}
+
+static void enetc_setup_bdrs(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_setup_txbdr(&priv->si->hw, priv->tx_ring[i]);
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_setup_rxbdr(&priv->si->hw, priv->rx_ring[i]);
+}
+
+static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+{
+	int idx = rx_ring->index;
+
+	/* disable EN bit on ring */
+	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, 0);
+}
+
+static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+{
+	int delay = 8, timeout = 100;
+	int idx = tx_ring->index;
+
+	/* disable EN bit on ring */
+	enetc_txbdr_wr(hw, idx, ENETC_TBMR, 0);
+
+	/* wait for busy to clear */
+	while (delay < timeout &&
+	       enetc_txbdr_rd(hw, idx, ENETC_TBSR) & ENETC_TBSR_BUSY) {
+		msleep(delay);
+		delay *= 2;
+	}
+
+	if (delay >= timeout)
+		netdev_warn(tx_ring->ndev, "timeout for tx ring #%d clear\n",
+			    idx);
+}
+
+static void enetc_clear_bdrs(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_clear_txbdr(&priv->si->hw, priv->tx_ring[i]);
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_clear_rxbdr(&priv->si->hw, priv->rx_ring[i]);
+
+	udelay(1);
+}
+
+static int enetc_setup_irqs(struct enetc_ndev_priv *priv)
+{
+	struct pci_dev *pdev = priv->si->pdev;
+	cpumask_t cpu_mask;
+	int i, j, err;
+
+	for (i = 0; i < priv->bdr_int_num; i++) {
+		int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i);
+		struct enetc_int_vector *v = priv->int_vector[i];
+		int entry = ENETC_BDR_INT_BASE_IDX + i;
+		struct enetc_hw *hw = &priv->si->hw;
+
+		snprintf(v->name, sizeof(v->name), "%s-rxtx%d",
+			 priv->ndev->name, i);
+		err = request_irq(irq, enetc_msix, 0, v->name, v);
+		if (err) {
+			dev_err(priv->dev, "request_irq() failed!\n");
+			goto irq_err;
+		}
+
+		v->tbier_base = hw->reg + ENETC_BDR(TX, 0, ENETC_TBIER);
+		v->rbier = hw->reg + ENETC_BDR(RX, i, ENETC_RBIER);
+
+		enetc_wr(hw, ENETC_SIMSIRRV(i), entry);
+
+		for (j = 0; j < v->count_tx_rings; j++) {
+			int idx = v->tx_ring[j].index;
+
+			enetc_wr(hw, ENETC_SIMSITRV(idx), entry);
+		}
+		cpumask_clear(&cpu_mask);
+		cpumask_set_cpu(i % num_online_cpus(), &cpu_mask);
+		irq_set_affinity_hint(irq, &cpu_mask);
+	}
+
+	return 0;
+
+irq_err:
+	while (i--) {
+		int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i);
+
+		irq_set_affinity_hint(irq, NULL);
+		free_irq(irq, priv->int_vector[i]);
+	}
+
+	return err;
+}
+
+static void enetc_free_irqs(struct enetc_ndev_priv *priv)
+{
+	struct pci_dev *pdev = priv->si->pdev;
+	int i;
+
+	for (i = 0; i < priv->bdr_int_num; i++) {
+		int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i);
+
+		irq_set_affinity_hint(irq, NULL);
+		free_irq(irq, priv->int_vector[i]);
+	}
+}
+
+static void enetc_enable_interrupts(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	/* enable Tx & Rx event indication */
+	for (i = 0; i < priv->num_rx_rings; i++) {
+		enetc_rxbdr_wr(&priv->si->hw, i,
+			       ENETC_RBIER, ENETC_RBIER_RXTIE);
+	}
+
+	for (i = 0; i < priv->num_tx_rings; i++) {
+		enetc_txbdr_wr(&priv->si->hw, i,
+			       ENETC_TBIER, ENETC_TBIER_TXTIE);
+	}
+}
+
+static void enetc_disable_interrupts(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_txbdr_wr(&priv->si->hw, i, ENETC_TBIER, 0);
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_rxbdr_wr(&priv->si->hw, i, ENETC_RBIER, 0);
+}
+
+static void adjust_link(struct net_device *ndev)
+{
+	struct phy_device *phydev = ndev->phydev;
+
+	phy_print_status(phydev);
+}
+
+static int enetc_phy_connect(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct phy_device *phydev;
+
+	if (!priv->phy_node)
+		return 0; /* phy-less mode */
+
+	phydev = of_phy_connect(ndev, priv->phy_node, &adjust_link,
+				0, priv->if_mode);
+	if (!phydev) {
+		dev_err(&ndev->dev, "could not attach to PHY\n");
+		return -ENODEV;
+	}
+
+	phy_attached_info(phydev);
+
+	return 0;
+}
+
+int enetc_open(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	int i, err;
+
+	err = enetc_setup_irqs(priv);
+	if (err)
+		return err;
+
+	err = enetc_phy_connect(ndev);
+	if (err)
+		goto err_phy_connect;
+
+	err = enetc_alloc_tx_resources(priv);
+	if (err)
+		goto err_alloc_tx;
+
+	err = enetc_alloc_rx_resources(priv);
+	if (err)
+		goto err_alloc_rx;
+
+	enetc_setup_bdrs(priv);
+
+	err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(ndev, priv->num_rx_rings);
+	if (err)
+		goto err_set_queues;
+
+	for (i = 0; i < priv->bdr_int_num; i++)
+		napi_enable(&priv->int_vector[i]->napi);
+
+	enetc_enable_interrupts(priv);
+
+	if (ndev->phydev)
+		phy_start(ndev->phydev);
+	else
+		netif_carrier_on(ndev);
+
+	netif_tx_start_all_queues(ndev);
+
+	return 0;
+
+err_set_queues:
+	enetc_free_rx_resources(priv);
+err_alloc_rx:
+	enetc_free_tx_resources(priv);
+err_alloc_tx:
+	if (ndev->phydev)
+		phy_disconnect(ndev->phydev);
+err_phy_connect:
+	enetc_free_irqs(priv);
+
+	return err;
+}
+
+int enetc_close(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	int i;
+
+	netif_tx_stop_all_queues(ndev);
+
+	if (ndev->phydev) {
+		phy_stop(ndev->phydev);
+		phy_disconnect(ndev->phydev);
+	} else {
+		netif_carrier_off(ndev);
+	}
+
+	for (i = 0; i < priv->bdr_int_num; i++) {
+		napi_synchronize(&priv->int_vector[i]->napi);
+		napi_disable(&priv->int_vector[i]->napi);
+	}
+
+	enetc_disable_interrupts(priv);
+	enetc_clear_bdrs(priv);
+
+	enetc_free_rxtx_rings(priv);
+	enetc_free_rx_resources(priv);
+	enetc_free_tx_resources(priv);
+	enetc_free_irqs(priv);
+
+	return 0;
+}
+
+int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+		   void *type_data)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct tc_mqprio_qopt *mqprio = type_data;
+	struct enetc_bdr *tx_ring;
+	u8 num_tc;
+	int i;
+
+	if (type != TC_SETUP_QDISC_MQPRIO)
+		return -EOPNOTSUPP;
+
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	num_tc = mqprio->num_tc;
+
+	if (!num_tc) {
+		netdev_reset_tc(ndev);
+		netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
+
+		/* Reset all ring priorities to 0 */
+		for (i = 0; i < priv->num_tx_rings; i++) {
+			tx_ring = priv->tx_ring[i];
+			enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0);
+		}
+
+		return 0;
+	}
+
+	/* Check if we have enough BD rings available to accommodate all TCs */
+	if (num_tc > priv->num_tx_rings) {
+		netdev_err(ndev, "Max %d traffic classes supported\n",
+			   priv->num_tx_rings);
+		return -EINVAL;
+	}
+
+	/* For the moment, we use only one BD ring per TC.
+	 *
+	 * Configure num_tc BD rings with increasing priorities.
+	 */
+	for (i = 0; i < num_tc; i++) {
+		tx_ring = priv->tx_ring[i];
+		enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i);
+	}
+
+	/* Reset the number of netdev queues based on the TC count */
+	netif_set_real_num_tx_queues(ndev, num_tc);
+
+	netdev_set_num_tc(ndev, num_tc);
+
+	/* Each TC is associated with one netdev queue */
+	for (i = 0; i < num_tc; i++)
+		netdev_set_tc_queue(ndev, i, 1, i);
+
+	return 0;
+}
+
+struct net_device_stats *enetc_get_stats(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned long packets = 0, bytes = 0;
+	int i;
+
+	for (i = 0; i < priv->num_rx_rings; i++) {
+		packets += priv->rx_ring[i]->stats.packets;
+		bytes	+= priv->rx_ring[i]->stats.bytes;
+	}
+
+	stats->rx_packets = packets;
+	stats->rx_bytes = bytes;
+	bytes = 0;
+	packets = 0;
+
+	for (i = 0; i < priv->num_tx_rings; i++) {
+		packets += priv->tx_ring[i]->stats.packets;
+		bytes	+= priv->tx_ring[i]->stats.bytes;
+	}
+
+	stats->tx_packets = packets;
+	stats->tx_bytes = bytes;
+
+	return stats;
+}
+
+static int enetc_set_rss(struct net_device *ndev, int en)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	u32 reg;
+
+	enetc_wr(hw, ENETC_SIRBGCR, priv->num_rx_rings);
+
+	reg = enetc_rd(hw, ENETC_SIMR);
+	reg &= ~ENETC_SIMR_RSSE;
+	reg |= (en) ? ENETC_SIMR_RSSE : 0;
+	enetc_wr(hw, ENETC_SIMR, reg);
+
+	return 0;
+}
+
+int enetc_set_features(struct net_device *ndev,
+		       netdev_features_t features)
+{
+	netdev_features_t changed = ndev->features ^ features;
+
+	if (changed & NETIF_F_RXHASH)
+		enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH));
+
+	return 0;
+}
+
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct hwtstamp_config config;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		priv->active_offloads &= ~ENETC_F_TX_TSTAMP;
+		break;
+	case HWTSTAMP_TX_ON:
+		priv->active_offloads |= ENETC_F_TX_TSTAMP;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		priv->active_offloads &= ~ENETC_F_RX_TSTAMP;
+		break;
+	default:
+		priv->active_offloads |= ENETC_F_RX_TSTAMP;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+	}
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+	       -EFAULT : 0;
+}
+
+static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct hwtstamp_config config;
+
+	config.flags = 0;
+
+	if (priv->active_offloads & ENETC_F_TX_TSTAMP)
+		config.tx_type = HWTSTAMP_TX_ON;
+	else
+		config.tx_type = HWTSTAMP_TX_OFF;
+
+	config.rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ?
+			    HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+	       -EFAULT : 0;
+}
+#endif
+
+int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
+{
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	if (cmd == SIOCSHWTSTAMP)
+		return enetc_hwtstamp_set(ndev, rq);
+	if (cmd == SIOCGHWTSTAMP)
+		return enetc_hwtstamp_get(ndev, rq);
+#endif
+	return -EINVAL;
+}
+
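+/* Allocate MSI-X vectors plus the per-vector state (NAPI context, one
+ * Rx ring and a slice of the Tx rings), wiring each ring to its vector.
+ */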
+int enetc_alloc_msix(struct enetc_ndev_priv *priv)
+{
+	struct pci_dev *pdev = priv->si->pdev;
+	int size, v_tx_rings;
+	int i, n, err, nvec;
+
+	nvec = ENETC_BDR_INT_BASE_IDX + priv->bdr_int_num;
+	/* allocate MSIX for both messaging and Rx/Tx interrupts */
+	n = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+
+	if (n < 0)
+		return n;
+
+	if (n != nvec)
+		return -EPERM;
+
+	/* # of tx rings per int vector */
+	v_tx_rings = priv->num_tx_rings / priv->bdr_int_num;
+	size = sizeof(struct enetc_int_vector) +
+	       sizeof(struct enetc_bdr) * v_tx_rings;
+
+	for (i = 0; i < priv->bdr_int_num; i++) {
+		struct enetc_int_vector *v;
+		struct enetc_bdr *bdr;
+		int j;
+
+		v = kzalloc(size, GFP_KERNEL);
+		if (!v) {
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		priv->int_vector[i] = v;
+
+		netif_napi_add(priv->ndev, &v->napi, enetc_poll,
+			       NAPI_POLL_WEIGHT);
+		v->count_tx_rings = v_tx_rings;
+
+		for (j = 0; j < v_tx_rings; j++) {
+			int idx;
+
+			/* default tx ring mapping policy */
+			if (priv->bdr_int_num == ENETC_MAX_BDR_INT)
+				idx = 2 * j + i; /* 2 CPUs */
+			else
+				idx = j + i * v_tx_rings; /* default */
+
+			__set_bit(idx, &v->tx_rings_map);
+			bdr = &v->tx_ring[j];
+			bdr->index = idx;
+			bdr->ndev = priv->ndev;
+			bdr->dev = priv->dev;
+			bdr->bd_count = priv->tx_bd_count;
+			priv->tx_ring[idx] = bdr;
+		}
+
+		bdr = &v->rx_ring;
+		bdr->index = i;
+		bdr->ndev = priv->ndev;
+		bdr->dev = priv->dev;
+		bdr->bd_count = priv->rx_bd_count;
+		priv->rx_ring[i] = bdr;
+	}
+
+	return 0;
+
+fail:
+	while (i--) {
+		netif_napi_del(&priv->int_vector[i]->napi);
+		kfree(priv->int_vector[i]);
+	}
+
+	pci_free_irq_vectors(pdev);
+
+	return err;
+}
+
+void enetc_free_msix(struct enetc_ndev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->bdr_int_num; i++) {
+		struct enetc_int_vector *v = priv->int_vector[i];
+
+		netif_napi_del(&v->napi);
+	}
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		priv->rx_ring[i] = NULL;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		priv->tx_ring[i] = NULL;
+
+	for (i = 0; i < priv->bdr_int_num; i++) {
+		kfree(priv->int_vector[i]);
+		priv->int_vector[i] = NULL;
+	}
+
+	/* disable all MSIX for this device */
+	pci_free_irq_vectors(priv->si->pdev);
+}
+
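+/* si was allocated oversized and aligned up; free the original pointer. */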
+static void enetc_kfree_si(struct enetc_si *si)
+{
+	char *p = (char *)si - si->pad;
+
+	kfree(p);
+}
+
+static void enetc_detect_errata(struct enetc_si *si)
+{
+	if (si->pdev->revision == ENETC_REV1)
+		si->errata = ENETC_ERR_TXCSUM | ENETC_ERR_VLAN_ISOL |
+			     ENETC_ERR_UCMCSWP;
+}
+
+int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv)
+{
+	struct enetc_si *si, *p;
+	struct enetc_hw *hw;
+	size_t alloc_size;
+	int err, len;
+
+	pcie_flr(pdev);
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "device enable failed\n");
+		return err;
+	}
+
+	/* set up for high or low dma */
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev,
+				"DMA configuration failed: 0x%x\n", err);
+			goto err_dma;
+		}
+	}
+
+	err = pci_request_mem_regions(pdev, name);
+	if (err) {
+		dev_err(&pdev->dev, "pci_request_regions failed err=%d\n", err);
+		goto err_pci_mem_reg;
+	}
+
+	pci_set_master(pdev);
+
+	alloc_size = sizeof(struct enetc_si);
+	if (sizeof_priv) {
+		/* align priv to 32B */
+		alloc_size = ALIGN(alloc_size, ENETC_SI_ALIGN);
+		alloc_size += sizeof_priv;
+	}
+	/* force 32B alignment for enetc_si */
+	alloc_size += ENETC_SI_ALIGN - 1;
+
+	p = kzalloc(alloc_size, GFP_KERNEL);
+	if (!p) {
+		err = -ENOMEM;
+		goto err_alloc_si;
+	}
+
+	si = PTR_ALIGN(p, ENETC_SI_ALIGN);
+	si->pad = (char *)si - (char *)p;
+
+	pci_set_drvdata(pdev, si);
+	si->pdev = pdev;
+	hw = &si->hw;
+
+	len = pci_resource_len(pdev, ENETC_BAR_REGS);
+	hw->reg = ioremap(pci_resource_start(pdev, ENETC_BAR_REGS), len);
+	if (!hw->reg) {
+		err = -ENXIO;
+		dev_err(&pdev->dev, "ioremap() failed\n");
+		goto err_ioremap;
+	}
+	if (len > ENETC_PORT_BASE)
+		hw->port = hw->reg + ENETC_PORT_BASE;
+	if (len > ENETC_GLOBAL_BASE)
+		hw->global = hw->reg + ENETC_GLOBAL_BASE;
+
+	enetc_detect_errata(si);
+
+	return 0;
+
+err_ioremap:
+	enetc_kfree_si(si);
+err_alloc_si:
+	pci_release_mem_regions(pdev);
+err_pci_mem_reg:
+err_dma:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+void enetc_pci_remove(struct pci_dev *pdev)
+{
+	struct enetc_si *si = pci_get_drvdata(pdev);
+	struct enetc_hw *hw = &si->hw;
+
+	iounmap(hw->reg);
+	enetc_kfree_si(si);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
new file mode 100644
index 0000000..541b4e2
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2017-2019 NXP */
+
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/dma-mapping.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/phy.h>
+
+#include "enetc_hw.h"
+
+#define ENETC_MAC_MAXFRM_SIZE	9600
+#define ENETC_MAX_MTU		(ENETC_MAC_MAXFRM_SIZE - \
+				(ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN))
+
+struct enetc_tx_swbd {
+	struct sk_buff *skb;
+	dma_addr_t dma;
+	u16 len;
+	u8 is_dma_page:1;
+	u8 check_wb:1;
+	u8 do_tstamp:1;
+};
+
+#define ENETC_RX_MAXFRM_SIZE	ENETC_MAC_MAXFRM_SIZE
+#define ENETC_RXB_TRUESIZE	2048 /* PAGE_SIZE >> 1 */
+#define ENETC_RXB_PAD		NET_SKB_PAD /* add extra space if needed */
+#define ENETC_RXB_DMA_SIZE	\
+	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD)
+
+struct enetc_rx_swbd {
+	dma_addr_t dma;
+	struct page *page;
+	u16 page_offset;
+};
+
+struct enetc_ring_stats {
+	unsigned int packets;
+	unsigned int bytes;
+	unsigned int rx_alloc_errs;
+};
+
+#define ENETC_BDR_DEFAULT_SIZE	1024
+#define ENETC_DEFAULT_TX_WORK	256
+
+struct enetc_bdr {
+	struct device *dev; /* for DMA mapping */
+	struct net_device *ndev;
+	void *bd_base; /* points to Rx or Tx BD ring */
+	union {
+		void __iomem *tpir;
+		void __iomem *rcir;
+	};
+	u16 index;
+	int bd_count; /* # of BDs */
+	int next_to_use;
+	int next_to_clean;
+	union {
+		struct enetc_tx_swbd *tx_swbd;
+		struct enetc_rx_swbd *rx_swbd;
+	};
+	union {
+		void __iomem *tcir; /* Tx */
+		int next_to_alloc; /* Rx */
+	};
+	void __iomem *idr; /* Interrupt Detect Register pointer */
+
+	struct enetc_ring_stats stats;
+
+	dma_addr_t bd_dma_base;
+} ____cacheline_aligned_in_smp;
+
+static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i)
+{
+	if (unlikely(++*i == bdr->bd_count))
+		*i = 0;
+}
+
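+/* Number of free BDs; one slot is always left unused so that a full
+ * ring can be told apart from an empty one.
+ */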
+static inline int enetc_bd_unused(struct enetc_bdr *bdr)
+{
+	if (bdr->next_to_clean > bdr->next_to_use)
+		return bdr->next_to_clean - bdr->next_to_use - 1;
+
+	return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1;
+}
+
+/* Control BD ring */
+#define ENETC_CBDR_DEFAULT_SIZE	64
+struct enetc_cbdr {
+	void *bd_base; /* points to Rx or Tx BD ring */
+	void __iomem *pir;
+	void __iomem *cir;
+
+	int bd_count; /* # of BDs */
+	int next_to_use;
+	int next_to_clean;
+
+	dma_addr_t bd_dma_base;
+};
+
+#define ENETC_TXBD(BDR, i) (&(((union enetc_tx_bd *)((BDR).bd_base))[i]))
+#define ENETC_RXBD(BDR, i) (&(((union enetc_rx_bd *)((BDR).bd_base))[i]))
+
+struct enetc_msg_swbd {
+	void *vaddr;
+	dma_addr_t dma;
+	int size;
+};
+
+#define ENETC_REV1	0x1
+enum enetc_errata {
+	ENETC_ERR_TXCSUM	= BIT(0),
+	ENETC_ERR_VLAN_ISOL	= BIT(1),
+	ENETC_ERR_UCMCSWP	= BIT(2),
+};
+
+/* PCI IEP device data */
+struct enetc_si {
+	struct pci_dev *pdev;
+	struct enetc_hw hw;
+	enum enetc_errata errata;
+
+	struct net_device *ndev; /* back ref. */
+
+	struct enetc_cbdr cbd_ring;
+
+	int num_rx_rings; /* how many rings are available in the SI */
+	int num_tx_rings;
+	int num_fs_entries;
+	int num_rss; /* number of RSS buckets */
+	unsigned short pad;
+};
+
+#define ENETC_SI_ALIGN	32
+
+static inline void *enetc_si_priv(const struct enetc_si *si)
+{
+	return (char *)si + ALIGN(sizeof(struct enetc_si), ENETC_SI_ALIGN);
+}
+
+static inline bool enetc_si_is_pf(struct enetc_si *si)
+{
+	return !!(si->hw.port);
+}
+
+#define ENETC_MAX_NUM_TXQS	8
+#define ENETC_INT_NAME_MAX	(IFNAMSIZ + 8)
+
+struct enetc_int_vector {
+	void __iomem *rbier;
+	void __iomem *tbier_base;
+	unsigned long tx_rings_map;
+	int count_tx_rings;
+	struct napi_struct napi;
+	char name[ENETC_INT_NAME_MAX];
+
+	struct enetc_bdr rx_ring ____cacheline_aligned_in_smp;
+	struct enetc_bdr tx_ring[0];
+};
+
+struct enetc_cls_rule {
+	struct ethtool_rx_flow_spec fs;
+	int used;
+};
+
+#define ENETC_MAX_BDR_INT	2 /* fixed to max # of available cpus */
+
+/* TODO: more hardware offloads */
+enum enetc_active_offloads {
+	ENETC_F_RX_TSTAMP	= BIT(0),
+	ENETC_F_TX_TSTAMP	= BIT(1),
+};
+
+struct enetc_ndev_priv {
+	struct net_device *ndev;
+	struct device *dev; /* dma-mapping device */
+	struct enetc_si *si;
+
+	int bdr_int_num; /* number of Rx/Tx ring interrupts */
+	struct enetc_int_vector *int_vector[ENETC_MAX_BDR_INT];
+	u16 num_rx_rings, num_tx_rings;
+	u16 rx_bd_count, tx_bd_count;
+
+	u16 msg_enable;
+	int active_offloads;
+
+	struct enetc_bdr *tx_ring[16];
+	struct enetc_bdr *rx_ring[16];
+
+	struct enetc_cls_rule *cls_rules;
+
+	struct device_node *phy_node;
+	phy_interface_t if_mode;
+};
+
+/* Messaging */
+
+/* VF-PF set primary MAC address message format */
+struct enetc_msg_cmd_set_primary_mac {
+	struct enetc_msg_cmd_header header;
+	struct sockaddr mac;
+};
+
+#define ENETC_CBD(R, i)	(&(((struct enetc_cbd *)((R).bd_base))[i]))
+
+#define ENETC_CBDR_TIMEOUT	1000 /* usecs */
+
+/* PTP driver exports */
+extern int enetc_phc_index;
+
+/* SI common */
+int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv);
+void enetc_pci_remove(struct pci_dev *pdev);
+int enetc_alloc_msix(struct enetc_ndev_priv *priv);
+void enetc_free_msix(struct enetc_ndev_priv *priv);
+void enetc_get_si_caps(struct enetc_si *si);
+void enetc_init_si_rings_params(struct enetc_ndev_priv *priv);
+int enetc_alloc_si_resources(struct enetc_ndev_priv *priv);
+void enetc_free_si_resources(struct enetc_ndev_priv *priv);
+
+int enetc_open(struct net_device *ndev);
+int enetc_close(struct net_device *ndev);
+netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev);
+struct net_device_stats *enetc_get_stats(struct net_device *ndev);
+int enetc_set_features(struct net_device *ndev,
+		       netdev_features_t features);
+int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
+int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+		   void *type_data);
+
+/* ethtool */
+void enetc_set_ethtool_ops(struct net_device *ndev);
+
+/* control buffer descriptor ring (CBDR) */
+int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
+			    char *mac_addr, int si_map);
+int enetc_clear_mac_flt_entry(struct enetc_si *si, int index);
+int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
+		       int index);
+void enetc_set_rss_key(struct enetc_hw *hw, const u8 *bytes);
+int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count);
+int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
new file mode 100644
index 0000000..de466b7
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2017-2019 NXP */
+
+#include "enetc.h"
+
+static void enetc_clean_cbdr(struct enetc_si *si)
+{
+	struct enetc_cbdr *ring = &si->cbd_ring;
+	struct enetc_cbd *dest_cbd;
+	int i, status;
+
+	i = ring->next_to_clean;
+
+	while (enetc_rd_reg(ring->cir) != i) {
+		dest_cbd = ENETC_CBD(*ring, i);
+		status = dest_cbd->status_flags & ENETC_CBD_STATUS_MASK;
+		if (status)
+			dev_warn(&si->pdev->dev, "CMD err %04x for cmd %04x\n",
+				 status, dest_cbd->cmd);
+
+		memset(dest_cbd, 0, sizeof(*dest_cbd));
+
+		i = (i + 1) % ring->bd_count;
+	}
+
+	ring->next_to_clean = i;
+}
+
+static int enetc_cbd_unused(struct enetc_cbdr *r)
+{
+	return (r->next_to_clean - r->next_to_use - 1 + r->bd_count) %
+		r->bd_count;
+}
+
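+/* Enqueue one command BD and poll for completion; callers hold
+ * rtnl_lock, hence the busy-wait. Returns -EBUSY if the device does
+ * not consume the BD within ENETC_CBDR_TIMEOUT microseconds.
+ */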
+static int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd)
+{
+	struct enetc_cbdr *ring = &si->cbd_ring;
+	int timeout = ENETC_CBDR_TIMEOUT;
+	struct enetc_cbd *dest_cbd;
+	int i;
+
+	if (unlikely(!ring->bd_base))
+		return -EIO;
+
+	if (unlikely(!enetc_cbd_unused(ring)))
+		enetc_clean_cbdr(si);
+
+	i = ring->next_to_use;
+	dest_cbd = ENETC_CBD(*ring, i);
+
+	/* copy command to the ring */
+	*dest_cbd = *cbd;
+	i = (i + 1) % ring->bd_count;
+
+	ring->next_to_use = i;
+	/* let H/W know BD ring has been updated */
+	enetc_wr_reg(ring->pir, i);
+
+	do {
+		if (enetc_rd_reg(ring->cir) == i)
+			break;
+		udelay(10); /* cannot sleep, rtnl_lock() */
+		timeout -= 10;
+	} while (timeout);
+
+	if (!timeout)
+		return -EBUSY;
+
+	enetc_clean_cbdr(si);
+
+	return 0;
+}
+
+int enetc_clear_mac_flt_entry(struct enetc_si *si, int index)
+{
+	struct enetc_cbd cbd;
+
+	memset(&cbd, 0, sizeof(cbd));
+
+	cbd.cls = 1;
+	cbd.status_flags = ENETC_CBD_FLAGS_SF;
+	cbd.index = cpu_to_le16(index);
+
+	return enetc_send_cmd(si, &cbd);
+}
+
+int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
+			    char *mac_addr, int si_map)
+{
+	struct enetc_cbd cbd;
+	u32 upper;
+	u16 lower;
+
+	memset(&cbd, 0, sizeof(cbd));
+
+	/* fill up the "set" descriptor */
+	cbd.cls = 1;
+	cbd.status_flags = ENETC_CBD_FLAGS_SF;
+	cbd.index = cpu_to_le16(index);
+	cbd.opt[3] = cpu_to_le32(si_map);
+	/* enable entry */
+	cbd.opt[0] = cpu_to_le32(BIT(31));
+
+	upper = *(const u32 *)mac_addr;
+	lower = *(const u16 *)(mac_addr + 4);
+	cbd.addr[0] = cpu_to_le32(upper);
+	cbd.addr[1] = cpu_to_le32(lower);
+
+	return enetc_send_cmd(si, &cbd);
+}
+
+#define RFSE_ALIGN	64
+/* Set entry in RFS table */
+int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
+		       int index)
+{
+	struct enetc_cbd cbd = {.cmd = 0};
+	dma_addr_t dma, dma_align;
+	void *tmp, *tmp_align;
+	int err;
+
+	/* fill up the "set" descriptor */
+	cbd.cmd = 0;
+	cbd.cls = 4;
+	cbd.index = cpu_to_le16(index);
+	cbd.length = cpu_to_le16(sizeof(*rfse));
+	cbd.opt[3] = cpu_to_le32(0); /* SI */
+
+	tmp = dma_alloc_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN,
+				 &dma, GFP_KERNEL);
+	if (!tmp) {
+		dev_err(&si->pdev->dev, "DMA mapping of RFS entry failed!\n");
+		return -ENOMEM;
+	}
+
+	dma_align = ALIGN(dma, RFSE_ALIGN);
+	tmp_align = PTR_ALIGN(tmp, RFSE_ALIGN);
+	memcpy(tmp_align, rfse, sizeof(*rfse));
+
+	cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
+	cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+
+	err = enetc_send_cmd(si, &cbd);
+	if (err)
+		dev_err(&si->pdev->dev, "FS entry add failed (%d)!", err);
+
+	dma_free_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN,
+			  tmp, dma);
+
+	return err;
+}
+
+#define RSSE_ALIGN	64
+static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
+			       bool read)
+{
+	struct enetc_cbd cbd = {.cmd = 0};
+	dma_addr_t dma, dma_align;
+	u8 *tmp, *tmp_align;
+	int err, i;
+
+	if (count < RSSE_ALIGN)
+		/* HW only takes in a full 64-entry table */
+		return -EINVAL;
+
+	tmp = dma_alloc_coherent(&si->pdev->dev, count + RSSE_ALIGN,
+				 &dma, GFP_KERNEL);
+	if (!tmp) {
+		dev_err(&si->pdev->dev, "DMA mapping of RSS table failed!\n");
+		return -ENOMEM;
+	}
+	dma_align = ALIGN(dma, RSSE_ALIGN);
+	tmp_align = PTR_ALIGN(tmp, RSSE_ALIGN);
+
+	if (!read)
+		for (i = 0; i < count; i++)
+			tmp_align[i] = (u8)(table[i]);
+
+	/* fill up the descriptor */
+	cbd.cmd = read ? 2 : 1;
+	cbd.cls = 3;
+	cbd.length = cpu_to_le16(count);
+
+	cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
+	cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+
+	err = enetc_send_cmd(si, &cbd);
+	if (err)
+		dev_err(&si->pdev->dev, "RSS cmd failed (%d)!", err);
+
+	if (read)
+		for (i = 0; i < count; i++)
+			table[i] = tmp_align[i];
+
+	dma_free_coherent(&si->pdev->dev, count + RSSE_ALIGN, tmp, dma);
+
+	return err;
+}
+
+/* Get RSS table */
+int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count)
+{
+	return enetc_cmd_rss_table(si, table, count, true);
+}
+
+/* Set RSS table */
+int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count)
+{
+	return enetc_cmd_rss_table(si, (u32 *)table, count, false);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
new file mode 100644
index 0000000..fcb52ef
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2017-2019 NXP */
+
+#include <linux/net_tstamp.h>
+#include <linux/module.h>
+#include "enetc.h"
+
+static const u32 enetc_si_regs[] = {
+	ENETC_SIMR, ENETC_SIPMAR0, ENETC_SIPMAR1, ENETC_SICBDRMR,
+	ENETC_SICBDRSR,	ENETC_SICBDRBAR0, ENETC_SICBDRBAR1, ENETC_SICBDRPIR,
+	ENETC_SICBDRCIR, ENETC_SICBDRLENR, ENETC_SICAPR0, ENETC_SICAPR1,
+	ENETC_SIUEFDCR
+};
+
+static const u32 enetc_txbdr_regs[] = {
+	ENETC_TBMR, ENETC_TBSR, ENETC_TBBAR0, ENETC_TBBAR1,
+	ENETC_TBPIR, ENETC_TBCIR, ENETC_TBLENR, ENETC_TBIER
+};
+
+static const u32 enetc_rxbdr_regs[] = {
+	ENETC_RBMR, ENETC_RBSR, ENETC_RBBSR, ENETC_RBCIR, ENETC_RBBAR0,
+	ENETC_RBBAR1, ENETC_RBPIR, ENETC_RBLENR, ENETC_RBICIR0, ENETC_RBIER
+};
+
+static const u32 enetc_port_regs[] = {
+	ENETC_PMR, ENETC_PSR, ENETC_PSIPMR, ENETC_PSIPMAR0(0),
+	ENETC_PSIPMAR1(0), ENETC_PTXMBAR, ENETC_PCAPR0, ENETC_PCAPR1,
+	ENETC_PSICFGR0(0), ENETC_PRFSCAPR, ENETC_PTCMSDUR(0),
+	ENETC_PM0_CMD_CFG, ENETC_PM0_MAXFRM, ENETC_PM0_IF_MODE
+};
+
+static int enetc_get_reglen(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	int len;
+
+	len = ARRAY_SIZE(enetc_si_regs);
+	len += ARRAY_SIZE(enetc_txbdr_regs) * priv->num_tx_rings;
+	len += ARRAY_SIZE(enetc_rxbdr_regs) * priv->num_rx_rings;
+
+	if (hw->port)
+		len += ARRAY_SIZE(enetc_port_regs);
+
+	len *= sizeof(u32) * 2; /* store 2 entries per reg: addr and value */
+
+	return len;
+}
+
+static void enetc_get_regs(struct net_device *ndev, struct ethtool_regs *regs,
+			   void *regbuf)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	u32 *buf = (u32 *)regbuf;
+	int i, j;
+	u32 addr;
+
+	for (i = 0; i < ARRAY_SIZE(enetc_si_regs); i++) {
+		*buf++ = enetc_si_regs[i];
+		*buf++ = enetc_rd(hw, enetc_si_regs[i]);
+	}
+
+	for (i = 0; i < priv->num_tx_rings; i++) {
+		for (j = 0; j < ARRAY_SIZE(enetc_txbdr_regs); j++) {
+			addr = ENETC_BDR(TX, i, enetc_txbdr_regs[j]);
+
+			*buf++ = addr;
+			*buf++ = enetc_rd(hw, addr);
+		}
+	}
+
+	for (i = 0; i < priv->num_rx_rings; i++) {
+		for (j = 0; j < ARRAY_SIZE(enetc_rxbdr_regs); j++) {
+			addr = ENETC_BDR(RX, i, enetc_rxbdr_regs[j]);
+
+			*buf++ = addr;
+			*buf++ = enetc_rd(hw, addr);
+		}
+	}
+
+	if (!hw->port)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(enetc_port_regs); i++) {
+		addr = ENETC_PORT_BASE + enetc_port_regs[i];
+		*buf++ = addr;
+		*buf++ = enetc_rd(hw, addr);
+	}
+}
+
+static const struct {
+	int reg;
+	char name[ETH_GSTRING_LEN];
+} enetc_si_counters[] =  {
+	{ ENETC_SIROCT, "SI rx octets" },
+	{ ENETC_SIRFRM, "SI rx frames" },
+	{ ENETC_SIRUCA, "SI rx u-cast frames" },
+	{ ENETC_SIRMCA, "SI rx m-cast frames" },
+	{ ENETC_SITOCT, "SI tx octets" },
+	{ ENETC_SITFRM, "SI tx frames" },
+	{ ENETC_SITUCA, "SI tx u-cast frames" },
+	{ ENETC_SITMCA, "SI tx m-cast frames" },
+	{ ENETC_RBDCR(0), "Rx ring  0 discarded frames" },
+	{ ENETC_RBDCR(1), "Rx ring  1 discarded frames" },
+	{ ENETC_RBDCR(2), "Rx ring  2 discarded frames" },
+	{ ENETC_RBDCR(3), "Rx ring  3 discarded frames" },
+	{ ENETC_RBDCR(4), "Rx ring  4 discarded frames" },
+	{ ENETC_RBDCR(5), "Rx ring  5 discarded frames" },
+	{ ENETC_RBDCR(6), "Rx ring  6 discarded frames" },
+	{ ENETC_RBDCR(7), "Rx ring  7 discarded frames" },
+	{ ENETC_RBDCR(8), "Rx ring  8 discarded frames" },
+	{ ENETC_RBDCR(9), "Rx ring  9 discarded frames" },
+	{ ENETC_RBDCR(10), "Rx ring 10 discarded frames" },
+	{ ENETC_RBDCR(11), "Rx ring 11 discarded frames" },
+	{ ENETC_RBDCR(12), "Rx ring 12 discarded frames" },
+	{ ENETC_RBDCR(13), "Rx ring 13 discarded frames" },
+	{ ENETC_RBDCR(14), "Rx ring 14 discarded frames" },
+	{ ENETC_RBDCR(15), "Rx ring 15 discarded frames" },
+};
+
+static const struct {
+	int reg;
+	char name[ETH_GSTRING_LEN];
+} enetc_port_counters[] = {
+	{ ENETC_PM0_REOCT,  "MAC rx ethernet octets" },
+	{ ENETC_PM0_RALN,   "MAC rx alignment errors" },
+	{ ENETC_PM0_RXPF,   "MAC rx valid pause frames" },
+	{ ENETC_PM0_RFRM,   "MAC rx valid frames" },
+	{ ENETC_PM0_RFCS,   "MAC rx fcs errors" },
+	{ ENETC_PM0_RVLAN,  "MAC rx VLAN frames" },
+	{ ENETC_PM0_RERR,   "MAC rx frame errors" },
+	{ ENETC_PM0_RUCA,   "MAC rx unicast frames" },
+	{ ENETC_PM0_RMCA,   "MAC rx multicast frames" },
+	{ ENETC_PM0_RBCA,   "MAC rx broadcast frames" },
+	{ ENETC_PM0_RDRP,   "MAC rx dropped packets" },
+	{ ENETC_PM0_RPKT,   "MAC rx packets" },
+	{ ENETC_PM0_RUND,   "MAC rx undersized packets" },
+	{ ENETC_PM0_R64,    "MAC rx 64 byte packets" },
+	{ ENETC_PM0_R127,   "MAC rx 65-127 byte packets" },
+	{ ENETC_PM0_R255,   "MAC rx 128-255 byte packets" },
+	{ ENETC_PM0_R511,   "MAC rx 256-511 byte packets" },
+	{ ENETC_PM0_R1023,  "MAC rx 512-1023 byte packets" },
+	{ ENETC_PM0_R1518,  "MAC rx 1024-1518 byte packets" },
+	{ ENETC_PM0_R1519X, "MAC rx 1519 to max-octet packets" },
+	{ ENETC_PM0_ROVR,   "MAC rx oversized packets" },
+	{ ENETC_PM0_RJBR,   "MAC rx jabber packets" },
+	{ ENETC_PM0_RFRG,   "MAC rx fragment packets" },
+	{ ENETC_PM0_RCNP,   "MAC rx control packets" },
+	{ ENETC_PM0_RDRNTP, "MAC rx fifo drop" },
+	{ ENETC_PM0_TEOCT,  "MAC tx ethernet octets" },
+	{ ENETC_PM0_TOCT,   "MAC tx octets" },
+	{ ENETC_PM0_TCRSE,  "MAC tx carrier sense errors" },
+	{ ENETC_PM0_TXPF,   "MAC tx valid pause frames" },
+	{ ENETC_PM0_TFRM,   "MAC tx frames" },
+	{ ENETC_PM0_TFCS,   "MAC tx fcs errors" },
+	{ ENETC_PM0_TVLAN,  "MAC tx VLAN frames" },
+	{ ENETC_PM0_TERR,   "MAC tx frame errors" },
+	{ ENETC_PM0_TUCA,   "MAC tx unicast frames" },
+	{ ENETC_PM0_TMCA,   "MAC tx multicast frames" },
+	{ ENETC_PM0_TBCA,   "MAC tx broadcast frames" },
+	{ ENETC_PM0_TPKT,   "MAC tx packets" },
+	{ ENETC_PM0_TUND,   "MAC tx undersized packets" },
+	{ ENETC_PM0_T127,   "MAC tx 65-127 byte packets" },
+	{ ENETC_PM0_T1023,  "MAC tx 512-1023 byte packets" },
+	{ ENETC_PM0_T1518,  "MAC tx 1024-1518 byte packets" },
+	{ ENETC_PM0_TCNP,   "MAC tx control packets" },
+	{ ENETC_PM0_TDFR,   "MAC tx deferred packets" },
+	{ ENETC_PM0_TMCOL,  "MAC tx multiple collisions" },
+	{ ENETC_PM0_TSCOL,  "MAC tx single collisions" },
+	{ ENETC_PM0_TLCOL,  "MAC tx late collisions" },
+	{ ENETC_PM0_TECOL,  "MAC tx excessive collisions" },
+	{ ENETC_UFDMF,      "SI MAC nomatch u-cast discards" },
+	{ ENETC_MFDMF,      "SI MAC nomatch m-cast discards" },
+	{ ENETC_PBFDSIR,    "SI MAC nomatch b-cast discards" },
+	{ ENETC_PUFDVFR,    "SI VLAN nomatch u-cast discards" },
+	{ ENETC_PMFDVFR,    "SI VLAN nomatch m-cast discards" },
+	{ ENETC_PBFDVFR,    "SI VLAN nomatch b-cast discards" },
+	{ ENETC_PFDMSAPR,   "SI pruning discarded frames" },
+	{ ENETC_PICDR(0),   "ICM DR0 discarded frames" },
+	{ ENETC_PICDR(1),   "ICM DR1 discarded frames" },
+	{ ENETC_PICDR(2),   "ICM DR2 discarded frames" },
+	{ ENETC_PICDR(3),   "ICM DR3 discarded frames" },
+};
+
+static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
+	"Rx ring %2d frames",
+	"Rx ring %2d alloc errors",
+};
+
+static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
+	"Tx ring %2d frames",
+};
+
+static int enetc_get_sset_count(struct net_device *ndev, int sset)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	if (sset == ETH_SS_STATS)
+		return ARRAY_SIZE(enetc_si_counters) +
+			ARRAY_SIZE(tx_ring_stats) * priv->num_tx_rings +
+			ARRAY_SIZE(rx_ring_stats) * priv->num_rx_rings +
+			(enetc_si_is_pf(priv->si) ?
+			ARRAY_SIZE(enetc_port_counters) : 0);
+
+	return -EOPNOTSUPP;
+}
+
+static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	u8 *p = data;
+	int i, j;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++) {
+			strlcpy(p, enetc_si_counters[i].name, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < priv->num_tx_rings; i++) {
+			for (j = 0; j < ARRAY_SIZE(tx_ring_stats); j++) {
+				snprintf(p, ETH_GSTRING_LEN, tx_ring_stats[j],
+					 i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+		for (i = 0; i < priv->num_rx_rings; i++) {
+			for (j = 0; j < ARRAY_SIZE(rx_ring_stats); j++) {
+				snprintf(p, ETH_GSTRING_LEN, rx_ring_stats[j],
+					 i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
+		if (!enetc_si_is_pf(priv->si))
+			break;
+
+		for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) {
+			strlcpy(p, enetc_port_counters[i].name,
+				ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	}
+}
+
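+/* Note: the value ordering below must mirror the string ordering in
+ * enetc_get_strings() exactly (SI counters, Tx rings, Rx rings, then the
+ * port counters on the PF), since ethtool pairs names and values by index.
+ */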
+static void enetc_get_ethtool_stats(struct net_device *ndev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	int i, o = 0;
+
+	for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++)
+		data[o++] = enetc_rd64(hw, enetc_si_counters[i].reg);
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		data[o++] = priv->tx_ring[i]->stats.packets;
+
+	for (i = 0; i < priv->num_rx_rings; i++) {
+		data[o++] = priv->rx_ring[i]->stats.packets;
+		data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs;
+	}
+
+	if (!enetc_si_is_pf(priv->si))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++)
+		data[o++] = enetc_port_rd(hw, enetc_port_counters[i].reg);
+}
+
+#define ENETC_RSSHASH_L3 (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO | RXH_IP_SRC | \
+			  RXH_IP_DST)
+#define ENETC_RSSHASH_L4 (ENETC_RSSHASH_L3 | RXH_L4_B_0_1 | RXH_L4_B_2_3)
+static int enetc_get_rsshash(struct ethtool_rxnfc *rxnfc)
+{
+	static const u32 rsshash[] = {
+			[TCP_V4_FLOW]    = ENETC_RSSHASH_L4,
+			[UDP_V4_FLOW]    = ENETC_RSSHASH_L4,
+			[SCTP_V4_FLOW]   = ENETC_RSSHASH_L4,
+			[AH_ESP_V4_FLOW] = ENETC_RSSHASH_L3,
+			[IPV4_FLOW]      = ENETC_RSSHASH_L3,
+			[TCP_V6_FLOW]    = ENETC_RSSHASH_L4,
+			[UDP_V6_FLOW]    = ENETC_RSSHASH_L4,
+			[SCTP_V6_FLOW]   = ENETC_RSSHASH_L4,
+			[AH_ESP_V6_FLOW] = ENETC_RSSHASH_L3,
+			[IPV6_FLOW]      = ENETC_RSSHASH_L3,
+			[ETHER_FLOW]     = 0,
+	};
+
+	if (rxnfc->flow_type >= ARRAY_SIZE(rsshash))
+		return -EINVAL;
+
+	rxnfc->data = rsshash[rxnfc->flow_type];
+
+	return 0;
+}
+
+/* current HW spec does byte reversal on everything including MAC addresses */
+static void ether_addr_copy_swap(u8 *dst, const u8 *src)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		dst[i] = src[ETH_ALEN - i - 1];
+}
+
+static int enetc_set_cls_entry(struct enetc_si *si,
+			       struct ethtool_rx_flow_spec *fs, bool en)
+{
+	struct ethtool_tcpip4_spec *l4ip4_h, *l4ip4_m;
+	struct ethtool_usrip4_spec *l3ip4_h, *l3ip4_m;
+	struct ethhdr *eth_h, *eth_m;
+	struct enetc_cmd_rfse rfse = { {0} };
+
+	if (!en)
+		goto done;
+
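+	/* TCP, UDP and SCTP over IPv4 share the same tuple layout
+	 * (struct ethtool_tcpip4_spec), so the three cases funnel into
+	 * the common l4ip4 handler below.
+	 */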
+	switch (fs->flow_type & 0xff) {
+	case TCP_V4_FLOW:
+		l4ip4_h = &fs->h_u.tcp_ip4_spec;
+		l4ip4_m = &fs->m_u.tcp_ip4_spec;
+		goto l4ip4;
+	case UDP_V4_FLOW:
+		l4ip4_h = &fs->h_u.udp_ip4_spec;
+		l4ip4_m = &fs->m_u.udp_ip4_spec;
+		goto l4ip4;
+	case SCTP_V4_FLOW:
+		l4ip4_h = &fs->h_u.sctp_ip4_spec;
+		l4ip4_m = &fs->m_u.sctp_ip4_spec;
+l4ip4:
+		rfse.sip_h[0] = l4ip4_h->ip4src;
+		rfse.sip_m[0] = l4ip4_m->ip4src;
+		rfse.dip_h[0] = l4ip4_h->ip4dst;
+		rfse.dip_m[0] = l4ip4_m->ip4dst;
+		rfse.sport_h = ntohs(l4ip4_h->psrc);
+		rfse.sport_m = ntohs(l4ip4_m->psrc);
+		rfse.dport_h = ntohs(l4ip4_h->pdst);
+		rfse.dport_m = ntohs(l4ip4_m->pdst);
+		if (l4ip4_m->tos)
+			netdev_warn(si->ndev, "ToS field is not supported and was ignored\n");
+		rfse.ethtype_h = ETH_P_IP; /* IPv4 */
+		rfse.ethtype_m = 0xffff;
+		break;
+	case IP_USER_FLOW:
+		l3ip4_h = &fs->h_u.usr_ip4_spec;
+		l3ip4_m = &fs->m_u.usr_ip4_spec;
+
+		rfse.sip_h[0] = l3ip4_h->ip4src;
+		rfse.sip_m[0] = l3ip4_m->ip4src;
+		rfse.dip_h[0] = l3ip4_h->ip4dst;
+		rfse.dip_m[0] = l3ip4_m->ip4dst;
+		if (l3ip4_m->tos)
+			netdev_warn(si->ndev, "ToS field is not supported and was ignored\n");
+		rfse.ethtype_h = ETH_P_IP; /* IPv4 */
+		rfse.ethtype_m = 0xffff;
+		break;
+	case ETHER_FLOW:
+		eth_h = &fs->h_u.ether_spec;
+		eth_m = &fs->m_u.ether_spec;
+
+		ether_addr_copy_swap(rfse.smac_h, eth_h->h_source);
+		ether_addr_copy_swap(rfse.smac_m, eth_m->h_source);
+		ether_addr_copy_swap(rfse.dmac_h, eth_h->h_dest);
+		ether_addr_copy_swap(rfse.dmac_m, eth_m->h_dest);
+		rfse.ethtype_h = ntohs(eth_h->h_proto);
+		rfse.ethtype_m = ntohs(eth_m->h_proto);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	rfse.mode |= ENETC_RFSE_EN;
+	if (fs->ring_cookie != RX_CLS_FLOW_DISC) {
+		rfse.mode |= ENETC_RFSE_MODE_BD;
+		rfse.result = fs->ring_cookie;
+	}
+done:
+	return enetc_set_fs_entry(si, &rfse, fs->location);
+}
+
+static int enetc_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc,
+			   u32 *rule_locs)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	int i, j;
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_GRXRINGS:
+		rxnfc->data = priv->num_rx_rings;
+		break;
+	case ETHTOOL_GRXFH:
+		/* get RSS hash config */
+		return enetc_get_rsshash(rxnfc);
+	case ETHTOOL_GRXCLSRLCNT:
+		/* total number of entries */
+		rxnfc->data = priv->si->num_fs_entries;
+		/* number of entries in use */
+		rxnfc->rule_cnt = 0;
+		for (i = 0; i < priv->si->num_fs_entries; i++)
+			if (priv->cls_rules[i].used)
+				rxnfc->rule_cnt++;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		if (rxnfc->fs.location >= priv->si->num_fs_entries)
+			return -EINVAL;
+
+		/* return the rule stored at the requested location */
+		rxnfc->fs = priv->cls_rules[rxnfc->fs.location].fs;
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		/* total number of entries */
+		rxnfc->data = priv->si->num_fs_entries;
+		/* array of indexes of used entries */
+		j = 0;
+		for (i = 0; i < priv->si->num_fs_entries; i++) {
+			if (!priv->cls_rules[i].used)
+				continue;
+			if (j == rxnfc->rule_cnt)
+				return -EMSGSIZE;
+			rule_locs[j++] = i;
+		}
+		/* number of entries in use */
+		rxnfc->rule_cnt = j;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int enetc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	int err;
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		if (rxnfc->fs.location >= priv->si->num_fs_entries)
+			return -EINVAL;
+
+		if (rxnfc->fs.ring_cookie >= priv->num_rx_rings &&
+		    rxnfc->fs.ring_cookie != RX_CLS_FLOW_DISC)
+			return -EINVAL;
+
+		err = enetc_set_cls_entry(priv->si, &rxnfc->fs, true);
+		if (err)
+			return err;
+		priv->cls_rules[rxnfc->fs.location].fs = rxnfc->fs;
+		priv->cls_rules[rxnfc->fs.location].used = 1;
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		if (rxnfc->fs.location >= priv->si->num_fs_entries)
+			return -EINVAL;
+
+		err = enetc_set_cls_entry(priv->si, &rxnfc->fs, false);
+		if (err)
+			return err;
+		priv->cls_rules[rxnfc->fs.location].used = 0;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static u32 enetc_get_rxfh_key_size(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	/* return the size of the RX flow hash key. PF only */
+	return (priv->si->hw.port) ? ENETC_RSSHASH_KEY_SIZE : 0;
+}
+
+static u32 enetc_get_rxfh_indir_size(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	/* return the size of the RX flow hash indirection table */
+	return priv->si->num_rss;
+}
+
+static int enetc_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key,
+			  u8 *hfunc)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	int err = 0, i;
+
+	/* return hash function */
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	/* return hash key */
+	if (key && hw->port)
+		for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++)
+			((u32 *)key)[i] = enetc_port_rd(hw, ENETC_PRSSK(i));
+
+	/* return RSS table */
+	if (indir)
+		err = enetc_get_rss_table(priv->si, indir, priv->si->num_rss);
+
+	return err;
+}
+
+void enetc_set_rss_key(struct enetc_hw *hw, const u8 *bytes)
+{
+	int i;
+
+	for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++)
+		enetc_port_wr(hw, ENETC_PRSSK(i), ((u32 *)bytes)[i]);
+}
+
+static int enetc_set_rxfh(struct net_device *ndev, const u32 *indir,
+			  const u8 *key, const u8 hfunc)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	int err = 0;
+
+	/* set hash key, if PF */
+	if (key && hw->port)
+		enetc_set_rss_key(hw, key);
+
+	/* set RSS table */
+	if (indir)
+		err = enetc_set_rss_table(priv->si, indir, priv->si->num_rss);
+
+	return err;
+}
+
+static void enetc_get_ringparam(struct net_device *ndev,
+				struct ethtool_ringparam *ring)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	ring->rx_pending = priv->rx_bd_count;
+	ring->tx_pending = priv->tx_bd_count;
+
+	/* do some h/w sanity checks for BDR length */
+	if (netif_running(ndev)) {
+		struct enetc_hw *hw = &priv->si->hw;
+		u32 val = enetc_rxbdr_rd(hw, 0, ENETC_RBLENR);
+
+		if (val != priv->rx_bd_count)
+			netif_err(priv, hw, ndev, "RxBDR[RBLENR] = %d!\n", val);
+
+		val = enetc_txbdr_rd(hw, 0, ENETC_TBLENR);
+
+		if (val != priv->tx_bd_count)
+			netif_err(priv, hw, ndev, "TxBDR[TBLENR] = %d!\n", val);
+	}
+}
+
+static int enetc_get_ts_info(struct net_device *ndev,
+			     struct ethtool_ts_info *info)
+{
+	int *phc_idx;
+
+	phc_idx = symbol_get(enetc_phc_index);
+	if (phc_idx) {
+		info->phc_index = *phc_idx;
+		symbol_put(enetc_phc_index);
+	} else {
+		info->phc_index = -1;
+	}
+
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+			 (1 << HWTSTAMP_TX_ON);
+	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			   (1 << HWTSTAMP_FILTER_ALL);
+#else
+	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE;
+#endif
+	return 0;
+}
+
+static const struct ethtool_ops enetc_pf_ethtool_ops = {
+	.get_regs_len = enetc_get_reglen,
+	.get_regs = enetc_get_regs,
+	.get_sset_count = enetc_get_sset_count,
+	.get_strings = enetc_get_strings,
+	.get_ethtool_stats = enetc_get_ethtool_stats,
+	.get_rxnfc = enetc_get_rxnfc,
+	.set_rxnfc = enetc_set_rxnfc,
+	.get_rxfh_key_size = enetc_get_rxfh_key_size,
+	.get_rxfh_indir_size = enetc_get_rxfh_indir_size,
+	.get_rxfh = enetc_get_rxfh,
+	.set_rxfh = enetc_set_rxfh,
+	.get_ringparam = enetc_get_ringparam,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_link = ethtool_op_get_link,
+	.get_ts_info = enetc_get_ts_info,
+};
+
+static const struct ethtool_ops enetc_vf_ethtool_ops = {
+	.get_regs_len = enetc_get_reglen,
+	.get_regs = enetc_get_regs,
+	.get_sset_count = enetc_get_sset_count,
+	.get_strings = enetc_get_strings,
+	.get_ethtool_stats = enetc_get_ethtool_stats,
+	.get_rxnfc = enetc_get_rxnfc,
+	.set_rxnfc = enetc_set_rxnfc,
+	.get_rxfh_indir_size = enetc_get_rxfh_indir_size,
+	.get_rxfh = enetc_get_rxfh,
+	.set_rxfh = enetc_set_rxfh,
+	.get_ringparam = enetc_get_ringparam,
+	.get_link = ethtool_op_get_link,
+	.get_ts_info = enetc_get_ts_info,
+};
+
+void enetc_set_ethtool_ops(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	if (enetc_si_is_pf(priv->si))
+		ndev->ethtool_ops = &enetc_pf_ethtool_ops;
+	else
+		ndev->ethtool_ops = &enetc_vf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
new file mode 100644
index 0000000..8827629
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -0,0 +1,556 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2017-2019 NXP */
+
+#include <linux/bitops.h>
+
+/* ENETC device IDs */
+#define ENETC_DEV_ID_PF		0xe100
+#define ENETC_DEV_ID_VF		0xef00
+#define ENETC_DEV_ID_PTP	0xee02
+
+/* ENETC register block BAR */
+#define ENETC_BAR_REGS	0
+
+/* SI regs, offset: 0h */
+#define ENETC_SIMR	0
+#define ENETC_SIMR_EN	BIT(31)
+#define ENETC_SIMR_RSSE	BIT(0)
+#define ENETC_SICTR0	0x18
+#define ENETC_SICTR1	0x1c
+#define ENETC_SIPCAPR0	0x20
+#define ENETC_SIPCAPR0_RSS	BIT(8)
+#define ENETC_SIPCAPR1	0x24
+#define ENETC_SITGTGR	0x30
+#define ENETC_SIRBGCR	0x38
+/* cache attribute registers for transactions initiated by ENETC */
+#define ENETC_SICAR0	0x40
+#define ENETC_SICAR1	0x44
+#define ENETC_SICAR2	0x48
+/* rd snoop, no alloc
+ * wr snoop, no alloc, partial cache line update for BDs and full cache line
+ * update for data
+ */
+#define ENETC_SICAR_RD_COHERENT	0x2b2b0000
+#define ENETC_SICAR_WR_COHERENT	0x00006727
+#define ENETC_SICAR_MSI	0x00300030 /* rd/wr device, no snoop, no alloc */
+
+#define ENETC_SIPMAR0	0x80
+#define ENETC_SIPMAR1	0x84
+
+/* VF-PF Message passing */
+#define ENETC_DEFAULT_MSG_SIZE	1024	/* and max size */
+/* msg size encoding: default and max msg value of 1024B encoded as 0 */
+static inline u32 enetc_vsi_set_msize(u32 size)
+{
+	return size < ENETC_DEFAULT_MSG_SIZE ? size >> 5 : 0;
+}
+
+#define ENETC_PSIMSGRR	0x204
+#define ENETC_PSIMSGRR_MR_MASK	GENMASK(2, 1)
+#define ENETC_PSIMSGRR_MR(n) BIT((n) + 1) /* n = VSI index */
+#define ENETC_PSIVMSGRCVAR0(n)	(0x210 + (n) * 0x8) /* n = VSI index */
+#define ENETC_PSIVMSGRCVAR1(n)	(0x214 + (n) * 0x8)
+
+#define ENETC_VSIMSGSR	0x204	/* RO */
+#define ENETC_VSIMSGSR_MB	BIT(0)
+#define ENETC_VSIMSGSR_MS	BIT(1)
+#define ENETC_VSIMSGSNDAR0	0x210
+#define ENETC_VSIMSGSNDAR1	0x214
+
+#define ENETC_SIMSGSR_SET_MC(val) ((val) << 16)
+#define ENETC_SIMSGSR_GET_MC(val) ((val) >> 16)
+
+/* SI statistics */
+#define ENETC_SIROCT	0x300
+#define ENETC_SIRFRM	0x308
+#define ENETC_SIRUCA	0x310
+#define ENETC_SIRMCA	0x318
+#define ENETC_SITOCT	0x320
+#define ENETC_SITFRM	0x328
+#define ENETC_SITUCA	0x330
+#define ENETC_SITMCA	0x338
+#define ENETC_RBDCR(n)	(0x8180 + (n) * 0x200)
+
+/* Control BDR regs */
+#define ENETC_SICBDRMR		0x800
+#define ENETC_SICBDRSR		0x804	/* RO */
+#define ENETC_SICBDRBAR0	0x810
+#define ENETC_SICBDRBAR1	0x814
+#define ENETC_SICBDRPIR		0x818
+#define ENETC_SICBDRCIR		0x81c
+#define ENETC_SICBDRLENR	0x820
+
+#define ENETC_SICAPR0	0x900
+#define ENETC_SICAPR1	0x904
+
+#define ENETC_PSIIER	0xa00
+#define ENETC_PSIIER_MR_MASK	GENMASK(2, 1)
+#define ENETC_PSIIDR	0xa08
+#define ENETC_SITXIDR	0xa18
+#define ENETC_SIRXIDR	0xa28
+#define ENETC_SIMSIVR	0xa30
+
+#define ENETC_SIMSITRV(n) (0xB00 + (n) * 0x4)
+#define ENETC_SIMSIRRV(n) (0xB80 + (n) * 0x4)
+
+#define ENETC_SIUEFDCR	0xe28
+
+#define ENETC_SIRFSCAPR	0x1200
+#define ENETC_SIRFSCAPR_GET_NUM_RFS(val) ((val) & 0x7f)
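+/* RSS capability is encoded as a power of two: 32 * 2^n table entries */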
+#define ENETC_SIRSSCAPR	0x1600
+#define ENETC_SIRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32)
+
+/* SI BDR sub-blocks, n = 0..7 */
+enum enetc_bdr_type {TX, RX};
+#define ENETC_BDR_OFF(i)	((i) * 0x200)
+#define ENETC_BDR(t, i, r)	(0x8000 + (t) * 0x100 + ENETC_BDR_OFF(i) + (r))
+/* RX BDR reg offsets */
+#define ENETC_RBMR	0
+#define ENETC_RBMR_BDS	BIT(2)
+#define ENETC_RBMR_VTE	BIT(5)
+#define ENETC_RBMR_EN	BIT(31)
+#define ENETC_RBSR	0x4
+#define ENETC_RBBSR	0x8
+#define ENETC_RBCIR	0xc
+#define ENETC_RBBAR0	0x10
+#define ENETC_RBBAR1	0x14
+#define ENETC_RBPIR	0x18
+#define ENETC_RBLENR	0x20
+#define ENETC_RBIER	0xa0
+#define ENETC_RBIER_RXTIE	BIT(0)
+#define ENETC_RBIDR	0xa4
+#define ENETC_RBICIR0	0xa8
+#define ENETC_RBICIR0_ICEN	BIT(31)
+
+/* TX BDR reg offsets */
+#define ENETC_TBMR	0
+#define ENETC_TBSR_BUSY	BIT(0)
+#define ENETC_TBMR_VIH	BIT(9)
+#define ENETC_TBMR_PRIO_MASK		GENMASK(2, 0)
+#define ENETC_TBMR_SET_PRIO(val)	((val) & ENETC_TBMR_PRIO_MASK)
+#define ENETC_TBMR_EN	BIT(31)
+#define ENETC_TBSR	0x4
+#define ENETC_TBBAR0	0x10
+#define ENETC_TBBAR1	0x14
+#define ENETC_TBPIR	0x18
+#define ENETC_TBCIR	0x1c
+#define ENETC_TBCIR_IDX_MASK	0xffff
+#define ENETC_TBLENR	0x20
+#define ENETC_TBIER	0xa0
+#define ENETC_TBIER_TXTIE	BIT(0)
+#define ENETC_TBIDR	0xa4
+#define ENETC_TBICIR0	0xa8
+#define ENETC_TBICIR0_ICEN	BIT(31)
+
+#define ENETC_RTBLENR_LEN(n)	((n) & ~0x7)
+
+/* Port regs, offset: 1_0000h */
+#define ENETC_PORT_BASE		0x10000
+#define ENETC_PMR		0x0000
+#define ENETC_PMR_EN	GENMASK(18, 16)
+#define ENETC_PSR		0x0004 /* RO */
+#define ENETC_PSIPMR		0x0018
+#define ENETC_PSIPMR_SET_UP(n)	BIT(n) /* n = SI index */
+#define ENETC_PSIPMR_SET_MP(n)	BIT((n) + 16)
+#define ENETC_PSIPVMR		0x001c
+#define ENETC_VLAN_PROMISC_MAP_ALL	0x7
+#define ENETC_PSIPVMR_SET_VP(simap)	((simap) & 0x7)
+#define ENETC_PSIPVMR_SET_VUTA(simap)	(((simap) & 0x7) << 16)
+#define ENETC_PSIPMAR0(n)	(0x0100 + (n) * 0x8) /* n = SI index */
+#define ENETC_PSIPMAR1(n)	(0x0104 + (n) * 0x8)
+#define ENETC_PVCLCTR		0x0208
+#define ENETC_VLAN_TYPE_C	BIT(0)
+#define ENETC_VLAN_TYPE_S	BIT(1)
+#define ENETC_PVCLCTR_OVTPIDL(bmp)	((bmp) & 0xff) /* VLAN_TYPE */
+#define ENETC_PSIVLANR(n)	(0x0240 + (n) * 4) /* n = SI index */
+#define ENETC_PSIVLAN_EN	BIT(31)
+#define ENETC_PSIVLAN_SET_QOS(val)	((u32)(val) << 12)
+#define ENETC_PTXMBAR		0x0608
+#define ENETC_PCAPR0		0x0900
+#define ENETC_PCAPR0_RXBDR(val)	((val) >> 24)
+#define ENETC_PCAPR0_TXBDR(val)	(((val) >> 16) & 0xff)
+#define ENETC_PCAPR1		0x0904
+#define ENETC_PSICFGR0(n)	(0x0940 + (n) * 0xc)  /* n = SI index */
+#define ENETC_PSICFGR0_SET_TXBDR(val)	((val) & 0xff)
+#define ENETC_PSICFGR0_SET_RXBDR(val)	(((val) & 0xff) << 16)
+#define ENETC_PSICFGR0_VTE	BIT(12)
+#define ENETC_PSICFGR0_SIVIE	BIT(14)
+#define ENETC_PSICFGR0_ASE	BIT(15)
+#define ENETC_PSICFGR0_SIVC(bmp)	(((bmp) & 0xff) << 24) /* VLAN_TYPE */
+
+#define ENETC_PTCCBSR0(n)	(0x1110 + (n) * 8) /* n = 0 to 7 */
+#define ENETC_PTCCBSR1(n)	(0x1114 + (n) * 8) /* n = 0 to 7 */
+#define ENETC_RSSHASH_KEY_SIZE	40
+#define ENETC_PRSSK(n)		(0x1410 + (n) * 4) /* n = [0..9] */
+#define ENETC_PSIVLANFMR	0x1700
+#define ENETC_PSIVLANFMR_VS	BIT(0)
+#define ENETC_PRFSMR		0x1800
+#define ENETC_PRFSMR_RFSE	BIT(31)
+#define ENETC_PRFSCAPR		0x1804
+#define ENETC_PRFSCAPR_GET_NUM_RFS(val)	((((val) & 0xf) + 1) * 16)
+#define ENETC_PSIRFSCFGR(n)	(0x1814 + (n) * 4) /* n = SI index */
+#define ENETC_PFPMR		0x1900
+#define ENETC_PFPMR_PMACE	BIT(1)
+#define ENETC_PFPMR_MWLM	BIT(0)
+#define ENETC_PSIUMHFR0(n, err)	(((err) ? 0x1d08 : 0x1d00) + (n) * 0x10)
+#define ENETC_PSIUMHFR1(n)	(0x1d04 + (n) * 0x10)
+#define ENETC_PSIMMHFR0(n, err)	(((err) ? 0x1d00 : 0x1d08) + (n) * 0x10)
+#define ENETC_PSIMMHFR1(n)	(0x1d0c + (n) * 0x10)
+#define ENETC_PSIVHFR0(n)	(0x1e00 + (n) * 8) /* n = SI index */
+#define ENETC_PSIVHFR1(n)	(0x1e04 + (n) * 8) /* n = SI index */
+#define ENETC_MMCSR		0x1f00
+#define ENETC_MMCSR_ME		BIT(16)
+#define ENETC_PTCMSDUR(n)	(0x2020 + (n) * 4) /* n = TC index [0..7] */
+
+#define ENETC_PM0_CMD_CFG	0x8008
+#define ENETC_PM1_CMD_CFG	0x9008
+#define ENETC_PM0_TX_EN		BIT(0)
+#define ENETC_PM0_RX_EN		BIT(1)
+#define ENETC_PM0_PROMISC	BIT(4)
+#define ENETC_PM0_CMD_XGLP	BIT(10)
+#define ENETC_PM0_CMD_TXP	BIT(11)
+#define ENETC_PM0_CMD_PHY_TX_EN	BIT(15)
+#define ENETC_PM0_CMD_SFD	BIT(21)
+#define ENETC_PM0_MAXFRM	0x8014
+#define ENETC_SET_TX_MTU(val)	((val) << 16)
+#define ENETC_SET_MAXFRM(val)	((val) & 0xffff)
+#define ENETC_PM0_IF_MODE	0x8300
+#define ENETC_PMO_IFM_RG	BIT(2)
+#define ENETC_PM0_IFM_RLP	(BIT(5) | BIT(11))
+#define ENETC_PM0_IFM_RGAUTO	(BIT(15) | ENETC_PMO_IFM_RG | BIT(1))
+#define ENETC_PM0_IFM_XGMII	BIT(12)
+
+/* MAC counters */
+#define ENETC_PM0_REOCT		0x8100
+#define ENETC_PM0_RALN		0x8110
+#define ENETC_PM0_RXPF		0x8118
+#define ENETC_PM0_RFRM		0x8120
+#define ENETC_PM0_RFCS		0x8128
+#define ENETC_PM0_RVLAN		0x8130
+#define ENETC_PM0_RERR		0x8138
+#define ENETC_PM0_RUCA		0x8140
+#define ENETC_PM0_RMCA		0x8148
+#define ENETC_PM0_RBCA		0x8150
+#define ENETC_PM0_RDRP		0x8158
+#define ENETC_PM0_RPKT		0x8160
+#define ENETC_PM0_RUND		0x8168
+#define ENETC_PM0_R64		0x8170
+#define ENETC_PM0_R127		0x8178
+#define ENETC_PM0_R255		0x8180
+#define ENETC_PM0_R511		0x8188
+#define ENETC_PM0_R1023		0x8190
+#define ENETC_PM0_R1518		0x8198
+#define ENETC_PM0_R1519X	0x81A0
+#define ENETC_PM0_ROVR		0x81A8
+#define ENETC_PM0_RJBR		0x81B0
+#define ENETC_PM0_RFRG		0x81B8
+#define ENETC_PM0_RCNP		0x81C0
+#define ENETC_PM0_RDRNTP	0x81C8
+#define ENETC_PM0_TEOCT		0x8200
+#define ENETC_PM0_TOCT		0x8208
+#define ENETC_PM0_TCRSE		0x8210
+#define ENETC_PM0_TXPF		0x8218
+#define ENETC_PM0_TFRM		0x8220
+#define ENETC_PM0_TFCS		0x8228
+#define ENETC_PM0_TVLAN		0x8230
+#define ENETC_PM0_TERR		0x8238
+#define ENETC_PM0_TUCA		0x8240
+#define ENETC_PM0_TMCA		0x8248
+#define ENETC_PM0_TBCA		0x8250
+#define ENETC_PM0_TPKT		0x8260
+#define ENETC_PM0_TUND		0x8268
+#define ENETC_PM0_T127		0x8278
+#define ENETC_PM0_T1023		0x8290
+#define ENETC_PM0_T1518		0x8298
+#define ENETC_PM0_TCNP		0x82C0
+#define ENETC_PM0_TDFR		0x82D0
+#define ENETC_PM0_TMCOL		0x82D8
+#define ENETC_PM0_TSCOL		0x82E0
+#define ENETC_PM0_TLCOL		0x82E8
+#define ENETC_PM0_TECOL		0x82F0
+
+/* Port counters */
+#define ENETC_PICDR(n)		(0x0700 + (n) * 8) /* n = [0..3] */
+#define ENETC_PBFDSIR		0x0810
+#define ENETC_PFDMSAPR		0x0814
+#define ENETC_UFDMF		0x1680
+#define ENETC_MFDMF		0x1684
+#define ENETC_PUFDVFR		0x1780
+#define ENETC_PMFDVFR		0x1784
+#define ENETC_PBFDVFR		0x1788
+
+/* Global regs, offset: 2_0000h */
+#define ENETC_GLOBAL_BASE	0x20000
+#define ENETC_G_EIPBRR0		0x0bf8
+#define ENETC_G_EIPBRR1		0x0bfc
+#define ENETC_G_EPFBLPR(n)	(0xd00 + 4 * (n))
+#define ENETC_G_EPFBLPR1_XGMII	0x80000000
+
+/* PCI device info */
+struct enetc_hw {
+	/* SI registers, used by all PCI functions */
+	void __iomem *reg;
+	/* Port registers, PF only */
+	void __iomem *port;
+	/* IP global registers, PF only */
+	void __iomem *global;
+};
+
+/* general register accessors */
+#define enetc_rd_reg(reg)	ioread32((reg))
+#define enetc_wr_reg(reg, val)	iowrite32((val), (reg))
+#ifdef ioread64
+#define enetc_rd_reg64(reg)	ioread64((reg))
+#else
+/* using this to read out stats on 32b systems */
+static inline u64 enetc_rd_reg64(void __iomem *reg)
+{
+	u32 low, high, tmp;
+
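+	/* Read the high word on both sides of the low word read and retry
+	 * until the two match, so a carry from low to high between the two
+	 * 32-bit reads cannot produce a torn 64-bit value.
+	 */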
+	do {
+		high = ioread32(reg + 4);
+		low = ioread32(reg);
+		tmp = ioread32(reg + 4);
+	} while (high != tmp);
+
+	return le64_to_cpu((__le64)high << 32 | low);
+}
+#endif
+
+#define enetc_rd(hw, off)		enetc_rd_reg((hw)->reg + (off))
+#define enetc_wr(hw, off, val)		enetc_wr_reg((hw)->reg + (off), val)
+#define enetc_rd64(hw, off)		enetc_rd_reg64((hw)->reg + (off))
+/* port register accessors - PF only */
+#define enetc_port_rd(hw, off)		enetc_rd_reg((hw)->port + (off))
+#define enetc_port_wr(hw, off, val)	enetc_wr_reg((hw)->port + (off), val)
+/* global register accessors - PF only */
+#define enetc_global_rd(hw, off)	enetc_rd_reg((hw)->global + (off))
+#define enetc_global_wr(hw, off, val)	enetc_wr_reg((hw)->global + (off), val)
+/* BDR register accessors, see ENETC_BDR() */
+#define enetc_bdr_rd(hw, t, n, off) \
+				enetc_rd(hw, ENETC_BDR(t, n, off))
+#define enetc_bdr_wr(hw, t, n, off, val) \
+				enetc_wr(hw, ENETC_BDR(t, n, off), val)
+#define enetc_txbdr_rd(hw, n, off) enetc_bdr_rd(hw, TX, n, off)
+#define enetc_rxbdr_rd(hw, n, off) enetc_bdr_rd(hw, RX, n, off)
+#define enetc_txbdr_wr(hw, n, off, val) \
+				enetc_bdr_wr(hw, TX, n, off, val)
+#define enetc_rxbdr_wr(hw, n, off, val) \
+				enetc_bdr_wr(hw, RX, n, off, val)
+
+/* Buffer Descriptors (BD) */
+union enetc_tx_bd {
+	struct {
+		__le64 addr;
+		__le16 buf_len;
+		__le16 frm_len;
+		union {
+			struct {
+				__le16 l3_csoff;
+				u8 l4_csoff;
+				u8 flags;
+			}; /* default layout */
+			__le32 lstatus;
+		};
+	};
+	struct {
+		__le32 tstamp;
+		__le16 tpid;
+		__le16 vid;
+		u8 reserved[6];
+		u8 e_flags;
+		u8 flags;
+	} ext; /* Tx BD extension */
+	struct {
+		__le32 tstamp;
+		u8 reserved[10];
+		u8 status;
+		u8 flags;
+	} wb; /* writeback descriptor */
+};
+
+#define ENETC_TXBD_FLAGS_L4CS	BIT(0)
+#define ENETC_TXBD_FLAGS_W	BIT(2)
+#define ENETC_TXBD_FLAGS_CSUM	BIT(3)
+#define ENETC_TXBD_FLAGS_EX	BIT(6)
+#define ENETC_TXBD_FLAGS_F	BIT(7)
+
+static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd)
+{
+	memset(txbd, 0, sizeof(*txbd));
+}
+
+/* L3 csum flags */
+#define ENETC_TXBD_L3_IPCS	BIT(7)
+#define ENETC_TXBD_L3_IPV6	BIT(15)
+
+#define ENETC_TXBD_L3_START_MASK	GENMASK(6, 0)
+#define ENETC_TXBD_L3_SET_HSIZE(val)	((((val) >> 2) & 0x7f) << 8)
+
+/* Extension flags */
+#define ENETC_TXBD_E_FLAGS_VLAN_INS	BIT(0)
+#define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP	BIT(2)
+
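+/* Pack the L3 checksum offload fields of a Tx BD: bits 6:0 hold the L3
+ * header start offset, bits 14:8 the header size in 4-byte words, plus
+ * the IPCS/IPV6 flag bits defined above.
+ */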
+static inline __le16 enetc_txbd_l3_csoff(int start, int hdr_sz, u16 l3_flags)
+{
+	return cpu_to_le16(l3_flags | ENETC_TXBD_L3_SET_HSIZE(hdr_sz) |
+			   (start & ENETC_TXBD_L3_START_MASK));
+}
+
+/* L4 csum flags */
+#define ENETC_TXBD_L4_UDP	BIT(5)
+#define ENETC_TXBD_L4_TCP	BIT(6)
+
+union enetc_rx_bd {
+	struct {
+		__le64 addr;
+		u8 reserved[8];
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+		u8 reserved1[16];
+#endif
+	} w;
+	struct {
+		__le16 inet_csum;
+		__le16 parse_summary;
+		__le32 rss_hash;
+		__le16 buf_len;
+		__le16 vlan_opt;
+		union {
+			struct {
+				__le16 flags;
+				__le16 error;
+			};
+			__le32 lstatus;
+		};
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+		__le32 tstamp;
+		u8 reserved[12];
+#endif
+	} r;
+};
+
+#define ENETC_RXBD_LSTATUS_R	BIT(30)
+#define ENETC_RXBD_LSTATUS_F	BIT(31)
+#define ENETC_RXBD_ERR_MASK	0xff
+#define ENETC_RXBD_LSTATUS(flags)	((flags) << 16)
+#define ENETC_RXBD_FLAG_VLAN	BIT(9)
+#define ENETC_RXBD_FLAG_TSTMP	BIT(10)
+
+#define ENETC_MAC_ADDR_FILT_CNT	8 /* # of supported entries per port */
+#define EMETC_MAC_ADDR_FILT_RES	3 /* # of reserved entries at the beginning */
+#define ENETC_MAX_NUM_VFS	2
+
+struct enetc_cbd {
+	union {
+		struct {
+			__le32 addr[2];
+			__le32 opt[4];
+		};
+		__le32 data[6];
+	};
+	__le16 index;
+	__le16 length;
+	u8 cmd;
+	u8 cls;
+	u8 _res;
+	u8 status_flags;
+};
+
+#define ENETC_CBD_FLAGS_SF	BIT(7) /* short format */
+#define ENETC_CBD_STATUS_MASK	0xf
+
+struct enetc_cmd_rfse {
+	u8 smac_h[6];
+	u8 smac_m[6];
+	u8 dmac_h[6];
+	u8 dmac_m[6];
+	u32 sip_h[4];
+	u32 sip_m[4];
+	u32 dip_h[4];
+	u32 dip_m[4];
+	u16 ethtype_h;
+	u16 ethtype_m;
+	u16 ethtype4_h;
+	u16 ethtype4_m;
+	u16 sport_h;
+	u16 sport_m;
+	u16 dport_h;
+	u16 dport_m;
+	u16 vlan_h;
+	u16 vlan_m;
+	u8 proto_h;
+	u8 proto_m;
+	u16 flags;
+	u16 result;
+	u16 mode;
+};
+
+#define ENETC_RFSE_EN	BIT(15)
+#define ENETC_RFSE_MODE_BD	2
+
+static inline void enetc_get_primary_mac_addr(struct enetc_hw *hw, u8 *addr)
+{
+	*(u32 *)addr = __raw_readl(hw->reg + ENETC_SIPMAR0);
+	*(u16 *)(addr + 4) = __raw_readw(hw->reg + ENETC_SIPMAR1);
+}
+
+#define ENETC_SI_INT_IDX	0
+/* base index for Rx/Tx interrupts */
+#define ENETC_BDR_INT_BASE_IDX	1
+
+/* Messaging */
+
+/* Command completion status */
+enum enetc_msg_cmd_status {
+	ENETC_MSG_CMD_STATUS_OK,
+	ENETC_MSG_CMD_STATUS_FAIL
+};
+
+/* VSI-PSI command message types */
+enum enetc_msg_cmd_type {
+	ENETC_MSG_CMD_MNG_MAC = 1, /* manage MAC address */
+	ENETC_MSG_CMD_MNG_RX_MAC_FILTER, /* manage RX MAC table */
+	ENETC_MSG_CMD_MNG_RX_VLAN_FILTER /* manage RX VLAN table */
+};
+
+/* VSI-PSI command action types */
+enum enetc_msg_cmd_action_type {
+	ENETC_MSG_CMD_MNG_ADD = 1,
+	ENETC_MSG_CMD_MNG_REMOVE
+};
+
+/* PSI-VSI command header format */
+struct enetc_msg_cmd_header {
+	u16 type;	/* command class type */
+	u16 id;		/* denotes the specific required action */
+};
+
+/* Common H/W utility functions */
+
+static inline void enetc_enable_rxvlan(struct enetc_hw *hw, int si_idx,
+				       bool en)
+{
+	u32 val = enetc_rxbdr_rd(hw, si_idx, ENETC_RBMR);
+
+	val = (val & ~ENETC_RBMR_VTE) | (en ? ENETC_RBMR_VTE : 0);
+	enetc_rxbdr_wr(hw, si_idx, ENETC_RBMR, val);
+}
+
+static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx,
+				       bool en)
+{
+	u32 val = enetc_txbdr_rd(hw, si_idx, ENETC_TBMR);
+
+	val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0);
+	enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val);
+}
+
+static inline void enetc_set_bdr_prio(struct enetc_hw *hw, int bdr_idx,
+				      int prio)
+{
+	u32 val = enetc_txbdr_rd(hw, bdr_idx, ENETC_TBMR);
+
+	val &= ~ENETC_TBMR_PRIO_MASK;
+	val |= ENETC_TBMR_SET_PRIO(prio);
+	enetc_txbdr_wr(hw, bdr_idx, ENETC_TBMR, val);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
new file mode 100644
index 0000000..149883c
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+
+#include <linux/mdio.h>
+#include <linux/of_mdio.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+
+#include "enetc_mdio.h"
+
+#define	ENETC_MDIO_REG_OFFSET	0x1c00
+#define	ENETC_MDIO_CFG	0x0	/* MDIO configuration and status */
+#define	ENETC_MDIO_CTL	0x4	/* MDIO control */
+#define	ENETC_MDIO_DATA	0x8	/* MDIO data */
+#define	ENETC_MDIO_ADDR	0xc	/* MDIO address */
+
+#define enetc_mdio_rd(hw, off) \
+	enetc_port_rd(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET)
+#define enetc_mdio_wr(hw, off, val) \
+	enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val)
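+/* readx_poll_timeout() takes a single-argument accessor, so this wrapper
+ * captures the 'hw' variable from the caller's scope
+ */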
+#define enetc_mdio_rd_reg(off)	enetc_mdio_rd(hw, off)
+
+#define ENETC_MDC_DIV		258
+
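+/* the CFG divider field (bits 15:8) holds the MDC divisor divided by two,
+ * so ENETC_MDC_DIV above presumably yields MDC = host clock / 258
+ */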
+#define MDIO_CFG_CLKDIV(x)	((((x) >> 1) & 0xff) << 8)
+#define MDIO_CFG_BSY		BIT(0)
+#define MDIO_CFG_RD_ER		BIT(1)
+#define MDIO_CFG_ENC45		BIT(6)
+/* external MDIO only - driven on neg MDC edge */
+#define MDIO_CFG_NEG		BIT(23)
+
+#define MDIO_CTL_DEV_ADDR(x)	((x) & 0x1f)
+#define MDIO_CTL_PORT_ADDR(x)	(((x) & 0x1f) << 5)
+#define MDIO_CTL_READ		BIT(15)
+#define MDIO_DATA(x)		((x) & 0xffff)
+
+#define TIMEOUT	1000
+static int enetc_mdio_wait_complete(struct enetc_hw *hw)
+{
+	u32 val;
+
+	return readx_poll_timeout(enetc_mdio_rd_reg, MDIO_CFG, val,
+				  !(val & MDIO_CFG_BSY), 10, 10 * TIMEOUT);
+}
+
+int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
+{
+	struct enetc_mdio_priv *mdio_priv = bus->priv;
+	struct enetc_hw *hw = mdio_priv->hw;
+	u32 mdio_ctl, mdio_cfg;
+	u16 dev_addr;
+	int ret;
+
+	mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG;
+	if (regnum & MII_ADDR_C45) {
+		dev_addr = (regnum >> 16) & 0x1f;
+		mdio_cfg |= MDIO_CFG_ENC45;
+	} else {
+		/* clause 22 (i.e. 1G) */
+		dev_addr = regnum & 0x1f;
+		mdio_cfg &= ~MDIO_CFG_ENC45;
+	}
+
+	enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
+
+	ret = enetc_mdio_wait_complete(hw);
+	if (ret)
+		return ret;
+
+	/* set port and dev addr */
+	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
+	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
+
+	/* set the register address */
+	if (regnum & MII_ADDR_C45) {
+		enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
+
+		ret = enetc_mdio_wait_complete(hw);
+		if (ret)
+			return ret;
+	}
+
+	/* write the value */
+	enetc_mdio_wr(hw, MDIO_DATA, MDIO_DATA(value));
+
+	ret = enetc_mdio_wait_complete(hw);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+{
+	struct enetc_mdio_priv *mdio_priv = bus->priv;
+	struct enetc_hw *hw = mdio_priv->hw;
+	u32 mdio_ctl, mdio_cfg;
+	u16 dev_addr, value;
+	int ret;
+
+	mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG;
+	if (regnum & MII_ADDR_C45) {
+		dev_addr = (regnum >> 16) & 0x1f;
+		mdio_cfg |= MDIO_CFG_ENC45;
+	} else {
+		dev_addr = regnum & 0x1f;
+		mdio_cfg &= ~MDIO_CFG_ENC45;
+	}
+
+	enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
+
+	ret = enetc_mdio_wait_complete(hw);
+	if (ret)
+		return ret;
+
+	/* set port and device addr */
+	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
+	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
+
+	/* set the register address */
+	if (regnum & MII_ADDR_C45) {
+		enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
+
+		ret = enetc_mdio_wait_complete(hw);
+		if (ret)
+			return ret;
+	}
+
+	/* initiate the read */
+	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl | MDIO_CTL_READ);
+
+	ret = enetc_mdio_wait_complete(hw);
+	if (ret)
+		return ret;
+
+	/* return all Fs if nothing was there */
+	if (enetc_mdio_rd(hw, MDIO_CFG) & MDIO_CFG_RD_ER) {
+		dev_dbg(&bus->dev,
+			"Error while reading PHY%d reg at %d.%hhu\n",
+			phy_id, dev_addr, regnum);
+		return 0xffff;
+	}
+
+	value = enetc_mdio_rd(hw, MDIO_DATA) & 0xffff;
+
+	return value;
+}
+
+int enetc_mdio_probe(struct enetc_pf *pf)
+{
+	struct device *dev = &pf->si->pdev->dev;
+	struct enetc_mdio_priv *mdio_priv;
+	struct device_node *np;
+	struct mii_bus *bus;
+	int err;
+
+	bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = "Freescale ENETC MDIO Bus";
+	bus->read = enetc_mdio_read;
+	bus->write = enetc_mdio_write;
+	bus->parent = dev;
+	mdio_priv = bus->priv;
+	mdio_priv->hw = &pf->si->hw;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+
+	np = of_get_child_by_name(dev->of_node, "mdio");
+	if (!np) {
+		dev_err(dev, "MDIO node missing\n");
+		return -EINVAL;
+	}
+
+	err = of_mdiobus_register(bus, np);
+	if (err) {
+		of_node_put(np);
+		dev_err(dev, "cannot register MDIO bus\n");
+		return err;
+	}
+
+	of_node_put(np);
+	pf->mdio = bus;
+
+	return 0;
+}
+
+void enetc_mdio_remove(struct enetc_pf *pf)
+{
+	if (pf->mdio)
+		mdiobus_unregister(pf->mdio);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
new file mode 100644
index 0000000..60c9a38
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2019 NXP */
+
+#include <linux/phy.h>
+#include "enetc_pf.h"
+
+struct enetc_mdio_priv {
+	struct enetc_hw *hw;
+};
+
+int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value);
+int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_msg.c b/drivers/net/ethernet/freescale/enetc/enetc_msg.c
new file mode 100644
index 0000000..40d22eb
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_msg.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2017-2019 NXP */
+
+#include "enetc_pf.h"
+
+static void enetc_msg_disable_mr_int(struct enetc_hw *hw)
+{
+	u32 psiier = enetc_rd(hw, ENETC_PSIIER);
+	/* disable MR int source(s) */
+	enetc_wr(hw, ENETC_PSIIER, psiier & ~ENETC_PSIIER_MR_MASK);
+}
+
+static void enetc_msg_enable_mr_int(struct enetc_hw *hw)
+{
+	u32 psiier = enetc_rd(hw, ENETC_PSIIER);
+
+	enetc_wr(hw, ENETC_PSIIER, psiier | ENETC_PSIIER_MR_MASK);
+}
+
+static irqreturn_t enetc_msg_psi_msix(int irq, void *data)
+{
+	struct enetc_si *si = (struct enetc_si *)data;
+	struct enetc_pf *pf = enetc_si_priv(si);
+
+	enetc_msg_disable_mr_int(&si->hw);
+	schedule_work(&pf->msg_task);
+
+	return IRQ_HANDLED;
+}
+
+static void enetc_msg_task(struct work_struct *work)
+{
+	struct enetc_pf *pf = container_of(work, struct enetc_pf, msg_task);
+	struct enetc_hw *hw = &pf->si->hw;
+	unsigned long mr_mask;
+	int i;
+
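+	/* Drain all pending VF message-received (MR) notifications; only
+	 * re-arm the MR interrupt sources once the status register reads
+	 * clean, since the bits are write-1-to-clear.
+	 */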
+	for (;;) {
+		mr_mask = enetc_rd(hw, ENETC_PSIMSGRR) & ENETC_PSIMSGRR_MR_MASK;
+		if (!mr_mask) {
+			/* re-arm MR interrupts, w1c the IDR reg */
+			enetc_wr(hw, ENETC_PSIIDR, ENETC_PSIIER_MR_MASK);
+			enetc_msg_enable_mr_int(hw);
+			return;
+		}
+
+		for (i = 0; i < pf->num_vfs; i++) {
+			u32 psimsgrr;
+			u16 msg_code;
+
+			if (!(ENETC_PSIMSGRR_MR(i) & mr_mask))
+				continue;
+
+			enetc_msg_handle_rxmsg(pf, i, &msg_code);
+
+			psimsgrr = ENETC_SIMSGSR_SET_MC(msg_code);
+			psimsgrr |= ENETC_PSIMSGRR_MR(i); /* w1c */
+			enetc_wr(hw, ENETC_PSIMSGRR, psimsgrr);
+		}
+	}
+}
+
+/* Init */
+static int enetc_msg_alloc_mbx(struct enetc_si *si, int idx)
+{
+	struct enetc_pf *pf = enetc_si_priv(si);
+	struct device *dev = &si->pdev->dev;
+	struct enetc_hw *hw = &si->hw;
+	struct enetc_msg_swbd *msg;
+	u32 val;
+
+	msg = &pf->rxmsg[idx];
+	/* allocate and set receive buffer */
+	msg->size = ENETC_DEFAULT_MSG_SIZE;
+
+	msg->vaddr = dma_alloc_coherent(dev, msg->size, &msg->dma,
+					GFP_KERNEL);
+	if (!msg->vaddr) {
+		dev_err(dev, "msg: fail to alloc dma buffer of size: %d\n",
+			msg->size);
+		return -ENOMEM;
+	}
+
+	/* program the rx buffer address; it must be a multiple of 32 bytes,
+	 * which dma_alloc_coherent() guarantees
+	 */
+	val = lower_32_bits(msg->dma);
+	enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), val);
+	val = upper_32_bits(msg->dma);
+	enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), val);
+
+	return 0;
+}
+
+static void enetc_msg_free_mbx(struct enetc_si *si, int idx)
+{
+	struct enetc_pf *pf = enetc_si_priv(si);
+	struct enetc_hw *hw = &si->hw;
+	struct enetc_msg_swbd *msg;
+
+	msg = &pf->rxmsg[idx];
+	dma_free_coherent(&si->pdev->dev, msg->size, msg->vaddr, msg->dma);
+	memset(msg, 0, sizeof(*msg));
+
+	enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), 0);
+	enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), 0);
+}
+
+int enetc_msg_psi_init(struct enetc_pf *pf)
+{
+	struct enetc_si *si = pf->si;
+	int vector, i, err;
+
+	/* register message passing interrupt handler */
+	snprintf(pf->msg_int_name, sizeof(pf->msg_int_name), "%s-vfmsg",
+		 si->ndev->name);
+	vector = pci_irq_vector(si->pdev, ENETC_SI_INT_IDX);
+	err = request_irq(vector, enetc_msg_psi_msix, 0, pf->msg_int_name, si);
+	if (err) {
+		dev_err(&si->pdev->dev,
+			"PSI messaging: request_irq() failed!\n");
+		return err;
+	}
+
+	/* set one IRQ entry for PSI message receive notification (SI int) */
+	enetc_wr(&si->hw, ENETC_SIMSIVR, ENETC_SI_INT_IDX);
+
+	/* initialize PSI mailbox */
+	INIT_WORK(&pf->msg_task, enetc_msg_task);
+
+	for (i = 0; i < pf->num_vfs; i++) {
+		err = enetc_msg_alloc_mbx(si, i);
+		if (err)
+			goto err_init_mbx;
+	}
+
+	/* enable MR interrupts */
+	enetc_msg_enable_mr_int(&si->hw);
+
+	return 0;
+
+err_init_mbx:
+	for (i--; i >= 0; i--)
+		enetc_msg_free_mbx(si, i);
+
+	free_irq(vector, si);
+
+	return err;
+}
+
+void enetc_msg_psi_free(struct enetc_pf *pf)
+{
+	struct enetc_si *si = pf->si;
+	int i;
+
+	cancel_work_sync(&pf->msg_task);
+
+	/* disable MR interrupts */
+	enetc_msg_disable_mr_int(&si->hw);
+
+	for (i = 0; i < pf->num_vfs; i++)
+		enetc_msg_free_mbx(si, i);
+
+	/* de-register message passing interrupt handler */
+	free_irq(pci_irq_vector(si->pdev, ENETC_SI_INT_IDX), si);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
new file mode 100644
index 0000000..fbd41ce
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+#include <linux/of_mdio.h>
+#include "enetc_mdio.h"
+
+#define ENETC_MDIO_DEV_ID	0xee01
+#define ENETC_MDIO_DEV_NAME	"FSL PCIe IE Central MDIO"
+#define ENETC_MDIO_BUS_NAME	ENETC_MDIO_DEV_NAME " Bus"
+#define ENETC_MDIO_DRV_NAME	ENETC_MDIO_DEV_NAME " driver"
+
+static int enetc_pci_mdio_probe(struct pci_dev *pdev,
+				const struct pci_device_id *ent)
+{
+	struct enetc_mdio_priv *mdio_priv;
+	struct device *dev = &pdev->dev;
+	struct enetc_hw *hw;
+	struct mii_bus *bus;
+	int err;
+
+	hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
+	if (!hw)
+		return -ENOMEM;
+
+	bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = ENETC_MDIO_BUS_NAME;
+	bus->read = enetc_mdio_read;
+	bus->write = enetc_mdio_write;
+	bus->parent = dev;
+	mdio_priv = bus->priv;
+	mdio_priv->hw = hw;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+
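+	/* Function-level reset first, presumably to bring the MDIO endpoint
+	 * to a clean state regardless of what firmware left behind.
+	 */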
+	pcie_flr(pdev);
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(dev, "device enable failed\n");
+		return err;
+	}
+
+	err = pci_request_region(pdev, 0, KBUILD_MODNAME);
+	if (err) {
+		dev_err(dev, "pci_request_region failed\n");
+		goto err_pci_mem_reg;
+	}
+
+	hw->port = pci_iomap(pdev, 0, 0);
+	if (!hw->port) {
+		err = -ENXIO;
+		dev_err(dev, "iomap failed\n");
+		goto err_ioremap;
+	}
+
+	err = of_mdiobus_register(bus, dev->of_node);
+	if (err)
+		goto err_mdiobus_reg;
+
+	pci_set_drvdata(pdev, bus);
+
+	return 0;
+
+err_mdiobus_reg:
+	iounmap(mdio_priv->hw->port);
+err_ioremap:
+	pci_release_mem_regions(pdev);
+err_pci_mem_reg:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void enetc_pci_mdio_remove(struct pci_dev *pdev)
+{
+	struct mii_bus *bus = pci_get_drvdata(pdev);
+	struct enetc_mdio_priv *mdio_priv;
+
+	mdiobus_unregister(bus);
+	mdio_priv = bus->priv;
+	iounmap(mdio_priv->hw->port);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static const struct pci_device_id enetc_pci_mdio_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_MDIO_DEV_ID) },
+	{ 0, } /* End of table. */
+};
+MODULE_DEVICE_TABLE(pci, enetc_pci_mdio_id_table);
+
+static struct pci_driver enetc_pci_mdio_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = enetc_pci_mdio_id_table,
+	.probe = enetc_pci_mdio_probe,
+	.remove = enetc_pci_mdio_remove,
+};
+module_pci_driver(enetc_pci_mdio_driver);
+
+MODULE_DESCRIPTION(ENETC_MDIO_DRV_NAME);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
new file mode 100644
index 0000000..b73421c
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2017-2019 NXP */
+
+#include <linux/module.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include "enetc_pf.h"
+
+#define ENETC_DRV_VER_MAJ 1
+#define ENETC_DRV_VER_MIN 0
+
+#define ENETC_DRV_VER_STR __stringify(ENETC_DRV_VER_MAJ) "." \
+			  __stringify(ENETC_DRV_VER_MIN)
+static const char enetc_drv_ver[] = ENETC_DRV_VER_STR;
+#define ENETC_DRV_NAME_STR "ENETC PF driver"
+static const char enetc_drv_name[] = ENETC_DRV_NAME_STR;
+
+static void enetc_pf_get_primary_mac_addr(struct enetc_hw *hw, int si, u8 *addr)
+{
+	u32 upper = __raw_readl(hw->port + ENETC_PSIPMAR0(si));
+	u16 lower = __raw_readw(hw->port + ENETC_PSIPMAR1(si));
+
+	*(u32 *)addr = upper;
+	*(u16 *)(addr + 4) = lower;
+}
+
+static void enetc_pf_set_primary_mac_addr(struct enetc_hw *hw, int si,
+					  const u8 *addr)
+{
+	u32 upper = *(const u32 *)addr;
+	u16 lower = *(const u16 *)(addr + 4);
+
+	__raw_writel(upper, hw->port + ENETC_PSIPMAR0(si));
+	__raw_writew(lower, hw->port + ENETC_PSIPMAR1(si));
+}
+
+static int enetc_pf_set_mac_addr(struct net_device *ndev, void *addr)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct sockaddr *saddr = addr;
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(ndev->dev_addr, saddr->sa_data, ndev->addr_len);
+	enetc_pf_set_primary_mac_addr(&priv->si->hw, 0, saddr->sa_data);
+
+	return 0;
+}
+
+static void enetc_set_vlan_promisc(struct enetc_hw *hw, char si_map)
+{
+	u32 val = enetc_port_rd(hw, ENETC_PSIPVMR);
+
+	val &= ~ENETC_PSIPVMR_SET_VP(ENETC_VLAN_PROMISC_MAP_ALL);
+	enetc_port_wr(hw, ENETC_PSIPVMR, ENETC_PSIPVMR_SET_VP(si_map) | val);
+}
+
+static bool enetc_si_vlan_promisc_is_on(struct enetc_pf *pf, int si_idx)
+{
+	return pf->vlan_promisc_simap & BIT(si_idx);
+}
+
+static bool enetc_vlan_filter_is_on(struct enetc_pf *pf)
+{
+	int i;
+
+	for_each_set_bit(i, pf->active_vlans, VLAN_N_VID)
+		return true;
+
+	return false;
+}
+
+static void enetc_enable_si_vlan_promisc(struct enetc_pf *pf, int si_idx)
+{
+	pf->vlan_promisc_simap |= BIT(si_idx);
+	enetc_set_vlan_promisc(&pf->si->hw, pf->vlan_promisc_simap);
+}
+
+static void enetc_disable_si_vlan_promisc(struct enetc_pf *pf, int si_idx)
+{
+	pf->vlan_promisc_simap &= ~BIT(si_idx);
+	enetc_set_vlan_promisc(&pf->si->hw, pf->vlan_promisc_simap);
+}
+
+static void enetc_set_isol_vlan(struct enetc_hw *hw, int si, u16 vlan, u8 qos)
+{
+	u32 val = 0;
+
+	if (vlan)
+		val = ENETC_PSIVLAN_EN | ENETC_PSIVLAN_SET_QOS(qos) | vlan;
+
+	enetc_port_wr(hw, ENETC_PSIVLANR(si), val);
+}
+
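+/* Fold the 48-bit MAC address into a 6-bit hash table index: after the
+ * byte swap, result bit i is the parity (XOR) of address bits
+ * i, i+6, ..., i+42, i.e. the XOR of eight 6-bit groups.
+ */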
+static int enetc_mac_addr_hash_idx(const u8 *addr)
+{
+	u64 fold = __swab64(ether_addr_to_u64(addr)) >> 16;
+	u64 mask = 0;
+	int res = 0;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		mask |= BIT_ULL(i * 6);
+
+	for (i = 0; i < 6; i++)
+		res |= (hweight64(fold & (mask << i)) & 0x1) << i;
+
+	return res;
+}
+
+static void enetc_reset_mac_addr_filter(struct enetc_mac_filter *filter)
+{
+	filter->mac_addr_cnt = 0;
+
+	bitmap_zero(filter->mac_hash_table,
+		    ENETC_MADDR_HASH_TBL_SZ);
+}
+
+static void enetc_add_mac_addr_em_filter(struct enetc_mac_filter *filter,
+					 const unsigned char *addr)
+{
+	/* add exact match addr */
+	ether_addr_copy(filter->mac_addr, addr);
+	filter->mac_addr_cnt++;
+}
+
+static void enetc_add_mac_addr_ht_filter(struct enetc_mac_filter *filter,
+					 const unsigned char *addr)
+{
+	int idx = enetc_mac_addr_hash_idx(addr);
+
+	/* add hash table entry */
+	__set_bit(idx, filter->mac_hash_table);
+	filter->mac_addr_cnt++;
+}
+
+static void enetc_clear_mac_ht_flt(struct enetc_si *si, int si_idx, int type)
+{
+	bool err = si->errata & ENETC_ERR_UCMCSWP;
+
+	if (type == UC) {
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), 0);
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), 0);
+	} else { /* MC */
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), 0);
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), 0);
+	}
+}
+
+static void enetc_set_mac_ht_flt(struct enetc_si *si, int si_idx, int type,
+				 u32 *hash)
+{
+	bool err = si->errata & ENETC_ERR_UCMCSWP;
+
+	if (type == UC) {
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), *hash);
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), *(hash + 1));
+	} else { /* MC */
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), *hash);
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), *(hash + 1));
+	}
+}
+
+static void enetc_sync_mac_filters(struct enetc_pf *pf)
+{
+	struct enetc_mac_filter *f = pf->mac_filter;
+	struct enetc_si *si = pf->si;
+	int i, pos;
+
+	pos = EMETC_MAC_ADDR_FILT_RES;
+
+	for (i = 0; i < MADDR_TYPE; i++, f++) {
+		bool em = (f->mac_addr_cnt == 1) && (i == UC);
+		bool clear = !f->mac_addr_cnt;
+
+		if (clear) {
+			if (i == UC)
+				enetc_clear_mac_flt_entry(si, pos);
+
+			enetc_clear_mac_ht_flt(si, 0, i);
+			continue;
+		}
+
+		/* exact match filter */
+		if (em) {
+			int err;
+
+			enetc_clear_mac_ht_flt(si, 0, UC);
+
+			err = enetc_set_mac_flt_entry(si, pos, f->mac_addr,
+						      BIT(0));
+			if (!err)
+				continue;
+
+			/* fallback to HT filtering */
+			dev_warn(&si->pdev->dev, "fallback to HT filt (%d)\n",
+				 err);
+		}
+
+		/* hash table filter, clear EM filter for UC entries */
+		if (i == UC)
+			enetc_clear_mac_flt_entry(si, pos);
+
+		enetc_set_mac_ht_flt(si, 0, i, (u32 *)f->mac_hash_table);
+	}
+}
+
+static void enetc_pf_set_rx_mode(struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	struct enetc_hw *hw = &priv->si->hw;
+	bool uprom = false, mprom = false;
+	struct enetc_mac_filter *filter;
+	struct netdev_hw_addr *ha;
+	u32 psipmr = 0;
+	bool em;
+
+	if (ndev->flags & IFF_PROMISC) {
+		/* enable promisc mode for SI0 (PF) */
+		psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0);
+		uprom = true;
+		mprom = true;
+		/* enable VLAN promisc mode for SI0 */
+		if (!enetc_si_vlan_promisc_is_on(pf, 0))
+			enetc_enable_si_vlan_promisc(pf, 0);
+
+	} else if (ndev->flags & IFF_ALLMULTI) {
+		/* enable multicast promisc mode for SI0 (PF) */
+		psipmr = ENETC_PSIPMR_SET_MP(0);
+		mprom = true;
+	}
+
+	/* first 2 filter entries belong to PF */
+	if (!uprom) {
+		/* Update unicast filters */
+		filter = &pf->mac_filter[UC];
+		enetc_reset_mac_addr_filter(filter);
+
+		em = (netdev_uc_count(ndev) == 1);
+		netdev_for_each_uc_addr(ha, ndev) {
+			if (em) {
+				enetc_add_mac_addr_em_filter(filter, ha->addr);
+				break;
+			}
+
+			enetc_add_mac_addr_ht_filter(filter, ha->addr);
+		}
+	}
+
+	if (!mprom) {
+		/* Update multicast filters */
+		filter = &pf->mac_filter[MC];
+		enetc_reset_mac_addr_filter(filter);
+
+		netdev_for_each_mc_addr(ha, ndev) {
+			if (!is_multicast_ether_addr(ha->addr))
+				continue;
+
+			enetc_add_mac_addr_ht_filter(filter, ha->addr);
+		}
+	}
+
+	if (!uprom || !mprom)
+		/* update PF entries */
+		enetc_sync_mac_filters(pf);
+
+	psipmr |= enetc_port_rd(hw, ENETC_PSIPMR) &
+		  ~(ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0));
+	enetc_port_wr(hw, ENETC_PSIPMR, psipmr);
+}
+
+static void enetc_set_vlan_ht_filter(struct enetc_hw *hw, int si_idx,
+				     u32 *hash)
+{
+	enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), *hash);
+	enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), *(hash + 1));
+}
+
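+/* Fold the 12-bit VLAN ID into a 6-bit hash index: bit i of the result
+ * is the XOR of VID bits i and i + 6.
+ */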
+static int enetc_vid_hash_idx(unsigned int vid)
+{
+	int res = 0;
+	int i;
+
+	for (i = 0; i < 6; i++)
+		res |= (hweight8(vid & (BIT(i) | BIT(i + 6))) & 0x1) << i;
+
+	return res;
+}
+
+static void enetc_sync_vlan_ht_filter(struct enetc_pf *pf, bool rehash)
+{
+	int i;
+
+	if (rehash) {
+		bitmap_zero(pf->vlan_ht_filter, ENETC_VLAN_HT_SIZE);
+
+		for_each_set_bit(i, pf->active_vlans, VLAN_N_VID) {
+			int hidx = enetc_vid_hash_idx(i);
+
+			__set_bit(hidx, pf->vlan_ht_filter);
+		}
+	}
+
+	enetc_set_vlan_ht_filter(&pf->si->hw, 0, (u32 *)pf->vlan_ht_filter);
+}
+
+static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	int idx;
+
+	if (enetc_si_vlan_promisc_is_on(pf, 0))
+		enetc_disable_si_vlan_promisc(pf, 0);
+
+	__set_bit(vid, pf->active_vlans);
+
+	idx = enetc_vid_hash_idx(vid);
+	if (!__test_and_set_bit(idx, pf->vlan_ht_filter))
+		enetc_sync_vlan_ht_filter(pf, false);
+
+	return 0;
+}
+
+static int enetc_vlan_rx_del_vid(struct net_device *ndev, __be16 prot, u16 vid)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+
+	__clear_bit(vid, pf->active_vlans);
+	enetc_sync_vlan_ht_filter(pf, true);
+
+	if (!enetc_vlan_filter_is_on(pf))
+		enetc_enable_si_vlan_promisc(pf, 0);
+
+	return 0;
+}
+
+static void enetc_set_loopback(struct net_device *ndev, bool en)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	u32 reg;
+
+	reg = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
+	if (reg & ENETC_PMO_IFM_RG) {
+		/* RGMII mode */
+		reg = (reg & ~ENETC_PM0_IFM_RLP) |
+		      (en ? ENETC_PM0_IFM_RLP : 0);
+		enetc_port_wr(hw, ENETC_PM0_IF_MODE, reg);
+	} else {
+		/* assume SGMII mode */
+		reg = enetc_port_rd(hw, ENETC_PM0_CMD_CFG);
+		reg = (reg & ~ENETC_PM0_CMD_XGLP) |
+		      (en ? ENETC_PM0_CMD_XGLP : 0);
+		reg = (reg & ~ENETC_PM0_CMD_PHY_TX_EN) |
+		      (en ? ENETC_PM0_CMD_PHY_TX_EN : 0);
+		enetc_port_wr(hw, ENETC_PM0_CMD_CFG, reg);
+		enetc_port_wr(hw, ENETC_PM1_CMD_CFG, reg);
+	}
+}
+
+static int enetc_pf_set_vf_mac(struct net_device *ndev, int vf, u8 *mac)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	struct enetc_vf_state *vf_state;
+
+	if (vf >= pf->total_vfs)
+		return -EINVAL;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	vf_state = &pf->vf_state[vf];
+	vf_state->flags |= ENETC_VF_FLAG_PF_SET_MAC;
+	enetc_pf_set_primary_mac_addr(&priv->si->hw, vf + 1, mac);
+	return 0;
+}
+
+static int enetc_pf_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan,
+				u8 qos, __be16 proto)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+
+	if (priv->si->errata & ENETC_ERR_VLAN_ISOL)
+		return -EOPNOTSUPP;
+
+	if (vf >= pf->total_vfs)
+		return -EINVAL;
+
+	if (proto != htons(ETH_P_8021Q))
+		/* only C-tags supported for now */
+		return -EPROTONOSUPPORT;
+
+	enetc_set_isol_vlan(&priv->si->hw, vf + 1, vlan, qos);
+	return 0;
+}
+
+static int enetc_pf_set_vf_spoofchk(struct net_device *ndev, int vf, bool en)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	u32 cfgr;
+
+	if (vf >= pf->total_vfs)
+		return -EINVAL;
+
+	cfgr = enetc_port_rd(&priv->si->hw, ENETC_PSICFGR0(vf + 1));
+	cfgr = (cfgr & ~ENETC_PSICFGR0_ASE) | (en ? ENETC_PSICFGR0_ASE : 0);
+	enetc_port_wr(&priv->si->hw, ENETC_PSICFGR0(vf + 1), cfgr);
+
+	return 0;
+}
+
+static void enetc_port_setup_primary_mac_address(struct enetc_si *si)
+{
+	unsigned char mac_addr[MAX_ADDR_LEN];
+	struct enetc_pf *pf = enetc_si_priv(si);
+	struct enetc_hw *hw = &si->hw;
+	int i;
+
+	/* check MAC addresses for PF and all VFs; if any is all-zero,
+	 * replace it with a random one
+	 */
+	for (i = 0; i < pf->total_vfs + 1; i++) {
+		enetc_pf_get_primary_mac_addr(hw, i, mac_addr);
+		if (!is_zero_ether_addr(mac_addr))
+			continue;
+		eth_random_addr(mac_addr);
+		dev_info(&si->pdev->dev, "no MAC address specified for SI%d, using %pM\n",
+			 i, mac_addr);
+		enetc_pf_set_primary_mac_addr(hw, i, mac_addr);
+	}
+}
+
+static void enetc_port_assign_rfs_entries(struct enetc_si *si)
+{
+	struct enetc_pf *pf = enetc_si_priv(si);
+	struct enetc_hw *hw = &si->hw;
+	int num_entries, vf_entries, i;
+	u32 val;
+
+	/* split RFS entries between functions */
+	val = enetc_port_rd(hw, ENETC_PRFSCAPR);
+	num_entries = ENETC_PRFSCAPR_GET_NUM_RFS(val);
+	vf_entries = num_entries / (pf->total_vfs + 1);
+
+	for (i = 0; i < pf->total_vfs; i++)
+		enetc_port_wr(hw, ENETC_PSIRFSCFGR(i + 1), vf_entries);
+	enetc_port_wr(hw, ENETC_PSIRFSCFGR(0),
+		      num_entries - vf_entries * pf->total_vfs);
+
+	/* enable RFS on port */
+	enetc_port_wr(hw, ENETC_PRFSMR, ENETC_PRFSMR_RFSE);
+}
+
+static void enetc_port_si_configure(struct enetc_si *si)
+{
+	struct enetc_pf *pf = enetc_si_priv(si);
+	struct enetc_hw *hw = &si->hw;
+	int num_rings, i;
+	u32 val;
+
+	val = enetc_port_rd(hw, ENETC_PCAPR0);
+	num_rings = min(ENETC_PCAPR0_RXBDR(val), ENETC_PCAPR0_TXBDR(val));
+
+	val = ENETC_PSICFGR0_SET_TXBDR(ENETC_PF_NUM_RINGS);
+	val |= ENETC_PSICFGR0_SET_RXBDR(ENETC_PF_NUM_RINGS);
+
+	if (unlikely(num_rings < ENETC_PF_NUM_RINGS)) {
+		val = ENETC_PSICFGR0_SET_TXBDR(num_rings);
+		val |= ENETC_PSICFGR0_SET_RXBDR(num_rings);
+
+		dev_warn(&si->pdev->dev, "Found %d rings, expected %d!\n",
+			 num_rings, ENETC_PF_NUM_RINGS);
+
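+		/* not enough rings even for the PF itself, so assign none
+		 * to the VFs below
+		 */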
+		num_rings = 0;
+	}
+
+	/* Add default one-time settings for SI0 (PF) */
+	val |= ENETC_PSICFGR0_SIVC(ENETC_VLAN_TYPE_C | ENETC_VLAN_TYPE_S);
+
+	enetc_port_wr(hw, ENETC_PSICFGR0(0), val);
+
+	if (num_rings)
+		num_rings -= ENETC_PF_NUM_RINGS;
+
+	/* Configure the SIs for each available VF */
+	val = ENETC_PSICFGR0_SIVC(ENETC_VLAN_TYPE_C | ENETC_VLAN_TYPE_S);
+	val |= ENETC_PSICFGR0_VTE | ENETC_PSICFGR0_SIVIE;
+
+	if (num_rings) {
+		num_rings /= pf->total_vfs;
+		val |= ENETC_PSICFGR0_SET_TXBDR(num_rings);
+		val |= ENETC_PSICFGR0_SET_RXBDR(num_rings);
+	}
+
+	for (i = 0; i < pf->total_vfs; i++)
+		enetc_port_wr(hw, ENETC_PSICFGR0(i + 1), val);
+
+	/* Port level VLAN settings */
+	val = ENETC_PVCLCTR_OVTPIDL(ENETC_VLAN_TYPE_C | ENETC_VLAN_TYPE_S);
+	enetc_port_wr(hw, ENETC_PVCLCTR, val);
+	/* use outer tag for VLAN filtering */
+	enetc_port_wr(hw, ENETC_PSIVLANFMR, ENETC_PSIVLANFMR_VS);
+}
+
+static void enetc_configure_port_mac(struct enetc_hw *hw)
+{
+	enetc_port_wr(hw, ENETC_PM0_MAXFRM,
+		      ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE));
+
+	enetc_port_wr(hw, ENETC_PTCMSDUR(0), ENETC_MAC_MAXFRM_SIZE);
+	enetc_port_wr(hw, ENETC_PTXMBAR, 2 * ENETC_MAC_MAXFRM_SIZE);
+
+	enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN |
+		      ENETC_PM0_CMD_TXP	| ENETC_PM0_PROMISC |
+		      ENETC_PM0_TX_EN | ENETC_PM0_RX_EN);
+
+	enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN |
+		      ENETC_PM0_CMD_TXP	| ENETC_PM0_PROMISC |
+		      ENETC_PM0_TX_EN | ENETC_PM0_RX_EN);
+	/* set auto-speed for RGMII */
+	if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG)
+		enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO);
+	if (enetc_global_rd(hw, ENETC_G_EPFBLPR(1)) == ENETC_G_EPFBLPR1_XGMII)
+		enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII);
+}
+
+static void enetc_configure_port_pmac(struct enetc_hw *hw)
+{
+	u32 temp;
+
+	/* Set pMAC step lock */
+	temp = enetc_port_rd(hw, ENETC_PFPMR);
+	enetc_port_wr(hw, ENETC_PFPMR,
+		      temp | ENETC_PFPMR_PMACE | ENETC_PFPMR_MWLM);
+
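+	/* MMCSR_ME: enable the MAC merge unit backing the pMAC */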
+	temp = enetc_port_rd(hw, ENETC_MMCSR);
+	enetc_port_wr(hw, ENETC_MMCSR, temp | ENETC_MMCSR_ME);
+}
+
+static void enetc_configure_port(struct enetc_pf *pf)
+{
+	u8 hash_key[ENETC_RSSHASH_KEY_SIZE];
+	struct enetc_hw *hw = &pf->si->hw;
+
+	enetc_configure_port_pmac(hw);
+
+	enetc_configure_port_mac(hw);
+
+	enetc_port_si_configure(pf->si);
+
+	/* set up hash key */
+	get_random_bytes(hash_key, ENETC_RSSHASH_KEY_SIZE);
+	enetc_set_rss_key(hw, hash_key);
+
+	/* split up RFS entries */
+	enetc_port_assign_rfs_entries(pf->si);
+
+	/* fix-up primary MAC addresses, if not set already */
+	enetc_port_setup_primary_mac_address(pf->si);
+
+	/* enforce VLAN promisc mode for all SIs */
+	pf->vlan_promisc_simap = ENETC_VLAN_PROMISC_MAP_ALL;
+	enetc_set_vlan_promisc(hw, pf->vlan_promisc_simap);
+
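+	/* clear SI-level MAC promiscuous settings */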
+	enetc_port_wr(hw, ENETC_PSIPMR, 0);
+
+	/* enable port */
+	enetc_port_wr(hw, ENETC_PMR, ENETC_PMR_EN);
+}
+
+/* Messaging */
+static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf,
+						int vf_id)
+{
+	struct enetc_vf_state *vf_state = &pf->vf_state[vf_id];
+	struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id];
+	struct enetc_msg_cmd_set_primary_mac *cmd;
+	struct device *dev = &pf->si->pdev->dev;
+	u16 cmd_id;
+	char *addr;
+
+	cmd = (struct enetc_msg_cmd_set_primary_mac *)msg->vaddr;
+	cmd_id = cmd->header.id;
+	if (cmd_id != ENETC_MSG_CMD_MNG_ADD)
+		return ENETC_MSG_CMD_STATUS_FAIL;
+
+	addr = cmd->mac.sa_data;
+	if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC)
+		dev_warn(dev, "Attempt to override PF set mac addr for VF%d\n",
+			 vf_id);
+	else
+		enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr);
+
+	return ENETC_MSG_CMD_STATUS_OK;
+}
+
+void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status)
+{
+	struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id];
+	struct device *dev = &pf->si->pdev->dev;
+	struct enetc_msg_cmd_header *cmd_hdr;
+	u16 cmd_type;
+
+	*status = ENETC_MSG_CMD_STATUS_OK;
+	cmd_hdr = (struct enetc_msg_cmd_header *)msg->vaddr;
+	cmd_type = cmd_hdr->type;
+
+	switch (cmd_type) {
+	case ENETC_MSG_CMD_MNG_MAC:
+		*status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id);
+		break;
+	default:
+		dev_err(dev, "command not supported (cmd_type: 0x%x)\n",
+			cmd_type);
+	}
+}
+
+#ifdef CONFIG_PCI_IOV
+static int enetc_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct enetc_si *si = pci_get_drvdata(pdev);
+	struct enetc_pf *pf = enetc_si_priv(si);
+	int err;
+
+	if (!num_vfs) {
+		enetc_msg_psi_free(pf);
+		kfree(pf->vf_state);
+		pf->num_vfs = 0;
+		pci_disable_sriov(pdev);
+	} else {
+		pf->num_vfs = num_vfs;
+
+		pf->vf_state = kcalloc(num_vfs, sizeof(struct enetc_vf_state),
+				       GFP_KERNEL);
+		if (!pf->vf_state) {
+			pf->num_vfs = 0;
+			return -ENOMEM;
+		}
+
+		err = enetc_msg_psi_init(pf);
+		if (err) {
+			dev_err(&pdev->dev, "enetc_msg_psi_init (%d)\n", err);
+			goto err_msg_psi;
+		}
+
+		err = pci_enable_sriov(pdev, num_vfs);
+		if (err) {
+			dev_err(&pdev->dev, "pci_enable_sriov err %d\n", err);
+			goto err_en_sriov;
+		}
+	}
+
+	return num_vfs;
+
+err_en_sriov:
+	enetc_msg_psi_free(pf);
+err_msg_psi:
+	kfree(pf->vf_state);
+	pf->num_vfs = 0;
+
+	return err;
+}
+#else
+#define enetc_sriov_configure(pdev, num_vfs)	(void)0
+#endif
+
+static int enetc_pf_set_features(struct net_device *ndev,
+				 netdev_features_t features)
+{
+	netdev_features_t changed = ndev->features ^ features;
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+		enetc_enable_rxvlan(&priv->si->hw, 0,
+				    !!(features & NETIF_F_HW_VLAN_CTAG_RX));
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_TX)
+		enetc_enable_txvlan(&priv->si->hw, 0,
+				    !!(features & NETIF_F_HW_VLAN_CTAG_TX));
+
+	if (changed & NETIF_F_LOOPBACK)
+		enetc_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK));
+
+	return enetc_set_features(ndev, features);
+}
+
+static const struct net_device_ops enetc_ndev_ops = {
+	.ndo_open		= enetc_open,
+	.ndo_stop		= enetc_close,
+	.ndo_start_xmit		= enetc_xmit,
+	.ndo_get_stats		= enetc_get_stats,
+	.ndo_set_mac_address	= enetc_pf_set_mac_addr,
+	.ndo_set_rx_mode	= enetc_pf_set_rx_mode,
+	.ndo_vlan_rx_add_vid	= enetc_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= enetc_vlan_rx_del_vid,
+	.ndo_set_vf_mac		= enetc_pf_set_vf_mac,
+	.ndo_set_vf_vlan	= enetc_pf_set_vf_vlan,
+	.ndo_set_vf_spoofchk	= enetc_pf_set_vf_spoofchk,
+	.ndo_set_features	= enetc_pf_set_features,
+	.ndo_do_ioctl		= enetc_ioctl,
+	.ndo_setup_tc		= enetc_setup_tc,
+};
+
+static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
+				  const struct net_device_ops *ndev_ops)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	SET_NETDEV_DEV(ndev, &si->pdev->dev);
+	priv->ndev = ndev;
+	priv->si = si;
+	priv->dev = &si->pdev->dev;
+	si->ndev = ndev;
+
+	priv->msg_enable = (NETIF_MSG_WOL << 1) - 1;
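+	/* log everything up to and including NETIF_MSG_WOL by default */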
+	ndev->netdev_ops = ndev_ops;
+	enetc_set_ethtool_ops(ndev);
+	ndev->watchdog_timeo = 5 * HZ;
+	ndev->max_mtu = ENETC_MAX_MTU;
+
+	ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_LOOPBACK;
+	ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG |
+			 NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+			 NETIF_F_HW_VLAN_CTAG_TX |
+			 NETIF_F_HW_VLAN_CTAG_RX |
+			 NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	if (si->num_rss)
+		ndev->hw_features |= NETIF_F_RXHASH;
+
+	if (si->errata & ENETC_ERR_TXCSUM) {
+		ndev->hw_features &= ~NETIF_F_HW_CSUM;
+		ndev->features &= ~NETIF_F_HW_CSUM;
+	}
+
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* pick up primary MAC address from SI */
+	enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr);
+}
+
+static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
+{
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	struct device_node *np = priv->dev->of_node;
+	struct device_node *mdio_np;
+	int err;
+
+	if (!np) {
+		dev_err(priv->dev, "missing ENETC port node\n");
+		return -ENODEV;
+	}
+
+	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!priv->phy_node) {
+		if (!of_phy_is_fixed_link(np)) {
+			dev_err(priv->dev, "PHY not specified\n");
+			return -ENODEV;
+		}
+
+		err = of_phy_register_fixed_link(np);
+		if (err < 0) {
+			dev_err(priv->dev, "fixed link registration failed\n");
+			return err;
+		}
+
+		priv->phy_node = of_node_get(np);
+	}
+
+	mdio_np = of_get_child_by_name(np, "mdio");
+	if (mdio_np) {
+		of_node_put(mdio_np);
+		err = enetc_mdio_probe(pf);
+		if (err) {
+			of_node_put(priv->phy_node);
+			return err;
+		}
+	}
+
+	priv->if_mode = of_get_phy_mode(np);
+	if ((int)priv->if_mode < 0) {
+		dev_err(priv->dev, "missing phy type\n");
+		of_node_put(priv->phy_node);
+		if (of_phy_is_fixed_link(np))
+			of_phy_deregister_fixed_link(np);
+		else
+			enetc_mdio_remove(pf);
+
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void enetc_of_put_phy(struct enetc_ndev_priv *priv)
+{
+	struct device_node *np = priv->dev->of_node;
+
+	if (np && of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
+	if (priv->phy_node)
+		of_node_put(priv->phy_node);
+}
+
+static int enetc_pf_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *ent)
+{
+	struct enetc_ndev_priv *priv;
+	struct net_device *ndev;
+	struct enetc_si *si;
+	struct enetc_pf *pf;
+	int err;
+
+	if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) {
+		dev_info(&pdev->dev, "device is disabled, skipping\n");
+		return -ENODEV;
+	}
+
+	err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
+	if (err) {
+		dev_err(&pdev->dev, "PCI probing failed\n");
+		return err;
+	}
+
+	si = pci_get_drvdata(pdev);
+	if (!si->hw.port || !si->hw.global) {
+		err = -ENODEV;
+		dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
+		goto err_map_pf_space;
+	}
+
+	pf = enetc_si_priv(si);
+	pf->si = si;
+	pf->total_vfs = pci_sriov_get_totalvfs(pdev);
+
+	enetc_configure_port(pf);
+
+	enetc_get_si_caps(si);
+
+	ndev = alloc_etherdev_mq(sizeof(*priv), ENETC_MAX_NUM_TXQS);
+	if (!ndev) {
+		err = -ENOMEM;
+		dev_err(&pdev->dev, "netdev creation failed\n");
+		goto err_alloc_netdev;
+	}
+
+	enetc_pf_netdev_setup(si, ndev, &enetc_ndev_ops);
+
+	priv = netdev_priv(ndev);
+
+	enetc_init_si_rings_params(priv);
+
+	err = enetc_alloc_si_resources(priv);
+	if (err) {
+		dev_err(&pdev->dev, "SI resource alloc failed\n");
+		goto err_alloc_si_res;
+	}
+
+	err = enetc_alloc_msix(priv);
+	if (err) {
+		dev_err(&pdev->dev, "MSIX alloc failed\n");
+		goto err_alloc_msix;
+	}
+
+	err = enetc_of_get_phy(priv);
+	if (err)
+		dev_warn(&pdev->dev, "Fallback to PHY-less operation\n");
+
+	err = register_netdev(ndev);
+	if (err)
+		goto err_reg_netdev;
+
+	netif_carrier_off(ndev);
+
+	netif_info(priv, probe, ndev, "%s v%s\n",
+		   enetc_drv_name, enetc_drv_ver);
+
+	return 0;
+
+err_reg_netdev:
+	enetc_of_put_phy(priv);
+	enetc_free_msix(priv);
+err_alloc_msix:
+	enetc_free_si_resources(priv);
+err_alloc_si_res:
+	si->ndev = NULL;
+	free_netdev(ndev);
+err_alloc_netdev:
+err_map_pf_space:
+	enetc_pci_remove(pdev);
+
+	return err;
+}
+
+static void enetc_pf_remove(struct pci_dev *pdev)
+{
+	struct enetc_si *si = pci_get_drvdata(pdev);
+	struct enetc_pf *pf = enetc_si_priv(si);
+	struct enetc_ndev_priv *priv;
+
+	if (pf->num_vfs)
+		enetc_sriov_configure(pdev, 0);
+
+	priv = netdev_priv(si->ndev);
+	netif_info(priv, drv, si->ndev, "%s v%s remove\n",
+		   enetc_drv_name, enetc_drv_ver);
+
+	unregister_netdev(si->ndev);
+
+	enetc_mdio_remove(pf);
+	enetc_of_put_phy(priv);
+
+	enetc_free_msix(priv);
+
+	enetc_free_si_resources(priv);
+
+	free_netdev(si->ndev);
+
+	enetc_pci_remove(pdev);
+}
+
+static const struct pci_device_id enetc_pf_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
+	{ 0, } /* End of table. */
+};
+MODULE_DEVICE_TABLE(pci, enetc_pf_id_table);
+
+static struct pci_driver enetc_pf_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = enetc_pf_id_table,
+	.probe = enetc_pf_probe,
+	.remove = enetc_pf_remove,
+#ifdef CONFIG_PCI_IOV
+	.sriov_configure = enetc_sriov_configure,
+#endif
+};
+module_pci_driver(enetc_pf_driver);
+
+MODULE_DESCRIPTION(ENETC_DRV_NAME_STR);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ENETC_DRV_VER_STR);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
new file mode 100644
index 0000000..10dd1b5
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2017-2019 NXP */
+
+#include "enetc.h"
+
+#define ENETC_PF_NUM_RINGS	8
+
+enum enetc_mac_addr_type {UC, MC, MADDR_TYPE};
+#define ENETC_MAX_NUM_MAC_FLT	((ENETC_MAX_NUM_VFS + 1) * MADDR_TYPE)
+
+#define ENETC_MADDR_HASH_TBL_SZ	64
+struct enetc_mac_filter {
+	union {
+		char mac_addr[ETH_ALEN];
+		DECLARE_BITMAP(mac_hash_table, ENETC_MADDR_HASH_TBL_SZ);
+	};
+	int mac_addr_cnt;
+};
+
+#define ENETC_VLAN_HT_SIZE	64
+
+enum enetc_vf_flags {
+	ENETC_VF_FLAG_PF_SET_MAC	= BIT(0),
+};
+
+struct enetc_vf_state {
+	enum enetc_vf_flags flags;
+};
+
+struct enetc_pf {
+	struct enetc_si *si;
+	int num_vfs; /* number of active VFs, after sriov_init */
+	int total_vfs; /* max number of VFs, set for PF at probe */
+	struct enetc_vf_state *vf_state;
+
+	struct enetc_mac_filter mac_filter[ENETC_MAX_NUM_MAC_FLT];
+
+	struct enetc_msg_swbd rxmsg[ENETC_MAX_NUM_VFS];
+	struct work_struct msg_task;
+	char msg_int_name[ENETC_INT_NAME_MAX];
+
+	char vlan_promisc_simap; /* bitmap of SIs in VLAN promisc mode */
+	DECLARE_BITMAP(vlan_ht_filter, ENETC_VLAN_HT_SIZE);
+	DECLARE_BITMAP(active_vlans, VLAN_N_VID);
+
+	struct mii_bus *mdio; /* saved for cleanup */
+};
+
+int enetc_msg_psi_init(struct enetc_pf *pf);
+void enetc_msg_psi_free(struct enetc_pf *pf);
+void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status);
+
+/* MDIO */
+int enetc_mdio_probe(struct enetc_pf *pf);
+void enetc_mdio_remove(struct enetc_pf *pf);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
new file mode 100644
index 0000000..bc59489
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/fsl/ptp_qoriq.h>
+
+#include "enetc.h"
+
+int enetc_phc_index = -1;
+EXPORT_SYMBOL(enetc_phc_index);
+
+static struct ptp_clock_info enetc_ptp_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "ENETC PTP clock",
+	.max_adj	= 512000,
+	.n_alarm	= 0,
+	.n_ext_ts	= 2,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 1,
+	.adjfine	= ptp_qoriq_adjfine,
+	.adjtime	= ptp_qoriq_adjtime,
+	.gettime64	= ptp_qoriq_gettime,
+	.settime64	= ptp_qoriq_settime,
+	.enable		= ptp_qoriq_enable,
+};
+
+static int enetc_ptp_probe(struct pci_dev *pdev,
+			   const struct pci_device_id *ent)
+{
+	struct ptp_qoriq *ptp_qoriq;
+	void __iomem *base;
+	int err, len, n;
+
+	if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) {
+		dev_info(&pdev->dev, "device is disabled, skipping\n");
+		return -ENODEV;
+	}
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "device enable failed\n");
+		return err;
+	}
+
+	/* set up for 64-bit DMA, falling back to 32-bit */
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev,
+				"DMA configuration failed: 0x%x\n", err);
+			goto err_dma;
+		}
+	}
+
+	err = pci_request_mem_regions(pdev, KBUILD_MODNAME);
+	if (err) {
+		dev_err(&pdev->dev, "pci_request_regions failed err=%d\n", err);
+		goto err_pci_mem_reg;
+	}
+
+	pci_set_master(pdev);
+
+	ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL);
+	if (!ptp_qoriq) {
+		err = -ENOMEM;
+		goto err_alloc_ptp;
+	}
+
+	len = pci_resource_len(pdev, ENETC_BAR_REGS);
+
+	base = ioremap(pci_resource_start(pdev, ENETC_BAR_REGS), len);
+	if (!base) {
+		err = -ENXIO;
+		dev_err(&pdev->dev, "ioremap() failed\n");
+		goto err_ioremap;
+	}
+
+	/* Allocate 1 interrupt */
+	n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+	if (n != 1) {
+		err = -EPERM;
+		goto err_irq_vectors;
+	}
+
+	ptp_qoriq->irq = pci_irq_vector(pdev, 0);
+
+	err = request_irq(ptp_qoriq->irq, ptp_qoriq_isr, 0, DRIVER, ptp_qoriq);
+	if (err) {
+		dev_err(&pdev->dev, "request_irq() failed!\n");
+		goto err_irq;
+	}
+
+	ptp_qoriq->dev = &pdev->dev;
+
+	err = ptp_qoriq_init(ptp_qoriq, base, &enetc_ptp_caps);
+	if (err)
+		goto err_no_clock;
+
+	enetc_phc_index = ptp_qoriq->phc_index;
+	pci_set_drvdata(pdev, ptp_qoriq);
+
+	return 0;
+
+err_no_clock:
+	free_irq(ptp_qoriq->irq, ptp_qoriq);
+err_irq:
+	pci_free_irq_vectors(pdev);
+err_irq_vectors:
+	iounmap(base);
+err_ioremap:
+	kfree(ptp_qoriq);
+err_alloc_ptp:
+	pci_release_mem_regions(pdev);
+err_pci_mem_reg:
+err_dma:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void enetc_ptp_remove(struct pci_dev *pdev)
+{
+	struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev);
+
+	enetc_phc_index = -1;
+	ptp_qoriq_free(ptp_qoriq);
+	pci_free_irq_vectors(pdev);
+	kfree(ptp_qoriq);
+
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static const struct pci_device_id enetc_ptp_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PTP) },
+	{ 0, } /* End of table. */
+};
+MODULE_DEVICE_TABLE(pci, enetc_ptp_id_table);
+
+static struct pci_driver enetc_ptp_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = enetc_ptp_id_table,
+	.probe = enetc_ptp_probe,
+	.remove = enetc_ptp_remove,
+};
+module_pci_driver(enetc_ptp_driver);
+
+MODULE_DESCRIPTION("ENETC PTP clock driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
new file mode 100644
index 0000000..ebd21bf
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2017-2019 NXP */
+
+#include <linux/module.h>
+#include "enetc.h"
+
+#define ENETC_DRV_VER_MAJ 1
+#define ENETC_DRV_VER_MIN 0
+
+#define ENETC_DRV_VER_STR __stringify(ENETC_DRV_VER_MAJ) "." \
+			  __stringify(ENETC_DRV_VER_MIN)
+static const char enetc_drv_ver[] = ENETC_DRV_VER_STR;
+#define ENETC_DRV_NAME_STR "ENETC VF driver"
+static const char enetc_drv_name[] = ENETC_DRV_NAME_STR;
+
+/* Messaging */
+static void enetc_msg_vsi_write_msg(struct enetc_hw *hw,
+				    struct enetc_msg_swbd *msg)
+{
+	u32 val;
+
+	val = enetc_vsi_set_msize(msg->size) | lower_32_bits(msg->dma);
+	enetc_wr(hw, ENETC_VSIMSGSNDAR1, upper_32_bits(msg->dma));
+	enetc_wr(hw, ENETC_VSIMSGSNDAR0, val);
+}
+
+static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg)
+{
+	int timeout = 100;
+	u32 vsimsgsr;
+
+	enetc_msg_vsi_write_msg(&si->hw, msg);
+
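+	/* poll the mailbox busy flag; allow the PF roughly 100-200 ms to reply */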
+	do {
+		vsimsgsr = enetc_rd(&si->hw, ENETC_VSIMSGSR);
+		if (!(vsimsgsr & ENETC_VSIMSGSR_MB))
+			break;
+
+		usleep_range(1000, 2000);
+	} while (--timeout);
+
+	if (!timeout)
+		return -ETIMEDOUT;
+
+	/* check for message delivery error */
+	if (vsimsgsr & ENETC_VSIMSGSR_MS) {
+		dev_err(&si->pdev->dev, "VSI command execute error: %d\n",
+			ENETC_SIMSGSR_GET_MC(vsimsgsr));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv,
+					      struct sockaddr *saddr)
+{
+	struct enetc_msg_cmd_set_primary_mac *cmd;
+	struct enetc_msg_swbd msg;
+	int err;
+
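+	/* the mailbox expects buffer sizes in 64-byte multiples, so align up */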
+	msg.size = ALIGN(sizeof(struct enetc_msg_cmd_set_primary_mac), 64);
+	msg.vaddr = dma_alloc_coherent(priv->dev, msg.size, &msg.dma,
+				       GFP_KERNEL);
+	if (!msg.vaddr) {
+		dev_err(priv->dev, "Failed to alloc Tx msg (size: %d)\n",
+			msg.size);
+		return -ENOMEM;
+	}
+
+	cmd = (struct enetc_msg_cmd_set_primary_mac *)msg.vaddr;
+	cmd->header.type = ENETC_MSG_CMD_MNG_MAC;
+	cmd->header.id = ENETC_MSG_CMD_MNG_ADD;
+	memcpy(&cmd->mac, saddr, sizeof(struct sockaddr));
+
+	/* send the command and wait */
+	err = enetc_msg_vsi_send(priv->si, &msg);
+
+	dma_free_coherent(priv->dev, msg.size, msg.vaddr, msg.dma);
+
+	return err;
+}
+
+static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct sockaddr *saddr = addr;
+	int err;
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	err = enetc_msg_vsi_set_primary_mac_addr(priv, saddr);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int enetc_vf_set_features(struct net_device *ndev,
+				 netdev_features_t features)
+{
+	return enetc_set_features(ndev, features);
+}
+
+/* Probing / Init */
+static const struct net_device_ops enetc_ndev_ops = {
+	.ndo_open		= enetc_open,
+	.ndo_stop		= enetc_close,
+	.ndo_start_xmit		= enetc_xmit,
+	.ndo_get_stats		= enetc_get_stats,
+	.ndo_set_mac_address	= enetc_vf_set_mac_addr,
+	.ndo_set_features	= enetc_vf_set_features,
+	.ndo_do_ioctl		= enetc_ioctl,
+	.ndo_setup_tc		= enetc_setup_tc,
+};
+
+static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
+				  const struct net_device_ops *ndev_ops)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	SET_NETDEV_DEV(ndev, &si->pdev->dev);
+	priv->ndev = ndev;
+	priv->si = si;
+	priv->dev = &si->pdev->dev;
+	si->ndev = ndev;
+
+	priv->msg_enable = (NETIF_MSG_IFUP << 1) - 1;
+	ndev->netdev_ops = ndev_ops;
+	enetc_set_ethtool_ops(ndev);
+	ndev->watchdog_timeo = 5 * HZ;
+	ndev->max_mtu = ENETC_MAX_MTU;
+
+	ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+			    NETIF_F_HW_VLAN_CTAG_TX |
+			    NETIF_F_HW_VLAN_CTAG_RX;
+	ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG |
+			 NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+			 NETIF_F_HW_VLAN_CTAG_TX |
+			 NETIF_F_HW_VLAN_CTAG_RX;
+
+	if (si->num_rss)
+		ndev->hw_features |= NETIF_F_RXHASH;
+
+	if (si->errata & ENETC_ERR_TXCSUM) {
+		ndev->hw_features &= ~NETIF_F_HW_CSUM;
+		ndev->features &= ~NETIF_F_HW_CSUM;
+	}
+
+	/* pick up primary MAC address from SI */
+	enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr);
+}
+
+static int enetc_vf_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *ent)
+{
+	struct enetc_ndev_priv *priv;
+	struct net_device *ndev;
+	struct enetc_si *si;
+	int err;
+
+	err = enetc_pci_probe(pdev, KBUILD_MODNAME, 0);
+	if (err) {
+		dev_err(&pdev->dev, "PCI probing failed\n");
+		return err;
+	}
+
+	si = pci_get_drvdata(pdev);
+
+	enetc_get_si_caps(si);
+
+	ndev = alloc_etherdev_mq(sizeof(*priv), ENETC_MAX_NUM_TXQS);
+	if (!ndev) {
+		err = -ENOMEM;
+		dev_err(&pdev->dev, "netdev creation failed\n");
+		goto err_alloc_netdev;
+	}
+
+	enetc_vf_netdev_setup(si, ndev, &enetc_ndev_ops);
+
+	priv = netdev_priv(ndev);
+
+	enetc_init_si_rings_params(priv);
+
+	err = enetc_alloc_si_resources(priv);
+	if (err) {
+		dev_err(&pdev->dev, "SI resource alloc failed\n");
+		goto err_alloc_si_res;
+	}
+
+	err = enetc_alloc_msix(priv);
+	if (err) {
+		dev_err(&pdev->dev, "MSIX alloc failed\n");
+		goto err_alloc_msix;
+	}
+
+	err = register_netdev(ndev);
+	if (err)
+		goto err_reg_netdev;
+
+	netif_carrier_off(ndev);
+
+	netif_info(priv, probe, ndev, "%s v%s\n",
+		   enetc_drv_name, enetc_drv_ver);
+
+	return 0;
+
+err_reg_netdev:
+	enetc_free_msix(priv);
+err_alloc_msix:
+	enetc_free_si_resources(priv);
+err_alloc_si_res:
+	si->ndev = NULL;
+	free_netdev(ndev);
+err_alloc_netdev:
+	enetc_pci_remove(pdev);
+
+	return err;
+}
+
+static void enetc_vf_remove(struct pci_dev *pdev)
+{
+	struct enetc_si *si = pci_get_drvdata(pdev);
+	struct enetc_ndev_priv *priv;
+
+	priv = netdev_priv(si->ndev);
+	netif_info(priv, drv, si->ndev, "%s v%s remove\n",
+		   enetc_drv_name, enetc_drv_ver);
+	unregister_netdev(si->ndev);
+
+	enetc_free_msix(priv);
+
+	enetc_free_si_resources(priv);
+
+	free_netdev(si->ndev);
+
+	enetc_pci_remove(pdev);
+}
+
+static const struct pci_device_id enetc_vf_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_VF) },
+	{ 0, } /* End of table. */
+};
+MODULE_DEVICE_TABLE(pci, enetc_vf_id_table);
+
+static struct pci_driver enetc_vf_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = enetc_vf_id_table,
+	.probe = enetc_vf_probe,
+	.remove = enetc_vf_remove,
+};
+module_pci_driver(enetc_vf_driver);
+
+MODULE_DESCRIPTION(ENETC_DRV_NAME_STR);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ENETC_DRV_VER_STR);
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index bf80855..f79e57f 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -531,7 +531,6 @@
 
 	/* Phylib and MDIO interface */
 	struct	mii_bus *mii_bus;
-	int	mii_timeout;
 	uint	phy_speed;
 	phy_interface_t	phy_interface;
 	struct device_node *phy_node;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 7b98bb7..4bb3076 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -208,8 +208,11 @@
 
 /* FEC MII MMFR bits definition */
 #define FEC_MMFR_ST		(1 << 30)
+#define FEC_MMFR_ST_C45		(0)
 #define FEC_MMFR_OP_READ	(2 << 28)
+#define FEC_MMFR_OP_READ_C45	(3 << 28)
 #define FEC_MMFR_OP_WRITE	(1 << 28)
+#define FEC_MMFR_OP_ADDR_WRITE	(0)
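+/* Clause 45 frames use ST = 00 and require an address write before the op */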
 #define FEC_MMFR_PA(v)		((v & 0x1f) << 23)
 #define FEC_MMFR_RA(v)		((v & 0x1f) << 18)
 #define FEC_MMFR_TA		(2 << 16)
@@ -365,7 +368,7 @@
 		status = fec16_to_cpu(bdp->cbd_sc);
 		status &= ~BD_ENET_TX_STATS;
 		status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
-		frag_len = skb_shinfo(skb)->frags[frag].size;
+		frag_len = skb_frag_size(&skb_shinfo(skb)->frags[frag]);
 
 		/* Handle the last BD specially */
 		if (frag == nr_frags - 1) {
@@ -387,7 +390,7 @@
 			ebdp->cbd_esc = cpu_to_fec32(estatus);
 		}
 
-		bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
+		bufaddr = skb_frag_address(this_frag);
 
 		index = fec_enet_get_bd_index(bdp, &txq->bd);
 		if (((unsigned long) bufaddr) & fep->tx_align ||
@@ -1655,7 +1658,7 @@
 		struct device_node *np = fep->pdev->dev.of_node;
 		if (np) {
 			const char *mac = of_get_mac_address(np);
-			if (mac)
+			if (!IS_ERR(mac))
 				iap = (unsigned char *) mac;
 		}
 	}
@@ -1689,10 +1692,10 @@
 	 */
 	if (!is_valid_ether_addr(iap)) {
 		/* Report it and use a random ethernet address instead */
-		netdev_err(ndev, "Invalid MAC address: %pM\n", iap);
+		dev_err(&fep->pdev->dev, "Invalid MAC address: %pM\n", iap);
 		eth_hw_addr_random(ndev);
-		netdev_info(ndev, "Using random MAC address: %pM\n",
-			    ndev->dev_addr);
+		dev_info(&fep->pdev->dev, "Using random MAC address: %pM\n",
+			 ndev->dev_addr);
 		return;
 	}
 
@@ -1714,12 +1717,6 @@
 	struct phy_device *phy_dev = ndev->phydev;
 	int status_change = 0;
 
-	/* Prevent a state halted on mii error */
-	if (fep->mii_timeout && phy_dev->state == PHY_HALTED) {
-		phy_dev->state = PHY_RESUMING;
-		return;
-	}
-
 	/*
 	 * If the netdev is down, or is going down, we're not interested
 	 * in link state events, so just mark our idea of the link as down
@@ -1773,25 +1770,52 @@
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
 	unsigned long time_left;
-	int ret = 0;
+	int ret = 0, frame_start, frame_addr, frame_op;
+	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
 	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
 		return ret;
 
-	fep->mii_timeout = 0;
 	reinit_completion(&fep->mdio_done);
 
+	if (is_c45) {
+		frame_start = FEC_MMFR_ST_C45;
+
+		/* write address */
+		frame_addr = (regnum >> 16);
+		writel(frame_start | FEC_MMFR_OP_ADDR_WRITE |
+		       FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
+		       FEC_MMFR_TA | (regnum & 0xFFFF),
+		       fep->hwp + FEC_MII_DATA);
+
+		/* wait for end of transfer */
+		time_left = wait_for_completion_timeout(&fep->mdio_done,
+				usecs_to_jiffies(FEC_MII_TIMEOUT));
+		if (time_left == 0) {
+			netdev_err(fep->netdev, "MDIO address write timeout\n");
+			ret = -ETIMEDOUT;
+			goto out;
+		}
+
+		frame_op = FEC_MMFR_OP_READ_C45;
+
+	} else {
+		/* C22 read */
+		frame_op = FEC_MMFR_OP_READ;
+		frame_start = FEC_MMFR_ST;
+		frame_addr = regnum;
+	}
+
 	/* start a read op */
-	writel(FEC_MMFR_ST | FEC_MMFR_OP_READ |
-		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
+	writel(frame_start | frame_op |
+		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
 		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
 	time_left = wait_for_completion_timeout(&fep->mdio_done,
 			usecs_to_jiffies(FEC_MII_TIMEOUT));
 	if (time_left == 0) {
-		fep->mii_timeout = 1;
 		netdev_err(fep->netdev, "MDIO read timeout\n");
 		ret = -ETIMEDOUT;
 		goto out;
@@ -1812,7 +1836,8 @@
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
 	unsigned long time_left;
-	int ret;
+	int ret, frame_start, frame_addr;
+	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
 	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
@@ -1820,12 +1845,35 @@
 	else
 		ret = 0;
 
-	fep->mii_timeout = 0;
 	reinit_completion(&fep->mdio_done);
 
+	if (is_c45) {
+		frame_start = FEC_MMFR_ST_C45;
+
+		/* write address */
+		frame_addr = (regnum >> 16);
+		writel(frame_start | FEC_MMFR_OP_ADDR_WRITE |
+		       FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
+		       FEC_MMFR_TA | (regnum & 0xFFFF),
+		       fep->hwp + FEC_MII_DATA);
+
+		/* wait for end of transfer */
+		time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+		if (time_left == 0) {
+			netdev_err(fep->netdev, "MDIO address write timeout\n");
+			ret = -ETIMEDOUT;
+			goto out;
+		}
+	} else {
+		/* C22 write */
+		frame_start = FEC_MMFR_ST;
+		frame_addr = regnum;
+	}
+
 	/* start a write op */
-	writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE |
-		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
+	writel(frame_start | FEC_MMFR_OP_WRITE |
+		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
 		FEC_MMFR_TA | FEC_MMFR_DATA(value),
 		fep->hwp + FEC_MII_DATA);
 
@@ -1833,11 +1881,11 @@
 	time_left = wait_for_completion_timeout(&fep->mdio_done,
 			usecs_to_jiffies(FEC_MII_TIMEOUT));
 	if (time_left == 0) {
-		fep->mii_timeout = 1;
 		netdev_err(fep->netdev, "MDIO write timeout\n");
 		ret  = -ETIMEDOUT;
 	}
 
+out:
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
 
@@ -1850,13 +1898,9 @@
 	int ret;
 
 	if (enable) {
-		ret = clk_prepare_enable(fep->clk_ahb);
-		if (ret)
-			return ret;
-
 		ret = clk_prepare_enable(fep->clk_enet_out);
 		if (ret)
-			goto failed_clk_enet_out;
+			return ret;
 
 		if (fep->clk_ptp) {
 			mutex_lock(&fep->ptp_clk_mutex);
@@ -1876,7 +1920,6 @@
 
 		phy_reset_after_clk_enable(ndev->phydev);
 	} else {
-		clk_disable_unprepare(fep->clk_ahb);
 		clk_disable_unprepare(fep->clk_enet_out);
 		if (fep->clk_ptp) {
 			mutex_lock(&fep->ptp_clk_mutex);
@@ -1895,8 +1938,6 @@
 failed_clk_ptp:
 	if (fep->clk_enet_out)
 		clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
-		clk_disable_unprepare(fep->clk_ahb);
 
 	return ret;
 }
@@ -1948,16 +1989,15 @@
 
 	/* mask with MAC supported features */
 	if (fep->quirks & FEC_QUIRK_HAS_GBIT) {
-		phy_dev->supported &= PHY_GBIT_FEATURES;
-		phy_dev->supported &= ~SUPPORTED_1000baseT_Half;
+		phy_set_max_speed(phy_dev, 1000);
+		phy_remove_link_mode(phy_dev,
+				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 #if !defined(CONFIG_M5272)
-		phy_dev->supported |= SUPPORTED_Pause;
+		phy_support_sym_pause(phy_dev);
 #endif
 	}
 	else
-		phy_dev->supported &= PHY_BASIC_FEATURES;
-
-	phy_dev->advertising = phy_dev->supported;
+		phy_set_max_speed(phy_dev, 100);
 
 	fep->link = 0;
 	fep->full_duplex = 0;
@@ -2002,8 +2042,6 @@
 		return -ENOENT;
 	}
 
-	fep->mii_timeout = 0;
-
 	/*
 	 * Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed)
 	 *
@@ -2057,8 +2095,7 @@
 
 	node = of_get_child_by_name(pdev->dev.of_node, "mdio");
 	err = of_mdiobus_register(fep->mii_bus, node);
-	if (node)
-		of_node_put(node);
+	of_node_put(node);
 	if (err)
 		goto err_out_free_mdiobus;
 
@@ -2112,6 +2149,7 @@
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
 	defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
 	defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST)
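+/* layout version of the register dump, reported via regs->version */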
+static __u32 fec_enet_register_version = 2;
 static u32 fec_enet_register_offset[] = {
 	FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
 	FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -2142,6 +2180,7 @@
 	IEEE_R_FDXFC, IEEE_R_OCTETS_OK
 };
 #else
+static __u32 fec_enet_register_version = 1;
 static u32 fec_enet_register_offset[] = {
 	FEC_ECNTRL, FEC_IEVENT, FEC_IMASK, FEC_IVEC, FEC_R_DES_ACTIVE_0,
 	FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_0,
@@ -2163,6 +2202,8 @@
 	u32 *buf = (u32 *)regbuf;
 	u32 i, off;
 
+	regs->version = fec_enet_register_version;
+
 	memset(buf, 0, regs->len);
 
 	for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) {
@@ -2238,13 +2279,8 @@
 	fep->pause_flag |= pause->rx_pause ? FEC_PAUSE_FLAG_ENABLE : 0;
 	fep->pause_flag |= pause->autoneg ? FEC_PAUSE_FLAG_AUTONEG : 0;
 
-	if (pause->rx_pause || pause->autoneg) {
-		ndev->phydev->supported |= ADVERTISED_Pause;
-		ndev->phydev->advertising |= ADVERTISED_Pause;
-	} else {
-		ndev->phydev->supported &= ~ADVERTISED_Pause;
-		ndev->phydev->advertising &= ~ADVERTISED_Pause;
-	}
+	phy_set_sym_pause(ndev->phydev, pause->rx_pause, pause->tx_pause,
+			  pause->autoneg);
 
 	if (pause->autoneg) {
 		if (netif_running(ndev))
@@ -2468,30 +2504,31 @@
 fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct device *dev = &fep->pdev->dev;
 	unsigned int cycle;
 
 	if (!(fep->quirks & FEC_QUIRK_HAS_COALESCE))
 		return -EOPNOTSUPP;
 
 	if (ec->rx_max_coalesced_frames > 255) {
-		pr_err("Rx coalesced frames exceed hardware limitation\n");
+		dev_err(dev, "Rx coalesced frames exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	if (ec->tx_max_coalesced_frames > 255) {
-		pr_err("Tx coalesced frame exceed hardware limitation\n");
+		dev_err(dev, "Tx coalesced frame exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr);
 	if (cycle > 0xFFFF) {
-		pr_err("Rx coalesced usec exceed hardware limitation\n");
+		dev_err(dev, "Rx coalesced usec exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr);
 	if (cycle > 0xFFFF) {
-		pr_err("Rx coalesced usec exceed hardware limitation\n");
+		dev_err(dev, "Rx coalesced usec exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
@@ -3165,8 +3202,6 @@
 		return -ENOMEM;
 	}
 
-	memset(cbd_base, 0, bd_size);
-
 	/* Get the Ethernet address */
 	fec_get_mac(ndev);
 	/* make sure MAC we just acquired is programmed into the hw */
@@ -3361,7 +3396,6 @@
 	struct fec_platform_data *pdata;
 	struct net_device *ndev;
 	int i, irq, ret = 0;
-	struct resource *r;
 	const struct of_device_id *of_id;
 	static int dev_id;
 	struct device_node *np = pdev->dev.of_node, *phy_node;
@@ -3401,8 +3435,7 @@
 	/* Select default pin state */
 	pinctrl_pm_select_default_state(&pdev->dev);
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	fep->hwp = devm_ioremap_resource(&pdev->dev, r);
+	fep->hwp = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(fep->hwp)) {
 		ret = PTR_ERR(fep->hwp);
 		goto failed_ioremap;
@@ -3485,14 +3518,16 @@
 	ret = clk_prepare_enable(fep->clk_ipg);
 	if (ret)
 		goto failed_clk_ipg;
+	ret = clk_prepare_enable(fep->clk_ahb);
+	if (ret)
+		goto failed_clk_ahb;
 
-	fep->reg_phy = devm_regulator_get(&pdev->dev, "phy");
+	fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy");
 	if (!IS_ERR(fep->reg_phy)) {
 		ret = regulator_enable(fep->reg_phy);
 		if (ret) {
 			dev_err(&pdev->dev,
 				"Failed to enable phy regulator: %d\n", ret);
-			clk_disable_unprepare(fep->clk_ipg);
 			goto failed_regulator;
 		}
 	} else {
@@ -3523,7 +3558,7 @@
 
 	for (i = 0; i < irq_cnt; i++) {
 		snprintf(irq_name, sizeof(irq_name), "int%d", i);
-		irq = platform_get_irq_byname(pdev, irq_name);
+		irq = platform_get_irq_byname_optional(pdev, irq_name);
 		if (irq < 0)
 			irq = platform_get_irq(pdev, i);
 		if (irq < 0) {
@@ -3575,9 +3610,12 @@
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
 failed_reset:
-	pm_runtime_put(&pdev->dev);
+	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 failed_regulator:
+	clk_disable_unprepare(fep->clk_ahb);
+failed_clk_ahb:
+	clk_disable_unprepare(fep->clk_ipg);
 failed_clk_ipg:
 	fec_enet_clk_enable(ndev, false);
 failed_clk:
@@ -3598,6 +3636,11 @@
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	struct device_node *np = pdev->dev.of_node;
+	int ret;
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0)
+		return ret;
 
 	cancel_work_sync(&fep->tx_timeout_work);
 	fec_ptp_stop(pdev);
@@ -3605,13 +3648,17 @@
 	fec_enet_mii_remove(fep);
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
-	pm_runtime_put(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
+
 	if (of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
 	of_node_put(fep->phy_node);
 	free_netdev(ndev);
 
+	clk_disable_unprepare(fep->clk_ahb);
+	clk_disable_unprepare(fep->clk_ipg);
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
 	return 0;
 }
 
@@ -3701,6 +3748,7 @@
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
 
+	clk_disable_unprepare(fep->clk_ahb);
 	clk_disable_unprepare(fep->clk_ipg);
 
 	return 0;
@@ -3710,8 +3758,20 @@
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	int ret;
 
-	return clk_prepare_enable(fep->clk_ipg);
+	ret = clk_prepare_enable(fep->clk_ahb);
+	if (ret)
+		return ret;
+	ret = clk_prepare_enable(fep->clk_ipg);
+	if (ret)
+		goto failed_clk_ipg;
+
+	return 0;
+
+failed_clk_ipg:
+	clk_disable_unprepare(fep->clk_ahb);
+	return ret;
 }
 
 static const struct dev_pm_ops fec_pm_ops = {
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index 6d7269d..30cdb24 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -305,7 +305,8 @@
  * invariant will hold if you make sure that the netif_*_queue()
  * calls are done at the proper times.
  */
-static int mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mpc52xx_fec_priv *priv = netdev_priv(dev);
 	struct bcom_fec_bd *bd;
@@ -368,7 +369,7 @@
 		dma_unmap_single(dev->dev.parent, bd->skb_pa, skb->len,
 				 DMA_TO_DEVICE);
 
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 	}
 	spin_unlock(&priv->lock);
 
@@ -901,8 +902,8 @@
 	 * First try to read MAC address from DT
 	 */
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr) {
-		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr)) {
+		ether_addr_copy(ndev->dev_addr, mac_addr);
 	} else {
 		struct mpc52xx_fec __iomem *fec = priv->fec;
 
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 7e892b1..945643c 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -600,9 +600,9 @@
 
 	INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep);
 
-	irq = platform_get_irq_byname(pdev, "pps");
+	irq = platform_get_irq_byname_optional(pdev, "pps");
 	if (irq < 0)
-		irq = platform_get_irq(pdev, irq_idx);
+		irq = platform_get_irq_optional(pdev, irq_idx);
 	/* Failure to get an irq is not fatal,
 	 * only the PTP_CLOCK_PPS clock events should stop
 	 */
@@ -617,7 +617,7 @@
 	fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev);
 	if (IS_ERR(fep->ptp_clock)) {
 		fep->ptp_clock = NULL;
-		pr_err("ptp_clock_register failed\n");
+		dev_err(&pdev->dev, "ptp_clock_register failed\n");
 	}
 
 	schedule_delayed_work(&fep->time_keep, HZ);
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index dc0850b..0139cb9 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config FSL_FMAN
 	tristate "FMan support"
 	depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index e80fedb..210749b 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -2439,9 +2439,6 @@
  * buffers when not using jumbo frames.
  * Must be large enough to accommodate the network MTU, but small enough
  * to avoid wasting skb memory.
- *
- * Could be overridden once, at boot-time, via the
- * fm_set_max_frm() callback.
  */
 static int fsl_fm_max_frm = FSL_FM_MAX_FRAME_SIZE;
 module_param(fsl_fm_max_frm, int, 0);
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c
index f54da3c..e1bdfed 100644
--- a/drivers/net/ethernet/freescale/fman/fman_keygen.c
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c
@@ -144,7 +144,8 @@
 /* Hash Key extraction fields: */
 #define DEFAULT_HASH_KEY_EXTRACT_FIELDS		\
 	(KG_SCH_KN_IPSRC1 | KG_SCH_KN_IPDST1 | \
-	    KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST)
+	 KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST | \
+	 KG_SCH_KN_IPSEC_SPI)
 
 /* Default values to be used as hash key in case IPv4 or L4 (TCP, UDP)
  * don't exist in the frame
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index bc6eb30..41c6fa2 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -928,7 +928,7 @@
 	hash = get_mac_addr_hash_code(addr) & HASH_CTRL_ADDR_MASK;
 
 	/* Create element to be added to the driver hash table */
-	hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+	hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
 	if (!hash_entry)
 		return -ENOMEM;
 	hash_entry->addr = addr;
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 4070593..f75b9c1 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -553,7 +553,7 @@
 	hash = (crc >> TGEC_HASH_MCAST_SHIFT) & TGEC_HASH_ADR_MSK;
 
 	/* Create element to be added to the driver hash table */
-	hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+	hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
 	if (!hash_entry)
 		return -ENOMEM;
 	hash_entry->addr = addr;
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index a847b9c..7ab8095 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -393,11 +393,7 @@
 	 */
 
 	/* get local capabilities */
-	lcl_adv = 0;
-	if (phy_dev->advertising & ADVERTISED_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_CAP;
-	if (phy_dev->advertising & ADVERTISED_Asym_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_ASYM;
+	lcl_adv = linkmode_adv_to_lcl_adv_t(phy_dev->advertising);
 
 	/* get link partner capabilities */
 	rmt_adv = 0;
@@ -728,12 +724,12 @@
 
 	/* Get the MAC address */
 	mac_addr = of_get_mac_address(mac_node);
-	if (!mac_addr) {
+	if (IS_ERR(mac_addr)) {
 		dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
 		err = -EINVAL;
 		goto _return_of_get_parent;
 	}
-	memcpy(mac_dev->addr, mac_addr, sizeof(mac_dev->addr));
+	ether_addr_copy(mac_dev->addr, mac_addr);
 
 	/* Get the port handles */
 	nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL);
@@ -859,9 +855,7 @@
 	if (err < 0)
 		dev_err(dev, "fman_set_mac_active_pause() = %d\n", err);
 
-	dev_info(dev, "FMan MAC address: %02hx:%02hx:%02hx:%02hx:%02hx:%02hx\n",
-		 mac_dev->addr[0], mac_dev->addr[1], mac_dev->addr[2],
-		 mac_dev->addr[3], mac_dev->addr[4], mac_dev->addr[5]);
+	dev_info(dev, "FMan MAC address: %pM\n", mac_dev->addr);
 
 	priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev);
 	if (IS_ERR(priv->eth_dev)) {
diff --git a/drivers/net/ethernet/freescale/fs_enet/Kconfig b/drivers/net/ethernet/freescale/fs_enet/Kconfig
index be92229..245d9a6 100644
--- a/drivers/net/ethernet/freescale/fs_enet/Kconfig
+++ b/drivers/net/ethernet/freescale/fs_enet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config FS_ENET
        tristate "Freescale Ethernet Driver"
        depends on NET_VENDOR_FREESCALE && (CPM1 || CPM2 || PPC_MPC512x)
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 2c2976a..3981c06 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -481,7 +481,8 @@
 }
 #endif
 
-static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	cbd_t __iomem *bdp;
@@ -500,7 +501,7 @@
 		nr_frags = skb_shinfo(skb)->nr_frags;
 		frag = skb_shinfo(skb)->frags;
 		for (i = 0; i < nr_frags; i++, frag++) {
-			if (!IS_ALIGNED(frag->page_offset, 4)) {
+			if (!IS_ALIGNED(skb_frag_off(frag), 4)) {
 				is_aligned = 0;
 				break;
 			}
@@ -1013,8 +1014,8 @@
 	spin_lock_init(&fep->tx_lock);
 
 	mac_addr = of_get_mac_address(ofdev->dev.of_node);
-	if (mac_addr)
-		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(ndev->dev_addr, mac_addr);
 
 	ret = fep->ops->allocate_bd(ndev);
 	if (ret)
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index ac2c3f6..c6481bd 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Freescale PowerQUICC Ethernet Driver -- MIIM bus implementation
  * Provides Bus interface for MIIM regs
@@ -8,12 +9,6 @@
  * Copyright 2002-2004, 2008-2009 Freescale Semiconductor, Inc.
  *
  * Based on gianfar_mii.c and ucc_geth_mii.c (Li Yang, Kim Phillips)
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 
 #include <linux/kernel.h>
@@ -446,8 +441,8 @@
 		goto error;
 	}
 
-	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s@%llx", np->name,
-		(unsigned long long)res.start);
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%pOFn@%llx", np,
+		 (unsigned long long)res.start);
 
 	priv->map = of_iomap(np, 0);
 	if (!priv->map) {
@@ -473,7 +468,7 @@
 
 	if (data->get_tbipa) {
 		for_each_child_of_node(np, tbi) {
-			if (strcmp(tbi->type, "tbi-phy") == 0) {
+			if (of_node_is_type(tbi, "tbi-phy")) {
 				dev_dbg(&pdev->dev, "found TBI PHY node %pOFP\n",
 					tbi);
 				break;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index f27f9ba..51ad864 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* drivers/net/ethernet/freescale/gianfar.c
  *
  * Gianfar Ethernet Driver
@@ -12,11 +13,6 @@
  * Copyright 2002-2009, 2011-2013 Freescale Semiconductor, Inc.
  * Copyright 2007 MontaVista Software, Inc.
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  *  Gianfar:  AKA Lambda Draconis, "Dragon"
  *  RA 11 31 24.2
  *  Dec +69 19 52
@@ -102,8 +98,6 @@
 #include <linux/phy_fixed.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 
 #include "gianfar.h"
 
@@ -111,43 +105,6 @@
 
 const char gfar_driver_version[] = "2.0";
 
-static int gfar_enet_open(struct net_device *dev);
-static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static void gfar_reset_task(struct work_struct *work);
-static void gfar_timeout(struct net_device *dev);
-static int gfar_close(struct net_device *dev);
-static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
-				int alloc_cnt);
-static int gfar_set_mac_address(struct net_device *dev);
-static int gfar_change_mtu(struct net_device *dev, int new_mtu);
-static irqreturn_t gfar_error(int irq, void *dev_id);
-static irqreturn_t gfar_transmit(int irq, void *dev_id);
-static irqreturn_t gfar_interrupt(int irq, void *dev_id);
-static void adjust_link(struct net_device *dev);
-static noinline void gfar_update_link_state(struct gfar_private *priv);
-static int init_phy(struct net_device *dev);
-static int gfar_probe(struct platform_device *ofdev);
-static int gfar_remove(struct platform_device *ofdev);
-static void free_skb_resources(struct gfar_private *priv);
-static void gfar_set_multi(struct net_device *dev);
-static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
-static void gfar_configure_serdes(struct net_device *dev);
-static int gfar_poll_rx(struct napi_struct *napi, int budget);
-static int gfar_poll_tx(struct napi_struct *napi, int budget);
-static int gfar_poll_rx_sq(struct napi_struct *napi, int budget);
-static int gfar_poll_tx_sq(struct napi_struct *napi, int budget);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void gfar_netpoll(struct net_device *dev);
-#endif
-int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit);
-static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue);
-static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb);
-static void gfar_halt_nodisable(struct gfar_private *priv);
-static void gfar_clear_exact_match(struct net_device *dev);
-static void gfar_set_mac_for_addr(struct net_device *dev, int num,
-				  const u8 *addr);
-static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-
 MODULE_AUTHOR("Freescale Semiconductor, Inc");
 MODULE_DESCRIPTION("Gianfar Ethernet Driver");
 MODULE_LICENSE("GPL");
@@ -168,138 +125,6 @@
 	bdp->lstatus = cpu_to_be32(lstatus);
 }
 
-static void gfar_init_bds(struct net_device *ndev)
-{
-	struct gfar_private *priv = netdev_priv(ndev);
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	struct gfar_priv_tx_q *tx_queue = NULL;
-	struct gfar_priv_rx_q *rx_queue = NULL;
-	struct txbd8 *txbdp;
-	u32 __iomem *rfbptr;
-	int i, j;
-
-	for (i = 0; i < priv->num_tx_queues; i++) {
-		tx_queue = priv->tx_queue[i];
-		/* Initialize some variables in our dev structure */
-		tx_queue->num_txbdfree = tx_queue->tx_ring_size;
-		tx_queue->dirty_tx = tx_queue->tx_bd_base;
-		tx_queue->cur_tx = tx_queue->tx_bd_base;
-		tx_queue->skb_curtx = 0;
-		tx_queue->skb_dirtytx = 0;
-
-		/* Initialize Transmit Descriptor Ring */
-		txbdp = tx_queue->tx_bd_base;
-		for (j = 0; j < tx_queue->tx_ring_size; j++) {
-			txbdp->lstatus = 0;
-			txbdp->bufPtr = 0;
-			txbdp++;
-		}
-
-		/* Set the last descriptor in the ring to indicate wrap */
-		txbdp--;
-		txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) |
-					    TXBD_WRAP);
-	}
-
-	rfbptr = &regs->rfbptr0;
-	for (i = 0; i < priv->num_rx_queues; i++) {
-		rx_queue = priv->rx_queue[i];
-
-		rx_queue->next_to_clean = 0;
-		rx_queue->next_to_use = 0;
-		rx_queue->next_to_alloc = 0;
-
-		/* make sure next_to_clean != next_to_use after this
-		 * by leaving at least 1 unused descriptor
-		 */
-		gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue));
-
-		rx_queue->rfbptr = rfbptr;
-		rfbptr += 2;
-	}
-}
-
-static int gfar_alloc_skb_resources(struct net_device *ndev)
-{
-	void *vaddr;
-	dma_addr_t addr;
-	int i, j;
-	struct gfar_private *priv = netdev_priv(ndev);
-	struct device *dev = priv->dev;
-	struct gfar_priv_tx_q *tx_queue = NULL;
-	struct gfar_priv_rx_q *rx_queue = NULL;
-
-	priv->total_tx_ring_size = 0;
-	for (i = 0; i < priv->num_tx_queues; i++)
-		priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size;
-
-	priv->total_rx_ring_size = 0;
-	for (i = 0; i < priv->num_rx_queues; i++)
-		priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size;
-
-	/* Allocate memory for the buffer descriptors */
-	vaddr = dma_alloc_coherent(dev,
-				   (priv->total_tx_ring_size *
-				    sizeof(struct txbd8)) +
-				   (priv->total_rx_ring_size *
-				    sizeof(struct rxbd8)),
-				   &addr, GFP_KERNEL);
-	if (!vaddr)
-		return -ENOMEM;
-
-	for (i = 0; i < priv->num_tx_queues; i++) {
-		tx_queue = priv->tx_queue[i];
-		tx_queue->tx_bd_base = vaddr;
-		tx_queue->tx_bd_dma_base = addr;
-		tx_queue->dev = ndev;
-		/* enet DMA only understands physical addresses */
-		addr  += sizeof(struct txbd8) * tx_queue->tx_ring_size;
-		vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
-	}
-
-	/* Start the rx descriptor ring where the tx ring leaves off */
-	for (i = 0; i < priv->num_rx_queues; i++) {
-		rx_queue = priv->rx_queue[i];
-		rx_queue->rx_bd_base = vaddr;
-		rx_queue->rx_bd_dma_base = addr;
-		rx_queue->ndev = ndev;
-		rx_queue->dev = dev;
-		addr  += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
-		vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
-	}
-
-	/* Setup the skbuff rings */
-	for (i = 0; i < priv->num_tx_queues; i++) {
-		tx_queue = priv->tx_queue[i];
-		tx_queue->tx_skbuff =
-			kmalloc_array(tx_queue->tx_ring_size,
-				      sizeof(*tx_queue->tx_skbuff),
-				      GFP_KERNEL);
-		if (!tx_queue->tx_skbuff)
-			goto cleanup;
-
-		for (j = 0; j < tx_queue->tx_ring_size; j++)
-			tx_queue->tx_skbuff[j] = NULL;
-	}
-
-	for (i = 0; i < priv->num_rx_queues; i++) {
-		rx_queue = priv->rx_queue[i];
-		rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size,
-					    sizeof(*rx_queue->rx_buff),
-					    GFP_KERNEL);
-		if (!rx_queue->rx_buff)
-			goto cleanup;
-	}
-
-	gfar_init_bds(ndev);
-
-	return 0;
-
-cleanup:
-	free_skb_resources(priv);
-	return -ENOMEM;
-}
-
 static void gfar_init_tx_rx_base(struct gfar_private *priv)
 {
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
@@ -450,7 +275,7 @@
 	}
 }
 
-void gfar_configure_coalescing_all(struct gfar_private *priv)
+static void gfar_configure_coalescing_all(struct gfar_private *priv)
 {
 	gfar_configure_coalescing(priv, 0xFF, 0xFF);
 }
@@ -483,6 +308,62 @@
 	return &dev->stats;
 }
 
+/* Set the appropriate hash bit for the given addr */
+/* The algorithm works like so:
+ * 1) Take the Destination Address (ie the multicast address), and
+ * do a CRC on it (little endian), and reverse the bits of the
+ * result.
+ * 2) Use the 8 most significant bits as a hash into a 256-entry
+ * table.  The table is controlled through 8 32-bit registers:
+ * gaddr0-7.  gaddr0's MSB is entry 0, and gaddr7's LSB is
+ * entry 255.  This means that the 3 most significant bits of the
+ * hash select which gaddr register to use, and the 5 other bits
+ * indicate which bit (assuming an IBM numbering scheme, which
+ * for PowerPC (tm) is usually the case) in the register holds
+ * the entry.
+ */
+static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr)
+{
+	u32 tempval;
+	struct gfar_private *priv = netdev_priv(dev);
+	u32 result = ether_crc(ETH_ALEN, addr);
+	int width = priv->hash_width;
+	u8 whichbit = (result >> (32 - width)) & 0x1f;
+	u8 whichreg = result >> (32 - width + 5);
+	u32 value = (1 << (31-whichbit));
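+	/* e.g. width = 8: hash bits 31-29 select gaddr0-7, bits 28-24 the bit */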
+
+	tempval = gfar_read(priv->hash_regs[whichreg]);
+	tempval |= value;
+	gfar_write(priv->hash_regs[whichreg], tempval);
+}
+
+/* There are multiple MAC Address register pairs on some controllers
+ * This function sets the numth pair to a given address
+ */
+static void gfar_set_mac_for_addr(struct net_device *dev, int num,
+				  const u8 *addr)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+	u32 __iomem *macptr = &regs->macstnaddr1;
+
+	macptr += num*2;
+
+	/* For a station address of 0x12345678ABCD in transmission
+	 * order (BE), MACnADDR1 is set to 0xCDAB7856 and
+	 * MACnADDR2 is set to 0x34120000.
+	 */
+	tempval = (addr[5] << 24) | (addr[4] << 16) |
+		  (addr[3] << 8)  |  addr[2];
+
+	gfar_write(macptr, tempval);
+
+	tempval = (addr[1] << 24) | (addr[0] << 16);
+
+	gfar_write(macptr+1, tempval);
+}
+
 static int gfar_set_mac_addr(struct net_device *dev, void *p)
 {
 	eth_mac_addr(dev, p);
@@ -492,23 +373,6 @@
 	return 0;
 }
 
-static const struct net_device_ops gfar_netdev_ops = {
-	.ndo_open = gfar_enet_open,
-	.ndo_start_xmit = gfar_start_xmit,
-	.ndo_stop = gfar_close,
-	.ndo_change_mtu = gfar_change_mtu,
-	.ndo_set_features = gfar_set_features,
-	.ndo_set_rx_mode = gfar_set_multi,
-	.ndo_tx_timeout = gfar_timeout,
-	.ndo_do_ioctl = gfar_ioctl,
-	.ndo_get_stats = gfar_get_stats,
-	.ndo_set_mac_address = gfar_set_mac_addr,
-	.ndo_validate_addr = eth_validate_addr,
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller = gfar_netpoll,
-#endif
-};
-
 static void gfar_ints_disable(struct gfar_private *priv)
 {
 	int i;
@@ -722,16 +586,59 @@
 	int num = 0;
 
 	for_each_available_child_of_node(np, child)
-		if (!of_node_cmp(child->name, "queue-group"))
+		if (of_node_name_eq(child, "queue-group"))
 			num++;
 
 	return num;
 }
 
+/* Reads the controller's registers to determine what interface
+ * connects it to the PHY.
+ */
+static phy_interface_t gfar_get_interface(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 ecntrl;
+
+	ecntrl = gfar_read(&regs->ecntrl);
+
+	if (ecntrl & ECNTRL_SGMII_MODE)
+		return PHY_INTERFACE_MODE_SGMII;
+
+	if (ecntrl & ECNTRL_TBI_MODE) {
+		if (ecntrl & ECNTRL_REDUCED_MODE)
+			return PHY_INTERFACE_MODE_RTBI;
+		else
+			return PHY_INTERFACE_MODE_TBI;
+	}
+
+	if (ecntrl & ECNTRL_REDUCED_MODE) {
+		if (ecntrl & ECNTRL_REDUCED_MII_MODE) {
+			return PHY_INTERFACE_MODE_RMII;
+		}
+		else {
+			phy_interface_t interface = priv->interface;
+
+			/* This isn't autodetected right now, so it must
+			 * be set by the device tree or platform code.
+			 */
+			if (interface == PHY_INTERFACE_MODE_RGMII_ID)
+				return PHY_INTERFACE_MODE_RGMII_ID;
+
+			return PHY_INTERFACE_MODE_RGMII;
+		}
+	}
+
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
+		return PHY_INTERFACE_MODE_GMII;
+
+	return PHY_INTERFACE_MODE_MII;
+}
+
 static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 {
 	const char *model;
-	const char *ctype;
 	const void *mac_addr;
 	int err = 0, i;
 	struct net_device *dev = NULL;
@@ -840,7 +747,7 @@
 	/* Parse and initialize group specific information */
 	if (priv->mode == MQ_MG_MODE) {
 		for_each_available_child_of_node(np, child) {
-			if (of_node_cmp(child->name, "queue-group"))
+			if (!of_node_name_eq(child, "queue-group"))
 				continue;
 
 			err = gfar_parse_group(child, priv, model);
@@ -873,8 +780,8 @@
 
 	mac_addr = of_get_mac_address(np);
 
-	if (mac_addr)
-		memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(dev->dev_addr, mac_addr);
 
 	if (model && !strcasecmp(model, "TSEC"))
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_GIGABIT |
@@ -894,13 +801,15 @@
 				     FSL_GIANFAR_DEV_HAS_TIMER |
 				     FSL_GIANFAR_DEV_HAS_RX_FILER;
 
-	err = of_property_read_string(np, "phy-connection-type", &ctype);
-
-	/* We only care about rgmii-id.  The rest are autodetected */
-	if (err == 0 && !strcmp(ctype, "rgmii-id"))
-		priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
+	/* Use PHY connection type from the DT node if one is specified there.
+	 * rgmii-id really needs to be specified. Other types can be
+	 * detected by hardware.
+	 */
+	err = of_get_phy_mode(np);
+	if (err >= 0)
+		priv->interface = err;
 	else
-		priv->interface = PHY_INTERFACE_MODE_MII;
+		priv->interface = gfar_get_interface(dev);
 
 	if (of_find_property(np, "fsl,magic-packet", NULL))
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
@@ -936,85 +845,6 @@
 	return err;
 }
 
-static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
-{
-	struct hwtstamp_config config;
-	struct gfar_private *priv = netdev_priv(netdev);
-
-	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-		return -EFAULT;
-
-	/* reserved for future extensions */
-	if (config.flags)
-		return -EINVAL;
-
-	switch (config.tx_type) {
-	case HWTSTAMP_TX_OFF:
-		priv->hwts_tx_en = 0;
-		break;
-	case HWTSTAMP_TX_ON:
-		if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
-			return -ERANGE;
-		priv->hwts_tx_en = 1;
-		break;
-	default:
-		return -ERANGE;
-	}
-
-	switch (config.rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-		if (priv->hwts_rx_en) {
-			priv->hwts_rx_en = 0;
-			reset_gfar(netdev);
-		}
-		break;
-	default:
-		if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
-			return -ERANGE;
-		if (!priv->hwts_rx_en) {
-			priv->hwts_rx_en = 1;
-			reset_gfar(netdev);
-		}
-		config.rx_filter = HWTSTAMP_FILTER_ALL;
-		break;
-	}
-
-	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
-		-EFAULT : 0;
-}
-
-static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
-{
-	struct hwtstamp_config config;
-	struct gfar_private *priv = netdev_priv(netdev);
-
-	config.flags = 0;
-	config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-	config.rx_filter = (priv->hwts_rx_en ?
-			    HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE);
-
-	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
-		-EFAULT : 0;
-}
-
-static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-{
-	struct phy_device *phydev = dev->phydev;
-
-	if (!netif_running(dev))
-		return -EINVAL;
-
-	if (cmd == SIOCSHWTSTAMP)
-		return gfar_hwtstamp_set(dev, rq);
-	if (cmd == SIOCGHWTSTAMP)
-		return gfar_hwtstamp_get(dev, rq);
-
-	if (!phydev)
-		return -ENODEV;
-
-	return phy_mii_ioctl(phydev, rq, cmd);
-}
-
 static u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar,
 				   u32 class)
 {
@@ -1138,6 +968,2183 @@
 			 priv->errata);
 }
 
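+/* Choose the hash geometry: with the extended-hash feature the 9-bit
+ * hash selects one of 512 bits spread across 16 registers (igaddr0-7
+ * followed by gaddr0-7); otherwise the 8-bit hash selects one of the
+ * 256 bits in gaddr0-7.
+ */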
+static void gfar_init_addr_hash_table(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) {
+		priv->extended_hash = 1;
+		priv->hash_width = 9;
+
+		priv->hash_regs[0] = &regs->igaddr0;
+		priv->hash_regs[1] = &regs->igaddr1;
+		priv->hash_regs[2] = &regs->igaddr2;
+		priv->hash_regs[3] = &regs->igaddr3;
+		priv->hash_regs[4] = &regs->igaddr4;
+		priv->hash_regs[5] = &regs->igaddr5;
+		priv->hash_regs[6] = &regs->igaddr6;
+		priv->hash_regs[7] = &regs->igaddr7;
+		priv->hash_regs[8] = &regs->gaddr0;
+		priv->hash_regs[9] = &regs->gaddr1;
+		priv->hash_regs[10] = &regs->gaddr2;
+		priv->hash_regs[11] = &regs->gaddr3;
+		priv->hash_regs[12] = &regs->gaddr4;
+		priv->hash_regs[13] = &regs->gaddr5;
+		priv->hash_regs[14] = &regs->gaddr6;
+		priv->hash_regs[15] = &regs->gaddr7;
+
+	} else {
+		priv->extended_hash = 0;
+		priv->hash_width = 8;
+
+		priv->hash_regs[0] = &regs->gaddr0;
+		priv->hash_regs[1] = &regs->gaddr1;
+		priv->hash_regs[2] = &regs->gaddr2;
+		priv->hash_regs[3] = &regs->gaddr3;
+		priv->hash_regs[4] = &regs->gaddr4;
+		priv->hash_regs[5] = &regs->gaddr5;
+		priv->hash_regs[6] = &regs->gaddr6;
+		priv->hash_regs[7] = &regs->gaddr7;
+	}
+}
+
+static int __gfar_is_rx_idle(struct gfar_private *priv)
+{
+	u32 res;
+
+	/* Normally TSEC should not hang on GRS commands, so we should
+	 * actually wait for IEVENT_GRSC flag.
+	 */
+	if (!gfar_has_errata(priv, GFAR_ERRATA_A002))
+		return 0;
+
+	/* Read the eTSEC register at offset 0xD1C. If bits 7-14 are
+	 * the same as bits 23-30, the eTSEC Rx is assumed to be idle
+	 * and the Rx can be safely reset.
+	 */
+	res = gfar_read((void __iomem *)priv->gfargrp[0].regs + 0xd1c);
+	res &= 0x7f807f80;
+	if ((res & 0xffff) == (res >> 16))
+		return 1;
+
+	return 0;
+}
+
+/* Halt the receive and transmit queues */
+static void gfar_halt_nodisable(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+	unsigned int timeout;
+	int stopped;
+
+	gfar_ints_disable(priv);
+
+	if (gfar_is_dma_stopped(priv))
+		return;
+
+	/* Stop the DMA, and wait for it to stop */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval |= (DMACTRL_GRS | DMACTRL_GTS);
+	gfar_write(&regs->dmactrl, tempval);
+
+retry:
+	timeout = 1000;
+	while (!(stopped = gfar_is_dma_stopped(priv)) && timeout) {
+		cpu_relax();
+		timeout--;
+	}
+
+	if (!timeout)
+		stopped = gfar_is_dma_stopped(priv);
+
+	if (!stopped && !gfar_is_rx_dma_stopped(priv) &&
+	    !__gfar_is_rx_idle(priv))
+		goto retry;
+}
+
+/* Halt the receive and transmit queues */
+static void gfar_halt(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+
+	/* Disable the Rx/Tx hw queues */
+	gfar_write(&regs->rqueue, 0);
+	gfar_write(&regs->tqueue, 0);
+
+	mdelay(10);
+
+	gfar_halt_nodisable(priv);
+
+	/* Disable Rx/Tx DMA */
+	tempval = gfar_read(&regs->maccfg1);
+	tempval &= ~(MACCFG1_RX_EN | MACCFG1_TX_EN);
+	gfar_write(&regs->maccfg1, tempval);
+}
+
+static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
+{
+	struct txbd8 *txbdp;
+	struct gfar_private *priv = netdev_priv(tx_queue->dev);
+	int i, j;
+
+	txbdp = tx_queue->tx_bd_base;
+
+	for (i = 0; i < tx_queue->tx_ring_size; i++) {
+		if (!tx_queue->tx_skbuff[i])
+			continue;
+
+		dma_unmap_single(priv->dev, be32_to_cpu(txbdp->bufPtr),
+				 be16_to_cpu(txbdp->length), DMA_TO_DEVICE);
+		txbdp->lstatus = 0;
+		for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
+		     j++) {
+			txbdp++;
+			dma_unmap_page(priv->dev, be32_to_cpu(txbdp->bufPtr),
+				       be16_to_cpu(txbdp->length),
+				       DMA_TO_DEVICE);
+		}
+		txbdp++;
+		dev_kfree_skb_any(tx_queue->tx_skbuff[i]);
+		tx_queue->tx_skbuff[i] = NULL;
+	}
+	kfree(tx_queue->tx_skbuff);
+	tx_queue->tx_skbuff = NULL;
+}
+
+static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
+{
+	int i;
+
+	struct rxbd8 *rxbdp = rx_queue->rx_bd_base;
+
+	dev_kfree_skb(rx_queue->skb);
+
+	for (i = 0; i < rx_queue->rx_ring_size; i++) {
+		struct gfar_rx_buff *rxb = &rx_queue->rx_buff[i];
+
+		rxbdp->lstatus = 0;
+		rxbdp->bufPtr = 0;
+		rxbdp++;
+
+		if (!rxb->page)
+			continue;
+
+		dma_unmap_page(rx_queue->dev, rxb->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+		__free_page(rxb->page);
+
+		rxb->page = NULL;
+	}
+
+	kfree(rx_queue->rx_buff);
+	rx_queue->rx_buff = NULL;
+}
+
+/* If there are any tx skbs or rx skbs still around, free them.
+ * Then free tx_skbuff and rx_skbuff
+ */
+static void free_skb_resources(struct gfar_private *priv)
+{
+	struct gfar_priv_tx_q *tx_queue = NULL;
+	struct gfar_priv_rx_q *rx_queue = NULL;
+	int i;
+
+	/* Go through all the buffer descriptors and free their data buffers */
+	for (i = 0; i < priv->num_tx_queues; i++) {
+		struct netdev_queue *txq;
+
+		tx_queue = priv->tx_queue[i];
+		txq = netdev_get_tx_queue(tx_queue->dev, tx_queue->qindex);
+		if (tx_queue->tx_skbuff)
+			free_skb_tx_queue(tx_queue);
+		netdev_tx_reset_queue(txq);
+	}
+
+	for (i = 0; i < priv->num_rx_queues; i++) {
+		rx_queue = priv->rx_queue[i];
+		if (rx_queue->rx_buff)
+			free_skb_rx_queue(rx_queue);
+	}
+
+	dma_free_coherent(priv->dev,
+			  sizeof(struct txbd8) * priv->total_tx_ring_size +
+			  sizeof(struct rxbd8) * priv->total_rx_ring_size,
+			  priv->tx_queue[0]->tx_bd_base,
+			  priv->tx_queue[0]->tx_bd_dma_base);
+}
+
+void stop_gfar(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+
+	netif_tx_stop_all_queues(dev);
+
+	smp_mb__before_atomic();
+	set_bit(GFAR_DOWN, &priv->state);
+	smp_mb__after_atomic();
+
+	disable_napi(priv);
+
+	/* disable ints and gracefully shut down Rx/Tx DMA */
+	gfar_halt(priv);
+
+	phy_stop(dev->phydev);
+
+	free_skb_resources(priv);
+}
+
+static void gfar_start(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+	int i = 0;
+
+	/* Enable Rx/Tx hw queues */
+	gfar_write(&regs->rqueue, priv->rqueue);
+	gfar_write(&regs->tqueue, priv->tqueue);
+
+	/* Initialize DMACTRL to have WWR and WOP */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval |= DMACTRL_INIT_SETTINGS;
+	gfar_write(&regs->dmactrl, tempval);
+
+	/* Make sure we aren't stopped */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval &= ~(DMACTRL_GRS | DMACTRL_GTS);
+	gfar_write(&regs->dmactrl, tempval);
+
+	for (i = 0; i < priv->num_grps; i++) {
+		regs = priv->gfargrp[i].regs;
+		/* Clear THLT/RHLT, so that the DMA starts polling now */
+		gfar_write(&regs->tstat, priv->gfargrp[i].tstat);
+		gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
+	}
+
+	/* Enable Rx/Tx DMA */
+	tempval = gfar_read(&regs->maccfg1);
+	tempval |= (MACCFG1_RX_EN | MACCFG1_TX_EN);
+	gfar_write(&regs->maccfg1, tempval);
+
+	gfar_ints_enable(priv);
+
+	netif_trans_update(priv->ndev); /* prevent tx timeout */
+}
+
+static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb)
+{
+	struct page *page;
+	dma_addr_t addr;
+
+	page = dev_alloc_page();
+	if (unlikely(!page))
+		return false;
+
+	addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(rxq->dev, addr))) {
+		__free_page(page);
+
+		return false;
+	}
+
+	rxb->dma = addr;
+	rxb->page = page;
+	rxb->page_offset = 0;
+
+	return true;
+}
+
+static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue)
+{
+	struct gfar_private *priv = netdev_priv(rx_queue->ndev);
+	struct gfar_extra_stats *estats = &priv->extra_stats;
+
+	netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n");
+	atomic64_inc(&estats->rx_alloc_err);
+}
+
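+/* Refill up to alloc_cnt RxBDs starting at next_to_use: reuse the
+ * buffer's recycled page half if one is still attached, otherwise map
+ * a fresh page; stop early (leaving the ring short) if allocation
+ * fails.
+ */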
+static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
+				int alloc_cnt)
+{
+	struct rxbd8 *bdp;
+	struct gfar_rx_buff *rxb;
+	int i;
+
+	i = rx_queue->next_to_use;
+	bdp = &rx_queue->rx_bd_base[i];
+	rxb = &rx_queue->rx_buff[i];
+
+	while (alloc_cnt--) {
+		/* try reuse page */
+		if (unlikely(!rxb->page)) {
+			if (unlikely(!gfar_new_page(rx_queue, rxb))) {
+				gfar_rx_alloc_err(rx_queue);
+				break;
+			}
+		}
+
+		/* Setup the new RxBD */
+		gfar_init_rxbdp(rx_queue, bdp,
+				rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT);
+
+		/* Update to the next pointer */
+		bdp++;
+		rxb++;
+
+		if (unlikely(++i == rx_queue->rx_ring_size)) {
+			i = 0;
+			bdp = rx_queue->rx_bd_base;
+			rxb = rx_queue->rx_buff;
+		}
+	}
+
+	rx_queue->next_to_use = i;
+	rx_queue->next_to_alloc = i;
+}
+
+static void gfar_init_bds(struct net_device *ndev)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	struct gfar_priv_tx_q *tx_queue = NULL;
+	struct gfar_priv_rx_q *rx_queue = NULL;
+	struct txbd8 *txbdp;
+	u32 __iomem *rfbptr;
+	int i, j;
+
+	for (i = 0; i < priv->num_tx_queues; i++) {
+		tx_queue = priv->tx_queue[i];
+		/* Initialize some variables in our dev structure */
+		tx_queue->num_txbdfree = tx_queue->tx_ring_size;
+		tx_queue->dirty_tx = tx_queue->tx_bd_base;
+		tx_queue->cur_tx = tx_queue->tx_bd_base;
+		tx_queue->skb_curtx = 0;
+		tx_queue->skb_dirtytx = 0;
+
+		/* Initialize Transmit Descriptor Ring */
+		txbdp = tx_queue->tx_bd_base;
+		for (j = 0; j < tx_queue->tx_ring_size; j++) {
+			txbdp->lstatus = 0;
+			txbdp->bufPtr = 0;
+			txbdp++;
+		}
+
+		/* Set the last descriptor in the ring to indicate wrap */
+		txbdp--;
+		txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) |
+					    TXBD_WRAP);
+	}
+
+	rfbptr = &regs->rfbptr0;
+	for (i = 0; i < priv->num_rx_queues; i++) {
+		rx_queue = priv->rx_queue[i];
+
+		rx_queue->next_to_clean = 0;
+		rx_queue->next_to_use = 0;
+		rx_queue->next_to_alloc = 0;
+
+		/* make sure next_to_clean != next_to_use after this
+		 * by leaving at least 1 unused descriptor
+		 */
+		gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue));
+
+		rx_queue->rfbptr = rfbptr;
+		rfbptr += 2;
+	}
+}
+
+static int gfar_alloc_skb_resources(struct net_device *ndev)
+{
+	void *vaddr;
+	dma_addr_t addr;
+	int i, j;
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct device *dev = priv->dev;
+	struct gfar_priv_tx_q *tx_queue = NULL;
+	struct gfar_priv_rx_q *rx_queue = NULL;
+
+	priv->total_tx_ring_size = 0;
+	for (i = 0; i < priv->num_tx_queues; i++)
+		priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size;
+
+	priv->total_rx_ring_size = 0;
+	for (i = 0; i < priv->num_rx_queues; i++)
+		priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size;
+
+	/* Allocate memory for the buffer descriptors */
+	vaddr = dma_alloc_coherent(dev,
+				   (priv->total_tx_ring_size *
+				    sizeof(struct txbd8)) +
+				   (priv->total_rx_ring_size *
+				    sizeof(struct rxbd8)),
+				   &addr, GFP_KERNEL);
+	if (!vaddr)
+		return -ENOMEM;
+
+	for (i = 0; i < priv->num_tx_queues; i++) {
+		tx_queue = priv->tx_queue[i];
+		tx_queue->tx_bd_base = vaddr;
+		tx_queue->tx_bd_dma_base = addr;
+		tx_queue->dev = ndev;
+		/* enet DMA only understands physical addresses */
+		addr  += sizeof(struct txbd8) * tx_queue->tx_ring_size;
+		vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
+	}
+
+	/* Start the rx descriptor ring where the tx ring leaves off */
+	for (i = 0; i < priv->num_rx_queues; i++) {
+		rx_queue = priv->rx_queue[i];
+		rx_queue->rx_bd_base = vaddr;
+		rx_queue->rx_bd_dma_base = addr;
+		rx_queue->ndev = ndev;
+		rx_queue->dev = dev;
+		addr  += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
+		vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
+	}
+
+	/* Setup the skbuff rings */
+	for (i = 0; i < priv->num_tx_queues; i++) {
+		tx_queue = priv->tx_queue[i];
+		tx_queue->tx_skbuff =
+			kmalloc_array(tx_queue->tx_ring_size,
+				      sizeof(*tx_queue->tx_skbuff),
+				      GFP_KERNEL);
+		if (!tx_queue->tx_skbuff)
+			goto cleanup;
+
+		for (j = 0; j < tx_queue->tx_ring_size; j++)
+			tx_queue->tx_skbuff[j] = NULL;
+	}
+
+	for (i = 0; i < priv->num_rx_queues; i++) {
+		rx_queue = priv->rx_queue[i];
+		rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size,
+					    sizeof(*rx_queue->rx_buff),
+					    GFP_KERNEL);
+		if (!rx_queue->rx_buff)
+			goto cleanup;
+	}
+
+	gfar_init_bds(ndev);
+
+	return 0;
+
+cleanup:
+	free_skb_resources(priv);
+	return -ENOMEM;
+}
+
+/* Bring the controller up and running */
+int startup_gfar(struct net_device *ndev)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
+	int err;
+
+	gfar_mac_reset(priv);
+
+	err = gfar_alloc_skb_resources(ndev);
+	if (err)
+		return err;
+
+	gfar_init_tx_rx_base(priv);
+
+	smp_mb__before_atomic();
+	clear_bit(GFAR_DOWN, &priv->state);
+	smp_mb__after_atomic();
+
+	/* Start Rx/Tx DMA and enable the interrupts */
+	gfar_start(priv);
+
+	/* force link state update after mac reset */
+	priv->oldlink = 0;
+	priv->oldspeed = 0;
+	priv->oldduplex = -1;
+
+	phy_start(ndev->phydev);
+
+	enable_napi(priv);
+
+	netif_tx_wake_all_queues(ndev);
+
+	return 0;
+}
+
+static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
+{
+	struct net_device *ndev = priv->ndev;
+	struct phy_device *phydev = ndev->phydev;
+	u32 val = 0;
+
+	if (!phydev->duplex)
+		return val;
+
+	if (!priv->pause_aneg_en) {
+		if (priv->tx_pause_en)
+			val |= MACCFG1_TX_FLOW;
+		if (priv->rx_pause_en)
+			val |= MACCFG1_RX_FLOW;
+	} else {
+		u16 lcl_adv, rmt_adv;
+		u8 flowctrl;
+		/* get link partner capabilities */
+		rmt_adv = 0;
+		if (phydev->pause)
+			rmt_adv = LPA_PAUSE_CAP;
+		if (phydev->asym_pause)
+			rmt_adv |= LPA_PAUSE_ASYM;
+
+		lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising);
+		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+		if (flowctrl & FLOW_CTRL_TX)
+			val |= MACCFG1_TX_FLOW;
+		if (flowctrl & FLOW_CTRL_RX)
+			val |= MACCFG1_RX_FLOW;
+	}
+
+	return val;
+}
+
+static noinline void gfar_update_link_state(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	struct net_device *ndev = priv->ndev;
+	struct phy_device *phydev = ndev->phydev;
+	struct gfar_priv_rx_q *rx_queue = NULL;
+	int i;
+
+	if (unlikely(test_bit(GFAR_RESETTING, &priv->state)))
+		return;
+
+	if (phydev->link) {
+		u32 tempval1 = gfar_read(&regs->maccfg1);
+		u32 tempval = gfar_read(&regs->maccfg2);
+		u32 ecntrl = gfar_read(&regs->ecntrl);
+		u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+
+		if (phydev->duplex != priv->oldduplex) {
+			if (!(phydev->duplex))
+				tempval &= ~(MACCFG2_FULL_DUPLEX);
+			else
+				tempval |= MACCFG2_FULL_DUPLEX;
+
+			priv->oldduplex = phydev->duplex;
+		}
+
+		if (phydev->speed != priv->oldspeed) {
+			switch (phydev->speed) {
+			case 1000:
+				tempval =
+				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
+
+				ecntrl &= ~(ECNTRL_R100);
+				break;
+			case 100:
+			case 10:
+				tempval =
+				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
+
+				/* Reduced mode distinguishes
+				 * between 10 and 100
+				 */
+				if (phydev->speed == SPEED_100)
+					ecntrl |= ECNTRL_R100;
+				else
+					ecntrl &= ~(ECNTRL_R100);
+				break;
+			default:
+				netif_warn(priv, link, priv->ndev,
+					   "Ack!  Speed (%d) is not 10/100/1000!\n",
+					   phydev->speed);
+				break;
+			}
+
+			priv->oldspeed = phydev->speed;
+		}
+
+		tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
+		tempval1 |= gfar_get_flowctrl_cfg(priv);
+
+		/* Turn last free buffer recording on */
+		if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) {
+			for (i = 0; i < priv->num_rx_queues; i++) {
+				u32 bdp_dma;
+
+				rx_queue = priv->rx_queue[i];
+				bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
+				gfar_write(rx_queue->rfbptr, bdp_dma);
+			}
+
+			priv->tx_actual_en = 1;
+		}
+
+		if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval))
+			priv->tx_actual_en = 0;
+
+		gfar_write(&regs->maccfg1, tempval1);
+		gfar_write(&regs->maccfg2, tempval);
+		gfar_write(&regs->ecntrl, ecntrl);
+
+		if (!priv->oldlink)
+			priv->oldlink = 1;
+
+	} else if (priv->oldlink) {
+		priv->oldlink = 0;
+		priv->oldspeed = 0;
+		priv->oldduplex = -1;
+	}
+
+	if (netif_msg_link(priv))
+		phy_print_status(phydev);
+}
+
+/* Called every time the controller might need to be made
+ * aware of new link state.  The PHY code conveys this
+ * information through variables in the phydev structure, and this
+ * function converts those variables into the appropriate
+ * register values, and can bring down the device if needed.
+ */
+static void adjust_link(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct phy_device *phydev = dev->phydev;
+
+	if (unlikely(phydev->link != priv->oldlink ||
+		     (phydev->link && (phydev->duplex != priv->oldduplex ||
+				       phydev->speed != priv->oldspeed))))
+		gfar_update_link_state(priv);
+}
+
+/* Initialize TBI PHY interface for communicating with the
+ * SERDES lynx PHY on the chip.  We communicate with this PHY
+ * through the MDIO bus on each controller, treating it as a
+ * "normal" PHY at the address found in the TBIPA register.  We assume
+ * that the TBIPA register is valid.  Either the MDIO bus code will set
+ * it to a value that doesn't conflict with other PHYs on the bus, or the
+ * value doesn't matter, as there are no other PHYs on the bus.
+ */
+static void gfar_configure_serdes(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct phy_device *tbiphy;
+
+	if (!priv->tbi_node) {
+		dev_warn(&dev->dev, "error: SGMII mode requires that the "
+				    "device tree specify a tbi-handle\n");
+		return;
+	}
+
+	tbiphy = of_phy_find_device(priv->tbi_node);
+	if (!tbiphy) {
+		dev_err(&dev->dev, "error: Could not get TBI device\n");
+		return;
+	}
+
+	/* If the link is already up, we must already be ok, and don't need to
+	 * configure and reset the TBI<->SerDes link.  Maybe U-Boot configured
+	 * everything for us?  Resetting it takes the link down and requires
+	 * several seconds for it to come back.
+	 */
+	if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) {
+		put_device(&tbiphy->mdio.dev);
+		return;
+	}
+
+	/* Single clk mode, mii mode off(for serdes communication) */
+	phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT);
+
+	phy_write(tbiphy, MII_ADVERTISE,
+		  ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE |
+		  ADVERTISE_1000XPSE_ASYM);
+
+	phy_write(tbiphy, MII_BMCR,
+		  BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX |
+		  BMCR_SPEED1000);
+
+	put_device(&tbiphy->mdio.dev);
+}
+
+/* Initializes driver's PHY state, and attaches to the PHY.
+ * Returns 0 on success.
+ */
+static int init_phy(struct net_device *dev)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+	struct gfar_private *priv = netdev_priv(dev);
+	phy_interface_t interface = priv->interface;
+	struct phy_device *phydev;
+	struct ethtool_eee edata;
+
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask);
+
+	priv->oldlink = 0;
+	priv->oldspeed = 0;
+	priv->oldduplex = -1;
+
+	phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
+				interface);
+	if (!phydev) {
+		dev_err(&dev->dev, "could not attach to PHY\n");
+		return -ENODEV;
+	}
+
+	if (interface == PHY_INTERFACE_MODE_SGMII)
+		gfar_configure_serdes(dev);
+
+	/* Remove any features not supported by the controller */
+	linkmode_and(phydev->supported, phydev->supported, mask);
+	linkmode_copy(phydev->advertising, phydev->supported);
+
+	/* Add support for flow control */
+	phy_support_asym_pause(phydev);
+
+	/* disable EEE autoneg, EEE not supported by eTSEC */
+	memset(&edata, 0, sizeof(struct ethtool_eee));
+	phy_ethtool_set_eee(phydev, &edata);
+
+	return 0;
+}
+
+static inline struct txfcb *gfar_add_fcb(struct sk_buff *skb)
+{
+	struct txfcb *fcb = skb_push(skb, GMAC_FCB_LEN);
+
+	memset(fcb, 0, GMAC_FCB_LEN);
+
+	return fcb;
+}
+
+static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb,
+				    int fcb_length)
+{
+	/* If we're here, it's an IP packet with a TCP or UDP
+	 * payload.  We set it to checksum, using a pseudo-header
+	 * we provide
+	 */
+	u8 flags = TXFCB_DEFAULT;
+
+	/* Tell the controller what the protocol is
+	 * And provide the already calculated phcs
+	 */
+	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
+		flags |= TXFCB_UDP;
+		fcb->phcs = (__force __be16)(udp_hdr(skb)->check);
+	} else {
+		fcb->phcs = (__force __be16)(tcp_hdr(skb)->check);
+	}
+
+	/* l3os is the distance between the start of the
+	 * frame (skb->data) and the start of the IP hdr.
+	 * l4os is the distance between the start of the
+	 * l3 hdr and the l4 hdr
+	 */
+	fcb->l3os = (u8)(skb_network_offset(skb) - fcb_length);
+	fcb->l4os = skb_network_header_len(skb);
+
+	fcb->flags = flags;
+}
+
+static inline void gfar_tx_vlan(struct sk_buff *skb, struct txfcb *fcb)
+{
+	fcb->flags |= TXFCB_VLN;
+	fcb->vlctl = cpu_to_be16(skb_vlan_tag_get(skb));
+}
+
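+/* Advance a TxBD pointer by 'stride' descriptors, wrapping around the
+ * end of the contiguous ring of 'ring_size' descriptors.
+ */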
+static inline struct txbd8 *skip_txbd(struct txbd8 *bdp, int stride,
+				      struct txbd8 *base, int ring_size)
+{
+	struct txbd8 *new_bd = bdp + stride;
+
+	return (new_bd >= (base + ring_size)) ? (new_bd - ring_size) : new_bd;
+}
+
+static inline struct txbd8 *next_txbd(struct txbd8 *bdp, struct txbd8 *base,
+				      int ring_size)
+{
+	return skip_txbd(bdp, 1, base, ring_size);
+}
+
+/* eTSEC12: csum generation not supported for some fcb offsets */
+static inline bool gfar_csum_errata_12(struct gfar_private *priv,
+				       unsigned long fcb_addr)
+{
+	return (gfar_has_errata(priv, GFAR_ERRATA_12) &&
+	       (fcb_addr % 0x20) > 0x18);
+}
+
+/* eTSEC76: csum generation for frames larger than 2500 may
+ * cause excess delays before start of transmission
+ */
+static inline bool gfar_csum_errata_76(struct gfar_private *priv,
+				       unsigned int len)
+{
+	return (gfar_has_errata(priv, GFAR_ERRATA_76) &&
+	       (len > 2500));
+}
+
+/* This is called by the kernel when a frame is ready for transmission.
+ * It is pointed to by the dev->hard_start_xmit function pointer
+ */
+static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar_priv_tx_q *tx_queue = NULL;
+	struct netdev_queue *txq;
+	struct gfar __iomem *regs = NULL;
+	struct txfcb *fcb = NULL;
+	struct txbd8 *txbdp, *txbdp_start, *base, *txbdp_tstamp = NULL;
+	u32 lstatus;
+	skb_frag_t *frag;
+	int i, rq = 0;
+	int do_tstamp, do_csum, do_vlan;
+	u32 bufaddr;
+	unsigned int nr_frags, nr_txbds, bytes_sent, fcb_len = 0;
+
+	rq = skb->queue_mapping;
+	tx_queue = priv->tx_queue[rq];
+	txq = netdev_get_tx_queue(dev, rq);
+	base = tx_queue->tx_bd_base;
+	regs = tx_queue->grp->regs;
+
+	do_csum = (CHECKSUM_PARTIAL == skb->ip_summed);
+	do_vlan = skb_vlan_tag_present(skb);
+	do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+		    priv->hwts_tx_en;
+
+	if (do_csum || do_vlan)
+		fcb_len = GMAC_FCB_LEN;
+
+	/* check if time stamp should be generated */
+	if (unlikely(do_tstamp))
+		fcb_len = GMAC_FCB_LEN + GMAC_TXPAL_LEN;
+
+	/* make space for additional header when fcb is needed */
+	if (fcb_len && unlikely(skb_headroom(skb) < fcb_len)) {
+		struct sk_buff *skb_new;
+
+		skb_new = skb_realloc_headroom(skb, fcb_len);
+		if (!skb_new) {
+			dev->stats.tx_errors++;
+			dev_kfree_skb_any(skb);
+			return NETDEV_TX_OK;
+		}
+
+		if (skb->sk)
+			skb_set_owner_w(skb_new, skb->sk);
+		dev_consume_skb_any(skb);
+		skb = skb_new;
+	}
+
+	/* total number of fragments in the SKB */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	/* calculate the required number of TxBDs for this skb */
+	if (unlikely(do_tstamp))
+		nr_txbds = nr_frags + 2;
+	else
+		nr_txbds = nr_frags + 1;
+
+	/* check if there is space to queue this packet */
+	if (nr_txbds > tx_queue->num_txbdfree) {
+		/* no space, stop the queue */
+		netif_tx_stop_queue(txq);
+		dev->stats.tx_fifo_errors++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Update transmit stats */
+	bytes_sent = skb->len;
+	tx_queue->stats.tx_bytes += bytes_sent;
+	/* keep Tx bytes on wire for BQL accounting */
+	GFAR_CB(skb)->bytes_sent = bytes_sent;
+	tx_queue->stats.tx_packets++;
+
+	txbdp = txbdp_start = tx_queue->cur_tx;
+	lstatus = be32_to_cpu(txbdp->lstatus);
+
+	/* Add TxPAL between FCB and frame if required */
+	if (unlikely(do_tstamp)) {
+		skb_push(skb, GMAC_TXPAL_LEN);
+		memset(skb->data, 0, GMAC_TXPAL_LEN);
+	}
+
+	/* Add TxFCB if required */
+	if (fcb_len) {
+		fcb = gfar_add_fcb(skb);
+		lstatus |= BD_LFLAG(TXBD_TOE);
+	}
+
+	/* Set up checksumming */
+	if (do_csum) {
+		gfar_tx_checksum(skb, fcb, fcb_len);
+
+		if (unlikely(gfar_csum_errata_12(priv, (unsigned long)fcb)) ||
+		    unlikely(gfar_csum_errata_76(priv, skb->len))) {
+			__skb_pull(skb, GMAC_FCB_LEN);
+			skb_checksum_help(skb);
+			if (do_vlan || do_tstamp) {
+				/* put back a new fcb for vlan/tstamp TOE */
+				fcb = gfar_add_fcb(skb);
+			} else {
+				/* Tx TOE not used */
+				lstatus &= ~(BD_LFLAG(TXBD_TOE));
+				fcb = NULL;
+			}
+		}
+	}
+
+	if (do_vlan)
+		gfar_tx_vlan(skb, fcb);
+
+	bufaddr = dma_map_single(priv->dev, skb->data, skb_headlen(skb),
+				 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
+		goto dma_map_err;
+
+	txbdp_start->bufPtr = cpu_to_be32(bufaddr);
+
+	/* Time stamp insertion requires one additional TxBD */
+	if (unlikely(do_tstamp))
+		txbdp_tstamp = txbdp = next_txbd(txbdp, base,
+						 tx_queue->tx_ring_size);
+
+	if (likely(!nr_frags)) {
+		if (likely(!do_tstamp))
+			lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+	} else {
+		u32 lstatus_start = lstatus;
+
+		/* Place the fragment addresses and lengths into the TxBDs */
+		frag = &skb_shinfo(skb)->frags[0];
+		for (i = 0; i < nr_frags; i++, frag++) {
+			unsigned int size;
+
+			/* Point at the next BD, wrapping as needed */
+			txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+
+			size = skb_frag_size(frag);
+
+			lstatus = be32_to_cpu(txbdp->lstatus) | size |
+				  BD_LFLAG(TXBD_READY);
+
+			/* Handle the last BD specially */
+			if (i == nr_frags - 1)
+				lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+
+			bufaddr = skb_frag_dma_map(priv->dev, frag, 0,
+						   size, DMA_TO_DEVICE);
+			if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
+				goto dma_map_err;
+
+			/* set the TxBD length and buffer pointer */
+			txbdp->bufPtr = cpu_to_be32(bufaddr);
+			txbdp->lstatus = cpu_to_be32(lstatus);
+		}
+
+		lstatus = lstatus_start;
+	}
+
+	/* If time stamping is requested one additional TxBD must be set up. The
+	 * first TxBD points to the FCB and must have a data length of
+	 * GMAC_FCB_LEN. The second TxBD points to the actual frame data with
+	 * the full frame length.
+	 */
+	if (unlikely(do_tstamp)) {
+		u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
+
+		bufaddr = be32_to_cpu(txbdp_start->bufPtr);
+		bufaddr += fcb_len;
+
+		lstatus_ts |= BD_LFLAG(TXBD_READY) |
+			      (skb_headlen(skb) - fcb_len);
+		if (!nr_frags)
+			lstatus_ts |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+
+		txbdp_tstamp->bufPtr = cpu_to_be32(bufaddr);
+		txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
+		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN;
+
+		/* Setup tx hardware time stamping */
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		fcb->ptp = 1;
+	} else {
+		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
+	}
+
+	netdev_tx_sent_queue(txq, bytes_sent);
+
+	gfar_wmb();
+
+	txbdp_start->lstatus = cpu_to_be32(lstatus);
+
+	gfar_wmb(); /* force lstatus write before tx_skbuff */
+
+	tx_queue->tx_skbuff[tx_queue->skb_curtx] = skb;
+
+	/* Update the current skb pointer to the next entry we will use
+	 * (wrapping if necessary)
+	 */
+	tx_queue->skb_curtx = (tx_queue->skb_curtx + 1) &
+			      TX_RING_MOD_MASK(tx_queue->tx_ring_size);
+
+	tx_queue->cur_tx = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+
+	/* We can work in parallel with gfar_clean_tx_ring(), except
+	 * when modifying num_txbdfree. Note that we didn't grab the lock
+	 * when we were reading the num_txbdfree and checking for available
+	 * space, that's because outside of this function it can only grow.
+	 */
+	spin_lock_bh(&tx_queue->txlock);
+	/* reduce TxBD free count */
+	tx_queue->num_txbdfree -= (nr_txbds);
+	spin_unlock_bh(&tx_queue->txlock);
+
+	/* If the next BD still needs to be cleaned up, then the bds
+	 * are full.  We need to tell the kernel to stop sending us stuff.
+	 */
+	if (!tx_queue->num_txbdfree) {
+		netif_tx_stop_queue(txq);
+
+		dev->stats.tx_fifo_errors++;
+	}
+
+	/* Tell the DMA to go go go */
+	gfar_write(&regs->tstat, TSTAT_CLEAR_THALT >> tx_queue->qindex);
+
+	return NETDEV_TX_OK;
+
+dma_map_err:
+	txbdp = next_txbd(txbdp_start, base, tx_queue->tx_ring_size);
+	if (do_tstamp)
+		txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+	for (i = 0; i < nr_frags; i++) {
+		lstatus = be32_to_cpu(txbdp->lstatus);
+		if (!(lstatus & BD_LFLAG(TXBD_READY)))
+			break;
+
+		lstatus &= ~BD_LFLAG(TXBD_READY);
+		txbdp->lstatus = cpu_to_be32(lstatus);
+		bufaddr = be32_to_cpu(txbdp->bufPtr);
+		dma_unmap_page(priv->dev, bufaddr, be16_to_cpu(txbdp->length),
+			       DMA_TO_DEVICE);
+		txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+	}
+	gfar_wmb();
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Changes the mac address if the controller is not running. */
+static int gfar_set_mac_address(struct net_device *dev)
+{
+	gfar_set_mac_for_addr(dev, 0, dev->dev_addr);
+
+	return 0;
+}
+
+static int gfar_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+
+	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
+		cpu_relax();
+
+	if (dev->flags & IFF_UP)
+		stop_gfar(dev);
+
+	dev->mtu = new_mtu;
+
+	if (dev->flags & IFF_UP)
+		startup_gfar(dev);
+
+	clear_bit_unlock(GFAR_RESETTING, &priv->state);
+
+	return 0;
+}
+
+static void reset_gfar(struct net_device *ndev)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
+
+	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
+		cpu_relax();
+
+	stop_gfar(ndev);
+	startup_gfar(ndev);
+
+	clear_bit_unlock(GFAR_RESETTING, &priv->state);
+}
+
+/* gfar_reset_task gets scheduled when a packet has not been
+ * transmitted after a set amount of time.
+ * For now, assume that clearing out all the structures, and
+ * starting over will fix the problem.
+ */
+static void gfar_reset_task(struct work_struct *work)
+{
+	struct gfar_private *priv = container_of(work, struct gfar_private,
+						 reset_task);
+	reset_gfar(priv->ndev);
+}
+
+static void gfar_timeout(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+
+	dev->stats.tx_errors++;
+	schedule_work(&priv->reset_task);
+}
+
+static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	struct gfar_private *priv = netdev_priv(netdev);
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		priv->hwts_tx_en = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
+			return -ERANGE;
+		priv->hwts_tx_en = 1;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		if (priv->hwts_rx_en) {
+			priv->hwts_rx_en = 0;
+			reset_gfar(netdev);
+		}
+		break;
+	default:
+		if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
+			return -ERANGE;
+		if (!priv->hwts_rx_en) {
+			priv->hwts_rx_en = 1;
+			reset_gfar(netdev);
+		}
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	}
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	struct gfar_private *priv = netdev_priv(netdev);
+
+	config.flags = 0;
+	config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	config.rx_filter = (priv->hwts_rx_en ?
+			    HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE);
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct phy_device *phydev = dev->phydev;
+
+	if (!netif_running(dev))
+		return -EINVAL;
+
+	if (cmd == SIOCSHWTSTAMP)
+		return gfar_hwtstamp_set(dev, rq);
+	if (cmd == SIOCGHWTSTAMP)
+		return gfar_hwtstamp_get(dev, rq);
+
+	if (!phydev)
+		return -ENODEV;
+
+	return phy_mii_ioctl(phydev, rq, cmd);
+}
+
+/* Reclaim completed Tx descriptors; runs from Tx NAPI poll context */
+static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
+{
+	struct net_device *dev = tx_queue->dev;
+	struct netdev_queue *txq;
+	struct gfar_private *priv = netdev_priv(dev);
+	struct txbd8 *bdp, *next = NULL;
+	struct txbd8 *lbdp = NULL;
+	struct txbd8 *base = tx_queue->tx_bd_base;
+	struct sk_buff *skb;
+	int skb_dirtytx;
+	int tx_ring_size = tx_queue->tx_ring_size;
+	int frags = 0, nr_txbds = 0;
+	int i;
+	int howmany = 0;
+	int tqi = tx_queue->qindex;
+	unsigned int bytes_sent = 0;
+	u32 lstatus;
+	size_t buflen;
+
+	txq = netdev_get_tx_queue(dev, tqi);
+	bdp = tx_queue->dirty_tx;
+	skb_dirtytx = tx_queue->skb_dirtytx;
+
+	while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
+		frags = skb_shinfo(skb)->nr_frags;
+
+		/* When time stamping, one additional TxBD must be freed.
+		 * Also, we need to dma_unmap_single() the TxPAL.
+		 */
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
+			nr_txbds = frags + 2;
+		else
+			nr_txbds = frags + 1;
+
+		lbdp = skip_txbd(bdp, nr_txbds - 1, base, tx_ring_size);
+
+		lstatus = be32_to_cpu(lbdp->lstatus);
+
+		/* Only clean completed frames */
+		if ((lstatus & BD_LFLAG(TXBD_READY)) &&
+		    (lstatus & BD_LENGTH_MASK))
+			break;
+
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+			next = next_txbd(bdp, base, tx_ring_size);
+			buflen = be16_to_cpu(next->length) +
+				 GMAC_FCB_LEN + GMAC_TXPAL_LEN;
+		} else {
+			buflen = be16_to_cpu(bdp->length);
+		}
+
+		dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
+				 buflen, DMA_TO_DEVICE);
+
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
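+			/* The hardware-provided 64-bit Tx timestamp sits at
+			 * skb->data + 0x10 (past the TxPAL and FCB), rounded
+			 * down to an 8-byte boundary.
+			 */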
+			struct skb_shared_hwtstamps shhwtstamps;
+			u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
+					  ~0x7UL);
+
+			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+			shhwtstamps.hwtstamp = ns_to_ktime(be64_to_cpu(*ns));
+			skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN);
+			skb_tstamp_tx(skb, &shhwtstamps);
+			gfar_clear_txbd_status(bdp);
+			bdp = next;
+		}
+
+		gfar_clear_txbd_status(bdp);
+		bdp = next_txbd(bdp, base, tx_ring_size);
+
+		for (i = 0; i < frags; i++) {
+			dma_unmap_page(priv->dev, be32_to_cpu(bdp->bufPtr),
+				       be16_to_cpu(bdp->length),
+				       DMA_TO_DEVICE);
+			gfar_clear_txbd_status(bdp);
+			bdp = next_txbd(bdp, base, tx_ring_size);
+		}
+
+		bytes_sent += GFAR_CB(skb)->bytes_sent;
+
+		dev_kfree_skb_any(skb);
+
+		tx_queue->tx_skbuff[skb_dirtytx] = NULL;
+
+		skb_dirtytx = (skb_dirtytx + 1) &
+			      TX_RING_MOD_MASK(tx_ring_size);
+
+		howmany++;
+		spin_lock(&tx_queue->txlock);
+		tx_queue->num_txbdfree += nr_txbds;
+		spin_unlock(&tx_queue->txlock);
+	}
+
+	/* If we freed a buffer, we can restart transmission, if necessary */
+	if (tx_queue->num_txbdfree &&
+	    netif_tx_queue_stopped(txq) &&
+	    !(test_bit(GFAR_DOWN, &priv->state)))
+		netif_wake_subqueue(priv->ndev, tqi);
+
+	/* Update dirty indicators */
+	tx_queue->skb_dirtytx = skb_dirtytx;
+	tx_queue->dirty_tx = bdp;
+
+	netdev_tx_completed_queue(txq, howmany, bytes_sent);
+}
+
+static void count_errors(u32 lstatus, struct net_device *ndev)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct gfar_extra_stats *estats = &priv->extra_stats;
+
+	/* If the packet was truncated, none of the other errors matter */
+	if (lstatus & BD_LFLAG(RXBD_TRUNCATED)) {
+		stats->rx_length_errors++;
+
+		atomic64_inc(&estats->rx_trunc);
+
+		return;
+	}
+	/* Count the errors, if there were any */
+	if (lstatus & BD_LFLAG(RXBD_LARGE | RXBD_SHORT)) {
+		stats->rx_length_errors++;
+
+		if (lstatus & BD_LFLAG(RXBD_LARGE))
+			atomic64_inc(&estats->rx_large);
+		else
+			atomic64_inc(&estats->rx_short);
+	}
+	if (lstatus & BD_LFLAG(RXBD_NONOCTET)) {
+		stats->rx_frame_errors++;
+		atomic64_inc(&estats->rx_nonoctet);
+	}
+	if (lstatus & BD_LFLAG(RXBD_CRCERR)) {
+		atomic64_inc(&estats->rx_crcerr);
+		stats->rx_crc_errors++;
+	}
+	if (lstatus & BD_LFLAG(RXBD_OVERRUN)) {
+		atomic64_inc(&estats->rx_overrun);
+		stats->rx_over_errors++;
+	}
+}
+
+static irqreturn_t gfar_receive(int irq, void *grp_id)
+{
+	struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
+	unsigned long flags;
+	u32 imask, ievent;
+
+	ievent = gfar_read(&grp->regs->ievent);
+
+	if (unlikely(ievent & IEVENT_FGPI)) {
+		gfar_write(&grp->regs->ievent, IEVENT_FGPI);
+		return IRQ_HANDLED;
+	}
+
+	if (likely(napi_schedule_prep(&grp->napi_rx))) {
+		spin_lock_irqsave(&grp->grplock, flags);
+		imask = gfar_read(&grp->regs->imask);
+		imask &= IMASK_RX_DISABLED;
+		gfar_write(&grp->regs->imask, imask);
+		spin_unlock_irqrestore(&grp->grplock, flags);
+		__napi_schedule(&grp->napi_rx);
+	} else {
+		/* Clear IEVENT, so interrupts aren't called again
+		 * because of the packets that have already arrived.
+		 */
+		gfar_write(&grp->regs->ievent, IEVENT_RX_MASK);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Interrupt Handler for Transmit complete */
+static irqreturn_t gfar_transmit(int irq, void *grp_id)
+{
+	struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
+	unsigned long flags;
+	u32 imask;
+
+	if (likely(napi_schedule_prep(&grp->napi_tx))) {
+		spin_lock_irqsave(&grp->grplock, flags);
+		imask = gfar_read(&grp->regs->imask);
+		imask &= IMASK_TX_DISABLED;
+		gfar_write(&grp->regs->imask, imask);
+		spin_unlock_irqrestore(&grp->grplock, flags);
+		__napi_schedule(&grp->napi_tx);
+	} else {
+		/* Clear IEVENT, so interrupts aren't called again
+		 * because of the packets that have already arrived.
+		 */
+		gfar_write(&grp->regs->ievent, IEVENT_TX_MASK);
+	}
+
+	return IRQ_HANDLED;
+}
+
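+/* Attach one RxBD's buffer to the skb being assembled.  Each Rx page
+ * is split into two GFAR_RXB_TRUESIZE halves; if the driver still owns
+ * the page (single reference, not pfmemalloc), flip page_offset to the
+ * other half and take a reference so the page can be recycled.
+ */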
+static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
+			     struct sk_buff *skb, bool first)
+{
+	int size = lstatus & BD_LENGTH_MASK;
+	struct page *page = rxb->page;
+
+	if (likely(first)) {
+		skb_put(skb, size);
+	} else {
+		/* the last fragment's length contains the full frame length */
+		if (lstatus & BD_LFLAG(RXBD_LAST))
+			size -= skb->len;
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+				rxb->page_offset + RXBUF_ALIGNMENT,
+				size, GFAR_RXB_TRUESIZE);
+	}
+
+	/* try reuse page */
+	if (unlikely(page_count(page) != 1 || page_is_pfmemalloc(page)))
+		return false;
+
+	/* change offset to the other half */
+	rxb->page_offset ^= GFAR_RXB_TRUESIZE;
+
+	page_ref_inc(page);
+
+	return true;
+}
+
+static void gfar_reuse_rx_page(struct gfar_priv_rx_q *rxq,
+			       struct gfar_rx_buff *old_rxb)
+{
+	struct gfar_rx_buff *new_rxb;
+	u16 nta = rxq->next_to_alloc;
+
+	new_rxb = &rxq->rx_buff[nta];
+
+	/* find next buf that can reuse a page */
+	nta++;
+	rxq->next_to_alloc = (nta < rxq->rx_ring_size) ? nta : 0;
+
+	/* copy page reference */
+	*new_rxb = *old_rxb;
+
+	/* sync for use by the device */
+	dma_sync_single_range_for_device(rxq->dev, old_rxb->dma,
+					 old_rxb->page_offset,
+					 GFAR_RXB_TRUESIZE, DMA_FROM_DEVICE);
+}
+
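+/* Build or extend the current frame's skb from the buffer at
+ * next_to_clean: the first buffer of a frame becomes the skb head via
+ * build_skb(), subsequent buffers are appended as page fragments.
+ */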
+static struct sk_buff *gfar_get_next_rxbuff(struct gfar_priv_rx_q *rx_queue,
+					    u32 lstatus, struct sk_buff *skb)
+{
+	struct gfar_rx_buff *rxb = &rx_queue->rx_buff[rx_queue->next_to_clean];
+	struct page *page = rxb->page;
+	bool first = false;
+
+	if (likely(!skb)) {
+		void *buff_addr = page_address(page) + rxb->page_offset;
+
+		skb = build_skb(buff_addr, GFAR_SKBFRAG_SIZE);
+		if (unlikely(!skb)) {
+			gfar_rx_alloc_err(rx_queue);
+			return NULL;
+		}
+		skb_reserve(skb, RXBUF_ALIGNMENT);
+		first = true;
+	}
+
+	dma_sync_single_range_for_cpu(rx_queue->dev, rxb->dma, rxb->page_offset,
+				      GFAR_RXB_TRUESIZE, DMA_FROM_DEVICE);
+
+	if (gfar_add_rx_frag(rxb, lstatus, skb, first)) {
+		/* reuse the free half of the page */
+		gfar_reuse_rx_page(rx_queue, rxb);
+	} else {
+		/* page cannot be reused, unmap it */
+		dma_unmap_page(rx_queue->dev, rxb->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	/* clear rxb content */
+	rxb->page = NULL;
+
+	return skb;
+}
+
+static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb)
+{
+	/* If valid headers were found, and valid sums
+	 * were verified, then we tell the kernel that no
+	 * checksumming is necessary.  Otherwise, the checksum state is
+	 * left unset and the stack verifies it.
+	 */
+	if ((be16_to_cpu(fcb->flags) & RXFCB_CSUM_MASK) ==
+	    (RXFCB_CIP | RXFCB_CTU))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb_checksum_none_assert(skb);
+}
+
+/* gfar_process_frame() -- handle one incoming packet if skb isn't NULL. */
+static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct rxfcb *fcb = NULL;
+
+	/* The FCB, if present, is at the beginning of the frame */
+	fcb = (struct rxfcb *)skb->data;
+
+	/* Remove the FCB from the skb
+	 * Remove the padded bytes, if there are any
+	 */
+	if (priv->uses_rxfcb)
+		skb_pull(skb, GMAC_FCB_LEN);
+
+	/* Get receive timestamp from the skb */
+	if (priv->hwts_rx_en) {
+		struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+		u64 *ns = (u64 *) skb->data;
+
+		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+		shhwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*ns));
+	}
+
+	if (priv->padding)
+		skb_pull(skb, priv->padding);
+
+	/* Trim off the FCS */
+	pskb_trim(skb, skb->len - ETH_FCS_LEN);
+
+	if (ndev->features & NETIF_F_RXCSUM)
+		gfar_rx_checksum(skb, fcb);
+
+	/* We need to check for NETIF_F_HW_VLAN_CTAG_RX here.
+	 * Even if vlan rx accel is disabled, on some chips
+	 * RXFCB_VLN is set pseudo-randomly.
+	 */
+	if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX &&
+	    be16_to_cpu(fcb->flags) & RXFCB_VLN)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       be16_to_cpu(fcb->vlctl));
+}
+
+/* gfar_clean_rx_ring() -- Processes each frame in the rx ring
+ * until the budget/quota has been reached. Returns the number
+ * of frames handled
+ */
+static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue,
+			      int rx_work_limit)
+{
+	struct net_device *ndev = rx_queue->ndev;
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct rxbd8 *bdp;
+	int i, howmany = 0;
+	struct sk_buff *skb = rx_queue->skb;
+	int cleaned_cnt = gfar_rxbd_unused(rx_queue);
+	unsigned int total_bytes = 0, total_pkts = 0;
+
+	/* Get the first full descriptor */
+	i = rx_queue->next_to_clean;
+
+	while (rx_work_limit--) {
+		u32 lstatus;
+
+		if (cleaned_cnt >= GFAR_RX_BUFF_ALLOC) {
+			gfar_alloc_rx_buffs(rx_queue, cleaned_cnt);
+			cleaned_cnt = 0;
+		}
+
+		bdp = &rx_queue->rx_bd_base[i];
+		lstatus = be32_to_cpu(bdp->lstatus);
+		if (lstatus & BD_LFLAG(RXBD_EMPTY))
+			break;
+
+		/* order rx buffer descriptor reads */
+		rmb();
+
+		/* fetch next to clean buffer from the ring */
+		skb = gfar_get_next_rxbuff(rx_queue, lstatus, skb);
+		if (unlikely(!skb))
+			break;
+
+		cleaned_cnt++;
+		howmany++;
+
+		if (unlikely(++i == rx_queue->rx_ring_size))
+			i = 0;
+
+		rx_queue->next_to_clean = i;
+
+		/* fetch next buffer if not the last in frame */
+		if (!(lstatus & BD_LFLAG(RXBD_LAST)))
+			continue;
+
+		if (unlikely(lstatus & BD_LFLAG(RXBD_ERR))) {
+			count_errors(lstatus, ndev);
+
+			/* discard faulty buffer */
+			dev_kfree_skb(skb);
+			skb = NULL;
+			rx_queue->stats.rx_dropped++;
+			continue;
+		}
+
+		gfar_process_frame(ndev, skb);
+
+		/* Increment the number of packets */
+		total_pkts++;
+		total_bytes += skb->len;
+
+		skb_record_rx_queue(skb, rx_queue->qindex);
+
+		skb->protocol = eth_type_trans(skb, ndev);
+
+		/* Send the packet up the stack */
+		napi_gro_receive(&rx_queue->grp->napi_rx, skb);
+
+		skb = NULL;
+	}
+
+	/* Store incomplete frames for completion */
+	rx_queue->skb = skb;
+
+	rx_queue->stats.rx_packets += total_pkts;
+	rx_queue->stats.rx_bytes += total_bytes;
+
+	if (cleaned_cnt)
+		gfar_alloc_rx_buffs(rx_queue, cleaned_cnt);
+
+	/* Update Last Free RxBD pointer for LFC */
+	if (unlikely(priv->tx_actual_en)) {
+		u32 bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
+
+		gfar_write(rx_queue->rfbptr, bdp_dma);
+	}
+
+	return howmany;
+}
+
+static int gfar_poll_rx_sq(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_rx);
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_rx_q *rx_queue = gfargrp->rx_queue;
+	int work_done = 0;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_RX_MASK);
+
+	work_done = gfar_clean_rx_ring(rx_queue, budget);
+
+	if (work_done < budget) {
+		u32 imask;
+		napi_complete_done(napi, work_done);
+		/* Clear the halt bit in RSTAT */
+		gfar_write(&regs->rstat, gfargrp->rstat);
+
+		spin_lock_irq(&gfargrp->grplock);
+		imask = gfar_read(&regs->imask);
+		imask |= IMASK_RX_DEFAULT;
+		gfar_write(&regs->imask, imask);
+		spin_unlock_irq(&gfargrp->grplock);
+	}
+
+	return work_done;
+}
+
+static int gfar_poll_tx_sq(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_tx);
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_tx_q *tx_queue = gfargrp->tx_queue;
+	u32 imask;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_TX_MASK);
+
+	/* run Tx cleanup to completion */
+	if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx])
+		gfar_clean_tx_ring(tx_queue);
+
+	napi_complete(napi);
+
+	spin_lock_irq(&gfargrp->grplock);
+	imask = gfar_read(&regs->imask);
+	imask |= IMASK_TX_DEFAULT;
+	gfar_write(&regs->imask, imask);
+	spin_unlock_irq(&gfargrp->grplock);
+
+	return 0;
+}
+
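+/* Multi-queue Rx NAPI poll: split the budget evenly across the queues
+ * that RSTAT reports as active, and re-enable Rx interrupts only once
+ * every active queue has been drained within its share of the budget.
+ */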
+static int gfar_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_rx);
+	struct gfar_private *priv = gfargrp->priv;
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_rx_q *rx_queue = NULL;
+	int work_done = 0, work_done_per_q = 0;
+	int i, budget_per_q = 0;
+	unsigned long rstat_rxf;
+	int num_act_queues;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_RX_MASK);
+
+	rstat_rxf = gfar_read(&regs->rstat) & RSTAT_RXF_MASK;
+
+	num_act_queues = bitmap_weight(&rstat_rxf, MAX_RX_QS);
+	if (num_act_queues)
+		budget_per_q = budget/num_act_queues;
+
+	for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
+		/* skip queue if not active */
+		if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i)))
+			continue;
+
+		rx_queue = priv->rx_queue[i];
+		work_done_per_q =
+			gfar_clean_rx_ring(rx_queue, budget_per_q);
+		work_done += work_done_per_q;
+
+		/* finished processing this queue */
+		if (work_done_per_q < budget_per_q) {
+			/* clear active queue hw indication */
+			gfar_write(&regs->rstat,
+				   RSTAT_CLEAR_RXF0 >> i);
+			num_act_queues--;
+
+			if (!num_act_queues)
+				break;
+		}
+	}
+
+	if (!num_act_queues) {
+		u32 imask;
+		napi_complete_done(napi, work_done);
+
+		/* Clear the halt bit in RSTAT */
+		gfar_write(&regs->rstat, gfargrp->rstat);
+
+		spin_lock_irq(&gfargrp->grplock);
+		imask = gfar_read(&regs->imask);
+		imask |= IMASK_RX_DEFAULT;
+		gfar_write(&regs->imask, imask);
+		spin_unlock_irq(&gfargrp->grplock);
+	}
+
+	return work_done;
+}
+
+static int gfar_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_tx);
+	struct gfar_private *priv = gfargrp->priv;
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_tx_q *tx_queue = NULL;
+	int has_tx_work = 0;
+	int i;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_TX_MASK);
+
+	for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) {
+		tx_queue = priv->tx_queue[i];
+		/* run Tx cleanup to completion */
+		if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) {
+			gfar_clean_tx_ring(tx_queue);
+			has_tx_work = 1;
+		}
+	}
+
+	if (!has_tx_work) {
+		u32 imask;
+		napi_complete(napi);
+
+		spin_lock_irq(&gfargrp->grplock);
+		imask = gfar_read(&regs->imask);
+		imask |= IMASK_TX_DEFAULT;
+		gfar_write(&regs->imask, imask);
+		spin_unlock_irq(&gfargrp->grplock);
+	}
+
+	return 0;
+}
+
+/* GFAR error interrupt handler */
+static irqreturn_t gfar_error(int irq, void *grp_id)
+{
+	struct gfar_priv_grp *gfargrp = grp_id;
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_private *priv = gfargrp->priv;
+	struct net_device *dev = priv->ndev;
+
+	/* Save ievent for future reference */
+	u32 events = gfar_read(&regs->ievent);
+
+	/* Clear IEVENT */
+	gfar_write(&regs->ievent, events & IEVENT_ERR_MASK);
+
+	/* Magic Packet is not an error. */
+	if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
+	    (events & IEVENT_MAG))
+		events &= ~IEVENT_MAG;
+
+	/* Log the error event for debugging */
+	if (netif_msg_rx_err(priv) || netif_msg_tx_err(priv))
+		netdev_dbg(dev,
+			   "error interrupt (ievent=0x%08x imask=0x%08x)\n",
+			   events, gfar_read(&regs->imask));
+
+	/* Update the error counters */
+	if (events & IEVENT_TXE) {
+		dev->stats.tx_errors++;
+
+		if (events & IEVENT_LC)
+			dev->stats.tx_window_errors++;
+		if (events & IEVENT_CRL)
+			dev->stats.tx_aborted_errors++;
+		if (events & IEVENT_XFUN) {
+			netif_dbg(priv, tx_err, dev,
+				  "TX FIFO underrun, packet dropped\n");
+			dev->stats.tx_dropped++;
+			atomic64_inc(&priv->extra_stats.tx_underrun);
+
+			schedule_work(&priv->reset_task);
+		}
+		netif_dbg(priv, tx_err, dev, "Transmit Error\n");
+	}
+	if (events & IEVENT_BSY) {
+		dev->stats.rx_over_errors++;
+		atomic64_inc(&priv->extra_stats.rx_bsy);
+
+		netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n",
+			  gfar_read(&regs->rstat));
+	}
+	if (events & IEVENT_BABR) {
+		dev->stats.rx_errors++;
+		atomic64_inc(&priv->extra_stats.rx_babr);
+
+		netif_dbg(priv, rx_err, dev, "babbling RX error\n");
+	}
+	if (events & IEVENT_EBERR) {
+		atomic64_inc(&priv->extra_stats.eberr);
+		netif_dbg(priv, rx_err, dev, "bus error\n");
+	}
+	if (events & IEVENT_RXC)
+		netif_dbg(priv, rx_status, dev, "control frame\n");
+
+	if (events & IEVENT_BABT) {
+		atomic64_inc(&priv->extra_stats.tx_babt);
+		netif_dbg(priv, tx_err, dev, "babbling TX error\n");
+	}
+	return IRQ_HANDLED;
+}
+
+/* The interrupt handler for devices with one interrupt */
+static irqreturn_t gfar_interrupt(int irq, void *grp_id)
+{
+	struct gfar_priv_grp *gfargrp = grp_id;
+
+	/* Save ievent for future reference */
+	u32 events = gfar_read(&gfargrp->regs->ievent);
+
+	/* Check for reception */
+	if (events & IEVENT_RX_MASK)
+		gfar_receive(irq, grp_id);
+
+	/* Check for transmit completion */
+	if (events & IEVENT_TX_MASK)
+		gfar_transmit(irq, grp_id);
+
+	/* Check for errors */
+	if (events & IEVENT_ERR_MASK)
+		gfar_error(irq, grp_id);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void gfar_netpoll(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	int i;
+
+	/* If the device has multiple interrupts, run tx/rx */
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+		for (i = 0; i < priv->num_grps; i++) {
+			struct gfar_priv_grp *grp = &priv->gfargrp[i];
+
+			disable_irq(gfar_irq(grp, TX)->irq);
+			disable_irq(gfar_irq(grp, RX)->irq);
+			disable_irq(gfar_irq(grp, ER)->irq);
+			gfar_interrupt(gfar_irq(grp, TX)->irq, grp);
+			enable_irq(gfar_irq(grp, ER)->irq);
+			enable_irq(gfar_irq(grp, RX)->irq);
+			enable_irq(gfar_irq(grp, TX)->irq);
+		}
+	} else {
+		for (i = 0; i < priv->num_grps; i++) {
+			struct gfar_priv_grp *grp = &priv->gfargrp[i];
+
+			disable_irq(gfar_irq(grp, TX)->irq);
+			gfar_interrupt(gfar_irq(grp, TX)->irq, grp);
+			enable_irq(gfar_irq(grp, TX)->irq);
+		}
+	}
+}
+#endif
+
+static void free_grp_irqs(struct gfar_priv_grp *grp)
+{
+	free_irq(gfar_irq(grp, TX)->irq, grp);
+	free_irq(gfar_irq(grp, RX)->irq, grp);
+	free_irq(gfar_irq(grp, ER)->irq, grp);
+}
+
+static int register_grp_irqs(struct gfar_priv_grp *grp)
+{
+	struct gfar_private *priv = grp->priv;
+	struct net_device *dev = priv->ndev;
+	int err;
+
+	/* If the device has multiple interrupts, register for
+	 * them.  Otherwise, only register for the one
+	 */
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+		/* Install our interrupt handlers for Error,
+		 * Transmit, and Receive
+		 */
+		err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0,
+				  gfar_irq(grp, ER)->name, grp);
+		if (err < 0) {
+			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+				  gfar_irq(grp, ER)->irq);
+
+			goto err_irq_fail;
+		}
+		enable_irq_wake(gfar_irq(grp, ER)->irq);
+
+		err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0,
+				  gfar_irq(grp, TX)->name, grp);
+		if (err < 0) {
+			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+				  gfar_irq(grp, TX)->irq);
+			goto tx_irq_fail;
+		}
+		err = request_irq(gfar_irq(grp, RX)->irq, gfar_receive, 0,
+				  gfar_irq(grp, RX)->name, grp);
+		if (err < 0) {
+			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+				  gfar_irq(grp, RX)->irq);
+			goto rx_irq_fail;
+		}
+		enable_irq_wake(gfar_irq(grp, RX)->irq);
+
+	} else {
+		err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0,
+				  gfar_irq(grp, TX)->name, grp);
+		if (err < 0) {
+			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+				  gfar_irq(grp, TX)->irq);
+			goto err_irq_fail;
+		}
+		enable_irq_wake(gfar_irq(grp, TX)->irq);
+	}
+
+	return 0;
+
+rx_irq_fail:
+	free_irq(gfar_irq(grp, TX)->irq, grp);
+tx_irq_fail:
+	free_irq(gfar_irq(grp, ER)->irq, grp);
+err_irq_fail:
+	return err;
+
+}
+
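+/* The error labels above unwind in reverse order of acquisition: a failure
+ * requesting the RX line frees the TX line, and a failure on the TX line
+ * frees the ER line, so no IRQ is leaked after a partial registration.
+ */
+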
+static void gfar_free_irq(struct gfar_private *priv)
+{
+	int i;
+
+	/* Free the IRQs */
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+		for (i = 0; i < priv->num_grps; i++)
+			free_grp_irqs(&priv->gfargrp[i]);
+	} else {
+		for (i = 0; i < priv->num_grps; i++)
+			free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq,
+				 &priv->gfargrp[i]);
+	}
+}
+
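+/* gfar_free_irq() mirrors register_grp_irqs(): multi-interrupt devices free
+ * all three lines per group, while single-interrupt devices only ever
+ * requested (and so only free) the line indexed by TX.
+ */
+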
+static int gfar_request_irq(struct gfar_private *priv)
+{
+	int err, i, j;
+
+	for (i = 0; i < priv->num_grps; i++) {
+		err = register_grp_irqs(&priv->gfargrp[i]);
+		if (err) {
+			for (j = 0; j < i; j++)
+				free_grp_irqs(&priv->gfargrp[j]);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
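+/* If registration fails for group i, the groups already set up (0..i-1) are
+ * rolled back before returning, giving the caller all-or-nothing semantics.
+ */
+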
+/* Called when something needs to use the ethernet device
+ * Returns 0 for success.
+ */
+static int gfar_enet_open(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	int err;
+
+	err = init_phy(dev);
+	if (err)
+		return err;
+
+	err = gfar_request_irq(priv);
+	if (err)
+		return err;
+
+	err = startup_gfar(dev);
+	if (err)
+		return err;
+
+	return 0;
+}
+
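+/* The open sequence runs in three steps: attach the PHY (init_phy), claim
+ * the interrupt lines (gfar_request_irq), and finally bring up the rings
+ * and the DMA engine (startup_gfar); each step bails out on error.
+ */
+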
+/* Stops the kernel queue, and halts the controller */
+static int gfar_close(struct net_device *dev)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+
+	cancel_work_sync(&priv->reset_task);
+	stop_gfar(dev);
+
+	/* Disconnect from the PHY */
+	phy_disconnect(dev->phydev);
+
+	gfar_free_irq(priv);
+
+	return 0;
+}
+
+/* Clears each of the exact match registers to zero, so they
+ * don't interfere with normal reception
+ */
+static void gfar_clear_exact_match(struct net_device *dev)
+{
+	int idx;
+	static const u8 zero_arr[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
+
+	for (idx = 1; idx < GFAR_EM_NUM + 1; idx++)
+		gfar_set_mac_for_addr(dev, idx, zero_arr);
+}
+
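+/* Exact-match clearing starts at index 1 because entry 0 holds the station
+ * MAC address.  Illustrative only: for a station address of 12:34:56:78:AB:CD
+ * in transmission order, gfar_set_mac_for_addr() writes the register pair as
+ *
+ *	MACnADDR1 = 0xCDAB7856;
+ *	MACnADDR2 = 0x34120000;
+ */
+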
+/* Update the hash table based on the current list of multicast
+ * addresses we subscribe to.  Also, change the promiscuity of
+ * the device based on the flags (this function is called
+ * whenever dev->flags is changed).
+ */
+static void gfar_set_multi(struct net_device *dev)
+{
+	struct netdev_hw_addr *ha;
+	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+
+	if (dev->flags & IFF_PROMISC) {
+		/* Set RCTRL to PROM */
+		tempval = gfar_read(&regs->rctrl);
+		tempval |= RCTRL_PROM;
+		gfar_write(&regs->rctrl, tempval);
+	} else {
+		/* Set RCTRL to not PROM */
+		tempval = gfar_read(&regs->rctrl);
+		tempval &= ~(RCTRL_PROM);
+		gfar_write(&regs->rctrl, tempval);
+	}
+
+	if (dev->flags & IFF_ALLMULTI) {
+		/* Set the hash to rx all multicast frames */
+		gfar_write(&regs->igaddr0, 0xffffffff);
+		gfar_write(&regs->igaddr1, 0xffffffff);
+		gfar_write(&regs->igaddr2, 0xffffffff);
+		gfar_write(&regs->igaddr3, 0xffffffff);
+		gfar_write(&regs->igaddr4, 0xffffffff);
+		gfar_write(&regs->igaddr5, 0xffffffff);
+		gfar_write(&regs->igaddr6, 0xffffffff);
+		gfar_write(&regs->igaddr7, 0xffffffff);
+		gfar_write(&regs->gaddr0, 0xffffffff);
+		gfar_write(&regs->gaddr1, 0xffffffff);
+		gfar_write(&regs->gaddr2, 0xffffffff);
+		gfar_write(&regs->gaddr3, 0xffffffff);
+		gfar_write(&regs->gaddr4, 0xffffffff);
+		gfar_write(&regs->gaddr5, 0xffffffff);
+		gfar_write(&regs->gaddr6, 0xffffffff);
+		gfar_write(&regs->gaddr7, 0xffffffff);
+	} else {
+		int em_num;
+		int idx;
+
+		/* zero out the hash */
+		gfar_write(&regs->igaddr0, 0x0);
+		gfar_write(&regs->igaddr1, 0x0);
+		gfar_write(&regs->igaddr2, 0x0);
+		gfar_write(&regs->igaddr3, 0x0);
+		gfar_write(&regs->igaddr4, 0x0);
+		gfar_write(&regs->igaddr5, 0x0);
+		gfar_write(&regs->igaddr6, 0x0);
+		gfar_write(&regs->igaddr7, 0x0);
+		gfar_write(&regs->gaddr0, 0x0);
+		gfar_write(&regs->gaddr1, 0x0);
+		gfar_write(&regs->gaddr2, 0x0);
+		gfar_write(&regs->gaddr3, 0x0);
+		gfar_write(&regs->gaddr4, 0x0);
+		gfar_write(&regs->gaddr5, 0x0);
+		gfar_write(&regs->gaddr6, 0x0);
+		gfar_write(&regs->gaddr7, 0x0);
+
+		/* If we have extended hash tables, we need to
+		 * clear the exact match registers to prepare for
+		 * setting them
+		 */
+		if (priv->extended_hash) {
+			em_num = GFAR_EM_NUM + 1;
+			gfar_clear_exact_match(dev);
+			idx = 1;
+		} else {
+			idx = 0;
+			em_num = 0;
+		}
+
+		if (netdev_mc_empty(dev))
+			return;
+
+		/* Parse the list, and set the appropriate bits */
+		netdev_for_each_mc_addr(ha, dev) {
+			if (idx < em_num) {
+				gfar_set_mac_for_addr(dev, idx, ha->addr);
+				idx++;
+			} else
+				gfar_set_hash_for_addr(dev, ha->addr);
+		}
+	}
+}
+
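+#if 0
+/* Illustrative sketch only, not part of this patch: how a multicast address
+ * falls into the group-hash registers written above.  It mirrors
+ * gfar_set_hash_for_addr(); priv->hash_width is 8, or 9 when the extended
+ * hash tables are in use.
+ */
+static void example_hash_mapping(struct gfar_private *priv, u8 *addr)
+{
+	u32 result = ether_crc(ETH_ALEN, addr);	/* CRC-32 of the address */
+	int width = priv->hash_width;
+	u8 whichbit = (result >> (32 - width)) & 0x1f;	/* bit within register */
+	u8 whichreg = result >> (32 - width + 5);	/* which hash register */
+	u32 value = 1 << (31 - whichbit);	/* IBM numbering: bit 0 = MSB */
+
+	/* the driver then ORs 'value' into the selected hash register */
+	gfar_write(priv->hash_regs[whichreg],
+		   gfar_read(priv->hash_regs[whichreg]) | value);
+}
+#endif
+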
 void gfar_mac_reset(struct gfar_private *priv)
 {
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
@@ -1267,45 +3274,23 @@
 		gfar_write_isrg(priv);
 }
 
-static void gfar_init_addr_hash_table(struct gfar_private *priv)
-{
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) {
-		priv->extended_hash = 1;
-		priv->hash_width = 9;
-
-		priv->hash_regs[0] = &regs->igaddr0;
-		priv->hash_regs[1] = &regs->igaddr1;
-		priv->hash_regs[2] = &regs->igaddr2;
-		priv->hash_regs[3] = &regs->igaddr3;
-		priv->hash_regs[4] = &regs->igaddr4;
-		priv->hash_regs[5] = &regs->igaddr5;
-		priv->hash_regs[6] = &regs->igaddr6;
-		priv->hash_regs[7] = &regs->igaddr7;
-		priv->hash_regs[8] = &regs->gaddr0;
-		priv->hash_regs[9] = &regs->gaddr1;
-		priv->hash_regs[10] = &regs->gaddr2;
-		priv->hash_regs[11] = &regs->gaddr3;
-		priv->hash_regs[12] = &regs->gaddr4;
-		priv->hash_regs[13] = &regs->gaddr5;
-		priv->hash_regs[14] = &regs->gaddr6;
-		priv->hash_regs[15] = &regs->gaddr7;
-
-	} else {
-		priv->extended_hash = 0;
-		priv->hash_width = 8;
-
-		priv->hash_regs[0] = &regs->gaddr0;
-		priv->hash_regs[1] = &regs->gaddr1;
-		priv->hash_regs[2] = &regs->gaddr2;
-		priv->hash_regs[3] = &regs->gaddr3;
-		priv->hash_regs[4] = &regs->gaddr4;
-		priv->hash_regs[5] = &regs->gaddr5;
-		priv->hash_regs[6] = &regs->gaddr6;
-		priv->hash_regs[7] = &regs->gaddr7;
-	}
-}
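+/* net_device callbacks implemented by this driver; the table below maps the
+ * generic netdev operations onto the driver's gfar_* handlers.
+ */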
+static const struct net_device_ops gfar_netdev_ops = {
+	.ndo_open = gfar_enet_open,
+	.ndo_start_xmit = gfar_start_xmit,
+	.ndo_stop = gfar_close,
+	.ndo_change_mtu = gfar_change_mtu,
+	.ndo_set_features = gfar_set_features,
+	.ndo_set_rx_mode = gfar_set_multi,
+	.ndo_tx_timeout = gfar_timeout,
+	.ndo_do_ioctl = gfar_ioctl,
+	.ndo_get_stats = gfar_get_stats,
+	.ndo_change_carrier = fixed_phy_change_carrier,
+	.ndo_set_mac_address = gfar_set_mac_addr,
+	.ndo_validate_addr = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = gfar_netpoll,
+#endif
+};
 
 /* Set up the ethernet device structure, private data,
  * and anything else we need before we start
@@ -1736,2037 +3721,6 @@
 
 #endif
 
-/* Reads the controller's registers to determine what interface
- * connects it to the PHY.
- */
-static phy_interface_t gfar_get_interface(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 ecntrl;
-
-	ecntrl = gfar_read(&regs->ecntrl);
-
-	if (ecntrl & ECNTRL_SGMII_MODE)
-		return PHY_INTERFACE_MODE_SGMII;
-
-	if (ecntrl & ECNTRL_TBI_MODE) {
-		if (ecntrl & ECNTRL_REDUCED_MODE)
-			return PHY_INTERFACE_MODE_RTBI;
-		else
-			return PHY_INTERFACE_MODE_TBI;
-	}
-
-	if (ecntrl & ECNTRL_REDUCED_MODE) {
-		if (ecntrl & ECNTRL_REDUCED_MII_MODE) {
-			return PHY_INTERFACE_MODE_RMII;
-		}
-		else {
-			phy_interface_t interface = priv->interface;
-
-			/* This isn't autodetected right now, so it must
-			 * be set by the device tree or platform code.
-			 */
-			if (interface == PHY_INTERFACE_MODE_RGMII_ID)
-				return PHY_INTERFACE_MODE_RGMII_ID;
-
-			return PHY_INTERFACE_MODE_RGMII;
-		}
-	}
-
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
-		return PHY_INTERFACE_MODE_GMII;
-
-	return PHY_INTERFACE_MODE_MII;
-}
-
-
-/* Initializes driver's PHY state, and attaches to the PHY.
- * Returns 0 on success.
- */
-static int init_phy(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	uint gigabit_support =
-		priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
-		GFAR_SUPPORTED_GBIT : 0;
-	phy_interface_t interface;
-	struct phy_device *phydev;
-	struct ethtool_eee edata;
-
-	priv->oldlink = 0;
-	priv->oldspeed = 0;
-	priv->oldduplex = -1;
-
-	interface = gfar_get_interface(dev);
-
-	phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
-				interface);
-	if (!phydev) {
-		dev_err(&dev->dev, "could not attach to PHY\n");
-		return -ENODEV;
-	}
-
-	if (interface == PHY_INTERFACE_MODE_SGMII)
-		gfar_configure_serdes(dev);
-
-	/* Remove any features not supported by the controller */
-	phydev->supported &= (GFAR_SUPPORTED | gigabit_support);
-	phydev->advertising = phydev->supported;
-
-	/* Add support for flow control, but don't advertise it by default */
-	phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-
-	/* disable EEE autoneg, EEE not supported by eTSEC */
-	memset(&edata, 0, sizeof(struct ethtool_eee));
-	phy_ethtool_set_eee(phydev, &edata);
-
-	return 0;
-}
-
-/* Initialize TBI PHY interface for communicating with the
- * SERDES lynx PHY on the chip.  We communicate with this PHY
- * through the MDIO bus on each controller, treating it as a
- * "normal" PHY at the address found in the TBIPA register.  We assume
- * that the TBIPA register is valid.  Either the MDIO bus code will set
- * it to a value that doesn't conflict with other PHYs on the bus, or the
- * value doesn't matter, as there are no other PHYs on the bus.
- */
-static void gfar_configure_serdes(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct phy_device *tbiphy;
-
-	if (!priv->tbi_node) {
-		dev_warn(&dev->dev, "error: SGMII mode requires that the "
-				    "device tree specify a tbi-handle\n");
-		return;
-	}
-
-	tbiphy = of_phy_find_device(priv->tbi_node);
-	if (!tbiphy) {
-		dev_err(&dev->dev, "error: Could not get TBI device\n");
-		return;
-	}
-
-	/* If the link is already up, we must already be ok, and don't need to
-	 * configure and reset the TBI<->SerDes link.  Maybe U-Boot configured
-	 * everything for us?  Resetting it takes the link down and requires
-	 * several seconds for it to come back.
-	 */
-	if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) {
-		put_device(&tbiphy->mdio.dev);
-		return;
-	}
-
-	/* Single clk mode, mii mode off(for serdes communication) */
-	phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT);
-
-	phy_write(tbiphy, MII_ADVERTISE,
-		  ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE |
-		  ADVERTISE_1000XPSE_ASYM);
-
-	phy_write(tbiphy, MII_BMCR,
-		  BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX |
-		  BMCR_SPEED1000);
-
-	put_device(&tbiphy->mdio.dev);
-}
-
-static int __gfar_is_rx_idle(struct gfar_private *priv)
-{
-	u32 res;
-
- * Normally, TSEC should not hang on GRS commands, so we should
-	 * actually wait for IEVENT_GRSC flag.
-	 */
-	if (!gfar_has_errata(priv, GFAR_ERRATA_A002))
-		return 0;
-
-	/* Read the eTSEC register at offset 0xD1C. If bits 7-14 are
-	 * the same as bits 23-30, the eTSEC Rx is assumed to be idle
-	 * and the Rx can be safely reset.
-	 */
-	res = gfar_read((void __iomem *)priv->gfargrp[0].regs + 0xd1c);
-	res &= 0x7f807f80;
-	if ((res & 0xffff) == (res >> 16))
-		return 1;
-
-	return 0;
-}
-
-/* Halt the receive and transmit queues */
-static void gfar_halt_nodisable(struct gfar_private *priv)
-{
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 tempval;
-	unsigned int timeout;
-	int stopped;
-
-	gfar_ints_disable(priv);
-
-	if (gfar_is_dma_stopped(priv))
-		return;
-
-	/* Stop the DMA, and wait for it to stop */
-	tempval = gfar_read(&regs->dmactrl);
-	tempval |= (DMACTRL_GRS | DMACTRL_GTS);
-	gfar_write(&regs->dmactrl, tempval);
-
-retry:
-	timeout = 1000;
-	while (!(stopped = gfar_is_dma_stopped(priv)) && timeout) {
-		cpu_relax();
-		timeout--;
-	}
-
-	if (!timeout)
-		stopped = gfar_is_dma_stopped(priv);
-
-	if (!stopped && !gfar_is_rx_dma_stopped(priv) &&
-	    !__gfar_is_rx_idle(priv))
-		goto retry;
-}
-
-/* Halt the receive and transmit queues */
-void gfar_halt(struct gfar_private *priv)
-{
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 tempval;
-
-	/* Disable the Rx/Tx hw queues */
-	gfar_write(&regs->rqueue, 0);
-	gfar_write(&regs->tqueue, 0);
-
-	mdelay(10);
-
-	gfar_halt_nodisable(priv);
-
-	/* Disable Rx/Tx DMA */
-	tempval = gfar_read(&regs->maccfg1);
-	tempval &= ~(MACCFG1_RX_EN | MACCFG1_TX_EN);
-	gfar_write(&regs->maccfg1, tempval);
-}
-
-void stop_gfar(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-
-	netif_tx_stop_all_queues(dev);
-
-	smp_mb__before_atomic();
-	set_bit(GFAR_DOWN, &priv->state);
-	smp_mb__after_atomic();
-
-	disable_napi(priv);
-
-	/* disable ints and gracefully shut down Rx/Tx DMA */
-	gfar_halt(priv);
-
-	phy_stop(dev->phydev);
-
-	free_skb_resources(priv);
-}
-
-static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
-{
-	struct txbd8 *txbdp;
-	struct gfar_private *priv = netdev_priv(tx_queue->dev);
-	int i, j;
-
-	txbdp = tx_queue->tx_bd_base;
-
-	for (i = 0; i < tx_queue->tx_ring_size; i++) {
-		if (!tx_queue->tx_skbuff[i])
-			continue;
-
-		dma_unmap_single(priv->dev, be32_to_cpu(txbdp->bufPtr),
-				 be16_to_cpu(txbdp->length), DMA_TO_DEVICE);
-		txbdp->lstatus = 0;
-		for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
-		     j++) {
-			txbdp++;
-			dma_unmap_page(priv->dev, be32_to_cpu(txbdp->bufPtr),
-				       be16_to_cpu(txbdp->length),
-				       DMA_TO_DEVICE);
-		}
-		txbdp++;
-		dev_kfree_skb_any(tx_queue->tx_skbuff[i]);
-		tx_queue->tx_skbuff[i] = NULL;
-	}
-	kfree(tx_queue->tx_skbuff);
-	tx_queue->tx_skbuff = NULL;
-}
-
-static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
-{
-	int i;
-
-	struct rxbd8 *rxbdp = rx_queue->rx_bd_base;
-
-	if (rx_queue->skb)
-		dev_kfree_skb(rx_queue->skb);
-
-	for (i = 0; i < rx_queue->rx_ring_size; i++) {
-		struct	gfar_rx_buff *rxb = &rx_queue->rx_buff[i];
-
-		rxbdp->lstatus = 0;
-		rxbdp->bufPtr = 0;
-		rxbdp++;
-
-		if (!rxb->page)
-			continue;
-
-		dma_unmap_page(rx_queue->dev, rxb->dma,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
-		__free_page(rxb->page);
-
-		rxb->page = NULL;
-	}
-
-	kfree(rx_queue->rx_buff);
-	rx_queue->rx_buff = NULL;
-}
-
-/* If there are any tx skbs or rx skbs still around, free them.
- * Then free tx_skbuff and rx_skbuff
- */
-static void free_skb_resources(struct gfar_private *priv)
-{
-	struct gfar_priv_tx_q *tx_queue = NULL;
-	struct gfar_priv_rx_q *rx_queue = NULL;
-	int i;
-
-	/* Go through all the buffer descriptors and free their data buffers */
-	for (i = 0; i < priv->num_tx_queues; i++) {
-		struct netdev_queue *txq;
-
-		tx_queue = priv->tx_queue[i];
-		txq = netdev_get_tx_queue(tx_queue->dev, tx_queue->qindex);
-		if (tx_queue->tx_skbuff)
-			free_skb_tx_queue(tx_queue);
-		netdev_tx_reset_queue(txq);
-	}
-
-	for (i = 0; i < priv->num_rx_queues; i++) {
-		rx_queue = priv->rx_queue[i];
-		if (rx_queue->rx_buff)
-			free_skb_rx_queue(rx_queue);
-	}
-
-	dma_free_coherent(priv->dev,
-			  sizeof(struct txbd8) * priv->total_tx_ring_size +
-			  sizeof(struct rxbd8) * priv->total_rx_ring_size,
-			  priv->tx_queue[0]->tx_bd_base,
-			  priv->tx_queue[0]->tx_bd_dma_base);
-}
-
-void gfar_start(struct gfar_private *priv)
-{
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 tempval;
-	int i = 0;
-
-	/* Enable Rx/Tx hw queues */
-	gfar_write(&regs->rqueue, priv->rqueue);
-	gfar_write(&regs->tqueue, priv->tqueue);
-
-	/* Initialize DMACTRL to have WWR and WOP */
-	tempval = gfar_read(&regs->dmactrl);
-	tempval |= DMACTRL_INIT_SETTINGS;
-	gfar_write(&regs->dmactrl, tempval);
-
-	/* Make sure we aren't stopped */
-	tempval = gfar_read(&regs->dmactrl);
-	tempval &= ~(DMACTRL_GRS | DMACTRL_GTS);
-	gfar_write(&regs->dmactrl, tempval);
-
-	for (i = 0; i < priv->num_grps; i++) {
-		regs = priv->gfargrp[i].regs;
-		/* Clear THLT/RHLT, so that the DMA starts polling now */
-		gfar_write(&regs->tstat, priv->gfargrp[i].tstat);
-		gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
-	}
-
-	/* Enable Rx/Tx DMA */
-	tempval = gfar_read(&regs->maccfg1);
-	tempval |= (MACCFG1_RX_EN | MACCFG1_TX_EN);
-	gfar_write(&regs->maccfg1, tempval);
-
-	gfar_ints_enable(priv);
-
-	netif_trans_update(priv->ndev); /* prevent tx timeout */
-}
-
-static void free_grp_irqs(struct gfar_priv_grp *grp)
-{
-	free_irq(gfar_irq(grp, TX)->irq, grp);
-	free_irq(gfar_irq(grp, RX)->irq, grp);
-	free_irq(gfar_irq(grp, ER)->irq, grp);
-}
-
-static int register_grp_irqs(struct gfar_priv_grp *grp)
-{
-	struct gfar_private *priv = grp->priv;
-	struct net_device *dev = priv->ndev;
-	int err;
-
-	/* If the device has multiple interrupts, register for
-	 * them.  Otherwise, only register for the one
-	 */
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-		/* Install our interrupt handlers for Error,
-		 * Transmit, and Receive
-		 */
-		err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0,
-				  gfar_irq(grp, ER)->name, grp);
-		if (err < 0) {
-			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
-				  gfar_irq(grp, ER)->irq);
-
-			goto err_irq_fail;
-		}
-		enable_irq_wake(gfar_irq(grp, ER)->irq);
-
-		err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0,
-				  gfar_irq(grp, TX)->name, grp);
-		if (err < 0) {
-			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
-				  gfar_irq(grp, TX)->irq);
-			goto tx_irq_fail;
-		}
-		err = request_irq(gfar_irq(grp, RX)->irq, gfar_receive, 0,
-				  gfar_irq(grp, RX)->name, grp);
-		if (err < 0) {
-			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
-				  gfar_irq(grp, RX)->irq);
-			goto rx_irq_fail;
-		}
-		enable_irq_wake(gfar_irq(grp, RX)->irq);
-
-	} else {
-		err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0,
-				  gfar_irq(grp, TX)->name, grp);
-		if (err < 0) {
-			netif_err(priv, intr, dev, "Can't get IRQ %d\n",
-				  gfar_irq(grp, TX)->irq);
-			goto err_irq_fail;
-		}
-		enable_irq_wake(gfar_irq(grp, TX)->irq);
-	}
-
-	return 0;
-
-rx_irq_fail:
-	free_irq(gfar_irq(grp, TX)->irq, grp);
-tx_irq_fail:
-	free_irq(gfar_irq(grp, ER)->irq, grp);
-err_irq_fail:
-	return err;
-
-}
-
-static void gfar_free_irq(struct gfar_private *priv)
-{
-	int i;
-
-	/* Free the IRQs */
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-		for (i = 0; i < priv->num_grps; i++)
-			free_grp_irqs(&priv->gfargrp[i]);
-	} else {
-		for (i = 0; i < priv->num_grps; i++)
-			free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq,
-				 &priv->gfargrp[i]);
-	}
-}
-
-static int gfar_request_irq(struct gfar_private *priv)
-{
-	int err, i, j;
-
-	for (i = 0; i < priv->num_grps; i++) {
-		err = register_grp_irqs(&priv->gfargrp[i]);
-		if (err) {
-			for (j = 0; j < i; j++)
-				free_grp_irqs(&priv->gfargrp[j]);
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-/* Bring the controller up and running */
-int startup_gfar(struct net_device *ndev)
-{
-	struct gfar_private *priv = netdev_priv(ndev);
-	int err;
-
-	gfar_mac_reset(priv);
-
-	err = gfar_alloc_skb_resources(ndev);
-	if (err)
-		return err;
-
-	gfar_init_tx_rx_base(priv);
-
-	smp_mb__before_atomic();
-	clear_bit(GFAR_DOWN, &priv->state);
-	smp_mb__after_atomic();
-
-	/* Start Rx/Tx DMA and enable the interrupts */
-	gfar_start(priv);
-
-	/* force link state update after mac reset */
-	priv->oldlink = 0;
-	priv->oldspeed = 0;
-	priv->oldduplex = -1;
-
-	phy_start(ndev->phydev);
-
-	enable_napi(priv);
-
-	netif_tx_wake_all_queues(ndev);
-
-	return 0;
-}
-
-/* Called when something needs to use the ethernet device
- * Returns 0 for success.
- */
-static int gfar_enet_open(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	int err;
-
-	err = init_phy(dev);
-	if (err)
-		return err;
-
-	err = gfar_request_irq(priv);
-	if (err)
-		return err;
-
-	err = startup_gfar(dev);
-	if (err)
-		return err;
-
-	return err;
-}
-
-static inline struct txfcb *gfar_add_fcb(struct sk_buff *skb)
-{
-	struct txfcb *fcb = skb_push(skb, GMAC_FCB_LEN);
-
-	memset(fcb, 0, GMAC_FCB_LEN);
-
-	return fcb;
-}
-
-static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb,
-				    int fcb_length)
-{
-	/* If we're here, it's an IP packet with a TCP or UDP
-	 * payload.  We set it to checksum, using a pseudo-header
-	 * we provide
-	 */
-	u8 flags = TXFCB_DEFAULT;
-
-	/* Tell the controller what the protocol is
-	 * And provide the already calculated phcs
-	 */
-	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
-		flags |= TXFCB_UDP;
-		fcb->phcs = (__force __be16)(udp_hdr(skb)->check);
-	} else
-		fcb->phcs = (__force __be16)(tcp_hdr(skb)->check);
-
-	/* l3os is the distance between the start of the
-	 * frame (skb->data) and the start of the IP hdr.
-	 * l4os is the distance between the start of the
-	 * l3 hdr and the l4 hdr
-	 */
-	fcb->l3os = (u8)(skb_network_offset(skb) - fcb_length);
-	fcb->l4os = skb_network_header_len(skb);
-
-	fcb->flags = flags;
-}
-
-static inline void gfar_tx_vlan(struct sk_buff *skb, struct txfcb *fcb)
-{
-	fcb->flags |= TXFCB_VLN;
-	fcb->vlctl = cpu_to_be16(skb_vlan_tag_get(skb));
-}
-
-static inline struct txbd8 *skip_txbd(struct txbd8 *bdp, int stride,
-				      struct txbd8 *base, int ring_size)
-{
-	struct txbd8 *new_bd = bdp + stride;
-
-	return (new_bd >= (base + ring_size)) ? (new_bd - ring_size) : new_bd;
-}
-
-static inline struct txbd8 *next_txbd(struct txbd8 *bdp, struct txbd8 *base,
-				      int ring_size)
-{
-	return skip_txbd(bdp, 1, base, ring_size);
-}
-
-/* eTSEC12: csum generation not supported for some fcb offsets */
-static inline bool gfar_csum_errata_12(struct gfar_private *priv,
-				       unsigned long fcb_addr)
-{
-	return (gfar_has_errata(priv, GFAR_ERRATA_12) &&
-	       (fcb_addr % 0x20) > 0x18);
-}
-
-/* eTSEC76: csum generation for frames larger than 2500 may
- * cause excess delays before start of transmission
- */
-static inline bool gfar_csum_errata_76(struct gfar_private *priv,
-				       unsigned int len)
-{
-	return (gfar_has_errata(priv, GFAR_ERRATA_76) &&
-	       (len > 2500));
-}
-
-/* This is called by the kernel when a frame is ready for transmission.
- * It is pointed to by the dev->hard_start_xmit function pointer
- */
-static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar_priv_tx_q *tx_queue = NULL;
-	struct netdev_queue *txq;
-	struct gfar __iomem *regs = NULL;
-	struct txfcb *fcb = NULL;
-	struct txbd8 *txbdp, *txbdp_start, *base, *txbdp_tstamp = NULL;
-	u32 lstatus;
-	skb_frag_t *frag;
-	int i, rq = 0;
-	int do_tstamp, do_csum, do_vlan;
-	u32 bufaddr;
-	unsigned int nr_frags, nr_txbds, bytes_sent, fcb_len = 0;
-
-	rq = skb->queue_mapping;
-	tx_queue = priv->tx_queue[rq];
-	txq = netdev_get_tx_queue(dev, rq);
-	base = tx_queue->tx_bd_base;
-	regs = tx_queue->grp->regs;
-
-	do_csum = (CHECKSUM_PARTIAL == skb->ip_summed);
-	do_vlan = skb_vlan_tag_present(skb);
-	do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
-		    priv->hwts_tx_en;
-
-	if (do_csum || do_vlan)
-		fcb_len = GMAC_FCB_LEN;
-
-	/* check if time stamp should be generated */
-	if (unlikely(do_tstamp))
-		fcb_len = GMAC_FCB_LEN + GMAC_TXPAL_LEN;
-
-	/* make space for additional header when fcb is needed */
-	if (fcb_len && unlikely(skb_headroom(skb) < fcb_len)) {
-		struct sk_buff *skb_new;
-
-		skb_new = skb_realloc_headroom(skb, fcb_len);
-		if (!skb_new) {
-			dev->stats.tx_errors++;
-			dev_kfree_skb_any(skb);
-			return NETDEV_TX_OK;
-		}
-
-		if (skb->sk)
-			skb_set_owner_w(skb_new, skb->sk);
-		dev_consume_skb_any(skb);
-		skb = skb_new;
-	}
-
-	/* total number of fragments in the SKB */
-	nr_frags = skb_shinfo(skb)->nr_frags;
-
-	/* calculate the required number of TxBDs for this skb */
-	if (unlikely(do_tstamp))
-		nr_txbds = nr_frags + 2;
-	else
-		nr_txbds = nr_frags + 1;
-
-	/* check if there is space to queue this packet */
-	if (nr_txbds > tx_queue->num_txbdfree) {
-		/* no space, stop the queue */
-		netif_tx_stop_queue(txq);
-		dev->stats.tx_fifo_errors++;
-		return NETDEV_TX_BUSY;
-	}
-
-	/* Update transmit stats */
-	bytes_sent = skb->len;
-	tx_queue->stats.tx_bytes += bytes_sent;
-	/* keep Tx bytes on wire for BQL accounting */
-	GFAR_CB(skb)->bytes_sent = bytes_sent;
-	tx_queue->stats.tx_packets++;
-
-	txbdp = txbdp_start = tx_queue->cur_tx;
-	lstatus = be32_to_cpu(txbdp->lstatus);
-
-	/* Add TxPAL between FCB and frame if required */
-	if (unlikely(do_tstamp)) {
-		skb_push(skb, GMAC_TXPAL_LEN);
-		memset(skb->data, 0, GMAC_TXPAL_LEN);
-	}
-
-	/* Add TxFCB if required */
-	if (fcb_len) {
-		fcb = gfar_add_fcb(skb);
-		lstatus |= BD_LFLAG(TXBD_TOE);
-	}
-
-	/* Set up checksumming */
-	if (do_csum) {
-		gfar_tx_checksum(skb, fcb, fcb_len);
-
-		if (unlikely(gfar_csum_errata_12(priv, (unsigned long)fcb)) ||
-		    unlikely(gfar_csum_errata_76(priv, skb->len))) {
-			__skb_pull(skb, GMAC_FCB_LEN);
-			skb_checksum_help(skb);
-			if (do_vlan || do_tstamp) {
-				/* put back a new fcb for vlan/tstamp TOE */
-				fcb = gfar_add_fcb(skb);
-			} else {
-				/* Tx TOE not used */
-				lstatus &= ~(BD_LFLAG(TXBD_TOE));
-				fcb = NULL;
-			}
-		}
-	}
-
-	if (do_vlan)
-		gfar_tx_vlan(skb, fcb);
-
-	bufaddr = dma_map_single(priv->dev, skb->data, skb_headlen(skb),
-				 DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
-		goto dma_map_err;
-
-	txbdp_start->bufPtr = cpu_to_be32(bufaddr);
-
-	/* Time stamp insertion requires one additional TxBD */
-	if (unlikely(do_tstamp))
-		txbdp_tstamp = txbdp = next_txbd(txbdp, base,
-						 tx_queue->tx_ring_size);
-
-	if (likely(!nr_frags)) {
-		if (likely(!do_tstamp))
-			lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
-	} else {
-		u32 lstatus_start = lstatus;
-
-		/* Place the fragment addresses and lengths into the TxBDs */
-		frag = &skb_shinfo(skb)->frags[0];
-		for (i = 0; i < nr_frags; i++, frag++) {
-			unsigned int size;
-
-			/* Point at the next BD, wrapping as needed */
-			txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
-
-			size = skb_frag_size(frag);
-
-			lstatus = be32_to_cpu(txbdp->lstatus) | size |
-				  BD_LFLAG(TXBD_READY);
-
-			/* Handle the last BD specially */
-			if (i == nr_frags - 1)
-				lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
-
-			bufaddr = skb_frag_dma_map(priv->dev, frag, 0,
-						   size, DMA_TO_DEVICE);
-			if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
-				goto dma_map_err;
-
-			/* set the TxBD length and buffer pointer */
-			txbdp->bufPtr = cpu_to_be32(bufaddr);
-			txbdp->lstatus = cpu_to_be32(lstatus);
-		}
-
-		lstatus = lstatus_start;
-	}
-
-	/* If time stamping is requested one additional TxBD must be set up. The
-	 * first TxBD points to the FCB and must have a data length of
-	 * GMAC_FCB_LEN. The second TxBD points to the actual frame data with
-	 * the full frame length.
-	 */
-	if (unlikely(do_tstamp)) {
-		u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
-
-		bufaddr = be32_to_cpu(txbdp_start->bufPtr);
-		bufaddr += fcb_len;
-
-		lstatus_ts |= BD_LFLAG(TXBD_READY) |
-			      (skb_headlen(skb) - fcb_len);
-		if (!nr_frags)
-			lstatus_ts |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
-
-		txbdp_tstamp->bufPtr = cpu_to_be32(bufaddr);
-		txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
-		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN;
-
-		/* Setup tx hardware time stamping */
-		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		fcb->ptp = 1;
-	} else {
-		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
-	}
-
-	netdev_tx_sent_queue(txq, bytes_sent);
-
-	gfar_wmb();
-
-	txbdp_start->lstatus = cpu_to_be32(lstatus);
-
-	gfar_wmb(); /* force lstatus write before tx_skbuff */
-
-	tx_queue->tx_skbuff[tx_queue->skb_curtx] = skb;
-
-	/* Update the current skb pointer to the next entry we will use
-	 * (wrapping if necessary)
-	 */
-	tx_queue->skb_curtx = (tx_queue->skb_curtx + 1) &
-			      TX_RING_MOD_MASK(tx_queue->tx_ring_size);
-
-	tx_queue->cur_tx = next_txbd(txbdp, base, tx_queue->tx_ring_size);
-
-	/* We can work in parallel with gfar_clean_tx_ring(), except
-	 * when modifying num_txbdfree. Note that we didn't grab the lock
-	 * when we were reading the num_txbdfree and checking for available
-	 * space, that's because outside of this function it can only grow.
-	 */
-	spin_lock_bh(&tx_queue->txlock);
-	/* reduce TxBD free count */
-	tx_queue->num_txbdfree -= (nr_txbds);
-	spin_unlock_bh(&tx_queue->txlock);
-
-	/* If the next BD still needs to be cleaned up, then the bds
-	 * are full.  We need to tell the kernel to stop sending us stuff.
-	 */
-	if (!tx_queue->num_txbdfree) {
-		netif_tx_stop_queue(txq);
-
-		dev->stats.tx_fifo_errors++;
-	}
-
-	/* Tell the DMA to go go go */
-	gfar_write(&regs->tstat, TSTAT_CLEAR_THALT >> tx_queue->qindex);
-
-	return NETDEV_TX_OK;
-
-dma_map_err:
-	txbdp = next_txbd(txbdp_start, base, tx_queue->tx_ring_size);
-	if (do_tstamp)
-		txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
-	for (i = 0; i < nr_frags; i++) {
-		lstatus = be32_to_cpu(txbdp->lstatus);
-		if (!(lstatus & BD_LFLAG(TXBD_READY)))
-			break;
-
-		lstatus &= ~BD_LFLAG(TXBD_READY);
-		txbdp->lstatus = cpu_to_be32(lstatus);
-		bufaddr = be32_to_cpu(txbdp->bufPtr);
-		dma_unmap_page(priv->dev, bufaddr, be16_to_cpu(txbdp->length),
-			       DMA_TO_DEVICE);
-		txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
-	}
-	gfar_wmb();
-	dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
-}
-
-/* Stops the kernel queue, and halts the controller */
-static int gfar_close(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-
-	cancel_work_sync(&priv->reset_task);
-	stop_gfar(dev);
-
-	/* Disconnect from the PHY */
-	phy_disconnect(dev->phydev);
-
-	gfar_free_irq(priv);
-
-	return 0;
-}
-
-/* Changes the mac address if the controller is not running. */
-static int gfar_set_mac_address(struct net_device *dev)
-{
-	gfar_set_mac_for_addr(dev, 0, dev->dev_addr);
-
-	return 0;
-}
-
-static int gfar_change_mtu(struct net_device *dev, int new_mtu)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-
-	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
-		cpu_relax();
-
-	if (dev->flags & IFF_UP)
-		stop_gfar(dev);
-
-	dev->mtu = new_mtu;
-
-	if (dev->flags & IFF_UP)
-		startup_gfar(dev);
-
-	clear_bit_unlock(GFAR_RESETTING, &priv->state);
-
-	return 0;
-}
-
-void reset_gfar(struct net_device *ndev)
-{
-	struct gfar_private *priv = netdev_priv(ndev);
-
-	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
-		cpu_relax();
-
-	stop_gfar(ndev);
-	startup_gfar(ndev);
-
-	clear_bit_unlock(GFAR_RESETTING, &priv->state);
-}
-
-/* gfar_reset_task gets scheduled when a packet has not been
- * transmitted after a set amount of time.
- * For now, assume that clearing out all the structures, and
- * starting over will fix the problem.
- */
-static void gfar_reset_task(struct work_struct *work)
-{
-	struct gfar_private *priv = container_of(work, struct gfar_private,
-						 reset_task);
-	reset_gfar(priv->ndev);
-}
-
-static void gfar_timeout(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-
-	dev->stats.tx_errors++;
-	schedule_work(&priv->reset_task);
-}
-
-/* Interrupt Handler for Transmit complete */
-static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
-{
-	struct net_device *dev = tx_queue->dev;
-	struct netdev_queue *txq;
-	struct gfar_private *priv = netdev_priv(dev);
-	struct txbd8 *bdp, *next = NULL;
-	struct txbd8 *lbdp = NULL;
-	struct txbd8 *base = tx_queue->tx_bd_base;
-	struct sk_buff *skb;
-	int skb_dirtytx;
-	int tx_ring_size = tx_queue->tx_ring_size;
-	int frags = 0, nr_txbds = 0;
-	int i;
-	int howmany = 0;
-	int tqi = tx_queue->qindex;
-	unsigned int bytes_sent = 0;
-	u32 lstatus;
-	size_t buflen;
-
-	txq = netdev_get_tx_queue(dev, tqi);
-	bdp = tx_queue->dirty_tx;
-	skb_dirtytx = tx_queue->skb_dirtytx;
-
-	while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
-
-		frags = skb_shinfo(skb)->nr_frags;
-
-		/* When time stamping, one additional TxBD must be freed.
-		 * Also, we need to dma_unmap_single() the TxPAL.
-		 */
-		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-			nr_txbds = frags + 2;
-		else
-			nr_txbds = frags + 1;
-
-		lbdp = skip_txbd(bdp, nr_txbds - 1, base, tx_ring_size);
-
-		lstatus = be32_to_cpu(lbdp->lstatus);
-
-		/* Only clean completed frames */
-		if ((lstatus & BD_LFLAG(TXBD_READY)) &&
-		    (lstatus & BD_LENGTH_MASK))
-			break;
-
-		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
-			next = next_txbd(bdp, base, tx_ring_size);
-			buflen = be16_to_cpu(next->length) +
-				 GMAC_FCB_LEN + GMAC_TXPAL_LEN;
-		} else
-			buflen = be16_to_cpu(bdp->length);
-
-		dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
-				 buflen, DMA_TO_DEVICE);
-
-		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
-			struct skb_shared_hwtstamps shhwtstamps;
-			u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
-					  ~0x7UL);
-
-			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-			shhwtstamps.hwtstamp = ns_to_ktime(be64_to_cpu(*ns));
-			skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN);
-			skb_tstamp_tx(skb, &shhwtstamps);
-			gfar_clear_txbd_status(bdp);
-			bdp = next;
-		}
-
-		gfar_clear_txbd_status(bdp);
-		bdp = next_txbd(bdp, base, tx_ring_size);
-
-		for (i = 0; i < frags; i++) {
-			dma_unmap_page(priv->dev, be32_to_cpu(bdp->bufPtr),
-				       be16_to_cpu(bdp->length),
-				       DMA_TO_DEVICE);
-			gfar_clear_txbd_status(bdp);
-			bdp = next_txbd(bdp, base, tx_ring_size);
-		}
-
-		bytes_sent += GFAR_CB(skb)->bytes_sent;
-
-		dev_kfree_skb_any(skb);
-
-		tx_queue->tx_skbuff[skb_dirtytx] = NULL;
-
-		skb_dirtytx = (skb_dirtytx + 1) &
-			      TX_RING_MOD_MASK(tx_ring_size);
-
-		howmany++;
-		spin_lock(&tx_queue->txlock);
-		tx_queue->num_txbdfree += nr_txbds;
-		spin_unlock(&tx_queue->txlock);
-	}
-
-	/* If we freed a buffer, we can restart transmission, if necessary */
-	if (tx_queue->num_txbdfree &&
-	    netif_tx_queue_stopped(txq) &&
-	    !(test_bit(GFAR_DOWN, &priv->state)))
-		netif_wake_subqueue(priv->ndev, tqi);
-
-	/* Update dirty indicators */
-	tx_queue->skb_dirtytx = skb_dirtytx;
-	tx_queue->dirty_tx = bdp;
-
-	netdev_tx_completed_queue(txq, howmany, bytes_sent);
-}
-
-static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb)
-{
-	struct page *page;
-	dma_addr_t addr;
-
-	page = dev_alloc_page();
-	if (unlikely(!page))
-		return false;
-
-	addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(rxq->dev, addr))) {
-		__free_page(page);
-
-		return false;
-	}
-
-	rxb->dma = addr;
-	rxb->page = page;
-	rxb->page_offset = 0;
-
-	return true;
-}
-
-static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue)
-{
-	struct gfar_private *priv = netdev_priv(rx_queue->ndev);
-	struct gfar_extra_stats *estats = &priv->extra_stats;
-
-	netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n");
-	atomic64_inc(&estats->rx_alloc_err);
-}
-
-static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
-				int alloc_cnt)
-{
-	struct rxbd8 *bdp;
-	struct gfar_rx_buff *rxb;
-	int i;
-
-	i = rx_queue->next_to_use;
-	bdp = &rx_queue->rx_bd_base[i];
-	rxb = &rx_queue->rx_buff[i];
-
-	while (alloc_cnt--) {
-		/* try reuse page */
-		if (unlikely(!rxb->page)) {
-			if (unlikely(!gfar_new_page(rx_queue, rxb))) {
-				gfar_rx_alloc_err(rx_queue);
-				break;
-			}
-		}
-
-		/* Setup the new RxBD */
-		gfar_init_rxbdp(rx_queue, bdp,
-				rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT);
-
-		/* Update to the next pointer */
-		bdp++;
-		rxb++;
-
-		if (unlikely(++i == rx_queue->rx_ring_size)) {
-			i = 0;
-			bdp = rx_queue->rx_bd_base;
-			rxb = rx_queue->rx_buff;
-		}
-	}
-
-	rx_queue->next_to_use = i;
-	rx_queue->next_to_alloc = i;
-}
-
-static void count_errors(u32 lstatus, struct net_device *ndev)
-{
-	struct gfar_private *priv = netdev_priv(ndev);
-	struct net_device_stats *stats = &ndev->stats;
-	struct gfar_extra_stats *estats = &priv->extra_stats;
-
-	/* If the packet was truncated, none of the other errors matter */
-	if (lstatus & BD_LFLAG(RXBD_TRUNCATED)) {
-		stats->rx_length_errors++;
-
-		atomic64_inc(&estats->rx_trunc);
-
-		return;
-	}
-	/* Count the errors, if there were any */
-	if (lstatus & BD_LFLAG(RXBD_LARGE | RXBD_SHORT)) {
-		stats->rx_length_errors++;
-
-		if (lstatus & BD_LFLAG(RXBD_LARGE))
-			atomic64_inc(&estats->rx_large);
-		else
-			atomic64_inc(&estats->rx_short);
-	}
-	if (lstatus & BD_LFLAG(RXBD_NONOCTET)) {
-		stats->rx_frame_errors++;
-		atomic64_inc(&estats->rx_nonoctet);
-	}
-	if (lstatus & BD_LFLAG(RXBD_CRCERR)) {
-		atomic64_inc(&estats->rx_crcerr);
-		stats->rx_crc_errors++;
-	}
-	if (lstatus & BD_LFLAG(RXBD_OVERRUN)) {
-		atomic64_inc(&estats->rx_overrun);
-		stats->rx_over_errors++;
-	}
-}
-
-irqreturn_t gfar_receive(int irq, void *grp_id)
-{
-	struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
-	unsigned long flags;
-	u32 imask, ievent;
-
-	ievent = gfar_read(&grp->regs->ievent);
-
-	if (unlikely(ievent & IEVENT_FGPI)) {
-		gfar_write(&grp->regs->ievent, IEVENT_FGPI);
-		return IRQ_HANDLED;
-	}
-
-	if (likely(napi_schedule_prep(&grp->napi_rx))) {
-		spin_lock_irqsave(&grp->grplock, flags);
-		imask = gfar_read(&grp->regs->imask);
-		imask &= IMASK_RX_DISABLED;
-		gfar_write(&grp->regs->imask, imask);
-		spin_unlock_irqrestore(&grp->grplock, flags);
-		__napi_schedule(&grp->napi_rx);
-	} else {
-		/* Clear IEVENT, so interrupts aren't called again
-		 * because of the packets that have already arrived.
-		 */
-		gfar_write(&grp->regs->ievent, IEVENT_RX_MASK);
-	}
-
-	return IRQ_HANDLED;
-}
-
-/* Interrupt Handler for Transmit complete */
-static irqreturn_t gfar_transmit(int irq, void *grp_id)
-{
-	struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
-	unsigned long flags;
-	u32 imask;
-
-	if (likely(napi_schedule_prep(&grp->napi_tx))) {
-		spin_lock_irqsave(&grp->grplock, flags);
-		imask = gfar_read(&grp->regs->imask);
-		imask &= IMASK_TX_DISABLED;
-		gfar_write(&grp->regs->imask, imask);
-		spin_unlock_irqrestore(&grp->grplock, flags);
-		__napi_schedule(&grp->napi_tx);
-	} else {
-		/* Clear IEVENT, so interrupts aren't called again
-		 * because of the packets that have already arrived.
-		 */
-		gfar_write(&grp->regs->ievent, IEVENT_TX_MASK);
-	}
-
-	return IRQ_HANDLED;
-}
-
-static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
-			     struct sk_buff *skb, bool first)
-{
-	int size = lstatus & BD_LENGTH_MASK;
-	struct page *page = rxb->page;
-
-	if (likely(first)) {
-		skb_put(skb, size);
-	} else {
-		/* the last fragment's length contains the full frame length */
-		if (lstatus & BD_LFLAG(RXBD_LAST))
-			size -= skb->len;
-
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-				rxb->page_offset + RXBUF_ALIGNMENT,
-				size, GFAR_RXB_TRUESIZE);
-	}
-
-	/* try reuse page */
-	if (unlikely(page_count(page) != 1 || page_is_pfmemalloc(page)))
-		return false;
-
-	/* change offset to the other half */
-	rxb->page_offset ^= GFAR_RXB_TRUESIZE;
-
-	page_ref_inc(page);
-
-	return true;
-}
-
-static void gfar_reuse_rx_page(struct gfar_priv_rx_q *rxq,
-			       struct gfar_rx_buff *old_rxb)
-{
-	struct gfar_rx_buff *new_rxb;
-	u16 nta = rxq->next_to_alloc;
-
-	new_rxb = &rxq->rx_buff[nta];
-
-	/* find next buf that can reuse a page */
-	nta++;
-	rxq->next_to_alloc = (nta < rxq->rx_ring_size) ? nta : 0;
-
-	/* copy page reference */
-	*new_rxb = *old_rxb;
-
-	/* sync for use by the device */
-	dma_sync_single_range_for_device(rxq->dev, old_rxb->dma,
-					 old_rxb->page_offset,
-					 GFAR_RXB_TRUESIZE, DMA_FROM_DEVICE);
-}
-
-static struct sk_buff *gfar_get_next_rxbuff(struct gfar_priv_rx_q *rx_queue,
-					    u32 lstatus, struct sk_buff *skb)
-{
-	struct gfar_rx_buff *rxb = &rx_queue->rx_buff[rx_queue->next_to_clean];
-	struct page *page = rxb->page;
-	bool first = false;
-
-	if (likely(!skb)) {
-		void *buff_addr = page_address(page) + rxb->page_offset;
-
-		skb = build_skb(buff_addr, GFAR_SKBFRAG_SIZE);
-		if (unlikely(!skb)) {
-			gfar_rx_alloc_err(rx_queue);
-			return NULL;
-		}
-		skb_reserve(skb, RXBUF_ALIGNMENT);
-		first = true;
-	}
-
-	dma_sync_single_range_for_cpu(rx_queue->dev, rxb->dma, rxb->page_offset,
-				      GFAR_RXB_TRUESIZE, DMA_FROM_DEVICE);
-
-	if (gfar_add_rx_frag(rxb, lstatus, skb, first)) {
-		/* reuse the free half of the page */
-		gfar_reuse_rx_page(rx_queue, rxb);
-	} else {
-		/* page cannot be reused, unmap it */
-		dma_unmap_page(rx_queue->dev, rxb->dma,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
-	}
-
-	/* clear rxb content */
-	rxb->page = NULL;
-
-	return skb;
-}
-
-static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb)
-{
-	/* If valid headers were found, and valid sums
-	 * were verified, then we tell the kernel that no
-	 * checksumming is necessary.  Otherwise, the stack is left to verify it.
-	 */
-	if ((be16_to_cpu(fcb->flags) & RXFCB_CSUM_MASK) ==
-	    (RXFCB_CIP | RXFCB_CTU))
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	else
-		skb_checksum_none_assert(skb);
-}
-
-/* gfar_process_frame() -- handle one incoming packet if skb isn't NULL. */
-static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
-{
-	struct gfar_private *priv = netdev_priv(ndev);
-	struct rxfcb *fcb = NULL;
-
-	/* fcb is at the beginning if exists */
-	fcb = (struct rxfcb *)skb->data;
-
-	/* Remove the FCB from the skb
-	 * Remove the padded bytes, if there are any
-	 */
-	if (priv->uses_rxfcb)
-		skb_pull(skb, GMAC_FCB_LEN);
-
-	/* Get receive timestamp from the skb */
-	if (priv->hwts_rx_en) {
-		struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
-		u64 *ns = (u64 *) skb->data;
-
-		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
-		shhwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*ns));
-	}
-
-	if (priv->padding)
-		skb_pull(skb, priv->padding);
-
-	/* Trim off the FCS */
-	pskb_trim(skb, skb->len - ETH_FCS_LEN);
-
-	if (ndev->features & NETIF_F_RXCSUM)
-		gfar_rx_checksum(skb, fcb);
-
-	/* We need to check for NETIF_F_HW_VLAN_CTAG_RX here.
-	 * Even if vlan rx accel is disabled, on some chips
-	 * RXFCB_VLN is pseudo-randomly set.
-	 */
-	if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX &&
-	    be16_to_cpu(fcb->flags) & RXFCB_VLN)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-				       be16_to_cpu(fcb->vlctl));
-}
-
-/* gfar_clean_rx_ring() -- Processes each frame in the rx ring
- * until the budget/quota has been reached. Returns the number
- * of frames handled
- */
-int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
-{
-	struct net_device *ndev = rx_queue->ndev;
-	struct gfar_private *priv = netdev_priv(ndev);
-	struct rxbd8 *bdp;
-	int i, howmany = 0;
-	struct sk_buff *skb = rx_queue->skb;
-	int cleaned_cnt = gfar_rxbd_unused(rx_queue);
-	unsigned int total_bytes = 0, total_pkts = 0;
-
-	/* Get the first full descriptor */
-	i = rx_queue->next_to_clean;
-
-	while (rx_work_limit--) {
-		u32 lstatus;
-
-		if (cleaned_cnt >= GFAR_RX_BUFF_ALLOC) {
-			gfar_alloc_rx_buffs(rx_queue, cleaned_cnt);
-			cleaned_cnt = 0;
-		}
-
-		bdp = &rx_queue->rx_bd_base[i];
-		lstatus = be32_to_cpu(bdp->lstatus);
-		if (lstatus & BD_LFLAG(RXBD_EMPTY))
-			break;
-
-		/* order rx buffer descriptor reads */
-		rmb();
-
-		/* fetch next to clean buffer from the ring */
-		skb = gfar_get_next_rxbuff(rx_queue, lstatus, skb);
-		if (unlikely(!skb))
-			break;
-
-		cleaned_cnt++;
-		howmany++;
-
-		if (unlikely(++i == rx_queue->rx_ring_size))
-			i = 0;
-
-		rx_queue->next_to_clean = i;
-
-		/* fetch next buffer if not the last in frame */
-		if (!(lstatus & BD_LFLAG(RXBD_LAST)))
-			continue;
-
-		if (unlikely(lstatus & BD_LFLAG(RXBD_ERR))) {
-			count_errors(lstatus, ndev);
-
-			/* discard faulty buffer */
-			dev_kfree_skb(skb);
-			skb = NULL;
-			rx_queue->stats.rx_dropped++;
-			continue;
-		}
-
-		gfar_process_frame(ndev, skb);
-
-		/* Increment the number of packets */
-		total_pkts++;
-		total_bytes += skb->len;
-
-		skb_record_rx_queue(skb, rx_queue->qindex);
-
-		skb->protocol = eth_type_trans(skb, ndev);
-
-		/* Send the packet up the stack */
-		napi_gro_receive(&rx_queue->grp->napi_rx, skb);
-
-		skb = NULL;
-	}
-
-	/* Store incomplete frames for completion */
-	rx_queue->skb = skb;
-
-	rx_queue->stats.rx_packets += total_pkts;
-	rx_queue->stats.rx_bytes += total_bytes;
-
-	if (cleaned_cnt)
-		gfar_alloc_rx_buffs(rx_queue, cleaned_cnt);
-
-	/* Update Last Free RxBD pointer for LFC */
-	if (unlikely(priv->tx_actual_en)) {
-		u32 bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
-
-		gfar_write(rx_queue->rfbptr, bdp_dma);
-	}
-
-	return howmany;
-}
-
-static int gfar_poll_rx_sq(struct napi_struct *napi, int budget)
-{
-	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi_rx);
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_rx_q *rx_queue = gfargrp->rx_queue;
-	int work_done = 0;
-
-	/* Clear IEVENT, so interrupts aren't called again
-	 * because of the packets that have already arrived
-	 */
-	gfar_write(&regs->ievent, IEVENT_RX_MASK);
-
-	work_done = gfar_clean_rx_ring(rx_queue, budget);
-
-	if (work_done < budget) {
-		u32 imask;
-		napi_complete_done(napi, work_done);
-		/* Clear the halt bit in RSTAT */
-		gfar_write(&regs->rstat, gfargrp->rstat);
-
-		spin_lock_irq(&gfargrp->grplock);
-		imask = gfar_read(&regs->imask);
-		imask |= IMASK_RX_DEFAULT;
-		gfar_write(&regs->imask, imask);
-		spin_unlock_irq(&gfargrp->grplock);
-	}
-
-	return work_done;
-}
-
-static int gfar_poll_tx_sq(struct napi_struct *napi, int budget)
-{
-	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi_tx);
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_tx_q *tx_queue = gfargrp->tx_queue;
-	u32 imask;
-
-	/* Clear IEVENT, so interrupts aren't called again
-	 * because of the packets that have already arrived
-	 */
-	gfar_write(&regs->ievent, IEVENT_TX_MASK);
-
-	/* run Tx cleanup to completion */
-	if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx])
-		gfar_clean_tx_ring(tx_queue);
-
-	napi_complete(napi);
-
-	spin_lock_irq(&gfargrp->grplock);
-	imask = gfar_read(&regs->imask);
-	imask |= IMASK_TX_DEFAULT;
-	gfar_write(&regs->imask, imask);
-	spin_unlock_irq(&gfargrp->grplock);
-
-	return 0;
-}
-
-static int gfar_poll_rx(struct napi_struct *napi, int budget)
-{
-	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi_rx);
-	struct gfar_private *priv = gfargrp->priv;
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_rx_q *rx_queue = NULL;
-	int work_done = 0, work_done_per_q = 0;
-	int i, budget_per_q = 0;
-	unsigned long rstat_rxf;
-	int num_act_queues;
-
-	/* Clear IEVENT, so interrupts aren't called again
-	 * because of the packets that have already arrived
-	 */
-	gfar_write(&regs->ievent, IEVENT_RX_MASK);
-
-	rstat_rxf = gfar_read(&regs->rstat) & RSTAT_RXF_MASK;
-
-	num_act_queues = bitmap_weight(&rstat_rxf, MAX_RX_QS);
-	if (num_act_queues)
-		budget_per_q = budget/num_act_queues;
-
-	for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
-		/* skip queue if not active */
-		if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i)))
-			continue;
-
-		rx_queue = priv->rx_queue[i];
-		work_done_per_q =
-			gfar_clean_rx_ring(rx_queue, budget_per_q);
-		work_done += work_done_per_q;
-
-		/* finished processing this queue */
-		if (work_done_per_q < budget_per_q) {
-			/* clear active queue hw indication */
-			gfar_write(&regs->rstat,
-				   RSTAT_CLEAR_RXF0 >> i);
-			num_act_queues--;
-
-			if (!num_act_queues)
-				break;
-		}
-	}
-
-	if (!num_act_queues) {
-		u32 imask;
-		napi_complete_done(napi, work_done);
-
-		/* Clear the halt bit in RSTAT */
-		gfar_write(&regs->rstat, gfargrp->rstat);
-
-		spin_lock_irq(&gfargrp->grplock);
-		imask = gfar_read(&regs->imask);
-		imask |= IMASK_RX_DEFAULT;
-		gfar_write(&regs->imask, imask);
-		spin_unlock_irq(&gfargrp->grplock);
-	}
-
-	return work_done;
-}
-
-static int gfar_poll_tx(struct napi_struct *napi, int budget)
-{
-	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi_tx);
-	struct gfar_private *priv = gfargrp->priv;
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_tx_q *tx_queue = NULL;
-	int has_tx_work = 0;
-	int i;
-
-	/* Clear IEVENT, so interrupts aren't called again
-	 * because of the packets that have already arrived
-	 */
-	gfar_write(&regs->ievent, IEVENT_TX_MASK);
-
-	for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) {
-		tx_queue = priv->tx_queue[i];
-		/* run Tx cleanup to completion */
-		if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) {
-			gfar_clean_tx_ring(tx_queue);
-			has_tx_work = 1;
-		}
-	}
-
-	if (!has_tx_work) {
-		u32 imask;
-		napi_complete(napi);
-
-		spin_lock_irq(&gfargrp->grplock);
-		imask = gfar_read(&regs->imask);
-		imask |= IMASK_TX_DEFAULT;
-		gfar_write(&regs->imask, imask);
-		spin_unlock_irq(&gfargrp->grplock);
-	}
-
-	return 0;
-}
-
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-/* Polling 'interrupt' - used by things like netconsole to send skbs
- * without having to re-enable interrupts. It's not called while
- * the interrupt routine is executing.
- */
-static void gfar_netpoll(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	int i;
-
-	/* If the device has multiple interrupts, run tx/rx */
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-		for (i = 0; i < priv->num_grps; i++) {
-			struct gfar_priv_grp *grp = &priv->gfargrp[i];
-
-			disable_irq(gfar_irq(grp, TX)->irq);
-			disable_irq(gfar_irq(grp, RX)->irq);
-			disable_irq(gfar_irq(grp, ER)->irq);
-			gfar_interrupt(gfar_irq(grp, TX)->irq, grp);
-			enable_irq(gfar_irq(grp, ER)->irq);
-			enable_irq(gfar_irq(grp, RX)->irq);
-			enable_irq(gfar_irq(grp, TX)->irq);
-		}
-	} else {
-		for (i = 0; i < priv->num_grps; i++) {
-			struct gfar_priv_grp *grp = &priv->gfargrp[i];
-
-			disable_irq(gfar_irq(grp, TX)->irq);
-			gfar_interrupt(gfar_irq(grp, TX)->irq, grp);
-			enable_irq(gfar_irq(grp, TX)->irq);
-		}
-	}
-}
-#endif
-
-/* The interrupt handler for devices with one interrupt */
-static irqreturn_t gfar_interrupt(int irq, void *grp_id)
-{
-	struct gfar_priv_grp *gfargrp = grp_id;
-
-	/* Save ievent for future reference */
-	u32 events = gfar_read(&gfargrp->regs->ievent);
-
-	/* Check for reception */
-	if (events & IEVENT_RX_MASK)
-		gfar_receive(irq, grp_id);
-
-	/* Check for transmit completion */
-	if (events & IEVENT_TX_MASK)
-		gfar_transmit(irq, grp_id);
-
-	/* Check for errors */
-	if (events & IEVENT_ERR_MASK)
-		gfar_error(irq, grp_id);
-
-	return IRQ_HANDLED;
-}
-
-/* Called every time the controller might need to be made
- * aware of new link state.  The PHY code conveys this
- * information through variables in the phydev structure, and this
- * function converts those variables into the appropriate
- * register values, and can bring down the device if needed.
- */
-static void adjust_link(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
-
-	if (unlikely(phydev->link != priv->oldlink ||
-		     (phydev->link && (phydev->duplex != priv->oldduplex ||
-				       phydev->speed != priv->oldspeed))))
-		gfar_update_link_state(priv);
-}
-
-/* Update the hash table based on the current list of multicast
- * addresses we subscribe to.  Also, change the promiscuity of
- * the device based on the flags (this function is called
- * whenever dev->flags is changed).
- */
-static void gfar_set_multi(struct net_device *dev)
-{
-	struct netdev_hw_addr *ha;
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 tempval;
-
-	if (dev->flags & IFF_PROMISC) {
-		/* Set RCTRL to PROM */
-		tempval = gfar_read(&regs->rctrl);
-		tempval |= RCTRL_PROM;
-		gfar_write(&regs->rctrl, tempval);
-	} else {
-		/* Set RCTRL to not PROM */
-		tempval = gfar_read(&regs->rctrl);
-		tempval &= ~(RCTRL_PROM);
-		gfar_write(&regs->rctrl, tempval);
-	}
-
-	if (dev->flags & IFF_ALLMULTI) {
-		/* Set the hash to rx all multicast frames */
-		gfar_write(&regs->igaddr0, 0xffffffff);
-		gfar_write(&regs->igaddr1, 0xffffffff);
-		gfar_write(&regs->igaddr2, 0xffffffff);
-		gfar_write(&regs->igaddr3, 0xffffffff);
-		gfar_write(&regs->igaddr4, 0xffffffff);
-		gfar_write(&regs->igaddr5, 0xffffffff);
-		gfar_write(&regs->igaddr6, 0xffffffff);
-		gfar_write(&regs->igaddr7, 0xffffffff);
-		gfar_write(&regs->gaddr0, 0xffffffff);
-		gfar_write(&regs->gaddr1, 0xffffffff);
-		gfar_write(&regs->gaddr2, 0xffffffff);
-		gfar_write(&regs->gaddr3, 0xffffffff);
-		gfar_write(&regs->gaddr4, 0xffffffff);
-		gfar_write(&regs->gaddr5, 0xffffffff);
-		gfar_write(&regs->gaddr6, 0xffffffff);
-		gfar_write(&regs->gaddr7, 0xffffffff);
-	} else {
-		int em_num;
-		int idx;
-
-		/* zero out the hash */
-		gfar_write(&regs->igaddr0, 0x0);
-		gfar_write(&regs->igaddr1, 0x0);
-		gfar_write(&regs->igaddr2, 0x0);
-		gfar_write(&regs->igaddr3, 0x0);
-		gfar_write(&regs->igaddr4, 0x0);
-		gfar_write(&regs->igaddr5, 0x0);
-		gfar_write(&regs->igaddr6, 0x0);
-		gfar_write(&regs->igaddr7, 0x0);
-		gfar_write(&regs->gaddr0, 0x0);
-		gfar_write(&regs->gaddr1, 0x0);
-		gfar_write(&regs->gaddr2, 0x0);
-		gfar_write(&regs->gaddr3, 0x0);
-		gfar_write(&regs->gaddr4, 0x0);
-		gfar_write(&regs->gaddr5, 0x0);
-		gfar_write(&regs->gaddr6, 0x0);
-		gfar_write(&regs->gaddr7, 0x0);
-
-		/* If we have extended hash tables, we need to
-		 * clear the exact match registers to prepare for
-		 * setting them
-		 */
-		if (priv->extended_hash) {
-			em_num = GFAR_EM_NUM + 1;
-			gfar_clear_exact_match(dev);
-			idx = 1;
-		} else {
-			idx = 0;
-			em_num = 0;
-		}
-
-		if (netdev_mc_empty(dev))
-			return;
-
-		/* Parse the list, and set the appropriate bits */
-		netdev_for_each_mc_addr(ha, dev) {
-			if (idx < em_num) {
-				gfar_set_mac_for_addr(dev, idx, ha->addr);
-				idx++;
-			} else
-				gfar_set_hash_for_addr(dev, ha->addr);
-		}
-	}
-}
-
-
-/* Clears each of the exact match registers to zero, so they
- * don't interfere with normal reception
- */
-static void gfar_clear_exact_match(struct net_device *dev)
-{
-	int idx;
-	static const u8 zero_arr[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
-
-	for (idx = 1; idx < GFAR_EM_NUM + 1; idx++)
-		gfar_set_mac_for_addr(dev, idx, zero_arr);
-}
-
-/* Set the appropriate hash bit for the given addr */
-/* The algorithm works like so:
- * 1) Take the Destination Address (i.e. the multicast address), and
- * do a CRC on it (little endian), and reverse the bits of the
- * result.
- * 2) Use the 8 most significant bits as a hash into a 256-entry
- * table.  The table is controlled through 8 32-bit registers:
- * gaddr0-7.  gaddr0's MSB is entry 0, and gaddr7's LSB is
- * entry 255.  This means that the 3 most significant bits of the
- * hash index select which gaddr register to use, and the 5 other bits
- * indicate which bit (assuming an IBM numbering scheme, which
- * for PowerPC (tm) is usually the case) in the register holds
- * the entry.
- */
-static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr)
-{
-	u32 tempval;
-	struct gfar_private *priv = netdev_priv(dev);
-	u32 result = ether_crc(ETH_ALEN, addr);
-	int width = priv->hash_width;
-	u8 whichbit = (result >> (32 - width)) & 0x1f;
-	u8 whichreg = result >> (32 - width + 5);
-	u32 value = (1 << (31-whichbit));
-
-	tempval = gfar_read(priv->hash_regs[whichreg]);
-	tempval |= value;
-	gfar_write(priv->hash_regs[whichreg], tempval);
-}
-
-
-/* There are multiple MAC Address register pairs on some controllers
- * This function sets the numth pair to a given address
- */
-static void gfar_set_mac_for_addr(struct net_device *dev, int num,
-				  const u8 *addr)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 tempval;
-	u32 __iomem *macptr = &regs->macstnaddr1;
-
-	macptr += num*2;
-
-	/* For a station address of 0x12345678ABCD in transmission
-	 * order (BE), MACnADDR1 is set to 0xCDAB7856 and
-	 * MACnADDR2 is set to 0x34120000.
-	 */
-	tempval = (addr[5] << 24) | (addr[4] << 16) |
-		  (addr[3] << 8)  |  addr[2];
-
-	gfar_write(macptr, tempval);
-
-	tempval = (addr[1] << 24) | (addr[0] << 16);
-
-	gfar_write(macptr+1, tempval);
-}
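
The byte swizzle documented in the comment can be verified with a few lines of standalone C (a sketch, using the example station address from the comment above):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* 0x12345678ABCD in transmission order */
	const uint8_t addr[6] = { 0x12, 0x34, 0x56, 0x78, 0xAB, 0xCD };
	uint32_t macaddr1 = ((uint32_t)addr[5] << 24) | (addr[4] << 16) |
			    (addr[3] << 8) | addr[2];
	uint32_t macaddr2 = ((uint32_t)addr[1] << 24) | (addr[0] << 16);

	printf("MACnADDR1 = 0x%08X\n", macaddr1); /* prints 0xCDAB7856 */
	printf("MACnADDR2 = 0x%08X\n", macaddr2); /* prints 0x34120000 */
	return 0;
}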
-
-/* GFAR error interrupt handler */
-static irqreturn_t gfar_error(int irq, void *grp_id)
-{
-	struct gfar_priv_grp *gfargrp = grp_id;
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_private *priv = gfargrp->priv;
-	struct net_device *dev = priv->ndev;
-
-	/* Save ievent for future reference */
-	u32 events = gfar_read(&regs->ievent);
-
-	/* Clear IEVENT */
-	gfar_write(&regs->ievent, events & IEVENT_ERR_MASK);
-
-	/* Magic Packet is not an error. */
-	if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
-	    (events & IEVENT_MAG))
-		events &= ~IEVENT_MAG;
-
-	/* Log the raw event bits when error debugging is enabled */
-	if (netif_msg_rx_err(priv) || netif_msg_tx_err(priv))
-		netdev_dbg(dev,
-			   "error interrupt (ievent=0x%08x imask=0x%08x)\n",
-			   events, gfar_read(&regs->imask));
-
-	/* Update the error counters */
-	if (events & IEVENT_TXE) {
-		dev->stats.tx_errors++;
-
-		if (events & IEVENT_LC)
-			dev->stats.tx_window_errors++;
-		if (events & IEVENT_CRL)
-			dev->stats.tx_aborted_errors++;
-		if (events & IEVENT_XFUN) {
-			netif_dbg(priv, tx_err, dev,
-				  "TX FIFO underrun, packet dropped\n");
-			dev->stats.tx_dropped++;
-			atomic64_inc(&priv->extra_stats.tx_underrun);
-
-			schedule_work(&priv->reset_task);
-		}
-		netif_dbg(priv, tx_err, dev, "Transmit Error\n");
-	}
-	if (events & IEVENT_BSY) {
-		dev->stats.rx_over_errors++;
-		atomic64_inc(&priv->extra_stats.rx_bsy);
-
-		netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n",
-			  gfar_read(&regs->rstat));
-	}
-	if (events & IEVENT_BABR) {
-		dev->stats.rx_errors++;
-		atomic64_inc(&priv->extra_stats.rx_babr);
-
-		netif_dbg(priv, rx_err, dev, "babbling RX error\n");
-	}
-	if (events & IEVENT_EBERR) {
-		atomic64_inc(&priv->extra_stats.eberr);
-		netif_dbg(priv, rx_err, dev, "bus error\n");
-	}
-	if (events & IEVENT_RXC)
-		netif_dbg(priv, rx_status, dev, "control frame\n");
-
-	if (events & IEVENT_BABT) {
-		atomic64_inc(&priv->extra_stats.tx_babt);
-		netif_dbg(priv, tx_err, dev, "babbling TX error\n");
-	}
-	return IRQ_HANDLED;
-}
-
-static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
-{
-	struct net_device *ndev = priv->ndev;
-	struct phy_device *phydev = ndev->phydev;
-	u32 val = 0;
-
-	if (!phydev->duplex)
-		return val;
-
-	if (!priv->pause_aneg_en) {
-		if (priv->tx_pause_en)
-			val |= MACCFG1_TX_FLOW;
-		if (priv->rx_pause_en)
-			val |= MACCFG1_RX_FLOW;
-	} else {
-		u16 lcl_adv, rmt_adv;
-		u8 flowctrl;
-		/* get link partner capabilities */
-		rmt_adv = 0;
-		if (phydev->pause)
-			rmt_adv = LPA_PAUSE_CAP;
-		if (phydev->asym_pause)
-			rmt_adv |= LPA_PAUSE_ASYM;
-
-		lcl_adv = 0;
-		if (phydev->advertising & ADVERTISED_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (phydev->advertising & ADVERTISED_Asym_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_ASYM;
-
-		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
-		if (flowctrl & FLOW_CTRL_TX)
-			val |= MACCFG1_TX_FLOW;
-		if (flowctrl & FLOW_CTRL_RX)
-			val |= MACCFG1_RX_FLOW;
-	}
-
-	return val;
-}
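
gfar_get_flowctrl_cfg() leans on mii_resolve_flowctrl_fdx() for the autoneg case. For reference, this is the IEEE 802.3 pause resolution it is assumed to perform (a standalone paraphrase of the helper in include/linux/mii.h, not the driver's code):

#include <stdint.h>
#include <stdio.h>

#define ADVERTISE_PAUSE_CAP	0x0400
#define ADVERTISE_PAUSE_ASYM	0x0800
#define FLOW_CTRL_TX		0x01
#define FLOW_CTRL_RX		0x02

static uint8_t resolve_flowctrl_fdx(uint16_t lcl_adv, uint16_t rmt_adv)
{
	uint8_t cap = 0;

	if (lcl_adv & rmt_adv & ADVERTISE_PAUSE_CAP)
		cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
	else if (lcl_adv & rmt_adv & ADVERTISE_PAUSE_ASYM) {
		if (lcl_adv & ADVERTISE_PAUSE_CAP)
			cap = FLOW_CTRL_RX;
		else if (rmt_adv & ADVERTISE_PAUSE_CAP)
			cap = FLOW_CTRL_TX;
	}
	return cap;
}

int main(void)
{
	/* symmetric pause on both ends -> TX and RX flow control (3) */
	printf("%d\n", resolve_flowctrl_fdx(ADVERTISE_PAUSE_CAP,
					    ADVERTISE_PAUSE_CAP));
	return 0;
}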
-
-static noinline void gfar_update_link_state(struct gfar_private *priv)
-{
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	struct net_device *ndev = priv->ndev;
-	struct phy_device *phydev = ndev->phydev;
-	struct gfar_priv_rx_q *rx_queue = NULL;
-	int i;
-
-	if (unlikely(test_bit(GFAR_RESETTING, &priv->state)))
-		return;
-
-	if (phydev->link) {
-		u32 tempval1 = gfar_read(&regs->maccfg1);
-		u32 tempval = gfar_read(&regs->maccfg2);
-		u32 ecntrl = gfar_read(&regs->ecntrl);
-		u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
-
-		if (phydev->duplex != priv->oldduplex) {
-			if (!(phydev->duplex))
-				tempval &= ~(MACCFG2_FULL_DUPLEX);
-			else
-				tempval |= MACCFG2_FULL_DUPLEX;
-
-			priv->oldduplex = phydev->duplex;
-		}
-
-		if (phydev->speed != priv->oldspeed) {
-			switch (phydev->speed) {
-			case 1000:
-				tempval =
-				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
-
-				ecntrl &= ~(ECNTRL_R100);
-				break;
-			case 100:
-			case 10:
-				tempval =
-				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
-
-				/* Reduced mode distinguishes
-				 * between 10 and 100
-				 */
-				if (phydev->speed == SPEED_100)
-					ecntrl |= ECNTRL_R100;
-				else
-					ecntrl &= ~(ECNTRL_R100);
-				break;
-			default:
-				netif_warn(priv, link, priv->ndev,
-					   "Ack!  Speed (%d) is not 10/100/1000!\n",
-					   phydev->speed);
-				break;
-			}
-
-			priv->oldspeed = phydev->speed;
-		}
-
-		tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
-		tempval1 |= gfar_get_flowctrl_cfg(priv);
-
-		/* Turn last free buffer recording on */
-		if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) {
-			for (i = 0; i < priv->num_rx_queues; i++) {
-				u32 bdp_dma;
-
-				rx_queue = priv->rx_queue[i];
-				bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
-				gfar_write(rx_queue->rfbptr, bdp_dma);
-			}
-
-			priv->tx_actual_en = 1;
-		}
-
-		if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval))
-			priv->tx_actual_en = 0;
-
-		gfar_write(&regs->maccfg1, tempval1);
-		gfar_write(&regs->maccfg2, tempval);
-		gfar_write(&regs->ecntrl, ecntrl);
-
-		if (!priv->oldlink)
-			priv->oldlink = 1;
-
-	} else if (priv->oldlink) {
-		priv->oldlink = 0;
-		priv->oldspeed = 0;
-		priv->oldduplex = -1;
-	}
-
-	if (netif_msg_link(priv))
-		phy_print_status(phydev);
-}
-
 static const struct of_device_id gfar_match[] =
 {
 	{
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 8e42c02..f472a6d 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/freescale/gianfar.h
  *
@@ -11,11 +12,6 @@
  *
  * Copyright 2002-2009, 2011-2013 Freescale Semiconductor, Inc.
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  *  Still left to do:
  *      -Add support for module parameters
  *	-Add patch for ethtool phys id
@@ -71,8 +67,6 @@
 /* Number of bytes to align the rx bufs to */
 #define RXBUF_ALIGNMENT 64
 
-#define PHY_INIT_TIMEOUT 100000
-
 #define DRV_NAME "gfar-enet"
 extern const char gfar_driver_version[];
 
@@ -92,10 +86,6 @@
 #define GFAR_RX_MAX_RING_SIZE   256
 #define GFAR_TX_MAX_RING_SIZE   256
 
-#define GFAR_MAX_FIFO_THRESHOLD 511
-#define GFAR_MAX_FIFO_STARVE	511
-#define GFAR_MAX_FIFO_STARVE_OFF 511
-
 #define FBTHR_SHIFT        24
 #define DEFAULT_RX_LFC_THR  16
 #define DEFAULT_LFC_PTVVAL  4
@@ -113,9 +103,6 @@
 #define DEFAULT_FIFO_TX_THR 0x100
 #define DEFAULT_FIFO_TX_STARVE 0x40
 #define DEFAULT_FIFO_TX_STARVE_OFF 0x80
-#define DEFAULT_BD_STASH 1
-#define DEFAULT_STASH_LENGTH	96
-#define DEFAULT_STASH_INDEX	0
 
 /* The number of Exact Match registers */
 #define GFAR_EM_NUM	15
@@ -143,15 +130,6 @@
 #define DEFAULT_RX_COALESCE 0
 #define DEFAULT_RXCOUNT	0
 
-#define GFAR_SUPPORTED (SUPPORTED_10baseT_Half \
-		| SUPPORTED_10baseT_Full \
-		| SUPPORTED_100baseT_Half \
-		| SUPPORTED_100baseT_Full \
-		| SUPPORTED_Autoneg \
-		| SUPPORTED_MII)
-
-#define GFAR_SUPPORTED_GBIT SUPPORTED_1000baseT_Full
-
 /* TBI register addresses */
 #define MII_TBICON		0x11
 
@@ -189,8 +167,6 @@
 #define ECNTRL_REDUCED_MII_MODE	0x00000004
 #define ECNTRL_SGMII_MODE	0x00000002
 
-#define MRBLR_INIT_SETTINGS	DEFAULT_RX_BUFFER_SIZE
-
 #define MINFLR_INIT_SETTINGS	0x00000040
 
 /* Tqueue control */
@@ -270,12 +246,6 @@
 #define DEFAULT_TXIC mk_ic_value(DEFAULT_TXCOUNT, DEFAULT_TXTIME)
 #define DEFAULT_RXIC mk_ic_value(DEFAULT_RXCOUNT, DEFAULT_RXTIME)
 
-#define skip_bd(bdp, stride, base, ring_size) ({ \
-	typeof(bdp) new_bd = (bdp) + (stride); \
-	(new_bd >= (base) + (ring_size)) ? (new_bd - (ring_size)) : new_bd; })
-
-#define next_bd(bdp, base, ring_size) skip_bd(bdp, 1, base, ring_size)
-
 #define RCTRL_TS_ENABLE 	0x01000000
 #define RCTRL_PAL_MASK		0x001f0000
 #define RCTRL_LFC		0x00004000
@@ -389,11 +359,6 @@
 #define IMASK_RX_DISABLED ((~(IMASK_RX_DEFAULT)) & IMASK_DEFAULT)
 #define IMASK_TX_DISABLED ((~(IMASK_TX_DEFAULT)) & IMASK_DEFAULT)
 
-/* Fifo management */
-#define FIFO_TX_THR_MASK	0x01ff
-#define FIFO_TX_STARVE_MASK	0x01ff
-#define FIFO_TX_STARVE_OFF_MASK	0x01ff
-
 /* Attribute fields */
 
 /* This enables rx snooping for buffers and descriptors */
@@ -1330,16 +1295,9 @@
 	return bdp_dma;
 }
 
-irqreturn_t gfar_receive(int irq, void *dev_id);
 int startup_gfar(struct net_device *dev);
 void stop_gfar(struct net_device *dev);
-void reset_gfar(struct net_device *dev);
 void gfar_mac_reset(struct gfar_private *priv);
-void gfar_halt(struct gfar_private *priv);
-void gfar_start(struct gfar_private *priv);
-void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev, int enable,
-		   u32 regnum, u32 read);
-void gfar_configure_coalescing_all(struct gfar_private *priv);
 int gfar_set_features(struct net_device *dev, netdev_features_t features);
 
 extern const struct ethtool_ops gfar_ethtool_ops;
@@ -1352,13 +1310,6 @@
 #define RQFCR_PID_PORT_MASK 0xFFFF0000
 #define RQFCR_PID_MAC_MASK 0xFF000000
 
-struct gfar_mask_entry {
-	unsigned int mask; /* The mask value which is valid from start to end */
-	unsigned int start;
-	unsigned int end;
-	unsigned int block; /* Same block values indicate depended entries */
-};
-
 /* Represents a receive filer table entry */
 struct gfar_filer_entry {
 	u32 ctrl;
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 395a526..3c8e4e2 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  drivers/net/ethernet/freescale/gianfar_ethtool.c
  *
@@ -10,10 +11,6 @@
  *  Modifier: Sandeep Gopalpet <sandeep.kumar@freescale.com>
  *
  *  Copyright 2003-2006, 2008-2009, 2011 Freescale Semiconductor, Inc.
- *
- *  This software may be used and distributed according to
- *  the terms of the GNU Public License, Version 2, incorporated herein
- *  by reference.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -48,19 +45,6 @@
 
 #define GFAR_MAX_COAL_USECS 0xffff
 #define GFAR_MAX_COAL_FRAMES 0xff
-static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy,
-			    u64 *buf);
-static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf);
-static int gfar_gcoalesce(struct net_device *dev,
-			  struct ethtool_coalesce *cvals);
-static int gfar_scoalesce(struct net_device *dev,
-			  struct ethtool_coalesce *cvals);
-static void gfar_gringparam(struct net_device *dev,
-			    struct ethtool_ringparam *rvals);
-static int gfar_sringparam(struct net_device *dev,
-			   struct ethtool_ringparam *rvals);
-static void gfar_gdrvinfo(struct net_device *dev,
-			  struct ethtool_drvinfo *drvinfo);
 
 static const char stat_gstrings[][ETH_GSTRING_LEN] = {
 	/* extra stats */
@@ -230,7 +214,7 @@
 
 	/* Make sure we return a number greater than 0
 	 * if usecs > 0 */
-	return (usecs * 1000 + count - 1) / count;
+	return DIV_ROUND_UP(usecs * 1000, count);
 }
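
DIV_ROUND_UP is the kernel's ceiling-division macro, so this hunk is behaviour-preserving. A quick standalone check (DIV_ROUND_UP reproduced here as defined in include/linux/kernel.h):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int usecs = 3, count = 7;

	printf("%u\n", (usecs * 1000 + count - 1) / count); /* 429 */
	printf("%u\n", DIV_ROUND_UP(usecs * 1000, count));  /* 429 */
	return 0;
}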
 
 /* Convert ethernet clock ticks to microseconds */
@@ -503,65 +487,44 @@
 	struct gfar_private *priv = netdev_priv(dev);
 	struct phy_device *phydev = dev->phydev;
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 oldadv, newadv;
 
 	if (!phydev)
 		return -ENODEV;
 
-	if (!(phydev->supported & SUPPORTED_Pause) ||
-	    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-	     (epause->rx_pause != epause->tx_pause)))
+	if (!phy_validate_pause(phydev, epause))
 		return -EINVAL;
 
 	priv->rx_pause_en = priv->tx_pause_en = 0;
+	phy_set_asym_pause(phydev, epause->rx_pause, epause->tx_pause);
 	if (epause->rx_pause) {
 		priv->rx_pause_en = 1;
 
 		if (epause->tx_pause) {
 			priv->tx_pause_en = 1;
-			/* FLOW_CTRL_RX & TX */
-			newadv = ADVERTISED_Pause;
-		} else  /* FLOW_CTLR_RX */
-			newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+		}
 	} else if (epause->tx_pause) {
 		priv->tx_pause_en = 1;
-		/* FLOW_CTLR_TX */
-		newadv = ADVERTISED_Asym_Pause;
-	} else
-		newadv = 0;
+	}
 
 	if (epause->autoneg)
 		priv->pause_aneg_en = 1;
 	else
 		priv->pause_aneg_en = 0;
 
-	oldadv = phydev->advertising &
-		(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-	if (oldadv != newadv) {
-		phydev->advertising &=
-			~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-		phydev->advertising |= newadv;
-		if (phydev->autoneg)
-			/* inform link partner of our
-			 * new flow ctrl settings
-			 */
-			return phy_start_aneg(phydev);
+	if (!epause->autoneg) {
+		u32 tempval = gfar_read(&regs->maccfg1);
 
-		if (!epause->autoneg) {
-			u32 tempval;
-			tempval = gfar_read(&regs->maccfg1);
-			tempval &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
+		tempval &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
 
-			priv->tx_actual_en = 0;
-			if (priv->tx_pause_en) {
-				priv->tx_actual_en = 1;
-				tempval |= MACCFG1_TX_FLOW;
-			}
-
-			if (priv->rx_pause_en)
-				tempval |= MACCFG1_RX_FLOW;
-			gfar_write(&regs->maccfg1, tempval);
+		priv->tx_actual_en = 0;
+		if (priv->tx_pause_en) {
+			priv->tx_actual_en = 1;
+			tempval |= MACCFG1_TX_FLOW;
 		}
+
+		if (priv->rx_pause_en)
+			tempval |= MACCFG1_RX_FLOW;
+		gfar_write(&regs->maccfg1, tempval);
 	}
 
 	return 0;
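
The open-coded advertisement update this hunk deletes encoded the (rx, tx) pause request into the Pause/Asym_Pause link-mode bits; phy_set_asym_pause() is assumed to apply the same mapping internally. A standalone sketch of that encoding:

#include <stdint.h>
#include <stdio.h>

#define ADVERTISED_Pause	(1u << 13)
#define ADVERTISED_Asym_Pause	(1u << 14)

static uint32_t pause_adv_bits(int rx, int tx)
{
	if (rx && tx)
		return ADVERTISED_Pause;
	if (rx)
		return ADVERTISED_Pause | ADVERTISED_Asym_Pause;
	if (tx)
		return ADVERTISED_Asym_Pause;
	return 0;
}

int main(void)
{
	printf("rx only -> 0x%x\n", pause_adv_bits(1, 0)); /* 0x6000 */
	printf("tx only -> 0x%x\n", pause_adv_bits(0, 1)); /* 0x4000 */
	return 0;
}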
@@ -1155,11 +1118,9 @@
 		prio = vlan_tci_prio(rule);
 		prio_mask = vlan_tci_priom(rule);
 
-		if (cfi == VLAN_TAG_PRESENT && cfi_mask == VLAN_TAG_PRESENT) {
-			vlan |= RQFPR_CFI;
-			vlan_mask |= RQFPR_CFI;
-		} else if (cfi != VLAN_TAG_PRESENT &&
-			   cfi_mask == VLAN_TAG_PRESENT) {
+		if (cfi_mask) {
+			if (cfi)
+				vlan |= RQFPR_CFI;
 			vlan_mask |= RQFPR_CFI;
 		}
 	}
@@ -1515,7 +1476,7 @@
 	struct gfar_private *priv = netdev_priv(dev);
 	struct platform_device *ptp_dev;
 	struct device_node *ptp_node;
-	struct qoriq_ptp *ptp = NULL;
+	struct ptp_qoriq *ptp = NULL;
 
 	info->phc_index = -1;
 
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 22a817d..f839fa9 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
 * Copyright (C) 2006-2009 Freescale Semiconductor, Inc. All rights reserved.
  *
@@ -6,11 +7,6 @@
  *
  * Description:
  * QE UCC Gigabit Ethernet Driver
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -30,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mii.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/workqueue.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
@@ -1742,17 +1739,7 @@
 	if (priv->phy_interface == PHY_INTERFACE_MODE_SGMII)
 		uec_configure_serdes(dev);
 
-	phydev->supported &= (SUPPORTED_MII |
-			      SUPPORTED_Autoneg |
-			      ADVERTISED_10baseT_Half |
-			      ADVERTISED_10baseT_Full |
-			      ADVERTISED_100baseT_Half |
-			      ADVERTISED_100baseT_Full);
-
-	if (priv->max_speed == SPEED_1000)
-		phydev->supported |= ADVERTISED_1000baseT_Full;
-
-	phydev->advertising = phydev->supported;
+	phy_set_max_speed(phydev, priv->max_speed);
 
 	priv->phydev = phydev;
 
@@ -1888,6 +1875,8 @@
 	u16 i, j;
 	u8 __iomem *bd;
 
+	netdev_reset_queue(ugeth->ndev);
+
 	ug_info = ugeth->ug_info;
 	uf_info = &ug_info->uf_info;
 
@@ -3083,7 +3072,8 @@
 
 /* This is called by the kernel when a frame is ready for transmission. */
 /* It is pointed to by the dev->hard_start_xmit function pointer */
-static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ucc_geth_private *ugeth = netdev_priv(dev);
 #ifdef CONFIG_UGETH_TX_ON_DEMAND
@@ -3685,6 +3675,7 @@
 	.ndo_stop		= ucc_geth_close,
 	.ndo_start_xmit		= ucc_geth_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_carrier     = fixed_phy_change_carrier,
 	.ndo_set_mac_address	= ucc_geth_set_mac_addr,
 	.ndo_set_rx_mode	= ucc_geth_set_multi,
 	.ndo_tx_timeout		= ucc_geth_timeout,
@@ -3915,8 +3906,8 @@
 	}
 
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
-		memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(dev->dev_addr, mac_addr);
 
 	ugeth->ug_info = ug_info;
 	ugeth->dev = device;
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index 5da19b4..a86a421 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
 * Copyright (C) Freescale Semiconductor, Inc. 2006-2009. All rights reserved.
  *
@@ -9,11 +10,6 @@
  * Changelog:
  * Jun 28, 2006 Li Yang <LeoLi@freescale.com>
  * - Rearrange code and style fixes
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #ifndef __UCC_GETH_H__
 #define __UCC_GETH_H__
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 0beee2c..dfebacf 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2007 Freescale Semiconductor, Inc. All rights reserved.
  *
@@ -8,11 +9,6 @@
  * Limitation:
  * Can only get/set settings of the first queue.
  * Need to re-open the interface manually after changing some parameters.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -252,14 +248,12 @@
 		return -EINVAL;
 	}
 
+	if (netif_running(netdev))
+		return -EBUSY;
+
 	ug_info->bdRingLenRx[queue] = ring->rx_pending;
 	ug_info->bdRingLenTx[queue] = ring->tx_pending;
 
-	if (netif_running(netdev)) {
-		/* FIXME: restart automatically */
-		netdev_info(netdev, "Please re-open the interface\n");
-	}
-
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/fujitsu/Kconfig b/drivers/net/ethernet/fujitsu/Kconfig
index faee34e..cee99f2 100644
--- a/drivers/net/ethernet/fujitsu/Kconfig
+++ b/drivers/net/ethernet/fujitsu/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Fujitsu Network device configuration
 #
diff --git a/drivers/net/ethernet/fujitsu/Makefile b/drivers/net/ethernet/fujitsu/Makefile
index 21561fd..74feebb 100644
--- a/drivers/net/ethernet/fujitsu/Makefile
+++ b/drivers/net/ethernet/fujitsu/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Fujitsu network device drivers.
 #
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index a69cd19..1eca0fd 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -547,6 +547,11 @@
 	return -1;
 
     base = ioremap(link->resource[2]->start, resource_size(link->resource[2]));
+    if (!base) {
+	    pcmcia_release_window(link, link->resource[2]);
+	    return -ENOMEM;
+    }
+
     pcmcia_map_mem_page(link, link->resource[2], 0);
 
     /*
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
new file mode 100644
index 0000000..b8f04d0
--- /dev/null
+++ b/drivers/net/ethernet/google/Kconfig
@@ -0,0 +1,27 @@
+#
+# Google network device configuration
+#
+
+config NET_VENDOR_GOOGLE
+	bool "Google Devices"
+	default y
+	help
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Google devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_GOOGLE
+
+config GVE
+	tristate "Google Virtual NIC (gVNIC) support"
+	depends on PCI_MSI
+	help
+	  This driver supports Google Virtual NIC (gVNIC).
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called gve.
+
+endif # NET_VENDOR_GOOGLE
diff --git a/drivers/net/ethernet/google/Makefile b/drivers/net/ethernet/google/Makefile
new file mode 100644
index 0000000..402cc3b
--- /dev/null
+++ b/drivers/net/ethernet/google/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Google network device drivers.
+#
+
+obj-$(CONFIG_GVE) += gve/
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
new file mode 100644
index 0000000..3354ce4
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -0,0 +1,4 @@
+# Makefile for the Google virtual Ethernet (gve) driver
+
+obj-$(CONFIG_GVE) += gve.o
+gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
new file mode 100644
index 0000000..ebc37e2
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_H_
+#define _GVE_H_
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/u64_stats_sync.h>
+#include "gve_desc.h"
+
+#ifndef PCI_VENDOR_ID_GOOGLE
+#define PCI_VENDOR_ID_GOOGLE	0x1ae0
+#endif
+
+#define PCI_DEV_ID_GVNIC	0x0042
+
+#define GVE_REGISTER_BAR	0
+#define GVE_DOORBELL_BAR	2
+
+/* Driver can alloc up to 2 segments for the header and 2 for the payload. */
+#define GVE_TX_MAX_IOVEC	4
+/* 1 for management, 1 for rx, 1 for tx */
+#define GVE_MIN_MSIX 3
+
+/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
+struct gve_rx_desc_queue {
+	struct gve_rx_desc *desc_ring; /* the descriptor ring */
+	dma_addr_t bus; /* the bus for the desc_ring */
+	u8 seqno; /* the next expected seqno for this desc */
+};
+
+/* The page info for a single slot in the RX data queue */
+struct gve_rx_slot_page_info {
+	struct page *page;
+	void *page_address;
+	u32 page_offset; /* offset to write to in page */
+};
+
+/* A list of pages registered with the device during setup and used by a queue
+ * as buffers
+ */
+struct gve_queue_page_list {
+	u32 id; /* unique id */
+	u32 num_entries;
+	struct page **pages; /* list of num_entries pages */
+	dma_addr_t *page_buses; /* the dma addrs of the pages */
+};
+
+/* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */
+struct gve_rx_data_queue {
+	struct gve_rx_data_slot *data_ring; /* read by NIC */
+	dma_addr_t data_bus; /* dma mapping of the slots */
+	struct gve_rx_slot_page_info *page_info; /* page info of the buffers */
+	struct gve_queue_page_list *qpl; /* qpl assigned to this queue */
+};
+
+struct gve_priv;
+
+/* An RX ring that contains a power-of-two sized desc and data ring. */
+struct gve_rx_ring {
+	struct gve_priv *gve;
+	struct gve_rx_desc_queue desc;
+	struct gve_rx_data_queue data;
+	u64 rbytes; /* free-running bytes received */
+	u64 rpackets; /* free-running packets received */
+	u32 cnt; /* free-running total number of completed packets */
+	u32 fill_cnt; /* free-running total number of descs and buffs posted */
+	u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+	u32 q_num; /* queue index */
+	u32 ntfy_id; /* notification block index */
+	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+	dma_addr_t q_resources_bus; /* dma address for the queue resources */
+	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+};
+
+/* A TX desc ring entry */
+union gve_tx_desc {
+	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
+	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
+};
+
+/* Tracks the memory in the fifo occupied by a segment of a packet */
+struct gve_tx_iovec {
+	u32 iov_offset; /* offset into this segment */
+	u32 iov_len; /* length */
+	u32 iov_padding; /* padding associated with this segment */
+};
+
+/* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
+ * ring entry but only used for a pkt_desc not a seg_desc
+ */
+struct gve_tx_buffer_state {
+	struct sk_buff *skb; /* skb for this pkt */
+	struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+};
+
+/* A TX buffer - each queue has one */
+struct gve_tx_fifo {
+	void *base; /* address of base of FIFO */
+	u32 size; /* total size */
+	atomic_t available; /* how much space is still available */
+	u32 head; /* offset to write at */
+	struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */
+};
+
+/* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */
+struct gve_tx_ring {
+	/* Cacheline 0 -- Accessed & dirtied during transmit */
+	struct gve_tx_fifo tx_fifo;
+	u32 req; /* driver tracked head pointer */
+	u32 done; /* driver tracked tail pointer */
+
+	/* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */
+	__be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
+	u64 pkt_done; /* free-running - total packets completed */
+	u64 bytes_done; /* free-running - total bytes completed */
+
+	/* Cacheline 2 -- Read-mostly fields */
+	union gve_tx_desc *desc ____cacheline_aligned;
+	struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
+	struct netdev_queue *netdev_txq;
+	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+	u32 mask; /* masks req and done down to queue size */
+
+	/* Slow-path fields */
+	u32 q_num ____cacheline_aligned; /* queue idx */
+	u32 stop_queue; /* count of queue stops */
+	u32 wake_queue; /* count of queue wakes */
+	u32 ntfy_id; /* notification block index */
+	dma_addr_t bus; /* dma address of the descr ring */
+	dma_addr_t q_resources_bus; /* dma address of the queue resources */
+	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+} ____cacheline_aligned;
+
+/* Wraps the info for one irq including the napi struct and the queues
+ * associated with that irq.
+ */
+struct gve_notify_block {
+	__be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
+	char name[IFNAMSIZ + 16]; /* name registered with the kernel */
+	struct napi_struct napi; /* kernel napi struct for this block */
+	struct gve_priv *priv;
+	struct gve_tx_ring *tx; /* tx rings on this block */
+	struct gve_rx_ring *rx; /* rx rings on this block */
+} ____cacheline_aligned;
+
+/* Tracks allowed and current queue settings */
+struct gve_queue_config {
+	u16 max_queues;
+	u16 num_queues; /* current */
+};
+
+/* Tracks the available and used qpl IDs */
+struct gve_qpl_config {
+	u32 qpl_map_size; /* map memory size */
+	unsigned long *qpl_id_map; /* bitmap of used qpl ids */
+};
+
+struct gve_priv {
+	struct net_device *dev;
+	struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
+	struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
+	struct gve_queue_page_list *qpls; /* array of num qpls */
+	struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
+	dma_addr_t ntfy_block_bus;
+	struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
+	char mgmt_msix_name[IFNAMSIZ + 16];
+	u32 mgmt_msix_idx;
+	__be32 *counter_array; /* array of num_event_counters */
+	dma_addr_t counter_array_bus;
+
+	u16 num_event_counters;
+	u16 tx_desc_cnt; /* num desc per ring */
+	u16 rx_desc_cnt; /* num desc per ring */
+	u16 tx_pages_per_qpl; /* tx buffer length */
+	u16 rx_pages_per_qpl; /* rx buffer length */
+	u64 max_registered_pages;
+	u64 num_registered_pages; /* num pages registered with NIC */
+	u32 rx_copybreak; /* copy packets smaller than this */
+	u16 default_num_queues; /* default num queues to set up */
+
+	struct gve_queue_config tx_cfg;
+	struct gve_queue_config rx_cfg;
+	struct gve_qpl_config qpl_cfg; /* map used QPL ids */
+	u32 num_ntfy_blks; /* split between TX and RX so must be even */
+
+	struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
+	__be32 __iomem *db_bar2; /* "array" of doorbells */
+	u32 msg_enable;	/* level for netif* netdev print macros	*/
+	struct pci_dev *pdev;
+
+	/* metrics */
+	u32 tx_timeo_cnt;
+
+	/* Admin queue - see gve_adminq.h */
+	union gve_adminq_command *adminq;
+	dma_addr_t adminq_bus_addr;
+	u32 adminq_mask; /* masks prod_cnt to adminq size */
+	u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
+
+	struct workqueue_struct *gve_wq;
+	struct work_struct service_task;
+	unsigned long service_task_flags;
+	unsigned long state_flags;
+};
+
+enum gve_service_task_flags {
+	GVE_PRIV_FLAGS_DO_RESET			= BIT(1),
+	GVE_PRIV_FLAGS_RESET_IN_PROGRESS	= BIT(2),
+	GVE_PRIV_FLAGS_PROBE_IN_PROGRESS	= BIT(3),
+};
+
+enum gve_state_flags {
+	GVE_PRIV_FLAGS_ADMIN_QUEUE_OK		= BIT(1),
+	GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK	= BIT(2),
+	GVE_PRIV_FLAGS_DEVICE_RINGS_OK		= BIT(3),
+	GVE_PRIV_FLAGS_NAPI_ENABLED		= BIT(4),
+};
+
+static inline bool gve_get_do_reset(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags);
+}
+
+static inline void gve_set_do_reset(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags);
+}
+
+static inline void gve_clear_do_reset(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags);
+}
+
+static inline bool gve_get_reset_in_progress(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS,
+			&priv->service_task_flags);
+}
+
+static inline void gve_set_reset_in_progress(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline void gve_clear_reset_in_progress(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline bool gve_get_probe_in_progress(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS,
+			&priv->service_task_flags);
+}
+
+static inline void gve_set_probe_in_progress(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline void gve_clear_probe_in_progress(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline bool gve_get_admin_queue_ok(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline void gve_set_admin_queue_ok(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_admin_queue_ok(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline bool gve_get_device_resources_ok(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags);
+}
+
+static inline void gve_set_device_resources_ok(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_device_resources_ok(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags);
+}
+
+static inline bool gve_get_device_rings_ok(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags);
+}
+
+static inline void gve_set_device_rings_ok(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_device_rings_ok(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags);
+}
+
+static inline bool gve_get_napi_enabled(struct gve_priv *priv)
+{
+	return test_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
+}
+
+static inline void gve_set_napi_enabled(struct gve_priv *priv)
+{
+	set_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
+}
+
+static inline void gve_clear_napi_enabled(struct gve_priv *priv)
+{
+	clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
+}
+
+/* Returns the address of the ntfy_blocks irq doorbell
+ */
+static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
+					       struct gve_notify_block *block)
+{
+	return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
+}
+
+/* Returns the index into ntfy_blocks of the given tx ring's block
+ */
+static inline u32 gve_tx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
+{
+	return queue_idx;
+}
+
+/* Returns the index into ntfy_blocks of the given rx ring's block
+ */
+static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
+{
+	return (priv->num_ntfy_blks / 2) + queue_idx;
+}
+
+/* Returns the number of tx queue page lists
+ */
+static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
+{
+	return priv->tx_cfg.num_queues;
+}
+
+/* Returns the number of rx queue page lists
+ */
+static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
+{
+	return priv->rx_cfg.num_queues;
+}
+
+/* Returns a pointer to the next available tx qpl in the list of qpls
+ */
+static inline
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+{
+	int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
+				     priv->qpl_cfg.qpl_map_size);
+
+	/* we are out of tx qpls */
+	if (id >= gve_num_tx_qpls(priv))
+		return NULL;
+
+	set_bit(id, priv->qpl_cfg.qpl_id_map);
+	return &priv->qpls[id];
+}
+
+/* Returns a pointer to the next available rx qpl in the list of qpls
+ */
+static inline
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+{
+	int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
+				    priv->qpl_cfg.qpl_map_size,
+				    gve_num_tx_qpls(priv));
+
+	/* we are out of rx qpls */
+	if (id == priv->qpl_cfg.qpl_map_size)
+		return NULL;
+
+	set_bit(id, priv->qpl_cfg.qpl_id_map);
+	return &priv->qpls[id];
+}
+
+/* Unassigns the qpl with the given id
+ */
+static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
+{
+	clear_bit(id, priv->qpl_cfg.qpl_id_map);
+}
+
+/* Returns the correct dma direction for tx and rx qpls
+ */
+static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
+						      int id)
+{
+	if (id < gve_num_tx_qpls(priv))
+		return DMA_TO_DEVICE;
+	else
+		return DMA_FROM_DEVICE;
+}
+
+/* Returns true if the max mtu allows page recycling */
+static inline bool gve_can_recycle_pages(struct net_device *dev)
+{
+	/* We can't recycle the pages if we can't fit a packet into half a
+	 * page.
+	 */
+	return dev->max_mtu <= PAGE_SIZE / 2;
+}
+
+/* buffers */
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+		   enum dma_data_direction);
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+		   enum dma_data_direction);
+/* tx handling */
+netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+bool gve_tx_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings(struct gve_priv *priv);
+void gve_tx_free_rings(struct gve_priv *priv);
+__be32 gve_tx_load_event_counter(struct gve_priv *priv,
+				 struct gve_tx_ring *tx);
+/* rx handling */
+void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
+bool gve_rx_poll(struct gve_notify_block *block, int budget);
+int gve_rx_alloc_rings(struct gve_priv *priv);
+void gve_rx_free_rings(struct gve_priv *priv);
+bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
+		       netdev_features_t feat);
+/* Reset */
+void gve_schedule_reset(struct gve_priv *priv);
+int gve_reset(struct gve_priv *priv, bool attempt_teardown);
+int gve_adjust_queues(struct gve_priv *priv,
+		      struct gve_queue_config new_rx_config,
+		      struct gve_queue_config new_tx_config);
+/* exported by ethtool.c */
+extern const struct ethtool_ops gve_ethtool_ops;
+/* needed by ethtool */
+extern const char gve_version_str[];
+#endif /* _GVE_H_ */
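
The two assign helpers above carve a single qpl id bitmap into a tx region [0, num_tx_qpls) and an rx region [num_tx_qpls, qpl_map_size). A standalone sketch of that split-bitmap allocation (not driver code; plain bools stand in for the kernel bitmap API):

#include <stdbool.h>
#include <stdio.h>

#define NUM_TX_QPLS 4
#define NUM_QPLS    8

static bool used[NUM_QPLS];

static int assign_qpl(bool rx)
{
	int start = rx ? NUM_TX_QPLS : 0;
	int end = rx ? NUM_QPLS : NUM_TX_QPLS;
	int id;

	for (id = start; id < end; id++) {
		if (!used[id]) {
			used[id] = true;
			return id;
		}
	}
	return -1; /* out of qpls of the requested kind */
}

static void unassign_qpl(int id)
{
	used[id] = false;
}

int main(void)
{
	printf("tx qpl id: %d\n", assign_qpl(false)); /* 0 */
	printf("rx qpl id: %d\n", assign_qpl(true));  /* 4 */
	unassign_qpl(0);
	printf("tx qpl id: %d\n", assign_qpl(false)); /* 0 again */
	return 0;
}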
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
new file mode 100644
index 0000000..c3ba7ba
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+#define GVE_MAX_ADMINQ_RELEASE_CHECK	500
+#define GVE_ADMINQ_SLEEP_LEN		20
+#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK	100
+
+int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
+{
+	priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE,
+					  &priv->adminq_bus_addr, GFP_KERNEL);
+	if (unlikely(!priv->adminq))
+		return -ENOMEM;
+
+	priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1;
+	priv->adminq_prod_cnt = 0;
+
+	/* Setup Admin queue with the device */
+	iowrite32be(priv->adminq_bus_addr / PAGE_SIZE,
+		    &priv->reg_bar0->adminq_pfn);
+
+	gve_set_admin_queue_ok(priv);
+	return 0;
+}
+
+void gve_adminq_release(struct gve_priv *priv)
+{
+	int i = 0;
+
+	/* Tell the device the adminq is leaving */
+	iowrite32be(0x0, &priv->reg_bar0->adminq_pfn);
+	while (ioread32be(&priv->reg_bar0->adminq_pfn)) {
+		/* If this is reached the device is unrecoverable and still
+		 * holding memory. Continue looping to avoid memory corruption,
+		 * but WARN so it is visible what is going on.
+		 */
+		if (i == GVE_MAX_ADMINQ_RELEASE_CHECK)
+			WARN(1, "Unrecoverable platform error!");
+		i++;
+		msleep(GVE_ADMINQ_SLEEP_LEN);
+	}
+	gve_clear_device_rings_ok(priv);
+	gve_clear_device_resources_ok(priv);
+	gve_clear_admin_queue_ok(priv);
+}
+
+void gve_adminq_free(struct device *dev, struct gve_priv *priv)
+{
+	if (!gve_get_admin_queue_ok(priv))
+		return;
+	gve_adminq_release(priv);
+	dma_free_coherent(dev, PAGE_SIZE, priv->adminq, priv->adminq_bus_addr);
+	gve_clear_admin_queue_ok(priv);
+}
+
+static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt)
+{
+	iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell);
+}
+
+static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt)
+{
+	int i;
+
+	for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) {
+		if (ioread32be(&priv->reg_bar0->adminq_event_counter)
+		    == prod_cnt)
+			return true;
+		msleep(GVE_ADMINQ_SLEEP_LEN);
+	}
+
+	return false;
+}
+
+static int gve_adminq_parse_err(struct device *dev, u32 status)
+{
+	if (status != GVE_ADMINQ_COMMAND_PASSED &&
+	    status != GVE_ADMINQ_COMMAND_UNSET)
+		dev_err(dev, "AQ command failed with status %d\n", status);
+
+	switch (status) {
+	case GVE_ADMINQ_COMMAND_PASSED:
+		return 0;
+	case GVE_ADMINQ_COMMAND_UNSET:
+		dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
+		return -EINVAL;
+	case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
+	case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
+	case GVE_ADMINQ_COMMAND_ERROR_DATALOSS:
+	case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION:
+	case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE:
+		return -EAGAIN;
+	case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS:
+	case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR:
+	case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT:
+	case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND:
+	case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE:
+	case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR:
+		return -EINVAL;
+	case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED:
+		return -ETIME;
+	case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED:
+	case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED:
+		return -EACCES;
+	case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED:
+		return -ENOMEM;
+	case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
+		return -ENOTSUPP;
+	default:
+		dev_err(dev, "parse_aq_err: unknown status code %d\n", status);
+		return -EINVAL;
+	}
+}
+
+/* This function is not thread-safe - the caller is responsible for any
+ * necessary locks.
+ */
+int gve_adminq_execute_cmd(struct gve_priv *priv,
+			   union gve_adminq_command *cmd_orig)
+{
+	union gve_adminq_command *cmd;
+	u32 status = 0;
+	u32 prod_cnt;
+
+	cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
+	priv->adminq_prod_cnt++;
+	prod_cnt = priv->adminq_prod_cnt;
+
+	memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
+
+	gve_adminq_kick_cmd(priv, prod_cnt);
+	if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) {
+		dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n");
+		return -ENOTRECOVERABLE;
+	}
+
+	memcpy(cmd_orig, cmd, sizeof(*cmd));
+	status = be32_to_cpu(READ_ONCE(cmd->status));
+	return gve_adminq_parse_err(&priv->pdev->dev, status);
+}
+
+/* The device specifies that the management vector can either be the first irq
+ * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
+ * the ntfy blks. If it is 0 then the management vector is last; if it is 1 then
+ * the management vector is first.
+ *
+ * gve arranges the msix vectors so that the management vector is last.
+ */
+#define GVE_NTFY_BLK_BASE_MSIX_IDX	0
+int gve_adminq_configure_device_resources(struct gve_priv *priv,
+					  dma_addr_t counter_array_bus_addr,
+					  u32 num_counters,
+					  dma_addr_t db_array_bus_addr,
+					  u32 num_ntfy_blks)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES);
+	cmd.configure_device_resources =
+		(struct gve_adminq_configure_device_resources) {
+		.counter_array = cpu_to_be64(counter_array_bus_addr),
+		.num_counters = cpu_to_be32(num_counters),
+		.irq_db_addr = cpu_to_be64(db_array_bus_addr),
+		.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
+		.irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
+		.ntfy_blk_msix_base_idx =
+					cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES);
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
+{
+	struct gve_tx_ring *tx = &priv->tx[queue_index];
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
+	cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
+		.queue_id = cpu_to_be32(queue_index),
+		.reserved = 0,
+		.queue_resources_addr = cpu_to_be64(tx->q_resources_bus),
+		.tx_ring_addr = cpu_to_be64(tx->bus),
+		.queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id),
+		.ntfy_id = cpu_to_be32(tx->ntfy_id),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+	struct gve_rx_ring *rx = &priv->rx[queue_index];
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+	cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+		.queue_id = cpu_to_be32(queue_index),
+		.index = cpu_to_be32(queue_index),
+		.reserved = 0,
+		.ntfy_id = cpu_to_be32(rx->ntfy_id),
+		.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
+		.rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
+		.rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
+		.queue_page_list_id = cpu_to_be32(rx->data.qpl->id),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
+	cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
+		.queue_id = cpu_to_be32(queue_index),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+	cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+		.queue_id = cpu_to_be32(queue_index),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_describe_device(struct gve_priv *priv)
+{
+	struct gve_device_descriptor *descriptor;
+	union gve_adminq_command cmd;
+	dma_addr_t descriptor_bus;
+	int err = 0;
+	u8 *mac;
+	u16 mtu;
+
+	memset(&cmd, 0, sizeof(cmd));
+	descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE,
+					&descriptor_bus, GFP_KERNEL);
+	if (!descriptor)
+		return -ENOMEM;
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE);
+	cmd.describe_device.device_descriptor_addr =
+						cpu_to_be64(descriptor_bus);
+	cmd.describe_device.device_descriptor_version =
+			cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION);
+	cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE);
+
+	err = gve_adminq_execute_cmd(priv, &cmd);
+	if (err)
+		goto free_device_descriptor;
+
+	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+	if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
+		netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n",
+			  priv->tx_desc_cnt);
+		err = -EINVAL;
+		goto free_device_descriptor;
+	}
+	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+	if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0])
+	    < PAGE_SIZE ||
+	    priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0])
+	    < PAGE_SIZE) {
+		netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n",
+			  priv->rx_desc_cnt);
+		err = -EINVAL;
+		goto free_device_descriptor;
+	}
+	priv->max_registered_pages =
+				be64_to_cpu(descriptor->max_registered_pages);
+	mtu = be16_to_cpu(descriptor->mtu);
+	if (mtu < ETH_MIN_MTU) {
+		netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n",
+			  mtu);
+		err = -EINVAL;
+		goto free_device_descriptor;
+	}
+	priv->dev->max_mtu = mtu;
+	priv->num_event_counters = be16_to_cpu(descriptor->counters);
+	ether_addr_copy(priv->dev->dev_addr, descriptor->mac);
+	mac = descriptor->mac;
+	netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac);
+	priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
+	priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl);
+	if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) {
+		netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
+			  priv->rx_pages_per_qpl);
+		priv->rx_desc_cnt = priv->rx_pages_per_qpl;
+	}
+	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
+
+free_device_descriptor:
+	dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
+			  descriptor_bus);
+	return err;
+}
+
+int gve_adminq_register_page_list(struct gve_priv *priv,
+				  struct gve_queue_page_list *qpl)
+{
+	struct device *hdev = &priv->pdev->dev;
+	u32 num_entries = qpl->num_entries;
+	u32 size = num_entries * sizeof(qpl->page_buses[0]);
+	union gve_adminq_command cmd;
+	dma_addr_t page_list_bus;
+	__be64 *page_list;
+	int err;
+	int i;
+
+	memset(&cmd, 0, sizeof(cmd));
+	page_list = dma_alloc_coherent(hdev, size, &page_list_bus, GFP_KERNEL);
+	if (!page_list)
+		return -ENOMEM;
+
+	for (i = 0; i < num_entries; i++)
+		page_list[i] = cpu_to_be64(qpl->page_buses[i]);
+
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST);
+	cmd.reg_page_list = (struct gve_adminq_register_page_list) {
+		.page_list_id = cpu_to_be32(qpl->id),
+		.num_pages = cpu_to_be32(num_entries),
+		.page_address_list_addr = cpu_to_be64(page_list_bus),
+	};
+
+	err = gve_adminq_execute_cmd(priv, &cmd);
+	dma_free_coherent(hdev, size, page_list, page_list_bus);
+	return err;
+}
+
+int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
+	cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) {
+		.page_list_id = cpu_to_be32(page_list_id),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
+{
+	union gve_adminq_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
+	cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
+		.parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
+		.parameter_value = cpu_to_be64(mtu),
+	};
+
+	return gve_adminq_execute_cmd(priv, &cmd);
+}
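
gve_adminq_execute_cmd() is a classic polled mailbox: copy the command into the next ring slot, advance the producer count, ring the doorbell with it, then poll the device's event counter until it catches up. A toy model of that flow (a sketch; the "device" here completes synchronously, where the real one is polled via ioread32be()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define AQ_SLOTS 64 /* power of two, so AQ_SLOTS - 1 works as a mask */

static uint32_t prod_cnt;
static uint32_t device_counter; /* stands in for adminq_event_counter */

static void device_process(void) { device_counter = prod_cnt; }

static bool execute_cmd(int cmd)
{
	uint32_t slot = prod_cnt & (AQ_SLOTS - 1);

	printf("cmd %d placed in slot %u\n", cmd, slot);
	prod_cnt++;		/* produce */
	device_process();	/* doorbell write; device consumes */
	return device_counter == prod_cnt; /* poll for completion */
}

int main(void)
{
	printf("completed: %d\n", execute_cmd(1));
	return 0;
}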
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
new file mode 100644
index 0000000..4dfa06e
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_ADMINQ_H
+#define _GVE_ADMINQ_H
+
+#include <linux/build_bug.h>
+
+/* Admin queue opcodes */
+enum gve_adminq_opcodes {
+	GVE_ADMINQ_DESCRIBE_DEVICE		= 0x1,
+	GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES	= 0x2,
+	GVE_ADMINQ_REGISTER_PAGE_LIST		= 0x3,
+	GVE_ADMINQ_UNREGISTER_PAGE_LIST		= 0x4,
+	GVE_ADMINQ_CREATE_TX_QUEUE		= 0x5,
+	GVE_ADMINQ_CREATE_RX_QUEUE		= 0x6,
+	GVE_ADMINQ_DESTROY_TX_QUEUE		= 0x7,
+	GVE_ADMINQ_DESTROY_RX_QUEUE		= 0x8,
+	GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES	= 0x9,
+	GVE_ADMINQ_SET_DRIVER_PARAMETER		= 0xB,
+};
+
+/* Admin queue status codes */
+enum gve_adminq_statuses {
+	GVE_ADMINQ_COMMAND_UNSET			= 0x0,
+	GVE_ADMINQ_COMMAND_PASSED			= 0x1,
+	GVE_ADMINQ_COMMAND_ERROR_ABORTED		= 0xFFFFFFF0,
+	GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS		= 0xFFFFFFF1,
+	GVE_ADMINQ_COMMAND_ERROR_CANCELLED		= 0xFFFFFFF2,
+	GVE_ADMINQ_COMMAND_ERROR_DATALOSS		= 0xFFFFFFF3,
+	GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED	= 0xFFFFFFF4,
+	GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION	= 0xFFFFFFF5,
+	GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR		= 0xFFFFFFF6,
+	GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT	= 0xFFFFFFF7,
+	GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND		= 0xFFFFFFF8,
+	GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE		= 0xFFFFFFF9,
+	GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED	= 0xFFFFFFFA,
+	GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED	= 0xFFFFFFFB,
+	GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED	= 0xFFFFFFFC,
+	GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE		= 0xFFFFFFFD,
+	GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED		= 0xFFFFFFFE,
+	GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR		= 0xFFFFFFFF,
+};
+
+#define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1
+
+/* All AdminQ command structs should be naturally packed. The static_assert
+ * calls make sure this is the case at compile time.
+ */
+
+struct gve_adminq_describe_device {
+	__be64 device_descriptor_addr;
+	__be32 device_descriptor_version;
+	__be32 available_length;
+};
+
+static_assert(sizeof(struct gve_adminq_describe_device) == 16);
+
+struct gve_device_descriptor {
+	__be64 max_registered_pages;
+	__be16 reserved1;
+	__be16 tx_queue_entries;
+	__be16 rx_queue_entries;
+	__be16 default_num_queues;
+	__be16 mtu;
+	__be16 counters;
+	__be16 tx_pages_per_qpl;
+	__be16 rx_pages_per_qpl;
+	u8  mac[ETH_ALEN];
+	__be16 num_device_options;
+	__be16 total_length;
+	u8  reserved2[6];
+};
+
+static_assert(sizeof(struct gve_device_descriptor) == 40);
+
+struct device_option {
+	__be32 option_id;
+	__be32 option_length;
+};
+
+static_assert(sizeof(struct device_option) == 8);
+
+struct gve_adminq_configure_device_resources {
+	__be64 counter_array;
+	__be64 irq_db_addr;
+	__be32 num_counters;
+	__be32 num_irq_dbs;
+	__be32 irq_db_stride;
+	__be32 ntfy_blk_msix_base_idx;
+};
+
+static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32);
+
+struct gve_adminq_register_page_list {
+	__be32 page_list_id;
+	__be32 num_pages;
+	__be64 page_address_list_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_register_page_list) == 16);
+
+struct gve_adminq_unregister_page_list {
+	__be32 page_list_id;
+};
+
+static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4);
+
+struct gve_adminq_create_tx_queue {
+	__be32 queue_id;
+	__be32 reserved;
+	__be64 queue_resources_addr;
+	__be64 tx_ring_addr;
+	__be32 queue_page_list_id;
+	__be32 ntfy_id;
+};
+
+static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32);
+
+struct gve_adminq_create_rx_queue {
+	__be32 queue_id;
+	__be32 index;
+	__be32 reserved;
+	__be32 ntfy_id;
+	__be64 queue_resources_addr;
+	__be64 rx_desc_ring_addr;
+	__be64 rx_data_ring_addr;
+	__be32 queue_page_list_id;
+	u8 padding[4];
+};
+
+static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48);
+
+/* Queue resources that are shared with the device */
+struct gve_queue_resources {
+	union {
+		struct {
+			__be32 db_index;	/* Device -> Guest */
+			__be32 counter_index;	/* Device -> Guest */
+		};
+		u8 reserved[64];
+	};
+};
+
+static_assert(sizeof(struct gve_queue_resources) == 64);
+
+struct gve_adminq_destroy_tx_queue {
+	__be32 queue_id;
+};
+
+static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4);
+
+struct gve_adminq_destroy_rx_queue {
+	__be32 queue_id;
+};
+
+static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4);
+
+/* GVE Set Driver Parameter Types */
+enum gve_set_driver_param_types {
+	GVE_SET_PARAM_MTU	= 0x1,
+};
+
+struct gve_adminq_set_driver_parameter {
+	__be32 parameter_type;
+	u8 reserved[4];
+	__be64 parameter_value;
+};
+
+static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16);
+
+union gve_adminq_command {
+	struct {
+		__be32 opcode;
+		__be32 status;
+		union {
+			struct gve_adminq_configure_device_resources
+						configure_device_resources;
+			struct gve_adminq_create_tx_queue create_tx_queue;
+			struct gve_adminq_create_rx_queue create_rx_queue;
+			struct gve_adminq_destroy_tx_queue destroy_tx_queue;
+			struct gve_adminq_destroy_rx_queue destroy_rx_queue;
+			struct gve_adminq_describe_device describe_device;
+			struct gve_adminq_register_page_list reg_page_list;
+			struct gve_adminq_unregister_page_list unreg_page_list;
+			struct gve_adminq_set_driver_parameter set_driver_param;
+		};
+	};
+	u8 reserved[64];
+};
+
+static_assert(sizeof(union gve_adminq_command) == 64);
+
+int gve_adminq_alloc(struct device *dev, struct gve_priv *priv);
+void gve_adminq_free(struct device *dev, struct gve_priv *priv);
+void gve_adminq_release(struct gve_priv *priv);
+int gve_adminq_execute_cmd(struct gve_priv *priv,
+			   union gve_adminq_command *cmd_orig);
+int gve_adminq_describe_device(struct gve_priv *priv);
+int gve_adminq_configure_device_resources(struct gve_priv *priv,
+					  dma_addr_t counter_array_bus_addr,
+					  u32 num_counters,
+					  dma_addr_t db_array_bus_addr,
+					  u32 num_ntfy_blks);
+int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
+int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_register_page_list(struct gve_priv *priv,
+				  struct gve_queue_page_list *qpl);
+int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
+int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
+#endif /* _GVE_ADMINQ_H */
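
The static_assert() calls above pin the wire layout at build time: if padding or field reordering ever changes a struct's size, compilation fails instead of the format silently drifting. The same check in a minimal standalone form (C11 static_assert via <assert.h>):

#include <assert.h>
#include <stdint.h>

struct toy_cmd {
	uint32_t queue_id;
	uint32_t reserved;
	uint64_t ring_addr;
};

/* 4 + 4 + 8 bytes, naturally packed: any change breaks the build */
static_assert(sizeof(struct toy_cmd) == 16, "toy_cmd must stay 16 bytes");

int main(void) { return 0; }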
diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
new file mode 100644
index 0000000..5477987
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_desc.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+/* GVE Transmit Descriptor formats */
+
+#ifndef _GVE_DESC_H_
+#define _GVE_DESC_H_
+
+#include <linux/build_bug.h>
+
+/* A note on seg_addrs
+ *
+ * Base addresses encoded in seg_addr are not assumed to be physical
+ * addresses. The ring format assumes these come from some linear address
+ * space. This could be physical memory, kernel virtual memory, user virtual
+ * memory. gVNIC uses lists of registered pages. Each queue is assumed
+ * to be associated with a single such linear address space to ensure a
+ * consistent meaning for seg_addrs posted to its rings.
+ */
+
+struct gve_tx_pkt_desc {
+	u8	type_flags;  /* desc type is upper 4 bits, flags lower */
+	u8	l4_csum_offset;  /* relative offset of L4 csum word */
+	u8	l4_hdr_offset;  /* Offset of start of L4 headers in packet */
+	u8	desc_cnt;  /* Total descriptors for this packet */
+	__be16	len;  /* Total length of this packet (in bytes) */
+	__be16	seg_len;  /* Length of this descriptor's segment */
+	__be64	seg_addr;  /* Base address (see note) of this segment */
+} __packed;
+
+struct gve_tx_seg_desc {
+	u8	type_flags;	/* type is upper 4 bits, flags lower	*/
+	u8	l3_offset;	/* TSO: 2 byte units to start of IPH	*/
+	__be16	reserved;
+	__be16	mss;		/* TSO MSS				*/
+	__be16	seg_len;
+	__be64	seg_addr;
+} __packed;
+
+/* GVE Transmit Descriptor Types */
+#define	GVE_TXD_STD		(0x0 << 4) /* Std with Host Address	*/
+#define	GVE_TXD_TSO		(0x1 << 4) /* TSO with Host Address	*/
+#define	GVE_TXD_SEG		(0x2 << 4) /* Seg with Host Address	*/
+
+/* GVE Transmit Descriptor Flags for Std Pkts */
+#define	GVE_TXF_L4CSUM	BIT(0)	/* Need csum offload */
+#define	GVE_TXF_TSTAMP	BIT(2)	/* Timestamp required */
+
+/* GVE Transmit Descriptor Flags for TSO Segs */
+#define	GVE_TXSF_IPV6	BIT(1)	/* IPv6 TSO */
+
+/* GVE Receive Packet Descriptor */
+/* The start of an Ethernet packet comes 2 bytes into the rx buffer.
+ * gVNIC adds this padding so that both the DMA and the L3/L4 protocol
+ * header accesses are aligned.
+ */
+#define GVE_RX_PAD 2
+
+struct gve_rx_desc {
+	u8	padding[48];
+	__be32	rss_hash;  /* Receive-side scaling hash (Toeplitz for gVNIC) */
+	__be16	mss;
+	__be16	reserved;  /* Reserved to zero */
+	u8	hdr_len;  /* Header length (L2-L4) including padding */
+	u8	hdr_off;  /* 64-byte-scaled offset into RX_DATA entry */
+	__sum16	csum;  /* 1's-complement partial checksum of L3+ bytes */
+	__be16	len;  /* Length of the received packet */
+	__be16	flags_seq;  /* Flags [15:3] and sequence number [2:0] (1-7) */
+} __packed;
+static_assert(sizeof(struct gve_rx_desc) == 64);
+
+/* As with the Tx ring format, the qpl_offset entries below are offsets into an
+ * ordered list of registered pages.
+ */
+struct gve_rx_data_slot {
+	/* byte offset into the rx registered segment of this slot */
+	__be64 qpl_offset;
+};
+
+/* GVE Receive Packet Descriptor Seq No */
+#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
+
+/* GVE Receive Packet Descriptor Flags */
+#define GVE_RXFLG(x)	cpu_to_be16(1 << (3 + (x)))
+#define	GVE_RXF_FRAG	GVE_RXFLG(3)	/* IP Fragment			*/
+#define	GVE_RXF_IPV4	GVE_RXFLG(4)	/* IPv4				*/
+#define	GVE_RXF_IPV6	GVE_RXFLG(5)	/* IPv6				*/
+#define	GVE_RXF_TCP	GVE_RXFLG(6)	/* TCP Packet			*/
+#define	GVE_RXF_UDP	GVE_RXFLG(7)	/* UDP Packet			*/
+#define	GVE_RXF_ERR	GVE_RXFLG(8)	/* Packet Error Detected	*/
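+
+/* Worked example (CPU byte order): flags_seq = 0x0282 decodes to sequence
+ * number 2 (bits [2:0]) with GVE_RXF_IPV4 (bit 7) and GVE_RXF_TCP (bit 9)
+ * set.
+ */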
+
+/* GVE IRQ */
+#define GVE_IRQ_ACK	BIT(31)
+#define GVE_IRQ_MASK	BIT(30)
+#define GVE_IRQ_EVENT	BIT(29)
+
+static inline bool gve_needs_rss(__be16 flag)
+{
+	if (flag & GVE_RXF_FRAG)
+		return false;
+	if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+		return true;
+	return false;
+}
+
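+/* Rx sequence numbers cycle through 1..7, skipping 0, so a zeroed
+ * (not-yet-written) descriptor can never match the sequence number the
+ * ring expects next; the wrap below therefore goes 7 -> 1, not 7 -> 0.
+ */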
+static inline u8 gve_next_seqno(u8 seq)
+{
+	return (seq + 1) == 8 ? 1 : seq + 1;
+}
+#endif /* _GVE_DESC_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
new file mode 100644
index 0000000..d8fa816
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/rtnetlink.h>
+#include "gve.h"
+
+static void gve_get_drvinfo(struct net_device *netdev,
+			    struct ethtool_drvinfo *info)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	strlcpy(info->driver, "gve", sizeof(info->driver));
+	strlcpy(info->version, gve_version_str, sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info));
+}
+
+static void gve_set_msglevel(struct net_device *netdev, u32 value)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	priv->msg_enable = value;
+}
+
+static u32 gve_get_msglevel(struct net_device *netdev)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	return priv->msg_enable;
+}
+
+static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
+	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
+	"rx_dropped", "tx_dropped", "tx_timeouts",
+};
+
+#define GVE_MAIN_STATS_LEN  ARRAY_SIZE(gve_gstrings_main_stats)
+#define NUM_GVE_TX_CNTS	5
+#define NUM_GVE_RX_CNTS	2
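+
+/* NUM_GVE_TX_CNTS and NUM_GVE_RX_CNTS must match the number of per-queue
+ * strings emitted by gve_get_strings() and the number of per-queue values
+ * written by gve_get_ethtool_stats() below.
+ */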
+
+static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	char *s = (char *)data;
+	int i;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	memcpy(s, *gve_gstrings_main_stats,
+	       sizeof(gve_gstrings_main_stats));
+	s += sizeof(gve_gstrings_main_stats);
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i);
+		s += ETH_GSTRING_LEN;
+		snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i);
+		s += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i);
+		s += ETH_GSTRING_LEN;
+		snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i);
+		s += ETH_GSTRING_LEN;
+		snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i);
+		s += ETH_GSTRING_LEN;
+		snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i);
+		s += ETH_GSTRING_LEN;
+		snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i);
+		s += ETH_GSTRING_LEN;
+	}
+}
+
+static int gve_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		return GVE_MAIN_STATS_LEN +
+		       (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
+		       (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void
+gve_get_ethtool_stats(struct net_device *netdev,
+		      struct ethtool_stats *stats, u64 *data)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes;
+	unsigned int start;
+	int ring;
+	int i;
+
+	ASSERT_RTNL();
+
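+	/* u64_stats_fetch_begin()/retry() re-read a ring's counters if the
+	 * datapath updated them mid-read, yielding a consistent snapshot
+	 * without blocking the hot path.
+	 */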
+	for (rx_pkts = 0, rx_bytes = 0, ring = 0;
+	     ring < priv->rx_cfg.num_queues; ring++) {
+		if (priv->rx) {
+			do {
+				start =
+				  u64_stats_fetch_begin(&priv->rx[ring].statss);
+				rx_pkts += priv->rx[ring].rpackets;
+				rx_bytes += priv->rx[ring].rbytes;
+			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+						       start));
+		}
+	}
+	for (tx_pkts = 0, tx_bytes = 0, ring = 0;
+	     ring < priv->tx_cfg.num_queues; ring++) {
+		if (priv->tx) {
+			do {
+				start =
+				  u64_stats_fetch_begin(&priv->tx[ring].statss);
+				tx_pkts += priv->tx[ring].pkt_done;
+				tx_bytes += priv->tx[ring].bytes_done;
+			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+						       start));
+		}
+	}
+
+	i = 0;
+	data[i++] = rx_pkts;
+	data[i++] = tx_pkts;
+	data[i++] = rx_bytes;
+	data[i++] = tx_bytes;
+	/* rx_dropped and tx_dropped are not tracked; leave them zeroed */
+	i += 2;
+	data[i++] = priv->tx_timeo_cnt;
+	i = GVE_MAIN_STATS_LEN;
+
+	/* walk RX rings */
+	if (priv->rx) {
+		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+			struct gve_rx_ring *rx = &priv->rx[ring];
+
+			data[i++] = rx->cnt;
+			data[i++] = rx->fill_cnt;
+		}
+	} else {
+		i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
+	}
+	/* walk TX rings */
+	if (priv->tx) {
+		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+			struct gve_tx_ring *tx = &priv->tx[ring];
+
+			data[i++] = tx->req;
+			data[i++] = tx->done;
+			data[i++] = tx->wake_queue;
+			data[i++] = tx->stop_queue;
+			data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
+									  tx));
+		}
+	} else {
+		i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+	}
+}
+
+static void gve_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *cmd)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	cmd->max_rx = priv->rx_cfg.max_queues;
+	cmd->max_tx = priv->tx_cfg.max_queues;
+	cmd->max_other = 0;
+	cmd->max_combined = 0;
+	cmd->rx_count = priv->rx_cfg.num_queues;
+	cmd->tx_count = priv->tx_cfg.num_queues;
+	cmd->other_count = 0;
+	cmd->combined_count = 0;
+}
+
+static int gve_set_channels(struct net_device *netdev,
+			    struct ethtool_channels *cmd)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+	struct gve_queue_config new_tx_cfg = priv->tx_cfg;
+	struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+	struct ethtool_channels old_settings;
+	int new_tx = cmd->tx_count;
+	int new_rx = cmd->rx_count;
+
+	gve_get_channels(netdev, &old_settings);
+
+	/* Changing combined is not allowed */
+	if (cmd->combined_count != old_settings.combined_count)
+		return -EINVAL;
+
+	if (!new_rx || !new_tx)
+		return -EINVAL;
+
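+	/* With the carrier off (device not up), no rings are live; just
+	 * record the new queue counts and they will take effect on the next
+	 * gve_open().
+	 */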
+	if (!netif_carrier_ok(netdev)) {
+		priv->tx_cfg.num_queues = new_tx;
+		priv->rx_cfg.num_queues = new_rx;
+		return 0;
+	}
+
+	new_tx_cfg.num_queues = new_tx;
+	new_rx_cfg.num_queues = new_rx;
+
+	return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+}
+
+static void gve_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *cmd)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	cmd->rx_max_pending = priv->rx_desc_cnt;
+	cmd->tx_max_pending = priv->tx_desc_cnt;
+	cmd->rx_pending = priv->rx_desc_cnt;
+	cmd->tx_pending = priv->tx_desc_cnt;
+}
+
+static int gve_user_reset(struct net_device *netdev, u32 *flags)
+{
+	struct gve_priv *priv = netdev_priv(netdev);
+
+	if (*flags == ETH_RESET_ALL) {
+		*flags = 0;
+		return gve_reset(priv, true);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+const struct ethtool_ops gve_ethtool_ops = {
+	.get_drvinfo = gve_get_drvinfo,
+	.get_strings = gve_get_strings,
+	.get_sset_count = gve_get_sset_count,
+	.get_ethtool_stats = gve_get_ethtool_stats,
+	.set_msglevel = gve_set_msglevel,
+	.get_msglevel = gve_get_msglevel,
+	.set_channels = gve_set_channels,
+	.get_channels = gve_get_channels,
+	.get_link = ethtool_op_get_link,
+	.get_ringparam = gve_get_ringparam,
+	.reset = gve_user_reset,
+};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
new file mode 100644
index 0000000..9b7a8db
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -0,0 +1,1232 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <net/sch_generic.h>
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
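+/* Rx packets of at most this many bytes are copied into a fresh skb
+ * instead of page-flipping the receive buffer (see gve_rx()).
+ */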
+#define GVE_DEFAULT_RX_COPYBREAK	(256)
+
+#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
+#define GVE_VERSION		"1.0.0"
+#define GVE_VERSION_PREFIX	"GVE-"
+
+const char gve_version_str[] = GVE_VERSION;
+static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+
+static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	unsigned int start;
+	int ring;
+
+	if (priv->rx) {
+		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+			do {
+				start =
+				  u64_stats_fetch_begin(&priv->rx[ring].statss);
+				s->rx_packets += priv->rx[ring].rpackets;
+				s->rx_bytes += priv->rx[ring].rbytes;
+			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+						       start));
+		}
+	}
+	if (priv->tx) {
+		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+			do {
+				start =
+				  u64_stats_fetch_begin(&priv->tx[ring].statss);
+				s->tx_packets += priv->tx[ring].pkt_done;
+				s->tx_bytes += priv->tx[ring].bytes_done;
+			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+						       start));
+		}
+	}
+}
+
+static int gve_alloc_counter_array(struct gve_priv *priv)
+{
+	priv->counter_array =
+		dma_alloc_coherent(&priv->pdev->dev,
+				   priv->num_event_counters *
+				   sizeof(*priv->counter_array),
+				   &priv->counter_array_bus, GFP_KERNEL);
+	if (!priv->counter_array)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void gve_free_counter_array(struct gve_priv *priv)
+{
+	dma_free_coherent(&priv->pdev->dev,
+			  priv->num_event_counters *
+			  sizeof(*priv->counter_array),
+			  priv->counter_array, priv->counter_array_bus);
+	priv->counter_array = NULL;
+}
+
+static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
+{
+	struct gve_priv *priv = arg;
+
+	queue_work(priv->gve_wq, &priv->service_task);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t gve_intr(int irq, void *arg)
+{
+	struct gve_notify_block *block = arg;
+	struct gve_priv *priv = block->priv;
+
+	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
+	napi_schedule_irqoff(&block->napi);
+	return IRQ_HANDLED;
+}
+
+static int gve_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct gve_notify_block *block;
+	__be32 __iomem *irq_doorbell;
+	bool reschedule = false;
+	struct gve_priv *priv;
+
+	block = container_of(napi, struct gve_notify_block, napi);
+	priv = block->priv;
+
+	if (block->tx)
+		reschedule |= gve_tx_poll(block, budget);
+	if (block->rx)
+		reschedule |= gve_rx_poll(block, budget);
+
+	if (reschedule)
+		return budget;
+
+	napi_complete(napi);
+	irq_doorbell = gve_irq_doorbell(priv, block);
+	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
+
+	/* Double check we have no extra work.
+	 * Ensure unmask synchronizes with checking for work.
+	 */
+	dma_rmb();
+	if (block->tx)
+		reschedule |= gve_tx_poll(block, -1);
+	if (block->rx)
+		reschedule |= gve_rx_poll(block, -1);
+	if (reschedule && napi_reschedule(napi))
+		iowrite32be(GVE_IRQ_MASK, irq_doorbell);
+
+	return 0;
+}
+
+static int gve_alloc_notify_blocks(struct gve_priv *priv)
+{
+	int num_vecs_requested = priv->num_ntfy_blks + 1;
+	char *name = priv->dev->name;
+	unsigned int active_cpus;
+	int vecs_enabled;
+	int i, j;
+	int err;
+
+	priv->msix_vectors = kvzalloc(num_vecs_requested *
+				      sizeof(*priv->msix_vectors), GFP_KERNEL);
+	if (!priv->msix_vectors)
+		return -ENOMEM;
+	for (i = 0; i < num_vecs_requested; i++)
+		priv->msix_vectors[i].entry = i;
+	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
+					     GVE_MIN_MSIX, num_vecs_requested);
+	if (vecs_enabled < 0) {
+		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
+			GVE_MIN_MSIX, vecs_enabled);
+		err = vecs_enabled;
+		goto abort_with_msix_vectors;
+	}
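+	/* If fewer vectors were granted than requested, keep one for
+	 * management and round the rest down to an even count so TX and RX
+	 * queues can split them evenly.
+	 */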
+	if (vecs_enabled != num_vecs_requested) {
+		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
+		int vecs_per_type = new_num_ntfy_blks / 2;
+		int vecs_left = new_num_ntfy_blks % 2;
+
+		priv->num_ntfy_blks = new_num_ntfy_blks;
+		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
+						vecs_per_type);
+		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
+						vecs_per_type + vecs_left);
+		dev_err(&priv->pdev->dev,
+			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
+			vecs_enabled, priv->tx_cfg.max_queues,
+			priv->rx_cfg.max_queues);
+		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
+			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
+		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
+			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
+	}
+	/* Half the notification blocks go to TX and half to RX */
+	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
+
+	/* Setup Management Vector  - the last vector */
+	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
+		 name);
+	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
+			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
+	if (err) {
+		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
+		goto abort_with_msix_enabled;
+	}
+	priv->ntfy_blocks =
+		dma_alloc_coherent(&priv->pdev->dev,
+				   priv->num_ntfy_blks *
+				   sizeof(*priv->ntfy_blocks),
+				   &priv->ntfy_block_bus, GFP_KERNEL);
+	if (!priv->ntfy_blocks) {
+		err = -ENOMEM;
+		goto abort_with_mgmt_vector;
+	}
+	/* Setup the other blocks - the first n-1 vectors */
+	for (i = 0; i < priv->num_ntfy_blks; i++) {
+		struct gve_notify_block *block = &priv->ntfy_blocks[i];
+		int msix_idx = i;
+
+		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
+			 name, i);
+		block->priv = priv;
+		err = request_irq(priv->msix_vectors[msix_idx].vector,
+				  gve_intr, 0, block->name, block);
+		if (err) {
+			dev_err(&priv->pdev->dev,
+				"Failed to receive msix vector %d\n", i);
+			goto abort_with_some_ntfy_blocks;
+		}
+		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+				      get_cpu_mask(i % active_cpus));
+	}
+	return 0;
+abort_with_some_ntfy_blocks:
+	for (j = 0; j < i; j++) {
+		struct gve_notify_block *block = &priv->ntfy_blocks[j];
+		int msix_idx = j;
+
+		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+				      NULL);
+		free_irq(priv->msix_vectors[msix_idx].vector, block);
+	}
+	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+			  sizeof(*priv->ntfy_blocks),
+			  priv->ntfy_blocks, priv->ntfy_block_bus);
+	priv->ntfy_blocks = NULL;
+abort_with_mgmt_vector:
+	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+abort_with_msix_enabled:
+	pci_disable_msix(priv->pdev);
+abort_with_msix_vectors:
+	kvfree(priv->msix_vectors);
+	priv->msix_vectors = NULL;
+	return err;
+}
+
+static void gve_free_notify_blocks(struct gve_priv *priv)
+{
+	int i;
+
+	/* Free the irqs */
+	for (i = 0; i < priv->num_ntfy_blks; i++) {
+		struct gve_notify_block *block = &priv->ntfy_blocks[i];
+		int msix_idx = i;
+
+		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+				      NULL);
+		free_irq(priv->msix_vectors[msix_idx].vector, block);
+	}
+	dma_free_coherent(&priv->pdev->dev,
+			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
+			  priv->ntfy_blocks, priv->ntfy_block_bus);
+	priv->ntfy_blocks = NULL;
+	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+	pci_disable_msix(priv->pdev);
+	kvfree(priv->msix_vectors);
+	priv->msix_vectors = NULL;
+}
+
+static int gve_setup_device_resources(struct gve_priv *priv)
+{
+	int err;
+
+	err = gve_alloc_counter_array(priv);
+	if (err)
+		return err;
+	err = gve_alloc_notify_blocks(priv);
+	if (err)
+		goto abort_with_counter;
+	err = gve_adminq_configure_device_resources(priv,
+						    priv->counter_array_bus,
+						    priv->num_event_counters,
+						    priv->ntfy_block_bus,
+						    priv->num_ntfy_blks);
+	if (unlikely(err)) {
+		dev_err(&priv->pdev->dev,
+			"could not setup device_resources: err=%d\n", err);
+		err = -ENXIO;
+		goto abort_with_ntfy_blocks;
+	}
+	gve_set_device_resources_ok(priv);
+	return 0;
+abort_with_ntfy_blocks:
+	gve_free_notify_blocks(priv);
+abort_with_counter:
+	gve_free_counter_array(priv);
+	return err;
+}
+
+static void gve_trigger_reset(struct gve_priv *priv);
+
+static void gve_teardown_device_resources(struct gve_priv *priv)
+{
+	int err;
+
+	/* Tell device its resources are being freed */
+	if (gve_get_device_resources_ok(priv)) {
+		err = gve_adminq_deconfigure_device_resources(priv);
+		if (err) {
+			dev_err(&priv->pdev->dev,
+				"Could not deconfigure device resources: err=%d\n",
+				err);
+			gve_trigger_reset(priv);
+		}
+	}
+	gve_free_counter_array(priv);
+	gve_free_notify_blocks(priv);
+	gve_clear_device_resources_ok(priv);
+}
+
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+{
+	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+		       NAPI_POLL_WEIGHT);
+}
+
+static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+{
+	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+	netif_napi_del(&block->napi);
+}
+
+static int gve_register_qpls(struct gve_priv *priv)
+{
+	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int err;
+	int i;
+
+	for (i = 0; i < num_qpls; i++) {
+		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "failed to register queue page list %d\n",
+				  priv->qpls[i].id);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int gve_unregister_qpls(struct gve_priv *priv)
+{
+	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int err;
+	int i;
+
+	for (i = 0; i < num_qpls; i++) {
+		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+		/* This failure will trigger a reset - no need to clean up */
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to unregister queue page list %d\n",
+				  priv->qpls[i].id);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int gve_create_rings(struct gve_priv *priv)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		err = gve_adminq_create_tx_queue(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
+				  i);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+		netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
+	}
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		err = gve_adminq_create_rx_queue(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
+				  i);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+		/* Rx data ring has been prefilled with packet buffers at
+		 * queue allocation time.
+		 * Write the doorbell to provide descriptor slots and packet
+		 * buffers to the NIC.
+		 */
+		gve_rx_write_doorbell(priv, &priv->rx[i]);
+		netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
+	}
+
+	return 0;
+}
+
+static int gve_alloc_rings(struct gve_priv *priv)
+{
+	int ntfy_idx;
+	int err;
+	int i;
+
+	/* Setup tx rings */
+	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
+			    GFP_KERNEL);
+	if (!priv->tx)
+		return -ENOMEM;
+	err = gve_tx_alloc_rings(priv);
+	if (err)
+		goto free_tx;
+	/* Setup rx rings */
+	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
+			    GFP_KERNEL);
+	if (!priv->rx) {
+		err = -ENOMEM;
+		goto free_tx_queue;
+	}
+	err = gve_rx_alloc_rings(priv);
+	if (err)
+		goto free_rx;
+	/* Add tx napi & init sync stats */
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		u64_stats_init(&priv->tx[i].statss);
+		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+		gve_add_napi(priv, ntfy_idx);
+	}
+	/* Add rx napi & init sync stats */
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		u64_stats_init(&priv->rx[i].statss);
+		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+		gve_add_napi(priv, ntfy_idx);
+	}
+
+	return 0;
+
+free_rx:
+	kvfree(priv->rx);
+	priv->rx = NULL;
+free_tx_queue:
+	gve_tx_free_rings(priv);
+free_tx:
+	kvfree(priv->tx);
+	priv->tx = NULL;
+	return err;
+}
+
+static int gve_destroy_rings(struct gve_priv *priv)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		err = gve_adminq_destroy_tx_queue(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "failed to destroy tx queue %d\n",
+				  i);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+		netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
+	}
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		err = gve_adminq_destroy_rx_queue(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "failed to destroy rx queue %d\n",
+				  i);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+		netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
+	}
+	return 0;
+}
+
+static void gve_free_rings(struct gve_priv *priv)
+{
+	int ntfy_idx;
+	int i;
+
+	if (priv->tx) {
+		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+			gve_remove_napi(priv, ntfy_idx);
+		}
+		gve_tx_free_rings(priv);
+		kvfree(priv->tx);
+		priv->tx = NULL;
+	}
+	if (priv->rx) {
+		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+			gve_remove_napi(priv, ntfy_idx);
+		}
+		gve_rx_free_rings(priv);
+		kvfree(priv->rx);
+		priv->rx = NULL;
+	}
+}
+
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+		   enum dma_data_direction dir)
+{
+	*page = alloc_page(GFP_KERNEL);
+	if (!*page)
+		return -ENOMEM;
+	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
+	if (dma_mapping_error(dev, *dma)) {
+		put_page(*page);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
+				     int pages)
+{
+	struct gve_queue_page_list *qpl = &priv->qpls[id];
+	int err;
+	int i;
+
+	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
+		netif_err(priv, drv, priv->dev,
+			  "Reached max number of registered pages %llu > %llu\n",
+			  pages + priv->num_registered_pages,
+			  priv->max_registered_pages);
+		return -EINVAL;
+	}
+
+	qpl->id = id;
+	qpl->num_entries = 0;
+	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
+	/* caller handles clean up */
+	if (!qpl->pages)
+		return -ENOMEM;
+	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
+				   GFP_KERNEL);
+	/* caller handles clean up */
+	if (!qpl->page_buses)
+		return -ENOMEM;
+
+	for (i = 0; i < pages; i++) {
+		err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
+				     &qpl->page_buses[i],
+				     gve_qpl_dma_dir(priv, id));
+		/* caller handles clean up */
+		if (err)
+			return -ENOMEM;
+		qpl->num_entries++;
+	}
+	priv->num_registered_pages += pages;
+
+	return 0;
+}
+
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+		   enum dma_data_direction dir)
+{
+	if (!dma_mapping_error(dev, dma))
+		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
+	if (page)
+		put_page(page);
+}
+
+static void gve_free_queue_page_list(struct gve_priv *priv,
+				     int id)
+{
+	struct gve_queue_page_list *qpl = &priv->qpls[id];
+	int i;
+
+	if (!qpl->pages)
+		return;
+	if (!qpl->page_buses)
+		goto free_pages;
+
+	for (i = 0; i < qpl->num_entries; i++)
+		gve_free_page(&priv->pdev->dev, qpl->pages[i],
+			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
+
+	kvfree(qpl->page_buses);
+free_pages:
+	kvfree(qpl->pages);
+	priv->num_registered_pages -= qpl->num_entries;
+}
+
+static int gve_alloc_qpls(struct gve_priv *priv)
+{
+	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int i, j;
+	int err;
+
+	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
+	if (!priv->qpls)
+		return -ENOMEM;
+
+	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+		err = gve_alloc_queue_page_list(priv, i,
+						priv->tx_pages_per_qpl);
+		if (err)
+			goto free_qpls;
+	}
+	for (; i < num_qpls; i++) {
+		err = gve_alloc_queue_page_list(priv, i,
+						priv->rx_pages_per_qpl);
+		if (err)
+			goto free_qpls;
+	}
+
+	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+				     sizeof(unsigned long) * BITS_PER_BYTE;
+	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
+					    sizeof(unsigned long), GFP_KERNEL);
+	if (!priv->qpl_cfg.qpl_id_map) {
+		err = -ENOMEM;
+		goto free_qpls;
+	}
+
+	return 0;
+
+free_qpls:
+	for (j = 0; j <= i; j++)
+		gve_free_queue_page_list(priv, j);
+	kvfree(priv->qpls);
+	return err;
+}
+
+static void gve_free_qpls(struct gve_priv *priv)
+{
+	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int i;
+
+	kvfree(priv->qpl_cfg.qpl_id_map);
+
+	for (i = 0; i < num_qpls; i++)
+		gve_free_queue_page_list(priv, i);
+
+	kvfree(priv->qpls);
+}
+
+/* Use this to schedule a reset when the device is capable of continuing
+ * to handle other requests in its current state. If it is not, call
+ * gve_reset() directly from the current thread instead.
+ */
+void gve_schedule_reset(struct gve_priv *priv)
+{
+	gve_set_do_reset(priv);
+	queue_work(priv->gve_wq, &priv->service_task);
+}
+
+static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
+static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
+static void gve_turndown(struct gve_priv *priv);
+static void gve_turnup(struct gve_priv *priv);
+
+static int gve_open(struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = gve_alloc_qpls(priv);
+	if (err)
+		return err;
+	err = gve_alloc_rings(priv);
+	if (err)
+		goto free_qpls;
+
+	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
+	if (err)
+		goto free_rings;
+	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
+	if (err)
+		goto free_rings;
+
+	err = gve_register_qpls(priv);
+	if (err)
+		goto reset;
+	err = gve_create_rings(priv);
+	if (err)
+		goto reset;
+	gve_set_device_rings_ok(priv);
+
+	gve_turnup(priv);
+	netif_carrier_on(dev);
+	return 0;
+
+free_rings:
+	gve_free_rings(priv);
+free_qpls:
+	gve_free_qpls(priv);
+	return err;
+
+reset:
+	/* If a reset is already in progress we were called from the reset
+	 * path itself (which holds the rtnl lock), so just return the error.
+	 */
+	if (gve_get_reset_in_progress(priv))
+		return err;
+	/* Otherwise reset before returning */
+	gve_reset_and_teardown(priv, true);
+	/* if this fails there is nothing we can do so just ignore the return */
+	gve_reset_recovery(priv, false);
+	/* return the original error */
+	return err;
+}
+
+static int gve_close(struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int err;
+
+	netif_carrier_off(dev);
+	if (gve_get_device_rings_ok(priv)) {
+		gve_turndown(priv);
+		err = gve_destroy_rings(priv);
+		if (err)
+			goto err;
+		err = gve_unregister_qpls(priv);
+		if (err)
+			goto err;
+		gve_clear_device_rings_ok(priv);
+	}
+
+	gve_free_rings(priv);
+	gve_free_qpls(priv);
+	return 0;
+
+err:
+	/* If a reset is already in progress we were called from the reset
+	 * path itself (which holds the rtnl lock), so just return the error.
+	 */
+	if (gve_get_reset_in_progress(priv))
+		return err;
+	/* Otherwise reset before returning */
+	gve_reset_and_teardown(priv, true);
+	return gve_reset_recovery(priv, false);
+}
+
+int gve_adjust_queues(struct gve_priv *priv,
+		      struct gve_queue_config new_rx_config,
+		      struct gve_queue_config new_tx_config)
+{
+	int err;
+
+	if (netif_carrier_ok(priv->dev)) {
+		/* To make this process as simple as possible we teardown the
+		 * device, set the new configuration, and then bring the device
+		 * up again.
+		 */
+		err = gve_close(priv->dev);
+		/* we have already tried to reset in close,
+		 * just fail at this point
+		 */
+		if (err)
+			return err;
+		priv->tx_cfg = new_tx_config;
+		priv->rx_cfg = new_rx_config;
+
+		err = gve_open(priv->dev);
+		if (err)
+			goto err;
+
+		return 0;
+	}
+	/* Set the config for the next up. */
+	priv->tx_cfg = new_tx_config;
+	priv->rx_cfg = new_rx_config;
+
+	return 0;
+err:
+	netif_err(priv, drv, priv->dev,
+		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
+	gve_turndown(priv);
+	return err;
+}
+
+static void gve_turndown(struct gve_priv *priv)
+{
+	int idx;
+
+	if (netif_carrier_ok(priv->dev))
+		netif_carrier_off(priv->dev);
+
+	if (!gve_get_napi_enabled(priv))
+		return;
+
+	/* Disable napi to prevent more work from coming in */
+	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
+		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+		napi_disable(&block->napi);
+	}
+	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+		napi_disable(&block->napi);
+	}
+
+	/* Stop tx queues */
+	netif_tx_disable(priv->dev);
+
+	gve_clear_napi_enabled(priv);
+}
+
+static void gve_turnup(struct gve_priv *priv)
+{
+	int idx;
+
+	/* Start the tx queues */
+	netif_tx_start_all_queues(priv->dev);
+
+	/* Enable napi and unmask interrupts for all queues */
+	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
+		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+		napi_enable(&block->napi);
+		iowrite32be(0, gve_irq_doorbell(priv, block));
+	}
+	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+		napi_enable(&block->napi);
+		iowrite32be(0, gve_irq_doorbell(priv, block));
+	}
+
+	gve_set_napi_enabled(priv);
+}
+
+static void gve_tx_timeout(struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	gve_schedule_reset(priv);
+	priv->tx_timeo_cnt++;
+}
+
+static const struct net_device_ops gve_netdev_ops = {
+	.ndo_start_xmit		=	gve_tx,
+	.ndo_open		=	gve_open,
+	.ndo_stop		=	gve_close,
+	.ndo_get_stats64	=	gve_get_stats,
+	.ndo_tx_timeout         =       gve_tx_timeout,
+};
+
+static void gve_handle_status(struct gve_priv *priv, u32 status)
+{
+	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
+		dev_info(&priv->pdev->dev, "Device requested reset.\n");
+		gve_set_do_reset(priv);
+	}
+}
+
+static void gve_handle_reset(struct gve_priv *priv)
+{
+	/* A service task will be scheduled at the end of probe to catch any
+	 * resets that need to happen, and we don't want to reset until
+	 * probe is done.
+	 */
+	if (gve_get_probe_in_progress(priv))
+		return;
+
+	if (gve_get_do_reset(priv)) {
+		rtnl_lock();
+		gve_reset(priv, false);
+		rtnl_unlock();
+	}
+}
+
+/* Handle NIC status register changes and reset requests */
+static void gve_service_task(struct work_struct *work)
+{
+	struct gve_priv *priv = container_of(work, struct gve_priv,
+					     service_task);
+
+	gve_handle_status(priv,
+			  ioread32be(&priv->reg_bar0->device_status));
+
+	gve_handle_reset(priv);
+}
+
+static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
+{
+	int num_ntfy;
+	int err;
+
+	/* Set up the adminq */
+	err = gve_adminq_alloc(&priv->pdev->dev, priv);
+	if (err) {
+		dev_err(&priv->pdev->dev,
+			"Failed to alloc admin queue: err=%d\n", err);
+		return err;
+	}
+
+	if (skip_describe_device)
+		goto setup_device;
+
+	/* Get the initial information we need from the device */
+	err = gve_adminq_describe_device(priv);
+	if (err) {
+		dev_err(&priv->pdev->dev,
+			"Could not get device information: err=%d\n", err);
+		goto err;
+	}
+	if (priv->dev->max_mtu > PAGE_SIZE) {
+		priv->dev->max_mtu = PAGE_SIZE;
+		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
+		if (err) {
+			netif_err(priv, drv, priv->dev, "Could not set mtu");
+			goto err;
+		}
+	}
+	priv->dev->mtu = priv->dev->max_mtu;
+	num_ntfy = pci_msix_vec_count(priv->pdev);
+	if (num_ntfy <= 0) {
+		dev_err(&priv->pdev->dev,
+			"could not count MSI-x vectors: err=%d\n", num_ntfy);
+		err = num_ntfy;
+		goto err;
+	} else if (num_ntfy < GVE_MIN_MSIX) {
+		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
+			GVE_MIN_MSIX, num_ntfy);
+		err = -EINVAL;
+		goto err;
+	}
+
+	priv->num_registered_pages = 0;
+	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
+	/* gvnic has one Notification Block per MSI-x vector, except for the
+	 * management vector
+	 */
+	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
+	priv->mgmt_msix_idx = priv->num_ntfy_blks;
+
+	priv->tx_cfg.max_queues =
+		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
+	priv->rx_cfg.max_queues =
+		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
+
+	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
+	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
+	if (priv->default_num_queues > 0) {
+		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
+						priv->tx_cfg.num_queues);
+		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
+						priv->rx_cfg.num_queues);
+	}
+
+	netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
+		   priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
+	netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
+		   priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
+
+setup_device:
+	err = gve_setup_device_resources(priv);
+	if (!err)
+		return 0;
+err:
+	gve_adminq_free(&priv->pdev->dev, priv);
+	return err;
+}
+
+static void gve_teardown_priv_resources(struct gve_priv *priv)
+{
+	gve_teardown_device_resources(priv);
+	gve_adminq_free(&priv->pdev->dev, priv);
+}
+
+static void gve_trigger_reset(struct gve_priv *priv)
+{
+	/* Reset the device by releasing the AQ */
+	gve_adminq_release(priv);
+}
+
+static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
+{
+	gve_trigger_reset(priv);
+	/* With the reset having already happened, close cannot fail */
+	if (was_up)
+		gve_close(priv->dev);
+	gve_teardown_priv_resources(priv);
+}
+
+static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
+{
+	int err;
+
+	err = gve_init_priv(priv, true);
+	if (err)
+		goto err;
+	if (was_up) {
+		err = gve_open(priv->dev);
+		if (err)
+			goto err;
+	}
+	return 0;
+err:
+	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
+	gve_turndown(priv);
+	return err;
+}
+
+int gve_reset(struct gve_priv *priv, bool attempt_teardown)
+{
+	bool was_up = netif_carrier_ok(priv->dev);
+	int err;
+
+	dev_info(&priv->pdev->dev, "Performing reset\n");
+	gve_clear_do_reset(priv);
+	gve_set_reset_in_progress(priv);
+	/* If we aren't attempting to teardown normally, just go turndown and
+	 * reset right away.
+	 */
+	if (!attempt_teardown) {
+		gve_turndown(priv);
+		gve_reset_and_teardown(priv, was_up);
+	} else {
+		/* Otherwise attempt to close normally */
+		if (was_up) {
+			err = gve_close(priv->dev);
+			/* If that fails reset as we did above */
+			if (err)
+				gve_reset_and_teardown(priv, was_up);
+		}
+		/* Clean up any remaining resources */
+		gve_teardown_priv_resources(priv);
+	}
+
+	/* Set it all back up */
+	err = gve_reset_recovery(priv, was_up);
+	gve_clear_reset_in_progress(priv);
+	return err;
+}
+
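+/* The device latches the driver version string one byte at a time through
+ * a single 8-bit register: prefix first, then the version, then a
+ * terminating newline.
+ */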
+static void gve_write_version(u8 __iomem *driver_version_register)
+{
+	const char *c = gve_version_prefix;
+
+	while (*c) {
+		writeb(*c, driver_version_register);
+		c++;
+	}
+
+	c = gve_version_str;
+	while (*c) {
+		writeb(*c, driver_version_register);
+		c++;
+	}
+	writeb('\n', driver_version_register);
+}
+
+static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int max_tx_queues, max_rx_queues;
+	struct net_device *dev;
+	__be32 __iomem *db_bar;
+	struct gve_registers __iomem *reg_bar;
+	struct gve_priv *priv;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return -ENXIO;
+
+	err = pci_request_regions(pdev, "gvnic-cfg");
+	if (err)
+		goto abort_with_enabled;
+
+	pci_set_master(pdev);
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
+		goto abort_with_pci_region;
+	}
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"Failed to set consistent dma mask: err=%d\n", err);
+		goto abort_with_pci_region;
+	}
+
+	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
+	if (!reg_bar) {
+		dev_err(&pdev->dev, "Failed to map pci bar!\n");
+		err = -ENOMEM;
+		goto abort_with_pci_region;
+	}
+
+	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
+	if (!db_bar) {
+		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
+		err = -ENOMEM;
+		goto abort_with_reg_bar;
+	}
+
+	gve_write_version(&reg_bar->driver_version);
+	/* Get max queues to alloc etherdev */
+	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
+	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
+	/* Alloc and setup the netdev and priv */
+	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
+	if (!dev) {
+		dev_err(&pdev->dev, "could not allocate netdev\n");
+		err = -ENOMEM;
+		goto abort_with_db_bar;
+	}
+	SET_NETDEV_DEV(dev, &pdev->dev);
+	pci_set_drvdata(pdev, dev);
+	dev->ethtool_ops = &gve_ethtool_ops;
+	dev->netdev_ops = &gve_netdev_ops;
+	/* advertise features */
+	dev->hw_features = NETIF_F_HIGHDMA;
+	dev->hw_features |= NETIF_F_SG;
+	dev->hw_features |= NETIF_F_HW_CSUM;
+	dev->hw_features |= NETIF_F_TSO;
+	dev->hw_features |= NETIF_F_TSO6;
+	dev->hw_features |= NETIF_F_TSO_ECN;
+	dev->hw_features |= NETIF_F_RXCSUM;
+	dev->hw_features |= NETIF_F_RXHASH;
+	dev->features = dev->hw_features;
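+	/* If a TX queue stalls past watchdog_timeo, the stack invokes
+	 * gve_tx_timeout(), which schedules a device reset.
+	 */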
+	dev->watchdog_timeo = 5 * HZ;
+	dev->min_mtu = ETH_MIN_MTU;
+	netif_carrier_off(dev);
+
+	priv = netdev_priv(dev);
+	priv->dev = dev;
+	priv->pdev = pdev;
+	priv->msg_enable = DEFAULT_MSG_LEVEL;
+	priv->reg_bar0 = reg_bar;
+	priv->db_bar2 = db_bar;
+	priv->service_task_flags = 0x0;
+	priv->state_flags = 0x0;
+
+	gve_set_probe_in_progress(priv);
+	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
+	if (!priv->gve_wq) {
+		dev_err(&pdev->dev, "Could not allocate workqueue");
+		err = -ENOMEM;
+		goto abort_with_netdev;
+	}
+	INIT_WORK(&priv->service_task, gve_service_task);
+	priv->tx_cfg.max_queues = max_tx_queues;
+	priv->rx_cfg.max_queues = max_rx_queues;
+
+	err = gve_init_priv(priv, false);
+	if (err)
+		goto abort_with_wq;
+
+	err = register_netdev(dev);
+	if (err)
+		goto abort_with_wq;
+
+	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
+	gve_clear_probe_in_progress(priv);
+	queue_work(priv->gve_wq, &priv->service_task);
+	return 0;
+
+abort_with_wq:
+	destroy_workqueue(priv->gve_wq);
+
+abort_with_netdev:
+	free_netdev(dev);
+
+abort_with_db_bar:
+	pci_iounmap(pdev, db_bar);
+
+abort_with_reg_bar:
+	pci_iounmap(pdev, reg_bar);
+
+abort_with_pci_region:
+	pci_release_regions(pdev);
+
+abort_with_enabled:
+	pci_disable_device(pdev);
+	return err;
+}
+
+static void gve_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct gve_priv *priv = netdev_priv(netdev);
+	__be32 __iomem *db_bar = priv->db_bar2;
+	void __iomem *reg_bar = priv->reg_bar0;
+
+	unregister_netdev(netdev);
+	gve_teardown_priv_resources(priv);
+	destroy_workqueue(priv->gve_wq);
+	free_netdev(netdev);
+	pci_iounmap(pdev, db_bar);
+	pci_iounmap(pdev, reg_bar);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static const struct pci_device_id gve_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
+	{ }
+};
+
+static struct pci_driver gvnic_driver = {
+	.name		= "gvnic",
+	.id_table	= gve_id_table,
+	.probe		= gve_probe,
+	.remove		= gve_remove,
+};
+
+module_pci_driver(gvnic_driver);
+
+MODULE_DEVICE_TABLE(pci, gve_id_table);
+MODULE_AUTHOR("Google, Inc.");
+MODULE_DESCRIPTION("gVNIC Driver");
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_VERSION(GVE_VERSION);
diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h
new file mode 100644
index 0000000..84ab889
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_register.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_REGISTER_H_
+#define _GVE_REGISTER_H_
+
+/* Fixed Configuration Registers */
+struct gve_registers {
+	__be32	device_status;
+	__be32	driver_status;
+	__be32	max_tx_queues;
+	__be32	max_rx_queues;
+	__be32	adminq_pfn;
+	__be32	adminq_doorbell;
+	__be32	adminq_event_counter;
+	u8	reserved[3];
+	u8	driver_version;
+};
+
+enum gve_device_status_flags {
+	GVE_DEVICE_STATUS_RESET_MASK		= BIT(1),
+	GVE_DEVICE_STATUS_LINK_STATUS_MASK	= BIT(2),
+};
+#endif /* _GVE_REGISTER_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
new file mode 100644
index 0000000..edec61d
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include <linux/etherdevice.h>
+
+static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
+{
+	struct gve_notify_block *block =
+			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];
+
+	block->rx = NULL;
+}
+
+static void gve_rx_free_ring(struct gve_priv *priv, int idx)
+{
+	struct gve_rx_ring *rx = &priv->rx[idx];
+	struct device *dev = &priv->pdev->dev;
+	size_t bytes;
+	u32 slots;
+
+	gve_rx_remove_from_block(priv, idx);
+
+	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
+	rx->desc.desc_ring = NULL;
+
+	dma_free_coherent(dev, sizeof(*rx->q_resources),
+			  rx->q_resources, rx->q_resources_bus);
+	rx->q_resources = NULL;
+
+	gve_unassign_qpl(priv, rx->data.qpl->id);
+	rx->data.qpl = NULL;
+	kvfree(rx->data.page_info);
+
+	slots = rx->mask + 1;
+	bytes = sizeof(*rx->data.data_ring) * slots;
+	dma_free_coherent(dev, bytes, rx->data.data_ring,
+			  rx->data.data_bus);
+	rx->data.data_ring = NULL;
+	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
+}
+
+static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
+				struct gve_rx_data_slot *slot,
+				dma_addr_t addr, struct page *page)
+{
+	page_info->page = page;
+	page_info->page_offset = 0;
+	page_info->page_address = page_address(page);
+	slot->qpl_offset = cpu_to_be64(addr);
+}
+
+static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
+{
+	struct gve_priv *priv = rx->gve;
+	u32 slots;
+	int i;
+
+	/* Allocate one page per Rx queue slot. Each page is split into two
+	 * packet buffers, when possible we "page flip" between the two.
+	 */
+	slots = rx->mask + 1;
+
+	rx->data.page_info = kvzalloc(slots *
+				      sizeof(*rx->data.page_info), GFP_KERNEL);
+	if (!rx->data.page_info)
+		return -ENOMEM;
+
+	rx->data.qpl = gve_assign_rx_qpl(priv);
+
+	for (i = 0; i < slots; i++) {
+		struct page *page = rx->data.qpl->pages[i];
+		dma_addr_t addr = i * PAGE_SIZE;
+
+		gve_setup_rx_buffer(&rx->data.page_info[i],
+				    &rx->data.data_ring[i], addr, page);
+	}
+
+	return slots;
+}
+
+static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
+{
+	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
+	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+	struct gve_rx_ring *rx = &priv->rx[queue_idx];
+
+	block->rx = rx;
+	rx->ntfy_id = ntfy_idx;
+}
+
+static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
+{
+	struct gve_rx_ring *rx = &priv->rx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	u32 slots, npages;
+	int filled_pages;
+	size_t bytes;
+	int err;
+
+	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
+	/* Make sure everything is zeroed to start with */
+	memset(rx, 0, sizeof(*rx));
+
+	rx->gve = priv;
+	rx->q_num = idx;
+
+	slots = priv->rx_pages_per_qpl;
+	rx->mask = slots - 1;
+
+	/* alloc rx data ring */
+	bytes = sizeof(*rx->data.data_ring) * slots;
+	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
+						&rx->data.data_bus,
+						GFP_KERNEL);
+	if (!rx->data.data_ring)
+		return -ENOMEM;
+	filled_pages = gve_prefill_rx_pages(rx);
+	if (filled_pages < 0) {
+		err = -ENOMEM;
+		goto abort_with_slots;
+	}
+	rx->fill_cnt = filled_pages;
+	/* Ensure data ring slots (packet buffers) are visible. */
+	dma_wmb();
+
+	/* Alloc gve_queue_resources */
+	rx->q_resources =
+		dma_alloc_coherent(hdev,
+				   sizeof(*rx->q_resources),
+				   &rx->q_resources_bus,
+				   GFP_KERNEL);
+	if (!rx->q_resources) {
+		err = -ENOMEM;
+		goto abort_filled;
+	}
+	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
+		  (unsigned long)rx->data.data_bus);
+
+	/* alloc rx desc ring */
+	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+	npages = bytes / PAGE_SIZE;
+	if (npages * PAGE_SIZE != bytes) {
+		err = -EIO;
+		goto abort_with_q_resources;
+	}
+
+	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
+						GFP_KERNEL);
+	if (!rx->desc.desc_ring) {
+		err = -ENOMEM;
+		goto abort_with_q_resources;
+	}
+	rx->mask = slots - 1;
+	rx->cnt = 0;
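+	/* Consumption starts expecting sequence number 1, matching the
+	 * device's first write to a fresh ring (see gve_next_seqno()).
+	 */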
+	rx->desc.seqno = 1;
+	gve_rx_add_to_block(priv, idx);
+
+	return 0;
+
+abort_with_q_resources:
+	dma_free_coherent(hdev, sizeof(*rx->q_resources),
+			  rx->q_resources, rx->q_resources_bus);
+	rx->q_resources = NULL;
+abort_filled:
+	kvfree(rx->data.page_info);
+abort_with_slots:
+	bytes = sizeof(*rx->data.data_ring) * slots;
+	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
+	rx->data.data_ring = NULL;
+
+	return err;
+}
+
+int gve_rx_alloc_rings(struct gve_priv *priv)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		err = gve_rx_alloc_ring(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to alloc rx ring=%d: err=%d\n",
+				  i, err);
+			break;
+		}
+	}
+	/* Free any rings allocated before the error */
+	if (err) {
+		int j;
+
+		for (j = 0; j < i; j++)
+			gve_rx_free_ring(priv, j);
+	}
+	return err;
+}
+
+void gve_rx_free_rings(struct gve_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++)
+		gve_rx_free_ring(priv, i);
+}
+
+void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);
+
+	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
+}
+
+static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
+{
+	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
+		return PKT_HASH_TYPE_L4;
+	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+		return PKT_HASH_TYPE_L3;
+	return PKT_HASH_TYPE_L2;
+}
+
+static struct sk_buff *gve_rx_copy(struct net_device *dev,
+				   struct napi_struct *napi,
+				   struct gve_rx_slot_page_info *page_info,
+				   u16 len)
+{
+	struct sk_buff *skb = napi_alloc_skb(napi, len);
+	void *va = page_info->page_address + GVE_RX_PAD +
+		   page_info->page_offset;
+
+	if (unlikely(!skb))
+		return NULL;
+
+	__skb_put(skb, len);
+
+	skb_copy_to_linear_data(skb, va, len);
+
+	skb->protocol = eth_type_trans(skb, dev);
+	return skb;
+}
+
+static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
+					struct napi_struct *napi,
+					struct gve_rx_slot_page_info *page_info,
+					u16 len)
+{
+	struct sk_buff *skb = napi_get_frags(napi);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_add_rx_frag(skb, 0, page_info->page,
+			page_info->page_offset +
+			GVE_RX_PAD, len, PAGE_SIZE / 2);
+
+	return skb;
+}
+
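+/* Each page backs two packet buffers; XOR-ing both the host-side offset
+ * and the device-visible qpl_offset with PAGE_SIZE / 2 toggles between
+ * the two halves.
+ */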
+static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
+			     struct gve_rx_data_slot *data_ring)
+{
+	u64 addr = be64_to_cpu(data_ring->qpl_offset);
+
+	page_info->page_offset ^= PAGE_SIZE / 2;
+	addr ^= PAGE_SIZE / 2;
+	data_ring->qpl_offset = cpu_to_be64(addr);
+}
+
+static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
+		   netdev_features_t feat, u32 idx)
+{
+	struct gve_rx_slot_page_info *page_info;
+	struct gve_priv *priv = rx->gve;
+	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+	struct net_device *dev = priv->dev;
+	struct sk_buff *skb;
+	int pagecount;
+	u16 len;
+
+	/* drop this packet */
+	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
+		return true;
+
+	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
+	page_info = &rx->data.page_info[idx];
+	dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
+				PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/* gvnic can only receive into registered segments. If the buffer
+	 * can't be recycled, our only choice is to copy the data out of
+	 * it so that we can return it to the device.
+	 */
+
+	if (PAGE_SIZE == 4096) {
+		if (len <= priv->rx_copybreak) {
+			/* Just copy small packets */
+			skb = gve_rx_copy(dev, napi, page_info, len);
+			goto have_skb;
+		}
+		if (unlikely(!gve_can_recycle_pages(dev))) {
+			skb = gve_rx_copy(dev, napi, page_info, len);
+			goto have_skb;
+		}
+		pagecount = page_count(page_info->page);
+		if (pagecount == 1) {
+			/* No part of this page is used by any SKBs; we attach
+			 * the page fragment to a new SKB and pass it up the
+			 * stack.
+			 */
+			skb = gve_rx_add_frags(dev, napi, page_info, len);
+			if (!skb)
+				return true;
+			/* Make sure the kernel stack can't release the page */
+			get_page(page_info->page);
+			/* "flip" to other packet buffer on this page */
+			gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
+		} else if (pagecount >= 2) {
+			/* We have previously passed the other half of this
+			 * page up the stack, but it has not yet been freed.
+			 */
+			skb = gve_rx_copy(dev, napi, page_info, len);
+		} else {
+			WARN(pagecount < 1, "Pagecount should never be < 1");
+			return false;
+		}
+	} else {
+		skb = gve_rx_copy(dev, napi, page_info, len);
+	}
+
+have_skb:
+	/* Failing to allocate an skb is not a reset-worthy failure; just
+	 * drop this packet.
+	 */
+	if (!skb)
+		return true;
+
+	if (likely(feat & NETIF_F_RXCSUM)) {
+		/* NIC passes up the partial sum */
+		if (rx_desc->csum)
+			skb->ip_summed = CHECKSUM_COMPLETE;
+		else
+			skb->ip_summed = CHECKSUM_NONE;
+		skb->csum = csum_unfold(rx_desc->csum);
+	}
+
+	/* parse flags & pass relevant info up */
+	if (likely(feat & NETIF_F_RXHASH) &&
+	    gve_needs_rss(rx_desc->flags_seq))
+		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
+			     gve_rss_type(rx_desc->flags_seq));
+
+	if (skb_is_nonlinear(skb))
+		napi_gro_frags(napi);
+	else
+		napi_gro_receive(napi, skb);
+	return true;
+}
+
+static bool gve_rx_work_pending(struct gve_rx_ring *rx)
+{
+	struct gve_rx_desc *desc;
+	__be16 flags_seq;
+	u32 next_idx;
+
+	next_idx = rx->cnt & rx->mask;
+	desc = rx->desc.desc_ring + next_idx;
+
+	flags_seq = desc->flags_seq;
+	/* Make sure we have synchronized the seq no with the device */
+	smp_rmb();
+
+	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
+}
+
+bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
+		       netdev_features_t feat)
+{
+	struct gve_priv *priv = rx->gve;
+	struct gve_rx_desc *desc;
+	u32 cnt = rx->cnt;
+	u32 idx = cnt & rx->mask;
+	u32 work_done = 0;
+	u64 bytes = 0;
+
+	desc = rx->desc.desc_ring + idx;
+	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
+	       work_done < budget) {
+		netif_info(priv, rx_status, priv->dev,
+			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
+			   rx->q_num, idx, desc, desc->flags_seq);
+		netif_info(priv, rx_status, priv->dev,
+			   "[%d] seqno=%d rx->desc.seqno=%d\n",
+			   rx->q_num, GVE_SEQNO(desc->flags_seq),
+			   rx->desc.seqno);
+		bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
+		if (!gve_rx(rx, desc, feat, idx))
+			gve_schedule_reset(priv);
+		cnt++;
+		idx = cnt & rx->mask;
+		desc = rx->desc.desc_ring + idx;
+		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
+		work_done++;
+	}
+
+	if (!work_done)
+		return false;
+
+	u64_stats_update_begin(&rx->statss);
+	rx->rpackets += work_done;
+	rx->rbytes += bytes;
+	u64_stats_update_end(&rx->statss);
+	rx->cnt = cnt;
+	rx->fill_cnt += work_done;
+
+	/* restock desc ring slots */
+	dma_wmb();	/* Ensure descs are visible before ringing doorbell */
+	gve_rx_write_doorbell(priv, rx);
+	return gve_rx_work_pending(rx);
+}
+
+bool gve_rx_poll(struct gve_notify_block *block, int budget)
+{
+	struct gve_rx_ring *rx = block->rx;
+	netdev_features_t feat;
+	bool repoll = false;
+
+	feat = block->napi.dev->features;
+
+	/* If budget is 0, do all the work */
+	if (budget == 0)
+		budget = INT_MAX;
+
+	if (budget > 0)
+		repoll |= gve_clean_rx_done(rx, budget, feat);
+	else
+		repoll |= gve_rx_work_pending(rx);
+	return repoll;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
new file mode 100644
index 0000000..f488943
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -0,0 +1,605 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+
+static inline void gve_tx_put_doorbell(struct gve_priv *priv,
+				       struct gve_queue_resources *q_resources,
+				       u32 val)
+{
+	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
+}
+
+/* gvnic can only transmit from a Registered Segment.
+ * We copy skb payloads into the registered segment before writing Tx
+ * descriptors and ringing the Tx doorbell.
+ *
+ * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
+ * free allocations in the order they were allocated.
+ */
+
+static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
+{
+	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
+			  PAGE_KERNEL);
+	if (unlikely(!fifo->base)) {
+		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
+			  fifo->qpl->id);
+		return -ENOMEM;
+	}
+
+	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
+	atomic_set(&fifo->available, fifo->size);
+	fifo->head = 0;
+	return 0;
+}
+
+static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
+{
+	WARN(atomic_read(&fifo->available) != fifo->size,
+	     "Releasing non-empty fifo");
+
+	vunmap(fifo->base);
+}
+
+static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
+					  size_t bytes)
+{
+	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
+}
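+
The helper above returns 0 when a fragment of the requested size still fits before the end of the FIFO, and otherwise the number of tail bytes left, which the caller then burns as padding so a header never straddles the wrap. A standalone model with worked numbers; the 4096-byte FIFO size is an arbitrary example, not a driver constant:

#include <stddef.h>
#include <stdio.h>

/* Same decision as gve_tx_fifo_pad_alloc_one_frag(), on plain integers */
static size_t pad_one_frag(size_t head, size_t size, size_t bytes)
{
	return (head + bytes < size) ? 0 : size - head;
}

int main(void)
{
	/* 200 bytes at head 4000 of a 4096-byte FIFO would cross the end,
	 * so the 96 remaining tail bytes become padding and the fragment
	 * is placed at offset 0 instead
	 */
	printf("pad = %zu\n", pad_one_frag(4000, 4096, 200));	/* 96 */
	printf("pad = %zu\n", pad_one_frag(100, 4096, 200));	/* 0 */
	return 0;
}
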
+
+static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
+{
+	return atomic_read(&fifo->available) > bytes;
+}
+
+/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
+ * @fifo: FIFO to allocate from
+ * @bytes: Allocation size
+ * @iov: Scatter-gather elements to fill with allocation fragment base/len
+ *
+ * Returns number of valid elements in iov[] or negative on error.
+ *
+ * Allocations from a given FIFO must be externally synchronized, but
+ * concurrent allocations and frees are allowed.
+ */
+static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
+			     struct gve_tx_iovec iov[2])
+{
+	size_t overflow, padding;
+	u32 aligned_head;
+	int nfrags = 0;
+
+	if (!bytes)
+		return 0;
+
+	/* This check happens before we know how much padding is needed to
+	 * align to a cacheline boundary for the payload, but that is fine,
+	 * because the FIFO head always starts aligned, and the FIFO's boundaries
+	 * are aligned, so if there is space for the data, there is space for
+	 * the padding to the next alignment.
+	 */
+	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
+	     "Reached %s when there's not enough space in the fifo", __func__);
+
+	nfrags++;
+
+	iov[0].iov_offset = fifo->head;
+	iov[0].iov_len = bytes;
+	fifo->head += bytes;
+
+	if (fifo->head > fifo->size) {
+		/* If the allocation did not fit in the tail fragment of the
+		 * FIFO, also use the head fragment.
+		 */
+		nfrags++;
+		overflow = fifo->head - fifo->size;
+		iov[0].iov_len -= overflow;
+		iov[1].iov_offset = 0;	/* Start of FIFO */
+		iov[1].iov_len = overflow;
+
+		fifo->head = overflow;
+	}
+
+	/* Re-align to a cacheline boundary */
+	aligned_head = L1_CACHE_ALIGN(fifo->head);
+	padding = aligned_head - fifo->head;
+	iov[nfrags - 1].iov_padding = padding;
+	atomic_sub(bytes + padding, &fifo->available);
+	fifo->head = aligned_head;
+
+	if (fifo->head == fifo->size)
+		fifo->head = 0;
+
+	return nfrags;
+}
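+
When an allocation crosses the end of the registered segment, the code above hands back two scatter elements and then pads the running head up to the next cacheline. A worked trace of that path as plain arithmetic; the 4096-byte FIFO and 64-byte cacheline are example values standing in for the qpl size and L1_CACHE_ALIGN():

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int head = 4000, size = 4096, bytes = 300;
	unsigned int overflow = head + bytes - size;	/* 204 */

	/* iov[0]: offset 4000, len 300 - 204 = 96  (tail fragment)
	 * iov[1]: offset 0,    len 204             (head fragment)
	 */
	head = overflow;
	/* the last fragment is padded to the next 64-byte boundary */
	printf("padding = %u, new head = %u\n",
	       ALIGN_UP(head, 64) - head, ALIGN_UP(head, 64));	/* 52, 256 */
	/* matches atomic_sub(bytes + padding, ...): 300 + 52 = 352 */
	printf("bytes consumed = %u\n", bytes + ALIGN_UP(head, 64) - head);
	return 0;
}
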
+
+/* gve_tx_free_fifo - Return space to Tx FIFO
+ * @fifo: FIFO to return fragments to
+ * @bytes: Bytes to free
+ */
+static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
+{
+	atomic_add(bytes, &fifo->available);
+}
+
+static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
+{
+	struct gve_notify_block *block =
+			&priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];
+
+	block->tx = NULL;
+}
+
+static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
+			     u32 to_do, bool try_to_wake);
+
+static void gve_tx_free_ring(struct gve_priv *priv, int idx)
+{
+	struct gve_tx_ring *tx = &priv->tx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	size_t bytes;
+	u32 slots;
+
+	gve_tx_remove_from_block(priv, idx);
+	slots = tx->mask + 1;
+	gve_clean_tx_done(priv, tx, tx->req, false);
+	netdev_tx_reset_queue(tx->netdev_txq);
+
+	dma_free_coherent(hdev, sizeof(*tx->q_resources),
+			  tx->q_resources, tx->q_resources_bus);
+	tx->q_resources = NULL;
+
+	gve_tx_fifo_release(priv, &tx->tx_fifo);
+	gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+	tx->tx_fifo.qpl = NULL;
+
+	bytes = sizeof(*tx->desc) * slots;
+	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
+	tx->desc = NULL;
+
+	vfree(tx->info);
+	tx->info = NULL;
+
+	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
+}
+
+static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
+{
+	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
+	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+	struct gve_tx_ring *tx = &priv->tx[queue_idx];
+
+	block->tx = tx;
+	tx->ntfy_id = ntfy_idx;
+}
+
+static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
+{
+	struct gve_tx_ring *tx = &priv->tx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	u32 slots = priv->tx_desc_cnt;
+	size_t bytes;
+
+	/* Make sure everything is zeroed to start */
+	memset(tx, 0, sizeof(*tx));
+	tx->q_num = idx;
+
+	tx->mask = slots - 1;
+
+	/* alloc metadata */
+	tx->info = vzalloc(sizeof(*tx->info) * slots);
+	if (!tx->info)
+		return -ENOMEM;
+
+	/* alloc tx queue */
+	bytes = sizeof(*tx->desc) * slots;
+	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
+	if (!tx->desc)
+		goto abort_with_info;
+
+	tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
+
+	/* map Tx FIFO */
+	if (gve_tx_fifo_init(priv, &tx->tx_fifo))
+		goto abort_with_desc;
+
+	tx->q_resources =
+		dma_alloc_coherent(hdev,
+				   sizeof(*tx->q_resources),
+				   &tx->q_resources_bus,
+				   GFP_KERNEL);
+	if (!tx->q_resources)
+		goto abort_with_fifo;
+
+	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
+		  (unsigned long)tx->bus);
+	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+	gve_tx_add_to_block(priv, idx);
+
+	return 0;
+
+abort_with_fifo:
+	gve_tx_fifo_release(priv, &tx->tx_fifo);
+abort_with_desc:
+	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
+	tx->desc = NULL;
+abort_with_info:
+	vfree(tx->info);
+	tx->info = NULL;
+	return -ENOMEM;
+}
+
+int gve_tx_alloc_rings(struct gve_priv *priv)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		err = gve_tx_alloc_ring(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to alloc tx ring=%d: err=%d\n",
+				  i, err);
+			break;
+		}
+	}
+	/* Clean up any rings that were allocated before the error */
+	if (err) {
+		int j;
+
+		for (j = 0; j < i; j++)
+			gve_tx_free_ring(priv, j);
+	}
+	return err;
+}
+
+void gve_tx_free_rings(struct gve_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->tx_cfg.num_queues; i++)
+		gve_tx_free_ring(priv, i);
+}
+
+/* gve_tx_avail - Calculates the number of slots available in the ring
+ * @tx: tx ring to check
+ *
+ * Returns the number of slots available
+ *
+ * The capacity of the queue is mask + 1. We don't need to reserve an entry.
+ **/
+static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
+{
+	return tx->mask + 1 - (tx->req - tx->done);
+}
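+
req and done are free-running u32 counters that are only masked when indexing the ring, so the subtraction above stays correct even across 32-bit wraparound, and no slot needs to be reserved to distinguish full from empty. A quick demonstration; the ring size of 256 is an example, not the driver's default:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mask = 256 - 1;		/* ring of 256 slots */
	uint32_t req = UINT32_MAX - 2;		/* about to wrap */
	uint32_t done = req - 200;		/* 200 descriptors in flight */

	/* unsigned subtraction is wrap-safe: 256 - 200 = 56 free slots */
	printf("avail = %u\n", mask + 1 - (req - done));
	req += 10;				/* wraps past zero */
	printf("avail = %u\n", mask + 1 - (req - done));	/* 46 */
	return 0;
}
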
+
+static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
+					      struct sk_buff *skb)
+{
+	int pad_bytes, align_hdr_pad;
+	int bytes;
+	int hlen;
+
+	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
+				 tcp_hdrlen(skb) : skb_headlen(skb);
+
+	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
+						   hlen);
+	/* We need to take into account the header alignment padding. */
+	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
+	bytes = align_hdr_pad + pad_bytes + skb->len;
+
+	return bytes;
+}
+
+/* The most descriptors we could need are 3 - 1 for the headers, 1 for
+ * the beginning of the payload at the end of the FIFO, and 1 if the
+ * payload wraps to the beginning of the FIFO.
+ */
+#define MAX_TX_DESC_NEEDED	3
+
+/* Check if sufficient resources (descriptor ring space, FIFO space) are
+ * available to transmit the given number of bytes.
+ */
+static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
+{
+	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
+		gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
+}
+
+/* Stops the queue if the skb cannot be transmitted. */
+static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
+{
+	int bytes_required;
+
+	bytes_required = gve_skb_fifo_bytes_required(tx, skb);
+	if (likely(gve_can_tx(tx, bytes_required)))
+		return 0;
+
+	/* No space, so stop the queue */
+	tx->stop_queue++;
+	netif_tx_stop_queue(tx->netdev_txq);
+	smp_mb();	/* sync with restarting queue in gve_clean_tx_done() */
+
+	/* Now check for resources again, in case gve_clean_tx_done() freed
+	 * resources after we checked and we stopped the queue after
+	 * gve_clean_tx_done() checked.
+	 *
+	 * gve_maybe_stop_tx()			gve_clean_tx_done()
+	 *   nsegs/can_alloc test failed
+	 *					  gve_tx_free_fifo()
+	 *					  if (tx queue stopped)
+	 *					    netif_tx_queue_wake()
+	 *   netif_tx_stop_queue()
+	 *   Need to check again for space here!
+	 */
+	if (likely(!gve_can_tx(tx, bytes_required)))
+		return -EBUSY;
+
+	netif_tx_start_queue(tx->netdev_txq);
+	tx->wake_queue++;
+	return 0;
+}
+
+static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
+				 struct sk_buff *skb, bool is_gso,
+				 int l4_hdr_offset, u32 desc_cnt,
+				 u16 hlen, u64 addr)
+{
+	/* l4_hdr_offset and csum_offset are in units of 16-bit words */
+	if (is_gso) {
+		pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
+		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
+	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+		pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
+		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
+	} else {
+		pkt_desc->pkt.type_flags = GVE_TXD_STD;
+		pkt_desc->pkt.l4_csum_offset = 0;
+		pkt_desc->pkt.l4_hdr_offset = 0;
+	}
+	pkt_desc->pkt.desc_cnt = desc_cnt;
+	pkt_desc->pkt.len = cpu_to_be16(skb->len);
+	pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
+	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
+}
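+
The descriptor encodes the checksum and L4 header offsets in units of 16-bit words, hence the right shifts by one above. For a plain TCP/IPv4 frame the arithmetic looks like this (the 14/20/16 byte offsets are standard Ethernet/IPv4/TCP layout facts, not driver constants):

#include <stdio.h>

int main(void)
{
	unsigned int l4_hdr_offset = 14 + 20;	/* Ethernet + IPv4 headers */
	unsigned int csum_offset = 16;		/* TCP checksum, from L4 start */

	printf("l4_hdr_offset field = %u\n", l4_hdr_offset >> 1);	/* 17 */
	printf("l4_csum_offset field = %u\n", csum_offset >> 1);	/* 8 */
	return 0;
}
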
+
+static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
+				 struct sk_buff *skb, bool is_gso,
+				 u16 len, u64 addr)
+{
+	seg_desc->seg.type_flags = GVE_TXD_SEG;
+	if (is_gso) {
+		if (skb_is_gso_v6(skb))
+			seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
+		seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
+		seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+	}
+	seg_desc->seg.seg_len = cpu_to_be16(len);
+	seg_desc->seg.seg_addr = cpu_to_be64(addr);
+}
+
+static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
+				    u64 iov_offset, u64 iov_len)
+{
+	u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
+	u64 first_page = iov_offset / PAGE_SIZE;
+	dma_addr_t dma;
+	u64 page;
+
+	for (page = first_page; page <= last_page; page++) {
+		dma = page_buses[page];
+		dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
+	}
+}
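+
Because the registered segment is backed by individually mapped pages, a FIFO fragment may straddle a page boundary, and the loop above syncs every backing page the fragment touches. The range arithmetic, assuming 4096-byte pages for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t page_size = 4096;
	uint64_t iov_offset = 4090, iov_len = 20;	/* straddles a page */
	uint64_t first = iov_offset / page_size;
	uint64_t last = (iov_offset + iov_len - 1) / page_size;

	/* the -1 keeps a page-aligned end from syncing one extra page */
	printf("sync pages %llu..%llu\n",	/* 0..1 */
	       (unsigned long long)first, (unsigned long long)last);
	return 0;
}
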
+
+static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
+			  struct device *dev)
+{
+	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
+	union gve_tx_desc *pkt_desc, *seg_desc;
+	struct gve_tx_buffer_state *info;
+	bool is_gso = skb_is_gso(skb);
+	u32 idx = tx->req & tx->mask;
+	int payload_iov = 2;
+	int copy_offset;
+	u32 next_idx;
+	int i;
+
+	info = &tx->info[idx];
+	pkt_desc = &tx->desc[idx];
+
+	l4_hdr_offset = skb_checksum_start_offset(skb);
+	/* If the skb is gso, then we want the tcp header in the first segment;
+	 * otherwise we want the linear portion of the skb (which contains the
+	 * checksum fields, since skb->csum_start and skb->csum_offset are
+	 * given relative to skb->head) in the first segment.
+	 */
+	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
+			skb_headlen(skb);
+
+	info->skb = skb;
+	/* We don't want to split the header, so if necessary, pad to the end
+	 * of the fifo and then put the header at the beginning of the fifo.
+	 */
+	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
+	hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
+				       &info->iov[0]);
+	WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
+	payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
+					   &info->iov[payload_iov]);
+
+	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+			     1 + payload_nfrags, hlen,
+			     info->iov[hdr_nfrags - 1].iov_offset);
+
+	skb_copy_bits(skb, 0,
+		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
+		      hlen);
+	gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+				info->iov[hdr_nfrags - 1].iov_offset,
+				info->iov[hdr_nfrags - 1].iov_len);
+	copy_offset = hlen;
+
+	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
+		next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
+		seg_desc = &tx->desc[next_idx];
+
+		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
+				     info->iov[i].iov_len,
+				     info->iov[i].iov_offset);
+
+		skb_copy_bits(skb, copy_offset,
+			      tx->tx_fifo.base + info->iov[i].iov_offset,
+			      info->iov[i].iov_len);
+		gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+					info->iov[i].iov_offset,
+					info->iov[i].iov_len);
+		copy_offset += info->iov[i].iov_len;
+	}
+
+	return 1 + payload_nfrags;
+}
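+
Putting the pieces together: for a TSO skb the copied header region runs through the end of the TCP header, and the packet needs one packet descriptor plus one segment descriptor per payload fragment, which is where the MAX_TX_DESC_NEEDED bound of 3 above comes from. A worked example for an option-less TCP/IPv4 frame; the header sizes are protocol facts, the payload split is hypothetical:

#include <stdio.h>

int main(void)
{
	unsigned int hlen = 14 + 20 + 20;	/* Eth + IPv4 + TCP = 54 */
	unsigned int payload_nfrags = 2;	/* payload wrapped the FIFO */

	printf("hlen = %u bytes\n", hlen);
	/* 1 packet descriptor + 2 segment descriptors = 3 (the maximum) */
	printf("descriptors = %u\n", 1 + payload_nfrags);
	return 0;
}
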
+
+netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_tx_ring *tx;
+	int nsegs;
+
+	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
+	     "skb queue index out of range");
+	tx = &priv->tx[skb_get_queue_mapping(skb)];
+	if (unlikely(gve_maybe_stop_tx(tx, skb))) {
+		/* We need to ring the txq doorbell -- we have stopped the Tx
+		 * queue for want of resources, but prior calls to gve_tx()
+		 * may have added descriptors without ringing the doorbell.
+		 */
+
+		/* Ensure tx descs from a prior gve_tx are visible before
+		 * ringing doorbell.
+		 */
+		dma_wmb();
+		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+		return NETDEV_TX_BUSY;
+	}
+	nsegs = gve_tx_add_skb(tx, skb, &priv->pdev->dev);
+
+	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
+	skb_tx_timestamp(skb);
+
+	/* give packets to NIC */
+	tx->req += nsegs;
+
+	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
+		return NETDEV_TX_OK;
+
+	/* Ensure tx descs are visible before ringing doorbell */
+	dma_wmb();
+	gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+	return NETDEV_TX_OK;
+}
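+
The xmit path above batches doorbell writes: while the stack signals that more packets are queued behind this one (netdev_xmit_more()), the MMIO write is skipped, and the final packet of the burst publishes the whole batch. A toy single-threaded model of that bookkeeping; the xmit() helper and counters here are illustrative, not driver API:

#include <stdbool.h>
#include <stdio.h>

static unsigned int req, doorbell;	/* descriptors posted / published */

static void xmit(unsigned int nsegs, bool more)
{
	req += nsegs;
	if (more)
		return;		/* defer: a later xmit rings for the batch */
	doorbell = req;		/* one "MMIO write" covers everything */
}

int main(void)
{
	xmit(2, true);
	xmit(3, true);
	xmit(1, false);		/* burst ends: publish all six descriptors */
	printf("req=%u doorbell=%u\n", req, doorbell);	/* 6 6 */
	return 0;
}
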
+
+#define GVE_TX_START_THRESH	PAGE_SIZE
+
+static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
+			     u32 to_do, bool try_to_wake)
+{
+	struct gve_tx_buffer_state *info;
+	u64 pkts = 0, bytes = 0;
+	size_t space_freed = 0;
+	struct sk_buff *skb;
+	int i, j;
+	u32 idx;
+
+	for (j = 0; j < to_do; j++) {
+		idx = tx->done & tx->mask;
+		netif_info(priv, tx_done, priv->dev,
+			   "[%d] %s: idx=%d (req=%u done=%u)\n",
+			   tx->q_num, __func__, idx, tx->req, tx->done);
+		info = &tx->info[idx];
+		skb = info->skb;
+
+		/* Mark as free */
+		if (skb) {
+			info->skb = NULL;
+			bytes += skb->len;
+			pkts++;
+			dev_consume_skb_any(skb);
+			/* FIFO free */
+			for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
+				space_freed += info->iov[i].iov_len +
+					       info->iov[i].iov_padding;
+				info->iov[i].iov_len = 0;
+				info->iov[i].iov_padding = 0;
+			}
+		}
+		tx->done++;
+	}
+
+	gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+	u64_stats_update_begin(&tx->statss);
+	tx->bytes_done += bytes;
+	tx->pkt_done += pkts;
+	u64_stats_update_end(&tx->statss);
+	netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);
+
+	/* start the queue if we've stopped it */
+#ifndef CONFIG_BQL
+	/* Make sure that the doorbells are synced */
+	smp_mb();
+#endif
+	if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
+	    likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
+		tx->wake_queue++;
+		netif_tx_wake_queue(tx->netdev_txq);
+	}
+
+	return pkts;
+}
+
+__be32 gve_tx_load_event_counter(struct gve_priv *priv,
+				 struct gve_tx_ring *tx)
+{
+	u32 counter_index = be32_to_cpu(tx->q_resources->counter_index);
+
+	return READ_ONCE(priv->counter_array[counter_index]);
+}
+
+bool gve_tx_poll(struct gve_notify_block *block, int budget)
+{
+	struct gve_priv *priv = block->priv;
+	struct gve_tx_ring *tx = block->tx;
+	bool repoll = false;
+	u32 nic_done;
+	u32 to_do;
+
+	/* If budget is 0, do all the work */
+	if (budget == 0)
+		budget = INT_MAX;
+
+	/* Find out how much work there is to be done */
+	tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
+	nic_done = be32_to_cpu(tx->last_nic_done);
+	if (budget > 0) {
+		/* Do as much of the outstanding work as the budget
+		 * allows
+		 */
+		to_do = min_t(u32, (nic_done - tx->done), budget);
+		gve_clean_tx_done(priv, tx, to_do, true);
+	}
+	/* If we still have work, we want to repoll */
+	repoll |= (nic_done != tx->done);
+	return repoll;
+}
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 2515271..3892a20 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # HISILICON device configuration
 #
@@ -45,6 +46,16 @@
 	  If you wish to compile a kernel for a hardware with hisilicon p04 SoC and
 	  want to use the internal ethernet then you should answer Y to this.
 
+config HI13X1_GMAC
+	bool "Hisilicon HI13X1 Network Device Support"
+	depends on HIP04_ETH
+	help
+	  If you wish to compile a kernel for hardware with the hisilicon
+	  hi13x1_gmac then you should answer Y to this. This makes this driver
+	  suitable for use on certain boards such as the HI13X1.
+
+	  If you are unsure, say N.
+
 config HNS_MDIO
 	tristate
 	select PHYLIB
@@ -118,6 +129,7 @@
 	tristate "Hisilicon HNS3 Ethernet Device Support"
 	default m
 	depends on 64BIT && PCI
+	depends on INET
 	---help---
 	  This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
 	  family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 6127697..4606a7e 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 
 /* Copyright (c) 2014 Linaro Ltd.
  * Copyright (c) 2014 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -20,6 +16,8 @@
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 
+#define SC_PPE_RESET_DREQ		0x026C
+
 #define PPE_CFG_RX_ADDR			0x100
 #define PPE_CFG_POOL_GRP		0x300
 #define PPE_CFG_RX_BUF_SIZE		0x400
@@ -37,10 +35,23 @@
 #define GE_MODE_CHANGE_REG		0x1b4
 #define GE_RECV_CONTROL_REG		0x1e0
 #define GE_STATION_MAC_ADDRESS		0x210
-#define PPE_CFG_CPU_ADD_ADDR		0x580
-#define PPE_CFG_MAX_FRAME_LEN_REG	0x408
+
 #define PPE_CFG_BUS_CTRL_REG		0x424
 #define PPE_CFG_RX_CTRL_REG		0x428
+
+#if defined(CONFIG_HI13X1_GMAC)
+#define PPE_CFG_CPU_ADD_ADDR		0x6D0
+#define PPE_CFG_MAX_FRAME_LEN_REG	0x500
+#define PPE_CFG_RX_PKT_MODE_REG		0x504
+#define PPE_CFG_QOS_VMID_GEN		0x520
+#define PPE_CFG_RX_PKT_INT		0x740
+#define PPE_INTEN			0x700
+#define PPE_INTSTS			0x708
+#define PPE_RINT			0x704
+#define PPE_CFG_STS_MODE		0x880
+#else
+#define PPE_CFG_CPU_ADD_ADDR		0x580
+#define PPE_CFG_MAX_FRAME_LEN_REG	0x408
 #define PPE_CFG_RX_PKT_MODE_REG		0x438
 #define PPE_CFG_QOS_VMID_GEN		0x500
 #define PPE_CFG_RX_PKT_INT		0x538
@@ -48,8 +59,12 @@
 #define PPE_INTSTS			0x608
 #define PPE_RINT			0x604
 #define PPE_CFG_STS_MODE		0x700
+#endif /* CONFIG_HI13X1_GMAC */
+
 #define PPE_HIS_RX_PKT_CNT		0x804
 
+#define RESET_DREQ_ALL			0xffffffff
+
 /* REG_INTERRUPT */
 #define RCV_INT				BIT(10)
 #define RCV_NOBUF			BIT(8)
@@ -61,8 +76,15 @@
 /* TX descriptor config */
 #define TX_FREE_MEM			BIT(0)
 #define TX_READ_ALLOC_L3		BIT(1)
-#define TX_FINISH_CACHE_INV		BIT(2)
+#if defined(CONFIG_HI13X1_GMAC)
+#define TX_CLEAR_WB			BIT(7)
+#define TX_RELEASE_TO_PPE		BIT(4)
+#define TX_FINISH_CACHE_INV		BIT(6)
+#define TX_POOL_SHIFT			16
+#else
 #define TX_CLEAR_WB			BIT(4)
+#define TX_FINISH_CACHE_INV		BIT(2)
+#endif
 #define TX_L3_CHECKSUM			BIT(5)
 #define TX_LOOP_BACK			BIT(11)
 
@@ -97,18 +119,35 @@
 #define GE_RX_PORT_EN			BIT(1)
 #define GE_TX_PORT_EN			BIT(2)
 
-#define PPE_CFG_STS_RX_PKT_CNT_RC	BIT(12)
-
 #define PPE_CFG_RX_PKT_ALIGN		BIT(18)
-#define PPE_CFG_QOS_VMID_MODE		BIT(14)
+
+#if defined(CONFIG_HI13X1_GMAC)
+#define PPE_CFG_QOS_VMID_GRP_SHIFT	4
+#define PPE_CFG_RX_CTRL_ALIGN_SHIFT	7
+#define PPE_CFG_STS_RX_PKT_CNT_RC	BIT(0)
+#define PPE_CFG_QOS_VMID_MODE		BIT(15)
+#define PPE_CFG_BUS_LOCAL_REL		(BIT(9) | BIT(15) | BIT(19) | BIT(23))
+
+/* buf unit size is cache_line_size, which is 64, so the shift is 6 */
+#define PPE_BUF_SIZE_SHIFT		6
+#define PPE_TX_BUF_HOLD			BIT(31)
+#define CACHE_LINE_MASK			0x3F
+#else
 #define PPE_CFG_QOS_VMID_GRP_SHIFT	8
+#define PPE_CFG_RX_CTRL_ALIGN_SHIFT	11
+#define PPE_CFG_STS_RX_PKT_CNT_RC	BIT(12)
+#define PPE_CFG_QOS_VMID_MODE		BIT(14)
+#define PPE_CFG_BUS_LOCAL_REL		BIT(14)
+
+/* buf unit size is 1, so the shift is 0 */
+#define PPE_BUF_SIZE_SHIFT		0
+#define PPE_TX_BUF_HOLD			0
+#endif /* CONFIG_HI13X1_GMAC */
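+
On HI13X1 the PPE consumes buffer addresses in 64-byte cacheline units, so a DMA address is split into an aligned part (shifted right by PPE_BUF_SIZE_SHIFT) and its low six bits, which the xmit path further down carries separately as data_offset. A worked example of that split; the address value is arbitrary:

#include <stdint.h>
#include <stdio.h>

#define BUF_SIZE_SHIFT	6	/* mirrors PPE_BUF_SIZE_SHIFT on HI13X1 */
#define CACHE_LINE_MASK	0x3F

int main(void)
{
	uint32_t phys = 0x1234567a;

	printf("send_addr   = 0x%08x\n", phys & ~(uint32_t)CACHE_LINE_MASK);
	printf("data_offset = 0x%02x\n", phys & CACHE_LINE_MASK);	/* 0x3a */
	printf("queue write = 0x%08x\n", phys >> BUF_SIZE_SHIFT);
	return 0;
}
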
 
 #define PPE_CFG_RX_FIFO_FSFU		BIT(11)
 #define PPE_CFG_RX_DEPTH_SHIFT		16
 #define PPE_CFG_RX_START_SHIFT		0
-#define PPE_CFG_RX_CTRL_ALIGN_SHIFT	11
 
-#define PPE_CFG_BUS_LOCAL_REL		BIT(14)
 #define PPE_CFG_BUS_BIG_ENDIEN		BIT(0)
 
 #define RX_DESC_NUM			128
@@ -132,31 +171,56 @@
 #define HIP04_MIN_TX_COALESCE_FRAMES	100
 
 struct tx_desc {
+#if defined(CONFIG_HI13X1_GMAC)
+	u32 reserved1[2];
+	u32 send_addr;
+	u16 send_size;
+	u16 data_offset;
+	u32 reserved2[7];
+	u32 cfg;
+	u32 wb_addr;
+	u32 reserved3[3];
+#else
 	u32 send_addr;
 	u32 send_size;
 	u32 next_addr;
 	u32 cfg;
 	u32 wb_addr;
+#endif
 } __aligned(64);
 
 struct rx_desc {
+#if defined(CONFIG_HI13X1_GMAC)
+	u32 reserved1[3];
+	u16 pkt_len;
+	u16 reserved_16;
+	u32 reserved2[6];
+	u32 pkt_err;
+	u32 reserved3[5];
+#else
 	u16 reserved_16;
 	u16 pkt_len;
 	u32 reserve1[3];
 	u32 pkt_err;
 	u32 reserve2[4];
+#endif
 };
 
 struct hip04_priv {
 	void __iomem *base;
+#if defined(CONFIG_HI13X1_GMAC)
+	void __iomem *sysctrl_base;
+#endif
 	int phy_mode;
 	int chan;
 	unsigned int port;
+	unsigned int group;
 	unsigned int speed;
 	unsigned int duplex;
 	unsigned int reg_inten;
 
 	struct napi_struct napi;
+	struct device *dev;
 	struct net_device *ndev;
 
 	struct tx_desc *tx_desc;
@@ -173,6 +237,7 @@
 	dma_addr_t rx_phys[RX_DESC_NUM];
 	unsigned int rx_head;
 	unsigned int rx_buf_size;
+	unsigned int rx_cnt_remaining;
 
 	struct device_node *phy_node;
 	struct phy_device *phy;
@@ -185,7 +250,7 @@
 
 static inline unsigned int tx_count(unsigned int head, unsigned int tail)
 {
-	return (head - tail) % (TX_DESC_NUM - 1);
+	return (head - tail) % TX_DESC_NUM;
 }
 
 static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex)
@@ -225,6 +290,13 @@
 	writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG);
 }
 
+static void hip04_reset_dreq(struct hip04_priv *priv)
+{
+#if defined(CONFIG_HI13X1_GMAC)
+	writel_relaxed(RESET_DREQ_ALL, priv->sysctrl_base + SC_PPE_RESET_DREQ);
+#endif
+}
+
 static void hip04_reset_ppe(struct hip04_priv *priv)
 {
 	u32 val, tmp, timeout = 0;
@@ -245,14 +317,14 @@
 	val |= PPE_CFG_STS_RX_PKT_CNT_RC;
 	writel_relaxed(val, priv->base + PPE_CFG_STS_MODE);
 
-	val = BIT(priv->port);
+	val = BIT(priv->group);
 	regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val);
 
-	val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT;
+	val = priv->group << PPE_CFG_QOS_VMID_GRP_SHIFT;
 	val |= PPE_CFG_QOS_VMID_MODE;
 	writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN);
 
-	val = RX_BUF_SIZE;
+	val = RX_BUF_SIZE >> PPE_BUF_SIZE_SHIFT;
 	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val);
 
 	val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT;
@@ -289,8 +361,10 @@
 	val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN;
 	writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG);
 
+#ifndef CONFIG_HI13X1_GMAC
 	val = GE_AUTO_NEG_CTL;
 	writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG);
+#endif
 }
 
 static void hip04_mac_enable(struct net_device *ndev)
@@ -333,12 +407,18 @@
 
 static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys)
 {
-	writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR);
+	u32 val;
+
+	val = phys >> PPE_BUF_SIZE_SHIFT | PPE_TX_BUF_HOLD;
+	writel(val, priv->base + PPE_CFG_CPU_ADD_ADDR);
 }
 
 static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys)
 {
-	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys);
+	u32 val;
+
+	val = phys >> PPE_BUF_SIZE_SHIFT;
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, val);
 }
 
 static u32 hip04_recv_cnt(struct hip04_priv *priv)
@@ -387,7 +467,7 @@
 		}
 
 		if (priv->tx_phys[tx_tail]) {
-			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
+			dma_unmap_single(priv->dev, priv->tx_phys[tx_tail],
 					 priv->tx_skb[tx_tail]->len,
 					 DMA_TO_DEVICE);
 			priv->tx_phys[tx_tail] = 0;
@@ -422,7 +502,8 @@
 			       ns, HRTIMER_MODE_REL);
 }
 
-static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t
+hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct hip04_priv *priv = netdev_priv(ndev);
 	struct net_device_stats *stats = &ndev->stats;
@@ -437,19 +518,28 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&ndev->dev, phys)) {
+	phys = dma_map_single(priv->dev, skb->data, skb->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(priv->dev, phys)) {
 		dev_kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
 
 	priv->tx_skb[tx_head] = skb;
 	priv->tx_phys[tx_head] = phys;
-	desc->send_addr = cpu_to_be32(phys);
-	desc->send_size = cpu_to_be32(skb->len);
-	desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV);
+
+	desc->send_size = (__force u32)cpu_to_be32(skb->len);
+#if defined(CONFIG_HI13X1_GMAC)
+	desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV
+		| TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT);
+	desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK);
+	desc->send_addr =  (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK);
+#else
+	desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV);
+	desc->send_addr = (__force u32)cpu_to_be32(phys);
+#endif
 	phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc);
-	desc->wb_addr = cpu_to_be32(phys);
+	desc->wb_addr = (__force u32)cpu_to_be32(phys +
+		offsetof(struct tx_desc, send_addr));
 	skb_tx_timestamp(skb);
 
 	hip04_set_xmit_desc(priv, phys);
@@ -486,7 +576,6 @@
 	struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
 	struct net_device *ndev = priv->ndev;
 	struct net_device_stats *stats = &ndev->stats;
-	unsigned int cnt = hip04_recv_cnt(priv);
 	struct rx_desc *desc;
 	struct sk_buff *skb;
 	unsigned char *buf;
@@ -497,7 +586,10 @@
 	u16 len;
 	u32 err;
 
-	while (cnt && !last) {
+	/* clean up tx descriptors */
+	tx_remaining = hip04_tx_reclaim(ndev, false);
+	priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+	while (priv->rx_cnt_remaining && !last) {
 		buf = priv->rx_buf[priv->rx_head];
 		skb = build_skb(buf, priv->rx_buf_size);
 		if (unlikely(!skb)) {
@@ -505,13 +597,13 @@
 			goto refill;
 		}
 
-		dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head],
+		dma_unmap_single(priv->dev, priv->rx_phys[priv->rx_head],
 				 RX_BUF_SIZE, DMA_FROM_DEVICE);
 		priv->rx_phys[priv->rx_head] = 0;
 
 		desc = (struct rx_desc *)skb->data;
-		len = be16_to_cpu(desc->pkt_len);
-		err = be32_to_cpu(desc->pkt_err);
+		len = be16_to_cpu((__force __be16)desc->pkt_len);
+		err = be32_to_cpu((__force __be32)desc->pkt_err);
 
 		if (0 == len) {
 			dev_kfree_skb_any(skb);
@@ -534,20 +626,22 @@
 		buf = netdev_alloc_frag(priv->rx_buf_size);
 		if (!buf)
 			goto done;
-		phys = dma_map_single(&ndev->dev, buf,
+		phys = dma_map_single(priv->dev, buf,
 				      RX_BUF_SIZE, DMA_FROM_DEVICE);
-		if (dma_mapping_error(&ndev->dev, phys))
+		if (dma_mapping_error(priv->dev, phys))
 			goto done;
 		priv->rx_buf[priv->rx_head] = buf;
 		priv->rx_phys[priv->rx_head] = phys;
 		hip04_set_recv_desc(priv, phys);
 
 		priv->rx_head = RX_NEXT(priv->rx_head);
-		if (rx >= budget)
+		if (rx >= budget) {
+			--priv->rx_cnt_remaining;
 			goto done;
+		}
 
-		if (--cnt == 0)
-			cnt = hip04_recv_cnt(priv);
+		if (--priv->rx_cnt_remaining == 0)
+			priv->rx_cnt_remaining += hip04_recv_cnt(priv);
 	}
 
 	if (!(priv->reg_inten & RCV_INT)) {
@@ -557,8 +651,7 @@
 	}
 	napi_complete_done(napi, rx);
 done:
-	/* clean up tx descriptors and start a new timer if necessary */
-	tx_remaining = hip04_tx_reclaim(ndev, false);
+	/* start a new timer if necessary */
 	if (rx < budget && tx_remaining)
 		hip04_start_tx_timer(priv);
 
@@ -633,6 +726,7 @@
 	int i;
 
 	priv->rx_head = 0;
+	priv->rx_cnt_remaining = 0;
 	priv->tx_head = 0;
 	priv->tx_tail = 0;
 	hip04_reset_ppe(priv);
@@ -640,9 +734,9 @@
 	for (i = 0; i < RX_DESC_NUM; i++) {
 		dma_addr_t phys;
 
-		phys = dma_map_single(&ndev->dev, priv->rx_buf[i],
+		phys = dma_map_single(priv->dev, priv->rx_buf[i],
 				      RX_BUF_SIZE, DMA_FROM_DEVICE);
-		if (dma_mapping_error(&ndev->dev, phys))
+		if (dma_mapping_error(priv->dev, phys))
 			return -EIO;
 
 		priv->rx_phys[i] = phys;
@@ -676,7 +770,7 @@
 
 	for (i = 0; i < RX_DESC_NUM; i++) {
 		if (priv->rx_phys[i]) {
-			dma_unmap_single(&ndev->dev, priv->rx_phys[i],
+			dma_unmap_single(priv->dev, priv->rx_phys[i],
 					 RX_BUF_SIZE, DMA_FROM_DEVICE);
 			priv->rx_phys[i] = 0;
 		}
@@ -811,7 +905,6 @@
 	struct of_phandle_args arg;
 	struct net_device *ndev;
 	struct hip04_priv *priv;
-	struct resource *res;
 	int irq;
 	int ret;
 
@@ -820,18 +913,26 @@
 		return -ENOMEM;
 
 	priv = netdev_priv(ndev);
+	priv->dev = d;
 	priv->ndev = ndev;
 	platform_set_drvdata(pdev, ndev);
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(d, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto init_fail;
 	}
 
-	ret = of_parse_phandle_with_fixed_args(node, "port-handle", 2, 0, &arg);
+#if defined(CONFIG_HI13X1_GMAC)
+	priv->sysctrl_base = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(priv->sysctrl_base)) {
+		ret = PTR_ERR(priv->sysctrl_base);
+		goto init_fail;
+	}
+#endif
+
+	ret = of_parse_phandle_with_fixed_args(node, "port-handle", 3, 0, &arg);
 	if (ret < 0) {
 		dev_warn(d, "no port-handle\n");
 		goto init_fail;
@@ -839,6 +940,7 @@
 
 	priv->port = arg.args[0];
 	priv->chan = arg.args[1] * RX_DESC_NUM;
+	priv->group = arg.args[2];
 
 	hrtimer_init(&priv->tx_coalesce_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 
@@ -899,6 +1001,7 @@
 	ndev->irq = irq;
 	netif_napi_add(ndev, &priv->napi, hip04_rx_poll, NAPI_POLL_WEIGHT);
 
+	hip04_reset_dreq(priv);
 	hip04_reset_ppe(priv);
 	if (priv->phy_mode == PHY_INTERFACE_MODE_MII)
 		hip04_config_port(ndev, SPEED_100, DUPLEX_FULL);
@@ -938,7 +1041,6 @@
 
 	hip04_free_ring(ndev, d);
 	unregister_netdev(ndev);
-	free_irq(ndev->irq, ndev);
 	of_node_put(priv->phy_node);
 	cancel_work_sync(&priv->tx_timeout_task);
 	free_netdev(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 2c28088..90ab7ad 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Hisilicon Fast Ethernet MAC Driver
  *
  * Copyright (c) 2016 HiSilicon Technologies Co., Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/circ_buf.h>
@@ -793,7 +781,6 @@
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *node = dev->of_node;
-	struct resource *res;
 	struct net_device *ndev;
 	struct hisi_femac_priv *priv;
 	struct phy_device *phy;
@@ -811,15 +798,13 @@
 	priv->dev = dev;
 	priv->ndev = ndev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->port_base = devm_ioremap_resource(dev, res);
+	priv->port_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->port_base)) {
 		ret = PTR_ERR(priv->port_base);
 		goto out_free_netdev;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	priv->glb_base = devm_ioremap_resource(dev, res);
+	priv->glb_base = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(priv->glb_base)) {
 		ret = PTR_ERR(priv->glb_base);
 		goto out_free_netdev;
@@ -870,7 +855,7 @@
 			   phy_modes(phy->interface));
 
 	mac_addr = of_get_mac_address(node);
-	if (mac_addr)
+	if (!IS_ERR(mac_addr))
 		ether_addr_copy(ndev->dev_addr, mac_addr);
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
 		eth_hw_addr_random(ndev);
@@ -892,7 +877,6 @@
 
 	ndev->irq = platform_get_irq(pdev, 0);
 	if (ndev->irq <= 0) {
-		dev_err(dev, "No irq resource\n");
 		ret = -ENODEV;
 		goto out_disconnect_phy;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index c572700..c41b19c 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Copyright (c) 2014 Linaro Ltd.
  * Copyright (c) 2014 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -723,7 +719,7 @@
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		int len = frag->size;
+		int len = skb_frag_size(frag);
 
 		addr = skb_frag_dma_map(priv->dev, frag, 0, len, DMA_TO_DEVICE);
 		ret = dma_mapping_error(priv->dev, addr);
@@ -736,7 +732,7 @@
 	return 0;
 }
 
-static int hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hix5hd2_priv *priv = netdev_priv(dev);
 	struct hix5hd2_desc *desc;
@@ -1006,8 +1002,8 @@
 
 	for (i = 0; i < QUEUE_NUMS; i++) {
 		size = priv->pool[i].count * sizeof(struct hix5hd2_desc);
-		virt_addr = dma_zalloc_coherent(dev, size, &phys_addr,
-						GFP_KERNEL);
+		virt_addr = dma_alloc_coherent(dev, size, &phys_addr,
+					       GFP_KERNEL);
 		if (virt_addr == NULL)
 			goto error_free_pool;
 
@@ -1101,7 +1097,6 @@
 	const struct of_device_id *of_id = NULL;
 	struct net_device *ndev;
 	struct hix5hd2_priv *priv;
-	struct resource *res;
 	struct mii_bus *bus;
 	const char *mac_addr;
 	int ret;
@@ -1123,15 +1118,13 @@
 	}
 	priv->hw_cap = (unsigned long)of_id->data;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto out_free_netdev;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	priv->ctrl_base = devm_ioremap_resource(dev, res);
+	priv->ctrl_base = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(priv->ctrl_base)) {
 		ret = PTR_ERR(priv->ctrl_base);
 		goto out_free_netdev;
@@ -1201,7 +1194,7 @@
 		goto err_free_mdio;
 
 	priv->phy_mode = of_get_phy_mode(node);
-	if (priv->phy_mode < 0) {
+	if ((int)priv->phy_mode < 0) {
 		netdev_err(ndev, "not find phy-mode\n");
 		ret = -EINVAL;
 		goto err_mdiobus;
@@ -1229,7 +1222,7 @@
 	}
 
 	mac_addr = of_get_mac_address(node);
-	if (mac_addr)
+	if (!IS_ERR(mac_addr))
 		ether_addr_copy(ndev->dev_addr, mac_addr);
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
 		eth_hw_addr_random(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index 79d03f8..0833927 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/dma-mapping.h>
@@ -150,7 +146,6 @@
 /* free desc along with its attached buffer */
 static void hnae_free_desc(struct hnae_ring *ring)
 {
-	hnae_free_buffers(ring);
 	dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr,
 			 ring->desc_num * sizeof(ring->desc[0]),
 			 ring_to_dma_dir(ring));
@@ -183,6 +178,9 @@
 /* fini ring, also free the buffer for the ring */
 static void hnae_fini_ring(struct hnae_ring *ring)
 {
+	if (is_rx_ring(ring))
+		hnae_free_buffers(ring);
+
 	hnae_free_desc(ring);
 	kfree(ring->desc_cb);
 	ring->desc_cb = NULL;
@@ -201,7 +199,6 @@
 
 	ring->q = q;
 	ring->flags = flags;
-	spin_lock_init(&ring->lock);
 	ring->coal_param = q->handle->coal_param;
 	assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr);
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 08a750f..6ab9458 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __HNAE_H
@@ -278,9 +274,6 @@
 	/* statistic */
 	struct ring_stats stats;
 
-	/* ring lock for poll one */
-	spinlock_t lock;
-
 	dma_addr_t desc_dma_addr;
 	u32 buf_size;       /* size for hnae_desc->addr, preset by AE */
 	u16 desc_num;       /* total number of desc */
@@ -357,7 +350,7 @@
 };
 
 struct hnae_queue {
-	void __iomem *io_base;
+	u8 __iomem *io_base;
 	phys_addr_t phy_base;
 	struct hnae_ae_dev *dev;	/* the device who use this queue */
 	struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index b52029e..b43dec0 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -147,12 +143,10 @@
 	struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
 	int i;
 
-	vf_cb->mac_cb	 = NULL;
-
-	kfree(vf_cb);
-
 	for (i = 0; i < handle->q_num; i++)
 		hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0;
+
+	kfree(vf_cb);
 }
 
 static int hns_ae_wait_flow_down(struct hnae_handle *handle)
@@ -379,6 +373,9 @@
 
 	hns_ae_ring_enable_all(handle, 0);
 
+	/* clean rx fbd. */
+	hns_rcb_wait_fbd_clean(handle->qs, handle->q_num, RCB_INT_FLAG_RX);
+
 	(void)hns_mac_vm_config_bc_en(mac_cb, 0, false);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 09e4061..7fb7a41 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/delay.h>
@@ -67,11 +63,14 @@
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
 	/*enable GE rX/tX */
-	if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+	if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX)
 		dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 1);
 
-	if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+	if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) {
+		/* enable rx pcs */
+		dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 0);
 		dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 1);
+	}
 }
 
 static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode)
@@ -79,11 +78,14 @@
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
 	/*disable GE rX/tX */
-	if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+	if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX)
 		dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 0);
 
-	if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+	if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) {
+		/* disable rx pcs */
+		dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 1);
 		dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 0);
+	}
 }
 
 /* hns_gmac_get_en - get port enable
@@ -319,7 +321,7 @@
 		hns_gmac_set_uc_match(mac_drv, en);
 }
 
-int hns_gmac_wait_fifo_clean(void *mac_drv)
+static int hns_gmac_wait_fifo_clean(void *mac_drv)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 	int wait_cnt;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.h
index 44fe301..ec266e7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _HNS_GMAC_H
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 6ed6f14..8aace2d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/acpi.h>
@@ -370,7 +366,7 @@
 static void hns_mac_param_get(struct mac_params *param,
 			      struct hns_mac_cb *mac_cb)
 {
-	param->vaddr = (void *)mac_cb->vaddr;
+	param->vaddr = mac_cb->vaddr;
 	param->mac_mode = hns_get_enet_interface(mac_cb);
 	ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr);
 	param->mac_id = mac_cb->mac_id;
@@ -778,6 +774,17 @@
 	return rc;
 }
 
+static void hns_mac_remove_phydev(struct hns_mac_cb *mac_cb)
+{
+	if (!to_acpi_device_node(mac_cb->fw_port) || !mac_cb->phy_dev)
+		return;
+
+	phy_device_remove(mac_cb->phy_dev);
+	phy_device_free(mac_cb->phy_dev);
+
+	mac_cb->phy_dev = NULL;
+}
+
 #define MAC_MEDIA_TYPE_MAX_LEN		16
 
 static const struct {
@@ -837,8 +844,8 @@
 			 */
 			put_device(&mac_cb->phy_dev->mdio.dev);
 
-			dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n",
-				mac_cb->mac_id, np->name);
+			dev_dbg(mac_cb->dev, "mac%d phy_node: %pOFn\n",
+				mac_cb->mac_id, np);
 		}
 		of_node_put(np);
 
@@ -855,8 +862,8 @@
 			 * if the phy_dev is found
 			 */
 			put_device(&mac_cb->phy_dev->mdio.dev);
-			dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n",
-				mac_cb->mac_id, np->name);
+			dev_dbg(mac_cb->dev, "mac%d phy_node: %pOFn\n",
+				mac_cb->mac_id, np);
 		}
 		of_node_put(np);
 
@@ -1117,7 +1124,11 @@
 	int max_port_num = hns_mac_get_max_port_num(dsaf_dev);
 
 	for (i = 0; i < max_port_num; i++) {
+		if (!dsaf_dev->mac_cb[i])
+			continue;
+
 		dsaf_dev->misc_op->cpld_reset_led(dsaf_dev->mac_cb[i]);
+		hns_mac_remove_phydev(dsaf_dev->mac_cb[i]);
 		dsaf_dev->mac_cb[i] = NULL;
 	}
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index fbc7534..3278bf4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _HNS_DSAF_MAC_H
@@ -187,7 +183,7 @@
 /*mac para struct ,mac get param from nic or dsaf when initialize*/
 struct mac_params {
 	char addr[ETH_ALEN];
-	void *vaddr; /*virtual address*/
+	u8 __iomem *vaddr; /*virtual address*/
 	struct device *dev;
 	u8 mac_id;
 	/**< Ethernet operation mode (MAC-PHY interface and speed) */
@@ -402,7 +398,7 @@
 	enum mac_mode mac_mode;
 	u8 mac_id;
 	struct hns_mac_cb *mac_cb;
-	void __iomem *io_base;
+	u8 __iomem *io_base;
 	unsigned int mac_en_flg;/*you'd better don't enable mac twice*/
 	unsigned int virt_dev_num;
 	struct device *dev;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index e557a4e..3a14bbc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/acpi.h>
@@ -28,7 +24,7 @@
 #include "hns_dsaf_rcb.h"
 #include "hns_dsaf_misc.h"
 
-const static char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
+static const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
 	[DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf",
 	[DSAF_MODE_DISABLE_6PORT_0VM] = "6port-16rss",
 	[DSAF_MODE_DISABLE_6PORT_16VM] = "6port-16vf",
@@ -935,6 +931,62 @@
 }
 
 /**
+ * hns_dsaf_tcam_uc_cfg_vague - configure a unicast tcam entry with a mask
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @address: tcam entry index
+ * @tcam_data: tcam data to write
+ * @tcam_mask: tcam match mask (restored to all-ones before returning)
+ * @tcam_uc: unicast entry configuration
+ */
+static void hns_dsaf_tcam_uc_cfg_vague(struct dsaf_device *dsaf_dev,
+				       u32 address,
+				       struct dsaf_tbl_tcam_data *tcam_data,
+				       struct dsaf_tbl_tcam_data *tcam_mask,
+				       struct dsaf_tbl_tcam_ucast_cfg *tcam_uc)
+{
+	spin_lock_bh(&dsaf_dev->tcam_lock);
+	hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address);
+	hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data);
+	hns_dsaf_tbl_tcam_ucast_cfg(dsaf_dev, tcam_uc);
+	hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+	hns_dsaf_tbl_tcam_data_ucast_pul(dsaf_dev);
+
+	/*Restore Match Data*/
+	tcam_mask->tbl_tcam_data_high = 0xffffffff;
+	tcam_mask->tbl_tcam_data_low = 0xffffffff;
+	hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+
+	spin_unlock_bh(&dsaf_dev->tcam_lock);
+}
+
+/**
+ * hns_dsaf_tcam_mc_cfg_vague - configure a multicast tcam entry with a mask
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @address: tcam entry index
+ * @tcam_data: tcam data to write
+ * @tcam_mask: tcam match mask (restored to all-ones before returning)
+ * @tcam_mc: multicast entry configuration
+ */
+static void hns_dsaf_tcam_mc_cfg_vague(struct dsaf_device *dsaf_dev,
+				       u32 address,
+				       struct dsaf_tbl_tcam_data *tcam_data,
+				       struct dsaf_tbl_tcam_data *tcam_mask,
+				       struct dsaf_tbl_tcam_mcast_cfg *tcam_mc)
+{
+	spin_lock_bh(&dsaf_dev->tcam_lock);
+	hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address);
+	hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data);
+	hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, tcam_mc);
+	hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+	hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev);
+
+	/*Restore Match Data*/
+	tcam_mask->tbl_tcam_data_high = 0xffffffff;
+	tcam_mask->tbl_tcam_data_low = 0xffffffff;
+	hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+
+	spin_unlock_bh(&dsaf_dev->tcam_lock);
+}
+
+/**
  * hns_dsaf_tcam_mc_invld - INT
  * @dsaf_id: dsa fabric id
  * @address
@@ -1493,6 +1545,27 @@
 }
 
 /**
+ * hns_dsaf_find_empty_mac_entry_reverse - search the dsa fabric soft mac
+ * table for an empty entry, starting from the end
+ * @dsaf_dev: dsa fabric device struct pointer
+ */
+static u16 hns_dsaf_find_empty_mac_entry_reverse(struct dsaf_device *dsaf_dev)
+{
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
+	int i;
+
+	soft_mac_entry = priv->soft_mac_tbl + (DSAF_TCAM_SUM - 1);
+	for (i = (DSAF_TCAM_SUM - 1); i > 0; i--) {
+		/* search all entries from end to start. */
+		if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX)
+			return i;
+		soft_mac_entry--;
+	}
+	return DSAF_INVALID_ENTRY_IDX;
+}
+
+/**
  * hns_dsaf_set_mac_key - set mac key
  * @dsaf_dev: dsa fabric device struct pointer
  * @mac_key: tcam key pointer
@@ -1525,8 +1598,6 @@
 		       DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
 	dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,
 		       DSAF_TBL_TCAM_KEY_PORT_S, port);
-
-	mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan);
 }
 
 /**
@@ -1586,8 +1657,8 @@
 	/* default config dvc to 0 */
 	mac_data.tbl_ucast_dvc = 0;
 	mac_data.tbl_ucast_out_port = mac_entry->port_num;
-	tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
-	tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+	tcam_data.tbl_tcam_data_high = mac_key.high.val;
+	tcam_data.tbl_tcam_data_low = mac_key.low.val;
 
 	hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data);
 
@@ -1709,9 +1780,6 @@
 				     0xff,
 				     mc_mask);
 
-		mask_key.high.val = le32_to_cpu(mask_key.high.val);
-		mask_key.low.val = le32_to_cpu(mask_key.low.val);
-
 		pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
 	}
 
@@ -1763,8 +1831,8 @@
 		dsaf_dev->ae_dev.name, mac_key.high.val,
 		mac_key.low.val, entry_index);
 
-	tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
-	tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+	tcam_data.tbl_tcam_data_high = mac_key.high.val;
+	tcam_data.tbl_tcam_data_low = mac_key.low.val;
 
 	/* config mc entry with mask */
 	hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data,
@@ -1879,9 +1947,6 @@
 		/* config key mask */
 		hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_mask);
 
-		mask_key.high.val = le32_to_cpu(mask_key.high.val);
-		mask_key.low.val = le32_to_cpu(mask_key.low.val);
-
 		pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
 	}
 
@@ -1935,8 +2000,8 @@
 		soft_mac_entry += entry_index;
 		soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX;
 	} else { /* not zero, just del port, update */
-		tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
-		tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+		tcam_data.tbl_tcam_data_high = mac_key.high.val;
+		tcam_data.tbl_tcam_data_low = mac_key.low.val;
 
 		hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
 				     &tcam_data,
@@ -2166,9 +2231,9 @@
 		DSAF_INODE_LOCAL_ADDR_FALSE_NUM_0_REG + 0x80 * (u64)node_num);
 
 	hw_stats->vlan_drop += dsaf_read_dev(dsaf_dev,
-		DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 0x80 * (u64)node_num);
+		DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 4 * (u64)node_num);
 	hw_stats->stp_drop += dsaf_read_dev(dsaf_dev,
-		DSAF_INODE_IN_DATA_STP_DISC_0_REG + 0x80 * (u64)node_num);
+		DSAF_INODE_IN_DATA_STP_DISC_0_REG + 4 * (u64)node_num);
 
 	/* pfc pause frame statistics stored in dsaf inode*/
 	if ((node_num < DSAF_SERVICE_NW_NUM) && !is_ver1) {
@@ -2285,237 +2350,237 @@
 				DSAF_INODE_BD_ORDER_STATUS_0_REG + j * 4);
 		p[223 + i] = dsaf_read_dev(ddev,
 				DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + j * 4);
-		p[224 + i] = dsaf_read_dev(ddev,
+		p[226 + i] = dsaf_read_dev(ddev,
 				DSAF_INODE_IN_DATA_STP_DISC_0_REG + j * 4);
 	}
 
-	p[227] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4);
+	p[229] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4);
 
 	for (i = 0; i < DSAF_INODE_NUM / DSAF_COMM_CHN; i++) {
 		j = i * DSAF_COMM_CHN + port;
-		p[228 + i] = dsaf_read_dev(ddev,
+		p[230 + i] = dsaf_read_dev(ddev,
 				DSAF_INODE_VC0_IN_PKT_NUM_0_REG + j * 4);
 	}
 
-	p[231] = dsaf_read_dev(ddev,
-		DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4);
+	p[233] = dsaf_read_dev(ddev,
+		DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 0x80);
 
 	/* dsaf inode registers */
 	for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) {
 		j = i * DSAF_COMM_CHN + port;
-		p[232 + i] = dsaf_read_dev(ddev,
+		p[234 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_CFG_REG_0_REG + j * 0x80);
-		p[235 + i] = dsaf_read_dev(ddev,
+		p[237 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + j * 0x80);
-		p[238 + i] = dsaf_read_dev(ddev,
+		p[240 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CFG_1_REG_0_REG + j * 0x80);
-		p[241 + i] = dsaf_read_dev(ddev,
+		p[243 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + j * 0x80);
-		p[244 + i] = dsaf_read_dev(ddev,
+		p[246 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_FREE_CNT_0_0_REG + j * 0x80);
-		p[245 + i] = dsaf_read_dev(ddev,
+		p[249 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_FREE_CNT_1_0_REG + j * 0x80);
-		p[248 + i] = dsaf_read_dev(ddev,
+		p[252 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CNT_0_0_REG + j * 0x80);
-		p[251 + i] = dsaf_read_dev(ddev,
+		p[255 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CNT_1_0_REG + j * 0x80);
-		p[254 + i] = dsaf_read_dev(ddev,
+		p[258 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CNT_2_0_REG + j * 0x80);
-		p[257 + i] = dsaf_read_dev(ddev,
+		p[261 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CNT_3_0_REG + j * 0x80);
-		p[260 + i] = dsaf_read_dev(ddev,
+		p[264 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_INER_ST_0_REG + j * 0x80);
-		p[263 + i] = dsaf_read_dev(ddev,
+		p[267 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_MIB_REQ_FAILED_TC_0_REG + j * 0x80);
-		p[266 + i] = dsaf_read_dev(ddev,
+		p[270 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_CNT_0_REG + j * 0x80);
-		p[269 + i] = dsaf_read_dev(ddev,
+		p[273 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_DROP_CNT_0_REG + j * 0x80);
-		p[272 + i] = dsaf_read_dev(ddev,
+		p[276 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_INF_OUTPORT_CNT_0_REG + j * 0x80);
-		p[275 + i] = dsaf_read_dev(ddev,
+		p[279 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC0_CNT_0_REG + j * 0x80);
-		p[278 + i] = dsaf_read_dev(ddev,
+		p[282 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC1_CNT_0_REG + j * 0x80);
-		p[281 + i] = dsaf_read_dev(ddev,
+		p[285 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC2_CNT_0_REG + j * 0x80);
-		p[284 + i] = dsaf_read_dev(ddev,
+		p[288 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC3_CNT_0_REG + j * 0x80);
-		p[287 + i] = dsaf_read_dev(ddev,
+		p[291 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC4_CNT_0_REG + j * 0x80);
-		p[290 + i] = dsaf_read_dev(ddev,
+		p[294 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC5_CNT_0_REG + j * 0x80);
-		p[293 + i] = dsaf_read_dev(ddev,
+		p[297 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC6_CNT_0_REG + j * 0x80);
-		p[296 + i] = dsaf_read_dev(ddev,
+		p[300 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_INPORT_TC7_CNT_0_REG + j * 0x80);
-		p[299 + i] = dsaf_read_dev(ddev,
+		p[303 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_REQ_CNT_0_REG + j * 0x80);
-		p[302 + i] = dsaf_read_dev(ddev,
+		p[306 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_LNK_RELS_CNT_0_REG + j * 0x80);
-		p[305 + i] = dsaf_read_dev(ddev,
+		p[309 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CFG_3_REG_0_REG + j * 0x80);
-		p[308 + i] = dsaf_read_dev(ddev,
+		p[312 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_BP_CFG_4_REG_0_REG + j * 0x80);
 	}
 
 	/* dsaf onode registers */
 	for (i = 0; i < DSAF_XOD_NUM; i++) {
-		p[311 + i] = dsaf_read_dev(ddev,
+		p[315 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + i * 0x90);
-		p[319 + i] = dsaf_read_dev(ddev,
+		p[323 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + i * 0x90);
-		p[327 + i] = dsaf_read_dev(ddev,
+		p[331 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + i * 0x90);
-		p[335 + i] = dsaf_read_dev(ddev,
+		p[339 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + i * 0x90);
-		p[343 + i] = dsaf_read_dev(ddev,
+		p[347 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + i * 0x90);
-		p[351 + i] = dsaf_read_dev(ddev,
+		p[355 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_ETS_TOKEN_CFG_0_REG + i * 0x90);
 	}
 
-	p[359] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90);
-	p[360] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_1_0_REG + port * 0x90);
-	p[361] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_2_0_REG + port * 0x90);
+	p[363] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90);
+	p[364] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_1_0_REG + port * 0x90);
+	p[365] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_2_0_REG + port * 0x90);
 
 	for (i = 0; i < DSAF_XOD_BIG_NUM / DSAF_COMM_CHN; i++) {
 		j = i * DSAF_COMM_CHN + port;
-		p[362 + i] = dsaf_read_dev(ddev,
+		p[366 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_GNT_L_0_REG + j * 0x90);
-		p[365 + i] = dsaf_read_dev(ddev,
+		p[369 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_GNT_H_0_REG + j * 0x90);
-		p[368 + i] = dsaf_read_dev(ddev,
+		p[372 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_CONNECT_STATE_0_REG + j * 0x90);
-		p[371 + i] = dsaf_read_dev(ddev,
+		p[375 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVPKT_CNT_0_REG + j * 0x90);
-		p[374 + i] = dsaf_read_dev(ddev,
+		p[378 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVTC0_CNT_0_REG + j * 0x90);
-		p[377 + i] = dsaf_read_dev(ddev,
+		p[381 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVTC1_CNT_0_REG + j * 0x90);
-		p[380 + i] = dsaf_read_dev(ddev,
+		p[384 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVTC2_CNT_0_REG + j * 0x90);
-		p[383 + i] = dsaf_read_dev(ddev,
+		p[387 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVTC3_CNT_0_REG + j * 0x90);
-		p[386 + i] = dsaf_read_dev(ddev,
+		p[390 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVVC0_CNT_0_REG + j * 0x90);
-		p[389 + i] = dsaf_read_dev(ddev,
+		p[393 + i] = dsaf_read_dev(ddev,
 				DSAF_XOD_RCVVC1_CNT_0_REG + j * 0x90);
 	}
 
-	p[392] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN0_CNT_0_REG + port * 0x90);
-	p[393] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN1_CNT_0_REG + port * 0x90);
-	p[394] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN2_CNT_0_REG + port * 0x90);
-	p[395] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN3_CNT_0_REG + port * 0x90);
 	p[396] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN4_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN0_CNT_0_REG + port * 0x90);
 	p[397] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN5_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN1_CNT_0_REG + port * 0x90);
 	p[398] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN6_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN2_CNT_0_REG + port * 0x90);
 	p[399] = dsaf_read_dev(ddev,
-		DSAF_XOD_XGE_RCVIN7_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN3_CNT_0_REG + port * 0x90);
 	p[400] = dsaf_read_dev(ddev,
-		DSAF_XOD_PPE_RCVIN0_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN4_CNT_0_REG + port * 0x90);
 	p[401] = dsaf_read_dev(ddev,
-		DSAF_XOD_PPE_RCVIN1_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN5_CNT_0_REG + port * 0x90);
 	p[402] = dsaf_read_dev(ddev,
-		DSAF_XOD_ROCEE_RCVIN0_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN6_CNT_0_REG + port * 0x90);
 	p[403] = dsaf_read_dev(ddev,
-		DSAF_XOD_ROCEE_RCVIN1_CNT_0_REG + port * 0x90);
+		DSAF_XOD_XGE_RCVIN7_CNT_0_REG + port * 0x90);
 	p[404] = dsaf_read_dev(ddev,
+		DSAF_XOD_PPE_RCVIN0_CNT_0_REG + port * 0x90);
+	p[405] = dsaf_read_dev(ddev,
+		DSAF_XOD_PPE_RCVIN1_CNT_0_REG + port * 0x90);
+	p[406] = dsaf_read_dev(ddev,
+		DSAF_XOD_ROCEE_RCVIN0_CNT_0_REG + port * 0x90);
+	p[407] = dsaf_read_dev(ddev,
+		DSAF_XOD_ROCEE_RCVIN1_CNT_0_REG + port * 0x90);
+	p[408] = dsaf_read_dev(ddev,
 		DSAF_XOD_FIFO_STATUS_0_REG + port * 0x90);
 
 	/* dsaf voq registers */
 	for (i = 0; i < DSAF_VOQ_NUM / DSAF_COMM_CHN; i++) {
 		j = (i * DSAF_COMM_CHN + port) * 0x90;
-		p[405 + i] = dsaf_read_dev(ddev,
+		p[409 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_ECC_INVERT_EN_0_REG + j);
-		p[408 + i] = dsaf_read_dev(ddev,
+		p[412 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_SRAM_PKT_NUM_0_REG + j);
-		p[411 + i] = dsaf_read_dev(ddev, DSAF_VOQ_IN_PKT_NUM_0_REG + j);
-		p[414 + i] = dsaf_read_dev(ddev,
+		p[415 + i] = dsaf_read_dev(ddev, DSAF_VOQ_IN_PKT_NUM_0_REG + j);
+		p[418 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_OUT_PKT_NUM_0_REG + j);
-		p[417 + i] = dsaf_read_dev(ddev,
+		p[421 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_ECC_ERR_ADDR_0_REG + j);
-		p[420 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_STATUS_0_REG + j);
-		p[423 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SPUP_IDLE_0_REG + j);
-		p[426 + i] = dsaf_read_dev(ddev,
+		p[424 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_STATUS_0_REG + j);
+		p[427 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SPUP_IDLE_0_REG + j);
+		p[430 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_XGE_XOD_REQ_0_0_REG + j);
-		p[429 + i] = dsaf_read_dev(ddev,
+		p[433 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_XGE_XOD_REQ_1_0_REG + j);
-		p[432 + i] = dsaf_read_dev(ddev,
+		p[436 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_PPE_XOD_REQ_0_REG + j);
-		p[435 + i] = dsaf_read_dev(ddev,
+		p[439 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_ROCEE_XOD_REQ_0_REG + j);
-		p[438 + i] = dsaf_read_dev(ddev,
+		p[442 + i] = dsaf_read_dev(ddev,
 			DSAF_VOQ_BP_ALL_THRD_0_REG + j);
 	}
 
 	/* dsaf tbl registers */
-	p[441] = dsaf_read_dev(ddev, DSAF_TBL_CTRL_0_REG);
-	p[442] = dsaf_read_dev(ddev, DSAF_TBL_INT_MSK_0_REG);
-	p[443] = dsaf_read_dev(ddev, DSAF_TBL_INT_SRC_0_REG);
-	p[444] = dsaf_read_dev(ddev, DSAF_TBL_INT_STS_0_REG);
-	p[445] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_ADDR_0_REG);
-	p[446] = dsaf_read_dev(ddev, DSAF_TBL_LINE_ADDR_0_REG);
-	p[447] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_HIGH_0_REG);
-	p[448] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_LOW_0_REG);
-	p[449] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_4_0_REG);
-	p[450] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_3_0_REG);
-	p[451] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_2_0_REG);
-	p[452] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_1_0_REG);
-	p[453] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_0_0_REG);
-	p[454] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_UCAST_CFG_0_REG);
-	p[455] = dsaf_read_dev(ddev, DSAF_TBL_LIN_CFG_0_REG);
-	p[456] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_HIGH_0_REG);
-	p[457] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_LOW_0_REG);
-	p[458] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA4_0_REG);
-	p[459] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA3_0_REG);
-	p[460] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA2_0_REG);
-	p[461] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA1_0_REG);
-	p[462] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA0_0_REG);
-	p[463] = dsaf_read_dev(ddev, DSAF_TBL_LIN_RDATA_0_REG);
+	p[445] = dsaf_read_dev(ddev, DSAF_TBL_CTRL_0_REG);
+	p[446] = dsaf_read_dev(ddev, DSAF_TBL_INT_MSK_0_REG);
+	p[447] = dsaf_read_dev(ddev, DSAF_TBL_INT_SRC_0_REG);
+	p[448] = dsaf_read_dev(ddev, DSAF_TBL_INT_STS_0_REG);
+	p[449] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_ADDR_0_REG);
+	p[450] = dsaf_read_dev(ddev, DSAF_TBL_LINE_ADDR_0_REG);
+	p[451] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_HIGH_0_REG);
+	p[452] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_LOW_0_REG);
+	p[453] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_4_0_REG);
+	p[454] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_3_0_REG);
+	p[455] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_2_0_REG);
+	p[456] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_1_0_REG);
+	p[457] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_0_0_REG);
+	p[458] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_UCAST_CFG_0_REG);
+	p[459] = dsaf_read_dev(ddev, DSAF_TBL_LIN_CFG_0_REG);
+	p[460] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_HIGH_0_REG);
+	p[461] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_LOW_0_REG);
+	p[462] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA4_0_REG);
+	p[463] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA3_0_REG);
+	p[464] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA2_0_REG);
+	p[465] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA1_0_REG);
+	p[466] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA0_0_REG);
+	p[467] = dsaf_read_dev(ddev, DSAF_TBL_LIN_RDATA_0_REG);
 
 	for (i = 0; i < DSAF_SW_PORT_NUM; i++) {
 		j = i * 0x8;
-		p[464 + 2 * i] = dsaf_read_dev(ddev,
+		p[468 + 2 * i] = dsaf_read_dev(ddev,
 			DSAF_TBL_DA0_MIS_INFO1_0_REG + j);
-		p[465 + 2 * i] = dsaf_read_dev(ddev,
+		p[469 + 2 * i] = dsaf_read_dev(ddev,
 			DSAF_TBL_DA0_MIS_INFO0_0_REG + j);
 	}
 
-	p[480] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO2_0_REG);
-	p[481] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO1_0_REG);
-	p[482] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO0_0_REG);
-	p[483] = dsaf_read_dev(ddev, DSAF_TBL_PUL_0_REG);
-	p[484] = dsaf_read_dev(ddev, DSAF_TBL_OLD_RSLT_0_REG);
-	p[485] = dsaf_read_dev(ddev, DSAF_TBL_OLD_SCAN_VAL_0_REG);
-	p[486] = dsaf_read_dev(ddev, DSAF_TBL_DFX_CTRL_0_REG);
-	p[487] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_0_REG);
-	p[488] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_2_0_REG);
-	p[489] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_I_0_REG);
-	p[490] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_O_0_REG);
-	p[491] = dsaf_read_dev(ddev, DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG);
+	p[484] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO2_0_REG);
+	p[485] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO1_0_REG);
+	p[486] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO0_0_REG);
+	p[487] = dsaf_read_dev(ddev, DSAF_TBL_PUL_0_REG);
+	p[488] = dsaf_read_dev(ddev, DSAF_TBL_OLD_RSLT_0_REG);
+	p[489] = dsaf_read_dev(ddev, DSAF_TBL_OLD_SCAN_VAL_0_REG);
+	p[490] = dsaf_read_dev(ddev, DSAF_TBL_DFX_CTRL_0_REG);
+	p[491] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_0_REG);
+	p[492] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_2_0_REG);
+	p[493] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_I_0_REG);
+	p[494] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_O_0_REG);
+	p[495] = dsaf_read_dev(ddev, DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG);
 
 	/* dsaf other registers */
-	p[492] = dsaf_read_dev(ddev, DSAF_INODE_FIFO_WL_0_REG + port * 0x4);
-	p[493] = dsaf_read_dev(ddev, DSAF_ONODE_FIFO_WL_0_REG + port * 0x4);
-	p[494] = dsaf_read_dev(ddev, DSAF_XGE_GE_WORK_MODE_0_REG + port * 0x4);
-	p[495] = dsaf_read_dev(ddev,
+	p[496] = dsaf_read_dev(ddev, DSAF_INODE_FIFO_WL_0_REG + port * 0x4);
+	p[497] = dsaf_read_dev(ddev, DSAF_ONODE_FIFO_WL_0_REG + port * 0x4);
+	p[498] = dsaf_read_dev(ddev, DSAF_XGE_GE_WORK_MODE_0_REG + port * 0x4);
+	p[499] = dsaf_read_dev(ddev,
 		DSAF_XGE_APP_RX_LINK_UP_0_REG + port * 0x4);
-	p[496] = dsaf_read_dev(ddev, DSAF_NETPORT_CTRL_SIG_0_REG + port * 0x4);
-	p[497] = dsaf_read_dev(ddev, DSAF_XGE_CTRL_SIG_CFG_0_REG + port * 0x4);
+	p[500] = dsaf_read_dev(ddev, DSAF_NETPORT_CTRL_SIG_0_REG + port * 0x4);
+	p[501] = dsaf_read_dev(ddev, DSAF_XGE_CTRL_SIG_CFG_0_REG + port * 0x4);
 
 	if (!is_ver1)
-		p[498] = dsaf_read_dev(ddev, DSAF_PAUSE_CFG_REG + port * 0x4);
+		p[502] = dsaf_read_dev(ddev, DSAF_PAUSE_CFG_REG + port * 0x4);
 
 	/* mark end of dsaf regs */
-	for (i = 499; i < 504; i++)
+	for (i = 503; i < 504; i++)
 		p[i] = 0xdddddddd;
 }
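
A note on the renumbering above, as a standalone C sketch (not part of the patch): the two new INODE VC0/VC1 reads shift later slots by 2, and the SBM FREE_CNT_0/FREE_CNT_1 bases move from 244/245, where the loop's iterations collide, to 246/249, which adds 2 more. Assuming three iterations per row, as the new spacing implies:

#include <stdio.h>

int main(void)
{
	/* slot bases of four consecutive SBM rows; each row occupies
	 * base..base+2, assuming HNS_DSAF_SBM_NUM / DSAF_COMM_CHN == 3 */
	int old_bases[] = { 241, 244, 245, 248 };
	int new_bases[] = { 243, 246, 249, 252 };
	int i;

	for (i = 0; i + 1 < 4; i++)
		printf("gap: old %d, new %d\n",
		       old_bases[i + 1] - old_bases[i],
		       new_bases[i + 1] - new_bases[i]);
	return 0;	/* old gaps print 3, 1, 3: rows 244 and 245 collided */
}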
 
@@ -2673,58 +2738,177 @@
 	return DSAF_DUMP_REGS_NUM;
 }
 
+static int hns_dsaf_get_port_id(u8 port)
+{
+	if (port < DSAF_SERVICE_NW_NUM)
+		return port;
+
+	if (port >= DSAF_BASE_INNER_PORT_NUM)
+		return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM;
+
+	return -EINVAL;
+}
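+
+/*
+ * The new helper above folds the two disjoint port ranges into one
+ * contiguous index used for the multicast port mask below. A standalone
+ * sketch with assumed values for the two constants (the real ones live
+ * in hns_dsaf_main.h):
+ *
+ *	#include <stdio.h>
+ *
+ *	#define DSAF_SERVICE_NW_NUM		6	// assumed value
+ *	#define DSAF_BASE_INNER_PORT_NUM	127	// assumed value
+ *
+ *	static int get_port_id(unsigned int port)
+ *	{
+ *		if (port < DSAF_SERVICE_NW_NUM)		// service ports map 1:1
+ *			return port;
+ *		if (port >= DSAF_BASE_INNER_PORT_NUM)	// inner ports follow them
+ *			return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM;
+ *		return -1;				// hole in the numbering
+ *	}
+ *
+ *	int main(void)
+ *	{
+ *		int mskid = get_port_id(128);
+ *
+ *		if (mskid >= 0)
+ *			printf("port 128 -> bit %d of tbl_mcast_port_msk[%d]\n",
+ *			       mskid % 32, mskid / 32);
+ *		return 0;
+ *	}
+ */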
+
+static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port)
+{
+	struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80};
+	struct dsaf_tbl_tcam_data tbl_tcam_data_mc = {0x01000000, port};
+	struct dsaf_tbl_tcam_data tbl_tcam_mask_uc = {0x01000000, 0xf};
+	struct dsaf_tbl_tcam_mcast_cfg tbl_tcam_mcast = {0, 0, {0} };
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, port};
+	struct dsaf_drv_mac_single_dest_entry mask_entry;
+	struct dsaf_drv_tbl_tcam_key temp_key, mask_key;
+	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
+	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	struct dsaf_drv_tbl_tcam_key mac_key;
+	struct hns_mac_cb *mac_cb;
+	u8 addr[ETH_ALEN] = {0};
+	u8 port_num;
+	int mskid;
+
+	/* promisc mode uses the vague-match table entry: vlanid = 0, macaddr = 0 */
+	hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr);
+	entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
+	if (entry_index != DSAF_INVALID_ENTRY_IDX)
+		return;
+
+	/* put promisc tcam entry in the end. */
+	/* 1. set promisc unicast vague tcam entry. */
+	entry_index = hns_dsaf_find_empty_mac_entry_reverse(dsaf_dev);
+	if (entry_index == DSAF_INVALID_ENTRY_IDX) {
+		dev_err(dsaf_dev->dev,
+			"enable uc promisc failed (port:%#x)\n",
+			port);
+		return;
+	}
+
+	mac_cb = dsaf_dev->mac_cb[port];
+	(void)hns_mac_get_inner_port_num(mac_cb, 0, &port_num);
+	tbl_tcam_ucast.tbl_ucast_out_port = port_num;
+
+	/* config uc vague table */
+	hns_dsaf_tcam_uc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_uc,
+				   &tbl_tcam_mask_uc, &tbl_tcam_ucast);
+
+	/* update software entry */
+	soft_mac_entry = priv->soft_mac_tbl;
+	soft_mac_entry += entry_index;
+	soft_mac_entry->index = entry_index;
+	soft_mac_entry->tcam_key.high.val = mac_key.high.val;
+	soft_mac_entry->tcam_key.low.val = mac_key.low.val;
+	/* step back to the start of the soft table for the mc entry. */
+	soft_mac_entry = priv->soft_mac_tbl;
+
+	/* 2. set promisc multicast vague tcam entry. */
+	entry_index = hns_dsaf_find_empty_mac_entry_reverse(dsaf_dev);
+	if (entry_index == DSAF_INVALID_ENTRY_IDX) {
+		dev_err(dsaf_dev->dev,
+			"enable mc promisc failed (port:%#x)\n",
+			port);
+		return;
+	}
+
+	memset(&mask_entry, 0x0, sizeof(mask_entry));
+	memset(&mask_key, 0x0, sizeof(mask_key));
+	memset(&temp_key, 0x0, sizeof(temp_key));
+	mask_entry.addr[0] = 0x01;
+	hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id,
+			     0xf, mask_entry.addr);
+	tbl_tcam_mcast.tbl_mcast_item_vld = 1;
+	tbl_tcam_mcast.tbl_mcast_old_en = 0;
+
+	/* set MAC port to handle multicast */
+	mskid = hns_dsaf_get_port_id(port);
+	if (mskid == -EINVAL) {
+		dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n",
+			dsaf_dev->ae_dev.name, port,
+			mask_key.high.val, mask_key.low.val);
+		return;
+	}
+	dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32],
+		     mskid % 32, 1);
+
+	/* set pool bit map to handle multicast */
+	mskid = hns_dsaf_get_port_id(port_num);
+	if (mskid == -EINVAL) {
+		dev_err(dsaf_dev->dev,
+			"%s, pool bit map pnum(%d)error,key(%#x:%#x)\n",
+			dsaf_dev->ae_dev.name, port_num,
+			mask_key.high.val, mask_key.low.val);
+		return;
+	}
+	dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32],
+		     mskid % 32, 1);
+
+	memcpy(&temp_key, &mask_key, sizeof(mask_key));
+	hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc,
+				   (struct dsaf_tbl_tcam_data *)(&mask_key),
+				   &tbl_tcam_mcast);
+
+	/* update software entry */
+	soft_mac_entry += entry_index;
+	soft_mac_entry->index = entry_index;
+	soft_mac_entry->tcam_key.high.val = temp_key.high.val;
+	soft_mac_entry->tcam_key.low.val = temp_key.low.val;
+}
+
+static void set_promisc_tcam_disable(struct dsaf_device *dsaf_dev, u32 port)
+{
+	struct dsaf_tbl_tcam_data tbl_tcam_data_mc = {0x01000000, port};
+	struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 0, 0, 0, 0};
+	struct dsaf_tbl_tcam_mcast_cfg tbl_tcam_mcast = {0, 0, {0} };
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, 0};
+	struct dsaf_tbl_tcam_data tbl_tcam_mask = {0, 0};
+	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
+	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	struct dsaf_drv_tbl_tcam_key mac_key;
+	u8 addr[ETH_ALEN] = {0};
+
+	/* 1. delete uc vague tcam entry. */
+	/* promisc mode uses the vague-match table entry: vlanid = 0, macaddr = 0 */
+	hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr);
+	entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
+
+	if (entry_index == DSAF_INVALID_ENTRY_IDX)
+		return;
+
+	/* config uc vague table */
+	hns_dsaf_tcam_uc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_uc,
+				   &tbl_tcam_mask, &tbl_tcam_ucast);
+	/* update soft management table. */
+	soft_mac_entry = priv->soft_mac_tbl;
+	soft_mac_entry += entry_index;
+	soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX;
+	/* step back to the start of the soft table for the mc entry. */
+	soft_mac_entry = priv->soft_mac_tbl;
+
+	/* 2. delete mc vague tcam entry. */
+	addr[0] = 0x01;
+	memset(&mac_key, 0x0, sizeof(mac_key));
+	hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr);
+	entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
+
+	if (entry_index == DSAF_INVALID_ENTRY_IDX)
+		return;
+
+	/* config mc vague table */
+	hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc,
+				   &tbl_tcam_mask, &tbl_tcam_mcast);
+	/* update soft management table. */
+	soft_mac_entry += entry_index;
+	soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX;
+}
+
 /* Reserve the last TCAM entry for promisc support */
-#define dsaf_promisc_tcam_entry(port) \
-	(DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port))
 void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
 			       u32 port, bool enable)
 {
-	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
-	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
-	u16 entry_index;
-	struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask;
-	struct dsaf_tbl_tcam_mcast_cfg mac_data = {0};
-
-	if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev))
-		return;
-
-	/* find the tcam entry index for promisc */
-	entry_index = dsaf_promisc_tcam_entry(port);
-
-	memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
-	memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
-
-	/* config key mask */
-	if (enable) {
-		dsaf_set_field(tbl_tcam_data.low.bits.port_vlan,
-			       DSAF_TBL_TCAM_KEY_PORT_M,
-			       DSAF_TBL_TCAM_KEY_PORT_S, port);
-		dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan,
-			       DSAF_TBL_TCAM_KEY_PORT_M,
-			       DSAF_TBL_TCAM_KEY_PORT_S, 0xf);
-
-		/* SUB_QID */
-		dsaf_set_bit(mac_data.tbl_mcast_port_msk[0],
-			     DSAF_SERVICE_NW_NUM, true);
-		mac_data.tbl_mcast_item_vld = true;	/* item_vld bit */
-	} else {
-		mac_data.tbl_mcast_item_vld = false;	/* item_vld bit */
-	}
-
-	dev_dbg(dsaf_dev->dev,
-		"set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
-		dsaf_dev->ae_dev.name, tbl_tcam_data.high.val,
-		tbl_tcam_data.low.val, entry_index);
-
-	/* config promisc entry with mask */
-	hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
-			     (struct dsaf_tbl_tcam_data *)&tbl_tcam_data,
-			     (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask,
-			     &mac_data);
-
-	/* config software entry */
-	soft_mac_entry += entry_index;
-	soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX;
+	if (enable)
+		set_promisc_tcam_enable(dsaf_dev, port);
+	else
+		set_promisc_tcam_disable(dsaf_dev, port);
 }
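
Both new helpers keep a software shadow of the TCAM in sync: enable records the key at the row it programmed, disable looks the row up by key and invalidates it. A minimal standalone sketch of that bookkeeping, with simplified assumed types:

#include <stdint.h>
#include <stdio.h>

#define INVALID_IDX 0xffffu

struct soft_entry {
	uint32_t key_hi, key_lo;	/* shadow of the TCAM key */
	uint16_t index;			/* valid row number or INVALID_IDX */
};

static unsigned int find_entry(const struct soft_entry *tbl, unsigned int n,
			       uint32_t hi, uint32_t lo)
{
	for (unsigned int i = 0; i < n; i++)
		if (tbl[i].index != INVALID_IDX &&
		    tbl[i].key_hi == hi && tbl[i].key_lo == lo)
			return i;
	return INVALID_IDX;
}

int main(void)
{
	struct soft_entry tbl[4];

	for (unsigned int i = 0; i < 4; i++)
		tbl[i] = (struct soft_entry){ .index = INVALID_IDX };

	/* "enable" records the promisc key at row 3 ... */
	tbl[3] = (struct soft_entry){ .key_hi = 0x01000000, .key_lo = 2, .index = 3 };
	/* ... and "disable" finds it by key and invalidates the row */
	printf("found row %u\n", find_entry(tbl, 4, 0x01000000, 2));
	return 0;
}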
 
 int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port)
@@ -2906,6 +3090,7 @@
 	dsaf_dev = dev_get_drvdata(&pdev->dev);
 	if (!dsaf_dev) {
 		dev_err(&pdev->dev, "dsaf_dev is NULL\n");
+		put_device(&pdev->dev);
 		return -ENODEV;
 	}
 
@@ -2913,6 +3098,7 @@
 	if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
 		dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n",
 			dsaf_dev->ae_dev.name);
+		put_device(&pdev->dev);
 		return -ENODEV;
 	}
 
@@ -2951,6 +3137,9 @@
 		dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1);
 		dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit);
 	}
+
+	put_device(&pdev->dev);
+
 	return 0;
 }
 EXPORT_SYMBOL(hns_dsaf_roce_reset);
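
The three put_device() calls added above balance the reference taken when the DSAF platform device is looked up (bus_find_device() in hns_dsaf_find_platform_device(), later in this patch, returns a referenced device); previously the early-return paths and the success path all leaked it. A toy model of the pairing:

#include <stdio.h>

static int refcount;

static void get_device_sim(void) { refcount++; }
static void put_device_sim(void) { refcount--; }

static int roce_reset_sketch(int have_drvdata, int is_ver1)
{
	get_device_sim();		/* the platform-device lookup took a ref */

	if (!have_drvdata) {
		put_device_sim();	/* new: dropped on this early exit */
		return -19;		/* -ENODEV */
	}
	if (is_ver1) {
		put_device_sim();	/* new: and on this one */
		return -19;
	}
	/* ... actual reset work elided ... */
	put_device_sim();		/* new: and on the success path */
	return 0;
}

int main(void)
{
	roce_reset_sketch(1, 1);
	roce_reset_sketch(1, 0);
	printf("refcount after all paths: %d\n", refcount);	/* 0 */
	return refcount;
}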
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 0e1cd99..cba04bf 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __HNS_DSAF_MAIN_H
@@ -467,4 +463,6 @@
 			     u8 mac_id, u8 port_num);
 int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port);
 
+int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
+
 #endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 16294cd..ed3829a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include "hns_dsaf_mac.h"
@@ -670,7 +666,7 @@
 		dsaf_set_field(origin, 1ull << 10, 10, en);
 		dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin);
 	} else {
-		u8 *base_addr = (u8 *)mac_cb->serdes_vaddr +
+		u8 __iomem *base_addr = mac_cb->serdes_vaddr +
 				(mac_cb->mac_id <= 3 ? 0x00280000 : 0x00200000);
 		dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, en);
 	}
@@ -758,17 +754,11 @@
 	return (void *)misc_op;
 }
 
-static int hns_dsaf_dev_match(struct device *dev, void *fwnode)
-{
-	return dev->fwnode == fwnode;
-}
-
 struct
 platform_device *hns_dsaf_find_platform_device(struct fwnode_handle *fwnode)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&platform_bus_type, NULL,
-			      fwnode, hns_dsaf_dev_match);
+	dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
 	return dev ? to_platform_device(dev) : NULL;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h
index 310e802..f64c666 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _HNS_DSAF_MISC_H
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index 0942e49..2b34b55 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -61,7 +57,7 @@
 	}
 }
 
-static void __iomem *
+static u8 __iomem *
 hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common)
 {
 	return ppe_common->dsaf_dev->ppe_base + PPE_COMMON_REG_OFFSET;
@@ -83,8 +79,9 @@
 	else
 		ppe_num = HNS_PPE_DEBUG_NW_ENGINE_NUM;
 
-	ppe_common = devm_kzalloc(dsaf_dev->dev, sizeof(*ppe_common) +
-		ppe_num * sizeof(struct hns_ppe_cb), GFP_KERNEL);
+	ppe_common = devm_kzalloc(dsaf_dev->dev,
+				  struct_size(ppe_common, ppe_cb, ppe_num),
+				  GFP_KERNEL);
 	if (!ppe_common)
 		return -ENOMEM;
 
@@ -110,8 +107,8 @@
 	dsaf_dev->ppe_common[comm_index] = NULL;
 }
 
-static void __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common,
-					int ppe_idx)
+static u8 __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common,
+				      int ppe_idx)
 {
 	return ppe_common->dsaf_dev->ppe_base + ppe_idx * PPE_REG_OFFSET;
 }
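
The devm_kzalloc() call above (and the rcb_common one later in this patch) switches to struct_size() from <linux/overflow.h>, which computes the same trailing-flexible-array size but saturates instead of wrapping on overflow. A standalone sketch of the open-coded arithmetic it replaces:

#include <stddef.h>
#include <stdio.h>

struct demo_common {
	int mode;
	struct { int id; } cb[];	/* flexible array member */
};

int main(void)
{
	size_t n = 4;
	/* open-coded equivalent of struct_size(ptr, cb, n); the kernel
	 * helper additionally saturates to SIZE_MAX if the multiply
	 * overflows, instead of silently wrapping */
	size_t bytes = sizeof(struct demo_common) +
		       n * sizeof(((struct demo_common *)0)->cb[0]);

	printf("%zu bytes for %zu trailing elements\n", bytes, n);
	return 0;
}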
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index f670e63..2721f1f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _HNS_DSAF_PPE_H
@@ -80,7 +76,7 @@
 	struct hns_ppe_hw_stats hw_stats;
 
 	u8 index;	/* index in a ppe common device */
-	void __iomem *io_base;
+	u8 __iomem *io_base;
 	int virq;
 	u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */
 	u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */
@@ -89,7 +85,7 @@
 struct ppe_common_cb {
 	struct device *dev;
 	struct dsaf_device *dsaf_dev;
-	void __iomem *io_base;
+	u8 __iomem *io_base;
 
 	enum ppe_common_mode ppe_mode;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 5d64519..5453597 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/cdev.h>
@@ -458,7 +454,7 @@
 		mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT;
 	} else {
 		ring = &q->tx_ring;
-		ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base +
+		ring->io_base = ring_pair_cb->q.io_base +
 			HNS_RCB_TX_REG_OFFSET;
 		irq_idx = HNS_RCB_IRQ_IDX_TX;
 		mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT :
@@ -764,7 +760,7 @@
 	}
 }
 
-static void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common)
+static u8 __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common)
 {
 	struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev;
 
@@ -788,8 +784,9 @@
 	int ring_num = hns_rcb_get_ring_num(dsaf_dev);
 
 	rcb_common =
-		devm_kzalloc(dsaf_dev->dev, sizeof(*rcb_common) +
-			ring_num * sizeof(struct ring_pair_cb), GFP_KERNEL);
+		devm_kzalloc(dsaf_dev->dev,
+			     struct_size(rcb_common, ring_pair_cb, ring_num),
+			     GFP_KERNEL);
 	if (!rcb_common) {
 		dev_err(dsaf_dev->dev, "rcb common devm_kzalloc fail!\n");
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
index 2319b77..3741bef 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _HNS_DSAF_RCB_H
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 74d935d..47ccb6e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _DSAF_REG_H_
@@ -176,7 +172,7 @@
 #define DSAF_INODE_IN_DATA_STP_DISC_0_REG	0x1A50
 #define DSAF_INODE_GE_FC_EN_0_REG		0x1B00
 #define DSAF_INODE_VC0_IN_PKT_NUM_0_REG		0x1B50
-#define DSAF_INODE_VC1_IN_PKT_NUM_0_REG		0x1C00
+#define DSAF_INODE_VC1_IN_PKT_NUM_0_REG		0x103C
 #define DSAF_INODE_IN_PRIO_PAUSE_BASE_REG	0x1C00
 #define DSAF_INODE_IN_PRIO_PAUSE_BASE_OFFSET	0x100
 #define DSAF_INODE_IN_PRIO_PAUSE_OFFSET		0x50
@@ -404,11 +400,11 @@
 #define RCB_ECC_ERR_ADDR4_REG			0x460
 #define RCB_ECC_ERR_ADDR5_REG			0x464
 
-#define RCB_COM_SF_CFG_INTMASK_RING		0x480
-#define RCB_COM_SF_CFG_RING_STS			0x484
-#define RCB_COM_SF_CFG_RING			0x488
-#define RCB_COM_SF_CFG_INTMASK_BD		0x48C
-#define RCB_COM_SF_CFG_BD_RINT_STS		0x470
+#define RCB_COM_SF_CFG_INTMASK_RING		0x470
+#define RCB_COM_SF_CFG_RING_STS			0x474
+#define RCB_COM_SF_CFG_RING			0x478
+#define RCB_COM_SF_CFG_INTMASK_BD		0x47C
+#define RCB_COM_SF_CFG_BD_RINT_STS		0x480
 #define RCB_COM_RCB_RD_BD_BUSY			0x490
 #define RCB_COM_RCB_FBD_CRT_EN			0x494
 #define RCB_COM_AXI_WR_ERR_INTMASK		0x498
@@ -534,6 +530,7 @@
 #define GMAC_LD_LINK_COUNTER_REG		0x01D0UL
 #define GMAC_LOOP_REG				0x01DCUL
 #define GMAC_RECV_CONTROL_REG			0x01E0UL
+#define GMAC_PCS_RX_EN_REG			0x01E4UL
 #define GMAC_VLAN_CODE_REG			0x01E8UL
 #define GMAC_RX_OVERRUN_CNT_REG			0x01ECUL
 #define GMAC_RX_LENGTHFIELD_ERR_CNT_REG		0x01F4UL
@@ -1017,7 +1014,7 @@
 #define XGMAC_PAUSE_CTL_RSP_MODE_B	2
 #define XGMAC_PAUSE_CTL_TX_XOFF_B	3
 
-static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value)
+static inline void dsaf_write_reg(u8 __iomem *base, u32 reg, u32 value)
 {
 	writel(value, base + reg);
 }
@@ -1052,7 +1049,7 @@
 #define dsaf_set_bit(origin, shift, val) \
 	dsaf_set_field((origin), (1ull << (shift)), (shift), (val))
 
-static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask,
+static inline void dsaf_set_reg_field(u8 __iomem *base, u32 reg, u32 mask,
 				      u32 shift, u32 val)
 {
 	u32 origin = dsaf_read_reg(base, reg);
@@ -1072,7 +1069,7 @@
 #define dsaf_get_bit(origin, shift) \
 	dsaf_get_field((origin), (1ull << (shift)), (shift))
 
-static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask,
+static inline u32 dsaf_get_reg_field(u8 __iomem *base, u32 reg, u32 mask,
 				     u32 shift)
 {
 	u32 origin;
@@ -1088,11 +1085,11 @@
 	dsaf_get_reg_field((dev)->io_base, (reg), (1ull << (bit)), (bit))
 
 #define dsaf_write_b(addr, data)\
-	writeb((data), (__iomem unsigned char *)(addr))
+	writeb((data), (__iomem u8 *)(addr))
 #define dsaf_read_b(addr)\
-	readb((__iomem unsigned char *)(addr))
+	readb((__iomem u8 *)(addr))
 
 #define hns_mac_reg_read64(drv, offset) \
-	readq((__iomem void *)(((u8 *)(drv)->io_base + 0xc00 + (offset))))
+	readq((__iomem void *)(((drv)->io_base + 0xc00 + (offset))))
 
 #endif	/* _DSAF_REG_H */
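
The recurring void __iomem * to u8 __iomem * changes in this patch exist because these helpers do byte-offset arithmetic on the MMIO base: arithmetic on void * is a GCC extension, while u8 * is well-defined and keeps sparse's address-space checking. A userspace sketch, with __iomem reduced to the empty macro it is for non-sparse builds:

#include <stdint.h>

#define __iomem		/* sparse address-space tag; empty for a normal compiler */

static inline void write_reg(uint8_t __iomem *base, uint32_t reg, uint32_t val)
{
	*(volatile uint32_t *)(base + reg) = val;	/* stands in for writel() */
}

int main(void)
{
	uint32_t backing[4] = { 0 };
	uint8_t __iomem *base = (uint8_t *)backing;

	write_reg(base, 4, 0x12345678);		/* byte offset: defined arithmetic */
	return backing[1] != 0x12345678u;
}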
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index ba43169..0a3dbab 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/io-64-nonatomic-hi-lo.h>
@@ -129,7 +125,7 @@
 	dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0);
 	dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1);
 	dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0);
-	dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val);
+	dsaf_write_dev(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val);
 }
 
 /**
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
index da6c534..e1b3db9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _HNS_XGMAC_H
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 28e9078..14ab204 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/clk.h>
@@ -29,9 +25,6 @@
 
 #define SERVICE_TIMER_HZ (1 * HZ)
 
-#define NIC_TX_CLEAN_MAX_NUM 256
-#define NIC_RX_CLEAN_MAX_NUM 64
-
 #define RCB_IRQ_NOT_INITED 0
 #define RCB_IRQ_INITED 1
 #define HNS_BUFFER_SIZE_2048 2048
@@ -252,7 +245,7 @@
 	int frag_num;
 	struct sk_buff *skb = *out_skb;
 	struct sk_buff *new_skb = NULL;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 
 	size = skb_headlen(skb);
 	buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE;
@@ -316,7 +309,7 @@
 	struct hnae_ring *ring = ring_data->ring;
 	struct device *dev = ring_to_dev(ring);
 	struct netdev_queue *dev_queue;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	int buf_num;
 	int seg_num;
 	dma_addr_t dma;
@@ -376,8 +369,6 @@
 	wmb(); /* commit all data before submit */
 	assert(skb->queue_mapping < priv->ae_handle->q_num);
 	hnae_queue_xmit(priv->ae_handle->qs[skb->queue_mapping], buf_num);
-	ring->stats.tx_pkts++;
-	ring->stats.tx_bytes += skb->len;
 
 	return NETDEV_TX_OK;
 
@@ -603,7 +594,7 @@
 	} else {
 		ring->stats.seg_pkt_cnt++;
 
-		pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE);
+		pull_len = eth_get_headlen(ndev, va, HNS_RX_HEAD_SIZE);
 		memcpy(__skb_put(skb, pull_len), va,
 		       ALIGN(pull_len, sizeof(long)));
 
@@ -952,15 +943,6 @@
 	return u > c ? (h > c && h <= u) : (h > c || h <= u);
 }
 
-/* netif_tx_lock will turn down the performance, set only when necessary */
-#ifdef CONFIG_NET_POLL_CONTROLLER
-#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
-#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
-#else
-#define NETIF_TX_LOCK(ring)
-#define NETIF_TX_UNLOCK(ring)
-#endif
-
 /* reclaim all desc in one budget
  * return error or number of desc left
  */
@@ -974,21 +956,16 @@
 	int head;
 	int bytes, pkts;
 
-	NETIF_TX_LOCK(ring);
-
 	head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
 	rmb(); /* make sure head is ready before touch any data */
 
-	if (is_ring_empty(ring) || head == ring->next_to_clean) {
-		NETIF_TX_UNLOCK(ring);
+	if (is_ring_empty(ring) || head == ring->next_to_clean)
 		return 0; /* no data to poll */
-	}
 
 	if (!is_valid_clean_head(ring, head)) {
 		netdev_err(ndev, "wrong head (%d, %d-%d)\n", head,
 			   ring->next_to_use, ring->next_to_clean);
 		ring->stats.io_err_cnt++;
-		NETIF_TX_UNLOCK(ring);
 		return -EIO;
 	}
 
@@ -999,8 +976,9 @@
 		/* issue prefetch for next Tx descriptor */
 		prefetch(&ring->desc_cb[ring->next_to_clean]);
 	}
-
-	NETIF_TX_UNLOCK(ring);
+	/* update tx ring statistics. */
+	ring->stats.tx_pkts += pkts;
+	ring->stats.tx_bytes += bytes;
 
 	dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
 	netdev_tx_completed_queue(dev_queue, pkts, bytes);
@@ -1061,16 +1039,12 @@
 	int head;
 	int bytes, pkts;
 
-	NETIF_TX_LOCK(ring);
-
 	head = ring->next_to_use; /* ntu: soft-set ring position */
 	bytes = 0;
 	pkts = 0;
 	while (head != ring->next_to_clean)
 		hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
 
-	NETIF_TX_UNLOCK(ring);
-
 	dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
 	netdev_tx_reset_queue(dev_queue);
 }
@@ -1163,12 +1137,20 @@
  */
 int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
 	struct phy_device *phy_dev = h->phy_dev;
 	int ret;
 
 	if (!h->phy_dev)
 		return 0;
 
+	ethtool_convert_legacy_u32_to_link_mode(supported, h->if_support);
+	linkmode_and(phy_dev->supported, phy_dev->supported, supported);
+	linkmode_copy(phy_dev->advertising, phy_dev->supported);
+
+	if (h->phy_if == PHY_INTERFACE_MODE_XGMII)
+		phy_dev->autoneg = false;
+
 	if (h->phy_if != PHY_INTERFACE_MODE_XGMII) {
 		phy_dev->dev_flags = 0;
 
@@ -1180,11 +1162,7 @@
 	if (unlikely(ret))
 		return -ENODEV;
 
-	phy_dev->supported &= h->if_support;
-	phy_dev->advertising = phy_dev->supported;
-
-	if (h->phy_if == PHY_INTERFACE_MODE_XGMII)
-		phy_dev->autoneg = false;
+	phy_attached_info(phy_dev);
 
 	return 0;
 }
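
The hunk above adapts to phylib's switch from a legacy u32 supported/advertising pair to linkmode bitmaps: the handle's u32 capability mask is converted first, then ANDed and copied with the linkmode helpers. A standalone sketch of the word-wise operation those helpers perform (word count assumed):

#include <stdint.h>
#include <stdio.h>

#define LINK_MODE_WORDS 2	/* assumed; the kernel sizes this from the mode count */

int main(void)
{
	uint64_t supported[LINK_MODE_WORDS]  = { 0xff, 0x1 };	/* PHY capabilities */
	uint64_t if_support[LINK_MODE_WORDS] = { 0x0f, 0x0 };	/* from the legacy u32 */
	uint64_t advertising[LINK_MODE_WORDS];
	int i;

	for (i = 0; i < LINK_MODE_WORDS; i++) {
		supported[i] &= if_support[i];	/* linkmode_and() */
		advertising[i] = supported[i];	/* linkmode_copy() */
	}
	printf("supported[0]   = %#llx\n", (unsigned long long)supported[0]);
	printf("advertising[0] = %#llx\n", (unsigned long long)advertising[0]);
	return 0;
}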
@@ -1281,6 +1259,22 @@
 	return cpu;
 }
 
+static void hns_nic_free_irq(int q_num, struct hns_nic_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < q_num * 2; i++) {
+		if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) {
+			irq_set_affinity_hint(priv->ring_data[i].ring->irq,
+					      NULL);
+			free_irq(priv->ring_data[i].ring->irq,
+				 &priv->ring_data[i]);
+			priv->ring_data[i].ring->irq_init_flag =
+				RCB_IRQ_NOT_INITED;
+		}
+	}
+}
+
 static int hns_nic_init_irq(struct hns_nic_priv *priv)
 {
 	struct hnae_handle *h = priv->ae_handle;
@@ -1306,7 +1300,7 @@
 		if (ret) {
 			netdev_err(priv->netdev, "request irq(%d) fail\n",
 				   rd->ring->irq);
-			return ret;
+			goto out_free_irq;
 		}
 		disable_irq(rd->ring->irq);
 
@@ -1321,6 +1315,10 @@
 	}
 
 	return 0;
+
+out_free_irq:
+	hns_nic_free_irq(h->q_num, priv);
+	return ret;
 }
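
hns_nic_init_irq() used to return on the first request_irq() failure and leak the vectors already requested; the new out_free_irq path releases every ring whose irq_init_flag shows it was set up. A toy version of that flag-driven unwind:

#include <stdio.h>

#define NR_RINGS 4

static int irq_inited[NR_RINGS];	/* stands in for irq_init_flag */

static int request_one(int i)
{
	if (i == 2)
		return -1;	/* simulate request_irq() failing mid-loop */
	irq_inited[i] = 1;
	return 0;
}

static void free_inited(void)	/* sketch of hns_nic_free_irq() */
{
	for (int i = 0; i < NR_RINGS; i++)
		if (irq_inited[i]) {
			irq_inited[i] = 0;
			printf("freed irq for ring %d\n", i);
		}
}

int main(void)
{
	for (int i = 0; i < NR_RINGS; i++)
		if (request_one(i)) {
			free_inited();	/* the new out_free_irq path */
			return 1;
		}
	return 0;
}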
 
 static int hns_nic_net_up(struct net_device *ndev)
@@ -1330,6 +1328,9 @@
 	int i, j;
 	int ret;
 
+	if (!test_bit(NIC_STATE_DOWN, &priv->state))
+		return 0;
+
 	ret = hns_nic_init_irq(priv);
 	if (ret != 0) {
 		netdev_err(ndev, "hns init irq failed! ret=%d\n", ret);
@@ -1365,6 +1366,7 @@
 	for (j = i - 1; j >= 0; j--)
 		hns_nic_ring_close(ndev, j);
 
+	hns_nic_free_irq(h->q_num, priv);
 	set_bit(NIC_STATE_DOWN, &priv->state);
 
 	return ret;
@@ -1482,11 +1484,19 @@
 }
 
 static void hns_tx_timeout_reset(struct hns_nic_priv *priv);
+#define HNS_TX_TIMEO_LIMIT (40 * HZ)
 static void hns_nic_net_timeout(struct net_device *ndev)
 {
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 
-	hns_tx_timeout_reset(priv);
+	if (ndev->watchdog_timeo < HNS_TX_TIMEO_LIMIT) {
+		ndev->watchdog_timeo *= 2;
+		netdev_info(ndev, "watchdog_timeo changed to %d.\n",
+			    ndev->watchdog_timeo);
+	} else {
+		ndev->watchdog_timeo = HNS_NIC_TX_TIMEOUT;
+		hns_tx_timeout_reset(priv);
+	}
 }
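
The timeout handler above now backs off before resetting: it doubles watchdog_timeo on each event until the 40 * HZ cap, and only then restores the default and schedules a real reset. A standalone trace of the policy (HZ and the default are assumed stand-ins):

#include <stdio.h>

#define HZ 100			/* assumed tick rate for the demo */
#define DEFAULT_TIMEO (5 * HZ)	/* stands in for HNS_NIC_TX_TIMEOUT */
#define TIMEO_LIMIT (40 * HZ)	/* HNS_TX_TIMEO_LIMIT from the patch */

int main(void)
{
	int timeo = DEFAULT_TIMEO;

	for (int event = 1; event <= 5; event++) {
		if (timeo < TIMEO_LIMIT) {
			timeo *= 2;	/* back off before resetting hardware */
			printf("timeout %d: watchdog_timeo -> %d\n", event, timeo);
		} else {
			timeo = DEFAULT_TIMEO;	/* restore and do the real reset */
			printf("timeout %d: limit hit, reset, timeo back to %d\n",
			       event, timeo);
		}
	}
	return 0;
}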
 
 static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
@@ -1930,8 +1940,7 @@
 
 static u16
 hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
-		     struct net_device *sb_dev,
-		     select_queue_fallback_t fallback)
+		     struct net_device *sb_dev)
 {
 	struct ethhdr *eth_hdr = (struct ethhdr *)skb->data;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
@@ -1941,7 +1950,7 @@
 	    is_multicast_ether_addr(eth_hdr->h_dest))
 		return 0;
 	else
-		return fallback(ndev, skb, NULL);
+		return netdev_pick_tx(ndev, skb, NULL);
 }
 
 static const struct net_device_ops hns_nic_netdev_ops = {
@@ -2049,11 +2058,11 @@
 		= container_of(work, struct hns_nic_priv, service_task);
 	struct hnae_handle *h = priv->ae_handle;
 
+	hns_nic_reset_subtask(priv);
 	hns_nic_update_link_status(priv->netdev);
 	h->dev->ops->update_led_status(h);
 	hns_nic_update_stats(priv->netdev);
 
-	hns_nic_reset_subtask(priv);
 	hns_nic_service_event_complete(priv);
 }
 
@@ -2118,7 +2127,7 @@
 			hns_nic_tx_fini_pro_v2;
 
 		netif_napi_add(priv->netdev, &rd->napi,
-			       hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM);
+			       hns_nic_common_poll, NAPI_POLL_WEIGHT);
 		rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED;
 	}
 	for (i = h->q_num; i < h->q_num * 2; i++) {
@@ -2131,7 +2140,7 @@
 			hns_nic_rx_fini_pro_v2;
 
 		netif_napi_add(priv->netdev, &rd->napi,
-			       hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM);
+			       hns_nic_common_poll, NAPI_POLL_WEIGHT);
 		rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED;
 	}
 
@@ -2339,10 +2348,11 @@
 	ndev->min_mtu = MAC_MIN_MTU;
 	switch (priv->enet_ver) {
 	case AE_VERSION_2:
-		ndev->features |= NETIF_F_TSO | NETIF_F_TSO6;
+		ndev->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_NTUPLE;
 		ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 			NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6;
+		ndev->vlan_features |= NETIF_F_TSO | NETIF_F_TSO6;
 		ndev->max_mtu = MAC_MAX_MTU_V2 -
 				(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
 		break;
@@ -2384,6 +2394,8 @@
 out_notify_fail:
 	(void)cancel_work_sync(&priv->service_task);
 out_read_prop_fail:
+	/* safe for ACPI FW */
+	of_node_put(to_of_node(priv->fwnode));
 	free_netdev(ndev);
 	return ret;
 }
@@ -2413,6 +2425,9 @@
 	set_bit(NIC_STATE_REMOVING, &priv->state);
 	(void)cancel_work_sync(&priv->service_task);
 
+	/* safe for ACPI FW */
+	of_node_put(to_of_node(priv->fwnode));
+
 	free_netdev(ndev);
 	return 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 26e9afc..ffa9d65 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __HNS_ENET_H
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 774beda..717fccc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/etherdevice.h>
@@ -339,6 +335,7 @@
 static int __lb_up(struct net_device *ndev,
 		   enum hnae_loop loop_mode)
 {
+#define NIC_LB_TEST_WAIT_PHY_LINK_TIME 300
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct hnae_handle *h = priv->ae_handle;
 	int speed, duplex;
@@ -365,6 +362,9 @@
 
 	h->dev->ops->adjust_link(h, speed, duplex);
 
+	/* wait for the link adjustment to take effect and the PHY to be ready */
+	msleep(NIC_LB_TEST_WAIT_PHY_LINK_TIME);
+
 	return 0;
 }
 
@@ -624,7 +624,7 @@
 		clear_bit(NIC_STATE_TESTING, &priv->state);
 
 		if (if_running)
-			(void)dev_open(ndev);
+			(void)dev_open(ndev, NULL);
 	}
 	/* Online tests aren't run; pass by default */
 
@@ -1157,16 +1157,18 @@
  */
 static int hns_nic_nway_reset(struct net_device *netdev)
 {
-	int ret = 0;
 	struct phy_device *phy = netdev->phydev;
 
-	if (netif_running(netdev)) {
-		/* if autoneg is disabled, don't restart auto-negotiation */
-		if (phy && phy->autoneg == AUTONEG_ENABLE)
-			ret = genphy_restart_aneg(phy);
-	}
+	if (!netif_running(netdev))
+		return 0;
 
-	return ret;
+	if (!phy)
+		return -EOPNOTSUPP;
+
+	if (phy->autoneg != AUTONEG_ENABLE)
+		return -EINVAL;
+
+	return genphy_restart_aneg(phy);
 }
 
 static u32
diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile
index 002534f..d01bf53 100644
--- a/drivers/net/ethernet/hisilicon/hns3/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/Makefile
@@ -9,6 +9,6 @@
 obj-$(CONFIG_HNS3) += hnae3.o
 
 obj-$(CONFIG_HNS3_ENET) += hns3.o
-hns3-objs = hns3_enet.o hns3_ethtool.o
+hns3-objs = hns3_enet.o hns3_ethtool.o hns3_debugfs.o
 
 hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index be9dc08..f8a87f8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -21,6 +21,7 @@
 	HCLGE_MBX_SET_MACVLAN,		/* (VF -> PF) set unicast filter */
 	HCLGE_MBX_API_NEGOTIATE,	/* (VF -> PF) negotiate API version */
 	HCLGE_MBX_GET_QINFO,		/* (VF -> PF) get queue config */
+	HCLGE_MBX_GET_QDEPTH,		/* (VF -> PF) get queue depth */
 	HCLGE_MBX_GET_TCINFO,		/* (VF -> PF) get TC config */
 	HCLGE_MBX_GET_RETA,		/* (VF -> PF) get RETA */
 	HCLGE_MBX_GET_RSS_KEY,		/* (VF -> PF) get RSS key */
@@ -36,6 +37,18 @@
 	HCLGE_MBX_BIND_FUNC_QUEUE,	/* (VF -> PF) bind function and queue */
 	HCLGE_MBX_GET_LINK_STATUS,	/* (VF -> PF) get link status */
 	HCLGE_MBX_QUEUE_RESET,		/* (VF -> PF) reset queue */
+	HCLGE_MBX_KEEP_ALIVE,		/* (VF -> PF) send keep alive cmd */
+	HCLGE_MBX_SET_ALIVE,		/* (VF -> PF) set alive state */
+	HCLGE_MBX_SET_MTU,		/* (VF -> PF) set mtu */
+	HCLGE_MBX_GET_QID_IN_PF,	/* (VF -> PF) get queue id in pf */
+	HCLGE_MBX_LINK_STAT_MODE,	/* (PF -> VF) link mode has changed */
+	HCLGE_MBX_GET_LINK_MODE,	/* (VF -> PF) get the link mode of pf */
+	HCLGE_MBX_PUSH_VLAN_INFO,	/* (PF -> VF) push port base vlan */
+	HCLGE_MBX_GET_MEDIA_TYPE,       /* (VF -> PF) get media type */
+
+	HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
+	HCLGE_MBX_PUSH_LINK_STATUS,	/* (M7 -> PF) get port link status */
+	HCLGE_MBX_NCSI_ERROR,		/* (M7 -> PF) receive a NCSI error */
 };
 
 /* below are per-VF mac-vlan subcodes */
@@ -46,9 +59,6 @@
 	HCLGE_MBX_MAC_VLAN_MC_MODIFY,		/* modify MC mac addr */
 	HCLGE_MBX_MAC_VLAN_MC_ADD,		/* add new MC mac addr */
 	HCLGE_MBX_MAC_VLAN_MC_REMOVE,		/* remove MC mac addr */
-	HCLGE_MBX_MAC_VLAN_MC_FUNC_MTA_ENABLE,	/* config func MTA enable */
-	HCLGE_MBX_MAC_VLAN_MTA_TYPE_READ,	/* read func MTA type */
-	HCLGE_MBX_MAC_VLAN_MTA_STATUS_UPDATE,	/* update MTA status */
 };
 
 /* below are per-VF vlan cfg subcodes */
@@ -56,6 +66,8 @@
 	HCLGE_MBX_VLAN_FILTER = 0,	/* set vlan filter */
 	HCLGE_MBX_VLAN_TX_OFF_CFG,	/* set tx side vlan offload */
 	HCLGE_MBX_VLAN_RX_OFF_CFG,	/* set rx side vlan offload */
+	HCLGE_MBX_PORT_BASE_VLAN_CFG,	/* set port based vlan configuration */
+	HCLGE_MBX_GET_PORT_BASE_VLAN_STATE,	/* get port based vlan state */
 };
 
 #define HCLGE_MBX_MAX_MSG_SIZE	16
@@ -74,12 +86,15 @@
 struct hclge_mbx_vf_to_pf_cmd {
 	u8 rsv;
 	u8 mbx_src_vfid; /* Auto filled by IMP */
-	u8 rsv1[2];
+	u8 mbx_need_resp;
+	u8 rsv1[1];
 	u8 msg_len;
 	u8 rsv2[3];
 	u8 msg[HCLGE_MBX_MAX_MSG_SIZE];
 };
 
+#define HCLGE_MBX_NEED_RESP_BIT		BIT(0)
+
 struct hclge_mbx_pf_to_vf_cmd {
 	u8 dest_vfid;
 	u8 rsv[3];
@@ -88,6 +103,12 @@
 	u16 msg[8];
 };
 
+struct hclge_vf_rst_cmd {
+	u8 dest_vfid;
+	u8 vf_rst;
+	u8 rsv[22];
+};
+
 /* used by VF to store the received Async responses from PF */
 struct hclgevf_mbx_arq_ring {
 #define HCLGE_MBX_MAX_ARQ_MSG_SIZE	8
@@ -95,7 +116,7 @@
 	struct hclgevf_dev *hdev;
 	u32 head;
 	u32 tail;
-	u32 count;
+	atomic_t count;
 	u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index fff5be8..03ca7d9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -16,29 +16,26 @@
  */
 static DEFINE_MUTEX(hnae3_common_lock);
 
-static bool hnae3_client_match(enum hnae3_client_type client_type,
-			       enum hnae3_dev_type dev_type)
+static bool hnae3_client_match(enum hnae3_client_type client_type)
 {
-	if ((dev_type == HNAE3_DEV_KNIC) && (client_type == HNAE3_CLIENT_KNIC ||
-					     client_type == HNAE3_CLIENT_ROCE))
-		return true;
-
-	if (dev_type == HNAE3_DEV_UNIC && client_type == HNAE3_CLIENT_UNIC)
+	if (client_type == HNAE3_CLIENT_KNIC ||
+	    client_type == HNAE3_CLIENT_ROCE)
 		return true;
 
 	return false;
 }
 
-static void hnae3_set_client_init_flag(struct hnae3_client *client,
-				       struct hnae3_ae_dev *ae_dev, int inited)
+void hnae3_set_client_init_flag(struct hnae3_client *client,
+				struct hnae3_ae_dev *ae_dev,
+				unsigned int inited)
 {
+	if (!client || !ae_dev)
+		return;
+
 	switch (client->type) {
 	case HNAE3_CLIENT_KNIC:
 		hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited);
 		break;
-	case HNAE3_CLIENT_UNIC:
-		hnae3_set_bit(ae_dev->flag, HNAE3_UNIC_CLIENT_INITED_B, inited);
-		break;
 	case HNAE3_CLIENT_ROCE:
 		hnae3_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited);
 		break;
@@ -46,9 +43,10 @@
 		break;
 	}
 }
+EXPORT_SYMBOL(hnae3_set_client_init_flag);
 
 static int hnae3_get_client_init_flag(struct hnae3_client *client,
-				       struct hnae3_ae_dev *ae_dev)
+				      struct hnae3_ae_dev *ae_dev)
 {
 	int inited = 0;
 
@@ -57,10 +55,6 @@
 		inited = hnae3_get_bit(ae_dev->flag,
 				       HNAE3_KNIC_CLIENT_INITED_B);
 		break;
-	case HNAE3_CLIENT_UNIC:
-		inited = hnae3_get_bit(ae_dev->flag,
-				       HNAE3_UNIC_CLIENT_INITED_B);
-		break;
 	case HNAE3_CLIENT_ROCE:
 		inited = hnae3_get_bit(ae_dev->flag,
 				       HNAE3_ROCE_CLIENT_INITED_B);
@@ -72,44 +66,47 @@
 	return inited;
 }
 
-static int hnae3_match_n_instantiate(struct hnae3_client *client,
-				     struct hnae3_ae_dev *ae_dev, bool is_reg)
+static int hnae3_init_client_instance(struct hnae3_client *client,
+				      struct hnae3_ae_dev *ae_dev)
 {
 	int ret;
 
 	/* check if this client matches the type of ae_dev */
-	if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
+	if (!(hnae3_client_match(client->type) &&
 	      hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
 		return 0;
 	}
 
-	/* now, (un-)instantiate client by calling lower layer */
-	if (is_reg) {
-		ret = ae_dev->ops->init_client_instance(client, ae_dev);
-		if (ret) {
-			dev_err(&ae_dev->pdev->dev,
-				"fail to instantiate client, ret = %d\n", ret);
-			return ret;
-		}
+	ret = ae_dev->ops->init_client_instance(client, ae_dev);
+	if (ret)
+		dev_err(&ae_dev->pdev->dev,
+			"fail to instantiate client, ret = %d\n", ret);
 
-		hnae3_set_client_init_flag(client, ae_dev, 1);
-		return 0;
-	}
+	return ret;
+}
+
+static void hnae3_uninit_client_instance(struct hnae3_client *client,
+					 struct hnae3_ae_dev *ae_dev)
+{
+	/* check if this client matches the type of ae_dev */
+	if (!(hnae3_client_match(client->type) &&
+	      hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B)))
+		return;
 
 	if (hnae3_get_client_init_flag(client, ae_dev)) {
 		ae_dev->ops->uninit_client_instance(client, ae_dev);
 
 		hnae3_set_client_init_flag(client, ae_dev, 0);
 	}
-
-	return 0;
 }
 
 int hnae3_register_client(struct hnae3_client *client)
 {
 	struct hnae3_client *client_tmp;
 	struct hnae3_ae_dev *ae_dev;
-	int ret = 0;
+
+	if (!client)
+		return -ENODEV;
 
 	mutex_lock(&hnae3_common_lock);
 	/* one system should only have one client for every type */
@@ -125,7 +122,7 @@
 		/* if the client could not be initialized on current port, for
 		 * any error reasons, move on to next available port
 		 */
-		ret = hnae3_match_n_instantiate(client, ae_dev, true);
+		int ret = hnae3_init_client_instance(client, ae_dev);
 		if (ret)
 			dev_err(&ae_dev->pdev->dev,
 				"match and instantiation failed for port, ret = %d\n",
@@ -141,12 +138,31 @@
 
 void hnae3_unregister_client(struct hnae3_client *client)
 {
+	struct hnae3_client *client_tmp;
 	struct hnae3_ae_dev *ae_dev;
+	bool existed = false;
+
+	if (!client)
+		return;
 
 	mutex_lock(&hnae3_common_lock);
+
+	list_for_each_entry(client_tmp, &hnae3_client_list, node) {
+		if (client_tmp->type == client->type) {
+			existed = true;
+			break;
+		}
+	}
+
+	if (!existed) {
+		mutex_unlock(&hnae3_common_lock);
+		pr_err("client %s does not exist!\n", client->name);
+		return;
+	}
+
 	/* un-initialize the client on every matched port */
 	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
-		hnae3_match_n_instantiate(client, ae_dev, false);
+		hnae3_uninit_client_instance(client, ae_dev);
 	}
 
 	list_del(&client->node);
@@ -163,7 +179,10 @@
 	const struct pci_device_id *id;
 	struct hnae3_ae_dev *ae_dev;
 	struct hnae3_client *client;
-	int ret = 0;
+	int ret;
+
+	if (!ae_algo)
+		return;
 
 	mutex_lock(&hnae3_common_lock);
 
@@ -175,8 +194,12 @@
 		if (!id)
 			continue;
 
-		/* ae_dev init should set flag */
+		if (!ae_algo->ops) {
+			dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n");
+			continue;
+		}
 		ae_dev->ops = ae_algo->ops;
+
 		ret = ae_algo->ops->init_ae_dev(ae_dev);
 		if (ret) {
 			dev_err(&ae_dev->pdev->dev,
@@ -184,13 +207,14 @@
 			continue;
 		}
 
+		/* ae_dev init should set flag */
 		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
 
 		/* check the client list for the match with this ae_dev type and
 		 * initialize the figure out client instance
 		 */
 		list_for_each_entry(client, &hnae3_client_list, node) {
-			ret = hnae3_match_n_instantiate(client, ae_dev, true);
+			ret = hnae3_init_client_instance(client, ae_dev);
 			if (ret)
 				dev_err(&ae_dev->pdev->dev,
 					"match and instantiation failed, ret = %d\n",
@@ -211,6 +235,9 @@
 	struct hnae3_ae_dev *ae_dev;
 	struct hnae3_client *client;
 
+	if (!ae_algo)
+		return;
+
 	mutex_lock(&hnae3_common_lock);
 	/* Check if there are matched ae_dev */
 	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
@@ -225,10 +252,11 @@
 		 * un-initialize the figure out client instance
 		 */
 		list_for_each_entry(client, &hnae3_client_list, node)
-			hnae3_match_n_instantiate(client, ae_dev, false);
+			hnae3_uninit_client_instance(client, ae_dev);
 
 		ae_algo->ops->uninit_ae_dev(ae_dev);
 		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+		ae_dev->ops = NULL;
 	}
 
 	list_del(&ae_algo->node);
@@ -240,12 +268,15 @@
  * @ae_dev: the AE device
  * NOTE: the duplicated name will not be checked
  */
-void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
+int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	const struct pci_device_id *id;
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_client *client;
-	int ret = 0;
+	int ret;
+
+	if (!ae_dev)
+		return -ENODEV;
 
 	mutex_lock(&hnae3_common_lock);
 
@@ -257,14 +288,13 @@
 		if (!id)
 			continue;
 
-		ae_dev->ops = ae_algo->ops;
-
-		if (!ae_dev->ops) {
-			dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n");
+		if (!ae_algo->ops) {
+			dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n");
+			ret = -EOPNOTSUPP;
 			goto out_err;
 		}
+		ae_dev->ops = ae_algo->ops;
 
-		/* ae_dev init should set flag */
 		ret = ae_dev->ops->init_ae_dev(ae_dev);
 		if (ret) {
 			dev_err(&ae_dev->pdev->dev,
@@ -272,6 +302,7 @@
 			goto out_err;
 		}
 
+		/* ae_dev init should set flag */
 		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
 		break;
 	}
@@ -280,15 +311,22 @@
 	 * initialize the figure out client instance
 	 */
 	list_for_each_entry(client, &hnae3_client_list, node) {
-		ret = hnae3_match_n_instantiate(client, ae_dev, true);
+		ret = hnae3_init_client_instance(client, ae_dev);
 		if (ret)
 			dev_err(&ae_dev->pdev->dev,
 				"match and instantiation failed, ret = %d\n",
 				ret);
 	}
 
-out_err:
 	mutex_unlock(&hnae3_common_lock);
+
+	return 0;
+
+out_err:
+	list_del(&ae_dev->node);
+	mutex_unlock(&hnae3_common_lock);
+
+	return ret;
 }
 EXPORT_SYMBOL(hnae3_register_ae_dev);
 
@@ -301,6 +339,9 @@
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_client *client;
 
+	if (!ae_dev)
+		return;
+
 	mutex_lock(&hnae3_common_lock);
 	/* Check if there are matched ae_algo */
 	list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
@@ -312,10 +353,11 @@
 			continue;
 
 		list_for_each_entry(client, &hnae3_client_list, node)
-			hnae3_match_n_instantiate(client, ae_dev, false);
+			hnae3_uninit_client_instance(client, ae_dev);
 
 		ae_algo->ops->uninit_ae_dev(ae_dev);
 		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+		ae_dev->ops = NULL;
 	}
 
 	list_del(&ae_dev->node);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 67befff..a099893 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HNAE3_H
@@ -32,6 +32,8 @@
 
 #define HNAE3_MOD_VERSION "1.0"
 
+#define HNAE3_MIN_VECTOR_NUM	2 /* first one for misc, another for IO */
+
 /* Device IDs */
 #define HNAE3_DEV_ID_GE				0xA220
 #define HNAE3_DEV_ID_25GE			0xA221
@@ -51,15 +53,23 @@
 #define HNAE3_KNIC_CLIENT_INITED_B		0x3
 #define HNAE3_UNIC_CLIENT_INITED_B		0x4
 #define HNAE3_ROCE_CLIENT_INITED_B		0x5
+#define HNAE3_DEV_SUPPORT_FD_B			0x6
+#define HNAE3_DEV_SUPPORT_GRO_B			0x7
 
 #define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
 		BIT(HNAE3_DEV_SUPPORT_ROCE_B))
 
 #define hnae3_dev_roce_supported(hdev) \
-	hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
+	hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
 
 #define hnae3_dev_dcb_supported(hdev) \
-	hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
+	hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
+
+#define hnae3_dev_fd_supported(hdev) \
+	hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_FD_B)
+
+#define hnae3_dev_gro_supported(hdev) \
+	hnae3_get_bit((hdev)->ae_dev->flag, HNAE3_DEV_SUPPORT_GRO_B)
 
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
@@ -77,36 +87,57 @@
 	void __iomem *io_base;
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_handle *handle;
-	int tqp_index;	/* index in a handle */
-	u32 buf_size;	/* size for hnae_desc->addr, preset by AE */
-	u16 desc_num;	/* total number of desc */
+	int tqp_index;		/* index in a handle */
+	u32 buf_size;		/* size for hnae_desc->addr, preset by AE */
+	u16 tx_desc_num;	/* total number of tx desc */
+	u16 rx_desc_num;	/* total number of rx desc */
 };
 
-/*hnae3 loop mode*/
+struct hns3_mac_stats {
+	u64 tx_pause_cnt;
+	u64 rx_pause_cnt;
+};
+
+/* hnae3 loop mode */
 enum hnae3_loop {
-	HNAE3_MAC_INTER_LOOP_MAC,
-	HNAE3_MAC_INTER_LOOP_SERDES,
-	HNAE3_MAC_INTER_LOOP_PHY,
-	HNAE3_MAC_LOOP_NONE,
+	HNAE3_LOOP_APP,
+	HNAE3_LOOP_SERIAL_SERDES,
+	HNAE3_LOOP_PARALLEL_SERDES,
+	HNAE3_LOOP_PHY,
+	HNAE3_LOOP_NONE,
 };
 
 enum hnae3_client_type {
 	HNAE3_CLIENT_KNIC,
-	HNAE3_CLIENT_UNIC,
 	HNAE3_CLIENT_ROCE,
 };
 
-enum hnae3_dev_type {
-	HNAE3_DEV_KNIC,
-	HNAE3_DEV_UNIC,
-};
-
 /* mac media type */
 enum hnae3_media_type {
 	HNAE3_MEDIA_TYPE_UNKNOWN,
 	HNAE3_MEDIA_TYPE_FIBER,
 	HNAE3_MEDIA_TYPE_COPPER,
 	HNAE3_MEDIA_TYPE_BACKPLANE,
+	HNAE3_MEDIA_TYPE_NONE,
+};
+
+/* must be consistent with definition in firmware */
+enum hnae3_module_type {
+	HNAE3_MODULE_TYPE_UNKNOWN	= 0x00,
+	HNAE3_MODULE_TYPE_FIBRE_LR	= 0x01,
+	HNAE3_MODULE_TYPE_FIBRE_SR	= 0x02,
+	HNAE3_MODULE_TYPE_AOC		= 0x03,
+	HNAE3_MODULE_TYPE_CR		= 0x04,
+	HNAE3_MODULE_TYPE_KR		= 0x05,
+	HNAE3_MODULE_TYPE_TP		= 0x06,
+};
+
+enum hnae3_fec_mode {
+	HNAE3_FEC_AUTO = 0,
+	HNAE3_FEC_BASER,
+	HNAE3_FEC_RS,
+	HNAE3_FEC_USER_DEF,
 };
 
 enum hnae3_reset_notify_type {
@@ -114,18 +145,40 @@
 	HNAE3_DOWN_CLIENT,
 	HNAE3_INIT_CLIENT,
 	HNAE3_UNINIT_CLIENT,
+	HNAE3_RESTORE_CLIENT,
+};
+
+enum hnae3_hw_error_type {
+	HNAE3_PPU_POISON_ERROR,
+	HNAE3_CMDQ_ECC_ERROR,
+	HNAE3_IMP_RD_POISON_ERROR,
 };
 
 enum hnae3_reset_type {
 	HNAE3_VF_RESET,
+	HNAE3_VF_FUNC_RESET,
+	HNAE3_VF_PF_FUNC_RESET,
 	HNAE3_VF_FULL_RESET,
+	HNAE3_FLR_RESET,
 	HNAE3_FUNC_RESET,
-	HNAE3_CORE_RESET,
 	HNAE3_GLOBAL_RESET,
 	HNAE3_IMP_RESET,
+	HNAE3_UNKNOWN_RESET,
 	HNAE3_NONE_RESET,
 };
 
+enum hnae3_flr_state {
+	HNAE3_FLR_DOWN,
+	HNAE3_FLR_DONE,
+};
+
+enum hnae3_port_base_vlan_state {
+	HNAE3_PORT_BASE_VLAN_DISABLE,
+	HNAE3_PORT_BASE_VLAN_ENABLE,
+	HNAE3_PORT_BASE_VLAN_MODIFY,
+	HNAE3_PORT_BASE_VLAN_NOCHANGE,
+};
+
 struct hnae3_vector_info {
 	u8 __iomem *io_addr;
 	int vector;
@@ -139,6 +192,15 @@
 #define HNAE3_RING_GL_RX 0
 #define HNAE3_RING_GL_TX 1
 
+#define HNAE3_FW_VERSION_BYTE3_SHIFT	24
+#define HNAE3_FW_VERSION_BYTE3_MASK	GENMASK(31, 24)
+#define HNAE3_FW_VERSION_BYTE2_SHIFT	16
+#define HNAE3_FW_VERSION_BYTE2_MASK	GENMASK(23, 16)
+#define HNAE3_FW_VERSION_BYTE1_SHIFT	8
+#define HNAE3_FW_VERSION_BYTE1_MASK	GENMASK(15, 8)
+#define HNAE3_FW_VERSION_BYTE0_SHIFT	0
+#define HNAE3_FW_VERSION_BYTE0_MASK	GENMASK(7, 0)
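These masks split the u32 returned by get_fw_version() into four dotted bytes. A small decoding sketch using the header's own hnae3_get_field() helper; the dotted output format is an assumption:

```c
/* sketch: format a firmware version word as "b3.b2.b1.b0" */
static void hns3_print_fw_version(struct hnae3_handle *h, u32 fw_version)
{
	dev_info(&h->pdev->dev, "fw version %u.%u.%u.%u\n",
		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
				 HNAE3_FW_VERSION_BYTE3_SHIFT),
		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE2_MASK,
				 HNAE3_FW_VERSION_BYTE2_SHIFT),
		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE1_MASK,
				 HNAE3_FW_VERSION_BYTE1_SHIFT),
		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
				 HNAE3_FW_VERSION_BYTE0_SHIFT));
}
```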
+
 struct hnae3_ring_chain_node {
 	struct hnae3_ring_chain_node *next;
 	u32 tqp_index;
@@ -156,6 +218,8 @@
 	int (*setup_tc)(struct hnae3_handle *handle, u8 tc);
 	int (*reset_notify)(struct hnae3_handle *handle,
 			    enum hnae3_reset_notify_type type);
+	void (*process_hw_error)(struct hnae3_handle *handle,
+				 enum hnae3_hw_error_type);
 };
 
 #define HNAE3_CLIENT_NAME_LENGTH 16
@@ -172,7 +236,8 @@
 	const struct hnae3_ae_ops *ops;
 	struct list_head node;
 	u32 flag;
-	enum hnae3_dev_type dev_type;
+	unsigned long hw_err_reset_req;
+	enum hnae3_reset_type reset_type;
 	void *priv;
 };
 
@@ -190,15 +255,19 @@
  *   Enable the hardware
  * stop()
  *   Disable the hardware
+ * client_start()
+ *   Inform the hclge that the client has been started
+ * client_stop()
+ *   Inform the hclge that the client has been stopped
  * get_status()
  *   Get the carrier state of the back channel of the handle, 1 for ok, 0 for
  *   non-ok
  * get_ksettings_an_result()
  *   Get negotiation status,speed and duplex
- * update_speed_duplex_h()
- *   Update hardware speed and duplex
  * get_media_type()
  *   Get media type of MAC
+ * check_port_speed()
  *   Check whether the target speed is supported
  * adjust_link()
  *   Adjust link status
  * set_loopback()
@@ -215,6 +284,10 @@
  *   set autonegotiation of pause frame use
  * get_autoneg()
  *   get autonegotiation of pause frame use
+ * restart_autoneg()
+ *   restart autonegotiation
+ * halt_autoneg()
  *   halt/resume autonegotiation when autonegotiation is on
  * get_coalesce_usecs()
  *   get usecs to delay a TX interrupt after a packet is sent
  * get_rx_max_coalesced_frames()
@@ -239,6 +312,8 @@
  *   Remove multicast address from mac table
  * update_stats()
  *   Update Old network device statistics
+ * get_mac_stats()
+ *   get mac pause statistics including tx_cnt and rx_cnt
  * get_ethtool_stats()
  *   Get ethtool network device statistics
  * get_strings()
@@ -283,34 +358,47 @@
  *   Set vlan filter config of Ports
  * set_vf_vlan_filter()
  *   Set vlan filter config of vf
+ * restore_vlan_table()
+ *   Restore vlan filter entries after reset
  * enable_hw_strip_rxvtag()
  *   Enable/disable hardware strip vlan tag of packets received
+ * set_gro_en()
+ *   Enable/disable HW GRO
+ * add_arfs_entry()
+ *   Check the 5-tuple of the flow and create a flow director rule
  */
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
 	void (*uninit_ae_dev)(struct hnae3_ae_dev *ae_dev);
-
+	void (*flr_prepare)(struct hnae3_ae_dev *ae_dev);
+	void (*flr_done)(struct hnae3_ae_dev *ae_dev);
 	int (*init_client_instance)(struct hnae3_client *client,
 				    struct hnae3_ae_dev *ae_dev);
 	void (*uninit_client_instance)(struct hnae3_client *client,
 				       struct hnae3_ae_dev *ae_dev);
 	int (*start)(struct hnae3_handle *handle);
 	void (*stop)(struct hnae3_handle *handle);
+	int (*client_start)(struct hnae3_handle *handle);
+	void (*client_stop)(struct hnae3_handle *handle);
 	int (*get_status)(struct hnae3_handle *handle);
 	void (*get_ksettings_an_result)(struct hnae3_handle *handle,
 					u8 *auto_neg, u32 *speed, u8 *duplex);
 
-	int (*update_speed_duplex_h)(struct hnae3_handle *handle);
 	int (*cfg_mac_speed_dup_h)(struct hnae3_handle *handle, int speed,
 				   u8 duplex);
 
-	void (*get_media_type)(struct hnae3_handle *handle, u8 *media_type);
+	void (*get_media_type)(struct hnae3_handle *handle, u8 *media_type,
+			       u8 *module_type);
+	int (*check_port_speed)(struct hnae3_handle *handle, u32 speed);
+	void (*get_fec)(struct hnae3_handle *handle, u8 *fec_ability,
+			u8 *fec_mode);
+	int (*set_fec)(struct hnae3_handle *handle, u32 fec_mode);
 	void (*adjust_link)(struct hnae3_handle *handle, int speed, int duplex);
 	int (*set_loopback)(struct hnae3_handle *handle,
 			    enum hnae3_loop loop_mode, bool en);
 
-	void (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
-				 bool en_mc_pmc);
+	int (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
+				bool en_mc_pmc);
 	int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
 
 	void (*get_pauseparam)(struct hnae3_handle *handle,
@@ -320,6 +408,8 @@
 
 	int (*set_autoneg)(struct hnae3_handle *handle, bool enable);
 	int (*get_autoneg)(struct hnae3_handle *handle);
+	int (*restart_autoneg)(struct hnae3_handle *handle);
+	int (*halt_autoneg)(struct hnae3_handle *handle, bool halt);
 
 	void (*get_coalesce_usecs)(struct hnae3_handle *handle,
 				   u32 *tx_usecs, u32 *rx_usecs);
@@ -337,6 +427,8 @@
 	void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
 	int (*set_mac_addr)(struct hnae3_handle *handle, void *p,
 			    bool is_first);
+	int (*do_ioctl)(struct hnae3_handle *handle,
+			struct ifreq *ifr, int cmd);
 	int (*add_uc_addr)(struct hnae3_handle *handle,
 			   const unsigned char *addr);
 	int (*rm_uc_addr)(struct hnae3_handle *handle,
@@ -346,13 +438,12 @@
 			   const unsigned char *addr);
 	int (*rm_mc_addr)(struct hnae3_handle *handle,
 			  const unsigned char *addr);
-	int (*update_mta_status)(struct hnae3_handle *handle);
-
 	void (*set_tso_stats)(struct hnae3_handle *handle, int enable);
 	void (*update_stats)(struct hnae3_handle *handle,
 			     struct net_device_stats *net_stats);
 	void (*get_stats)(struct hnae3_handle *handle, u64 *data);
-
+	void (*get_mac_stats)(struct hnae3_handle *handle,
+			      struct hns3_mac_stats *mac_stats);
 	void (*get_strings)(struct hnae3_handle *handle,
 			    u32 stringset, u8 *data);
 	int (*get_sset_count)(struct hnae3_handle *handle, int stringset);
@@ -384,7 +475,7 @@
 				      int vector_num,
 				      struct hnae3_ring_chain_node *vr_chain);
 
-	void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+	int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
 	u32 (*get_fw_version)(struct hnae3_handle *handle);
 	void (*get_mdix_mode)(struct hnae3_handle *handle,
 			      u8 *tp_mdix_ctrl, u8 *tp_mdix);
@@ -395,12 +486,17 @@
 	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
 				  u16 vlan, u8 qos, __be16 proto);
 	int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
-	void (*reset_event)(struct hnae3_handle *handle);
+	void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle);
+	enum hnae3_reset_type (*get_reset_level)(struct hnae3_ae_dev *ae_dev,
+						 unsigned long *addr);
+	void (*set_default_reset_request)(struct hnae3_ae_dev *ae_dev,
+					  enum hnae3_reset_type rst_type);
 	void (*get_channels)(struct hnae3_handle *handle,
 			     struct ethtool_channels *ch);
 	void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
-				      u16 *free_tqps, u16 *max_rss_size);
-	int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num);
+				      u16 *alloc_tqps, u16 *max_rss_size);
+	int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num,
+			    bool rxfh_configured);
 	void (*get_flowctrl_adv)(struct hnae3_handle *handle,
 				 u32 *flowctrl_adv);
 	int (*set_led_id)(struct hnae3_handle *handle,
@@ -408,7 +504,33 @@
 	void (*get_link_mode)(struct hnae3_handle *handle,
 			      unsigned long *supported,
 			      unsigned long *advertising);
-	void (*get_port_type)(struct hnae3_handle *handle, u8 *port_type);
+	int (*add_fd_entry)(struct hnae3_handle *handle,
+			    struct ethtool_rxnfc *cmd);
+	int (*del_fd_entry)(struct hnae3_handle *handle,
+			    struct ethtool_rxnfc *cmd);
+	void (*del_all_fd_entries)(struct hnae3_handle *handle,
+				   bool clear_list);
+	int (*get_fd_rule_cnt)(struct hnae3_handle *handle,
+			       struct ethtool_rxnfc *cmd);
+	int (*get_fd_rule_info)(struct hnae3_handle *handle,
+				struct ethtool_rxnfc *cmd);
+	int (*get_fd_all_rules)(struct hnae3_handle *handle,
+				struct ethtool_rxnfc *cmd, u32 *rule_locs);
+	int (*restore_fd_rules)(struct hnae3_handle *handle);
+	void (*enable_fd)(struct hnae3_handle *handle, bool enable);
+	int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id,
+			      u16 flow_id, struct flow_keys *fkeys);
+	int (*dbg_run_cmd)(struct hnae3_handle *handle, const char *cmd_buf);
+	pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev);
+	bool (*get_hw_reset_stat)(struct hnae3_handle *handle);
+	bool (*ae_dev_resetting)(struct hnae3_handle *handle);
+	unsigned long (*ae_dev_reset_cnt)(struct hnae3_handle *handle);
+	int (*set_gro_en)(struct hnae3_handle *handle, bool enable);
+	u16 (*get_global_queue_id)(struct hnae3_handle *handle, u16 queue_id);
+	void (*set_timer_task)(struct hnae3_handle *handle, bool enable);
+	int (*mac_connect_phy)(struct hnae3_handle *handle);
+	void (*mac_disconnect_phy)(struct hnae3_handle *handle);
+	void (*restore_vlan_table)(struct hnae3_handle *handle);
 };
 
 struct hnae3_dcb_ops {
@@ -422,7 +544,6 @@
 	u8   (*getdcbx)(struct hnae3_handle *);
 	u8   (*setdcbx)(struct hnae3_handle *, u8);
 
-	int (*map_update)(struct hnae3_handle *);
 	int (*setup_tc)(struct hnae3_handle *, u8, u8 *);
 };
 
@@ -447,8 +568,10 @@
 struct hnae3_knic_private_info {
 	struct net_device *netdev; /* Set by KNIC client when init instance */
 	u16 rss_size;		   /* Allocated RSS queues */
+	u16 req_rss_size;
 	u16 rx_buf_len;
-	u16 num_desc;
+	u16 num_tx_desc;
+	u16 num_rx_desc;
 
 	u8 num_tc;		   /* Total number of enabled TCs */
 	u8 prio_tc[HNAE3_MAX_USER_PRIO];  /* TC indexed by prio */
@@ -459,6 +582,7 @@
 	const struct hnae3_dcb_ops *dcb_ops;
 
 	u16 int_rl_setting;
+	enum pkt_hash_types rss_type;
 };
 
 struct hnae3_roce_private_info {
@@ -466,30 +590,47 @@
 	void __iomem *roce_io_base;
 	int base_vector;
 	int num_vectors;
+
+	/* The attributes below are defined for the RoCE client; hnae3
+	 * gives them initial values, and the RoCE client can modify
+	 * and use them.
+	 */
+	unsigned long reset_state;
+	unsigned long instance_state;
+	unsigned long state;
 };
 
 struct hnae3_unic_private_info {
 	struct net_device *netdev;
 	u16 rx_buf_len;
-	u16 num_desc;
+	u16 num_tx_desc;
+	u16 num_rx_desc;
+
 	u16 num_tqps;	/* total number of tqps in this handle */
 	struct hnae3_queue **tqp;  /* array base of all TQPs of this instance */
 };
 
-#define HNAE3_SUPPORT_MAC_LOOPBACK    BIT(0)
+#define HNAE3_SUPPORT_APP_LOOPBACK    BIT(0)
 #define HNAE3_SUPPORT_PHY_LOOPBACK    BIT(1)
-#define HNAE3_SUPPORT_SERDES_LOOPBACK BIT(2)
+#define HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK	BIT(2)
 #define HNAE3_SUPPORT_VF	      BIT(3)
+#define HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK	BIT(4)
+
+#define HNAE3_USER_UPE		BIT(0)	/* unicast promisc enabled by user */
+#define HNAE3_USER_MPE		BIT(1)	/* multicast promisc enabled by user */
+#define HNAE3_BPE		BIT(2)	/* broadcast promisc enable */
+#define HNAE3_OVERFLOW_UPE	BIT(3)	/* unicast mac vlan overflow */
+#define HNAE3_OVERFLOW_MPE	BIT(4)	/* multicast mac vlan overflow */
+#define HNAE3_VLAN_FLTR		BIT(5)	/* enable vlan filter */
+#define HNAE3_UPE		(HNAE3_USER_UPE | HNAE3_OVERFLOW_UPE)
+#define HNAE3_MPE		(HNAE3_USER_MPE | HNAE3_OVERFLOW_MPE)
 
 struct hnae3_handle {
 	struct hnae3_client *client;
 	struct pci_dev *pdev;
 	void *priv;
 	struct hnae3_ae_algo *ae_algo;  /* the class who provides this handle */
-	u64 flags; /* Indicate the capabilities for this handle*/
-
-	unsigned long last_reset_time;
-	enum hnae3_reset_type reset_level;
+	u64 flags; /* Indicate the capabilities for this handle */
 
 	union {
 		struct net_device *netdev; /* first member */
@@ -499,6 +640,14 @@
 	};
 
 	u32 numa_node_mask;	/* for multi-chip support */
+
+	enum hnae3_port_base_vlan_state port_base_vlan_state;
+
+	u8 netdev_flags;
+	struct dentry *hnae3_dbgfs;
+
+	/* Network interface message level enabled bits */
+	u32 msg_enable;
 };
 
 #define hnae3_set_field(origin, mask, shift, val) \
@@ -513,7 +662,7 @@
 #define hnae3_get_bit(origin, shift) \
 	hnae3_get_field((origin), (0x1 << (shift)), (shift))
 
-void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
+int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
 void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
 
 void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo);
@@ -521,4 +670,8 @@
 
 void hnae3_unregister_client(struct hnae3_client *client);
 int hnae3_register_client(struct hnae3_client *client);
+
+void hnae3_set_client_init_flag(struct hnae3_client *client,
+				struct hnae3_ae_dev *ae_dev,
+				unsigned int inited);
 #endif
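A minimal sketch of a module that consumes this header by registering an hnae3_client, assuming the usual init_instance/uninit_instance callbacks in hnae3_client_ops (not shown in this hunk) and that the AE driver stores the ae_dev as PCI drvdata; all my_-prefixed names are hypothetical:

```c
#include <linux/module.h>
#include <linux/pci.h>
#include "hnae3.h"

static struct hnae3_client my_client;

static int my_init_instance(struct hnae3_handle *handle)
{
	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);

	/* allocate per-instance state here, then mark the client inited */
	hnae3_set_client_init_flag(&my_client, ae_dev, 1);
	return 0;
}

static void my_uninit_instance(struct hnae3_handle *handle, bool reset)
{
	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);

	hnae3_set_client_init_flag(&my_client, ae_dev, 0);
}

static const struct hnae3_client_ops my_client_ops = {
	.init_instance = my_init_instance,
	.uninit_instance = my_uninit_instance,
};

static struct hnae3_client my_client = {
	.name = "my_knic",
	.type = HNAE3_CLIENT_KNIC,
	.ops = &my_client_ops,
};

static int __init my_client_mod_init(void)
{
	return hnae3_register_client(&my_client);
}
module_init(my_client_mod_init);

static void __exit my_client_mod_exit(void)
{
	hnae3_unregister_client(&my_client);
}
module_exit(my_client_mod_exit);

MODULE_LICENSE("GPL");
```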
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
index ea5f8a8..d2ec4c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
@@ -4,44 +4,52 @@
 #include "hnae3.h"
 #include "hns3_enet.h"
 
-static
-int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets)
+static int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
 
+	if (hns3_nic_resetting(ndev))
+		return -EBUSY;
+
 	if (h->kinfo.dcb_ops->ieee_getets)
 		return h->kinfo.dcb_ops->ieee_getets(h, ets);
 
 	return -EOPNOTSUPP;
 }
 
-static
-int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets)
+static int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
 
+	if (hns3_nic_resetting(ndev))
+		return -EBUSY;
+
 	if (h->kinfo.dcb_ops->ieee_setets)
 		return h->kinfo.dcb_ops->ieee_setets(h, ets);
 
 	return -EOPNOTSUPP;
 }
 
-static
-int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+static int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
 
+	if (hns3_nic_resetting(ndev))
+		return -EBUSY;
+
 	if (h->kinfo.dcb_ops->ieee_getpfc)
 		return h->kinfo.dcb_ops->ieee_getpfc(h, pfc);
 
 	return -EOPNOTSUPP;
 }
 
-static
-int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+static int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
 
+	if (hns3_nic_resetting(ndev))
+		return -EBUSY;
+
 	if (h->kinfo.dcb_ops->ieee_setpfc)
 		return h->kinfo.dcb_ops->ieee_setpfc(h, pfc);
 
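The new -EBUSY guards rely on hns3_nic_resetting(), declared elsewhere in the driver; a sketch of what that predicate looks like, assuming it tests the RESETTING bit in the private state word used throughout hns3_enet:

```c
#include "hnae3.h"
#include "hns3_enet.h"

/* sketch; assumes HNS3_NIC_STATE_RESETTING tracks an in-flight reset */
bool hns3_nic_resetting(struct net_device *netdev)
{
	struct hns3_nic_priv *priv = netdev_priv(netdev);

	return test_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
}
```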
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
new file mode 100644
index 0000000..28961a6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2018-2019 Hisilicon Limited. */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+#define HNS3_DBG_READ_LEN 256
+#define HNS3_DBG_WRITE_LEN 1024
+
+static struct dentry *hns3_dbgfs_root;
+
+static int hns3_dbg_queue_info(struct hnae3_handle *h,
+			       const char *cmd_buf)
+{
+	struct hns3_nic_priv *priv = h->priv;
+	struct hns3_nic_ring_data *ring_data;
+	struct hns3_enet_ring *ring;
+	u32 base_add_l, base_add_h;
+	u32 queue_num, queue_max;
+	u32 value, i = 0;
+	int cnt;
+
+	if (!priv->ring_data) {
+		dev_err(&h->pdev->dev, "ring_data is NULL\n");
+		return -EFAULT;
+	}
+
+	queue_max = h->kinfo.num_tqps;
+	cnt = kstrtouint(&cmd_buf[11], 0, &queue_num);
+	if (cnt)
+		queue_num = 0;
+	else
+		queue_max = queue_num + 1;
+
+	dev_info(&h->pdev->dev, "queue info\n");
+
+	if (queue_num >= h->kinfo.num_tqps) {
+		dev_err(&h->pdev->dev,
+			"Queue number(%u) is out of range(0-%u)\n", queue_num,
+			h->kinfo.num_tqps - 1);
+		return -EINVAL;
+	}
+
+	ring_data = priv->ring_data;
+	for (i = queue_num; i < queue_max; i++) {
+		/* Each iteration must check whether the instance is being
+		 * reset, to avoid referencing invalid memory, and the code
+		 * below must finish executing within 100ms.
+		 */
+		if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+		    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
+			return -EPERM;
+
+		ring = ring_data[(u32)(i + h->kinfo.num_tqps)].ring;
+		base_add_h = readl_relaxed(ring->tqp->io_base +
+					   HNS3_RING_RX_RING_BASEADDR_H_REG);
+		base_add_l = readl_relaxed(ring->tqp->io_base +
+					   HNS3_RING_RX_RING_BASEADDR_L_REG);
+		dev_info(&h->pdev->dev, "RX(%d) BASE ADD: 0x%08x%08x\n", i,
+			 base_add_h, base_add_l);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_RX_RING_BD_NUM_REG);
+		dev_info(&h->pdev->dev, "RX(%d) RING BD NUM: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_RX_RING_BD_LEN_REG);
+		dev_info(&h->pdev->dev, "RX(%d) RING BD LEN: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_RX_RING_TAIL_REG);
+		dev_info(&h->pdev->dev, "RX(%d) RING TAIL: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_RX_RING_HEAD_REG);
+		dev_info(&h->pdev->dev, "RX(%d) RING HEAD: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_RX_RING_FBDNUM_REG);
+		dev_info(&h->pdev->dev, "RX(%d) RING FBDNUM: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_RX_RING_PKTNUM_RECORD_REG);
+		dev_info(&h->pdev->dev, "RX(%d) RING PKTNUM: %u\n", i, value);
+
+		ring = ring_data[i].ring;
+		base_add_h = readl_relaxed(ring->tqp->io_base +
+					   HNS3_RING_TX_RING_BASEADDR_H_REG);
+		base_add_l = readl_relaxed(ring->tqp->io_base +
+					   HNS3_RING_TX_RING_BASEADDR_L_REG);
+		dev_info(&h->pdev->dev, "TX(%d) BASE ADD: 0x%08x%08x\n", i,
+			 base_add_h, base_add_l);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_BD_NUM_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING BD NUM: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_TC_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING TC: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_TAIL_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING TAIL: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_HEAD_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING HEAD: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_FBDNUM_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING FBDNUM: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_OFFSET_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING OFFSET: %u\n", i, value);
+
+		value = readl_relaxed(ring->tqp->io_base +
+				      HNS3_RING_TX_RING_PKTNUM_RECORD_REG);
+		dev_info(&h->pdev->dev, "TX(%d) RING PKTNUM: %u\n\n", i,
+			 value);
+	}
+
+	return 0;
+}
+
+static int hns3_dbg_queue_map(struct hnae3_handle *h)
+{
+	struct hns3_nic_priv *priv = h->priv;
+	struct hns3_nic_ring_data *ring_data;
+	int i;
+
+	if (!h->ae_algo->ops->get_global_queue_id)
+		return -EOPNOTSUPP;
+
+	dev_info(&h->pdev->dev, "map info for queue id and vector id\n");
+	dev_info(&h->pdev->dev,
+		 "local queue id | global queue id | vector id\n");
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		u16 global_qid;
+
+		global_qid = h->ae_algo->ops->get_global_queue_id(h, i);
+		ring_data = &priv->ring_data[i];
+		if (!ring_data || !ring_data->ring ||
+		    !ring_data->ring->tqp_vector)
+			continue;
+
+		dev_info(&h->pdev->dev,
+			 "      %4d            %4d            %4d\n",
+			 i, global_qid,
+			 ring_data->ring->tqp_vector->vector_irq);
+	}
+
+	return 0;
+}
+
+static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
+{
+	struct hns3_nic_priv *priv = h->priv;
+	struct hns3_nic_ring_data *ring_data;
+	struct hns3_desc *rx_desc, *tx_desc;
+	struct device *dev = &h->pdev->dev;
+	struct hns3_enet_ring *ring;
+	u32 tx_index, rx_index;
+	u32 q_num, value;
+	dma_addr_t addr;
+	int cnt;
+
+	cnt = sscanf(&cmd_buf[8], "%u %u", &q_num, &tx_index);
+	if (cnt == 2) {
+		rx_index = tx_index;
+	} else if (cnt != 1) {
+		dev_err(dev, "bd info: bad command string, cnt=%d\n", cnt);
+		return -EINVAL;
+	}
+
+	if (q_num >= h->kinfo.num_tqps) {
+		dev_err(dev, "Queue number(%u) is out of range(0-%u)\n", q_num,
+			h->kinfo.num_tqps - 1);
+		return -EINVAL;
+	}
+
+	ring_data = priv->ring_data;
+	ring  = ring_data[q_num].ring;
+	value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
+	tx_index = (cnt == 1) ? value : tx_index;
+
+	if (tx_index >= ring->desc_num) {
+		dev_err(dev, "bd index(%u) is out of range(0-%u)\n", tx_index,
+			ring->desc_num - 1);
+		return -EINVAL;
+	}
+
+	tx_desc = &ring->desc[tx_index];
+	addr = le64_to_cpu(tx_desc->addr);
+	dev_info(dev, "TX Queue Num: %u, BD Index: %u\n", q_num, tx_index);
+	dev_info(dev, "(TX)addr: %pad\n", &addr);
+	dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.vlan_tag);
+	dev_info(dev, "(TX)send_size: %u\n", tx_desc->tx.send_size);
+	dev_info(dev, "(TX)vlan_tso: %u\n", tx_desc->tx.type_cs_vlan_tso);
+	dev_info(dev, "(TX)l2_len: %u\n", tx_desc->tx.l2_len);
+	dev_info(dev, "(TX)l3_len: %u\n", tx_desc->tx.l3_len);
+	dev_info(dev, "(TX)l4_len: %u\n", tx_desc->tx.l4_len);
+	dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.outer_vlan_tag);
+	dev_info(dev, "(TX)tv: %u\n", tx_desc->tx.tv);
+	dev_info(dev, "(TX)vlan_msec: %u\n", tx_desc->tx.ol_type_vlan_msec);
+	dev_info(dev, "(TX)ol2_len: %u\n", tx_desc->tx.ol2_len);
+	dev_info(dev, "(TX)ol3_len: %u\n", tx_desc->tx.ol3_len);
+	dev_info(dev, "(TX)ol4_len: %u\n", tx_desc->tx.ol4_len);
+	dev_info(dev, "(TX)paylen: %u\n", tx_desc->tx.paylen);
+	dev_info(dev, "(TX)vld_ra_ri: %u\n", tx_desc->tx.bdtp_fe_sc_vld_ra_ri);
+	dev_info(dev, "(TX)mss: %u\n", tx_desc->tx.mss);
+
+	ring  = ring_data[q_num + h->kinfo.num_tqps].ring;
+	value = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG);
+	rx_index = (cnt == 1) ? value : tx_index;
+	rx_desc	 = &ring->desc[rx_index];
+
+	addr = le64_to_cpu(rx_desc->addr);
+	dev_info(dev, "RX Queue Num: %u, BD Index: %u\n", q_num, rx_index);
+	dev_info(dev, "(RX)addr: %pad\n", &addr);
+	dev_info(dev, "(RX)l234_info: %u\n", rx_desc->rx.l234_info);
+	dev_info(dev, "(RX)pkt_len: %u\n", rx_desc->rx.pkt_len);
+	dev_info(dev, "(RX)size: %u\n", rx_desc->rx.size);
+	dev_info(dev, "(RX)rss_hash: %u\n", rx_desc->rx.rss_hash);
+	dev_info(dev, "(RX)fd_id: %u\n", rx_desc->rx.fd_id);
+	dev_info(dev, "(RX)vlan_tag: %u\n", rx_desc->rx.vlan_tag);
+	dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n", rx_desc->rx.o_dm_vlan_id_fb);
+	dev_info(dev, "(RX)ot_vlan_tag: %u\n", rx_desc->rx.ot_vlan_tag);
+	dev_info(dev, "(RX)bd_base_info: %u\n", rx_desc->rx.bd_base_info);
+
+	return 0;
+}
+
+static void hns3_dbg_help(struct hnae3_handle *h)
+{
+#define HNS3_DBG_BUF_LEN 256
+
+	char printf_buf[HNS3_DBG_BUF_LEN];
+
+	dev_info(&h->pdev->dev, "available commands\n");
+	dev_info(&h->pdev->dev, "queue info <number>\n");
+	dev_info(&h->pdev->dev, "queue map\n");
+	dev_info(&h->pdev->dev, "bd info <q_num> <bd index>\n");
+
+	if (!hns3_is_phys_func(h->pdev))
+		return;
+
+	dev_info(&h->pdev->dev, "dump fd tcam\n");
+	dev_info(&h->pdev->dev, "dump tc\n");
+	dev_info(&h->pdev->dev, "dump tm map <q_num>\n");
+	dev_info(&h->pdev->dev, "dump tm\n");
+	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
+	dev_info(&h->pdev->dev, "dump qos pri map\n");
+	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
+	dev_info(&h->pdev->dev, "dump mng tbl\n");
+	dev_info(&h->pdev->dev, "dump reset info\n");
+	dev_info(&h->pdev->dev, "dump m7 info\n");
+	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
+	dev_info(&h->pdev->dev, "dump mac tnl status\n");
+
+	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
+	strncat(printf_buf, "dump reg [[bios common] [ssu <port_id>]",
+		HNS3_DBG_BUF_LEN - 1);
+	strncat(printf_buf + strlen(printf_buf),
+		" [igu egu <port_id>] [rpu <tc_queue_num>]",
+		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
+	strncat(printf_buf + strlen(printf_buf),
+		" [rtc] [ppp] [rcb] [tqp <queue_num>]]\n",
+		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
+	dev_info(&h->pdev->dev, "%s", printf_buf);
+
+	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
+	strncat(printf_buf, "dump reg dcb <port_id> <pri_id> <pg_id>",
+		HNS3_DBG_BUF_LEN - 1);
+	strncat(printf_buf + strlen(printf_buf), " <rq_id> <nq_id> <qset_id>\n",
+		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
+	dev_info(&h->pdev->dev, "%s", printf_buf);
+}
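The strncat chain above could equally be written with scnprintf(), whose return value replaces the repeated strlen() bookkeeping. A behavior-preserving sketch under that assumption (hypothetical helper name):

```c
/* sketch: build the "dump reg" help line with scnprintf offsets */
static void hns3_dbg_help_reg_line(struct hnae3_handle *h)
{
	char buf[256];
	int off = 0;

	off += scnprintf(buf + off, sizeof(buf) - off,
			 "dump reg [[bios common] [ssu <port_id>]");
	off += scnprintf(buf + off, sizeof(buf) - off,
			 " [igu egu <port_id>] [rpu <tc_queue_num>]");
	off += scnprintf(buf + off, sizeof(buf) - off,
			 " [rtc] [ppp] [rcb] [tqp <queue_num>]]\n");
	dev_info(&h->pdev->dev, "%s", buf);
}
```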
+
+static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer,
+				 size_t count, loff_t *ppos)
+{
+	int uncopy_bytes;
+	char *buf;
+	int len;
+
+	if (*ppos != 0)
+		return 0;
+
+	if (count < HNS3_DBG_READ_LEN)
+		return -ENOSPC;
+
+	buf = kzalloc(HNS3_DBG_READ_LEN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	len = snprintf(buf, HNS3_DBG_READ_LEN, "%s\n",
+		       "Please echo help to cmd to get help information");
+	uncopy_bytes = copy_to_user(buffer, buf, len);
+
+	kfree(buf);
+
+	if (uncopy_bytes)
+		return -EFAULT;
+
+	return (*ppos = len);
+}
+
+static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer,
+				  size_t count, loff_t *ppos)
+{
+	struct hnae3_handle *handle = filp->private_data;
+	struct hns3_nic_priv *priv  = handle->priv;
+	char *cmd_buf, *cmd_buf_tmp;
+	int uncopied_bytes;
+	int ret = 0;
+
+	if (*ppos != 0)
+		return 0;
+
+	/* Check whether the instance is being reset. */
+	if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+	    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
+		return 0;
+
+	if (count > HNS3_DBG_WRITE_LEN)
+		return -ENOSPC;
+
+	cmd_buf = kzalloc(count + 1, GFP_KERNEL);
+	if (!cmd_buf)
+		return -ENOMEM;
+
+	uncopied_bytes = copy_from_user(cmd_buf, buffer, count);
+	if (uncopied_bytes) {
+		kfree(cmd_buf);
+		return -EFAULT;
+	}
+
+	cmd_buf[count] = '\0';
+
+	cmd_buf_tmp = strchr(cmd_buf, '\n');
+	if (cmd_buf_tmp) {
+		*cmd_buf_tmp = '\0';
+		count = cmd_buf_tmp - cmd_buf + 1;
+	}
+
+	if (strncmp(cmd_buf, "help", 4) == 0)
+		hns3_dbg_help(handle);
+	else if (strncmp(cmd_buf, "queue info", 10) == 0)
+		ret = hns3_dbg_queue_info(handle, cmd_buf);
+	else if (strncmp(cmd_buf, "queue map", 9) == 0)
+		ret = hns3_dbg_queue_map(handle);
+	else if (strncmp(cmd_buf, "bd info", 7) == 0)
+		ret = hns3_dbg_bd_info(handle, cmd_buf);
+	else if (handle->ae_algo->ops->dbg_run_cmd)
+		ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
+	else
+		ret = -EOPNOTSUPP;
+
+	if (ret)
+		hns3_dbg_help(handle);
+
+	kfree(cmd_buf);
+	cmd_buf = NULL;
+
+	return count;
+}
+
+static const struct file_operations hns3_dbg_cmd_fops = {
+	.owner = THIS_MODULE,
+	.open  = simple_open,
+	.read  = hns3_dbg_cmd_read,
+	.write = hns3_dbg_cmd_write,
+};
+
+void hns3_dbg_init(struct hnae3_handle *handle)
+{
+	const char *name = pci_name(handle->pdev);
+
+	handle->hnae3_dbgfs = debugfs_create_dir(name, hns3_dbgfs_root);
+
+	debugfs_create_file("cmd", 0600, handle->hnae3_dbgfs, handle,
+			    &hns3_dbg_cmd_fops);
+}
+
+void hns3_dbg_uninit(struct hnae3_handle *handle)
+{
+	debugfs_remove_recursive(handle->hnae3_dbgfs);
+	handle->hnae3_dbgfs = NULL;
+}
+
+void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
+{
+	hns3_dbgfs_root = debugfs_create_dir(debugfs_dir_name, NULL);
+}
+
+void hns3_dbg_unregister_debugfs(void)
+{
+	debugfs_remove_recursive(hns3_dbgfs_root);
+	hns3_dbgfs_root = NULL;
+}
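From userspace the interface is driven by writing commands into the per-device cmd file; command output goes to the kernel log, while read() only returns a hint string. A hedged demo (the debugfs directory name and PCI address are placeholders); note that the read buffer must hold at least HNS3_DBG_READ_LEN bytes and the file offset must be rewound after the write:

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define DBG_READ_LEN 256	/* matches HNS3_DBG_READ_LEN */

int main(void)
{
	const char *path = "/sys/kernel/debug/hns3/0000:7d:00.0/cmd";
	char buf[DBG_READ_LEN + 1];
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* the queue/vector map is printed via dev_info(); see dmesg */
	if (write(fd, "queue map\n", 10) < 0)
		perror("write");

	lseek(fd, 0, SEEK_SET);	/* the handlers only serve offset 0 */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* prints the help hint */
	}

	close(fd);
	return 0;
}
```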
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index b7b2f82..616cad0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4,23 +4,38 @@
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/interrupt.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/aer.h>
 #include <linux/skbuff.h>
 #include <linux/sctp.h>
 #include <linux/vermagic.h>
 #include <net/gre.h>
+#include <net/ip6_checksum.h>
 #include <net/pkt_cls.h>
+#include <net/tcp.h>
 #include <net/vxlan.h>
 
 #include "hnae3.h"
 #include "hns3_enet.h"
 
-static void hns3_clear_all_ring(struct hnae3_handle *h);
-static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h);
+#define hns3_set_field(origin, shift, val)	((origin) |= ((val) << (shift)))
+#define hns3_tx_bd_count(S)	DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE)
+
+#define hns3_rl_err(fmt, ...)						\
+	do {								\
+		if (net_ratelimit())					\
+			netdev_err(fmt, ##__VA_ARGS__);			\
+	} while (0)
+
+static void hns3_clear_all_ring(struct hnae3_handle *h, bool force);
+static void hns3_remove_hw_addr(struct net_device *netdev);
 
 static const char hns3_driver_name[] = "hns3";
 const char hns3_driver_version[] = VERMAGIC_STRING;
@@ -29,6 +44,16 @@
 static const char hns3_copyright[] = "Copyright (c) 2017 Huawei Corporation.";
 static struct hnae3_client client;
 
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, " Network interface message level setting");
+
+#define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
+			   NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
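The debug parameter feeds the standard netif message-level machinery that gates the netif_dbg() calls added further down; a sketch of the usual wiring into the handle's new msg_enable field (the helper name is hypothetical):

```c
#include <linux/netdevice.h>
#include "hnae3.h"

/* hypothetical helper; run once while initializing a client instance */
static void hns3_init_msg_level(struct hnae3_handle *handle)
{
	handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL);
}
```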
+
+#define HNS3_INNER_VLAN_TAG	1
+#define HNS3_OUTER_VLAN_TAG	2
+
 /* hns3_pci_tbl - PCI Device ID Table
  *
  * Last entry must be all 0s
@@ -61,7 +86,7 @@
 {
 	struct hns3_enet_tqp_vector *tqp_vector = vector;
 
-	napi_schedule(&tqp_vector->napi);
+	napi_schedule_irqoff(&tqp_vector->napi);
 
 	return IRQ_HANDLED;
 }
@@ -77,6 +102,9 @@
 		if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED)
 			continue;
 
+		/* clear the affinity mask */
+		irq_set_affinity_hint(tqp_vectors->vector_irq, NULL);
+
 		/* release the irq resource */
 		free_irq(tqp_vectors->vector_irq, tqp_vectors);
 		tqp_vectors->irq_init_flag = HNS3_VECTOR_NOT_INITED;
@@ -119,14 +147,17 @@
 		tqp_vectors->name[HNAE3_INT_NAME_LEN - 1] = '\0';
 
 		ret = request_irq(tqp_vectors->vector_irq, hns3_irq_handle, 0,
-				  tqp_vectors->name,
-				       tqp_vectors);
+				  tqp_vectors->name, tqp_vectors);
 		if (ret) {
 			netdev_err(priv->netdev, "request irq(%d) fail\n",
 				   tqp_vectors->vector_irq);
+			hns3_nic_uninit_irq(priv);
 			return ret;
 		}
 
+		irq_set_affinity_hint(tqp_vectors->vector_irq,
+				      &tqp_vectors->affinity_mask);
+
 		tqp_vectors->irq_init_flag = HNS3_VECTOR_INITED;
 	}
 
@@ -195,24 +226,18 @@
 static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
 				   struct hns3_nic_priv *priv)
 {
-	struct hnae3_handle *h = priv->ae_handle;
-
 	/* initialize the configuration for interrupt coalescing.
 	 * 1. GL (Interrupt Gap Limiter)
 	 * 2. RL (Interrupt Rate Limiter)
+	 *
+	 * Default: enable interrupt coalescing self-adaptive and GL
 	 */
-
-	/* Default: enable interrupt coalescing self-adaptive and GL */
 	tqp_vector->tx_group.coal.gl_adapt_enable = 1;
 	tqp_vector->rx_group.coal.gl_adapt_enable = 1;
 
 	tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K;
 	tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K;
 
-	/* Default: disable RL */
-	h->kinfo.int_rl_setting = 0;
-
-	tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
 	tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
 	tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
 }
@@ -260,8 +285,7 @@
 	ret = netif_set_real_num_tx_queues(netdev, queue_size);
 	if (ret) {
 		netdev_err(netdev,
-			   "netif_set_real_num_tx_queues fail, ret=%d!\n",
-			   ret);
+			   "netif_set_real_num_tx_queues fail, ret=%d!\n", ret);
 		return ret;
 	}
 
@@ -277,12 +301,64 @@
 
 static u16 hns3_get_max_available_channels(struct hnae3_handle *h)
 {
-	u16 free_tqps, max_rss_size, max_tqps;
+	u16 alloc_tqps, max_rss_size, rss_size;
 
-	h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
-	max_tqps = h->kinfo.num_tc * max_rss_size;
+	h->ae_algo->ops->get_tqps_and_rss_info(h, &alloc_tqps, &max_rss_size);
+	rss_size = alloc_tqps / h->kinfo.num_tc;
 
-	return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+	return min_t(u16, rss_size, max_rss_size);
+}
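The reworked math derives the per-TC share from the TQPs already allocated, then clamps it to the hardware RSS limit. A standalone worked example with assumed numbers:

```c
#include <stdio.h>

/* illustrative values: 16 allocated TQPs, 4 TCs, hardware cap of 8 */
int main(void)
{
	unsigned short alloc_tqps = 16, num_tc = 4, max_rss_size = 8;
	unsigned short rss_size = alloc_tqps / num_tc;

	/* prints 4: the per-TC share, within the hardware limit */
	printf("%u\n", (unsigned)(rss_size < max_rss_size ?
				  rss_size : max_rss_size));
	return 0;
}
```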
+
+static void hns3_tqp_enable(struct hnae3_queue *tqp)
+{
+	u32 rcb_reg;
+
+	rcb_reg = hns3_read_dev(tqp, HNS3_RING_EN_REG);
+	rcb_reg |= BIT(HNS3_RING_EN_B);
+	hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg);
+}
+
+static void hns3_tqp_disable(struct hnae3_queue *tqp)
+{
+	u32 rcb_reg;
+
+	rcb_reg = hns3_read_dev(tqp, HNS3_RING_EN_REG);
+	rcb_reg &= ~BIT(HNS3_RING_EN_B);
+	hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg);
+}
+
+static void hns3_free_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+	netdev->rx_cpu_rmap = NULL;
+#endif
+}
+
+static int hns3_set_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_enet_tqp_vector *tqp_vector;
+	int i, ret;
+
+	if (!netdev->rx_cpu_rmap) {
+		netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->vector_num);
+		if (!netdev->rx_cpu_rmap)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vector = &priv->tqp_vector[i];
+		ret = irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+				       tqp_vector->vector_irq);
+		if (ret) {
+			hns3_free_rx_cpu_rmap(netdev);
+			return ret;
+		}
+	}
+#endif
+	return 0;
 }
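With the rmap populated, the stack's accelerated-RFS path can steer flows into the new add_arfs_entry op. A sketch of the ndo_rx_flow_steer hook that would consume it, mirroring the op's signature in hnae3.h (the function name and exact checks are assumptions):

```c
#ifdef CONFIG_RFS_ACCEL
static int hns3_rx_flow_steer_sketch(struct net_device *dev,
				     const struct sk_buff *skb,
				     u16 rxq_index, u32 flow_id)
{
	struct hnae3_handle *h = hns3_get_handle(dev);
	struct flow_keys fkeys;

	if (!h->ae_algo->ops->add_arfs_entry)
		return -EOPNOTSUPP;

	if (skb->encapsulation)
		return -EPROTONOSUPPORT;

	if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0))
		return -EPROTONOSUPPORT;

	return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys);
}
#endif
```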
 
 static int hns3_nic_net_up(struct net_device *netdev)
@@ -296,35 +372,72 @@
 	if (ret)
 		return ret;
 
+	/* the device can work without cpu rmap, only aRFS needs it */
+	ret = hns3_set_rx_cpu_rmap(netdev);
+	if (ret)
+		netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret);
+
 	/* get irq resource for all vectors */
 	ret = hns3_nic_init_irq(priv);
 	if (ret) {
-		netdev_err(netdev, "hns init irq failed! ret=%d\n", ret);
-		return ret;
+		netdev_err(netdev, "init irq failed! ret=%d\n", ret);
+		goto free_rmap;
 	}
 
+	clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+
 	/* enable the vectors */
 	for (i = 0; i < priv->vector_num; i++)
 		hns3_vector_enable(&priv->tqp_vector[i]);
 
+	/* enable rcb */
+	for (j = 0; j < h->kinfo.num_tqps; j++)
+		hns3_tqp_enable(h->kinfo.tqp[j]);
+
 	/* start the ae_dev */
 	ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0;
 	if (ret)
 		goto out_start_err;
 
-	clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
-
 	return 0;
 
 out_start_err:
+	set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+	while (j--)
+		hns3_tqp_disable(h->kinfo.tqp[j]);
+
 	for (j = i - 1; j >= 0; j--)
 		hns3_vector_disable(&priv->tqp_vector[j]);
 
 	hns3_nic_uninit_irq(priv);
-
+free_rmap:
+	hns3_free_rx_cpu_rmap(netdev);
 	return ret;
 }
 
+static void hns3_config_xps(struct hns3_nic_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->vector_num; i++) {
+		struct hns3_enet_tqp_vector *tqp_vector = &priv->tqp_vector[i];
+		struct hns3_enet_ring *ring = tqp_vector->tx_group.ring;
+
+		while (ring) {
+			int ret;
+
+			ret = netif_set_xps_queue(priv->netdev,
+						  &tqp_vector->affinity_mask,
+						  ring->tqp->tqp_index);
+			if (ret)
+				netdev_warn(priv->netdev,
+					    "set xps queue failed: %d", ret);
+
+			ring = ring->next;
+		}
+	}
+}
+
 static int hns3_nic_net_open(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -332,6 +445,9 @@
 	struct hnae3_knic_private_info *kinfo;
 	int i, ret;
 
+	if (hns3_nic_resetting(netdev))
+		return -EBUSY;
+
 	netif_carrier_off(netdev);
 
 	ret = hns3_nic_set_real_num_queue(netdev);
@@ -340,47 +456,86 @@
 
 	ret = hns3_nic_net_up(netdev);
 	if (ret) {
-		netdev_err(netdev,
-			   "hns net up fail, ret=%d!\n", ret);
+		netdev_err(netdev, "net up fail, ret=%d!\n", ret);
 		return ret;
 	}
 
 	kinfo = &h->kinfo;
-	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
-		netdev_set_prio_tc_map(netdev, i,
-				       kinfo->prio_tc[i]);
-	}
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
+		netdev_set_prio_tc_map(netdev, i, kinfo->prio_tc[i]);
 
-	priv->ae_handle->last_reset_time = jiffies;
+	if (h->ae_algo->ops->set_timer_task)
+		h->ae_algo->ops->set_timer_task(priv->ae_handle, true);
+
+	hns3_config_xps(priv);
+
+	netif_dbg(h, drv, netdev, "net open\n");
+
 	return 0;
 }
 
+static void hns3_reset_tx_queue(struct hnae3_handle *h)
+{
+	struct net_device *ndev = h->kinfo.netdev;
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct netdev_queue *dev_queue;
+	u32 i;
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		dev_queue = netdev_get_tx_queue(ndev,
+						priv->ring_data[i].queue_index);
+		netdev_tx_reset_queue(dev_queue);
+	}
+}
+
 static void hns3_nic_net_down(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	const struct hnae3_ae_ops *ops;
 	int i;
 
-	if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
-		return;
-
 	/* disable vectors */
 	for (i = 0; i < priv->vector_num; i++)
 		hns3_vector_disable(&priv->tqp_vector[i]);
 
+	/* disable rcb */
+	for (i = 0; i < h->kinfo.num_tqps; i++)
+		hns3_tqp_disable(h->kinfo.tqp[i]);
+
 	/* stop ae_dev */
 	ops = priv->ae_handle->ae_algo->ops;
 	if (ops->stop)
 		ops->stop(priv->ae_handle);
 
+	hns3_free_rx_cpu_rmap(netdev);
+
 	/* free irq resources */
 	hns3_nic_uninit_irq(priv);
 
-	hns3_clear_all_ring(priv->ae_handle);
+	/* delay ring buffer clearing to hns3_reset_notify_uninit_enet
+	 * during the reset process, because the driver may not be able
+	 * to disable the ring through firmware when bringing down the netdev.
+	 */
+	if (!hns3_nic_resetting(netdev))
+		hns3_clear_all_ring(priv->ae_handle, false);
+
+	hns3_reset_tx_queue(priv->ae_handle);
 }
 
 static int hns3_nic_net_stop(struct net_device *netdev)
 {
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+		return 0;
+
+	netif_dbg(h, drv, netdev, "net stop\n");
+
+	if (h->ae_algo->ops->set_timer_task)
+		h->ae_algo->ops->set_timer_task(priv->ae_handle, false);
+
 	netif_tx_stop_all_queues(netdev);
 	netif_carrier_off(netdev);
 
@@ -433,26 +588,83 @@
 	return 0;
 }
 
+static u8 hns3_get_netdev_flags(struct net_device *netdev)
+{
+	u8 flags = 0;
+
+	if (netdev->flags & IFF_PROMISC) {
+		flags = HNAE3_USER_UPE | HNAE3_USER_MPE | HNAE3_BPE;
+	} else {
+		flags |= HNAE3_VLAN_FLTR;
+		if (netdev->flags & IFF_ALLMULTI)
+			flags |= HNAE3_USER_MPE;
+	}
+
+	return flags;
+}
+
 static void hns3_nic_set_rx_mode(struct net_device *netdev)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	u8 new_flags;
+	int ret;
+
+	new_flags = hns3_get_netdev_flags(netdev);
+
+	ret = __dev_uc_sync(netdev, hns3_nic_uc_sync, hns3_nic_uc_unsync);
+	if (ret) {
+		netdev_err(netdev, "sync uc address fail\n");
+		if (ret == -ENOSPC)
+			new_flags |= HNAE3_OVERFLOW_UPE;
+	}
+
+	if (netdev->flags & IFF_MULTICAST) {
+		ret = __dev_mc_sync(netdev, hns3_nic_mc_sync,
+				    hns3_nic_mc_unsync);
+		if (ret) {
+			netdev_err(netdev, "sync mc address fail\n");
+			if (ret == -ENOSPC)
+				new_flags |= HNAE3_OVERFLOW_MPE;
+		}
+	}
+
+	/* In user-mode promiscuous mode, vlan filtering is disabled to
+	 * let all packets in; in MAC-VLAN table overflow promiscuous
+	 * mode, vlan filtering stays enabled.
+	 */
+	hns3_enable_vlan_filter(netdev, new_flags & HNAE3_VLAN_FLTR);
+	h->netdev_flags = new_flags;
+	hns3_update_promisc_mode(netdev, new_flags);
+}
+
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
 
 	if (h->ae_algo->ops->set_promisc_mode) {
-		if (netdev->flags & IFF_PROMISC)
-			h->ae_algo->ops->set_promisc_mode(h, true, true);
-		else if (netdev->flags & IFF_ALLMULTI)
-			h->ae_algo->ops->set_promisc_mode(h, false, true);
-		else
-			h->ae_algo->ops->set_promisc_mode(h, false, false);
+		return h->ae_algo->ops->set_promisc_mode(h,
+						promisc_flags & HNAE3_UPE,
+						promisc_flags & HNAE3_MPE);
 	}
-	if (__dev_uc_sync(netdev, hns3_nic_uc_sync, hns3_nic_uc_unsync))
-		netdev_err(netdev, "sync uc address fail\n");
-	if (netdev->flags & IFF_MULTICAST) {
-		if (__dev_mc_sync(netdev, hns3_nic_mc_sync, hns3_nic_mc_unsync))
-			netdev_err(netdev, "sync mc address fail\n");
 
-		if (h->ae_algo->ops->update_mta_status)
-			h->ae_algo->ops->update_mta_status(h);
+	return 0;
+}
+
+void hns3_enable_vlan_filter(struct net_device *netdev, bool enable)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	bool last_state;
+
+	if (h->pdev->revision >= 0x21 && h->ae_algo->ops->enable_vlan_filter) {
+		last_state = h->netdev_flags & HNAE3_VLAN_FLTR ? true : false;
+		if (enable != last_state) {
+			netdev_info(netdev,
+				    "%s vlan filter\n",
+				    enable ? "enable" : "disable");
+			h->ae_algo->ops->enable_vlan_filter(h, enable);
+		}
 	}
 }
 
@@ -469,7 +681,7 @@
 		return 0;
 
 	ret = skb_cow_head(skb, 0);
-	if (ret)
+	if (unlikely(ret))
 		return ret;
 
 	l3.hdr = skb_network_header(skb);
@@ -481,7 +693,7 @@
 	if (l3.v4->version == 4)
 		l3.v4->check = 0;
 
-	/* tunnel packet.*/
+	/* tunnel packet */
 	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
 					 SKB_GSO_GRE_CSUM |
 					 SKB_GSO_UDP_TUNNEL |
@@ -506,19 +718,18 @@
 			l3.v4->check = 0;
 	}
 
-	/* normal or tunnel packet*/
+	/* normal or tunnel packet */
 	l4_offset = l4.hdr - skb->data;
-	hdr_len = (l4.tcp->doff * 4) + l4_offset;
+	hdr_len = (l4.tcp->doff << 2) + l4_offset;
 
-	/* remove payload length from inner pseudo checksum when tso*/
+	/* remove payload length from inner pseudo checksum when tso */
 	l4_paylen = skb->len - l4_offset;
 	csum_replace_by_diff(&l4.tcp->check,
 			     (__force __wsum)htonl(l4_paylen));
 
 	/* find the txbd field values */
 	*paylen = skb->len - hdr_len;
-	hnae3_set_bit(*type_cs_vlan_tso,
-		      HNS3_TXD_TSO_B, 1);
+	hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_TSO_B, 1);
 
 	/* get MSS for TSO */
 	*mss = skb_shinfo(skb)->gso_size;
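To make the arithmetic concrete: doff counts 4-byte TCP header words, so (doff << 2) is the TCP header length in bytes. A standalone worked example with illustrative values:

```c
#include <stdio.h>

/* illustrative numbers: Ethernet(14) + IPv4(20) + TCP with doff = 5 */
int main(void)
{
	unsigned int l4_offset = 14 + 20;
	unsigned int doff = 5;		/* TCP header length in 4-byte words */
	unsigned int skb_len = 1514;
	unsigned int hdr_len = (doff << 2) + l4_offset;

	/* prints: hdr_len=54 paylen=1460 */
	printf("hdr_len=%u paylen=%u\n", hdr_len, skb_len - hdr_len);
	return 0;
}
```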
@@ -529,11 +740,7 @@
 static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto,
 				u8 *il4_proto)
 {
-	union {
-		struct iphdr *v4;
-		struct ipv6hdr *v6;
-		unsigned char *hdr;
-	} l3;
+	union l3_hdr_info l3;
 	unsigned char *l4_hdr;
 	unsigned char *exthdr;
 	u8 l4_proto_tmp;
@@ -582,110 +789,6 @@
 	return 0;
 }
 
-static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto,
-				u8 il4_proto, u32 *type_cs_vlan_tso,
-				u32 *ol_type_vlan_len_msec)
-{
-	union {
-		struct iphdr *v4;
-		struct ipv6hdr *v6;
-		unsigned char *hdr;
-	} l3;
-	union {
-		struct tcphdr *tcp;
-		struct udphdr *udp;
-		struct gre_base_hdr *gre;
-		unsigned char *hdr;
-	} l4;
-	unsigned char *l2_hdr;
-	u8 l4_proto = ol4_proto;
-	u32 ol2_len;
-	u32 ol3_len;
-	u32 ol4_len;
-	u32 l2_len;
-	u32 l3_len;
-
-	l3.hdr = skb_network_header(skb);
-	l4.hdr = skb_transport_header(skb);
-
-	/* compute L2 header size for normal packet, defined in 2 Bytes */
-	l2_len = l3.hdr - skb->data;
-	hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
-			HNS3_TXD_L2LEN_S, l2_len >> 1);
-
-	/* tunnel packet*/
-	if (skb->encapsulation) {
-		/* compute OL2 header size, defined in 2 Bytes */
-		ol2_len = l2_len;
-		hnae3_set_field(*ol_type_vlan_len_msec,
-				HNS3_TXD_L2LEN_M,
-				HNS3_TXD_L2LEN_S, ol2_len >> 1);
-
-		/* compute OL3 header size, defined in 4 Bytes */
-		ol3_len = l4.hdr - l3.hdr;
-		hnae3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_M,
-				HNS3_TXD_L3LEN_S, ol3_len >> 2);
-
-		/* MAC in UDP, MAC in GRE (0x6558)*/
-		if ((ol4_proto == IPPROTO_UDP) || (ol4_proto == IPPROTO_GRE)) {
-			/* switch MAC header ptr from outer to inner header.*/
-			l2_hdr = skb_inner_mac_header(skb);
-
-			/* compute OL4 header size, defined in 4 Bytes. */
-			ol4_len = l2_hdr - l4.hdr;
-			hnae3_set_field(*ol_type_vlan_len_msec,
-					HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
-					ol4_len >> 2);
-
-			/* switch IP header ptr from outer to inner header */
-			l3.hdr = skb_inner_network_header(skb);
-
-			/* compute inner l2 header size, defined in 2 Bytes. */
-			l2_len = l3.hdr - l2_hdr;
-			hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
-					HNS3_TXD_L2LEN_S, l2_len >> 1);
-		} else {
-			/* skb packet types not supported by hardware,
-			 * txbd len fild doesn't be filled.
-			 */
-			return;
-		}
-
-		/* switch L4 header pointer from outer to inner */
-		l4.hdr = skb_inner_transport_header(skb);
-
-		l4_proto = il4_proto;
-	}
-
-	/* compute inner(/normal) L3 header size, defined in 4 Bytes */
-	l3_len = l4.hdr - l3.hdr;
-	hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3LEN_M,
-			HNS3_TXD_L3LEN_S, l3_len >> 2);
-
-	/* compute inner(/normal) L4 header size, defined in 4 Bytes */
-	switch (l4_proto) {
-	case IPPROTO_TCP:
-		hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
-				HNS3_TXD_L4LEN_S, l4.tcp->doff);
-		break;
-	case IPPROTO_SCTP:
-		hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
-				HNS3_TXD_L4LEN_S,
-				(sizeof(struct sctphdr) >> 2));
-		break;
-	case IPPROTO_UDP:
-		hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
-				HNS3_TXD_L4LEN_S,
-				(sizeof(struct udphdr) >> 2));
-		break;
-	default:
-		/* skb packet types not supported by hardware,
-		 * txbd len fild doesn't be filled.
-		 */
-		return;
-	}
-}
-
 /* when skb->encapsulation is 0, skb->ip_summed is CHECKSUM_PARTIAL
  * and it is udp packet, which has a dest port as the IANA assigned.
  * the hardware is expected to do the checksum offload, but the
@@ -694,17 +797,12 @@
  */
 static bool hns3_tunnel_csum_bug(struct sk_buff *skb)
 {
-#define IANA_VXLAN_PORT	4789
-	union {
-		struct tcphdr *tcp;
-		struct udphdr *udp;
-		struct gre_base_hdr *gre;
-		unsigned char *hdr;
-	} l4;
+	union l4_hdr_info l4;
 
 	l4.hdr = skb_transport_header(skb);
 
-	if (!(!skb->encapsulation && l4.udp->dest == htons(IANA_VXLAN_PORT)))
+	if (!(!skb->encapsulation &&
+	      l4.udp->dest == htons(IANA_VXLAN_UDP_PORT)))
 		return false;
 
 	skb_checksum_help(skb);
@@ -712,54 +810,71 @@
 	return true;
 }
 
-static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
-				   u8 il4_proto, u32 *type_cs_vlan_tso,
-				   u32 *ol_type_vlan_len_msec)
+static void hns3_set_outer_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
+				  u32 *ol_type_vlan_len_msec)
 {
-	union {
-		struct iphdr *v4;
-		struct ipv6hdr *v6;
-		unsigned char *hdr;
-	} l3;
-	u32 l4_proto = ol4_proto;
+	u32 l2_len, l3_len, l4_len;
+	unsigned char *il2_hdr;
+	union l3_hdr_info l3;
+	union l4_hdr_info l4;
 
 	l3.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
 
-	/* define OL3 type and tunnel type(OL4).*/
+	/* compute OL2 header size, defined in 2 Bytes */
+	l2_len = l3.hdr - skb->data;
+	hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L2LEN_S, l2_len >> 1);
+
+	/* compute OL3 header size, defined in 4 Bytes */
+	l3_len = l4.hdr - l3.hdr;
+	hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_S, l3_len >> 2);
+
+	il2_hdr = skb_inner_mac_header(skb);
+	/* compute OL4 header size, defined in 4 Bytes */
+	l4_len = il2_hdr - l4.hdr;
+	hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_S, l4_len >> 2);
+
+	/* define outer network header type */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb_is_gso(skb))
+			hns3_set_field(*ol_type_vlan_len_msec,
+				       HNS3_TXD_OL3T_S,
+				       HNS3_OL3T_IPV4_CSUM);
+		else
+			hns3_set_field(*ol_type_vlan_len_msec,
+				       HNS3_TXD_OL3T_S,
+				       HNS3_OL3T_IPV4_NO_CSUM);
+
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_S,
+			       HNS3_OL3T_IPV6);
+	}
+
+	if (ol4_proto == IPPROTO_UDP)
+		hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_TUNTYPE_S,
+			       HNS3_TUN_MAC_IN_UDP);
+	else if (ol4_proto == IPPROTO_GRE)
+		hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_TUNTYPE_S,
+			       HNS3_TUN_NVGRE);
+}
+
+static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
+			   u8 il4_proto, u32 *type_cs_vlan_tso,
+			   u32 *ol_type_vlan_len_msec)
+{
+	unsigned char *l2_hdr = skb->data;
+	u32 l4_proto = ol4_proto;
+	union l4_hdr_info l4;
+	union l3_hdr_info l3;
+	u32 l2_len, l3_len;
+
+	l4.hdr = skb_transport_header(skb);
+	l3.hdr = skb_network_header(skb);
+
+	/* handle encapsulation skb */
 	if (skb->encapsulation) {
-		/* define outer network header type.*/
-		if (skb->protocol == htons(ETH_P_IP)) {
-			if (skb_is_gso(skb))
-				hnae3_set_field(*ol_type_vlan_len_msec,
-						HNS3_TXD_OL3T_M,
-						HNS3_TXD_OL3T_S,
-						HNS3_OL3T_IPV4_CSUM);
-			else
-				hnae3_set_field(*ol_type_vlan_len_msec,
-						HNS3_TXD_OL3T_M,
-						HNS3_TXD_OL3T_S,
-						HNS3_OL3T_IPV4_NO_CSUM);
-
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			hnae3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_M,
-					HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6);
-		}
-
-		/* define tunnel type(OL4).*/
-		switch (l4_proto) {
-		case IPPROTO_UDP:
-			hnae3_set_field(*ol_type_vlan_len_msec,
-					HNS3_TXD_TUNTYPE_M,
-					HNS3_TXD_TUNTYPE_S,
-					HNS3_TUN_MAC_IN_UDP);
-			break;
-		case IPPROTO_GRE:
-			hnae3_set_field(*ol_type_vlan_len_msec,
-					HNS3_TXD_TUNTYPE_M,
-					HNS3_TXD_TUNTYPE_S,
-					HNS3_TUN_NVGRE);
-			break;
-		default:
+		/* If this is not a UDP/GRE encapsulation skb */
+		if (!(ol4_proto == IPPROTO_UDP || ol4_proto == IPPROTO_GRE)) {
 			/* drop the skb tunnel packet if hardware doesn't support it,
 			 * because hardware can't calculate the csum when doing TSO.
 			 */
@@ -773,48 +888,62 @@
 			return 0;
 		}
 
+		hns3_set_outer_l2l3l4(skb, ol4_proto, ol_type_vlan_len_msec);
+
+		/* switch to inner header */
+		l2_hdr = skb_inner_mac_header(skb);
 		l3.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
 		l4_proto = il4_proto;
 	}
 
 	if (l3.v4->version == 4) {
-		hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
-				HNS3_TXD_L3T_S, HNS3_L3T_IPV4);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_S,
+			       HNS3_L3T_IPV4);
 
 		/* the stack computes the IP header already, the only time we
 		 * need the hardware to recompute it is in the case of TSO.
 		 */
 		if (skb_is_gso(skb))
-			hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
+			hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
 	} else if (l3.v6->version == 6) {
-		hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
-				HNS3_TXD_L3T_S, HNS3_L3T_IPV6);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_S,
+			       HNS3_L3T_IPV6);
 	}
 
+	/* compute inner(/normal) L2 header size, defined in 2 Bytes */
+	l2_len = l3.hdr - l2_hdr;
+	hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_S, l2_len >> 1);
+
+	/* compute inner(/normal) L3 header size, defined in 4 Bytes */
+	l3_len = l4.hdr - l3.hdr;
+	hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3LEN_S, l3_len >> 2);
+
+	/* compute inner(/normal) L4 header size, defined in 4 Bytes */
 	switch (l4_proto) {
 	case IPPROTO_TCP:
-		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
-		hnae3_set_field(*type_cs_vlan_tso,
-				HNS3_TXD_L4T_M,
-				HNS3_TXD_L4T_S,
-				HNS3_L4T_TCP);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S,
+			       HNS3_L4T_TCP);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_S,
+			       l4.tcp->doff);
 		break;
 	case IPPROTO_UDP:
 		if (hns3_tunnel_csum_bug(skb))
 			break;
 
-		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
-		hnae3_set_field(*type_cs_vlan_tso,
-				HNS3_TXD_L4T_M,
-				HNS3_TXD_L4T_S,
-				HNS3_L4T_UDP);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S,
+			       HNS3_L4T_UDP);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_S,
+			       (sizeof(struct udphdr) >> 2));
 		break;
 	case IPPROTO_SCTP:
-		hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
-		hnae3_set_field(*type_cs_vlan_tso,
-				HNS3_TXD_L4T_M,
-				HNS3_TXD_L4T_S,
-				HNS3_L4T_SCTP);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S,
+			       HNS3_L4T_SCTP);
+		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_S,
+			       (sizeof(struct sctphdr) >> 2));
 		break;
 	default:
 		/* drop the skb tunnel packet if hardware doesn't support it,
 		 * because hardware can't calculate the csum when doing TSO.
 static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
 {
 	/* Config bd buffer end */
-	hnae3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
-			HNS3_TXD_BDTYPE_S, 0);
-	hnae3_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
-	hnae3_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
-	hnae3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);
+	if (frag_end)
+		hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U);
+	hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U);
 }
 
-static int hns3_fill_desc_vtags(struct sk_buff *skb,
-				struct hns3_enet_ring *tx_ring,
-				u32 *inner_vlan_flag,
-				u32 *out_vlan_flag,
-				u16 *inner_vtag,
-				u16 *out_vtag)
+static int hns3_handle_vtags(struct hns3_enet_ring *tx_ring,
+			     struct sk_buff *skb)
 {
-#define HNS3_TX_VLAN_PRIO_SHIFT 13
+	struct hnae3_handle *handle = tx_ring->tqp->handle;
+	struct vlan_ethhdr *vhdr;
+	int rc;
+
+	if (!(skb->protocol == htons(ETH_P_8021Q) ||
+	      skb_vlan_tag_present(skb)))
+		return 0;
+
+	/* Due to a HW limitation, if port-based VLAN insertion is enabled,
+	 * only one VLAN header is allowed in the skb; otherwise it will
+	 * cause a RAS error.
+	 */
+	if (unlikely(skb_vlan_tagged_multi(skb) &&
+		     handle->port_base_vlan_state ==
+		     HNAE3_PORT_BASE_VLAN_ENABLE))
+		return -EINVAL;
 
 	if (skb->protocol == htons(ETH_P_8021Q) &&
-	    !(tx_ring->tqp->handle->kinfo.netdev->features &
-	    NETIF_F_HW_VLAN_CTAG_TX)) {
+	    !(handle->kinfo.netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
 		/* When HW VLAN acceleration is turned off, and the stack
 		 * sets the protocol to 802.1q, the driver just need to
 		 * set the protocol to the encapsulated ethertype.
@@ -864,195 +1000,291 @@
 	}
 
 	if (skb_vlan_tag_present(skb)) {
-		u16 vlan_tag;
-
-		vlan_tag = skb_vlan_tag_get(skb);
-		vlan_tag |= (skb->priority & 0x7) << HNS3_TX_VLAN_PRIO_SHIFT;
-
 		/* Based on hw strategy, use out_vtag in two layer tag case,
 		 * and use inner_vtag in one tag case.
 		 */
-		if (skb->protocol == htons(ETH_P_8021Q)) {
-			hnae3_set_bit(*out_vlan_flag, HNS3_TXD_OVLAN_B, 1);
-			*out_vtag = vlan_tag;
-		} else {
-			hnae3_set_bit(*inner_vlan_flag, HNS3_TXD_VLAN_B, 1);
-			*inner_vtag = vlan_tag;
-		}
-	} else if (skb->protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *vhdr;
-		int rc;
+		if (skb->protocol == htons(ETH_P_8021Q) &&
+		    handle->port_base_vlan_state ==
+		    HNAE3_PORT_BASE_VLAN_DISABLE)
+			rc = HNS3_OUTER_VLAN_TAG;
+		else
+			rc = HNS3_INNER_VLAN_TAG;
 
-		rc = skb_cow_head(skb, 0);
-		if (rc < 0)
-			return rc;
-		vhdr = (struct vlan_ethhdr *)skb->data;
-		vhdr->h_vlan_TCI |= cpu_to_be16((skb->priority & 0x7)
-					<< HNS3_TX_VLAN_PRIO_SHIFT);
+		skb->protocol = vlan_get_protocol(skb);
+		return rc;
 	}
 
+	rc = skb_cow_head(skb, 0);
+	if (unlikely(rc < 0))
+		return rc;
+
+	vhdr = (struct vlan_ethhdr *)skb->data;
+	vhdr->h_vlan_TCI |= cpu_to_be16((skb->priority << VLAN_PRIO_SHIFT)
+					 & VLAN_PRIO_MASK);
+
 	skb->protocol = vlan_get_protocol(skb);
 	return 0;
 }
 
+static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
+			      struct sk_buff *skb, struct hns3_desc *desc)
+{
+	u32 ol_type_vlan_len_msec = 0;
+	u32 type_cs_vlan_tso = 0;
+	u32 paylen = skb->len;
+	u16 inner_vtag = 0;
+	u16 out_vtag = 0;
+	u16 mss = 0;
+	int ret;
+
+	ret = hns3_handle_vtags(ring, skb);
+	if (unlikely(ret < 0)) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_vlan_err++;
+		u64_stats_update_end(&ring->syncp);
+		return ret;
+	} else if (ret == HNS3_INNER_VLAN_TAG) {
+		inner_vtag = skb_vlan_tag_get(skb);
+		inner_vtag |= (skb->priority << VLAN_PRIO_SHIFT) &
+				VLAN_PRIO_MASK;
+		hns3_set_field(type_cs_vlan_tso, HNS3_TXD_VLAN_B, 1);
+	} else if (ret == HNS3_OUTER_VLAN_TAG) {
+		out_vtag = skb_vlan_tag_get(skb);
+		out_vtag |= (skb->priority << VLAN_PRIO_SHIFT) &
+				VLAN_PRIO_MASK;
+		hns3_set_field(ol_type_vlan_len_msec, HNS3_TXD_OVLAN_B,
+			       1);
+	}
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		u8 ol4_proto, il4_proto;
+
+		skb_reset_mac_len(skb);
+
+		ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
+		if (unlikely(ret)) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.tx_l4_proto_err++;
+			u64_stats_update_end(&ring->syncp);
+			return ret;
+		}
+
+		ret = hns3_set_l2l3l4(skb, ol4_proto, il4_proto,
+				      &type_cs_vlan_tso,
+				      &ol_type_vlan_len_msec);
+		if (unlikely(ret)) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.tx_l2l3l4_err++;
+			u64_stats_update_end(&ring->syncp);
+			return ret;
+		}
+
+		ret = hns3_set_tso(skb, &paylen, &mss,
+				   &type_cs_vlan_tso);
+		if (unlikely(ret)) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.tx_tso_err++;
+			u64_stats_update_end(&ring->syncp);
+			return ret;
+		}
+	}
+
+	/* Set txbd */
+	desc->tx.ol_type_vlan_len_msec =
+		cpu_to_le32(ol_type_vlan_len_msec);
+	desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso);
+	desc->tx.paylen = cpu_to_le32(paylen);
+	desc->tx.mss = cpu_to_le16(mss);
+	desc->tx.vlan_tag = cpu_to_le16(inner_vtag);
+	desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag);
+
+	return 0;
+}
+
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-			  int size, dma_addr_t dma, int frag_end,
+			  unsigned int size, int frag_end,
 			  enum hns_desc_type type)
 {
 	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
 	struct hns3_desc *desc = &ring->desc[ring->next_to_use];
-	u32 ol_type_vlan_len_msec = 0;
-	u16 bdtp_fe_sc_vld_ra_ri = 0;
-	u32 type_cs_vlan_tso = 0;
-	struct sk_buff *skb;
-	u16 inner_vtag = 0;
-	u16 out_vtag = 0;
-	u32 paylen = 0;
-	u16 mss = 0;
-	u8 ol4_proto;
-	u8 il4_proto;
-	int ret;
-
-	/* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
-	desc_cb->priv = priv;
-	desc_cb->length = size;
-	desc_cb->dma = dma;
-	desc_cb->type = type;
-
-	/* now, fill the descriptor */
-	desc->addr = cpu_to_le64(dma);
-	desc->tx.send_size = cpu_to_le16((u16)size);
-	hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
-	desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+	struct device *dev = ring_to_dev(ring);
+	skb_frag_t *frag;
+	unsigned int frag_buf_num;
+	int k, sizeoflast;
+	dma_addr_t dma;
 
 	if (type == DESC_TYPE_SKB) {
-		skb = (struct sk_buff *)priv;
-		paylen = skb->len;
+		struct sk_buff *skb = (struct sk_buff *)priv;
+		int ret;
 
-		ret = hns3_fill_desc_vtags(skb, ring, &type_cs_vlan_tso,
-					   &ol_type_vlan_len_msec,
-					   &inner_vtag, &out_vtag);
+		ret = hns3_fill_skb_desc(ring, skb, desc);
 		if (unlikely(ret))
 			return ret;
 
-		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			skb_reset_mac_len(skb);
-
-			ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
-			if (ret)
-				return ret;
-			hns3_set_l2l3l4_len(skb, ol4_proto, il4_proto,
-					    &type_cs_vlan_tso,
-					    &ol_type_vlan_len_msec);
-			ret = hns3_set_l3l4_type_csum(skb, ol4_proto, il4_proto,
-						      &type_cs_vlan_tso,
-						      &ol_type_vlan_len_msec);
-			if (ret)
-				return ret;
-
-			ret = hns3_set_tso(skb, &paylen, &mss,
-					   &type_cs_vlan_tso);
-			if (ret)
-				return ret;
-		}
-
-		/* Set txbd */
-		desc->tx.ol_type_vlan_len_msec =
-			cpu_to_le32(ol_type_vlan_len_msec);
-		desc->tx.type_cs_vlan_tso_len =
-			cpu_to_le32(type_cs_vlan_tso);
-		desc->tx.paylen = cpu_to_le32(paylen);
-		desc->tx.mss = cpu_to_le16(mss);
-		desc->tx.vlan_tag = cpu_to_le16(inner_vtag);
-		desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag);
+		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	} else {
+		frag = (skb_frag_t *)priv;
+		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
 	}
 
-	/* move ring pointer to next.*/
-	ring_ptr_move_fw(ring, next_to_use);
+	if (unlikely(dma_mapping_error(dev, dma))) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
 
-	return 0;
-}
+	desc_cb->length = size;
 
-static int hns3_fill_desc_tso(struct hns3_enet_ring *ring, void *priv,
-			      int size, dma_addr_t dma, int frag_end,
-			      enum hns_desc_type type)
-{
-	unsigned int frag_buf_num;
-	unsigned int k;
-	int sizeoflast;
-	int ret;
+	if (likely(size <= HNS3_MAX_BD_SIZE)) {
+		u16 bdtp_fe_sc_vld_ra_ri = 0;
 
-	frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
-	sizeoflast = size % HNS3_MAX_BD_SIZE;
+		desc_cb->priv = priv;
+		desc_cb->dma = dma;
+		desc_cb->type = type;
+		desc->addr = cpu_to_le64(dma);
+		desc->tx.send_size = cpu_to_le16(size);
+		hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
+		desc->tx.bdtp_fe_sc_vld_ra_ri =
+			cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+
+		ring_ptr_move_fw(ring, next_to_use);
+		return 0;
+	}
+
+	frag_buf_num = hns3_tx_bd_count(size);
+	sizeoflast = size & HNS3_TX_LAST_SIZE_M;
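+	/* masking with HNS3_TX_LAST_SIZE_M is size % HNS3_MAX_BD_SIZE,
+	 * assuming the max BD size is a power of two
+	 */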
 	sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE;
 
-	/* When the frag size is bigger than hardware, split this frag */
+	/* When the frag size is bigger than the hardware limit, split this frag */
 	for (k = 0; k < frag_buf_num; k++) {
-		ret = hns3_fill_desc(ring, priv,
-				     (k == frag_buf_num - 1) ?
-				sizeoflast : HNS3_MAX_BD_SIZE,
-				dma + HNS3_MAX_BD_SIZE * k,
-				frag_end && (k == frag_buf_num - 1) ? 1 : 0,
-				(type == DESC_TYPE_SKB && !k) ?
-					DESC_TYPE_SKB : DESC_TYPE_PAGE);
-		if (ret)
-			return ret;
+		u16 bdtp_fe_sc_vld_ra_ri = 0;
+
+		/* fill the desc_cb; only the first BD of an skb keeps DESC_TYPE_SKB */
+		desc_cb->priv = priv;
+		desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k;
+		desc_cb->type = (type == DESC_TYPE_SKB && !k) ?
+				DESC_TYPE_SKB : DESC_TYPE_PAGE;
+
+		/* now, fill the descriptor */
+		desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k);
+		desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ?
+				     (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE);
+		hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri,
+				       frag_end && (k == frag_buf_num - 1) ?
+						1 : 0);
+		desc->tx.bdtp_fe_sc_vld_ra_ri =
+				cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+
+		/* move ring pointer to next */
+		ring_ptr_move_fw(ring, next_to_use);
+
+		desc_cb = &ring->desc_cb[ring->next_to_use];
+		desc = &ring->desc[ring->next_to_use];
 	}
 
 	return 0;
 }
 
-static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum,
-				   struct hns3_enet_ring *ring)
+static unsigned int hns3_nic_bd_num(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *out_skb;
-	struct skb_frag_struct *frag;
-	int bdnum_for_frag;
-	int frag_num;
-	int buf_num;
-	int size;
+	unsigned int bd_num;
 	int i;
 
-	size = skb_headlen(skb);
-	buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+	/* if the total len is within the max bd limit */
+	if (likely(skb->len <= HNS3_MAX_BD_SIZE))
+		return skb_shinfo(skb)->nr_frags + 1;
 
-	frag_num = skb_shinfo(skb)->nr_frags;
-	for (i = 0; i < frag_num; i++) {
-		frag = &skb_shinfo(skb)->frags[i];
-		size = skb_frag_size(frag);
-		bdnum_for_frag =
-			(size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
-		if (bdnum_for_frag > HNS3_MAX_BD_PER_FRAG)
-			return -ENOMEM;
+	bd_num = hns3_tx_bd_count(skb_headlen(skb));
 
-		buf_num += bdnum_for_frag;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		bd_num += hns3_tx_bd_count(skb_frag_size(frag));
 	}
 
-	if (buf_num > ring_space(ring))
-		return -EBUSY;
-
-	*bnum = buf_num;
-	return 0;
+	return bd_num;
 }
 
-static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum,
-				  struct hns3_enet_ring *ring)
+static unsigned int hns3_gso_hdr_len(struct sk_buff *skb)
+{
+	if (!skb->encapsulation)
+		return skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+	return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
+}
+
+/* HW requires every 8 contiguous buffers of data to hold more than the
+ * MSS. We simplify this by ensuring that skb_headlen plus the first 7
+ * contiguous frags exceed the GSO header length plus the MSS, and that
+ * every subsequent window of 7 contiguous frags exceeds the MSS, except
+ * for the last 7 frags.
+ */
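+/* e.g. with a hypothetical MSS of 1460, any window of 7 consecutive
+ * frags (counting skb_headlen in the first window) summing to fewer
+ * than 1460 bytes fails the checks below, and the caller copies the
+ * skb into a linear buffer first.
+ */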
+static bool hns3_skb_need_linearized(struct sk_buff *skb)
+{
+	int bd_limit = HNS3_MAX_BD_NUM_NORMAL - 1;
+	unsigned int tot_len = 0;
+	int i;
+
+	for (i = 0; i < bd_limit; i++)
+		tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+	/* ensure that headlen plus the first 7 frags is greater than mss
+	 * plus the header, and that the first 7 frags alone are greater
+	 * than mss.
+	 */
+	if (((tot_len + skb_headlen(skb)) < (skb_shinfo(skb)->gso_size +
+	    hns3_gso_hdr_len(skb))) || (tot_len < skb_shinfo(skb)->gso_size))
+		return true;
+
+	/* ensure each remaining window of 7 contiguous frags exceeds the mss */
+	for (i = 0; i < (skb_shinfo(skb)->nr_frags - bd_limit - 1); i++) {
+		tot_len -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i + bd_limit]);
+
+		if (tot_len < skb_shinfo(skb)->gso_size)
+			return true;
+	}
+
+	return false;
+}
+
+static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
+				  struct sk_buff **out_skb)
 {
 	struct sk_buff *skb = *out_skb;
-	int buf_num;
+	unsigned int bd_num;
 
-	/* No. of segments (plus a header) */
-	buf_num = skb_shinfo(skb)->nr_frags + 1;
+	bd_num = hns3_nic_bd_num(skb);
+	if (unlikely(bd_num > HNS3_MAX_BD_NUM_NORMAL)) {
+		struct sk_buff *new_skb;
 
-	if (buf_num > ring_space(ring))
+		if (skb_is_gso(skb) && bd_num <= HNS3_MAX_BD_NUM_TSO &&
+		    !hns3_skb_need_linearized(skb))
+			goto out;
+
+		/* manually linearize the packet by copying it */
+		new_skb = skb_copy(skb, GFP_ATOMIC);
+		if (!new_skb)
+			return -ENOMEM;
+		dev_kfree_skb_any(skb);
+		*out_skb = new_skb;
+
+		bd_num = hns3_nic_bd_num(new_skb);
+		if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_TSO) ||
+		    (!skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_NORMAL))
+			return -ENOMEM;
+
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_copy++;
+		u64_stats_update_end(&ring->syncp);
+	}
+
+out:
+	if (unlikely(ring_space(ring) < bd_num))
 		return -EBUSY;
 
-	*bnum = buf_num;
-
-	return 0;
+	return bd_num;
 }
 
-static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
+static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 {
 	struct device *dev = ring_to_dev(ring);
 	unsigned int i;
@@ -1062,20 +1294,23 @@
 		if (ring->next_to_use == next_to_use_orig)
 			break;
 
+		/* rollback one */
+		ring_ptr_move_bw(ring, next_to_use);
+
 		/* unmap the descriptor dma address */
 		if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB)
 			dma_unmap_single(dev,
 					 ring->desc_cb[ring->next_to_use].dma,
 					ring->desc_cb[ring->next_to_use].length,
 					DMA_TO_DEVICE);
-		else
+		else if (ring->desc_cb[ring->next_to_use].length)
 			dma_unmap_page(dev,
 				       ring->desc_cb[ring->next_to_use].dma,
 				       ring->desc_cb[ring->next_to_use].length,
 				       DMA_TO_DEVICE);
 
-		/* rollback one */
-		ring_ptr_move_bw(ring, next_to_use);
+		ring->desc_cb[ring->next_to_use].length = 0;
+		ring->desc_cb[ring->next_to_use].dma = 0;
 	}
 }
 
@@ -1085,12 +1320,9 @@
 	struct hns3_nic_ring_data *ring_data =
 		&tx_ring_data(priv, skb->queue_mapping);
 	struct hns3_enet_ring *ring = ring_data->ring;
-	struct device *dev = priv->dev;
 	struct netdev_queue *dev_queue;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	int next_to_use_head;
-	int next_to_use_frag;
-	dma_addr_t dma;
 	int buf_num;
 	int seg_num;
 	int size;
@@ -1100,22 +1332,21 @@
 	/* Prefetch the data used later */
 	prefetch(skb->data);
 
-	switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) {
-	case -EBUSY:
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.tx_busy++;
-		u64_stats_update_end(&ring->syncp);
+	buf_num = hns3_nic_maybe_stop_tx(ring, &skb);
+	if (unlikely(buf_num <= 0)) {
+		if (buf_num == -EBUSY) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.tx_busy++;
+			u64_stats_update_end(&ring->syncp);
+			goto out_net_tx_busy;
+		} else if (buf_num == -ENOMEM) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.sw_err_cnt++;
+			u64_stats_update_end(&ring->syncp);
+		}
 
-		goto out_net_tx_busy;
-	case -ENOMEM:
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.sw_err_cnt++;
-		u64_stats_update_end(&ring->syncp);
-		netdev_err(netdev, "no memory to xmit!\n");
-
+		hns3_rl_err(netdev, "xmit error: %d!\n", buf_num);
 		goto out_err_tx_ok;
-	default:
-		break;
 	}
 
 	/* No. of segments (plus a header) */
@@ -1125,35 +1356,22 @@
 
 	next_to_use_head = ring->next_to_use;
 
-	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, dma)) {
-		netdev_err(netdev, "TX head DMA map failed\n");
-		ring->stats.sw_err_cnt++;
-		goto out_err_tx_ok;
-	}
+	ret = hns3_fill_desc(ring, skb, size, seg_num == 1 ? 1 : 0,
+			     DESC_TYPE_SKB);
+	if (unlikely(ret))
+		goto fill_err;
 
-	ret = priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0,
-			   DESC_TYPE_SKB);
-	if (ret)
-		goto head_dma_map_err;
-
-	next_to_use_frag = ring->next_to_use;
 	/* Fill the fragments */
 	for (i = 1; i < seg_num; i++) {
 		frag = &skb_shinfo(skb)->frags[i - 1];
 		size = skb_frag_size(frag);
-		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, dma)) {
-			netdev_err(netdev, "TX frag(%d) DMA map failed\n", i);
-			ring->stats.sw_err_cnt++;
-			goto frag_dma_map_err;
-		}
-		ret = priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma,
-				    seg_num - 1 == i ? 1 : 0,
-				    DESC_TYPE_PAGE);
 
-		if (ret)
-			goto frag_dma_map_err;
+		ret = hns3_fill_desc(ring, frag, size,
+				     seg_num - 1 == i ? 1 : 0,
+				     DESC_TYPE_PAGE);
+
+		if (unlikely(ret))
+			goto fill_err;
 	}
 
 	/* Complete translate all packets */
@@ -1166,11 +1384,8 @@
 
 	return NETDEV_TX_OK;
 
-frag_dma_map_err:
-	hns_nic_dma_unmap(ring, next_to_use_frag);
-
-head_dma_map_err:
-	hns_nic_dma_unmap(ring, next_to_use_head);
+fill_err:
+	hns3_clear_desc(ring, next_to_use_head);
 
 out_err_tx_ok:
 	dev_kfree_skb_any(skb);
@@ -1209,43 +1424,55 @@
 	return 0;
 }
 
+static int hns3_nic_do_ioctl(struct net_device *netdev,
+			     struct ifreq *ifr, int cmd)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (!netif_running(netdev))
+		return -EINVAL;
+
+	if (!h->ae_algo->ops->do_ioctl)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->do_ioctl(h, ifr, cmd);
+}
+
 static int hns3_nic_set_features(struct net_device *netdev,
 				 netdev_features_t features)
 {
 	netdev_features_t changed = netdev->features ^ features;
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = priv->ae_handle;
+	bool enable;
 	int ret;
 
-	if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
-		if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
-			priv->ops.fill_desc = hns3_fill_desc_tso;
-			priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
-		} else {
-			priv->ops.fill_desc = hns3_fill_desc;
-			priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
-		}
+	if (changed & (NETIF_F_GRO_HW) && h->ae_algo->ops->set_gro_en) {
+		enable = !!(features & NETIF_F_GRO_HW);
+		ret = h->ae_algo->ops->set_gro_en(h, enable);
+		if (ret)
+			return ret;
 	}
 
 	if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) &&
 	    h->ae_algo->ops->enable_vlan_filter) {
-		if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
-			h->ae_algo->ops->enable_vlan_filter(h, true);
-		else
-			h->ae_algo->ops->enable_vlan_filter(h, false);
+		enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER);
+		h->ae_algo->ops->enable_vlan_filter(h, enable);
 	}
 
 	if ((changed & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    h->ae_algo->ops->enable_hw_strip_rxvtag) {
-		if (features & NETIF_F_HW_VLAN_CTAG_RX)
-			ret = h->ae_algo->ops->enable_hw_strip_rxvtag(h, true);
-		else
-			ret = h->ae_algo->ops->enable_hw_strip_rxvtag(h, false);
-
+		enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+		ret = h->ae_algo->ops->enable_hw_strip_rxvtag(h, enable);
 		if (ret)
 			return ret;
 	}
 
+	if ((changed & NETIF_F_NTUPLE) && h->ae_algo->ops->enable_fd) {
+		enable = !!(features & NETIF_F_NTUPLE);
+		h->ae_algo->ops->enable_fd(h, enable);
+	}
+
 	netdev->features = features;
 	return 0;
 }
@@ -1257,7 +1484,12 @@
 	int queue_num = priv->ae_handle->kinfo.num_tqps;
 	struct hnae3_handle *handle = priv->ae_handle;
 	struct hns3_enet_ring *ring;
+	u64 rx_length_errors = 0;
+	u64 rx_crc_errors = 0;
+	u64 rx_multicast = 0;
 	unsigned int start;
+	u64 tx_errors = 0;
+	u64 rx_errors = 0;
 	unsigned int idx;
 	u64 tx_bytes = 0;
 	u64 rx_bytes = 0;
@@ -1278,8 +1510,16 @@
 			start = u64_stats_fetch_begin_irq(&ring->syncp);
 			tx_bytes += ring->stats.tx_bytes;
 			tx_pkts += ring->stats.tx_pkts;
-			tx_drop += ring->stats.tx_busy;
 			tx_drop += ring->stats.sw_err_cnt;
+			tx_drop += ring->stats.tx_vlan_err;
+			tx_drop += ring->stats.tx_l4_proto_err;
+			tx_drop += ring->stats.tx_l2l3l4_err;
+			tx_drop += ring->stats.tx_tso_err;
+			tx_errors += ring->stats.sw_err_cnt;
+			tx_errors += ring->stats.tx_vlan_err;
+			tx_errors += ring->stats.tx_l4_proto_err;
+			tx_errors += ring->stats.tx_l2l3l4_err;
+			tx_errors += ring->stats.tx_tso_err;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
@@ -1288,9 +1528,12 @@
 			start = u64_stats_fetch_begin_irq(&ring->syncp);
 			rx_bytes += ring->stats.rx_bytes;
 			rx_pkts += ring->stats.rx_pkts;
-			rx_drop += ring->stats.non_vld_descs;
-			rx_drop += ring->stats.err_pkt_len;
 			rx_drop += ring->stats.l2_err;
+			rx_errors += ring->stats.l2_err;
+			rx_errors += ring->stats.l3l4_csum_err;
+			rx_crc_errors += ring->stats.l2_err;
+			rx_multicast += ring->stats.rx_multicast;
+			rx_length_errors += ring->stats.err_pkt_len;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 	}
 
@@ -1299,15 +1542,15 @@
 	stats->rx_bytes = rx_bytes;
 	stats->rx_packets = rx_pkts;
 
-	stats->rx_errors = netdev->stats.rx_errors;
-	stats->multicast = netdev->stats.multicast;
-	stats->rx_length_errors = netdev->stats.rx_length_errors;
-	stats->rx_crc_errors = netdev->stats.rx_crc_errors;
+	stats->rx_errors = rx_errors;
+	stats->multicast = rx_multicast;
+	stats->rx_length_errors = rx_length_errors;
+	stats->rx_crc_errors = rx_crc_errors;
 	stats->rx_missed_errors = netdev->stats.rx_missed_errors;
 
-	stats->tx_errors = netdev->stats.tx_errors;
-	stats->rx_dropped = rx_drop + netdev->stats.rx_dropped;
-	stats->tx_dropped = tx_drop + netdev->stats.tx_dropped;
+	stats->tx_errors = tx_errors;
+	stats->rx_dropped = rx_drop;
+	stats->tx_dropped = tx_drop;
 	stats->collisions = netdev->stats.collisions;
 	stats->rx_over_errors = netdev->stats.rx_over_errors;
 	stats->rx_frame_errors = netdev->stats.rx_frame_errors;
@@ -1324,14 +1567,12 @@
 static int hns3_setup_tc(struct net_device *netdev, void *type_data)
 {
 	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
-	struct hnae3_handle *h = hns3_get_handle(netdev);
-	struct hnae3_knic_private_info *kinfo = &h->kinfo;
 	u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map;
+	struct hnae3_knic_private_info *kinfo;
 	u8 tc = mqprio_qopt->qopt.num_tc;
 	u16 mode = mqprio_qopt->mode;
 	u8 hw = mqprio_qopt->qopt.hw;
-	bool if_running;
-	int ret;
+	struct hnae3_handle *h;
 
 	if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS &&
 	       mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0)))
@@ -1343,24 +1584,13 @@
 	if (!netdev)
 		return -EINVAL;
 
-	if_running = netif_running(netdev);
-	if (if_running) {
-		hns3_nic_net_stop(netdev);
-		msleep(100);
-	}
+	h = hns3_get_handle(netdev);
+	kinfo = &h->kinfo;
 
-	ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
+	netif_dbg(h, drv, netdev, "setup tc: num_tc=%u\n", tc);
+
+	return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
 		kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
-	if (ret)
-		goto out;
-
-	ret = hns3_nic_set_real_num_queue(netdev);
-
-out:
-	if (if_running)
-		hns3_nic_net_open(netdev);
-
-	return ret;
 }
 
 static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -1376,15 +1606,11 @@
 				__be16 proto, u16 vid)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vlan_filter)
 		ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
 
-	if (!ret)
-		set_bit(vid, priv->active_vlans);
-
 	return ret;
 }
 
@@ -1392,41 +1618,27 @@
 				 __be16 proto, u16 vid)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vlan_filter)
 		ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
 
-	if (!ret)
-		clear_bit(vid, priv->active_vlans);
-
 	return ret;
 }
 
-static void hns3_restore_vlan(struct net_device *netdev)
-{
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	u16 vid;
-	int ret;
-
-	for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
-		ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
-		if (ret)
-			netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
-				    vid, ret);
-	}
-}
-
 static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 				u8 qos, __be16 vlan_proto)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	int ret = -EIO;
 
+	netif_dbg(h, drv, netdev,
+		  "set vf vlan: vf=%d, vlan=%u, qos=%u, vlan_proto=%u\n",
+		  vf, vlan, qos, vlan_proto);
+
 	if (h->ae_algo->ops->set_vf_vlan_filter)
 		ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan,
-						   qos, vlan_proto);
+							  qos, vlan_proto);
 
 	return ret;
 }
@@ -1434,30 +1646,23 @@
 static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
-	bool if_running = netif_running(netdev);
 	int ret;
 
+	if (hns3_nic_resetting(netdev))
+		return -EBUSY;
+
 	if (!h->ae_algo->ops->set_mtu)
 		return -EOPNOTSUPP;
 
-	/* if this was called with netdev up then bring netdevice down */
-	if (if_running) {
-		(void)hns3_nic_net_stop(netdev);
-		msleep(100);
-	}
+	netif_dbg(h, drv, netdev,
+		  "change mtu from %u to %d\n", netdev->mtu, new_mtu);
 
 	ret = h->ae_algo->ops->set_mtu(h, new_mtu);
-	if (ret) {
+	if (ret)
 		netdev_err(netdev, "failed to change MTU in hardware %d\n",
 			   ret);
-		return ret;
-	}
-
-	netdev->mtu = new_mtu;
-
-	/* if the netdev was running earlier, bring it up again */
-	if (if_running && hns3_nic_net_open(netdev))
-		ret = -EINVAL;
+	else
+		netdev->mtu = new_mtu;
 
 	return ret;
 }
@@ -1465,13 +1670,19 @@
 static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct hnae3_handle *h = hns3_get_handle(ndev);
 	struct hns3_enet_ring *tx_ring = NULL;
+	struct napi_struct *napi;
 	int timeout_queue = 0;
 	int hw_head, hw_tail;
+	int fbd_num, fbd_oft;
+	int ebd_num, ebd_oft;
+	int bd_num, bd_err;
+	int ring_en, tc;
 	int i;
 
 	/* Find the stopped queue the same way the stack does */
-	for (i = 0; i < ndev->real_num_tx_queues; i++) {
+	for (i = 0; i < ndev->num_tx_queues; i++) {
 		struct netdev_queue *q;
 		unsigned long trans_start;
 
@@ -1492,21 +1703,63 @@
 		return false;
 	}
 
+	priv->tx_timeout_count++;
+
 	tx_ring = priv->ring_data[timeout_queue].ring;
+	napi = &tx_ring->tqp_vector->napi;
+
+	netdev_info(ndev,
+		    "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, napi state: %lu\n",
+		    priv->tx_timeout_count, timeout_queue, tx_ring->next_to_use,
+		    tx_ring->next_to_clean, napi->state);
+
+	netdev_info(ndev,
+		    "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu\n",
+		    tx_ring->stats.tx_pkts, tx_ring->stats.tx_bytes,
+		    tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt);
+
+	netdev_info(ndev,
+		    "seg_pkt_cnt: %llu, tx_err_cnt: %llu, restart_queue: %llu, tx_busy: %llu\n",
+		    tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_err_cnt,
+		    tx_ring->stats.restart_queue, tx_ring->stats.tx_busy);
+
+	/* When the MAC receives a continuous stream of pause frames, it is
+	 * unable to send packets, which may cause a tx timeout
+	 */
+	if (h->ae_algo->ops->get_mac_stats) {
+		struct hns3_mac_stats mac_stats;
+
+		h->ae_algo->ops->get_mac_stats(h, &mac_stats);
+		netdev_info(ndev, "tx_pause_cnt: %llu, rx_pause_cnt: %llu\n",
+			    mac_stats.tx_pause_cnt, mac_stats.rx_pause_cnt);
+	}
 
 	hw_head = readl_relaxed(tx_ring->tqp->io_base +
 				HNS3_RING_TX_RING_HEAD_REG);
 	hw_tail = readl_relaxed(tx_ring->tqp->io_base +
 				HNS3_RING_TX_RING_TAIL_REG);
+	fbd_num = readl_relaxed(tx_ring->tqp->io_base +
+				HNS3_RING_TX_RING_FBDNUM_REG);
+	fbd_oft = readl_relaxed(tx_ring->tqp->io_base +
+				HNS3_RING_TX_RING_OFFSET_REG);
+	ebd_num = readl_relaxed(tx_ring->tqp->io_base +
+				HNS3_RING_TX_RING_EBDNUM_REG);
+	ebd_oft = readl_relaxed(tx_ring->tqp->io_base +
+				HNS3_RING_TX_RING_EBD_OFFSET_REG);
+	bd_num = readl_relaxed(tx_ring->tqp->io_base +
+			       HNS3_RING_TX_RING_BD_NUM_REG);
+	bd_err = readl_relaxed(tx_ring->tqp->io_base +
+			       HNS3_RING_TX_RING_BD_ERR_REG);
+	ring_en = readl_relaxed(tx_ring->tqp->io_base + HNS3_RING_EN_REG);
+	tc = readl_relaxed(tx_ring->tqp->io_base + HNS3_RING_TX_RING_TC_REG);
+
 	netdev_info(ndev,
-		    "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, HW_HEAD: 0x%x, HW_TAIL: 0x%x, INT: 0x%x\n",
-		    priv->tx_timeout_count,
-		    timeout_queue,
-		    tx_ring->next_to_use,
-		    tx_ring->next_to_clean,
-		    hw_head,
-		    hw_tail,
+		    "BD_NUM: 0x%x HW_HEAD: 0x%x, HW_TAIL: 0x%x, BD_ERR: 0x%x, INT: 0x%x\n",
+		    bd_num, hw_head, hw_tail, bd_err,
 		    readl(tx_ring->tqp_vector->mask_addr));
+	netdev_info(ndev,
+		    "RING_EN: 0x%x, TC: 0x%x, FBD_NUM: 0x%x FBD_OFT: 0x%x, EBD_NUM: 0x%x, EBD_OFT: 0x%x\n",
+		    ring_en, tc, fbd_num, fbd_oft, ebd_num, ebd_oft);
 
 	return true;
 }
@@ -1519,22 +1772,46 @@
 	if (!hns3_get_tx_timeo_queue_info(ndev))
 		return;
 
-	priv->tx_timeout_count++;
-
-	if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
-		return;
-
-	/* request the reset */
+	/* request the reset, and let hclge determine
+	 * which reset level should be performed
+	 */
 	if (h->ae_algo->ops->reset_event)
-		h->ae_algo->ops->reset_event(h);
+		h->ae_algo->ops->reset_event(h->pdev, h);
 }
 
+#ifdef CONFIG_RFS_ACCEL
+static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+			      u16 rxq_index, u32 flow_id)
+{
+	struct hnae3_handle *h = hns3_get_handle(dev);
+	struct flow_keys fkeys;
+
+	if (!h->ae_algo->ops->add_arfs_entry)
+		return -EOPNOTSUPP;
+
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0))
+		return -EPROTONOSUPPORT;
+
+	if ((fkeys.basic.n_proto != htons(ETH_P_IP) &&
+	     fkeys.basic.n_proto != htons(ETH_P_IPV6)) ||
+	    (fkeys.basic.ip_proto != IPPROTO_TCP &&
+	     fkeys.basic.ip_proto != IPPROTO_UDP))
+		return -EPROTONOSUPPORT;
+
+	return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys);
+}
+#endif
+
 static const struct net_device_ops hns3_nic_netdev_ops = {
 	.ndo_open		= hns3_nic_net_open,
 	.ndo_stop		= hns3_nic_net_stop,
 	.ndo_start_xmit		= hns3_nic_net_xmit,
 	.ndo_tx_timeout		= hns3_nic_net_timeout,
 	.ndo_set_mac_address	= hns3_nic_net_set_mac_address,
+	.ndo_do_ioctl		= hns3_nic_do_ioctl,
 	.ndo_change_mtu		= hns3_nic_change_mtu,
 	.ndo_set_features	= hns3_nic_set_features,
 	.ndo_get_stats64	= hns3_nic_get_stats64,
@@ -1543,9 +1820,13 @@
 	.ndo_vlan_rx_add_vid	= hns3_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= hns3_vlan_rx_kill_vid,
 	.ndo_set_vf_vlan	= hns3_ndo_set_vf_vlan,
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer	= hns3_rx_flow_steer,
+#endif
+
 };
 
-static bool hns3_is_phys_func(struct pci_dev *pdev)
+bool hns3_is_phys_func(struct pci_dev *pdev)
 {
 	u32 dev_id = pdev->device;
 
@@ -1584,6 +1865,15 @@
 	pci_disable_sriov(pdev);
 }
 
+static void hns3_get_dev_capability(struct pci_dev *pdev,
+				    struct hnae3_ae_dev *ae_dev)
+{
+	if (pdev->revision >= 0x21) {
+		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_SUPPORT_FD_B, 1);
+		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_SUPPORT_GRO_B, 1);
+	}
+}
+
 /* hns3_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in hns3_pci_tbl
@@ -1599,8 +1889,7 @@
 	struct hnae3_ae_dev *ae_dev;
 	int ret;
 
-	ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev),
-			      GFP_KERNEL);
+	ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), GFP_KERNEL);
 	if (!ae_dev) {
 		ret = -ENOMEM;
 		return ret;
@@ -1608,12 +1897,17 @@
 
 	ae_dev->pdev = pdev;
 	ae_dev->flag = ent->driver_data;
-	ae_dev->dev_type = HNAE3_DEV_KNIC;
+	ae_dev->reset_type = HNAE3_NONE_RESET;
+	hns3_get_dev_capability(pdev, ae_dev);
 	pci_set_drvdata(pdev, ae_dev);
 
-	hnae3_register_ae_dev(ae_dev);
+	ret = hnae3_register_ae_dev(ae_dev);
+	if (ret) {
+		devm_kfree(&pdev->dev, ae_dev);
+		pci_set_drvdata(pdev, NULL);
+	}
 
-	return 0;
+	return ret;
 }
 
 /* hns3_remove - Device removal routine
@@ -1627,6 +1921,7 @@
 		hns3_disable_sriov(pdev);
 
 	hnae3_unregister_ae_dev(ae_dev);
+	pci_set_drvdata(pdev, NULL);
 }
 
 /**
@@ -1662,12 +1957,104 @@
 	return 0;
 }
 
+static void hns3_shutdown(struct pci_dev *pdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+
+	hnae3_unregister_ae_dev(ae_dev);
+	devm_kfree(&pdev->dev, ae_dev);
+	pci_set_drvdata(pdev, NULL);
+
+	if (system_state == SYSTEM_POWER_OFF)
+		pci_set_power_state(pdev, PCI_D3hot);
+}
+
+static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
+					    pci_channel_state_t state)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+	pci_ers_result_t ret;
+
+	dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (!ae_dev || !ae_dev->ops) {
+		dev_err(&pdev->dev,
+			"Can't recover - error happened before device initialized\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	if (ae_dev->ops->handle_hw_ras_error)
+		ret = ae_dev->ops->handle_hw_ras_error(ae_dev);
+	else
+		return PCI_ERS_RESULT_NONE;
+
+	return ret;
+}
+
+static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+	const struct hnae3_ae_ops *ops;
+	enum hnae3_reset_type reset_type;
+	struct device *dev = &pdev->dev;
+
+	if (!ae_dev || !ae_dev->ops)
+		return PCI_ERS_RESULT_NONE;
+
+	ops = ae_dev->ops;
+	/* request the reset */
+	if (ops->reset_event && ops->get_reset_level &&
+	    ops->set_default_reset_request) {
+		if (ae_dev->hw_err_reset_req) {
+			reset_type = ops->get_reset_level(ae_dev,
+						&ae_dev->hw_err_reset_req);
+			ops->set_default_reset_request(ae_dev, reset_type);
+			dev_info(dev, "requesting reset due to PCI error\n");
+			ops->reset_event(pdev, NULL);
+		}
+
+		return PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
+static void hns3_reset_prepare(struct pci_dev *pdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+
+	dev_info(&pdev->dev, "hns3 flr prepare\n");
+	if (ae_dev && ae_dev->ops && ae_dev->ops->flr_prepare)
+		ae_dev->ops->flr_prepare(ae_dev);
+}
+
+static void hns3_reset_done(struct pci_dev *pdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+
+	dev_info(&pdev->dev, "hns3 flr done\n");
+	if (ae_dev && ae_dev->ops && ae_dev->ops->flr_done)
+		ae_dev->ops->flr_done(ae_dev);
+}
+
+static const struct pci_error_handlers hns3_err_handler = {
+	.error_detected = hns3_error_detected,
+	.slot_reset     = hns3_slot_reset,
+	.reset_prepare	= hns3_reset_prepare,
+	.reset_done	= hns3_reset_done,
+};
+
 static struct pci_driver hns3_driver = {
 	.name     = hns3_driver_name,
 	.id_table = hns3_pci_tbl,
 	.probe    = hns3_probe,
 	.remove   = hns3_remove,
+	.shutdown = hns3_shutdown,
 	.sriov_configure = hns3_pci_sriov_configure,
+	.err_handler    = &hns3_err_handler,
 };
 
 /* set default feature to hns3 */
@@ -1682,7 +2069,7 @@
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
 
 	netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
 
@@ -1694,30 +2081,37 @@
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
 
 	netdev->vlan_features |=
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
 		NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
 		NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
 
 	netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
 
-	if (pdev->revision != 0x20)
-		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+	if (pdev->revision >= 0x21) {
+		netdev->hw_features |= NETIF_F_GRO_HW;
+		netdev->features |= NETIF_F_GRO_HW;
+
+		if (!(h->flags & HNAE3_SUPPORT_VF)) {
+			netdev->hw_features |= NETIF_F_NTUPLE;
+			netdev->features |= NETIF_F_NTUPLE;
+		}
+	}
 }
 
 static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb)
 {
-	unsigned int order = hnae3_page_order(ring);
+	unsigned int order = hns3_page_order(ring);
 	struct page *p;
 
 	p = dev_alloc_pages(order);
@@ -1728,7 +2122,7 @@
 	cb->page_offset = 0;
 	cb->reuse_flag = 0;
 	cb->buf  = page_address(p);
-	cb->length = hnae3_page_size(ring);
+	cb->length = hns3_page_size(ring);
 	cb->type = DESC_TYPE_PAGE;
 
 	return 0;
@@ -1749,7 +2143,7 @@
 	cb->dma = dma_map_page(ring_to_dev(ring), cb->priv, 0,
 			       cb->length, ring_to_dma_dir(ring));
 
-	if (dma_mapping_error(ring_to_dev(ring), cb->dma))
+	if (unlikely(dma_mapping_error(ring_to_dev(ring), cb->dma)))
 		return -EIO;
 
 	return 0;
@@ -1761,7 +2155,7 @@
 	if (cb->type == DESC_TYPE_SKB)
 		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
 				 ring_to_dma_dir(ring));
-	else
+	else if (cb->length)
 		dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
 			       ring_to_dma_dir(ring));
 }
@@ -1809,9 +2203,8 @@
 {
 	int size = ring->desc_num * sizeof(ring->desc[0]);
 
-	ring->desc = dma_zalloc_coherent(ring_to_dev(ring), size,
-					 &ring->desc_dma_addr,
-					 GFP_KERNEL);
+	ring->desc = dma_alloc_coherent(ring_to_dev(ring), size,
+					&ring->desc_dma_addr, GFP_KERNEL);
 	if (!ring->desc)
 		return -ENOMEM;
 
@@ -1870,7 +2263,7 @@
 	return ret;
 }
 
-/* detach a in-used buffer and replace with a reserved one  */
+/* detach an in-use buffer and replace it with a reserved one */
 static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
 				struct hns3_desc_cb *res_cb)
 {
@@ -1883,22 +2276,35 @@
 static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
 {
 	ring->desc_cb[i].reuse_flag = 0;
-	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma
-		+ ring->desc_cb[i].page_offset);
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+					 ring->desc_cb[i].page_offset);
 	ring->desc[i].rx.bd_base_info = 0;
 }
 
-static void hns3_nic_reclaim_one_desc(struct hns3_enet_ring *ring, int *bytes,
-				      int *pkts)
+static void hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int head,
+				  int *bytes, int *pkts)
 {
-	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
+	int ntc = ring->next_to_clean;
+	struct hns3_desc_cb *desc_cb;
 
-	(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
-	(*bytes) += desc_cb->length;
-	/* desc_cb will be cleaned, after hnae3_free_buffer_detach*/
-	hns3_free_buffer_detach(ring, ring->next_to_clean);
+	while (head != ntc) {
+		desc_cb = &ring->desc_cb[ntc];
+		(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
+		(*bytes) += desc_cb->length;
+		/* desc_cb will be cleaned after hnae3_free_buffer_detach */
+		hns3_free_buffer_detach(ring, ntc);
 
-	ring_ptr_move_fw(ring, next_to_clean);
+		if (++ntc == ring->desc_num)
+			ntc = 0;
+
+		/* Issue prefetch for next Tx descriptor */
+		prefetch(&ring->desc_cb[ntc]);
+	}
+
+	/* This smp_store_release() pairs with smp_load_acquire() in
+	 * ring_space called by hns3_nic_net_xmit.
+	 */
+	smp_store_release(&ring->next_to_clean, ntc);
 }
 
 static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
@@ -1912,7 +2318,7 @@
 	return u > c ? (h > c && h <= u) : (h > c || h <= u);
 }
 
-bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
+void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
 {
 	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -1924,7 +2330,7 @@
 	rmb(); /* Make sure head is ready before touch any data */
 
 	if (is_ring_empty(ring) || head == ring->next_to_clean)
-		return true; /* no data to poll */
+		return; /* no data to poll */
 
 	if (unlikely(!is_valid_clean_head(ring, head))) {
 		netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
@@ -1933,17 +2339,12 @@
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.io_err_cnt++;
 		u64_stats_update_end(&ring->syncp);
-		return true;
+		return;
 	}
 
 	bytes = 0;
 	pkts = 0;
-	while (head != ring->next_to_clean && budget) {
-		hns3_nic_reclaim_one_desc(ring, &bytes, &pkts);
-		/* Issue prefetch for next Tx descriptor */
-		prefetch(&ring->desc_cb[ring->next_to_clean]);
-		budget--;
-	}
+	hns3_nic_reclaim_desc(ring, head, &bytes, &pkts);
 
 	ring->tqp_vector->tx_group.total_bytes += bytes;
 	ring->tqp_vector->tx_group.total_packets += pkts;
@@ -1968,8 +2369,6 @@
 			ring->stats.restart_queue++;
 		}
 	}
-
-	return !!budget;
 }
 
 static int hns3_desc_unused(struct hns3_enet_ring *ring)
@@ -1980,8 +2379,8 @@
 	return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
 }
 
-static void
-hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count)
+static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
+				      int cleand_count)
 {
 	struct hns3_desc_cb *desc_cb;
 	struct hns3_desc_cb res_cbs;
@@ -2002,11 +2401,16 @@
 				ring->stats.sw_err_cnt++;
 				u64_stats_update_end(&ring->syncp);
 
-				netdev_err(ring->tqp->handle->kinfo.netdev,
-					   "hnae reserve buffer map failed.\n");
+				hns3_rl_err(ring->tqp_vector->napi.dev,
+					    "alloc rx buffer failed: %d\n",
+					    ret);
 				break;
 			}
 			hns3_replace_buffer(ring, ring->next_to_use, &res_cbs);
+
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.non_reuse_pg++;
+			u64_stats_update_end(&ring->syncp);
 		}
 
 		ring_ptr_move_fw(ring, next_to_use);
@@ -2020,64 +2424,95 @@
 				struct hns3_enet_ring *ring, int pull_len,
 				struct hns3_desc_cb *desc_cb)
 {
-	struct hns3_desc *desc;
-	u32 truesize;
-	int size;
-	int last_offset;
-	bool twobufs;
-
-	twobufs = ((PAGE_SIZE < 8192) &&
-		hnae3_buf_size(ring) == HNS3_BUFFER_SIZE_2048);
-
-	desc = &ring->desc[ring->next_to_clean];
-	size = le16_to_cpu(desc->rx.size);
-
-	truesize = hnae3_buf_size(ring);
-
-	if (!twobufs)
-		last_offset = hnae3_page_size(ring) - hnae3_buf_size(ring);
+	struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
+	int size = le16_to_cpu(desc->rx.size);
+	u32 truesize = hns3_buf_size(ring);
 
 	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
 			size - pull_len, truesize);
 
-	 /* Avoid re-using remote pages,flag default unreuse */
-	if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
+	/* Avoid re-using remote pages, and pages the stack may still be
+	 * using when page_offset rolls back to zero; flag as unreusable
+	 * by default
+	 */
+	if (unlikely(page_to_nid(desc_cb->priv) != numa_mem_id()) ||
+	    (!desc_cb->page_offset && page_count(desc_cb->priv) > 1))
 		return;
 
-	if (twobufs) {
-		/* If we are only owner of page we can reuse it */
-		if (likely(page_count(desc_cb->priv) == 1)) {
-			/* Flip page offset to other buffer */
-			desc_cb->page_offset ^= truesize;
-
-			desc_cb->reuse_flag = 1;
-			/* bump ref count on page before it is given*/
-			get_page(desc_cb->priv);
-		}
-		return;
-	}
-
 	/* Move offset up to the next cache line */
 	desc_cb->page_offset += truesize;
 
-	if (desc_cb->page_offset <= last_offset) {
+	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
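+		/* there is still room in the page for another buffer */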
 		desc_cb->reuse_flag = 1;
-		/* Bump ref count on page before it is given*/
+		/* Bump ref count on page before it is given */
+		get_page(desc_cb->priv);
+	} else if (page_count(desc_cb->priv) == 1) {
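+		/* the page is used up, but we are the sole owner, so recycle
+		 * it from offset 0
+		 */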
+		desc_cb->reuse_flag = 1;
+		desc_cb->page_offset = 0;
 		get_page(desc_cb->priv);
 	}
 }
 
+static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
+{
+	__be16 type = skb->protocol;
+	struct tcphdr *th;
+	int depth = 0;
+
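+	/* step over any stacked VLAN headers to locate the IP header */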
+	while (eth_type_vlan(type)) {
+		struct vlan_hdr *vh;
+
+		if ((depth + VLAN_HLEN) > skb_headlen(skb))
+			return -EFAULT;
+
+		vh = (struct vlan_hdr *)(skb->data + depth);
+		type = vh->h_vlan_encapsulated_proto;
+		depth += VLAN_HLEN;
+	}
+
+	skb_set_network_header(skb, depth);
+
+	if (type == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		depth += sizeof(struct iphdr);
+		skb_set_transport_header(skb, depth);
+		th = tcp_hdr(skb);
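+		/* seed th->check with the pseudo-header checksum; the csum
+		 * over the payload is completed later via CHECKSUM_PARTIAL
+		 */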
+		th->check = ~tcp_v4_check(skb->len - depth, iph->saddr,
+					  iph->daddr, 0);
+	} else if (type == htons(ETH_P_IPV6)) {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		depth += sizeof(struct ipv6hdr);
+		skb_set_transport_header(skb, depth);
+		th = tcp_hdr(skb);
+		th->check = ~tcp_v6_check(skb->len - depth, &iph->saddr,
+					  &iph->daddr, 0);
+	} else {
+		hns3_rl_err(skb->dev,
+			    "Error: FW GRO supports only IPv4/IPv6, not 0x%04x, depth: %d\n",
+			    be16_to_cpu(type), depth);
+		return -EFAULT;
+	}
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	if (th->cwr)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+	if (l234info & BIT(HNS3_RXD_GRO_FIXID_B))
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
+	skb->csum_start = (unsigned char *)th - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	return 0;
+}
+
 static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
-			     struct hns3_desc *desc)
+			     u32 l234info, u32 bd_base_info, u32 ol_info)
 {
 	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
 	int l3_type, l4_type;
-	u32 bd_base_info;
 	int ol4_type;
-	u32 l234info;
-
-	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
-	l234info = le32_to_cpu(desc->rx.l234_info);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -2087,14 +2522,12 @@
 		return;
 
 	/* check if hardware has done checksum */
-	if (!hnae3_get_bit(bd_base_info, HNS3_RXD_L3L4P_B))
+	if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
 		return;
 
-	if (unlikely(hnae3_get_bit(l234info, HNS3_RXD_L3E_B) ||
-		     hnae3_get_bit(l234info, HNS3_RXD_L4E_B) ||
-		     hnae3_get_bit(l234info, HNS3_RXD_OL3E_B) ||
-		     hnae3_get_bit(l234info, HNS3_RXD_OL4E_B))) {
-		netdev_err(netdev, "L3/L4 error pkt\n");
+	if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
+				 BIT(HNS3_RXD_OL3E_B) |
+				 BIT(HNS3_RXD_OL4E_B)))) {
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.l3l4_csum_err++;
 		u64_stats_update_end(&ring->syncp);
@@ -2102,12 +2535,7 @@
 		return;
 	}
 
-	l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
-				  HNS3_RXD_L3ID_S);
-	l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M,
-				  HNS3_RXD_L4ID_S);
-
-	ol4_type = hnae3_get_field(l234info, HNS3_RXD_OL4ID_M,
+	ol4_type = hnae3_get_field(ol_info, HNS3_RXD_OL4ID_M,
 				   HNS3_RXD_OL4ID_S);
 	switch (ol4_type) {
 	case HNS3_OL4_TYPE_MAC_IN_UDP:
@@ -2115,6 +2543,11 @@
 		skb->csum_level = 1;
 		/* fall through */
 	case HNS3_OL4_TYPE_NO_TUN:
+		l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
+					  HNS3_RXD_L3ID_S);
+		l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M,
+					  HNS3_RXD_L4ID_S);
+
 		/* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */
 		if ((l3_type == HNS3_L3_TYPE_IPV4 ||
 		     l3_type == HNS3_L3_TYPE_IPV6) &&
@@ -2123,91 +2556,83 @@
 		     l4_type == HNS3_L4_TYPE_SCTP))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		break;
+	default:
+		break;
 	}
 }
 
 static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb)
 {
+	if (skb_has_frag_list(skb))
+		napi_gro_flush(&ring->tqp_vector->napi, false);
+
 	napi_gro_receive(&ring->tqp_vector->napi, skb);
 }
 
-static u16 hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
-			       struct hns3_desc *desc, u32 l234info)
+static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
+				struct hns3_desc *desc, u32 l234info,
+				u16 *vlan_tag)
 {
+	struct hnae3_handle *handle = ring->tqp->handle;
 	struct pci_dev *pdev = ring->tqp->handle->pdev;
-	u16 vlan_tag;
 
 	if (pdev->revision == 0x20) {
-		vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
-		if (!(vlan_tag & VLAN_VID_MASK))
-			vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+		*vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
+		if (!(*vlan_tag & VLAN_VID_MASK))
+			*vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
 
-		return vlan_tag;
+		return (*vlan_tag != 0);
 	}
 
 #define HNS3_STRP_OUTER_VLAN	0x1
 #define HNS3_STRP_INNER_VLAN	0x2
+#define HNS3_STRP_BOTH		0x3
 
+	/* Hardware always inserts the VLAN tag into the RX descriptor when
+	 * it strips the tag from the packet; the driver needs to determine
+	 * which tag to report to the stack.
+	 */
 	switch (hnae3_get_field(l234info, HNS3_RXD_STRP_TAGP_M,
 				HNS3_RXD_STRP_TAGP_S)) {
 	case HNS3_STRP_OUTER_VLAN:
-		vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
-		break;
-	case HNS3_STRP_INNER_VLAN:
-		vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
-		break;
-	default:
-		vlan_tag = 0;
-		break;
-	}
+		if (handle->port_base_vlan_state !=
+				HNAE3_PORT_BASE_VLAN_DISABLE)
+			return false;
 
-	return vlan_tag;
+		*vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
+		return true;
+	case HNS3_STRP_INNER_VLAN:
+		if (handle->port_base_vlan_state !=
+				HNAE3_PORT_BASE_VLAN_DISABLE)
+			return false;
+
+		*vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+		return true;
+	case HNS3_STRP_BOTH:
+		if (handle->port_base_vlan_state ==
+				HNAE3_PORT_BASE_VLAN_DISABLE)
+			*vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
+		else
+			*vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+
+		return true;
+	default:
+		return false;
+	}
 }
 
-static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
-			     struct sk_buff **out_skb, int *out_bnum)
+static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
+			  unsigned char *va)
 {
+#define HNS3_NEED_ADD_FRAG	1
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
 	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
-	struct hns3_desc_cb *desc_cb;
-	struct hns3_desc *desc;
 	struct sk_buff *skb;
-	unsigned char *va;
-	u32 bd_base_info;
-	int pull_len;
-	u32 l234info;
-	int length;
-	int bnum;
 
-	desc = &ring->desc[ring->next_to_clean];
-	desc_cb = &ring->desc_cb[ring->next_to_clean];
-
-	prefetch(desc);
-
-	length = le16_to_cpu(desc->rx.size);
-	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
-
-	/* Check valid BD */
-	if (unlikely(!hnae3_get_bit(bd_base_info, HNS3_RXD_VLD_B)))
-		return -EFAULT;
-
-	va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
-
-	/* Prefetch first cache line of first page
-	 * Idea is to cache few bytes of the header of the packet. Our L1 Cache
-	 * line size is 64B so need to prefetch twice to make it 128B. But in
-	 * actual we can have greater size of caches with 128B Level 1 cache
-	 * lines. In such a case, single fetch would suffice to cache in the
-	 * relevant part of the header.
-	 */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
-
-	skb = *out_skb = napi_alloc_skb(&ring->tqp_vector->napi,
-					HNS3_RX_HEAD_SIZE);
+	ring->skb = napi_alloc_skb(&ring->tqp_vector->napi, HNS3_RX_HEAD_SIZE);
+	skb = ring->skb;
 	if (unlikely(!skb)) {
-		netdev_err(netdev, "alloc rx skb fail\n");
+		hns3_rl_err(netdev, "alloc rx skb fail\n");
 
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.sw_err_cnt++;
@@ -2218,43 +2643,163 @@
 
 	prefetchw(skb->data);
 
-	bnum = 1;
+	ring->pending_buf = 1;
+	ring->frag_num = 0;
+	ring->tail_skb = NULL;
 	if (length <= HNS3_RX_HEAD_SIZE) {
 		memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
 
 		/* We can reuse buffer as-is, just make sure it is local */
-		if (likely(page_to_nid(desc_cb->priv) == numa_node_id()))
+		if (likely(page_to_nid(desc_cb->priv) == numa_mem_id()))
 			desc_cb->reuse_flag = 1;
 		else /* This page cannot be reused so discard it */
 			put_page(desc_cb->priv);
 
 		ring_ptr_move_fw(ring, next_to_clean);
+		return 0;
+	}
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.seg_pkt_cnt++;
+	u64_stats_update_end(&ring->syncp);
+
+	ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE);
+	__skb_put(skb, ring->pull_len);
+	hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len,
+			    desc_cb);
+	ring_ptr_move_fw(ring, next_to_clean);
+
+	return HNS3_NEED_ADD_FRAG;
+}
+
+static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
+			 struct sk_buff **out_skb, bool pending)
+{
+	struct sk_buff *skb = *out_skb;
+	struct sk_buff *head_skb = *out_skb;
+	struct sk_buff *new_skb;
+	struct hns3_desc_cb *desc_cb;
+	struct hns3_desc *pre_desc;
+	u32 bd_base_info;
+	int pre_bd;
+
+	/* if there is a pending BD, the SW param next_to_clean has already
+	 * moved to the next descriptor, so read the previous one instead
+	 */
+	if (pending) {
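+		/* step back one BD, wrapping around the ring boundary */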
+		pre_bd = (ring->next_to_clean - 1 + ring->desc_num) %
+			 ring->desc_num;
+		pre_desc = &ring->desc[pre_bd];
+		bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info);
 	} else {
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.seg_pkt_cnt++;
-		u64_stats_update_end(&ring->syncp);
-
-		pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE);
-
-		memcpy(__skb_put(skb, pull_len), va,
-		       ALIGN(pull_len, sizeof(long)));
-
-		hns3_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
-		ring_ptr_move_fw(ring, next_to_clean);
-
-		while (!hnae3_get_bit(bd_base_info, HNS3_RXD_FE_B)) {
-			desc = &ring->desc[ring->next_to_clean];
-			desc_cb = &ring->desc_cb[ring->next_to_clean];
-			bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
-			hns3_nic_reuse_page(skb, bnum, ring, 0, desc_cb);
-			ring_ptr_move_fw(ring, next_to_clean);
-			bnum++;
-		}
+		bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
 	}
 
-	*out_bnum = bnum;
+	while (!(bd_base_info & BIT(HNS3_RXD_FE_B))) {
+		desc = &ring->desc[ring->next_to_clean];
+		desc_cb = &ring->desc_cb[ring->next_to_clean];
+		bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+		/* make sure the HW has finished writing the descriptor */
+		dma_rmb();
+		if (!(bd_base_info & BIT(HNS3_RXD_VLD_B)))
+			return -ENXIO;
 
+		if (unlikely(ring->frag_num >= MAX_SKB_FRAGS)) {
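+			/* no frag slot left, chain a fresh skb via frag_list */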
+			new_skb = napi_alloc_skb(&ring->tqp_vector->napi,
+						 HNS3_RX_HEAD_SIZE);
+			if (unlikely(!new_skb)) {
+				hns3_rl_err(ring->tqp_vector->napi.dev,
+					    "alloc rx fraglist skb fail\n");
+				return -ENXIO;
+			}
+			ring->frag_num = 0;
+
+			if (ring->tail_skb) {
+				ring->tail_skb->next = new_skb;
+				ring->tail_skb = new_skb;
+			} else {
+				skb_shinfo(skb)->frag_list = new_skb;
+				ring->tail_skb = new_skb;
+			}
+		}
+
+		if (ring->tail_skb) {
+			head_skb->truesize += hns3_buf_size(ring);
+			head_skb->data_len += le16_to_cpu(desc->rx.size);
+			head_skb->len += le16_to_cpu(desc->rx.size);
+			skb = ring->tail_skb;
+		}
+
+		hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb);
+		ring_ptr_move_fw(ring, next_to_clean);
+		ring->pending_buf++;
+	}
+
+	return 0;
+}
+
+static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
+				     struct sk_buff *skb, u32 l234info,
+				     u32 bd_base_info, u32 ol_info)
+{
+	u32 l3_type;
+
+	skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info,
+						    HNS3_RXD_GRO_SIZE_M,
+						    HNS3_RXD_GRO_SIZE_S);
+	/* if there is no HW GRO, do not set gro params */
+	if (!skb_shinfo(skb)->gso_size) {
+		hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info);
+		return 0;
+	}
+
+	NAPI_GRO_CB(skb)->count = hnae3_get_field(l234info,
+						  HNS3_RXD_GRO_COUNT_M,
+						  HNS3_RXD_GRO_COUNT_S);
+
+	l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S);
+	if (l3_type == HNS3_L3_TYPE_IPV4)
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	else if (l3_type == HNS3_L3_TYPE_IPV6)
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+	else
+		return -EFAULT;
+
+	return hns3_gro_complete(skb, l234info);
+}
+
+static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
+				     struct sk_buff *skb, u32 rss_hash)
+{
+	struct hnae3_handle *handle = ring->tqp->handle;
+	enum pkt_hash_types rss_type;
+
+	if (rss_hash)
+		rss_type = handle->kinfo.rss_type;
+	else
+		rss_type = PKT_HASH_TYPE_NONE;
+
+	skb_set_hash(skb, rss_hash, rss_type);
+}
+
+static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
+{
+	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	enum hns3_pkt_l2t_type l2_frame_type;
+	u32 bd_base_info, l234info, ol_info;
+	struct hns3_desc *desc;
+	unsigned int len;
+	int pre_ntc, ret;
+
+	/* the bdinfo handled below is only valid on the last BD of the
+	 * current packet, and ring->next_to_clean already points at the
+	 * first descriptor of the next packet, hence the - 1 below.
+	 */
+	pre_ntc = ring->next_to_clean ? (ring->next_to_clean - 1) :
+					(ring->desc_num - 1);
+	desc = &ring->desc[pre_ntc];
+	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
 	l234info = le32_to_cpu(desc->rx.l234_info);
+	ol_info = le32_to_cpu(desc->rx.ol_info);
 
 	/* Based on hw strategy, the tag offloaded will be stored at
 	 * ot_vlan_tag in two layer tag case, and stored at vlan_tag
@@ -2263,140 +2808,204 @@
 	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
 		u16 vlan_tag;
 
-		vlan_tag = hns3_parse_vlan_tag(ring, desc, l234info);
-		if (vlan_tag & VLAN_VID_MASK)
-			__vlan_hwaccel_put_tag(skb,
-					       htons(ETH_P_8021Q),
+		if (hns3_parse_vlan_tag(ring, desc, l234info, &vlan_tag))
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 					       vlan_tag);
 	}
 
-	if (unlikely(!hnae3_get_bit(bd_base_info, HNS3_RXD_VLD_B))) {
-		netdev_err(netdev, "no valid bd,%016llx,%016llx\n",
-			   ((u64 *)desc)[0], ((u64 *)desc)[1]);
+	if (unlikely(!desc->rx.pkt_len || (l234info & (BIT(HNS3_RXD_TRUNCAT_B) |
+				  BIT(HNS3_RXD_L2E_B))))) {
 		u64_stats_update_begin(&ring->syncp);
-		ring->stats.non_vld_descs++;
+		if (l234info & BIT(HNS3_RXD_L2E_B))
+			ring->stats.l2_err++;
+		else
+			ring->stats.err_pkt_len++;
 		u64_stats_update_end(&ring->syncp);
 
-		dev_kfree_skb_any(skb);
-		return -EINVAL;
-	}
-
-	if (unlikely((!desc->rx.pkt_len) ||
-		     hnae3_get_bit(l234info, HNS3_RXD_TRUNCAT_B))) {
-		netdev_err(netdev, "truncated pkt\n");
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.err_pkt_len++;
-		u64_stats_update_end(&ring->syncp);
-
-		dev_kfree_skb_any(skb);
 		return -EFAULT;
 	}
 
-	if (unlikely(hnae3_get_bit(l234info, HNS3_RXD_L2E_B))) {
-		netdev_err(netdev, "L2 error pkt\n");
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.l2_err++;
-		u64_stats_update_end(&ring->syncp);
+	len = skb->len;
 
-		dev_kfree_skb_any(skb);
-		return -EFAULT;
+	/* set skb->protocol before handing the skb up to the IP stack */
+	skb->protocol = eth_type_trans(skb, netdev);
+
+	/* This is needed in order to enable forwarding support */
+	ret = hns3_set_gro_and_checksum(ring, skb, l234info,
+					bd_base_info, ol_info);
+	if (unlikely(ret)) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.rx_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		return ret;
 	}
 
+	l2_frame_type = hnae3_get_field(l234info, HNS3_RXD_DMAC_M,
+					HNS3_RXD_DMAC_S);
+
 	u64_stats_update_begin(&ring->syncp);
 	ring->stats.rx_pkts++;
-	ring->stats.rx_bytes += skb->len;
+	ring->stats.rx_bytes += len;
+
+	if (l2_frame_type == HNS3_L2_TYPE_MULTICAST)
+		ring->stats.rx_multicast++;
+
 	u64_stats_update_end(&ring->syncp);
 
-	ring->tqp_vector->rx_group.total_bytes += skb->len;
+	ring->tqp_vector->rx_group.total_bytes += len;
 
-	hns3_rx_checksum(ring, skb, desc);
+	hns3_set_rx_skb_rss_type(ring, skb, le32_to_cpu(desc->rx.rss_hash));
 	return 0;
 }
 
-int hns3_clean_rx_ring(
-		struct hns3_enet_ring *ring, int budget,
-		void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
+			     struct sk_buff **out_skb)
+{
+	struct sk_buff *skb = ring->skb;
+	struct hns3_desc_cb *desc_cb;
+	struct hns3_desc *desc;
+	unsigned int length;
+	u32 bd_base_info;
+	int ret;
+
+	desc = &ring->desc[ring->next_to_clean];
+	desc_cb = &ring->desc_cb[ring->next_to_clean];
+
+	prefetch(desc);
+
+	length = le16_to_cpu(desc->rx.size);
+	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+
+	/* Check valid BD */
+	if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B))))
+		return -ENXIO;
+
+	if (!skb)
+		ring->va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
+
+	/* Prefetch the first cache line of the first page.
+	 * The idea is to cache a few bytes of the packet header. Our L1
+	 * cache line size is 64B, so we need to prefetch twice to cover
+	 * 128B. Some CPUs have larger 128B L1 cache lines; in that case
+	 * a single prefetch would suffice to cache the relevant part of
+	 * the header.
+	 */
+	prefetch(ring->va);
+#if L1_CACHE_BYTES < 128
+	prefetch(ring->va + L1_CACHE_BYTES);
+#endif
+
+	if (!skb) {
+		ret = hns3_alloc_skb(ring, length, ring->va);
+		*out_skb = skb = ring->skb;
+
+		if (ret < 0) /* alloc buffer fail */
+			return ret;
+		if (ret > 0) { /* need add frag */
+			ret = hns3_add_frag(ring, desc, &skb, false);
+			if (ret)
+				return ret;
+
+			/* As the head data may be changed when GRO is
+			 * enabled, copy the head data in after the rest
+			 * of the packet has been received.
+			 */
+			memcpy(skb->data, ring->va,
+			       ALIGN(ring->pull_len, sizeof(long)));
+		}
+	} else {
+		ret = hns3_add_frag(ring, desc, &skb, true);
+		if (ret)
+			return ret;
+
+		/* As the head data may be changed when GRO is enabled,
+		 * copy the head data in after the rest of the packet
+		 * has been received.
+		 */
+		memcpy(skb->data, ring->va,
+		       ALIGN(ring->pull_len, sizeof(long)));
+	}
+
+	ret = hns3_handle_bdinfo(ring, skb);
+	if (unlikely(ret)) {
+		dev_kfree_skb_any(skb);
+		return ret;
+	}
+
+	skb_record_rx_queue(skb, ring->tqp->tqp_index);
+	*out_skb = skb;
+
+	return 0;
+}
+
+int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
+		       void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
 {
 #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
-	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
-	int recv_pkts, recv_bds, clean_count, err;
 	int unused_count = hns3_desc_unused(ring);
-	struct sk_buff *skb = NULL;
-	int num, bnum = 0;
+	struct sk_buff *skb = ring->skb;
+	int recv_pkts = 0;
+	int recv_bds = 0;
+	int err, num;
 
 	num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG);
 	rmb(); /* Make sure num has taken effect before the other data is touched */
 
-	recv_pkts = 0, recv_bds = 0, clean_count = 0;
 	num -= unused_count;
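+	/* exclude BDs held by the pending (not yet delivered) packet so
+	 * their buffers are not handed back to hardware prematurely
+	 */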
+	unused_count -= ring->pending_buf;
 
 	while (recv_pkts < budget && recv_bds < num) {
 		/* Reuse or realloc buffers */
-		if (clean_count + unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
-			hns3_nic_alloc_rx_buffers(ring,
-						  clean_count + unused_count);
-			clean_count = 0;
-			unused_count = hns3_desc_unused(ring);
+		if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
+			hns3_nic_alloc_rx_buffers(ring, unused_count);
+			unused_count = hns3_desc_unused(ring) -
+					ring->pending_buf;
 		}
 
 		/* Poll one pkt */
-		err = hns3_handle_rx_bd(ring, &skb, &bnum);
+		err = hns3_handle_rx_bd(ring, &skb);
 		if (unlikely(!skb)) /* This fault cannot be repaired */
 			goto out;
 
-		recv_bds += bnum;
-		clean_count += bnum;
-		if (unlikely(err)) {  /* Do jump the err */
-			recv_pkts++;
+		if (err == -ENXIO) { /* no FE (frame end) seen yet for the packet */
+			goto out;
+		} else if (unlikely(err)) { /* skip this erroneous packet */
+			recv_bds += ring->pending_buf;
+			unused_count += ring->pending_buf;
+			ring->skb = NULL;
+			ring->pending_buf = 0;
 			continue;
 		}
 
-		/* Do update ip stack process */
-		skb->protocol = eth_type_trans(skb, netdev);
 		rx_fn(ring, skb);
+		recv_bds += ring->pending_buf;
+		unused_count += ring->pending_buf;
+		ring->skb = NULL;
+		ring->pending_buf = 0;
 
 		recv_pkts++;
 	}
 
 out:
 	/* Make sure all data has been written before submitting */
-	if (clean_count + unused_count > 0)
-		hns3_nic_alloc_rx_buffers(ring,
-					  clean_count + unused_count);
+	if (unused_count > 0)
+		hns3_nic_alloc_rx_buffers(ring, unused_count);
 
 	return recv_pkts;
 }
 
-static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
+static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group)
 {
-	struct hns3_enet_tqp_vector *tqp_vector =
-					ring_group->ring->tqp_vector;
+#define HNS3_RX_LOW_BYTE_RATE 10000
+#define HNS3_RX_MID_BYTE_RATE 20000
+#define HNS3_RX_ULTRA_PACKET_RATE 40
+
 	enum hns3_flow_level_range new_flow_level;
-	int packets_per_msecs;
-	int bytes_per_msecs;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	int packets_per_msecs, bytes_per_msecs;
 	u32 time_passed_ms;
-	u16 new_int_gl;
 
-	if (!ring_group->coal.int_gl || !tqp_vector->last_jiffies)
-		return false;
-
-	if (ring_group->total_packets == 0) {
-		ring_group->coal.int_gl = HNS3_INT_GL_50K;
-		ring_group->coal.flow_level = HNS3_FLOW_LOW;
-		return true;
-	}
-
-	/* Simple throttlerate management
-	 * 0-10MB/s   lower     (50000 ints/s)
-	 * 10-20MB/s   middle    (20000 ints/s)
-	 * 20-1249MB/s high      (18000 ints/s)
-	 * > 40000pps  ultra     (8000 ints/s)
-	 */
-	new_flow_level = ring_group->coal.flow_level;
-	new_int_gl = ring_group->coal.int_gl;
+	tqp_vector = ring_group->ring->tqp_vector;
 	time_passed_ms =
 		jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
-
 	if (!time_passed_ms)
 		return false;
 
@@ -2406,9 +3015,14 @@
 	do_div(ring_group->total_bytes, time_passed_ms);
 	bytes_per_msecs = ring_group->total_bytes;
 
-#define HNS3_RX_LOW_BYTE_RATE 10000
-#define HNS3_RX_MID_BYTE_RATE 20000
+	new_flow_level = ring_group->coal.flow_level;
 
+	/* Simple throttle rate management
+	 * 0-10MB/s   lower     (50000 ints/s)
+	 * 10-20MB/s  middle    (20000 ints/s)
+	 * 20-1249MB/s high     (18000 ints/s)
+	 * > 40000pps ultra     (8000 ints/s)
+	 */
 	switch (new_flow_level) {
 	case HNS3_FLOW_LOW:
 		if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
@@ -2428,13 +3042,40 @@
 		break;
 	}
 
-#define HNS3_RX_ULTRA_PACKET_RATE 40
-
 	if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
 	    &tqp_vector->rx_group == ring_group)
 		new_flow_level = HNS3_FLOW_ULTRA;
 
-	switch (new_flow_level) {
+	ring_group->total_bytes = 0;
+	ring_group->total_packets = 0;
+	ring_group->coal.flow_level = new_flow_level;
+
+	return true;
+}
+
+static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
+{
+	struct hns3_enet_tqp_vector *tqp_vector;
+	u16 new_int_gl;
+
+	if (!ring_group->ring)
+		return false;
+
+	tqp_vector = ring_group->ring->tqp_vector;
+	if (!tqp_vector->last_jiffies)
+		return false;
+
+	if (ring_group->total_packets == 0) {
+		ring_group->coal.int_gl = HNS3_INT_GL_50K;
+		ring_group->coal.flow_level = HNS3_FLOW_LOW;
+		return true;
+	}
+
+	if (!hns3_get_new_flow_lvl(ring_group))
+		return false;
+
+	new_int_gl = ring_group->coal.int_gl;
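+	/* map the flow level to one of the predefined HNS3_INT_GL_*
+	 * values (see the throttle rate table in hns3_get_new_flow_lvl)
+	 */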
+	switch (ring_group->coal.flow_level) {
 	case HNS3_FLOW_LOW:
 		new_int_gl = HNS3_INT_GL_50K;
 		break;
@@ -2451,9 +3092,6 @@
 		break;
 	}
 
-	ring_group->total_bytes = 0;
-	ring_group->total_packets = 0;
-	ring_group->coal.flow_level = new_flow_level;
 	if (new_int_gl != ring_group->coal.int_gl) {
 		ring_group->coal.int_gl = new_int_gl;
 		return true;
@@ -2467,10 +3105,10 @@
 	struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
 	bool rx_update, tx_update;
 
-	if (tqp_vector->int_adapt_down > 0) {
-		tqp_vector->int_adapt_down--;
+	/* update param every 1000ms */
+	if (time_before(jiffies,
+			tqp_vector->last_jiffies + msecs_to_jiffies(1000)))
 		return;
-	}
 
 	if (rx_group->coal.gl_adapt_enable) {
 		rx_update = hns3_get_new_int_gl(rx_group);
@@ -2480,36 +3118,40 @@
 	}
 
 	if (tx_group->coal.gl_adapt_enable) {
-		tx_update = hns3_get_new_int_gl(&tqp_vector->tx_group);
+		tx_update = hns3_get_new_int_gl(tx_group);
 		if (tx_update)
 			hns3_set_vector_coalesce_tx_gl(tqp_vector,
 						       tx_group->coal.int_gl);
 	}
 
 	tqp_vector->last_jiffies = jiffies;
-	tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
 }
 
 static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
 {
+	struct hns3_nic_priv *priv = netdev_priv(napi->dev);
 	struct hns3_enet_ring *ring;
 	int rx_pkt_total = 0;
 
 	struct hns3_enet_tqp_vector *tqp_vector =
 		container_of(napi, struct hns3_enet_tqp_vector, napi);
 	bool clean_complete = true;
-	int rx_budget;
+	int rx_budget = budget;
+
+	if (unlikely(test_bit(HNS3_NIC_STATE_DOWN, &priv->state))) {
+		napi_complete(napi);
+		return 0;
+	}
 
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	hns3_for_each_ring(ring, tqp_vector->tx_group) {
-		if (!hns3_clean_tx_ring(ring, budget))
-			clean_complete = false;
-	}
+	hns3_for_each_ring(ring, tqp_vector->tx_group)
+		hns3_clean_tx_ring(ring);
 
 	/* make sure the rx ring budget is not smaller than 1 */
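+	/* e.g. a budget of 64 shared by 4 TQPs gives each rx ring a
+	 * budget of 16 (illustrative values); with more TQPs than
+	 * budget the max() clamps it to 1
+	 */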
-	rx_budget = max(budget / tqp_vector->num_tqps, 1);
+	if (tqp_vector->num_tqps > 1)
+		rx_budget = max(budget / tqp_vector->num_tqps, 1);
 
 	hns3_for_each_ring(ring, tqp_vector->rx_group) {
 		int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget,
@@ -2526,9 +3168,11 @@
 	if (!clean_complete)
 		return budget;
 
-	napi_complete(napi);
-	hns3_update_new_int_gl(tqp_vector);
-	hns3_mask_vector_irq(tqp_vector, 1);
+	if (napi_complete(napi) &&
+	    likely(!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))) {
+		hns3_update_new_int_gl(tqp_vector);
+		hns3_mask_vector_irq(tqp_vector, 1);
+	}
 
 	return rx_pkt_total;
 }
@@ -2558,7 +3202,7 @@
 			chain = devm_kzalloc(&pdev->dev, sizeof(*chain),
 					     GFP_KERNEL);
 			if (!chain)
-				return -ENOMEM;
+				goto err_free_chain;
 
 			cur_chain->next = chain;
 			chain->tqp_index = tx_ring->tqp->tqp_index;
@@ -2588,7 +3232,7 @@
 	while (rx_ring) {
 		chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
 		if (!chain)
-			return -ENOMEM;
+			goto err_free_chain;
 
 		cur_chain->next = chain;
 		chain->tqp_index = rx_ring->tqp->tqp_index;
@@ -2603,6 +3247,17 @@
 	}
 
 	return 0;
+
+err_free_chain:
+	cur_chain = head->next;
+	while (cur_chain) {
+		chain = cur_chain->next;
+		devm_kfree(&pdev->dev, cur_chain);
+		cur_chain = chain;
+	}
+	head->next = NULL;
+
+	return -ENOMEM;
 }
 
 static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
@@ -2629,13 +3284,32 @@
 	group->count++;
 }
 
+static void hns3_nic_set_cpumask(struct hns3_nic_priv *priv)
+{
+	struct pci_dev *pdev = priv->ae_handle->pdev;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	int num_vectors = priv->vector_num;
+	int numa_node;
+	int vector_i;
+
+	numa_node = dev_to_node(&pdev->dev);
+
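+	/* spread the vectors over CPUs local to the device's NUMA node;
+	 * cpumask_local_spread() falls back to remote CPUs once the
+	 * local ones are exhausted
+	 */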
+	for (vector_i = 0; vector_i < num_vectors; vector_i++) {
+		tqp_vector = &priv->tqp_vector[vector_i];
+		cpumask_set_cpu(cpumask_local_spread(vector_i, numa_node),
+				&tqp_vector->affinity_mask);
+	}
+}
+
 static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_ring_chain_node vector_ring_chain;
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	int ret = 0;
-	u16 i;
+	int i;
+
+	hns3_nic_set_cpumask(priv);
 
 	for (i = 0; i < priv->vector_num; i++) {
 		tqp_vector = &priv->tqp_vector[i];
@@ -2672,7 +3346,7 @@
 		ret = hns3_get_vector_ring_chain(tqp_vector,
 						 &vector_ring_chain);
 		if (ret)
-			return ret;
+			goto map_ring_fail;
 
 		ret = h->ae_algo->ops->map_ring_to_vector(h,
 			tqp_vector->vector_irq, &vector_ring_chain);
@@ -2680,17 +3354,25 @@
 		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
 		if (ret)
-			return ret;
+			goto map_ring_fail;
 
 		netif_napi_add(priv->netdev, &tqp_vector->napi,
 			       hns3_nic_common_poll, NAPI_POLL_WEIGHT);
 	}
 
 	return 0;
+
+map_ring_fail:
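+	/* unwind: remove the napi contexts added for vectors 0..i-1 */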
+	while (i--)
+		netif_napi_del(&priv->tqp_vector[i].napi);
+
+	return ret;
 }
 
 static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
 {
+#define HNS3_VECTOR_PF_MAX_NUM		64
+
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	struct hnae3_vector_info *vector;
@@ -2703,11 +3385,14 @@
 	/* RSS size, cpu online and vector_num should be the same */
 	/* Should consider 2p/4p later */
 	vector_num = min_t(u16, num_online_cpus(), tqp_num);
+	vector_num = min_t(u16, vector_num, HNS3_VECTOR_PF_MAX_NUM);
+
 	vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
 			      GFP_KERNEL);
 	if (!vector)
 		return -ENOMEM;
 
+	/* save the actual available vector number */
 	vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
 
 	priv->vector_num = vector_num;
@@ -2738,43 +3423,36 @@
 	group->count = 0;
 }
 
-static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
+static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_ring_chain_node vector_ring_chain;
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
-	int i, ret;
+	int i;
 
 	for (i = 0; i < priv->vector_num; i++) {
 		tqp_vector = &priv->tqp_vector[i];
 
-		ret = hns3_get_vector_ring_chain(tqp_vector,
-						 &vector_ring_chain);
-		if (ret)
-			return ret;
+		if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring)
+			continue;
 
-		ret = h->ae_algo->ops->unmap_ring_from_vector(h,
+		hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain);
+
+		h->ae_algo->ops->unmap_ring_from_vector(h,
 			tqp_vector->vector_irq, &vector_ring_chain);
-		if (ret)
-			return ret;
 
 		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
-		if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
-			(void)irq_set_affinity_hint(
-				priv->tqp_vector[i].vector_irq,
-						    NULL);
-			free_irq(priv->tqp_vector[i].vector_irq,
-				 &priv->tqp_vector[i]);
+		if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) {
+			irq_set_affinity_hint(tqp_vector->vector_irq, NULL);
+			free_irq(tqp_vector->vector_irq, tqp_vector);
+			tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
 		}
 
-		priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
 		hns3_clear_ring_group(&tqp_vector->rx_group);
 		hns3_clear_ring_group(&tqp_vector->tx_group);
 		netif_napi_del(&priv->tqp_vector[i].napi);
 	}
-
-	return 0;
 }
 
 static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
@@ -2797,22 +3475,25 @@
 }
 
 static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
-			     int ring_type)
+			     unsigned int ring_type)
 {
 	struct hns3_nic_ring_data *ring_data = priv->ring_data;
 	int queue_num = priv->ae_handle->kinfo.num_tqps;
 	struct pci_dev *pdev = priv->ae_handle->pdev;
 	struct hns3_enet_ring *ring;
+	int desc_num;
 
 	ring = devm_kzalloc(&pdev->dev, sizeof(*ring), GFP_KERNEL);
 	if (!ring)
 		return -ENOMEM;
 
 	if (ring_type == HNAE3_RING_TYPE_TX) {
+		desc_num = priv->ae_handle->kinfo.num_tx_desc;
 		ring_data[q->tqp_index].ring = ring;
 		ring_data[q->tqp_index].queue_index = q->tqp_index;
 		ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
 	} else {
+		desc_num = priv->ae_handle->kinfo.num_rx_desc;
 		ring_data[q->tqp_index + queue_num].ring = ring;
 		ring_data[q->tqp_index + queue_num].queue_index = q->tqp_index;
 		ring->io_base = q->io_base;
@@ -2826,7 +3507,7 @@
 	ring->dev = priv->dev;
 	ring->desc_dma_addr = 0;
 	ring->buf_size = q->buf_size;
-	ring->desc_num = q->desc_num;
+	ring->desc_num = desc_num;
 	ring->next_to_use = 0;
 	ring->next_to_clean = 0;
 
@@ -2843,8 +3524,10 @@
 		return ret;
 
 	ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
-	if (ret)
+	if (ret) {
+		devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring);
 		return ret;
+	}
 
 	return 0;
 }
@@ -2871,7 +3554,14 @@
 
 	return 0;
 err:
+	while (i--) {
+		devm_kfree(priv->dev, priv->ring_data[i].ring);
+		devm_kfree(priv->dev,
+			   priv->ring_data[i + h->kinfo.num_tqps].ring);
+	}
+
 	devm_kfree(&pdev->dev, priv->ring_data);
+	priv->ring_data = NULL;
 	return ret;
 }
 
@@ -2880,12 +3570,16 @@
 	struct hnae3_handle *h = priv->ae_handle;
 	int i;
 
+	if (!priv->ring_data)
+		return;
+
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
 		devm_kfree(priv->dev, priv->ring_data[i].ring);
 		devm_kfree(priv->dev,
 			   priv->ring_data[i + h->kinfo.num_tqps].ring);
 	}
 	devm_kfree(priv->dev, priv->ring_data);
+	priv->ring_data = NULL;
 }
 
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
@@ -2895,8 +3589,8 @@
 	if (ring->desc_num <= 0 || ring->buf_size <= 0)
 		return -EINVAL;
 
-	ring->desc_cb = kcalloc(ring->desc_num, sizeof(ring->desc_cb[0]),
-				GFP_KERNEL);
+	ring->desc_cb = devm_kcalloc(ring_to_dev(ring), ring->desc_num,
+				     sizeof(ring->desc_cb[0]), GFP_KERNEL);
 	if (!ring->desc_cb) {
 		ret = -ENOMEM;
 		goto out;
@@ -2917,19 +3611,24 @@
 out_with_desc:
 	hns3_free_desc(ring);
 out_with_desc_cb:
-	kfree(ring->desc_cb);
+	devm_kfree(ring_to_dev(ring), ring->desc_cb);
 	ring->desc_cb = NULL;
 out:
 	return ret;
 }
 
-static void hns3_fini_ring(struct hns3_enet_ring *ring)
+void hns3_fini_ring(struct hns3_enet_ring *ring)
 {
 	hns3_free_desc(ring);
-	kfree(ring->desc_cb);
+	devm_kfree(ring_to_dev(ring), ring->desc_cb);
 	ring->desc_cb = NULL;
 	ring->next_to_clean = 0;
 	ring->next_to_use = 0;
+	ring->pending_buf = 0;
+	if (ring->skb) {
+		dev_kfree_skb_any(ring->skb);
+		ring->skb = NULL;
+	}
 }
 
 static int hns3_buf_size2type(u32 buf_size)
@@ -2962,8 +3661,7 @@
 	struct hnae3_queue *q = ring->tqp;
 
 	if (!HNAE3_IS_TX_RING(ring)) {
-		hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG,
-			       (u32)dma);
+		hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG, (u32)dma);
 		hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_H_REG,
 			       (u32)((dma >> 31) >> 1));
 
@@ -3038,9 +3736,6 @@
 	int i;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		if (h->ae_algo->ops->reset_queue)
-			h->ae_algo->ops->reset_queue(h, i);
-
 		hns3_fini_ring(priv->ring_data[i].ring);
 		hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
 	}
@@ -3048,11 +3743,12 @@
 }
 
 /* Set mac addr if it is configured, or leave it to the AE driver */
-static void hns3_init_mac_addr(struct net_device *netdev, bool init)
+static int hns3_init_mac_addr(struct net_device *netdev, bool init)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = priv->ae_handle;
 	u8 mac_addr_temp[ETH_ALEN];
+	int ret = 0;
 
 	if (h->ae_algo->ops->get_mac_addr && init) {
 		h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
@@ -3067,42 +3763,91 @@
 	}
 
 	if (h->ae_algo->ops->set_mac_addr)
-		h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
+		ret = h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
 
+	return ret;
 }
 
-static void hns3_uninit_mac_addr(struct net_device *netdev)
+static int hns3_init_phy(struct net_device *netdev)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	int ret = 0;
 
-	if (h->ae_algo->ops->rm_uc_addr)
-		h->ae_algo->ops->rm_uc_addr(h, netdev->dev_addr);
+	if (h->ae_algo->ops->mac_connect_phy)
+		ret = h->ae_algo->ops->mac_connect_phy(h);
+
+	return ret;
 }
 
-static void hns3_nic_set_priv_ops(struct net_device *netdev)
+static void hns3_uninit_phy(struct net_device *netdev)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if ((netdev->features & NETIF_F_TSO) ||
-	    (netdev->features & NETIF_F_TSO6)) {
-		priv->ops.fill_desc = hns3_fill_desc_tso;
-		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
-	} else {
-		priv->ops.fill_desc = hns3_fill_desc;
-		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
-	}
+	if (h->ae_algo->ops->mac_disconnect_phy)
+		h->ae_algo->ops->mac_disconnect_phy(h);
+}
+
+static int hns3_restore_fd_rules(struct net_device *netdev)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	int ret = 0;
+
+	if (h->ae_algo->ops->restore_fd_rules)
+		ret = h->ae_algo->ops->restore_fd_rules(h);
+
+	return ret;
+}
+
+static void hns3_del_all_fd_rules(struct net_device *netdev, bool clear_list)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (h->ae_algo->ops->del_all_fd_entries)
+		h->ae_algo->ops->del_all_fd_entries(h, clear_list);
+}
+
+static int hns3_client_start(struct hnae3_handle *handle)
+{
+	if (!handle->ae_algo->ops->client_start)
+		return 0;
+
+	return handle->ae_algo->ops->client_start(handle);
+}
+
+static void hns3_client_stop(struct hnae3_handle *handle)
+{
+	if (!handle->ae_algo->ops->client_stop)
+		return;
+
+	handle->ae_algo->ops->client_stop(handle);
+}
+
+static void hns3_info_show(struct hns3_nic_priv *priv)
+{
+	struct hnae3_knic_private_info *kinfo = &priv->ae_handle->kinfo;
+
+	dev_info(priv->dev, "MAC address: %pM\n", priv->netdev->dev_addr);
+	dev_info(priv->dev, "Task queue pairs numbers: %d\n", kinfo->num_tqps);
+	dev_info(priv->dev, "RSS size: %d\n", kinfo->rss_size);
+	dev_info(priv->dev, "Allocated RSS size: %d\n", kinfo->req_rss_size);
+	dev_info(priv->dev, "RX buffer length: %d\n", kinfo->rx_buf_len);
+	dev_info(priv->dev, "Desc num per TX queue: %d\n", kinfo->num_tx_desc);
+	dev_info(priv->dev, "Desc num per RX queue: %d\n", kinfo->num_rx_desc);
+	dev_info(priv->dev, "Total number of enabled TCs: %d\n", kinfo->num_tc);
+	dev_info(priv->dev, "Max mtu size: %d\n", priv->netdev->max_mtu);
 }
 
 static int hns3_client_init(struct hnae3_handle *handle)
 {
 	struct pci_dev *pdev = handle->pdev;
+	u16 alloc_tqps, max_rss_size;
 	struct hns3_nic_priv *priv;
 	struct net_device *netdev;
 	int ret;
 
-	netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
-				   hns3_get_max_available_channels(handle));
+	handle->ae_algo->ops->get_tqps_and_rss_info(handle, &alloc_tqps,
+						    &max_rss_size);
+	netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv), alloc_tqps);
 	if (!netdev)
 		return -ENOMEM;
 
@@ -3110,8 +3855,10 @@
 	priv->dev = &pdev->dev;
 	priv->netdev = netdev;
 	priv->ae_handle = handle;
-	priv->ae_handle->last_reset_time = jiffies;
 	priv->tx_timeout_count = 0;
+	set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+
+	handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL);
 
 	handle->kinfo.netdev = netdev;
 	handle->priv = (void *)priv;
@@ -3125,16 +3872,10 @@
 	netdev->netdev_ops = &hns3_nic_netdev_ops;
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	hns3_ethtool_set_ops(netdev);
-	hns3_nic_set_priv_ops(netdev);
 
 	/* Carrier off reporting is important to ethtool even BEFORE open */
 	netif_carrier_off(netdev);
 
-	if (handle->flags & HNAE3_SUPPORT_VF)
-		handle->reset_level = HNAE3_VF_RESET;
-	else
-		handle->reset_level = HNAE3_FUNC_RESET;
-
 	ret = hns3_get_ring_config(priv);
 	if (ret) {
 		ret = -ENOMEM;
@@ -3159,22 +3900,44 @@
 		goto out_init_ring_data;
 	}
 
+	ret = hns3_init_phy(netdev);
+	if (ret)
+		goto out_init_phy;
+
 	ret = register_netdev(netdev);
 	if (ret) {
 		dev_err(priv->dev, "probe register netdev fail!\n");
 		goto out_reg_netdev_fail;
 	}
 
+	ret = hns3_client_start(handle);
+	if (ret) {
+		dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
+		goto out_client_start;
+	}
+
 	hns3_dcbnl_setup(handle);
 
-	/* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */
-	netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+	hns3_dbg_init(handle);
+
+	/* MTU range: (ETH_MIN_MTU(kernel default) - 9702) */
+	netdev->max_mtu = HNS3_MAX_MTU;
+
+	set_bit(HNS3_NIC_STATE_INITED, &priv->state);
+
+	if (netif_msg_drv(handle))
+		hns3_info_show(priv);
 
 	return ret;
 
+out_client_start:
+	unregister_netdev(netdev);
 out_reg_netdev_fail:
+	hns3_uninit_phy(netdev);
+out_init_phy:
+	hns3_uninit_all_ring(priv);
 out_init_ring_data:
-	(void)hns3_nic_uninit_vector_data(priv);
+	hns3_nic_uninit_vector_data(priv);
 out_init_vector_data:
 	hns3_nic_dealloc_vector_data(priv);
 out_alloc_vector_data:
@@ -3191,14 +3954,25 @@
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret;
 
+	hns3_remove_hw_addr(netdev);
+
 	if (netdev->reg_state != NETREG_UNINITIALIZED)
 		unregister_netdev(netdev);
 
-	hns3_force_clear_all_rx_ring(handle);
+	hns3_client_stop(handle);
 
-	ret = hns3_nic_uninit_vector_data(priv);
-	if (ret)
-		netdev_err(netdev, "uninit vector error\n");
+	hns3_uninit_phy(netdev);
+
+	if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
+		netdev_warn(netdev, "already uninitialized\n");
+		goto out_netdev_free;
+	}
+
+	hns3_del_all_fd_rules(netdev, true);
+
+	hns3_clear_all_ring(handle, true);
+
+	hns3_nic_uninit_vector_data(priv);
 
 	ret = hns3_nic_dealloc_vector_data(priv);
 	if (ret)
@@ -3210,10 +3984,9 @@
 
 	hns3_put_ring_config(priv);
 
-	priv->ring_data = NULL;
+	hns3_dbg_uninit(handle);
 
-	hns3_uninit_mac_addr(netdev);
-
+out_netdev_free:
 	free_netdev(netdev);
 }
 
@@ -3227,11 +4000,13 @@
 	if (linkup) {
 		netif_carrier_on(netdev);
 		netif_tx_wake_all_queues(netdev);
-		netdev_info(netdev, "link up\n");
+		if (netif_msg_link(handle))
+			netdev_info(netdev, "link up\n");
 	} else {
 		netif_carrier_off(netdev);
 		netif_tx_stop_all_queues(netdev);
-		netdev_info(netdev, "link down\n");
+		if (netif_msg_link(handle))
+			netdev_info(netdev, "link down\n");
 	}
 }
 
@@ -3239,8 +4014,6 @@
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct net_device *ndev = kinfo->netdev;
-	bool if_running;
-	int ret;
 
 	if (tc > HNAE3_MAX_TC)
 		return -EINVAL;
@@ -3248,41 +4021,57 @@
 	if (!ndev)
 		return -ENODEV;
 
-	if_running = netif_running(ndev);
+	return hns3_nic_set_real_num_queue(ndev);
+}
 
-	if (if_running) {
-		(void)hns3_nic_net_stop(ndev);
-		msleep(100);
+static int hns3_recover_hw_addr(struct net_device *ndev)
+{
+	struct netdev_hw_addr_list *list;
+	struct netdev_hw_addr *ha, *tmp;
+	int ret = 0;
+
+	netif_addr_lock_bh(ndev);
+	/* go through and sync uc_addr entries to the device */
+	list = &ndev->uc;
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		ret = hns3_nic_uc_sync(ndev, ha->addr);
+		if (ret)
+			goto out;
 	}
 
-	ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ?
-		kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP;
-	if (ret)
-		goto err_out;
+	/* go through and sync mc_addr entries to the device */
+	list = &ndev->mc;
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		ret = hns3_nic_mc_sync(ndev, ha->addr);
+		if (ret)
+			goto out;
+	}
 
-	ret = hns3_nic_set_real_num_queue(ndev);
-
-err_out:
-	if (if_running)
-		(void)hns3_nic_net_open(ndev);
-
+out:
+	netif_addr_unlock_bh(ndev);
 	return ret;
 }
 
-static void hns3_recover_hw_addr(struct net_device *ndev)
+static void hns3_remove_hw_addr(struct net_device *netdev)
 {
 	struct netdev_hw_addr_list *list;
 	struct netdev_hw_addr *ha, *tmp;
 
-	/* go through and sync uc_addr entries to the device */
-	list = &ndev->uc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list)
-		hns3_nic_uc_sync(ndev, ha->addr);
+	hns3_nic_uc_unsync(netdev, netdev->dev_addr);
 
-	/* go through and sync mc_addr entries to the device */
-	list = &ndev->mc;
+	netif_addr_lock_bh(netdev);
+	/* go through and unsync uc_addr entries to the device */
+	list = &netdev->uc;
 	list_for_each_entry_safe(ha, tmp, &list->list, list)
-		hns3_nic_mc_sync(ndev, ha->addr);
+		hns3_nic_uc_unsync(netdev, ha->addr);
+
+	/* go through and unsync mc_addr entries to the device */
+	list = &netdev->mc;
+	list_for_each_entry_safe(ha, tmp, &list->list, list)
+		if (ha->refcount > 1)
+			hns3_nic_mc_unsync(netdev, ha->addr);
+
+	netif_addr_unlock_bh(netdev);
 }
 
 static void hns3_clear_tx_ring(struct hns3_enet_ring *ring)
@@ -3318,12 +4107,18 @@
 					    ret);
 				return ret;
 			}
-			hns3_replace_buffer(ring, ring->next_to_use,
-					    &res_cbs);
+			hns3_replace_buffer(ring, ring->next_to_use, &res_cbs);
 		}
 		ring_ptr_move_fw(ring, next_to_use);
 	}
 
+	/* Free the pending skb in rx ring */
+	if (ring->skb) {
+		dev_kfree_skb_any(ring->skb);
+		ring->skb = NULL;
+		ring->pending_buf = 0;
+	}
+
 	return 0;
 }
 
@@ -3344,40 +4139,26 @@
 	}
 }
 
-static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h)
-{
-	struct net_device *ndev = h->kinfo.netdev;
-	struct hns3_nic_priv *priv = netdev_priv(ndev);
-	struct hns3_enet_ring *ring;
-	u32 i;
-
-	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
-		hns3_force_clear_rx_ring(ring);
-	}
-}
-
-static void hns3_clear_all_ring(struct hnae3_handle *h)
+static void hns3_clear_all_ring(struct hnae3_handle *h, bool force)
 {
 	struct net_device *ndev = h->kinfo.netdev;
 	struct hns3_nic_priv *priv = netdev_priv(ndev);
 	u32 i;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		struct netdev_queue *dev_queue;
 		struct hns3_enet_ring *ring;
 
 		ring = priv->ring_data[i].ring;
 		hns3_clear_tx_ring(ring);
-		dev_queue = netdev_get_tx_queue(ndev,
-						priv->ring_data[i].queue_index);
-		netdev_tx_reset_queue(dev_queue);
 
 		ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
 		/* Continue to clear other rings even if clearing some
 		 * rings failed.
 		 */
-		hns3_clear_rx_ring(ring);
+		if (force)
+			hns3_force_clear_rx_ring(ring);
+		else
+			hns3_clear_rx_ring(ring);
 	}
 }
 
@@ -3390,7 +4171,10 @@
 	int ret;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		h->ae_algo->ops->reset_queue(h, i);
+		ret = h->ae_algo->ops->reset_queue(h, i);
+		if (ret)
+			return ret;
+
 		hns3_init_ring_hw(priv->ring_data[i].ring);
 
 		/* We need to clear tx ring here because self test will
@@ -3421,10 +4205,49 @@
 	return 0;
 }
 
+static void hns3_store_coal(struct hns3_nic_priv *priv)
+{
+	/* ethtool only supports setting and querying one coalesce
+	 * configuration for now, so save vector 0's coalesce
+	 * configuration here in order to restore it.
+	 */
+	memcpy(&priv->tx_coal, &priv->tqp_vector[0].tx_group.coal,
+	       sizeof(struct hns3_enet_coalesce));
+	memcpy(&priv->rx_coal, &priv->tqp_vector[0].rx_group.coal,
+	       sizeof(struct hns3_enet_coalesce));
+}
+
+static void hns3_restore_coal(struct hns3_nic_priv *priv)
+{
+	u16 vector_num = priv->vector_num;
+	int i;
+
+	for (i = 0; i < vector_num; i++) {
+		memcpy(&priv->tqp_vector[i].tx_group.coal, &priv->tx_coal,
+		       sizeof(struct hns3_enet_coalesce));
+		memcpy(&priv->tqp_vector[i].rx_group.coal, &priv->rx_coal,
+		       sizeof(struct hns3_enet_coalesce));
+	}
+}
+
 static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 {
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct net_device *ndev = kinfo->netdev;
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+
+	if (test_and_set_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
+		return 0;
+
+	/* It is cumbersome for hardware to pick-and-choose entries for
+	 * deletion from table space. Hence, for a function reset, software
+	 * intervention is required to delete the entries.
+	 */
+	if (hns3_dev_ongoing_func_reset(ae_dev)) {
+		hns3_remove_hw_addr(ndev);
+		hns3_del_all_fd_rules(ndev, false);
+	}
 
 	if (!netif_running(ndev))
 		return 0;
@@ -3435,16 +4258,19 @@
 static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
 	int ret = 0;
 
+	clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
+
 	if (netif_running(kinfo->netdev)) {
-		ret = hns3_nic_net_up(kinfo->netdev);
+		ret = hns3_nic_net_open(kinfo->netdev);
 		if (ret) {
+			set_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
 			netdev_err(kinfo->netdev,
-				   "hns net up fail, ret=%d!\n", ret);
+				   "net up fail, ret=%d!\n", ret);
 			return ret;
 		}
-		handle->last_reset_time = jiffies;
 	}
 
 	return ret;
@@ -3456,28 +4282,74 @@
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret;
 
-	hns3_init_mac_addr(netdev, false);
-	hns3_nic_set_rx_mode(netdev);
-	hns3_recover_hw_addr(netdev);
-
-	/* Hardware table is only clear when pf resets */
-	if (!(handle->flags & HNAE3_SUPPORT_VF))
-		hns3_restore_vlan(netdev);
-
 	/* Carrier off reporting is important to ethtool even BEFORE open */
 	netif_carrier_off(netdev);
 
-	ret = hns3_nic_init_vector_data(priv);
+	ret = hns3_get_ring_config(priv);
 	if (ret)
 		return ret;
 
+	ret = hns3_nic_alloc_vector_data(priv);
+	if (ret)
+		goto err_put_ring;
+
+	hns3_restore_coal(priv);
+
+	ret = hns3_nic_init_vector_data(priv);
+	if (ret)
+		goto err_dealloc_vector;
+
 	ret = hns3_init_all_ring(priv);
+	if (ret)
+		goto err_uninit_vector;
+
+	ret = hns3_client_start(handle);
 	if (ret) {
-		hns3_nic_uninit_vector_data(priv);
-		priv->ring_data = NULL;
+		dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
+		goto err_uninit_ring;
 	}
 
+	set_bit(HNS3_NIC_STATE_INITED, &priv->state);
+
 	return ret;
+
+err_uninit_ring:
+	hns3_uninit_all_ring(priv);
+err_uninit_vector:
+	hns3_nic_uninit_vector_data(priv);
+err_dealloc_vector:
+	hns3_nic_dealloc_vector_data(priv);
+err_put_ring:
+	hns3_put_ring_config(priv);
+
+	return ret;
+}
+
+static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
+{
+	struct net_device *netdev = handle->kinfo.netdev;
+	bool vlan_filter_enable;
+	int ret;
+
+	ret = hns3_init_mac_addr(netdev, false);
+	if (ret)
+		return ret;
+
+	ret = hns3_recover_hw_addr(netdev);
+	if (ret)
+		return ret;
+
+	ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
+	if (ret)
+		return ret;
+
+	vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
+	hns3_enable_vlan_filter(netdev, vlan_filter_enable);
+
+	if (handle->ae_algo->ops->restore_vlan_table)
+		handle->ae_algo->ops->restore_vlan_table(handle);
+
+	return hns3_restore_fd_rules(netdev);
 }
 
 static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
@@ -3486,19 +4358,27 @@
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret;
 
-	hns3_force_clear_all_rx_ring(handle);
-
-	ret = hns3_nic_uninit_vector_data(priv);
-	if (ret) {
-		netdev_err(netdev, "uninit vector error\n");
-		return ret;
+	if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
+		netdev_warn(netdev, "already uninitialized\n");
+		return 0;
 	}
 
+	hns3_clear_all_ring(handle, true);
+	hns3_reset_tx_queue(priv->ae_handle);
+
+	hns3_nic_uninit_vector_data(priv);
+
+	hns3_store_coal(priv);
+
+	ret = hns3_nic_dealloc_vector_data(priv);
+	if (ret)
+		netdev_err(netdev, "dealloc vector error\n");
+
 	ret = hns3_uninit_all_ring(priv);
 	if (ret)
 		netdev_err(netdev, "uninit ring error\n");
 
-	hns3_uninit_mac_addr(netdev);
+	hns3_put_ring_config(priv);
 
 	return ret;
 }
@@ -3521,6 +4401,9 @@
 	case HNAE3_UNINIT_CLIENT:
 		ret = hns3_reset_notify_uninit_enet(handle);
 		break;
+	case HNAE3_RESTORE_CLIENT:
+		ret = hns3_reset_notify_restore_enet(handle);
+		break;
 	default:
 		break;
 	}
@@ -3528,140 +4411,110 @@
 	return ret;
 }
 
-static void hns3_restore_coal(struct hns3_nic_priv *priv,
-			      struct hns3_enet_coalesce *tx,
-			      struct hns3_enet_coalesce *rx)
+static int hns3_change_channels(struct hnae3_handle *handle, u32 new_tqp_num,
+				bool rxfh_configured)
 {
-	u16 vector_num = priv->vector_num;
-	int i;
-
-	for (i = 0; i < vector_num; i++) {
-		memcpy(&priv->tqp_vector[i].tx_group.coal, tx,
-		       sizeof(struct hns3_enet_coalesce));
-		memcpy(&priv->tqp_vector[i].rx_group.coal, rx,
-		       sizeof(struct hns3_enet_coalesce));
-	}
-}
-
-static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num,
-			       struct hns3_enet_coalesce *tx,
-			       struct hns3_enet_coalesce *rx)
-{
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = hns3_get_handle(netdev);
 	int ret;
 
-	ret = h->ae_algo->ops->set_channels(h, new_tqp_num);
+	ret = handle->ae_algo->ops->set_channels(handle, new_tqp_num,
+						 rxfh_configured);
+	if (ret) {
+		dev_err(&handle->pdev->dev,
+			"Change tqp num(%u) fail.\n", new_tqp_num);
+		return ret;
+	}
+
+	ret = hns3_reset_notify(handle, HNAE3_INIT_CLIENT);
 	if (ret)
 		return ret;
 
-	ret = hns3_get_ring_config(priv);
+	ret = hns3_reset_notify(handle, HNAE3_UP_CLIENT);
 	if (ret)
-		return ret;
+		hns3_reset_notify(handle, HNAE3_UNINIT_CLIENT);
 
-	ret = hns3_nic_alloc_vector_data(priv);
-	if (ret)
-		goto err_alloc_vector;
-
-	hns3_restore_coal(priv, tx, rx);
-
-	ret = hns3_nic_init_vector_data(priv);
-	if (ret)
-		goto err_uninit_vector;
-
-	ret = hns3_init_all_ring(priv);
-	if (ret)
-		goto err_put_ring;
-
-	return 0;
-
-err_put_ring:
-	hns3_put_ring_config(priv);
-err_uninit_vector:
-	hns3_nic_uninit_vector_data(priv);
-err_alloc_vector:
-	hns3_nic_dealloc_vector_data(priv);
 	return ret;
 }
 
-static int hns3_adjust_tqps_num(u8 num_tc, u32 new_tqp_num)
-{
-	return (new_tqp_num / num_tc) * num_tc;
-}
-
 int hns3_set_channels(struct net_device *netdev,
 		      struct ethtool_channels *ch)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	struct hnae3_knic_private_info *kinfo = &h->kinfo;
-	struct hns3_enet_coalesce tx_coal, rx_coal;
-	bool if_running = netif_running(netdev);
+	bool rxfh_configured = netif_is_rxfh_configured(netdev);
 	u32 new_tqp_num = ch->combined_count;
 	u16 org_tqp_num;
 	int ret;
 
+	if (hns3_nic_resetting(netdev))
+		return -EBUSY;
+
 	if (ch->rx_count || ch->tx_count)
 		return -EINVAL;
 
 	if (new_tqp_num > hns3_get_max_available_channels(h) ||
-	    new_tqp_num < kinfo->num_tc) {
+	    new_tqp_num < 1) {
 		dev_err(&netdev->dev,
-			"Change tqps fail, the tqp range is from %d to %d",
-			kinfo->num_tc,
+			"Change tqps fail, the tqp range is from 1 to %d",
 			hns3_get_max_available_channels(h));
 		return -EINVAL;
 	}
 
-	new_tqp_num = hns3_adjust_tqps_num(kinfo->num_tc, new_tqp_num);
-	if (kinfo->num_tqps == new_tqp_num)
+	if (kinfo->rss_size == new_tqp_num)
 		return 0;
 
-	if (if_running)
-		hns3_nic_net_stop(netdev);
+	netif_dbg(h, drv, netdev,
+		  "set channels: tqp_num=%u, rxfh=%d\n",
+		  new_tqp_num, rxfh_configured);
 
-	ret = hns3_nic_uninit_vector_data(priv);
-	if (ret) {
-		dev_err(&netdev->dev,
-			"Unbind vector with tqp fail, nothing is changed");
-		goto open_netdev;
-	}
+	ret = hns3_reset_notify(h, HNAE3_DOWN_CLIENT);
+	if (ret)
+		return ret;
 
-	/* Changing the tqp num may also change the vector num,
-	 * ethtool only support setting and querying one coal
-	 * configuation for now, so save the vector 0' coal
-	 * configuation here in order to restore it.
-	 */
-	memcpy(&tx_coal, &priv->tqp_vector[0].tx_group.coal,
-	       sizeof(struct hns3_enet_coalesce));
-	memcpy(&rx_coal, &priv->tqp_vector[0].rx_group.coal,
-	       sizeof(struct hns3_enet_coalesce));
-
-	hns3_nic_dealloc_vector_data(priv);
-
-	hns3_uninit_all_ring(priv);
-	hns3_put_ring_config(priv);
+	ret = hns3_reset_notify(h, HNAE3_UNINIT_CLIENT);
+	if (ret)
+		return ret;
 
 	org_tqp_num = h->kinfo.num_tqps;
-	ret = hns3_modify_tqp_num(netdev, new_tqp_num, &tx_coal, &rx_coal);
+	ret = hns3_change_channels(h, new_tqp_num, rxfh_configured);
 	if (ret) {
-		ret = hns3_modify_tqp_num(netdev, org_tqp_num,
-					  &tx_coal, &rx_coal);
-		if (ret) {
-			/* If revert to old tqp failed, fatal error occurred */
-			dev_err(&netdev->dev,
-				"Revert to old tqp num fail, ret=%d", ret);
-			return ret;
+		int ret1;
+
+		netdev_warn(netdev,
+			    "Change channels fail, revert to old value\n");
+		ret1 = hns3_change_channels(h, org_tqp_num, rxfh_configured);
+		if (ret1) {
+			netdev_err(netdev,
+				   "revert to old channel fail\n");
+			return ret1;
 		}
-		dev_info(&netdev->dev,
-			 "Change tqp num fail, Revert to old tqp num");
+
+		return ret;
 	}
 
-open_netdev:
-	if (if_running)
-		hns3_nic_net_open(netdev);
+	return 0;
+}
 
-	return ret;
+static const struct hns3_hw_error_info hns3_hw_err[] = {
+	{ .type = HNAE3_PPU_POISON_ERROR,
+	  .msg = "PPU poison" },
+	{ .type = HNAE3_CMDQ_ECC_ERROR,
+	  .msg = "IMP CMDQ error" },
+	{ .type = HNAE3_IMP_RD_POISON_ERROR,
+	  .msg = "IMP RD poison" },
+};
+
+static void hns3_process_hw_error(struct hnae3_handle *handle,
+				  enum hnae3_hw_error_type type)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hns3_hw_err); i++) {
+		if (hns3_hw_err[i].type == type) {
+			dev_err(&handle->pdev->dev, "Detected %s!\n",
+				hns3_hw_err[i].msg);
+			break;
+		}
+	}
 }
 
 static const struct hnae3_client_ops client_ops = {
@@ -3670,6 +4523,7 @@
 	.link_status_change = hns3_link_status_change,
 	.setup_tc = hns3_client_setup_tc,
 	.reset_notify = hns3_reset_notify,
+	.process_hw_error = hns3_process_hw_error,
 };
 
 /* hns3_init_module - Driver registration routine
@@ -3691,15 +4545,23 @@
 
 	INIT_LIST_HEAD(&client.node);
 
+	hns3_dbg_register_debugfs(hns3_driver_name);
+
 	ret = hnae3_register_client(&client);
 	if (ret)
-		return ret;
+		goto err_reg_client;
 
 	ret = pci_register_driver(&hns3_driver);
 	if (ret)
-		hnae3_unregister_client(&client);
+		goto err_reg_driver;
 
 	return ret;
+
+err_reg_driver:
+	hnae3_unregister_client(&client);
+err_reg_client:
+	hns3_dbg_unregister_debugfs();
+	return ret;
 }
 module_init(hns3_init_module);
 
@@ -3711,6 +4573,7 @@
 {
 	pci_unregister_driver(&hns3_driver);
 	hnae3_unregister_client(&client);
+	hns3_dbg_unregister_debugfs();
 }
 module_exit(hns3_exit_module);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index cb450d7..5d468ed 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HNS3_ENET_H
@@ -15,7 +15,7 @@
 enum hns3_nic_state {
 	HNS3_NIC_STATE_TESTING,
 	HNS3_NIC_STATE_RESETTING,
-	HNS3_NIC_STATE_REINITING,
+	HNS3_NIC_STATE_INITED,
 	HNS3_NIC_STATE_DOWN,
 	HNS3_NIC_STATE_DISABLED,
 	HNS3_NIC_STATE_REMOVING,
@@ -42,12 +42,14 @@
 #define HNS3_RING_TX_RING_HEAD_REG		0x0005C
 #define HNS3_RING_TX_RING_FBDNUM_REG		0x00060
 #define HNS3_RING_TX_RING_OFFSET_REG		0x00064
+#define HNS3_RING_TX_RING_EBDNUM_REG		0x00068
 #define HNS3_RING_TX_RING_PKTNUM_RECORD_REG	0x0006C
-
+#define HNS3_RING_TX_RING_EBD_OFFSET_REG	0x00070
+#define HNS3_RING_TX_RING_BD_ERR_REG		0x00074
 #define HNS3_RING_PREFETCH_EN_REG		0x0007C
 #define HNS3_RING_CFG_VF_NUM_REG		0x00080
 #define HNS3_RING_ASID_REG			0x0008C
-#define HNS3_RING_RX_VM_REG			0x00090
+#define HNS3_RING_EN_REG			0x00090
 #define HNS3_RING_T0_BE_RST			0x00094
 #define HNS3_RING_COULD_BE_RST			0x00098
 #define HNS3_RING_WRR_WEIGHT_REG		0x0009c
@@ -73,10 +75,13 @@
 #define HNS3_TX_TIMEOUT (5 * HZ)
 #define HNS3_RING_NAME_LEN			16
 #define HNS3_BUFFER_SIZE_2048			2048
-#define HNS3_RING_MAX_PENDING			32768
-#define HNS3_RING_MIN_PENDING			8
+#define HNS3_RING_MAX_PENDING			32760
+#define HNS3_RING_MIN_PENDING			24
 #define HNS3_RING_BD_MULTIPLE			8
-#define HNS3_MAX_MTU				9728
+/* max frame size of mac */
+#define HNS3_MAC_MAX_FRAME			9728
+#define HNS3_MAX_MTU \
+	(HNS3_MAC_MAX_FRAME - (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN))
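+/* with ETH_HLEN = 14, ETH_FCS_LEN = 4 and VLAN_HLEN = 4 this works out
+ * to 9728 - (14 + 4 + 8) = 9702, matching the MTU range comment in
+ * hns3_client_init()
+ */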
 
 #define HNS3_BD_SIZE_512_TYPE			0
 #define HNS3_BD_SIZE_1024_TYPE			1
@@ -109,6 +114,10 @@
 #define HNS3_RXD_DOI_B				21
 #define HNS3_RXD_OL3E_B				22
 #define HNS3_RXD_OL4E_B				23
+#define HNS3_RXD_GRO_COUNT_S			24
+#define HNS3_RXD_GRO_COUNT_M			(0x3f << HNS3_RXD_GRO_COUNT_S)
+#define HNS3_RXD_GRO_FIXID_B			30
+#define HNS3_RXD_GRO_ECN_B			31
 
 #define HNS3_RXD_ODMAC_S			0
 #define HNS3_RXD_ODMAC_M			(0x3 << HNS3_RXD_ODMAC_S)
@@ -135,9 +144,8 @@
 #define HNS3_RXD_TSIND_S			12
 #define HNS3_RXD_TSIND_M			(0x7 << HNS3_RXD_TSIND_S)
 #define HNS3_RXD_LKBK_B				15
-#define HNS3_RXD_HDL_S				16
-#define HNS3_RXD_HDL_M				(0x7ff << HNS3_RXD_HDL_S)
-#define HNS3_RXD_HSIND_B			31
+#define HNS3_RXD_GRO_SIZE_S			16
+#define HNS3_RXD_GRO_SIZE_M			(0x3fff << HNS3_RXD_GRO_SIZE_S)
 
 #define HNS3_TXD_L3T_S				0
 #define HNS3_TXD_L3T_M				(0x3 << HNS3_TXD_L3T_S)
@@ -178,6 +186,8 @@
 #define HNS3_TXD_MSS_S				0
 #define HNS3_TXD_MSS_M				(0x3fff << HNS3_TXD_MSS_S)
 
+#define HNS3_TX_LAST_SIZE_M			0xffff
+
 #define HNS3_VECTOR_TX_IRQ			BIT_ULL(0)
 #define HNS3_VECTOR_RX_IRQ			BIT_ULL(1)
 
@@ -185,7 +195,8 @@
 #define HNS3_VECTOR_INITED			1
 
 #define HNS3_MAX_BD_SIZE			65535
-#define HNS3_MAX_BD_PER_FRAG			8
+#define HNS3_MAX_BD_NUM_NORMAL			8
+#define HNS3_MAX_BD_NUM_TSO			63
 #define HNS3_MAX_BD_PER_PKT			MAX_SKB_FRAGS
 
 #define HNS3_VECTOR_GL0_OFFSET			0x100
@@ -194,6 +205,15 @@
 #define HNS3_VECTOR_RL_OFFSET			0x900
 #define HNS3_VECTOR_RL_EN_B			6
 
+#define HNS3_RING_EN_B				0
+
+enum hns3_pkt_l2t_type {
+	HNS3_L2_TYPE_UNICAST,
+	HNS3_L2_TYPE_MULTICAST,
+	HNS3_L2_TYPE_BROADCAST,
+	HNS3_L2_TYPE_INVALID,
+};
+
 enum hns3_pkt_l3t_type {
 	HNS3_L3T_NONE,
 	HNS3_L3T_IPV6,
@@ -282,7 +302,7 @@
 	dma_addr_t dma; /* dma address of this desc */
 	void *buf;      /* cpu addr for a desc */
 
-	/* priv data for the desc, e.g. skb when use with ip stack*/
+	/* priv data for the desc, e.g. skb when used with the ip stack */
 	void *priv;
 	u32 page_offset;
 	u32 length;     /* length of the buffer */
@@ -305,11 +325,11 @@
 	HNS3_L3_TYPE_MAC_PAUSE,
 	HNS3_L3_TYPE_PFC_PAUSE,/* 0x9*/
 
-	/* reserved for 0xA~0xB*/
+	/* reserved for 0xA~0xB */
 
 	HNS3_L3_TYPE_CNM = 0xc,
 
-	/* reserved for 0xD~0xE*/
+	/* reserved for 0xD~0xE */
 
 	HNS3_L3_TYPE_PARSE_FAIL	= 0xf /* must be last */
 };
@@ -334,7 +354,7 @@
 	HNS3_OL3_TYPE_IPV4_OPT = 4,
 	HNS3_OL3_TYPE_IPV6_EXT,
 
-	/* reserved for 0x6~0xE*/
+	/* reserved for 0x6~0xE */
 
 	HNS3_OL3_TYPE_PARSE_FAIL = 0xf	/* must be last */
 };
@@ -357,6 +377,11 @@
 			u64 tx_err_cnt;
 			u64 restart_queue;
 			u64 tx_busy;
+			u64 tx_copy;
+			u64 tx_vlan_err;
+			u64 tx_l4_proto_err;
+			u64 tx_l2l3l4_err;
+			u64 tx_tso_err;
 		};
 		struct {
 			u64 rx_pkts;
@@ -364,10 +389,11 @@
 			u64 rx_err_cnt;
 			u64 reuse_pg_cnt;
 			u64 err_pkt_len;
-			u64 non_vld_descs;
 			u64 err_bd_num;
 			u64 l2_err;
 			u64 l3l4_csum_err;
+			u64 rx_multicast;
+			u64 non_reuse_pg;
 		};
 	};
 };
@@ -379,7 +405,6 @@
 	struct hns3_enet_ring *next;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	struct hnae3_queue *tqp;
-	char ring_name[HNS3_RING_NAME_LEN];
 	struct device *dev; /* will be used for DMA mapping of descriptors */
 
 	/* statistic */
@@ -389,9 +414,6 @@
 	dma_addr_t desc_dma_addr;
 	u32 buf_size;       /* size for hnae_desc->addr, preset by AE */
 	u16 desc_num;       /* total number of desc */
-	u16 max_desc_num_per_pkt;
-	u16 max_raw_data_sz_per_desc;
-	u16 max_pkt_size;
 	int next_to_use;    /* idx of next spare desc */
 
 	/* idx of latest sent desc, the ring is empty when equal to
@@ -399,11 +421,15 @@
 	 */
 	int next_to_clean;
 
-	u32 flag;          /* ring attribute */
-	int irq_init_flag;
+	u32 pull_len; /* head length for current packet */
+	u32 frag_num;
+	unsigned char *va; /* first buffer address for current packet */
 
-	int numa_node;
-	cpumask_t affinity_mask;
+	u32 flag;          /* ring attribute */
+
+	int pending_buf;
+	struct sk_buff *skb;
+	struct sk_buff *tail_skb;
 };
 
 struct hns_queue;
@@ -417,15 +443,6 @@
 	void (*fini_process)(struct hns3_nic_ring_data *);
 };
 
-struct hns3_nic_ops {
-	int (*fill_desc)(struct hns3_enet_ring *ring, void *priv,
-			 int size, dma_addr_t dma, int frag_end,
-			 enum hns_desc_type type);
-	int (*maybe_stop_tx)(struct sk_buff **out_skb,
-			     int *bnum, struct hns3_enet_ring *ring);
-	void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum);
-};
-
 enum hns3_flow_level_range {
 	HNS3_FLOW_LOW = 0,
 	HNS3_FLOW_MID = 1,
@@ -433,25 +450,6 @@
 	HNS3_FLOW_ULTRA = 3,
 };
 
-enum hns3_link_mode_bits {
-	HNS3_LM_FIBRE_BIT = BIT(0),
-	HNS3_LM_AUTONEG_BIT = BIT(1),
-	HNS3_LM_TP_BIT = BIT(2),
-	HNS3_LM_PAUSE_BIT = BIT(3),
-	HNS3_LM_BACKPLANE_BIT = BIT(4),
-	HNS3_LM_10BASET_HALF_BIT = BIT(5),
-	HNS3_LM_10BASET_FULL_BIT = BIT(6),
-	HNS3_LM_100BASET_HALF_BIT = BIT(7),
-	HNS3_LM_100BASET_FULL_BIT = BIT(8),
-	HNS3_LM_1000BASET_FULL_BIT = BIT(9),
-	HNS3_LM_10000BASEKR_FULL_BIT = BIT(10),
-	HNS3_LM_25000BASEKR_FULL_BIT = BIT(11),
-	HNS3_LM_40000BASELR4_FULL_BIT = BIT(12),
-	HNS3_LM_50000BASEKR2_FULL_BIT = BIT(13),
-	HNS3_LM_100000BASEKR4_FULL_BIT = BIT(14),
-	HNS3_LM_COUNT = 15
-};
-
 #define HNS3_INT_GL_MAX			0x1FE0
 #define HNS3_INT_GL_50K			0x0014
 #define HNS3_INT_GL_20K			0x0032
@@ -461,8 +459,6 @@
 #define HNS3_INT_RL_MAX			0x00EC
 #define HNS3_INT_RL_ENABLE_MASK		0x40
 
-#define HNS3_INT_ADAPT_DOWN_START	100
-
 struct hns3_enet_coalesce {
 	u16 int_gl;
 	u8 gl_adapt_enable;
@@ -491,12 +487,12 @@
 	struct hns3_enet_ring_group rx_group;
 	struct hns3_enet_ring_group tx_group;
 
+	cpumask_t affinity_mask;
 	u16 num_tqps;	/* total number of tqps in TQP vector */
+	struct irq_affinity_notify affinity_notify;
 
 	char name[HNAE3_INT_NAME_LEN];
 
-	/* when 0 should adjust interrupt coalesce parameter */
-	u8 int_adapt_down;
 	unsigned long last_jiffies;
 } ____cacheline_internodealigned_in_smp;
 
@@ -517,7 +513,6 @@
 	u32 port_id;
 	struct net_device *netdev;
 	struct device *dev;
-	struct hns3_nic_ops ops;
 
 	/**
 	 * the cb for nic to manage the ring buffer, the first half of the
@@ -540,7 +535,8 @@
 	struct notifier_block notifier_block;
 	/* Vxlan/Geneve information */
 	struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	struct hns3_enet_coalesce tx_coal;
+	struct hns3_enet_coalesce rx_coal;
 };
 
 union l3_hdr_info {
@@ -552,21 +548,25 @@
 union l4_hdr_info {
 	struct tcphdr *tcp;
 	struct udphdr *udp;
+	struct gre_base_hdr *gre;
 	unsigned char *hdr;
 };
 
-/* the distance between [begin, end) in a ring buffer
- * note: there is a unuse slot between the begin and the end
- */
-static inline int ring_dist(struct hns3_enet_ring *ring, int begin, int end)
-{
-	return (end - begin + ring->desc_num) % ring->desc_num;
-}
+struct hns3_hw_error_info {
+	enum hnae3_hw_error_type type;
+	const char *msg;
+};
 
 static inline int ring_space(struct hns3_enet_ring *ring)
 {
-	return ring->desc_num -
-		ring_dist(ring, ring->next_to_clean, ring->next_to_use) - 1;
+	/* This smp_load_acquire() pairs with smp_store_release() in
+	 * hns3_nic_reclaim_one_desc called by hns3_clean_tx_ring.
+	 */
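+	/* e.g. (illustrative) desc_num = 8, begin = 2, end = 5 gives
+	 * (8 - 5 + 2) - 1 = 4 free slots; one descriptor is always kept
+	 * unused so a full ring can be told apart from an empty one
+	 */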
+	int begin = smp_load_acquire(&ring->next_to_clean);
+	int end = READ_ONCE(ring->next_to_use);
+
+	return ((end >= begin) ? (ring->desc_num - end + begin) :
+			(begin - end)) - 1;
 }
 
 static inline int is_ring_empty(struct hns3_enet_ring *ring)
@@ -574,6 +574,11 @@
 	return ring->next_to_use == ring->next_to_clean;
 }
 
+static inline u32 hns3_read_reg(void __iomem *base, u32 reg)
+{
+	return readl(base + reg);
+}
+
 static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 {
 	u8 __iomem *reg_addr = READ_ONCE(base);
@@ -581,22 +586,50 @@
 	writel(value, reg_addr + reg);
 }
 
+static inline bool hns3_dev_ongoing_func_reset(struct hnae3_ae_dev *ae_dev)
+{
+	return (ae_dev && (ae_dev->reset_type == HNAE3_FUNC_RESET ||
+			   ae_dev->reset_type == HNAE3_FLR_RESET ||
+			   ae_dev->reset_type == HNAE3_VF_FUNC_RESET ||
+			   ae_dev->reset_type == HNAE3_VF_FULL_RESET ||
+			   ae_dev->reset_type == HNAE3_VF_PF_FUNC_RESET));
+}
+
+#define hns3_read_dev(a, reg) \
+	hns3_read_reg((a)->io_base, (reg))
+
+static inline bool hns3_nic_resetting(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+	return test_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
+}
+
 #define hns3_write_dev(a, reg, value) \
 	hns3_write_reg((a)->io_base, (reg), (value))
 
 #define hnae3_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \
 		(tqp)->io_base + HNS3_RING_TX_RING_TAIL_REG)
 
-#define ring_to_dev(ring) (&(ring)->tqp->handle->pdev->dev)
+#define ring_to_dev(ring) ((ring)->dev)
 
 #define ring_to_dma_dir(ring) (HNAE3_IS_TX_RING(ring) ? \
 	DMA_TO_DEVICE : DMA_FROM_DEVICE)
 
 #define tx_ring_data(priv, idx) ((priv)->ring_data[idx])
 
-#define hnae3_buf_size(_ring) ((_ring)->buf_size)
-#define hnae3_page_order(_ring) (get_order(hnae3_buf_size(_ring)))
-#define hnae3_page_size(_ring) (PAGE_SIZE << hnae3_page_order(_ring))
+#define hns3_buf_size(_ring) ((_ring)->buf_size)
+
+static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->buf_size > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
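+/* e.g. with 4K pages (illustrative): a 2048B buffer fits two to a page
+ * (order 0), while a 4096B buffer gets an order-1 (8K) allocation so
+ * two buffers can still share one compound page
+ */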
+
+#define hns3_page_size(_ring) (PAGE_SIZE << hns3_page_order(_ring))
 
 /* iterator for handling rings in ring group */
 #define hns3_for_each_ring(pos, head) \
@@ -615,11 +648,13 @@
 int hns3_set_channels(struct net_device *netdev,
 		      struct ethtool_channels *ch);
 
-bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+void hns3_clean_tx_ring(struct hns3_enet_ring *ring);
 int hns3_init_all_ring(struct hns3_nic_priv *priv);
 int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
 int hns3_nic_reset_all_ring(struct hnae3_handle *h);
+void hns3_fini_ring(struct hns3_enet_ring *ring);
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
+bool hns3_is_phys_func(struct pci_dev *pdev);
 int hns3_clean_rx_ring(
 		struct hns3_enet_ring *ring, int budget,
 		void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *));
@@ -631,10 +666,17 @@
 void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
 				 u32 rl_value);
 
+void hns3_enable_vlan_filter(struct net_device *netdev, bool enable);
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
+
 #ifdef CONFIG_HNS3_DCB
 void hns3_dcbnl_setup(struct hnae3_handle *handle);
 #else
 static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {}
 #endif
 
+void hns3_dbg_init(struct hnae3_handle *handle);
+void hns3_dbg_uninit(struct hnae3_handle *handle);
+void hns3_dbg_register_debugfs(const char *debugfs_dir_name);
+void hns3_dbg_unregister_debugfs(void);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 9684ad0..52c9d20 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -22,13 +22,18 @@
 static const struct hns3_stats hns3_txq_stats[] = {
 	/* Tx per-queue statistics */
 	HNS3_TQP_STAT("io_err_cnt", io_err_cnt),
-	HNS3_TQP_STAT("tx_dropped", sw_err_cnt),
+	HNS3_TQP_STAT("dropped", sw_err_cnt),
 	HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt),
 	HNS3_TQP_STAT("packets", tx_pkts),
 	HNS3_TQP_STAT("bytes", tx_bytes),
 	HNS3_TQP_STAT("errors", tx_err_cnt),
-	HNS3_TQP_STAT("tx_wake", restart_queue),
-	HNS3_TQP_STAT("tx_busy", tx_busy),
+	HNS3_TQP_STAT("wake", restart_queue),
+	HNS3_TQP_STAT("busy", tx_busy),
+	HNS3_TQP_STAT("copy", tx_copy),
+	HNS3_TQP_STAT("vlan_err", tx_vlan_err),
+	HNS3_TQP_STAT("l4_proto_err", tx_l4_proto_err),
+	HNS3_TQP_STAT("l2l3l4_err", tx_l2l3l4_err),
+	HNS3_TQP_STAT("tso_err", tx_tso_err),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
@@ -36,41 +41,39 @@
 static const struct hns3_stats hns3_rxq_stats[] = {
 	/* Rx per-queue statistics */
 	HNS3_TQP_STAT("io_err_cnt", io_err_cnt),
-	HNS3_TQP_STAT("rx_dropped", sw_err_cnt),
+	HNS3_TQP_STAT("dropped", sw_err_cnt),
 	HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt),
 	HNS3_TQP_STAT("packets", rx_pkts),
 	HNS3_TQP_STAT("bytes", rx_bytes),
 	HNS3_TQP_STAT("errors", rx_err_cnt),
 	HNS3_TQP_STAT("reuse_pg_cnt", reuse_pg_cnt),
 	HNS3_TQP_STAT("err_pkt_len", err_pkt_len),
-	HNS3_TQP_STAT("non_vld_descs", non_vld_descs),
 	HNS3_TQP_STAT("err_bd_num", err_bd_num),
 	HNS3_TQP_STAT("l2_err", l2_err),
 	HNS3_TQP_STAT("l3l4_csum_err", l3l4_csum_err),
+	HNS3_TQP_STAT("multicast", rx_multicast),
+	HNS3_TQP_STAT("non_reuse_pg", non_reuse_pg),
 };
 
 #define HNS3_RXQ_STATS_COUNT ARRAY_SIZE(hns3_rxq_stats)
 
 #define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT)
 
-#define HNS3_SELF_TEST_TYPE_NUM		2
+#define HNS3_SELF_TEST_TYPE_NUM         4
 #define HNS3_NIC_LB_TEST_PKT_NUM	1
 #define HNS3_NIC_LB_TEST_RING_ID	0
 #define HNS3_NIC_LB_TEST_PACKET_SIZE	128
+#define HNS3_NIC_LB_SETUP_USEC		10000
 
 /* Nic loopback test err  */
 #define HNS3_NIC_LB_TEST_NO_MEM_ERR	1
 #define HNS3_NIC_LB_TEST_TX_CNT_ERR	2
 #define HNS3_NIC_LB_TEST_RX_CNT_ERR	3
 
-struct hns3_link_mode_mapping {
-	u32 hns3_link_mode;
-	u32 ethtool_link_mode;
-};
-
 static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
+	bool vlan_filter_enable;
 	int ret;
 
 	if (!h->ae_algo->ops->set_loopback ||
@@ -78,8 +81,10 @@
 		return -EOPNOTSUPP;
 
 	switch (loop) {
-	case HNAE3_MAC_INTER_LOOP_SERDES:
-	case HNAE3_MAC_INTER_LOOP_MAC:
+	case HNAE3_LOOP_SERIAL_SERDES:
+	case HNAE3_LOOP_PARALLEL_SERDES:
+	case HNAE3_LOOP_APP:
+	case HNAE3_LOOP_PHY:
 		ret = h->ae_algo->ops->set_loopback(h, loop, en);
 		break;
 	default:
@@ -87,10 +92,17 @@
 		break;
 	}
 
-	if (ret)
+	if (ret || h->pdev->revision >= 0x21)
 		return ret;
 
-	h->ae_algo->ops->set_promisc_mode(h, en, en);
+	if (en) {
+		h->ae_algo->ops->set_promisc_mode(h, true, true);
+	} else {
+		/* recover promisc mode before loopback test */
+		hns3_update_promisc_mode(ndev, h->netdev_flags);
+		vlan_filter_enable = ndev->flags & IFF_PROMISC ? false : true;
+		hns3_enable_vlan_filter(ndev, vlan_filter_enable);
+	}
 
 	return ret;
 }
@@ -100,49 +112,37 @@
 	struct hnae3_handle *h = hns3_get_handle(ndev);
 	int ret;
 
-	if (!h->ae_algo->ops->start)
-		return -EOPNOTSUPP;
-
 	ret = hns3_nic_reset_all_ring(h);
 	if (ret)
 		return ret;
 
-	ret = h->ae_algo->ops->start(h);
-	if (ret) {
-		netdev_err(ndev,
-			   "hns3_lb_up ae start return error: %d\n", ret);
-		return ret;
-	}
-
 	ret = hns3_lp_setup(ndev, loop_mode, true);
-	usleep_range(10000, 20000);
+	usleep_range(HNS3_NIC_LB_SETUP_USEC, HNS3_NIC_LB_SETUP_USEC * 2);
 
 	return ret;
 }
 
 static int hns3_lp_down(struct net_device *ndev, enum hnae3_loop loop_mode)
 {
-	struct hnae3_handle *h = hns3_get_handle(ndev);
 	int ret;
 
-	if (!h->ae_algo->ops->stop)
-		return -EOPNOTSUPP;
-
 	ret = hns3_lp_setup(ndev, loop_mode, false);
 	if (ret) {
 		netdev_err(ndev, "lb_setup return error: %d\n", ret);
 		return ret;
 	}
 
-	h->ae_algo->ops->stop(h);
-	usleep_range(10000, 20000);
+	usleep_range(HNS3_NIC_LB_SETUP_USEC, HNS3_NIC_LB_SETUP_USEC * 2);
 
 	return 0;
 }
 
 static void hns3_lp_setup_skb(struct sk_buff *skb)
 {
+#define	HNS3_NIC_LB_DST_MAC_ADDR	0x1f
+
 	struct net_device *ndev = skb->dev;
+	struct hnae3_handle *handle;
 	unsigned char *packet;
 	struct ethhdr *ethh;
 	unsigned int i;
@@ -152,6 +152,15 @@
 	packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE);
 
 	memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN);
+
+	/* The dst mac addr of the loopback packet is the same as the host's
+	 * mac addr, so the SSU component may loop the packet back to the
+	 * host before it reaches the mac or serdes, which would defeat the
+	 * purpose of the mac or serdes selftest.
+	 */
+	handle = hns3_get_handle(ndev);
+	if (handle->pdev->revision == 0x20)
+		ethh->h_dest[5] += HNS3_NIC_LB_DST_MAC_ADDR;
 	eth_zero_addr(ethh->h_source);
 	ethh->h_proto = htons(ETH_P_ARP);
 	skb_reset_mac_header(skb);
@@ -214,7 +223,7 @@
 	for (i = start_ringid; i <= end_ringid; i++) {
 		struct hns3_enet_ring *ring = priv->ring_data[i].ring;
 
-		hns3_clean_tx_ring(ring, budget);
+		hns3_clean_tx_ring(ring);
 	}
 }
 
@@ -245,11 +254,13 @@
 
 		skb_get(skb);
 		tx_ret = hns3_nic_net_xmit(skb, ndev);
-		if (tx_ret == NETDEV_TX_OK)
+		if (tx_ret == NETDEV_TX_OK) {
 			good_cnt++;
-		else
+		} else {
+			kfree_skb(skb);
 			netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n",
 				   tx_ret);
+		}
 	}
 	if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) {
 		ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR;
@@ -296,17 +307,33 @@
 	int test_index = 0;
 	u32 i;
 
+	if (hns3_nic_resetting(ndev)) {
+		netdev_err(ndev, "dev resetting!\n");
+		return;
+	}
+
 	/* Only do offline selftest, or pass by default */
 	if (eth_test->flags != ETH_TEST_FL_OFFLINE)
 		return;
 
-	st_param[HNAE3_MAC_INTER_LOOP_MAC][0] = HNAE3_MAC_INTER_LOOP_MAC;
-	st_param[HNAE3_MAC_INTER_LOOP_MAC][1] =
-			h->flags & HNAE3_SUPPORT_MAC_LOOPBACK;
+	netif_dbg(h, drv, ndev, "self test start\n");
 
-	st_param[HNAE3_MAC_INTER_LOOP_SERDES][0] = HNAE3_MAC_INTER_LOOP_SERDES;
-	st_param[HNAE3_MAC_INTER_LOOP_SERDES][1] =
-			h->flags & HNAE3_SUPPORT_SERDES_LOOPBACK;
+	st_param[HNAE3_LOOP_APP][0] = HNAE3_LOOP_APP;
+	st_param[HNAE3_LOOP_APP][1] =
+			h->flags & HNAE3_SUPPORT_APP_LOOPBACK;
+
+	st_param[HNAE3_LOOP_SERIAL_SERDES][0] = HNAE3_LOOP_SERIAL_SERDES;
+	st_param[HNAE3_LOOP_SERIAL_SERDES][1] =
+			h->flags & HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
+
+	st_param[HNAE3_LOOP_PARALLEL_SERDES][0] =
+			HNAE3_LOOP_PARALLEL_SERDES;
+	st_param[HNAE3_LOOP_PARALLEL_SERDES][1] =
+			h->flags & HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
+
+	st_param[HNAE3_LOOP_PHY][0] = HNAE3_LOOP_PHY;
+	st_param[HNAE3_LOOP_PHY][1] =
+			h->flags & HNAE3_SUPPORT_PHY_LOOPBACK;
 
 	if (if_running)
 		ndev->netdev_ops->ndo_stop(ndev);
@@ -319,6 +346,13 @@
 		h->ae_algo->ops->enable_vlan_filter(h, false);
 #endif
 
+	/* Tell the firmware to halt mac autoneg before the loopback test
+	 * starts, otherwise the loopback test may fail while the port is
+	 * still negotiating.
+	 */
+	if (h->ae_algo->ops->halt_autoneg)
+		h->ae_algo->ops->halt_autoneg(h, true);
+
 	set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
 
 	for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) {
@@ -328,10 +362,10 @@
 			continue;
 
 		data[test_index] = hns3_lp_up(ndev, loop_type);
-		if (!data[test_index]) {
+		if (!data[test_index])
 			data[test_index] = hns3_lp_run_test(ndev, loop_type);
-			hns3_lp_down(ndev, loop_type);
-		}
+
+		hns3_lp_down(ndev, loop_type);
 
 		if (data[test_index])
 			eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -341,6 +375,9 @@
 
 	clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
 
+	if (h->ae_algo->ops->halt_autoneg)
+		h->ae_algo->ops->halt_autoneg(h, false);
+
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 	if (dis_vlan_filter)
 		h->ae_algo->ops->enable_vlan_filter(h, true);
@@ -348,6 +385,8 @@
 
 	if (if_running)
 		ndev->netdev_ops->ndo_open(ndev);
+
+	netif_dbg(h, drv, ndev, "self test end\n");
 }
 
 static int hns3_get_sset_count(struct net_device *netdev, int stringset)
@@ -365,9 +404,10 @@
 
 	case ETH_SS_TEST:
 		return ops->get_sset_count(h, stringset);
-	}
 
-	return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static void *hns3_update_strings(u8 *data, const struct hns3_stats *stats,
@@ -383,7 +423,7 @@
 			data[ETH_GSTRING_LEN - 1] = '\0';
 
 			/* first, prepend the prefix string */
-			n1 = snprintf(data, MAX_PREFIX_SIZE, "%s#%d_",
+			n1 = snprintf(data, MAX_PREFIX_SIZE, "%s%d_",
 				      prefix, i);
 			n1 = min_t(uint, n1, MAX_PREFIX_SIZE - 1);
 			size_left = (ETH_GSTRING_LEN - 1) - n1;
@@ -426,11 +466,13 @@
 	switch (stringset) {
 	case ETH_SS_STATS:
 		buff = hns3_get_strings_tqps(h, buff);
-		h->ae_algo->ops->get_strings(h, stringset, (u8 *)buff);
+		ops->get_strings(h, stringset, (u8 *)buff);
 		break;
 	case ETH_SS_TEST:
 		ops->get_strings(h, stringset, data);
 		break;
+	default:
+		break;
 	}
 }
 
@@ -474,6 +516,11 @@
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	u64 *p = data;
 
+	if (hns3_nic_resetting(netdev)) {
+		netdev_err(netdev, "dev resetting, could not get stats\n");
+		return;
+	}
+
 	if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) {
 		netdev_err(netdev, "could not get any statistics\n");
 		return;
@@ -493,6 +540,12 @@
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = priv->ae_handle;
+	u32 fw_version;
+
+	if (!h->ae_algo->ops->get_fw_version) {
+		netdev_err(netdev, "could not get fw version!\n");
+		return;
+	}
 
 	strncpy(drvinfo->version, hns3_driver_version,
 		sizeof(drvinfo->version));
@@ -506,15 +559,25 @@
 		sizeof(drvinfo->bus_info));
 	drvinfo->bus_info[ETHTOOL_BUSINFO_LEN - 1] = '\0';
 
-	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "0x%08x",
-		 priv->ae_handle->ae_algo->ops->get_fw_version(h));
+	fw_version = priv->ae_handle->ae_algo->ops->get_fw_version(h);
+
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%lu.%lu.%lu.%lu",
+		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
+				 HNAE3_FW_VERSION_BYTE3_SHIFT),
+		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE2_MASK,
+				 HNAE3_FW_VERSION_BYTE2_SHIFT),
+		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE1_MASK,
+				 HNAE3_FW_VERSION_BYTE1_SHIFT),
+		 hnae3_get_field(fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
+				 HNAE3_FW_VERSION_BYTE0_SHIFT));
 }
 
 static u32 hns3_get_link(struct net_device *netdev)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status)
+	if (h->ae_algo->ops->get_status)
 		return h->ae_algo->ops->get_status(h);
 	else
 		return 0;
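/* Sketch of the firmware-version decoding introduced above, assuming
 * hnae3_get_field() is the usual mask-and-shift helper and the BYTEn
 * masks select consecutive 8-bit fields, so 0x01020304 prints as
 * "1.2.3.4":
 */
static void hns3_demo_fw_version(u32 fw, char *buf, size_t len)
{
	snprintf(buf, len, "%u.%u.%u.%u",
		 (fw >> 24) & 0xff, (fw >> 16) & 0xff,
		 (fw >> 8) & 0xff, fw & 0xff);
}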
@@ -527,6 +590,11 @@
 	struct hnae3_handle *h = priv->ae_handle;
 	int queue_num = h->kinfo.num_tqps;
 
+	if (hns3_nic_resetting(netdev)) {
+		netdev_err(netdev, "dev resetting!\n");
+		return;
+	}
+
 	param->tx_max_pending = HNS3_RING_MAX_PENDING;
 	param->rx_max_pending = HNS3_RING_MAX_PENDING;
 
@@ -539,7 +607,7 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam)
+	if (h->ae_algo->ops->get_pauseparam)
 		h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
 			&param->rx_pause, &param->tx_pause);
 }
@@ -549,6 +617,10 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
+	netif_dbg(h, drv, netdev,
+		  "set pauseparam: autoneg=%u, rx:%u, tx:%u\n",
+		  param->autoneg, param->rx_pause, param->tx_pause);
+
 	if (h->ae_algo->ops->set_pauseparam)
 		return h->ae_algo->ops->set_pauseparam(h, param->autoneg,
 						       param->rx_pause,
@@ -556,30 +628,77 @@
 	return -EOPNOTSUPP;
 }
 
+static void hns3_get_ksettings(struct hnae3_handle *h,
+			       struct ethtool_link_ksettings *cmd)
+{
+	const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+
+	/* 1.auto_neg & speed & duplex from cmd */
+	if (ops->get_ksettings_an_result)
+		ops->get_ksettings_an_result(h,
+					     &cmd->base.autoneg,
+					     &cmd->base.speed,
+					     &cmd->base.duplex);
+
+	/* 2.get link mode */
+	if (ops->get_link_mode)
+		ops->get_link_mode(h,
+				   cmd->link_modes.supported,
+				   cmd->link_modes.advertising);
+
+	/* 3.mdix_ctrl&mdix get from phy reg */
+	if (ops->get_mdix_mode)
+		ops->get_mdix_mode(h, &cmd->base.eth_tp_mdix_ctrl,
+				   &cmd->base.eth_tp_mdix);
+}
+
 static int hns3_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
-	u32 flowctrl_adv = 0;
+	const struct hnae3_ae_ops *ops;
+	u8 module_type;
+	u8 media_type;
 	u8 link_stat;
 
-	if (!h->ae_algo || !h->ae_algo->ops)
+	ops = h->ae_algo->ops;
+	if (ops->get_media_type)
+		ops->get_media_type(h, &media_type, &module_type);
+	else
 		return -EOPNOTSUPP;
 
-	/* 1.auto_neg & speed & duplex from cmd */
-	if (netdev->phydev) {
-		phy_ethtool_ksettings_get(netdev->phydev, cmd);
+	switch (media_type) {
+	case HNAE3_MEDIA_TYPE_NONE:
+		cmd->base.port = PORT_NONE;
+		hns3_get_ksettings(h, cmd);
+		break;
+	case HNAE3_MEDIA_TYPE_FIBER:
+		if (module_type == HNAE3_MODULE_TYPE_CR)
+			cmd->base.port = PORT_DA;
+		else
+			cmd->base.port = PORT_FIBRE;
 
+		hns3_get_ksettings(h, cmd);
+		break;
+	case HNAE3_MEDIA_TYPE_BACKPLANE:
+		cmd->base.port = PORT_NONE;
+		hns3_get_ksettings(h, cmd);
+		break;
+	case HNAE3_MEDIA_TYPE_COPPER:
+		cmd->base.port = PORT_TP;
+		if (!netdev->phydev)
+			hns3_get_ksettings(h, cmd);
+		else
+			phy_ethtool_ksettings_get(netdev->phydev, cmd);
+		break;
+	default:
+		netdev_warn(netdev, "Unknown media type\n");
 		return 0;
 	}
 
-	if (h->ae_algo->ops->get_ksettings_an_result)
-		h->ae_algo->ops->get_ksettings_an_result(h,
-							 &cmd->base.autoneg,
-							 &cmd->base.speed,
-							 &cmd->base.duplex);
-	else
-		return -EOPNOTSUPP;
+	/* mdio_support */
+	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
 
 	link_stat = hns3_get_link(netdev);
 	if (!link_stat) {
@@ -587,35 +706,51 @@
 		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	/* 2.get link mode and port type*/
-	if (h->ae_algo->ops->get_link_mode)
-		h->ae_algo->ops->get_link_mode(h,
-					       cmd->link_modes.supported,
-					       cmd->link_modes.advertising);
+	return 0;
+}
 
-	cmd->base.port = PORT_NONE;
-	if (h->ae_algo->ops->get_port_type)
-		h->ae_algo->ops->get_port_type(h,
-					       &cmd->base.port);
+static int hns3_check_ksettings_param(const struct net_device *netdev,
+				      const struct ethtool_link_ksettings *cmd)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	u8 module_type = HNAE3_MODULE_TYPE_UNKNOWN;
+	u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
+	u8 autoneg;
+	u32 speed;
+	u8 duplex;
+	int ret;
 
-	/* 3.mdix_ctrl&mdix get from phy reg */
-	if (h->ae_algo->ops->get_mdix_mode)
-		h->ae_algo->ops->get_mdix_mode(h, &cmd->base.eth_tp_mdix_ctrl,
-					       &cmd->base.eth_tp_mdix);
-	/* 4.mdio_support */
-	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
+	/* hw doesn't support using a specified speed and duplex to
+	 * negotiate, so there is no need to check them when autoneg is on.
+	 */
+	if (cmd->base.autoneg)
+		return 0;
 
-	/* 5.get flow control setttings */
-	if (h->ae_algo->ops->get_flowctrl_adv)
-		h->ae_algo->ops->get_flowctrl_adv(h, &flowctrl_adv);
+	if (ops->get_ksettings_an_result) {
+		ops->get_ksettings_an_result(handle, &autoneg, &speed, &duplex);
+		if (cmd->base.autoneg == autoneg && cmd->base.speed == speed &&
+		    cmd->base.duplex == duplex)
+			return 0;
+	}
 
-	if (flowctrl_adv & ADVERTISED_Pause)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Pause);
+	if (ops->get_media_type)
+		ops->get_media_type(handle, &media_type, &module_type);
 
-	if (flowctrl_adv & ADVERTISED_Asym_Pause)
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Asym_Pause);
+	if (cmd->base.duplex != DUPLEX_FULL &&
+	    media_type != HNAE3_MEDIA_TYPE_COPPER) {
+		netdev_err(netdev,
+			   "only copper ports support half duplex!\n");
+		return -EINVAL;
+	}
+
+	if (ops->check_port_speed) {
+		ret = ops->check_port_speed(handle, cmd->base.speed);
+		if (ret) {
+			netdev_err(netdev, "unsupported speed\n");
+			return ret;
+		}
+	}
 
 	return 0;
 }
@@ -623,19 +758,57 @@
 static int hns3_set_link_ksettings(struct net_device *netdev,
 				   const struct ethtool_link_ksettings *cmd)
 {
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	int ret;
+
+	/* The chip doesn't support this mode. */
+	if (cmd->base.speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF)
+		return -EINVAL;
+
+	netif_dbg(handle, drv, netdev,
+		  "set link(%s): autoneg=%u, speed=%u, duplex=%u\n",
+		  netdev->phydev ? "phy" : "mac",
+		  cmd->base.autoneg, cmd->base.speed, cmd->base.duplex);
+
 	/* Only support ksettings_set for netdev with phy attached for now */
 	if (netdev->phydev)
 		return phy_ethtool_ksettings_set(netdev->phydev, cmd);
 
-	return -EOPNOTSUPP;
+	if (handle->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	ret = hns3_check_ksettings_param(netdev, cmd);
+	if (ret)
+		return ret;
+
+	if (ops->set_autoneg) {
+		ret = ops->set_autoneg(handle, cmd->base.autoneg);
+		if (ret)
+			return ret;
+	}
+
+	/* hw doesn't support using a specified speed and duplex to
+	 * negotiate, so ignore them when autoneg is on.
+	 */
+	if (cmd->base.autoneg) {
+		netdev_info(netdev,
+			    "autoneg is on, ignore the speed and duplex\n");
+		return 0;
+	}
+
+	if (ops->cfg_mac_speed_dup_h)
+		ret = ops->cfg_mac_speed_dup_h(handle, cmd->base.speed,
+					       cmd->base.duplex);
+
+	return ret;
 }
 
 static u32 hns3_get_rss_key_size(struct net_device *netdev)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops ||
-	    !h->ae_algo->ops->get_rss_key_size)
+	if (!h->ae_algo->ops->get_rss_key_size)
 		return 0;
 
 	return h->ae_algo->ops->get_rss_key_size(h);
@@ -645,8 +818,7 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops ||
-	    !h->ae_algo->ops->get_rss_indir_size)
+	if (!h->ae_algo->ops->get_rss_indir_size)
 		return 0;
 
 	return h->ae_algo->ops->get_rss_indir_size(h);
@@ -657,7 +829,7 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss)
+	if (!h->ae_algo->ops->get_rss)
 		return -EOPNOTSUPP;
 
 	return h->ae_algo->ops->get_rss(h, indir, key, hfunc);
@@ -668,15 +840,16 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss)
+	if (!h->ae_algo->ops->set_rss)
 		return -EOPNOTSUPP;
 
-	/* currently we only support Toeplitz hash */
-	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_TOP)) {
-		netdev_err(netdev,
-			   "hash func not supported (only Toeplitz hash)\n");
+	if ((h->pdev->revision == 0x20 &&
+	     hfunc != ETH_RSS_HASH_TOP) || (hfunc != ETH_RSS_HASH_NO_CHANGE &&
+	     hfunc != ETH_RSS_HASH_TOP && hfunc != ETH_RSS_HASH_XOR)) {
+		netdev_err(netdev, "hash func not supported\n");
 		return -EOPNOTSUPP;
 	}
+
 	if (!indir) {
 		netdev_err(netdev,
 			   "set rss failed for indir is empty\n");
@@ -692,34 +865,87 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_tuple)
-		return -EOPNOTSUPP;
-
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
-		cmd->data = h->kinfo.rss_size;
-		break;
+		cmd->data = h->kinfo.num_tqps;
+		return 0;
 	case ETHTOOL_GRXFH:
-		return h->ae_algo->ops->get_rss_tuple(h, cmd);
+		if (h->ae_algo->ops->get_rss_tuple)
+			return h->ae_algo->ops->get_rss_tuple(h, cmd);
+		return -EOPNOTSUPP;
+	case ETHTOOL_GRXCLSRLCNT:
+		if (h->ae_algo->ops->get_fd_rule_cnt)
+			return h->ae_algo->ops->get_fd_rule_cnt(h, cmd);
+		return -EOPNOTSUPP;
+	case ETHTOOL_GRXCLSRULE:
+		if (h->ae_algo->ops->get_fd_rule_info)
+			return h->ae_algo->ops->get_fd_rule_info(h, cmd);
+		return -EOPNOTSUPP;
+	case ETHTOOL_GRXCLSRLALL:
+		if (h->ae_algo->ops->get_fd_all_rules)
+			return h->ae_algo->ops->get_fd_all_rules(h, cmd,
+								 rule_locs);
+		return -EOPNOTSUPP;
 	default:
 		return -EOPNOTSUPP;
 	}
-
-	return 0;
 }
 
-static int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
-				       u32 new_desc_num)
+static void hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
+					u32 tx_desc_num, u32 rx_desc_num)
 {
 	struct hnae3_handle *h = priv->ae_handle;
 	int i;
 
-	h->kinfo.num_desc = new_desc_num;
+	h->kinfo.num_tx_desc = tx_desc_num;
+	h->kinfo.num_rx_desc = rx_desc_num;
 
-	for (i = 0; i < h->kinfo.num_tqps * 2; i++)
-		priv->ring_data[i].ring->desc_num = new_desc_num;
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		priv->ring_data[i].ring->desc_num = tx_desc_num;
+		priv->ring_data[i + h->kinfo.num_tqps].ring->desc_num =
+			rx_desc_num;
+	}
+}
 
-	return hns3_init_all_ring(priv);
+static struct hns3_enet_ring *hns3_backup_ringparam(struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *handle = priv->ae_handle;
+	struct hns3_enet_ring *tmp_rings;
+	int i;
+
+	tmp_rings = kcalloc(handle->kinfo.num_tqps * 2,
+			    sizeof(struct hns3_enet_ring), GFP_KERNEL);
+	if (!tmp_rings)
+		return NULL;
+
+	for (i = 0; i < handle->kinfo.num_tqps * 2; i++) {
+		memcpy(&tmp_rings[i], priv->ring_data[i].ring,
+		       sizeof(struct hns3_enet_ring));
+		tmp_rings[i].skb = NULL;
+	}
+
+	return tmp_rings;
+}
+
+static int hns3_check_ringparam(struct net_device *ndev,
+				struct ethtool_ringparam *param)
+{
+	if (hns3_nic_resetting(ndev))
+		return -EBUSY;
+
+	if (param->rx_mini_pending || param->rx_jumbo_pending)
+		return -EINVAL;
+
+	if (param->tx_pending > HNS3_RING_MAX_PENDING ||
+	    param->tx_pending < HNS3_RING_MIN_PENDING ||
+	    param->rx_pending > HNS3_RING_MAX_PENDING ||
+	    param->rx_pending < HNS3_RING_MIN_PENDING) {
+		netdev_err(ndev, "Queue depth out of range [%d-%d]\n",
+			   HNS3_RING_MIN_PENDING, HNS3_RING_MAX_PENDING);
+		return -EINVAL;
+	}
+
+	return 0;
 }
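/* Sketch of the depth rounding applied in hns3_set_ringparam() below,
 * assuming HNS3_RING_BD_MULTIPLE == 8 per the in-code comment about the
 * hardware's multiple-of-eight requirement: ALIGN(1001, 8) == 1008.
 */
static u32 hns3_demo_align_depth(u32 requested)
{
	return ALIGN(requested, HNS3_RING_BD_MULTIPLE);
}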
 
 static int hns3_set_ringparam(struct net_device *ndev,
@@ -727,59 +953,61 @@
 {
 	struct hns3_nic_priv *priv = netdev_priv(ndev);
 	struct hnae3_handle *h = priv->ae_handle;
+	struct hns3_enet_ring *tmp_rings;
 	bool if_running = netif_running(ndev);
-	u32 old_desc_num, new_desc_num;
-	int ret;
+	u32 old_tx_desc_num, new_tx_desc_num;
+	u32 old_rx_desc_num, new_rx_desc_num;
+	u16 queue_num = h->kinfo.num_tqps;
+	int ret, i;
 
-	if (param->rx_mini_pending || param->rx_jumbo_pending)
-		return -EINVAL;
-
-	if (param->tx_pending != param->rx_pending) {
-		netdev_err(ndev,
-			   "Descriptors of tx and rx must be equal");
-		return -EINVAL;
-	}
-
-	if (param->tx_pending > HNS3_RING_MAX_PENDING ||
-	    param->tx_pending < HNS3_RING_MIN_PENDING) {
-		netdev_err(ndev,
-			   "Descriptors requested (Tx/Rx: %d) out of range [%d-%d]\n",
-			   param->tx_pending, HNS3_RING_MIN_PENDING,
-			   HNS3_RING_MAX_PENDING);
-		return -EINVAL;
-	}
-
-	new_desc_num = param->tx_pending;
-
-	/* Hardware requires that its descriptors must be multiple of eight */
-	new_desc_num = ALIGN(new_desc_num, HNS3_RING_BD_MULTIPLE);
-	old_desc_num = h->kinfo.num_desc;
-	if (old_desc_num == new_desc_num)
-		return 0;
-
-	netdev_info(ndev,
-		    "Changing descriptor count from %d to %d.\n",
-		    old_desc_num, new_desc_num);
-
-	if (if_running)
-		dev_close(ndev);
-
-	ret = hns3_uninit_all_ring(priv);
+	ret = hns3_check_ringparam(ndev, param);
 	if (ret)
 		return ret;
 
-	ret = hns3_change_all_ring_bd_num(priv, new_desc_num);
-	if (ret) {
-		ret = hns3_change_all_ring_bd_num(priv, old_desc_num);
-		if (ret) {
-			netdev_err(ndev,
-				   "Revert to old bd num fail, ret=%d.\n", ret);
-			return ret;
-		}
+	/* Hardware requires that its descriptors must be multiple of eight */
+	new_tx_desc_num = ALIGN(param->tx_pending, HNS3_RING_BD_MULTIPLE);
+	new_rx_desc_num = ALIGN(param->rx_pending, HNS3_RING_BD_MULTIPLE);
+	old_tx_desc_num = priv->ring_data[0].ring->desc_num;
+	old_rx_desc_num = priv->ring_data[queue_num].ring->desc_num;
+	if (old_tx_desc_num == new_tx_desc_num &&
+	    old_rx_desc_num == new_rx_desc_num)
+		return 0;
+
+	tmp_rings = hns3_backup_ringparam(priv);
+	if (!tmp_rings) {
+		netdev_err(ndev,
+			   "backup ring param failed: memory allocation failed\n");
+		return -ENOMEM;
 	}
 
+	netdev_info(ndev,
+		    "Changing Tx/Rx ring depth from %d/%d to %d/%d\n",
+		    old_tx_desc_num, old_rx_desc_num,
+		    new_tx_desc_num, new_rx_desc_num);
+
 	if (if_running)
-		ret = dev_open(ndev);
+		ndev->netdev_ops->ndo_stop(ndev);
+
+	hns3_change_all_ring_bd_num(priv, new_tx_desc_num, new_rx_desc_num);
+	ret = hns3_init_all_ring(priv);
+	if (ret) {
+		netdev_err(ndev, "Change bd num fail, revert to old value(%d)\n",
+			   ret);
+
+		hns3_change_all_ring_bd_num(priv, old_tx_desc_num,
+					    old_rx_desc_num);
+		for (i = 0; i < h->kinfo.num_tqps * 2; i++)
+			memcpy(priv->ring_data[i].ring, &tmp_rings[i],
+			       sizeof(struct hns3_enet_ring));
+	} else {
+		for (i = 0; i < h->kinfo.num_tqps * 2; i++)
+			hns3_fini_ring(&tmp_rings[i]);
+	}
+
+	kfree(tmp_rings);
+
+	if (if_running)
+		ret = ndev->netdev_ops->ndo_open(ndev);
 
 	return ret;
 }
@@ -788,12 +1016,19 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss_tuple)
-		return -EOPNOTSUPP;
-
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXFH:
-		return h->ae_algo->ops->set_rss_tuple(h, cmd);
+		if (h->ae_algo->ops->set_rss_tuple)
+			return h->ae_algo->ops->set_rss_tuple(h, cmd);
+		return -EOPNOTSUPP;
+	case ETHTOOL_SRXCLSRLINS:
+		if (h->ae_algo->ops->add_fd_entry)
+			return h->ae_algo->ops->add_fd_entry(h, cmd);
+		return -EOPNOTSUPP;
+	case ETHTOOL_SRXCLSRLDEL:
+		if (h->ae_algo->ops->del_fd_entry)
+			return h->ae_algo->ops->del_fd_entry(h, cmd);
+		return -EOPNOTSUPP;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -801,19 +1036,39 @@
 
 static int hns3_nway_reset(struct net_device *netdev)
 {
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
 	struct phy_device *phy = netdev->phydev;
+	int autoneg;
 
 	if (!netif_running(netdev))
 		return 0;
 
-	/* Only support nway_reset for netdev with phy attached for now */
-	if (!phy)
+	if (hns3_nic_resetting(netdev)) {
+		netdev_err(netdev, "dev resetting!\n");
+		return -EBUSY;
+	}
+
+	if (!ops->get_autoneg || !ops->restart_autoneg)
 		return -EOPNOTSUPP;
 
-	if (phy->autoneg != AUTONEG_ENABLE)
+	autoneg = ops->get_autoneg(handle);
+	if (autoneg != AUTONEG_ENABLE) {
+		netdev_err(netdev,
+			   "Autoneg is off, restarting it is not supported\n");
 		return -EINVAL;
+	}
 
-	return genphy_restart_aneg(phy);
+	netif_dbg(handle, drv, netdev,
+		  "nway reset (using %s)\n", phy ? "phy" : "mac");
+
+	if (phy)
+		return genphy_restart_aneg(phy);
+
+	if (handle->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	return ops->restart_autoneg(handle);
 }
 
 static void hns3_get_channels(struct net_device *netdev,
@@ -833,6 +1088,9 @@
 	struct hnae3_handle *h = priv->ae_handle;
 	u16 queue_num = h->kinfo.num_tqps;
 
+	if (hns3_nic_resetting(netdev))
+		return -EBUSY;
+
 	if (queue >= queue_num) {
 		netdev_err(netdev,
 			   "Invalid queue value %d! Queue max id=%d\n",
@@ -994,6 +1252,9 @@
 	int ret;
 	int i;
 
+	if (hns3_nic_resetting(netdev))
+		return -EBUSY;
+
 	ret = hns3_check_coalesce_para(netdev, cmd);
 	if (ret)
 		return ret;
@@ -1033,12 +1294,104 @@
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_led_id)
+	if (!h->ae_algo->ops->set_led_id)
 		return -EOPNOTSUPP;
 
 	return h->ae_algo->ops->set_led_id(h, state);
 }
 
+static u32 hns3_get_msglevel(struct net_device *netdev)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	return h->msg_enable;
+}
+
+static void hns3_set_msglevel(struct net_device *netdev, u32 msg_level)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	h->msg_enable = msg_level;
+}
+
+/* Translate local fec value into ethtool value. */
+static unsigned int loc_to_eth_fec(u8 loc_fec)
+{
+	u32 eth_fec = 0;
+
+	if (loc_fec & BIT(HNAE3_FEC_AUTO))
+		eth_fec |= ETHTOOL_FEC_AUTO;
+	if (loc_fec & BIT(HNAE3_FEC_RS))
+		eth_fec |= ETHTOOL_FEC_RS;
+	if (loc_fec & BIT(HNAE3_FEC_BASER))
+		eth_fec |= ETHTOOL_FEC_BASER;
+
+	/* if nothing is set, then FEC is off */
+	if (!eth_fec)
+		eth_fec = ETHTOOL_FEC_OFF;
+
+	return eth_fec;
+}
+
+/* Translate ethtool fec value into local value. */
+static unsigned int eth_to_loc_fec(unsigned int eth_fec)
+{
+	u32 loc_fec = 0;
+
+	if (eth_fec & ETHTOOL_FEC_OFF)
+		return loc_fec;
+
+	if (eth_fec & ETHTOOL_FEC_AUTO)
+		loc_fec |= BIT(HNAE3_FEC_AUTO);
+	if (eth_fec & ETHTOOL_FEC_RS)
+		loc_fec |= BIT(HNAE3_FEC_RS);
+	if (eth_fec & ETHTOOL_FEC_BASER)
+		loc_fec |= BIT(HNAE3_FEC_BASER);
+
+	return loc_fec;
+}
+
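/* Sketch: on single-mode inputs the two translators above are inverses,
 * e.g. loc_to_eth_fec(eth_to_loc_fec(ETHTOOL_FEC_RS)) == ETHTOOL_FEC_RS,
 * while ETHTOOL_FEC_OFF maps to an empty local mask and back to "off".
 */
static bool hns3_fec_xlate_roundtrip(u32 eth_fec)
{
	return loc_to_eth_fec(eth_to_loc_fec(eth_fec)) == eth_fec;
}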
+static int hns3_get_fecparam(struct net_device *netdev,
+			     struct ethtool_fecparam *fec)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	u8 fec_ability;
+	u8 fec_mode;
+
+	if (handle->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	if (!ops->get_fec)
+		return -EOPNOTSUPP;
+
+	ops->get_fec(handle, &fec_ability, &fec_mode);
+
+	fec->fec = loc_to_eth_fec(fec_ability);
+	fec->active_fec = loc_to_eth_fec(fec_mode);
+
+	return 0;
+}
+
+static int hns3_set_fecparam(struct net_device *netdev,
+			     struct ethtool_fecparam *fec)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	u32 fec_mode;
+
+	if (handle->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	if (!ops->set_fec)
+		return -EOPNOTSUPP;
+	fec_mode = eth_to_loc_fec(fec->fec);
+
+	netif_dbg(handle, drv, netdev, "set fecparam: mode=%u\n", fec_mode);
+
+	return ops->set_fec(handle, fec_mode);
+}
+
 static const struct ethtool_ops hns3vf_ethtool_ops = {
 	.get_drvinfo = hns3_get_drvinfo,
 	.get_ringparam = hns3_get_ringparam,
@@ -1047,15 +1400,21 @@
 	.get_ethtool_stats = hns3_get_stats,
 	.get_sset_count = hns3_get_sset_count,
 	.get_rxnfc = hns3_get_rxnfc,
+	.set_rxnfc = hns3_set_rxnfc,
 	.get_rxfh_key_size = hns3_get_rss_key_size,
 	.get_rxfh_indir_size = hns3_get_rss_indir_size,
 	.get_rxfh = hns3_get_rss,
 	.set_rxfh = hns3_set_rss,
 	.get_link_ksettings = hns3_get_link_ksettings,
 	.get_channels = hns3_get_channels,
+	.set_channels = hns3_set_channels,
 	.get_coalesce = hns3_get_coalesce,
 	.set_coalesce = hns3_set_coalesce,
+	.get_regs_len = hns3_get_regs_len,
+	.get_regs = hns3_get_regs,
 	.get_link = hns3_get_link,
+	.get_msglevel = hns3_get_msglevel,
+	.set_msglevel = hns3_set_msglevel,
 };
 
 static const struct ethtool_ops hns3_ethtool_ops = {
@@ -1085,6 +1444,10 @@
 	.get_regs_len = hns3_get_regs_len,
 	.get_regs = hns3_get_regs,
 	.set_phys_id = hns3_set_phys_id,
+	.get_msglevel = hns3_get_msglevel,
+	.set_msglevel = hns3_set_msglevel,
+	.get_fecparam = hns3_get_fecparam,
+	.set_fecparam = hns3_set_fecparam,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index cb8ddd0..0fb61d4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -3,9 +3,9 @@
 # Makefile for the HISILICON network device drivers.
 #
 
-ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
 
 obj-$(CONFIG_HNS3_HCLGE) += hclge.o
-hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o
 
 hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 68026a5..ecf58cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -24,24 +24,23 @@
 	return ring->desc_num - used - 1;
 }
 
-static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h)
+static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int head)
 {
-	int u = ring->next_to_use;
-	int c = ring->next_to_clean;
+	int ntu = ring->next_to_use;
+	int ntc = ring->next_to_clean;
 
-	if (unlikely(h >= ring->desc_num))
-		return 0;
+	if (ntu > ntc)
+		return head >= ntc && head <= ntu;
 
-	return u > c ? (h > c && h <= u) : (h > c || h <= u);
+	return head >= ntc || head <= ntu;
 }
 
 static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
 {
 	int size  = ring->desc_num * sizeof(struct hclge_desc);
 
-	ring->desc = dma_zalloc_coherent(cmq_ring_to_dev(ring),
-					 size, &ring->desc_dma_addr,
-					 GFP_KERNEL);
+	ring->desc = dma_alloc_coherent(cmq_ring_to_dev(ring), size,
+					&ring->desc_dma_addr, GFP_KERNEL);
 	if (!ring->desc)
 		return -ENOMEM;
 
@@ -104,15 +103,17 @@
 	dma_addr_t dma = ring->desc_dma_addr;
 	struct hclge_dev *hdev = ring->dev;
 	struct hclge_hw *hw = &hdev->hw;
+	u32 reg_val;
 
 	if (ring->ring_type == HCLGE_TYPE_CSQ) {
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_L_REG,
 				lower_32_bits(dma));
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG,
 				upper_32_bits(dma));
-		hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG,
-				(ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
-				HCLGE_NIC_CMQ_ENABLE);
+		reg_val = hclge_read_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG);
+		reg_val &= HCLGE_NIC_SW_RST_RDY;
+		reg_val |= ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S;
+		hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG, reg_val);
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
 	} else {
@@ -121,8 +122,7 @@
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG,
 				upper_32_bits(dma));
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG,
-				(ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
-				HCLGE_NIC_CMQ_ENABLE);
+				ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S);
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
 	}
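/* Sketch of the CSQ depth read-modify-write above: only bit 16 (the
 * "driver ready for reset" flag, HCLGE_NIC_SW_RST_RDY, defined later in
 * hclge_cmd.h) is preserved, while the low bits carry the encoded depth,
 * e.g. 1024 descriptors >> 3 == 128:
 */
static u32 hclge_demo_depth_reg(u32 old, int desc_num)
{
	return (old & HCLGE_NIC_SW_RST_RDY) |
	       (desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S);
}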
@@ -171,8 +171,16 @@
 	/* these commands have several descriptors,
 	 * and use the first one to save opcode and return value
 	 */
-	u16 spec_opcode[3] = {HCLGE_OPC_STATS_64_BIT,
-		HCLGE_OPC_STATS_32_BIT, HCLGE_OPC_STATS_MAC};
+	u16 spec_opcode[] = {HCLGE_OPC_STATS_64_BIT,
+			     HCLGE_OPC_STATS_32_BIT,
+			     HCLGE_OPC_STATS_MAC,
+			     HCLGE_OPC_STATS_MAC_ALL,
+			     HCLGE_OPC_QUERY_32_BIT_REG,
+			     HCLGE_OPC_QUERY_64_BIT_REG,
+			     HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+			     HCLGE_QUERY_CLEAR_PF_RAS_INT,
+			     HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
+			     HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT};
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -183,6 +191,61 @@
 	return false;
 }
 
+static int hclge_cmd_convert_err_code(u16 desc_ret)
+{
+	switch (desc_ret) {
+	case HCLGE_CMD_EXEC_SUCCESS:
+		return 0;
+	case HCLGE_CMD_NO_AUTH:
+		return -EPERM;
+	case HCLGE_CMD_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case HCLGE_CMD_QUEUE_FULL:
+		return -EXFULL;
+	case HCLGE_CMD_NEXT_ERR:
+		return -ENOSR;
+	case HCLGE_CMD_UNEXE_ERR:
+		return -ENOTBLK;
+	case HCLGE_CMD_PARA_ERR:
+		return -EINVAL;
+	case HCLGE_CMD_RESULT_ERR:
+		return -ERANGE;
+	case HCLGE_CMD_TIMEOUT:
+		return -ETIME;
+	case HCLGE_CMD_HILINK_ERR:
+		return -ENOLINK;
+	case HCLGE_CMD_QUEUE_ILLEGAL:
+		return -ENXIO;
+	case HCLGE_CMD_INVALID:
+		return -EBADR;
+	default:
+		return -EIO;
+	}
+}
+
+static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc,
+				  int num, int ntc)
+{
+	u16 opcode, desc_ret;
+	int handle;
+
+	opcode = le16_to_cpu(desc[0].opcode);
+	for (handle = 0; handle < num; handle++) {
+		desc[handle] = hw->cmq.csq.desc[ntc];
+		ntc++;
+		if (ntc >= hw->cmq.csq.desc_num)
+			ntc = 0;
+	}
+	if (likely(!hclge_is_special_opcode(opcode)))
+		desc_ret = le16_to_cpu(desc[num - 1].retval);
+	else
+		desc_ret = le16_to_cpu(desc[0].retval);
+
+	hw->cmq.last_status = desc_ret;
+
+	return hclge_cmd_convert_err_code(desc_ret);
+}
+
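/* Sketch: with the conversion helper above, a firmware completion code
 * now surfaces to callers as a distinct errno instead of a blanket -EIO,
 * e.g. HCLGE_CMD_NOT_SUPPORTED (2) becomes -EOPNOTSUPP:
 */
static int hclge_demo_retval(void)
{
	return hclge_cmd_convert_err_code(HCLGE_CMD_NOT_SUPPORTED);
}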
 /**
  * hclge_cmd_send - send command to command queue
  * @hw: pointer to the hw struct
@@ -195,18 +258,26 @@
 int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 {
 	struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw);
+	struct hclge_cmq_ring *csq = &hw->cmq.csq;
 	struct hclge_desc *desc_to_use;
 	bool complete = false;
 	u32 timeout = 0;
 	int handle = 0;
 	int retval = 0;
-	u16 opcode, desc_ret;
 	int ntc;
 
 	spin_lock_bh(&hw->cmq.csq.lock);
 
-	if (num > hclge_ring_space(&hw->cmq.csq) ||
-	    test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+	if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+		spin_unlock_bh(&hw->cmq.csq.lock);
+		return -EBUSY;
+	}
+
+	if (num > hclge_ring_space(&hw->cmq.csq)) {
+		/* If the CMDQ ring is full, the SW HEAD and HW HEAD may
+		 * differ, so update the SW HEAD pointer csq->next_to_clean.
+		 */
+		csq->next_to_clean = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
 		spin_unlock_bh(&hw->cmq.csq.lock);
 		return -EBUSY;
 	}
@@ -216,12 +287,11 @@
 	 * which will be use for hardware to write back
 	 */
 	ntc = hw->cmq.csq.next_to_use;
-	opcode = le16_to_cpu(desc[0].opcode);
 	while (handle < num) {
 		desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use];
 		*desc_to_use = desc[handle];
 		(hw->cmq.csq.next_to_use)++;
-		if (hw->cmq.csq.next_to_use == hw->cmq.csq.desc_num)
+		if (hw->cmq.csq.next_to_use >= hw->cmq.csq.desc_num)
 			hw->cmq.csq.next_to_use = 0;
 		handle++;
 	}
@@ -245,29 +315,9 @@
 	}
 
 	if (!complete) {
-		retval = -EAGAIN;
+		retval = -EBADE;
 	} else {
-		handle = 0;
-		while (handle < num) {
-			/* Get the result of hardware write back */
-			desc_to_use = &hw->cmq.csq.desc[ntc];
-			desc[handle] = *desc_to_use;
-
-			if (likely(!hclge_is_special_opcode(opcode)))
-				desc_ret = le16_to_cpu(desc[handle].retval);
-			else
-				desc_ret = le16_to_cpu(desc[0].retval);
-
-			if (desc_ret == HCLGE_CMD_EXEC_SUCCESS)
-				retval = 0;
-			else
-				retval = -EIO;
-			hw->cmq.last_status = desc_ret;
-			ntc++;
-			handle++;
-			if (ntc == hw->cmq.csq.desc_num)
-				ntc = 0;
-		}
+		retval = hclge_cmd_check_retval(hw, desc, num, ntc);
 	}
 
 	/* Clean the command send queue */
@@ -336,13 +386,30 @@
 	return ret;
 }
 
+static int hclge_firmware_compat_config(struct hclge_dev *hdev)
+{
+	struct hclge_firmware_compat_cmd *req;
+	struct hclge_desc desc;
+	u32 compat = 0;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_COMPAT_CFG, false);
+
+	req = (struct hclge_firmware_compat_cmd *)desc.data;
+
+	hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
+	hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
+	req->compat = cpu_to_le32(compat);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
 int hclge_cmd_init(struct hclge_dev *hdev)
 {
 	u32 version;
 	int ret;
 
 	spin_lock_bh(&hdev->hw.cmq.csq.lock);
-	spin_lock_bh(&hdev->hw.cmq.crq.lock);
+	spin_lock(&hdev->hw.cmq.crq.lock);
 
 	hdev->hw.cmq.csq.next_to_clean = 0;
 	hdev->hw.cmq.csq.next_to_use = 0;
@@ -350,22 +417,67 @@
 	hdev->hw.cmq.crq.next_to_use = 0;
 
 	hclge_cmd_init_regs(&hdev->hw);
+
+	spin_unlock(&hdev->hw.cmq.crq.lock);
+	spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
 	clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 
-	spin_unlock_bh(&hdev->hw.cmq.crq.lock);
-	spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+	/* Check if there is a new reset pending, because a higher level
+	 * reset may happen while a lower level reset is being processed.
+	 */
+	if (hclge_is_reset_pending(hdev)) {
+		ret = -EBUSY;
+		goto err_cmd_init;
+	}
 
 	ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"firmware version query failed %d\n", ret);
-		return ret;
+		goto err_cmd_init;
 	}
 	hdev->fw_version = version;
 
-	dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
+	dev_info(&hdev->pdev->dev, "The firmware version is %lu.%lu.%lu.%lu\n",
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE3_MASK,
+				 HNAE3_FW_VERSION_BYTE3_SHIFT),
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE2_MASK,
+				 HNAE3_FW_VERSION_BYTE2_SHIFT),
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE1_MASK,
+				 HNAE3_FW_VERSION_BYTE1_SHIFT),
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE0_MASK,
+				 HNAE3_FW_VERSION_BYTE0_SHIFT));
+
+	/* Ask the firmware to enable some features; the driver can work
+	 * without them.
+	 */
+	ret = hclge_firmware_compat_config(hdev);
+	if (ret)
+		dev_warn(&hdev->pdev->dev,
+			 "Firmware compatible features not enabled(%d).\n",
+			 ret);
 
 	return 0;
+
+err_cmd_init:
+	set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+
+	return ret;
+}
+
+static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
+{
+	hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_L_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_L_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
+	hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
 }
 
 static void hclge_destroy_queue(struct hclge_cmq_ring *ring)
@@ -375,8 +487,20 @@
 	spin_unlock(&ring->lock);
 }
 
-void hclge_destroy_cmd_queue(struct hclge_hw *hw)
+static void hclge_destroy_cmd_queue(struct hclge_hw *hw)
 {
 	hclge_destroy_queue(&hw->cmq.csq);
 	hclge_destroy_queue(&hw->cmq.crq);
 }
+
+void hclge_cmd_uninit(struct hclge_dev *hdev)
+{
+	spin_lock_bh(&hdev->hw.cmq.csq.lock);
+	spin_lock(&hdev->hw.cmq.crq.lock);
+	set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+	hclge_cmd_uninit_regs(&hdev->hw);
+	spin_unlock(&hdev->hw.cmq.crq.lock);
+	spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
+	hclge_destroy_cmd_queue(&hdev->hw);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 821d4c2..1426eb5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_CMD_H
@@ -39,8 +39,16 @@
 enum hclge_cmd_return_status {
 	HCLGE_CMD_EXEC_SUCCESS	= 0,
 	HCLGE_CMD_NO_AUTH	= 1,
-	HCLGE_CMD_NOT_EXEC	= 2,
+	HCLGE_CMD_NOT_SUPPORTED	= 2,
 	HCLGE_CMD_QUEUE_FULL	= 3,
+	HCLGE_CMD_NEXT_ERR	= 4,
+	HCLGE_CMD_UNEXE_ERR	= 5,
+	HCLGE_CMD_PARA_ERR	= 6,
+	HCLGE_CMD_RESULT_ERR	= 7,
+	HCLGE_CMD_TIMEOUT	= 8,
+	HCLGE_CMD_HILINK_ERR	= 9,
+	HCLGE_CMD_QUEUE_ILLEGAL	= 10,
+	HCLGE_CMD_INVALID	= 11,
 };
 
 enum hclge_cmd_status {
@@ -78,23 +86,44 @@
 	HCLGE_OPC_QUERY_PF_RSRC		= 0x0023,
 	HCLGE_OPC_QUERY_VF_RSRC		= 0x0024,
 	HCLGE_OPC_GET_CFG_PARAM		= 0x0025,
+	HCLGE_OPC_PF_RST_DONE		= 0x0026,
+	HCLGE_OPC_QUERY_VF_RST_RDY	= 0x0027,
 
 	HCLGE_OPC_STATS_64_BIT		= 0x0030,
 	HCLGE_OPC_STATS_32_BIT		= 0x0031,
 	HCLGE_OPC_STATS_MAC		= 0x0032,
+	HCLGE_OPC_QUERY_MAC_REG_NUM	= 0x0033,
+	HCLGE_OPC_STATS_MAC_ALL		= 0x0034,
 
 	HCLGE_OPC_QUERY_REG_NUM		= 0x0040,
 	HCLGE_OPC_QUERY_32_BIT_REG	= 0x0041,
 	HCLGE_OPC_QUERY_64_BIT_REG	= 0x0042,
+	HCLGE_OPC_DFX_BD_NUM		= 0x0043,
+	HCLGE_OPC_DFX_BIOS_COMMON_REG	= 0x0044,
+	HCLGE_OPC_DFX_SSU_REG_0		= 0x0045,
+	HCLGE_OPC_DFX_SSU_REG_1		= 0x0046,
+	HCLGE_OPC_DFX_IGU_EGU_REG	= 0x0047,
+	HCLGE_OPC_DFX_RPU_REG_0		= 0x0048,
+	HCLGE_OPC_DFX_RPU_REG_1		= 0x0049,
+	HCLGE_OPC_DFX_NCSI_REG		= 0x004A,
+	HCLGE_OPC_DFX_RTC_REG		= 0x004B,
+	HCLGE_OPC_DFX_PPP_REG		= 0x004C,
+	HCLGE_OPC_DFX_RCB_REG		= 0x004D,
+	HCLGE_OPC_DFX_TQP_REG		= 0x004E,
+	HCLGE_OPC_DFX_SSU_REG_2		= 0x004F,
+	HCLGE_OPC_DFX_QUERY_CHIP_CAP	= 0x0050,
 
 	/* MAC command */
 	HCLGE_OPC_CONFIG_MAC_MODE	= 0x0301,
 	HCLGE_OPC_CONFIG_AN_MODE	= 0x0304,
-	HCLGE_OPC_QUERY_AN_RESULT	= 0x0306,
 	HCLGE_OPC_QUERY_LINK_STATUS	= 0x0307,
 	HCLGE_OPC_CONFIG_MAX_FRM_SIZE	= 0x0308,
 	HCLGE_OPC_CONFIG_SPEED_DUP	= 0x0309,
+	HCLGE_OPC_QUERY_MAC_TNL_INT	= 0x0310,
+	HCLGE_OPC_MAC_TNL_INT_EN	= 0x0311,
+	HCLGE_OPC_CLEAR_MAC_TNL_INT	= 0x0312,
 	HCLGE_OPC_SERDES_LOOPBACK       = 0x0315,
+	HCLGE_OPC_CONFIG_FEC_MODE	= 0x031A,
 
 	/* PFC/Pause commands */
 	HCLGE_OPC_CFG_MAC_PAUSE_EN      = 0x0701,
@@ -126,6 +155,16 @@
 	HCLGE_OPC_TM_PRI_SCH_MODE_CFG   = 0x0813,
 	HCLGE_OPC_TM_QS_SCH_MODE_CFG    = 0x0814,
 	HCLGE_OPC_TM_BP_TO_QSET_MAPPING = 0x0815,
+	HCLGE_OPC_ETS_TC_WEIGHT		= 0x0843,
+	HCLGE_OPC_QSET_DFX_STS		= 0x0844,
+	HCLGE_OPC_PRI_DFX_STS		= 0x0845,
+	HCLGE_OPC_PG_DFX_STS		= 0x0846,
+	HCLGE_OPC_PORT_DFX_STS		= 0x0847,
+	HCLGE_OPC_SCH_NQ_CNT		= 0x0848,
+	HCLGE_OPC_SCH_RQ_CNT		= 0x0849,
+	HCLGE_OPC_TM_INTERNAL_STS	= 0x0850,
+	HCLGE_OPC_TM_INTERNAL_CNT	= 0x0851,
+	HCLGE_OPC_TM_INTERNAL_STS_1	= 0x0852,
 
 	/* Packet buffer allocate commands */
 	HCLGE_OPC_TX_BUFF_ALLOC		= 0x0901,
@@ -142,6 +181,7 @@
 	HCLGE_OPC_CFG_TX_QUEUE		= 0x0B01,
 	HCLGE_OPC_QUERY_TX_POINTER	= 0x0B02,
 	HCLGE_OPC_QUERY_TX_STATUS	= 0x0B03,
+	HCLGE_OPC_TQP_TX_QUEUE_TC	= 0x0B04,
 	HCLGE_OPC_CFG_RX_QUEUE		= 0x0B11,
 	HCLGE_OPC_QUERY_RX_POINTER	= 0x0B12,
 	HCLGE_OPC_QUERY_RX_STATUS	= 0x0B13,
@@ -150,8 +190,12 @@
 	HCLGE_OPC_CFG_COM_TQP_QUEUE	= 0x0B20,
 	HCLGE_OPC_RESET_TQP_QUEUE	= 0x0B22,
 
+	/* PPU commands */
+	HCLGE_OPC_PPU_PF_OTHER_INT_DFX	= 0x0B4A,
+
 	/* TSO command */
 	HCLGE_OPC_TSO_GENERIC_CONFIG	= 0x0C01,
+	HCLGE_OPC_GRO_GENERIC_CONFIG    = 0x0C10,
 
 	/* RSS commands */
 	HCLGE_OPC_RSS_GENERIC_CONFIG	= 0x0D01,
@@ -175,21 +219,25 @@
 	HCLGE_OPC_MAC_VLAN_REMOVE	    = 0x1001,
 	HCLGE_OPC_MAC_VLAN_TYPE_ID	    = 0x1002,
 	HCLGE_OPC_MAC_VLAN_INSERT	    = 0x1003,
+	HCLGE_OPC_MAC_VLAN_ALLOCATE	    = 0x1004,
 	HCLGE_OPC_MAC_ETHTYPE_ADD	    = 0x1010,
 	HCLGE_OPC_MAC_ETHTYPE_REMOVE	= 0x1011,
-	HCLGE_OPC_MAC_VLAN_MASK_SET	= 0x1012,
 
-	/* Multicast linear table commands */
-	HCLGE_OPC_MTA_MAC_MODE_CFG	    = 0x1020,
-	HCLGE_OPC_MTA_MAC_FUNC_CFG	    = 0x1021,
-	HCLGE_OPC_MTA_TBL_ITEM_CFG	    = 0x1022,
-	HCLGE_OPC_MTA_TBL_ITEM_QUERY	= 0x1023,
+	/* MAC VLAN commands */
+	HCLGE_OPC_MAC_VLAN_SWITCH_PARAM	= 0x1033,
 
 	/* VLAN commands */
 	HCLGE_OPC_VLAN_FILTER_CTRL	    = 0x1100,
 	HCLGE_OPC_VLAN_FILTER_PF_CFG	= 0x1101,
 	HCLGE_OPC_VLAN_FILTER_VF_CFG	= 0x1102,
 
+	/* Flow Director commands */
+	HCLGE_OPC_FD_MODE_CTRL		= 0x1200,
+	HCLGE_OPC_FD_GET_ALLOCATION	= 0x1201,
+	HCLGE_OPC_FD_KEY_CONFIG		= 0x1202,
+	HCLGE_OPC_FD_TCAM_OP		= 0x1203,
+	HCLGE_OPC_FD_AD_OP		= 0x1204,
+
 	/* MDIO command */
 	HCLGE_OPC_MDIO_CONFIG		= 0x1900,
 
@@ -208,6 +256,44 @@
 
 	/* Led command */
 	HCLGE_OPC_LED_STATUS_CFG	= 0xB000,
+
+	/* NCL config command */
+	HCLGE_OPC_QUERY_NCL_CONFIG	= 0x7011,
+	/* M7 stats command */
+	HCLGE_OPC_M7_STATS_BD		= 0x7012,
+	HCLGE_OPC_M7_STATS_INFO		= 0x7013,
+	HCLGE_OPC_M7_COMPAT_CFG		= 0x701A,
+
+	/* SFP command */
+	HCLGE_OPC_GET_SFP_INFO		= 0x7104,
+
+	/* Error INT commands */
+	HCLGE_MAC_COMMON_INT_EN		= 0x030E,
+	HCLGE_TM_SCH_ECC_INT_EN		= 0x0829,
+	HCLGE_SSU_ECC_INT_CMD		= 0x0989,
+	HCLGE_SSU_COMMON_INT_CMD	= 0x098C,
+	HCLGE_PPU_MPF_ECC_INT_CMD	= 0x0B40,
+	HCLGE_PPU_MPF_OTHER_INT_CMD	= 0x0B41,
+	HCLGE_PPU_PF_OTHER_INT_CMD	= 0x0B42,
+	HCLGE_COMMON_ECC_INT_CFG	= 0x1505,
+	HCLGE_QUERY_RAS_INT_STS_BD_NUM	= 0x1510,
+	HCLGE_QUERY_CLEAR_MPF_RAS_INT	= 0x1511,
+	HCLGE_QUERY_CLEAR_PF_RAS_INT	= 0x1512,
+	HCLGE_QUERY_MSIX_INT_STS_BD_NUM	= 0x1513,
+	HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT	= 0x1514,
+	HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT	= 0x1515,
+	HCLGE_CONFIG_ROCEE_RAS_INT_EN	= 0x1580,
+	HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
+	HCLGE_ROCEE_PF_RAS_INT_CMD	= 0x1584,
+	HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD	= 0x1585,
+	HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD	= 0x1586,
+	HCLGE_IGU_EGU_TNL_INT_EN	= 0x1803,
+	HCLGE_IGU_COMMON_INT_EN		= 0x1806,
+	HCLGE_TM_QCN_MEM_INT_CFG	= 0x1A14,
+	HCLGE_PPP_CMD0_INT_CMD		= 0x2100,
+	HCLGE_PPP_CMD1_INT_CMD		= 0x2101,
+	HCLGE_MAC_ETHERTYPE_IDX_RD      = 0x2105,
+	HCLGE_NCSI_INT_EN		= 0x2401,
 };
 
 #define HCLGE_TQP_REG_OFFSET		0x80000
@@ -255,16 +341,16 @@
 	u8 rsv;
 };
 
-#define HCLGE_TC_NUM		8
+#define HCLGE_MAX_TC_NUM		8
 #define HCLGE_TC0_PRI_BUF_EN_B	15 /* Bit 15 indicate enable or not */
 #define HCLGE_BUF_UNIT_S	7  /* Buf size is united by 128 bytes */
 struct hclge_tx_buff_alloc_cmd {
-	__le16 tx_pkt_buff[HCLGE_TC_NUM];
+	__le16 tx_pkt_buff[HCLGE_MAX_TC_NUM];
 	u8 tx_buff_rsv[8];
 };
 
 struct hclge_rx_priv_buff_cmd {
-	__le16 buf_num[HCLGE_TC_NUM];
+	__le16 buf_num[HCLGE_MAX_TC_NUM];
 	__le16 shared_buf;
 	u8 rsv[6];
 };
@@ -310,7 +396,6 @@
 	u32 enable;	/* Enable TC private buffer or not */
 };
 
-#define HCLGE_MAX_TC_NUM	8
 struct hclge_shared_buf {
 	struct hclge_waterline self;
 	struct hclge_tc_thrd tc_thrd[HCLGE_MAX_TC_NUM];
@@ -365,7 +450,9 @@
 #define HCLGE_PF_VEC_NUM_M		GENMASK(7, 0)
 	__le16 pf_intr_vector_number;
 	__le16 pf_own_fun_number;
-	__le32 rsv[3];
+	__le16 tx_buf_size;
+	__le16 dv_buf_size;
+	__le32 rsv[2];
 };
 
 #define HCLGE_CFG_OFFSET_S	0
@@ -395,6 +482,8 @@
 #define HCLGE_CFG_RSS_SIZE_M	GENMASK(31, 24)
 #define HCLGE_CFG_SPEED_ABILITY_S	0
 #define HCLGE_CFG_SPEED_ABILITY_M	GENMASK(7, 0)
+#define HCLGE_CFG_UMV_TBL_SPACE_S	16
+#define HCLGE_CFG_UMV_TBL_SPACE_M	GENMASK(31, 16)
 
 struct hclge_cfg_param_cmd {
 	__le32 offset;
@@ -503,6 +592,12 @@
 	u8 rsv[20];
 };
 
+struct hclge_pf_rst_sync_cmd {
+#define HCLGE_PF_RST_ALL_VF_RDY_B	0
+	u8 all_vf_ready;
+	u8 rsv[23];
+};
+
 #define HCLGE_CFG_SPEED_S		0
 #define HCLGE_CFG_SPEED_M		GENMASK(5, 0)
 
@@ -517,20 +612,6 @@
 	u8 rsv[22];
 };
 
-#define HCLGE_QUERY_SPEED_S		3
-#define HCLGE_QUERY_AN_B		0
-#define HCLGE_QUERY_DUPLEX_B		2
-
-#define HCLGE_QUERY_SPEED_M		GENMASK(4, 0)
-#define HCLGE_QUERY_AN_M		BIT(HCLGE_QUERY_AN_B)
-#define HCLGE_QUERY_DUPLEX_M		BIT(HCLGE_QUERY_DUPLEX_B)
-
-struct hclge_query_an_speed_dup_cmd {
-	u8 an_syn_dup_speed;
-	u8 pause;
-	u8 rsv[23];
-};
-
 #define HCLGE_RING_ID_MASK		GENMASK(9, 0)
 #define HCLGE_TQP_ENABLE_B		0
 
@@ -547,6 +628,32 @@
 	u8      rsv[20];
 };
 
+struct hclge_sfp_info_cmd {
+	__le32 speed;
+	u8 query_type; /* 0: sfp speed, 1: active speed */
+	u8 active_fec;
+	u8 autoneg; /* autoneg state */
+	u8 autoneg_ability; /* whether autoneg is supported */
+	__le32 speed_ability; /* speed ability for current media */
+	__le32 module_type;
+	u8 rsv[8];
+};
+
+#define HCLGE_MAC_CFG_FEC_AUTO_EN_B	0
+#define HCLGE_MAC_CFG_FEC_MODE_S	1
+#define HCLGE_MAC_CFG_FEC_MODE_M	GENMASK(3, 1)
+#define HCLGE_MAC_CFG_FEC_SET_DEF_B	0
+#define HCLGE_MAC_CFG_FEC_CLR_DEF_B	1
+
+#define HCLGE_MAC_FEC_OFF		0
+#define HCLGE_MAC_FEC_BASER		1
+#define HCLGE_MAC_FEC_RS		2
+struct hclge_config_fec_cmd {
+	u8 fec_mode;
+	u8 default_config;
+	u8 rsv[22];
+};
+
 #define HCLGE_MAC_UPLINK_PORT		0x100
 
 struct hclge_config_max_frm_size_cmd {
@@ -562,6 +669,11 @@
 	HCLGE_MAC_VLAN_LKUP,    /* Lookup a entry through mac_vlan key */
 };
 
+enum hclge_mac_vlan_add_resp_code {
+	HCLGE_ADD_UC_OVERFLOW = 2,	/* ADD failed for UC overflow */
+	HCLGE_ADD_MC_OVERFLOW,		/* ADD failed for MC overflow */
+};
+
 #define HCLGE_MAC_VLAN_BIT0_EN_B	0
 #define HCLGE_MAC_VLAN_BIT1_EN_B	1
 #define HCLGE_MAC_EPORT_SW_EN_B		12
@@ -584,19 +696,17 @@
 	u8      rsv2[6];
 };
 
-#define HCLGE_VLAN_MASK_EN_B		0
-struct hclge_mac_vlan_mask_entry_cmd {
-	u8 rsv0[2];
-	u8 vlan_mask;
-	u8 rsv1;
-	u8 mac_mask[6];
-	u8 rsv2[14];
+#define HCLGE_UMV_SPC_ALC_B	0
+struct hclge_umv_spc_alc_cmd {
+	u8 allocate;
+	u8 rsv1[3];
+	__le32 space_size;
+	u8 rsv2[16];
 };
 
 #define HCLGE_MAC_MGR_MASK_VLAN_B		BIT(0)
 #define HCLGE_MAC_MGR_MASK_MAC_B		BIT(1)
 #define HCLGE_MAC_MGR_MASK_ETHERTYPE_B		BIT(2)
-#define HCLGE_MAC_ETHERTYPE_LLDP		0x88cc
 
 struct hclge_mac_mgr_tbl_entry_cmd {
 	u8      flags;
@@ -615,30 +725,6 @@
 	u8      rsv3[2];
 };
 
-#define HCLGE_CFG_MTA_MAC_SEL_S		0
-#define HCLGE_CFG_MTA_MAC_SEL_M		GENMASK(1, 0)
-#define HCLGE_CFG_MTA_MAC_EN_B		7
-struct hclge_mta_filter_mode_cmd {
-	u8	dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */
-	u8      rsv[23];
-};
-
-#define HCLGE_CFG_FUNC_MTA_ACCEPT_B	0
-struct hclge_cfg_func_mta_filter_cmd {
-	u8	accept; /* Only used lowest 1 bit */
-	u8      function_id;
-	u8      rsv[22];
-};
-
-#define HCLGE_CFG_MTA_ITEM_ACCEPT_B	0
-#define HCLGE_CFG_MTA_ITEM_IDX_S	0
-#define HCLGE_CFG_MTA_ITEM_IDX_M	GENMASK(11, 0)
-struct hclge_cfg_func_mta_item_cmd {
-	__le16	item_idx; /* Only used lowest 12 bit */
-	u8      accept;   /* Only used lowest 1 bit */
-	u8      rsv[21];
-};
-
 struct hclge_mac_vlan_add_cmd {
 	__le16  flags;
 	__le16  mac_addr_hi16;
@@ -667,7 +753,9 @@
 struct hclge_vlan_filter_ctrl_cmd {
 	u8 vlan_type;
 	u8 vlan_fe;
-	u8 rsv[22];
+	u8 rsv1[2];
+	u8 vf_id;
+	u8 rsv2[19];
 };
 
 struct hclge_vlan_filter_pf_cfg_cmd {
@@ -686,6 +774,31 @@
 	u8  vf_bitmap[16];
 };
 
+#define HCLGE_SWITCH_ANTI_SPOOF_B	0U
+#define HCLGE_SWITCH_ALW_LPBK_B		1U
+#define HCLGE_SWITCH_ALW_LCL_LPBK_B	2U
+#define HCLGE_SWITCH_ALW_DST_OVRD_B	3U
+#define HCLGE_SWITCH_NO_MASK		0x0
+#define HCLGE_SWITCH_ANTI_SPOOF_MASK	0xFE
+#define HCLGE_SWITCH_ALW_LPBK_MASK	0xFD
+#define HCLGE_SWITCH_ALW_LCL_LPBK_MASK	0xFB
+#define HCLGE_SWITCH_LW_DST_OVRD_MASK	0xF7
+
+struct hclge_mac_vlan_switch_cmd {
+	u8 roce_sel;
+	u8 rsv1[3];
+	__le32 func_id;
+	u8 switch_param;
+	u8 rsv2[3];
+	u8 param_mask;
+	u8 rsv3[11];
+};
+
+enum hclge_mac_vlan_cfg_sel {
+	HCLGE_MAC_VLAN_NIC_SEL = 0,
+	HCLGE_MAC_VLAN_ROCE_SEL,
+};
+
 #define HCLGE_ACCEPT_TAG1_B		0
 #define HCLGE_ACCEPT_UNTAG1_B		1
 #define HCLGE_PORT_INS_TAG1_EN_B	2
@@ -746,6 +859,24 @@
 	u8 rsv[14];
 };
 
+#pragma pack(1)
+struct hclge_mac_ethertype_idx_rd_cmd {
+	u8	flags;
+	u8	resp_code;
+	__le16  vlan_tag;
+	u8      mac_addr[6];
+	__le16  index;
+	__le16	ethter_type;
+	__le16  egress_port;
+	__le16  egress_queue;
+	__le16  rev0;
+	u8	i_port_bitmap;
+	u8	i_port_direction;
+	u8	rev1[2];
+};
+
+#pragma pack()
+
 #define HCLGE_TSO_MSS_MIN_S	0
 #define HCLGE_TSO_MSS_MIN_M	GENMASK(13, 0)
 
@@ -758,6 +889,12 @@
 	u8 rsv[20];
 };
 
+#define HCLGE_GRO_EN_B		0
+struct hclge_cfg_gro_status_cmd {
+	__le16 gro_en;
+	u8 rsv[22];
+};
+
 #define HCLGE_TSO_MSS_MIN	256
 #define HCLGE_TSO_MSS_MAX	9668
 
@@ -777,7 +914,15 @@
 	u8 rsv[22];
 };
 
+#define HCLGE_PF_RESET_DONE_BIT		BIT(0)
+
+struct hclge_pf_rst_done_cmd {
+	u8 pf_rst_done;
+	u8 rsv[23];
+};
+
 #define HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B	BIT(0)
+#define HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B	BIT(2)
 #define HCLGE_CMD_SERDES_DONE_B			BIT(0)
 #define HCLGE_CMD_SERDES_SUCCESS_B		BIT(1)
 struct hclge_serdes_lb_cmd {
@@ -791,6 +936,7 @@
 #define HCLGE_TOTAL_PKT_BUF		0x108000 /* 1.03125M bytes */
 #define HCLGE_DEFAULT_DV		0xA000	 /* 40k byte */
 #define HCLGE_DEFAULT_NON_DCB_DV	0x7800	/* 30K byte */
+#define HCLGE_NON_DCB_ADDITIONAL_BUF	0x1400	/* 5120 byte */
 
 #define HCLGE_TYPE_CRQ			0
 #define HCLGE_TYPE_CSQ			1
@@ -804,8 +950,11 @@
 #define HCLGE_NIC_CRQ_DEPTH_REG		0x27020
 #define HCLGE_NIC_CRQ_TAIL_REG		0x27024
 #define HCLGE_NIC_CRQ_HEAD_REG		0x27028
-#define HCLGE_NIC_CMQ_EN_B		16
-#define HCLGE_NIC_CMQ_ENABLE		BIT(HCLGE_NIC_CMQ_EN_B)
+
+/* this bit indicates that the driver is ready for hardware reset */
+#define HCLGE_NIC_SW_RST_RDY_B		16
+#define HCLGE_NIC_SW_RST_RDY		BIT(HCLGE_NIC_SW_RST_RDY_B)
+
 #define HCLGE_NIC_CMQ_DESC_NUM		1024
 #define HCLGE_NIC_CMQ_DESC_NUM_S	3
 
@@ -818,6 +967,102 @@
 	u8 rsv2[20];
 };
 
+struct hclge_get_fd_mode_cmd {
+	u8 mode;
+	u8 enable;
+	u8 rsv[22];
+};
+
+struct hclge_get_fd_allocation_cmd {
+	__le32 stage1_entry_num;
+	__le32 stage2_entry_num;
+	__le16 stage1_counter_num;
+	__le16 stage2_counter_num;
+	u8 rsv[12];
+};
+
+struct hclge_set_fd_key_config_cmd {
+	u8 stage;
+	u8 key_select;
+	u8 inner_sipv6_word_en;
+	u8 inner_dipv6_word_en;
+	u8 outer_sipv6_word_en;
+	u8 outer_dipv6_word_en;
+	u8 rsv1[2];
+	__le32 tuple_mask;
+	__le32 meta_data_mask;
+	u8 rsv2[8];
+};
+
+#define HCLGE_FD_EPORT_SW_EN_B		0
+struct hclge_fd_tcam_config_1_cmd {
+	u8 stage;
+	u8 xy_sel;
+	u8 port_info;
+	u8 rsv1[1];
+	__le32 index;
+	u8 entry_vld;
+	u8 rsv2[7];
+	u8 tcam_data[8];
+};
+
+struct hclge_fd_tcam_config_2_cmd {
+	u8 tcam_data[24];
+};
+
+struct hclge_fd_tcam_config_3_cmd {
+	u8 tcam_data[20];
+	u8 rsv[4];
+};
+
+#define HCLGE_FD_AD_DROP_B		0
+#define HCLGE_FD_AD_DIRECT_QID_B	1
+#define HCLGE_FD_AD_QID_S		2
+#define HCLGE_FD_AD_QID_M		GENMASK(12, 2)
+#define HCLGE_FD_AD_USE_COUNTER_B	12
+#define HCLGE_FD_AD_COUNTER_NUM_S	13
+#define HCLGE_FD_AD_COUNTER_NUM_M	GENMASK(20, 13)
+#define HCLGE_FD_AD_NXT_STEP_B		20
+#define HCLGE_FD_AD_NXT_KEY_S		21
+#define HCLGE_FD_AD_NXT_KEY_M		GENMASK(26, 21)
+#define HCLGE_FD_AD_WR_RULE_ID_B	0
+#define HCLGE_FD_AD_RULE_ID_S		1
+#define HCLGE_FD_AD_RULE_ID_M		GENMASK(13, 1)
+
+struct hclge_fd_ad_config_cmd {
+	u8 stage;
+	u8 rsv1[3];
+	__le32 index;
+	__le64 ad_data;
+	u8 rsv2[8];
+};
+
+struct hclge_get_m7_bd_cmd {
+	__le32 bd_num;
+	u8 rsv[20];
+};
+
+struct hclge_query_ppu_pf_other_int_dfx_cmd {
+	__le16 over_8bd_no_fe_qid;
+	__le16 over_8bd_no_fe_vf_id;
+	__le16 tso_mss_cmp_min_err_qid;
+	__le16 tso_mss_cmp_min_err_vf_id;
+	__le16 tso_mss_cmp_max_err_qid;
+	__le16 tso_mss_cmp_max_err_vf_id;
+	__le16 tx_rd_fbd_poison_qid;
+	__le16 tx_rd_fbd_poison_vf_id;
+	__le16 rx_rd_fbd_poison_qid;
+	__le16 rx_rd_fbd_poison_vf_id;
+	u8 rsv[4];
+};
+
+#define HCLGE_LINK_EVENT_REPORT_EN_B	0
+#define HCLGE_NCSI_ERROR_REPORT_EN_B	1
+struct hclge_firmware_compat_cmd {
+	__le32 compat;
+	u8 rsv[20];
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
@@ -853,6 +1098,6 @@
 enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw,
 					  struct hclge_desc *desc);
 
-void hclge_destroy_cmd_queue(struct hclge_hw *hw);
+void hclge_cmd_uninit(struct hclge_dev *hdev);
 int hclge_cmd_queue_init(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 92f1938..a1790af 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -35,7 +35,9 @@
 		}
 	}
 
-	return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc);
+	hclge_tm_prio_tc_info_update(hdev, ets->prio_tc);
+
+	return 0;
 }
 
 static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
@@ -70,25 +72,59 @@
 	return 0;
 }
 
+static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc,
+				     u8 *prio_tc)
+{
+	int i;
+
+	if (num_tc > hdev->tc_max) {
+		dev_err(&hdev->pdev->dev,
+			"tc num checking failed, %u > tc_max(%u)\n",
+			num_tc, hdev->tc_max);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+		if (prio_tc[i] >= num_tc) {
+			dev_err(&hdev->pdev->dev,
+				"prio_tc[%u] checking failed, %u >= num_tc(%u)\n",
+				i, prio_tc[i], num_tc);
+			return -EINVAL;
+		}
+	}
+
+	if (num_tc > hdev->vport[0].alloc_tqps) {
+		dev_err(&hdev->pdev->dev,
+			"allocated tqp checking failed, %u > tqp(%u)\n",
+			num_tc, hdev->vport[0].alloc_tqps);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
 			      u8 *tc, bool *changed)
 {
 	bool has_ets_tc = false;
 	u32 total_ets_bw = 0;
 	u8 max_tc = 0;
+	int ret;
 	u8 i;
 
-	for (i = 0; i < HNAE3_MAX_TC; i++) {
-		if (ets->prio_tc[i] >= hdev->tc_max ||
-		    i >= hdev->tc_max)
-			return -EINVAL;
-
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
 		if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
 			*changed = true;
 
 		if (ets->prio_tc[i] > max_tc)
 			max_tc = ets->prio_tc[i];
+	}
 
+	ret = hclge_dcb_common_validate(hdev, max_tc + 1, ets->prio_tc);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < hdev->tc_max; i++) {
 		switch (ets->tc_tsa[i]) {
 		case IEEE_8021QAZ_TSA_STRICT:
 			if (hdev->tm_info.tc_info[i].tc_sch_mode !=
@@ -118,21 +154,15 @@
 	return 0;
 }
 
-static int hclge_map_update(struct hnae3_handle *h)
+static int hclge_map_update(struct hclge_dev *hdev)
 {
-	struct hclge_vport *vport = hclge_get_vport(h);
-	struct hclge_dev *hdev = vport->back;
 	int ret;
 
-	ret = hclge_tm_map_cfg(hdev);
+	ret = hclge_tm_schd_setup_hw(hdev);
 	if (ret)
 		return ret;
 
-	ret = hclge_tm_schd_mode_hw(hdev);
-	if (ret)
-		return ret;
-
-	ret = hclge_pause_setup_hw(hdev);
+	ret = hclge_pause_setup_hw(hdev, false);
 	if (ret)
 		return ret;
 
@@ -168,9 +198,32 @@
 	return 0;
 }
 
+static int hclge_notify_down_uinit(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	if (ret)
+		return ret;
+
+	return hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+}
+
+static int hclge_notify_init_up(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+	if (ret)
+		return ret;
+
+	return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+}
+
 static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
 {
 	struct hclge_vport *vport = hclge_get_vport(h);
+	struct net_device *netdev = h->kinfo.netdev;
 	struct hclge_dev *hdev = vport->back;
 	bool map_changed = false;
 	u8 num_tc = 0;
@@ -184,19 +237,43 @@
 	if (ret)
 		return ret;
 
+	if (map_changed) {
+		netif_dbg(h, drv, netdev, "set ets\n");
+
+		ret = hclge_notify_down_uinit(hdev);
+		if (ret)
+			return ret;
+	}
+
 	hclge_tm_schd_info_update(hdev, num_tc);
 
 	ret = hclge_ieee_ets_to_tm_info(hdev, ets);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	if (map_changed) {
+		ret = hclge_map_update(hdev);
+		if (ret)
+			goto err_out;
+
 		ret = hclge_client_setup_tc(hdev);
 		if (ret)
+			goto err_out;
+
+		ret = hclge_notify_init_up(hdev);
+		if (ret)
 			return ret;
 	}
 
 	return hclge_tm_dwrr_cfg(hdev);
+
+err_out:
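+	/* if the client was taken down for a map change, bring it back up
+	 * even on failure so the interface is not left unusable
+	 */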
+	if (!map_changed)
+		return ret;
+
+	hclge_notify_init_up(hdev);
+
+	return ret;
 }
 
 static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
@@ -238,13 +315,18 @@
 static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
 {
 	struct hclge_vport *vport = hclge_get_vport(h);
+	struct net_device *netdev = h->kinfo.netdev;
 	struct hclge_dev *hdev = vport->back;
 	u8 i, j, pfc_map, *prio_tc;
+	int ret;
 
 	if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
 	    hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
 		return -EINVAL;
 
+	if (pfc->pfc_en == hdev->tm_info.pfc_en)
+		return 0;
+
 	prio_tc = hdev->tm_info.prio_tc;
 	pfc_map = 0;
 
@@ -257,12 +339,30 @@
 		}
 	}
 
-	if (pfc_map == hdev->tm_info.hw_pfc_map)
-		return 0;
-
 	hdev->tm_info.hw_pfc_map = pfc_map;
+	hdev->tm_info.pfc_en = pfc->pfc_en;
 
-	return hclge_pause_setup_hw(hdev);
+	netif_dbg(h, drv, netdev,
+		  "set pfc: pfc_en=%x, pfc_map=%x, num_tc=%u\n",
+		  pfc->pfc_en, pfc_map, hdev->tm_info.num_tc);
+
+	hclge_tm_pfc_info_update(hdev);
+
+	ret = hclge_pause_setup_hw(hdev, false);
+	if (ret)
+		return ret;
+
+	ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	if (ret)
+		return ret;
+
+	ret = hclge_buffer_alloc(hdev);
+	if (ret) {
+		hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+		return ret;
+	}
+
+	return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 }
 
 /* DCBX configuration */
@@ -280,8 +380,11 @@
 static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode)
 {
 	struct hclge_vport *vport = hclge_get_vport(h);
+	struct net_device *netdev = h->kinfo.netdev;
 	struct hclge_dev *hdev = vport->back;
 
+	netif_dbg(h, drv, netdev, "set dcbx: mode=%u\n", mode);
+
 	/* No support for LLD_MANAGED modes or CEE */
 	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
 	    (mode & DCB_CAP_DCBX_VER_CEE) ||
@@ -303,22 +406,24 @@
 	if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
 		return -EINVAL;
 
-	if (tc > hdev->tc_max) {
-		dev_err(&hdev->pdev->dev,
-			"setup tc failed, tc(%u) > tc_max(%u)\n",
-			tc, hdev->tc_max);
+	ret = hclge_dcb_common_validate(hdev, tc, prio_tc);
+	if (ret)
 		return -EINVAL;
-	}
+
+	ret = hclge_notify_down_uinit(hdev);
+	if (ret)
+		return ret;
 
 	hclge_tm_schd_info_update(hdev, tc);
+	hclge_tm_prio_tc_info_update(hdev, prio_tc);
 
-	ret = hclge_tm_prio_tc_info_update(hdev, prio_tc);
+	ret = hclge_tm_init_hw(hdev, false);
 	if (ret)
-		return ret;
+		goto err_out;
 
-	ret = hclge_tm_init_hw(hdev);
+	ret = hclge_client_setup_tc(hdev);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 
@@ -327,7 +432,12 @@
 	else
 		hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
 
-	return 0;
+	return hclge_notify_init_up(hdev);
+
+err_out:
+	hclge_notify_init_up(hdev);
+
+	return ret;
 }
 
 static const struct hnae3_dcb_ops hns3_dcb_ops = {
@@ -337,7 +447,6 @@
 	.ieee_setpfc	= hclge_ieee_setpfc,
 	.getdcbx	= hclge_getdcbx,
 	.setdcbx	= hclge_setdcbx,
-	.map_update	= hclge_map_update,
 	.setup_tc	= hclge_setup_tc,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
index 278f21e..b04702e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_DCB_H__
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
new file mode 100644
index 0000000..d0128d7
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -0,0 +1,1154 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2018-2019 Hisilicon Limited. */
+
+#include <linux/device.h>
+
+#include "hclge_debugfs.h"
+#include "hclge_main.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+static struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = {
+	{ .reg_type = "bios common",
+	  .dfx_msg = &hclge_dbg_bios_common_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_bios_common_reg),
+		       .offset = HCLGE_DBG_DFX_BIOS_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_BIOS_COMMON_REG } },
+	{ .reg_type = "ssu",
+	  .dfx_msg = &hclge_dbg_ssu_reg_0[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ssu_reg_0),
+		       .offset = HCLGE_DBG_DFX_SSU_0_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_SSU_REG_0 } },
+	{ .reg_type = "ssu",
+	  .dfx_msg = &hclge_dbg_ssu_reg_1[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ssu_reg_1),
+		       .offset = HCLGE_DBG_DFX_SSU_1_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_SSU_REG_1 } },
+	{ .reg_type = "ssu",
+	  .dfx_msg = &hclge_dbg_ssu_reg_2[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ssu_reg_2),
+		       .offset = HCLGE_DBG_DFX_SSU_2_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_SSU_REG_2 } },
+	{ .reg_type = "igu egu",
+	  .dfx_msg = &hclge_dbg_igu_egu_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_igu_egu_reg),
+		       .offset = HCLGE_DBG_DFX_IGU_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_IGU_EGU_REG } },
+	{ .reg_type = "rpu",
+	  .dfx_msg = &hclge_dbg_rpu_reg_0[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rpu_reg_0),
+		       .offset = HCLGE_DBG_DFX_RPU_0_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_RPU_REG_0 } },
+	{ .reg_type = "rpu",
+	  .dfx_msg = &hclge_dbg_rpu_reg_1[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rpu_reg_1),
+		       .offset = HCLGE_DBG_DFX_RPU_1_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_RPU_REG_1 } },
+	{ .reg_type = "ncsi",
+	  .dfx_msg = &hclge_dbg_ncsi_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ncsi_reg),
+		       .offset = HCLGE_DBG_DFX_NCSI_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_NCSI_REG } },
+	{ .reg_type = "rtc",
+	  .dfx_msg = &hclge_dbg_rtc_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rtc_reg),
+		       .offset = HCLGE_DBG_DFX_RTC_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_RTC_REG } },
+	{ .reg_type = "ppp",
+	  .dfx_msg = &hclge_dbg_ppp_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ppp_reg),
+		       .offset = HCLGE_DBG_DFX_PPP_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_PPP_REG } },
+	{ .reg_type = "rcb",
+	  .dfx_msg = &hclge_dbg_rcb_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rcb_reg),
+		       .offset = HCLGE_DBG_DFX_RCB_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_RCB_REG } },
+	{ .reg_type = "tqp",
+	  .dfx_msg = &hclge_dbg_tqp_reg[0],
+	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_tqp_reg),
+		       .offset = HCLGE_DBG_DFX_TQP_OFFSET,
+		       .cmd = HCLGE_OPC_DFX_TQP_REG } },
+};
+
+static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset)
+{
+#define HCLGE_GET_DFX_REG_TYPE_CNT	4
+
+	struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT];
+	int entries_per_desc;
+	int index;
+	int ret;
+
+	ret = hclge_query_bd_num_cmd_send(hdev, desc);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"get dfx bdnum fail, ret = %d\n", ret);
+		return ret;
+	}
+
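+	/* the BD counts form one flat table across the descriptors' data
+	 * areas; pick the 32-bit entry at position 'offset'
+	 */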
+	entries_per_desc = ARRAY_SIZE(desc[0].data);
+	index = offset % entries_per_desc;
+	return (int)desc[offset / entries_per_desc].data[index];
+}
+
+static int hclge_dbg_cmd_send(struct hclge_dev *hdev,
+			      struct hclge_desc *desc_src,
+			      int index, int bd_num,
+			      enum hclge_opcode_type cmd)
+{
+	struct hclge_desc *desc = desc_src;
+	int ret, i;
+
+	hclge_cmd_setup_basic_desc(desc, cmd, true);
+	desc->data[0] = cpu_to_le32(index);
+
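+	/* chain the remaining BDs: HCLGE_CMD_FLAG_NEXT marks a descriptor
+	 * as continued by the next one
+	 */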
+	for (i = 1; i < bd_num; i++) {
+		desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		desc++;
+		hclge_cmd_setup_basic_desc(desc, cmd, true);
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, desc_src, bd_num);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"cmd(0x%x) send fail, ret = %d\n", cmd, ret);
+	return ret;
+}
+
+static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
+				      struct hclge_dbg_reg_type_info *reg_info,
+				      const char *cmd_buf)
+{
+#define IDX_OFFSET	1
+
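+	/* cmd_buf is "<reg_type> [index]"; skip the type name plus one
+	 * separator to reach the optional index
+	 */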
+	const char *s = &cmd_buf[strlen(reg_info->reg_type) + IDX_OFFSET];
+	struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg;
+	struct hclge_dbg_reg_common_msg *reg_msg = &reg_info->reg_msg;
+	struct hclge_desc *desc_src;
+	struct hclge_desc *desc;
+	int entries_per_desc;
+	int bd_num, buf_len;
+	int index = 0;
+	int min_num;
+	int ret, i;
+
+	if (*s) {
+		ret = kstrtouint(s, 0, &index);
+		index = (ret != 0) ? 0 : index;
+	}
+
+	bd_num = hclge_dbg_get_dfx_bd_num(hdev, reg_msg->offset);
+	if (bd_num <= 0) {
+		dev_err(&hdev->pdev->dev, "get cmd(%d) bd num(%d) failed\n",
+			reg_msg->offset, bd_num);
+		return;
+	}
+
+	buf_len	 = sizeof(struct hclge_desc) * bd_num;
+	desc_src = kzalloc(buf_len, GFP_KERNEL);
+	if (!desc_src) {
+		dev_err(&hdev->pdev->dev, "call kzalloc failed\n");
+		return;
+	}
+
+	desc = desc_src;
+	ret  = hclge_dbg_cmd_send(hdev, desc, index, bd_num, reg_msg->cmd);
+	if (ret) {
+		kfree(desc_src);
+		return;
+	}
+
+	entries_per_desc = ARRAY_SIZE(desc->data);
+	min_num = min_t(int, bd_num * entries_per_desc, reg_msg->msg_num);
+
+	desc = desc_src;
+	for (i = 0; i < min_num; i++) {
+		if (i > 0 && (i % entries_per_desc) == 0)
+			desc++;
+		if (dfx_message->flag)
+			dev_info(&hdev->pdev->dev, "%s: 0x%x\n",
+				 dfx_message->message,
+				 desc->data[i % entries_per_desc]);
+
+		dfx_message++;
+	}
+
+	kfree(desc_src);
+}
+
+static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_dbg_bitmap_cmd *bitmap;
+	int rq_id, pri_id, qset_id;
+	int port_id, nq_id, pg_id;
+	struct hclge_desc desc[2];
+	int cnt, ret;
+
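+	/* expects six ids on the command line: port pri pg rq nq qset */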
+	cnt = sscanf(cmd_buf, "%i %i %i %i %i %i",
+		     &port_id, &pri_id, &pg_id, &rq_id, &nq_id, &qset_id);
+	if (cnt != 6) {
+		dev_err(&hdev->pdev->dev,
+			"dump dcb: bad command parameter, cnt=%d\n", cnt);
+		return;
+	}
+
+	ret = hclge_dbg_cmd_send(hdev, desc, qset_id, 1,
+				 HCLGE_OPC_QSET_DFX_STS);
+	if (ret)
+		return;
+
+	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+	dev_info(dev, "roce_qset_mask: 0x%x\n", bitmap->bit0);
+	dev_info(dev, "nic_qs_mask: 0x%x\n", bitmap->bit1);
+	dev_info(dev, "qs_shaping_pass: 0x%x\n", bitmap->bit2);
+	dev_info(dev, "qs_bp_sts: 0x%x\n", bitmap->bit3);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, pri_id, 1, HCLGE_OPC_PRI_DFX_STS);
+	if (ret)
+		return;
+
+	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+	dev_info(dev, "pri_mask: 0x%x\n", bitmap->bit0);
+	dev_info(dev, "pri_cshaping_pass: 0x%x\n", bitmap->bit1);
+	dev_info(dev, "pri_pshaping_pass: 0x%x\n", bitmap->bit2);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, pg_id, 1, HCLGE_OPC_PG_DFX_STS);
+	if (ret)
+		return;
+
+	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+	dev_info(dev, "pg_mask: 0x%x\n", bitmap->bit0);
+	dev_info(dev, "pg_cshaping_pass: 0x%x\n", bitmap->bit1);
+	dev_info(dev, "pg_pshaping_pass: 0x%x\n", bitmap->bit2);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+				 HCLGE_OPC_PORT_DFX_STS);
+	if (ret)
+		return;
+
+	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
+	dev_info(dev, "port_mask: 0x%x\n", bitmap->bit0);
+	dev_info(dev, "port_shaping_pass: 0x%x\n", bitmap->bit1);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, HCLGE_OPC_SCH_NQ_CNT);
+	if (ret)
+		return;
+
+	dev_info(dev, "sch_nq_cnt: 0x%x\n", desc[0].data[1]);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, HCLGE_OPC_SCH_RQ_CNT);
+	if (ret)
+		return;
+
+	dev_info(dev, "sch_rq_cnt: 0x%x\n", desc[0].data[1]);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, 0, 2, HCLGE_OPC_TM_INTERNAL_STS);
+	if (ret)
+		return;
+
+	dev_info(dev, "pri_bp: 0x%x\n", desc[0].data[1]);
+	dev_info(dev, "fifo_dfx_info: 0x%x\n", desc[0].data[2]);
+	dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n", desc[0].data[3]);
+	dev_info(dev, "tx_private_waterline: 0x%x\n", desc[0].data[4]);
+	dev_info(dev, "tm_bypass_en: 0x%x\n", desc[0].data[5]);
+	dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", desc[1].data[0]);
+	dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", desc[1].data[1]);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+				 HCLGE_OPC_TM_INTERNAL_CNT);
+	if (ret)
+		return;
+
+	dev_info(dev, "SCH_NIC_NUM: 0x%x\n", desc[0].data[1]);
+	dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", desc[0].data[2]);
+
+	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+				 HCLGE_OPC_TM_INTERNAL_STS_1);
+	if (ret)
+		return;
+
+	dev_info(dev, "TC_MAP_SEL: 0x%x\n", desc[0].data[1]);
+	dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", desc[0].data[2]);
+	dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", desc[0].data[3]);
+	dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[4]);
+	dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[5]);
+}
+
+static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf)
+{
+	struct hclge_dbg_reg_type_info *reg_info;
+	bool has_dump = false;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hclge_dbg_reg_info); i++) {
+		reg_info = &hclge_dbg_reg_info[i];
+		if (!strncmp(cmd_buf, reg_info->reg_type,
+			     strlen(reg_info->reg_type))) {
+			hclge_dbg_dump_reg_common(hdev, reg_info, cmd_buf);
+			has_dump = true;
+		}
+	}
+
+	if (strncmp(cmd_buf, "dcb", 3) == 0) {
+		hclge_dbg_dump_dcb(hdev, &cmd_buf[sizeof("dcb")]);
+		has_dump = true;
+	}
+
+	if (!has_dump) {
+		dev_info(&hdev->pdev->dev, "unknown command\n");
+		return;
+	}
+}
+
+static void hclge_title_idx_print(struct hclge_dev *hdev, bool flag, int index,
+				  char *title_buf, char *true_buf,
+				  char *false_buf)
+{
+	if (flag)
+		dev_info(&hdev->pdev->dev, "%s(%d): %s\n", title_buf, index,
+			 true_buf);
+	else
+		dev_info(&hdev->pdev->dev, "%s(%d): %s\n", title_buf, index,
+			 false_buf);
+}
+
+static void hclge_dbg_dump_tc(struct hclge_dev *hdev)
+{
+	struct hclge_ets_tc_weight_cmd *ets_weight;
+	struct hclge_desc desc;
+	int i, ret;
+
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		dev_info(&hdev->pdev->dev,
+			 "Only DCB-supported dev supports tc\n");
+		return;
+	}
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "dump tc fail, ret = %d\n", ret);
+		return;
+	}
+
+	ets_weight = (struct hclge_ets_tc_weight_cmd *)desc.data;
+
+	dev_info(&hdev->pdev->dev, "dump tc\n");
+	dev_info(&hdev->pdev->dev, "weight_offset: %u\n",
+		 ets_weight->weight_offset);
+
+	for (i = 0; i < HNAE3_MAX_TC; i++)
+		hclge_title_idx_print(hdev, ets_weight->tc_weight[i], i,
+				      "tc", "no sp mode", "sp mode");
+}
+
+static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
+{
+	struct hclge_port_shapping_cmd *port_shap_cfg_cmd;
+	struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
+	struct hclge_pg_shapping_cmd *pg_shap_cfg_cmd;
+	enum hclge_opcode_type cmd;
+	struct hclge_desc desc;
+	int ret;
+
+	cmd = HCLGE_OPC_TM_PG_C_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PG_C pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
+	dev_info(&hdev->pdev->dev, "PG_C pg_shapping: 0x%x\n",
+		 pg_shap_cfg_cmd->pg_shapping_para);
+
+	cmd = HCLGE_OPC_TM_PG_P_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PG_P pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
+	dev_info(&hdev->pdev->dev, "PG_P pg_shapping: 0x%x\n",
+		 pg_shap_cfg_cmd->pg_shapping_para);
+
+	cmd = HCLGE_OPC_TM_PORT_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	port_shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PORT port_shapping: 0x%x\n",
+		 port_shap_cfg_cmd->port_shapping_para);
+
+	cmd = HCLGE_OPC_TM_PG_SCH_MODE_CFG;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	dev_info(&hdev->pdev->dev, "PG_SCH pg_id: %u\n", desc.data[0]);
+
+	cmd = HCLGE_OPC_TM_PRI_SCH_MODE_CFG;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n", desc.data[0]);
+
+	cmd = HCLGE_OPC_TM_QS_SCH_MODE_CFG;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n", desc.data[0]);
+
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		dev_info(&hdev->pdev->dev,
+			 "Only DCB-supported dev supports tm mapping\n");
+		return;
+	}
+
+	cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_pg_cmd_send;
+
+	bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "BP_TO_QSET tc_id: %u\n",
+		 bp_to_qs_map_cmd->tc_id);
+	dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_group_id: 0x%x\n",
+		 bp_to_qs_map_cmd->qs_group_id);
+	dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_bit_map: 0x%x\n",
+		 bp_to_qs_map_cmd->qs_bit_map);
+	return;
+
+err_tm_pg_cmd_send:
+	dev_err(&hdev->pdev->dev, "dump tm_pg fail(0x%x), ret = %d\n",
+		cmd, ret);
+}
+
+static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
+{
+	struct hclge_priority_weight_cmd *priority_weight;
+	struct hclge_pg_to_pri_link_cmd *pg_to_pri_map;
+	struct hclge_qs_to_pri_link_cmd *qs_to_pri_map;
+	struct hclge_nq_to_qs_link_cmd *nq_to_qs_map;
+	struct hclge_pri_shapping_cmd *shap_cfg_cmd;
+	struct hclge_pg_weight_cmd *pg_weight;
+	struct hclge_qs_weight_cmd *qs_weight;
+	enum hclge_opcode_type cmd;
+	struct hclge_desc desc;
+	int ret;
+
+	cmd = HCLGE_OPC_TM_PG_TO_PRI_LINK;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	pg_to_pri_map = (struct hclge_pg_to_pri_link_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "dump tm\n");
+	dev_info(&hdev->pdev->dev, "PG_TO_PRI gp_id: %u\n",
+		 pg_to_pri_map->pg_id);
+	dev_info(&hdev->pdev->dev, "PG_TO_PRI map: 0x%x\n",
+		 pg_to_pri_map->pri_bit_map);
+
+	cmd = HCLGE_OPC_TM_QS_TO_PRI_LINK;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	qs_to_pri_map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "QS_TO_PRI qs_id: %u\n",
+		 qs_to_pri_map->qs_id);
+	dev_info(&hdev->pdev->dev, "QS_TO_PRI priority: %u\n",
+		 qs_to_pri_map->priority);
+	dev_info(&hdev->pdev->dev, "QS_TO_PRI link_vld: %u\n",
+		 qs_to_pri_map->link_vld);
+
+	cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n", nq_to_qs_map->nq_id);
+	dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: 0x%x\n",
+		 nq_to_qs_map->qset_id);
+
+	cmd = HCLGE_OPC_TM_PG_WEIGHT;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	pg_weight = (struct hclge_pg_weight_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PG pg_id: %u\n", pg_weight->pg_id);
+	dev_info(&hdev->pdev->dev, "PG dwrr: %u\n", pg_weight->dwrr);
+
+	cmd = HCLGE_OPC_TM_QS_WEIGHT;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	qs_weight = (struct hclge_qs_weight_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "QS qs_id: %u\n", qs_weight->qs_id);
+	dev_info(&hdev->pdev->dev, "QS dwrr: %u\n", qs_weight->dwrr);
+
+	cmd = HCLGE_OPC_TM_PRI_WEIGHT;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	priority_weight = (struct hclge_priority_weight_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PRI pri_id: %u\n", priority_weight->pri_id);
+	dev_info(&hdev->pdev->dev, "PRI dwrr: %u\n", priority_weight->dwrr);
+
+	cmd = HCLGE_OPC_TM_PRI_C_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PRI_C pri_id: %u\n", shap_cfg_cmd->pri_id);
+	dev_info(&hdev->pdev->dev, "PRI_C pri_shapping: 0x%x\n",
+		 shap_cfg_cmd->pri_shapping_para);
+
+	cmd = HCLGE_OPC_TM_PRI_P_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_cmd_send;
+
+	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "PRI_P pri_id: %u\n", shap_cfg_cmd->pri_id);
+	dev_info(&hdev->pdev->dev, "PRI_P pri_shapping: 0x%x\n",
+		 shap_cfg_cmd->pri_shapping_para);
+
+	hclge_dbg_dump_tm_pg(hdev);
+
+	return;
+
+err_tm_cmd_send:
+	dev_err(&hdev->pdev->dev, "dump tm fail(0x%x), ret = %d\n",
+		cmd, ret);
+}
+
+static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
+				  const char *cmd_buf)
+{
+	struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
+	struct hclge_nq_to_qs_link_cmd *nq_to_qs_map;
+	struct hclge_qs_to_pri_link_cmd *map;
+	struct hclge_tqp_tx_queue_tc_cmd *tc;
+	enum hclge_opcode_type cmd;
+	struct hclge_desc desc;
+	int queue_id, group_id;
+	u32 qset_maping[32];
+	int tc_id, qset_id;
+	int pri_id, ret;
+	u32 i;
+
+	ret = kstrtouint(cmd_buf, 0, &queue_id);
+	queue_id = (ret != 0) ? 0 : queue_id;
+
+	cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK;
+	nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	nq_to_qs_map->nq_id = cpu_to_le16(queue_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_map_cmd_send;
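+	/* the returned qset id is a 10-bit field */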
+	qset_id = nq_to_qs_map->qset_id & 0x3FF;
+
+	cmd = HCLGE_OPC_TM_QS_TO_PRI_LINK;
+	map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	map->qs_id = cpu_to_le16(qset_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_map_cmd_send;
+	pri_id = map->priority;
+
+	cmd = HCLGE_OPC_TQP_TX_QUEUE_TC;
+	tc = (struct hclge_tqp_tx_queue_tc_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	tc->queue_id = cpu_to_le16(queue_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		goto err_tm_map_cmd_send;
+	tc_id = tc->tc_id & 0x7;
+
+	dev_info(&hdev->pdev->dev, "queue_id | qset_id | pri_id | tc_id\n");
+	dev_info(&hdev->pdev->dev, "%04d     | %04d    | %02d     | %02d\n",
+		 queue_id, qset_id, pri_id, tc_id);
+
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		dev_info(&hdev->pdev->dev,
+			 "Only DCB-supported dev supports tm mapping\n");
+		return;
+	}
+
+	cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
+	bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
+	for (group_id = 0; group_id < 32; group_id++) {
+		hclge_cmd_setup_basic_desc(&desc, cmd, true);
+		bp_to_qs_map_cmd->tc_id = tc_id;
+		bp_to_qs_map_cmd->qs_group_id = group_id;
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret)
+			goto err_tm_map_cmd_send;
+
+		qset_maping[group_id] = bp_to_qs_map_cmd->qs_bit_map;
+	}
+
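+	/* each of the 32 words maps 32 qsets; print eight words (256 qsets)
+	 * per row, highest word first
+	 */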
+	dev_info(&hdev->pdev->dev, "index | tm bp qset maping:\n");
+
+	i = 0;
+	for (group_id = 0; group_id < 4; group_id++) {
+		dev_info(&hdev->pdev->dev,
+			 "%04d  | %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
+			 group_id * 256, qset_maping[(u32)(i + 7)],
+			 qset_maping[(u32)(i + 6)], qset_maping[(u32)(i + 5)],
+			 qset_maping[(u32)(i + 4)], qset_maping[(u32)(i + 3)],
+			 qset_maping[(u32)(i + 2)], qset_maping[(u32)(i + 1)],
+			 qset_maping[i]);
+		i += 8;
+	}
+
+	return;
+
+err_tm_map_cmd_send:
+	dev_err(&hdev->pdev->dev, "dump tqp map fail(0x%x), ret = %d\n",
+		cmd, ret);
+}
+
+static void hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_cfg_pause_param_cmd *pause_param;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_MAC_PARA, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "dump qos pause cfg fail, ret = %d\n",
+			ret);
+		return;
+	}
+
+	pause_param = (struct hclge_cfg_pause_param_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "dump qos pause cfg\n");
+	dev_info(&hdev->pdev->dev, "pause_trans_gap: 0x%x\n",
+		 pause_param->pause_trans_gap);
+	dev_info(&hdev->pdev->dev, "pause_trans_time: 0x%x\n",
+		 pause_param->pause_trans_time);
+}
+
+static void hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev)
+{
+	struct hclge_qos_pri_map_cmd *pri_map;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"dump qos pri map fail, ret = %d\n", ret);
+		return;
+	}
+
+	pri_map = (struct hclge_qos_pri_map_cmd *)desc.data;
+	dev_info(&hdev->pdev->dev, "dump qos pri map\n");
+	dev_info(&hdev->pdev->dev, "vlan_to_pri: 0x%x\n", pri_map->vlan_pri);
+	dev_info(&hdev->pdev->dev, "pri_0_to_tc: 0x%x\n", pri_map->pri0_tc);
+	dev_info(&hdev->pdev->dev, "pri_1_to_tc: 0x%x\n", pri_map->pri1_tc);
+	dev_info(&hdev->pdev->dev, "pri_2_to_tc: 0x%x\n", pri_map->pri2_tc);
+	dev_info(&hdev->pdev->dev, "pri_3_to_tc: 0x%x\n", pri_map->pri3_tc);
+	dev_info(&hdev->pdev->dev, "pri_4_to_tc: 0x%x\n", pri_map->pri4_tc);
+	dev_info(&hdev->pdev->dev, "pri_5_to_tc: 0x%x\n", pri_map->pri5_tc);
+	dev_info(&hdev->pdev->dev, "pri_6_to_tc: 0x%x\n", pri_map->pri6_tc);
+	dev_info(&hdev->pdev->dev, "pri_7_to_tc: 0x%x\n", pri_map->pri7_tc);
+}
+
+static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_tx_buff_alloc_cmd *tx_buf_cmd;
+	struct hclge_rx_priv_buff_cmd *rx_buf_cmd;
+	struct hclge_rx_priv_wl_buf *rx_priv_wl;
+	struct hclge_rx_com_wl *rx_packet_cnt;
+	struct hclge_rx_com_thrd *rx_com_thrd;
+	struct hclge_rx_com_wl *rx_com_wl;
+	enum hclge_opcode_type cmd;
+	struct hclge_desc desc[2];
+	int i, ret;
+
+	cmd = HCLGE_OPC_TX_BUFF_ALLOC;
+	hclge_cmd_setup_basic_desc(desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, 1);
+	if (ret)
+		goto err_qos_cmd_send;
+
+	dev_info(&hdev->pdev->dev, "dump qos buf cfg\n");
+
+	tx_buf_cmd = (struct hclge_tx_buff_alloc_cmd *)desc[0].data;
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+		dev_info(&hdev->pdev->dev, "tx_packet_buf_tc_%d: 0x%x\n", i,
+			 tx_buf_cmd->tx_pkt_buff[i]);
+
+	cmd = HCLGE_OPC_RX_PRIV_BUFF_ALLOC;
+	hclge_cmd_setup_basic_desc(desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, 1);
+	if (ret)
+		goto err_qos_cmd_send;
+
+	dev_info(&hdev->pdev->dev, "\n");
+	rx_buf_cmd = (struct hclge_rx_priv_buff_cmd *)desc[0].data;
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+		dev_info(&hdev->pdev->dev, "rx_packet_buf_tc_%d: 0x%x\n", i,
+			 rx_buf_cmd->buf_num[i]);
+
+	dev_info(&hdev->pdev->dev, "rx_share_buf: 0x%x\n",
+		 rx_buf_cmd->shared_buf);
+
+	cmd = HCLGE_OPC_RX_COM_WL_ALLOC;
+	hclge_cmd_setup_basic_desc(desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, 1);
+	if (ret)
+		goto err_qos_cmd_send;
+
+	rx_com_wl = (struct hclge_rx_com_wl *)desc[0].data;
+	dev_info(&hdev->pdev->dev, "\n");
+	dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n",
+		 rx_com_wl->com_wl.high, rx_com_wl->com_wl.low);
+
+	cmd = HCLGE_OPC_RX_GBL_PKT_CNT;
+	hclge_cmd_setup_basic_desc(desc, cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, 1);
+	if (ret)
+		goto err_qos_cmd_send;
+
+	rx_packet_cnt = (struct hclge_rx_com_wl *)desc[0].data;
+	dev_info(&hdev->pdev->dev,
+		 "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
+		 rx_packet_cnt->com_wl.high, rx_packet_cnt->com_wl.low);
+	dev_info(&hdev->pdev->dev, "\n");
+
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		dev_info(&hdev->pdev->dev,
+			 "Only DCB-supported dev supports rx priv wl\n");
+		return;
+	}
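+
+	/* per-TC watermarks and thresholds span two chained BDs; the second
+	 * BD continues at TC index HCLGE_TC_NUM_ONE_DESC
+	 */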
+	cmd = HCLGE_OPC_RX_PRIV_WL_ALLOC;
+	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, 2);
+	if (ret)
+		goto err_qos_cmd_send;
+
+	rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[0].data;
+	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
+		dev_info(&hdev->pdev->dev,
+			 "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n", i,
+			 rx_priv_wl->tc_wl[i].high, rx_priv_wl->tc_wl[i].low);
+
+	rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[1].data;
+	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
+		dev_info(&hdev->pdev->dev,
+			 "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n",
+			 i + HCLGE_TC_NUM_ONE_DESC,
+			 rx_priv_wl->tc_wl[i].high, rx_priv_wl->tc_wl[i].low);
+
+	cmd = HCLGE_OPC_RX_COM_THRD_ALLOC;
+	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, 2);
+	if (ret)
+		goto err_qos_cmd_send;
+
+	dev_info(&hdev->pdev->dev, "\n");
+	rx_com_thrd = (struct hclge_rx_com_thrd *)desc[0].data;
+	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
+		dev_info(&hdev->pdev->dev,
+			 "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n", i,
+			 rx_com_thrd->com_thrd[i].high,
+			 rx_com_thrd->com_thrd[i].low);
+
+	rx_com_thrd = (struct hclge_rx_com_thrd *)desc[1].data;
+	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
+		dev_info(&hdev->pdev->dev,
+			 "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n",
+			 i + HCLGE_TC_NUM_ONE_DESC,
+			 rx_com_thrd->com_thrd[i].high,
+			 rx_com_thrd->com_thrd[i].low);
+	return;
+
+err_qos_cmd_send:
+	dev_err(&hdev->pdev->dev,
+		"dump qos buf cfg fail(0x%x), ret = %d\n", cmd, ret);
+}
+
+static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
+{
+	struct hclge_mac_ethertype_idx_rd_cmd *req0;
+	char printf_buf[HCLGE_DBG_BUF_LEN];
+	struct hclge_desc desc;
+	int ret, i;
+
+	dev_info(&hdev->pdev->dev, "mng tab:\n");
+	memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
+	strncat(printf_buf,
+		"entry|mac_addr         |mask|ether|mask|vlan|mask",
+		HCLGE_DBG_BUF_LEN - 1);
+	strncat(printf_buf + strlen(printf_buf),
+		"|i_map|i_dir|e_type|pf_id|vf_id|q_id|drop\n",
+		HCLGE_DBG_BUF_LEN - strlen(printf_buf) - 1);
+
+	dev_info(&hdev->pdev->dev, "%s", printf_buf);
+
+	for (i = 0; i < HCLGE_DBG_MNG_TBL_MAX; i++) {
+		hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_ETHERTYPE_IDX_RD,
+					   true);
+		req0 = (struct hclge_mac_ethertype_idx_rd_cmd *)&desc.data;
+		req0->index = cpu_to_le16(i);
+
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"call hclge_cmd_send fail, ret = %d\n", ret);
+			return;
+		}
+
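+		/* entries read back with a zero resp_code are empty */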
+		if (!req0->resp_code)
+			continue;
+
+		memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
+		snprintf(printf_buf, HCLGE_DBG_BUF_LEN,
+			 "%02u   |%02x:%02x:%02x:%02x:%02x:%02x|",
+			 req0->index, req0->mac_addr[0], req0->mac_addr[1],
+			 req0->mac_addr[2], req0->mac_addr[3],
+			 req0->mac_addr[4], req0->mac_addr[5]);
+
+		snprintf(printf_buf + strlen(printf_buf),
+			 HCLGE_DBG_BUF_LEN - strlen(printf_buf),
+			 "%x   |%04x |%x   |%04x|%x   |%02x   |%02x   |",
+			 !!(req0->flags & HCLGE_DBG_MNG_MAC_MASK_B),
+			 req0->ethter_type,
+			 !!(req0->flags & HCLGE_DBG_MNG_ETHER_MASK_B),
+			 req0->vlan_tag & HCLGE_DBG_MNG_VLAN_TAG,
+			 !!(req0->flags & HCLGE_DBG_MNG_VLAN_MASK_B),
+			 req0->i_port_bitmap, req0->i_port_direction);
+
+		snprintf(printf_buf + strlen(printf_buf),
+			 HCLGE_DBG_BUF_LEN - strlen(printf_buf),
+			 "%d     |%d    |%02d   |%04d|%x\n",
+			 !!(req0->egress_port & HCLGE_DBG_MNG_E_TYPE_B),
+			 req0->egress_port & HCLGE_DBG_MNG_PF_ID,
+			 (req0->egress_port >> 3) & HCLGE_DBG_MNG_VF_ID,
+			 req0->egress_queue,
+			 !!(req0->egress_port & HCLGE_DBG_MNG_DROP_B));
+
+		dev_info(&hdev->pdev->dev, "%s", printf_buf);
+	}
+}
+
+static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
+				   bool sel_x, u32 loc)
+{
+	struct hclge_fd_tcam_config_1_cmd *req1;
+	struct hclge_fd_tcam_config_2_cmd *req2;
+	struct hclge_fd_tcam_config_3_cmd *req3;
+	struct hclge_desc desc[3];
+	int ret, i;
+	u32 *req;
+
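+	/* one TCAM entry spans three chained BDs carrying 8 + 24 + 20 bytes
+	 * of key data (tcam_data0 ~ tcam_data12)
+	 */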
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_FD_TCAM_OP, true);
+	desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[2], HCLGE_OPC_FD_TCAM_OP, true);
+
+	req1 = (struct hclge_fd_tcam_config_1_cmd *)desc[0].data;
+	req2 = (struct hclge_fd_tcam_config_2_cmd *)desc[1].data;
+	req3 = (struct hclge_fd_tcam_config_3_cmd *)desc[2].data;
+
+	req1->stage  = stage;
+	req1->xy_sel = sel_x ? 1 : 0;
+	req1->index  = cpu_to_le32(loc);
+
+	ret = hclge_cmd_send(&hdev->hw, desc, 3);
+	if (ret)
+		return;
+
+	dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n",
+		 sel_x ? "x" : "y", loc);
+
+	/* tcam_data0 ~ tcam_data1 */
+	req = (u32 *)req1->tcam_data;
+	for (i = 0; i < 2; i++)
+		dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+
+	/* tcam_data2 ~ tcam_data7 */
+	req = (u32 *)req2->tcam_data;
+	for (i = 0; i < 6; i++)
+		dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+
+	/* tcam_data8 ~ tcam_data12 */
+	req = (u32 *)req3->tcam_data;
+	for (i = 0; i < 5; i++)
+		dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+}
+
+static void hclge_dbg_fd_tcam(struct hclge_dev *hdev)
+{
+	u32 i;
+
+	for (i = 0; i < hdev->fd_cfg.rule_num[0]; i++) {
+		hclge_dbg_fd_tcam_read(hdev, 0, true, i);
+		hclge_dbg_fd_tcam_read(hdev, 0, false, i);
+	}
+}
+
+static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
+{
+	dev_info(&hdev->pdev->dev, "PF reset count: %u\n",
+		 hdev->rst_stats.pf_rst_cnt);
+	dev_info(&hdev->pdev->dev, "FLR reset count: %u\n",
+		 hdev->rst_stats.flr_rst_cnt);
+	dev_info(&hdev->pdev->dev, "GLOBAL reset count: %u\n",
+		 hdev->rst_stats.global_rst_cnt);
+	dev_info(&hdev->pdev->dev, "IMP reset count: %u\n",
+		 hdev->rst_stats.imp_rst_cnt);
+	dev_info(&hdev->pdev->dev, "reset done count: %u\n",
+		 hdev->rst_stats.reset_done_cnt);
+	dev_info(&hdev->pdev->dev, "HW reset done count: %u\n",
+		 hdev->rst_stats.hw_reset_done_cnt);
+	dev_info(&hdev->pdev->dev, "reset count: %u\n",
+		 hdev->rst_stats.reset_cnt);
+	dev_info(&hdev->pdev->dev, "reset fail count: %u\n",
+		 hdev->rst_stats.reset_fail_cnt);
+	dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
+		 hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_REG_BASE));
+	dev_info(&hdev->pdev->dev, "reset interrupt source: 0x%x\n",
+		 hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG));
+	dev_info(&hdev->pdev->dev, "reset interrupt status: 0x%x\n",
+		 hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS));
+	dev_info(&hdev->pdev->dev, "hardware reset status: 0x%x\n",
+		 hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG));
+	dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
+		 hclge_read_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG));
+	dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
+		 hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING));
+}
+
+static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
+{
+	struct hclge_desc *desc_src, *desc_tmp;
+	struct hclge_get_m7_bd_cmd *req;
+	struct hclge_desc desc;
+	u32 bd_num, buf_len;
+	int ret, i;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_STATS_BD, true);
+
+	req = (struct hclge_get_m7_bd_cmd *)desc.data;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"get firmware statistics bd number failed, ret = %d\n",
+			ret);
+		return;
+	}
+
+	bd_num = le32_to_cpu(req->bd_num);
+
+	buf_len	 = sizeof(struct hclge_desc) * bd_num;
+	desc_src = kzalloc(buf_len, GFP_KERNEL);
+	if (!desc_src) {
+		dev_err(&hdev->pdev->dev,
+			"allocate desc for get_m7_stats failed\n");
+		return;
+	}
+
+	desc_tmp = desc_src;
+	ret  = hclge_dbg_cmd_send(hdev, desc_tmp, 0, bd_num,
+				  HCLGE_OPC_M7_STATS_INFO);
+	if (ret) {
+		kfree(desc_src);
+		dev_err(&hdev->pdev->dev,
+			"get firmware statistics failed, ret = %d\n", ret);
+		return;
+	}
+
+	for (i = 0; i < bd_num; i++) {
+		dev_info(&hdev->pdev->dev, "0x%08x  0x%08x  0x%08x\n",
+			 le32_to_cpu(desc_tmp->data[0]),
+			 le32_to_cpu(desc_tmp->data[1]),
+			 le32_to_cpu(desc_tmp->data[2]));
+		dev_info(&hdev->pdev->dev, "0x%08x  0x%08x  0x%08x\n",
+			 le32_to_cpu(desc_tmp->data[3]),
+			 le32_to_cpu(desc_tmp->data[4]),
+			 le32_to_cpu(desc_tmp->data[5]));
+
+		desc_tmp++;
+	}
+
+	kfree(desc_src);
+}
+
+#define HCLGE_CMD_NCL_CONFIG_BD_NUM	5
+
+static void hclge_ncl_config_data_print(struct hclge_dev *hdev,
+					struct hclge_desc *desc, int *offset,
+					int *length)
+{
+#define HCLGE_CMD_DATA_NUM		6
+
+	int i;
+	int j;
+
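+	/* data[0] of the first BD holds the request word (offset/length),
+	 * not configuration data, so skip it
+	 */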
+	for (i = 0; i < HCLGE_CMD_NCL_CONFIG_BD_NUM; i++) {
+		for (j = 0; j < HCLGE_CMD_DATA_NUM; j++) {
+			if (i == 0 && j == 0)
+				continue;
+
+			dev_info(&hdev->pdev->dev, "0x%04x | 0x%08x\n",
+				 *offset,
+				 le32_to_cpu(desc[i].data[j]));
+			*offset += sizeof(u32);
+			*length -= sizeof(u32);
+			if (*length <= 0)
+				return;
+		}
+	}
+}
+
+/* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file
+ * @hdev: pointer to struct hclge_dev
+ * @cmd_buf: string that contains offset and length
+ */
+static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev,
+				      const char *cmd_buf)
+{
+#define HCLGE_MAX_NCL_CONFIG_OFFSET	4096
+#define HCLGE_MAX_NCL_CONFIG_LENGTH	(20 + 24 * 4)
+
+	struct hclge_desc desc[HCLGE_CMD_NCL_CONFIG_BD_NUM];
+	int bd_num = HCLGE_CMD_NCL_CONFIG_BD_NUM;
+	int offset;
+	int length;
+	int data0;
+	int ret;
+
+	ret = sscanf(cmd_buf, "%x %x", &offset, &length);
+	if (ret != 2 || offset >= HCLGE_MAX_NCL_CONFIG_OFFSET ||
+	    length > HCLGE_MAX_NCL_CONFIG_OFFSET - offset) {
+		dev_err(&hdev->pdev->dev, "Invalid offset or length.\n");
+		return;
+	}
+	if (offset < 0 || length <= 0) {
+		dev_err(&hdev->pdev->dev, "Non-positive offset or length.\n");
+		return;
+	}
+
+	dev_info(&hdev->pdev->dev, "offset |    data\n");
+
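+	/* read in chunks: bits 0-15 of data0 carry the byte offset and
+	 * bits 16-31 the chunk length, capped at HCLGE_MAX_NCL_CONFIG_LENGTH
+	 */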
+	while (length > 0) {
+		data0 = offset;
+		if (length >= HCLGE_MAX_NCL_CONFIG_LENGTH)
+			data0 |= HCLGE_MAX_NCL_CONFIG_LENGTH << 16;
+		else
+			data0 |= length << 16;
+		ret = hclge_dbg_cmd_send(hdev, desc, data0, bd_num,
+					 HCLGE_OPC_QUERY_NCL_CONFIG);
+		if (ret)
+			return;
+
+		hclge_ncl_config_data_print(hdev, desc, &offset, &length);
+	}
+}
+
+/* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt
+ * @hdev: pointer to struct hclge_dev
+ */
+static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev)
+{
+#define HCLGE_BILLION_NANO_SECONDS 1000000000
+
+	struct hclge_mac_tnl_stats stats;
+	unsigned long rem_nsec;
+
+	dev_info(&hdev->pdev->dev, "Recently generated mac tnl interruption:\n");
+
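+	/* stats.time is a nanosecond timestamp; do_div() leaves whole
+	 * seconds in stats.time and returns the remainder in ns
+	 */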
+	while (kfifo_get(&hdev->mac_tnl_log, &stats)) {
+		rem_nsec = do_div(stats.time, HCLGE_BILLION_NANO_SECONDS);
+		dev_info(&hdev->pdev->dev, "[%07lu.%03lu] status = 0x%x\n",
+			 (unsigned long)stats.time, rem_nsec / 1000,
+			 stats.status);
+	}
+}
+
+int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
+{
+#define DUMP_REG	"dump reg"
+#define DUMP_TM_MAP	"dump tm map"
+
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
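+	/* longer commands must be matched before their prefixes, e.g.
+	 * "dump tm map" before "dump tm"
+	 */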
+	if (strncmp(cmd_buf, "dump fd tcam", 12) == 0) {
+		hclge_dbg_fd_tcam(hdev);
+	} else if (strncmp(cmd_buf, "dump tc", 7) == 0) {
+		hclge_dbg_dump_tc(hdev);
+	} else if (strncmp(cmd_buf, DUMP_TM_MAP, strlen(DUMP_TM_MAP)) == 0) {
+		hclge_dbg_dump_tm_map(hdev, &cmd_buf[sizeof(DUMP_TM_MAP)]);
+	} else if (strncmp(cmd_buf, "dump tm", 7) == 0) {
+		hclge_dbg_dump_tm(hdev);
+	} else if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
+		hclge_dbg_dump_qos_pause_cfg(hdev);
+	} else if (strncmp(cmd_buf, "dump qos pri map", 16) == 0) {
+		hclge_dbg_dump_qos_pri_map(hdev);
+	} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
+		hclge_dbg_dump_qos_buf_cfg(hdev);
+	} else if (strncmp(cmd_buf, "dump mng tbl", 12) == 0) {
+		hclge_dbg_dump_mng_table(hdev);
+	} else if (strncmp(cmd_buf, DUMP_REG, strlen(DUMP_REG)) == 0) {
+		hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
+	} else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
+		hclge_dbg_dump_rst_info(hdev);
+	} else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) {
+		hclge_dbg_get_m7_stats_info(hdev);
+	} else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
+		hclge_dbg_dump_ncl_config(hdev,
+					  &cmd_buf[sizeof("dump ncl_config")]);
+	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
+		hclge_dbg_dump_mac_tnl_status(hdev);
+	} else {
+		dev_info(&hdev->pdev->dev, "unknown command\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
new file mode 100644
index 0000000..38b7932
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -0,0 +1,730 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2018-2019 Hisilicon Limited. */
+
+#ifndef __HCLGE_DEBUGFS_H
+#define __HCLGE_DEBUGFS_H
+
+#include <linux/etherdevice.h>
+#include "hclge_cmd.h"
+
+#define HCLGE_DBG_BUF_LEN	   256
+#define HCLGE_DBG_MNG_TBL_MAX	   64
+
+#define HCLGE_DBG_MNG_VLAN_MASK_B  BIT(0)
+#define HCLGE_DBG_MNG_MAC_MASK_B   BIT(1)
+#define HCLGE_DBG_MNG_ETHER_MASK_B BIT(2)
+#define HCLGE_DBG_MNG_E_TYPE_B	   BIT(11)
+#define HCLGE_DBG_MNG_DROP_B	   BIT(13)
+#define HCLGE_DBG_MNG_VLAN_TAG	   0x0FFF
+#define HCLGE_DBG_MNG_PF_ID	   0x0007
+#define HCLGE_DBG_MNG_VF_ID	   0x00FF
+
+/* Get DFX BD number offset */
+#define HCLGE_DBG_DFX_BIOS_OFFSET  1
+#define HCLGE_DBG_DFX_SSU_0_OFFSET 2
+#define HCLGE_DBG_DFX_SSU_1_OFFSET 3
+#define HCLGE_DBG_DFX_IGU_OFFSET   4
+#define HCLGE_DBG_DFX_RPU_0_OFFSET 5
+
+#define HCLGE_DBG_DFX_RPU_1_OFFSET 6
+#define HCLGE_DBG_DFX_NCSI_OFFSET  7
+#define HCLGE_DBG_DFX_RTC_OFFSET   8
+#define HCLGE_DBG_DFX_PPP_OFFSET   9
+#define HCLGE_DBG_DFX_RCB_OFFSET   10
+#define HCLGE_DBG_DFX_TQP_OFFSET   11
+
+#define HCLGE_DBG_DFX_SSU_2_OFFSET 12
+
+#pragma pack(1)
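+/* pack the structures below so that those overlaid on raw descriptor
+ * data keep a fixed, padding-free layout
+ */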
+
+struct hclge_qos_pri_map_cmd {
+	u8 pri0_tc  : 4,
+	   pri1_tc  : 4;
+	u8 pri2_tc  : 4,
+	   pri3_tc  : 4;
+	u8 pri4_tc  : 4,
+	   pri5_tc  : 4;
+	u8 pri6_tc  : 4,
+	   pri7_tc  : 4;
+	u8 vlan_pri : 4,
+	   rev	    : 4;
+};
+
+struct hclge_dbg_bitmap_cmd {
+	union {
+		u8 bitmap;
+		struct {
+			u8 bit0 : 1,
+			   bit1 : 1,
+			   bit2 : 1,
+			   bit3 : 1,
+			   bit4 : 1,
+			   bit5 : 1,
+			   bit6 : 1,
+			   bit7 : 1;
+		};
+	};
+};
+
+struct hclge_dbg_reg_common_msg {
+	int msg_num;
+	int offset;
+	enum hclge_opcode_type cmd;
+};
+
+#define	HCLGE_DBG_MAX_DFX_MSG_LEN	60
+struct hclge_dbg_dfx_message {
+	int flag;
+	char message[HCLGE_DBG_MAX_DFX_MSG_LEN];
+};
+
+#define HCLGE_DBG_MAC_REG_TYPE_LEN	32
+struct hclge_dbg_reg_type_info {
+	const char *reg_type;
+	struct hclge_dbg_dfx_message *dfx_msg;
+	struct hclge_dbg_reg_common_msg reg_msg;
+};
+
+#pragma pack()
+
+static struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
+	{false, "Reserved"},
+	{true,	"BP_CPU_STATE"},
+	{true,	"DFX_MSIX_INFO_NIC_0"},
+	{true,	"DFX_MSIX_INFO_NIC_1"},
+	{true,	"DFX_MSIX_INFO_NIC_2"},
+	{true,	"DFX_MSIX_INFO_NIC_3"},
+
+	{true,	"DFX_MSIX_INFO_ROC_0"},
+	{true,	"DFX_MSIX_INFO_ROC_1"},
+	{true,	"DFX_MSIX_INFO_ROC_2"},
+	{true,	"DFX_MSIX_INFO_ROC_3"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = {
+	{false, "Reserved"},
+	{true,	"SSU_ETS_PORT_STATUS"},
+	{true,	"SSU_ETS_TCG_STATUS"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{true,	"SSU_BP_STATUS_0"},
+
+	{true,	"SSU_BP_STATUS_1"},
+	{true,	"SSU_BP_STATUS_2"},
+	{true,	"SSU_BP_STATUS_3"},
+	{true,	"SSU_BP_STATUS_4"},
+	{true,	"SSU_BP_STATUS_5"},
+	{true,	"SSU_MAC_TX_PFC_IND"},
+
+	{true,	"MAC_SSU_RX_PFC_IND"},
+	{true,	"BTMP_AGEING_ST_B0"},
+	{true,	"BTMP_AGEING_ST_B1"},
+	{true,	"BTMP_AGEING_ST_B2"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+
+	{true,	"FULL_DROP_NUM"},
+	{true,	"PART_DROP_NUM"},
+	{true,	"PPP_KEY_DROP_NUM"},
+	{true,	"PPP_RLT_DROP_NUM"},
+	{true,	"LO_PRI_UNICAST_RLT_DROP_NUM"},
+	{true,	"HI_PRI_MULTICAST_RLT_DROP_NUM"},
+
+	{true,	"LO_PRI_MULTICAST_RLT_DROP_NUM"},
+	{true,	"NCSI_PACKET_CURR_BUFFER_CNT"},
+	{true,	"BTMP_AGEING_RLS_CNT_BANK0"},
+	{true,	"BTMP_AGEING_RLS_CNT_BANK1"},
+	{true,	"BTMP_AGEING_RLS_CNT_BANK2"},
+	{true,	"SSU_MB_RD_RLT_DROP_CNT"},
+
+	{true,	"SSU_PPP_MAC_KEY_NUM_L"},
+	{true,	"SSU_PPP_MAC_KEY_NUM_H"},
+	{true,	"SSU_PPP_HOST_KEY_NUM_L"},
+	{true,	"SSU_PPP_HOST_KEY_NUM_H"},
+	{true,	"PPP_SSU_MAC_RLT_NUM_L"},
+	{true,	"PPP_SSU_MAC_RLT_NUM_H"},
+
+	{true,	"PPP_SSU_HOST_RLT_NUM_L"},
+	{true,	"PPP_SSU_HOST_RLT_NUM_H"},
+	{true,	"NCSI_RX_PACKET_IN_CNT_L"},
+	{true,	"NCSI_RX_PACKET_IN_CNT_H"},
+	{true,	"NCSI_TX_PACKET_OUT_CNT_L"},
+	{true,	"NCSI_TX_PACKET_OUT_CNT_H"},
+
+	{true,	"SSU_KEY_DROP_NUM"},
+	{true,	"MB_UNCOPY_NUM"},
+	{true,	"RX_OQ_DROP_PKT_CNT"},
+	{true,	"TX_OQ_DROP_PKT_CNT"},
+	{true,	"BANK_UNBALANCE_DROP_CNT"},
+	{true,	"BANK_UNBALANCE_RX_DROP_CNT"},
+
+	{true,	"NIC_L2_ERR_DROP_PKT_CNT"},
+	{true,	"ROC_L2_ERR_DROP_PKT_CNT"},
+	{true,	"NIC_L2_ERR_DROP_PKT_CNT_RX"},
+	{true,	"ROC_L2_ERR_DROP_PKT_CNT_RX"},
+	{true,	"RX_OQ_GLB_DROP_PKT_CNT"},
+	{false, "Reserved"},
+
+	{true,	"LO_PRI_UNICAST_CUR_CNT"},
+	{true,	"HI_PRI_MULTICAST_CUR_CNT"},
+	{true,	"LO_PRI_MULTICAST_CUR_CNT"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = {
+	{true,	"prt_id"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_0"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_1"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_2"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_3"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_4"},
+
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_5"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_6"},
+	{true,	"PACKET_TC_CURR_BUFFER_CNT_7"},
+	{true,	"PACKET_CURR_BUFFER_CNT"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+
+	{true,	"RX_PACKET_IN_CNT_L"},
+	{true,	"RX_PACKET_IN_CNT_H"},
+	{true,	"RX_PACKET_OUT_CNT_L"},
+	{true,	"RX_PACKET_OUT_CNT_H"},
+	{true,	"TX_PACKET_IN_CNT_L"},
+	{true,	"TX_PACKET_IN_CNT_H"},
+
+	{true,	"TX_PACKET_OUT_CNT_L"},
+	{true,	"TX_PACKET_OUT_CNT_H"},
+	{true,	"ROC_RX_PACKET_IN_CNT_L"},
+	{true,	"ROC_RX_PACKET_IN_CNT_H"},
+	{true,	"ROC_TX_PACKET_OUT_CNT_L"},
+	{true,	"ROC_TX_PACKET_OUT_CNT_H"},
+
+	{true,	"RX_PACKET_TC_IN_CNT_0_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_0_H"},
+	{true,	"RX_PACKET_TC_IN_CNT_1_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_1_H"},
+	{true,	"RX_PACKET_TC_IN_CNT_2_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_2_H"},
+
+	{true,	"RX_PACKET_TC_IN_CNT_3_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_3_H"},
+	{true,	"RX_PACKET_TC_IN_CNT_4_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_4_H"},
+	{true,	"RX_PACKET_TC_IN_CNT_5_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_5_H"},
+
+	{true,	"RX_PACKET_TC_IN_CNT_6_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_6_H"},
+	{true,	"RX_PACKET_TC_IN_CNT_7_L"},
+	{true,	"RX_PACKET_TC_IN_CNT_7_H"},
+	{true,	"RX_PACKET_TC_OUT_CNT_0_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_0_H"},
+
+	{true,	"RX_PACKET_TC_OUT_CNT_1_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_1_H"},
+	{true,	"RX_PACKET_TC_OUT_CNT_2_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_2_H"},
+	{true,	"RX_PACKET_TC_OUT_CNT_3_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_3_H"},
+
+	{true,	"RX_PACKET_TC_OUT_CNT_4_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_4_H"},
+	{true,	"RX_PACKET_TC_OUT_CNT_5_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_5_H"},
+	{true,	"RX_PACKET_TC_OUT_CNT_6_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_6_H"},
+
+	{true,	"RX_PACKET_TC_OUT_CNT_7_L"},
+	{true,	"RX_PACKET_TC_OUT_CNT_7_H"},
+	{true,	"TX_PACKET_TC_IN_CNT_0_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_0_H"},
+	{true,	"TX_PACKET_TC_IN_CNT_1_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_1_H"},
+
+	{true,	"TX_PACKET_TC_IN_CNT_2_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_2_H"},
+	{true,	"TX_PACKET_TC_IN_CNT_3_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_3_H"},
+	{true,	"TX_PACKET_TC_IN_CNT_4_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_4_H"},
+
+	{true,	"TX_PACKET_TC_IN_CNT_5_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_5_H"},
+	{true,	"TX_PACKET_TC_IN_CNT_6_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_6_H"},
+	{true,	"TX_PACKET_TC_IN_CNT_7_L"},
+	{true,	"TX_PACKET_TC_IN_CNT_7_H"},
+
+	{true,	"TX_PACKET_TC_OUT_CNT_0_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_0_H"},
+	{true,	"TX_PACKET_TC_OUT_CNT_1_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_1_H"},
+	{true,	"TX_PACKET_TC_OUT_CNT_2_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_2_H"},
+
+	{true,	"TX_PACKET_TC_OUT_CNT_3_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_3_H"},
+	{true,	"TX_PACKET_TC_OUT_CNT_4_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_4_H"},
+	{true,	"TX_PACKET_TC_OUT_CNT_5_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_5_H"},
+
+	{true,	"TX_PACKET_TC_OUT_CNT_6_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_6_H"},
+	{true,	"TX_PACKET_TC_OUT_CNT_7_L"},
+	{true,	"TX_PACKET_TC_OUT_CNT_7_H"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = {
+	{true,	"OQ_INDEX"},
+	{true,	"QUEUE_CNT"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = {
+	{true,	"prt_id"},
+	{true,	"IGU_RX_ERR_PKT"},
+	{true,	"IGU_RX_NO_SOF_PKT"},
+	{true,	"EGU_TX_1588_SHORT_PKT"},
+	{true,	"EGU_TX_1588_PKT"},
+	{true,	"EGU_TX_ERR_PKT"},
+
+	{true,	"IGU_RX_OUT_L2_PKT"},
+	{true,	"IGU_RX_OUT_L3_PKT"},
+	{true,	"IGU_RX_OUT_L4_PKT"},
+	{true,	"IGU_RX_IN_L2_PKT"},
+	{true,	"IGU_RX_IN_L3_PKT"},
+	{true,	"IGU_RX_IN_L4_PKT"},
+
+	{true,	"IGU_RX_EL3E_PKT"},
+	{true,	"IGU_RX_EL4E_PKT"},
+	{true,	"IGU_RX_L3E_PKT"},
+	{true,	"IGU_RX_L4E_PKT"},
+	{true,	"IGU_RX_ROCEE_PKT"},
+	{true,	"IGU_RX_OUT_UDP0_PKT"},
+
+	{true,	"IGU_RX_IN_UDP0_PKT"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+
+	{true,	"IGU_RX_OVERSIZE_PKT_L"},
+	{true,	"IGU_RX_OVERSIZE_PKT_H"},
+	{true,	"IGU_RX_UNDERSIZE_PKT_L"},
+	{true,	"IGU_RX_UNDERSIZE_PKT_H"},
+	{true,	"IGU_RX_OUT_ALL_PKT_L"},
+	{true,	"IGU_RX_OUT_ALL_PKT_H"},
+
+	{true,	"IGU_TX_OUT_ALL_PKT_L"},
+	{true,	"IGU_TX_OUT_ALL_PKT_H"},
+	{true,	"IGU_RX_UNI_PKT_L"},
+	{true,	"IGU_RX_UNI_PKT_H"},
+	{true,	"IGU_RX_MULTI_PKT_L"},
+	{true,	"IGU_RX_MULTI_PKT_H"},
+
+	{true,	"IGU_RX_BROAD_PKT_L"},
+	{true,	"IGU_RX_BROAD_PKT_H"},
+	{true,	"EGU_TX_OUT_ALL_PKT_L"},
+	{true,	"EGU_TX_OUT_ALL_PKT_H"},
+	{true,	"EGU_TX_UNI_PKT_L"},
+	{true,	"EGU_TX_UNI_PKT_H"},
+
+	{true,	"EGU_TX_MULTI_PKT_L"},
+	{true,	"EGU_TX_MULTI_PKT_H"},
+	{true,	"EGU_TX_BROAD_PKT_L"},
+	{true,	"EGU_TX_BROAD_PKT_H"},
+	{true,	"IGU_TX_KEY_NUM_L"},
+	{true,	"IGU_TX_KEY_NUM_H"},
+
+	{true,	"IGU_RX_NON_TUN_PKT_L"},
+	{true,	"IGU_RX_NON_TUN_PKT_H"},
+	{true,	"IGU_RX_TUN_PKT_L"},
+	{true,	"IGU_RX_TUN_PKT_H"},
+	{false,	"Reserved"},
+	{false,	"Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = {
+	{true, "tc_queue_num"},
+	{true, "FSM_DFX_ST0"},
+	{true, "FSM_DFX_ST1"},
+	{true, "RPU_RX_PKT_DROP_CNT"},
+	{true, "BUF_WAIT_TIMEOUT"},
+	{true, "BUF_WAIT_TIMEOUT_QID"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = {
+	{false, "Reserved"},
+	{true,	"FIFO_DFX_ST0"},
+	{true,	"FIFO_DFX_ST1"},
+	{true,	"FIFO_DFX_ST2"},
+	{true,	"FIFO_DFX_ST3"},
+	{true,	"FIFO_DFX_ST4"},
+
+	{true,	"FIFO_DFX_ST5"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = {
+	{false, "Reserved"},
+	{true,	"NCSI_EGU_TX_FIFO_STS"},
+	{true,	"NCSI_PAUSE_STATUS"},
+	{true,	"NCSI_RX_CTRL_DMAC_ERR_CNT"},
+	{true,	"NCSI_RX_CTRL_SMAC_ERR_CNT"},
+	{true,	"NCSI_RX_CTRL_CKS_ERR_CNT"},
+
+	{true,	"NCSI_RX_CTRL_PKT_CNT"},
+	{true,	"NCSI_RX_PT_DMAC_ERR_CNT"},
+	{true,	"NCSI_RX_PT_SMAC_ERR_CNT"},
+	{true,	"NCSI_RX_PT_PKT_CNT"},
+	{true,	"NCSI_RX_FCS_ERR_CNT"},
+	{true,	"NCSI_TX_CTRL_DMAC_ERR_CNT"},
+
+	{true,	"NCSI_TX_CTRL_SMAC_ERR_CNT"},
+	{true,	"NCSI_TX_CTRL_PKT_CNT"},
+	{true,	"NCSI_TX_PT_DMAC_ERR_CNT"},
+	{true,	"NCSI_TX_PT_SMAC_ERR_CNT"},
+	{true,	"NCSI_TX_PT_PKT_CNT"},
+	{true,	"NCSI_TX_PT_PKT_TRUNC_CNT"},
+
+	{true,	"NCSI_TX_PT_PKT_ERR_CNT"},
+	{true,	"NCSI_TX_CTRL_PKT_ERR_CNT"},
+	{true,	"NCSI_RX_CTRL_PKT_TRUNC_CNT"},
+	{true,	"NCSI_RX_CTRL_PKT_CFLIT_CNT"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+
+	{true,	"NCSI_MAC_RX_OCTETS_OK"},
+	{true,	"NCSI_MAC_RX_OCTETS_BAD"},
+	{true,	"NCSI_MAC_RX_UC_PKTS"},
+	{true,	"NCSI_MAC_RX_MC_PKTS"},
+	{true,	"NCSI_MAC_RX_BC_PKTS"},
+	{true,	"NCSI_MAC_RX_PKTS_64OCTETS"},
+
+	{true,	"NCSI_MAC_RX_PKTS_65TO127OCTETS"},
+	{true,	"NCSI_MAC_RX_PKTS_128TO255OCTETS"},
+	{true,	"NCSI_MAC_RX_PKTS_255TO511OCTETS"},
+	{true,	"NCSI_MAC_RX_PKTS_512TO1023OCTETS"},
+	{true,	"NCSI_MAC_RX_PKTS_1024TO1518OCTETS"},
+	{true,	"NCSI_MAC_RX_PKTS_1519TOMAXOCTETS"},
+
+	{true,	"NCSI_MAC_RX_FCS_ERRORS"},
+	{true,	"NCSI_MAC_RX_LONG_ERRORS"},
+	{true,	"NCSI_MAC_RX_JABBER_ERRORS"},
+	{true,	"NCSI_MAC_RX_RUNT_ERR_CNT"},
+	{true,	"NCSI_MAC_RX_SHORT_ERR_CNT"},
+	{true,	"NCSI_MAC_RX_FILT_PKT_CNT"},
+
+	{true,	"NCSI_MAC_RX_OCTETS_TOTAL_FILT"},
+	{true,	"NCSI_MAC_TX_OCTETS_OK"},
+	{true,	"NCSI_MAC_TX_OCTETS_BAD"},
+	{true,	"NCSI_MAC_TX_UC_PKTS"},
+	{true,	"NCSI_MAC_TX_MC_PKTS"},
+	{true,	"NCSI_MAC_TX_BC_PKTS"},
+
+	{true,	"NCSI_MAC_TX_PKTS_64OCTETS"},
+	{true,	"NCSI_MAC_TX_PKTS_65TO127OCTETS"},
+	{true,	"NCSI_MAC_TX_PKTS_128TO255OCTETS"},
+	{true,	"NCSI_MAC_TX_PKTS_256TO511OCTETS"},
+	{true,	"NCSI_MAC_TX_PKTS_512TO1023OCTETS"},
+	{true,	"NCSI_MAC_TX_PKTS_1024TO1518OCTETS"},
+
+	{true,	"NCSI_MAC_TX_PKTS_1519TOMAXOCTETS"},
+	{true,	"NCSI_MAC_TX_UNDERRUN"},
+	{true,	"NCSI_MAC_TX_CRC_ERROR"},
+	{true,	"NCSI_MAC_TX_PAUSE_FRAMES"},
+	{true,	"NCSI_MAC_RX_PAD_PKTS"},
+	{true,	"NCSI_MAC_RX_PAUSE_FRAMES"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = {
+	{false, "Reserved"},
+	{true,	"LGE_IGU_AFIFO_DFX_0"},
+	{true,	"LGE_IGU_AFIFO_DFX_1"},
+	{true,	"LGE_IGU_AFIFO_DFX_2"},
+	{true,	"LGE_IGU_AFIFO_DFX_3"},
+	{true,	"LGE_IGU_AFIFO_DFX_4"},
+
+	{true,	"LGE_IGU_AFIFO_DFX_5"},
+	{true,	"LGE_IGU_AFIFO_DFX_6"},
+	{true,	"LGE_IGU_AFIFO_DFX_7"},
+	{true,	"LGE_EGU_AFIFO_DFX_0"},
+	{true,	"LGE_EGU_AFIFO_DFX_1"},
+	{true,	"LGE_EGU_AFIFO_DFX_2"},
+
+	{true,	"LGE_EGU_AFIFO_DFX_3"},
+	{true,	"LGE_EGU_AFIFO_DFX_4"},
+	{true,	"LGE_EGU_AFIFO_DFX_5"},
+	{true,	"LGE_EGU_AFIFO_DFX_6"},
+	{true,	"LGE_EGU_AFIFO_DFX_7"},
+	{true,	"CGE_IGU_AFIFO_DFX_0"},
+
+	{true,	"CGE_IGU_AFIFO_DFX_1"},
+	{true,	"CGE_EGU_AFIFO_DFX_0"},
+	{true,	"CGE_EGU_AFIFO_DFX_1"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = {
+	{false, "Reserved"},
+	{true,	"DROP_FROM_PRT_PKT_CNT"},
+	{true,	"DROP_FROM_HOST_PKT_CNT"},
+	{true,	"DROP_TX_VLAN_PROC_CNT"},
+	{true,	"DROP_MNG_CNT"},
+	{true,	"DROP_FD_CNT"},
+
+	{true,	"DROP_NO_DST_CNT"},
+	{true,	"DROP_MC_MBID_FULL_CNT"},
+	{true,	"DROP_SC_FILTERED"},
+	{true,	"PPP_MC_DROP_PKT_CNT"},
+	{true,	"DROP_PT_CNT"},
+	{true,	"DROP_MAC_ANTI_SPOOF_CNT"},
+
+	{true,	"DROP_IG_VFV_CNT"},
+	{true,	"DROP_IG_PRTV_CNT"},
+	{true,	"DROP_CNM_PFC_PAUSE_CNT"},
+	{true,	"DROP_TORUS_TC_CNT"},
+	{true,	"DROP_TORUS_LPBK_CNT"},
+	{true,	"PPP_HFS_STS"},
+
+	{true,	"PPP_MC_RSLT_STS"},
+	{true,	"PPP_P3U_STS"},
+	{true,	"PPP_RSLT_DESCR_STS"},
+	{true,	"PPP_UMV_STS_0"},
+	{true,	"PPP_UMV_STS_1"},
+	{true,	"PPP_VFV_STS"},
+
+	{true,	"PPP_GRO_KEY_CNT"},
+	{true,	"PPP_GRO_INFO_CNT"},
+	{true,	"PPP_GRO_DROP_CNT"},
+	{true,	"PPP_GRO_OUT_CNT"},
+	{true,	"PPP_GRO_KEY_MATCH_DATA_CNT"},
+	{true,	"PPP_GRO_KEY_MATCH_TCAM_CNT"},
+
+	{true,	"PPP_GRO_INFO_MATCH_CNT"},
+	{true,	"PPP_GRO_FREE_ENTRY_CNT"},
+	{true,	"PPP_GRO_INNER_DFX_SIGNAL"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+
+	{true,	"GET_RX_PKT_CNT_L"},
+	{true,	"GET_RX_PKT_CNT_H"},
+	{true,	"GET_TX_PKT_CNT_L"},
+	{true,	"GET_TX_PKT_CNT_H"},
+	{true,	"SEND_UC_PRT2HOST_PKT_CNT_L"},
+	{true,	"SEND_UC_PRT2HOST_PKT_CNT_H"},
+
+	{true,	"SEND_UC_PRT2PRT_PKT_CNT_L"},
+	{true,	"SEND_UC_PRT2PRT_PKT_CNT_H"},
+	{true,	"SEND_UC_HOST2HOST_PKT_CNT_L"},
+	{true,	"SEND_UC_HOST2HOST_PKT_CNT_H"},
+	{true,	"SEND_UC_HOST2PRT_PKT_CNT_L"},
+	{true,	"SEND_UC_HOST2PRT_PKT_CNT_H"},
+
+	{true,	"SEND_MC_FROM_PRT_CNT_L"},
+	{true,	"SEND_MC_FROM_PRT_CNT_H"},
+	{true,	"SEND_MC_FROM_HOST_CNT_L"},
+	{true,	"SEND_MC_FROM_HOST_CNT_H"},
+	{true,	"SSU_MC_RD_CNT_L"},
+	{true,	"SSU_MC_RD_CNT_H"},
+
+	{true,	"SSU_MC_DROP_CNT_L"},
+	{true,	"SSU_MC_DROP_CNT_H"},
+	{true,	"SSU_MC_RD_PKT_CNT_L"},
+	{true,	"SSU_MC_RD_PKT_CNT_H"},
+	{true,	"PPP_MC_2HOST_PKT_CNT_L"},
+	{true,	"PPP_MC_2HOST_PKT_CNT_H"},
+
+	{true,	"PPP_MC_2PRT_PKT_CNT_L"},
+	{true,	"PPP_MC_2PRT_PKT_CNT_H"},
+	{true,	"NTSNOS_PKT_CNT_L"},
+	{true,	"NTSNOS_PKT_CNT_H"},
+	{true,	"NTUP_PKT_CNT_L"},
+	{true,	"NTUP_PKT_CNT_H"},
+
+	{true,	"NTLCL_PKT_CNT_L"},
+	{true,	"NTLCL_PKT_CNT_H"},
+	{true,	"NTTGT_PKT_CNT_L"},
+	{true,	"NTTGT_PKT_CNT_H"},
+	{true,	"RTNS_PKT_CNT_L"},
+	{true,	"RTNS_PKT_CNT_H"},
+
+	{true,	"RTLPBK_PKT_CNT_L"},
+	{true,	"RTLPBK_PKT_CNT_H"},
+	{true,	"NR_PKT_CNT_L"},
+	{true,	"NR_PKT_CNT_H"},
+	{true,	"RR_PKT_CNT_L"},
+	{true,	"RR_PKT_CNT_H"},
+
+	{true,	"MNG_TBL_HIT_CNT_L"},
+	{true,	"MNG_TBL_HIT_CNT_H"},
+	{true,	"FD_TBL_HIT_CNT_L"},
+	{true,	"FD_TBL_HIT_CNT_H"},
+	{true,	"FD_LKUP_CNT_L"},
+	{true,	"FD_LKUP_CNT_H"},
+
+	{true,	"BC_HIT_CNT_L"},
+	{true,	"BC_HIT_CNT_H"},
+	{true,	"UM_TBL_UC_HIT_CNT_L"},
+	{true,	"UM_TBL_UC_HIT_CNT_H"},
+	{true,	"UM_TBL_MC_HIT_CNT_L"},
+	{true,	"UM_TBL_MC_HIT_CNT_H"},
+
+	{true,	"UM_TBL_VMDQ1_HIT_CNT_L"},
+	{true,	"UM_TBL_VMDQ1_HIT_CNT_H"},
+	{true,	"MTA_TBL_HIT_CNT_L"},
+	{true,	"MTA_TBL_HIT_CNT_H"},
+	{true,	"FWD_BONDING_HIT_CNT_L"},
+	{true,	"FWD_BONDING_HIT_CNT_H"},
+
+	{true,	"PROMIS_TBL_HIT_CNT_L"},
+	{true,	"PROMIS_TBL_HIT_CNT_H"},
+	{true,	"GET_TUNL_PKT_CNT_L"},
+	{true,	"GET_TUNL_PKT_CNT_H"},
+	{true,	"GET_BMC_PKT_CNT_L"},
+	{true,	"GET_BMC_PKT_CNT_H"},
+
+	{true,	"SEND_UC_PRT2BMC_PKT_CNT_L"},
+	{true,	"SEND_UC_PRT2BMC_PKT_CNT_H"},
+	{true,	"SEND_UC_HOST2BMC_PKT_CNT_L"},
+	{true,	"SEND_UC_HOST2BMC_PKT_CNT_H"},
+	{true,	"SEND_UC_BMC2HOST_PKT_CNT_L"},
+	{true,	"SEND_UC_BMC2HOST_PKT_CNT_H"},
+
+	{true,	"SEND_UC_BMC2PRT_PKT_CNT_L"},
+	{true,	"SEND_UC_BMC2PRT_PKT_CNT_H"},
+	{true,	"PPP_MC_2BMC_PKT_CNT_L"},
+	{true,	"PPP_MC_2BMC_PKT_CNT_H"},
+	{true,	"VLAN_MIRR_CNT_L"},
+	{true,	"VLAN_MIRR_CNT_H"},
+
+	{true,	"IG_MIRR_CNT_L"},
+	{true,	"IG_MIRR_CNT_H"},
+	{true,	"EG_MIRR_CNT_L"},
+	{true,	"EG_MIRR_CNT_H"},
+	{true,	"RX_DEFAULT_HOST_HIT_CNT_L"},
+	{true,	"RX_DEFAULT_HOST_HIT_CNT_H"},
+
+	{true,	"LAN_PAIR_CNT_L"},
+	{true,	"LAN_PAIR_CNT_H"},
+	{true,	"UM_TBL_MC_HIT_PKT_CNT_L"},
+	{true,	"UM_TBL_MC_HIT_PKT_CNT_H"},
+	{true,	"MTA_TBL_HIT_PKT_CNT_L"},
+	{true,	"MTA_TBL_HIT_PKT_CNT_H"},
+
+	{true,	"PROMIS_TBL_HIT_PKT_CNT_L"},
+	{true,	"PROMIS_TBL_HIT_PKT_CNT_H"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = {
+	{false, "Reserved"},
+	{true,	"FSM_DFX_ST0"},
+	{true,	"FSM_DFX_ST1"},
+	{true,	"FSM_DFX_ST2"},
+	{true,	"FIFO_DFX_ST0"},
+	{true,	"FIFO_DFX_ST1"},
+
+	{true,	"FIFO_DFX_ST2"},
+	{true,	"FIFO_DFX_ST3"},
+	{true,	"FIFO_DFX_ST4"},
+	{true,	"FIFO_DFX_ST5"},
+	{true,	"FIFO_DFX_ST6"},
+	{true,	"FIFO_DFX_ST7"},
+
+	{true,	"FIFO_DFX_ST8"},
+	{true,	"FIFO_DFX_ST9"},
+	{true,	"FIFO_DFX_ST10"},
+	{true,	"FIFO_DFX_ST11"},
+	{true,	"Q_CREDIT_VLD_0"},
+	{true,	"Q_CREDIT_VLD_1"},
+
+	{true,	"Q_CREDIT_VLD_2"},
+	{true,	"Q_CREDIT_VLD_3"},
+	{true,	"Q_CREDIT_VLD_4"},
+	{true,	"Q_CREDIT_VLD_5"},
+	{true,	"Q_CREDIT_VLD_6"},
+	{true,	"Q_CREDIT_VLD_7"},
+
+	{true,	"Q_CREDIT_VLD_8"},
+	{true,	"Q_CREDIT_VLD_9"},
+	{true,	"Q_CREDIT_VLD_10"},
+	{true,	"Q_CREDIT_VLD_11"},
+	{true,	"Q_CREDIT_VLD_12"},
+	{true,	"Q_CREDIT_VLD_13"},
+
+	{true,	"Q_CREDIT_VLD_14"},
+	{true,	"Q_CREDIT_VLD_15"},
+	{true,	"Q_CREDIT_VLD_16"},
+	{true,	"Q_CREDIT_VLD_17"},
+	{true,	"Q_CREDIT_VLD_18"},
+	{true,	"Q_CREDIT_VLD_19"},
+
+	{true,	"Q_CREDIT_VLD_20"},
+	{true,	"Q_CREDIT_VLD_21"},
+	{true,	"Q_CREDIT_VLD_22"},
+	{true,	"Q_CREDIT_VLD_23"},
+	{true,	"Q_CREDIT_VLD_24"},
+	{true,	"Q_CREDIT_VLD_25"},
+
+	{true,	"Q_CREDIT_VLD_26"},
+	{true,	"Q_CREDIT_VLD_27"},
+	{true,	"Q_CREDIT_VLD_28"},
+	{true,	"Q_CREDIT_VLD_29"},
+	{true,	"Q_CREDIT_VLD_30"},
+	{true,	"Q_CREDIT_VLD_31"},
+
+	{true,	"GRO_BD_SERR_CNT"},
+	{true,	"GRO_CONTEXT_SERR_CNT"},
+	{true,	"RX_STASH_CFG_SERR_CNT"},
+	{true,	"AXI_RD_FBD_SERR_CNT"},
+	{true,	"GRO_BD_MERR_CNT"},
+	{true,	"GRO_CONTEXT_MERR_CNT"},
+
+	{true,	"RX_STASH_CFG_MERR_CNT"},
+	{true,	"AXI_RD_FBD_MERR_CNT"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+	{false, "Reserved"},
+};
+
+static struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
+	{true, "q_num"},
+	{true, "RCB_CFG_RX_RING_TAIL"},
+	{true, "RCB_CFG_RX_RING_HEAD"},
+	{true, "RCB_CFG_RX_RING_FBDNUM"},
+	{true, "RCB_CFG_RX_RING_OFFSET"},
+	{true, "RCB_CFG_RX_RING_FBDOFFSET"},
+
+	{true, "RCB_CFG_RX_RING_PKTNUM_RECORD"},
+	{true, "RCB_CFG_TX_RING_TAIL"},
+	{true, "RCB_CFG_TX_RING_HEAD"},
+	{true, "RCB_CFG_TX_RING_FBDNUM"},
+	{true, "RCB_CFG_TX_RING_OFFSET"},
+	{true, "RCB_CFG_TX_RING_EBDNUM"},
+};
+
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
new file mode 100644
index 0000000..87dece0
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -0,0 +1,2011 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2016-2017 Hisilicon Limited. */
+
+#include "hclge_err.h"
+
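+/* Each hclge_hw_error table below is terminated by an empty sentinel entry;
+ * hclge_log_error() walks a table until it reaches a NULL ->msg. When a
+ * status bit matches, an entry's reset_level (if not HNAE3_NONE_RESET) is
+ * recorded in the caller's reset request bitmap. Several tables list only
+ * the odd (multi-bit ECC) status bits; the interleaved even bits presumably
+ * report correctable single-bit errors and are not logged here.
+ */
+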
+static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
+	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
+	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
+	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
+	{ .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_igu_int[] = {
+	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
+	{ .int_msk = BIT(0), .msg = "rx_buf_overflow",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "tx_buf_overflow",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "tx_buf_underrun",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ncsi_err_int[] = {
+	{ .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
+	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
+	{ .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
+	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_tm_sch_rint[] = {
+	{ .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
+	{ .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
+	{ .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
+	{ .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
+	{ .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(26), .msg = "rd_bus_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(27), .msg = "wr_bus_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(28), .msg = "reg_search_miss",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(29), .msg = "rx_q_search_miss",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(30), .msg = "ooo_ecc_err_detect",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
+	{ .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
+	{ .int_msk = BIT(0), .msg = "over_8bd_no_fe",
+	  .reset_level = HNAE3_FUNC_RESET },
+	{ .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(3), .msg = "tx_rd_fbd_poison",
+	  .reset_level = HNAE3_FUNC_RESET },
+	{ .int_msk = BIT(4), .msg = "rx_rd_ebd_poison",
+	  .reset_level = HNAE3_FUNC_RESET },
+	{ .int_msk = BIT(5), .msg = "buf_wait_timeout",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
+	{ .int_msk = BIT(0), .msg = "buf_sum_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(1), .msg = "ppp_mb_num_err",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(2), .msg = "ppp_mbid_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "cks_edit_position_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "cks_edit_condition_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "vlan_num_ot_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "vlan_num_in_err",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+#define HCLGE_SSU_MEM_ECC_ERR(x) \
+	{ .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
+	  .reset_level = HNAE3_GLOBAL_RESET }
+
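+/* For example, HCLGE_SSU_MEM_ECC_ERR(3) expands to
+ * { .int_msk = BIT(3), .msg = "ssu_mem3_ecc_mbit_err",
+ *   .reset_level = HNAE3_GLOBAL_RESET },
+ * so the 32 per-bank entries below stay free of copy/paste drift.
+ */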
+static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
+	HCLGE_SSU_MEM_ECC_ERR(0),
+	HCLGE_SSU_MEM_ECC_ERR(1),
+	HCLGE_SSU_MEM_ECC_ERR(2),
+	HCLGE_SSU_MEM_ECC_ERR(3),
+	HCLGE_SSU_MEM_ECC_ERR(4),
+	HCLGE_SSU_MEM_ECC_ERR(5),
+	HCLGE_SSU_MEM_ECC_ERR(6),
+	HCLGE_SSU_MEM_ECC_ERR(7),
+	HCLGE_SSU_MEM_ECC_ERR(8),
+	HCLGE_SSU_MEM_ECC_ERR(9),
+	HCLGE_SSU_MEM_ECC_ERR(10),
+	HCLGE_SSU_MEM_ECC_ERR(11),
+	HCLGE_SSU_MEM_ECC_ERR(12),
+	HCLGE_SSU_MEM_ECC_ERR(13),
+	HCLGE_SSU_MEM_ECC_ERR(14),
+	HCLGE_SSU_MEM_ECC_ERR(15),
+	HCLGE_SSU_MEM_ECC_ERR(16),
+	HCLGE_SSU_MEM_ECC_ERR(17),
+	HCLGE_SSU_MEM_ECC_ERR(18),
+	HCLGE_SSU_MEM_ECC_ERR(19),
+	HCLGE_SSU_MEM_ECC_ERR(20),
+	HCLGE_SSU_MEM_ECC_ERR(21),
+	HCLGE_SSU_MEM_ECC_ERR(22),
+	HCLGE_SSU_MEM_ECC_ERR(23),
+	HCLGE_SSU_MEM_ECC_ERR(24),
+	HCLGE_SSU_MEM_ECC_ERR(25),
+	HCLGE_SSU_MEM_ECC_ERR(26),
+	HCLGE_SSU_MEM_ECC_ERR(27),
+	HCLGE_SSU_MEM_ECC_ERR(28),
+	HCLGE_SSU_MEM_ECC_ERR(29),
+	HCLGE_SSU_MEM_ECC_ERR(30),
+	HCLGE_SSU_MEM_ECC_ERR(31),
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
+	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
+	{ .int_msk = BIT(0), .msg = "ig_mac_inf_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "ig_host_inf_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "ig_roc_buf_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "ig_host_data_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(4), .msg = "ig_host_key_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(5), .msg = "tx_qcn_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(6), .msg = "rx_qcn_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "qm_eof_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(10), .msg = "mb_rlt_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(15), .msg = "host_cmd_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(16), .msg = "mac_cmd_fifo_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(19), .msg = "dup_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
+	{ .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
+	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ .int_msk = BIT(9), .msg = "low_water_line_err_port",
+	  .reset_level = HNAE3_NONE_RESET },
+	{ .int_msk = BIT(10), .msg = "hi_water_line_err_port",
+	  .reset_level = HNAE3_GLOBAL_RESET },
+	{ /* sentinel */ }
+};
+
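+/* Unlike the bit-per-error tables above, .int_msk below carries an overflow
+ * event code (0x0, 0x4, 0x8, 0xC, 0x10, ...) rather than a one-hot mask, and
+ * no .reset_level is recorded; the ROCEE QMM overflow handler is expected to
+ * match the reported value against these codes directly.
+ */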
+static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
+	{ .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" },
+	{ .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" },
+	{ .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" },
+	{ .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" },
+	{ .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" },
+	{ .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" },
+	{ .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" },
+	{ .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" },
+	{ .int_msk = 0x14, .msg = "rocee qmm ovf: srqc invalid err" },
+	{ .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" },
+	{ .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" },
+	{ .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" },
+	{ .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" },
+	{ .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" },
+	{ .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" },
+	{ .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" },
+	{ .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" },
+	{ .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" },
+	{ .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" },
+	{ .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" },
+	{ /* sentinel */ }
+};
+
+static void hclge_log_error(struct device *dev, char *reg,
+			    const struct hclge_hw_error *err,
+			    u32 err_sts, unsigned long *reset_requests)
+{
+	while (err->msg) {
+		if (err->int_msk & err_sts) {
+			dev_err(dev, "%s %s found [error status=0x%x]\n",
+				reg, err->msg, err_sts);
+			if (err->reset_level &&
+			    err->reset_level != HNAE3_NONE_RESET)
+				set_bit(err->reset_level, reset_requests);
+		}
+		err++;
+	}
+}
+
+/* hclge_cmd_query_error: read the error information
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @cmd:  command opcode
+ * @flag: flag for extended command structure
+ *
+ * This function queries the error info from hw register(s) using a command.
+ */
+static int hclge_cmd_query_error(struct hclge_dev *hdev,
+				 struct hclge_desc *desc, u32 cmd, u16 flag)
+{
+	struct device *dev = &hdev->pdev->dev;
+	int desc_num = 1;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
+	if (flag) {
+		desc[0].flag |= cpu_to_le16(flag);
+		hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
+		desc_num = 2;
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
+	if (ret)
+		dev_err(dev, "query error cmd failed (%d)\n", ret);
+
+	return ret;
+}
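+
+/* Passing a non-zero flag (e.g. HCLGE_CMD_FLAG_NEXT) chains a second
+ * descriptor onto the query, for status blocks wider than one descriptor's
+ * six data words.
+ */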
+
+static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev)
+{
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false);
+	desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int ret;
+
+	/* configure common error interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);
+
+	if (en) {
+		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
+		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
+					HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
+		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
+		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
+					      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
+		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
+	}
+
+	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
+	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
+				HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
+	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
+	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
+				      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
+	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
+	if (ret)
+		dev_err(dev,
+			"fail(%d) to configure common err interrupts\n", ret);
+
+	return ret;
+}
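+
+/* The enable/mask split above recurs in the helpers below: data words that
+ * carry *_INT_EN bits are written only when en is true, while the matching
+ * *_EN_MASK words are written unconditionally, presumably so a disable
+ * request clears the enable bits under the same mask.
+ */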
+
+static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc;
+	int ret;
+
+	if (hdev->pdev->revision < 0x21)
+		return 0;
+
+	/* configure NCSI error interrupts */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(dev,
+			"fail(%d) to configure  NCSI error interrupts\n", ret);
+
+	return ret;
+}
+
+static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc;
+	int ret;
+
+	/* configure IGU,EGU error interrupts */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
+
+	desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(dev,
+			"fail(%d) to configure IGU common interrupts\n", ret);
+		return ret;
+	}
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);
+
+	desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(dev,
+			"fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
+		return ret;
+	}
+
+	ret = hclge_config_ncsi_hw_err_int(hdev, en);
+
+	return ret;
+}
+
+static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
+					    bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int ret;
+
+	/* configure PPP error interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
+
+	if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
+		if (en) {
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
+			desc[0].data[1] =
+				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
+			desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
+		}
+
+		desc[1].data[0] =
+			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
+		desc[1].data[1] =
+			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
+		if (hdev->pdev->revision >= 0x21)
+			desc[1].data[2] =
+				cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
+	} else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
+		if (en) {
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
+			desc[0].data[1] =
+				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
+		}
+
+		desc[1].data[0] =
+				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
+		desc[1].data[1] =
+				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK);
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
+	if (ret)
+		dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);
+
+	return ret;
+}
+
+static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	int ret;
+
+	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
+					       en);
+	if (ret)
+		return ret;
+
+	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
+					       en);
+
+	return ret;
+}
+
+static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc;
+	int ret;
+
+	/* configure TM SCH hw errors */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
+		return ret;
+	}
+
+	/* configure TM QCN hw errors */
+	ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0);
+	if (ret) {
+		dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret);
+		return ret;
+	}
+
+	hclge_cmd_reuse_desc(&desc, false);
+	if (en)
+		desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(dev,
+			"fail(%d) to configure TM QCN mem errors\n", ret);
+
+	return ret;
+}
+
+static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc;
+	int ret;
+
+	/* configure MAC common error interrupts */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);
+
+	desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(dev,
+			"fail(%d) to configure MAC COMMON error intr\n", ret);
+
+	return ret;
+}
+
+int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
+{
+	struct hclge_desc desc;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN);
+	else
+		desc.data[0] = 0;
+
+	desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
+					     bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int desc_num = 1;
+	int ret;
+
+	/* configure PPU error interrupts */
+	if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
+		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+		desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
+		if (en) {
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN);
+			desc[0].data[1] =
+				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN);
+			desc[1].data[3] =
+				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN);
+			desc[1].data[4] =
+				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN);
+		}
+
+		desc[1].data[0] =
+			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK);
+		desc[1].data[1] =
+			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK);
+		desc[1].data[2] =
+			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK);
+		desc[1].data[3] |=
+			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK);
+		desc_num = 2;
+	} else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
+		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+		if (en)
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2);
+
+		desc[0].data[2] =
+			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK);
+	} else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
+		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+		if (en)
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN);
+
+		desc[0].data[2] =
+			cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK);
+	} else {
+		dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
+		return -EINVAL;
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
+
+	return ret;
+}
+
+static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	int ret;
+
+	ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
+						en);
+	if (ret) {
+		dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
+			ret);
+		return ret;
+	}
+
+	ret = hclge_config_ppu_error_interrupts(hdev,
+						HCLGE_PPU_MPF_OTHER_INT_CMD,
+						en);
+	if (ret) {
+		dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
+		return ret;
+	}
+
+	ret = hclge_config_ppu_error_interrupts(hdev,
+						HCLGE_PPU_PF_OTHER_INT_CMD, en);
+	if (ret)
+		dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
+			ret);
+	return ret;
+}
+
+static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int ret;
+
+	/* configure SSU ecc error interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
+	if (en) {
+		desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
+		desc[0].data[1] =
+			cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
+		desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
+	}
+
+	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
+	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
+	desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
+	if (ret) {
+		dev_err(dev,
+			"fail(%d) to configure SSU ECC error interrupt\n", ret);
+		return ret;
+	}
+
+	/* configure SSU common error interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);
+
+	if (en) {
+		if (hdev->pdev->revision >= 0x21)
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
+		else
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
+		desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
+		desc[0].data[2] =
+			cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
+	}
+
+	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
+				HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
+	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
+	if (ret)
+		dev_err(dev,
+			"fail(%d) to configure SSU COMMON error intr\n", ret);
+
+	return ret;
+}
+
+/* hclge_query_bd_num: query number of buffer descriptors
+ * @hdev: pointer to struct hclge_dev
+ * @is_ras: true for ras, false for msix
+ * @mpf_bd_num: number of main PF interrupt buffer descriptors
+ * @pf_bd_num: number of non-main PF interrupt buffer descriptors
+ *
+ * This function queries the number of mpf and pf buffer descriptors.
+ */
+static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
+			      int *mpf_bd_num, int *pf_bd_num)
+{
+	struct device *dev = &hdev->pdev->dev;
+	u32 mpf_min_bd_num, pf_min_bd_num;
+	enum hclge_opcode_type opcode;
+	struct hclge_desc desc_bd;
+	int ret;
+
+	if (is_ras) {
+		opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM;
+		mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM;
+		pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM;
+	} else {
+		opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM;
+		mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM;
+		pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM;
+	}
+
+	hclge_cmd_setup_basic_desc(&desc_bd, opcode, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+	if (ret) {
+		dev_err(dev, "fail(%d) to query msix int status bd num\n",
+			ret);
+		return ret;
+	}
+
+	*mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
+	*pf_bd_num = le32_to_cpu(desc_bd.data[1]);
+	if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
+		dev_err(dev, "Invalid bd num: mpf(%d), pf(%d)\n",
+			*mpf_bd_num, *pf_bd_num);
+		return -EINVAL;
+	}
+
+	return 0;
+}
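+
+/* hclge_handle_all_ras_errors() below shows the intended usage: take
+ * max(*mpf_bd_num, *pf_bd_num), allocate that many descriptors once, and
+ * reuse the buffer for both the mpf and pf queries.
+ */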
+
+/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the main PF RAS errors in the
+ * hw register(s) using a command.
+ */
+static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
+				      struct hclge_desc *desc,
+				      int num)
+{
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
+
+	/* query all main PF RAS errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret) {
+		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
+		return ret;
+	}
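+
+	/* Status words continue across the returned descriptors: desc[n] is
+	 * treated as a flat array of six __le32 data words below.
+	 */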
+
+	/* log HNS common errors */
+	status = le32_to_cpu(desc[0].data[0]);
+	if (status)
+		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+				&hclge_imp_tcm_ecc_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(desc[0].data[1]);
+	if (status)
+		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+				&hclge_cmdq_nic_mem_ecc_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
+		dev_warn(dev, "imp_rd_data_poison_err found\n");
+
+	status = le32_to_cpu(desc[0].data[3]);
+	if (status)
+		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+				&hclge_tqp_int_ecc_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(desc[0].data[4]);
+	if (status)
+		hclge_log_error(dev, "MSIX_ECC_INT_STS",
+				&hclge_msix_sram_ecc_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log SSU(Storage Switch Unit) errors */
+	desc_data = (__le32 *)&desc[2];
+	status = le32_to_cpu(*(desc_data + 2));
+	if (status)
+		hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
+				&hclge_ssu_mem_ecc_err_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
+	if (status) {
+		dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
+			status);
+		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
+	}
+
+	status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "SSU_COMMON_ERR_INT",
+				&hclge_ssu_com_err_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log IGU(Ingress Unit) errors */
+	desc_data = (__le32 *)&desc[3];
+	status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "IGU_INT_STS",
+				&hclge_igu_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log PPP(Programmable Packet Process) errors */
+	desc_data = (__le32 *)&desc[4];
+	status = le32_to_cpu(*(desc_data + 1));
+	if (status)
+		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+				&hclge_ppp_mpf_abnormal_int_st1[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
+	if (status)
+		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+				&hclge_ppp_mpf_abnormal_int_st3[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log PPU(RCB) errors */
+	desc_data = (__le32 *)&desc[5];
+	status = le32_to_cpu(*(desc_data + 1));
+	if (status) {
+		dev_err(dev,
+			"PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
+		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
+	}
+
+	status = le32_to_cpu(*(desc_data + 2));
+	if (status)
+		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+				&hclge_ppu_mpf_abnormal_int_st2[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
+	if (status)
+		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
+				&hclge_ppu_mpf_abnormal_int_st3[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log TM(Traffic Manager) errors */
+	desc_data = (__le32 *)&desc[6];
+	status = le32_to_cpu(*desc_data);
+	if (status)
+		hclge_log_error(dev, "TM_SCH_RINT",
+				&hclge_tm_sch_rint[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log QCN(Quantized Congestion Control) errors */
+	desc_data = (__le32 *)&desc[7];
+	status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "QCN_FIFO_RINT",
+				&hclge_qcn_fifo_rint[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "QCN_ECC_RINT",
+				&hclge_qcn_ecc_rint[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log NCSI errors */
+	desc_data = (__le32 *)&desc[9];
+	status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+				&hclge_ncsi_err_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* clear all main PF RAS errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret)
+		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);
+
+	return ret;
+}
+
+/* hclge_handle_pf_ras_error: handle all PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the PF RAS errors in the
+ * hw registers using the command queue.
+ */
+static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
+				     struct hclge_desc *desc,
+				     int num)
+{
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
+
+	/* query all PF RAS errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret) {
+		dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* log SSU(Storage Switch Unit) errors */
+	status = le32_to_cpu(desc[0].data[0]);
+	if (status)
+		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+				&hclge_ssu_port_based_err_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(desc[0].data[1]);
+	if (status)
+		hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
+				&hclge_ssu_fifo_overflow_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	status = le32_to_cpu(desc[0].data[2]);
+	if (status)
+		hclge_log_error(dev, "SSU_ETS_TCG_INT",
+				&hclge_ssu_ets_tcg_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
+	desc_data = (__le32 *)&desc[1];
+	status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+				&hclge_igu_egu_tnl_int[0], status,
+				&ae_dev->hw_err_reset_req);
+
+	/* log PPU(RCB) errors */
+	desc_data = (__le32 *)&desc[3];
+	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
+	if (status) {
+		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
+				&hclge_ppu_pf_abnormal_int[0], status,
+				&ae_dev->hw_err_reset_req);
+		hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR);
+	}
+
+	/* clear all PF RAS errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret)
+		dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);
+
+	return ret;
+}
+
+static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
+{
+	u32 mpf_bd_num, pf_bd_num, bd_num;
+	struct hclge_desc *desc;
+	int ret;
+
+	/* query the number of bds for the RAS int status */
+	ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num);
+	if (ret)
+		return ret;
+
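+	/* one buffer is reused for both queries, so size it for the larger */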
+	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	/* handle all main PF RAS errors */
+	ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
+	if (ret) {
+		kfree(desc);
+		return ret;
+	}
+	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
+
+	/* handle all PF RAS errors */
+	ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
+	kfree(desc);
+
+	return ret;
+}
+
+static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[3];
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+				   true);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+				   true);
+	hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+				   true);
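+	/* the NEXT flag chains a desc to the following one; the last stays clear */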
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
+	if (ret) {
+		dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
+		return ret;
+	}
+
+	dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
+		le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
+		le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
+		le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
+	dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
+		le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
+		le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
+		le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
+	dev_err(dev, "AXI3: %08X %08X %08X %08X\n",
+		le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
+		le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));
+
+	return 0;
+}
+
+static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int ret;
+
+	ret = hclge_cmd_query_error(hdev, &desc[0],
+				    HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
+				    HCLGE_CMD_FLAG_NEXT);
+	if (ret) {
+		dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
+		return ret;
+	}
+
+	dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
+		le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
+		le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
+		le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
+	dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
+		le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));
+
+	return 0;
+}
+
+static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int ret;
+
+	/* read overflow error status */
+	ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD,
+				    0);
+	if (ret) {
+		dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret);
+		return ret;
+	}
+
+	/* log overflow error */
+	if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
+		const struct hclge_hw_error *err;
+		u32 err_sts;
+
+		err = &hclge_rocee_qmm_ovf_err_int[0];
+		err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK &
+			  le32_to_cpu(desc[0].data[0]);
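+		/* walk the error table until the matching overflow type is found */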
+		while (err->msg) {
+			if (err->int_msk == err_sts) {
+				dev_err(dev, "%s [error status=0x%x] found\n",
+					err->msg,
+					le32_to_cpu(desc[0].data[0]));
+				break;
+			}
+			err++;
+		}
+	}
+
+	if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
+		dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n",
+			le32_to_cpu(desc[0].data[1]));
+	}
+
+	if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
+		dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n",
+			le32_to_cpu(desc[0].data[2]));
+	}
+
+	return 0;
+}
+
+static enum hnae3_reset_type
+hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
+{
+	enum hnae3_reset_type reset_type = HNAE3_NONE_RESET;
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	unsigned int status;
+	int ret;
+
+	/* read RAS error interrupt status */
+	ret = hclge_cmd_query_error(hdev, &desc[0],
+				    HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0);
+	if (ret) {
+		dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret);
+		/* reset everything for now */
+		return HNAE3_GLOBAL_RESET;
+	}
+
+	status = le32_to_cpu(desc[0].data[0]);
+
+	if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
+		if (status & HCLGE_ROCEE_RERR_INT_MASK)
+			dev_err(dev, "ROCEE RAS AXI rresp error\n");
+
+		if (status & HCLGE_ROCEE_BERR_INT_MASK)
+			dev_err(dev, "ROCEE RAS AXI bresp error\n");
+
+		reset_type = HNAE3_FUNC_RESET;
+
+		ret = hclge_log_rocee_axi_error(hdev);
+		if (ret)
+			return HNAE3_GLOBAL_RESET;
+	}
+
+	if (status & HCLGE_ROCEE_ECC_INT_MASK) {
+		dev_err(dev, "ROCEE RAS 2bit ECC error\n");
+		reset_type = HNAE3_GLOBAL_RESET;
+
+		ret = hclge_log_rocee_ecc_error(hdev);
+		if (ret)
+			return HNAE3_GLOBAL_RESET;
+	}
+
+	if (status & HCLGE_ROCEE_OVF_INT_MASK) {
+		ret = hclge_log_rocee_ovf_error(hdev);
+		if (ret) {
+			dev_err(dev, "failed(%d) to process ovf error\n", ret);
+			/* reset everything for now */
+			return HNAE3_GLOBAL_RESET;
+		}
+	}
+
+	/* clear error status */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
+	if (ret) {
+		dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
+		/* reset everything for now */
+		return HNAE3_GLOBAL_RESET;
+	}
+
+	return reset_type;
+}
+
+int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc;
+	int ret;
+
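+	/* ROCEE RAS is only supported on HW revision 0x21 and later */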
+	if (hdev->pdev->revision < 0x21 || !hnae3_dev_roce_supported(hdev))
+		return 0;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);
+	if (en) {
+		/* enable ROCEE hw error interrupts */
+		desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
+		desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);
+
+		hclge_log_and_clear_rocee_ras_error(hdev);
+	}
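+	/* the mask words select which interrupt enable bits are updated */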
+	desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
+	desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);
+
+	return ret;
+}
+
+static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+	enum hnae3_reset_type reset_type;
+
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
+	    hdev->pdev->revision < 0x21)
+		return;
+
+	reset_type = hclge_log_and_clear_rocee_ras_error(hdev);
+	if (reset_type != HNAE3_NONE_RESET)
+		set_bit(reset_type, &ae_dev->hw_err_reset_req);
+}
+
+static const struct hclge_hw_blk hw_blk[] = {
+	{
+	  .msk = BIT(0), .name = "IGU_EGU",
+	  .config_err_int = hclge_config_igu_egu_hw_err_int,
+	},
+	{
+	  .msk = BIT(1), .name = "PPP",
+	  .config_err_int = hclge_config_ppp_hw_err_int,
+	},
+	{
+	  .msk = BIT(2), .name = "SSU",
+	  .config_err_int = hclge_config_ssu_hw_err_int,
+	},
+	{
+	  .msk = BIT(3), .name = "PPU",
+	  .config_err_int = hclge_config_ppu_hw_err_int,
+	},
+	{
+	  .msk = BIT(4), .name = "TM",
+	  .config_err_int = hclge_config_tm_hw_err_int,
+	},
+	{
+	  .msk = BIT(5), .name = "COMMON",
+	  .config_err_int = hclge_config_common_hw_err_int,
+	},
+	{
+	  .msk = BIT(8), .name = "MAC",
+	  .config_err_int = hclge_config_mac_err_int,
+	},
+	{ /* sentinel */ }
+};
+
+int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
+{
+	const struct hclge_hw_blk *module = hw_blk;
+	int ret = 0;
+
+	while (module->name) {
+		if (module->config_err_int) {
+			ret = module->config_err_int(hdev, state);
+			if (ret)
+				return ret;
+		}
+		module++;
+	}
+
+	return ret;
+}
+
+pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct device *dev = &hdev->pdev->dev;
+	u32 status;
+
+	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
+		dev_err(dev,
+			"Can't recover - RAS error reported during dev init\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+
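+	/* only NFE and ROCEE errors are handled here; otherwise nothing to do */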
+	if (status & HCLGE_RAS_REG_NFE_MASK ||
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
+		ae_dev->hw_err_reset_req = 0;
+	else
+		goto out;
+
+	/* Handling Non-fatal HNS RAS errors */
+	if (status & HCLGE_RAS_REG_NFE_MASK) {
+		dev_err(dev,
+			"HNS Non-Fatal RAS error(status=0x%x) identified\n",
+			status);
+		hclge_handle_all_ras_errors(hdev);
+	}
+
+	/* Handling Non-fatal Rocee RAS errors */
+	if (hdev->pdev->revision >= 0x21 &&
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
+		dev_err(dev, "ROCEE Non-Fatal RAS error identified\n");
+		hclge_handle_rocee_ras_error(ae_dev);
+	}
+
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		goto out;
+
+	if (ae_dev->hw_err_reset_req)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+out:
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
+				     struct hclge_desc *desc, bool is_mpf,
+				     u32 bd_num)
+{
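+	/* rewrite the first descriptor's opcode and flags to issue the clear */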
+	if (is_mpf)
+		desc[0].opcode =
+			cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
+	else
+		desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);
+
+	desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+
+	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
+}
+
+/* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
+ * @hdev: pointer to struct hclge_dev
+ * @vf_id: Index of the virtual function with error
+ * @q_id: Physical index of the queue with error
+ *
+ * This function gets the specific index of the queue and of the function
+ * that caused over_8bd_nfe_err, using the command queue. If vf_id is 0,
+ * the error was caused by the PF rather than a VF.
+ */
+static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
+					 u16 *q_id)
+{
+	struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		return ret;
+
+	req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
+	*vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
+	*q_id = le16_to_cpu(req->over_8bd_no_fe_qid);
+
+	return 0;
+}
+
+/* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
+ * @hdev: pointer to struct hclge_dev
+ * @reset_requests: reset level that we need to trigger later
+ *
+ * over_8bd_nfe_err is a special MSI-X error because it may be caused by a
+ * VF; in that case we need to trigger a VF reset. Otherwise, a PF reset is
+ * needed.
+ */
+static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
+				      unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+	u16 vf_id;
+	u16 q_id;
+	int ret;
+
+	ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
+	if (ret) {
+		dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n",
+			ret);
+		return;
+	}
+
+	dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n",
+		vf_id, q_id);
+
+	if (vf_id) {
+		if (vf_id >= hdev->num_alloc_vport) {
+			dev_err(dev, "invalid vf id(%d)\n", vf_id);
+			return;
+		}
+
+		/* If a reset at a level higher than HNAE3_VF_FUNC_RESET
+		 * has already been requested, there is no need to trigger
+		 * a VF reset here.
+		 */
+		if (*reset_requests != 0)
+			return;
+
+		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
+		if (ret)
+			dev_err(dev, "inform reset to vf(%u) failed %d!\n",
+				hdev->vport[vf_id].vport_id, ret);
+	} else {
+		set_bit(HNAE3_FUNC_RESET, reset_requests);
+	}
+}
+
+/* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @mpf_bd_num: number of extended command structures
+ * @reset_requests: record of the reset level that we need
+ *
+ * This function handles all the main PF MSI-X errors in the hw
+ * registers using the command queue.
+ */
+static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
+				       struct hclge_desc *desc,
+				       int mpf_bd_num,
+				       unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
+
+	/* query all main PF MSIx errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
+	if (ret) {
+		dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* log MAC errors */
+	desc_data = (__le32 *)&desc[1];
+	status = le32_to_cpu(*desc_data);
+	if (status)
+		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+				&hclge_mac_afifo_tnl_int[0], status,
+				reset_requests);
+
+	/* log PPU(RCB) MPF errors */
+	desc_data = (__le32 *)&desc[5];
+	status = le32_to_cpu(*(desc_data + 2)) &
+			HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
+	if (status)
+		dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]",
+			status);
+
+	/* clear all main PF MSIx errors */
+	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
+	if (ret)
+		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);
+
+	return ret;
+}
+
+/* hclge_handle_pf_msix_error: handle all PF MSI-X errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @pf_bd_num: number of extended command structures
+ * @reset_requests: record of the reset level that we need
+ *
+ * This function handles all the PF MSI-X errors in the hw registers
+ * using the command queue.
+ */
+static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
+				      struct hclge_desc *desc,
+				      int pf_bd_num,
+				      unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
+
+	/* query all PF MSIx errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
+	if (ret) {
+		dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* log SSU PF errors */
+	status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
+	if (status)
+		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+				&hclge_ssu_port_based_pf_int[0],
+				status, reset_requests);
+
+	/* read and log PPP PF errors */
+	desc_data = (__le32 *)&desc[2];
+	status = le32_to_cpu(*desc_data);
+	if (status)
+		hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
+				&hclge_ppp_pf_abnormal_int[0],
+				status, reset_requests);
+
+	/* log PPU(RCB) PF errors */
+	desc_data = (__le32 *)&desc[3];
+	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
+	if (status)
+		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
+				&hclge_ppu_pf_abnormal_int[0],
+				status, reset_requests);
+
+	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
+	if (status)
+		hclge_handle_over_8bd_err(hdev, reset_requests);
+
+	/* clear all PF MSIx errors */
+	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
+	if (ret)
+		dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret);
+
+	return ret;
+}
+
+static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
+					  unsigned long *reset_requests)
+{
+	struct hclge_mac_tnl_stats mac_tnl_stats;
+	struct device *dev = &hdev->pdev->dev;
+	u32 mpf_bd_num, pf_bd_num, bd_num;
+	struct hclge_desc *desc;
+	u32 status;
+	int ret;
+
+	/* query the number of bds for the MSIx int status */
+	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
+	if (ret)
+		goto out;
+
+	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num,
+					  reset_requests);
+	if (ret)
+		goto msi_error;
+
+	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
+	ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests);
+	if (ret)
+		goto msi_error;
+
+	/* query and clear mac tnl interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
+	if (ret) {
+		dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
+		goto msi_error;
+	}
+
+	status = le32_to_cpu(desc->data[0]);
+	if (status) {
+		/* When a mac tnl interrupt occurs, record the current time
+		 * and the register status in a fifo, then clear the status,
+		 * so that if the link status changes suddenly at some point
+		 * we can query them via debugfs.
+		 */
+		mac_tnl_stats.time = local_clock();
+		mac_tnl_stats.status = status;
+		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
+		ret = hclge_clear_mac_tnl_int(hdev);
+		if (ret)
+			dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
+	}
+
+msi_error:
+	kfree(desc);
+out:
+	return ret;
+}
+
+int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
+			       unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+
+	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
+		dev_err(dev,
+			"Can't handle - MSIx error reported during dev init\n");
+		return 0;
+	}
+
+	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
+}
+
+void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
+{
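+/* the first 8 bytes of a descriptor hold the header, not error data */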
+#define HCLGE_DESC_NO_DATA_LEN 8
+
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct device *dev = &hdev->pdev->dev;
+	u32 mpf_bd_num, pf_bd_num, bd_num;
+	struct hclge_desc *desc;
+	u32 status;
+	int ret;
+
+	ae_dev->hw_err_reset_req = 0;
+	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+
+	/* query the number of bds for the MSIx int status */
+	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
+	if (ret)
+		return;
+
+	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc)
+		return;
+
+	/* Clear HNS hw errors reported through msix */
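+	/* set every status bit so the clear command covers all possible sources */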
+	memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) -
+	       HCLGE_DESC_NO_DATA_LEN);
+	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
+	if (ret) {
+		dev_err(dev, "fail(%d) to clear mpf msix int during init\n",
+			ret);
+		goto msi_error;
+	}
+
+	memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) -
+	       HCLGE_DESC_NO_DATA_LEN);
+	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
+	if (ret) {
+		dev_err(dev, "fail(%d) to clear pf msix int during init\n",
+			ret);
+		goto msi_error;
+	}
+
+	/* Handle Non-fatal HNS RAS errors */
+	if (status & HCLGE_RAS_REG_NFE_MASK) {
+		dev_err(dev, "HNS hw error(RAS) identified during init\n");
+		hclge_handle_all_ras_errors(hdev);
+	}
+
+msi_error:
+	kfree(desc);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
new file mode 100644
index 0000000..876fd81
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*  Copyright (c) 2016-2017 Hisilicon Limited. */
+
+#ifndef __HCLGE_ERR_H
+#define __HCLGE_ERR_H
+
+#include "hclge_main.h"
+#include "hnae3.h"
+
+#define HCLGE_MPF_RAS_INT_MIN_BD_NUM	10
+#define HCLGE_PF_RAS_INT_MIN_BD_NUM	4
+#define HCLGE_MPF_MSIX_INT_MIN_BD_NUM	10
+#define HCLGE_PF_MSIX_INT_MIN_BD_NUM	4
+
+#define HCLGE_RAS_PF_OTHER_INT_STS_REG   0x20B00
+#define HCLGE_RAS_REG_NFE_MASK   0xFF00
+#define HCLGE_RAS_REG_ROCEE_ERR_MASK   0x3000000
+
+#define HCLGE_VECTOR0_PF_OTHER_INT_STS_REG   0x20800
+#define HCLGE_VECTOR0_REG_MSIX_MASK   0x1FF00
+
+#define HCLGE_IMP_TCM_ECC_ERR_INT_EN	0xFFFF0000
+#define HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK	0xFFFF0000
+#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN	0x300
+#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK	0x300
+#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN	0xFFFF
+#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK	0xFFFF
+#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN	0xFFFF0000
+#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK	0xFFFF0000
+#define HCLGE_IMP_RD_POISON_ERR_INT_EN	0x0100
+#define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK	0x0100
+#define HCLGE_TQP_ECC_ERR_INT_EN	0x0FFF
+#define HCLGE_TQP_ECC_ERR_INT_EN_MASK	0x0FFF
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK	0x0F000000
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN	0x0F000000
+#define HCLGE_IGU_ERR_INT_EN	0x0000066F
+#define HCLGE_IGU_ERR_INT_EN_MASK	0x000F
+#define HCLGE_IGU_TNL_ERR_INT_EN    0x0002AABF
+#define HCLGE_IGU_TNL_ERR_INT_EN_MASK  0x003F
+#define HCLGE_PPP_MPF_ECC_ERR_INT0_EN	0xFFFFFFFF
+#define HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK	0xFFFFFFFF
+#define HCLGE_PPP_MPF_ECC_ERR_INT1_EN	0xFFFFFFFF
+#define HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK	0xFFFFFFFF
+#define HCLGE_PPP_PF_ERR_INT_EN	0x0003
+#define HCLGE_PPP_PF_ERR_INT_EN_MASK	0x0003
+#define HCLGE_PPP_MPF_ECC_ERR_INT2_EN	0x003F
+#define HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK	0x003F
+#define HCLGE_PPP_MPF_ECC_ERR_INT3_EN	0x003F
+#define HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK	0x003F
+#define HCLGE_TM_SCH_ECC_ERR_INT_EN	0x3
+#define HCLGE_TM_QCN_MEM_ERR_INT_EN	0xFFFFFF
+#define HCLGE_NCSI_ERR_INT_EN	0x3
+#define HCLGE_NCSI_ERR_INT_TYPE	0x9
+#define HCLGE_MAC_COMMON_ERR_INT_EN		0x107FF
+#define HCLGE_MAC_COMMON_ERR_INT_EN_MASK	0x107FF
+#define HCLGE_MAC_TNL_INT_EN			GENMASK(9, 0)
+#define HCLGE_MAC_TNL_INT_EN_MASK		GENMASK(9, 0)
+#define HCLGE_MAC_TNL_INT_CLR			GENMASK(9, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN		GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK	GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN		GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK	GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN		0x3FFF3FFF
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK	0x3FFF3FFF
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN2		0xB
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK	0xB
+#define HCLGE_PPU_MPF_ABNORMAL_INT3_EN		GENMASK(7, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK	GENMASK(23, 16)
+#define HCLGE_PPU_PF_ABNORMAL_INT_EN		GENMASK(5, 0)
+#define HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK	GENMASK(5, 0)
+#define HCLGE_SSU_1BIT_ECC_ERR_INT_EN		GENMASK(31, 0)
+#define HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK	GENMASK(31, 0)
+#define HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN	GENMASK(31, 0)
+#define HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK	GENMASK(31, 0)
+#define HCLGE_SSU_BIT32_ECC_ERR_INT_EN		0x0101
+#define HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK	0x0101
+#define HCLGE_SSU_COMMON_INT_EN			GENMASK(9, 0)
+#define HCLGE_SSU_COMMON_INT_EN_MASK		GENMASK(9, 0)
+#define HCLGE_SSU_PORT_BASED_ERR_INT_EN		0x0BFF
+#define HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK	0x0BFF0000
+#define HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN	GENMASK(23, 0)
+#define HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK	GENMASK(23, 0)
+
+#define HCLGE_SSU_COMMON_ERR_INT_MASK	GENMASK(9, 0)
+#define HCLGE_SSU_PORT_INT_MSIX_MASK	0x7BFF
+#define HCLGE_IGU_INT_MASK		GENMASK(3, 0)
+#define HCLGE_IGU_EGU_TNL_INT_MASK	GENMASK(5, 0)
+#define HCLGE_PPP_MPF_INT_ST3_MASK	GENMASK(5, 0)
+#define HCLGE_PPU_MPF_INT_ST3_MASK	GENMASK(7, 0)
+#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK	BIT(29)
+#define HCLGE_PPU_PF_INT_RAS_MASK	0x18
+#define HCLGE_PPU_PF_INT_MSIX_MASK	0x26
+#define HCLGE_PPU_PF_OVER_8BD_ERR_MASK	0x01
+#define HCLGE_QCN_FIFO_INT_MASK		GENMASK(17, 0)
+#define HCLGE_QCN_ECC_INT_MASK		GENMASK(21, 0)
+#define HCLGE_NCSI_ECC_INT_MASK		GENMASK(1, 0)
+
+#define HCLGE_ROCEE_RAS_NFE_INT_EN		0xF
+#define HCLGE_ROCEE_RAS_CE_INT_EN		0x1
+#define HCLGE_ROCEE_RAS_NFE_INT_EN_MASK		0xF
+#define HCLGE_ROCEE_RAS_CE_INT_EN_MASK		0x1
+#define HCLGE_ROCEE_RERR_INT_MASK		BIT(0)
+#define HCLGE_ROCEE_BERR_INT_MASK		BIT(1)
+#define HCLGE_ROCEE_AXI_ERR_INT_MASK		GENMASK(1, 0)
+#define HCLGE_ROCEE_ECC_INT_MASK		BIT(2)
+#define HCLGE_ROCEE_OVF_INT_MASK		BIT(3)
+#define HCLGE_ROCEE_OVF_ERR_INT_MASK		0x10000
+#define HCLGE_ROCEE_OVF_ERR_TYPE_MASK		0x3F
+
+enum hclge_err_int_type {
+	HCLGE_ERR_INT_MSIX = 0,
+	HCLGE_ERR_INT_RAS_CE = 1,
+	HCLGE_ERR_INT_RAS_NFE = 2,
+	HCLGE_ERR_INT_RAS_FE = 3,
+};
+
+struct hclge_hw_blk {
+	u32 msk;
+	const char *name;
+	int (*config_err_int)(struct hclge_dev *hdev, bool en);
+};
+
+struct hclge_hw_error {
+	u32 int_msk;
+	const char *msg;
+	enum hnae3_reset_type reset_level;
+};
+
+int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
+int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
+int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en);
+void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev);
+pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
+int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
+			       unsigned long *reset_requests);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 340baf6..c052bb3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -12,6 +12,7 @@
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/if_vlan.h>
+#include <linux/crash_dump.h>
 #include <net/rtnetlink.h>
 #include "hclge_cmd.h"
 #include "hclge_dcb.h"
@@ -19,20 +20,53 @@
 #include "hclge_mbx.h"
 #include "hclge_mdio.h"
 #include "hclge_tm.h"
+#include "hclge_err.h"
 #include "hnae3.h"
 
 #define HCLGE_NAME			"hclge"
 #define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
 #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
-#define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
-#define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
 
-static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
-				     enum hclge_mta_dmac_sel_type mta_mac_sel,
-				     bool enable);
-static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu);
+#define HCLGE_BUF_SIZE_UNIT	256U
+#define HCLGE_BUF_MUL_BY	2
+#define HCLGE_BUF_DIV_BY	2
+#define NEED_RESERVE_TC_NUM	2
+#define BUF_MAX_PERCENT		100
+#define BUF_RESERVE_PERCENT	90
+
+#define HCLGE_RESET_MAX_FAIL_CNT	5
+#define HCLGE_RESET_SYNC_TIME		100
+#define HCLGE_PF_RESET_SYNC_TIME	20
+#define HCLGE_PF_RESET_SYNC_CNT		1500
+
+/* Get DFX BD number offset */
+#define HCLGE_DFX_BIOS_BD_OFFSET        1
+#define HCLGE_DFX_SSU_0_BD_OFFSET       2
+#define HCLGE_DFX_SSU_1_BD_OFFSET       3
+#define HCLGE_DFX_IGU_BD_OFFSET         4
+#define HCLGE_DFX_RPU_0_BD_OFFSET       5
+#define HCLGE_DFX_RPU_1_BD_OFFSET       6
+#define HCLGE_DFX_NCSI_BD_OFFSET        7
+#define HCLGE_DFX_RTC_BD_OFFSET         8
+#define HCLGE_DFX_PPP_BD_OFFSET         9
+#define HCLGE_DFX_RCB_BD_OFFSET         10
+#define HCLGE_DFX_TQP_BD_OFFSET         11
+#define HCLGE_DFX_SSU_2_BD_OFFSET       12
+
+#define HCLGE_LINK_STATUS_MS	10
+
+static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
 static int hclge_init_vlan_config(struct hclge_dev *hdev);
+static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
 static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
+static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
+static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
+			       u16 *allocated_size, bool is_alloc);
+static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
+static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
+static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
+						   unsigned long *addr);
+static int hclge_set_default_loopback(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -50,181 +84,80 @@
 
 MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
 
+static const u32 cmdq_reg_addr_list[] = {HCLGE_CMDQ_TX_ADDR_L_REG,
+					 HCLGE_CMDQ_TX_ADDR_H_REG,
+					 HCLGE_CMDQ_TX_DEPTH_REG,
+					 HCLGE_CMDQ_TX_TAIL_REG,
+					 HCLGE_CMDQ_TX_HEAD_REG,
+					 HCLGE_CMDQ_RX_ADDR_L_REG,
+					 HCLGE_CMDQ_RX_ADDR_H_REG,
+					 HCLGE_CMDQ_RX_DEPTH_REG,
+					 HCLGE_CMDQ_RX_TAIL_REG,
+					 HCLGE_CMDQ_RX_HEAD_REG,
+					 HCLGE_VECTOR0_CMDQ_SRC_REG,
+					 HCLGE_CMDQ_INTR_STS_REG,
+					 HCLGE_CMDQ_INTR_EN_REG,
+					 HCLGE_CMDQ_INTR_GEN_REG};
+
+static const u32 common_reg_addr_list[] = {HCLGE_MISC_VECTOR_REG_BASE,
+					   HCLGE_VECTOR0_OTER_EN_REG,
+					   HCLGE_MISC_RESET_STS_REG,
+					   HCLGE_MISC_VECTOR_INT_STS,
+					   HCLGE_GLOBAL_RESET_REG,
+					   HCLGE_FUN_RST_ING,
+					   HCLGE_GRO_EN_REG};
+
+static const u32 ring_reg_addr_list[] = {HCLGE_RING_RX_ADDR_L_REG,
+					 HCLGE_RING_RX_ADDR_H_REG,
+					 HCLGE_RING_RX_BD_NUM_REG,
+					 HCLGE_RING_RX_BD_LENGTH_REG,
+					 HCLGE_RING_RX_MERGE_EN_REG,
+					 HCLGE_RING_RX_TAIL_REG,
+					 HCLGE_RING_RX_HEAD_REG,
+					 HCLGE_RING_RX_FBD_NUM_REG,
+					 HCLGE_RING_RX_OFFSET_REG,
+					 HCLGE_RING_RX_FBD_OFFSET_REG,
+					 HCLGE_RING_RX_STASH_REG,
+					 HCLGE_RING_RX_BD_ERR_REG,
+					 HCLGE_RING_TX_ADDR_L_REG,
+					 HCLGE_RING_TX_ADDR_H_REG,
+					 HCLGE_RING_TX_BD_NUM_REG,
+					 HCLGE_RING_TX_PRIORITY_REG,
+					 HCLGE_RING_TX_TC_REG,
+					 HCLGE_RING_TX_MERGE_EN_REG,
+					 HCLGE_RING_TX_TAIL_REG,
+					 HCLGE_RING_TX_HEAD_REG,
+					 HCLGE_RING_TX_FBD_NUM_REG,
+					 HCLGE_RING_TX_OFFSET_REG,
+					 HCLGE_RING_TX_EBD_NUM_REG,
+					 HCLGE_RING_TX_EBD_OFFSET_REG,
+					 HCLGE_RING_TX_BD_ERR_REG,
+					 HCLGE_RING_EN_REG};
+
+static const u32 tqp_intr_reg_addr_list[] = {HCLGE_TQP_INTR_CTRL_REG,
+					     HCLGE_TQP_INTR_GL0_REG,
+					     HCLGE_TQP_INTR_GL1_REG,
+					     HCLGE_TQP_INTR_GL2_REG,
+					     HCLGE_TQP_INTR_RL_REG};
+
 static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
-	"Mac    Loopback test",
-	"Serdes Loopback test",
+	"App    Loopback test",
+	"Serdes serial Loopback test",
+	"Serdes parallel Loopback test",
 	"Phy    Loopback test"
 };
 
-static const struct hclge_comm_stats_str g_all_64bit_stats_string[] = {
-	{"igu_rx_oversize_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_oversize_pkt)},
-	{"igu_rx_undersize_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_undersize_pkt)},
-	{"igu_rx_out_all_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_out_all_pkt)},
-	{"igu_rx_uni_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_uni_pkt)},
-	{"igu_rx_multi_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_multi_pkt)},
-	{"igu_rx_broad_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_broad_pkt)},
-	{"egu_tx_out_all_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_out_all_pkt)},
-	{"egu_tx_uni_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_uni_pkt)},
-	{"egu_tx_multi_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_multi_pkt)},
-	{"egu_tx_broad_pkt",
-		HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_broad_pkt)},
-	{"ssu_ppp_mac_key_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ssu_ppp_mac_key_num)},
-	{"ssu_ppp_host_key_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ssu_ppp_host_key_num)},
-	{"ppp_ssu_mac_rlt_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ppp_ssu_mac_rlt_num)},
-	{"ppp_ssu_host_rlt_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ppp_ssu_host_rlt_num)},
-	{"ssu_tx_in_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ssu_tx_in_num)},
-	{"ssu_tx_out_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ssu_tx_out_num)},
-	{"ssu_rx_in_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ssu_rx_in_num)},
-	{"ssu_rx_out_num",
-		HCLGE_64BIT_STATS_FIELD_OFF(ssu_rx_out_num)}
-};
-
-static const struct hclge_comm_stats_str g_all_32bit_stats_string[] = {
-	{"igu_rx_err_pkt",
-		HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_err_pkt)},
-	{"igu_rx_no_eof_pkt",
-		HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_no_eof_pkt)},
-	{"igu_rx_no_sof_pkt",
-		HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_no_sof_pkt)},
-	{"egu_tx_1588_pkt",
-		HCLGE_32BIT_STATS_FIELD_OFF(egu_tx_1588_pkt)},
-	{"ssu_full_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(ssu_full_drop_num)},
-	{"ssu_part_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(ssu_part_drop_num)},
-	{"ppp_key_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(ppp_key_drop_num)},
-	{"ppp_rlt_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(ppp_rlt_drop_num)},
-	{"ssu_key_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(ssu_key_drop_num)},
-	{"pkt_curr_buf_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_cnt)},
-	{"qcn_fb_rcv_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_rcv_cnt)},
-	{"qcn_fb_drop_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_drop_cnt)},
-	{"qcn_fb_invaild_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_invaild_cnt)},
-	{"rx_packet_tc0_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc0_in_cnt)},
-	{"rx_packet_tc1_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc1_in_cnt)},
-	{"rx_packet_tc2_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc2_in_cnt)},
-	{"rx_packet_tc3_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc3_in_cnt)},
-	{"rx_packet_tc4_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc4_in_cnt)},
-	{"rx_packet_tc5_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc5_in_cnt)},
-	{"rx_packet_tc6_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc6_in_cnt)},
-	{"rx_packet_tc7_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc7_in_cnt)},
-	{"rx_packet_tc0_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc0_out_cnt)},
-	{"rx_packet_tc1_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc1_out_cnt)},
-	{"rx_packet_tc2_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc2_out_cnt)},
-	{"rx_packet_tc3_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc3_out_cnt)},
-	{"rx_packet_tc4_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc4_out_cnt)},
-	{"rx_packet_tc5_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc5_out_cnt)},
-	{"rx_packet_tc6_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc6_out_cnt)},
-	{"rx_packet_tc7_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc7_out_cnt)},
-	{"tx_packet_tc0_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc0_in_cnt)},
-	{"tx_packet_tc1_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc1_in_cnt)},
-	{"tx_packet_tc2_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc2_in_cnt)},
-	{"tx_packet_tc3_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc3_in_cnt)},
-	{"tx_packet_tc4_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc4_in_cnt)},
-	{"tx_packet_tc5_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc5_in_cnt)},
-	{"tx_packet_tc6_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc6_in_cnt)},
-	{"tx_packet_tc7_in_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc7_in_cnt)},
-	{"tx_packet_tc0_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc0_out_cnt)},
-	{"tx_packet_tc1_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc1_out_cnt)},
-	{"tx_packet_tc2_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc2_out_cnt)},
-	{"tx_packet_tc3_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc3_out_cnt)},
-	{"tx_packet_tc4_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc4_out_cnt)},
-	{"tx_packet_tc5_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc5_out_cnt)},
-	{"tx_packet_tc6_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc6_out_cnt)},
-	{"tx_packet_tc7_out_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc7_out_cnt)},
-	{"pkt_curr_buf_tc0_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc0_cnt)},
-	{"pkt_curr_buf_tc1_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc1_cnt)},
-	{"pkt_curr_buf_tc2_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc2_cnt)},
-	{"pkt_curr_buf_tc3_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc3_cnt)},
-	{"pkt_curr_buf_tc4_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc4_cnt)},
-	{"pkt_curr_buf_tc5_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc5_cnt)},
-	{"pkt_curr_buf_tc6_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc6_cnt)},
-	{"pkt_curr_buf_tc7_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc7_cnt)},
-	{"mb_uncopy_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(mb_uncopy_num)},
-	{"lo_pri_unicast_rlt_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(lo_pri_unicast_rlt_drop_num)},
-	{"hi_pri_multicast_rlt_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(hi_pri_multicast_rlt_drop_num)},
-	{"lo_pri_multicast_rlt_drop_num",
-		HCLGE_32BIT_STATS_FIELD_OFF(lo_pri_multicast_rlt_drop_num)},
-	{"rx_oq_drop_pkt_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(rx_oq_drop_pkt_cnt)},
-	{"tx_oq_drop_pkt_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(tx_oq_drop_pkt_cnt)},
-	{"nic_l2_err_drop_pkt_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(nic_l2_err_drop_pkt_cnt)},
-	{"roc_l2_err_drop_pkt_cnt",
-		HCLGE_32BIT_STATS_FIELD_OFF(roc_l2_err_drop_pkt_cnt)}
-};
-
 static const struct hclge_comm_stats_str g_mac_stats_string[] = {
 	{"mac_tx_mac_pause_num",
 		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_mac_pause_num)},
 	{"mac_rx_mac_pause_num",
 		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_mac_pause_num)},
+	{"mac_tx_control_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_ctrl_pkt_num)},
+	{"mac_rx_control_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_ctrl_pkt_num)},
+	{"mac_tx_pfc_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pause_pkt_num)},
 	{"mac_tx_pfc_pri0_pkt_num",
 		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri0_pkt_num)},
 	{"mac_tx_pfc_pri1_pkt_num",
@@ -241,6 +174,8 @@
 		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri6_pkt_num)},
 	{"mac_tx_pfc_pri7_pkt_num",
 		HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri7_pkt_num)},
+	{"mac_rx_pfc_pkt_num",
+		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pause_pkt_num)},
 	{"mac_rx_pfc_pri0_pkt_num",
 		HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri0_pkt_num)},
 	{"mac_rx_pfc_pri1_pkt_num",
@@ -387,120 +322,98 @@
 static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = {
 	{
 		.flags = HCLGE_MAC_MGR_MASK_VLAN_B,
-		.ethter_type = cpu_to_le16(HCLGE_MAC_ETHERTYPE_LLDP),
+		.ethter_type = cpu_to_le16(ETH_P_LLDP),
 		.mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)),
 		.mac_addr_lo16 = cpu_to_le16(htons(0x000E)),
 		.i_port_bitmap = 0x1,
 	},
 };
 
-static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
-{
-#define HCLGE_64_BIT_CMD_NUM 5
-#define HCLGE_64_BIT_RTN_DATANUM 4
-	u64 *data = (u64 *)(&hdev->hw_stats.all_64_bit_stats);
-	struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM];
-	__le64 *desc_data;
-	int i, k, n;
-	int ret;
+static const u8 hclge_hash_key[] = {
+	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
+	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
+	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
+	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA
+};
 
-	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_64_BIT, true);
-	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_64_BIT_CMD_NUM);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Get 64 bit pkt stats fail, status = %d.\n", ret);
-		return ret;
-	}
+static const u32 hclge_dfx_bd_offset_list[] = {
+	HCLGE_DFX_BIOS_BD_OFFSET,
+	HCLGE_DFX_SSU_0_BD_OFFSET,
+	HCLGE_DFX_SSU_1_BD_OFFSET,
+	HCLGE_DFX_IGU_BD_OFFSET,
+	HCLGE_DFX_RPU_0_BD_OFFSET,
+	HCLGE_DFX_RPU_1_BD_OFFSET,
+	HCLGE_DFX_NCSI_BD_OFFSET,
+	HCLGE_DFX_RTC_BD_OFFSET,
+	HCLGE_DFX_PPP_BD_OFFSET,
+	HCLGE_DFX_RCB_BD_OFFSET,
+	HCLGE_DFX_TQP_BD_OFFSET,
+	HCLGE_DFX_SSU_2_BD_OFFSET
+};
 
-	for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) {
-		if (unlikely(i == 0)) {
-			desc_data = (__le64 *)(&desc[i].data[0]);
-			n = HCLGE_64_BIT_RTN_DATANUM - 1;
-		} else {
-			desc_data = (__le64 *)(&desc[i]);
-			n = HCLGE_64_BIT_RTN_DATANUM;
-		}
-		for (k = 0; k < n; k++) {
-			*data++ += le64_to_cpu(*desc_data);
-			desc_data++;
-		}
-	}
+static const enum hclge_opcode_type hclge_dfx_reg_opcode_list[] = {
+	HCLGE_OPC_DFX_BIOS_COMMON_REG,
+	HCLGE_OPC_DFX_SSU_REG_0,
+	HCLGE_OPC_DFX_SSU_REG_1,
+	HCLGE_OPC_DFX_IGU_EGU_REG,
+	HCLGE_OPC_DFX_RPU_REG_0,
+	HCLGE_OPC_DFX_RPU_REG_1,
+	HCLGE_OPC_DFX_NCSI_REG,
+	HCLGE_OPC_DFX_RTC_REG,
+	HCLGE_OPC_DFX_PPP_REG,
+	HCLGE_OPC_DFX_RCB_REG,
+	HCLGE_OPC_DFX_TQP_REG,
+	HCLGE_OPC_DFX_SSU_REG_2
+};
 
-	return 0;
-}
+static const struct key_info meta_data_key_info[] = {
+	{ PACKET_TYPE_ID, 6},
+	{ IP_FRAGEMENT, 1},
+	{ ROCE_TYPE, 1},
+	{ NEXT_KEY, 5},
+	{ VLAN_NUMBER, 2},
+	{ SRC_VPORT, 12},
+	{ DST_VPORT, 12},
+	{ TUNNEL_PACKET, 1},
+};
 
-static void hclge_reset_partial_32bit_counter(struct hclge_32_bit_stats *stats)
-{
-	stats->pkt_curr_buf_cnt     = 0;
-	stats->pkt_curr_buf_tc0_cnt = 0;
-	stats->pkt_curr_buf_tc1_cnt = 0;
-	stats->pkt_curr_buf_tc2_cnt = 0;
-	stats->pkt_curr_buf_tc3_cnt = 0;
-	stats->pkt_curr_buf_tc4_cnt = 0;
-	stats->pkt_curr_buf_tc5_cnt = 0;
-	stats->pkt_curr_buf_tc6_cnt = 0;
-	stats->pkt_curr_buf_tc7_cnt = 0;
-}
+static const struct key_info tuple_key_info[] = {
+	{ OUTER_DST_MAC, 48},
+	{ OUTER_SRC_MAC, 48},
+	{ OUTER_VLAN_TAG_FST, 16},
+	{ OUTER_VLAN_TAG_SEC, 16},
+	{ OUTER_ETH_TYPE, 16},
+	{ OUTER_L2_RSV, 16},
+	{ OUTER_IP_TOS, 8},
+	{ OUTER_IP_PROTO, 8},
+	{ OUTER_SRC_IP, 32},
+	{ OUTER_DST_IP, 32},
+	{ OUTER_L3_RSV, 16},
+	{ OUTER_SRC_PORT, 16},
+	{ OUTER_DST_PORT, 16},
+	{ OUTER_L4_RSV, 32},
+	{ OUTER_TUN_VNI, 24},
+	{ OUTER_TUN_FLOW_ID, 8},
+	{ INNER_DST_MAC, 48},
+	{ INNER_SRC_MAC, 48},
+	{ INNER_VLAN_TAG_FST, 16},
+	{ INNER_VLAN_TAG_SEC, 16},
+	{ INNER_ETH_TYPE, 16},
+	{ INNER_L2_RSV, 16},
+	{ INNER_IP_TOS, 8},
+	{ INNER_IP_PROTO, 8},
+	{ INNER_SRC_IP, 32},
+	{ INNER_DST_IP, 32},
+	{ INNER_L3_RSV, 16},
+	{ INNER_SRC_PORT, 16},
+	{ INNER_DST_PORT, 16},
+	{ INNER_L4_RSV, 32},
+};
 
-static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
-{
-#define HCLGE_32_BIT_CMD_NUM 8
-#define HCLGE_32_BIT_RTN_DATANUM 8
-
-	struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM];
-	struct hclge_32_bit_stats *all_32_bit_stats;
-	__le32 *desc_data;
-	int i, k, n;
-	u64 *data;
-	int ret;
-
-	all_32_bit_stats = &hdev->hw_stats.all_32_bit_stats;
-	data = (u64 *)(&all_32_bit_stats->egu_tx_1588_pkt);
-
-	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_32_BIT, true);
-	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_32_BIT_CMD_NUM);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Get 32 bit pkt stats fail, status = %d.\n", ret);
-
-		return ret;
-	}
-
-	hclge_reset_partial_32bit_counter(all_32_bit_stats);
-	for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) {
-		if (unlikely(i == 0)) {
-			__le16 *desc_data_16bit;
-
-			all_32_bit_stats->igu_rx_err_pkt +=
-				le32_to_cpu(desc[i].data[0]);
-
-			desc_data_16bit = (__le16 *)&desc[i].data[1];
-			all_32_bit_stats->igu_rx_no_eof_pkt +=
-				le16_to_cpu(*desc_data_16bit);
-
-			desc_data_16bit++;
-			all_32_bit_stats->igu_rx_no_sof_pkt +=
-				le16_to_cpu(*desc_data_16bit);
-
-			desc_data = &desc[i].data[2];
-			n = HCLGE_32_BIT_RTN_DATANUM - 4;
-		} else {
-			desc_data = (__le32 *)&desc[i];
-			n = HCLGE_32_BIT_RTN_DATANUM;
-		}
-		for (k = 0; k < n; k++) {
-			*data++ += le32_to_cpu(*desc_data);
-			desc_data++;
-		}
-	}
-
-	return 0;
-}
-
-static int hclge_mac_update_stats(struct hclge_dev *hdev)
+static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
 {
 #define HCLGE_MAC_CMD_NUM 21
-#define HCLGE_RTN_DATA_NUM 4
 
 	u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
 	struct hclge_desc desc[HCLGE_MAC_CMD_NUM];
@@ -518,15 +431,18 @@
 	}
 
 	for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) {
+		/* for special opcode 0x0032, only the first desc has the head */
 		if (unlikely(i == 0)) {
 			desc_data = (__le64 *)(&desc[i].data[0]);
-			n = HCLGE_RTN_DATA_NUM - 2;
+			n = HCLGE_RD_FIRST_STATS_NUM;
 		} else {
 			desc_data = (__le64 *)(&desc[i]);
-			n = HCLGE_RTN_DATA_NUM;
+			n = HCLGE_RD_OTHER_STATS_NUM;
 		}
+
 		for (k = 0; k < n; k++) {
-			*data++ += le64_to_cpu(*desc_data);
+			*data += le64_to_cpu(*desc_data);
+			data++;
 			desc_data++;
 		}
 	}
@@ -534,6 +450,89 @@
 	return 0;
 }
 
+static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num)
+{
+	u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
+	struct hclge_desc *desc;
+	__le64 *desc_data;
+	u16 i, k, n;
+	int ret;
+
+	/* This may be called inside atomic sections,
+	 * so GFP_ATOMIC is more suitable here
+	 */
+	desc = kcalloc(desc_num, sizeof(struct hclge_desc), GFP_ATOMIC);
+	if (!desc)
+		return -ENOMEM;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_MAC_ALL, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, desc_num);
+	if (ret) {
+		kfree(desc);
+		return ret;
+	}
+
+	for (i = 0; i < desc_num; i++) {
+		/* for special opcode 0x0034, only the first desc has the head */
+		if (i == 0) {
+			desc_data = (__le64 *)(&desc[i].data[0]);
+			n = HCLGE_RD_FIRST_STATS_NUM;
+		} else {
+			desc_data = (__le64 *)(&desc[i]);
+			n = HCLGE_RD_OTHER_STATS_NUM;
+		}
+
+		for (k = 0; k < n; k++) {
+			*data += le64_to_cpu(*desc_data);
+			data++;
+			desc_data++;
+		}
+	}
+
+	kfree(desc);
+
+	return 0;
+}
+
+static int hclge_mac_query_reg_num(struct hclge_dev *hdev, u32 *desc_num)
+{
+	struct hclge_desc desc;
+	__le32 *desc_data;
+	u32 reg_num;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_REG_NUM, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		return ret;
+
+	desc_data = (__le32 *)(&desc.data[0]);
+	reg_num = le32_to_cpu(*desc_data);
+
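+	/* the first desc returns three registers, each later desc returns four */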
+	*desc_num = 1 + ((reg_num - 3) >> 2) +
+		    (u32)(((reg_num - 3) & 0x3) ? 1 : 0);
+
+	return 0;
+}
+
+static int hclge_mac_update_stats(struct hclge_dev *hdev)
+{
+	u32 desc_num;
+	int ret;
+
+	ret = hclge_mac_query_reg_num(hdev, &desc_num);
+
+	/* The firmware supports the new statistics acquisition method */
+	if (!ret)
+		ret = hclge_mac_update_stats_complete(hdev, desc_num);
+	else if (ret == -EOPNOTSUPP)
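+		/* older firmware: fall back to reading the fixed set of stats */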
+		ret = hclge_mac_update_stats_defective(hdev);
+	else
+		dev_err(&hdev->pdev->dev, "query mac reg num fail!\n");
+
+	return ret;
+}
+
 static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
@@ -548,8 +547,7 @@
 		queue = handle->kinfo.tqp[i];
 		tqp = container_of(queue, struct hclge_tqp, q);
 		/* command : HCLGE_OPC_QUERY_IGU_STAT */
-		hclge_cmd_setup_basic_desc(&desc[0],
-					   HCLGE_OPC_QUERY_RX_STATUS,
+		hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_RX_STATUS,
 					   true);
 
 		desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
@@ -557,7 +555,7 @@
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
 				"Query tqp stat fail, status = %d,queue = %d\n",
-				ret,	i);
+				ret, i);
 			return ret;
 		}
 		tqp->tqp_stats.rcb_rx_ring_pktnum_rcd +=
@@ -611,6 +609,7 @@
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 
+	/* each tqp has two queues: TX and RX */
 	return kinfo->num_tqps * (2);
 }
 
@@ -623,7 +622,7 @@
 	for (i = 0; i < kinfo->num_tqps; i++) {
 		struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
 			struct hclge_tqp, q);
-		snprintf(buff, ETH_GSTRING_LEN, "txq#%d_pktnum_rcd",
+		snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
 			 tqp->index);
 		buff = buff + ETH_GSTRING_LEN;
 	}
@@ -631,7 +630,7 @@
 	for (i = 0; i < kinfo->num_tqps; i++) {
 		struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
 			struct hclge_tqp, q);
-		snprintf(buff, ETH_GSTRING_LEN, "rxq#%d_pktnum_rcd",
+		snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
 			 tqp->index);
 		buff = buff + ETH_GSTRING_LEN;
 	}
@@ -639,7 +638,7 @@
 	return buff;
 }
 
-static u64 *hclge_comm_get_stats(void *comm_stats,
+static u64 *hclge_comm_get_stats(const void *comm_stats,
 				 const struct hclge_comm_stats_str strs[],
 				 int size, u64 *data)
 {
@@ -663,40 +662,13 @@
 		return buff;
 
 	for (i = 0; i < size; i++) {
-		snprintf(buff, ETH_GSTRING_LEN,
-			 strs[i].desc);
+		snprintf(buff, ETH_GSTRING_LEN, "%s", strs[i].desc);
 		buff = buff + ETH_GSTRING_LEN;
 	}
 
 	return (u8 *)buff;
 }
 
-static void hclge_update_netstat(struct hclge_hw_stats *hw_stats,
-				 struct net_device_stats *net_stats)
-{
-	net_stats->tx_dropped = 0;
-	net_stats->rx_dropped = hw_stats->all_32_bit_stats.ssu_full_drop_num;
-	net_stats->rx_dropped += hw_stats->all_32_bit_stats.ppp_key_drop_num;
-	net_stats->rx_dropped += hw_stats->all_32_bit_stats.ssu_key_drop_num;
-
-	net_stats->rx_errors = hw_stats->mac_stats.mac_rx_oversize_pkt_num;
-	net_stats->rx_errors += hw_stats->mac_stats.mac_rx_undersize_pkt_num;
-	net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_no_eof_pkt;
-	net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_no_sof_pkt;
-	net_stats->rx_errors += hw_stats->mac_stats.mac_rx_fcs_err_pkt_num;
-
-	net_stats->multicast = hw_stats->mac_stats.mac_tx_multi_pkt_num;
-	net_stats->multicast += hw_stats->mac_stats.mac_rx_multi_pkt_num;
-
-	net_stats->rx_crc_errors = hw_stats->mac_stats.mac_rx_fcs_err_pkt_num;
-	net_stats->rx_length_errors =
-		hw_stats->mac_stats.mac_rx_undersize_pkt_num;
-	net_stats->rx_length_errors +=
-		hw_stats->mac_stats.mac_rx_oversize_pkt_num;
-	net_stats->rx_over_errors =
-		hw_stats->mac_stats.mac_rx_oversize_pkt_num;
-}
-
 static void hclge_update_stats_for_all(struct hclge_dev *hdev)
 {
 	struct hnae3_handle *handle;
@@ -716,14 +688,6 @@
 	if (status)
 		dev_err(&hdev->pdev->dev,
 			"Update MAC stats fail, status = %d.\n", status);
-
-	status = hclge_32_bit_update_stats(hdev);
-	if (status)
-		dev_err(&hdev->pdev->dev,
-			"Update 32 bit stats fail, status = %d.\n",
-			status);
-
-	hclge_update_netstat(&hdev->hw_stats, &handle->kinfo.netdev->stats);
 }
 
 static void hclge_update_stats(struct hnae3_handle *handle,
@@ -731,7 +695,6 @@
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_hw_stats *hw_stats = &hdev->hw_stats;
 	int status;
 
 	if (test_and_set_bit(HCLGE_STATE_STATISTICS_UPDATING, &hdev->state))
@@ -743,32 +706,21 @@
 			"Update MAC stats fail, status = %d.\n",
 			status);
 
-	status = hclge_32_bit_update_stats(hdev);
-	if (status)
-		dev_err(&hdev->pdev->dev,
-			"Update 32 bit stats fail, status = %d.\n",
-			status);
-
-	status = hclge_64_bit_update_stats(hdev);
-	if (status)
-		dev_err(&hdev->pdev->dev,
-			"Update 64 bit stats fail, status = %d.\n",
-			status);
-
 	status = hclge_tqps_update_stats(handle);
 	if (status)
 		dev_err(&hdev->pdev->dev,
 			"Update TQPS stats fail, status = %d.\n",
 			status);
 
-	hclge_update_netstat(hw_stats, net_stats);
-
 	clear_bit(HCLGE_STATE_STATISTICS_UPDATING, &hdev->state);
 }
 
 static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
 {
-#define HCLGE_LOOPBACK_TEST_FLAGS 0x7
+#define HCLGE_LOOPBACK_TEST_FLAGS (HNAE3_SUPPORT_APP_LOOPBACK |\
+		HNAE3_SUPPORT_PHY_LOOPBACK |\
+		HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK |\
+		HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK)
 
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
@@ -782,27 +734,32 @@
 	if (stringset == ETH_SS_TEST) {
 		/* clear loopback bit flags at first */
 		handle->flags = (handle->flags & (~HCLGE_LOOPBACK_TEST_FLAGS));
-		if (hdev->hw.mac.speed == HCLGE_MAC_SPEED_10M ||
+		if (hdev->pdev->revision >= 0x21 ||
+		    hdev->hw.mac.speed == HCLGE_MAC_SPEED_10M ||
 		    hdev->hw.mac.speed == HCLGE_MAC_SPEED_100M ||
 		    hdev->hw.mac.speed == HCLGE_MAC_SPEED_1G) {
 			count += 1;
-			handle->flags |= HNAE3_SUPPORT_MAC_LOOPBACK;
+			handle->flags |= HNAE3_SUPPORT_APP_LOOPBACK;
 		}
 
-		count++;
-		handle->flags |= HNAE3_SUPPORT_SERDES_LOOPBACK;
+		count += 2;
+		handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
+		handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
+
+		if (hdev->hw.mac.phydev) {
+			count += 1;
+			handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK;
+		}
+
 	} else if (stringset == ETH_SS_STATS) {
 		count = ARRAY_SIZE(g_mac_stats_string) +
-			ARRAY_SIZE(g_all_32bit_stats_string) +
-			ARRAY_SIZE(g_all_64bit_stats_string) +
 			hclge_tqps_get_sset_count(handle, stringset);
 	}
 
 	return count;
 }
 
-static void hclge_get_strings(struct hnae3_handle *handle,
-			      u32 stringset,
+static void hclge_get_strings(struct hnae3_handle *handle, u32 stringset,
 			      u8 *data)
 {
 	u8 *p = (char *)data;
@@ -810,37 +767,28 @@
 
 	if (stringset == ETH_SS_STATS) {
 		size = ARRAY_SIZE(g_mac_stats_string);
-		p = hclge_comm_get_strings(stringset,
-					   g_mac_stats_string,
-					   size,
-					   p);
-		size = ARRAY_SIZE(g_all_32bit_stats_string);
-		p = hclge_comm_get_strings(stringset,
-					   g_all_32bit_stats_string,
-					   size,
-					   p);
-		size = ARRAY_SIZE(g_all_64bit_stats_string);
-		p = hclge_comm_get_strings(stringset,
-					   g_all_64bit_stats_string,
-					   size,
-					   p);
+		p = hclge_comm_get_strings(stringset, g_mac_stats_string,
+					   size, p);
 		p = hclge_tqps_get_strings(handle, p);
 	} else if (stringset == ETH_SS_TEST) {
-		if (handle->flags & HNAE3_SUPPORT_MAC_LOOPBACK) {
-			memcpy(p,
-			       hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_MAC],
+		if (handle->flags & HNAE3_SUPPORT_APP_LOOPBACK) {
+			memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_APP],
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
-		if (handle->flags & HNAE3_SUPPORT_SERDES_LOOPBACK) {
+		if (handle->flags & HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK) {
+			memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_SERIAL_SERDES],
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		if (handle->flags & HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK) {
 			memcpy(p,
-			       hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_SERDES],
+			       hns3_nic_test_strs[HNAE3_LOOP_PARALLEL_SERDES],
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
 		if (handle->flags & HNAE3_SUPPORT_PHY_LOOPBACK) {
-			memcpy(p,
-			       hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_PHY],
+			memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_PHY],
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
@@ -853,21 +801,23 @@
 	struct hclge_dev *hdev = vport->back;
 	u64 *p;
 
-	p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats,
-				 g_mac_stats_string,
-				 ARRAY_SIZE(g_mac_stats_string),
-				 data);
-	p = hclge_comm_get_stats(&hdev->hw_stats.all_32_bit_stats,
-				 g_all_32bit_stats_string,
-				 ARRAY_SIZE(g_all_32bit_stats_string),
-				 p);
-	p = hclge_comm_get_stats(&hdev->hw_stats.all_64_bit_stats,
-				 g_all_64bit_stats_string,
-				 ARRAY_SIZE(g_all_64bit_stats_string),
-				 p);
+	p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string,
+				 ARRAY_SIZE(g_mac_stats_string), data);
 	p = hclge_tqps_get_stats(handle, p);
 }
 
+static void hclge_get_mac_stat(struct hnae3_handle *handle,
+			       struct hns3_mac_stats *mac_stats)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	hclge_update_stats(handle, NULL);
+
+	mac_stats->tx_pause_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num;
+	mac_stats->rx_pause_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num;
+}
+
 static int hclge_parse_func_status(struct hclge_dev *hdev,
 				   struct hclge_func_status_cmd *status)
 {
@@ -885,6 +835,8 @@
 
 static int hclge_query_function_status(struct hclge_dev *hdev)
 {
+#define HCLGE_QUERY_MAX_CNT	5
+
 	struct hclge_func_status_cmd *req;
 	struct hclge_desc desc;
 	int timeout = 0;
@@ -897,9 +849,7 @@
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"query function status failed %d.\n",
-				ret);
-
+				"query function status failed %d.\n", ret);
 			return ret;
 		}
 
@@ -907,7 +857,7 @@
 		if (req->pf_state)
 			break;
 		usleep_range(1000, 2000);
-	} while (timeout++ < 5);
+	} while (timeout++ < HCLGE_QUERY_MAX_CNT);
 
 	ret = hclge_parse_func_status(hdev, req);
 
@@ -932,6 +882,22 @@
 	hdev->num_tqps = __le16_to_cpu(req->tqp_num);
 	hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
 
+	if (req->tx_buf_size)
+		hdev->tx_buf_size =
+			__le16_to_cpu(req->tx_buf_size) << HCLGE_BUF_UNIT_S;
+	else
+		hdev->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+
+	hdev->tx_buf_size = roundup(hdev->tx_buf_size, HCLGE_BUF_SIZE_UNIT);
+
+	if (req->dv_buf_size)
+		hdev->dv_buf_size =
+			__le16_to_cpu(req->dv_buf_size) << HCLGE_BUF_UNIT_S;
+	else
+		hdev->dv_buf_size = HCLGE_DEFAULT_DV;
+
+	hdev->dv_buf_size = roundup(hdev->dv_buf_size, HCLGE_BUF_SIZE_UNIT);
+
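/* Illustrative sketch, not part of the patch: both sizes above follow the
 * same "firmware-reported value if present, else a default, then round up
 * to the allocation unit" pattern. A standalone version (all parameters
 * hypothetical):
 */
static unsigned int buf_size_or_default(unsigned int fw_val,
					unsigned int dflt,
					unsigned int align)
{
	unsigned int v = fw_val ? fw_val : dflt;

	return (v + align - 1) / align * align;	/* roundup(v, align) */
}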
 	if (hnae3_dev_roce_supported(hdev)) {
 		hdev->roce_base_msix_offset =
 		hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
@@ -940,15 +906,27 @@
 		hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
 				HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
 
+		/* the nic's msix number always equals the roce's. */
+		hdev->num_nic_msi = hdev->num_roce_msi;
+
 		/* PF should have NIC vectors and Roce vectors,
 		 * NIC vectors are queued before Roce vectors.
 		 */
-		hdev->num_msi = hdev->num_roce_msi  +
+		hdev->num_msi = hdev->num_roce_msi +
 				hdev->roce_base_msix_offset;
 	} else {
 		hdev->num_msi =
 		hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
 				HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+		hdev->num_nic_msi = hdev->num_msi;
+	}
+
+	if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) {
+		dev_err(&hdev->pdev->dev,
+			"Just %u msi resources, not enough for pf(min:2).\n",
+			hdev->num_nic_msi);
+		return -EINVAL;
 	}
 
 	return 0;
@@ -988,43 +966,232 @@
 	return 0;
 }
 
+static int hclge_check_port_speed(struct hnae3_handle *handle, u32 speed)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u32 speed_ability = hdev->hw.mac.speed_ability;
+	u32 speed_bit = 0;
+
+	switch (speed) {
+	case HCLGE_MAC_SPEED_10M:
+		speed_bit = HCLGE_SUPPORT_10M_BIT;
+		break;
+	case HCLGE_MAC_SPEED_100M:
+		speed_bit = HCLGE_SUPPORT_100M_BIT;
+		break;
+	case HCLGE_MAC_SPEED_1G:
+		speed_bit = HCLGE_SUPPORT_1G_BIT;
+		break;
+	case HCLGE_MAC_SPEED_10G:
+		speed_bit = HCLGE_SUPPORT_10G_BIT;
+		break;
+	case HCLGE_MAC_SPEED_25G:
+		speed_bit = HCLGE_SUPPORT_25G_BIT;
+		break;
+	case HCLGE_MAC_SPEED_40G:
+		speed_bit = HCLGE_SUPPORT_40G_BIT;
+		break;
+	case HCLGE_MAC_SPEED_50G:
+		speed_bit = HCLGE_SUPPORT_50G_BIT;
+		break;
+	case HCLGE_MAC_SPEED_100G:
+		speed_bit = HCLGE_SUPPORT_100G_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (speed_bit & speed_ability)
+		return 0;
+
+	return -EINVAL;
+}
+
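/* Illustrative sketch, not part of the patch: the check above is a plain
 * bitmap test -- each supported speed owns one bit of speed_ability, and an
 * unsupported or unknown speed yields -EINVAL. Standalone, with the bit
 * assignments hypothetical:
 */
#include <errno.h>

#define SUP_1G_BIT	(1U << 0)	/* hypothetical bit position */
#define SUP_10G_BIT	(1U << 1)	/* hypothetical bit position */

static int check_port_speed(unsigned int speed_ability, unsigned int speed_bit)
{
	return (speed_ability & speed_bit) ? 0 : -EINVAL;
}
/* check_port_speed(SUP_1G_BIT | SUP_10G_BIT, SUP_10G_BIT) returns 0 */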
+static void hclge_convert_setting_sr(struct hclge_mac *mac, u8 speed_ability)
+{
+	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+				 mac->supported);
+}
+
+static void hclge_convert_setting_lr(struct hclge_mac *mac, u8 speed_ability)
+{
+	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+				 mac->supported);
+}
+
+static void hclge_convert_setting_cr(struct hclge_mac *mac, u8 speed_ability)
+{
+	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+				 mac->supported);
+}
+
+static void hclge_convert_setting_kr(struct hclge_mac *mac, u8 speed_ability)
+{
+	if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+				 mac->supported);
+	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+				 mac->supported);
+}
+
+static void hclge_convert_setting_fec(struct hclge_mac *mac)
+{
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, mac->supported);
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, mac->supported);
+
+	switch (mac->speed) {
+	case HCLGE_MAC_SPEED_10G:
+	case HCLGE_MAC_SPEED_40G:
+		linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+				 mac->supported);
+		mac->fec_ability =
+			BIT(HNAE3_FEC_BASER) | BIT(HNAE3_FEC_AUTO);
+		break;
+	case HCLGE_MAC_SPEED_25G:
+	case HCLGE_MAC_SPEED_50G:
+		linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT,
+				 mac->supported);
+		mac->fec_ability =
+			BIT(HNAE3_FEC_BASER) | BIT(HNAE3_FEC_RS) |
+			BIT(HNAE3_FEC_AUTO);
+		break;
+	case HCLGE_MAC_SPEED_100G:
+		linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, mac->supported);
+		mac->fec_ability = BIT(HNAE3_FEC_RS) | BIT(HNAE3_FEC_AUTO);
+		break;
+	default:
+		mac->fec_ability = 0;
+		break;
+	}
+}
+
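/* Illustrative sketch, not part of the patch: hclge_convert_setting_fec()
 * derives the FEC ability purely from the current speed. The same mapping
 * as a standalone switch, using raw Mbps values and hypothetical flags:
 */
#define FEC_BASER	(1U << 0)	/* hypothetical flag value */
#define FEC_RS		(1U << 1)	/* hypothetical flag value */
#define FEC_AUTO	(1U << 2)	/* hypothetical flag value */

static unsigned int fec_ability_for_speed(int speed_mbps)
{
	switch (speed_mbps) {
	case 10000:
	case 40000:
		return FEC_BASER | FEC_AUTO;		/* BASE-R only */
	case 25000:
	case 50000:
		return FEC_BASER | FEC_RS | FEC_AUTO;	/* both encodings */
	case 100000:
		return FEC_RS | FEC_AUTO;		/* RS only */
	default:
		return 0;				/* no FEC */
	}
}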
 static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
 					u8 speed_ability)
 {
-	unsigned long *supported = hdev->hw.mac.supported;
+	struct hclge_mac *mac = &hdev->hw.mac;
 
 	if (speed_ability & HCLGE_SUPPORT_1G_BIT)
-		set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
-			supported);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+				 mac->supported);
 
-	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
-		set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
-			supported);
+	hclge_convert_setting_sr(mac, speed_ability);
+	hclge_convert_setting_lr(mac, speed_ability);
+	hclge_convert_setting_cr(mac, speed_ability);
+	if (hdev->pdev->revision >= 0x21)
+		hclge_convert_setting_fec(mac);
 
-	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
-		set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
-			supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
+}
 
-	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
-		set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
-			supported);
+static void hclge_parse_backplane_link_mode(struct hclge_dev *hdev,
+					    u8 speed_ability)
+{
+	struct hclge_mac *mac = &hdev->hw.mac;
 
-	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
-		set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
-			supported);
+	hclge_convert_setting_kr(mac, speed_ability);
+	if (hdev->pdev->revision >= 0x21)
+		hclge_convert_setting_fec(mac);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
+}
 
-	set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
-	set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+static void hclge_parse_copper_link_mode(struct hclge_dev *hdev,
+					 u8 speed_ability)
+{
+	unsigned long *supported = hdev->hw.mac.supported;
+
+	/* default to supporting all speeds for a GE port */
+	if (!speed_ability)
+		speed_ability = HCLGE_SUPPORT_GE;
+
+	if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				 supported);
+
+	if (speed_ability & HCLGE_SUPPORT_100M_BIT) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				 supported);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				 supported);
+	}
+
+	if (speed_ability & HCLGE_SUPPORT_10M_BIT) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, supported);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported);
+	}
+
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
 }
 
 static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
 {
 	u8 media_type = hdev->hw.mac.media_type;
 
-	if (media_type != HNAE3_MEDIA_TYPE_FIBER)
-		return;
-
-	hclge_parse_fiber_link_mode(hdev, speed_ability);
+	if (media_type == HNAE3_MEDIA_TYPE_FIBER)
+		hclge_parse_fiber_link_mode(hdev, speed_ability);
+	else if (media_type == HNAE3_MEDIA_TYPE_COPPER)
+		hclge_parse_copper_link_mode(hdev, speed_ability);
+	else if (media_type == HNAE3_MEDIA_TYPE_BACKPLANE)
+		hclge_parse_backplane_link_mode(hdev, speed_ability);
 }
 
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
@@ -1032,7 +1199,7 @@
 	struct hclge_cfg_param_cmd *req;
 	u64 mac_addr_tmp_high;
 	u64 mac_addr_tmp;
-	int i;
+	unsigned int i;
 
 	req = (struct hclge_cfg_param_cmd *)desc[0].data;
 
@@ -1079,6 +1246,11 @@
 	cfg->speed_ability = hnae3_get_field(__le32_to_cpu(req->param[1]),
 					     HCLGE_CFG_SPEED_ABILITY_M,
 					     HCLGE_CFG_SPEED_ABILITY_S);
+	cfg->umv_space = hnae3_get_field(__le32_to_cpu(req->param[1]),
+					 HCLGE_CFG_UMV_TBL_SPACE_M,
+					 HCLGE_CFG_UMV_TBL_SPACE_S);
+	if (!cfg->umv_space)
+		cfg->umv_space = HCLGE_DEFAULT_UMV_SPACE_PER_PF;
 }
 
 /* hclge_get_cfg: query the static parameter from flash
@@ -1089,7 +1261,8 @@
 {
 	struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM];
 	struct hclge_cfg_param_cmd *req;
-	int i, ret;
+	unsigned int i;
+	int ret;
 
 	for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) {
 		u32 offset = 0;
@@ -1135,10 +1308,28 @@
 	return ret;
 }
 
+static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev)
+{
+#define HCLGE_MIN_TX_DESC	64
+#define HCLGE_MIN_RX_DESC	64
+
+	if (!is_kdump_kernel())
+		return;
+
+	dev_info(&hdev->pdev->dev,
+		 "Running kdump kernel. Using minimal resources\n");
+
+	/* the minimum number of queue pairs equals the number of vports */
+	hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+	hdev->num_tx_desc = HCLGE_MIN_TX_DESC;
+	hdev->num_rx_desc = HCLGE_MIN_RX_DESC;
+}
+
 static int hclge_configure(struct hclge_dev *hdev)
 {
 	struct hclge_cfg cfg;
-	int ret, i;
+	unsigned int i;
+	int ret;
 
 	ret = hclge_get_cfg(hdev, &cfg);
 	if (ret) {
@@ -1153,10 +1344,17 @@
 	ether_addr_copy(hdev->hw.mac.mac_addr, cfg.mac_addr);
 	hdev->hw.mac.media_type = cfg.media_type;
 	hdev->hw.mac.phy_addr = cfg.phy_addr;
-	hdev->num_desc = cfg.tqp_desc_num;
+	hdev->num_tx_desc = cfg.tqp_desc_num;
+	hdev->num_rx_desc = cfg.tqp_desc_num;
 	hdev->tm_info.num_pg = 1;
 	hdev->tc_max = cfg.tc_num;
 	hdev->tm_info.hw_pfc_map = 0;
+	hdev->wanted_umv_size = cfg.umv_space;
+
+	if (hnae3_dev_fd_supported(hdev)) {
+		hdev->fd_en = true;
+		hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+	}
 
 	ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
 	if (ret) {
@@ -1181,7 +1379,7 @@
 		hdev->pfc_max = hdev->tc_max;
 	}
 
-	hdev->tm_info.num_tc = hdev->tc_max;
+	hdev->tm_info.num_tc = 1;
 
 	/* Discontiguous TCs are currently not supported */
 	for (i = 0; i < hdev->tm_info.num_tc; i++)
@@ -1189,11 +1387,19 @@
 
 	hdev->tx_sch_mode = HCLGE_FLAG_TC_BASE_SCH_MODE;
 
+	hclge_init_kdump_kernel_config(hdev);
+
+	/* Set the initial affinity based on the PCI function number */
+	i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev)));
+	i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
+	cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)),
+			&hdev->affinity_mask);
+
 	return ret;
 }
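/* Illustrative sketch, not part of the patch: the two cpumask lines in
 * hclge_configure() spread PFs over the CPUs of the device's NUMA node by
 * taking the PCI function number modulo the node's CPU count, falling back
 * to index 0 for an empty mask. The bare arithmetic:
 */
static unsigned int init_affinity_cpu_index(unsigned int pci_func,
					    unsigned int node_cpu_cnt)
{
	return node_cpu_cnt ? pci_func % node_cpu_cnt : 0;
}
/* e.g. PCI function 5 on a 4-CPU node -> index 1 -> the node's second CPU */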
 
-static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min,
-			    int tso_mss_max)
+static int hclge_config_tso(struct hclge_dev *hdev, unsigned int tso_mss_min,
+			    unsigned int tso_mss_max)
 {
 	struct hclge_cfg_tso_status_cmd *req;
 	struct hclge_desc desc;
@@ -1216,6 +1422,28 @@
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
+static int hclge_config_gro(struct hclge_dev *hdev, bool en)
+{
+	struct hclge_cfg_gro_status_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	if (!hnae3_dev_gro_supported(hdev))
+		return 0;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GRO_GENERIC_CONFIG, false);
+	req = (struct hclge_cfg_gro_status_cmd *)desc.data;
+
+	req->gro_en = cpu_to_le16(en ? 1 : 0);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"GRO hardware config cmd failed, ret = %d\n", ret);
+
+	return ret;
+}
+
 static int hclge_alloc_tqps(struct hclge_dev *hdev)
 {
 	struct hclge_tqp *tqp;
@@ -1234,7 +1462,8 @@
 
 		tqp->q.ae_algo = &ae_algo;
 		tqp->q.buf_size = hdev->rx_buf_len;
-		tqp->q.desc_num = hdev->num_desc;
+		tqp->q.tx_desc_num = hdev->num_tx_desc;
+		tqp->q.rx_desc_num = hdev->num_rx_desc;
 		tqp->q.io_base = hdev->hw.io_base + HCLGE_TQP_REG_OFFSET +
 			i * HCLGE_TQP_REG_SIZE;
 
@@ -1256,8 +1485,9 @@
 	req = (struct hclge_tqp_map_cmd *)desc.data;
 	req->tqp_id = cpu_to_le16(tqp_pid);
 	req->tqp_vf = func_id;
-	req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B |
-			1 << HCLGE_TQP_MAP_EN_B;
+	req->tqp_flag = 1U << HCLGE_TQP_MAP_EN_B;
+	if (!is_pf)
+		req->tqp_flag |= 1U << HCLGE_TQP_MAP_TYPE_B;
 	req->tqp_vid = cpu_to_le16(tqp_vid);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1267,64 +1497,55 @@
 	return ret;
 }
 
-static int  hclge_assign_tqp(struct hclge_vport *vport)
+static int  hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps)
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
 	struct hclge_dev *hdev = vport->back;
 	int i, alloced;
 
 	for (i = 0, alloced = 0; i < hdev->num_tqps &&
-	     alloced < kinfo->num_tqps; i++) {
+	     alloced < num_tqps; i++) {
 		if (!hdev->htqp[i].alloced) {
 			hdev->htqp[i].q.handle = &vport->nic;
 			hdev->htqp[i].q.tqp_index = alloced;
-			hdev->htqp[i].q.desc_num = kinfo->num_desc;
+			hdev->htqp[i].q.tx_desc_num = kinfo->num_tx_desc;
+			hdev->htqp[i].q.rx_desc_num = kinfo->num_rx_desc;
 			kinfo->tqp[alloced] = &hdev->htqp[i].q;
 			hdev->htqp[i].alloced = true;
 			alloced++;
 		}
 	}
-	vport->alloc_tqps = kinfo->num_tqps;
+	vport->alloc_tqps = alloced;
+	kinfo->rss_size = min_t(u16, hdev->rss_size_max,
+				vport->alloc_tqps / hdev->tm_info.num_tc);
+
+	/* ensure a one-to-one mapping between irq vector and queue by default */
+	kinfo->rss_size = min_t(u16, kinfo->rss_size,
+				(hdev->num_nic_msi - 1) / hdev->tm_info.num_tc);
 
 	return 0;
 }
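/* Illustrative sketch, not part of the patch: rss_size is clamped twice --
 * once by queues per TC and once by interrupt vectors per TC (one vector
 * stays set aside for the misc/vector0 interrupt, hence num_nic_msi - 1).
 * Standalone, assuming num_tc >= 1 as the driver guarantees:
 */
static unsigned short rss_size_clamp(unsigned short rss_size_max,
				     unsigned short alloc_tqps,
				     unsigned short num_nic_msi,
				     unsigned short num_tc)
{
	unsigned short rss = rss_size_max;

	if (rss > alloc_tqps / num_tc)
		rss = alloc_tqps / num_tc;
	if (rss > (num_nic_msi - 1) / num_tc)
		rss = (num_nic_msi - 1) / num_tc;

	return rss;
}
/* e.g. max 16, 8 tqps, 9 vectors, 2 TCs -> min(16, 4, 4) = 4 */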
 
-static int hclge_knic_setup(struct hclge_vport *vport,
-			    u16 num_tqps, u16 num_desc)
+static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps,
+			    u16 num_tx_desc, u16 num_rx_desc)
+
 {
 	struct hnae3_handle *nic = &vport->nic;
 	struct hnae3_knic_private_info *kinfo = &nic->kinfo;
 	struct hclge_dev *hdev = vport->back;
-	int i, ret;
+	int ret;
 
-	kinfo->num_desc = num_desc;
+	kinfo->num_tx_desc = num_tx_desc;
+	kinfo->num_rx_desc = num_rx_desc;
+
 	kinfo->rx_buf_len = hdev->rx_buf_len;
-	kinfo->num_tc = min_t(u16, num_tqps, hdev->tm_info.num_tc);
-	kinfo->rss_size
-		= min_t(u16, hdev->rss_size_max, num_tqps / kinfo->num_tc);
-	kinfo->num_tqps = kinfo->rss_size * kinfo->num_tc;
 
-	for (i = 0; i < HNAE3_MAX_TC; i++) {
-		if (hdev->hw_tc_map & BIT(i)) {
-			kinfo->tc_info[i].enable = true;
-			kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
-			kinfo->tc_info[i].tqp_count = kinfo->rss_size;
-			kinfo->tc_info[i].tc = i;
-		} else {
-			/* Set to default queue if TC is disable */
-			kinfo->tc_info[i].enable = false;
-			kinfo->tc_info[i].tqp_offset = 0;
-			kinfo->tc_info[i].tqp_count = 1;
-			kinfo->tc_info[i].tc = 0;
-		}
-	}
-
-	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
+	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, num_tqps,
 				  sizeof(struct hnae3_queue *), GFP_KERNEL);
 	if (!kinfo->tqp)
 		return -ENOMEM;
 
-	ret = hclge_assign_tqp(vport);
+	ret = hclge_assign_tqp(vport, num_tqps);
 	if (ret)
 		dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret);
 
@@ -1339,7 +1560,7 @@
 	u16 i;
 
 	kinfo = &nic->kinfo;
-	for (i = 0; i < kinfo->num_tqps; i++) {
+	for (i = 0; i < vport->alloc_tqps; i++) {
 		struct hclge_tqp *q =
 			container_of(kinfo->tqp[i], struct hclge_tqp, q);
 		bool is_pf;
@@ -1374,11 +1595,6 @@
 	return 0;
 }
 
-static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps)
-{
-	/* this would be initialized later */
-}
-
 static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps)
 {
 	struct hnae3_handle *nic = &vport->nic;
@@ -1389,18 +1605,12 @@
 	nic->ae_algo = &ae_algo;
 	nic->numa_node_mask = hdev->numa_node_mask;
 
-	if (hdev->ae_dev->dev_type == HNAE3_DEV_KNIC) {
-		ret = hclge_knic_setup(vport, num_tqps, hdev->num_desc);
-		if (ret) {
-			dev_err(&hdev->pdev->dev, "knic setup failed %d\n",
-				ret);
-			return ret;
-		}
-	} else {
-		hclge_unic_setup(vport, num_tqps);
-	}
+	ret = hclge_knic_setup(vport, num_tqps,
+			       hdev->num_tx_desc, hdev->num_rx_desc);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "knic setup failed %d\n", ret);
 
-	return 0;
+	return ret;
 }
 
 static int hclge_alloc_vport(struct hclge_dev *hdev)
@@ -1439,6 +1649,12 @@
 	for (i = 0; i < num_vport; i++) {
 		vport->back = hdev;
 		vport->vport_id = i;
+		vport->mps = HCLGE_MAC_DEFAULT_FRAME;
+		vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
+		vport->rxvlan_cfg.rx_vlan_offload_en = true;
+		INIT_LIST_HEAD(&vport->vlan_list);
+		INIT_LIST_HEAD(&vport->uc_mac_list);
+		INIT_LIST_HEAD(&vport->mc_mac_list);
 
 		if (i == 0)
 			ret = hclge_vport_setup(vport, tqp_main_vport);
@@ -1471,7 +1687,7 @@
 	req = (struct hclge_tx_buff_alloc_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
-	for (i = 0; i < HCLGE_TC_NUM; i++) {
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		u32 buf_size = buf_alloc->priv_buf[i].tx_buf_size;
 
 		req->tx_pkt_buff[i] =
@@ -1498,9 +1714,10 @@
 	return ret;
 }
 
-static int hclge_get_tc_num(struct hclge_dev *hdev)
+static u32 hclge_get_tc_num(struct hclge_dev *hdev)
 {
-	int i, cnt = 0;
+	unsigned int i;
+	u32 cnt = 0;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
 		if (hdev->hw_tc_map & BIT(i))
@@ -1508,23 +1725,13 @@
 	return cnt;
 }
 
-static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
-{
-	int i, cnt = 0;
-
-	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
-		if (hdev->hw_tc_map & BIT(i) &&
-		    hdev->tm_info.hw_pfc_map & BIT(i))
-			cnt++;
-	return cnt;
-}
-
 /* Get the number of pfc enabled TCs, which have private buffer */
 static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
 				  struct hclge_pkt_buf_alloc *buf_alloc)
 {
 	struct hclge_priv_buf *priv;
-	int i, cnt = 0;
+	unsigned int i;
+	int cnt = 0;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		priv = &buf_alloc->priv_buf[i];
@@ -1541,7 +1748,8 @@
 				     struct hclge_pkt_buf_alloc *buf_alloc)
 {
 	struct hclge_priv_buf *priv;
-	int i, cnt = 0;
+	unsigned int i;
+	int cnt = 0;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		priv = &buf_alloc->priv_buf[i];
@@ -1582,43 +1790,63 @@
 				struct hclge_pkt_buf_alloc *buf_alloc,
 				u32 rx_all)
 {
-	u32 shared_buf_min, shared_buf_tc, shared_std;
-	int tc_num, pfc_enable_num;
-	u32 shared_buf;
+	u32 shared_buf_min, shared_buf_tc, shared_std, hi_thrd, lo_thrd;
+	u32 tc_num = hclge_get_tc_num(hdev);
+	u32 shared_buf, aligned_mps;
 	u32 rx_priv;
 	int i;
 
-	tc_num = hclge_get_tc_num(hdev);
-	pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
+	aligned_mps = roundup(hdev->mps, HCLGE_BUF_SIZE_UNIT);
 
 	if (hnae3_dev_dcb_supported(hdev))
-		shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+		shared_buf_min = HCLGE_BUF_MUL_BY * aligned_mps +
+					hdev->dv_buf_size;
 	else
-		shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_NON_DCB_DV;
+		shared_buf_min = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF
+					+ hdev->dv_buf_size;
 
-	shared_buf_tc = pfc_enable_num * hdev->mps +
-			(tc_num - pfc_enable_num) * hdev->mps / 2 +
-			hdev->mps;
-	shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
+	shared_buf_tc = tc_num * aligned_mps + aligned_mps;
+	shared_std = roundup(max_t(u32, shared_buf_min, shared_buf_tc),
+			     HCLGE_BUF_SIZE_UNIT);
 
 	rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc);
-	if (rx_all <= rx_priv + shared_std)
+	if (rx_all < rx_priv + shared_std)
 		return false;
 
-	shared_buf = rx_all - rx_priv;
+	shared_buf = rounddown(rx_all - rx_priv, HCLGE_BUF_SIZE_UNIT);
 	buf_alloc->s_buf.buf_size = shared_buf;
-	buf_alloc->s_buf.self.high = shared_buf;
-	buf_alloc->s_buf.self.low =  2 * hdev->mps;
+	if (hnae3_dev_dcb_supported(hdev)) {
+		buf_alloc->s_buf.self.high = shared_buf - hdev->dv_buf_size;
+		buf_alloc->s_buf.self.low = buf_alloc->s_buf.self.high
+			- roundup(aligned_mps / HCLGE_BUF_DIV_BY,
+				  HCLGE_BUF_SIZE_UNIT);
+	} else {
+		buf_alloc->s_buf.self.high = aligned_mps +
+						HCLGE_NON_DCB_ADDITIONAL_BUF;
+		buf_alloc->s_buf.self.low = aligned_mps;
+	}
+
+	if (hnae3_dev_dcb_supported(hdev)) {
+		hi_thrd = shared_buf - hdev->dv_buf_size;
+
+		if (tc_num <= NEED_RESERVE_TC_NUM)
+			hi_thrd = hi_thrd * BUF_RESERVE_PERCENT
+					/ BUF_MAX_PERCENT;
+
+		if (tc_num)
+			hi_thrd = hi_thrd / tc_num;
+
+		hi_thrd = max_t(u32, hi_thrd, HCLGE_BUF_MUL_BY * aligned_mps);
+		hi_thrd = rounddown(hi_thrd, HCLGE_BUF_SIZE_UNIT);
+		lo_thrd = hi_thrd - aligned_mps / HCLGE_BUF_DIV_BY;
+	} else {
+		hi_thrd = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF;
+		lo_thrd = aligned_mps;
+	}
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		if ((hdev->hw_tc_map & BIT(i)) &&
-		    (hdev->tm_info.hw_pfc_map & BIT(i))) {
-			buf_alloc->s_buf.tc_thrd[i].low = hdev->mps;
-			buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps;
-		} else {
-			buf_alloc->s_buf.tc_thrd[i].low = 0;
-			buf_alloc->s_buf.tc_thrd[i].high = hdev->mps;
-		}
+		buf_alloc->s_buf.tc_thrd[i].low = lo_thrd;
+		buf_alloc->s_buf.tc_thrd[i].high = hi_thrd;
 	}
 
 	return true;
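/* Illustrative worked example, not part of the patch: the DCB branch of
 * hclge_is_rx_buf_ok() in numbers, taking (all hypothetical) a 256-byte
 * buffer unit, HCLGE_BUF_MUL_BY == 2, mps = 1500 and dv_buf_size = 4096:
 */
static unsigned int shared_std_example(unsigned int tc_num)
{
	unsigned int unit = 256;
	unsigned int aligned_mps = 1536;	/* 1500 rounded up to unit */
	unsigned int shared_buf_min = 2 * aligned_mps + 4096;	/* 7168 */
	unsigned int shared_buf_tc = tc_num * aligned_mps + aligned_mps;
	unsigned int std = shared_buf_min > shared_buf_tc ?
			   shared_buf_min : shared_buf_tc;

	return (std + unit - 1) / unit * unit;	/* roundup to unit */
}
/* tc_num = 4 gives shared_buf_tc = 7680, hence shared_std = 7680; the plan
 * fits only if rx_all >= rx_priv + 7680, otherwise the caller retries with
 * a cheaper allocation strategy.
 */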
@@ -1635,13 +1863,14 @@
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
 
-		if (total_size < HCLGE_DEFAULT_TX_BUF)
-			return -ENOMEM;
+		if (hdev->hw_tc_map & BIT(i)) {
+			if (total_size < hdev->tx_buf_size)
+				return -ENOMEM;
 
-		if (hdev->hw_tc_map & BIT(i))
-			priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
-		else
+			priv->tx_buf_size = hdev->tx_buf_size;
+		} else {
 			priv->tx_buf_size = 0;
+		}
 
 		total_size -= priv->tx_buf_size;
 	}
@@ -1649,64 +1878,15 @@
 	return 0;
 }
 
-/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
- * @hdev: pointer to struct hclge_dev
- * @buf_alloc: pointer to buffer calculation data
- * @return: 0: calculate sucessful, negative: fail
- */
-static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
-				struct hclge_pkt_buf_alloc *buf_alloc)
+static bool hclge_rx_buf_calc_all(struct hclge_dev *hdev, bool max,
+				  struct hclge_pkt_buf_alloc *buf_alloc)
 {
-#define HCLGE_BUF_SIZE_UNIT	128
-	u32 rx_all = hdev->pkt_buf_size, aligned_mps;
-	int no_pfc_priv_num, pfc_priv_num;
-	struct hclge_priv_buf *priv;
-	int i;
+	u32 rx_all = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc);
+	u32 aligned_mps = round_up(hdev->mps, HCLGE_BUF_SIZE_UNIT);
+	unsigned int i;
 
-	aligned_mps = round_up(hdev->mps, HCLGE_BUF_SIZE_UNIT);
-	rx_all -= hclge_get_tx_buff_alloced(buf_alloc);
-
-	/* When DCB is not supported, rx private
-	 * buffer is not allocated.
-	 */
-	if (!hnae3_dev_dcb_supported(hdev)) {
-		if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
-			return -ENOMEM;
-
-		return 0;
-	}
-
-	/* step 1, try to alloc private buffer for all enabled tc */
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &buf_alloc->priv_buf[i];
-		if (hdev->hw_tc_map & BIT(i)) {
-			priv->enable = 1;
-			if (hdev->tm_info.hw_pfc_map & BIT(i)) {
-				priv->wl.low = aligned_mps;
-				priv->wl.high = priv->wl.low + aligned_mps;
-				priv->buf_size = priv->wl.high +
-						HCLGE_DEFAULT_DV;
-			} else {
-				priv->wl.low = 0;
-				priv->wl.high = 2 * aligned_mps;
-				priv->buf_size = priv->wl.high;
-			}
-		} else {
-			priv->enable = 0;
-			priv->wl.low = 0;
-			priv->wl.high = 0;
-			priv->buf_size = 0;
-		}
-	}
-
-	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
-		return 0;
-
-	/* step 2, try to decrease the buffer size of
-	 * no pfc TC's private buffer
-	 */
-	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &buf_alloc->priv_buf[i];
+		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
 
 		priv->enable = 0;
 		priv->wl.low = 0;
@@ -1719,31 +1899,35 @@
 		priv->enable = 1;
 
 		if (hdev->tm_info.hw_pfc_map & BIT(i)) {
-			priv->wl.low = 128;
-			priv->wl.high = priv->wl.low + aligned_mps;
-			priv->buf_size = priv->wl.high + HCLGE_DEFAULT_DV;
+			priv->wl.low = max ? aligned_mps : HCLGE_BUF_SIZE_UNIT;
+			priv->wl.high = roundup(priv->wl.low + aligned_mps,
+						HCLGE_BUF_SIZE_UNIT);
 		} else {
 			priv->wl.low = 0;
-			priv->wl.high = aligned_mps;
-			priv->buf_size = priv->wl.high;
+			priv->wl.high = max ? (aligned_mps * HCLGE_BUF_MUL_BY) :
+					aligned_mps;
 		}
+
+		priv->buf_size = priv->wl.high + hdev->dv_buf_size;
 	}
 
-	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
-		return 0;
+	return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all);
+}
 
-	/* step 3, try to reduce the number of pfc disabled TCs,
-	 * which have private buffer
-	 */
-	/* get the total no pfc enable TC number, which have private buffer */
-	no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc);
+static bool hclge_drop_nopfc_buf_till_fit(struct hclge_dev *hdev,
+					  struct hclge_pkt_buf_alloc *buf_alloc)
+{
+	u32 rx_all = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc);
+	int no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc);
+	int i;
 
 	/* let the last one be cleared first */
 	for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
-		priv = &buf_alloc->priv_buf[i];
+		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
+		unsigned int mask = BIT((unsigned int)i);
 
-		if (hdev->hw_tc_map & BIT(i) &&
-		    !(hdev->tm_info.hw_pfc_map & BIT(i))) {
+		if (hdev->hw_tc_map & mask &&
+		    !(hdev->tm_info.hw_pfc_map & mask)) {
 			/* Clear the no pfc TC private buffer */
 			priv->wl.low = 0;
 			priv->wl.high = 0;
@@ -1757,20 +1941,23 @@
 			break;
 	}
 
-	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
-		return 0;
+	return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all);
+}
 
-	/* step 4, try to reduce the number of pfc enabled TCs
-	 * which have private buffer.
-	 */
-	pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc);
+static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev,
+					struct hclge_pkt_buf_alloc *buf_alloc)
+{
+	u32 rx_all = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc);
+	int pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc);
+	int i;
 
 	/* let the last one be cleared first */
 	for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
-		priv = &buf_alloc->priv_buf[i];
+		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
+		unsigned int mask = BIT((unsigned int)i);
 
-		if (hdev->hw_tc_map & BIT(i) &&
-		    hdev->tm_info.hw_pfc_map & BIT(i)) {
+		if (hdev->hw_tc_map & mask &&
+		    hdev->tm_info.hw_pfc_map & mask) {
 			/* Reduce the number of pfc TC with private buffer */
 			priv->wl.low = 0;
 			priv->enable = 0;
@@ -1783,7 +1970,92 @@
 		    pfc_priv_num == 0)
 			break;
 	}
-	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
+
+	return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all);
+}
+
+static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev,
+				      struct hclge_pkt_buf_alloc *buf_alloc)
+{
+#define COMPENSATE_BUFFER	0x3C00
+#define COMPENSATE_HALF_MPS_NUM	5
+#define PRIV_WL_GAP		0x1800
+
+	u32 rx_priv = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc);
+	u32 tc_num = hclge_get_tc_num(hdev);
+	u32 half_mps = hdev->mps >> 1;
+	u32 min_rx_priv;
+	unsigned int i;
+
+	if (tc_num)
+		rx_priv = rx_priv / tc_num;
+
+	if (tc_num <= NEED_RESERVE_TC_NUM)
+		rx_priv = rx_priv * BUF_RESERVE_PERCENT / BUF_MAX_PERCENT;
+
+	min_rx_priv = hdev->dv_buf_size + COMPENSATE_BUFFER +
+			COMPENSATE_HALF_MPS_NUM * half_mps;
+	min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT);
+	rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT);
+
+	if (rx_priv < min_rx_priv)
+		return false;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
+
+		priv->enable = 0;
+		priv->wl.low = 0;
+		priv->wl.high = 0;
+		priv->buf_size = 0;
+
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		priv->enable = 1;
+		priv->buf_size = rx_priv;
+		priv->wl.high = rx_priv - hdev->dv_buf_size;
+		priv->wl.low = priv->wl.high - PRIV_WL_GAP;
+	}
+
+	buf_alloc->s_buf.buf_size = 0;
+
+	return true;
+}
+
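/* Illustrative worked example, not part of the patch: the per-TC minimum
 * above with hypothetical dv_buf_size = 4096 and mps = 1500 (half_mps = 750):
 */
static unsigned int min_rx_priv_example(void)
{
	unsigned int v = 4096 + 0x3C00 + 5 * 750;	/* 23206 */

	return (v + 255) & ~255U;	/* rounded up to 256 -> 23296 */
}
/* Each enabled TC must receive at least this much private buffer, or the
 * private-only strategy is rejected and a shared-buffer plan is tried.
 */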
+/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
+ * @hdev: pointer to struct hclge_dev
+ * @buf_alloc: pointer to buffer calculation data
+ * @return: 0: calculation successful, negative: fail
+ */
+static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
+				struct hclge_pkt_buf_alloc *buf_alloc)
+{
+	/* When DCB is not supported, rx private buffer is not allocated. */
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		u32 rx_all = hdev->pkt_buf_size;
+
+		rx_all -= hclge_get_tx_buff_alloced(buf_alloc);
+		if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
+			return -ENOMEM;
+
+		return 0;
+	}
+
+	if (hclge_only_alloc_priv_buff(hdev, buf_alloc))
+		return 0;
+
+	if (hclge_rx_buf_calc_all(hdev, true, buf_alloc))
+		return 0;
+
+	/* try to decrease the buffer size */
+	if (hclge_rx_buf_calc_all(hdev, false, buf_alloc))
+		return 0;
+
+	if (hclge_drop_nopfc_buf_till_fit(hdev, buf_alloc))
+		return 0;
+
+	if (hclge_drop_pfc_buf_till_fit(hdev, buf_alloc))
 		return 0;
 
 	return -ENOMEM;
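/* Illustrative sketch, not part of the patch: the rewrite replaces the old
 * inline "step 1..4" logic with a cascade of allocation strategies tried
 * from most to least generous. The same control shape in miniature:
 */
#include <errno.h>
#include <stdbool.h>

typedef bool (*buf_strategy_t)(void *ctx);

static int first_fitting_strategy(const buf_strategy_t *s, int n, void *ctx)
{
	int i;

	for (i = 0; i < n; i++)
		if (s[i](ctx))
			return 0;	/* this strategy fits the budget */

	return -ENOMEM;
}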
@@ -2029,7 +2301,8 @@
 	int vectors;
 	int i;
 
-	vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+	vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+					hdev->num_msi,
 					PCI_IRQ_MSI | PCI_IRQ_MSIX);
 	if (vectors < 0) {
 		dev_err(&pdev->dev,
@@ -2044,6 +2317,7 @@
 
 	hdev->num_msi = vectors;
 	hdev->num_msi_left = vectors;
+
 	hdev->base_msi_vector = pdev->irq;
 	hdev->roce_base_vector = hdev->base_msi_vector +
 				hdev->roce_base_msix_offset;
@@ -2068,19 +2342,16 @@
 	return 0;
 }
 
-static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, int speed)
+static u8 hclge_check_speed_dup(u8 duplex, int speed)
 {
-	struct hclge_mac *mac = &hdev->hw.mac;
+	if (!(speed == HCLGE_MAC_SPEED_10M || speed == HCLGE_MAC_SPEED_100M))
+		duplex = HCLGE_MAC_FULL;
 
-	if ((speed == HCLGE_MAC_SPEED_10M) || (speed == HCLGE_MAC_SPEED_100M))
-		mac->duplex = (u8)duplex;
-	else
-		mac->duplex = HCLGE_MAC_FULL;
-
-	mac->speed = speed;
+	return duplex;
 }
 
-int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
+static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed,
+				      u8 duplex)
 {
 	struct hclge_config_mac_speed_dup_cmd *req;
 	struct hclge_desc desc;
@@ -2090,7 +2361,8 @@
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false);
 
-	hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex);
+	if (duplex)
+		hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, 1);
 
 	switch (speed) {
 	case HCLGE_MAC_SPEED_10M:
@@ -2140,7 +2412,23 @@
 		return ret;
 	}
 
-	hclge_check_speed_dup(hdev, duplex, speed);
+	return 0;
+}
+
+int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
+{
+	int ret;
+
+	duplex = hclge_check_speed_dup(duplex, speed);
+	if (hdev->hw.mac.speed == speed && hdev->hw.mac.duplex == duplex)
+		return 0;
+
+	ret = hclge_cfg_mac_speed_dup_hw(hdev, speed, duplex);
+	if (ret)
+		return ret;
+
+	hdev->hw.mac.speed = speed;
+	hdev->hw.mac.duplex = duplex;
 
 	return 0;
 }
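/* Illustrative sketch, not part of the patch: the new wrapper caches the
 * configured speed/duplex so an unchanged request skips the firmware
 * command, and updates the cache only after hardware accepts the values.
 * The generic shape:
 */
static int commit_if_changed(int *cached, int wanted, int (*commit)(int))
{
	int ret;

	if (*cached == wanted)
		return 0;	/* already in effect, no command needed */

	ret = commit(wanted);
	if (ret)
		return ret;	/* cache still matches hardware state */

	*cached = wanted;
	return 0;
}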
@@ -2154,37 +2442,6 @@
 	return hclge_cfg_mac_speed_dup(hdev, speed, duplex);
 }
 
-static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
-					u8 *duplex)
-{
-	struct hclge_query_an_speed_dup_cmd *req;
-	struct hclge_desc desc;
-	int speed_tmp;
-	int ret;
-
-	req = (struct hclge_query_an_speed_dup_cmd *)desc.data;
-
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"mac speed/autoneg/duplex query cmd failed %d\n",
-			ret);
-		return ret;
-	}
-
-	*duplex = hnae3_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_DUPLEX_B);
-	speed_tmp = hnae3_get_field(req->an_syn_dup_speed, HCLGE_QUERY_SPEED_M,
-				    HCLGE_QUERY_SPEED_S);
-
-	ret = hclge_parse_speed(speed_tmp, speed);
-	if (ret)
-		dev_err(&hdev->pdev->dev,
-			"could not parse speed(=%d), %d\n", speed_tmp, ret);
-
-	return ret;
-}
-
 static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
 {
 	struct hclge_config_auto_neg_cmd *req;
@@ -2195,7 +2452,8 @@
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
 
 	req = (struct hclge_config_auto_neg_cmd *)desc.data;
-	hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+	if (enable)
+		hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, 1U);
 	req->cfg_an_cmd_flag = cpu_to_le32(flag);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2211,6 +2469,16 @@
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
+	if (!hdev->hw.mac.support_autoneg) {
+		if (enable) {
+			dev_err(&hdev->pdev->dev,
+				"autoneg is not supported by current port\n");
+			return -EOPNOTSUPP;
+		} else {
+			return 0;
+		}
+	}
+
 	return hclge_set_autoneg_en(hdev, enable);
 }
 
@@ -2226,114 +2494,169 @@
 	return hdev->hw.mac.autoneg;
 }
 
-static int hclge_set_default_mac_vlan_mask(struct hclge_dev *hdev,
-					   bool mask_vlan,
-					   u8 *mac_mask)
+static int hclge_restart_autoneg(struct hnae3_handle *handle)
 {
-	struct hclge_mac_vlan_mask_entry_cmd *req;
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	dev_dbg(&hdev->pdev->dev, "restart autoneg\n");
+
+	ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	if (ret)
+		return ret;
+	return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+}
+
+static int hclge_halt_autoneg(struct hnae3_handle *handle, bool halt)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (hdev->hw.mac.support_autoneg && hdev->hw.mac.autoneg)
+		return hclge_set_autoneg_en(hdev, !halt);
+
+	return 0;
+}
+
+static int hclge_set_fec_hw(struct hclge_dev *hdev, u32 fec_mode)
+{
+	struct hclge_config_fec_cmd *req;
 	struct hclge_desc desc;
-	int status;
+	int ret;
 
-	req = (struct hclge_mac_vlan_mask_entry_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_MASK_SET, false);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_FEC_MODE, false);
 
-	hnae3_set_bit(req->vlan_mask, HCLGE_VLAN_MASK_EN_B,
-		      mask_vlan ? 1 : 0);
-	ether_addr_copy(req->mac_mask, mac_mask);
+	req = (struct hclge_config_fec_cmd *)desc.data;
+	if (fec_mode & BIT(HNAE3_FEC_AUTO))
+		hnae3_set_bit(req->fec_mode, HCLGE_MAC_CFG_FEC_AUTO_EN_B, 1);
+	if (fec_mode & BIT(HNAE3_FEC_RS))
+		hnae3_set_field(req->fec_mode, HCLGE_MAC_CFG_FEC_MODE_M,
+				HCLGE_MAC_CFG_FEC_MODE_S, HCLGE_MAC_FEC_RS);
+	if (fec_mode & BIT(HNAE3_FEC_BASER))
+		hnae3_set_field(req->fec_mode, HCLGE_MAC_CFG_FEC_MODE_M,
+				HCLGE_MAC_CFG_FEC_MODE_S, HCLGE_MAC_FEC_BASER);
 
-	status = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (status)
-		dev_err(&hdev->pdev->dev,
-			"Config mac_vlan_mask failed for cmd_send, ret =%d\n",
-			status);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "set fec mode failed %d.\n", ret);
 
-	return status;
+	return ret;
+}
+
+static int hclge_set_fec(struct hnae3_handle *handle, u32 fec_mode)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac *mac = &hdev->hw.mac;
+	int ret;
+
+	if (fec_mode && !(mac->fec_ability & fec_mode)) {
+		dev_err(&hdev->pdev->dev, "unsupported fec mode\n");
+		return -EINVAL;
+	}
+
+	ret = hclge_set_fec_hw(hdev, fec_mode);
+	if (ret)
+		return ret;
+
+	mac->user_fec_mode = fec_mode | BIT(HNAE3_FEC_USER_DEF);
+	return 0;
+}
+
+static void hclge_get_fec(struct hnae3_handle *handle, u8 *fec_ability,
+			  u8 *fec_mode)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac *mac = &hdev->hw.mac;
+
+	if (fec_ability)
+		*fec_ability = mac->fec_ability;
+	if (fec_mode)
+		*fec_mode = mac->fec_mode;
 }
 
 static int hclge_mac_init(struct hclge_dev *hdev)
 {
-	struct hnae3_handle *handle = &hdev->vport[0].nic;
-	struct net_device *netdev = handle->kinfo.netdev;
 	struct hclge_mac *mac = &hdev->hw.mac;
-	u8 mac_mask[ETH_ALEN] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-	struct hclge_vport *vport;
-	int mtu;
 	int ret;
-	int i;
 
-	ret = hclge_cfg_mac_speed_dup(hdev, hdev->hw.mac.speed, HCLGE_MAC_FULL);
+	hdev->support_sfp_query = true;
+	hdev->hw.mac.duplex = HCLGE_MAC_FULL;
+	ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed,
+					 hdev->hw.mac.duplex);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Config mac speed dup fail ret=%d\n", ret);
 		return ret;
 	}
 
-	mac->link = 0;
-
-	/* Initialize the MTA table work mode */
-	hdev->enable_mta	= true;
-	hdev->mta_mac_sel_type	= HCLGE_MAC_ADDR_47_36;
-
-	ret = hclge_set_mta_filter_mode(hdev,
-					hdev->mta_mac_sel_type,
-					hdev->enable_mta);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "set mta filter mode failed %d\n",
-			ret);
-		return ret;
-	}
-
-	for (i = 0; i < hdev->num_alloc_vport; i++) {
-		vport = &hdev->vport[i];
-		vport->accept_mta_mc = false;
-
-		memset(vport->mta_shadow, 0, sizeof(vport->mta_shadow));
-		ret = hclge_cfg_func_mta_filter(hdev, vport->vport_id, false);
+	if (hdev->hw.mac.support_autoneg) {
+		ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"set mta filter mode fail ret=%d\n", ret);
+				"Config mac autoneg fail ret=%d\n", ret);
 			return ret;
 		}
 	}
 
-	ret = hclge_set_default_mac_vlan_mask(hdev, true, mac_mask);
+	mac->link = 0;
+
+	if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) {
+		ret = hclge_set_fec_hw(hdev, mac->user_fec_mode);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Fec mode init fail, ret = %d\n", ret);
+			return ret;
+		}
+	}
+
+	ret = hclge_set_mac_mtu(hdev, hdev->mps);
 	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"set default mac_vlan_mask fail ret=%d\n", ret);
+		dev_err(&hdev->pdev->dev, "set mtu failed ret=%d\n", ret);
 		return ret;
 	}
 
-	if (netdev)
-		mtu = netdev->mtu;
-	else
-		mtu = ETH_DATA_LEN;
+	ret = hclge_set_default_loopback(hdev);
+	if (ret)
+		return ret;
 
-	ret = hclge_set_mtu(handle, mtu);
+	ret = hclge_buffer_alloc(hdev);
 	if (ret)
 		dev_err(&hdev->pdev->dev,
-			"set mtu failed ret=%d\n", ret);
+			"allocate buffer fail, ret=%d\n", ret);
 
 	return ret;
 }
 
 static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
 {
-	if (!test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
-		schedule_work(&hdev->mbx_service_task);
+	if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
+	    !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
+		queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
+			      &hdev->mbx_service_task);
 }
 
 static void hclge_reset_task_schedule(struct hclge_dev *hdev)
 {
-	if (!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
-		schedule_work(&hdev->rst_service_task);
+	if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+	    !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
+		queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
+			      &hdev->rst_service_task);
 }
 
-static void hclge_task_schedule(struct hclge_dev *hdev)
+void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time)
 {
 	if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
 	    !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
-	    !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
-		(void)schedule_work(&hdev->service_task);
+	    !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) {
+		hdev->hw_stats.stats_timer++;
+		hdev->fd_arfs_expire_timer++;
+		mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
+				    system_wq, &hdev->service_task,
+				    delay_time);
+	}
 }
 
 static int hclge_get_mac_link_status(struct hclge_dev *hdev)
@@ -2359,7 +2682,7 @@
 
 static int hclge_get_mac_phy_link(struct hclge_dev *hdev)
 {
-	int mac_state;
+	unsigned int mac_state;
 	int link_stat;
 
 	if (test_bit(HCLGE_STATE_DOWN, &hdev->state))
@@ -2368,7 +2691,7 @@
 	mac_state = hclge_get_mac_link_status(hdev);
 
 	if (hdev->hw.mac.phydev) {
-		if (!genphy_read_status(hdev->hw.mac.phydev))
+		if (hdev->hw.mac.phydev->state == PHY_RUNNING)
 			link_stat = mac_state &
 				hdev->hw.mac.phydev->link;
 		else
@@ -2383,7 +2706,9 @@
 
 static void hclge_update_link_status(struct hclge_dev *hdev)
 {
+	struct hnae3_client *rclient = hdev->roce_client;
 	struct hnae3_client *client = hdev->nic_client;
+	struct hnae3_handle *rhandle;
 	struct hnae3_handle *handle;
 	int state;
 	int i;
@@ -2395,49 +2720,145 @@
 		for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 			handle = &hdev->vport[i].nic;
 			client->ops->link_status_change(handle, state);
+			hclge_config_mac_tnl_int(hdev, state);
+			rhandle = &hdev->vport[i].roce;
+			if (rclient && rclient->ops->link_status_change)
+				rclient->ops->link_status_change(rhandle,
+								 state);
 		}
 		hdev->hw.mac.link = state;
 	}
 }
 
-static int hclge_update_speed_duplex(struct hclge_dev *hdev)
+static void hclge_update_port_capability(struct hclge_mac *mac)
 {
-	struct hclge_mac mac = hdev->hw.mac;
-	u8 duplex;
-	int speed;
+	/* update fec ability by speed */
+	hclge_convert_setting_fec(mac);
+
+	/* The firmware cannot identify the backplane type; the media type
+	 * read from the configuration can help handle it
+	 */
+	if (mac->media_type == HNAE3_MEDIA_TYPE_BACKPLANE &&
+	    mac->module_type == HNAE3_MODULE_TYPE_UNKNOWN)
+		mac->module_type = HNAE3_MODULE_TYPE_KR;
+	else if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
+		mac->module_type = HNAE3_MODULE_TYPE_TP;
+
+	if (mac->support_autoneg) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mac->supported);
+		linkmode_copy(mac->advertising, mac->supported);
+	} else {
+		linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+				   mac->supported);
+		linkmode_zero(mac->advertising);
+	}
+}
+
+static int hclge_get_sfp_speed(struct hclge_dev *hdev, u32 *speed)
+{
+	struct hclge_sfp_info_cmd *resp;
+	struct hclge_desc desc;
 	int ret;
 
-	/* get the speed and duplex as autoneg'result from mac cmd when phy
-	 * doesn't exit.
-	 */
-	if (mac.phydev || !mac.autoneg)
-		return 0;
-
-	ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"mac autoneg/speed/duplex query failed %d\n", ret);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GET_SFP_INFO, true);
+	resp = (struct hclge_sfp_info_cmd *)desc.data;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret == -EOPNOTSUPP) {
+		dev_warn(&hdev->pdev->dev,
+			 "IMP do not support get SFP speed %d\n", ret);
+		return ret;
+	} else if (ret) {
+		dev_err(&hdev->pdev->dev, "get sfp speed failed %d\n", ret);
 		return ret;
 	}
 
-	if ((mac.speed != speed) || (mac.duplex != duplex)) {
-		ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex);
-		if (ret) {
-			dev_err(&hdev->pdev->dev,
-				"mac speed/duplex config failed %d\n", ret);
-			return ret;
-		}
+	*speed = le32_to_cpu(resp->speed);
+
+	return 0;
+}
+
+static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
+{
+	struct hclge_sfp_info_cmd *resp;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GET_SFP_INFO, true);
+	resp = (struct hclge_sfp_info_cmd *)desc.data;
+
+	resp->query_type = QUERY_ACTIVE_SPEED;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret == -EOPNOTSUPP) {
+		dev_warn(&hdev->pdev->dev,
+			 "IMP does not support get SFP info %d\n", ret);
+		return ret;
+	} else if (ret) {
+		dev_err(&hdev->pdev->dev, "get sfp info failed %d\n", ret);
+		return ret;
+	}
+
+	mac->speed = le32_to_cpu(resp->speed);
+	/* if resp->speed_ability is 0, it means it is an old firmware
+	 * version, so do not update these params
+	 */
+	if (resp->speed_ability) {
+		mac->module_type = le32_to_cpu(resp->module_type);
+		mac->speed_ability = le32_to_cpu(resp->speed_ability);
+		mac->autoneg = resp->autoneg;
+		mac->support_autoneg = resp->autoneg_ability;
+		mac->speed_type = QUERY_ACTIVE_SPEED;
+		if (!resp->active_fec)
+			mac->fec_mode = 0;
+		else
+			mac->fec_mode = BIT(resp->active_fec);
+	} else {
+		mac->speed_type = QUERY_SFP_SPEED;
 	}
 
 	return 0;
 }
 
-static int hclge_update_speed_duplex_h(struct hnae3_handle *handle)
+static int hclge_update_port_info(struct hclge_dev *hdev)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac *mac = &hdev->hw.mac;
+	int speed = HCLGE_MAC_SPEED_UNKNOWN;
+	int ret;
 
-	return hclge_update_speed_duplex(hdev);
+	/* get the port info from SFP cmd if not copper port */
+	if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
+		return 0;
+
+	/* if IMP does not support get SFP/qSFP info, return directly */
+	if (!hdev->support_sfp_query)
+		return 0;
+
+	if (hdev->pdev->revision >= 0x21)
+		ret = hclge_get_sfp_info(hdev, mac);
+	else
+		ret = hclge_get_sfp_speed(hdev, &speed);
+
+	if (ret == -EOPNOTSUPP) {
+		hdev->support_sfp_query = false;
+		return ret;
+	} else if (ret) {
+		return ret;
+	}
+
+	if (hdev->pdev->revision >= 0x21) {
+		if (mac->speed_type == QUERY_ACTIVE_SPEED) {
+			hclge_update_port_capability(mac);
+			return 0;
+		}
+		return hclge_cfg_mac_speed_dup(hdev, mac->speed,
+					       HCLGE_MAC_FULL);
+	} else {
+		if (speed == HCLGE_MAC_SPEED_UNKNOWN)
+			return 0; /* do nothing if no SFP */
+
+		/* must config full duplex for SFP */
+		return hclge_cfg_mac_speed_dup(hdev, speed, HCLGE_MAC_FULL);
+	}
 }
 
 static int hclge_get_status(struct hnae3_handle *handle)
@@ -2450,59 +2871,48 @@
 	return hdev->hw.mac.link;
 }
 
-static void hclge_service_timer(struct timer_list *t)
-{
-	struct hclge_dev *hdev = from_timer(hdev, t, service_timer);
-
-	mod_timer(&hdev->service_timer, jiffies + HZ);
-	hdev->hw_stats.stats_timer++;
-	hclge_task_schedule(hdev);
-}
-
-static void hclge_service_complete(struct hclge_dev *hdev)
-{
-	WARN_ON(!test_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state));
-
-	/* Flush memory before next watchdog */
-	smp_mb__before_atomic();
-	clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
-}
-
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 {
-	u32 rst_src_reg;
-	u32 cmdq_src_reg;
+	u32 rst_src_reg, cmdq_src_reg, msix_src_reg;
 
 	/* fetch the events from their corresponding regs */
 	rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
 	cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
+	msix_src_reg = hclge_read_dev(&hdev->hw,
+				      HCLGE_VECTOR0_PF_OTHER_INT_STS_REG);
 
 	/* Assumption: if reset and mailbox events are reported together,
 	 * we will only process the reset event in this pass and will defer
 	 * the mailbox events. Since we have not cleared the RX CMDQ event
 	 * this time, we will receive another interrupt from the hardware
 	 * just for the mailbox.
+	 *
+	 * check for vector0 reset event sources
 	 */
+	if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_src_reg) {
+		dev_info(&hdev->pdev->dev, "IMP reset interrupt\n");
+		set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
+		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+		*clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
+		hdev->rst_stats.imp_rst_cnt++;
+		return HCLGE_VECTOR0_EVENT_RST;
+	}
 
-	/* check for vector0 reset event sources */
 	if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_src_reg) {
+		dev_info(&hdev->pdev->dev, "global reset interrupt\n");
 		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 		set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
 		*clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
+		hdev->rst_stats.global_rst_cnt++;
 		return HCLGE_VECTOR0_EVENT_RST;
 	}
 
-	if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) {
-		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
-		set_bit(HNAE3_CORE_RESET, &hdev->reset_pending);
-		*clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
-		return HCLGE_VECTOR0_EVENT_RST;
-	}
-
-	if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_src_reg) {
-		set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
-		*clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
-		return HCLGE_VECTOR0_EVENT_RST;
+	/* check for vector0 msix event source */
+	if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
+		dev_info(&hdev->pdev->dev, "received event 0x%x\n",
+			 msix_src_reg);
+		*clearval = msix_src_reg;
+		return HCLGE_VECTOR0_EVENT_ERR;
 	}
 
 	/* check for vector0 mailbox(=CMDQ RX) event source */
@@ -2512,6 +2922,12 @@
 		return HCLGE_VECTOR0_EVENT_MBX;
 	}
 
+	/* print other vector0 event source */
+	dev_info(&hdev->pdev->dev,
+		 "CMDQ INT status:0x%x, other INT status:0x%x\n",
+		 cmdq_src_reg, msix_src_reg);
+	*clearval = msix_src_reg;
+
 	return HCLGE_VECTOR0_EVENT_OTHER;
 }
 
@@ -2525,6 +2941,8 @@
 	case HCLGE_VECTOR0_EVENT_MBX:
 		hclge_write_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG, regclr);
 		break;
+	default:
+		break;
 	}
 }
 
@@ -2545,14 +2963,27 @@
 static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 {
 	struct hclge_dev *hdev = data;
+	u32 clearval = 0;
 	u32 event_cause;
-	u32 clearval;
 
 	hclge_enable_vector(&hdev->misc_vector, false);
 	event_cause = hclge_check_event_cause(hdev, &clearval);
 
 	/* vector 0 interrupt is shared with reset and mailbox source events.*/
 	switch (event_cause) {
+	case HCLGE_VECTOR0_EVENT_ERR:
+		/* We do not know what type of reset is required now. This can
+		 * only be decided after we fetch the type of errors which
+		 * caused this event. Therefore, for now we:
+		 * 1. Assert an HNAE3_UNKNOWN_RESET type of reset, meaning the
+		 *    exact reset type to be used is deferred.
+		 * 2. Schedule the reset service task.
+		 * 3. When the service task receives the HNAE3_UNKNOWN_RESET
+		 *    type, it fetches the correct reset type by first
+		 *    decoding the types of errors.
+		 */
+		set_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request);
+		/* fall through */
 	case HCLGE_VECTOR0_EVENT_RST:
 		hclge_reset_task_schedule(hdev);
 		break;
@@ -2574,9 +3005,15 @@
 		break;
 	}
 
-	/* clear the source of interrupt if it is not cause by reset */
-	if (event_cause != HCLGE_VECTOR0_EVENT_RST) {
-		hclge_clear_event_cause(hdev, event_cause, clearval);
+	hclge_clear_event_cause(hdev, event_cause, clearval);
+
+	/* Enable the interrupt if it was not caused by a reset. When
+	 * clearval equals 0, the interrupt status may have been cleared
+	 * by hardware before the driver read the status register; in
+	 * that case the vector0 interrupt should also be enabled.
+	 */
+	if (!clearval ||
+	    event_cause == HCLGE_VECTOR0_EVENT_MBX) {
 		hclge_enable_vector(&hdev->misc_vector, true);
 	}
 
@@ -2609,6 +3046,36 @@
 	hdev->num_msi_used += 1;
 }
 
+static void hclge_irq_affinity_notify(struct irq_affinity_notify *notify,
+				      const cpumask_t *mask)
+{
+	struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
+					      affinity_notify);
+
+	cpumask_copy(&hdev->affinity_mask, mask);
+}
+
+static void hclge_irq_affinity_release(struct kref *ref)
+{
+}
+
+static void hclge_misc_affinity_setup(struct hclge_dev *hdev)
+{
+	irq_set_affinity_hint(hdev->misc_vector.vector_irq,
+			      &hdev->affinity_mask);
+
+	hdev->affinity_notify.notify = hclge_irq_affinity_notify;
+	hdev->affinity_notify.release = hclge_irq_affinity_release;
+	irq_set_affinity_notifier(hdev->misc_vector.vector_irq,
+				  &hdev->affinity_notify);
+}
+
+static void hclge_misc_affinity_teardown(struct hclge_dev *hdev)
+{
+	irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL);
+	irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL);
+}
+
 static int hclge_misc_irq_init(struct hclge_dev *hdev)
 {
 	int ret;
@@ -2633,12 +3100,15 @@
 	hclge_free_vector(hdev, 0);
 }
 
-static int hclge_notify_client(struct hclge_dev *hdev,
-			       enum hnae3_reset_notify_type type)
+int hclge_notify_client(struct hclge_dev *hdev,
+			enum hnae3_reset_notify_type type)
 {
 	struct hnae3_client *client = hdev->nic_client;
 	u16 i;
 
+	if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client)
+		return 0;
+
 	if (!client->ops->reset_notify)
 		return -EOPNOTSUPP;
 
@@ -2647,33 +3117,66 @@
 		int ret;
 
 		ret = client->ops->reset_notify(handle, type);
-		if (ret)
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"notify nic client failed %d(%d)\n", type, ret);
 			return ret;
+		}
 	}
 
 	return 0;
 }
 
+static int hclge_notify_roce_client(struct hclge_dev *hdev,
+				    enum hnae3_reset_notify_type type)
+{
+	struct hnae3_client *client = hdev->roce_client;
+	int ret = 0;
+	u16 i;
+
+	if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client)
+		return 0;
+
+	if (!client->ops->reset_notify)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+		struct hnae3_handle *handle = &hdev->vport[i].roce;
+
+		ret = client->ops->reset_notify(handle, type);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"notify roce client failed %d(%d)",
+				type, ret);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
 static int hclge_reset_wait(struct hclge_dev *hdev)
 {
 #define HCLGE_RESET_WATI_MS	100
-#define HCLGE_RESET_WAIT_CNT	5
+#define HCLGE_RESET_WAIT_CNT	200
 	u32 val, reg, reg_bit;
 	u32 cnt = 0;
 
 	switch (hdev->reset_type) {
+	case HNAE3_IMP_RESET:
+		reg = HCLGE_GLOBAL_RESET_REG;
+		reg_bit = HCLGE_IMP_RESET_BIT;
+		break;
 	case HNAE3_GLOBAL_RESET:
 		reg = HCLGE_GLOBAL_RESET_REG;
 		reg_bit = HCLGE_GLOBAL_RESET_BIT;
 		break;
-	case HNAE3_CORE_RESET:
-		reg = HCLGE_GLOBAL_RESET_REG;
-		reg_bit = HCLGE_CORE_RESET_BIT;
-		break;
 	case HNAE3_FUNC_RESET:
 		reg = HCLGE_FUN_RST_ING;
 		reg_bit = HCLGE_FUN_RST_ING_B;
 		break;
+	case HNAE3_FLR_RESET:
+		break;
 	default:
 		dev_err(&hdev->pdev->dev,
 			"Wait for unsupported reset type: %d\n",
@@ -2681,6 +3184,20 @@
 		return -EINVAL;
 	}
 
+	if (hdev->reset_type == HNAE3_FLR_RESET) {
+		while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) &&
+		       cnt++ < HCLGE_RESET_WAIT_CNT)
+			msleep(HCLGE_RESET_WATI_MS);
+
+		if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
+			dev_err(&hdev->pdev->dev,
+				"flr wait timeout: %d\n", cnt);
+			return -EBUSY;
+		}
+
+		return 0;
+	}
+
 	val = hclge_read_dev(&hdev->hw, reg);
 	while (hnae3_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) {
 		msleep(HCLGE_RESET_WATI_MS);
@@ -2697,6 +3214,120 @@
 	return 0;
 }
 
+static int hclge_set_vf_rst(struct hclge_dev *hdev, int func_id, bool reset)
+{
+	struct hclge_vf_rst_cmd *req;
+	struct hclge_desc desc;
+
+	req = (struct hclge_vf_rst_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GBL_RST_STATUS, false);
+	req->dest_vfid = func_id;
+
+	if (reset)
+		req->vf_rst = 0x1;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
+{
+	int i;
+
+	for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) {
+		struct hclge_vport *vport = &hdev->vport[i];
+		int ret;
+
+		/* Send cmd to set/clear VF's FUNC_RST_ING */
+		ret = hclge_set_vf_rst(hdev, vport->vport_id, reset);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"set vf(%d) rst failed %d!\n",
+				vport->vport_id, ret);
+			return ret;
+		}
+
+		if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+			continue;
+
+		/* Inform VF to process the reset.
+		 * hclge_inform_reset_assert_to_vf may fail if the VF
+		 * driver is not loaded.
+		 */
+		ret = hclge_inform_reset_assert_to_vf(vport);
+		if (ret)
+			dev_warn(&hdev->pdev->dev,
+				 "inform reset to vf(%d) failed %d!\n",
+				 vport->vport_id, ret);
+	}
+
+	return 0;
+}
+
+static int hclge_func_reset_sync_vf(struct hclge_dev *hdev)
+{
+	struct hclge_pf_rst_sync_cmd *req;
+	struct hclge_desc desc;
+	int cnt = 0;
+	int ret;
+
+	req = (struct hclge_pf_rst_sync_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_VF_RST_RDY, true);
+
+	do {
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		/* for compatibility with old firmware, wait
+		 * 100 ms for the VF to stop IO
+		 */
+		if (ret == -EOPNOTSUPP) {
+			msleep(HCLGE_RESET_SYNC_TIME);
+			return 0;
+		} else if (ret) {
+			dev_err(&hdev->pdev->dev, "sync with VF fail %d!\n",
+				ret);
+			return ret;
+		} else if (req->all_vf_ready) {
+			return 0;
+		}
+		msleep(HCLGE_PF_RESET_SYNC_TIME);
+		hclge_cmd_reuse_desc(&desc, true);
+	} while (cnt++ < HCLGE_PF_RESET_SYNC_CNT);
+
+	dev_err(&hdev->pdev->dev, "sync with VF timeout!\n");
+	return -ETIME;
+}
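
hclge_func_reset_sync_vf() above shows the firmware-compatibility idiom used throughout this patch: a command unknown to old firmware comes back as -EOPNOTSUPP, and the driver degrades to a fixed grace period instead of failing the reset. A sketch of that decision flow, with query_vf_ready() and the timing constants as illustrative stand-ins rather than the real command interface:

    #include <errno.h>
    #include <unistd.h>

    #define LEGACY_GRACE_MS 100  /* stand-in for HCLGE_RESET_SYNC_TIME */
    #define SYNC_RETRY_MS   20   /* stand-in for HCLGE_PF_RESET_SYNC_TIME */
    #define SYNC_RETRY_CNT  30   /* stand-in for HCLGE_PF_RESET_SYNC_CNT */

    /* hypothetical query: 0 = all VFs ready, -EAGAIN = not yet,
     * -EOPNOTSUPP = firmware too old to know the command
     */
    static int query_vf_ready(void)
    {
        static int polls = 3;  /* pretend VFs quiesce after 3 polls */

        return polls-- > 0 ? -EAGAIN : 0;
    }

    static int sync_vfs(void)
    {
        int cnt = 0;

        do {
            int ret = query_vf_ready();

            if (ret == -EOPNOTSUPP) {
                /* old firmware: just give the VFs time to stop IO */
                usleep(LEGACY_GRACE_MS * 1000);
                return 0;
            }
            if (ret == 0)
                return 0;       /* every VF reported ready */
            if (ret != -EAGAIN)
                return ret;     /* hard command failure */
            usleep(SYNC_RETRY_MS * 1000);
        } while (++cnt < SYNC_RETRY_CNT);

        return -ETIME;
    }

    int main(void)
    {
        return sync_vfs() ? 1 : 0;
    }
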
+
+void hclge_report_hw_error(struct hclge_dev *hdev,
+			   enum hnae3_hw_error_type type)
+{
+	struct hnae3_client *client = hdev->nic_client;
+	u16 i;
+
+	if (!client || !client->ops->process_hw_error ||
+	    !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
+		return;
+
+	for (i = 0; i < hdev->num_vmdq_vport + 1; i++)
+		client->ops->process_hw_error(&hdev->vport[i].nic, type);
+}
+
+static void hclge_handle_imp_error(struct hclge_dev *hdev)
+{
+	u32 reg_val;
+
+	reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
+	if (reg_val & BIT(HCLGE_VECTOR0_IMP_RD_POISON_B)) {
+		hclge_report_hw_error(hdev, HNAE3_IMP_RD_POISON_ERROR);
+		reg_val &= ~BIT(HCLGE_VECTOR0_IMP_RD_POISON_B);
+		hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
+	}
+
+	if (reg_val & BIT(HCLGE_VECTOR0_IMP_CMDQ_ERR_B)) {
+		hclge_report_hw_error(hdev, HNAE3_CMDQ_ECC_ERROR);
+		reg_val &= ~BIT(HCLGE_VECTOR0_IMP_CMDQ_ERR_B);
+		hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
+	}
+}
+
 int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
 {
 	struct hclge_desc desc;
@@ -2717,9 +3348,18 @@
 
 static void hclge_do_reset(struct hclge_dev *hdev)
 {
+	struct hnae3_handle *handle = &hdev->vport[0].nic;
 	struct pci_dev *pdev = hdev->pdev;
 	u32 val;
 
+	if (hclge_get_hw_reset_stat(handle)) {
+		dev_info(&pdev->dev, "Hardware reset not finished\n");
+		dev_info(&pdev->dev, "func_rst_reg:0x%x, global_rst_reg:0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING),
+			 hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG));
+		return;
+	}
+
 	switch (hdev->reset_type) {
 	case HNAE3_GLOBAL_RESET:
 		val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
@@ -2727,19 +3367,18 @@
 		hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val);
 		dev_info(&pdev->dev, "Global Reset requested\n");
 		break;
-	case HNAE3_CORE_RESET:
-		val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
-		hnae3_set_bit(val, HCLGE_CORE_RESET_BIT, 1);
-		hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val);
-		dev_info(&pdev->dev, "Core Reset requested\n");
-		break;
 	case HNAE3_FUNC_RESET:
 		dev_info(&pdev->dev, "PF Reset requested\n");
-		hclge_func_reset_cmd(hdev, 0);
 		/* schedule again to check later */
 		set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending);
 		hclge_reset_task_schedule(hdev);
 		break;
+	case HNAE3_FLR_RESET:
+		dev_info(&pdev->dev, "FLR requested\n");
+		/* schedule again to check later */
+		set_bit(HNAE3_FLR_RESET, &hdev->reset_pending);
+		hclge_reset_task_schedule(hdev);
+		break;
 	default:
 		dev_warn(&pdev->dev,
 			 "Unsupported reset type: %d\n", hdev->reset_type);
@@ -2747,26 +3386,50 @@
 	}
 }
 
-static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev,
+static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 						   unsigned long *addr)
 {
 	enum hnae3_reset_type rst_level = HNAE3_NONE_RESET;
+	struct hclge_dev *hdev = ae_dev->priv;
+
+	/* first, resolve any unknown reset type to the known type(s) */
+	if (test_bit(HNAE3_UNKNOWN_RESET, addr)) {
+		/* we will intentionally ignore any errors from this function
+		 * as we will end up in *some* reset request in any case
+		 */
+		hclge_handle_hw_msix_error(hdev, addr);
+		clear_bit(HNAE3_UNKNOWN_RESET, addr);
+		/* We deferred the clearing of the error event which caused
+		 * the interrupt, since it was not possible to do that in
+		 * interrupt context (and this is the reason we introduced a
+		 * new UNKNOWN reset type). Now that the errors have been
+		 * handled and cleared in hardware, we can safely enable
+		 * interrupts. This is an exception to the norm.
+		 */
+		hclge_enable_vector(&hdev->misc_vector, true);
+	}
 
 	/* return the highest priority reset level amongst all */
-	if (test_bit(HNAE3_GLOBAL_RESET, addr))
-		rst_level = HNAE3_GLOBAL_RESET;
-	else if (test_bit(HNAE3_CORE_RESET, addr))
-		rst_level = HNAE3_CORE_RESET;
-	else if (test_bit(HNAE3_IMP_RESET, addr))
+	if (test_bit(HNAE3_IMP_RESET, addr)) {
 		rst_level = HNAE3_IMP_RESET;
-	else if (test_bit(HNAE3_FUNC_RESET, addr))
+		clear_bit(HNAE3_IMP_RESET, addr);
+		clear_bit(HNAE3_GLOBAL_RESET, addr);
+		clear_bit(HNAE3_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_GLOBAL_RESET, addr)) {
+		rst_level = HNAE3_GLOBAL_RESET;
+		clear_bit(HNAE3_GLOBAL_RESET, addr);
+		clear_bit(HNAE3_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_FUNC_RESET, addr)) {
 		rst_level = HNAE3_FUNC_RESET;
+		clear_bit(HNAE3_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_FLR_RESET, addr)) {
+		rst_level = HNAE3_FLR_RESET;
+		clear_bit(HNAE3_FLR_RESET, addr);
+	}
 
-	/* now, clear all other resets */
-	clear_bit(HNAE3_GLOBAL_RESET, addr);
-	clear_bit(HNAE3_CORE_RESET, addr);
-	clear_bit(HNAE3_IMP_RESET, addr);
-	clear_bit(HNAE3_FUNC_RESET, addr);
+	if (hdev->reset_type != HNAE3_NONE_RESET &&
+	    rst_level < hdev->reset_type)
+		return HNAE3_NONE_RESET;
 
 	return rst_level;
 }
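
hclge_get_reset_level() encodes a priority scheme: IMP beats global beats function reset, a bigger reset consumes the smaller pending requests it subsumes, and FLR is tracked on its own. A simplified sketch of the subsumption step (the enum ordering here is illustrative, not the hnae3 one, and FLR's special casing is omitted):

    #include <stdio.h>

    /* illustrative ordering only -- the real enum lives in hnae3.h */
    enum rst_level { RST_NONE, RST_FUNC, RST_GLOBAL, RST_IMP };

    /* pick the highest pending level and consume every request that
     * the chosen reset also covers
     */
    static enum rst_level pick_reset(unsigned int *pending)
    {
        for (int lvl = RST_IMP; lvl > RST_NONE; lvl--) {
            if (*pending & (1u << lvl)) {
                /* clear this level and everything below it */
                *pending &= ~((1u << (lvl + 1)) - 1u);
                return (enum rst_level)lvl;
            }
        }
        return RST_NONE;
    }

    int main(void)
    {
        unsigned int pending = (1u << RST_FUNC) | (1u << RST_GLOBAL);
        enum rst_level lvl = pick_reset(&pending);

        printf("chosen %d, remaining 0x%x\n", lvl, pending);
        return 0;  /* prints "chosen 2, remaining 0x0" */
    }
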
@@ -2782,9 +3445,6 @@
 	case HNAE3_GLOBAL_RESET:
 		clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
 		break;
-	case HNAE3_CORE_RESET:
-		clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
-		break;
 	default:
 		break;
 	}
@@ -2792,45 +3452,318 @@
 	if (!clearval)
 		return;
 
-	hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, clearval);
+	/* For revision 0x20, the reset interrupt source
+	 * can only be cleared after the hardware reset is done
+	 */
+	if (hdev->pdev->revision == 0x20)
+		hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG,
+				clearval);
+
 	hclge_enable_vector(&hdev->misc_vector, true);
 }
 
+static int hclge_reset_prepare_down(struct hclge_dev *hdev)
+{
+	int ret = 0;
+
+	switch (hdev->reset_type) {
+	case HNAE3_FUNC_RESET:
+		/* fall through */
+	case HNAE3_FLR_RESET:
+		ret = hclge_set_all_vf_rst(hdev, true);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable)
+{
+	u32 reg_val;
+
+	reg_val = hclge_read_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG);
+	if (enable)
+		reg_val |= HCLGE_NIC_SW_RST_RDY;
+	else
+		reg_val &= ~HCLGE_NIC_SW_RST_RDY;
+
+	hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG, reg_val);
+}
+
+static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
+{
+	u32 reg_val;
+	int ret = 0;
+
+	switch (hdev->reset_type) {
+	case HNAE3_FUNC_RESET:
+		/* confirm that all running VFs are ready
+		 * before requesting the PF reset
+		 */
+		ret = hclge_func_reset_sync_vf(hdev);
+		if (ret)
+			return ret;
+
+		ret = hclge_func_reset_cmd(hdev, 0);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"asserting function reset fail %d!\n", ret);
+			return ret;
+		}
+
+		/* After performing PF reset, it is not necessary to do the
+		 * mailbox handling or send any command to firmware, because
+		 * any mailbox handling or command to firmware is only valid
+		 * after hclge_cmd_init is called.
+		 */
+		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+		hdev->rst_stats.pf_rst_cnt++;
+		break;
+	case HNAE3_FLR_RESET:
+		/* confirm that all running VFs are ready
+		 * before requesting the reset
+		 */
+		ret = hclge_func_reset_sync_vf(hdev);
+		if (ret)
+			return ret;
+
+		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+		set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
+		hdev->rst_stats.flr_rst_cnt++;
+		break;
+	case HNAE3_IMP_RESET:
+		hclge_handle_imp_error(hdev);
+		reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
+		hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG,
+				BIT(HCLGE_VECTOR0_IMP_RESET_INT_B) | reg_val);
+		break;
+	default:
+		break;
+	}
+
+	/* inform hardware that preparatory work is done */
+	msleep(HCLGE_RESET_SYNC_TIME);
+	hclge_reset_handshake(hdev, true);
+	dev_info(&hdev->pdev->dev, "prepare wait ok\n");
+
+	return ret;
+}
+
+static bool hclge_reset_err_handle(struct hclge_dev *hdev)
+{
+#define MAX_RESET_FAIL_CNT 5
+
+	if (hdev->reset_pending) {
+		dev_info(&hdev->pdev->dev, "Reset pending %lu\n",
+			 hdev->reset_pending);
+		return true;
+	} else if (hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
+		   HCLGE_RESET_INT_M) {
+		dev_info(&hdev->pdev->dev,
+			 "reset failed because a new reset interrupt arrived\n");
+		hclge_clear_reset_cause(hdev);
+		return false;
+	} else if (hdev->rst_stats.reset_fail_cnt < MAX_RESET_FAIL_CNT) {
+		hdev->rst_stats.reset_fail_cnt++;
+		set_bit(hdev->reset_type, &hdev->reset_pending);
+		dev_info(&hdev->pdev->dev,
+			 "re-schedule reset task(%d)\n",
+			 hdev->rst_stats.reset_fail_cnt);
+		return true;
+	}
+
+	hclge_clear_reset_cause(hdev);
+
+	/* recover the handshake status when the reset fails */
+	hclge_reset_handshake(hdev, true);
+
+	dev_err(&hdev->pdev->dev, "Reset failed!\n");
+	return false;
+}
+
+static int hclge_set_rst_done(struct hclge_dev *hdev)
+{
+	struct hclge_pf_rst_done_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_pf_rst_done_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PF_RST_DONE, false);
+	req->pf_rst_done |= HCLGE_PF_RESET_DONE_BIT;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	/* To be compatible with the old firmware, which does not support
+	 * command HCLGE_OPC_PF_RST_DONE, just print a warning and
+	 * return success
+	 */
+	if (ret == -EOPNOTSUPP) {
+		dev_warn(&hdev->pdev->dev,
+			 "current firmware does not support command(0x%x)!\n",
+			 HCLGE_OPC_PF_RST_DONE);
+		return 0;
+	} else if (ret) {
+		dev_err(&hdev->pdev->dev, "assert PF reset done fail %d!\n",
+			ret);
+	}
+
+	return ret;
+}
+
+static int hclge_reset_prepare_up(struct hclge_dev *hdev)
+{
+	int ret = 0;
+
+	switch (hdev->reset_type) {
+	case HNAE3_FUNC_RESET:
+		/* fall through */
+	case HNAE3_FLR_RESET:
+		ret = hclge_set_all_vf_rst(hdev, false);
+		break;
+	case HNAE3_GLOBAL_RESET:
+		/* fall through */
+	case HNAE3_IMP_RESET:
+		ret = hclge_set_rst_done(hdev);
+		break;
+	default:
+		break;
+	}
+
+	/* clear the handshake status after re-initialization is done */
+	hclge_reset_handshake(hdev, false);
+
+	return ret;
+}
+
+static int hclge_reset_stack(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+	if (ret)
+		return ret;
+
+	ret = hclge_reset_ae_dev(hdev->ae_dev);
+	if (ret)
+		return ret;
+
+	ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+	if (ret)
+		return ret;
+
+	return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT);
+}
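
hclge_reset_stack() is an early-return pipeline: notify UNINIT, re-init the ae device, notify INIT, then RESTORE, aborting at the first failure so hclge_reset() can fall into its error path. The general shape as a sketch:

    /* run ordered reset steps, stopping at the first failure; the
     * caller decides whether to retry the whole sequence
     */
    typedef int (*reset_step_t)(void);

    static int run_reset_steps(const reset_step_t *steps, int n)
    {
        for (int i = 0; i < n; i++) {
            int ret = steps[i]();

            if (ret)
                return ret;
        }
        return 0;
    }
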
+
 static void hclge_reset(struct hclge_dev *hdev)
 {
-	struct hnae3_handle *handle;
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	enum hnae3_reset_type reset_level;
+	int ret;
 
+	/* Initialize ae_dev reset status as well, in case the enet layer wants
+	 * to know if the device is undergoing reset
+	 */
+	ae_dev->reset_type = hdev->reset_type;
+	hdev->rst_stats.reset_cnt++;
 	/* perform reset of the stack & ae device for a client */
-	handle = &hdev->vport[0].nic;
+	ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
+	if (ret)
+		goto err_reset;
+
+	ret = hclge_reset_prepare_down(hdev);
+	if (ret)
+		goto err_reset;
+
 	rtnl_lock();
-	hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	if (ret)
+		goto err_reset_lock;
+
 	rtnl_unlock();
 
-	if (!hclge_reset_wait(hdev)) {
-		rtnl_lock();
-		hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
-		hclge_reset_ae_dev(hdev->ae_dev);
-		hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+	ret = hclge_reset_prepare_wait(hdev);
+	if (ret)
+		goto err_reset;
 
-		hclge_clear_reset_cause(hdev);
-	} else {
-		rtnl_lock();
-		/* schedule again to check pending resets later */
-		set_bit(hdev->reset_type, &hdev->reset_pending);
+	if (hclge_reset_wait(hdev))
+		goto err_reset;
+
+	hdev->rst_stats.hw_reset_done_cnt++;
+
+	ret = hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT);
+	if (ret)
+		goto err_reset;
+
+	rtnl_lock();
+
+	ret = hclge_reset_stack(hdev);
+	if (ret)
+		goto err_reset_lock;
+
+	hclge_clear_reset_cause(hdev);
+
+	ret = hclge_reset_prepare_up(hdev);
+	if (ret)
+		goto err_reset_lock;
+
+	rtnl_unlock();
+
+	ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT);
+	/* ignore the RoCE notify error once the reset has already failed
+	 * HCLGE_RESET_MAX_FAIL_CNT - 1 times
+	 */
+	if (ret &&
+	    hdev->rst_stats.reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1)
+		goto err_reset;
+
+	rtnl_lock();
+
+	ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+	if (ret)
+		goto err_reset_lock;
+
+	rtnl_unlock();
+
+	ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT);
+	if (ret)
+		goto err_reset;
+
+	hdev->last_reset_time = jiffies;
+	hdev->rst_stats.reset_fail_cnt = 0;
+	hdev->rst_stats.reset_done_cnt++;
+	ae_dev->reset_type = HNAE3_NONE_RESET;
+
+	/* if default_reset_request holds a higher level reset request,
+	 * it should be handled as soon as possible, since some errors
+	 * need this kind of reset to be fixed.
+	 */
+	reset_level = hclge_get_reset_level(ae_dev,
+					    &hdev->default_reset_request);
+	if (reset_level != HNAE3_NONE_RESET)
+		set_bit(reset_level, &hdev->reset_request);
+
+	return;
+
+err_reset_lock:
+	rtnl_unlock();
+err_reset:
+	if (hclge_reset_err_handle(hdev))
 		hclge_reset_task_schedule(hdev);
-	}
-
-	hclge_notify_client(hdev, HNAE3_UP_CLIENT);
-	handle->last_reset_time = jiffies;
-	rtnl_unlock();
 }
 
-static void hclge_reset_event(struct hnae3_handle *handle)
+static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+	struct hclge_dev *hdev = ae_dev->priv;
 
-	/* check if this is a new reset request and we are not here just because
+	/* We might end up getting called broadly because of the 2 cases below:
+	 * 1. A recoverable error was conveyed through APEI and the only way
+	 *    to bring back normalcy is to reset.
+	 * 2. A new reset request from the stack due to timeout
+	 *
+	 * For the first case, the error event might not have an ae handle
+	 * available. Check if this is a new reset request and we are not here
+	 * just because the
 	 * last reset attempt did not succeed and watchdog hit us again. We will
 	 * know this if last reset request did not occur very recently (watchdog
 	 * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
@@ -2839,24 +3772,58 @@
 	 * want to make sure we throttle the reset request. Therefore, we will
 	 * not allow it again before 3*HZ times.
 	 */
-	if (time_before(jiffies, (handle->last_reset_time + 3 * HZ)))
-		return;
-	else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
-		handle->reset_level = HNAE3_FUNC_RESET;
+	if (!handle)
+		handle = &hdev->vport[0].nic;
 
-	dev_info(&hdev->pdev->dev, "received reset event , reset type is %d",
-		 handle->reset_level);
+	if (time_before(jiffies, (hdev->last_reset_time +
+				  HCLGE_RESET_INTERVAL))) {
+		mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
+		return;
+	} else if (hdev->default_reset_request) {
+		hdev->reset_level =
+			hclge_get_reset_level(ae_dev,
+					      &hdev->default_reset_request);
+	} else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) {
+		hdev->reset_level = HNAE3_FUNC_RESET;
+	}
+
+	dev_info(&hdev->pdev->dev, "received reset event, reset type is %d\n",
+		 hdev->reset_level);
 
 	/* request reset & schedule reset task */
-	set_bit(handle->reset_level, &hdev->reset_request);
+	set_bit(hdev->reset_level, &hdev->reset_request);
 	hclge_reset_task_schedule(hdev);
 
-	if (handle->reset_level < HNAE3_GLOBAL_RESET)
-		handle->reset_level++;
+	if (hdev->reset_level < HNAE3_GLOBAL_RESET)
+		hdev->reset_level++;
+}
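
The throttle in hclge_reset_event() is pure timestamp arithmetic: inside the quiet window after the last reset the request is deferred via the reset timer, after a long calm period the level drops back to the mildest (function) reset, and each accepted event escalates the level for next time. A userspace sketch of the two window checks, with monotonic seconds standing in for jiffies (window lengths are illustrative):

    #include <stdbool.h>
    #include <time.h>

    #define QUIET_WINDOW_SEC 3   /* stand-in for HCLGE_RESET_INTERVAL */
    #define CALM_PERIOD_SEC  20  /* stand-in for 4 * 5 * HZ */

    static time_t now_sec(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec;
    }

    /* true means "too soon, defer via the reset timer"; a long calm
     * period drops the escalated level back to the mildest reset
     */
    static bool throttle_reset(time_t last_reset, int *level)
    {
        time_t now = now_sec();

        if (now < last_reset + QUIET_WINDOW_SEC)
            return true;
        if (now > last_reset + CALM_PERIOD_SEC)
            *level = 1;  /* mildest level, like HNAE3_FUNC_RESET */
        return false;
    }
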
+
+static void hclge_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
+					enum hnae3_reset_type rst_type)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+
+	set_bit(rst_type, &hdev->default_reset_request);
+}
+
+static void hclge_reset_timer(struct timer_list *t)
+{
+	struct hclge_dev *hdev = from_timer(hdev, t, reset_timer);
+
+	/* if default_reset_request has no value, it means that this reset
+	 * request has already been handled, so just return here
+	 */
+	if (!hdev->default_reset_request)
+		return;
+
+	dev_info(&hdev->pdev->dev,
+		 "triggering reset in reset timer\n");
+	hclge_reset_event(hdev->pdev, NULL);
 }
 
 static void hclge_reset_subtask(struct hclge_dev *hdev)
 {
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+
 	/* check if there is any ongoing reset in the hardware. This status can
 	 * be checked from reset_pending. If there is then, we need to wait for
 	 * hardware to complete reset.
@@ -2866,12 +3833,13 @@
 	 *    b. else, we can come back later to check this status so re-sched
 	 *       now.
 	 */
-	hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_pending);
+	hdev->last_reset_time = jiffies;
+	hdev->reset_type = hclge_get_reset_level(ae_dev, &hdev->reset_pending);
 	if (hdev->reset_type != HNAE3_NONE_RESET)
 		hclge_reset(hdev);
 
 	/* check if we got any *new* reset requests to be honored */
-	hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_request);
+	hdev->reset_type = hclge_get_reset_level(ae_dev, &hdev->reset_request);
 	if (hdev->reset_type != HNAE3_NONE_RESET)
 		hclge_do_reset(hdev);
 
@@ -2908,19 +3876,45 @@
 	clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
 }
 
+static void hclge_update_vport_alive(struct hclge_dev *hdev)
+{
+	int i;
+
+	/* start from vport 1, because vport 0 (the PF) is always alive */
+	for (i = 1; i < hdev->num_alloc_vport; i++) {
+		struct hclge_vport *vport = &hdev->vport[i];
+
+		if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ))
+			clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+
+		/* If the VF is not alive, restore its MPS to the default value */
+		if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+			vport->mps = HCLGE_MAC_DEFAULT_FRAME;
+	}
+}
+
 static void hclge_service_task(struct work_struct *work)
 {
 	struct hclge_dev *hdev =
-		container_of(work, struct hclge_dev, service_task);
+		container_of(work, struct hclge_dev, service_task.work);
+
+	clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
 
 	if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) {
 		hclge_update_stats_for_all(hdev);
 		hdev->hw_stats.stats_timer = 0;
 	}
 
-	hclge_update_speed_duplex(hdev);
+	hclge_update_port_info(hdev);
 	hclge_update_link_status(hdev);
-	hclge_service_complete(hdev);
+	hclge_update_vport_alive(hdev);
+	hclge_sync_vlan_filter(hdev);
+	if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
+		hclge_rfs_filter_expire(hdev);
+		hdev->fd_arfs_expire_timer = 0;
+	}
+
+	hclge_task_schedule(hdev, round_jiffies_relative(HZ));
 }
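
With this change the service task is a delayed work item that re-arms itself at the end of every pass via hclge_task_schedule(hdev, round_jiffies_relative(HZ)). A minimal module sketch of that self-rescheduling pattern (the work body is left empty; only the scheduling shape is the point):

    #include <linux/module.h>
    #include <linux/timer.h>
    #include <linux/workqueue.h>

    static struct delayed_work periodic_work;

    static void periodic_fn(struct work_struct *work)
    {
        /* periodic housekeeping would go here */

        /* re-arm roughly one second out; round_jiffies_relative()
         * lets this wakeup coalesce with other 1 Hz work
         */
        schedule_delayed_work(&periodic_work, round_jiffies_relative(HZ));
    }

    static int __init demo_init(void)
    {
        INIT_DELAYED_WORK(&periodic_work, periodic_fn);
        schedule_delayed_work(&periodic_work, round_jiffies_relative(HZ));
        return 0;
    }

    static void __exit demo_exit(void)
    {
        cancel_delayed_work_sync(&periodic_work);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
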
 
 struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle)
@@ -2943,6 +3937,7 @@
 	int alloc = 0;
 	int i, j;
 
+	vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num);
 	vector_num = min(hdev->num_msi_left, vector_num);
 
 	for (j = 0; j < vector_num; j++) {
@@ -3013,29 +4008,28 @@
 				  const u8 hfunc, const u8 *key)
 {
 	struct hclge_rss_config_cmd *req;
+	unsigned int key_offset = 0;
 	struct hclge_desc desc;
-	int key_offset;
+	int key_counts;
 	int key_size;
 	int ret;
 
+	key_counts = HCLGE_RSS_KEY_SIZE;
 	req = (struct hclge_rss_config_cmd *)desc.data;
 
-	for (key_offset = 0; key_offset < 3; key_offset++) {
+	while (key_counts) {
 		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG,
 					   false);
 
 		req->hash_config |= (hfunc & HCLGE_RSS_HASH_ALGO_MASK);
 		req->hash_config |= (key_offset << HCLGE_RSS_HASH_KEY_OFFSET_B);
 
-		if (key_offset == 2)
-			key_size =
-			HCLGE_RSS_KEY_SIZE - HCLGE_RSS_HASH_KEY_NUM * 2;
-		else
-			key_size = HCLGE_RSS_HASH_KEY_NUM;
-
+		key_size = min(HCLGE_RSS_HASH_KEY_NUM, key_counts);
 		memcpy(req->hash_key,
 		       key + key_offset * HCLGE_RSS_HASH_KEY_NUM, key_size);
 
+		key_counts -= key_size;
+		key_offset++;
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
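
The rewritten key-download loop replaces three hard-coded iterations, with a special size for the last one, by a generic walk: take min(chunk, bytes left) per descriptor until the key is exhausted. The same chunking in plain C (sizes mirror HCLGE_RSS_KEY_SIZE and HCLGE_RSS_HASH_KEY_NUM but are stand-ins here):

    #include <stdio.h>
    #include <string.h>

    #define KEY_SIZE 40  /* stand-in for HCLGE_RSS_KEY_SIZE */
    #define CHUNK    16  /* stand-in for HCLGE_RSS_HASH_KEY_NUM */

    static void send_key_in_chunks(const unsigned char *key)
    {
        int left = KEY_SIZE;
        int offset = 0;

        while (left) {
            int len = left < CHUNK ? left : CHUNK;
            unsigned char payload[CHUNK] = { 0 };

            memcpy(payload, key + offset * CHUNK, len);
            /* one firmware command would be issued here */
            printf("desc %d carries %d bytes\n", offset, len);
            left -= len;
            offset++;
        }
    }

    int main(void)
    {
        unsigned char key[KEY_SIZE] = { 0 };

        send_key_in_chunks(key);  /* desc 0: 16, desc 1: 16, desc 2: 8 */
        return 0;
    }
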
@@ -3110,6 +4104,22 @@
 	return ret;
 }
 
+static void hclge_get_rss_type(struct hclge_vport *vport)
+{
+	if (vport->rss_tuple_sets.ipv4_tcp_en ||
+	    vport->rss_tuple_sets.ipv4_udp_en ||
+	    vport->rss_tuple_sets.ipv4_sctp_en ||
+	    vport->rss_tuple_sets.ipv6_tcp_en ||
+	    vport->rss_tuple_sets.ipv6_udp_en ||
+	    vport->rss_tuple_sets.ipv6_sctp_en)
+		vport->nic.kinfo.rss_type = PKT_HASH_TYPE_L4;
+	else if (vport->rss_tuple_sets.ipv4_fragment_en ||
+		 vport->rss_tuple_sets.ipv6_fragment_en)
+		vport->nic.kinfo.rss_type = PKT_HASH_TYPE_L3;
+	else
+		vport->nic.kinfo.rss_type = PKT_HASH_TYPE_NONE;
+}
+
 static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
 {
 	struct hclge_rss_input_tuple_cmd *req;
@@ -3129,6 +4139,7 @@
 	req->ipv6_udp_en = hdev->vport[0].rss_tuple_sets.ipv6_udp_en;
 	req->ipv6_sctp_en = hdev->vport[0].rss_tuple_sets.ipv6_sctp_en;
 	req->ipv6_fragment_en = hdev->vport[0].rss_tuple_sets.ipv6_fragment_en;
+	hclge_get_rss_type(&hdev->vport[0]);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
 		dev_err(&hdev->pdev->dev,
@@ -3143,8 +4154,19 @@
 	int i;
 
 	/* Get hash algorithm */
-	if (hfunc)
-		*hfunc = vport->rss_algo;
+	if (hfunc) {
+		switch (vport->rss_algo) {
+		case HCLGE_RSS_HASH_ALGO_TOEPLITZ:
+			*hfunc = ETH_RSS_HASH_TOP;
+			break;
+		case HCLGE_RSS_HASH_ALGO_SIMPLE:
+			*hfunc = ETH_RSS_HASH_XOR;
+			break;
+		default:
+			*hfunc = ETH_RSS_HASH_UNKNOWN;
+			break;
+		}
+	}
 
 	/* Get the RSS Key required by the user */
 	if (key)
@@ -3168,12 +4190,20 @@
 
 	/* Set the RSS Hash Key if specified by the user */
 	if (key) {
-
-		if (hfunc == ETH_RSS_HASH_TOP ||
-		    hfunc == ETH_RSS_HASH_NO_CHANGE)
+		switch (hfunc) {
+		case ETH_RSS_HASH_TOP:
 			hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
-		else
+			break;
+		case ETH_RSS_HASH_XOR:
+			hash_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
+			break;
+		case ETH_RSS_HASH_NO_CHANGE:
+			hash_algo = vport->rss_algo;
+			break;
+		default:
 			return -EINVAL;
+		}
+
 		ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
 		if (ret)
 			return ret;
@@ -3291,6 +4321,7 @@
 	vport->rss_tuple_sets.ipv6_udp_en = req->ipv6_udp_en;
 	vport->rss_tuple_sets.ipv6_sctp_en = req->ipv6_sctp_en;
 	vport->rss_tuple_sets.ipv6_fragment_en = req->ipv6_fragment_en;
+	hclge_get_rss_type(vport);
 	return 0;
 }
 
@@ -3357,13 +4388,14 @@
 	struct hclge_vport *vport = hdev->vport;
 	u8 *rss_indir = vport[0].rss_indirection_tbl;
 	u16 rss_size = vport[0].alloc_rss_size;
+	u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
+	u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
 	u8 *key = vport[0].rss_hash_key;
 	u8 hfunc = vport[0].rss_algo;
-	u16 tc_offset[HCLGE_MAX_TC_NUM];
 	u16 tc_valid[HCLGE_MAX_TC_NUM];
-	u16 tc_size[HCLGE_MAX_TC_NUM];
 	u16 roundup_size;
-	int i, ret;
+	unsigned int i;
+	int ret;
 
 	ret = hclge_set_rss_indir_table(hdev, rss_indir);
 	if (ret)
@@ -3419,8 +4451,11 @@
 
 static void hclge_rss_init_cfg(struct hclge_dev *hdev)
 {
+	int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
 	struct hclge_vport *vport = hdev->vport;
-	int i;
+
+	if (hdev->pdev->revision >= 0x21)
+		rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
 
 	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 		vport[i].rss_tuple_sets.ipv4_tcp_en =
@@ -3440,9 +4475,10 @@
 		vport[i].rss_tuple_sets.ipv6_fragment_en =
 			HCLGE_RSS_INPUT_TUPLE_OTHER;
 
-		vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+		vport[i].rss_algo = rss_algo;
 
-		netdev_rss_key_fill(vport[i].rss_hash_key, HCLGE_RSS_KEY_SIZE);
+		memcpy(vport[i].rss_hash_key, hclge_hash_key,
+		       HCLGE_RSS_KEY_SIZE);
 	}
 
 	hclge_rss_indir_init_cfg(hdev);
@@ -3455,8 +4491,8 @@
 	struct hclge_dev *hdev = vport->back;
 	struct hnae3_ring_chain_node *node;
 	struct hclge_desc desc;
-	struct hclge_ctrl_vector_chain_cmd *req
-		= (struct hclge_ctrl_vector_chain_cmd *)desc.data;
+	struct hclge_ctrl_vector_chain_cmd *req =
+		(struct hclge_ctrl_vector_chain_cmd *)desc.data;
 	enum hclge_cmd_status status;
 	enum hclge_opcode_type op;
 	u16 tqp_type_and_id;
@@ -3514,8 +4550,7 @@
 	return 0;
 }
 
-static int hclge_map_ring_to_vector(struct hnae3_handle *handle,
-				    int vector,
+static int hclge_map_ring_to_vector(struct hnae3_handle *handle, int vector,
 				    struct hnae3_ring_chain_node *ring_chain)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -3532,8 +4567,7 @@
 	return hclge_bind_ring_with_vector(vport, vector_id, true, ring_chain);
 }
 
-static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
-				       int vector,
+static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector,
 				       struct hnae3_ring_chain_node *ring_chain)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -3554,8 +4588,7 @@
 	if (ret)
 		dev_err(&handle->pdev->dev,
 			"Unmap ring from vector fail. vectorid=%d, ret =%d\n",
-			vector_id,
-			ret);
+			vector_id, ret);
 
 	return ret;
 }
@@ -3604,16 +4637,1589 @@
 	param->vf_id = vport_id;
 }
 
-static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
-				   bool en_mc_pmc)
+static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
+				  bool en_mc_pmc)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_promisc_param param;
+	bool en_bc_pmc = true;
 
-	hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, true,
+	/* For revision 0x20, if broadcast promisc is enabled, the vlan filter
+	 * is always bypassed. So broadcast promisc should be disabled until
+	 * the user enables promisc mode
+	 */
+	if (handle->pdev->revision == 0x20)
+		en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false;
+
+	hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, en_bc_pmc,
 				 vport->vport_id);
-	hclge_cmd_set_promisc_mode(hdev, &param);
+	return hclge_cmd_set_promisc_mode(hdev, &param);
+}
+
+static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
+{
+	struct hclge_get_fd_mode_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_MODE_CTRL, true);
+
+	req = (struct hclge_get_fd_mode_cmd *)desc.data;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "get fd mode fail, ret=%d\n", ret);
+		return ret;
+	}
+
+	*fd_mode = req->mode;
+
+	return ret;
+}
+
+static int hclge_get_fd_allocation(struct hclge_dev *hdev,
+				   u32 *stage1_entry_num,
+				   u32 *stage2_entry_num,
+				   u16 *stage1_counter_num,
+				   u16 *stage2_counter_num)
+{
+	struct hclge_get_fd_allocation_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_GET_ALLOCATION, true);
+
+	req = (struct hclge_get_fd_allocation_cmd *)desc.data;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "query fd allocation fail, ret=%d\n",
+			ret);
+		return ret;
+	}
+
+	*stage1_entry_num = le32_to_cpu(req->stage1_entry_num);
+	*stage2_entry_num = le32_to_cpu(req->stage2_entry_num);
+	*stage1_counter_num = le16_to_cpu(req->stage1_counter_num);
+	*stage2_counter_num = le16_to_cpu(req->stage2_counter_num);
+
+	return ret;
+}
+
+static int hclge_set_fd_key_config(struct hclge_dev *hdev, int stage_num)
+{
+	struct hclge_set_fd_key_config_cmd *req;
+	struct hclge_fd_key_cfg *stage;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_KEY_CONFIG, false);
+
+	req = (struct hclge_set_fd_key_config_cmd *)desc.data;
+	stage = &hdev->fd_cfg.key_cfg[stage_num];
+	req->stage = stage_num;
+	req->key_select = stage->key_sel;
+	req->inner_sipv6_word_en = stage->inner_sipv6_word_en;
+	req->inner_dipv6_word_en = stage->inner_dipv6_word_en;
+	req->outer_sipv6_word_en = stage->outer_sipv6_word_en;
+	req->outer_dipv6_word_en = stage->outer_dipv6_word_en;
+	req->tuple_mask = cpu_to_le32(~stage->tuple_active);
+	req->meta_data_mask = cpu_to_le32(~stage->meta_data_active);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "set fd key fail, ret=%d\n", ret);
+
+	return ret;
+}
+
+static int hclge_init_fd_config(struct hclge_dev *hdev)
+{
+#define LOW_2_WORDS		0x03
+	struct hclge_fd_key_cfg *key_cfg;
+	int ret;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return 0;
+
+	ret = hclge_get_fd_mode(hdev, &hdev->fd_cfg.fd_mode);
+	if (ret)
+		return ret;
+
+	switch (hdev->fd_cfg.fd_mode) {
+	case HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1:
+		hdev->fd_cfg.max_key_length = MAX_KEY_LENGTH;
+		break;
+	case HCLGE_FD_MODE_DEPTH_4K_WIDTH_200B_STAGE_1:
+		hdev->fd_cfg.max_key_length = MAX_KEY_LENGTH / 2;
+		break;
+	default:
+		dev_err(&hdev->pdev->dev,
+			"Unsupported flow director mode %d\n",
+			hdev->fd_cfg.fd_mode);
+		return -EOPNOTSUPP;
+	}
+
+	hdev->fd_cfg.proto_support =
+		TCP_V4_FLOW | UDP_V4_FLOW | SCTP_V4_FLOW | TCP_V6_FLOW |
+		UDP_V6_FLOW | SCTP_V6_FLOW | IPV4_USER_FLOW | IPV6_USER_FLOW;
+	key_cfg = &hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1];
+	key_cfg->key_sel = HCLGE_FD_KEY_BASE_ON_TUPLE;
+	key_cfg->inner_sipv6_word_en = LOW_2_WORDS;
+	key_cfg->inner_dipv6_word_en = LOW_2_WORDS;
+	key_cfg->outer_sipv6_word_en = 0;
+	key_cfg->outer_dipv6_word_en = 0;
+
+	key_cfg->tuple_active = BIT(INNER_VLAN_TAG_FST) | BIT(INNER_ETH_TYPE) |
+				BIT(INNER_IP_PROTO) | BIT(INNER_IP_TOS) |
+				BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
+				BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+
+	/* If the max 400-bit key is used, we can support tuples for ether type */
+	if (hdev->fd_cfg.max_key_length == MAX_KEY_LENGTH) {
+		hdev->fd_cfg.proto_support |= ETHER_FLOW;
+		key_cfg->tuple_active |=
+				BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC);
+	}
+
+	/* roce_type is used to filter roce frames;
+	 * dst_vport is used to bind the rule to a specific vport
+	 */
+	key_cfg->meta_data_active = BIT(ROCE_TYPE) | BIT(DST_VPORT);
+
+	ret = hclge_get_fd_allocation(hdev,
+				      &hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1],
+				      &hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_2],
+				      &hdev->fd_cfg.cnt_num[HCLGE_FD_STAGE_1],
+				      &hdev->fd_cfg.cnt_num[HCLGE_FD_STAGE_2]);
+	if (ret)
+		return ret;
+
+	return hclge_set_fd_key_config(hdev, HCLGE_FD_STAGE_1);
+}
+
+static int hclge_fd_tcam_config(struct hclge_dev *hdev, u8 stage, bool sel_x,
+				int loc, u8 *key, bool is_add)
+{
+	struct hclge_fd_tcam_config_1_cmd *req1;
+	struct hclge_fd_tcam_config_2_cmd *req2;
+	struct hclge_fd_tcam_config_3_cmd *req3;
+	struct hclge_desc desc[3];
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_FD_TCAM_OP, false);
+	desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[2], HCLGE_OPC_FD_TCAM_OP, false);
+
+	req1 = (struct hclge_fd_tcam_config_1_cmd *)desc[0].data;
+	req2 = (struct hclge_fd_tcam_config_2_cmd *)desc[1].data;
+	req3 = (struct hclge_fd_tcam_config_3_cmd *)desc[2].data;
+
+	req1->stage = stage;
+	req1->xy_sel = sel_x ? 1 : 0;
+	hnae3_set_bit(req1->port_info, HCLGE_FD_EPORT_SW_EN_B, 0);
+	req1->index = cpu_to_le32(loc);
+	req1->entry_vld = sel_x ? is_add : 0;
+
+	if (key) {
+		memcpy(req1->tcam_data, &key[0], sizeof(req1->tcam_data));
+		memcpy(req2->tcam_data, &key[sizeof(req1->tcam_data)],
+		       sizeof(req2->tcam_data));
+		memcpy(req3->tcam_data, &key[sizeof(req1->tcam_data) +
+		       sizeof(req2->tcam_data)], sizeof(req3->tcam_data));
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, desc, 3);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"config tcam key fail, ret=%d\n",
+			ret);
+
+	return ret;
+}
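
A flow director TCAM key does not fit in one command descriptor, so hclge_fd_tcam_config() chains three descriptors and sets HCLGE_CMD_FLAG_NEXT on all but the last so firmware treats them as a single operation. A sketch of that split; the struct layout, sizes, and flag value are illustrative, not the real descriptor format:

    #include <stdint.h>
    #include <string.h>

    #define PAYLOAD   20      /* illustrative per-descriptor key bytes */
    #define NDESC     3
    #define FLAG_NEXT 0x0004  /* illustrative "more descriptors follow" bit */

    struct desc {
        uint16_t flag;
        uint8_t data[PAYLOAD];
    };

    /* split one key across NDESC chained descriptors; every descriptor
     * except the last advertises that another one follows
     */
    static void fill_chained_descs(struct desc *d,
                                   const uint8_t key[NDESC * PAYLOAD])
    {
        for (int i = 0; i < NDESC; i++) {
            d[i].flag = (i < NDESC - 1) ? FLAG_NEXT : 0;
            memcpy(d[i].data, key + i * PAYLOAD, PAYLOAD);
        }
    }
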
+
+static int hclge_fd_ad_config(struct hclge_dev *hdev, u8 stage, int loc,
+			      struct hclge_fd_ad_data *action)
+{
+	struct hclge_fd_ad_config_cmd *req;
+	struct hclge_desc desc;
+	u64 ad_data = 0;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_AD_OP, false);
+
+	req = (struct hclge_fd_ad_config_cmd *)desc.data;
+	req->index = cpu_to_le32(loc);
+	req->stage = stage;
+
+	hnae3_set_bit(ad_data, HCLGE_FD_AD_WR_RULE_ID_B,
+		      action->write_rule_id_to_bd);
+	hnae3_set_field(ad_data, HCLGE_FD_AD_RULE_ID_M, HCLGE_FD_AD_RULE_ID_S,
+			action->rule_id);
+	ad_data <<= 32;
+	hnae3_set_bit(ad_data, HCLGE_FD_AD_DROP_B, action->drop_packet);
+	hnae3_set_bit(ad_data, HCLGE_FD_AD_DIRECT_QID_B,
+		      action->forward_to_direct_queue);
+	hnae3_set_field(ad_data, HCLGE_FD_AD_QID_M, HCLGE_FD_AD_QID_S,
+			action->queue_id);
+	hnae3_set_bit(ad_data, HCLGE_FD_AD_USE_COUNTER_B, action->use_counter);
+	hnae3_set_field(ad_data, HCLGE_FD_AD_COUNTER_NUM_M,
+			HCLGE_FD_AD_COUNTER_NUM_S, action->counter_id);
+	hnae3_set_bit(ad_data, HCLGE_FD_AD_NXT_STEP_B, action->use_next_stage);
+	hnae3_set_field(ad_data, HCLGE_FD_AD_NXT_KEY_M, HCLGE_FD_AD_NXT_KEY_S,
+			action->next_input_key);
+
+	req->ad_data = cpu_to_le64(ad_data);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "fd ad config fail, ret=%d\n", ret);
+
+	return ret;
+}
+
+static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
+				   struct hclge_fd_rule *rule)
+{
+	u16 tmp_x_s, tmp_y_s;
+	u32 tmp_x_l, tmp_y_l;
+	int i;
+
+	if (rule->unused_tuple & tuple_bit)
+		return true;
+
+	switch (tuple_bit) {
+	case 0:
+		return false;
+	case BIT(INNER_DST_MAC):
+		for (i = 0; i < ETH_ALEN; i++) {
+			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
+			       rule->tuples_mask.dst_mac[i]);
+			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
+			       rule->tuples_mask.dst_mac[i]);
+		}
+
+		return true;
+	case BIT(INNER_SRC_MAC):
+		for (i = 0; i < ETH_ALEN; i++) {
+			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
+			       rule->tuples_mask.src_mac[i]);
+			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
+			       rule->tuples_mask.src_mac[i]);
+		}
+
+		return true;
+	case BIT(INNER_VLAN_TAG_FST):
+		calc_x(tmp_x_s, rule->tuples.vlan_tag1,
+		       rule->tuples_mask.vlan_tag1);
+		calc_y(tmp_y_s, rule->tuples.vlan_tag1,
+		       rule->tuples_mask.vlan_tag1);
+		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
+		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+
+		return true;
+	case BIT(INNER_ETH_TYPE):
+		calc_x(tmp_x_s, rule->tuples.ether_proto,
+		       rule->tuples_mask.ether_proto);
+		calc_y(tmp_y_s, rule->tuples.ether_proto,
+		       rule->tuples_mask.ether_proto);
+		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
+		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+
+		return true;
+	case BIT(INNER_IP_TOS):
+		calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
+		calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
+
+		return true;
+	case BIT(INNER_IP_PROTO):
+		calc_x(*key_x, rule->tuples.ip_proto,
+		       rule->tuples_mask.ip_proto);
+		calc_y(*key_y, rule->tuples.ip_proto,
+		       rule->tuples_mask.ip_proto);
+
+		return true;
+	case BIT(INNER_SRC_IP):
+		calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX],
+		       rule->tuples_mask.src_ip[IPV4_INDEX]);
+		calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX],
+		       rule->tuples_mask.src_ip[IPV4_INDEX]);
+		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
+		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
+
+		return true;
+	case BIT(INNER_DST_IP):
+		calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX],
+		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
+		calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX],
+		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
+		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
+		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
+
+		return true;
+	case BIT(INNER_SRC_PORT):
+		calc_x(tmp_x_s, rule->tuples.src_port,
+		       rule->tuples_mask.src_port);
+		calc_y(tmp_y_s, rule->tuples.src_port,
+		       rule->tuples_mask.src_port);
+		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
+		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+
+		return true;
+	case BIT(INNER_DST_PORT):
+		calc_x(tmp_x_s, rule->tuples.dst_port,
+		       rule->tuples_mask.dst_port);
+		calc_y(tmp_y_s, rule->tuples.dst_port,
+		       rule->tuples_mask.dst_port);
+		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
+		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+
+		return true;
+	default:
+		return false;
+	}
+}
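
hclge_fd_convert_tuple() turns every tuple into a TCAM x/y pair through the driver's calc_x()/calc_y() macros. A common encoding for masked TCAM matching, assumed here as what those macros compute, is x = ~value & mask and y = value & mask: a masked-out bit becomes (0,0), i.e. don't-care, and a cared-about bit sets exactly one of x or y according to its value:

    #include <stdint.h>
    #include <stdio.h>

    /* assumed TCAM x/y encoding; see the driver's calc_x()/calc_y()
     * macros for the authoritative definition
     */
    static void tcam_encode(uint16_t val, uint16_t mask,
                            uint16_t *x, uint16_t *y)
    {
        *x = (uint16_t)(~val & mask);  /* bits that must be 0 */
        *y = (uint16_t)(val & mask);   /* bits that must be 1 */
    }

    int main(void)
    {
        uint16_t x, y;

        tcam_encode(0x1234, 0xff00, &x, &y);  /* match high byte only */
        printf("x=0x%04x y=0x%04x\n", x, y);  /* x=0xed00 y=0x1200 */
        return 0;
    }
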
+
+static u32 hclge_get_port_number(enum HLCGE_PORT_TYPE port_type, u8 pf_id,
+				 u8 vf_id, u8 network_port_id)
+{
+	u32 port_number = 0;
+
+	if (port_type == HOST_PORT) {
+		hnae3_set_field(port_number, HCLGE_PF_ID_M, HCLGE_PF_ID_S,
+				pf_id);
+		hnae3_set_field(port_number, HCLGE_VF_ID_M, HCLGE_VF_ID_S,
+				vf_id);
+		hnae3_set_bit(port_number, HCLGE_PORT_TYPE_B, HOST_PORT);
+	} else {
+		hnae3_set_field(port_number, HCLGE_NETWORK_PORT_ID_M,
+				HCLGE_NETWORK_PORT_ID_S, network_port_id);
+		hnae3_set_bit(port_number, HCLGE_PORT_TYPE_B, NETWORK_PORT);
+	}
+
+	return port_number;
+}
+
+static void hclge_fd_convert_meta_data(struct hclge_fd_key_cfg *key_cfg,
+				       __le32 *key_x, __le32 *key_y,
+				       struct hclge_fd_rule *rule)
+{
+	u32 tuple_bit, meta_data = 0, tmp_x, tmp_y, port_number;
+	u8 cur_pos = 0, tuple_size, shift_bits;
+	unsigned int i;
+
+	for (i = 0; i < MAX_META_DATA; i++) {
+		tuple_size = meta_data_key_info[i].key_length;
+		tuple_bit = key_cfg->meta_data_active & BIT(i);
+
+		switch (tuple_bit) {
+		case BIT(ROCE_TYPE):
+			hnae3_set_bit(meta_data, cur_pos, NIC_PACKET);
+			cur_pos += tuple_size;
+			break;
+		case BIT(DST_VPORT):
+			port_number = hclge_get_port_number(HOST_PORT, 0,
+							    rule->vf_id, 0);
+			hnae3_set_field(meta_data,
+					GENMASK(cur_pos + tuple_size - 1,
+						cur_pos),
+					cur_pos, port_number);
+			cur_pos += tuple_size;
+			break;
+		default:
+			break;
+		}
+	}
+
+	calc_x(tmp_x, meta_data, 0xFFFFFFFF);
+	calc_y(tmp_y, meta_data, 0xFFFFFFFF);
+	shift_bits = sizeof(meta_data) * 8 - cur_pos;
+
+	*key_x = cpu_to_le32(tmp_x << shift_bits);
+	*key_y = cpu_to_le32(tmp_y << shift_bits);
+}
+
+/* A complete key is combined with meta data key and tuple key.
+ * Meta data key is stored at the MSB region, and tuple key is stored at
+ * the LSB region, and unused bits are filled with zeros.
+ */
+static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
+			    struct hclge_fd_rule *rule)
+{
+	struct hclge_fd_key_cfg *key_cfg = &hdev->fd_cfg.key_cfg[stage];
+	u8 key_x[MAX_KEY_BYTES], key_y[MAX_KEY_BYTES];
+	u8 *cur_key_x, *cur_key_y;
+	unsigned int i;
+	int ret, tuple_size;
+	u8 meta_data_region;
+
+	memset(key_x, 0, sizeof(key_x));
+	memset(key_y, 0, sizeof(key_y));
+	cur_key_x = key_x;
+	cur_key_y = key_y;
+
+	for (i = 0; i < MAX_TUPLE; i++) {
+		bool tuple_valid;
+		u32 check_tuple;
+
+		tuple_size = tuple_key_info[i].key_length / 8;
+		check_tuple = key_cfg->tuple_active & BIT(i);
+
+		tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x,
+						     cur_key_y, rule);
+		if (tuple_valid) {
+			cur_key_x += tuple_size;
+			cur_key_y += tuple_size;
+		}
+	}
+
+	meta_data_region = hdev->fd_cfg.max_key_length / 8 -
+			MAX_META_DATA_LENGTH / 8;
+
+	hclge_fd_convert_meta_data(key_cfg,
+				   (__le32 *)(key_x + meta_data_region),
+				   (__le32 *)(key_y + meta_data_region),
+				   rule);
+
+	ret = hclge_fd_tcam_config(hdev, stage, false, rule->location, key_y,
+				   true);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"fd key_y config fail, loc=%d, ret=%d\n",
+			rule->location, ret);
+		return ret;
+	}
+
+	ret = hclge_fd_tcam_config(hdev, stage, true, rule->location, key_x,
+				   true);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"fd key_x config fail, loc=%d, ret=%d\n",
+			rule->location, ret);
+	return ret;
+}
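
Following the layout comment above hclge_config_key(): tuple bytes are packed from the bottom of the key buffer and the meta data block sits at the top, with the gap left zeroed. A small sketch of the offset arithmetic, assuming the 400-bit key mode and a 32-bit meta data block (the u32 meta_data in hclge_fd_convert_meta_data() suggests, but does not confirm, that MAX_META_DATA_LENGTH is 32):

    #include <stdio.h>

    #define MAX_KEY_LENGTH       400  /* bits; the 400-bit key mode */
    #define MAX_META_DATA_LENGTH 32   /* bits; an assumption, see above */

    int main(void)
    {
        int key_bytes = MAX_KEY_LENGTH / 8;             /* 50 */
        int meta_bytes = MAX_META_DATA_LENGTH / 8;      /* 4 */
        int meta_data_region = key_bytes - meta_bytes;  /* 46 */

        printf("tuple key : bytes 0..%d (LSB region)\n", meta_data_region - 1);
        printf("meta data : bytes %d..%d (MSB region)\n",
               meta_data_region, key_bytes - 1);
        return 0;
    }
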
+
+static int hclge_config_action(struct hclge_dev *hdev, u8 stage,
+			       struct hclge_fd_rule *rule)
+{
+	struct hclge_fd_ad_data ad_data;
+
+	ad_data.ad_id = rule->location;
+
+	if (rule->action == HCLGE_FD_ACTION_DROP_PACKET) {
+		ad_data.drop_packet = true;
+		ad_data.forward_to_direct_queue = false;
+		ad_data.queue_id = 0;
+	} else {
+		ad_data.drop_packet = false;
+		ad_data.forward_to_direct_queue = true;
+		ad_data.queue_id = rule->queue_id;
+	}
+
+	ad_data.use_counter = false;
+	ad_data.counter_id = 0;
+
+	ad_data.use_next_stage = false;
+	ad_data.next_input_key = 0;
+
+	ad_data.write_rule_id_to_bd = true;
+	ad_data.rule_id = rule->location;
+
+	return hclge_fd_ad_config(hdev, stage, ad_data.ad_id, &ad_data);
+}
+
+static int hclge_fd_check_spec(struct hclge_dev *hdev,
+			       struct ethtool_rx_flow_spec *fs, u32 *unused)
+{
+	struct ethtool_tcpip4_spec *tcp_ip4_spec;
+	struct ethtool_usrip4_spec *usr_ip4_spec;
+	struct ethtool_tcpip6_spec *tcp_ip6_spec;
+	struct ethtool_usrip6_spec *usr_ip6_spec;
+	struct ethhdr *ether_spec;
+
+	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+		return -EINVAL;
+
+	if (!(fs->flow_type & hdev->fd_cfg.proto_support))
+		return -EOPNOTSUPP;
+
+	if ((fs->flow_type & FLOW_EXT) &&
+	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
+		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case SCTP_V4_FLOW:
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		tcp_ip4_spec = &fs->h_u.tcp_ip4_spec;
+		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
+
+		if (!tcp_ip4_spec->ip4src)
+			*unused |= BIT(INNER_SRC_IP);
+
+		if (!tcp_ip4_spec->ip4dst)
+			*unused |= BIT(INNER_DST_IP);
+
+		if (!tcp_ip4_spec->psrc)
+			*unused |= BIT(INNER_SRC_PORT);
+
+		if (!tcp_ip4_spec->pdst)
+			*unused |= BIT(INNER_DST_PORT);
+
+		if (!tcp_ip4_spec->tos)
+			*unused |= BIT(INNER_IP_TOS);
+
+		break;
+	case IP_USER_FLOW:
+		usr_ip4_spec = &fs->h_u.usr_ip4_spec;
+		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+
+		if (!usr_ip4_spec->ip4src)
+			*unused |= BIT(INNER_SRC_IP);
+
+		if (!usr_ip4_spec->ip4dst)
+			*unused |= BIT(INNER_DST_IP);
+
+		if (!usr_ip4_spec->tos)
+			*unused |= BIT(INNER_IP_TOS);
+
+		if (!usr_ip4_spec->proto)
+			*unused |= BIT(INNER_IP_PROTO);
+
+		if (usr_ip4_spec->l4_4_bytes)
+			return -EOPNOTSUPP;
+
+		if (usr_ip4_spec->ip_ver != ETH_RX_NFC_IP4)
+			return -EOPNOTSUPP;
+
+		break;
+	case SCTP_V6_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		tcp_ip6_spec = &fs->h_u.tcp_ip6_spec;
+		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+			BIT(INNER_IP_TOS);
+
+		/* check whether the src/dst ip addresses are used */
+		if (!tcp_ip6_spec->ip6src[0] && !tcp_ip6_spec->ip6src[1] &&
+		    !tcp_ip6_spec->ip6src[2] && !tcp_ip6_spec->ip6src[3])
+			*unused |= BIT(INNER_SRC_IP);
+
+		if (!tcp_ip6_spec->ip6dst[0] && !tcp_ip6_spec->ip6dst[1] &&
+		    !tcp_ip6_spec->ip6dst[2] && !tcp_ip6_spec->ip6dst[3])
+			*unused |= BIT(INNER_DST_IP);
+
+		if (!tcp_ip6_spec->psrc)
+			*unused |= BIT(INNER_SRC_PORT);
+
+		if (!tcp_ip6_spec->pdst)
+			*unused |= BIT(INNER_DST_PORT);
+
+		if (tcp_ip6_spec->tclass)
+			return -EOPNOTSUPP;
+
+		break;
+	case IPV6_USER_FLOW:
+		usr_ip6_spec = &fs->h_u.usr_ip6_spec;
+		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+			BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) |
+			BIT(INNER_DST_PORT);
+
+		/* check whether the src/dst ip addresses are used */
+		if (!usr_ip6_spec->ip6src[0] && !usr_ip6_spec->ip6src[1] &&
+		    !usr_ip6_spec->ip6src[2] && !usr_ip6_spec->ip6src[3])
+			*unused |= BIT(INNER_SRC_IP);
+
+		if (!usr_ip6_spec->ip6dst[0] && !usr_ip6_spec->ip6dst[1] &&
+		    !usr_ip6_spec->ip6dst[2] && !usr_ip6_spec->ip6dst[3])
+			*unused |= BIT(INNER_DST_IP);
+
+		if (!usr_ip6_spec->l4_proto)
+			*unused |= BIT(INNER_IP_PROTO);
+
+		if (usr_ip6_spec->tclass)
+			return -EOPNOTSUPP;
+
+		if (usr_ip6_spec->l4_4_bytes)
+			return -EOPNOTSUPP;
+
+		break;
+	case ETHER_FLOW:
+		ether_spec = &fs->h_u.ether_spec;
+		*unused |= BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
+			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT) |
+			BIT(INNER_IP_TOS) | BIT(INNER_IP_PROTO);
+
+		if (is_zero_ether_addr(ether_spec->h_source))
+			*unused |= BIT(INNER_SRC_MAC);
+
+		if (is_zero_ether_addr(ether_spec->h_dest))
+			*unused |= BIT(INNER_DST_MAC);
+
+		if (!ether_spec->h_proto)
+			*unused |= BIT(INNER_ETH_TYPE);
+
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if ((fs->flow_type & FLOW_EXT)) {
+		if (fs->h_ext.vlan_etype)
+			return -EOPNOTSUPP;
+		if (!fs->h_ext.vlan_tci)
+			*unused |= BIT(INNER_VLAN_TAG_FST);
+
+		if (fs->m_ext.vlan_tci) {
+			if (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)
+				return -EINVAL;
+		}
+	} else {
+		*unused |= BIT(INNER_VLAN_TAG_FST);
+	}
+
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		if (!(hdev->fd_cfg.proto_support & ETHER_FLOW))
+			return -EOPNOTSUPP;
+
+		if (is_zero_ether_addr(fs->h_ext.h_dest))
+			*unused |= BIT(INNER_DST_MAC);
+		else
+			*unused &= ~(BIT(INNER_DST_MAC));
+	}
+
+	return 0;
+}
+
+static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
+{
+	struct hclge_fd_rule *rule = NULL;
+	struct hlist_node *node2;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
+		if (rule->location >= location)
+			break;
+	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return rule && rule->location == location;
+}
+
+/* must be called while holding fd_rule_lock */
+static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
+				     struct hclge_fd_rule *new_rule,
+				     u16 location,
+				     bool is_add)
+{
+	struct hclge_fd_rule *rule = NULL, *parent = NULL;
+	struct hlist_node *node2;
+
+	if (is_add && !new_rule)
+		return -EINVAL;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &hdev->fd_rule_list, rule_node) {
+		if (rule->location >= location)
+			break;
+		parent = rule;
+	}
+
+	if (rule && rule->location == location) {
+		hlist_del(&rule->rule_node);
+		kfree(rule);
+		hdev->hclge_fd_rule_num--;
+
+		if (!is_add) {
+			if (!hdev->hclge_fd_rule_num)
+				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+			clear_bit(location, hdev->fd_bmap);
+
+			return 0;
+		}
+	} else if (!is_add) {
+		dev_err(&hdev->pdev->dev,
+			"delete fail, rule %d is nonexistent\n",
+			location);
+		return -EINVAL;
+	}
+
+	INIT_HLIST_NODE(&new_rule->rule_node);
+
+	if (parent)
+		hlist_add_behind(&new_rule->rule_node, &parent->rule_node);
+	else
+		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
+
+	set_bit(location, hdev->fd_bmap);
+	hdev->hclge_fd_rule_num++;
+	hdev->fd_active_type = new_rule->rule_type;
+
+	return 0;
+}
+
+static int hclge_fd_get_tuple(struct hclge_dev *hdev,
+			      struct ethtool_rx_flow_spec *fs,
+			      struct hclge_fd_rule *rule)
+{
+	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+
+	switch (flow_type) {
+	case SCTP_V4_FLOW:
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		rule->tuples.src_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
+		rule->tuples_mask.src_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
+
+		rule->tuples.dst_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
+		rule->tuples_mask.dst_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
+
+		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
+		rule->tuples_mask.src_port =
+				be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
+
+		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
+		rule->tuples_mask.dst_port =
+				be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
+
+		rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
+		rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
+
+		rule->tuples.ether_proto = ETH_P_IP;
+		rule->tuples_mask.ether_proto = 0xFFFF;
+
+		break;
+	case IP_USER_FLOW:
+		rule->tuples.src_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
+		rule->tuples_mask.src_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
+
+		rule->tuples.dst_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
+		rule->tuples_mask.dst_ip[IPV4_INDEX] =
+				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
+
+		rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
+		rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
+
+		rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
+		rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
+
+		rule->tuples.ether_proto = ETH_P_IP;
+		rule->tuples_mask.ether_proto = 0xFFFF;
+
+		break;
+	case SCTP_V6_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		be32_to_cpu_array(rule->tuples.src_ip,
+				  fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
+		be32_to_cpu_array(rule->tuples_mask.src_ip,
+				  fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
+
+		be32_to_cpu_array(rule->tuples.dst_ip,
+				  fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
+		be32_to_cpu_array(rule->tuples_mask.dst_ip,
+				  fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
+
+		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
+		rule->tuples_mask.src_port =
+				be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
+
+		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
+		rule->tuples_mask.dst_port =
+				be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
+
+		rule->tuples.ether_proto = ETH_P_IPV6;
+		rule->tuples_mask.ether_proto = 0xFFFF;
+
+		break;
+	case IPV6_USER_FLOW:
+		be32_to_cpu_array(rule->tuples.src_ip,
+				  fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE);
+		be32_to_cpu_array(rule->tuples_mask.src_ip,
+				  fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE);
+
+		be32_to_cpu_array(rule->tuples.dst_ip,
+				  fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
+		be32_to_cpu_array(rule->tuples_mask.dst_ip,
+				  fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
+
+		rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
+		rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
+
+		rule->tuples.ether_proto = ETH_P_IPV6;
+		rule->tuples_mask.ether_proto = 0xFFFF;
+
+		break;
+	case ETHER_FLOW:
+		ether_addr_copy(rule->tuples.src_mac,
+				fs->h_u.ether_spec.h_source);
+		ether_addr_copy(rule->tuples_mask.src_mac,
+				fs->m_u.ether_spec.h_source);
+
+		ether_addr_copy(rule->tuples.dst_mac,
+				fs->h_u.ether_spec.h_dest);
+		ether_addr_copy(rule->tuples_mask.dst_mac,
+				fs->m_u.ether_spec.h_dest);
+
+		rule->tuples.ether_proto =
+				be16_to_cpu(fs->h_u.ether_spec.h_proto);
+		rule->tuples_mask.ether_proto =
+				be16_to_cpu(fs->m_u.ether_spec.h_proto);
+
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	switch (flow_type) {
+	case SCTP_V4_FLOW:
+	case SCTP_V6_FLOW:
+		rule->tuples.ip_proto = IPPROTO_SCTP;
+		rule->tuples_mask.ip_proto = 0xFF;
+		break;
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		rule->tuples.ip_proto = IPPROTO_TCP;
+		rule->tuples_mask.ip_proto = 0xFF;
+		break;
+	case UDP_V4_FLOW:
+	case UDP_V6_FLOW:
+		rule->tuples.ip_proto = IPPROTO_UDP;
+		rule->tuples_mask.ip_proto = 0xFF;
+		break;
+	default:
+		break;
+	}
+
+	if ((fs->flow_type & FLOW_EXT)) {
+		rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci);
+		rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci);
+	}
+
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		ether_addr_copy(rule->tuples.dst_mac, fs->h_ext.h_dest);
+		ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_ext.h_dest);
+	}
+
+	return 0;
+}
+
+/* must be called while holding fd_rule_lock */
+static int hclge_fd_config_rule(struct hclge_dev *hdev,
+				struct hclge_fd_rule *rule)
+{
+	int ret;
+
+	if (!rule) {
+		dev_err(&hdev->pdev->dev,
+			"The flow director rule is NULL\n");
+		return -EINVAL;
+	}
+
+	/* it never fails here, so there is no need to check the return value */
+	hclge_fd_update_rule_list(hdev, rule, rule->location, true);
+
+	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret)
+		goto clear_rule;
+
+	ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret)
+		goto clear_rule;
+
+	return 0;
+
+clear_rule:
+	hclge_fd_update_rule_list(hdev, rule, rule->location, false);
+	return ret;
+}
+
+static int hclge_add_fd_entry(struct hnae3_handle *handle,
+			      struct ethtool_rxnfc *cmd)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u16 dst_vport_id = 0, q_index = 0;
+	struct ethtool_rx_flow_spec *fs;
+	struct hclge_fd_rule *rule;
+	u32 unused = 0;
+	u8 action;
+	int ret;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	if (!hdev->fd_en) {
+		dev_warn(&hdev->pdev->dev,
+			 "Please enable flow director first\n");
+		return -EOPNOTSUPP;
+	}
+
+	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	ret = hclge_fd_check_spec(hdev, fs, &unused);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "Check fd spec failed\n");
+		return ret;
+	}
+
+	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
+		action = HCLGE_FD_ACTION_DROP_PACKET;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
+		u16 tqps;
+
+		if (vf > hdev->num_req_vfs) {
+			dev_err(&hdev->pdev->dev,
+				"Error: vf id (%d) > max vf num (%d)\n",
+				vf, hdev->num_req_vfs);
+			return -EINVAL;
+		}
+
+		dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
+		tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps;
+
+		if (ring >= tqps) {
+			dev_err(&hdev->pdev->dev,
+				"Error: queue id (%d) > max queue id (%d)\n",
+				ring, tqps - 1);
+			return -EINVAL;
+		}
+
+		action = HCLGE_FD_ACTION_ACCEPT_PACKET;
+		q_index = ring;
+	}
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return -ENOMEM;
+
+	ret = hclge_fd_get_tuple(hdev, fs, rule);
+	if (ret) {
+		kfree(rule);
+		return ret;
+	}
+
+	rule->flow_type = fs->flow_type;
+
+	rule->location = fs->location;
+	rule->unused_tuple = unused;
+	rule->vf_id = dst_vport_id;
+	rule->queue_id = q_index;
+	rule->action = action;
+	rule->rule_type = HCLGE_FD_EP_ACTIVE;
+
+	/* to avoid rule conflicts, when the user configures rules via
+	 * ethtool, we need to clear all arfs rules
+	 */
+	hclge_clear_arfs_rules(handle);
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	ret = hclge_fd_config_rule(hdev, rule);
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return ret;
+}
+
+static int hclge_del_fd_entry(struct hnae3_handle *handle,
+			      struct ethtool_rxnfc *cmd)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct ethtool_rx_flow_spec *fs;
+	int ret;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+		return -EINVAL;
+
+	if (!hclge_fd_rule_exist(hdev, fs->location)) {
+		dev_err(&hdev->pdev->dev,
+			"Delete fail, rule %d is nonexistent\n", fs->location);
+		return -ENOENT;
+	}
+
+	ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location,
+				   NULL, false);
+	if (ret)
+		return ret;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false);
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return ret;
+}
+
+static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
+				     bool clear_list)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	u16 location;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	for_each_set_bit(location, hdev->fd_bmap,
+			 hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
+				     NULL, false);
+
+	if (clear_list) {
+		hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list,
+					  rule_node) {
+			hlist_del(&rule->rule_node);
+			kfree(rule);
+		}
+		hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+		hdev->hclge_fd_rule_num = 0;
+		bitmap_zero(hdev->fd_bmap,
+			    hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
+	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+}
+
+static int hclge_restore_fd_entries(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	int ret;
+
+	/* Return 0 here, because the reset error handling checks this
+	 * return value: returning an error here would make the whole
+	 * reset process fail.
+	 */
+	if (!hnae3_dev_fd_supported(hdev))
+		return 0;
+
+	/* if fd is disabled, the rules should not be restored during reset */
+	if (!hdev->fd_en)
+		return 0;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+		if (!ret)
+			ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+
+		if (ret) {
+			dev_warn(&hdev->pdev->dev,
+				 "Restore rule %d failed, remove it\n",
+				 rule->location);
+			clear_bit(rule->location, hdev->fd_bmap);
+			hlist_del(&rule->rule_node);
+			kfree(rule);
+			hdev->hclge_fd_rule_num--;
+		}
+	}
+
+	if (hdev->hclge_fd_rule_num)
+		hdev->fd_active_type = HCLGE_FD_EP_ACTIVE;
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return 0;
+}
+
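+/* hclge_get_fd_rule_cnt - report the number of configured rules and the
+ * total stage-1 rule capacity to ethtool.
+ */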
+static int hclge_get_fd_rule_cnt(struct hnae3_handle *handle,
+				 struct ethtool_rxnfc *cmd)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	cmd->rule_cnt = hdev->hclge_fd_rule_num;
+	cmd->data = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1];
+
+	return 0;
+}
+
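+/* hclge_get_fd_rule_info - translate the rule stored at fs->location
+ * back into the ethtool_rx_flow_spec format expected by user space.
+ */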
+static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
+				  struct ethtool_rxnfc *cmd)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_fd_rule *rule = NULL;
+	struct hclge_dev *hdev = vport->back;
+	struct ethtool_rx_flow_spec *fs;
+	struct hlist_node *node2;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
+		if (rule->location >= fs->location)
+			break;
+	}
+
+	if (!rule || fs->location != rule->location) {
+		spin_unlock_bh(&hdev->fd_rule_lock);
+
+		return -ENOENT;
+	}
+
+	fs->flow_type = rule->flow_type;
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case SCTP_V4_FLOW:
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		fs->h_u.tcp_ip4_spec.ip4src =
+				cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]);
+		fs->m_u.tcp_ip4_spec.ip4src =
+			rule->unused_tuple & BIT(INNER_SRC_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]);
+
+		fs->h_u.tcp_ip4_spec.ip4dst =
+				cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]);
+		fs->m_u.tcp_ip4_spec.ip4dst =
+			rule->unused_tuple & BIT(INNER_DST_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]);
+
+		fs->h_u.tcp_ip4_spec.psrc = cpu_to_be16(rule->tuples.src_port);
+		fs->m_u.tcp_ip4_spec.psrc =
+				rule->unused_tuple & BIT(INNER_SRC_PORT) ?
+				0 : cpu_to_be16(rule->tuples_mask.src_port);
+
+		fs->h_u.tcp_ip4_spec.pdst = cpu_to_be16(rule->tuples.dst_port);
+		fs->m_u.tcp_ip4_spec.pdst =
+				rule->unused_tuple & BIT(INNER_DST_PORT) ?
+				0 : cpu_to_be16(rule->tuples_mask.dst_port);
+
+		fs->h_u.tcp_ip4_spec.tos = rule->tuples.ip_tos;
+		fs->m_u.tcp_ip4_spec.tos =
+				rule->unused_tuple & BIT(INNER_IP_TOS) ?
+				0 : rule->tuples_mask.ip_tos;
+
+		break;
+	case IP_USER_FLOW:
+		fs->h_u.usr_ip4_spec.ip4src =
+				cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]);
+		fs->m_u.usr_ip4_spec.ip4src =
+			rule->unused_tuple & BIT(INNER_SRC_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]);
+
+		fs->h_u.usr_ip4_spec.ip4dst =
+				cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]);
+		fs->m_u.usr_ip4_spec.ip4dst =
+			rule->unused_tuple & BIT(INNER_DST_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]);
+
+		fs->h_u.usr_ip4_spec.tos = rule->tuples.ip_tos;
+		fs->m_u.usr_ip4_spec.tos =
+				rule->unused_tuple & BIT(INNER_IP_TOS) ?
+				0 : rule->tuples_mask.ip_tos;
+
+		fs->h_u.usr_ip4_spec.proto = rule->tuples.ip_proto;
+		fs->m_u.usr_ip4_spec.proto =
+				rule->unused_tuple & BIT(INNER_IP_PROTO) ?
+				0 : rule->tuples_mask.ip_proto;
+
+		fs->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+
+		break;
+	case SCTP_V6_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6src,
+				  rule->tuples.src_ip, IPV6_SIZE);
+		if (rule->unused_tuple & BIT(INNER_SRC_IP))
+			memset(fs->m_u.tcp_ip6_spec.ip6src, 0,
+			       sizeof(int) * IPV6_SIZE);
+		else
+			cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6src,
+					  rule->tuples_mask.src_ip, IPV6_SIZE);
+
+		cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6dst,
+				  rule->tuples.dst_ip, IPV6_SIZE);
+		if (rule->unused_tuple & BIT(INNER_DST_IP))
+			memset(fs->m_u.tcp_ip6_spec.ip6dst, 0,
+			       sizeof(int) * IPV6_SIZE);
+		else
+			cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6dst,
+					  rule->tuples_mask.dst_ip, IPV6_SIZE);
+
+		fs->h_u.tcp_ip6_spec.psrc = cpu_to_be16(rule->tuples.src_port);
+		fs->m_u.tcp_ip6_spec.psrc =
+				rule->unused_tuple & BIT(INNER_SRC_PORT) ?
+				0 : cpu_to_be16(rule->tuples_mask.src_port);
+
+		fs->h_u.tcp_ip6_spec.pdst = cpu_to_be16(rule->tuples.dst_port);
+		fs->m_u.tcp_ip6_spec.pdst =
+				rule->unused_tuple & BIT(INNER_DST_PORT) ?
+				0 : cpu_to_be16(rule->tuples_mask.dst_port);
+
+		break;
+	case IPV6_USER_FLOW:
+		cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6src,
+				  rule->tuples.src_ip, IPV6_SIZE);
+		if (rule->unused_tuple & BIT(INNER_SRC_IP))
+			memset(fs->m_u.usr_ip6_spec.ip6src, 0,
+			       sizeof(int) * IPV6_SIZE);
+		else
+			cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6src,
+					  rule->tuples_mask.src_ip, IPV6_SIZE);
+
+		cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6dst,
+				  rule->tuples.dst_ip, IPV6_SIZE);
+		if (rule->unused_tuple & BIT(INNER_DST_IP))
+			memset(fs->m_u.usr_ip6_spec.ip6dst, 0,
+			       sizeof(int) * IPV6_SIZE);
+		else
+			cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6dst,
+					  rule->tuples_mask.dst_ip, IPV6_SIZE);
+
+		fs->h_u.usr_ip6_spec.l4_proto = rule->tuples.ip_proto;
+		fs->m_u.usr_ip6_spec.l4_proto =
+				rule->unused_tuple & BIT(INNER_IP_PROTO) ?
+				0 : rule->tuples_mask.ip_proto;
+
+		break;
+	case ETHER_FLOW:
+		ether_addr_copy(fs->h_u.ether_spec.h_source,
+				rule->tuples.src_mac);
+		if (rule->unused_tuple & BIT(INNER_SRC_MAC))
+			eth_zero_addr(fs->m_u.ether_spec.h_source);
+		else
+			ether_addr_copy(fs->m_u.ether_spec.h_source,
+					rule->tuples_mask.src_mac);
+
+		ether_addr_copy(fs->h_u.ether_spec.h_dest,
+				rule->tuples.dst_mac);
+		if (rule->unused_tuple & BIT(INNER_DST_MAC))
+			eth_zero_addr(fs->m_u.ether_spec.h_dest);
+		else
+			ether_addr_copy(fs->m_u.ether_spec.h_dest,
+					rule->tuples_mask.dst_mac);
+
+		fs->h_u.ether_spec.h_proto =
+				cpu_to_be16(rule->tuples.ether_proto);
+		fs->m_u.ether_spec.h_proto =
+				rule->unused_tuple & BIT(INNER_ETH_TYPE) ?
+				0 : cpu_to_be16(rule->tuples_mask.ether_proto);
+
+		break;
+	default:
+		spin_unlock_bh(&hdev->fd_rule_lock);
+		return -EOPNOTSUPP;
+	}
+
+	if (fs->flow_type & FLOW_EXT) {
+		fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1);
+		fs->m_ext.vlan_tci =
+				rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
+				cpu_to_be16(VLAN_VID_MASK) :
+				cpu_to_be16(rule->tuples_mask.vlan_tag1);
+	}
+
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		ether_addr_copy(fs->h_ext.h_dest, rule->tuples.dst_mac);
+		if (rule->unused_tuple & BIT(INNER_DST_MAC))
+			eth_zero_addr(fs->m_u.ether_spec.h_dest);
+		else
+			ether_addr_copy(fs->m_u.ether_spec.h_dest,
+					rule->tuples_mask.dst_mac);
+	}
+
+	if (rule->action == HCLGE_FD_ACTION_DROP_PACKET) {
+		fs->ring_cookie = RX_CLS_FLOW_DISC;
+	} else {
+		u64 vf_id;
+
+		fs->ring_cookie = rule->queue_id;
+		vf_id = rule->vf_id;
+		vf_id <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+		fs->ring_cookie |= vf_id;
+	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return 0;
+}
+
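+/* hclge_get_all_rules - fill @rule_locs with the location of every
+ * configured rule; fails with -EMSGSIZE if more rules exist than
+ * cmd->rule_cnt allows for.
+ */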
+static int hclge_get_all_rules(struct hnae3_handle *handle,
+			       struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node2;
+	int cnt = 0;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	cmd->data = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1];
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	hlist_for_each_entry_safe(rule, node2,
+				  &hdev->fd_rule_list, rule_node) {
+		if (cnt == cmd->rule_cnt) {
+			spin_unlock_bh(&hdev->fd_rule_lock);
+			return -EMSGSIZE;
+		}
+
+		rule_locs[cnt] = rule->location;
+		cnt++;
+	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
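+/* hclge_fd_get_flow_tuples - convert dissected flow keys into the
+ * driver's tuple representation, for both IPv4 and IPv6 flows.
+ */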
+static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys,
+				     struct hclge_fd_rule_tuples *tuples)
+{
+	tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto);
+	tuples->ip_proto = fkeys->basic.ip_proto;
+	tuples->dst_port = be16_to_cpu(fkeys->ports.dst);
+
+	if (fkeys->basic.n_proto == htons(ETH_P_IP)) {
+		tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src);
+		tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst);
+	} else {
+		memcpy(tuples->src_ip,
+		       fkeys->addrs.v6addrs.src.in6_u.u6_addr32,
+		       sizeof(tuples->src_ip));
+		memcpy(tuples->dst_ip,
+		       fkeys->addrs.v6addrs.dst.in6_u.u6_addr32,
+		       sizeof(tuples->dst_ip));
+	}
+}
+
+/* traverse all rules, check whether an existing rule has the same tuples */
+static struct hclge_fd_rule *
+hclge_fd_search_flow_keys(struct hclge_dev *hdev,
+			  const struct hclge_fd_rule_tuples *tuples)
+{
+	struct hclge_fd_rule *rule = NULL;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		if (!memcmp(tuples, &rule->tuples, sizeof(*tuples)))
+			return rule;
+	}
+
+	return NULL;
+}
+
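+/* hclge_fd_build_arfs_rule - build an aRFS rule matching the IP
+ * addresses, destination port and protocol of @tuples; MAC, VLAN,
+ * ToS and source port are masked out as unused.
+ */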
+static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples,
+				     struct hclge_fd_rule *rule)
+{
+	rule->unused_tuple = BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+			     BIT(INNER_VLAN_TAG_FST) | BIT(INNER_IP_TOS) |
+			     BIT(INNER_SRC_PORT);
+	rule->action = 0;
+	rule->vf_id = 0;
+	rule->rule_type = HCLGE_FD_ARFS_ACTIVE;
+	if (tuples->ether_proto == ETH_P_IP) {
+		if (tuples->ip_proto == IPPROTO_TCP)
+			rule->flow_type = TCP_V4_FLOW;
+		else
+			rule->flow_type = UDP_V4_FLOW;
+	} else {
+		if (tuples->ip_proto == IPPROTO_TCP)
+			rule->flow_type = TCP_V6_FLOW;
+		else
+			rule->flow_type = UDP_V6_FLOW;
+	}
+	memcpy(&rule->tuples, tuples, sizeof(rule->tuples));
+	memset(&rule->tuples_mask, 0xFF, sizeof(rule->tuples_mask));
+}
+
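+/* hclge_add_fd_entry_by_arfs - aRFS callback: create a rule steering
+ * this flow to @queue_id, or retarget an existing rule whose queue
+ * differs. Returns the rule location, which serves as the filter id.
+ */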
+static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
+				      u16 flow_id, struct flow_keys *fkeys)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_fd_rule_tuples new_tuples;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	u16 tmp_queue_id;
+	u16 bit_id;
+	int ret;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	memset(&new_tuples, 0, sizeof(new_tuples));
+	hclge_fd_get_flow_tuples(fkeys, &new_tuples);
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	/* when an fd rule added by the user already exists, arfs
+	 * must not take effect
+	 */
+	if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
+		spin_unlock_bh(&hdev->fd_rule_lock);
+
+		return -EOPNOTSUPP;
+	}
+
+	/* check whether a flow director filter already exists for this flow:
+	 * if not, create a new filter for it;
+	 * if a filter exists with a different queue id, modify the filter;
+	 * if a filter exists with the same queue id, do nothing
+	 */
+	rule = hclge_fd_search_flow_keys(hdev, &new_tuples);
+	if (!rule) {
+		bit_id = find_first_zero_bit(hdev->fd_bmap, MAX_FD_FILTER_NUM);
+		if (bit_id >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) {
+			spin_unlock_bh(&hdev->fd_rule_lock);
+
+			return -ENOSPC;
+		}
+
+		rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
+		if (!rule) {
+			spin_unlock_bh(&hdev->fd_rule_lock);
+
+			return -ENOMEM;
+		}
+
+		set_bit(bit_id, hdev->fd_bmap);
+		rule->location = bit_id;
+		rule->flow_id = flow_id;
+		rule->queue_id = queue_id;
+		hclge_fd_build_arfs_rule(&new_tuples, rule);
+		ret = hclge_fd_config_rule(hdev, rule);
+
+		spin_unlock_bh(&hdev->fd_rule_lock);
+
+		if (ret)
+			return ret;
+
+		return rule->location;
+	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	if (rule->queue_id == queue_id)
+		return rule->location;
+
+	tmp_queue_id = rule->queue_id;
+	rule->queue_id = queue_id;
+	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret) {
+		rule->queue_id = tmp_queue_id;
+		return ret;
+	}
+
+	return rule->location;
+}
+
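+/* hclge_rfs_filter_expire - drop aRFS rules whose flows the RPS core
+ * reports as expired; the rules are unlinked under fd_rule_lock and
+ * their TCAM entries invalidated after the lock is released.
+ */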
+static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct hnae3_handle *handle = &hdev->vport[0].nic;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	HLIST_HEAD(del_list);
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) {
+		spin_unlock_bh(&hdev->fd_rule_lock);
+		return;
+	}
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		if (rps_may_expire_flow(handle->netdev, rule->queue_id,
+					rule->flow_id, rule->location)) {
+			hlist_del_init(&rule->rule_node);
+			hlist_add_head(&rule->rule_node, &del_list);
+			hdev->hclge_fd_rule_num--;
+			clear_bit(rule->location, hdev->fd_bmap);
+		}
+	}
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	hlist_for_each_entry_safe(rule, node, &del_list, rule_node) {
+		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+				     rule->location, NULL, false);
+		kfree(rule);
+	}
+#endif
+}
+
+static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE)
+		hclge_del_all_fd_entries(handle, true);
+#endif
+}
+
+static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG) ||
+	       hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING);
+}
+
+static bool hclge_ae_dev_resetting(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+}
+
+static unsigned long hclge_ae_dev_reset_cnt(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return hdev->rst_stats.hw_reset_done_cnt;
+}
+
+static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	bool clear;
+
+	hdev->fd_en = enable;
+	clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
+	if (!enable)
+		hclge_del_all_fd_entries(handle, clear);
+	else
+		hclge_restore_fd_entries(handle);
 }
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
@@ -3625,20 +6231,20 @@
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
-	hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0);
-	hnae3_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0);
-	hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
-	hnae3_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0);
-	hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
-	hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+
+	if (enable) {
+		hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, 1U);
+		hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, 1U);
+	}
+
 	req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -3647,7 +6253,102 @@
 			"mac enable fail, ret =%d.\n", ret);
 }
 
-static int hclge_set_mac_loopback(struct hclge_dev *hdev, bool en)
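+/* hclge_config_switch_param - read the current mac vlan switch parameter
+ * of the function identified by @vfid, apply @switch_param under
+ * @param_mask and write it back.
+ */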
+static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
+				     u8 switch_param, u8 param_mask)
+{
+	struct hclge_mac_vlan_switch_cmd *req;
+	struct hclge_desc desc;
+	u32 func_id;
+	int ret;
+
+	func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0);
+	req = (struct hclge_mac_vlan_switch_cmd *)desc.data;
+
+	/* read current config parameter */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM,
+				   true);
+	req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL;
+	req->func_id = cpu_to_le32(func_id);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"read mac vlan switch parameter fail, ret = %d\n", ret);
+		return ret;
+	}
+
+	/* modify and write new config parameter */
+	hclge_cmd_reuse_desc(&desc, false);
+	req->switch_param = (req->switch_param & param_mask) | switch_param;
+	req->param_mask = param_mask;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"set mac vlan switch parameter fail, ret = %d\n", ret);
+	return ret;
+}
+
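+/* hclge_phy_link_status_wait - poll the PHY until its link state
+ * matches @link_ret, giving up after HCLGE_PHY_LINK_STATUS_NUM polls.
+ */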
+static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
+				       int link_ret)
+{
+#define HCLGE_PHY_LINK_STATUS_NUM  200
+
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+	int i = 0;
+	int ret;
+
+	do {
+		ret = phy_read_status(phydev);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"phy update link status fail, ret = %d\n", ret);
+			return;
+		}
+
+		if (phydev->link == link_ret)
+			break;
+
+		msleep(HCLGE_LINK_STATUS_MS);
+	} while (++i < HCLGE_PHY_LINK_STATUS_NUM);
+}
+
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+{
+#define HCLGE_MAC_LINK_STATUS_NUM  100
+
+	int i = 0;
+	int ret;
+
+	do {
+		ret = hclge_get_mac_link_status(hdev);
+		if (ret < 0)
+			return ret;
+		else if (ret == link_ret)
+			return 0;
+
+		msleep(HCLGE_LINK_STATUS_MS);
+	} while (++i < HCLGE_MAC_LINK_STATUS_NUM);
+	return -EBUSY;
+}
+
+static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
+					  bool is_phy)
+{
+#define HCLGE_LINK_STATUS_DOWN 0
+#define HCLGE_LINK_STATUS_UP   1
+
+	int link_ret;
+
+	link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
+
+	if (is_phy)
+		hclge_phy_link_status_wait(hdev, link_ret);
+
+	return hclge_mac_link_status_wait(hdev, link_ret);
+}
+
+static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
 {
 	struct hclge_config_mac_mode_cmd *req;
 	struct hclge_desc desc;
@@ -3667,6 +6368,8 @@
 	/* 2 Then setup the loopback flag */
 	loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
 	hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, en ? 1 : 0);
+	hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, en ? 1 : 0);
+	hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, en ? 1 : 0);
 
 	req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
@@ -3681,22 +6384,38 @@
 	return ret;
 }
 
-static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en)
+static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
+				     enum hnae3_loop loop_mode)
 {
 #define HCLGE_SERDES_RETRY_MS	10
 #define HCLGE_SERDES_RETRY_NUM	100
+
 	struct hclge_serdes_lb_cmd *req;
 	struct hclge_desc desc;
 	int ret, i = 0;
+	u8 loop_mode_b;
 
-	req = (struct hclge_serdes_lb_cmd *)&desc.data[0];
+	req = (struct hclge_serdes_lb_cmd *)desc.data;
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false);
 
+	switch (loop_mode) {
+	case HNAE3_LOOP_SERIAL_SERDES:
+		loop_mode_b = HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
+		break;
+	case HNAE3_LOOP_PARALLEL_SERDES:
+		loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
+		break;
+	default:
+		dev_err(&hdev->pdev->dev,
+			"unsupported serdes loopback mode %d\n", loop_mode);
+		return -ENOTSUPP;
+	}
+
 	if (en) {
-		req->enable = HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
-		req->mask = HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
+		req->enable = loop_mode_b;
+		req->mask = loop_mode_b;
 	} else {
-		req->mask = HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
+		req->mask = loop_mode_b;
 	}
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -3726,35 +6445,87 @@
 		dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n");
 		return -EIO;
 	}
-
-	return 0;
+	return ret;
 }
 
-static int hclge_set_loopback(struct hnae3_handle *handle,
-			      enum hnae3_loop loop_mode, bool en)
+static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
+				     enum hnae3_loop loop_mode)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
 	int ret;
 
-	switch (loop_mode) {
-	case HNAE3_MAC_INTER_LOOP_MAC:
-		ret = hclge_set_mac_loopback(hdev, en);
-		break;
-	case HNAE3_MAC_INTER_LOOP_SERDES:
-		ret = hclge_set_serdes_loopback(hdev, en);
-		break;
-	default:
-		ret = -ENOTSUPP;
+	ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode);
+	if (ret)
+		return ret;
+
+	hclge_cfg_mac_mode(hdev, en);
+
+	ret = hclge_mac_phy_link_status_wait(hdev, en, false);
+	if (ret)
 		dev_err(&hdev->pdev->dev,
-			"loop_mode %d is not supported\n", loop_mode);
-		break;
-	}
+			"serdes loopback config mac mode timeout\n");
 
 	return ret;
 }
 
-static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id,
+static int hclge_enable_phy_loopback(struct hclge_dev *hdev,
+				     struct phy_device *phydev)
+{
+	int ret;
+
+	if (!phydev->suspended) {
+		ret = phy_suspend(phydev);
+		if (ret)
+			return ret;
+	}
+
+	ret = phy_resume(phydev);
+	if (ret)
+		return ret;
+
+	return phy_loopback(phydev, true);
+}
+
+static int hclge_disable_phy_loopback(struct hclge_dev *hdev,
+				      struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_loopback(phydev, false);
+	if (ret)
+		return ret;
+
+	return phy_suspend(phydev);
+}
+
+static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en)
+{
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+	int ret;
+
+	if (!phydev)
+		return -ENOTSUPP;
+
+	if (en)
+		ret = hclge_enable_phy_loopback(hdev, phydev);
+	else
+		ret = hclge_disable_phy_loopback(hdev, phydev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"set phy loopback fail, ret = %d\n", ret);
+		return ret;
+	}
+
+	hclge_cfg_mac_mode(hdev, en);
+
+	ret = hclge_mac_phy_link_status_wait(hdev, en, true);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"phy loopback config mac mode timeout\n");
+
+	return ret;
+}
+
+static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id,
 			    int stream_id, bool enable)
 {
 	struct hclge_desc desc;
@@ -3765,7 +6536,8 @@
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
 	req->tqp_id = cpu_to_le16(tqp_id & HCLGE_RING_ID_MASK);
 	req->stream_id = cpu_to_le16(stream_id);
-	req->enable |= enable << HCLGE_TQP_ENABLE_B;
+	if (enable)
+		req->enable |= 1U << HCLGE_TQP_ENABLE_B;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
@@ -3774,33 +6546,116 @@
 	return ret;
 }
 
+static int hclge_set_loopback(struct hnae3_handle *handle,
+			      enum hnae3_loop loop_mode, bool en)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hnae3_knic_private_info *kinfo;
+	struct hclge_dev *hdev = vport->back;
+	int i, ret;
+
+	/* Loopback can be enabled in three places: SSU, MAC, and serdes. By
+	 * default, SSU loopback is enabled, so if the SMAC and the DMAC are
+	 * the same, the packets are looped back in the SSU. If SSU loopback
+	 * is disabled, packets can reach MAC even if SMAC is the same as DMAC.
+	 */
+	if (hdev->pdev->revision >= 0x21) {
+		u8 switch_param = en ? 0 : BIT(HCLGE_SWITCH_ALW_LPBK_B);
+
+		ret = hclge_config_switch_param(hdev, PF_VPORT_ID, switch_param,
+						HCLGE_SWITCH_ALW_LPBK_MASK);
+		if (ret)
+			return ret;
+	}
+
+	switch (loop_mode) {
+	case HNAE3_LOOP_APP:
+		ret = hclge_set_app_loopback(hdev, en);
+		break;
+	case HNAE3_LOOP_SERIAL_SERDES:
+	case HNAE3_LOOP_PARALLEL_SERDES:
+		ret = hclge_set_serdes_loopback(hdev, en, loop_mode);
+		break;
+	case HNAE3_LOOP_PHY:
+		ret = hclge_set_phy_loopback(hdev, en);
+		break;
+	default:
+		ret = -ENOTSUPP;
+		dev_err(&hdev->pdev->dev,
+			"loop_mode %d is not supported\n", loop_mode);
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	kinfo = &vport->nic.kinfo;
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		ret = hclge_tqp_enable(hdev, i, 0, en);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
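+/* hclge_set_default_loopback - disable app loopback and both serdes
+ * loopback modes, restoring the default datapath.
+ */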
+static int hclge_set_default_loopback(struct hclge_dev *hdev)
+{
+	int ret;
+
+	ret = hclge_set_app_loopback(hdev, false);
+	if (ret)
+		return ret;
+
+	ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
+	if (ret)
+		return ret;
+
+	return hclge_cfg_serdes_loopback(hdev, false,
+					 HNAE3_LOOP_PARALLEL_SERDES);
+}
+
 static void hclge_reset_tqp_stats(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hnae3_knic_private_info *kinfo;
 	struct hnae3_queue *queue;
 	struct hclge_tqp *tqp;
 	int i;
 
-	for (i = 0; i < vport->alloc_tqps; i++) {
+	kinfo = &vport->nic.kinfo;
+	for (i = 0; i < kinfo->num_tqps; i++) {
 		queue = handle->kinfo.tqp[i];
 		tqp = container_of(queue, struct hclge_tqp, q);
 		memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats));
 	}
 }
 
+static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (enable) {
+		hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+	} else {
+		/* Set the DOWN flag here to prevent the service task from
+		 * being scheduled again
+		 */
+		set_bit(HCLGE_STATE_DOWN, &hdev->state);
+		cancel_delayed_work_sync(&hdev->service_task);
+		clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+	}
+}
+
 static int hclge_ae_start(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	int i;
-
-	for (i = 0; i < vport->alloc_tqps; i++)
-		hclge_tqp_enable(hdev, i, 0, true);
 
 	/* mac enable */
 	hclge_cfg_mac_mode(hdev, true);
 	clear_bit(HCLGE_STATE_DOWN, &hdev->state);
-	mod_timer(&hdev->service_timer, jiffies + HZ);
 	hdev->hw.mac.link = 0;
 
 	/* reset tqp stats */
@@ -3819,17 +6674,22 @@
 
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 
-	del_timer_sync(&hdev->service_timer);
-	cancel_work_sync(&hdev->service_task);
-	clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+	hclge_clear_arfs_rules(handle);
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) {
+	/* If it is not a PF reset, the firmware will disable the MAC,
+	 * so we only need to stop the phy here.
+	 */
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) &&
+	    hdev->reset_type != HNAE3_FUNC_RESET) {
 		hclge_mac_stop_phy(hdev);
+		hclge_update_link_status(hdev);
 		return;
 	}
 
-	for (i = 0; i < vport->alloc_tqps; i++)
-		hclge_tqp_enable(hdev, i, 0, false);
+	for (i = 0; i < handle->kinfo.num_tqps; i++)
+		hclge_reset_tqp(handle, i);
+
+	hclge_config_mac_tnl_int(hdev, false);
 
 	/* Mac disable */
 	hclge_cfg_mac_mode(hdev, false);
@@ -3838,17 +6698,40 @@
 
 	/* reset tqp stats */
 	hclge_reset_tqp_stats(handle);
-	del_timer_sync(&hdev->service_timer);
-	cancel_work_sync(&hdev->service_task);
 	hclge_update_link_status(hdev);
 }
 
+int hclge_vport_start(struct hclge_vport *vport)
+{
+	set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+	vport->last_active_jiffies = jiffies;
+	return 0;
+}
+
+void hclge_vport_stop(struct hclge_vport *vport)
+{
+	clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+}
+
+static int hclge_client_start(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	return hclge_vport_start(vport);
+}
+
+static void hclge_client_stop(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	hclge_vport_stop(vport);
+}
+
 static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 					 u16 cmdq_resp, u8  resp_code,
 					 enum hclge_mac_vlan_tbl_opcode op)
 {
 	struct hclge_dev *hdev = vport->back;
-	int return_status = -EIO;
 
 	if (cmdq_resp) {
 		dev_err(&hdev->pdev->dev,
@@ -3859,63 +6742,66 @@
 
 	if (op == HCLGE_MAC_VLAN_ADD) {
 		if ((!resp_code) || (resp_code == 1)) {
-			return_status = 0;
-		} else if (resp_code == 2) {
-			return_status = -ENOSPC;
+			return 0;
+		} else if (resp_code == HCLGE_ADD_UC_OVERFLOW) {
 			dev_err(&hdev->pdev->dev,
 				"add mac addr failed for uc_overflow.\n");
-		} else if (resp_code == 3) {
-			return_status = -ENOSPC;
+			return -ENOSPC;
+		} else if (resp_code == HCLGE_ADD_MC_OVERFLOW) {
 			dev_err(&hdev->pdev->dev,
 				"add mac addr failed for mc_overflow.\n");
-		} else {
-			dev_err(&hdev->pdev->dev,
-				"add mac addr failed for undefined, code=%d.\n",
-				resp_code);
+			return -ENOSPC;
 		}
+
+		dev_err(&hdev->pdev->dev,
+			"add mac addr failed for undefined, code=%u.\n",
+			resp_code);
+		return -EIO;
 	} else if (op == HCLGE_MAC_VLAN_REMOVE) {
 		if (!resp_code) {
-			return_status = 0;
+			return 0;
 		} else if (resp_code == 1) {
-			return_status = -ENOENT;
 			dev_dbg(&hdev->pdev->dev,
 				"remove mac addr failed for miss.\n");
-		} else {
-			dev_err(&hdev->pdev->dev,
-				"remove mac addr failed for undefined, code=%d.\n",
-				resp_code);
+			return -ENOENT;
 		}
+
+		dev_err(&hdev->pdev->dev,
+			"remove mac addr failed for undefined, code=%u.\n",
+			resp_code);
+		return -EIO;
 	} else if (op == HCLGE_MAC_VLAN_LKUP) {
 		if (!resp_code) {
-			return_status = 0;
+			return 0;
 		} else if (resp_code == 1) {
-			return_status = -ENOENT;
 			dev_dbg(&hdev->pdev->dev,
 				"lookup mac addr failed for miss.\n");
-		} else {
-			dev_err(&hdev->pdev->dev,
-				"lookup mac addr failed for undefined, code=%d.\n",
-				resp_code);
+			return -ENOENT;
 		}
-	} else {
-		return_status = -EINVAL;
+
 		dev_err(&hdev->pdev->dev,
-			"unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
-			op);
+			"lookup mac addr failed for undefined, code=%u.\n",
+			resp_code);
+		return -EIO;
 	}
 
-	return return_status;
+	dev_err(&hdev->pdev->dev,
+		"unknown opcode for get_mac_vlan_cmd_status, opcode=%d.\n", op);
+
+	return -EINVAL;
 }
 
 static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
 {
-	int word_num;
-	int bit_num;
+#define HCLGE_VF_NUM_IN_FIRST_DESC 192
+
+	unsigned int word_num;
+	unsigned int bit_num;
 
 	if (vfid > 255 || vfid < 0)
 		return -EIO;
 
-	if (vfid >= 0 && vfid <= 191) {
+	if (vfid >= 0 && vfid < HCLGE_VF_NUM_IN_FIRST_DESC) {
 		word_num = vfid / 32;
 		bit_num  = vfid % 32;
 		if (clr)
@@ -3923,7 +6809,7 @@
 		else
 			desc[1].data[word_num] |= cpu_to_le32(1 << bit_num);
 	} else {
-		word_num = (vfid - 192) / 32;
+		word_num = (vfid - HCLGE_VF_NUM_IN_FIRST_DESC) / 32;
 		bit_num  = vfid % 32;
 		if (clr)
 			desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num));
@@ -3949,185 +6835,23 @@
 }
 
 static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry_cmd *new_req,
-				   const u8 *addr)
+				   const u8 *addr, bool is_mc)
 {
 	const unsigned char *mac_addr = addr;
 	u32 high_val = mac_addr[2] << 16 | (mac_addr[3] << 24) |
 		       (mac_addr[0]) | (mac_addr[1] << 8);
 	u32 low_val  = mac_addr[4] | (mac_addr[5] << 8);
 
+	hnae3_set_bit(new_req->flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+	if (is_mc) {
+		hnae3_set_bit(new_req->entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+		hnae3_set_bit(new_req->mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+	}
+
 	new_req->mac_addr_hi32 = cpu_to_le32(high_val);
 	new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff);
 }
 
-static u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
-					   const u8 *addr)
-{
-	u16 high_val = addr[1] | (addr[0] << 8);
-	struct hclge_dev *hdev = vport->back;
-	u32 rsh = 4 - hdev->mta_mac_sel_type;
-	u16 ret_val = (high_val >> rsh) & 0xfff;
-
-	return ret_val;
-}
-
-static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
-				     enum hclge_mta_dmac_sel_type mta_mac_sel,
-				     bool enable)
-{
-	struct hclge_mta_filter_mode_cmd *req;
-	struct hclge_desc desc;
-	int ret;
-
-	req = (struct hclge_mta_filter_mode_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false);
-
-	hnae3_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
-		      enable);
-	hnae3_set_field(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_SEL_M,
-			HCLGE_CFG_MTA_MAC_SEL_S, mta_mac_sel);
-
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		dev_err(&hdev->pdev->dev,
-			"Config mat filter mode failed for cmd_send, ret =%d.\n",
-			ret);
-
-	return ret;
-}
-
-int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
-			      u8 func_id,
-			      bool enable)
-{
-	struct hclge_cfg_func_mta_filter_cmd *req;
-	struct hclge_desc desc;
-	int ret;
-
-	req = (struct hclge_cfg_func_mta_filter_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false);
-
-	hnae3_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
-		      enable);
-	req->function_id = func_id;
-
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		dev_err(&hdev->pdev->dev,
-			"Config func_id enable failed for cmd_send, ret =%d.\n",
-			ret);
-
-	return ret;
-}
-
-static int hclge_set_mta_table_item(struct hclge_vport *vport,
-				    u16 idx,
-				    bool enable)
-{
-	struct hclge_dev *hdev = vport->back;
-	struct hclge_cfg_func_mta_item_cmd *req;
-	struct hclge_desc desc;
-	u16 item_idx = 0;
-	int ret;
-
-	req = (struct hclge_cfg_func_mta_item_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false);
-	hnae3_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
-
-	hnae3_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
-			HCLGE_CFG_MTA_ITEM_IDX_S, idx);
-	req->item_idx = cpu_to_le16(item_idx);
-
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Config mta table item failed for cmd_send, ret =%d.\n",
-			ret);
-		return ret;
-	}
-
-	if (enable)
-		set_bit(idx, vport->mta_shadow);
-	else
-		clear_bit(idx, vport->mta_shadow);
-
-	return 0;
-}
-
-static int hclge_update_mta_status(struct hnae3_handle *handle)
-{
-	unsigned long mta_status[BITS_TO_LONGS(HCLGE_MTA_TBL_SIZE)];
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct net_device *netdev = handle->kinfo.netdev;
-	struct netdev_hw_addr *ha;
-	u16 tbl_idx;
-
-	memset(mta_status, 0, sizeof(mta_status));
-
-	/* update mta_status from mc addr list */
-	netdev_for_each_mc_addr(ha, netdev) {
-		tbl_idx = hclge_get_mac_addr_to_mta_index(vport, ha->addr);
-		set_bit(tbl_idx, mta_status);
-	}
-
-	return hclge_update_mta_status_common(vport, mta_status,
-					0, HCLGE_MTA_TBL_SIZE, true);
-}
-
-int hclge_update_mta_status_common(struct hclge_vport *vport,
-				   unsigned long *status,
-				   u16 idx,
-				   u16 count,
-				   bool update_filter)
-{
-	struct hclge_dev *hdev = vport->back;
-	u16 update_max = idx + count;
-	u16 check_max;
-	int ret = 0;
-	bool used;
-	u16 i;
-
-	/* setup mta check range */
-	if (update_filter) {
-		i = 0;
-		check_max = HCLGE_MTA_TBL_SIZE;
-	} else {
-		i = idx;
-		check_max = update_max;
-	}
-
-	used = false;
-	/* check and update all mta item */
-	for (; i < check_max; i++) {
-		/* ignore unused item */
-		if (!test_bit(i, vport->mta_shadow))
-			continue;
-
-		/* if i in update range then update it */
-		if (i >= idx && i < update_max)
-			if (!test_bit(i - idx, status))
-				hclge_set_mta_table_item(vport, i, false);
-
-		if (!used && test_bit(i, vport->mta_shadow))
-			used = true;
-	}
-
-	/* no longer use mta, disable it */
-	if (vport->accept_mta_mc && update_filter && !used) {
-		ret = hclge_cfg_func_mta_filter(hdev,
-						vport->vport_id,
-						false);
-		if (ret)
-			dev_err(&hdev->pdev->dev,
-				"disable func mta filter fail ret=%d\n",
-				ret);
-		else
-			vport->accept_mta_mc = false;
-	}
-
-	return ret;
-}
-
 static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
 				     struct hclge_mac_vlan_tbl_entry_cmd *req)
 {
@@ -4251,6 +6975,127 @@
 	return cfg_status;
 }
 
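+/* hclge_init_umv_space - request unicast mac vlan (UMV) table space
+ * from firmware and split it into a private quota per function plus a
+ * shared pool used once a function's private quota is exhausted.
+ */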
+static int hclge_init_umv_space(struct hclge_dev *hdev)
+{
+	u16 allocated_size = 0;
+	int ret;
+
+	ret = hclge_set_umv_space(hdev, hdev->wanted_umv_size, &allocated_size,
+				  true);
+	if (ret)
+		return ret;
+
+	if (allocated_size < hdev->wanted_umv_size)
+		dev_warn(&hdev->pdev->dev,
+			 "Alloc umv space failed, want %d, get %d\n",
+			 hdev->wanted_umv_size, allocated_size);
+
+	mutex_init(&hdev->umv_mutex);
+	hdev->max_umv_size = allocated_size;
+	/* divide max_umv_size by (hdev->num_req_vfs + 2), in order to
+	 * reserve some unicast mac vlan table entries to be shared by
+	 * the pf and its vfs.
+	 */
+	hdev->priv_umv_size = hdev->max_umv_size / (hdev->num_req_vfs + 2);
+	hdev->share_umv_size = hdev->priv_umv_size +
+			hdev->max_umv_size % (hdev->num_req_vfs + 2);
+
+	return 0;
+}
+
+static int hclge_uninit_umv_space(struct hclge_dev *hdev)
+{
+	int ret;
+
+	if (hdev->max_umv_size > 0) {
+		ret = hclge_set_umv_space(hdev, hdev->max_umv_size, NULL,
+					  false);
+		if (ret)
+			return ret;
+		hdev->max_umv_size = 0;
+	}
+	mutex_destroy(&hdev->umv_mutex);
+
+	return 0;
+}
+
+static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
+			       u16 *allocated_size, bool is_alloc)
+{
+	struct hclge_umv_spc_alc_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_umv_spc_alc_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_ALLOCATE, false);
+	if (!is_alloc)
+		hnae3_set_bit(req->allocate, HCLGE_UMV_SPC_ALC_B, 1);
+
+	req->space_size = cpu_to_le32(space_size);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"%s umv space failed for cmd_send, ret =%d\n",
+			is_alloc ? "allocate" : "free", ret);
+		return ret;
+	}
+
+	if (is_alloc && allocated_size)
+		*allocated_size = le32_to_cpu(desc.data[1]);
+
+	return 0;
+}
+
+static void hclge_reset_umv_space(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport;
+	int i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		vport->used_umv_num = 0;
+	}
+
+	mutex_lock(&hdev->umv_mutex);
+	hdev->share_umv_size = hdev->priv_umv_size +
+			hdev->max_umv_size % (hdev->num_req_vfs + 2);
+	mutex_unlock(&hdev->umv_mutex);
+}
+
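+/* hclge_is_umv_space_full - true when the vport has used up its private
+ * UMV quota and no shared entries remain.
+ */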
+static bool hclge_is_umv_space_full(struct hclge_vport *vport)
+{
+	struct hclge_dev *hdev = vport->back;
+	bool is_full;
+
+	mutex_lock(&hdev->umv_mutex);
+	is_full = (vport->used_umv_num >= hdev->priv_umv_size &&
+		   hdev->share_umv_size == 0);
+	mutex_unlock(&hdev->umv_mutex);
+
+	return is_full;
+}
+
+static void hclge_update_umv_space(struct hclge_vport *vport, bool is_free)
+{
+	struct hclge_dev *hdev = vport->back;
+
+	mutex_lock(&hdev->umv_mutex);
+	if (is_free) {
+		if (vport->used_umv_num > hdev->priv_umv_size)
+			hdev->share_umv_size++;
+
+		if (vport->used_umv_num > 0)
+			vport->used_umv_num--;
+	} else {
+		if (vport->used_umv_num >= hdev->priv_umv_size &&
+		    hdev->share_umv_size > 0)
+			hdev->share_umv_size--;
+		vport->used_umv_num++;
+	}
+	mutex_unlock(&hdev->umv_mutex);
+}
+
 static int hclge_add_uc_addr(struct hnae3_handle *handle,
 			     const unsigned char *addr)
 {
@@ -4274,34 +7119,46 @@
 	    is_multicast_ether_addr(addr)) {
 		dev_err(&hdev->pdev->dev,
 			"Set_uc mac err! invalid mac:%pM. is_zero:%d,is_br=%d,is_mul=%d\n",
-			 addr,
-			 is_zero_ether_addr(addr),
+			 addr, is_zero_ether_addr(addr),
 			 is_broadcast_ether_addr(addr),
 			 is_multicast_ether_addr(addr));
 		return -EINVAL;
 	}
 
 	memset(&req, 0, sizeof(req));
-	hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
 
 	hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
 			HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
 
 	req.egress_port = cpu_to_le16(egress_port);
 
-	hclge_prepare_mac_addr(&req, addr);
+	hclge_prepare_mac_addr(&req, addr, false);
 
 	/* Look up the mac address in the mac_vlan table, and add
 	 * it if the entry does not exist. Duplicate unicast entries
 	 * are not allowed in the mac vlan table.
 	 */
 	ret = hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false);
-	if (ret == -ENOENT)
-		return hclge_add_mac_vlan_tbl(vport, &req, NULL);
+	if (ret == -ENOENT) {
+		if (!hclge_is_umv_space_full(vport)) {
+			ret = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+			if (!ret)
+				hclge_update_umv_space(vport, false);
+			return ret;
+		}
+
+		dev_err(&hdev->pdev->dev, "UC MAC table full(%u)\n",
+			hdev->priv_umv_size);
+
+		return -ENOSPC;
+	}
 
 	/* check if we just hit the duplicate */
-	if (!ret)
-		ret = -EINVAL;
+	if (!ret) {
+		dev_warn(&hdev->pdev->dev, "VF %d mac(%pM) exists\n",
+			 vport->vport_id, addr);
+		return 0;
+	}
 
 	dev_err(&hdev->pdev->dev,
 		"PF failed to add unicast entry(%pM) in the MAC table\n",
@@ -4329,17 +7186,17 @@
 	if (is_zero_ether_addr(addr) ||
 	    is_broadcast_ether_addr(addr) ||
 	    is_multicast_ether_addr(addr)) {
-		dev_dbg(&hdev->pdev->dev,
-			"Remove mac err! invalid mac:%pM.\n",
-			 addr);
+		dev_dbg(&hdev->pdev->dev, "Remove mac err! invalid mac:%pM.\n",
+			addr);
 		return -EINVAL;
 	}
 
 	memset(&req, 0, sizeof(req));
-	hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
 	hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-	hclge_prepare_mac_addr(&req, addr);
+	hclge_prepare_mac_addr(&req, addr, false);
 	ret = hclge_remove_mac_vlan_tbl(vport, &req);
+	if (!ret)
+		hclge_update_umv_space(vport, true);
 
 	return ret;
 }
@@ -4358,7 +7215,6 @@
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_mac_vlan_tbl_entry_cmd req;
 	struct hclge_desc desc[3];
-	u16 tbl_idx;
 	int status;
 
 	/* mac addr check */
@@ -4369,44 +7225,22 @@
 		return -EINVAL;
 	}
 	memset(&req, 0, sizeof(req));
-	hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
 	hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-	hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
-	hnae3_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-	hclge_prepare_mac_addr(&req, addr);
+	hclge_prepare_mac_addr(&req, addr, true);
 	status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
-	if (!status) {
-		/* This mac addr exist, update VFID for it */
-		hclge_update_desc_vfid(desc, vport->vport_id, false);
-		status = hclge_add_mac_vlan_tbl(vport, &req, desc);
-	} else {
+	if (status) {
 		/* This mac addr does not exist, add a new entry for it */
 		memset(desc[0].data, 0, sizeof(desc[0].data));
 		memset(desc[1].data, 0, sizeof(desc[0].data));
 		memset(desc[2].data, 0, sizeof(desc[0].data));
-		hclge_update_desc_vfid(desc, vport->vport_id, false);
-		status = hclge_add_mac_vlan_tbl(vport, &req, desc);
 	}
+	status = hclge_update_desc_vfid(desc, vport->vport_id, false);
+	if (status)
+		return status;
+	status = hclge_add_mac_vlan_tbl(vport, &req, desc);
 
-	/* If mc mac vlan table is full, use MTA table */
-	if (status == -ENOSPC) {
-		if (!vport->accept_mta_mc) {
-			status = hclge_cfg_func_mta_filter(hdev,
-							   vport->vport_id,
-							   true);
-			if (status) {
-				dev_err(&hdev->pdev->dev,
-					"set mta filter mode fail ret=%d\n",
-					status);
-				return status;
-			}
-			vport->accept_mta_mc = true;
-		}
-
-		/* Set MTA table for this MAC address */
-		tbl_idx = hclge_get_mac_addr_to_mta_index(vport, addr);
-		status = hclge_set_mta_table_item(vport, tbl_idx, true);
-	}
+	if (status == -ENOSPC)
+		dev_err(&hdev->pdev->dev, "mc mac vlan table is full\n");
 
 	return status;
 }
@@ -4436,15 +7270,14 @@
 	}
 
 	memset(&req, 0, sizeof(req));
-	hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
 	hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-	hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
-	hnae3_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-	hclge_prepare_mac_addr(&req, addr);
+	hclge_prepare_mac_addr(&req, addr, true);
 	status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
 	if (!status) {
 		/* This mac addr exists, remove this handle's VFID for it */
-		hclge_update_desc_vfid(desc, vport->vport_id, true);
+		status = hclge_update_desc_vfid(desc, vport->vport_id, true);
+		if (status)
+			return status;
 
 		if (hclge_is_all_function_id_zero(desc))
 			/* All the vfids are zero, so this entry needs to be deleted */
@@ -4466,6 +7299,103 @@
 	return status;
 }
 
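+/* The helpers below keep a per-vport software list of the MAC addresses
+ * configured through a VF (vport 0, the PF itself, is not tracked), so
+ * the entries can be removed from hardware in bulk later on.
+ */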
+void hclge_add_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
+			       enum HCLGE_MAC_ADDR_TYPE mac_type)
+{
+	struct hclge_vport_mac_addr_cfg *mac_cfg;
+	struct list_head *list;
+
+	if (!vport->vport_id)
+		return;
+
+	mac_cfg = kzalloc(sizeof(*mac_cfg), GFP_KERNEL);
+	if (!mac_cfg)
+		return;
+
+	mac_cfg->hd_tbl_status = true;
+	memcpy(mac_cfg->mac_addr, mac_addr, ETH_ALEN);
+
+	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
+	       &vport->uc_mac_list : &vport->mc_mac_list;
+
+	list_add_tail(&mac_cfg->node, list);
+}
+
+void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
+			      bool is_write_tbl,
+			      enum HCLGE_MAC_ADDR_TYPE mac_type)
+{
+	struct hclge_vport_mac_addr_cfg *mac_cfg, *tmp;
+	struct list_head *list;
+	bool uc_flag, mc_flag;
+
+	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
+	       &vport->uc_mac_list : &vport->mc_mac_list;
+
+	uc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_UC;
+	mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC;
+
+	list_for_each_entry_safe(mac_cfg, tmp, list, node) {
+		if (strncmp(mac_cfg->mac_addr, mac_addr, ETH_ALEN) == 0) {
+			if (uc_flag && mac_cfg->hd_tbl_status)
+				hclge_rm_uc_addr_common(vport, mac_addr);
+
+			if (mc_flag && mac_cfg->hd_tbl_status)
+				hclge_rm_mc_addr_common(vport, mac_addr);
+
+			list_del(&mac_cfg->node);
+			kfree(mac_cfg);
+			break;
+		}
+	}
+}
+
+void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
+				  enum HCLGE_MAC_ADDR_TYPE mac_type)
+{
+	struct hclge_vport_mac_addr_cfg *mac_cfg, *tmp;
+	struct list_head *list;
+
+	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
+	       &vport->uc_mac_list : &vport->mc_mac_list;
+
+	list_for_each_entry_safe(mac_cfg, tmp, list, node) {
+		if (mac_type == HCLGE_MAC_ADDR_UC && mac_cfg->hd_tbl_status)
+			hclge_rm_uc_addr_common(vport, mac_cfg->mac_addr);
+
+		if (mac_type == HCLGE_MAC_ADDR_MC && mac_cfg->hd_tbl_status)
+			hclge_rm_mc_addr_common(vport, mac_cfg->mac_addr);
+
+		mac_cfg->hd_tbl_status = false;
+		if (is_del_list) {
+			list_del(&mac_cfg->node);
+			kfree(mac_cfg);
+		}
+	}
+}
+
+void hclge_uninit_vport_mac_table(struct hclge_dev *hdev)
+{
+	struct hclge_vport_mac_addr_cfg *mac, *tmp;
+	struct hclge_vport *vport;
+	int i;
+
+	mutex_lock(&hdev->vport_cfg_mutex);
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) {
+			list_del(&mac->node);
+			kfree(mac);
+		}
+
+		list_for_each_entry_safe(mac, tmp, &vport->mc_mac_list, node) {
+			list_del(&mac->node);
+			kfree(mac);
+		}
+	}
+	mutex_unlock(&hdev->vport_cfg_mutex);
+}
+
 static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
 					      u16 cmdq_resp, u8 resp_code)
 {
@@ -4572,12 +7502,13 @@
 	    is_broadcast_ether_addr(new_addr) ||
 	    is_multicast_ether_addr(new_addr)) {
 		dev_err(&hdev->pdev->dev,
-			"Change uc mac err! invalid mac:%p.\n",
+			"Change uc mac err! invalid mac:%pM.\n",
 			 new_addr);
 		return -EINVAL;
 	}
 
-	if (!is_first && hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
+	if ((!is_first || is_kdump_kernel()) &&
+	    hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
 		dev_warn(&hdev->pdev->dev,
 			 "remove old uc mac address fail.\n");
 
@@ -4608,8 +7539,20 @@
 	return 0;
 }
 
+static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
+			  int cmd)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (!hdev->hw.mac.phydev)
+		return -EOPNOTSUPP;
+
+	return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd);
+}
+
 static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
-				      bool filter_en)
+				      u8 fe_type, bool filter_en, u8 vf_id)
 {
 	struct hclge_vlan_filter_ctrl_cmd *req;
 	struct hclge_desc desc;
@@ -4619,7 +7562,8 @@
 
 	req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data;
 	req->vlan_type = vlan_type;
-	req->vlan_fe = filter_en;
+	req->vlan_fe = filter_en ? fe_type : 0;
+	req->vf_id = vf_id;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
@@ -4631,17 +7575,39 @@
 
 #define HCLGE_FILTER_TYPE_VF		0
 #define HCLGE_FILTER_TYPE_PORT		1
+#define HCLGE_FILTER_FE_EGRESS_V1_B	BIT(0)
+#define HCLGE_FILTER_FE_NIC_INGRESS_B	BIT(0)
+#define HCLGE_FILTER_FE_NIC_EGRESS_B	BIT(1)
+#define HCLGE_FILTER_FE_ROCE_INGRESS_B	BIT(2)
+#define HCLGE_FILTER_FE_ROCE_EGRESS_B	BIT(3)
+#define HCLGE_FILTER_FE_EGRESS		(HCLGE_FILTER_FE_NIC_EGRESS_B \
+					| HCLGE_FILTER_FE_ROCE_EGRESS_B)
+#define HCLGE_FILTER_FE_INGRESS		(HCLGE_FILTER_FE_NIC_INGRESS_B \
+					| HCLGE_FILTER_FE_ROCE_INGRESS_B)
 
 static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, enable);
+	if (hdev->pdev->revision >= 0x21) {
+		hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+					   HCLGE_FILTER_FE_EGRESS, enable, 0);
+		hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT,
+					   HCLGE_FILTER_FE_INGRESS, enable, 0);
+	} else {
+		hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+					   HCLGE_FILTER_FE_EGRESS_V1_B, enable,
+					   0);
+	}
+	if (enable)
+		handle->netdev_flags |= HNAE3_VLAN_FLTR;
+	else
+		handle->netdev_flags &= ~HNAE3_VLAN_FLTR;
 }
 
-static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
-				    bool is_kill, u16 vlan, u8 qos,
+static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
+				    bool is_kill, u16 vlan,
 				    __be16 proto)
 {
 #define HCLGE_MAX_VF_BYTES  16
@@ -4652,6 +7618,12 @@
 	u8 vf_byte_off;
 	int ret;
 
+	/* if the vf vlan table is full, firmware will disable the vf vlan
+	 * filter, so it is neither possible nor necessary to add new vlan
+	 * ids to the vf vlan filter
+	 */
+	if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill)
+		return 0;
+
 	hclge_cmd_setup_basic_desc(&desc[0],
 				   HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
 	hclge_cmd_setup_basic_desc(&desc[1],
@@ -4687,6 +7659,7 @@
 			return 0;
 
 		if (req0->resp_code == HCLGE_VF_VLAN_NO_ENTRY) {
+			set_bit(vfid, hdev->vf_vlan_full);
 			dev_warn(&hdev->pdev->dev,
 				 "vf vlan table is full, vf vlan filter is disabled\n");
 			return 0;
@@ -4700,12 +7673,13 @@
 		if (!req0->resp_code)
 			return 0;
 
-		if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) {
-			dev_warn(&hdev->pdev->dev,
-				 "vlan %d filter is not in vf vlan table\n",
-				 vlan);
+		/* the vf vlan filter is disabled when the vf vlan table is
+		 * full, so new vlan ids are never added to the table.
+		 * Just return 0 without a warning to avoid flooding the
+		 * logs on unload.
+		 */
+		if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND)
 			return 0;
-		}
 
 		dev_err(&hdev->pdev->dev,
 			"Kill vf vlan filter fail, ret =%d.\n",
@@ -4744,7 +7718,7 @@
 }
 
 static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
-				    u16 vport_id, u16 vlan_id, u8 qos,
+				    u16 vport_id, u16 vlan_id,
 				    bool is_kill)
 {
 	u16 vport_idx, vport_num = 0;
@@ -4754,7 +7728,7 @@
 		return 0;
 
 	ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id,
-				       0, proto);
+				       proto);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Set %d vport vlan filter config fail, ret =%d.\n",
@@ -4782,7 +7756,7 @@
 		return -EINVAL;
 	}
 
-	for_each_set_bit(vport_idx, hdev->vlan_table[vlan_id], VLAN_N_VID)
+	for_each_set_bit(vport_idx, hdev->vlan_table[vlan_id], HCLGE_VPORT_NUM)
 		vport_num++;
 
 	if ((is_kill && vport_num == 0) || (!is_kill && vport_num == 1))
@@ -4792,36 +7766,13 @@
 	return ret;
 }
 
-int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
-			  u16 vlan_id, bool is_kill)
-{
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-
-	return hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, vlan_id,
-					0, is_kill);
-}
-
-static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
-				    u16 vlan, u8 qos, __be16 proto)
-{
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-
-	if ((vfid >= hdev->num_alloc_vfs) || (vlan > 4095) || (qos > 7))
-		return -EINVAL;
-	if (proto != htons(ETH_P_8021Q))
-		return -EPROTONOSUPPORT;
-
-	return hclge_set_vlan_filter_hw(hdev, proto, vfid, vlan, qos, false);
-}
-
 static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
 {
 	struct hclge_tx_vtag_cfg *vcfg = &vport->txvlan_cfg;
 	struct hclge_vport_vtag_tx_cfg_cmd *req;
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_desc desc;
+	u16 bmap_index;
 	int status;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_TX_CFG, false);
@@ -4844,8 +7795,10 @@
 	hnae3_set_bit(req->vport_vlan_cfg, HCLGE_CFG_NIC_ROCE_SEL_B, 0);
 
 	req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
-	req->vf_bitmap[req->vf_offset] =
-		1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
+	bmap_index = vport->vport_id % HCLGE_VF_NUM_PER_CMD /
+			HCLGE_VF_NUM_PER_BYTE;
+	req->vf_bitmap[bmap_index] =
+		1U << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
 
 	status = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (status)
@@ -4862,6 +7815,7 @@
 	struct hclge_vport_vtag_rx_cfg_cmd *req;
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_desc desc;
+	u16 bmap_index;
 	int status;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_RX_CFG, false);
@@ -4877,8 +7831,10 @@
 		      vcfg->vlan2_vlan_prionly ? 1 : 0);
 
 	req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
-	req->vf_bitmap[req->vf_offset] =
-		1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
+	bmap_index = vport->vport_id % HCLGE_VF_NUM_PER_CMD /
+			HCLGE_VF_NUM_PER_BYTE;
+	req->vf_bitmap[bmap_index] =
+		1U << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
 
 	status = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (status)
@@ -4889,6 +7845,52 @@
 	return status;
 }
 
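+/* hclge_vlan_offload_cfg - program the tx vlan accept/insert and rx vlan
+ * strip settings according to whether a port based vlan is in effect
+ * for the vport.
+ */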
+static int hclge_vlan_offload_cfg(struct hclge_vport *vport,
+				  u16 port_base_vlan_state,
+				  u16 vlan_tag)
+{
+	int ret;
+
+	if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+		vport->txvlan_cfg.accept_tag1 = true;
+		vport->txvlan_cfg.insert_tag1_en = false;
+		vport->txvlan_cfg.default_tag1 = 0;
+	} else {
+		vport->txvlan_cfg.accept_tag1 = false;
+		vport->txvlan_cfg.insert_tag1_en = true;
+		vport->txvlan_cfg.default_tag1 = vlan_tag;
+	}
+
+	vport->txvlan_cfg.accept_untag1 = true;
+
+	/* accept_tag2 and accept_untag2 are not supported on
+	 * pdev revision(0x20); newer revisions support them, but
+	 * these two fields cannot be configured by the user.
+	 */
+	vport->txvlan_cfg.accept_tag2 = true;
+	vport->txvlan_cfg.accept_untag2 = true;
+	vport->txvlan_cfg.insert_tag2_en = false;
+	vport->txvlan_cfg.default_tag2 = 0;
+
+	if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+		vport->rxvlan_cfg.strip_tag1_en = false;
+		vport->rxvlan_cfg.strip_tag2_en =
+				vport->rxvlan_cfg.rx_vlan_offload_en;
+	} else {
+		vport->rxvlan_cfg.strip_tag1_en =
+				vport->rxvlan_cfg.rx_vlan_offload_en;
+		vport->rxvlan_cfg.strip_tag2_en = true;
+	}
+	vport->rxvlan_cfg.vlan1_vlan_prionly = false;
+	vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+
+	ret = hclge_set_vlan_tx_offload_cfg(vport);
+	if (ret)
+		return ret;
+
+	return hclge_set_vlan_rx_offload_cfg(vport);
+}
+
 static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev)
 {
 	struct hclge_rx_vlan_type_cfg_cmd *rx_req;
@@ -4917,7 +7919,7 @@
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_INSERT, false);
 
-	tx_req = (struct hclge_tx_vlan_type_cfg_cmd *)&desc.data;
+	tx_req = (struct hclge_tx_vlan_type_cfg_cmd *)desc.data;
 	tx_req->ot_vlan_type = cpu_to_le16(hdev->vlan_type_cfg.tx_ot_vlan_type);
 	tx_req->in_vlan_type = cpu_to_le16(hdev->vlan_type_cfg.tx_in_vlan_type);
 
@@ -4934,18 +7936,38 @@
 {
 #define HCLGE_DEF_VLAN_TYPE		0x8100
 
-	struct hnae3_handle *handle;
+	struct hnae3_handle *handle = &hdev->vport[0].nic;
 	struct hclge_vport *vport;
 	int ret;
 	int i;
 
-	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, true);
-	if (ret)
-		return ret;
+	if (hdev->pdev->revision >= 0x21) {
+		/* for revision 0x21, vf vlan filter is per function */
+		for (i = 0; i < hdev->num_alloc_vport; i++) {
+			vport = &hdev->vport[i];
+			ret = hclge_set_vlan_filter_ctrl(hdev,
+							 HCLGE_FILTER_TYPE_VF,
+							 HCLGE_FILTER_FE_EGRESS,
+							 true,
+							 vport->vport_id);
+			if (ret)
+				return ret;
+		}
 
-	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, true);
-	if (ret)
-		return ret;
+		ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT,
+						 HCLGE_FILTER_FE_INGRESS, true,
+						 0);
+		if (ret)
+			return ret;
+	} else {
+		ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+						 HCLGE_FILTER_FE_EGRESS_V1_B,
+						 true, 0);
+		if (ret)
+			return ret;
+	}
+
+	handle->netdev_flags |= HNAE3_VLAN_FLTR;
 
 	hdev->vlan_type_cfg.rx_in_fst_vlan_type = HCLGE_DEF_VLAN_TYPE;
 	hdev->vlan_type_cfg.rx_in_sec_vlan_type = HCLGE_DEF_VLAN_TYPE;
@@ -4959,102 +7981,473 @@
 		return ret;
 
 	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		u16 vlan_tag;
+
 		vport = &hdev->vport[i];
-		vport->txvlan_cfg.accept_tag1 = true;
-		vport->txvlan_cfg.accept_untag1 = true;
+		vlan_tag = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
 
-		/* accept_tag2 and accept_untag2 are not supported on
-		 * pdev revision(0x20), new revision support them. The
-		 * value of this two fields will not return error when driver
-		 * send command to fireware in revision(0x20).
-		 * This two fields can not configured by user.
-		 */
-		vport->txvlan_cfg.accept_tag2 = true;
-		vport->txvlan_cfg.accept_untag2 = true;
-
-		vport->txvlan_cfg.insert_tag1_en = false;
-		vport->txvlan_cfg.insert_tag2_en = false;
-		vport->txvlan_cfg.default_tag1 = 0;
-		vport->txvlan_cfg.default_tag2 = 0;
-
-		ret = hclge_set_vlan_tx_offload_cfg(vport);
-		if (ret)
-			return ret;
-
-		vport->rxvlan_cfg.strip_tag1_en = false;
-		vport->rxvlan_cfg.strip_tag2_en = true;
-		vport->rxvlan_cfg.vlan1_vlan_prionly = false;
-		vport->rxvlan_cfg.vlan2_vlan_prionly = false;
-
-		ret = hclge_set_vlan_rx_offload_cfg(vport);
+		ret = hclge_vlan_offload_cfg(vport,
+					     vport->port_base_vlan_cfg.state,
+					     vlan_tag);
 		if (ret)
 			return ret;
 	}
 
-	handle = &hdev->vport[0].nic;
 	return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
 }
 
+static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+				       bool writen_to_tbl)
+{
+	struct hclge_vport_vlan_cfg *vlan;
+
+	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+	if (!vlan)
+		return;
+
+	vlan->hd_tbl_status = writen_to_tbl;
+	vlan->vlan_id = vlan_id;
+
+	list_add_tail(&vlan->node, &vport->vlan_list);
+}
+
+static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport)
+{
+	struct hclge_vport_vlan_cfg *vlan, *tmp;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+		if (!vlan->hd_tbl_status) {
+			ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+						       vport->vport_id,
+						       vlan->vlan_id, false);
+			if (ret) {
+				dev_err(&hdev->pdev->dev,
+					"restore vport vlan list failed, ret=%d\n",
+					ret);
+				return ret;
+			}
+		}
+		vlan->hd_tbl_status = true;
+	}
+
+	return 0;
+}
+
+static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+				      bool is_write_tbl)
+{
+	struct hclge_vport_vlan_cfg *vlan, *tmp;
+	struct hclge_dev *hdev = vport->back;
+
+	list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+		if (vlan->vlan_id == vlan_id) {
+			if (is_write_tbl && vlan->hd_tbl_status)
+				hclge_set_vlan_filter_hw(hdev,
+							 htons(ETH_P_8021Q),
+							 vport->vport_id,
+							 vlan_id,
+							 true);
+
+			list_del(&vlan->node);
+			kfree(vlan);
+			break;
+		}
+	}
+}
+
+void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
+{
+	struct hclge_vport_vlan_cfg *vlan, *tmp;
+	struct hclge_dev *hdev = vport->back;
+
+	list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+		if (vlan->hd_tbl_status)
+			hclge_set_vlan_filter_hw(hdev,
+						 htons(ETH_P_8021Q),
+						 vport->vport_id,
+						 vlan->vlan_id,
+						 true);
+
+		vlan->hd_tbl_status = false;
+		if (is_del_list) {
+			list_del(&vlan->node);
+			kfree(vlan);
+		}
+	}
+}
+
+void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
+{
+	struct hclge_vport_vlan_cfg *vlan, *tmp;
+	struct hclge_vport *vport;
+	int i;
+
+	mutex_lock(&hdev->vport_cfg_mutex);
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+			list_del(&vlan->node);
+			kfree(vlan);
+		}
+	}
+	mutex_unlock(&hdev->vport_cfg_mutex);
+}
+
+static void hclge_restore_vlan_table(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_vport_vlan_cfg *vlan, *tmp;
+	struct hclge_dev *hdev = vport->back;
+	u16 vlan_proto;
+	u16 state, vlan_id;
+	int i;
+
+	mutex_lock(&hdev->vport_cfg_mutex);
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto;
+		vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
+		state = vport->port_base_vlan_cfg.state;
+
+		if (state != HNAE3_PORT_BASE_VLAN_DISABLE) {
+			hclge_set_vlan_filter_hw(hdev, htons(vlan_proto),
+						 vport->vport_id, vlan_id,
+						 false);
+			continue;
+		}
+
+		list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+			if (vlan->hd_tbl_status)
+				hclge_set_vlan_filter_hw(hdev,
+							 htons(ETH_P_8021Q),
+							 vport->vport_id,
+							 vlan->vlan_id,
+							 false);
+		}
+	}
+
+	mutex_unlock(&hdev->vport_cfg_mutex);
+}
+
 int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 
-	vport->rxvlan_cfg.strip_tag1_en = false;
-	vport->rxvlan_cfg.strip_tag2_en = enable;
+	if (vport->port_base_vlan_cfg.state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+		vport->rxvlan_cfg.strip_tag1_en = false;
+		vport->rxvlan_cfg.strip_tag2_en = enable;
+	} else {
+		vport->rxvlan_cfg.strip_tag1_en = enable;
+		vport->rxvlan_cfg.strip_tag2_en = true;
+	}
 	vport->rxvlan_cfg.vlan1_vlan_prionly = false;
 	vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+	vport->rxvlan_cfg.rx_vlan_offload_en = enable;
 
 	return hclge_set_vlan_rx_offload_cfg(vport);
 }
 
-static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
+static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
+					    u16 port_base_vlan_state,
+					    struct hclge_vlan_info *new_info,
+					    struct hclge_vlan_info *old_info)
+{
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_ENABLE) {
+		hclge_rm_vport_all_vlan_table(vport, false);
+		return hclge_set_vlan_filter_hw(hdev,
+						 htons(new_info->vlan_proto),
+						 vport->vport_id,
+						 new_info->vlan_tag,
+						 false);
+	}
+
+	ret = hclge_set_vlan_filter_hw(hdev, htons(old_info->vlan_proto),
+				       vport->vport_id, old_info->vlan_tag,
+				       true);
+	if (ret)
+		return ret;
+
+	return hclge_add_vport_all_vlan_table(vport);
+}
+
+int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+				    struct hclge_vlan_info *vlan_info)
+{
+	struct hnae3_handle *nic = &vport->nic;
+	struct hclge_vlan_info *old_vlan_info;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	old_vlan_info = &vport->port_base_vlan_cfg.vlan_info;
+
+	ret = hclge_vlan_offload_cfg(vport, state, vlan_info->vlan_tag);
+	if (ret)
+		return ret;
+
+	if (state == HNAE3_PORT_BASE_VLAN_MODIFY) {
+		/* add new VLAN tag */
+		ret = hclge_set_vlan_filter_hw(hdev,
+					       htons(vlan_info->vlan_proto),
+					       vport->vport_id,
+					       vlan_info->vlan_tag,
+					       false);
+		if (ret)
+			return ret;
+
+		/* remove old VLAN tag */
+		ret = hclge_set_vlan_filter_hw(hdev,
+					       htons(old_vlan_info->vlan_proto),
+					       vport->vport_id,
+					       old_vlan_info->vlan_tag,
+					       true);
+		if (ret)
+			return ret;
+
+		goto update;
+	}
+
+	ret = hclge_update_vlan_filter_entries(vport, state, vlan_info,
+					       old_vlan_info);
+	if (ret)
+		return ret;
+
+	/* update state only when disabling/enabling port based VLAN */
+	vport->port_base_vlan_cfg.state = state;
+	if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
+	else
+		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
+update:
+	vport->port_base_vlan_cfg.vlan_info.vlan_tag = vlan_info->vlan_tag;
+	vport->port_base_vlan_cfg.vlan_info.qos = vlan_info->qos;
+	vport->port_base_vlan_cfg.vlan_info.vlan_proto = vlan_info->vlan_proto;
+
+	return 0;
+}
+
+static u16 hclge_get_port_base_vlan_state(struct hclge_vport *vport,
+					  enum hnae3_port_base_vlan_state state,
+					  u16 vlan)
+{
+	if (state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+		if (!vlan)
+			return HNAE3_PORT_BASE_VLAN_NOCHANGE;
+		else
+			return HNAE3_PORT_BASE_VLAN_ENABLE;
+	} else {
+		if (!vlan)
+			return HNAE3_PORT_BASE_VLAN_DISABLE;
+		else if (vport->port_base_vlan_cfg.vlan_info.vlan_tag == vlan)
+			return HNAE3_PORT_BASE_VLAN_NOCHANGE;
+		else
+			return HNAE3_PORT_BASE_VLAN_MODIFY;
+	}
+}
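
hclge_get_port_base_vlan_state() above is a small decision table; spelled
out for clarity:

	current state | requested vlan     | returned state
	DISABLE       | 0                  | NOCHANGE
	DISABLE       | nonzero            | ENABLE
	ENABLE        | 0                  | DISABLE
	ENABLE        | same tag as before | NOCHANGE
	ENABLE        | a different tag    | MODIFY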
+
+static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+				    u16 vlan, u8 qos, __be16 proto)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_vlan_info vlan_info;
+	u16 state;
+	int ret;
+
+	if (hdev->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	/* qos is a 3-bit value, so it cannot be bigger than 7 */
+	if (vfid >= hdev->num_alloc_vfs || vlan > VLAN_N_VID - 1 || qos > 7)
+		return -EINVAL;
+	if (proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
+	vport = &hdev->vport[vfid];
+	state = hclge_get_port_base_vlan_state(vport,
+					       vport->port_base_vlan_cfg.state,
+					       vlan);
+	if (state == HNAE3_PORT_BASE_VLAN_NOCHANGE)
+		return 0;
+
+	vlan_info.vlan_tag = vlan;
+	vlan_info.qos = qos;
+	vlan_info.vlan_proto = ntohs(proto);
+
+	/* update port based VLAN for PF */
+	if (!vfid) {
+		hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+		ret = hclge_update_port_base_vlan_cfg(vport, state, &vlan_info);
+		hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+
+		return ret;
+	}
+
+	if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) {
+		return hclge_update_port_base_vlan_cfg(vport, state,
+						       &vlan_info);
+	} else {
+		ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+							(u8)vfid, state,
+							vlan, qos,
+							ntohs(proto));
+		return ret;
+	}
+}
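
This handler backs the ndo_set_vf_vlan path in the hns3 driver, so the usual
way to reach it from userspace is iproute2, e.g.:

	ip link set <dev> vf 1 vlan 100 qos 3

with "vlan 0" restoring untagged operation. Note the early -EOPNOTSUPP:
port based vlan for VFs is only supported from pdev revision 0x21 onwards.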
+
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+			  u16 vlan_id, bool is_kill)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	bool writen_to_tbl = false;
+	int ret = 0;
+
+	/* When the device is resetting, the firmware is unable to handle
+	 * the mailbox. Just record the vlan id, and remove it after the
+	 * reset has finished.
+	 */
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && is_kill) {
+		set_bit(vlan_id, vport->vlan_del_fail_bmap);
+		return -EBUSY;
+	}
+
+	/* When port based vlan is enabled, we use the port based vlan as the
+	 * vlan filter entry. In this case, we don't update the vlan filter
+	 * table when the user adds a new vlan or removes an existing vlan;
+	 * we just update the vport vlan list. The vlan ids in the vlan list
+	 * are only written to the vlan filter table once port based vlan is
+	 * disabled.
+	 */
+	if (handle->port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+		ret = hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id,
+					       vlan_id, is_kill);
+		writen_to_tbl = true;
+	}
+
+	if (!ret) {
+		if (is_kill)
+			hclge_rm_vport_vlan_table(vport, vlan_id, false);
+		else
+			hclge_add_vport_vlan_table(vport, vlan_id,
+						   writen_to_tbl);
+	} else if (is_kill) {
+		/* when removing the hw vlan filter fails, record the vlan id,
+		 * and try to remove it from hw later, to stay consistent
+		 * with the stack
+		 */
+		set_bit(vlan_id, vport->vlan_del_fail_bmap);
+	}
+	return ret;
+}
+
+static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
+{
+#define HCLGE_MAX_SYNC_COUNT	60
+
+	int i, ret, sync_cnt = 0;
+	u16 vlan_id;
+
+	/* start from vport 1, as the PF is always alive */
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		struct hclge_vport *vport = &hdev->vport[i];
+
+		vlan_id = find_first_bit(vport->vlan_del_fail_bmap,
+					 VLAN_N_VID);
+		while (vlan_id != VLAN_N_VID) {
+			ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+						       vport->vport_id, vlan_id,
+						       true);
+			if (ret && ret != -EINVAL)
+				return;
+
+			clear_bit(vlan_id, vport->vlan_del_fail_bmap);
+			hclge_rm_vport_vlan_table(vport, vlan_id, false);
+
+			sync_cnt++;
+			if (sync_cnt >= HCLGE_MAX_SYNC_COUNT)
+				return;
+
+			vlan_id = find_first_bit(vport->vlan_del_fail_bmap,
+						 VLAN_N_VID);
+		}
+	}
+}
+
+static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps)
 {
 	struct hclge_config_max_frm_size_cmd *req;
 	struct hclge_desc desc;
-	int max_frm_size;
-	int ret;
-
-	max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-
-	if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
-	    max_frm_size > HCLGE_MAC_MAX_FRAME)
-		return -EINVAL;
-
-	max_frm_size = max(max_frm_size, HCLGE_MAC_DEFAULT_FRAME);
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false);
 
 	req = (struct hclge_config_max_frm_size_cmd *)desc.data;
-	req->max_frm_size = cpu_to_le16(max_frm_size);
+	req->max_frm_size = cpu_to_le16(new_mps);
 	req->min_frm_size = HCLGE_MAC_MIN_FRAME;
 
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		dev_err(&hdev->pdev->dev, "set mtu fail, ret =%d.\n", ret);
-	else
-		hdev->mps = max_frm_size;
-
-	return ret;
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
 static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-	int ret;
 
-	ret = hclge_set_mac_mtu(hdev, new_mtu);
+	return hclge_set_vport_mtu(vport, new_mtu);
+}
+
+int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu)
+{
+	struct hclge_dev *hdev = vport->back;
+	int i, max_frm_size, ret;
+
+	max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
+	if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
+	    max_frm_size > HCLGE_MAC_MAX_FRAME)
+		return -EINVAL;
+
+	max_frm_size = max(max_frm_size, HCLGE_MAC_DEFAULT_FRAME);
+	mutex_lock(&hdev->vport_lock);
+	/* VF's mps must fit within hdev->mps */
+	if (vport->vport_id && max_frm_size > hdev->mps) {
+		mutex_unlock(&hdev->vport_lock);
+		return -EINVAL;
+	} else if (vport->vport_id) {
+		vport->mps = max_frm_size;
+		mutex_unlock(&hdev->vport_lock);
+		return 0;
+	}
+
+	/* PF's mps must be greater than VF's mps */
+	for (i = 1; i < hdev->num_alloc_vport; i++)
+		if (max_frm_size < hdev->vport[i].mps) {
+			mutex_unlock(&hdev->vport_lock);
+			return -EINVAL;
+		}
+
+	hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+	ret = hclge_set_mac_mtu(hdev, max_frm_size);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Change mtu fail, ret =%d\n", ret);
-		return ret;
+		goto out;
 	}
 
+	hdev->mps = max_frm_size;
+	vport->mps = max_frm_size;
+
 	ret = hclge_buffer_alloc(hdev);
 	if (ret)
 		dev_err(&hdev->pdev->dev,
 			"Allocate buffer fail, ret =%d\n", ret);
 
+out:
+	hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+	mutex_unlock(&hdev->vport_lock);
 	return ret;
 }
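
Worked through for the common case, the frame-size arithmetic above gives,
for a 1500-byte MTU, max_frm_size = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) +
2 * VLAN_HLEN (2 * 4) = 1526 bytes, which is exactly the default hdev->mps
set in hclge_init_ae_dev() below (ETH_FRAME_LEN + ETH_FCS_LEN +
2 * VLAN_HLEN = 1514 + 4 + 8 = 1526). The 2 * VLAN_HLEN term, doubled from
the old single VLAN_HLEN, leaves room for double-tagged (QinQ) frames.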
 
@@ -5069,7 +8462,8 @@
 
 	req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
 	req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
-	hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
+	if (enable)
+		hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, 1U);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
@@ -5102,8 +8496,7 @@
 	return hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
 }
 
-static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
-					  u16 queue_id)
+u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hnae3_queue *queue;
 	struct hclge_tqp *tqp;
@@ -5114,7 +8507,7 @@
 	return tqp->index;
 }
 
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
@@ -5123,44 +8516,41 @@
 	u16 queue_gid;
 	int ret;
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
-		return;
-
 	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
 
 	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
 	if (ret) {
-		dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
-		return;
+		dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
+		return ret;
 	}
 
 	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
 	if (ret) {
-		dev_warn(&hdev->pdev->dev,
-			 "Send reset tqp cmd fail, ret = %d\n", ret);
-		return;
+		dev_err(&hdev->pdev->dev,
+			"Send reset tqp cmd fail, ret = %d\n", ret);
+		return ret;
 	}
 
-	reset_try_times = 0;
 	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
-		/* Wait for tqp hw reset */
-		msleep(20);
 		reset_status = hclge_get_reset_status(hdev, queue_gid);
 		if (reset_status)
 			break;
+
+		/* Wait for tqp hw reset */
+		usleep_range(1000, 1200);
 	}
 
 	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
-		dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
-		return;
+		dev_err(&hdev->pdev->dev, "Reset TQP fail\n");
+		return ret;
 	}
 
 	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
-	if (ret) {
-		dev_warn(&hdev->pdev->dev,
-			 "Deassert the soft reset fail, ret = %d\n", ret);
-		return;
-	}
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Deassert the soft reset fail, ret = %d\n", ret);
+
+	return ret;
 }
 
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
@@ -5180,13 +8570,13 @@
 		return;
 	}
 
-	reset_try_times = 0;
 	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
-		/* Wait for tqp hw reset */
-		msleep(20);
 		reset_status = hclge_get_reset_status(hdev, queue_gid);
 		if (reset_status)
 			break;
+
+		/* Wait for tqp hw reset */
+		usleep_range(1000, 1200);
 	}
 
 	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
@@ -5208,20 +8598,6 @@
 	return hdev->fw_version;
 }
 
-static void hclge_get_flowctrl_adv(struct hnae3_handle *handle,
-				   u32 *flowctrl_adv)
-{
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-	struct phy_device *phydev = hdev->hw.mac.phydev;
-
-	if (!phydev)
-		return;
-
-	*flowctrl_adv |= (phydev->advertising & ADVERTISED_Pause) |
-			 (phydev->advertising & ADVERTISED_Asym_Pause);
-}
-
 static void hclge_set_flowctrl_adv(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
@@ -5229,59 +8605,36 @@
 	if (!phydev)
 		return;
 
-	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
-	if (rx_en)
-		phydev->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-
-	if (tx_en)
-		phydev->advertising ^= ADVERTISED_Asym_Pause;
+	phy_set_asym_pause(phydev, rx_en, tx_en);
 }
 
 static int hclge_cfg_pauseparam(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
 {
 	int ret;
 
-	if (rx_en && tx_en)
-		hdev->fc_mode_last_time = HCLGE_FC_FULL;
-	else if (rx_en && !tx_en)
-		hdev->fc_mode_last_time = HCLGE_FC_RX_PAUSE;
-	else if (!rx_en && tx_en)
-		hdev->fc_mode_last_time = HCLGE_FC_TX_PAUSE;
-	else
-		hdev->fc_mode_last_time = HCLGE_FC_NONE;
-
 	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC)
 		return 0;
 
 	ret = hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "configure pauseparam error, ret = %d.\n",
-			ret);
-		return ret;
-	}
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"configure pauseparam error, ret = %d.\n", ret);
 
-	hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
-
-	return 0;
+	return ret;
 }
 
 int hclge_cfg_flowctrl(struct hclge_dev *hdev)
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	u16 remote_advertising = 0;
-	u16 local_advertising = 0;
+	u16 local_advertising;
 	u32 rx_pause, tx_pause;
 	u8 flowctl;
 
 	if (!phydev->link || !phydev->autoneg)
 		return 0;
 
-	if (phydev->advertising & ADVERTISED_Pause)
-		local_advertising = ADVERTISE_PAUSE_CAP;
-
-	if (phydev->advertising & ADVERTISED_Asym_Pause)
-		local_advertising |= ADVERTISE_PAUSE_ASYM;
+	local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
 
 	if (phydev->pause)
 		remote_advertising = LPA_PAUSE_CAP;
@@ -5307,8 +8660,9 @@
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	struct phy_device *phydev = hdev->hw.mac.phydev;
 
-	*auto_neg = hclge_get_autoneg(handle);
+	*auto_neg = phydev ? hclge_get_autoneg(handle) : 0;
 
 	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
 		*rx_en = 0;
@@ -5331,6 +8685,21 @@
 	}
 }
 
+static void hclge_record_user_pauseparam(struct hclge_dev *hdev,
+					 u32 rx_en, u32 tx_en)
+{
+	if (rx_en && tx_en)
+		hdev->fc_mode_last_time = HCLGE_FC_FULL;
+	else if (rx_en && !tx_en)
+		hdev->fc_mode_last_time = HCLGE_FC_RX_PAUSE;
+	else if (!rx_en && tx_en)
+		hdev->fc_mode_last_time = HCLGE_FC_TX_PAUSE;
+	else
+		hdev->fc_mode_last_time = HCLGE_FC_NONE;
+
+	hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
+}
+
 static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
 				u32 rx_en, u32 tx_en)
 {
@@ -5339,11 +8708,13 @@
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	u32 fc_autoneg;
 
-	fc_autoneg = hclge_get_autoneg(handle);
-	if (auto_neg != fc_autoneg) {
-		dev_info(&hdev->pdev->dev,
-			 "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
-		return -EOPNOTSUPP;
+	if (phydev) {
+		fc_autoneg = hclge_get_autoneg(handle);
+		if (auto_neg != fc_autoneg) {
+			dev_info(&hdev->pdev->dev,
+				 "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+			return -EOPNOTSUPP;
+		}
 	}
 
 	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
@@ -5354,16 +8725,15 @@
 
 	hclge_set_flowctrl_adv(hdev, rx_en, tx_en);
 
-	if (!fc_autoneg)
+	hclge_record_user_pauseparam(hdev, rx_en, tx_en);
+
+	if (!auto_neg)
 		return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
 
-	/* Only support flow control negotiation for netdev with
-	 * phy attached for now.
-	 */
-	if (!phydev)
-		return -EOPNOTSUPP;
+	if (phydev)
+		return phy_start_aneg(phydev);
 
-	return phy_start_aneg(phydev);
+	return -EOPNOTSUPP;
 }
 
 static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
@@ -5380,13 +8750,17 @@
 		*auto_neg = hdev->hw.mac.autoneg;
 }
 
-static void hclge_get_media_type(struct hnae3_handle *handle, u8 *media_type)
+static void hclge_get_media_type(struct hnae3_handle *handle, u8 *media_type,
+				 u8 *module_type)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
 	if (media_type)
 		*media_type = hdev->hw.mac.media_type;
+
+	if (module_type)
+		*module_type = hdev->hw.mac.module_type;
 }
 
 static void hclge_get_mdix_mode(struct hnae3_handle *handle,
@@ -5395,7 +8769,8 @@
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 	struct phy_device *phydev = hdev->hw.mac.phydev;
-	int mdix_ctrl, mdix, retval, is_resolved;
+	int mdix_ctrl, mdix, is_resolved;
+	unsigned int retval;
 
 	if (!phydev) {
 		*tp_mdix_ctrl = ETH_TP_MDI_INVALID;
@@ -5438,14 +8813,126 @@
 		*tp_mdix = ETH_TP_MDI;
 }
 
-static int hclge_init_instance_hw(struct hclge_dev *hdev)
+static void hclge_info_show(struct hclge_dev *hdev)
 {
-	return hclge_mac_connect_phy(hdev);
+	struct device *dev = &hdev->pdev->dev;
+
+	dev_info(dev, "PF info begin:\n");
+
+	dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
+	dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
+	dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
+	dev_info(dev, "Number of vports: %d\n", hdev->num_alloc_vport);
+	dev_info(dev, "Number of vmdq vports: %d\n", hdev->num_vmdq_vport);
+	dev_info(dev, "Number of VFs for this PF: %d\n", hdev->num_req_vfs);
+	dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
+	dev_info(dev, "Total buffer size for TX/RX: %d\n", hdev->pkt_buf_size);
+	dev_info(dev, "TX buffer size for each TC: %d\n", hdev->tx_buf_size);
+	dev_info(dev, "DV buffer size for each TC: %d\n", hdev->dv_buf_size);
+	dev_info(dev, "This is %s PF\n",
+		 hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
+	dev_info(dev, "DCB %s\n",
+		 hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
+	dev_info(dev, "MQPRIO %s\n",
+		 hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+
+	dev_info(dev, "PF info end.\n");
 }
 
-static void hclge_uninit_instance_hw(struct hclge_dev *hdev)
+static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
+					  struct hclge_vport *vport)
 {
-	hclge_mac_disconnect_phy(hdev);
+	struct hnae3_client *client = vport->nic.client;
+	struct hclge_dev *hdev = ae_dev->priv;
+	int rst_cnt;
+	int ret;
+
+	rst_cnt = hdev->rst_stats.reset_cnt;
+	ret = client->ops->init_instance(&vport->nic);
+	if (ret)
+		return ret;
+
+	set_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
+	    rst_cnt != hdev->rst_stats.reset_cnt) {
+		ret = -EBUSY;
+		goto init_nic_err;
+	}
+
+	/* Enable nic hw error interrupts */
+	ret = hclge_config_nic_hw_error(hdev, true);
+	if (ret) {
+		dev_err(&ae_dev->pdev->dev,
+			"fail(%d) to enable hw error interrupts\n", ret);
+		goto init_nic_err;
+	}
+
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	if (netif_msg_drv(&hdev->vport->nic))
+		hclge_info_show(hdev);
+
+	return ret;
+
+init_nic_err:
+	clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
+	while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		msleep(HCLGE_WAIT_RESET_DONE);
+
+	client->ops->uninit_instance(&vport->nic, 0);
+
+	return ret;
+}
+
+static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev,
+					   struct hclge_vport *vport)
+{
+	struct hnae3_client *client = vport->roce.client;
+	struct hclge_dev *hdev = ae_dev->priv;
+	int rst_cnt;
+	int ret;
+
+	if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client ||
+	    !hdev->nic_client)
+		return 0;
+
+	client = hdev->roce_client;
+	ret = hclge_init_roce_base_info(vport);
+	if (ret)
+		return ret;
+
+	rst_cnt = hdev->rst_stats.reset_cnt;
+	ret = client->ops->init_instance(&vport->roce);
+	if (ret)
+		return ret;
+
+	set_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
+	    rst_cnt != hdev->rst_stats.reset_cnt) {
+		ret = -EBUSY;
+		goto init_roce_err;
+	}
+
+	/* Enable roce ras interrupts */
+	ret = hclge_config_rocee_ras_interrupt(hdev, true);
+	if (ret) {
+		dev_err(&ae_dev->pdev->dev,
+			"fail(%d) to enable roce ras interrupts\n", ret);
+		goto init_roce_err;
+	}
+
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	return 0;
+
+init_roce_err:
+	clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
+	while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		msleep(HCLGE_WAIT_RESET_DONE);
+
+	hdev->roce_client->ops->uninit_instance(&vport->roce, 0);
+
+	return ret;
 }
 
 static int hclge_init_client_instance(struct hnae3_client *client,
@@ -5463,38 +8950,13 @@
 
 			hdev->nic_client = client;
 			vport->nic.client = client;
-			ret = client->ops->init_instance(&vport->nic);
+			ret = hclge_init_nic_client_instance(ae_dev, vport);
 			if (ret)
-				return ret;
+				goto clear_nic;
 
-			ret = hclge_init_instance_hw(hdev);
-			if (ret) {
-			        client->ops->uninit_instance(&vport->nic,
-			                                     0);
-			        return ret;
-			}
-
-			if (hdev->roce_client &&
-			    hnae3_dev_roce_supported(hdev)) {
-				struct hnae3_client *rc = hdev->roce_client;
-
-				ret = hclge_init_roce_base_info(vport);
-				if (ret)
-					return ret;
-
-				ret = rc->ops->init_instance(&vport->roce);
-				if (ret)
-					return ret;
-			}
-
-			break;
-		case HNAE3_CLIENT_UNIC:
-			hdev->nic_client = client;
-			vport->nic.client = client;
-
-			ret = client->ops->init_instance(&vport->nic);
+			ret = hclge_init_roce_client_instance(ae_dev, vport);
 			if (ret)
-				return ret;
+				goto clear_roce;
 
 			break;
 		case HNAE3_CLIENT_ROCE:
@@ -5503,19 +8965,26 @@
 				vport->roce.client = client;
 			}
 
-			if (hdev->roce_client && hdev->nic_client) {
-				ret = hclge_init_roce_base_info(vport);
-				if (ret)
-					return ret;
+			ret = hclge_init_roce_client_instance(ae_dev, vport);
+			if (ret)
+				goto clear_roce;
 
-				ret = client->ops->init_instance(&vport->roce);
-				if (ret)
-					return ret;
-			}
+			break;
+		default:
+			return -EINVAL;
 		}
 	}
 
 	return 0;
+
+clear_nic:
+	hdev->nic_client = NULL;
+	vport->nic.client = NULL;
+	return ret;
+clear_roce:
+	hdev->roce_client = NULL;
+	vport->roce.client = NULL;
+	return ret;
 }
 
 static void hclge_uninit_client_instance(struct hnae3_client *client,
@@ -5528,6 +8997,10 @@
 	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 		vport = &hdev->vport[i];
 		if (hdev->roce_client) {
+			clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
+			while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+				msleep(HCLGE_WAIT_RESET_DONE);
+
 			hdev->roce_client->ops->uninit_instance(&vport->roce,
 								0);
 			hdev->roce_client = NULL;
@@ -5535,8 +9008,11 @@
 		}
 		if (client->type == HNAE3_CLIENT_ROCE)
 			return;
-		if (client->ops->uninit_instance) {
-			hclge_uninit_instance_hw(hdev);
+		if (hdev->nic_client && client->ops->uninit_instance) {
+			clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
+			while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+				msleep(HCLGE_WAIT_RESET_DONE);
+
 			client->ops->uninit_instance(&vport->nic, 0);
 			hdev->nic_client = NULL;
 			vport->nic.client = NULL;
@@ -5618,17 +9094,63 @@
 static void hclge_state_uninit(struct hclge_dev *hdev)
 {
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
+	set_bit(HCLGE_STATE_REMOVING, &hdev->state);
 
-	if (hdev->service_timer.function)
-		del_timer_sync(&hdev->service_timer);
-	if (hdev->service_task.func)
-		cancel_work_sync(&hdev->service_task);
+	if (hdev->reset_timer.function)
+		del_timer_sync(&hdev->reset_timer);
+	if (hdev->service_task.work.func)
+		cancel_delayed_work_sync(&hdev->service_task);
 	if (hdev->rst_service_task.func)
 		cancel_work_sync(&hdev->rst_service_task);
 	if (hdev->mbx_service_task.func)
 		cancel_work_sync(&hdev->mbx_service_task);
 }
 
+static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev)
+{
+#define HCLGE_FLR_WAIT_MS	100
+#define HCLGE_FLR_WAIT_CNT	50
+	struct hclge_dev *hdev = ae_dev->priv;
+	int cnt = 0;
+
+	clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
+	clear_bit(HNAE3_FLR_DONE, &hdev->flr_state);
+	set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request);
+	hclge_reset_event(hdev->pdev, NULL);
+
+	while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) &&
+	       cnt++ < HCLGE_FLR_WAIT_CNT)
+		msleep(HCLGE_FLR_WAIT_MS);
+
+	if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state))
+		dev_err(&hdev->pdev->dev,
+			"flr wait down timeout: %d\n", cnt);
+}
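
The wait loop above polls every HCLGE_FLR_WAIT_MS (100 ms) up to
HCLGE_FLR_WAIT_CNT (50) times, so the driver allows the reset path roughly
100 ms * 50 = 5 s to bring the function down before logging the timeout.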
+
+static void hclge_flr_done(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+
+	set_bit(HNAE3_FLR_DONE, &hdev->flr_state);
+}
+
+static void hclge_clear_resetting_state(struct hclge_dev *hdev)
+{
+	u16 i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		struct hclge_vport *vport = &hdev->vport[i];
+		int ret;
+
+		/* Send cmd to clear VF's FUNC_RST_ING */
+		ret = hclge_set_vf_rst(hdev, vport->vport_id, false);
+		if (ret)
+			dev_warn(&hdev->pdev->dev,
+				 "clear vf(%d) rst failed %d!\n",
+				 vport->vport_id, ret);
+	}
+}
+
 static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct pci_dev *pdev = ae_dev->pdev;
@@ -5644,7 +9166,13 @@
 	hdev->pdev = pdev;
 	hdev->ae_dev = ae_dev;
 	hdev->reset_type = HNAE3_NONE_RESET;
+	hdev->reset_level = HNAE3_FUNC_RESET;
 	ae_dev->priv = hdev;
+	hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
+
+	mutex_init(&hdev->vport_lock);
+	mutex_init(&hdev->vport_cfg_mutex);
+	spin_lock_init(&hdev->fd_rule_lock);
 
 	ret = hclge_pci_init(hdev);
 	if (ret) {
@@ -5718,6 +9246,12 @@
 		}
 	}
 
+	ret = hclge_init_umv_space(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "umv space init error, ret=%d.\n", ret);
+		goto err_mdiobus_unreg;
+	}
+
 	ret = hclge_mac_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
@@ -5730,6 +9264,10 @@
 		goto err_mdiobus_unreg;
 	}
 
+	ret = hclge_config_gro(hdev, true);
+	if (ret)
+		goto err_mdiobus_unreg;
+
 	ret = hclge_init_vlan_config(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
@@ -5755,21 +9293,54 @@
 		goto err_mdiobus_unreg;
 	}
 
+	ret = hclge_init_fd_config(hdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"fd table init fail, ret=%d\n", ret);
+		goto err_mdiobus_unreg;
+	}
+
+	INIT_KFIFO(hdev->mac_tnl_log);
+
 	hclge_dcb_ops_set(hdev);
 
-	timer_setup(&hdev->service_timer, hclge_service_timer, 0);
-	INIT_WORK(&hdev->service_task, hclge_service_task);
+	timer_setup(&hdev->reset_timer, hclge_reset_timer, 0);
+	INIT_DELAYED_WORK(&hdev->service_task, hclge_service_task);
 	INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
 	INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);
 
+	/* Set up affinity after the service timer setup, because
+	 * add_timer_on is called in the affinity notify callback.
+	 */
+	hclge_misc_affinity_setup(hdev);
+
 	hclge_clear_all_event_cause(hdev);
+	hclge_clear_resetting_state(hdev);
+
+	/* Log and clear the hw errors that have already occurred */
+	hclge_handle_all_hns_hw_errors(ae_dev);
+
+	/* request a delayed reset for the error recovery, because an
+	 * immediate global reset on a PF would affect the pending
+	 * initialization of the other PFs
+	 */
+	if (ae_dev->hw_err_reset_req) {
+		enum hnae3_reset_type reset_level;
+
+		reset_level = hclge_get_reset_level(ae_dev,
+						    &ae_dev->hw_err_reset_req);
+		hclge_set_def_reset_request(ae_dev, reset_level);
+		mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
+	}
 
 	/* Enable MISC vector(vector0) */
 	hclge_enable_vector(&hdev->misc_vector, true);
 
 	hclge_state_init(hdev);
+	hdev->last_reset_time = jiffies;
 
-	pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
+	dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n",
+		 HCLGE_DRIVER_NAME);
+
 	return 0;
 
 err_mdiobus_unreg:
@@ -5780,7 +9351,7 @@
 err_msi_uninit:
 	pci_free_irq_vectors(pdev);
 err_cmd_uninit:
-	hclge_destroy_cmd_queue(&hdev->hw);
+	hclge_cmd_uninit(hdev);
 err_pci_uninit:
 	pcim_iounmap(pdev, hdev->hw.io_base);
 	pci_clear_master(pdev);
@@ -5795,6 +9366,17 @@
 	memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats));
 }
 
+static void hclge_reset_vport_state(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		hclge_vport_stop(vport);
+		vport++;
+	}
+}
+
 static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
@@ -5805,6 +9387,7 @@
 
 	hclge_stats_clear(hdev);
 	memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table));
+	memset(hdev->vf_vlan_full, 0, sizeof(hdev->vf_vlan_full));
 
 	ret = hclge_cmd_init(hdev);
 	if (ret) {
@@ -5812,25 +9395,14 @@
 		return ret;
 	}
 
-	ret = hclge_get_cap(hdev);
-	if (ret) {
-		dev_err(&pdev->dev, "get hw capability error, ret = %d.\n",
-			ret);
-		return ret;
-	}
-
-	ret = hclge_configure(hdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
-		return ret;
-	}
-
 	ret = hclge_map_tqp(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret);
 		return ret;
 	}
 
+	hclge_reset_umv_space(hdev);
+
 	ret = hclge_mac_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
@@ -5843,13 +9415,17 @@
 		return ret;
 	}
 
+	ret = hclge_config_gro(hdev, true);
+	if (ret)
+		return ret;
+
 	ret = hclge_init_vlan_config(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
 		return ret;
 	}
 
-	ret = hclge_tm_init_hw(hdev);
+	ret = hclge_tm_init_hw(hdev, true);
 	if (ret) {
 		dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret);
 		return ret;
@@ -5861,6 +9437,35 @@
 		return ret;
 	}
 
+	ret = hclge_init_fd_config(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "fd table init fail, ret=%d\n", ret);
+		return ret;
+	}
+
+	/* Re-enable the hw error interrupts because
+	 * the interrupts get disabled on global reset.
+	 */
+	ret = hclge_config_nic_hw_error(hdev, true);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"fail(%d) to re-enable NIC hw error interrupts\n",
+			ret);
+		return ret;
+	}
+
+	if (hdev->roce_client) {
+		ret = hclge_config_rocee_ras_interrupt(hdev, true);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"fail(%d) to re-enable roce ras interrupts\n",
+				ret);
+			return ret;
+		}
+	}
+
+	hclge_reset_vport_state(hdev);
+
 	dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n",
 		 HCLGE_DRIVER_NAME);
 
@@ -5872,18 +9477,30 @@
 	struct hclge_dev *hdev = ae_dev->priv;
 	struct hclge_mac *mac = &hdev->hw.mac;
 
+	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
 
 	if (mac->phydev)
 		mdiobus_unregister(mac->mdio_bus);
 
+	hclge_uninit_umv_space(hdev);
+
 	/* Disable MISC vector(vector0) */
 	hclge_enable_vector(&hdev->misc_vector, false);
 	synchronize_irq(hdev->misc_vector.vector_irq);
 
-	hclge_destroy_cmd_queue(&hdev->hw);
+	/* Disable all hw interrupts */
+	hclge_config_mac_tnl_int(hdev, false);
+	hclge_config_nic_hw_error(hdev, false);
+	hclge_config_rocee_ras_interrupt(hdev, false);
+
+	hclge_cmd_uninit(hdev);
 	hclge_misc_irq_uninit(hdev);
 	hclge_pci_uninit(hdev);
+	mutex_destroy(&hdev->vport_lock);
+	hclge_uninit_vport_mac_table(hdev);
+	hclge_uninit_vport_vlan_table(hdev);
+	mutex_destroy(&hdev->vport_cfg_mutex);
 	ae_dev->priv = NULL;
 }
 
@@ -5893,87 +9510,50 @@
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	return min_t(u32, hdev->rss_size_max * kinfo->num_tc, hdev->num_tqps);
+	return min_t(u32, hdev->rss_size_max,
+		     vport->alloc_tqps / kinfo->num_tc);
 }
 
 static void hclge_get_channels(struct hnae3_handle *handle,
 			       struct ethtool_channels *ch)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-
 	ch->max_combined = hclge_get_max_channels(handle);
 	ch->other_count = 1;
 	ch->max_other = 1;
-	ch->combined_count = vport->alloc_tqps;
+	ch->combined_count = handle->kinfo.rss_size;
 }
 
 static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle,
-					u16 *free_tqps, u16 *max_rss_size)
+					u16 *alloc_tqps, u16 *max_rss_size)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	u16 temp_tqps = 0;
-	int i;
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		if (!hdev->htqp[i].alloced)
-			temp_tqps++;
-	}
-	*free_tqps = temp_tqps;
+	*alloc_tqps = vport->alloc_tqps;
 	*max_rss_size = hdev->rss_size_max;
 }
 
-static void hclge_release_tqp(struct hclge_vport *vport)
-{
-	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
-	struct hclge_dev *hdev = vport->back;
-	int i;
-
-	for (i = 0; i < kinfo->num_tqps; i++) {
-		struct hclge_tqp *tqp =
-			container_of(kinfo->tqp[i], struct hclge_tqp, q);
-
-		tqp->q.handle = NULL;
-		tqp->q.tqp_index = 0;
-		tqp->alloced = false;
-	}
-
-	devm_kfree(&hdev->pdev->dev, kinfo->tqp);
-	kinfo->tqp = NULL;
-}
-
-static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
+static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
+			      bool rxfh_configured)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
 	struct hclge_dev *hdev = vport->back;
+	u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
 	int cur_rss_size = kinfo->rss_size;
 	int cur_tqps = kinfo->num_tqps;
-	u16 tc_offset[HCLGE_MAX_TC_NUM];
 	u16 tc_valid[HCLGE_MAX_TC_NUM];
-	u16 tc_size[HCLGE_MAX_TC_NUM];
 	u16 roundup_size;
 	u32 *rss_indir;
-	int ret, i;
+	unsigned int i;
+	int ret;
 
-	/* Free old tqps, and reallocate with new tqp number when nic setup */
-	hclge_release_tqp(vport);
+	kinfo->req_rss_size = new_tqps_num;
 
-	ret = hclge_knic_setup(vport, new_tqps_num, kinfo->num_desc);
+	ret = hclge_tm_vport_map_update(hdev);
 	if (ret) {
-		dev_err(&hdev->pdev->dev, "setup nic fail, ret =%d\n", ret);
-		return ret;
-	}
-
-	ret = hclge_map_tqp_to_vport(hdev, vport);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "map vport tqp fail, ret =%d\n", ret);
-		return ret;
-	}
-
-	ret = hclge_tm_schd_init(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "tm schd init fail, ret =%d\n", ret);
+		dev_err(&hdev->pdev->dev, "tm vport map fail, ret =%d\n", ret);
 		return ret;
 	}
 
@@ -5994,6 +9574,10 @@
 	if (ret)
 		return ret;
 
+	/* RSS indirection table has been configured by the user */
+	if (rxfh_configured)
+		goto out;
+
 	/* Reinitializes the rss indirect table according to the new RSS size */
 	rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
 	if (!rss_indir)
@@ -6009,6 +9593,7 @@
 
 	kfree(rss_indir);
 
+out:
 	if (!ret)
 		dev_info(&hdev->pdev->dev,
 			 "Channels changed, rss_size from %d to %d, tqps from %d to %d",
@@ -6047,10 +9632,12 @@
 				 void *data)
 {
 #define HCLGE_32_BIT_REG_RTN_DATANUM 8
+#define HCLGE_32_BIT_DESC_NODATA_LEN 2
 
 	struct hclge_desc *desc;
 	u32 *reg_val = data;
 	__le32 *desc_data;
+	int nodata_num;
 	int cmd_num;
 	int i, k, n;
 	int ret;
@@ -6058,7 +9645,9 @@
 	if (regs_num == 0)
 		return 0;
 
-	cmd_num = DIV_ROUND_UP(regs_num + 2, HCLGE_32_BIT_REG_RTN_DATANUM);
+	nodata_num = HCLGE_32_BIT_DESC_NODATA_LEN;
+	cmd_num = DIV_ROUND_UP(regs_num + nodata_num,
+			       HCLGE_32_BIT_REG_RTN_DATANUM);
 	desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL);
 	if (!desc)
 		return -ENOMEM;
@@ -6075,7 +9664,7 @@
 	for (i = 0; i < cmd_num; i++) {
 		if (i == 0) {
 			desc_data = (__le32 *)(&desc[i].data[0]);
-			n = HCLGE_32_BIT_REG_RTN_DATANUM - 2;
+			n = HCLGE_32_BIT_REG_RTN_DATANUM - nodata_num;
 		} else {
 			desc_data = (__le32 *)(&desc[i]);
 			n = HCLGE_32_BIT_REG_RTN_DATANUM;
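
The descriptor-count arithmetic in the two hunks above, worked through: each
descriptor returns HCLGE_32_BIT_REG_RTN_DATANUM (8) 32-bit words, but the
first descriptor loses HCLGE_32_BIT_DESC_NODATA_LEN (2) words to non-data
fields. For regs_num = 30 this gives cmd_num = DIV_ROUND_UP(30 + 2, 8) = 4:
the first descriptor carries 6 register values and the remaining three carry
8 each (6 + 3 * 8 = 30). The 64-bit variant below is the same pattern with
4 words per descriptor and 1 no-data word.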
@@ -6097,10 +9686,12 @@
 				 void *data)
 {
 #define HCLGE_64_BIT_REG_RTN_DATANUM 4
+#define HCLGE_64_BIT_DESC_NODATA_LEN 1
 
 	struct hclge_desc *desc;
 	u64 *reg_val = data;
 	__le64 *desc_data;
+	int nodata_len;
 	int cmd_num;
 	int i, k, n;
 	int ret;
@@ -6108,7 +9699,9 @@
 	if (regs_num == 0)
 		return 0;
 
-	cmd_num = DIV_ROUND_UP(regs_num + 1, HCLGE_64_BIT_REG_RTN_DATANUM);
+	nodata_len = HCLGE_64_BIT_DESC_NODATA_LEN;
+	cmd_num = DIV_ROUND_UP(regs_num + nodata_len,
+			       HCLGE_64_BIT_REG_RTN_DATANUM);
 	desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL);
 	if (!desc)
 		return -ENOMEM;
@@ -6125,7 +9718,7 @@
 	for (i = 0; i < cmd_num; i++) {
 		if (i == 0) {
 			desc_data = (__le64 *)(&desc[i].data[0]);
-			n = HCLGE_64_BIT_REG_RTN_DATANUM - 1;
+			n = HCLGE_64_BIT_REG_RTN_DATANUM - nodata_len;
 		} else {
 			desc_data = (__le64 *)(&desc[i]);
 			n = HCLGE_64_BIT_REG_RTN_DATANUM;
@@ -6143,30 +9736,275 @@
 	return 0;
 }
 
+#define MAX_SEPARATE_NUM	4
+#define SEPARATOR_VALUE		0xFDFCFBFA
+#define REG_NUM_PER_LINE	4
+#define REG_LEN_PER_LINE	(REG_NUM_PER_LINE * sizeof(u32))
+#define REG_SEPARATOR_LINE	1
+#define REG_NUM_REMAIN_MASK	3
+#define BD_LIST_MAX_NUM		30
+
+int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc)
+{
+	/* prepare 4 commands to query the DFX BD number */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_DFX_BD_NUM, true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_DFX_BD_NUM, true);
+	desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[2], HCLGE_OPC_DFX_BD_NUM, true);
+	desc[2].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[3], HCLGE_OPC_DFX_BD_NUM, true);
+
+	return hclge_cmd_send(&hdev->hw, desc, 4);
+}
+
+static int hclge_get_dfx_reg_bd_num(struct hclge_dev *hdev,
+				    int *bd_num_list,
+				    u32 type_num)
+{
+#define HCLGE_DFX_REG_BD_NUM	4
+
+	u32 entries_per_desc, desc_index, index, offset, i;
+	struct hclge_desc desc[HCLGE_DFX_REG_BD_NUM];
+	int ret;
+
+	ret = hclge_query_bd_num_cmd_send(hdev, desc);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get dfx bd num fail, status is %d.\n", ret);
+		return ret;
+	}
+
+	entries_per_desc = ARRAY_SIZE(desc[0].data);
+	for (i = 0; i < type_num; i++) {
+		offset = hclge_dfx_bd_offset_list[i];
+		index = offset % entries_per_desc;
+		desc_index = offset / entries_per_desc;
+		bd_num_list[i] = le32_to_cpu(desc[desc_index].data[index]);
+	}
+
+	return ret;
+}
+
+static int hclge_dfx_reg_cmd_send(struct hclge_dev *hdev,
+				  struct hclge_desc *desc_src, int bd_num,
+				  enum hclge_opcode_type cmd)
+{
+	struct hclge_desc *desc = desc_src;
+	int i, ret;
+
+	hclge_cmd_setup_basic_desc(desc, cmd, true);
+	for (i = 0; i < bd_num - 1; i++) {
+		desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		desc++;
+		hclge_cmd_setup_basic_desc(desc, cmd, true);
+	}
+
+	desc = desc_src;
+	ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Query dfx reg cmd(0x%x) send fail, status is %d.\n",
+			cmd, ret);
+
+	return ret;
+}
+
+static int hclge_dfx_reg_fetch_data(struct hclge_desc *desc_src, int bd_num,
+				    void *data)
+{
+	int entries_per_desc, reg_num, separator_num, desc_index, index, i;
+	struct hclge_desc *desc = desc_src;
+	u32 *reg = data;
+
+	entries_per_desc = ARRAY_SIZE(desc->data);
+	reg_num = entries_per_desc * bd_num;
+	separator_num = REG_NUM_PER_LINE - (reg_num & REG_NUM_REMAIN_MASK);
+	for (i = 0; i < reg_num; i++) {
+		index = i % entries_per_desc;
+		desc_index = i / entries_per_desc;
+		*reg++ = le32_to_cpu(desc[desc_index].data[index]);
+	}
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
+
+	return reg_num + separator_num;
+}
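
The separator padding above aligns each register block to REG_NUM_PER_LINE
(4) words: reg_num & REG_NUM_REMAIN_MASK is reg_num mod 4, so e.g.
reg_num = 10 yields 4 - 2 = 2 SEPARATOR_VALUE words (12 words total), while
a reg_num already on a 4-word boundary yields a full separator line of 4
words, matching the REG_SEPARATOR_LINE accounting used below.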
+
+static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len)
+{
+	u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
+	int data_len_per_desc, data_len, bd_num, i;
+	int bd_num_list[BD_LIST_MAX_NUM];
+	int ret;
+
+	ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get dfx reg bd num fail, status is %d.\n", ret);
+		return ret;
+	}
+
+	data_len_per_desc = FIELD_SIZEOF(struct hclge_desc, data);
+	*len = 0;
+	for (i = 0; i < dfx_reg_type_num; i++) {
+		bd_num = bd_num_list[i];
+		data_len = data_len_per_desc * bd_num;
+		*len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE;
+	}
+
+	return ret;
+}
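
Per register type, the length is rounded up to whole REG_LEN_PER_LINE
(16-byte) lines plus one separator line. As a worked example, assuming the
command descriptor carries six u32 data words (data_len_per_desc = 24, not
visible in this hunk), a type with bd_num = 3 has data_len = 72 and
contributes (72 / 16 + 1) * 16 = 80 bytes to *len.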
+
+static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data)
+{
+	u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
+	int bd_num, bd_num_max, buf_len, i;
+	int bd_num_list[BD_LIST_MAX_NUM];
+	struct hclge_desc *desc_src;
+	u32 *reg = data;
+	int ret;
+
+	ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get dfx reg bd num fail, status is %d.\n", ret);
+		return ret;
+	}
+
+	bd_num_max = bd_num_list[0];
+	for (i = 1; i < dfx_reg_type_num; i++)
+		bd_num_max = max_t(int, bd_num_max, bd_num_list[i]);
+
+	buf_len = sizeof(*desc_src) * bd_num_max;
+	desc_src = kzalloc(buf_len, GFP_KERNEL);
+	if (!desc_src) {
+		dev_err(&hdev->pdev->dev, "%s kzalloc failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < dfx_reg_type_num; i++) {
+		bd_num = bd_num_list[i];
+		ret = hclge_dfx_reg_cmd_send(hdev, desc_src, bd_num,
+					     hclge_dfx_reg_opcode_list[i]);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Get dfx reg fail, status is %d.\n", ret);
+			break;
+		}
+
+		reg += hclge_dfx_reg_fetch_data(desc_src, bd_num, reg);
+	}
+
+	kfree(desc_src);
+	return ret;
+}
+
+static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data,
+			      struct hnae3_knic_private_info *kinfo)
+{
+#define HCLGE_RING_REG_OFFSET		0x200
+#define HCLGE_RING_INT_REG_OFFSET	0x4
+
+	int i, j, reg_num, separator_num;
+	int data_num_sum;
+	u32 *reg = data;
+
+	/* fetch per-PF register values from the PF PCIe register space */
+	reg_num = ARRAY_SIZE(cmdq_reg_addr_list);
+	separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK);
+	for (i = 0; i < reg_num; i++)
+		*reg++ = hclge_read_dev(&hdev->hw, cmdq_reg_addr_list[i]);
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
+	data_num_sum = reg_num + separator_num;
+
+	reg_num = ARRAY_SIZE(common_reg_addr_list);
+	separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK);
+	for (i = 0; i < reg_num; i++)
+		*reg++ = hclge_read_dev(&hdev->hw, common_reg_addr_list[i]);
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
+	data_num_sum += reg_num + separator_num;
+
+	reg_num = ARRAY_SIZE(ring_reg_addr_list);
+	separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK);
+	for (j = 0; j < kinfo->num_tqps; j++) {
+		for (i = 0; i < reg_num; i++)
+			*reg++ = hclge_read_dev(&hdev->hw,
+						ring_reg_addr_list[i] +
+						HCLGE_RING_REG_OFFSET * j);
+		for (i = 0; i < separator_num; i++)
+			*reg++ = SEPARATOR_VALUE;
+	}
+	data_num_sum += (reg_num + separator_num) * kinfo->num_tqps;
+
+	reg_num = ARRAY_SIZE(tqp_intr_reg_addr_list);
+	separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK);
+	for (j = 0; j < hdev->num_msi_used - 1; j++) {
+		for (i = 0; i < reg_num; i++)
+			*reg++ = hclge_read_dev(&hdev->hw,
+						tqp_intr_reg_addr_list[i] +
+						HCLGE_RING_INT_REG_OFFSET * j);
+		for (i = 0; i < separator_num; i++)
+			*reg++ = SEPARATOR_VALUE;
+	}
+	data_num_sum += (reg_num + separator_num) * (hdev->num_msi_used - 1);
+
+	return data_num_sum;
+}
+
 static int hclge_get_regs_len(struct hnae3_handle *handle)
 {
+	int cmdq_lines, common_lines, ring_lines, tqp_intr_lines;
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	u32 regs_num_32_bit, regs_num_64_bit;
+	int regs_num_32_bit, regs_num_64_bit, dfx_regs_len;
+	int regs_lines_32_bit, regs_lines_64_bit;
 	int ret;
 
 	ret = hclge_get_regs_num(hdev, &regs_num_32_bit, &regs_num_64_bit);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Get register number failed, ret = %d.\n", ret);
-		return -EOPNOTSUPP;
+		return ret;
 	}
 
-	return regs_num_32_bit * sizeof(u32) + regs_num_64_bit * sizeof(u64);
+	ret = hclge_get_dfx_reg_len(hdev, &dfx_regs_len);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get dfx reg len failed, ret = %d.\n", ret);
+		return ret;
+	}
+
+	cmdq_lines = sizeof(cmdq_reg_addr_list) / REG_LEN_PER_LINE +
+		REG_SEPARATOR_LINE;
+	common_lines = sizeof(common_reg_addr_list) / REG_LEN_PER_LINE +
+		REG_SEPARATOR_LINE;
+	ring_lines = sizeof(ring_reg_addr_list) / REG_LEN_PER_LINE +
+		REG_SEPARATOR_LINE;
+	tqp_intr_lines = sizeof(tqp_intr_reg_addr_list) / REG_LEN_PER_LINE +
+		REG_SEPARATOR_LINE;
+	regs_lines_32_bit = regs_num_32_bit * sizeof(u32) / REG_LEN_PER_LINE +
+		REG_SEPARATOR_LINE;
+	regs_lines_64_bit = regs_num_64_bit * sizeof(u64) / REG_LEN_PER_LINE +
+		REG_SEPARATOR_LINE;
+
+	return (cmdq_lines + common_lines + ring_lines * kinfo->num_tqps +
+		tqp_intr_lines * (hdev->num_msi_used - 1) + regs_lines_32_bit +
+		regs_lines_64_bit) * REG_LEN_PER_LINE + dfx_regs_len;
 }
 
 static void hclge_get_regs(struct hnae3_handle *handle, u32 *version,
 			   void *data)
 {
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 	u32 regs_num_32_bit, regs_num_64_bit;
-	int ret;
+	int i, reg_num, separator_num, ret;
+	u32 *reg = data;
 
 	*version = hdev->fw_version;
 
@@ -6177,19 +10015,36 @@
 		return;
 	}
 
-	ret = hclge_get_32_bit_regs(hdev, regs_num_32_bit, data);
+	reg += hclge_fetch_pf_reg(hdev, reg, kinfo);
+
+	ret = hclge_get_32_bit_regs(hdev, regs_num_32_bit, reg);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Get 32 bit register failed, ret = %d.\n", ret);
 		return;
 	}
+	reg_num = regs_num_32_bit;
+	reg += reg_num;
+	separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK);
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
 
-	data = (u32 *)data + regs_num_32_bit;
-	ret = hclge_get_64_bit_regs(hdev, regs_num_64_bit,
-				    data);
-	if (ret)
+	ret = hclge_get_64_bit_regs(hdev, regs_num_64_bit, reg);
+	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Get 64 bit register failed, ret = %d.\n", ret);
+		return;
+	}
+	reg_num = regs_num_64_bit * 2;
+	reg += reg_num;
+	separator_num = MAX_SEPARATE_NUM - (reg_num & REG_NUM_REMAIN_MASK);
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
+
+	ret = hclge_get_dfx_reg(hdev, reg);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Get dfx register failed, ret = %d.\n", ret);
 }
 
 static int hclge_set_led_status(struct hclge_dev *hdev, u8 locate_led_status)
@@ -6249,30 +10104,19 @@
 	}
 }
 
-static void hclge_get_port_type(struct hnae3_handle *handle,
-				u8 *port_type)
+static int hclge_gro_en(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	u8 media_type = hdev->hw.mac.media_type;
 
-	switch (media_type) {
-	case HNAE3_MEDIA_TYPE_FIBER:
-		*port_type = PORT_FIBRE;
-		break;
-	case HNAE3_MEDIA_TYPE_COPPER:
-		*port_type = PORT_TP;
-		break;
-	case HNAE3_MEDIA_TYPE_UNKNOWN:
-	default:
-		*port_type = PORT_OTHER;
-		break;
-	}
+	return hclge_config_gro(hdev, enable);
 }
 
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
+	.flr_prepare = hclge_flr_prepare,
+	.flr_done = hclge_flr_done,
 	.init_client_instance = hclge_init_client_instance,
 	.uninit_client_instance = hclge_uninit_client_instance,
 	.map_ring_to_vector = hclge_map_ring_to_vector,
@@ -6283,11 +10127,15 @@
 	.set_loopback = hclge_set_loopback,
 	.start = hclge_ae_start,
 	.stop = hclge_ae_stop,
+	.client_start = hclge_client_start,
+	.client_stop = hclge_client_stop,
 	.get_status = hclge_get_status,
 	.get_ksettings_an_result = hclge_get_ksettings_an_result,
-	.update_speed_duplex_h = hclge_update_speed_duplex_h,
 	.cfg_mac_speed_dup_h = hclge_cfg_mac_speed_dup_h,
 	.get_media_type = hclge_get_media_type,
+	.check_port_speed = hclge_check_port_speed,
+	.get_fec = hclge_get_fec,
+	.set_fec = hclge_set_fec,
 	.get_rss_key_size = hclge_get_rss_key_size,
 	.get_rss_indir_size = hclge_get_rss_indir_size,
 	.get_rss = hclge_get_rss,
@@ -6297,18 +10145,21 @@
 	.get_tc_size = hclge_get_tc_size,
 	.get_mac_addr = hclge_get_mac_addr,
 	.set_mac_addr = hclge_set_mac_addr,
+	.do_ioctl = hclge_do_ioctl,
 	.add_uc_addr = hclge_add_uc_addr,
 	.rm_uc_addr = hclge_rm_uc_addr,
 	.add_mc_addr = hclge_add_mc_addr,
 	.rm_mc_addr = hclge_rm_mc_addr,
-	.update_mta_status = hclge_update_mta_status,
 	.set_autoneg = hclge_set_autoneg,
 	.get_autoneg = hclge_get_autoneg,
+	.restart_autoneg = hclge_restart_autoneg,
+	.halt_autoneg = hclge_halt_autoneg,
 	.get_pauseparam = hclge_get_pauseparam,
 	.set_pauseparam = hclge_set_pauseparam,
 	.set_mtu = hclge_set_mtu,
 	.reset_queue = hclge_reset_tqp,
 	.get_stats = hclge_get_stats,
+	.get_mac_stats = hclge_get_mac_stat,
 	.update_stats = hclge_update_stats,
 	.get_strings = hclge_get_strings,
 	.get_sset_count = hclge_get_sset_count,
@@ -6319,15 +10170,35 @@
 	.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
 	.enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag,
 	.reset_event = hclge_reset_event,
+	.get_reset_level = hclge_get_reset_level,
+	.set_default_reset_request = hclge_set_def_reset_request,
 	.get_tqps_and_rss_info = hclge_get_tqps_and_rss_info,
 	.set_channels = hclge_set_channels,
 	.get_channels = hclge_get_channels,
-	.get_flowctrl_adv = hclge_get_flowctrl_adv,
 	.get_regs_len = hclge_get_regs_len,
 	.get_regs = hclge_get_regs,
 	.set_led_id = hclge_set_led_id,
 	.get_link_mode = hclge_get_link_mode,
-	.get_port_type = hclge_get_port_type,
+	.add_fd_entry = hclge_add_fd_entry,
+	.del_fd_entry = hclge_del_fd_entry,
+	.del_all_fd_entries = hclge_del_all_fd_entries,
+	.get_fd_rule_cnt = hclge_get_fd_rule_cnt,
+	.get_fd_rule_info = hclge_get_fd_rule_info,
+	.get_fd_all_rules = hclge_get_all_rules,
+	.restore_fd_rules = hclge_restore_fd_entries,
+	.enable_fd = hclge_enable_fd,
+	.add_arfs_entry = hclge_add_fd_entry_by_arfs,
+	.dbg_run_cmd = hclge_dbg_run_cmd,
+	.handle_hw_ras_error = hclge_handle_hw_ras_error,
+	.get_hw_reset_stat = hclge_get_hw_reset_stat,
+	.ae_dev_resetting = hclge_ae_dev_resetting,
+	.ae_dev_reset_cnt = hclge_ae_dev_reset_cnt,
+	.set_gro_en = hclge_gro_en,
+	.get_global_queue_id = hclge_covert_handle_qid_global,
+	.set_timer_task = hclge_set_timer_task,
+	.mac_connect_phy = hclge_mac_connect_phy,
+	.mac_disconnect_phy = hclge_mac_disconnect_phy,
+	.restore_vlan_table = hclge_restore_vlan_table,
 };
 
 static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 1528fb3..615cde1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_MAIN_H
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/phy.h>
 #include <linux/if_vlan.h>
+#include <linux/kfifo.h>
 
 #include "hclge_cmd.h"
 #include "hnae3.h"
@@ -14,6 +15,11 @@
 #define HCLGE_MOD_VERSION "1.0"
 #define HCLGE_DRIVER_NAME "hclge"
 
+#define HCLGE_MAX_PF_NUM		8
+
+#define HCLGE_RD_FIRST_STATS_NUM        2
+#define HCLGE_RD_OTHER_STATS_NUM        4
+
 #define HCLGE_INVALID_VPORT 0xffff
 
 #define HCLGE_PF_CFG_BLOCK_SIZE		32
@@ -26,6 +32,62 @@
 #define HCLGE_VECTOR_REG_OFFSET		0x4
 #define HCLGE_VECTOR_VF_OFFSET		0x100000
 
+#define HCLGE_CMDQ_TX_ADDR_L_REG	0x27000
+#define HCLGE_CMDQ_TX_ADDR_H_REG	0x27004
+#define HCLGE_CMDQ_TX_DEPTH_REG		0x27008
+#define HCLGE_CMDQ_TX_TAIL_REG		0x27010
+#define HCLGE_CMDQ_TX_HEAD_REG		0x27014
+#define HCLGE_CMDQ_RX_ADDR_L_REG	0x27018
+#define HCLGE_CMDQ_RX_ADDR_H_REG	0x2701C
+#define HCLGE_CMDQ_RX_DEPTH_REG		0x27020
+#define HCLGE_CMDQ_RX_TAIL_REG		0x27024
+#define HCLGE_CMDQ_RX_HEAD_REG		0x27028
+#define HCLGE_CMDQ_INTR_SRC_REG		0x27100
+#define HCLGE_CMDQ_INTR_STS_REG		0x27104
+#define HCLGE_CMDQ_INTR_EN_REG		0x27108
+#define HCLGE_CMDQ_INTR_GEN_REG		0x2710C
+
+/* bar registers for common func */
+#define HCLGE_VECTOR0_OTER_EN_REG	0x20600
+#define HCLGE_RAS_OTHER_STS_REG		0x20B00
+#define HCLGE_FUNC_RESET_STS_REG	0x20C00
+#define HCLGE_GRO_EN_REG		0x28000
+
+/* bar registers for rcb */
+#define HCLGE_RING_RX_ADDR_L_REG	0x80000
+#define HCLGE_RING_RX_ADDR_H_REG	0x80004
+#define HCLGE_RING_RX_BD_NUM_REG	0x80008
+#define HCLGE_RING_RX_BD_LENGTH_REG	0x8000C
+#define HCLGE_RING_RX_MERGE_EN_REG	0x80014
+#define HCLGE_RING_RX_TAIL_REG		0x80018
+#define HCLGE_RING_RX_HEAD_REG		0x8001C
+#define HCLGE_RING_RX_FBD_NUM_REG	0x80020
+#define HCLGE_RING_RX_OFFSET_REG	0x80024
+#define HCLGE_RING_RX_FBD_OFFSET_REG	0x80028
+#define HCLGE_RING_RX_STASH_REG		0x80030
+#define HCLGE_RING_RX_BD_ERR_REG	0x80034
+#define HCLGE_RING_TX_ADDR_L_REG	0x80040
+#define HCLGE_RING_TX_ADDR_H_REG	0x80044
+#define HCLGE_RING_TX_BD_NUM_REG	0x80048
+#define HCLGE_RING_TX_PRIORITY_REG	0x8004C
+#define HCLGE_RING_TX_TC_REG		0x80050
+#define HCLGE_RING_TX_MERGE_EN_REG	0x80054
+#define HCLGE_RING_TX_TAIL_REG		0x80058
+#define HCLGE_RING_TX_HEAD_REG		0x8005C
+#define HCLGE_RING_TX_FBD_NUM_REG	0x80060
+#define HCLGE_RING_TX_OFFSET_REG	0x80064
+#define HCLGE_RING_TX_EBD_NUM_REG	0x80068
+#define HCLGE_RING_TX_EBD_OFFSET_REG	0x80070
+#define HCLGE_RING_TX_BD_ERR_REG	0x80074
+#define HCLGE_RING_EN_REG		0x80090
+
+/* bar registers for tqp interrupt */
+#define HCLGE_TQP_INTR_CTRL_REG		0x20000
+#define HCLGE_TQP_INTR_GL0_REG		0x20100
+#define HCLGE_TQP_INTR_GL1_REG		0x20200
+#define HCLGE_TQP_INTR_GL2_REG		0x20300
+#define HCLGE_TQP_INTR_RL_REG		0x20900
+
 #define HCLGE_RSS_IND_TBL_SIZE		512
 #define HCLGE_RSS_SET_BITMAP_MSK	GENMASK(15, 0)
 #define HCLGE_RSS_KEY_SIZE		40
@@ -53,9 +115,11 @@
 #define HCLGE_RSS_TC_SIZE_6		64
 #define HCLGE_RSS_TC_SIZE_7		128
 
-#define HCLGE_MTA_TBL_SIZE		4096
+#define HCLGE_UMV_TBL_SIZE		3072
+#define HCLGE_DEFAULT_UMV_SPACE_PER_PF \
+	(HCLGE_UMV_TBL_SIZE / HCLGE_MAX_PF_NUM)
 
-#define HCLGE_TQP_RESET_TRY_TIMES	10
+#define HCLGE_TQP_RESET_TRY_TIMES	200
 
 #define HCLGE_PHY_PAGE_MDIX		0
 #define HCLGE_PHY_PAGE_COPPER		0
@@ -79,12 +143,30 @@
 #define HCLGE_VF_NUM_PER_CMD           64
 #define HCLGE_VF_NUM_PER_BYTE          8
 
+enum HLCGE_PORT_TYPE {
+	HOST_PORT,
+	NETWORK_PORT
+};
+
+#define PF_VPORT_ID			0
+
+#define HCLGE_PF_ID_S			0
+#define HCLGE_PF_ID_M			GENMASK(2, 0)
+#define HCLGE_VF_ID_S			3
+#define HCLGE_VF_ID_M			GENMASK(10, 3)
+#define HCLGE_PORT_TYPE_B		11
+#define HCLGE_NETWORK_PORT_ID_S		0
+#define HCLGE_NETWORK_PORT_ID_M		GENMASK(3, 0)
+
 /* Reset related Registers */
+#define HCLGE_PF_OTHER_INT_REG		0x20600
 #define HCLGE_MISC_RESET_STS_REG	0x20700
 #define HCLGE_MISC_VECTOR_INT_STS	0x20800
 #define HCLGE_GLOBAL_RESET_REG		0x20A00
 #define HCLGE_GLOBAL_RESET_BIT		0
 #define HCLGE_CORE_RESET_BIT		1
+#define HCLGE_IMP_RESET_BIT		2
+#define HCLGE_RESET_INT_M		GENMASK(7, 5)
 #define HCLGE_FUN_RST_ING		0x20C00
 #define HCLGE_FUN_RST_ING_B		0
 
@@ -98,8 +180,12 @@
 /* CMDQ register bits for RX event(=MBX event) */
 #define HCLGE_VECTOR0_RX_CMDQ_INT_B	1
 
+#define HCLGE_VECTOR0_IMP_RESET_INT_B	1
+#define HCLGE_VECTOR0_IMP_CMDQ_ERR_B	4U
+#define HCLGE_VECTOR0_IMP_RD_POISON_B	5U
+
 #define HCLGE_MAC_DEFAULT_FRAME \
-	(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN + ETH_DATA_LEN)
+	(ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN)
 #define HCLGE_MAC_MIN_FRAME		64
 #define HCLGE_MAC_MAX_FRAME		9728
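
The default-frame change above budgets for two VLAN tags (QinQ) instead of one. A quick stand-alone check of the arithmetic; the ETH_* values are the standard <linux/if_ether.h> constants, repeated here so the sketch builds in user space:

#include <stdio.h>

/* standard sizes from <linux/if_ether.h>, repeated for a user-space build */
#define ETH_HLEN	14	/* dst mac + src mac + ethertype */
#define ETH_FCS_LEN	4	/* frame check sequence */
#define VLAN_HLEN	4	/* one 802.1Q tag */
#define ETH_DATA_LEN	1500	/* default MTU */

int main(void)
{
	/* 14 + 4 + 2 * 4 + 1500 = 1526 bytes for a double-tagged frame */
	printf("%d\n", ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN);
	return 0;
}
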
 
@@ -108,12 +194,20 @@
 #define HCLGE_SUPPORT_25G_BIT		BIT(2)
 #define HCLGE_SUPPORT_50G_BIT		BIT(3)
 #define HCLGE_SUPPORT_100G_BIT		BIT(4)
+/* to be compatible with existing boards */
+#define HCLGE_SUPPORT_40G_BIT		BIT(5)
+#define HCLGE_SUPPORT_100M_BIT		BIT(6)
+#define HCLGE_SUPPORT_10M_BIT		BIT(7)
+#define HCLGE_SUPPORT_GE \
+	(HCLGE_SUPPORT_1G_BIT | HCLGE_SUPPORT_100M_BIT | HCLGE_SUPPORT_10M_BIT)
 
 enum HCLGE_DEV_STATE {
 	HCLGE_STATE_REINITING,
 	HCLGE_STATE_DOWN,
 	HCLGE_STATE_DISABLED,
 	HCLGE_STATE_REMOVING,
+	HCLGE_STATE_NIC_REGISTERED,
+	HCLGE_STATE_ROCE_REGISTERED,
 	HCLGE_STATE_SERVICE_INITED,
 	HCLGE_STATE_SERVICE_SCHED,
 	HCLGE_STATE_RST_SERVICE_SCHED,
@@ -128,12 +222,14 @@
 enum hclge_evt_cause {
 	HCLGE_VECTOR0_EVENT_RST,
 	HCLGE_VECTOR0_EVENT_MBX,
+	HCLGE_VECTOR0_EVENT_ERR,
 	HCLGE_VECTOR0_EVENT_OTHER,
 };
 
 #define HCLGE_MPF_ENBALE 1
 
 enum HCLGE_MAC_SPEED {
+	HCLGE_MAC_SPEED_UNKNOWN = 0,		/* unknown */
 	HCLGE_MAC_SPEED_10M	= 10,		/* 10 Mbps */
 	HCLGE_MAC_SPEED_100M	= 100,		/* 100 Mbps */
 	HCLGE_MAC_SPEED_1G	= 1000,		/* 1000 Mbps   = 1 Gbps */
@@ -149,22 +245,25 @@
 	HCLGE_MAC_FULL
 };
 
-enum hclge_mta_dmac_sel_type {
-	HCLGE_MAC_ADDR_47_36,
-	HCLGE_MAC_ADDR_46_35,
-	HCLGE_MAC_ADDR_45_34,
-	HCLGE_MAC_ADDR_44_33,
-};
+#define QUERY_SFP_SPEED		0
+#define QUERY_ACTIVE_SPEED	1
 
 struct hclge_mac {
 	u8 phy_addr;
 	u8 flag;
-	u8 media_type;
+	u8 media_type;	/* port media type, e.g. fibre/copper/backplane */
 	u8 mac_addr[ETH_ALEN];
 	u8 autoneg;
 	u8 duplex;
+	u8 support_autoneg;
+	u8 speed_type;	/* 0: sfp speed, 1: active speed */
 	u32 speed;
-	int link;	/* store the link status of mac & phy (if phy exit)*/
+	u32 speed_ability; /* speed ability supported by current media */
+	u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */
+	u32 fec_mode; /* active fec mode */
+	u32 user_fec_mode;
+	u32 fec_ability;
+	int link;	/* store the link status of mac & phy (if phy exists) */
 	struct phy_device *phydev;
 	struct mii_bus *mdio_bus;
 	phy_interface_t phy_if;
@@ -208,6 +307,13 @@
 	HCLGE_FC_DEFAULT
 };
 
+enum hclge_link_fail_code {
+	HCLGE_LF_NORMAL,
+	HCLGE_LF_REF_CLOCK_LOST,
+	HCLGE_LF_XSFP_TX_DISABLE,
+	HCLGE_LF_XSFP_ABSENT,
+};
+
 #define HCLGE_PG_NUM		4
 #define HCLGE_SCH_MODE_SP	0
 #define HCLGE_SCH_MODE_DWRR	1
@@ -238,6 +344,7 @@
 	u8 default_speed;
 	u32 numa_node_map;
 	u8 speed_ability;
+	u16 umv_space;
 };
 
 struct hclge_tm_info {
@@ -249,6 +356,7 @@
 	struct hclge_tc_info tc_info[HNAE3_MAX_TC];
 	enum hclge_fc_mode fc_mode;
 	u8 hw_pfc_map; /* Allow for packet drop or not on this TC */
+	u8 pfc_en;	/* PFC enabled or not for user priority */
 };
 
 struct hclge_comm_stats_str {
@@ -256,109 +364,6 @@
 	unsigned long offset;
 };
 
-/* all 64bit stats, opcode id: 0x0030 */
-struct hclge_64_bit_stats {
-	/* query_igu_stat */
-	u64 igu_rx_oversize_pkt;
-	u64 igu_rx_undersize_pkt;
-	u64 igu_rx_out_all_pkt;
-	u64 igu_rx_uni_pkt;
-	u64 igu_rx_multi_pkt;
-	u64 igu_rx_broad_pkt;
-	u64 rsv0;
-
-	/* query_egu_stat */
-	u64 egu_tx_out_all_pkt;
-	u64 egu_tx_uni_pkt;
-	u64 egu_tx_multi_pkt;
-	u64 egu_tx_broad_pkt;
-
-	/* ssu_ppp packet stats */
-	u64 ssu_ppp_mac_key_num;
-	u64 ssu_ppp_host_key_num;
-	u64 ppp_ssu_mac_rlt_num;
-	u64 ppp_ssu_host_rlt_num;
-
-	/* ssu_tx_in_out_dfx_stats */
-	u64 ssu_tx_in_num;
-	u64 ssu_tx_out_num;
-	/* ssu_rx_in_out_dfx_stats */
-	u64 ssu_rx_in_num;
-	u64 ssu_rx_out_num;
-};
-
-/* all 32bit stats, opcode id: 0x0031 */
-struct hclge_32_bit_stats {
-	u64 igu_rx_err_pkt;
-	u64 igu_rx_no_eof_pkt;
-	u64 igu_rx_no_sof_pkt;
-	u64 egu_tx_1588_pkt;
-	u64 egu_tx_err_pkt;
-	u64 ssu_full_drop_num;
-	u64 ssu_part_drop_num;
-	u64 ppp_key_drop_num;
-	u64 ppp_rlt_drop_num;
-	u64 ssu_key_drop_num;
-	u64 pkt_curr_buf_cnt;
-	u64 qcn_fb_rcv_cnt;
-	u64 qcn_fb_drop_cnt;
-	u64 qcn_fb_invaild_cnt;
-	u64 rsv0;
-	u64 rx_packet_tc0_in_cnt;
-	u64 rx_packet_tc1_in_cnt;
-	u64 rx_packet_tc2_in_cnt;
-	u64 rx_packet_tc3_in_cnt;
-	u64 rx_packet_tc4_in_cnt;
-	u64 rx_packet_tc5_in_cnt;
-	u64 rx_packet_tc6_in_cnt;
-	u64 rx_packet_tc7_in_cnt;
-	u64 rx_packet_tc0_out_cnt;
-	u64 rx_packet_tc1_out_cnt;
-	u64 rx_packet_tc2_out_cnt;
-	u64 rx_packet_tc3_out_cnt;
-	u64 rx_packet_tc4_out_cnt;
-	u64 rx_packet_tc5_out_cnt;
-	u64 rx_packet_tc6_out_cnt;
-	u64 rx_packet_tc7_out_cnt;
-
-	/* Tx packet level statistics */
-	u64 tx_packet_tc0_in_cnt;
-	u64 tx_packet_tc1_in_cnt;
-	u64 tx_packet_tc2_in_cnt;
-	u64 tx_packet_tc3_in_cnt;
-	u64 tx_packet_tc4_in_cnt;
-	u64 tx_packet_tc5_in_cnt;
-	u64 tx_packet_tc6_in_cnt;
-	u64 tx_packet_tc7_in_cnt;
-	u64 tx_packet_tc0_out_cnt;
-	u64 tx_packet_tc1_out_cnt;
-	u64 tx_packet_tc2_out_cnt;
-	u64 tx_packet_tc3_out_cnt;
-	u64 tx_packet_tc4_out_cnt;
-	u64 tx_packet_tc5_out_cnt;
-	u64 tx_packet_tc6_out_cnt;
-	u64 tx_packet_tc7_out_cnt;
-
-	/* packet buffer statistics */
-	u64 pkt_curr_buf_tc0_cnt;
-	u64 pkt_curr_buf_tc1_cnt;
-	u64 pkt_curr_buf_tc2_cnt;
-	u64 pkt_curr_buf_tc3_cnt;
-	u64 pkt_curr_buf_tc4_cnt;
-	u64 pkt_curr_buf_tc5_cnt;
-	u64 pkt_curr_buf_tc6_cnt;
-	u64 pkt_curr_buf_tc7_cnt;
-
-	u64 mb_uncopy_num;
-	u64 lo_pri_unicast_rlt_drop_num;
-	u64 hi_pri_multicast_rlt_drop_num;
-	u64 lo_pri_multicast_rlt_drop_num;
-	u64 rx_oq_drop_pkt_cnt;
-	u64 tx_oq_drop_pkt_cnt;
-	u64 nic_l2_err_drop_pkt_cnt;
-	u64 roc_l2_err_drop_pkt_cnt;
-};
-
 /* mac stats ,opcode id: 0x0032 */
 struct hclge_mac_stats {
 	u64 mac_tx_mac_pause_num;
@@ -445,13 +450,15 @@
 	u64 mac_rx_fcs_err_pkt_num;
 	u64 mac_rx_send_app_good_pkt_num;
 	u64 mac_rx_send_app_bad_pkt_num;
+	u64 mac_tx_pfc_pause_pkt_num;
+	u64 mac_rx_pfc_pause_pkt_num;
+	u64 mac_tx_ctrl_pkt_num;
+	u64 mac_rx_ctrl_pkt_num;
 };
 
 #define HCLGE_STATS_TIMER_INTERVAL	(60 * 5)
 struct hclge_hw_stats {
 	struct hclge_mac_stats      mac_stats;
-	struct hclge_64_bit_stats   all_64_bit_stats;
-	struct hclge_32_bit_stats   all_32_bit_stats;
 	u32 stats_timer;
 };
 
@@ -464,6 +471,244 @@
 	u16 tx_in_vlan_type;
 };
 
+enum HCLGE_FD_MODE {
+	HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1,
+	HCLGE_FD_MODE_DEPTH_1K_WIDTH_400B_STAGE_2,
+	HCLGE_FD_MODE_DEPTH_4K_WIDTH_200B_STAGE_1,
+	HCLGE_FD_MODE_DEPTH_2K_WIDTH_200B_STAGE_2,
+};
+
+enum HCLGE_FD_KEY_TYPE {
+	HCLGE_FD_KEY_BASE_ON_PTYPE,
+	HCLGE_FD_KEY_BASE_ON_TUPLE,
+};
+
+enum HCLGE_FD_STAGE {
+	HCLGE_FD_STAGE_1,
+	HCLGE_FD_STAGE_2,
+	MAX_STAGE_NUM,
+};
+
+/* OUTER_XXX indicates tuples in the tunnel header of a tunnel packet;
+ * INNER_XXX indicates tuples in the inner header of a tunnel packet or
+ *           the tuples of a non-tunnel packet
+ */
+enum HCLGE_FD_TUPLE {
+	OUTER_DST_MAC,
+	OUTER_SRC_MAC,
+	OUTER_VLAN_TAG_FST,
+	OUTER_VLAN_TAG_SEC,
+	OUTER_ETH_TYPE,
+	OUTER_L2_RSV,
+	OUTER_IP_TOS,
+	OUTER_IP_PROTO,
+	OUTER_SRC_IP,
+	OUTER_DST_IP,
+	OUTER_L3_RSV,
+	OUTER_SRC_PORT,
+	OUTER_DST_PORT,
+	OUTER_L4_RSV,
+	OUTER_TUN_VNI,
+	OUTER_TUN_FLOW_ID,
+	INNER_DST_MAC,
+	INNER_SRC_MAC,
+	INNER_VLAN_TAG_FST,
+	INNER_VLAN_TAG_SEC,
+	INNER_ETH_TYPE,
+	INNER_L2_RSV,
+	INNER_IP_TOS,
+	INNER_IP_PROTO,
+	INNER_SRC_IP,
+	INNER_DST_IP,
+	INNER_L3_RSV,
+	INNER_SRC_PORT,
+	INNER_DST_PORT,
+	INNER_L4_RSV,
+	MAX_TUPLE,
+};
+
+enum HCLGE_FD_META_DATA {
+	PACKET_TYPE_ID,
+	IP_FRAGEMENT,
+	ROCE_TYPE,
+	NEXT_KEY,
+	VLAN_NUMBER,
+	SRC_VPORT,
+	DST_VPORT,
+	TUNNEL_PACKET,
+	MAX_META_DATA,
+};
+
+struct key_info {
+	u8 key_type;
+	u8 key_length; /* in bits */
+};
+
+#define MAX_KEY_LENGTH	400
+#define MAX_KEY_DWORDS	DIV_ROUND_UP(MAX_KEY_LENGTH / 8, 4)
+#define MAX_KEY_BYTES	(MAX_KEY_DWORDS * 4)
+#define MAX_META_DATA_LENGTH	32
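
The key sizing macros round a 400-bit key up to whole dwords. A short check of the arithmetic, with DIV_ROUND_UP expanded as in <linux/kernel.h>:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define MAX_KEY_LENGTH		400
#define MAX_KEY_DWORDS		DIV_ROUND_UP(MAX_KEY_LENGTH / 8, 4)
#define MAX_KEY_BYTES		(MAX_KEY_DWORDS * 4)

int main(void)
{
	/* 400 bits -> 50 bytes -> 13 dwords -> 52 bytes of key buffer */
	printf("%d dwords, %d bytes\n", MAX_KEY_DWORDS, MAX_KEY_BYTES);
	return 0;
}
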
+
+/* assigned by firmware; the real filter number for each PF may be less */
+#define MAX_FD_FILTER_NUM	4096
+#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL	5
+
+enum HCLGE_FD_ACTIVE_RULE_TYPE {
+	HCLGE_FD_RULE_NONE,
+	HCLGE_FD_ARFS_ACTIVE,
+	HCLGE_FD_EP_ACTIVE,
+};
+
+enum HCLGE_FD_PACKET_TYPE {
+	NIC_PACKET,
+	ROCE_PACKET,
+};
+
+enum HCLGE_FD_ACTION {
+	HCLGE_FD_ACTION_ACCEPT_PACKET,
+	HCLGE_FD_ACTION_DROP_PACKET,
+};
+
+struct hclge_fd_key_cfg {
+	u8 key_sel;
+	u8 inner_sipv6_word_en;
+	u8 inner_dipv6_word_en;
+	u8 outer_sipv6_word_en;
+	u8 outer_dipv6_word_en;
+	u32 tuple_active;
+	u32 meta_data_active;
+};
+
+struct hclge_fd_cfg {
+	u8 fd_mode;
+	u16 max_key_length; /* in bits */
+	u32 proto_support;
+	u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */
+	u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */
+	struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM];
+};
+
+#define IPV4_INDEX	3
+#define IPV6_SIZE	4
+struct hclge_fd_rule_tuples {
+	u8 src_mac[ETH_ALEN];
+	u8 dst_mac[ETH_ALEN];
+	/* Compatible with both IPv4 and IPv6 addresses.
+	 * An IPv4 address is stored in src/dst_ip[3].
+	 */
+	u32 src_ip[IPV6_SIZE];
+	u32 dst_ip[IPV6_SIZE];
+	u16 src_port;
+	u16 dst_port;
+	u16 vlan_tag1;
+	u16 ether_proto;
+	u8 ip_tos;
+	u8 ip_proto;
+};
+
+struct hclge_fd_rule {
+	struct hlist_node rule_node;
+	struct hclge_fd_rule_tuples tuples;
+	struct hclge_fd_rule_tuples tuples_mask;
+	u32 unused_tuple;
+	u32 flow_type;
+	u8 action;
+	u16 vf_id;
+	u16 queue_id;
+	u16 location;
+	u16 flow_id;	/* only used for arfs */
+	enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type;
+};
+
+struct hclge_fd_ad_data {
+	u16 ad_id;
+	u8 drop_packet;
+	u8 forward_to_direct_queue;
+	u16 queue_id;
+	u8 use_counter;
+	u8 counter_id;
+	u8 use_next_stage;
+	u8 write_rule_id_to_bd;
+	u8 next_input_key;
+	u16 rule_id;
+};
+
+struct hclge_vport_mac_addr_cfg {
+	struct list_head node;
+	int hd_tbl_status;
+	u8 mac_addr[ETH_ALEN];
+};
+
+enum HCLGE_MAC_ADDR_TYPE {
+	HCLGE_MAC_ADDR_UC,
+	HCLGE_MAC_ADDR_MC
+};
+
+struct hclge_vport_vlan_cfg {
+	struct list_head node;
+	int hd_tbl_status;
+	u16 vlan_id;
+};
+
+struct hclge_rst_stats {
+	u32 reset_done_cnt;	/* the number of completed resets */
+	u32 hw_reset_done_cnt;	/* the number of completed HW resets */
+	u32 pf_rst_cnt;		/* the number of PF resets */
+	u32 flr_rst_cnt;	/* the number of FLRs */
+	u32 core_rst_cnt;	/* the number of CORE resets */
+	u32 global_rst_cnt;	/* the number of GLOBAL resets */
+	u32 imp_rst_cnt;	/* the number of IMP resets */
+	u32 reset_cnt;		/* the number of requested resets */
+	u32 reset_fail_cnt;	/* the number of failed resets */
+};
+
+/* time and register status when mac tunnel interruption occurs */
+struct hclge_mac_tnl_stats {
+	u64 time;
+	u32 status;
+};
+
+#define HCLGE_RESET_INTERVAL	(10 * HZ)
+#define HCLGE_WAIT_RESET_DONE	100
+
+#pragma pack(1)
+struct hclge_vf_vlan_cfg {
+	u8 mbx_cmd;
+	u8 subcode;
+	u8 is_kill;
+	u16 vlan;
+	u16 proto;
+};
+
+#pragma pack()
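
The #pragma pack(1)/#pragma pack() pair pins struct hclge_vf_vlan_cfg to its 7-byte mailbox wire layout; without it the compiler would pad the two u16 members to even offsets. A user-space sketch of the effect (uint*_t types stand in for the kernel's u8/u16):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#pragma pack(1)
struct vf_vlan_cfg {		/* mirrors struct hclge_vf_vlan_cfg */
	uint8_t mbx_cmd;
	uint8_t subcode;
	uint8_t is_kill;
	uint16_t vlan;
	uint16_t proto;
};
#pragma pack()

int main(void)
{
	/* packed: 1 + 1 + 1 + 2 + 2 = 7 bytes, vlan lands at offset 3 */
	assert(sizeof(struct vf_vlan_cfg) == 7);
	assert(offsetof(struct vf_vlan_cfg, vlan) == 3);
	return 0;
}
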
+
+/* For each bit of TCAM entry, it uses a pair of 'x' and
+ * 'y' to indicate which value to match, like below:
+ * ----------------------------------
+ * | bit x | bit y |  search value  |
+ * ----------------------------------
+ * |   0   |   0   |   always hit   |
+ * ----------------------------------
+ * |   1   |   0   |   match '0'    |
+ * ----------------------------------
+ * |   0   |   1   |   match '1'    |
+ * ----------------------------------
+ * |   1   |   1   |   invalid      |
+ * ----------------------------------
+ * Then for input key (k) and mask (v), we can calculate the value by
+ * the formulae:
+ *	x = (~k) & v
+ *	y = (k ^ ~v) & k
+ */
+#define calc_x(x, k, v) ((x) = (~(k) & (v)))
+#define calc_y(y, k, v) \
+	do { \
+		const typeof(k) _k_ = (k); \
+		const typeof(v) _v_ = (v); \
+		(y) = (_k_ ^ ~_v_) & (_k_); \
+	} while (0)
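
The calc_x()/calc_y() macros implement the truth table above as two bitwise formulas. A stand-alone sketch that pushes one nibble through the encoding, where mask carries 1-bits for the positions that must match:

#include <stdio.h>

int main(void)
{
	unsigned char key = 0x0a;	/* 0b1010 */
	unsigned char mask = 0x0c;	/* care about the two high bits only */
	unsigned char x = ~key & mask;
	unsigned char y = (key ^ (unsigned char)~mask) & key;
	int bit;

	for (bit = 3; bit >= 0; bit--) {
		int xb = (x >> bit) & 1;
		int yb = (y >> bit) & 1;

		if (!xb && !yb)
			printf("bit %d: always hit\n", bit);
		else if (xb && !yb)
			printf("bit %d: match '0'\n", bit);
		else if (!xb && yb)
			printf("bit %d: match '1'\n", bit);
		else
			printf("bit %d: invalid\n", bit);
	}
	return 0;
}
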
+
+#define HCLGE_MAC_TNL_LOG_SIZE	8
 #define HCLGE_VPORT_NUM 256
 struct hclge_dev {
 	struct pci_dev *pdev;
@@ -472,10 +717,15 @@
 	struct hclge_misc_vector misc_vector;
 	struct hclge_hw_stats hw_stats;
 	unsigned long state;
+	unsigned long flr_state;
+	unsigned long last_reset_time;
 
 	enum hnae3_reset_type reset_type;
+	enum hnae3_reset_type reset_level;
+	unsigned long default_reset_request;
 	unsigned long reset_request;	/* reset has been requested */
 	unsigned long reset_pending;	/* client rst is pending to be served */
+	struct hclge_rst_stats rst_stats;
 	u32 fw_version;
 	u16 num_vmdq_vport;		/* Num vmdq vport this PF has set up */
 	u16 num_tqps;			/* Num task queue pairs of this PF */
@@ -489,10 +739,12 @@
 	u16 num_alloc_vport;		/* Num vports this driver supports */
 	u32 numa_node_mask;
 	u16 rx_buf_len;
-	u16 num_desc;
+	u16 num_tx_desc;		/* desc num per tx queue */
+	u16 num_rx_desc;		/* desc num per rx queue */
 	u8 hw_tc_map;
 	u8 tc_num_last_time;
 	enum hclge_fc_mode fc_mode_last_time;
+	u8 support_sfp_query;
 
 #define HCLGE_FLAG_TC_BASE_SCH_MODE		1
 #define HCLGE_FLAG_VNET_BASE_SCH_MODE		2
@@ -511,6 +763,7 @@
 	u32 base_msi_vector;
 	u16 *vector_status;
 	int *vector_irq;
+	u16 num_nic_msi;	/* Num of nic vectors for this PF */
 	u16 num_roce_msi;	/* Num of roce vectors for this PF */
 	int roce_base_vector;
 
@@ -522,8 +775,8 @@
 	u16 adminq_work_limit; /* Num of admin receive queue desc to process */
 	unsigned long service_timer_period;
 	unsigned long service_timer_previous;
-	struct timer_list service_timer;
-	struct work_struct service_task;
+	struct timer_list reset_timer;
+	struct delayed_work service_task;
 	struct work_struct rst_service_task;
 	struct work_struct mbx_service_task;
 
@@ -545,14 +798,44 @@
 	u32 flag;
 
 	u32 pkt_buf_size; /* Total pf buf size for tx/rx */
-	u32 mps; /* Max packet size */
+	u32 tx_buf_size; /* Tx buffer size for each TC */
+	u32 dv_buf_size; /* Dv buffer size for each TC */
 
-	enum hclge_mta_dmac_sel_type mta_mac_sel_type;
-	bool enable_mta; /* Multicast filter enable */
+	u32 mps; /* Max packet size */
+	/* vport_lock protect resource shared by vports */
+	struct mutex vport_lock;
 
 	struct hclge_vlan_type_cfg vlan_type_cfg;
 
 	unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)];
+	unsigned long vf_vlan_full[BITS_TO_LONGS(HCLGE_VPORT_NUM)];
+
+	struct hclge_fd_cfg fd_cfg;
+	struct hlist_head fd_rule_list;
+	spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */
+	u16 hclge_fd_rule_num;
+	u16 fd_arfs_expire_timer;
+	unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)];
+	enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type;
+	u8 fd_en;
+
+	u16 wanted_umv_size;
+	/* max available unicast mac vlan space */
+	u16 max_umv_size;
+	/* private unicast mac vlan space, it's the same for PF and its VFs */
+	u16 priv_umv_size;
+	/* unicast mac vlan space shared by PF and its VFs */
+	u16 share_umv_size;
+	struct mutex umv_mutex; /* protect share_umv_size */
+
+	struct mutex vport_cfg_mutex;   /* Protect stored vf table */
+
+	DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
+		      HCLGE_MAC_TNL_LOG_SIZE);
+
+	/* affinity mask and notify for misc interrupt */
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
 };
 
 /* VPort level vlan tag configuration for TX direction */
@@ -569,10 +852,11 @@
 
 /* VPort level vlan tag configuration for RX direction */
 struct hclge_rx_vtag_cfg {
-	bool strip_tag1_en;	/* Whether strip inner vlan tag */
-	bool strip_tag2_en;	/* Whether strip outer vlan tag */
-	bool vlan1_vlan_prionly;/* Inner VLAN Tag up to descriptor Enable */
-	bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
+	u8 rx_vlan_offload_en;	/* Whether enable rx vlan offload */
+	u8 strip_tag1_en;	/* Whether strip inner vlan tag */
+	u8 strip_tag2_en;	/* Whether strip outer vlan tag */
+	u8 vlan1_vlan_prionly;	/* Inner VLAN Tag up to descriptor Enable */
+	u8 vlan2_vlan_prionly;	/* Outer VLAN Tag up to descriptor Enable */
 };
 
 struct hclge_rss_tuple_cfg {
@@ -586,6 +870,22 @@
 	u8 ipv6_fragment_en;
 };
 
+enum HCLGE_VPORT_STATE {
+	HCLGE_VPORT_STATE_ALIVE,
+	HCLGE_VPORT_STATE_MAX
+};
+
+struct hclge_vlan_info {
+	u16 vlan_proto; /* only 802.1Q is supported so far */
+	u16 qos;
+	u16 vlan_tag;
+};
+
+struct hclge_port_base_vlan_config {
+	u16 state;
+	struct hclge_vlan_info vlan_info;
+};
+
 struct hclge_vport {
 	u16 alloc_tqps;	/* Allocated Tx/Rx queues */
 
@@ -599,19 +899,28 @@
 	u16 alloc_rss_size;
 
 	u16 qs_offset;
-	u16 bw_limit;		/* VSI BW Limit (0 = disabled) */
+	u32 bw_limit;		/* VSI BW Limit (0 = disabled) */
 	u8  dwrr;
 
+	unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)];
+	struct hclge_port_base_vlan_config port_base_vlan_cfg;
 	struct hclge_tx_vtag_cfg  txvlan_cfg;
 	struct hclge_rx_vtag_cfg  rxvlan_cfg;
 
-	int vport_id;
+	u16 used_umv_num;
+
+	u16 vport_id;
 	struct hclge_dev *back;  /* Back reference to associated dev */
 	struct hnae3_handle nic;
 	struct hnae3_handle roce;
 
-	bool accept_mta_mc; /* whether to accept mta filter multicast */
-	unsigned long mta_shadow[BITS_TO_LONGS(HCLGE_MTA_TBL_SIZE)];
+	unsigned long state;
+	unsigned long last_active_jiffies;
+	u32 mps; /* Max packet size */
+
+	struct list_head uc_mac_list;   /* Store VF unicast table */
+	struct list_head mc_mac_list;   /* Store VF multicast table */
+	struct list_head vlan_list;     /* Store VF vlan table */
 };
 
 void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
@@ -626,15 +935,6 @@
 int hclge_rm_mc_addr_common(struct hclge_vport *vport,
 			    const unsigned char *addr);
 
-int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
-			      u8 func_id,
-			      bool enable);
-int hclge_update_mta_status_common(struct hclge_vport *vport,
-				   unsigned long *status,
-				   u16 idx,
-				   u16 count,
-				   bool update_filter);
-
 struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle);
 int hclge_bind_ring_with_vector(struct hclge_vport *vport,
 				int vector_id, bool en,
@@ -647,6 +947,12 @@
 	return tqp->index;
 }
 
+static inline bool hclge_is_reset_pending(struct hclge_dev *hdev)
+{
+	return !!hdev->reset_pending;
+}
+
+int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport);
 int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
 int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
 			  u16 vlan_id, bool is_kill);
@@ -657,8 +963,35 @@
 void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
 int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
+int hclge_vport_start(struct hclge_vport *vport);
+void hclge_vport_stop(struct hclge_vport *vport);
+int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu);
+int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf);
+u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id);
+int hclge_notify_client(struct hclge_dev *hdev,
+			enum hnae3_reset_notify_type type);
+void hclge_add_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
+			       enum HCLGE_MAC_ADDR_TYPE mac_type);
+void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
+			      bool is_write_tbl,
+			      enum HCLGE_MAC_ADDR_TYPE mac_type);
+void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
+				  enum HCLGE_MAC_ADDR_TYPE mac_type);
+void hclge_uninit_vport_mac_table(struct hclge_dev *hdev);
+void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list);
+void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev);
+int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+				    struct hclge_vlan_info *vlan_info);
+int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
+				      u16 state, u16 vlan_tag, u16 qos,
+				      u16 vlan_proto);
+void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time);
+int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev,
+				struct hclge_desc *desc);
+void hclge_report_hw_error(struct hclge_dev *hdev,
+			   enum hnae3_hw_error_type type);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index e08e820..f5da28a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -29,6 +29,10 @@
 			"PF fail to gen resp to VF len %d exceeds max len %d\n",
 			resp_data_len,
 			HCLGE_MBX_MAX_RESP_DATA_SIZE);
+		/* If resp_data_len is too long, set the value to max length
+		 * and return the msg to VF
+		 */
+		resp_data_len = HCLGE_MBX_MAX_RESP_DATA_SIZE;
 	}
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
@@ -79,15 +83,26 @@
 	return status;
 }
 
-static int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
 {
+	struct hclge_dev *hdev = vport->back;
+	enum hnae3_reset_type reset_type;
 	u8 msg_data[2];
 	u8 dest_vfid;
 
 	dest_vfid = (u8)vport->vport_id;
 
+	if (hdev->reset_type == HNAE3_FUNC_RESET)
+		reset_type = HNAE3_VF_PF_FUNC_RESET;
+	else if (hdev->reset_type == HNAE3_FLR_RESET)
+		reset_type = HNAE3_VF_FULL_RESET;
+	else
+		reset_type = HNAE3_VF_FUNC_RESET;
+
+	memcpy(&msg_data[0], &reset_type, sizeof(u16));
+
 	/* send this requested info to VF */
-	return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
 				  HCLGE_MBX_ASSERTING_RESET, dest_vfid);
 }
 
@@ -181,29 +196,25 @@
 		return ret;
 
 	ret = hclge_bind_ring_with_vector(vport, vector_id, en, &ring_chain);
-	if (ret)
-		return ret;
 
 	hclge_free_vector_ring_chain(&ring_chain);
 
-	return 0;
+	return ret;
 }
 
 static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
 				     struct hclge_mbx_vf_to_pf_cmd *req)
 {
-	bool en_uc = req->msg[1] ? true : false;
-	bool en_mc = req->msg[2] ? true : false;
+	bool en_bc = req->msg[1] ? true : false;
 	struct hclge_promisc_param param;
 
-	/* always enable broadcast promisc bit */
-	hclge_promisc_param_init(&param, en_uc, en_mc, true, vport->vport_id);
+	/* VF is not allowed to enable unicast/multicast promisc mode */
+	hclge_promisc_param_init(&param, false, false, en_bc, vport->vport_id);
 	return hclge_cmd_set_promisc_mode(vport->back, &param);
 }
 
 static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
-				    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
-				    bool gen_resp)
+				    struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
 	const u8 *mac_addr = (const u8 *)(&mbx_req->msg[2]);
 	struct hclge_dev *hdev = vport->back;
@@ -214,12 +225,24 @@
 
 		hclge_rm_uc_addr_common(vport, old_addr);
 		status = hclge_add_uc_addr_common(vport, mac_addr);
-		if (status)
+		if (status) {
 			hclge_add_uc_addr_common(vport, old_addr);
+		} else {
+			hclge_rm_vport_mac_table(vport, mac_addr,
+						 false, HCLGE_MAC_ADDR_UC);
+			hclge_add_vport_mac_table(vport, mac_addr,
+						  HCLGE_MAC_ADDR_UC);
+		}
 	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) {
 		status = hclge_add_uc_addr_common(vport, mac_addr);
+		if (!status)
+			hclge_add_vport_mac_table(vport, mac_addr,
+						  HCLGE_MAC_ADDR_UC);
 	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
 		status = hclge_rm_uc_addr_common(vport, mac_addr);
+		if (!status)
+			hclge_rm_vport_mac_table(vport, mac_addr,
+						 false, HCLGE_MAC_ADDR_UC);
 	} else {
 		dev_err(&hdev->pdev->dev,
 			"failed to set unicast mac addr, unknown subcode %d\n",
@@ -227,49 +250,12 @@
 		return -EIO;
 	}
 
-	if (gen_resp)
+	if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT)
 		hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
 
 	return 0;
 }
 
-static int hclge_set_vf_mc_mta_status(struct hclge_vport *vport,
-				      u8 *msg, u8 idx, bool is_end)
-{
-#define HCLGE_MTA_STATUS_MSG_SIZE 13
-#define HCLGE_MTA_STATUS_MSG_BITS \
-				(HCLGE_MTA_STATUS_MSG_SIZE * BITS_PER_BYTE)
-#define HCLGE_MTA_STATUS_MSG_END_BITS \
-				(HCLGE_MTA_TBL_SIZE % HCLGE_MTA_STATUS_MSG_BITS)
-	unsigned long status[BITS_TO_LONGS(HCLGE_MTA_STATUS_MSG_BITS)];
-	u16 tbl_cnt;
-	u16 tbl_idx;
-	u8 msg_ofs;
-	u8 msg_bit;
-
-	tbl_cnt = is_end ? HCLGE_MTA_STATUS_MSG_END_BITS :
-			HCLGE_MTA_STATUS_MSG_BITS;
-
-	/* set msg field */
-	msg_ofs = 0;
-	msg_bit = 0;
-	memset(status, 0, sizeof(status));
-	for (tbl_idx = 0; tbl_idx < tbl_cnt; tbl_idx++) {
-		if (msg[msg_ofs] & BIT(msg_bit))
-			set_bit(tbl_idx, status);
-
-		msg_bit++;
-		if (msg_bit == BITS_PER_BYTE) {
-			msg_bit = 0;
-			msg_ofs++;
-		}
-	}
-
-	return hclge_update_mta_status_common(vport,
-					status, idx * HCLGE_MTA_STATUS_MSG_BITS,
-					tbl_cnt, is_end);
-}
-
 static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
 				    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
 				    bool gen_resp)
@@ -282,29 +268,14 @@
 
 	if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_ADD) {
 		status = hclge_add_mc_addr_common(vport, mac_addr);
+		if (!status)
+			hclge_add_vport_mac_table(vport, mac_addr,
+						  HCLGE_MAC_ADDR_MC);
 	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_REMOVE) {
 		status = hclge_rm_mc_addr_common(vport, mac_addr);
-	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_FUNC_MTA_ENABLE) {
-		u8 func_id = vport->vport_id;
-		bool enable = mbx_req->msg[2];
-
-		status = hclge_cfg_func_mta_filter(hdev, func_id, enable);
-	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MTA_TYPE_READ) {
-		resp_data = hdev->mta_mac_sel_type;
-		resp_len = sizeof(u8);
-		gen_resp = true;
-		status = 0;
-	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MTA_STATUS_UPDATE) {
-		/* mta status update msg format
-		 * msg[2.6 : 2.0]  msg index
-		 * msg[2.7]        msg is end
-		 * msg[15 : 3]     mta status bits[103 : 0]
-		 */
-		bool is_end = (mbx_req->msg[2] & 0x80) ? true : false;
-
-		status = hclge_set_vf_mc_mta_status(vport, &mbx_req->msg[3],
-						    mbx_req->msg[2] & 0x7F,
-						    is_end);
+		if (!status)
+			hclge_rm_vport_mac_table(vport, mac_addr,
+						 false, HCLGE_MAC_ADDR_MC);
 	} else {
 		dev_err(&hdev->pdev->dev,
 			"failed to set mcast mac addr, unknown subcode %d\n",
@@ -319,44 +290,93 @@
 	return 0;
 }
 
-static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
-				 struct hclge_mbx_vf_to_pf_cmd *mbx_req,
-				 bool gen_resp)
+int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
+				      u16 state, u16 vlan_tag, u16 qos,
+				      u16 vlan_proto)
 {
+#define MSG_DATA_SIZE	8
+
+	u8 msg_data[MSG_DATA_SIZE];
+
+	memcpy(&msg_data[0], &state, sizeof(u16));
+	memcpy(&msg_data[2], &vlan_proto, sizeof(u16));
+	memcpy(&msg_data[4], &qos, sizeof(u16));
+	memcpy(&msg_data[6], &vlan_tag, sizeof(u16));
+
+	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
+				  HCLGE_MBX_PUSH_VLAN_INFO, vfid);
+}
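
hclge_push_vf_port_base_vlan_info() serializes four u16 fields into the 8-byte mailbox payload with memcpy(), so both ends must agree on the byte offsets (and on host byte order). A sketch of a matching receive-side round trip; the unpack helper is hypothetical, named here only for illustration:

#include <stdint.h>
#include <string.h>

/* byte offsets used by hclge_push_vf_port_base_vlan_info() */
#define VLAN_INFO_STATE_OFF	0
#define VLAN_INFO_PROTO_OFF	2
#define VLAN_INFO_QOS_OFF	4
#define VLAN_INFO_TAG_OFF	6

/* hypothetical VF-side helper that undoes the memcpy-based packing */
static void unpack_port_base_vlan(const uint8_t *msg_data,
				  uint16_t *state, uint16_t *proto,
				  uint16_t *qos, uint16_t *tag)
{
	memcpy(state, &msg_data[VLAN_INFO_STATE_OFF], sizeof(*state));
	memcpy(proto, &msg_data[VLAN_INFO_PROTO_OFF], sizeof(*proto));
	memcpy(qos, &msg_data[VLAN_INFO_QOS_OFF], sizeof(*qos));
	memcpy(tag, &msg_data[VLAN_INFO_TAG_OFF], sizeof(*tag));
}

int main(void)
{
	uint16_t vals[4] = { 1, 0x8100, 0, 100 };	/* state/proto/qos/tag */
	uint16_t state, proto, qos, tag;
	uint8_t msg[8];

	memcpy(&msg[VLAN_INFO_STATE_OFF], &vals[0], sizeof(uint16_t));
	memcpy(&msg[VLAN_INFO_PROTO_OFF], &vals[1], sizeof(uint16_t));
	memcpy(&msg[VLAN_INFO_QOS_OFF], &vals[2], sizeof(uint16_t));
	memcpy(&msg[VLAN_INFO_TAG_OFF], &vals[3], sizeof(uint16_t));
	unpack_port_base_vlan(msg, &state, &proto, &qos, &tag);
	return !(state == 1 && proto == 0x8100 && qos == 0 && tag == 100);
}
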
+
+static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
+				 struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	struct hclge_vf_vlan_cfg *msg_cmd;
 	int status = 0;
 
-	if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) {
+	msg_cmd = (struct hclge_vf_vlan_cfg *)mbx_req->msg;
+	if (msg_cmd->subcode == HCLGE_MBX_VLAN_FILTER) {
 		struct hnae3_handle *handle = &vport->nic;
 		u16 vlan, proto;
 		bool is_kill;
 
-		is_kill = !!mbx_req->msg[2];
-		memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan));
-		memcpy(&proto, &mbx_req->msg[5], sizeof(proto));
+		is_kill = !!msg_cmd->is_kill;
+		vlan =  msg_cmd->vlan;
+		proto =  msg_cmd->proto;
 		status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
 					       vlan, is_kill);
-	} else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) {
+	} else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) {
 		struct hnae3_handle *handle = &vport->nic;
-		bool en = mbx_req->msg[2] ? true : false;
+		bool en = msg_cmd->is_kill ? true : false;
 
 		status = hclge_en_hw_strip_rxvtag(handle, en);
+	} else if (mbx_req->msg[1] == HCLGE_MBX_PORT_BASE_VLAN_CFG) {
+		struct hclge_vlan_info *vlan_info;
+		u16 *state;
+
+		state = (u16 *)&mbx_req->msg[2];
+		vlan_info = (struct hclge_vlan_info *)&mbx_req->msg[4];
+		status = hclge_update_port_base_vlan_cfg(vport, *state,
+							 vlan_info);
+	} else if (mbx_req->msg[1] == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) {
+		u8 state;
+
+		state = vport->port_base_vlan_cfg.state;
+		status = hclge_gen_resp_to_vf(vport, mbx_req, 0, &state,
+					      sizeof(u8));
 	}
 
-	if (gen_resp)
-		status = hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
-
 	return status;
 }
 
+static int hclge_set_vf_alive(struct hclge_vport *vport,
+			      struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+			      bool gen_resp)
+{
+	bool alive = !!mbx_req->msg[2];
+	int ret = 0;
+
+	if (alive)
+		ret = hclge_vport_start(vport);
+	else
+		hclge_vport_stop(vport);
+
+	return ret;
+}
+
 static int hclge_get_vf_tcinfo(struct hclge_vport *vport,
 			       struct hclge_mbx_vf_to_pf_cmd *mbx_req,
 			       bool gen_resp)
 {
-	struct hclge_dev *hdev = vport->back;
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	u8 vf_tc_map = 0;
+	unsigned int i;
 	int ret;
 
-	ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &hdev->hw_tc_map,
-				   sizeof(u8));
+	for (i = 0; i < kinfo->num_tc; i++)
+		vf_tc_map |= BIT(i);
+
+	ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &vf_tc_map,
+				   sizeof(vf_tc_map));
 
 	return ret;
 }
@@ -365,20 +385,46 @@
 				   struct hclge_mbx_vf_to_pf_cmd *mbx_req,
 				   bool gen_resp)
 {
-#define HCLGE_TQPS_RSS_INFO_LEN		8
+#define HCLGE_TQPS_RSS_INFO_LEN		6
 	u8 resp_data[HCLGE_TQPS_RSS_INFO_LEN];
 	struct hclge_dev *hdev = vport->back;
 
 	/* get the queue related info */
 	memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16));
 	memcpy(&resp_data[2], &vport->nic.kinfo.rss_size, sizeof(u16));
-	memcpy(&resp_data[4], &hdev->num_desc, sizeof(u16));
-	memcpy(&resp_data[6], &hdev->rx_buf_len, sizeof(u16));
+	memcpy(&resp_data[4], &hdev->rx_buf_len, sizeof(u16));
 
 	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
 				    HCLGE_TQPS_RSS_INFO_LEN);
 }
 
+static int hclge_get_vf_queue_depth(struct hclge_vport *vport,
+				    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+				    bool gen_resp)
+{
+#define HCLGE_TQPS_DEPTH_INFO_LEN	4
+	u8 resp_data[HCLGE_TQPS_DEPTH_INFO_LEN];
+	struct hclge_dev *hdev = vport->back;
+
+	/* get the queue depth info */
+	memcpy(&resp_data[0], &hdev->num_tx_desc, sizeof(u16));
+	memcpy(&resp_data[2], &hdev->num_rx_desc, sizeof(u16));
+	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
+				    HCLGE_TQPS_DEPTH_INFO_LEN);
+}
+
+static int hclge_get_vf_media_type(struct hclge_vport *vport,
+				   struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	struct hclge_dev *hdev = vport->back;
+	u8 resp_data[2];
+
+	resp_data[0] = hdev->hw.mac.media_type;
+	resp_data[1] = hdev->hw.mac.module_type;
+	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
+				    sizeof(resp_data));
+}
+
 static int hclge_get_link_info(struct hclge_vport *vport,
 			       struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
@@ -401,6 +447,29 @@
 				  HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
 }
 
+static void hclge_get_link_mode(struct hclge_vport *vport,
+				struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+#define HCLGE_SUPPORTED   1
+	struct hclge_dev *hdev = vport->back;
+	unsigned long advertising;
+	unsigned long supported;
+	unsigned long send_data;
+	u8 msg_data[10];
+	u8 dest_vfid;
+
+	advertising = hdev->hw.mac.advertising[0];
+	supported = hdev->hw.mac.supported[0];
+	dest_vfid = mbx_req->mbx_src_vfid;
+	msg_data[0] = mbx_req->msg[2];
+
+	send_data = msg_data[0] == HCLGE_SUPPORTED ? supported : advertising;
+
+	memcpy(&msg_data[2], &send_data, sizeof(unsigned long));
+	hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
+			   HCLGE_MBX_LINK_STAT_MODE, dest_vfid);
+}
+
 static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
 				     struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
@@ -410,7 +479,7 @@
 
 	hclge_reset_vf_queue(vport, queue_id);
 
-	/* send response msg to VF after queue reset complete*/
+	/* send response msg to VF after queue reset complete */
 	hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
 }
 
@@ -421,24 +490,89 @@
 	int ret;
 
 	dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
-		 mbx_req->mbx_src_vfid);
+		 vport->vport_id);
 
-	/* Acknowledge VF that PF is now about to assert the reset for the VF.
-	 * On receiving this message VF will get into pending state and will
-	 * start polling for the hardware reset completion status.
-	 */
-	ret = hclge_inform_reset_assert_to_vf(vport);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
-			ret, vport->vport_id);
-		return;
+	ret = hclge_func_reset_cmd(hdev, vport->vport_id);
+	hclge_gen_resp_to_vf(vport, mbx_req, ret, NULL, 0);
+}
+
+static void hclge_vf_keep_alive(struct hclge_vport *vport,
+				struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	vport->last_active_jiffies = jiffies;
+}
+
+static int hclge_set_vf_mtu(struct hclge_vport *vport,
+			    struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	int ret;
+	u32 mtu;
+
+	memcpy(&mtu, &mbx_req->msg[2], sizeof(mtu));
+	ret = hclge_set_vport_mtu(vport, mtu);
+
+	return hclge_gen_resp_to_vf(vport, mbx_req, ret, NULL, 0);
+}
+
+static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
+				    struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	u16 queue_id, qid_in_pf;
+	u8 resp_data[2];
+
+	memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
+	qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+	memcpy(resp_data, &qid_in_pf, sizeof(qid_in_pf));
+
+	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, 2);
+}
+
+static int hclge_get_rss_key(struct hclge_vport *vport,
+			     struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+#define HCLGE_RSS_MBX_RESP_LEN	8
+	u8 resp_data[HCLGE_RSS_MBX_RESP_LEN];
+	struct hclge_dev *hdev = vport->back;
+	u8 index;
+
+	index = mbx_req->msg[2];
+
+	memcpy(&resp_data[0],
+	       &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
+	       HCLGE_RSS_MBX_RESP_LEN);
+
+	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
+				    HCLGE_RSS_MBX_RESP_LEN);
+}
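
hclge_get_rss_key() hands the VF one 8-byte slice of the 40-byte RSS hash key per mailbox request, selected by msg[2]. A sketch of the matching VF-side loop; mbx_get_chunk() and fake_chunk() are hypothetical stand-ins for the VF mailbox call:

#include <stdint.h>
#include <string.h>

#define RSS_KEY_SIZE	40
#define RSS_MBX_CHUNK	8

/* fetch the 40-byte key in 8-byte chunks, one round trip per index 0..4 */
static void fetch_rss_key(uint8_t key[RSS_KEY_SIZE],
			  void (*mbx_get_chunk)(uint8_t index,
						uint8_t chunk[RSS_MBX_CHUNK]))
{
	uint8_t chunk[RSS_MBX_CHUNK];
	uint8_t index;

	for (index = 0; index < RSS_KEY_SIZE / RSS_MBX_CHUNK; index++) {
		mbx_get_chunk(index, chunk);
		memcpy(&key[index * RSS_MBX_CHUNK], chunk, RSS_MBX_CHUNK);
	}
}

static void fake_chunk(uint8_t index, uint8_t chunk[RSS_MBX_CHUNK])
{
	memset(chunk, index, RSS_MBX_CHUNK);	/* pretend PF response */
}

int main(void)
{
	uint8_t key[RSS_KEY_SIZE];

	fetch_rss_key(key, fake_chunk);
	return key[39] == 4 ? 0 : 1;	/* last chunk carries index 4 */
}
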
+
+static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code)
+{
+	switch (link_fail_code) {
+	case HCLGE_LF_REF_CLOCK_LOST:
+		dev_warn(&hdev->pdev->dev, "Reference clock lost!\n");
+		break;
+	case HCLGE_LF_XSFP_TX_DISABLE:
+		dev_warn(&hdev->pdev->dev, "SFP tx is disabled!\n");
+		break;
+	case HCLGE_LF_XSFP_ABSENT:
+		dev_warn(&hdev->pdev->dev, "SFP is absent!\n");
+		break;
+	default:
+		break;
 	}
+}
 
-	dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
-		 mbx_req->mbx_src_vfid);
-	/* reset this virtual function */
-	hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
+static void hclge_handle_link_change_event(struct hclge_dev *hdev,
+					   struct hclge_mbx_vf_to_pf_cmd *req)
+{
+#define LINK_STATUS_OFFSET	1
+#define LINK_FAIL_CODE_OFFSET	2
+
+	clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+	hclge_task_schedule(hdev, 0);
+
+	if (!req->msg[LINK_STATUS_OFFSET])
+		hclge_link_fail_parse(hdev, req->msg[LINK_FAIL_CODE_OFFSET]);
 }
 
 static bool hclge_cmd_crq_empty(struct hclge_hw *hw)
@@ -448,13 +582,23 @@
 	return tail == hw->cmq.crq.next_to_use;
 }
 
+static void hclge_handle_ncsi_error(struct hclge_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+
+	ae_dev->ops->set_default_reset_request(ae_dev, HNAE3_GLOBAL_RESET);
+	dev_warn(&hdev->pdev->dev, "requesting reset due to NCSI error\n");
+	ae_dev->ops->reset_event(hdev->pdev, NULL);
+}
+
 void hclge_mbx_handler(struct hclge_dev *hdev)
 {
 	struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
 	struct hclge_mbx_vf_to_pf_cmd *req;
 	struct hclge_vport *vport;
 	struct hclge_desc *desc;
-	int ret, flag;
+	unsigned int flag;
+	int ret;
 
 	/* handle all the mailbox requests in the queue */
 	while (!hclge_cmd_crq_empty(&hdev->hw)) {
@@ -498,7 +642,7 @@
 					ret);
 			break;
 		case HCLGE_MBX_SET_UNICAST:
-			ret = hclge_set_vf_uc_mac_addr(vport, req, true);
+			ret = hclge_set_vf_uc_mac_addr(vport, req);
 			if (ret)
 				dev_err(&hdev->pdev->dev,
 					"PF fail(%d) to set VF UC MAC Addr\n",
@@ -512,12 +656,19 @@
 					ret);
 			break;
 		case HCLGE_MBX_SET_VLAN:
-			ret = hclge_set_vf_vlan_cfg(vport, req, false);
+			ret = hclge_set_vf_vlan_cfg(vport, req);
 			if (ret)
 				dev_err(&hdev->pdev->dev,
 					"PF failed(%d) to config VF's VLAN\n",
 					ret);
 			break;
+		case HCLGE_MBX_SET_ALIVE:
+			ret = hclge_set_vf_alive(vport, req, false);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"PF failed(%d) to set VF's ALIVE\n",
+					ret);
+			break;
 		case HCLGE_MBX_GET_QINFO:
 			ret = hclge_get_vf_queue_info(vport, req, true);
 			if (ret)
@@ -525,6 +676,14 @@
 					"PF failed(%d) to get Q info for VF\n",
 					ret);
 			break;
+		case HCLGE_MBX_GET_QDEPTH:
+			ret = hclge_get_vf_queue_depth(vport, req, true);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"PF failed(%d) to get Q depth for VF\n",
+					ret);
+			break;
+
 		case HCLGE_MBX_GET_TCINFO:
 			ret = hclge_get_vf_tcinfo(vport, req, true);
 			if (ret)
@@ -545,6 +704,54 @@
 		case HCLGE_MBX_RESET:
 			hclge_reset_vf(vport, req);
 			break;
+		case HCLGE_MBX_KEEP_ALIVE:
+			hclge_vf_keep_alive(vport, req);
+			break;
+		case HCLGE_MBX_SET_MTU:
+			ret = hclge_set_vf_mtu(vport, req);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"VF fail(%d) to set mtu\n", ret);
+			break;
+		case HCLGE_MBX_GET_QID_IN_PF:
+			ret = hclge_get_queue_id_in_pf(vport, req);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"PF failed(%d) to get qid for VF\n",
+					ret);
+			break;
+		case HCLGE_MBX_GET_RSS_KEY:
+			ret = hclge_get_rss_key(vport, req);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"PF fail(%d) to get rss key for VF\n",
+					ret);
+			break;
+		case HCLGE_MBX_GET_LINK_MODE:
+			hclge_get_link_mode(vport, req);
+			break;
+		case HCLGE_MBX_GET_VF_FLR_STATUS:
+			mutex_lock(&hdev->vport_cfg_mutex);
+			hclge_rm_vport_all_mac_table(vport, true,
+						     HCLGE_MAC_ADDR_UC);
+			hclge_rm_vport_all_mac_table(vport, true,
+						     HCLGE_MAC_ADDR_MC);
+			hclge_rm_vport_all_vlan_table(vport, true);
+			mutex_unlock(&hdev->vport_cfg_mutex);
+			break;
+		case HCLGE_MBX_GET_MEDIA_TYPE:
+			ret = hclge_get_vf_media_type(vport, req);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"PF fail(%d) to media type for VF\n",
+					ret);
+			break;
+		case HCLGE_MBX_PUSH_LINK_STATUS:
+			hclge_handle_link_change_event(hdev, req);
+			break;
+		case HCLGE_MBX_NCSI_ERROR:
+			hclge_handle_ncsi_error(hdev);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
 				"un-supported mailbox message, code = %d\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 398971a..dc4dfd4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -3,19 +3,12 @@
 
 #include <linux/etherdevice.h>
 #include <linux/kernel.h>
+#include <linux/marvell_phy.h>
 
 #include "hclge_cmd.h"
 #include "hclge_main.h"
 #include "hclge_mdio.h"
 
-#define HCLGE_PHY_SUPPORTED_FEATURES	(SUPPORTED_Autoneg | \
-					 SUPPORTED_TP | \
-					 SUPPORTED_Pause | \
-					 SUPPORTED_Asym_Pause | \
-					 PHY_10BT_FEATURES | \
-					 PHY_100BT_FEATURES | \
-					 PHY_1000BT_FEATURES)
-
 enum hclge_mdio_c22_op_seq {
 	HCLGE_MDIO_C22_WRITE = 1,
 	HCLGE_MDIO_C22_READ = 2
@@ -54,7 +47,7 @@
 	struct hclge_desc desc;
 	int ret;
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+	if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
 		return 0;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
@@ -62,9 +55,9 @@
 	mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
 
 	hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
-			HCLGE_MDIO_PHYID_S, phyid);
+			HCLGE_MDIO_PHYID_S, (u32)phyid);
 	hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
-			HCLGE_MDIO_PHYREG_S, regnum);
+			HCLGE_MDIO_PHYREG_S, (u32)regnum);
 
 	hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
 	hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
@@ -92,7 +85,7 @@
 	struct hclge_desc desc;
 	int ret;
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+	if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
 		return 0;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
@@ -100,9 +93,9 @@
 	mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
 
 	hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
-			HCLGE_MDIO_PHYID_S, phyid);
+			HCLGE_MDIO_PHYID_S, (u32)phyid);
 	hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
-			HCLGE_MDIO_PHYREG_S, regnum);
+			HCLGE_MDIO_PHYREG_S, (u32)regnum);
 
 	hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
 	hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
@@ -129,12 +122,18 @@
 
 int hclge_mac_mdio_config(struct hclge_dev *hdev)
 {
+#define PHY_INEXISTENT	255
+
 	struct hclge_mac *mac = &hdev->hw.mac;
 	struct phy_device *phydev;
 	struct mii_bus *mdio_bus;
 	int ret;
 
-	if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
+	if (hdev->hw.mac.phy_addr == PHY_INEXISTENT) {
+		dev_info(&hdev->pdev->dev,
+			 "no phy device is connected to mdio bus\n");
+		return 0;
+	} else if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
 		dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
 			hdev->hw.mac.phy_addr);
 		return -EINVAL;
@@ -181,6 +180,10 @@
 	int duplex, speed;
 	int ret;
 
+	/* When the phy link is down, do nothing */
+	if (netdev->phydev->link == 0)
+		return;
+
 	speed = netdev->phydev->speed;
 	duplex = netdev->phydev->duplex;
 
@@ -193,16 +196,21 @@
 		netdev_err(netdev, "failed to configure flow control.\n");
 }
 
-int hclge_mac_connect_phy(struct hclge_dev *hdev)
+int hclge_mac_connect_phy(struct hnae3_handle *handle)
 {
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
 	struct net_device *netdev = hdev->vport[0].nic.netdev;
 	struct phy_device *phydev = hdev->hw.mac.phydev;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 	int ret;
 
 	if (!phydev)
 		return 0;
 
-	phydev->supported &= ~SUPPORTED_FIBRE;
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported);
+
+	phydev->dev_flags |= MARVELL_PHY_LED0_LINK_LED1_ACTIVE;
 
 	ret = phy_connect_direct(netdev, phydev,
 				 hclge_mac_adjust_link,
@@ -212,14 +220,26 @@
 		return ret;
 	}
 
-	phydev->supported &= HCLGE_PHY_SUPPORTED_FEATURES;
-	phydev->advertising = phydev->supported;
+	linkmode_copy(mask, hdev->hw.mac.supported);
+	linkmode_and(phydev->supported, phydev->supported, mask);
+	linkmode_copy(phydev->advertising, phydev->supported);
+
+	/* The supported flags include Pause and Asym Pause, but the default
+	 * advertising should be rx on, tx on, so clear Asym Pause from the
+	 * advertising flags
+	 */
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+			   phydev->advertising);
+
+	phy_attached_info(phydev);
 
 	return 0;
 }
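
The linkmode_*() helpers above operate on the multi-word ethtool link-mode bitmaps; the sequence is clear fibre, AND with the MAC's supported set, copy to advertising, then drop Asym Pause. A single-word user-space sketch of the same masking (the MODE_* bit positions are illustrative, not the real ethtool bit numbers):

#include <stdio.h>

#define MODE_FIBRE	(1u << 0)
#define MODE_1000T_FULL	(1u << 1)
#define MODE_PAUSE	(1u << 2)
#define MODE_ASYM_PAUSE	(1u << 3)

int main(void)
{
	unsigned int phy_supported = MODE_FIBRE | MODE_1000T_FULL |
				     MODE_PAUSE | MODE_ASYM_PAUSE;
	unsigned int mac_supported = MODE_1000T_FULL | MODE_PAUSE |
				     MODE_ASYM_PAUSE;
	unsigned int advertising;

	phy_supported &= ~MODE_FIBRE;		/* linkmode_clear_bit() */
	phy_supported &= mac_supported;		/* linkmode_and() */
	advertising = phy_supported;		/* linkmode_copy() */
	advertising &= ~MODE_ASYM_PAUSE;	/* default to rx on, tx on */

	printf("advertising mask: 0x%x\n", advertising);
	return 0;
}
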
 
-void hclge_mac_disconnect_phy(struct hclge_dev *hdev)
+void hclge_mac_disconnect_phy(struct hnae3_handle *handle)
 {
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 
 	if (!phydev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
index 5fbf7dd..dd9a121 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
@@ -1,12 +1,12 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_MDIO_H
 #define __HCLGE_MDIO_H
 
 int hclge_mac_mdio_config(struct hclge_dev *hdev);
-int hclge_mac_connect_phy(struct hclge_dev *hdev);
-void hclge_mac_disconnect_phy(struct hclge_dev *hdev);
+int hclge_mac_connect_phy(struct hnae3_handle *handle);
+void hclge_mac_disconnect_phy(struct hnae3_handle *handle);
 void hclge_mac_start_phy(struct hclge_dev *hdev);
 void hclge_mac_stop_phy(struct hclge_dev *hdev);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 48235dc..62399cc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -43,18 +43,23 @@
 static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
 				  u8 *ir_b, u8 *ir_u, u8 *ir_s)
 {
+#define DIVISOR_CLK		(1000 * 8)
+#define DIVISOR_IR_B_126	(126 * DIVISOR_CLK)
+
 	const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
 		6 * 256,        /* Priority level */
 		6 * 32,         /* Priority group level */
 		6 * 8,          /* Port level */
 		6 * 256         /* Qset level */
 	};
-	u8 ir_u_calc = 0, ir_s_calc = 0;
+	u8 ir_u_calc = 0;
+	u8 ir_s_calc = 0;
 	u32 ir_calc;
 	u32 tick;
 
 	/* Calc tick */
-	if (shaper_level >= HCLGE_SHAPER_LVL_CNT)
+	if (shaper_level >= HCLGE_SHAPER_LVL_CNT ||
+	    ir > HCLGE_ETHER_MAX_RATE)
 		return -EINVAL;
 
 	tick = tick_array[shaper_level];
@@ -66,7 +71,7 @@
 	 * ir_calc = ---------------- * 1000
 	 *		tick * 1
 	 */
-	ir_calc = (1008000 + (tick >> 1) - 1) / tick;
+	ir_calc = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick;
 
 	if (ir_calc == ir) {
 		*ir_b = 126;
@@ -76,29 +81,27 @@
 		return 0;
 	} else if (ir_calc > ir) {
 		/* Increasing the denominator to select ir_s value */
-		while (ir_calc > ir) {
+		while (ir_calc >= ir && ir) {
 			ir_s_calc++;
-			ir_calc = 1008000 / (tick * (1 << ir_s_calc));
+			ir_calc = DIVISOR_IR_B_126 / (tick * (1 << ir_s_calc));
 		}
 
-		if (ir_calc == ir)
-			*ir_b = 126;
-		else
-			*ir_b = (ir * tick * (1 << ir_s_calc) + 4000) / 8000;
+		*ir_b = (ir * tick * (1 << ir_s_calc) + (DIVISOR_CLK >> 1)) /
+			DIVISOR_CLK;
 	} else {
 		/* Increasing the numerator to select ir_u value */
 		u32 numerator;
 
 		while (ir_calc < ir) {
 			ir_u_calc++;
-			numerator = 1008000 * (1 << ir_u_calc);
+			numerator = DIVISOR_IR_B_126 * (1 << ir_u_calc);
 			ir_calc = (numerator + (tick >> 1)) / tick;
 		}
 
 		if (ir_calc == ir) {
 			*ir_b = 126;
 		} else {
-			u32 denominator = (8000 * (1 << --ir_u_calc));
+			u32 denominator = DIVISOR_CLK * (1 << --ir_u_calc);
 			*ir_b = (ir * tick + (denominator >> 1)) / denominator;
 		}
 	}
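
hclge_shaper_para_calc() pivots on the base rate that ir_b = 126 can represent at a given tick; larger targets raise the numerator via ir_u, smaller ones raise the denominator via ir_s. A stand-alone arithmetic check of that base case, mirroring the patch's formula:

#include <stdio.h>

#define DIVISOR_CLK		(1000 * 8)
#define DIVISOR_IR_B_126	(126 * DIVISOR_CLK)

int main(void)
{
	unsigned int tick = 6 * 8;	/* port-level entry of tick_array */
	/* rate represented exactly by ir_b = 126, ir_u = 0, ir_s = 0 */
	unsigned int base = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick;
	/* each ir_s step doubles the denominator, halving the rate */
	unsigned int one_ir_s = DIVISOR_IR_B_126 / (tick * 2);

	printf("base %u, after one ir_s step %u\n", base, one_ir_s);
	return 0;
}
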
@@ -119,14 +122,13 @@
 	      opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT))
 		return -EINVAL;
 
-	for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+	for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1; i++) {
 		hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
-		if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1))
-			desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-		else
-			desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 	}
 
+	hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
+
 	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
 	if (ret)
 		return ret;
@@ -172,7 +174,7 @@
 				  u8 pfc_bitmap)
 {
 	struct hclge_desc desc;
-	struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data;
+	struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false);
 
@@ -188,11 +190,12 @@
 	struct hclge_cfg_pause_param_cmd *pause_param;
 	struct hclge_desc desc;
 
-	pause_param = (struct hclge_cfg_pause_param_cmd *)&desc.data;
+	pause_param = (struct hclge_cfg_pause_param_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_MAC_PARA, false);
 
 	ether_addr_copy(pause_param->mac_addr, addr);
+	ether_addr_copy(pause_param->mac_addr_extra, addr);
 	pause_param->pause_trans_gap = pause_trans_gap;
 	pause_param->pause_trans_time = cpu_to_le16(pause_trans_time);
 
@@ -207,7 +210,7 @@
 	u8 trans_gap;
 	int ret;
 
-	pause_param = (struct hclge_cfg_pause_param_cmd *)&desc.data;
+	pause_param = (struct hclge_cfg_pause_param_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_MAC_PARA, true);
 
@@ -218,8 +221,7 @@
 	trans_gap = pause_param->pause_trans_gap;
 	trans_time = le16_to_cpu(pause_param->pause_trans_time);
 
-	return hclge_pause_param_cfg(hdev, mac_addr, trans_gap,
-					 trans_time);
+	return hclge_pause_param_cfg(hdev, mac_addr, trans_gap, trans_time);
 }
 
 static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
@@ -297,7 +299,7 @@
 }
 
 static int hclge_tm_q_to_qs_map_cfg(struct hclge_dev *hdev,
-				    u8 q_id, u16 qs_id)
+				    u16 q_id, u16 qs_id)
 {
 	struct hclge_nq_to_qs_link_cmd *map;
 	struct hclge_desc desc;
@@ -360,29 +362,36 @@
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
-				    enum hclge_shap_bucket bucket, u8 pg_id,
-				    u8 ir_b, u8 ir_u, u8 ir_s, u8 bs_b, u8 bs_s)
+static u32 hclge_tm_get_shapping_para(u8 ir_b, u8 ir_u, u8 ir_s,
+				      u8 bs_b, u8 bs_s)
 {
-	struct hclge_pg_shapping_cmd *shap_cfg_cmd;
-	enum hclge_opcode_type opcode;
-	struct hclge_desc desc;
 	u32 shapping_para = 0;
 
-	opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING :
-		HCLGE_OPC_TM_PG_C_SHAPPING;
-	hclge_cmd_setup_basic_desc(&desc, opcode, false);
-
-	shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
-
-	shap_cfg_cmd->pg_id = pg_id;
-
 	hclge_tm_set_field(shapping_para, IR_B, ir_b);
 	hclge_tm_set_field(shapping_para, IR_U, ir_u);
 	hclge_tm_set_field(shapping_para, IR_S, ir_s);
 	hclge_tm_set_field(shapping_para, BS_B, bs_b);
 	hclge_tm_set_field(shapping_para, BS_S, bs_s);
 
+	return shapping_para;
+}
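
hclge_tm_get_shapping_para() folds the five shaper sub-parameters into one u32 via the hclge_tm_set_field() mask/shift macros before the word is written little-endian into the descriptor. A generic sketch of that packing pattern; the shift values below are illustrative assumptions, the real layout lives in hclge_tm.h:

#include <stdint.h>
#include <stdio.h>

/* illustrative field layout only; not the hardware's actual offsets */
#define IR_B_SHIFT	0
#define IR_U_SHIFT	8
#define IR_S_SHIFT	12
#define BS_B_SHIFT	16
#define BS_S_SHIFT	21

static uint32_t get_shapping_para(uint8_t ir_b, uint8_t ir_u, uint8_t ir_s,
				  uint8_t bs_b, uint8_t bs_s)
{
	return ((uint32_t)ir_b << IR_B_SHIFT) |
	       ((uint32_t)ir_u << IR_U_SHIFT) |
	       ((uint32_t)ir_s << IR_S_SHIFT) |
	       ((uint32_t)bs_b << BS_B_SHIFT) |
	       ((uint32_t)bs_s << BS_S_SHIFT);
}

int main(void)
{
	printf("0x%08x\n", get_shapping_para(126, 0, 0, 5, 20));
	return 0;
}
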
+
+static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
+				    enum hclge_shap_bucket bucket, u8 pg_id,
+				    u32 shapping_para)
+{
+	struct hclge_pg_shapping_cmd *shap_cfg_cmd;
+	enum hclge_opcode_type opcode;
+	struct hclge_desc desc;
+
+	opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING :
+		 HCLGE_OPC_TM_PG_C_SHAPPING;
+	hclge_cmd_setup_basic_desc(&desc, opcode, false);
+
+	shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
+
+	shap_cfg_cmd->pg_id = pg_id;
+
 	shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -392,11 +401,11 @@
 {
 	struct hclge_port_shapping_cmd *shap_cfg_cmd;
 	struct hclge_desc desc;
-	u32 shapping_para = 0;
 	u8 ir_u, ir_b, ir_s;
+	u32 shapping_para;
 	int ret;
 
-	ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE,
+	ret = hclge_shaper_para_calc(hdev->hw.mac.speed,
 				     HCLGE_SHAPER_LVL_PORT,
 				     &ir_b, &ir_u, &ir_s);
 	if (ret)
@@ -405,11 +414,9 @@
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false);
 	shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
 
-	hclge_tm_set_field(shapping_para, IR_B, ir_b);
-	hclge_tm_set_field(shapping_para, IR_U, ir_u);
-	hclge_tm_set_field(shapping_para, IR_S, ir_s);
-	hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF);
-	hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF);
+	shapping_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+						   HCLGE_SHAPER_BS_U_DEF,
+						   HCLGE_SHAPER_BS_S_DEF);
 
 	shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para);
 
@@ -418,16 +425,14 @@
 
 static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
 				     enum hclge_shap_bucket bucket, u8 pri_id,
-				     u8 ir_b, u8 ir_u, u8 ir_s,
-				     u8 bs_b, u8 bs_s)
+				     u32 shapping_para)
 {
 	struct hclge_pri_shapping_cmd *shap_cfg_cmd;
 	enum hclge_opcode_type opcode;
 	struct hclge_desc desc;
-	u32 shapping_para = 0;
 
 	opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING :
-		HCLGE_OPC_TM_PRI_C_SHAPPING;
+		 HCLGE_OPC_TM_PRI_C_SHAPPING;
 
 	hclge_cmd_setup_basic_desc(&desc, opcode, false);
 
@@ -435,12 +440,6 @@
 
 	shap_cfg_cmd->pri_id = pri_id;
 
-	hclge_tm_set_field(shapping_para, IR_B, ir_b);
-	hclge_tm_set_field(shapping_para, IR_U, ir_u);
-	hclge_tm_set_field(shapping_para, IR_S, ir_s);
-	hclge_tm_set_field(shapping_para, BS_B, bs_b);
-	hclge_tm_set_field(shapping_para, BS_S, bs_s);
-
 	shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -516,20 +515,48 @@
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
 	struct hclge_dev *hdev = vport->back;
+	u16 max_rss_size;
 	u8 i;
 
-	vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
-	kinfo->num_tc =
-		min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc);
-	kinfo->rss_size
-		= min_t(u16, hdev->rss_size_max,
-			kinfo->num_tqps / kinfo->num_tc);
-	vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
+	/* TC configuration is shared by the PF and VFs on one port; for
+	 * simplicity a VF is limited to a single TC. A VF's vport_id is
+	 * non-zero.
+	 */
+	kinfo->num_tc = vport->vport_id ? 1 :
+			min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc);
+	vport->qs_offset = (vport->vport_id ? hdev->tm_info.num_tc : 0) +
+				(vport->vport_id ? (vport->vport_id - 1) : 0);
+
+	max_rss_size = min_t(u16, hdev->rss_size_max,
+			     vport->alloc_tqps / kinfo->num_tc);
+
+	/* Set to user value, no larger than max_rss_size. */
+	if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size &&
+	    kinfo->req_rss_size <= max_rss_size) {
+		dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
+			 kinfo->rss_size, kinfo->req_rss_size);
+		kinfo->rss_size = kinfo->req_rss_size;
+	} else if (kinfo->rss_size > max_rss_size ||
+		   (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
+		/* If the user has not set rss_size, clamp it to the number of
+		 * valid MSI vectors so that tqps map one-to-one onto irqs by
+		 * default.
+		 */
+		if (!kinfo->req_rss_size)
+			max_rss_size = min_t(u16, max_rss_size,
+					     (hdev->num_nic_msi - 1) /
+					     kinfo->num_tc);
+
+		/* Set to the maximum specification value (max_rss_size). */
+		kinfo->rss_size = max_rss_size;
+	}
+
+	kinfo->num_tqps = kinfo->num_tc * kinfo->rss_size;
 	vport->dwrr = 100;  /* 100 percent as init */
 	vport->alloc_rss_size = kinfo->rss_size;
+	vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
 
-	for (i = 0; i < kinfo->num_tc; i++) {
-		if (hdev->hw_tc_map & BIT(i)) {
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		if (hdev->hw_tc_map & BIT(i) && i < kinfo->num_tc) {
 			kinfo->tc_info[i].enable = true;
 			kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
 			kinfo->tc_info[i].tqp_count = kinfo->rss_size;
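With concrete numbers the new TC/queue mapping above is easier to follow. A hedged sketch (plain C, all values invented) of the qs_offset and rss_size selection for the default path where the user did not request a size:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	uint16_t num_tc = 4, vport_id = 2;	/* hypothetical VF #2 */
	uint16_t alloc_tqps = 16, rss_size_max = 8;
	uint16_t num_nic_msi = 9, req_rss_size = 0;	/* user left rss unset */

	/* VFs get one TC; qsets 0..num_tc-1 belong to the PF, then one per VF */
	uint16_t vf_tc = vport_id ? 1 : num_tc;
	uint16_t qs_offset = vport_id ? num_tc + (vport_id - 1) : 0;

	uint16_t max_rss = MIN(rss_size_max, alloc_tqps / vf_tc);
	if (!req_rss_size)	/* keep the tqp <-> irq mapping one-to-one */
		max_rss = MIN(max_rss, (num_nic_msi - 1) / vf_tc);

	printf("num_tc=%u qs_offset=%u rss_size=%u num_tqps=%u\n",
	       vf_tc, qs_offset, max_rss, vf_tc * max_rss);
	/* -> num_tc=1 qs_offset=5 rss_size=8 num_tqps=8 */
	return 0;
}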
@@ -575,8 +602,10 @@
 		hdev->tm_info.prio_tc[i] =
 			(i >= hdev->tm_info.num_tc) ? 0 : i;
 
-	/* DCB is enabled if we have more than 1 TC */
-	if (hdev->tm_info.num_tc > 1)
+	/* DCB is enabled if we have more than 1 TC or pfc_en is
+	 * non-zero.
+	 */
+	if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
 		hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
 	else
 		hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
@@ -584,12 +613,14 @@
 
 static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
 {
+#define BW_PERCENT	100
+
 	u8 i;
 
 	for (i = 0; i < hdev->tm_info.num_pg; i++) {
 		int k;
 
-		hdev->tm_info.pg_dwrr[i] = i ? 0 : 100;
+		hdev->tm_info.pg_dwrr[i] = i ? 0 : BW_PERCENT;
 
 		hdev->tm_info.pg_info[i].pg_id = i;
 		hdev->tm_info.pg_info[i].pg_sch_mode = HCLGE_SCH_MODE_DWRR;
@@ -601,7 +632,7 @@
 
 		hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map;
 		for (k = 0; k < hdev->tm_info.num_tc; k++)
-			hdev->tm_info.pg_info[i].tc_dwrr[k] = 100;
+			hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT;
 	}
 }
 
@@ -623,12 +654,8 @@
 	}
 }
 
-static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
+static void hclge_tm_schd_info_init(struct hclge_dev *hdev)
 {
-	if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
-	    (hdev->tm_info.num_pg != 1))
-		return -EINVAL;
-
 	hclge_tm_pg_info_init(hdev);
 
 	hclge_tm_tc_info_init(hdev);
@@ -636,8 +663,6 @@
 	hclge_tm_vport_info_update(hdev);
 
 	hclge_pfc_info_init(hdev);
-
-	return 0;
 }
 
 static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev)
@@ -662,6 +687,7 @@
 static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev)
 {
 	u8 ir_u, ir_b, ir_s;
+	u32 shaper_para;
 	int ret;
 	u32 i;
 
@@ -679,18 +705,21 @@
 		if (ret)
 			return ret;
 
+		shaper_para = hclge_tm_get_shapping_para(0, 0, 0,
+							 HCLGE_SHAPER_BS_U_DEF,
+							 HCLGE_SHAPER_BS_S_DEF);
 		ret = hclge_tm_pg_shapping_cfg(hdev,
 					       HCLGE_TM_SHAP_C_BUCKET, i,
-					       0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
-					       HCLGE_SHAPER_BS_S_DEF);
+					       shaper_para);
 		if (ret)
 			return ret;
 
+		shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+							 HCLGE_SHAPER_BS_U_DEF,
+							 HCLGE_SHAPER_BS_S_DEF);
 		ret = hclge_tm_pg_shapping_cfg(hdev,
 					       HCLGE_TM_SHAP_P_BUCKET, i,
-					       ir_b, ir_u, ir_s,
-					       HCLGE_SHAPER_BS_U_DEF,
-					       HCLGE_SHAPER_BS_S_DEF);
+					       shaper_para);
 		if (ret)
 			return ret;
 	}
@@ -710,8 +739,7 @@
 	/* pg to prio */
 	for (i = 0; i < hdev->tm_info.num_pg; i++) {
 		/* Cfg dwrr */
-		ret = hclge_tm_pg_weight_cfg(hdev, i,
-					     hdev->tm_info.pg_dwrr[i]);
+		ret = hclge_tm_pg_weight_cfg(hdev, i, hdev->tm_info.pg_dwrr[i]);
 		if (ret)
 			return ret;
 	}
@@ -752,13 +780,17 @@
 
 	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
 		/* Cfg qs -> pri mapping, one by one mapping */
-		for (k = 0; k < hdev->num_alloc_vport; k++)
-			for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		for (k = 0; k < hdev->num_alloc_vport; k++) {
+			struct hnae3_knic_private_info *kinfo =
+				&vport[k].nic.kinfo;
+
+			for (i = 0; i < kinfo->num_tc; i++) {
 				ret = hclge_tm_qs_to_pri_map_cfg(
 					hdev, vport[k].qs_offset + i, i);
 				if (ret)
 					return ret;
 			}
+		}
 	} else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
 		/* Cfg qs -> pri mapping,  qs = tc, pri = vf, 8 qs -> 1 pri */
 		for (k = 0; k < hdev->num_alloc_vport; k++)
@@ -787,6 +819,7 @@
 static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev)
 {
 	u8 ir_u, ir_b, ir_s;
+	u32 shaper_para;
 	int ret;
 	u32 i;
 
@@ -798,17 +831,19 @@
 		if (ret)
 			return ret;
 
-		ret = hclge_tm_pri_shapping_cfg(
-			hdev, HCLGE_TM_SHAP_C_BUCKET, i,
-			0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
-			HCLGE_SHAPER_BS_S_DEF);
+		shaper_para = hclge_tm_get_shapping_para(0, 0, 0,
+							 HCLGE_SHAPER_BS_U_DEF,
+							 HCLGE_SHAPER_BS_S_DEF);
+		ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i,
+						shaper_para);
 		if (ret)
 			return ret;
 
-		ret = hclge_tm_pri_shapping_cfg(
-			hdev, HCLGE_TM_SHAP_P_BUCKET, i,
-			ir_b, ir_u, ir_s, HCLGE_SHAPER_BS_U_DEF,
-			HCLGE_SHAPER_BS_S_DEF);
+		shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+							 HCLGE_SHAPER_BS_U_DEF,
+							 HCLGE_SHAPER_BS_S_DEF);
+		ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i,
+						shaper_para);
 		if (ret)
 			return ret;
 	}
@@ -820,6 +855,7 @@
 {
 	struct hclge_dev *hdev = vport->back;
 	u8 ir_u, ir_b, ir_s;
+	u32 shaper_para;
 	int ret;
 
 	ret = hclge_shaper_para_calc(vport->bw_limit, HCLGE_SHAPER_LVL_VF,
@@ -827,18 +863,19 @@
 	if (ret)
 		return ret;
 
+	shaper_para = hclge_tm_get_shapping_para(0, 0, 0,
+						 HCLGE_SHAPER_BS_U_DEF,
+						 HCLGE_SHAPER_BS_S_DEF);
 	ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET,
-					vport->vport_id,
-					0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
-					HCLGE_SHAPER_BS_S_DEF);
+					vport->vport_id, shaper_para);
 	if (ret)
 		return ret;
 
+	shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+						 HCLGE_SHAPER_BS_U_DEF,
+						 HCLGE_SHAPER_BS_S_DEF);
 	ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET,
-					vport->vport_id,
-					ir_b, ir_u, ir_s,
-					HCLGE_SHAPER_BS_U_DEF,
-					HCLGE_SHAPER_BS_S_DEF);
+					vport->vport_id, shaper_para);
 	if (ret)
 		return ret;
 
@@ -933,6 +970,36 @@
 	return 0;
 }
 
+static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev)
+{
+#define DEFAULT_TC_WEIGHT	1
+#define DEFAULT_TC_OFFSET	14
+
+	struct hclge_ets_tc_weight_cmd *ets_weight;
+	struct hclge_desc desc;
+	unsigned int i;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, false);
+	ets_weight = (struct hclge_ets_tc_weight_cmd *)desc.data;
+
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		struct hclge_pg_info *pg_info;
+
+		ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT;
+
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		pg_info =
+			&hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
+		ets_weight->tc_weight[i] = pg_info->tc_dwrr[i];
+	}
+
+	ets_weight->weight_offset = DEFAULT_TC_OFFSET;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
 static int hclge_tm_pri_vnet_base_dwrr_pri_cfg(struct hclge_vport *vport)
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
@@ -982,6 +1049,19 @@
 		ret = hclge_tm_pri_tc_base_dwrr_cfg(hdev);
 		if (ret)
 			return ret;
+
+		if (!hnae3_dev_dcb_supported(hdev))
+			return 0;
+
+		ret = hclge_tm_ets_tc_dwrr_cfg(hdev);
+		if (ret == -EOPNOTSUPP) {
+			dev_warn(&hdev->pdev->dev,
+				 "fw %08x does't support ets tc weight cmd\n",
+				 hdev->fw_version);
+			ret = 0;
+		}
+
+		return ret;
 	} else {
 		ret = hclge_tm_pri_vnet_base_dwrr_cfg(hdev);
 		if (ret)
@@ -991,7 +1071,7 @@
 	return 0;
 }
 
-int hclge_tm_map_cfg(struct hclge_dev *hdev)
+static int hclge_tm_map_cfg(struct hclge_dev *hdev)
 {
 	int ret;
 
@@ -1057,6 +1137,9 @@
 	int ret;
 	u8 i;
 
+	if (vport->vport_id >= HNAE3_MAX_TC)
+		return -EINVAL;
+
 	ret = hclge_tm_pri_schd_mode_cfg(hdev, vport->vport_id);
 	if (ret)
 		return ret;
@@ -1106,7 +1189,7 @@
 	return 0;
 }
 
-int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
 {
 	int ret;
 
@@ -1117,7 +1200,7 @@
 	return hclge_tm_lvl34_schd_mode_cfg(hdev);
 }
 
-static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
+int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
 {
 	int ret;
 
@@ -1145,8 +1228,8 @@
 	struct hclge_mac *mac = &hdev->hw.mac;
 
 	return hclge_pause_param_cfg(hdev, mac->mac_addr,
-					 HCLGE_DEFAULT_PAUSE_TRANS_GAP,
-					 HCLGE_DEFAULT_PAUSE_TRANS_TIME);
+				     HCLGE_DEFAULT_PAUSE_TRANS_GAP,
+				     HCLGE_DEFAULT_PAUSE_TRANS_TIME);
 }
 
 static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
@@ -1158,7 +1241,7 @@
 				HCLGE_RX_MAC_PAUSE_EN_MSK;
 
 	return hclge_pfc_pause_en_cfg(hdev, enable_bitmap,
-				      hdev->tm_info.hw_pfc_map);
+				      hdev->tm_info.pfc_en);
 }
 
 /* Each TC has 1024 queue sets for backpressure, it divides to
@@ -1227,10 +1310,23 @@
 	return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
 }
 
-int hclge_pause_setup_hw(struct hclge_dev *hdev)
+static int hclge_tm_bp_setup(struct hclge_dev *hdev)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		ret = hclge_bp_setup_hw(hdev, i);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init)
 {
 	int ret;
-	u8 i;
 
 	ret = hclge_pause_param_setup_hw(hdev);
 	if (ret)
@@ -1244,29 +1340,29 @@
 	if (!hnae3_dev_dcb_supported(hdev))
 		return 0;
 
-	/* When MAC is GE Mode, hdev does not support pfc setting */
+	/* The GE MAC does not support PFC; when the driver is initializing
+	 * and the MAC is in GE mode, ignore the error so that initialization
+	 * can complete.
+	 */
 	ret = hclge_pfc_setup_hw(hdev);
-	if (ret)
-		dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
-
-	for (i = 0; i < hdev->tm_info.num_tc; i++) {
-		ret = hclge_bp_setup_hw(hdev, i);
-		if (ret)
-			return ret;
+	if (init && ret == -EOPNOTSUPP)
+		dev_warn(&hdev->pdev->dev, "GE MAC does not support pfc\n");
+	else if (ret) {
+		dev_err(&hdev->pdev->dev, "config pfc failed! ret = %d\n",
+			ret);
+		return ret;
 	}
 
-	return 0;
+	return hclge_tm_bp_setup(hdev);
 }
 
-int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
+void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
 {
 	struct hclge_vport *vport = hdev->vport;
 	struct hnae3_knic_private_info *kinfo;
 	u32 i, k;
 
 	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
-		if (prio_tc[i] >= hdev->tm_info.num_tc)
-			return -EINVAL;
 		hdev->tm_info.prio_tc[i] = prio_tc[i];
 
 		for (k = 0;  k < hdev->num_alloc_vport; k++) {
@@ -1274,12 +1370,12 @@
 			kinfo->prio_tc[i] = prio_tc[i];
 		}
 	}
-	return 0;
 }
 
 void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
 {
-	u8 i, bit_map = 0;
+	u8 bit_map = 0;
+	u8 i;
 
 	hdev->tm_info.num_tc = num_tc;
 
@@ -1296,7 +1392,20 @@
 	hclge_tm_schd_info_init(hdev);
 }
 
-int hclge_tm_init_hw(struct hclge_dev *hdev)
+void hclge_tm_pfc_info_update(struct hclge_dev *hdev)
+{
+	/* DCB is enabled if we have more than 1 TC or pfc_en is
+	 * non-zero.
+	 */
+	if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
+		hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+	else
+		hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+
+	hclge_pfc_info_init(hdev);
+}
+
+int hclge_tm_init_hw(struct hclge_dev *hdev, bool init)
 {
 	int ret;
 
@@ -1308,7 +1417,7 @@
 	if (ret)
 		return ret;
 
-	ret = hclge_pause_setup_hw(hdev);
+	ret = hclge_pause_setup_hw(hdev, init);
 	if (ret)
 		return ret;
 
@@ -1317,15 +1426,32 @@
 
 int hclge_tm_schd_init(struct hclge_dev *hdev)
 {
-	int ret;
-
 	/* fc_mode is HCLGE_FC_FULL on reset */
 	hdev->tm_info.fc_mode = HCLGE_FC_FULL;
 	hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
 
-	ret = hclge_tm_schd_info_init(hdev);
+	if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE &&
+	    hdev->tm_info.num_pg != 1)
+		return -EINVAL;
+
+	hclge_tm_schd_info_init(hdev);
+
+	return hclge_tm_init_hw(hdev, true);
+}
+
+int hclge_tm_vport_map_update(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int ret;
+
+	hclge_tm_vport_tc_info_update(vport);
+
+	ret = hclge_vport_q_to_qs_map(hdev, vport);
 	if (ret)
 		return ret;
 
-	return hclge_tm_init_hw(hdev);
+	if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE))
+		return 0;
+
+	return hclge_tm_bp_setup(hdev);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index dd4c194..260f22d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_TM_H
@@ -12,7 +12,7 @@
 
 #define HCLGE_TM_PORT_BASE_MODE_MSK	BIT(0)
 
-#define HCLGE_DEFAULT_PAUSE_TRANS_GAP	0xFF
+#define HCLGE_DEFAULT_PAUSE_TRANS_GAP	0x7F
 #define HCLGE_DEFAULT_PAUSE_TRANS_TIME	0xFFFF
 
 /* SP or DWRR */
@@ -40,6 +40,13 @@
 	__le16 qset_id;
 };
 
+struct hclge_tqp_tx_queue_tc_cmd {
+	__le16 queue_id;
+	__le16 rsvd;
+	u8 tc_id;
+	u8 rev[3];
+};
+
 struct hclge_pg_weight_cmd {
 	u8 pg_id;
 	u8 dwrr;
@@ -55,6 +62,12 @@
 	u8 dwrr;
 };
 
+struct hclge_ets_tc_weight_cmd {
+	u8 tc_weight[HNAE3_MAX_TC];
+	u8 weight_offset;
+	u8 rsvd[15];
+};
+
 #define HCLGE_TM_SHAP_IR_B_MSK  GENMASK(7, 0)
 #define HCLGE_TM_SHAP_IR_B_LSH	0
 #define HCLGE_TM_SHAP_IR_U_MSK  GENMASK(11, 8)
@@ -106,6 +119,10 @@
 	u8 pause_trans_gap;
 	u8 rsvd;
 	__le16 pause_trans_time;
+	u8 rsvd1[6];
+	/* extra MAC address used to double-check the pause frame */
+	u8 mac_addr_extra[ETH_ALEN];
+	u16 rsvd2;
 };
 
 struct hclge_pfc_stats_cmd {
@@ -125,13 +142,14 @@
 				       (HCLGE_TM_SHAP_##string##_LSH))
 
 int hclge_tm_schd_init(struct hclge_dev *hdev);
-int hclge_pause_setup_hw(struct hclge_dev *hdev);
-int hclge_tm_schd_mode_hw(struct hclge_dev *hdev);
-int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
+int hclge_tm_vport_map_update(struct hclge_dev *hdev);
+int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init);
+int hclge_tm_schd_setup_hw(struct hclge_dev *hdev);
+void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
 void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+void hclge_tm_pfc_info_update(struct hclge_dev *hdev);
 int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
-int hclge_tm_map_cfg(struct hclge_dev *hdev);
-int hclge_tm_init_hw(struct hclge_dev *hdev);
+int hclge_tm_init_hw(struct hclge_dev *hdev, bool init);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
 int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
index fb93bbd..53804d9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
@@ -3,7 +3,7 @@
 # Makefile for the HISILICON network device drivers.
 #
 
-ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
 
 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o
-hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
\ No newline at end of file
+hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index fb471fe..d5d1cc5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -27,26 +27,39 @@
 	return ring->desc_num - used - 1;
 }
 
+static int hclgevf_is_valid_csq_clean_head(struct hclgevf_cmq_ring *ring,
+					   int head)
+{
+	int ntu = ring->next_to_use;
+	int ntc = ring->next_to_clean;
+
+	if (ntu > ntc)
+		return head >= ntc && head <= ntu;
+
+	return head >= ntc || head <= ntu;
+}
+
 static int hclgevf_cmd_csq_clean(struct hclgevf_hw *hw)
 {
+	struct hclgevf_dev *hdev = container_of(hw, struct hclgevf_dev, hw);
 	struct hclgevf_cmq_ring *csq = &hw->cmq.csq;
-	u16 ntc = csq->next_to_clean;
-	struct hclgevf_desc *desc;
-	int clean = 0;
+	int clean;
 	u32 head;
 
-	desc = &csq->desc[ntc];
 	head = hclgevf_read_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG);
-	while (head != ntc) {
-		memset(desc, 0, sizeof(*desc));
-		ntc++;
-		if (ntc == csq->desc_num)
-			ntc = 0;
-		desc = &csq->desc[ntc];
-		clean++;
-	}
-	csq->next_to_clean = ntc;
+	rmb(); /* Make sure head is ready before touching any data */
 
+	if (!hclgevf_is_valid_csq_clean_head(csq, head)) {
+		dev_warn(&hdev->pdev->dev, "wrong cmd head (%d, %d-%d)\n", head,
+			 csq->next_to_use, csq->next_to_clean);
+		dev_warn(&hdev->pdev->dev,
+			 "Disabling any further commands to IMP firmware\n");
+		set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+		return -EIO;
+	}
+
+	clean = (head - csq->next_to_clean + csq->desc_num) % csq->desc_num;
+	csq->next_to_clean = head;
 	return clean;
 }
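Two details above are worth a worked example: hclgevf_ring_space() keeps one descriptor slot permanently empty (desc_num - used - 1) so a full ring can be distinguished from an empty one, and the new clean path validates the hardware head against the possibly wrapped [ntc, ntu] window before computing the clean count modulo desc_num. A standalone sketch with invented numbers:

#include <stdio.h>

static int head_is_valid(int head, int ntc, int ntu)
{
	if (ntu > ntc)
		return head >= ntc && head <= ntu;

	return head >= ntc || head <= ntu;	/* window wraps past the end */
}

int main(void)
{
	int desc_num = 8, ntc = 6, ntu = 2, head = 1;

	if (head_is_valid(head, ntc, ntu)) {
		/* same expression as hclgevf_cmd_csq_clean() */
		int clean = (head - ntc + desc_num) % desc_num;

		printf("cleaned %d descriptors\n", clean);	/* -> 3 */
	}

	return 0;
}

The rmb() is there so the head value read from the register is observed before any completed descriptor contents are touched, per the in-line comment.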
 
@@ -61,7 +74,7 @@
 
 static bool hclgevf_is_special_opcode(u16 opcode)
 {
-	u16 spec_opcode[] = {0x30, 0x31, 0x32};
+	static const u16 spec_opcode[] = {0x30, 0x31, 0x32};
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -72,13 +85,51 @@
 	return false;
 }
 
+static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
+{
+	struct hclgevf_dev *hdev = ring->dev;
+	struct hclgevf_hw *hw = &hdev->hw;
+	u32 reg_val;
+
+	if (ring->flag == HCLGEVF_TYPE_CSQ) {
+		reg_val = (u32)ring->desc_dma_addr;
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, reg_val);
+		reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
+
+		reg_val = hclgevf_read_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG);
+		reg_val &= HCLGEVF_NIC_SW_RST_RDY;
+		reg_val |= (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val);
+
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0);
+	} else {
+		reg_val = (u32)ring->desc_dma_addr;
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, reg_val);
+		reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
+
+		reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val);
+
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0);
+		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_TAIL_REG, 0);
+	}
+}
+
+static void hclgevf_cmd_init_regs(struct hclgevf_hw *hw)
+{
+	hclgevf_cmd_config_regs(&hw->cmq.csq);
+	hclgevf_cmd_config_regs(&hw->cmq.crq);
+}
+
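The base-address writes above split a dma_addr_t across two 32-bit registers with "(addr >> 31) >> 1" instead of "addr >> 32": when dma_addr_t is a 32-bit type, shifting by the full type width is undefined behaviour in C, while the two-step shift stays well defined and simply yields zero. The depth register stores desc_num scaled down by HCLGEVF_NIC_CMQ_DESC_NUM_S. A standalone sketch with an invented ring address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dma = 0x123456000ULL;		/* hypothetical ring base */

	uint32_t lo = (uint32_t)dma;			/* 0x23456000 */
	uint32_t hi = (uint32_t)((dma >> 31) >> 1);	/* 0x00000001 */

	/* HCLGEVF_NIC_CMQ_DESC_NUM (1024) >> HCLGEVF_NIC_CMQ_DESC_NUM_S (3) */
	uint32_t depth = 1024 >> 3;			/* 128 */

	printf("lo=0x%08x hi=0x%08x depth=%u\n", lo, hi, depth);
	return 0;
}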
 static int hclgevf_alloc_cmd_desc(struct hclgevf_cmq_ring *ring)
 {
 	int size = ring->desc_num * sizeof(struct hclgevf_desc);
 
-	ring->desc = dma_zalloc_coherent(cmq_ring_to_dev(ring),
-					 size, &ring->desc_dma_addr,
-					 GFP_KERNEL);
+	ring->desc = dma_alloc_coherent(cmq_ring_to_dev(ring), size,
+					&ring->desc_dma_addr, GFP_KERNEL);
 	if (!ring->desc)
 		return -ENOMEM;
 
@@ -96,61 +147,23 @@
 	}
 }
 
-static int hclgevf_init_cmd_queue(struct hclgevf_dev *hdev,
-				  struct hclgevf_cmq_ring *ring)
+static int hclgevf_alloc_cmd_queue(struct hclgevf_dev *hdev, int ring_type)
 {
 	struct hclgevf_hw *hw = &hdev->hw;
-	int ring_type = ring->flag;
-	u32 reg_val;
+	struct hclgevf_cmq_ring *ring =
+		(ring_type == HCLGEVF_TYPE_CSQ) ? &hw->cmq.csq : &hw->cmq.crq;
 	int ret;
 
-	ring->desc_num = HCLGEVF_NIC_CMQ_DESC_NUM;
-	spin_lock_init(&ring->lock);
-	ring->next_to_clean = 0;
-	ring->next_to_use = 0;
 	ring->dev = hdev;
+	ring->flag = ring_type;
 
 	/* allocate CSQ/CRQ descriptor */
 	ret = hclgevf_alloc_cmd_desc(ring);
-	if (ret) {
+	if (ret)
 		dev_err(&hdev->pdev->dev, "failed(%d) to alloc %s desc\n", ret,
 			(ring_type == HCLGEVF_TYPE_CSQ) ? "CSQ" : "CRQ");
-		return ret;
-	}
 
-	/* initialize the hardware registers with csq/crq dma-address,
-	 * descriptor number, head & tail pointers
-	 */
-	switch (ring_type) {
-	case HCLGEVF_TYPE_CSQ:
-		reg_val = (u32)ring->desc_dma_addr;
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, reg_val);
-		reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
-
-		reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
-		reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val);
-
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0);
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
-		break;
-	case HCLGEVF_TYPE_CRQ:
-		reg_val = (u32)ring->desc_dma_addr;
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, reg_val);
-		reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
-
-		reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
-		reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val);
-
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_TAIL_REG, 0);
-		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0);
-		break;
-	}
-
-	return 0;
+	return ret;
 }
 
 void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc,
@@ -166,6 +179,38 @@
 		desc->flag &= cpu_to_le16(~HCLGEVF_CMD_FLAG_WR);
 }
 
+static int hclgevf_cmd_convert_err_code(u16 desc_ret)
+{
+	switch (desc_ret) {
+	case HCLGEVF_CMD_EXEC_SUCCESS:
+		return 0;
+	case HCLGEVF_CMD_NO_AUTH:
+		return -EPERM;
+	case HCLGEVF_CMD_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case HCLGEVF_CMD_QUEUE_FULL:
+		return -EXFULL;
+	case HCLGEVF_CMD_NEXT_ERR:
+		return -ENOSR;
+	case HCLGEVF_CMD_UNEXE_ERR:
+		return -ENOTBLK;
+	case HCLGEVF_CMD_PARA_ERR:
+		return -EINVAL;
+	case HCLGEVF_CMD_RESULT_ERR:
+		return -ERANGE;
+	case HCLGEVF_CMD_TIMEOUT:
+		return -ETIME;
+	case HCLGEVF_CMD_HILINK_ERR:
+		return -ENOLINK;
+	case HCLGEVF_CMD_QUEUE_ILLEGAL:
+		return -ENXIO;
+	case HCLGEVF_CMD_INVALID:
+		return -EBADR;
+	default:
+		return -EIO;
+	}
+}
+
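Mapping firmware completion codes onto standard errnos (rather than a blanket -EIO) lets callers branch generically; the DWRR path in hclge_tm.c, for instance, treats -EOPNOTSUPP as "old firmware, skip the feature". A hedged standalone sketch of the same mapping, covering only a subset of the enum values:

#include <errno.h>
#include <stdio.h>

static int convert_err_code(unsigned int desc_ret)
{
	switch (desc_ret) {
	case 0:				/* HCLGEVF_CMD_EXEC_SUCCESS */
		return 0;
	case 1:				/* HCLGEVF_CMD_NO_AUTH */
		return -EPERM;
	case 2:				/* HCLGEVF_CMD_NOT_SUPPORTED */
		return -EOPNOTSUPP;
	case 3:				/* HCLGEVF_CMD_QUEUE_FULL */
		return -EXFULL;
	default:
		return -EIO;
	}
}

int main(void)
{
	int ret = convert_err_code(2);

	if (ret == -EOPNOTSUPP)
		printf("feature not supported by this firmware\n");

	return 0;
}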
 /* hclgevf_cmd_send - send command to command queue
  * @hw: pointer to the hw struct
  * @desc: prefilled descriptor for describing the command
@@ -177,6 +222,7 @@
 int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num)
 {
 	struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev;
+	struct hclgevf_cmq_ring *csq = &hw->cmq.csq;
 	struct hclgevf_desc *desc_to_use;
 	bool complete = false;
 	u32 timeout = 0;
@@ -188,7 +234,17 @@
 
 	spin_lock_bh(&hw->cmq.csq.lock);
 
+	if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) {
+		spin_unlock_bh(&hw->cmq.csq.lock);
+		return -EBUSY;
+	}
+
 	if (num > hclgevf_ring_space(&hw->cmq.csq)) {
+		/* If the CMDQ ring is full, the SW head and HW head may
+		 * differ, so refresh the SW head pointer csq->next_to_clean.
+		 */
+		csq->next_to_clean = hclgevf_read_dev(hw,
+						      HCLGEVF_NIC_CSQ_HEAD_REG);
 		spin_unlock_bh(&hw->cmq.csq.lock);
 		return -EBUSY;
 	}
@@ -237,11 +293,7 @@
 			else
 				retval = le16_to_cpu(desc[0].retval);
 
-			if ((enum hclgevf_cmd_return_status)retval ==
-			    HCLGEVF_CMD_EXEC_SUCCESS)
-				status = 0;
-			else
-				status = -EIO;
+			status = hclgevf_cmd_convert_err_code(retval);
 			hw->cmq.last_status = (enum hclgevf_cmd_status)retval;
 			ntc++;
 			handle++;
@@ -251,14 +303,13 @@
 	}
 
 	if (!complete)
-		status = -EAGAIN;
+		status = -EBADE;
 
 	/* Clean the command send queue */
 	handle = hclgevf_cmd_csq_clean(hw);
-	if (handle != num) {
+	if (handle != num)
 		dev_warn(&hdev->pdev->dev,
 			 "cleaned %d, need to clean %d\n", handle, num);
-	}
 
 	spin_unlock_bh(&hw->cmq.csq.lock);
 
@@ -282,59 +333,120 @@
 	return status;
 }
 
+int hclgevf_cmd_queue_init(struct hclgevf_dev *hdev)
+{
+	int ret;
+
+	/* Setup the lock for command queue */
+	spin_lock_init(&hdev->hw.cmq.csq.lock);
+	spin_lock_init(&hdev->hw.cmq.crq.lock);
+
+	hdev->hw.cmq.tx_timeout = HCLGEVF_CMDQ_TX_TIMEOUT;
+	hdev->hw.cmq.csq.desc_num = HCLGEVF_NIC_CMQ_DESC_NUM;
+	hdev->hw.cmq.crq.desc_num = HCLGEVF_NIC_CMQ_DESC_NUM;
+
+	ret = hclgevf_alloc_cmd_queue(hdev, HCLGEVF_TYPE_CSQ);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"CSQ ring setup error %d\n", ret);
+		return ret;
+	}
+
+	ret = hclgevf_alloc_cmd_queue(hdev, HCLGEVF_TYPE_CRQ);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"CRQ ring setup error %d\n", ret);
+		goto err_csq;
+	}
+
+	return 0;
+err_csq:
+	hclgevf_free_cmd_desc(&hdev->hw.cmq.csq);
+	return ret;
+}
+
 int hclgevf_cmd_init(struct hclgevf_dev *hdev)
 {
 	u32 version;
 	int ret;
 
-	/* setup Tx write back timeout */
-	hdev->hw.cmq.tx_timeout = HCLGEVF_CMDQ_TX_TIMEOUT;
-
-	/* setup queue CSQ/CRQ rings */
-	hdev->hw.cmq.csq.flag = HCLGEVF_TYPE_CSQ;
-	ret = hclgevf_init_cmd_queue(hdev, &hdev->hw.cmq.csq);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed(%d) to initialize CSQ ring\n", ret);
-		return ret;
-	}
-
-	hdev->hw.cmq.crq.flag = HCLGEVF_TYPE_CRQ;
-	ret = hclgevf_init_cmd_queue(hdev, &hdev->hw.cmq.crq);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed(%d) to initialize CRQ ring\n", ret);
-		goto err_csq;
-	}
+	spin_lock_bh(&hdev->hw.cmq.csq.lock);
+	spin_lock(&hdev->hw.cmq.crq.lock);
 
 	/* initialize the pointers of async rx queue of mailbox */
 	hdev->arq.hdev = hdev;
 	hdev->arq.head = 0;
 	hdev->arq.tail = 0;
-	hdev->arq.count = 0;
+	atomic_set(&hdev->arq.count, 0);
+	hdev->hw.cmq.csq.next_to_clean = 0;
+	hdev->hw.cmq.csq.next_to_use = 0;
+	hdev->hw.cmq.crq.next_to_clean = 0;
+	hdev->hw.cmq.crq.next_to_use = 0;
+
+	hclgevf_cmd_init_regs(&hdev->hw);
+
+	spin_unlock(&hdev->hw.cmq.crq.lock);
+	spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
+	clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+
+	/* Check whether a new reset is pending: a higher-level reset may
+	 * arrive while a lower-level reset is being processed.
+	 */
+	if (hclgevf_is_reset_pending(hdev)) {
+		ret = -EBUSY;
+		goto err_cmd_init;
+	}
 
 	/* get firmware version */
 	ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"failed(%d) to query firmware version\n", ret);
-		goto err_crq;
+		goto err_cmd_init;
 	}
 	hdev->fw_version = version;
 
-	dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
+	dev_info(&hdev->pdev->dev, "The firmware version is %lu.%lu.%lu.%lu\n",
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE3_MASK,
+				 HNAE3_FW_VERSION_BYTE3_SHIFT),
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE2_MASK,
+				 HNAE3_FW_VERSION_BYTE2_SHIFT),
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE1_MASK,
+				 HNAE3_FW_VERSION_BYTE1_SHIFT),
+		 hnae3_get_field(version, HNAE3_FW_VERSION_BYTE0_MASK,
+				 HNAE3_FW_VERSION_BYTE0_SHIFT));
 
 	return 0;
-err_crq:
-	hclgevf_free_cmd_desc(&hdev->hw.cmq.crq);
-err_csq:
-	hclgevf_free_cmd_desc(&hdev->hw.cmq.csq);
+
+err_cmd_init:
+	set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
 
 	return ret;
 }
 
+static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw)
+{
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0);
+	hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_TAIL_REG, 0);
+}
+
 void hclgevf_cmd_uninit(struct hclgevf_dev *hdev)
 {
+	spin_lock_bh(&hdev->hw.cmq.csq.lock);
+	spin_lock(&hdev->hw.cmq.crq.lock);
+	clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+	hclgevf_cmd_uninit_regs(&hdev->hw);
+	spin_unlock(&hdev->hw.cmq.crq.lock);
+	spin_unlock_bh(&hdev->hw.cmq.csq.lock);
 	hclgevf_free_cmd_desc(&hdev->hw.cmq.csq);
 	hclgevf_free_cmd_desc(&hdev->hw.cmq.crq);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 19b3286..f830eef 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -46,9 +46,17 @@
 
 enum hclgevf_cmd_return_status {
 	HCLGEVF_CMD_EXEC_SUCCESS	= 0,
-	HCLGEVF_CMD_NO_AUTH	= 1,
-	HCLGEVF_CMD_NOT_EXEC	= 2,
-	HCLGEVF_CMD_QUEUE_FULL	= 3,
+	HCLGEVF_CMD_NO_AUTH		= 1,
+	HCLGEVF_CMD_NOT_SUPPORTED	= 2,
+	HCLGEVF_CMD_QUEUE_FULL		= 3,
+	HCLGEVF_CMD_NEXT_ERR		= 4,
+	HCLGEVF_CMD_UNEXE_ERR		= 5,
+	HCLGEVF_CMD_PARA_ERR		= 6,
+	HCLGEVF_CMD_RESULT_ERR		= 7,
+	HCLGEVF_CMD_TIMEOUT		= 8,
+	HCLGEVF_CMD_HILINK_ERR		= 9,
+	HCLGEVF_CMD_QUEUE_ILLEGAL	= 10,
+	HCLGEVF_CMD_INVALID		= 11,
 };
 
 enum hclgevf_cmd_status {
@@ -87,8 +95,11 @@
 	HCLGEVF_OPC_QUERY_TX_STATUS	= 0x0B03,
 	HCLGEVF_OPC_QUERY_RX_STATUS	= 0x0B13,
 	HCLGEVF_OPC_CFG_COM_TQP_QUEUE	= 0x0B20,
+	/* GRO command */
+	HCLGEVF_OPC_GRO_GENERIC_CONFIG  = 0x0C10,
 	/* RSS cmd */
 	HCLGEVF_OPC_RSS_GENERIC_CONFIG	= 0x0D01,
+	HCLGEVF_OPC_RSS_INPUT_TUPLE     = 0x0D02,
 	HCLGEVF_OPC_RSS_INDIR_TABLE	= 0x0D07,
 	HCLGEVF_OPC_RSS_TC_MODE		= 0x0D08,
 	/* Mailbox cmd */
@@ -148,7 +159,14 @@
 	__le16 rsv[7];
 };
 
-#define HCLGEVF_RSS_HASH_KEY_OFFSET	4
+#define HCLGEVF_GRO_EN_B               0
+struct hclgevf_cfg_gro_status_cmd {
+	__le16 gro_en;
+	u8 rsv[22];
+};
+
+#define HCLGEVF_RSS_DEFAULT_OUTPORT_B	4
+#define HCLGEVF_RSS_HASH_KEY_OFFSET_B	4
 #define HCLGEVF_RSS_HASH_KEY_NUM	16
 struct hclgevf_rss_config_cmd {
 	u8 hash_config;
@@ -159,11 +177,11 @@
 struct hclgevf_rss_input_tuple_cmd {
 	u8 ipv4_tcp_en;
 	u8 ipv4_udp_en;
-	u8 ipv4_stcp_en;
+	u8 ipv4_sctp_en;
 	u8 ipv4_fragment_en;
 	u8 ipv6_tcp_en;
 	u8 ipv6_udp_en;
-	u8 ipv6_stcp_en;
+	u8 ipv6_sctp_en;
 	u8 ipv6_fragment_en;
 	u8 rsv[16];
 };
@@ -226,8 +244,11 @@
 #define HCLGEVF_NIC_CRQ_DEPTH_REG	0x27020
 #define HCLGEVF_NIC_CRQ_TAIL_REG	0x27024
 #define HCLGEVF_NIC_CRQ_HEAD_REG	0x27028
-#define HCLGEVF_NIC_CMQ_EN_B		16
-#define HCLGEVF_NIC_CMQ_ENABLE		BIT(HCLGEVF_NIC_CMQ_EN_B)
+
+/* this bit indicates that the driver is ready for hardware reset */
+#define HCLGEVF_NIC_SW_RST_RDY_B	16
+#define HCLGEVF_NIC_SW_RST_RDY		BIT(HCLGEVF_NIC_SW_RST_RDY_B)
+
 #define HCLGEVF_NIC_CMQ_DESC_NUM	1024
 #define HCLGEVF_NIC_CMQ_DESC_NUM_S	3
 #define HCLGEVF_NIC_CMDQ_INT_SRC_REG	0x27100
@@ -254,6 +275,7 @@
 
 int hclgevf_cmd_init(struct hclgevf_dev *hdev);
 void hclgevf_cmd_uninit(struct hclgevf_dev *hdev);
+int hclgevf_cmd_queue_init(struct hclgevf_dev *hdev);
 
 int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num);
 void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 5570fb5..7d7e712 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2,6 +2,7 @@
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
+#include <linux/iopoll.h>
 #include <net/rtnetlink.h>
 #include "hclgevf_cmd.h"
 #include "hclgevf_main.h"
@@ -10,8 +11,9 @@
 
 #define HCLGEVF_NAME	"hclgevf"
 
-static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
-static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
+#define HCLGEVF_RESET_MAX_FAIL_CNT	5
+
+static int hclgevf_reset_hdev(struct hclgevf_dev *hdev);
 static struct hnae3_ae_algo ae_algovf;
 
 static const struct pci_device_id ae_algovf_pci_tbl[] = {
@@ -21,26 +23,89 @@
 	{0, }
 };
 
+static const u8 hclgevf_hash_key[] = {
+	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
+	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
+	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
+	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA
+};
+
 MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
 
-static inline struct hclgevf_dev *hclgevf_ae_get_hdev(
-	struct hnae3_handle *handle)
+static const u32 cmdq_reg_addr_list[] = {HCLGEVF_CMDQ_TX_ADDR_L_REG,
+					 HCLGEVF_CMDQ_TX_ADDR_H_REG,
+					 HCLGEVF_CMDQ_TX_DEPTH_REG,
+					 HCLGEVF_CMDQ_TX_TAIL_REG,
+					 HCLGEVF_CMDQ_TX_HEAD_REG,
+					 HCLGEVF_CMDQ_RX_ADDR_L_REG,
+					 HCLGEVF_CMDQ_RX_ADDR_H_REG,
+					 HCLGEVF_CMDQ_RX_DEPTH_REG,
+					 HCLGEVF_CMDQ_RX_TAIL_REG,
+					 HCLGEVF_CMDQ_RX_HEAD_REG,
+					 HCLGEVF_VECTOR0_CMDQ_SRC_REG,
+					 HCLGEVF_CMDQ_INTR_STS_REG,
+					 HCLGEVF_CMDQ_INTR_EN_REG,
+					 HCLGEVF_CMDQ_INTR_GEN_REG};
+
+static const u32 common_reg_addr_list[] = {HCLGEVF_MISC_VECTOR_REG_BASE,
+					   HCLGEVF_RST_ING,
+					   HCLGEVF_GRO_EN_REG};
+
+static const u32 ring_reg_addr_list[] = {HCLGEVF_RING_RX_ADDR_L_REG,
+					 HCLGEVF_RING_RX_ADDR_H_REG,
+					 HCLGEVF_RING_RX_BD_NUM_REG,
+					 HCLGEVF_RING_RX_BD_LENGTH_REG,
+					 HCLGEVF_RING_RX_MERGE_EN_REG,
+					 HCLGEVF_RING_RX_TAIL_REG,
+					 HCLGEVF_RING_RX_HEAD_REG,
+					 HCLGEVF_RING_RX_FBD_NUM_REG,
+					 HCLGEVF_RING_RX_OFFSET_REG,
+					 HCLGEVF_RING_RX_FBD_OFFSET_REG,
+					 HCLGEVF_RING_RX_STASH_REG,
+					 HCLGEVF_RING_RX_BD_ERR_REG,
+					 HCLGEVF_RING_TX_ADDR_L_REG,
+					 HCLGEVF_RING_TX_ADDR_H_REG,
+					 HCLGEVF_RING_TX_BD_NUM_REG,
+					 HCLGEVF_RING_TX_PRIORITY_REG,
+					 HCLGEVF_RING_TX_TC_REG,
+					 HCLGEVF_RING_TX_MERGE_EN_REG,
+					 HCLGEVF_RING_TX_TAIL_REG,
+					 HCLGEVF_RING_TX_HEAD_REG,
+					 HCLGEVF_RING_TX_FBD_NUM_REG,
+					 HCLGEVF_RING_TX_OFFSET_REG,
+					 HCLGEVF_RING_TX_EBD_NUM_REG,
+					 HCLGEVF_RING_TX_EBD_OFFSET_REG,
+					 HCLGEVF_RING_TX_BD_ERR_REG,
+					 HCLGEVF_RING_EN_REG};
+
+static const u32 tqp_intr_reg_addr_list[] = {HCLGEVF_TQP_INTR_CTRL_REG,
+					     HCLGEVF_TQP_INTR_GL0_REG,
+					     HCLGEVF_TQP_INTR_GL1_REG,
+					     HCLGEVF_TQP_INTR_GL2_REG,
+					     HCLGEVF_TQP_INTR_RL_REG};
+
+static struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle)
 {
-	return container_of(handle, struct hclgevf_dev, nic);
+	if (!handle->client)
+		return container_of(handle, struct hclgevf_dev, nic);
+	else if (handle->client->type == HNAE3_CLIENT_ROCE)
+		return container_of(handle, struct hclgevf_dev, roce);
+	else
+		return container_of(handle, struct hclgevf_dev, nic);
 }
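hclgevf_ae_get_hdev() now recovers the enclosing hclgevf_dev from whichever embedded handle it is given: the nic handle for NIC clients, the roce handle for RoCE clients. container_of() does this by subtracting the member offset. A standalone illustration with simplified, invented types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct handle { int client_type; };

struct dev {
	int id;
	struct handle nic;
	struct handle roce;
};

int main(void)
{
	struct dev d = { .id = 7 };
	struct handle *h = &d.roce;	/* as a RoCE client would pass in */

	struct dev *back = container_of(h, struct dev, roce);

	printf("recovered dev id = %d\n", back->id);	/* -> 7 */
	return 0;
}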
 
 static int hclgevf_tqps_update_stats(struct hnae3_handle *handle)
 {
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct hnae3_queue *queue;
 	struct hclgevf_desc desc;
 	struct hclgevf_tqp *tqp;
 	int status;
 	int i;
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		queue = handle->kinfo.tqp[i];
-		tqp = container_of(queue, struct hclgevf_tqp, q);
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		tqp = container_of(kinfo->tqp[i], struct hclgevf_tqp, q);
 		hclgevf_cmd_setup_basic_desc(&desc,
 					     HCLGEVF_OPC_QUERY_RX_STATUS,
 					     true);
@@ -77,17 +142,16 @@
 static u64 *hclgevf_tqps_get_stats(struct hnae3_handle *handle, u64 *data)
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hclgevf_tqp *tqp;
 	u64 *buff = data;
 	int i;
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		tqp = container_of(handle->kinfo.tqp[i], struct hclgevf_tqp, q);
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		tqp = container_of(kinfo->tqp[i], struct hclgevf_tqp, q);
 		*buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd;
 	}
 	for (i = 0; i < kinfo->num_tqps; i++) {
-		tqp = container_of(handle->kinfo.tqp[i], struct hclgevf_tqp, q);
+		tqp = container_of(kinfo->tqp[i], struct hclgevf_tqp, q);
 		*buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd;
 	}
 
@@ -96,29 +160,29 @@
 
 static int hclgevf_tqps_get_sset_count(struct hnae3_handle *handle, int strset)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 
-	return hdev->num_tqps * 2;
+	return kinfo->num_tqps * 2;
 }
 
 static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	u8 *buff = data;
 	int i = 0;
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		struct hclgevf_tqp *tqp = container_of(handle->kinfo.tqp[i],
-			struct hclgevf_tqp, q);
-		snprintf(buff, ETH_GSTRING_LEN, "txq#%d_pktnum_rcd",
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i],
+						       struct hclgevf_tqp, q);
+		snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd",
 			 tqp->index);
 		buff += ETH_GSTRING_LEN;
 	}
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		struct hclgevf_tqp *tqp = container_of(handle->kinfo.tqp[i],
-			struct hclgevf_tqp, q);
-		snprintf(buff, ETH_GSTRING_LEN, "rxq#%d_pktnum_rcd",
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i],
+						       struct hclgevf_tqp, q);
+		snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd",
 			 tqp->index);
 		buff += ETH_GSTRING_LEN;
 	}
@@ -169,7 +233,7 @@
 	int status;
 
 	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_TCINFO, 0, NULL, 0,
-				      true, &resp_msg, sizeof(u8));
+				      true, &resp_msg, sizeof(resp_msg));
 	if (status) {
 		dev_err(&hdev->pdev->dev,
 			"VF request to get TC info from PF failed %d",
@@ -182,9 +246,30 @@
 	return 0;
 }
 
-static int hclge_get_queue_info(struct hclgevf_dev *hdev)
+static int hclgevf_get_port_base_vlan_filter_state(struct hclgevf_dev *hdev)
 {
-#define HCLGEVF_TQPS_RSS_INFO_LEN	8
+	struct hnae3_handle *nic = &hdev->nic;
+	u8 resp_msg;
+	int ret;
+
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+				   HCLGE_MBX_GET_PORT_BASE_VLAN_STATE,
+				   NULL, 0, true, &resp_msg, sizeof(u8));
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"VF request to get port based vlan state failed %d",
+			ret);
+		return ret;
+	}
+
+	nic->port_base_vlan_state = resp_msg;
+
+	return 0;
+}
+
+static int hclgevf_get_queue_info(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_TQPS_RSS_INFO_LEN	6
 	u8 resp_msg[HCLGEVF_TQPS_RSS_INFO_LEN];
 	int status;
 
@@ -200,8 +285,67 @@
 
 	memcpy(&hdev->num_tqps, &resp_msg[0], sizeof(u16));
 	memcpy(&hdev->rss_size_max, &resp_msg[2], sizeof(u16));
-	memcpy(&hdev->num_desc, &resp_msg[4], sizeof(u16));
-	memcpy(&hdev->rx_buf_len, &resp_msg[6], sizeof(u16));
+	memcpy(&hdev->rx_buf_len, &resp_msg[4], sizeof(u16));
+
+	return 0;
+}
+
+static int hclgevf_get_queue_depth(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_TQPS_DEPTH_INFO_LEN	4
+	u8 resp_msg[HCLGEVF_TQPS_DEPTH_INFO_LEN];
+	int ret;
+
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QDEPTH, 0, NULL, 0,
+				   true, resp_msg,
+				   HCLGEVF_TQPS_DEPTH_INFO_LEN);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"VF request to get tqp depth info from PF failed %d",
+			ret);
+		return ret;
+	}
+
+	memcpy(&hdev->num_tx_desc, &resp_msg[0], sizeof(u16));
+	memcpy(&hdev->num_rx_desc, &resp_msg[2], sizeof(u16));
+
+	return 0;
+}
+
+static u16 hclgevf_get_qid_global(struct hnae3_handle *handle, u16 queue_id)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	u8 msg_data[2], resp_data[2];
+	u16 qid_in_pf = 0;
+	int ret;
+
+	memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
+
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QID_IN_PF, 0, msg_data,
+				   sizeof(msg_data), true, resp_data,
+				   sizeof(resp_data));
+	if (!ret)
+		qid_in_pf = *(u16 *)resp_data;
+
+	return qid_in_pf;
+}
+
+static int hclgevf_get_pf_media_type(struct hclgevf_dev *hdev)
+{
+	u8 resp_msg[2];
+	int ret;
+
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MEDIA_TYPE, 0, NULL, 0,
+				   true, resp_msg, sizeof(resp_msg));
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"VF request to get the pf port media type failed %d",
+			ret);
+		return ret;
+	}
+
+	hdev->hw.mac.media_type = resp_msg[0];
+	hdev->hw.mac.module_type = resp_msg[1];
 
 	return 0;
 }
@@ -211,12 +355,6 @@
 	struct hclgevf_tqp *tqp;
 	int i;
 
-	/* if this is on going reset then we need to re-allocate the TPQs
-	 * since we cannot assume we would get same number of TPQs back from PF
-	 */
-	if (hclgevf_dev_ongoing_reset(hdev))
-		devm_kfree(&hdev->pdev->dev, hdev->htqp);
-
 	hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
 				  sizeof(struct hclgevf_tqp), GFP_KERNEL);
 	if (!hdev->htqp)
@@ -230,7 +368,8 @@
 
 		tqp->q.ae_algo = &ae_algovf;
 		tqp->q.buf_size = hdev->rx_buf_len;
-		tqp->q.desc_num = hdev->num_desc;
+		tqp->q.tx_desc_num = hdev->num_tx_desc;
+		tqp->q.rx_desc_num = hdev->num_rx_desc;
 		tqp->q.io_base = hdev->hw.io_base + HCLGEVF_TQP_REG_OFFSET +
 			i * HCLGEVF_TQP_REG_SIZE;
 
@@ -245,11 +384,12 @@
 	struct hnae3_handle *nic = &hdev->nic;
 	struct hnae3_knic_private_info *kinfo;
 	u16 new_tqps = hdev->num_tqps;
-	int i;
+	unsigned int i;
 
 	kinfo = &nic->kinfo;
 	kinfo->num_tc = 0;
-	kinfo->num_desc = hdev->num_desc;
+	kinfo->num_tx_desc = hdev->num_tx_desc;
+	kinfo->num_rx_desc = hdev->num_rx_desc;
 	kinfo->rx_buf_len = hdev->rx_buf_len;
 	for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++)
 		if (hdev->hw_tc_map & BIT(i))
@@ -260,12 +400,6 @@
 	new_tqps = kinfo->rss_size * kinfo->num_tc;
 	kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
 
-	/* if this is on going reset then we need to re-allocate the hnae queues
-	 * as well since number of TPQs from PF might have changed.
-	 */
-	if (hclgevf_dev_ongoing_reset(hdev))
-		devm_kfree(&hdev->pdev->dev, kinfo->tqp);
-
 	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
 				  sizeof(struct hnae3_queue *), GFP_KERNEL);
 	if (!kinfo->tqp)
@@ -277,6 +411,13 @@
 		kinfo->tqp[i] = &hdev->htqp[i].q;
 	}
 
+	/* After initializing the maximum rss_size and tqp count, adjust the
+	 * default tqp number and rss size to the actual number of vectors.
+	 */
+	kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps);
+	kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc,
+				kinfo->rss_size);
+
 	return 0;
 }
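Concrete numbers for the adjustment above (all invented): with five NIC MSI-X vectors, one is presumably reserved for the misc/mailbox interrupt, so only four queue pairs can be driven even if more were allocated, and rss_size shrinks so that num_tqps and num_tc * rss_size stay consistent:

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	int num_nic_msix = 5, num_tc = 2;
	int num_tqps = 16, rss_size = 8;

	num_tqps = MIN(num_nic_msix - 1, num_tqps);	/* 4 */
	rss_size = MIN(num_tqps / num_tc, rss_size);	/* 2 */

	printf("num_tqps=%d rss_size=%d\n", num_tqps, rss_size);
	return 0;
}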
 
@@ -286,7 +427,7 @@
 	u8 resp_msg;
 
 	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_STATUS, 0, NULL,
-				      0, false, &resp_msg, sizeof(u8));
+				      0, false, &resp_msg, sizeof(resp_msg));
 	if (status)
 		dev_err(&hdev->pdev->dev,
 			"VF failed to fetch link status(%d) from PF", status);
@@ -294,20 +435,42 @@
 
 void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state)
 {
+	struct hnae3_handle *rhandle = &hdev->roce;
 	struct hnae3_handle *handle = &hdev->nic;
+	struct hnae3_client *rclient;
 	struct hnae3_client *client;
 
 	client = handle->client;
+	rclient = hdev->roce_client;
 
 	link_state =
 		test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state;
 
 	if (link_state != hdev->hw.mac.link) {
 		client->ops->link_status_change(handle, !!link_state);
+		if (rclient && rclient->ops->link_status_change)
+			rclient->ops->link_status_change(rhandle, !!link_state);
 		hdev->hw.mac.link = link_state;
 	}
 }
 
+static void hclgevf_update_link_mode(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_ADVERTISING 0
+#define HCLGEVF_SUPPORTED   1
+	u8 send_msg;
+	u8 resp_msg;
+
+	send_msg = HCLGEVF_ADVERTISING;
+	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0,
+			     &send_msg, sizeof(send_msg), false,
+			     &resp_msg, sizeof(resp_msg));
+	send_msg = HCLGEVF_SUPPORTED;
+	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0,
+			     &send_msg, sizeof(send_msg), false,
+			     &resp_msg, sizeof(resp_msg));
+}
+
 static int hclgevf_set_handle_info(struct hclgevf_dev *hdev)
 {
 	struct hnae3_handle *nic = &hdev->nic;
@@ -318,12 +481,6 @@
 	nic->numa_node_mask = hdev->numa_node_mask;
 	nic->flags |= HNAE3_SUPPORT_VF;
 
-	if (hdev->ae_dev->dev_type != HNAE3_DEV_KNIC) {
-		dev_err(&hdev->pdev->dev, "unsupported device type %d\n",
-			hdev->ae_dev->dev_type);
-		return -EINVAL;
-	}
-
 	ret = hclgevf_knic_setup(hdev);
 	if (ret)
 		dev_err(&hdev->pdev->dev, "VF knic setup failed %d\n",
@@ -352,6 +509,7 @@
 	int alloc = 0;
 	int i, j;
 
+	vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num);
 	vector_num = min(hdev->num_msi_left, vector_num);
 
 	for (j = 0; j < vector_num; j++) {
@@ -388,6 +546,46 @@
 	return -EINVAL;
 }
 
+static int hclgevf_set_rss_algo_key(struct hclgevf_dev *hdev,
+				    const u8 hfunc, const u8 *key)
+{
+	struct hclgevf_rss_config_cmd *req;
+	unsigned int key_offset = 0;
+	struct hclgevf_desc desc;
+	int key_counts;
+	int key_size;
+	int ret;
+
+	key_counts = HCLGEVF_RSS_KEY_SIZE;
+	req = (struct hclgevf_rss_config_cmd *)desc.data;
+
+	while (key_counts) {
+		hclgevf_cmd_setup_basic_desc(&desc,
+					     HCLGEVF_OPC_RSS_GENERIC_CONFIG,
+					     false);
+
+		req->hash_config |= (hfunc & HCLGEVF_RSS_HASH_ALGO_MASK);
+		req->hash_config |=
+			(key_offset << HCLGEVF_RSS_HASH_KEY_OFFSET_B);
+
+		key_size = min(HCLGEVF_RSS_HASH_KEY_NUM, key_counts);
+		memcpy(req->hash_key,
+		       key + key_offset * HCLGEVF_RSS_HASH_KEY_NUM, key_size);
+
+		key_counts -= key_size;
+		key_offset++;
+		ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"Configure RSS config fail, status = %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
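The write path above pushes the 40-byte key (the default hclgevf_hash_key earlier in this file is 40 bytes) to hardware in 16-byte pieces (HCLGEVF_RSS_HASH_KEY_NUM), encoding the piece index into hash_config at bit HCLGEVF_RSS_HASH_KEY_OFFSET_B alongside the algorithm bits. A standalone sketch of the loop; the 0xf algorithm mask is an assumption:

#include <stdio.h>

int main(void)
{
	int key_counts = 40;		/* HCLGEVF_RSS_KEY_SIZE */
	int chunk = 16;			/* HCLGEVF_RSS_HASH_KEY_NUM */
	int key_offset = 0;
	unsigned char hfunc = 1;	/* hypothetical algorithm id */

	while (key_counts) {
		int key_size = key_counts < chunk ? key_counts : chunk;
		unsigned char hash_config =
			(hfunc & 0xf) | (unsigned char)(key_offset << 4);

		printf("desc %d: hash_config=0x%02x carries %d key bytes\n",
		       key_offset, hash_config, key_size);

		key_counts -= key_size;
		key_offset++;
	}

	return 0;
}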
 static u32 hclgevf_get_rss_key_size(struct hnae3_handle *handle)
 {
 	return HCLGEVF_RSS_KEY_SIZE;
@@ -438,7 +636,7 @@
 	struct hclgevf_desc desc;
 	u16 roundup_size;
 	int status;
-	int i;
+	unsigned int i;
 
 	req = (struct hclgevf_rss_tc_mode_cmd *)desc.data;
 
@@ -468,51 +666,39 @@
 	return status;
 }
 
-static int hclgevf_get_rss_hw_cfg(struct hnae3_handle *handle, u8 *hash,
-				  u8 *key)
+/* for revision 0x20, the VF shares the same RSS config as the PF */
+static int hclgevf_get_rss_hash_key(struct hclgevf_dev *hdev)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct hclgevf_rss_config_cmd *req;
-	int lkup_times = key ? 3 : 1;
-	struct hclgevf_desc desc;
-	int key_offset;
-	int key_size;
-	int status;
+#define HCLGEVF_RSS_MBX_RESP_LEN	8
 
-	req = (struct hclgevf_rss_config_cmd *)desc.data;
-	lkup_times = (lkup_times == 3) ? 3 : ((hash) ? 1 : 0);
+	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+	u8 resp_msg[HCLGEVF_RSS_MBX_RESP_LEN];
+	u16 msg_num, hash_key_index;
+	u8 index;
+	int ret;
 
-	for (key_offset = 0; key_offset < lkup_times; key_offset++) {
-		hclgevf_cmd_setup_basic_desc(&desc,
-					     HCLGEVF_OPC_RSS_GENERIC_CONFIG,
-					     true);
-		req->hash_config |= (key_offset << HCLGEVF_RSS_HASH_KEY_OFFSET);
-
-		status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
-		if (status) {
+	msg_num = (HCLGEVF_RSS_KEY_SIZE + HCLGEVF_RSS_MBX_RESP_LEN - 1) /
+			HCLGEVF_RSS_MBX_RESP_LEN;
+	for (index = 0; index < msg_num; index++) {
+		ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_RSS_KEY, 0,
+					   &index, sizeof(index),
+					   true, resp_msg,
+					   HCLGEVF_RSS_MBX_RESP_LEN);
+		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"failed to get hardware RSS cfg, status = %d\n",
-				status);
-			return status;
+				"VF get rss hash key from PF failed, ret=%d",
+				ret);
+			return ret;
 		}
 
-		if (key_offset == 2)
-			key_size =
-			HCLGEVF_RSS_KEY_SIZE - HCLGEVF_RSS_HASH_KEY_NUM * 2;
+		hash_key_index = HCLGEVF_RSS_MBX_RESP_LEN * index;
+		if (index == msg_num - 1)
+			memcpy(&rss_cfg->rss_hash_key[hash_key_index],
+			       &resp_msg[0],
+			       HCLGEVF_RSS_KEY_SIZE - hash_key_index);
 		else
-			key_size = HCLGEVF_RSS_HASH_KEY_NUM;
-
-		if (key)
-			memcpy(key + key_offset * HCLGEVF_RSS_HASH_KEY_NUM,
-			       req->hash_key,
-			       key_size);
-	}
-
-	if (hash) {
-		if ((req->hash_config & 0xf) == HCLGEVF_RSS_HASH_ALGO_TOEPLITZ)
-			*hash = ETH_RSS_HASH_TOP;
-		else
-			*hash = ETH_RSS_HASH_UNKNOWN;
+			memcpy(&rss_cfg->rss_hash_key[hash_key_index],
+			       &resp_msg[0], HCLGEVF_RSS_MBX_RESP_LEN);
 	}
 
 	return 0;
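The chunking arithmetic above: a 40-byte key (HCLGEVF_RSS_KEY_SIZE) fetched through an 8-byte mailbox response takes ceil(40 / 8) = 5 requests, with the final memcpy sized by whatever remains (here exactly 8). A minimal standalone illustration:

#include <stdio.h>

int main(void)
{
	int key_size = 40;	/* HCLGEVF_RSS_KEY_SIZE */
	int resp_len = 8;	/* HCLGEVF_RSS_MBX_RESP_LEN */
	int msg_num = (key_size + resp_len - 1) / resp_len;	/* 5 */
	int index;

	for (index = 0; index < msg_num; index++) {
		int off = index * resp_len;
		int len = (index == msg_num - 1) ? key_size - off : resp_len;

		printf("msg %d: copy %d bytes at offset %d\n", index, len, off);
	}

	return 0;
}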
@@ -523,21 +709,82 @@
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
-	int i;
+	int i, ret;
+
+	if (handle->pdev->revision >= 0x21) {
+		/* Get hash algorithm */
+		if (hfunc) {
+			switch (rss_cfg->hash_algo) {
+			case HCLGEVF_RSS_HASH_ALGO_TOEPLITZ:
+				*hfunc = ETH_RSS_HASH_TOP;
+				break;
+			case HCLGEVF_RSS_HASH_ALGO_SIMPLE:
+				*hfunc = ETH_RSS_HASH_XOR;
+				break;
+			default:
+				*hfunc = ETH_RSS_HASH_UNKNOWN;
+				break;
+			}
+		}
+
+		/* Get the RSS Key required by the user */
+		if (key)
+			memcpy(key, rss_cfg->rss_hash_key,
+			       HCLGEVF_RSS_KEY_SIZE);
+	} else {
+		if (hfunc)
+			*hfunc = ETH_RSS_HASH_TOP;
+		if (key) {
+			ret = hclgevf_get_rss_hash_key(hdev);
+			if (ret)
+				return ret;
+			memcpy(key, rss_cfg->rss_hash_key,
+			       HCLGEVF_RSS_KEY_SIZE);
+		}
+	}
 
 	if (indir)
 		for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
 			indir[i] = rss_cfg->rss_indirection_tbl[i];
 
-	return hclgevf_get_rss_hw_cfg(handle, hfunc, key);
+	return 0;
 }
 
 static int hclgevf_set_rss(struct hnae3_handle *handle, const u32 *indir,
-			   const  u8 *key, const  u8 hfunc)
+			   const u8 *key, const u8 hfunc)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
-	int i;
+	int ret, i;
+
+	if (handle->pdev->revision >= 0x21) {
+		/* Set the RSS hash key if specified by the user */
+		if (key) {
+			switch (hfunc) {
+			case ETH_RSS_HASH_TOP:
+				rss_cfg->hash_algo =
+					HCLGEVF_RSS_HASH_ALGO_TOEPLITZ;
+				break;
+			case ETH_RSS_HASH_XOR:
+				rss_cfg->hash_algo =
+					HCLGEVF_RSS_HASH_ALGO_SIMPLE;
+				break;
+			case ETH_RSS_HASH_NO_CHANGE:
+				break;
+			default:
+				return -EINVAL;
+			}
+
+			ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo,
+						       key);
+			if (ret)
+				return ret;
+
+			/* Update the shadow RSS key with the user-specified key */
+			memcpy(rss_cfg->rss_hash_key, key,
+			       HCLGEVF_RSS_KEY_SIZE);
+		}
+	}
 
 	/* update the shadow RSS table with user specified qids */
 	for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
@@ -547,6 +794,193 @@
 	return hclgevf_set_rss_indir_table(hdev);
 }
 
+static u8 hclgevf_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
+{
+	u8 hash_sets = nfc->data & RXH_L4_B_0_1 ? HCLGEVF_S_PORT_BIT : 0;
+
+	if (nfc->data & RXH_L4_B_2_3)
+		hash_sets |= HCLGEVF_D_PORT_BIT;
+	else
+		hash_sets &= ~HCLGEVF_D_PORT_BIT;
+
+	if (nfc->data & RXH_IP_SRC)
+		hash_sets |= HCLGEVF_S_IP_BIT;
+	else
+		hash_sets &= ~HCLGEVF_S_IP_BIT;
+
+	if (nfc->data & RXH_IP_DST)
+		hash_sets |= HCLGEVF_D_IP_BIT;
+	else
+		hash_sets &= ~HCLGEVF_D_IP_BIT;
+
+	if (nfc->flow_type == SCTP_V4_FLOW || nfc->flow_type == SCTP_V6_FLOW)
+		hash_sets |= HCLGEVF_V_TAG_BIT;
+
+	return hash_sets;
+}
+
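hclgevf_get_rss_hash_bits() translates the ethtool RXH_* request flags into the device's per-flow tuple bits. A hedged standalone sketch; the HCLGEVF_* bit positions here are assumptions (the real values live in hclgevf_main.h), only the RXH_* values are the genuine ethtool ones:

#include <stdio.h>

#define RXH_IP_SRC	(1 << 4)	/* from <linux/ethtool.h> */
#define RXH_IP_DST	(1 << 5)
#define RXH_L4_B_0_1	(1 << 6)	/* src port */
#define RXH_L4_B_2_3	(1 << 7)	/* dst port */

#define D_PORT_BIT	(1 << 0)	/* assumed HCLGEVF_D_PORT_BIT */
#define S_PORT_BIT	(1 << 1)	/* assumed HCLGEVF_S_PORT_BIT */
#define D_IP_BIT	(1 << 2)	/* assumed HCLGEVF_D_IP_BIT */
#define S_IP_BIT	(1 << 3)	/* assumed HCLGEVF_S_IP_BIT */

int main(void)
{
	unsigned long data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1;
	unsigned char hash_sets = 0;

	if (data & RXH_L4_B_0_1)
		hash_sets |= S_PORT_BIT;
	if (data & RXH_L4_B_2_3)
		hash_sets |= D_PORT_BIT;
	if (data & RXH_IP_SRC)
		hash_sets |= S_IP_BIT;
	if (data & RXH_IP_DST)
		hash_sets |= D_IP_BIT;

	printf("hash_sets = 0x%02x\n", hash_sets);	/* IP pair + src port */
	return 0;
}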
+static int hclgevf_set_rss_tuple(struct hnae3_handle *handle,
+				 struct ethtool_rxnfc *nfc)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+	struct hclgevf_rss_input_tuple_cmd *req;
+	struct hclgevf_desc desc;
+	u8 tuple_sets;
+	int ret;
+
+	if (handle->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	if (nfc->data &
+	    ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	req = (struct hclgevf_rss_input_tuple_cmd *)desc.data;
+	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INPUT_TUPLE, false);
+
+	req->ipv4_tcp_en = rss_cfg->rss_tuple_sets.ipv4_tcp_en;
+	req->ipv4_udp_en = rss_cfg->rss_tuple_sets.ipv4_udp_en;
+	req->ipv4_sctp_en = rss_cfg->rss_tuple_sets.ipv4_sctp_en;
+	req->ipv4_fragment_en = rss_cfg->rss_tuple_sets.ipv4_fragment_en;
+	req->ipv6_tcp_en = rss_cfg->rss_tuple_sets.ipv6_tcp_en;
+	req->ipv6_udp_en = rss_cfg->rss_tuple_sets.ipv6_udp_en;
+	req->ipv6_sctp_en = rss_cfg->rss_tuple_sets.ipv6_sctp_en;
+	req->ipv6_fragment_en = rss_cfg->rss_tuple_sets.ipv6_fragment_en;
+
+	tuple_sets = hclgevf_get_rss_hash_bits(nfc);
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		req->ipv4_tcp_en = tuple_sets;
+		break;
+	case TCP_V6_FLOW:
+		req->ipv6_tcp_en = tuple_sets;
+		break;
+	case UDP_V4_FLOW:
+		req->ipv4_udp_en = tuple_sets;
+		break;
+	case UDP_V6_FLOW:
+		req->ipv6_udp_en = tuple_sets;
+		break;
+	case SCTP_V4_FLOW:
+		req->ipv4_sctp_en = tuple_sets;
+		break;
+	case SCTP_V6_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+
+		req->ipv6_sctp_en = tuple_sets;
+		break;
+	case IPV4_FLOW:
+		req->ipv4_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		break;
+	case IPV6_FLOW:
+		req->ipv6_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Set rss tuple fail, status = %d\n", ret);
+		return ret;
+	}
+
+	rss_cfg->rss_tuple_sets.ipv4_tcp_en = req->ipv4_tcp_en;
+	rss_cfg->rss_tuple_sets.ipv4_udp_en = req->ipv4_udp_en;
+	rss_cfg->rss_tuple_sets.ipv4_sctp_en = req->ipv4_sctp_en;
+	rss_cfg->rss_tuple_sets.ipv4_fragment_en = req->ipv4_fragment_en;
+	rss_cfg->rss_tuple_sets.ipv6_tcp_en = req->ipv6_tcp_en;
+	rss_cfg->rss_tuple_sets.ipv6_udp_en = req->ipv6_udp_en;
+	rss_cfg->rss_tuple_sets.ipv6_sctp_en = req->ipv6_sctp_en;
+	rss_cfg->rss_tuple_sets.ipv6_fragment_en = req->ipv6_fragment_en;
+	return 0;
+}
+
+static int hclgevf_get_rss_tuple(struct hnae3_handle *handle,
+				 struct ethtool_rxnfc *nfc)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+	u8 tuple_sets;
+
+	if (handle->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	nfc->data = 0;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		tuple_sets = rss_cfg->rss_tuple_sets.ipv4_tcp_en;
+		break;
+	case UDP_V4_FLOW:
+		tuple_sets = rss_cfg->rss_tuple_sets.ipv4_udp_en;
+		break;
+	case TCP_V6_FLOW:
+		tuple_sets = rss_cfg->rss_tuple_sets.ipv6_tcp_en;
+		break;
+	case UDP_V6_FLOW:
+		tuple_sets = rss_cfg->rss_tuple_sets.ipv6_udp_en;
+		break;
+	case SCTP_V4_FLOW:
+		tuple_sets = rss_cfg->rss_tuple_sets.ipv4_sctp_en;
+		break;
+	case SCTP_V6_FLOW:
+		tuple_sets = rss_cfg->rss_tuple_sets.ipv6_sctp_en;
+		break;
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		tuple_sets = HCLGEVF_S_IP_BIT | HCLGEVF_D_IP_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!tuple_sets)
+		return 0;
+
+	if (tuple_sets & HCLGEVF_D_PORT_BIT)
+		nfc->data |= RXH_L4_B_2_3;
+	if (tuple_sets & HCLGEVF_S_PORT_BIT)
+		nfc->data |= RXH_L4_B_0_1;
+	if (tuple_sets & HCLGEVF_D_IP_BIT)
+		nfc->data |= RXH_IP_DST;
+	if (tuple_sets & HCLGEVF_S_IP_BIT)
+		nfc->data |= RXH_IP_SRC;
+
+	return 0;
+}
+
+static int hclgevf_set_rss_input_tuple(struct hclgevf_dev *hdev,
+				       struct hclgevf_rss_cfg *rss_cfg)
+{
+	struct hclgevf_rss_input_tuple_cmd *req;
+	struct hclgevf_desc desc;
+	int ret;
+
+	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INPUT_TUPLE, false);
+
+	req = (struct hclgevf_rss_input_tuple_cmd *)desc.data;
+
+	req->ipv4_tcp_en = rss_cfg->rss_tuple_sets.ipv4_tcp_en;
+	req->ipv4_udp_en = rss_cfg->rss_tuple_sets.ipv4_udp_en;
+	req->ipv4_sctp_en = rss_cfg->rss_tuple_sets.ipv4_sctp_en;
+	req->ipv4_fragment_en = rss_cfg->rss_tuple_sets.ipv4_fragment_en;
+	req->ipv6_tcp_en = rss_cfg->rss_tuple_sets.ipv6_tcp_en;
+	req->ipv6_udp_en = rss_cfg->rss_tuple_sets.ipv6_udp_en;
+	req->ipv6_sctp_en = rss_cfg->rss_tuple_sets.ipv6_sctp_en;
+	req->ipv6_fragment_en = rss_cfg->rss_tuple_sets.ipv6_fragment_en;
+
+	ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Configure rss input fail, status = %d\n", ret);
+	return ret;
+}
+
 static int hclgevf_get_tc_size(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -568,6 +1002,8 @@
 	u8 type;
 
 	req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
+	type = en ? HCLGE_MBX_MAP_RING_TO_VECTOR :
+		HCLGE_MBX_UNMAP_RING_TO_VECTOR;
 
 	for (node = ring_chain; node; node = node->next) {
 		int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
@@ -577,9 +1013,6 @@
 			hclgevf_cmd_setup_basic_desc(&desc,
 						     HCLGEVF_OPC_MBX_VF_TO_PF,
 						     false);
-			type = en ?
-				HCLGE_MBX_MAP_RING_TO_VECTOR :
-				HCLGE_MBX_UNMAP_RING_TO_VECTOR;
 			req->msg[0] = type;
 			req->msg[1] = vector_id;
 		}
@@ -641,6 +1074,9 @@
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	int ret, vector_id;
 
+	if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+		return 0;
+
 	vector_id = hclgevf_get_vector_index(hdev, vector);
 	if (vector_id < 0) {
 		dev_err(&handle->pdev->dev,
@@ -677,36 +1113,32 @@
 }
 
 static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
-					bool en_uc_pmc, bool en_mc_pmc)
+					bool en_bc_pmc)
 {
 	struct hclge_mbx_vf_to_pf_cmd *req;
 	struct hclgevf_desc desc;
-	int status;
+	int ret;
 
 	req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
 
 	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
 	req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE;
-	req->msg[1] = en_uc_pmc ? 1 : 0;
-	req->msg[2] = en_mc_pmc ? 1 : 0;
+	req->msg[1] = en_bc_pmc ? 1 : 0;
 
-	status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
-	if (status)
+	ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
 		dev_err(&hdev->pdev->dev,
-			"Set promisc mode fail, status is %d.\n", status);
+			"Set promisc mode fail, status is %d.\n", ret);
 
-	return status;
+	return ret;
 }
 
-static void hclgevf_set_promisc_mode(struct hnae3_handle *handle,
-				     bool en_uc_pmc, bool en_mc_pmc)
+static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-
-	hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
+	return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc);
 }
 
-static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
+static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
 			      int stream_id, bool enable)
 {
 	struct hclgevf_cfg_com_tqp_queue_cmd *req;
@@ -719,7 +1151,8 @@
 				     false);
 	req->tqp_id = cpu_to_le16(tqp_id & HCLGEVF_RING_ID_MASK);
 	req->stream_id = cpu_to_le16(stream_id);
-	req->enable |= enable << HCLGEVF_TQP_ENABLE_B;
+	if (enable)
+		req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;
 
 	status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
 	if (status)
@@ -729,147 +1162,18 @@
 	return status;
 }
 
-static int hclgevf_get_queue_id(struct hnae3_queue *queue)
-{
-	struct hclgevf_tqp *tqp = container_of(queue, struct hclgevf_tqp, q);
-
-	return tqp->index;
-}
-
 static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct hnae3_queue *queue;
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct hclgevf_tqp *tqp;
 	int i;
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		queue = handle->kinfo.tqp[i];
-		tqp = container_of(queue, struct hclgevf_tqp, q);
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		tqp = container_of(kinfo->tqp[i], struct hclgevf_tqp, q);
 		memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats));
 	}
 }
 
-static int hclgevf_cfg_func_mta_type(struct hclgevf_dev *hdev)
-{
-	u8 resp_msg = HCLGEVF_MTA_TYPE_SEL_MAX;
-	int ret;
-
-	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST,
-				   HCLGE_MBX_MAC_VLAN_MTA_TYPE_READ,
-				   NULL, 0, true, &resp_msg, sizeof(u8));
-
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Read mta type fail, ret=%d.\n", ret);
-		return ret;
-	}
-
-	if (resp_msg > HCLGEVF_MTA_TYPE_SEL_MAX) {
-		dev_err(&hdev->pdev->dev,
-			"Read mta type invalid, resp=%d.\n", resp_msg);
-		return -EINVAL;
-	}
-
-	hdev->mta_mac_sel_type = resp_msg;
-
-	return 0;
-}
-
-static u16 hclgevf_get_mac_addr_to_mta_index(struct hclgevf_dev *hdev,
-					     const u8 *addr)
-{
-	u32 rsh = HCLGEVF_MTA_TYPE_SEL_MAX - hdev->mta_mac_sel_type;
-	u16 high_val = addr[1] | (addr[0] << 8);
-
-	return (high_val >> rsh) & 0xfff;
-}
-
-static int hclgevf_do_update_mta_status(struct hclgevf_dev *hdev,
-					unsigned long *status)
-{
-#define HCLGEVF_MTA_STATUS_MSG_SIZE 13
-#define HCLGEVF_MTA_STATUS_MSG_BITS \
-			(HCLGEVF_MTA_STATUS_MSG_SIZE * BITS_PER_BYTE)
-#define HCLGEVF_MTA_STATUS_MSG_END_BITS \
-			(HCLGEVF_MTA_TBL_SIZE % HCLGEVF_MTA_STATUS_MSG_BITS)
-	u16 tbl_cnt;
-	u16 tbl_idx;
-	u8 msg_cnt;
-	u8 msg_idx;
-	int ret;
-
-	msg_cnt = DIV_ROUND_UP(HCLGEVF_MTA_TBL_SIZE,
-			       HCLGEVF_MTA_STATUS_MSG_BITS);
-	tbl_idx = 0;
-	msg_idx = 0;
-	while (msg_cnt--) {
-		u8 msg[HCLGEVF_MTA_STATUS_MSG_SIZE + 1];
-		u8 *p = &msg[1];
-		u8 msg_ofs;
-		u8 msg_bit;
-
-		memset(msg, 0, sizeof(msg));
-
-		/* set index field */
-		msg[0] = 0x7F & msg_idx;
-
-		/* set end flag field */
-		if (msg_cnt == 0) {
-			msg[0] |= 0x80;
-			tbl_cnt = HCLGEVF_MTA_STATUS_MSG_END_BITS;
-		} else {
-			tbl_cnt = HCLGEVF_MTA_STATUS_MSG_BITS;
-		}
-
-		/* set status field */
-		msg_ofs = 0;
-		msg_bit = 0;
-		while (tbl_cnt--) {
-			if (test_bit(tbl_idx, status))
-				p[msg_ofs] |= BIT(msg_bit);
-
-			tbl_idx++;
-
-			msg_bit++;
-			if (msg_bit == BITS_PER_BYTE) {
-				msg_bit = 0;
-				msg_ofs++;
-			}
-		}
-
-		ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST,
-					   HCLGE_MBX_MAC_VLAN_MTA_STATUS_UPDATE,
-					   msg, sizeof(msg), false, NULL, 0);
-		if (ret)
-			break;
-
-		msg_idx++;
-	}
-
-	return ret;
-}
-
-static int hclgevf_update_mta_status(struct hnae3_handle *handle)
-{
-	unsigned long mta_status[BITS_TO_LONGS(HCLGEVF_MTA_TBL_SIZE)];
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct net_device *netdev = hdev->nic.kinfo.netdev;
-	struct netdev_hw_addr *ha;
-	u16 tbl_idx;
-
-	/* clear status */
-	memset(mta_status, 0, sizeof(mta_status));
-
-	/* update status from mc addr list */
-	netdev_for_each_mc_addr(ha, netdev) {
-		tbl_idx = hclgevf_get_mac_addr_to_mta_index(hdev, ha->addr);
-		set_bit(tbl_idx, mta_status);
-	}
-
-	return hclgevf_do_update_mta_status(hdev, mta_status);
-}
-
 static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -894,7 +1198,7 @@
 			HCLGE_MBX_MAC_VLAN_UC_MODIFY;
 
 	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
-				      subcode, msg_data, ETH_ALEN * 2,
+				      subcode, msg_data, sizeof(msg_data),
 				      true, NULL, 0);
 	if (!status)
 		ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
@@ -949,19 +1253,61 @@
 #define HCLGEVF_VLAN_MBX_MSG_LEN 5
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	u8 msg_data[HCLGEVF_VLAN_MBX_MSG_LEN];
+	int ret;
 
-	if (vlan_id > 4095)
+	if (vlan_id > HCLGEVF_MAX_VLAN_ID)
 		return -EINVAL;
 
 	if (proto != htons(ETH_P_8021Q))
 		return -EPROTONOSUPPORT;
 
+	/* When the device is resetting, the firmware is unable to handle
+	 * the mailbox. Just record the vlan id, and remove it after the
+	 * reset has finished.
+	 */
+	if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) && is_kill) {
+		set_bit(vlan_id, hdev->vlan_del_fail_bmap);
+		return -EBUSY;
+	}
+
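+	/* mailbox payload layout: byte 0 = is_kill, bytes 1-2 = vlan_id,
+	 * bytes 3-4 = proto
+	 */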
 	msg_data[0] = is_kill;
 	memcpy(&msg_data[1], &vlan_id, sizeof(vlan_id));
 	memcpy(&msg_data[3], &proto, sizeof(proto));
-	return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
-				    HCLGE_MBX_VLAN_FILTER, msg_data,
-				    HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+				   HCLGE_MBX_VLAN_FILTER, msg_data,
+				   HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+
+	/* when removing the hw vlan filter failed, record the vlan id,
+	 * and try to remove it from hw later, to stay consistent
+	 * with the stack.
+	 */
+	if (is_kill && ret)
+		set_bit(vlan_id, hdev->vlan_del_fail_bmap);
+
+	return ret;
+}
+
+static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_MAX_SYNC_COUNT	60
+	struct hnae3_handle *handle = &hdev->nic;
+	int ret, sync_cnt = 0;
+	u16 vlan_id;
+
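+	/* retry the vlan deletes that failed earlier; each pass is bounded
+	 * by HCLGEVF_MAX_SYNC_COUNT so the service task cannot stall here
+	 */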
+	vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
+	while (vlan_id != VLAN_N_VID) {
+		ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q),
+					      vlan_id, true);
+		if (ret)
+			return;
+
+		clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
+		sync_cnt++;
+		if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT)
+			return;
+
+		vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
+	}
 }
 
 static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
@@ -975,21 +1321,29 @@
 				    1, false, NULL, 0);
 }
 
-static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	u8 msg_data[2];
 	int ret;
 
-	memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
+	memcpy(msg_data, &queue_id, sizeof(queue_id));
 
 	/* disable vf queue before send queue reset msg to PF */
 	ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
 	if (ret)
-		return;
+		return ret;
 
-	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
-			     2, true, NULL, 0);
+	return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+				    sizeof(msg_data), true, NULL, 0);
+}
+
+static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MTU, 0, (u8 *)&new_mtu,
+				    sizeof(new_mtu), true, NULL, 0);
 }
 
 static int hclgevf_notify_client(struct hclgevf_dev *hdev,
@@ -997,33 +1351,81 @@
 {
 	struct hnae3_client *client = hdev->nic_client;
 	struct hnae3_handle *handle = &hdev->nic;
+	int ret;
+
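+	/* treat an unregistered or missing nic client as a no-op rather
+	 * than an error
+	 */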
+	if (!test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state) ||
+	    !client)
+		return 0;
 
 	if (!client->ops->reset_notify)
 		return -EOPNOTSUPP;
 
-	return client->ops->reset_notify(handle, type);
+	ret = client->ops->reset_notify(handle, type);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n",
+			type, ret);
+
+	return ret;
+}
+
+static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+
+	set_bit(HNAE3_FLR_DONE, &hdev->flr_state);
+}
+
+static int hclgevf_flr_poll_timeout(struct hclgevf_dev *hdev,
+				    unsigned long delay_us,
+				    unsigned long wait_cnt)
+{
+	unsigned long cnt = 0;
+
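+	/* poll until the stack signals HNAE3_FLR_DONE or the budget runs out */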
+	while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) &&
+	       cnt++ < wait_cnt)
+		usleep_range(delay_us, delay_us * 2);
+
+	if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
+		dev_err(&hdev->pdev->dev,
+			"flr wait timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
 }
 
 static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
 {
-#define HCLGEVF_RESET_WAIT_MS	500
-#define HCLGEVF_RESET_WAIT_CNT	20
-	u32 val, cnt = 0;
+#define HCLGEVF_RESET_WAIT_US	20000
+#define HCLGEVF_RESET_WAIT_CNT	2000
+#define HCLGEVF_RESET_WAIT_TIMEOUT_US	\
+	(HCLGEVF_RESET_WAIT_US * HCLGEVF_RESET_WAIT_CNT)
 
-	/* wait to check the hardware reset completion status */
-	val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
-	while (hnae3_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
-	       (cnt < HCLGEVF_RESET_WAIT_CNT)) {
-		msleep(HCLGEVF_RESET_WAIT_MS);
-		val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
-		cnt++;
-	}
+	u32 val;
+	int ret;
+
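+	/* FLR completion is signalled by the stack via flr_state; VF and
+	 * function resets are polled from the hardware status registers
+	 */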
+	if (hdev->reset_type == HNAE3_FLR_RESET)
+		return hclgevf_flr_poll_timeout(hdev,
+						HCLGEVF_RESET_WAIT_US,
+						HCLGEVF_RESET_WAIT_CNT);
+	else if (hdev->reset_type == HNAE3_VF_RESET)
+		ret = readl_poll_timeout(hdev->hw.io_base +
+					 HCLGEVF_VF_RST_ING, val,
+					 !(val & HCLGEVF_VF_RST_ING_BIT),
+					 HCLGEVF_RESET_WAIT_US,
+					 HCLGEVF_RESET_WAIT_TIMEOUT_US);
+	else
+		ret = readl_poll_timeout(hdev->hw.io_base +
+					 HCLGEVF_RST_ING, val,
+					 !(val & HCLGEVF_RST_ING_BITS),
+					 HCLGEVF_RESET_WAIT_US,
+					 HCLGEVF_RESET_WAIT_TIMEOUT_US);
 
 	/* hardware completion status should be available by this time */
-	if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
-		dev_warn(&hdev->pdev->dev,
-			 "could'nt get reset done status from h/w, timeout!\n");
-		return -EBUSY;
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"couldn't get reset done status from h/w, timeout!\n");
+		return ret;
 	}
 
 	/* we will wait a bit more to let reset of the stack to complete. This
@@ -1035,15 +1437,31 @@
 	return 0;
 }
 
+static void hclgevf_reset_handshake(struct hclgevf_dev *hdev, bool enable)
+{
+	u32 reg_val;
+
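+	/* the HCLGEVF_NIC_SW_RST_RDY bit in the CSQ depth register acts as
+	 * a handshake with the firmware: set it to signal that the driver
+	 * is ready for the reset, clear it once the reset has completed
+	 */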
+	reg_val = hclgevf_read_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG);
+	if (enable)
+		reg_val |= HCLGEVF_NIC_SW_RST_RDY;
+	else
+		reg_val &= ~HCLGEVF_NIC_SW_RST_RDY;
+
+	hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG,
+			  reg_val);
+}
+
 static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
 {
 	int ret;
 
 	/* uninitialize the nic client */
-	hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+	ret = hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+	if (ret)
+		return ret;
 
 	/* re-initialize the hclge device */
-	ret = hclgevf_init_hdev(hdev);
+	ret = hclgevf_reset_hdev(hdev);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"hclge device re-init failed, VF is disabled!\n");
@@ -1051,22 +1469,90 @@
 	}
 
 	/* bring up the nic client again */
-	hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+	ret = hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+	if (ret)
+		return ret;
+
+	ret = hclgevf_notify_client(hdev, HNAE3_RESTORE_CLIENT);
+	if (ret)
+		return ret;
+
+	/* clear handshake status with IMP */
+	hclgevf_reset_handshake(hdev, false);
 
 	return 0;
 }
 
+static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_SYNC_TIME 100
+
+	int ret = 0;
+
+	switch (hdev->reset_type) {
+	case HNAE3_VF_FUNC_RESET:
+		ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+					   0, true, NULL, sizeof(u8));
+		hdev->rst_stats.vf_func_rst_cnt++;
+		break;
+	case HNAE3_FLR_RESET:
+		set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
+		hdev->rst_stats.flr_rst_cnt++;
+		break;
+	default:
+		break;
+	}
+
+	set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+	/* inform hardware that preparatory work is done */
+	msleep(HCLGEVF_RESET_SYNC_TIME);
+	hclgevf_reset_handshake(hdev, true);
+	dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done, ret:%d\n",
+		 hdev->reset_type, ret);
+
+	return ret;
+}
+
+static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
+{
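+	/* a failed reset is re-queued (up to HCLGEVF_RESET_MAX_FAIL_CNT
+	 * attempts) so the reset task can retry it
+	 */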
+	/* recover handshake status with IMP when reset fail */
+	hclgevf_reset_handshake(hdev, true);
+	hdev->rst_stats.rst_fail_cnt++;
+	dev_err(&hdev->pdev->dev, "failed to reset VF(%d)\n",
+		hdev->rst_stats.rst_fail_cnt);
+
+	if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
+		set_bit(hdev->reset_type, &hdev->reset_pending);
+
+	if (hclgevf_is_reset_pending(hdev)) {
+		set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+		hclgevf_reset_task_schedule(hdev);
+	}
+}
+
 static int hclgevf_reset(struct hclgevf_dev *hdev)
 {
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	int ret;
 
+	/* Initialize ae_dev reset status as well, in case enet layer wants to
+	 * know if device is undergoing reset
+	 */
+	ae_dev->reset_type = hdev->reset_type;
+	hdev->rst_stats.rst_cnt++;
 	rtnl_lock();
 
 	/* bring down the nic to stop any ongoing TX/RX */
-	hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	if (ret)
+		goto err_reset_lock;
 
 	rtnl_unlock();
 
+	ret = hclgevf_reset_prepare_wait(hdev);
+	if (ret)
+		goto err_reset;
+
 	/* check if VF could successfully fetch the hardware reset completion
 	 * status from the hardware
 	 */
@@ -1076,57 +1562,120 @@
 		dev_err(&hdev->pdev->dev,
 			"VF failed(=%d) to fetch H/W reset completion status\n",
 			ret);
-
-		dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
-		rtnl_lock();
-		hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
-
-		rtnl_unlock();
-		return ret;
+		goto err_reset;
 	}
 
+	hdev->rst_stats.hw_rst_done_cnt++;
+
 	rtnl_lock();
 
-	/* now, re-initialize the nic client and ae device*/
+	/* now, re-initialize the nic client and ae device */
 	ret = hclgevf_reset_stack(hdev);
-	if (ret)
+	if (ret) {
 		dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+		goto err_reset_lock;
+	}
 
 	/* bring up the nic to enable TX/RX again */
-	hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+	ret = hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+	if (ret)
+		goto err_reset_lock;
 
 	rtnl_unlock();
 
+	hdev->last_reset_time = jiffies;
+	ae_dev->reset_type = HNAE3_NONE_RESET;
+	hdev->rst_stats.rst_done_cnt++;
+	hdev->rst_stats.rst_fail_cnt = 0;
+
+	return ret;
+err_reset_lock:
+	rtnl_unlock();
+err_reset:
+	hclgevf_reset_err_handle(hdev);
+
 	return ret;
 }
 
-static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+static enum hnae3_reset_type hclgevf_get_reset_level(struct hclgevf_dev *hdev,
+						     unsigned long *addr)
 {
-	int status;
-	u8 respmsg;
+	enum hnae3_reset_type rst_level = HNAE3_NONE_RESET;
 
-	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
-				      0, false, &respmsg, sizeof(u8));
-	if (status)
-		dev_err(&hdev->pdev->dev,
-			"VF reset request to PF failed(=%d)\n", status);
+	/* return the highest priority reset level amongst all */
+	if (test_bit(HNAE3_VF_RESET, addr)) {
+		rst_level = HNAE3_VF_RESET;
+		clear_bit(HNAE3_VF_RESET, addr);
+		clear_bit(HNAE3_VF_PF_FUNC_RESET, addr);
+		clear_bit(HNAE3_VF_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_VF_FULL_RESET, addr)) {
+		rst_level = HNAE3_VF_FULL_RESET;
+		clear_bit(HNAE3_VF_FULL_RESET, addr);
+		clear_bit(HNAE3_VF_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_VF_PF_FUNC_RESET, addr)) {
+		rst_level = HNAE3_VF_PF_FUNC_RESET;
+		clear_bit(HNAE3_VF_PF_FUNC_RESET, addr);
+		clear_bit(HNAE3_VF_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_VF_FUNC_RESET, addr)) {
+		rst_level = HNAE3_VF_FUNC_RESET;
+		clear_bit(HNAE3_VF_FUNC_RESET, addr);
+	} else if (test_bit(HNAE3_FLR_RESET, addr)) {
+		rst_level = HNAE3_FLR_RESET;
+		clear_bit(HNAE3_FLR_RESET, addr);
+	}
 
-	return status;
+	return rst_level;
 }
 
-static void hclgevf_reset_event(struct hnae3_handle *handle)
+static void hclgevf_reset_event(struct pci_dev *pdev,
+				struct hnae3_handle *handle)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+	struct hclgevf_dev *hdev = ae_dev->priv;
 
 	dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
 
-	handle->reset_level = HNAE3_VF_RESET;
+	if (hdev->default_reset_request)
+		hdev->reset_level =
+			hclgevf_get_reset_level(hdev,
+						&hdev->default_reset_request);
+	else
+		hdev->reset_level = HNAE3_VF_FUNC_RESET;
 
 	/* reset of this VF requested */
 	set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
 	hclgevf_reset_task_schedule(hdev);
 
-	handle->last_reset_time = jiffies;
+	hdev->last_reset_time = jiffies;
+}
+
+static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
+					  enum hnae3_reset_type rst_type)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+
+	set_bit(rst_type, &hdev->default_reset_request);
+}
+
+static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev)
+{
+#define HCLGEVF_FLR_WAIT_MS	100
+#define HCLGEVF_FLR_WAIT_CNT	50
+	struct hclgevf_dev *hdev = ae_dev->priv;
+	int cnt = 0;
+
+	clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
+	clear_bit(HNAE3_FLR_DONE, &hdev->flr_state);
+	set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request);
+	hclgevf_reset_event(hdev->pdev, NULL);
+
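+	/* wait for the reset task to run and bring the device down
+	 * (HNAE3_FLR_DOWN) before the FLR is allowed to proceed
+	 */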
+	while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) &&
+	       cnt++ < HCLGEVF_FLR_WAIT_CNT)
+		msleep(HCLGEVF_FLR_WAIT_MS);
+
+	if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state))
+		dev_err(&hdev->pdev->dev,
+			"flr wait down timeout: %d\n", cnt);
 }
 
 static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
@@ -1154,7 +1703,7 @@
 void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
 {
 	if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
-	    !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+	    !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) {
 		set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
 		schedule_work(&hdev->rst_service_task);
 	}
@@ -1190,8 +1739,10 @@
 {
 	struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
 
-	mod_timer(&hdev->service_timer, jiffies + 5 * HZ);
+	mod_timer(&hdev->service_timer, jiffies +
+		  HCLGEVF_GENERAL_TASK_INTERVAL * HZ);
 
+	hdev->stats_timer++;
 	hclgevf_task_schedule(hdev);
 }
 
@@ -1209,19 +1760,25 @@
 	if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
 			       &hdev->reset_state)) {
 		/* PF has intimated that it is about to reset the hardware.
-		 * We now have to poll & check if harware has actually completed
-		 * the reset sequence. On hardware reset completion, VF needs to
-		 * reset the client and ae device.
+		 * We now have to poll & check if hardware has actually
+		 * completed the reset sequence. On hardware reset completion,
+		 * VF needs to reset the client and ae device.
 		 */
 		hdev->reset_attempts = 0;
 
-		ret = hclgevf_reset(hdev);
-		if (ret)
-			dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
+		hdev->last_reset_time = jiffies;
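+		/* drain every pending reset level, highest priority first */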
+		while ((hdev->reset_type =
+			hclgevf_get_reset_level(hdev, &hdev->reset_pending))
+		       != HNAE3_NONE_RESET) {
+			ret = hclgevf_reset(hdev);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"VF stack reset failed %d.\n", ret);
+		}
 	} else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
 				      &hdev->reset_state)) {
 		/* we could be here when either of below happens:
-		 * 1. reset was initiated due to watchdog timeout due to
+		 * 1. reset was initiated due to watchdog timeout caused by
 		 *    a. IMP was earlier reset and our TX got choked down and
 		 *       which resulted in watchdog reacting and inducing VF
 		 *       reset. This also means our cmdq would be unreliable.
@@ -1235,9 +1792,8 @@
 		 * 1b and 2. cases but we will not get any intimation about 1a
 		 * from PF as cmdq would be in unreliable state i.e. mailbox
 		 * communication between PF and VF would be broken.
-		 */
-
-		/* if we are never geting into pending state it means either:
+		 *
+		 * if we are never getting into pending state it means either:
 		 * 1. PF is not receiving our request which could be due to IMP
 		 *    reset
 		 * 2. PF is screwed
@@ -1246,19 +1802,17 @@
 		 */
 		if (hdev->reset_attempts > 3) {
 			/* prepare for full reset of stack + pcie interface */
-			hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+			set_bit(HNAE3_VF_FULL_RESET, &hdev->reset_pending);
 
 			/* "defer" schedule the reset task again */
 			set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
 		} else {
 			hdev->reset_attempts++;
 
-			/* request PF for resetting this VF via mailbox */
-			ret = hclgevf_do_reset(hdev);
-			if (ret)
-				dev_warn(&hdev->pdev->dev,
-					 "VF rst fail, stack will call\n");
+			set_bit(hdev->reset_level, &hdev->reset_pending);
+			set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
 		}
+		hclgevf_reset_task_schedule(hdev);
 	}
 
 	clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
@@ -1280,17 +1834,55 @@
 	clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
 }
 
+static void hclgevf_keep_alive_timer(struct timer_list *t)
+{
+	struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer);
+
+	schedule_work(&hdev->keep_alive_task);
+	mod_timer(&hdev->keep_alive_timer, jiffies +
+		  HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ);
+}
+
+static void hclgevf_keep_alive_task(struct work_struct *work)
+{
+	struct hclgevf_dev *hdev;
+	u8 respmsg;
+	int ret;
+
+	hdev = container_of(work, struct hclgevf_dev, keep_alive_task);
+
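+	/* skip the heartbeat while the command queue is disabled, e.g.
+	 * during reset handling
+	 */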
+	if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state))
+		return;
+
+	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_KEEP_ALIVE, 0, NULL,
+				   0, false, &respmsg, sizeof(respmsg));
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"VF failed to send keep alive cmd(=%d)\n", ret);
+}
+
 static void hclgevf_service_task(struct work_struct *work)
 {
+	struct hnae3_handle *handle;
 	struct hclgevf_dev *hdev;
 
 	hdev = container_of(work, struct hclgevf_dev, service_task);
+	handle = &hdev->nic;
+
+	if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) {
+		hclgevf_tqps_update_stats(handle);
+		hdev->stats_timer = 0;
+	}
 
 	/* request the link status from the PF. PF would be able to tell VF
 	 * about such updates in future so we might remove this later
 	 */
 	hclgevf_request_link_info(hdev);
 
+	hclgevf_update_link_mode(hdev);
+
+	hclgevf_sync_vlan_filter(hdev);
+
 	hclgevf_deferred_task_schedule(hdev);
 
 	clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
@@ -1301,24 +1893,54 @@
 	hclgevf_write_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_SRC_REG, regclr);
 }
 
-static bool hclgevf_check_event_cause(struct hclgevf_dev *hdev, u32 *clearval)
+static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
+						      u32 *clearval)
 {
-	u32 cmdq_src_reg;
+	u32 val, cmdq_stat_reg, rst_ing_reg;
 
 	/* fetch the events from their corresponding regs */
-	cmdq_src_reg = hclgevf_read_dev(&hdev->hw,
-					HCLGEVF_VECTOR0_CMDQ_SRC_REG);
+	cmdq_stat_reg = hclgevf_read_dev(&hdev->hw,
+					 HCLGEVF_VECTOR0_CMDQ_STAT_REG);
+
+	if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) {
+		rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
+		dev_info(&hdev->pdev->dev,
+			 "receive reset interrupt 0x%x!\n", rst_ing_reg);
+		set_bit(HNAE3_VF_RESET, &hdev->reset_pending);
+		set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+		set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+		*clearval = ~(1U << HCLGEVF_VECTOR0_RST_INT_B);
+		hdev->rst_stats.vf_rst_cnt++;
+		/* set up the VF hardware reset status; the PF will clear
+		 * this status once it has finished initializing.
+		 */
+		val = hclgevf_read_dev(&hdev->hw, HCLGEVF_VF_RST_ING);
+		hclgevf_write_dev(&hdev->hw, HCLGEVF_VF_RST_ING,
+				  val | HCLGEVF_VF_RST_ING_BIT);
+		return HCLGEVF_VECTOR0_EVENT_RST;
+	}
 
 	/* check for vector0 mailbox(=CMDQ RX) event source */
-	if (BIT(HCLGEVF_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
-		cmdq_src_reg &= ~BIT(HCLGEVF_VECTOR0_RX_CMDQ_INT_B);
-		*clearval = cmdq_src_reg;
-		return true;
+	if (BIT(HCLGEVF_VECTOR0_RX_CMDQ_INT_B) & cmdq_stat_reg) {
+		/* for revision 0x21, clearing interrupt is writing bit 0
+		 * to the clear register, writing bit 1 means to keep the
+		 * old value.
+		 * for revision 0x20, the clear register is a read & write
+		 * register, so we should just write 0 to the bit we are
+		 * handling, and keep other bits as cmdq_stat_reg.
+		 */
+		if (hdev->pdev->revision >= 0x21)
+			*clearval = ~(1U << HCLGEVF_VECTOR0_RX_CMDQ_INT_B);
+		else
+			*clearval = cmdq_stat_reg &
+				    ~BIT(HCLGEVF_VECTOR0_RX_CMDQ_INT_B);
+
+		return HCLGEVF_VECTOR0_EVENT_MBX;
 	}
 
 	dev_dbg(&hdev->pdev->dev, "vector 0 interrupt from unknown source\n");
 
-	return false;
+	return HCLGEVF_VECTOR0_EVENT_OTHER;
 }
 
 static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en)
@@ -1328,19 +1950,28 @@
 
 static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
 {
+	enum hclgevf_evt_cause event_cause;
 	struct hclgevf_dev *hdev = data;
 	u32 clearval;
 
 	hclgevf_enable_vector(&hdev->misc_vector, false);
-	if (!hclgevf_check_event_cause(hdev, &clearval))
-		goto skip_sched;
+	event_cause = hclgevf_check_evt_cause(hdev, &clearval);
 
-	hclgevf_mbx_handler(hdev);
+	switch (event_cause) {
+	case HCLGEVF_VECTOR0_EVENT_RST:
+		hclgevf_reset_task_schedule(hdev);
+		break;
+	case HCLGEVF_VECTOR0_EVENT_MBX:
+		hclgevf_mbx_handler(hdev);
+		break;
+	default:
+		break;
+	}
 
-	hclgevf_clear_event_cause(hdev, clearval);
-
-skip_sched:
-	hclgevf_enable_vector(&hdev->misc_vector, true);
+	if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) {
+		hclgevf_clear_event_cause(hdev, clearval);
+		hclgevf_enable_vector(&hdev->misc_vector, true);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -1349,10 +1980,25 @@
 {
 	int ret;
 
-	/* get queue configuration from PF */
-	ret = hclge_get_queue_info(hdev);
+	/* get current port based vlan state from PF */
+	ret = hclgevf_get_port_base_vlan_filter_state(hdev);
 	if (ret)
 		return ret;
+
+	/* get queue configuration from PF */
+	ret = hclgevf_get_queue_info(hdev);
+	if (ret)
+		return ret;
+
+	/* get queue depth info from PF */
+	ret = hclgevf_get_queue_depth(hdev);
+	if (ret)
+		return ret;
+
+	ret = hclgevf_get_pf_media_type(hdev);
+	if (ret)
+		return ret;
+
 	/* get tc configuration from PF */
 	return hclgevf_get_tc_info(hdev);
 }
@@ -1360,7 +2006,7 @@
 static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
 {
 	struct pci_dev *pdev = ae_dev->pdev;
-	struct hclgevf_dev *hdev = ae_dev->priv;
+	struct hclgevf_dev *hdev;
 
 	hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
 	if (!hdev)
@@ -1396,57 +2042,111 @@
 	return 0;
 }
 
+static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en)
+{
+	struct hclgevf_cfg_gro_status_cmd *req;
+	struct hclgevf_desc desc;
+	int ret;
+
+	if (!hnae3_dev_gro_supported(hdev))
+		return 0;
+
+	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_GRO_GENERIC_CONFIG,
+				     false);
+	req = (struct hclgevf_cfg_gro_status_cmd *)desc.data;
+
+	req->gro_en = cpu_to_le16(en ? 1 : 0);
+
+	ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"VF GRO hardware config cmd failed, ret = %d.\n", ret);
+
+	return ret;
+}
+
 static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev)
 {
 	struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
-	int i, ret;
+	int ret;
+	u32 i;
 
-	rss_cfg->rss_size = hdev->rss_size_max;
+	rss_cfg->rss_size = hdev->nic.kinfo.rss_size;
 
-	/* Initialize RSS indirect table for each vport */
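+	/* revision 0x21 and later support a configurable hash key and
+	 * per-flow-type tuple selection; on earlier revisions these
+	 * settings are left untouched here
+	 */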
+	if (hdev->pdev->revision >= 0x21) {
+		rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE;
+		memcpy(rss_cfg->rss_hash_key, hclgevf_hash_key,
+		       HCLGEVF_RSS_KEY_SIZE);
+
+		ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo,
+					       rss_cfg->rss_hash_key);
+		if (ret)
+			return ret;
+
+		rss_cfg->rss_tuple_sets.ipv4_tcp_en =
+					HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		rss_cfg->rss_tuple_sets.ipv4_udp_en =
+					HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		rss_cfg->rss_tuple_sets.ipv4_sctp_en =
+					HCLGEVF_RSS_INPUT_TUPLE_SCTP;
+		rss_cfg->rss_tuple_sets.ipv4_fragment_en =
+					HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		rss_cfg->rss_tuple_sets.ipv6_tcp_en =
+					HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		rss_cfg->rss_tuple_sets.ipv6_udp_en =
+					HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+		rss_cfg->rss_tuple_sets.ipv6_sctp_en =
+					HCLGEVF_RSS_INPUT_TUPLE_SCTP;
+		rss_cfg->rss_tuple_sets.ipv6_fragment_en =
+					HCLGEVF_RSS_INPUT_TUPLE_OTHER;
+
+		ret = hclgevf_set_rss_input_tuple(hdev, rss_cfg);
+		if (ret)
+			return ret;
+
+	}
+
+	/* Initialize RSS indirect table */
 	for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
-		rss_cfg->rss_indirection_tbl[i] = i % hdev->rss_size_max;
+		rss_cfg->rss_indirection_tbl[i] = i % rss_cfg->rss_size;
 
 	ret = hclgevf_set_rss_indir_table(hdev);
 	if (ret)
 		return ret;
 
-	return hclgevf_set_rss_tc_mode(hdev, hdev->rss_size_max);
+	return hclgevf_set_rss_tc_mode(hdev, rss_cfg->rss_size);
 }
 
 static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev)
 {
-	/* other vlan config(like, VLAN TX/RX offload) would also be added
-	 * here later
-	 */
 	return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0,
 				       false);
 }
 
+static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	if (enable) {
+		mod_timer(&hdev->service_timer, jiffies + HZ);
+	} else {
+		del_timer_sync(&hdev->service_timer);
+		cancel_work_sync(&hdev->service_task);
+		clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+	}
+}
+
 static int hclgevf_ae_start(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	int i, queue_id;
 
-	for (i = 0; i < handle->kinfo.num_tqps; i++) {
-		/* ring enable */
-		queue_id = hclgevf_get_queue_id(handle->kinfo.tqp[i]);
-		if (queue_id < 0) {
-			dev_warn(&hdev->pdev->dev,
-				 "Get invalid queue id, ignore it\n");
-			continue;
-		}
-
-		hclgevf_tqp_enable(hdev, queue_id, 0, true);
-	}
-
-	/* reset tqp stats */
 	hclgevf_reset_tqp_stats(handle);
 
 	hclgevf_request_link_info(hdev);
 
+	hclgevf_update_link_mode(hdev);
+
 	clear_bit(HCLGEVF_STATE_DOWN, &hdev->state);
-	mod_timer(&hdev->service_timer, jiffies + HZ);
 
 	return 0;
 }
@@ -1454,36 +2154,60 @@
 static void hclgevf_ae_stop(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	int i, queue_id;
+	int i;
 
 	set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
 
-	for (i = 0; i < hdev->num_tqps; i++) {
-		/* Ring disable */
-		queue_id = hclgevf_get_queue_id(handle->kinfo.tqp[i]);
-		if (queue_id < 0) {
-			dev_warn(&hdev->pdev->dev,
-				 "Get invalid queue id, ignore it\n");
-			continue;
-		}
+	if (hdev->reset_type != HNAE3_VF_RESET)
+		for (i = 0; i < handle->kinfo.num_tqps; i++)
+			if (hclgevf_reset_tqp(handle, i))
+				break;
 
-		hclgevf_tqp_enable(hdev, queue_id, 0, false);
-	}
-
-	/* reset tqp stats */
 	hclgevf_reset_tqp_stats(handle);
-	del_timer_sync(&hdev->service_timer);
-	cancel_work_sync(&hdev->service_task);
-	clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
 	hclgevf_update_link_status(hdev, 0);
 }
 
+static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	u8 msg_data;
+
+	msg_data = alive ? 1 : 0;
+	return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_ALIVE,
+				    0, &msg_data, 1, false, NULL, 0);
+}
+
+static int hclgevf_client_start(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	int ret;
+
+	ret = hclgevf_set_alive(handle, true);
+	if (ret)
+		return ret;
+
+	mod_timer(&hdev->keep_alive_timer, jiffies +
+		  HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ);
+
+	return 0;
+}
+
+static void hclgevf_client_stop(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	int ret;
+
+	ret = hclgevf_set_alive(handle, false);
+	if (ret)
+		dev_warn(&hdev->pdev->dev,
+			 "%s failed %d\n", __func__, ret);
+
+	del_timer_sync(&hdev->keep_alive_timer);
+	cancel_work_sync(&hdev->keep_alive_task);
+}
+
 static void hclgevf_state_init(struct hclgevf_dev *hdev)
 {
-	/* if this is on going reset then skip this initialization */
-	if (hclgevf_dev_ongoing_reset(hdev))
-		return;
-
 	/* setup tasks for the MBX */
 	INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
 	clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
@@ -1506,7 +2230,12 @@
 static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
 {
 	set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+	set_bit(HCLGEVF_STATE_REMOVING, &hdev->state);
 
+	if (hdev->keep_alive_timer.function)
+		del_timer_sync(&hdev->keep_alive_timer);
+	if (hdev->keep_alive_task.func)
+		cancel_work_sync(&hdev->keep_alive_task);
 	if (hdev->service_timer.function)
 		del_timer_sync(&hdev->service_timer);
 	if (hdev->service_task.func)
@@ -1525,17 +2254,14 @@
 	int vectors;
 	int i;
 
-	/* if this is on going reset then skip this initialization */
-	if (hclgevf_dev_ongoing_reset(hdev))
-		return 0;
-
-	if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B))
+	if (hnae3_dev_roce_supported(hdev))
 		vectors = pci_alloc_irq_vectors(pdev,
 						hdev->roce_base_msix_offset + 1,
 						hdev->num_msi,
 						PCI_IRQ_MSIX);
 	else
-		vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+		vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+						hdev->num_msi,
 						PCI_IRQ_MSI | PCI_IRQ_MSIX);
 
 	if (vectors < 0) {
@@ -1551,6 +2277,7 @@
 
 	hdev->num_msi = vectors;
 	hdev->num_msi_left = vectors;
+
 	hdev->base_msi_vector = pdev->irq;
 	hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
 
@@ -1567,6 +2294,7 @@
 	hdev->vector_irq = devm_kcalloc(&pdev->dev, hdev->num_msi,
 					sizeof(int), GFP_KERNEL);
 	if (!hdev->vector_irq) {
+		devm_kfree(&pdev->dev, hdev->vector_status);
 		pci_free_irq_vectors(pdev);
 		return -ENOMEM;
 	}
@@ -1578,16 +2306,14 @@
 {
 	struct pci_dev *pdev = hdev->pdev;
 
+	devm_kfree(&pdev->dev, hdev->vector_status);
+	devm_kfree(&pdev->dev, hdev->vector_irq);
 	pci_free_irq_vectors(pdev);
 }
 
 static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
 {
-	int ret = 0;
-
-	/* if this is on going reset then skip this initialization */
-	if (hclgevf_dev_ongoing_reset(hdev))
-		return 0;
+	int ret;
 
 	hclgevf_get_misc_vector(hdev);
 
@@ -1616,6 +2342,65 @@
 	hclgevf_free_vector(hdev, 0);
 }
 
+static void hclgevf_info_show(struct hclgevf_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+
+	dev_info(dev, "VF info begin:\n");
+
+	dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
+	dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
+	dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
+	dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
+	dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
+	dev_info(dev, "PF media type of this VF: %d\n",
+		 hdev->hw.mac.media_type);
+
+	dev_info(dev, "VF info end.\n");
+}
+
+static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
+					    struct hnae3_client *client)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+	int ret;
+
+	ret = client->ops->init_instance(&hdev->nic);
+	if (ret)
+		return ret;
+
+	set_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	if (netif_msg_drv(&hdev->nic))
+		hclgevf_info_show(hdev);
+
+	return 0;
+}
+
+static int hclgevf_init_roce_client_instance(struct hnae3_ae_dev *ae_dev,
+					     struct hnae3_client *client)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+	int ret;
+
+	if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client ||
+	    !hdev->nic_client)
+		return 0;
+
+	ret = hclgevf_init_roce_base_info(hdev);
+	if (ret)
+		return ret;
+
+	ret = client->ops->init_instance(&hdev->roce);
+	if (ret)
+		return ret;
+
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	return 0;
+}
+
 static int hclgevf_init_client_instance(struct hnae3_client *client,
 					struct hnae3_ae_dev *ae_dev)
 {
@@ -1627,28 +2412,15 @@
 		hdev->nic_client = client;
 		hdev->nic.client = client;
 
-		ret = client->ops->init_instance(&hdev->nic);
+		ret = hclgevf_init_nic_client_instance(ae_dev, client);
 		if (ret)
-			return ret;
+			goto clear_nic;
 
-		if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) {
-			struct hnae3_client *rc = hdev->roce_client;
-
-			ret = hclgevf_init_roce_base_info(hdev);
-			if (ret)
-				return ret;
-			ret = rc->ops->init_instance(&hdev->roce);
-			if (ret)
-				return ret;
-		}
-		break;
-	case HNAE3_CLIENT_UNIC:
-		hdev->nic_client = client;
-		hdev->nic.client = client;
-
-		ret = client->ops->init_instance(&hdev->nic);
+		ret = hclgevf_init_roce_client_instance(ae_dev,
+							hdev->roce_client);
 		if (ret)
-			return ret;
+			goto clear_roce;
+
 		break;
 	case HNAE3_CLIENT_ROCE:
 		if (hnae3_dev_roce_supported(hdev)) {
@@ -1656,18 +2428,25 @@
 			hdev->roce.client = client;
 		}
 
-		if (hdev->roce_client && hdev->nic_client) {
-			ret = hclgevf_init_roce_base_info(hdev);
-			if (ret)
-				return ret;
+		ret = hclgevf_init_roce_client_instance(ae_dev, client);
+		if (ret)
+			goto clear_roce;
 
-			ret = client->ops->init_instance(&hdev->roce);
-			if (ret)
-				return ret;
-		}
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	return 0;
+
+clear_nic:
+	hdev->nic_client = NULL;
+	hdev->nic.client = NULL;
+	return ret;
+clear_roce:
+	hdev->roce_client = NULL;
+	hdev->roce.client = NULL;
+	return ret;
 }
 
 static void hclgevf_uninit_client_instance(struct hnae3_client *client,
@@ -1676,13 +2455,21 @@
 	struct hclgevf_dev *hdev = ae_dev->priv;
 
 	/* un-init roce, if it exists */
-	if (hdev->roce_client)
+	if (hdev->roce_client) {
 		hdev->roce_client->ops->uninit_instance(&hdev->roce, 0);
+		hdev->roce_client = NULL;
+		hdev->roce.client = NULL;
+	}
 
 	/* un-init nic/unic, if this was not called by roce client */
-	if ((client->ops->uninit_instance) &&
-	    (client->type != HNAE3_CLIENT_ROCE))
+	if (client->ops->uninit_instance && hdev->nic_client &&
+	    client->type != HNAE3_CLIENT_ROCE) {
+		clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+
 		client->ops->uninit_instance(&hdev->nic, 0);
+		hdev->nic_client = NULL;
+		hdev->nic.client = NULL;
+	}
 }
 
 static int hclgevf_pci_init(struct hclgevf_dev *hdev)
@@ -1691,14 +2478,6 @@
 	struct hclgevf_hw *hw;
 	int ret;
 
-	/* check if we need to skip initialization of pci. This will happen if
-	 * device is undergoing VF reset. Otherwise, we would need to
-	 * re-initialize pci interface again i.e. when device is not going
-	 * through *any* reset or actually undergoing full reset.
-	 */
-	if (hclgevf_dev_ongoing_reset(hdev))
-		return 0;
-
 	ret = pci_enable_device(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable PCI device\n");
@@ -1764,7 +2543,7 @@
 
 	req = (struct hclgevf_query_res_cmd *)desc.data;
 
-	if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) {
+	if (hnae3_dev_roce_supported(hdev)) {
 		hdev->roce_base_msix_offset =
 		hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
 				HCLGEVF_MSIX_OFT_ROCEE_M,
@@ -1773,6 +2552,9 @@
 		hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
 				HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
 
+		/* the number of NIC MSI-X vectors always equals the RoCE's. */
+		hdev->num_nic_msix = hdev->num_roce_msix;
+
 		/* VF should have NIC vectors and Roce vectors, NIC vectors
 		 * are queued before Roce vectors. The offset is fixed to 64.
 		 */
@@ -1782,8 +2564,98 @@
 		hdev->num_msi =
 		hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
 				HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+
+		hdev->num_nic_msix = hdev->num_msi;
 	}
 
+	if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) {
+		dev_err(&hdev->pdev->dev,
+			"Just %u msi resources, not enough for vf(min:2).\n",
+			hdev->num_nic_msix);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	int ret = 0;
+
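+	/* a full VF reset tears down and re-creates the MSI/MSI-X and misc
+	 * IRQ resources; lesser resets keep them as they are
+	 */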
+	if (hdev->reset_type == HNAE3_VF_FULL_RESET &&
+	    test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+		hclgevf_misc_irq_uninit(hdev);
+		hclgevf_uninit_msi(hdev);
+		clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state);
+	}
+
+	if (!test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+		pci_set_master(pdev);
+		ret = hclgevf_init_msi(hdev);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"failed(%d) to init MSI/MSI-X\n", ret);
+			return ret;
+		}
+
+		ret = hclgevf_misc_irq_init(hdev);
+		if (ret) {
+			hclgevf_uninit_msi(hdev);
+			dev_err(&pdev->dev, "failed(%d) to init Misc IRQ(vector0)\n",
+				ret);
+			return ret;
+		}
+
+		set_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state);
+	}
+
+	return ret;
+}
+
+static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	int ret;
+
+	ret = hclgevf_pci_reset(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "pci reset failed %d\n", ret);
+		return ret;
+	}
+
+	ret = hclgevf_cmd_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "cmd failed %d\n", ret);
+		return ret;
+	}
+
+	ret = hclgevf_rss_init_hw(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed(%d) to initialize RSS\n", ret);
+		return ret;
+	}
+
+	ret = hclgevf_config_gro(hdev, true);
+	if (ret)
+		return ret;
+
+	ret = hclgevf_init_vlan_config(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed(%d) to initialize VLAN config\n", ret);
+		return ret;
+	}
+
+	if (pdev->revision >= 0x21) {
+		ret = hclgevf_set_promisc_mode(hdev, true);
+		if (ret)
+			return ret;
+	}
+
+	dev_info(&hdev->pdev->dev, "Reset done\n");
+
 	return 0;
 }
 
@@ -1792,18 +2664,18 @@
 	struct pci_dev *pdev = hdev->pdev;
 	int ret;
 
-	/* check if device is on-going full reset(i.e. pcie as well) */
-	if (hclgevf_dev_ongoing_full_reset(hdev)) {
-		dev_warn(&pdev->dev, "device is going full reset\n");
-		hclgevf_uninit_hdev(hdev);
-	}
-
 	ret = hclgevf_pci_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "PCI initialization failed\n");
 		return ret;
 	}
 
+	ret = hclgevf_cmd_queue_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cmd queue init failed: %d\n", ret);
+		goto err_cmd_queue_init;
+	}
+
 	ret = hclgevf_cmd_init(hdev);
 	if (ret)
 		goto err_cmd_init;
@@ -1813,16 +2685,17 @@
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Query vf status error, ret = %d.\n", ret);
-		goto err_query_vf;
+		goto err_cmd_init;
 	}
 
 	ret = hclgevf_init_msi(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed(%d) to init MSI/MSI-X\n", ret);
-		goto err_query_vf;
+		goto err_cmd_init;
 	}
 
 	hclgevf_state_init(hdev);
+	hdev->reset_level = HNAE3_VF_FUNC_RESET;
 
 	ret = hclgevf_misc_irq_init(hdev);
 	if (ret) {
@@ -1831,6 +2704,8 @@
 		goto err_misc_irq_init;
 	}
 
+	set_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state);
+
 	ret = hclgevf_configure(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed(%d) to fetch configuration\n", ret);
@@ -1849,12 +2724,19 @@
 		goto err_config;
 	}
 
-	/* Initialize mta type for this VF */
-	ret = hclgevf_cfg_func_mta_type(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed(%d) to initialize MTA type\n", ret);
+	ret = hclgevf_config_gro(hdev, true);
+	if (ret)
 		goto err_config;
+
+	/* vf is not allowed to enable unicast/multicast promisc mode.
+	 * For revision 0x20, broadcast promisc mode is disabled by default;
+	 * the firmware makes sure broadcast packets can still be accepted.
+	 * For revision 0x21, broadcast promisc mode is enabled by default.
+	 */
+	if (pdev->revision >= 0x21) {
+		ret = hclgevf_set_promisc_mode(hdev, true);
+		if (ret)
+			goto err_config;
 	}
 
 	/* Initialize RSS for this VF */
@@ -1872,7 +2754,9 @@
 		goto err_config;
 	}
 
-	pr_info("finished initializing %s driver\n", HCLGEVF_DRIVER_NAME);
+	hdev->last_reset_time = jiffies;
+	dev_info(&hdev->pdev->dev, "finished initializing %s driver\n",
+		 HCLGEVF_DRIVER_NAME);
 
 	return 0;
 
@@ -1881,25 +2765,31 @@
 err_misc_irq_init:
 	hclgevf_state_uninit(hdev);
 	hclgevf_uninit_msi(hdev);
-err_query_vf:
-	hclgevf_cmd_uninit(hdev);
 err_cmd_init:
+	hclgevf_cmd_uninit(hdev);
+err_cmd_queue_init:
 	hclgevf_pci_uninit(hdev);
+	clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state);
 	return ret;
 }
 
 static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 {
 	hclgevf_state_uninit(hdev);
-	hclgevf_misc_irq_uninit(hdev);
-	hclgevf_cmd_uninit(hdev);
-	hclgevf_uninit_msi(hdev);
+
+	if (test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+		hclgevf_misc_irq_uninit(hdev);
+		hclgevf_uninit_msi(hdev);
+	}
+
 	hclgevf_pci_uninit(hdev);
+	hclgevf_cmd_uninit(hdev);
 }
 
 static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct pci_dev *pdev = ae_dev->pdev;
+	struct hclgevf_dev *hdev;
 	int ret;
 
 	ret = hclgevf_alloc_hdev(ae_dev);
@@ -1909,10 +2799,16 @@
 	}
 
 	ret = hclgevf_init_hdev(ae_dev->priv);
-	if (ret)
+	if (ret) {
 		dev_err(&pdev->dev, "hclge device initialization failed\n");
+		return ret;
+	}
 
-	return ret;
+	hdev = ae_dev->priv;
+	timer_setup(&hdev->keep_alive_timer, hclgevf_keep_alive_timer, 0);
+	INIT_WORK(&hdev->keep_alive_task, hclgevf_keep_alive_task);
+
+	return 0;
 }
 
 static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
@@ -1928,7 +2824,8 @@
 	struct hnae3_handle *nic = &hdev->nic;
 	struct hnae3_knic_private_info *kinfo = &nic->kinfo;
 
-	return min_t(u32, hdev->rss_size_max * kinfo->num_tc, hdev->num_tqps);
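+	/* report the per-TC maximum: divide the queue budget by the number
+	 * of TCs before clamping to the RSS limit
+	 */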
+	return min_t(u32, hdev->rss_size_max,
+		     hdev->num_tqps / kinfo->num_tc);
 }
 
 /**
@@ -1949,18 +2846,89 @@
 	ch->max_combined = hclgevf_get_max_channels(hdev);
 	ch->other_count = 0;
 	ch->max_other = 0;
-	ch->combined_count = hdev->num_tqps;
+	ch->combined_count = handle->kinfo.rss_size;
 }
 
 static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle,
-					  u16 *free_tqps, u16 *max_rss_size)
+					  u16 *alloc_tqps, u16 *max_rss_size)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
-	*free_tqps = 0;
+	*alloc_tqps = hdev->num_tqps;
 	*max_rss_size = hdev->rss_size_max;
 }
 
+static void hclgevf_update_rss_size(struct hnae3_handle *handle,
+				    u32 new_tqps_num)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	u16 max_rss_size;
+
+	kinfo->req_rss_size = new_tqps_num;
+
+	max_rss_size = min_t(u16, hdev->rss_size_max,
+			     hdev->num_tqps / kinfo->num_tc);
+
+	/* Use the user's configuration when it is not larger than
+	 * max_rss_size, otherwise, use the maximum specification value.
+	 */
+	if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size &&
+	    kinfo->req_rss_size <= max_rss_size)
+		kinfo->rss_size = kinfo->req_rss_size;
+	else if (kinfo->rss_size > max_rss_size ||
+		 (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size))
+		kinfo->rss_size = max_rss_size;
+
+	kinfo->num_tqps = kinfo->num_tc * kinfo->rss_size;
+}
+
+static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
+				bool rxfh_configured)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	u16 cur_rss_size = kinfo->rss_size;
+	u16 cur_tqps = kinfo->num_tqps;
+	u32 *rss_indir;
+	unsigned int i;
+	int ret;
+
+	hclgevf_update_rss_size(handle, new_tqps_num);
+
+	ret = hclgevf_set_rss_tc_mode(hdev, kinfo->rss_size);
+	if (ret)
+		return ret;
+
+	/* RSS indirection table has been configured by the user */
+	if (rxfh_configured)
+		goto out;
+
+	/* Reinitialize the RSS indirection table according to the new RSS size */
+	rss_indir = kcalloc(HCLGEVF_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
+	if (!rss_indir)
+		return -ENOMEM;
+
+	for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
+		rss_indir[i] = i % kinfo->rss_size;
+
+	ret = hclgevf_set_rss(handle, rss_indir, NULL, 0);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "set rss indir table fail, ret=%d\n",
+			ret);
+
+	kfree(rss_indir);
+
+out:
+	if (!ret)
+		dev_info(&hdev->pdev->dev,
+			 "Channels changed, rss_size from %u to %u, tqps from %u to %u\n",
+			 cur_rss_size, kinfo->rss_size,
+			 cur_tqps, kinfo->rss_size * kinfo->num_tc);
+
+	return ret;
+}
+
 static int hclgevf_get_status(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -1989,26 +2957,169 @@
 	hdev->hw.mac.duplex = duplex;
 }
 
+static int hclgevf_gro_en(struct hnae3_handle *handle, bool enable)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return hclgevf_config_gro(hdev, enable);
+}
+
+static void hclgevf_get_media_type(struct hnae3_handle *handle, u8 *media_type,
+				   u8 *module_type)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	if (media_type)
+		*media_type = hdev->hw.mac.media_type;
+
+	if (module_type)
+		*module_type = hdev->hw.mac.module_type;
+}
+
+static bool hclgevf_get_hw_reset_stat(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return !!hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
+}
+
+static bool hclgevf_ae_dev_resetting(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+}
+
+static unsigned long hclgevf_ae_dev_reset_cnt(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return hdev->rst_stats.hw_rst_done_cnt;
+}
+
+static void hclgevf_get_link_mode(struct hnae3_handle *handle,
+				  unsigned long *supported,
+				  unsigned long *advertising)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	*supported = hdev->hw.mac.supported;
+	*advertising = hdev->hw.mac.advertising;
+}
+
+#define MAX_SEPARATE_NUM	4
+#define SEPARATOR_VALUE		0xFFFFFFFF
+#define REG_NUM_PER_LINE	4
+#define REG_LEN_PER_LINE	(REG_NUM_PER_LINE * sizeof(u32))
+
+static int hclgevf_get_regs_len(struct hnae3_handle *handle)
+{
+	int cmdq_lines, common_lines, ring_lines, tqp_intr_lines;
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
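+	/* each register block is rounded up by one extra line to cover the
+	 * SEPARATOR_VALUE padding emitted by hclgevf_get_regs()
+	 */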
+	cmdq_lines = sizeof(cmdq_reg_addr_list) / REG_LEN_PER_LINE + 1;
+	common_lines = sizeof(common_reg_addr_list) / REG_LEN_PER_LINE + 1;
+	ring_lines = sizeof(ring_reg_addr_list) / REG_LEN_PER_LINE + 1;
+	tqp_intr_lines = sizeof(tqp_intr_reg_addr_list) / REG_LEN_PER_LINE + 1;
+
+	return (cmdq_lines + common_lines + ring_lines * hdev->num_tqps +
+		tqp_intr_lines * (hdev->num_msi_used - 1)) * REG_LEN_PER_LINE;
+}
+
+static void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version,
+			     void *data)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	int i, j, reg_um, separator_num;
+	u32 *reg = data;
+
+	*version = hdev->fw_version;
+
+	/* fetching per-VF registers values from VF PCIe register space */
+	reg_um = sizeof(cmdq_reg_addr_list) / sizeof(u32);
+	separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE;
+	for (i = 0; i < reg_um; i++)
+		*reg++ = hclgevf_read_dev(&hdev->hw, cmdq_reg_addr_list[i]);
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
+
+	reg_um = sizeof(common_reg_addr_list) / sizeof(u32);
+	separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE;
+	for (i = 0; i < reg_um; i++)
+		*reg++ = hclgevf_read_dev(&hdev->hw, common_reg_addr_list[i]);
+	for (i = 0; i < separator_num; i++)
+		*reg++ = SEPARATOR_VALUE;
+
+	reg_um = sizeof(ring_reg_addr_list) / sizeof(u32);
+	separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE;
+	for (j = 0; j < hdev->num_tqps; j++) {
+		for (i = 0; i < reg_um; i++)
+			*reg++ = hclgevf_read_dev(&hdev->hw,
+						  ring_reg_addr_list[i] +
+						  0x200 * j);
+		for (i = 0; i < separator_num; i++)
+			*reg++ = SEPARATOR_VALUE;
+	}
+
+	reg_um = sizeof(tqp_intr_reg_addr_list) / sizeof(u32);
+	separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE;
+	for (j = 0; j < hdev->num_msi_used - 1; j++) {
+		for (i = 0; i < reg_um; i++)
+			*reg++ = hclgevf_read_dev(&hdev->hw,
+						  tqp_intr_reg_addr_list[i] +
+						  4 * j);
+		for (i = 0; i < separator_num; i++)
+			*reg++ = SEPARATOR_VALUE;
+	}
+}
+
+void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
+					u8 *port_base_vlan_info, u8 data_size)
+{
+	struct hnae3_handle *nic = &hdev->nic;
+
+	rtnl_lock();
+	hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	rtnl_unlock();
+
+	/* send msg to PF and wait for the port based vlan info to be updated */
+	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+			     HCLGE_MBX_PORT_BASE_VLAN_CFG,
+			     port_base_vlan_info, data_size,
+			     false, NULL, 0);
+
+	if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
+	else
+		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
+	rtnl_lock();
+	hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+	rtnl_unlock();
+}
+
 static const struct hnae3_ae_ops hclgevf_ops = {
 	.init_ae_dev = hclgevf_init_ae_dev,
 	.uninit_ae_dev = hclgevf_uninit_ae_dev,
+	.flr_prepare = hclgevf_flr_prepare,
+	.flr_done = hclgevf_flr_done,
 	.init_client_instance = hclgevf_init_client_instance,
 	.uninit_client_instance = hclgevf_uninit_client_instance,
 	.start = hclgevf_ae_start,
 	.stop = hclgevf_ae_stop,
+	.client_start = hclgevf_client_start,
+	.client_stop = hclgevf_client_stop,
 	.map_ring_to_vector = hclgevf_map_ring_to_vector,
 	.unmap_ring_from_vector = hclgevf_unmap_ring_from_vector,
 	.get_vector = hclgevf_get_vector,
 	.put_vector = hclgevf_put_vector,
 	.reset_queue = hclgevf_reset_tqp,
-	.set_promisc_mode = hclgevf_set_promisc_mode,
 	.get_mac_addr = hclgevf_get_mac_addr,
 	.set_mac_addr = hclgevf_set_mac_addr,
 	.add_uc_addr = hclgevf_add_uc_addr,
 	.rm_uc_addr = hclgevf_rm_uc_addr,
 	.add_mc_addr = hclgevf_add_mc_addr,
 	.rm_mc_addr = hclgevf_rm_mc_addr,
-	.update_mta_status = hclgevf_update_mta_status,
 	.get_stats = hclgevf_get_stats,
 	.update_stats = hclgevf_update_stats,
 	.get_strings = hclgevf_get_strings,
@@ -2017,15 +3128,30 @@
 	.get_rss_indir_size = hclgevf_get_rss_indir_size,
 	.get_rss = hclgevf_get_rss,
 	.set_rss = hclgevf_set_rss,
+	.get_rss_tuple = hclgevf_get_rss_tuple,
+	.set_rss_tuple = hclgevf_set_rss_tuple,
 	.get_tc_size = hclgevf_get_tc_size,
 	.get_fw_version = hclgevf_get_fw_version,
 	.set_vlan_filter = hclgevf_set_vlan_filter,
 	.enable_hw_strip_rxvtag = hclgevf_en_hw_strip_rxvtag,
 	.reset_event = hclgevf_reset_event,
+	.set_default_reset_request = hclgevf_set_def_reset_request,
+	.set_channels = hclgevf_set_channels,
 	.get_channels = hclgevf_get_channels,
 	.get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
+	.get_regs_len = hclgevf_get_regs_len,
+	.get_regs = hclgevf_get_regs,
 	.get_status = hclgevf_get_status,
 	.get_ksettings_an_result = hclgevf_get_ksettings_an_result,
+	.get_media_type = hclgevf_get_media_type,
+	.get_hw_reset_stat = hclgevf_get_hw_reset_stat,
+	.ae_dev_resetting = hclgevf_ae_dev_resetting,
+	.ae_dev_reset_cnt = hclgevf_ae_dev_reset_cnt,
+	.set_gro_en = hclgevf_gro_en,
+	.set_mtu = hclgevf_set_mtu,
+	.get_global_queue_id = hclgevf_get_qid_global,
+	.set_timer_task = hclgevf_set_timer_task,
+	.get_link_mode = hclgevf_get_link_mode,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index b23ba17..2b8d6bc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -4,6 +4,7 @@
 #ifndef __HCLGEVF_MAIN_H
 #define __HCLGEVF_MAIN_H
 #include <linux/fs.h>
+#include <linux/if_vlan.h>
 #include <linux/types.h>
 #include "hclge_mbx.h"
 #include "hclgevf_cmd.h"
@@ -12,9 +13,12 @@
 #define HCLGEVF_MOD_VERSION "1.0"
 #define HCLGEVF_DRIVER_NAME "hclgevf"
 
+#define HCLGEVF_MAX_VLAN_ID	4095
 #define HCLGEVF_MISC_VECTOR_NUM		0
 
 #define HCLGEVF_INVALID_VPORT		0xffff
+#define HCLGEVF_GENERAL_TASK_INTERVAL	  5
+#define HCLGEVF_KEEP_ALIVE_TASK_INTERVAL  2
 
 /* This number actually depends upon the total number of VFs
  * created by the physical function. But the maximum number of
@@ -27,15 +31,82 @@
 #define HCLGEVF_VECTOR_REG_OFFSET	0x4
 #define HCLGEVF_VECTOR_VF_OFFSET		0x100000
 
+/* bar registers for cmdq */
+#define HCLGEVF_CMDQ_TX_ADDR_L_REG		0x27000
+#define HCLGEVF_CMDQ_TX_ADDR_H_REG		0x27004
+#define HCLGEVF_CMDQ_TX_DEPTH_REG		0x27008
+#define HCLGEVF_CMDQ_TX_TAIL_REG		0x27010
+#define HCLGEVF_CMDQ_TX_HEAD_REG		0x27014
+#define HCLGEVF_CMDQ_RX_ADDR_L_REG		0x27018
+#define HCLGEVF_CMDQ_RX_ADDR_H_REG		0x2701C
+#define HCLGEVF_CMDQ_RX_DEPTH_REG		0x27020
+#define HCLGEVF_CMDQ_RX_TAIL_REG		0x27024
+#define HCLGEVF_CMDQ_RX_HEAD_REG		0x27028
+#define HCLGEVF_CMDQ_INTR_SRC_REG		0x27100
+#define HCLGEVF_CMDQ_INTR_STS_REG		0x27104
+#define HCLGEVF_CMDQ_INTR_EN_REG		0x27108
+#define HCLGEVF_CMDQ_INTR_GEN_REG		0x2710C
+
+/* bar registers for common func */
+#define HCLGEVF_GRO_EN_REG			0x28000
+
+/* bar registers for rcb */
+#define HCLGEVF_RING_RX_ADDR_L_REG		0x80000
+#define HCLGEVF_RING_RX_ADDR_H_REG		0x80004
+#define HCLGEVF_RING_RX_BD_NUM_REG		0x80008
+#define HCLGEVF_RING_RX_BD_LENGTH_REG		0x8000C
+#define HCLGEVF_RING_RX_MERGE_EN_REG		0x80014
+#define HCLGEVF_RING_RX_TAIL_REG		0x80018
+#define HCLGEVF_RING_RX_HEAD_REG		0x8001C
+#define HCLGEVF_RING_RX_FBD_NUM_REG		0x80020
+#define HCLGEVF_RING_RX_OFFSET_REG		0x80024
+#define HCLGEVF_RING_RX_FBD_OFFSET_REG		0x80028
+#define HCLGEVF_RING_RX_STASH_REG		0x80030
+#define HCLGEVF_RING_RX_BD_ERR_REG		0x80034
+#define HCLGEVF_RING_TX_ADDR_L_REG		0x80040
+#define HCLGEVF_RING_TX_ADDR_H_REG		0x80044
+#define HCLGEVF_RING_TX_BD_NUM_REG		0x80048
+#define HCLGEVF_RING_TX_PRIORITY_REG		0x8004C
+#define HCLGEVF_RING_TX_TC_REG			0x80050
+#define HCLGEVF_RING_TX_MERGE_EN_REG		0x80054
+#define HCLGEVF_RING_TX_TAIL_REG		0x80058
+#define HCLGEVF_RING_TX_HEAD_REG		0x8005C
+#define HCLGEVF_RING_TX_FBD_NUM_REG		0x80060
+#define HCLGEVF_RING_TX_OFFSET_REG		0x80064
+#define HCLGEVF_RING_TX_EBD_NUM_REG		0x80068
+#define HCLGEVF_RING_TX_EBD_OFFSET_REG		0x80070
+#define HCLGEVF_RING_TX_BD_ERR_REG		0x80074
+#define HCLGEVF_RING_EN_REG			0x80090
+
+/* bar registers for tqp interrupt */
+#define HCLGEVF_TQP_INTR_CTRL_REG		0x20000
+#define HCLGEVF_TQP_INTR_GL0_REG		0x20100
+#define HCLGEVF_TQP_INTR_GL1_REG		0x20200
+#define HCLGEVF_TQP_INTR_GL2_REG		0x20300
+#define HCLGEVF_TQP_INTR_RL_REG			0x20900
+
 /* Vector0 interrupt CMDQ event source register(RW) */
 #define HCLGEVF_VECTOR0_CMDQ_SRC_REG	0x27100
+/* Vector0 interrupt CMDQ event status register(RO) */
+#define HCLGEVF_VECTOR0_CMDQ_STAT_REG	0x27104
 /* CMDQ register bits for RX event(=MBX event) */
 #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B	1
+/* RST register bits for RESET event */
+#define HCLGEVF_VECTOR0_RST_INT_B	2
 
 #define HCLGEVF_TQP_RESET_TRY_TIMES	10
 /* Reset related Registers */
-#define HCLGEVF_FUN_RST_ING		0x20C00
-#define HCLGEVF_FUN_RST_ING_B		0
+#define HCLGEVF_RST_ING			0x20C00
+#define HCLGEVF_FUN_RST_ING_BIT		BIT(0)
+#define HCLGEVF_GLOBAL_RST_ING_BIT	BIT(5)
+#define HCLGEVF_CORE_RST_ING_BIT	BIT(6)
+#define HCLGEVF_IMP_RST_ING_BIT		BIT(7)
+#define HCLGEVF_RST_ING_BITS \
+	(HCLGEVF_FUN_RST_ING_BIT | HCLGEVF_GLOBAL_RST_ING_BIT | \
+	 HCLGEVF_CORE_RST_ING_BIT | HCLGEVF_IMP_RST_ING_BIT)
+
+#define HCLGEVF_VF_RST_ING		0x07008
+#define HCLGEVF_VF_RST_ING_BIT		BIT(16)
 
 #define HCLGEVF_RSS_IND_TBL_SIZE		512
 #define HCLGEVF_RSS_SET_BITMAP_MSK	0xffff
@@ -46,30 +117,50 @@
 #define HCLGEVF_RSS_HASH_ALGO_MASK	0xf
 #define HCLGEVF_RSS_CFG_TBL_NUM \
 	(HCLGEVF_RSS_IND_TBL_SIZE / HCLGEVF_RSS_CFG_TBL_SIZE)
+#define HCLGEVF_RSS_INPUT_TUPLE_OTHER	GENMASK(3, 0)
+#define HCLGEVF_RSS_INPUT_TUPLE_SCTP	GENMASK(4, 0)
+#define HCLGEVF_D_PORT_BIT		BIT(0)
+#define HCLGEVF_S_PORT_BIT		BIT(1)
+#define HCLGEVF_D_IP_BIT		BIT(2)
+#define HCLGEVF_S_IP_BIT		BIT(3)
+#define HCLGEVF_V_TAG_BIT		BIT(4)
 
-#define HCLGEVF_MTA_TBL_SIZE		4096
-#define HCLGEVF_MTA_TYPE_SEL_MAX	4
+#define HCLGEVF_STATS_TIMER_INTERVAL	36U
+
+enum hclgevf_evt_cause {
+	HCLGEVF_VECTOR0_EVENT_RST,
+	HCLGEVF_VECTOR0_EVENT_MBX,
+	HCLGEVF_VECTOR0_EVENT_OTHER,
+};
 
 /* states of hclgevf device & tasks */
 enum hclgevf_states {
 	/* device states */
 	HCLGEVF_STATE_DOWN,
 	HCLGEVF_STATE_DISABLED,
+	HCLGEVF_STATE_IRQ_INITED,
+	HCLGEVF_STATE_REMOVING,
+	HCLGEVF_STATE_NIC_REGISTERED,
 	/* task states */
 	HCLGEVF_STATE_SERVICE_SCHED,
 	HCLGEVF_STATE_RST_SERVICE_SCHED,
 	HCLGEVF_STATE_RST_HANDLING,
 	HCLGEVF_STATE_MBX_SERVICE_SCHED,
 	HCLGEVF_STATE_MBX_HANDLING,
+	HCLGEVF_STATE_CMD_DISABLE,
 };
 
 #define HCLGEVF_MPF_ENBALE 1
 
 struct hclgevf_mac {
+	u8 media_type;
+	u8 module_type;
 	u8 mac_addr[ETH_ALEN];
 	int link;
 	u8 duplex;
 	u32 speed;
+	u64 supported;
+	u64 advertising;
 };
 
 struct hclgevf_hw {
@@ -108,12 +199,24 @@
 	u32 numa_node_map;
 };
 
+struct hclgevf_rss_tuple_cfg {
+	u8 ipv4_tcp_en;
+	u8 ipv4_udp_en;
+	u8 ipv4_sctp_en;
+	u8 ipv4_fragment_en;
+	u8 ipv6_tcp_en;
+	u8 ipv6_udp_en;
+	u8 ipv6_sctp_en;
+	u8 ipv6_fragment_en;
+};
+
 struct hclgevf_rss_cfg {
 	u8  rss_hash_key[HCLGEVF_RSS_KEY_SIZE]; /* user configured hash keys */
 	u32 hash_algo;
 	u32 rss_size;
 	u8 hw_tc_map;
 	u8  rss_indirection_tbl[HCLGEVF_RSS_IND_TBL_SIZE]; /* shadow table */
+	struct hclgevf_rss_tuple_cfg rss_tuple_sets;
 };
 
 struct hclgevf_misc_vector {
@@ -121,6 +224,16 @@
 	int vector_irq;
 };
 
+struct hclgevf_rst_stats {
+	u32 rst_cnt;			/* the number of resets */
+	u32 vf_func_rst_cnt;		/* the number of VF function resets */
+	u32 flr_rst_cnt;		/* the number of FLRs */
+	u32 vf_rst_cnt;			/* the number of VF resets */
+	u32 rst_done_cnt;		/* the number of completed resets */
+	u32 hw_rst_done_cnt;		/* the number of completed HW resets */
+	u32 rst_fail_cnt;		/* the number of failed VF resets */
+};
+
 struct hclgevf_dev {
 	struct pci_dev *pdev;
 	struct hnae3_ae_dev *ae_dev;
@@ -128,10 +241,17 @@
 	struct hclgevf_misc_vector misc_vector;
 	struct hclgevf_rss_cfg rss_cfg;
 	unsigned long state;
+	unsigned long flr_state;
+	unsigned long default_reset_request;
+	unsigned long last_reset_time;
+	enum hnae3_reset_type reset_level;
+	unsigned long reset_pending;
+	enum hnae3_reset_type reset_type;
 
 #define HCLGEVF_RESET_REQUESTED		0
 #define HCLGEVF_RESET_PENDING		1
 	unsigned long reset_state;	/* requested, pending */
+	struct hclgevf_rst_stats rst_stats;
 	u32 reset_attempts;
 
 	u32 fw_version;
@@ -143,12 +263,14 @@
 	u16 num_alloc_vport;	/* num vports this driver supports */
 	u32 numa_node_mask;
 	u16 rx_buf_len;
-	u16 num_desc;
+	u16 num_tx_desc;	/* desc num per tx queue */
+	u16 num_rx_desc;	/* desc num per rx queue */
 	u8 hw_tc_map;
 
 	u16 num_msi;
 	u16 num_msi_left;
 	u16 num_msi_used;
+	u16 num_nic_msix;	/* Num of nic vectors for this VF */
 	u16 num_roce_msix;	/* Num of roce vectors for this VF */
 	u16 roce_base_msix_offset;
 	int roce_base_vector;
@@ -156,14 +278,16 @@
 	u16 *vector_status;
 	int *vector_irq;
 
-	bool accept_mta_mc; /* whether to accept mta filter multicast */
-	u8 mta_mac_sel_type;
+	unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)];
+
 	bool mbx_event_pending;
 	struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
 	struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
 
 	struct timer_list service_timer;
+	struct timer_list keep_alive_timer;
 	struct work_struct service_task;
+	struct work_struct keep_alive_task;
 	struct work_struct rst_service_task;
 	struct work_struct mbx_service_task;
 
@@ -175,20 +299,12 @@
 	struct hnae3_client *nic_client;
 	struct hnae3_client *roce_client;
 	u32 flag;
+	u32 stats_timer;
 };
 
-static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev)
 {
-	return (hdev &&
-		(test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
-		(hdev->nic.reset_level == HNAE3_VF_RESET));
-}
-
-static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
-{
-	return (hdev &&
-		(test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
-		(hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+	return !!hdev->reset_pending;
 }
 
 int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
@@ -202,4 +318,6 @@
 				 u8 duplex);
 void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
 void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
+					u8 *port_base_vlan_info, u8 data_size);
 #endif
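A minimal sketch of how the widened reset-status mask above is meant to be consumed, assuming hclgevf_read_dev() as used elsewhere in this driver; hclgevf_reset_done() is hypothetical, not part of the patch:

static bool hclgevf_reset_done(struct hclgevf_dev *hdev)
{
	/* true once none of function/global/core/IMP reset is asserted */
	u32 val = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);

	return !(val & HCLGEVF_RST_ING_BITS);
}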
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index e9d5a4f..a108191 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -26,7 +26,7 @@
 				u8 *resp_data, u16 resp_len)
 {
 #define HCLGEVF_MAX_TRY_TIMES	500
-#define HCLGEVF_SLEEP_USCOEND	1000
+#define HCLGEVF_SLEEP_USECOND	1000
 	struct hclgevf_mbx_resp_status *mbx_resp;
 	u16 r_code0, r_code1;
 	int i = 0;
@@ -40,14 +40,17 @@
 	}
 
 	while ((!hdev->mbx_resp.received_resp) && (i < HCLGEVF_MAX_TRY_TIMES)) {
-		udelay(HCLGEVF_SLEEP_USCOEND);
+		if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state))
+			return -EIO;
+
+		usleep_range(HCLGEVF_SLEEP_USECOND, HCLGEVF_SLEEP_USECOND * 2);
 		i++;
 	}
 
 	if (i >= HCLGEVF_MAX_TRY_TIMES) {
 		dev_err(&hdev->pdev->dev,
-			"VF could not get mbx resp(=%d) from PF in %d tries\n",
-			hdev->mbx_resp.received_resp, i);
+			"VF could not get mbx(%d,%d) resp(=%d) from PF in %d tries\n",
+			code0, code1, hdev->mbx_resp.received_resp, i);
 		return -EIO;
 	}
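The switch from udelay() to usleep_range() keeps the same retry budget but sleeps instead of busy-waiting; the worst case is easy to bound (illustrative arithmetic only):

/* HCLGEVF_MAX_TRY_TIMES iterations of usleep_range(1000, 2000) us
 *   => 500 * [1 ms, 2 ms] = 0.5 s .. 1 s worst-case wait,
 * with the CPU yielded for the whole poll instead of spinning.
 */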
 
@@ -65,8 +68,11 @@
 
 	if (!(r_code0 == code0 && r_code1 == code1 && !mbx_resp->resp_status)) {
 		dev_err(&hdev->pdev->dev,
-			"VF could not match resp code(code0=%d,code1=%d), %d",
+			"VF could not match resp code(code0=%d,code1=%d), %d\n",
 			code0, code1, mbx_resp->resp_status);
+		dev_err(&hdev->pdev->dev,
+			"VF could not match resp r_code(r_code0=%d,r_code1=%d)\n",
+			r_code0, r_code1);
 		return -EIO;
 	}
 
@@ -92,9 +98,12 @@
 	}
 
 	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
+	req->mbx_need_resp |= need_resp ? HCLGE_MBX_NEED_RESP_BIT :
+					  ~HCLGE_MBX_NEED_RESP_BIT;
 	req->msg[0] = code;
 	req->msg[1] = subcode;
-	memcpy(&req->msg[2], msg_data, msg_len);
+	if (msg_data)
+		memcpy(&req->msg[2], msg_data, msg_len);
 
 	/* synchronous send */
 	if (need_resp) {
@@ -148,6 +157,11 @@
 	crq = &hdev->hw.cmq.crq;
 
 	while (!hclgevf_cmd_crq_empty(&hdev->hw)) {
+		if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) {
+			dev_info(&hdev->pdev->dev, "vf crq need init\n");
+			return;
+		}
+
 		desc = &crq->desc[crq->next_to_use];
 		req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
 
@@ -189,6 +203,8 @@
 			break;
 		case HCLGE_MBX_LINK_STAT_CHANGE:
 		case HCLGE_MBX_ASSERTING_RESET:
+		case HCLGE_MBX_LINK_STAT_MODE:
+		case HCLGE_MBX_PUSH_VLAN_INFO:
 			/* set this mbx event as pending. This is required as we
 			 * might lose the interrupt event when mbx task is busy
 			 * handling. This shall be cleared when mbx task just
@@ -199,7 +215,8 @@
 			/* we will drop the async msg if we find ARQ as full
 			 * and continue with next message
 			 */
-			if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+			if (atomic_read(&hdev->arq.count) >=
+			    HCLGE_MBX_MAX_ARQ_MSG_NUM) {
 				dev_warn(&hdev->pdev->dev,
 					 "Async Q full, dropping msg(%d)\n",
 					 req->msg[1]);
@@ -211,7 +228,7 @@
 			memcpy(&msg_q[0], req->msg,
 			       HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
 			hclge_mbx_tail_ptr_move_arq(hdev->arq);
-			hdev->arq.count++;
+			atomic_inc(&hdev->arq.count);
 
 			hclgevf_mbx_task_schedule(hdev);
 
@@ -233,11 +250,13 @@
 
 void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 {
-	u16 link_status;
-	u16 *msg_q;
+	enum hnae3_reset_type reset_type;
+	u16 link_status, state;
+	u16 *msg_q, *vlan_info;
 	u8 duplex;
 	u32 speed;
 	u32 tail;
+	u8 idx;
 
 	/* we can safely clear it now as we are at start of the async message
 	 * processing
@@ -248,30 +267,52 @@
 
 	/* process all the async queue messages */
 	while (tail != hdev->arq.head) {
+		if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) {
+			dev_info(&hdev->pdev->dev,
+				 "vf crq need init in async\n");
+			return;
+		}
+
 		msg_q = hdev->arq.msg_q[hdev->arq.head];
 
 		switch (msg_q[0]) {
 		case HCLGE_MBX_LINK_STAT_CHANGE:
-			link_status = le16_to_cpu(msg_q[1]);
+			link_status = msg_q[1];
 			memcpy(&speed, &msg_q[2], sizeof(speed));
-			duplex = (u8)le16_to_cpu(msg_q[4]);
+			duplex = (u8)msg_q[4];
 
 			/* update upper layer with new link status */
 			hclgevf_update_link_status(hdev, link_status);
 			hclgevf_update_speed_duplex(hdev, speed, duplex);
 
 			break;
+		case HCLGE_MBX_LINK_STAT_MODE:
+			idx = (u8)msg_q[1];
+			if (idx)
+				memcpy(&hdev->hw.mac.supported, &msg_q[2],
+				       sizeof(unsigned long));
+			else
+				memcpy(&hdev->hw.mac.advertising, &msg_q[2],
+				       sizeof(unsigned long));
+			break;
 		case HCLGE_MBX_ASSERTING_RESET:
 			/* PF has asserted reset hence VF should go in pending
 			 * state and poll for the hardware reset status till it
 			 * has been completely reset. After this stack should
 			 * eventually be re-initialized.
 			 */
-			hdev->nic.reset_level = HNAE3_VF_RESET;
+			reset_type = (enum hnae3_reset_type)msg_q[1];
+			set_bit(reset_type, &hdev->reset_pending);
 			set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
 			hclgevf_reset_task_schedule(hdev);
 
 			break;
+		case HCLGE_MBX_PUSH_VLAN_INFO:
+			state = msg_q[1];
+			vlan_info = &msg_q[1];
+			hclgevf_update_port_base_vlan_info(hdev, state,
+							   (u8 *)vlan_info, 8);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
 				"fetched unsupported(%d) message from arq\n",
@@ -280,7 +321,7 @@
 		}
 
 		hclge_mbx_head_ptr_move_arq(hdev->arq);
-		hdev->arq.count--;
+		atomic_dec(&hdev->arq.count);
 		msg_q = hdev->arq.msg_q[hdev->arq.head];
 	}
 }
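Converting arq.count to atomic_t closes a lost-update race between the IRQ-context producer and the task-context consumer; a minimal sketch of the resulting pattern (mirrors the hunks above; illustrative only):

/* producer -- hclgevf_mbx_handler(), interrupt context */
if (atomic_read(&hdev->arq.count) >= HCLGE_MBX_MAX_ARQ_MSG_NUM)
	return;			/* queue full, drop the message */
atomic_inc(&hdev->arq.count);

/* consumer -- hclgevf_mbx_async_handler(), task context */
atomic_dec(&hdev->arq.count);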
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 017e084..7df5d7d 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2014-2015 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/acpi.h>
@@ -39,7 +35,7 @@
 };
 
 struct hns_mdio_device {
-	void *vbase;		/* mdio reg base address */
+	u8 __iomem *vbase;		/* mdio reg base address */
 	struct regmap *subctrl_vbase;
 	struct hns_mdio_sc_reg sc_reg;
 };
@@ -96,21 +92,17 @@
 #define MDIO_SC_CLK_ST		0x531C
 #define MDIO_SC_RESET_ST	0x5A1C
 
-static void mdio_write_reg(void *base, u32 reg, u32 value)
+static void mdio_write_reg(u8 __iomem *base, u32 reg, u32 value)
 {
-	u8 __iomem *reg_addr = (u8 __iomem *)base;
-
-	writel_relaxed(value, reg_addr + reg);
+	writel_relaxed(value, base + reg);
 }
 
 #define MDIO_WRITE_REG(a, reg, value) \
 	mdio_write_reg((a)->vbase, (reg), (value))
 
-static u32 mdio_read_reg(void *base, u32 reg)
+static u32 mdio_read_reg(u8 __iomem *base, u32 reg)
 {
-	u8 __iomem *reg_addr = (u8 __iomem *)base;
-
-	return readl_relaxed(reg_addr + reg);
+	return readl_relaxed(base + reg);
 }
 
 #define mdio_set_field(origin, mask, shift, val) \
@@ -121,7 +113,7 @@
 
 #define mdio_get_field(origin, mask, shift) (((origin) >> (shift)) & (mask))
 
-static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift,
+static void mdio_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift,
 			       u32 val)
 {
 	u32 origin = mdio_read_reg(base, reg);
@@ -133,7 +125,7 @@
 #define MDIO_SET_REG_FIELD(dev, reg, mask, shift, val) \
 	mdio_set_reg_field((dev)->vbase, (reg), (mask), (shift), (val))
 
-static u32 mdio_get_reg_field(void *base, u32 reg, u32 mask, u32 shift)
+static u32 mdio_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift)
 {
 	u32 origin;
 
@@ -156,11 +148,15 @@
 {
 	u32 time_cnt;
 	u32 reg_value;
+	int ret;
 
 	regmap_write(mdio_dev->subctrl_vbase, cfg_reg, set_val);
 
 	for (time_cnt = MDIO_TIMEOUT; time_cnt; time_cnt--) {
-		regmap_read(mdio_dev->subctrl_vbase, st_reg, &reg_value);
+		ret = regmap_read(mdio_dev->subctrl_vbase, st_reg, &reg_value);
+		if (ret)
+			return ret;
+
 		reg_value &= st_msk;
 		if ((!!check_st) == (!!reg_value))
 			break;
@@ -321,7 +317,7 @@
 		}
 
 		hns_mdio_cmd_write(mdio_dev, is_c45,
-				   MDIO_C45_WRITE_ADDR, phy_id, devad);
+				   MDIO_C45_READ, phy_id, devad);
 	}
 
 	/* Step 5: waiting for MDIO_COMMAND_REG's mdio_start==0 */
@@ -425,7 +421,6 @@
 {
 	struct hns_mdio_device *mdio_dev;
 	struct mii_bus *new_bus;
-	struct resource *res;
 	int ret = -ENODEV;
 
 	if (!pdev) {
@@ -450,8 +445,7 @@
 	new_bus->priv = mdio_dev;
 	new_bus->parent = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mdio_dev->vbase = devm_ioremap_resource(&pdev->dev, res);
+	mdio_dev->vbase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mdio_dev->vbase)) {
 		ret = PTR_ERR(mdio_dev->vbase);
 		return ret;
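devm_platform_ioremap_resource() is a one-call replacement for the lookup-and-map pair it supersedes; the two forms below are equivalent (example_map() is hypothetical, not part of the patch):

static void __iomem *example_map(struct platform_device *pdev)
{
	/* before:
	 *	res  = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	 *	base = devm_ioremap_resource(&pdev->dev, res);
	 * after, equivalently:
	 */
	return devm_platform_ioremap_resource(pdev, 0);
}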
diff --git a/drivers/net/ethernet/hp/Kconfig b/drivers/net/ethernet/hp/Kconfig
index d4df78c..fb395cf 100644
--- a/drivers/net/ethernet/hp/Kconfig
+++ b/drivers/net/ethernet/hp/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # HP network device configuration
 #
diff --git a/drivers/net/ethernet/hp/Makefile b/drivers/net/ethernet/hp/Makefile
index 20b6918..5ed723b 100644
--- a/drivers/net/ethernet/hp/Makefile
+++ b/drivers/net/ethernet/hp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the HP network device drivers.
 #
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 9b5a68b..6ec78f5 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
 ** hp100.c
 ** HP CASCADE Architecture Driver for 100VG-AnyLan Network Adapters
@@ -31,19 +32,6 @@
 **       -  some updates for EISA version of card
 **
 **
-**   This code is free software; you can redistribute it and/or modify
-**   it under the terms of the GNU General Public License as published by
-**   the Free Software Foundation; either version 2 of the License, or
-**   (at your option) any later version.
-**
-**   This code is distributed in the hope that it will be useful,
-**   but WITHOUT ANY WARRANTY; without even the implied warranty of
-**   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-**   GNU General Public License for more details.
-**
-**   You should have received a copy of the GNU General Public License
-**   along with this program; if not, write to the Free Software
-**   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 **
 ** 1.57c -> 1.58
 **   - used indent to change coding-style
diff --git a/drivers/net/ethernet/hp/hp100.h b/drivers/net/ethernet/hp/hp100.h
index b60e96f..7239b94 100644
--- a/drivers/net/ethernet/hp/hp100.h
+++ b/drivers/net/ethernet/hp/hp100.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * hp100.h: Hewlett Packard HP10/100VG ANY LAN ethernet driver for Linux.
  *
@@ -7,11 +8,6 @@
  *           Siegfried Loeffler <floeff@tunix.mathematik.uni-stuttgart.de>
  *
  * This driver is based on the 'hpfepkt' crynwr packet driver.
- *
- * This source/code is public free; you can distribute it and/or modify
- * it under terms of the GNU General Public License (published by the
- * Free Software Foundation) either version two of this License, or any
- * later version.
  */
 
 /****************************************************************************
diff --git a/drivers/net/ethernet/huawei/Kconfig b/drivers/net/ethernet/huawei/Kconfig
index c1a95ae..bdcbfac 100644
--- a/drivers/net/ethernet/huawei/Kconfig
+++ b/drivers/net/ethernet/huawei/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Huawei driver configuration
 #
diff --git a/drivers/net/ethernet/huawei/Makefile b/drivers/net/ethernet/huawei/Makefile
index 5c37cc8..2549ad5 100644
--- a/drivers/net/ethernet/huawei/Makefile
+++ b/drivers/net/ethernet/huawei/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Huawei device drivers.
 #
diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig
index e4e8b24..cabc2f7 100644
--- a/drivers/net/ethernet/huawei/hinic/Kconfig
+++ b/drivers/net/ethernet/huawei/hinic/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Huawei driver configuration
 #
diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile
index 289ce88..fe88ab8 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -1,6 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HINIC) += hinic.o
 
 hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
 	   hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \
 	   hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \
-	   hinic_common.o
+	   hinic_common.o hinic_ethtool.o
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.c b/drivers/net/ethernet/huawei/hinic/hinic_common.c
index 02c74fd..8e9b4a6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_common.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.h b/drivers/net/ethernet/huawei/hinic/hinic_common.h
index 2c06b76..a0de9d9 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_common.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_COMMON_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index 5186cc9..a209b14 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_DEV_H
@@ -31,6 +22,7 @@
 enum hinic_flags {
 	HINIC_LINK_UP = BIT(0),
 	HINIC_INTF_UP = BIT(1),
+	HINIC_RSS_ENABLE = BIT(2),
 };
 
 struct hinic_rx_mode_work {
@@ -38,6 +30,23 @@
 	u32                     rx_mode;
 };
 
+struct hinic_rss_type {
+	u8 tcp_ipv6_ext;
+	u8 ipv6_ext;
+	u8 tcp_ipv6;
+	u8 ipv6;
+	u8 tcp_ipv4;
+	u8 ipv4;
+	u8 udp_ipv6;
+	u8 udp_ipv4;
+};
+
+enum hinic_rss_hash_type {
+	HINIC_RSS_HASH_ENGINE_TYPE_XOR,
+	HINIC_RSS_HASH_ENGINE_TYPE_TOEP,
+	HINIC_RSS_HASH_ENGINE_TYPE_MAX,
+};
+
 struct hinic_dev {
 	struct net_device               *netdev;
 	struct hinic_hwdev              *hwdev;
@@ -45,6 +54,8 @@
 	u32                             msg_enable;
 	unsigned int                    tx_weight;
 	unsigned int                    rx_weight;
+	u16				num_qps;
+	u16				max_qps;
 
 	unsigned int                    flags;
 
@@ -59,6 +70,14 @@
 
 	struct hinic_txq_stats          tx_stats;
 	struct hinic_rxq_stats          rx_stats;
+
+	u8				rss_tmpl_idx;
+	u8				rss_hash_engine;
+	u16				num_rss;
+	u16				rss_limit;
+	struct hinic_rss_type		rss_type;
+	u8				*rss_hkey_user;
+	s32				*rss_indir_user;
 };
 
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
new file mode 100644
index 0000000..60ec48f
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -0,0 +1,762 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+
+#include "hinic_hw_qp.h"
+#include "hinic_hw_dev.h"
+#include "hinic_port.h"
+#include "hinic_tx.h"
+#include "hinic_rx.h"
+#include "hinic_dev.h"
+
+static void set_link_speed(struct ethtool_link_ksettings *link_ksettings,
+			   enum hinic_speed speed)
+{
+	switch (speed) {
+	case HINIC_SPEED_10MB_LINK:
+		link_ksettings->base.speed = SPEED_10;
+		break;
+
+	case HINIC_SPEED_100MB_LINK:
+		link_ksettings->base.speed = SPEED_100;
+		break;
+
+	case HINIC_SPEED_1000MB_LINK:
+		link_ksettings->base.speed = SPEED_1000;
+		break;
+
+	case HINIC_SPEED_10GB_LINK:
+		link_ksettings->base.speed = SPEED_10000;
+		break;
+
+	case HINIC_SPEED_25GB_LINK:
+		link_ksettings->base.speed = SPEED_25000;
+		break;
+
+	case HINIC_SPEED_40GB_LINK:
+		link_ksettings->base.speed = SPEED_40000;
+		break;
+
+	case HINIC_SPEED_100GB_LINK:
+		link_ksettings->base.speed = SPEED_100000;
+		break;
+
+	default:
+		link_ksettings->base.speed = SPEED_UNKNOWN;
+		break;
+	}
+}
+
+static int hinic_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings
+				    *link_ksettings)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	enum hinic_port_link_state link_state;
+	struct hinic_port_cap port_cap;
+	int err;
+
+	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+	ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+					     Autoneg);
+
+	link_ksettings->base.speed = SPEED_UNKNOWN;
+	link_ksettings->base.autoneg = AUTONEG_DISABLE;
+	link_ksettings->base.duplex = DUPLEX_UNKNOWN;
+
+	err = hinic_port_get_cap(nic_dev, &port_cap);
+	if (err)
+		return err;
+
+	err = hinic_port_link_state(nic_dev, &link_state);
+	if (err)
+		return err;
+
+	if (link_state != HINIC_LINK_STATE_UP)
+		return err;
+
+	set_link_speed(link_ksettings, port_cap.speed);
+
+	if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED))
+		ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     advertising, Autoneg);
+
+	if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE)
+		link_ksettings->base.autoneg = AUTONEG_ENABLE;
+
+	link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ?
+					   DUPLEX_FULL : DUPLEX_HALF;
+	return 0;
+}
+
+static void hinic_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *info)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u8 mgmt_ver[HINIC_MGMT_VERSION_MAX_LEN] = {0};
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	int err;
+
+	strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
+	strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
+
+	err = hinic_get_mgmt_version(nic_dev, mgmt_ver);
+	if (err)
+		return;
+
+	snprintf(info->fw_version, sizeof(info->fw_version), "%s", mgmt_ver);
+}
+
+static void hinic_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	ring->rx_max_pending = HINIC_RQ_DEPTH;
+	ring->tx_max_pending = HINIC_SQ_DEPTH;
+	ring->rx_pending = HINIC_RQ_DEPTH;
+	ring->tx_pending = HINIC_SQ_DEPTH;
+}
+
+static void hinic_get_channels(struct net_device *netdev,
+			       struct ethtool_channels *channels)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+
+	channels->max_rx = hwdev->nic_cap.max_qps;
+	channels->max_tx = hwdev->nic_cap.max_qps;
+	channels->max_other = 0;
+	channels->max_combined = 0;
+	channels->rx_count = hinic_hwdev_num_qps(hwdev);
+	channels->tx_count = hinic_hwdev_num_qps(hwdev);
+	channels->other_count = 0;
+	channels->combined_count = 0;
+}
+
+static int hinic_get_rss_hash_opts(struct hinic_dev *nic_dev,
+				   struct ethtool_rxnfc *cmd)
+{
+	struct hinic_rss_type rss_type = { 0 };
+	int err;
+
+	cmd->data = 0;
+
+	if (!(nic_dev->flags & HINIC_RSS_ENABLE))
+		return 0;
+
+	err = hinic_get_rss_type(nic_dev, nic_dev->rss_tmpl_idx,
+				 &rss_type);
+	if (err)
+		return err;
+
+	cmd->data = RXH_IP_SRC | RXH_IP_DST;
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		if (rss_type.tcp_ipv4)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+	case TCP_V6_FLOW:
+		if (rss_type.tcp_ipv6)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+	case UDP_V4_FLOW:
+		if (rss_type.udp_ipv4)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+	case UDP_V6_FLOW:
+		if (rss_type.udp_ipv6)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		break;
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		break;
+	default:
+		cmd->data = 0;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_l4_rss_hash_ops(struct ethtool_rxnfc *cmd,
+			       struct hinic_rss_type *rss_type)
+{
+	u8 rss_l4_en = 0;
+
+	switch (cmd->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+	case 0:
+		rss_l4_en = 0;
+		break;
+	case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+		rss_l4_en = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		rss_type->tcp_ipv4 = rss_l4_en;
+		break;
+	case TCP_V6_FLOW:
+		rss_type->tcp_ipv6 = rss_l4_en;
+		break;
+	case UDP_V4_FLOW:
+		rss_type->udp_ipv4 = rss_l4_en;
+		break;
+	case UDP_V6_FLOW:
+		rss_type->udp_ipv6 = rss_l4_en;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic_set_rss_hash_opts(struct hinic_dev *nic_dev,
+				   struct ethtool_rxnfc *cmd)
+{
+	struct hinic_rss_type *rss_type = &nic_dev->rss_type;
+	int err;
+
+	if (!(nic_dev->flags & HINIC_RSS_ENABLE)) {
+		cmd->data = 0;
+		return -EOPNOTSUPP;
+	}
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (cmd->data & ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 |
+		RXH_L4_B_2_3))
+		return -EINVAL;
+
+	/* We need at least the IP SRC and DEST fields for hashing */
+	if (!(cmd->data & RXH_IP_SRC) || !(cmd->data & RXH_IP_DST))
+		return -EINVAL;
+
+	err = hinic_get_rss_type(nic_dev,
+				 nic_dev->rss_tmpl_idx, rss_type);
+	if (err)
+		return -EFAULT;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V4_FLOW:
+	case UDP_V6_FLOW:
+		err = set_l4_rss_hash_ops(cmd, rss_type);
+		if (err)
+			return err;
+		break;
+	case IPV4_FLOW:
+		rss_type->ipv4 = 1;
+		break;
+	case IPV6_FLOW:
+		rss_type->ipv6 = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	err = hinic_set_rss_type(nic_dev, nic_dev->rss_tmpl_idx,
+				 *rss_type);
+	if (err)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int __set_rss_rxfh(struct net_device *netdev,
+			  const u32 *indir, const u8 *key)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	int err;
+
+	if (indir) {
+		if (!nic_dev->rss_indir_user) {
+			nic_dev->rss_indir_user =
+				kzalloc(sizeof(u32) * HINIC_RSS_INDIR_SIZE,
+					GFP_KERNEL);
+			if (!nic_dev->rss_indir_user)
+				return -ENOMEM;
+		}
+
+		memcpy(nic_dev->rss_indir_user, indir,
+		       sizeof(u32) * HINIC_RSS_INDIR_SIZE);
+
+		err = hinic_rss_set_indir_tbl(nic_dev,
+					      nic_dev->rss_tmpl_idx, indir);
+		if (err)
+			return -EFAULT;
+	}
+
+	if (key) {
+		if (!nic_dev->rss_hkey_user) {
+			nic_dev->rss_hkey_user =
+				kzalloc(HINIC_RSS_KEY_SIZE * 2, GFP_KERNEL);
+
+			if (!nic_dev->rss_hkey_user)
+				return -ENOMEM;
+		}
+
+		memcpy(nic_dev->rss_hkey_user, key, HINIC_RSS_KEY_SIZE);
+
+		err = hinic_rss_set_template_tbl(nic_dev,
+						 nic_dev->rss_tmpl_idx, key);
+		if (err)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int hinic_get_rxnfc(struct net_device *netdev,
+			   struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	int err = 0;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = nic_dev->num_qps;
+		break;
+	case ETHTOOL_GRXFH:
+		err = hinic_get_rss_hash_opts(nic_dev, cmd);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int hinic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	int err = 0;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		err = hinic_set_rss_hash_opts(nic_dev, cmd);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int hinic_get_rxfh(struct net_device *netdev,
+			  u32 *indir, u8 *key, u8 *hfunc)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u8 hash_engine_type = 0;
+	int err = 0;
+
+	if (!(nic_dev->flags & HINIC_RSS_ENABLE))
+		return -EOPNOTSUPP;
+
+	if (hfunc) {
+		err = hinic_rss_get_hash_engine(nic_dev,
+						nic_dev->rss_tmpl_idx,
+						&hash_engine_type);
+		if (err)
+			return -EFAULT;
+
+		*hfunc = hash_engine_type ? ETH_RSS_HASH_TOP : ETH_RSS_HASH_XOR;
+	}
+
+	if (indir) {
+		err = hinic_rss_get_indir_tbl(nic_dev,
+					      nic_dev->rss_tmpl_idx, indir);
+		if (err)
+			return -EFAULT;
+	}
+
+	if (key)
+		err = hinic_rss_get_template_tbl(nic_dev,
+						 nic_dev->rss_tmpl_idx, key);
+
+	return err;
+}
+
+static int hinic_set_rxfh(struct net_device *netdev, const u32 *indir,
+			  const u8 *key, const u8 hfunc)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	int err = 0;
+
+	if (!(nic_dev->flags & HINIC_RSS_ENABLE))
+		return -EOPNOTSUPP;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE) {
+		if (hfunc != ETH_RSS_HASH_TOP && hfunc != ETH_RSS_HASH_XOR)
+			return -EOPNOTSUPP;
+
+		nic_dev->rss_hash_engine = (hfunc == ETH_RSS_HASH_XOR) ?
+			HINIC_RSS_HASH_ENGINE_TYPE_XOR :
+			HINIC_RSS_HASH_ENGINE_TYPE_TOEP;
+		err = hinic_rss_set_hash_engine
+			(nic_dev, nic_dev->rss_tmpl_idx,
+			nic_dev->rss_hash_engine);
+		if (err)
+			return -EFAULT;
+	}
+
+	err = __set_rss_rxfh(netdev, indir, key);
+
+	return err;
+}
+
+static u32 hinic_get_rxfh_key_size(struct net_device *netdev)
+{
+	return HINIC_RSS_KEY_SIZE;
+}
+
+static u32 hinic_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return HINIC_RSS_INDIR_SIZE;
+}
+
+#define ARRAY_LEN(arr) ((int)((int)sizeof(arr) / (int)sizeof(arr[0])))
+
+#define HINIC_FUNC_STAT(_stat_item) {	\
+	.name = #_stat_item, \
+	.size = FIELD_SIZEOF(struct hinic_vport_stats, _stat_item), \
+	.offset = offsetof(struct hinic_vport_stats, _stat_item) \
+}
+
+static struct hinic_stats hinic_function_stats[] = {
+	HINIC_FUNC_STAT(tx_unicast_pkts_vport),
+	HINIC_FUNC_STAT(tx_unicast_bytes_vport),
+	HINIC_FUNC_STAT(tx_multicast_pkts_vport),
+	HINIC_FUNC_STAT(tx_multicast_bytes_vport),
+	HINIC_FUNC_STAT(tx_broadcast_pkts_vport),
+	HINIC_FUNC_STAT(tx_broadcast_bytes_vport),
+
+	HINIC_FUNC_STAT(rx_unicast_pkts_vport),
+	HINIC_FUNC_STAT(rx_unicast_bytes_vport),
+	HINIC_FUNC_STAT(rx_multicast_pkts_vport),
+	HINIC_FUNC_STAT(rx_multicast_bytes_vport),
+	HINIC_FUNC_STAT(rx_broadcast_pkts_vport),
+	HINIC_FUNC_STAT(rx_broadcast_bytes_vport),
+
+	HINIC_FUNC_STAT(tx_discard_vport),
+	HINIC_FUNC_STAT(rx_discard_vport),
+	HINIC_FUNC_STAT(tx_err_vport),
+	HINIC_FUNC_STAT(rx_err_vport),
+};
+
+#define HINIC_PORT_STAT(_stat_item) { \
+	.name = #_stat_item, \
+	.size = FIELD_SIZEOF(struct hinic_phy_port_stats, _stat_item), \
+	.offset = offsetof(struct hinic_phy_port_stats, _stat_item) \
+}
+
+static struct hinic_stats hinic_port_stats[] = {
+	HINIC_PORT_STAT(mac_rx_total_pkt_num),
+	HINIC_PORT_STAT(mac_rx_total_oct_num),
+	HINIC_PORT_STAT(mac_rx_bad_pkt_num),
+	HINIC_PORT_STAT(mac_rx_bad_oct_num),
+	HINIC_PORT_STAT(mac_rx_good_pkt_num),
+	HINIC_PORT_STAT(mac_rx_good_oct_num),
+	HINIC_PORT_STAT(mac_rx_uni_pkt_num),
+	HINIC_PORT_STAT(mac_rx_multi_pkt_num),
+	HINIC_PORT_STAT(mac_rx_broad_pkt_num),
+	HINIC_PORT_STAT(mac_tx_total_pkt_num),
+	HINIC_PORT_STAT(mac_tx_total_oct_num),
+	HINIC_PORT_STAT(mac_tx_bad_pkt_num),
+	HINIC_PORT_STAT(mac_tx_bad_oct_num),
+	HINIC_PORT_STAT(mac_tx_good_pkt_num),
+	HINIC_PORT_STAT(mac_tx_good_oct_num),
+	HINIC_PORT_STAT(mac_tx_uni_pkt_num),
+	HINIC_PORT_STAT(mac_tx_multi_pkt_num),
+	HINIC_PORT_STAT(mac_tx_broad_pkt_num),
+	HINIC_PORT_STAT(mac_rx_fragment_pkt_num),
+	HINIC_PORT_STAT(mac_rx_undersize_pkt_num),
+	HINIC_PORT_STAT(mac_rx_undermin_pkt_num),
+	HINIC_PORT_STAT(mac_rx_64_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_65_127_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_128_255_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_256_511_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_512_1023_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_1024_1518_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_1519_2047_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_2048_4095_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_4096_8191_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_8192_9216_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_9217_12287_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_12288_16383_oct_pkt_num),
+	HINIC_PORT_STAT(mac_rx_1519_max_good_pkt_num),
+	HINIC_PORT_STAT(mac_rx_1519_max_bad_pkt_num),
+	HINIC_PORT_STAT(mac_rx_oversize_pkt_num),
+	HINIC_PORT_STAT(mac_rx_jabber_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pause_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri0_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri1_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri2_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri3_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri4_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri5_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri6_pkt_num),
+	HINIC_PORT_STAT(mac_rx_pfc_pri7_pkt_num),
+	HINIC_PORT_STAT(mac_rx_control_pkt_num),
+	HINIC_PORT_STAT(mac_rx_sym_err_pkt_num),
+	HINIC_PORT_STAT(mac_rx_fcs_err_pkt_num),
+	HINIC_PORT_STAT(mac_rx_send_app_good_pkt_num),
+	HINIC_PORT_STAT(mac_rx_send_app_bad_pkt_num),
+	HINIC_PORT_STAT(mac_tx_fragment_pkt_num),
+	HINIC_PORT_STAT(mac_tx_undersize_pkt_num),
+	HINIC_PORT_STAT(mac_tx_undermin_pkt_num),
+	HINIC_PORT_STAT(mac_tx_64_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_65_127_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_128_255_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_256_511_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_512_1023_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_1024_1518_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_1519_2047_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_2048_4095_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_4096_8191_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_8192_9216_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_9217_12287_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_12288_16383_oct_pkt_num),
+	HINIC_PORT_STAT(mac_tx_1519_max_good_pkt_num),
+	HINIC_PORT_STAT(mac_tx_1519_max_bad_pkt_num),
+	HINIC_PORT_STAT(mac_tx_oversize_pkt_num),
+	HINIC_PORT_STAT(mac_tx_jabber_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pause_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri0_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri1_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri2_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri3_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri4_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri5_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri6_pkt_num),
+	HINIC_PORT_STAT(mac_tx_pfc_pri7_pkt_num),
+	HINIC_PORT_STAT(mac_tx_control_pkt_num),
+	HINIC_PORT_STAT(mac_tx_err_all_pkt_num),
+	HINIC_PORT_STAT(mac_tx_from_app_good_pkt_num),
+	HINIC_PORT_STAT(mac_tx_from_app_bad_pkt_num),
+};
+
+#define HINIC_TXQ_STAT(_stat_item) { \
+	.name = "txq%d_"#_stat_item, \
+	.size = FIELD_SIZEOF(struct hinic_txq_stats, _stat_item), \
+	.offset = offsetof(struct hinic_txq_stats, _stat_item) \
+}
+
+static struct hinic_stats hinic_tx_queue_stats[] = {
+	HINIC_TXQ_STAT(pkts),
+	HINIC_TXQ_STAT(bytes),
+	HINIC_TXQ_STAT(tx_busy),
+	HINIC_TXQ_STAT(tx_wake),
+	HINIC_TXQ_STAT(tx_dropped),
+	HINIC_TXQ_STAT(big_frags_pkts),
+};
+
+#define HINIC_RXQ_STAT(_stat_item) { \
+	.name = "rxq%d_"#_stat_item, \
+	.size = FIELD_SIZEOF(struct hinic_rxq_stats, _stat_item), \
+	.offset = offsetof(struct hinic_rxq_stats, _stat_item) \
+}
+
+static struct hinic_stats hinic_rx_queue_stats[] = {
+	HINIC_RXQ_STAT(pkts),
+	HINIC_RXQ_STAT(bytes),
+	HINIC_RXQ_STAT(errors),
+	HINIC_RXQ_STAT(csum_errors),
+	HINIC_RXQ_STAT(other_errors),
+};
+
+static void get_drv_queue_stats(struct hinic_dev *nic_dev, u64 *data)
+{
+	struct hinic_txq_stats txq_stats;
+	struct hinic_rxq_stats rxq_stats;
+	u16 i = 0, j = 0, qid = 0;
+	char *p;
+
+	for (qid = 0; qid < nic_dev->num_qps; qid++) {
+		if (!nic_dev->txqs)
+			break;
+
+		hinic_txq_get_stats(&nic_dev->txqs[qid], &txq_stats);
+		for (j = 0; j < ARRAY_LEN(hinic_tx_queue_stats); j++, i++) {
+			p = (char *)&txq_stats +
+				hinic_tx_queue_stats[j].offset;
+			data[i] = (hinic_tx_queue_stats[j].size ==
+					sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+		}
+	}
+
+	for (qid = 0; qid < nic_dev->num_qps; qid++) {
+		if (!nic_dev->rxqs)
+			break;
+
+		hinic_rxq_get_stats(&nic_dev->rxqs[qid], &rxq_stats);
+		for (j = 0; j < ARRAY_LEN(hinic_rx_queue_stats); j++, i++) {
+			p = (char *)&rxq_stats +
+				hinic_rx_queue_stats[j].offset;
+			data[i] = (hinic_rx_queue_stats[j].size ==
+					sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+		}
+	}
+}
+
+static void hinic_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_vport_stats vport_stats = {0};
+	struct hinic_phy_port_stats *port_stats;
+	u16 i = 0, j = 0;
+	char *p;
+	int err;
+
+	err = hinic_get_vport_stats(nic_dev, &vport_stats);
+	if (err)
+		netif_err(nic_dev, drv, netdev,
+			  "Failed to get vport stats from firmware\n");
+
+	for (j = 0; j < ARRAY_LEN(hinic_function_stats); j++, i++) {
+		p = (char *)&vport_stats + hinic_function_stats[j].offset;
+		data[i] = (hinic_function_stats[j].size ==
+				sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	port_stats = kzalloc(sizeof(*port_stats), GFP_KERNEL);
+	if (!port_stats) {
+		memset(&data[i], 0,
+		       ARRAY_LEN(hinic_port_stats) * sizeof(*data));
+		i += ARRAY_LEN(hinic_port_stats);
+		goto get_drv_stats;
+	}
+
+	err = hinic_get_phy_port_stats(nic_dev, port_stats);
+	if (err)
+		netif_err(nic_dev, drv, netdev,
+			  "Failed to get port stats from firmware\n");
+
+	for (j = 0; j < ARRAY_LEN(hinic_port_stats); j++, i++) {
+		p = (char *)port_stats + hinic_port_stats[j].offset;
+		data[i] = (hinic_port_stats[j].size ==
+				sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	kfree(port_stats);
+
+get_drv_stats:
+	get_drv_queue_stats(nic_dev, data + i);
+}
+
+static int hinic_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	int count, q_num;
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		q_num = nic_dev->num_qps;
+		count = ARRAY_LEN(hinic_function_stats) +
+			(ARRAY_LEN(hinic_tx_queue_stats) +
+			ARRAY_LEN(hinic_rx_queue_stats)) * q_num;
+
+		count += ARRAY_LEN(hinic_port_stats);
+
+		return count;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void hinic_get_strings(struct net_device *netdev,
+			      u32 stringset, u8 *data)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	char *p = (char *)data;
+	u16 i, j;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_LEN(hinic_function_stats); i++) {
+			memcpy(p, hinic_function_stats[i].name,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < ARRAY_LEN(hinic_port_stats); i++) {
+			memcpy(p, hinic_port_stats[i].name,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < nic_dev->num_qps; i++) {
+			for (j = 0; j < ARRAY_LEN(hinic_tx_queue_stats); j++) {
+				sprintf(p, hinic_tx_queue_stats[j].name, i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
+		for (i = 0; i < nic_dev->num_qps; i++) {
+			for (j = 0; j < ARRAY_LEN(hinic_rx_queue_stats); j++) {
+				sprintf(p, hinic_rx_queue_stats[j].name, i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
+		return;
+	default:
+		return;
+	}
+}
+
+static const struct ethtool_ops hinic_ethtool_ops = {
+	.get_link_ksettings = hinic_get_link_ksettings,
+	.get_drvinfo = hinic_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+	.get_ringparam = hinic_get_ringparam,
+	.get_channels = hinic_get_channels,
+	.get_rxnfc = hinic_get_rxnfc,
+	.set_rxnfc = hinic_set_rxnfc,
+	.get_rxfh_key_size = hinic_get_rxfh_key_size,
+	.get_rxfh_indir_size = hinic_get_rxfh_indir_size,
+	.get_rxfh = hinic_get_rxfh,
+	.set_rxfh = hinic_set_rxfh,
+	.get_sset_count = hinic_get_sset_count,
+	.get_ethtool_stats = hinic_get_ethtool_stats,
+	.get_strings = hinic_get_strings,
+};
+
+void hinic_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &hinic_ethtool_ops;
+}
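ethtool requires .get_sset_count, .get_strings and .get_ethtool_stats to agree on entry count and order; the layout produced above is [function stats][port stats][txq0..N][rxq0..N]. A sketch of the implied buffer length (hinic_expected_stats_len() is hypothetical, mirroring hinic_get_sset_count() above):

static int hinic_expected_stats_len(u16 num_qps)
{
	return ARRAY_LEN(hinic_function_stats) +
	       ARRAY_LEN(hinic_port_stats) +
	       (ARRAY_LEN(hinic_tx_queue_stats) +
		ARRAY_LEN(hinic_rx_queue_stats)) * num_qps;
}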
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
index c40603a..583fd24 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -613,8 +604,8 @@
 	u8 *cmd_vaddr;
 	int err = 0;
 
-	cmd_vaddr = dma_zalloc_coherent(&pdev->dev, API_CMD_BUF_SIZE,
-					&cmd_paddr, GFP_KERNEL);
+	cmd_vaddr = dma_alloc_coherent(&pdev->dev, API_CMD_BUF_SIZE,
+				       &cmd_paddr, GFP_KERNEL);
 	if (!cmd_vaddr) {
 		dev_err(&pdev->dev, "Failed to allocate API CMD DMA memory\n");
 		return -ENOMEM;
@@ -663,8 +654,8 @@
 	dma_addr_t node_paddr;
 	int err;
 
-	node = dma_zalloc_coherent(&pdev->dev, chain->cell_size,
-				   &node_paddr, GFP_KERNEL);
+	node = dma_alloc_coherent(&pdev->dev, chain->cell_size, &node_paddr,
+				  GFP_KERNEL);
 	if (!node) {
 		dev_err(&pdev->dev, "Failed to allocate dma API CMD cell\n");
 		return -ENOMEM;
@@ -821,10 +812,10 @@
 	if (!chain->cell_ctxt)
 		return -ENOMEM;
 
-	chain->wb_status = dma_zalloc_coherent(&pdev->dev,
-					       sizeof(*chain->wb_status),
-					       &chain->wb_status_paddr,
-					       GFP_KERNEL);
+	chain->wb_status = dma_alloc_coherent(&pdev->dev,
+					      sizeof(*chain->wb_status),
+					      &chain->wb_status_paddr,
+					      GFP_KERNEL);
 	if (!chain->wb_status) {
 		dev_err(&pdev->dev, "Failed to allocate DMA wb status\n");
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
index 31b94d5..0ba00fd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_API_CMD_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 4d09ea7..eb53c15 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
index 23f8d39..7a434b6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_CMDQ_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
index f39b184..cdec1d0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_CSR_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 6b19607..6f2cf56 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -98,9 +89,6 @@
 	if (nic_cap->num_qps > HINIC_Q_CTXT_MAX)
 		nic_cap->num_qps = HINIC_Q_CTXT_MAX;
 
-	/* num_qps must be power of 2 */
-	nic_cap->num_qps = BIT(fls(nic_cap->num_qps) - 1);
-
 	nic_cap->max_qps = dev_cap->max_sqs + 1;
 	if (nic_cap->max_qps != (dev_cap->max_rqs + 1))
 		return -EFAULT;
@@ -313,6 +301,8 @@
 	hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT;
 	hw_ioctxt.cmdq_depth = 0;
 
+	hw_ioctxt.lro_en = 1;
+
 	hw_ioctxt.rq_depth  = ilog2(rq_depth);
 
 	hw_ioctxt.rx_buf_sz_idx = HINIC_RX_BUF_SZ_IDX;
@@ -881,6 +871,13 @@
 	hinic_free_hwif(hwdev->hwif);
 }
 
+int hinic_hwdev_max_num_qps(struct hinic_hwdev *hwdev)
+{
+	struct hinic_cap *nic_cap = &hwdev->nic_cap;
+
+	return nic_cap->max_qps;
+}
+
 /**
  * hinic_hwdev_num_qps - return the number of QPs available for use
  * @hwdev: the NIC HW device
@@ -1008,3 +1005,16 @@
 				 &hw_ci, sizeof(hw_ci), NULL,
 				 NULL, HINIC_MGMT_MSG_SYNC);
 }
+
+/**
+ * hinic_hwdev_set_msix_state - set msix state
+ * @hwdev: the NIC HW device
+ * @msix_index: index of the corresponding IRQ vector
+ * @flag: msix state to set
+ */
+void hinic_hwdev_set_msix_state(struct hinic_hwdev *hwdev, u16 msix_index,
+				enum hinic_msix_state flag)
+{
+	hinic_set_msix_state(hwdev->hwif, msix_index, flag);
+}
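A typical caller masks a vector while servicing its queue; a minimal sketch, assuming the HINIC_MSIX_ENABLE/HINIC_MSIX_DISABLE values of enum hinic_msix_state from hinic_hw_if.h (example_poll() is hypothetical, not part of the patch):

static void example_poll(struct hinic_hwdev *hwdev, u16 q_id)
{
	hinic_hwdev_set_msix_state(hwdev, q_id, HINIC_MSIX_DISABLE);
	/* ... process completions for queue q_id ... */
	hinic_hwdev_set_msix_state(hwdev, q_id, HINIC_MSIX_ENABLE);
}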
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 0f5563f..b069045 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_DEV_H
@@ -50,17 +41,73 @@
 
 	HINIC_PORT_CMD_GET_LINK_STATE   = 24,
 
+	HINIC_PORT_CMD_SET_LRO		= 25,
+
+	HINIC_PORT_CMD_SET_RX_CSUM	= 26,
+
+	HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD = 27,
+
+	HINIC_PORT_CMD_GET_PORT_STATISTICS = 28,
+
+	HINIC_PORT_CMD_CLEAR_PORT_STATISTICS = 29,
+
+	HINIC_PORT_CMD_GET_VPORT_STAT	= 30,
+
+	HINIC_PORT_CMD_CLEAN_VPORT_STAT	= 31,
+
+	HINIC_PORT_CMD_GET_RSS_TEMPLATE_INDIR_TBL = 37,
+
 	HINIC_PORT_CMD_SET_PORT_STATE   = 41,
 
+	HINIC_PORT_CMD_SET_RSS_TEMPLATE_TBL = 43,
+
+	HINIC_PORT_CMD_GET_RSS_TEMPLATE_TBL = 44,
+
+	HINIC_PORT_CMD_SET_RSS_HASH_ENGINE = 45,
+
+	HINIC_PORT_CMD_GET_RSS_HASH_ENGINE = 46,
+
+	HINIC_PORT_CMD_GET_RSS_CTX_TBL  = 47,
+
+	HINIC_PORT_CMD_SET_RSS_CTX_TBL  = 48,
+
+	HINIC_PORT_CMD_RSS_TEMP_MGR	= 49,
+
+	HINIC_PORT_CMD_RSS_CFG		= 66,
+
 	HINIC_PORT_CMD_FWCTXT_INIT      = 69,
 
+	HINIC_PORT_CMD_GET_MGMT_VERSION = 88,
+
 	HINIC_PORT_CMD_SET_FUNC_STATE   = 93,
 
 	HINIC_PORT_CMD_GET_GLOBAL_QPN   = 102,
 
+	HINIC_PORT_CMD_SET_TSO          = 112,
+
+	HINIC_PORT_CMD_SET_RQ_IQ_MAP	= 115,
+
 	HINIC_PORT_CMD_GET_CAP          = 170,
+
+	HINIC_PORT_CMD_SET_LRO_TIMER	= 244,
 };
 
+enum hinic_ucode_cmd {
+	HINIC_UCODE_CMD_MODIFY_QUEUE_CONTEXT    = 0,
+	HINIC_UCODE_CMD_CLEAN_QUEUE_CONTEXT,
+	HINIC_UCODE_CMD_ARM_SQ,
+	HINIC_UCODE_CMD_ARM_RQ,
+	HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE,
+	HINIC_UCODE_CMD_SET_RSS_CONTEXT_TABLE,
+	HINIC_UCODE_CMD_GET_RSS_INDIR_TABLE,
+	HINIC_UCODE_CMD_GET_RSS_CONTEXT_TABLE,
+	HINIC_UCODE_CMD_SET_IQ_ENABLE,
+	HINIC_UCODE_CMD_SET_RQ_FLUSH            = 10
+};
+
+#define NIC_RSS_CMD_TEMP_ALLOC  0x01
+#define NIC_RSS_CMD_TEMP_FREE   0x02
+
 enum hinic_mgmt_msg_cmd {
 	HINIC_MGMT_MSG_CMD_BASE         = 160,
 
@@ -102,7 +149,7 @@
 	u8      set_cmdq_depth;
 	u8      cmdq_depth;
 
-	u8      rsvd2;
+	u8      lro_en;
 	u8      rsvd3;
 	u8      rsvd4;
 	u8      rsvd5;
@@ -220,6 +267,8 @@
 
 void hinic_free_hwdev(struct hinic_hwdev *hwdev);
 
+int hinic_hwdev_max_num_qps(struct hinic_hwdev *hwdev);
+
 int hinic_hwdev_num_qps(struct hinic_hwdev *hwdev);
 
 struct hinic_sq *hinic_hwdev_get_sq(struct hinic_hwdev *hwdev, int i);
@@ -236,4 +285,7 @@
 int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
 			       u8 pending_limit, u8 coalesc_timer);
 
+void hinic_hwdev_set_msix_state(struct hinic_hwdev *hwdev, u16 msix_index,
+				enum hinic_msix_state flag);
+
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index 7cb8b9b..79243b6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -593,10 +584,10 @@
 	}
 
 	for (pg = 0; pg < eq->num_pages; pg++) {
-		eq->virt_addr[pg] = dma_zalloc_coherent(&pdev->dev,
-							eq->page_size,
-							&eq->dma_addr[pg],
-							GFP_KERNEL);
+		eq->virt_addr[pg] = dma_alloc_coherent(&pdev->dev,
+						       eq->page_size,
+						       &eq->dma_addr[pg],
+						       GFP_KERNEL);
 		if (!eq->virt_addr[pg]) {
 			err = -ENOMEM;
 			goto err_dma_alloc;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
index ecb9c2b..d35f206 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_EQS_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index 823a170..07bbfbf 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/pci.h>
@@ -168,6 +159,22 @@
 	hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR, attr4);
 }
 
+void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx,
+			  enum hinic_msix_state flag)
+{
+	u32 offset = msix_idx * HINIC_PCI_MSIX_ENTRY_SIZE +
+			HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL;
+	u32 mask_bits;
+
+	mask_bits = readl(hwif->intr_regs_base + offset);
+	mask_bits &= ~HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT;
+
+	if (flag)
+		mask_bits |= HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT;
+
+	writel(mask_bits, hwif->intr_regs_base + offset);
+}
+
 /**
  * hwif_ready - test if the HW is ready for use
  * @hwif: the HW interface of a pci function device
@@ -321,6 +328,13 @@
 		return -ENOMEM;
 	}
 
+	hwif->intr_regs_base = pci_ioremap_bar(pdev, HINIC_PCI_INTR_REGS_BAR);
+	if (!hwif->intr_regs_base) {
+		dev_err(&pdev->dev, "Failed to map interrupt regs\n");
+		err = -ENOMEM;
+		goto err_map_intr_bar;
+	}
+
 	err = hwif_ready(hwif);
 	if (err) {
 		dev_err(&pdev->dev, "HW interface is not ready\n");
@@ -337,7 +351,11 @@
 	return 0;
 
 err_hwif_ready:
+	iounmap(hwif->intr_regs_base);
+
+err_map_intr_bar:
 	iounmap(hwif->cfg_regs_bar);
+
 	return err;
 }
 
@@ -347,5 +365,6 @@
  **/
 void hinic_free_hwif(struct hinic_hwif *hwif)
 {
+	iounmap(hwif->intr_regs_base);
 	iounmap(hwif->cfg_regs_bar);
 }
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 5b4760c..5177945 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_IF_H
@@ -152,6 +143,7 @@
 #define HINIC_IS_PPF(hwif)              (HINIC_FUNC_TYPE(hwif) == HINIC_PPF)
 
 #define HINIC_PCI_CFG_REGS_BAR          0
+#define HINIC_PCI_INTR_REGS_BAR         2
 #define HINIC_PCI_DB_BAR                4
 
 #define HINIC_PCIE_ST_DISABLE           0
@@ -164,6 +156,10 @@
 #define HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT  0       /* Disabled */
 #define HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT      7       /* max */
 
+#define HINIC_PCI_MSIX_ENTRY_SIZE               16
+#define HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL        12
+#define HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT       1
+
 enum hinic_pcie_nosnoop {
 	HINIC_PCIE_SNOOP        = 0,
 	HINIC_PCIE_NO_SNOOP     = 1,
@@ -207,6 +203,11 @@
 	HINIC_DB_DISABLE = 1,
 };
 
+enum hinic_msix_state {
+	HINIC_MSIX_ENABLE,
+	HINIC_MSIX_DISABLE,
+};
+
 struct hinic_func_attr {
 	u16                     func_idx;
 	u8                      pf_idx;
@@ -226,6 +227,7 @@
 struct hinic_hwif {
 	struct pci_dev          *pdev;
 	void __iomem            *cfg_regs_bar;
+	void __iomem		*intr_regs_base;
 
 	struct hinic_func_attr  attr;
 };
@@ -251,6 +253,9 @@
 			u8 *lli_timer, u8 *lli_credit_limit,
 			u8 *resend_timer);
 
+void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx,
+			  enum hinic_msix_state flag);
+
 int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index);
 
 void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index 8e58976..d66f86f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -45,6 +36,7 @@
 
 enum io_cmd {
 	IO_CMD_MODIFY_QUEUE_CTXT = 0,
+	IO_CMD_CLEAN_QUEUE_CTXT,
 };
 
 static void init_db_area_idx(struct hinic_free_db_area *free_db_area)
@@ -210,6 +202,59 @@
 		write_rq_ctxts(func_to_io, base_qpn, num_qps));
 }
 
+static int hinic_clean_queue_offload_ctxt(struct hinic_func_to_io *func_to_io,
+					  enum hinic_qp_ctxt_type ctxt_type)
+{
+	struct hinic_hwif *hwif = func_to_io->hwif;
+	struct hinic_clean_queue_ctxt *ctxt_block;
+	struct pci_dev *pdev = hwif->pdev;
+	struct hinic_cmdq_buf cmdq_buf;
+	u64 out_param = 0;
+	int err;
+
+	err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to allocate cmdq buf\n");
+		return err;
+	}
+
+	ctxt_block = cmdq_buf.buf;
+	ctxt_block->cmdq_hdr.num_queues = func_to_io->max_qps;
+	ctxt_block->cmdq_hdr.queue_type = ctxt_type;
+	ctxt_block->cmdq_hdr.addr_offset = 0;
+
+	/* TSO/LRO ctxt size: 0x0:0B; 0x1:160B; 0x2:200B; 0x3:240B */
+	ctxt_block->ctxt_size = 0x3;
+
+	hinic_cpu_to_be32(ctxt_block, sizeof(*ctxt_block));
+
+	cmdq_buf.size = sizeof(*ctxt_block);
+
+	err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
+				     IO_CMD_CLEAN_QUEUE_CTXT,
+				     &cmdq_buf, &out_param);
+
+	if (err || out_param) {
+		dev_err(&pdev->dev, "Failed to clean offload ctxts, err: %d, out_param: 0x%llx\n",
+			err, out_param);
+
+		err = -EFAULT;
+	}
+
+	hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf);
+
+	return err;
+}
+
+static int hinic_clean_qp_offload_ctxt(struct hinic_func_to_io *func_to_io)
+{
+	/* clean LRO/TSO context space */
+	return (hinic_clean_queue_offload_ctxt(func_to_io,
+					       HINIC_QP_CTXT_TYPE_SQ) ||
+		hinic_clean_queue_offload_ctxt(func_to_io,
+					       HINIC_QP_CTXT_TYPE_RQ));
+}
+
 /**
  * init_qp - Initialize a Queue Pair
  * @func_to_io: func to io channel that holds the IO components
@@ -355,9 +400,9 @@
 		goto err_sq_db;
 	}
 
-	ci_addr_base = dma_zalloc_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
-					   &func_to_io->ci_dma_base,
-					   GFP_KERNEL);
+	ci_addr_base = dma_alloc_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
+					  &func_to_io->ci_dma_base,
+					  GFP_KERNEL);
 	if (!ci_addr_base) {
 		dev_err(&pdev->dev, "Failed to allocate CI area\n");
 		err = -ENOMEM;
@@ -381,6 +426,12 @@
 		goto err_write_qp_ctxts;
 	}
 
+	err = hinic_clean_qp_offload_ctxt(func_to_io);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to clean QP contexts space\n");
+		goto err_write_qp_ctxts;
+	}
+
 	return 0;
 
 err_write_qp_ctxts:
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
index adb6417..cac2b72 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_IO_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 278dc13..c1a6be6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
index 320711e..182fba1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_MGMT_H
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index cb23962..be364b7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -70,8 +61,6 @@
 #define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
 #define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
 
-#define TX_MAX_MSS_DEFAULT      0x3E00
-
 enum sq_wqe_type {
 	SQ_NORMAL_WQE = 0,
 };
@@ -338,9 +327,9 @@
 		goto err_cqe_dma_arr_alloc;
 
 	for (i = 0; i < wq->q_depth; i++) {
-		rq->cqe[i] = dma_zalloc_coherent(&pdev->dev,
-						 sizeof(*rq->cqe[i]),
-						 &rq->cqe_dma[i], GFP_KERNEL);
+		rq->cqe[i] = dma_alloc_coherent(&pdev->dev,
+						sizeof(*rq->cqe[i]),
+						&rq->cqe_dma[i], GFP_KERNEL);
 		if (!rq->cqe[i])
 			goto err_cqe_alloc;
 	}
@@ -417,8 +406,8 @@
 
 	/* HW requirements: Must be at least 32 bit */
 	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
-	rq->pi_virt_addr = dma_zalloc_coherent(&pdev->dev, pi_size,
-					       &rq->pi_dma_addr, GFP_KERNEL);
+	rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
+					      &rq->pi_dma_addr, GFP_KERNEL);
 	if (!rq->pi_virt_addr) {
 		dev_err(&pdev->dev, "Failed to allocate PI address\n");
 		err = -ENOMEM;
@@ -494,33 +483,16 @@
 			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
 			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
 
-	ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
-					     QUEUE_INFO_MSS);
+	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
+					     QUEUE_INFO_MSS) |
+			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
 }
 
 static void sq_prepare_task(struct hinic_sq_task *task)
 {
-	task->pkt_info0 =
-		HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
-					INNER_L3TYPE) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
-					VLAN_OFFLOAD) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
-
-	task->pkt_info1 =
-		HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
-		HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
-		HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
-
-	task->pkt_info2 =
-		HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
-		HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN)  |
-		HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
-					TUNNEL_L4TYPE)    |
-		HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
-					OUTER_L3TYPE);
+	task->pkt_info0 = 0;
+	task->pkt_info1 = 0;
+	task->pkt_info2 = 0;
 
 	task->ufo_v6_identify = 0;
 
@@ -529,6 +501,86 @@
 	task->zero_pad = 0;
 }
 
+void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
+{
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
+}
+
+void hinic_task_set_outter_l3(struct hinic_sq_task *task,
+			      enum hinic_l3_offload_type l3_type,
+			      u32 network_len)
+{
+	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
+			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
+}
+
+void hinic_task_set_inner_l3(struct hinic_sq_task *task,
+			     enum hinic_l3_offload_type l3_type,
+			     u32 network_len)
+{
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
+	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
+}
+
+void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
+			      enum hinic_l4_tunnel_type l4_type,
+			      u32 tunnel_len)
+{
+	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
+			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
+}
+
+void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
+			   enum hinic_l4_offload_type l4_offload,
+			   u32 l4_len, u32 offset)
+{
+	u32 tcp_udp_cs = 0, sctp = 0;
+	u32 mss = HINIC_MSS_DEFAULT;
+
+	if (l4_offload == TCP_OFFLOAD_ENABLE ||
+	    l4_offload == UDP_OFFLOAD_ENABLE)
+		tcp_udp_cs = 1;
+	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
+		sctp = 1;
+
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
+	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
+
+	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
+		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
+		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
+
+	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
+}
+
+void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
+			    enum hinic_l4_offload_type l4_offload,
+			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
+{
+	u32 tso = 0, ufo = 0;
+
+	if (l4_offload == TCP_OFFLOAD_ENABLE)
+		tso = 1;
+	else if (l4_offload == UDP_OFFLOAD_ENABLE)
+		ufo = 1;
+
+	task->ufo_v6_identify = ip_ident;
+
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
+	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
+
+	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
+		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
+		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
+		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
+
+	/* set MSS value */
+	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
+}
+
 /**
  * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
  * @sq: send queue
@@ -613,6 +665,16 @@
 }
 
 /**
+ * hinic_sq_return_wqe - return the wqe to the sq
+ * @sq: send queue
+ * @wqe_size: the size of the wqe
+ **/
+void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
+{
+	hinic_return_wqe(sq->wq, wqe_size);
+}
+
+/**
  * hinic_sq_write_wqe - write the wqe to the sq
  * @sq: send queue
  * @prod_idx: pi of the wqe
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index 6c84f83..f4a339b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_QP_H
@@ -149,6 +140,31 @@
 
 int hinic_get_rq_free_wqebbs(struct hinic_rq *rq);
 
+void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len);
+
+void hinic_task_set_outter_l3(struct hinic_sq_task *task,
+			      enum hinic_l3_offload_type l3_type,
+			      u32 network_len);
+
+void hinic_task_set_inner_l3(struct hinic_sq_task *task,
+			     enum hinic_l3_offload_type l3_type,
+			     u32 network_len);
+
+void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
+			      enum hinic_l4_tunnel_type l4_type,
+			      u32 tunnel_len);
+
+void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
+			   u32 *queue_info,
+			   enum hinic_l4_offload_type l4_offload,
+			   u32 l4_len, u32 offset);
+
+void hinic_set_tso_inner_l4(struct hinic_sq_task *task,
+			    u32 *queue_info,
+			    enum hinic_l4_offload_type l4_offload,
+			    u32 l4_len,
+			    u32 offset, u32 ip_ident, u32 mss);
+
 void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
 			  struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
 			  int nr_sges);
@@ -159,6 +175,8 @@
 struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
 				      unsigned int wqe_size, u16 *prod_idx);
 
+void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size);
+
 void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
 			struct hinic_sq_wqe *wqe, struct sk_buff *skb,
 			unsigned int wqe_size);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h
index 376abf0..00900a6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_QP_CTXT_H
@@ -201,6 +192,11 @@
 	u32     wq_block_lo_pfn;
 };
 
+struct hinic_clean_queue_ctxt {
+	struct hinic_qp_ctxt_header	cmdq_hdr;
+	u32				ctxt_size;
+};
+
 struct hinic_sq_ctxt_block {
 	struct hinic_qp_ctxt_header hdr;
 	struct hinic_sq_ctxt sq_ctxt[HINIC_Q_CTXT_MAX];
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index 3e3181c..0336321 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -74,12 +65,6 @@
 			((void *)((cmdq_pages)->shadow_page_vaddr) \
 				+ (wq)->block_idx * CMDQ_BLOCK_SIZE)
 
-#define WQE_PAGE_OFF(wq, idx)   (((idx) & ((wq)->num_wqebbs_per_page - 1)) * \
-					(wq)->wqebb_size)
-
-#define WQE_PAGE_NUM(wq, idx)   (((idx) / ((wq)->num_wqebbs_per_page)) \
-					& ((wq)->num_q_pages - 1))
-
 #define WQ_PAGE_ADDR(wq, idx)           \
 			((wq)->shadow_block_vaddr[WQE_PAGE_NUM(wq, idx)])
 
@@ -93,6 +78,17 @@
 		(((unsigned long)(wqe) - (unsigned long)(wq)->shadow_wqe) \
 			/ (wq)->max_wqe_size)
 
+static inline int WQE_PAGE_OFF(struct hinic_wq *wq, u16 idx)
+{
+	return (((idx) & ((wq)->num_wqebbs_per_page - 1))
+		<< (wq)->wqebb_size_shift);
+}
+
+static inline int WQE_PAGE_NUM(struct hinic_wq *wq, u16 idx)
+{
+	return (((idx) >> ((wq)->wqebbs_per_page_shift))
+		& ((wq)->num_q_pages - 1));
+}
 /**
  * queue_alloc_page - allocate page for Queue
  * @hwif: HW interface for allocating DMA
@@ -109,8 +105,8 @@
 	struct pci_dev *pdev = hwif->pdev;
 	dma_addr_t dma_addr;
 
-	*vaddr = dma_zalloc_coherent(&pdev->dev, page_sz, &dma_addr,
-				     GFP_KERNEL);
+	*vaddr = dma_alloc_coherent(&pdev->dev, page_sz, &dma_addr,
+				    GFP_KERNEL);
 	if (!*vaddr) {
 		dev_err(&pdev->dev, "Failed to allocate dma for wqs page\n");
 		return -ENOMEM;
@@ -477,8 +473,8 @@
 		u64 *paddr = &wq->block_vaddr[i];
 		dma_addr_t dma_addr;
 
-		*vaddr = dma_zalloc_coherent(&pdev->dev, wq->wq_page_size,
-					     &dma_addr, GFP_KERNEL);
+		*vaddr = dma_alloc_coherent(&pdev->dev, wq->wq_page_size,
+					    &dma_addr, GFP_KERNEL);
 		if (!*vaddr) {
 			dev_err(&pdev->dev, "Failed to allocate wq page\n");
 			goto err_alloc_wq_pages;
@@ -513,10 +509,11 @@
 	struct hinic_hwif *hwif = wqs->hwif;
 	struct pci_dev *pdev = hwif->pdev;
 	u16 num_wqebbs_per_page;
+	u16 wqebb_size_shift;
 	int err;
 
-	if (wqebb_size == 0) {
-		dev_err(&pdev->dev, "wqebb_size must be > 0\n");
+	if (!is_power_of_2(wqebb_size)) {
+		dev_err(&pdev->dev, "wqebb_size must be power of 2\n");
 		return -EINVAL;
 	}
 
@@ -530,9 +527,11 @@
 		return -EINVAL;
 	}
 
-	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size) / wqebb_size;
+	wqebb_size_shift = ilog2(wqebb_size);
+	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size)
+				>> wqebb_size_shift;
 
-	if (num_wqebbs_per_page & (num_wqebbs_per_page - 1)) {
+	if (!is_power_of_2(num_wqebbs_per_page)) {
 		dev_err(&pdev->dev, "num wqebbs per page must be power of 2\n");
 		return -EINVAL;
 	}
@@ -550,7 +549,8 @@
 	wq->q_depth = q_depth;
 	wq->max_wqe_size = max_wqe_size;
 	wq->num_wqebbs_per_page = num_wqebbs_per_page;
-
+	wq->wqebbs_per_page_shift = ilog2(num_wqebbs_per_page);
+	wq->wqebb_size_shift = wqebb_size_shift;
 	wq->block_vaddr = WQ_BASE_VADDR(wqs, wq);
 	wq->shadow_block_vaddr = WQ_BASE_ADDR(wqs, wq);
 	wq->block_paddr = WQ_BASE_PADDR(wqs, wq);
@@ -604,11 +604,13 @@
 			 u16 q_depth, u16 max_wqe_size)
 {
 	struct pci_dev *pdev = hwif->pdev;
+	u16 num_wqebbs_per_page_shift;
 	u16 num_wqebbs_per_page;
+	u16 wqebb_size_shift;
 	int i, j, err = -ENOMEM;
 
-	if (wqebb_size == 0) {
-		dev_err(&pdev->dev, "wqebb_size must be > 0\n");
+	if (!is_power_of_2(wqebb_size)) {
+		dev_err(&pdev->dev, "wqebb_size must be power of 2\n");
 		return -EINVAL;
 	}
 
@@ -622,9 +624,11 @@
 		return -EINVAL;
 	}
 
-	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size) / wqebb_size;
+	wqebb_size_shift = ilog2(wqebb_size);
+	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size)
+				>> wqebb_size_shift;
 
-	if (num_wqebbs_per_page & (num_wqebbs_per_page - 1)) {
+	if (!is_power_of_2(num_wqebbs_per_page)) {
 		dev_err(&pdev->dev, "num wqebbs per page must be power of 2\n");
 		return -EINVAL;
 	}
@@ -636,6 +640,7 @@
 		dev_err(&pdev->dev, "Failed to allocate CMDQ page\n");
 		return err;
 	}
+	num_wqebbs_per_page_shift = ilog2(num_wqebbs_per_page);
 
 	for (i = 0; i < cmdq_blocks; i++) {
 		wq[i].hwif = hwif;
@@ -647,7 +652,8 @@
 		wq[i].q_depth = q_depth;
 		wq[i].max_wqe_size = max_wqe_size;
 		wq[i].num_wqebbs_per_page = num_wqebbs_per_page;
-
+		wq[i].wqebbs_per_page_shift = num_wqebbs_per_page_shift;
+		wq[i].wqebb_size_shift = wqebb_size_shift;
 		wq[i].block_vaddr = CMDQ_BASE_VADDR(cmdq_pages, &wq[i]);
 		wq[i].shadow_block_vaddr = CMDQ_BASE_ADDR(cmdq_pages, &wq[i]);
 		wq[i].block_paddr = CMDQ_BASE_PADDR(cmdq_pages, &wq[i]);
@@ -741,7 +747,7 @@
 
 	*prod_idx = MASKED_WQE_IDX(wq, atomic_read(&wq->prod_idx));
 
-	num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) >> wq->wqebb_size_shift;
 
 	if (atomic_sub_return(num_wqebbs, &wq->delta) <= 0) {
 		atomic_add(num_wqebbs, &wq->delta);
@@ -775,13 +781,28 @@
 }
 
 /**
+ * hinic_return_wqe - return the wqe when transmission fails
+ * @wq: wq to return wqe
+ * @wqe_size: wqe size
+ **/
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size)
+{
+	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+
+	atomic_sub(num_wqebbs, &wq->prod_idx);
+
+	atomic_add(num_wqebbs, &wq->delta);
+}
+
+/**
  * hinic_put_wqe - return the wqe place for reuse by a new wqe
  * @wq: wq to return wqe
  * @wqe_size: wqe size
  **/
 void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size)
 {
-	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size)
+			>> wq->wqebb_size_shift;
 
 	atomic_add(num_wqebbs, &wq->cons_idx);
 
@@ -799,7 +820,8 @@
 struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 				    u16 *cons_idx)
 {
-	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size)
+			>> wq->wqebb_size_shift;
 	u16 curr_cons_idx, end_cons_idx;
 	int curr_pg, end_pg;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
index 9c030a0..811eef7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_WQ_H
@@ -39,7 +30,8 @@
 	u16             q_depth;
 	u16             max_wqe_size;
 	u16             num_wqebbs_per_page;
-
+	u16		wqebbs_per_page_shift;
+	u16		wqebb_size_shift;
 	/* The addresses are 64 bit in the HW */
 	u64             block_paddr;
 	void            **shadow_block_vaddr;
@@ -104,6 +96,8 @@
 struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 				   u16 *prod_idx);
 
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size);
+
 void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size);
 
 struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
index bc73485..f4b6d2c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_HW_WQE_H
@@ -62,19 +53,33 @@
 			(((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \
 			 & HINIC_CMDQ_WQE_HEADER_##member##_MASK)
 
-#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT    0
-#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT        16
-#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT         22
-#define HINIC_SQ_CTRL_LEN_SHIFT                 29
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT           0
+#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT               16
+#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT                22
+#define HINIC_SQ_CTRL_LEN_SHIFT                        29
 
-#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK     0xFF
-#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK         0x1F
-#define HINIC_SQ_CTRL_DATA_FORMAT_MASK          0x1
-#define HINIC_SQ_CTRL_LEN_MASK                  0x3
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK            0xFF
+#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK                0x1F
+#define HINIC_SQ_CTRL_DATA_FORMAT_MASK                 0x1
+#define HINIC_SQ_CTRL_LEN_MASK                         0x3
 
-#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT      13
+#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT          2
+#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT             10
+#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT             11
+#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT       12
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT             13
+#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT            27
+#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT              28
+#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT             29
 
-#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK       0x3FFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK           0xFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK              0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK              0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	       0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK              0x3FFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK             0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK               0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK              0x7
 
 #define HINIC_SQ_CTRL_SET(val, member)          \
 		(((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \
@@ -84,6 +89,10 @@
 		(((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \
 		 & HINIC_SQ_CTRL_##member##_MASK)
 
+#define HINIC_SQ_CTRL_CLEAR(val, member)	\
+		((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \
+		 << HINIC_SQ_CTRL_##member##_SHIFT)))
+
 #define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT     0
 #define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT    8
 #define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT  10
@@ -108,28 +117,28 @@
 
 /* 8 bits reserved */
 #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT    8
-#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT  16
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT  24
+#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT   16
+#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT   24
 
 /* 8 bits reserved */
 #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK     0xFF
-#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK   0xFF
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK   0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK    0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK    0xFF
 
 #define HINIC_SQ_TASK_INFO1_SET(val, member)    \
 		(((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) <<  \
 		 HINIC_SQ_TASK_INFO1_##member##_SHIFT)
 
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0
-#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT  12
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT  0
+#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT   8
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16
 /* 1 bit reserved */
-#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  22
+#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  24
 /* 8 bits reserved */
 
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK  0xFFF
-#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK   0x7F
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x3
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK   0xFF
+#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK    0xFF
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x7
 /* 1 bit reserved */
 #define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK   0x3
 /* 8 bits reserved */
@@ -152,6 +161,10 @@
 
 #define HINIC_RQ_CQE_STATUS_RXDONE_MASK         0x1
 
+#define HINIC_RQ_CQE_STATUS_CSUM_ERR_SHIFT	0
+
+#define HINIC_RQ_CQE_STATUS_CSUM_ERR_MASK	0xFFFFU
+
 #define HINIC_RQ_CQE_STATUS_GET(val, member)    \
 		(((val) >> HINIC_RQ_CQE_STATUS_##member##_SHIFT) & \
 		 HINIC_RQ_CQE_STATUS_##member##_MASK)
@@ -187,12 +200,66 @@
 		 sizeof(struct hinic_sq_task) + \
 		 (nr_sges) * sizeof(struct hinic_sq_bufdesc))
 
-#define HINIC_SCMD_DATA_LEN             16
+#define HINIC_SCMD_DATA_LEN                     16
 
-#define HINIC_MAX_SQ_BUFDESCS           17
+#define HINIC_MAX_SQ_BUFDESCS                   17
 
-#define HINIC_SQ_WQE_MAX_SIZE           320
-#define HINIC_RQ_WQE_SIZE               32
+#define HINIC_SQ_WQE_MAX_SIZE                   320
+#define HINIC_RQ_WQE_SIZE                       32
+
+#define HINIC_MSS_DEFAULT		        0x3E00
+#define HINIC_MSS_MIN		                0x50
+
+#define RQ_CQE_STATUS_NUM_LRO_SHIFT		16
+#define RQ_CQE_STATUS_NUM_LRO_MASK		0xFFU
+
+#define RQ_CQE_STATUS_GET(val, member)		(((val) >> \
+			RQ_CQE_STATUS_##member##_SHIFT) & \
+			RQ_CQE_STATUS_##member##_MASK)
+
+#define HINIC_GET_RX_NUM_LRO(status)	\
+		RQ_CQE_STATUS_GET(status, NUM_LRO)
+
+#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT		0
+#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK		0xFFFU
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT		21
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK		0x1U
+
+#define RQ_CQE_OFFOLAD_TYPE_GET(val, member)		(((val) >> \
+				RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \
+				RQ_CQE_OFFOLAD_TYPE_##member##_MASK)
+
+#define HINIC_GET_RX_PKT_TYPE(offload_type)	\
+			RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE)
+
+#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)	\
+			RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)
+
+#define RQ_CQE_SGE_VLAN_MASK				0xFFFFU
+#define RQ_CQE_SGE_VLAN_SHIFT				0
+
+#define RQ_CQE_SGE_GET(val, member)			(((val) >> \
+					RQ_CQE_SGE_##member##_SHIFT) & \
+					RQ_CQE_SGE_##member##_MASK)
+
+#define HINIC_GET_RX_VLAN_TAG(vlan_len)	\
+		RQ_CQE_SGE_GET(vlan_len, VLAN)
+
+#define HINIC_RSS_TYPE_VALID_SHIFT			23
+#define HINIC_RSS_TYPE_TCP_IPV6_EXT_SHIFT		24
+#define HINIC_RSS_TYPE_IPV6_EXT_SHIFT			25
+#define HINIC_RSS_TYPE_TCP_IPV6_SHIFT			26
+#define HINIC_RSS_TYPE_IPV6_SHIFT			27
+#define HINIC_RSS_TYPE_TCP_IPV4_SHIFT			28
+#define HINIC_RSS_TYPE_IPV4_SHIFT			29
+#define HINIC_RSS_TYPE_UDP_IPV6_SHIFT			30
+#define HINIC_RSS_TYPE_UDP_IPV4_SHIFT			31
+
+#define HINIC_RSS_TYPE_SET(val, member)                        \
+		(((u32)(val) & 0x1) << HINIC_RSS_TYPE_##member##_SHIFT)
+
+#define HINIC_RSS_TYPE_GET(val, member)                        \
+		(((u32)(val) >> HINIC_RSS_TYPE_##member##_SHIFT) & 0x1)
 
 enum hinic_l4offload_type {
 	HINIC_L4_OFF_DISABLE            = 0,
@@ -211,6 +278,26 @@
 	HINIC_PKT_PARSED     = 1,
 };
 
+enum hinic_l3_offload_type {
+	L3TYPE_UNKNOWN = 0,
+	IPV6_PKT = 1,
+	IPV4_PKT_NO_CHKSUM_OFFLOAD = 2,
+	IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3,
+};
+
+enum hinic_l4_offload_type {
+	OFFLOAD_DISABLE     = 0,
+	TCP_OFFLOAD_ENABLE  = 1,
+	SCTP_OFFLOAD_ENABLE = 2,
+	UDP_OFFLOAD_ENABLE  = 3,
+};
+
+enum hinic_l4_tunnel_type {
+	NOT_TUNNEL,
+	TUNNEL_UDP_NO_CSUM,
+	TUNNEL_UDP_CSUM,
+};
+
 enum hinic_outer_l3type {
 	HINIC_OUTER_L3TYPE_UNKNOWN              = 0,
 	HINIC_OUTER_L3TYPE_IPV6                 = 1,
@@ -327,7 +414,7 @@
 	u32     status;
 	u32     len;
 
-	u32     rsvd2;
+	u32     offload_type;
 	u32     rsvd3;
 	u32     rsvd4;
 	u32     rsvd5;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 4a8f829..2411ad2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -51,9 +42,10 @@
 module_param(rx_weight, uint, 0644);
 MODULE_PARM_DESC(rx_weight, "Number of Rx packets for NAPI budget (default=64)");
 
-#define HINIC_DEV_ID_QUAD_PORT_25GE     0x1822
-#define HINIC_DEV_ID_DUAL_PORT_25GE     0x0200
-#define HINIC_DEV_ID_DUAL_PORT_100GE    0x0201
+#define HINIC_DEV_ID_QUAD_PORT_25GE         0x1822
+#define HINIC_DEV_ID_DUAL_PORT_100GE        0x0200
+#define HINIC_DEV_ID_DUAL_PORT_100GE_MEZZ   0x0205
+#define HINIC_DEV_ID_QUAD_PORT_25GE_MEZZ    0x0210
 
 #define HINIC_WQ_NAME                   "hinic_dev"
 
@@ -61,6 +53,10 @@
 					 NETIF_MSG_IFUP |                  \
 					 NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
 
+#define HINIC_LRO_MAX_WQE_NUM_DEFAULT	8
+
+#define HINIC_LRO_RX_TIMER_DEFAULT	16
+
 #define VLAN_BITMAP_SIZE(nic_dev)       (ALIGN(VLAN_N_VID, 8) / 8)
 
 #define work_to_rx_mode_work(work)      \
@@ -71,137 +67,9 @@
 
 static int change_mac_addr(struct net_device *netdev, const u8 *addr);
 
-static void set_link_speed(struct ethtool_link_ksettings *link_ksettings,
-			   enum hinic_speed speed)
-{
-	switch (speed) {
-	case HINIC_SPEED_10MB_LINK:
-		link_ksettings->base.speed = SPEED_10;
-		break;
-
-	case HINIC_SPEED_100MB_LINK:
-		link_ksettings->base.speed = SPEED_100;
-		break;
-
-	case HINIC_SPEED_1000MB_LINK:
-		link_ksettings->base.speed = SPEED_1000;
-		break;
-
-	case HINIC_SPEED_10GB_LINK:
-		link_ksettings->base.speed = SPEED_10000;
-		break;
-
-	case HINIC_SPEED_25GB_LINK:
-		link_ksettings->base.speed = SPEED_25000;
-		break;
-
-	case HINIC_SPEED_40GB_LINK:
-		link_ksettings->base.speed = SPEED_40000;
-		break;
-
-	case HINIC_SPEED_100GB_LINK:
-		link_ksettings->base.speed = SPEED_100000;
-		break;
-
-	default:
-		link_ksettings->base.speed = SPEED_UNKNOWN;
-		break;
-	}
-}
-
-static int hinic_get_link_ksettings(struct net_device *netdev,
-				    struct ethtool_link_ksettings
-				    *link_ksettings)
-{
-	struct hinic_dev *nic_dev = netdev_priv(netdev);
-	enum hinic_port_link_state link_state;
-	struct hinic_port_cap port_cap;
-	int err;
-
-	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
-	ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
-					     Autoneg);
-
-	link_ksettings->base.speed   = SPEED_UNKNOWN;
-	link_ksettings->base.autoneg = AUTONEG_DISABLE;
-	link_ksettings->base.duplex  = DUPLEX_UNKNOWN;
-
-	err = hinic_port_get_cap(nic_dev, &port_cap);
-	if (err) {
-		netif_err(nic_dev, drv, netdev,
-			  "Failed to get port capabilities\n");
-		return err;
-	}
-
-	err = hinic_port_link_state(nic_dev, &link_state);
-	if (err) {
-		netif_err(nic_dev, drv, netdev,
-			  "Failed to get port link state\n");
-		return err;
-	}
-
-	if (link_state != HINIC_LINK_STATE_UP) {
-		netif_info(nic_dev, drv, netdev, "No link\n");
-		return err;
-	}
-
-	set_link_speed(link_ksettings, port_cap.speed);
-
-	if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED))
-		ethtool_link_ksettings_add_link_mode(link_ksettings,
-						     advertising, Autoneg);
-
-	if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE)
-		link_ksettings->base.autoneg = AUTONEG_ENABLE;
-
-	link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ?
-				       DUPLEX_FULL : DUPLEX_HALF;
-	return 0;
-}
-
-static void hinic_get_drvinfo(struct net_device *netdev,
-			      struct ethtool_drvinfo *info)
-{
-	struct hinic_dev *nic_dev = netdev_priv(netdev);
-	struct hinic_hwdev *hwdev = nic_dev->hwdev;
-	struct hinic_hwif *hwif = hwdev->hwif;
-
-	strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
-	strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
-}
-
-static void hinic_get_ringparam(struct net_device *netdev,
-				struct ethtool_ringparam *ring)
-{
-	ring->rx_max_pending = HINIC_RQ_DEPTH;
-	ring->tx_max_pending = HINIC_SQ_DEPTH;
-	ring->rx_pending = HINIC_RQ_DEPTH;
-	ring->tx_pending = HINIC_SQ_DEPTH;
-}
-
-static void hinic_get_channels(struct net_device *netdev,
-			       struct ethtool_channels *channels)
-{
-	struct hinic_dev *nic_dev = netdev_priv(netdev);
-	struct hinic_hwdev *hwdev = nic_dev->hwdev;
-
-	channels->max_rx = hwdev->nic_cap.max_qps;
-	channels->max_tx = hwdev->nic_cap.max_qps;
-	channels->max_other    = 0;
-	channels->max_combined = 0;
-	channels->rx_count = hinic_hwdev_num_qps(hwdev);
-	channels->tx_count = hinic_hwdev_num_qps(hwdev);
-	channels->other_count    = 0;
-	channels->combined_count = 0;
-}
-
-static const struct ethtool_ops hinic_ethtool_ops = {
-	.get_link_ksettings = hinic_get_link_ksettings,
-	.get_drvinfo = hinic_get_drvinfo,
-	.get_link = ethtool_op_get_link,
-	.get_ringparam = hinic_get_ringparam,
-	.get_channels = hinic_get_channels,
-};
+static int set_features(struct hinic_dev *nic_dev,
+			netdev_features_t pre_features,
+			netdev_features_t features, bool force_change);
 
 static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq)
 {
@@ -215,6 +83,9 @@
 	u64_stats_update_begin(&nic_rx_stats->syncp);
 	nic_rx_stats->bytes += rx_stats.bytes;
 	nic_rx_stats->pkts  += rx_stats.pkts;
+	nic_rx_stats->errors += rx_stats.errors;
+	nic_rx_stats->csum_errors += rx_stats.csum_errors;
+	nic_rx_stats->other_errors += rx_stats.other_errors;
 	u64_stats_update_end(&nic_rx_stats->syncp);
 
 	hinic_rxq_clean_stats(rxq);
@@ -235,6 +106,7 @@
 	nic_tx_stats->tx_busy += tx_stats.tx_busy;
 	nic_tx_stats->tx_wake += tx_stats.tx_wake;
 	nic_tx_stats->tx_dropped += tx_stats.tx_dropped;
+	nic_tx_stats->big_frags_pkts += tx_stats.big_frags_pkts;
 	u64_stats_update_end(&nic_tx_stats->syncp);
 
 	hinic_txq_clean_stats(txq);
@@ -371,11 +243,135 @@
 	nic_dev->rxqs = NULL;
 }
 
+static int hinic_configure_max_qnum(struct hinic_dev *nic_dev)
+{
+	int err;
+
+	err = hinic_set_max_qnum(nic_dev, nic_dev->hwdev->nic_cap.max_qps);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int hinic_rss_init(struct hinic_dev *nic_dev)
+{
+	u8 default_rss_key[HINIC_RSS_KEY_SIZE];
+	u8 tmpl_idx = nic_dev->rss_tmpl_idx;
+	u32 *indir_tbl;
+	int err, i;
+
+	indir_tbl = kcalloc(HINIC_RSS_INDIR_SIZE, sizeof(u32), GFP_KERNEL);
+	if (!indir_tbl)
+		return -ENOMEM;
+
+	netdev_rss_key_fill(default_rss_key, sizeof(default_rss_key));
+	for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++)
+		indir_tbl[i] = ethtool_rxfh_indir_default(i, nic_dev->num_rss);
+
+	err = hinic_rss_set_template_tbl(nic_dev, tmpl_idx, default_rss_key);
+	if (err)
+		goto out;
+
+	err = hinic_rss_set_indir_tbl(nic_dev, tmpl_idx, indir_tbl);
+	if (err)
+		goto out;
+
+	err = hinic_set_rss_type(nic_dev, tmpl_idx, nic_dev->rss_type);
+	if (err)
+		goto out;
+
+	err = hinic_rss_set_hash_engine(nic_dev, tmpl_idx,
+					nic_dev->rss_hash_engine);
+	if (err)
+		goto out;
+
+	err = hinic_rss_cfg(nic_dev, 1, tmpl_idx);
+	if (err)
+		goto out;
+
+out:
+	kfree(indir_tbl);
+	return err;
+}
+
+static void hinic_rss_deinit(struct hinic_dev *nic_dev)
+{
+	hinic_rss_cfg(nic_dev, 0, nic_dev->rss_tmpl_idx);
+}
+
+static void hinic_init_rss_parameters(struct hinic_dev *nic_dev)
+{
+	nic_dev->rss_hash_engine = HINIC_RSS_HASH_ENGINE_TYPE_XOR;
+	nic_dev->rss_type.tcp_ipv6_ext = 1;
+	nic_dev->rss_type.ipv6_ext = 1;
+	nic_dev->rss_type.tcp_ipv6 = 1;
+	nic_dev->rss_type.ipv6 = 1;
+	nic_dev->rss_type.tcp_ipv4 = 1;
+	nic_dev->rss_type.ipv4 = 1;
+	nic_dev->rss_type.udp_ipv6 = 1;
+	nic_dev->rss_type.udp_ipv4 = 1;
+}
+
+static void hinic_enable_rss(struct hinic_dev *nic_dev)
+{
+	struct net_device *netdev = nic_dev->netdev;
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	int i, node, err = 0;
+	u16 num_cpus = 0;
+
+	nic_dev->max_qps = hinic_hwdev_max_num_qps(hwdev);
+	if (nic_dev->max_qps <= 1) {
+		nic_dev->flags &= ~HINIC_RSS_ENABLE;
+		nic_dev->rss_limit = nic_dev->max_qps;
+		nic_dev->num_qps = nic_dev->max_qps;
+		nic_dev->num_rss = nic_dev->max_qps;
+
+		return;
+	}
+
+	err = hinic_rss_template_alloc(nic_dev, &nic_dev->rss_tmpl_idx);
+	if (err) {
+		netif_err(nic_dev, drv, netdev,
+			  "Failed to alloc tmpl_idx for rss, can't enable rss for this function\n");
+		nic_dev->flags &= ~HINIC_RSS_ENABLE;
+		nic_dev->max_qps = 1;
+		nic_dev->rss_limit = nic_dev->max_qps;
+		nic_dev->num_qps = nic_dev->max_qps;
+		nic_dev->num_rss = nic_dev->max_qps;
+
+		return;
+	}
+
+	nic_dev->flags |= HINIC_RSS_ENABLE;
+
+	for (i = 0; i < num_online_cpus(); i++) {
+		node = cpu_to_node(i);
+		if (node == dev_to_node(&pdev->dev))
+			num_cpus++;
+	}
+
+	if (!num_cpus)
+		num_cpus = num_online_cpus();
+
+	nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus);
+
+	nic_dev->rss_limit = nic_dev->num_qps;
+	nic_dev->num_rss = nic_dev->num_qps;
+
+	hinic_init_rss_parameters(nic_dev);
+	err = hinic_rss_init(nic_dev);
+	if (err)
+		netif_err(nic_dev, drv, netdev, "Failed to init rss\n");
+}
+
 static int hinic_open(struct net_device *netdev)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
 	enum hinic_port_link_state link_state;
-	int err, ret, num_qps;
+	int err, ret;
 
 	if (!(nic_dev->flags & HINIC_INTF_UP)) {
 		err = hinic_hwdev_ifup(nic_dev->hwdev);
@@ -400,9 +396,17 @@
 		goto err_create_rxqs;
 	}
 
-	num_qps = hinic_hwdev_num_qps(nic_dev->hwdev);
-	netif_set_real_num_tx_queues(netdev, num_qps);
-	netif_set_real_num_rx_queues(netdev, num_qps);
+	hinic_enable_rss(nic_dev);
+
+	err = hinic_configure_max_qnum(nic_dev);
+	if (err) {
+		netif_err(nic_dev, drv, nic_dev->netdev,
+			  "Failed to configure the maximum number of queues\n");
+		goto err_port_state;
+	}
+
+	netif_set_real_num_tx_queues(netdev, nic_dev->num_qps);
+	netif_set_real_num_rx_queues(netdev, nic_dev->num_qps);
 
 	err = hinic_port_set_state(nic_dev, HINIC_PORT_ENABLE);
 	if (err) {
@@ -458,9 +462,12 @@
 	if (ret)
 		netif_warn(nic_dev, drv, netdev,
 			   "Failed to revert port state\n");
-
 err_port_state:
 	free_rxqs(nic_dev);
+	if (nic_dev->flags & HINIC_RSS_ENABLE) {
+		hinic_rss_deinit(nic_dev);
+		hinic_rss_template_free(nic_dev, nic_dev->rss_tmpl_idx);
+	}
 
 err_create_rxqs:
 	free_txqs(nic_dev);
@@ -504,6 +511,11 @@
 		return err;
 	}
 
+	if (nic_dev->flags & HINIC_RSS_ENABLE) {
+		hinic_rss_deinit(nic_dev);
+		hinic_rss_template_free(nic_dev, nic_dev->rss_tmpl_idx);
+	}
+
 	free_rxqs(nic_dev);
 	free_txqs(nic_dev);
 
@@ -600,9 +612,6 @@
 	u16 vid = 0;
 	int err;
 
-	if (!is_valid_ether_addr(addr))
-		return -EADDRNOTAVAIL;
-
 	netif_info(nic_dev, drv, netdev, "set mac addr = %02x %02x %02x %02x %02x %02x\n",
 		   addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
 
@@ -783,12 +792,36 @@
 
 	stats->rx_bytes   = nic_rx_stats->bytes;
 	stats->rx_packets = nic_rx_stats->pkts;
+	stats->rx_errors  = nic_rx_stats->errors;
 
 	stats->tx_bytes   = nic_tx_stats->bytes;
 	stats->tx_packets = nic_tx_stats->pkts;
 	stats->tx_errors  = nic_tx_stats->tx_dropped;
 }
 
+static int hinic_set_features(struct net_device *netdev,
+			      netdev_features_t features)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+
+	return set_features(nic_dev, nic_dev->netdev->features,
+			    features, false);
+}
+
+static netdev_features_t hinic_fix_features(struct net_device *netdev,
+					    netdev_features_t features)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+
+	/* If Rx checksum is disabled, then LRO should also be disabled */
+	if (!(features & NETIF_F_RXCSUM)) {
+		netif_info(nic_dev, drv, netdev, "disabling LRO as RXCSUM is off\n");
+		features &= ~NETIF_F_LRO;
+	}
+
+	return features;
+}
+
 static const struct net_device_ops hinic_netdev_ops = {
 	.ndo_open = hinic_open,
 	.ndo_stop = hinic_close,
@@ -801,11 +834,16 @@
 	.ndo_start_xmit = hinic_xmit_frame,
 	.ndo_tx_timeout = hinic_tx_timeout,
 	.ndo_get_stats64 = hinic_get_stats64,
+	.ndo_fix_features = hinic_fix_features,
+	.ndo_set_features = hinic_set_features,
 };
 
 static void netdev_features_init(struct net_device *netdev)
 {
-	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
+	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |
+			      NETIF_F_RXCSUM | NETIF_F_LRO |
+			      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 
 	netdev->vlan_features = netdev->hw_features;
 
@@ -863,6 +901,36 @@
 	*out_size = sizeof(*ret_link_status);
 }
 
+static int set_features(struct hinic_dev *nic_dev,
+			netdev_features_t pre_features,
+			netdev_features_t features, bool force_change)
+{
+	netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
+	u32 csum_en = HINIC_RX_CSUM_OFFLOAD_EN;
+	int err = 0;
+
+	if (changed & NETIF_F_TSO)
+		err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
+					 HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
+
+	if (changed & NETIF_F_RXCSUM)
+		err = hinic_set_rx_csum_offload(nic_dev, csum_en);
+
+	if (changed & NETIF_F_LRO) {
+		err = hinic_set_rx_lro_state(nic_dev,
+					     !!(features & NETIF_F_LRO),
+					     HINIC_LRO_RX_TIMER_DEFAULT,
+					     HINIC_LRO_MAX_WQE_NUM_DEFAULT);
+	}
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+		err = hinic_set_rx_vlan_offload(nic_dev,
+						!!(features &
+						   NETIF_F_HW_VLAN_CTAG_RX));
+
+	return err;
+}
+
 /**
  * nic_dev_init - Initialize the NIC device
  * @pdev: the NIC pci device
@@ -899,8 +967,8 @@
 		goto err_alloc_etherdev;
 	}
 
+	hinic_set_ethtool_ops(netdev);
 	netdev->netdev_ops = &hinic_netdev_ops;
-	netdev->ethtool_ops = &hinic_ethtool_ops;
 	netdev->max_mtu = ETH_MAX_MTU;
 
 	nic_dev = netdev_priv(netdev);
@@ -963,7 +1031,12 @@
 	hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
 				nic_dev, link_status_event_handler);
 
+	err = set_features(nic_dev, 0, nic_dev->netdev->features, true);
+	if (err)
+		goto err_set_features;
+
 	SET_NETDEV_DEV(netdev, &pdev->dev);
+
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to register netdev\n");
@@ -973,6 +1046,7 @@
 	return 0;
 
 err_reg_netdev:
+err_set_features:
 	hinic_hwdev_cb_unregister(nic_dev->hwdev,
 				  HINIC_MGMT_MSG_CMD_LINK_STATUS);
 	cancel_work_sync(&rx_mode_work->work);
@@ -1079,10 +1153,16 @@
 	dev_info(&pdev->dev, "HiNIC driver - removed\n");
 }
 
+static void hinic_shutdown(struct pci_dev *pdev)
+{
+	pci_disable_device(pdev);
+}
+
 static const struct pci_device_id hinic_pci_table[] = {
 	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE), 0},
-	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_25GE), 0},
 	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE), 0},
+	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE_MEZZ), 0},
+	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE_MEZZ), 0},
 	{ 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, hinic_pci_table);
@@ -1092,6 +1172,7 @@
 	.id_table       = hinic_pci_table,
 	.probe          = hinic_probe,
 	.remove         = hinic_remove,
+	.shutdown       = hinic_shutdown,
 };
 
 module_pci_driver(hinic_driver);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 4d4e3f0..1e389a0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/types.h>
@@ -377,3 +368,703 @@
 
 	return 0;
 }
+
+/**
+ * hinic_port_set_tso - set port tso configuration
+ * @nic_dev: nic device
+ * @state: the tso state to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct hinic_tso_config tso_cfg = {0};
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	tso_cfg.tso_en = state;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO,
+				 &tso_cfg, sizeof(tso_cfg),
+				 &tso_cfg, &out_size);
+	if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set port tso, ret = %d\n",
+			tso_cfg.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en)
+{
+	struct hinic_checksum_offload rx_csum_cfg = {0};
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u16 out_size;
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+	rx_csum_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rx_csum_cfg.rx_csum_offload = en;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_CSUM,
+				 &rx_csum_cfg, sizeof(rx_csum_cfg),
+				 &rx_csum_cfg, &out_size);
+	if (err || !out_size || rx_csum_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set rx csum offload, ret = %d\n",
+			rx_csum_cfg.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_vlan_cfg vlan_cfg;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u16 out_size;
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+	vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	vlan_cfg.vlan_rx_offload = en;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD,
+				 &vlan_cfg, sizeof(vlan_cfg),
+				 &vlan_cfg, &out_size);
+	if (err || !out_size || vlan_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set rx vlan offload, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, vlan_cfg.status, out_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	struct hinic_rq_num rq_num = { 0 };
+	u16 out_size = sizeof(rq_num);
+	int err;
+
+	rq_num.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rq_num.num_rqs = num_rqs;
+	rq_num.rq_depth = ilog2(HINIC_SQ_DEPTH);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RQ_IQ_MAP,
+				 &rq_num, sizeof(rq_num),
+				 &rq_num, &out_size);
+	if (err || !out_size || rq_num.status) {
+		dev_err(&pdev->dev,
+			"Failed to rxq number, ret = %d\n",
+			rq_num.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic_set_rx_lro(struct hinic_dev *nic_dev, u8 ipv4_en, u8 ipv6_en,
+			    u8 max_wqe_num)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct hinic_lro_config lro_cfg = { 0 };
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size = sizeof(lro_cfg);
+	int err;
+
+	lro_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	lro_cfg.lro_ipv4_en = ipv4_en;
+	lro_cfg.lro_ipv6_en = ipv6_en;
+	lro_cfg.lro_max_wqe_num = max_wqe_num;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_LRO,
+				 &lro_cfg, sizeof(lro_cfg),
+				 &lro_cfg, &out_size);
+	if (err || !out_size || lro_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set lro offload, ret = %d\n",
+			lro_cfg.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic_set_rx_lro_timer(struct hinic_dev *nic_dev, u32 timer_value)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_lro_timer lro_timer = { 0 };
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size = sizeof(lro_timer);
+	int err;
+
+	lro_timer.status = 0;
+	lro_timer.type = 0;
+	lro_timer.enable = 1;
+	lro_timer.timer = timer_value;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_LRO_TIMER,
+				 &lro_timer, sizeof(lro_timer),
+				 &lro_timer, &out_size);
+	if (lro_timer.status == 0xFF) {
+		/* a status of 0xFF means the FW does not support this
+		 * command; treat it as success
+		 */
+		lro_timer.status = 0;
+		dev_dbg(&pdev->dev,
+			"Setting the lro timer is not supported by the current FW version, the 1ms default will be used\n");
+	}
+
+	if (err || !out_size || lro_timer.status) {
+		dev_err(&pdev->dev,
+			"Failed to set lro timer, ret = %d\n",
+			lro_timer.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en,
+			   u32 lro_timer, u32 wqe_num)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	u8 ipv4_en;
+	u8 ipv6_en;
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	ipv4_en = lro_en ? 1 : 0;
+	ipv6_en = lro_en ? 1 : 0;
+
+	err = hinic_set_rx_lro(nic_dev, ipv4_en, ipv6_en, (u8)wqe_num);
+	if (err)
+		return err;
+
+	err = hinic_set_rx_lro_timer(nic_dev, lro_timer);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int hinic_rss_set_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
+			    const u32 *indir_table)
+{
+	struct hinic_rss_indirect_tbl *indir_tbl;
+	struct hinic_func_to_io *func_to_io;
+	struct hinic_cmdq_buf cmd_buf;
+	struct hinic_hwdev *hwdev;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u32 indir_size;
+	u64 out_param;
+	int err, i;
+	u32 *temp;
+
+	hwdev = nic_dev->hwdev;
+	func_to_io = &hwdev->func_to_io;
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+
+	err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmd_buf);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to allocate cmdq buf\n");
+		return err;
+	}
+
+	cmd_buf.size = sizeof(*indir_tbl);
+
+	indir_tbl = cmd_buf.buf;
+	indir_tbl->group_index = cpu_to_be32(tmpl_idx);
+
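+	/* the hw reads the table as big-endian 32-bit words, so swap the
+	 * byte order of each completed group of four one-byte entries
+	 */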
+	for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) {
+		indir_tbl->entry[i] = indir_table[i];
+
+		if ((i & 0x3) == 0x3) {
+			temp = (u32 *)&indir_tbl->entry[i - 3];
+			*temp = cpu_to_be32(*temp);
+		}
+	}
+
+	/* the indirect table is sent via the command queue in two half-size chunks */
+	indir_size = HINIC_RSS_INDIR_SIZE / 2;
+	indir_tbl->offset = 0;
+	indir_tbl->size = cpu_to_be32(indir_size);
+
+	err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
+				     HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE,
+				     &cmd_buf, &out_param);
+	if (err || out_param != 0) {
+		dev_err(&pdev->dev, "Failed to set rss indir table\n");
+		err = -EFAULT;
+		goto free_buf;
+	}
+
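+	/* then send the second half of the table at offset indir_size */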
+	indir_tbl->offset = cpu_to_be32(indir_size);
+	indir_tbl->size = cpu_to_be32(indir_size);
+	memcpy(&indir_tbl->entry[0], &indir_tbl->entry[indir_size], indir_size);
+
+	err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
+				     HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE,
+				     &cmd_buf, &out_param);
+	if (err || out_param != 0) {
+		dev_err(&pdev->dev, "Failed to set rss indir table\n");
+		err = -EFAULT;
+	}
+
+free_buf:
+	hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmd_buf);
+
+	return err;
+}
+
+int hinic_rss_get_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
+			    u32 *indir_table)
+{
+	struct hinic_rss_indir_table rss_cfg = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size = sizeof(rss_cfg);
+	int err = 0, i;
+
+	rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rss_cfg.template_id = tmpl_idx;
+
+	err = hinic_port_msg_cmd(hwdev,
+				 HINIC_PORT_CMD_GET_RSS_TEMPLATE_INDIR_TBL,
+				 &rss_cfg, sizeof(rss_cfg), &rss_cfg,
+				 &out_size);
+	if (err || !out_size || rss_cfg.status) {
+		dev_err(&pdev->dev, "Failed to get indir table, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, rss_cfg.status, out_size);
+		return -EINVAL;
+	}
+
+	hinic_be32_to_cpu(rss_cfg.indir, HINIC_RSS_INDIR_SIZE);
+	for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++)
+		indir_table[i] = rss_cfg.indir[i];
+
+	return 0;
+}
+
+int hinic_set_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx,
+		       struct hinic_rss_type rss_type)
+{
+	struct hinic_rss_context_tbl *ctx_tbl;
+	struct hinic_func_to_io *func_to_io;
+	struct hinic_cmdq_buf cmd_buf;
+	struct hinic_hwdev *hwdev;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u64 out_param;
+	u32 ctx = 0;
+	int err;
+
+	hwdev = nic_dev->hwdev;
+	func_to_io = &hwdev->func_to_io;
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+
+	err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmd_buf);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to allocate cmd buf\n");
+		return -ENOMEM;
+	}
+
+	ctx |=  HINIC_RSS_TYPE_SET(1, VALID) |
+		HINIC_RSS_TYPE_SET(rss_type.ipv4, IPV4) |
+		HINIC_RSS_TYPE_SET(rss_type.ipv6, IPV6) |
+		HINIC_RSS_TYPE_SET(rss_type.ipv6_ext, IPV6_EXT) |
+		HINIC_RSS_TYPE_SET(rss_type.tcp_ipv4, TCP_IPV4) |
+		HINIC_RSS_TYPE_SET(rss_type.tcp_ipv6, TCP_IPV6) |
+		HINIC_RSS_TYPE_SET(rss_type.tcp_ipv6_ext, TCP_IPV6_EXT) |
+		HINIC_RSS_TYPE_SET(rss_type.udp_ipv4, UDP_IPV4) |
+		HINIC_RSS_TYPE_SET(rss_type.udp_ipv6, UDP_IPV6);
+
+	cmd_buf.size = sizeof(struct hinic_rss_context_tbl);
+
+	ctx_tbl = (struct hinic_rss_context_tbl *)cmd_buf.buf;
+	ctx_tbl->group_index = cpu_to_be32(tmpl_idx);
+	ctx_tbl->offset = 0;
+	ctx_tbl->size = sizeof(u32);
+	ctx_tbl->size = cpu_to_be32(ctx_tbl->size);
+	ctx_tbl->rsvd = 0;
+	ctx_tbl->ctx = cpu_to_be32(ctx);
+
+	/* cfg the rss context table by command queue */
+	err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
+				     HINIC_UCODE_CMD_SET_RSS_CONTEXT_TABLE,
+				     &cmd_buf, &out_param);
+
+	hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmd_buf);
+
+	if (err || out_param != 0) {
+		dev_err(&pdev->dev, "Failed to set rss context table, err: %d\n",
+			err);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx,
+		       struct hinic_rss_type *rss_type)
+{
+	struct hinic_rss_context_table ctx_tbl = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u16 out_size = sizeof(ctx_tbl);
+	int err;
+
+	if (!hwdev || !rss_type)
+		return -EINVAL;
+
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+
+	ctx_tbl.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	ctx_tbl.template_id = tmpl_idx;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_CTX_TBL,
+				 &ctx_tbl, sizeof(ctx_tbl),
+				 &ctx_tbl, &out_size);
+	if (err || !out_size || ctx_tbl.status) {
+		dev_err(&pdev->dev, "Failed to get hash type, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, ctx_tbl.status, out_size);
+		return -EINVAL;
+	}
+
+	rss_type->ipv4          = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV4);
+	rss_type->ipv6          = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV6);
+	rss_type->ipv6_ext      = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV6_EXT);
+	rss_type->tcp_ipv4      = HINIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV4);
+	rss_type->tcp_ipv6      = HINIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV6);
+	rss_type->tcp_ipv6_ext  = HINIC_RSS_TYPE_GET(ctx_tbl.context,
+						     TCP_IPV6_EXT);
+	rss_type->udp_ipv4      = HINIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV4);
+	rss_type->udp_ipv6      = HINIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV6);
+
+	return 0;
+}
+
+int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id,
+			       const u8 *temp)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct hinic_rss_key rss_key = { 0 };
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	rss_key.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rss_key.template_id = template_id;
+	memcpy(rss_key.key, temp, HINIC_RSS_KEY_SIZE);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RSS_TEMPLATE_TBL,
+				 &rss_key, sizeof(rss_key),
+				 &rss_key, &out_size);
+	if (err || !out_size || rss_key.status) {
+		dev_err(&pdev->dev,
+			"Failed to set rss hash key, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, rss_key.status, out_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
+			       u8 *temp)
+{
+	struct hinic_rss_template_key temp_key = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u16 out_size = sizeof(temp_key);
+	int err;
+
+	if (!hwdev || !temp)
+		return -EINVAL;
+
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+
+	temp_key.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	temp_key.template_id = tmpl_idx;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_TEMPLATE_TBL,
+				 &temp_key, sizeof(temp_key),
+				 &temp_key, &out_size);
+	if (err || !out_size || temp_key.status) {
+		dev_err(&pdev->dev, "Failed to set hash key, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, temp_key.status, out_size);
+		return -EINVAL;
+	}
+
+	memcpy(temp, temp_key.key, HINIC_RSS_KEY_SIZE);
+
+	return 0;
+}
+
+int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id,
+			      u8 type)
+{
+	struct hinic_rss_engine_type rss_engine = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	rss_engine.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rss_engine.hash_engine = type;
+	rss_engine.template_id = template_id;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RSS_HASH_ENGINE,
+				 &rss_engine, sizeof(rss_engine),
+				 &rss_engine, &out_size);
+	if (err || !out_size || rss_engine.status) {
+		dev_err(&pdev->dev,
+			"Failed to set hash engine, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, rss_engine.status, out_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx, u8 *type)
+{
+	struct hinic_rss_engine_type hash_type = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u16 out_size = sizeof(hash_type);
+	int err;
+
+	if (!hwdev || !type)
+		return -EINVAL;
+
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+
+	hash_type.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	hash_type.template_id = tmpl_idx;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_HASH_ENGINE,
+				 &hash_type, sizeof(hash_type),
+				 &hash_type, &out_size);
+	if (err || !out_size || hash_type.status) {
+		dev_err(&pdev->dev, "Failed to get hash engine, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, hash_type.status, out_size);
+		return -EINVAL;
+	}
+
+	*type = hash_type.hash_engine;
+	return 0;
+}
+
+int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_rss_config rss_cfg = { 0 };
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rss_cfg.rss_en = rss_en;
+	rss_cfg.template_id = template_id;
+	rss_cfg.rq_priority_number = 0;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_CFG,
+				 &rss_cfg, sizeof(rss_cfg),
+				 &rss_cfg, &out_size);
+	if (err || !out_size || rss_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set rss cfg, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, rss_cfg.status, out_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx)
+{
+	struct hinic_rss_template_mgmt template_mgmt = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	template_mgmt.cmd = NIC_RSS_CMD_TEMP_ALLOC;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_TEMP_MGR,
+				 &template_mgmt, sizeof(template_mgmt),
+				 &template_mgmt, &out_size);
+	if (err || !out_size || template_mgmt.status) {
+		dev_err(&pdev->dev, "Failed to alloc rss template, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, template_mgmt.status, out_size);
+		return -EINVAL;
+	}
+
+	*tmpl_idx = template_mgmt.template_id;
+
+	return 0;
+}
+
+int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx)
+{
+	struct hinic_rss_template_mgmt template_mgmt = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	template_mgmt.template_id = tmpl_idx;
+	template_mgmt.cmd = NIC_RSS_CMD_TEMP_FREE;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_TEMP_MGR,
+				 &template_mgmt, sizeof(template_mgmt),
+				 &template_mgmt, &out_size);
+	if (err || !out_size || template_mgmt.status) {
+		dev_err(&pdev->dev, "Failed to free rss template, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, template_mgmt.status, out_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int hinic_get_vport_stats(struct hinic_dev *nic_dev,
+			  struct hinic_vport_stats *stats)
+{
+	struct hinic_cmd_vport_stats vport_stats = { 0 };
+	struct hinic_port_stats_info stats_info = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(vport_stats);
+	struct pci_dev *pdev = hwif->pdev;
+	int err;
+
+	stats_info.stats_version = HINIC_PORT_STATS_VERSION;
+	stats_info.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	stats_info.stats_size = sizeof(vport_stats);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_VPORT_STAT,
+				 &stats_info, sizeof(stats_info),
+				 &vport_stats, &out_size);
+	if (err || !out_size || vport_stats.status) {
+		dev_err(&pdev->dev,
+			"Failed to get function statistics, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, vport_stats.status, out_size);
+		return -EFAULT;
+	}
+
+	memcpy(stats, &vport_stats.stats, sizeof(*stats));
+	return 0;
+}
+
+int hinic_get_phy_port_stats(struct hinic_dev *nic_dev,
+			     struct hinic_phy_port_stats *stats)
+{
+	struct hinic_port_stats_info stats_info = { 0 };
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct hinic_port_stats *port_stats;
+	u16 out_size = sizeof(*port_stats);
+	struct pci_dev *pdev = hwif->pdev;
+	int err;
+
+	port_stats = kzalloc(sizeof(*port_stats), GFP_KERNEL);
+	if (!port_stats)
+		return -ENOMEM;
+
+	stats_info.stats_version = HINIC_PORT_STATS_VERSION;
+	stats_info.stats_size = sizeof(*port_stats);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_PORT_STATISTICS,
+				 &stats_info, sizeof(stats_info),
+				 port_stats, &out_size);
+	if (err || !out_size || port_stats->status) {
+		dev_err(&pdev->dev,
+			"Failed to get port statistics, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, port_stats->status, out_size);
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(stats, &port_stats->stats, sizeof(*stats));
+
+out:
+	kfree(port_stats);
+
+	return err;
+}
+
+int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_version_info up_ver = {0};
+	struct hinic_hwif *hwif;
+	struct pci_dev *pdev;
+	u16 out_size;
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	hwif = hwdev->hwif;
+	pdev = hwif->pdev;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_MGMT_VERSION,
+				 &up_ver, sizeof(up_ver), &up_ver,
+				 &out_size);
+	if (err || !out_size || up_ver.status) {
+		dev_err(&pdev->dev,
+			"Failed to get mgmt version, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, up_ver.status, out_size);
+		return -EINVAL;
+	}
+
+	snprintf(mgmt_ver, HINIC_MGMT_VERSION_MAX_LEN, "%s", up_ver.ver);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 9404365..44772fd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_PORT_H
@@ -22,6 +13,22 @@
 
 #include "hinic_dev.h"
 
+#define HINIC_RSS_KEY_SIZE	40
+#define HINIC_RSS_INDIR_SIZE	256
+#define HINIC_PORT_STATS_VERSION	0
+#define HINIC_FW_VERSION_NAME	16
+#define HINIC_COMPILE_TIME_LEN	20
+#define HINIC_MGMT_VERSION_MAX_LEN	32
+
+struct hinic_version_info {
+	u8 status;
+	u8 version;
+	u8 rsvd[6];
+
+	u8 ver[HINIC_FW_VERSION_NAME];
+	u8 time[HINIC_COMPILE_TIME_LEN];
+};
+
 enum hinic_rx_mode {
 	HINIC_RX_MODE_UC        = BIT(0),
 	HINIC_RX_MODE_MC        = BIT(1),
@@ -72,6 +79,11 @@
 	HINIC_SPEED_UNKNOWN = 0xFF,
 };
 
+enum hinic_tso_state {
+	HINIC_TSO_DISABLE = 0,
+	HINIC_TSO_ENABLE  = 1,
+};
+
 struct hinic_port_mac_cmd {
 	u8              status;
 	u8              version;
@@ -167,6 +179,333 @@
 	u8      rsvd2[3];
 };
 
+struct hinic_tso_config {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u8	tso_en;
+	u8	resv2[3];
+};
+
+struct hinic_checksum_offload {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	rx_csum_offload;
+};
+
+struct hinic_rq_num {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1[33];
+	u32	num_rqs;
+	u32	rq_depth;
+};
+
+struct hinic_lro_config {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u8	lro_ipv4_en;
+	u8	lro_ipv6_en;
+	u8	lro_max_wqe_num;
+	u8	resv2[13];
+};
+
+struct hinic_lro_timer {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u8	type;   /* 0: set timer value, 1: get timer value */
+	u8	enable; /* when setting the lro timer, enable should be 1 */
+	u16	rsvd1;
+	u32	timer;
+};
+
+struct hinic_vlan_cfg {
+	u8      status;
+	u8      version;
+	u8      rsvd0[6];
+
+	u16     func_id;
+	u8      vlan_rx_offload;
+	u8      rsvd1[5];
+};
+
+struct hinic_rss_template_mgmt {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u8	cmd;
+	u8	template_id;
+	u8	rsvd1[4];
+};
+
+struct hinic_rss_template_key {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u8	template_id;
+	u8	rsvd1;
+	u8	key[HINIC_RSS_KEY_SIZE];
+};
+
+struct hinic_rss_context_tbl {
+	u32 group_index;
+	u32 offset;
+	u32 size;
+	u32 rsvd;
+	u32 ctx;
+};
+
+struct hinic_rss_context_table {
+	u8      status;
+	u8      version;
+	u8      rsvd0[6];
+
+	u16     func_id;
+	u8      template_id;
+	u8      rsvd1;
+	u32     context;
+};
+
+struct hinic_rss_indirect_tbl {
+	u32 group_index;
+	u32 offset;
+	u32 size;
+	u32 rsvd;
+	u8 entry[HINIC_RSS_INDIR_SIZE];
+};
+
+struct hinic_rss_indir_table {
+	u8      status;
+	u8      version;
+	u8      rsvd0[6];
+
+	u16     func_id;
+	u8      template_id;
+	u8      rsvd1;
+	u8      indir[HINIC_RSS_INDIR_SIZE];
+};
+
+struct hinic_rss_key {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u8	template_id;
+	u8	rsvd1;
+	u8	key[HINIC_RSS_KEY_SIZE];
+};
+
+struct hinic_rss_engine_type {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u8	template_id;
+	u8	hash_engine;
+	u8	rsvd1[4];
+};
+
+struct hinic_rss_config {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u8	rss_en;
+	u8	template_id;
+	u8	rq_priority_number;
+	u8	rsvd1[11];
+};
+
+struct hinic_stats {
+	char name[ETH_GSTRING_LEN];
+	u32 size;
+	int offset;
+};
+
+struct hinic_vport_stats {
+	u64 tx_unicast_pkts_vport;
+	u64 tx_unicast_bytes_vport;
+	u64 tx_multicast_pkts_vport;
+	u64 tx_multicast_bytes_vport;
+	u64 tx_broadcast_pkts_vport;
+	u64 tx_broadcast_bytes_vport;
+
+	u64 rx_unicast_pkts_vport;
+	u64 rx_unicast_bytes_vport;
+	u64 rx_multicast_pkts_vport;
+	u64 rx_multicast_bytes_vport;
+	u64 rx_broadcast_pkts_vport;
+	u64 rx_broadcast_bytes_vport;
+
+	u64 tx_discard_vport;
+	u64 rx_discard_vport;
+	u64 tx_err_vport;
+	u64 rx_err_vport;
+};
+
+struct hinic_phy_port_stats {
+	u64 mac_rx_total_pkt_num;
+	u64 mac_rx_total_oct_num;
+	u64 mac_rx_bad_pkt_num;
+	u64 mac_rx_bad_oct_num;
+	u64 mac_rx_good_pkt_num;
+	u64 mac_rx_good_oct_num;
+	u64 mac_rx_uni_pkt_num;
+	u64 mac_rx_multi_pkt_num;
+	u64 mac_rx_broad_pkt_num;
+
+	u64 mac_tx_total_pkt_num;
+	u64 mac_tx_total_oct_num;
+	u64 mac_tx_bad_pkt_num;
+	u64 mac_tx_bad_oct_num;
+	u64 mac_tx_good_pkt_num;
+	u64 mac_tx_good_oct_num;
+	u64 mac_tx_uni_pkt_num;
+	u64 mac_tx_multi_pkt_num;
+	u64 mac_tx_broad_pkt_num;
+
+	u64 mac_rx_fragment_pkt_num;
+	u64 mac_rx_undersize_pkt_num;
+	u64 mac_rx_undermin_pkt_num;
+	u64 mac_rx_64_oct_pkt_num;
+	u64 mac_rx_65_127_oct_pkt_num;
+	u64 mac_rx_128_255_oct_pkt_num;
+	u64 mac_rx_256_511_oct_pkt_num;
+	u64 mac_rx_512_1023_oct_pkt_num;
+	u64 mac_rx_1024_1518_oct_pkt_num;
+	u64 mac_rx_1519_2047_oct_pkt_num;
+	u64 mac_rx_2048_4095_oct_pkt_num;
+	u64 mac_rx_4096_8191_oct_pkt_num;
+	u64 mac_rx_8192_9216_oct_pkt_num;
+	u64 mac_rx_9217_12287_oct_pkt_num;
+	u64 mac_rx_12288_16383_oct_pkt_num;
+	u64 mac_rx_1519_max_bad_pkt_num;
+	u64 mac_rx_1519_max_good_pkt_num;
+	u64 mac_rx_oversize_pkt_num;
+	u64 mac_rx_jabber_pkt_num;
+
+	u64 mac_rx_pause_num;
+	u64 mac_rx_pfc_pkt_num;
+	u64 mac_rx_pfc_pri0_pkt_num;
+	u64 mac_rx_pfc_pri1_pkt_num;
+	u64 mac_rx_pfc_pri2_pkt_num;
+	u64 mac_rx_pfc_pri3_pkt_num;
+	u64 mac_rx_pfc_pri4_pkt_num;
+	u64 mac_rx_pfc_pri5_pkt_num;
+	u64 mac_rx_pfc_pri6_pkt_num;
+	u64 mac_rx_pfc_pri7_pkt_num;
+	u64 mac_rx_control_pkt_num;
+	u64 mac_rx_y1731_pkt_num;
+	u64 mac_rx_sym_err_pkt_num;
+	u64 mac_rx_fcs_err_pkt_num;
+	u64 mac_rx_send_app_good_pkt_num;
+	u64 mac_rx_send_app_bad_pkt_num;
+
+	u64 mac_tx_fragment_pkt_num;
+	u64 mac_tx_undersize_pkt_num;
+	u64 mac_tx_undermin_pkt_num;
+	u64 mac_tx_64_oct_pkt_num;
+	u64 mac_tx_65_127_oct_pkt_num;
+	u64 mac_tx_128_255_oct_pkt_num;
+	u64 mac_tx_256_511_oct_pkt_num;
+	u64 mac_tx_512_1023_oct_pkt_num;
+	u64 mac_tx_1024_1518_oct_pkt_num;
+	u64 mac_tx_1519_2047_oct_pkt_num;
+	u64 mac_tx_2048_4095_oct_pkt_num;
+	u64 mac_tx_4096_8191_oct_pkt_num;
+	u64 mac_tx_8192_9216_oct_pkt_num;
+	u64 mac_tx_9217_12287_oct_pkt_num;
+	u64 mac_tx_12288_16383_oct_pkt_num;
+	u64 mac_tx_1519_max_bad_pkt_num;
+	u64 mac_tx_1519_max_good_pkt_num;
+	u64 mac_tx_oversize_pkt_num;
+	u64 mac_tx_jabber_pkt_num;
+
+	u64 mac_tx_pause_num;
+	u64 mac_tx_pfc_pkt_num;
+	u64 mac_tx_pfc_pri0_pkt_num;
+	u64 mac_tx_pfc_pri1_pkt_num;
+	u64 mac_tx_pfc_pri2_pkt_num;
+	u64 mac_tx_pfc_pri3_pkt_num;
+	u64 mac_tx_pfc_pri4_pkt_num;
+	u64 mac_tx_pfc_pri5_pkt_num;
+	u64 mac_tx_pfc_pri6_pkt_num;
+	u64 mac_tx_pfc_pri7_pkt_num;
+	u64 mac_tx_control_pkt_num;
+	u64 mac_tx_y1731_pkt_num;
+	u64 mac_tx_1588_pkt_num;
+	u64 mac_tx_err_all_pkt_num;
+	u64 mac_tx_from_app_good_pkt_num;
+	u64 mac_tx_from_app_bad_pkt_num;
+
+	u64 mac_rx_higig2_ext_pkt_num;
+	u64 mac_rx_higig2_message_pkt_num;
+	u64 mac_rx_higig2_error_pkt_num;
+	u64 mac_rx_higig2_cpu_ctrl_pkt_num;
+	u64 mac_rx_higig2_unicast_pkt_num;
+	u64 mac_rx_higig2_broadcast_pkt_num;
+	u64 mac_rx_higig2_l2_multicast_pkt_num;
+	u64 mac_rx_higig2_l3_multicast_pkt_num;
+
+	u64 mac_tx_higig2_message_pkt_num;
+	u64 mac_tx_higig2_ext_pkt_num;
+	u64 mac_tx_higig2_cpu_ctrl_pkt_num;
+	u64 mac_tx_higig2_unicast_pkt_num;
+	u64 mac_tx_higig2_broadcast_pkt_num;
+	u64 mac_tx_higig2_l2_multicast_pkt_num;
+	u64 mac_tx_higig2_l3_multicast_pkt_num;
+};
+
+struct hinic_port_stats_info {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	stats_version;
+	u32	stats_size;
+};
+
+struct hinic_port_stats {
+	u8 status;
+	u8 version;
+	u8 rsvd[6];
+
+	struct hinic_phy_port_stats stats;
+};
+
+struct hinic_cmd_vport_stats {
+	u8 status;
+	u8 version;
+	u8 rsvd0[6];
+
+	struct hinic_vport_stats stats;
+};
+
 int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		       u16 vlan_id);
 
@@ -195,4 +534,55 @@
 int hinic_port_get_cap(struct hinic_dev *nic_dev,
 		       struct hinic_port_cap *port_cap);
 
+int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs);
+
+int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
+
+int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en);
+
+int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en,
+			   u32 lro_timer, u32 wqe_num);
+
+int hinic_set_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx,
+		       struct hinic_rss_type rss_type);
+
+int hinic_rss_set_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
+			    const u32 *indir_table);
+
+int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id,
+			       const u8 *temp);
+
+int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id,
+			      u8 type);
+
+int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id);
+
+int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx);
+
+int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx);
+
+void hinic_set_ethtool_ops(struct net_device *netdev);
+
+int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx,
+		       struct hinic_rss_type *rss_type);
+
+int hinic_rss_get_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
+			    u32 *indir_table);
+
+int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
+			       u8 *temp);
+
+int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx,
+			      u8 *type);
+
+int hinic_get_phy_port_stats(struct hinic_dev *nic_dev,
+			     struct hinic_phy_port_stats *stats);
+
+int hinic_get_vport_stats(struct hinic_dev *nic_dev,
+			  struct hinic_vport_stats *stats);
+
+int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en);
+
+int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver);
+
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 4c0f7ed..56ea6d6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -27,6 +18,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/prefetch.h>
 #include <linux/cpumask.h>
+#include <linux/if_vlan.h>
 #include <asm/barrier.h>
 
 #include "hinic_common.h"
@@ -43,6 +35,16 @@
 #define RX_IRQ_NO_LLI_TIMER             0
 #define RX_IRQ_NO_CREDIT                0
 #define RX_IRQ_NO_RESEND_TIMER          0
+#define HINIC_RX_BUFFER_WRITE           16
+
+#define HINIC_RX_IPV6_PKT		7
+#define LRO_PKT_HDR_LEN_IPV4		66
+#define LRO_PKT_HDR_LEN_IPV6		86
+#define LRO_REPLENISH_THLD		256
+
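+/* header bytes repeated in every frame coalesced into an lro packet */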
+#define LRO_PKT_HDR_LEN(cqe)		\
+	(HINIC_GET_RX_PKT_TYPE(be32_to_cpu((cqe)->offload_type)) == \
+	 HINIC_RX_IPV6_PKT ? LRO_PKT_HDR_LEN_IPV6 : LRO_PKT_HDR_LEN_IPV4)
 
 /**
  * hinic_rxq_clean_stats - Clean the statistics of specific queue
@@ -55,6 +57,9 @@
 	u64_stats_update_begin(&rxq_stats->syncp);
 	rxq_stats->pkts  = 0;
 	rxq_stats->bytes = 0;
+	rxq_stats->errors = 0;
+	rxq_stats->csum_errors = 0;
+	rxq_stats->other_errors = 0;
 	u64_stats_update_end(&rxq_stats->syncp);
 }
 
@@ -73,6 +78,10 @@
 		start = u64_stats_fetch_begin(&rxq_stats->syncp);
 		stats->pkts = rxq_stats->pkts;
 		stats->bytes = rxq_stats->bytes;
+		stats->errors = rxq_stats->csum_errors +
+				rxq_stats->other_errors;
+		stats->csum_errors = rxq_stats->csum_errors;
+		stats->other_errors = rxq_stats->other_errors;
 	} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
 	u64_stats_update_end(&stats->syncp);
 }
@@ -89,6 +98,26 @@
 	hinic_rxq_clean_stats(rxq);
 }
 
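+/**
+ * rx_csum - propagate the hw checksum verdict to the skb
+ * @rxq: rx queue
+ * @status: cqe status word
+ * @skb: received packet
+ *
+ * Packets the hw could not check, or that failed with an "other" error,
+ * are not counted as csum errors.
+ **/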
+static void rx_csum(struct hinic_rxq *rxq, u32 status,
+		    struct sk_buff *skb)
+{
+	struct net_device *netdev = rxq->netdev;
+	u32 csum_err;
+
+	csum_err = HINIC_RQ_CQE_STATUS_GET(status, CSUM_ERR);
+
+	if (!(netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	if (!csum_err) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else {
+		if (!(csum_err & (HINIC_RX_CSUM_HW_CHECK_NONE |
+			HINIC_RX_CSUM_IPSU_OTHER_ERR)))
+			rxq->rxq_stats.csum_errors++;
+		skb->ip_summed = CHECKSUM_NONE;
+	}
+}
+
 /**
  * rx_alloc_skb - allocate skb and map it to dma address
  * @rxq: rx queue
@@ -209,7 +238,6 @@
 		hinic_rq_update(rxq->rq, prod_idx);
 	}
 
-	tasklet_schedule(&rxq->rx_task);
 	return i;
 }
 
@@ -237,17 +265,6 @@
 }
 
 /**
- * rx_alloc_task - tasklet for queue allocation
- * @data: rx queue
- **/
-static void rx_alloc_task(unsigned long data)
-{
-	struct hinic_rxq *rxq = (struct hinic_rxq *)data;
-
-	(void)rx_alloc_pkts(rxq);
-}
-
-/**
  * rx_recv_jumbo_pkt - Rx handler for jumbo pkt
  * @rxq: rx queue
  * @head_skb: the first skb in the list
@@ -309,12 +326,21 @@
 static int rxq_recv(struct hinic_rxq *rxq, int budget)
 {
 	struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
+	struct net_device *netdev = rxq->netdev;
 	u64 pkt_len = 0, rx_bytes = 0;
+	struct hinic_rq *rq = rxq->rq;
 	struct hinic_rq_wqe *rq_wqe;
+	unsigned int free_wqebbs;
+	struct hinic_rq_cqe *cqe;
 	int num_wqes, pkts = 0;
 	struct hinic_sge sge;
+	unsigned int status;
 	struct sk_buff *skb;
-	u16 ci;
+	u32 offload_type;
+	u16 ci, num_lro;
+	u16 num_wqe = 0;
+	u32 vlan_len;
+	u16 vid;
 
 	while (pkts < budget) {
 		num_wqes = 0;
@@ -324,10 +350,14 @@
 		if (!rq_wqe)
 			break;
 
+		cqe = rq->cqe[ci];
+		status = be32_to_cpu(cqe->status);
 		hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
 
 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
 
+		rx_csum(rxq, status, skb);
+
 		prefetch(skb->data);
 
 		pkt_len = sge.len;
@@ -340,9 +370,17 @@
 						     HINIC_RX_BUF_SZ, ci);
 		}
 
-		hinic_rq_put_wqe(rxq->rq, ci,
+		hinic_rq_put_wqe(rq, ci,
 				 (num_wqes + 1) * HINIC_RQ_WQE_SIZE);
 
+		offload_type = be32_to_cpu(cqe->offload_type);
+		vlan_len = be32_to_cpu(cqe->len);
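+		/* the hw strips the 802.1Q tag and reports it in the cqe;
+		 * hand it back to the stack when rx vlan offload is enabled
+		 */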
+		if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+		    HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)) {
+			vid = HINIC_GET_RX_VLAN_TAG(vlan_len);
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+		}
+
 		skb_record_rx_queue(skb, qp->q_id);
 		skb->protocol = eth_type_trans(skb, rxq->netdev);
 
@@ -350,10 +388,26 @@
 
 		pkts++;
 		rx_bytes += pkt_len;
+
+		num_lro = HINIC_GET_RX_NUM_LRO(status);
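+		/* count the header bytes repeated in the coalesced frames
+		 * and the extra rx buffers the large packet consumed
+		 */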
+		if (num_lro) {
+			rx_bytes += ((num_lro - 1) *
+				     LRO_PKT_HDR_LEN(cqe));
+
+			num_wqe += (u16)(pkt_len >> rxq->rx_buff_shift) +
+				   ((pkt_len & (rxq->buf_len - 1)) ? 1 : 0);
+		}
+
+		cqe->status = 0;
+
+		if (num_wqe >= LRO_REPLENISH_THLD)
+			break;
 	}
 
-	if (pkts)
-		tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */
+	free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);
+	if (free_wqebbs > HINIC_RX_BUFFER_WRITE)
+		rx_alloc_pkts(rxq);
 
 	u64_stats_update_begin(&rxq->rxq_stats.syncp);
 	rxq->rxq_stats.pkts += pkts;
@@ -366,6 +420,7 @@
 static int rx_poll(struct napi_struct *napi, int budget)
 {
 	struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi);
+	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
 	struct hinic_rq *rq = rxq->rq;
 	int pkts;
 
@@ -374,7 +429,10 @@
 		return budget;
 
 	napi_complete(napi);
-	enable_irq(rq->irq);
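+	/* re-arm the queue's msi-x vector now that the poll is complete */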
+	hinic_hwdev_set_msix_state(nic_dev->hwdev,
+				   rq->msix_entry,
+				   HINIC_MSIX_ENABLE);
+
 	return pkts;
 }
 
@@ -399,7 +457,10 @@
 	struct hinic_dev *nic_dev;
 
 	/* Disable the interrupt until napi will be completed */
-	disable_irq_nosync(rq->irq);
+	nic_dev = netdev_priv(rxq->netdev);
+	hinic_hwdev_set_msix_state(nic_dev->hwdev,
+				   rq->msix_entry,
+				   HINIC_MSIX_DISABLE);
 
-	nic_dev = netdev_priv(rxq->netdev);
 	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);
@@ -456,22 +517,20 @@
 		   struct net_device *netdev)
 {
 	struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq);
-	int err, pkts, irqname_len;
+	int err, pkts;
 
 	rxq->netdev = netdev;
 	rxq->rq = rq;
+	rxq->buf_len = HINIC_RX_BUF_SZ;
+	rxq->rx_buff_shift = ilog2(HINIC_RX_BUF_SZ);
 
 	rxq_stats_init(rxq);
 
-	irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1;
-	rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
+	rxq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL,
+				       "hinic_rxq%d", qp->q_id);
 	if (!rxq->irq_name)
 		return -ENOMEM;
 
-	sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id);
-
-	tasklet_init(&rxq->rx_task, rx_alloc_task, (unsigned long)rxq);
-
 	pkts = rx_alloc_pkts(rxq);
 	if (!pkts) {
 		err = -ENOMEM;
@@ -488,7 +547,6 @@
 
 err_req_rx_irq:
 err_rx_pkts:
-	tasklet_kill(&rxq->rx_task);
 	free_all_rx_skbs(rxq);
 	devm_kfree(&netdev->dev, rxq->irq_name);
 	return err;
@@ -504,7 +562,6 @@
 
 	rx_free_irq(rxq);
 
-	tasklet_kill(&rxq->rx_task);
 	free_all_rx_skbs(rxq);
 	devm_kfree(&netdev->dev, rxq->irq_name);
 }
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.h b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
index 27c9af4..507dcba 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_RX_H
@@ -23,10 +14,17 @@
 
 #include "hinic_hw_qp.h"
 
+#define HINIC_RX_CSUM_OFFLOAD_EN	0xFFF
+#define HINIC_RX_CSUM_HW_CHECK_NONE	BIT(7)
+#define HINIC_RX_CSUM_IPSU_OTHER_ERR	BIT(8)
+
 struct hinic_rxq_stats {
 	u64                     pkts;
 	u64                     bytes;
-
+	u64			errors;
+	u64			csum_errors;
+	u64			other_errors;
+	u64			alloc_skb_err;
 	struct u64_stats_sync   syncp;
 };
 
@@ -37,8 +35,8 @@
 	struct hinic_rxq_stats  rxq_stats;
 
 	char                    *irq_name;
-
-	struct tasklet_struct   rx_task;
+	u16			buf_len;
+	u32			rx_buff_shift;
 
 	struct napi_struct      napi;
 };
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index c5fca03..0e13d1c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #include <linux/kernel.h>
@@ -26,6 +17,13 @@
 #include <linux/skbuff.h>
 #include <linux/smp.h>
 #include <asm/byteorder.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
 
 #include "hinic_common.h"
 #include "hinic_hw_if.h"
@@ -45,9 +43,31 @@
 #define CI_UPDATE_NO_PENDING            0
 #define CI_UPDATE_NO_COALESC            0
 
-#define HW_CONS_IDX(sq)         be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
+#define HW_CONS_IDX(sq)                 be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
 
-#define MIN_SKB_LEN             64
+#define MIN_SKB_LEN                     17
+
+#define MAX_PAYLOAD_OFFSET		221
+#define TRANSPORT_OFFSET(l4_hdr, skb)	((u32)((l4_hdr) - (skb)->data))
+
+union hinic_l3 {
+	struct iphdr *v4;
+	struct ipv6hdr *v6;
+	unsigned char *hdr;
+};
+
+union hinic_l4 {
+	struct tcphdr *tcp;
+	struct udphdr *udp;
+	unsigned char *hdr;
+};
+
+enum hinic_offload_type {
+	TX_OFFLOAD_TSO     = BIT(0),
+	TX_OFFLOAD_CSUM    = BIT(1),
+	TX_OFFLOAD_VLAN    = BIT(2),
+	TX_OFFLOAD_INVALID = BIT(3),
+};
 
 /**
  * hinic_txq_clean_stats - Clean the statistics of specific queue
@@ -63,6 +83,7 @@
 	txq_stats->tx_busy = 0;
 	txq_stats->tx_wake = 0;
 	txq_stats->tx_dropped = 0;
+	txq_stats->big_frags_pkts = 0;
 	u64_stats_update_end(&txq_stats->syncp);
 }
 
@@ -84,6 +105,7 @@
 		stats->tx_busy = txq_stats->tx_busy;
 		stats->tx_wake = txq_stats->tx_wake;
 		stats->tx_dropped = txq_stats->tx_dropped;
+		stats->big_frags_pkts = txq_stats->big_frags_pkts;
 	} while (u64_stats_fetch_retry(&txq_stats->syncp, start));
 	u64_stats_update_end(&stats->syncp);
 }
@@ -114,7 +136,7 @@
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	dma_addr_t dma_addr;
 	int i, j;
 
@@ -175,18 +197,280 @@
 			 DMA_TO_DEVICE);
 }
 
+static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip,
+				 union hinic_l4 *l4,
+				 enum hinic_offload_type offload_type,
+				 enum hinic_l3_offload_type *l3_type,
+				 u8 *l4_proto)
+{
+	u8 *exthdr;
+
+	if (ip->v4->version == 4) {
+		*l3_type = (offload_type == TX_OFFLOAD_CSUM) ?
+			   IPV4_PKT_NO_CHKSUM_OFFLOAD :
+			   IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+		*l4_proto = ip->v4->protocol;
+	} else if (ip->v4->version == 6) {
+		*l3_type = IPV6_PKT;
+		exthdr = ip->hdr + sizeof(*ip->v6);
+		*l4_proto = ip->v6->nexthdr;
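+		/* walk any ipv6 extension headers to find the final
+		 * transport protocol
+		 */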
+		if (exthdr != l4->hdr) {
+			int start = exthdr - skb->data;
+			__be16 frag_off;
+
+			ipv6_skip_exthdr(skb, start, l4_proto, &frag_off);
+		}
+	} else {
+		*l3_type = L3TYPE_UNKNOWN;
+		*l4_proto = 0;
+	}
+}
+
+static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4,
+			      enum hinic_offload_type offload_type, u8 l4_proto,
+			      enum hinic_l4_offload_type *l4_offload,
+			      u32 *l4_len, u32 *offset)
+{
+	*l4_offload = OFFLOAD_DISABLE;
+	*offset = 0;
+	*l4_len = 0;
+
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		*l4_offload = TCP_OFFLOAD_ENABLE;
+		/* doff is in units of 4 bytes */
+		*l4_len = l4->tcp->doff * 4;
+		*offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb);
+		break;
+
+	case IPPROTO_UDP:
+		*l4_offload = UDP_OFFLOAD_ENABLE;
+		*l4_len = sizeof(struct udphdr);
+		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
+		break;
+
+	case IPPROTO_SCTP:
+		/* only csum offload supports sctp */
+		if (offload_type != TX_OFFLOAD_CSUM)
+			break;
+
+		*l4_offload = SCTP_OFFLOAD_ENABLE;
+		*l4_len = sizeof(struct sctphdr);
+		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
+		break;
+
+	default:
+		break;
+	}
+}
+
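+/* pseudo-header checksum over the addresses and protocol with a zero
+ * length; the inverted result seeds the hw checksum/tso engines
+ */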
+static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto)
+{
+	return (ip->v4->version == 4) ?
+		csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
+		csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
+}
+
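+/* returns 1 when tso was set up, 0 when the skb is not gso and a
+ * negative errno when the headers could not be prepared
+ */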
+static int offload_tso(struct hinic_sq_task *task, u32 *queue_info,
+		       struct sk_buff *skb)
+{
+	u32 offset, l4_len, ip_identify, network_hdr_len;
+	enum hinic_l3_offload_type l3_offload;
+	enum hinic_l4_offload_type l4_offload;
+	union hinic_l3 ip;
+	union hinic_l4 l4;
+	u8 l4_proto;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	if (skb_cow_head(skb, 0) < 0)
+		return -EPROTONOSUPPORT;
+
+	if (skb->encapsulation) {
+		u32 gso_type = skb_shinfo(skb)->gso_type;
+		u32 tunnel_type = 0;
+		u32 l4_tunnel_len;
+
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+		network_hdr_len = skb_inner_network_header_len(skb);
+
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+		} else if (ip.v4->version == 6) {
+			l3_offload = IPV6_PKT;
+		} else {
+			l3_offload = 0;
+		}
+
+		hinic_task_set_outter_l3(task, l3_offload,
+					 skb_network_header_len(skb));
+
+		if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
+			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
+			tunnel_type = TUNNEL_UDP_CSUM;
+		} else if (gso_type & SKB_GSO_UDP_TUNNEL) {
+			tunnel_type = TUNNEL_UDP_NO_CSUM;
+		}
+
+		l4_tunnel_len = skb_inner_network_offset(skb) -
+				skb_transport_offset(skb);
+		hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
+
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+	} else {
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+		network_hdr_len = skb_network_header_len(skb);
+	}
+
+	/* initialize inner IP header fields */
+	if (ip.v4->version == 4)
+		ip.v4->tot_len = 0;
+	else
+		ip.v6->payload_len = 0;
+
+	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload,
+			     &l4_proto);
+
+	hinic_task_set_inner_l3(task, l3_offload, network_hdr_len);
+
+	ip_identify = 0;
+	if (l4_proto == IPPROTO_TCP)
+		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);
+
+	get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload,
+			  &l4_len, &offset);
+
+	hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset,
+			       ip_identify, skb_shinfo(skb)->gso_size);
+
+	return 1;
+}
+
+static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
+			struct sk_buff *skb)
+{
+	enum hinic_l4_offload_type l4_offload;
+	u32 offset, l4_len, network_hdr_len;
+	enum hinic_l3_offload_type l3_type;
+	union hinic_l3 ip;
+	union hinic_l4 l4;
+	u8 l4_proto;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->encapsulation) {
+		u32 l4_tunnel_len;
+
+		ip.hdr = skb_network_header(skb);
+
+		if (ip.v4->version == 4)
+			l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
+		else if (ip.v4->version == 6)
+			l3_type = IPV6_PKT;
+		else
+			l3_type = L3TYPE_UNKNOWN;
+
+		hinic_task_set_outter_l3(task, l3_type,
+					 skb_network_header_len(skb));
+
+		l4_tunnel_len = skb_inner_network_offset(skb) -
+				skb_transport_offset(skb);
+
+		hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
+					 l4_tunnel_len);
+
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+		network_hdr_len = skb_inner_network_header_len(skb);
+	} else {
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+		network_hdr_len = skb_network_header_len(skb);
+	}
+
+	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type,
+			     &l4_proto);
+
+	hinic_task_set_inner_l3(task, l3_type, network_hdr_len);
+
+	get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload,
+			  &l4_len, &offset);
+
+	hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset);
+
+	return 1;
+}
+
+static void offload_vlan(struct hinic_sq_task *task, u32 *queue_info,
+			 u16 vlan_tag, u16 vlan_pri)
+{
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
+				HINIC_SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
+
+	*queue_info |= HINIC_SQ_CTRL_SET(vlan_pri, QUEUE_INFO_PRI);
+}
+
+static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
+			    u32 *queue_info)
+{
+	enum hinic_offload_type offload = 0;
+	u16 vlan_tag;
+	int enabled;
+
+	enabled = offload_tso(task, queue_info, skb);
+	if (enabled > 0) {
+		offload |= TX_OFFLOAD_TSO;
+	} else if (enabled == 0) {
+		enabled = offload_csum(task, queue_info, skb);
+		if (enabled)
+			offload |= TX_OFFLOAD_CSUM;
+	} else {
+		return -EPROTONOSUPPORT;
+	}
+
+	if (unlikely(skb_vlan_tag_present(skb))) {
+		vlan_tag = skb_vlan_tag_get(skb);
+		offload_vlan(task, queue_info, vlan_tag,
+			     vlan_tag >> VLAN_PRIO_SHIFT);
+		offload |= TX_OFFLOAD_VLAN;
+	}
+
+	if (offload)
+		hinic_task_set_l2hdr(task, skb_network_offset(skb));
+
+	/* the payload offset should not be more than 221 */
+	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) >
+	    MAX_PAYLOAD_OFFSET) {
+		return -EPROTONOSUPPORT;
+	}
+
+	/* the mss should not be less than 80 */
+	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) {
+		*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+		*queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
+	}
+
+	return 0;
+}
+
 netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u16 prod_idx, q_id = skb->queue_mapping;
 	struct netdev_queue *netdev_txq;
 	int nr_sges, err = NETDEV_TX_OK;
 	struct hinic_sq_wqe *sq_wqe;
 	unsigned int wqe_size;
 	struct hinic_txq *txq;
 	struct hinic_qp *qp;
-	u16 prod_idx;
 
-	txq = &nic_dev->txqs[skb->queue_mapping];
+	txq = &nic_dev->txqs[q_id];
 	qp = container_of(txq->sq, struct hinic_qp, sq);
 
 	if (skb->len < MIN_SKB_LEN) {
@@ -199,6 +483,12 @@
 	}
 
 	nr_sges = skb_shinfo(skb)->nr_frags + 1;
+	if (nr_sges > 17) {
+		u64_stats_update_begin(&txq->txq_stats.syncp);
+		txq->txq_stats.big_frags_pkts++;
+		u64_stats_update_end(&txq->txq_stats.syncp);
+	}
+
 	if (nr_sges > txq->max_sges) {
 		netdev_err(netdev, "Too many Tx sges\n");
 		goto skb_error;
@@ -236,15 +526,23 @@
 process_sq_wqe:
 	hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
 
+	err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
+	if (err)
+		goto offload_error;
+
 	hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
 
 flush_skbs:
-	netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
-	if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
+	netdev_txq = netdev_get_tx_queue(netdev, q_id);
+	if ((!netdev_xmit_more()) || (netif_xmit_stopped(netdev_txq)))
 		hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
 
 	return err;
 
+offload_error:
+	hinic_sq_return_wqe(txq->sq, wqe_size);
+	tx_unmap_skb(nic_dev, skb, txq->sges);
+
 skb_error:
 	dev_kfree_skb_any(skb);
 
@@ -252,7 +550,8 @@
 	u64_stats_update_begin(&txq->txq_stats.syncp);
 	txq->txq_stats.tx_dropped++;
 	u64_stats_update_end(&txq->txq_stats.syncp);
-	return err;
+
+	return NETDEV_TX_OK;
 }
 
 /**
@@ -372,7 +671,9 @@
 
 	if (pkts < budget) {
 		napi_complete(napi);
-		enable_irq(sq->irq);
+		hinic_hwdev_set_msix_state(nic_dev->hwdev,
+					   sq->msix_entry,
+					   HINIC_MSIX_ENABLE);
 		return pkts;
 	}
 
@@ -399,7 +700,9 @@
 	nic_dev = netdev_priv(txq->netdev);
 
 	/* Disable the interrupt until napi will be completed */
-	disable_irq_nosync(txq->sq->irq);
+	hinic_hwdev_set_msix_state(nic_dev->hwdev,
+				   txq->sq->msix_entry,
+				   HINIC_MSIX_DISABLE);
 
 	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, txq->sq->msix_entry);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.h b/drivers/net/ethernet/huawei/hinic/hinic_tx.h
index 1fa55dc..f158b7d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Huawei HiNIC PCI Express Linux driver
  * Copyright(c) 2017 Huawei Technologies Co., Ltd
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
  */
 
 #ifndef HINIC_TX_H
@@ -30,6 +21,7 @@
 	u64     tx_busy;
 	u64     tx_wake;
 	u64     tx_dropped;
+	u64	big_frags_pkts;
 
 	struct u64_stats_sync   syncp;
 };
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index d719668..9292975 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -1310,7 +1310,7 @@
 						dev->stats.tx_aborted_errors++;
 				}
 
-				dev_kfree_skb_irq(skb);
+				dev_consume_skb_irq(skb);
 
 				tx_cmd->cmd.command = 0; /* Mark free */
 				break;
diff --git a/drivers/net/ethernet/i825xx/Kconfig b/drivers/net/ethernet/i825xx/Kconfig
index e8d61f6..33faff9 100644
--- a/drivers/net/ethernet/i825xx/Kconfig
+++ b/drivers/net/ethernet/i825xx/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Intel 82596/82593/82596 network device configuration
 #
diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c
index 35f6291..bb3b8ad 100644
--- a/drivers/net/ethernet/i825xx/ether1.c
+++ b/drivers/net/ethernet/i825xx/ether1.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/drivers/acorn/net/ether1.c
  *
  *  Copyright (C) 1996-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  Acorn ether1 driver (82586 chip) for Acorn machines
  *
  * We basically keep two queues in the cards memory - one for transmit
diff --git a/drivers/net/ethernet/i825xx/ether1.h b/drivers/net/ethernet/i825xx/ether1.h
index 3a5830a..3926e04 100644
--- a/drivers/net/ethernet/i825xx/ether1.h
+++ b/drivers/net/ethernet/i825xx/ether1.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/drivers/acorn/net/ether1.h
  *
  *  Copyright (C) 1996 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  Network driver for Acorn Ether1 cards.
  */
 
diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c
index b69c622..aec7e98 100644
--- a/drivers/net/ethernet/i825xx/lasi_82596.c
+++ b/drivers/net/ethernet/i825xx/lasi_82596.c
@@ -96,6 +96,8 @@
 
 #define OPT_SWAP_PORT	0x0001	/* Need to wordswp on the MPU port */
 
+#define LIB82596_DMA_ATTR	DMA_ATTR_NON_CONSISTENT
+
 #define DMA_WBACK(ndev, addr, len) \
 	do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0)
 
@@ -105,7 +107,7 @@
 #define DMA_WBACK_INV(ndev, addr, len) \
 	do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_BIDIRECTIONAL); } while (0)
 
-#define SYSBUS      0x0000006c;
+#define SYSBUS      0x0000006c
 
 /* big endian CPU, 82596 "big" endian mode */
 #define SWAP32(x)   (((u32)(x)<<16) | ((((u32)(x)))>>16))
@@ -141,7 +143,8 @@
 	}
 
 	gsc_writel(a, dev->base_addr + PA_CPU_PORT_L_ACCESS);
-	udelay(1);
+	if (!running_on_qemu)
+		udelay(1);
 	gsc_writel(b, dev->base_addr + PA_CPU_PORT_L_ACCESS);
 }
 
@@ -199,7 +202,7 @@
 
 	unregister_netdev (dev);
 	dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma,
-		       lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+		       lp->dma_addr, LIB82596_DMA_ATTR);
 	free_netdev (dev);
 	return 0;
 }
diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c
index 2f7ae11..f9742af 100644
--- a/drivers/net/ethernet/i825xx/lib82596.c
+++ b/drivers/net/ethernet/i825xx/lib82596.c
@@ -1065,7 +1065,7 @@
 
 	dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma),
 			      &lp->dma_addr, GFP_KERNEL,
-			      DMA_ATTR_NON_CONSISTENT);
+			      LIB82596_DMA_ATTR);
 	if (!dma) {
 		printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__);
 		return -ENOMEM;
@@ -1087,7 +1087,7 @@
 	i = register_netdev(dev);
 	if (i) {
 		dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma),
-			       dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+			       dma, lp->dma_addr, LIB82596_DMA_ATTR);
 		return i;
 	}
 
@@ -1194,7 +1194,7 @@
 				dma_unmap_single(dev->dev.parent,
 						 tx_cmd->dma_addr,
 						 skb->len, DMA_TO_DEVICE);
-				dev_kfree_skb_irq(skb);
+				dev_consume_skb_irq(skb);
 
 				tx_cmd->cmd.command = 0; /* Mark free */
 				break;
diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index b2c04a7..6436a98 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sni_82596.c -- driver for intel 82596 ethernet controller, as
  *  		  used in older SNI RM machines
@@ -23,6 +24,8 @@
 
 static const char sni_82596_string[] = "snirm_82596";
 
+#define LIB82596_DMA_ATTR	0
+
 #define DMA_WBACK(priv, addr, len)     do { } while (0)
 #define DMA_INV(priv, addr, len)       do { } while (0)
 #define DMA_WBACK_INV(priv, addr, len) do { } while (0)
@@ -151,7 +154,7 @@
 
 	unregister_netdev(dev);
 	dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma,
-		       lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+		       lp->dma_addr, LIB82596_DMA_ATTR);
 	iounmap(lp->ca);
 	iounmap(lp->mpu_port);
 	free_netdev (dev);
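
lib82596.c is a shared core that each bus wrapper pulls in, so the new LIB82596_DMA_ATTR macro lets every wrapper choose its own DMA attributes: lasi_82596.c keeps DMA_ATTR_NON_CONSISTENT and pairs it with the explicit dma_cache_sync() calls in its DMA_WBACK/DMA_INV macros, while sni_82596.c passes 0 and gets ordinary coherent memory with no-op sync macros. The arrangement, as a condensed sketch (values as in the hunks above):

/* In the platform wrapper, before the shared core is compiled in: */
#define LIB82596_DMA_ATTR	DMA_ATTR_NON_CONSISTENT	/* lasi: manual cache sync */
/* #define LIB82596_DMA_ATTR	0 */			/* sni: coherent memory  */

/* In the shared core, one call site then serves both platforms: */
dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma),
		      &lp->dma_addr, GFP_KERNEL, LIB82596_DMA_ATTR);
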
diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig
index 37dceab..a95d941 100644
--- a/drivers/net/ethernet/ibm/Kconfig
+++ b/drivers/net/ethernet/ibm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # IBM device configuration.
 #
diff --git a/drivers/net/ethernet/ibm/Makefile b/drivers/net/ethernet/ibm/Makefile
index 447865c..1d17d0c 100644
--- a/drivers/net/ethernet/ibm/Makefile
+++ b/drivers/net/ethernet/ibm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the IBM network device drivers.
 #
diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile
index cd473e2..9e1e5c7 100644
--- a/drivers/net/ethernet/ibm/ehea/Makefile
+++ b/drivers/net/ethernet/ibm/ehea/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the eHEA ethernet device driver for IBM eServer System p
 #
diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h
index 6be7b98..b140835 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea.h
+++ b/drivers/net/ethernet/ibm/ehea/ehea.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea.h
  *
@@ -9,21 +10,6 @@
  *       Christoph Raisch <raisch@de.ibm.com>
  *       Jan-Bernd Themann <themann@de.ibm.com>
  *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef __EHEA_H__
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
index 4f58d33..6cb8603 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
  *
@@ -9,21 +10,6 @@
  *       Christoph Raisch <raisch@de.ibm.com>
  *       Jan-Bernd Themann <themann@de.ibm.com>
  *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_hw.h b/drivers/net/ethernet/ibm/ehea/ehea_hw.h
index 180d412..590933a 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_hw.h
+++ b/drivers/net/ethernet/ibm/ehea/ehea_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_hw.h
  *
@@ -9,21 +10,6 @@
  *       Christoph Raisch <raisch@de.ibm.com>
  *       Jan-Bernd Themann <themann@de.ibm.com>
  *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef __EHEA_HW_H__
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 03f64f4..13e30eb 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_main.c
  *
@@ -9,21 +10,6 @@
  *	 Christoph Raisch <raisch@de.ibm.com>
  *	 Jan-Bernd Themann <themann@de.ibm.com>
  *	 Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -778,12 +764,11 @@
 {
 	struct ehea_swqe *swqe;
 	int swqe_index;
-	int i, k;
+	int i;
 
 	for (i = 0; i < port->num_def_qps; i++) {
 		struct ehea_port_res *pr = &port->port_res[i];
 		int ret;
-		k = 0;
 		swqe = ehea_get_swqe(pr->qp, &swqe_index);
 		memset(swqe, 0, SWQE_HEADER_SIZE);
 		atomic_dec(&pr->swqe_avail);
@@ -1464,7 +1449,7 @@
 
 	memset(pr, 0, sizeof(struct ehea_port_res));
 
-	pr->tx_bytes = rx_bytes;
+	pr->tx_bytes = tx_bytes;
 	pr->tx_packets = tx_packets;
 	pr->rx_bytes = rx_bytes;
 	pr->rx_packets = rx_packets;
@@ -1592,20 +1577,16 @@
 		ehea_destroy_eq(pr->eq);
 
 		for (i = 0; i < pr->rq1_skba.len; i++)
-			if (pr->rq1_skba.arr[i])
-				dev_kfree_skb(pr->rq1_skba.arr[i]);
+			dev_kfree_skb(pr->rq1_skba.arr[i]);
 
 		for (i = 0; i < pr->rq2_skba.len; i++)
-			if (pr->rq2_skba.arr[i])
-				dev_kfree_skb(pr->rq2_skba.arr[i]);
+			dev_kfree_skb(pr->rq2_skba.arr[i]);
 
 		for (i = 0; i < pr->rq3_skba.len; i++)
-			if (pr->rq3_skba.arr[i])
-				dev_kfree_skb(pr->rq3_skba.arr[i]);
+			dev_kfree_skb(pr->rq3_skba.arr[i]);
 
 		for (i = 0; i < pr->sq_skba.len; i++)
-			if (pr->sq_skba.arr[i])
-				dev_kfree_skb(pr->sq_skba.arr[i]);
+			dev_kfree_skb(pr->sq_skba.arr[i]);
 
 		vfree(pr->rq1_skba.arr);
 		vfree(pr->rq2_skba.arr);
@@ -2027,7 +2008,7 @@
 	dev_consume_skb_any(skb);
 }
 
-static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ehea_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ehea_port *port = netdev_priv(dev);
 	struct ehea_swqe *swqe;
@@ -3161,6 +3142,7 @@
 
 	if (ehea_add_adapter_mr(adapter)) {
 		pr_err("creating MR failed\n");
+		of_node_put(eth_dn);
 		return -EIO;
 	}
 
@@ -3265,7 +3247,7 @@
 	switch (action) {
 	case MEM_CANCEL_OFFLINE:
 		pr_info("memory offlining canceled");
-		/* Fall through: re-add canceled memory block */
+		/* Fall through - re-add canceled memory block */
 
 	case MEM_ONLINE:
 		pr_info("memory is going online");
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c
index d3a130c..e63716e 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_phyp.c
  *
@@ -9,21 +10,6 @@
  *	 Christoph Raisch <raisch@de.ibm.com>
  *	 Jan-Bernd Themann <themann@de.ibm.com>
  *	 Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
index 99b6c2a..e8b56c1 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
+++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_phyp.h
  *
@@ -9,21 +10,6 @@
  *       Christoph Raisch <raisch@de.ibm.com>
  *       Jan-Bernd Themann <themann@de.ibm.com>
  *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef __EHEA_PHYP_H__
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
index a0820f7..6e70658 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
  *
@@ -9,21 +10,6 @@
  *       Christoph Raisch <raisch@de.ibm.com>
  *       Jan-Bernd Themann <themann@de.ibm.com>
  *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -123,9 +109,8 @@
 			       int nr_of_cqe, u64 eq_handle, u32 cq_token)
 {
 	struct ehea_cq *cq;
-	struct h_epa epa;
-	u64 *cq_handle_ref, hret, rpage;
-	u32 act_nr_of_entries, act_pages, counter;
+	u64 hret, rpage;
+	u32 counter;
 	int ret;
 	void *vpage;
 
@@ -139,10 +124,6 @@
 
 	cq->adapter = adapter;
 
-	cq_handle_ref = &cq->fw_handle;
-	act_nr_of_entries = 0;
-	act_pages = 0;
-
 	hret = ehea_h_alloc_resource_cq(adapter->handle, &cq->attr,
 					&cq->fw_handle, &cq->epas);
 	if (hret != H_SUCCESS) {
@@ -190,7 +171,6 @@
 	}
 
 	hw_qeit_reset(&cq->hw_queue);
-	epa = cq->epas.kernel;
 	ehea_reset_cq_ep(cq);
 	ehea_reset_cq_n1(cq);
 
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.h b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h
index 8e4a70c..7c7cccd 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.h
+++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  linux/drivers/net/ethernet/ibm/ehea/ehea_qmr.h
  *
@@ -9,21 +10,6 @@
  *       Christoph Raisch <raisch@de.ibm.com>
  *       Jan-Bernd Themann <themann@de.ibm.com>
  *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef __EHEA_QMR_H__
diff --git a/drivers/net/ethernet/ibm/emac/Kconfig b/drivers/net/ethernet/ibm/emac/Kconfig
index 90d4919..c8e5de5 100644
--- a/drivers/net/ethernet/ibm/emac/Kconfig
+++ b/drivers/net/ethernet/ibm/emac/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config IBM_EMAC
 	tristate "IBM EMAC Ethernet support"
 	depends on PPC_DCR
@@ -28,18 +29,6 @@
 	depends on IBM_EMAC
 	default "256"
 
-config IBM_EMAC_RX_SKB_HEADROOM
-	int "Additional RX skb headroom (bytes)"
-	depends on IBM_EMAC
-	default "0"
-	help
-	  Additional receive skb headroom. Note, that driver
-	  will always reserve at least 2 bytes to make IP header
-	  aligned, so usually there is no need to add any additional
-	  headroom.
-
-	  If unsure, set to 0.
-
 config IBM_EMAC_DEBUG
 	bool "Debugging"
 	depends on IBM_EMAC
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 129f4e9..9e43c9a 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/ibm/emac/core.c
  *
@@ -16,12 +17,6 @@
  *	(c) 2003 Benjamin Herrenschmidt <benh@kernel.crashing.org>
  *      Armin Kuster <akuster@mvista.com>
  * 	Johnnie Peters <jpeters@mvista.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 
 #include <linux/module.h>
@@ -423,7 +418,7 @@
 {
 	const int regs = EMAC_XAHT_REGS(dev);
 	u32 *gaht_base = emac_gaht_base(dev);
-	u32 gaht_temp[regs];
+	u32 gaht_temp[EMAC_XAHT_MAX_REGS];
 	struct netdev_hw_addr *ha;
 	int i;
 
@@ -1071,7 +1066,9 @@
 
 	/* Second pass, allocate new skbs */
 	for (i = 0; i < NUM_RX_BUFF; ++i) {
-		struct sk_buff *skb = alloc_skb(rx_skb_size, GFP_ATOMIC);
+		struct sk_buff *skb;
+
+		skb = netdev_alloc_skb_ip_align(dev->ndev, rx_skb_size);
 		if (!skb) {
 			ret = -ENOMEM;
 			goto oom;
@@ -1080,10 +1077,10 @@
 		BUG_ON(!dev->rx_skb[i]);
 		dev_kfree_skb(dev->rx_skb[i]);
 
-		skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2);
 		dev->rx_desc[i].data_ptr =
-		    dma_map_single(&dev->ofdev->dev, skb->data - 2, rx_sync_size,
-				   DMA_FROM_DEVICE) + 2;
+		    dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN,
+				   rx_sync_size, DMA_FROM_DEVICE)
+				   + NET_IP_ALIGN;
 		dev->rx_skb[i] = skb;
 	}
  skip:
@@ -1174,20 +1171,18 @@
 	}
 }
 
-static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot,
-				    gfp_t flags)
+static int
+__emac_prepare_rx_skb(struct sk_buff *skb, struct emac_instance *dev, int slot)
 {
-	struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags);
 	if (unlikely(!skb))
 		return -ENOMEM;
 
 	dev->rx_skb[slot] = skb;
 	dev->rx_desc[slot].data_len = 0;
 
-	skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2);
 	dev->rx_desc[slot].data_ptr =
-	    dma_map_single(&dev->ofdev->dev, skb->data - 2, dev->rx_sync_size,
-			   DMA_FROM_DEVICE) + 2;
+	    dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN,
+			   dev->rx_sync_size, DMA_FROM_DEVICE) + NET_IP_ALIGN;
 	wmb();
 	dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY |
 	    (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0);
@@ -1195,6 +1190,27 @@
 	return 0;
 }
 
+static int
+emac_alloc_rx_skb(struct emac_instance *dev, int slot)
+{
+	struct sk_buff *skb;
+
+	skb = __netdev_alloc_skb_ip_align(dev->ndev, dev->rx_skb_size,
+					  GFP_KERNEL);
+
+	return __emac_prepare_rx_skb(skb, dev, slot);
+}
+
+static int
+emac_alloc_rx_skb_napi(struct emac_instance *dev, int slot)
+{
+	struct sk_buff *skb;
+
+	skb = napi_alloc_skb(&dev->mal->napi, dev->rx_skb_size);
+
+	return __emac_prepare_rx_skb(skb, dev, slot);
+}
+
 static void emac_print_link_status(struct emac_instance *dev)
 {
 	if (netif_carrier_ok(dev->ndev))
@@ -1225,7 +1241,7 @@
 
 	/* Allocate RX ring */
 	for (i = 0; i < NUM_RX_BUFF; ++i)
-		if (emac_alloc_rx_skb(dev, i, GFP_KERNEL)) {
+		if (emac_alloc_rx_skb(dev, i)) {
 			printk(KERN_ERR "%s: failed to allocate RX ring\n",
 			       ndev->name);
 			goto oom;
@@ -1409,7 +1425,7 @@
 	return 0;
 }
 
-static inline int emac_xmit_finish(struct emac_instance *dev, int len)
+static inline netdev_tx_t emac_xmit_finish(struct emac_instance *dev, int len)
 {
 	struct emac_regs __iomem *p = dev->emacp;
 	struct net_device *ndev = dev->ndev;
@@ -1436,7 +1452,7 @@
 }
 
 /* Tx lock BH */
-static int emac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct emac_instance *dev = netdev_priv(ndev);
 	unsigned int len = skb->len;
@@ -1494,7 +1510,8 @@
 }
 
 /* Tx lock BH disabled (SG version for TAH equipped EMACs) */
-static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t
+emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct emac_instance *dev = netdev_priv(ndev);
 	int nr_frags = skb_shinfo(skb)->nr_frags;
@@ -1532,7 +1549,7 @@
 				       ctrl);
 	/* skb fragments */
 	for (i = 0; i < nr_frags; ++i) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		len = skb_frag_size(frag);
 
 		if (unlikely(dev->tx_cnt + mal_tx_chunks(len) >= NUM_TX_BUFF))
@@ -1659,8 +1676,9 @@
 	DBG2(dev, "recycle %d %d" NL, slot, len);
 
 	if (len)
-		dma_map_single(&dev->ofdev->dev, skb->data - 2,
-			       EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE);
+		dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN,
+			       SKB_DATA_ALIGN(len + NET_IP_ALIGN),
+			       DMA_FROM_DEVICE);
 
 	dev->rx_desc[slot].data_len = 0;
 	wmb();
@@ -1712,7 +1730,7 @@
 		int len = dev->rx_desc[slot].data_len;
 		int tot_len = dev->rx_sg_skb->len + len;
 
-		if (unlikely(tot_len + 2 > dev->rx_skb_size)) {
+		if (unlikely(tot_len + NET_IP_ALIGN > dev->rx_skb_size)) {
 			++dev->estats.rx_dropped_mtu;
 			dev_kfree_skb(dev->rx_sg_skb);
 			dev->rx_sg_skb = NULL;
@@ -1768,16 +1786,18 @@
 		}
 
 		if (len && len < EMAC_RX_COPY_THRESH) {
-			struct sk_buff *copy_skb =
-			    alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC);
+			struct sk_buff *copy_skb;
+
+			copy_skb = napi_alloc_skb(&dev->mal->napi, len);
 			if (unlikely(!copy_skb))
 				goto oom;
 
-			skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2);
-			memcpy(copy_skb->data - 2, skb->data - 2, len + 2);
+			memcpy(copy_skb->data - NET_IP_ALIGN,
+			       skb->data - NET_IP_ALIGN,
+			       len + NET_IP_ALIGN);
 			emac_recycle_rx_skb(dev, slot, len);
 			skb = copy_skb;
-		} else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC)))
+		} else if (unlikely(emac_alloc_rx_skb_napi(dev, slot)))
 			goto oom;
 
 		skb_put(skb, len);
@@ -1798,7 +1818,7 @@
 	sg:
 		if (ctrl & MAL_RX_CTRL_FIRST) {
 			BUG_ON(dev->rx_sg_skb);
-			if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) {
+			if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) {
 				DBG(dev, "rx OOM %d" NL, slot);
 				++dev->estats.rx_dropped_oom;
 				emac_recycle_rx_skb(dev, slot, 0);
@@ -2454,7 +2474,8 @@
 	dev->phy.duplex = phy->duplex;
 	dev->phy.pause = phy->pause;
 	dev->phy.asym_pause = phy->asym_pause;
-	dev->phy.advertising = phy->advertising;
+	ethtool_convert_link_mode_to_legacy_u32(&dev->phy.advertising,
+						phy->advertising);
 }
 
 static int emac_mii_bus_read(struct mii_bus *bus, int addr, int regnum)
@@ -2489,7 +2510,8 @@
 	phy_dev->autoneg = phy->autoneg;
 	phy_dev->speed = phy->speed;
 	phy_dev->duplex = phy->duplex;
-	phy_dev->advertising = phy->advertising;
+	ethtool_convert_legacy_u32_to_link_mode(phy_dev->advertising,
+						phy->advertising);
 	return phy_start_aneg(phy_dev);
 }
 
@@ -2623,7 +2645,8 @@
 	dev->phy.def->phy_id_mask = dev->phy_dev->drv->phy_id_mask;
 	dev->phy.def->name = dev->phy_dev->drv->name;
 	dev->phy.def->ops = &emac_dt_mdio_phy_ops;
-	dev->phy.features = dev->phy_dev->supported;
+	ethtool_convert_link_mode_to_legacy_u32(&dev->phy.features,
+						dev->phy_dev->supported);
 	dev->phy.address = dev->phy_dev->mdio.addr;
 	dev->phy.mode = dev->phy_dev->interface;
 	return 0;
@@ -2969,6 +2992,10 @@
 		dev->xaht_width_shift = EMAC4_XAHT_WIDTH_SHIFT;
 	}
 
+	/* This should never happen */
+	if (WARN_ON(EMAC_XAHT_REGS(dev) > EMAC_XAHT_MAX_REGS))
+		return -ENXIO;
+
 	DBG(dev, "features     : 0x%08x / 0x%08x\n", dev->features, EMAC_FTRS_POSSIBLE);
 	DBG(dev, "tx_fifo_size : %d (%d gige)\n", dev->tx_fifo_size, dev->tx_fifo_size_gige);
 	DBG(dev, "rx_fifo_size : %d (%d gige)\n", dev->rx_fifo_size, dev->rx_fifo_size_gige);
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index 369de2c..e9cda02 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/core.h
  *
@@ -15,12 +16,6 @@
  *      Armin Kuster <akuster@mvista.com>
  * 	Johnnie Peters <jpeters@mvista.com>
 *      Copyright 2000, 2001 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #ifndef __IBM_NEWEMAC_CORE_H
 #define __IBM_NEWEMAC_CORE_H
@@ -68,22 +63,18 @@
 		return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD);
 }
 
-#define EMAC_DMA_ALIGN(x)		ALIGN((x), dma_get_cache_alignment())
-
-#define EMAC_RX_SKB_HEADROOM		\
-	EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM)
-
 /* Size of RX skb for the given MTU */
 static inline int emac_rx_skb_size(int mtu)
 {
 	int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu));
-	return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM;
+
+	return SKB_DATA_ALIGN(size + NET_IP_ALIGN) + NET_SKB_PAD;
 }
 
 /* RX DMA sync size */
 static inline int emac_rx_sync_size(int mtu)
 {
-	return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2);
+	return SKB_DATA_ALIGN(emac_rx_size(mtu) + NET_IP_ALIGN);
 }
 
 /* Driver statistics are split into two parts to make them more cache friendly:
@@ -390,6 +381,9 @@
 #define	EMAC4SYNC_XAHT_SLOTS_SHIFT	8
 #define	EMAC4SYNC_XAHT_WIDTH_SHIFT	5
 
+/* The largest span between slots and widths above is 3 */
+#define	EMAC_XAHT_MAX_REGS		(1 << 3)
+
 #define	EMAC_XAHT_SLOTS(dev)         	(1 << (dev)->xaht_slots_shift)
 #define	EMAC_XAHT_WIDTH(dev)         	(1 << (dev)->xaht_width_shift)
 #define	EMAC_XAHT_REGS(dev)          	(1 << ((dev)->xaht_slots_shift - \
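
EMAC_XAHT_MAX_REGS exists so core.c can drop a variable-length array from the stack: EMAC_XAHT_REGS(dev) works out to 1 << (xaht_slots_shift - xaht_width_shift), and across the supported variants the largest shift difference is 3 (e.g. the EMAC4SYNC values above, 8 - 5), hence at most 1 << 3 = 8 registers. The WARN_ON added at feature-probe time in core.c makes a future variant that exceeds the bound fail loudly instead of overrunning the fixed array. In miniature:

/* Worst case across variants: slots_shift - width_shift == 3 */
#define EMAC_XAHT_MAX_REGS	(1 << 3)	/* == 8 u32 registers */

u32 gaht_temp[EMAC_XAHT_MAX_REGS];	/* fixed size; was a VLA of EMAC_XAHT_REGS(dev) */
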
diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h
index 9d06d3b..c09a46a 100644
--- a/drivers/net/ethernet/ibm/emac/debug.h
+++ b/drivers/net/ethernet/ibm/emac/debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/debug.h
  *
@@ -10,12 +11,6 @@
  *
  * Copyright (c) 2004, 2005 Zultys Technologies
  * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #ifndef __IBM_NEWEMAC_DEBUG_H
 #define __IBM_NEWEMAC_DEBUG_H
diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
index 0d2de6f..aa9f651 100644
--- a/drivers/net/ethernet/ibm/emac/emac.h
+++ b/drivers/net/ethernet/ibm/emac/emac.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/emac.h
  *
@@ -15,12 +16,6 @@
  *      Matt Porter <mporter@kernel.crashing.org>
  *      Armin Kuster <akuster@mvista.com>
  * 	Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #ifndef __IBM_NEWEMAC_H
 #define __IBM_NEWEMAC_H
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index fff09dc..075c073 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/ibm/emac/mal.c
  *
@@ -17,12 +18,6 @@
  *
  *      Armin Kuster <akuster@mvista.com>
 *      Copyright 2002 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 
 #include <linux/delay.h>
@@ -636,8 +631,8 @@
 	bd_size = sizeof(struct mal_descriptor) *
 		(NUM_TX_BUFF * mal->num_tx_chans +
 		 NUM_RX_BUFF * mal->num_rx_chans);
-	mal->bd_virt = dma_zalloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma,
-					   GFP_KERNEL);
+	mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma,
+					  GFP_KERNEL);
 	if (mal->bd_virt == NULL) {
 		err = -ENOMEM;
 		goto fail_unmap;
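
The dma_zalloc_coherent() -> dma_alloc_coherent() substitution above is purely mechanical: dma_zalloc_coherent() was removed around v5.0, after dma_alloc_coherent() was changed to return zeroed memory unconditionally. Sketch, error handling elided:

/* dma_alloc_coherent() zeroes its buffer itself; no memset() needed. */
mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma,
				  GFP_KERNEL);
if (!mal->bd_virt)
	return -ENOMEM;
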
diff --git a/drivers/net/ethernet/ibm/emac/mal.h b/drivers/net/ethernet/ibm/emac/mal.h
index eeade2e..d212373 100644
--- a/drivers/net/ethernet/ibm/emac/mal.h
+++ b/drivers/net/ethernet/ibm/emac/mal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/mal.h
  *
@@ -14,12 +15,6 @@
  * Based on original work by
  *      Armin Kuster <akuster@mvista.com>
 *      Copyright 2002 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #ifndef __IBM_NEWEMAC_MAL_H
 #define __IBM_NEWEMAC_MAL_H
@@ -136,7 +131,7 @@
 
 static inline int mal_tx_chunks(int len)
 {
-	return (len + MAL_MAX_TX_SIZE - 1) / MAL_MAX_TX_SIZE;
+	return DIV_ROUND_UP(len, MAL_MAX_TX_SIZE);
 }
 
 #define MAL_CHAN_MASK(n)	(0x80000000 >> (n))
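
DIV_ROUND_UP(), from linux/kernel.h, expands to exactly the open-coded round-up it replaces; the win is legibility:

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))	/* as in linux/kernel.h */

For instance, assuming a chunk limit of 4096 bytes (a value picked here only for illustration), a 9000-byte frame needs DIV_ROUND_UP(9000, 4096) = 3 TX chunks.
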
diff --git a/drivers/net/ethernet/ibm/emac/phy.c b/drivers/net/ethernet/ibm/emac/phy.c
index aa070c0..1e798cc 100644
--- a/drivers/net/ethernet/ibm/emac/phy.c
+++ b/drivers/net/ethernet/ibm/emac/phy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * drivers/net/ethernet/ibm/emac/phy.c
  *
diff --git a/drivers/net/ethernet/ibm/emac/phy.h b/drivers/net/ethernet/ibm/emac/phy.h
index d7e41ec..2184e83 100644
--- a/drivers/net/ethernet/ibm/emac/phy.h
+++ b/drivers/net/ethernet/ibm/emac/phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/phy.h
  *
@@ -13,11 +14,6 @@
  *
  * Minor additions by Eugene Surovegin <ebs@ebshome.net>, 2004
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  * This file basically duplicates sungem_phy.{c,h} with different PHYs
  * supported. I'm looking into merging that in a single mii layer more
  * flexible than mii.c
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index 00f5999..242ef97 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/ibm/emac/rgmii.c
  *
@@ -14,12 +15,6 @@
  * Based on original work by
  * 	Matt Porter <mporter@kernel.crashing.org>
  * 	Copyright 2004 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #include <linux/slab.h>
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.h b/drivers/net/ethernet/ibm/emac/rgmii.h
index d4f1374..8e4e36e 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.h
+++ b/drivers/net/ethernet/ibm/emac/rgmii.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/rgmii.h
  *
@@ -16,11 +17,6 @@
  *
  * Copyright (c) 2004, 2005 Zultys Technologies.
  * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #ifndef __IBM_NEWEMAC_RGMII_H
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index 9912456..008bbda 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/ibm/emac/tah.c
  *
@@ -12,11 +13,6 @@
  * Matt Porter <mporter@kernel.crashing.org>
  *
  * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 #include <linux/of_address.h>
 #include <asm/io.h>
diff --git a/drivers/net/ethernet/ibm/emac/tah.h b/drivers/net/ethernet/ibm/emac/tah.h
index 4d5f336..86c2b6b 100644
--- a/drivers/net/ethernet/ibm/emac/tah.h
+++ b/drivers/net/ethernet/ibm/emac/tah.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/tah.h
  *
@@ -12,11 +13,6 @@
  * Matt Porter <mporter@kernel.crashing.org>
  *
  * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #ifndef __IBM_NEWEMAC_TAH_H
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index fdcc734..b9e821d 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/ibm/emac/zmii.c
  *
@@ -14,12 +15,6 @@
  * Based on original work by
  *      Armin Kuster <akuster@mvista.com>
 * 	Copyright 2001 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #include <linux/slab.h>
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/ibm/emac/zmii.h b/drivers/net/ethernet/ibm/emac/zmii.h
index 0959c55..41d46e9 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.h
+++ b/drivers/net/ethernet/ibm/emac/zmii.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/ibm/emac/zmii.h
  *
@@ -14,12 +15,6 @@
  * Based on original work by
  *      Armin Kuster <akuster@mvista.com>
 * 	Copyright 2001 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
  */
 #ifndef __IBM_NEWEMAC_ZMII_H
 #define __IBM_NEWEMAC_ZMII_H
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 525d8b8..c5be4eb 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * IBM Power Virtual Ethernet Device Driver
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Copyright (C) IBM Corporation, 2003, 2010
  *
  * Authors: Dave Larson <larson1@us.ibm.com>
@@ -24,7 +12,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/dma-mapping.h>
@@ -94,7 +81,7 @@
 #define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
 #define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
 
-struct ibmveth_stat ibmveth_stats[] = {
+static struct ibmveth_stat ibmveth_stats[] = {
 	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
 	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
 	{ "replenish_add_buff_failure",
@@ -1172,11 +1159,15 @@
 
 map_failed_frags:
 	last = i+1;
-	for (i = 0; i < last; i++)
+	for (i = 1; i < last; i++)
 		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
 			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
 			       DMA_TO_DEVICE);
 
+	dma_unmap_single(&adapter->vdev->dev,
+			 descs[0].fields.address,
+			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
+			 DMA_TO_DEVICE);
 map_failed:
 	if (!firmware_has_feature(FW_FEATURE_CMO))
 		netdev_err(netdev, "tx: unable to map xmit buffer\n");
@@ -1310,7 +1301,6 @@
 	unsigned long lpar_rc;
 	u16 mss = 0;
 
-restart_poll:
 	while (frames_processed < budget) {
 		if (!ibmveth_rxq_pending_buffer(adapter))
 			break;
@@ -1398,7 +1388,6 @@
 		    napi_reschedule(napi)) {
 			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
 					       VIO_IRQ_DISABLE);
-			goto restart_poll;
 		}
 	}
 
@@ -1616,7 +1605,7 @@
 	struct net_device *netdev;
 	struct ibmveth_adapter *adapter;
 	unsigned char *mac_addr_p;
-	unsigned int *mcastFilterSize_p;
+	__be32 *mcastFilterSize_p;
 	long ret;
 	unsigned long ret_attr;
 
@@ -1638,8 +1627,9 @@
 		return -EINVAL;
 	}
 
-	mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev,
-						VETH_MCAST_FILTER_SIZE, NULL);
+	mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
+							VETH_MCAST_FILTER_SIZE,
+							NULL);
 	if (!mcastFilterSize_p) {
 		dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
 			"attribute\n");
@@ -1656,7 +1646,7 @@
 
 	adapter->vdev = dev;
 	adapter->netdev = netdev;
-	adapter->mcastFilterSize = *mcastFilterSize_p;
+	adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
 	adapter->pool_config = 0;
 
 	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
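
Two substantive ibmveth fixes above. First, the map_failed_frags unwind now starts its dma_unmap_page() loop at 1 and unmaps descriptor 0 with dma_unmap_single(), matching how each mapping was created: slot 0 maps the linear skb data via dma_map_single(), slots 1..n map page frags via dma_map_page(), and the DMA-debug layer complains when map and unmap flavors disagree. Second, VETH_MCAST_FILTER_SIZE is a device-tree property, and DT cells are big-endian on the wire, so it is now read through a __be32 pointer and converted; on little-endian POWER the old code produced a byte-swapped filter size. The endian-safe read, sketched:

/* Sketch: OF properties are big-endian regardless of host endianness. */
__be32 *prop = (__be32 *)vio_get_attribute(dev, VETH_MCAST_FILTER_SIZE, NULL);

if (!prop)
	return -EINVAL;
adapter->mcastFilterSize = be32_to_cpu(*prop);
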
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 01c587f..4e9bf34 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * IBM Power Virtual Ethernet Device Driver
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Copyright (C) IBM Corporation, 2003, 2010
  *
  * Authors: Dave Larson <larson1@us.ibm.com>
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5ab21a1..0686ded 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /**************************************************************************/
 /*                                                                        */
 /*  IBM System i and System p Virtual NIC Device Driver                   */
@@ -6,18 +7,6 @@
 /*  Thomas Falcon (tlfalcon@linux.vnet.ibm.com)                           */
 /*  John Allen (jallen@linux.vnet.ibm.com)                                */
 /*                                                                        */
-/*  This program is free software; you can redistribute it and/or modify  */
-/*  it under the terms of the GNU General Public License as published by  */
-/*  the Free Software Foundation; either version 2 of the License, or     */
-/*  (at your option) any later version.                                   */
-/*                                                                        */
-/*  This program is distributed in the hope that it will be useful,       */
-/*  but WITHOUT ANY WARRANTY; without even the implied warranty of        */
-/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
-/*  GNU General Public License for more details.                          */
-/*                                                                        */
-/*  You should have received a copy of the GNU General Public License     */
-/*  along with this program.                                              */
 /*                                                                        */
 /* This module contains the implementation of a virtual ethernet device   */
 /* for use with IBM i/p Series LPAR Linux. It utilizes the logical LAN    */
@@ -118,8 +107,9 @@
 static int ibmvnic_init(struct ibmvnic_adapter *);
 static int ibmvnic_reset_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
-static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
+static int __ibmvnic_set_mac(struct net_device *, u8 *);
 static int init_crq_queue(struct ibmvnic_adapter *adapter);
+static int send_query_phys_parms(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
 	char name[ETH_GSTRING_LEN];
@@ -438,9 +428,10 @@
 		if (rx_pool->buff_size != be64_to_cpu(size_array[i])) {
 			free_long_term_buff(adapter, &rx_pool->long_term_buff);
 			rx_pool->buff_size = be64_to_cpu(size_array[i]);
-			alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
-					     rx_pool->size *
-					     rx_pool->buff_size);
+			rc = alloc_long_term_buff(adapter,
+						  &rx_pool->long_term_buff,
+						  rx_pool->size *
+						  rx_pool->buff_size);
 		} else {
 			rc = reset_long_term_buff(adapter,
 						  &rx_pool->long_term_buff);
@@ -706,9 +697,9 @@
 			return rc;
 		}
 
-		init_one_tx_pool(netdev, &adapter->tso_pool[i],
-				 IBMVNIC_TSO_BUFS,
-				 IBMVNIC_TSO_BUF_SZ);
+		rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
+				      IBMVNIC_TSO_BUFS,
+				      IBMVNIC_TSO_BUF_SZ);
 		if (rc) {
 			release_tx_pools(adapter);
 			return rc;
@@ -773,11 +764,8 @@
 		return;
 
 	for (i = 0; i < adapter->num_active_rx_napi; i++) {
-		if (&adapter->napi[i]) {
-			netdev_dbg(adapter->netdev,
-				   "Releasing napi[%d]\n", i);
-			netif_napi_del(&adapter->napi[i]);
-		}
+		netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i);
+		netif_napi_del(&adapter->napi[i]);
 	}
 
 	kfree(adapter->napi);
@@ -851,11 +839,7 @@
 		}
 	} while (retry);
 
-	/* handle pending MAC address changes after successful login */
-	if (adapter->mac_change_pending) {
-		__ibmvnic_set_mac(netdev, &adapter->desired.mac);
-		adapter->mac_change_pending = false;
-	}
+	__ibmvnic_set_mac(netdev, adapter->mac_addr);
 
 	return 0;
 }
@@ -1117,7 +1101,6 @@
 	}
 
 	rc = __ibmvnic_open(netdev);
-	netif_carrier_on(netdev);
 
 	return rc;
 }
@@ -1224,7 +1207,7 @@
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
 	/* ensure that transmissions are stopped if called by do_reset */
-	if (adapter->resetting)
+	if (test_bit(0, &adapter->resetting))
 		netif_tx_disable(netdev);
 	else
 		netif_tx_stop_all_queues(netdev);
@@ -1419,7 +1402,7 @@
 	return 0;
 }
 
-static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int queue_num = skb_get_queue_mapping(skb);
@@ -1443,9 +1426,9 @@
 	u64 *handle_array;
 	int index = 0;
 	u8 proto = 0;
-	int ret = 0;
+	netdev_tx_t ret = NETDEV_TX_OK;
 
-	if (adapter->resetting) {
+	if (test_bit(0, &adapter->resetting)) {
 		if (!netif_subqueue_stopped(netdev, skb))
 			netif_stop_subqueue(netdev, queue_num);
 		dev_kfree_skb_any(skb);
@@ -1502,7 +1485,7 @@
 
 			memcpy(dst + cur,
 			       page_address(skb_frag_page(frag)) +
-			       frag->page_offset, skb_frag_size(frag));
+			       skb_frag_off(frag), skb_frag_size(frag));
 			cur += skb_frag_size(frag);
 		}
 	} else {
@@ -1585,6 +1568,8 @@
 		lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
 					       (u64)tx_buff->indir_dma,
 					       (u64)num_entries);
+		dma_unmap_single(dev, tx_buff->indir_dma,
+				 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
 	} else {
 		tx_buff->num_entries = num_entries;
 		lpar_rc = send_subcrq(adapter, handle_array[queue_num],
@@ -1688,28 +1673,40 @@
 	}
 }
 
-static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
+static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	struct sockaddr *addr = p;
 	union ibmvnic_crq crq;
 	int rc;
 
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
+	if (!is_valid_ether_addr(dev_addr)) {
+		rc = -EADDRNOTAVAIL;
+		goto err;
+	}
 
 	memset(&crq, 0, sizeof(crq));
 	crq.change_mac_addr.first = IBMVNIC_CRQ_CMD;
 	crq.change_mac_addr.cmd = CHANGE_MAC_ADDR;
-	ether_addr_copy(&crq.change_mac_addr.mac_addr[0], addr->sa_data);
+	ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr);
 
 	init_completion(&adapter->fw_done);
 	rc = ibmvnic_send_crq(adapter, &crq);
-	if (rc)
-		return rc;
+	if (rc) {
+		rc = -EIO;
+		goto err;
+	}
+
 	wait_for_completion(&adapter->fw_done);
 	/* netdev->dev_addr is changed in handle_change_mac_rsp function */
-	return adapter->fw_done_rc ? -EIO : 0;
+	if (adapter->fw_done_rc) {
+		rc = -EIO;
+		goto err;
+	}
+
+	return 0;
+err:
+	ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
+	return rc;
 }
 
 static int ibmvnic_set_mac(struct net_device *netdev, void *p)
@@ -1718,18 +1715,95 @@
 	struct sockaddr *addr = p;
 	int rc;
 
-	if (adapter->state == VNIC_PROBED) {
-		memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr));
-		adapter->mac_change_pending = true;
-		return 0;
-	}
-
-	rc = __ibmvnic_set_mac(netdev, addr);
+	rc = 0;
+	ether_addr_copy(adapter->mac_addr, addr->sa_data);
+	if (adapter->state != VNIC_PROBED)
+		rc = __ibmvnic_set_mac(netdev, addr->sa_data);
 
 	return rc;
 }
 
 /**
+ * do_change_param_reset returns zero if we are able to keep processing reset
+ * events, or non-zero if we hit a fatal error and must halt.
+ */
+static int do_change_param_reset(struct ibmvnic_adapter *adapter,
+				 struct ibmvnic_rwi *rwi,
+				 u32 reset_state)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i, rc;
+
+	netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
+		   rwi->reset_reason);
+
+	netif_carrier_off(netdev);
+	adapter->reset_reason = rwi->reset_reason;
+
+	ibmvnic_cleanup(netdev);
+
+	if (reset_state == VNIC_OPEN) {
+		rc = __ibmvnic_close(netdev);
+		if (rc)
+			return rc;
+	}
+
+	release_resources(adapter);
+	release_sub_crqs(adapter, 1);
+	release_crq_queue(adapter);
+
+	adapter->state = VNIC_PROBED;
+
+	rc = init_crq_queue(adapter);
+
+	if (rc) {
+		netdev_err(adapter->netdev,
+			   "Couldn't initialize crq. rc=%d\n", rc);
+		return rc;
+	}
+
+	rc = ibmvnic_reset_init(adapter);
+	if (rc)
+		return IBMVNIC_INIT_FAILED;
+
+	/* If the adapter was in PROBE state prior to the reset,
+	 * exit here.
+	 */
+	if (reset_state == VNIC_PROBED)
+		return 0;
+
+	rc = ibmvnic_login(netdev);
+	if (rc) {
+		adapter->state = reset_state;
+		return rc;
+	}
+
+	rc = init_resources(adapter);
+	if (rc)
+		return rc;
+
+	ibmvnic_disable_irqs(adapter);
+
+	adapter->state = VNIC_CLOSED;
+
+	if (reset_state == VNIC_CLOSED)
+		return 0;
+
+	rc = __ibmvnic_open(netdev);
+	if (rc)
+		return IBMVNIC_OPEN_FAILED;
+
+	/* refresh device's multicast list */
+	ibmvnic_set_multi(netdev);
+
+	/* kick napi */
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_schedule(&adapter->napi[i]);
+
+	return 0;
+}
+
+/**
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
  */
@@ -1744,6 +1818,8 @@
 	netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
 		   rwi->reset_reason);
 
+	rtnl_lock();
+
 	netif_carrier_off(netdev);
 	adapter->reset_reason = rwi->reset_reason;
 
@@ -1754,18 +1830,28 @@
 
 	ibmvnic_cleanup(netdev);
 
-	if (adapter->reset_reason != VNIC_RESET_MOBILITY &&
+	if (reset_state == VNIC_OPEN &&
+	    adapter->reset_reason != VNIC_RESET_MOBILITY &&
 	    adapter->reset_reason != VNIC_RESET_FAILOVER) {
-		rc = __ibmvnic_close(netdev);
-		if (rc)
-			return rc;
-	}
+		adapter->state = VNIC_CLOSING;
 
-	if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
-	    adapter->wait_for_reset) {
-		release_resources(adapter);
-		release_sub_crqs(adapter, 1);
-		release_crq_queue(adapter);
+		/* Release the RTNL lock before link state change and
+		 * re-acquire after the link state change to allow
+		 * linkwatch_event to grab the RTNL lock and run during
+		 * a reset.
+		 */
+		rtnl_unlock();
+		rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+		rtnl_lock();
+		if (rc)
+			goto out;
+
+		if (adapter->state != VNIC_CLOSING) {
+			rc = -1;
+			goto out;
+		}
+
+		adapter->state = VNIC_CLOSED;
 	}
 
 	if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
@@ -1774,9 +1860,7 @@
 		 */
 		adapter->state = VNIC_PROBED;
 
-		if (adapter->wait_for_reset) {
-			rc = init_crq_queue(adapter);
-		} else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+		if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
 			rc = ibmvnic_reenable_crq_queue(adapter);
 			release_sub_crqs(adapter, 1);
 		} else {
@@ -1788,36 +1872,35 @@
 		if (rc) {
 			netdev_err(adapter->netdev,
 				   "Couldn't initialize crq. rc=%d\n", rc);
-			return rc;
+			goto out;
 		}
 
 		rc = ibmvnic_reset_init(adapter);
-		if (rc)
-			return IBMVNIC_INIT_FAILED;
+		if (rc) {
+			rc = IBMVNIC_INIT_FAILED;
+			goto out;
+		}
 
 		/* If the adapter was in PROBE state prior to the reset,
 		 * exit here.
 		 */
-		if (reset_state == VNIC_PROBED)
-			return 0;
+		if (reset_state == VNIC_PROBED) {
+			rc = 0;
+			goto out;
+		}
 
 		rc = ibmvnic_login(netdev);
 		if (rc) {
 			adapter->state = reset_state;
-			return rc;
+			goto out;
 		}
 
-		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
-		    adapter->wait_for_reset) {
-			rc = init_resources(adapter);
-			if (rc)
-				return rc;
-		} else if (adapter->req_rx_queues != old_num_rx_queues ||
-			   adapter->req_tx_queues != old_num_tx_queues ||
-			   adapter->req_rx_add_entries_per_subcrq !=
-							old_num_rx_slots ||
-			   adapter->req_tx_entries_per_subcrq !=
-							old_num_tx_slots) {
+		if (adapter->req_rx_queues != old_num_rx_queues ||
+		    adapter->req_tx_queues != old_num_tx_queues ||
+		    adapter->req_rx_add_entries_per_subcrq !=
+		    old_num_rx_slots ||
+		    adapter->req_tx_entries_per_subcrq !=
+		    old_num_tx_slots) {
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
 			release_napi(adapter);
@@ -1825,45 +1908,48 @@
 
 			rc = init_resources(adapter);
 			if (rc)
-				return rc;
+				goto out;
 
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)
-				return rc;
+				goto out;
 
 			rc = reset_rx_pools(adapter);
 			if (rc)
-				return rc;
+				goto out;
 		}
 		ibmvnic_disable_irqs(adapter);
 	}
 	adapter->state = VNIC_CLOSED;
 
-	if (reset_state == VNIC_CLOSED)
-		return 0;
+	if (reset_state == VNIC_CLOSED) {
+		rc = 0;
+		goto out;
+	}
 
 	rc = __ibmvnic_open(netdev);
 	if (rc) {
-		if (list_empty(&adapter->rwi_list))
-			adapter->state = VNIC_CLOSED;
-		else
-			adapter->state = reset_state;
-
-		return 0;
+		rc = IBMVNIC_OPEN_FAILED;
+		goto out;
 	}
 
+	/* refresh device's multicast list */
+	ibmvnic_set_multi(netdev);
+
 	/* kick napi */
 	for (i = 0; i < adapter->req_rx_queues; i++)
 		napi_schedule(&adapter->napi[i]);
 
-	if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
-	    adapter->reset_reason != VNIC_RESET_CHANGE_PARAM)
-		netdev_notify_peers(netdev);
+	if (adapter->reset_reason != VNIC_RESET_FAILOVER)
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
 
-	netif_carrier_on(netdev);
+	rc = 0;
 
-	return 0;
+out:
+	rtnl_unlock();
+
+	return rc;
 }
 
 static int do_hard_reset(struct ibmvnic_adapter *adapter,
@@ -1888,6 +1974,7 @@
 	 */
 	adapter->state = VNIC_PROBED;
 
+	reinit_completion(&adapter->init_done);
 	rc = init_crq_queue(adapter);
 	if (rc) {
 		netdev_err(adapter->netdev,
@@ -1922,16 +2009,8 @@
 		return 0;
 
 	rc = __ibmvnic_open(netdev);
-	if (rc) {
-		if (list_empty(&adapter->rwi_list))
-			adapter->state = VNIC_CLOSED;
-		else
-			adapter->state = reset_state;
-
-		return 0;
-	}
-
-	netif_carrier_on(netdev);
+	if (rc)
+		return IBMVNIC_OPEN_FAILED;
 
 	return 0;
 }
@@ -1939,8 +2018,9 @@
 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_rwi *rwi;
+	unsigned long flags;
 
-	mutex_lock(&adapter->rwi_lock);
+	spin_lock_irqsave(&adapter->rwi_lock, flags);
 
 	if (!list_empty(&adapter->rwi_list)) {
 		rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi,
@@ -1950,7 +2030,7 @@
 		rwi = NULL;
 	}
 
-	mutex_unlock(&adapter->rwi_lock);
+	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
 	return rwi;
 }
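
The rwi_lock switch from a mutex to spin_lock_irqsave() pairs with the GFP_KERNEL -> GFP_ATOMIC change further down: reset requests can now be queued from contexts that must not sleep, so both the lock and the allocation taken under it have to be atomic. The consumer side, as a sketch (rwi_pop is a hypothetical name):

/* Sketch: irq-safe pop from the reset-work-item list. */
static struct ibmvnic_rwi *rwi_pop(struct ibmvnic_adapter *adapter)
{
	struct ibmvnic_rwi *rwi = NULL;
	unsigned long flags;

	spin_lock_irqsave(&adapter->rwi_lock, flags);
	if (!list_empty(&adapter->rwi_list)) {
		rwi = list_first_entry(&adapter->rwi_list,
				       struct ibmvnic_rwi, list);
		list_del(&rwi->list);
	}
	spin_unlock_irqrestore(&adapter->rwi_lock, flags);

	return rwi;
}
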
 
@@ -1969,42 +2049,65 @@
 {
 	struct ibmvnic_rwi *rwi;
 	struct ibmvnic_adapter *adapter;
-	struct net_device *netdev;
-	bool we_lock_rtnl = false;
 	u32 reset_state;
 	int rc = 0;
 
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
-	netdev = adapter->netdev;
 
-	/* netif_set_real_num_xx_queues needs to take rtnl lock here
-	 * unless wait_for_reset is set, in which case the rtnl lock
-	 * has already been taken before initializing the reset
-	 */
-	if (!adapter->wait_for_reset) {
-		rtnl_lock();
-		we_lock_rtnl = true;
+	if (test_and_set_bit_lock(0, &adapter->resetting)) {
+		schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
+				      IBMVNIC_RESET_DELAY);
+		return;
 	}
+
 	reset_state = adapter->state;
 
 	rwi = get_next_rwi(adapter);
 	while (rwi) {
-		if (adapter->force_reset_recovery) {
-			adapter->force_reset_recovery = false;
-			rc = do_hard_reset(adapter, rwi, reset_state);
+		if (adapter->state == VNIC_REMOVING ||
+		    adapter->state == VNIC_REMOVED) {
+			kfree(rwi);
+			rc = EBUSY;
+			break;
+		}
+
+		if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+			/* CHANGE_PARAM requestor holds rtnl_lock */
+			rc = do_change_param_reset(adapter, rwi, reset_state);
+		} else if (adapter->force_reset_recovery) {
+			/* Transport event occurred during previous reset */
+			if (adapter->wait_for_reset) {
+				/* Previous was CHANGE_PARAM; caller locked */
+				adapter->force_reset_recovery = false;
+				rc = do_hard_reset(adapter, rwi, reset_state);
+			} else {
+				rtnl_lock();
+				adapter->force_reset_recovery = false;
+				rc = do_hard_reset(adapter, rwi, reset_state);
+				rtnl_unlock();
+			}
 		} else {
 			rc = do_reset(adapter, rwi, reset_state);
 		}
 		kfree(rwi);
-		if (rc && rc != IBMVNIC_INIT_FAILED &&
+		if (rc == IBMVNIC_OPEN_FAILED) {
+			if (list_empty(&adapter->rwi_list))
+				adapter->state = VNIC_CLOSED;
+			else
+				adapter->state = reset_state;
+			rc = 0;
+		} else if (rc && rc != IBMVNIC_INIT_FAILED &&
 		    !adapter->force_reset_recovery)
 			break;
 
 		rwi = get_next_rwi(adapter);
+
+		if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+			    rwi->reset_reason == VNIC_RESET_MOBILITY))
+			adapter->force_reset_recovery = true;
 	}
 
 	if (adapter->wait_for_reset) {
-		adapter->wait_for_reset = false;
 		adapter->reset_done_rc = rc;
 		complete(&adapter->reset_done);
 	}
@@ -2014,9 +2117,16 @@
 		free_all_rwi(adapter);
 	}
 
-	adapter->resetting = false;
-	if (we_lock_rtnl)
-		rtnl_unlock();
+	clear_bit_unlock(0, &adapter->resetting);
+}
+
+static void __ibmvnic_delayed_reset(struct work_struct *work)
+{
+	struct ibmvnic_adapter *adapter;
+
+	adapter = container_of(work, struct ibmvnic_adapter,
+			       ibmvnic_delayed_reset.work);
+	__ibmvnic_reset(&adapter->ibmvnic_reset);
 }
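
adapter->resetting also becomes bit 0 of an unsigned long so __ibmvnic_reset() can use test_and_set_bit_lock()/clear_bit_unlock() as a lightweight mutual-exclusion flag: exactly one reset worker drains the rwi list at a time, and a contender defers itself through the new ibmvnic_delayed_reset work instead of running concurrently. Reduced to its skeleton (reset_worker is a stand-in name):

/* Sketch: single-owner worker; a loser reschedules instead of racing. */
static void reset_worker(struct work_struct *work)
{
	struct ibmvnic_adapter *adapter =
		container_of(work, struct ibmvnic_adapter, ibmvnic_reset);

	if (test_and_set_bit_lock(0, &adapter->resetting)) {
		schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
				      IBMVNIC_RESET_DELAY);
		return;
	}

	/* ... process queued reset work items ... */

	clear_bit_unlock(0, &adapter->resetting);
}
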
 
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -2025,6 +2135,7 @@
 	struct list_head *entry, *tmp_entry;
 	struct ibmvnic_rwi *rwi, *tmp;
 	struct net_device *netdev = adapter->netdev;
+	unsigned long flags;
 	int ret;
 
 	if (adapter->state == VNIC_REMOVING ||
@@ -2041,21 +2152,21 @@
 		goto err;
 	}
 
-	mutex_lock(&adapter->rwi_lock);
+	spin_lock_irqsave(&adapter->rwi_lock, flags);
 
 	list_for_each(entry, &adapter->rwi_list) {
 		tmp = list_entry(entry, struct ibmvnic_rwi, list);
 		if (tmp->reset_reason == reason) {
 			netdev_dbg(netdev, "Skipping matching reset\n");
-			mutex_unlock(&adapter->rwi_lock);
+			spin_unlock_irqrestore(&adapter->rwi_lock, flags);
 			ret = EBUSY;
 			goto err;
 		}
 	}
 
-	rwi = kzalloc(sizeof(*rwi), GFP_KERNEL);
+	rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
 	if (!rwi) {
-		mutex_unlock(&adapter->rwi_lock);
+		spin_unlock_irqrestore(&adapter->rwi_lock, flags);
 		ibmvnic_close(netdev);
 		ret = ENOMEM;
 		goto err;
@@ -2069,15 +2180,12 @@
 	}
 	rwi->reset_reason = reason;
 	list_add_tail(&rwi->list, &adapter->rwi_list);
-	mutex_unlock(&adapter->rwi_lock);
-	adapter->resetting = true;
+	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
 	netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
 	schedule_work(&adapter->ibmvnic_reset);
 
 	return 0;
 err:
-	if (adapter->wait_for_reset)
-		adapter->wait_for_reset = false;
 	return -ret;
 }
 
@@ -2117,7 +2225,7 @@
 		u16 offset;
 		u8 flags = 0;
 
-		if (unlikely(adapter->resetting &&
+		if (unlikely(test_bit(0, &adapter->resetting) &&
 			     adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
 			enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
 			napi_complete_done(napi, frames_processed);
@@ -2279,23 +2387,20 @@
 static int ibmvnic_get_link_ksettings(struct net_device *netdev,
 				      struct ethtool_link_ksettings *cmd)
 {
-	u32 supported, advertising;
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
 
-	supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
-			  SUPPORTED_FIBRE);
-	advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
-			    ADVERTISED_FIBRE);
-	cmd->base.speed = SPEED_1000;
-	cmd->base.duplex = DUPLEX_FULL;
+	rc = send_query_phys_parms(adapter);
+	if (rc) {
+		adapter->speed = SPEED_UNKNOWN;
+		adapter->duplex = DUPLEX_UNKNOWN;
+	}
+	cmd->base.speed = adapter->speed;
+	cmd->base.duplex = adapter->duplex;
 	cmd->base.port = PORT_FIBRE;
 	cmd->base.phy_address = 0;
 	cmd->base.autoneg = AUTONEG_ENABLE;
 
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
-
 	return 0;
 }
 
@@ -2339,8 +2444,13 @@
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
-	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
+	if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
+		ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+		ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
+	} else {
+		ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
+		ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
+	}
 	ring->rx_mini_max_pending = 0;
 	ring->rx_jumbo_max_pending = 0;
 	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
@@ -2353,21 +2463,23 @@
 				 struct ethtool_ringparam *ring)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int ret;
 
-	if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq  ||
-	    ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
-		netdev_err(netdev, "Invalid request.\n");
-		netdev_err(netdev, "Max tx buffers = %llu\n",
-			   adapter->max_rx_add_entries_per_subcrq);
-		netdev_err(netdev, "Max rx buffers = %llu\n",
-			   adapter->max_tx_entries_per_subcrq);
-		return -EINVAL;
-	}
-
+	ret = 0;
 	adapter->desired.rx_entries = ring->rx_pending;
 	adapter->desired.tx_entries = ring->tx_pending;
 
-	return wait_for_reset(adapter);
+	ret = wait_for_reset(adapter);
+
+	if (!ret &&
+	    (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending ||
+	     adapter->req_tx_entries_per_subcrq != ring->tx_pending))
+		netdev_info(netdev,
+			    "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
+			    ring->rx_pending, ring->tx_pending,
+			    adapter->req_rx_add_entries_per_subcrq,
+			    adapter->req_tx_entries_per_subcrq);
+	return ret;
 }
 
 static void ibmvnic_get_channels(struct net_device *netdev,
@@ -2375,8 +2487,14 @@
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-	channels->max_rx = adapter->max_rx_queues;
-	channels->max_tx = adapter->max_tx_queues;
+	if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
+		channels->max_rx = adapter->max_rx_queues;
+		channels->max_tx = adapter->max_tx_queues;
+	} else {
+		channels->max_rx = IBMVNIC_MAX_QUEUES;
+		channels->max_tx = IBMVNIC_MAX_QUEUES;
+	}
+
 	channels->max_other = 0;
 	channels->max_combined = 0;
 	channels->rx_count = adapter->req_rx_queues;
@@ -2389,11 +2507,23 @@
 				struct ethtool_channels *channels)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int ret;
 
+	ret = 0;
 	adapter->desired.rx_queues = channels->rx_count;
 	adapter->desired.tx_queues = channels->tx_count;
 
-	return wait_for_reset(adapter);
+	ret = wait_for_reset(adapter);
+
+	if (!ret &&
+	    (adapter->req_rx_queues != channels->rx_count ||
+	     adapter->req_tx_queues != channels->tx_count))
+		netdev_info(netdev,
+			    "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
+			    channels->rx_count, channels->tx_count,
+			    adapter->req_rx_queues, adapter->req_tx_queues);
+	return ret;
+
 }
 
 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -2401,32 +2531,43 @@
 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
 	int i;
 
-	if (stringset != ETH_SS_STATS)
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_SIZE(ibmvnic_stats);
+				i++, data += ETH_GSTRING_LEN)
+			memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+
+		for (i = 0; i < adapter->req_tx_queues; i++) {
+			snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+			data += ETH_GSTRING_LEN;
+
+			snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+			data += ETH_GSTRING_LEN;
+
+			snprintf(data, ETH_GSTRING_LEN,
+				 "tx%d_dropped_packets", i);
+			data += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < adapter->req_rx_queues; i++) {
+			snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+			data += ETH_GSTRING_LEN;
+
+			snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+			data += ETH_GSTRING_LEN;
+
+			snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+			data += ETH_GSTRING_LEN;
+		}
+		break;
+
+	case ETH_SS_PRIV_FLAGS:
+		for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++)
+			strcpy(data + i * ETH_GSTRING_LEN,
+			       ibmvnic_priv_flags[i]);
+		break;
+	default:
 		return;
-
-	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
-		memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
-
-	for (i = 0; i < adapter->req_tx_queues; i++) {
-		snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
-		data += ETH_GSTRING_LEN;
-
-		snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
-		data += ETH_GSTRING_LEN;
-
-		snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
-		data += ETH_GSTRING_LEN;
-	}
-
-	for (i = 0; i < adapter->req_rx_queues; i++) {
-		snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
-		data += ETH_GSTRING_LEN;
-
-		snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
-		data += ETH_GSTRING_LEN;
-
-		snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
-		data += ETH_GSTRING_LEN;
 	}
 }
 
@@ -2439,6 +2580,8 @@
 		return ARRAY_SIZE(ibmvnic_stats) +
 		       adapter->req_tx_queues * NUM_TX_STATS +
 		       adapter->req_rx_queues * NUM_RX_STATS;
+	case ETH_SS_PRIV_FLAGS:
+		return ARRAY_SIZE(ibmvnic_priv_flags);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2489,6 +2632,25 @@
 	}
 }
 
+static u32 ibmvnic_get_priv_flags(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->priv_flags;
+}
+
+static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES);
+
+	if (which_maxes)
+		adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES;
+	else
+		adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES;
+
+	return 0;
+}
 static const struct ethtool_ops ibmvnic_ethtool_ops = {
 	.get_drvinfo		= ibmvnic_get_drvinfo,
 	.get_msglevel		= ibmvnic_get_msglevel,
@@ -2502,6 +2664,8 @@
 	.get_sset_count         = ibmvnic_get_sset_count,
 	.get_ethtool_stats	= ibmvnic_get_ethtool_stats,
 	.get_link_ksettings	= ibmvnic_get_link_ksettings,
+	.get_priv_flags		= ibmvnic_get_priv_flags,
+	.set_priv_flags		= ibmvnic_set_priv_flags,
 };
 
 /* Routines for managing CRQs/sCRQs  */
@@ -2712,12 +2876,15 @@
 		return 1;
 	}
 
-	if (adapter->resetting &&
+	if (test_bit(0, &adapter->resetting) &&
 	    adapter->reset_reason == VNIC_RESET_MOBILITY) {
 		u64 val = (0xff000000) | scrq->hw_irq;
 
 		rc = plpar_hcall_norets(H_EOI, val);
-		if (rc)
+		/* H_EOI fails with rc = H_FUNCTION when running in
+		 * XIVE mode; this is expected rather than an error.
+		 */
+		if (rc && (rc != H_FUNCTION))
 			dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
 				val, rc);
 	}
@@ -2739,7 +2906,6 @@
 	union sub_crq *next;
 	int index;
 	int i, j;
-	u8 *first;
 
 restart_loop:
 	while (pending_scrq(adapter, scrq)) {
@@ -2769,14 +2935,6 @@
 
 				txbuff->data_dma[j] = 0;
 			}
-			/* if sub_crq was sent indirectly */
-			first = &txbuff->indir_arr[0].generic.first;
-			if (*first == IBMVNIC_CRQ_CMD) {
-				dma_unmap_single(dev, txbuff->indir_dma,
-						 sizeof(txbuff->indir_arr),
-						 DMA_TO_DEVICE);
-				*first = 0;
-			}
 
 			if (txbuff->last_frag) {
 				dev_kfree_skb_any(txbuff->skb);
@@ -2864,8 +3022,10 @@
 			goto req_tx_irq_failed;
 		}
 
+		snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-tx%d",
+			 adapter->vdev->unit_address, i);
 		rc = request_irq(scrq->irq, ibmvnic_interrupt_tx,
-				 0, "ibmvnic_tx", scrq);
+				 0, scrq->name, scrq);
 
 		if (rc) {
 			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
@@ -2885,8 +3045,10 @@
 			dev_err(dev, "Error mapping irq\n");
 			goto req_rx_irq_failed;
 		}
+		snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-rx%d",
+			 adapter->vdev->unit_address, i);
 		rc = request_irq(scrq->irq, ibmvnic_interrupt_rx,
-				 0, "ibmvnic_rx", scrq);
+				 0, scrq->name, scrq);
 		if (rc) {
 			dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
 				scrq->irq, rc);
@@ -3267,7 +3429,7 @@
 	if (rc) {
 		if (rc == H_CLOSED) {
 			dev_warn(dev, "CRQ Queue closed\n");
-			if (adapter->resetting)
+			if (test_bit(0, &adapter->resetting))
 				ibmvnic_reset(adapter, VNIC_RESET_FATAL);
 		}
 
@@ -3703,6 +3865,7 @@
 {
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
+	netdev_features_t old_hw_features = 0;
 	union ibmvnic_crq crq;
 	int i;
 
@@ -3778,24 +3941,41 @@
 	adapter->ip_offload_ctrl.large_rx_ipv4 = 0;
 	adapter->ip_offload_ctrl.large_rx_ipv6 = 0;
 
-	adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO;
+	if (adapter->state != VNIC_PROBING) {
+		old_hw_features = adapter->netdev->hw_features;
+		adapter->netdev->hw_features = 0;
+	}
+
+	adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
 
 	if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
-		adapter->netdev->features |= NETIF_F_IP_CSUM;
+		adapter->netdev->hw_features |= NETIF_F_IP_CSUM;
 
 	if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum)
-		adapter->netdev->features |= NETIF_F_IPV6_CSUM;
+		adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM;
 
 	if ((adapter->netdev->features &
 	    (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
-		adapter->netdev->features |= NETIF_F_RXCSUM;
+		adapter->netdev->hw_features |= NETIF_F_RXCSUM;
 
 	if (buf->large_tx_ipv4)
-		adapter->netdev->features |= NETIF_F_TSO;
+		adapter->netdev->hw_features |= NETIF_F_TSO;
 	if (buf->large_tx_ipv6)
-		adapter->netdev->features |= NETIF_F_TSO6;
+		adapter->netdev->hw_features |= NETIF_F_TSO6;
 
-	adapter->netdev->hw_features |= adapter->netdev->features;
+	if (adapter->state == VNIC_PROBING) {
+		adapter->netdev->features |= adapter->netdev->hw_features;
+	} else if (old_hw_features != adapter->netdev->hw_features) {
+		netdev_features_t tmp = 0;
+
+		/* disable features no longer supported */
+		adapter->netdev->features &= adapter->netdev->hw_features;
+		/* turn on features now supported if previously enabled */
+		tmp = (old_hw_features ^ adapter->netdev->hw_features) &
+			adapter->netdev->hw_features;
+		adapter->netdev->features |=
+				tmp & adapter->netdev->wanted_features;
+	}
 
 	memset(&crq, 0, sizeof(crq));
 	crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
@@ -3860,8 +4040,8 @@
 		dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc);
 		goto out;
 	}
-	memcpy(netdev->dev_addr, &crq->change_mac_addr_rsp.mac_addr[0],
-	       ETH_ALEN);
+	ether_addr_copy(netdev->dev_addr,
+			&crq->change_mac_addr_rsp.mac_addr[0]);
 out:
 	complete(&adapter->fw_done);
 	return rc;
@@ -4220,6 +4400,74 @@
 	}
 }
 
+static int send_query_phys_parms(struct ibmvnic_adapter *adapter)
+{
+	union ibmvnic_crq crq;
+	int rc;
+
+	memset(&crq, 0, sizeof(crq));
+	crq.query_phys_parms.first = IBMVNIC_CRQ_CMD;
+	crq.query_phys_parms.cmd = QUERY_PHYS_PARMS;
+	init_completion(&adapter->fw_done);
+	rc = ibmvnic_send_crq(adapter, &crq);
+	if (rc)
+		return rc;
+	wait_for_completion(&adapter->fw_done);
+	return adapter->fw_done_rc ? -EIO : 0;
+}
+
+static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
+				       struct ibmvnic_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int rc;
+	__be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed);
+
+	rc = crq->query_phys_parms_rsp.rc.code;
+	if (rc) {
+		netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc);
+		return rc;
+	}
+	switch (rspeed) {
+	case IBMVNIC_10MBPS:
+		adapter->speed = SPEED_10;
+		break;
+	case IBMVNIC_100MBPS:
+		adapter->speed = SPEED_100;
+		break;
+	case IBMVNIC_1GBPS:
+		adapter->speed = SPEED_1000;
+		break;
+	case IBMVNIC_10GBP:
+		adapter->speed = SPEED_10000;
+		break;
+	case IBMVNIC_25GBPS:
+		adapter->speed = SPEED_25000;
+		break;
+	case IBMVNIC_40GBPS:
+		adapter->speed = SPEED_40000;
+		break;
+	case IBMVNIC_50GBPS:
+		adapter->speed = SPEED_50000;
+		break;
+	case IBMVNIC_100GBPS:
+		adapter->speed = SPEED_100000;
+		break;
+	default:
+		if (netif_carrier_ok(netdev))
+			netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed);
+		adapter->speed = SPEED_UNKNOWN;
+	}
+	if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX)
+		adapter->duplex = DUPLEX_FULL;
+	else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX)
+		adapter->duplex = DUPLEX_HALF;
+	else
+		adapter->duplex = DUPLEX_UNKNOWN;
+
+	return rc;
+}
+
 static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 			       struct ibmvnic_adapter *adapter)
 {
@@ -4257,7 +4505,7 @@
 	case IBMVNIC_CRQ_XPORT_EVENT:
 		netif_carrier_off(netdev);
 		adapter->crq.active = false;
-		if (adapter->resetting)
+		if (test_bit(0, &adapter->resetting))
 			adapter->force_reset_recovery = true;
 		if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
 			dev_info(dev, "Migrated, re-enabling adapter\n");
@@ -4331,6 +4579,10 @@
 		    crq->link_state_indication.phys_link_state;
 		adapter->logical_link_state =
 		    crq->link_state_indication.logical_link_state;
+		if (adapter->phys_link_state && adapter->logical_link_state)
+			netif_carrier_on(netdev);
+		else
+			netif_carrier_off(netdev);
 		break;
 	case CHANGE_MAC_ADDR_RSP:
 		netdev_dbg(netdev, "Got MAC address change Response\n");
@@ -4368,6 +4620,10 @@
 	case GET_VPD_RSP:
 		handle_vpd_rsp(crq, adapter);
 		break;
+	case QUERY_PHYS_PARMS_RSP:
+		adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter);
+		complete(&adapter->fw_done);
+		break;
 	default:
 		netdev_err(netdev, "Got an invalid cmd type 0x%02x\n",
 			   gen_crq->cmd);
@@ -4523,8 +4779,9 @@
 		     (unsigned long)adapter);
 
 	netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq);
-	rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, IBMVNIC_NAME,
-			 adapter);
+	snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x",
+		 adapter->vdev->unit_address);
+	rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, crq->name, adapter);
 	if (rc) {
 		dev_err(dev, "Couldn't register irq 0x%x. rc=%d\n",
 			vdev->irq, rc);
@@ -4567,7 +4824,7 @@
 	old_num_rx_queues = adapter->req_rx_queues;
 	old_num_tx_queues = adapter->req_tx_queues;
 
-	init_completion(&adapter->init_done);
+	reinit_completion(&adapter->init_done);
 	adapter->init_done_rc = 0;
 	ibmvnic_send_crq_init(adapter);
 	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
@@ -4586,7 +4843,7 @@
 		return -1;
 	}
 
-	if (adapter->resetting && !adapter->wait_for_reset &&
+	if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
 	    adapter->reset_reason != VNIC_RESET_MOBILITY) {
 		if (adapter->req_rx_queues != old_num_rx_queues ||
 		    adapter->req_tx_queues != old_num_tx_queues) {
@@ -4622,7 +4879,6 @@
 
 	adapter->from_passive_init = false;
 
-	init_completion(&adapter->init_done);
 	adapter->init_done_rc = 0;
 	ibmvnic_send_crq_init(adapter);
 	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
@@ -4699,11 +4955,12 @@
 	spin_lock_init(&adapter->stats_lock);
 
 	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+	INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+			  __ibmvnic_delayed_reset);
 	INIT_LIST_HEAD(&adapter->rwi_list);
-	mutex_init(&adapter->rwi_lock);
-	adapter->resetting = false;
-
-	adapter->mac_change_pending = false;
+	spin_lock_init(&adapter->rwi_lock);
+	init_completion(&adapter->init_done);
+	clear_bit(0, &adapter->resetting);
 
 	do {
 		rc = init_crq_queue(adapter);
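
The ibmvnic.c changes above rework reset handling: the driver-wide resetting flag becomes a bit claimed with test_and_set_bit_lock(), a reset that arrives while another is in flight is parked on a delayed work item instead of blocking, and the rwi list moves from a mutex to a spinlock so resets can be queued from atomic context (hence the GFP_ATOMIC allocation). A minimal sketch of the claim-or-requeue pattern, using hypothetical names (my_adapter, my_reset_work) rather than the driver's own:

	#include <linux/workqueue.h>
	#include <linux/bitops.h>
	#include <linux/jiffies.h>

	struct my_adapter {			/* hypothetical stand-in */
		unsigned long resetting;	/* bit 0: a reset is running */
		struct delayed_work delayed_reset;
		struct work_struct reset_work;
	};

	static void my_reset_work(struct work_struct *work)
	{
		struct my_adapter *ad = container_of(work, struct my_adapter,
						     reset_work);

		/* Claim bit 0 with acquire semantics; if another reset owns
		 * it, retry later instead of blocking the workqueue.
		 */
		if (test_and_set_bit_lock(0, &ad->resetting)) {
			schedule_delayed_work(&ad->delayed_reset,
					      msecs_to_jiffies(100));
			return;
		}
		/* ... drain the queued reset work items here ... */
		clear_bit_unlock(0, &ad->resetting);	/* release pairs with claim */
	}
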
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 735f481..ebc3924 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /**************************************************************************/
 /*                                                                        */
 /*  IBM System i and System p Virtual NIC Device Driver                   */
@@ -6,18 +7,6 @@
 /*  Thomas Falcon (tlfalcon@linux.vnet.ibm.com)                           */
 /*  John Allen (jallen@linux.vnet.ibm.com)                                */
 /*                                                                        */
-/*  This program is free software; you can redistribute it and/or modify  */
-/*  it under the terms of the GNU General Public License as published by  */
-/*  the Free Software Foundation; either version 2 of the License, or     */
-/*  (at your option) any later version.                                   */
-/*                                                                        */
-/*  This program is distributed in the hope that it will be useful,       */
-/*  but WITHOUT ANY WARRANTY; without even the implied warranty of        */
-/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
-/*  GNU General Public License for more details.                          */
-/*                                                                        */
-/*  You should have received a copy of the GNU General Public License     */
-/*  along with this program.                                              */
 /*                                                                        */
 /* This module contains the implementation of a virtual ethernet device   */
 /* for use with IBM i/pSeries LPAR Linux.  It utilizes the logical LAN    */
@@ -31,6 +20,7 @@
 #define IBMVNIC_INVALID_MAP	-1
 #define IBMVNIC_STATS_TIMEOUT	1
 #define IBMVNIC_INIT_FAILED	2
+#define IBMVNIC_OPEN_FAILED	3
 
 /* basic structures plus 100 2k buffers */
 #define IBMVNIC_IO_ENTITLEMENT_DEFAULT	610305
@@ -39,7 +29,8 @@
 #define IBMVNIC_RX_WEIGHT		16
 /* when changing this, update IBMVNIC_IO_ENTITLEMENT_DEFAULT */
 #define IBMVNIC_BUFFS_PER_POOL	100
-#define IBMVNIC_MAX_QUEUES	10
+#define IBMVNIC_MAX_QUEUES	16
+#define IBMVNIC_MAX_QUEUE_SZ   4096
 
 #define IBMVNIC_TSO_BUF_SZ	65536
 #define IBMVNIC_TSO_BUFS	64
@@ -48,6 +39,13 @@
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500
 
+#define IBMVNIC_RESET_DELAY 100
+
+static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
+#define IBMVNIC_USE_SERVER_MAXES 0x1
+	"use-server-maxes"
+};
+
 struct ibmvnic_login_buffer {
 	__be32 len;
 	__be32 version;
@@ -371,11 +369,16 @@
 	u8 flags2;
 #define IBMVNIC_LOGICAL_LNK_ACTIVE 0x80
 	__be32 speed;
-#define IBMVNIC_AUTONEG		0x80
-#define IBMVNIC_10MBPS		0x40
-#define IBMVNIC_100MBPS		0x20
-#define IBMVNIC_1GBPS		0x10
-#define IBMVNIC_10GBPS		0x08
+#define IBMVNIC_AUTONEG		0x80000000
+#define IBMVNIC_10MBPS		0x40000000
+#define IBMVNIC_100MBPS		0x20000000
+#define IBMVNIC_1GBPS		0x10000000
+#define IBMVNIC_10GBP		0x08000000
+#define IBMVNIC_40GBPS		0x04000000
+#define IBMVNIC_100GBPS		0x02000000
+#define IBMVNIC_25GBPS		0x01000000
+#define IBMVNIC_50GBPS		0x00800000
+#define IBMVNIC_200GBPS		0x00400000
 	__be32 mtu;
 	struct ibmvnic_rc rc;
 } __packed __aligned(8);
@@ -844,6 +847,7 @@
 	dma_addr_t msg_token;
 	spinlock_t lock;
 	bool active;
+	char name[32];
 };
 
 union sub_crq {
@@ -870,6 +874,7 @@
 	struct sk_buff *rx_skb_top;
 	struct ibmvnic_adapter *adapter;
 	atomic_t used;
+	char name[32];
 };
 
 struct ibmvnic_long_term_buff {
@@ -956,7 +961,6 @@
 	u64 rx_entries;
 	u64 tx_entries;
 	u64 mtu;
-	struct sockaddr mac;
 };
 
 struct ibmvnic_adapter {
@@ -969,6 +973,7 @@
 	struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
 	dma_addr_t ip_offload_ctrl_tok;
 	u32 msg_enable;
+	u32 priv_flags;
 
 	/* Vital Product Data (VPD) */
 	struct ibmvnic_vpd *vpd;
@@ -992,6 +997,9 @@
 	int phys_link_state;
 	int logical_link_state;
 
+	u32 speed;
+	u8 duplex;
+
 	/* login data */
 	struct ibmvnic_login_buffer *login_buf;
 	dma_addr_t login_buf_token;
@@ -1068,13 +1076,13 @@
 	struct tasklet_struct tasklet;
 	enum vnic_state state;
 	enum ibmvnic_reset_reason reset_reason;
-	struct mutex rwi_lock;
+	spinlock_t rwi_lock;
 	struct list_head rwi_list;
 	struct work_struct ibmvnic_reset;
-	bool resetting;
+	struct delayed_work ibmvnic_delayed_reset;
+	unsigned long resetting;
 	bool napi_enabled, from_passive_init;
 
-	bool mac_change_pending;
 	bool failover_pending;
 	bool force_reset_recovery;
 
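
The matching header changes make resetting an unsigned long (atomic bitops require one), widen the link-speed flag values to the full 32-bit big-endian field, raise IBMVNIC_MAX_QUEUES, and add the string table backing a new ethtool private flag, "use-server-maxes", which selects whether ring/channel queries report firmware maxima or the driver's fixed ceilings. The general shape of an ethtool private-flags implementation looks like the following sketch (a hypothetical one-flag driver, not ibmvnic itself):

	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/ethtool.h>
	#include <linux/netdevice.h>
	#include <linux/bits.h>

	static const char my_priv_flags[][ETH_GSTRING_LEN] = {
	#define MY_FAST_MODE	BIT(0)
		"fast-mode",
	};

	static int my_get_sset_count(struct net_device *dev, int sset)
	{
		if (sset == ETH_SS_PRIV_FLAGS)
			return ARRAY_SIZE(my_priv_flags);
		return -EOPNOTSUPP;
	}

	static void my_get_strings(struct net_device *dev, u32 sset, u8 *data)
	{
		if (sset == ETH_SS_PRIV_FLAGS)
			memcpy(data, my_priv_flags, sizeof(my_priv_flags));
	}

Once get_priv_flags/set_priv_flags are wired into ethtool_ops, as in ibmvnic.c above, the flag is toggled from userspace with "ethtool --set-priv-flags <dev> use-server-maxes on".
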
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 1ab613e..154e2e8 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Intel network device configuration
 #
@@ -33,7 +34,7 @@
 	  to identify the adapter.
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/e100.rst>.
+	  <file:Documentation/networking/device_drivers/intel/e100.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called e100.
@@ -49,7 +50,7 @@
 	  <http://support.intel.com>
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/e1000.rst>.
+	  <file:Documentation/networking/device_drivers/intel/e1000.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called e1000.
@@ -68,6 +69,9 @@
 
 	  <http://support.intel.com>
 
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/intel/e1000e.rst>.
+
 	  To compile this driver as a module, choose M here. The module
 	  will be called e1000e.
 
@@ -94,7 +98,7 @@
 	  <http://support.intel.com>
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/e1000.rst>.
+	  <file:Documentation/networking/device_drivers/intel/igb.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called igb.
@@ -130,7 +134,7 @@
 	  <http://support.intel.com>
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/e1000.rst>.
+	  <file:Documentation/networking/device_drivers/intel/igbvf.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called igbvf.
@@ -147,7 +151,7 @@
 	  <http://support.intel.com>
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/ixgb.txt>.
+	  <file:Documentation/networking/device_drivers/intel/ixgb.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called ixgb.
@@ -156,6 +160,7 @@
 	tristate "Intel(R) 10GbE PCI Express adapters support"
 	depends on PCI
 	select MDIO
+	select PHYLIB
 	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Intel(R) 10GbE PCI Express family of
@@ -164,6 +169,9 @@
 
 	  <http://support.intel.com>
 
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/intel/ixgbe.rst>.
+
 	  To compile this driver as a module, choose M here. The module
 	  will be called ixgbe.
 
@@ -194,6 +202,15 @@
 
 	  If unsure, say N.
 
+config IXGBE_IPSEC
+	bool "IPSec XFRM cryptography-offload acceleration"
+	depends on IXGBE
+	depends on XFRM_OFFLOAD
+	default y
+	select XFRM_ALGO
+	---help---
+	  Enable support for IPSec offload in ixgbe.ko
+
 config IXGBEVF
 	tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support"
 	depends on PCI_MSI
@@ -205,12 +222,21 @@
 	  <http://support.intel.com>
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/ixgbevf.txt>.
+	  <file:Documentation/networking/device_drivers/intel/ixgbevf.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called ixgbevf.  MSI-X interrupt support is required
 	  for this driver to work correctly.
 
+config IXGBEVF_IPSEC
+	bool "IPSec XFRM cryptography-offload acceleration"
+	depends on IXGBEVF
+	depends on XFRM_OFFLOAD
+	default y
+	select XFRM_ALGO
+	---help---
+	  Enable support for IPSec offload in ixgbevf.ko
+
 config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	imply PTP_1588_CLOCK
@@ -222,6 +248,9 @@
 
 	  <http://support.intel.com>
 
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/intel/i40e.rst>.
+
 	  To compile this driver as a module, choose M here. The module
 	  will be called i40e.
 
@@ -235,20 +264,30 @@
 
 	  If unsure, say N.
 
+# this is here to allow seamless migration from I40EVF --> IAVF name
+# so that CONFIG_IAVF symbol will always mirror the state of CONFIG_I40EVF
+config IAVF
+	tristate
 config I40EVF
 	tristate "Intel(R) Ethernet Adaptive Virtual Function support"
+	select IAVF
 	depends on PCI_MSI
 	---help---
 	  This driver supports virtual functions for Intel XL710,
-	  X710, X722, and all devices advertising support for Intel
-	  Ethernet Adaptive Virtual Function devices. For more
+	  X710, X722, XXV710, and all devices advertising support for
+	  Intel Ethernet Adaptive Virtual Function devices. For more
 	  information on how to identify your adapter, go to the Adapter
 	  & Driver ID Guide that can be located at:
 
-	  <http://support.intel.com>
+	  <https://support.intel.com>
+
+	  This driver was formerly named i40evf.
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/intel/iavf.rst>.
 
 	  To compile this driver as a module, choose M here. The module
-	  will be called i40evf.  MSI-X interrupt support is required
+	  will be called iavf.  MSI-X interrupt support is required
 	  for this driver to work correctly.
 
 config ICE
@@ -262,6 +301,9 @@
 
 	  <http://support.intel.com>
 
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/intel/ice.rst>.
+
 	  To compile this driver as a module, choose M here. The module
 	  will be called ice.
 
@@ -277,7 +319,26 @@
 
 	  <http://support.intel.com>
 
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/intel/fm10k.rst>.
+
 	  To compile this driver as a module, choose M here. The module
 	  will be called fm10k.  MSI-X interrupt support is required
 
+config IGC
+	tristate "Intel(R) Ethernet Controller I225-LM/I225-V support"
+	default n
+	depends on PCI
+	---help---
+	  This driver supports Intel(R) Ethernet Controller I225-LM/I225-V
+	  family of adapters.
+
+	  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called igc.
+
 endif # NET_VENDOR_INTEL
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index 807a4f8..3075290 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -7,11 +7,12 @@
 obj-$(CONFIG_E1000) += e1000/
 obj-$(CONFIG_E1000E) += e1000e/
 obj-$(CONFIG_IGB) += igb/
+obj-$(CONFIG_IGC) += igc/
 obj-$(CONFIG_IGBVF) += igbvf/
 obj-$(CONFIG_IXGBE) += ixgbe/
 obj-$(CONFIG_IXGBEVF) += ixgbevf/
 obj-$(CONFIG_I40E) += i40e/
 obj-$(CONFIG_IXGB) += ixgb/
-obj-$(CONFIG_I40EVF) += i40evf/
+obj-$(CONFIG_IAVF) += iavf/
 obj-$(CONFIG_FM10K) += fm10k/
 obj-$(CONFIG_ICE) += ice/
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 27d5f27..a65d5a9 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -164,7 +164,7 @@
 
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
 MODULE_AUTHOR(DRV_COPYRIGHT);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 MODULE_FIRMWARE(FIRMWARE_D101M);
 MODULE_FIRMWARE(FIRMWARE_D101S);
@@ -1345,8 +1345,8 @@
 
 	fw = e100_request_firmware(nic);
 	/* If it's NULL, then no ucode is required */
-	if (!fw || IS_ERR(fw))
-		return PTR_ERR(fw);
+	if (IS_ERR_OR_NULL(fw))
+		return PTR_ERR_OR_ZERO(fw);
 
 	if ((err = e100_exec_cb(nic, (void *)fw, e100_setup_ucode)))
 		netif_err(nic, probe, nic->netdev,
@@ -2225,11 +2225,13 @@
 	e100_rx_clean(nic, &work_done, budget);
 	e100_tx_clean(nic);
 
-	/* If budget not fully consumed, exit the polling mode */
-	if (work_done < budget) {
-		napi_complete_done(napi, work_done);
+	/* If budget fully consumed, continue polling */
+	if (work_done == budget)
+		return budget;
+
+	/* only re-enable interrupt if stack agrees polling is really done */
+	if (likely(napi_complete_done(napi, work_done)))
 		e100_enable_irq(nic);
-	}
 
 	return work_done;
 }
@@ -2795,7 +2797,7 @@
 
 	netdev->features = features;
 	e100_exec_cb(nic, NULL, e100_configure);
-	return 0;
+	return 1;
 }
 
 static const struct net_device_ops e100_netdev_ops = {
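
e100.c picks up two recurring patterns from this update: IS_ERR_OR_NULL()/PTR_ERR_OR_ZERO() collapse the firmware-pointer check (NULL now maps to 0), and the NAPI poll routine re-enables interrupts only when napi_complete_done() confirms polling has really stopped, since it returns false while busy polling keeps the context alive. A sketch of that poll shape, with hypothetical helpers:

	#include <linux/netdevice.h>

	int my_clean_rx(struct napi_struct *napi, int budget);	/* hypothetical */
	void my_enable_irq(struct napi_struct *napi);		/* hypothetical */

	static int my_poll(struct napi_struct *napi, int budget)
	{
		int work_done = my_clean_rx(napi, budget);

		/* Budget fully consumed: stay in polling mode, don't complete. */
		if (work_done == budget)
			return budget;

		/* Re-arm interrupts only if NAPI actually left polling mode. */
		if (likely(napi_complete_done(napi, work_done)))
			my_enable_irq(napi);

		return work_done;
	}
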
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 2569a16..be56e63 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -607,6 +607,7 @@
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		rxdr[i].count = rxdr->count;
 
+	err = 0;
 	if (netif_running(adapter->netdev)) {
 		/* Try to get new resources before deleting old */
 		err = e1000_setup_all_rx_resources(adapter);
@@ -627,14 +628,13 @@
 		adapter->rx_ring = rxdr;
 		adapter->tx_ring = txdr;
 		err = e1000_up(adapter);
-		if (err)
-			goto err_setup;
 	}
 	kfree(tx_old);
 	kfree(rx_old);
 
 	clear_bit(__E1000_RESETTING, &adapter->flags);
-	return 0;
+	return err;
+
 err_setup_tx:
 	e1000_free_all_rx_resources(adapter);
 err_setup_rx:
@@ -646,7 +646,6 @@
 err_alloc_tx:
 	if (netif_running(adapter->netdev))
 		e1000_up(adapter);
-err_setup:
 	clear_bit(__E1000_RESETTING, &adapter->flags);
 	return err;
 }
@@ -937,8 +936,7 @@
 						 txdr->buffer_info[i].dma,
 						 txdr->buffer_info[i].length,
 						 DMA_TO_DEVICE);
-			if (txdr->buffer_info[i].skb)
-				dev_kfree_skb(txdr->buffer_info[i].skb);
+			dev_kfree_skb(txdr->buffer_info[i].skb);
 		}
 	}
 
@@ -993,8 +991,8 @@
 
 	txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
 	txdr->size = ALIGN(txdr->size, 4096);
-	txdr->desc = dma_zalloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
-					 GFP_KERNEL);
+	txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
+					GFP_KERNEL);
 	if (!txdr->desc) {
 		ret_val = 2;
 		goto err_nomem;
@@ -1051,8 +1049,8 @@
 	}
 
 	rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc);
-	rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
-					 GFP_KERNEL);
+	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
+					GFP_KERNEL);
 	if (!rxdr->desc) {
 		ret_val = 6;
 		goto err_nomem;
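
In e1000_ethtool.c the dma_zalloc_coherent() calls convert one-for-one to dma_alloc_coherent(), which zeroes its buffer now that the wrapper has been removed, and the "if (skb)" guards drop out because dev_kfree_skb() is NULL-safe. The allocation pattern, reduced to its essentials (my_ring is a hypothetical type):

	#include <linux/dma-mapping.h>

	struct my_ring {
		void *desc;
		dma_addr_t dma;
		size_t size;
	};

	static int my_alloc_ring(struct device *dev, struct my_ring *ring)
	{
		/* Returns zeroed, DMA-coherent memory or NULL. */
		ring->desc = dma_alloc_coherent(dev, ring->size, &ring->dma,
						GFP_KERNEL);
		return ring->desc ? 0 : -ENOMEM;
	}
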
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 2110d5f..86493fe 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -195,7 +195,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
@@ -820,7 +820,7 @@
 	else
 		e1000_reset(adapter);
 
-	return 0;
+	return 1;
 }
 
 static const struct net_device_ops e1000_netdev_ops = {
@@ -2433,7 +2433,6 @@
 	if (link) {
 		if (!netif_carrier_ok(netdev)) {
 			u32 ctrl;
-			bool txb2b = true;
 			/* update snapshot of PHY registers on LSC */
 			e1000_get_speed_and_duplex(hw,
 						   &adapter->link_speed,
@@ -2455,11 +2454,9 @@
 			adapter->tx_timeout_factor = 1;
 			switch (adapter->link_speed) {
 			case SPEED_10:
-				txb2b = false;
 				adapter->tx_timeout_factor = 16;
 				break;
 			case SPEED_100:
-				txb2b = false;
 				/* maybe add some timeout factor ? */
 				break;
 			}
@@ -2892,9 +2889,8 @@
 	}
 
 	for (f = 0; f < nr_frags; f++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 
-		frag = &skb_shinfo(skb)->frags[f];
 		len = skb_frag_size(frag);
 		offset = 0;
 
@@ -3022,7 +3018,7 @@
 	 * applicable for weak-ordered memory model archs,
 	 * such as IA-64).
 	 */
-	wmb();
+	dma_wmb();
 
 	tx_ring->next_to_use = i;
 }
@@ -3270,14 +3266,9 @@
 		/* Make sure there is space in the ring for the next send. */
 		e1000_maybe_stop_tx(netdev, tx_ring, desc_needed);
 
-		if (!skb->xmit_more ||
+		if (!netdev_xmit_more() ||
 		    netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
 			writel(tx_ring->next_to_use, hw->hw_addr + tx_ring->tdt);
-			/* we need this if more than one processor can write to
-			 * our tail at a time, it synchronizes IO on IA64/Altix
-			 * systems
-			 */
-			mmiowb();
 		}
 	} else {
 		dev_kfree_skb_any(skb);
@@ -3806,14 +3797,15 @@
 
 	adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget);
 
-	if (!tx_clean_complete)
-		work_done = budget;
+	if (!tx_clean_complete || work_done == budget)
+		return budget;
 
-	/* If budget not fully consumed, exit the polling mode */
-	if (work_done < budget) {
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
 		if (likely(adapter->itr_setting & 3))
 			e1000_set_itr(adapter);
-		napi_complete_done(napi, work_done);
 		if (!test_bit(__E1000_DOWN, &adapter->flags))
 			e1000_irq_enable(adapter);
 	}
@@ -4183,8 +4175,7 @@
 				/* an error means any chain goes out the window
 				 * too
 				 */
-				if (rx_ring->rx_skb_top)
-					dev_kfree_skb(rx_ring->rx_skb_top);
+				dev_kfree_skb(rx_ring->rx_skb_top);
 				rx_ring->rx_skb_top = NULL;
 				goto next_desc;
 			}
@@ -4547,7 +4538,7 @@
 		 * applicable for weak-ordered memory model archs,
 		 * such as IA-64).
 		 */
-		wmb();
+		dma_wmb();
 		writel(i, adapter->hw.hw_addr + rx_ring->rdt);
 	}
 }
@@ -4662,7 +4653,7 @@
 		 * applicable for weak-ordered memory model archs,
 		 * such as IA-64).
 		 */
-		wmb();
+		dma_wmb();
 		writel(i, hw->hw_addr + rx_ring->rdt);
 	}
 }
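
Three kernel-wide conversions show up in e1000_main.c: wmb() becomes dma_wmb(), which suffices (and is cheaper) for ordering CPU stores to coherent DMA memory before the doorbell write; the mmiowb() calls disappear because the kernel now provides that ordering implicitly in the locking primitives on architectures that need it; and skb->xmit_more gives way to netdev_xmit_more(), since the hint moved off the skb. Roughly, the resulting tail-bump idiom (field names hypothetical):

	#include <linux/netdevice.h>
	#include <linux/io.h>

	struct my_tx_ring {			/* hypothetical */
		u16 next_to_use;
		void __iomem *tail;
	};

	static void my_bump_tail(struct net_device *dev, struct my_tx_ring *tx)
	{
		/* Make descriptor writes visible to the device before the
		 * tail register write that tells it to fetch them.
		 */
		dma_wmb();

		if (!netdev_xmit_more() ||
		    netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
			writel(tx->next_to_use, tx->tail);
	}
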
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index 257bd59..4b103cc 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -680,7 +680,7 @@
 	ew32(TCTL, E1000_TCTL_PSP);
 	e1e_flush();
 
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	ctrl = er32(CTRL);
 
@@ -696,11 +696,16 @@
 	ret_val =
 	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
 					    &kum_reg_data);
-	if (ret_val)
-		return ret_val;
-	kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
-	e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
-					 kum_reg_data);
+	if (!ret_val) {
+		kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+		ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+						 E1000_KMRNCTRLSTA_INBAND_PARAM,
+						 kum_reg_data);
+		if (ret_val)
+			e_dbg("Error disabling far-end loopback\n");
+	} else {
+		e_dbg("Error disabling far-end loopback\n");
+	}
 
 	ret_val = e1000e_get_auto_rd_done(hw);
 	if (ret_val)
@@ -754,11 +759,19 @@
 		return ret_val;
 
 	/* Disable IBIST slave mode (far-end loopback) */
-	e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
-					&kum_reg_data);
-	kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
-	e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
-					 kum_reg_data);
+	ret_val =
+	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+					    &kum_reg_data);
+	if (!ret_val) {
+		kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+		ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+						 E1000_KMRNCTRLSTA_INBAND_PARAM,
+						 kum_reg_data);
+		if (ret_val)
+			e_dbg("Error disabling far-end loopback\n");
+	} else {
+		e_dbg("Error disabling far-end loopback\n");
+	}
 
 	/* Set the transmit descriptor write-back policy */
 	reg_data = er32(TXDCTL(0));
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index b930930..2c1bab3 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -959,7 +959,7 @@
 	ew32(TCTL, tctl);
 	e1e_flush();
 
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	/* Must acquire the MDIO ownership before MAC reset.
 	 * Ownership defaults to firmware after a reset.
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index fd550de..63c3c79 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -222,6 +222,9 @@
 #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset Asserted */
 #define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000	/* Master Req status */
 
+/* PCIm function state */
+#define E1000_STATUS_PCIM_STATE	0x40000000
+
 #define HALF_DUPLEX 1
 #define FULL_DUPLEX 2
 
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index c760dc7..6c51b1b 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -13,7 +13,6 @@
 #include <linux/io.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/crc32.h>
 #include <linux/if_vlan.h>
 #include <linux/timecounter.h>
@@ -186,12 +185,13 @@
 
 /* board specific private data structure */
 struct e1000_adapter {
-	struct timer_list watchdog_timer;
 	struct timer_list phy_info_timer;
 	struct timer_list blink_timer;
 
 	struct work_struct reset_task;
-	struct work_struct watchdog_task;
+	struct delayed_work watchdog_task;
+
+	struct workqueue_struct *e1000_workqueue;
 
 	const struct e1000_info *ei;
 
@@ -505,6 +505,9 @@
 void e1000e_ptp_init(struct e1000_adapter *adapter);
 void e1000e_ptp_remove(struct e1000_adapter *adapter);
 
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+		       struct ptp_system_timestamp *sts);
+
 static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw)
 {
 	return hw->phy.ops.reset(hw);
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 02ebf20..de8c581 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1014,7 +1014,7 @@
 	/* Disable all the interrupts */
 	ew32(IMC, 0xFFFFFFFF);
 	e1e_flush();
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	/* Test each interrupt */
 	for (i = 0; i < 10; i++) {
@@ -1046,7 +1046,7 @@
 			ew32(IMC, mask);
 			ew32(ICS, mask);
 			e1e_flush();
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 
 			if (adapter->test_icr & mask) {
 				*data = 3;
@@ -1064,7 +1064,7 @@
 		ew32(IMS, mask);
 		ew32(ICS, mask);
 		e1e_flush();
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 
 		if (!(adapter->test_icr & mask)) {
 			*data = 4;
@@ -1082,7 +1082,7 @@
 			ew32(IMC, ~mask & 0x00007FFF);
 			ew32(ICS, ~mask & 0x00007FFF);
 			e1e_flush();
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 
 			if (adapter->test_icr) {
 				*data = 5;
@@ -1094,7 +1094,7 @@
 	/* Disable all the interrupts */
 	ew32(IMC, 0xFFFFFFFF);
 	e1e_flush();
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	/* Unhook test interrupt handler */
 	free_irq(irq, netdev);
@@ -1126,8 +1126,7 @@
 						 buffer_info->dma,
 						 buffer_info->length,
 						 DMA_TO_DEVICE);
-			if (buffer_info->skb)
-				dev_kfree_skb(buffer_info->skb);
+			dev_kfree_skb(buffer_info->skb);
 		}
 	}
 
@@ -1139,8 +1138,7 @@
 				dma_unmap_single(&pdev->dev,
 						 buffer_info->dma,
 						 2048, DMA_FROM_DEVICE);
-			if (buffer_info->skb)
-				dev_kfree_skb(buffer_info->skb);
+			dev_kfree_skb(buffer_info->skb);
 		}
 	}
 
@@ -1470,7 +1468,7 @@
 	 */
 	ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK);
 	e1e_flush();
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	return 0;
 }
@@ -1584,7 +1582,7 @@
 		    hw->phy.media_type == e1000_media_type_internal_serdes) {
 			ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
 			e1e_flush();
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 			break;
 		}
 		/* Fall Through */
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index cdae0ef..a1fab77 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -271,7 +271,7 @@
 		u16 count = 20;
 
 		do {
-			usleep_range(5000, 10000);
+			usleep_range(5000, 6000);
 		} while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--);
 
 		msleep(30);
@@ -405,7 +405,7 @@
 	/* Ungate automatic PHY configuration on non-managed 82579 */
 	if ((hw->mac.type == e1000_pch2lan) &&
 	    !(fwsm & E1000_ICH_FWSM_FW_VALID)) {
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 		e1000_gate_hw_phy_config_ich8lan(hw, false);
 	}
 
@@ -531,7 +531,7 @@
 	phy->id = 0;
 	while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) &&
 	       (i++ < 100)) {
-		usleep_range(1000, 2000);
+		usleep_range(1000, 1100);
 		ret_val = e1000e_get_phy_id(hw);
 		if (ret_val)
 			return ret_val;
@@ -1244,7 +1244,7 @@
 				goto out;
 			}
 
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 		}
 		e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
 
@@ -1429,6 +1429,16 @@
 			else
 				phy_reg |= 0xFA;
 			e1e_wphy_locked(hw, I217_PLL_CLOCK_GATE_REG, phy_reg);
+
+			if (speed == SPEED_1000) {
+				hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+							    &phy_reg);
+
+				phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+				hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+							     phy_reg);
+			}
 		}
 		hw->phy.ops.release(hw);
 
@@ -1999,7 +2009,7 @@
 
 	while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
 	       (i++ < 30))
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 	return blocked ? E1000_BLK_PHY_RESET : 0;
 }
 
@@ -2818,7 +2828,7 @@
 		return 0;
 
 	/* Allow time for h/w to get to quiescent state after reset */
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	/* Perform any necessary post-reset workarounds */
 	switch (hw->mac.type) {
@@ -2854,7 +2864,7 @@
 	if (hw->mac.type == e1000_pch2lan) {
 		/* Ungate automatic PHY configuration on non-managed 82579 */
 		if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 			e1000_gate_hw_phy_config_ich8lan(hw, false);
 		}
 
@@ -3875,7 +3885,7 @@
 	 */
 	if (!ret_val) {
 		nvm->ops.reload(hw);
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 	}
 
 out:
@@ -4026,7 +4036,7 @@
 	 */
 	if (!ret_val) {
 		nvm->ops.reload(hw);
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 	}
 
 out:
@@ -4650,7 +4660,7 @@
 	ew32(TCTL, E1000_TCTL_PSP);
 	e1e_flush();
 
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	/* Workaround for ICH8 bit corruption issue in FIFO memory */
 	if (hw->mac.type == e1000_ich8lan) {
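
Beyond tightening the usleep_range() windows (the convention is a small slack above the minimum, not double it), ich8lan.c adds a K1 clock-request setting at 1000 Mb/s using the HV_PM_CTRL bit redefined in ich8lan.h below. It is a read-modify-write of a PHY register through the locked accessors, along these lines (illustrative; error handling elided, and the caller is assumed to hold the PHY ownership via hw->phy.ops.acquire()):

	static void my_set_k1_clk_req(struct e1000_hw *hw)
	{
		u16 reg;

		hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL, &reg);
		reg |= HV_PM_CTRL_K1_CLK_REQ;	/* 0x200, see ich8lan.h below */
		hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL, reg);
	}
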
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index eb09c75..1502895 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -210,7 +210,7 @@
 
 /* PHY Power Management Control */
 #define HV_PM_CTRL		PHY_REG(770, 17)
-#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA	0x100
+#define HV_PM_CTRL_K1_CLK_REQ		0x200
 #define HV_PM_CTRL_K1_ENABLE		0x4000
 
 #define I217_PLL_CLOCK_GATE_REG	PHY_REG(772, 28)
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index 4abd55d..e531976 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -797,7 +797,7 @@
 	 * milliseconds even if the other end is doing it in SW).
 	 */
 	for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 		status = er32(STATUS);
 		if (status & E1000_STATUS_LU)
 			break;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3ba0c90..d7d56e4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1780,7 +1780,8 @@
 		}
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__E1000_DOWN, &adapter->state))
-			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+			mod_delayed_work(adapter->e1000_workqueue,
+					 &adapter->watchdog_task, HZ);
 	}
 
 	/* Reset on uncorrectable ECC error */
@@ -1860,7 +1861,8 @@
 		}
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__E1000_DOWN, &adapter->state))
-			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+			mod_delayed_work(adapter->e1000_workqueue,
+					 &adapter->watchdog_task, HZ);
 	}
 
 	/* Reset on uncorrectable ECC error */
@@ -1905,7 +1907,8 @@
 		hw->mac.get_link_status = true;
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__E1000_DOWN, &adapter->state))
-			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+			mod_delayed_work(adapter->e1000_workqueue,
+					 &adapter->watchdog_task, HZ);
 	}
 
 	if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -2106,7 +2109,7 @@
 	if (strlen(netdev->name) < (IFNAMSIZ - 5))
 		snprintf(adapter->rx_ring->name,
 			 sizeof(adapter->rx_ring->name) - 1,
-			 "%s-rx-0", netdev->name);
+			 "%.14s-rx-0", netdev->name);
 	else
 		memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
 	err = request_irq(adapter->msix_entries[vector].vector,
@@ -2122,7 +2125,7 @@
 	if (strlen(netdev->name) < (IFNAMSIZ - 5))
 		snprintf(adapter->tx_ring->name,
 			 sizeof(adapter->tx_ring->name) - 1,
-			 "%s-tx-0", netdev->name);
+			 "%.14s-tx-0", netdev->name);
 	else
 		memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ);
 	err = request_irq(adapter->msix_entries[vector].vector,
@@ -2305,8 +2308,8 @@
 {
 	struct pci_dev *pdev = adapter->pdev;
 
-	ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma,
-					 GFP_KERNEL);
+	ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
+					GFP_KERNEL);
 	if (!ring->desc)
 		return -ENOMEM;
 
@@ -2651,9 +2654,9 @@
 /**
  * e1000e_poll - NAPI Rx polling callback
  * @napi: struct associated with this polling callback
- * @weight: number of packets driver is allowed to process this poll
+ * @budget: number of packets driver is allowed to process this poll
  **/
-static int e1000e_poll(struct napi_struct *napi, int weight)
+static int e1000e_poll(struct napi_struct *napi, int budget)
 {
 	struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter,
 						     napi);
@@ -2667,16 +2670,17 @@
 	    (adapter->rx_ring->ims_val & adapter->tx_ring->ims_val))
 		tx_cleaned = e1000_clean_tx_irq(adapter->tx_ring);
 
-	adapter->clean_rx(adapter->rx_ring, &work_done, weight);
+	adapter->clean_rx(adapter->rx_ring, &work_done, budget);
 
-	if (!tx_cleaned)
-		work_done = weight;
+	if (!tx_cleaned || work_done == budget)
+		return budget;
 
-	/* If weight not fully consumed, exit the polling mode */
-	if (work_done < weight) {
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
 		if (adapter->itr_setting & 3)
 			e1000_set_itr(adapter);
-		napi_complete_done(napi, work_done);
 		if (!test_bit(__E1000_DOWN, &adapter->state)) {
 			if (adapter->msix_entries)
 				ew32(IMS, adapter->rx_ring->ims_val);
@@ -3207,7 +3211,7 @@
 	if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
 		ew32(RCTL, rctl & ~E1000_RCTL_EN);
 	e1e_flush();
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	if (adapter->flags2 & FLAG2_DMA_BURST) {
 		/* set the writeback threshold (only takes effect if the RDTR
@@ -3815,7 +3819,6 @@
 	if (tx_ring->next_to_use == tx_ring->count)
 		tx_ring->next_to_use = 0;
 	ew32(TDT(0), tx_ring->next_to_use);
-	mmiowb();
 	usleep_range(200, 250);
 }
 
@@ -4046,12 +4049,12 @@
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
-		fc->refresh_time = 0x0400;
+		fc->refresh_time = 0xFFFF;
+		fc->pause_time = 0xFFFF;
 
 		if (adapter->netdev->mtu <= ETH_DATA_LEN) {
 			fc->high_water = 0x05C20;
 			fc->low_water = 0x05048;
-			fc->pause_time = 0x0650;
 			break;
 		}
 
@@ -4208,7 +4211,7 @@
 		e1000_configure_msix(adapter);
 	e1000_irq_enable(adapter);
 
-	netif_start_queue(adapter->netdev);
+	/* Tx queue started by watchdog timer when link is up */
 
 	e1000e_trigger_lsc(adapter);
 }
@@ -4272,13 +4275,12 @@
 
 	/* flush both disables and wait for them to finish */
 	e1e_flush();
-	usleep_range(10000, 20000);
+	usleep_range(10000, 11000);
 
 	e1000_irq_disable(adapter);
 
 	napi_synchronize(&adapter->napi);
 
-	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
 
 	spin_lock(&adapter->stats64_lock);
@@ -4310,7 +4312,7 @@
 {
 	might_sleep();
 	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
-		usleep_range(1000, 2000);
+		usleep_range(1000, 1100);
 	e1000e_down(adapter, true);
 	e1000e_up(adapter);
 	clear_bit(__E1000_RESETTING, &adapter->state);
@@ -4319,13 +4321,16 @@
 /**
  * e1000e_sanitize_systim - sanitize raw cycle counter reads
  * @hw: pointer to the HW structure
- * @systim: time value read, sanitized and returned
+ * @systim: PHC time value read, sanitized and returned
+ * @sts: structure to hold system time before and after reading SYSTIML,
+ * may be NULL
  *
  * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
  * check to see that the time is incrementing at a reasonable
  * rate and is a multiple of incvalue.
  **/
-static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim)
+static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim,
+				  struct ptp_system_timestamp *sts)
 {
 	u64 time_delta, rem, temp;
 	u64 systim_next;
@@ -4335,7 +4340,9 @@
 	incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
 	for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
 		/* latch SYSTIMH on read of SYSTIML */
+		ptp_read_system_prets(sts);
 		systim_next = (u64)er32(SYSTIML);
+		ptp_read_system_postts(sts);
 		systim_next |= (u64)er32(SYSTIMH) << 32;
 
 		time_delta = systim_next - systim;
@@ -4353,15 +4360,16 @@
 }
 
 /**
- * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
- * @cc: cyclecounter structure
+ * e1000e_read_systim - read SYSTIM register
+ * @adapter: board private structure
+ * @sts: structure which will contain system time before and after reading
+ * SYSTIML, may be NULL
  **/
-static u64 e1000e_cyclecounter_read(const struct cyclecounter *cc)
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+		       struct ptp_system_timestamp *sts)
 {
-	struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
-						     cc);
 	struct e1000_hw *hw = &adapter->hw;
-	u32 systimel, systimeh;
+	u32 systimel, systimel_2, systimeh;
 	u64 systim;
 	/* SYSTIMH latching upon SYSTIML read does not work well.
 	 * This means that if SYSTIML overflows after we read it but before
@@ -4369,11 +4377,15 @@
 	 * will experience a huge non linear increment in the systime value
 	 * to fix that we test for overflow and if true, we re-read systime.
 	 */
+	ptp_read_system_prets(sts);
 	systimel = er32(SYSTIML);
+	ptp_read_system_postts(sts);
 	systimeh = er32(SYSTIMH);
 	/* Is systimel so large that overflow is possible? */
 	if (systimel >= (u32)0xffffffff - E1000_TIMINCA_INCVALUE_MASK) {
-		u32 systimel_2 = er32(SYSTIML);
+		ptp_read_system_prets(sts);
+		systimel_2 = er32(SYSTIML);
+		ptp_read_system_postts(sts);
 		if (systimel > systimel_2) {
 			/* There was an overflow, read again SYSTIMH, and use
 			 * systimel_2
@@ -4386,12 +4398,24 @@
 	systim |= (u64)systimeh << 32;
 
 	if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
-		systim = e1000e_sanitize_systim(hw, systim);
+		systim = e1000e_sanitize_systim(hw, systim, sts);
 
 	return systim;
 }
 
 /**
+ * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
+ * @cc: cyclecounter structure
+ **/
+static u64 e1000e_cyclecounter_read(const struct cyclecounter *cc)
+{
+	struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
+						     cc);
+
+	return e1000e_read_systim(adapter, NULL);
+}
+
+/**
  * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
  * @adapter: board private structure to initialize
  *
@@ -4584,6 +4608,7 @@
 	pm_runtime_get_sync(&pdev->dev);
 
 	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
 
 	/* allocate transmit descriptors */
 	err = e1000e_setup_tx_resources(adapter->tx_ring);
@@ -4644,7 +4669,6 @@
 	e1000_irq_enable(adapter);
 
 	adapter->tx_hang_recheck = false;
-	netif_start_queue(netdev);
 
 	hw->mac.get_link_status = true;
 	pm_runtime_put(&pdev->dev);
@@ -4685,7 +4709,7 @@
 	int count = E1000_CHECK_RESET_COUNT;
 
 	while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 
 	WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
 
@@ -5128,31 +5152,18 @@
 	}
 }
 
-/**
- * e1000_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-static void e1000_watchdog(struct timer_list *t)
-{
-	struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-
-	/* Do the rest outside of interrupt context */
-	schedule_work(&adapter->watchdog_task);
-
-	/* TODO: make this use queue_delayed_work() */
-}
-
 static void e1000_watchdog_task(struct work_struct *work)
 {
 	struct e1000_adapter *adapter = container_of(work,
 						     struct e1000_adapter,
-						     watchdog_task);
+						     watchdog_task.work);
 	struct net_device *netdev = adapter->netdev;
 	struct e1000_mac_info *mac = &adapter->hw.mac;
 	struct e1000_phy_info *phy = &adapter->hw.phy;
 	struct e1000_ring *tx_ring = adapter->tx_ring;
+	u32 dmoff_exit_timeout = 100, tries = 0;
 	struct e1000_hw *hw = &adapter->hw;
-	u32 link, tctl;
+	u32 link, tctl, pcim_state;
 
 	if (test_bit(__E1000_DOWN, &adapter->state))
 		return;
@@ -5177,6 +5188,21 @@
 			/* Cancel scheduled suspend requests. */
 			pm_runtime_resume(netdev->dev.parent);
 
+			/* Checking if MAC is in DMoff state */
+			pcim_state = er32(STATUS);
+			while (pcim_state & E1000_STATUS_PCIM_STATE) {
+				if (tries++ == dmoff_exit_timeout) {
+					e_dbg("Error in exiting dmoff\n");
+					break;
+				}
+				usleep_range(10000, 20000);
+				pcim_state = er32(STATUS);
+
+				/* Checking if MAC exited DMoff state */
+				if (!(pcim_state & E1000_STATUS_PCIM_STATE))
+					e1000_phy_hw_reset(&adapter->hw);
+			}
+
 			/* update snapshot of PHY registers on LSC */
 			e1000_phy_read_status(adapter);
 			mac->ops.get_link_up_info(&adapter->hw,
@@ -5266,6 +5292,7 @@
 			if (phy->ops.cfg_on_link_up)
 				phy->ops.cfg_on_link_up(hw);
 
+			netif_wake_queue(netdev);
 			netif_carrier_on(netdev);
 
 			if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -5279,6 +5306,7 @@
 			/* Link status message must follow this format */
 			pr_info("%s NIC Link is Down\n", adapter->netdev->name);
 			netif_carrier_off(netdev);
+			netif_stop_queue(netdev);
 			if (!test_bit(__E1000_DOWN, &adapter->state))
 				mod_timer(&adapter->phy_info_timer,
 					  round_jiffies(jiffies + 2 * HZ));
@@ -5376,8 +5404,9 @@
 
 	/* Reset the timer */
 	if (!test_bit(__E1000_DOWN, &adapter->state))
-		mod_timer(&adapter->watchdog_timer,
-			  round_jiffies(jiffies + 2 * HZ));
+		queue_delayed_work(adapter->e1000_workqueue,
+				   &adapter->watchdog_task,
+				   round_jiffies(2 * HZ));
 }
 
 #define E1000_TX_FLAGS_CSUM		0x00000001
@@ -5550,9 +5579,8 @@
 	}
 
 	for (f = 0; f < nr_frags; f++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 
-		frag = &skb_shinfo(skb)->frags[f];
 		len = skb_frag_size(frag);
 		offset = 0;
 
@@ -5877,19 +5905,13 @@
 				     DIV_ROUND_UP(PAGE_SIZE,
 						  adapter->tx_fifo_limit) + 2));
 
-		if (!skb->xmit_more ||
+		if (!netdev_xmit_more() ||
 		    netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
 			if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
 				e1000e_update_tdt_wa(tx_ring,
 						     tx_ring->next_to_use);
 			else
 				writel(tx_ring->next_to_use, tx_ring->tail);
-
-			/* we need this if more than one processor can write
-			 * to our tail at a time, it synchronizes IO on
-			 *IA64/Altix systems
-			 */
-			mmiowb();
 		}
 	} else {
 		dev_kfree_skb_any(skb);
@@ -6003,7 +6025,7 @@
 	}
 
 	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
-		usleep_range(1000, 2000);
+		usleep_range(1000, 1100);
 	/* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */
 	adapter->max_frame_size = max_frame;
 	e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
@@ -6274,7 +6296,7 @@
 
 static int e1000e_pm_freeze(struct device *dev)
 {
-	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
 	netif_device_detach(netdev);
@@ -6283,7 +6305,7 @@
 		int count = E1000_CHECK_RESET_COUNT;
 
 		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 
 		WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
 
@@ -6607,7 +6629,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int e1000e_pm_thaw(struct device *dev)
 {
-	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
 	e1000e_set_interrupt_capability(adapter);
@@ -6656,8 +6678,7 @@
 
 static int e1000e_pm_runtime_idle(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	u16 eee_lp;
 
@@ -6698,7 +6719,7 @@
 		int count = E1000_CHECK_RESET_COUNT;
 
 		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
-			usleep_range(10000, 20000);
+			usleep_range(10000, 11000);
 
 		WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
 
@@ -6854,8 +6875,6 @@
 		result = PCI_ERS_RESULT_RECOVERED;
 	}
 
-	pci_cleanup_aer_uncorrect_error_status(pdev);
-
 	return result;
 }
 
@@ -6985,7 +7004,7 @@
 	else
 		e1000e_reset(adapter);
 
-	return 0;
+	return 1;
 }
 
 static const struct net_device_ops e1000e_netdev_ops = {
@@ -7240,11 +7259,21 @@
 		goto err_eeprom;
 	}
 
-	timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+	adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+						   e1000e_driver_name);
+
+	if (!adapter->e1000_workqueue) {
+		err = -ENOMEM;
+		goto err_workqueue;
+	}
+
+	INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task);
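+	/* run the watchdog once now; the task re-arms itself every ~2 seconds
+	 * while the adapter is up
+	 */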
+	queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task,
+			   0);
+
 	timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
 
 	INIT_WORK(&adapter->reset_task, e1000_reset_task);
-	INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
 	INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
 	INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
 	INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
@@ -7330,12 +7359,17 @@
 
 	e1000_print_device_info(adapter);
 
-	if (pci_dev_run_wake(pdev))
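+	/* never skip the driver's system suspend/resume callbacks */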
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+
+	if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp)
 		pm_runtime_put_noidle(&pdev->dev);
 
 	return 0;
 
 err_register:
+	flush_workqueue(adapter->e1000_workqueue);
+	destroy_workqueue(adapter->e1000_workqueue);
+err_workqueue:
 	if (!(adapter->flags & FLAG_HAS_AMT))
 		e1000e_release_hw_control(adapter);
 err_eeprom:
@@ -7382,15 +7416,17 @@
 	 */
 	if (!down)
 		set_bit(__E1000_DOWN, &adapter->state);
-	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
 
 	cancel_work_sync(&adapter->reset_task);
-	cancel_work_sync(&adapter->watchdog_task);
 	cancel_work_sync(&adapter->downshift_task);
 	cancel_work_sync(&adapter->update_phy_task);
 	cancel_work_sync(&adapter->print_hang_task);
 
+	cancel_delayed_work(&adapter->watchdog_task);
+	flush_workqueue(adapter->e1000_workqueue);
+	destroy_workqueue(adapter->e1000_workqueue);
+
 	if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
 		cancel_work_sync(&adapter->tx_hwtstamp_work);
 		if (adapter->tx_hwtstamp_skb) {
@@ -7592,7 +7628,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 /* netdev.c */
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index 937f9af..e609f4d 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -392,7 +392,7 @@
 				break;
 			}
 		}
-		usleep_range(10000, 20000);
+		usleep_range(10000, 11000);
 		nvm->ops.release(hw);
 	}
 
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 37c7694..1a4c65d 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -161,22 +161,30 @@
 #endif/*CONFIG_E1000E_HWTS*/
 
 /**
- * e1000e_phc_gettime - Reads the current time from the hardware clock
+ * e1000e_phc_gettimex - Reads the current time from the hardware clock and
+ *                       system clock
  * @ptp: ptp clock structure
- * @ts: timespec structure to hold the current time value
+ * @ts: timespec structure to hold the current PHC time
+ * @sts: structure to hold the current system time
  *
  * Read the timecounter and return the correct value in ns after converting
  * it into a struct timespec.
  **/
-static int e1000e_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int e1000e_phc_gettimex(struct ptp_clock_info *ptp,
+			       struct timespec64 *ts,
+			       struct ptp_system_timestamp *sts)
 {
 	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
 						     ptp_clock_info);
 	unsigned long flags;
-	u64 ns;
+	u64 cycles, ns;
 
 	spin_lock_irqsave(&adapter->systim_lock, flags);
-	ns = timecounter_read(&adapter->tc);
+
+	/* NOTE: Non-monotonic SYSTIM readings may be returned */
+	cycles = e1000e_read_systim(adapter, sts);
+	ns = timecounter_cyc2time(&adapter->tc, cycles);
+
 	spin_unlock_irqrestore(&adapter->systim_lock, flags);
 
 	*ts = ns_to_timespec64(ns);
@@ -232,9 +240,12 @@
 						     systim_overflow_work.work);
 	struct e1000_hw *hw = &adapter->hw;
 	struct timespec64 ts;
+	u64 ns;
 
-	adapter->ptp_clock_info.gettime64(&adapter->ptp_clock_info, &ts);
+	/* Update the timecounter */
+	ns = timecounter_read(&adapter->tc);
 
+	ts = ns_to_timespec64(ns);
 	e_dbg("SYSTIM overflow check at %lld.%09lu\n",
 	      (long long) ts.tv_sec, ts.tv_nsec);
 
@@ -251,7 +262,7 @@
 	.pps		= 0,
 	.adjfreq	= e1000e_phc_adjfreq,
 	.adjtime	= e1000e_phc_adjtime,
-	.gettime64	= e1000e_phc_gettime,
+	.gettimex64	= e1000e_phc_gettimex,
 	.settime64	= e1000e_phc_settime,
 	.enable		= e1000e_phc_enable,
 };
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 7d42582..b144419 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #ifndef _FM10K_H_
 #define _FM10K_H_
@@ -177,14 +177,10 @@
 #define MIN_Q_VECTORS	1
 enum fm10k_non_q_vectors {
 	FM10K_MBX_VECTOR,
-#define NON_Q_VECTORS_VF NON_Q_VECTORS_PF
-	NON_Q_VECTORS_PF
+	NON_Q_VECTORS
 };
 
-#define NON_Q_VECTORS(hw)	(((hw)->mac.type == fm10k_mac_pf) ? \
-						NON_Q_VECTORS_PF : \
-						NON_Q_VECTORS_VF)
-#define MIN_MSIX_COUNT(hw)	(MIN_Q_VECTORS + NON_Q_VECTORS(hw))
+#define MIN_MSIX_COUNT(hw)	(MIN_Q_VECTORS + NON_Q_VECTORS)
 
 struct fm10k_q_vector {
 	struct fm10k_intfc *interface;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index 20768ac..c453154 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k.h"
 
@@ -36,7 +36,7 @@
 static int fm10k_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
 {
 	u8 num_tc = 0;
-	int i, err;
+	int i;
 
 	/* verify type and determine num_tcs needed */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
@@ -57,7 +57,7 @@
 
 	/* update TC hardware mapping if necessary */
 	if (num_tc != netdev_get_num_tc(dev)) {
-		err = fm10k_setup_tc(dev, num_tc);
+		int err = fm10k_setup_tc(dev, num_tc);
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index dca1041..1d27b2f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -160,8 +160,6 @@
 	snprintf(name, sizeof(name), "q_vector.%03d", q_vector->v_idx);
 
 	q_vector->dbg_q_vector = debugfs_create_dir(name, interface->dbg_intfc);
-	if (!q_vector->dbg_q_vector)
-		return;
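+	/* no need to check the result; code should not behave differently
+	 * when debugfs creation fails
+	 */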
 
 	/* Generate a file for each rx ring in the q_vector */
 	for (i = 0; i < q_vector->tx.count; i++) {
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index 4895dd8..c681d2d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include <linux/vmalloc.h>
 
@@ -222,7 +222,6 @@
 				      const unsigned int size)
 {
 	unsigned int i;
-	char *p;
 
 	if (!pointer) {
 		/* memory is not zero allocated so we have to clear it */
@@ -232,7 +231,7 @@
 	}
 
 	for (i = 0; i < size; i++) {
-		p = (char *)pointer + stats[i].stat_offset;
+		char *p = (char *)pointer + stats[i].stat_offset;
 
 		switch (stats[i].sizeof_stat) {
 		case sizeof(u64):
@@ -651,7 +650,6 @@
 			      struct ethtool_coalesce *ec)
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
-	struct fm10k_q_vector *qv;
 	u16 tx_itr, rx_itr;
 	int i;
 
@@ -677,7 +675,8 @@
 
 	/* update q_vectors */
 	for (i = 0; i < interface->num_q_vectors; i++) {
-		qv = interface->q_vector[i];
+		struct fm10k_q_vector *qv = interface->q_vector[i];
+
 		qv->tx.itr = tx_itr;
 		qv->rx.itr = rx_itr;
 	}
@@ -1115,13 +1114,12 @@
 			       struct ethtool_channels *ch)
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
-	struct fm10k_hw *hw = &interface->hw;
 
 	/* report maximum channels */
 	ch->max_combined = fm10k_max_channels(dev);
 
 	/* report info for other vector */
-	ch->max_other = NON_Q_VECTORS(hw);
+	ch->max_other = NON_Q_VECTORS;
 	ch->other_count = ch->max_other;
 
 	/* record RSS queues */
@@ -1133,14 +1131,13 @@
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
 	unsigned int count = ch->combined_count;
-	struct fm10k_hw *hw = &interface->hw;
 
 	/* verify they are not requesting separate vectors */
 	if (!count || ch->rx_count || ch->tx_count)
 		return -EINVAL;
 
 	/* verify other_count has not changed */
-	if (ch->other_count != NON_Q_VECTORS(hw))
+	if (ch->other_count != NON_Q_VECTORS)
 		return -EINVAL;
 
 	/* verify the number of channels does not exceed hardware limits */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index e707d71..afe1faf 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k.h"
 #include "fm10k_vf.h"
@@ -244,7 +244,8 @@
 		}
 
 		/* guarantee we have free space in the SM mailbox */
-		if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
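+		/* only count "mailbox full" events while the mailbox is open */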
+		if (hw->mbx.state == FM10K_STATE_OPEN &&
+		    !hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
 			/* keep track of how many times this occurs */
 			interface->hw_sm_mbx_full++;
 
@@ -302,6 +303,26 @@
 	}
 }
 
+static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev)
+{
+	u32 err_mask;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return;
+
+	/* Mask the completion abort bit in the ERR_UNCOR_MASK register,
+	 * preventing the device from reporting these errors to the upstream
+	 * PCIe root device. This avoids bringing down platforms which upgrade
+	 * non-fatal completer aborts into machine check exceptions. Completer
+	 * aborts can occur whenever a VF reads a queue it doesn't own.
+	 */
+	pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask);
+	err_mask |= PCI_ERR_UNC_COMP_ABORT;
+	pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask);
+}
+
 int fm10k_iov_resume(struct pci_dev *pdev)
 {
 	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
@@ -317,6 +338,12 @@
 	if (!iov_data)
 		return -ENOMEM;
 
+	/* Mask completer abort error reporting, as the VFs can
+	 * trigger these errors any time they read a queue
+	 * that they don't own.
+	 */
+	fm10k_mask_aer_comp_abort(pdev);
+
 	/* allocate hardware resources for the VFs */
 	hw->iov.ops.assign_resources(hw, num_vfs, num_vfs);
 
@@ -399,7 +426,7 @@
 	struct fm10k_iov_data *iov_data = interface->iov_data;
 	struct fm10k_hw *hw = &interface->hw;
 	size_t size;
-	int i, err;
+	int i;
 
 	/* return error if iov_data is already populated */
 	if (iov_data)
@@ -425,6 +452,7 @@
 	/* loop through vf_info structures initializing each entry */
 	for (i = 0; i < num_vfs; i++) {
 		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+		int err;
 
 		/* Record VF VSI value */
 		vf_info->vsi = i + 1;
@@ -460,20 +488,6 @@
 	fm10k_iov_free_data(pdev);
 }
 
-static void fm10k_disable_aer_comp_abort(struct pci_dev *pdev)
-{
-	u32 err_sev;
-	int pos;
-
-	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
-	if (!pos)
-		return;
-
-	pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &err_sev);
-	err_sev &= ~PCI_ERR_UNC_COMP_ABORT;
-	pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, err_sev);
-}
-
 int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	int current_vfs = pci_num_vf(pdev);
@@ -495,12 +509,6 @@
 
 	/* allocate VFs if not already allocated */
 	if (num_vfs && num_vfs != current_vfs) {
-		/* Disable completer abort error reporting as
-		 * the VFs can trigger this any time they read a queue
-		 * that they don't own.
-		 */
-		fm10k_disable_aer_comp_abort(pdev);
-
 		err = pci_enable_sriov(pdev, num_vfs);
 		if (err) {
 			dev_err(&pdev->dev,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 3f53654..2be9222 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include <linux/types.h>
 #include <linux/module.h>
@@ -11,17 +11,17 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION	"0.23.4-k"
+#define DRV_VERSION	"0.26.1-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
 static const char fm10k_driver_string[] = DRV_SUMMARY;
 static const char fm10k_copyright[] =
-	"Copyright(c) 2013 - 2018 Intel Corporation.";
+	"Copyright(c) 2013 - 2019 Intel Corporation.";
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION(DRV_SUMMARY);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 /* single workqueue for entire fm10k driver */
@@ -41,6 +41,8 @@
 	/* create driver workqueue */
 	fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
 					  fm10k_driver_name);
+	if (!fm10k_workqueue)
+		return -ENOMEM;
 
 	fm10k_dbg_init();
 
@@ -278,7 +280,7 @@
 	/* we need the header to contain the greater of either ETH_HLEN or
 	 * 60 bytes if the skb->len is less than 60 for skb_pad.
 	 */
-	pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);
+	pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);
 
 	/* align pull length to size of long to optimize memcpy performance */
 	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
@@ -313,7 +315,7 @@
 		/* prefetch first cache line of first page */
 		prefetch(page_addr);
 #if L1_CACHE_BYTES < 128
-		prefetch(page_addr + L1_CACHE_BYTES);
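+		/* cast to u8 * first: arithmetic on void * is a GCC extension */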
+		prefetch((void *)((u8 *)page_addr + L1_CACHE_BYTES));
 #endif
 
 		/* allocate a skb to store the frags */
@@ -944,7 +946,7 @@
 	struct sk_buff *skb = first->skb;
 	struct fm10k_tx_buffer *tx_buffer;
 	struct fm10k_tx_desc *tx_desc;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	unsigned char *data;
 	dma_addr_t dma;
 	unsigned int data_len, size;
@@ -1035,13 +1037,8 @@
 	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	/* notify HW of packet */
-	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
 		writel(i, tx_ring->tail);
-
-		/* we need this if more than one processor can write to our tail
-		 * at a time, it synchronizes IO on IA64/Altix systems
-		 */
-		mmiowb();
 	}
 
 	return;
@@ -1076,8 +1073,11 @@
 	 *       + 2 desc gap to keep tail from touching head
 	 * otherwise try next time
 	 */
-	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
-		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		count += TXD_USE_COUNT(skb_frag_size(frag));
+	}
 
 	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
 		tx_ring->tx_stats.tx_busy++;
@@ -1465,11 +1465,11 @@
 	if (!clean_complete)
 		return budget;
 
-	/* all work done, exit the polling mode */
-	napi_complete_done(napi, work_done);
-
-	/* re-enable the q_vector */
-	fm10k_qv_enable(q_vector);
+	/* Exit the polling mode, but don't re-enable interrupts if the
+	 * stack might poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		fm10k_qv_enable(q_vector);
 
 	return min(work_done, budget - 1);
 }
@@ -1603,14 +1603,12 @@
 {
 	struct fm10k_q_vector *q_vector;
 	struct fm10k_ring *ring;
-	int ring_count, size;
+	int ring_count;
 
 	ring_count = txr_count + rxr_count;
-	size = sizeof(struct fm10k_q_vector) +
-	       (sizeof(struct fm10k_ring) * ring_count);
 
 	/* allocate q_vector and rings */
-	q_vector = kzalloc(size, GFP_KERNEL);
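+	/* struct_size() covers the trailing ring[] array and guards overflow */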
+	q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL);
 	if (!q_vector)
 		return -ENOMEM;
 
@@ -1828,7 +1826,7 @@
 	v_budget = min_t(u16, v_budget, num_online_cpus());
 
 	/* account for vectors not related to queues */
-	v_budget += NON_Q_VECTORS(hw);
+	v_budget += NON_Q_VECTORS;
 
 	/* At the same time, hardware can only support a maximum of
 	 * hw.mac->max_msix_vectors vectors.  With features
@@ -1860,7 +1858,7 @@
 	}
 
 	/* record the number of queues available for q_vectors */
-	interface->num_q_vectors = v_budget - NON_Q_VECTORS(hw);
+	interface->num_q_vectors = v_budget - NON_Q_VECTORS;
 
 	return 0;
 }
@@ -1874,7 +1872,7 @@
 static bool fm10k_cache_ring_qos(struct fm10k_intfc *interface)
 {
 	struct net_device *dev = interface->netdev;
-	int pc, offset, rss_i, i, q_idx;
+	int pc, offset, rss_i, i;
 	u16 pc_stride = interface->ring_feature[RING_F_QOS].mask + 1;
 	u8 num_pcs = netdev_get_num_tc(dev);
 
@@ -1884,7 +1882,8 @@
 	rss_i = interface->ring_feature[RING_F_RSS].indices;
 
 	for (pc = 0, offset = 0; pc < num_pcs; pc++, offset += rss_i) {
-		q_idx = pc;
+		int q_idx = pc;
+
 		for (i = 0; i < rss_i; i++) {
 			interface->tx_ring[offset + i]->reg_idx = q_idx;
 			interface->tx_ring[offset + i]->qos_pc = pc;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 21021fe..75e51f9 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_common.h"
 
@@ -297,13 +297,14 @@
 {
 	struct fm10k_mbx_fifo *fifo = &mbx->rx;
 	u16 total_len = 0, msg_len;
-	u32 *msg;
 
 	/* length should include previous amounts pushed */
 	len += mbx->pushed;
 
 	/* offset in message is based off of current message size */
 	do {
+		u32 *msg;
+
 		msg = fifo->buffer + fm10k_fifo_tail_offset(fifo, total_len);
 		msg_len = FM10K_TLV_DWORD_LEN(*msg);
 		total_len += msg_len;
@@ -1920,7 +1921,6 @@
 	/* reduce length by 1 to convert to a mask */
 	u16 mbmem_len = mbx->mbmem_len - 1;
 	u16 tail_len, len = 0;
-	u32 *msg;
 
 	/* push head behind tail */
 	if (mbx->tail < head)
@@ -1930,6 +1930,8 @@
 
 	/* determine msg aligned offset for end of buffer */
 	do {
+		u32 *msg;
+
 		msg = fifo->buffer + fm10k_fifo_head_offset(fifo, len);
 		tail_len = len;
 		len += FM10K_TLV_DWORD_LEN(*msg);
@@ -2132,7 +2134,8 @@
  *  DWORDs, not bytes.  Any invalid values will cause the mailbox to return
  *  error.
  **/
-s32 fm10k_sm_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx,
+s32 fm10k_sm_mbx_init(struct fm10k_hw __always_unused *hw,
+		      struct fm10k_mbx_info *mbx,
 		      const struct fm10k_msg_data *msg_data)
 {
 	mbx->mbx_reg = FM10K_GMBX;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 538a846..09f7a24 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k.h"
 #include <linux/vmalloc.h>
@@ -54,7 +54,7 @@
  **/
 static int fm10k_setup_all_tx_resources(struct fm10k_intfc *interface)
 {
-	int i, err = 0;
+	int i, err;
 
 	for (i = 0; i < interface->num_tx_queues; i++) {
 		err = fm10k_setup_tx_resources(interface->tx_ring[i]);
@@ -121,7 +121,7 @@
  **/
 static int fm10k_setup_all_rx_resources(struct fm10k_intfc *interface)
 {
-	int i, err = 0;
+	int i, err;
 
 	for (i = 0; i < interface->num_rx_queues; i++) {
 		err = fm10k_setup_rx_resources(interface->rx_ring[i]);
@@ -169,7 +169,6 @@
  **/
 static void fm10k_clean_tx_ring(struct fm10k_ring *tx_ring)
 {
-	struct fm10k_tx_buffer *tx_buffer;
 	unsigned long size;
 	u16 i;
 
@@ -179,7 +178,8 @@
 
 	/* Free all the Tx ring sk_buffs */
 	for (i = 0; i < tx_ring->count; i++) {
-		tx_buffer = &tx_ring->tx_buffer[i];
+		struct fm10k_tx_buffer *tx_buffer = &tx_ring->tx_buffer[i];
+
 		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
 	}
 
@@ -253,8 +253,7 @@
 	if (!rx_ring->rx_buffer)
 		return;
 
-	if (rx_ring->skb)
-		dev_kfree_skb(rx_ring->skb);
+	dev_kfree_skb(rx_ring->skb);
 	rx_ring->skb = NULL;
 
 	/* Free all the Rx ring sk_buffs */
@@ -871,7 +870,7 @@
 	u16 glort = interface->glort;
 	u16 vid = interface->vid;
 	bool set = !!(vid / VLAN_N_VID);
-	int err = -EHOSTDOWN;
+	int err;
 
 	/* drop any leading bits on the VLAN ID */
 	vid &= VLAN_N_VID - 1;
@@ -891,7 +890,7 @@
 	u16 glort = interface->glort;
 	u16 vid = interface->vid;
 	bool set = !!(vid / VLAN_N_VID);
-	int err = -EHOSTDOWN;
+	int err;
 
 	/* drop any leading bits on the VLAN ID */
 	vid &= VLAN_N_VID - 1;
@@ -1444,11 +1443,11 @@
 static void fm10k_assign_l2_accel(struct fm10k_intfc *interface,
 				  struct fm10k_l2_accel *l2_accel)
 {
-	struct fm10k_ring *ring;
 	int i;
 
 	for (i = 0; i < interface->num_rx_queues; i++) {
-		ring = interface->rx_ring[i];
+		struct fm10k_ring *ring = interface->rx_ring[i];
+
 		rcu_assign_pointer(ring->l2_accel, l2_accel);
 	}
 
@@ -1463,7 +1462,7 @@
 	struct fm10k_l2_accel *old_l2_accel = NULL;
 	struct fm10k_dglort_cfg dglort = { 0 };
 	struct fm10k_hw *hw = &interface->hw;
-	int size = 0, i;
+	int size, i;
 	u16 vid, glort;
 
 	/* The hardware supported by fm10k only filters on the destination MAC
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index c859aba..bb236fa 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -23,6 +23,8 @@
  */
 static const struct pci_device_id fm10k_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf },
 	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf },
 	/* required last entry */
 	{ 0, }
@@ -342,7 +344,6 @@
 	struct net_device *netdev = interface->netdev;
 	u32 __iomem *hw_addr;
 	u32 value;
-	int err;
 
 	/* do nothing if netdev is still present or hw_addr is set */
 	if (netif_device_present(netdev) || interface->hw.hw_addr)
@@ -360,6 +361,8 @@
 	hw_addr = READ_ONCE(interface->uc_addr);
 	value = readl(hw_addr);
 	if (~value) {
+		int err;
+
 		/* Make sure the reset was initiated because we detached,
 		 * otherwise we might race with a different reset flow.
 		 */
@@ -695,8 +698,6 @@
  */
 static void fm10k_check_hang_subtask(struct fm10k_intfc *interface)
 {
-	int i;
-
 	/* If we're down or resetting, just bail */
 	if (test_bit(__FM10K_DOWN, interface->state) ||
 	    test_bit(__FM10K_RESETTING, interface->state))
@@ -708,6 +709,8 @@
 	interface->next_tx_hang_check = jiffies + (2 * HZ);
 
 	if (netif_carrier_ok(interface->netdev)) {
+		int i;
+
 		/* Force detection of hung controller */
 		for (i = 0; i < interface->num_tx_queues; i++)
 			set_check_for_tx_hang(interface->tx_ring[i]);
@@ -895,7 +898,7 @@
 
 	/* Map interrupt */
 	if (ring->q_vector) {
-		txint = ring->q_vector->v_idx + NON_Q_VECTORS(hw);
+		txint = ring->q_vector->v_idx + NON_Q_VECTORS;
 		txint |= FM10K_INT_MAP_TIMER0;
 	}
 
@@ -1034,7 +1037,7 @@
 
 	/* Map interrupt */
 	if (ring->q_vector) {
-		rxint = ring->q_vector->v_idx + NON_Q_VECTORS(hw);
+		rxint = ring->q_vector->v_idx + NON_Q_VECTORS;
 		rxint |= FM10K_INT_MAP_TIMER1;
 	}
 
@@ -1717,10 +1720,9 @@
 void fm10k_qv_free_irq(struct fm10k_intfc *interface)
 {
 	int vector = interface->num_q_vectors;
-	struct fm10k_hw *hw = &interface->hw;
 	struct msix_entry *entry;
 
-	entry = &interface->msix_entries[NON_Q_VECTORS(hw) + vector];
+	entry = &interface->msix_entries[NON_Q_VECTORS + vector];
 
 	while (vector) {
 		struct fm10k_q_vector *q_vector;
@@ -1757,7 +1759,7 @@
 	unsigned int ri = 0, ti = 0;
 	int vector, err;
 
-	entry = &interface->msix_entries[NON_Q_VECTORS(hw)];
+	entry = &interface->msix_entries[NON_Q_VECTORS];
 
 	for (vector = 0; vector < interface->num_q_vectors; vector++) {
 		struct fm10k_q_vector *q_vector = interface->q_vector[vector];
@@ -2337,7 +2339,7 @@
 	/* Restart the MAC/VLAN request queue in-case of outstanding events */
 	fm10k_macvlan_schedule(interface);
 
-	return err;
+	return 0;
 }
 
 /**
@@ -2350,7 +2352,7 @@
  **/
 static int __maybe_unused fm10k_resume(struct device *dev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
+	struct fm10k_intfc *interface = dev_get_drvdata(dev);
 	struct net_device *netdev = interface->netdev;
 	struct fm10k_hw *hw = &interface->hw;
 	int err;
@@ -2377,7 +2379,7 @@
  **/
 static int __maybe_unused fm10k_suspend(struct device *dev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
+	struct fm10k_intfc *interface = dev_get_drvdata(dev);
 	struct net_device *netdev = interface->netdev;
 
 	netif_device_detach(netdev);
@@ -2440,8 +2442,6 @@
 		result = PCI_ERS_RESULT_RECOVERED;
 	}
 
-	pci_cleanup_aer_uncorrect_error_status(pdev);
-
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 8f0a99b..be07bfd 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_pf.h"
 #include "fm10k_vf.h"
@@ -1148,11 +1148,11 @@
  *  @results: Pointer array to message, results[0] is pointer to message
  *  @mbx: Pointer to mailbox information structure
  *
- *  This function is a default handler for MSI-X requests from the VF.  The
+ *  This function is a default handler for MSI-X requests from the VF. The
  *  assumption is that in this case it is acceptable to just directly
  *  hand off the message from the VF to the underlying shared code.
  **/
-s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
+s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 __always_unused **results,
 			  struct fm10k_mbx_info *mbx)
 {
 	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
@@ -1352,7 +1352,6 @@
 				 struct fm10k_mbx_info *mbx)
 {
 	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
-	u32 *result;
 	s32 err = 0;
 	u32 msg[2];
 	u8 mode = 0;
@@ -1362,7 +1361,7 @@
 		return FM10K_ERR_PARAM;
 
 	if (!!results[FM10K_LPORT_STATE_MSG_XCAST_MODE]) {
-		result = results[FM10K_LPORT_STATE_MSG_XCAST_MODE];
+		u32 *result = results[FM10K_LPORT_STATE_MSG_XCAST_MODE];
 
 		/* XCAST mode update requested */
 		err = fm10k_tlv_attr_get_u8(result, &mode);
@@ -1566,7 +1565,7 @@
 	/* read remaining fields */
 	fault->address = fm10k_read_reg(hw, type + FM10K_FAULT_ADDR_HI);
 	fault->address <<= 32;
-	fault->address = fm10k_read_reg(hw, type + FM10K_FAULT_ADDR_LO);
+	fault->address |= fm10k_read_reg(hw, type + FM10K_FAULT_ADDR_LO);
 	fault->specinfo = fm10k_read_reg(hw, type + FM10K_FAULT_SPECINFO);
 
 	/* clear valid bit to allow for next error */
@@ -1642,7 +1641,7 @@
  *  switch API.
  **/
 s32 fm10k_msg_lport_map_pf(struct fm10k_hw *hw, u32 **results,
-			   struct fm10k_mbx_info *mbx)
+			   struct fm10k_mbx_info __always_unused *mbx)
 {
 	u16 glort, mask;
 	u32 dglort_map;
@@ -1685,7 +1684,7 @@
  *  This handler configures the default VLAN for the PF
  **/
 static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
-				    struct fm10k_mbx_info *mbx)
+				    struct fm10k_mbx_info __always_unused *mbx)
 {
 	u16 glort, pvid;
 	u32 pvid_update;
@@ -1746,7 +1745,7 @@
  *  messages that the PF has sent.
  **/
 s32 fm10k_msg_err_pf(struct fm10k_hw *hw, u32 **results,
-		     struct fm10k_mbx_info *mbx)
+		     struct fm10k_mbx_info __always_unused *mbx)
 {
 	struct fm10k_swapi_error err_msg;
 	s32 err;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index 2a7a40b..21eff08 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_tlv.h"
 
@@ -472,7 +472,7 @@
 				const struct fm10k_tlv_attr *tlv_attr)
 {
 	u32 i, attr_id, offset = 0;
-	s32 err = 0;
+	s32 err;
 	u16 len;
 
 	/* verify pointers are not NULL */
@@ -587,8 +587,9 @@
  *  a minimum it just indicates that the message requested was
  *  unimplemented.
  **/
-s32 fm10k_tlv_msg_error(struct fm10k_hw *hw, u32 **results,
-			struct fm10k_mbx_info *mbx)
+s32 fm10k_tlv_msg_error(struct fm10k_hw __always_unused *hw,
+			u32 __always_unused **results,
+			struct fm10k_mbx_info __always_unused *mbx)
 {
 	return FM10K_NOT_IMPLEMENTED;
 }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 3e608e4..15ac1c7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #ifndef _FM10K_TYPE_H_
 #define _FM10K_TYPE_H_
@@ -15,6 +15,8 @@
 
 #define FM10K_DEV_ID_PF			0x15A4
 #define FM10K_DEV_ID_VF			0x15A5
+#define FM10K_DEV_ID_SDI_FM10420_QDA2	0x15D0
+#define FM10K_DEV_ID_SDI_FM10420_DA2	0x15D5
 
 #define FM10K_MAX_QUEUES		256
 #define FM10K_MAX_QUEUES_PF		128
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index a8519c1..dc8ccd3 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #include "fm10k_vf.h"
 
@@ -198,7 +198,7 @@
  *  This function should determine the MAC address for the VF
  **/
 s32 fm10k_msg_mac_vlan_vf(struct fm10k_hw *hw, u32 **results,
-			  struct fm10k_mbx_info *mbx)
+			  struct fm10k_mbx_info __always_unused *mbx)
 {
 	u8 perm_addr[ETH_ALEN];
 	u16 vid;
@@ -267,8 +267,10 @@
  *  This function is used to add or remove unicast MAC addresses for
  *  the VF.
  **/
-static s32 fm10k_update_uc_addr_vf(struct fm10k_hw *hw, u16 glort,
-				   const u8 *mac, u16 vid, bool add, u8 flags)
+static s32 fm10k_update_uc_addr_vf(struct fm10k_hw *hw,
+				   u16 __always_unused glort,
+				   const u8 *mac, u16 vid, bool add,
+				   u8 __always_unused flags)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 msg[7];
@@ -309,7 +311,8 @@
  *  This function is used to add or remove multicast MAC addresses for
  *  the VF.
  **/
-static s32 fm10k_update_mc_addr_vf(struct fm10k_hw *hw, u16 glort,
+static s32 fm10k_update_mc_addr_vf(struct fm10k_hw *hw,
+				   u16 __always_unused glort,
 				   const u8 *mac, u16 vid, bool add)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
@@ -373,7 +376,7 @@
  *  are ready to bring up the interface.
  **/
 s32 fm10k_msg_lport_state_vf(struct fm10k_hw *hw, u32 **results,
-			     struct fm10k_mbx_info *mbx)
+			     struct fm10k_mbx_info __always_unused *mbx)
 {
 	hw->mac.dglort_map = !results[FM10K_LPORT_STATE_MSG_READY] ?
 			     FM10K_DGLORTMAP_NONE : FM10K_DGLORTMAP_ZERO;
@@ -392,8 +395,9 @@
  *  enabled we can add filters, if it is disabled all filters for this
  *  logical port are flushed.
  **/
-static s32 fm10k_update_lport_state_vf(struct fm10k_hw *hw, u16 glort,
-				       u16 count, bool enable)
+static s32 fm10k_update_lport_state_vf(struct fm10k_hw *hw,
+				       u16 __always_unused glort,
+				       u16 __always_unused count, bool enable)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 msg[2];
@@ -420,7 +424,8 @@
  *  so that it can enable either multicast, multicast promiscuous, or
  *  promiscuous mode of operation.
  **/
-static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw, u16 glort, u8 mode)
+static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw,
+				      u16 __always_unused glort, u8 mode)
 {
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 msg[3];
@@ -475,7 +480,7 @@
  *  that information to then populate a DGLORTMAP/DEC entry and the queues
  *  to which it has been assigned.
  **/
-static s32 fm10k_configure_dglort_map_vf(struct fm10k_hw *hw,
+static s32 fm10k_configure_dglort_map_vf(struct fm10k_hw __always_unused *hw,
 					 struct fm10k_dglort_cfg *dglort)
 {
 	/* verify the dglort pointer */
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 14397e7..2f21b3e 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -21,7 +21,9 @@
 	i40e_diag.o	\
 	i40e_txrx.o	\
 	i40e_ptp.o	\
+	i40e_ddp.o \
 	i40e_client.o   \
-	i40e_virtchnl_pf.o
+	i40e_virtchnl_pf.o \
+	i40e_xsk.o
 
 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 7a80652..2af9f63 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -27,6 +27,7 @@
 #include <net/ip6_checksum.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
 #include <linux/if_bridge.h>
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
@@ -34,6 +35,7 @@
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/xdp_sock.h>
 #include "i40e_type.h"
 #include "i40e_prototype.h"
 #include "i40e_client.h"
@@ -122,13 +124,13 @@
 	__I40E_MDD_EVENT_PENDING,
 	__I40E_VFLR_EVENT_PENDING,
 	__I40E_RESET_RECOVERY_PENDING,
+	__I40E_TIMEOUT_RECOVERY_PENDING,
 	__I40E_MISC_IRQ_REQUESTED,
 	__I40E_RESET_INTR_RECEIVED,
 	__I40E_REINIT_REQUESTED,
 	__I40E_PF_RESET_REQUESTED,
 	__I40E_CORE_RESET_REQUESTED,
 	__I40E_GLOBAL_RESET_REQUESTED,
-	__I40E_EMP_RESET_REQUESTED,
 	__I40E_EMP_RESET_INTR_RECEIVED,
 	__I40E_SUSPENDED,
 	__I40E_PTP_TX_IN_PROGRESS,
@@ -146,6 +148,8 @@
 	__I40E_CLIENT_SERVICE_REQUESTED,
 	__I40E_CLIENT_L2_CHANGE,
 	__I40E_CLIENT_RESET,
+	__I40E_VIRTCHNL_OP_PENDING,
+	__I40E_RECOVERY_MODE,
 	/* This must be last as it determines the size of the BITMAP */
 	__I40E_STATE_SIZE__,
 };
@@ -239,11 +243,11 @@
 	u32 fd_id;
 };
 
-#define I40E_CLOUD_FIELD_OMAC	0x01
-#define I40E_CLOUD_FIELD_IMAC	0x02
-#define I40E_CLOUD_FIELD_IVLAN	0x04
-#define I40E_CLOUD_FIELD_TEN_ID	0x08
-#define I40E_CLOUD_FIELD_IIP	0x10
+#define I40E_CLOUD_FIELD_OMAC		BIT(0)
+#define I40E_CLOUD_FIELD_IMAC		BIT(1)
+#define I40E_CLOUD_FIELD_IVLAN		BIT(2)
+#define I40E_CLOUD_FIELD_TEN_ID		BIT(3)
+#define I40E_CLOUD_FIELD_IIP		BIT(4)
 
 #define I40E_CLOUD_FILTER_FLAGS_OMAC	I40E_CLOUD_FIELD_OMAC
 #define I40E_CLOUD_FILTER_FLAGS_IMAC	I40E_CLOUD_FIELD_IMAC
@@ -291,8 +295,6 @@
 	u8 tunnel_type;
 };
 
-#define I40E_ETH_P_LLDP			0x88cc
-
 #define I40E_DCB_PRIO_TYPE_STRICT	0
 #define I40E_DCB_PRIO_TYPE_ETS		1
 #define I40E_DCB_STRICT_PRIO_CREDITS	127
@@ -318,6 +320,29 @@
 	u8 filter_index;
 };
 
+#define I40_DDP_FLASH_REGION 100
+#define I40E_PROFILE_INFO_SIZE 48
+#define I40E_MAX_PROFILE_NUM 16
+#define I40E_PROFILE_LIST_SIZE \
+	(I40E_PROFILE_INFO_SIZE * I40E_MAX_PROFILE_NUM + 4)
+#define I40E_DDP_PROFILE_PATH "intel/i40e/ddp/"
+#define I40E_DDP_PROFILE_NAME_MAX 64
+
+int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+		  bool is_add);
+int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash);
+
+struct i40e_ddp_profile_list {
+	u32 p_count;
+	struct i40e_profile_info p_info[0];
+};
+
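+/* snapshot of a previously applied DDP profile, kept for rollback */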
+struct i40e_ddp_old_profile_list {
+	struct list_head list;
+	size_t old_ddp_size;
+	u8 old_ddp_buf[0];
+};
+
 /* macros related to FLX_PIT */
 #define I40E_FLEX_SET_FSIZE(fsize) (((fsize) << \
 				    I40E_PRTQF_FLX_PIT_FSIZE_SHIFT) & \
@@ -387,6 +412,11 @@
 	u8 pit_index;
 };
 
+struct i40e_fwd_adapter {
+	struct net_device *netdev;
+	int bit_no;
+};
+
 struct i40e_channel {
 	struct list_head list;
 	bool initialized;
@@ -401,11 +431,25 @@
 	struct i40e_aqc_vsi_properties_data info;
 
 	u64 max_tx_rate;
+	struct i40e_fwd_adapter *fwd;
 
 	/* track this channel belongs to which VSI */
 	struct i40e_vsi *parent_vsi;
 };
 
+static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
+{
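+	/* a channel carries a macvlan offload when a forwarding adapter
+	 * is attached
+	 */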
+	return !!ch->fwd;
+}
+
+static inline u8 *i40e_channel_mac(struct i40e_channel *ch)
+{
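+	/* return the macvlan netdev's MAC address, or NULL if none attached */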
+	if (i40e_is_channel_macvlan(ch))
+		return ch->fwd->netdev->dev_addr;
+	else
+		return NULL;
+}
+
 /* struct that defines the Ethernet device */
 struct i40e_pf {
 	struct pci_dev *pdev;
@@ -494,7 +538,6 @@
 #define I40E_HW_STOP_FW_LLDP			BIT(16)
 #define I40E_HW_PORT_ID_VALID			BIT(17)
 #define I40E_HW_RESTART_AUTONEG			BIT(18)
-#define I40E_HW_STOPPABLE_FW_LLDP		BIT(19)
 
 	u32 flags;
 #define I40E_FLAG_RX_CSUM_ENABLED		BIT(0)
@@ -522,6 +565,8 @@
 #define I40E_FLAG_FD_SB_INACTIVE		BIT(22)
 #define I40E_FLAG_FD_SB_TO_CLOUD_FILTER		BIT(23)
 #define I40E_FLAG_DISABLE_FW_LLDP		BIT(24)
+#define I40E_FLAG_RS_FEC			BIT(25)
+#define I40E_FLAG_BASE_R_FEC			BIT(26)
 
 	struct i40e_client_instance *cinst;
 	bool stat_offsets_loaded;
@@ -585,6 +630,8 @@
 	struct sk_buff *ptp_tx_skb;
 	unsigned long ptp_tx_start;
 	struct hwtstamp_config tstamp_config;
+	struct timespec64 ptp_prev_hw_time;
+	ktime_t ptp_reset_start;
 	struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
 	u32 ptp_adj_mult;
 	u32 tx_hwtstamp_timeouts;
@@ -606,6 +653,8 @@
 	u16 override_q_count;
 	u16 last_sw_conf_flags;
 	u16 last_sw_conf_valid_flags;
+	/* List of previously applied DDP profiles, kept for later rollback */
+	struct list_head ddp_old_prof;
 };
 
 /**
@@ -745,7 +794,8 @@
 	u16 alloc_queue_pairs;	/* Allocated Tx/Rx queues */
 	u16 req_queue_pairs;	/* User requested queue pairs */
 	u16 num_queue_pairs;	/* Used tx and rx pairs */
-	u16 num_desc;
+	u16 num_tx_desc;
+	u16 num_rx_desc;
 	enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
 	s16 vf_id;		/* Virtual function ID for SRIOV VSIs */
 
@@ -782,10 +832,19 @@
 	struct list_head ch_list;
 	u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS];
 
+	/* macvlan fields */
+#define I40E_MAX_MACVLANS		128 /* Max HW vectors - 1 on FVL */
+#define I40E_MIN_MACVLAN_VECTORS	2   /* Min vectors to enable macvlans */
+	DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS);
+	struct list_head macvlan_list;
+	int macvlan_cnt;
+
 	void *priv;	/* client driver data reference. */
 
 	/* VSI specific handlers */
 	irqreturn_t (*irq_handler)(int irq, void *data);
+
+	unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
 } ____cacheline_internodealigned_in_smp;
 
 struct i40e_netdev_priv {
@@ -961,6 +1020,7 @@
 	return NULL;
 }
 void i40e_update_stats(struct i40e_vsi *vsi);
+void i40e_update_veb_stats(struct i40e_veb *veb);
 void i40e_update_eth_stats(struct i40e_vsi *vsi);
 struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
 int i40e_fetch_switch_configuration(struct i40e_pf *pf,
@@ -1077,6 +1137,8 @@
 void i40e_ptp_set_increment(struct i40e_pf *pf);
 int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr);
 int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr);
+void i40e_ptp_save_hw_time(struct i40e_pf *pf);
+void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
 void i40e_ptp_init(struct i40e_pf *pf);
 void i40e_ptp_stop(struct i40e_pf *pf);
 int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
@@ -1085,6 +1147,8 @@
 i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf);
 void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
 
+void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
+
 static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
 {
 	return !!vsi->xdp_prog;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 501ee71..72c0488 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -588,6 +588,12 @@
 	    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
 	    hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
 		hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+	}
+	if (hw->mac.type == I40E_MAC_X722 &&
+	    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+	    hw->aq.api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722) {
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
 	}
 
 	/* Newer versions of firmware require lock when reading the NVM */
@@ -602,6 +608,13 @@
 	     hw->aq.api_min_ver >= 7))
 		hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
 
+	if (hw->aq.api_maj_ver > 1 ||
+	    (hw->aq.api_maj_ver == 1 &&
+	     hw->aq.api_min_ver >= 8)) {
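+		/* FW API v1.8+ supports a persistent LLDP agent and drop mode */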
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
+		hw->flags |= I40E_HW_FLAG_DROP_MODE;
+	}
+
 	if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
 		ret_code = I40E_ERR_FIRMWARE_API_VERSION;
 		goto init_adminq_free_arq;
@@ -664,7 +677,7 @@
 	desc = I40E_ADMINQ_DESC(*asq, ntc);
 	details = I40E_ADMINQ_DETAILS(*asq, ntc);
 	while (rd32(hw, hw->aq.asq.head) != ntc) {
-		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+		i40e_debug(hw, I40E_DEBUG_AQ_COMMAND,
 			   "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head));
 
 		if (details->callback) {
@@ -743,7 +756,7 @@
 	if (val >= hw->aq.num_asq_entries) {
 		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
 			   "AQTX: head overrun at %d\n", val);
-		status = I40E_ERR_QUEUE_EMPTY;
+		status = I40E_ERR_ADMIN_QUEUE_FULL;
 		goto asq_send_command_error;
 	}
 
@@ -824,7 +837,7 @@
 	}
 
 	/* bump the tail */
-	i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n");
+	i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQTX: desc and buffer:\n");
 	i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
 		      buff, buff_size);
 	(hw->aq.asq.next_to_use)++;
@@ -875,7 +888,7 @@
 		hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
 	}
 
-	i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+	i40e_debug(hw, I40E_DEBUG_AQ_COMMAND,
 		   "AQTX: desc and buffer writeback:\n");
 	i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size);
 
@@ -984,7 +997,7 @@
 		memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va,
 		       e->msg_len);
 
-	i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n");
+	i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQRX: desc and buffer:\n");
 	i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf,
 		      hw->aq.arq_buf_size);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 80e3eec..69a2daa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -11,8 +11,8 @@
  */
 
 #define I40E_FW_API_VERSION_MAJOR	0x0001
-#define I40E_FW_API_VERSION_MINOR_X722	0x0005
-#define I40E_FW_API_VERSION_MINOR_X710	0x0007
+#define I40E_FW_API_VERSION_MINOR_X722	0x0009
+#define I40E_FW_API_VERSION_MINOR_X710	0x0009
 
 #define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
 					I40E_FW_API_VERSION_MINOR_X710 : \
@@ -20,6 +20,10 @@
 
 /* API version 1.7 implements additional link and PHY-specific APIs  */
 #define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
+/* API version 1.9 for X722 implements additional link and PHY-specific APIs */
+#define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009
+/* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */
+#define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006
 
 struct i40e_aq_desc {
 	__le16 flags;
@@ -259,6 +263,7 @@
 	i40e_aqc_opc_get_cee_dcb_cfg	= 0x0A07,
 	i40e_aqc_opc_lldp_set_local_mib	= 0x0A08,
 	i40e_aqc_opc_lldp_stop_start_spec_agent	= 0x0A09,
+	i40e_aqc_opc_lldp_restore		= 0x0A0A,
 
 	/* Tunnel commands */
 	i40e_aqc_opc_add_udp_tunnel	= 0x0B00,
@@ -1379,7 +1384,7 @@
 #define I40E_AQC_ADD_CLOUD_FILTER_MASK	(0x3F << \
 					I40E_AQC_ADD_CLOUD_FILTER_SHIFT)
 /* 0x0000 reserved */
-#define I40E_AQC_ADD_CLOUD_FILTER_OIP			0x0001
+/* 0x0001 reserved */
 /* 0x0002 reserved */
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN		0x0003
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID	0x0004
@@ -1391,6 +1396,9 @@
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC			0x000A
 #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC	0x000B
 #define I40E_AQC_ADD_CLOUD_FILTER_IIP			0x000C
+/* 0x000D reserved */
+/* 0x000E reserved */
+/* 0x000F reserved */
 /* 0x0010 to 0x0017 is for custom filters */
 #define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT		0x0010 /* Dest IP + L4 Port */
 #define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT		0x0011 /* Dest MAC + L4 Port */
@@ -1885,6 +1893,8 @@
 	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
 	I40E_PHY_TYPE_25GBASE_AOC		= 0x23,
 	I40E_PHY_TYPE_25GBASE_ACC		= 0x24,
+	I40E_PHY_TYPE_2_5GBASE_T		= 0x30,
+	I40E_PHY_TYPE_5GBASE_T			= 0x31,
 	I40E_PHY_TYPE_MAX,
 	I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP	= 0xFD,
 	I40E_PHY_TYPE_EMPTY			= 0xFE,
@@ -1926,19 +1936,25 @@
 				BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
 				BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
 				BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
-				BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC) | \
+				BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T) | \
+				BIT_ULL(I40E_PHY_TYPE_5GBASE_T))
 
+#define I40E_LINK_SPEED_2_5GB_SHIFT	0x0
 #define I40E_LINK_SPEED_100MB_SHIFT	0x1
 #define I40E_LINK_SPEED_1000MB_SHIFT	0x2
 #define I40E_LINK_SPEED_10GB_SHIFT	0x3
 #define I40E_LINK_SPEED_40GB_SHIFT	0x4
 #define I40E_LINK_SPEED_20GB_SHIFT	0x5
 #define I40E_LINK_SPEED_25GB_SHIFT	0x6
+#define I40E_LINK_SPEED_5GB_SHIFT	0x7
 
 enum i40e_aq_link_speed {
 	I40E_LINK_SPEED_UNKNOWN	= 0,
 	I40E_LINK_SPEED_100MB	= BIT(I40E_LINK_SPEED_100MB_SHIFT),
 	I40E_LINK_SPEED_1GB	= BIT(I40E_LINK_SPEED_1000MB_SHIFT),
+	I40E_LINK_SPEED_2_5GB	= (1 << I40E_LINK_SPEED_2_5GB_SHIFT),
+	I40E_LINK_SPEED_5GB	= (1 << I40E_LINK_SPEED_5GB_SHIFT),
 	I40E_LINK_SPEED_10GB	= BIT(I40E_LINK_SPEED_10GB_SHIFT),
 	I40E_LINK_SPEED_40GB	= BIT(I40E_LINK_SPEED_40GB_SHIFT),
 	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT),
@@ -1984,6 +2000,8 @@
 #define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
 #define I40E_AQ_PHY_TYPE_EXT_25G_AOC	0x10
 #define I40E_AQ_PHY_TYPE_EXT_25G_ACC	0x20
+#define I40E_AQ_PHY_TYPE_EXT_2_5GBASE_T	0x40
+#define I40E_AQ_PHY_TYPE_EXT_5GBASE_T	0x80
 	u8	fec_cfg_curr_mod_ext_info;
 #define I40E_AQ_ENABLE_FEC_KR		0x01
 #define I40E_AQ_ENABLE_FEC_RS		0x02
@@ -2038,20 +2056,21 @@
 struct i40e_aq_set_mac_config {
 	__le16	max_frame_size;
 	u8	params;
-#define I40E_AQ_SET_MAC_CONFIG_CRC_EN		0x04
-#define I40E_AQ_SET_MAC_CONFIG_PACING_MASK	0x78
-#define I40E_AQ_SET_MAC_CONFIG_PACING_SHIFT	3
-#define I40E_AQ_SET_MAC_CONFIG_PACING_NONE	0x0
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1B_13TX	0xF
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_9TX	0x9
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_4TX	0x8
-#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_7TX	0x7
-#define I40E_AQ_SET_MAC_CONFIG_PACING_2DW_3TX	0x6
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_1TX	0x5
-#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_2TX	0x4
-#define I40E_AQ_SET_MAC_CONFIG_PACING_7DW_3TX	0x3
-#define I40E_AQ_SET_MAC_CONFIG_PACING_4DW_1TX	0x2
-#define I40E_AQ_SET_MAC_CONFIG_PACING_9DW_1TX	0x1
+#define I40E_AQ_SET_MAC_CONFIG_CRC_EN			0x04
+#define I40E_AQ_SET_MAC_CONFIG_PACING_MASK		0x78
+#define I40E_AQ_SET_MAC_CONFIG_PACING_SHIFT		3
+#define I40E_AQ_SET_MAC_CONFIG_PACING_NONE		0x0
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1B_13TX		0xF
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_9TX		0x9
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_4TX		0x8
+#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_7TX		0x7
+#define I40E_AQ_SET_MAC_CONFIG_PACING_2DW_3TX		0x6
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_1TX		0x5
+#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_2TX		0x4
+#define I40E_AQ_SET_MAC_CONFIG_PACING_7DW_3TX		0x3
+#define I40E_AQ_SET_MAC_CONFIG_PACING_4DW_1TX		0x2
+#define I40E_AQ_SET_MAC_CONFIG_PACING_9DW_1TX		0x1
+#define I40E_AQ_SET_MAC_CONFIG_DROP_BLOCKING_PACKET_EN	0x80
 	u8	tx_timer_priority; /* bitmap */
 	__le16	tx_timer_value;
 	__le16	fc_refresh_threshold;
@@ -2496,18 +2515,19 @@
 /* Stop LLDP (direct 0x0A05) */
 struct i40e_aqc_lldp_stop {
 	u8	command;
-#define I40E_AQ_LLDP_AGENT_STOP		0x0
-#define I40E_AQ_LLDP_AGENT_SHUTDOWN	0x1
+#define I40E_AQ_LLDP_AGENT_STOP			0x0
+#define I40E_AQ_LLDP_AGENT_SHUTDOWN		0x1
+#define I40E_AQ_LLDP_AGENT_STOP_PERSIST		0x2
 	u8	reserved[15];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop);
 
 /* Start LLDP (direct 0x0A06) */
-
 struct i40e_aqc_lldp_start {
 	u8	command;
-#define I40E_AQ_LLDP_AGENT_START	0x1
+#define I40E_AQ_LLDP_AGENT_START		0x1
+#define I40E_AQ_LLDP_AGENT_START_PERSIST	0x2
 	u8	reserved[15];
 };
 
@@ -2631,6 +2651,16 @@
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop_start_specific_agent);
 
+/* Restore LLDP Agent factory settings (direct 0x0A0A) */
+struct i40e_aqc_lldp_restore {
+	u8	command;
+#define I40E_AQ_LLDP_AGENT_RESTORE_NOT		0x0
+#define I40E_AQ_LLDP_AGENT_RESTORE		0x1
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_restore);
+
 /* Add Udp Tunnel command and completion (direct 0x0B00) */
 struct i40e_aqc_add_udp_tunnel {
 	__le16	udp_port;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 5f3b8b9..e81530c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -578,11 +578,9 @@
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_qv_info *qv_info;
 	u32 v_idx, i, reg_idx, reg;
-	u32 size;
 
-	size = sizeof(struct i40e_qvlist_info) +
-	       (sizeof(struct i40e_qv_info) * (qvlist_info->num_vectors - 1));
-	ldev->qvlist_info = kzalloc(size, GFP_KERNEL);
+	ldev->qvlist_info = kzalloc(struct_size(ldev->qvlist_info, qv_info,
+				    qvlist_info->num_vectors - 1), GFP_KERNEL);
 	if (!ldev->qvlist_info)
 		return -ENOMEM;
 	ldev->qvlist_info->num_vectors = qvlist_info->num_vectors;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 85f75b5..7560f06 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
+#include "i40e.h"
 #include "i40e_type.h"
 #include "i40e_adminq.h"
 #include "i40e_prototype.h"
@@ -13,7 +14,7 @@
  * This function sets the mac type of the adapter based on the
  * vendor ID and device ID stored in the hw structure.
  **/
-static i40e_status i40e_set_mac_type(struct i40e_hw *hw)
+i40e_status i40e_set_mac_type(struct i40e_hw *hw)
 {
 	i40e_status status = 0;
 
@@ -28,10 +29,14 @@
 		case I40E_DEV_ID_QSFP_C:
 		case I40E_DEV_ID_10G_BASE_T:
 		case I40E_DEV_ID_10G_BASE_T4:
+		case I40E_DEV_ID_10G_B:
+		case I40E_DEV_ID_10G_SFP:
 		case I40E_DEV_ID_20G_KR2:
 		case I40E_DEV_ID_20G_KR2_A:
 		case I40E_DEV_ID_25G_B:
 		case I40E_DEV_ID_25G_SFP28:
+		case I40E_DEV_ID_X710_N3000:
+		case I40E_DEV_ID_XXV710_N3000:
 			hw->mac.type = I40E_MAC_XL710;
 			break;
 		case I40E_DEV_ID_KX_X722:
@@ -277,47 +282,49 @@
 		   void *buffer, u16 buf_len)
 {
 	struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
+	u32 effective_mask = hw->debug_mask & mask;
+	char prefix[27];
 	u16 len;
 	u8 *buf = (u8 *)buffer;
 
-	if ((!(mask & hw->debug_mask)) || (desc == NULL))
+	if (!effective_mask || !desc)
 		return;
 
 	len = le16_to_cpu(aq_desc->datalen);
 
-	i40e_debug(hw, mask,
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
 		   "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
 		   le16_to_cpu(aq_desc->opcode),
 		   le16_to_cpu(aq_desc->flags),
 		   le16_to_cpu(aq_desc->datalen),
 		   le16_to_cpu(aq_desc->retval));
-	i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "\tcookie (h,l) 0x%08X 0x%08X\n",
 		   le32_to_cpu(aq_desc->cookie_high),
 		   le32_to_cpu(aq_desc->cookie_low));
-	i40e_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "\tparam (0,1)  0x%08X 0x%08X\n",
 		   le32_to_cpu(aq_desc->params.internal.param0),
 		   le32_to_cpu(aq_desc->params.internal.param1));
-	i40e_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "\taddr (h,l)   0x%08X 0x%08X\n",
 		   le32_to_cpu(aq_desc->params.external.addr_high),
 		   le32_to_cpu(aq_desc->params.external.addr_low));
 
-	if ((buffer != NULL) && (aq_desc->datalen != 0)) {
+	if (buffer && buf_len != 0 && len != 0 &&
+	    (effective_mask & I40E_DEBUG_AQ_DESC_BUFFER)) {
 		i40e_debug(hw, mask, "AQ CMD Buffer:\n");
 		if (buf_len < len)
 			len = buf_len;
-		/* write the full 16-byte chunks */
-		if (hw->debug_mask & mask) {
-			char prefix[27];
 
-			snprintf(prefix, sizeof(prefix),
-				 "i40e %02x:%02x.%x: \t0x",
-				 hw->bus.bus_id,
-				 hw->bus.device,
-				 hw->bus.func);
+		snprintf(prefix, sizeof(prefix),
+			 "i40e %02x:%02x.%x: \t0x",
+			 hw->bus.bus_id,
+			 hw->bus.device,
+			 hw->bus.func);
 
-			print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET,
-				       16, 1, buf, len, false);
-		}
+		print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET,
+			       16, 1, buf, len, false);
 	}
 }
 
@@ -1149,6 +1156,8 @@
 		break;
 	case I40E_PHY_TYPE_100BASE_TX:
 	case I40E_PHY_TYPE_1000BASE_T:
+	case I40E_PHY_TYPE_2_5GBASE_T:
+	case I40E_PHY_TYPE_5GBASE_T:
 	case I40E_PHY_TYPE_10GBASE_T:
 		media = I40E_MEDIA_TYPE_BASET;
 		break;
@@ -1466,7 +1475,6 @@
  **/
 u32 i40e_led_get(struct i40e_hw *hw)
 {
-	u32 current_mode = 0;
 	u32 mode = 0;
 	int i;
 
@@ -1479,21 +1487,6 @@
 		if (!gpio_val)
 			continue;
 
-		/* ignore gpio LED src mode entries related to the activity
-		 * LEDs
-		 */
-		current_mode = ((gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
-				>> I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT);
-		switch (current_mode) {
-		case I40E_COMBINED_ACTIVITY:
-		case I40E_FILTER_ACTIVITY:
-		case I40E_MAC_ACTIVITY:
-		case I40E_LINK_ACTIVITY:
-			continue;
-		default:
-			break;
-		}
-
 		mode = (gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK) >>
 			I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT;
 		break;
@@ -1513,7 +1506,6 @@
  **/
 void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
 {
-	u32 current_mode = 0;
 	int i;
 
 	if (mode & 0xfffffff0)
@@ -1527,22 +1519,6 @@
 
 		if (!gpio_val)
 			continue;
-
-		/* ignore gpio LED src mode entries related to the activity
-		 * LEDs
-		 */
-		current_mode = ((gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
-				>> I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT);
-		switch (current_mode) {
-		case I40E_COMBINED_ACTIVITY:
-		case I40E_FILTER_ACTIVITY:
-		case I40E_MAC_ACTIVITY:
-		case I40E_LINK_ACTIVITY:
-			continue;
-		default:
-			break;
-		}
-
 		gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
 		/* this & is a bit of paranoia, but serves as a range check */
 		gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
@@ -1602,19 +1578,22 @@
 		status = i40e_asq_send_command(hw, &desc, abilities,
 					       abilities_size, cmd_details);
 
-		if (status)
-			break;
-
-		if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) {
+		switch (hw->aq.asq_last_status) {
+		case I40E_AQ_RC_EIO:
 			status = I40E_ERR_UNKNOWN_PHY;
 			break;
-		} else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) {
+		case I40E_AQ_RC_EAGAIN:
 			usleep_range(1000, 2000);
 			total_delay++;
 			status = I40E_ERR_TIMEOUT;
+			break;
+		/* also covers I40E_AQ_RC_OK */
+		default:
+			break;
 		}
-	} while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) &&
-		 (total_delay < max_delay));
+
+	} while ((hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) &&
+		(total_delay < max_delay));
 
 	if (status)
 		return status;
@@ -1668,25 +1647,15 @@
 	return status;
 }
 
-/**
- * i40e_set_fc
- * @hw: pointer to the hw struct
- * @aq_failures: buffer to return AdminQ failure information
- * @atomic_restart: whether to enable atomic link restart
- *
- * Set the requested flow control mode using set_phy_config.
- **/
-enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
-				  bool atomic_restart)
+static noinline_for_stack enum i40e_status_code
+i40e_set_fc_status(struct i40e_hw *hw,
+		   struct i40e_aq_get_phy_abilities_resp *abilities,
+		   bool atomic_restart)
 {
-	enum i40e_fc_mode fc_mode = hw->fc.requested_mode;
-	struct i40e_aq_get_phy_abilities_resp abilities;
 	struct i40e_aq_set_phy_config config;
-	enum i40e_status_code status;
+	enum i40e_fc_mode fc_mode = hw->fc.requested_mode;
 	u8 pause_mask = 0x0;
 
-	*aq_failures = 0x0;
-
 	switch (fc_mode) {
 	case I40E_FC_FULL:
 		pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_TX;
@@ -1702,6 +1671,48 @@
 		break;
 	}
 
+	memset(&config, 0, sizeof(struct i40e_aq_set_phy_config));
+	/* clear the old pause settings */
+	config.abilities = abilities->abilities & ~(I40E_AQ_PHY_FLAG_PAUSE_TX) &
+			   ~(I40E_AQ_PHY_FLAG_PAUSE_RX);
+	/* set the new abilities */
+	config.abilities |= pause_mask;
+	/* If the abilities have changed, then set the new config */
+	if (config.abilities == abilities->abilities)
+		return 0;
+
+	/* Auto restart link so settings take effect */
+	if (atomic_restart)
+		config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+	/* Copy over all the old settings */
+	config.phy_type = abilities->phy_type;
+	config.phy_type_ext = abilities->phy_type_ext;
+	config.link_speed = abilities->link_speed;
+	config.eee_capability = abilities->eee_capability;
+	config.eeer = abilities->eeer_val;
+	config.low_power_ctrl = abilities->d3_lpan;
+	config.fec_config = abilities->fec_cfg_curr_mod_ext_info &
+			    I40E_AQ_PHY_FEC_CONFIG_MASK;
+
+	return i40e_aq_set_phy_config(hw, &config, NULL);
+}
+
+/**
+ * i40e_set_fc
+ * @hw: pointer to the hw struct
+ * @aq_failures: buffer to return AdminQ failure information
+ * @atomic_restart: whether to enable atomic link restart
+ *
+ * Set the requested flow control mode using set_phy_config.
+ **/
+enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+				  bool atomic_restart)
+{
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	enum i40e_status_code status;
+
+	*aq_failures = 0x0;
+
 	/* Get the current phy config */
 	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
 					      NULL);
@@ -1710,31 +1721,10 @@
 		return status;
 	}
 
-	memset(&config, 0, sizeof(struct i40e_aq_set_phy_config));
-	/* clear the old pause settings */
-	config.abilities = abilities.abilities & ~(I40E_AQ_PHY_FLAG_PAUSE_TX) &
-			   ~(I40E_AQ_PHY_FLAG_PAUSE_RX);
-	/* set the new abilities */
-	config.abilities |= pause_mask;
-	/* If the abilities have changed, then set the new config */
-	if (config.abilities != abilities.abilities) {
-		/* Auto restart link so settings take effect */
-		if (atomic_restart)
-			config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
-		/* Copy over all the old settings */
-		config.phy_type = abilities.phy_type;
-		config.phy_type_ext = abilities.phy_type_ext;
-		config.link_speed = abilities.link_speed;
-		config.eee_capability = abilities.eee_capability;
-		config.eeer = abilities.eeer_val;
-		config.low_power_ctrl = abilities.d3_lpan;
-		config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
-				    I40E_AQ_PHY_FEC_CONFIG_MASK;
-		status = i40e_aq_set_phy_config(hw, &config, NULL);
+	status = i40e_set_fc_status(hw, &abilities, atomic_restart);
+	if (status)
+		*aq_failures |= I40E_SET_FC_AQ_FAIL_SET;
 
-		if (status)
-			*aq_failures |= I40E_SET_FC_AQ_FAIL_SET;
-	}
 	/* Update the link info */
 	status = i40e_update_link_info(hw);
 	if (status) {
@@ -1886,8 +1876,8 @@
 	     hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
 		hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
 
-	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
-	    hw->aq.api_min_ver >= 7) {
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE &&
+	    hw->mac.type != I40E_MAC_X722) {
 		__le32 tmp;
 
 		memcpy(&tmp, resp->link_type, sizeof(tmp));
@@ -2563,7 +2553,7 @@
  * i40e_updatelink_status - update status of the HW network link
  * @hw: pointer to the hw struct
  **/
-i40e_status i40e_update_link_info(struct i40e_hw *hw)
+noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
 {
 	struct i40e_aq_get_phy_abilities_resp abilities;
 	i40e_status status = 0;
@@ -3657,14 +3647,54 @@
 }
 
 /**
+ * i40e_aq_restore_lldp
+ * @hw: pointer to the hw struct
+ * @setting: pointer to factory setting variable or NULL
+ * @restore: True if factory settings should be restored
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Restore LLDP Agent factory settings if @restore is set to true. Otherwise
+ * only the factory setting is returned in the AQ response.
+ **/
+enum i40e_status_code
+i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
+		     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_lldp_restore *cmd =
+		(struct i40e_aqc_lldp_restore *)&desc.params.raw;
+	i40e_status status;
+
+	if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)) {
+		i40e_debug(hw, I40E_DEBUG_ALL,
+			   "Restore LLDP not supported by current FW version.\n");
+		return I40E_ERR_DEVICE_NOT_SUPPORTED;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_restore);
+
+	if (restore)
+		cmd->command |= I40E_AQ_LLDP_AGENT_RESTORE;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (setting)
+		*setting = cmd->command & 1;
+
+	return status;
+}
+
+/**
  * i40e_aq_stop_lldp
  * @hw: pointer to the hw struct
  * @shutdown_agent: True if LLDP Agent needs to be Shutdown
+ * @persist: True if stop of LLDP should be persistent across power cycles
  * @cmd_details: pointer to command details structure or NULL
  *
  * Stop or Shutdown the embedded LLDP Agent
  **/
 i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+				bool persist,
 				struct i40e_asq_cmd_details *cmd_details)
 {
 	struct i40e_aq_desc desc;
@@ -3677,6 +3707,14 @@
 	if (shutdown_agent)
 		cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN;
 
+	if (persist) {
+		if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)
+			cmd->command |= I40E_AQ_LLDP_AGENT_STOP_PERSIST;
+		else
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Persistent Stop LLDP not supported by current FW version.\n");
+	}
+
 	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
 	return status;
@@ -3686,13 +3724,14 @@
  * i40e_aq_start_lldp
  * @hw: pointer to the hw struct
  * @buff: buffer for result
+ * @persist: True if start of LLDP should be persistent across power cycles
  * @buff_size: buffer size
  * @cmd_details: pointer to command details structure or NULL
  *
  * Start the embedded LLDP Agent on all ports.
  **/
-i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
-				struct i40e_asq_cmd_details *cmd_details)
+i40e_status i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
+			       struct i40e_asq_cmd_details *cmd_details)
 {
 	struct i40e_aq_desc desc;
 	struct i40e_aqc_lldp_start *cmd =
@@ -3702,6 +3741,15 @@
 	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_start);
 
 	cmd->command = I40E_AQ_LLDP_AGENT_START;
+
+	if (persist) {
+		if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)
+			cmd->command |= I40E_AQ_LLDP_AGENT_START_PERSIST;
+		else
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Persistent Start LLDP not supported by current FW version.\n");
+	}
+
 	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
 	return status;
@@ -3723,6 +3771,9 @@
 		(struct i40e_aqc_set_dcb_parameters *)&desc.params.raw;
 	i40e_status status;
 
+	if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+		return I40E_ERR_DEVICE_NOT_SUPPORTED;
+
 	i40e_fill_default_direct_cmd_desc(&desc,
 					  i40e_aqc_opc_set_dcb_parameters);
 
@@ -4870,6 +4921,7 @@
 		break;
 	case I40E_DEV_ID_10G_BASE_T:
 	case I40E_DEV_ID_10G_BASE_T4:
+	case I40E_DEV_ID_10G_BASE_T_BC:
 	case I40E_DEV_ID_10G_BASE_T_X722:
 	case I40E_DEV_ID_25G_B:
 	case I40E_DEV_ID_25G_SFP28:
@@ -5445,6 +5497,163 @@
 	return NULL;
 }
 
+/* Get section table in profile */
+#define I40E_SECTION_TABLE(profile, sec_tbl)				\
+	do {								\
+		struct i40e_profile_segment *p = (profile);		\
+		u32 count;						\
+		u32 *nvm;						\
+		count = p->device_table_count;				\
+		nvm = (u32 *)&p->device_table[count];			\
+		sec_tbl = (struct i40e_section_table *)&nvm[nvm[0] + 1]; \
+	} while (0)
+
+/* Get section header in profile */
+#define I40E_SECTION_HEADER(profile, offset)				\
+	(struct i40e_profile_section_header *)((u8 *)(profile) + (offset))
+
+/**
+ * i40e_find_section_in_profile
+ * @section_type: the section type to search for (e.g., SECTION_TYPE_NOTE)
+ * @profile: pointer to the i40e segment header to be searched
+ *
+ * This function searches the i40e segment for a particular section type. On
+ * success it returns a pointer to the section header, otherwise it will
+ * return NULL.
+ **/
+struct i40e_profile_section_header *
+i40e_find_section_in_profile(u32 section_type,
+			     struct i40e_profile_segment *profile)
+{
+	struct i40e_profile_section_header *sec;
+	struct i40e_section_table *sec_tbl;
+	u32 sec_off;
+	u32 i;
+
+	if (profile->header.type != SEGMENT_TYPE_I40E)
+		return NULL;
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	for (i = 0; i < sec_tbl->section_count; i++) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+		if (sec->section.type == section_type)
+			return sec;
+	}
+
+	return NULL;
+}
+
+/**
+ * i40e_ddp_exec_aq_section - Execute generic AQ for DDP
+ * @hw: pointer to the hw struct
+ * @aq: command buffer containing all data to execute AQ
+ **/
+static enum i40e_status_code
+i40e_ddp_exec_aq_section(struct i40e_hw *hw,
+			 struct i40e_profile_aq_section *aq)
+{
+	i40e_status status;
+	struct i40e_aq_desc desc;
+	u8 *msg = NULL;
+	u16 msglen;
+
+	i40e_fill_default_direct_cmd_desc(&desc, aq->opcode);
+	desc.flags |= cpu_to_le16(aq->flags);
+	memcpy(desc.params.raw, aq->param, sizeof(desc.params.raw));
+
+	msglen = aq->datalen;
+	if (msglen) {
+		desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
+						I40E_AQ_FLAG_RD));
+		if (msglen > I40E_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+		desc.datalen = cpu_to_le16(msglen);
+		msg = &aq->data[0];
+	}
+
+	status = i40e_asq_send_command(hw, &desc, msg, msglen, NULL);
+
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_PACKAGE,
+			   "unable to exec DDP AQ opcode %u, error %d\n",
+			   aq->opcode, status);
+		return status;
+	}
+
+	/* copy returned desc to aq_buf */
+	memcpy(aq->param, desc.params.raw, sizeof(desc.params.raw));
+
+	return 0;
+}
+
+/**
+ * i40e_validate_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be validated
+ * @track_id: package tracking id
+ * @rollback: true if the profile is for rollback.
+ *
+ * Validates supported devices and profile's sections.
+ */
+static enum i40e_status_code
+i40e_validate_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+		      u32 track_id, bool rollback)
+{
+	struct i40e_profile_section_header *sec = NULL;
+	i40e_status status = 0;
+	struct i40e_section_table *sec_tbl;
+	u32 vendor_dev_id;
+	u32 dev_cnt;
+	u32 sec_off;
+	u32 i;
+
+	if (track_id == I40E_DDP_TRACKID_INVALID) {
+		i40e_debug(hw, I40E_DEBUG_PACKAGE, "Invalid track_id\n");
+		return I40E_NOT_SUPPORTED;
+	}
+
+	dev_cnt = profile->device_table_count;
+	for (i = 0; i < dev_cnt; i++) {
+		vendor_dev_id = profile->device_table[i].vendor_dev_id;
+		if ((vendor_dev_id >> 16) == PCI_VENDOR_ID_INTEL &&
+		    hw->device_id == (vendor_dev_id & 0xFFFF))
+			break;
+	}
+	if (dev_cnt && i == dev_cnt) {
+		i40e_debug(hw, I40E_DEBUG_PACKAGE,
+			   "Device doesn't support DDP\n");
+		return I40E_ERR_DEVICE_NOT_SUPPORTED;
+	}
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	/* Validate section types */
+	for (i = 0; i < sec_tbl->section_count; i++) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+		if (rollback) {
+			if (sec->section.type == SECTION_TYPE_MMIO ||
+			    sec->section.type == SECTION_TYPE_AQ ||
+			    sec->section.type == SECTION_TYPE_RB_AQ) {
+				i40e_debug(hw, I40E_DEBUG_PACKAGE,
+					   "Not a roll-back package\n");
+				return I40E_NOT_SUPPORTED;
+			}
+		} else {
+			if (sec->section.type == SECTION_TYPE_RB_AQ ||
+			    sec->section.type == SECTION_TYPE_RB_MMIO) {
+				i40e_debug(hw, I40E_DEBUG_PACKAGE,
+					   "Not an original package\n");
+				return I40E_NOT_SUPPORTED;
+			}
+		}
+	}
+
+	return status;
+}
+
 /**
  * i40e_write_profile
  * @hw: pointer to the hardware structure
@@ -5460,47 +5669,99 @@
 	i40e_status status = 0;
 	struct i40e_section_table *sec_tbl;
 	struct i40e_profile_section_header *sec = NULL;
-	u32 dev_cnt;
-	u32 vendor_dev_id;
-	u32 *nvm;
+	struct i40e_profile_aq_section *ddp_aq;
 	u32 section_size = 0;
 	u32 offset = 0, info = 0;
+	u32 sec_off;
 	u32 i;
 
-	dev_cnt = profile->device_table_count;
+	status = i40e_validate_profile(hw, profile, track_id, false);
+	if (status)
+		return status;
 
-	for (i = 0; i < dev_cnt; i++) {
-		vendor_dev_id = profile->device_table[i].vendor_dev_id;
-		if ((vendor_dev_id >> 16) == PCI_VENDOR_ID_INTEL)
-			if (hw->device_id == (vendor_dev_id & 0xFFFF))
-				break;
-	}
-	if (i == dev_cnt) {
-		i40e_debug(hw, I40E_DEBUG_PACKAGE, "Device doesn't support DDP");
-		return I40E_ERR_DEVICE_NOT_SUPPORTED;
-	}
-
-	nvm = (u32 *)&profile->device_table[dev_cnt];
-	sec_tbl = (struct i40e_section_table *)&nvm[nvm[0] + 1];
+	I40E_SECTION_TABLE(profile, sec_tbl);
 
 	for (i = 0; i < sec_tbl->section_count; i++) {
-		sec = (struct i40e_profile_section_header *)((u8 *)profile +
-					     sec_tbl->section_offset[i]);
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+		/* Process generic admin command */
+		if (sec->section.type == SECTION_TYPE_AQ) {
+			ddp_aq = (struct i40e_profile_aq_section *)&sec[1];
+			status = i40e_ddp_exec_aq_section(hw, ddp_aq);
+			if (status) {
+				i40e_debug(hw, I40E_DEBUG_PACKAGE,
+					   "Failed to execute aq: section %d, opcode %u\n",
+					   i, ddp_aq->opcode);
+				break;
+			}
+			sec->section.type = SECTION_TYPE_RB_AQ;
+		}
 
-		/* Skip 'AQ', 'note' and 'name' sections */
+		/* Skip any non-mmio sections */
 		if (sec->section.type != SECTION_TYPE_MMIO)
 			continue;
 
 		section_size = sec->section.size +
 			sizeof(struct i40e_profile_section_header);
 
-		/* Write profile */
+		/* Write MMIO section */
 		status = i40e_aq_write_ddp(hw, (void *)sec, (u16)section_size,
 					   track_id, &offset, &info, NULL);
 		if (status) {
 			i40e_debug(hw, I40E_DEBUG_PACKAGE,
-				   "Failed to write profile: offset %d, info %d",
-				   offset, info);
+				   "Failed to write profile: section %d, offset %d, info %d\n",
+				   i, offset, info);
+			break;
+		}
+	}
+	return status;
+}
+
+/**
+ * i40e_rollback_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be removed
+ * @track_id: package tracking id
+ *
+ * Rolls back previously loaded package.
+ */
+enum i40e_status_code
+i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+		      u32 track_id)
+{
+	struct i40e_profile_section_header *sec = NULL;
+	i40e_status status = 0;
+	struct i40e_section_table *sec_tbl;
+	u32 offset = 0, info = 0;
+	u32 section_size = 0;
+	u32 sec_off;
+	int i;
+
+	status = i40e_validate_profile(hw, profile, track_id, true);
+	if (status)
+		return status;
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	/* For rollback write sections in reverse */
+	for (i = sec_tbl->section_count - 1; i >= 0; i--) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+
+		/* Skip any non-rollback sections */
+		if (sec->section.type != SECTION_TYPE_RB_MMIO)
+			continue;
+
+		section_size = sec->section.size +
+			sizeof(struct i40e_profile_section_header);
+
+		/* Write roll-back MMIO section */
+		status = i40e_aq_write_ddp(hw, (void *)sec, (u16)section_size,
+					   track_id, &offset, &info, NULL);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_PACKAGE,
+				   "Failed to write profile: section %d, offset %d, info %d\n",
+				   i, offset, info);
 			break;
 		}
 	}
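
Note on the reworked retry loop in the get-PHY-abilities hunk above: only an I40E_AQ_RC_EAGAIN AdminQ status re-issues the command, I40E_AQ_RC_EIO becomes a hard error, and the default arm also covers I40E_AQ_RC_OK. A stripped-down sketch of the same control flow, where send_cmd() and last_status() are hypothetical stand-ins for the AdminQ plumbing:

#include <linux/delay.h>
#include <linux/errno.h>

/* Hypothetical stand-ins for the AdminQ plumbing; not driver API. */
enum aq_rc { AQ_RC_OK, AQ_RC_EIO, AQ_RC_EAGAIN };
struct cmd_ctx;
int send_cmd(struct cmd_ctx *ctx);
enum aq_rc last_status(struct cmd_ctx *ctx);

static int retry_cmd(struct cmd_ctx *ctx, unsigned int max_delay)
{
	unsigned int total_delay = 0;
	int status;

	do {
		status = send_cmd(ctx);
		switch (last_status(ctx)) {
		case AQ_RC_EIO:
			status = -EIO;		/* hard failure; loop condition exits */
			break;
		case AQ_RC_EAGAIN:
			usleep_range(1000, 2000);
			total_delay++;
			status = -ETIMEDOUT;	/* provisional until retries run out */
			break;
		default:			/* also covers AQ_RC_OK */
			break;
		}
	} while (last_status(ctx) == AQ_RC_EAGAIN && total_delay < max_delay);

	return status;
}
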
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 56bff8f..200a1cb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -863,22 +863,39 @@
 /**
  * i40e_init_dcb
  * @hw: pointer to the hw struct
+ * @enable_mib_change: enable mib change event
  *
  * Update DCB configuration from the Firmware
  **/
-i40e_status i40e_init_dcb(struct i40e_hw *hw)
+i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
 {
 	i40e_status ret = 0;
 	struct i40e_lldp_variables lldp_cfg;
 	u8 adminstatus = 0;
 
 	if (!hw->func_caps.dcb)
-		return ret;
+		return I40E_NOT_SUPPORTED;
 
 	/* Read LLDP NVM area */
-	ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
+	if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) {
+		u8 offset = 0;
+
+		if (hw->mac.type == I40E_MAC_XL710)
+			offset = I40E_LLDP_CURRENT_STATUS_XL710_OFFSET;
+		else if (hw->mac.type == I40E_MAC_X722)
+			offset = I40E_LLDP_CURRENT_STATUS_X722_OFFSET;
+		else
+			return I40E_NOT_SUPPORTED;
+
+		ret = i40e_read_nvm_module_data(hw,
+						I40E_SR_EMP_SR_SETTINGS_PTR,
+						offset, 1,
+						&lldp_cfg.adminstatus);
+	} else {
+		ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
+	}
 	if (ret)
-		return ret;
+		return I40E_ERR_NOT_READY;
 
 	/* Get the LLDP AdminStatus for the current port */
 	adminstatus = lldp_cfg.adminstatus >> (hw->port * 4);
@@ -887,7 +904,7 @@
 	/* LLDP agent disabled */
 	if (!adminstatus) {
 		hw->dcbx_status = I40E_DCBX_STATUS_DISABLED;
-		return ret;
+		return I40E_ERR_NOT_READY;
 	}
 
 	/* Get DCBX status */
@@ -896,26 +913,19 @@
 		return ret;
 
 	/* Check the DCBX Status */
-	switch (hw->dcbx_status) {
-	case I40E_DCBX_STATUS_DONE:
-	case I40E_DCBX_STATUS_IN_PROGRESS:
+	if (hw->dcbx_status == I40E_DCBX_STATUS_DONE ||
+	    hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) {
 		/* Get current DCBX configuration */
 		ret = i40e_get_dcb_config(hw);
 		if (ret)
 			return ret;
-		break;
-	case I40E_DCBX_STATUS_DISABLED:
-		return ret;
-	case I40E_DCBX_STATUS_NOT_STARTED:
-	case I40E_DCBX_STATUS_MULTIPLE_PEERS:
-	default:
-		break;
+	} else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
+		return I40E_ERR_NOT_READY;
 	}
 
 	/* Configure the LLDP MIB change event */
-	ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
-	if (ret)
-		return ret;
+	if (enable_mib_change)
+		ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 2b748a6..2a80c5d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -30,6 +30,8 @@
 #define I40E_CEE_SUBTYPE_APP_PRI	4
 
 #define I40E_CEE_MAX_FEAT_TYPE		3
+#define I40E_LLDP_CURRENT_STATUS_XL710_OFFSET	0x2B
+#define I40E_LLDP_CURRENT_STATUS_X722_OFFSET	0x31
 /* Defines for LLDP TLV header */
 #define I40E_LLDP_TLV_LEN_SHIFT		0
 #define I40E_LLDP_TLV_LEN_MASK		(0x01FF << I40E_LLDP_TLV_LEN_SHIFT)
@@ -124,5 +126,5 @@
 					     u8 bridgetype,
 					     struct i40e_dcbx_config *dcbcfg);
 i40e_status i40e_get_dcb_config(struct i40e_hw *hw);
-i40e_status i40e_init_dcb(struct i40e_hw *hw);
+i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change);
 #endif /* _I40E_DCB_H_ */
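
Note on the new i40e_init_dcb() contract: the "DCB not running" cases (missing DCB capability, LLDP agent disabled) now surface as I40E_NOT_SUPPORTED or I40E_ERR_NOT_READY rather than silent success, and the caller decides whether to arm LLDP MIB change events. A hedged sketch of a caller distinguishing the outcomes; the continue-without-DCB policy shown here is illustrative, not the driver's:

#include "i40e_dcb.h"

static int example_setup_dcb(struct i40e_hw *hw)
{
	/* second argument arms LLDP MIB change events */
	i40e_status err = i40e_init_dcb(hw, true);

	if (!err)
		return 0;		/* DCBX operational, config fetched */
	if (err == I40E_NOT_SUPPORTED || err == I40E_ERR_NOT_READY)
		return 0;		/* no DCB or agent off: run without it */
	return -EIO;			/* genuine firmware/NVM failure */
}
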
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
new file mode 100644
index 0000000..5e08f10
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+
+#include <linux/firmware.h>
+
+/**
+ * i40e_ddp_profiles_eq - checks if DDP profiles are equivalent
+ * @a: new profile info
+ * @b: old profile info
+ *
+ * Checks if DDP profiles are equivalent.
+ * Returns true if profiles are the same.
+ **/
+static bool i40e_ddp_profiles_eq(struct i40e_profile_info *a,
+				 struct i40e_profile_info *b)
+{
+	return a->track_id == b->track_id &&
+		!memcmp(&a->version, &b->version, sizeof(a->version)) &&
+		!memcmp(&a->name, &b->name, I40E_DDP_NAME_SIZE);
+}
+
+/**
+ * i40e_ddp_does_profile_exist - checks if DDP profile is already loaded
+ * @hw: HW data structure
+ * @pinfo: DDP profile information structure
+ *
+ * Checks if the DDP profile is already loaded.
+ * Returns >0 if the profile exists.
+ * Returns  0 if the profile is absent.
+ * Returns <0 if error.
+ **/
+static int i40e_ddp_does_profile_exist(struct i40e_hw *hw,
+				       struct i40e_profile_info *pinfo)
+{
+	struct i40e_ddp_profile_list *profile_list;
+	u8 buff[I40E_PROFILE_LIST_SIZE];
+	i40e_status status;
+	int i;
+
+	status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+				      NULL);
+	if (status)
+		return -1;
+
+	profile_list = (struct i40e_ddp_profile_list *)buff;
+	for (i = 0; i < profile_list->p_count; i++) {
+		if (i40e_ddp_profiles_eq(pinfo, &profile_list->p_info[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * i40e_ddp_profiles_overlap - checks if DDP profiles overlap.
+ * @new: new profile info
+ * @old: old profile info
+ *
+ * Checks if DDP profiles overlap.
+ * Returns true if the profiles overlap.
+ **/
+static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new,
+				      struct i40e_profile_info *old)
+{
+	unsigned int group_id_old = (u8)((old->track_id & 0x00FF0000) >> 16);
+	unsigned int group_id_new = (u8)((new->track_id & 0x00FF0000) >> 16);
+
+	/* a 0x00 group profile must be the first one loaded */
+	if (group_id_new == 0)
+		return true;
+	/* 0xFF group is compatible with anything else */
+	if (group_id_new == 0xFF || group_id_old == 0xFF)
+		return false;
+	/* otherwise only profiles from the same group are compatible */
+	return group_id_old != group_id_new;
+}
+
+/**
+ * i40e_ddp_does_profile_overlap - checks if DDP profile overlaps with existing one
+ * @hw: HW data structure
+ * @pinfo: DDP profile information structure
+ *
+ * Checks if the DDP profile overlaps with an existing one.
+ * Returns >0 if the profile overlaps.
+ * Returns  0 if the profile is ok.
+ * Returns <0 if error.
+ **/
+static int i40e_ddp_does_profile_overlap(struct i40e_hw *hw,
+					 struct i40e_profile_info *pinfo)
+{
+	struct i40e_ddp_profile_list *profile_list;
+	u8 buff[I40E_PROFILE_LIST_SIZE];
+	i40e_status status;
+	int i;
+
+	status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+				      NULL);
+	if (status)
+		return -EIO;
+
+	profile_list = (struct i40e_ddp_profile_list *)buff;
+	for (i = 0; i < profile_list->p_count; i++) {
+		if (i40e_ddp_profiles_overlap(pinfo,
+					      &profile_list->p_info[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * i40e_add_pinfo
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package
+ * @profile_info_sec: buffer for information section
+ * @track_id: package tracking id
+ *
+ * Register a profile to the list of loaded profiles.
+ */
+static enum i40e_status_code
+i40e_add_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+	       u8 *profile_info_sec, u32 track_id)
+{
+	struct i40e_profile_section_header *sec;
+	struct i40e_profile_info *pinfo;
+	i40e_status status;
+	u32 offset = 0, info = 0;
+
+	sec = (struct i40e_profile_section_header *)profile_info_sec;
+	sec->tbl_size = 1;
+	sec->data_end = sizeof(struct i40e_profile_section_header) +
+			sizeof(struct i40e_profile_info);
+	sec->section.type = SECTION_TYPE_INFO;
+	sec->section.offset = sizeof(struct i40e_profile_section_header);
+	sec->section.size = sizeof(struct i40e_profile_info);
+	pinfo = (struct i40e_profile_info *)(profile_info_sec +
+					     sec->section.offset);
+	pinfo->track_id = track_id;
+	pinfo->version = profile->version;
+	pinfo->op = I40E_DDP_ADD_TRACKID;
+
+	/* Clear reserved field */
+	memset(pinfo->reserved, 0, sizeof(pinfo->reserved));
+	memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+	status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+				   track_id, &offset, &info, NULL);
+	return status;
+}
+
+/**
+ * i40e_del_pinfo - delete DDP profile info from NIC
+ * @hw: HW data structure
+ * @profile: DDP profile segment to be deleted
+ * @profile_info_sec: DDP profile section header
+ * @track_id: track ID of the profile for deletion
+ *
+ * Removes DDP profile from the NIC.
+ **/
+static enum i40e_status_code
+i40e_del_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+	       u8 *profile_info_sec, u32 track_id)
+{
+	struct i40e_profile_section_header *sec;
+	struct i40e_profile_info *pinfo;
+	i40e_status status;
+	u32 offset = 0, info = 0;
+
+	sec = (struct i40e_profile_section_header *)profile_info_sec;
+	sec->tbl_size = 1;
+	sec->data_end = sizeof(struct i40e_profile_section_header) +
+			sizeof(struct i40e_profile_info);
+	sec->section.type = SECTION_TYPE_INFO;
+	sec->section.offset = sizeof(struct i40e_profile_section_header);
+	sec->section.size = sizeof(struct i40e_profile_info);
+	pinfo = (struct i40e_profile_info *)(profile_info_sec +
+					     sec->section.offset);
+	pinfo->track_id = track_id;
+	pinfo->version = profile->version;
+	pinfo->op = I40E_DDP_REMOVE_TRACKID;
+
+	/* Clear reserved field */
+	memset(pinfo->reserved, 0, sizeof(pinfo->reserved));
+	memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+	status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+				   track_id, &offset, &info, NULL);
+	return status;
+}
+
+/**
+ * i40e_ddp_is_pkg_hdr_valid - performs basic pkg header integrity checks
+ * @netdev: net device structure (for logging purposes)
+ * @pkg_hdr: pointer to package header
+ * @size_huge: size of the whole DDP profile package as a size_t
+ *
+ * Checks the correctness of the package header: version, size too big/small,
+ * and all segment offset alignments and boundaries. This lets the driver
+ * reject a non-DDP profile file loaded by administrator mistake.
+ **/
+static bool i40e_ddp_is_pkg_hdr_valid(struct net_device *netdev,
+				      struct i40e_package_header *pkg_hdr,
+				      size_t size_huge)
+{
+	u32 size = 0xFFFFFFFFU & size_huge;
+	u32 pkg_hdr_size;
+	u32 segment;
+
+	if (!pkg_hdr)
+		return false;
+
+	if (pkg_hdr->version.major > 0) {
+		struct i40e_ddp_version ver = pkg_hdr->version;
+
+		netdev_err(netdev, "Unsupported DDP profile version %u.%u.%u.%u",
+			   ver.major, ver.minor, ver.update, ver.draft);
+		return false;
+	}
+	if (size_huge > size) {
+		netdev_err(netdev, "Invalid DDP profile - size is bigger than 4G");
+		return false;
+	}
+	if (size < (sizeof(struct i40e_package_header) +
+		sizeof(struct i40e_metadata_segment) + sizeof(u32) * 2)) {
+		netdev_err(netdev, "Invalid DDP profile - size is too small.");
+		return false;
+	}
+
+	pkg_hdr_size = sizeof(u32) * (pkg_hdr->segment_count + 2U);
+	if (size < pkg_hdr_size) {
+		netdev_err(netdev, "Invalid DDP profile - too many segments");
+		return false;
+	}
+	for (segment = 0; segment < pkg_hdr->segment_count; ++segment) {
+		u32 offset = pkg_hdr->segment_offset[segment];
+
+		if (0xFU & offset) {
+			netdev_err(netdev,
+				   "Invalid DDP profile %u segment alignment",
+				   segment);
+			return false;
+		}
+		if (pkg_hdr_size > offset || offset >= size) {
+			netdev_err(netdev,
+				   "Invalid DDP profile %u segment offset",
+				   segment);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * i40e_ddp_load - performs DDP loading
+ * @netdev: net device structure
+ * @data: buffer containing recipe file
+ * @size: size of the buffer
+ * @is_add: true when loading profile, false when rolling back the previous one
+ *
+ * Checks correctness and loads the DDP profile to the NIC. The function is
+ * also used for rolling back a previously loaded profile.
+ **/
+int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+		  bool is_add)
+{
+	u8 profile_info_sec[sizeof(struct i40e_profile_section_header) +
+			    sizeof(struct i40e_profile_info)];
+	struct i40e_metadata_segment *metadata_hdr;
+	struct i40e_profile_segment *profile_hdr;
+	struct i40e_profile_info pinfo;
+	struct i40e_package_header *pkg_hdr;
+	i40e_status status;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u32 track_id;
+	int istatus;
+
+	pkg_hdr = (struct i40e_package_header *)data;
+	if (!i40e_ddp_is_pkg_hdr_valid(netdev, pkg_hdr, size))
+		return -EINVAL;
+
+	if (size < (sizeof(struct i40e_package_header) +
+		    sizeof(struct i40e_metadata_segment) + sizeof(u32) * 2)) {
+		netdev_err(netdev, "Invalid DDP recipe size.");
+		return -EINVAL;
+	}
+
+	/* Find beginning of segment data in buffer */
+	metadata_hdr = (struct i40e_metadata_segment *)
+		i40e_find_segment_in_package(SEGMENT_TYPE_METADATA, pkg_hdr);
+	if (!metadata_hdr) {
+		netdev_err(netdev, "Failed to find metadata segment in DDP recipe.");
+		return -EINVAL;
+	}
+
+	track_id = metadata_hdr->track_id;
+	profile_hdr = (struct i40e_profile_segment *)
+		i40e_find_segment_in_package(SEGMENT_TYPE_I40E, pkg_hdr);
+	if (!profile_hdr) {
+		netdev_err(netdev, "Failed to find profile segment in DDP recipe.");
+		return -EINVAL;
+	}
+
+	pinfo.track_id = track_id;
+	pinfo.version = profile_hdr->version;
+	if (is_add)
+		pinfo.op = I40E_DDP_ADD_TRACKID;
+	else
+		pinfo.op = I40E_DDP_REMOVE_TRACKID;
+
+	memcpy(pinfo.name, profile_hdr->name, I40E_DDP_NAME_SIZE);
+
+	/* Check if profile data already exists */
+	istatus = i40e_ddp_does_profile_exist(&pf->hw, &pinfo);
+	if (istatus < 0) {
+		netdev_err(netdev, "Failed to fetch loaded profiles.");
+		return istatus;
+	}
+	if (is_add) {
+		if (istatus > 0) {
+			netdev_err(netdev, "DDP profile already loaded.");
+			return -EINVAL;
+		}
+		istatus = i40e_ddp_does_profile_overlap(&pf->hw, &pinfo);
+		if (istatus < 0) {
+			netdev_err(netdev, "Failed to fetch loaded profiles.");
+			return istatus;
+		}
+		if (istatus > 0) {
+			netdev_err(netdev, "DDP profile overlaps with existing one.");
+			return -EINVAL;
+		}
+	} else {
+		if (istatus == 0) {
+			netdev_err(netdev,
+				   "DDP profile for deletion does not exist.");
+			return -EINVAL;
+		}
+	}
+
+	/* Load profile data */
+	if (is_add) {
+		status = i40e_write_profile(&pf->hw, profile_hdr, track_id);
+		if (status) {
+			if (status == I40E_ERR_DEVICE_NOT_SUPPORTED) {
+				netdev_err(netdev,
+					   "Profile is not supported by the device.");
+				return -EPERM;
+			}
+			netdev_err(netdev, "Failed to write DDP profile.");
+			return -EIO;
+		}
+	} else {
+		status = i40e_rollback_profile(&pf->hw, profile_hdr, track_id);
+		if (status) {
+			netdev_err(netdev, "Failed to remove DDP profile.");
+			return -EIO;
+		}
+	}
+
+	/* Add/remove profile to/from profile list in FW */
+	if (is_add) {
+		status = i40e_add_pinfo(&pf->hw, profile_hdr, profile_info_sec,
+					track_id);
+		if (status) {
+			netdev_err(netdev, "Failed to add DDP profile info.");
+			return -EIO;
+		}
+	} else {
+		status = i40e_del_pinfo(&pf->hw, profile_hdr, profile_info_sec,
+					track_id);
+		if (status) {
+			netdev_err(netdev, "Failed to restore DDP profile info.");
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_ddp_restore - restore previously loaded profile and remove from list
+ * @pf: PF data struct
+ *
+ * Restores the previously loaded profile stored on the list in driver memory.
+ * After rolling back, it removes the entry from the list.
+ **/
+static int i40e_ddp_restore(struct i40e_pf *pf)
+{
+	struct i40e_ddp_old_profile_list *entry;
+	struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev;
+	int status = 0;
+
+	if (!list_empty(&pf->ddp_old_prof)) {
+		entry = list_first_entry(&pf->ddp_old_prof,
+					 struct i40e_ddp_old_profile_list,
+					 list);
+		status = i40e_ddp_load(netdev, entry->old_ddp_buf,
+				       entry->old_ddp_size, false);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+	return status;
+}
+
+/**
+ * i40e_ddp_flash - callback function for ethtool flash feature
+ * @netdev: net device structure
+ * @flash: kernel flash structure
+ *
+ * Ethtool callback function used for loading and unloading DDP profiles.
+ **/
+int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash)
+{
+	const struct firmware *ddp_config;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int status = 0;
+
+	/* Check for valid region first */
+	if (flash->region != I40_DDP_FLASH_REGION) {
+		netdev_err(netdev, "Requested firmware region is not recognized by this driver.");
+		return -EINVAL;
+	}
+	if (pf->hw.bus.func != 0) {
+		netdev_err(netdev, "Any DDP operation is allowed only on Phy0 NIC interface");
+		return -EINVAL;
+	}
+
+	/* If the user supplied "-" instead of a file name, roll back the
+	 * previously stored profile.
+	 */
+	if (strncmp(flash->data, "-", 2) != 0) {
+		struct i40e_ddp_old_profile_list *list_entry;
+		char profile_name[sizeof(I40E_DDP_PROFILE_PATH)
+				  + I40E_DDP_PROFILE_NAME_MAX];
+
+		profile_name[sizeof(profile_name) - 1] = 0;
+		strncpy(profile_name, I40E_DDP_PROFILE_PATH,
+			sizeof(profile_name) - 1);
+		strncat(profile_name, flash->data, I40E_DDP_PROFILE_NAME_MAX);
+		/* Load DDP recipe. */
+		status = request_firmware(&ddp_config, profile_name,
+					  &netdev->dev);
+		if (status) {
+			netdev_err(netdev, "DDP recipe file request failed.");
+			return status;
+		}
+
+		status = i40e_ddp_load(netdev, ddp_config->data,
+				       ddp_config->size, true);
+
+		if (!status) {
+			list_entry =
+			  kzalloc(sizeof(struct i40e_ddp_old_profile_list) +
+				  ddp_config->size, GFP_KERNEL);
+			if (!list_entry) {
+				netdev_info(netdev, "Failed to allocate memory for previous DDP profile data.");
+				netdev_info(netdev, "New profile loaded but roll-back will be impossible.");
+			} else {
+				memcpy(list_entry->old_ddp_buf,
+				       ddp_config->data, ddp_config->size);
+				list_entry->old_ddp_size = ddp_config->size;
+				list_add(&list_entry->list, &pf->ddp_old_prof);
+			}
+		}
+
+		release_firmware(ddp_config);
+	} else {
+		if (!list_empty(&pf->ddp_old_prof)) {
+			status = i40e_ddp_restore(pf);
+		} else {
+			netdev_warn(netdev, "There is no DDP profile to restore.");
+			status = -ENOENT;
+		}
+	}
+	return status;
+}
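
Note on the grouping rule in i40e_ddp_profiles_overlap() above: byte 2 of the track id is a group id; a 0x00 group profile conflicts with any already-loaded profile, the 0xFF group coexists with everything, and otherwise only profiles from the same group may be combined. A self-contained userspace restatement of that check, for illustration only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the group check in i40e_ddp_profiles_overlap(). */
static bool ddp_overlap(uint32_t new_track_id, uint32_t old_track_id)
{
	uint8_t group_new = (new_track_id >> 16) & 0xFF;
	uint8_t group_old = (old_track_id >> 16) & 0xFF;

	if (group_new == 0x00)
		return true;	/* group 0x00 conflicts with anything loaded */
	if (group_new == 0xFF || group_old == 0xFF)
		return false;	/* group 0xFF is compatible with all */
	return group_new != group_old;
}

int main(void)
{
	/* different groups (0xAA vs 0xAB) clash: prints 1 */
	printf("%d\n", ddp_overlap(0x00AA0001, 0x00AB0002));
	/* 0xFF group coexists: prints 0 */
	printf("%d\n", ddp_overlap(0x00FF0001, 0x00AB0002));
	return 0;
}
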
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 56b911a..99ea543 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -132,8 +132,6 @@
 		dev_info(&pf->pdev->dev, "        vlan_features = 0x%08lx\n",
 			 (unsigned long int)nd->vlan_features);
 	}
-	dev_info(&pf->pdev->dev, "    active_vlans is %s\n",
-		 vsi->active_vlans ? "<valid>" : "<null>");
 	dev_info(&pf->pdev->dev,
 		 "    flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
 		 vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
@@ -335,8 +333,9 @@
 		 "    seid = %d, id = %d, uplink_seid = %d\n",
 		 vsi->seid, vsi->id, vsi->uplink_seid);
 	dev_info(&pf->pdev->dev,
-		 "    base_queue = %d, num_queue_pairs = %d, num_desc = %d\n",
-		 vsi->base_queue, vsi->num_queue_pairs, vsi->num_desc);
+		 "    base_queue = %d, num_queue_pairs = %d, num_tx_desc = %d, num_rx_desc = %d\n",
+		 vsi->base_queue, vsi->num_queue_pairs, vsi->num_tx_desc,
+		 vsi->num_rx_desc);
 	dev_info(&pf->pdev->dev, "    type = %i\n", vsi->type);
 	if (vsi->type == I40E_VSI_SRIOV)
 		dev_info(&pf->pdev->dev, "    VF ID = %i\n", vsi->vf_id);
@@ -1126,10 +1125,6 @@
 		dev_info(&pf->pdev->dev, "debugfs: forcing GlobR\n");
 		i40e_do_reset_safe(pf, BIT(__I40E_GLOBAL_RESET_REQUESTED));
 
-	} else if (strncmp(cmd_buf, "empr", 4) == 0) {
-		dev_info(&pf->pdev->dev, "debugfs: forcing EMPR\n");
-		i40e_do_reset_safe(pf, BIT(__I40E_EMP_RESET_REQUESTED));
-
 	} else if (strncmp(cmd_buf, "read", 4) == 0) {
 		u32 address;
 		u32 value;
@@ -1323,7 +1318,7 @@
 		if (strncmp(&cmd_buf[5], "stop", 4) == 0) {
 			int ret;
 
-			ret = i40e_aq_stop_lldp(&pf->hw, false, NULL);
+			ret = i40e_aq_stop_lldp(&pf->hw, false, false, NULL);
 			if (ret) {
 				dev_info(&pf->pdev->dev,
 					 "Stop LLDP AQ command failed =0x%x\n",
@@ -1332,7 +1327,7 @@
 			}
 			ret = i40e_aq_add_rem_control_packet_filter(&pf->hw,
 						pf->hw.mac.addr,
-						I40E_ETH_P_LLDP, 0,
+						ETH_P_LLDP, 0,
 						pf->vsi[pf->lan_vsi]->seid,
 						0, true, NULL, NULL);
 			if (ret) {
@@ -1350,7 +1345,7 @@
 
 			ret = i40e_aq_add_rem_control_packet_filter(&pf->hw,
 						pf->hw.mac.addr,
-						I40E_ETH_P_LLDP, 0,
+						ETH_P_LLDP, 0,
 						pf->vsi[pf->lan_vsi]->seid,
 						0, false, NULL, NULL);
 			if (ret) {
@@ -1360,7 +1355,7 @@
 				/* Continue and start FW LLDP anyways */
 			}
 
-			ret = i40e_aq_start_lldp(&pf->hw, NULL);
+			ret = i40e_aq_start_lldp(&pf->hw, false, NULL);
 			if (ret) {
 				dev_info(&pf->pdev->dev,
 					 "Start LLDP AQ command failed =0x%x\n",
@@ -1644,30 +1639,7 @@
 		count = buf_tmp - i40e_dbg_netdev_ops_buf + 1;
 	}
 
-	if (strncmp(i40e_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) {
-		cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid);
-		if (cnt != 1) {
-			dev_info(&pf->pdev->dev, "tx_timeout <vsi_seid>\n");
-			goto netdev_ops_write_done;
-		}
-		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
-		if (!vsi) {
-			dev_info(&pf->pdev->dev,
-				 "tx_timeout: VSI %d not found\n", vsi_seid);
-		} else if (!vsi->netdev) {
-			dev_info(&pf->pdev->dev, "tx_timeout: no netdev for VSI %d\n",
-				 vsi_seid);
-		} else if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
-			dev_info(&pf->pdev->dev, "tx_timeout: VSI %d not UP\n",
-				 vsi_seid);
-		} else if (rtnl_trylock()) {
-			vsi->netdev->netdev_ops->ndo_tx_timeout(vsi->netdev);
-			rtnl_unlock();
-			dev_info(&pf->pdev->dev, "tx_timeout called\n");
-		} else {
-			dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n");
-		}
-	} else if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) {
+	if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) {
 		int mtu;
 
 		cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i",
@@ -1735,7 +1707,6 @@
 		dev_info(&pf->pdev->dev, "unknown command '%s'\n",
 			 i40e_dbg_netdev_ops_buf);
 		dev_info(&pf->pdev->dev, "available commands\n");
-		dev_info(&pf->pdev->dev, "  tx_timeout <vsi_seid>\n");
 		dev_info(&pf->pdev->dev, "  change_mtu <vsi_seid> <mtu>\n");
 		dev_info(&pf->pdev->dev, "  set_rx_mode <vsi_seid>\n");
 		dev_info(&pf->pdev->dev, "  napi <vsi_seid>\n");
@@ -1757,29 +1728,15 @@
  **/
 void i40e_dbg_pf_init(struct i40e_pf *pf)
 {
-	struct dentry *pfile;
 	const char *name = pci_name(pf->pdev);
-	const struct device *dev = &pf->pdev->dev;
 
 	pf->i40e_dbg_pf = debugfs_create_dir(name, i40e_dbg_root);
-	if (!pf->i40e_dbg_pf)
-		return;
 
-	pfile = debugfs_create_file("command", 0600, pf->i40e_dbg_pf, pf,
-				    &i40e_dbg_command_fops);
-	if (!pfile)
-		goto create_failed;
+	debugfs_create_file("command", 0600, pf->i40e_dbg_pf, pf,
+			    &i40e_dbg_command_fops);
 
-	pfile = debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf, pf,
-				    &i40e_dbg_netdev_ops_fops);
-	if (!pfile)
-		goto create_failed;
-
-	return;
-
-create_failed:
-	dev_info(dev, "debugfs dir/file for %s failed\n", name);
-	debugfs_remove_recursive(pf->i40e_dbg_pf);
+	debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf, pf,
+			    &i40e_dbg_netdev_ops_fops);
 }
 
 /**
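
Note on the simplified i40e_dbg_pf_init() above: it follows the kernel-wide convention that debugfs_create_dir()/debugfs_create_file() return values are not checked, since debugfs failures must never alter driver behavior and the helpers tolerate error-pointer parents. A minimal sketch of the resulting create-and-forget pattern, with illustrative names:

#include <linux/debugfs.h>

static struct dentry *example_dbg_dir;

static void example_dbg_init(const char *name, void *priv,
			     const struct file_operations *fops)
{
	/* No error checks: a failed dir is passed through harmlessly. */
	example_dbg_dir = debugfs_create_dir(name, NULL);
	debugfs_create_file("command", 0600, example_dbg_dir, priv, fops);
}

static void example_dbg_exit(void)
{
	debugfs_remove_recursive(example_dbg_dir);
	example_dbg_dir = NULL;
}
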
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index 334b05f..bac4da0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -5,6 +5,8 @@
 #define _I40E_DEVIDS_H_
 
 /* Device IDs */
+#define I40E_DEV_ID_X710_N3000		0x0CF8
+#define I40E_DEV_ID_XXV710_N3000	0x0D58
 #define I40E_DEV_ID_SFP_XL710		0x1572
 #define I40E_DEV_ID_QEMU		0x1574
 #define I40E_DEV_ID_KX_B		0x1580
@@ -18,6 +20,9 @@
 #define I40E_DEV_ID_10G_BASE_T4		0x1589
 #define I40E_DEV_ID_25G_B		0x158A
 #define I40E_DEV_ID_25G_SFP28		0x158B
+#define I40E_DEV_ID_10G_BASE_T_BC	0x15FF
+#define I40E_DEV_ID_10G_B		0x104F
+#define I40E_DEV_ID_10G_SFP		0x104E
 #define I40E_DEV_ID_KX_X722		0x37CE
 #define I40E_DEV_ID_QSFP_X722		0x37CF
 #define I40E_DEV_ID_SFP_X722		0x37D0
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5ff6caa..41e1240 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -5,26 +5,227 @@
 
 #include "i40e.h"
 #include "i40e_diag.h"
+#include "i40e_txrx_common.h"
 
+/* ethtool statistics helpers */
+
+/**
+ * struct i40e_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() of the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the I40E_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the i40e_add_one_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the i40e_add_stat_strings() helper function.
+ **/
 struct i40e_stats {
-	/* The stat_string is expected to be a format string formatted using
-	 * vsnprintf by i40e_add_stat_strings. Every member of a stats array
-	 * should use the same format specifiers as they will be formatted
-	 * using the same variadic arguments.
-	 */
 	char stat_string[ETH_GSTRING_LEN];
 	int sizeof_stat;
 	int stat_offset;
 };
 
+/* Helper macro to define an i40e_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
 #define I40E_STAT(_type, _name, _stat) { \
 	.stat_string = _name, \
 	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
 	.stat_offset = offsetof(_type, _stat) \
 }
 
+/* Helper macro for defining some statistics directly copied from the netdev
+ * stats structure.
+ */
 #define I40E_NETDEV_STAT(_net_stat) \
 	I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
+
+/* Helper macro for defining some statistics related to queues */
+#define I40E_QUEUE_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_ring, _name, _stat)
+
+/* Stats associated with a Tx or Rx ring */
+static const struct i40e_stats i40e_gstrings_queue_stats[] = {
+	I40E_QUEUE_STAT("%s-%u.packets", stats.packets),
+	I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
+
+/**
+ * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pointer: basis for where to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. Used to implement i40e_add_ethtool_stats and
+ * i40e_add_queue_stats. If the pointer is null, data will be zero'd.
+ */
+static void
+i40e_add_one_ethtool_stat(u64 *data, void *pointer,
+			  const struct i40e_stats *stat)
+{
+	char *p;
+
+	if (!pointer) {
+		/* ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pointer + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location to copy stats from
+ * @stats: array of stats to copy
+ * @size: the size of the stats definition
+ *
+ * Copy the stats defined by the stats array using the pointer as a base into
+ * the data buffer supplied by ethtool. Updates the data pointer to point to
+ * the next empty location for successive calls to __i40e_add_ethtool_stats.
+ * If pointer is null, set the data values to zero and update the pointer to
+ * skip these stats.
+ **/
+static void
+__i40e_add_ethtool_stats(u64 **data, void *pointer,
+			 const struct i40e_stats stats[],
+			 const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
+}
+
+/**
+ * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location where stats are stored
+ * @stats: static const array of stat definitions
+ *
+ * Macro to ease the use of __i40e_add_ethtool_stats by taking a static
+ * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
+ * ensuring that we pass the size associated with the given stats array.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided.
+ **/
+#define i40e_add_ethtool_stats(data, pointer, stats) \
+	__i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
+/**
+ * i40e_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats.
+ * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static void
+i40e_add_queue_stats(u64 **data, struct i40e_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats);
+	const struct i40e_stats *stats = i40e_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry_irq. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp);
+		for (i = 0; i < size; i++) {
+			i40e_add_one_ethtool_stat(&(*data)[i], ring,
+						  &stats[i]);
+		}
+	} while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
+
+/**
+ * __i40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ **/
+static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
+				    const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+/**
+ * i40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ **/
+#define i40e_add_stat_strings(p, stats, ...) \
+	__i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
+
 #define I40E_PF_STAT(_name, _stat) \
 	I40E_STAT(struct i40e_pf, _name, _stat)
 #define I40E_VSI_STAT(_name, _stat) \
@@ -33,6 +234,8 @@
 	I40E_STAT(struct i40e_veb, _name, _stat)
 #define I40E_PFC_STAT(_name, _stat) \
 	I40E_STAT(struct i40e_pfc_stats, _name, _stat)
+#define I40E_QUEUE_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_ring, _name, _stat)
 
 static const struct i40e_stats i40e_gstrings_net_stats[] = {
 	I40E_NETDEV_STAT(rx_packets),
@@ -171,20 +374,11 @@
 	I40E_PFC_STAT("port.rx_priority_%u_xon_2_xoff", priority_xon_2_xoff),
 };
 
-/* We use num_tx_queues here as a proxy for the maximum number of queues
- * available because we always allocate queues symmetrically.
- */
-#define I40E_MAX_NUM_QUEUES(n) ((n)->num_tx_queues)
-#define I40E_QUEUE_STATS_LEN(n)                                              \
-	   (I40E_MAX_NUM_QUEUES(n)                                           \
-	    * 2 /* Tx and Rx together */                                     \
-	    * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
-#define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
 #define I40E_NETDEV_STATS_LEN	ARRAY_SIZE(i40e_gstrings_net_stats)
+
 #define I40E_MISC_STATS_LEN	ARRAY_SIZE(i40e_gstrings_misc_stats)
-#define I40E_VSI_STATS_LEN(n)	(I40E_NETDEV_STATS_LEN + \
-				 I40E_MISC_STATS_LEN + \
-				 I40E_QUEUE_STATS_LEN((n)))
+
+#define I40E_VSI_STATS_LEN	(I40E_NETDEV_STATS_LEN + I40E_MISC_STATS_LEN)
 
 #define I40E_PFC_STATS_LEN	(ARRAY_SIZE(i40e_gstrings_pfc_stats) * \
 				 I40E_MAX_USER_PRIORITY)
@@ -193,10 +387,15 @@
 				 (ARRAY_SIZE(i40e_gstrings_veb_tc_stats) * \
 				  I40E_MAX_TRAFFIC_CLASS))
 
-#define I40E_PF_STATS_LEN(n)	(I40E_GLOBAL_STATS_LEN + \
+#define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
+
+#define I40E_PF_STATS_LEN	(I40E_GLOBAL_STATS_LEN + \
 				 I40E_PFC_STATS_LEN + \
 				 I40E_VEB_STATS_LEN + \
-				 I40E_VSI_STATS_LEN((n)))
+				 I40E_VSI_STATS_LEN)
+
+/* Length of stats for a single queue */
+#define I40E_QUEUE_STATS_LEN	ARRAY_SIZE(i40e_gstrings_queue_stats)
 
 enum i40e_ethtool_test_id {
 	I40E_ETH_TEST_REG = 0,
@@ -239,6 +438,8 @@
 	I40E_PRIV_FLAG("disable-source-pruning",
 		       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
 	I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0),
+	I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0),
+	I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
 };
 
 #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@ -307,6 +508,20 @@
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     10000baseT_Full);
 	}
+	if (phy_types & I40E_CAP_PHY_TYPE_2_5GBASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_2_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     2500baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_5GBASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     5000baseT_Full);
+	}
 	if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
 	    phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
 	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
@@ -334,18 +549,24 @@
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     1000baseT_Full);
 	}
-	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseSR4_Full);
-	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
-		ethtool_link_ksettings_add_link_mode(ks, supported,
-						     40000baseLR4_Full);
-	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseSR4_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseLR4_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     40000baseLR4_Full);
 	}
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseKR4_Full);
+	}
 	if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     20000baseKR2_Full);
@@ -407,6 +628,24 @@
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     25000baseCR_Full);
 	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) {
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_NONE);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_RS);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_BASER);
+		}
+	}
 	/* need to add new 10G PHY types */
 	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
@@ -449,13 +688,15 @@
 	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
 	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
 	    phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
-	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_5GBASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_2_5GBASE_T ||
 	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
 	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
 	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
@@ -470,6 +711,35 @@
 }
 
 /**
+ * i40e_get_settings_link_up_fec - Get the FEC mode encoding from mask
+ * @req_fec_info: mask of the requested FEC info
+ * @ks: ethtool ksettings to fill in
+ **/
+static void i40e_get_settings_link_up_fec(u8 req_fec_info,
+					  struct ethtool_link_ksettings *ks)
+{
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+
+	if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+	} else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+	} else {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_NONE);
+		if (I40E_AQ_SET_FEC_AUTO & req_fec_info) {
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_RS);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_BASER);
+		}
+	}
+}
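
A worked example of the advertising logic above: a req_fec_info of I40E_AQ_SET_FEC_REQUEST_RS advertises FEC_RS alone; I40E_AQ_SET_FEC_REQUEST_KR advertises FEC_BASER alone; anything else advertises FEC_NONE, and if I40E_AQ_SET_FEC_AUTO is set as well, FEC_RS and FEC_BASER are advertised alongside it.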
+
+/**
  * i40e_get_settings_link_up - Get the Link settings for when link is up
  * @hw: hw structure
  * @ks: ethtool ksettings to fill in
@@ -501,14 +771,20 @@
 	case I40E_PHY_TYPE_40GBASE_AOC:
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseCR4_Full);
 		break;
 	case I40E_PHY_TYPE_40GBASE_SR4:
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseSR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseSR4_Full);
 		break;
 	case I40E_PHY_TYPE_40GBASE_LR4:
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseLR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseLR4_Full);
 		break;
 	case I40E_PHY_TYPE_25GBASE_SR:
 	case I40E_PHY_TYPE_25GBASE_LR:
@@ -522,6 +798,7 @@
 						     25000baseSR_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     25000baseSR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseSR_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
@@ -552,12 +829,18 @@
 							     10000baseT_Full);
 		break;
 	case I40E_PHY_TYPE_10GBASE_T:
+	case I40E_PHY_TYPE_5GBASE_T:
+	case I40E_PHY_TYPE_2_5GBASE_T:
 	case I40E_PHY_TYPE_1000BASE_T:
 	case I40E_PHY_TYPE_100BASE_TX:
 		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseT_Full);
 		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     1000baseT_Full);
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100baseT_Full);
@@ -565,6 +848,12 @@
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     5000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_2_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     2500baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     1000baseT_Full);
@@ -639,6 +928,7 @@
 						     40000baseKR4_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     25000baseKR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     20000baseKR2_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
@@ -656,6 +946,8 @@
 						     25000baseCR_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     25000baseCR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+
 		break;
 	case I40E_PHY_TYPE_25GBASE_AOC:
 	case I40E_PHY_TYPE_25GBASE_ACC:
@@ -663,9 +955,10 @@
 		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     25000baseCR_Full);
-
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     25000baseCR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseCR_Full);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
@@ -700,6 +993,12 @@
 	case I40E_LINK_SPEED_10GB:
 		ks->base.speed = SPEED_10000;
 		break;
+	case I40E_LINK_SPEED_5GB:
+		ks->base.speed = SPEED_5000;
+		break;
+	case I40E_LINK_SPEED_2_5GB:
+		ks->base.speed = SPEED_2500;
+		break;
 	case I40E_LINK_SPEED_1GB:
 		ks->base.speed = SPEED_1000;
 		break;
@@ -707,6 +1006,7 @@
 		ks->base.speed = SPEED_100;
 		break;
 	default:
+		ks->base.speed = SPEED_UNKNOWN;
 		break;
 	}
 	ks->base.duplex = DUPLEX_FULL;
@@ -786,6 +1086,7 @@
 		break;
 	case I40E_MEDIA_TYPE_FIBER:
 		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
 		ks->base.port = PORT_FIBRE;
 		break;
 	case I40E_MEDIA_TYPE_UNKNOWN:
@@ -984,6 +1285,12 @@
 						  10000baseLR_Full))
 		config.link_speed |= I40E_LINK_SPEED_10GB;
 	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseT_Full))
+		config.link_speed |= I40E_LINK_SPEED_2_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  5000baseT_Full))
+		config.link_speed |= I40E_LINK_SPEED_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
 						  20000baseKR2_Full))
 		config.link_speed |= I40E_LINK_SPEED_20GB;
 	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
@@ -1061,6 +1368,154 @@
 	return err;
 }
 
+static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status status = 0;
+	u32 flags = 0;
+	int err = 0;
+
+	flags = READ_ONCE(pf->flags);
+	i40e_set_fec_in_flags(fec_cfg, &flags);
+
+	/* Get the current phy config */
+	memset(&abilities, 0, sizeof(abilities));
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	if (abilities.fec_cfg_curr_mod_ext_info != fec_cfg) {
+		struct i40e_aq_set_phy_config config;
+
+		memset(&config, 0, sizeof(config));
+		config.phy_type = abilities.phy_type;
+		config.abilities = abilities.abilities;
+		config.phy_type_ext = abilities.phy_type_ext;
+		config.link_speed = abilities.link_speed;
+		config.eee_capability = abilities.eee_capability;
+		config.eeer = abilities.eeer_val;
+		config.low_power_ctrl = abilities.d3_lpan;
+		config.fec_config = fec_cfg & I40E_AQ_PHY_FEC_CONFIG_MASK;
+		status = i40e_aq_set_phy_config(hw, &config, NULL);
+		if (status) {
+			netdev_info(netdev,
+				    "Set phy config failed, err %s aq_err %s\n",
+				    i40e_stat_str(hw, status),
+				    i40e_aq_str(hw, hw->aq.asq_last_status));
+			err = -EAGAIN;
+			goto done;
+		}
+		pf->flags = flags;
+		status = i40e_update_link_info(hw);
+		if (status)
+			/* debug level message only due to relation to the link
+			/* debug level message only, since the failure relates
+			 * to the link itself rather than to the FEC settings
+			 * (e.g. no physical connection)
+			netdev_dbg(netdev,
+				   "Updating link info failed with err %s aq_err %s\n",
+				   i40e_stat_str(hw, status),
+				   i40e_aq_str(hw, hw->aq.asq_last_status));
+	}
+
+done:
+	return err;
+}
+
+static int i40e_get_fec_param(struct net_device *netdev,
+			      struct ethtool_fecparam *fecparam)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status status = 0;
+	int err = 0;
+
+	/* Get the current phy config */
+	memset(&abilities, 0, sizeof(abilities));
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	fecparam->fec = 0;
+	if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO)
+		fecparam->fec |= ETHTOOL_FEC_AUTO;
+	if ((abilities.fec_cfg_curr_mod_ext_info &
+	     I40E_AQ_SET_FEC_REQUEST_RS) ||
+	    (abilities.fec_cfg_curr_mod_ext_info &
+	     I40E_AQ_SET_FEC_ABILITY_RS))
+		fecparam->fec |= ETHTOOL_FEC_RS;
+	if ((abilities.fec_cfg_curr_mod_ext_info &
+	     I40E_AQ_SET_FEC_REQUEST_KR) ||
+	    (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR))
+		fecparam->fec |= ETHTOOL_FEC_BASER;
+	if (abilities.fec_cfg_curr_mod_ext_info == 0)
+		fecparam->fec |= ETHTOOL_FEC_OFF;
+
+	if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA)
+		fecparam->active_fec = ETHTOOL_FEC_BASER;
+	else if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA)
+		fecparam->active_fec = ETHTOOL_FEC_RS;
+	else
+		fecparam->active_fec = ETHTOOL_FEC_OFF;
+done:
+	return err;
+}
+
+static int i40e_set_fec_param(struct net_device *netdev,
+			      struct ethtool_fecparam *fecparam)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u8 fec_cfg = 0;
+	int err = 0;
+
+	if (hw->device_id != I40E_DEV_ID_25G_SFP28 &&
+	    hw->device_id != I40E_DEV_ID_25G_B) {
+		err = -EPERM;
+		goto done;
+	}
+
+	switch (fecparam->fec) {
+	case ETHTOOL_FEC_AUTO:
+		fec_cfg = I40E_AQ_SET_FEC_AUTO;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS |
+			     I40E_AQ_SET_FEC_ABILITY_RS);
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR |
+			     I40E_AQ_SET_FEC_ABILITY_KR);
+		break;
+	case ETHTOOL_FEC_OFF:
+	case ETHTOOL_FEC_NONE:
+		fec_cfg = 0;
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unsupported FEC mode: %d\n",
+			 fecparam->fec);
+		err = -EINVAL;
+		goto done;
+	}
+
+	err = i40e_set_fec_cfg(netdev, fec_cfg);
+
+done:
+	return err;
+}
+
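Once get_fecparam/set_fecparam are wired into the ethtool_ops table below, the FEC configuration becomes reachable from userspace through standard ethtool usage, e.g. ethtool --show-fec <dev> to query and ethtool --set-fec <dev> encoding rs to request RS-FEC; the latter arrives here as ETHTOOL_FEC_RS and is translated to the request-plus-ability mask in the switch above.
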
 static int i40e_nway_reset(struct net_device *netdev)
 {
 	/* restart autonegotiation */
@@ -1136,6 +1591,7 @@
 	i40e_status status;
 	u8 aq_failures;
 	int err = 0;
+	u32 is_an;
 
 	/* Changing the port's flow control is not supported if this isn't the
 	 * port's controlling PF
@@ -1148,15 +1604,14 @@
 	if (vsi != pf->vsi[pf->lan_vsi])
 		return -EOPNOTSUPP;
 
-	if (pause->autoneg != ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
-	    AUTONEG_ENABLE : AUTONEG_DISABLE)) {
+	is_an = hw_link_info->an_info & I40E_AQ_AN_COMPLETED;
+	if (pause->autoneg != is_an) {
 		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
 		return -EOPNOTSUPP;
 	}
 
 	/* If we have link and don't have autoneg */
-	if (!test_bit(__I40E_DOWN, pf->state) &&
-	    !(hw_link_info->an_info & I40E_AQ_AN_COMPLETED)) {
+	if (!test_bit(__I40E_DOWN, pf->state) && !is_an) {
 		/* Send message that it might not necessarily work */
 		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
 	}
@@ -1176,7 +1631,7 @@
 	else if (!pause->rx_pause && !pause->tx_pause)
 		hw->fc.requested_mode = I40E_FC_NONE;
 	else
-		 return -EINVAL;
+		return -EINVAL;
 
 	/* Tell the OS link is going down, the link will go back up when fw
 	 * says it is ready asynchronously
@@ -1207,7 +1662,7 @@
 		err = -EAGAIN;
 	}
 
-	if (!test_bit(__I40E_DOWN, pf->state)) {
+	if (!test_bit(__I40E_DOWN, pf->state) && is_an) {
 		/* Give it a little more time to try to come back */
 		msleep(75);
 		if (!test_bit(__I40E_DOWN, pf->state))
@@ -1512,6 +1967,13 @@
 	    (new_rx_count == vsi->rx_rings[0]->count))
 		return 0;
 
+	/* If there is an AF_XDP UMEM attached to any of the Rx rings,
+	 * disallow changing the number of descriptors -- regardless of
+	 * whether the netdev is running or not.
+	 */
+	if (i40e_xsk_any_rx_ring_enabled(vsi))
+		return -EBUSY;
+
 	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
 		timeout--;
 		if (!timeout)
@@ -1527,6 +1989,8 @@
 			if (i40e_enabled_xdp_vsi(vsi))
 				vsi->xdp_rings[i]->count = new_tx_count;
 		}
+		vsi->num_tx_desc = new_tx_count;
+		vsi->num_rx_desc = new_rx_count;
 		goto done;
 	}
 
@@ -1663,6 +2127,8 @@
 		rx_rings = NULL;
 	}
 
+	vsi->num_tx_desc = new_tx_count;
+	vsi->num_rx_desc = new_rx_count;
 	i40e_up(vsi);
 
 free_tx:
@@ -1701,11 +2167,30 @@
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
+	int stats_len;
 
 	if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1)
-		return I40E_PF_STATS_LEN(netdev);
+		stats_len = I40E_PF_STATS_LEN;
 	else
-		return I40E_VSI_STATS_LEN(netdev);
+		stats_len = I40E_VSI_STATS_LEN;
+
+	/* The number of stats reported for a given net_device must remain
+	 * constant throughout the life of that device.
+	 *
+	 * This is because the API for obtaining the size, strings, and stats
+	 * is spread out over three separate ethtool ioctls. There is no safe
+	 * way to lock the number of stats across these calls, so we must
+	 * assume that they will never change.
+	 *
+	 * Due to this, we report the maximum number of queues, even if not
+	 * every queue is currently configured. Since we always allocate
+	 * queues in pairs, we'll just use netdev->num_tx_queues * 2. This
+	 * works because the num_tx_queues is set at device creation and never
+	 * changes.
+	 */
+	stats_len += I40E_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues;
+
+	return stats_len;
 }
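
The constraint described in the comment can be illustrated with a minimal standalone sketch (stand-in values, not driver code): the count computed here must match exactly what the strings and stats callbacks later emit, because ethtool fetches size, strings, and values in three separate ioctls.

#include <stdio.h>

/* stand-in lengths; the real driver derives these via ARRAY_SIZE() */
#define QUEUE_STATS_LEN	2	/* e.g. packets + bytes per ring */
#define VSI_STATS_LEN	10	/* placeholder for the VSI stat count */

static int stats_count(int num_tx_queues)
{
	/* queues are allocated in Tx/Rx pairs, hence the factor of 2 */
	return VSI_STATS_LEN + QUEUE_STATS_LEN * 2 * num_tx_queues;
}

int main(void)
{
	printf("%d\n", stats_count(8));	/* 10 + 2 * 2 * 8 = 42 */
	return 0;
}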
 
 static int i40e_get_sset_count(struct net_device *netdev, int sset)
@@ -1728,89 +2213,6 @@
 }
 
 /**
- * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer
- * @data: location to store the stat value
- * @pointer: basis for where to copy from
- * @stat: the stat definition
- *
- * Copies the stat data defined by the pointer and stat structure pair into
- * the memory supplied as data. Used to implement i40e_add_ethtool_stats.
- * If the pointer is null, data will be zero'd.
- */
-static inline void
-i40e_add_one_ethtool_stat(u64 *data, void *pointer,
-			  const struct i40e_stats *stat)
-{
-	char *p;
-
-	if (!pointer) {
-		/* ensure that the ethtool data buffer is zero'd for any stats
-		 * which don't have a valid pointer.
-		 */
-		*data = 0;
-		return;
-	}
-
-	p = (char *)pointer + stat->stat_offset;
-	switch (stat->sizeof_stat) {
-	case sizeof(u64):
-		*data = *((u64 *)p);
-		break;
-	case sizeof(u32):
-		*data = *((u32 *)p);
-		break;
-	case sizeof(u16):
-		*data = *((u16 *)p);
-		break;
-	case sizeof(u8):
-		*data = *((u8 *)p);
-		break;
-	default:
-		WARN_ONCE(1, "unexpected stat size for %s",
-			  stat->stat_string);
-		*data = 0;
-	}
-}
-
-/**
- * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer
- * @data: ethtool stats buffer
- * @pointer: location to copy stats from
- * @stats: array of stats to copy
- * @size: the size of the stats definition
- *
- * Copy the stats defined by the stats array using the pointer as a base into
- * the data buffer supplied by ethtool. Updates the data pointer to point to
- * the next empty location for successive calls to __i40e_add_ethtool_stats.
- * If pointer is null, set the data values to zero and update the pointer to
- * skip these stats.
- **/
-static inline void
-__i40e_add_ethtool_stats(u64 **data, void *pointer,
-			 const struct i40e_stats stats[],
-			 const unsigned int size)
-{
-	unsigned int i;
-
-	for (i = 0; i < size; i++)
-		i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
-}
-
-/**
- * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer
- * @data: ethtool stats buffer
- * @pointer: location where stats are stored
- * @stats: static const array of stat definitions
- *
- * Macro to ease the use of __i40e_add_ethtool_stats by taking a static
- * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
- * ensuring that we pass the size associated with the given stats array.
- * Assumes that stats is an array.
- **/
-#define i40e_add_ethtool_stats(data, pointer, stats) \
-	__i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
-
-/**
  * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure
  * @pf: the PF device structure
  * @i: the priority value to copy
@@ -1853,12 +2255,10 @@
 				   struct ethtool_stats *stats, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	struct i40e_veb *veb = pf->veb[pf->lan_veb];
+	struct i40e_veb *veb = NULL;
 	unsigned int i;
-	unsigned int start;
 	bool veb_stats;
 	u64 *p = data;
 
@@ -1870,44 +2270,24 @@
 	i40e_add_ethtool_stats(&data, vsi, i40e_gstrings_misc_stats);
 
 	rcu_read_lock();
-	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev) ; i++) {
-		tx_ring = READ_ONCE(vsi->tx_rings[i]);
-
-		if (!tx_ring) {
-			/* Bump the stat counter to skip these stats, and make
-			 * sure the memory is zero'd
-			 */
-			*(data++) = 0;
-			*(data++) = 0;
-			*(data++) = 0;
-			*(data++) = 0;
-			continue;
-		}
-
-		/* process Tx ring statistics */
-		do {
-			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
-			data[0] = tx_ring->stats.packets;
-			data[1] = tx_ring->stats.bytes;
-		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
-		data += 2;
-
-		/* Rx ring is the 2nd half of the queue pair */
-		rx_ring = &tx_ring[1];
-		do {
-			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
-			data[0] = rx_ring->stats.packets;
-			data[1] = rx_ring->stats.bytes;
-		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
-		data += 2;
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_queue_stats(&data, READ_ONCE(vsi->tx_rings[i]));
+		i40e_add_queue_stats(&data, READ_ONCE(vsi->rx_rings[i]));
 	}
 	rcu_read_unlock();
+
 	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
 		goto check_data_pointer;
 
 	veb_stats = ((pf->lan_veb != I40E_NO_VEB) &&
+		     (pf->lan_veb < I40E_MAX_VEB) &&
 		     (pf->flags & I40E_FLAG_VEB_STATS_ENABLED));
 
+	if (veb_stats) {
+		veb = pf->veb[pf->lan_veb];
+		i40e_update_veb_stats(veb);
+	}
+
 	/* If veb stats aren't enabled, pass NULL instead of the veb so that
 	 * we initialize stats to zero and update the data pointer
 	 * intelligently
@@ -1933,42 +2313,6 @@
 }
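
The i40e_add_queue_stats() helper called above is added elsewhere in this patch; judging from the open-coded loop it replaces, it is assumed to behave roughly like the kernel-style sketch below: snapshot packets and bytes under the ring's u64_stats seqcount, and emit zeros when the ring pointer is NULL so the buffer layout stays fixed.

static void example_add_queue_stats(u64 **data, struct i40e_ring *ring)
{
	u64 packets = 0, bytes = 0;
	unsigned int start;

	if (ring) {
		do {
			start = u64_stats_fetch_begin_irq(&ring->syncp);
			packets = ring->stats.packets;
			bytes = ring->stats.bytes;
		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
	}
	*(*data)++ = packets;
	*(*data)++ = bytes;
}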
 
 /**
- * __i40e_add_stat_strings - copy stat strings into ethtool buffer
- * @p: ethtool supplied buffer
- * @stats: stat definitions array
- * @size: size of the stats array
- *
- * Format and copy the strings described by stats into the buffer pointed at
- * by p.
- **/
-static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
-				    const unsigned int size, ...)
-{
-	unsigned int i;
-
-	for (i = 0; i < size; i++) {
-		va_list args;
-
-		va_start(args, size);
-		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
-		*p += ETH_GSTRING_LEN;
-		va_end(args);
-	}
-}
-
-/**
- * 40e_add_stat_strings - copy stat strings into ethtool buffer
- * @p: ethtool supplied buffer
- * @stats: stat definitions array
- *
- * Format and copy the strings described by the const static stats value into
- * the buffer pointed at by p. Assumes that stats can have ARRAY_SIZE called
- * for it.
- **/
-#define i40e_add_stat_strings(p, stats, ...) \
-	__i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
-
-/**
  * i40e_get_stat_strings - copy stat strings into supplied buffer
  * @netdev: the netdev to collect strings for
  * @data: supplied buffer to copy strings into
@@ -1990,18 +2334,15 @@
 
 	i40e_add_stat_strings(&data, i40e_gstrings_misc_stats);
 
-	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) {
-		snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
-		data += ETH_GSTRING_LEN;
-		snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
-		data += ETH_GSTRING_LEN;
-		snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
-		data += ETH_GSTRING_LEN;
-		snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
-		data += ETH_GSTRING_LEN;
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "tx", i);
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "rx", i);
 	}
+
 	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
-		return;
+		goto check_data_pointer;
 
 	i40e_add_stat_strings(&data, i40e_gstrings_veb_stats);
 
@@ -2013,6 +2354,7 @@
 	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
 		i40e_add_stat_strings(&data, i40e_gstrings_pfc_stats, i);
 
+check_data_pointer:
 	WARN_ONCE(data - p != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN,
 		  "stat strings count mismatch!");
 }
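
The per-queue strings are now generated by passing a prefix and a queue index through the varargs path of __i40e_add_stat_strings() (removed above as a local helper, presumably re-added elsewhere in this patch): assuming i40e_gstrings_queue_stats holds templates of the form "%s-%u.packets", the call with ("tx", 3) formats to "tx-3.packets" via vsnprintf().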
@@ -2099,7 +2441,7 @@
 	return 0;
 }
 
-static int i40e_link_test(struct net_device *netdev, u64 *data)
+static u64 i40e_link_test(struct net_device *netdev, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -2122,7 +2464,7 @@
 	return *data;
 }
 
-static int i40e_reg_test(struct net_device *netdev, u64 *data)
+static u64 i40e_reg_test(struct net_device *netdev, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -2133,7 +2475,7 @@
 	return *data;
 }
 
-static int i40e_eeprom_test(struct net_device *netdev, u64 *data)
+static u64 i40e_eeprom_test(struct net_device *netdev, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -2147,7 +2489,7 @@
 	return *data;
 }
 
-static int i40e_intr_test(struct net_device *netdev, u64 *data)
+static u64 i40e_intr_test(struct net_device *netdev, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -2302,7 +2644,7 @@
 		return -EOPNOTSUPP;
 
 	/* only magic packet is supported */
-	if (wol->wolopts && (wol->wolopts != WAKE_MAGIC))
+	if (wol->wolopts & ~WAKE_MAGIC)
 		return -EOPNOTSUPP;
 
 	/* is this a new value? */
@@ -2363,10 +2705,10 @@
 	default:
 		break;
 	}
-		if (ret)
-			return -ENOENT;
-		else
-			return 0;
+	if (ret)
+		return -ENOENT;
+	else
+		return 0;
 }
 
 /* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt
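For example, assuming the factor-of-2 granularity this note goes on to describe, a requested interrupt coalescing interval of 50 usecs would be programmed into the ITR register as 50 / 2 = 25.
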
@@ -4528,9 +4870,12 @@
 static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
+	u64 orig_flags, new_flags, changed_flags;
+	enum i40e_admin_queue_err adq_err;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	u64 orig_flags, new_flags, changed_flags;
+	bool is_reset_needed;
+	i40e_status status;
 	u32 i, j;
 
 	orig_flags = READ_ONCE(pf->flags);
@@ -4574,6 +4919,10 @@
 flags_complete:
 	changed_flags = orig_flags ^ new_flags;
 
+	is_reset_needed = !!(changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+		I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED |
+		I40E_FLAG_DISABLE_FW_LLDP));
+
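Since changed_flags is the XOR of the old and new masks (e.g. 0b1010 ^ 0b0110 = 0b1100, the bits that differ), is_reset_needed is set only when one of the reset-requiring bits actually flipped, rather than whenever one of them happens to be on.
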
 	/* Before we finalize any flag changes, we need to perform some
 	 * checks to ensure that the changes are supported and safe.
 	 */
@@ -4584,26 +4933,29 @@
 		return -EOPNOTSUPP;
 
 	/* If the driver detected FW LLDP was disabled on init, this flag could
-	 * be set, however we do not support _changing_ the flag if NPAR is
-	 * enabled or FW API version < 1.7.  There are situations where older
-	 * FW versions/NPAR enabled PFs could disable LLDP, however we _must_
-	 * not allow the user to enable/disable LLDP with this flag on
-	 * unsupported FW versions.
+	 * be set, however we do not support _changing_ the flag:
+	 * - on XL710 if NPAR is enabled or FW API version < 1.7
+	 * - on X722 with FW API version < 1.6
+	 * There are situations where older FW versions/NPAR enabled PFs could
+	 * disable LLDP, however we _must_ not allow the user to enable/disable
+	 * LLDP with this flag on unsupported FW versions.
 	 */
 	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
-		if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
+		if (!(pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) {
 			dev_warn(&pf->pdev->dev,
 				 "Device does not support changing FW LLDP\n");
 			return -EOPNOTSUPP;
 		}
 	}
 
-	/* Now that we've checked to ensure that the new flags are valid, load
-	 * them into place. Since we only modify flags either (a) during
-	 * initialization or (b) while holding the RTNL lock, we don't need
-	 * anything fancy here.
-	 */
-	pf->flags = new_flags;
+	if (((changed_flags & I40E_FLAG_RS_FEC) ||
+	     (changed_flags & I40E_FLAG_BASE_R_FEC)) &&
+	    pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
+	    pf->hw.device_id != I40E_DEV_ID_25G_B) {
+		dev_warn(&pf->pdev->dev,
+			 "Device does not support changing FEC configuration\n");
+		return -EOPNOTSUPP;
+	}
 
 	/* Process any additional changes needed as a result of flag changes.
 	 * The changed_flags value reflects the list of bits that were
@@ -4612,7 +4964,7 @@
 
 	/* Flush current ATR settings if ATR was disabled */
 	if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) &&
-	    !(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) {
+	    !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) {
 		set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
 		set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
 	}
@@ -4621,7 +4973,7 @@
 		u16 sw_flags = 0, valid_flags = 0;
 		int ret;
 
-		if (!(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+		if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
 			sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
 		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
 		ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
@@ -4636,17 +4988,35 @@
 		}
 	}
 
-	if ((changed_flags & pf->flags &
+	if ((changed_flags & I40E_FLAG_RS_FEC) ||
+	    (changed_flags & I40E_FLAG_BASE_R_FEC)) {
+		u8 fec_cfg = 0;
+
+		if (new_flags & I40E_FLAG_RS_FEC &&
+		    new_flags & I40E_FLAG_BASE_R_FEC) {
+			fec_cfg = I40E_AQ_SET_FEC_AUTO;
+		} else if (new_flags & I40E_FLAG_RS_FEC) {
+			fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS |
+				   I40E_AQ_SET_FEC_ABILITY_RS);
+		} else if (new_flags & I40E_FLAG_BASE_R_FEC) {
+			fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR |
+				   I40E_AQ_SET_FEC_ABILITY_KR);
+		}
+		if (i40e_set_fec_cfg(dev, fec_cfg))
+			dev_warn(&pf->pdev->dev, "Cannot change FEC config\n");
+	}
+
+	if ((changed_flags & new_flags &
 	     I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
-	    (pf->flags & I40E_FLAG_MFP_ENABLED))
+	    (new_flags & I40E_FLAG_MFP_ENABLED))
 		dev_warn(&pf->pdev->dev,
 			 "Turning on link-down-on-close flag may affect other partitions\n");
 
 	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
-		if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
+		if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
 			struct i40e_dcbx_config *dcbcfg;
 
-			i40e_aq_stop_lldp(&pf->hw, true, NULL);
+			i40e_aq_stop_lldp(&pf->hw, true, false, NULL);
 			i40e_aq_set_dcb_parameters(&pf->hw, true, NULL);
 			/* reset local_dcbx_config to default */
 			dcbcfg = &pf->hw.local_dcbx_config;
@@ -4661,17 +5031,43 @@
 			dcbcfg->pfc.willing = 1;
 			dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
 		} else {
-			i40e_aq_start_lldp(&pf->hw, NULL);
+			status = i40e_aq_start_lldp(&pf->hw, false, NULL);
+			if (status) {
+				adq_err = pf->hw.aq.asq_last_status;
+				switch (adq_err) {
+				case I40E_AQ_RC_EEXIST:
+					dev_warn(&pf->pdev->dev,
+						 "FW LLDP agent is already running\n");
+					is_reset_needed = false;
+					break;
+				case I40E_AQ_RC_EPERM:
+					dev_warn(&pf->pdev->dev,
+						 "Device configuration forbids SW from starting the LLDP agent.\n");
+					return -EINVAL;
+				default:
+					dev_warn(&pf->pdev->dev,
+						 "Starting FW LLDP agent failed: error: %s, %s\n",
+						 i40e_stat_str(&pf->hw,
+							       status),
+						 i40e_aq_str(&pf->hw,
+							     adq_err));
+					return -EINVAL;
+				}
+			}
 		}
 	}
 
+	/* Now that we've checked to ensure that the new flags are valid, load
+	 * them into place. Since we only modify flags either (a) during
+	 * initialization or (b) while holding the RTNL lock, we don't need
+	 * anything fancy here.
+	 */
+	pf->flags = new_flags;
+
 	/* Issue reset to cause things to take effect, as additional bits
 	 * are added we will need to create a mask of bits requiring reset
 	 */
-	if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
-			     I40E_FLAG_LEGACY_RX |
-			     I40E_FLAG_SOURCE_PRUNING_DISABLED |
-			     I40E_FLAG_DISABLE_FW_LLDP))
+	if (is_reset_needed)
 		i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
 
 	return 0;
@@ -4741,6 +5137,12 @@
 			/* Module is not SFF-8472 compliant */
 			modinfo->type = ETH_MODULE_SFF_8079;
 			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (!(sff8472_swap & I40E_MODULE_SFF_DDM_IMPLEMENTED)) {
+			/* Module is SFF-8472 compliant but doesn't implement
+			 * Digital Diagnostic Monitoring (DDM).
+			 */
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
 		} else {
 			modinfo->type = ETH_MODULE_SFF_8472;
 			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
@@ -4829,6 +5231,22 @@
 	return 0;
 }
 
+static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	return -EOPNOTSUPP;
+}
+
+static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	return -EOPNOTSUPP;
+}
+
+static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = {
+	.set_eeprom		= i40e_set_eeprom,
+	.get_eeprom_len		= i40e_get_eeprom_len,
+	.get_eeprom		= i40e_get_eeprom,
+};
+
 static const struct ethtool_ops i40e_ethtool_ops = {
 	.get_drvinfo		= i40e_get_drvinfo,
 	.get_regs_len		= i40e_get_regs_len,
@@ -4850,6 +5268,8 @@
 	.set_rxnfc		= i40e_set_rxnfc,
 	.self_test		= i40e_diag_test,
 	.get_strings		= i40e_get_strings,
+	.get_eee		= i40e_get_eee,
+	.set_eee		= i40e_set_eee,
 	.set_phys_id		= i40e_set_phys_id,
 	.get_sset_count		= i40e_get_sset_count,
 	.get_ethtool_stats	= i40e_get_ethtool_stats,
@@ -4870,9 +5290,18 @@
 	.set_per_queue_coalesce	= i40e_set_per_queue_coalesce,
 	.get_link_ksettings	= i40e_get_link_ksettings,
 	.set_link_ksettings	= i40e_set_link_ksettings,
+	.get_fecparam		= i40e_get_fec_param,
+	.set_fecparam		= i40e_set_fec_param,
+	.flash_device		= i40e_ddp_flash,
 };
 
 void i40e_set_ethtool_ops(struct net_device *netdev)
 {
-	netdev->ethtool_ops = &i40e_ethtool_ops;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf		*pf = np->vsi->back;
+
+	if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
+		netdev->ethtool_ops = &i40e_ethtool_ops;
+	else
+		netdev->ethtool_ops = &i40e_ethtool_recovery_mode_ops;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index 19ce93d..163ee8c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
+#include "i40e.h"
 #include "i40e_osdep.h"
 #include "i40e_register.h"
 #include "i40e_status.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index 994011c..be24d42 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
+#include "i40e.h"
 #include "i40e_osdep.h"
 #include "i40e_register.h"
 #include "i40e_type.h"
@@ -963,7 +964,7 @@
 
 /**
  * i40e_hmc_get_object_va - retrieves an object's virtual address
- * @hmc_info: pointer to i40e_hmc_info struct
+ * @hw: the hardware struct, from which we obtain the i40e_hmc_info pointer
  * @object_base: pointer to u64 to get the va
  * @rsrc_type: the hmc resource type
  * @obj_idx: hmc object index
@@ -972,16 +973,16 @@
  * base pointer.  This function is used for LAN Queue contexts.
  **/
 static
-i40e_status i40e_hmc_get_object_va(struct i40e_hmc_info *hmc_info,
-					u8 **object_base,
-					enum i40e_hmc_lan_rsrc_type rsrc_type,
-					u32 obj_idx)
+i40e_status i40e_hmc_get_object_va(struct i40e_hw *hw, u8 **object_base,
+				   enum i40e_hmc_lan_rsrc_type rsrc_type,
+				   u32 obj_idx)
 {
+	struct i40e_hmc_info *hmc_info = &hw->hmc;
 	u32 obj_offset_in_sd, obj_offset_in_pd;
-	i40e_status ret_code = 0;
 	struct i40e_hmc_sd_entry *sd_entry;
 	struct i40e_hmc_pd_entry *pd_entry;
 	u32 pd_idx, pd_lmt, rel_pd_idx;
+	i40e_status ret_code = 0;
 	u64 obj_offset_in_fpm;
 	u32 sd_idx, sd_lmt;
 
@@ -1047,7 +1048,7 @@
 	i40e_status err;
 	u8 *context_bytes;
 
-	err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
 				     I40E_HMC_LAN_TX, queue);
 	if (err < 0)
 		return err;
@@ -1068,7 +1069,7 @@
 	i40e_status err;
 	u8 *context_bytes;
 
-	err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
 				     I40E_HMC_LAN_TX, queue);
 	if (err < 0)
 		return err;
@@ -1088,7 +1089,7 @@
 	i40e_status err;
 	u8 *context_bytes;
 
-	err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
 				     I40E_HMC_LAN_RX, queue);
 	if (err < 0)
 		return err;
@@ -1109,7 +1110,7 @@
 	i40e_status err;
 	u8 *context_bytes;
 
-	err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
 				     I40E_HMC_LAN_RX, queue);
 	if (err < 0)
 		return err;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3c34270..6031223 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9,7 +9,9 @@
 /* Local includes */
 #include "i40e.h"
 #include "i40e_diag.h"
+#include "i40e_xsk.h"
 #include <net/udp_tunnel.h>
+#include <net/xdp_sock.h>
 /* All i40e tracepoints are defined by the include below, which
  * must be included exactly once across the whole kernel with
  * CREATE_TRACE_POINTS defined
@@ -24,13 +26,13 @@
 #define DRV_KERN "-k"
 
 #define DRV_VERSION_MAJOR 2
-#define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 2
+#define DRV_VERSION_MINOR 8
+#define DRV_VERSION_BUILD 20
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
 const char i40e_driver_version_str[] = DRV_VERSION;
-static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation.";
+static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation.";
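
With these values, DRV_VERSION stringifies to "2.8.20" with DRV_KERN appended, i.e. the driver now reports version "2.8.20-k".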
 
 /* a bit of forward declarations */
 static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
@@ -44,6 +46,10 @@
 static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired);
 static int i40e_reset(struct i40e_pf *pf);
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
+static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf);
+static int i40e_restore_interrupt_scheme(struct i40e_pf *pf);
+static bool i40e_check_recovery_mode(struct i40e_pf *pf);
+static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 static int i40e_get_capabilities(struct i40e_pf *pf,
@@ -67,6 +73,9 @@
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
@@ -75,6 +84,8 @@
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0},
 	/* required last entry */
@@ -89,7 +100,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 static struct workqueue_struct *i40e_wq;
@@ -107,8 +118,8 @@
 	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
 
 	mem->size = ALIGN(size, alignment);
-	mem->va = dma_zalloc_coherent(&pf->pdev->dev, mem->size,
-				      &mem->pa, GFP_KERNEL);
+	mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa,
+				     GFP_KERNEL);
 	if (!mem->va)
 		return -ENOMEM;
 
@@ -276,8 +287,9 @@
  **/
 void i40e_service_event_schedule(struct i40e_pf *pf)
 {
-	if (!test_bit(__I40E_DOWN, pf->state) &&
-	    !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+	if ((!test_bit(__I40E_DOWN, pf->state) &&
+	     !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) ||
+	      test_bit(__I40E_RECOVERY_MODE, pf->state))
 		queue_work(i40e_wq, &pf->service_task);
 }
 
@@ -336,6 +348,10 @@
 		      (pf->tx_timeout_last_recovery + netdev->watchdog_timeo)))
 		return;   /* don't do any new action before the next timeout */
 
+	/* don't kick off another recovery if one is already pending */
+	if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state))
+		return;
+
 	if (tx_ring) {
 		head = i40e_get_head(tx_ring);
 		/* Read interrupt register */
@@ -420,9 +436,9 @@
 				  struct rtnl_link_stats64 *stats)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
+	struct i40e_ring *ring;
 	int i;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
@@ -436,24 +452,26 @@
 		u64 bytes, packets;
 		unsigned int start;
 
-		tx_ring = READ_ONCE(vsi->tx_rings[i]);
-		if (!tx_ring)
+		ring = READ_ONCE(vsi->tx_rings[i]);
+		if (!ring)
 			continue;
-		i40e_get_netdev_stats_struct_tx(tx_ring, stats);
+		i40e_get_netdev_stats_struct_tx(ring, stats);
 
-		rx_ring = &tx_ring[1];
+		if (i40e_enabled_xdp_vsi(vsi)) {
+			ring++;
+			i40e_get_netdev_stats_struct_tx(ring, stats);
+		}
 
+		ring++;
 		do {
-			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
-			packets = rx_ring->stats.packets;
-			bytes   = rx_ring->stats.bytes;
-		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
+			start   = u64_stats_fetch_begin_irq(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		stats->rx_packets += packets;
 		stats->rx_bytes   += bytes;
 
-		if (i40e_enabled_xdp_vsi(vsi))
-			i40e_get_netdev_stats_struct_tx(&rx_ring[1], stats);
 	}
 	rcu_read_unlock();
 
@@ -517,6 +535,10 @@
 			       sizeof(pf->veb[i]->stats));
 			memset(&pf->veb[i]->stats_offsets, 0,
 			       sizeof(pf->veb[i]->stats_offsets));
+			memset(&pf->veb[i]->tc_stats, 0,
+			       sizeof(pf->veb[i]->tc_stats));
+			memset(&pf->veb[i]->tc_stats_offsets, 0,
+			       sizeof(pf->veb[i]->tc_stats_offsets));
 			pf->veb[i]->stat_offsets_loaded = false;
 		}
 	}
@@ -619,9 +641,6 @@
 	i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx),
 			   vsi->stat_offsets_loaded,
 			   &oes->rx_unknown_protocol, &es->rx_unknown_protocol);
-	i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
-			   vsi->stat_offsets_loaded,
-			   &oes->tx_errors, &es->tx_errors);
 
 	i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx),
 			   I40E_GLV_GORCL(stat_idx),
@@ -663,7 +682,7 @@
  * i40e_update_veb_stats - Update Switch component statistics
  * @veb: the VEB being updated
  **/
-static void i40e_update_veb_stats(struct i40e_veb *veb)
+void i40e_update_veb_stats(struct i40e_veb *veb)
 {
 	struct i40e_pf *pf = veb->pf;
 	struct i40e_hw *hw = &pf->hw;
@@ -1489,8 +1508,7 @@
 	bool found = false;
 	int bkt;
 
-	WARN(!spin_is_locked(&vsi->mac_filter_hash_lock),
-	     "Missing mac_filter_hash_lock\n");
+	lockdep_assert_held(&vsi->mac_filter_hash_lock);
 	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
 		if (ether_addr_equal(macaddr, f->macaddr)) {
 			__i40e_del_filter(vsi, f);
@@ -1528,8 +1546,8 @@
 		return 0;
 	}
 
-	if (test_bit(__I40E_VSI_DOWN, vsi->back->state) ||
-	    test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state))
+	if (test_bit(__I40E_DOWN, pf->state) ||
+	    test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
 		return -EADDRNOTAVAIL;
 
 	if (ether_addr_equal(hw->mac.addr, addr->sa_data))
@@ -1539,22 +1557,21 @@
 		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
 
 	/* Copy the address first, so that we avoid a possible race with
-	 * .set_rx_mode(). If we copy after changing the address in the filter
-	 * list, we might open ourselves to a narrow race window where
-	 * .set_rx_mode could delete our dev_addr filter and prevent traffic
-	 * from passing.
+	 * .set_rx_mode().
+	 * - Remove old address from MAC filter
+	 * - Copy new address
+	 * - Add new address to MAC filter
 	 */
-	ether_addr_copy(netdev->dev_addr, addr->sa_data);
-
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 	i40e_del_mac_filter(vsi, netdev->dev_addr);
-	i40e_add_mac_filter(vsi, addr->sa_data);
+	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+	i40e_add_mac_filter(vsi, netdev->dev_addr);
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
 	if (vsi->type == I40E_VSI_MAIN) {
 		i40e_status ret;
 
-		ret = i40e_aq_mac_address_write(&vsi->back->hw,
-						I40E_AQC_WRITE_TYPE_LAA_WOL,
+		ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL,
 						addr->sa_data, NULL);
 		if (ret)
 			netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n",
@@ -1565,7 +1582,7 @@
 	/* schedule our worker thread which will take care of
 	 * applying the new filter changes
 	 */
-	i40e_service_event_schedule(vsi->back);
+	i40e_service_event_schedule(pf);
 	return 0;
 }
 
@@ -2101,11 +2118,22 @@
 	fcnt = i40e_update_filter_state(num_add, list, add_head);
 
 	if (fcnt != num_add) {
-		set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
-		dev_warn(&vsi->back->pdev->dev,
-			 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
-			 i40e_aq_str(hw, aq_err),
-			 vsi_name);
+		if (vsi->type == I40E_VSI_MAIN) {
+			set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+			dev_warn(&vsi->back->pdev->dev,
+				 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
+				 i40e_aq_str(hw, aq_err), vsi_name);
+		} else if (vsi->type == I40E_VSI_SRIOV ||
+			   vsi->type == I40E_VSI_VMDQ1 ||
+			   vsi->type == I40E_VSI_VMDQ2) {
+			dev_warn(&vsi->back->pdev->dev,
+				 "Error %s adding RX filters on %s, please set promiscuous mode manually for %s\n",
+				 i40e_aq_str(hw, aq_err), vsi_name, vsi_name);
+		} else {
+			dev_warn(&vsi->back->pdev->dev,
+				 "Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
+				 i40e_aq_str(hw, aq_err), vsi_name, vsi->type);
+		}
 	}
 }
 
@@ -2507,6 +2535,10 @@
 				 vsi_name,
 				 i40e_stat_str(hw, aq_ret),
 				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		} else {
+			dev_info(&pf->pdev->dev, "%s is %s allmulti mode.\n",
+				 vsi->netdev->name,
+				 cur_multipromisc ? "entering" : "leaving");
 		}
 	}
 
@@ -2560,6 +2592,10 @@
 		return;
 	if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
 		return;
+	if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) {
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
+		return;
+	}
 
 	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] &&
@@ -2574,6 +2610,7 @@
 			}
 		}
 	}
+	clear_bit(__I40E_VF_DISABLE, pf->state);
 }
 
 /**
@@ -2648,6 +2685,10 @@
 	struct i40e_vsi_context ctxt;
 	i40e_status ret;
 
+	/* Don't modify stripping options if a port VLAN is active */
+	if (vsi->info.pvid)
+		return;
+
 	if ((vsi->info.valid_sections &
 	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
 	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0))
@@ -2678,6 +2719,10 @@
 	struct i40e_vsi_context ctxt;
 	i40e_status ret;
 
+	/* Don't modify stripping options if a port VLAN is active */
+	if (vsi->info.pvid)
+		return;
+
 	if ((vsi->info.valid_sections &
 	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
 	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) ==
@@ -2943,9 +2988,9 @@
  **/
 void i40e_vsi_remove_pvid(struct i40e_vsi *vsi)
 {
-	i40e_vlan_stripping_disable(vsi);
-
 	vsi->info.pvid = 0;
+
+	i40e_vlan_stripping_disable(vsi);
 }
 
 /**
@@ -3058,6 +3103,26 @@
 }
 
 /**
+ * i40e_xsk_umem - Retrieve the AF_XDP UMEM if XDP and zero-copy are enabled
+ * @ring: The Tx or Rx ring
+ *
+ * Returns the UMEM or NULL.
+ **/
+static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
+{
+	bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
+	int qid = ring->queue_index;
+
+	if (ring_is_xdp(ring))
+		qid -= ring->vsi->alloc_queue_pairs;
+
+	if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps))
+		return NULL;
+
+	return xdp_get_umem_from_qid(ring->vsi->netdev, qid);
+}
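
A worked example of the qid adjustment above: XDP Tx rings are stacked after the regular queue pairs, so with alloc_queue_pairs == 8, an XDP ring whose queue_index is 10 maps back to channel qid 10 - 8 = 2 before the af_xdp_zc_qps bitmap is tested.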
+
+/**
  * i40e_configure_tx_ring - Configure a transmit ring context and rest
  * @ring: The Tx ring to configure
  *
@@ -3072,6 +3137,9 @@
 	i40e_status err = 0;
 	u32 qtx_ctl = 0;
 
+	if (ring_is_xdp(ring))
+		ring->xsk_umem = i40e_xsk_umem(ring);
+
 	/* some ATR related tx ring init */
 	if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
 		ring->atr_sample_rate = vsi->back->atr_sample_rate;
@@ -3181,13 +3249,46 @@
 	struct i40e_hw *hw = &vsi->back->hw;
 	struct i40e_hmc_obj_rxq rx_ctx;
 	i40e_status err = 0;
+	bool ok;
+	int ret;
 
 	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
 
 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	ring->rx_buf_len = vsi->rx_buf_len;
+	if (ring->vsi->type == I40E_VSI_MAIN)
+		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+	ring->xsk_umem = i40e_xsk_umem(ring);
+	if (ring->xsk_umem) {
+		ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+				   XDP_PACKET_HEADROOM;
+		/* For AF_XDP ZC, we disallow packets to span
+		 * multiple buffers, thus letting us skip that
+		 * handling in the fast-path.
+		 */
+		chain_len = 1;
+		ring->zca.free = i40e_zca_free;
+		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						 MEM_TYPE_ZERO_COPY,
+						 &ring->zca);
+		if (ret)
+			return ret;
+		dev_info(&vsi->back->pdev->dev,
+			 "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+			 ring->queue_index);
+
+	} else {
+		ring->rx_buf_len = vsi->rx_buf_len;
+		if (ring->vsi->type == I40E_VSI_MAIN) {
+			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (ret)
+				return ret;
+		}
+	}
 
 	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
 				    BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3243,7 +3344,18 @@
 	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
 	writel(0, ring->tail);
 
-	i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	ok = ring->xsk_umem ?
+	     i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
+	     !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	if (!ok) {
+		/* Log this in case the user has forgotten to give the kernel
+		 * any buffers, even later in the application.
+		 */
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
+			 ring->xsk_umem ? "UMEM enabled " : "",
+			 ring->queue_index, pf_q);
+	}
 
 	return 0;
 }
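
A worked example of the zero-copy buffer sizing above: chunk_size_nohr is the UMEM chunk size minus the UMEM's own headroom, so with a typical 2048-byte chunk, no UMEM headroom, and the kernel's 256-byte XDP_PACKET_HEADROOM, rx_buf_len comes out to 2048 - 256 = 1792 bytes.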
@@ -3262,7 +3374,7 @@
 	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
 		err = i40e_configure_tx_ring(vsi->tx_rings[i]);
 
-	if (!i40e_enabled_xdp_vsi(vsi))
+	if (err || !i40e_enabled_xdp_vsi(vsi))
 		return err;
 
 	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
@@ -3560,7 +3672,7 @@
 		      (I40E_QUEUE_TYPE_TX
 		       << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
 
-	       wr32(hw, I40E_QINT_TQCTL(nextqp), val);
+		wr32(hw, I40E_QINT_TQCTL(nextqp), val);
 	}
 
 	val = I40E_QINT_TQCTL_CAUSE_ENA_MASK		      |
@@ -3927,7 +4039,8 @@
 enable_intr:
 	/* re-enable interrupt causes */
 	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
-	if (!test_bit(__I40E_DOWN, pf->state)) {
+	if (!test_bit(__I40E_DOWN, pf->state) ||
+	    test_bit(__I40E_RECOVERY_MODE, pf->state)) {
 		i40e_service_event_schedule(pf);
 		i40e_irq_dynamic_enable_icr0(pf);
 	}
@@ -5762,8 +5875,10 @@
 		return -ENOENT;
 	}
 
-	/* Success, update channel */
-	ch->enabled_tc = enabled_tc;
+	/* Success, update channel; set enabled_tc only if the channel
+	 * is not a macvlan
+	 */
+	ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
 	ch->seid = ctxt.seid;
 	ch->vsi_number = ctxt.vsi_number;
 	ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
@@ -6326,11 +6441,13 @@
 	 * Also do not enable DCBx if FW LLDP agent is disabled
 	 */
 	if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) ||
-	    (pf->flags & I40E_FLAG_DISABLE_FW_LLDP))
+	    (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) {
+		dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n");
+		err = I40E_NOT_SUPPORTED;
 		goto out;
+	}
 
-	/* Get the initial DCB configuration */
-	err = i40e_init_dcb(hw);
+	err = i40e_init_dcb(hw, true);
 	if (!err) {
 		/* Device/Function is not DCBX capable */
 		if ((!hw->func_caps.dcb) ||
@@ -6384,7 +6501,10 @@
 	char *req_fec = "";
 	char *an = "";
 
-	new_speed = pf->hw.phy.link_info.link_speed;
+	if (isup)
+		new_speed = pf->hw.phy.link_info.link_speed;
+	else
+		new_speed = I40E_LINK_SPEED_UNKNOWN;
 
 	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
 		return;
@@ -6417,6 +6537,12 @@
 	case I40E_LINK_SPEED_10GB:
 		speed = "10 G";
 		break;
+	case I40E_LINK_SPEED_5GB:
+		speed = "5 G";
+		break;
+	case I40E_LINK_SPEED_2_5GB:
+		speed = "2.5 G";
+		break;
 	case I40E_LINK_SPEED_1GB:
 		speed = "1000 M";
 		break;
@@ -6443,19 +6569,19 @@
 	}
 
 	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
-		req_fec = ", Requested FEC: None";
-		fec = ", FEC: None";
-		an = ", Autoneg: False";
+		req_fec = "None";
+		fec = "None";
+		an = "False";
 
 		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
-			an = ", Autoneg: True";
+			an = "True";
 
 		if (pf->hw.phy.link_info.fec_info &
 		    I40E_AQ_CONFIG_FEC_KR_ENA)
-			fec = ", FEC: CL74 FC-FEC/BASE-R";
+			fec = "CL74 FC-FEC/BASE-R";
 		else if (pf->hw.phy.link_info.fec_info &
 			 I40E_AQ_CONFIG_FEC_RS_ENA)
-			fec = ", FEC: CL108 RS-FEC";
+			fec = "CL108 RS-FEC";
 
 		/* 'CL108 RS-FEC' should be displayed when RS is requested, or
 		 * both RS and FC are requested
@@ -6464,14 +6590,19 @@
 		    (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
 			if (vsi->back->hw.phy.link_info.req_fec_info &
 			    I40E_AQ_REQUEST_FEC_RS)
-				req_fec = ", Requested FEC: CL108 RS-FEC";
+				req_fec = "CL108 RS-FEC";
 			else
-				req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+				req_fec = "CL74 FC-FEC/BASE-R";
 		}
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
+			    speed, req_fec, fec, an, fc);
+	} else {
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
+			    speed, fc);
 	}
 
-	netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
-		    speed, req_fec, fec, an, fc);
 }
 
 /**
@@ -6568,6 +6699,24 @@
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status err;
 	u64 mask;
+	u8 speed;
+
+	/* Card might've been put in an unstable state by other drivers
+	 * and applications, which can cause incorrect speed values to be
+	 * set on startup. In order to clear the speed registers, we call
+	 * get_phy_capabilities twice, once to get the initial state of
+	 * available speeds, and once to get current PHY config.
+	 */
+	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities,
+					   NULL);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"failed to get phy cap., ret =  %s last_status =  %s\n",
+			i40e_stat_str(hw, err),
+			i40e_aq_str(hw, hw->aq.asq_last_status));
+		return err;
+	}
+	speed = abilities.link_speed;
 
 	/* Get the current phy config */
 	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
@@ -6581,9 +6730,9 @@
 	}
 
 	/* If link needs to go up, but was not forced to go down,
-	 * no need for a flap
+	 * and its speed values are OK, no need for a flap
 	 */
-	if (is_up && abilities.phy_type != 0)
+	if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
 		return I40E_SUCCESS;
 
 	/* To force link we need to set bits for all supported PHY types,
@@ -6595,7 +6744,10 @@
 	config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
 	/* Copy the old settings, except of phy_type */
 	config.abilities = abilities.abilities;
-	config.link_speed = abilities.link_speed;
+	if (abilities.link_speed != 0)
+		config.link_speed = abilities.link_speed;
+	else
+		config.link_speed = speed;
 	config.eee_capability = abilities.eee_capability;
 	config.eeer = abilities.eeer_val;
 	config.low_power_ctrl = abilities.d3_lpan;
@@ -6651,8 +6803,13 @@
 
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
 		i40e_clean_tx_ring(vsi->tx_rings[i]);
-		if (i40e_enabled_xdp_vsi(vsi))
+		if (i40e_enabled_xdp_vsi(vsi)) {
+			/* Make sure that in-progress ndo_xdp_xmit
+			 * calls are completed.
+			 */
+			synchronize_rcu();
 			i40e_clean_tx_ring(vsi->xdp_rings[i]);
+		}
 		i40e_clean_rx_ring(vsi->rx_rings[i]);
 	}
 
@@ -6732,6 +6889,489 @@
 }
 
 /**
+ * i40e_del_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function deletes a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
+					   const u8 *macaddr, int *aq_err)
+{
+	struct i40e_aqc_remove_macvlan_element_data element;
+	i40e_status status;
+
+	memset(&element, 0, sizeof(element));
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+	status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL);
+	*aq_err = hw->aq.asq_last_status;
+
+	return status;
+}
+
+/**
+ * i40e_add_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function adds a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
+					   const u8 *macaddr, int *aq_err)
+{
+	struct i40e_aqc_add_macvlan_element_data element;
+	i40e_status status;
+	u16 cmd_flags = 0;
+
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	element.queue_number = 0;
+	element.match_method = I40E_AQC_MM_ERR_NO_RES;
+	cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
+	element.flags = cpu_to_le16(cmd_flags);
+	status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL);
+	*aq_err = hw->aq.asq_last_status;
+
+	return status;
+}
+
+/**
+ * i40e_reset_ch_rings - Reset the queue contexts in a channel
+ * @vsi: the VSI we want to access
+ * @ch: the channel we want to access
+ */
+static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
+{
+	struct i40e_ring *tx_ring, *rx_ring;
+	u16 pf_q;
+	int i;
+
+	for (i = 0; i < ch->num_queue_pairs; i++) {
+		pf_q = ch->base_queue + i;
+		tx_ring = vsi->tx_rings[pf_q];
+		tx_ring->ch = NULL;
+		rx_ring = vsi->rx_rings[pf_q];
+		rx_ring->ch = NULL;
+	}
+}
+
+/**
+ * i40e_free_macvlan_channels - free the channel VSIs serving as macvlans
+ * @vsi: the VSI we want to access
+ *
+ * This function frees the Qs of the channel VSIs from
+ * the stack and also deletes the channel VSIs, which
+ * serve as macvlans.
+ */
+static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	int ret;
+
+	if (list_empty(&vsi->macvlan_list))
+		return;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		struct i40e_vsi *parent_vsi;
+
+		if (i40e_is_channel_macvlan(ch)) {
+			i40e_reset_ch_rings(vsi, ch);
+			clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+			netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev);
+			netdev_set_sb_channel(ch->fwd->netdev, 0);
+			kfree(ch->fwd);
+			ch->fwd = NULL;
+		}
+
+		list_del(&ch->list);
+		parent_vsi = ch->parent_vsi;
+		if (!parent_vsi || !ch->initialized) {
+			kfree(ch);
+			continue;
+		}
+
+		/* remove the VSI */
+		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+					     NULL);
+		if (ret)
+			dev_err(&vsi->back->pdev->dev,
+				"unable to remove channel (%d) for parent VSI(%d)\n",
+				ch->seid, parent_vsi->seid);
+		kfree(ch);
+	}
+	vsi->macvlan_cnt = 0;
+}
+
+/**
+ * i40e_fwd_ring_up - bring the macvlan device up
+ * @vsi: the VSI we want to access
+ * @vdev: macvlan netdevice
+ * @fwd: the private fwd structure
+ */
+static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
+			    struct i40e_fwd_adapter *fwd)
+{
+	int ret = 0, num_tc = 1, i, aq_err;
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	if (list_empty(&vsi->macvlan_list))
+		return -EINVAL;
+
+	/* Go through the list and find an available channel */
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		if (!i40e_is_channel_macvlan(ch)) {
+			ch->fwd = fwd;
+			/* record configuration for macvlan interface in vdev */
+			for (i = 0; i < num_tc; i++)
+				netdev_bind_sb_channel_queue(vsi->netdev, vdev,
+							     i,
+							     ch->num_queue_pairs,
+							     ch->base_queue);
+			for (i = 0; i < ch->num_queue_pairs; i++) {
+				struct i40e_ring *tx_ring, *rx_ring;
+				u16 pf_q;
+
+				pf_q = ch->base_queue + i;
+
+				/* Get to TX ring ptr */
+				tx_ring = vsi->tx_rings[pf_q];
+				tx_ring->ch = ch;
+
+				/* Get the RX ring ptr */
+				rx_ring = vsi->rx_rings[pf_q];
+				rx_ring->ch = ch;
+			}
+			break;
+		}
+	}
+
+	/* Guarantee all rings are updated before we update the
+	 * MAC address filter.
+	 */
+	wmb();
+
+	/* Add a mac filter */
+	ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
+	if (ret) {
+		/* if we cannot add the MAC rule then disable the offload */
+		macvlan_release_l2fw_offload(vdev);
+		for (i = 0; i < ch->num_queue_pairs; i++) {
+			struct i40e_ring *rx_ring;
+			u16 pf_q;
+
+			pf_q = ch->base_queue + i;
+			rx_ring = vsi->rx_rings[pf_q];
+			rx_ring->netdev = NULL;
+		}
+		dev_info(&pf->pdev->dev,
+			 "Error adding mac filter on macvlan err %s, aq_err %s\n",
+			  i40e_stat_str(hw, ret),
+			  i40e_aq_str(hw, aq_err));
+		netdev_err(vdev, "L2fwd offload disabled due to L2 filter error\n");
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_setup_macvlans - create the channels which will be macvlans
+ * @vsi: the VSI we want to access
+ * @macvlan_cnt: no. of macvlans to be set up
+ * @qcnt: no. of Qs per macvlan
+ * @vdev: macvlan netdevice
+ */
+static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
+			       struct net_device *vdev)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	u16 sections, qmap, num_qps;
+	struct i40e_channel *ch;
+	int i, pow, ret = 0;
+	u8 offset = 0;
+
+	if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
+		return -EINVAL;
+
+	num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
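+	/* Qs left to the main VSI after carving out the macvlan Qs */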
+
+	/* find the exponent of the next higher power-of-2 of num queue pairs */
+	pow = fls(roundup_pow_of_two(num_qps) - 1);
+
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
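+	/* TC0 qmap: queue offset in the OFFSET field, queue count as a
+	 * power-of-2 exponent in the NUMBER field (e.g. num_qps = 24
+	 * gives pow = 5, i.e. 32 queues).
+	 */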
+
+	/* Setup context bits for the main VSI */
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = vsi->back->hw.pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.info = vsi->info;
+	ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+	ctxt.info.valid_sections |= cpu_to_le16(sections);
+
+	/* Reconfigure RSS for main VSI with new max queue count */
+	vsi->rss_size = max_t(u16, num_qps, qcnt);
+	ret = i40e_vsi_config_rss(vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed to reconfig RSS for num_queues (%u)\n",
+			 vsi->rss_size);
+		return ret;
+	}
+	vsi->reconfig_rss = true;
+	dev_dbg(&vsi->back->pdev->dev,
+		"Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
+	vsi->next_base_queue = num_qps;
+	vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
+
+	/* Update the VSI after updating the VSI queue-mapping
+	 * information
+	 */
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Update vsi tc config failed, err %s aq_err %s\n",
+			 i40e_stat_str(hw, ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+	/* update the local VSI info with updated queue map */
+	i40e_vsi_update_queue_map(vsi, &ctxt);
+	vsi->info.valid_sections = 0;
+
+	/* Create channels for macvlans */
+	INIT_LIST_HEAD(&vsi->macvlan_list);
+	for (i = 0; i < macvlan_cnt; i++) {
+		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+		if (!ch) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+		INIT_LIST_HEAD(&ch->list);
+		ch->num_queue_pairs = qcnt;
+		if (!i40e_setup_channel(pf, vsi, ch)) {
+			ret = -EINVAL;
+			goto err_free;
+		}
+		ch->parent_vsi = vsi;
+		vsi->cnt_q_avail -= ch->num_queue_pairs;
+		vsi->macvlan_cnt++;
+		list_add_tail(&ch->list, &vsi->macvlan_list);
+	}
+
+	return ret;
+
+err_free:
+	dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
+	i40e_free_macvlan_channels(vsi);
+
+	return ret;
+}
+
+/**
+ * i40e_fwd_add - configure macvlans
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ **/
+static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_fwd_adapter *fwd;
+	int avail_macvlan, ret;
+
+	if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
+		netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if ((pf->flags & I40E_FLAG_TC_MQPRIO)) {
+		netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
+		netdev_info(netdev, "Not enough vectors available to support macvlans\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The macvlan device has to be a single Q device so that the
+	 * tc_to_txq field can be reused to pick the tx queue.
+	 */
+	if (netif_is_multiqueue(vdev))
+		return ERR_PTR(-ERANGE);
+
+	if (!vsi->macvlan_cnt) {
+		/* reserve bit 0 for the pf device */
+		set_bit(0, vsi->fwd_bitmask);
+
+		/* Try to reserve as many queues as possible for macvlans. First
+		 * reserve 3/4 of the max vectors, then half, then a quarter,
+		 * calculating the Qs per macvlan as you go.
+		 */
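+		/* e.g. with 96 vectors (96 <= I40E_MAX_MACVLANS) the first
+		 * branch below applies: 4 Qs per macvlan and
+		 * (96 - 32) / 4 = 16 macvlans.
+		 */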
+		vectors = pf->num_lan_msix;
+		if (vectors <= I40E_MAX_MACVLANS && vectors > 64) {
+			/* allocate 4 Qs per macvlan and 32 Qs to the PF */
+			q_per_macvlan = 4;
+			macvlan_cnt = (vectors - 32) / 4;
+		} else if (vectors <= 64 && vectors > 32) {
+			/* allocate 2 Qs per macvlan and 16 Qs to the PF */
+			q_per_macvlan = 2;
+			macvlan_cnt = (vectors - 16) / 2;
+		} else if (vectors <= 32 && vectors > 16) {
+			/* allocate 1 Q per macvlan and 16 Qs to the PF */
+			q_per_macvlan = 1;
+			macvlan_cnt = vectors - 16;
+		} else if (vectors <= 16 && vectors > 8) {
+			/* allocate 1 Q per macvlan and 8 Qs to the PF */
+			q_per_macvlan = 1;
+			macvlan_cnt = vectors - 8;
+		} else {
+			/* allocate 1 Q per macvlan and 1 Q to the PF */
+			q_per_macvlan = 1;
+			macvlan_cnt = vectors - 1;
+		}
+
+		if (macvlan_cnt == 0)
+			return ERR_PTR(-EBUSY);
+
+		/* Quiesce VSI queues */
+		i40e_quiesce_vsi(vsi);
+
+		/* sets up the macvlans but does not "enable" them */
+		ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
+					  vdev);
+		if (ret)
+			return ERR_PTR(ret);
+
+		/* Unquiesce VSI */
+		i40e_unquiesce_vsi(vsi);
+	}
+	avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
+					    vsi->macvlan_cnt);
+	if (avail_macvlan >= I40E_MAX_MACVLANS)
+		return ERR_PTR(-EBUSY);
+
+	/* create the fwd struct */
+	fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);
+	if (!fwd)
+		return ERR_PTR(-ENOMEM);
+
+	set_bit(avail_macvlan, vsi->fwd_bitmask);
+	fwd->bit_no = avail_macvlan;
+	netdev_set_sb_channel(vdev, avail_macvlan);
+	fwd->netdev = vdev;
+
+	if (!netif_running(netdev))
+		return fwd;
+
+	/* Set fwd ring up */
+	ret = i40e_fwd_ring_up(vsi, vdev, fwd);
+	if (ret) {
+		/* unbind the queues and drop the subordinate channel config */
+		netdev_unbind_sb_channel(netdev, vdev);
+		netdev_set_sb_channel(vdev, 0);
+
+		kfree(fwd);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return fwd;
+}
+
+/**
+ * i40e_del_all_macvlans - Delete all the mac filters on the channels
+ * @vsi: the VSI we want to access
+ */
+static void i40e_del_all_macvlans(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int aq_err, ret = 0;
+
+	if (list_empty(&vsi->macvlan_list))
+		return;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		if (i40e_is_channel_macvlan(ch)) {
+			ret = i40e_del_macvlan_filter(hw, ch->seid,
+						      i40e_channel_mac(ch),
+						      &aq_err);
+			if (!ret) {
+				/* Reset queue contexts */
+				i40e_reset_ch_rings(vsi, ch);
+				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+				netdev_unbind_sb_channel(vsi->netdev,
+							 ch->fwd->netdev);
+				netdev_set_sb_channel(ch->fwd->netdev, 0);
+				kfree(ch->fwd);
+				ch->fwd = NULL;
+			}
+		}
+	}
+}
+
+/**
+ * i40e_fwd_del - delete macvlan interfaces
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ */
+static void i40e_fwd_del(struct net_device *netdev, void *vdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_fwd_adapter *fwd = vdev;
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int aq_err, ret = 0;
+
+	/* Find the channel associated with the macvlan and del mac filter */
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		if (i40e_is_channel_macvlan(ch) &&
+		    ether_addr_equal(i40e_channel_mac(ch),
+				     fwd->netdev->dev_addr)) {
+			ret = i40e_del_macvlan_filter(hw, ch->seid,
+						      i40e_channel_mac(ch),
+						      &aq_err);
+			if (!ret) {
+				/* Reset queue contexts */
+				i40e_reset_ch_rings(vsi, ch);
+				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+				netdev_unbind_sb_channel(netdev, fwd->netdev);
+				netdev_set_sb_channel(fwd->netdev, 0);
+				kfree(ch->fwd);
+				ch->fwd = NULL;
+			} else {
+				dev_info(&pf->pdev->dev,
+					 "Error deleting mac filter on macvlan err %s, aq_err %s\n",
+					  i40e_stat_str(hw, ret),
+					  i40e_aq_str(hw, aq_err));
+			}
+			break;
+		}
+	}
+}
+
+/**
  * i40e_setup_tc - configure multiple traffic classes
  * @netdev: net device to configure
  * @type_data: tc offload data
@@ -6744,10 +7384,12 @@
 	struct i40e_pf *pf = vsi->back;
 	u8 enabled_tc = 0, num_tc, hw;
 	bool need_reset = false;
+	int old_queue_pairs;
 	int ret = -EINVAL;
 	u16 mode;
 	int i;
 
+	old_queue_pairs = vsi->num_queue_pairs;
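+	/* saved so num_queue_pairs can be restored should configuring
+	 * the queue channels fail further down
+	 */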
 	num_tc = mqprio_qopt->qopt.num_tc;
 	hw = mqprio_qopt->qopt.hw;
 	mode = mqprio_qopt->mode;
@@ -6824,6 +7466,10 @@
 			    vsi->seid);
 		need_reset = true;
 		goto exit;
+	} else {
+		dev_info(&vsi->back->pdev->dev,
+			 "Setup channel (id:%u) utilizing num_queues %d\n",
+			 vsi->seid, vsi->tc_config.tc_info[0].qcount);
 	}
 
 	if (pf->flags & I40E_FLAG_TC_MQPRIO) {
@@ -6848,6 +7494,7 @@
 		}
 		ret = i40e_configure_queue_channels(vsi);
 		if (ret) {
+			vsi->num_queue_pairs = old_queue_pairs;
 			netdev_info(netdev,
 				    "Failed configuring queue channels\n");
 			need_reset = true;
@@ -7087,19 +7734,21 @@
 /**
  * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
  * @vsi: Pointer to VSI
- * @cls_flower: Pointer to struct tc_cls_flower_offload
+ * @cls_flower: Pointer to struct flow_cls_offload
  * @filter: Pointer to cloud filter structure
  *
  **/
 static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
-				 struct tc_cls_flower_offload *f,
+				 struct flow_cls_offload *f,
 				 struct i40e_cloud_filter *filter)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
 	struct i40e_pf *pf = vsi->back;
 	u8 field_flags = 0;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -7109,143 +7758,109 @@
 	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
 		dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
-			f->dissector->used_keys);
+			dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 
-		struct flow_dissector_key_keyid *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->mask);
-
-		if (mask->keyid != 0)
+		flow_rule_match_enc_keyid(rule, &match);
+		if (match.mask->keyid != 0)
 			field_flags |= I40E_CLOUD_FIELD_TEN_ID;
 
-		filter->tenant_id = be32_to_cpu(key->keyid);
+		filter->tenant_id = be32_to_cpu(match.key->keyid);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-
-		n_proto_key = ntohs(key->n_proto);
-		n_proto_mask = ntohs(mask->n_proto);
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
 
 		if (n_proto_key == ETH_P_ALL) {
 			n_proto_key = 0;
 			n_proto_mask = 0;
 		}
 		filter->n_proto = n_proto_key & n_proto_mask;
-		filter->ip_proto = key->ip_proto;
+		filter->ip_proto = match.key->ip_proto;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
+		flow_rule_match_eth_addrs(rule, &match);
 
 		/* use is_broadcast and is_zero to check for all 0xf or 0 */
-		if (!is_zero_ether_addr(mask->dst)) {
-			if (is_broadcast_ether_addr(mask->dst)) {
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (is_broadcast_ether_addr(match.mask->dst)) {
 				field_flags |= I40E_CLOUD_FIELD_OMAC;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
-					mask->dst);
+					match.mask->dst);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (!is_zero_ether_addr(mask->src)) {
-			if (is_broadcast_ether_addr(mask->src)) {
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (is_broadcast_ether_addr(match.mask->src)) {
 				field_flags |= I40E_CLOUD_FIELD_IMAC;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
-					mask->src);
+					match.mask->src);
 				return I40E_ERR_CONFIG;
 			}
 		}
-		ether_addr_copy(filter->dst_mac, key->dst);
-		ether_addr_copy(filter->src_mac, key->src);
+		ether_addr_copy(filter->dst_mac, match.key->dst);
+		ether_addr_copy(filter->src_mac, match.key->src);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
-		if (mask->vlan_id) {
-			if (mask->vlan_id == VLAN_VID_MASK) {
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
 				field_flags |= I40E_CLOUD_FIELD_IVLAN;
 
 			} else {
 				dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
-					mask->vlan_id);
+					match.mask->vlan_id);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		filter->vlan_id = cpu_to_be16(key->vlan_id);
+		filter->vlan_id = cpu_to_be16(match.key->vlan_id);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->mask);
+		struct flow_match_ipv4_addrs match;
 
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be32(0xffffffff)) {
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
-					&mask->dst);
+					&match.mask->dst);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (mask->src) {
-			if (mask->src == cpu_to_be32(0xffffffff)) {
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be32(0xffffffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
-					&mask->src);
+					&match.mask->src);
 				return I40E_ERR_CONFIG;
 			}
 		}
@@ -7254,70 +7869,60 @@
 			dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
 			return I40E_ERR_CONFIG;
 		}
-		filter->dst_ipv4 = key->dst;
-		filter->src_ipv4 = key->src;
+		filter->dst_ipv4 = match.key->dst;
+		filter->src_ipv4 = match.key->src;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->mask);
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
 
 		/* src and dest IPV6 address should not be LOOPBACK
 		 * (0:0:0:0:0:0:0:1), which can be represented as ::1
 		 */
-		if (ipv6_addr_loopback(&key->dst) ||
-		    ipv6_addr_loopback(&key->src)) {
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
 			dev_err(&pf->pdev->dev,
 				"Bad ipv6, addr is LOOPBACK\n");
 			return I40E_ERR_CONFIG;
 		}
-		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+		if (!ipv6_addr_any(&match.mask->dst) ||
+		    !ipv6_addr_any(&match.mask->src))
 			field_flags |= I40E_CLOUD_FIELD_IIP;
 
-		memcpy(&filter->src_ipv6, &key->src.s6_addr32,
+		memcpy(&filter->src_ipv6, &match.key->src.s6_addr32,
 		       sizeof(filter->src_ipv6));
-		memcpy(&filter->dst_ipv6, &key->dst.s6_addr32,
+		memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32,
 		       sizeof(filter->dst_ipv6));
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
 
-		if (mask->src) {
-			if (mask->src == cpu_to_be16(0xffff)) {
+		flow_rule_match_ports(rule, &match);
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be16(0xffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
-					be16_to_cpu(mask->src));
+					be16_to_cpu(match.mask->src));
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be16(0xffff)) {
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be16(0xffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
-					be16_to_cpu(mask->dst));
+					be16_to_cpu(match.mask->dst));
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		filter->dst_port = key->dst;
-		filter->src_port = key->src;
+		filter->dst_port = match.key->dst;
+		filter->src_port = match.key->src;
 
 		switch (filter->ip_proto) {
 		case IPPROTO_TCP:
@@ -7371,11 +7976,11 @@
 /**
  * i40e_configure_clsflower - Configure tc flower filters
  * @vsi: Pointer to VSI
- * @cls_flower: Pointer to struct tc_cls_flower_offload
+ * @cls_flower: Pointer to struct flow_cls_offload
  *
  **/
 static int i40e_configure_clsflower(struct i40e_vsi *vsi,
-				    struct tc_cls_flower_offload *cls_flower)
+				    struct flow_cls_offload *cls_flower)
 {
 	int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
 	struct i40e_cloud_filter *filter = NULL;
@@ -7467,11 +8072,11 @@
 /**
  * i40e_delete_clsflower - Remove tc flower filters
  * @vsi: Pointer to VSI
- * @cls_flower: Pointer to struct tc_cls_flower_offload
+ * @cls_flower: Pointer to struct flow_cls_offload
  *
  **/
 static int i40e_delete_clsflower(struct i40e_vsi *vsi,
-				 struct tc_cls_flower_offload *cls_flower)
+				 struct flow_cls_offload *cls_flower)
 {
 	struct i40e_cloud_filter *filter = NULL;
 	struct i40e_pf *pf = vsi->back;
@@ -7514,16 +8119,16 @@
  * @type_data: offload data
  **/
 static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
-				    struct tc_cls_flower_offload *cls_flower)
+				    struct flow_cls_offload *cls_flower)
 {
 	struct i40e_vsi *vsi = np->vsi;
 
 	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
+	case FLOW_CLS_REPLACE:
 		return i40e_configure_clsflower(vsi, cls_flower);
-	case TC_CLSFLOWER_DESTROY:
+	case FLOW_CLS_DESTROY:
 		return i40e_delete_clsflower(vsi, cls_flower);
-	case TC_CLSFLOWER_STATS:
+	case FLOW_CLS_STATS:
 		return -EOPNOTSUPP;
 	default:
 		return -EOPNOTSUPP;
@@ -7547,34 +8152,21 @@
 	}
 }
 
-static int i40e_setup_tc_block(struct net_device *dev,
-			       struct tc_block_offload *f)
-{
-	struct i40e_netdev_priv *np = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
-					     np, np, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(i40e_block_cb_list);
 
 static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 			   void *type_data)
 {
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+
 	switch (type) {
 	case TC_SETUP_QDISC_MQPRIO:
 		return i40e_setup_tc(netdev, type_data);
 	case TC_SETUP_BLOCK:
-		return i40e_setup_tc_block(netdev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &i40e_block_cb_list,
+						  i40e_setup_tc_block_cb,
+						  np, np, true);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -7869,6 +8461,11 @@
 		dev_dbg(&pf->pdev->dev, "PFR requested\n");
 		i40e_handle_reset_warning(pf, lock_acquired);
 
+		dev_info(&pf->pdev->dev,
+			 pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
+			 "FW LLDP is disabled\n" :
+			 "FW LLDP is enabled\n");
+
 	} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
 		int v;
 
@@ -8057,8 +8654,8 @@
 		i40e_service_event_schedule(pf);
 	} else {
 		i40e_pf_unquiesce_all_vsi(pf);
-	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
-	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
+		set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+		set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
 	}
 
 exit:
@@ -8440,14 +9037,9 @@
 	i40e_status status;
 	bool new_link, old_link;
 
-	/* save off old link status information */
-	pf->hw.phy.link_info_old = pf->hw.phy.link_info;
-
 	/* set this to force the get_link_status call to refresh state */
 	pf->hw.phy.get_link_info = true;
-
 	old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
-
 	status = i40e_get_link_status(&pf->hw, &new_link);
 
 	/* On success, disable temp link polling */
@@ -8477,7 +9069,7 @@
 	/* Notify the base of the switch tree connected to
 	 * the link.  Floating VEBs are not notified.
 	 */
-	if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
+	if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
 		i40e_veb_link_event(pf->veb[pf->lan_veb], new_link);
 	else
 		i40e_vsi_link_event(vsi, new_link);
@@ -9235,6 +9827,11 @@
 			dev_warn(&pf->pdev->dev,
 				 "shutdown_lan_hmc failed: %d\n", ret);
 	}
+
+	/* Save the current PTP time so that we can restore the time after the
+	 * reset completes.
+	 */
+	i40e_ptp_save_hw_time(pf);
 }
 
 /**
@@ -9327,6 +9924,7 @@
  **/
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 {
+	int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
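+	/* Snapshot the recovery-mode bit up front so that transitions
+	 * into or out of recovery mode can be detected further down.
+	 */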
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	struct i40e_hw *hw = &pf->hw;
 	u8 set_fc_aq_fail = 0;
@@ -9334,7 +9932,14 @@
 	u32 val;
 	int v;
 
-	if (test_bit(__I40E_DOWN, pf->state))
+	if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+	    i40e_check_recovery_mode(pf)) {
+		i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
+	}
+
+	if (test_bit(__I40E_DOWN, pf->state) &&
+	    !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+	    !old_recovery_mode_bit)
 		goto clear_recovery;
 	dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
 
@@ -9363,6 +9968,44 @@
 	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
 		i40e_verify_eeprom(pf);
 
+	/* If we are going into or out of recovery mode we have to act
+	 * accordingly with regard to resource initialization
+	 * and deinitialization.
+	 */
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
+	    old_recovery_mode_bit) {
+		if (i40e_get_capabilities(pf,
+					  i40e_aqc_opc_list_func_capabilities))
+			goto end_unlock;
+
+		if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+			/* we're staying in recovery mode, so we'll reinitialize
+			 * the misc vector here
+			 */
+			if (i40e_setup_misc_vector_for_recovery_mode(pf))
+				goto end_unlock;
+		} else {
+			if (!lock_acquired)
+				rtnl_lock();
+			/* we're going out of recovery mode so we'll free
+			 * the IRQ allocated specifically for recovery mode
+			 * and restore the interrupt scheme
+			 */
+			free_irq(pf->pdev->irq, pf);
+			i40e_clear_interrupt_scheme(pf);
+			if (i40e_restore_interrupt_scheme(pf))
+				goto end_unlock;
+		}
+
+		/* tell the firmware that we're starting */
+		i40e_send_version(pf);
+
+		/* bail out in case recovery mode was detected, as there is
+		 * no need for further configuration.
+		 */
+		goto end_unlock;
+	}
+
 	i40e_clear_pxe_mode(hw);
 	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
 	if (ret)
@@ -9566,6 +10209,7 @@
 	clear_bit(__I40E_RESET_FAILED, pf->state);
 clear_recovery:
 	clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
+	clear_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state);
 }
 
 /**
@@ -9613,7 +10257,6 @@
 {
 	struct i40e_hw *hw = &pf->hw;
 	bool mdd_detected = false;
-	bool pf_mdd_detected = false;
 	struct i40e_vf *vf;
 	u32 reg;
 	int i;
@@ -9659,19 +10302,12 @@
 		reg = rd32(hw, I40E_PF_MDET_TX);
 		if (reg & I40E_PF_MDET_TX_VALID_MASK) {
 			wr32(hw, I40E_PF_MDET_TX, 0xFFFF);
-			dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
-			pf_mdd_detected = true;
+			dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n");
 		}
 		reg = rd32(hw, I40E_PF_MDET_RX);
 		if (reg & I40E_PF_MDET_RX_VALID_MASK) {
 			wr32(hw, I40E_PF_MDET_RX, 0xFFFF);
-			dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n");
-			pf_mdd_detected = true;
-		}
-		/* Queue belongs to the PF, initiate a reset */
-		if (pf_mdd_detected) {
-			set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
-			i40e_service_event_schedule(pf);
+			dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n");
 		}
 	}
 
@@ -9684,6 +10320,9 @@
 			vf->num_mdd_events++;
 			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
 				 i);
+			dev_info(&pf->pdev->dev,
+				 "Use PF Control I/F to re-enable the VF\n");
+			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
 		}
 
 		reg = rd32(hw, I40E_VP_MDET_RX(i));
@@ -9692,11 +10331,6 @@
 			vf->num_mdd_events++;
 			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
 				 i);
-		}
-
-		if (vf->num_mdd_events > I40E_DEFAULT_NUM_MDD_EVENTS_ALLOWED) {
-			dev_info(&pf->pdev->dev,
-				 "Too many MDD events on VF %d, disabled\n", i);
 			dev_info(&pf->pdev->dev,
 				 "Use PF Control I/F to re-enable the VF\n");
 			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
@@ -9823,31 +10457,38 @@
 	unsigned long start_time = jiffies;
 
 	/* don't bother with service tasks if a reset is in progress */
-	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_SUSPENDED, pf->state))
 		return;
 
 	if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
 		return;
 
-	i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
-	i40e_sync_filters_subtask(pf);
-	i40e_reset_subtask(pf);
-	i40e_handle_mdd_event(pf);
-	i40e_vc_process_vflr_event(pf);
-	i40e_watchdog_subtask(pf);
-	i40e_fdir_reinit_subtask(pf);
-	if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
-		/* Client subtask will reopen next time through. */
-		i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true);
+	if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
+		i40e_sync_filters_subtask(pf);
+		i40e_reset_subtask(pf);
+		i40e_handle_mdd_event(pf);
+		i40e_vc_process_vflr_event(pf);
+		i40e_watchdog_subtask(pf);
+		i40e_fdir_reinit_subtask(pf);
+		if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
+			/* Client subtask will reopen next time through. */
+			i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi],
+							   true);
+		} else {
+			i40e_client_subtask(pf);
+			if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
+					       pf->state))
+				i40e_notify_client_of_l2_param_changes(
+								pf->vsi[pf->lan_vsi]);
+		}
+		i40e_sync_filters_subtask(pf);
+		i40e_sync_udp_filters_subtask(pf);
 	} else {
-		i40e_client_subtask(pf);
-		if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
-				       pf->state))
-			i40e_notify_client_of_l2_param_changes(
-							pf->vsi[pf->lan_vsi]);
+		i40e_reset_subtask(pf);
 	}
-	i40e_sync_filters_subtask(pf);
-	i40e_sync_udp_filters_subtask(pf);
+
 	i40e_clean_adminq_subtask(pf);
 
 	/* flush memory to make sure state is correct before next watchdog */
@@ -9889,8 +10530,12 @@
 	switch (vsi->type) {
 	case I40E_VSI_MAIN:
 		vsi->alloc_queue_pairs = pf->num_lan_qps;
-		vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
-				      I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
 		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
 			vsi->num_q_vectors = pf->num_lan_msix;
 		else
@@ -9900,22 +10545,32 @@
 
 	case I40E_VSI_FDIR:
 		vsi->alloc_queue_pairs = 1;
-		vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT,
-				      I40E_REQ_DESCRIPTOR_MULTIPLE);
+		vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT,
+					 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT,
+					 I40E_REQ_DESCRIPTOR_MULTIPLE);
 		vsi->num_q_vectors = pf->num_fdsb_msix;
 		break;
 
 	case I40E_VSI_VMDQ2:
 		vsi->alloc_queue_pairs = pf->num_vmdq_qps;
-		vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
-				      I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
 		vsi->num_q_vectors = pf->num_vmdq_msix;
 		break;
 
 	case I40E_VSI_SRIOV:
 		vsi->alloc_queue_pairs = pf->num_vf_qps;
-		vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
-				      I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
 		break;
 
 	default:
@@ -10028,6 +10683,12 @@
 	hash_init(vsi->mac_filter_hash);
 	vsi->irqs_ready = false;
 
+	if (type == I40E_VSI_MAIN) {
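+		/* bitmap of the queue pairs that have AF_XDP zero-copy
+		 * enabled; only the main VSI tracks this
+		 */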
+		vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL);
+		if (!vsi->af_xdp_zc_qps)
+			goto err_rings;
+	}
+
 	ret = i40e_set_num_rings_in_vsi(vsi);
 	if (ret)
 		goto err_rings;
@@ -10046,6 +10707,7 @@
 	goto unlock_pf;
 
 err_rings:
+	bitmap_free(vsi->af_xdp_zc_qps);
 	pf->next_vsi = i - 1;
 	kfree(vsi);
 unlock_pf:
@@ -10126,6 +10788,7 @@
 	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
 	i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
 
+	bitmap_free(vsi->af_xdp_zc_qps);
 	i40e_vsi_free_arrays(vsi, true);
 	i40e_clear_rss_config_user(vsi);
 
@@ -10183,7 +10846,7 @@
 		ring->vsi = vsi;
 		ring->netdev = vsi->netdev;
 		ring->dev = &pf->pdev->dev;
-		ring->count = vsi->num_desc;
+		ring->count = vsi->num_tx_desc;
 		ring->size = 0;
 		ring->dcb_tc = 0;
 		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
@@ -10200,7 +10863,7 @@
 		ring->vsi = vsi;
 		ring->netdev = NULL;
 		ring->dev = &pf->pdev->dev;
-		ring->count = vsi->num_desc;
+		ring->count = vsi->num_tx_desc;
 		ring->size = 0;
 		ring->dcb_tc = 0;
 		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
@@ -10216,7 +10879,7 @@
 		ring->vsi = vsi;
 		ring->netdev = vsi->netdev;
 		ring->dev = &pf->pdev->dev;
-		ring->count = vsi->num_desc;
+		ring->count = vsi->num_rx_desc;
 		ring->size = 0;
 		ring->dcb_tc = 0;
 		ring->itr_setting = pf->rx_itr_default;
@@ -10662,6 +11325,48 @@
 }
 
 /**
+ * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle
+ * non-queue events in recovery mode
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage
+ * the non-queue interrupts, e.g. AdminQ and errors in recovery mode.
+ * This is handled differently than in normal mode since no Tx/Rx resources
+ * are being allocated.
+ **/
+static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
+{
+	int err;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		err = i40e_setup_misc_vector(pf);
+
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "MSI-X misc vector request failed, error %d\n",
+				 err);
+			return err;
+		}
+	} else {
+		u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED;
+
+		err = request_irq(pf->pdev->irq, i40e_intr, flags,
+				  pf->int_name, pf);
+
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "MSI/legacy misc vector request failed, error %d\n",
+				 err);
+			return err;
+		}
+		i40e_enable_misc_int_causes(pf);
+		i40e_irq_dynamic_enable_icr0(pf);
+	}
+
+	return 0;
+}
+
+/**
  * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
  * @pf: board private structure
  *
@@ -10972,6 +11677,7 @@
 	if (!(pf->flags & I40E_FLAG_RSS_ENABLED))
 		return 0;
 
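+	/* never use more RSS queues than there are online CPUs */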
+	queue_count = min_t(int, queue_count, num_online_cpus());
 	new_rss_size = min_t(int, queue_count, pf->rss_size_max);
 
 	if (queue_count != vsi->num_queue_pairs) {
@@ -11266,16 +11972,15 @@
 		/* IWARP needs one extra vector for CQP just like MISC. */
 		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
 	}
-	/* Stopping the FW LLDP engine is only supported on the
-	 * XL710 with a FW ver >= 1.7.  Also, stopping FW LLDP
-	 * engine is not supported if NPAR is functioning on this
-	 * part
+	/* Stopping FW LLDP engine is supported on XL710 and X722
+	 * starting from FW versions determined in i40e_init_adminq.
+	 * Stopping the FW LLDP engine is not supported on XL710
+	 * if NPAR is functioning, so unset this HW flag in that case.
 	 */
 	if (pf->hw.mac.type == I40E_MAC_XL710 &&
-	    !pf->hw.func_caps.npar_enable &&
-	    (pf->hw.aq.api_maj_ver > 1 ||
-	     (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
-		pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
+	    pf->hw.func_caps.npar_enable &&
+	    (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+		pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE;
 
 #ifdef CONFIG_PCI_IOV
 	if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
@@ -11412,6 +12117,9 @@
 		return -EINVAL;
 	}
 
+	if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt)
+		i40e_del_all_macvlans(vsi);
+
 	need_reset = i40e_set_ntuple(pf, features);
 
 	if (need_reset)
@@ -11575,7 +12283,8 @@
 static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			    struct net_device *dev,
 			    const unsigned char *addr, u16 vid,
-			    u16 flags)
+			    u16 flags,
+			    struct netlink_ext_ack *extack)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -11616,6 +12325,7 @@
  * @dev: the netdev being configured
  * @nlh: RTNL message
  * @flags: bridge flags
+ * @extack: netlink extended ack
  *
  * Inserts a new hardware bridge if not already created and
  * enables the bridging mode requested (VEB or VEPA). If the
@@ -11628,7 +12338,8 @@
  **/
 static int i40e_ndo_bridge_setlink(struct net_device *dev,
 				   struct nlmsghdr *nlh,
-				   u16 flags)
+				   u16 flags,
+				   struct netlink_ext_ack *extack)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_vsi *vsi = np->vsi;
@@ -11824,10 +12535,274 @@
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
+	/* Kick start the NAPI context if there is an AF_XDP socket open
+	 * on that queue id. This is so that receiving will start.
+	 */
+	if (need_reset && prog)
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->xdp_rings[i]->xsk_umem)
+				(void)i40e_xsk_wakeup(vsi->netdev, i,
+						      XDP_WAKEUP_RX);
+
 	return 0;
 }
 
 /**
+ * i40e_enter_busy_conf - Enters busy config state
+ * @vsi: vsi
+ *
+ * Returns 0 on success, <0 for failure.
+ **/
+static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int timeout = 50;
+
+	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_exit_busy_conf - Exits busy config state
+ * @vsi: vsi
+ **/
+static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	clear_bit(__I40E_CONFIG_BUSY, pf->state);
+}
+
+/**
+ * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
+{
+	memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
+	       sizeof(vsi->rx_rings[queue_pair]->rx_stats));
+	memset(&vsi->tx_rings[queue_pair]->stats, 0,
+	       sizeof(vsi->tx_rings[queue_pair]->stats));
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		memset(&vsi->xdp_rings[queue_pair]->stats, 0,
+		       sizeof(vsi->xdp_rings[queue_pair]->stats));
+	}
+}
+
+/**
+ * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
+{
+	i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		/* Make sure that in-progress ndo_xdp_xmit calls are
+		 * completed.
+		 */
+		synchronize_rcu();
+		i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+	}
+	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+}
+
+/**
+ * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ **/
+static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
+					bool enable)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_q_vector *q_vector = rxr->q_vector;
+
+	if (!vsi->netdev)
+		return;
+
+	/* All rings in a qp belong to the same qvector. */
+	if (q_vector->rx.ring || q_vector->tx.ring) {
+		if (enable)
+			napi_enable(&q_vector->napi);
+		else
+			napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair,
+					bool enable)
+{
+	struct i40e_pf *pf = vsi->back;
+	int pf_q, ret = 0;
+
+	pf_q = vsi->base_queue + queue_pair;
+	ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
+				     false /*is xdp*/, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d Tx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+		return ret;
+	}
+
+	i40e_control_rx_q(pf, pf_q, enable);
+	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d Rx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+		return ret;
+	}
+
+	/* Due to HW errata, on Rx disable only, the register can
+	 * indicate done before it really is. Needs 50 ms to be sure.
+	 */
+	if (!enable)
+		mdelay(50);
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		return ret;
+
+	ret = i40e_control_wait_tx_q(vsi->seid, pf,
+				     pf_q + vsi->alloc_queue_pairs,
+				     true /*is xdp*/, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d XDP Tx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* All rings in a qp belong to the same qvector. */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
+	else
+		i40e_irq_dynamic_enable_icr0(pf);
+
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* For simplicity, instead of removing the qp interrupt causes
+	 * from the interrupt linked list, we simply disable the interrupt, and
+	 * leave the list intact.
+	 *
+	 * All rings in a qp belong to the same qvector.
+	 */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
+
+		wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->msix_entries[intpf].vector);
+	} else {
+		/* Legacy and MSI mode - this stops all interrupt handling */
+		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->pdev->irq);
+	}
+}
+
+/**
+ * i40e_queue_pair_disable - Disables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
+{
+	int err;
+
+	err = i40e_enter_busy_conf(vsi);
+	if (err)
+		return err;
+
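+	/* quiesce in order: interrupts first, then rings and NAPI,
+	 * and only then clean the rings and reset their stats
+	 */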
+	i40e_queue_pair_disable_irq(vsi, queue_pair);
+	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
+	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
+	i40e_queue_pair_clean_rings(vsi, queue_pair);
+	i40e_queue_pair_reset_stats(vsi, queue_pair);
+
+	return err;
+}
+
+/**
+ * i40e_queue_pair_enable - Enables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
+{
+	int err;
+
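+	/* reconfigure the Tx, XDP Tx and Rx rings before re-enabling them */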
+	err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
+	if (err)
+		return err;
+
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
+		if (err)
+			return err;
+	}
+
+	err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
+	if (err)
+		return err;
+
+	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */);
+	i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */);
+	i40e_queue_pair_enable_irq(vsi, queue_pair);
+
+	i40e_exit_busy_conf(vsi);
+
+	return err;
+}
+
+/**
  * i40e_xdp - implements ndo_bpf for i40e
  * @dev: netdevice
  * @xdp: XDP command
@@ -11847,6 +12822,9 @@
 	case XDP_QUERY_PROG:
 		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
+	case XDP_SETUP_XSK_UMEM:
+		return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
+					   xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}
@@ -11886,6 +12864,9 @@
 	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
 	.ndo_bpf		= i40e_xdp,
 	.ndo_xdp_xmit		= i40e_xdp_xmit,
+	.ndo_xsk_wakeup		= i40e_xsk_wakeup,
+	.ndo_dfwd_add_station	= i40e_fwd_add,
+	.ndo_dfwd_del_station	= i40e_fwd_del,
 };
 
 /**
@@ -11945,13 +12926,16 @@
 	/* record features VLANs can make use of */
 	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
 
-	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
-		netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+	/* enable macvlan offloads */
+	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
 
 	hw_features = hw_enc_features		|
 		      NETIF_F_HW_VLAN_CTAG_TX	|
 		      NETIF_F_HW_VLAN_CTAG_RX;
 
+	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+		hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+
 	netdev->hw_features |= hw_features;
 
 	netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
@@ -12011,6 +12995,9 @@
 	ether_addr_copy(netdev->dev_addr, mac_addr);
 	ether_addr_copy(netdev->perm_addr, mac_addr);
 
+	/* i40iw_net_event() reads 16 bytes from neigh->primary_key */
+	netdev->neigh_priv_len = sizeof(u32) * 4;
+
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 	netdev->priv_flags |= IFF_SUPP_NOFCS;
 	/* Setup netdev TC information */
@@ -12054,7 +13041,7 @@
 	struct i40e_pf *pf = vsi->back;
 
 	/* Uplink is not a bridge so default to VEB */
-	if (vsi->veb_idx == I40E_NO_VEB)
+	if (vsi->veb_idx >= I40E_MAX_VEB)
 		return 1;
 
 	veb = pf->veb[vsi->veb_idx];
@@ -13035,7 +14022,7 @@
 	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
 		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
 			break;
-	if (vsi_idx >= pf->num_alloc_vsi && vsi_seid != 0) {
+	if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) {
 		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
 			 vsi_seid);
 		return NULL;
@@ -13112,7 +14099,7 @@
 		/* Main VEB? */
 		if (uplink_seid != pf->mac_seid)
 			break;
-		if (pf->lan_veb == I40E_NO_VEB) {
+		if (pf->lan_veb >= I40E_MAX_VEB) {
 			int v;
 
 			/* find existing or else empty VEB */
@@ -13122,13 +14109,15 @@
 					break;
 				}
 			}
-			if (pf->lan_veb == I40E_NO_VEB) {
+			if (pf->lan_veb >= I40E_MAX_VEB) {
 				v = i40e_veb_mem_alloc(pf);
 				if (v < 0)
 					break;
 				pf->lan_veb = v;
 			}
 		}
+		if (pf->lan_veb >= I40E_MAX_VEB)
+			break;
 
 		pf->veb[pf->lan_veb]->seid = seid;
 		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
@@ -13282,7 +14271,7 @@
 		/* Set up the PF VSI associated with the PF's main VSI
 		 * that is already in the HW switch
 		 */
-		if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
+		if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
 			uplink_seid = pf->veb[pf->lan_veb]->seid;
 		else
 			uplink_seid = pf->mac_seid;
@@ -13528,6 +14517,204 @@
 }
 
 /**
+ * i40e_set_fec_in_flags - helper function for setting FEC options in flags
+ * @fec_cfg: FEC option to set in flags
+ * @flags: ptr to flags in which we set FEC option
+ **/
+void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags)
+{
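+	/* AUTO advertises both FEC modes; an explicit RS or KR request
+	 * selects one mode and clears the other, and 0 clears both.
+	 */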
+	if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
+		*flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC;
+	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) ||
+	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) {
+		*flags |= I40E_FLAG_RS_FEC;
+		*flags &= ~I40E_FLAG_BASE_R_FEC;
+	}
+	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) ||
+	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) {
+		*flags |= I40E_FLAG_BASE_R_FEC;
+		*flags &= ~I40E_FLAG_RS_FEC;
+	}
+	if (fec_cfg == 0)
+		*flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC);
+}
+
+/**
+ * i40e_check_recovery_mode - check if we are running transition firmware
+ * @pf: board private structure
+ *
+ * Check registers indicating the firmware runs in recovery mode. Sets the
+ * appropriate driver state.
+ *
+ * Returns true if the recovery mode was detected, false otherwise
+ **/
+static bool i40e_check_recovery_mode(struct i40e_pf *pf)
+{
+	u32 val = rd32(&pf->hw, I40E_GL_FWSTS) & I40E_GL_FWSTS_FWS1B_MASK;
+	bool is_recovery_mode = false;
+
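+	/* FWS1B reports the firmware state; the per-MAC values checked
+	 * below are the ones that indicate recovery mode
+	 */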
+	if (pf->hw.mac.type == I40E_MAC_XL710)
+		is_recovery_mode =
+		val == I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK ||
+		val == I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK ||
+		val == I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_TRANSITION_MASK ||
+		val == I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_NVM_MASK;
+	if (pf->hw.mac.type == I40E_MAC_X722)
+		is_recovery_mode =
+		val == I40E_X722_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK ||
+		val == I40E_X722_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK;
+	if (is_recovery_mode) {
+		dev_notice(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
+		dev_notice(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
+		set_bit(__I40E_RECOVERY_MODE, pf->state);
+
+		return true;
+	}
+	if (test_and_clear_bit(__I40E_RECOVERY_MODE, pf->state))
+		dev_info(&pf->pdev->dev, "Reinitializing in normal mode with full functionality.\n");
+
+	return false;
+}
+
+/**
+ * i40e_pf_loop_reset - perform reset in a loop.
+ * @pf: board private structure
+ *
+ * This function is useful when a NIC is about to enter recovery mode.
+ * When a NIC's internal data structures are corrupted, its firmware
+ * is going to enter recovery mode.
+ * Right after a POR it takes about 7 minutes for the firmware to enter
+ * recovery mode. Until that time the NIC is in some kind of intermediate
+ * state. After that time period the NIC almost surely enters
+ * recovery mode. The only way for a driver to detect the intermediate
+ * state is to issue a series of PF resets and check the return value.
+ * If a PF reset returns success then the firmware could be in recovery
+ * mode, so the caller of this code needs to check for recovery mode
+ * if this function returns success. There is a small chance that the
+ * firmware will hang in the intermediate state forever.
+ * Since waiting 7 minutes is quite a lot of time, this function waits
+ * 10 seconds and then gives up by returning an error.
+ *
+ * Return 0 on success, negative on failure.
+ **/
+static i40e_status i40e_pf_loop_reset(struct i40e_pf *pf)
+{
+	const unsigned short MAX_CNT = 1000;
+	const unsigned short MSECS = 10;
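+	/* 1000 attempts x 10 ms = the ~10 second budget described above */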
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status ret;
+	int cnt;
+
+	for (cnt = 0; cnt < MAX_CNT; ++cnt) {
+		ret = i40e_pf_reset(hw);
+		if (!ret)
+			break;
+		msleep(MSECS);
+	}
+
+	if (cnt == MAX_CNT) {
+		dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret);
+		return ret;
+	}
+
+	pf->pfr_count++;
+	return ret;
+}
+
+/**
+ * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
+ * @pf: board private structure
+ * @hw: ptr to the hardware info
+ *
+ * This function does a minimal setup of all subsystems needed for running
+ * recovery mode.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
+{
+	struct i40e_vsi *vsi;
+	int err;
+	int v_idx;
+
+	pci_save_state(pf->pdev);
+
+	/* set up periodic task facility */
+	timer_setup(&pf->service_timer, i40e_service_timer, 0);
+	pf->service_timer_period = HZ;
+
+	INIT_WORK(&pf->service_task, i40e_service_task);
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+
+	err = i40e_init_interrupt_scheme(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* The number of VSIs reported by the FW is the minimum guaranteed
+	 * to us; HW supports far more and we share the remaining pool with
+	 * the other PFs. We allocate space for more than the guarantee with
+	 * the understanding that we might not get them all later.
+	 */
+	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
+		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
+	else
+		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
+
+	/* Set up the vsi struct and our local tracking of the MAIN PF vsi. */
+	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+			  GFP_KERNEL);
+	if (!pf->vsi) {
+		err = -ENOMEM;
+		goto err_switch_setup;
+	}
+
+	/* We allocate one VSI, which is needed as the absolute minimum
+	 * to be able to register the netdev
+	 */
+	v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN);
+	if (v_idx < 0)
+		goto err_switch_setup;
+	pf->lan_vsi = v_idx;
+	vsi = pf->vsi[v_idx];
+	if (!vsi)
+		goto err_switch_setup;
+	vsi->alloc_queue_pairs = 1;
+	err = i40e_config_netdev(vsi);
+	if (err)
+		goto err_switch_setup;
+	err = register_netdev(vsi->netdev);
+	if (err)
+		goto err_switch_setup;
+	vsi->netdev_registered = true;
+	i40e_dbg_pf_init(pf);
+
+	err = i40e_setup_misc_vector_for_recovery_mode(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* tell the firmware that we're starting */
+	i40e_send_version(pf);
+
+	/* since everything's happy, start the service_task timer */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+
+	return 0;
+
+err_switch_setup:
+	i40e_reset_interrupt_capability(pf);
+	del_timer_sync(&pf->service_timer);
+	i40e_shutdown_adminq(hw);
+	iounmap(hw->hw_addr);
+	pci_disable_pcie_error_reporting(pf->pdev);
+	pci_release_mem_regions(pf->pdev);
+	pci_disable_device(pf->pdev);
+	kfree(pf);
+
+	return err;
+}
+
+/**
  * i40e_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in i40e_pci_tbl
@@ -13596,7 +14783,17 @@
 
 	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
 				I40E_MAX_CSR_SPACE);
-
+	/* We believe that the highest register to read is
+	 * I40E_GLGEN_STAT_CLEAR, so we check that the BAR size is
+	 * at least that large before mapping, to prevent a
+	 * kernel panic.
+	 */
+	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
+		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
+			pf->ioremap_len);
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
 	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
 	if (!hw->hw_addr) {
 		err = -EIO;
@@ -13624,6 +14821,7 @@
 
 	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
 	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+	INIT_LIST_HEAD(&pf->ddp_old_prof);
 
 	/* set up the locks for the AQ, do this only once in probe
 	 * and destroy them only once in remove
@@ -13651,12 +14849,21 @@
 
 	/* Reset here to make sure all is clean and to define PF 'n' */
 	i40e_clear_hw(hw);
-	err = i40e_pf_reset(hw);
+
+	err = i40e_set_mac_type(hw);
+	if (err) {
+		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
+			 err);
+		goto err_pf_reset;
+	}
+
+	err = i40e_pf_loop_reset(pf);
 	if (err) {
 		dev_info(&pdev->dev, "Initial pf_reset failed: %d\n", err);
 		goto err_pf_reset;
 	}
-	pf->pfr_count++;
+
+	i40e_check_recovery_mode(pf);
 
 	hw->aq.num_arq_entries = I40E_AQ_LEN;
 	hw->aq.num_asq_entries = I40E_AQ_LEN;
@@ -13682,7 +14889,11 @@
 	if (err) {
 		if (err == I40E_ERR_FIRMWARE_API_VERSION)
 			dev_info(&pdev->dev,
-				 "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+				 "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
+				 hw->aq.api_maj_ver,
+				 hw->aq.api_min_ver,
+				 I40E_FW_API_VERSION_MAJOR,
+				 I40E_FW_MINOR_VERSION(hw));
 		else
 			dev_info(&pdev->dev,
 				 "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
@@ -13691,19 +14902,28 @@
 	}
 	i40e_get_oem_version(hw);
 
-	/* provide nvm, fw, api versions */
-	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n",
+	/* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */
+	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n",
 		 hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
 		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
-		 i40e_nvm_version_str(hw));
+		 i40e_nvm_version_str(hw), hw->vendor_id, hw->device_id,
+		 hw->subsystem_vendor_id, hw->subsystem_device_id);
 
 	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
 	    hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
 		dev_info(&pdev->dev,
-			 "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
+			 "The driver for the device detected a newer version of the NVM image v%u.%u than expected v%u.%u. Please install the most recent version of the network driver.\n",
+			 hw->aq.api_maj_ver,
+			 hw->aq.api_min_ver,
+			 I40E_FW_API_VERSION_MAJOR,
+			 I40E_FW_MINOR_VERSION(hw));
 	else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
 		dev_info(&pdev->dev,
-			 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+			 "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
+			 hw->aq.api_maj_ver,
+			 hw->aq.api_min_ver,
+			 I40E_FW_API_VERSION_MAJOR,
+			 I40E_FW_MINOR_VERSION(hw));
 
 	i40e_verify_eeprom(pf);
 
@@ -13712,6 +14932,7 @@
 		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
 
 	i40e_clear_pxe_mode(hw);
+
 	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
 	if (err)
 		goto err_adminq_setup;
@@ -13722,6 +14943,9 @@
 		goto err_sw_init;
 	}
 
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
+		return i40e_init_recovery_mode(pf, hw);
+
 	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
 				hw->func_caps.num_rx_qp, 0, 0);
 	if (err) {
@@ -13742,7 +14966,7 @@
 	 */
 	if (pf->hw_features & I40E_HW_STOP_FW_LLDP) {
 		dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
-		i40e_aq_stop_lldp(hw, true, NULL);
+		i40e_aq_stop_lldp(hw, true, false, NULL);
 	}
 
 	/* allow a platform config to override the HW addr */
@@ -13762,6 +14986,11 @@
 	pci_set_drvdata(pdev, pf);
 	pci_save_state(pdev);
 
+	dev_info(&pdev->dev,
+		 (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ?
+			"FW LLDP is disabled\n" :
+			"FW LLDP is enabled\n");
+
 	/* Enable FW to write default DCB config on link-up */
 	i40e_aq_set_dcb_parameters(hw, true, NULL);
 
@@ -13979,23 +15208,23 @@
 
 		switch (hw->bus.speed) {
 		case i40e_bus_speed_8000:
-			strncpy(speed, "8.0", PCI_SPEED_SIZE); break;
+			strlcpy(speed, "8.0", PCI_SPEED_SIZE); break;
 		case i40e_bus_speed_5000:
-			strncpy(speed, "5.0", PCI_SPEED_SIZE); break;
+			strlcpy(speed, "5.0", PCI_SPEED_SIZE); break;
 		case i40e_bus_speed_2500:
-			strncpy(speed, "2.5", PCI_SPEED_SIZE); break;
+			strlcpy(speed, "2.5", PCI_SPEED_SIZE); break;
 		default:
 			break;
 		}
 		switch (hw->bus.width) {
 		case i40e_bus_width_pcie_x8:
-			strncpy(width, "8", PCI_WIDTH_SIZE); break;
+			strlcpy(width, "8", PCI_WIDTH_SIZE); break;
 		case i40e_bus_width_pcie_x4:
-			strncpy(width, "4", PCI_WIDTH_SIZE); break;
+			strlcpy(width, "4", PCI_WIDTH_SIZE); break;
 		case i40e_bus_width_pcie_x2:
-			strncpy(width, "2", PCI_WIDTH_SIZE); break;
+			strlcpy(width, "2", PCI_WIDTH_SIZE); break;
 		case i40e_bus_width_pcie_x1:
-			strncpy(width, "1", PCI_WIDTH_SIZE); break;
+			strlcpy(width, "1", PCI_WIDTH_SIZE); break;
 		default:
 			break;
 		}
@@ -14018,6 +15247,9 @@
 			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 	pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
 
+	/* set the FEC config due to the board capabilities */
+	i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags);
+
 	/* get the supported phy types from the fw */
 	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
 	if (err)
@@ -14104,6 +15336,19 @@
 	if (pf->service_task.func)
 		cancel_work_sync(&pf->service_task);
 
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		struct i40e_vsi *vsi = pf->vsi[0];
+
+		/* We know that we have allocated only one VSI for this PF;
+		 * it was allocated just to register the netdev, so that the
+		 * interface is visible in the 'ifconfig' output.
+		 */
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+
+		goto unmap;
+	}
+
 	/* Client close must be called explicitly here because the timer
 	 * has been stopped.
 	 */
@@ -14153,6 +15398,12 @@
 				 ret_code);
 	}
 
+unmap:
+	/* Free MSI/legacy interrupt 0 when in recovery mode. */
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
+		free_irq(pf->pdev->irq, pf);
+
 	/* shutdown the adminq */
 	i40e_shutdown_adminq(hw);
 
@@ -14161,14 +15412,17 @@
 	mutex_destroy(&hw->aq.asq_mutex);
 
 	/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
+	rtnl_lock();
 	i40e_clear_interrupt_scheme(pf);
 	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i]) {
-			i40e_vsi_clear_rings(pf->vsi[i]);
+			if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
+				i40e_vsi_clear_rings(pf->vsi[i]);
 			i40e_vsi_clear(pf->vsi[i]);
 			pf->vsi[i] = NULL;
 		}
 	}
+	rtnl_unlock();
 
 	for (i = 0; i < I40E_MAX_VEB; i++) {
 		kfree(pf->veb[i]);
@@ -14229,7 +15483,6 @@
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 	pci_ers_result_t result;
-	int err;
 	u32 reg;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
@@ -14250,14 +15503,6 @@
 			result = PCI_ERS_RESULT_DISCONNECT;
 	}
 
-	err = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (err) {
-		dev_info(&pdev->dev,
-			 "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
-			 err);
-		/* non-fatal, continue */
-	}
-
 	return result;
 }
 
@@ -14380,7 +15625,18 @@
 	wr32(hw, I40E_PFPM_WUFC,
 	     (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
 
+	/* Free MSI/legacy interrupt 0 when in recovery mode. */
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
+		free_irq(pf->pdev->irq, pf);
+
+	/* Since we're going to destroy queues during
+	 * i40e_clear_interrupt_scheme(), we should hold the RTNL lock for
+	 * this whole section.
+	 */
+	rtnl_lock();
 	i40e_clear_interrupt_scheme(pf);
+	rtnl_unlock();
 
 	if (system_state == SYSTEM_POWER_OFF) {
 		pci_wake_from_d3(pdev, pf->wol_en);
@@ -14394,8 +15650,7 @@
  **/
 static int __maybe_unused i40e_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	struct i40e_pf *pf = dev_get_drvdata(dev);
 	struct i40e_hw *hw = &pf->hw;
 
 	/* If we're already suspended, then there is nothing to do */
@@ -14445,8 +15700,7 @@
  **/
 static int __maybe_unused i40e_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	struct i40e_pf *pf = dev_get_drvdata(dev);
 	int err;
 
 	/* If we're not suspended, then there is nothing to do */
@@ -14463,7 +15717,7 @@
 	 */
 	err = i40e_restore_interrupt_scheme(pf);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
 			err);
 	}
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 0299e5b..e4d8d20 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -322,6 +322,77 @@
 }
 
 /**
+ * i40e_read_nvm_module_data - Reads NVM buffer into the specified memory location
+ * @hw: pointer to the HW structure
+ * @module_ptr: Pointer to module in words with respect to NVM beginning
+ * @offset: offset in words from module start
+ * @words_data_size: Words to read from NVM
+ * @data_ptr: Pointer to memory location where resulting buffer will be stored
+ **/
+i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
+				      u8 module_ptr, u16 offset,
+				      u16 words_data_size,
+				      u16 *data_ptr)
+{
+	i40e_status status;
+	u16 ptr_value = 0;
+	u32 flat_offset;
+
+	if (module_ptr != 0) {
+		status = i40e_read_nvm_word(hw, module_ptr, &ptr_value);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm word failed. Error code: %d.\n",
+				   status);
+			return I40E_ERR_NVM;
+		}
+	}
+#define I40E_NVM_INVALID_PTR_VAL 0x7FFF
+#define I40E_NVM_INVALID_VAL 0xFFFF
+
+	/* Pointer not initialized */
+	if (ptr_value == I40E_NVM_INVALID_PTR_VAL ||
+	    ptr_value == I40E_NVM_INVALID_VAL)
+		return I40E_ERR_BAD_PTR;
+
+	/* Check whether the module is in SR mapped area or outside */
+	if (ptr_value & I40E_PTR_TYPE) {
+		/* Pointer points outside of the Shared RAM mapped area */
+		ptr_value &= ~I40E_PTR_TYPE;
+
+		/* PtrValue in 4kB units, need to convert to words */
+		ptr_value /= 2;
+		flat_offset = ((u32)ptr_value * 0x1000) + (u32)offset;
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (!status) {
+			status = i40e_aq_read_nvm(hw, 0, 2 * flat_offset,
+						  2 * words_data_size,
+						  data_ptr, true, NULL);
+			i40e_release_nvm(hw);
+			if (status) {
+				i40e_debug(hw, I40E_DEBUG_ALL,
+					   "Reading nvm aq failed. Error code: %d.\n",
+					   status);
+				return I40E_ERR_NVM;
+			}
+		} else {
+			return I40E_ERR_NVM;
+		}
+	} else {
+		/* Read from the Shadow RAM */
+		status = i40e_read_nvm_buffer(hw, ptr_value + offset,
+					      &words_data_size, data_ptr);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm buffer failed. Error code: %d.\n",
+				   status);
+		}
+	}
+
+	return status;
+}
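
A hedged usage sketch of the new helper: reading the first words of the
module referenced by the EMP settings pointer. I40E_SR_EMP_SR_SETTINGS_PTR
(added to i40e_type.h later in this patch) is just one example of a valid
module pointer word:

	static void demo_read_emp_settings(struct i40e_hw *hw)
	{
		u16 buf[4] = {};
		i40e_status status;

		/* Read 4 words starting at word 0 of the pointed-to module */
		status = i40e_read_nvm_module_data(hw,
						   I40E_SR_EMP_SR_SETTINGS_PTR,
						   0, ARRAY_SIZE(buf), buf);
		if (status)
			i40e_debug(hw, I40E_DEBUG_ALL,
				   "EMP settings read failed: %d\n", status);
	}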
+
+/**
  * i40e_read_nvm_buffer_srctl - Reads Shadow RAM buffer via SRCTL register
  * @hw: pointer to the HW structure
  * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
@@ -430,6 +501,36 @@
 }
 
 /**
+ * i40e_read_nvm_buffer - Reads Shadow RAM buffer and acquires lock if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16-bit words (data buffer) from the Shadow RAM, using either the
+ * admin queue or the SRCTL register method. The AQ-based read is preceded
+ * by taking NVM ownership and followed by releasing it.
+ **/
+i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+				 u16 *words, u16 *data)
+{
+	i40e_status ret_code = 0;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
+		ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (!ret_code) {
+			ret_code = i40e_read_nvm_buffer_aq(hw, offset, words,
+							   data);
+			i40e_release_nvm(hw);
+		}
+	} else {
+		ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
+	}
+
+	return ret_code;
+}
+
+/**
  * i40e_write_nvm_aq - Writes Shadow RAM.
  * @hw: pointer to the HW structure.
  * @module_pointer: module pointer location in words from the NVM beginning
@@ -578,11 +679,10 @@
 	__le16 le_sum;
 
 	ret_code = i40e_calc_nvm_checksum(hw, &checksum);
-	if (!ret_code) {
-		le_sum = cpu_to_le16(checksum);
+	le_sum = cpu_to_le16(checksum);
+	if (!ret_code)
 		ret_code = i40e_write_nvm_aq(hw, 0x00, I40E_SR_SW_CHECKSUM_WORD,
 					     1, &le_sum, true);
-	}
 
 	return ret_code;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index a07574b..c302ef2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -18,7 +18,10 @@
  * actual OS primitives
  */
 
-#define hw_dbg(hw, S, A...)	do {} while (0)
+#define hw_dbg(hw, S, A...)							\
+do {										\
+	dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A);		\
+} while (0)
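
With this change hw_dbg() is no longer compiled out: it forwards to
dev_dbg() through the PF back-pointer, so output is gated by dynamic debug.
A trivial usage sketch (the message text is illustrative):

	static void demo_hw_dbg(struct i40e_hw *hw)
	{
		/* Resolves to dev_dbg(&pf->pdev->dev, ...), enabled per
		 * call site via dynamic debug.
		 */
		hw_dbg(hw, "demo: asq next_to_use=%u\n",
		       hw->aq.asq.next_to_use);
	}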
 
 #define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
 #define rd32(a, reg)		readl((a)->hw_addr + (reg))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index e08d754..5250441 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -203,14 +203,18 @@
 i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
 				bool enable_update,
 				struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code
+i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
+		     struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+			      bool persist,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_dcb_parameters(struct i40e_hw *hw,
 				       bool dcb_enable,
 				       struct i40e_asq_cmd_details
 				       *cmd_details);
-i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
-				struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
+			       struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
 				       void *buff, u16 buff_size,
 				       struct i40e_asq_cmd_details *cmd_details);
@@ -311,6 +315,12 @@
 void i40e_release_nvm(struct i40e_hw *hw);
 i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 					 u16 *data);
+i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
+				      u8 module_ptr, u16 offset,
+				      u16 words_data_size,
+				      u16 *data_ptr);
+i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+				 u16 *words, u16 *data);
 i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
 i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
 						 u16 *checksum);
@@ -322,6 +332,8 @@
 void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw);
 void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
 
+i40e_status i40e_set_mac_type(struct i40e_hw *hw);
+
 extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
 
 static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
@@ -346,6 +358,10 @@
 		return VIRTCHNL_LINK_SPEED_100MB;
 	case I40E_LINK_SPEED_1GB:
 		return VIRTCHNL_LINK_SPEED_1GB;
+	case I40E_LINK_SPEED_2_5GB:
+		return VIRTCHNL_LINK_SPEED_2_5GB;
+	case I40E_LINK_SPEED_5GB:
+		return VIRTCHNL_LINK_SPEED_5GB;
 	case I40E_LINK_SPEED_10GB:
 		return VIRTCHNL_LINK_SPEED_10GB;
 	case I40E_LINK_SPEED_40GB:
@@ -429,10 +445,16 @@
 struct i40e_generic_seg_header *
 i40e_find_segment_in_package(u32 segment_type,
 			     struct i40e_package_header *pkg_header);
+struct i40e_profile_section_header *
+i40e_find_section_in_profile(u32 section_type,
+			     struct i40e_profile_segment *profile);
 enum i40e_status_code
 i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
 		   u32 track_id);
 enum i40e_status_code
+i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
+		      u32 track_id);
+enum i40e_status_code
 i40e_add_pinfo_to_list(struct i40e_hw *hw,
 		       struct i40e_profile_segment *profile,
 		       u8 *profile_info_sec, u32 track_id);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 35f2866..9bf1ad4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -28,19 +28,23 @@
  * i40e_ptp_read - Read the PHC time from the device
  * @pf: Board private structure
  * @ts: timespec structure to hold the current time value
+ * @sts: structure to hold the system time before and after reading the PHC
  *
  * This function reads the PRTTSYN_TIME registers and stores them in a
  * timespec. However, since the registers are 64 bits of nanoseconds, we must
  * convert the result to a timespec before we can return.
  **/
-static void i40e_ptp_read(struct i40e_pf *pf, struct timespec64 *ts)
+static void i40e_ptp_read(struct i40e_pf *pf, struct timespec64 *ts,
+			  struct ptp_system_timestamp *sts)
 {
 	struct i40e_hw *hw = &pf->hw;
 	u32 hi, lo;
 	u64 ns;
 
 	/* The timer latches on the lowest register read. */
+	ptp_read_system_prets(sts);
 	lo = rd32(hw, I40E_PRTTSYN_TIME_L);
+	ptp_read_system_postts(sts);
 	hi = rd32(hw, I40E_PRTTSYN_TIME_H);
 
 	ns = (((u64)hi) << 32) | lo;
@@ -136,18 +140,18 @@
  * @ptp: The PTP clock structure
  * @delta: Offset in nanoseconds to adjust the PHC time by
  *
- * Adjust the frequency of the PHC by the indicated parts per billion from the
- * base frequency.
+ * Adjust the current clock time by a delta specified in nanoseconds.
  **/
 static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-	struct timespec64 now;
+	struct timespec64 now, then;
 
+	then = ns_to_timespec64(delta);
 	mutex_lock(&pf->tmreg_lock);
 
-	i40e_ptp_read(pf, &now);
-	timespec64_add_ns(&now, delta);
+	i40e_ptp_read(pf, &now, NULL);
+	now = timespec64_add(now, then);
 	i40e_ptp_write(pf, (const struct timespec64 *)&now);
 
 	mutex_unlock(&pf->tmreg_lock);
@@ -156,19 +160,21 @@
 }
 
 /**
- * i40e_ptp_gettime - Get the time of the PHC
+ * i40e_ptp_gettimex - Get the time of the PHC
  * @ptp: The PTP clock structure
  * @ts: timespec structure to hold the current time value
+ * @sts: structure to hold the system time before and after reading the PHC
  *
  * Read the device clock and return the correct value on ns, after converting it
  * into a timespec struct.
  **/
-static int i40e_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			     struct ptp_system_timestamp *sts)
 {
 	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
 
 	mutex_lock(&pf->tmreg_lock);
-	i40e_ptp_read(pf, ts);
+	i40e_ptp_read(pf, ts, sts);
 	mutex_unlock(&pf->tmreg_lock);
 
 	return 0;
@@ -694,14 +700,15 @@
 	if (!IS_ERR_OR_NULL(pf->ptp_clock))
 		return 0;
 
-	strncpy(pf->ptp_caps.name, i40e_driver_name, sizeof(pf->ptp_caps.name));
+	strlcpy(pf->ptp_caps.name, i40e_driver_name,
+		sizeof(pf->ptp_caps.name) - 1);
 	pf->ptp_caps.owner = THIS_MODULE;
 	pf->ptp_caps.max_adj = 999999999;
 	pf->ptp_caps.n_ext_ts = 0;
 	pf->ptp_caps.pps = 0;
 	pf->ptp_caps.adjfreq = i40e_ptp_adjfreq;
 	pf->ptp_caps.adjtime = i40e_ptp_adjtime;
-	pf->ptp_caps.gettime64 = i40e_ptp_gettime;
+	pf->ptp_caps.gettimex64 = i40e_ptp_gettimex;
 	pf->ptp_caps.settime64 = i40e_ptp_settime;
 	pf->ptp_caps.enable = i40e_ptp_feature_enable;
 
@@ -717,16 +724,68 @@
 	pf->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
 	pf->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
 
+	/* Set the previous "reset" time to the current kernel clock time */
+	ktime_get_real_ts64(&pf->ptp_prev_hw_time);
+	pf->ptp_reset_start = ktime_get();
+
 	return 0;
 }
 
 /**
+ * i40e_ptp_save_hw_time - Save the current PTP time as ptp_prev_hw_time
+ * @pf: Board private structure
+ *
+ * Read the current PTP time and save it into pf->ptp_prev_hw_time. This should
+ * be called at the end of preparing to reset, just before hardware reset
+ * occurs, in order to preserve the PTP time as close as possible across
+ * resets.
+ **/
+void i40e_ptp_save_hw_time(struct i40e_pf *pf)
+{
+	/* don't try to access the PTP clock if it's not enabled */
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return;
+
+	i40e_ptp_gettimex(&pf->ptp_caps, &pf->ptp_prev_hw_time, NULL);
+	/* Get a monotonic starting time for this reset */
+	pf->ptp_reset_start = ktime_get();
+}
+
+/**
+ * i40e_ptp_restore_hw_time - Restore the ptp_prev_hw_time + delta to PTP regs
+ * @pf: Board private structure
+ *
+ * Restore the PTP hardware clock registers. We previously cached the PTP
+ * hardware time as pf->ptp_prev_hw_time. To be as accurate as possible,
+ * update this value based on the time delta since the time was saved, using
+ * CLOCK_MONOTONIC (via ktime_get()) to calculate the time difference.
+ *
+ * This ensures that the hardware clock is restored to nearly what it should
+ * have been if a reset had not occurred.
+ **/
+void i40e_ptp_restore_hw_time(struct i40e_pf *pf)
+{
+	ktime_t delta = ktime_sub(ktime_get(), pf->ptp_reset_start);
+
+	/* Update the previous HW time with the ktime delta */
+	timespec64_add_ns(&pf->ptp_prev_hw_time, ktime_to_ns(delta));
+
+	/* Restore the hardware clock registers */
+	i40e_ptp_settime(&pf->ptp_caps, &pf->ptp_prev_hw_time);
+}
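
The two helpers above are meant to bracket a hardware reset. A sketch of
the expected ordering; demo_issue_reset() is a hypothetical stand-in for
the driver's actual reset/rebuild path:

	void demo_issue_reset(struct i40e_pf *pf);	/* hypothetical */

	static void demo_reset_preserving_phc(struct i40e_pf *pf)
	{
		/* Cache PHC time plus a monotonic reference before reset */
		i40e_ptp_save_hw_time(pf);

		demo_issue_reset(pf);

		/* i40e_ptp_init() ends by calling i40e_ptp_restore_hw_time(),
		 * which writes back the cached time plus the monotonic delta
		 * spent in reset.
		 */
		i40e_ptp_init(pf);
	}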
+
+/**
  * i40e_ptp_init - Initialize the 1588 support after device probe or reset
  * @pf: Board private structure
  *
  * This function sets device up for 1588 support. The first time it is run, it
  * will create a PHC clock device. It does not create a clock device if one
  * already exists. It also reconfigures the device after a reset.
+ *
+ * The first time a clock is created, i40e_ptp_create_clock will set
+ * pf->ptp_prev_hw_time to the current system time. During resets, it is
+ * expected that this timespec will be set to the last known PTP clock time,
+ * in order to preserve the clock time as close as possible across a reset.
  **/
 void i40e_ptp_init(struct i40e_pf *pf)
 {
@@ -758,7 +817,6 @@
 		dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n",
 			__func__);
 	} else if (pf->ptp_clock) {
-		struct timespec64 ts;
 		u32 regval;
 
 		if (pf->hw.debug_mask & I40E_DEBUG_LAN)
@@ -779,9 +837,8 @@
 		/* reset timestamping mode */
 		i40e_ptp_set_timestamp_mode(pf, &pf->tstamp_config);
 
-		/* Set the clock value. */
-		ts = ktime_to_timespec64(ktime_get_real());
-		i40e_ptp_settime(&pf->ptp_caps, &ts);
+		/* Restore the clock time based on last known value */
+		i40e_ptp_restore_hw_time(pf);
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 52e3680..d35d690 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -58,7 +58,7 @@
 #define I40E_PF_ARQLEN_ARQCRIT_SHIFT 30
 #define I40E_PF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQCRIT_SHIFT)
 #define I40E_PF_ARQLEN_ARQENABLE_SHIFT 31
-#define I40E_PF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_PF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
 #define I40E_PF_ARQT 0x00080480 /* Reset: EMPR */
 #define I40E_PF_ARQT_ARQT_SHIFT 0
 #define I40E_PF_ARQT_ARQT_MASK I40E_MASK(0x3FF, I40E_PF_ARQT_ARQT_SHIFT)
@@ -81,7 +81,7 @@
 #define I40E_PF_ATQLEN_ATQCRIT_SHIFT 30
 #define I40E_PF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQCRIT_SHIFT)
 #define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
-#define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
 #define I40E_PF_ATQT 0x00080400 /* Reset: EMPR */
 #define I40E_PF_ATQT_ATQT_SHIFT 0
 #define I40E_PF_ATQT_ATQT_MASK I40E_MASK(0x3FF, I40E_PF_ATQT_ATQT_SHIFT)
@@ -108,7 +108,7 @@
 #define I40E_VF_ARQLEN_ARQCRIT_SHIFT 30
 #define I40E_VF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQCRIT_SHIFT)
 #define I40E_VF_ARQLEN_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_VF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
 #define I40E_VF_ARQT(_VF) (0x00082C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
 #define I40E_VF_ARQT_MAX_INDEX 127
 #define I40E_VF_ARQT_ARQT_SHIFT 0
@@ -136,7 +136,7 @@
 #define I40E_VF_ATQLEN_ATQCRIT_SHIFT 30
 #define I40E_VF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQCRIT_SHIFT)
 #define I40E_VF_ATQLEN_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_VF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
 #define I40E_VF_ATQT(_VF) (0x00082800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
 #define I40E_VF_ATQT_MAX_INDEX 127
 #define I40E_VF_ATQT_ATQT_SHIFT 0
@@ -259,7 +259,7 @@
 #define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30
 #define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT)
 #define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT 31
-#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
+#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1u, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
 #define I40E_PRTDCB_RPPMC 0x001223A0 /* Reset: CORER */
 #define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT 0
 #define I40E_PRTDCB_RPPMC_LANRPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_LANRPPM_SHIFT)
@@ -363,6 +363,12 @@
 #define I40E_GL_FWSTS_FWRI_MASK I40E_MASK(0x1, I40E_GL_FWSTS_FWRI_SHIFT)
 #define I40E_GL_FWSTS_FWS1B_SHIFT 16
 #define I40E_GL_FWSTS_FWS1B_MASK I40E_MASK(0xFF, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK I40E_MASK(0x30, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK I40E_MASK(0x31, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_TRANSITION_MASK I40E_MASK(0x32, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_NVM_MASK I40E_MASK(0x33, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_X722_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK I40E_MASK(0xB, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_X722_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK I40E_MASK(0xC, I40E_GL_FWSTS_FWS1B_SHIFT)
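
These FWS1B values identify firmware recovery mode. A hedged reduction of
the detection logic; the real check is i40e_check_recovery_mode() in
i40e_main.c, which also covers the X722 values and transition states:

	static bool demo_xl710_in_recovery(struct i40e_hw *hw)
	{
		u32 fws1b = rd32(hw, I40E_GL_FWSTS) &
			    I40E_GL_FWSTS_FWS1B_MASK;

		return fws1b == I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK ||
		       fws1b == I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK;
	}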
 #define I40E_GLGEN_CLKSTAT 0x000B8184 /* Reset: POR */
 #define I40E_GLGEN_CLKSTAT_CLKMODE_SHIFT 0
 #define I40E_GLGEN_CLKSTAT_CLKMODE_MASK I40E_MASK(0x1, I40E_GLGEN_CLKSTAT_CLKMODE_SHIFT)
@@ -503,7 +509,7 @@
 #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
 #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
 #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
-#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
+#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK I40E_MASK(0x1u, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
 #define I40E_GLGEN_MSRWD(_i) (0x0008819C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
 #define I40E_GLGEN_MSRWD_MAX_INDEX 3
 #define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
@@ -1242,14 +1248,14 @@
 #define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT 30
 #define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
 #define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
-#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK I40E_MASK(0x1u, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
 #define I40E_PFLAN_QALLOC 0x001C0400 /* Reset: CORER */
 #define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
 #define I40E_PFLAN_QALLOC_FIRSTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)
 #define I40E_PFLAN_QALLOC_LASTQ_SHIFT 16
 #define I40E_PFLAN_QALLOC_LASTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_LASTQ_SHIFT)
 #define I40E_PFLAN_QALLOC_VALID_SHIFT 31
-#define I40E_PFLAN_QALLOC_VALID_MASK I40E_MASK(0x1, I40E_PFLAN_QALLOC_VALID_SHIFT)
+#define I40E_PFLAN_QALLOC_VALID_MASK I40E_MASK(0x1u, I40E_PFLAN_QALLOC_VALID_SHIFT)
 #define I40E_QRX_ENA(_Q) (0x00120000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
 #define I40E_QRX_ENA_MAX_INDEX 1535
 #define I40E_QRX_ENA_QENA_REQ_SHIFT 0
@@ -1658,7 +1664,7 @@
 #define I40E_GLNVM_SRCTL_START_SHIFT 30
 #define I40E_GLNVM_SRCTL_START_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_START_SHIFT)
 #define I40E_GLNVM_SRCTL_DONE_SHIFT 31
-#define I40E_GLNVM_SRCTL_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_DONE_SHIFT)
+#define I40E_GLNVM_SRCTL_DONE_MASK I40E_MASK(0x1u, I40E_GLNVM_SRCTL_DONE_SHIFT)
 #define I40E_GLNVM_SRDATA 0x000B6114 /* Reset: POR */
 #define I40E_GLNVM_SRDATA_WRDATA_SHIFT 0
 #define I40E_GLNVM_SRDATA_WRDATA_MASK I40E_MASK(0xFFFF, I40E_GLNVM_SRDATA_WRDATA_SHIFT)
@@ -3025,7 +3031,7 @@
 #define I40E_PF_VT_PFALLOC_LASTVF_SHIFT 8
 #define I40E_PF_VT_PFALLOC_LASTVF_MASK I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_LASTVF_SHIFT)
 #define I40E_PF_VT_PFALLOC_VALID_SHIFT 31
-#define I40E_PF_VT_PFALLOC_VALID_MASK I40E_MASK(0x1, I40E_PF_VT_PFALLOC_VALID_SHIFT)
+#define I40E_PF_VT_PFALLOC_VALID_MASK I40E_MASK(0x1u, I40E_PF_VT_PFALLOC_VALID_SHIFT)
 #define I40E_VP_MDET_RX(_VF) (0x0012A000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
 #define I40E_VP_MDET_RX_MAX_INDEX 127
 #define I40E_VP_MDET_RX_VALID_SHIFT 0
@@ -3161,7 +3167,7 @@
 #define I40E_VF_ARQLEN1_ARQCRIT_SHIFT 30
 #define I40E_VF_ARQLEN1_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQCRIT_SHIFT)
 #define I40E_VF_ARQLEN1_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN1_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define I40E_VF_ARQLEN1_ARQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
 #define I40E_VF_ARQT1 0x00007000 /* Reset: EMPR */
 #define I40E_VF_ARQT1_ARQT_SHIFT 0
 #define I40E_VF_ARQT1_ARQT_MASK I40E_MASK(0x3FF, I40E_VF_ARQT1_ARQT_SHIFT)
@@ -3184,7 +3190,7 @@
 #define I40E_VF_ATQLEN1_ATQCRIT_SHIFT 30
 #define I40E_VF_ATQLEN1_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQCRIT_SHIFT)
 #define I40E_VF_ATQLEN1_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN1_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define I40E_VF_ATQLEN1_ATQENABLE_MASK I40E_MASK(0x1u, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
 #define I40E_VF_ATQT1 0x00008400 /* Reset: EMPR */
 #define I40E_VF_ATQT1_ATQT_SHIFT 0
 #define I40E_VF_ATQT1_ATQT_MASK I40E_MASK(0x3FF, I40E_VF_ATQT1_ATQT_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b5042d1..e3f29dc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2,22 +2,13 @@
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/prefetch.h>
-#include <net/busy_poll.h>
 #include <linux/bpf_trace.h>
 #include <net/xdp.h>
 #include "i40e.h"
 #include "i40e_trace.h"
 #include "i40e_prototype.h"
-
-static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
-				u32 td_tag)
-{
-	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
-			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
-			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
-			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
-			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
-}
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
 
 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
@@ -536,8 +527,8 @@
  * This is used to verify if the FD programming or invalidation
  * requested by SW to the HW is successful or not and take actions accordingly.
  **/
-static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
-				  union i40e_rx_desc *rx_desc, u8 prog_id)
+void i40e_fd_handle_status(struct i40e_ring *rx_ring,
+			   union i40e_rx_desc *rx_desc, u8 prog_id)
 {
 	struct i40e_pf *pf = rx_ring->vsi->back;
 	struct pci_dev *pdev = pf->pdev;
@@ -644,13 +635,18 @@
 	unsigned long bi_size;
 	u16 i;
 
-	/* ring already cleared, nothing to do */
-	if (!tx_ring->tx_bi)
-		return;
+	if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+		i40e_xsk_clean_tx_ring(tx_ring);
+	} else {
+		/* ring already cleared, nothing to do */
+		if (!tx_ring->tx_bi)
+			return;
 
-	/* Free all the Tx ring sk_buffs */
-	for (i = 0; i < tx_ring->count; i++)
-		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+		/* Free all the Tx ring sk_buffs */
+		for (i = 0; i < tx_ring->count; i++)
+			i40e_unmap_and_free_tx_resource(tx_ring,
+							&tx_ring->tx_bi[i]);
+	}
 
 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
 	memset(tx_ring->tx_bi, 0, bi_size);
@@ -767,8 +763,6 @@
 	}
 }
 
-#define WB_STRIDE 4
-
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @vsi: the VSI we care about
@@ -780,7 +774,7 @@
 static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 			      struct i40e_ring *tx_ring, int napi_budget)
 {
-	u16 i = tx_ring->next_to_clean;
+	int i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
 	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
@@ -873,27 +867,8 @@
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->stats.bytes += total_bytes;
-	tx_ring->stats.packets += total_packets;
-	u64_stats_update_end(&tx_ring->syncp);
-	tx_ring->q_vector->tx.total_bytes += total_bytes;
-	tx_ring->q_vector->tx.total_packets += total_packets;
-
-	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
-		/* check to see if there are < 4 descriptors
-		 * waiting to be written back, then kick the hardware to force
-		 * them to be written back in case we stay in NAPI.
-		 * In this mode on X722 we do not enable Interrupt.
-		 */
-		unsigned int j = i40e_get_tx_pending(tx_ring, false);
-
-		if (budget &&
-		    ((j / WB_STRIDE) == 0) && (j > 0) &&
-		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
-		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
-			tx_ring->arm_wb = true;
-	}
+	i40e_update_tx_stats(tx_ring, total_packets, total_bytes);
+	i40e_arm_wb(tx_ring, vsi, budget);
 
 	if (ring_is_xdp(tx_ring))
 		return !!budget;
@@ -1244,6 +1219,11 @@
 	new_buff->page		= old_buff->page;
 	new_buff->page_offset	= old_buff->page_offset;
 	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+
+	rx_ring->rx_stats.page_reuse_count++;
+
+	/* clear contents of buffer_info */
+	old_buff->page = NULL;
 }
 
 /**
@@ -1266,7 +1246,7 @@
 }
 
 /**
- * i40e_clean_programming_status - clean the programming status descriptor
+ * i40e_clean_programming_status - try to clean the programming status descriptor
  * @rx_ring: the rx ring that has this descriptor
  * @rx_desc: the rx descriptor written back by HW
  * @qw: qword representing status_error_len in CPU ordering
@@ -1275,15 +1255,22 @@
  * status being successful or not and take actions accordingly. FCoE should
  * handle its context/filter programming/invalidation status and take actions.
  *
+ * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
  **/
-static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
-					  union i40e_rx_desc *rx_desc,
-					  u64 qw)
+struct i40e_rx_buffer *i40e_clean_programming_status(
+	struct i40e_ring *rx_ring,
+	union i40e_rx_desc *rx_desc,
+	u64 qw)
 {
 	struct i40e_rx_buffer *rx_buffer;
-	u32 ntc = rx_ring->next_to_clean;
+	u32 ntc;
 	u8 id;
 
+	if (!i40e_rx_is_programming_status(qw))
+		return NULL;
+
+	ntc = rx_ring->next_to_clean;
+
 	/* fetch, update, and store next to clean */
 	rx_buffer = &rx_ring->rx_bi[ntc++];
 	ntc = (ntc < rx_ring->count) ? ntc : 0;
@@ -1291,18 +1278,13 @@
 
 	prefetch(I40E_RX_DESC(rx_ring, ntc));
 
-	/* place unused page back on the ring */
-	i40e_reuse_rx_page(rx_ring, rx_buffer);
-	rx_ring->rx_stats.page_reuse_count++;
-
-	/* clear contents of buffer_info */
-	rx_buffer->page = NULL;
-
 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
 
 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
 		i40e_fd_handle_status(rx_ring, rx_desc, id);
+
+	return rx_buffer;
 }
 
 /**
@@ -1372,6 +1354,11 @@
 		rx_ring->skb = NULL;
 	}
 
+	if (rx_ring->xsk_umem) {
+		i40e_xsk_clean_rx_ring(rx_ring);
+		goto skip_free;
+	}
+
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
 		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
@@ -1400,6 +1387,7 @@
 		rx_bi->page_offset = 0;
 	}
 
+skip_free:
 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
 	memset(rx_ring->rx_bi, 0, bi_size);
 
@@ -1492,7 +1480,7 @@
  * @rx_ring: ring to bump
  * @val: new head index
  **/
-static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 {
 	rx_ring->next_to_use = val;
 
@@ -1571,24 +1559,6 @@
 }
 
 /**
- * i40e_receive_skb - Send a completed packet up the stack
- * @rx_ring:  rx ring in play
- * @skb: packet to send up
- * @vlan_tag: vlan tag for packet
- **/
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
-			     struct sk_buff *skb, u16 vlan_tag)
-{
-	struct i40e_q_vector *q_vector = rx_ring->q_vector;
-
-	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-	    (vlan_tag & VLAN_VID_MASK))
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-
-	napi_gro_receive(&q_vector->napi, skb);
-}
-
-/**
  * i40e_alloc_rx_buffers - Replace used receive buffers
  * @rx_ring: ring to place buffers on
  * @cleaned_count: number of buffers to replace
@@ -1804,10 +1774,8 @@
  * order to populate the hash, checksum, VLAN, protocol, and
  * other fields within the skb.
  **/
-static inline
 void i40e_process_skb_fields(struct i40e_ring *rx_ring,
-			     union i40e_rx_desc *rx_desc, struct sk_buff *skb,
-			     u8 rx_ptype)
+			     union i40e_rx_desc *rx_desc, struct sk_buff *skb)
 {
 	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 	u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
@@ -1815,6 +1783,8 @@
 	u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
 	u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
 		   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
+	u8 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+		      I40E_RXD_QW1_PTYPE_SHIFT;
 
 	if (unlikely(tsynvalid))
 		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
@@ -1825,6 +1795,13 @@
 
 	skb_record_rx_queue(skb, rx_ring->queue_index);
 
+	if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
+		u16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
+
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       le16_to_cpu(vlan_tag));
+	}
+
 	/* modifies the skb - consumes the enet header */
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
@@ -2058,7 +2035,8 @@
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > I40E_RX_HDR_SIZE)
-		headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
+		headlen = eth_get_headlen(skb->dev, xdp->data,
+					  I40E_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
 	memcpy(__skb_put(skb, headlen), xdp->data,
@@ -2152,7 +2130,6 @@
 	if (i40e_can_reuse_rx_page(rx_buffer)) {
 		/* hand second half of page back to the ring */
 		i40e_reuse_rx_page(rx_ring, rx_buffer);
-		rx_ring->rx_stats.page_reuse_count++;
 	} else {
 		/* we are not reusing the buffer so unmap it */
 		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
@@ -2160,10 +2137,9 @@
 				     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buffer->page,
 					rx_buffer->pagecnt_bias);
+		/* clear contents of buffer_info */
+		rx_buffer->page = NULL;
 	}
-
-	/* clear contents of buffer_info */
-	rx_buffer->page = NULL;
 }
 
 /**
@@ -2199,16 +2175,10 @@
 	return true;
 }
 
-#define I40E_XDP_PASS		0
-#define I40E_XDP_CONSUMED	BIT(0)
-#define I40E_XDP_TX		BIT(1)
-#define I40E_XDP_REDIR		BIT(2)
-
 static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 			      struct i40e_ring *xdp_ring);
 
-static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
-				 struct i40e_ring *xdp_ring)
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
 {
 	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
 
@@ -2287,7 +2257,13 @@
 #endif
 }
 
-static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
+/**
+ * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ **/
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
 {
 	/* Force memory writes to complete before letting h/w
 	 * know there are new descriptors to fetch.
@@ -2297,6 +2273,48 @@
 }
 
 /**
+ * i40e_update_rx_stats - Update Rx ring statistics
+ * @rx_ring: rx descriptor ring
+ * @total_rx_bytes: number of bytes received
+ * @total_rx_packets: number of packets received
+ *
+ * This function updates the Rx ring statistics.
+ **/
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
+			  unsigned int total_rx_bytes,
+			  unsigned int total_rx_packets)
+{
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	rx_ring->q_vector->rx.total_packets += total_rx_packets;
+	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+}
+
+/**
+ * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps the XDP Tx tail and/or flushes the redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ **/
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
+{
+	if (xdp_res & I40E_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & I40E_XDP_TX) {
+		struct i40e_ring *xdp_ring =
+			rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+
+		i40e_xdp_ring_update_tail(xdp_ring);
+	}
+}
+
+/**
  * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: rx descriptor ring to transact packets on
  * @budget: Total limit on number of packets to process
@@ -2323,8 +2341,6 @@
 		struct i40e_rx_buffer *rx_buffer;
 		union i40e_rx_desc *rx_desc;
 		unsigned int size;
-		u16 vlan_tag;
-		u8 rx_ptype;
 		u64 qword;
 
 		/* return some buffers to hardware, one at a time is too slow */
@@ -2349,11 +2365,14 @@
 		 */
 		dma_rmb();
 
-		if (unlikely(i40e_rx_is_programming_status(qword))) {
-			i40e_clean_programming_status(rx_ring, rx_desc, qword);
+		rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc,
+							  qword);
+		if (unlikely(rx_buffer)) {
+			i40e_reuse_rx_page(rx_ring, rx_buffer);
 			cleaned_count++;
 			continue;
 		}
+
 		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
 		if (!size)
@@ -2414,42 +2433,21 @@
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 
-		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-			   I40E_RXD_QW1_PTYPE_SHIFT;
-
 		/* populate checksum, VLAN, and protocol */
-		i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
-
-		vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
-			   le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
+		i40e_process_skb_fields(rx_ring, rx_desc, skb);
 
 		i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
-		i40e_receive_skb(rx_ring, skb, vlan_tag);
+		napi_gro_receive(&rx_ring->q_vector->napi, skb);
 		skb = NULL;
 
 		/* update budget accounting */
 		total_rx_packets++;
 	}
 
-	if (xdp_xmit & I40E_XDP_REDIR)
-		xdp_do_flush_map();
-
-	if (xdp_xmit & I40E_XDP_TX) {
-		struct i40e_ring *xdp_ring =
-			rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-
-		i40e_xdp_ring_update_tail(xdp_ring);
-	}
-
+	i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
 	rx_ring->skb = skb;
 
-	u64_stats_update_begin(&rx_ring->syncp);
-	rx_ring->stats.packets += total_rx_packets;
-	rx_ring->stats.bytes += total_rx_bytes;
-	u64_stats_update_end(&rx_ring->syncp);
-	rx_ring->q_vector->rx.total_packets += total_rx_packets;
-	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+	i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
 
 	/* guarantee a trip back through this routine if there was a failure */
 	return failure ? budget : (int)total_rx_packets;
@@ -2587,7 +2585,11 @@
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
 	i40e_for_each_ring(ring, q_vector->tx) {
-		if (!i40e_clean_tx_irq(vsi, ring, budget)) {
+		bool wd = ring->xsk_umem ?
+			  i40e_clean_xdp_tx_irq(vsi, ring, budget) :
+			  i40e_clean_tx_irq(vsi, ring, budget);
+
+		if (!wd) {
 			clean_complete = false;
 			continue;
 		}
@@ -2605,7 +2607,9 @@
 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
 
 	i40e_for_each_ring(ring, q_vector->rx) {
-		int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
+		int cleaned = ring->xsk_umem ?
+			      i40e_clean_rx_irq_zc(ring, budget_per_ring) :
+			      i40e_clean_rx_irq(ring, budget_per_ring);
 
 		work_done += cleaned;
 		/* if we clean as many as budgeted, we must not be done */
@@ -2645,10 +2649,11 @@
 	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
 		q_vector->arm_wb_state = false;
 
-	/* Work is done so exit the polling mode and re-enable the interrupt */
-	napi_complete_done(napi, work_done);
-
-	i40e_update_enable_itr(vsi, q_vector);
+	/* Exit the polling mode, but don't re-enable interrupts if the stack
+	 * might poll us again due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		i40e_update_enable_itr(vsi, q_vector);
 
 	return min(work_done, budget - 1);
 }
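
napi_complete_done() returns false when the NAPI context stays scheduled,
e.g. under busy polling, which is why the interrupt re-enable above is now
conditional. A generic sketch of the pattern; demo_reenable_irq() is
hypothetical:

	void demo_reenable_irq(struct napi_struct *napi);	/* hypothetical */

	static int demo_poll(struct napi_struct *napi, int budget)
	{
		int work_done = 0;	/* would be set by ring cleanup */

		if (work_done < budget && napi_complete_done(napi, work_done))
			demo_reenable_irq(napi);	/* only when truly done */

		return work_done;
	}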
@@ -3257,7 +3262,7 @@
  **/
 bool __i40e_chk_linearize(struct sk_buff *skb)
 {
-	const struct skb_frag_struct *frag, *stale;
+	const skb_frag_t *frag, *stale;
 	int nr_frags, sum;
 
 	/* no need to check if number of frags is less than 7 */
@@ -3301,7 +3306,7 @@
 		 * descriptor associated with the fragment.
 		 */
 		if (stale_size > I40E_MAX_DATA_PER_TXD) {
-			int align_pad = -(stale->page_offset) &
+			int align_pad = -(skb_frag_off(stale)) &
 					(I40E_MAX_READ_REQ_SIZE - 1);
 
 			sum -= align_pad;
@@ -3344,7 +3349,7 @@
 {
 	unsigned int data_len = skb->data_len;
 	unsigned int size = skb_headlen(skb);
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	struct i40e_tx_buffer *tx_bi;
 	struct i40e_tx_desc *tx_desc;
 	u16 i = tx_ring->next_to_use;
@@ -3451,6 +3456,8 @@
 	tx_desc->cmd_type_offset_bsz =
 			build_ctob(td_cmd, td_offset, size, td_tag);
 
+	skb_tx_timestamp(skb);
+
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.
 	 *
@@ -3463,13 +3470,8 @@
 	first->next_to_watch = tx_desc;
 
 	/* notify HW of packet */
-	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
 		writel(i, tx_ring->tail);
-
-		/* we need this if more than one processor can write to our tail
-		 * at a time, it synchronizes IO on IA64/Altix systems
-		 */
-		mmiowb();
 	}
 
 	return 0;
@@ -3504,6 +3506,7 @@
 	u16 i = xdp_ring->next_to_use;
 	struct i40e_tx_buffer *tx_bi;
 	struct i40e_tx_desc *tx_desc;
+	void *data = xdpf->data;
 	u32 size = xdpf->len;
 	dma_addr_t dma;
 
@@ -3511,8 +3514,7 @@
 		xdp_ring->tx_stats.tx_busy++;
 		return I40E_XDP_CONSUMED;
 	}
-
-	dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
+	dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
 	if (dma_mapping_error(xdp_ring->dev, dma))
 		return I40E_XDP_CONSUMED;
 
@@ -3630,8 +3632,6 @@
 	if (tsyn)
 		tx_flags |= I40E_TX_FLAGS_TSYN;
 
-	skb_tx_timestamp(skb);
-
 	/* always enable CRC insertion offload */
 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
 
@@ -3705,6 +3705,7 @@
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
 	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
 	struct i40e_ring *xdp_ring;
 	int drops = 0;
 	int i;
@@ -3712,7 +3713,8 @@
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
 		return -ENETDOWN;
 
-	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
+	    test_bit(__I40E_CONFIG_BUSY, pf->state))
 		return -ENXIO;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index bb04f6a..36d37f3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -296,13 +296,17 @@
 
 struct i40e_rx_buffer {
 	dma_addr_t dma;
-	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 page_offset;
-#else
-	__u16 page_offset;
-#endif
-	__u16 pagecnt_bias;
+	union {
+		struct {
+			struct page *page;
+			__u32 page_offset;
+			__u16 pagecnt_bias;
+		};
+		struct {
+			void *addr;
+			u64 handle;
+		};
+	};
 };
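
The union lets one rx_buffer slot carry either page-backed state or AF_XDP
zero-copy state; which member is live is implied by the ring's xsk_umem
pointer (added further down in this header). An illustrative, assumed
accessor:

	static void demo_dump_rx_buffer(struct i40e_ring *ring,
					struct i40e_rx_buffer *bi)
	{
		if (ring->xsk_umem)	/* zero-copy: addr/handle are live */
			pr_debug("zc buf addr=%p handle=%llu\n", bi->addr,
				 (unsigned long long)bi->handle);
		else			/* page path: page fields are live */
			pr_debug("page buf offset=%u bias=%u\n",
				 bi->page_offset, bi->pagecnt_bias);
	}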
 
 struct i40e_queue_stats {
@@ -414,6 +418,8 @@
 
 	struct i40e_channel *ch;
 	struct xdp_rxq_info xdp_rxq;
+	struct xdp_umem *xsk_umem;
+	struct zero_copy_allocator zca; /* ZC allocator anchor */
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ring_uses_build_skb(struct i40e_ring *ring)
@@ -515,7 +521,7 @@
  **/
 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb)
 {
-	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
 	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
 	int count = 0, size = skb_headlen(skb);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
new file mode 100644
index 0000000..8af0e99
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Intel Corporation. */
+
+#ifndef I40E_TXRX_COMMON_
+#define I40E_TXRX_COMMON_
+
+void i40e_fd_handle_status(struct i40e_ring *rx_ring,
+			   union i40e_rx_desc *rx_desc, u8 prog_id);
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
+struct i40e_rx_buffer *i40e_clean_programming_status(
+	struct i40e_ring *rx_ring,
+	union i40e_rx_desc *rx_desc,
+	u64 qw);
+void i40e_process_skb_fields(struct i40e_ring *rx_ring,
+			     union i40e_rx_desc *rx_desc, struct sk_buff *skb);
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
+			  unsigned int total_rx_bytes,
+			  unsigned int total_rx_packets);
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res);
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val);
+
+#define I40E_XDP_PASS		0
+#define I40E_XDP_CONSUMED	BIT(0)
+#define I40E_XDP_TX		BIT(1)
+#define I40E_XDP_REDIR		BIT(2)
+
+/**
+ * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword
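+ * @td_cmd: Tx descriptor command flags
+ * @td_offset: Tx descriptor header offsets
+ * @size: size of the Tx data buffer, in bytes
+ * @td_tag: L2 tag 1 (VLAN tag) to insert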
+ **/
+static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
+				u32 td_tag)
+{
+	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
+			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
+			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
+			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
+			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
+}
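
A usage sketch: composing the qword for a final, offload-free data
descriptor. The command flags are from i40e_type.h; zero td_offset and
td_tag assume no checksum offload and no VLAN insertion:

	static __le64 demo_final_ctob(unsigned int len)
	{
		/* EOP marks the packet's last descriptor; RS asks hardware
		 * to report completion status for it.
		 */
		return build_ctob(I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS,
				  0 /* td_offset */, len, 0 /* td_tag */);
	}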
+
+/**
+ * i40e_update_tx_stats - Update the egress statistics for the Tx ring
+ * @tx_ring: Tx ring to update
+ * @total_packet: total packets sent
+ * @total_bytes: total bytes sent
+ **/
+static inline void i40e_update_tx_stats(struct i40e_ring *tx_ring,
+					unsigned int total_packets,
+					unsigned int total_bytes)
+{
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	tx_ring->q_vector->tx.total_bytes += total_bytes;
+	tx_ring->q_vector->tx.total_packets += total_packets;
+}
+
+#define WB_STRIDE 4
+
+/**
+ * i40e_arm_wb - (Possibly) arms Tx write-back
+ * @tx_ring: Tx ring to update
+ * @vsi: the VSI
+ * @budget: the NAPI budget left
+ **/
+static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
+			       struct i40e_vsi *vsi,
+			       int budget)
+{
+	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+		/* check to see if there are fewer than 4 descriptors waiting
+		 * to be written back; if so, kick the hardware to force them
+		 * to be written back in case we stay in NAPI.
+		 * In this mode on X722 we do not enable interrupts.
+		 */
+		 */
+		unsigned int j = i40e_get_tx_pending(tx_ring, false);
+
+		if (budget &&
+		    ((j / WB_STRIDE) == 0) && j > 0 &&
+		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
+		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+			tx_ring->arm_wb = true;
+	}
+}
+
+void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
+void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
+bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
+
+#endif /* I40E_TXRX_COMMON_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 7df969c..b43ec94 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -252,6 +252,12 @@
 					     I40E_PHY_TYPE_OFFSET)
 #define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
 					     I40E_PHY_TYPE_OFFSET)
+/* Offset for 2.5G/5G PHY Types value to bit number conversion */
+#define I40E_PHY_TYPE_OFFSET2 (-10)
+#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T + \
+					     I40E_PHY_TYPE_OFFSET2)
+#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T + \
+					     I40E_PHY_TYPE_OFFSET2)
 #define I40E_HW_CAP_MAX_GPIO			30
 /* Capabilities of a PF or a VF or the whole device */
 struct i40e_hw_capabilities {
@@ -437,6 +443,7 @@
 #define I40E_MODULE_SFF_8472_COMP	0x5E
 #define I40E_MODULE_SFF_8472_SWAP	0x5C
 #define I40E_MODULE_SFF_ADDR_MODE	0x04
+#define I40E_MODULE_SFF_DDM_IMPLEMENTED 0x40
 #define I40E_MODULE_TYPE_QSFP_PLUS	0x0D
 #define I40E_MODULE_TYPE_QSFP28		0x11
 #define I40E_MODULE_QSFP_MAX_LEN	640
@@ -615,6 +622,9 @@
 #define I40E_HW_FLAG_802_1AD_CAPABLE        BIT_ULL(1)
 #define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE  BIT_ULL(2)
 #define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3)
+#define I40E_HW_FLAG_FW_LLDP_STOPPABLE      BIT_ULL(4)
+#define I40E_HW_FLAG_FW_LLDP_PERSISTENT     BIT_ULL(5)
+#define I40E_HW_FLAG_DROP_MODE              BIT_ULL(7)
 	u64 flags;
 
 	/* Used in set switch config AQ command */
@@ -1308,6 +1318,7 @@
 #define I40E_SR_VPD_PTR				0x2F
 #define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR		0x3E
 #define I40E_SR_SW_CHECKSUM_WORD		0x3F
+#define I40E_SR_EMP_SR_SETTINGS_PTR		0x48
 
 /* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */
 #define I40E_SR_VPD_MODULE_MAX_SIZE		1024
@@ -1526,6 +1537,8 @@
 struct i40e_metadata_segment {
 	struct i40e_generic_seg_header header;
 	struct i40e_ddp_version version;
+#define I40E_DDP_TRACKID_RDONLY		0
+#define I40E_DDP_TRACKID_INVALID	0xFFFFFFFF
 	u32 track_id;
 	char name[I40E_DDP_NAME_SIZE];
 };
@@ -1554,15 +1567,36 @@
 	struct {
 #define SECTION_TYPE_INFO	0x00000010
 #define SECTION_TYPE_MMIO	0x00000800
+#define SECTION_TYPE_RB_MMIO	0x00001800
 #define SECTION_TYPE_AQ		0x00000801
+#define SECTION_TYPE_RB_AQ	0x00001801
 #define SECTION_TYPE_NOTE	0x80000000
 #define SECTION_TYPE_NAME	0x80000001
+#define SECTION_TYPE_PROTO	0x80000002
+#define SECTION_TYPE_PCTYPE	0x80000003
+#define SECTION_TYPE_PTYPE	0x80000004
 		u32 type;
 		u32 offset;
 		u32 size;
 	} section;
 };
 
+struct i40e_profile_tlv_section_record {
+	u8 rtype;
+	u8 type;
+	u16 len;
+	u8 data[12];
+};
+
+/* Generic AQ section in profile */
+struct i40e_profile_aq_section {
+	u16 opcode;
+	u16 flags;
+	u8  param[16];
+	u16 datalen;
+	u8  data[1];
+};
+
 struct i40e_profile_info {
 	u32 track_id;
 	struct i40e_ddp_version version;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index c6d24ea..3d24408 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -55,7 +55,12 @@
 
 	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
 	pfe.severity = PF_EVENT_SEVERITY_INFO;
-	if (vf->link_forced) {
+
+	/* Always report link is down if the VF queues aren't enabled */
+	if (!vf->queues_enabled) {
+		pfe.event_data.link_event.link_status = false;
+		pfe.event_data.link_event.link_speed = 0;
+	} else if (vf->link_forced) {
 		pfe.event_data.link_event.link_status = vf->link_up;
 		pfe.event_data.link_event.link_speed =
 			(vf->link_up ? VIRTCHNL_LINK_SPEED_40GB : 0);
@@ -65,6 +70,7 @@
 		pfe.event_data.link_event.link_speed =
 			i40e_virtchnl_link_speed(ls->link_speed);
 	}
+
 	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
 }
@@ -181,7 +187,7 @@
  * check for the valid queue id
  **/
 static inline bool i40e_vc_isvalid_queue_id(struct i40e_vf *vf, u16 vsi_id,
-					    u8 qid)
+					    u16 qid)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = i40e_find_vsi_from_id(pf, vsi_id);
@@ -196,7 +202,7 @@
  *
  * check for the valid vector id
  **/
-static inline bool i40e_vc_isvalid_vector_id(struct i40e_vf *vf, u8 vector_id)
+static inline bool i40e_vc_isvalid_vector_id(struct i40e_vf *vf, u32 vector_id)
 {
 	struct i40e_pf *pf = vf->pf;
 
@@ -440,15 +446,28 @@
 	struct virtchnl_iwarp_qv_info *qv_info;
 	u32 v_idx, i, reg_idx, reg;
 	u32 next_q_idx, next_q_type;
-	u32 msix_vf, size;
+	u32 msix_vf;
+	int ret = 0;
 
-	size = sizeof(struct virtchnl_iwarp_qvlist_info) +
-	       (sizeof(struct virtchnl_iwarp_qv_info) *
-						(qvlist_info->num_vectors - 1));
-	vf->qvlist_info = kzalloc(size, GFP_KERNEL);
-	if (!vf->qvlist_info)
-		return -ENOMEM;
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
 
+	if (qvlist_info->num_vectors > msix_vf) {
+		dev_warn(&pf->pdev->dev,
+			 "Incorrect number of iwarp vectors %u. Maximum %u allowed.\n",
+			 qvlist_info->num_vectors,
+			 msix_vf);
+		ret = -EINVAL;
+		goto err_out;
+	}
+
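+	/* The VF may send this message more than once; free any earlier list
+	 * so it is not leaked. struct_size() computes sizeof(*qvlist_info)
+	 * plus n * sizeof(qv_info[0]) with overflow checking; the
+	 * "num_vectors - 1" accounts for the one qv_info element the struct
+	 * already declares.
+	 */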
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = kzalloc(struct_size(vf->qvlist_info, qv_info,
+					      qvlist_info->num_vectors - 1),
+				  GFP_KERNEL);
+	if (!vf->qvlist_info) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
 	vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
 
 	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
@@ -456,11 +475,14 @@
 		qv_info = &qvlist_info->qv_info[i];
 		if (!qv_info)
 			continue;
-		v_idx = qv_info->v_idx;
 
 		/* Validate vector id belongs to this vf */
-		if (!i40e_vc_isvalid_vector_id(vf, v_idx))
-			goto err;
+		if (!i40e_vc_isvalid_vector_id(vf, qv_info->v_idx)) {
+			ret = -EINVAL;
+			goto err_free;
+		}
+
+		v_idx = qv_info->v_idx;
 
 		vf->qvlist_info->qv_info[i] = *qv_info;
 
@@ -502,10 +524,11 @@
 	}
 
 	return 0;
-err:
+err_free:
 	kfree(vf->qvlist_info);
 	vf->qvlist_info = NULL;
-	return -EINVAL;
+err_out:
+	return ret;
 }
 
 /**
@@ -1084,6 +1107,128 @@
 	return -EIO;
 }
 
+static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi);
+
+/**
+ * i40e_config_vf_promiscuous_mode
+ * @vf: pointer to the VF info
+ * @vsi_id: VSI id
+ * @allmulti: whether to enable MAC L2 layer multicast promiscuous mode
+ * @alluni: whether to enable MAC L2 layer unicast promiscuous mode
+ *
+ * Called from the VF to configure the promiscuous mode of
+ * VF vsis and from the VF reset path to reset promiscuous mode.
+ **/
+static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
+						   u16 vsi_id,
+						   bool allmulti,
+						   bool alluni)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_mac_filter *f;
+	i40e_status aq_ret = 0;
+	struct i40e_vsi *vsi;
+	int bkt;
+
+	vsi = i40e_find_vsi_from_id(pf, vsi_id);
+	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id) || !vsi)
+		return I40E_ERR_PARAM;
+
+	if (vf->port_vlan_id) {
+		aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid,
+							    allmulti,
+							    vf->port_vlan_id,
+							    NULL);
+		if (aq_ret) {
+			int aq_err = pf->hw.aq.asq_last_status;
+
+			dev_err(&pf->pdev->dev,
+				"VF %d failed to set multicast promiscuous mode err %s aq_err %s\n",
+				vf->vf_id,
+				i40e_stat_str(&pf->hw, aq_ret),
+				i40e_aq_str(&pf->hw, aq_err));
+			return aq_ret;
+		}
+
+		aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, vsi->seid,
+							    alluni,
+							    vf->port_vlan_id,
+							    NULL);
+		if (aq_ret) {
+			int aq_err = pf->hw.aq.asq_last_status;
+
+			dev_err(&pf->pdev->dev,
+				"VF %d failed to set unicast promiscuous mode err %s aq_err %s\n",
+				vf->vf_id,
+				i40e_stat_str(&pf->hw, aq_ret),
+				i40e_aq_str(&pf->hw, aq_err));
+		}
+		return aq_ret;
+	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
+		hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
+			if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
+				continue;
+			aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw,
+								    vsi->seid,
+								    allmulti,
+								    f->vlan,
+								    NULL);
+			if (aq_ret) {
+				int aq_err = pf->hw.aq.asq_last_status;
+
+				dev_err(&pf->pdev->dev,
+					"Could not add VLAN %d to multicast promiscuous domain err %s aq_err %s\n",
+					f->vlan,
+					i40e_stat_str(&pf->hw, aq_ret),
+					i40e_aq_str(&pf->hw, aq_err));
+			}
+
+			aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
+								    vsi->seid,
+								    alluni,
+								    f->vlan,
+								    NULL);
+			if (aq_ret) {
+				int aq_err = pf->hw.aq.asq_last_status;
+
+				dev_err(&pf->pdev->dev,
+					"Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n",
+					f->vlan,
+					i40e_stat_str(&pf->hw, aq_ret),
+					i40e_aq_str(&pf->hw, aq_err));
+			}
+		}
+		return aq_ret;
+	}
+	aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, allmulti,
+						       NULL);
+	if (aq_ret) {
+		int aq_err = pf->hw.aq.asq_last_status;
+
+		dev_err(&pf->pdev->dev,
+			"VF %d failed to set multicast promiscuous mode err %s aq_err %s\n",
+			vf->vf_id,
+			i40e_stat_str(&pf->hw, aq_ret),
+			i40e_aq_str(&pf->hw, aq_err));
+		return aq_ret;
+	}
+
+	aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, alluni,
+						     NULL, true);
+	if (aq_ret) {
+		int aq_err = pf->hw.aq.asq_last_status;
+
+		dev_err(&pf->pdev->dev,
+			"VF %d failed to set unicast promiscuous mode err %s aq_err %s\n",
+			vf->vf_id,
+			i40e_stat_str(&pf->hw, aq_ret),
+			i40e_aq_str(&pf->hw, aq_err));
+	}
+
+	return aq_ret;
+}
+
 /**
  * i40e_trigger_vf_reset
  * @vf: pointer to the VF structure
@@ -1145,6 +1290,9 @@
 	struct i40e_hw *hw = &pf->hw;
 	u32 reg;
 
+	/* disable promisc modes in case they were enabled */
+	i40e_config_vf_promiscuous_mode(vf, vf->lan_vsi_id, false, false);
+
 	/* free VF resources to begin resetting the VSI state */
 	i40e_free_vf_res(vf);
 
@@ -1542,13 +1690,20 @@
 int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
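+	/* __I40E_VIRTCHNL_OP_PENDING serializes the VF configuration entry
+	 * points; every return path below must clear the bit.
+	 */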
 
 	if (num_vfs) {
 		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
 			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
 			i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
 		}
-		return i40e_pci_sriov_enable(pdev, num_vfs);
+		ret = i40e_pci_sriov_enable(pdev, num_vfs);
+		goto sriov_configure_out;
 	}
 
 	if (!pci_vfs_assigned(pf->pdev)) {
@@ -1557,9 +1712,12 @@
 		i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
 	} else {
 		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto sriov_configure_out;
 	}
-	return 0;
+sriov_configure_out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
 }
 
 /***********************virtual channel routines******************/
@@ -1693,7 +1851,7 @@
 	i40e_status aq_ret = 0;
 	struct i40e_vsi *vsi;
 	int num_vsis = 1;
-	int len = 0;
+	size_t len = 0;
 	int ret;
 
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
@@ -1701,9 +1859,7 @@
 		goto err;
 	}
 
-	len = (sizeof(struct virtchnl_vf_resource) +
-	       sizeof(struct virtchnl_vsi_resource) * num_vsis);
-
+	len = struct_size(vfres, vsi_res, num_vsis);
 	vfres = kzalloc(len, GFP_KERNEL);
 	if (!vfres) {
 		aq_ret = I40E_ERR_NO_MEMORY;
@@ -1840,143 +1996,81 @@
  * i40e_vc_config_promiscuous_mode_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to configure the promiscuous mode of
  * VF vsis
  **/
-static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
-					       u8 *msg, u16 msglen)
+static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_promisc_info *info =
 	    (struct virtchnl_promisc_info *)msg;
 	struct i40e_pf *pf = vf->pf;
-	struct i40e_hw *hw = &pf->hw;
-	struct i40e_mac_filter *f;
 	i40e_status aq_ret = 0;
 	bool allmulti = false;
-	struct i40e_vsi *vsi;
 	bool alluni = false;
-	int aq_err = 0;
-	int bkt;
 
-	vsi = i40e_find_vsi_from_id(pf, info->vsi_id);
-	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) ||
-	    !vsi) {
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
-		goto error_param;
+		goto err_out;
 	}
 	if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
 		dev_err(&pf->pdev->dev,
 			"Unprivileged VF %d is attempting to configure promiscuous mode\n",
 			vf->vf_id);
-		/* Lie to the VF on purpose. */
+
+		/* Lie to the VF on purpose, because this is an error we can
+		 * ignore: an unprivileged VF is not a virtual channel error.
+		 */
 		aq_ret = 0;
-		goto error_param;
+		goto err_out;
 	}
+
+	if (info->flags > I40E_MAX_VF_PROMISC_FLAGS) {
+		aq_ret = I40E_ERR_PARAM;
+		goto err_out;
+	}
+
+	if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto err_out;
+	}
+
 	/* Multicast promiscuous handling*/
 	if (info->flags & FLAG_VF_MULTICAST_PROMISC)
 		allmulti = true;
 
-	if (vf->port_vlan_id) {
-		aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid,
-							    allmulti,
-							    vf->port_vlan_id,
-							    NULL);
-	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
-		hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
-			if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
-				continue;
-			aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw,
-								    vsi->seid,
-								    allmulti,
-								    f->vlan,
-								    NULL);
-			aq_err = pf->hw.aq.asq_last_status;
-			if (aq_ret) {
-				dev_err(&pf->pdev->dev,
-					"Could not add VLAN %d to multicast promiscuous domain err %s aq_err %s\n",
-					f->vlan,
-					i40e_stat_str(&pf->hw, aq_ret),
-					i40e_aq_str(&pf->hw, aq_err));
-				break;
-			}
-		}
-	} else {
-		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
-							       allmulti, NULL);
-		aq_err = pf->hw.aq.asq_last_status;
-		if (aq_ret) {
-			dev_err(&pf->pdev->dev,
-				"VF %d failed to set multicast promiscuous mode err %s aq_err %s\n",
-				vf->vf_id,
-				i40e_stat_str(&pf->hw, aq_ret),
-				i40e_aq_str(&pf->hw, aq_err));
-			goto error_param;
-		}
-	}
-
-	if (!aq_ret) {
-		dev_info(&pf->pdev->dev,
-			 "VF %d successfully set multicast promiscuous mode\n",
-			 vf->vf_id);
-		if (allmulti)
-			set_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states);
-		else
-			clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states);
-	}
-
 	if (info->flags & FLAG_VF_UNICAST_PROMISC)
 		alluni = true;
-	if (vf->port_vlan_id) {
-		aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, vsi->seid,
-							    alluni,
-							    vf->port_vlan_id,
-							    NULL);
-	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
-		hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
-			if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
-				continue;
-			aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
-								    vsi->seid,
-								    alluni,
-								    f->vlan,
-								    NULL);
-			aq_err = pf->hw.aq.asq_last_status;
-			if (aq_ret)
-				dev_err(&pf->pdev->dev,
-					"Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n",
-					f->vlan,
-					i40e_stat_str(&pf->hw, aq_ret),
-					i40e_aq_str(&pf->hw, aq_err));
-		}
-	} else {
-		aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid,
-							     alluni, NULL,
-							     true);
-		aq_err = pf->hw.aq.asq_last_status;
-		if (aq_ret) {
-			dev_err(&pf->pdev->dev,
-				"VF %d failed to set unicast promiscuous mode %8.8x err %s aq_err %s\n",
-				vf->vf_id, info->flags,
-				i40e_stat_str(&pf->hw, aq_ret),
-				i40e_aq_str(&pf->hw, aq_err));
-			goto error_param;
-		}
-	}
+	aq_ret = i40e_config_vf_promiscuous_mode(vf, info->vsi_id, allmulti,
+						 alluni);
+	if (aq_ret)
+		goto err_out;
 
-	if (!aq_ret) {
+	if (allmulti) {
+		if (!test_and_set_bit(I40E_VF_STATE_MC_PROMISC,
+				      &vf->vf_states))
+			dev_info(&pf->pdev->dev,
+				 "VF %d successfully set multicast promiscuous mode\n",
+				 vf->vf_id);
+	} else if (test_and_clear_bit(I40E_VF_STATE_MC_PROMISC,
+				      &vf->vf_states))
 		dev_info(&pf->pdev->dev,
-			 "VF %d successfully set unicast promiscuous mode\n",
+			 "VF %d successfully unset multicast promiscuous mode\n",
 			 vf->vf_id);
-		if (alluni)
-			set_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states);
-		else
-			clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states);
-	}
 
-error_param:
+	if (alluni) {
+		if (!test_and_set_bit(I40E_VF_STATE_UC_PROMISC,
+				      &vf->vf_states))
+			dev_info(&pf->pdev->dev,
+				 "VF %d successfully set unicast promiscuous mode\n",
+				 vf->vf_id);
+	} else if (test_and_clear_bit(I40E_VF_STATE_UC_PROMISC,
+				      &vf->vf_states))
+		dev_info(&pf->pdev->dev,
+			 "VF %d successfully unset unicast promiscuous mode\n",
+			 vf->vf_id);
+
+err_out:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf,
 				       VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
@@ -1987,37 +2081,57 @@
  * i40e_vc_config_queues_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to configure the rx/tx
  * queues
  **/
-static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_vsi_queue_config_info *qci =
 	    (struct virtchnl_vsi_queue_config_info *)msg;
 	struct virtchnl_queue_pair_info *qpi;
 	struct i40e_pf *pf = vf->pf;
 	u16 vsi_id, vsi_queue_id = 0;
+	u16 num_qps_all = 0;
 	i40e_status aq_ret = 0;
 	int i, j = 0, idx = 0;
 
-	vsi_id = qci->vsi_id;
-
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
 
-	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	if (!i40e_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
 
+	if (qci->num_queue_pairs > I40E_MAX_VF_QUEUES) {
+		aq_ret = I40E_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vf->adq_enabled) {
+		for (i = 0; i < I40E_MAX_VF_VSI; i++)
+			num_qps_all += vf->ch[i].num_qps;
+		if (num_qps_all != qci->num_queue_pairs) {
+			aq_ret = I40E_ERR_PARAM;
+			goto error_param;
+		}
+	}
+
+	vsi_id = qci->vsi_id;
+
 	for (i = 0; i < qci->num_queue_pairs; i++) {
 		qpi = &qci->qpair[i];
 
 		if (!vf->adq_enabled) {
+			if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
+						      qpi->txq.queue_id)) {
+				aq_ret = I40E_ERR_PARAM;
+				goto error_param;
+			}
+
 			vsi_queue_id = qpi->txq.queue_id;
 
 			if (qpi->txq.vsi_id != qci->vsi_id ||
@@ -2028,9 +2142,12 @@
 			}
 		}
 
-		if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
-			aq_ret = I40E_ERR_PARAM;
-			goto error_param;
+		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = I40E_ERR_NO_AVAILABLE_VSI;
+				goto error_param;
+			}
+			vsi_id = vf->ch[idx].vsi_id;
 		}
 
 		if (i40e_config_vsi_rx_queue(vf, vsi_id, vsi_queue_id,
@@ -2045,8 +2162,12 @@
 		 * VF does not know about these additional VSIs and all
 		 * it cares is about its own queues. PF configures these queues
 		 * to its appropriate VSIs based on TC mapping
-		 **/
+		 */
 		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = I40E_ERR_NO_AVAILABLE_VSI;
+				goto error_param;
+			}
 			if (j == (vf->ch[idx].num_qps - 1)) {
 				idx++;
 				j = 0; /* resetting the queue count */
@@ -2055,7 +2176,6 @@
 				j++;
 				vsi_queue_id++;
 			}
-			vsi_id = vf->ch[idx].vsi_id;
 		}
 	}
 	/* set vsi num_queue_pairs in use to num configured by VF */
@@ -2105,17 +2225,16 @@
  * i40e_vc_config_irq_map_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to configure the irq to
  * queue map
  **/
-static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_irq_map_info *irqmap_info =
 	    (struct virtchnl_irq_map_info *)msg;
 	struct virtchnl_vector_map *map;
-	u16 vsi_id, vector_id;
+	u16 vsi_id;
 	i40e_status aq_ret = 0;
 	int i;
 
@@ -2124,16 +2243,21 @@
 		goto error_param;
 	}
 
+	if (irqmap_info->num_vectors >
+	    vf->pf->hw.func_caps.num_msix_vectors_vf) {
+		aq_ret = I40E_ERR_PARAM;
+		goto error_param;
+	}
+
 	for (i = 0; i < irqmap_info->num_vectors; i++) {
 		map = &irqmap_info->vecmap[i];
-		vector_id = map->vector_id;
-		vsi_id = map->vsi_id;
 		/* validate msg params */
-		if (!i40e_vc_isvalid_vector_id(vf, vector_id) ||
-		    !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+		if (!i40e_vc_isvalid_vector_id(vf, map->vector_id) ||
+		    !i40e_vc_isvalid_vsi_id(vf, map->vsi_id)) {
 			aq_ret = I40E_ERR_PARAM;
 			goto error_param;
 		}
+		vsi_id = map->vsi_id;
 
 		if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
 			aq_ret = I40E_ERR_PARAM;
@@ -2202,16 +2326,14 @@
  * i40e_vc_enable_queues_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to enable all or specific queue(s)
  **/
-static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
-	u16 vsi_id = vqs->vsi_id;
 	i40e_status aq_ret = 0;
 	int i;
 
@@ -2220,7 +2342,7 @@
 		goto error_param;
 	}
 
-	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2251,6 +2373,8 @@
 		}
 	}
 
+	vf->queues_enabled = true;
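+	/* with queues enabled, i40e_vc_notify_vf_link_state() may now
+	 * report the real link state to this VF
+	 */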
+
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
@@ -2261,18 +2385,20 @@
  * i40e_vc_disable_queues_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to disable all or specific
  * queue(s)
  **/
-static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
 	i40e_status aq_ret = 0;
 
+	/* Immediately mark queues as disabled */
+	vf->queues_enabled = false;
+
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
@@ -2283,7 +2409,9 @@
 		goto error_param;
 	}
 
-	if ((0 == vqs->rx_queues) && (0 == vqs->tx_queues)) {
+	if ((vqs->rx_queues == 0 && vqs->tx_queues == 0) ||
+	    vqs->rx_queues > I40E_MAX_VF_QUEUES ||
+	    vqs->tx_queues > I40E_MAX_VF_QUEUES) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2309,29 +2437,24 @@
  * i40e_vc_request_queues_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * VFs get a default number of queues but can use this message to request a
  * different number.  If the request is successful, PF will reset the VF and
  * return 0.  If unsuccessful, PF will send message informing VF of number of
  * available queues and return result of sending VF a message.
  **/
-static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen)
+static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_vf_res_request *vfres =
 		(struct virtchnl_vf_res_request *)msg;
-	int req_pairs = vfres->num_queue_pairs;
-	int cur_pairs = vf->num_queue_pairs;
+	u16 req_pairs = vfres->num_queue_pairs;
+	u8 cur_pairs = vf->num_queue_pairs;
 	struct i40e_pf *pf = vf->pf;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
 		return -EINVAL;
 
-	if (req_pairs <= 0) {
-		dev_err(&pf->pdev->dev,
-			"VF %d tried to request %d queues.  Ignoring.\n",
-			vf->vf_id, req_pairs);
-	} else if (req_pairs > I40E_MAX_VF_QUEUES) {
+	if (req_pairs > I40E_MAX_VF_QUEUES) {
 		dev_err(&pf->pdev->dev,
 			"VF %d tried to request more than %d queues.\n",
 			vf->vf_id,
@@ -2360,11 +2483,10 @@
  * i40e_vc_get_stats_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * called from the VF to get vsi stats
  **/
-static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
@@ -2399,9 +2521,11 @@
 				      (u8 *)&stats, sizeof(stats));
 }
 
-/* If the VF is not trusted restrict the number of MAC/VLAN it can program */
-#define I40E_VC_MAX_MAC_ADDR_PER_VF 12
-#define I40E_VC_MAX_VLAN_PER_VF 8
+/* If the VF is not trusted, restrict the number of MAC/VLAN filters it can
+ * program. MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast.
+ */
+#define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1)
+#define I40E_VC_MAX_VLAN_PER_VF 16
 
 /**
  * i40e_check_vf_permission
@@ -2458,7 +2582,7 @@
 		    !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
 		    !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
 			dev_err(&pf->pdev->dev,
-				"VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
+				"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
 			return -EPERM;
 		}
 	}
@@ -2470,22 +2594,20 @@
  * i40e_vc_add_mac_addr_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * add guest mac address filter
  **/
-static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_ether_addr_list *al =
 	    (struct virtchnl_ether_addr_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	u16 vsi_id = al->vsi_id;
 	i40e_status ret = 0;
 	int i;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	    !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) {
 		ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2541,22 +2663,20 @@
  * i40e_vc_del_mac_addr_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * remove guest mac address filter
  **/
-static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_ether_addr_list *al =
 	    (struct virtchnl_ether_addr_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	u16 vsi_id = al->vsi_id;
 	i40e_status ret = 0;
 	int i;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	    !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) {
 		ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2569,6 +2689,16 @@
 			ret = I40E_ERR_INVALID_MAC_ADDR;
 			goto error_param;
 		}
+
+		if (vf->pf_set_mac &&
+		    ether_addr_equal(al->list[i].addr,
+				     vf->default_lan_addr.addr)) {
+			dev_err(&pf->pdev->dev,
+				"MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n",
+				vf->default_lan_addr.addr, vf->vf_id);
+			ret = I40E_ERR_PARAM;
+			goto error_param;
+		}
 	}
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
@@ -2601,17 +2731,15 @@
  * i40e_vc_add_vlan_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * program guest vlan id
  **/
-static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_vlan_filter_list *vfl =
 	    (struct virtchnl_vlan_filter_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	u16 vsi_id = vfl->vsi_id;
 	i40e_status aq_ret = 0;
 	int i;
 
@@ -2622,7 +2750,7 @@
 		goto error_param;
 	}
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	    !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2674,22 +2802,20 @@
  * i40e_vc_remove_vlan_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * remove programmed guest vlan id
  **/
-static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_vlan_filter_list *vfl =
 	    (struct virtchnl_vlan_filter_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	u16 vsi_id = vfl->vsi_id;
 	i40e_status aq_ret = 0;
 	int i;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	    !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2703,7 +2829,8 @@
 
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (vsi->info.pvid) {
-		aq_ret = I40E_ERR_PARAM;
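+		/* With a port VLAN set, tolerate only the no-op request to
+		 * remove VLAN 0; reject anything else.
+		 */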
+		if (vfl->num_elements > 1 || vfl->vlan_id[0])
+			aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
 
@@ -2761,13 +2888,11 @@
  * i40e_vc_iwarp_qvmap_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  * @config: config qvmap or release it
  *
  * called from the VF for the iwarp msgs
  **/
-static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
-				   bool config)
+static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, bool config)
 {
 	struct virtchnl_iwarp_qvlist_info *qvlist_info =
 				(struct virtchnl_iwarp_qvlist_info *)msg;
@@ -2798,21 +2923,19 @@
  * i40e_vc_config_rss_key
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * Configure the VF's RSS key
  **/
-static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_rss_key *vrk =
 		(struct virtchnl_rss_key *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	u16 vsi_id = vrk->vsi_id;
 	i40e_status aq_ret = 0;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, vsi_id) ||
+	    !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) ||
 	    (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto err;
@@ -2830,26 +2953,31 @@
  * i40e_vc_config_rss_lut
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * Configure the VF's RSS LUT
  **/
-static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_rss_lut *vrl =
 		(struct virtchnl_rss_lut *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	u16 vsi_id = vrl->vsi_id;
 	i40e_status aq_ret = 0;
+	u16 i;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, vsi_id) ||
+	    !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) ||
 	    (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto err;
 	}
 
+	for (i = 0; i < vrl->lut_entries; i++)
+		if (vrl->lut[i] >= vf->num_queue_pairs) {
+			aq_ret = I40E_ERR_PARAM;
+			goto err;
+		}
+
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	aq_ret = i40e_config_rss(vsi, NULL, vrl->lut, I40E_VF_HLUT_ARRAY_SIZE);
 	/* send the response to the VF */
@@ -2862,11 +2990,10 @@
  * i40e_vc_get_rss_hena
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * Return the RSS HENA bits allowed by the hardware
  **/
-static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_rss_hena *vrh = NULL;
 	struct i40e_pf *pf = vf->pf;
@@ -2898,11 +3025,10 @@
  * i40e_vc_set_rss_hena
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * Set the RSS HENA bits for the VF
  **/
-static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen)
+static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_rss_hena *vrh =
 		(struct virtchnl_rss_hena *)msg;
@@ -2927,21 +3053,20 @@
  * i40e_vc_enable_vlan_stripping
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * Enable vlan header stripping for the VF
  **/
-static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg,
-					 u16 msglen)
+static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg)
 {
-	struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	i40e_status aq_ret = 0;
+	struct i40e_vsi *vsi;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto err;
 	}
 
+	vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	i40e_vlan_stripping_enable(vsi);
 
 	/* send the response to the VF */
@@ -2954,21 +3079,20 @@
  * i40e_vc_disable_vlan_stripping
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
- * @msglen: msg length
  *
  * Disable vlan header stripping for the VF
  **/
-static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg,
-					  u16 msglen)
+static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg)
 {
-	struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	i40e_status aq_ret = 0;
+	struct i40e_vsi *vsi;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto err;
 	}
 
+	vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	i40e_vlan_stripping_disable(vsi);
 
 	/* send the response to the VF */
@@ -3075,7 +3199,7 @@
 	}
 
 	if (mask.dst_port & data.dst_port) {
-		if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
+		if (!data.dst_port) {
 			dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
 				 vf->vf_id);
 			goto err;
@@ -3083,7 +3207,7 @@
 	}
 
 	if (mask.src_port & data.src_port) {
-		if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
+		if (!data.src_port) {
 			dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
 				 vf->vf_id);
 			goto err;
@@ -3323,7 +3447,7 @@
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
-		goto err;
+		goto err_out;
 	}
 
 	if (!vf->adq_enabled) {
@@ -3331,15 +3455,15 @@
 			 "VF %d: ADq is not enabled, can't apply cloud filter\n",
 			 vf->vf_id);
 		aq_ret = I40E_ERR_PARAM;
-		goto err;
+		goto err_out;
 	}
 
 	if (i40e_validate_cloud_filter(vf, vcf)) {
 		dev_info(&pf->pdev->dev,
 			 "VF %d: Invalid input/s, can't apply cloud filter\n",
 			 vf->vf_id);
-			aq_ret = I40E_ERR_PARAM;
-			goto err;
+		aq_ret = I40E_ERR_PARAM;
+		goto err_out;
 	}
 
 	cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
@@ -3400,13 +3524,17 @@
 			"VF %d: Failed to add cloud filter, err %s aq_err %s\n",
 			vf->vf_id, i40e_stat_str(&pf->hw, ret),
 			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-		goto err;
+		goto err_free;
 	}
 
 	INIT_HLIST_NODE(&cfilter->cloud_node);
 	hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+	/* Ownership has moved to the list; clear the local pointer so the
+	 * fall-through kfree() below becomes a no-op.
+	 */
+	cfilter = NULL;
 	vf->num_cloud_filters++;
-err:
+err_free:
+	kfree(cfilter);
+err_out:
 	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
 				       aq_ret);
 }
@@ -3422,8 +3550,9 @@
 		(struct virtchnl_tc_info *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_link_status *ls = &pf->hw.phy.link_info;
-	int i, adq_request_qps = 0, speed = 0;
+	int i, adq_request_qps = 0;
 	i40e_status aq_ret = 0;
+	u64 speed = 0;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
@@ -3449,8 +3578,8 @@
 	/* max number of traffic classes for VF currently capped at 4 */
 	if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
 		dev_err(&pf->pdev->dev,
-			"VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
-			vf->vf_id, tci->num_tc);
+			"VF %d trying to set %u TCs, valid range 1-%u TCs per VF\n",
+			vf->vf_id, tci->num_tc, I40E_MAX_VF_VSI);
 		aq_ret = I40E_ERR_PARAM;
 		goto err;
 	}
@@ -3460,8 +3589,9 @@
 		if (!tci->list[i].count ||
 		    tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
 			dev_err(&pf->pdev->dev,
-				"VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
-				vf->vf_id, i, tci->list[i].count);
+				"VF %d: TC %d trying to set %u queues, valid range 1-%u queues per TC\n",
+				vf->vf_id, i, tci->list[i].count,
+				I40E_DEFAULT_QUEUES_PER_VF);
 			aq_ret = I40E_ERR_PARAM;
 			goto err;
 		}
@@ -3610,7 +3740,7 @@
 	int ret;
 
 	pf->vf_aq_requests++;
-	if (local_vf_id >= pf->num_alloc_vfs)
+	if (local_vf_id < 0 || local_vf_id >= pf->num_alloc_vfs)
 		return -EINVAL;
 	vf = &(pf->vf[local_vf_id]);
 
@@ -3621,25 +3751,12 @@
 	/* perform basic checks on the msg */
 	ret = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
 
-	/* perform additional checks specific to this driver */
-	if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) {
-		struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg;
-
-		if (vrk->key_len != I40E_HKEY_ARRAY_SIZE)
-			ret = -EINVAL;
-	} else if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_LUT) {
-		struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
-
-		if (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)
-			ret = -EINVAL;
-	}
-
 	if (ret) {
 		i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_PARAM);
 		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
 			local_vf_id, v_opcode, msglen);
 		switch (ret) {
-		case VIRTCHNL_ERR_PARAM:
+		case VIRTCHNL_STATUS_ERR_PARAM:
 			return -EPERM;
 		default:
 			return -EINVAL;
@@ -3659,65 +3776,65 @@
 		ret = 0;
 		break;
 	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
-		ret = i40e_vc_config_promiscuous_mode_msg(vf, msg, msglen);
+		ret = i40e_vc_config_promiscuous_mode_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
-		ret = i40e_vc_config_queues_msg(vf, msg, msglen);
+		ret = i40e_vc_config_queues_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
-		ret = i40e_vc_config_irq_map_msg(vf, msg, msglen);
+		ret = i40e_vc_config_irq_map_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_ENABLE_QUEUES:
-		ret = i40e_vc_enable_queues_msg(vf, msg, msglen);
+		ret = i40e_vc_enable_queues_msg(vf, msg);
 		i40e_vc_notify_vf_link_state(vf);
 		break;
 	case VIRTCHNL_OP_DISABLE_QUEUES:
-		ret = i40e_vc_disable_queues_msg(vf, msg, msglen);
+		ret = i40e_vc_disable_queues_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_ADD_ETH_ADDR:
-		ret = i40e_vc_add_mac_addr_msg(vf, msg, msglen);
+		ret = i40e_vc_add_mac_addr_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_DEL_ETH_ADDR:
-		ret = i40e_vc_del_mac_addr_msg(vf, msg, msglen);
+		ret = i40e_vc_del_mac_addr_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_ADD_VLAN:
-		ret = i40e_vc_add_vlan_msg(vf, msg, msglen);
+		ret = i40e_vc_add_vlan_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_DEL_VLAN:
-		ret = i40e_vc_remove_vlan_msg(vf, msg, msglen);
+		ret = i40e_vc_remove_vlan_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_GET_STATS:
-		ret = i40e_vc_get_stats_msg(vf, msg, msglen);
+		ret = i40e_vc_get_stats_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_IWARP:
 		ret = i40e_vc_iwarp_msg(vf, msg, msglen);
 		break;
 	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
-		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true);
+		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, true);
 		break;
 	case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
-		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false);
+		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, false);
 		break;
 	case VIRTCHNL_OP_CONFIG_RSS_KEY:
-		ret = i40e_vc_config_rss_key(vf, msg, msglen);
+		ret = i40e_vc_config_rss_key(vf, msg);
 		break;
 	case VIRTCHNL_OP_CONFIG_RSS_LUT:
-		ret = i40e_vc_config_rss_lut(vf, msg, msglen);
+		ret = i40e_vc_config_rss_lut(vf, msg);
 		break;
 	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
-		ret = i40e_vc_get_rss_hena(vf, msg, msglen);
+		ret = i40e_vc_get_rss_hena(vf, msg);
 		break;
 	case VIRTCHNL_OP_SET_RSS_HENA:
-		ret = i40e_vc_set_rss_hena(vf, msg, msglen);
+		ret = i40e_vc_set_rss_hena(vf, msg);
 		break;
 	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
-		ret = i40e_vc_enable_vlan_stripping(vf, msg, msglen);
+		ret = i40e_vc_enable_vlan_stripping(vf, msg);
 		break;
 	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
-		ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen);
+		ret = i40e_vc_disable_vlan_stripping(vf, msg);
 		break;
 	case VIRTCHNL_OP_REQUEST_QUEUES:
-		ret = i40e_vc_request_queues_msg(vf, msg, msglen);
+		ret = i40e_vc_request_queues_msg(vf, msg);
 		break;
 	case VIRTCHNL_OP_ENABLE_CHANNELS:
 		ret = i40e_vc_add_qch_msg(vf, msg);
@@ -3786,6 +3903,35 @@
 }
 
 /**
+ * i40e_validate_vf
+ * @pf: the physical function
+ * @vf_id: VF identifier
+ *
+ * Check that the VF is enabled and the VSI exists.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_validate_vf(struct i40e_pf *pf, int vf_id)
+{
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev,
+			"Invalid VF Identifier %d\n", vf_id);
+		ret = -EINVAL;
+		goto err_out;
+	}
+	vf = &pf->vf[vf_id];
+	vsi = i40e_find_vsi_from_id(pf, vf->lan_vsi_id);
+	if (!vsi)
+		ret = -EINVAL;
+err_out:
+	return ret;
+}
+
+/**
  * i40e_ndo_set_vf_mac
  * @netdev: network interface device structure
  * @vf_id: VF identifier
@@ -3805,24 +3951,31 @@
 	int bkt;
 	u8 i;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev,
-			"Invalid VF Identifier %d\n", vf_id);
-		ret = -EINVAL;
-		goto error_param;
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
 	}
 
-	vf = &(pf->vf[vf_id]);
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error_param;
+
+	vf = &pf->vf[vf_id];
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
 	/* When the VF is resetting wait until it is done.
 	 * It can take up to 200 milliseconds,
 	 * but wait for up to 300 milliseconds to be safe.
+	 * If the VF is indeed in reset, the vsi pointer must be refreshed
+	 * afterwards, since the VSI is reloaded under pf->vsi[].
 	 */
 	for (i = 0; i < 15; i++) {
-		if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+		if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+			if (i > 0)
+				vsi = pf->vsi[vf->lan_vsi_idx];
 			break;
+		}
 		msleep(20);
 	}
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
@@ -3873,11 +4026,14 @@
 			 mac, vf_id);
 	}
 
-	/* Force the VF driver stop so it has to reload with new MAC address */
+	/* Force the VF interface down so it has to bring up with new MAC
+	 * address
+	 */
 	i40e_vc_disable_vf(vf);
-	dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
+	dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n");
 
 error_param:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
 
@@ -3924,18 +4080,22 @@
 {
 	u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT);
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	bool allmulti = false, alluni = false;
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_vsi *vsi;
 	struct i40e_vf *vf;
 	int ret = 0;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
-		ret = -EINVAL;
-		goto error_pvid;
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
 	}
 
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error_pvid;
+
 	if ((vlan_id > I40E_MAX_VLANID) || (qos > 7)) {
 		dev_err(&pf->pdev->dev, "Invalid VF Parameters\n");
 		ret = -EINVAL;
@@ -3948,7 +4108,7 @@
 		goto error_pvid;
 	}
 
-	vf = &(pf->vf[vf_id]);
+	vf = &pf->vf[vf_id];
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
 		dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
@@ -4005,6 +4165,15 @@
 	}
 
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* disable promisc modes in case they were enabled */
+	ret = i40e_config_vf_promiscuous_mode(vf, vf->lan_vsi_id,
+					      allmulti, alluni);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Unable to config VF promiscuous mode\n");
+		goto error_pvid;
+	}
+
 	if (vlan_id || qos)
 		ret = i40e_vsi_add_pvid(vsi, vlanprio);
 	else
@@ -4031,6 +4200,12 @@
 
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
+	if (test_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states))
+		alluni = true;
+
+	if (test_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states))
+		allmulti = true;
+
 	/* Schedule the worker thread to take care of applying changes */
 	i40e_service_event_schedule(vsi->back);
 
@@ -4043,9 +4218,17 @@
 	 * default LAN MAC address.
 	 */
 	vf->port_vlan_id = le16_to_cpu(vsi->info.pvid);
+
+	ret = i40e_config_vf_promiscuous_mode(vf, vsi->id, allmulti, alluni);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Unable to config vf promiscuous mode\n");
+		goto error_pvid;
+	}
+
 	ret = 0;
 
 error_pvid:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
 
@@ -4067,20 +4250,24 @@
 	struct i40e_vf *vf;
 	int ret = 0;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d.\n", vf_id);
-		ret = -EINVAL;
-		goto error;
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
 	}
 
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error;
+
 	if (min_tx_rate) {
 		dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for VF %d.\n",
 			min_tx_rate, vf_id);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto error;
 	}
 
-	vf = &(pf->vf[vf_id]);
+	vf = &pf->vf[vf_id];
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
 		dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
@@ -4095,6 +4282,7 @@
 
 	vf->tx_rate = max_tx_rate;
 error:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
 
@@ -4115,20 +4303,21 @@
 	struct i40e_vf *vf;
 	int ret = 0;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
-		ret = -EINVAL;
-		goto error_param;
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
 	}
 
-	vf = &(pf->vf[vf_id]);
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error_param;
+
+	vf = &pf->vf[vf_id];
 	/* first vsi is always the LAN vsi */
 	vsi = pf->vsi[vf->lan_vsi_idx];
-	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
-		dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
-			vf_id);
-		ret = -EAGAIN;
+	if (!vsi) {
+		ret = -ENOENT;
 		goto error_param;
 	}
 
@@ -4152,6 +4341,7 @@
 	ret = 0;
 
 error_param:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
 
@@ -4173,6 +4363,11 @@
 	int abs_vf_id;
 	int ret = 0;
 
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
 	/* validate the request */
 	if (vf_id >= pf->num_alloc_vfs) {
 		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
@@ -4199,7 +4394,7 @@
 		vf->link_forced = true;
 		vf->link_up = true;
 		pfe.event_data.link_event.link_status = true;
-		pfe.event_data.link_event.link_speed = I40E_LINK_SPEED_40GB;
+		pfe.event_data.link_event.link_speed = VIRTCHNL_LINK_SPEED_40GB;
 		break;
 	case IFLA_VF_LINK_STATE_DISABLE:
 		vf->link_forced = true;
@@ -4216,6 +4411,7 @@
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
 
 error_out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
 
@@ -4237,6 +4433,11 @@
 	struct i40e_vf *vf;
 	int ret = 0;
 
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
 	/* validate the request */
 	if (vf_id >= pf->num_alloc_vfs) {
 		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
@@ -4270,6 +4471,7 @@
 		ret = -EIO;
 	}
 out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
 
@@ -4288,15 +4490,22 @@
 	struct i40e_vf *vf;
 	int ret = 0;
 
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
 	/* validate the request */
 	if (vf_id >= pf->num_alloc_vfs) {
 		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
 		dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	vf = &pf->vf[vf_id];
@@ -4319,5 +4528,6 @@
 	}
 
 out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index bf67d62..7164b9b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -13,9 +13,11 @@
 #define I40E_DEFAULT_NUM_MDD_EVENTS_ALLOWED	3
 #define I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED	10
 
-#define I40E_VLAN_PRIORITY_SHIFT	12
+#define I40E_VLAN_PRIORITY_SHIFT	13
 #define I40E_VLAN_MASK			0xFFF
-#define I40E_PRIORITY_MASK		0x7000
+#define I40E_PRIORITY_MASK		0xE000
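+/* per the 802.1Q TCI layout: PCP bits 15:13, DEI bit 12, VID bits 11:0 */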
+
+#define I40E_MAX_VF_PROMISC_FLAGS	3
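+/* i.e. both virtchnl promiscuous flags: unicast (bit 0) | multicast (bit 1) */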
 
 /* Various queue ctrls */
 enum i40e_queue_ctrl {
@@ -97,6 +99,7 @@
 	unsigned int tx_rate;	/* Tx bandwidth limit in Mbps */
 	bool link_forced;
 	bool link_up;		/* only valid if VF link is forced */
+	bool queues_enabled;	/* true if the VF queues are enabled */
 	bool spoofchk;
 	u16 num_mac;
 	u16 num_vlan;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
new file mode 100644
index 0000000..d07e1a8
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -0,0 +1,880 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+
+#include "i40e.h"
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
+
+/**
+ * i40e_xsk_umem_dma_map - DMA maps all UMEM memory for the netdev
+ * @vsi: Current VSI
+ * @umem: UMEM to DMA map
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+static int i40e_xsk_umem_dma_map(struct i40e_vsi *vsi, struct xdp_umem *umem)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct device *dev;
+	unsigned int i, j;
+	dma_addr_t dma;
+
+	dev = &pf->pdev->dev;
+	for (i = 0; i < umem->npgs; i++) {
+		dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
+					 DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
+		if (dma_mapping_error(dev, dma))
+			goto out_unmap;
+
+		umem->pages[i].dma = dma;
+	}
+
+	return 0;
+
+out_unmap:
+	for (j = 0; j < i; j++) {
+		dma_unmap_page_attrs(dev, umem->pages[j].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
+		umem->pages[j].dma = 0;
+	}
+
+	return -1;
+}
+
+/**
+ * i40e_xsk_umem_dma_unmap - DMA unmaps all UMEM memory for the netdev
+ * @vsi: Current VSI
+ * @umem: UMEM to DMA unmap
+ **/
+static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct device *dev;
+	unsigned int i;
+
+	dev = &pf->pdev->dev;
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
+
+		umem->pages[i].dma = 0;
+	}
+}
+
+/**
+ * i40e_xsk_umem_enable - Enable/associate a UMEM to a certain ring/qid
+ * @vsi: Current VSI
+ * @umem: UMEM
+ * @qid: Rx ring to associate UMEM to
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
+				u16 qid)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct xdp_umem_fq_reuse *reuseq;
+	bool if_running;
+	int err;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+
+	if (qid >= vsi->num_queue_pairs)
+		return -EINVAL;
+
+	if (qid >= netdev->real_num_rx_queues ||
+	    qid >= netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+	if (!reuseq)
+		return -ENOMEM;
+
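+	/* Install the newly sized reuse queue on the umem and free the one
+	 * (possibly NULL) that it replaces.
+	 */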
+	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+	err = i40e_xsk_umem_dma_map(vsi, umem);
+	if (err)
+		return err;
+
+	set_bit(qid, vsi->af_xdp_zc_qps);
+
+	if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);
+
+	if (if_running) {
+		err = i40e_queue_pair_disable(vsi, qid);
+		if (err)
+			return err;
+
+		err = i40e_queue_pair_enable(vsi, qid);
+		if (err)
+			return err;
+
+		/* Kick start the NAPI context so that receiving will start */
+		err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_xsk_umem_disable - Disassociate a UMEM from a certain ring/qid
+ * @vsi: Current VSI
+ * @qid: Rx ring to disassociate the UMEM from
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct xdp_umem *umem;
+	bool if_running;
+	int err;
+
+	umem = xdp_get_umem_from_qid(netdev, qid);
+	if (!umem)
+		return -EINVAL;
+
+	if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);
+
+	if (if_running) {
+		err = i40e_queue_pair_disable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	clear_bit(qid, vsi->af_xdp_zc_qps);
+	i40e_xsk_umem_dma_unmap(vsi, umem);
+
+	if (if_running) {
+		err = i40e_queue_pair_enable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, or NULL to disable
+ * @qid: Rx ring to associate the UMEM to, or disassociate it from
+ *
+ * This function enables or disables a UMEM to a certain ring.
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
+			u16 qid)
+{
+	return umem ? i40e_xsk_umem_enable(vsi, umem, qid) :
+		i40e_xsk_umem_disable(vsi, qid);
+}
+
+/**
+ * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * This function runs the Rx ring's XDP program on the received buffer.
+ *
+ * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR}
+ **/
+static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	int err, result = I40E_XDP_PASS;
+	struct i40e_ring *xdp_ring;
+	struct bpf_prog *xdp_prog;
+	u64 offset;
+	u32 act;
+
+	rcu_read_lock();
+	/* NB! xdp_prog is never NULL here, since this path is only
+	 * enabled by installing an XDP program.
+	 */
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	offset = xdp->data - xdp->data_hard_start;
+
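+	/* The program may have moved xdp->data; fold the new offset into the
+	 * umem handle so the address handed to the socket points at the
+	 * adjusted payload.
+	 */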
+	xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping packet */
+	case XDP_DROP:
+		result = I40E_XDP_CONSUMED;
+		break;
+	}
+	rcu_read_unlock();
+	return result;
+}
+
+/**
+ * i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer
+ * @rx_ring: Rx ring
+ * @bi: Rx buffer to populate
+ *
+ * This function allocates an Rx buffer. The buffer can come from the fill
+ * queue or via the recycle queue (next_to_alloc).
+ *
+ * Returns true for a successful allocation, false otherwise
+ **/
+static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
+				 struct i40e_rx_buffer *bi)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	void *addr = bi->addr;
+	u64 handle, hr;
+
+	if (addr) {
+		rx_ring->rx_stats.page_reuse_count++;
+		return true;
+	}
+
+	if (!xsk_umem_peek_addr(umem, &handle)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	bi->dma = xdp_umem_get_dma(umem, handle);
+	bi->dma += hr;
+
+	bi->addr = xdp_umem_get_data(umem, handle);
+	bi->addr += hr;
+
+	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
+
+	xsk_umem_discard_addr(umem);
+	return true;
+}
+
+/**
+ * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer
+ * @rx_ring: Rx ring
+ * @bi: Rx buffer to populate
+ *
+ * This function allocates an Rx buffer. The buffer can come from the fill
+ * queue or via the reuse queue.
+ *
+ * Returns true for a successful allocation, false otherwise
+ **/
+static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
+				      struct i40e_rx_buffer *bi)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	u64 handle, hr;
+
+	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	handle &= rx_ring->xsk_umem->chunk_mask;
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	bi->dma = xdp_umem_get_dma(umem, handle);
+	bi->dma += hr;
+
+	bi->addr = xdp_umem_get_data(umem, handle);
+	bi->addr += hr;
+
+	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
+
+	xsk_umem_discard_addr_rq(umem);
+	return true;
+}
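+
+/* Of the two allocators above, i40e_alloc_buffer_zc() pulls only from the
+ * fill queue, while the "slow" variant also consults the kernel-side reuse
+ * queue via the *_rq() helpers.
+ */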
+
+static __always_inline bool
+__i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
+			   bool alloc(struct i40e_ring *rx_ring,
+				      struct i40e_rx_buffer *bi))
+{
+	u16 ntu = rx_ring->next_to_use;
+	union i40e_rx_desc *rx_desc;
+	struct i40e_rx_buffer *bi;
+	bool ok = true;
+
+	rx_desc = I40E_RX_DESC(rx_ring, ntu);
+	bi = &rx_ring->rx_bi[ntu];
+	do {
+		if (!alloc(rx_ring, bi)) {
+			ok = false;
+			goto no_buffers;
+		}
+
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 0,
+						 rx_ring->rx_buf_len,
+						 DMA_BIDIRECTIONAL);
+
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+
+		rx_desc++;
+		bi++;
+		ntu++;
+
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = I40E_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_bi;
+			ntu = 0;
+		}
+
+		rx_desc->wb.qword1.status_error_len = 0;
+		count--;
+	} while (count);
+
+no_buffers:
+	if (rx_ring->next_to_use != ntu)
+		i40e_release_rx_desc(rx_ring, ntu);
+
+	return ok;
+}
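+
+/* Passing the allocator as a parameter of an __always_inline helper lets the
+ * compiler specialize both wrappers below, avoiding an indirect call in the
+ * hot path.
+ */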
+
+/**
+ * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the reuse queue
+ * or fill ring and places them on the Rx ring.
+ *
+ * Returns true for a successful allocation, false otherwise
+ **/
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
+{
+	return __i40e_alloc_rx_buffers_zc(rx_ring, count,
+					  i40e_alloc_buffer_slow_zc);
+}
+
+/**
+ * i40e_alloc_rx_buffers_fast_zc - Allocates a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns true for a successful allocation, false otherwise
+ **/
+static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
+{
+	return __i40e_alloc_rx_buffers_zc(rx_ring, count,
+					  i40e_alloc_buffer_zc);
+}
+
+/**
+ * i40e_get_rx_buffer_zc - Return the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: The size of the rx buffer (read from descriptor)
+ *
+ * This function returns the current, received Rx buffer, and also
+ * does DMA synchronization for the Rx ring.
+ *
+ * Returns the received Rx buffer
+ **/
+static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
+						    const unsigned int size)
+{
+	struct i40e_rx_buffer *bi;
+
+	bi = &rx_ring->rx_bi[rx_ring->next_to_clean];
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      bi->dma, 0,
+				      size,
+				      DMA_BIDIRECTIONAL);
+
+	return bi;
+}
+
+/**
+ * i40e_reuse_rx_buffer_zc - Recycle an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_bi: The Rx buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the
+ * recycle queue (next_to_alloc).
+ **/
+static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
+				    struct i40e_rx_buffer *old_bi)
+{
+	struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
+	u16 nta = rx_ring->next_to_alloc;
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	new_bi->dma = old_bi->dma;
+	new_bi->addr = old_bi->addr;
+	new_bi->handle = old_bi->handle;
+
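+	/* invalidate the old slot so the allocator will not hand the same
+	 * buffer out twice
+	 */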
+	old_bi->addr = NULL;
+}
+
+/**
+ * i40e_zca_free - Free callback for MEM_TYPE_ZERO_COPY allocations
+ * @alloc: Zero-copy allocator
+ * @handle: Buffer handle
+ **/
+void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
+{
+	struct i40e_rx_buffer *bi;
+	struct i40e_ring *rx_ring;
+	u64 hr, mask;
+	u16 nta;
+
+	rx_ring = container_of(alloc, struct i40e_ring, zca);
+	hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+	mask = rx_ring->xsk_umem->chunk_mask;
+
+	nta = rx_ring->next_to_alloc;
+	bi = &rx_ring->rx_bi[nta];
+
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	handle &= mask;
+
+	bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
+	bi->dma += hr;
+
+	bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
+	bi->addr += hr;
+
+	bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
+					    rx_ring->xsk_umem->headroom);
+}
+
+/**
+ * i40e_construct_skb_zc - Create skbuff from zero-copy Rx buffer
+ * @rx_ring: Rx ring
+ * @bi: Rx buffer
+ * @xdp: xdp_buff
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb, or NULL on failure.
+ **/
+static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
+					     struct i40e_rx_buffer *bi,
+					     struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int datasize = xdp->data_end - xdp->data;
+	struct sk_buff *skb;
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+			       xdp->data_end - xdp->data_hard_start,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
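+	/* the frame contents have to be copied out of the UMEM: the
+	 * underlying buffer is recycled below and may be overwritten by HW
+	 */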
+	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	i40e_reuse_rx_buffer_zc(rx_ring, bi);
+	return skb;
+}
+
+/**
+ * i40e_inc_ntc - Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(I40E_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
+ * @rx_ring: Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns amount of work completed
+ **/
+int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
+	unsigned int xdp_res, xdp_xmit = 0;
+	bool failure = false;
+	struct sk_buff *skb;
+	struct xdp_buff xdp;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		struct i40e_rx_buffer *bi;
+		union i40e_rx_desc *rx_desc;
+		unsigned int size;
+		u64 qword;
+
+		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
+			failure = failure ||
+				  !i40e_alloc_rx_buffers_fast_zc(rx_ring,
+								 cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		bi = i40e_clean_programming_status(rx_ring, rx_desc,
+						   qword);
+		if (unlikely(bi)) {
+			i40e_reuse_rx_buffer_zc(rx_ring, bi);
+			cleaned_count++;
+			continue;
+		}
+
+		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+		if (!size)
+			break;
+
+		bi = i40e_get_rx_buffer_zc(rx_ring, size);
+		xdp.data = bi->addr;
+		xdp.data_meta = xdp.data;
+		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+		xdp.data_end = xdp.data + size;
+		xdp.handle = bi->handle;
+
+		xdp_res = i40e_run_xdp_zc(rx_ring, &xdp);
+		if (xdp_res) {
+			if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
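+				/* the buffer is now owned by the XDP Tx or
+				 * redirect path; clear addr so it is not
+				 * recycled
+				 */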
+				bi->addr = NULL;
+			} else {
+				i40e_reuse_rx_buffer_zc(rx_ring, bi);
+			}
+
+			total_rx_bytes += size;
+			total_rx_packets++;
+
+			cleaned_count++;
+			i40e_inc_ntc(rx_ring);
+			continue;
+		}
+
+		/* XDP_PASS path */
+
+		/* NB! We are not checking for errors using
+		 * i40e_test_staterr with
+		 * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that
+		 * SBP is *not* set in PRT_SBPVSI (default not set).
+		 */
+		skb = i40e_construct_skb_zc(rx_ring, bi, &xdp);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buff_failed++;
+			break;
+		}
+
+		cleaned_count++;
+		i40e_inc_ntc(rx_ring);
+
+		if (eth_skb_pad(skb))
+			continue;
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		i40e_process_skb_fields(rx_ring, rx_desc, skb);
+		napi_gro_receive(&rx_ring->q_vector->napi, skb);
+	}
+
+	i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
+	i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
+
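+	/* With the need_wakeup feature, the flag below tells user space
+	 * whether it has to wake up the driver (e.g. via poll()) before
+	 * the fill queue will be consumed again.
+	 */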
+	if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+		else
+			xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+
+		return (int)total_rx_packets;
+	}
+	return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if the work is finished.
+ **/
+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
+{
+	struct i40e_tx_desc *tx_desc = NULL;
+	struct i40e_tx_buffer *tx_bi;
+	bool work_done = true;
+	struct xdp_desc desc;
+	dma_addr_t dma;
+
+	while (budget-- > 0) {
+		if (unlikely(!I40E_DESC_UNUSED(xdp_ring))) {
+			xdp_ring->tx_stats.tx_busy++;
+			work_done = false;
+			break;
+		}
+
+		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+			break;
+
+		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+					   DMA_BIDIRECTIONAL);
+
+		tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
+		tx_bi->bytecount = desc.len;
+
+		tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->buffer_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(I40E_TX_DESC_CMD_ICRC
+				   | I40E_TX_DESC_CMD_EOP,
+				   0, desc.len, 0);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		/* Request an interrupt for the last frame and bump tail ptr. */
+		tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS <<
+						 I40E_TXD_QW1_CMD_SHIFT);
+		i40e_xdp_ring_update_tail(xdp_ring);
+
+		xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+	}
+
+	return !!budget && work_done;
+}
+
+/**
+ * i40e_clean_xdp_tx_buffer - Frees and unmaps an XDP Tx entry
+ * @tx_ring: XDP Tx ring
+ * @tx_bi: Tx buffer info to clean
+ **/
+static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
+				     struct i40e_tx_buffer *tx_bi)
+{
+	xdp_return_frame(tx_bi->xdpf);
+	dma_unmap_single(tx_ring->dev,
+			 dma_unmap_addr(tx_bi, dma),
+			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_bi, len, 0);
+}
+
+/**
+ * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries
+ * @vsi: Current VSI
+ * @tx_ring: XDP Tx ring
+ * @napi_budget: NAPI budget
+ *
+ * Returns true if cleanup/transmission is done.
+ **/
+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
+			   struct i40e_ring *tx_ring, int napi_budget)
+{
+	unsigned int ntc, total_bytes = 0, budget = vsi->work_limit;
+	u32 i, completed_frames, frames_ready, xsk_frames = 0;
+	struct xdp_umem *umem = tx_ring->xsk_umem;
+	u32 head_idx = i40e_get_head(tx_ring);
+	bool work_done = true, xmit_done;
+	struct i40e_tx_buffer *tx_bi;
+
+	if (head_idx < tx_ring->next_to_clean)
+		head_idx += tx_ring->count;
+	frames_ready = head_idx - tx_ring->next_to_clean;
+
+	if (frames_ready == 0) {
+		goto out_xmit;
+	} else if (frames_ready > budget) {
+		completed_frames = budget;
+		work_done = false;
+	} else {
+		completed_frames = frames_ready;
+	}
+
+	ntc = tx_ring->next_to_clean;
+
+	for (i = 0; i < completed_frames; i++) {
+		tx_bi = &tx_ring->tx_bi[ntc];
+
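+		/* a non-NULL xdpf marks a frame from the XDP_TX/redirect
+		 * path that must be unmapped and returned; a NULL xdpf is
+		 * an AF_XDP descriptor to be completed back to the umem
+		 */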
+		if (tx_bi->xdpf)
+			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+		total_bytes += tx_bi->bytecount;
+
+		if (++ntc >= tx_ring->count)
+			ntc = 0;
+	}
+
+	tx_ring->next_to_clean += completed_frames;
+	if (unlikely(tx_ring->next_to_clean >= tx_ring->count))
+		tx_ring->next_to_clean -= tx_ring->count;
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(umem, xsk_frames);
+
+	i40e_arm_wb(tx_ring, vsi, budget);
+	i40e_update_tx_stats(tx_ring, completed_frames, total_bytes);
+
+out_xmit:
+	if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
+		xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+
+	xmit_done = i40e_xmit_zc(tx_ring, budget);
+
+	return work_done && xmit_done;
+}
+
+/**
+ * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
+ * @dev: the netdevice
+ * @queue_id: queue id to wake up
+ * @flags: ignored in our case since we have Rx and Tx in the same NAPI.
+ *
+ * Returns <0 for errors, 0 otherwise.
+ **/
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_ring *ring;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		return -ENXIO;
+
+	if (queue_id >= vsi->num_queue_pairs)
+		return -ENXIO;
+
+	if (!vsi->xdp_rings[queue_id]->xsk_umem)
+		return -ENXIO;
+
+	ring = vsi->xdp_rings[queue_id];
+
+	/* The idea here is that if NAPI is running, mark a miss, so
+	 * it will run again. If not, trigger an interrupt and
+	 * schedule the NAPI from interrupt context. If NAPI would be
+	 * scheduled here, the interrupt affinity would not be
+	 * honored.
+	 */
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi))
+		i40e_force_wb(vsi, ring->q_vector);
+
+	return 0;
+}
+
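+/**
+ * i40e_xsk_clean_rx_ring - Clean the Rx ring on shutdown
+ * @rx_ring: Rx ring
+ **/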
+void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
+{
+	u16 i;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+
+		if (!rx_bi->addr)
+			continue;
+
+		xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_bi->handle);
+		rx_bi->addr = NULL;
+	}
+}
+
+/**
+ * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
+ * @tx_ring: XDP Tx ring
+ **/
+void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	struct xdp_umem *umem = tx_ring->xsk_umem;
+	struct i40e_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		tx_bi = &tx_ring->tx_bi[ntc];
+
+		if (tx_bi->xdpf)
+			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		ntc++;
+		if (ntc >= tx_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(umem, xsk_frames);
+}
+
+/**
+ * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: vsi
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ **/
+bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	int i;
+
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		if (xdp_get_umem_from_qid(netdev, i))
+			return true;
+	}
+
+	return false;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
new file mode 100644
index 0000000..9ed59c1
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Intel Corporation. */
+
+#ifndef _I40E_XSK_H_
+#define _I40E_XSK_H_
+
+struct i40e_vsi;
+struct xdp_umem;
+struct zero_copy_allocator;
+
+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
+int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
+			u16 qid);
+void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
+int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
+
+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
+			   struct i40e_ring *tx_ring, int napi_budget);
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+
+#endif /* _I40E_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile
deleted file mode 100644
index 3c5c6e9..0000000
--- a/drivers/net/ethernet/intel/i40evf/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright(c) 2013 - 2018 Intel Corporation.
-
-#
-## Makefile for the Intel(R) 40GbE VF driver
-#
-#
-
-ccflags-y += -I$(src)
-subdir-ccflags-y += -I$(src)
-
-obj-$(CONFIG_I40EVF) += i40evf.o
-
-i40evf-objs :=	i40evf_main.o i40evf_ethtool.o i40evf_virtchnl.o \
-		i40e_txrx.o i40e_common.o i40e_adminq.o i40evf_client.o
-
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
deleted file mode 100644
index 1f264b9..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_ADMINQ_H_
-#define _I40E_ADMINQ_H_
-
-#include "i40e_osdep.h"
-#include "i40e_status.h"
-#include "i40e_adminq_cmd.h"
-
-#define I40E_ADMINQ_DESC(R, i)   \
-	(&(((struct i40e_aq_desc *)((R).desc_buf.va))[i]))
-
-#define I40E_ADMINQ_DESC_ALIGNMENT 4096
-
-struct i40e_adminq_ring {
-	struct i40e_virt_mem dma_head;	/* space for dma structures */
-	struct i40e_dma_mem desc_buf;	/* descriptor ring memory */
-	struct i40e_virt_mem cmd_buf;	/* command buffer memory */
-
-	union {
-		struct i40e_dma_mem *asq_bi;
-		struct i40e_dma_mem *arq_bi;
-	} r;
-
-	u16 count;		/* Number of descriptors */
-	u16 rx_buf_len;		/* Admin Receive Queue buffer length */
-
-	/* used for interrupt processing */
-	u16 next_to_use;
-	u16 next_to_clean;
-
-	/* used for queue tracking */
-	u32 head;
-	u32 tail;
-	u32 len;
-	u32 bah;
-	u32 bal;
-};
-
-/* ASQ transaction details */
-struct i40e_asq_cmd_details {
-	void *callback; /* cast from type I40E_ADMINQ_CALLBACK */
-	u64 cookie;
-	u16 flags_ena;
-	u16 flags_dis;
-	bool async;
-	bool postpone;
-	struct i40e_aq_desc *wb_desc;
-};
-
-#define I40E_ADMINQ_DETAILS(R, i)   \
-	(&(((struct i40e_asq_cmd_details *)((R).cmd_buf.va))[i]))
-
-/* ARQ event information */
-struct i40e_arq_event_info {
-	struct i40e_aq_desc desc;
-	u16 msg_len;
-	u16 buf_len;
-	u8 *msg_buf;
-};
-
-/* Admin Queue information */
-struct i40e_adminq_info {
-	struct i40e_adminq_ring arq;    /* receive queue */
-	struct i40e_adminq_ring asq;    /* send queue */
-	u32 asq_cmd_timeout;            /* send queue cmd write back timeout*/
-	u16 num_arq_entries;            /* receive queue depth */
-	u16 num_asq_entries;            /* send queue depth */
-	u16 arq_buf_size;               /* receive queue buffer size */
-	u16 asq_buf_size;               /* send queue buffer size */
-	u16 fw_maj_ver;                 /* firmware major version */
-	u16 fw_min_ver;                 /* firmware minor version */
-	u32 fw_build;                   /* firmware build number */
-	u16 api_maj_ver;                /* api major version */
-	u16 api_min_ver;                /* api minor version */
-
-	struct mutex asq_mutex; /* Send queue lock */
-	struct mutex arq_mutex; /* Receive queue lock */
-
-	/* last status values on send and receive queues */
-	enum i40e_admin_queue_err asq_last_status;
-	enum i40e_admin_queue_err arq_last_status;
-};
-
-/**
- * i40e_aq_rc_to_posix - convert errors to user-land codes
- * aq_ret: AdminQ handler error code can override aq_rc
- * aq_rc: AdminQ firmware error code to convert
- **/
-static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc)
-{
-	int aq_to_posix[] = {
-		0,           /* I40E_AQ_RC_OK */
-		-EPERM,      /* I40E_AQ_RC_EPERM */
-		-ENOENT,     /* I40E_AQ_RC_ENOENT */
-		-ESRCH,      /* I40E_AQ_RC_ESRCH */
-		-EINTR,      /* I40E_AQ_RC_EINTR */
-		-EIO,        /* I40E_AQ_RC_EIO */
-		-ENXIO,      /* I40E_AQ_RC_ENXIO */
-		-E2BIG,      /* I40E_AQ_RC_E2BIG */
-		-EAGAIN,     /* I40E_AQ_RC_EAGAIN */
-		-ENOMEM,     /* I40E_AQ_RC_ENOMEM */
-		-EACCES,     /* I40E_AQ_RC_EACCES */
-		-EFAULT,     /* I40E_AQ_RC_EFAULT */
-		-EBUSY,      /* I40E_AQ_RC_EBUSY */
-		-EEXIST,     /* I40E_AQ_RC_EEXIST */
-		-EINVAL,     /* I40E_AQ_RC_EINVAL */
-		-ENOTTY,     /* I40E_AQ_RC_ENOTTY */
-		-ENOSPC,     /* I40E_AQ_RC_ENOSPC */
-		-ENOSYS,     /* I40E_AQ_RC_ENOSYS */
-		-ERANGE,     /* I40E_AQ_RC_ERANGE */
-		-EPIPE,      /* I40E_AQ_RC_EFLUSHED */
-		-ESPIPE,     /* I40E_AQ_RC_BAD_ADDR */
-		-EROFS,      /* I40E_AQ_RC_EMODE */
-		-EFBIG,      /* I40E_AQ_RC_EFBIG */
-	};
-
-	/* aq_rc is invalid if AQ timed out */
-	if (aq_ret == I40E_ERR_ADMIN_QUEUE_TIMEOUT)
-		return -EAGAIN;
-
-	if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0]))))
-		return -ERANGE;
-
-	return aq_to_posix[aq_rc];
-}
-
-/* general information */
-#define I40E_AQ_LARGE_BUF	512
-#define I40E_ASQ_CMD_TIMEOUT	250000  /* usecs */
-
-void i40evf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
-				       u16 opcode);
-
-#endif /* _I40E_ADMINQ_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
deleted file mode 100644
index 5fd8529..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ /dev/null
@@ -1,2717 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_ADMINQ_CMD_H_
-#define _I40E_ADMINQ_CMD_H_
-
-/* This header file defines the i40e Admin Queue commands and is shared between
- * i40e Firmware and Software.
- *
- * This file needs to comply with the Linux Kernel coding style.
- */
-
-#define I40E_FW_API_VERSION_MAJOR	0x0001
-#define I40E_FW_API_VERSION_MINOR_X722	0x0005
-#define I40E_FW_API_VERSION_MINOR_X710	0x0007
-
-#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
-					I40E_FW_API_VERSION_MINOR_X710 : \
-					I40E_FW_API_VERSION_MINOR_X722)
-
-/* API version 1.7 implements additional link and PHY-specific APIs  */
-#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
-
-struct i40e_aq_desc {
-	__le16 flags;
-	__le16 opcode;
-	__le16 datalen;
-	__le16 retval;
-	__le32 cookie_high;
-	__le32 cookie_low;
-	union {
-		struct {
-			__le32 param0;
-			__le32 param1;
-			__le32 param2;
-			__le32 param3;
-		} internal;
-		struct {
-			__le32 param0;
-			__le32 param1;
-			__le32 addr_high;
-			__le32 addr_low;
-		} external;
-		u8 raw[16];
-	} params;
-};
-
-/* Flags sub-structure
- * |0  |1  |2  |3  |4  |5  |6  |7  |8  |9  |10 |11 |12 |13 |14 |15 |
- * |DD |CMP|ERR|VFE| * *  RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE |
- */
-
-/* command flags and offsets*/
-#define I40E_AQ_FLAG_DD_SHIFT	0
-#define I40E_AQ_FLAG_CMP_SHIFT	1
-#define I40E_AQ_FLAG_ERR_SHIFT	2
-#define I40E_AQ_FLAG_VFE_SHIFT	3
-#define I40E_AQ_FLAG_LB_SHIFT	9
-#define I40E_AQ_FLAG_RD_SHIFT	10
-#define I40E_AQ_FLAG_VFC_SHIFT	11
-#define I40E_AQ_FLAG_BUF_SHIFT	12
-#define I40E_AQ_FLAG_SI_SHIFT	13
-#define I40E_AQ_FLAG_EI_SHIFT	14
-#define I40E_AQ_FLAG_FE_SHIFT	15
-
-#define I40E_AQ_FLAG_DD		BIT(I40E_AQ_FLAG_DD_SHIFT)  /* 0x1    */
-#define I40E_AQ_FLAG_CMP	BIT(I40E_AQ_FLAG_CMP_SHIFT) /* 0x2    */
-#define I40E_AQ_FLAG_ERR	BIT(I40E_AQ_FLAG_ERR_SHIFT) /* 0x4    */
-#define I40E_AQ_FLAG_VFE	BIT(I40E_AQ_FLAG_VFE_SHIFT) /* 0x8    */
-#define I40E_AQ_FLAG_LB		BIT(I40E_AQ_FLAG_LB_SHIFT)  /* 0x200  */
-#define I40E_AQ_FLAG_RD		BIT(I40E_AQ_FLAG_RD_SHIFT)  /* 0x400  */
-#define I40E_AQ_FLAG_VFC	BIT(I40E_AQ_FLAG_VFC_SHIFT) /* 0x800  */
-#define I40E_AQ_FLAG_BUF	BIT(I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */
-#define I40E_AQ_FLAG_SI		BIT(I40E_AQ_FLAG_SI_SHIFT)  /* 0x2000 */
-#define I40E_AQ_FLAG_EI		BIT(I40E_AQ_FLAG_EI_SHIFT)  /* 0x4000 */
-#define I40E_AQ_FLAG_FE		BIT(I40E_AQ_FLAG_FE_SHIFT)  /* 0x8000 */
-
-/* error codes */
-enum i40e_admin_queue_err {
-	I40E_AQ_RC_OK		= 0,  /* success */
-	I40E_AQ_RC_EPERM	= 1,  /* Operation not permitted */
-	I40E_AQ_RC_ENOENT	= 2,  /* No such element */
-	I40E_AQ_RC_ESRCH	= 3,  /* Bad opcode */
-	I40E_AQ_RC_EINTR	= 4,  /* operation interrupted */
-	I40E_AQ_RC_EIO		= 5,  /* I/O error */
-	I40E_AQ_RC_ENXIO	= 6,  /* No such resource */
-	I40E_AQ_RC_E2BIG	= 7,  /* Arg too long */
-	I40E_AQ_RC_EAGAIN	= 8,  /* Try again */
-	I40E_AQ_RC_ENOMEM	= 9,  /* Out of memory */
-	I40E_AQ_RC_EACCES	= 10, /* Permission denied */
-	I40E_AQ_RC_EFAULT	= 11, /* Bad address */
-	I40E_AQ_RC_EBUSY	= 12, /* Device or resource busy */
-	I40E_AQ_RC_EEXIST	= 13, /* object already exists */
-	I40E_AQ_RC_EINVAL	= 14, /* Invalid argument */
-	I40E_AQ_RC_ENOTTY	= 15, /* Not a typewriter */
-	I40E_AQ_RC_ENOSPC	= 16, /* No space left or alloc failure */
-	I40E_AQ_RC_ENOSYS	= 17, /* Function not implemented */
-	I40E_AQ_RC_ERANGE	= 18, /* Parameter out of range */
-	I40E_AQ_RC_EFLUSHED	= 19, /* Cmd flushed due to prev cmd error */
-	I40E_AQ_RC_BAD_ADDR	= 20, /* Descriptor contains a bad pointer */
-	I40E_AQ_RC_EMODE	= 21, /* Op not allowed in current dev mode */
-	I40E_AQ_RC_EFBIG	= 22, /* File too large */
-};
-
-/* Admin Queue command opcodes */
-enum i40e_admin_queue_opc {
-	/* aq commands */
-	i40e_aqc_opc_get_version	= 0x0001,
-	i40e_aqc_opc_driver_version	= 0x0002,
-	i40e_aqc_opc_queue_shutdown	= 0x0003,
-	i40e_aqc_opc_set_pf_context	= 0x0004,
-
-	/* resource ownership */
-	i40e_aqc_opc_request_resource	= 0x0008,
-	i40e_aqc_opc_release_resource	= 0x0009,
-
-	i40e_aqc_opc_list_func_capabilities	= 0x000A,
-	i40e_aqc_opc_list_dev_capabilities	= 0x000B,
-
-	/* Proxy commands */
-	i40e_aqc_opc_set_proxy_config		= 0x0104,
-	i40e_aqc_opc_set_ns_proxy_table_entry	= 0x0105,
-
-	/* LAA */
-	i40e_aqc_opc_mac_address_read	= 0x0107,
-	i40e_aqc_opc_mac_address_write	= 0x0108,
-
-	/* PXE */
-	i40e_aqc_opc_clear_pxe_mode	= 0x0110,
-
-	/* WoL commands */
-	i40e_aqc_opc_set_wol_filter	= 0x0120,
-	i40e_aqc_opc_get_wake_reason	= 0x0121,
-
-	/* internal switch commands */
-	i40e_aqc_opc_get_switch_config		= 0x0200,
-	i40e_aqc_opc_add_statistics		= 0x0201,
-	i40e_aqc_opc_remove_statistics		= 0x0202,
-	i40e_aqc_opc_set_port_parameters	= 0x0203,
-	i40e_aqc_opc_get_switch_resource_alloc	= 0x0204,
-	i40e_aqc_opc_set_switch_config		= 0x0205,
-	i40e_aqc_opc_rx_ctl_reg_read		= 0x0206,
-	i40e_aqc_opc_rx_ctl_reg_write		= 0x0207,
-
-	i40e_aqc_opc_add_vsi			= 0x0210,
-	i40e_aqc_opc_update_vsi_parameters	= 0x0211,
-	i40e_aqc_opc_get_vsi_parameters		= 0x0212,
-
-	i40e_aqc_opc_add_pv			= 0x0220,
-	i40e_aqc_opc_update_pv_parameters	= 0x0221,
-	i40e_aqc_opc_get_pv_parameters		= 0x0222,
-
-	i40e_aqc_opc_add_veb			= 0x0230,
-	i40e_aqc_opc_update_veb_parameters	= 0x0231,
-	i40e_aqc_opc_get_veb_parameters		= 0x0232,
-
-	i40e_aqc_opc_delete_element		= 0x0243,
-
-	i40e_aqc_opc_add_macvlan		= 0x0250,
-	i40e_aqc_opc_remove_macvlan		= 0x0251,
-	i40e_aqc_opc_add_vlan			= 0x0252,
-	i40e_aqc_opc_remove_vlan		= 0x0253,
-	i40e_aqc_opc_set_vsi_promiscuous_modes	= 0x0254,
-	i40e_aqc_opc_add_tag			= 0x0255,
-	i40e_aqc_opc_remove_tag			= 0x0256,
-	i40e_aqc_opc_add_multicast_etag		= 0x0257,
-	i40e_aqc_opc_remove_multicast_etag	= 0x0258,
-	i40e_aqc_opc_update_tag			= 0x0259,
-	i40e_aqc_opc_add_control_packet_filter	= 0x025A,
-	i40e_aqc_opc_remove_control_packet_filter	= 0x025B,
-	i40e_aqc_opc_add_cloud_filters		= 0x025C,
-	i40e_aqc_opc_remove_cloud_filters	= 0x025D,
-	i40e_aqc_opc_clear_wol_switch_filters	= 0x025E,
-
-	i40e_aqc_opc_add_mirror_rule	= 0x0260,
-	i40e_aqc_opc_delete_mirror_rule	= 0x0261,
-
-	/* Dynamic Device Personalization */
-	i40e_aqc_opc_write_personalization_profile	= 0x0270,
-	i40e_aqc_opc_get_personalization_profile_list	= 0x0271,
-
-	/* DCB commands */
-	i40e_aqc_opc_dcb_ignore_pfc	= 0x0301,
-	i40e_aqc_opc_dcb_updated	= 0x0302,
-	i40e_aqc_opc_set_dcb_parameters = 0x0303,
-
-	/* TX scheduler */
-	i40e_aqc_opc_configure_vsi_bw_limit		= 0x0400,
-	i40e_aqc_opc_configure_vsi_ets_sla_bw_limit	= 0x0406,
-	i40e_aqc_opc_configure_vsi_tc_bw		= 0x0407,
-	i40e_aqc_opc_query_vsi_bw_config		= 0x0408,
-	i40e_aqc_opc_query_vsi_ets_sla_config		= 0x040A,
-	i40e_aqc_opc_configure_switching_comp_bw_limit	= 0x0410,
-
-	i40e_aqc_opc_enable_switching_comp_ets			= 0x0413,
-	i40e_aqc_opc_modify_switching_comp_ets			= 0x0414,
-	i40e_aqc_opc_disable_switching_comp_ets			= 0x0415,
-	i40e_aqc_opc_configure_switching_comp_ets_bw_limit	= 0x0416,
-	i40e_aqc_opc_configure_switching_comp_bw_config		= 0x0417,
-	i40e_aqc_opc_query_switching_comp_ets_config		= 0x0418,
-	i40e_aqc_opc_query_port_ets_config			= 0x0419,
-	i40e_aqc_opc_query_switching_comp_bw_config		= 0x041A,
-	i40e_aqc_opc_suspend_port_tx				= 0x041B,
-	i40e_aqc_opc_resume_port_tx				= 0x041C,
-	i40e_aqc_opc_configure_partition_bw			= 0x041D,
-	/* hmc */
-	i40e_aqc_opc_query_hmc_resource_profile	= 0x0500,
-	i40e_aqc_opc_set_hmc_resource_profile	= 0x0501,
-
-	/* phy commands*/
-	i40e_aqc_opc_get_phy_abilities		= 0x0600,
-	i40e_aqc_opc_set_phy_config		= 0x0601,
-	i40e_aqc_opc_set_mac_config		= 0x0603,
-	i40e_aqc_opc_set_link_restart_an	= 0x0605,
-	i40e_aqc_opc_get_link_status		= 0x0607,
-	i40e_aqc_opc_set_phy_int_mask		= 0x0613,
-	i40e_aqc_opc_get_local_advt_reg		= 0x0614,
-	i40e_aqc_opc_set_local_advt_reg		= 0x0615,
-	i40e_aqc_opc_get_partner_advt		= 0x0616,
-	i40e_aqc_opc_set_lb_modes		= 0x0618,
-	i40e_aqc_opc_get_phy_wol_caps		= 0x0621,
-	i40e_aqc_opc_set_phy_debug		= 0x0622,
-	i40e_aqc_opc_upload_ext_phy_fm		= 0x0625,
-	i40e_aqc_opc_run_phy_activity		= 0x0626,
-	i40e_aqc_opc_set_phy_register		= 0x0628,
-	i40e_aqc_opc_get_phy_register		= 0x0629,
-
-	/* NVM commands */
-	i40e_aqc_opc_nvm_read			= 0x0701,
-	i40e_aqc_opc_nvm_erase			= 0x0702,
-	i40e_aqc_opc_nvm_update			= 0x0703,
-	i40e_aqc_opc_nvm_config_read		= 0x0704,
-	i40e_aqc_opc_nvm_config_write		= 0x0705,
-	i40e_aqc_opc_oem_post_update		= 0x0720,
-	i40e_aqc_opc_thermal_sensor		= 0x0721,
-
-	/* virtualization commands */
-	i40e_aqc_opc_send_msg_to_pf		= 0x0801,
-	i40e_aqc_opc_send_msg_to_vf		= 0x0802,
-	i40e_aqc_opc_send_msg_to_peer		= 0x0803,
-
-	/* alternate structure */
-	i40e_aqc_opc_alternate_write		= 0x0900,
-	i40e_aqc_opc_alternate_write_indirect	= 0x0901,
-	i40e_aqc_opc_alternate_read		= 0x0902,
-	i40e_aqc_opc_alternate_read_indirect	= 0x0903,
-	i40e_aqc_opc_alternate_write_done	= 0x0904,
-	i40e_aqc_opc_alternate_set_mode		= 0x0905,
-	i40e_aqc_opc_alternate_clear_port	= 0x0906,
-
-	/* LLDP commands */
-	i40e_aqc_opc_lldp_get_mib	= 0x0A00,
-	i40e_aqc_opc_lldp_update_mib	= 0x0A01,
-	i40e_aqc_opc_lldp_add_tlv	= 0x0A02,
-	i40e_aqc_opc_lldp_update_tlv	= 0x0A03,
-	i40e_aqc_opc_lldp_delete_tlv	= 0x0A04,
-	i40e_aqc_opc_lldp_stop		= 0x0A05,
-	i40e_aqc_opc_lldp_start		= 0x0A06,
-
-	/* Tunnel commands */
-	i40e_aqc_opc_add_udp_tunnel	= 0x0B00,
-	i40e_aqc_opc_del_udp_tunnel	= 0x0B01,
-	i40e_aqc_opc_set_rss_key	= 0x0B02,
-	i40e_aqc_opc_set_rss_lut	= 0x0B03,
-	i40e_aqc_opc_get_rss_key	= 0x0B04,
-	i40e_aqc_opc_get_rss_lut	= 0x0B05,
-
-	/* Async Events */
-	i40e_aqc_opc_event_lan_overflow		= 0x1001,
-
-	/* OEM commands */
-	i40e_aqc_opc_oem_parameter_change	= 0xFE00,
-	i40e_aqc_opc_oem_device_status_change	= 0xFE01,
-	i40e_aqc_opc_oem_ocsd_initialize	= 0xFE02,
-	i40e_aqc_opc_oem_ocbb_initialize	= 0xFE03,
-
-	/* debug commands */
-	i40e_aqc_opc_debug_read_reg		= 0xFF03,
-	i40e_aqc_opc_debug_write_reg		= 0xFF04,
-	i40e_aqc_opc_debug_modify_reg		= 0xFF07,
-	i40e_aqc_opc_debug_dump_internals	= 0xFF08,
-};
-
-/* command structures and indirect data structures */
-
-/* Structure naming conventions:
- * - no suffix for direct command descriptor structures
- * - _data for indirect sent data
- * - _resp for indirect return data (data which is both will use _data)
- * - _completion for direct return data
- * - _element_ for repeated elements (may also be _data or _resp)
- *
- * Command structures are expected to overlay the params.raw member of the basic
- * descriptor, and as such cannot exceed 16 bytes in length.
- */
-
-/* This macro is used to generate a compilation error if a structure
- * is not exactly the correct length. It gives a divide by zero error if the
- * structure is not of the correct size, otherwise it creates an enum that is
- * never used.
- */
-#define I40E_CHECK_STRUCT_LEN(n, X) enum i40e_static_assert_enum_##X \
-	{ i40e_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
-
-/* This macro is used extensively to ensure that command structures are 16
- * bytes in length as they have to map to the raw array of that size.
- */
-#define I40E_CHECK_CMD_LENGTH(X)	I40E_CHECK_STRUCT_LEN(16, X)
-
-/* internal (0x00XX) commands */
-
-/* Get version (direct 0x0001) */
-struct i40e_aqc_get_version {
-	__le32 rom_ver;
-	__le32 fw_build;
-	__le16 fw_major;
-	__le16 fw_minor;
-	__le16 api_major;
-	__le16 api_minor;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_version);
-
-/* Send driver version (indirect 0x0002) */
-struct i40e_aqc_driver_version {
-	u8	driver_major_ver;
-	u8	driver_minor_ver;
-	u8	driver_build_ver;
-	u8	driver_subbuild_ver;
-	u8	reserved[4];
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_driver_version);
-
-/* Queue Shutdown (direct 0x0003) */
-struct i40e_aqc_queue_shutdown {
-	__le32	driver_unloading;
-#define I40E_AQ_DRIVER_UNLOADING	0x1
-	u8	reserved[12];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown);
-
-/* Set PF context (0x0004, direct) */
-struct i40e_aqc_set_pf_context {
-	u8	pf_id;
-	u8	reserved[15];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_pf_context);
-
-/* Request resource ownership (direct 0x0008)
- * Release resource ownership (direct 0x0009)
- */
-#define I40E_AQ_RESOURCE_NVM			1
-#define I40E_AQ_RESOURCE_SDP			2
-#define I40E_AQ_RESOURCE_ACCESS_READ		1
-#define I40E_AQ_RESOURCE_ACCESS_WRITE		2
-#define I40E_AQ_RESOURCE_NVM_READ_TIMEOUT	3000
-#define I40E_AQ_RESOURCE_NVM_WRITE_TIMEOUT	180000
-
-struct i40e_aqc_request_resource {
-	__le16	resource_id;
-	__le16	access_type;
-	__le32	timeout;
-	__le32	resource_number;
-	u8	reserved[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_request_resource);
-
-/* Get function capabilities (indirect 0x000A)
- * Get device capabilities (indirect 0x000B)
- */
-struct i40e_aqc_list_capabilites {
-	u8 command_flags;
-#define I40E_AQ_LIST_CAP_PF_INDEX_EN	1
-	u8 pf_index;
-	u8 reserved[2];
-	__le32 count;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_list_capabilites);
-
-struct i40e_aqc_list_capabilities_element_resp {
-	__le16	id;
-	u8	major_rev;
-	u8	minor_rev;
-	__le32	number;
-	__le32	logical_id;
-	__le32	phys_id;
-	u8	reserved[16];
-};
-
-/* list of caps */
-
-#define I40E_AQ_CAP_ID_SWITCH_MODE	0x0001
-#define I40E_AQ_CAP_ID_MNG_MODE		0x0002
-#define I40E_AQ_CAP_ID_NPAR_ACTIVE	0x0003
-#define I40E_AQ_CAP_ID_OS2BMC_CAP	0x0004
-#define I40E_AQ_CAP_ID_FUNCTIONS_VALID	0x0005
-#define I40E_AQ_CAP_ID_ALTERNATE_RAM	0x0006
-#define I40E_AQ_CAP_ID_WOL_AND_PROXY	0x0008
-#define I40E_AQ_CAP_ID_SRIOV		0x0012
-#define I40E_AQ_CAP_ID_VF		0x0013
-#define I40E_AQ_CAP_ID_VMDQ		0x0014
-#define I40E_AQ_CAP_ID_8021QBG		0x0015
-#define I40E_AQ_CAP_ID_8021QBR		0x0016
-#define I40E_AQ_CAP_ID_VSI		0x0017
-#define I40E_AQ_CAP_ID_DCB		0x0018
-#define I40E_AQ_CAP_ID_FCOE		0x0021
-#define I40E_AQ_CAP_ID_ISCSI		0x0022
-#define I40E_AQ_CAP_ID_RSS		0x0040
-#define I40E_AQ_CAP_ID_RXQ		0x0041
-#define I40E_AQ_CAP_ID_TXQ		0x0042
-#define I40E_AQ_CAP_ID_MSIX		0x0043
-#define I40E_AQ_CAP_ID_VF_MSIX		0x0044
-#define I40E_AQ_CAP_ID_FLOW_DIRECTOR	0x0045
-#define I40E_AQ_CAP_ID_1588		0x0046
-#define I40E_AQ_CAP_ID_IWARP		0x0051
-#define I40E_AQ_CAP_ID_LED		0x0061
-#define I40E_AQ_CAP_ID_SDP		0x0062
-#define I40E_AQ_CAP_ID_MDIO		0x0063
-#define I40E_AQ_CAP_ID_WSR_PROT		0x0064
-#define I40E_AQ_CAP_ID_NVM_MGMT		0x0080
-#define I40E_AQ_CAP_ID_FLEX10		0x00F1
-#define I40E_AQ_CAP_ID_CEM		0x00F2
-
-/* Set CPPM Configuration (direct 0x0103) */
-struct i40e_aqc_cppm_configuration {
-	__le16	command_flags;
-#define I40E_AQ_CPPM_EN_LTRC	0x0800
-#define I40E_AQ_CPPM_EN_DMCTH	0x1000
-#define I40E_AQ_CPPM_EN_DMCTLX	0x2000
-#define I40E_AQ_CPPM_EN_HPTC	0x4000
-#define I40E_AQ_CPPM_EN_DMARC	0x8000
-	__le16	ttlx;
-	__le32	dmacr;
-	__le16	dmcth;
-	u8	hptc;
-	u8	reserved;
-	__le32	pfltrc;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
-
-/* Set ARP Proxy command / response (indirect 0x0104) */
-struct i40e_aqc_arp_proxy_data {
-	__le16	command_flags;
-#define I40E_AQ_ARP_INIT_IPV4	0x0800
-#define I40E_AQ_ARP_UNSUP_CTL	0x1000
-#define I40E_AQ_ARP_ENA		0x2000
-#define I40E_AQ_ARP_ADD_IPV4	0x4000
-#define I40E_AQ_ARP_DEL_IPV4	0x8000
-	__le16	table_id;
-	__le32	enabled_offloads;
-#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE	0x00000020
-#define I40E_AQ_ARP_OFFLOAD_ENABLE		0x00000800
-	__le32	ip_addr;
-	u8	mac_addr[6];
-	u8	reserved[2];
-};
-
-I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
-
-/* Set NS Proxy Table Entry Command (indirect 0x0105) */
-struct i40e_aqc_ns_proxy_data {
-	__le16	table_idx_mac_addr_0;
-	__le16	table_idx_mac_addr_1;
-	__le16	table_idx_ipv6_0;
-	__le16	table_idx_ipv6_1;
-	__le16	control;
-#define I40E_AQ_NS_PROXY_ADD_0		0x0001
-#define I40E_AQ_NS_PROXY_DEL_0		0x0002
-#define I40E_AQ_NS_PROXY_ADD_1		0x0004
-#define I40E_AQ_NS_PROXY_DEL_1		0x0008
-#define I40E_AQ_NS_PROXY_ADD_IPV6_0	0x0010
-#define I40E_AQ_NS_PROXY_DEL_IPV6_0	0x0020
-#define I40E_AQ_NS_PROXY_ADD_IPV6_1	0x0040
-#define I40E_AQ_NS_PROXY_DEL_IPV6_1	0x0080
-#define I40E_AQ_NS_PROXY_COMMAND_SEQ	0x0100
-#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL	0x0200
-#define I40E_AQ_NS_PROXY_INIT_MAC_TBL	0x0400
-#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE	0x0800
-#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE	0x1000
-	u8	mac_addr_0[6];
-	u8	mac_addr_1[6];
-	u8	local_mac_addr[6];
-	u8	ipv6_addr_0[16]; /* Warning! spec specifies BE byte order */
-	u8	ipv6_addr_1[16];
-};
-
-I40E_CHECK_STRUCT_LEN(0x3c, i40e_aqc_ns_proxy_data);
-
-/* Manage LAA Command (0x0106) - obsolete */
-struct i40e_aqc_mng_laa {
-	__le16	command_flags;
-#define I40E_AQ_LAA_FLAG_WR	0x8000
-	u8	reserved[2];
-	__le32	sal;
-	__le16	sah;
-	u8	reserved2[6];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_mng_laa);
-
-/* Manage MAC Address Read Command (indirect 0x0107) */
-struct i40e_aqc_mac_address_read {
-	__le16	command_flags;
-#define I40E_AQC_LAN_ADDR_VALID		0x10
-#define I40E_AQC_SAN_ADDR_VALID		0x20
-#define I40E_AQC_PORT_ADDR_VALID	0x40
-#define I40E_AQC_WOL_ADDR_VALID		0x80
-#define I40E_AQC_MC_MAG_EN_VALID	0x100
-#define I40E_AQC_ADDR_VALID_MASK	0x3F0
-	u8	reserved[6];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_read);
-
-struct i40e_aqc_mac_address_read_data {
-	u8 pf_lan_mac[6];
-	u8 pf_san_mac[6];
-	u8 port_mac[6];
-	u8 pf_wol_mac[6];
-};
-
-I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data);
-
-/* Manage MAC Address Write Command (0x0108) */
-struct i40e_aqc_mac_address_write {
-	__le16	command_flags;
-#define I40E_AQC_WRITE_TYPE_LAA_ONLY	0x0000
-#define I40E_AQC_WRITE_TYPE_LAA_WOL	0x4000
-#define I40E_AQC_WRITE_TYPE_PORT	0x8000
-#define I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG	0xC000
-#define I40E_AQC_WRITE_TYPE_MASK	0xC000
-
-	__le16	mac_sah;
-	__le32	mac_sal;
-	u8	reserved[8];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_write);
-
-/* PXE commands (0x011x) */
-
-/* Clear PXE Command and response  (direct 0x0110) */
-struct i40e_aqc_clear_pxe {
-	u8	rx_cnt;
-	u8	reserved[15];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe);
-
-/* Set WoL Filter (0x0120) */
-
-struct i40e_aqc_set_wol_filter {
-	__le16 filter_index;
-#define I40E_AQC_MAX_NUM_WOL_FILTERS	8
-#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT	15
-#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_MASK	(0x1 << \
-		I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT)
-
-#define I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT		0
-#define I40E_AQC_SET_WOL_FILTER_INDEX_MASK	(0x7 << \
-		I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT)
-	__le16 cmd_flags;
-#define I40E_AQC_SET_WOL_FILTER				0x8000
-#define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL		0x4000
-#define I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR	0x2000
-#define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR		0
-#define I40E_AQC_SET_WOL_FILTER_ACTION_SET		1
-	__le16 valid_flags;
-#define I40E_AQC_SET_WOL_FILTER_ACTION_VALID		0x8000
-#define I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID	0x4000
-	u8 reserved[2];
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter);
-
-struct i40e_aqc_set_wol_filter_data {
-	u8 filter[128];
-	u8 mask[16];
-};
-
-I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data);
-
-/* Get Wake Reason (0x0121) */
-
-struct i40e_aqc_get_wake_reason_completion {
-	u8 reserved_1[2];
-	__le16 wake_reason;
-#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT	0
-#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK (0xFF << \
-		I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT)
-#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT	8
-#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_MASK	(0xFF << \
-		I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT)
-	u8 reserved_2[12];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion);
-
-/* Switch configuration commands (0x02xx) */
-
-/* Used by many indirect commands that only pass an seid and a buffer in the
- * command
- */
-struct i40e_aqc_switch_seid {
-	__le16	seid;
-	u8	reserved[6];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_switch_seid);
-
-/* Get Switch Configuration command (indirect 0x0200)
- * uses i40e_aqc_switch_seid for the descriptor
- */
-struct i40e_aqc_get_switch_config_header_resp {
-	__le16	num_reported;
-	__le16	num_total;
-	u8	reserved[12];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_config_header_resp);
-
-struct i40e_aqc_switch_config_element_resp {
-	u8	element_type;
-#define I40E_AQ_SW_ELEM_TYPE_MAC	1
-#define I40E_AQ_SW_ELEM_TYPE_PF		2
-#define I40E_AQ_SW_ELEM_TYPE_VF		3
-#define I40E_AQ_SW_ELEM_TYPE_EMP	4
-#define I40E_AQ_SW_ELEM_TYPE_BMC	5
-#define I40E_AQ_SW_ELEM_TYPE_PV		16
-#define I40E_AQ_SW_ELEM_TYPE_VEB	17
-#define I40E_AQ_SW_ELEM_TYPE_PA		18
-#define I40E_AQ_SW_ELEM_TYPE_VSI	19
-	u8	revision;
-#define I40E_AQ_SW_ELEM_REV_1		1
-	__le16	seid;
-	__le16	uplink_seid;
-	__le16	downlink_seid;
-	u8	reserved[3];
-	u8	connection_type;
-#define I40E_AQ_CONN_TYPE_REGULAR	0x1
-#define I40E_AQ_CONN_TYPE_DEFAULT	0x2
-#define I40E_AQ_CONN_TYPE_CASCADED	0x3
-	__le16	scheduler_id;
-	__le16	element_info;
-};
-
-I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_config_element_resp);
-
-/* Get Switch Configuration (indirect 0x0200)
- *    an array of elements are returned in the response buffer
- *    the first in the array is the header, remainder are elements
- */
-struct i40e_aqc_get_switch_config_resp {
-	struct i40e_aqc_get_switch_config_header_resp	header;
-	struct i40e_aqc_switch_config_element_resp	element[1];
-};
-
-I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_switch_config_resp);
-
-/* Add Statistics (direct 0x0201)
- * Remove Statistics (direct 0x0202)
- */
-struct i40e_aqc_add_remove_statistics {
-	__le16	seid;
-	__le16	vlan;
-	__le16	stat_index;
-	u8	reserved[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_statistics);
-
-/* Set Port Parameters command (direct 0x0203) */
-struct i40e_aqc_set_port_parameters {
-	__le16	command_flags;
-#define I40E_AQ_SET_P_PARAMS_SAVE_BAD_PACKETS	1
-#define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS	2 /* must set! */
-#define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA	4
-	__le16	bad_frame_vsi;
-#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_SHIFT	0x0
-#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_MASK	0x3FF
-	__le16	default_seid;        /* reserved for command */
-	u8	reserved[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_port_parameters);
-
-/* Get Switch Resource Allocation (indirect 0x0204) */
-struct i40e_aqc_get_switch_resource_alloc {
-	u8	num_entries;         /* reserved for command */
-	u8	reserved[7];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_resource_alloc);
-
-/* expect an array of these structs in the response buffer */
-struct i40e_aqc_switch_resource_alloc_element_resp {
-	u8	resource_type;
-#define I40E_AQ_RESOURCE_TYPE_VEB		0x0
-#define I40E_AQ_RESOURCE_TYPE_VSI		0x1
-#define I40E_AQ_RESOURCE_TYPE_MACADDR		0x2
-#define I40E_AQ_RESOURCE_TYPE_STAG		0x3
-#define I40E_AQ_RESOURCE_TYPE_ETAG		0x4
-#define I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH	0x5
-#define I40E_AQ_RESOURCE_TYPE_UNICAST_HASH	0x6
-#define I40E_AQ_RESOURCE_TYPE_VLAN		0x7
-#define I40E_AQ_RESOURCE_TYPE_VSI_LIST_ENTRY	0x8
-#define I40E_AQ_RESOURCE_TYPE_ETAG_LIST_ENTRY	0x9
-#define I40E_AQ_RESOURCE_TYPE_VLAN_STAT_POOL	0xA
-#define I40E_AQ_RESOURCE_TYPE_MIRROR_RULE	0xB
-#define I40E_AQ_RESOURCE_TYPE_QUEUE_SETS	0xC
-#define I40E_AQ_RESOURCE_TYPE_VLAN_FILTERS	0xD
-#define I40E_AQ_RESOURCE_TYPE_INNER_MAC_FILTERS	0xF
-#define I40E_AQ_RESOURCE_TYPE_IP_FILTERS	0x10
-#define I40E_AQ_RESOURCE_TYPE_GRE_VN_KEYS	0x11
-#define I40E_AQ_RESOURCE_TYPE_VN2_KEYS		0x12
-#define I40E_AQ_RESOURCE_TYPE_TUNNEL_PORTS	0x13
-	u8	reserved1;
-	__le16	guaranteed;
-	__le16	total;
-	__le16	used;
-	__le16	total_unalloced;
-	u8	reserved2[6];
-};
-
-I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
-
-/* Set Switch Configuration (direct 0x0205) */
-struct i40e_aqc_set_switch_config {
-	__le16	flags;
-/* flags used for both fields below */
-#define I40E_AQ_SET_SWITCH_CFG_PROMISC		0x0001
-#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER	0x0002
-	__le16	valid_flags;
-	/* The ethertype in switch_tag is dropped on ingress and used
-	 * internally by the switch. Set this to zero for the default
-	 * of 0x88a8 (802.1ad). Should be zero for firmware API
-	 * versions lower than 1.7.
-	 */
-	__le16	switch_tag;
-	/* The ethertypes in first_tag and second_tag are used to
-	 * match the outer and inner VLAN tags (respectively) when HW
-	 * double VLAN tagging is enabled via the set port parameters
-	 * AQ command. Otherwise these are both ignored. Set them to
-	 * zero for their defaults of 0x8100 (802.1Q). Should be zero
-	 * for firmware API versions lower than 1.7.
-	 */
-	__le16	first_tag;
-	__le16	second_tag;
-	u8	reserved[6];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
-
-/* Read Receive control registers  (direct 0x0206)
- * Write Receive control registers (direct 0x0207)
- *     used for accessing Rx control registers that can be
- *     slow and need special handling when under high Rx load
- */
-struct i40e_aqc_rx_ctl_reg_read_write {
-	__le32 reserved1;
-	__le32 address;
-	__le32 reserved2;
-	__le32 value;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write);
-
-/* Add VSI (indirect 0x0210)
- *    this indirect command uses struct i40e_aqc_vsi_properties_data
- *    as the indirect buffer (128 bytes)
- *
- * Update VSI (indirect 0x211)
- *     uses the same data structure as Add VSI
- *
- * Get VSI (indirect 0x0212)
- *     uses the same completion and data structure as Add VSI
- */
-struct i40e_aqc_add_get_update_vsi {
-	__le16	uplink_seid;
-	u8	connection_type;
-#define I40E_AQ_VSI_CONN_TYPE_NORMAL	0x1
-#define I40E_AQ_VSI_CONN_TYPE_DEFAULT	0x2
-#define I40E_AQ_VSI_CONN_TYPE_CASCADED	0x3
-	u8	reserved1;
-	u8	vf_id;
-	u8	reserved2;
-	__le16	vsi_flags;
-#define I40E_AQ_VSI_TYPE_SHIFT		0x0
-#define I40E_AQ_VSI_TYPE_MASK		(0x3 << I40E_AQ_VSI_TYPE_SHIFT)
-#define I40E_AQ_VSI_TYPE_VF		0x0
-#define I40E_AQ_VSI_TYPE_VMDQ2		0x1
-#define I40E_AQ_VSI_TYPE_PF		0x2
-#define I40E_AQ_VSI_TYPE_EMP_MNG	0x3
-#define I40E_AQ_VSI_FLAG_CASCADED_PV	0x4
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi);
-
-struct i40e_aqc_add_get_update_vsi_completion {
-	__le16 seid;
-	__le16 vsi_number;
-	__le16 vsi_used;
-	__le16 vsi_free;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi_completion);
-
-struct i40e_aqc_vsi_properties_data {
-	/* first 96 byte are written by SW */
-	__le16	valid_sections;
-#define I40E_AQ_VSI_PROP_SWITCH_VALID		0x0001
-#define I40E_AQ_VSI_PROP_SECURITY_VALID		0x0002
-#define I40E_AQ_VSI_PROP_VLAN_VALID		0x0004
-#define I40E_AQ_VSI_PROP_CAS_PV_VALID		0x0008
-#define I40E_AQ_VSI_PROP_INGRESS_UP_VALID	0x0010
-#define I40E_AQ_VSI_PROP_EGRESS_UP_VALID	0x0020
-#define I40E_AQ_VSI_PROP_QUEUE_MAP_VALID	0x0040
-#define I40E_AQ_VSI_PROP_QUEUE_OPT_VALID	0x0080
-#define I40E_AQ_VSI_PROP_OUTER_UP_VALID		0x0100
-#define I40E_AQ_VSI_PROP_SCHED_VALID		0x0200
-	/* switch section */
-	__le16	switch_id; /* 12bit id combined with flags below */
-#define I40E_AQ_VSI_SW_ID_SHIFT		0x0000
-#define I40E_AQ_VSI_SW_ID_MASK		(0xFFF << I40E_AQ_VSI_SW_ID_SHIFT)
-#define I40E_AQ_VSI_SW_ID_FLAG_NOT_STAG	0x1000
-#define I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB	0x2000
-#define I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB	0x4000
-	u8	sw_reserved[2];
-	/* security section */
-	u8	sec_flags;
-#define I40E_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD	0x01
-#define I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK	0x02
-#define I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK	0x04
-	u8	sec_reserved;
-	/* VLAN section */
-	__le16	pvid; /* VLANS include priority bits */
-	__le16	fcoe_pvid;
-	u8	port_vlan_flags;
-#define I40E_AQ_VSI_PVLAN_MODE_SHIFT	0x00
-#define I40E_AQ_VSI_PVLAN_MODE_MASK	(0x03 << \
-					 I40E_AQ_VSI_PVLAN_MODE_SHIFT)
-#define I40E_AQ_VSI_PVLAN_MODE_TAGGED	0x01
-#define I40E_AQ_VSI_PVLAN_MODE_UNTAGGED	0x02
-#define I40E_AQ_VSI_PVLAN_MODE_ALL	0x03
-#define I40E_AQ_VSI_PVLAN_INSERT_PVID	0x04
-#define I40E_AQ_VSI_PVLAN_EMOD_SHIFT	0x03
-#define I40E_AQ_VSI_PVLAN_EMOD_MASK	(0x3 << \
-					 I40E_AQ_VSI_PVLAN_EMOD_SHIFT)
-#define I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH	0x0
-#define I40E_AQ_VSI_PVLAN_EMOD_STR_UP	0x08
-#define I40E_AQ_VSI_PVLAN_EMOD_STR	0x10
-#define I40E_AQ_VSI_PVLAN_EMOD_NOTHING	0x18
-	u8	pvlan_reserved[3];
-	/* ingress egress up sections */
-	__le32	ingress_table; /* bitmap, 3 bits per up */
-#define I40E_AQ_VSI_UP_TABLE_UP0_SHIFT	0
-#define I40E_AQ_VSI_UP_TABLE_UP0_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP0_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP1_SHIFT	3
-#define I40E_AQ_VSI_UP_TABLE_UP1_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP1_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP2_SHIFT	6
-#define I40E_AQ_VSI_UP_TABLE_UP2_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP2_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP3_SHIFT	9
-#define I40E_AQ_VSI_UP_TABLE_UP3_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP3_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP4_SHIFT	12
-#define I40E_AQ_VSI_UP_TABLE_UP4_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP4_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP5_SHIFT	15
-#define I40E_AQ_VSI_UP_TABLE_UP5_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP5_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP6_SHIFT	18
-#define I40E_AQ_VSI_UP_TABLE_UP6_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP6_SHIFT)
-#define I40E_AQ_VSI_UP_TABLE_UP7_SHIFT	21
-#define I40E_AQ_VSI_UP_TABLE_UP7_MASK	(0x7 << \
-					 I40E_AQ_VSI_UP_TABLE_UP7_SHIFT)
-	__le32	egress_table;   /* same defines as for ingress table */
-	/* cascaded PV section */
-	__le16	cas_pv_tag;
-	u8	cas_pv_flags;
-#define I40E_AQ_VSI_CAS_PV_TAGX_SHIFT		0x00
-#define I40E_AQ_VSI_CAS_PV_TAGX_MASK		(0x03 << \
-						 I40E_AQ_VSI_CAS_PV_TAGX_SHIFT)
-#define I40E_AQ_VSI_CAS_PV_TAGX_LEAVE		0x00
-#define I40E_AQ_VSI_CAS_PV_TAGX_REMOVE		0x01
-#define I40E_AQ_VSI_CAS_PV_TAGX_COPY		0x02
-#define I40E_AQ_VSI_CAS_PV_INSERT_TAG		0x10
-#define I40E_AQ_VSI_CAS_PV_ETAG_PRUNE		0x20
-#define I40E_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG	0x40
-	u8	cas_pv_reserved;
-	/* queue mapping section */
-	__le16	mapping_flags;
-#define I40E_AQ_VSI_QUE_MAP_CONTIG	0x0
-#define I40E_AQ_VSI_QUE_MAP_NONCONTIG	0x1
-	__le16	queue_mapping[16];
-#define I40E_AQ_VSI_QUEUE_SHIFT		0x0
-#define I40E_AQ_VSI_QUEUE_MASK		(0x7FF << I40E_AQ_VSI_QUEUE_SHIFT)
-	__le16	tc_mapping[8];
-#define I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT	0
-#define I40E_AQ_VSI_TC_QUE_OFFSET_MASK	(0x1FF << \
-					 I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT)
-#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT	9
-#define I40E_AQ_VSI_TC_QUE_NUMBER_MASK	(0x7 << \
-					 I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
-	/* queueing option section */
-	u8	queueing_opt_flags;
-#define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA	0x04
-#define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA	0x08
-#define I40E_AQ_VSI_QUE_OPT_TCP_ENA	0x10
-#define I40E_AQ_VSI_QUE_OPT_FCOE_ENA	0x20
-#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF	0x00
-#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI	0x40
-	u8	queueing_opt_reserved[3];
-	/* scheduler section */
-	u8	up_enable_bits;
-	u8	sched_reserved;
-	/* outer up section */
-	__le32	outer_up_table; /* same structure and defines as ingress tbl */
-	u8	cmd_reserved[8];
-	/* last 32 bytes are written by FW */
-	__le16	qs_handle[8];
-#define I40E_AQ_VSI_QS_HANDLE_INVALID	0xFFFF
-	__le16	stat_counter_idx;
-	__le16	sched_id;
-	u8	resp_reserved[12];
-};
-
-I40E_CHECK_STRUCT_LEN(128, i40e_aqc_vsi_properties_data);
-
-/* Add Port Virtualizer (direct 0x0220)
- * also used for update PV (direct 0x0221) but only flags are used
- * (IS_CTRL_PORT only works on add PV)
- */
-struct i40e_aqc_add_update_pv {
-	__le16	command_flags;
-#define I40E_AQC_PV_FLAG_PV_TYPE		0x1
-#define I40E_AQC_PV_FLAG_FWD_UNKNOWN_STAG_EN	0x2
-#define I40E_AQC_PV_FLAG_FWD_UNKNOWN_ETAG_EN	0x4
-#define I40E_AQC_PV_FLAG_IS_CTRL_PORT		0x8
-	__le16	uplink_seid;
-	__le16	connected_seid;
-	u8	reserved[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv);
-
-struct i40e_aqc_add_update_pv_completion {
-	/* reserved for update; for add also encodes error if rc == ENOSPC */
-	__le16	pv_seid;
-#define I40E_AQC_PV_ERR_FLAG_NO_PV	0x1
-#define I40E_AQC_PV_ERR_FLAG_NO_SCHED	0x2
-#define I40E_AQC_PV_ERR_FLAG_NO_COUNTER	0x4
-#define I40E_AQC_PV_ERR_FLAG_NO_ENTRY	0x8
-	u8	reserved[14];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv_completion);
-
-/* Get PV Params (direct 0x0222)
- * uses i40e_aqc_switch_seid for the descriptor
- */
-
-struct i40e_aqc_get_pv_params_completion {
-	__le16	seid;
-	__le16	default_stag;
-	__le16	pv_flags; /* same flags as add_pv */
-#define I40E_AQC_GET_PV_PV_TYPE			0x1
-#define I40E_AQC_GET_PV_FRWD_UNKNOWN_STAG	0x2
-#define I40E_AQC_GET_PV_FRWD_UNKNOWN_ETAG	0x4
-	u8	reserved[8];
-	__le16	default_port_seid;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_pv_params_completion);
-
-/* Add VEB (direct 0x0230) */
-struct i40e_aqc_add_veb {
-	__le16	uplink_seid;
-	__le16	downlink_seid;
-	__le16	veb_flags;
-#define I40E_AQC_ADD_VEB_FLOATING		0x1
-#define I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT	1
-#define I40E_AQC_ADD_VEB_PORT_TYPE_MASK		(0x3 << \
-					I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT)
-#define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT	0x2
-#define I40E_AQC_ADD_VEB_PORT_TYPE_DATA		0x4
-#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER	0x8     /* deprecated */
-#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS	0x10
-	u8	enable_tcs;
-	u8	reserved[9];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb);
-
-struct i40e_aqc_add_veb_completion {
-	u8	reserved[6];
-	__le16	switch_seid;
-	/* also encodes error if rc == ENOSPC; codes are the same as add_pv */
-	__le16	veb_seid;
-#define I40E_AQC_VEB_ERR_FLAG_NO_VEB		0x1
-#define I40E_AQC_VEB_ERR_FLAG_NO_SCHED		0x2
-#define I40E_AQC_VEB_ERR_FLAG_NO_COUNTER	0x4
-#define I40E_AQC_VEB_ERR_FLAG_NO_ENTRY		0x8
-	__le16	statistic_index;
-	__le16	vebs_used;
-	__le16	vebs_free;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb_completion);
-
-/* Get VEB Parameters (direct 0x0232)
- * uses i40e_aqc_switch_seid for the descriptor
- */
-struct i40e_aqc_get_veb_parameters_completion {
-	__le16	seid;
-	__le16	switch_id;
-	__le16	veb_flags; /* only the first/last flags from 0x0230 is valid */
-	__le16	statistic_index;
-	__le16	vebs_used;
-	__le16	vebs_free;
-	u8	reserved[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_veb_parameters_completion);
-
-/* Delete Element (direct 0x0243)
- * uses the generic i40e_aqc_switch_seid
- */
-
-/* Add MAC-VLAN (indirect 0x0250) */
-
-/* used for the command for most vlan commands */
-struct i40e_aqc_macvlan {
-	__le16	num_addresses;
-	__le16	seid[3];
-#define I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT	0
-#define I40E_AQC_MACVLAN_CMD_SEID_NUM_MASK	(0x3FF << \
-					I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT)
-#define I40E_AQC_MACVLAN_CMD_SEID_VALID		0x8000
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_macvlan);
-
-/* indirect data for command and response */
-struct i40e_aqc_add_macvlan_element_data {
-	u8	mac_addr[6];
-	__le16	vlan_tag;
-	__le16	flags;
-#define I40E_AQC_MACVLAN_ADD_PERFECT_MATCH	0x0001
-#define I40E_AQC_MACVLAN_ADD_HASH_MATCH		0x0002
-#define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN	0x0004
-#define I40E_AQC_MACVLAN_ADD_TO_QUEUE		0x0008
-#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC	0x0010
-	__le16	queue_number;
-#define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT	0
-#define I40E_AQC_MACVLAN_CMD_QUEUE_MASK		(0x7FF << \
-					I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT)
-	/* response section */
-	u8	match_method;
-#define I40E_AQC_MM_PERFECT_MATCH	0x01
-#define I40E_AQC_MM_HASH_MATCH		0x02
-#define I40E_AQC_MM_ERR_NO_RES		0xFF
-	u8	reserved1[3];
-};
-
-struct i40e_aqc_add_remove_macvlan_completion {
-	__le16 perfect_mac_used;
-	__le16 perfect_mac_free;
-	__le16 unicast_hash_free;
-	__le16 multicast_hash_free;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_macvlan_completion);
-
-/* Remove MAC-VLAN (indirect 0x0251)
- * uses i40e_aqc_macvlan for the descriptor
- * data points to an array of num_addresses of elements
- */
-
-struct i40e_aqc_remove_macvlan_element_data {
-	u8	mac_addr[6];
-	__le16	vlan_tag;
-	u8	flags;
-#define I40E_AQC_MACVLAN_DEL_PERFECT_MATCH	0x01
-#define I40E_AQC_MACVLAN_DEL_HASH_MATCH		0x02
-#define I40E_AQC_MACVLAN_DEL_IGNORE_VLAN	0x08
-#define I40E_AQC_MACVLAN_DEL_ALL_VSIS		0x10
-	u8	reserved[3];
-	/* reply section */
-	u8	error_code;
-#define I40E_AQC_REMOVE_MACVLAN_SUCCESS		0x0
-#define I40E_AQC_REMOVE_MACVLAN_FAIL		0xFF
-	u8	reply_reserved[3];
-};
-
-/* Add VLAN (indirect 0x0252)
- * Remove VLAN (indirect 0x0253)
- * use the generic i40e_aqc_macvlan for the command
- */
-struct i40e_aqc_add_remove_vlan_element_data {
-	__le16	vlan_tag;
-	u8	vlan_flags;
-/* flags for add VLAN */
-#define I40E_AQC_ADD_VLAN_LOCAL			0x1
-#define I40E_AQC_ADD_PVLAN_TYPE_SHIFT		1
-#define I40E_AQC_ADD_PVLAN_TYPE_MASK	(0x3 << I40E_AQC_ADD_PVLAN_TYPE_SHIFT)
-#define I40E_AQC_ADD_PVLAN_TYPE_REGULAR		0x0
-#define I40E_AQC_ADD_PVLAN_TYPE_PRIMARY		0x2
-#define I40E_AQC_ADD_PVLAN_TYPE_SECONDARY	0x4
-#define I40E_AQC_VLAN_PTYPE_SHIFT		3
-#define I40E_AQC_VLAN_PTYPE_MASK	(0x3 << I40E_AQC_VLAN_PTYPE_SHIFT)
-#define I40E_AQC_VLAN_PTYPE_REGULAR_VSI		0x0
-#define I40E_AQC_VLAN_PTYPE_PROMISC_VSI		0x8
-#define I40E_AQC_VLAN_PTYPE_COMMUNITY_VSI	0x10
-#define I40E_AQC_VLAN_PTYPE_ISOLATED_VSI	0x18
-/* flags for remove VLAN */
-#define I40E_AQC_REMOVE_VLAN_ALL	0x1
-	u8	reserved;
-	u8	result;
-/* flags for add VLAN */
-#define I40E_AQC_ADD_VLAN_SUCCESS	0x0
-#define I40E_AQC_ADD_VLAN_FAIL_REQUEST	0xFE
-#define I40E_AQC_ADD_VLAN_FAIL_RESOURCE	0xFF
-/* flags for remove VLAN */
-#define I40E_AQC_REMOVE_VLAN_SUCCESS	0x0
-#define I40E_AQC_REMOVE_VLAN_FAIL	0xFF
-	u8	reserved1[3];
-};
-
-struct i40e_aqc_add_remove_vlan_completion {
-	u8	reserved[4];
-	__le16	vlans_used;
-	__le16	vlans_free;
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-/* Set VSI Promiscuous Modes (direct 0x0254) */
-struct i40e_aqc_set_vsi_promiscuous_modes {
-	__le16	promiscuous_flags;
-	__le16	valid_flags;
-/* flags used for both fields above */
-#define I40E_AQC_SET_VSI_PROMISC_UNICAST	0x01
-#define I40E_AQC_SET_VSI_PROMISC_MULTICAST	0x02
-#define I40E_AQC_SET_VSI_PROMISC_BROADCAST	0x04
-#define I40E_AQC_SET_VSI_DEFAULT		0x08
-#define I40E_AQC_SET_VSI_PROMISC_VLAN		0x10
-#define I40E_AQC_SET_VSI_PROMISC_TX		0x8000
-	__le16	seid;
-#define I40E_AQC_VSI_PROM_CMD_SEID_MASK		0x3FF
-	__le16	vlan_tag;
-#define I40E_AQC_SET_VSI_VLAN_MASK		0x0FFF
-#define I40E_AQC_SET_VSI_VLAN_VALID		0x8000
-	u8	reserved[8];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_vsi_promiscuous_modes);
-
-/* Add S/E-tag command (direct 0x0255)
- * Uses generic i40e_aqc_add_remove_tag_completion for completion
- */
-struct i40e_aqc_add_tag {
-	__le16	flags;
-#define I40E_AQC_ADD_TAG_FLAG_TO_QUEUE		0x0001
-	__le16	seid;
-#define I40E_AQC_ADD_TAG_CMD_SEID_NUM_SHIFT	0
-#define I40E_AQC_ADD_TAG_CMD_SEID_NUM_MASK	(0x3FF << \
-					I40E_AQC_ADD_TAG_CMD_SEID_NUM_SHIFT)
-	__le16	tag;
-	__le16	queue_number;
-	u8	reserved[8];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_tag);
-
-struct i40e_aqc_add_remove_tag_completion {
-	u8	reserved[12];
-	__le16	tags_used;
-	__le16	tags_free;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_tag_completion);
-
-/* Remove S/E-tag command (direct 0x0256)
- * Uses generic i40e_aqc_add_remove_tag_completion for completion
- */
-struct i40e_aqc_remove_tag {
-	__le16	seid;
-#define I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_SHIFT	0
-#define I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_MASK	(0x3FF << \
-					I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_SHIFT)
-	__le16	tag;
-	u8	reserved[12];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_tag);
-
-/* Add multicast E-Tag (direct 0x0257)
- * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
- * and no external data
- */
-struct i40e_aqc_add_remove_mcast_etag {
-	__le16	pv_seid;
-	__le16	etag;
-	u8	num_unicast_etags;
-	u8	reserved[3];
-	__le32	addr_high;          /* address of array of 2-byte s-tags */
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag);
-
-struct i40e_aqc_add_remove_mcast_etag_completion {
-	u8	reserved[4];
-	__le16	mcast_etags_used;
-	__le16	mcast_etags_free;
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag_completion);
-
-/* Update S/E-Tag (direct 0x0259) */
-struct i40e_aqc_update_tag {
-	__le16	seid;
-#define I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_SHIFT	0
-#define I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_MASK	(0x3FF << \
-					I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_SHIFT)
-	__le16	old_tag;
-	__le16	new_tag;
-	u8	reserved[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag);
-
-struct i40e_aqc_update_tag_completion {
-	u8	reserved[12];
-	__le16	tags_used;
-	__le16	tags_free;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag_completion);
-
-/* Add Control Packet filter (direct 0x025A)
- * Remove Control Packet filter (direct 0x025B)
- * uses the i40e_aqc_add_oveb_cloud,
- * and the generic direct completion structure
- */
-struct i40e_aqc_add_remove_control_packet_filter {
-	u8	mac[6];
-	__le16	etype;
-	__le16	flags;
-#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC	0x0001
-#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP		0x0002
-#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE	0x0004
-#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX		0x0008
-#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX		0x0000
-	__le16	seid;
-#define I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_SHIFT	0
-#define I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_MASK	(0x3FF << \
-				I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_SHIFT)
-	__le16	queue;
-	u8	reserved[2];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter);
-
-struct i40e_aqc_add_remove_control_packet_filter_completion {
-	__le16	mac_etype_used;
-	__le16	etype_used;
-	__le16	mac_etype_free;
-	__le16	etype_free;
-	u8	reserved[8];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter_completion);
-
-/* Add Cloud filters (indirect 0x025C)
- * Remove Cloud filters (indirect 0x025D)
- * uses the i40e_aqc_add_remove_cloud_filters,
- * and the generic indirect completion structure
- */
-struct i40e_aqc_add_remove_cloud_filters {
-	u8	num_filters;
-	u8	reserved;
-	__le16	seid;
-#define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT	0
-#define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK	(0x3FF << \
-					I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT)
-	u8	big_buffer_flag;
-#define I40E_AQC_ADD_CLOUD_CMD_BB	1
-	u8	reserved2[3];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
-
-struct i40e_aqc_cloud_filters_element_data {
-	u8	outer_mac[6];
-	u8	inner_mac[6];
-	__le16	inner_vlan;
-	union {
-		struct {
-			u8 reserved[12];
-			u8 data[4];
-		} v4;
-		struct {
-			u8 data[16];
-		} v6;
-		struct {
-			__le16 data[8];
-		} raw_v6;
-	} ipaddr;
-	__le16	flags;
-#define I40E_AQC_ADD_CLOUD_FILTER_SHIFT			0
-#define I40E_AQC_ADD_CLOUD_FILTER_MASK	(0x3F << \
-					I40E_AQC_ADD_CLOUD_FILTER_SHIFT)
-/* 0x0000 reserved */
-#define I40E_AQC_ADD_CLOUD_FILTER_OIP			0x0001
-/* 0x0002 reserved */
-#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN		0x0003
-#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID	0x0004
-/* 0x0005 reserved */
-#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID		0x0006
-/* 0x0007 reserved */
-/* 0x0008 reserved */
-#define I40E_AQC_ADD_CLOUD_FILTER_OMAC			0x0009
-#define I40E_AQC_ADD_CLOUD_FILTER_IMAC			0x000A
-#define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC	0x000B
-#define I40E_AQC_ADD_CLOUD_FILTER_IIP			0x000C
-/* 0x0010 to 0x0017 are for custom filters */
-#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT		0x0010 /* Dest IP + L4 Port */
-#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT		0x0011 /* Dest MAC + L4 Port */
-#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT		0x0012 /* Dest MAC + VLAN + L4 Port */
-
-#define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE		0x0080
-#define I40E_AQC_ADD_CLOUD_VNK_SHIFT			6
-#define I40E_AQC_ADD_CLOUD_VNK_MASK			0x00C0
-#define I40E_AQC_ADD_CLOUD_FLAGS_IPV4			0
-#define I40E_AQC_ADD_CLOUD_FLAGS_IPV6			0x0100
-
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT		9
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK		0x1E00
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN		0
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC		1
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE		2
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP			3
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED		4
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE		5
-
-#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC	0x2000
-#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC	0x4000
-#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP	0x8000
-
-	__le32	tenant_id;
-	u8	reserved[4];
-	__le16	queue_number;
-#define I40E_AQC_ADD_CLOUD_QUEUE_SHIFT		0
-#define I40E_AQC_ADD_CLOUD_QUEUE_MASK		(0x7FF << \
-						 I40E_AQC_ADD_CLOUD_QUEUE_SHIFT)
-	u8	reserved2[14];
-	/* response section */
-	u8	allocation_result;
-#define I40E_AQC_ADD_CLOUD_FILTER_SUCCESS	0x0
-#define I40E_AQC_ADD_CLOUD_FILTER_FAIL		0xFF
-	u8	response_reserved[7];
-};
-
-I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data);
-
-/* i40e_aqc_cloud_filters_element_bb is used when
- * I40E_AQC_ADD_CLOUD_CMD_BB flag is set.
- */
-struct i40e_aqc_cloud_filters_element_bb {
-	struct i40e_aqc_cloud_filters_element_data element;
-	u16     general_fields[32];
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0	0
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1	1
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2	2
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0	3
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1	4
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2	5
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0	6
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1	7
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2	8
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0	9
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1	10
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2	11
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0	12
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1	13
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2	14
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0	15
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1	16
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2	17
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3	18
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4	19
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5	20
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6	21
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7	22
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0	23
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1	24
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2	25
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3	26
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4	27
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5	28
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6	29
-#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7	30
-};
-
-I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb);
-
-struct i40e_aqc_remove_cloud_filters_completion {
-	__le16 perfect_ovlan_used;
-	__le16 perfect_ovlan_free;
-	__le16 vlan_used;
-	__le16 vlan_free;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion);
-
-/* Replace filter Command 0x025F
- * uses the i40e_aqc_replace_cloud_filters,
- * and the generic indirect completion structure
- */
-struct i40e_filter_data {
-	u8 filter_type;
-	u8 input[3];
-};
-
-I40E_CHECK_STRUCT_LEN(4, i40e_filter_data);
-
-struct i40e_aqc_replace_cloud_filters_cmd {
-	u8      valid_flags;
-#define I40E_AQC_REPLACE_L1_FILTER		0x0
-#define I40E_AQC_REPLACE_CLOUD_FILTER		0x1
-#define I40E_AQC_GET_CLOUD_FILTERS		0x2
-#define I40E_AQC_MIRROR_CLOUD_FILTER		0x4
-#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER	0x8
-	u8      old_filter_type;
-	u8      new_filter_type;
-	u8      tr_bit;
-	u8      reserved[4];
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd);
-
-struct i40e_aqc_replace_cloud_filters_cmd_buf {
-	u8      data[32];
-/* Filter type INPUT codes */
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX	3
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED	BIT(7)
-
-/* Field Vector offsets */
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA	0
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH	6
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG	7
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN	8
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN	9
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN	10
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY	11
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC	12
-/* big FLU */
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA	14
-/* big FLU */
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA	15
-
-#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN	37
-	struct i40e_filter_data filters[8];
-};
-
-I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf);
-
-/* Add Mirror Rule (indirect or direct 0x0260)
- * Delete Mirror Rule (indirect or direct 0x0261)
- * note: some rule types (4,5) do not use an external buffer.
- *       take care to set the flags correctly.
- */
-struct i40e_aqc_add_delete_mirror_rule {
-	__le16 seid;
-	__le16 rule_type;
-#define I40E_AQC_MIRROR_RULE_TYPE_SHIFT		0
-#define I40E_AQC_MIRROR_RULE_TYPE_MASK		(0x7 << \
-						I40E_AQC_MIRROR_RULE_TYPE_SHIFT)
-#define I40E_AQC_MIRROR_RULE_TYPE_VPORT_INGRESS	1
-#define I40E_AQC_MIRROR_RULE_TYPE_VPORT_EGRESS	2
-#define I40E_AQC_MIRROR_RULE_TYPE_VLAN		3
-#define I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS	4
-#define I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS	5
-	__le16 num_entries;
-	__le16 destination;  /* VSI for add, rule id for delete */
-	__le32 addr_high;    /* address of array of 2-byte VSI or VLAN ids */
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule);
-
-struct i40e_aqc_add_delete_mirror_rule_completion {
-	u8	reserved[2];
-	__le16	rule_id;  /* only used on add */
-	__le16	mirror_rules_used;
-	__le16	mirror_rules_free;
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion);
-
-/* Dynamic Device Personalization */
-struct i40e_aqc_write_personalization_profile {
-	u8      flags;
-	u8      reserved[3];
-	__le32  profile_track_id;
-	__le32  addr_high;
-	__le32  addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_write_personalization_profile);
-
-struct i40e_aqc_write_ddp_resp {
-	__le32 error_offset;
-	__le32 error_info;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-struct i40e_aqc_get_applied_profiles {
-	u8      flags;
-#define I40E_AQC_GET_DDP_GET_CONF	0x1
-#define I40E_AQC_GET_DDP_GET_RDPU_CONF	0x2
-	u8      rsv[3];
-	__le32  reserved;
-	__le32  addr_high;
-	__le32  addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_applied_profiles);
-
-/* DCB 0x03xx */
-
-/* PFC Ignore (direct 0x0301)
- *    the command and response use the same descriptor structure
- */
-struct i40e_aqc_pfc_ignore {
-	u8	tc_bitmap;
-	u8	command_flags; /* unused on response */
-#define I40E_AQC_PFC_IGNORE_SET		0x80
-#define I40E_AQC_PFC_IGNORE_CLEAR	0x0
-	u8	reserved[14];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_pfc_ignore);
-
-/* DCB Update (direct 0x0302) uses the i40e_aq_desc structure
- * with no parameters
- */
-
-/* TX scheduler 0x04xx */
-
-/* Almost all the indirect commands use
- * this generic struct to pass the SEID in param0
- */
-struct i40e_aqc_tx_sched_ind {
-	__le16	vsi_seid;
-	u8	reserved[6];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_tx_sched_ind);
-
-/* Several commands respond with a set of queue set handles */
-struct i40e_aqc_qs_handles_resp {
-	__le16 qs_handles[8];
-};
-
-/* Configure VSI BW limits (direct 0x0400) */
-struct i40e_aqc_configure_vsi_bw_limit {
-	__le16	vsi_seid;
-	u8	reserved[2];
-	__le16	credit;
-	u8	reserved1[2];
-	u8	max_credit; /* 0-3, limit = 2^max */
-	u8	reserved2[7];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_vsi_bw_limit);
-
-/* Configure VSI Bandwidth Limit per Traffic Type (indirect 0x0406)
- *    responds with i40e_aqc_qs_handles_resp
- */
-struct i40e_aqc_configure_vsi_ets_sla_bw_data {
-	u8	tc_valid_bits;
-	u8	reserved[15];
-	__le16	tc_bw_credits[8]; /* FW writes back QS handles here */
-
-	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
-	__le16	tc_bw_max[2];
-	u8	reserved1[28];
-};
-
-I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_configure_vsi_ets_sla_bw_data);
-
-/* Configure VSI Bandwidth Allocation per Traffic Type (indirect 0x0407)
- *    responds with i40e_aqc_qs_handles_resp
- */
-struct i40e_aqc_configure_vsi_tc_bw_data {
-	u8	tc_valid_bits;
-	u8	reserved[3];
-	u8	tc_bw_credits[8];
-	u8	reserved1[4];
-	__le16	qs_handles[8];
-};
-
-I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_vsi_tc_bw_data);
-
-/* Query vsi bw configuration (indirect 0x0408) */
-struct i40e_aqc_query_vsi_bw_config_resp {
-	u8	tc_valid_bits;
-	u8	tc_suspended_bits;
-	u8	reserved[14];
-	__le16	qs_handles[8];
-	u8	reserved1[4];
-	__le16	port_bw_limit;
-	u8	reserved2[2];
-	u8	max_bw; /* 0-3, limit = 2^max */
-	u8	reserved3[23];
-};
-
-I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_vsi_bw_config_resp);
-
-/* Query VSI Bandwidth Allocation per Traffic Type (indirect 0x040A) */
-struct i40e_aqc_query_vsi_ets_sla_config_resp {
-	u8	tc_valid_bits;
-	u8	reserved[3];
-	u8	share_credits[8];
-	__le16	credits[8];
-
-	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
-	__le16	tc_bw_max[2];
-};
-
-I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_vsi_ets_sla_config_resp);
-
-/* Configure Switching Component Bandwidth Limit (direct 0x0410) */
-struct i40e_aqc_configure_switching_comp_bw_limit {
-	__le16	seid;
-	u8	reserved[2];
-	__le16	credit;
-	u8	reserved1[2];
-	u8	max_bw; /* 0-3, limit = 2^max */
-	u8	reserved2[7];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_switching_comp_bw_limit);
-
-/* Enable  Physical Port ETS (indirect 0x0413)
- * Modify  Physical Port ETS (indirect 0x0414)
- * Disable Physical Port ETS (indirect 0x0415)
- */
-struct i40e_aqc_configure_switching_comp_ets_data {
-	u8	reserved[4];
-	u8	tc_valid_bits;
-	u8	seepage;
-#define I40E_AQ_ETS_SEEPAGE_EN_MASK	0x1
-	u8	tc_strict_priority_flags;
-	u8	reserved1[17];
-	u8	tc_bw_share_credits[8];
-	u8	reserved2[96];
-};
-
-I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_configure_switching_comp_ets_data);
-
-/* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
-struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
-	u8	tc_valid_bits;
-	u8	reserved[15];
-	__le16	tc_bw_credit[8];
-
-	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
-	__le16	tc_bw_max[2];
-	u8	reserved1[28];
-};
-
-I40E_CHECK_STRUCT_LEN(0x40,
-		      i40e_aqc_configure_switching_comp_ets_bw_limit_data);
-
-/* Configure Switching Component Bandwidth Allocation per Tc
- * (indirect 0x0417)
- */
-struct i40e_aqc_configure_switching_comp_bw_config_data {
-	u8	tc_valid_bits;
-	u8	reserved[2];
-	u8	absolute_credits; /* bool */
-	u8	tc_bw_share_credits[8];
-	u8	reserved1[20];
-};
-
-I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_switching_comp_bw_config_data);
-
-/* Query Switching Component Configuration (indirect 0x0418) */
-struct i40e_aqc_query_switching_comp_ets_config_resp {
-	u8	tc_valid_bits;
-	u8	reserved[35];
-	__le16	port_bw_limit;
-	u8	reserved1[2];
-	u8	tc_bw_max; /* 0-3, limit = 2^max */
-	u8	reserved2[23];
-};
-
-I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_switching_comp_ets_config_resp);
-
-/* Query PhysicalPort ETS Configuration (indirect 0x0419) */
-struct i40e_aqc_query_port_ets_config_resp {
-	u8	reserved[4];
-	u8	tc_valid_bits;
-	u8	reserved1;
-	u8	tc_strict_priority_bits;
-	u8	reserved2;
-	u8	tc_bw_share_credits[8];
-	__le16	tc_bw_limits[8];
-
-	/* 4 bits per tc 0-7, 4th bit reserved, limit = 2^max */
-	__le16	tc_bw_max[2];
-	u8	reserved3[32];
-};
-
-I40E_CHECK_STRUCT_LEN(0x44, i40e_aqc_query_port_ets_config_resp);
-
-/* Query Switching Component Bandwidth Allocation per Traffic Type
- * (indirect 0x041A)
- */
-struct i40e_aqc_query_switching_comp_bw_config_resp {
-	u8	tc_valid_bits;
-	u8	reserved[2];
-	u8	absolute_credits_enable; /* bool */
-	u8	tc_bw_share_credits[8];
-	__le16	tc_bw_limits[8];
-
-	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
-	__le16	tc_bw_max[2];
-};
-
-I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_switching_comp_bw_config_resp);
-
-/* Suspend/resume port TX traffic
- * (direct 0x041B and 0x041C) uses the generic SEID struct
- */
-
-/* Configure partition BW
- * (indirect 0x041D)
- */
-struct i40e_aqc_configure_partition_bw_data {
-	__le16	pf_valid_bits;
-	u8	min_bw[16];      /* guaranteed bandwidth */
-	u8	max_bw[16];      /* bandwidth limit */
-};
-
-I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
-
-/* Get and set the active HMC resource profile and status.
- * (direct 0x0500) and (direct 0x0501)
- */
-struct i40e_aq_get_set_hmc_resource_profile {
-	u8	pm_profile;
-	u8	pe_vf_enabled;
-	u8	reserved[14];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
-
-enum i40e_aq_hmc_profile {
-	/* I40E_HMC_PROFILE_NO_CHANGE	= 0, reserved */
-	I40E_HMC_PROFILE_DEFAULT	= 1,
-	I40E_HMC_PROFILE_FAVOR_VF	= 2,
-	I40E_HMC_PROFILE_EQUAL		= 3,
-};
-
-/* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
-
-/* set in param0 for get phy abilities to report qualified modules */
-#define I40E_AQ_PHY_REPORT_QUALIFIED_MODULES	0x0001
-#define I40E_AQ_PHY_REPORT_INITIAL_VALUES	0x0002
-
-enum i40e_aq_phy_type {
-	I40E_PHY_TYPE_SGMII			= 0x0,
-	I40E_PHY_TYPE_1000BASE_KX		= 0x1,
-	I40E_PHY_TYPE_10GBASE_KX4		= 0x2,
-	I40E_PHY_TYPE_10GBASE_KR		= 0x3,
-	I40E_PHY_TYPE_40GBASE_KR4		= 0x4,
-	I40E_PHY_TYPE_XAUI			= 0x5,
-	I40E_PHY_TYPE_XFI			= 0x6,
-	I40E_PHY_TYPE_SFI			= 0x7,
-	I40E_PHY_TYPE_XLAUI			= 0x8,
-	I40E_PHY_TYPE_XLPPI			= 0x9,
-	I40E_PHY_TYPE_40GBASE_CR4_CU		= 0xA,
-	I40E_PHY_TYPE_10GBASE_CR1_CU		= 0xB,
-	I40E_PHY_TYPE_10GBASE_AOC		= 0xC,
-	I40E_PHY_TYPE_40GBASE_AOC		= 0xD,
-	I40E_PHY_TYPE_UNRECOGNIZED		= 0xE,
-	I40E_PHY_TYPE_UNSUPPORTED		= 0xF,
-	I40E_PHY_TYPE_100BASE_TX		= 0x11,
-	I40E_PHY_TYPE_1000BASE_T		= 0x12,
-	I40E_PHY_TYPE_10GBASE_T			= 0x13,
-	I40E_PHY_TYPE_10GBASE_SR		= 0x14,
-	I40E_PHY_TYPE_10GBASE_LR		= 0x15,
-	I40E_PHY_TYPE_10GBASE_SFPP_CU		= 0x16,
-	I40E_PHY_TYPE_10GBASE_CR1		= 0x17,
-	I40E_PHY_TYPE_40GBASE_CR4		= 0x18,
-	I40E_PHY_TYPE_40GBASE_SR4		= 0x19,
-	I40E_PHY_TYPE_40GBASE_LR4		= 0x1A,
-	I40E_PHY_TYPE_1000BASE_SX		= 0x1B,
-	I40E_PHY_TYPE_1000BASE_LX		= 0x1C,
-	I40E_PHY_TYPE_1000BASE_T_OPTICAL	= 0x1D,
-	I40E_PHY_TYPE_20GBASE_KR2		= 0x1E,
-	I40E_PHY_TYPE_25GBASE_KR		= 0x1F,
-	I40E_PHY_TYPE_25GBASE_CR		= 0x20,
-	I40E_PHY_TYPE_25GBASE_SR		= 0x21,
-	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
-	I40E_PHY_TYPE_25GBASE_AOC		= 0x23,
-	I40E_PHY_TYPE_25GBASE_ACC		= 0x24,
-	I40E_PHY_TYPE_MAX,
-	I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP	= 0xFD,
-	I40E_PHY_TYPE_EMPTY			= 0xFE,
-	I40E_PHY_TYPE_DEFAULT			= 0xFF,
-};
-
-#define I40E_LINK_SPEED_100MB_SHIFT	0x1
-#define I40E_LINK_SPEED_1000MB_SHIFT	0x2
-#define I40E_LINK_SPEED_10GB_SHIFT	0x3
-#define I40E_LINK_SPEED_40GB_SHIFT	0x4
-#define I40E_LINK_SPEED_20GB_SHIFT	0x5
-#define I40E_LINK_SPEED_25GB_SHIFT	0x6
-
-enum i40e_aq_link_speed {
-	I40E_LINK_SPEED_UNKNOWN	= 0,
-	I40E_LINK_SPEED_100MB	= BIT(I40E_LINK_SPEED_100MB_SHIFT),
-	I40E_LINK_SPEED_1GB	= BIT(I40E_LINK_SPEED_1000MB_SHIFT),
-	I40E_LINK_SPEED_10GB	= BIT(I40E_LINK_SPEED_10GB_SHIFT),
-	I40E_LINK_SPEED_40GB	= BIT(I40E_LINK_SPEED_40GB_SHIFT),
-	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT),
-	I40E_LINK_SPEED_25GB	= BIT(I40E_LINK_SPEED_25GB_SHIFT),
-};
-
-struct i40e_aqc_module_desc {
-	u8 oui[3];
-	u8 reserved1;
-	u8 part_number[16];
-	u8 revision[4];
-	u8 reserved2[8];
-};
-
-I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_module_desc);
-
-struct i40e_aq_get_phy_abilities_resp {
-	__le32	phy_type;       /* bitmap using the above enum for offsets */
-	u8	link_speed;     /* bitmap using the above enum bit patterns */
-	u8	abilities;
-#define I40E_AQ_PHY_FLAG_PAUSE_TX	0x01
-#define I40E_AQ_PHY_FLAG_PAUSE_RX	0x02
-#define I40E_AQ_PHY_FLAG_LOW_POWER	0x04
-#define I40E_AQ_PHY_LINK_ENABLED	0x08
-#define I40E_AQ_PHY_AN_ENABLED		0x10
-#define I40E_AQ_PHY_FLAG_MODULE_QUAL	0x20
-#define I40E_AQ_PHY_FEC_ABILITY_KR	0x40
-#define I40E_AQ_PHY_FEC_ABILITY_RS	0x80
-	__le16	eee_capability;
-#define I40E_AQ_EEE_100BASE_TX		0x0002
-#define I40E_AQ_EEE_1000BASE_T		0x0004
-#define I40E_AQ_EEE_10GBASE_T		0x0008
-#define I40E_AQ_EEE_1000BASE_KX		0x0010
-#define I40E_AQ_EEE_10GBASE_KX4		0x0020
-#define I40E_AQ_EEE_10GBASE_KR		0x0040
-	__le32	eeer_val;
-	u8	d3_lpan;
-#define I40E_AQ_SET_PHY_D3_LPAN_ENA	0x01
-	u8	phy_type_ext;
-#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0x01
-#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0x02
-#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
-#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
-#define I40E_AQ_PHY_TYPE_EXT_25G_AOC	0x10
-#define I40E_AQ_PHY_TYPE_EXT_25G_ACC	0x20
-	u8	fec_cfg_curr_mod_ext_info;
-#define I40E_AQ_ENABLE_FEC_KR		0x01
-#define I40E_AQ_ENABLE_FEC_RS		0x02
-#define I40E_AQ_REQUEST_FEC_KR		0x04
-#define I40E_AQ_REQUEST_FEC_RS		0x08
-#define I40E_AQ_ENABLE_FEC_AUTO		0x10
-#define I40E_AQ_FEC
-#define I40E_AQ_MODULE_TYPE_EXT_MASK	0xE0
-#define I40E_AQ_MODULE_TYPE_EXT_SHIFT	5
-
-	u8	ext_comp_code;
-	u8	phy_id[4];
-	u8	module_type[3];
-	u8	qualified_module_count;
-#define I40E_AQ_PHY_MAX_QMS		16
-	struct i40e_aqc_module_desc	qualified_module[I40E_AQ_PHY_MAX_QMS];
-};
-
-I40E_CHECK_STRUCT_LEN(0x218, i40e_aq_get_phy_abilities_resp);
-
-/* Set PHY Config (direct 0x0601) */
-struct i40e_aq_set_phy_config { /* same bits as above in all */
-	__le32	phy_type;
-	u8	link_speed;
-	u8	abilities;
-/* bits 0-2 use the values from get_phy_abilities_resp */
-#define I40E_AQ_PHY_ENABLE_LINK		0x08
-#define I40E_AQ_PHY_ENABLE_AN		0x10
-#define I40E_AQ_PHY_ENABLE_ATOMIC_LINK	0x20
-	__le16	eee_capability;
-	__le32	eeer;
-	u8	low_power_ctrl;
-	u8	phy_type_ext;
-#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0x01
-#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0x02
-#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
-#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
-	u8	fec_config;
-#define I40E_AQ_SET_FEC_ABILITY_KR	BIT(0)
-#define I40E_AQ_SET_FEC_ABILITY_RS	BIT(1)
-#define I40E_AQ_SET_FEC_REQUEST_KR	BIT(2)
-#define I40E_AQ_SET_FEC_REQUEST_RS	BIT(3)
-#define I40E_AQ_SET_FEC_AUTO		BIT(4)
-#define I40E_AQ_PHY_FEC_CONFIG_SHIFT	0x0
-#define I40E_AQ_PHY_FEC_CONFIG_MASK	(0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT)
-	u8	reserved;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
-
-/* Set MAC Config command data structure (direct 0x0603) */
-struct i40e_aq_set_mac_config {
-	__le16	max_frame_size;
-	u8	params;
-#define I40E_AQ_SET_MAC_CONFIG_CRC_EN		0x04
-#define I40E_AQ_SET_MAC_CONFIG_PACING_MASK	0x78
-#define I40E_AQ_SET_MAC_CONFIG_PACING_SHIFT	3
-#define I40E_AQ_SET_MAC_CONFIG_PACING_NONE	0x0
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1B_13TX	0xF
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_9TX	0x9
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_4TX	0x8
-#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_7TX	0x7
-#define I40E_AQ_SET_MAC_CONFIG_PACING_2DW_3TX	0x6
-#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_1TX	0x5
-#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_2TX	0x4
-#define I40E_AQ_SET_MAC_CONFIG_PACING_7DW_3TX	0x3
-#define I40E_AQ_SET_MAC_CONFIG_PACING_4DW_1TX	0x2
-#define I40E_AQ_SET_MAC_CONFIG_PACING_9DW_1TX	0x1
-	u8	tx_timer_priority; /* bitmap */
-	__le16	tx_timer_value;
-	__le16	fc_refresh_threshold;
-	u8	reserved[8];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aq_set_mac_config);
-
-/* Restart Auto-Negotiation (direct 0x605) */
-struct i40e_aqc_set_link_restart_an {
-	u8	command;
-#define I40E_AQ_PHY_RESTART_AN	0x02
-#define I40E_AQ_PHY_LINK_ENABLE	0x04
-	u8	reserved[15];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_link_restart_an);
-
-/* Get Link Status cmd & response data structure (direct 0x0607) */
-struct i40e_aqc_get_link_status {
-	__le16	command_flags; /* only field set on command */
-#define I40E_AQ_LSE_MASK		0x3
-#define I40E_AQ_LSE_NOP			0x0
-#define I40E_AQ_LSE_DISABLE		0x2
-#define I40E_AQ_LSE_ENABLE		0x3
-/* only response uses this flag */
-#define I40E_AQ_LSE_IS_ENABLED		0x1
-	u8	phy_type;    /* i40e_aq_phy_type   */
-	u8	link_speed;  /* i40e_aq_link_speed */
-	u8	link_info;
-#define I40E_AQ_LINK_UP			0x01    /* obsolete */
-#define I40E_AQ_LINK_UP_FUNCTION	0x01
-#define I40E_AQ_LINK_FAULT		0x02
-#define I40E_AQ_LINK_FAULT_TX		0x04
-#define I40E_AQ_LINK_FAULT_RX		0x08
-#define I40E_AQ_LINK_FAULT_REMOTE	0x10
-#define I40E_AQ_LINK_UP_PORT		0x20
-#define I40E_AQ_MEDIA_AVAILABLE		0x40
-#define I40E_AQ_SIGNAL_DETECT		0x80
-	u8	an_info;
-#define I40E_AQ_AN_COMPLETED		0x01
-#define I40E_AQ_LP_AN_ABILITY		0x02
-#define I40E_AQ_PD_FAULT		0x04
-#define I40E_AQ_FEC_EN			0x08
-#define I40E_AQ_PHY_LOW_POWER		0x10
-#define I40E_AQ_LINK_PAUSE_TX		0x20
-#define I40E_AQ_LINK_PAUSE_RX		0x40
-#define I40E_AQ_QUALIFIED_MODULE	0x80
-	u8	ext_info;
-#define I40E_AQ_LINK_PHY_TEMP_ALARM	0x01
-#define I40E_AQ_LINK_XCESSIVE_ERRORS	0x02
-#define I40E_AQ_LINK_TX_SHIFT		0x02
-#define I40E_AQ_LINK_TX_MASK		(0x03 << I40E_AQ_LINK_TX_SHIFT)
-#define I40E_AQ_LINK_TX_ACTIVE		0x00
-#define I40E_AQ_LINK_TX_DRAINED		0x01
-#define I40E_AQ_LINK_TX_FLUSHED		0x03
-#define I40E_AQ_LINK_FORCED_40G		0x10
-/* 25G Error Codes */
-#define I40E_AQ_25G_NO_ERR		0x00
-#define I40E_AQ_25G_NOT_PRESENT		0x01
-#define I40E_AQ_25G_NVM_CRC_ERR		0x02
-#define I40E_AQ_25G_SBUS_UCODE_ERR	0x03
-#define I40E_AQ_25G_SERDES_UCODE_ERR	0x04
-#define I40E_AQ_25G_NIMB_UCODE_ERR	0x05
-	u8	loopback; /* use defines from i40e_aqc_set_lb_mode */
-/* Since firmware API 1.7 loopback field keeps power class info as well */
-#define I40E_AQ_LOOPBACK_MASK		0x07
-#define I40E_AQ_PWR_CLASS_SHIFT_LB	6
-#define I40E_AQ_PWR_CLASS_MASK_LB	(0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB)
-	__le16	max_frame_size;
-	u8	config;
-#define I40E_AQ_CONFIG_FEC_KR_ENA	0x01
-#define I40E_AQ_CONFIG_FEC_RS_ENA	0x02
-#define I40E_AQ_CONFIG_CRC_ENA		0x04
-#define I40E_AQ_CONFIG_PACING_MASK	0x78
-	union {
-		struct {
-			u8	power_desc;
-#define I40E_AQ_LINK_POWER_CLASS_1	0x00
-#define I40E_AQ_LINK_POWER_CLASS_2	0x01
-#define I40E_AQ_LINK_POWER_CLASS_3	0x02
-#define I40E_AQ_LINK_POWER_CLASS_4	0x03
-#define I40E_AQ_PWR_CLASS_MASK		0x03
-			u8	reserved[4];
-		};
-		struct {
-			u8	link_type[4];
-			u8	link_type_ext;
-		};
-	};
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
-
-/* Set event mask command (direct 0x613) */
-struct i40e_aqc_set_phy_int_mask {
-	u8	reserved[8];
-	__le16	event_mask;
-#define I40E_AQ_EVENT_LINK_UPDOWN	0x0002
-#define I40E_AQ_EVENT_MEDIA_NA		0x0004
-#define I40E_AQ_EVENT_LINK_FAULT	0x0008
-#define I40E_AQ_EVENT_PHY_TEMP_ALARM	0x0010
-#define I40E_AQ_EVENT_EXCESSIVE_ERRORS	0x0020
-#define I40E_AQ_EVENT_SIGNAL_DETECT	0x0040
-#define I40E_AQ_EVENT_AN_COMPLETED	0x0080
-#define I40E_AQ_EVENT_MODULE_QUAL_FAIL	0x0100
-#define I40E_AQ_EVENT_PORT_TX_SUSPENDED	0x0200
-	u8	reserved1[6];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_int_mask);
-
-/* Get Local AN advt register (direct 0x0614)
- * Set Local AN advt register (direct 0x0615)
- * Get Link Partner AN advt register (direct 0x0616)
- */
-struct i40e_aqc_an_advt_reg {
-	__le32	local_an_reg0;
-	__le16	local_an_reg1;
-	u8	reserved[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_an_advt_reg);
-
-/* Set Loopback mode (0x0618) */
-struct i40e_aqc_set_lb_mode {
-	__le16	lb_mode;
-#define I40E_AQ_LB_PHY_LOCAL	0x01
-#define I40E_AQ_LB_PHY_REMOTE	0x02
-#define I40E_AQ_LB_MAC_LOCAL	0x04
-	u8	reserved[14];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_lb_mode);
-
-/* Set PHY Debug command (0x0622) */
-struct i40e_aqc_set_phy_debug {
-	u8	command_flags;
-#define I40E_AQ_PHY_DEBUG_RESET_INTERNAL	0x02
-#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT	2
-#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_MASK	(0x03 << \
-					I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT)
-#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_NONE	0x00
-#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_HARD	0x01
-#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SOFT	0x02
-#define I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW	0x10
-	u8	reserved[15];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_debug);
-
-enum i40e_aq_phy_reg_type {
-	I40E_AQC_PHY_REG_INTERNAL	= 0x1,
-	I40E_AQC_PHY_REG_EXERNAL_BASET	= 0x2,
-	I40E_AQC_PHY_REG_EXERNAL_MODULE	= 0x3
-};
-
-/* Run PHY Activity (0x0626) */
-struct i40e_aqc_run_phy_activity {
-	__le16  activity_id;
-	u8      flags;
-	u8      reserved1;
-	__le32  control;
-	__le32  data;
-	u8      reserved2[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity);
-
-/* Set PHY Register command (0x0628) */
-/* Get PHY Register command (0x0629) */
-struct i40e_aqc_phy_register_access {
-	u8	phy_interface;
-#define I40E_AQ_PHY_REG_ACCESS_INTERNAL	0
-#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL	1
-#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE	2
-	u8	dev_address;
-	u8	reserved1[2];
-	__le32	reg_address;
-	__le32	reg_value;
-	u8	reserved2[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
-
-/* NVM Read command (indirect 0x0701)
- * NVM Erase commands (direct 0x0702)
- * NVM Update commands (indirect 0x0703)
- */
-struct i40e_aqc_nvm_update {
-	u8	command_flags;
-#define I40E_AQ_NVM_LAST_CMD			0x01
-#define I40E_AQ_NVM_REARRANGE_TO_FLAT		0x20
-#define I40E_AQ_NVM_REARRANGE_TO_STRUCT		0x40
-#define I40E_AQ_NVM_FLASH_ONLY			0x80
-#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT	1
-#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK	0x03
-#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED	0x03
-#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL	0x01
-	u8	module_pointer;
-	__le16	length;
-	__le32	offset;
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
-
-/* NVM Config Read (indirect 0x0704) */
-struct i40e_aqc_nvm_config_read {
-	__le16	cmd_flags;
-#define I40E_AQ_ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK	1
-#define I40E_AQ_ANVM_READ_SINGLE_FEATURE		0
-#define I40E_AQ_ANVM_READ_MULTIPLE_FEATURES		1
-	__le16	element_count;
-	__le16	element_id;	/* Feature/field ID */
-	__le16	element_id_msw;	/* MSWord of field ID */
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_read);
-
-/* NVM Config Write (indirect 0x0705) */
-struct i40e_aqc_nvm_config_write {
-	__le16	cmd_flags;
-	__le16	element_count;
-	u8	reserved[4];
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
-
-/* Used for 0x0704 as well as for 0x0705 commands */
-#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT		1
-#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK \
-				BIT(I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
-#define I40E_AQ_ANVM_FEATURE		0
-#define I40E_AQ_ANVM_IMMEDIATE_FIELD	BIT(I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
-struct i40e_aqc_nvm_config_data_feature {
-	__le16 feature_id;
-#define I40E_AQ_ANVM_FEATURE_OPTION_OEM_ONLY		0x01
-#define I40E_AQ_ANVM_FEATURE_OPTION_DWORD_MAP		0x08
-#define I40E_AQ_ANVM_FEATURE_OPTION_POR_CSR		0x10
-	__le16 feature_options;
-	__le16 feature_selection;
-};
-
-I40E_CHECK_STRUCT_LEN(0x6, i40e_aqc_nvm_config_data_feature);
-
-struct i40e_aqc_nvm_config_data_immediate_field {
-	__le32 field_id;
-	__le32 field_value;
-	__le16 field_options;
-	__le16 reserved;
-};
-
-I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
-
-/* OEM Post Update (indirect 0x0720)
- * no command data struct used
- */
-struct i40e_aqc_nvm_oem_post_update {
-#define I40E_AQ_NVM_OEM_POST_UPDATE_EXTERNAL_DATA	0x01
-	u8 sel_data;
-	u8 reserved[7];
-};
-
-I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update);
-
-struct i40e_aqc_nvm_oem_post_update_buffer {
-	u8 str_len;
-	u8 dev_addr;
-	__le16 eeprom_addr;
-	u8 data[36];
-};
-
-I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer);
-
-/* Thermal Sensor (indirect 0x0721)
- *     read or set thermal sensor configs and values
- *     takes a sensor and command specific data buffer, not detailed here
- */
-struct i40e_aqc_thermal_sensor {
-	u8 sensor_action;
-#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG	0
-#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG	1
-#define I40E_AQ_THERMAL_SENSOR_READ_TEMP	2
-	u8 reserved[7];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor);
-
-/* Send to PF command (indirect 0x0801) id is only used by PF
- * Send to VF command (indirect 0x0802) id is only used by PF
- * Send to Peer PF command (indirect 0x0803)
- */
-struct i40e_aqc_pf_vf_message {
-	__le32	id;
-	u8	reserved[4];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_pf_vf_message);
-
-/* Alternate structure */
-
-/* Direct write (direct 0x0900)
- * Direct read (direct 0x0902)
- */
-struct i40e_aqc_alternate_write {
-	__le32 address0;
-	__le32 data0;
-	__le32 address1;
-	__le32 data1;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write);
-
-/* Indirect write (indirect 0x0901)
- * Indirect read (indirect 0x0903)
- */
-
-struct i40e_aqc_alternate_ind_write {
-	__le32 address;
-	__le32 length;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_ind_write);
-
-/* Done alternate write (direct 0x0904)
- * uses i40e_aq_desc
- */
-struct i40e_aqc_alternate_write_done {
-	__le16	cmd_flags;
-#define I40E_AQ_ALTERNATE_MODE_BIOS_MASK	1
-#define I40E_AQ_ALTERNATE_MODE_BIOS_LEGACY	0
-#define I40E_AQ_ALTERNATE_MODE_BIOS_UEFI	1
-#define I40E_AQ_ALTERNATE_RESET_NEEDED		2
-	u8	reserved[14];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write_done);
-
-/* Set OEM mode (direct 0x0905) */
-struct i40e_aqc_alternate_set_mode {
-	__le32	mode;
-#define I40E_AQ_ALTERNATE_MODE_NONE	0
-#define I40E_AQ_ALTERNATE_MODE_OEM	1
-	u8	reserved[12];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_set_mode);
-
-/* Clear port Alternate RAM (direct 0x0906) uses i40e_aq_desc */
-
-/* async events 0x10xx */
-
-/* Lan Queue Overflow Event (direct, 0x1001) */
-struct i40e_aqc_lan_overflow {
-	__le32	prtdcb_rupto;
-	__le32	otx_ctl;
-	u8	reserved[8];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lan_overflow);
-
-/* Get LLDP MIB (indirect 0x0A00) */
-struct i40e_aqc_lldp_get_mib {
-	u8	type;
-	u8	reserved1;
-#define I40E_AQ_LLDP_MIB_TYPE_MASK		0x3
-#define I40E_AQ_LLDP_MIB_LOCAL			0x0
-#define I40E_AQ_LLDP_MIB_REMOTE			0x1
-#define I40E_AQ_LLDP_MIB_LOCAL_AND_REMOTE	0x2
-#define I40E_AQ_LLDP_BRIDGE_TYPE_MASK		0xC
-#define I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT		0x2
-#define I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE	0x0
-#define I40E_AQ_LLDP_BRIDGE_TYPE_NON_TPMR	0x1
-#define I40E_AQ_LLDP_TX_SHIFT			0x4
-#define I40E_AQ_LLDP_TX_MASK			(0x03 << I40E_AQ_LLDP_TX_SHIFT)
-/* TX pause flags use I40E_AQ_LINK_TX_* above */
-	__le16	local_len;
-	__le16	remote_len;
-	u8	reserved2[2];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_get_mib);
-
-/* Configure LLDP MIB Change Event (direct 0x0A01)
- * also used for the event (with type in the command field)
- */
-struct i40e_aqc_lldp_update_mib {
-	u8	command;
-#define I40E_AQ_LLDP_MIB_UPDATE_ENABLE	0x0
-#define I40E_AQ_LLDP_MIB_UPDATE_DISABLE	0x1
-	u8	reserved[7];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_mib);
-
-/* Add LLDP TLV (indirect 0x0A02)
- * Delete LLDP TLV (indirect 0x0A04)
- */
-struct i40e_aqc_lldp_add_tlv {
-	u8	type; /* only nearest bridge and non-TPMR from 0x0A00 */
-	u8	reserved1[1];
-	__le16	len;
-	u8	reserved2[4];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_add_tlv);
-
-/* Update LLDP TLV (indirect 0x0A03) */
-struct i40e_aqc_lldp_update_tlv {
-	u8	type; /* only nearest bridge and non-TPMR from 0x0A00 */
-	u8	reserved;
-	__le16	old_len;
-	__le16	new_offset;
-	__le16	new_len;
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_tlv);
-
-/* Stop LLDP (direct 0x0A05) */
-struct i40e_aqc_lldp_stop {
-	u8	command;
-#define I40E_AQ_LLDP_AGENT_STOP		0x0
-#define I40E_AQ_LLDP_AGENT_SHUTDOWN	0x1
-	u8	reserved[15];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop);
-
-/* Start LLDP (direct 0x0A06) */
-
-struct i40e_aqc_lldp_start {
-	u8	command;
-#define I40E_AQ_LLDP_AGENT_START	0x1
-	u8	reserved[15];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
-
-/* Set DCB (direct 0x0303) */
-struct i40e_aqc_set_dcb_parameters {
-	u8 command;
-#define I40E_AQ_DCB_SET_AGENT	0x1
-#define I40E_DCB_VALID		0x1
-	u8 valid_flags;
-	u8 reserved[14];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_set_dcb_parameters);
-
-/* Apply MIB changes (0x0A07)
- * uses the generic struct as it contains no data
- */
-
-/* Add Udp Tunnel command and completion (direct 0x0B00) */
-struct i40e_aqc_add_udp_tunnel {
-	__le16	udp_port;
-	u8	reserved0[3];
-	u8	protocol_type;
-#define I40E_AQC_TUNNEL_TYPE_VXLAN	0x00
-#define I40E_AQC_TUNNEL_TYPE_NGE	0x01
-#define I40E_AQC_TUNNEL_TYPE_TEREDO	0x10
-#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE	0x11
-	u8	reserved1[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel);
-
-struct i40e_aqc_add_udp_tunnel_completion {
-	__le16 udp_port;
-	u8	filter_entry_index;
-	u8	multiple_pfs;
-#define I40E_AQC_SINGLE_PF		0x0
-#define I40E_AQC_MULTIPLE_PFS		0x1
-	u8	total_filters;
-	u8	reserved[11];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel_completion);
-
-/* remove UDP Tunnel command (0x0B01) */
-struct i40e_aqc_remove_udp_tunnel {
-	u8	reserved[2];
-	u8	index; /* 0 to 15 */
-	u8	reserved2[13];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_udp_tunnel);
-
-struct i40e_aqc_del_udp_tunnel_completion {
-	__le16	udp_port;
-	u8	index; /* 0 to 15 */
-	u8	multiple_pfs;
-	u8	total_filters_used;
-	u8	reserved1[11];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion);
-
-struct i40e_aqc_get_set_rss_key {
-#define I40E_AQC_SET_RSS_KEY_VSI_VALID		BIT(15)
-#define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT	0
-#define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK	(0x3FF << \
-					I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT)
-	__le16	vsi_id;
-	u8	reserved[6];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_key);
-
-struct i40e_aqc_get_set_rss_key_data {
-	u8 standard_rss_key[0x28];
-	u8 extended_hash_key[0xc];
-};
-
-I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data);
-
-struct i40e_aqc_get_set_rss_lut {
-#define I40E_AQC_SET_RSS_LUT_VSI_VALID		BIT(15)
-#define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT	0
-#define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK	(0x3FF << \
-					I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT)
-	__le16	vsi_id;
-#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT	0
-#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \
-				BIT(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT)
-
-#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI	0
-#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF	1
-	__le16	flags;
-	u8	reserved[4];
-	__le32	addr_high;
-	__le32	addr_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut);
-
-/* tunnel key structure 0x0B10 */
-
-struct i40e_aqc_tunnel_key_structure_A0 {
-	__le16     key1_off;
-	__le16     key1_len;
-	__le16     key2_off;
-	__le16     key2_len;
-	__le16     flags;
-#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDE 0x01
-/* response flags */
-#define I40E_AQC_TUNNEL_KEY_STRUCT_SUCCESS    0x01
-#define I40E_AQC_TUNNEL_KEY_STRUCT_MODIFIED   0x02
-#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDDEN 0x03
-	u8         reserved[6];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_tunnel_key_structure_A0);
-
-struct i40e_aqc_tunnel_key_structure {
-	u8	key1_off;
-	u8	key2_off;
-	u8	key1_len;  /* 0 to 15 */
-	u8	key2_len;  /* 0 to 15 */
-	u8	flags;
-#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDE	0x01
-/* response flags */
-#define I40E_AQC_TUNNEL_KEY_STRUCT_SUCCESS	0x01
-#define I40E_AQC_TUNNEL_KEY_STRUCT_MODIFIED	0x02
-#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDDEN	0x03
-	u8	network_key_index;
-#define I40E_AQC_NETWORK_KEY_INDEX_VXLAN		0x0
-#define I40E_AQC_NETWORK_KEY_INDEX_NGE			0x1
-#define I40E_AQC_NETWORK_KEY_INDEX_FLEX_MAC_IN_UDP	0x2
-#define I40E_AQC_NETWORK_KEY_INDEX_GRE			0x3
-	u8	reserved[10];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_tunnel_key_structure);
-
-/* OEM mode commands (direct 0xFE0x) */
-struct i40e_aqc_oem_param_change {
-	__le32	param_type;
-#define I40E_AQ_OEM_PARAM_TYPE_PF_CTL	0
-#define I40E_AQ_OEM_PARAM_TYPE_BW_CTL	1
-#define I40E_AQ_OEM_PARAM_MAC		2
-	__le32	param_value1;
-	__le16	param_value2;
-	u8	reserved[6];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_param_change);
-
-struct i40e_aqc_oem_state_change {
-	__le32	state;
-#define I40E_AQ_OEM_STATE_LINK_DOWN	0x0
-#define I40E_AQ_OEM_STATE_LINK_UP	0x1
-	u8	reserved[12];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_state_change);
-
-/* Initialize OCSD (0xFE02, direct) */
-struct i40e_aqc_opc_oem_ocsd_initialize {
-	u8 type_status;
-	u8 reserved1[3];
-	__le32 ocsd_memory_block_addr_high;
-	__le32 ocsd_memory_block_addr_low;
-	__le32 requested_update_interval;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocsd_initialize);
-
-/* Initialize OCBB  (0xFE03, direct) */
-struct i40e_aqc_opc_oem_ocbb_initialize {
-	u8 type_status;
-	u8 reserved1[3];
-	__le32 ocbb_memory_block_addr_high;
-	__le32 ocbb_memory_block_addr_low;
-	u8 reserved2[4];
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocbb_initialize);
-
-/* debug commands */
-
-/* get device id (0xFF00) uses the generic structure */
-
-/* set test mode (0xFF01, internal) */
-
-struct i40e_acq_set_test_mode {
-	u8	mode;
-#define I40E_AQ_TEST_PARTIAL	0
-#define I40E_AQ_TEST_FULL	1
-#define I40E_AQ_TEST_NVM	2
-	u8	reserved[3];
-	u8	command;
-#define I40E_AQ_TEST_OPEN	0
-#define I40E_AQ_TEST_CLOSE	1
-#define I40E_AQ_TEST_INC	2
-	u8	reserved2[3];
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_acq_set_test_mode);
-
-/* Debug Read Register command (0xFF03)
- * Debug Write Register command (0xFF04)
- */
-struct i40e_aqc_debug_reg_read_write {
-	__le32 reserved;
-	__le32 address;
-	__le32 value_high;
-	__le32 value_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_reg_read_write);
-
-/* Scatter/gather Reg Read  (indirect 0xFF05)
- * Scatter/gather Reg Write (indirect 0xFF06)
- */
-
-/* i40e_aq_desc is used for the command */
-struct i40e_aqc_debug_reg_sg_element_data {
-	__le32 address;
-	__le32 value;
-};
-
-/* Debug Modify register (direct 0xFF07) */
-struct i40e_aqc_debug_modify_reg {
-	__le32 address;
-	__le32 value;
-	__le32 clear_mask;
-	__le32 set_mask;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_reg);
-
-/* dump internal data (0xFF08, indirect) */
-
-#define I40E_AQ_CLUSTER_ID_AUX		0
-#define I40E_AQ_CLUSTER_ID_SWITCH_FLU	1
-#define I40E_AQ_CLUSTER_ID_TXSCHED	2
-#define I40E_AQ_CLUSTER_ID_HMC		3
-#define I40E_AQ_CLUSTER_ID_MAC0		4
-#define I40E_AQ_CLUSTER_ID_MAC1		5
-#define I40E_AQ_CLUSTER_ID_MAC2		6
-#define I40E_AQ_CLUSTER_ID_MAC3		7
-#define I40E_AQ_CLUSTER_ID_DCB		8
-#define I40E_AQ_CLUSTER_ID_EMP_MEM	9
-#define I40E_AQ_CLUSTER_ID_PKT_BUF	10
-#define I40E_AQ_CLUSTER_ID_ALTRAM	11
-
-struct i40e_aqc_debug_dump_internals {
-	u8	cluster_id;
-	u8	table_id;
-	__le16	data_size;
-	__le32	idx;
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_dump_internals);
-
-struct i40e_aqc_debug_modify_internals {
-	u8	cluster_id;
-	u8	cluster_specific_params[7];
-	__le32	address_high;
-	__le32	address_low;
-};
-
-I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals);
-
-#endif /* _I40E_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
deleted file mode 100644
index cb86892..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_ALLOC_H_
-#define _I40E_ALLOC_H_
-
-struct i40e_hw;
-
-/* Memory allocation types */
-enum i40e_memory_type {
-	i40e_mem_arq_buf = 0,		/* ARQ indirect command buffer */
-	i40e_mem_asq_buf = 1,
-	i40e_mem_atq_buf = 2,		/* ATQ indirect command buffer */
-	i40e_mem_arq_ring = 3,		/* ARQ descriptor ring */
-	i40e_mem_atq_ring = 4,		/* ATQ descriptor ring */
-	i40e_mem_pd = 5,		/* Page Descriptor */
-	i40e_mem_bp = 6,		/* Backing Page - 4KB */
-	i40e_mem_bp_jumbo = 7,		/* Backing Page - > 4KB */
-	i40e_mem_reserved
-};
-
-/* prototypes for functions used for dynamic memory allocation */
-i40e_status i40e_allocate_dma_mem(struct i40e_hw *hw,
-					    struct i40e_dma_mem *mem,
-					    enum i40e_memory_type type,
-					    u64 size, u32 alignment);
-i40e_status i40e_free_dma_mem(struct i40e_hw *hw,
-					struct i40e_dma_mem *mem);
-i40e_status i40e_allocate_virt_mem(struct i40e_hw *hw,
-					     struct i40e_virt_mem *mem,
-					     u32 size);
-i40e_status i40e_free_virt_mem(struct i40e_hw *hw,
-					 struct i40e_virt_mem *mem);
-
-#endif /* _I40E_ALLOC_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
deleted file mode 100644
index eea280b..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ /dev/null
@@ -1,1320 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#include "i40e_type.h"
-#include "i40e_adminq.h"
-#include "i40e_prototype.h"
-#include <linux/avf/virtchnl.h>
-
-/**
- * i40e_set_mac_type - Sets MAC type
- * @hw: pointer to the HW structure
- *
- * This function sets the mac type of the adapter based on the
- * vendor ID and device ID stored in the hw structure.
- **/
-i40e_status i40e_set_mac_type(struct i40e_hw *hw)
-{
-	i40e_status status = 0;
-
-	if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
-		switch (hw->device_id) {
-		case I40E_DEV_ID_SFP_XL710:
-		case I40E_DEV_ID_QEMU:
-		case I40E_DEV_ID_KX_B:
-		case I40E_DEV_ID_KX_C:
-		case I40E_DEV_ID_QSFP_A:
-		case I40E_DEV_ID_QSFP_B:
-		case I40E_DEV_ID_QSFP_C:
-		case I40E_DEV_ID_10G_BASE_T:
-		case I40E_DEV_ID_10G_BASE_T4:
-		case I40E_DEV_ID_20G_KR2:
-		case I40E_DEV_ID_20G_KR2_A:
-		case I40E_DEV_ID_25G_B:
-		case I40E_DEV_ID_25G_SFP28:
-			hw->mac.type = I40E_MAC_XL710;
-			break;
-		case I40E_DEV_ID_SFP_X722:
-		case I40E_DEV_ID_1G_BASE_T_X722:
-		case I40E_DEV_ID_10G_BASE_T_X722:
-		case I40E_DEV_ID_SFP_I_X722:
-			hw->mac.type = I40E_MAC_X722;
-			break;
-		case I40E_DEV_ID_X722_VF:
-			hw->mac.type = I40E_MAC_X722_VF;
-			break;
-		case I40E_DEV_ID_VF:
-		case I40E_DEV_ID_VF_HV:
-		case I40E_DEV_ID_ADAPTIVE_VF:
-			hw->mac.type = I40E_MAC_VF;
-			break;
-		default:
-			hw->mac.type = I40E_MAC_GENERIC;
-			break;
-		}
-	} else {
-		status = I40E_ERR_DEVICE_NOT_SUPPORTED;
-	}
-
-	hw_dbg(hw, "i40e_set_mac_type found mac: %d, returns: %d\n",
-		  hw->mac.type, status);
-	return status;
-}
-
-/**
- * i40evf_aq_str - convert AQ err code to a string
- * @hw: pointer to the HW structure
- * @aq_err: the AQ error code to convert
- **/
-const char *i40evf_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err)
-{
-	switch (aq_err) {
-	case I40E_AQ_RC_OK:
-		return "OK";
-	case I40E_AQ_RC_EPERM:
-		return "I40E_AQ_RC_EPERM";
-	case I40E_AQ_RC_ENOENT:
-		return "I40E_AQ_RC_ENOENT";
-	case I40E_AQ_RC_ESRCH:
-		return "I40E_AQ_RC_ESRCH";
-	case I40E_AQ_RC_EINTR:
-		return "I40E_AQ_RC_EINTR";
-	case I40E_AQ_RC_EIO:
-		return "I40E_AQ_RC_EIO";
-	case I40E_AQ_RC_ENXIO:
-		return "I40E_AQ_RC_ENXIO";
-	case I40E_AQ_RC_E2BIG:
-		return "I40E_AQ_RC_E2BIG";
-	case I40E_AQ_RC_EAGAIN:
-		return "I40E_AQ_RC_EAGAIN";
-	case I40E_AQ_RC_ENOMEM:
-		return "I40E_AQ_RC_ENOMEM";
-	case I40E_AQ_RC_EACCES:
-		return "I40E_AQ_RC_EACCES";
-	case I40E_AQ_RC_EFAULT:
-		return "I40E_AQ_RC_EFAULT";
-	case I40E_AQ_RC_EBUSY:
-		return "I40E_AQ_RC_EBUSY";
-	case I40E_AQ_RC_EEXIST:
-		return "I40E_AQ_RC_EEXIST";
-	case I40E_AQ_RC_EINVAL:
-		return "I40E_AQ_RC_EINVAL";
-	case I40E_AQ_RC_ENOTTY:
-		return "I40E_AQ_RC_ENOTTY";
-	case I40E_AQ_RC_ENOSPC:
-		return "I40E_AQ_RC_ENOSPC";
-	case I40E_AQ_RC_ENOSYS:
-		return "I40E_AQ_RC_ENOSYS";
-	case I40E_AQ_RC_ERANGE:
-		return "I40E_AQ_RC_ERANGE";
-	case I40E_AQ_RC_EFLUSHED:
-		return "I40E_AQ_RC_EFLUSHED";
-	case I40E_AQ_RC_BAD_ADDR:
-		return "I40E_AQ_RC_BAD_ADDR";
-	case I40E_AQ_RC_EMODE:
-		return "I40E_AQ_RC_EMODE";
-	case I40E_AQ_RC_EFBIG:
-		return "I40E_AQ_RC_EFBIG";
-	}
-
-	snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err);
-	return hw->err_str;
-}
-
-/**
- * i40evf_stat_str - convert status err code to a string
- * @hw: pointer to the HW structure
- * @stat_err: the status error code to convert
- **/
-const char *i40evf_stat_str(struct i40e_hw *hw, i40e_status stat_err)
-{
-	switch (stat_err) {
-	case 0:
-		return "OK";
-	case I40E_ERR_NVM:
-		return "I40E_ERR_NVM";
-	case I40E_ERR_NVM_CHECKSUM:
-		return "I40E_ERR_NVM_CHECKSUM";
-	case I40E_ERR_PHY:
-		return "I40E_ERR_PHY";
-	case I40E_ERR_CONFIG:
-		return "I40E_ERR_CONFIG";
-	case I40E_ERR_PARAM:
-		return "I40E_ERR_PARAM";
-	case I40E_ERR_MAC_TYPE:
-		return "I40E_ERR_MAC_TYPE";
-	case I40E_ERR_UNKNOWN_PHY:
-		return "I40E_ERR_UNKNOWN_PHY";
-	case I40E_ERR_LINK_SETUP:
-		return "I40E_ERR_LINK_SETUP";
-	case I40E_ERR_ADAPTER_STOPPED:
-		return "I40E_ERR_ADAPTER_STOPPED";
-	case I40E_ERR_INVALID_MAC_ADDR:
-		return "I40E_ERR_INVALID_MAC_ADDR";
-	case I40E_ERR_DEVICE_NOT_SUPPORTED:
-		return "I40E_ERR_DEVICE_NOT_SUPPORTED";
-	case I40E_ERR_MASTER_REQUESTS_PENDING:
-		return "I40E_ERR_MASTER_REQUESTS_PENDING";
-	case I40E_ERR_INVALID_LINK_SETTINGS:
-		return "I40E_ERR_INVALID_LINK_SETTINGS";
-	case I40E_ERR_AUTONEG_NOT_COMPLETE:
-		return "I40E_ERR_AUTONEG_NOT_COMPLETE";
-	case I40E_ERR_RESET_FAILED:
-		return "I40E_ERR_RESET_FAILED";
-	case I40E_ERR_SWFW_SYNC:
-		return "I40E_ERR_SWFW_SYNC";
-	case I40E_ERR_NO_AVAILABLE_VSI:
-		return "I40E_ERR_NO_AVAILABLE_VSI";
-	case I40E_ERR_NO_MEMORY:
-		return "I40E_ERR_NO_MEMORY";
-	case I40E_ERR_BAD_PTR:
-		return "I40E_ERR_BAD_PTR";
-	case I40E_ERR_RING_FULL:
-		return "I40E_ERR_RING_FULL";
-	case I40E_ERR_INVALID_PD_ID:
-		return "I40E_ERR_INVALID_PD_ID";
-	case I40E_ERR_INVALID_QP_ID:
-		return "I40E_ERR_INVALID_QP_ID";
-	case I40E_ERR_INVALID_CQ_ID:
-		return "I40E_ERR_INVALID_CQ_ID";
-	case I40E_ERR_INVALID_CEQ_ID:
-		return "I40E_ERR_INVALID_CEQ_ID";
-	case I40E_ERR_INVALID_AEQ_ID:
-		return "I40E_ERR_INVALID_AEQ_ID";
-	case I40E_ERR_INVALID_SIZE:
-		return "I40E_ERR_INVALID_SIZE";
-	case I40E_ERR_INVALID_ARP_INDEX:
-		return "I40E_ERR_INVALID_ARP_INDEX";
-	case I40E_ERR_INVALID_FPM_FUNC_ID:
-		return "I40E_ERR_INVALID_FPM_FUNC_ID";
-	case I40E_ERR_QP_INVALID_MSG_SIZE:
-		return "I40E_ERR_QP_INVALID_MSG_SIZE";
-	case I40E_ERR_QP_TOOMANY_WRS_POSTED:
-		return "I40E_ERR_QP_TOOMANY_WRS_POSTED";
-	case I40E_ERR_INVALID_FRAG_COUNT:
-		return "I40E_ERR_INVALID_FRAG_COUNT";
-	case I40E_ERR_QUEUE_EMPTY:
-		return "I40E_ERR_QUEUE_EMPTY";
-	case I40E_ERR_INVALID_ALIGNMENT:
-		return "I40E_ERR_INVALID_ALIGNMENT";
-	case I40E_ERR_FLUSHED_QUEUE:
-		return "I40E_ERR_FLUSHED_QUEUE";
-	case I40E_ERR_INVALID_PUSH_PAGE_INDEX:
-		return "I40E_ERR_INVALID_PUSH_PAGE_INDEX";
-	case I40E_ERR_INVALID_IMM_DATA_SIZE:
-		return "I40E_ERR_INVALID_IMM_DATA_SIZE";
-	case I40E_ERR_TIMEOUT:
-		return "I40E_ERR_TIMEOUT";
-	case I40E_ERR_OPCODE_MISMATCH:
-		return "I40E_ERR_OPCODE_MISMATCH";
-	case I40E_ERR_CQP_COMPL_ERROR:
-		return "I40E_ERR_CQP_COMPL_ERROR";
-	case I40E_ERR_INVALID_VF_ID:
-		return "I40E_ERR_INVALID_VF_ID";
-	case I40E_ERR_INVALID_HMCFN_ID:
-		return "I40E_ERR_INVALID_HMCFN_ID";
-	case I40E_ERR_BACKING_PAGE_ERROR:
-		return "I40E_ERR_BACKING_PAGE_ERROR";
-	case I40E_ERR_NO_PBLCHUNKS_AVAILABLE:
-		return "I40E_ERR_NO_PBLCHUNKS_AVAILABLE";
-	case I40E_ERR_INVALID_PBLE_INDEX:
-		return "I40E_ERR_INVALID_PBLE_INDEX";
-	case I40E_ERR_INVALID_SD_INDEX:
-		return "I40E_ERR_INVALID_SD_INDEX";
-	case I40E_ERR_INVALID_PAGE_DESC_INDEX:
-		return "I40E_ERR_INVALID_PAGE_DESC_INDEX";
-	case I40E_ERR_INVALID_SD_TYPE:
-		return "I40E_ERR_INVALID_SD_TYPE";
-	case I40E_ERR_MEMCPY_FAILED:
-		return "I40E_ERR_MEMCPY_FAILED";
-	case I40E_ERR_INVALID_HMC_OBJ_INDEX:
-		return "I40E_ERR_INVALID_HMC_OBJ_INDEX";
-	case I40E_ERR_INVALID_HMC_OBJ_COUNT:
-		return "I40E_ERR_INVALID_HMC_OBJ_COUNT";
-	case I40E_ERR_INVALID_SRQ_ARM_LIMIT:
-		return "I40E_ERR_INVALID_SRQ_ARM_LIMIT";
-	case I40E_ERR_SRQ_ENABLED:
-		return "I40E_ERR_SRQ_ENABLED";
-	case I40E_ERR_ADMIN_QUEUE_ERROR:
-		return "I40E_ERR_ADMIN_QUEUE_ERROR";
-	case I40E_ERR_ADMIN_QUEUE_TIMEOUT:
-		return "I40E_ERR_ADMIN_QUEUE_TIMEOUT";
-	case I40E_ERR_BUF_TOO_SHORT:
-		return "I40E_ERR_BUF_TOO_SHORT";
-	case I40E_ERR_ADMIN_QUEUE_FULL:
-		return "I40E_ERR_ADMIN_QUEUE_FULL";
-	case I40E_ERR_ADMIN_QUEUE_NO_WORK:
-		return "I40E_ERR_ADMIN_QUEUE_NO_WORK";
-	case I40E_ERR_BAD_IWARP_CQE:
-		return "I40E_ERR_BAD_IWARP_CQE";
-	case I40E_ERR_NVM_BLANK_MODE:
-		return "I40E_ERR_NVM_BLANK_MODE";
-	case I40E_ERR_NOT_IMPLEMENTED:
-		return "I40E_ERR_NOT_IMPLEMENTED";
-	case I40E_ERR_PE_DOORBELL_NOT_ENABLED:
-		return "I40E_ERR_PE_DOORBELL_NOT_ENABLED";
-	case I40E_ERR_DIAG_TEST_FAILED:
-		return "I40E_ERR_DIAG_TEST_FAILED";
-	case I40E_ERR_NOT_READY:
-		return "I40E_ERR_NOT_READY";
-	case I40E_NOT_SUPPORTED:
-		return "I40E_NOT_SUPPORTED";
-	case I40E_ERR_FIRMWARE_API_VERSION:
-		return "I40E_ERR_FIRMWARE_API_VERSION";
-	case I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
-		return "I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR";
-	}
-
-	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
-	return hw->err_str;
-}
-
-/**
- * i40evf_debug_aq
- * @hw: pointer to the hw struct
- * @mask: debug mask
- * @desc: pointer to admin queue descriptor
- * @buffer: pointer to command buffer
- * @buf_len: max length of buffer
- *
- * Dumps debug log about adminq command with descriptor contents.
- **/
-void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
-		   void *buffer, u16 buf_len)
-{
-	struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
-	u8 *buf = (u8 *)buffer;
-
-	if ((!(mask & hw->debug_mask)) || (desc == NULL))
-		return;
-
-	i40e_debug(hw, mask,
-		   "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
-		   le16_to_cpu(aq_desc->opcode),
-		   le16_to_cpu(aq_desc->flags),
-		   le16_to_cpu(aq_desc->datalen),
-		   le16_to_cpu(aq_desc->retval));
-	i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
-		   le32_to_cpu(aq_desc->cookie_high),
-		   le32_to_cpu(aq_desc->cookie_low));
-	i40e_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
-		   le32_to_cpu(aq_desc->params.internal.param0),
-		   le32_to_cpu(aq_desc->params.internal.param1));
-	i40e_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
-		   le32_to_cpu(aq_desc->params.external.addr_high),
-		   le32_to_cpu(aq_desc->params.external.addr_low));
-
-	if ((buffer != NULL) && (aq_desc->datalen != 0)) {
-		u16 len = le16_to_cpu(aq_desc->datalen);
-
-		i40e_debug(hw, mask, "AQ CMD Buffer:\n");
-		if (buf_len < len)
-			len = buf_len;
-		/* write the full 16-byte chunks */
-		if (hw->debug_mask & mask) {
-			char prefix[27];
-
-			snprintf(prefix, sizeof(prefix),
-				 "i40evf %02x:%02x.%x: \t0x",
-				 hw->bus.bus_id,
-				 hw->bus.device,
-				 hw->bus.func);
-
-			print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET,
-				       16, 1, buf, len, false);
-		}
-	}
-}
-
-/**
- * i40evf_check_asq_alive
- * @hw: pointer to the hw struct
- *
- * Returns true if Queue is enabled else false.
- **/
-bool i40evf_check_asq_alive(struct i40e_hw *hw)
-{
-	if (hw->aq.asq.len)
-		return !!(rd32(hw, hw->aq.asq.len) &
-			  I40E_VF_ATQLEN1_ATQENABLE_MASK);
-	else
-		return false;
-}
-
-/**
- * i40evf_aq_queue_shutdown
- * @hw: pointer to the hw struct
- * @unloading: is the driver unloading itself
- *
- * Tell the Firmware that we're shutting down the AdminQ and whether
- * or not the driver is unloading as well.
- **/
-i40e_status i40evf_aq_queue_shutdown(struct i40e_hw *hw,
-					     bool unloading)
-{
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_queue_shutdown *cmd =
-		(struct i40e_aqc_queue_shutdown *)&desc.params.raw;
-	i40e_status status;
-
-	i40evf_fill_default_direct_cmd_desc(&desc,
-					  i40e_aqc_opc_queue_shutdown);
-
-	if (unloading)
-		cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING);
-	status = i40evf_asq_send_command(hw, &desc, NULL, 0, NULL);
-
-	return status;
-}
-
-/**
- * i40e_aq_get_set_rss_lut
- * @hw: pointer to the hardware structure
- * @vsi_id: vsi fw index
- * @pf_lut: for PF table set true, for VSI table set false
- * @lut: pointer to the lut buffer provided by the caller
- * @lut_size: size of the lut buffer
- * @set: set true to set the table, false to get the table
- *
- * Internal function to get or set the RSS lookup table
- **/
-static i40e_status i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
-					   u16 vsi_id, bool pf_lut,
-					   u8 *lut, u16 lut_size,
-					   bool set)
-{
-	i40e_status status;
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_get_set_rss_lut *cmd_resp =
-		   (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw;
-
-	if (set)
-		i40evf_fill_default_direct_cmd_desc(&desc,
-						    i40e_aqc_opc_set_rss_lut);
-	else
-		i40evf_fill_default_direct_cmd_desc(&desc,
-						    i40e_aqc_opc_get_rss_lut);
-
-	/* Indirect command */
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
-
-	cmd_resp->vsi_id =
-			cpu_to_le16((u16)((vsi_id <<
-					  I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) &
-					  I40E_AQC_SET_RSS_LUT_VSI_ID_MASK));
-	cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_LUT_VSI_VALID);
-
-	if (pf_lut)
-		cmd_resp->flags |= cpu_to_le16((u16)
-					((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF <<
-					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
-					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
-	else
-		cmd_resp->flags |= cpu_to_le16((u16)
-					((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI <<
-					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
-					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
-
-	status = i40evf_asq_send_command(hw, &desc, lut, lut_size, NULL);
-
-	return status;
-}
-
-/**
- * i40evf_aq_get_rss_lut
- * @hw: pointer to the hardware structure
- * @vsi_id: vsi fw index
- * @pf_lut: for PF table set true, for VSI table set false
- * @lut: pointer to the lut buffer provided by the caller
- * @lut_size: size of the lut buffer
- *
- * get the RSS lookup table, PF or VSI type
- **/
-i40e_status i40evf_aq_get_rss_lut(struct i40e_hw *hw, u16 vsi_id,
-				  bool pf_lut, u8 *lut, u16 lut_size)
-{
-	return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size,
-				       false);
-}
-
-/**
- * i40evf_aq_set_rss_lut
- * @hw: pointer to the hardware structure
- * @vsi_id: vsi fw index
- * @pf_lut: for PF table set true, for VSI table set false
- * @lut: pointer to the lut buffer provided by the caller
- * @lut_size: size of the lut buffer
- *
- * set the RSS lookup table, PF or VSI type
- **/
-i40e_status i40evf_aq_set_rss_lut(struct i40e_hw *hw, u16 vsi_id,
-				  bool pf_lut, u8 *lut, u16 lut_size)
-{
-	return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true);
-}
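
The two thin wrappers above exist so callers never pass the bare set/get
flag. As a usage sketch, a VF would typically fill the LUT round-robin
across its active queues before enabling RSS; the function name and the
64-byte LUT size are assumptions of this example, not taken from the driver:

static i40e_status example_program_rss_lut(struct i40e_hw *hw, u16 vsi_id,
					   u16 num_queues)
{
	u8 lut[64];	/* assumed VF LUT size for this sketch */
	u16 i;

	/* spread Rx flows round-robin across the active queues;
	 * num_queues must be nonzero
	 */
	for (i = 0; i < sizeof(lut); i++)
		lut[i] = i % num_queues;

	return i40evf_aq_set_rss_lut(hw, vsi_id, false, lut, sizeof(lut));
}
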
-
-/**
- * i40e_aq_get_set_rss_key
- * @hw: pointer to the hw struct
- * @vsi_id: vsi fw index
- * @key: pointer to key info struct
- * @set: set true to set the key, false to get the key
- *
- * Internal function to get or set the RSS key per VSI
- **/
-static i40e_status i40e_aq_get_set_rss_key(struct i40e_hw *hw,
-				      u16 vsi_id,
-				      struct i40e_aqc_get_set_rss_key_data *key,
-				      bool set)
-{
-	i40e_status status;
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_get_set_rss_key *cmd_resp =
-			(struct i40e_aqc_get_set_rss_key *)&desc.params.raw;
-	u16 key_size = sizeof(struct i40e_aqc_get_set_rss_key_data);
-
-	if (set)
-		i40evf_fill_default_direct_cmd_desc(&desc,
-						    i40e_aqc_opc_set_rss_key);
-	else
-		i40evf_fill_default_direct_cmd_desc(&desc,
-						    i40e_aqc_opc_get_rss_key);
-
-	/* Indirect command */
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
-
-	cmd_resp->vsi_id =
-			cpu_to_le16((u16)((vsi_id <<
-					  I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) &
-					  I40E_AQC_SET_RSS_KEY_VSI_ID_MASK));
-	cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_KEY_VSI_VALID);
-
-	status = i40evf_asq_send_command(hw, &desc, key, key_size, NULL);
-
-	return status;
-}
-
-/**
- * i40evf_aq_get_rss_key
- * @hw: pointer to the hw struct
- * @vsi_id: vsi fw index
- * @key: pointer to key info struct
- *
- * get the RSS key per VSI
- **/
-i40e_status i40evf_aq_get_rss_key(struct i40e_hw *hw,
-				  u16 vsi_id,
-				  struct i40e_aqc_get_set_rss_key_data *key)
-{
-	return i40e_aq_get_set_rss_key(hw, vsi_id, key, false);
-}
-
-/**
- * i40evf_aq_set_rss_key
- * @hw: pointer to the hw struct
- * @vsi_id: vsi fw index
- * @key: pointer to key info struct
- *
- * set the RSS key per VSI
- **/
-i40e_status i40evf_aq_set_rss_key(struct i40e_hw *hw,
-				  u16 vsi_id,
-				  struct i40e_aqc_get_set_rss_key_data *key)
-{
-	return i40e_aq_get_set_rss_key(hw, vsi_id, key, true);
-}
-
-
-/* The i40evf_ptype_lookup table is used to convert from the 8-bit ptype in the
- * hardware to a bit-field that can be used by SW to more easily determine the
- * packet type.
- *
- * Macros are used to shorten the table lines and make this table human
- * readable.
- *
- * We store the PTYPE in the top byte of the bit field - this is just so that
- * we can check that the table doesn't have a row missing, as the index into
- * the table should be the PTYPE.
- *
- * Typical work flow:
- *
- * IF NOT i40evf_ptype_lookup[ptype].known
- * THEN
- *      Packet is unknown
- * ELSE IF i40evf_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
- *      Use the rest of the fields to look at the tunnels, inner protocols, etc
- * ELSE
- *      Use the enum i40e_rx_l2_ptype to decode the packet type
- * ENDIF
- */
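
Rendered as C, the workflow above reduces to a couple of field checks on the
decoded entry. The predicate below is an illustrative sketch using only the
decode fields and constants this table is built from:

static bool example_is_nontunneled_ipv4_tcp(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = i40evf_ptype_lookup[ptype];

	if (!decoded.known)
		return false;	/* unused table row: unknown packet */
	if (decoded.outer_ip != I40E_RX_PTYPE_OUTER_IP)
		return false;	/* L2-only packet, no IP header to inspect */

	return decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4 &&
	       decoded.tunnel_type == I40E_RX_PTYPE_TUNNEL_NONE &&
	       decoded.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP;
}
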
-
-/* macro to make the table lines short */
-#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
-	{	PTYPE, \
-		1, \
-		I40E_RX_PTYPE_OUTER_##OUTER_IP, \
-		I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
-		I40E_RX_PTYPE_##OUTER_FRAG, \
-		I40E_RX_PTYPE_TUNNEL_##T, \
-		I40E_RX_PTYPE_TUNNEL_END_##TE, \
-		I40E_RX_PTYPE_##TEF, \
-		I40E_RX_PTYPE_INNER_PROT_##I, \
-		I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
-
-#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
-		{ PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
-
-/* shorter macros make the table fit but are terse */
-#define I40E_RX_PTYPE_NOF		I40E_RX_PTYPE_NOT_FRAG
-#define I40E_RX_PTYPE_FRG		I40E_RX_PTYPE_FRAG
-#define I40E_RX_PTYPE_INNER_PROT_TS	I40E_RX_PTYPE_INNER_PROT_TIMESYNC
-
-/* Lookup table mapping the HW PTYPE to the bit field for decoding */
-struct i40e_rx_ptype_decoded i40evf_ptype_lookup[] = {
-	/* L2 Packet types */
-	I40E_PTT_UNUSED_ENTRY(0),
-	I40E_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
-	I40E_PTT(2,  L2, NONE, NOF, NONE, NONE, NOF, TS,   PAY2),
-	I40E_PTT(3,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
-	I40E_PTT_UNUSED_ENTRY(4),
-	I40E_PTT_UNUSED_ENTRY(5),
-	I40E_PTT(6,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
-	I40E_PTT(7,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
-	I40E_PTT_UNUSED_ENTRY(8),
-	I40E_PTT_UNUSED_ENTRY(9),
-	I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
-	I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
-	I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
-
-	/* Non Tunneled IPv4 */
-	I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(25),
-	I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
-	I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
-	I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
-
-	/* IPv4 --> IPv4 */
-	I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(32),
-	I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv4 --> IPv6 */
-	I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(39),
-	I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
-
-	/* IPv4 --> GRE/NAT */
-	I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
-
-	/* IPv4 --> GRE/NAT --> IPv4 */
-	I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(47),
-	I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv4 --> GRE/NAT --> IPv6 */
-	I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(54),
-	I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
-
-	/* IPv4 --> GRE/NAT --> MAC */
-	I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
-
-	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
-	I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(62),
-	I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv4 --> GRE/NAT --> MAC --> IPv6 */
-	I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(69),
-	I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
-
-	/* IPv4 --> GRE/NAT --> MAC/VLAN */
-	I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
-
-	/* IPv4 --> GRE/NAT --> MAC/VLAN --> IPv4 */
-	I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(77),
-	I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv4 --> GRE/NAT --> MAC/VLAN --> IPv6 */
-	I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(84),
-	I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
-
-	/* Non Tunneled IPv6 */
-	I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
-	I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY3),
-	I40E_PTT_UNUSED_ENTRY(91),
-	I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
-	I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
-	I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
-
-	/* IPv6 --> IPv4 */
-	I40E_PTT(95,  IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(96,  IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(97,  IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(98),
-	I40E_PTT(99,  IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv6 --> IPv6 */
-	I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(105),
-	I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
-
-	/* IPv6 --> GRE/NAT */
-	I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
-
-	/* IPv6 --> GRE/NAT --> IPv4 */
-	I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(113),
-	I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv6 --> GRE/NAT --> IPv6 */
-	I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(120),
-	I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
-
-	/* IPv6 --> GRE/NAT --> MAC */
-	I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
-
-	/* IPv6 --> GRE/NAT --> MAC --> IPv4 */
-	I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(128),
-	I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv6 --> GRE/NAT --> MAC --> IPv6 */
-	I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(135),
-	I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
-
-	/* IPv6 --> GRE/NAT --> MAC/VLAN */
-	I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
-
-	/* IPv6 --> GRE/NAT --> MAC/VLAN --> IPv4 */
-	I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
-	I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
-	I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(143),
-	I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
-	I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
-	I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
-
-	/* IPv6 --> GRE/NAT --> MAC/VLAN --> IPv6 */
-	I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
-	I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
-	I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
-	I40E_PTT_UNUSED_ENTRY(150),
-	I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
-	I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
-	I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
-
-	/* unused entries */
-	I40E_PTT_UNUSED_ENTRY(154),
-	I40E_PTT_UNUSED_ENTRY(155),
-	I40E_PTT_UNUSED_ENTRY(156),
-	I40E_PTT_UNUSED_ENTRY(157),
-	I40E_PTT_UNUSED_ENTRY(158),
-	I40E_PTT_UNUSED_ENTRY(159),
-
-	I40E_PTT_UNUSED_ENTRY(160),
-	I40E_PTT_UNUSED_ENTRY(161),
-	I40E_PTT_UNUSED_ENTRY(162),
-	I40E_PTT_UNUSED_ENTRY(163),
-	I40E_PTT_UNUSED_ENTRY(164),
-	I40E_PTT_UNUSED_ENTRY(165),
-	I40E_PTT_UNUSED_ENTRY(166),
-	I40E_PTT_UNUSED_ENTRY(167),
-	I40E_PTT_UNUSED_ENTRY(168),
-	I40E_PTT_UNUSED_ENTRY(169),
-
-	I40E_PTT_UNUSED_ENTRY(170),
-	I40E_PTT_UNUSED_ENTRY(171),
-	I40E_PTT_UNUSED_ENTRY(172),
-	I40E_PTT_UNUSED_ENTRY(173),
-	I40E_PTT_UNUSED_ENTRY(174),
-	I40E_PTT_UNUSED_ENTRY(175),
-	I40E_PTT_UNUSED_ENTRY(176),
-	I40E_PTT_UNUSED_ENTRY(177),
-	I40E_PTT_UNUSED_ENTRY(178),
-	I40E_PTT_UNUSED_ENTRY(179),
-
-	I40E_PTT_UNUSED_ENTRY(180),
-	I40E_PTT_UNUSED_ENTRY(181),
-	I40E_PTT_UNUSED_ENTRY(182),
-	I40E_PTT_UNUSED_ENTRY(183),
-	I40E_PTT_UNUSED_ENTRY(184),
-	I40E_PTT_UNUSED_ENTRY(185),
-	I40E_PTT_UNUSED_ENTRY(186),
-	I40E_PTT_UNUSED_ENTRY(187),
-	I40E_PTT_UNUSED_ENTRY(188),
-	I40E_PTT_UNUSED_ENTRY(189),
-
-	I40E_PTT_UNUSED_ENTRY(190),
-	I40E_PTT_UNUSED_ENTRY(191),
-	I40E_PTT_UNUSED_ENTRY(192),
-	I40E_PTT_UNUSED_ENTRY(193),
-	I40E_PTT_UNUSED_ENTRY(194),
-	I40E_PTT_UNUSED_ENTRY(195),
-	I40E_PTT_UNUSED_ENTRY(196),
-	I40E_PTT_UNUSED_ENTRY(197),
-	I40E_PTT_UNUSED_ENTRY(198),
-	I40E_PTT_UNUSED_ENTRY(199),
-
-	I40E_PTT_UNUSED_ENTRY(200),
-	I40E_PTT_UNUSED_ENTRY(201),
-	I40E_PTT_UNUSED_ENTRY(202),
-	I40E_PTT_UNUSED_ENTRY(203),
-	I40E_PTT_UNUSED_ENTRY(204),
-	I40E_PTT_UNUSED_ENTRY(205),
-	I40E_PTT_UNUSED_ENTRY(206),
-	I40E_PTT_UNUSED_ENTRY(207),
-	I40E_PTT_UNUSED_ENTRY(208),
-	I40E_PTT_UNUSED_ENTRY(209),
-
-	I40E_PTT_UNUSED_ENTRY(210),
-	I40E_PTT_UNUSED_ENTRY(211),
-	I40E_PTT_UNUSED_ENTRY(212),
-	I40E_PTT_UNUSED_ENTRY(213),
-	I40E_PTT_UNUSED_ENTRY(214),
-	I40E_PTT_UNUSED_ENTRY(215),
-	I40E_PTT_UNUSED_ENTRY(216),
-	I40E_PTT_UNUSED_ENTRY(217),
-	I40E_PTT_UNUSED_ENTRY(218),
-	I40E_PTT_UNUSED_ENTRY(219),
-
-	I40E_PTT_UNUSED_ENTRY(220),
-	I40E_PTT_UNUSED_ENTRY(221),
-	I40E_PTT_UNUSED_ENTRY(222),
-	I40E_PTT_UNUSED_ENTRY(223),
-	I40E_PTT_UNUSED_ENTRY(224),
-	I40E_PTT_UNUSED_ENTRY(225),
-	I40E_PTT_UNUSED_ENTRY(226),
-	I40E_PTT_UNUSED_ENTRY(227),
-	I40E_PTT_UNUSED_ENTRY(228),
-	I40E_PTT_UNUSED_ENTRY(229),
-
-	I40E_PTT_UNUSED_ENTRY(230),
-	I40E_PTT_UNUSED_ENTRY(231),
-	I40E_PTT_UNUSED_ENTRY(232),
-	I40E_PTT_UNUSED_ENTRY(233),
-	I40E_PTT_UNUSED_ENTRY(234),
-	I40E_PTT_UNUSED_ENTRY(235),
-	I40E_PTT_UNUSED_ENTRY(236),
-	I40E_PTT_UNUSED_ENTRY(237),
-	I40E_PTT_UNUSED_ENTRY(238),
-	I40E_PTT_UNUSED_ENTRY(239),
-
-	I40E_PTT_UNUSED_ENTRY(240),
-	I40E_PTT_UNUSED_ENTRY(241),
-	I40E_PTT_UNUSED_ENTRY(242),
-	I40E_PTT_UNUSED_ENTRY(243),
-	I40E_PTT_UNUSED_ENTRY(244),
-	I40E_PTT_UNUSED_ENTRY(245),
-	I40E_PTT_UNUSED_ENTRY(246),
-	I40E_PTT_UNUSED_ENTRY(247),
-	I40E_PTT_UNUSED_ENTRY(248),
-	I40E_PTT_UNUSED_ENTRY(249),
-
-	I40E_PTT_UNUSED_ENTRY(250),
-	I40E_PTT_UNUSED_ENTRY(251),
-	I40E_PTT_UNUSED_ENTRY(252),
-	I40E_PTT_UNUSED_ENTRY(253),
-	I40E_PTT_UNUSED_ENTRY(254),
-	I40E_PTT_UNUSED_ENTRY(255)
-};
-
-/**
- * i40evf_aq_rx_ctl_read_register - use FW to read from an Rx control register
- * @hw: pointer to the hw struct
- * @reg_addr: register address
- * @reg_val: ptr to register value
- * @cmd_details: pointer to command details structure or NULL
- *
- * Use the firmware to read the Rx control register,
- * especially useful if the Rx unit is under heavy pressure
- **/
-i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw,
-				u32 reg_addr, u32 *reg_val,
-				struct i40e_asq_cmd_details *cmd_details)
-{
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp =
-		(struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
-	i40e_status status;
-
-	if (!reg_val)
-		return I40E_ERR_PARAM;
-
-	i40evf_fill_default_direct_cmd_desc(&desc,
-					    i40e_aqc_opc_rx_ctl_reg_read);
-
-	cmd_resp->address = cpu_to_le32(reg_addr);
-
-	status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details);
-
-	if (status == 0)
-		*reg_val = le32_to_cpu(cmd_resp->value);
-
-	return status;
-}
-
-/**
- * i40evf_read_rx_ctl - read from an Rx control register
- * @hw: pointer to the hw struct
- * @reg_addr: register address
- **/
-u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
-{
-	i40e_status status = 0;
-	bool use_register;
-	int retry = 5;
-	u32 val = 0;
-
-	use_register = (((hw->aq.api_maj_ver == 1) &&
-			(hw->aq.api_min_ver < 5)) ||
-			(hw->mac.type == I40E_MAC_X722));
-	if (!use_register) {
-do_retry:
-		status = i40evf_aq_rx_ctl_read_register(hw, reg_addr,
-							&val, NULL);
-		if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) {
-			usleep_range(1000, 2000);
-			retry--;
-			goto do_retry;
-		}
-	}
-
-	/* if the AQ access failed, try the old-fashioned way */
-	if (status || use_register)
-		val = rd32(hw, reg_addr);
-
-	return val;
-}
-
-/**
- * i40evf_aq_rx_ctl_write_register
- * @hw: pointer to the hw struct
- * @reg_addr: register address
- * @reg_val: register value
- * @cmd_details: pointer to command details structure or NULL
- *
- * Use the firmware to write to an Rx control register,
- * especially useful if the Rx unit is under heavy pressure
- **/
-i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw,
-				u32 reg_addr, u32 reg_val,
-				struct i40e_asq_cmd_details *cmd_details)
-{
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_rx_ctl_reg_read_write *cmd =
-		(struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
-	i40e_status status;
-
-	i40evf_fill_default_direct_cmd_desc(&desc,
-					    i40e_aqc_opc_rx_ctl_reg_write);
-
-	cmd->address = cpu_to_le32(reg_addr);
-	cmd->value = cpu_to_le32(reg_val);
-
-	status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details);
-
-	return status;
-}
-
-/**
- * i40evf_write_rx_ctl - write to an Rx control register
- * @hw: pointer to the hw struct
- * @reg_addr: register address
- * @reg_val: register value
- **/
-void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
-{
-	i40e_status status = 0;
-	bool use_register;
-	int retry = 5;
-
-	use_register = (((hw->aq.api_maj_ver == 1) &&
-			(hw->aq.api_min_ver < 5)) ||
-			(hw->mac.type == I40E_MAC_X722));
-	if (!use_register) {
-do_retry:
-		status = i40evf_aq_rx_ctl_write_register(hw, reg_addr,
-							 reg_val, NULL);
-		if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) {
-			usleep_range(1000, 2000);
-			retry--;
-			goto do_retry;
-		}
-	}
-
-	/* if the AQ access failed, try the old-fashioned way */
-	if (status || use_register)
-		wr32(hw, reg_addr, reg_val);
-}
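
In practice the read and write helpers above are paired into a
read-modify-write, so only the intended bits change while the firmware
mediates the access. A minimal sketch (the function name is illustrative):

static void example_set_rx_ctl_bits(struct i40e_hw *hw, u32 reg_addr,
				    u32 bits)
{
	u32 val = i40evf_read_rx_ctl(hw, reg_addr);

	i40evf_write_rx_ctl(hw, reg_addr, val | bits);
}
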
-
-/**
- * i40e_aq_send_msg_to_pf
- * @hw: pointer to the hardware structure
- * @v_opcode: opcodes for VF-PF communication
- * @v_retval: return error code
- * @msg: pointer to the msg buffer
- * @msglen: msg length
- * @cmd_details: pointer to command details
- *
- * Send message to PF driver using admin queue. By default, this message
- * is sent asynchronously, i.e. i40evf_asq_send_command() does not wait for
- * completion before returning.
- **/
-i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
-				enum virtchnl_ops v_opcode,
-				i40e_status v_retval,
-				u8 *msg, u16 msglen,
-				struct i40e_asq_cmd_details *cmd_details)
-{
-	struct i40e_aq_desc desc;
-	struct i40e_asq_cmd_details details;
-	i40e_status status;
-
-	i40evf_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_pf);
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_SI);
-	desc.cookie_high = cpu_to_le32(v_opcode);
-	desc.cookie_low = cpu_to_le32(v_retval);
-	if (msglen) {
-		desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF
-						| I40E_AQ_FLAG_RD));
-		if (msglen > I40E_AQ_LARGE_BUF)
-			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
-		desc.datalen = cpu_to_le16(msglen);
-	}
-	if (!cmd_details) {
-		memset(&details, 0, sizeof(details));
-		details.async = true;
-		cmd_details = &details;
-	}
-	status = i40evf_asq_send_command(hw, &desc, msg, msglen, cmd_details);
-	return status;
-}
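
As a usage sketch, the simplest VF-to-PF exchange carries no payload at all:
the VF asks for its resources and later receives the PF's reply on the
receive queue. The wrapper name is illustrative; newer virtchnl API versions
also attach a capability bitmap, which is omitted here:

static i40e_status example_request_vf_resources(struct i40e_hw *hw)
{
	/* sent asynchronously; the PF answers with a
	 * VIRTCHNL_OP_GET_VF_RESOURCES message on the ARQ
	 */
	return i40e_aq_send_msg_to_pf(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
				      0, NULL, 0, NULL);
}
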
-
-/**
- * i40e_vf_parse_hw_config
- * @hw: pointer to the hardware structure
- * @msg: pointer to the virtual channel VF resource structure
- *
- * Given a VF resource message from the PF, populate the hw struct
- * with appropriate information.
- **/
-void i40e_vf_parse_hw_config(struct i40e_hw *hw,
-			     struct virtchnl_vf_resource *msg)
-{
-	struct virtchnl_vsi_resource *vsi_res;
-	int i;
-
-	vsi_res = &msg->vsi_res[0];
-
-	hw->dev_caps.num_vsis = msg->num_vsis;
-	hw->dev_caps.num_rx_qp = msg->num_queue_pairs;
-	hw->dev_caps.num_tx_qp = msg->num_queue_pairs;
-	hw->dev_caps.num_msix_vectors_vf = msg->max_vectors;
-	hw->dev_caps.dcb = msg->vf_cap_flags &
-			   VIRTCHNL_VF_OFFLOAD_L2;
-	hw->dev_caps.fcoe = 0;
-	for (i = 0; i < msg->num_vsis; i++) {
-		if (vsi_res->vsi_type == VIRTCHNL_VSI_SRIOV) {
-			ether_addr_copy(hw->mac.perm_addr,
-					vsi_res->default_mac_addr);
-			ether_addr_copy(hw->mac.addr,
-					vsi_res->default_mac_addr);
-		}
-		vsi_res++;
-	}
-}
-
-/**
- * i40e_vf_reset
- * @hw: pointer to the hardware structure
- *
- * Send a VF_RESET message to the PF. Does not wait for response from PF
- * as none will be forthcoming. Immediately after calling this function,
- * the admin queue should be shut down and (optionally) reinitialized.
- **/
-i40e_status i40e_vf_reset(struct i40e_hw *hw)
-{
-	return i40e_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF,
-				      0, NULL, 0, NULL);
-}
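
A sketch of the sequence the comment above prescribes, using the AdminQ
init/shutdown helpers declared elsewhere in this driver; waiting for the
reset to complete is device-specific and only hinted at here:

static void example_full_vf_reset(struct i40e_hw *hw)
{
	i40e_vf_reset(hw);		/* fire-and-forget toward the PF */
	i40evf_shutdown_adminq(hw);	/* AQ must not be used during reset */
	/* ... poll I40E_VFGEN_RSTAT until the reset is reported done ... */
	i40evf_init_adminq(hw);		/* optional, per the comment above */
}
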
-
-/**
- * i40evf_aq_write_ddp - Write dynamic device personalization (ddp)
- * @hw: pointer to the hw struct
- * @buff: command buffer (size in bytes = buff_size)
- * @buff_size: buffer size in bytes
- * @track_id: package tracking id
- * @error_offset: returns error offset
- * @error_info: returns error information
- * @cmd_details: pointer to command details structure or NULL
- **/
-enum i40e_status_code
-i40evf_aq_write_ddp(struct i40e_hw *hw, void *buff,
-		    u16 buff_size, u32 track_id,
-		    u32 *error_offset, u32 *error_info,
-		    struct i40e_asq_cmd_details *cmd_details)
-{
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_write_personalization_profile *cmd =
-		(struct i40e_aqc_write_personalization_profile *)
-		&desc.params.raw;
-	struct i40e_aqc_write_ddp_resp *resp;
-	i40e_status status;
-
-	i40evf_fill_default_direct_cmd_desc(&desc,
-					    i40e_aqc_opc_write_personalization_profile);
-
-	desc.flags |= cpu_to_le16(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD);
-	if (buff_size > I40E_AQ_LARGE_BUF)
-		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
-
-	desc.datalen = cpu_to_le16(buff_size);
-
-	cmd->profile_track_id = cpu_to_le32(track_id);
-
-	status = i40evf_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
-	if (!status) {
-		resp = (struct i40e_aqc_write_ddp_resp *)&desc.params.raw;
-		if (error_offset)
-			*error_offset = le32_to_cpu(resp->error_offset);
-		if (error_info)
-			*error_info = le32_to_cpu(resp->error_info);
-	}
-
-	return status;
-}
-
-/**
- * i40evf_aq_get_ddp_list - Read dynamic device personalization (ddp)
- * @hw: pointer to the hw struct
- * @buff: command buffer (size in bytes = buff_size)
- * @buff_size: buffer size in bytes
- * @flags: AdminQ command flags
- * @cmd_details: pointer to command details structure or NULL
- **/
-enum i40e_status_code
-i40evf_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
-		       u16 buff_size, u8 flags,
-		       struct i40e_asq_cmd_details *cmd_details)
-{
-	struct i40e_aq_desc desc;
-	struct i40e_aqc_get_applied_profiles *cmd =
-		(struct i40e_aqc_get_applied_profiles *)&desc.params.raw;
-	i40e_status status;
-
-	i40evf_fill_default_direct_cmd_desc(&desc,
-					    i40e_aqc_opc_get_personalization_profile_list);
-
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
-	if (buff_size > I40E_AQ_LARGE_BUF)
-		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
-	desc.datalen = cpu_to_le16(buff_size);
-
-	cmd->flags = flags;
-
-	status = i40evf_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
-
-	return status;
-}
-
-/**
- * i40evf_find_segment_in_package
- * @segment_type: the segment type to search for (e.g., SEGMENT_TYPE_I40E)
- * @pkg_hdr: pointer to the package header to be searched
- *
- * This function searches a package file for a particular segment type. On
- * success it returns a pointer to the segment header, otherwise it will
- * return NULL.
- **/
-struct i40e_generic_seg_header *
-i40evf_find_segment_in_package(u32 segment_type,
-			       struct i40e_package_header *pkg_hdr)
-{
-	struct i40e_generic_seg_header *segment;
-	u32 i;
-
-	/* Search all package segments for the requested segment type */
-	for (i = 0; i < pkg_hdr->segment_count; i++) {
-		segment =
-			(struct i40e_generic_seg_header *)((u8 *)pkg_hdr +
-			 pkg_hdr->segment_offset[i]);
-
-		if (segment->type == segment_type)
-			return segment;
-	}
-
-	return NULL;
-}
-
-/**
- * i40evf_write_profile
- * @hw: pointer to the hardware structure
- * @profile: pointer to the profile segment of the package to be downloaded
- * @track_id: package tracking id
- *
- * Handles the download of a complete package.
- */
-enum i40e_status_code
-i40evf_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
-		     u32 track_id)
-{
-	i40e_status status = 0;
-	struct i40e_section_table *sec_tbl;
-	struct i40e_profile_section_header *sec = NULL;
-	u32 dev_cnt;
-	u32 vendor_dev_id;
-	u32 *nvm;
-	u32 section_size = 0;
-	u32 offset = 0, info = 0;
-	u32 i;
-
-	dev_cnt = profile->device_table_count;
-
-	for (i = 0; i < dev_cnt; i++) {
-		vendor_dev_id = profile->device_table[i].vendor_dev_id;
-		if ((vendor_dev_id >> 16) == PCI_VENDOR_ID_INTEL)
-			if (hw->device_id == (vendor_dev_id & 0xFFFF))
-				break;
-	}
-	if (i == dev_cnt) {
-		i40e_debug(hw, I40E_DEBUG_PACKAGE, "Device doesn't support DDP\n");
-		return I40E_ERR_DEVICE_NOT_SUPPORTED;
-	}
-
-	nvm = (u32 *)&profile->device_table[dev_cnt];
-	sec_tbl = (struct i40e_section_table *)&nvm[nvm[0] + 1];
-
-	for (i = 0; i < sec_tbl->section_count; i++) {
-		sec = (struct i40e_profile_section_header *)((u8 *)profile +
-					     sec_tbl->section_offset[i]);
-
-		/* Skip 'AQ', 'note' and 'name' sections */
-		if (sec->section.type != SECTION_TYPE_MMIO)
-			continue;
-
-		section_size = sec->section.size +
-			sizeof(struct i40e_profile_section_header);
-
-		/* Write profile */
-		status = i40evf_aq_write_ddp(hw, (void *)sec, (u16)section_size,
-					     track_id, &offset, &info, NULL);
-		if (status) {
-			i40e_debug(hw, I40E_DEBUG_PACKAGE,
-				   "Failed to write profile: offset %d, info %d\n",
-				   offset, info);
-			break;
-		}
-	}
-	return status;
-}
-
-/**
- * i40evf_add_pinfo_to_list
- * @hw: pointer to the hardware structure
- * @profile: pointer to the profile segment of the package
- * @profile_info_sec: buffer for information section
- * @track_id: package tracking id
- *
- * Register a profile to the list of loaded profiles.
- */
-enum i40e_status_code
-i40evf_add_pinfo_to_list(struct i40e_hw *hw,
-			 struct i40e_profile_segment *profile,
-			 u8 *profile_info_sec, u32 track_id)
-{
-	i40e_status status = 0;
-	struct i40e_profile_section_header *sec = NULL;
-	struct i40e_profile_info *pinfo;
-	u32 offset = 0, info = 0;
-
-	sec = (struct i40e_profile_section_header *)profile_info_sec;
-	sec->tbl_size = 1;
-	sec->data_end = sizeof(struct i40e_profile_section_header) +
-			sizeof(struct i40e_profile_info);
-	sec->section.type = SECTION_TYPE_INFO;
-	sec->section.offset = sizeof(struct i40e_profile_section_header);
-	sec->section.size = sizeof(struct i40e_profile_info);
-	pinfo = (struct i40e_profile_info *)(profile_info_sec +
-					     sec->section.offset);
-	pinfo->track_id = track_id;
-	pinfo->version = profile->version;
-	pinfo->op = I40E_DDP_ADD_TRACKID;
-	memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
-
-	status = i40evf_aq_write_ddp(hw, (void *)sec, sec->data_end,
-				     track_id, &offset, &info, NULL);
-	return status;
-}
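
Putting the three DDP helpers together, the expected flow is: locate the
profile segment, write it to the device, then record it in the applied
profile list. The sketch below assumes the caller validated the package and
sized pinfo_sec for a section header plus one i40e_profile_info:

static enum i40e_status_code
example_load_ddp_package(struct i40e_hw *hw, struct i40e_package_header *pkg,
			 u32 track_id, u8 *pinfo_sec)
{
	struct i40e_profile_segment *profile;
	enum i40e_status_code status;

	profile = (struct i40e_profile_segment *)
		i40evf_find_segment_in_package(SEGMENT_TYPE_I40E, pkg);
	if (!profile)
		return I40E_ERR_PARAM;

	status = i40evf_write_profile(hw, profile, track_id);
	if (status)
		return status;

	return i40evf_add_pinfo_to_list(hw, profile, pinfo_sec, track_id);
}
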
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
deleted file mode 100644
index f300bf2..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_DEVIDS_H_
-#define _I40E_DEVIDS_H_
-
-/* Device IDs */
-#define I40E_DEV_ID_SFP_XL710		0x1572
-#define I40E_DEV_ID_QEMU		0x1574
-#define I40E_DEV_ID_KX_B		0x1580
-#define I40E_DEV_ID_KX_C		0x1581
-#define I40E_DEV_ID_QSFP_A		0x1583
-#define I40E_DEV_ID_QSFP_B		0x1584
-#define I40E_DEV_ID_QSFP_C		0x1585
-#define I40E_DEV_ID_10G_BASE_T		0x1586
-#define I40E_DEV_ID_20G_KR2		0x1587
-#define I40E_DEV_ID_20G_KR2_A		0x1588
-#define I40E_DEV_ID_10G_BASE_T4		0x1589
-#define I40E_DEV_ID_25G_B		0x158A
-#define I40E_DEV_ID_25G_SFP28		0x158B
-#define I40E_DEV_ID_VF			0x154C
-#define I40E_DEV_ID_VF_HV		0x1571
-#define I40E_DEV_ID_ADAPTIVE_VF		0x1889
-#define I40E_DEV_ID_SFP_X722		0x37D0
-#define I40E_DEV_ID_1G_BASE_T_X722	0x37D1
-#define I40E_DEV_ID_10G_BASE_T_X722	0x37D2
-#define I40E_DEV_ID_SFP_I_X722		0x37D3
-#define I40E_DEV_ID_X722_VF		0x37CD
-
-#define i40e_is_40G_device(d)		((d) == I40E_DEV_ID_QSFP_A  || \
-					 (d) == I40E_DEV_ID_QSFP_B  || \
-					 (d) == I40E_DEV_ID_QSFP_C)
-
-#endif /* _I40E_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
deleted file mode 100644
index 1c78de8..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_HMC_H_
-#define _I40E_HMC_H_
-
-#define I40E_HMC_MAX_BP_COUNT 512
-
-/* forward-declare the HW struct for the compiler */
-struct i40e_hw;
-
-#define I40E_HMC_INFO_SIGNATURE		0x484D5347 /* HMSG */
-#define I40E_HMC_PD_CNT_IN_SD		512
-#define I40E_HMC_DIRECT_BP_SIZE		0x200000 /* 2M */
-#define I40E_HMC_PAGED_BP_SIZE		4096
-#define I40E_HMC_PD_BP_BUF_ALIGNMENT	4096
-#define I40E_FIRST_VF_FPM_ID		16
-
-struct i40e_hmc_obj_info {
-	u64 base;	/* base addr in FPM */
-	u32 max_cnt;	/* max count available for this hmc func */
-	u32 cnt;	/* count of objects driver actually wants to create */
-	u64 size;	/* size in bytes of one object */
-};
-
-enum i40e_sd_entry_type {
-	I40E_SD_TYPE_INVALID = 0,
-	I40E_SD_TYPE_PAGED   = 1,
-	I40E_SD_TYPE_DIRECT  = 2
-};
-
-struct i40e_hmc_bp {
-	enum i40e_sd_entry_type entry_type;
-	struct i40e_dma_mem addr; /* populate to be used by hw */
-	u32 sd_pd_index;
-	u32 ref_cnt;
-};
-
-struct i40e_hmc_pd_entry {
-	struct i40e_hmc_bp bp;
-	u32 sd_index;
-	bool rsrc_pg;
-	bool valid;
-};
-
-struct i40e_hmc_pd_table {
-	struct i40e_dma_mem pd_page_addr; /* populate to be used by hw */
-	struct i40e_hmc_pd_entry  *pd_entry; /* [512] for sw bookkeeping */
-	struct i40e_virt_mem pd_entry_virt_mem; /* virt mem for pd_entry */
-
-	u32 ref_cnt;
-	u32 sd_index;
-};
-
-struct i40e_hmc_sd_entry {
-	enum i40e_sd_entry_type entry_type;
-	bool valid;
-
-	union {
-		struct i40e_hmc_pd_table pd_table;
-		struct i40e_hmc_bp bp;
-	} u;
-};
-
-struct i40e_hmc_sd_table {
-	struct i40e_virt_mem addr; /* used to track sd_entry allocations */
-	u32 sd_cnt;
-	u32 ref_cnt;
-	struct i40e_hmc_sd_entry *sd_entry; /* (sd_cnt*512) entries max */
-};
-
-struct i40e_hmc_info {
-	u32 signature;
-	/* equals the PCI func num for the PF; dynamically allocated for VFs */
-	u8 hmc_fn_id;
-	u16 first_sd_index; /* index of the first available SD */
-
-	/* hmc objects */
-	struct i40e_hmc_obj_info *hmc_obj;
-	struct i40e_virt_mem hmc_obj_virt_mem;
-	struct i40e_hmc_sd_table sd_table;
-};
-
-#define I40E_INC_SD_REFCNT(sd_table)	((sd_table)->ref_cnt++)
-#define I40E_INC_PD_REFCNT(pd_table)	((pd_table)->ref_cnt++)
-#define I40E_INC_BP_REFCNT(bp)		((bp)->ref_cnt++)
-
-#define I40E_DEC_SD_REFCNT(sd_table)	((sd_table)->ref_cnt--)
-#define I40E_DEC_PD_REFCNT(pd_table)	((pd_table)->ref_cnt--)
-#define I40E_DEC_BP_REFCNT(bp)		((bp)->ref_cnt--)
-
-/**
- * I40E_SET_PF_SD_ENTRY - marks the sd entry as valid in the hardware
- * @hw: pointer to our hw struct
- * @pa: pointer to physical address
- * @sd_index: segment descriptor index
- * @type: if sd entry is direct or paged
- **/
-#define I40E_SET_PF_SD_ENTRY(hw, pa, sd_index, type)			\
-{									\
-	u32 val1, val2, val3;						\
-	val1 = (u32)(upper_32_bits(pa));				\
-	val2 = (u32)(pa) | (I40E_HMC_MAX_BP_COUNT <<			\
-		 I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |		\
-		((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) <<		\
-		I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |			\
-		BIT(I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);		\
-	val3 = (sd_index) | BIT_ULL(I40E_PFHMC_SDCMD_PMSDWR_SHIFT);	\
-	wr32((hw), I40E_PFHMC_SDDATAHIGH, val1);			\
-	wr32((hw), I40E_PFHMC_SDDATALOW, val2);				\
-	wr32((hw), I40E_PFHMC_SDCMD, val3);				\
-}
-
-/**
- * I40E_CLEAR_PF_SD_ENTRY - marks the sd entry as invalid in the hardware
- * @hw: pointer to our hw struct
- * @sd_index: segment descriptor index
- * @type: if sd entry is direct or paged
- **/
-#define I40E_CLEAR_PF_SD_ENTRY(hw, sd_index, type)			\
-{									\
-	u32 val2, val3;							\
-	val2 = (I40E_HMC_MAX_BP_COUNT <<				\
-		I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |		\
-		((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) <<		\
-		I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);			\
-	val3 = (sd_index) | BIT_ULL(I40E_PFHMC_SDCMD_PMSDWR_SHIFT);	\
-	wr32((hw), I40E_PFHMC_SDDATAHIGH, 0);				\
-	wr32((hw), I40E_PFHMC_SDDATALOW, val2);				\
-	wr32((hw), I40E_PFHMC_SDCMD, val3);				\
-}
-
-/**
- * I40E_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
- * @hw: pointer to our hw struct
- * @sd_idx: segment descriptor index
- * @pd_idx: page descriptor index
- **/
-#define I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx)			\
-	wr32((hw), I40E_PFHMC_PDINV,					\
-	    (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |		\
-	     ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
-
-/**
- * I40E_FIND_SD_INDEX_LIMIT - finds segment descriptor index limit
- * @hmc_info: pointer to the HMC configuration information structure
- * @type: type of HMC resources we're searching
- * @index: starting index for the object
- * @cnt: number of objects we're trying to create
- * @sd_idx: pointer to return index of the segment descriptor in question
- * @sd_limit: pointer to return the maximum number of segment descriptors
- *
- * This function calculates the segment descriptor index and index limit
- * for the resource defined by i40e_hmc_rsrc_type.
- **/
-#define I40E_FIND_SD_INDEX_LIMIT(hmc_info, type, index, cnt, sd_idx, sd_limit)\
-{									\
-	u64 fpm_addr, fpm_limit;					\
-	fpm_addr = (hmc_info)->hmc_obj[(type)].base +			\
-		   (hmc_info)->hmc_obj[(type)].size * (index);		\
-	fpm_limit = fpm_addr + (hmc_info)->hmc_obj[(type)].size * (cnt);\
-	*(sd_idx) = (u32)(fpm_addr / I40E_HMC_DIRECT_BP_SIZE);		\
-	*(sd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_DIRECT_BP_SIZE);	\
-	/* add one more to the limit to correct our range */		\
-	*(sd_limit) += 1;						\
-}
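
The index arithmetic is easier to see with concrete numbers; the values
below are illustrative, not taken from hardware documentation:

/* Worked example: base = 0, object size = 128 bytes, index = 0,
 * cnt = 65536 objects.  Then fpm_addr = 0 and fpm_limit = 8 MB, so
 * sd_idx = 0 / 2 MB = 0 and sd_limit = (8 MB - 1) / 2 MB + 1 = 4:
 * four 2 MB direct backing pages cover the whole range.
 */
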
-
-/**
- * I40E_FIND_PD_INDEX_LIMIT - finds page descriptor index limit
- * @hmc_info: pointer to the HMC configuration information struct
- * @type: HMC resource type we're examining
- * @idx: starting index for the object
- * @cnt: number of objects we're trying to create
- * @pd_index: pointer to return page descriptor index
- * @pd_limit: pointer to return page descriptor index limit
- *
- * Calculates the page descriptor index and index limit for the resource
- * defined by i40e_hmc_rsrc_type.
- **/
-#define I40E_FIND_PD_INDEX_LIMIT(hmc_info, type, idx, cnt, pd_index, pd_limit)\
-{									\
-	u64 fpm_adr, fpm_limit;						\
-	fpm_adr = (hmc_info)->hmc_obj[(type)].base +			\
-		  (hmc_info)->hmc_obj[(type)].size * (idx);		\
-	fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);	\
-	*(pd_index) = (u32)(fpm_adr / I40E_HMC_PAGED_BP_SIZE);		\
-	*(pd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_PAGED_BP_SIZE);	\
-	/* add one more to the limit to correct our range */		\
-	*(pd_limit) += 1;						\
-}
-i40e_status i40e_add_sd_table_entry(struct i40e_hw *hw,
-					      struct i40e_hmc_info *hmc_info,
-					      u32 sd_index,
-					      enum i40e_sd_entry_type type,
-					      u64 direct_mode_sz);
-
-i40e_status i40e_add_pd_table_entry(struct i40e_hw *hw,
-					      struct i40e_hmc_info *hmc_info,
-					      u32 pd_index,
-					      struct i40e_dma_mem *rsrc_pg);
-i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
-					struct i40e_hmc_info *hmc_info,
-					u32 idx);
-i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
-					     u32 idx);
-i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,
-					    struct i40e_hmc_info *hmc_info,
-					    u32 idx, bool is_pf);
-i40e_status i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
-					       u32 idx);
-i40e_status i40e_remove_pd_page_new(struct i40e_hw *hw,
-					      struct i40e_hmc_info *hmc_info,
-					      u32 idx, bool is_pf);
-
-#endif /* _I40E_HMC_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
deleted file mode 100644
index 82b00f7..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_LAN_HMC_H_
-#define _I40E_LAN_HMC_H_
-
-/* forward-declare the HW struct for the compiler */
-struct i40e_hw;
-
-/* HMC element context information */
-
-/* Rx queue context data
- *
- * The sizes of the variables may be larger than needed due to crossing byte
- * boundaries. If we do not have the width of the variable set to the correct
- * size then we could end up shifting bits off the top of the variable when the
- * variable is at the top of a byte and crosses over into the next byte.
- */
-struct i40e_hmc_obj_rxq {
-	u16 head;
-	u16 cpuid; /* bigger than needed, see above for reason */
-	u64 base;
-	u16 qlen;
-#define I40E_RXQ_CTX_DBUFF_SHIFT 7
-	u16 dbuff; /* bigger than needed, see above for reason */
-#define I40E_RXQ_CTX_HBUFF_SHIFT 6
-	u16 hbuff; /* bigger than needed, see above for reason */
-	u8  dtype;
-	u8  dsize;
-	u8  crcstrip;
-	u8  fc_ena;
-	u8  l2tsel;
-	u8  hsplit_0;
-	u8  hsplit_1;
-	u8  showiv;
-	u32 rxmax; /* bigger than needed, see above for reason */
-	u8  tphrdesc_ena;
-	u8  tphwdesc_ena;
-	u8  tphdata_ena;
-	u8  tphhead_ena;
-	u16 lrxqthresh; /* bigger than needed, see above for reason */
-	u8  prefena;	/* NOTE: normally must be set to 1 at init */
-};
-
-/* Tx queue context data
- *
- * The sizes of the variables may be larger than needed due to crossing byte
- * boundaries. If we do not have the width of the variable set to the correct
- * size then we could end up shifting bits off the top of the variable when the
- * variable is at the top of a byte and crosses over into the next byte.
- */
-struct i40e_hmc_obj_txq {
-	u16 head;
-	u8  new_context;
-	u64 base;
-	u8  fc_ena;
-	u8  timesync_ena;
-	u8  fd_ena;
-	u8  alt_vlan_ena;
-	u16 thead_wb;
-	u8  cpuid;
-	u8  head_wb_ena;
-	u16 qlen;
-	u8  tphrdesc_ena;
-	u8  tphrpacket_ena;
-	u8  tphwdesc_ena;
-	u64 head_wb_addr;
-	u32 crc;
-	u16 rdylist;
-	u8  rdylist_act;
-};
-
-/* for hsplit_0 field of Rx HMC context */
-enum i40e_hmc_obj_rx_hsplit_0 {
-	I40E_HMC_OBJ_RX_HSPLIT_0_NO_SPLIT      = 0,
-	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2      = 1,
-	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP      = 2,
-	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP = 4,
-	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP    = 8,
-};
-
-/* fcoe_cntx and fcoe_filt are for debugging purposes only */
-struct i40e_hmc_obj_fcoe_cntx {
-	u32 rsv[32];
-};
-
-struct i40e_hmc_obj_fcoe_filt {
-	u32 rsv[8];
-};
-
-/* Context sizes for LAN objects */
-enum i40e_hmc_lan_object_size {
-	I40E_HMC_LAN_OBJ_SZ_8   = 0x3,
-	I40E_HMC_LAN_OBJ_SZ_16  = 0x4,
-	I40E_HMC_LAN_OBJ_SZ_32  = 0x5,
-	I40E_HMC_LAN_OBJ_SZ_64  = 0x6,
-	I40E_HMC_LAN_OBJ_SZ_128 = 0x7,
-	I40E_HMC_LAN_OBJ_SZ_256 = 0x8,
-	I40E_HMC_LAN_OBJ_SZ_512 = 0x9,
-};
-
-#define I40E_HMC_L2OBJ_BASE_ALIGNMENT 512
-#define I40E_HMC_OBJ_SIZE_TXQ         128
-#define I40E_HMC_OBJ_SIZE_RXQ         32
-#define I40E_HMC_OBJ_SIZE_FCOE_CNTX   128
-#define I40E_HMC_OBJ_SIZE_FCOE_FILT   64
-
-enum i40e_hmc_lan_rsrc_type {
-	I40E_HMC_LAN_FULL  = 0,
-	I40E_HMC_LAN_TX    = 1,
-	I40E_HMC_LAN_RX    = 2,
-	I40E_HMC_FCOE_CTX  = 3,
-	I40E_HMC_FCOE_FILT = 4,
-	I40E_HMC_LAN_MAX   = 5
-};
-
-enum i40e_hmc_model {
-	I40E_HMC_MODEL_DIRECT_PREFERRED = 0,
-	I40E_HMC_MODEL_DIRECT_ONLY      = 1,
-	I40E_HMC_MODEL_PAGED_ONLY       = 2,
-	I40E_HMC_MODEL_UNKNOWN,
-};
-
-struct i40e_hmc_lan_create_obj_info {
-	struct i40e_hmc_info *hmc_info;
-	u32 rsrc_type;
-	u32 start_idx;
-	u32 count;
-	enum i40e_sd_entry_type entry_type;
-	u64 direct_mode_sz;
-};
-
-struct i40e_hmc_lan_delete_obj_info {
-	struct i40e_hmc_info *hmc_info;
-	u32 rsrc_type;
-	u32 start_idx;
-	u32 count;
-};
-
-i40e_status i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
-					u32 rxq_num, u32 fcoe_cntx_num,
-					u32 fcoe_filt_num);
-i40e_status i40e_configure_lan_hmc(struct i40e_hw *hw,
-					     enum i40e_hmc_model model);
-i40e_status i40e_shutdown_lan_hmc(struct i40e_hw *hw);
-
-i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
-						      u16 queue);
-i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
-						    u16 queue,
-						    struct i40e_hmc_obj_txq *s);
-i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
-						      u16 queue);
-i40e_status i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
-						    u16 queue,
-						    struct i40e_hmc_obj_rxq *s);
-
-#endif /* _I40E_LAN_HMC_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
deleted file mode 100644
index 3ddddb4..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_OSDEP_H_
-#define _I40E_OSDEP_H_
-
-#include <linux/types.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/tcp.h>
-#include <linux/pci.h>
-
-/* get readq/writeq support for 32 bit kernels, use the low-first version */
-#include <linux/io-64-nonatomic-lo-hi.h>
-
-/* This file is the glue between the shared code and the
- * actual OS primitives.
- */
-
-#define hw_dbg(hw, S, A...)	do {} while (0)
-
-#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
-#define rd32(a, reg)		readl((a)->hw_addr + (reg))
-
-#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
-#define rd64(a, reg)		readq((a)->hw_addr + (reg))
-#define i40e_flush(a)		readl((a)->hw_addr + I40E_VFGEN_RSTAT)
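
A common pattern with these accessors is to follow a control write with a
read so the posted MMIO write is flushed before execution continues; a
minimal sketch (register and value are supplied by the caller):

static inline void example_flushed_write(struct i40e_hw *hw, u32 reg, u32 val)
{
	wr32(hw, reg, val);
	i40e_flush(hw);	/* readl() forces the posted write to complete */
}
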
-
-/* memory allocation tracking */
-struct i40e_dma_mem {
-	void *va;
-	dma_addr_t pa;
-	u32 size;
-};
-
-#define i40e_allocate_dma_mem(h, m, unused, s, a) \
-	i40evf_allocate_dma_mem_d(h, m, s, a)
-#define i40e_free_dma_mem(h, m) i40evf_free_dma_mem_d(h, m)
-
-struct i40e_virt_mem {
-	void *va;
-	u32 size;
-};
-#define i40e_allocate_virt_mem(h, m, s) i40evf_allocate_virt_mem_d(h, m, s)
-#define i40e_free_virt_mem(h, m) i40evf_free_virt_mem_d(h, m)
-
-#define i40e_debug(h, m, s, ...)  i40evf_debug_d(h, m, s, ##__VA_ARGS__)
-extern void i40evf_debug_d(void *hw, u32 mask, char *fmt_str, ...)
-	__attribute__ ((format(gnu_printf, 3, 4)));
-
-typedef enum i40e_status_code i40e_status;
-#endif /* _I40E_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
deleted file mode 100644
index a358f4b..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_PROTOTYPE_H_
-#define _I40E_PROTOTYPE_H_
-
-#include "i40e_type.h"
-#include "i40e_alloc.h"
-#include <linux/avf/virtchnl.h>
-
-/* Prototypes for shared code functions that are not in
- * the standard function pointer structures.  These exist
- * mostly because they are needed even before the init
- * has happened and will assist in the early SW and FW
- * setup.
- */
-
-/* adminq functions */
-i40e_status i40evf_init_adminq(struct i40e_hw *hw);
-i40e_status i40evf_shutdown_adminq(struct i40e_hw *hw);
-void i40e_adminq_init_ring_data(struct i40e_hw *hw);
-i40e_status i40evf_clean_arq_element(struct i40e_hw *hw,
-					     struct i40e_arq_event_info *e,
-					     u16 *events_pending);
-i40e_status i40evf_asq_send_command(struct i40e_hw *hw,
-				struct i40e_aq_desc *desc,
-				void *buff, /* can be NULL */
-				u16  buff_size,
-				struct i40e_asq_cmd_details *cmd_details);
-bool i40evf_asq_done(struct i40e_hw *hw);
-
-/* debug function for adminq */
-void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
-		     void *desc, void *buffer, u16 buf_len);
-
-void i40e_idle_aq(struct i40e_hw *hw);
-void i40evf_resume_aq(struct i40e_hw *hw);
-bool i40evf_check_asq_alive(struct i40e_hw *hw);
-i40e_status i40evf_aq_queue_shutdown(struct i40e_hw *hw, bool unloading);
-const char *i40evf_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err);
-const char *i40evf_stat_str(struct i40e_hw *hw, i40e_status stat_err);
-
-i40e_status i40evf_aq_get_rss_lut(struct i40e_hw *hw, u16 seid,
-				  bool pf_lut, u8 *lut, u16 lut_size);
-i40e_status i40evf_aq_set_rss_lut(struct i40e_hw *hw, u16 seid,
-				  bool pf_lut, u8 *lut, u16 lut_size);
-i40e_status i40evf_aq_get_rss_key(struct i40e_hw *hw,
-				  u16 seid,
-				  struct i40e_aqc_get_set_rss_key_data *key);
-i40e_status i40evf_aq_set_rss_key(struct i40e_hw *hw,
-				  u16 seid,
-				  struct i40e_aqc_get_set_rss_key_data *key);
-
-i40e_status i40e_set_mac_type(struct i40e_hw *hw);
-
-extern struct i40e_rx_ptype_decoded i40evf_ptype_lookup[];
-
-static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
-{
-	return i40evf_ptype_lookup[ptype];
-}
-
-/* prototype for functions used for SW locks */
-
-/* i40e_common for VF drivers */
-void i40e_vf_parse_hw_config(struct i40e_hw *hw,
-			     struct virtchnl_vf_resource *msg);
-i40e_status i40e_vf_reset(struct i40e_hw *hw);
-i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
-				enum virtchnl_ops v_opcode,
-				i40e_status v_retval,
-				u8 *msg, u16 msglen,
-				struct i40e_asq_cmd_details *cmd_details);
-i40e_status i40e_set_filter_control(struct i40e_hw *hw,
-				struct i40e_filter_control_settings *settings);
-i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
-				u8 *mac_addr, u16 ethtype, u16 flags,
-				u16 vsi_seid, u16 queue, bool is_add,
-				struct i40e_control_filter_stats *stats,
-				struct i40e_asq_cmd_details *cmd_details);
-void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
-						    u16 vsi_seid);
-i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw,
-				u32 reg_addr, u32 *reg_val,
-				struct i40e_asq_cmd_details *cmd_details);
-u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr);
-i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw,
-				u32 reg_addr, u32 reg_val,
-				struct i40e_asq_cmd_details *cmd_details);
-void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
-i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
-				     u8 phy_select, u8 dev_addr,
-				     u32 reg_addr, u32 reg_val,
-				     struct i40e_asq_cmd_details *cmd_details);
-i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
-				     u8 phy_select, u8 dev_addr,
-				     u32 reg_addr, u32 *reg_val,
-				     struct i40e_asq_cmd_details *cmd_details);
-
-i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page,
-				   u16 reg, u8 phy_addr, u16 *value);
-i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page,
-				    u16 reg, u8 phy_addr, u16 value);
-u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
-i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
-				    u32 time, u32 interval);
-i40e_status i40evf_aq_write_ddp(struct i40e_hw *hw, void *buff,
-				u16 buff_size, u32 track_id,
-				u32 *error_offset, u32 *error_info,
-				struct i40e_asq_cmd_details *
-				cmd_details);
-i40e_status i40evf_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
-				   u16 buff_size, u8 flags,
-				   struct i40e_asq_cmd_details *
-				   cmd_details);
-struct i40e_generic_seg_header *
-i40evf_find_segment_in_package(u32 segment_type,
-			       struct i40e_package_header *pkg_header);
-enum i40e_status_code
-i40evf_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
-		     u32 track_id);
-enum i40e_status_code
-i40evf_add_pinfo_to_list(struct i40e_hw *hw,
-			 struct i40e_profile_segment *profile,
-			 u8 *profile_info_sec, u32 track_id);
-#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h
deleted file mode 100644
index 49e1f57..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_register.h
+++ /dev/null
@@ -1,313 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_REGISTER_H_
-#define _I40E_REGISTER_H_
-
-#define I40E_VFMSIX_PBA1(_i) (0x00002000 + ((_i) * 4)) /* _i=0...19 */ /* Reset: VFLR */
-#define I40E_VFMSIX_PBA1_MAX_INDEX 19
-#define I40E_VFMSIX_PBA1_PENBIT_SHIFT 0
-#define I40E_VFMSIX_PBA1_PENBIT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_PBA1_PENBIT_SHIFT)
-#define I40E_VFMSIX_TADD1(_i) (0x00002100 + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TADD1_MAX_INDEX 639
-#define I40E_VFMSIX_TADD1_MSIXTADD10_SHIFT 0
-#define I40E_VFMSIX_TADD1_MSIXTADD10_MASK I40E_MASK(0x3, I40E_VFMSIX_TADD1_MSIXTADD10_SHIFT)
-#define I40E_VFMSIX_TADD1_MSIXTADD_SHIFT 2
-#define I40E_VFMSIX_TADD1_MSIXTADD_MASK I40E_MASK(0x3FFFFFFF, I40E_VFMSIX_TADD1_MSIXTADD_SHIFT)
-#define I40E_VFMSIX_TMSG1(_i) (0x00002108 + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TMSG1_MAX_INDEX 639
-#define I40E_VFMSIX_TMSG1_MSIXTMSG_SHIFT 0
-#define I40E_VFMSIX_TMSG1_MSIXTMSG_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TMSG1_MSIXTMSG_SHIFT)
-#define I40E_VFMSIX_TUADD1(_i) (0x00002104 + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TUADD1_MAX_INDEX 639
-#define I40E_VFMSIX_TUADD1_MSIXTUADD_SHIFT 0
-#define I40E_VFMSIX_TUADD1_MSIXTUADD_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TUADD1_MSIXTUADD_SHIFT)
-#define I40E_VFMSIX_TVCTRL1(_i) (0x0000210C + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TVCTRL1_MAX_INDEX 639
-#define I40E_VFMSIX_TVCTRL1_MASK_SHIFT 0
-#define I40E_VFMSIX_TVCTRL1_MASK_MASK I40E_MASK(0x1, I40E_VFMSIX_TVCTRL1_MASK_SHIFT)
-#define I40E_VF_ARQBAH1 0x00006000 /* Reset: EMPR */
-#define I40E_VF_ARQBAH1_ARQBAH_SHIFT 0
-#define I40E_VF_ARQBAH1_ARQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ARQBAH1_ARQBAH_SHIFT)
-#define I40E_VF_ARQBAL1 0x00006C00 /* Reset: EMPR */
-#define I40E_VF_ARQBAL1_ARQBAL_SHIFT 0
-#define I40E_VF_ARQBAL1_ARQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ARQBAL1_ARQBAL_SHIFT)
-#define I40E_VF_ARQH1 0x00007400 /* Reset: EMPR */
-#define I40E_VF_ARQH1_ARQH_SHIFT 0
-#define I40E_VF_ARQH1_ARQH_MASK I40E_MASK(0x3FF, I40E_VF_ARQH1_ARQH_SHIFT)
-#define I40E_VF_ARQLEN1 0x00008000 /* Reset: EMPR */
-#define I40E_VF_ARQLEN1_ARQLEN_SHIFT 0
-#define I40E_VF_ARQLEN1_ARQLEN_MASK I40E_MASK(0x3FF, I40E_VF_ARQLEN1_ARQLEN_SHIFT)
-#define I40E_VF_ARQLEN1_ARQVFE_SHIFT 28
-#define I40E_VF_ARQLEN1_ARQVFE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQVFE_SHIFT)
-#define I40E_VF_ARQLEN1_ARQOVFL_SHIFT 29
-#define I40E_VF_ARQLEN1_ARQOVFL_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQOVFL_SHIFT)
-#define I40E_VF_ARQLEN1_ARQCRIT_SHIFT 30
-#define I40E_VF_ARQLEN1_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQCRIT_SHIFT)
-#define I40E_VF_ARQLEN1_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN1_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
-#define I40E_VF_ARQT1 0x00007000 /* Reset: EMPR */
-#define I40E_VF_ARQT1_ARQT_SHIFT 0
-#define I40E_VF_ARQT1_ARQT_MASK I40E_MASK(0x3FF, I40E_VF_ARQT1_ARQT_SHIFT)
-#define I40E_VF_ATQBAH1 0x00007800 /* Reset: EMPR */
-#define I40E_VF_ATQBAH1_ATQBAH_SHIFT 0
-#define I40E_VF_ATQBAH1_ATQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ATQBAH1_ATQBAH_SHIFT)
-#define I40E_VF_ATQBAL1 0x00007C00 /* Reset: EMPR */
-#define I40E_VF_ATQBAL1_ATQBAL_SHIFT 0
-#define I40E_VF_ATQBAL1_ATQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ATQBAL1_ATQBAL_SHIFT)
-#define I40E_VF_ATQH1 0x00006400 /* Reset: EMPR */
-#define I40E_VF_ATQH1_ATQH_SHIFT 0
-#define I40E_VF_ATQH1_ATQH_MASK I40E_MASK(0x3FF, I40E_VF_ATQH1_ATQH_SHIFT)
-#define I40E_VF_ATQLEN1 0x00006800 /* Reset: EMPR */
-#define I40E_VF_ATQLEN1_ATQLEN_SHIFT 0
-#define I40E_VF_ATQLEN1_ATQLEN_MASK I40E_MASK(0x3FF, I40E_VF_ATQLEN1_ATQLEN_SHIFT)
-#define I40E_VF_ATQLEN1_ATQVFE_SHIFT 28
-#define I40E_VF_ATQLEN1_ATQVFE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQVFE_SHIFT)
-#define I40E_VF_ATQLEN1_ATQOVFL_SHIFT 29
-#define I40E_VF_ATQLEN1_ATQOVFL_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQOVFL_SHIFT)
-#define I40E_VF_ATQLEN1_ATQCRIT_SHIFT 30
-#define I40E_VF_ATQLEN1_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQCRIT_SHIFT)
-#define I40E_VF_ATQLEN1_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN1_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
-#define I40E_VF_ATQT1 0x00008400 /* Reset: EMPR */
-#define I40E_VF_ATQT1_ATQT_SHIFT 0
-#define I40E_VF_ATQT1_ATQT_MASK I40E_MASK(0x3FF, I40E_VF_ATQT1_ATQT_SHIFT)
-#define I40E_VFGEN_RSTAT 0x00008800 /* Reset: VFR */
-#define I40E_VFGEN_RSTAT_VFR_STATE_SHIFT 0
-#define I40E_VFGEN_RSTAT_VFR_STATE_MASK I40E_MASK(0x3, I40E_VFGEN_RSTAT_VFR_STATE_SHIFT)
-#define I40E_VFINT_DYN_CTL01 0x00005C00 /* Reset: VFR */
-#define I40E_VFINT_DYN_CTL01_INTENA_SHIFT 0
-#define I40E_VFINT_DYN_CTL01_INTENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_INTENA_SHIFT)
-#define I40E_VFINT_DYN_CTL01_CLEARPBA_SHIFT 1
-#define I40E_VFINT_DYN_CTL01_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_CLEARPBA_SHIFT)
-#define I40E_VFINT_DYN_CTL01_SWINT_TRIG_SHIFT 2
-#define I40E_VFINT_DYN_CTL01_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_SWINT_TRIG_SHIFT)
-#define I40E_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
-#define I40E_VFINT_DYN_CTL01_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
-#define I40E_VFINT_DYN_CTL01_INTERVAL_SHIFT 5
-#define I40E_VFINT_DYN_CTL01_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_DYN_CTL01_INTERVAL_SHIFT)
-#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_SHIFT 24
-#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_SHIFT)
-#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_SHIFT 25
-#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTL01_SW_ITR_INDX_SHIFT)
-#define I40E_VFINT_DYN_CTL01_INTENA_MSK_SHIFT 31
-#define I40E_VFINT_DYN_CTL01_INTENA_MSK_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_INTENA_MSK_SHIFT)
-#define I40E_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
-#define I40E_VFINT_DYN_CTLN1_MAX_INDEX 15
-#define I40E_VFINT_DYN_CTLN1_INTENA_SHIFT 0
-#define I40E_VFINT_DYN_CTLN1_INTENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_INTENA_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_CLEARPBA_SHIFT 1
-#define I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_CLEARPBA_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
-#define I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT 3
-#define I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT 5
-#define I40E_VFINT_DYN_CTLN1_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT 24
-#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_SHIFT 25
-#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_INTENA_MSK_SHIFT 31
-#define I40E_VFINT_DYN_CTLN1_INTENA_MSK_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_INTENA_MSK_SHIFT)
-#define I40E_VFINT_ICR0_ENA1 0x00005000 /* Reset: CORER */
-#define I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_SHIFT 25
-#define I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_SHIFT)
-#define I40E_VFINT_ICR0_ENA1_ADMINQ_SHIFT 30
-#define I40E_VFINT_ICR0_ENA1_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA1_ADMINQ_SHIFT)
-#define I40E_VFINT_ICR0_ENA1_RSVD_SHIFT 31
-#define I40E_VFINT_ICR0_ENA1_RSVD_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA1_RSVD_SHIFT)
-#define I40E_VFINT_ICR01 0x00004800 /* Reset: CORER */
-#define I40E_VFINT_ICR01_INTEVENT_SHIFT 0
-#define I40E_VFINT_ICR01_INTEVENT_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_INTEVENT_SHIFT)
-#define I40E_VFINT_ICR01_QUEUE_0_SHIFT 1
-#define I40E_VFINT_ICR01_QUEUE_0_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_0_SHIFT)
-#define I40E_VFINT_ICR01_QUEUE_1_SHIFT 2
-#define I40E_VFINT_ICR01_QUEUE_1_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_1_SHIFT)
-#define I40E_VFINT_ICR01_QUEUE_2_SHIFT 3
-#define I40E_VFINT_ICR01_QUEUE_2_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_2_SHIFT)
-#define I40E_VFINT_ICR01_QUEUE_3_SHIFT 4
-#define I40E_VFINT_ICR01_QUEUE_3_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_3_SHIFT)
-#define I40E_VFINT_ICR01_LINK_STAT_CHANGE_SHIFT 25
-#define I40E_VFINT_ICR01_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_LINK_STAT_CHANGE_SHIFT)
-#define I40E_VFINT_ICR01_ADMINQ_SHIFT 30
-#define I40E_VFINT_ICR01_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_ADMINQ_SHIFT)
-#define I40E_VFINT_ICR01_SWINT_SHIFT 31
-#define I40E_VFINT_ICR01_SWINT_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_SWINT_SHIFT)
-#define I40E_VFINT_ITR01(_i) (0x00004C00 + ((_i) * 4)) /* _i=0...2 */ /* Reset: VFR */
-#define I40E_VFINT_ITR01_MAX_INDEX 2
-#define I40E_VFINT_ITR01_INTERVAL_SHIFT 0
-#define I40E_VFINT_ITR01_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_ITR01_INTERVAL_SHIFT)
-#define I40E_VFINT_ITRN1(_i, _INTVF) (0x00002800 + ((_i) * 64 + (_INTVF) * 4)) /* _i=0...2, _INTVF=0...15 */ /* Reset: VFR */
-#define I40E_VFINT_ITRN1_MAX_INDEX 2
-#define I40E_VFINT_ITRN1_INTERVAL_SHIFT 0
-#define I40E_VFINT_ITRN1_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_ITRN1_INTERVAL_SHIFT)
-#define I40E_VFINT_STAT_CTL01 0x00005400 /* Reset: CORER */
-#define I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_SHIFT 2
-#define I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_SHIFT)
-#define I40E_QRX_TAIL1(_Q) (0x00002000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_QRX_TAIL1_MAX_INDEX 15
-#define I40E_QRX_TAIL1_TAIL_SHIFT 0
-#define I40E_QRX_TAIL1_TAIL_MASK I40E_MASK(0x1FFF, I40E_QRX_TAIL1_TAIL_SHIFT)
-#define I40E_QTX_TAIL1(_Q) (0x00000000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: PFR */
-#define I40E_QTX_TAIL1_MAX_INDEX 15
-#define I40E_QTX_TAIL1_TAIL_SHIFT 0
-#define I40E_QTX_TAIL1_TAIL_MASK I40E_MASK(0x1FFF, I40E_QTX_TAIL1_TAIL_SHIFT)
-#define I40E_VFMSIX_PBA 0x00002000 /* Reset: VFLR */
-#define I40E_VFMSIX_PBA_PENBIT_SHIFT 0
-#define I40E_VFMSIX_PBA_PENBIT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_PBA_PENBIT_SHIFT)
-#define I40E_VFMSIX_TADD(_i) (0x00000000 + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TADD_MAX_INDEX 16
-#define I40E_VFMSIX_TADD_MSIXTADD10_SHIFT 0
-#define I40E_VFMSIX_TADD_MSIXTADD10_MASK I40E_MASK(0x3, I40E_VFMSIX_TADD_MSIXTADD10_SHIFT)
-#define I40E_VFMSIX_TADD_MSIXTADD_SHIFT 2
-#define I40E_VFMSIX_TADD_MSIXTADD_MASK I40E_MASK(0x3FFFFFFF, I40E_VFMSIX_TADD_MSIXTADD_SHIFT)
-#define I40E_VFMSIX_TMSG(_i) (0x00000008 + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TMSG_MAX_INDEX 16
-#define I40E_VFMSIX_TMSG_MSIXTMSG_SHIFT 0
-#define I40E_VFMSIX_TMSG_MSIXTMSG_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TMSG_MSIXTMSG_SHIFT)
-#define I40E_VFMSIX_TUADD(_i) (0x00000004 + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TUADD_MAX_INDEX 16
-#define I40E_VFMSIX_TUADD_MSIXTUADD_SHIFT 0
-#define I40E_VFMSIX_TUADD_MSIXTUADD_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TUADD_MSIXTUADD_SHIFT)
-#define I40E_VFMSIX_TVCTRL(_i) (0x0000000C + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
-#define I40E_VFMSIX_TVCTRL_MAX_INDEX 16
-#define I40E_VFMSIX_TVCTRL_MASK_SHIFT 0
-#define I40E_VFMSIX_TVCTRL_MASK_MASK I40E_MASK(0x1, I40E_VFMSIX_TVCTRL_MASK_SHIFT)
-#define I40E_VFCM_PE_ERRDATA 0x0000DC00 /* Reset: VFR */
-#define I40E_VFCM_PE_ERRDATA_ERROR_CODE_SHIFT 0
-#define I40E_VFCM_PE_ERRDATA_ERROR_CODE_MASK I40E_MASK(0xF, I40E_VFCM_PE_ERRDATA_ERROR_CODE_SHIFT)
-#define I40E_VFCM_PE_ERRDATA_Q_TYPE_SHIFT 4
-#define I40E_VFCM_PE_ERRDATA_Q_TYPE_MASK I40E_MASK(0x7, I40E_VFCM_PE_ERRDATA_Q_TYPE_SHIFT)
-#define I40E_VFCM_PE_ERRDATA_Q_NUM_SHIFT 8
-#define I40E_VFCM_PE_ERRDATA_Q_NUM_MASK I40E_MASK(0x3FFFF, I40E_VFCM_PE_ERRDATA_Q_NUM_SHIFT)
-#define I40E_VFCM_PE_ERRINFO 0x0000D800 /* Reset: VFR */
-#define I40E_VFCM_PE_ERRINFO_ERROR_VALID_SHIFT 0
-#define I40E_VFCM_PE_ERRINFO_ERROR_VALID_MASK I40E_MASK(0x1, I40E_VFCM_PE_ERRINFO_ERROR_VALID_SHIFT)
-#define I40E_VFCM_PE_ERRINFO_ERROR_INST_SHIFT 4
-#define I40E_VFCM_PE_ERRINFO_ERROR_INST_MASK I40E_MASK(0x7, I40E_VFCM_PE_ERRINFO_ERROR_INST_SHIFT)
-#define I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT 8
-#define I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT)
-#define I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT 16
-#define I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT)
-#define I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT 24
-#define I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT)
-#define I40E_VFQF_HENA(_i) (0x0000C400 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
-#define I40E_VFQF_HENA_MAX_INDEX 1
-#define I40E_VFQF_HENA_PTYPE_ENA_SHIFT 0
-#define I40E_VFQF_HENA_PTYPE_ENA_MASK I40E_MASK(0xFFFFFFFF, I40E_VFQF_HENA_PTYPE_ENA_SHIFT)
-#define I40E_VFQF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) /* _i=0...12 */ /* Reset: CORER */
-#define I40E_VFQF_HKEY_MAX_INDEX 12
-#define I40E_VFQF_HKEY_KEY_0_SHIFT 0
-#define I40E_VFQF_HKEY_KEY_0_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_0_SHIFT)
-#define I40E_VFQF_HKEY_KEY_1_SHIFT 8
-#define I40E_VFQF_HKEY_KEY_1_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_1_SHIFT)
-#define I40E_VFQF_HKEY_KEY_2_SHIFT 16
-#define I40E_VFQF_HKEY_KEY_2_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_2_SHIFT)
-#define I40E_VFQF_HKEY_KEY_3_SHIFT 24
-#define I40E_VFQF_HKEY_KEY_3_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_3_SHIFT)
-#define I40E_VFQF_HLUT(_i) (0x0000D000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_VFQF_HLUT_MAX_INDEX 15
-#define I40E_VFQF_HLUT_LUT0_SHIFT 0
-#define I40E_VFQF_HLUT_LUT0_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT0_SHIFT)
-#define I40E_VFQF_HLUT_LUT1_SHIFT 8
-#define I40E_VFQF_HLUT_LUT1_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT1_SHIFT)
-#define I40E_VFQF_HLUT_LUT2_SHIFT 16
-#define I40E_VFQF_HLUT_LUT2_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT2_SHIFT)
-#define I40E_VFQF_HLUT_LUT3_SHIFT 24
-#define I40E_VFQF_HLUT_LUT3_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT3_SHIFT)
-#define I40E_VFQF_HREGION(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...7 */ /* Reset: CORER */
-#define I40E_VFQF_HREGION_MAX_INDEX 7
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_0_SHIFT 0
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_0_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_0_SHIFT)
-#define I40E_VFQF_HREGION_REGION_0_SHIFT 1
-#define I40E_VFQF_HREGION_REGION_0_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_0_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_1_SHIFT 4
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_1_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_1_SHIFT)
-#define I40E_VFQF_HREGION_REGION_1_SHIFT 5
-#define I40E_VFQF_HREGION_REGION_1_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_1_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_2_SHIFT 8
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_2_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_2_SHIFT)
-#define I40E_VFQF_HREGION_REGION_2_SHIFT 9
-#define I40E_VFQF_HREGION_REGION_2_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_2_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_3_SHIFT 12
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_3_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_3_SHIFT)
-#define I40E_VFQF_HREGION_REGION_3_SHIFT 13
-#define I40E_VFQF_HREGION_REGION_3_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_3_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_4_SHIFT 16
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_4_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_4_SHIFT)
-#define I40E_VFQF_HREGION_REGION_4_SHIFT 17
-#define I40E_VFQF_HREGION_REGION_4_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_4_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_5_SHIFT 20
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_5_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_5_SHIFT)
-#define I40E_VFQF_HREGION_REGION_5_SHIFT 21
-#define I40E_VFQF_HREGION_REGION_5_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_5_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_6_SHIFT 24
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_6_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_6_SHIFT)
-#define I40E_VFQF_HREGION_REGION_6_SHIFT 25
-#define I40E_VFQF_HREGION_REGION_6_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_6_SHIFT)
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT 28
-#define I40E_VFQF_HREGION_OVERRIDE_ENA_7_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT)
-#define I40E_VFQF_HREGION_REGION_7_SHIFT 29
-#define I40E_VFQF_HREGION_REGION_7_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_7_SHIFT)
-#define I40E_VFINT_DYN_CTL01_WB_ON_ITR_SHIFT 30
-#define I40E_VFINT_DYN_CTL01_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_WB_ON_ITR_SHIFT)
-#define I40E_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT 30
-#define I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT)
-#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
-#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
-#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
-#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
-#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
-#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
-#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
-#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
-#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
-#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT 0
-#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK I40E_MASK(0x1, I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
-#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK I40E_MASK(0x7, I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
-#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK I40E_MASK(0x3F, I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT 31
-#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK I40E_MASK(0x1, I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
-#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
-#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
-#define I40E_VFPE_CQACK1_PECQID_MASK I40E_MASK(0x1FFFF, I40E_VFPE_CQACK1_PECQID_SHIFT)
-#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
-#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
-#define I40E_VFPE_CQARM1_PECQID_MASK I40E_MASK(0x1FFFF, I40E_VFPE_CQARM1_PECQID_SHIFT)
-#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
-#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
-#define I40E_VFPE_CQPDB1_WQHEAD_MASK I40E_MASK(0x7FF, I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
-#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
-#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
-#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK I40E_MASK(0xFFFF, I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
-#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK I40E_MASK(0xFFFF, I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
-#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
-#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT 0
-#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK I40E_MASK(0x7FF, I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
-#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
-#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK I40E_MASK(0x1, I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
-#define I40E_VFPE_IPCONFIG01 0x00008C00 /* Reset: VFR */
-#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT 0
-#define I40E_VFPE_IPCONFIG01_PEIPID_MASK I40E_MASK(0xFFFF, I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
-#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
-#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK I40E_MASK(0x1, I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
-#define I40E_VFPE_MRTEIDXMASK1 0x00009000 /* Reset: VFR */
-#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK I40E_MASK(0x1F, I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
-#define I40E_VFPE_RCVUNEXPECTEDERROR1 0x00009400 /* Reset: VFR */
-#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK I40E_MASK(0xFFFFFF, I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_VFPE_TCPNOWTIMER1 0x0000A800 /* Reset: VFR */
-#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
-#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
-#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
-#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT 0
-#define I40E_VFPE_WQEALLOC1_PEQPID_MASK I40E_MASK(0x3FFFF, I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
-#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
-#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK I40E_MASK(0xFFF, I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
-#endif /* _I40E_REGISTER_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
deleted file mode 100644
index 77be070..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_STATUS_H_
-#define _I40E_STATUS_H_
-
-/* Error Codes */
-enum i40e_status_code {
-	I40E_SUCCESS				= 0,
-	I40E_ERR_NVM				= -1,
-	I40E_ERR_NVM_CHECKSUM			= -2,
-	I40E_ERR_PHY				= -3,
-	I40E_ERR_CONFIG				= -4,
-	I40E_ERR_PARAM				= -5,
-	I40E_ERR_MAC_TYPE			= -6,
-	I40E_ERR_UNKNOWN_PHY			= -7,
-	I40E_ERR_LINK_SETUP			= -8,
-	I40E_ERR_ADAPTER_STOPPED		= -9,
-	I40E_ERR_INVALID_MAC_ADDR		= -10,
-	I40E_ERR_DEVICE_NOT_SUPPORTED		= -11,
-	I40E_ERR_MASTER_REQUESTS_PENDING	= -12,
-	I40E_ERR_INVALID_LINK_SETTINGS		= -13,
-	I40E_ERR_AUTONEG_NOT_COMPLETE		= -14,
-	I40E_ERR_RESET_FAILED			= -15,
-	I40E_ERR_SWFW_SYNC			= -16,
-	I40E_ERR_NO_AVAILABLE_VSI		= -17,
-	I40E_ERR_NO_MEMORY			= -18,
-	I40E_ERR_BAD_PTR			= -19,
-	I40E_ERR_RING_FULL			= -20,
-	I40E_ERR_INVALID_PD_ID			= -21,
-	I40E_ERR_INVALID_QP_ID			= -22,
-	I40E_ERR_INVALID_CQ_ID			= -23,
-	I40E_ERR_INVALID_CEQ_ID			= -24,
-	I40E_ERR_INVALID_AEQ_ID			= -25,
-	I40E_ERR_INVALID_SIZE			= -26,
-	I40E_ERR_INVALID_ARP_INDEX		= -27,
-	I40E_ERR_INVALID_FPM_FUNC_ID		= -28,
-	I40E_ERR_QP_INVALID_MSG_SIZE		= -29,
-	I40E_ERR_QP_TOOMANY_WRS_POSTED		= -30,
-	I40E_ERR_INVALID_FRAG_COUNT		= -31,
-	I40E_ERR_QUEUE_EMPTY			= -32,
-	I40E_ERR_INVALID_ALIGNMENT		= -33,
-	I40E_ERR_FLUSHED_QUEUE			= -34,
-	I40E_ERR_INVALID_PUSH_PAGE_INDEX	= -35,
-	I40E_ERR_INVALID_IMM_DATA_SIZE		= -36,
-	I40E_ERR_TIMEOUT			= -37,
-	I40E_ERR_OPCODE_MISMATCH		= -38,
-	I40E_ERR_CQP_COMPL_ERROR		= -39,
-	I40E_ERR_INVALID_VF_ID			= -40,
-	I40E_ERR_INVALID_HMCFN_ID		= -41,
-	I40E_ERR_BACKING_PAGE_ERROR		= -42,
-	I40E_ERR_NO_PBLCHUNKS_AVAILABLE		= -43,
-	I40E_ERR_INVALID_PBLE_INDEX		= -44,
-	I40E_ERR_INVALID_SD_INDEX		= -45,
-	I40E_ERR_INVALID_PAGE_DESC_INDEX	= -46,
-	I40E_ERR_INVALID_SD_TYPE		= -47,
-	I40E_ERR_MEMCPY_FAILED			= -48,
-	I40E_ERR_INVALID_HMC_OBJ_INDEX		= -49,
-	I40E_ERR_INVALID_HMC_OBJ_COUNT		= -50,
-	I40E_ERR_INVALID_SRQ_ARM_LIMIT		= -51,
-	I40E_ERR_SRQ_ENABLED			= -52,
-	I40E_ERR_ADMIN_QUEUE_ERROR		= -53,
-	I40E_ERR_ADMIN_QUEUE_TIMEOUT		= -54,
-	I40E_ERR_BUF_TOO_SHORT			= -55,
-	I40E_ERR_ADMIN_QUEUE_FULL		= -56,
-	I40E_ERR_ADMIN_QUEUE_NO_WORK		= -57,
-	I40E_ERR_BAD_IWARP_CQE			= -58,
-	I40E_ERR_NVM_BLANK_MODE			= -59,
-	I40E_ERR_NOT_IMPLEMENTED		= -60,
-	I40E_ERR_PE_DOORBELL_NOT_ENABLED	= -61,
-	I40E_ERR_DIAG_TEST_FAILED		= -62,
-	I40E_ERR_NOT_READY			= -63,
-	I40E_NOT_SUPPORTED			= -64,
-	I40E_ERR_FIRMWARE_API_VERSION		= -65,
-	I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR	= -66,
-};
-
-#endif /* _I40E_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
deleted file mode 100644
index 3b5a63b..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ /dev/null
@@ -1,524 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_TXRX_H_
-#define _I40E_TXRX_H_
-
-/* Interrupt Throttling and Rate Limiting Goodies */
-#define I40E_DEFAULT_IRQ_WORK      256
-
-/* The datasheets for the X710 and XL710 indicate that the maximum value for
- * the ITR is 8160usec, which is then called out as 0xFF0 with a 2usec
- * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
- * the register value, which is divided by 2, let's use the actual values and
- * avoid an excessive amount of translation.
- */
-#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
-#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
-#define I40E_MIN_ITR		     2	/* reg uses 2 usec resolution */
-#define I40E_ITR_100K		    10	/* all values below must be even */
-#define I40E_ITR_50K		    20
-#define I40E_ITR_20K		    50
-#define I40E_ITR_18K		    60
-#define I40E_ITR_8K		   122
-#define I40E_MAX_ITR		  8160	/* maximum value as per datasheet */
-#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
-#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
-#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
-
-#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
-
-/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
- * the value of the rate limit is non-zero
- */
-#define INTRL_ENA                  BIT(6)
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
-#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
-#define I40E_INTRL_8K              125     /* 8000 ints/sec */
-#define I40E_INTRL_62K             16      /* 62500 ints/sec */
-#define I40E_INTRL_83K             12      /* 83333 ints/sec */
-
-#define I40E_QUEUE_END_OF_LIST 0x7FF
-
-/* This enum matches hardware bits and is meant to be used by DYN_CTLN
- * registers and QINT registers, or more generally anywhere in the manual
- * that mentions ITR_INDX. ITR_NONE cannot be used as an index 'n' into any
- * register but instead is a special value meaning "don't update" ITR0/1/2.
- */
-enum i40e_dyn_idx_t {
-	I40E_IDX_ITR0 = 0,
-	I40E_IDX_ITR1 = 1,
-	I40E_IDX_ITR2 = 2,
-	I40E_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
-};
-
-/* these are indexes into ITRN registers */
-#define I40E_RX_ITR    I40E_IDX_ITR0
-#define I40E_TX_ITR    I40E_IDX_ITR1
-#define I40E_PE_ITR    I40E_IDX_ITR2
-
-/* Supported RSS offloads */
-#define I40E_DEFAULT_RSS_HENA ( \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD))
-
-#define I40E_DEFAULT_RSS_HENA_EXPANDED (I40E_DEFAULT_RSS_HENA | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
-	BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
-
-/* Supported Rx Buffer Sizes (a multiple of 128) */
-#define I40E_RXBUFFER_256   256
-#define I40E_RXBUFFER_1536  1536  /* 128B aligned standard Ethernet frame */
-#define I40E_RXBUFFER_2048  2048
-#define I40E_RXBUFFER_3072  3072  /* Used for large frames w/ padding */
-#define I40E_MAX_RXBUFFER   9728  /* largest size for single descriptor */
-
-/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
- * reserve 2 more, and skb_shared_info adds an additional 384 bytes more;
- * this adds up to 512 bytes of extra data, meaning the smallest allocation
- * we could have is 1K.
- * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
- * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
- */
-#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
-#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
-#define i40e_rx_desc i40e_32byte_rx_desc
-
-#define I40E_RX_DMA_ATTR \
-	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
-
-/* Attempt to maximize the headroom available for incoming frames.  We
- * use a 2K buffer for receives and need 1536/1534 to store the data for
- * the frame.  This leaves us with 512 bytes of room.  From that we need
- * to deduct the space needed for the shared info and the padding needed
- * to IP align the frame.
- *
- * Note: For cache line sizes 256 or larger this value is going to end
- *	 up negative.  In these cases we should fall back to the legacy
- *	 receive path.
- */
-#if (PAGE_SIZE < 8192)
-#define I40E_2K_TOO_SMALL_WITH_PADDING \
-((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
-
-static inline int i40e_compute_pad(int rx_buf_len)
-{
-	int page_size, pad_size;
-
-	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
-	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
-
-	return pad_size;
-}
-
-static inline int i40e_skb_pad(void)
-{
-	int rx_buf_len;
-
-	/* If a 2K buffer cannot handle a standard Ethernet frame then
-	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
-	 *
-	 * For a 3K buffer we need to add enough padding to allow for
-	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
-	 * cache-line alignment.
-	 */
-	if (I40E_2K_TOO_SMALL_WITH_PADDING)
-		rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
-	else
-		rx_buf_len = I40E_RXBUFFER_1536;
-
-	/* if needed make room for NET_IP_ALIGN */
-	rx_buf_len -= NET_IP_ALIGN;
-
-	return i40e_compute_pad(rx_buf_len);
-}
-
-#define I40E_SKB_PAD i40e_skb_pad()
-#else
-#define I40E_2K_TOO_SMALL_WITH_PADDING false
-#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
-#endif
-
-/**
- * i40e_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
-				     const u64 stat_err_bits)
-{
-	return !!(rx_desc->wb.qword1.status_error_len &
-		  cpu_to_le64(stat_err_bits));
-}
-
-/* How many Rx Buffers do we bundle into one write to the hardware? */
-#define I40E_RX_BUFFER_WRITE	32	/* Must be power of 2 */
-#define I40E_RX_INCREMENT(r, i) \
-	do {					\
-		(i)++;				\
-		if ((i) == (r)->count)		\
-			i = 0;			\
-		r->next_to_clean = i;		\
-	} while (0)
-
-#define I40E_RX_NEXT_DESC(r, i, n)		\
-	do {					\
-		(i)++;				\
-		if ((i) == (r)->count)		\
-			i = 0;			\
-		(n) = I40E_RX_DESC((r), (i));	\
-	} while (0)
-
-#define I40E_RX_NEXT_DESC_PREFETCH(r, i, n)		\
-	do {						\
-		I40E_RX_NEXT_DESC((r), (i), (n));	\
-		prefetch((n));				\
-	} while (0)
-
-#define I40E_MAX_BUFFER_TXD	8
-#define I40E_MIN_TX_LEN		17
-
-/* The size limit for a transmit buffer in a descriptor is (16K - 1).
- * In order to align with the read requests we will align the value to
- * the nearest 4K which represents our maximum read request size.
- */
-#define I40E_MAX_READ_REQ_SIZE		4096
-#define I40E_MAX_DATA_PER_TXD		(16 * 1024 - 1)
-#define I40E_MAX_DATA_PER_TXD_ALIGNED \
-	(I40E_MAX_DATA_PER_TXD & ~(I40E_MAX_READ_REQ_SIZE - 1))
-
-/**
- * i40e_txd_use_count  - estimate the number of descriptors needed for Tx
- * @size: transmit request size in bytes
- *
- * Due to hardware alignment restrictions (4K alignment), we need to
- * assume that we can have no more than 12K of data per descriptor, even
- * though each descriptor can take up to 16K - 1 bytes of aligned memory.
- * Thus, we need to divide by 12K. But division is slow! Instead,
- * we decompose the operation into shifts and one relatively cheap
- * multiply operation.
- *
- * To divide by 12K, we first divide by 4K, then divide by 3:
- *     To divide by 4K, shift right by 12 bits
- *     To divide by 3, multiply by 85, then divide by 256
- *     (Divide by 256 is done by shifting right by 8 bits)
- * Finally, we add one to round up. Because 256 isn't an exact multiple of
- * 3, we'll underestimate near each multiple of 12K. This is actually more
- * accurate as we have 4K - 1 of wiggle room that we can fit into the last
- * segment.  For our purposes this is accurate out to 1M which is orders of
- * magnitude greater than our largest possible GSO size.
- *
- * This would then be implemented as:
- *     return (((size >> 12) * 85) >> 8) + 1;
- *
- * Since multiplication and division are commutative, we can reorder
- * operations into:
- *     return ((size * 85) >> 20) + 1;
- */
-static inline unsigned int i40e_txd_use_count(unsigned int size)
-{
-	return ((size * 85) >> 20) + 1;
-}
-
-/* Tx Descriptors needed, worst case */
-#define DESC_NEEDED (MAX_SKB_FRAGS + 6)
-#define I40E_MIN_DESC_PENDING	4
-
-#define I40E_TX_FLAGS_HW_VLAN		BIT(1)
-#define I40E_TX_FLAGS_SW_VLAN		BIT(2)
-#define I40E_TX_FLAGS_TSO		BIT(3)
-#define I40E_TX_FLAGS_IPV4		BIT(4)
-#define I40E_TX_FLAGS_IPV6		BIT(5)
-#define I40E_TX_FLAGS_FCCRC		BIT(6)
-#define I40E_TX_FLAGS_FSO		BIT(7)
-#define I40E_TX_FLAGS_FD_SB		BIT(9)
-#define I40E_TX_FLAGS_VXLAN_TUNNEL	BIT(10)
-#define I40E_TX_FLAGS_VLAN_MASK		0xffff0000
-#define I40E_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
-#define I40E_TX_FLAGS_VLAN_PRIO_SHIFT	29
-#define I40E_TX_FLAGS_VLAN_SHIFT	16
-
-struct i40e_tx_buffer {
-	struct i40e_tx_desc *next_to_watch;
-	union {
-		struct sk_buff *skb;
-		void *raw_buf;
-	};
-	unsigned int bytecount;
-	unsigned short gso_segs;
-
-	DEFINE_DMA_UNMAP_ADDR(dma);
-	DEFINE_DMA_UNMAP_LEN(len);
-	u32 tx_flags;
-};
-
-struct i40e_rx_buffer {
-	dma_addr_t dma;
-	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 page_offset;
-#else
-	__u16 page_offset;
-#endif
-	__u16 pagecnt_bias;
-};
-
-struct i40e_queue_stats {
-	u64 packets;
-	u64 bytes;
-};
-
-struct i40e_tx_queue_stats {
-	u64 restart_queue;
-	u64 tx_busy;
-	u64 tx_done_old;
-	u64 tx_linearize;
-	u64 tx_force_wb;
-	int prev_pkt_ctr;
-	u64 tx_lost_interrupt;
-};
-
-struct i40e_rx_queue_stats {
-	u64 non_eop_descs;
-	u64 alloc_page_failed;
-	u64 alloc_buff_failed;
-	u64 page_reuse_count;
-	u64 realloc_count;
-};
-
-enum i40e_ring_state_t {
-	__I40E_TX_FDIR_INIT_DONE,
-	__I40E_TX_XPS_INIT_DONE,
-	__I40E_RING_STATE_NBITS /* must be last */
-};
-
-/* some useful defines for virtchannel interface, which
- * is the only remaining user of header split
- */
-#define I40E_RX_DTYPE_NO_SPLIT      0
-#define I40E_RX_DTYPE_HEADER_SPLIT  1
-#define I40E_RX_DTYPE_SPLIT_ALWAYS  2
-#define I40E_RX_SPLIT_L2      0x1
-#define I40E_RX_SPLIT_IP      0x2
-#define I40E_RX_SPLIT_TCP_UDP 0x4
-#define I40E_RX_SPLIT_SCTP    0x8
-
-/* struct that defines a descriptor ring, associated with a VSI */
-struct i40e_ring {
-	struct i40e_ring *next;		/* pointer to next ring in q_vector */
-	void *desc;			/* Descriptor ring memory */
-	struct device *dev;		/* Used for DMA mapping */
-	struct net_device *netdev;	/* netdev ring maps to */
-	union {
-		struct i40e_tx_buffer *tx_bi;
-		struct i40e_rx_buffer *rx_bi;
-	};
-	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
-	u16 queue_index;		/* Queue number of ring */
-	u8 dcb_tc;			/* Traffic class of ring */
-	u8 __iomem *tail;
-
-	/* high bit set means dynamic, use accessor routines to read/write.
-	 * hardware only supports 2us resolution for the ITR registers.
-	 * these values always store the USER setting, and must be converted
-	 * before programming to a register.
-	 */
-	u16 itr_setting;
-
-	u16 count;			/* Number of descriptors */
-	u16 reg_idx;			/* HW register index of the ring */
-	u16 rx_buf_len;
-
-	/* used in interrupt processing */
-	u16 next_to_use;
-	u16 next_to_clean;
-
-	u8 atr_sample_rate;
-	u8 atr_count;
-
-	bool ring_active;		/* is ring online or not */
-	bool arm_wb;		/* do something to arm write back */
-	u8 packet_stride;
-
-	u16 flags;
-#define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
-#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)
-
-	/* stats structs */
-	struct i40e_queue_stats	stats;
-	struct u64_stats_sync syncp;
-	union {
-		struct i40e_tx_queue_stats tx_stats;
-		struct i40e_rx_queue_stats rx_stats;
-	};
-
-	unsigned int size;		/* length of descriptor ring in bytes */
-	dma_addr_t dma;			/* physical address of ring */
-
-	struct i40e_vsi *vsi;		/* Backreference to associated VSI */
-	struct i40e_q_vector *q_vector;	/* Backreference to associated vector */
-
-	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
-	struct sk_buff *skb;		/* When i40evf_clean_rx_ring_irq() must
-					 * return before it sees the EOP for
-					 * the current packet, we save that skb
-					 * here and resume receiving this
-					 * packet the next time
-					 * i40evf_clean_rx_ring_irq() is called
-					 * for this ring.
-					 */
-} ____cacheline_internodealigned_in_smp;
-
-static inline bool ring_uses_build_skb(struct i40e_ring *ring)
-{
-	return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
-}
-
-static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
-{
-	ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
-}
-
-static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
-{
-	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
-}
-
-#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
-#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
-#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
-#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
-#define I40E_ITR_ADAPTIVE_BULK		0x0000
-#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
-
-struct i40e_ring_container {
-	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
-	unsigned long next_update;	/* jiffies value of next update */
-	unsigned int total_bytes;	/* total bytes processed this int */
-	unsigned int total_packets;	/* total packets processed this int */
-	u16 count;
-	u16 target_itr;			/* target ITR setting for ring(s) */
-	u16 current_itr;		/* current ITR setting for ring(s) */
-};
-
-/* iterator for handling rings in ring container */
-#define i40e_for_each_ring(pos, head) \
-	for (pos = (head).ring; pos != NULL; pos = pos->next)
-
-static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
-{
-#if (PAGE_SIZE < 8192)
-	if (ring->rx_buf_len > (PAGE_SIZE / 2))
-		return 1;
-#endif
-	return 0;
-}
-
-#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring))
-
-bool i40evf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
-netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-void i40evf_clean_tx_ring(struct i40e_ring *tx_ring);
-void i40evf_clean_rx_ring(struct i40e_ring *rx_ring);
-int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring);
-int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring);
-void i40evf_free_tx_resources(struct i40e_ring *tx_ring);
-void i40evf_free_rx_resources(struct i40e_ring *rx_ring);
-int i40evf_napi_poll(struct napi_struct *napi, int budget);
-void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw);
-void i40evf_detect_recover_hung(struct i40e_vsi *vsi);
-int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
-bool __i40evf_chk_linearize(struct sk_buff *skb);
-
-/**
- * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed
- * @skb:     send buffer
- * @tx_ring: ring to send buffer on
- *
- * Returns the number of data descriptors needed for this skb. Returns 0 to
- * indicate there are not enough descriptors available in this ring, since we
- * need at least one descriptor.
- **/
-static inline int i40e_xmit_descriptor_count(struct sk_buff *skb)
-{
-	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
-	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
-	int count = 0, size = skb_headlen(skb);
-
-	for (;;) {
-		count += i40e_txd_use_count(size);
-
-		if (!nr_frags--)
-			break;
-
-		size = skb_frag_size(frag++);
-	}
-
-	return count;
-}
-
-/**
- * i40e_maybe_stop_tx - 1st level check for Tx stop conditions
- * @tx_ring: the ring to be checked
- * @size:    the size we want to assure is available
- *
- * Returns 0 if stop is not needed
- **/
-static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
-{
-	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
-		return 0;
-	return __i40evf_maybe_stop_tx(tx_ring, size);
-}
-
-/**
- * i40e_chk_linearize - Check if there are more than 8 fragments per packet
- * @skb:      send buffer
- * @count:    number of buffers used
- *
- * Note: Our HW can't scatter-gather more than 8 fragments to build
- * a packet on the wire and so we need to figure out the cases where we
- * need to linearize the skb.
- **/
-static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
-{
-	/* Both TSO and single send will work if count is less than 8 */
-	if (likely(count < I40E_MAX_BUFFER_TXD))
-		return false;
-
-	if (skb_is_gso(skb))
-		return __i40evf_chk_linearize(skb);
-
-	/* we can support up to 8 data buffers for a single send */
-	return count != I40E_MAX_BUFFER_TXD;
-}
-/**
- * txring_txq - Find the netdev equivalent of the Tx ring
- * @ring: Tx ring to find the netdev equivalent of
- **/
-static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
-{
-	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
-}
-#endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
deleted file mode 100644
index 094387d..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ /dev/null
@@ -1,1496 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_TYPE_H_
-#define _I40E_TYPE_H_
-
-#include "i40e_status.h"
-#include "i40e_osdep.h"
-#include "i40e_register.h"
-#include "i40e_adminq.h"
-#include "i40e_hmc.h"
-#include "i40e_lan_hmc.h"
-#include "i40e_devids.h"
-
-/* I40E_MASK is a macro used on 32 bit registers */
-#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
-
-#define I40E_MAX_VSI_QP			16
-#define I40E_MAX_VF_VSI			3
-#define I40E_MAX_CHAINED_RX_BUFFERS	5
-#define I40E_MAX_PF_UDP_OFFLOAD_PORTS	16
-
-/* Max default timeout in ms */
-#define I40E_MAX_NVM_TIMEOUT		18000
-
-/* Max timeout in ms for the phy to respond */
-#define I40E_MAX_PHY_TIMEOUT		500
-
-/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
-#define I40E_MS_TO_GTIME(time)		((time) * 1000)
-
-/* forward declaration */
-struct i40e_hw;
-typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
-
-/* Data type manipulation macros. */
-
-#define I40E_DESC_UNUSED(R)	\
-	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
-	(R)->next_to_clean - (R)->next_to_use - 1)
-
-/* bitfields for Tx queue mapping in QTX_CTL */
-#define I40E_QTX_CTL_VF_QUEUE	0x0
-#define I40E_QTX_CTL_VM_QUEUE	0x1
-#define I40E_QTX_CTL_PF_QUEUE	0x2
-
-/* debug masks - set these bits in hw->debug_mask to control output */
-enum i40e_debug_mask {
-	I40E_DEBUG_INIT			= 0x00000001,
-	I40E_DEBUG_RELEASE		= 0x00000002,
-
-	I40E_DEBUG_LINK			= 0x00000010,
-	I40E_DEBUG_PHY			= 0x00000020,
-	I40E_DEBUG_HMC			= 0x00000040,
-	I40E_DEBUG_NVM			= 0x00000080,
-	I40E_DEBUG_LAN			= 0x00000100,
-	I40E_DEBUG_FLOW			= 0x00000200,
-	I40E_DEBUG_DCB			= 0x00000400,
-	I40E_DEBUG_DIAG			= 0x00000800,
-	I40E_DEBUG_FD			= 0x00001000,
-	I40E_DEBUG_PACKAGE		= 0x00002000,
-
-	I40E_DEBUG_AQ_MESSAGE		= 0x01000000,
-	I40E_DEBUG_AQ_DESCRIPTOR	= 0x02000000,
-	I40E_DEBUG_AQ_DESC_BUFFER	= 0x04000000,
-	I40E_DEBUG_AQ_COMMAND		= 0x06000000,
-	I40E_DEBUG_AQ			= 0x0F000000,
-
-	I40E_DEBUG_USER			= 0xF0000000,
-
-	I40E_DEBUG_ALL			= 0xFFFFFFFF
-};
-
-/* These are structs for managing the hardware information and the operations.
- * The structures of function pointers are filled out at init time when we
- * know for sure exactly which hardware we're working with.  This gives us the
- * flexibility of using the same main driver code but adapting to slightly
- * different hardware needs as new parts are developed.  For this architecture,
- * the Firmware and AdminQ are intended to insulate the driver from most of the
- * future changes, but these structures will also do part of the job.
- */
-enum i40e_mac_type {
-	I40E_MAC_UNKNOWN = 0,
-	I40E_MAC_XL710,
-	I40E_MAC_VF,
-	I40E_MAC_X722,
-	I40E_MAC_X722_VF,
-	I40E_MAC_GENERIC,
-};
-
-enum i40e_media_type {
-	I40E_MEDIA_TYPE_UNKNOWN = 0,
-	I40E_MEDIA_TYPE_FIBER,
-	I40E_MEDIA_TYPE_BASET,
-	I40E_MEDIA_TYPE_BACKPLANE,
-	I40E_MEDIA_TYPE_CX4,
-	I40E_MEDIA_TYPE_DA,
-	I40E_MEDIA_TYPE_VIRTUAL
-};
-
-enum i40e_fc_mode {
-	I40E_FC_NONE = 0,
-	I40E_FC_RX_PAUSE,
-	I40E_FC_TX_PAUSE,
-	I40E_FC_FULL,
-	I40E_FC_PFC,
-	I40E_FC_DEFAULT
-};
-
-enum i40e_set_fc_aq_failures {
-	I40E_SET_FC_AQ_FAIL_NONE = 0,
-	I40E_SET_FC_AQ_FAIL_GET = 1,
-	I40E_SET_FC_AQ_FAIL_SET = 2,
-	I40E_SET_FC_AQ_FAIL_UPDATE = 4,
-	I40E_SET_FC_AQ_FAIL_SET_UPDATE = 6
-};
-
-enum i40e_vsi_type {
-	I40E_VSI_MAIN	= 0,
-	I40E_VSI_VMDQ1	= 1,
-	I40E_VSI_VMDQ2	= 2,
-	I40E_VSI_CTRL	= 3,
-	I40E_VSI_FCOE	= 4,
-	I40E_VSI_MIRROR	= 5,
-	I40E_VSI_SRIOV	= 6,
-	I40E_VSI_FDIR	= 7,
-	I40E_VSI_TYPE_UNKNOWN
-};
-
-enum i40e_queue_type {
-	I40E_QUEUE_TYPE_RX = 0,
-	I40E_QUEUE_TYPE_TX,
-	I40E_QUEUE_TYPE_PE_CEQ,
-	I40E_QUEUE_TYPE_UNKNOWN
-};
-
-struct i40e_link_status {
-	enum i40e_aq_phy_type phy_type;
-	enum i40e_aq_link_speed link_speed;
-	u8 link_info;
-	u8 an_info;
-	u8 req_fec_info;
-	u8 fec_info;
-	u8 ext_info;
-	u8 loopback;
-	/* is Link Status Event notification to SW enabled */
-	bool lse_enable;
-	u16 max_frame_size;
-	bool crc_enable;
-	u8 pacing;
-	u8 requested_speeds;
-	u8 module_type[3];
-	/* 1st byte: module identifier */
-#define I40E_MODULE_TYPE_SFP		0x03
-#define I40E_MODULE_TYPE_QSFP		0x0D
-	/* 2nd byte: ethernet compliance codes for 10/40G */
-#define I40E_MODULE_TYPE_40G_ACTIVE	0x01
-#define I40E_MODULE_TYPE_40G_LR4	0x02
-#define I40E_MODULE_TYPE_40G_SR4	0x04
-#define I40E_MODULE_TYPE_40G_CR4	0x08
-#define I40E_MODULE_TYPE_10G_BASE_SR	0x10
-#define I40E_MODULE_TYPE_10G_BASE_LR	0x20
-#define I40E_MODULE_TYPE_10G_BASE_LRM	0x40
-#define I40E_MODULE_TYPE_10G_BASE_ER	0x80
-	/* 3rd byte: ethernet compliance codes for 1G */
-#define I40E_MODULE_TYPE_1000BASE_SX	0x01
-#define I40E_MODULE_TYPE_1000BASE_LX	0x02
-#define I40E_MODULE_TYPE_1000BASE_CX	0x04
-#define I40E_MODULE_TYPE_1000BASE_T	0x08
-};
-
-struct i40e_phy_info {
-	struct i40e_link_status link_info;
-	struct i40e_link_status link_info_old;
-	bool get_link_info;
-	enum i40e_media_type media_type;
-	/* all the phy types the NVM is capable of */
-	u64 phy_types;
-};
-
-#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII)
-#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX)
-#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4)
-#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR)
-#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4)
-#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI)
-#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI)
-#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI)
-#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI)
-#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI)
-#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU)
-#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU)
-#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC)
-#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC)
-#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX)
-#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T)
-#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T)
-#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR)
-#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR)
-#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU)
-#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1)
-#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4)
-#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4)
-#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4)
-#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX)
-#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX)
-#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \
-				BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL)
-#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2)
-/* Defining the macro I40E_PHY_TYPE_OFFSET to implement a bit shift for some
- * PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_* bit
- * fields but no corresponding gap in the i40e_aq_phy_type enumeration. So,
- * a shift is needed to adjust for this with values larger than 31. The
- * only affected values are I40E_PHY_TYPE_25GBASE_*.
- */
-#define I40E_PHY_TYPE_OFFSET 1
-#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \
-					     I40E_PHY_TYPE_OFFSET)
-#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \
-					     I40E_PHY_TYPE_OFFSET)
-#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \
-					     I40E_PHY_TYPE_OFFSET)
-#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
-					     I40E_PHY_TYPE_OFFSET)
-#define I40E_HW_CAP_MAX_GPIO			30
-/* Capabilities of a PF or a VF or the whole device */
-struct i40e_hw_capabilities {
-	u32  switch_mode;
-#define I40E_NVM_IMAGE_TYPE_EVB		0x0
-#define I40E_NVM_IMAGE_TYPE_CLOUD	0x2
-#define I40E_NVM_IMAGE_TYPE_UDP_CLOUD	0x3
-
-	u32  management_mode;
-	u32  mng_protocols_over_mctp;
-#define I40E_MNG_PROTOCOL_PLDM		0x2
-#define I40E_MNG_PROTOCOL_OEM_COMMANDS	0x4
-#define I40E_MNG_PROTOCOL_NCSI		0x8
-	u32  npar_enable;
-	u32  os2bmc;
-	u32  valid_functions;
-	bool sr_iov_1_1;
-	bool vmdq;
-	bool evb_802_1_qbg; /* Edge Virtual Bridging */
-	bool evb_802_1_qbh; /* Bridge Port Extension */
-	bool dcb;
-	bool fcoe;
-	bool iscsi; /* Indicates iSCSI enabled */
-	bool flex10_enable;
-	bool flex10_capable;
-	u32  flex10_mode;
-#define I40E_FLEX10_MODE_UNKNOWN	0x0
-#define I40E_FLEX10_MODE_DCC		0x1
-#define I40E_FLEX10_MODE_DCI		0x2
-
-	u32 flex10_status;
-#define I40E_FLEX10_STATUS_DCC_ERROR	0x1
-#define I40E_FLEX10_STATUS_VC_MODE	0x2
-
-	bool sec_rev_disabled;
-	bool update_disabled;
-#define I40E_NVM_MGMT_SEC_REV_DISABLED	0x1
-#define I40E_NVM_MGMT_UPDATE_DISABLED	0x2
-
-	bool mgmt_cem;
-	bool ieee_1588;
-	bool iwarp;
-	bool fd;
-	u32 fd_filters_guaranteed;
-	u32 fd_filters_best_effort;
-	bool rss;
-	u32 rss_table_size;
-	u32 rss_table_entry_width;
-	bool led[I40E_HW_CAP_MAX_GPIO];
-	bool sdp[I40E_HW_CAP_MAX_GPIO];
-	u32 nvm_image_type;
-	u32 num_flow_director_filters;
-	u32 num_vfs;
-	u32 vf_base_id;
-	u32 num_vsis;
-	u32 num_rx_qp;
-	u32 num_tx_qp;
-	u32 base_queue;
-	u32 num_msix_vectors;
-	u32 num_msix_vectors_vf;
-	u32 led_pin_num;
-	u32 sdp_pin_num;
-	u32 mdio_port_num;
-	u32 mdio_port_mode;
-	u8 rx_buf_chain_len;
-	u32 enabled_tcmap;
-	u32 maxtc;
-	u64 wr_csr_prot;
-};
-
-struct i40e_mac_info {
-	enum i40e_mac_type type;
-	u8 addr[ETH_ALEN];
-	u8 perm_addr[ETH_ALEN];
-	u8 san_addr[ETH_ALEN];
-	u16 max_fcoeq;
-};
-
-enum i40e_aq_resources_ids {
-	I40E_NVM_RESOURCE_ID = 1
-};
-
-enum i40e_aq_resource_access_type {
-	I40E_RESOURCE_READ = 1,
-	I40E_RESOURCE_WRITE
-};
-
-struct i40e_nvm_info {
-	u64 hw_semaphore_timeout; /* usec global time (GTIME resolution) */
-	u32 timeout;              /* [ms] */
-	u16 sr_size;              /* Shadow RAM size in words */
-	bool blank_nvm_mode;      /* is NVM empty (no FW present) */
-	u16 version;              /* NVM package version */
-	u32 eetrack;              /* NVM data version */
-	u32 oem_ver;              /* OEM version info */
-};
-
-/* definitions used in NVM update support */
-
-enum i40e_nvmupd_cmd {
-	I40E_NVMUPD_INVALID,
-	I40E_NVMUPD_READ_CON,
-	I40E_NVMUPD_READ_SNT,
-	I40E_NVMUPD_READ_LCB,
-	I40E_NVMUPD_READ_SA,
-	I40E_NVMUPD_WRITE_ERA,
-	I40E_NVMUPD_WRITE_CON,
-	I40E_NVMUPD_WRITE_SNT,
-	I40E_NVMUPD_WRITE_LCB,
-	I40E_NVMUPD_WRITE_SA,
-	I40E_NVMUPD_CSUM_CON,
-	I40E_NVMUPD_CSUM_SA,
-	I40E_NVMUPD_CSUM_LCB,
-	I40E_NVMUPD_STATUS,
-	I40E_NVMUPD_EXEC_AQ,
-	I40E_NVMUPD_GET_AQ_RESULT,
-	I40E_NVMUPD_GET_AQ_EVENT,
-};
-
-enum i40e_nvmupd_state {
-	I40E_NVMUPD_STATE_INIT,
-	I40E_NVMUPD_STATE_READING,
-	I40E_NVMUPD_STATE_WRITING,
-	I40E_NVMUPD_STATE_INIT_WAIT,
-	I40E_NVMUPD_STATE_WRITE_WAIT,
-	I40E_NVMUPD_STATE_ERROR
-};
-
-/* nvm_access definition and its masks/shifts need to be accessible to
- * application, core driver, and shared code.  Where is the right file?
- */
-#define I40E_NVM_READ	0xB
-#define I40E_NVM_WRITE	0xC
-
-#define I40E_NVM_MOD_PNT_MASK 0xFF
-
-#define I40E_NVM_TRANS_SHIFT			8
-#define I40E_NVM_TRANS_MASK			(0xf << I40E_NVM_TRANS_SHIFT)
-#define I40E_NVM_PRESERVATION_FLAGS_SHIFT	12
-#define I40E_NVM_PRESERVATION_FLAGS_MASK \
-				(0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT)
-#define I40E_NVM_PRESERVATION_FLAGS_SELECTED	0x01
-#define I40E_NVM_PRESERVATION_FLAGS_ALL		0x02
-#define I40E_NVM_CON				0x0
-#define I40E_NVM_SNT				0x1
-#define I40E_NVM_LCB				0x2
-#define I40E_NVM_SA				(I40E_NVM_SNT | I40E_NVM_LCB)
-#define I40E_NVM_ERA				0x4
-#define I40E_NVM_CSUM				0x8
-#define I40E_NVM_AQE				0xe
-#define I40E_NVM_EXEC				0xf
-
-#define I40E_NVM_ADAPT_SHIFT	16
-#define I40E_NVM_ADAPT_MASK	(0xffff << I40E_NVM_ADAPT_SHIFT)
-
-#define I40E_NVMUPD_MAX_DATA	4096
-#define I40E_NVMUPD_IFACE_TIMEOUT 2 /* seconds */
-
-struct i40e_nvm_access {
-	u32 command;
-	u32 config;
-	u32 offset;	/* in bytes */
-	u32 data_size;	/* in bytes */
-	u8 data[1];
-};
-
-/* (Q)SFP module access definitions */
-#define I40E_I2C_EEPROM_DEV_ADDR	0xA0
-#define I40E_I2C_EEPROM_DEV_ADDR2	0xA2
-#define I40E_MODULE_TYPE_ADDR		0x00
-#define I40E_MODULE_REVISION_ADDR	0x01
-#define I40E_MODULE_SFF_8472_COMP	0x5E
-#define I40E_MODULE_SFF_8472_SWAP	0x5C
-#define I40E_MODULE_SFF_ADDR_MODE	0x04
-#define I40E_MODULE_TYPE_QSFP_PLUS	0x0D
-#define I40E_MODULE_TYPE_QSFP28		0x11
-#define I40E_MODULE_QSFP_MAX_LEN	640
-
-/* PCI bus types */
-enum i40e_bus_type {
-	i40e_bus_type_unknown = 0,
-	i40e_bus_type_pci,
-	i40e_bus_type_pcix,
-	i40e_bus_type_pci_express,
-	i40e_bus_type_reserved
-};
-
-/* PCI bus speeds */
-enum i40e_bus_speed {
-	i40e_bus_speed_unknown	= 0,
-	i40e_bus_speed_33	= 33,
-	i40e_bus_speed_66	= 66,
-	i40e_bus_speed_100	= 100,
-	i40e_bus_speed_120	= 120,
-	i40e_bus_speed_133	= 133,
-	i40e_bus_speed_2500	= 2500,
-	i40e_bus_speed_5000	= 5000,
-	i40e_bus_speed_8000	= 8000,
-	i40e_bus_speed_reserved
-};
-
-/* PCI bus widths */
-enum i40e_bus_width {
-	i40e_bus_width_unknown	= 0,
-	i40e_bus_width_pcie_x1	= 1,
-	i40e_bus_width_pcie_x2	= 2,
-	i40e_bus_width_pcie_x4	= 4,
-	i40e_bus_width_pcie_x8	= 8,
-	i40e_bus_width_32	= 32,
-	i40e_bus_width_64	= 64,
-	i40e_bus_width_reserved
-};
-
-/* Bus parameters */
-struct i40e_bus_info {
-	enum i40e_bus_speed speed;
-	enum i40e_bus_width width;
-	enum i40e_bus_type type;
-
-	u16 func;
-	u16 device;
-	u16 lan_id;
-	u16 bus_id;
-};
-
-/* Flow control (FC) parameters */
-struct i40e_fc_info {
-	enum i40e_fc_mode current_mode; /* FC mode in effect */
-	enum i40e_fc_mode requested_mode; /* FC mode requested by caller */
-};
-
-#define I40E_MAX_TRAFFIC_CLASS		8
-#define I40E_MAX_USER_PRIORITY		8
-#define I40E_DCBX_MAX_APPS		32
-#define I40E_LLDPDU_SIZE		1500
-
-/* IEEE 802.1Qaz ETS Configuration data */
-struct i40e_ieee_ets_config {
-	u8 willing;
-	u8 cbs;
-	u8 maxtcs;
-	u8 prioritytable[I40E_MAX_TRAFFIC_CLASS];
-	u8 tcbwtable[I40E_MAX_TRAFFIC_CLASS];
-	u8 tsatable[I40E_MAX_TRAFFIC_CLASS];
-};
-
-/* IEEE 802.1Qaz ETS Recommendation data */
-struct i40e_ieee_ets_recommend {
-	u8 prioritytable[I40E_MAX_TRAFFIC_CLASS];
-	u8 tcbwtable[I40E_MAX_TRAFFIC_CLASS];
-	u8 tsatable[I40E_MAX_TRAFFIC_CLASS];
-};
-
-/* IEEE 802.1Qaz PFC Configuration data */
-struct i40e_ieee_pfc_config {
-	u8 willing;
-	u8 mbc;
-	u8 pfccap;
-	u8 pfcenable;
-};
-
-/* IEEE 802.1Qaz Application Priority data */
-struct i40e_ieee_app_priority_table {
-	u8  priority;
-	u8  selector;
-	u16 protocolid;
-};
-
-struct i40e_dcbx_config {
-	u32 numapps;
-	u32 tlv_status; /* CEE mode TLV status */
-	struct i40e_ieee_ets_config etscfg;
-	struct i40e_ieee_ets_recommend etsrec;
-	struct i40e_ieee_pfc_config pfc;
-	struct i40e_ieee_app_priority_table app[I40E_DCBX_MAX_APPS];
-};
-
-/* Port hardware description */
-struct i40e_hw {
-	u8 __iomem *hw_addr;
-	void *back;
-
-	/* subsystem structs */
-	struct i40e_phy_info phy;
-	struct i40e_mac_info mac;
-	struct i40e_bus_info bus;
-	struct i40e_nvm_info nvm;
-	struct i40e_fc_info fc;
-
-	/* pci info */
-	u16 device_id;
-	u16 vendor_id;
-	u16 subsystem_device_id;
-	u16 subsystem_vendor_id;
-	u8 revision_id;
-	u8 port;
-	bool adapter_stopped;
-
-	/* capabilities for entire device and PCI func */
-	struct i40e_hw_capabilities dev_caps;
-	struct i40e_hw_capabilities func_caps;
-
-	/* Flow Director shared filter space */
-	u16 fdir_shared_filter_count;
-
-	/* device profile info */
-	u8  pf_id;
-	u16 main_vsi_seid;
-
-	/* for multi-function MACs */
-	u16 partition_id;
-	u16 num_partitions;
-	u16 num_ports;
-
-	/* Closest numa node to the device */
-	u16 numa_node;
-
-	/* Admin Queue info */
-	struct i40e_adminq_info aq;
-
-	/* state of nvm update process */
-	enum i40e_nvmupd_state nvmupd_state;
-	struct i40e_aq_desc nvm_wb_desc;
-	struct i40e_aq_desc nvm_aq_event_desc;
-	struct i40e_virt_mem nvm_buff;
-	bool nvm_release_on_done;
-	u16 nvm_wait_opcode;
-
-	/* HMC info */
-	struct i40e_hmc_info hmc; /* HMC info struct */
-
-	/* LLDP/DCBX Status */
-	u16 dcbx_status;
-
-#define I40E_HW_FLAG_802_1AD_CAPABLE        BIT_ULL(1)
-#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE  BIT_ULL(2)
-
-	/* DCBX info */
-	struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */
-	struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
-	struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
-
-	/* Used in set switch config AQ command */
-	u16 switch_tag;
-	u16 first_tag;
-	u16 second_tag;
-
-	/* debug mask */
-	u32 debug_mask;
-	char err_str[16];
-};
-
-static inline bool i40e_is_vf(struct i40e_hw *hw)
-{
-	return (hw->mac.type == I40E_MAC_VF ||
-		hw->mac.type == I40E_MAC_X722_VF);
-}
-
-struct i40e_driver_version {
-	u8 major_version;
-	u8 minor_version;
-	u8 build_version;
-	u8 subbuild_version;
-	u8 driver_string[32];
-};
-
-/* RX Descriptors */
-union i40e_16byte_rx_desc {
-	struct {
-		__le64 pkt_addr; /* Packet buffer address */
-		__le64 hdr_addr; /* Header buffer address */
-	} read;
-	struct {
-		struct {
-			struct {
-				union {
-					__le16 mirroring_status;
-					__le16 fcoe_ctx_id;
-				} mirr_fcoe;
-				__le16 l2tag1;
-			} lo_dword;
-			union {
-				__le32 rss; /* RSS Hash */
-				__le32 fd_id; /* Flow director filter id */
-				__le32 fcoe_param; /* FCoE DDP Context id */
-			} hi_dword;
-		} qword0;
-		struct {
-			/* ext status/error/pktype/length */
-			__le64 status_error_len;
-		} qword1;
-	} wb;  /* writeback */
-};
-
-union i40e_32byte_rx_desc {
-	struct {
-		__le64  pkt_addr; /* Packet buffer address */
-		__le64  hdr_addr; /* Header buffer address */
-			/* bit 0 of hdr_buffer_addr is DD bit */
-		__le64  rsvd1;
-		__le64  rsvd2;
-	} read;
-	struct {
-		struct {
-			struct {
-				union {
-					__le16 mirroring_status;
-					__le16 fcoe_ctx_id;
-				} mirr_fcoe;
-				__le16 l2tag1;
-			} lo_dword;
-			union {
-				__le32 rss; /* RSS Hash */
-				__le32 fcoe_param; /* FCoE DDP Context id */
-				/* Flow director filter id in case of
-				 * Programming status desc WB
-				 */
-				__le32 fd_id;
-			} hi_dword;
-		} qword0;
-		struct {
-			/* status/error/pktype/length */
-			__le64 status_error_len;
-		} qword1;
-		struct {
-			__le16 ext_status; /* extended status */
-			__le16 rsvd;
-			__le16 l2tag2_1;
-			__le16 l2tag2_2;
-		} qword2;
-		struct {
-			union {
-				__le32 flex_bytes_lo;
-				__le32 pe_status;
-			} lo_dword;
-			union {
-				__le32 flex_bytes_hi;
-				__le32 fd_id;
-			} hi_dword;
-		} qword3;
-	} wb;  /* writeback */
-};
-
-enum i40e_rx_desc_status_bits {
-	/* Note: These are predefined bit offsets */
-	I40E_RX_DESC_STATUS_DD_SHIFT		= 0,
-	I40E_RX_DESC_STATUS_EOF_SHIFT		= 1,
-	I40E_RX_DESC_STATUS_L2TAG1P_SHIFT	= 2,
-	I40E_RX_DESC_STATUS_L3L4P_SHIFT		= 3,
-	I40E_RX_DESC_STATUS_CRCP_SHIFT		= 4,
-	I40E_RX_DESC_STATUS_TSYNINDX_SHIFT	= 5, /* 2 BITS */
-	I40E_RX_DESC_STATUS_TSYNVALID_SHIFT	= 7,
-	/* Note: Bit 8 is reserved in X710 and XL710 */
-	I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT	= 8,
-	I40E_RX_DESC_STATUS_UMBCAST_SHIFT	= 9, /* 2 BITS */
-	I40E_RX_DESC_STATUS_FLM_SHIFT		= 11,
-	I40E_RX_DESC_STATUS_FLTSTAT_SHIFT	= 12, /* 2 BITS */
-	I40E_RX_DESC_STATUS_LPBK_SHIFT		= 14,
-	I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT	= 15,
-	I40E_RX_DESC_STATUS_RESERVED_SHIFT	= 16, /* 2 BITS */
-	/* Note: For non-tunnel packets, INT_UDP_0 is the right status bit for
-	 * the UDP header
-	 */
-	I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT	= 18,
-	I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
-};
-
-#define I40E_RXD_QW1_STATUS_SHIFT	0
-#define I40E_RXD_QW1_STATUS_MASK	((BIT(I40E_RX_DESC_STATUS_LAST) - 1) \
-					 << I40E_RXD_QW1_STATUS_SHIFT)
-
-#define I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT   I40E_RX_DESC_STATUS_TSYNINDX_SHIFT
-#define I40E_RXD_QW1_STATUS_TSYNINDX_MASK	(0x3UL << \
-					     I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT)
-
-#define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT  I40E_RX_DESC_STATUS_TSYNVALID_SHIFT
-#define I40E_RXD_QW1_STATUS_TSYNVALID_MASK \
-				    BIT_ULL(I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT)
-
-enum i40e_rx_desc_fltstat_values {
-	I40E_RX_DESC_FLTSTAT_NO_DATA	= 0,
-	I40E_RX_DESC_FLTSTAT_RSV_FD_ID	= 1, /* 16byte desc? FD_ID : RSV */
-	I40E_RX_DESC_FLTSTAT_RSV	= 2,
-	I40E_RX_DESC_FLTSTAT_RSS_HASH	= 3,
-};
-
-#define I40E_RXD_QW1_ERROR_SHIFT	19
-#define I40E_RXD_QW1_ERROR_MASK		(0xFFUL << I40E_RXD_QW1_ERROR_SHIFT)
-
-enum i40e_rx_desc_error_bits {
-	/* Note: These are predefined bit offsets */
-	I40E_RX_DESC_ERROR_RXE_SHIFT		= 0,
-	I40E_RX_DESC_ERROR_RECIPE_SHIFT		= 1,
-	I40E_RX_DESC_ERROR_HBO_SHIFT		= 2,
-	I40E_RX_DESC_ERROR_L3L4E_SHIFT		= 3, /* 3 BITS */
-	I40E_RX_DESC_ERROR_IPE_SHIFT		= 3,
-	I40E_RX_DESC_ERROR_L4E_SHIFT		= 4,
-	I40E_RX_DESC_ERROR_EIPE_SHIFT		= 5,
-	I40E_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6,
-	I40E_RX_DESC_ERROR_PPRS_SHIFT		= 7
-};
-
-enum i40e_rx_desc_error_l3l4e_fcoe_masks {
-	I40E_RX_DESC_ERROR_L3L4E_NONE		= 0,
-	I40E_RX_DESC_ERROR_L3L4E_PROT		= 1,
-	I40E_RX_DESC_ERROR_L3L4E_FC		= 2,
-	I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR	= 3,
-	I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN	= 4
-};
-
-#define I40E_RXD_QW1_PTYPE_SHIFT	30
-#define I40E_RXD_QW1_PTYPE_MASK		(0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT)
-
-/* Packet type non-ip values */
-enum i40e_rx_l2_ptype {
-	I40E_RX_PTYPE_L2_RESERVED			= 0,
-	I40E_RX_PTYPE_L2_MAC_PAY2			= 1,
-	I40E_RX_PTYPE_L2_TIMESYNC_PAY2			= 2,
-	I40E_RX_PTYPE_L2_FIP_PAY2			= 3,
-	I40E_RX_PTYPE_L2_OUI_PAY2			= 4,
-	I40E_RX_PTYPE_L2_MACCNTRL_PAY2			= 5,
-	I40E_RX_PTYPE_L2_LLDP_PAY2			= 6,
-	I40E_RX_PTYPE_L2_ECP_PAY2			= 7,
-	I40E_RX_PTYPE_L2_EVB_PAY2			= 8,
-	I40E_RX_PTYPE_L2_QCN_PAY2			= 9,
-	I40E_RX_PTYPE_L2_EAPOL_PAY2			= 10,
-	I40E_RX_PTYPE_L2_ARP				= 11,
-	I40E_RX_PTYPE_L2_FCOE_PAY3			= 12,
-	I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3		= 13,
-	I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3		= 14,
-	I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3		= 15,
-	I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA		= 16,
-	I40E_RX_PTYPE_L2_FCOE_VFT_PAY3			= 17,
-	I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA		= 18,
-	I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY			= 19,
-	I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP			= 20,
-	I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER		= 21,
-	I40E_RX_PTYPE_GRENAT4_MAC_PAY3			= 58,
-	I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4	= 87,
-	I40E_RX_PTYPE_GRENAT6_MAC_PAY3			= 124,
-	I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4	= 153
-};
-
-struct i40e_rx_ptype_decoded {
-	u32 ptype:8;
-	u32 known:1;
-	u32 outer_ip:1;
-	u32 outer_ip_ver:1;
-	u32 outer_frag:1;
-	u32 tunnel_type:3;
-	u32 tunnel_end_prot:2;
-	u32 tunnel_end_frag:1;
-	u32 inner_prot:4;
-	u32 payload_layer:3;
-};
-
-enum i40e_rx_ptype_outer_ip {
-	I40E_RX_PTYPE_OUTER_L2	= 0,
-	I40E_RX_PTYPE_OUTER_IP	= 1
-};
-
-enum i40e_rx_ptype_outer_ip_ver {
-	I40E_RX_PTYPE_OUTER_NONE	= 0,
-	I40E_RX_PTYPE_OUTER_IPV4	= 0,
-	I40E_RX_PTYPE_OUTER_IPV6	= 1
-};
-
-enum i40e_rx_ptype_outer_fragmented {
-	I40E_RX_PTYPE_NOT_FRAG	= 0,
-	I40E_RX_PTYPE_FRAG	= 1
-};
-
-enum i40e_rx_ptype_tunnel_type {
-	I40E_RX_PTYPE_TUNNEL_NONE		= 0,
-	I40E_RX_PTYPE_TUNNEL_IP_IP		= 1,
-	I40E_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
-	I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
-	I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
-};
-
-enum i40e_rx_ptype_tunnel_end_prot {
-	I40E_RX_PTYPE_TUNNEL_END_NONE	= 0,
-	I40E_RX_PTYPE_TUNNEL_END_IPV4	= 1,
-	I40E_RX_PTYPE_TUNNEL_END_IPV6	= 2,
-};
-
-enum i40e_rx_ptype_inner_prot {
-	I40E_RX_PTYPE_INNER_PROT_NONE		= 0,
-	I40E_RX_PTYPE_INNER_PROT_UDP		= 1,
-	I40E_RX_PTYPE_INNER_PROT_TCP		= 2,
-	I40E_RX_PTYPE_INNER_PROT_SCTP		= 3,
-	I40E_RX_PTYPE_INNER_PROT_ICMP		= 4,
-	I40E_RX_PTYPE_INNER_PROT_TIMESYNC	= 5
-};
-
-enum i40e_rx_ptype_payload_layer {
-	I40E_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
-	I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
-	I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
-	I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
-};
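-
-/* Illustrative sketch, not part of the original header: using the decode
- * enums above, one lookup-table entry for a plain (non-tunneled) IPv4/TCP
- * packet type could be populated like this. The ptype value 26 is an
- * assumption taken from the i40e PTYPE table, and the variable name is
- * hypothetical.
- */
-static const struct i40e_rx_ptype_decoded example_ipv4_tcp_ptype = {
-	.ptype		= 26,
-	.known		= 1,
-	.outer_ip	= I40E_RX_PTYPE_OUTER_IP,
-	.outer_ip_ver	= I40E_RX_PTYPE_OUTER_IPV4,
-	.outer_frag	= I40E_RX_PTYPE_NOT_FRAG,
-	.tunnel_type	= I40E_RX_PTYPE_TUNNEL_NONE,
-	.inner_prot	= I40E_RX_PTYPE_INNER_PROT_TCP,
-	.payload_layer	= I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4,
-};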
-
-#define I40E_RXD_QW1_LENGTH_PBUF_SHIFT	38
-#define I40E_RXD_QW1_LENGTH_PBUF_MASK	(0x3FFFULL << \
-					 I40E_RXD_QW1_LENGTH_PBUF_SHIFT)
-
-#define I40E_RXD_QW1_LENGTH_HBUF_SHIFT	52
-#define I40E_RXD_QW1_LENGTH_HBUF_MASK	(0x7FFULL << \
-					 I40E_RXD_QW1_LENGTH_HBUF_SHIFT)
-
-#define I40E_RXD_QW1_LENGTH_SPH_SHIFT	63
-#define I40E_RXD_QW1_LENGTH_SPH_MASK	BIT_ULL(I40E_RXD_QW1_LENGTH_SPH_SHIFT)
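-
-/* Illustrative sketch, not part of the original header: the status, error,
- * ptype and length fields above are all packed into the descriptor's
- * writeback qword1 and unpack with the shift/mask pairs. The helper name
- * is hypothetical.
- */
-static inline bool i40e_example_rx_desc_done(union i40e_32byte_rx_desc *rxd)
-{
-	u64 qword = le64_to_cpu(rxd->wb.qword1.status_error_len);
-	u32 status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
-		     I40E_RXD_QW1_STATUS_SHIFT;
-	u16 pkt_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-		      I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-
-	(void)pkt_len;	/* the ERROR and PTYPE fields unpack the same way */
-	/* DD (descriptor done) is bit 0 of the status field */
-	return status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT);
-}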
-
-enum i40e_rx_desc_ext_status_bits {
-	/* Note: These are predefined bit offsets */
-	I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT	= 0,
-	I40E_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT	= 1,
-	I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT	= 2, /* 2 BITS */
-	I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT	= 4, /* 2 BITS */
-	I40E_RX_DESC_EXT_STATUS_FDLONGB_SHIFT	= 9,
-	I40E_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT	= 10,
-	I40E_RX_DESC_EXT_STATUS_PELONGB_SHIFT	= 11,
-};
-
-enum i40e_rx_desc_pe_status_bits {
-	/* Note: These are predefined bit offsets */
-	I40E_RX_DESC_PE_STATUS_QPID_SHIFT	= 0, /* 18 BITS */
-	I40E_RX_DESC_PE_STATUS_L4PORT_SHIFT	= 0, /* 16 BITS */
-	I40E_RX_DESC_PE_STATUS_IPINDEX_SHIFT	= 16, /* 8 BITS */
-	I40E_RX_DESC_PE_STATUS_QPIDHIT_SHIFT	= 24,
-	I40E_RX_DESC_PE_STATUS_APBVTHIT_SHIFT	= 25,
-	I40E_RX_DESC_PE_STATUS_PORTV_SHIFT	= 26,
-	I40E_RX_DESC_PE_STATUS_URG_SHIFT	= 27,
-	I40E_RX_DESC_PE_STATUS_IPFRAG_SHIFT	= 28,
-	I40E_RX_DESC_PE_STATUS_IPOPT_SHIFT	= 29
-};
-
-#define I40E_RX_PROG_STATUS_DESC_LENGTH_SHIFT		38
-#define I40E_RX_PROG_STATUS_DESC_LENGTH			0x2000000
-
-#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT	2
-#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK	(0x7UL << \
-				I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT)
-
-#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT	19
-#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK		(0x3FUL << \
-				I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT)
-
-enum i40e_rx_prog_status_desc_status_bits {
-	/* Note: These are predefined bit offsets */
-	I40E_RX_PROG_STATUS_DESC_DD_SHIFT	= 0,
-	I40E_RX_PROG_STATUS_DESC_PROG_ID_SHIFT	= 2 /* 3 BITS */
-};
-
-enum i40e_rx_prog_status_desc_prog_id_masks {
-	I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS	= 1,
-	I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS	= 2,
-	I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS	= 4,
-};
-
-enum i40e_rx_prog_status_desc_error_bits {
-	/* Note: These are predefined bit offsets */
-	I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT	= 0,
-	I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT	= 1,
-	I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT	= 2,
-	I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT	= 3
-};
-
-/* TX Descriptor */
-struct i40e_tx_desc {
-	__le64 buffer_addr; /* Address of descriptor's data buf */
-	__le64 cmd_type_offset_bsz;
-};
-
-#define I40E_TXD_QW1_DTYPE_SHIFT	0
-#define I40E_TXD_QW1_DTYPE_MASK		(0xFUL << I40E_TXD_QW1_DTYPE_SHIFT)
-
-enum i40e_tx_desc_dtype_value {
-	I40E_TX_DESC_DTYPE_DATA		= 0x0,
-	I40E_TX_DESC_DTYPE_NOP		= 0x1, /* same as Context desc */
-	I40E_TX_DESC_DTYPE_CONTEXT	= 0x1,
-	I40E_TX_DESC_DTYPE_FCOE_CTX	= 0x2,
-	I40E_TX_DESC_DTYPE_FILTER_PROG	= 0x8,
-	I40E_TX_DESC_DTYPE_DDP_CTX	= 0x9,
-	I40E_TX_DESC_DTYPE_FLEX_DATA	= 0xB,
-	I40E_TX_DESC_DTYPE_FLEX_CTX_1	= 0xC,
-	I40E_TX_DESC_DTYPE_FLEX_CTX_2	= 0xD,
-	I40E_TX_DESC_DTYPE_DESC_DONE	= 0xF
-};
-
-#define I40E_TXD_QW1_CMD_SHIFT	4
-#define I40E_TXD_QW1_CMD_MASK	(0x3FFUL << I40E_TXD_QW1_CMD_SHIFT)
-
-enum i40e_tx_desc_cmd_bits {
-	I40E_TX_DESC_CMD_EOP			= 0x0001,
-	I40E_TX_DESC_CMD_RS			= 0x0002,
-	I40E_TX_DESC_CMD_ICRC			= 0x0004,
-	I40E_TX_DESC_CMD_IL2TAG1		= 0x0008,
-	I40E_TX_DESC_CMD_DUMMY			= 0x0010,
-	I40E_TX_DESC_CMD_IIPT_NONIP		= 0x0000, /* 2 BITS */
-	I40E_TX_DESC_CMD_IIPT_IPV6		= 0x0020, /* 2 BITS */
-	I40E_TX_DESC_CMD_IIPT_IPV4		= 0x0040, /* 2 BITS */
-	I40E_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060, /* 2 BITS */
-	I40E_TX_DESC_CMD_FCOET			= 0x0080,
-	I40E_TX_DESC_CMD_L4T_EOFT_UNK		= 0x0000, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_EOF_N		= 0x0000, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_EOF_T		= 0x0100, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI	= 0x0200, /* 2 BITS */
-	I40E_TX_DESC_CMD_L4T_EOFT_EOF_A		= 0x0300, /* 2 BITS */
-};
-
-#define I40E_TXD_QW1_OFFSET_SHIFT	16
-#define I40E_TXD_QW1_OFFSET_MASK	(0x3FFFFULL << \
-					 I40E_TXD_QW1_OFFSET_SHIFT)
-
-enum i40e_tx_desc_length_fields {
-	/* Note: These are predefined bit offsets */
-	I40E_TX_DESC_LENGTH_MACLEN_SHIFT	= 0, /* 7 BITS */
-	I40E_TX_DESC_LENGTH_IPLEN_SHIFT		= 7, /* 7 BITS */
-	I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT	= 14 /* 4 BITS */
-};
-
-#define I40E_TXD_QW1_TX_BUF_SZ_SHIFT	34
-#define I40E_TXD_QW1_TX_BUF_SZ_MASK	(0x3FFFULL << \
-					 I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
-
-#define I40E_TXD_QW1_L2TAG1_SHIFT	48
-#define I40E_TXD_QW1_L2TAG1_MASK	(0xFFFFULL << I40E_TXD_QW1_L2TAG1_SHIFT)
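-
-/* Illustrative sketch, not part of the original header: a data descriptor's
- * cmd_type_offset_bsz word is assembled from the DTYPE/CMD/OFFSET/buffer
- * size/tag fields above, in the spirit of the driver's build_ctob() helper.
- * The function name is hypothetical.
- */
-static inline __le64 i40e_example_build_ctob(u32 td_cmd, u32 td_offset,
-					     unsigned int size, u32 td_tag)
-{
-	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
-			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
-			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
-			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
-			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
-}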
-
-/* Context descriptors */
-struct i40e_tx_context_desc {
-	__le32 tunneling_params;
-	__le16 l2tag2;
-	__le16 rsvd;
-	__le64 type_cmd_tso_mss;
-};
-
-#define I40E_TXD_CTX_QW1_DTYPE_SHIFT	0
-#define I40E_TXD_CTX_QW1_DTYPE_MASK	(0xFUL << I40E_TXD_CTX_QW1_DTYPE_SHIFT)
-
-#define I40E_TXD_CTX_QW1_CMD_SHIFT	4
-#define I40E_TXD_CTX_QW1_CMD_MASK	(0xFFFFUL << I40E_TXD_CTX_QW1_CMD_SHIFT)
-
-enum i40e_tx_ctx_desc_cmd_bits {
-	I40E_TX_CTX_DESC_TSO		= 0x01,
-	I40E_TX_CTX_DESC_TSYN		= 0x02,
-	I40E_TX_CTX_DESC_IL2TAG2	= 0x04,
-	I40E_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
-	I40E_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
-	I40E_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
-	I40E_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
-	I40E_TX_CTX_DESC_SWTCH_VSI	= 0x30,
-	I40E_TX_CTX_DESC_SWPE		= 0x40
-};
-
-#define I40E_TXD_CTX_QW1_TSO_LEN_SHIFT	30
-#define I40E_TXD_CTX_QW1_TSO_LEN_MASK	(0x3FFFFULL << \
-					 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT)
-
-#define I40E_TXD_CTX_QW1_MSS_SHIFT	50
-#define I40E_TXD_CTX_QW1_MSS_MASK	(0x3FFFULL << \
-					 I40E_TXD_CTX_QW1_MSS_SHIFT)
-
-#define I40E_TXD_CTX_QW1_VSI_SHIFT	50
-#define I40E_TXD_CTX_QW1_VSI_MASK	(0x1FFULL << I40E_TXD_CTX_QW1_VSI_SHIFT)
-
-#define I40E_TXD_CTX_QW0_EXT_IP_SHIFT	0
-#define I40E_TXD_CTX_QW0_EXT_IP_MASK	(0x3ULL << \
-					 I40E_TXD_CTX_QW0_EXT_IP_SHIFT)
-
-enum i40e_tx_ctx_desc_eipt_offload {
-	I40E_TX_CTX_EXT_IP_NONE		= 0x0,
-	I40E_TX_CTX_EXT_IP_IPV6		= 0x1,
-	I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM	= 0x2,
-	I40E_TX_CTX_EXT_IP_IPV4		= 0x3
-};
-
-#define I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT	2
-#define I40E_TXD_CTX_QW0_EXT_IPLEN_MASK	(0x3FULL << \
-					 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT)
-
-#define I40E_TXD_CTX_QW0_NATT_SHIFT	9
-#define I40E_TXD_CTX_QW0_NATT_MASK	(0x3ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
-
-#define I40E_TXD_CTX_UDP_TUNNELING	BIT_ULL(I40E_TXD_CTX_QW0_NATT_SHIFT)
-#define I40E_TXD_CTX_GRE_TUNNELING	(0x2ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
-
-#define I40E_TXD_CTX_QW0_EIP_NOINC_SHIFT	11
-#define I40E_TXD_CTX_QW0_EIP_NOINC_MASK \
-				       BIT_ULL(I40E_TXD_CTX_QW0_EIP_NOINC_SHIFT)
-
-#define I40E_TXD_CTX_EIP_NOINC_IPID_CONST	I40E_TXD_CTX_QW0_EIP_NOINC_MASK
-
-#define I40E_TXD_CTX_QW0_NATLEN_SHIFT	12
-#define I40E_TXD_CTX_QW0_NATLEN_MASK	(0x7FULL << \
-					 I40E_TXD_CTX_QW0_NATLEN_SHIFT)
-
-#define I40E_TXD_CTX_QW0_DECTTL_SHIFT	19
-#define I40E_TXD_CTX_QW0_DECTTL_MASK	(0xFULL << \
-					 I40E_TXD_CTX_QW0_DECTTL_SHIFT)
-
-#define I40E_TXD_CTX_QW0_L4T_CS_SHIFT	23
-#define I40E_TXD_CTX_QW0_L4T_CS_MASK	BIT_ULL(I40E_TXD_CTX_QW0_L4T_CS_SHIFT)
-struct i40e_filter_program_desc {
-	__le32 qindex_flex_ptype_vsi;
-	__le32 rsvd;
-	__le32 dtype_cmd_cntindex;
-	__le32 fd_id;
-};
-#define I40E_TXD_FLTR_QW0_QINDEX_SHIFT	0
-#define I40E_TXD_FLTR_QW0_QINDEX_MASK	(0x7FFUL << \
-					 I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
-#define I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT	11
-#define I40E_TXD_FLTR_QW0_FLEXOFF_MASK	(0x7UL << \
-					 I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
-#define I40E_TXD_FLTR_QW0_PCTYPE_SHIFT	17
-#define I40E_TXD_FLTR_QW0_PCTYPE_MASK	(0x3FUL << \
-					 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
-
-/* Packet Classifier Types for filters */
-enum i40e_filter_pctype {
-	/* Note: Values 0-28 are reserved for future use.
-	 * Values 29, 30, and 32 are not supported on XL710 and X710.
-	 */
-	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP	= 29,
-	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP	= 30,
-	I40E_FILTER_PCTYPE_NONF_IPV4_UDP		= 31,
-	I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK	= 32,
-	I40E_FILTER_PCTYPE_NONF_IPV4_TCP		= 33,
-	I40E_FILTER_PCTYPE_NONF_IPV4_SCTP		= 34,
-	I40E_FILTER_PCTYPE_NONF_IPV4_OTHER		= 35,
-	I40E_FILTER_PCTYPE_FRAG_IPV4			= 36,
-	/* Note: Values 37-38 are reserved for future use.
-	 * Values 39, 40, and 42 are not supported on XL710 and X710.
-	 */
-	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP	= 39,
-	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP	= 40,
-	I40E_FILTER_PCTYPE_NONF_IPV6_UDP		= 41,
-	I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK	= 42,
-	I40E_FILTER_PCTYPE_NONF_IPV6_TCP		= 43,
-	I40E_FILTER_PCTYPE_NONF_IPV6_SCTP		= 44,
-	I40E_FILTER_PCTYPE_NONF_IPV6_OTHER		= 45,
-	I40E_FILTER_PCTYPE_FRAG_IPV6			= 46,
-	/* Note: Value 47 is reserved for future use */
-	I40E_FILTER_PCTYPE_FCOE_OX			= 48,
-	I40E_FILTER_PCTYPE_FCOE_RX			= 49,
-	I40E_FILTER_PCTYPE_FCOE_OTHER			= 50,
-	/* Note: Values 51-62 are reserved for future use */
-	I40E_FILTER_PCTYPE_L2_PAYLOAD			= 63,
-};
-
-enum i40e_filter_program_desc_dest {
-	I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET		= 0x0,
-	I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX	= 0x1,
-	I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_OTHER	= 0x2,
-};
-
-enum i40e_filter_program_desc_fd_status {
-	I40E_FILTER_PROGRAM_DESC_FD_STATUS_NONE			= 0x0,
-	I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID		= 0x1,
-	I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID_4FLEX_BYTES	= 0x2,
-	I40E_FILTER_PROGRAM_DESC_FD_STATUS_8FLEX_BYTES		= 0x3,
-};
-
-#define I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT	23
-#define I40E_TXD_FLTR_QW0_DEST_VSI_MASK	(0x1FFUL << \
-					 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
-
-#define I40E_TXD_FLTR_QW1_CMD_SHIFT	4
-#define I40E_TXD_FLTR_QW1_CMD_MASK	(0xFFFFULL << \
-					 I40E_TXD_FLTR_QW1_CMD_SHIFT)
-
-#define I40E_TXD_FLTR_QW1_PCMD_SHIFT	(0x0ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
-#define I40E_TXD_FLTR_QW1_PCMD_MASK	(0x7ULL << I40E_TXD_FLTR_QW1_PCMD_SHIFT)
-
-enum i40e_filter_program_desc_pcmd {
-	I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE	= 0x1,
-	I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE		= 0x2,
-};
-
-#define I40E_TXD_FLTR_QW1_DEST_SHIFT	(0x3ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
-#define I40E_TXD_FLTR_QW1_DEST_MASK	(0x3ULL << I40E_TXD_FLTR_QW1_DEST_SHIFT)
-
-#define I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT	(0x7ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
-#define I40E_TXD_FLTR_QW1_CNT_ENA_MASK	BIT_ULL(I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT)
-
-#define I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT	(0x9ULL + \
-						 I40E_TXD_FLTR_QW1_CMD_SHIFT)
-#define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \
-					  I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
-
-#define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
-#define I40E_TXD_FLTR_QW1_CNTINDEX_MASK	(0x1FFUL << \
-					 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
-
-enum i40e_filter_type {
-	I40E_FLOW_DIRECTOR_FLTR = 0,
-	I40E_PE_QUAD_HASH_FLTR = 1,
-	I40E_ETHERTYPE_FLTR,
-	I40E_FCOE_CTX_FLTR,
-	I40E_MAC_VLAN_FLTR,
-	I40E_HASH_FLTR
-};
-
-struct i40e_vsi_context {
-	u16 seid;
-	u16 uplink_seid;
-	u16 vsi_number;
-	u16 vsis_allocated;
-	u16 vsis_unallocated;
-	u16 flags;
-	u8 pf_num;
-	u8 vf_num;
-	u8 connection_type;
-	struct i40e_aqc_vsi_properties_data info;
-};
-
-struct i40e_veb_context {
-	u16 seid;
-	u16 uplink_seid;
-	u16 veb_number;
-	u16 vebs_allocated;
-	u16 vebs_unallocated;
-	u16 flags;
-	struct i40e_aqc_get_veb_parameters_completion info;
-};
-
-/* Statistics collected by each port, VSI, VEB, and S-channel */
-struct i40e_eth_stats {
-	u64 rx_bytes;			/* gorc */
-	u64 rx_unicast;			/* uprc */
-	u64 rx_multicast;		/* mprc */
-	u64 rx_broadcast;		/* bprc */
-	u64 rx_discards;		/* rdpc */
-	u64 rx_unknown_protocol;	/* rupp */
-	u64 tx_bytes;			/* gotc */
-	u64 tx_unicast;			/* uptc */
-	u64 tx_multicast;		/* mptc */
-	u64 tx_broadcast;		/* bptc */
-	u64 tx_discards;		/* tdpc */
-	u64 tx_errors;			/* tepc */
-};
-
-/* Statistics collected per VEB per TC */
-struct i40e_veb_tc_stats {
-	u64 tc_rx_packets[I40E_MAX_TRAFFIC_CLASS];
-	u64 tc_rx_bytes[I40E_MAX_TRAFFIC_CLASS];
-	u64 tc_tx_packets[I40E_MAX_TRAFFIC_CLASS];
-	u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS];
-};
-
-/* Statistics collected by the MAC */
-struct i40e_hw_port_stats {
-	/* eth stats collected by the port */
-	struct i40e_eth_stats eth;
-
-	/* additional port specific stats */
-	u64 tx_dropped_link_down;	/* tdold */
-	u64 crc_errors;			/* crcerrs */
-	u64 illegal_bytes;		/* illerrc */
-	u64 error_bytes;		/* errbc */
-	u64 mac_local_faults;		/* mlfc */
-	u64 mac_remote_faults;		/* mrfc */
-	u64 rx_length_errors;		/* rlec */
-	u64 link_xon_rx;		/* lxonrxc */
-	u64 link_xoff_rx;		/* lxoffrxc */
-	u64 priority_xon_rx[8];		/* pxonrxc[8] */
-	u64 priority_xoff_rx[8];	/* pxoffrxc[8] */
-	u64 link_xon_tx;		/* lxontxc */
-	u64 link_xoff_tx;		/* lxofftxc */
-	u64 priority_xon_tx[8];		/* pxontxc[8] */
-	u64 priority_xoff_tx[8];	/* pxofftxc[8] */
-	u64 priority_xon_2_xoff[8];	/* pxon2offc[8] */
-	u64 rx_size_64;			/* prc64 */
-	u64 rx_size_127;		/* prc127 */
-	u64 rx_size_255;		/* prc255 */
-	u64 rx_size_511;		/* prc511 */
-	u64 rx_size_1023;		/* prc1023 */
-	u64 rx_size_1522;		/* prc1522 */
-	u64 rx_size_big;		/* prc9522 */
-	u64 rx_undersize;		/* ruc */
-	u64 rx_fragments;		/* rfc */
-	u64 rx_oversize;		/* roc */
-	u64 rx_jabber;			/* rjc */
-	u64 tx_size_64;			/* ptc64 */
-	u64 tx_size_127;		/* ptc127 */
-	u64 tx_size_255;		/* ptc255 */
-	u64 tx_size_511;		/* ptc511 */
-	u64 tx_size_1023;		/* ptc1023 */
-	u64 tx_size_1522;		/* ptc1522 */
-	u64 tx_size_big;		/* ptc9522 */
-	u64 mac_short_packet_dropped;	/* mspdc */
-	u64 checksum_error;		/* xec */
-	/* flow director stats */
-	u64 fd_atr_match;
-	u64 fd_sb_match;
-	u64 fd_atr_tunnel_match;
-	u32 fd_atr_status;
-	u32 fd_sb_status;
-	/* EEE LPI */
-	u32 tx_lpi_status;
-	u32 rx_lpi_status;
-	u64 tx_lpi_count;		/* etlpic */
-	u64 rx_lpi_count;		/* erlpic */
-};
-
-/* Checksum and Shadow RAM pointers */
-#define I40E_SR_NVM_CONTROL_WORD		0x00
-#define I40E_EMP_MODULE_PTR			0x0F
-#define I40E_SR_EMP_MODULE_PTR			0x48
-#define I40E_NVM_OEM_VER_OFF			0x83
-#define I40E_SR_NVM_DEV_STARTER_VERSION		0x18
-#define I40E_SR_NVM_WAKE_ON_LAN			0x19
-#define I40E_SR_ALTERNATE_SAN_MAC_ADDRESS_PTR	0x27
-#define I40E_SR_NVM_EETRACK_LO			0x2D
-#define I40E_SR_NVM_EETRACK_HI			0x2E
-#define I40E_SR_VPD_PTR				0x2F
-#define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR		0x3E
-#define I40E_SR_SW_CHECKSUM_WORD		0x3F
-
-/* Auxiliary field, mask and shift definitions for Shadow RAM and NVM Flash */
-#define I40E_SR_VPD_MODULE_MAX_SIZE		1024
-#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE	1024
-#define I40E_SR_CONTROL_WORD_1_SHIFT		0x06
-#define I40E_SR_CONTROL_WORD_1_MASK	(0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
-#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID	BIT(5)
-#define I40E_SR_NVM_MAP_STRUCTURE_TYPE		BIT(12)
-#define I40E_PTR_TYPE				BIT(15)
-
-/* Shadow RAM related */
-#define I40E_SR_SECTOR_SIZE_IN_WORDS	0x800
-#define I40E_SR_WORDS_IN_1KB		512
-/* Checksum should be calculated such that after adding all the words,
- * including the checksum word itself, the sum should be 0xBABA.
- */
-#define I40E_SR_SW_CHECKSUM_BASE	0xBABA
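-
-/* Illustrative sketch, not part of the original header: computing the
- * software checksum word so that the 16-bit sum of all shadow RAM words,
- * including the checksum itself, equals 0xBABA. The real driver also skips
- * the VPD and PCIe-alternate modules; that is omitted here for brevity.
- */
-static inline u16 i40e_example_calc_sr_checksum(const u16 *sr_words,
-						u32 nwords)
-{
-	u16 sum = 0;
-	u32 i;
-
-	for (i = 0; i < nwords; i++)
-		if (i != I40E_SR_SW_CHECKSUM_WORD)
-			sum += sr_words[i];
-
-	/* sum + (0xBABA - sum) == 0xBABA modulo 2^16 */
-	return (u16)(I40E_SR_SW_CHECKSUM_BASE - sum);
-}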
-
-#define I40E_SRRD_SRCTL_ATTEMPTS	100000
-
-enum i40e_switch_element_types {
-	I40E_SWITCH_ELEMENT_TYPE_MAC	= 1,
-	I40E_SWITCH_ELEMENT_TYPE_PF	= 2,
-	I40E_SWITCH_ELEMENT_TYPE_VF	= 3,
-	I40E_SWITCH_ELEMENT_TYPE_EMP	= 4,
-	I40E_SWITCH_ELEMENT_TYPE_BMC	= 6,
-	I40E_SWITCH_ELEMENT_TYPE_PE	= 16,
-	I40E_SWITCH_ELEMENT_TYPE_VEB	= 17,
-	I40E_SWITCH_ELEMENT_TYPE_PA	= 18,
-	I40E_SWITCH_ELEMENT_TYPE_VSI	= 19,
-};
-
-/* Supported EtherType filters */
-enum i40e_ether_type_index {
-	I40E_ETHER_TYPE_1588		= 0,
-	I40E_ETHER_TYPE_FIP		= 1,
-	I40E_ETHER_TYPE_OUI_EXTENDED	= 2,
-	I40E_ETHER_TYPE_MAC_CONTROL	= 3,
-	I40E_ETHER_TYPE_LLDP		= 4,
-	I40E_ETHER_TYPE_EVB_PROTOCOL1	= 5,
-	I40E_ETHER_TYPE_EVB_PROTOCOL2	= 6,
-	I40E_ETHER_TYPE_QCN_CNM		= 7,
-	I40E_ETHER_TYPE_8021X		= 8,
-	I40E_ETHER_TYPE_ARP		= 9,
-	I40E_ETHER_TYPE_RSV1		= 10,
-	I40E_ETHER_TYPE_RSV2		= 11,
-};
-
-/* Filter context base size is 1K */
-#define I40E_HASH_FILTER_BASE_SIZE	1024
-/* Supported Hash filter values */
-enum i40e_hash_filter_size {
-	I40E_HASH_FILTER_SIZE_1K	= 0,
-	I40E_HASH_FILTER_SIZE_2K	= 1,
-	I40E_HASH_FILTER_SIZE_4K	= 2,
-	I40E_HASH_FILTER_SIZE_8K	= 3,
-	I40E_HASH_FILTER_SIZE_16K	= 4,
-	I40E_HASH_FILTER_SIZE_32K	= 5,
-	I40E_HASH_FILTER_SIZE_64K	= 6,
-	I40E_HASH_FILTER_SIZE_128K	= 7,
-	I40E_HASH_FILTER_SIZE_256K	= 8,
-	I40E_HASH_FILTER_SIZE_512K	= 9,
-	I40E_HASH_FILTER_SIZE_1M	= 10,
-};
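-
-/* Illustrative note, not part of the original header: the enum value acts
- * as a power-of-two multiplier of the base size, so the bucket count is
- * I40E_HASH_FILTER_BASE_SIZE << size, e.g. 1024 << I40E_HASH_FILTER_SIZE_4K
- * == 4096. The same encoding applies to the DMA context sizes below.
- */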
-
-/* DMA context base size is 0.5K */
-#define I40E_DMA_CNTX_BASE_SIZE		512
-/* Supported DMA context values */
-enum i40e_dma_cntx_size {
-	I40E_DMA_CNTX_SIZE_512		= 0,
-	I40E_DMA_CNTX_SIZE_1K		= 1,
-	I40E_DMA_CNTX_SIZE_2K		= 2,
-	I40E_DMA_CNTX_SIZE_4K		= 3,
-	I40E_DMA_CNTX_SIZE_8K		= 4,
-	I40E_DMA_CNTX_SIZE_16K		= 5,
-	I40E_DMA_CNTX_SIZE_32K		= 6,
-	I40E_DMA_CNTX_SIZE_64K		= 7,
-	I40E_DMA_CNTX_SIZE_128K		= 8,
-	I40E_DMA_CNTX_SIZE_256K		= 9,
-};
-
-/* Supported Hash look up table (LUT) sizes */
-enum i40e_hash_lut_size {
-	I40E_HASH_LUT_SIZE_128		= 0,
-	I40E_HASH_LUT_SIZE_512		= 1,
-};
-
-/* Structure to hold per-PF filter control settings */
-struct i40e_filter_control_settings {
-	/* number of PE Quad Hash filter buckets */
-	enum i40e_hash_filter_size pe_filt_num;
-	/* number of PE Quad Hash contexts */
-	enum i40e_dma_cntx_size pe_cntx_num;
-	/* number of FCoE filter buckets */
-	enum i40e_hash_filter_size fcoe_filt_num;
-	/* number of FCoE DDP contexts */
-	enum i40e_dma_cntx_size fcoe_cntx_num;
-	/* size of the Hash LUT */
-	enum i40e_hash_lut_size	hash_lut_size;
-	/* enable FDIR filters for PF and its VFs */
-	bool enable_fdir;
-	/* enable Ethertype filters for PF and its VFs */
-	bool enable_ethtype;
-	/* enable MAC/VLAN filters for PF and its VFs */
-	bool enable_macvlan;
-};
-
-/* Structure to hold device level control filter counts */
-struct i40e_control_filter_stats {
-	u16 mac_etype_used;   /* Used perfect match MAC/EtherType filters */
-	u16 etype_used;       /* Used perfect EtherType filters */
-	u16 mac_etype_free;   /* Unused perfect match MAC/EtherType filters */
-	u16 etype_free;       /* Unused perfect EtherType filters */
-};
-
-enum i40e_reset_type {
-	I40E_RESET_POR		= 0,
-	I40E_RESET_CORER	= 1,
-	I40E_RESET_GLOBR	= 2,
-	I40E_RESET_EMPR		= 3,
-};
-
-/* IEEE 802.1AB LLDP Agent Variables from NVM */
-#define I40E_NVM_LLDP_CFG_PTR	0x06
-#define I40E_SR_LLDP_CFG_PTR	0x31
-
-/* RSS Hash Table Size */
-#define I40E_PFQF_CTL_0_HASHLUTSIZE_512	0x00010000
-
-/* INPUT SET MASK for RSS, flow director and flexible payload */
-#define I40E_FD_INSET_L3_SRC_SHIFT		47
-#define I40E_FD_INSET_L3_SRC_WORD_MASK		(0x3ULL << \
-						 I40E_FD_INSET_L3_SRC_SHIFT)
-#define I40E_FD_INSET_L3_DST_SHIFT		35
-#define I40E_FD_INSET_L3_DST_WORD_MASK		(0x3ULL << \
-						 I40E_FD_INSET_L3_DST_SHIFT)
-#define I40E_FD_INSET_L4_SRC_SHIFT		34
-#define I40E_FD_INSET_L4_SRC_WORD_MASK		(0x1ULL << \
-						 I40E_FD_INSET_L4_SRC_SHIFT)
-#define I40E_FD_INSET_L4_DST_SHIFT		33
-#define I40E_FD_INSET_L4_DST_WORD_MASK		(0x1ULL << \
-						 I40E_FD_INSET_L4_DST_SHIFT)
-#define I40E_FD_INSET_VERIFY_TAG_SHIFT		31
-#define I40E_FD_INSET_VERIFY_TAG_WORD_MASK	(0x3ULL << \
-						 I40E_FD_INSET_VERIFY_TAG_SHIFT)
-
-#define I40E_FD_INSET_FLEX_WORD50_SHIFT		17
-#define I40E_FD_INSET_FLEX_WORD50_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD50_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD51_SHIFT		16
-#define I40E_FD_INSET_FLEX_WORD51_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD51_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD52_SHIFT		15
-#define I40E_FD_INSET_FLEX_WORD52_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD52_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD53_SHIFT		14
-#define I40E_FD_INSET_FLEX_WORD53_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD53_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD54_SHIFT		13
-#define I40E_FD_INSET_FLEX_WORD54_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD54_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD55_SHIFT		12
-#define I40E_FD_INSET_FLEX_WORD55_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD55_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD56_SHIFT		11
-#define I40E_FD_INSET_FLEX_WORD56_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD56_SHIFT)
-#define I40E_FD_INSET_FLEX_WORD57_SHIFT		10
-#define I40E_FD_INSET_FLEX_WORD57_MASK		(0x1ULL << \
-					I40E_FD_INSET_FLEX_WORD57_SHIFT)
-
-/* Version format for Dynamic Device Personalization (DDP) */
-struct i40e_ddp_version {
-	u8 major;
-	u8 minor;
-	u8 update;
-	u8 draft;
-};
-
-#define I40E_DDP_NAME_SIZE	32
-
-/* Package header */
-struct i40e_package_header {
-	struct i40e_ddp_version version;
-	u32 segment_count;
-	u32 segment_offset[1];
-};
-
-/* Generic segment header */
-struct i40e_generic_seg_header {
-#define SEGMENT_TYPE_METADATA	0x00000001
-#define SEGMENT_TYPE_NOTES	0x00000002
-#define SEGMENT_TYPE_I40E	0x00000011
-#define SEGMENT_TYPE_X722	0x00000012
-	u32 type;
-	struct i40e_ddp_version version;
-	u32 size;
-	char name[I40E_DDP_NAME_SIZE];
-};
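-
-/* Illustrative sketch, not part of the original header: segment_offset[1]
- * is the old-style one-element flexible-array idiom, so a package image is
- * walked by offsetting from the package header. The helper name is
- * hypothetical.
- */
-static inline struct i40e_generic_seg_header *
-i40e_example_pkg_segment(struct i40e_package_header *pkg, u32 idx)
-{
-	if (idx >= pkg->segment_count)
-		return NULL;
-	return (struct i40e_generic_seg_header *)
-		((u8 *)pkg + pkg->segment_offset[idx]);
-}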
-
-struct i40e_metadata_segment {
-	struct i40e_generic_seg_header header;
-	struct i40e_ddp_version version;
-	u32 track_id;
-	char name[I40E_DDP_NAME_SIZE];
-};
-
-struct i40e_device_id_entry {
-	u32 vendor_dev_id;
-	u32 sub_vendor_dev_id;
-};
-
-struct i40e_profile_segment {
-	struct i40e_generic_seg_header header;
-	struct i40e_ddp_version version;
-	char name[I40E_DDP_NAME_SIZE];
-	u32 device_table_count;
-	struct i40e_device_id_entry device_table[1];
-};
-
-struct i40e_section_table {
-	u32 section_count;
-	u32 section_offset[1];
-};
-
-struct i40e_profile_section_header {
-	u16 tbl_size;
-	u16 data_end;
-	struct {
-#define SECTION_TYPE_INFO	0x00000010
-#define SECTION_TYPE_MMIO	0x00000800
-#define SECTION_TYPE_AQ		0x00000801
-#define SECTION_TYPE_NOTE	0x80000000
-#define SECTION_TYPE_NAME	0x80000001
-		u32 type;
-		u32 offset;
-		u32 size;
-	} section;
-};
-
-struct i40e_profile_info {
-	u32 track_id;
-	struct i40e_ddp_version version;
-	u8 op;
-#define I40E_DDP_ADD_TRACKID		0x01
-#define I40E_DDP_REMOVE_TRACKID	0x02
-	u8 reserved[7];
-	u8 name[I40E_DDP_NAME_SIZE];
-};
-#endif /* _I40E_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
deleted file mode 100644
index 96e537a..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40EVF_H_
-#define _I40EVF_H_
-
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/aer.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/interrupt.h>
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/sctp.h>
-#include <linux/ipv6.h>
-#include <linux/kernel.h>
-#include <linux/bitops.h>
-#include <linux/timer.h>
-#include <linux/workqueue.h>
-#include <linux/wait.h>
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/skbuff.h>
-#include <linux/dma-mapping.h>
-#include <linux/etherdevice.h>
-#include <linux/socket.h>
-#include <linux/jiffies.h>
-#include <net/ip6_checksum.h>
-#include <net/pkt_cls.h>
-#include <net/udp.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_mirred.h>
-
-#include "i40e_type.h"
-#include <linux/avf/virtchnl.h>
-#include "i40e_txrx.h"
-
-#define DEFAULT_DEBUG_LEVEL_SHIFT 3
-#define PFX "i40evf: "
-
-/* VSI state flags shared with common code */
-enum i40evf_vsi_state_t {
-	__I40E_VSI_DOWN,
-	/* This must be last as it determines the size of the BITMAP */
-	__I40E_VSI_STATE_SIZE__,
-};
-
-/* dummy struct to make common code less painful */
-struct i40e_vsi {
-	struct i40evf_adapter *back;
-	struct net_device *netdev;
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-	u16 seid;
-	u16 id;
-	DECLARE_BITMAP(state, __I40E_VSI_STATE_SIZE__);
-	int base_vector;
-	u16 work_limit;
-	u16 qs_handle;
-	void *priv;     /* client driver data reference. */
-};
-
-/* How many Rx Buffers do we bundle into one write to the hardware? */
-#define I40EVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
-#define I40EVF_DEFAULT_TXD	512
-#define I40EVF_DEFAULT_RXD	512
-#define I40EVF_MAX_TXD		4096
-#define I40EVF_MIN_TXD		64
-#define I40EVF_MAX_RXD		4096
-#define I40EVF_MIN_RXD		64
-#define I40EVF_REQ_DESCRIPTOR_MULTIPLE	32
-#define I40EVF_MAX_AQ_BUF_SIZE	4096
-#define I40EVF_AQ_LEN		32
-#define I40EVF_AQ_MAX_ERR	20 /* times to try before resetting AQ */
-
-#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
-
-#define I40E_RX_DESC(R, i) (&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
-#define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i]))
-#define I40E_TX_CTXTDESC(R, i) \
-	(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
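-
-/* Illustrative note, not part of the original header: the macros above view
- * a ring's raw descriptor memory as typed descriptors, e.g. (ring names are
- * hypothetical):
- *
- *	union i40e_32byte_rx_desc *rxd = I40E_RX_DESC(rx_ring, i);
- *	struct i40e_tx_desc *txd = I40E_TX_DESC(tx_ring, i);
- */
-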
-#define I40EVF_MAX_REQ_QUEUES 4
-
-#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
-#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
-#define I40EVF_MBPS_DIVISOR	125000 /* divisor to convert to Mbps */
-
-/* MAX_MSIX_Q_VECTORS of these are allocated,
- * but we only use one per queue-specific vector.
- */
-struct i40e_q_vector {
-	struct i40evf_adapter *adapter;
-	struct i40e_vsi *vsi;
-	struct napi_struct napi;
-	struct i40e_ring_container rx;
-	struct i40e_ring_container tx;
-	u32 ring_mask;
-	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
-	u8 num_ringpairs;	/* total number of ring pairs in vector */
-	u16 v_idx;		/* index in the vsi->q_vector array. */
-	u16 reg_idx;		/* register index of the interrupt */
-	char name[IFNAMSIZ + 15];
-	bool arm_wb_state;
-	cpumask_t affinity_mask;
-	struct irq_affinity_notify affinity_notify;
-};
-
-/* Helper macros to switch between ints/sec and what the register uses.
- * And yes, it's the same math going both ways.  The lowest value
- * supported by all of the i40e hardware is 8.
- */
-#define EITR_INTS_PER_SEC_TO_REG(_eitr) \
-	((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8)
-#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG
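-
-/* Illustrative sketch, not part of the original header: a round trip with
- * the macros above, using 20,000 interrupts/sec as an assumed target rate.
- * The helper name is hypothetical.
- */
-static inline void i40evf_example_eitr_roundtrip(void)
-{
-	u32 reg = EITR_INTS_PER_SEC_TO_REG(20000);  /* 1000000000 / (20000 * 256) = 195 */
-	u32 ints = EITR_REG_TO_INTS_PER_SEC(reg);   /* 1000000000 / (195 * 256) ~= 20032 */
-
-	(void)ints;
-}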
-
-#define I40EVF_DESC_UNUSED(R) \
-	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
-	(R)->next_to_clean - (R)->next_to_use - 1)
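-
-/* Worked example, not part of the original header: with count = 512,
- * next_to_clean = 5 and next_to_use = 10, the macro yields
- * 512 + 5 - 10 - 1 = 506 unused descriptors; with next_to_clean = 10 and
- * next_to_use = 5 it yields 0 + 10 - 5 - 1 = 4.
- */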
-
-#define I40EVF_RX_DESC_ADV(R, i)	\
-	(&(((union i40e_adv_rx_desc *)((R).desc))[i]))
-#define I40EVF_TX_DESC_ADV(R, i)	\
-	(&(((union i40e_adv_tx_desc *)((R).desc))[i]))
-#define I40EVF_TX_CTXTDESC_ADV(R, i)	\
-	(&(((struct i40e_adv_tx_context_desc *)((R).desc))[i]))
-
-#define OTHER_VECTOR 1
-#define NONQ_VECS (OTHER_VECTOR)
-
-#define MIN_MSIX_Q_VECTORS 1
-#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS)
-
-#define I40EVF_QUEUE_END_OF_LIST 0x7FF
-#define I40EVF_FREE_VECTOR 0x7FFF
-struct i40evf_mac_filter {
-	struct list_head list;
-	u8 macaddr[ETH_ALEN];
-	bool remove;		/* filter needs to be removed */
-	bool add;		/* filter needs to be added */
-};
-
-struct i40evf_vlan_filter {
-	struct list_head list;
-	u16 vlan;
-	bool remove;		/* filter needs to be removed */
-	bool add;		/* filter needs to be added */
-};
-
-#define I40EVF_MAX_TRAFFIC_CLASS	4
-/* State of traffic class creation */
-enum i40evf_tc_state_t {
-	__I40EVF_TC_INVALID, /* no traffic class, default state */
-	__I40EVF_TC_RUNNING, /* traffic classes have been created */
-};
-
-/* channel info */
-struct i40evf_channel_config {
-	struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
-	enum i40evf_tc_state_t state;
-	u8 total_qps;
-};
-
-/* State of cloud filter */
-enum i40evf_cloud_filter_state_t {
-	__I40EVF_CF_INVALID,	 /* cloud filter not added */
-	__I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
-	__I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
-	__I40EVF_CF_ACTIVE,	 /* cloud filter is active */
-};
-
-/* Driver state. The order of these is important! */
-enum i40evf_state_t {
-	__I40EVF_STARTUP,		/* driver loaded, probe complete */
-	__I40EVF_REMOVE,		/* driver is being unloaded */
-	__I40EVF_INIT_VERSION_CHECK,	/* aq msg sent, awaiting reply */
-	__I40EVF_INIT_GET_RESOURCES,	/* aq msg sent, awaiting reply */
-	__I40EVF_INIT_SW,		/* got resources, setting up structs */
-	__I40EVF_RESETTING,		/* in reset */
-	/* Below here, watchdog is running */
-	__I40EVF_DOWN,			/* ready, can be opened */
-	__I40EVF_DOWN_PENDING,		/* descending, waiting for watchdog */
-	__I40EVF_TESTING,		/* in ethtool self-test */
-	__I40EVF_RUNNING,		/* opened, working */
-};
-
-enum i40evf_critical_section_t {
-	__I40EVF_IN_CRITICAL_TASK,	/* cannot be interrupted */
-	__I40EVF_IN_CLIENT_TASK,
-	__I40EVF_IN_REMOVE_TASK,	/* device being removed */
-};
-
-#define I40EVF_CLOUD_FIELD_OMAC		0x01
-#define I40EVF_CLOUD_FIELD_IMAC		0x02
-#define I40EVF_CLOUD_FIELD_IVLAN	0x04
-#define I40EVF_CLOUD_FIELD_TEN_ID	0x08
-#define I40EVF_CLOUD_FIELD_IIP		0x10
-
-#define I40EVF_CF_FLAGS_OMAC	I40EVF_CLOUD_FIELD_OMAC
-#define I40EVF_CF_FLAGS_IMAC	I40EVF_CLOUD_FIELD_IMAC
-#define I40EVF_CF_FLAGS_IMAC_IVLAN	(I40EVF_CLOUD_FIELD_IMAC |\
-					 I40EVF_CLOUD_FIELD_IVLAN)
-#define I40EVF_CF_FLAGS_IMAC_TEN_ID	(I40EVF_CLOUD_FIELD_IMAC |\
-					 I40EVF_CLOUD_FIELD_TEN_ID)
-#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC	(I40EVF_CLOUD_FIELD_OMAC |\
-						 I40EVF_CLOUD_FIELD_IMAC |\
-						 I40EVF_CLOUD_FIELD_TEN_ID)
-#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID	(I40EVF_CLOUD_FIELD_IMAC |\
-						 I40EVF_CLOUD_FIELD_IVLAN |\
-						 I40EVF_CLOUD_FIELD_TEN_ID)
-#define I40EVF_CF_FLAGS_IIP	I40E_CLOUD_FIELD_IIP
-
-/* bookkeeping of cloud filters */
-struct i40evf_cloud_filter {
-	enum i40evf_cloud_filter_state_t state;
-	struct list_head list;
-	struct virtchnl_filter f;
-	unsigned long cookie;
-	bool del;		/* filter needs to be deleted */
-	bool add;		/* filter needs to be added */
-};
-
-/* board specific private data structure */
-struct i40evf_adapter {
-	struct timer_list watchdog_timer;
-	struct work_struct reset_task;
-	struct work_struct adminq_task;
-	struct delayed_work client_task;
-	struct delayed_work init_task;
-	wait_queue_head_t down_waitqueue;
-	struct i40e_q_vector *q_vectors;
-	struct list_head vlan_filter_list;
-	struct list_head mac_filter_list;
-	/* Lock to protect accesses to MAC and VLAN lists */
-	spinlock_t mac_vlan_list_lock;
-	char misc_vector_name[IFNAMSIZ + 9];
-	int num_active_queues;
-	int num_req_queues;
-
-	/* TX */
-	struct i40e_ring *tx_rings;
-	u32 tx_timeout_count;
-	u32 tx_desc_count;
-
-	/* RX */
-	struct i40e_ring *rx_rings;
-	u64 hw_csum_rx_error;
-	u32 rx_desc_count;
-	int num_msix_vectors;
-	int num_iwarp_msix;
-	int iwarp_base_vector;
-	u32 client_pending;
-	struct i40e_client_instance *cinst;
-	struct msix_entry *msix_entries;
-
-	u32 flags;
-#define I40EVF_FLAG_RX_CSUM_ENABLED		BIT(0)
-#define I40EVF_FLAG_PF_COMMS_FAILED		BIT(3)
-#define I40EVF_FLAG_RESET_PENDING		BIT(4)
-#define I40EVF_FLAG_RESET_NEEDED		BIT(5)
-#define I40EVF_FLAG_WB_ON_ITR_CAPABLE		BIT(6)
-#define I40EVF_FLAG_ADDR_SET_BY_PF		BIT(8)
-#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(9)
-#define I40EVF_FLAG_CLIENT_NEEDS_OPEN		BIT(10)
-#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE		BIT(11)
-#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS	BIT(12)
-#define I40EVF_FLAG_PROMISC_ON			BIT(13)
-#define I40EVF_FLAG_ALLMULTI_ON			BIT(14)
-#define I40EVF_FLAG_LEGACY_RX			BIT(15)
-#define I40EVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
-#define I40EVF_FLAG_QUEUES_DISABLED		BIT(17)
-/* duplicates for common code */
-#define I40E_FLAG_DCB_ENABLED			0
-#define I40E_FLAG_RX_CSUM_ENABLED		I40EVF_FLAG_RX_CSUM_ENABLED
-#define I40E_FLAG_LEGACY_RX			I40EVF_FLAG_LEGACY_RX
-	/* flags for admin queue service task */
-	u32 aq_required;
-#define I40EVF_FLAG_AQ_ENABLE_QUEUES		BIT(0)
-#define I40EVF_FLAG_AQ_DISABLE_QUEUES		BIT(1)
-#define I40EVF_FLAG_AQ_ADD_MAC_FILTER		BIT(2)
-#define I40EVF_FLAG_AQ_ADD_VLAN_FILTER		BIT(3)
-#define I40EVF_FLAG_AQ_DEL_MAC_FILTER		BIT(4)
-#define I40EVF_FLAG_AQ_DEL_VLAN_FILTER		BIT(5)
-#define I40EVF_FLAG_AQ_CONFIGURE_QUEUES		BIT(6)
-#define I40EVF_FLAG_AQ_MAP_VECTORS		BIT(7)
-#define I40EVF_FLAG_AQ_HANDLE_RESET		BIT(8)
-#define I40EVF_FLAG_AQ_CONFIGURE_RSS		BIT(9) /* direct AQ config */
-#define I40EVF_FLAG_AQ_GET_CONFIG		BIT(10)
-/* Newer style, RSS done by the PF so we can ignore hardware vagaries. */
-#define I40EVF_FLAG_AQ_GET_HENA			BIT(11)
-#define I40EVF_FLAG_AQ_SET_HENA			BIT(12)
-#define I40EVF_FLAG_AQ_SET_RSS_KEY		BIT(13)
-#define I40EVF_FLAG_AQ_SET_RSS_LUT		BIT(14)
-#define I40EVF_FLAG_AQ_REQUEST_PROMISC		BIT(15)
-#define I40EVF_FLAG_AQ_RELEASE_PROMISC		BIT(16)
-#define I40EVF_FLAG_AQ_REQUEST_ALLMULTI		BIT(17)
-#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI		BIT(18)
-#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING	BIT(19)
-#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING	BIT(20)
-#define I40EVF_FLAG_AQ_ENABLE_CHANNELS		BIT(21)
-#define I40EVF_FLAG_AQ_DISABLE_CHANNELS		BIT(22)
-#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER		BIT(23)
-#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT(24)
-
-	/* OS defined structs */
-	struct net_device *netdev;
-	struct pci_dev *pdev;
-
-	struct i40e_hw hw; /* defined in i40e_type.h */
-
-	enum i40evf_state_t state;
-	unsigned long crit_section;
-
-	struct work_struct watchdog_task;
-	bool netdev_registered;
-	bool link_up;
-	enum virtchnl_link_speed link_speed;
-	enum virtchnl_ops current_op;
-#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
-			    (_a)->vf_res->vf_cap_flags & \
-				VIRTCHNL_VF_OFFLOAD_IWARP : \
-			    0)
-#define CLIENT_ENABLED(_a) ((_a)->cinst)
-/* RSS by the PF should be preferred over RSS via other methods. */
-#define RSS_PF(_a) ((_a)->vf_res->vf_cap_flags & \
-		    VIRTCHNL_VF_OFFLOAD_RSS_PF)
-#define RSS_AQ(_a) ((_a)->vf_res->vf_cap_flags & \
-		    VIRTCHNL_VF_OFFLOAD_RSS_AQ)
-#define RSS_REG(_a) (!((_a)->vf_res->vf_cap_flags & \
-		       (VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
-			VIRTCHNL_VF_OFFLOAD_RSS_PF)))
-#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
-			  VIRTCHNL_VF_OFFLOAD_VLAN)
-	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
-	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
-	struct virtchnl_version_info pf_version;
-#define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
-		       ((_a)->pf_version.minor == 1))
-	u16 msg_enable;
-	struct i40e_eth_stats current_stats;
-	struct i40e_vsi vsi;
-	u32 aq_wait_count;
-	/* RSS stuff */
-	u64 hena;
-	u16 rss_key_size;
-	u16 rss_lut_size;
-	u8 *rss_key;
-	u8 *rss_lut;
-	/* ADQ related members */
-	struct i40evf_channel_config ch_config;
-	u8 num_tc;
-	struct list_head cloud_filter_list;
-	/* lock to protect access to the cloud filter list */
-	spinlock_t cloud_filter_list_lock;
-	u16 num_cloud_filters;
-};
-
-
-/* Ethtool Private Flags */
-
-/* lan device */
-struct i40e_device {
-	struct list_head list;
-	struct i40evf_adapter *vf;
-};
-
-/* needed by i40evf_ethtool.c */
-extern char i40evf_driver_name[];
-extern const char i40evf_driver_version[];
-
-int i40evf_up(struct i40evf_adapter *adapter);
-void i40evf_down(struct i40evf_adapter *adapter);
-int i40evf_process_config(struct i40evf_adapter *adapter);
-void i40evf_schedule_reset(struct i40evf_adapter *adapter);
-void i40evf_reset(struct i40evf_adapter *adapter);
-void i40evf_set_ethtool_ops(struct net_device *netdev);
-void i40evf_update_stats(struct i40evf_adapter *adapter);
-void i40evf_reset_interrupt_capability(struct i40evf_adapter *adapter);
-int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter);
-void i40evf_irq_enable_queues(struct i40evf_adapter *adapter, u32 mask);
-void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter);
-void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter);
-
-void i40e_napi_add_all(struct i40evf_adapter *adapter);
-void i40e_napi_del_all(struct i40evf_adapter *adapter);
-
-int i40evf_send_api_ver(struct i40evf_adapter *adapter);
-int i40evf_verify_api_ver(struct i40evf_adapter *adapter);
-int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter);
-int i40evf_get_vf_config(struct i40evf_adapter *adapter);
-void i40evf_irq_enable(struct i40evf_adapter *adapter, bool flush);
-void i40evf_configure_queues(struct i40evf_adapter *adapter);
-void i40evf_deconfigure_queues(struct i40evf_adapter *adapter);
-void i40evf_enable_queues(struct i40evf_adapter *adapter);
-void i40evf_disable_queues(struct i40evf_adapter *adapter);
-void i40evf_map_queues(struct i40evf_adapter *adapter);
-int i40evf_request_queues(struct i40evf_adapter *adapter, int num);
-void i40evf_add_ether_addrs(struct i40evf_adapter *adapter);
-void i40evf_del_ether_addrs(struct i40evf_adapter *adapter);
-void i40evf_add_vlans(struct i40evf_adapter *adapter);
-void i40evf_del_vlans(struct i40evf_adapter *adapter);
-void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags);
-void i40evf_request_stats(struct i40evf_adapter *adapter);
-void i40evf_request_reset(struct i40evf_adapter *adapter);
-void i40evf_get_hena(struct i40evf_adapter *adapter);
-void i40evf_set_hena(struct i40evf_adapter *adapter);
-void i40evf_set_rss_key(struct i40evf_adapter *adapter);
-void i40evf_set_rss_lut(struct i40evf_adapter *adapter);
-void i40evf_enable_vlan_stripping(struct i40evf_adapter *adapter);
-void i40evf_disable_vlan_stripping(struct i40evf_adapter *adapter);
-void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
-				enum virtchnl_ops v_opcode,
-				i40e_status v_retval, u8 *msg, u16 msglen);
-int i40evf_config_rss(struct i40evf_adapter *adapter);
-int i40evf_lan_add_device(struct i40evf_adapter *adapter);
-int i40evf_lan_del_device(struct i40evf_adapter *adapter);
-void i40evf_client_subtask(struct i40evf_adapter *adapter);
-void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
-void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
-void i40evf_notify_client_open(struct i40e_vsi *vsi);
-void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
-void i40evf_enable_channels(struct i40evf_adapter *adapter);
-void i40evf_disable_channels(struct i40evf_adapter *adapter);
-void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
-void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
-#endif /* _I40EVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.c b/drivers/net/ethernet/intel/i40evf/i40evf_client.c
deleted file mode 100644
index 3cc9d60..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40evf_client.c
+++ /dev/null
@@ -1,579 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#include <linux/list.h>
-#include <linux/errno.h>
-
-#include "i40evf.h"
-#include "i40e_prototype.h"
-#include "i40evf_client.h"
-
-static
-const char i40evf_client_interface_version_str[] = I40EVF_CLIENT_VERSION_STR;
-static struct i40e_client *vf_registered_client;
-static LIST_HEAD(i40evf_devices);
-static DEFINE_MUTEX(i40evf_device_mutex);
-
-static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
-				       struct i40e_client *client,
-				       u8 *msg, u16 len);
-
-static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
-				      struct i40e_client *client,
-				      struct i40e_qvlist_info *qvlist_info);
-
-static struct i40e_ops i40evf_lan_ops = {
-	.virtchnl_send = i40evf_client_virtchnl_send,
-	.setup_qvlist = i40evf_client_setup_qvlist,
-};
-
-/**
- * i40evf_client_get_params - retrieve relevant client parameters
- * @vsi: VSI with parameters
- * @params: client param struct
- **/
-static
-void i40evf_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params)
-{
-	int i;
-
-	memset(params, 0, sizeof(struct i40e_params));
-	params->mtu = vsi->netdev->mtu;
-	params->link_up = vsi->back->link_up;
-
-	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		params->qos.prio_qos[i].tc = 0;
-		params->qos.prio_qos[i].qs_handle = vsi->qs_handle;
-	}
-}
-
-/**
- * i40evf_notify_client_message - call the client message receive callback
- * @vsi: the VSI associated with this client
- * @msg: message buffer
- * @len: length of message
- *
- * If there is a client attached to this VSI, call the client
- **/
-void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len)
-{
-	struct i40e_client_instance *cinst;
-
-	if (!vsi)
-		return;
-
-	cinst = vsi->back->cinst;
-	if (!cinst || !cinst->client || !cinst->client->ops ||
-	    !cinst->client->ops->virtchnl_receive) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"Cannot locate client instance virtchnl_receive function\n");
-		return;
-	}
-	cinst->client->ops->virtchnl_receive(&cinst->lan_info,  cinst->client,
-					     msg, len);
-}
-
-/**
- * i40evf_notify_client_l2_params - call the client notify callback
- * @vsi: the VSI with l2 param changes
- *
- * If there is a client attached to this VSI, call the client
- **/
-void i40evf_notify_client_l2_params(struct i40e_vsi *vsi)
-{
-	struct i40e_client_instance *cinst;
-	struct i40e_params params;
-
-	if (!vsi)
-		return;
-
-	cinst = vsi->back->cinst;
-
-	if (!cinst || !cinst->client || !cinst->client->ops ||
-	    !cinst->client->ops->l2_param_change) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"Cannot locate client instance l2_param_change function\n");
-		return;
-	}
-	i40evf_client_get_params(vsi, &params);
-	cinst->lan_info.params = params;
-	cinst->client->ops->l2_param_change(&cinst->lan_info, cinst->client,
-					    &params);
-}
-
-/**
- * i40evf_notify_client_open - call the client open callback
- * @vsi: the VSI with netdev opened
- *
- * If there is a client attached to this netdev, call its open callback
- **/
-void i40evf_notify_client_open(struct i40e_vsi *vsi)
-{
-	struct i40evf_adapter *adapter = vsi->back;
-	struct i40e_client_instance *cinst = adapter->cinst;
-	int ret;
-
-	if (!cinst || !cinst->client || !cinst->client->ops ||
-	    !cinst->client->ops->open) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"Cannot locate client instance open function\n");
-		return;
-	}
-	if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state))) {
-		ret = cinst->client->ops->open(&cinst->lan_info, cinst->client);
-		if (!ret)
-			set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state);
-	}
-}
-
-/**
- * i40evf_client_release_qvlist - send a message to the PF to release iwarp qv map
- * @ldev: pointer to L2 context.
- *
- * Return 0 on success or < 0 on error
- **/
-static int i40evf_client_release_qvlist(struct i40e_info *ldev)
-{
-	struct i40evf_adapter *adapter = ldev->vf;
-	i40e_status err;
-
-	if (adapter->aq_required)
-		return -EAGAIN;
-
-	err = i40e_aq_send_msg_to_pf(&adapter->hw,
-			VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
-			I40E_SUCCESS, NULL, 0, NULL);
-
-	if (err)
-		dev_err(&adapter->pdev->dev,
-			"Unable to send iWarp vector release message to PF, error %d, aq status %d\n",
-			err, adapter->hw.aq.asq_last_status);
-
-	return err;
-}
-
-/**
- * i40evf_notify_client_close - call the client close callback
- * @vsi: the VSI with netdev closed
- * @reset: true when close called due to reset pending
- *
- * If there is a client attached to this netdev, call its close callback
- **/
-void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset)
-{
-	struct i40evf_adapter *adapter = vsi->back;
-	struct i40e_client_instance *cinst = adapter->cinst;
-
-	if (!cinst || !cinst->client || !cinst->client->ops ||
-	    !cinst->client->ops->close) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"Cannot locate client instance close function\n");
-		return;
-	}
-	cinst->client->ops->close(&cinst->lan_info, cinst->client, reset);
-	i40evf_client_release_qvlist(&cinst->lan_info);
-	clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state);
-}
-
-/**
- * i40evf_client_add_instance - add a client instance to the instance list
- * @adapter: pointer to the board struct
- *
- * Returns cinst ptr on success, NULL on failure
- **/
-static struct i40e_client_instance *
-i40evf_client_add_instance(struct i40evf_adapter *adapter)
-{
-	struct i40e_client_instance *cinst = NULL;
-	struct i40e_vsi *vsi = &adapter->vsi;
-	struct netdev_hw_addr *mac = NULL;
-	struct i40e_params params;
-
-	if (!vf_registered_client)
-		goto out;
-
-	if (adapter->cinst) {
-		cinst = adapter->cinst;
-		goto out;
-	}
-
-	cinst = kzalloc(sizeof(*cinst), GFP_KERNEL);
-	if (!cinst)
-		goto out;
-
-	cinst->lan_info.vf = (void *)adapter;
-	cinst->lan_info.netdev = vsi->netdev;
-	cinst->lan_info.pcidev = adapter->pdev;
-	cinst->lan_info.fid = 0;
-	cinst->lan_info.ftype = I40E_CLIENT_FTYPE_VF;
-	cinst->lan_info.hw_addr = adapter->hw.hw_addr;
-	cinst->lan_info.ops = &i40evf_lan_ops;
-	cinst->lan_info.version.major = I40EVF_CLIENT_VERSION_MAJOR;
-	cinst->lan_info.version.minor = I40EVF_CLIENT_VERSION_MINOR;
-	cinst->lan_info.version.build = I40EVF_CLIENT_VERSION_BUILD;
-	i40evf_client_get_params(vsi, &params);
-	cinst->lan_info.params = params;
-	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cinst->state);
-
-	cinst->lan_info.msix_count = adapter->num_iwarp_msix;
-	cinst->lan_info.msix_entries =
-			&adapter->msix_entries[adapter->iwarp_base_vector];
-
-	mac = list_first_entry(&cinst->lan_info.netdev->dev_addrs.list,
-			       struct netdev_hw_addr, list);
-	if (mac)
-		ether_addr_copy(cinst->lan_info.lanmac, mac->addr);
-	else
-		dev_err(&adapter->pdev->dev, "MAC address list is empty!\n");
-
-	cinst->client = vf_registered_client;
-	adapter->cinst = cinst;
-out:
-	return cinst;
-}
-
-/**
- * i40evf_client_del_instance - removes a client instance from the list
- * @adapter: pointer to the board struct
- *
- **/
-static
-void i40evf_client_del_instance(struct i40evf_adapter *adapter)
-{
-	kfree(adapter->cinst);
-	adapter->cinst = NULL;
-}
-
-/**
- * i40evf_client_subtask - client maintenance work
- * @adapter: board private structure
- **/
-void i40evf_client_subtask(struct i40evf_adapter *adapter)
-{
-	struct i40e_client *client = vf_registered_client;
-	struct i40e_client_instance *cinst;
-	int ret = 0;
-
-	if (adapter->state < __I40EVF_DOWN)
-		return;
-
-	/* first check client is registered */
-	if (!client)
-		return;
-
-	/* Add the client instance to the instance list */
-	cinst = i40evf_client_add_instance(adapter);
-	if (!cinst)
-		return;
-
-	dev_info(&adapter->pdev->dev, "Added instance of Client %s\n",
-		 client->name);
-
-	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) {
-		/* Send an Open request to the client */
-
-		if (client->ops && client->ops->open)
-			ret = client->ops->open(&cinst->lan_info, client);
-		if (!ret)
-			set_bit(__I40E_CLIENT_INSTANCE_OPENED,
-				&cinst->state);
-		else
-			/* remove client instance */
-			i40evf_client_del_instance(adapter);
-	}
-}
-
-/**
- * i40evf_lan_add_device - add a lan device struct to the list of lan devices
- * @adapter: pointer to the board struct
- *
- * Returns 0 on success or non-zero on error
- **/
-int i40evf_lan_add_device(struct i40evf_adapter *adapter)
-{
-	struct i40e_device *ldev;
-	int ret = 0;
-
-	mutex_lock(&i40evf_device_mutex);
-	list_for_each_entry(ldev, &i40evf_devices, list) {
-		if (ldev->vf == adapter) {
-			ret = -EEXIST;
-			goto out;
-		}
-	}
-	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
-	if (!ldev) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	ldev->vf = adapter;
-	INIT_LIST_HEAD(&ldev->list);
-	list_add(&ldev->list, &i40evf_devices);
-	dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
-		 adapter->hw.bus.bus_id, adapter->hw.bus.device,
-		 adapter->hw.bus.func);
-
-	/* Since in some cases registration may have happened before a device
-	 * got added, schedule a subtask to initiate the clients.
-	 */
-	adapter->flags |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
-
-out:
-	mutex_unlock(&i40evf_device_mutex);
-	return ret;
-}
-
-/**
- * i40evf_lan_del_device - removes a lan device from the device list
- * @adapter: pointer to the board struct
- *
- * Returns 0 on success or non-zero on error
- **/
-int i40evf_lan_del_device(struct i40evf_adapter *adapter)
-{
-	struct i40e_device *ldev, *tmp;
-	int ret = -ENODEV;
-
-	mutex_lock(&i40evf_device_mutex);
-	list_for_each_entry_safe(ldev, tmp, &i40evf_devices, list) {
-		if (ldev->vf == adapter) {
-			dev_info(&adapter->pdev->dev,
-				 "Deleted LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
-				 adapter->hw.bus.bus_id, adapter->hw.bus.device,
-				 adapter->hw.bus.func);
-			list_del(&ldev->list);
-			kfree(ldev);
-			ret = 0;
-			break;
-		}
-	}
-
-	mutex_unlock(&i40evf_device_mutex);
-	return ret;
-}
-
-/**
- * i40evf_client_release - release client specific resources
- * @client: pointer to the registered client
- *
- **/
-static void i40evf_client_release(struct i40e_client *client)
-{
-	struct i40e_client_instance *cinst;
-	struct i40e_device *ldev;
-	struct i40evf_adapter *adapter;
-
-	mutex_lock(&i40evf_device_mutex);
-	list_for_each_entry(ldev, &i40evf_devices, list) {
-		adapter = ldev->vf;
-		cinst = adapter->cinst;
-		if (!cinst)
-			continue;
-		if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) {
-			if (client->ops && client->ops->close)
-				client->ops->close(&cinst->lan_info, client,
-						   false);
-			i40evf_client_release_qvlist(&cinst->lan_info);
-			clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state);
-
-			dev_warn(&adapter->pdev->dev,
-				 "Client %s instance closed\n", client->name);
-		}
-		/* delete the client instance */
-		i40evf_client_del_instance(adapter);
-		dev_info(&adapter->pdev->dev, "Deleted client instance of Client %s\n",
-			 client->name);
-	}
-	mutex_unlock(&i40evf_device_mutex);
-}
-
-/**
- * i40evf_client_prepare - prepare client specific resources
- * @client: pointer to the registered client
- *
- **/
-static void i40evf_client_prepare(struct i40e_client *client)
-{
-	struct i40e_device *ldev;
-	struct i40evf_adapter *adapter;
-
-	mutex_lock(&i40evf_device_mutex);
-	list_for_each_entry(ldev, &i40evf_devices, list) {
-		adapter = ldev->vf;
-		/* Signal the watchdog to service the client */
-		adapter->flags |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
-	}
-	mutex_unlock(&i40evf_device_mutex);
-}
-
-/**
- * i40evf_client_virtchnl_send - send a message to the PF instance
- * @ldev: pointer to L2 context.
- * @client: Client pointer.
- * @msg: pointer to message buffer
- * @len: message length
- *
- * Return 0 on success or < 0 on error
- **/
-static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
-				       struct i40e_client *client,
-				       u8 *msg, u16 len)
-{
-	struct i40evf_adapter *adapter = ldev->vf;
-	i40e_status err;
-
-	if (adapter->aq_required)
-		return -EAGAIN;
-
-	err = i40e_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP,
-				     I40E_SUCCESS, msg, len, NULL);
-	if (err)
-		dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n",
-			err, adapter->hw.aq.asq_last_status);
-
-	return err;
-}
-
-/**
- * i40evf_client_setup_qvlist - send a message to the PF to setup iwarp qv map
- * @ldev: pointer to L2 context.
- * @client: Client pointer.
- * @qvlist_info: queue and vector list
- *
- * Return 0 on success or < 0 on error
- **/
-static int i40evf_client_setup_qvlist(struct i40e_info *ldev,
-				      struct i40e_client *client,
-				      struct i40e_qvlist_info *qvlist_info)
-{
-	struct virtchnl_iwarp_qvlist_info *v_qvlist_info;
-	struct i40evf_adapter *adapter = ldev->vf;
-	struct i40e_qv_info *qv_info;
-	i40e_status err;
-	u32 v_idx, i;
-	u32 msg_size;
-
-	if (adapter->aq_required)
-		return -EAGAIN;
-
-	/* A quick check on whether the vectors belong to the client */
-	for (i = 0; i < qvlist_info->num_vectors; i++) {
-		qv_info = &qvlist_info->qv_info[i];
-		if (!qv_info)
-			continue;
-		v_idx = qv_info->v_idx;
-		if ((v_idx >=
-		    (adapter->iwarp_base_vector + adapter->num_iwarp_msix)) ||
-		    (v_idx < adapter->iwarp_base_vector))
-			return -EINVAL;
-	}
-
-	v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info;
-	msg_size = sizeof(struct virtchnl_iwarp_qvlist_info) +
-			(sizeof(struct virtchnl_iwarp_qv_info) *
-			(v_qvlist_info->num_vectors - 1));
-
-	adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP);
-	err = i40e_aq_send_msg_to_pf(&adapter->hw,
-			VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
-			I40E_SUCCESS, (u8 *)v_qvlist_info, msg_size, NULL);
-
-	if (err) {
-		dev_err(&adapter->pdev->dev,
-			"Unable to send iWarp vector config message to PF, error %d, aq status %d\n",
-			err, adapter->hw.aq.asq_last_status);
-		goto out;
-	}
-
-	err = -EBUSY;
-	for (i = 0; i < 5; i++) {
-		msleep(100);
-		if (!(adapter->client_pending &
-		      BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) {
-			err = 0;
-			break;
-		}
-	}
-out:
-	return err;
-}
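
The msg_size arithmetic above exists because struct virtchnl_iwarp_qvlist_info declares a one-element qv_info[] array, so sizeof() already covers the first entry. A stand-alone sketch of that sizing rule (the demo_* names are illustrative, not driver API):

#include <stddef.h>

struct demo_elem { unsigned int v_idx; };
struct demo_hdr {
	unsigned int num_elems;
	struct demo_elem elems[1];	/* first entry counted by sizeof() */
};

/* bytes needed for a header followed by n entries, n >= 1 */
static size_t demo_hdr_bytes(unsigned int n)
{
	return sizeof(struct demo_hdr) + (n - 1) * sizeof(struct demo_elem);
}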
-
-/**
- * i40evf_register_client - Register an i40e client driver with the L2 driver
- * @client: pointer to the i40e_client struct
- *
- * Returns 0 on success or non-0 on error
- **/
-int i40evf_register_client(struct i40e_client *client)
-{
-	int ret = 0;
-
-	if (!client) {
-		ret = -EIO;
-		goto out;
-	}
-
-	if (strlen(client->name) == 0) {
-		pr_info("i40evf: Failed to register client with no name\n");
-		ret = -EIO;
-		goto out;
-	}
-
-	if (vf_registered_client) {
-		pr_info("i40evf: Client %s has already been registered!\n",
-			client->name);
-		ret = -EEXIST;
-		goto out;
-	}
-
-	if ((client->version.major != I40EVF_CLIENT_VERSION_MAJOR) ||
-	    (client->version.minor != I40EVF_CLIENT_VERSION_MINOR)) {
-		pr_info("i40evf: Failed to register client %s due to mismatched client interface version\n",
-			client->name);
-		pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
-			client->version.major, client->version.minor,
-			client->version.build,
-			i40evf_client_interface_version_str);
-		ret = -EIO;
-		goto out;
-	}
-
-	vf_registered_client = client;
-
-	i40evf_client_prepare(client);
-
-	pr_info("i40evf: Registered client %s with return code %d\n",
-		client->name, ret);
-out:
-	return ret;
-}
-EXPORT_SYMBOL(i40evf_register_client);
-
-/**
- * i40evf_unregister_client - Unregister an i40e client driver from the L2 driver
- * @client: pointer to the i40e_client struct
- *
- * Returns 0 on success or non-0 on error
- **/
-int i40evf_unregister_client(struct i40e_client *client)
-{
-	int ret = 0;
-
-	/* When an unregister request comes through, we must send a close
-	 * for each of the client instances that were opened. The
-	 * client_release function is called to handle this.
-	 */
-	i40evf_client_release(client);
-
-	if (vf_registered_client != client) {
-		pr_info("i40evf: Client %s has not been registered\n",
-			client->name);
-		ret = -ENODEV;
-		goto out;
-	}
-	vf_registered_client = NULL;
-	pr_info("i40evf: Unregistered client %s\n", client->name);
-out:
-	return ret;
-}
-EXPORT_SYMBOL(i40evf_unregister_client);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.h b/drivers/net/ethernet/intel/i40evf/i40evf_client.h
deleted file mode 100644
index 5585f36..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40evf_client.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40EVF_CLIENT_H_
-#define _I40EVF_CLIENT_H_
-
-#define I40EVF_CLIENT_STR_LENGTH 10
-
-/* Client interface version should be updated anytime there is a change in the
- * existing APIs or data structures.
- */
-#define I40EVF_CLIENT_VERSION_MAJOR 0
-#define I40EVF_CLIENT_VERSION_MINOR 01
-#define I40EVF_CLIENT_VERSION_BUILD 00
-#define I40EVF_CLIENT_VERSION_STR     \
-	__stringify(I40EVF_CLIENT_VERSION_MAJOR) "." \
-	__stringify(I40EVF_CLIENT_VERSION_MINOR) "." \
-	__stringify(I40EVF_CLIENT_VERSION_BUILD)
-
-struct i40e_client_version {
-	u8 major;
-	u8 minor;
-	u8 build;
-	u8 rsvd;
-};
-
-enum i40e_client_state {
-	__I40E_CLIENT_NULL,
-	__I40E_CLIENT_REGISTERED
-};
-
-enum i40e_client_instance_state {
-	__I40E_CLIENT_INSTANCE_NONE,
-	__I40E_CLIENT_INSTANCE_OPENED,
-};
-
-struct i40e_ops;
-struct i40e_client;
-
-/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
- * In order for us to keep the interface simple, SW will define a
- * unique type value for AEQ.
- */
-#define I40E_QUEUE_TYPE_PE_AEQ  0x80
-#define I40E_QUEUE_INVALID_IDX	0xFFFF
-
-struct i40e_qv_info {
-	u32 v_idx; /* msix_vector */
-	u16 ceq_idx;
-	u16 aeq_idx;
-	u8 itr_idx;
-};
-
-struct i40e_qvlist_info {
-	u32 num_vectors;
-	struct i40e_qv_info qv_info[1];
-};
-
-#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF
-
-/* set of LAN parameters useful for clients managed by LAN */
-
-/* Struct to hold per priority info */
-struct i40e_prio_qos_params {
-	u16 qs_handle; /* qs handle for prio */
-	u8 tc; /* TC mapped to prio */
-	u8 reserved;
-};
-
-#define I40E_CLIENT_MAX_USER_PRIORITY        8
-/* Struct to hold Client QoS */
-struct i40e_qos_params {
-	struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
-};
-
-struct i40e_params {
-	struct i40e_qos_params qos;
-	u16 mtu;
-	u16 link_up; /* boolean */
-};
-
-/* Structure to hold LAN device info for a client device */
-struct i40e_info {
-	struct i40e_client_version version;
-	u8 lanmac[6];
-	struct net_device *netdev;
-	struct pci_dev *pcidev;
-	u8 __iomem *hw_addr;
-	u8 fid;	/* function id, PF id or VF id */
-#define I40E_CLIENT_FTYPE_PF 0
-#define I40E_CLIENT_FTYPE_VF 1
-	u8 ftype; /* function type, PF or VF */
-	void *vf; /* cast to i40evf_adapter */
-
-	/* All L2 params that could change during the life span of the device
-	 * and needs to be communicated to the client when they change
-	 */
-	struct i40e_params params;
-	struct i40e_ops *ops;
-
-	u16 msix_count;	 /* number of msix vectors*/
-	/* Array down below will be dynamically allocated based on msix_count */
-	struct msix_entry *msix_entries;
-	u16 itr_index; /* Which ITR index the PE driver is supposed to use */
-};
-
-struct i40e_ops {
-	/* setup_qvlist enables queues with a particular vector */
-	int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client,
-			    struct i40e_qvlist_info *qv_info);
-
-	u32 (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client,
-			     u8 *msg, u16 len);
-
-	/* If the PE Engine is unresponsive, the RDMA driver can request a reset. */
-	void (*request_reset)(struct i40e_info *ldev,
-			      struct i40e_client *client);
-};
-
-struct i40e_client_ops {
-	/* Should be called from register_client() or whenever the driver is
-	 * ready to create a specific client instance.
-	 */
-	int (*open)(struct i40e_info *ldev, struct i40e_client *client);
-
-	/* Should be closed when netdev is unavailable or when unregister
-	 * call comes in. If the close happens due to a reset, set the reset
-	 * bit to true.
-	 */
-	void (*close)(struct i40e_info *ldev, struct i40e_client *client,
-		      bool reset);
-
-	/* called when some L2 managed parameters change - mss */
-	void (*l2_param_change)(struct i40e_info *ldev,
-				struct i40e_client *client,
-				struct i40e_params *params);
-
-	/* called when a message is received from the PF */
-	int (*virtchnl_receive)(struct i40e_info *ldev,
-				struct i40e_client *client,
-				u8 *msg, u16 len);
-};
-
-/* Client device */
-struct i40e_client_instance {
-	struct list_head list;
-	struct i40e_info lan_info;
-	struct i40e_client *client;
-	unsigned long  state;
-};
-
-struct i40e_client {
-	struct list_head list;		/* list of registered clients */
-	char name[I40EVF_CLIENT_STR_LENGTH];
-	struct i40e_client_version version;
-	unsigned long state;		/* client state */
-	atomic_t ref_cnt;  /* Count of all the client devices of this kind */
-	u32 flags;
-#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE	BIT(0)
-#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS	BIT(2)
-	u8 type;
-#define I40E_CLIENT_IWARP 0
-	struct i40e_client_ops *ops;	/* client ops provided by the client */
-};
-
-/* used by clients */
-int i40evf_register_client(struct i40e_client *client);
-int i40evf_unregister_client(struct i40e_client *client);
-#endif /* _I40EVF_CLIENT_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
deleted file mode 100644
index 69efe0a..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ /dev/null
@@ -1,820 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-/* ethtool support for i40evf */
-#include "i40evf.h"
-
-#include <linux/uaccess.h>
-
-struct i40evf_stats {
-	char stat_string[ETH_GSTRING_LEN];
-	int stat_offset;
-};
-
-#define I40EVF_STAT(_name, _stat) { \
-	.stat_string = _name, \
-	.stat_offset = offsetof(struct i40evf_adapter, _stat) \
-}
-
-/* All stats are u64, so we don't need to track the size of the field. */
-static const struct i40evf_stats i40evf_gstrings_stats[] = {
-	I40EVF_STAT("rx_bytes", current_stats.rx_bytes),
-	I40EVF_STAT("rx_unicast", current_stats.rx_unicast),
-	I40EVF_STAT("rx_multicast", current_stats.rx_multicast),
-	I40EVF_STAT("rx_broadcast", current_stats.rx_broadcast),
-	I40EVF_STAT("rx_discards", current_stats.rx_discards),
-	I40EVF_STAT("rx_unknown_protocol", current_stats.rx_unknown_protocol),
-	I40EVF_STAT("tx_bytes", current_stats.tx_bytes),
-	I40EVF_STAT("tx_unicast", current_stats.tx_unicast),
-	I40EVF_STAT("tx_multicast", current_stats.tx_multicast),
-	I40EVF_STAT("tx_broadcast", current_stats.tx_broadcast),
-	I40EVF_STAT("tx_discards", current_stats.tx_discards),
-	I40EVF_STAT("tx_errors", current_stats.tx_errors),
-};
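
I40EVF_STAT records each statistic's byte offset within the adapter structure, which lets one generic loop copy any mix of fields. A minimal stand-alone version of the pattern (demo_* names are illustrative):

#include <stddef.h>
#include <stdint.h>

struct demo_adapter { int irq; uint64_t rx_bytes, tx_bytes; };

struct demo_stat { const char *name; size_t offset; };

#define DEMO_STAT(_name, _field) \
	{ .name = _name, .offset = offsetof(struct demo_adapter, _field) }

static const struct demo_stat demo_stats[] = {
	DEMO_STAT("rx_bytes", rx_bytes),
	DEMO_STAT("tx_bytes", tx_bytes),
};

/* generic copy loop, mirroring i40evf_get_ethtool_stats() */
static void demo_get_stats(const struct demo_adapter *ad, uint64_t *data)
{
	size_t i;

	for (i = 0; i < sizeof(demo_stats) / sizeof(demo_stats[0]); i++)
		data[i] = *(const uint64_t *)((const char *)ad +
					      demo_stats[i].offset);
}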
-
-#define I40EVF_GLOBAL_STATS_LEN ARRAY_SIZE(i40evf_gstrings_stats)
-#define I40EVF_QUEUE_STATS_LEN(_dev) \
-	(((struct i40evf_adapter *)\
-		netdev_priv(_dev))->num_active_queues \
-		  * 2 * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
-#define I40EVF_STATS_LEN(_dev) \
-	(I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev))
-
-/* For now we have one and only one private flag, and it is only defined
- * when we have support for the SKIP_CPU_SYNC DMA attribute.  Instead of
- * leaving all this code sitting around empty, we strip it unless our one
- * private flag is actually available.
- */
-struct i40evf_priv_flags {
-	char flag_string[ETH_GSTRING_LEN];
-	u32 flag;
-	bool read_only;
-};
-
-#define I40EVF_PRIV_FLAG(_name, _flag, _read_only) { \
-	.flag_string = _name, \
-	.flag = _flag, \
-	.read_only = _read_only, \
-}
-
-static const struct i40evf_priv_flags i40evf_gstrings_priv_flags[] = {
-	I40EVF_PRIV_FLAG("legacy-rx", I40EVF_FLAG_LEGACY_RX, 0),
-};
-
-#define I40EVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40evf_gstrings_priv_flags)
-
-/**
- * i40evf_get_link_ksettings - Get Link Speed and Duplex settings
- * @netdev: network interface device structure
- * @cmd: ethtool command
- *
- * Reports speed/duplex settings. Because this is a VF, we don't know what
- * kind of link we really have, so we fake it.
- **/
-static int i40evf_get_link_ksettings(struct net_device *netdev,
-				     struct ethtool_link_ksettings *cmd)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	ethtool_link_ksettings_zero_link_mode(cmd, supported);
-	cmd->base.autoneg = AUTONEG_DISABLE;
-	cmd->base.port = PORT_NONE;
-	/* Set speed and duplex */
-	switch (adapter->link_speed) {
-	case I40E_LINK_SPEED_40GB:
-		cmd->base.speed = SPEED_40000;
-		break;
-	case I40E_LINK_SPEED_25GB:
-#ifdef SPEED_25000
-		cmd->base.speed = SPEED_25000;
-#else
-		netdev_info(netdev,
-			    "Speed is 25G, display not supported by this version of ethtool.\n");
-#endif
-		break;
-	case I40E_LINK_SPEED_20GB:
-		cmd->base.speed = SPEED_20000;
-		break;
-	case I40E_LINK_SPEED_10GB:
-		cmd->base.speed = SPEED_10000;
-		break;
-	case I40E_LINK_SPEED_1GB:
-		cmd->base.speed = SPEED_1000;
-		break;
-	case I40E_LINK_SPEED_100MB:
-		cmd->base.speed = SPEED_100;
-		break;
-	default:
-		break;
-	}
-	cmd->base.duplex = DUPLEX_FULL;
-
-	return 0;
-}
-
-/**
- * i40evf_get_sset_count - Get length of string set
- * @netdev: network interface device structure
- * @sset: id of string set
- *
- * Reports size of string table. This driver only supports
- * strings for statistics.
- **/
-static int i40evf_get_sset_count(struct net_device *netdev, int sset)
-{
-	if (sset == ETH_SS_STATS)
-		return I40EVF_STATS_LEN(netdev);
-	else if (sset == ETH_SS_PRIV_FLAGS)
-		return I40EVF_PRIV_FLAGS_STR_LEN;
-	else
-		return -EINVAL;
-}
-
-/**
- * i40evf_get_ethtool_stats - report device statistics
- * @netdev: network interface device structure
- * @stats: ethtool statistics structure
- * @data: pointer to data buffer
- *
- * All statistics are added to the data buffer as an array of u64.
- **/
-static void i40evf_get_ethtool_stats(struct net_device *netdev,
-				     struct ethtool_stats *stats, u64 *data)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	unsigned int i, j;
-	char *p;
-
-	for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) {
-		p = (char *)adapter + i40evf_gstrings_stats[i].stat_offset;
-		data[i] =  *(u64 *)p;
-	}
-	for (j = 0; j < adapter->num_active_queues; j++) {
-		data[i++] = adapter->tx_rings[j].stats.packets;
-		data[i++] = adapter->tx_rings[j].stats.bytes;
-	}
-	for (j = 0; j < adapter->num_active_queues; j++) {
-		data[i++] = adapter->rx_rings[j].stats.packets;
-		data[i++] = adapter->rx_rings[j].stats.bytes;
-	}
-}
-
-/**
- * i40evf_get_strings - Get string set
- * @netdev: network interface device structure
- * @sset: id of string set
- * @data: buffer for string data
- *
- * Builds stats string table.
- **/
-static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u8 *p = data;
-	int i;
-
-	if (sset == ETH_SS_STATS) {
-		for (i = 0; i < (int)I40EVF_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, i40evf_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < adapter->num_active_queues; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "tx-%u.packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-%u.bytes", i);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < adapter->num_active_queues; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "rx-%u.packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i);
-			p += ETH_GSTRING_LEN;
-		}
-	} else if (sset == ETH_SS_PRIV_FLAGS) {
-		for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 i40evf_gstrings_priv_flags[i].flag_string);
-			p += ETH_GSTRING_LEN;
-		}
-	}
-}
-
-/**
- * i40evf_get_priv_flags - report device private flags
- * @netdev: network interface device structure
- *
- * The string set count and the string set itself must match for each flag
- * reported.  Add a new string for each flag to the i40evf_gstrings_priv_flags
- * array.
- *
- * Returns a u32 bitmap of flags.
- **/
-static u32 i40evf_get_priv_flags(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u32 i, ret_flags = 0;
-
-	for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
-		const struct i40evf_priv_flags *priv_flags;
-
-		priv_flags = &i40evf_gstrings_priv_flags[i];
-
-		if (priv_flags->flag & adapter->flags)
-			ret_flags |= BIT(i);
-	}
-
-	return ret_flags;
-}
-
-/**
- * i40evf_set_priv_flags - set private flags
- * @netdev: network interface device structure
- * @flags: bit flags to be set
- **/
-static int i40evf_set_priv_flags(struct net_device *netdev, u32 flags)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u32 orig_flags, new_flags, changed_flags;
-	u32 i;
-
-	orig_flags = READ_ONCE(adapter->flags);
-	new_flags = orig_flags;
-
-	for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
-		const struct i40evf_priv_flags *priv_flags;
-
-		priv_flags = &i40evf_gstrings_priv_flags[i];
-
-		if (flags & BIT(i))
-			new_flags |= priv_flags->flag;
-		else
-			new_flags &= ~(priv_flags->flag);
-
-		if (priv_flags->read_only &&
-		    ((orig_flags ^ new_flags) & ~BIT(i)))
-			return -EOPNOTSUPP;
-	}
-
-	/* Before we finalize any flag changes, any checks which we need to
-	 * perform to determine if the new flags will be supported should go
-	 * here...
-	 */
-
-	/* Compare and exchange the new flags into place. If cmpxchg returns
-	 * anything other than the old value, something else must have
-	 * modified the flags variable since we copied it; punt with an
-	 * error and log a message.
-	 */
-	if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) {
-		dev_warn(&adapter->pdev->dev,
-			 "Unable to update adapter->flags as it was modified by another thread...\n");
-		return -EAGAIN;
-	}
-
-	changed_flags = orig_flags ^ new_flags;
-
-	/* Process any additional changes needed as a result of flag changes.
-	 * The changed_flags value reflects the list of bits that were changed
-	 * in the code above.
-	 */
-
-	/* issue a reset to force legacy-rx change to take effect */
-	if (changed_flags & I40EVF_FLAG_LEGACY_RX) {
-		if (netif_running(netdev)) {
-			adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
-			schedule_work(&adapter->reset_task);
-		}
-	}
-
-	return 0;
-}
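
The cmpxchg() above publishes the whole flag word atomically, so a concurrent writer is detected rather than silently overwritten. The same read-modify-publish pattern in portable C11 (a sketch of the idea, not the kernel primitive):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Returns false if *flags changed between the read and the publish,
 * mirroring the cmpxchg(...) != orig_flags check in the driver.
 */
static bool demo_update_flags(_Atomic uint32_t *flags,
			      uint32_t set, uint32_t clear)
{
	uint32_t old = atomic_load(flags);
	uint32_t newv = (old | set) & ~clear;

	return atomic_compare_exchange_strong(flags, &old, newv);
}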
-
-/**
- * i40evf_get_msglevel - Get debug message level
- * @netdev: network interface device structure
- *
- * Returns current debug message level.
- **/
-static u32 i40evf_get_msglevel(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	return adapter->msg_enable;
-}
-
-/**
- * i40evf_set_msglevel - Set debug message level
- * @netdev: network interface device structure
- * @data: message level
- *
- * Set current debug message level. Higher values cause the driver to
- * be noisier.
- **/
-static void i40evf_set_msglevel(struct net_device *netdev, u32 data)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	if (I40E_DEBUG_USER & data)
-		adapter->hw.debug_mask = data;
-	adapter->msg_enable = data;
-}
-
-/**
- * i40evf_get_drvinfo - Get driver info
- * @netdev: network interface device structure
- * @drvinfo: ethtool driver info structure
- *
- * Returns information about the driver and device for display to the user.
- **/
-static void i40evf_get_drvinfo(struct net_device *netdev,
-			       struct ethtool_drvinfo *drvinfo)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	strlcpy(drvinfo->driver, i40evf_driver_name, 32);
-	strlcpy(drvinfo->version, i40evf_driver_version, 32);
-	strlcpy(drvinfo->fw_version, "N/A", 4);
-	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
-	drvinfo->n_priv_flags = I40EVF_PRIV_FLAGS_STR_LEN;
-}
-
-/**
- * i40evf_get_ringparam - Get ring parameters
- * @netdev: network interface device structure
- * @ring: ethtool ringparam structure
- *
- * Returns current ring parameters. TX and RX rings are reported separately,
- * but the number of rings is not reported.
- **/
-static void i40evf_get_ringparam(struct net_device *netdev,
-				 struct ethtool_ringparam *ring)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	ring->rx_max_pending = I40EVF_MAX_RXD;
-	ring->tx_max_pending = I40EVF_MAX_TXD;
-	ring->rx_pending = adapter->rx_desc_count;
-	ring->tx_pending = adapter->tx_desc_count;
-}
-
-/**
- * i40evf_set_ringparam - Set ring parameters
- * @netdev: network interface device structure
- * @ring: ethtool ringparam structure
- *
- * Sets ring parameters. TX and RX rings are controlled separately, but the
- * number of rings is not specified, so all rings get the same settings.
- **/
-static int i40evf_set_ringparam(struct net_device *netdev,
-				struct ethtool_ringparam *ring)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u32 new_rx_count, new_tx_count;
-
-	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
-		return -EINVAL;
-
-	new_tx_count = clamp_t(u32, ring->tx_pending,
-			       I40EVF_MIN_TXD,
-			       I40EVF_MAX_TXD);
-	new_tx_count = ALIGN(new_tx_count, I40EVF_REQ_DESCRIPTOR_MULTIPLE);
-
-	new_rx_count = clamp_t(u32, ring->rx_pending,
-			       I40EVF_MIN_RXD,
-			       I40EVF_MAX_RXD);
-	new_rx_count = ALIGN(new_rx_count, I40EVF_REQ_DESCRIPTOR_MULTIPLE);
-
-	/* if nothing to do return success */
-	if ((new_tx_count == adapter->tx_desc_count) &&
-	    (new_rx_count == adapter->rx_desc_count))
-		return 0;
-
-	adapter->tx_desc_count = new_tx_count;
-	adapter->rx_desc_count = new_rx_count;
-
-	if (netif_running(netdev)) {
-		adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
-		schedule_work(&adapter->reset_task);
-	}
-
-	return 0;
-}
-
-/**
- * __i40evf_get_coalesce - get per-queue coalesce settings
- * @netdev: the netdev to check
- * @ec: ethtool coalesce data structure
- * @queue: which queue to pick
- *
- * Gets the per-queue coalescing settings. Specifically, Rx and Tx usecs
- * are per queue. If queue is < 0 then we default to queue 0 as the
- * representative value.
- **/
-static int __i40evf_get_coalesce(struct net_device *netdev,
-				 struct ethtool_coalesce *ec,
-				 int queue)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_vsi *vsi = &adapter->vsi;
-	struct i40e_ring *rx_ring, *tx_ring;
-
-	ec->tx_max_coalesced_frames = vsi->work_limit;
-	ec->rx_max_coalesced_frames = vsi->work_limit;
-
-	/* Rx and Tx usecs are per-queue values. If the user doesn't specify
-	 * a queue, return queue 0's values as representative.
-	 */
-	if (queue < 0)
-		queue = 0;
-	else if (queue >= adapter->num_active_queues)
-		return -EINVAL;
-
-	rx_ring = &adapter->rx_rings[queue];
-	tx_ring = &adapter->tx_rings[queue];
-
-	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
-		ec->use_adaptive_rx_coalesce = 1;
-
-	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
-		ec->use_adaptive_tx_coalesce = 1;
-
-	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
-	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
-
-	return 0;
-}
-
-/**
- * i40evf_get_coalesce - Get interrupt coalescing settings
- * @netdev: network interface device structure
- * @ec: ethtool coalesce structure
- *
- * Returns current coalescing settings. This is referred to elsewhere in the
- * driver as Interrupt Throttle Rate, as this is how the hardware describes
- * this functionality. Note that if per-queue settings have been modified this
- * only represents the settings of queue 0.
- **/
-static int i40evf_get_coalesce(struct net_device *netdev,
-			       struct ethtool_coalesce *ec)
-{
-	return __i40evf_get_coalesce(netdev, ec, -1);
-}
-
-/**
- * i40evf_get_per_queue_coalesce - get coalesce values for specific queue
- * @netdev: netdev to read
- * @ec: coalesce settings from ethtool
- * @queue: the queue to read
- *
- * Read specific queue's coalesce settings.
- **/
-static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
-					 u32 queue,
-					 struct ethtool_coalesce *ec)
-{
-	return __i40evf_get_coalesce(netdev, ec, queue);
-}
-
-/**
- * i40evf_set_itr_per_queue - set ITR values for specific queue
- * @adapter: the VF adapter struct to set values for
- * @ec: coalesce settings from ethtool
- * @queue: the queue to modify
- *
- * Change the ITR settings for a specific queue.
- **/
-static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
-				     struct ethtool_coalesce *ec,
-				     int queue)
-{
-	struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
-	struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
-	struct i40e_q_vector *q_vector;
-
-	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
-	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
-
-	rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
-	if (!ec->use_adaptive_rx_coalesce)
-		rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
-
-	tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
-	if (!ec->use_adaptive_tx_coalesce)
-		tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
-
-	q_vector = rx_ring->q_vector;
-	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
-
-	q_vector = tx_ring->q_vector;
-	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
-
-	/* The interrupt handler itself will take care of programming
-	 * the Tx and Rx ITR values based on the values we have entered
-	 * into the q_vector, no need to write the values now.
-	 */
-}
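
The `|=` followed by a conditional `^=` above is a compact way of saying "set the DYNAMIC bit, then strip it again if adaptive mode is off". Written out plainly (illustrative helper, not driver API):

static unsigned int demo_itr_setting(unsigned int usecs_aligned,
				     int adaptive, unsigned int dynamic_bit)
{
	if (adaptive)
		return usecs_aligned | dynamic_bit;
	return usecs_aligned & ~dynamic_bit;
}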
-
-/**
- * __i40evf_set_coalesce - set coalesce settings for particular queue
- * @netdev: the netdev to change
- * @ec: ethtool coalesce settings
- * @queue: the queue to change
- *
- * Sets the coalesce settings for a particular queue.
- **/
-static int __i40evf_set_coalesce(struct net_device *netdev,
-				 struct ethtool_coalesce *ec,
-				 int queue)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_vsi *vsi = &adapter->vsi;
-	int i;
-
-	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
-		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
-	if (ec->rx_coalesce_usecs == 0) {
-		if (ec->use_adaptive_rx_coalesce)
-			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
-	} else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
-		   (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
-		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
-		return -EINVAL;
-	}
-
-	if (ec->tx_coalesce_usecs == 0) {
-		if (ec->use_adaptive_tx_coalesce)
-			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
-	} else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
-		   (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
-		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
-		return -EINVAL;
-	}
-
-	/* Rx and Tx usecs are per-queue values. If the user doesn't specify
-	 * a queue, apply to all queues.
-	 */
-	if (queue < 0) {
-		for (i = 0; i < adapter->num_active_queues; i++)
-			i40evf_set_itr_per_queue(adapter, ec, i);
-	} else if (queue < adapter->num_active_queues) {
-		i40evf_set_itr_per_queue(adapter, ec, queue);
-	} else {
-		netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
-			   adapter->num_active_queues - 1);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * i40evf_set_coalesce - Set interrupt coalescing settings
- * @netdev: network interface device structure
- * @ec: ethtool coalesce structure
- *
- * Change current coalescing settings for every queue.
- **/
-static int i40evf_set_coalesce(struct net_device *netdev,
-			       struct ethtool_coalesce *ec)
-{
-	return __i40evf_set_coalesce(netdev, ec, -1);
-}
-
-/**
- * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings
- * @netdev: the netdev to change
- * @ec: ethtool's coalesce settings
- * @queue: the queue to modify
- *
- * Modifies a specific queue's coalesce settings.
- */
-static int i40evf_set_per_queue_coalesce(struct net_device *netdev,
-					 u32 queue,
-					 struct ethtool_coalesce *ec)
-{
-	return __i40evf_set_coalesce(netdev, ec, queue);
-}
-
-/**
- * i40evf_get_rxnfc - command to get RX flow classification rules
- * @netdev: network interface device structure
- * @cmd: ethtool rxnfc command
- * @rule_locs: pointer to store rule locations
- *
- * Returns Success if the command is supported.
- **/
-static int i40evf_get_rxnfc(struct net_device *netdev,
-			    struct ethtool_rxnfc *cmd,
-			    u32 *rule_locs)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int ret = -EOPNOTSUPP;
-
-	switch (cmd->cmd) {
-	case ETHTOOL_GRXRINGS:
-		cmd->data = adapter->num_active_queues;
-		ret = 0;
-		break;
-	case ETHTOOL_GRXFH:
-		netdev_info(netdev,
-			    "RSS hash info is not available to the VF; use the PF.\n");
-		break;
-	default:
-		break;
-	}
-
-	return ret;
-}
-
-/**
- * i40evf_get_channels - get the number of channels supported by the device
- * @netdev: network interface device structure
- * @ch: channel information structure
- *
- * For the purposes of our device, we only use combined channels, i.e. a tx/rx
- * queue pair. Report one extra channel to match our "other" MSI-X vector.
- **/
-static void i40evf_get_channels(struct net_device *netdev,
-				struct ethtool_channels *ch)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	/* Report maximum channels */
-	ch->max_combined = I40EVF_MAX_REQ_QUEUES;
-
-	ch->max_other = NONQ_VECS;
-	ch->other_count = NONQ_VECS;
-
-	ch->combined_count = adapter->num_active_queues;
-}
-
-/**
- * i40evf_set_channels - set the new channel count
- * @netdev: network interface device structure
- * @ch: channel information structure
- *
- * Negotiate a new number of channels with the PF then do a reset.  During
- * reset we'll realloc queues and fix the RSS table.  Returns 0 on success,
- * negative on failure.
- **/
-static int i40evf_set_channels(struct net_device *netdev,
-			       struct ethtool_channels *ch)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int num_req = ch->combined_count;
-
-	if (num_req != adapter->num_active_queues &&
-	    !(adapter->vf_res->vf_cap_flags &
-	      VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) {
-		dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n");
-		return -EINVAL;
-	}
-
-	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
-	    adapter->num_tc) {
-		dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
-		return -EINVAL;
-	}
-
-	/* All of these should have already been checked by ethtool before this
-	 * even gets to us, but just to be sure.
-	 */
-	if (num_req <= 0 || num_req > I40EVF_MAX_REQ_QUEUES)
-		return -EINVAL;
-
-	if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS)
-		return -EINVAL;
-
-	adapter->num_req_queues = num_req;
-	return i40evf_request_queues(adapter, num_req);
-}
-
-/**
- * i40evf_get_rxfh_key_size - get the RSS hash key size
- * @netdev: network interface device structure
- *
- * Returns the RSS hash key size.
- **/
-static u32 i40evf_get_rxfh_key_size(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	return adapter->rss_key_size;
-}
-
-/**
- * i40evf_get_rxfh_indir_size - get the rx flow hash indirection table size
- * @netdev: network interface device structure
- *
- * Returns the table size.
- **/
-static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	return adapter->rss_lut_size;
-}
-
-/**
- * i40evf_get_rxfh - get the rx flow hash indirection table
- * @netdev: network interface device structure
- * @indir: indirection table
- * @key: hash key
- * @hfunc: hash function in use
- *
- * Reads the indirection table directly from the hardware. Always returns 0.
- **/
-static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
-			   u8 *hfunc)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u16 i;
-
-	if (hfunc)
-		*hfunc = ETH_RSS_HASH_TOP;
-	if (key)
-		memcpy(key, adapter->rss_key, adapter->rss_key_size);
-	if (!indir)
-		return 0;
-
-	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
-	for (i = 0; i < adapter->rss_lut_size; i++)
-		indir[i] = (u32)adapter->rss_lut[i];
-
-	return 0;
-}
-
-/**
- * i40evf_set_rxfh - set the rx flow hash indirection table
- * @netdev: network interface device structure
- * @indir: indirection table
- * @key: hash key
- * @hfunc: hash function to use
- *
- * Returns -EINVAL if the table specifies an invalid queue id, otherwise
- * returns 0 after programming the table.
- **/
-static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
-			   const u8 *key, const u8 hfunc)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u16 i;
-
-	/* We do not allow changes to unsupported parameters; any key update
-	 * or hash function other than Toeplitz is rejected outright, so
-	 * there is no key to copy below.
-	 */
-	if (key ||
-	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
-		return -EOPNOTSUPP;
-	if (!indir)
-		return 0;
-
-	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
-	for (i = 0; i < adapter->rss_lut_size; i++)
-		adapter->rss_lut[i] = (u8)(indir[i]);
-
-	return i40evf_config_rss(adapter);
-}
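
Each LUT entry maps a hash bucket to a queue: the hardware indexes the table with the low bits of the packet's RSS hash. A sketch of that lookup, assuming a power-of-two LUT size as used here:

#include <stdint.h>

static uint8_t demo_rss_queue(const uint8_t *lut, uint32_t lut_size,
			      uint32_t hash)
{
	/* power-of-two size makes the mask equivalent to a modulo */
	return lut[hash & (lut_size - 1)];
}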
-
-static const struct ethtool_ops i40evf_ethtool_ops = {
-	.get_drvinfo		= i40evf_get_drvinfo,
-	.get_link		= ethtool_op_get_link,
-	.get_ringparam		= i40evf_get_ringparam,
-	.set_ringparam		= i40evf_set_ringparam,
-	.get_strings		= i40evf_get_strings,
-	.get_ethtool_stats	= i40evf_get_ethtool_stats,
-	.get_sset_count		= i40evf_get_sset_count,
-	.get_priv_flags		= i40evf_get_priv_flags,
-	.set_priv_flags		= i40evf_set_priv_flags,
-	.get_msglevel		= i40evf_get_msglevel,
-	.set_msglevel		= i40evf_set_msglevel,
-	.get_coalesce		= i40evf_get_coalesce,
-	.set_coalesce		= i40evf_set_coalesce,
-	.get_per_queue_coalesce = i40evf_get_per_queue_coalesce,
-	.set_per_queue_coalesce = i40evf_set_per_queue_coalesce,
-	.get_rxnfc		= i40evf_get_rxnfc,
-	.get_rxfh_indir_size	= i40evf_get_rxfh_indir_size,
-	.get_rxfh		= i40evf_get_rxfh,
-	.set_rxfh		= i40evf_set_rxfh,
-	.get_channels		= i40evf_get_channels,
-	.set_channels		= i40evf_set_channels,
-	.get_rxfh_key_size	= i40evf_get_rxfh_key_size,
-	.get_link_ksettings	= i40evf_get_link_ksettings,
-};
-
-/**
- * i40evf_set_ethtool_ops - Initialize ethtool ops struct
- * @netdev: network interface device structure
- *
- * Sets ethtool ops struct in our netdev so that ethtool can call
- * our functions.
- **/
-void i40evf_set_ethtool_ops(struct net_device *netdev)
-{
-	netdev->ethtool_ops = &i40evf_ethtool_ops;
-}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
deleted file mode 100644
index fef6d89..0000000
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ /dev/null
@@ -1,3982 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#include "i40evf.h"
-#include "i40e_prototype.h"
-#include "i40evf_client.h"
-/* All i40evf tracepoints are defined by the include below, which must
- * be included exactly once across the whole kernel with
- * CREATE_TRACE_POINTS defined
- */
-#define CREATE_TRACE_POINTS
-#include "i40e_trace.h"
-
-static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter);
-static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter);
-static int i40evf_close(struct net_device *netdev);
-
-char i40evf_driver_name[] = "i40evf";
-static const char i40evf_driver_string[] =
-	"Intel(R) 40-10 Gigabit Virtual Function Network Driver";
-
-#define DRV_KERN "-k"
-
-#define DRV_VERSION_MAJOR 3
-#define DRV_VERSION_MINOR 2
-#define DRV_VERSION_BUILD 2
-#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
-	     __stringify(DRV_VERSION_MINOR) "." \
-	     __stringify(DRV_VERSION_BUILD) \
-	     DRV_KERN
-const char i40evf_driver_version[] = DRV_VERSION;
-static const char i40evf_copyright[] =
-	"Copyright (c) 2013 - 2015 Intel Corporation.";
-
-/* i40evf_pci_tbl - PCI Device ID Table
- *
- * Wildcard entries (PCI_ANY_ID) should come last
- * Last entry must be all 0s
- *
- * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
- *   Class, Class Mask, private data (not used) }
- */
-static const struct pci_device_id i40evf_pci_tbl[] = {
-	{PCI_VDEVICE(INTEL, I40E_DEV_ID_VF), 0},
-	{PCI_VDEVICE(INTEL, I40E_DEV_ID_VF_HV), 0},
-	{PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF), 0},
-	{PCI_VDEVICE(INTEL, I40E_DEV_ID_ADAPTIVE_VF), 0},
-	/* required last entry */
-	{0, }
-};
-
-MODULE_DEVICE_TABLE(pci, i40evf_pci_tbl);
-
-MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
-MODULE_DESCRIPTION("Intel(R) XL710 X710 Virtual Function Network Driver");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static struct workqueue_struct *i40evf_wq;
-
-/**
- * i40evf_allocate_dma_mem_d - OS specific memory alloc for shared code
- * @hw:   pointer to the HW structure
- * @mem:  ptr to mem struct to fill out
- * @size: size of memory requested
- * @alignment: what to align the allocation to
- **/
-i40e_status i40evf_allocate_dma_mem_d(struct i40e_hw *hw,
-				      struct i40e_dma_mem *mem,
-				      u64 size, u32 alignment)
-{
-	struct i40evf_adapter *adapter = (struct i40evf_adapter *)hw->back;
-
-	if (!mem)
-		return I40E_ERR_PARAM;
-
-	mem->size = ALIGN(size, alignment);
-	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
-				     (dma_addr_t *)&mem->pa, GFP_KERNEL);
-	if (mem->va)
-		return 0;
-	else
-		return I40E_ERR_NO_MEMORY;
-}
-
-/**
- * i40evf_free_dma_mem_d - OS specific memory free for shared code
- * @hw:   pointer to the HW structure
- * @mem:  ptr to mem struct to free
- **/
-i40e_status i40evf_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
-{
-	struct i40evf_adapter *adapter = (struct i40evf_adapter *)hw->back;
-
-	if (!mem || !mem->va)
-		return I40E_ERR_PARAM;
-	dma_free_coherent(&adapter->pdev->dev, mem->size,
-			  mem->va, (dma_addr_t)mem->pa);
-	return 0;
-}
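
Shared-code callers pair these two helpers around the lifetime of a DMA area. A hypothetical caller (demo_ring_init is illustrative; the 4096-byte size and alignment are arbitrary):

static i40e_status demo_ring_init(struct i40e_hw *hw,
				  struct i40e_dma_mem *mem)
{
	i40e_status err;

	err = i40evf_allocate_dma_mem_d(hw, mem, 4096, 4096);
	if (err)
		return err;

	/* ... program mem->pa into hardware and use mem->va here ... */

	return i40evf_free_dma_mem_d(hw, mem);
}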
-
-/**
- * i40evf_allocate_virt_mem_d - OS specific memory alloc for shared code
- * @hw:   pointer to the HW structure
- * @mem:  ptr to mem struct to fill out
- * @size: size of memory requested
- **/
-i40e_status i40evf_allocate_virt_mem_d(struct i40e_hw *hw,
-				       struct i40e_virt_mem *mem, u32 size)
-{
-	if (!mem)
-		return I40E_ERR_PARAM;
-
-	mem->size = size;
-	mem->va = kzalloc(size, GFP_KERNEL);
-
-	if (mem->va)
-		return 0;
-	else
-		return I40E_ERR_NO_MEMORY;
-}
-
-/**
- * i40evf_free_virt_mem_d - OS specific memory free for shared code
- * @hw:   pointer to the HW structure
- * @mem:  ptr to mem struct to free
- **/
-i40e_status i40evf_free_virt_mem_d(struct i40e_hw *hw,
-				   struct i40e_virt_mem *mem)
-{
-	if (!mem)
-		return I40E_ERR_PARAM;
-
-	/* it's ok to kfree a NULL pointer */
-	kfree(mem->va);
-
-	return 0;
-}
-
-/**
- * i40evf_debug_d - OS dependent version of debug printing
- * @hw:  pointer to the HW structure
- * @mask: debug level mask
- * @fmt_str: printf-type format description
- **/
-void i40evf_debug_d(void *hw, u32 mask, char *fmt_str, ...)
-{
-	char buf[512];
-	va_list argptr;
-
-	if (!(mask & ((struct i40e_hw *)hw)->debug_mask))
-		return;
-
-	va_start(argptr, fmt_str);
-	vsnprintf(buf, sizeof(buf), fmt_str, argptr);
-	va_end(argptr);
-
-	/* the debug string is already formatted with a newline */
-	pr_info("%s", buf);
-}
-
-/**
- * i40evf_schedule_reset - Set the flags and schedule a reset event
- * @adapter: board private structure
- **/
-void i40evf_schedule_reset(struct i40evf_adapter *adapter)
-{
-	if (!(adapter->flags &
-	      (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED))) {
-		adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
-		schedule_work(&adapter->reset_task);
-	}
-}
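
Because the flag test and set happen before queuing, repeated reset requests collapse into a single reset while one is pending. The guard in isolation (a sketch; the real driver relies on the caller's context for safety):

/* Returns nonzero when the caller should actually queue the reset work. */
static int demo_should_schedule(unsigned int *flags,
				unsigned int pending, unsigned int needed)
{
	if (*flags & (pending | needed))
		return 0;	/* a reset is already in flight */
	*flags |= needed;
	return 1;
}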
-
-/**
- * i40evf_tx_timeout - Respond to a Tx Hang
- * @netdev: network interface device structure
- **/
-static void i40evf_tx_timeout(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	adapter->tx_timeout_count++;
-	i40evf_schedule_reset(adapter);
-}
-
-/**
- * i40evf_misc_irq_disable - Mask off interrupt generation on the NIC
- * @adapter: board private structure
- **/
-static void i40evf_misc_irq_disable(struct i40evf_adapter *adapter)
-{
-	struct i40e_hw *hw = &adapter->hw;
-
-	if (!adapter->msix_entries)
-		return;
-
-	wr32(hw, I40E_VFINT_DYN_CTL01, 0);
-
-	/* read flush */
-	rd32(hw, I40E_VFGEN_RSTAT);
-
-	synchronize_irq(adapter->msix_entries[0].vector);
-}
-
-/**
- * i40evf_misc_irq_enable - Enable default interrupt generation settings
- * @adapter: board private structure
- **/
-static void i40evf_misc_irq_enable(struct i40evf_adapter *adapter)
-{
-	struct i40e_hw *hw = &adapter->hw;
-
-	wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK |
-				       I40E_VFINT_DYN_CTL01_ITR_INDX_MASK);
-	wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK);
-
-	/* read flush */
-	rd32(hw, I40E_VFGEN_RSTAT);
-}
-
-/**
- * i40evf_irq_disable - Mask off interrupt generation on the NIC
- * @adapter: board private structure
- **/
-static void i40evf_irq_disable(struct i40evf_adapter *adapter)
-{
-	int i;
-	struct i40e_hw *hw = &adapter->hw;
-
-	if (!adapter->msix_entries)
-		return;
-
-	for (i = 1; i < adapter->num_msix_vectors; i++) {
-		wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1), 0);
-		synchronize_irq(adapter->msix_entries[i].vector);
-	}
-	/* read flush */
-	rd32(hw, I40E_VFGEN_RSTAT);
-}
-
-/**
- * i40evf_irq_enable_queues - Enable interrupt for specified queues
- * @adapter: board private structure
- * @mask: bitmap of queues to enable
- **/
-void i40evf_irq_enable_queues(struct i40evf_adapter *adapter, u32 mask)
-{
-	struct i40e_hw *hw = &adapter->hw;
-	int i;
-
-	for (i = 1; i < adapter->num_msix_vectors; i++) {
-		if (mask & BIT(i - 1)) {
-			wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1),
-			     I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-			     I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK);
-		}
-	}
-}
-
-/**
- * i40evf_irq_enable - Enable default interrupt generation settings
- * @adapter: board private structure
- * @flush: boolean value whether to run rd32()
- **/
-void i40evf_irq_enable(struct i40evf_adapter *adapter, bool flush)
-{
-	struct i40e_hw *hw = &adapter->hw;
-
-	i40evf_misc_irq_enable(adapter);
-	i40evf_irq_enable_queues(adapter, ~0);
-
-	if (flush)
-		rd32(hw, I40E_VFGEN_RSTAT);
-}
-
-/**
- * i40evf_msix_aq - Interrupt handler for vector 0
- * @irq: interrupt number
- * @data: pointer to netdev
- **/
-static irqreturn_t i40evf_msix_aq(int irq, void *data)
-{
-	struct net_device *netdev = data;
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_hw *hw = &adapter->hw;
-
-	/* handle non-queue interrupts; these reads clear the registers */
-	rd32(hw, I40E_VFINT_ICR01);
-	rd32(hw, I40E_VFINT_ICR0_ENA1);
-
-	/* schedule work on the private workqueue */
-	schedule_work(&adapter->adminq_task);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * i40evf_msix_clean_rings - MSIX mode Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a q_vector
- **/
-static irqreturn_t i40evf_msix_clean_rings(int irq, void *data)
-{
-	struct i40e_q_vector *q_vector = data;
-
-	if (!q_vector->tx.ring && !q_vector->rx.ring)
-		return IRQ_HANDLED;
-
-	napi_schedule_irqoff(&q_vector->napi);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * i40evf_map_vector_to_rxq - associate irqs with rx queues
- * @adapter: board private structure
- * @v_idx: interrupt number
- * @r_idx: queue number
- **/
-static void
-i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
-{
-	struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
-	struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx];
-	struct i40e_hw *hw = &adapter->hw;
-
-	rx_ring->q_vector = q_vector;
-	rx_ring->next = q_vector->rx.ring;
-	rx_ring->vsi = &adapter->vsi;
-	q_vector->rx.ring = rx_ring;
-	q_vector->rx.count++;
-	q_vector->rx.next_update = jiffies + 1;
-	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
-	q_vector->ring_mask |= BIT(r_idx);
-	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
-	     q_vector->rx.current_itr);
-	q_vector->rx.current_itr = q_vector->rx.target_itr;
-}
-
-/**
- * i40evf_map_vector_to_txq - associate irqs with tx queues
- * @adapter: board private structure
- * @v_idx: interrupt number
- * @t_idx: queue number
- **/
-static void
-i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
-{
-	struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
-	struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx];
-	struct i40e_hw *hw = &adapter->hw;
-
-	tx_ring->q_vector = q_vector;
-	tx_ring->next = q_vector->tx.ring;
-	tx_ring->vsi = &adapter->vsi;
-	q_vector->tx.ring = tx_ring;
-	q_vector->tx.count++;
-	q_vector->tx.next_update = jiffies + 1;
-	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
-	q_vector->num_ringpairs++;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
-	     q_vector->tx.target_itr);
-	q_vector->tx.current_itr = q_vector->tx.target_itr;
-}
-
-/**
- * i40evf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code.  Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible.  You would add new
- * mapping configurations in here.
- **/
-static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter)
-{
-	int rings_remaining = adapter->num_active_queues;
-	int ridx = 0, vidx = 0;
-	int q_vectors;
-
-	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-
-	for (; ridx < rings_remaining; ridx++) {
-		i40evf_map_vector_to_rxq(adapter, vidx, ridx);
-		i40evf_map_vector_to_txq(adapter, vidx, ridx);
-
-		/* In the case where we have more queues than vectors, continue
-		 * round-robin on vectors until all queues are mapped.
-		 */
-		if (++vidx >= q_vectors)
-			vidx = 0;
-	}
-
-	adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS;
-}
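
With fewer vectors than queues, the wrap-around above lands queue q on vector q % q_vectors, spreading ring pairs as evenly as integer division allows. The distribution in isolation (demo_map is illustrative):

static void demo_map(int num_queues, int q_vectors,
		     void (*map)(int vector, int queue))
{
	int q, v = 0;

	for (q = 0; q < num_queues; q++) {
		map(v, q);		/* queue q -> vector q % q_vectors */
		if (++v >= q_vectors)
			v = 0;
	}
}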
-
-/**
- * i40evf_irq_affinity_notify - Callback for affinity changes
- * @notify: context as to what irq was changed
- * @mask: the new affinity mask
- *
- * This is a callback function used by the irq_set_affinity_notifier function
- * so that we may register to receive changes to the irq affinity masks.
- **/
-static void i40evf_irq_affinity_notify(struct irq_affinity_notify *notify,
-				       const cpumask_t *mask)
-{
-	struct i40e_q_vector *q_vector =
-		container_of(notify, struct i40e_q_vector, affinity_notify);
-
-	cpumask_copy(&q_vector->affinity_mask, mask);
-}
-
-/**
- * i40evf_irq_affinity_release - Callback for affinity notifier release
- * @ref: internal core kernel usage
- *
- * This is a callback function used by the irq_set_affinity_notifier function
- * to inform the current notification subscriber that they will no longer
- * receive notifications.
- **/
-static void i40evf_irq_affinity_release(struct kref *ref) {}
-
-/**
- * i40evf_request_traffic_irqs - Initialize MSI-X interrupts
- * @adapter: board private structure
- * @basename: device basename
- *
- * Allocates MSI-X vectors for tx and rx handling, and requests
- * interrupts from the kernel.
- **/
-static int
-i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
-{
-	unsigned int vector, q_vectors;
-	unsigned int rx_int_idx = 0, tx_int_idx = 0;
-	int irq_num, err;
-	int cpu;
-
-	i40evf_irq_disable(adapter);
-	/* Decrement for Other and TCP Timer vectors */
-	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-
-	for (vector = 0; vector < q_vectors; vector++) {
-		struct i40e_q_vector *q_vector = &adapter->q_vectors[vector];
-		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
-
-		if (q_vector->tx.ring && q_vector->rx.ring) {
-			snprintf(q_vector->name, sizeof(q_vector->name),
-				 "i40evf-%s-TxRx-%d", basename, rx_int_idx++);
-			tx_int_idx++;
-		} else if (q_vector->rx.ring) {
-			snprintf(q_vector->name, sizeof(q_vector->name),
-				 "i40evf-%s-rx-%d", basename, rx_int_idx++);
-		} else if (q_vector->tx.ring) {
-			snprintf(q_vector->name, sizeof(q_vector->name),
-				 "i40evf-%s-tx-%d", basename, tx_int_idx++);
-		} else {
-			/* skip this unused q_vector */
-			continue;
-		}
-		err = request_irq(irq_num,
-				  i40evf_msix_clean_rings,
-				  0,
-				  q_vector->name,
-				  q_vector);
-		if (err) {
-			dev_info(&adapter->pdev->dev,
-				 "Request_irq failed, error: %d\n", err);
-			goto free_queue_irqs;
-		}
-		/* register for affinity change notifications */
-		q_vector->affinity_notify.notify = i40evf_irq_affinity_notify;
-		q_vector->affinity_notify.release =
-						   i40evf_irq_affinity_release;
-		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
-		/* Spread the IRQ affinity hints across online CPUs. Note that
-		 * get_cpu_mask returns a mask with a permanent lifetime so
-		 * it's safe to use as a hint for irq_set_affinity_hint.
-		 */
-		cpu = cpumask_local_spread(q_vector->v_idx, -1);
-		irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
-	}
-
-	return 0;
-
-free_queue_irqs:
-	while (vector) {
-		vector--;
-		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
-		irq_set_affinity_notifier(irq_num, NULL);
-		irq_set_affinity_hint(irq_num, NULL);
-		free_irq(irq_num, &adapter->q_vectors[vector]);
-	}
-	return err;
-}
-
-/**
- * i40evf_request_misc_irq - Initialize MSI-X interrupts
- * @adapter: board private structure
- *
- * Allocates MSI-X vector 0 and requests interrupts from the kernel. This
- * vector is only for the admin queue, and stays active even when the netdev
- * is closed.
- **/
-static int i40evf_request_misc_irq(struct i40evf_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	int err;
-
-	snprintf(adapter->misc_vector_name,
-		 sizeof(adapter->misc_vector_name) - 1, "i40evf-%s:mbx",
-		 dev_name(&adapter->pdev->dev));
-	err = request_irq(adapter->msix_entries[0].vector,
-			  &i40evf_msix_aq, 0,
-			  adapter->misc_vector_name, netdev);
-	if (err) {
-		dev_err(&adapter->pdev->dev,
-			"request_irq for %s failed: %d\n",
-			adapter->misc_vector_name, err);
-		free_irq(adapter->msix_entries[0].vector, netdev);
-	}
-	return err;
-}
-
-/**
- * i40evf_free_traffic_irqs - Free MSI-X interrupts
- * @adapter: board private structure
- *
- * Frees all MSI-X vectors other than 0.
- **/
-static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter)
-{
-	int vector, irq_num, q_vectors;
-
-	if (!adapter->msix_entries)
-		return;
-
-	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-
-	for (vector = 0; vector < q_vectors; vector++) {
-		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
-		irq_set_affinity_notifier(irq_num, NULL);
-		irq_set_affinity_hint(irq_num, NULL);
-		free_irq(irq_num, &adapter->q_vectors[vector]);
-	}
-}
-
-/**
- * i40evf_free_misc_irq - Free MSI-X miscellaneous vector
- * @adapter: board private structure
- *
- * Frees MSI-X vector 0.
- **/
-static void i40evf_free_misc_irq(struct i40evf_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-
-	if (!adapter->msix_entries)
-		return;
-
-	free_irq(adapter->msix_entries[0].vector, netdev);
-}
-
-/**
- * i40evf_configure_tx - Configure Transmit Unit after Reset
- * @adapter: board private structure
- *
- * Configure the Tx unit of the MAC after a reset.
- **/
-static void i40evf_configure_tx(struct i40evf_adapter *adapter)
-{
-	struct i40e_hw *hw = &adapter->hw;
-	int i;
-
-	for (i = 0; i < adapter->num_active_queues; i++)
-		adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i);
-}
-
-/**
- * i40evf_configure_rx - Configure Receive Unit after Reset
- * @adapter: board private structure
- *
- * Configure the Rx unit of the MAC after a reset.
- **/
-static void i40evf_configure_rx(struct i40evf_adapter *adapter)
-{
-	unsigned int rx_buf_len = I40E_RXBUFFER_2048;
-	struct i40e_hw *hw = &adapter->hw;
-	int i;
-
-	/* Legacy Rx will always default to a 2048 buffer size. */
-#if (PAGE_SIZE < 8192)
-	if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX)) {
-		struct net_device *netdev = adapter->netdev;
-
-		/* For jumbo frames on systems with 4K pages we have to use
-		 * an order 1 page, so we might as well increase the size
-		 * of our Rx buffer to make better use of the available space
-		 */
-		rx_buf_len = I40E_RXBUFFER_3072;
-
-		/* We use a 1536 buffer size for configurations with
-		 * standard Ethernet mtu.  On x86 this gives us enough room
-		 * for shared info and 192 bytes of padding.
-		 */
-		if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
-		    (netdev->mtu <= ETH_DATA_LEN))
-			rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
-	}
-#endif
-
-	for (i = 0; i < adapter->num_active_queues; i++) {
-		adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
-		adapter->rx_rings[i].rx_buf_len = rx_buf_len;
-
-		if (adapter->flags & I40EVF_FLAG_LEGACY_RX)
-			clear_ring_build_skb_enabled(&adapter->rx_rings[i]);
-		else
-			set_ring_build_skb_enabled(&adapter->rx_rings[i]);
-	}
-}
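
The buffer-size policy above reduces to three cases on 4K-page systems. A sketch (the constants mirror I40E_RXBUFFER_* and ETH_DATA_LEN, but the helper and its parameters are illustrative):

static unsigned int demo_rx_buf_len(int legacy_rx, int padding_too_big,
				    unsigned int mtu,
				    unsigned int net_ip_align)
{
	if (legacy_rx)
		return 2048;			/* I40E_RXBUFFER_2048 */
	if (!padding_too_big && mtu <= 1500)	/* <= ETH_DATA_LEN */
		return 1536 - net_ip_align;	/* I40E_RXBUFFER_1536 */
	return 3072;				/* I40E_RXBUFFER_3072 */
}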
-
-/**
- * i40evf_find_vlan - Search filter list for specific vlan filter
- * @adapter: board private structure
- * @vlan: vlan tag
- *
- * Returns ptr to the filter object or NULL. Must be called while holding the
- * mac_vlan_list_lock.
- **/
-static struct
-i40evf_vlan_filter *i40evf_find_vlan(struct i40evf_adapter *adapter, u16 vlan)
-{
-	struct i40evf_vlan_filter *f;
-
-	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
-		if (vlan == f->vlan)
-			return f;
-	}
-	return NULL;
-}
-
-/**
- * i40evf_add_vlan - Add a vlan filter to the list
- * @adapter: board private structure
- * @vlan: VLAN tag
- *
- * Returns ptr to the filter object or NULL when no memory available.
- **/
-static struct
-i40evf_vlan_filter *i40evf_add_vlan(struct i40evf_adapter *adapter, u16 vlan)
-{
-	struct i40evf_vlan_filter *f = NULL;
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	f = i40evf_find_vlan(adapter, vlan);
-	if (!f) {
-		f = kzalloc(sizeof(*f), GFP_KERNEL);
-		if (!f)
-			goto clearout;
-
-		f->vlan = vlan;
-
-		INIT_LIST_HEAD(&f->list);
-		list_add(&f->list, &adapter->vlan_filter_list);
-		f->add = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
-	}
-
-clearout:
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-	return f;
-}
-
-/**
- * i40evf_del_vlan - Remove a vlan filter from the list
- * @adapter: board private structure
- * @vlan: VLAN tag
- **/
-static void i40evf_del_vlan(struct i40evf_adapter *adapter, u16 vlan)
-{
-	struct i40evf_vlan_filter *f;
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	f = i40evf_find_vlan(adapter, vlan);
-	if (f) {
-		f->remove = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
-	}
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-}
-
-/**
- * i40evf_vlan_rx_add_vid - Add a VLAN filter to a device
- * @netdev: network device struct
- * @proto: unused protocol data
- * @vid: VLAN tag
- **/
-static int i40evf_vlan_rx_add_vid(struct net_device *netdev,
-				  __always_unused __be16 proto, u16 vid)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	if (!VLAN_ALLOWED(adapter))
-		return -EIO;
-	if (i40evf_add_vlan(adapter, vid) == NULL)
-		return -ENOMEM;
-	return 0;
-}
-
-/**
- * i40evf_vlan_rx_kill_vid - Remove a VLAN filter from a device
- * @netdev: network device struct
- * @proto: unused protocol data
- * @vid: VLAN tag
- **/
-static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
-				   __always_unused __be16 proto, u16 vid)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	if (VLAN_ALLOWED(adapter)) {
-		i40evf_del_vlan(adapter, vid);
-		return 0;
-	}
-	return -EIO;
-}
-
-/**
- * i40evf_find_filter - Search filter list for specific mac filter
- * @adapter: board private structure
- * @macaddr: the MAC address
- *
- * Returns ptr to the filter object or NULL. Must be called while holding the
- * mac_vlan_list_lock.
- **/
-static struct
-i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
-				      const u8 *macaddr)
-{
-	struct i40evf_mac_filter *f;
-
-	if (!macaddr)
-		return NULL;
-
-	list_for_each_entry(f, &adapter->mac_filter_list, list) {
-		if (ether_addr_equal(macaddr, f->macaddr))
-			return f;
-	}
-	return NULL;
-}
-
-/**
- * i40evf_add_filter - Add a MAC filter to the filter list
- * @adapter: board private structure
- * @macaddr: the MAC address
- *
- * Returns ptr to the filter object or NULL when no memory available.
- **/
-static struct
-i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
-				     const u8 *macaddr)
-{
-	struct i40evf_mac_filter *f;
-
-	if (!macaddr)
-		return NULL;
-
-	f = i40evf_find_filter(adapter, macaddr);
-	if (!f) {
-		f = kzalloc(sizeof(*f), GFP_ATOMIC);
-		if (!f)
-			return f;
-
-		ether_addr_copy(f->macaddr, macaddr);
-
-		list_add_tail(&f->list, &adapter->mac_filter_list);
-		f->add = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
-	} else {
-		f->remove = false;
-	}
-
-	return f;
-}
-
-/**
- * i40evf_set_mac - NDO callback to set port mac address
- * @netdev: network interface device structure
- * @p: pointer to an address structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_set_mac(struct net_device *netdev, void *p)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_hw *hw = &adapter->hw;
-	struct i40evf_mac_filter *f;
-	struct sockaddr *addr = p;
-
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-
-	if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
-		return 0;
-
-	if (adapter->flags & I40EVF_FLAG_ADDR_SET_BY_PF)
-		return -EPERM;
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	f = i40evf_find_filter(adapter, hw->mac.addr);
-	if (f) {
-		f->remove = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-	}
-
-	f = i40evf_add_filter(adapter, addr->sa_data);
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	if (f) {
-		ether_addr_copy(hw->mac.addr, addr->sa_data);
-		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
-	}
-
-	return (f == NULL) ? -ENOMEM : 0;
-}
-
-/**
- * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
- * @netdev: the netdevice
- * @addr: address to add
- *
- * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
- * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
- */
-static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	if (i40evf_add_filter(adapter, addr))
-		return 0;
-	else
-		return -ENOMEM;
-}
-
-/**
- * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
- * @netdev: the netdevice
- * @addr: address to remove
- *
- * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
- * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
- */
-static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40evf_mac_filter *f;
-
-	/* Under some circumstances, we might receive a request to delete
-	 * our own device address from our uc list. Because we store the
-	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
-	 * such requests and not delete our device address from this list.
-	 */
-	if (ether_addr_equal(addr, netdev->dev_addr))
-		return 0;
-
-	f = i40evf_find_filter(adapter, addr);
-	if (f) {
-		f->remove = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-	}
-	return 0;
-}
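/* The contract behind the two callbacks above (as defined by the core
 * __dev_uc_sync()/__dev_mc_sync() helpers): the stack walks the netdev's
 * address lists under the address-list lock and invokes sync() exactly once
 * for every newly added address and unsync() exactly once for every address
 * that went away. The callbacks therefore only mark the driver's own filter
 * entries and let the watchdog push the changes to the PF later.
 */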
-
-/**
- * i40evf_set_rx_mode - NDO callback to set the netdev filters
- * @netdev: network interface device structure
- **/
-static void i40evf_set_rx_mode(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-	__dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
-	__dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	if (netdev->flags & IFF_PROMISC &&
-	    !(adapter->flags & I40EVF_FLAG_PROMISC_ON))
-		adapter->aq_required |= I40EVF_FLAG_AQ_REQUEST_PROMISC;
-	else if (!(netdev->flags & IFF_PROMISC) &&
-		 adapter->flags & I40EVF_FLAG_PROMISC_ON)
-		adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_PROMISC;
-
-	if (netdev->flags & IFF_ALLMULTI &&
-	    !(adapter->flags & I40EVF_FLAG_ALLMULTI_ON))
-		adapter->aq_required |= I40EVF_FLAG_AQ_REQUEST_ALLMULTI;
-	else if (!(netdev->flags & IFF_ALLMULTI) &&
-		 adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
-		adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
-}
-
-/**
- * i40evf_napi_enable_all - enable NAPI on all queue vectors
- * @adapter: board private structure
- **/
-static void i40evf_napi_enable_all(struct i40evf_adapter *adapter)
-{
-	int q_idx;
-	struct i40e_q_vector *q_vector;
-	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-
-	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
-		struct napi_struct *napi;
-
-		q_vector = &adapter->q_vectors[q_idx];
-		napi = &q_vector->napi;
-		napi_enable(napi);
-	}
-}
-
-/**
- * i40evf_napi_disable_all - disable NAPI on all queue vectors
- * @adapter: board private structure
- **/
-static void i40evf_napi_disable_all(struct i40evf_adapter *adapter)
-{
-	int q_idx;
-	struct i40e_q_vector *q_vector;
-	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-
-	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
-		q_vector = &adapter->q_vectors[q_idx];
-		napi_disable(&q_vector->napi);
-	}
-}
-
-/**
- * i40evf_configure - set up transmit and receive data structures
- * @adapter: board private structure
- **/
-static void i40evf_configure(struct i40evf_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	int i;
-
-	i40evf_set_rx_mode(netdev);
-
-	i40evf_configure_tx(adapter);
-	i40evf_configure_rx(adapter);
-	adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_QUEUES;
-
-	for (i = 0; i < adapter->num_active_queues; i++) {
-		struct i40e_ring *ring = &adapter->rx_rings[i];
-
-		i40evf_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
-	}
-}
-
-/**
- * i40evf_up_complete - Finish the last steps of bringing up a connection
- * @adapter: board private structure
- *
- * Expects to be called while holding the __I40EVF_IN_CRITICAL_TASK bit lock.
- **/
-static void i40evf_up_complete(struct i40evf_adapter *adapter)
-{
-	adapter->state = __I40EVF_RUNNING;
-	clear_bit(__I40E_VSI_DOWN, adapter->vsi.state);
-
-	i40evf_napi_enable_all(adapter);
-
-	adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES;
-	if (CLIENT_ENABLED(adapter))
-		adapter->flags |= I40EVF_FLAG_CLIENT_NEEDS_OPEN;
-	mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-}
-
-/**
- * i40evf_down - Shutdown the connection processing
- * @adapter: board private structure
- *
- * Expects to be called while holding the __I40EVF_IN_CRITICAL_TASK bit lock.
- **/
-void i40evf_down(struct i40evf_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	struct i40evf_vlan_filter *vlf;
-	struct i40evf_mac_filter *f;
-	struct i40evf_cloud_filter *cf;
-
-	if (adapter->state <= __I40EVF_DOWN_PENDING)
-		return;
-
-	netif_carrier_off(netdev);
-	netif_tx_disable(netdev);
-	adapter->link_up = false;
-	i40evf_napi_disable_all(adapter);
-	i40evf_irq_disable(adapter);
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	/* clear the sync flag on all filters */
-	__dev_uc_unsync(adapter->netdev, NULL);
-	__dev_mc_unsync(adapter->netdev, NULL);
-
-	/* remove all MAC filters */
-	list_for_each_entry(f, &adapter->mac_filter_list, list) {
-		f->remove = true;
-	}
-
-	/* remove all VLAN filters */
-	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
-		vlf->remove = true;
-	}
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	/* remove all cloud filters */
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
-		cf->del = true;
-	}
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-
-	if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
-	    adapter->state != __I40EVF_RESETTING) {
-		/* cancel any current operation */
-		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-		/* Schedule operations to close down the HW. Don't wait
-		 * here for this to complete. The watchdog is still running
-		 * and it will take care of this.
-		 */
-		adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
-	}
-
-	mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-}
-
-/**
- * i40evf_acquire_msix_vectors - Setup the MSIX capability
- * @adapter: board private structure
- * @vectors: number of vectors to request
- *
- * Work with the OS to set up the MSIX vectors needed.
- *
- * Returns 0 on success, negative on failure
- **/
-static int
-i40evf_acquire_msix_vectors(struct i40evf_adapter *adapter, int vectors)
-{
-	int err, vector_threshold;
-
-	/* We'll want at least 3 (vector_threshold):
-	 * 0) Other (Admin Queue and link, mostly)
-	 * 1) TxQ[0] Cleanup
-	 * 2) RxQ[0] Cleanup
-	 */
-	vector_threshold = MIN_MSIX_COUNT;
-
-	/* The more we get, the more we will assign to Tx/Rx Cleanup
-	 * for the separate queues...where Rx Cleanup >= Tx Cleanup.
-	 * Right now, we simply care about how many we'll get; we'll
-	 * set them up later while requesting irq's.
-	 */
-	err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
-				    vector_threshold, vectors);
-	if (err < 0) {
-		dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts\n");
-		kfree(adapter->msix_entries);
-		adapter->msix_entries = NULL;
-		return err;
-	}
-
-	/* Adjust for only the vectors we'll use, which is minimum
-	 * of max_msix_q_vectors + NONQ_VECS, or the number of
-	 * vectors we were allocated.
-	 */
-	adapter->num_msix_vectors = err;
-	return 0;
-}
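/* A minimal sketch (demo_* names are hypothetical) of the
 * pci_enable_msix_range() semantics relied on above: the call either enables
 * between minvec and maxvec vectors and returns how many it enabled, or
 * enables nothing and returns a negative errno. There is no partial success
 * below minvec, so a single call replaces an explicit retry-with-fewer loop.
 */
static int demo_acquire(struct pci_dev *pdev, struct msix_entry *entries,
			int wanted)
{
	int got = pci_enable_msix_range(pdev, entries, MIN_MSIX_COUNT,
					wanted);

	if (got < 0)
		return got;	/* not even MIN_MSIX_COUNT were available */

	/* got is guaranteed to be in [MIN_MSIX_COUNT, wanted]; size the
	 * queue vectors from whatever was actually received.
	 */
	return got;
}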
-
-/**
- * i40evf_free_queues - Free memory for all rings
- * @adapter: board private structure to initialize
- *
- * Free all of the memory associated with queue pairs.
- **/
-static void i40evf_free_queues(struct i40evf_adapter *adapter)
-{
-	if (!adapter->vsi_res)
-		return;
-	adapter->num_active_queues = 0;
-	kfree(adapter->tx_rings);
-	adapter->tx_rings = NULL;
-	kfree(adapter->rx_rings);
-	adapter->rx_rings = NULL;
-}
-
-/**
- * i40evf_alloc_queues - Allocate memory for all rings
- * @adapter: board private structure to initialize
- *
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time.
- **/
-static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
-{
-	int i, num_active_queues;
-
-	/* If we're in reset reallocating queues, we don't yet know for
-	 * certain that the PF gave us the number of queues we asked for,
-	 * but we'll assume it did. Once basic reset is finished we'll
-	 * confirm this when we start negotiating the config with the PF.
-	 */
-	if (adapter->num_req_queues)
-		num_active_queues = adapter->num_req_queues;
-	else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
-		 adapter->num_tc)
-		num_active_queues = adapter->ch_config.total_qps;
-	else
-		num_active_queues = min_t(int,
-					  adapter->vsi_res->num_queue_pairs,
-					  (int)(num_online_cpus()));
-
-	adapter->tx_rings = kcalloc(num_active_queues,
-				    sizeof(struct i40e_ring), GFP_KERNEL);
-	if (!adapter->tx_rings)
-		goto err_out;
-	adapter->rx_rings = kcalloc(num_active_queues,
-				    sizeof(struct i40e_ring), GFP_KERNEL);
-	if (!adapter->rx_rings)
-		goto err_out;
-
-	for (i = 0; i < num_active_queues; i++) {
-		struct i40e_ring *tx_ring;
-		struct i40e_ring *rx_ring;
-
-		tx_ring = &adapter->tx_rings[i];
-
-		tx_ring->queue_index = i;
-		tx_ring->netdev = adapter->netdev;
-		tx_ring->dev = &adapter->pdev->dev;
-		tx_ring->count = adapter->tx_desc_count;
-		tx_ring->itr_setting = I40E_ITR_TX_DEF;
-		if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
-			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
-
-		rx_ring = &adapter->rx_rings[i];
-		rx_ring->queue_index = i;
-		rx_ring->netdev = adapter->netdev;
-		rx_ring->dev = &adapter->pdev->dev;
-		rx_ring->count = adapter->rx_desc_count;
-		rx_ring->itr_setting = I40E_ITR_RX_DEF;
-	}
-
-	adapter->num_active_queues = num_active_queues;
-
-	return 0;
-
-err_out:
-	i40evf_free_queues(adapter);
-	return -ENOMEM;
-}
-
-/**
- * i40evf_set_interrupt_capability - set MSI-X or FAIL if not supported
- * @adapter: board private structure to initialize
- *
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
- **/
-static int i40evf_set_interrupt_capability(struct i40evf_adapter *adapter)
-{
-	int vector, v_budget;
-	int pairs = 0;
-	int err = 0;
-
-	if (!adapter->vsi_res) {
-		err = -EIO;
-		goto out;
-	}
-	pairs = adapter->num_active_queues;
-
-	/* It's easy to be greedy for MSI-X vectors, but it really doesn't do
-	 * us much good if we have more vectors than CPUs. However, we already
-	 * limit the total number of queues by the number of CPUs so we do not
-	 * need any further limiting here.
-	 */
-	v_budget = min_t(int, pairs + NONQ_VECS,
-			 (int)adapter->vf_res->max_vectors);
-
-	adapter->msix_entries = kcalloc(v_budget,
-					sizeof(struct msix_entry), GFP_KERNEL);
-	if (!adapter->msix_entries) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	for (vector = 0; vector < v_budget; vector++)
-		adapter->msix_entries[vector].entry = vector;
-
-	err = i40evf_acquire_msix_vectors(adapter, v_budget);
-
-out:
-	netif_set_real_num_rx_queues(adapter->netdev, pairs);
-	netif_set_real_num_tx_queues(adapter->netdev, pairs);
-	return err;
-}
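/* Worked example (hypothetical numbers, assuming NONQ_VECS == 1): with
 * 8 online CPUs and 8 active queue pairs the driver computes v_budget =
 * min(8 + 1, max_vectors). If the PF advertises max_vectors = 5, five MSI-X
 * entries are requested and, once acquired, the driver runs with four queue
 * vectors plus the single non-queue (admin/misc) vector.
 */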
-
-/**
- * i40evf_config_rss_aq - Configure RSS keys and lut by using AQ commands
- * @adapter: board private structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_config_rss_aq(struct i40evf_adapter *adapter)
-{
-	struct i40e_aqc_get_set_rss_key_data *rss_key =
-		(struct i40e_aqc_get_set_rss_key_data *)adapter->rss_key;
-	struct i40e_hw *hw = &adapter->hw;
-	int ret = 0;
-
-	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
-		/* bail because we already have a command pending */
-		dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n",
-			adapter->current_op);
-		return -EBUSY;
-	}
-
-	ret = i40evf_aq_set_rss_key(hw, adapter->vsi.id, rss_key);
-	if (ret) {
-		dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n",
-			i40evf_stat_str(hw, ret),
-			i40evf_aq_str(hw, hw->aq.asq_last_status));
-		return ret;
-	}
-
-	ret = i40evf_aq_set_rss_lut(hw, adapter->vsi.id, false,
-				    adapter->rss_lut, adapter->rss_lut_size);
-	if (ret) {
-		dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n",
-			i40evf_stat_str(hw, ret),
-			i40evf_aq_str(hw, hw->aq.asq_last_status));
-	}
-
-	return ret;
-}
-
-/**
- * i40evf_config_rss_reg - Configure RSS keys and lut by writing registers
- * @adapter: board private structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_config_rss_reg(struct i40evf_adapter *adapter)
-{
-	struct i40e_hw *hw = &adapter->hw;
-	u32 *dw;
-	u16 i;
-
-	dw = (u32 *)adapter->rss_key;
-	for (i = 0; i < adapter->rss_key_size / 4; i++)
-		wr32(hw, I40E_VFQF_HKEY(i), dw[i]);
-
-	dw = (u32 *)adapter->rss_lut;
-	for (i = 0; i < adapter->rss_lut_size / 4; i++)
-		wr32(hw, I40E_VFQF_HLUT(i), dw[i]);
-
-	i40e_flush(hw);
-
-	return 0;
-}
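/* Sizing note (derived from the VF register layout): the VF exposes 13 HKEY
 * and 16 HLUT registers, so a 52-byte RSS key is written as 52 / 4 = 13 u32
 * words (indices 0..12) and a 64-byte LUT as 64 / 4 = 16 words (indices
 * 0..15); iterating while i < size / 4 keeps the loops above inside both the
 * source buffers and the register file.
 */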
-
-/**
- * i40evf_config_rss - Configure RSS keys and lut
- * @adapter: board private structure
- *
- * Returns 0 on success, negative on failure
- **/
-int i40evf_config_rss(struct i40evf_adapter *adapter)
-{
-	if (RSS_PF(adapter)) {
-		adapter->aq_required |= I40EVF_FLAG_AQ_SET_RSS_LUT |
-					I40EVF_FLAG_AQ_SET_RSS_KEY;
-		return 0;
-	} else if (RSS_AQ(adapter)) {
-		return i40evf_config_rss_aq(adapter);
-	} else {
-		return i40evf_config_rss_reg(adapter);
-	}
-}
-
-/**
- * i40evf_fill_rss_lut - Fill the lut with default values
- * @adapter: board private structure
- **/
-static void i40evf_fill_rss_lut(struct i40evf_adapter *adapter)
-{
-	u16 i;
-
-	for (i = 0; i < adapter->rss_lut_size; i++)
-		adapter->rss_lut[i] = i % adapter->num_active_queues;
-}
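/* A self-contained model (not driver code) of the default LUT fill above:
 * table entries are assigned round-robin across the active queues, so a hash
 * that indexes entry i lands on queue i % num_queues. With a 64-entry LUT
 * and a hypothetical 4 active queues this prints 0 1 2 3 0 1 2 3 ...
 */
#include <stdio.h>

int main(void)
{
	unsigned char lut[64];		/* 64-entry LUT, as on this VF */
	unsigned int num_queues = 4;	/* hypothetical active queue count */
	unsigned int i;

	for (i = 0; i < sizeof(lut); i++)
		lut[i] = i % num_queues;

	for (i = 0; i < sizeof(lut); i++)
		printf("%u%c", (unsigned int)lut[i],
		       (i % 16 == 15) ? '\n' : ' ');

	return 0;
}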
-
-/**
- * i40evf_init_rss - Prepare for RSS
- * @adapter: board private structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_init_rss(struct i40evf_adapter *adapter)
-{
-	struct i40e_hw *hw = &adapter->hw;
-	int ret;
-
-	if (!RSS_PF(adapter)) {
-		/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
-		if (adapter->vf_res->vf_cap_flags &
-		    VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
-			adapter->hena = I40E_DEFAULT_RSS_HENA_EXPANDED;
-		else
-			adapter->hena = I40E_DEFAULT_RSS_HENA;
-
-		wr32(hw, I40E_VFQF_HENA(0), (u32)adapter->hena);
-		wr32(hw, I40E_VFQF_HENA(1), (u32)(adapter->hena >> 32));
-	}
-
-	i40evf_fill_rss_lut(adapter);
-
-	netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size);
-	ret = i40evf_config_rss(adapter);
-
-	return ret;
-}
-
-/**
- * i40evf_alloc_q_vectors - Allocate memory for interrupt vectors
- * @adapter: board private structure to initialize
- *
- * We allocate one q_vector per queue interrupt.  If allocation fails we
- * return -ENOMEM.
- **/
-static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter)
-{
-	int q_idx = 0, num_q_vectors;
-	struct i40e_q_vector *q_vector;
-
-	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-	adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector),
-				     GFP_KERNEL);
-	if (!adapter->q_vectors)
-		return -ENOMEM;
-
-	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-		q_vector = &adapter->q_vectors[q_idx];
-		q_vector->adapter = adapter;
-		q_vector->vsi = &adapter->vsi;
-		q_vector->v_idx = q_idx;
-		q_vector->reg_idx = q_idx;
-		cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
-		netif_napi_add(adapter->netdev, &q_vector->napi,
-			       i40evf_napi_poll, NAPI_POLL_WEIGHT);
-	}
-
-	return 0;
-}
-
-/**
- * i40evf_free_q_vectors - Free memory allocated for interrupt vectors
- * @adapter: board private structure to initialize
- *
- * This function frees the memory allocated to the q_vectors.  In addition if
- * NAPI is enabled it will delete any references to the NAPI struct prior
- * to freeing the q_vector.
- **/
-static void i40evf_free_q_vectors(struct i40evf_adapter *adapter)
-{
-	int q_idx, num_q_vectors;
-	int napi_vectors;
-
-	if (!adapter->q_vectors)
-		return;
-
-	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-	napi_vectors = adapter->num_active_queues;
-
-	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-		struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx];
-		if (q_idx < napi_vectors)
-			netif_napi_del(&q_vector->napi);
-	}
-	kfree(adapter->q_vectors);
-	adapter->q_vectors = NULL;
-}
-
-/**
- * i40evf_reset_interrupt_capability - Reset MSIX setup
- * @adapter: board private structure
- *
- **/
-void i40evf_reset_interrupt_capability(struct i40evf_adapter *adapter)
-{
-	if (!adapter->msix_entries)
-		return;
-
-	pci_disable_msix(adapter->pdev);
-	kfree(adapter->msix_entries);
-	adapter->msix_entries = NULL;
-}
-
-/**
- * i40evf_init_interrupt_scheme - Determine if MSIX is supported and init
- * @adapter: board private structure to initialize
- *
- **/
-int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
-{
-	int err;
-
-	err = i40evf_alloc_queues(adapter);
-	if (err) {
-		dev_err(&adapter->pdev->dev,
-			"Unable to allocate memory for queues\n");
-		goto err_alloc_queues;
-	}
-
-	rtnl_lock();
-	err = i40evf_set_interrupt_capability(adapter);
-	rtnl_unlock();
-	if (err) {
-		dev_err(&adapter->pdev->dev,
-			"Unable to setup interrupt capabilities\n");
-		goto err_set_interrupt;
-	}
-
-	err = i40evf_alloc_q_vectors(adapter);
-	if (err) {
-		dev_err(&adapter->pdev->dev,
-			"Unable to allocate memory for queue vectors\n");
-		goto err_alloc_q_vectors;
-	}
-
-	/* If we've made it this far with the ADq flag set, then we haven't
-	 * bailed out anywhere along the way, and ADq isn't merely enabled:
-	 * the actual resources have been allocated in the reset path.
-	 * Now we can truly claim that ADq is enabled.
-	 */
-	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
-	    adapter->num_tc)
-		dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
-			 adapter->num_tc);
-
-	dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
-		 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
-		 adapter->num_active_queues);
-
-	return 0;
-err_alloc_q_vectors:
-	i40evf_reset_interrupt_capability(adapter);
-err_set_interrupt:
-	i40evf_free_queues(adapter);
-err_alloc_queues:
-	return err;
-}
-
-/**
- * i40evf_free_rss - Free memory used by RSS structs
- * @adapter: board private structure
- **/
-static void i40evf_free_rss(struct i40evf_adapter *adapter)
-{
-	kfree(adapter->rss_key);
-	adapter->rss_key = NULL;
-
-	kfree(adapter->rss_lut);
-	adapter->rss_lut = NULL;
-}
-
-/**
- * i40evf_reinit_interrupt_scheme - Reallocate queues and vectors
- * @adapter: board private structure
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	int err;
-
-	if (netif_running(netdev))
-		i40evf_free_traffic_irqs(adapter);
-	i40evf_free_misc_irq(adapter);
-	i40evf_reset_interrupt_capability(adapter);
-	i40evf_free_q_vectors(adapter);
-	i40evf_free_queues(adapter);
-
-	err =  i40evf_init_interrupt_scheme(adapter);
-	if (err)
-		goto err;
-
-	netif_tx_stop_all_queues(netdev);
-
-	err = i40evf_request_misc_irq(adapter);
-	if (err)
-		goto err;
-
-	set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
-
-	i40evf_map_rings_to_vectors(adapter);
-
-	if (RSS_AQ(adapter))
-		adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS;
-	else
-		err = i40evf_init_rss(adapter);
-err:
-	return err;
-}
-
-/**
- * i40evf_watchdog_timer - Periodic call-back timer
- * @data: pointer to adapter disguised as unsigned long
- **/
-static void i40evf_watchdog_timer(struct timer_list *t)
-{
-	struct i40evf_adapter *adapter = from_timer(adapter, t,
-						    watchdog_timer);
-
-	schedule_work(&adapter->watchdog_task);
-	/* timer will be rescheduled in watchdog task */
-}
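/* A userspace model (not driver code) of the from_timer() helper used above:
 * it is container_of() specialised for timers, recovering the enclosing
 * adapter from the bare struct timer_list pointer the timer core hands back.
 * struct demo_adapter and its fields are hypothetical stand-ins.
 */
#include <stdio.h>
#include <stddef.h>

struct timer_list { int expires; };

struct demo_adapter {
	int id;
	struct timer_list watchdog_timer;
};

#define from_timer(var, timer, field) \
	((typeof(var))((char *)(timer) - offsetof(typeof(*(var)), field)))

int main(void)
{
	struct demo_adapter a = { .id = 42 };
	struct timer_list *t = &a.watchdog_timer;	/* what the core passes */
	struct demo_adapter *adapter = from_timer(adapter, t, watchdog_timer);

	printf("%d\n", adapter->id);	/* prints 42 */
	return 0;
}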
-
-/**
- * i40evf_watchdog_task - Periodic call-back task
- * @work: pointer to work_struct
- **/
-static void i40evf_watchdog_task(struct work_struct *work)
-{
-	struct i40evf_adapter *adapter = container_of(work,
-						      struct i40evf_adapter,
-						      watchdog_task);
-	struct i40e_hw *hw = &adapter->hw;
-	u32 reg_val;
-
-	if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section))
-		goto restart_watchdog;
-
-	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
-		reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
-			  I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if ((reg_val == VIRTCHNL_VFR_VFACTIVE) ||
-		    (reg_val == VIRTCHNL_VFR_COMPLETED)) {
-			/* A chance for redemption! */
-			dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
-			adapter->state = __I40EVF_STARTUP;
-			adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
-			schedule_delayed_work(&adapter->init_task, 10);
-			clear_bit(__I40EVF_IN_CRITICAL_TASK,
-				  &adapter->crit_section);
-			/* Don't reschedule the watchdog, since we've restarted
-			 * the init task. When init_task contacts the PF and
-			 * gets everything set up again, it'll restart the
-			 * watchdog for us. Down, boy. Sit. Stay. Woof.
-			 */
-			return;
-		}
-		adapter->aq_required = 0;
-		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-		goto watchdog_done;
-	}
-
-	if ((adapter->state < __I40EVF_DOWN) ||
-	    (adapter->flags & I40EVF_FLAG_RESET_PENDING))
-		goto watchdog_done;
-
-	/* check for reset */
-	reg_val = rd32(hw, I40E_VF_ARQLEN1) & I40E_VF_ARQLEN1_ARQENABLE_MASK;
-	if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING) && !reg_val) {
-		adapter->state = __I40EVF_RESETTING;
-		adapter->flags |= I40EVF_FLAG_RESET_PENDING;
-		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
-		schedule_work(&adapter->reset_task);
-		adapter->aq_required = 0;
-		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-		goto watchdog_done;
-	}
-
-	/* Process admin queue tasks. After init, everything gets done
-	 * here so we don't race on the admin queue.
-	 */
-	if (adapter->current_op) {
-		if (!i40evf_asq_done(hw)) {
-			dev_dbg(&adapter->pdev->dev, "Admin queue timeout\n");
-			i40evf_send_api_ver(adapter);
-		}
-		goto watchdog_done;
-	}
-	if (adapter->aq_required & I40EVF_FLAG_AQ_GET_CONFIG) {
-		i40evf_send_vf_config_msg(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_QUEUES) {
-		i40evf_disable_queues(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_MAP_VECTORS) {
-		i40evf_map_queues(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_MAC_FILTER) {
-		i40evf_add_ether_addrs(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_VLAN_FILTER) {
-		i40evf_add_vlans(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_MAC_FILTER) {
-		i40evf_del_ether_addrs(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_VLAN_FILTER) {
-		i40evf_del_vlans(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) {
-		i40evf_enable_vlan_stripping(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) {
-		i40evf_disable_vlan_stripping(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_CONFIGURE_QUEUES) {
-		i40evf_configure_queues(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_QUEUES) {
-		i40evf_enable_queues(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_CONFIGURE_RSS) {
-		/* This message goes straight to the firmware, not the
-		 * PF, so we don't have to set current_op as we will
-		 * not get a response through the ARQ.
-		 */
-		i40evf_init_rss(adapter);
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_RSS;
-		goto watchdog_done;
-	}
-	if (adapter->aq_required & I40EVF_FLAG_AQ_GET_HENA) {
-		i40evf_get_hena(adapter);
-		goto watchdog_done;
-	}
-	if (adapter->aq_required & I40EVF_FLAG_AQ_SET_HENA) {
-		i40evf_set_hena(adapter);
-		goto watchdog_done;
-	}
-	if (adapter->aq_required & I40EVF_FLAG_AQ_SET_RSS_KEY) {
-		i40evf_set_rss_key(adapter);
-		goto watchdog_done;
-	}
-	if (adapter->aq_required & I40EVF_FLAG_AQ_SET_RSS_LUT) {
-		i40evf_set_rss_lut(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_PROMISC) {
-		i40evf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
-				       FLAG_VF_MULTICAST_PROMISC);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_REQUEST_ALLMULTI) {
-		i40evf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
-		goto watchdog_done;
-	}
-
-	if ((adapter->aq_required & I40EVF_FLAG_AQ_RELEASE_PROMISC) &&
-	    (adapter->aq_required & I40EVF_FLAG_AQ_RELEASE_ALLMULTI)) {
-		i40evf_set_promiscuous(adapter, 0);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
-		i40evf_enable_channels(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
-		i40evf_disable_channels(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
-		i40evf_add_cloud_filter(adapter);
-		goto watchdog_done;
-	}
-
-	if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
-		i40evf_del_cloud_filter(adapter);
-		goto watchdog_done;
-	}
-
-	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
-
-	if (adapter->state == __I40EVF_RUNNING)
-		i40evf_request_stats(adapter);
-watchdog_done:
-	if (adapter->state == __I40EVF_RUNNING)
-		i40evf_detect_recover_hung(&adapter->vsi);
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-restart_watchdog:
-	if (adapter->state == __I40EVF_REMOVE)
-		return;
-	if (adapter->aq_required)
-		mod_timer(&adapter->watchdog_timer,
-			  jiffies + msecs_to_jiffies(20));
-	else
-		mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
-	schedule_work(&adapter->adminq_task);
-}
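/* The if/goto chain above services at most one aq_required flag per pass and
 * then jumps to watchdog_done: the driver allows only one outstanding
 * virtchnl request at a time (tracked in current_op), so the chain acts as a
 * one-message-deep scheduler. Each pass sends a single request, and the
 * 20 ms rearm at restart_watchdog keeps draining the backlog while
 * aq_required is non-zero.
 */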
-
-static void i40evf_disable_vf(struct i40evf_adapter *adapter)
-{
-	struct i40evf_mac_filter *f, *ftmp;
-	struct i40evf_vlan_filter *fv, *fvtmp;
-	struct i40evf_cloud_filter *cf, *cftmp;
-
-	adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
-
-	/* We don't use netif_running() because it may be true prior to
-	 * ndo_open() returning, so we can't assume it means all our open
-	 * tasks have finished, since we're not holding the rtnl_lock here.
-	 */
-	if (adapter->state == __I40EVF_RUNNING) {
-		set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
-		netif_carrier_off(adapter->netdev);
-		netif_tx_disable(adapter->netdev);
-		adapter->link_up = false;
-		i40evf_napi_disable_all(adapter);
-		i40evf_irq_disable(adapter);
-		i40evf_free_traffic_irqs(adapter);
-		i40evf_free_all_tx_resources(adapter);
-		i40evf_free_all_rx_resources(adapter);
-	}
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	/* Delete all of the filters */
-	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
-		list_del(&f->list);
-		kfree(f);
-	}
-
-	list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) {
-		list_del(&fv->list);
-		kfree(fv);
-	}
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
-		list_del(&cf->list);
-		kfree(cf);
-		adapter->num_cloud_filters--;
-	}
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-
-	i40evf_free_misc_irq(adapter);
-	i40evf_reset_interrupt_capability(adapter);
-	i40evf_free_queues(adapter);
-	i40evf_free_q_vectors(adapter);
-	kfree(adapter->vf_res);
-	i40evf_shutdown_adminq(&adapter->hw);
-	adapter->netdev->flags &= ~IFF_UP;
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-	adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-	adapter->state = __I40EVF_DOWN;
-	wake_up(&adapter->down_waitqueue);
-	dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
-}
-
-#define I40EVF_RESET_WAIT_MS 10
-#define I40EVF_RESET_WAIT_COUNT 500
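/* Timing note: the second polling loop below sleeps I40EVF_RESET_WAIT_MS
 * between reads, so the PF gets up to I40EVF_RESET_WAIT_COUNT *
 * I40EVF_RESET_WAIT_MS = 500 * 10 ms = 5 s to report VIRTCHNL_VFR_VFACTIVE
 * before the VF is declared dead and disabled.
 */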
-/**
- * i40evf_reset_task - Call-back task to handle hardware reset
- * @work: pointer to work_struct
- *
- * During reset we need to shut down and reinitialize the admin queue
- * before we can use it to communicate with the PF again. We also clear
- * and reinit the rings because that context is lost as well.
- **/
-static void i40evf_reset_task(struct work_struct *work)
-{
-	struct i40evf_adapter *adapter = container_of(work,
-						      struct i40evf_adapter,
-						      reset_task);
-	struct virtchnl_vf_resource *vfres = adapter->vf_res;
-	struct net_device *netdev = adapter->netdev;
-	struct i40e_hw *hw = &adapter->hw;
-	struct i40evf_vlan_filter *vlf;
-	struct i40evf_cloud_filter *cf;
-	struct i40evf_mac_filter *f;
-	u32 reg_val;
-	int i = 0, err;
-	bool running;
-
-	/* When the device is being removed it doesn't make sense to run the
-	 * reset task; just return in that case.
-	 */
-	if (test_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section))
-		return;
-
-	while (test_and_set_bit(__I40EVF_IN_CLIENT_TASK,
-				&adapter->crit_section))
-		usleep_range(500, 1000);
-	if (CLIENT_ENABLED(adapter)) {
-		adapter->flags &= ~(I40EVF_FLAG_CLIENT_NEEDS_OPEN |
-				    I40EVF_FLAG_CLIENT_NEEDS_CLOSE |
-				    I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS |
-				    I40EVF_FLAG_SERVICE_CLIENT_REQUESTED);
-		cancel_delayed_work_sync(&adapter->client_task);
-		i40evf_notify_client_close(&adapter->vsi, true);
-	}
-	i40evf_misc_irq_disable(adapter);
-	if (adapter->flags & I40EVF_FLAG_RESET_NEEDED) {
-		adapter->flags &= ~I40EVF_FLAG_RESET_NEEDED;
-		/* Restart the AQ here. If we have been reset but didn't
-		 * detect it, or if the PF had to reinit, our AQ will be hosed.
-		 */
-		i40evf_shutdown_adminq(hw);
-		i40evf_init_adminq(hw);
-		i40evf_request_reset(adapter);
-	}
-	adapter->flags |= I40EVF_FLAG_RESET_PENDING;
-
-	/* poll until we see the reset actually happen */
-	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
-		reg_val = rd32(hw, I40E_VF_ARQLEN1) &
-			  I40E_VF_ARQLEN1_ARQENABLE_MASK;
-		if (!reg_val)
-			break;
-		usleep_range(5000, 10000);
-	}
-	if (i == I40EVF_RESET_WAIT_COUNT) {
-		dev_info(&adapter->pdev->dev, "Never saw reset\n");
-		goto continue_reset; /* act like the reset happened */
-	}
-
-	/* wait until the reset is complete and the PF is responding to us */
-	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
-		/* sleep first to make sure a minimum wait time is met */
-		msleep(I40EVF_RESET_WAIT_MS);
-
-		reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
-			  I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if (reg_val == VIRTCHNL_VFR_VFACTIVE)
-			break;
-	}
-
-	pci_set_master(adapter->pdev);
-
-	if (i == I40EVF_RESET_WAIT_COUNT) {
-		dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
-			reg_val);
-		i40evf_disable_vf(adapter);
-		clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
-		return; /* Do not attempt to reinit. It's dead, Jim. */
-	}
-
-continue_reset:
-	/* We don't use netif_running() because it may be true prior to
-	 * ndo_open() returning, so we can't assume it means all our open
-	 * tasks have finished, since we're not holding the rtnl_lock here.
-	 */
-	running = ((adapter->state == __I40EVF_RUNNING) ||
-		   (adapter->state == __I40EVF_RESETTING));
-
-	if (running) {
-		netif_carrier_off(netdev);
-		netif_tx_stop_all_queues(netdev);
-		adapter->link_up = false;
-		i40evf_napi_disable_all(adapter);
-	}
-	i40evf_irq_disable(adapter);
-
-	adapter->state = __I40EVF_RESETTING;
-	adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-
-	/* free the Tx/Rx rings and descriptors, might be better to just
-	 * re-use them sometime in the future
-	 */
-	i40evf_free_all_rx_resources(adapter);
-	i40evf_free_all_tx_resources(adapter);
-
-	adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
-	/* kill and reinit the admin queue */
-	i40evf_shutdown_adminq(hw);
-	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-	err = i40evf_init_adminq(hw);
-	if (err)
-		dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n",
-			 err);
-	adapter->aq_required = 0;
-
-	if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) {
-		err = i40evf_reinit_interrupt_scheme(adapter);
-		if (err)
-			goto reset_err;
-	}
-
-	adapter->aq_required |= I40EVF_FLAG_AQ_GET_CONFIG;
-	adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS;
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	/* re-add all MAC filters */
-	list_for_each_entry(f, &adapter->mac_filter_list, list) {
-		f->add = true;
-	}
-	/* re-add all VLAN filters */
-	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
-		vlf->add = true;
-	}
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	/* check if TCs are running and re-add all cloud filters */
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
-	    adapter->num_tc) {
-		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
-			cf->add = true;
-		}
-	}
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-
-	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
-	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
-	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
-	i40evf_misc_irq_enable(adapter);
-
-	mod_timer(&adapter->watchdog_timer, jiffies + 2);
-
-	/* We were running when the reset started, so we need to restore some
-	 * state here.
-	 */
-	if (running) {
-		/* allocate transmit descriptors */
-		err = i40evf_setup_all_tx_resources(adapter);
-		if (err)
-			goto reset_err;
-
-		/* allocate receive descriptors */
-		err = i40evf_setup_all_rx_resources(adapter);
-		if (err)
-			goto reset_err;
-
-		if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) {
-			err = i40evf_request_traffic_irqs(adapter,
-							  netdev->name);
-			if (err)
-				goto reset_err;
-
-			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
-		}
-
-		i40evf_configure(adapter);
-
-		i40evf_up_complete(adapter);
-
-		i40evf_irq_enable(adapter, true);
-	} else {
-		adapter->state = __I40EVF_DOWN;
-		wake_up(&adapter->down_waitqueue);
-	}
-	clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-
-	return;
-reset_err:
-	clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-	i40evf_close(netdev);
-}
-
-/**
- * i40evf_adminq_task - worker thread to clean the admin queue
- * @work: pointer to work_struct containing our data
- **/
-static void i40evf_adminq_task(struct work_struct *work)
-{
-	struct i40evf_adapter *adapter =
-		container_of(work, struct i40evf_adapter, adminq_task);
-	struct i40e_hw *hw = &adapter->hw;
-	struct i40e_arq_event_info event;
-	enum virtchnl_ops v_op;
-	i40e_status ret, v_ret;
-	u32 val, oldval;
-	u16 pending;
-
-	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
-		goto out;
-
-	event.buf_len = I40EVF_MAX_AQ_BUF_SIZE;
-	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
-	if (!event.msg_buf)
-		goto out;
-
-	do {
-		ret = i40evf_clean_arq_element(hw, &event, &pending);
-		v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
-		v_ret = (i40e_status)le32_to_cpu(event.desc.cookie_low);
-
-		if (ret || !v_op)
-			break; /* No event to process or error cleaning ARQ */
-
-		i40evf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf,
-					   event.msg_len);
-		if (pending != 0)
-			memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
-	} while (pending);
-
-	if ((adapter->flags &
-	     (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED)) ||
-	    adapter->state == __I40EVF_RESETTING)
-		goto freedom;
-
-	/* check for error indications */
-	val = rd32(hw, hw->aq.arq.len);
-	if (val == 0xdeadbeef) /* indicates device in reset */
-		goto freedom;
-	oldval = val;
-	if (val & I40E_VF_ARQLEN1_ARQVFE_MASK) {
-		dev_info(&adapter->pdev->dev, "ARQ VF Error detected\n");
-		val &= ~I40E_VF_ARQLEN1_ARQVFE_MASK;
-	}
-	if (val & I40E_VF_ARQLEN1_ARQOVFL_MASK) {
-		dev_info(&adapter->pdev->dev, "ARQ Overflow Error detected\n");
-		val &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK;
-	}
-	if (val & I40E_VF_ARQLEN1_ARQCRIT_MASK) {
-		dev_info(&adapter->pdev->dev, "ARQ Critical Error detected\n");
-		val &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK;
-	}
-	if (oldval != val)
-		wr32(hw, hw->aq.arq.len, val);
-
-	val = rd32(hw, hw->aq.asq.len);
-	oldval = val;
-	if (val & I40E_VF_ATQLEN1_ATQVFE_MASK) {
-		dev_info(&adapter->pdev->dev, "ASQ VF Error detected\n");
-		val &= ~I40E_VF_ATQLEN1_ATQVFE_MASK;
-	}
-	if (val & I40E_VF_ATQLEN1_ATQOVFL_MASK) {
-		dev_info(&adapter->pdev->dev, "ASQ Overflow Error detected\n");
-		val &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK;
-	}
-	if (val & I40E_VF_ATQLEN1_ATQCRIT_MASK) {
-		dev_info(&adapter->pdev->dev, "ASQ Critical Error detected\n");
-		val &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK;
-	}
-	if (oldval != val)
-		wr32(hw, hw->aq.asq.len, val);
-
-freedom:
-	kfree(event.msg_buf);
-out:
-	/* re-enable Admin queue interrupt cause */
-	i40evf_misc_irq_enable(adapter);
-}
-
-/**
- * i40evf_client_task - worker thread to perform client work
- * @work: pointer to work_struct containing our data
- *
- * This task handles client interactions. Because client calls can be
- * reentrant, we can't handle them in the watchdog.
- **/
-static void i40evf_client_task(struct work_struct *work)
-{
-	struct i40evf_adapter *adapter =
-		container_of(work, struct i40evf_adapter, client_task.work);
-
-	/* If we can't get the client bit, just give up. We'll be rescheduled
-	 * later.
-	 */
-
-	if (test_and_set_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section))
-		return;
-
-	if (adapter->flags & I40EVF_FLAG_SERVICE_CLIENT_REQUESTED) {
-		i40evf_client_subtask(adapter);
-		adapter->flags &= ~I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
-		goto out;
-	}
-	if (adapter->flags & I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS) {
-		i40evf_notify_client_l2_params(&adapter->vsi);
-		adapter->flags &= ~I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS;
-		goto out;
-	}
-	if (adapter->flags & I40EVF_FLAG_CLIENT_NEEDS_CLOSE) {
-		i40evf_notify_client_close(&adapter->vsi, false);
-		adapter->flags &= ~I40EVF_FLAG_CLIENT_NEEDS_CLOSE;
-		goto out;
-	}
-	if (adapter->flags & I40EVF_FLAG_CLIENT_NEEDS_OPEN) {
-		i40evf_notify_client_open(&adapter->vsi);
-		adapter->flags &= ~I40EVF_FLAG_CLIENT_NEEDS_OPEN;
-	}
-out:
-	clear_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section);
-}
-
-/**
- * i40evf_free_all_tx_resources - Free Tx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all transmit software resources
- **/
-void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter)
-{
-	int i;
-
-	if (!adapter->tx_rings)
-		return;
-
-	for (i = 0; i < adapter->num_active_queues; i++)
-		if (adapter->tx_rings[i].desc)
-			i40evf_free_tx_resources(&adapter->tx_rings[i]);
-}
-
-/**
- * i40evf_setup_all_tx_resources - allocate all queues Tx resources
- * @adapter: board private structure
- *
- * If this function returns with an error, then it's possible one or
- * more of the rings is populated (while the rest are not).  It is the
- * caller's duty to clean those orphaned rings.
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter)
-{
-	int i, err = 0;
-
-	for (i = 0; i < adapter->num_active_queues; i++) {
-		adapter->tx_rings[i].count = adapter->tx_desc_count;
-		err = i40evf_setup_tx_descriptors(&adapter->tx_rings[i]);
-		if (!err)
-			continue;
-		dev_err(&adapter->pdev->dev,
-			"Allocation for Tx Queue %u failed\n", i);
-		break;
-	}
-
-	return err;
-}
-
-/**
- * i40evf_setup_all_rx_resources - allocate all queues Rx resources
- * @adapter: board private structure
- *
- * If this function returns with an error, then it's possible one or
- * more of the rings is populated (while the rest are not).  It is the
- * caller's duty to clean those orphaned rings.
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter)
-{
-	int i, err = 0;
-
-	for (i = 0; i < adapter->num_active_queues; i++) {
-		adapter->rx_rings[i].count = adapter->rx_desc_count;
-		err = i40evf_setup_rx_descriptors(&adapter->rx_rings[i]);
-		if (!err)
-			continue;
-		dev_err(&adapter->pdev->dev,
-			"Allocation for Rx Queue %u failed\n", i);
-		break;
-	}
-	return err;
-}
-
-/**
- * i40evf_free_all_rx_resources - Free Rx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all receive software resources
- **/
-void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
-{
-	int i;
-
-	if (!adapter->rx_rings)
-		return;
-
-	for (i = 0; i < adapter->num_active_queues; i++)
-		if (adapter->rx_rings[i].desc)
-			i40evf_free_rx_resources(&adapter->rx_rings[i]);
-}
-
-/**
- * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
- * @adapter: board private structure
- * @max_tx_rate: max Tx bw for a tc
- **/
-static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
-					u64 max_tx_rate)
-{
-	int speed = 0, ret = 0;
-
-	switch (adapter->link_speed) {
-	case I40E_LINK_SPEED_40GB:
-		speed = 40000;
-		break;
-	case I40E_LINK_SPEED_25GB:
-		speed = 25000;
-		break;
-	case I40E_LINK_SPEED_20GB:
-		speed = 20000;
-		break;
-	case I40E_LINK_SPEED_10GB:
-		speed = 10000;
-		break;
-	case I40E_LINK_SPEED_1GB:
-		speed = 1000;
-		break;
-	case I40E_LINK_SPEED_100MB:
-		speed = 100;
-		break;
-	default:
-		break;
-	}
-
-	if (max_tx_rate > speed) {
-		dev_err(&adapter->pdev->dev,
-			"Invalid tx rate specified\n");
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
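/* Worked example (hypothetical numbers): on a 10 Gb link the switch above
 * yields speed = 10000 (Mbps), so an aggregate max_tx_rate of 9000 Mbps
 * passes while 12000 Mbps fails with -EINVAL. An unrecognized link_speed
 * leaves speed = 0, rejecting any non-zero rate.
 */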
-
-/**
- * i40evf_validate_channel_config - validate queue mapping info
- * @adapter: board private structure
- * @mqprio_qopt: queue parameters
- *
- * This function validates if the config provided by the user to
- * configure queue channels is valid or not. Returns 0 on a valid
- * config.
- **/
-static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
-				     struct tc_mqprio_qopt_offload *mqprio_qopt)
-{
-	u64 total_max_rate = 0;
-	int i, num_qps = 0;
-	u64 tx_rate = 0;
-	int ret = 0;
-
-	if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
-	    mqprio_qopt->qopt.num_tc < 1)
-		return -EINVAL;
-
-	for (i = 0; i < mqprio_qopt->qopt.num_tc; i++) {
-		if (!mqprio_qopt->qopt.count[i] ||
-		    mqprio_qopt->qopt.offset[i] != num_qps)
-			return -EINVAL;
-		if (mqprio_qopt->min_rate[i]) {
-			dev_err(&adapter->pdev->dev,
-				"Invalid min tx rate (greater than 0) specified\n");
-			return -EINVAL;
-		}
-		/* convert to Mbps */
-		tx_rate = div_u64(mqprio_qopt->max_rate[i],
-				  I40EVF_MBPS_DIVISOR);
-		total_max_rate += tx_rate;
-		num_qps += mqprio_qopt->qopt.count[i];
-	}
-	if (num_qps > I40EVF_MAX_REQ_QUEUES)
-		return -EINVAL;
-
-	ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
-	return ret;
-}
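/* Worked example (hypothetical values): the offset check above forces the
 * per-TC queue ranges to be contiguous and non-overlapping, i.e. offset[i]
 * must equal the running sum of the earlier counts. For num_tc = 3:
 *
 *	count[]  = { 4, 2, 2 }
 *	offset[] = { 0, 4, 6 }		passes, num_qps = 8
 *	offset[] = { 0, 4, 8 }		fails at i = 2 (expected offset 6)
 */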
-
-/**
- * i40evf_del_all_cloud_filters - delete all cloud filters on the
- * traffic classes
- * @adapter: board private structure
- **/
-static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
-{
-	struct i40evf_cloud_filter *cf, *cftmp;
-
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
-				 list) {
-		list_del(&cf->list);
-		kfree(cf);
-		adapter->num_cloud_filters--;
-	}
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-}
-
-/**
- * __i40evf_setup_tc - configure multiple traffic classes
- * @netdev: network interface device structure
- * @type_data: tc offload data
- *
- * This function processes the config information provided by the
- * user to configure traffic classes/queue channels and packages the
- * information to request the PF to setup traffic classes.
- *
- * Returns 0 on success.
- **/
-static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
-{
-	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct virtchnl_vf_resource *vfres = adapter->vf_res;
-	u8 num_tc = 0, total_qps = 0;
-	int ret = 0, netdev_tc = 0;
-	u64 max_tx_rate;
-	u16 mode;
-	int i;
-
-	num_tc = mqprio_qopt->qopt.num_tc;
-	mode = mqprio_qopt->mode;
-
-	/* delete queue_channel */
-	if (!mqprio_qopt->qopt.hw) {
-		if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
-			/* reset the tc configuration */
-			netdev_reset_tc(netdev);
-			adapter->num_tc = 0;
-			netif_tx_stop_all_queues(netdev);
-			netif_tx_disable(netdev);
-			i40evf_del_all_cloud_filters(adapter);
-			adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
-			goto exit;
-		} else {
-			return -EINVAL;
-		}
-	}
-
-	/* add queue channel */
-	if (mode == TC_MQPRIO_MODE_CHANNEL) {
-		if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
-			dev_err(&adapter->pdev->dev, "ADq not supported\n");
-			return -EOPNOTSUPP;
-		}
-		if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
-			dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
-			return -EINVAL;
-		}
-
-		ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
-		if (ret)
-			return ret;
-		/* Return if same TC config is requested */
-		if (adapter->num_tc == num_tc)
-			return 0;
-		adapter->num_tc = num_tc;
-
-		for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
-			if (i < num_tc) {
-				adapter->ch_config.ch_info[i].count =
-					mqprio_qopt->qopt.count[i];
-				adapter->ch_config.ch_info[i].offset =
-					mqprio_qopt->qopt.offset[i];
-				total_qps += mqprio_qopt->qopt.count[i];
-				max_tx_rate = mqprio_qopt->max_rate[i];
-				/* convert to Mbps */
-				max_tx_rate = div_u64(max_tx_rate,
-						      I40EVF_MBPS_DIVISOR);
-				adapter->ch_config.ch_info[i].max_tx_rate =
-					max_tx_rate;
-			} else {
-				adapter->ch_config.ch_info[i].count = 1;
-				adapter->ch_config.ch_info[i].offset = 0;
-			}
-		}
-		adapter->ch_config.total_qps = total_qps;
-		netif_tx_stop_all_queues(netdev);
-		netif_tx_disable(netdev);
-		adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
-		netdev_reset_tc(netdev);
-		/* Report the tc mapping up the stack */
-		netdev_set_num_tc(adapter->netdev, num_tc);
-		for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
-			u16 qcount = mqprio_qopt->qopt.count[i];
-			u16 qoffset = mqprio_qopt->qopt.offset[i];
-
-			if (i < num_tc)
-				netdev_set_tc_queue(netdev, netdev_tc++, qcount,
-						    qoffset);
-		}
-	}
-exit:
-	return ret;
-}
-
-/**
- * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
- * @adapter: board private structure
- * @f: pointer to struct tc_cls_flower_offload
- * @filter: pointer to cloud filter structure
- */
-static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
-				   struct tc_cls_flower_offload *f,
-				   struct i40evf_cloud_filter *filter)
-{
-	u16 n_proto_mask = 0;
-	u16 n_proto_key = 0;
-	u8 field_flags = 0;
-	u16 addr_type = 0;
-	u16 n_proto = 0;
-	int i = 0;
-	struct virtchnl_filter *vf = &filter->f;
-
-	if (f->dissector->used_keys &
-	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
-	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
-	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
-	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
-	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
-	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
-	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
-	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
-		dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
-			f->dissector->used_keys);
-		return -EOPNOTSUPP;
-	}
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->mask);
-
-		if (mask->keyid != 0)
-			field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
-	}
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		n_proto_key = ntohs(key->n_proto);
-		n_proto_mask = ntohs(mask->n_proto);
-
-		if (n_proto_key == ETH_P_ALL) {
-			n_proto_key = 0;
-			n_proto_mask = 0;
-		}
-		n_proto = n_proto_key & n_proto_mask;
-		if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
-			return -EINVAL;
-		if (n_proto == ETH_P_IPV6) {
-			/* specify flow type as TCP IPv6 */
-			vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
-		}
-
-		if (key->ip_proto != IPPROTO_TCP) {
-			dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
-			return -EINVAL;
-		}
-	}
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
-
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
-		/* use is_broadcast and is_zero to check for all 0xff or all 0 */
-		if (!is_zero_ether_addr(mask->dst)) {
-			if (is_broadcast_ether_addr(mask->dst)) {
-				field_flags |= I40EVF_CLOUD_FIELD_OMAC;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
-					mask->dst);
-				return I40E_ERR_CONFIG;
-			}
-		}
-
-		if (!is_zero_ether_addr(mask->src)) {
-			if (is_broadcast_ether_addr(mask->src)) {
-				field_flags |= I40EVF_CLOUD_FIELD_IMAC;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
-					mask->src);
-				return I40E_ERR_CONFIG;
-			}
-		}
-
-		if (!is_zero_ether_addr(key->dst))
-			if (is_valid_ether_addr(key->dst) ||
-			    is_multicast_ether_addr(key->dst)) {
-				/* set the mask if a valid dst_mac address */
-				for (i = 0; i < ETH_ALEN; i++)
-					vf->mask.tcp_spec.dst_mac[i] |= 0xff;
-				ether_addr_copy(vf->data.tcp_spec.dst_mac,
-						key->dst);
-			}
-
-		if (!is_zero_ether_addr(key->src))
-			if (is_valid_ether_addr(key->src) ||
-			    is_multicast_ether_addr(key->src)) {
-				/* set the mask if a valid src_mac address */
-				for (i = 0; i < ETH_ALEN; i++)
-					vf->mask.tcp_spec.src_mac[i] |= 0xff;
-				ether_addr_copy(vf->data.tcp_spec.src_mac,
-						key->src);
-			}
-	}
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
-
-		if (mask->vlan_id) {
-			if (mask->vlan_id == VLAN_VID_MASK) {
-				field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
-					mask->vlan_id);
-				return I40E_ERR_CONFIG;
-			}
-		}
-		vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
-		vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
-	}
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
-
-		addr_type = key->addr_type;
-	}
-
-	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->mask);
-
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be32(0xffffffff)) {
-				field_flags |= I40EVF_CLOUD_FIELD_IIP;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
-					be32_to_cpu(mask->dst));
-				return I40E_ERR_CONFIG;
-			}
-		}
-
-		if (mask->src) {
-			if (mask->src == cpu_to_be32(0xffffffff)) {
-				field_flags |= I40EVF_CLOUD_FIELD_IIP;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
-					be32_to_cpu(mask->src));
-				return I40E_ERR_CONFIG;
-			}
-		}
-
-		if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
-			dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
-			return I40E_ERR_CONFIG;
-		}
-		if (key->dst) {
-			vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
-			vf->data.tcp_spec.dst_ip[0] = key->dst;
-		}
-		if (key->src) {
-			vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
-			vf->data.tcp_spec.src_ip[0] = key->src;
-		}
-	}
-
-	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->mask);
-
-		/* validate mask, make sure it is not IPV6_ADDR_ANY */
-		if (ipv6_addr_any(&mask->dst)) {
-			dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
-				IPV6_ADDR_ANY);
-			return I40E_ERR_CONFIG;
-		}
-
-		/* src and dest IPv6 address should not be LOOPBACK
-		 * (0:0:0:0:0:0:0:1) which can be represented as ::1
-		 */
-		if (ipv6_addr_loopback(&key->dst) ||
-		    ipv6_addr_loopback(&key->src)) {
-			dev_err(&adapter->pdev->dev,
-				"ipv6 addr should not be loopback\n");
-			return I40E_ERR_CONFIG;
-		}
-		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
-			field_flags |= I40EVF_CLOUD_FIELD_IIP;
-
-		for (i = 0; i < 4; i++)
-			vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
-		memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
-		       sizeof(vf->data.tcp_spec.dst_ip));
-		for (i = 0; i < 4; i++)
-			vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
-		memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
-		       sizeof(vf->data.tcp_spec.src_ip));
-	}
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->mask);
-
-		if (mask->src) {
-			if (mask->src == cpu_to_be16(0xffff)) {
-				field_flags |= I40EVF_CLOUD_FIELD_IIP;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
-					be16_to_cpu(mask->src));
-				return I40E_ERR_CONFIG;
-			}
-		}
-
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be16(0xffff)) {
-				field_flags |= I40EVF_CLOUD_FIELD_IIP;
-			} else {
-				dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
-					be16_to_cpu(mask->dst));
-				return I40E_ERR_CONFIG;
-			}
-		}
-		if (key->dst) {
-			vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
-			vf->data.tcp_spec.dst_port = key->dst;
-		}
-
-		if (key->src) {
-			vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
-			vf->data.tcp_spec.src_port = key->src;
-		}
-	}
-	vf->field_flags = field_flags;
-
-	return 0;
-}
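/* Matching note on the parser above: each flower field arrives as a key/mask
 * pair, and a packet matches when the masked packet field equals the masked
 * key. The VF hardware can only match a field exactly or ignore it entirely,
 * so the parser accepts an all-ones mask (exact match, recorded in
 * field_flags) or an all-zero mask (wildcard) and rejects anything in
 * between with I40E_ERR_CONFIG.
 */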
-
-/**
- * i40evf_handle_tclass - Forward to a traffic class on the device
- * @adapter: board private structure
- * @tc: traffic class index on the device
- * @filter: pointer to cloud filter structure
- */
-static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
-				struct i40evf_cloud_filter *filter)
-{
-	if (tc == 0)
-		return 0;
-	if (tc < adapter->num_tc) {
-		if (!filter->f.data.tcp_spec.dst_port) {
-			dev_err(&adapter->pdev->dev,
-				"Specify destination port to redirect to traffic class other than TC0\n");
-			return -EINVAL;
-		}
-	}
-	/* redirect to a traffic class on the same device */
-	filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
-	filter->f.action_meta = tc;
-	return 0;
-}
-
-/**
- * i40evf_configure_clsflower - Add tc flower filters
- * @adapter: board private structure
- * @cls_flower: Pointer to struct tc_cls_flower_offload
- */
-static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
-				      struct tc_cls_flower_offload *cls_flower)
-{
-	int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
-	struct i40evf_cloud_filter *filter = NULL;
-	int err = -EINVAL, count = 50;
-
-	if (tc < 0) {
-		dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
-		return -EINVAL;
-	}
-
-	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	if (!filter)
-		return -ENOMEM;
-
-	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
-				&adapter->crit_section)) {
-		if (--count == 0)
-			goto err;
-		udelay(1);
-	}
-
-	filter->cookie = cls_flower->cookie;
-
-	/* set the mask to all zeroes to begin with */
-	memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
-	/* start out with flow type and eth type IPv4 */
-	filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
-	err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
-	if (err < 0)
-		goto err;
-
-	err = i40evf_handle_tclass(adapter, tc, filter);
-	if (err < 0)
-		goto err;
-
-	/* add filter to the list */
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	list_add_tail(&filter->list, &adapter->cloud_filter_list);
-	adapter->num_cloud_filters++;
-	filter->add = true;
-	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-err:
-	if (err)
-		kfree(filter);
-
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-	return err;
-}
-
-/**
- * i40evf_find_cf - Find the cloud filter in the list
- * @adapter: Board private structure
- * @cookie: filter specific cookie
- *
- * Returns ptr to the filter object or NULL. Must be called while holding the
- * cloud_filter_list_lock.
- */
-static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
-						  unsigned long *cookie)
-{
-	struct i40evf_cloud_filter *filter = NULL;
-
-	if (!cookie)
-		return NULL;
-
-	list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
-		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
-			return filter;
-	}
-	return NULL;
-}
-
-/**
- * i40evf_delete_clsflower - Remove tc flower filters
- * @adapter: board private structure
- * @cls_flower: Pointer to struct tc_cls_flower_offload
- */
-static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
-				   struct tc_cls_flower_offload *cls_flower)
-{
-	struct i40evf_cloud_filter *filter = NULL;
-	int err = 0;
-
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	filter = i40evf_find_cf(adapter, &cls_flower->cookie);
-	if (filter) {
-		filter->del = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
-	} else {
-		err = -EINVAL;
-	}
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-
-	return err;
-}
-
-/**
- * i40evf_setup_tc_cls_flower - flower classifier offloads
- * @adapter: board private structure
- * @cls_flower: Pointer to struct tc_cls_flower_offload
- */
-static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
-				      struct tc_cls_flower_offload *cls_flower)
-{
-	if (cls_flower->common.chain_index)
-		return -EOPNOTSUPP;
-
-	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
-		return i40evf_configure_clsflower(adapter, cls_flower);
-	case TC_CLSFLOWER_DESTROY:
-		return i40evf_delete_clsflower(adapter, cls_flower);
-	case TC_CLSFLOWER_STATS:
-		return -EOPNOTSUPP;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-/**
- * i40evf_setup_tc_block_cb - block callback for tc
- * @type: type of offload
- * @type_data: offload data
- * @cb_priv: adapter private structure (passed when the callback was registered)
- *
- * This function is the block callback for traffic classes
- **/
-static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
-				    void *cb_priv)
-{
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return i40evf_setup_tc_cls_flower(cb_priv, type_data);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-/**
- * i40evf_setup_tc_block - register callbacks for tc
- * @dev: network interface device structure
- * @f: tc offload data
- *
- * This function registers block callbacks for tc
- * offloads
- **/
-static int i40evf_setup_tc_block(struct net_device *dev,
-				 struct tc_block_offload *f)
-{
-	struct i40evf_adapter *adapter = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
-					     adapter, adapter, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
-					adapter);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-/**
- * i40evf_setup_tc - configure multiple traffic classes
- * @netdev: network interface device structure
- * @type: type of offload
- * @type_data: tc offload data
- *
- * This function is the callback to ndo_setup_tc in the
- * netdev_ops.
- *
- * Returns 0 on success
- **/
-static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-			   void *type_data)
-{
-	switch (type) {
-	case TC_SETUP_QDISC_MQPRIO:
-		return __i40evf_setup_tc(netdev, type_data);
-	case TC_SETUP_BLOCK:
-		return i40evf_setup_tc_block(netdev, type_data);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-/**
- * i40evf_open - Called when a network interface is made active
- * @netdev: network interface device structure
- *
- * Returns 0 on success, negative value on failure
- *
- * The open entry point is called when a network interface is made
- * active by the system (IFF_UP).  At this point all resources needed
- * for transmit and receive operations are allocated, the interrupt
- * handler is registered with the OS, the watchdog timer is started,
- * and the stack is notified that the interface is ready.
- **/
-static int i40evf_open(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int err;
-
-	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
-		dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
-		return -EIO;
-	}
-
-	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
-				&adapter->crit_section))
-		usleep_range(500, 1000);
-
-	if (adapter->state != __I40EVF_DOWN) {
-		err = -EBUSY;
-		goto err_unlock;
-	}
-
-	/* allocate transmit descriptors */
-	err = i40evf_setup_all_tx_resources(adapter);
-	if (err)
-		goto err_setup_tx;
-
-	/* allocate receive descriptors */
-	err = i40evf_setup_all_rx_resources(adapter);
-	if (err)
-		goto err_setup_rx;
-
-	/* clear any pending interrupts, may auto mask */
-	err = i40evf_request_traffic_irqs(adapter, netdev->name);
-	if (err)
-		goto err_req_irq;
-
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	i40evf_add_filter(adapter, adapter->hw.mac.addr);
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	i40evf_configure(adapter);
-
-	i40evf_up_complete(adapter);
-
-	i40evf_irq_enable(adapter, true);
-
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-
-	return 0;
-
-err_req_irq:
-	i40evf_down(adapter);
-	i40evf_free_traffic_irqs(adapter);
-err_setup_rx:
-	i40evf_free_all_rx_resources(adapter);
-err_setup_tx:
-	i40evf_free_all_tx_resources(adapter);
-err_unlock:
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-
-	return err;
-}
-
-/**
- * i40evf_close - Disables a network interface
- * @netdev: network interface device structure
- *
- * Returns 0, this is not allowed to fail
- *
- * The close entry point is called when an interface is de-activated
- * by the OS.  The hardware is still under the drivers control, but
- * needs to be disabled. All IRQs except vector 0 (reserved for admin queue)
- * are freed, along with all transmit and receive resources.
- **/
-static int i40evf_close(struct net_device *netdev)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int status;
-
-	if (adapter->state <= __I40EVF_DOWN_PENDING)
-		return 0;
-
-	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
-				&adapter->crit_section))
-		usleep_range(500, 1000);
-
-	set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
-	if (CLIENT_ENABLED(adapter))
-		adapter->flags |= I40EVF_FLAG_CLIENT_NEEDS_CLOSE;
-
-	i40evf_down(adapter);
-	adapter->state = __I40EVF_DOWN_PENDING;
-	i40evf_free_traffic_irqs(adapter);
-
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-
-	/* We explicitly don't free resources here because the hardware is
-	 * still active and can DMA into memory. Resources are cleared in
-	 * i40evf_virtchnl_completion() after we get confirmation from the PF
-	 * driver that the rings have been stopped.
-	 *
-	 * Also, we wait for state to transition to __I40EVF_DOWN before
-	 * returning. State change occurs in i40evf_virtchnl_completion() after
-	 * VF resources are released (which occurs after PF driver processes and
-	 * responds to admin queue commands).
-	 */
-
-	status = wait_event_timeout(adapter->down_waitqueue,
-				    adapter->state == __I40EVF_DOWN,
-				    msecs_to_jiffies(200));
-	if (!status)
-		netdev_warn(netdev, "Device resources not yet released\n");
-	return 0;
-}
-
-/**
- * i40evf_change_mtu - Change the Maximum Transfer Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns 0 on success, negative on failure
- **/
-static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	netdev->mtu = new_mtu;
-	if (CLIENT_ENABLED(adapter)) {
-		i40evf_notify_client_l2_params(&adapter->vsi);
-		adapter->flags |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
-	}
-	adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
-	schedule_work(&adapter->reset_task);
-
-	return 0;
-}
-
-/**
- * i40evf_set_features - set the netdev feature flags
- * @netdev: ptr to the netdev being adjusted
- * @features: the feature set that the stack is suggesting
- * Note: expects to be called while under rtnl_lock()
- **/
-static int i40evf_set_features(struct net_device *netdev,
-			       netdev_features_t features)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	/* Don't allow changing VLAN_RX flag when VLAN is set for VF
-	 * and return an error in this case
-	 */
-	if (VLAN_ALLOWED(adapter)) {
-		if (features & NETIF_F_HW_VLAN_CTAG_RX)
-			adapter->aq_required |=
-				I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
-		else
-			adapter->aq_required |=
-				I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
-	} else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * i40evf_features_check - Validate encapsulated packet conforms to limits
- * @skb: skb buff
- * @dev: This physical port's netdev
- * @features: Offload features that the stack believes apply
- **/
-static netdev_features_t i40evf_features_check(struct sk_buff *skb,
-					       struct net_device *dev,
-					       netdev_features_t features)
-{
-	size_t len;
-
-	/* No point in doing any of this if neither checksum nor GSO are
-	 * being requested for this frame.  We can rule out both by just
-	 * checking for CHECKSUM_PARTIAL
-	 */
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return features;
-
-	/* We cannot support GSO if the MSS is going to be less than
-	 * 64 bytes.  If it is then we need to drop support for GSO.
-	 */
-	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
-		features &= ~NETIF_F_GSO_MASK;
-
-	/* MACLEN can support at most 63 words */
-	len = skb_network_header(skb) - skb->data;
-	if (len & ~(63 * 2))
-		goto out_err;
-
-	/* IPLEN and EIPLEN can support at most 127 dwords */
-	len = skb_transport_header(skb) - skb_network_header(skb);
-	if (len & ~(127 * 4))
-		goto out_err;
-
-	if (skb->encapsulation) {
-		/* L4TUNLEN can support at most 127 words */
-		len = skb_inner_network_header(skb) - skb_transport_header(skb);
-		if (len & ~(127 * 2))
-			goto out_err;
-
-		/* IPLEN can support at most 127 dwords */
-		len = skb_inner_transport_header(skb) -
-		      skb_inner_network_header(skb);
-		if (len & ~(127 * 4))
-			goto out_err;
-	}
-
-	/* No need to validate L4LEN as TCP is the only protocol with a
-	 * flexible value and we support all possible values supported
-	 * by TCP, which is at most 15 dwords
-	 */
-
-	return features;
-out_err:
-	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
-}
-
-/**
- * i40evf_fix_features - fix up the netdev feature bits
- * @netdev: our net device
- * @features: desired feature bits
- *
- * Returns fixed-up features bits
- **/
-static netdev_features_t i40evf_fix_features(struct net_device *netdev,
-					     netdev_features_t features)
-{
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
-		features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
-			      NETIF_F_HW_VLAN_CTAG_RX |
-			      NETIF_F_HW_VLAN_CTAG_FILTER);
-
-	return features;
-}
-
-static const struct net_device_ops i40evf_netdev_ops = {
-	.ndo_open		= i40evf_open,
-	.ndo_stop		= i40evf_close,
-	.ndo_start_xmit		= i40evf_xmit_frame,
-	.ndo_set_rx_mode	= i40evf_set_rx_mode,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= i40evf_set_mac,
-	.ndo_change_mtu		= i40evf_change_mtu,
-	.ndo_tx_timeout		= i40evf_tx_timeout,
-	.ndo_vlan_rx_add_vid	= i40evf_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	= i40evf_vlan_rx_kill_vid,
-	.ndo_features_check	= i40evf_features_check,
-	.ndo_fix_features	= i40evf_fix_features,
-	.ndo_set_features	= i40evf_set_features,
-	.ndo_setup_tc		= i40evf_setup_tc,
-};
-
-/**
- * i40evf_check_reset_complete - check that VF reset is complete
- * @hw: pointer to hw struct
- *
- * Returns 0 if device is ready to use, or -EBUSY if it's in reset.
- **/
-static int i40evf_check_reset_complete(struct i40e_hw *hw)
-{
-	u32 rstat;
-	int i;
-
-	for (i = 0; i < 100; i++) {
-		rstat = rd32(hw, I40E_VFGEN_RSTAT) &
-			    I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if ((rstat == VIRTCHNL_VFR_VFACTIVE) ||
-		    (rstat == VIRTCHNL_VFR_COMPLETED))
-			return 0;
-		usleep_range(10, 20);
-	}
-	return -EBUSY;
-}
-
-/**
- * i40evf_process_config - Process the config information we got from the PF
- * @adapter: board private structure
- *
- * Verify that we have a valid config struct, and set up our netdev features
- * and our VSI struct.
- **/
-int i40evf_process_config(struct i40evf_adapter *adapter)
-{
-	struct virtchnl_vf_resource *vfres = adapter->vf_res;
-	int i, num_req_queues = adapter->num_req_queues;
-	struct net_device *netdev = adapter->netdev;
-	struct i40e_vsi *vsi = &adapter->vsi;
-	netdev_features_t hw_enc_features;
-	netdev_features_t hw_features;
-
-	/* got VF config message back from PF, now we can parse it */
-	for (i = 0; i < vfres->num_vsis; i++) {
-		if (vfres->vsi_res[i].vsi_type == VIRTCHNL_VSI_SRIOV)
-			adapter->vsi_res = &vfres->vsi_res[i];
-	}
-	if (!adapter->vsi_res) {
-		dev_err(&adapter->pdev->dev, "No LAN VSI found\n");
-		return -ENODEV;
-	}
-
-	if (num_req_queues &&
-	    num_req_queues != adapter->vsi_res->num_queue_pairs) {
-		/* Problem.  The PF gave us fewer queues than what we had
-		 * negotiated in our request.  Need a reset to see if we can
-		 * get back to a working state.
-		 */
-		dev_err(&adapter->pdev->dev,
-			"Requested %d queues, but PF only gave us %d.\n",
-			num_req_queues,
-			adapter->vsi_res->num_queue_pairs);
-		adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
-		adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
-		i40evf_schedule_reset(adapter);
-		return -ENODEV;
-	}
-	adapter->num_req_queues = 0;
-
-	hw_enc_features = NETIF_F_SG			|
-			  NETIF_F_IP_CSUM		|
-			  NETIF_F_IPV6_CSUM		|
-			  NETIF_F_HIGHDMA		|
-			  NETIF_F_SOFT_FEATURES	|
-			  NETIF_F_TSO			|
-			  NETIF_F_TSO_ECN		|
-			  NETIF_F_TSO6			|
-			  NETIF_F_SCTP_CRC		|
-			  NETIF_F_RXHASH		|
-			  NETIF_F_RXCSUM		|
-			  0;
-
-	/* advertise to stack only if offloads for encapsulated packets are
-	 * supported
-	 */
-	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
-		hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL	|
-				   NETIF_F_GSO_GRE		|
-				   NETIF_F_GSO_GRE_CSUM		|
-				   NETIF_F_GSO_IPXIP4		|
-				   NETIF_F_GSO_IPXIP6		|
-				   NETIF_F_GSO_UDP_TUNNEL_CSUM	|
-				   NETIF_F_GSO_PARTIAL		|
-				   0;
-
-		if (!(vfres->vf_cap_flags &
-		      VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
-			netdev->gso_partial_features |=
-				NETIF_F_GSO_UDP_TUNNEL_CSUM;
-
-		netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
-		netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
-		netdev->hw_enc_features |= hw_enc_features;
-	}
-	/* record features VLANs can make use of */
-	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
-
-	/* Write features and hw_features separately to avoid polluting
-	 * with, or dropping, features that are set when we registered.
-	 */
-	hw_features = hw_enc_features;
-
-	/* Enable VLAN features if supported */
-	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
-		hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
-				NETIF_F_HW_VLAN_CTAG_RX);
-	/* Enable cloud filter if ADQ is supported */
-	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
-		hw_features |= NETIF_F_HW_TC;
-
-	netdev->hw_features |= hw_features;
-
-	netdev->features |= hw_features;
-
-	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
-		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-
-	/* Do not turn on offloads when they are requested to be turned off.
-	 * TSO needs minimum 576 bytes to work correctly.
-	 */
-	if (netdev->wanted_features) {
-		if (!(netdev->wanted_features & NETIF_F_TSO) ||
-		    netdev->mtu < 576)
-			netdev->features &= ~NETIF_F_TSO;
-		if (!(netdev->wanted_features & NETIF_F_TSO6) ||
-		    netdev->mtu < 576)
-			netdev->features &= ~NETIF_F_TSO6;
-		if (!(netdev->wanted_features & NETIF_F_TSO_ECN))
-			netdev->features &= ~NETIF_F_TSO_ECN;
-		if (!(netdev->wanted_features & NETIF_F_GRO))
-			netdev->features &= ~NETIF_F_GRO;
-		if (!(netdev->wanted_features & NETIF_F_GSO))
-			netdev->features &= ~NETIF_F_GSO;
-	}
-
-	adapter->vsi.id = adapter->vsi_res->vsi_id;
-
-	adapter->vsi.back = adapter;
-	adapter->vsi.base_vector = 1;
-	adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
-	vsi->netdev = adapter->netdev;
-	vsi->qs_handle = adapter->vsi_res->qset_handle;
-	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
-		adapter->rss_key_size = vfres->rss_key_size;
-		adapter->rss_lut_size = vfres->rss_lut_size;
-	} else {
-		adapter->rss_key_size = I40EVF_HKEY_ARRAY_SIZE;
-		adapter->rss_lut_size = I40EVF_HLUT_ARRAY_SIZE;
-	}
-
-	return 0;
-}
-
-/**
- * i40evf_init_task - worker thread to perform delayed initialization
- * @work: pointer to work_struct containing our data
- *
- * This task completes the work that was begun in probe. Due to the nature
- * of VF-PF communications, we may need to wait tens of milliseconds to get
- * responses back from the PF. Rather than busy-wait in probe and bog down the
- * whole system, we'll do it in a task so we can sleep.
- * This task only runs during driver init. Once we've established
- * communications with the PF driver and set up our netdev, the watchdog
- * takes over.
- **/
-static void i40evf_init_task(struct work_struct *work)
-{
-	struct i40evf_adapter *adapter = container_of(work,
-						      struct i40evf_adapter,
-						      init_task.work);
-	struct net_device *netdev = adapter->netdev;
-	struct i40e_hw *hw = &adapter->hw;
-	struct pci_dev *pdev = adapter->pdev;
-	int err, bufsz;
-
-	switch (adapter->state) {
-	case __I40EVF_STARTUP:
-		/* driver loaded, probe complete */
-		adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
-		adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-		err = i40e_set_mac_type(hw);
-		if (err) {
-			dev_err(&pdev->dev, "Failed to set MAC type (%d)\n",
-				err);
-			goto err;
-		}
-		err = i40evf_check_reset_complete(hw);
-		if (err) {
-			dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n",
-				 err);
-			goto err;
-		}
-		hw->aq.num_arq_entries = I40EVF_AQ_LEN;
-		hw->aq.num_asq_entries = I40EVF_AQ_LEN;
-		hw->aq.arq_buf_size = I40EVF_MAX_AQ_BUF_SIZE;
-		hw->aq.asq_buf_size = I40EVF_MAX_AQ_BUF_SIZE;
-
-		err = i40evf_init_adminq(hw);
-		if (err) {
-			dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n",
-				err);
-			goto err;
-		}
-		err = i40evf_send_api_ver(adapter);
-		if (err) {
-			dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err);
-			i40evf_shutdown_adminq(hw);
-			goto err;
-		}
-		adapter->state = __I40EVF_INIT_VERSION_CHECK;
-		goto restart;
-	case __I40EVF_INIT_VERSION_CHECK:
-		if (!i40evf_asq_done(hw)) {
-			dev_err(&pdev->dev, "Admin queue command never completed\n");
-			i40evf_shutdown_adminq(hw);
-			adapter->state = __I40EVF_STARTUP;
-			goto err;
-		}
-
-		/* aq msg sent, awaiting reply */
-		err = i40evf_verify_api_ver(adapter);
-		if (err) {
-			if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK)
-				err = i40evf_send_api_ver(adapter);
-			else
-				dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n",
-					adapter->pf_version.major,
-					adapter->pf_version.minor,
-					VIRTCHNL_VERSION_MAJOR,
-					VIRTCHNL_VERSION_MINOR);
-			goto err;
-		}
-		err = i40evf_send_vf_config_msg(adapter);
-		if (err) {
-			dev_err(&pdev->dev, "Unable to send config request (%d)\n",
-				err);
-			goto err;
-		}
-		adapter->state = __I40EVF_INIT_GET_RESOURCES;
-		goto restart;
-	case __I40EVF_INIT_GET_RESOURCES:
-		/* aq msg sent, awaiting reply */
-		if (!adapter->vf_res) {
-			bufsz = sizeof(struct virtchnl_vf_resource) +
-				(I40E_MAX_VF_VSI *
-				 sizeof(struct virtchnl_vsi_resource));
-			adapter->vf_res = kzalloc(bufsz, GFP_KERNEL);
-			if (!adapter->vf_res)
-				goto err;
-		}
-		err = i40evf_get_vf_config(adapter);
-		if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) {
-			err = i40evf_send_vf_config_msg(adapter);
-			goto err;
-		} else if (err == I40E_ERR_PARAM) {
-			/* We only get ERR_PARAM if the device is in a very bad
-			 * state or if we've been disabled for previous bad
-			 * behavior. Either way, we're done now.
-			 */
-			i40evf_shutdown_adminq(hw);
-			dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
-			return;
-		}
-		if (err) {
-			dev_err(&pdev->dev, "Unable to get VF config (%d)\n",
-				err);
-			goto err_alloc;
-		}
-		adapter->state = __I40EVF_INIT_SW;
-		break;
-	default:
-		goto err_alloc;
-	}
-
-	if (i40evf_process_config(adapter))
-		goto err_alloc;
-	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-
-	adapter->flags |= I40EVF_FLAG_RX_CSUM_ENABLED;
-
-	netdev->netdev_ops = &i40evf_netdev_ops;
-	i40evf_set_ethtool_ops(netdev);
-	netdev->watchdog_timeo = 5 * HZ;
-
-	/* MTU range: 68 - 9710 */
-	netdev->min_mtu = ETH_MIN_MTU;
-	netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD;
-
-	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
-		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
-			 adapter->hw.mac.addr);
-		eth_hw_addr_random(netdev);
-		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
-	} else {
-		adapter->flags |= I40EVF_FLAG_ADDR_SET_BY_PF;
-		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
-		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
-	}
-
-	timer_setup(&adapter->watchdog_timer, i40evf_watchdog_timer, 0);
-	mod_timer(&adapter->watchdog_timer, jiffies + 1);
-
-	adapter->tx_desc_count = I40EVF_DEFAULT_TXD;
-	adapter->rx_desc_count = I40EVF_DEFAULT_RXD;
-	err = i40evf_init_interrupt_scheme(adapter);
-	if (err)
-		goto err_sw_init;
-	i40evf_map_rings_to_vectors(adapter);
-	if (adapter->vf_res->vf_cap_flags &
-	    VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
-		adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE;
-
-	err = i40evf_request_misc_irq(adapter);
-	if (err)
-		goto err_sw_init;
-
-	netif_carrier_off(netdev);
-	adapter->link_up = false;
-
-	if (!adapter->netdev_registered) {
-		err = register_netdev(netdev);
-		if (err)
-			goto err_register;
-	}
-
-	adapter->netdev_registered = true;
-
-	netif_tx_stop_all_queues(netdev);
-	if (CLIENT_ALLOWED(adapter)) {
-		err = i40evf_lan_add_device(adapter);
-		if (err)
-			dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
-				 err);
-	}
-
-	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
-	if (netdev->features & NETIF_F_GRO)
-		dev_info(&pdev->dev, "GRO is enabled\n");
-
-	adapter->state = __I40EVF_DOWN;
-	set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
-	i40evf_misc_irq_enable(adapter);
-	wake_up(&adapter->down_waitqueue);
-
-	adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL);
-	adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL);
-	if (!adapter->rss_key || !adapter->rss_lut)
-		goto err_mem;
-
-	if (RSS_AQ(adapter)) {
-		adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS;
-		mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-	} else {
-		i40evf_init_rss(adapter);
-	}
-	return;
-restart:
-	schedule_delayed_work(&adapter->init_task, msecs_to_jiffies(30));
-	return;
-err_mem:
-	i40evf_free_rss(adapter);
-err_register:
-	i40evf_free_misc_irq(adapter);
-err_sw_init:
-	i40evf_reset_interrupt_capability(adapter);
-err_alloc:
-	kfree(adapter->vf_res);
-	adapter->vf_res = NULL;
-err:
-	/* Things went into the weeds, so try again later */
-	if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) {
-		dev_err(&pdev->dev, "Failed to communicate with PF; waiting before retry\n");
-		adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
-		i40evf_shutdown_adminq(hw);
-		adapter->state = __I40EVF_STARTUP;
-		schedule_delayed_work(&adapter->init_task, HZ * 5);
-		return;
-	}
-	schedule_delayed_work(&adapter->init_task, HZ);
-}
-
-/**
- * i40evf_shutdown - Shutdown the device in preparation for a reboot
- * @pdev: pci device structure
- **/
-static void i40evf_shutdown(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-
-	netif_device_detach(netdev);
-
-	if (netif_running(netdev))
-		i40evf_close(netdev);
-
-	/* Prevent the watchdog from running. */
-	adapter->state = __I40EVF_REMOVE;
-	adapter->aq_required = 0;
-
-#ifdef CONFIG_PM
-	pci_save_state(pdev);
-
-#endif
-	pci_disable_device(pdev);
-}
-
-/**
- * i40evf_probe - Device Initialization Routine
- * @pdev: PCI device information struct
- * @ent: entry in i40evf_pci_tbl
- *
- * Returns 0 on success, negative on failure
- *
- * i40evf_probe initializes an adapter identified by a pci_dev structure.
- * The OS initialization, configuring of the adapter private structure,
- * and a hardware reset occur.
- **/
-static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	struct net_device *netdev;
-	struct i40evf_adapter *adapter = NULL;
-	struct i40e_hw *hw = NULL;
-	int err;
-
-	err = pci_enable_device(pdev);
-	if (err)
-		return err;
-
-	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-	if (err) {
-		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-		if (err) {
-			dev_err(&pdev->dev,
-				"DMA configuration failed: 0x%x\n", err);
-			goto err_dma;
-		}
-	}
-
-	err = pci_request_regions(pdev, i40evf_driver_name);
-	if (err) {
-		dev_err(&pdev->dev,
-			"pci_request_regions failed 0x%x\n", err);
-		goto err_pci_reg;
-	}
-
-	pci_enable_pcie_error_reporting(pdev);
-
-	pci_set_master(pdev);
-
-	netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter),
-				   I40EVF_MAX_REQ_QUEUES);
-	if (!netdev) {
-		err = -ENOMEM;
-		goto err_alloc_etherdev;
-	}
-
-	SET_NETDEV_DEV(netdev, &pdev->dev);
-
-	pci_set_drvdata(pdev, netdev);
-	adapter = netdev_priv(netdev);
-
-	adapter->netdev = netdev;
-	adapter->pdev = pdev;
-
-	hw = &adapter->hw;
-	hw->back = adapter;
-
-	adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
-	adapter->state = __I40EVF_STARTUP;
-
-	/* Call save state here because it relies on the adapter struct. */
-	pci_save_state(pdev);
-
-	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
-			      pci_resource_len(pdev, 0));
-	if (!hw->hw_addr) {
-		err = -EIO;
-		goto err_ioremap;
-	}
-	hw->vendor_id = pdev->vendor;
-	hw->device_id = pdev->device;
-	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
-	hw->subsystem_vendor_id = pdev->subsystem_vendor;
-	hw->subsystem_device_id = pdev->subsystem_device;
-	hw->bus.device = PCI_SLOT(pdev->devfn);
-	hw->bus.func = PCI_FUNC(pdev->devfn);
-	hw->bus.bus_id = pdev->bus->number;
-
-	/* set up the locks for the AQ, do this only once in probe
-	 * and destroy them only once in remove
-	 */
-	mutex_init(&hw->aq.asq_mutex);
-	mutex_init(&hw->aq.arq_mutex);
-
-	spin_lock_init(&adapter->mac_vlan_list_lock);
-	spin_lock_init(&adapter->cloud_filter_list_lock);
-
-	INIT_LIST_HEAD(&adapter->mac_filter_list);
-	INIT_LIST_HEAD(&adapter->vlan_filter_list);
-	INIT_LIST_HEAD(&adapter->cloud_filter_list);
-
-	INIT_WORK(&adapter->reset_task, i40evf_reset_task);
-	INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
-	INIT_WORK(&adapter->watchdog_task, i40evf_watchdog_task);
-	INIT_DELAYED_WORK(&adapter->client_task, i40evf_client_task);
-	INIT_DELAYED_WORK(&adapter->init_task, i40evf_init_task);
-	schedule_delayed_work(&adapter->init_task,
-			      msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
-
-	/* Setup the wait queue for indicating transition to down status */
-	init_waitqueue_head(&adapter->down_waitqueue);
-
-	return 0;
-
-err_ioremap:
-	free_netdev(netdev);
-err_alloc_etherdev:
-	pci_release_regions(pdev);
-err_pci_reg:
-err_dma:
-	pci_disable_device(pdev);
-	return err;
-}
-
-#ifdef CONFIG_PM
-/**
- * i40evf_suspend - Power management suspend routine
- * @pdev: PCI device information struct
- * @state: unused
- *
- * Called when the system (VM) is entering sleep/suspend.
- **/
-static int i40evf_suspend(struct pci_dev *pdev, pm_message_t state)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int retval = 0;
-
-	netif_device_detach(netdev);
-
-	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
-				&adapter->crit_section))
-		usleep_range(500, 1000);
-
-	if (netif_running(netdev)) {
-		rtnl_lock();
-		i40evf_down(adapter);
-		rtnl_unlock();
-	}
-	i40evf_free_misc_irq(adapter);
-	i40evf_reset_interrupt_capability(adapter);
-
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-
-	retval = pci_save_state(pdev);
-	if (retval)
-		return retval;
-
-	pci_disable_device(pdev);
-
-	return 0;
-}
-
-/**
- * i40evf_resume - Power management resume routine
- * @pdev: PCI device information struct
- *
- * Called when the system (VM) is resumed from sleep/suspend.
- **/
-static int i40evf_resume(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int err;
-
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	/* pci_restore_state clears dev->state_saved so call
-	 * pci_save_state to restore it.
-	 */
-	pci_save_state(pdev);
-
-	err = pci_enable_device_mem(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device from suspend.\n");
-		return err;
-	}
-	pci_set_master(pdev);
-
-	rtnl_lock();
-	err = i40evf_set_interrupt_capability(adapter);
-	if (err) {
-		rtnl_unlock();
-		dev_err(&pdev->dev, "Cannot enable MSI-X interrupts.\n");
-		return err;
-	}
-	err = i40evf_request_misc_irq(adapter);
-	rtnl_unlock();
-	if (err) {
-		dev_err(&pdev->dev, "Cannot get interrupt vector.\n");
-		return err;
-	}
-
-	schedule_work(&adapter->reset_task);
-
-	netif_device_attach(netdev);
-
-	return err;
-}
-
-#endif /* CONFIG_PM */
-/**
- * i40evf_remove - Device Removal Routine
- * @pdev: PCI device information struct
- *
- * i40evf_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.  This could be caused by a
- * Hot-Plug event, or because the driver is going to be removed from
- * memory.
- **/
-static void i40evf_remove(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40evf_vlan_filter *vlf, *vlftmp;
-	struct i40evf_mac_filter *f, *ftmp;
-	struct i40evf_cloud_filter *cf, *cftmp;
-	struct i40e_hw *hw = &adapter->hw;
-	int err;
-	/* Indicate we are in remove and not to run reset_task */
-	set_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section);
-	cancel_delayed_work_sync(&adapter->init_task);
-	cancel_work_sync(&adapter->reset_task);
-	cancel_delayed_work_sync(&adapter->client_task);
-	if (adapter->netdev_registered) {
-		unregister_netdev(netdev);
-		adapter->netdev_registered = false;
-	}
-	if (CLIENT_ALLOWED(adapter)) {
-		err = i40evf_lan_del_device(adapter);
-		if (err)
-			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
-				 err);
-	}
-
-	/* Shut down all the garbage mashers on the detention level */
-	adapter->state = __I40EVF_REMOVE;
-	adapter->aq_required = 0;
-	adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
-	i40evf_request_reset(adapter);
-	msleep(50);
-	/* If the FW isn't responding, kick it once, but only once. */
-	if (!i40evf_asq_done(hw)) {
-		i40evf_request_reset(adapter);
-		msleep(50);
-	}
-	i40evf_free_all_tx_resources(adapter);
-	i40evf_free_all_rx_resources(adapter);
-	i40evf_misc_irq_disable(adapter);
-	i40evf_free_misc_irq(adapter);
-	i40evf_reset_interrupt_capability(adapter);
-	i40evf_free_q_vectors(adapter);
-
-	if (adapter->watchdog_timer.function)
-		del_timer_sync(&adapter->watchdog_timer);
-
-	i40evf_free_rss(adapter);
-
-	if (hw->aq.asq.count)
-		i40evf_shutdown_adminq(hw);
-
-	/* destroy the locks only once, here */
-	mutex_destroy(&hw->aq.arq_mutex);
-	mutex_destroy(&hw->aq.asq_mutex);
-
-	iounmap(hw->hw_addr);
-	pci_release_regions(pdev);
-	i40evf_free_all_tx_resources(adapter);
-	i40evf_free_all_rx_resources(adapter);
-	i40evf_free_queues(adapter);
-	kfree(adapter->vf_res);
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-	/* If we got removed before an up/down sequence, we've got a filter
-	 * hanging out there that we need to get rid of.
-	 */
-	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
-		list_del(&f->list);
-		kfree(f);
-	}
-	list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
-				 list) {
-		list_del(&vlf->list);
-		kfree(vlf);
-	}
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-
-	spin_lock_bh(&adapter->cloud_filter_list_lock);
-	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
-		list_del(&cf->list);
-		kfree(cf);
-	}
-	spin_unlock_bh(&adapter->cloud_filter_list_lock);
-
-	free_netdev(netdev);
-
-	pci_disable_pcie_error_reporting(pdev);
-
-	pci_disable_device(pdev);
-}
-
-static struct pci_driver i40evf_driver = {
-	.name     = i40evf_driver_name,
-	.id_table = i40evf_pci_tbl,
-	.probe    = i40evf_probe,
-	.remove   = i40evf_remove,
-#ifdef CONFIG_PM
-	.suspend  = i40evf_suspend,
-	.resume   = i40evf_resume,
-#endif
-	.shutdown = i40evf_shutdown,
-};
-
-/**
- * i40evf_init_module - Driver Registration Routine
- *
- * i40evf_init_module is the first routine called when the driver is
- * loaded. All it does is register with the PCI subsystem.
- **/
-static int __init i40evf_init_module(void)
-{
-	int ret;
-
-	pr_info("i40evf: %s - version %s\n", i40evf_driver_string,
-		i40evf_driver_version);
-
-	pr_info("%s\n", i40evf_copyright);
-
-	i40evf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
-				    i40evf_driver_name);
-	if (!i40evf_wq) {
-		pr_err("%s: Failed to create workqueue\n", i40evf_driver_name);
-		return -ENOMEM;
-	}
-	ret = pci_register_driver(&i40evf_driver);
-	return ret;
-}
-
-module_init(i40evf_init_module);
-
-/**
- * i40evf_exit_module - Driver Exit Cleanup Routine
- *
- * i40evf_exit_module is called just before the driver is removed
- * from memory.
- **/
-static void __exit i40evf_exit_module(void)
-{
-	pci_unregister_driver(&i40evf_driver);
-	destroy_workqueue(i40evf_wq);
-}
-
-module_exit(i40evf_exit_module);
-
-/* i40evf_main.c */
diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
new file mode 100644
index 0000000..c997063
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2013 - 2018 Intel Corporation.
+#
+# Makefile for the Intel(R) Ethernet Adaptive Virtual Function (iavf)
+# driver
+#
+#
+
+ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_IAVF) += iavf.o
+
+iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \
+	     iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
new file mode 100644
index 0000000..29de3ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_H_
+#define _IAVF_H_
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/socket.h>
+#include <linux/jiffies.h>
+#include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
+#include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "iavf_type.h"
+#include <linux/avf/virtchnl.h>
+#include "iavf_txrx.h"
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+#define PFX "iavf: "
+
+/* VSI state flags shared with common code */
+enum iavf_vsi_state_t {
+	__IAVF_VSI_DOWN,
+	/* This must be last as it determines the size of the BITMAP */
+	__IAVF_VSI_STATE_SIZE__,
+};
+
+/* dummy struct to make common code less painful */
+struct iavf_vsi {
+	struct iavf_adapter *back;
+	struct net_device *netdev;
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	u16 seid;
+	u16 id;
+	DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
+	int base_vector;
+	u16 work_limit;
+	u16 qs_handle;
+	void *priv;     /* client driver data reference. */
+};
+
+/* How many Rx Buffers do we bundle into one write to the hardware? */
+#define IAVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+#define IAVF_DEFAULT_TXD	512
+#define IAVF_DEFAULT_RXD	512
+#define IAVF_MAX_TXD		4096
+#define IAVF_MIN_TXD		64
+#define IAVF_MAX_RXD		4096
+#define IAVF_MIN_RXD		64
+#define IAVF_REQ_DESCRIPTOR_MULTIPLE	32
+#define IAVF_MAX_AQ_BUF_SIZE	4096
+#define IAVF_AQ_LEN		32
+#define IAVF_AQ_MAX_ERR	20 /* times to try before resetting AQ */
+
+#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+
+#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i]))
+#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i]))
+#define IAVF_TX_CTXTDESC(R, i) \
+	(&(((struct iavf_tx_context_desc *)((R)->desc))[i]))
+#define IAVF_MAX_REQ_QUEUES 4
+
+#define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4)
+#define IAVF_HLUT_ARRAY_SIZE ((IAVF_VFQF_HLUT_MAX_INDEX + 1) * 4)
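+/* Each VFQF_HKEY/VFQF_HLUT register is 32 bits wide, hence the "* 4" to
+ * convert a register count into a size in bytes (inferred from the
+ * arithmetic; the register definitions live in iavf_register.h).
+ */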
+#define IAVF_MBPS_DIVISOR	125000 /* divisor to convert to Mbps */
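+/* 1 Mbps = 10^6 bits/s = 125000 bytes/s, so dividing a bytes-per-second
+ * rate by this constant yields Mbps (inferred from the constant's value).
+ */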
+
+/* MAX_MSIX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct iavf_q_vector {
+	struct iavf_adapter *adapter;
+	struct iavf_vsi *vsi;
+	struct napi_struct napi;
+	struct iavf_ring_container rx;
+	struct iavf_ring_container tx;
+	u32 ring_mask;
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
+	u8 num_ringpairs;	/* total number of ring pairs in vector */
+	u16 v_idx;		/* index in the vsi->q_vector array. */
+	u16 reg_idx;		/* register index of the interrupt */
+	char name[IFNAMSIZ + 15];
+	bool arm_wb_state;
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
+};
+
+/* Helper macros to switch between ints/sec and what the register uses.
+ * And yes, it's the same math going both ways.  The lowest value
+ * supported by all of the iavf hardware is 8.
+ */
+#define EITR_INTS_PER_SEC_TO_REG(_eitr) \
+	((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8)
+#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG
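+/* Worked example (arithmetic only): a request for 20000 ints/sec gives
+ * 1000000000 / (20000 * 256) = 195, i.e. the register counts the
+ * inter-interrupt gap in 256 ns units.
+ */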
+
+#define IAVF_DESC_UNUSED(R) \
+	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	(R)->next_to_clean - (R)->next_to_use - 1)
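+/* Illustrative example: with count = 512, next_to_clean = 10 and
+ * next_to_use = 500, the macro yields 512 + 10 - 500 - 1 = 21 unused
+ * descriptors; the trailing -1 keeps a full ring distinguishable from
+ * an empty one.
+ */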
+
+#define OTHER_VECTOR 1
+#define NONQ_VECS (OTHER_VECTOR)
+
+#define MIN_MSIX_Q_VECTORS 1
+#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS)
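+/* Vector 0 is reserved for the admin queue and other non-queue interrupts,
+ * so the vectors usable for traffic queues are num_msix_vectors - NONQ_VECS.
+ */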
+
+#define IAVF_QUEUE_END_OF_LIST 0x7FF
+#define IAVF_FREE_VECTOR 0x7FFF
+struct iavf_mac_filter {
+	struct list_head list;
+	u8 macaddr[ETH_ALEN];
+	bool remove;		/* filter needs to be removed */
+	bool add;		/* filter needs to be added */
+};
+
+struct iavf_vlan_filter {
+	struct list_head list;
+	u16 vlan;
+	bool remove;		/* filter needs to be removed */
+	bool add;		/* filter needs to be added */
+};
+
+#define IAVF_MAX_TRAFFIC_CLASS	4
+/* State of traffic class creation */
+enum iavf_tc_state_t {
+	__IAVF_TC_INVALID, /* no traffic class, default state */
+	__IAVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct iavf_channel_config {
+	struct virtchnl_channel_info ch_info[IAVF_MAX_TRAFFIC_CLASS];
+	enum iavf_tc_state_t state;
+	u8 total_qps;
+};
+
+/* State of cloud filter */
+enum iavf_cloud_filter_state_t {
+	__IAVF_CF_INVALID,	 /* cloud filter not added */
+	__IAVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+	__IAVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+	__IAVF_CF_ACTIVE,	 /* cloud filter is active */
+};
+
+/* Driver state. The order of these is important! */
+enum iavf_state_t {
+	__IAVF_STARTUP,		/* driver loaded, probe complete */
+	__IAVF_REMOVE,		/* driver is being unloaded */
+	__IAVF_INIT_VERSION_CHECK,	/* aq msg sent, awaiting reply */
+	__IAVF_INIT_GET_RESOURCES,	/* aq msg sent, awaiting reply */
+	__IAVF_INIT_SW,		/* got resources, setting up structs */
+	__IAVF_RESETTING,		/* in reset */
+	__IAVF_COMM_FAILED,		/* communication with PF failed */
+	/* Below here, watchdog is running */
+	__IAVF_DOWN,			/* ready, can be opened */
+	__IAVF_DOWN_PENDING,		/* descending, waiting for watchdog */
+	__IAVF_TESTING,		/* in ethtool self-test */
+	__IAVF_RUNNING,		/* opened, working */
+};
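+/* The ordering matters because the driver compares states numerically
+ * (e.g. the close path bails out early when state <= __IAVF_DOWN_PENDING),
+ * so reordering entries would silently change those checks.
+ */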
+
+enum iavf_critical_section_t {
+	__IAVF_IN_CRITICAL_TASK,	/* cannot be interrupted */
+	__IAVF_IN_CLIENT_TASK,
+	__IAVF_IN_REMOVE_TASK,	/* device being removed */
+};
+
+#define IAVF_CLOUD_FIELD_OMAC		0x01
+#define IAVF_CLOUD_FIELD_IMAC		0x02
+#define IAVF_CLOUD_FIELD_IVLAN	0x04
+#define IAVF_CLOUD_FIELD_TEN_ID	0x08
+#define IAVF_CLOUD_FIELD_IIP		0x10
+
+#define IAVF_CF_FLAGS_OMAC	IAVF_CLOUD_FIELD_OMAC
+#define IAVF_CF_FLAGS_IMAC	IAVF_CLOUD_FIELD_IMAC
+#define IAVF_CF_FLAGS_IMAC_IVLAN	(IAVF_CLOUD_FIELD_IMAC |\
+					 IAVF_CLOUD_FIELD_IVLAN)
+#define IAVF_CF_FLAGS_IMAC_TEN_ID	(IAVF_CLOUD_FIELD_IMAC |\
+					 IAVF_CLOUD_FIELD_TEN_ID)
+#define IAVF_CF_FLAGS_OMAC_TEN_ID_IMAC	(IAVF_CLOUD_FIELD_OMAC |\
+						 IAVF_CLOUD_FIELD_IMAC |\
+						 IAVF_CLOUD_FIELD_TEN_ID)
+#define IAVF_CF_FLAGS_IMAC_IVLAN_TEN_ID	(IAVF_CLOUD_FIELD_IMAC |\
+						 IAVF_CLOUD_FIELD_IVLAN |\
+						 IAVF_CLOUD_FIELD_TEN_ID)
+#define IAVF_CF_FLAGS_IIP	IAVF_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct iavf_cloud_filter {
+	enum iavf_cloud_filter_state_t state;
+	struct list_head list;
+	struct virtchnl_filter f;
+	unsigned long cookie;
+	bool del;		/* filter needs to be deleted */
+	bool add;		/* filter needs to be added */
+};
+
+/* board specific private data structure */
+struct iavf_adapter {
+	struct work_struct reset_task;
+	struct work_struct adminq_task;
+	struct delayed_work client_task;
+	struct delayed_work init_task;
+	wait_queue_head_t down_waitqueue;
+	struct iavf_q_vector *q_vectors;
+	struct list_head vlan_filter_list;
+	struct list_head mac_filter_list;
+	/* Lock to protect accesses to MAC and VLAN lists */
+	spinlock_t mac_vlan_list_lock;
+	char misc_vector_name[IFNAMSIZ + 9];
+	int num_active_queues;
+	int num_req_queues;
+
+	/* TX */
+	struct iavf_ring *tx_rings;
+	u32 tx_timeout_count;
+	u32 tx_desc_count;
+
+	/* RX */
+	struct iavf_ring *rx_rings;
+	u64 hw_csum_rx_error;
+	u32 rx_desc_count;
+	int num_msix_vectors;
+	int num_iwarp_msix;
+	int iwarp_base_vector;
+	u32 client_pending;
+	struct iavf_client_instance *cinst;
+	struct msix_entry *msix_entries;
+
+	u32 flags;
+#define IAVF_FLAG_RX_CSUM_ENABLED		BIT(0)
+#define IAVF_FLAG_PF_COMMS_FAILED		BIT(3)
+#define IAVF_FLAG_RESET_PENDING		BIT(4)
+#define IAVF_FLAG_RESET_NEEDED		BIT(5)
+#define IAVF_FLAG_WB_ON_ITR_CAPABLE		BIT(6)
+#define IAVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(9)
+#define IAVF_FLAG_CLIENT_NEEDS_OPEN		BIT(10)
+#define IAVF_FLAG_CLIENT_NEEDS_CLOSE		BIT(11)
+#define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS	BIT(12)
+#define IAVF_FLAG_PROMISC_ON			BIT(13)
+#define IAVF_FLAG_ALLMULTI_ON			BIT(14)
+#define IAVF_FLAG_LEGACY_RX			BIT(15)
+#define IAVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
+#define IAVF_FLAG_QUEUES_DISABLED		BIT(17)
+/* duplicates for common code */
+#define IAVF_FLAG_DCB_ENABLED			0
+	/* flags for admin queue service task */
+	u32 aq_required;
+#define IAVF_FLAG_AQ_ENABLE_QUEUES		BIT(0)
+#define IAVF_FLAG_AQ_DISABLE_QUEUES		BIT(1)
+#define IAVF_FLAG_AQ_ADD_MAC_FILTER		BIT(2)
+#define IAVF_FLAG_AQ_ADD_VLAN_FILTER		BIT(3)
+#define IAVF_FLAG_AQ_DEL_MAC_FILTER		BIT(4)
+#define IAVF_FLAG_AQ_DEL_VLAN_FILTER		BIT(5)
+#define IAVF_FLAG_AQ_CONFIGURE_QUEUES		BIT(6)
+#define IAVF_FLAG_AQ_MAP_VECTORS		BIT(7)
+#define IAVF_FLAG_AQ_HANDLE_RESET		BIT(8)
+#define IAVF_FLAG_AQ_CONFIGURE_RSS		BIT(9) /* direct AQ config */
+#define IAVF_FLAG_AQ_GET_CONFIG		BIT(10)
+/* Newer style, RSS done by the PF so we can ignore hardware vagaries. */
+#define IAVF_FLAG_AQ_GET_HENA			BIT(11)
+#define IAVF_FLAG_AQ_SET_HENA			BIT(12)
+#define IAVF_FLAG_AQ_SET_RSS_KEY		BIT(13)
+#define IAVF_FLAG_AQ_SET_RSS_LUT		BIT(14)
+#define IAVF_FLAG_AQ_REQUEST_PROMISC		BIT(15)
+#define IAVF_FLAG_AQ_RELEASE_PROMISC		BIT(16)
+#define IAVF_FLAG_AQ_REQUEST_ALLMULTI		BIT(17)
+#define IAVF_FLAG_AQ_RELEASE_ALLMULTI		BIT(18)
+#define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING	BIT(19)
+#define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING	BIT(20)
+#define IAVF_FLAG_AQ_ENABLE_CHANNELS		BIT(21)
+#define IAVF_FLAG_AQ_DISABLE_CHANNELS		BIT(22)
+#define IAVF_FLAG_AQ_ADD_CLOUD_FILTER		BIT(23)
+#define IAVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT(24)
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	struct iavf_hw hw; /* defined in iavf_type.h */
+
+	enum iavf_state_t state;
+	unsigned long crit_section;
+
+	struct delayed_work watchdog_task;
+	bool netdev_registered;
+	bool link_up;
+	enum virtchnl_link_speed link_speed;
+	enum virtchnl_ops current_op;
+#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
+			    (_a)->vf_res->vf_cap_flags & \
+				VIRTCHNL_VF_OFFLOAD_IWARP : \
+			    0)
+#define CLIENT_ENABLED(_a) ((_a)->cinst)
+/* RSS by the PF should be preferred over RSS via other methods. */
+#define RSS_PF(_a) ((_a)->vf_res->vf_cap_flags & \
+		    VIRTCHNL_VF_OFFLOAD_RSS_PF)
+#define RSS_AQ(_a) ((_a)->vf_res->vf_cap_flags & \
+		    VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+#define RSS_REG(_a) (!((_a)->vf_res->vf_cap_flags & \
+		       (VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
+			VIRTCHNL_VF_OFFLOAD_RSS_PF)))
+#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
+			  VIRTCHNL_VF_OFFLOAD_VLAN)
+	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
+	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
+	struct virtchnl_version_info pf_version;
+#define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
+		       ((_a)->pf_version.minor == 1))
+	u16 msg_enable;
+	struct iavf_eth_stats current_stats;
+	struct iavf_vsi vsi;
+	u32 aq_wait_count;
+	/* RSS stuff */
+	u64 hena;
+	u16 rss_key_size;
+	u16 rss_lut_size;
+	u8 *rss_key;
+	u8 *rss_lut;
+	/* ADQ related members */
+	struct iavf_channel_config ch_config;
+	u8 num_tc;
+	struct list_head cloud_filter_list;
+	/* lock to protect access to the cloud filter list */
+	spinlock_t cloud_filter_list_lock;
+	u16 num_cloud_filters;
+};
+
+
+/* Ethtool Private Flags */
+
+/* lan device, used by client interface */
+struct iavf_device {
+	struct list_head list;
+	struct iavf_adapter *vf;
+};
+
+/* needed by iavf_ethtool.c */
+extern char iavf_driver_name[];
+extern const char iavf_driver_version[];
+extern struct workqueue_struct *iavf_wq;
+
+int iavf_up(struct iavf_adapter *adapter);
+void iavf_down(struct iavf_adapter *adapter);
+int iavf_process_config(struct iavf_adapter *adapter);
+void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_reset(struct iavf_adapter *adapter);
+void iavf_set_ethtool_ops(struct net_device *netdev);
+void iavf_update_stats(struct iavf_adapter *adapter);
+void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
+int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
+void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask);
+void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
+void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
+
+void iavf_napi_add_all(struct iavf_adapter *adapter);
+void iavf_napi_del_all(struct iavf_adapter *adapter);
+
+int iavf_send_api_ver(struct iavf_adapter *adapter);
+int iavf_verify_api_ver(struct iavf_adapter *adapter);
+int iavf_send_vf_config_msg(struct iavf_adapter *adapter);
+int iavf_get_vf_config(struct iavf_adapter *adapter);
+void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
+void iavf_configure_queues(struct iavf_adapter *adapter);
+void iavf_deconfigure_queues(struct iavf_adapter *adapter);
+void iavf_enable_queues(struct iavf_adapter *adapter);
+void iavf_disable_queues(struct iavf_adapter *adapter);
+void iavf_map_queues(struct iavf_adapter *adapter);
+int iavf_request_queues(struct iavf_adapter *adapter, int num);
+void iavf_add_ether_addrs(struct iavf_adapter *adapter);
+void iavf_del_ether_addrs(struct iavf_adapter *adapter);
+void iavf_add_vlans(struct iavf_adapter *adapter);
+void iavf_del_vlans(struct iavf_adapter *adapter);
+void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags);
+void iavf_request_stats(struct iavf_adapter *adapter);
+void iavf_request_reset(struct iavf_adapter *adapter);
+void iavf_get_hena(struct iavf_adapter *adapter);
+void iavf_set_hena(struct iavf_adapter *adapter);
+void iavf_set_rss_key(struct iavf_adapter *adapter);
+void iavf_set_rss_lut(struct iavf_adapter *adapter);
+void iavf_enable_vlan_stripping(struct iavf_adapter *adapter);
+void iavf_disable_vlan_stripping(struct iavf_adapter *adapter);
+void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+			      enum virtchnl_ops v_opcode,
+			      enum iavf_status v_retval, u8 *msg, u16 msglen);
+int iavf_config_rss(struct iavf_adapter *adapter);
+int iavf_lan_add_device(struct iavf_adapter *adapter);
+int iavf_lan_del_device(struct iavf_adapter *adapter);
+void iavf_client_subtask(struct iavf_adapter *adapter);
+void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len);
+void iavf_notify_client_l2_params(struct iavf_vsi *vsi);
+void iavf_notify_client_open(struct iavf_vsi *vsi);
+void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset);
+void iavf_enable_channels(struct iavf_adapter *adapter);
+void iavf_disable_channels(struct iavf_adapter *adapter);
+void iavf_add_cloud_filter(struct iavf_adapter *adapter);
+void iavf_del_cloud_filter(struct iavf_adapter *adapter);
+#endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
similarity index 60%
rename from drivers/net/ethernet/intel/i40evf/i40e_adminq.c
rename to drivers/net/ethernet/intel/iavf/iavf_adminq.c
index 21a0dbf..9fa3fa9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
@@ -1,66 +1,54 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
-#include "i40e_status.h"
-#include "i40e_type.h"
-#include "i40e_register.h"
-#include "i40e_adminq.h"
-#include "i40e_prototype.h"
+#include "iavf_status.h"
+#include "iavf_type.h"
+#include "iavf_register.h"
+#include "iavf_adminq.h"
+#include "iavf_prototype.h"
 
 /**
- * i40e_is_nvm_update_op - return true if this is an NVM update operation
- * @desc: API request descriptor
- **/
-static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc)
-{
-	return (desc->opcode == i40e_aqc_opc_nvm_erase) ||
-	       (desc->opcode == i40e_aqc_opc_nvm_update);
-}
-
-/**
- *  i40e_adminq_init_regs - Initialize AdminQ registers
+ *  iavf_adminq_init_regs - Initialize AdminQ registers
  *  @hw: pointer to the hardware structure
  *
  *  This assumes the alloc_asq and alloc_arq functions have already been called
  **/
-static void i40e_adminq_init_regs(struct i40e_hw *hw)
+static void iavf_adminq_init_regs(struct iavf_hw *hw)
 {
 	/* set head and tail registers in our local struct */
-	if (i40e_is_vf(hw)) {
-		hw->aq.asq.tail = I40E_VF_ATQT1;
-		hw->aq.asq.head = I40E_VF_ATQH1;
-		hw->aq.asq.len  = I40E_VF_ATQLEN1;
-		hw->aq.asq.bal  = I40E_VF_ATQBAL1;
-		hw->aq.asq.bah  = I40E_VF_ATQBAH1;
-		hw->aq.arq.tail = I40E_VF_ARQT1;
-		hw->aq.arq.head = I40E_VF_ARQH1;
-		hw->aq.arq.len  = I40E_VF_ARQLEN1;
-		hw->aq.arq.bal  = I40E_VF_ARQBAL1;
-		hw->aq.arq.bah  = I40E_VF_ARQBAH1;
-	}
+	hw->aq.asq.tail = IAVF_VF_ATQT1;
+	hw->aq.asq.head = IAVF_VF_ATQH1;
+	hw->aq.asq.len  = IAVF_VF_ATQLEN1;
+	hw->aq.asq.bal  = IAVF_VF_ATQBAL1;
+	hw->aq.asq.bah  = IAVF_VF_ATQBAH1;
+	hw->aq.arq.tail = IAVF_VF_ARQT1;
+	hw->aq.arq.head = IAVF_VF_ARQH1;
+	hw->aq.arq.len  = IAVF_VF_ARQLEN1;
+	hw->aq.arq.bal  = IAVF_VF_ARQBAL1;
+	hw->aq.arq.bah  = IAVF_VF_ARQBAH1;
 }
 
 /**
- *  i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings
+ *  iavf_alloc_adminq_asq_ring - Allocate Admin Queue send rings
  *  @hw: pointer to the hardware structure
  **/
-static i40e_status i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+static enum iavf_status iavf_alloc_adminq_asq_ring(struct iavf_hw *hw)
 {
-	i40e_status ret_code;
+	enum iavf_status ret_code;
 
-	ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
-					 i40e_mem_atq_ring,
+	ret_code = iavf_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
+					 iavf_mem_atq_ring,
 					 (hw->aq.num_asq_entries *
-					 sizeof(struct i40e_aq_desc)),
-					 I40E_ADMINQ_DESC_ALIGNMENT);
+					 sizeof(struct iavf_aq_desc)),
+					 IAVF_ADMINQ_DESC_ALIGNMENT);
 	if (ret_code)
 		return ret_code;
 
-	ret_code = i40e_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf,
+	ret_code = iavf_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf,
 					  (hw->aq.num_asq_entries *
-					  sizeof(struct i40e_asq_cmd_details)));
+					  sizeof(struct iavf_asq_cmd_details)));
 	if (ret_code) {
-		i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+		iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf);
 		return ret_code;
 	}
 
@@ -68,55 +56,55 @@
 }
 
 /**
- *  i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
+ *  iavf_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
  *  @hw: pointer to the hardware structure
  **/
-static i40e_status i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
+static enum iavf_status iavf_alloc_adminq_arq_ring(struct iavf_hw *hw)
 {
-	i40e_status ret_code;
+	enum iavf_status ret_code;
 
-	ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
-					 i40e_mem_arq_ring,
+	ret_code = iavf_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
+					 iavf_mem_arq_ring,
 					 (hw->aq.num_arq_entries *
-					 sizeof(struct i40e_aq_desc)),
-					 I40E_ADMINQ_DESC_ALIGNMENT);
+					 sizeof(struct iavf_aq_desc)),
+					 IAVF_ADMINQ_DESC_ALIGNMENT);
 
 	return ret_code;
 }
 
 /**
- *  i40e_free_adminq_asq - Free Admin Queue send rings
+ *  iavf_free_adminq_asq - Free Admin Queue send rings
  *  @hw: pointer to the hardware structure
  *
  *  This assumes the posted send buffers have already been cleaned
  *  and de-allocated
  **/
-static void i40e_free_adminq_asq(struct i40e_hw *hw)
+static void iavf_free_adminq_asq(struct iavf_hw *hw)
 {
-	i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+	iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf);
 }
 
 /**
- *  i40e_free_adminq_arq - Free Admin Queue receive rings
+ *  iavf_free_adminq_arq - Free Admin Queue receive rings
  *  @hw: pointer to the hardware structure
  *
  *  This assumes the posted receive buffers have already been cleaned
  *  and de-allocated
  **/
-static void i40e_free_adminq_arq(struct i40e_hw *hw)
+static void iavf_free_adminq_arq(struct iavf_hw *hw)
 {
-	i40e_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+	iavf_free_dma_mem(hw, &hw->aq.arq.desc_buf);
 }
 
 /**
- *  i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
+ *  iavf_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
  *  @hw: pointer to the hardware structure
  **/
-static i40e_status i40e_alloc_arq_bufs(struct i40e_hw *hw)
+static enum iavf_status iavf_alloc_arq_bufs(struct iavf_hw *hw)
 {
-	i40e_status ret_code;
-	struct i40e_aq_desc *desc;
-	struct i40e_dma_mem *bi;
+	struct iavf_aq_desc *desc;
+	struct iavf_dma_mem *bi;
+	enum iavf_status ret_code;
 	int i;
 
 	/* We'll be allocating the buffer info memory first, then we can
@@ -124,28 +112,29 @@
 	 */
 
 	/* buffer_info structures do not need alignment */
-	ret_code = i40e_allocate_virt_mem(hw, &hw->aq.arq.dma_head,
-		(hw->aq.num_arq_entries * sizeof(struct i40e_dma_mem)));
+	ret_code = iavf_allocate_virt_mem(hw, &hw->aq.arq.dma_head,
+					  (hw->aq.num_arq_entries *
+					   sizeof(struct iavf_dma_mem)));
 	if (ret_code)
 		goto alloc_arq_bufs;
-	hw->aq.arq.r.arq_bi = (struct i40e_dma_mem *)hw->aq.arq.dma_head.va;
+	hw->aq.arq.r.arq_bi = (struct iavf_dma_mem *)hw->aq.arq.dma_head.va;
 
 	/* allocate the mapped buffers */
 	for (i = 0; i < hw->aq.num_arq_entries; i++) {
 		bi = &hw->aq.arq.r.arq_bi[i];
-		ret_code = i40e_allocate_dma_mem(hw, bi,
-						 i40e_mem_arq_buf,
+		ret_code = iavf_allocate_dma_mem(hw, bi,
+						 iavf_mem_arq_buf,
 						 hw->aq.arq_buf_size,
-						 I40E_ADMINQ_DESC_ALIGNMENT);
+						 IAVF_ADMINQ_DESC_ALIGNMENT);
 		if (ret_code)
 			goto unwind_alloc_arq_bufs;
 
 		/* now configure the descriptors for use */
-		desc = I40E_ADMINQ_DESC(hw->aq.arq, i);
+		desc = IAVF_ADMINQ_DESC(hw->aq.arq, i);
 
-		desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF);
-		if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
-			desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB);
+		desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF);
+		if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF)
+			desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB);
 		desc->opcode = 0;
 		/* This is in accordance with the Admin queue design; there is no
 		 * register for buffer size configuration
@@ -169,36 +158,37 @@
 	/* don't try to free the one that failed... */
 	i--;
 	for (; i >= 0; i--)
-		i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
-	i40e_free_virt_mem(hw, &hw->aq.arq.dma_head);
+		iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+	iavf_free_virt_mem(hw, &hw->aq.arq.dma_head);
 
 	return ret_code;
 }
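
The hunk elides the rest of the per-slot setup, but each pre-posted ARQ descriptor ends up carrying the DMA address and size of its buffer, along the lines of:

	desc->datalen = cpu_to_le16((u16)bi->size);
	desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
	desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa));

Firmware DMAs each event into the posted buffer; iavf_clean_arq_element() further down restores exactly these fields when it re-arms a consumed slot.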
 
 /**
- *  i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue
+ *  iavf_alloc_asq_bufs - Allocate empty buffer structs for the send queue
  *  @hw: pointer to the hardware structure
  **/
-static i40e_status i40e_alloc_asq_bufs(struct i40e_hw *hw)
+static enum iavf_status iavf_alloc_asq_bufs(struct iavf_hw *hw)
 {
-	i40e_status ret_code;
-	struct i40e_dma_mem *bi;
+	struct iavf_dma_mem *bi;
+	enum iavf_status ret_code;
 	int i;
 
 	/* No mapped memory needed yet, just the buffer info structures */
-	ret_code = i40e_allocate_virt_mem(hw, &hw->aq.asq.dma_head,
-		(hw->aq.num_asq_entries * sizeof(struct i40e_dma_mem)));
+	ret_code = iavf_allocate_virt_mem(hw, &hw->aq.asq.dma_head,
+					  (hw->aq.num_asq_entries *
+					   sizeof(struct iavf_dma_mem)));
 	if (ret_code)
 		goto alloc_asq_bufs;
-	hw->aq.asq.r.asq_bi = (struct i40e_dma_mem *)hw->aq.asq.dma_head.va;
+	hw->aq.asq.r.asq_bi = (struct iavf_dma_mem *)hw->aq.asq.dma_head.va;
 
 	/* allocate the mapped buffers */
 	for (i = 0; i < hw->aq.num_asq_entries; i++) {
 		bi = &hw->aq.asq.r.asq_bi[i];
-		ret_code = i40e_allocate_dma_mem(hw, bi,
-						 i40e_mem_asq_buf,
+		ret_code = iavf_allocate_dma_mem(hw, bi,
+						 iavf_mem_asq_buf,
 						 hw->aq.asq_buf_size,
-						 I40E_ADMINQ_DESC_ALIGNMENT);
+						 IAVF_ADMINQ_DESC_ALIGNMENT);
 		if (ret_code)
 			goto unwind_alloc_asq_bufs;
 	}
@@ -209,63 +199,63 @@
 	/* don't try to free the one that failed... */
 	i--;
 	for (; i >= 0; i--)
-		i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
-	i40e_free_virt_mem(hw, &hw->aq.asq.dma_head);
+		iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+	iavf_free_virt_mem(hw, &hw->aq.asq.dma_head);
 
 	return ret_code;
 }
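
As in iavf_alloc_arq_bufs() above, the unwind path decrements i before freeing so that it skips the allocation that just failed; calling iavf_free_dma_mem() on an iavf_dma_mem that was never mapped would be an error.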
 
 /**
- *  i40e_free_arq_bufs - Free receive queue buffer info elements
+ *  iavf_free_arq_bufs - Free receive queue buffer info elements
  *  @hw: pointer to the hardware structure
  **/
-static void i40e_free_arq_bufs(struct i40e_hw *hw)
+static void iavf_free_arq_bufs(struct iavf_hw *hw)
 {
 	int i;
 
 	/* free descriptors */
 	for (i = 0; i < hw->aq.num_arq_entries; i++)
-		i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+		iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
 
 	/* free the descriptor memory */
-	i40e_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+	iavf_free_dma_mem(hw, &hw->aq.arq.desc_buf);
 
 	/* free the dma header */
-	i40e_free_virt_mem(hw, &hw->aq.arq.dma_head);
+	iavf_free_virt_mem(hw, &hw->aq.arq.dma_head);
 }
 
 /**
- *  i40e_free_asq_bufs - Free send queue buffer info elements
+ *  iavf_free_asq_bufs - Free send queue buffer info elements
  *  @hw: pointer to the hardware structure
  **/
-static void i40e_free_asq_bufs(struct i40e_hw *hw)
+static void iavf_free_asq_bufs(struct iavf_hw *hw)
 {
 	int i;
 
 	/* only unmap if the address is non-NULL */
 	for (i = 0; i < hw->aq.num_asq_entries; i++)
 		if (hw->aq.asq.r.asq_bi[i].pa)
-			i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+			iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
 
 	/* free the buffer info list */
-	i40e_free_virt_mem(hw, &hw->aq.asq.cmd_buf);
+	iavf_free_virt_mem(hw, &hw->aq.asq.cmd_buf);
 
 	/* free the descriptor memory */
-	i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+	iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf);
 
 	/* free the dma header */
-	i40e_free_virt_mem(hw, &hw->aq.asq.dma_head);
+	iavf_free_virt_mem(hw, &hw->aq.asq.dma_head);
 }
 
 /**
- *  i40e_config_asq_regs - configure ASQ registers
+ *  iavf_config_asq_regs - configure ASQ registers
  *  @hw: pointer to the hardware structure
  *
  *  Configure base address and length registers for the transmit queue
  **/
-static i40e_status i40e_config_asq_regs(struct i40e_hw *hw)
+static enum iavf_status iavf_config_asq_regs(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 	u32 reg = 0;
 
 	/* Clear Head and Tail */
@@ -274,27 +264,27 @@
 
 	/* set starting point */
 	wr32(hw, hw->aq.asq.len, (hw->aq.num_asq_entries |
-				  I40E_VF_ATQLEN1_ATQENABLE_MASK));
+				  IAVF_VF_ATQLEN1_ATQENABLE_MASK));
 	wr32(hw, hw->aq.asq.bal, lower_32_bits(hw->aq.asq.desc_buf.pa));
 	wr32(hw, hw->aq.asq.bah, upper_32_bits(hw->aq.asq.desc_buf.pa));
 
 	/* Check one register to verify that config was applied */
 	reg = rd32(hw, hw->aq.asq.bal);
 	if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa))
-		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+		ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR;
 
 	return ret_code;
 }
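
Writing the base-address registers and then reading one back doubles as a cheap liveness check: on a dead or just-reset VF the readback will not match what was written (PCI reads from a dead function typically return all ones), so initialization fails fast with IAVF_ERR_ADMIN_QUEUE_ERROR instead of timing out on the first command. The ARQ variant below performs the same check.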
 
 /**
- *  i40e_config_arq_regs - ARQ register configuration
+ *  iavf_config_arq_regs - ARQ register configuration
  *  @hw: pointer to the hardware structure
  *
  * Configure base address and length registers for the receive (event) queue
  **/
-static i40e_status i40e_config_arq_regs(struct i40e_hw *hw)
+static enum iavf_status iavf_config_arq_regs(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 	u32 reg = 0;
 
 	/* Clear Head and Tail */
@@ -303,7 +293,7 @@
 
 	/* set starting point */
 	wr32(hw, hw->aq.arq.len, (hw->aq.num_arq_entries |
-				  I40E_VF_ARQLEN1_ARQENABLE_MASK));
+				  IAVF_VF_ARQLEN1_ARQENABLE_MASK));
 	wr32(hw, hw->aq.arq.bal, lower_32_bits(hw->aq.arq.desc_buf.pa));
 	wr32(hw, hw->aq.arq.bah, upper_32_bits(hw->aq.arq.desc_buf.pa));
 
@@ -313,13 +303,13 @@
 	/* Check one register to verify that config was applied */
 	reg = rd32(hw, hw->aq.arq.bal);
 	if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa))
-		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+		ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR;
 
 	return ret_code;
 }
 
 /**
- *  i40e_init_asq - main initialization routine for ASQ
+ *  iavf_init_asq - main initialization routine for ASQ
  *  @hw: pointer to the hardware structure
  *
  *  This is the main initialization routine for the Admin Send Queue
@@ -331,20 +321,20 @@
  *  Do *NOT* hold the lock when calling this as the memory allocation routines
  *  called are not going to be atomic context safe
  **/
-static i40e_status i40e_init_asq(struct i40e_hw *hw)
+static enum iavf_status iavf_init_asq(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 
 	if (hw->aq.asq.count > 0) {
 		/* queue already initialized */
-		ret_code = I40E_ERR_NOT_READY;
+		ret_code = IAVF_ERR_NOT_READY;
 		goto init_adminq_exit;
 	}
 
 	/* verify input for valid configuration */
 	if ((hw->aq.num_asq_entries == 0) ||
 	    (hw->aq.asq_buf_size == 0)) {
-		ret_code = I40E_ERR_CONFIG;
+		ret_code = IAVF_ERR_CONFIG;
 		goto init_adminq_exit;
 	}
 
@@ -352,17 +342,17 @@
 	hw->aq.asq.next_to_clean = 0;
 
 	/* allocate the ring memory */
-	ret_code = i40e_alloc_adminq_asq_ring(hw);
+	ret_code = iavf_alloc_adminq_asq_ring(hw);
 	if (ret_code)
 		goto init_adminq_exit;
 
 	/* allocate buffers in the rings */
-	ret_code = i40e_alloc_asq_bufs(hw);
+	ret_code = iavf_alloc_asq_bufs(hw);
 	if (ret_code)
 		goto init_adminq_free_rings;
 
 	/* initialize base registers */
-	ret_code = i40e_config_asq_regs(hw);
+	ret_code = iavf_config_asq_regs(hw);
 	if (ret_code)
 		goto init_adminq_free_rings;
 
@@ -371,14 +361,14 @@
 	goto init_adminq_exit;
 
 init_adminq_free_rings:
-	i40e_free_adminq_asq(hw);
+	iavf_free_adminq_asq(hw);
 
 init_adminq_exit:
 	return ret_code;
 }
 
 /**
- *  i40e_init_arq - initialize ARQ
+ *  iavf_init_arq - initialize ARQ
  *  @hw: pointer to the hardware structure
  *
  *  The main initialization routine for the Admin Receive (Event) Queue.
@@ -390,20 +380,20 @@
  *  Do *NOT* hold the lock when calling this as the memory allocation routines
  *  called are not going to be atomic context safe
  **/
-static i40e_status i40e_init_arq(struct i40e_hw *hw)
+static enum iavf_status iavf_init_arq(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 
 	if (hw->aq.arq.count > 0) {
 		/* queue already initialized */
-		ret_code = I40E_ERR_NOT_READY;
+		ret_code = IAVF_ERR_NOT_READY;
 		goto init_adminq_exit;
 	}
 
 	/* verify input for valid configuration */
 	if ((hw->aq.num_arq_entries == 0) ||
 	    (hw->aq.arq_buf_size == 0)) {
-		ret_code = I40E_ERR_CONFIG;
+		ret_code = IAVF_ERR_CONFIG;
 		goto init_adminq_exit;
 	}
 
@@ -411,17 +401,17 @@
 	hw->aq.arq.next_to_clean = 0;
 
 	/* allocate the ring memory */
-	ret_code = i40e_alloc_adminq_arq_ring(hw);
+	ret_code = iavf_alloc_adminq_arq_ring(hw);
 	if (ret_code)
 		goto init_adminq_exit;
 
 	/* allocate buffers in the rings */
-	ret_code = i40e_alloc_arq_bufs(hw);
+	ret_code = iavf_alloc_arq_bufs(hw);
 	if (ret_code)
 		goto init_adminq_free_rings;
 
 	/* initialize base registers */
-	ret_code = i40e_config_arq_regs(hw);
+	ret_code = iavf_config_arq_regs(hw);
 	if (ret_code)
 		goto init_adminq_free_rings;
 
@@ -430,26 +420,26 @@
 	goto init_adminq_exit;
 
 init_adminq_free_rings:
-	i40e_free_adminq_arq(hw);
+	iavf_free_adminq_arq(hw);
 
 init_adminq_exit:
 	return ret_code;
 }
 
 /**
- *  i40e_shutdown_asq - shutdown the ASQ
+ *  iavf_shutdown_asq - shutdown the ASQ
  *  @hw: pointer to the hardware structure
  *
  *  The main shutdown routine for the Admin Send Queue
  **/
-static i40e_status i40e_shutdown_asq(struct i40e_hw *hw)
+static enum iavf_status iavf_shutdown_asq(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 
 	mutex_lock(&hw->aq.asq_mutex);
 
 	if (hw->aq.asq.count == 0) {
-		ret_code = I40E_ERR_NOT_READY;
+		ret_code = IAVF_ERR_NOT_READY;
 		goto shutdown_asq_out;
 	}
 
@@ -463,7 +453,7 @@
 	hw->aq.asq.count = 0; /* to indicate uninitialized queue */
 
 	/* free ring buffers */
-	i40e_free_asq_bufs(hw);
+	iavf_free_asq_bufs(hw);
 
 shutdown_asq_out:
 	mutex_unlock(&hw->aq.asq_mutex);
@@ -471,19 +461,19 @@
 }
 
 /**
- *  i40e_shutdown_arq - shutdown ARQ
+ *  iavf_shutdown_arq - shutdown ARQ
  *  @hw: pointer to the hardware structure
  *
  *  The main shutdown routine for the Admin Receive Queue
  **/
-static i40e_status i40e_shutdown_arq(struct i40e_hw *hw)
+static enum iavf_status iavf_shutdown_arq(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 
 	mutex_lock(&hw->aq.arq_mutex);
 
 	if (hw->aq.arq.count == 0) {
-		ret_code = I40E_ERR_NOT_READY;
+		ret_code = IAVF_ERR_NOT_READY;
 		goto shutdown_arq_out;
 	}
 
@@ -497,7 +487,7 @@
 	hw->aq.arq.count = 0; /* to indicate uninitialized queue */
 
 	/* free ring buffers */
-	i40e_free_arq_bufs(hw);
+	iavf_free_arq_bufs(hw);
 
 shutdown_arq_out:
 	mutex_unlock(&hw->aq.arq_mutex);
@@ -505,7 +495,7 @@
 }
 
 /**
- *  i40evf_init_adminq - main initialization routine for Admin Queue
+ *  iavf_init_adminq - main initialization routine for Admin Queue
  *  @hw: pointer to the hardware structure
  *
  *  Prior to calling this function, drivers *MUST* set the following fields
@@ -515,32 +505,32 @@
  *     - hw->aq.arq_buf_size
  *     - hw->aq.asq_buf_size
  **/
-i40e_status i40evf_init_adminq(struct i40e_hw *hw)
+enum iavf_status iavf_init_adminq(struct iavf_hw *hw)
 {
-	i40e_status ret_code;
+	enum iavf_status ret_code;
 
 	/* verify input for valid configuration */
 	if ((hw->aq.num_arq_entries == 0) ||
 	    (hw->aq.num_asq_entries == 0) ||
 	    (hw->aq.arq_buf_size == 0) ||
 	    (hw->aq.asq_buf_size == 0)) {
-		ret_code = I40E_ERR_CONFIG;
+		ret_code = IAVF_ERR_CONFIG;
 		goto init_adminq_exit;
 	}
 
 	/* Set up register offsets */
-	i40e_adminq_init_regs(hw);
+	iavf_adminq_init_regs(hw);
 
 	/* setup ASQ command write back timeout */
-	hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT;
+	hw->aq.asq_cmd_timeout = IAVF_ASQ_CMD_TIMEOUT;
 
 	/* allocate the ASQ */
-	ret_code = i40e_init_asq(hw);
+	ret_code = iavf_init_asq(hw);
 	if (ret_code)
 		goto init_adminq_destroy_locks;
 
 	/* allocate the ARQ */
-	ret_code = i40e_init_arq(hw);
+	ret_code = iavf_init_arq(hw);
 	if (ret_code)
 		goto init_adminq_free_asq;
 
@@ -548,7 +538,7 @@
 	goto init_adminq_exit;
 
 init_adminq_free_asq:
-	i40e_shutdown_asq(hw);
+	iavf_shutdown_asq(hw);
 init_adminq_destroy_locks:
 
 init_adminq_exit:
@@ -556,84 +546,80 @@
 }
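
A sketch of the caller contract from the kernel-doc above, with illustrative sizes (the driver proper chooses its own values before calling in):

	enum iavf_status status;

	hw->aq.num_asq_entries = 32;	/* illustrative values */
	hw->aq.num_arq_entries = 32;
	hw->aq.asq_buf_size = 512;
	hw->aq.arq_buf_size = 512;
	status = iavf_init_adminq(hw);

On failure nothing stays allocated: the ARQ error path shuts the ASQ back down before returning.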
 
 /**
- *  i40evf_shutdown_adminq - shutdown routine for the Admin Queue
+ *  iavf_shutdown_adminq - shutdown routine for the Admin Queue
  *  @hw: pointer to the hardware structure
  **/
-i40e_status i40evf_shutdown_adminq(struct i40e_hw *hw)
+enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw)
 {
-	i40e_status ret_code = 0;
+	enum iavf_status ret_code = 0;
 
-	if (i40evf_check_asq_alive(hw))
-		i40evf_aq_queue_shutdown(hw, true);
+	if (iavf_check_asq_alive(hw))
+		iavf_aq_queue_shutdown(hw, true);
 
-	i40e_shutdown_asq(hw);
-	i40e_shutdown_arq(hw);
-
-	if (hw->nvm_buff.va)
-		i40e_free_virt_mem(hw, &hw->nvm_buff);
+	iavf_shutdown_asq(hw);
+	iavf_shutdown_arq(hw);
 
 	return ret_code;
 }
 
 /**
- *  i40e_clean_asq - cleans Admin send queue
+ *  iavf_clean_asq - cleans Admin send queue
  *  @hw: pointer to the hardware structure
  *
  *  returns the number of free desc
  **/
-static u16 i40e_clean_asq(struct i40e_hw *hw)
+static u16 iavf_clean_asq(struct iavf_hw *hw)
 {
-	struct i40e_adminq_ring *asq = &(hw->aq.asq);
-	struct i40e_asq_cmd_details *details;
+	struct iavf_adminq_ring *asq = &hw->aq.asq;
+	struct iavf_asq_cmd_details *details;
 	u16 ntc = asq->next_to_clean;
-	struct i40e_aq_desc desc_cb;
-	struct i40e_aq_desc *desc;
+	struct iavf_aq_desc desc_cb;
+	struct iavf_aq_desc *desc;
 
-	desc = I40E_ADMINQ_DESC(*asq, ntc);
-	details = I40E_ADMINQ_DETAILS(*asq, ntc);
+	desc = IAVF_ADMINQ_DESC(*asq, ntc);
+	details = IAVF_ADMINQ_DETAILS(*asq, ntc);
 	while (rd32(hw, hw->aq.asq.head) != ntc) {
-		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 			   "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head));
 
 		if (details->callback) {
-			I40E_ADMINQ_CALLBACK cb_func =
-					(I40E_ADMINQ_CALLBACK)details->callback;
+			IAVF_ADMINQ_CALLBACK cb_func =
+					(IAVF_ADMINQ_CALLBACK)details->callback;
 			desc_cb = *desc;
 			cb_func(hw, &desc_cb);
 		}
-		memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+		memset((void *)desc, 0, sizeof(struct iavf_aq_desc));
 		memset((void *)details, 0,
-		       sizeof(struct i40e_asq_cmd_details));
+		       sizeof(struct iavf_asq_cmd_details));
 		ntc++;
 		if (ntc == asq->count)
 			ntc = 0;
-		desc = I40E_ADMINQ_DESC(*asq, ntc);
-		details = I40E_ADMINQ_DETAILS(*asq, ntc);
+		desc = IAVF_ADMINQ_DESC(*asq, ntc);
+		details = IAVF_ADMINQ_DETAILS(*asq, ntc);
 	}
 
 	asq->next_to_clean = ntc;
 
-	return I40E_DESC_UNUSED(asq);
+	return IAVF_DESC_UNUSED(asq);
 }
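
IAVF_DESC_UNUSED() is the standard Intel ring-buffer free count, which keeps one slot empty so a full ring can be told apart from an empty one. For example, a 32-entry ASQ with next_to_clean = 5 and next_to_use = 9 reports 32 - (9 - 5) - 1 = 27 free slots; a return of 0 means "full", which is exactly the condition iavf_asq_send_command() treats as IAVF_ERR_ADMIN_QUEUE_FULL below.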
 
 /**
- *  i40evf_asq_done - check if FW has processed the Admin Send Queue
+ *  iavf_asq_done - check if FW has processed the Admin Send Queue
  *  @hw: pointer to the hw struct
  *
  *  Returns true if the firmware has processed all descriptors on the
  *  admin send queue. Returns false if there are still requests pending.
  **/
-bool i40evf_asq_done(struct i40e_hw *hw)
+bool iavf_asq_done(struct iavf_hw *hw)
 {
 	/* AQ designers suggest use of head for better
 	 * timing reliability than DD bit
 	 */
 	return rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use;
-
 }
 
 /**
- *  i40evf_asq_send_command - send command to Admin Queue
+ *  iavf_asq_send_command - send command to Admin Queue
  *  @hw: pointer to the hw struct
  *  @desc: prefilled descriptor describing the command (non DMA mem)
  *  @buff: buffer to use for indirect commands
@@ -643,40 +629,40 @@
  *  This is the main send command driver routine for the Admin Queue send
  *  queue.  It runs the queue, cleans the queue, etc
  **/
-i40e_status i40evf_asq_send_command(struct i40e_hw *hw,
-				struct i40e_aq_desc *desc,
-				void *buff, /* can be NULL */
-				u16  buff_size,
-				struct i40e_asq_cmd_details *cmd_details)
+enum iavf_status iavf_asq_send_command(struct iavf_hw *hw,
+				       struct iavf_aq_desc *desc,
+				       void *buff, /* can be NULL */
+				       u16  buff_size,
+				       struct iavf_asq_cmd_details *cmd_details)
 {
-	i40e_status status = 0;
-	struct i40e_dma_mem *dma_buff = NULL;
-	struct i40e_asq_cmd_details *details;
-	struct i40e_aq_desc *desc_on_ring;
+	struct iavf_dma_mem *dma_buff = NULL;
+	struct iavf_asq_cmd_details *details;
+	struct iavf_aq_desc *desc_on_ring;
 	bool cmd_completed = false;
+	enum iavf_status status = 0;
 	u16  retval = 0;
 	u32  val = 0;
 
 	mutex_lock(&hw->aq.asq_mutex);
 
 	if (hw->aq.asq.count == 0) {
-		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 			   "AQTX: Admin queue not initialized.\n");
-		status = I40E_ERR_QUEUE_EMPTY;
+		status = IAVF_ERR_QUEUE_EMPTY;
 		goto asq_send_command_error;
 	}
 
-	hw->aq.asq_last_status = I40E_AQ_RC_OK;
+	hw->aq.asq_last_status = IAVF_AQ_RC_OK;
 
 	val = rd32(hw, hw->aq.asq.head);
 	if (val >= hw->aq.num_asq_entries) {
-		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 			   "AQTX: head overrun at %d\n", val);
-		status = I40E_ERR_QUEUE_EMPTY;
+		status = IAVF_ERR_QUEUE_EMPTY;
 		goto asq_send_command_error;
 	}
 
-	details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
+	details = IAVF_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
 	if (cmd_details) {
 		*details = *cmd_details;
 
@@ -691,7 +677,7 @@
 				cpu_to_le32(lower_32_bits(details->cookie));
 		}
 	} else {
-		memset(details, 0, sizeof(struct i40e_asq_cmd_details));
+		memset(details, 0, sizeof(struct iavf_asq_cmd_details));
 	}
 
 	/* clear requested flags and then set additional flags if defined */
@@ -699,19 +685,19 @@
 	desc->flags |= cpu_to_le16(details->flags_ena);
 
 	if (buff_size > hw->aq.asq_buf_size) {
-		i40e_debug(hw,
-			   I40E_DEBUG_AQ_MESSAGE,
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
 			   "AQTX: Invalid buffer size: %d.\n",
 			   buff_size);
-		status = I40E_ERR_INVALID_SIZE;
+		status = IAVF_ERR_INVALID_SIZE;
 		goto asq_send_command_error;
 	}
 
 	if (details->postpone && !details->async) {
-		i40e_debug(hw,
-			   I40E_DEBUG_AQ_MESSAGE,
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
 			   "AQTX: Async flag not set along with postpone flag");
-		status = I40E_ERR_PARAM;
+		status = IAVF_ERR_PARAM;
 		goto asq_send_command_error;
 	}
 
@@ -722,23 +708,23 @@
 	/* the clean function called here could be called in a separate thread
 	 * in case of asynchronous completions
 	 */
-	if (i40e_clean_asq(hw) == 0) {
-		i40e_debug(hw,
-			   I40E_DEBUG_AQ_MESSAGE,
+	if (iavf_clean_asq(hw) == 0) {
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
 			   "AQTX: Error queue is full.\n");
-		status = I40E_ERR_ADMIN_QUEUE_FULL;
+		status = IAVF_ERR_ADMIN_QUEUE_FULL;
 		goto asq_send_command_error;
 	}
 
 	/* initialize the temp desc pointer with the right desc */
-	desc_on_ring = I40E_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
+	desc_on_ring = IAVF_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
 
 	/* if the desc is available copy the temp desc to the right place */
 	*desc_on_ring = *desc;
 
 	/* if buff is not NULL assume indirect command */
-	if (buff != NULL) {
-		dma_buff = &(hw->aq.asq.r.asq_bi[hw->aq.asq.next_to_use]);
+	if (buff) {
+		dma_buff = &hw->aq.asq.r.asq_bi[hw->aq.asq.next_to_use];
 		/* copy the user buff into the respective DMA buff */
 		memcpy(dma_buff->va, buff, buff_size);
 		desc_on_ring->datalen = cpu_to_le16(buff_size);
@@ -753,9 +739,9 @@
 	}
 
 	/* bump the tail */
-	i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n");
-	i40evf_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
-			buff, buff_size);
+	iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n");
+	iavf_debug_aq(hw, IAVF_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
+		      buff, buff_size);
 	(hw->aq.asq.next_to_use)++;
 	if (hw->aq.asq.next_to_use == hw->aq.asq.count)
 		hw->aq.asq.next_to_use = 0;
@@ -772,7 +758,7 @@
 			/* AQ designers suggest use of head for better
 			 * timing reliability than DD bit
 			 */
-			if (i40evf_asq_done(hw))
+			if (iavf_asq_done(hw))
 				break;
 			udelay(50);
 			total_delay += 50;
@@ -780,14 +766,14 @@
 	}
 
 	/* if ready, copy the desc back to temp */
-	if (i40evf_asq_done(hw)) {
+	if (iavf_asq_done(hw)) {
 		*desc = *desc_on_ring;
-		if (buff != NULL)
+		if (buff)
 			memcpy(buff, dma_buff->va, buff_size);
 		retval = le16_to_cpu(desc->retval);
 		if (retval != 0) {
-			i40e_debug(hw,
-				   I40E_DEBUG_AQ_MESSAGE,
+			iavf_debug(hw,
+				   IAVF_DEBUG_AQ_MESSAGE,
 				   "AQTX: Command completed with error 0x%X.\n",
 				   retval);
 
@@ -795,19 +781,18 @@
 			retval &= 0xff;
 		}
 		cmd_completed = true;
-		if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK)
+		if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_OK)
 			status = 0;
-		else if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_EBUSY)
-			status = I40E_ERR_NOT_READY;
+		else if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_EBUSY)
+			status = IAVF_ERR_NOT_READY;
 		else
-			status = I40E_ERR_ADMIN_QUEUE_ERROR;
-		hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
+			status = IAVF_ERR_ADMIN_QUEUE_ERROR;
+		hw->aq.asq_last_status = (enum iavf_admin_queue_err)retval;
 	}
 
-	i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+	iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 		   "AQTX: desc and buffer writeback:\n");
-	i40evf_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, buff,
-			buff_size);
+	iavf_debug_aq(hw, IAVF_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size);
 
 	/* save writeback aq if requested */
 	if (details->wb_desc)
@@ -816,14 +801,14 @@
 	/* update the error if time out occurred */
 	if ((!cmd_completed) &&
 	    (!details->async && !details->postpone)) {
-		if (rd32(hw, hw->aq.asq.len) & I40E_VF_ATQLEN1_ATQCRIT_MASK) {
-			i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+		if (rd32(hw, hw->aq.asq.len) & IAVF_VF_ATQLEN1_ATQCRIT_MASK) {
+			iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 				   "AQTX: AQ Critical error.\n");
-			status = I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR;
+			status = IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR;
 		} else {
-			i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+			iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 				   "AQTX: Writeback timeout.\n");
-			status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+			status = IAVF_ERR_ADMIN_QUEUE_TIMEOUT;
 		}
 	}
 
@@ -833,23 +818,22 @@
 }
 
 /**
- *  i40evf_fill_default_direct_cmd_desc - AQ descriptor helper function
+ *  iavf_fill_default_direct_cmd_desc - AQ descriptor helper function
  *  @desc:     pointer to the temp descriptor (non DMA mem)
  *  @opcode:   the opcode can be used to decide which flags to turn off or on
  *
  *  Fill the desc with default values
  **/
-void i40evf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
-				       u16 opcode)
+void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode)
 {
 	/* zero out the desc */
-	memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+	memset((void *)desc, 0, sizeof(struct iavf_aq_desc));
 	desc->opcode = cpu_to_le16(opcode);
-	desc->flags = cpu_to_le16(I40E_AQ_FLAG_SI);
+	desc->flags = cpu_to_le16(IAVF_AQ_FLAG_SI);
 }
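
Putting the two send-side entry points together, a direct (buffer-less) command reduces to the following sketch; the real opcode wrappers such as iavf_aq_queue_shutdown() add their command-specific fields to the descriptor before sending:

	struct iavf_aq_desc desc;
	enum iavf_status status;

	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown);
	status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL);

With buff == NULL the entire payload lives in the descriptor's 16-byte params area; a non-NULL buff makes the command indirect (see the RSS sketch at the end of iavf_adminq_cmd.h below).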
 
 /**
- *  i40evf_clean_arq_element
+ *  iavf_clean_arq_element
  *  @hw: pointer to the hw struct
  *  @e: event info from the receive descriptor, includes any buffers
  *  @pending: number of events that could be left to process
@@ -858,14 +842,14 @@
  *  the contents through e.  It can also return how many events are
  *  left to process through 'pending'
  **/
-i40e_status i40evf_clean_arq_element(struct i40e_hw *hw,
-					     struct i40e_arq_event_info *e,
-					     u16 *pending)
+enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw,
+					struct iavf_arq_event_info *e,
+					u16 *pending)
 {
-	i40e_status ret_code = 0;
 	u16 ntc = hw->aq.arq.next_to_clean;
-	struct i40e_aq_desc *desc;
-	struct i40e_dma_mem *bi;
+	struct iavf_aq_desc *desc;
+	enum iavf_status ret_code = 0;
+	struct iavf_dma_mem *bi;
 	u16 desc_idx;
 	u16 datalen;
 	u16 flags;
@@ -878,31 +862,31 @@
 	mutex_lock(&hw->aq.arq_mutex);
 
 	if (hw->aq.arq.count == 0) {
-		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
 			   "AQRX: Admin queue not initialized.\n");
-		ret_code = I40E_ERR_QUEUE_EMPTY;
+		ret_code = IAVF_ERR_QUEUE_EMPTY;
 		goto clean_arq_element_err;
 	}
 
 	/* set next_to_use to head */
-	ntu = rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK;
+	ntu = rd32(hw, hw->aq.arq.head) & IAVF_VF_ARQH1_ARQH_MASK;
 	if (ntu == ntc) {
 		/* nothing to do - shouldn't need to update ring's values */
-		ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK;
+		ret_code = IAVF_ERR_ADMIN_QUEUE_NO_WORK;
 		goto clean_arq_element_out;
 	}
 
 	/* now clean the next descriptor */
-	desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
+	desc = IAVF_ADMINQ_DESC(hw->aq.arq, ntc);
 	desc_idx = ntc;
 
 	hw->aq.arq_last_status =
-		(enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
+		(enum iavf_admin_queue_err)le16_to_cpu(desc->retval);
 	flags = le16_to_cpu(desc->flags);
-	if (flags & I40E_AQ_FLAG_ERR) {
-		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
-		i40e_debug(hw,
-			   I40E_DEBUG_AQ_MESSAGE,
+	if (flags & IAVF_AQ_FLAG_ERR) {
+		ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR;
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
 			   "AQRX: Event received with error 0x%X.\n",
 			   hw->aq.arq_last_status);
 	}
@@ -910,24 +894,24 @@
 	e->desc = *desc;
 	datalen = le16_to_cpu(desc->datalen);
 	e->msg_len = min(datalen, e->buf_len);
-	if (e->msg_buf != NULL && (e->msg_len != 0))
+	if (e->msg_buf && (e->msg_len != 0))
 		memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va,
 		       e->msg_len);
 
-	i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n");
-	i40evf_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf,
-			hw->aq.arq_buf_size);
+	iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n");
+	iavf_debug_aq(hw, IAVF_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf,
+		      hw->aq.arq_buf_size);
 
 	/* Restore the original datalen and buffer address in the desc,
 	 * FW updates datalen to indicate the event message
 	 * size
 	 */
 	bi = &hw->aq.arq.r.arq_bi[ntc];
-	memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+	memset((void *)desc, 0, sizeof(struct iavf_aq_desc));
 
-	desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF);
-	if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
-		desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB);
+	desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF);
+	if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF)
+		desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB);
 	desc->datalen = cpu_to_le16((u16)bi->size);
 	desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
 	desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
@@ -943,7 +927,7 @@
 
 clean_arq_element_out:
 	/* Set pending if needed, unlock and return */
-	if (pending != NULL)
+	if (pending)
 		*pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
 
 clean_arq_element_err:
@@ -951,17 +935,3 @@
 
 	return ret_code;
 }
-
-void i40evf_resume_aq(struct i40e_hw *hw)
-{
-	/* Registers are reset after PF reset */
-	hw->aq.asq.next_to_use = 0;
-	hw->aq.asq.next_to_clean = 0;
-
-	i40e_config_asq_regs(hw);
-
-	hw->aq.arq.next_to_use = 0;
-	hw->aq.arq.next_to_clean = 0;
-
-	i40e_config_arq_regs(hw);
-}
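
The *pending arithmetic at the end of iavf_clean_arq_element() handles ring wrap-around: with a 32-entry ring, next_to_clean = 30 and a hardware head of 2 gives 32 + (2 - 30) = 4 outstanding events (slots 30, 31, 0, 1), while 5 and 9 give simply 9 - 5 = 4. The i40evf_resume_aq() helper is dropped outright here; after a PF reset the iavf driver re-initializes the admin queue from scratch rather than re-arming the old rings.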
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.h b/drivers/net/ethernet/intel/iavf/iavf_adminq.h
new file mode 100644
index 0000000..baf2fe2
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_ADMINQ_H_
+#define _IAVF_ADMINQ_H_
+
+#include "iavf_osdep.h"
+#include "iavf_status.h"
+#include "iavf_adminq_cmd.h"
+
+#define IAVF_ADMINQ_DESC(R, i)   \
+	(&(((struct iavf_aq_desc *)((R).desc_buf.va))[i]))
+
+#define IAVF_ADMINQ_DESC_ALIGNMENT 4096
+
+struct iavf_adminq_ring {
+	struct iavf_virt_mem dma_head;	/* space for dma structures */
+	struct iavf_dma_mem desc_buf;	/* descriptor ring memory */
+	struct iavf_virt_mem cmd_buf;	/* command buffer memory */
+
+	union {
+		struct iavf_dma_mem *asq_bi;
+		struct iavf_dma_mem *arq_bi;
+	} r;
+
+	u16 count;		/* Number of descriptors */
+	u16 rx_buf_len;		/* Admin Receive Queue buffer length */
+
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	u32 len;
+	u32 bah;
+	u32 bal;
+};
+
+/* ASQ transaction details */
+struct iavf_asq_cmd_details {
+	void *callback; /* cast from type IAVF_ADMINQ_CALLBACK */
+	u64 cookie;
+	u16 flags_ena;
+	u16 flags_dis;
+	bool async;
+	bool postpone;
+	struct iavf_aq_desc *wb_desc;
+};
+
+#define IAVF_ADMINQ_DETAILS(R, i)   \
+	(&(((struct iavf_asq_cmd_details *)((R).cmd_buf.va))[i]))
+
+/* ARQ event information */
+struct iavf_arq_event_info {
+	struct iavf_aq_desc desc;
+	u16 msg_len;
+	u16 buf_len;
+	u8 *msg_buf;
+};
+
+/* Admin Queue information */
+struct iavf_adminq_info {
+	struct iavf_adminq_ring arq;    /* receive queue */
+	struct iavf_adminq_ring asq;    /* send queue */
+	u32 asq_cmd_timeout;            /* send queue cmd write back timeout */
+	u16 num_arq_entries;            /* receive queue depth */
+	u16 num_asq_entries;            /* send queue depth */
+	u16 arq_buf_size;               /* receive queue buffer size */
+	u16 asq_buf_size;               /* send queue buffer size */
+	u16 fw_maj_ver;                 /* firmware major version */
+	u16 fw_min_ver;                 /* firmware minor version */
+	u32 fw_build;                   /* firmware build number */
+	u16 api_maj_ver;                /* api major version */
+	u16 api_min_ver;                /* api minor version */
+
+	struct mutex asq_mutex; /* Send queue lock */
+	struct mutex arq_mutex; /* Receive queue lock */
+
+	/* last status values on send and receive queues */
+	enum iavf_admin_queue_err asq_last_status;
+	enum iavf_admin_queue_err arq_last_status;
+};
+
+/**
+ * iavf_aq_rc_to_posix - convert errors to user-land codes
+ * @aq_ret: AdminQ handler error code, can override aq_rc
+ * @aq_rc: AdminQ firmware error code to convert
+ **/
+static inline int iavf_aq_rc_to_posix(int aq_ret, int aq_rc)
+{
+	int aq_to_posix[] = {
+		0,           /* IAVF_AQ_RC_OK */
+		-EPERM,      /* IAVF_AQ_RC_EPERM */
+		-ENOENT,     /* IAVF_AQ_RC_ENOENT */
+		-ESRCH,      /* IAVF_AQ_RC_ESRCH */
+		-EINTR,      /* IAVF_AQ_RC_EINTR */
+		-EIO,        /* IAVF_AQ_RC_EIO */
+		-ENXIO,      /* IAVF_AQ_RC_ENXIO */
+		-E2BIG,      /* IAVF_AQ_RC_E2BIG */
+		-EAGAIN,     /* IAVF_AQ_RC_EAGAIN */
+		-ENOMEM,     /* IAVF_AQ_RC_ENOMEM */
+		-EACCES,     /* IAVF_AQ_RC_EACCES */
+		-EFAULT,     /* IAVF_AQ_RC_EFAULT */
+		-EBUSY,      /* IAVF_AQ_RC_EBUSY */
+		-EEXIST,     /* IAVF_AQ_RC_EEXIST */
+		-EINVAL,     /* IAVF_AQ_RC_EINVAL */
+		-ENOTTY,     /* IAVF_AQ_RC_ENOTTY */
+		-ENOSPC,     /* IAVF_AQ_RC_ENOSPC */
+		-ENOSYS,     /* IAVF_AQ_RC_ENOSYS */
+		-ERANGE,     /* IAVF_AQ_RC_ERANGE */
+		-EPIPE,      /* IAVF_AQ_RC_EFLUSHED */
+		-ESPIPE,     /* IAVF_AQ_RC_BAD_ADDR */
+		-EROFS,      /* IAVF_AQ_RC_EMODE */
+		-EFBIG,      /* IAVF_AQ_RC_EFBIG */
+	};
+
+	/* aq_rc is invalid if AQ timed out */
+	if (aq_ret == IAVF_ERR_ADMIN_QUEUE_TIMEOUT)
+		return -EAGAIN;
+
+	if ((u32)aq_rc >= ARRAY_SIZE(aq_to_posix))
+		return -ERANGE;
+
+	return aq_to_posix[aq_rc];
+}
+
+/* general information */
+#define IAVF_AQ_LARGE_BUF	512
+#define IAVF_ASQ_CMD_TIMEOUT	250000  /* usecs */
+
+void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode);
+
+#endif /* _IAVF_ADMINQ_H_ */
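
Typical use of iavf_aq_rc_to_posix(), converting the pair of codes produced by a send into an errno for the rest of the driver:

	/* status and asq_last_status as left by iavf_asq_send_command() */
	int err = iavf_aq_rc_to_posix(status, hw->aq.asq_last_status);

A timed-out command maps to -EAGAIN unconditionally, since the firmware return code is stale in that case, and any out-of-range code falls back to -ERANGE.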
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h
new file mode 100644
index 0000000..bc51230
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h
@@ -0,0 +1,528 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_ADMINQ_CMD_H_
+#define _IAVF_ADMINQ_CMD_H_
+
+/* This header file defines the iavf Admin Queue commands and is shared between
+ * iavf Firmware and Software.
+ *
+ * This file needs to comply with the Linux Kernel coding style.
+ */
+
+#define IAVF_FW_API_VERSION_MAJOR	0x0001
+#define IAVF_FW_API_VERSION_MINOR_X722	0x0005
+#define IAVF_FW_API_VERSION_MINOR_X710	0x0008
+
+#define IAVF_FW_MINOR_VERSION(_h) ((_h)->mac.type == IAVF_MAC_XL710 ? \
+					IAVF_FW_API_VERSION_MINOR_X710 : \
+					IAVF_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs */
+#define IAVF_MINOR_VER_GET_LINK_INFO_XL710 0x0007
+
+struct iavf_aq_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 retval;
+	__le32 cookie_high;
+	__le32 cookie_low;
+	union {
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 param2;
+			__le32 param3;
+		} internal;
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 addr_high;
+			__le32 addr_low;
+		} external;
+		u8 raw[16];
+	} params;
+};
+
+/* Flags sub-structure
+ * |0  |1  |2  |3  |4  |5  |6  |7  |8  |9  |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR|VFE| * *  RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE |
+ */
+
+/* command flags and offsets */
+#define IAVF_AQ_FLAG_DD_SHIFT	0
+#define IAVF_AQ_FLAG_CMP_SHIFT	1
+#define IAVF_AQ_FLAG_ERR_SHIFT	2
+#define IAVF_AQ_FLAG_VFE_SHIFT	3
+#define IAVF_AQ_FLAG_LB_SHIFT	9
+#define IAVF_AQ_FLAG_RD_SHIFT	10
+#define IAVF_AQ_FLAG_VFC_SHIFT	11
+#define IAVF_AQ_FLAG_BUF_SHIFT	12
+#define IAVF_AQ_FLAG_SI_SHIFT	13
+#define IAVF_AQ_FLAG_EI_SHIFT	14
+#define IAVF_AQ_FLAG_FE_SHIFT	15
+
+#define IAVF_AQ_FLAG_DD		BIT(IAVF_AQ_FLAG_DD_SHIFT)  /* 0x1    */
+#define IAVF_AQ_FLAG_CMP	BIT(IAVF_AQ_FLAG_CMP_SHIFT) /* 0x2    */
+#define IAVF_AQ_FLAG_ERR	BIT(IAVF_AQ_FLAG_ERR_SHIFT) /* 0x4    */
+#define IAVF_AQ_FLAG_VFE	BIT(IAVF_AQ_FLAG_VFE_SHIFT) /* 0x8    */
+#define IAVF_AQ_FLAG_LB		BIT(IAVF_AQ_FLAG_LB_SHIFT)  /* 0x200  */
+#define IAVF_AQ_FLAG_RD		BIT(IAVF_AQ_FLAG_RD_SHIFT)  /* 0x400  */
+#define IAVF_AQ_FLAG_VFC	BIT(IAVF_AQ_FLAG_VFC_SHIFT) /* 0x800  */
+#define IAVF_AQ_FLAG_BUF	BIT(IAVF_AQ_FLAG_BUF_SHIFT) /* 0x1000 */
+#define IAVF_AQ_FLAG_SI		BIT(IAVF_AQ_FLAG_SI_SHIFT)  /* 0x2000 */
+#define IAVF_AQ_FLAG_EI		BIT(IAVF_AQ_FLAG_EI_SHIFT)  /* 0x4000 */
+#define IAVF_AQ_FLAG_FE		BIT(IAVF_AQ_FLAG_FE_SHIFT)  /* 0x8000 */
+
+/* error codes */
+enum iavf_admin_queue_err {
+	IAVF_AQ_RC_OK		= 0,  /* success */
+	IAVF_AQ_RC_EPERM	= 1,  /* Operation not permitted */
+	IAVF_AQ_RC_ENOENT	= 2,  /* No such element */
+	IAVF_AQ_RC_ESRCH	= 3,  /* Bad opcode */
+	IAVF_AQ_RC_EINTR	= 4,  /* operation interrupted */
+	IAVF_AQ_RC_EIO		= 5,  /* I/O error */
+	IAVF_AQ_RC_ENXIO	= 6,  /* No such resource */
+	IAVF_AQ_RC_E2BIG	= 7,  /* Arg too long */
+	IAVF_AQ_RC_EAGAIN	= 8,  /* Try again */
+	IAVF_AQ_RC_ENOMEM	= 9,  /* Out of memory */
+	IAVF_AQ_RC_EACCES	= 10, /* Permission denied */
+	IAVF_AQ_RC_EFAULT	= 11, /* Bad address */
+	IAVF_AQ_RC_EBUSY	= 12, /* Device or resource busy */
+	IAVF_AQ_RC_EEXIST	= 13, /* object already exists */
+	IAVF_AQ_RC_EINVAL	= 14, /* Invalid argument */
+	IAVF_AQ_RC_ENOTTY	= 15, /* Not a typewriter */
+	IAVF_AQ_RC_ENOSPC	= 16, /* No space left or alloc failure */
+	IAVF_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	IAVF_AQ_RC_ERANGE	= 18, /* Parameter out of range */
+	IAVF_AQ_RC_EFLUSHED	= 19, /* Cmd flushed due to prev cmd error */
+	IAVF_AQ_RC_BAD_ADDR	= 20, /* Descriptor contains a bad pointer */
+	IAVF_AQ_RC_EMODE	= 21, /* Op not allowed in current dev mode */
+	IAVF_AQ_RC_EFBIG	= 22, /* File too large */
+};
+
+/* Admin Queue command opcodes */
+enum iavf_admin_queue_opc {
+	/* aq commands */
+	iavf_aqc_opc_get_version	= 0x0001,
+	iavf_aqc_opc_driver_version	= 0x0002,
+	iavf_aqc_opc_queue_shutdown	= 0x0003,
+	iavf_aqc_opc_set_pf_context	= 0x0004,
+
+	/* resource ownership */
+	iavf_aqc_opc_request_resource	= 0x0008,
+	iavf_aqc_opc_release_resource	= 0x0009,
+
+	iavf_aqc_opc_list_func_capabilities	= 0x000A,
+	iavf_aqc_opc_list_dev_capabilities	= 0x000B,
+
+	/* Proxy commands */
+	iavf_aqc_opc_set_proxy_config		= 0x0104,
+	iavf_aqc_opc_set_ns_proxy_table_entry	= 0x0105,
+
+	/* LAA */
+	iavf_aqc_opc_mac_address_read	= 0x0107,
+	iavf_aqc_opc_mac_address_write	= 0x0108,
+
+	/* PXE */
+	iavf_aqc_opc_clear_pxe_mode	= 0x0110,
+
+	/* WoL commands */
+	iavf_aqc_opc_set_wol_filter	= 0x0120,
+	iavf_aqc_opc_get_wake_reason	= 0x0121,
+
+	/* internal switch commands */
+	iavf_aqc_opc_get_switch_config		= 0x0200,
+	iavf_aqc_opc_add_statistics		= 0x0201,
+	iavf_aqc_opc_remove_statistics		= 0x0202,
+	iavf_aqc_opc_set_port_parameters	= 0x0203,
+	iavf_aqc_opc_get_switch_resource_alloc	= 0x0204,
+	iavf_aqc_opc_set_switch_config		= 0x0205,
+	iavf_aqc_opc_rx_ctl_reg_read		= 0x0206,
+	iavf_aqc_opc_rx_ctl_reg_write		= 0x0207,
+
+	iavf_aqc_opc_add_vsi			= 0x0210,
+	iavf_aqc_opc_update_vsi_parameters	= 0x0211,
+	iavf_aqc_opc_get_vsi_parameters		= 0x0212,
+
+	iavf_aqc_opc_add_pv			= 0x0220,
+	iavf_aqc_opc_update_pv_parameters	= 0x0221,
+	iavf_aqc_opc_get_pv_parameters		= 0x0222,
+
+	iavf_aqc_opc_add_veb			= 0x0230,
+	iavf_aqc_opc_update_veb_parameters	= 0x0231,
+	iavf_aqc_opc_get_veb_parameters		= 0x0232,
+
+	iavf_aqc_opc_delete_element		= 0x0243,
+
+	iavf_aqc_opc_add_macvlan		= 0x0250,
+	iavf_aqc_opc_remove_macvlan		= 0x0251,
+	iavf_aqc_opc_add_vlan			= 0x0252,
+	iavf_aqc_opc_remove_vlan		= 0x0253,
+	iavf_aqc_opc_set_vsi_promiscuous_modes	= 0x0254,
+	iavf_aqc_opc_add_tag			= 0x0255,
+	iavf_aqc_opc_remove_tag			= 0x0256,
+	iavf_aqc_opc_add_multicast_etag		= 0x0257,
+	iavf_aqc_opc_remove_multicast_etag	= 0x0258,
+	iavf_aqc_opc_update_tag			= 0x0259,
+	iavf_aqc_opc_add_control_packet_filter	= 0x025A,
+	iavf_aqc_opc_remove_control_packet_filter	= 0x025B,
+	iavf_aqc_opc_add_cloud_filters		= 0x025C,
+	iavf_aqc_opc_remove_cloud_filters	= 0x025D,
+	iavf_aqc_opc_clear_wol_switch_filters	= 0x025E,
+
+	iavf_aqc_opc_add_mirror_rule	= 0x0260,
+	iavf_aqc_opc_delete_mirror_rule	= 0x0261,
+
+	/* Dynamic Device Personalization */
+	iavf_aqc_opc_write_personalization_profile	= 0x0270,
+	iavf_aqc_opc_get_personalization_profile_list	= 0x0271,
+
+	/* DCB commands */
+	iavf_aqc_opc_dcb_ignore_pfc	= 0x0301,
+	iavf_aqc_opc_dcb_updated	= 0x0302,
+	iavf_aqc_opc_set_dcb_parameters = 0x0303,
+
+	/* TX scheduler */
+	iavf_aqc_opc_configure_vsi_bw_limit		= 0x0400,
+	iavf_aqc_opc_configure_vsi_ets_sla_bw_limit	= 0x0406,
+	iavf_aqc_opc_configure_vsi_tc_bw		= 0x0407,
+	iavf_aqc_opc_query_vsi_bw_config		= 0x0408,
+	iavf_aqc_opc_query_vsi_ets_sla_config		= 0x040A,
+	iavf_aqc_opc_configure_switching_comp_bw_limit	= 0x0410,
+
+	iavf_aqc_opc_enable_switching_comp_ets			= 0x0413,
+	iavf_aqc_opc_modify_switching_comp_ets			= 0x0414,
+	iavf_aqc_opc_disable_switching_comp_ets			= 0x0415,
+	iavf_aqc_opc_configure_switching_comp_ets_bw_limit	= 0x0416,
+	iavf_aqc_opc_configure_switching_comp_bw_config		= 0x0417,
+	iavf_aqc_opc_query_switching_comp_ets_config		= 0x0418,
+	iavf_aqc_opc_query_port_ets_config			= 0x0419,
+	iavf_aqc_opc_query_switching_comp_bw_config		= 0x041A,
+	iavf_aqc_opc_suspend_port_tx				= 0x041B,
+	iavf_aqc_opc_resume_port_tx				= 0x041C,
+	iavf_aqc_opc_configure_partition_bw			= 0x041D,
+	/* hmc */
+	iavf_aqc_opc_query_hmc_resource_profile	= 0x0500,
+	iavf_aqc_opc_set_hmc_resource_profile	= 0x0501,
+
+	/* phy commands*/
+	iavf_aqc_opc_get_phy_abilities		= 0x0600,
+	iavf_aqc_opc_set_phy_config		= 0x0601,
+	iavf_aqc_opc_set_mac_config		= 0x0603,
+	iavf_aqc_opc_set_link_restart_an	= 0x0605,
+	iavf_aqc_opc_get_link_status		= 0x0607,
+	iavf_aqc_opc_set_phy_int_mask		= 0x0613,
+	iavf_aqc_opc_get_local_advt_reg		= 0x0614,
+	iavf_aqc_opc_set_local_advt_reg		= 0x0615,
+	iavf_aqc_opc_get_partner_advt		= 0x0616,
+	iavf_aqc_opc_set_lb_modes		= 0x0618,
+	iavf_aqc_opc_get_phy_wol_caps		= 0x0621,
+	iavf_aqc_opc_set_phy_debug		= 0x0622,
+	iavf_aqc_opc_upload_ext_phy_fm		= 0x0625,
+	iavf_aqc_opc_run_phy_activity		= 0x0626,
+	iavf_aqc_opc_set_phy_register		= 0x0628,
+	iavf_aqc_opc_get_phy_register		= 0x0629,
+
+	/* NVM commands */
+	iavf_aqc_opc_nvm_read			= 0x0701,
+	iavf_aqc_opc_nvm_erase			= 0x0702,
+	iavf_aqc_opc_nvm_update			= 0x0703,
+	iavf_aqc_opc_nvm_config_read		= 0x0704,
+	iavf_aqc_opc_nvm_config_write		= 0x0705,
+	iavf_aqc_opc_oem_post_update		= 0x0720,
+	iavf_aqc_opc_thermal_sensor		= 0x0721,
+
+	/* virtualization commands */
+	iavf_aqc_opc_send_msg_to_pf		= 0x0801,
+	iavf_aqc_opc_send_msg_to_vf		= 0x0802,
+	iavf_aqc_opc_send_msg_to_peer		= 0x0803,
+
+	/* alternate structure */
+	iavf_aqc_opc_alternate_write		= 0x0900,
+	iavf_aqc_opc_alternate_write_indirect	= 0x0901,
+	iavf_aqc_opc_alternate_read		= 0x0902,
+	iavf_aqc_opc_alternate_read_indirect	= 0x0903,
+	iavf_aqc_opc_alternate_write_done	= 0x0904,
+	iavf_aqc_opc_alternate_set_mode		= 0x0905,
+	iavf_aqc_opc_alternate_clear_port	= 0x0906,
+
+	/* LLDP commands */
+	iavf_aqc_opc_lldp_get_mib	= 0x0A00,
+	iavf_aqc_opc_lldp_update_mib	= 0x0A01,
+	iavf_aqc_opc_lldp_add_tlv	= 0x0A02,
+	iavf_aqc_opc_lldp_update_tlv	= 0x0A03,
+	iavf_aqc_opc_lldp_delete_tlv	= 0x0A04,
+	iavf_aqc_opc_lldp_stop		= 0x0A05,
+	iavf_aqc_opc_lldp_start		= 0x0A06,
+
+	/* Tunnel commands */
+	iavf_aqc_opc_add_udp_tunnel	= 0x0B00,
+	iavf_aqc_opc_del_udp_tunnel	= 0x0B01,
+	iavf_aqc_opc_set_rss_key	= 0x0B02,
+	iavf_aqc_opc_set_rss_lut	= 0x0B03,
+	iavf_aqc_opc_get_rss_key	= 0x0B04,
+	iavf_aqc_opc_get_rss_lut	= 0x0B05,
+
+	/* Async Events */
+	iavf_aqc_opc_event_lan_overflow		= 0x1001,
+
+	/* OEM commands */
+	iavf_aqc_opc_oem_parameter_change	= 0xFE00,
+	iavf_aqc_opc_oem_device_status_change	= 0xFE01,
+	iavf_aqc_opc_oem_ocsd_initialize	= 0xFE02,
+	iavf_aqc_opc_oem_ocbb_initialize	= 0xFE03,
+
+	/* debug commands */
+	iavf_aqc_opc_debug_read_reg		= 0xFF03,
+	iavf_aqc_opc_debug_write_reg		= 0xFF04,
+	iavf_aqc_opc_debug_modify_reg		= 0xFF07,
+	iavf_aqc_opc_debug_dump_internals	= 0xFF08,
+};
+
+/* command structures and indirect data structures */
+
+/* Structure naming conventions:
+ * - no suffix for direct command descriptor structures
+ * - _data for indirect sent data
+ * - _resp for indirect return data (data which is both will use _data)
+ * - _completion for direct return data
+ * - _element_ for repeated elements (may also be _data or _resp)
+ *
+ * Command structures are expected to overlay the params.raw member of the basic
+ * descriptor, and as such cannot exceed 16 bytes in length.
+ */
+
+/* This macro is used to generate a compilation error if a structure
+ * is not exactly the correct length. It gives a divide by zero error if the
+ * structure is not of the correct size, otherwise it creates an enum that is
+ * never used.
+ */
+#define IAVF_CHECK_STRUCT_LEN(n, X) enum iavf_static_assert_enum_##X \
+	{ iavf_static_assert_##X = (n) / ((sizeof(struct X) == (n)) ? 1 : 0) }
+
+/* This macro is used extensively to ensure that command structures are 16
+ * bytes in length as they have to map to the raw array of that size.
+ */
+#define IAVF_CHECK_CMD_LENGTH(X)	IAVF_CHECK_STRUCT_LEN(16, X)
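
For a hypothetical struct example_cmd, IAVF_CHECK_CMD_LENGTH(example_cmd) expands to:

	enum iavf_static_assert_enum_example_cmd {
		iavf_static_assert_example_cmd =
			16 / ((sizeof(struct example_cmd) == 16) ? 1 : 0)
	};

A wrong size makes the divisor 0, which is ill-formed in an enum initializer and stops the build. The divide-by-zero idiom, rather than BUILD_BUG_ON() or static_assert(), keeps the header usable in the non-kernel environments mentioned in the comment at the top of this file.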
+
+/* Queue Shutdown (direct 0x0003) */
+struct iavf_aqc_queue_shutdown {
+	__le32	driver_unloading;
+#define IAVF_AQ_DRIVER_UNLOADING	0x1
+	u8	reserved[12];
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_queue_shutdown);
+
+struct iavf_aqc_vsi_properties_data {
+	/* first 96 bytes are written by SW */
+	__le16	valid_sections;
+#define IAVF_AQ_VSI_PROP_SWITCH_VALID		0x0001
+#define IAVF_AQ_VSI_PROP_SECURITY_VALID		0x0002
+#define IAVF_AQ_VSI_PROP_VLAN_VALID		0x0004
+#define IAVF_AQ_VSI_PROP_CAS_PV_VALID		0x0008
+#define IAVF_AQ_VSI_PROP_INGRESS_UP_VALID	0x0010
+#define IAVF_AQ_VSI_PROP_EGRESS_UP_VALID	0x0020
+#define IAVF_AQ_VSI_PROP_QUEUE_MAP_VALID	0x0040
+#define IAVF_AQ_VSI_PROP_QUEUE_OPT_VALID	0x0080
+#define IAVF_AQ_VSI_PROP_OUTER_UP_VALID		0x0100
+#define IAVF_AQ_VSI_PROP_SCHED_VALID		0x0200
+	/* switch section */
+	__le16	switch_id; /* 12bit id combined with flags below */
+#define IAVF_AQ_VSI_SW_ID_SHIFT		0x0000
+#define IAVF_AQ_VSI_SW_ID_MASK		(0xFFF << IAVF_AQ_VSI_SW_ID_SHIFT)
+#define IAVF_AQ_VSI_SW_ID_FLAG_NOT_STAG	0x1000
+#define IAVF_AQ_VSI_SW_ID_FLAG_ALLOW_LB	0x2000
+#define IAVF_AQ_VSI_SW_ID_FLAG_LOCAL_LB	0x4000
+	u8	sw_reserved[2];
+	/* security section */
+	u8	sec_flags;
+#define IAVF_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD	0x01
+#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK	0x02
+#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK	0x04
+	u8	sec_reserved;
+	/* VLAN section */
+	__le16	pvid; /* VLANs include priority bits */
+	__le16	fcoe_pvid;
+	u8	port_vlan_flags;
+#define IAVF_AQ_VSI_PVLAN_MODE_SHIFT	0x00
+#define IAVF_AQ_VSI_PVLAN_MODE_MASK	(0x03 << \
+					 IAVF_AQ_VSI_PVLAN_MODE_SHIFT)
+#define IAVF_AQ_VSI_PVLAN_MODE_TAGGED	0x01
+#define IAVF_AQ_VSI_PVLAN_MODE_UNTAGGED	0x02
+#define IAVF_AQ_VSI_PVLAN_MODE_ALL	0x03
+#define IAVF_AQ_VSI_PVLAN_INSERT_PVID	0x04
+#define IAVF_AQ_VSI_PVLAN_EMOD_SHIFT	0x03
+#define IAVF_AQ_VSI_PVLAN_EMOD_MASK	(0x3 << \
+					 IAVF_AQ_VSI_PVLAN_EMOD_SHIFT)
+#define IAVF_AQ_VSI_PVLAN_EMOD_STR_BOTH	0x0
+#define IAVF_AQ_VSI_PVLAN_EMOD_STR_UP	0x08
+#define IAVF_AQ_VSI_PVLAN_EMOD_STR	0x10
+#define IAVF_AQ_VSI_PVLAN_EMOD_NOTHING	0x18
+	u8	pvlan_reserved[3];
+	/* ingress egress up sections */
+	__le32	ingress_table; /* bitmap, 3 bits per up */
+#define IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT	0
+#define IAVF_AQ_VSI_UP_TABLE_UP0_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT	3
+#define IAVF_AQ_VSI_UP_TABLE_UP1_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT	6
+#define IAVF_AQ_VSI_UP_TABLE_UP2_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT	9
+#define IAVF_AQ_VSI_UP_TABLE_UP3_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT	12
+#define IAVF_AQ_VSI_UP_TABLE_UP4_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT	15
+#define IAVF_AQ_VSI_UP_TABLE_UP5_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT	18
+#define IAVF_AQ_VSI_UP_TABLE_UP6_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT	21
+#define IAVF_AQ_VSI_UP_TABLE_UP7_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT)
+	__le32	egress_table;   /* same defines as for ingress table */
+	/* cascaded PV section */
+	__le16	cas_pv_tag;
+	u8	cas_pv_flags;
+#define IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT		0x00
+#define IAVF_AQ_VSI_CAS_PV_TAGX_MASK		(0x03 << \
+						 IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT)
+#define IAVF_AQ_VSI_CAS_PV_TAGX_LEAVE		0x00
+#define IAVF_AQ_VSI_CAS_PV_TAGX_REMOVE		0x01
+#define IAVF_AQ_VSI_CAS_PV_TAGX_COPY		0x02
+#define IAVF_AQ_VSI_CAS_PV_INSERT_TAG		0x10
+#define IAVF_AQ_VSI_CAS_PV_ETAG_PRUNE		0x20
+#define IAVF_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG	0x40
+	u8	cas_pv_reserved;
+	/* queue mapping section */
+	__le16	mapping_flags;
+#define IAVF_AQ_VSI_QUE_MAP_CONTIG	0x0
+#define IAVF_AQ_VSI_QUE_MAP_NONCONTIG	0x1
+	__le16	queue_mapping[16];
+#define IAVF_AQ_VSI_QUEUE_SHIFT		0x0
+#define IAVF_AQ_VSI_QUEUE_MASK		(0x7FF << IAVF_AQ_VSI_QUEUE_SHIFT)
+	__le16	tc_mapping[8];
+#define IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT	0
+#define IAVF_AQ_VSI_TC_QUE_OFFSET_MASK	(0x1FF << \
+					 IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT)
+#define IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT	9
+#define IAVF_AQ_VSI_TC_QUE_NUMBER_MASK	(0x7 << \
+					 IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT)
+	/* queueing option section */
+	u8	queueing_opt_flags;
+#define IAVF_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA	0x04
+#define IAVF_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA	0x08
+#define IAVF_AQ_VSI_QUE_OPT_TCP_ENA	0x10
+#define IAVF_AQ_VSI_QUE_OPT_FCOE_ENA	0x20
+#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_PF	0x00
+#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_VSI	0x40
+	u8	queueing_opt_reserved[3];
+	/* scheduler section */
+	u8	up_enable_bits;
+	u8	sched_reserved;
+	/* outer up section */
+	__le32	outer_up_table; /* same structure and defines as ingress tbl */
+	u8	cmd_reserved[8];
+	/* last 32 bytes are written by FW */
+	__le16	qs_handle[8];
+#define IAVF_AQ_VSI_QS_HANDLE_INVALID	0xFFFF
+	__le16	stat_counter_idx;
+	__le16	sched_id;
+	u8	resp_reserved[12];
+};
+
+IAVF_CHECK_STRUCT_LEN(128, iavf_aqc_vsi_properties_data);
+
+/* Get VEB Parameters (direct 0x0232)
+ * uses iavf_aqc_switch_seid for the descriptor
+ */
+struct iavf_aqc_get_veb_parameters_completion {
+	__le16	seid;
+	__le16	switch_id;
+	__le16	veb_flags; /* only the first/last flags from 0x0230 are valid */
+	__le16	statistic_index;
+	__le16	vebs_used;
+	__le16	vebs_free;
+	u8	reserved[4];
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_veb_parameters_completion);
+
+#define IAVF_LINK_SPEED_100MB_SHIFT	0x1
+#define IAVF_LINK_SPEED_1000MB_SHIFT	0x2
+#define IAVF_LINK_SPEED_10GB_SHIFT	0x3
+#define IAVF_LINK_SPEED_40GB_SHIFT	0x4
+#define IAVF_LINK_SPEED_20GB_SHIFT	0x5
+#define IAVF_LINK_SPEED_25GB_SHIFT	0x6
+
+enum iavf_aq_link_speed {
+	IAVF_LINK_SPEED_UNKNOWN	= 0,
+	IAVF_LINK_SPEED_100MB	= BIT(IAVF_LINK_SPEED_100MB_SHIFT),
+	IAVF_LINK_SPEED_1GB	= BIT(IAVF_LINK_SPEED_1000MB_SHIFT),
+	IAVF_LINK_SPEED_10GB	= BIT(IAVF_LINK_SPEED_10GB_SHIFT),
+	IAVF_LINK_SPEED_40GB	= BIT(IAVF_LINK_SPEED_40GB_SHIFT),
+	IAVF_LINK_SPEED_20GB	= BIT(IAVF_LINK_SPEED_20GB_SHIFT),
+	IAVF_LINK_SPEED_25GB	= BIT(IAVF_LINK_SPEED_25GB_SHIFT),
+};
+
+/* Send to PF command (indirect 0x0801) id is only used by PF
+ * Send to VF command (indirect 0x0802) id is only used by PF
+ * Send to Peer PF command (indirect 0x0803)
+ */
+struct iavf_aqc_pf_vf_message {
+	__le32	id;
+	u8	reserved[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_pf_vf_message);
+
+struct iavf_aqc_get_set_rss_key {
+#define IAVF_AQC_SET_RSS_KEY_VSI_VALID		BIT(15)
+#define IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT	0
+#define IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK	(0x3FF << \
+					IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT)
+	__le16	vsi_id;
+	u8	reserved[6];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_key);
+
+struct iavf_aqc_get_set_rss_key_data {
+	u8 standard_rss_key[0x28];
+	u8 extended_hash_key[0xc];
+};
+
+IAVF_CHECK_STRUCT_LEN(0x34, iavf_aqc_get_set_rss_key_data);
+
+struct  iavf_aqc_get_set_rss_lut {
+#define IAVF_AQC_SET_RSS_LUT_VSI_VALID		BIT(15)
+#define IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT	0
+#define IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK	(0x3FF << \
+					IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT)
+	__le16	vsi_id;
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT	0
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \
+				BIT(IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT)
+
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI	0
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF	1
+	__le16	flags;
+	u8	reserved[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_lut);
+#endif /* _IAVF_ADMINQ_CMD_H_ */
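
To contrast with the direct-command sketch in iavf_adminq.c, an indirect command overlays its 16-byte struct on the descriptor and passes the large payload separately. A sketch for set_rss_key, assuming a caller-supplied key of type struct iavf_aqc_get_set_rss_key_data (the driver's real wrapper lives elsewhere in this series):

	struct iavf_aq_desc desc;
	struct iavf_aqc_get_set_rss_key *cmd =
		(struct iavf_aqc_get_set_rss_key *)&desc.params.raw;
	enum iavf_status status;
	u16 vsi_id = 0;			/* hypothetical VSI id */

	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_set_rss_key);
	desc.flags |= cpu_to_le16((u16)(IAVF_AQ_FLAG_BUF | IAVF_AQ_FLAG_RD));
	cmd->vsi_id = cpu_to_le16((vsi_id & IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK) |
				  IAVF_AQC_SET_RSS_KEY_VSI_VALID);
	status = iavf_asq_send_command(hw, &desc, key, sizeof(*key), NULL);

iavf_asq_send_command() copies the key into its per-slot DMA buffer and fills datalen plus addr_high/addr_low itself.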
diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h
new file mode 100644
index 0000000..2711573
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_ALLOC_H_
+#define _IAVF_ALLOC_H_
+
+struct iavf_hw;
+
+/* Memory allocation types */
+enum iavf_memory_type {
+	iavf_mem_arq_buf = 0,		/* ARQ indirect command buffer */
+	iavf_mem_asq_buf = 1,
+	iavf_mem_atq_buf = 2,		/* ATQ indirect command buffer */
+	iavf_mem_arq_ring = 3,		/* ARQ descriptor ring */
+	iavf_mem_atq_ring = 4,		/* ATQ descriptor ring */
+	iavf_mem_pd = 5,		/* Page Descriptor */
+	iavf_mem_bp = 6,		/* Backing Page - 4KB */
+	iavf_mem_bp_jumbo = 7,		/* Backing Page - > 4KB */
+	iavf_mem_reserved
+};
+
+/* prototype for functions used for dynamic memory allocation */
+enum iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw,
+				       struct iavf_dma_mem *mem,
+				       enum iavf_memory_type type,
+				       u64 size, u32 alignment);
+enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw,
+				   struct iavf_dma_mem *mem);
+enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw,
+					struct iavf_virt_mem *mem, u32 size);
+enum iavf_status iavf_free_virt_mem(struct iavf_hw *hw,
+				    struct iavf_virt_mem *mem);
+
+#endif /* _IAVF_ALLOC_H_ */
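
These prototypes are satisfied elsewhere in the series; a minimal sketch of what a DMA backend could look like, assuming hw->back points at the adapter and the mem semantics used by iavf_adminq.c (the driver's actual implementation differs in detail):

	enum iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw,
					       struct iavf_dma_mem *mem,
					       enum iavf_memory_type type,
					       u64 size, u32 alignment)
	{
		struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;

		if (!mem)
			return IAVF_ERR_PARAM;

		/* round up so the ring meets its alignment requirement */
		mem->size = ALIGN(size, alignment);
		mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
					     &mem->pa, GFP_KERNEL);
		return mem->va ? 0 : IAVF_ERR_NO_MEMORY;
	}

Note the memory type is advisory in this sketch; only the size and alignment affect the allocation.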
diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.c b/drivers/net/ethernet/intel/iavf/iavf_client.c
new file mode 100644
index 0000000..0c77e41
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_client.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "iavf.h"
+#include "iavf_prototype.h"
+#include "iavf_client.h"
+
+static
+const char iavf_client_interface_version_str[] = IAVF_CLIENT_VERSION_STR;
+static struct iavf_client *vf_registered_client;
+static LIST_HEAD(iavf_devices);
+static DEFINE_MUTEX(iavf_device_mutex);
+
+static u32 iavf_client_virtchnl_send(struct iavf_info *ldev,
+				     struct iavf_client *client,
+				     u8 *msg, u16 len);
+
+static int iavf_client_setup_qvlist(struct iavf_info *ldev,
+				    struct iavf_client *client,
+				    struct iavf_qvlist_info *qvlist_info);
+
+static struct iavf_ops iavf_lan_ops = {
+	.virtchnl_send = iavf_client_virtchnl_send,
+	.setup_qvlist = iavf_client_setup_qvlist,
+};
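
iavf_lan_ops is the narrow surface the LAN driver exposes to its RDMA client: one callback to tunnel virtchnl messages to the PF and one to register the client's MSI-X queue-vector map, so the client never reaches into LAN internals. Each client instance is pointed at this table further down via cinst->lan_info.ops.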
+
+/**
+ * iavf_client_get_params - retrieve relevant client parameters
+ * @vsi: VSI with parameters
+ * @params: client param struct
+ **/
+static
+void iavf_client_get_params(struct iavf_vsi *vsi, struct iavf_params *params)
+{
+	int i;
+
+	memset(params, 0, sizeof(struct iavf_params));
+	params->mtu = vsi->netdev->mtu;
+	params->link_up = vsi->back->link_up;
+
+	for (i = 0; i < IAVF_MAX_USER_PRIORITY; i++) {
+		params->qos.prio_qos[i].tc = 0;
+		params->qos.prio_qos[i].qs_handle = vsi->qs_handle;
+	}
+}
+
+/**
+ * iavf_notify_client_message - call the client message receive callback
+ * @vsi: the VSI associated with this client
+ * @msg: message buffer
+ * @len: length of message
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len)
+{
+	struct iavf_client_instance *cinst;
+
+	if (!vsi)
+		return;
+
+	cinst = vsi->back->cinst;
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->virtchnl_receive) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance virtchnl_receive function\n");
+		return;
+	}
+	cinst->client->ops->virtchnl_receive(&cinst->lan_info, cinst->client,
+					     msg, len);
+}
+
+/**
+ * iavf_notify_client_l2_params - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void iavf_notify_client_l2_params(struct iavf_vsi *vsi)
+{
+	struct iavf_client_instance *cinst;
+	struct iavf_params params;
+
+	if (!vsi)
+		return;
+
+	cinst = vsi->back->cinst;
+
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->l2_param_change) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance l2_param_change function\n");
+		return;
+	}
+	iavf_client_get_params(vsi, &params);
+	cinst->lan_info.params = params;
+	cinst->client->ops->l2_param_change(&cinst->lan_info, cinst->client,
+					    &params);
+}
+
+/**
+ * iavf_notify_client_open - call the client open callback
+ * @vsi: the VSI with netdev opened
+ *
+ * If there is a client to this netdev, call the client with open
+ **/
+void iavf_notify_client_open(struct iavf_vsi *vsi)
+{
+	struct iavf_adapter *adapter = vsi->back;
+	struct iavf_client_instance *cinst = adapter->cinst;
+	int ret;
+
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->open) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance open function\n");
+		return;
+	}
+	if (!(test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state))) {
+		ret = cinst->client->ops->open(&cinst->lan_info, cinst->client);
+		if (!ret)
+			set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
+	}
+}
+
+/**
+ * iavf_client_release_qvlist - send a message to the PF to release iwarp qv map
+ * @ldev: pointer to L2 context.
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int iavf_client_release_qvlist(struct iavf_info *ldev)
+{
+	struct iavf_adapter *adapter = ldev->vf;
+	enum iavf_status err;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	err = iavf_aq_send_msg_to_pf(&adapter->hw,
+				     VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
+				     IAVF_SUCCESS, NULL, 0, NULL);
+
+	if (err)
+		dev_err(&adapter->pdev->dev,
+			"Unable to send iWarp vector release message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * iavf_notify_client_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to reset pending
+ *
+ * If there is a client to this netdev, call the client with close
+ **/
+void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset)
+{
+	struct iavf_adapter *adapter = vsi->back;
+	struct iavf_client_instance *cinst = adapter->cinst;
+
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->close) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance close function\n");
+		return;
+	}
+	cinst->client->ops->close(&cinst->lan_info, cinst->client, reset);
+	iavf_client_release_qvlist(&cinst->lan_info);
+	clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
+}
+
+/**
+ * iavf_client_add_instance - add a client instance to the instance list
+ * @adapter: pointer to the board struct
+ *
+ * Returns cinst ptr on success, NULL on failure
+ **/
+static struct iavf_client_instance *
+iavf_client_add_instance(struct iavf_adapter *adapter)
+{
+	struct iavf_client_instance *cinst = NULL;
+	struct iavf_vsi *vsi = &adapter->vsi;
+	struct netdev_hw_addr *mac = NULL;
+	struct iavf_params params;
+
+	if (!vf_registered_client)
+		goto out;
+
+	if (adapter->cinst) {
+		cinst = adapter->cinst;
+		goto out;
+	}
+
+	cinst = kzalloc(sizeof(*cinst), GFP_KERNEL);
+	if (!cinst)
+		goto out;
+
+	cinst->lan_info.vf = (void *)adapter;
+	cinst->lan_info.netdev = vsi->netdev;
+	cinst->lan_info.pcidev = adapter->pdev;
+	cinst->lan_info.fid = 0;
+	cinst->lan_info.ftype = IAVF_CLIENT_FTYPE_VF;
+	cinst->lan_info.hw_addr = adapter->hw.hw_addr;
+	cinst->lan_info.ops = &iavf_lan_ops;
+	cinst->lan_info.version.major = IAVF_CLIENT_VERSION_MAJOR;
+	cinst->lan_info.version.minor = IAVF_CLIENT_VERSION_MINOR;
+	cinst->lan_info.version.build = IAVF_CLIENT_VERSION_BUILD;
+	iavf_client_get_params(vsi, &params);
+	cinst->lan_info.params = params;
+	set_bit(__IAVF_CLIENT_INSTANCE_NONE, &cinst->state);
+
+	cinst->lan_info.msix_count = adapter->num_iwarp_msix;
+	cinst->lan_info.msix_entries =
+			&adapter->msix_entries[adapter->iwarp_base_vector];
+
+	mac = list_first_entry(&cinst->lan_info.netdev->dev_addrs.list,
+			       struct netdev_hw_addr, list);
+	if (mac)
+		ether_addr_copy(cinst->lan_info.lanmac, mac->addr);
+	else
+		dev_err(&adapter->pdev->dev, "MAC address list is empty!\n");
+
+	cinst->client = vf_registered_client;
+	adapter->cinst = cinst;
+out:
+	return cinst;
+}
+
+/**
+ * iavf_client_del_instance - removes a client instance from the list
+ * @adapter: pointer to the board struct
+ *
+ **/
+static
+void iavf_client_del_instance(struct iavf_adapter *adapter)
+{
+	kfree(adapter->cinst);
+	adapter->cinst = NULL;
+}
+
+/**
+ * iavf_client_subtask - client maintenance work
+ * @adapter: board private structure
+ **/
+void iavf_client_subtask(struct iavf_adapter *adapter)
+{
+	struct iavf_client *client = vf_registered_client;
+	struct iavf_client_instance *cinst;
+	int ret = 0;
+
+	if (adapter->state < __IAVF_DOWN)
+		return;
+
+	/* first check that a client is registered */
+	if (!client)
+		return;
+
+	/* Add the client instance to the instance list */
+	cinst = iavf_client_add_instance(adapter);
+	if (!cinst)
+		return;
+
+	dev_info(&adapter->pdev->dev, "Added instance of Client %s\n",
+		 client->name);
+
+	if (!test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+		/* Send an Open request to the client */
+
+		if (client->ops && client->ops->open)
+			ret = client->ops->open(&cinst->lan_info, client);
+		if (!ret)
+			set_bit(__IAVF_CLIENT_INSTANCE_OPENED,
+				&cinst->state);
+		else
+			/* remove client instance */
+			iavf_client_del_instance(adapter);
+	}
+}
+
+/**
+ * iavf_lan_add_device - add a lan device struct to the list of lan devices
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_lan_add_device(struct iavf_adapter *adapter)
+{
+	struct iavf_device *ldev;
+	int ret = 0;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		if (ldev->vf == adapter) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ldev->vf = adapter;
+	INIT_LIST_HEAD(&ldev->list);
+	list_add(&ldev->list, &iavf_devices);
+	dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+		 adapter->hw.bus.bus_id, adapter->hw.bus.device,
+		 adapter->hw.bus.func);
+
+	/* Since in some cases a register call may have happened before a
+	 * device was added, schedule a subtask to initialize the clients.
+	 */
+	adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+
+out:
+	mutex_unlock(&iavf_device_mutex);
+	return ret;
+}
+
+/**
+ * iavf_lan_del_device - removes a lan device from the device list
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_lan_del_device(struct iavf_adapter *adapter)
+{
+	struct iavf_device *ldev, *tmp;
+	int ret = -ENODEV;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry_safe(ldev, tmp, &iavf_devices, list) {
+		if (ldev->vf == adapter) {
+			dev_info(&adapter->pdev->dev,
+				 "Deleted LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+				 adapter->hw.bus.bus_id, adapter->hw.bus.device,
+				 adapter->hw.bus.func);
+			list_del(&ldev->list);
+			kfree(ldev);
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&iavf_device_mutex);
+	return ret;
+}
+
+/**
+ * iavf_client_release - release client specific resources
+ * @client: pointer to the registered client
+ *
+ **/
+static void iavf_client_release(struct iavf_client *client)
+{
+	struct iavf_client_instance *cinst;
+	struct iavf_device *ldev;
+	struct iavf_adapter *adapter;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		adapter = ldev->vf;
+		cinst = adapter->cinst;
+		if (!cinst)
+			continue;
+		if (test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+			if (client->ops && client->ops->close)
+				client->ops->close(&cinst->lan_info, client,
+						   false);
+			iavf_client_release_qvlist(&cinst->lan_info);
+			clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
+
+			dev_warn(&adapter->pdev->dev,
+				 "Client %s instance closed\n", client->name);
+		}
+		/* delete the client instance */
+		iavf_client_del_instance(adapter);
+		dev_info(&adapter->pdev->dev, "Deleted client instance of Client %s\n",
+			 client->name);
+	}
+	mutex_unlock(&iavf_device_mutex);
+}
+
+/**
+ * iavf_client_prepare - prepare client specific resources
+ * @client: pointer to the registered client
+ *
+ **/
+static void iavf_client_prepare(struct iavf_client *client)
+{
+	struct iavf_device *ldev;
+	struct iavf_adapter *adapter;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		adapter = ldev->vf;
+		/* Signal the watchdog to service the client */
+		adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+	}
+	mutex_unlock(&iavf_device_mutex);
+}
+
+/**
+ * iavf_client_virtchnl_send - send a message to the PF instance
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @msg: pointer to message buffer
+ * @len: message length
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static u32 iavf_client_virtchnl_send(struct iavf_info *ldev,
+				     struct iavf_client *client,
+				     u8 *msg, u16 len)
+{
+	struct iavf_adapter *adapter = ldev->vf;
+	enum iavf_status err;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP,
+				     IAVF_SUCCESS, msg, len, NULL);
+	if (err)
+		dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * iavf_client_setup_qvlist - send a message to the PF to setup iwarp qv map
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int iavf_client_setup_qvlist(struct iavf_info *ldev,
+				    struct iavf_client *client,
+				    struct iavf_qvlist_info *qvlist_info)
+{
+	struct virtchnl_iwarp_qvlist_info *v_qvlist_info;
+	struct iavf_adapter *adapter = ldev->vf;
+	struct iavf_qv_info *qv_info;
+	enum iavf_status err;
+	u32 v_idx, i;
+	size_t msg_size;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	/* A quick check on whether the vectors belong to the client */
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+		if ((v_idx >=
+		    (adapter->iwarp_base_vector + adapter->num_iwarp_msix)) ||
+		    (v_idx < adapter->iwarp_base_vector))
+			return -EINVAL;
+	}
+
+	v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info;
+	msg_size = struct_size(v_qvlist_info, qv_info,
+			       v_qvlist_info->num_vectors - 1);
+
+	adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP);
+	err = iavf_aq_send_msg_to_pf(&adapter->hw,
+				VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, IAVF_SUCCESS,
+				(u8 *)v_qvlist_info, msg_size, NULL);
+
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to send iWarp vector config message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+		goto out;
+	}
+
+	err = -EBUSY;
+	for (i = 0; i < 5; i++) {
+		msleep(100);
+		if (!(adapter->client_pending &
+		      BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) {
+			err = 0;
+			break;
+		}
+	}
+out:
+	return err;
+}
+
+/**
+ * iavf_register_client - Register an iavf client driver with the L2 driver
+ * @client: pointer to the iavf_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_register_client(struct iavf_client *client)
+{
+	int ret = 0;
+
+	if (!client) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (strlen(client->name) == 0) {
+		pr_info("iavf: Failed to register client with no name\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (vf_registered_client) {
+		pr_info("iavf: Client %s has already been registered!\n",
+			client->name);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if ((client->version.major != IAVF_CLIENT_VERSION_MAJOR) ||
+	    (client->version.minor != IAVF_CLIENT_VERSION_MINOR)) {
+		pr_info("iavf: Failed to register client %s due to mismatched client interface version\n",
+			client->name);
+		pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+			client->version.major, client->version.minor,
+			client->version.build,
+			iavf_client_interface_version_str);
+		ret = -EIO;
+		goto out;
+	}
+
+	vf_registered_client = client;
+
+	iavf_client_prepare(client);
+
+	pr_info("iavf: Registered client %s with return code %d\n",
+		client->name, ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(iavf_register_client);
+
+/**
+ * iavf_unregister_client - Unregister an iavf client driver from the L2 driver
+ * @client: pointer to the iavf_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_unregister_client(struct iavf_client *client)
+{
+	int ret = 0;
+
+	/* When an unregister request comes through, we must send a close
+	 * to each of the client instances that were opened.
+	 * iavf_client_release() is called to handle this.
+	 */
+	iavf_client_release(client);
+
+	if (vf_registered_client != client) {
+		pr_info("iavf: Client %s has not been registered\n",
+			client->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	vf_registered_client = NULL;
+	pr_info("iavf: Unregistered client %s\n", client->name);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(iavf_unregister_client);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.h b/drivers/net/ethernet/intel/iavf/iavf_client.h
new file mode 100644
index 0000000..9a7cf39
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_client.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_CLIENT_H_
+#define _IAVF_CLIENT_H_
+
+#define IAVF_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define IAVF_CLIENT_VERSION_MAJOR 0
+#define IAVF_CLIENT_VERSION_MINOR 01
+#define IAVF_CLIENT_VERSION_BUILD 00
+#define IAVF_CLIENT_VERSION_STR     \
+	__stringify(IAVF_CLIENT_VERSION_MAJOR) "." \
+	__stringify(IAVF_CLIENT_VERSION_MINOR) "." \
+	__stringify(IAVF_CLIENT_VERSION_BUILD)
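+/* e.g. with the values above this expands to "0.01.00" */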
+
+struct iavf_client_version {
+	u8 major;
+	u8 minor;
+	u8 build;
+	u8 rsvd;
+};
+
+enum iavf_client_state {
+	__IAVF_CLIENT_NULL,
+	__IAVF_CLIENT_REGISTERED
+};
+
+enum iavf_client_instance_state {
+	__IAVF_CLIENT_INSTANCE_NONE,
+	__IAVF_CLIENT_INSTANCE_OPENED,
+};
+
+struct iavf_ops;
+struct iavf_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define IAVF_QUEUE_TYPE_PE_AEQ	0x80
+#define IAVF_QUEUE_INVALID_IDX	0xFFFF
+
+struct iavf_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct iavf_qvlist_info {
+	u32 num_vectors;
+	struct iavf_qv_info qv_info[1];
+};
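+
+/* qv_info[1] is a one-element array used flexible-array style: callers
+ * allocate room for num_vectors entries in total, which is why
+ * iavf_client_setup_qvlist() passes num_vectors - 1 to struct_size()
+ * (one entry is already part of the struct).
+ */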
+
+#define IAVF_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per priority info */
+struct iavf_prio_qos_params {
+	u16 qs_handle; /* qs handle for prio */
+	u8 tc; /* TC mapped to prio */
+	u8 reserved;
+};
+
+#define IAVF_CLIENT_MAX_USER_PRIORITY	8
+/* Struct to hold Client QoS */
+struct iavf_qos_params {
+	struct iavf_prio_qos_params prio_qos[IAVF_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct iavf_params {
+	struct iavf_qos_params qos;
+	u16 mtu;
+	u16 link_up; /* boolean */
+};
+
+/* Structure to hold LAN device info for a client device */
+struct iavf_info {
+	struct iavf_client_version version;
+	u8 lanmac[6];
+	struct net_device *netdev;
+	struct pci_dev *pcidev;
+	u8 __iomem *hw_addr;
+	u8 fid;	/* function id, PF id or VF id */
+#define IAVF_CLIENT_FTYPE_PF 0
+#define IAVF_CLIENT_FTYPE_VF 1
+	u8 ftype; /* function type, PF or VF */
+	void *vf; /* cast to iavf_adapter */
+
+	/* All L2 params that could change during the life span of the device
+	 * and needs to be communicated to the client when they change
+	 */
+	struct iavf_params params;
+	struct iavf_ops *ops;
+
+	u16 msix_count;	 /* number of msix vectors */
+	/* Array down below will be dynamically allocated based on msix_count */
+	struct msix_entry *msix_entries;
+	u16 itr_index; /* Which ITR index the PE driver is supposed to use */
+};
+
+struct iavf_ops {
+	/* setup_qvlist enables queues with a particular vector */
+	int (*setup_qvlist)(struct iavf_info *ldev, struct iavf_client *client,
+			    struct iavf_qvlist_info *qv_info);
+
+	u32 (*virtchnl_send)(struct iavf_info *ldev, struct iavf_client *client,
+			     u8 *msg, u16 len);
+
+	/* If the PE Engine is unresponsive, the RDMA driver can request
+	 * a reset.
+	 */
+	void (*request_reset)(struct iavf_info *ldev,
+			      struct iavf_client *client);
+};
+
+struct iavf_client_ops {
+	/* Should be called from register_client() or whenever the driver is
+	 * ready to create a specific client instance.
+	 */
+	int (*open)(struct iavf_info *ldev, struct iavf_client *client);
+
+	/* Called when the netdev is unavailable or when an unregister
+	 * call comes in. If the close happens due to a reset, the reset
+	 * flag is set to true.
+	 */
+	void (*close)(struct iavf_info *ldev, struct iavf_client *client,
+		      bool reset);
+
+	/* called when some L2-managed parameters change, e.g. MSS */
+	void (*l2_param_change)(struct iavf_info *ldev,
+				struct iavf_client *client,
+				struct iavf_params *params);
+
+	/* called when a message is received from the PF */
+	int (*virtchnl_receive)(struct iavf_info *ldev,
+				struct iavf_client *client,
+				u8 *msg, u16 len);
+};
+
+/* Client device */
+struct iavf_client_instance {
+	struct list_head list;
+	struct iavf_info lan_info;
+	struct iavf_client *client;
+	unsigned long  state;
+};
+
+struct iavf_client {
+	struct list_head list;		/* list of registered clients */
+	char name[IAVF_CLIENT_STR_LENGTH];
+	struct iavf_client_version version;
+	unsigned long state;		/* client state */
+	atomic_t ref_cnt;  /* Count of all the client devices of this kind */
+	u32 flags;
+#define IAVF_CLIENT_FLAGS_LAUNCH_ON_PROBE	BIT(0)
+#define IAVF_TX_FLAGS_NOTIFY_OTHER_EVENTS	BIT(2)
+	u8 type;
+#define IAVF_CLIENT_IWARP 0
+	struct iavf_client_ops *ops;	/* client ops provided by the client */
+};
+
+/* used by clients */
+int iavf_register_client(struct iavf_client *client);
+int iavf_unregister_client(struct iavf_client *client);
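+
+/* A minimal registration sketch, for illustration only: the ops table
+ * and the "example" naming below are hypothetical, not part of this
+ * interface.
+ *
+ *	static struct iavf_client example_client = {
+ *		.name		= "example",
+ *		.version	= { IAVF_CLIENT_VERSION_MAJOR,
+ *				    IAVF_CLIENT_VERSION_MINOR,
+ *				    IAVF_CLIENT_VERSION_BUILD, 0 },
+ *		.type		= IAVF_CLIENT_IWARP,
+ *		.ops		= &example_client_ops,
+ *	};
+ *
+ *	err = iavf_register_client(&example_client);
+ *	...
+ *	err = iavf_unregister_client(&example_client);
+ */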
+#endif /* _IAVF_CLIENT_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c
new file mode 100644
index 0000000..8547fc8
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_common.c
@@ -0,0 +1,956 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf_type.h"
+#include "iavf_adminq.h"
+#include "iavf_prototype.h"
+#include <linux/avf/virtchnl.h>
+
+/**
+ * iavf_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+enum iavf_status iavf_set_mac_type(struct iavf_hw *hw)
+{
+	enum iavf_status status = 0;
+
+	if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
+		switch (hw->device_id) {
+		case IAVF_DEV_ID_X722_VF:
+			hw->mac.type = IAVF_MAC_X722_VF;
+			break;
+		case IAVF_DEV_ID_VF:
+		case IAVF_DEV_ID_VF_HV:
+		case IAVF_DEV_ID_ADAPTIVE_VF:
+			hw->mac.type = IAVF_MAC_VF;
+			break;
+		default:
+			hw->mac.type = IAVF_MAC_GENERIC;
+			break;
+		}
+	} else {
+		status = IAVF_ERR_DEVICE_NOT_SUPPORTED;
+	}
+
+	hw_dbg(hw, "found mac: %d, returns: %d\n", hw->mac.type, status);
+	return status;
+}
+
+/**
+ * iavf_aq_str - convert AQ err code to a string
+ * @hw: pointer to the HW structure
+ * @aq_err: the AQ error code to convert
+ **/
+const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err)
+{
+	switch (aq_err) {
+	case IAVF_AQ_RC_OK:
+		return "OK";
+	case IAVF_AQ_RC_EPERM:
+		return "IAVF_AQ_RC_EPERM";
+	case IAVF_AQ_RC_ENOENT:
+		return "IAVF_AQ_RC_ENOENT";
+	case IAVF_AQ_RC_ESRCH:
+		return "IAVF_AQ_RC_ESRCH";
+	case IAVF_AQ_RC_EINTR:
+		return "IAVF_AQ_RC_EINTR";
+	case IAVF_AQ_RC_EIO:
+		return "IAVF_AQ_RC_EIO";
+	case IAVF_AQ_RC_ENXIO:
+		return "IAVF_AQ_RC_ENXIO";
+	case IAVF_AQ_RC_E2BIG:
+		return "IAVF_AQ_RC_E2BIG";
+	case IAVF_AQ_RC_EAGAIN:
+		return "IAVF_AQ_RC_EAGAIN";
+	case IAVF_AQ_RC_ENOMEM:
+		return "IAVF_AQ_RC_ENOMEM";
+	case IAVF_AQ_RC_EACCES:
+		return "IAVF_AQ_RC_EACCES";
+	case IAVF_AQ_RC_EFAULT:
+		return "IAVF_AQ_RC_EFAULT";
+	case IAVF_AQ_RC_EBUSY:
+		return "IAVF_AQ_RC_EBUSY";
+	case IAVF_AQ_RC_EEXIST:
+		return "IAVF_AQ_RC_EEXIST";
+	case IAVF_AQ_RC_EINVAL:
+		return "IAVF_AQ_RC_EINVAL";
+	case IAVF_AQ_RC_ENOTTY:
+		return "IAVF_AQ_RC_ENOTTY";
+	case IAVF_AQ_RC_ENOSPC:
+		return "IAVF_AQ_RC_ENOSPC";
+	case IAVF_AQ_RC_ENOSYS:
+		return "IAVF_AQ_RC_ENOSYS";
+	case IAVF_AQ_RC_ERANGE:
+		return "IAVF_AQ_RC_ERANGE";
+	case IAVF_AQ_RC_EFLUSHED:
+		return "IAVF_AQ_RC_EFLUSHED";
+	case IAVF_AQ_RC_BAD_ADDR:
+		return "IAVF_AQ_RC_BAD_ADDR";
+	case IAVF_AQ_RC_EMODE:
+		return "IAVF_AQ_RC_EMODE";
+	case IAVF_AQ_RC_EFBIG:
+		return "IAVF_AQ_RC_EFBIG";
+	}
+
+	snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err);
+	return hw->err_str;
+}
+
+/**
+ * iavf_stat_str - convert status err code to a string
+ * @hw: pointer to the HW structure
+ * @stat_err: the status error code to convert
+ **/
+const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err)
+{
+	switch (stat_err) {
+	case 0:
+		return "OK";
+	case IAVF_ERR_NVM:
+		return "IAVF_ERR_NVM";
+	case IAVF_ERR_NVM_CHECKSUM:
+		return "IAVF_ERR_NVM_CHECKSUM";
+	case IAVF_ERR_PHY:
+		return "IAVF_ERR_PHY";
+	case IAVF_ERR_CONFIG:
+		return "IAVF_ERR_CONFIG";
+	case IAVF_ERR_PARAM:
+		return "IAVF_ERR_PARAM";
+	case IAVF_ERR_MAC_TYPE:
+		return "IAVF_ERR_MAC_TYPE";
+	case IAVF_ERR_UNKNOWN_PHY:
+		return "IAVF_ERR_UNKNOWN_PHY";
+	case IAVF_ERR_LINK_SETUP:
+		return "IAVF_ERR_LINK_SETUP";
+	case IAVF_ERR_ADAPTER_STOPPED:
+		return "IAVF_ERR_ADAPTER_STOPPED";
+	case IAVF_ERR_INVALID_MAC_ADDR:
+		return "IAVF_ERR_INVALID_MAC_ADDR";
+	case IAVF_ERR_DEVICE_NOT_SUPPORTED:
+		return "IAVF_ERR_DEVICE_NOT_SUPPORTED";
+	case IAVF_ERR_MASTER_REQUESTS_PENDING:
+		return "IAVF_ERR_MASTER_REQUESTS_PENDING";
+	case IAVF_ERR_INVALID_LINK_SETTINGS:
+		return "IAVF_ERR_INVALID_LINK_SETTINGS";
+	case IAVF_ERR_AUTONEG_NOT_COMPLETE:
+		return "IAVF_ERR_AUTONEG_NOT_COMPLETE";
+	case IAVF_ERR_RESET_FAILED:
+		return "IAVF_ERR_RESET_FAILED";
+	case IAVF_ERR_SWFW_SYNC:
+		return "IAVF_ERR_SWFW_SYNC";
+	case IAVF_ERR_NO_AVAILABLE_VSI:
+		return "IAVF_ERR_NO_AVAILABLE_VSI";
+	case IAVF_ERR_NO_MEMORY:
+		return "IAVF_ERR_NO_MEMORY";
+	case IAVF_ERR_BAD_PTR:
+		return "IAVF_ERR_BAD_PTR";
+	case IAVF_ERR_RING_FULL:
+		return "IAVF_ERR_RING_FULL";
+	case IAVF_ERR_INVALID_PD_ID:
+		return "IAVF_ERR_INVALID_PD_ID";
+	case IAVF_ERR_INVALID_QP_ID:
+		return "IAVF_ERR_INVALID_QP_ID";
+	case IAVF_ERR_INVALID_CQ_ID:
+		return "IAVF_ERR_INVALID_CQ_ID";
+	case IAVF_ERR_INVALID_CEQ_ID:
+		return "IAVF_ERR_INVALID_CEQ_ID";
+	case IAVF_ERR_INVALID_AEQ_ID:
+		return "IAVF_ERR_INVALID_AEQ_ID";
+	case IAVF_ERR_INVALID_SIZE:
+		return "IAVF_ERR_INVALID_SIZE";
+	case IAVF_ERR_INVALID_ARP_INDEX:
+		return "IAVF_ERR_INVALID_ARP_INDEX";
+	case IAVF_ERR_INVALID_FPM_FUNC_ID:
+		return "IAVF_ERR_INVALID_FPM_FUNC_ID";
+	case IAVF_ERR_QP_INVALID_MSG_SIZE:
+		return "IAVF_ERR_QP_INVALID_MSG_SIZE";
+	case IAVF_ERR_QP_TOOMANY_WRS_POSTED:
+		return "IAVF_ERR_QP_TOOMANY_WRS_POSTED";
+	case IAVF_ERR_INVALID_FRAG_COUNT:
+		return "IAVF_ERR_INVALID_FRAG_COUNT";
+	case IAVF_ERR_QUEUE_EMPTY:
+		return "IAVF_ERR_QUEUE_EMPTY";
+	case IAVF_ERR_INVALID_ALIGNMENT:
+		return "IAVF_ERR_INVALID_ALIGNMENT";
+	case IAVF_ERR_FLUSHED_QUEUE:
+		return "IAVF_ERR_FLUSHED_QUEUE";
+	case IAVF_ERR_INVALID_PUSH_PAGE_INDEX:
+		return "IAVF_ERR_INVALID_PUSH_PAGE_INDEX";
+	case IAVF_ERR_INVALID_IMM_DATA_SIZE:
+		return "IAVF_ERR_INVALID_IMM_DATA_SIZE";
+	case IAVF_ERR_TIMEOUT:
+		return "IAVF_ERR_TIMEOUT";
+	case IAVF_ERR_OPCODE_MISMATCH:
+		return "IAVF_ERR_OPCODE_MISMATCH";
+	case IAVF_ERR_CQP_COMPL_ERROR:
+		return "IAVF_ERR_CQP_COMPL_ERROR";
+	case IAVF_ERR_INVALID_VF_ID:
+		return "IAVF_ERR_INVALID_VF_ID";
+	case IAVF_ERR_INVALID_HMCFN_ID:
+		return "IAVF_ERR_INVALID_HMCFN_ID";
+	case IAVF_ERR_BACKING_PAGE_ERROR:
+		return "IAVF_ERR_BACKING_PAGE_ERROR";
+	case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE:
+		return "IAVF_ERR_NO_PBLCHUNKS_AVAILABLE";
+	case IAVF_ERR_INVALID_PBLE_INDEX:
+		return "IAVF_ERR_INVALID_PBLE_INDEX";
+	case IAVF_ERR_INVALID_SD_INDEX:
+		return "IAVF_ERR_INVALID_SD_INDEX";
+	case IAVF_ERR_INVALID_PAGE_DESC_INDEX:
+		return "IAVF_ERR_INVALID_PAGE_DESC_INDEX";
+	case IAVF_ERR_INVALID_SD_TYPE:
+		return "IAVF_ERR_INVALID_SD_TYPE";
+	case IAVF_ERR_MEMCPY_FAILED:
+		return "IAVF_ERR_MEMCPY_FAILED";
+	case IAVF_ERR_INVALID_HMC_OBJ_INDEX:
+		return "IAVF_ERR_INVALID_HMC_OBJ_INDEX";
+	case IAVF_ERR_INVALID_HMC_OBJ_COUNT:
+		return "IAVF_ERR_INVALID_HMC_OBJ_COUNT";
+	case IAVF_ERR_INVALID_SRQ_ARM_LIMIT:
+		return "IAVF_ERR_INVALID_SRQ_ARM_LIMIT";
+	case IAVF_ERR_SRQ_ENABLED:
+		return "IAVF_ERR_SRQ_ENABLED";
+	case IAVF_ERR_ADMIN_QUEUE_ERROR:
+		return "IAVF_ERR_ADMIN_QUEUE_ERROR";
+	case IAVF_ERR_ADMIN_QUEUE_TIMEOUT:
+		return "IAVF_ERR_ADMIN_QUEUE_TIMEOUT";
+	case IAVF_ERR_BUF_TOO_SHORT:
+		return "IAVF_ERR_BUF_TOO_SHORT";
+	case IAVF_ERR_ADMIN_QUEUE_FULL:
+		return "IAVF_ERR_ADMIN_QUEUE_FULL";
+	case IAVF_ERR_ADMIN_QUEUE_NO_WORK:
+		return "IAVF_ERR_ADMIN_QUEUE_NO_WORK";
+	case IAVF_ERR_BAD_IWARP_CQE:
+		return "IAVF_ERR_BAD_IWARP_CQE";
+	case IAVF_ERR_NVM_BLANK_MODE:
+		return "IAVF_ERR_NVM_BLANK_MODE";
+	case IAVF_ERR_NOT_IMPLEMENTED:
+		return "IAVF_ERR_NOT_IMPLEMENTED";
+	case IAVF_ERR_PE_DOORBELL_NOT_ENABLED:
+		return "IAVF_ERR_PE_DOORBELL_NOT_ENABLED";
+	case IAVF_ERR_DIAG_TEST_FAILED:
+		return "IAVF_ERR_DIAG_TEST_FAILED";
+	case IAVF_ERR_NOT_READY:
+		return "IAVF_ERR_NOT_READY";
+	case IAVF_NOT_SUPPORTED:
+		return "IAVF_NOT_SUPPORTED";
+	case IAVF_ERR_FIRMWARE_API_VERSION:
+		return "IAVF_ERR_FIRMWARE_API_VERSION";
+	case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
+		return "IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR";
+	}
+
+	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
+	return hw->err_str;
+}
+
+/**
+ * iavf_debug_aq
+ * @hw: pointer to the hw struct
+ * @mask: debug mask
+ * @desc: pointer to admin queue descriptor
+ * @buffer: pointer to command buffer
+ * @buf_len: max length of buffer
+ *
+ * Dumps debug log about adminq command with descriptor contents.
+ **/
+void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask, void *desc,
+		   void *buffer, u16 buf_len)
+{
+	struct iavf_aq_desc *aq_desc = (struct iavf_aq_desc *)desc;
+	u8 *buf = (u8 *)buffer;
+
+	if ((!(mask & hw->debug_mask)) || !desc)
+		return;
+
+	iavf_debug(hw, mask,
+		   "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		   le16_to_cpu(aq_desc->opcode),
+		   le16_to_cpu(aq_desc->flags),
+		   le16_to_cpu(aq_desc->datalen),
+		   le16_to_cpu(aq_desc->retval));
+	iavf_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->cookie_high),
+		   le32_to_cpu(aq_desc->cookie_low));
+	iavf_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->params.internal.param0),
+		   le32_to_cpu(aq_desc->params.internal.param1));
+	iavf_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->params.external.addr_high),
+		   le32_to_cpu(aq_desc->params.external.addr_low));
+
+	if (buffer && aq_desc->datalen) {
+		u16 len = le16_to_cpu(aq_desc->datalen);
+
+		iavf_debug(hw, mask, "AQ CMD Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+		/* write the full 16-byte chunks */
+		if (hw->debug_mask & mask) {
+			char prefix[27];
+
+			snprintf(prefix, sizeof(prefix),
+				 "iavf %02x:%02x.%x: \t0x",
+				 hw->bus.bus_id,
+				 hw->bus.device,
+				 hw->bus.func);
+
+			print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET,
+				       16, 1, buf, len, false);
+		}
+	}
+}
+
+/**
+ * iavf_check_asq_alive
+ * @hw: pointer to the hw struct
+ *
+ * Returns true if the queue is enabled, else false.
+ **/
+bool iavf_check_asq_alive(struct iavf_hw *hw)
+{
+	if (hw->aq.asq.len)
+		return !!(rd32(hw, hw->aq.asq.len) &
+			  IAVF_VF_ATQLEN1_ATQENABLE_MASK);
+	else
+		return false;
+}
+
+/**
+ * iavf_aq_queue_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well.
+ **/
+enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading)
+{
+	struct iavf_aq_desc desc;
+	struct iavf_aqc_queue_shutdown *cmd =
+		(struct iavf_aqc_queue_shutdown *)&desc.params.raw;
+	enum iavf_status status;
+
+	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown);
+
+	if (unloading)
+		cmd->driver_unloading = cpu_to_le32(IAVF_AQ_DRIVER_UNLOADING);
+	status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+	return status;
+}
+
+/**
+ * iavf_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get or set RSS look up table
+ **/
+static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw,
+						u16 vsi_id, bool pf_lut,
+						u8 *lut, u16 lut_size,
+						bool set)
+{
+	enum iavf_status status;
+	struct iavf_aq_desc desc;
+	struct iavf_aqc_get_set_rss_lut *cmd_resp =
+		   (struct iavf_aqc_get_set_rss_lut *)&desc.params.raw;
+
+	if (set)
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_set_rss_lut);
+	else
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_get_rss_lut);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD);
+
+	cmd_resp->vsi_id =
+			cpu_to_le16((u16)((vsi_id <<
+					  IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) &
+					  IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK));
+	cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_LUT_VSI_VALID);
+
+	if (pf_lut)
+		cmd_resp->flags |= cpu_to_le16((u16)
+					((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF <<
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+	else
+		cmd_resp->flags |= cpu_to_le16((u16)
+					((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI <<
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+
+	status = iavf_asq_send_command(hw, &desc, lut, lut_size, NULL);
+
+	return status;
+}
+
+/**
+ * iavf_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * get the RSS lookup table, PF or VSI type
+ **/
+enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id,
+				     bool pf_lut, u8 *lut, u16 lut_size)
+{
+	return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size,
+				       false);
+}
+
+/**
+ * iavf_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ **/
+enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id,
+				     bool pf_lut, u8 *lut, u16 lut_size)
+{
+	return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true);
+}
+
+/**
+ * iavf_aq_get_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * Internal function to get or set the RSS key per VSI
+ **/
+static enum
+iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id,
+				    struct iavf_aqc_get_set_rss_key_data *key,
+				    bool set)
+{
+	enum iavf_status status;
+	struct iavf_aq_desc desc;
+	struct iavf_aqc_get_set_rss_key *cmd_resp =
+			(struct iavf_aqc_get_set_rss_key *)&desc.params.raw;
+	u16 key_size = sizeof(struct iavf_aqc_get_set_rss_key_data);
+
+	if (set)
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_set_rss_key);
+	else
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_get_rss_key);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD);
+
+	cmd_resp->vsi_id =
+			cpu_to_le16((u16)((vsi_id <<
+					  IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) &
+					  IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK));
+	cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_KEY_VSI_VALID);
+
+	status = iavf_asq_send_command(hw, &desc, key, key_size, NULL);
+
+	return status;
+}
+
+/**
+ * iavf_aq_get_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ * get the RSS key per VSI
+ **/
+enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id,
+				     struct iavf_aqc_get_set_rss_key_data *key)
+{
+	return iavf_aq_get_set_rss_key(hw, vsi_id, key, false);
+}
+
+/**
+ * iavf_aq_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ **/
+enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id,
+				     struct iavf_aqc_get_set_rss_key_data *key)
+{
+	return iavf_aq_get_set_rss_key(hw, vsi_id, key, true);
+}
+
+/* The iavf_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT iavf_ptype_lookup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum iavf_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
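+
+/* A hedged sketch of that flow (field names assumed from the decoded
+ * ptype struct in iavf_type.h; illustrative only):
+ *
+ *	struct iavf_rx_ptype_decoded decoded = iavf_ptype_lookup[ptype];
+ *
+ *	if (!decoded.known)
+ *		return;			(packet type is unknown)
+ *	if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP)
+ *		...			(inspect tunnel and inner fields)
+ *	else
+ *		...			(decode via enum iavf_rx_l2_ptype)
+ */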
+
+/* macro to make the table lines short */
+#define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	{	PTYPE, \
+		1, \
+		IAVF_RX_PTYPE_OUTER_##OUTER_IP, \
+		IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		IAVF_RX_PTYPE_##OUTER_FRAG, \
+		IAVF_RX_PTYPE_TUNNEL_##T, \
+		IAVF_RX_PTYPE_TUNNEL_END_##TE, \
+		IAVF_RX_PTYPE_##TEF, \
+		IAVF_RX_PTYPE_INNER_PROT_##I, \
+		IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define IAVF_PTT_UNUSED_ENTRY(PTYPE) \
+		{ PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define IAVF_RX_PTYPE_NOF		IAVF_RX_PTYPE_NOT_FRAG
+#define IAVF_RX_PTYPE_FRG		IAVF_RX_PTYPE_FRAG
+#define IAVF_RX_PTYPE_INNER_PROT_TS	IAVF_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = {
+	/* L2 Packet types */
+	IAVF_PTT_UNUSED_ENTRY(0),
+	IAVF_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT(2,  L2, NONE, NOF, NONE, NONE, NOF, TS,   PAY2),
+	IAVF_PTT(3,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT_UNUSED_ENTRY(4),
+	IAVF_PTT_UNUSED_ENTRY(5),
+	IAVF_PTT(6,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT(7,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT_UNUSED_ENTRY(8),
+	IAVF_PTT_UNUSED_ENTRY(9),
+	IAVF_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	IAVF_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+	/* Non Tunneled IPv4 */
+	IAVF_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(25),
+	IAVF_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	IAVF_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	IAVF_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	IAVF_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(32),
+	IAVF_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	IAVF_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(39),
+	IAVF_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	IAVF_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	IAVF_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(47),
+	IAVF_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	IAVF_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(54),
+	IAVF_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	IAVF_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	IAVF_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(62),
+	IAVF_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	IAVF_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(69),
+	IAVF_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	IAVF_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	IAVF_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(77),
+	IAVF_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	IAVF_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(84),
+	IAVF_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY3),
+	IAVF_PTT_UNUSED_ENTRY(91),
+	IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	IAVF_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	IAVF_PTT(95,  IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(96,  IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(97,  IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(98),
+	IAVF_PTT(99,  IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	IAVF_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(105),
+	IAVF_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	IAVF_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	IAVF_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(113),
+	IAVF_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	IAVF_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(120),
+	IAVF_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	IAVF_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	IAVF_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(128),
+	IAVF_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	IAVF_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(135),
+	IAVF_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	IAVF_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	IAVF_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(143),
+	IAVF_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	IAVF_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(150),
+	IAVF_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	IAVF_PTT_UNUSED_ENTRY(154),
+	IAVF_PTT_UNUSED_ENTRY(155),
+	IAVF_PTT_UNUSED_ENTRY(156),
+	IAVF_PTT_UNUSED_ENTRY(157),
+	IAVF_PTT_UNUSED_ENTRY(158),
+	IAVF_PTT_UNUSED_ENTRY(159),
+
+	IAVF_PTT_UNUSED_ENTRY(160),
+	IAVF_PTT_UNUSED_ENTRY(161),
+	IAVF_PTT_UNUSED_ENTRY(162),
+	IAVF_PTT_UNUSED_ENTRY(163),
+	IAVF_PTT_UNUSED_ENTRY(164),
+	IAVF_PTT_UNUSED_ENTRY(165),
+	IAVF_PTT_UNUSED_ENTRY(166),
+	IAVF_PTT_UNUSED_ENTRY(167),
+	IAVF_PTT_UNUSED_ENTRY(168),
+	IAVF_PTT_UNUSED_ENTRY(169),
+
+	IAVF_PTT_UNUSED_ENTRY(170),
+	IAVF_PTT_UNUSED_ENTRY(171),
+	IAVF_PTT_UNUSED_ENTRY(172),
+	IAVF_PTT_UNUSED_ENTRY(173),
+	IAVF_PTT_UNUSED_ENTRY(174),
+	IAVF_PTT_UNUSED_ENTRY(175),
+	IAVF_PTT_UNUSED_ENTRY(176),
+	IAVF_PTT_UNUSED_ENTRY(177),
+	IAVF_PTT_UNUSED_ENTRY(178),
+	IAVF_PTT_UNUSED_ENTRY(179),
+
+	IAVF_PTT_UNUSED_ENTRY(180),
+	IAVF_PTT_UNUSED_ENTRY(181),
+	IAVF_PTT_UNUSED_ENTRY(182),
+	IAVF_PTT_UNUSED_ENTRY(183),
+	IAVF_PTT_UNUSED_ENTRY(184),
+	IAVF_PTT_UNUSED_ENTRY(185),
+	IAVF_PTT_UNUSED_ENTRY(186),
+	IAVF_PTT_UNUSED_ENTRY(187),
+	IAVF_PTT_UNUSED_ENTRY(188),
+	IAVF_PTT_UNUSED_ENTRY(189),
+
+	IAVF_PTT_UNUSED_ENTRY(190),
+	IAVF_PTT_UNUSED_ENTRY(191),
+	IAVF_PTT_UNUSED_ENTRY(192),
+	IAVF_PTT_UNUSED_ENTRY(193),
+	IAVF_PTT_UNUSED_ENTRY(194),
+	IAVF_PTT_UNUSED_ENTRY(195),
+	IAVF_PTT_UNUSED_ENTRY(196),
+	IAVF_PTT_UNUSED_ENTRY(197),
+	IAVF_PTT_UNUSED_ENTRY(198),
+	IAVF_PTT_UNUSED_ENTRY(199),
+
+	IAVF_PTT_UNUSED_ENTRY(200),
+	IAVF_PTT_UNUSED_ENTRY(201),
+	IAVF_PTT_UNUSED_ENTRY(202),
+	IAVF_PTT_UNUSED_ENTRY(203),
+	IAVF_PTT_UNUSED_ENTRY(204),
+	IAVF_PTT_UNUSED_ENTRY(205),
+	IAVF_PTT_UNUSED_ENTRY(206),
+	IAVF_PTT_UNUSED_ENTRY(207),
+	IAVF_PTT_UNUSED_ENTRY(208),
+	IAVF_PTT_UNUSED_ENTRY(209),
+
+	IAVF_PTT_UNUSED_ENTRY(210),
+	IAVF_PTT_UNUSED_ENTRY(211),
+	IAVF_PTT_UNUSED_ENTRY(212),
+	IAVF_PTT_UNUSED_ENTRY(213),
+	IAVF_PTT_UNUSED_ENTRY(214),
+	IAVF_PTT_UNUSED_ENTRY(215),
+	IAVF_PTT_UNUSED_ENTRY(216),
+	IAVF_PTT_UNUSED_ENTRY(217),
+	IAVF_PTT_UNUSED_ENTRY(218),
+	IAVF_PTT_UNUSED_ENTRY(219),
+
+	IAVF_PTT_UNUSED_ENTRY(220),
+	IAVF_PTT_UNUSED_ENTRY(221),
+	IAVF_PTT_UNUSED_ENTRY(222),
+	IAVF_PTT_UNUSED_ENTRY(223),
+	IAVF_PTT_UNUSED_ENTRY(224),
+	IAVF_PTT_UNUSED_ENTRY(225),
+	IAVF_PTT_UNUSED_ENTRY(226),
+	IAVF_PTT_UNUSED_ENTRY(227),
+	IAVF_PTT_UNUSED_ENTRY(228),
+	IAVF_PTT_UNUSED_ENTRY(229),
+
+	IAVF_PTT_UNUSED_ENTRY(230),
+	IAVF_PTT_UNUSED_ENTRY(231),
+	IAVF_PTT_UNUSED_ENTRY(232),
+	IAVF_PTT_UNUSED_ENTRY(233),
+	IAVF_PTT_UNUSED_ENTRY(234),
+	IAVF_PTT_UNUSED_ENTRY(235),
+	IAVF_PTT_UNUSED_ENTRY(236),
+	IAVF_PTT_UNUSED_ENTRY(237),
+	IAVF_PTT_UNUSED_ENTRY(238),
+	IAVF_PTT_UNUSED_ENTRY(239),
+
+	IAVF_PTT_UNUSED_ENTRY(240),
+	IAVF_PTT_UNUSED_ENTRY(241),
+	IAVF_PTT_UNUSED_ENTRY(242),
+	IAVF_PTT_UNUSED_ENTRY(243),
+	IAVF_PTT_UNUSED_ENTRY(244),
+	IAVF_PTT_UNUSED_ENTRY(245),
+	IAVF_PTT_UNUSED_ENTRY(246),
+	IAVF_PTT_UNUSED_ENTRY(247),
+	IAVF_PTT_UNUSED_ENTRY(248),
+	IAVF_PTT_UNUSED_ENTRY(249),
+
+	IAVF_PTT_UNUSED_ENTRY(250),
+	IAVF_PTT_UNUSED_ENTRY(251),
+	IAVF_PTT_UNUSED_ENTRY(252),
+	IAVF_PTT_UNUSED_ENTRY(253),
+	IAVF_PTT_UNUSED_ENTRY(254),
+	IAVF_PTT_UNUSED_ENTRY(255)
+};
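+
+/* The table above deliberately carries one row for every possible 8-bit
+ * PTYPE (0 through 255), so iavf_ptype_lookup[ptype] is always a valid
+ * index.
+ */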
+
+/**
+ * iavf_aq_send_msg_to_pf
+ * @hw: pointer to the hardware structure
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cmd_details: pointer to command details
+ *
+ * Send message to PF driver using admin queue. By default, this message
+ * is sent asynchronously, i.e. iavf_asq_send_command() does not wait for
+ * completion before returning.
+ **/
+enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw,
+					enum virtchnl_ops v_opcode,
+					enum iavf_status v_retval,
+					u8 *msg, u16 msglen,
+					struct iavf_asq_cmd_details *cmd_details)
+{
+	struct iavf_asq_cmd_details details;
+	struct iavf_aq_desc desc;
+	enum iavf_status status;
+
+	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_send_msg_to_pf);
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_SI);
+	desc.cookie_high = cpu_to_le32(v_opcode);
+	desc.cookie_low = cpu_to_le32(v_retval);
+	if (msglen) {
+		desc.flags |= cpu_to_le16((u16)(IAVF_AQ_FLAG_BUF
+						| IAVF_AQ_FLAG_RD));
+		if (msglen > IAVF_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_LB);
+		desc.datalen = cpu_to_le16(msglen);
+	}
+	if (!cmd_details) {
+		memset(&details, 0, sizeof(details));
+		details.async = true;
+		cmd_details = &details;
+	}
+	status = iavf_asq_send_command(hw, &desc, msg, msglen, cmd_details);
+	return status;
+}
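+
+/* A minimal usage sketch (hypothetical; VIRTCHNL_OP_GET_STATS is just
+ * an example opcode, and further error handling is elided):
+ *
+ *	enum iavf_status err;
+ *
+ *	err = iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_GET_STATS,
+ *				     IAVF_SUCCESS, NULL, 0, NULL);
+ *	if (err)
+ *		hw_dbg(hw, "send failed: %s\n",
+ *		       iavf_aq_str(hw, hw->aq.asq_last_status));
+ */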
+
+/**
+ * iavf_vf_parse_hw_config
+ * @hw: pointer to the hardware structure
+ * @msg: pointer to the virtual channel VF resource structure
+ *
+ * Given a VF resource message from the PF, populate the hw struct
+ * with appropriate information.
+ **/
+void iavf_vf_parse_hw_config(struct iavf_hw *hw,
+			     struct virtchnl_vf_resource *msg)
+{
+	struct virtchnl_vsi_resource *vsi_res;
+	int i;
+
+	vsi_res = &msg->vsi_res[0];
+
+	hw->dev_caps.num_vsis = msg->num_vsis;
+	hw->dev_caps.num_rx_qp = msg->num_queue_pairs;
+	hw->dev_caps.num_tx_qp = msg->num_queue_pairs;
+	hw->dev_caps.num_msix_vectors_vf = msg->max_vectors;
+	hw->dev_caps.dcb = msg->vf_cap_flags &
+			   VIRTCHNL_VF_OFFLOAD_L2;
+	hw->dev_caps.fcoe = 0;
+	for (i = 0; i < msg->num_vsis; i++) {
+		if (vsi_res->vsi_type == VIRTCHNL_VSI_SRIOV) {
+			ether_addr_copy(hw->mac.perm_addr,
+					vsi_res->default_mac_addr);
+			ether_addr_copy(hw->mac.addr,
+					vsi_res->default_mac_addr);
+		}
+		vsi_res++;
+	}
+}
+
+/**
+ * iavf_vf_reset
+ * @hw: pointer to the hardware structure
+ *
+ * Send a VF_RESET message to the PF. Does not wait for response from PF
+ * as none will be forthcoming. Immediately after calling this function,
+ * the admin queue should be shut down and (optionally) reinitialized.
+ **/
+enum iavf_status iavf_vf_reset(struct iavf_hw *hw)
+{
+	return iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF,
+				      0, NULL, 0, NULL);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_devids.h b/drivers/net/ethernet/intel/iavf/iavf_devids.h
new file mode 100644
index 0000000..8eb7b69
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_devids.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_DEVIDS_H_
+#define _IAVF_DEVIDS_H_
+
+/* Device IDs for the VF driver */
+#define IAVF_DEV_ID_VF			0x154C
+#define IAVF_DEV_ID_VF_HV		0x1571
+#define IAVF_DEV_ID_ADAPTIVE_VF		0x1889
+#define IAVF_DEV_ID_X722_VF		0x37CD
+#endif /* _IAVF_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
new file mode 100644
index 0000000..dad3eec
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -0,0 +1,1036 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* ethtool support for iavf */
+#include "iavf.h"
+
+#include <linux/uaccess.h>
+
+/* ethtool statistics helpers */
+
+/**
+ * struct iavf_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() of the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as an offset from a common base pointer. Stats should
+ * be defined in constant arrays using the IAVF_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the iavf_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the iavf_add_stat_string() helper function.
+ **/
+struct iavf_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+/* Helper macro to define an iavf_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define IAVF_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics related to queues */
+#define IAVF_QUEUE_STAT(_name, _stat) \
+	IAVF_STAT(struct iavf_ring, _name, _stat)
+
+/* Stats associated with a Tx or Rx ring */
+static const struct iavf_stats iavf_gstrings_queue_stats[] = {
+	IAVF_QUEUE_STAT("%s-%u.packets", stats.packets),
+	IAVF_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
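+
+/* With the format strings above, a caller looping over the rings fills
+ * in the prefix and queue index via the variadic arguments to
+ * iavf_add_stat_strings() below, producing names such as "tx-0.packets"
+ * and "tx-0.bytes" (the prefix and index shown here are illustrative).
+ */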
+
+/**
+ * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pointer: basis for where to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. Used to implement iavf_add_ethtool_stats and
+ * iavf_add_queue_stats. If the pointer is null, data will be zero'd.
+ */
+static void
+iavf_add_one_ethtool_stat(u64 *data, void *pointer,
+			  const struct iavf_stats *stat)
+{
+	char *p;
+
+	if (!pointer) {
+		/* ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pointer + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * __iavf_add_ethtool_stats - copy stats into the ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location to copy stats from
+ * @stats: array of stats to copy
+ * @size: the size of the stats definition
+ *
+ * Copy the stats defined by the stats array using the pointer as a base into
+ * the data buffer supplied by ethtool. Updates the data pointer to point to
+ * the next empty location for successive calls to __iavf_add_ethtool_stats.
+ * If pointer is null, set the data values to zero and update the pointer to
+ * skip these stats.
+ **/
+static void
+__iavf_add_ethtool_stats(u64 **data, void *pointer,
+			 const struct iavf_stats stats[],
+			 const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		iavf_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
+}
+
+/**
+ * iavf_add_ethtool_stats - copy stats into ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location where stats are stored
+ * @stats: static const array of stat definitions
+ *
+ * Macro to ease the use of __iavf_add_ethtool_stats by taking a static
+ * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
+ * ensuring that we pass the size associated with the given stats array.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided.
+ **/
+#define iavf_add_ethtool_stats(data, pointer, stats) \
+	__iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
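+
+/* Example (illustrative): iavf_add_ethtool_stats(&data, adapter,
+ * iavf_gstrings_stats) copies one u64 per entry of iavf_gstrings_stats out
+ * of *adapter and leaves data pointing just past the copied values.
+ */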
+
+/**
+ * iavf_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin_irq, so we can't directly use iavf_add_ethtool_stats.
+ * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static void
+iavf_add_queue_stats(u64 **data, struct iavf_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(iavf_gstrings_queue_stats);
+	const struct iavf_stats *stats = iavf_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry_irq. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp);
+		for (i = 0; i < size; i++)
+			iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]);
+	} while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
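+
+/* For reference, a minimal sketch of the writer side that the retry loop
+ * above pairs with (the real updates live in the driver's hot path, and
+ * total_bytes here is a hypothetical local):
+ *
+ *	u64_stats_update_begin(&ring->syncp);
+ *	ring->stats.packets++;
+ *	ring->stats.bytes += total_bytes;
+ *	u64_stats_update_end(&ring->syncp);
+ *
+ * The fetch_begin/fetch_retry pair re-reads the counters until no writer
+ * raced with the copy, so readers never see a torn 64-bit value on 32-bit
+ * systems.
+ */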
+
+/**
+ * __iavf_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ **/
+static void __iavf_add_stat_strings(u8 **p, const struct iavf_stats stats[],
+				    const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+/**
+ * iavf_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ **/
+#define iavf_add_stat_strings(p, stats, ...) \
+	__iavf_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
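+
+/* Example (illustrative): iavf_add_stat_strings(&data,
+ * iavf_gstrings_queue_stats, "tx", 0) emits the strings "tx-0.packets" and
+ * "tx-0.bytes" and advances data by 2 * ETH_GSTRING_LEN.
+ */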
+
+#define VF_STAT(_name, _stat) \
+	IAVF_STAT(struct iavf_adapter, _name, _stat)
+
+static const struct iavf_stats iavf_gstrings_stats[] = {
+	VF_STAT("rx_bytes", current_stats.rx_bytes),
+	VF_STAT("rx_unicast", current_stats.rx_unicast),
+	VF_STAT("rx_multicast", current_stats.rx_multicast),
+	VF_STAT("rx_broadcast", current_stats.rx_broadcast),
+	VF_STAT("rx_discards", current_stats.rx_discards),
+	VF_STAT("rx_unknown_protocol", current_stats.rx_unknown_protocol),
+	VF_STAT("tx_bytes", current_stats.tx_bytes),
+	VF_STAT("tx_unicast", current_stats.tx_unicast),
+	VF_STAT("tx_multicast", current_stats.tx_multicast),
+	VF_STAT("tx_broadcast", current_stats.tx_broadcast),
+	VF_STAT("tx_discards", current_stats.tx_discards),
+	VF_STAT("tx_errors", current_stats.tx_errors),
+};
+
+#define IAVF_STATS_LEN	ARRAY_SIZE(iavf_gstrings_stats)
+
+#define IAVF_QUEUE_STATS_LEN	ARRAY_SIZE(iavf_gstrings_queue_stats)
+
+/* For now we have one and only one private flag, and it is only defined
+ * when we have support for the SKIP_CPU_SYNC DMA attribute.  Rather than
+ * leave all of this code sitting around empty, we strip it out unless our
+ * one private flag is actually available.
+ */
+struct iavf_priv_flags {
+	char flag_string[ETH_GSTRING_LEN];
+	u32 flag;
+	bool read_only;
+};
+
+#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \
+	.flag_string = _name, \
+	.flag = _flag, \
+	.read_only = _read_only, \
+}
+
+static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = {
+	IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0),
+};
+
+#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags)
+
+/**
+ * iavf_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @cmd: ethtool command
+ *
+ * Reports speed/duplex settings. Because this is a VF, we don't know what
+ * kind of link we really have, so we fake it.
+ **/
+static int iavf_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	cmd->base.autoneg = AUTONEG_DISABLE;
+	cmd->base.port = PORT_NONE;
+	/* Set speed and duplex */
+	switch (adapter->link_speed) {
+	case IAVF_LINK_SPEED_40GB:
+		cmd->base.speed = SPEED_40000;
+		break;
+	case IAVF_LINK_SPEED_25GB:
+#ifdef SPEED_25000
+		cmd->base.speed = SPEED_25000;
+#else
+		netdev_info(netdev,
+			    "Speed is 25G, display not supported by this version of ethtool.\n");
+#endif
+		break;
+	case IAVF_LINK_SPEED_20GB:
+		cmd->base.speed = SPEED_20000;
+		break;
+	case IAVF_LINK_SPEED_10GB:
+		cmd->base.speed = SPEED_10000;
+		break;
+	case IAVF_LINK_SPEED_1GB:
+		cmd->base.speed = SPEED_1000;
+		break;
+	case IAVF_LINK_SPEED_100MB:
+		cmd->base.speed = SPEED_100;
+		break;
+	default:
+		break;
+	}
+	cmd->base.duplex = DUPLEX_FULL;
+
+	return 0;
+}
+
+/**
+ * iavf_get_sset_count - Get length of string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ *
+ * Reports size of various string tables.
+ **/
+static int iavf_get_sset_count(struct net_device *netdev, int sset)
+{
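+	/* The stats length is reported for IAVF_MAX_REQ_QUEUES rather than
+	 * the currently active queue count, so the reported length stays
+	 * stable when queues are reconfigured; unused slots are zero-filled
+	 * by iavf_add_queue_stats().
+	 */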
+	if (sset == ETH_SS_STATS)
+		return IAVF_STATS_LEN +
+			(IAVF_QUEUE_STATS_LEN * 2 * IAVF_MAX_REQ_QUEUES);
+	else if (sset == ETH_SS_PRIV_FLAGS)
+		return IAVF_PRIV_FLAGS_STR_LEN;
+	else
+		return -EINVAL;
+}
+
+/**
+ * iavf_get_ethtool_stats - report device statistics
+ * @netdev: network interface device structure
+ * @stats: ethtool statistics structure
+ * @data: pointer to data buffer
+ *
+ * All statistics are added to the data buffer as an array of u64.
+ **/
+static void iavf_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	unsigned int i;
+
+	iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
+
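+	/* The ordering here must mirror iavf_get_stat_strings(): device-wide
+	 * stats first, then alternating Tx/Rx queue stats for every possible
+	 * queue up to IAVF_MAX_REQ_QUEUES, so ethtool pairs each value with
+	 * its label.
+	 */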
+	rcu_read_lock();
+	for (i = 0; i < IAVF_MAX_REQ_QUEUES; i++) {
+		struct iavf_ring *ring;
+
+		/* Avoid accessing un-allocated queues */
+		ring = (i < adapter->num_active_queues ?
+			&adapter->tx_rings[i] : NULL);
+		iavf_add_queue_stats(&data, ring);
+
+		/* Avoid accessing un-allocated queues */
+		ring = (i < adapter->num_active_queues ?
+			&adapter->rx_rings[i] : NULL);
+		iavf_add_queue_stats(&data, ring);
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * iavf_get_priv_flag_strings - Get private flag strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the private flags string table
+ **/
+static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		snprintf(data, ETH_GSTRING_LEN, "%s",
+			 iavf_gstrings_priv_flags[i].flag_string);
+		data += ETH_GSTRING_LEN;
+	}
+}
+
+/**
+ * iavf_get_stat_strings - Get stat strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the statistics string table
+ **/
+static void iavf_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	iavf_add_stat_strings(&data, iavf_gstrings_stats);
+
+	/* Queues are always allocated in pairs, so we just use num_tx_queues
+	 * for both Tx and Rx queues.
+	 */
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
+				      "tx", i);
+		iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
+				      "rx", i);
+	}
+}
+
+/**
+ * iavf_get_strings - Get string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ * @data: buffer for string data
+ *
+ * Builds string tables for various string sets
+ **/
+static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		iavf_get_stat_strings(netdev, data);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		iavf_get_priv_flag_strings(netdev, data);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * iavf_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The string-set count and the string set itself must stay in sync for each
+ * flag returned.  Add a new string to the iavf_gstrings_priv_flags array for
+ * every new flag.
+ *
+ * Returns a u32 bitmap of flags.
+ **/
+static u32 iavf_get_priv_flags(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 i, ret_flags = 0;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		const struct iavf_priv_flags *priv_flags;
+
+		priv_flags = &iavf_gstrings_priv_flags[i];
+
+		if (priv_flags->flag & adapter->flags)
+			ret_flags |= BIT(i);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * iavf_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ **/
+static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 orig_flags, new_flags, changed_flags;
+	u32 i;
+
+	orig_flags = READ_ONCE(adapter->flags);
+	new_flags = orig_flags;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		const struct iavf_priv_flags *priv_flags;
+
+		priv_flags = &iavf_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			new_flags |= priv_flags->flag;
+		else
+			new_flags &= ~(priv_flags->flag);
+
+		if (priv_flags->read_only &&
+		    ((orig_flags ^ new_flags) & ~BIT(i)))
+			return -EOPNOTSUPP;
+	}
+
+	/* Before we finalize any flag changes, any checks which we need to
+	 * perform to determine if the new flags will be supported should go
+	 * here...
+	 */
+
+	/* Compare and exchange the new flags into place. If cmpxchg returns
+	 * anything other than the old value, something else modified the
+	 * flags variable since we copied it, so punt with an error and log
+	 * a message.
+	 */
+	if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) {
+		dev_warn(&adapter->pdev->dev,
+			 "Unable to update adapter->flags as it was modified by another thread...\n");
+		return -EAGAIN;
+	}
+
+	changed_flags = orig_flags ^ new_flags;
+
+	/* Process any additional changes needed as a result of flag changes.
+	 * The changed_flags value reflects the list of bits that were changed
+	 * in the code above.
+	 */
+
+	/* issue a reset to force legacy-rx change to take effect */
+	if (changed_flags & IAVF_FLAG_LEGACY_RX) {
+		if (netif_running(netdev)) {
+			adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+			queue_work(iavf_wq, &adapter->reset_task);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_get_msglevel - Get debug message level
+ * @netdev: network interface device structure
+ *
+ * Returns current debug message level.
+ **/
+static u32 iavf_get_msglevel(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+/**
+ * iavf_set_msglevel - Set debug message level
+ * @netdev: network interface device structure
+ * @data: message level
+ *
+ * Set current debug message level. Higher values cause the driver to
+ * be noisier.
+ **/
+static void iavf_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (IAVF_DEBUG_USER & data)
+		adapter->hw.debug_mask = data;
+	adapter->msg_enable = data;
+}
+
+/**
+ * iavf_get_drvinfo - Get driver info
+ * @netdev: network interface device structure
+ * @drvinfo: ethtool driver info structure
+ *
+ * Returns information about the driver and device for display to the user.
+ **/
+static void iavf_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	strlcpy(drvinfo->driver, iavf_driver_name, 32);
+	strlcpy(drvinfo->version, iavf_driver_version, 32);
+	strlcpy(drvinfo->fw_version, "N/A", 4);
+	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
+	drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN;
+}
+
+/**
+ * iavf_get_ringparam - Get ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ *
+ * Returns current ring parameters. TX and RX rings are reported separately,
+ * but the number of rings is not reported.
+ **/
+static void iavf_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IAVF_MAX_RXD;
+	ring->tx_max_pending = IAVF_MAX_TXD;
+	ring->rx_pending = adapter->rx_desc_count;
+	ring->tx_pending = adapter->tx_desc_count;
+}
+
+/**
+ * iavf_set_ringparam - Set ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ *
+ * Sets ring parameters. TX and RX rings are controlled separately, but the
+ * number of rings is not specified, so all rings get the same settings.
+ **/
+static int iavf_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_tx_count = clamp_t(u32, ring->tx_pending,
+			       IAVF_MIN_TXD,
+			       IAVF_MAX_TXD);
+	new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
+
+	new_rx_count = clamp_t(u32, ring->rx_pending,
+			       IAVF_MIN_RXD,
+			       IAVF_MAX_RXD);
+	new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
+
+	/* if nothing to do return success */
+	if ((new_tx_count == adapter->tx_desc_count) &&
+	    (new_rx_count == adapter->rx_desc_count))
+		return 0;
+
+	adapter->tx_desc_count = new_tx_count;
+	adapter->rx_desc_count = new_rx_count;
+
+	if (netif_running(netdev)) {
+		adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+		queue_work(iavf_wq, &adapter->reset_task);
+	}
+
+	return 0;
+}
+
+/**
+ * __iavf_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
+ *
+ * Gets the per-queue interrupt coalescing settings. Specifically, Rx and Tx
+ * usecs are per-queue values. If queue is < 0, queue 0 is used as the
+ * representative value.
+ **/
+static int __iavf_get_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_vsi *vsi = &adapter->vsi;
+	struct iavf_ring *rx_ring, *tx_ring;
+
+	ec->tx_max_coalesced_frames = vsi->work_limit;
+	ec->rx_max_coalesced_frames = vsi->work_limit;
+
+	/* Rx and Tx usecs are per-queue values. If the user doesn't specify
+	 * a queue, return queue 0's values as representative.
+	 */
+	if (queue < 0)
+		queue = 0;
+	else if (queue >= adapter->num_active_queues)
+		return -EINVAL;
+
+	rx_ring = &adapter->rx_rings[queue];
+	tx_ring = &adapter->tx_rings[queue];
+
+	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
+		ec->use_adaptive_rx_coalesce = 1;
+
+	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
+		ec->use_adaptive_tx_coalesce = 1;
+
+	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+	return 0;
+}
+
+/**
+ * iavf_get_coalesce - Get interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure
+ *
+ * Returns current coalescing settings. This is referred to elsewhere in the
+ * driver as Interrupt Throttle Rate, as this is how the hardware describes
+ * this functionality. Note that if per-queue settings have been modified this
+ * only represents the settings of queue 0.
+ **/
+static int iavf_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec)
+{
+	return __iavf_get_coalesce(netdev, ec, -1);
+}
+
+/**
+ * iavf_get_per_queue_coalesce - get coalesce values for specific queue
+ * @netdev: netdev to read
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to read
+ *
+ * Read specific queue's coalesce settings.
+ **/
+static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __iavf_get_coalesce(netdev, ec, queue);
+}
+
+/**
+ * iavf_set_itr_per_queue - set ITR values for specific queue
+ * @adapter: the VF adapter struct to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+				   struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_ring *rx_ring = &adapter->rx_rings[queue];
+	struct iavf_ring *tx_ring = &adapter->tx_rings[queue];
+	struct iavf_q_vector *q_vector;
+
+	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
+
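+	/* Set the dynamic (adaptive ITR) bit unconditionally, then toggle it
+	 * back off when adaptive coalescing is disabled; the net effect is
+	 * that the bit tracks ec->use_adaptive_{rx,tx}_coalesce.
+	 */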
+	rx_ring->itr_setting |= IAVF_ITR_DYNAMIC;
+	if (!ec->use_adaptive_rx_coalesce)
+		rx_ring->itr_setting ^= IAVF_ITR_DYNAMIC;
+
+	tx_ring->itr_setting |= IAVF_ITR_DYNAMIC;
+	if (!ec->use_adaptive_tx_coalesce)
+		tx_ring->itr_setting ^= IAVF_ITR_DYNAMIC;
+
+	q_vector = rx_ring->q_vector;
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
+
+	q_vector = tx_ring->q_vector;
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+
+	/* The interrupt handler itself will take care of programming
+	 * the Tx and Rx ITR values based on the values we have entered
+	 * into the q_vector, no need to write the values now.
+	 */
+}
+
+/**
+ * __iavf_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
+static int __iavf_set_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_vsi *vsi = &adapter->vsi;
+	int i;
+
+	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+
+	if (ec->rx_coalesce_usecs == 0) {
+		if (ec->use_adaptive_rx_coalesce)
+			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
+	} else if ((ec->rx_coalesce_usecs < IAVF_MIN_ITR) ||
+		   (ec->rx_coalesce_usecs > IAVF_MAX_ITR)) {
+		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
+		return -EINVAL;
+	} else if (ec->tx_coalesce_usecs == 0) {
+		if (ec->use_adaptive_tx_coalesce)
+			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
+	} else if ((ec->tx_coalesce_usecs < IAVF_MIN_ITR) ||
+		   (ec->tx_coalesce_usecs > IAVF_MAX_ITR)) {
+		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
+
+	/* Rx and Tx usecs are per-queue values. If the user doesn't specify
+	 * a queue, apply the settings to all queues.
+	 */
+	if (queue < 0) {
+		for (i = 0; i < adapter->num_active_queues; i++)
+			iavf_set_itr_per_queue(adapter, ec, i);
+	} else if (queue < adapter->num_active_queues) {
+		iavf_set_itr_per_queue(adapter, ec, queue);
+	} else {
+		netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+			   adapter->num_active_queues - 1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_set_coalesce - Set interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure
+ *
+ * Change current coalescing settings for every queue.
+ **/
+static int iavf_set_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec)
+{
+	return __iavf_set_coalesce(netdev, ec, -1);
+}
+
+/**
+ * iavf_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to modify
+ *
+ * Modifies a specific queue's coalesce settings.
+ */
+static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __iavf_set_coalesce(netdev, ec, queue);
+}
+
+/**
+ * iavf_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
+ *
+ * Returns Success if the command is supported.
+ **/
+static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			  u32 *rule_locs)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_active_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		netdev_info(netdev,
+			    "RSS hash info is not available to vf, use pf.\n");
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_get_channels - get the number of channels supported by the device
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * For the purposes of our device, we only use combined channels, i.e. a tx/rx
+ * queue pair. Report one extra channel to match our "other" MSI-X vector.
+ **/
+static void iavf_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *ch)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = IAVF_MAX_REQ_QUEUES;
+
+	ch->max_other = NONQ_VECS;
+	ch->other_count = NONQ_VECS;
+
+	ch->combined_count = adapter->num_active_queues;
+}
+
+/**
+ * iavf_set_channels - set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with the PF then do a reset.  During
+ * reset we'll realloc queues and fix the RSS table.  Returns 0 on success,
+ * negative on failure.
+ **/
+static int iavf_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int num_req = ch->combined_count;
+
+	if (num_req != adapter->num_active_queues &&
+	    !(adapter->vf_res->vf_cap_flags &
+	      VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) {
+		dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n");
+		return -EINVAL;
+	}
+
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+		return -EINVAL;
+	}
+
+	/* All of these should have already been checked by ethtool before this
+	 * even gets to us, but just to be sure.
+	 */
+	if (num_req <= 0 || num_req > IAVF_MAX_REQ_QUEUES)
+		return -EINVAL;
+
+	if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS)
+		return -EINVAL;
+
+	adapter->num_req_queues = num_req;
+	return iavf_request_queues(adapter, num_req);
+}
+
+/**
+ * iavf_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the RSS hash key size.
+ **/
+static u32 iavf_get_rxfh_key_size(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->rss_key_size;
+}
+
+/**
+ * iavf_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ **/
+static u32 iavf_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->rss_lut_size;
+}
+
+/**
+ * iavf_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function in use
+ *
+ * Reads the cached indirection table and RSS key from the adapter structure.
+ * Always returns 0.
+ **/
+static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			 u8 *hfunc)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u16 i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (!indir)
+		return 0;
+
+	memcpy(key, adapter->rss_key, adapter->rss_key_size);
+
+	/* Each 32-bit word in 'indir' holds one LUT entry */
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		indir[i] = (u32)adapter->rss_lut[i];
+
+	return 0;
+}
+
+/**
+ * iavf_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function to use
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ **/
+static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u16 i;
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	/* Each 32-bit word in 'indir' holds one LUT entry */
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		adapter->rss_lut[i] = (u8)(indir[i]);
+
+	return iavf_config_rss(adapter);
+}
+
+static const struct ethtool_ops iavf_ethtool_ops = {
+	.get_drvinfo		= iavf_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_ringparam		= iavf_get_ringparam,
+	.set_ringparam		= iavf_set_ringparam,
+	.get_strings		= iavf_get_strings,
+	.get_ethtool_stats	= iavf_get_ethtool_stats,
+	.get_sset_count		= iavf_get_sset_count,
+	.get_priv_flags		= iavf_get_priv_flags,
+	.set_priv_flags		= iavf_set_priv_flags,
+	.get_msglevel		= iavf_get_msglevel,
+	.set_msglevel		= iavf_set_msglevel,
+	.get_coalesce		= iavf_get_coalesce,
+	.set_coalesce		= iavf_set_coalesce,
+	.get_per_queue_coalesce = iavf_get_per_queue_coalesce,
+	.set_per_queue_coalesce = iavf_set_per_queue_coalesce,
+	.get_rxnfc		= iavf_get_rxnfc,
+	.get_rxfh_indir_size	= iavf_get_rxfh_indir_size,
+	.get_rxfh		= iavf_get_rxfh,
+	.set_rxfh		= iavf_set_rxfh,
+	.get_channels		= iavf_get_channels,
+	.set_channels		= iavf_set_channels,
+	.get_rxfh_key_size	= iavf_get_rxfh_key_size,
+	.get_link_ksettings	= iavf_get_link_ksettings,
+};
+
+/**
+ * iavf_set_ethtool_ops - Initialize ethtool ops struct
+ * @netdev: network interface device structure
+ *
+ * Sets ethtool ops struct in our netdev so that ethtool can call
+ * our functions.
+ **/
+void iavf_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &iavf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
new file mode 100644
index 0000000..821987d
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -0,0 +1,3990 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf.h"
+#include "iavf_prototype.h"
+#include "iavf_client.h"
+/* All iavf tracepoints are defined by the include below, which must
+ * be included exactly once across the whole kernel with
+ * CREATE_TRACE_POINTS defined
+ */
+#define CREATE_TRACE_POINTS
+#include "iavf_trace.h"
+
+static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter);
+static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter);
+static int iavf_close(struct net_device *netdev);
+static int iavf_init_get_resources(struct iavf_adapter *adapter);
+static int iavf_check_reset_complete(struct iavf_hw *hw);
+
+char iavf_driver_name[] = "iavf";
+static const char iavf_driver_string[] =
+	"Intel(R) Ethernet Adaptive Virtual Function Network Driver";
+
+#define DRV_KERN "-k"
+
+#define DRV_VERSION_MAJOR 3
+#define DRV_VERSION_MINOR 2
+#define DRV_VERSION_BUILD 3
+#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
+	     __stringify(DRV_VERSION_MINOR) "." \
+	     __stringify(DRV_VERSION_BUILD) \
+	     DRV_KERN
+const char iavf_driver_version[] = DRV_VERSION;
+static const char iavf_copyright[] =
+	"Copyright (c) 2013 - 2018 Intel Corporation.";
+
+/* iavf_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id iavf_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_VF), 0},
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_VF_HV), 0},
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_X722_VF), 0},
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_ADAPTIVE_VF), 0},
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, iavf_pci_tbl);
+
+MODULE_ALIAS("i40evf");
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+
+static const struct net_device_ops iavf_netdev_ops;
+struct workqueue_struct *iavf_wq;
+
+/**
+ * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ **/
+enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw,
+					 struct iavf_dma_mem *mem,
+					 u64 size, u32 alignment)
+{
+	struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;
+
+	if (!mem)
+		return IAVF_ERR_PARAM;
+
+	mem->size = ALIGN(size, alignment);
+	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
+				     (dma_addr_t *)&mem->pa, GFP_KERNEL);
+	if (mem->va)
+		return 0;
+	else
+		return IAVF_ERR_NO_MEMORY;
+}
+
+/**
+ * iavf_free_dma_mem_d - OS specific memory free for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ **/
+enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw,
+				     struct iavf_dma_mem *mem)
+{
+	struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;
+
+	if (!mem || !mem->va)
+		return IAVF_ERR_PARAM;
+	dma_free_coherent(&adapter->pdev->dev, mem->size,
+			  mem->va, (dma_addr_t)mem->pa);
+	return 0;
+}
+
+/**
+ * iavf_allocate_virt_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ **/
+enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw,
+					  struct iavf_virt_mem *mem, u32 size)
+{
+	if (!mem)
+		return IAVF_ERR_PARAM;
+
+	mem->size = size;
+	mem->va = kzalloc(size, GFP_KERNEL);
+
+	if (mem->va)
+		return 0;
+	else
+		return IAVF_ERR_NO_MEMORY;
+}
+
+/**
+ * iavf_free_virt_mem_d - OS specific memory free for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ **/
+enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
+				      struct iavf_virt_mem *mem)
+{
+	if (!mem)
+		return IAVF_ERR_PARAM;
+
+	/* it's ok to kfree a NULL pointer */
+	kfree(mem->va);
+
+	return 0;
+}
+
+/**
+ * iavf_schedule_reset - Set the flags and schedule a reset event
+ * @adapter: board private structure
+ **/
+void iavf_schedule_reset(struct iavf_adapter *adapter)
+{
+	if (!(adapter->flags &
+	      (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
+		adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+		queue_work(iavf_wq, &adapter->reset_task);
+	}
+}
+
+/**
+ * iavf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+static void iavf_tx_timeout(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	adapter->tx_timeout_count++;
+	iavf_schedule_reset(adapter);
+}
+
+/**
+ * iavf_misc_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void iavf_misc_irq_disable(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	if (!adapter->msix_entries)
+		return;
+
+	wr32(hw, IAVF_VFINT_DYN_CTL01, 0);
+
+	iavf_flush(hw);
+
+	synchronize_irq(adapter->msix_entries[0].vector);
+}
+
+/**
+ * iavf_misc_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void iavf_misc_irq_enable(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	wr32(hw, IAVF_VFINT_DYN_CTL01, IAVF_VFINT_DYN_CTL01_INTENA_MASK |
+				       IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK);
+	wr32(hw, IAVF_VFINT_ICR0_ENA1, IAVF_VFINT_ICR0_ENA1_ADMINQ_MASK);
+
+	iavf_flush(hw);
+}
+
+/**
+ * iavf_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void iavf_irq_disable(struct iavf_adapter *adapter)
+{
+	int i;
+	struct iavf_hw *hw = &adapter->hw;
+
+	if (!adapter->msix_entries)
+		return;
+
+	for (i = 1; i < adapter->num_msix_vectors; i++) {
+		wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), 0);
+		synchronize_irq(adapter->msix_entries[i].vector);
+	}
+	iavf_flush(hw);
+}
+
+/**
+ * iavf_irq_enable_queues - Enable interrupt for specified queues
+ * @adapter: board private structure
+ * @mask: bitmap of queues to enable
+ **/
+void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 1; i < adapter->num_msix_vectors; i++) {
+		if (mask & BIT(i - 1)) {
+			wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1),
+			     IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+			     IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
+		}
+	}
+}
+
+/**
+ * iavf_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ * @flush: boolean value whether to run rd32()
+ **/
+void iavf_irq_enable(struct iavf_adapter *adapter, bool flush)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	iavf_misc_irq_enable(adapter);
+	iavf_irq_enable_queues(adapter, ~0);
+
+	if (flush)
+		iavf_flush(hw);
+}
+
+/**
+ * iavf_msix_aq - Interrupt handler for vector 0
+ * @irq: interrupt number
+ * @data: pointer to netdev
+ **/
+static irqreturn_t iavf_msix_aq(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_hw *hw = &adapter->hw;
+
+	/* handle non-queue interrupts; these reads clear the registers */
+	rd32(hw, IAVF_VFINT_ICR01);
+	rd32(hw, IAVF_VFINT_ICR0_ENA1);
+
+	/* schedule work on the private workqueue */
+	queue_work(iavf_wq, &adapter->adminq_task);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * iavf_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ **/
+static irqreturn_t iavf_msix_clean_rings(int irq, void *data)
+{
+	struct iavf_q_vector *q_vector = data;
+
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
+		return IRQ_HANDLED;
+
+	napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * iavf_map_vector_to_rxq - associate irqs with rx queues
+ * @adapter: board private structure
+ * @v_idx: interrupt number
+ * @r_idx: queue number
+ **/
+static void
+iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx)
+{
+	struct iavf_q_vector *q_vector = &adapter->q_vectors[v_idx];
+	struct iavf_ring *rx_ring = &adapter->rx_rings[r_idx];
+	struct iavf_hw *hw = &adapter->hw;
+
+	rx_ring->q_vector = q_vector;
+	rx_ring->next = q_vector->rx.ring;
+	rx_ring->vsi = &adapter->vsi;
+	q_vector->rx.ring = rx_ring;
+	q_vector->rx.count++;
+	q_vector->rx.next_update = jiffies + 1;
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
+	q_vector->ring_mask |= BIT(r_idx);
+	wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx),
+	     q_vector->rx.current_itr >> 1);
+	q_vector->rx.current_itr = q_vector->rx.target_itr;
+}
+
+/**
+ * iavf_map_vector_to_txq - associate irqs with tx queues
+ * @adapter: board private structure
+ * @v_idx: interrupt number
+ * @t_idx: queue number
+ **/
+static void
+iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx)
+{
+	struct iavf_q_vector *q_vector = &adapter->q_vectors[v_idx];
+	struct iavf_ring *tx_ring = &adapter->tx_rings[t_idx];
+	struct iavf_hw *hw = &adapter->hw;
+
+	tx_ring->q_vector = q_vector;
+	tx_ring->next = q_vector->tx.ring;
+	tx_ring->vsi = &adapter->vsi;
+	q_vector->tx.ring = tx_ring;
+	q_vector->tx.count++;
+	q_vector->tx.next_update = jiffies + 1;
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+	q_vector->num_ringpairs++;
+	wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx),
+	     q_vector->tx.target_itr >> 1);
+	q_vector->tx.current_itr = q_vector->tx.target_itr;
+}
+
+/**
+ * iavf_map_rings_to_vectors - Maps descriptor rings to vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function maps descriptor rings to the queue-specific vectors
+ * we were allotted through the MSI-X enabling code.  Ideally, we'd have
+ * one vector per ring/queue, but on a constrained vector budget, we
+ * group the rings as "efficiently" as possible.  You would add new
+ * mapping configurations in here.
+ **/
+static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter)
+{
+	int rings_remaining = adapter->num_active_queues;
+	int ridx = 0, vidx = 0;
+	int q_vectors;
+
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
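+	/* Illustrative example (hypothetical numbers): with four active
+	 * queues and two q_vectors, the loop below maps rings 0 and 2 to
+	 * vector 0 and rings 1 and 3 to vector 1.
+	 */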
+	for (; ridx < rings_remaining; ridx++) {
+		iavf_map_vector_to_rxq(adapter, vidx, ridx);
+		iavf_map_vector_to_txq(adapter, vidx, ridx);
+
+		/* In the case where we have more queues than vectors, continue
+		 * round-robin on vectors until all queues are mapped.
+		 */
+		if (++vidx >= q_vectors)
+			vidx = 0;
+	}
+
+	adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
+}
+
+/**
+ * iavf_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void iavf_irq_affinity_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct iavf_q_vector *q_vector =
+		container_of(notify, struct iavf_q_vector, affinity_notify);
+
+	cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * iavf_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void iavf_irq_affinity_release(struct kref *ref) {}
+
+/**
+ * iavf_request_traffic_irqs - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ * @basename: device basename
+ *
+ * Allocates MSI-X vectors for tx and rx handling, and requests
+ * interrupts from the kernel.
+ **/
+static int
+iavf_request_traffic_irqs(struct iavf_adapter *adapter, char *basename)
+{
+	unsigned int vector, q_vectors;
+	unsigned int rx_int_idx = 0, tx_int_idx = 0;
+	int irq_num, err;
+	int cpu;
+
+	iavf_irq_disable(adapter);
+	/* Decrement for Other and TCP Timer vectors */
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct iavf_q_vector *q_vector = &adapter->q_vectors[vector];
+
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "iavf-%s-TxRx-%d", basename, rx_int_idx++);
+			tx_int_idx++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "iavf-%s-rx-%d", basename, rx_int_idx++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "iavf-%s-tx-%d", basename, tx_int_idx++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = request_irq(irq_num,
+				  iavf_msix_clean_rings,
+				  0,
+				  q_vector->name,
+				  q_vector);
+		if (err) {
+			dev_info(&adapter->pdev->dev,
+				 "Request_irq failed, error: %d\n", err);
+			goto free_queue_irqs;
+		}
+		/* register for affinity change notifications */
+		q_vector->affinity_notify.notify = iavf_irq_affinity_notify;
+		q_vector->affinity_notify.release =
+						   iavf_irq_affinity_release;
+		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
+		/* Spread the IRQ affinity hints across online CPUs. Note that
+		 * get_cpu_mask returns a mask with a permanent lifetime so
+		 * it's safe to use as a hint for irq_set_affinity_hint.
+		 */
+		cpu = cpumask_local_spread(q_vector->v_idx, -1);
+		irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
+	}
+
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &adapter->q_vectors[vector]);
+	}
+	return err;
+}
+
+/**
+ * iavf_request_misc_irq - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * Allocates MSI-X vector 0 and requests interrupts from the kernel. This
+ * vector is only for the admin queue, and stays active even when the netdev
+ * is closed.
+ **/
+static int iavf_request_misc_irq(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	snprintf(adapter->misc_vector_name,
+		 sizeof(adapter->misc_vector_name) - 1, "iavf-%s:mbx",
+		 dev_name(&adapter->pdev->dev));
+	err = request_irq(adapter->msix_entries[0].vector,
+			  &iavf_msix_aq, 0,
+			  adapter->misc_vector_name, netdev);
+	if (err)
+		dev_err(&adapter->pdev->dev,
+			"request_irq for %s failed: %d\n",
+			adapter->misc_vector_name, err);
+	return err;
+}
+
+/**
+ * iavf_free_traffic_irqs - Free MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * Frees all MSI-X vectors other than 0.
+ **/
+static void iavf_free_traffic_irqs(struct iavf_adapter *adapter)
+{
+	int vector, irq_num, q_vectors;
+
+	if (!adapter->msix_entries)
+		return;
+
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &adapter->q_vectors[vector]);
+	}
+}
+
+/**
+ * iavf_free_misc_irq - Free MSI-X miscellaneous vector
+ * @adapter: board private structure
+ *
+ * Frees MSI-X vector 0.
+ **/
+static void iavf_free_misc_irq(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	if (!adapter->msix_entries)
+		return;
+
+	free_irq(adapter->msix_entries[0].vector, netdev);
+}
+
+/**
+ * iavf_configure_tx - Configure Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void iavf_configure_tx(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		adapter->tx_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(i);
+}
+
+/**
+ * iavf_configure_rx - Configure Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void iavf_configure_rx(struct iavf_adapter *adapter)
+{
+	unsigned int rx_buf_len = IAVF_RXBUFFER_2048;
+	struct iavf_hw *hw = &adapter->hw;
+	int i;
+
+	/* Legacy Rx will always default to a 2048 buffer size. */
+#if (PAGE_SIZE < 8192)
+	if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) {
+		struct net_device *netdev = adapter->netdev;
+
+		/* For jumbo frames on systems with 4K pages we have to use
+		 * an order 1 page, so we might as well increase the size
+		 * of our Rx buffer to make better use of the available space
+		 */
+		rx_buf_len = IAVF_RXBUFFER_3072;
+
+		/* We use a 1536 buffer size for configurations with
+		 * standard Ethernet mtu.  On x86 this gives us enough room
+		 * for shared info and 192 bytes of padding.
+		 */
+		if (!IAVF_2K_TOO_SMALL_WITH_PADDING &&
+		    (netdev->mtu <= ETH_DATA_LEN))
+			rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN;
+	}
+#endif
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i);
+		adapter->rx_rings[i].rx_buf_len = rx_buf_len;
+
+		if (adapter->flags & IAVF_FLAG_LEGACY_RX)
+			clear_ring_build_skb_enabled(&adapter->rx_rings[i]);
+		else
+			set_ring_build_skb_enabled(&adapter->rx_rings[i]);
+	}
+}
+
+/**
+ * iavf_find_vlan - Search filter list for specific vlan filter
+ * @adapter: board private structure
+ * @vlan: vlan tag
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * mac_vlan_list_lock.
+ **/
+static struct
+iavf_vlan_filter *iavf_find_vlan(struct iavf_adapter *adapter, u16 vlan)
+{
+	struct iavf_vlan_filter *f;
+
+	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+		if (vlan == f->vlan)
+			return f;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_add_vlan - Add a vlan filter to the list
+ * @adapter: board private structure
+ * @vlan: VLAN tag
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ **/
+static struct
+iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, u16 vlan)
+{
+	struct iavf_vlan_filter *f = NULL;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_vlan(adapter, vlan);
+	if (!f) {
+		f = kzalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			goto clearout;
+
+		f->vlan = vlan;
+
+		list_add_tail(&f->list, &adapter->vlan_filter_list);
+		f->add = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+	}
+
+clearout:
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+	return f;
+}
+
+/**
+ * iavf_del_vlan - Remove a vlan filter from the list
+ * @adapter: board private structure
+ * @vlan: VLAN tag
+ **/
+static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan)
+{
+	struct iavf_vlan_filter *f;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_vlan(adapter, vlan);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_vlan_rx_add_vid - Add a VLAN filter to a device
+ * @netdev: network device struct
+ * @proto: unused protocol data
+ * @vid: VLAN tag
+ **/
+static int iavf_vlan_rx_add_vid(struct net_device *netdev,
+				__always_unused __be16 proto, u16 vid)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (!VLAN_ALLOWED(adapter))
+		return -EIO;
+	if (!iavf_add_vlan(adapter, vid))
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * iavf_vlan_rx_kill_vid - Remove a VLAN filter from a device
+ * @netdev: network device struct
+ * @proto: unused protocol data
+ * @vid: VLAN tag
+ **/
+static int iavf_vlan_rx_kill_vid(struct net_device *netdev,
+				 __always_unused __be16 proto, u16 vid)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (VLAN_ALLOWED(adapter)) {
+		iavf_del_vlan(adapter, vid);
+		return 0;
+	}
+	return -EIO;
+}
+
+/**
+ * iavf_find_filter - Search filter list for specific mac filter
+ * @adapter: board private structure
+ * @macaddr: the MAC address
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * mac_vlan_list_lock.
+ **/
+static struct
+iavf_mac_filter *iavf_find_filter(struct iavf_adapter *adapter,
+				  const u8 *macaddr)
+{
+	struct iavf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		if (ether_addr_equal(macaddr, f->macaddr))
+			return f;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_add_filter - Add a mac filter to the filter list
+ * @adapter: board private structure
+ * @macaddr: the MAC address
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ **/
+static struct
+iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+				 const u8 *macaddr)
+{
+	struct iavf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	f = iavf_find_filter(adapter, macaddr);
+	if (!f) {
+		f = kzalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			return f;
+
+		ether_addr_copy(f->macaddr, macaddr);
+
+		list_add_tail(&f->list, &adapter->mac_filter_list);
+		f->add = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+	} else {
+		f->remove = false;
+	}
+
+	return f;
+}
+
+/**
+ * iavf_set_mac - NDO callback to set port mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_set_mac(struct net_device *netdev, void *p)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_mac_filter *f;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
+		return 0;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_filter(adapter, hw->mac.addr);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+
+	f = iavf_add_filter(adapter, addr->sa_data);
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	if (f)
+		ether_addr_copy(hw->mac.addr, addr->sa_data);
+
+	return f ? 0 : -ENOMEM;
+}
+
+/**
+ * iavf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int iavf_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (iavf_add_filter(adapter, addr))
+		return 0;
+	else
+		return -ENOMEM;
+}
+
+/**
+ * iavf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_mac_filter *f;
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	f = iavf_find_filter(adapter, addr);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+	return 0;
+}
+
+/**
+ * iavf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void iavf_set_rx_mode(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	__dev_uc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
+	__dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	if (netdev->flags & IFF_PROMISC &&
+	    !(adapter->flags & IAVF_FLAG_PROMISC_ON))
+		adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC;
+	else if (!(netdev->flags & IFF_PROMISC) &&
+		 adapter->flags & IAVF_FLAG_PROMISC_ON)
+		adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC;
+
+	if (netdev->flags & IFF_ALLMULTI &&
+	    !(adapter->flags & IAVF_FLAG_ALLMULTI_ON))
+		adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI;
+	else if (!(netdev->flags & IFF_ALLMULTI) &&
+		 adapter->flags & IAVF_FLAG_ALLMULTI_ON)
+		adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI;
+}
+
+/**
+ * iavf_napi_enable_all - enable NAPI on all queue vectors
+ * @adapter: board private structure
+ **/
+static void iavf_napi_enable_all(struct iavf_adapter *adapter)
+{
+	int q_idx;
+	struct iavf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		struct napi_struct *napi;
+
+		q_vector = &adapter->q_vectors[q_idx];
+		napi = &q_vector->napi;
+		napi_enable(napi);
+	}
+}
+
+/**
+ * iavf_napi_disable_all - disable NAPI on all queue vectors
+ * @adapter: board private structure
+ **/
+static void iavf_napi_disable_all(struct iavf_adapter *adapter)
+{
+	int q_idx;
+	struct iavf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		q_vector = &adapter->q_vectors[q_idx];
+		napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * iavf_configure - set up transmit and receive data structures
+ * @adapter: board private structure
+ **/
+static void iavf_configure(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	iavf_set_rx_mode(netdev);
+
+	iavf_configure_tx(adapter);
+	iavf_configure_rx(adapter);
+	adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		struct iavf_ring *ring = &adapter->rx_rings[i];
+
+		iavf_alloc_rx_buffers(ring, IAVF_DESC_UNUSED(ring));
+	}
+}
+
+/**
+ * iavf_up_complete - Finish the last steps of bringing up a connection
+ * @adapter: board private structure
+ *
+ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/
+static void iavf_up_complete(struct iavf_adapter *adapter)
+{
+	adapter->state = __IAVF_RUNNING;
+	clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+
+	iavf_napi_enable_all(adapter);
+
+	adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES;
+	if (CLIENT_ENABLED(adapter))
+		adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN;
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+}
+
+/**
+ * iavf_down - Shutdown the connection processing
+ * @adapter: board private structure
+ *
+ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/
+void iavf_down(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct iavf_vlan_filter *vlf;
+	struct iavf_mac_filter *f;
+	struct iavf_cloud_filter *cf;
+
+	if (adapter->state <= __IAVF_DOWN_PENDING)
+		return;
+
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+	adapter->link_up = false;
+	iavf_napi_disable_all(adapter);
+	iavf_irq_disable(adapter);
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	/* clear the sync flag on all filters */
+	__dev_uc_unsync(adapter->netdev, NULL);
+	__dev_mc_unsync(adapter->netdev, NULL);
+
+	/* remove all MAC filters */
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		f->remove = true;
+	}
+
+	/* remove all VLAN filters */
+	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+		vlf->remove = true;
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	/* remove all cloud filters */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		cf->del = true;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
+	    adapter->state != __IAVF_RESETTING) {
+		/* cancel any current operation */
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		/* Schedule operations to close down the HW. Don't wait
+		 * here for this to complete. The watchdog is still running
+		 * and it will take care of this.
+		 */
+		adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
+	}
+
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+}
+
+/**
+ * iavf_acquire_msix_vectors - Setup the MSIX capability
+ * @adapter: board private structure
+ * @vectors: number of vectors to request
+ *
+ * Work with the OS to set up the MSIX vectors needed.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int
+iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors)
+{
+	int err, vector_threshold;
+
+	/* We'll want at least 3 (vector_threshold):
+	 * 0) Other (Admin Queue and link, mostly)
+	 * 1) TxQ[0] Cleanup
+	 * 2) RxQ[0] Cleanup
+	 */
+	vector_threshold = MIN_MSIX_COUNT;
+
+	/* The more we get, the more we will assign to Tx/Rx Cleanup
+	 * for the separate queues...where Rx Cleanup >= Tx Cleanup.
+	 * Right now, we simply care about how many we'll get; we'll
+	 * set them up later while requesting irq's.
+	 */
+	err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+				    vector_threshold, vectors);
+	if (err < 0) {
+		dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts\n");
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+		return err;
+	}
+
+	/* Adjust for only the vectors we'll use, which is minimum
+	 * of max_msix_q_vectors + NONQ_VECS, or the number of
+	 * vectors we were allocated.
+	 */
+	adapter->num_msix_vectors = err;
+	return 0;
+}
+
+/**
+ * iavf_free_queues - Free memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * Free all of the memory associated with queue pairs.
+ **/
+static void iavf_free_queues(struct iavf_adapter *adapter)
+{
+	if (!adapter->vsi_res)
+		return;
+	adapter->num_active_queues = 0;
+	kfree(adapter->tx_rings);
+	adapter->tx_rings = NULL;
+	kfree(adapter->rx_rings);
+	adapter->rx_rings = NULL;
+}
+
+/**
+ * iavf_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one ring per queue at run-time since we don't know the
+ * number of queues at compile-time.
+ **/
+static int iavf_alloc_queues(struct iavf_adapter *adapter)
+{
+	int i, num_active_queues;
+
+	/* If we're in reset reallocating queues we don't actually know yet for
+	 * certain the PF gave us the number of queues we asked for but we'll
+	 * assume it did.  Once basic reset is finished we'll confirm once we
+	 * start negotiating config with PF.
+	 */
+	if (adapter->num_req_queues)
+		num_active_queues = adapter->num_req_queues;
+	else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+		 adapter->num_tc)
+		num_active_queues = adapter->ch_config.total_qps;
+	else
+		num_active_queues = min_t(int,
+					  adapter->vsi_res->num_queue_pairs,
+					  (int)(num_online_cpus()));
+
+	adapter->tx_rings = kcalloc(num_active_queues,
+				    sizeof(struct iavf_ring), GFP_KERNEL);
+	if (!adapter->tx_rings)
+		goto err_out;
+	adapter->rx_rings = kcalloc(num_active_queues,
+				    sizeof(struct iavf_ring), GFP_KERNEL);
+	if (!adapter->rx_rings)
+		goto err_out;
+
+	for (i = 0; i < num_active_queues; i++) {
+		struct iavf_ring *tx_ring;
+		struct iavf_ring *rx_ring;
+
+		tx_ring = &adapter->tx_rings[i];
+
+		tx_ring->queue_index = i;
+		tx_ring->netdev = adapter->netdev;
+		tx_ring->dev = &adapter->pdev->dev;
+		tx_ring->count = adapter->tx_desc_count;
+		tx_ring->itr_setting = IAVF_ITR_TX_DEF;
+		if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE)
+			tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR;
+
+		rx_ring = &adapter->rx_rings[i];
+		rx_ring->queue_index = i;
+		rx_ring->netdev = adapter->netdev;
+		rx_ring->dev = &adapter->pdev->dev;
+		rx_ring->count = adapter->rx_desc_count;
+		rx_ring->itr_setting = IAVF_ITR_RX_DEF;
+	}
+
+	adapter->num_active_queues = num_active_queues;
+
+	return 0;
+
+err_out:
+	iavf_free_queues(adapter);
+	return -ENOMEM;
+}
+
+/**
+ * iavf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
+{
+	int vector, v_budget;
+	int pairs = 0;
+	int err = 0;
+
+	if (!adapter->vsi_res) {
+		err = -EIO;
+		goto out;
+	}
+	pairs = adapter->num_active_queues;
+
+	/* It's easy to be greedy for MSI-X vectors, but it really doesn't do
+	 * us much good if we have more vectors than CPUs. However, we already
+	 * limit the total number of queues by the number of CPUs so we do not
+	 * need any further limiting here.
+	 */
+	v_budget = min_t(int, pairs + NONQ_VECS,
+			 (int)adapter->vf_res->max_vectors);
+
+	adapter->msix_entries = kcalloc(v_budget,
+					sizeof(struct msix_entry), GFP_KERNEL);
+	if (!adapter->msix_entries) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (vector = 0; vector < v_budget; vector++)
+		adapter->msix_entries[vector].entry = vector;
+
+	err = iavf_acquire_msix_vectors(adapter, v_budget);
+
+out:
+	netif_set_real_num_rx_queues(adapter->netdev, pairs);
+	netif_set_real_num_tx_queues(adapter->netdev, pairs);
+	return err;
+}
+
+/**
+ * iavf_config_rss_aq - Configure RSS keys and lut by using AQ commands
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_config_rss_aq(struct iavf_adapter *adapter)
+{
+	struct iavf_aqc_get_set_rss_key_data *rss_key =
+		(struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key;
+	struct iavf_hw *hw = &adapter->hw;
+	int ret = 0;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n",
+			adapter->current_op);
+		return -EBUSY;
+	}
+
+	ret = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key);
+	if (ret) {
+		dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n",
+			iavf_stat_str(hw, ret),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+
+	ret = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false,
+				  adapter->rss_lut, adapter->rss_lut_size);
+	if (ret) {
+		dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n",
+			iavf_stat_str(hw, ret),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_config_rss_reg - Configure RSS keys and lut by writing registers
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_config_rss_reg(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	u32 *dw;
+	u16 i;
+
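+	/* Both the hash key and the lookup table are byte arrays in adapter
+	 * memory; the VF registers consume them one 32-bit word at a time.
+	 */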
+	dw = (u32 *)adapter->rss_key;
+	for (i = 0; i <= adapter->rss_key_size / 4; i++)
+		wr32(hw, IAVF_VFQF_HKEY(i), dw[i]);
+
+	dw = (u32 *)adapter->rss_lut;
+	for (i = 0; i <= adapter->rss_lut_size / 4; i++)
+		wr32(hw, IAVF_VFQF_HLUT(i), dw[i]);
+
+	iavf_flush(hw);
+
+	return 0;
+}
+
+/**
+ * iavf_config_rss - Configure RSS keys and lut
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int iavf_config_rss(struct iavf_adapter *adapter)
+{
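+	/* Prefer to have the PF program RSS on our behalf via virtchnl;
+	 * otherwise use AQ commands if the device supports them, and fall
+	 * back to writing the VF registers directly.
+	 */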
+	if (RSS_PF(adapter)) {
+		adapter->aq_required |= IAVF_FLAG_AQ_SET_RSS_LUT |
+					IAVF_FLAG_AQ_SET_RSS_KEY;
+		return 0;
+	} else if (RSS_AQ(adapter)) {
+		return iavf_config_rss_aq(adapter);
+	} else {
+		return iavf_config_rss_reg(adapter);
+	}
+}
+
+/**
+ * iavf_fill_rss_lut - Fill the lut with default values
+ * @adapter: board private structure
+ **/
+static void iavf_fill_rss_lut(struct iavf_adapter *adapter)
+{
+	u16 i;
+
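+	/* Default LUT spreads flows round-robin across the active queues */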
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		adapter->rss_lut[i] = i % adapter->num_active_queues;
+}
+
+/**
+ * iavf_init_rss - Prepare for RSS
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_init_rss(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	int ret;
+
+	if (!RSS_PF(adapter)) {
+		/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
+		if (adapter->vf_res->vf_cap_flags &
+		    VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+			adapter->hena = IAVF_DEFAULT_RSS_HENA_EXPANDED;
+		else
+			adapter->hena = IAVF_DEFAULT_RSS_HENA;
+
+		wr32(hw, IAVF_VFQF_HENA(0), (u32)adapter->hena);
+		wr32(hw, IAVF_VFQF_HENA(1), (u32)(adapter->hena >> 32));
+	}
+
+	iavf_fill_rss_lut(adapter);
+	netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size);
+	ret = iavf_config_rss(adapter);
+
+	return ret;
+}
+
+/**
+ * iavf_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+{
+	int q_idx = 0, num_q_vectors;
+	struct iavf_q_vector *q_vector;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector),
+				     GFP_KERNEL);
+	if (!adapter->q_vectors)
+		return -ENOMEM;
+
+	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+		q_vector = &adapter->q_vectors[q_idx];
+		q_vector->adapter = adapter;
+		q_vector->vsi = &adapter->vsi;
+		q_vector->v_idx = q_idx;
+		q_vector->reg_idx = q_idx;
+		cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
+		netif_napi_add(adapter->netdev, &q_vector->napi,
+			       iavf_napi_poll, NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+{
+	int q_idx, num_q_vectors;
+	int napi_vectors;
+
+	if (!adapter->q_vectors)
+		return;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	napi_vectors = adapter->num_active_queues;
+
+	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+		struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+		if (q_idx < napi_vectors)
+			netif_napi_del(&q_vector->napi);
+	}
+	kfree(adapter->q_vectors);
+	adapter->q_vectors = NULL;
+}
+
+/**
+ * iavf_reset_interrupt_capability - Reset MSIX setup
+ * @adapter: board private structure
+ *
+ **/
+void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+{
+	if (!adapter->msix_entries)
+		return;
+
+	pci_disable_msix(adapter->pdev);
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+/**
+ * iavf_init_interrupt_scheme - Determine if MSIX is supported and init
+ * @adapter: board private structure to initialize
+ *
+ **/
+int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+{
+	int err;
+
+	err = iavf_alloc_queues(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate memory for queues\n");
+		goto err_alloc_queues;
+	}
+
+	rtnl_lock();
+	err = iavf_set_interrupt_capability(adapter);
+	rtnl_unlock();
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to setup interrupt capabilities\n");
+		goto err_set_interrupt;
+	}
+
+	err = iavf_alloc_q_vectors(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate memory for queue vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	/* If we've made it this far with the ADq flag set, we haven't bailed
+	 * out anywhere along the way, and ADq isn't merely enabled: its
+	 * resources were actually allocated in the reset path. Now we can
+	 * truly claim that ADq is enabled.
+	 */
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc)
+		dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+			 adapter->num_tc);
+
+	dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
+		 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
+		 adapter->num_active_queues);
+
+	return 0;
+err_alloc_q_vectors:
+	iavf_reset_interrupt_capability(adapter);
+err_set_interrupt:
+	iavf_free_queues(adapter);
+err_alloc_queues:
+	return err;
+}
+
+/**
+ * iavf_free_rss - Free memory used by RSS structs
+ * @adapter: board private structure
+ **/
+static void iavf_free_rss(struct iavf_adapter *adapter)
+{
+	kfree(adapter->rss_key);
+	adapter->rss_key = NULL;
+
+	kfree(adapter->rss_lut);
+	adapter->rss_lut = NULL;
+}
+
+/**
+ * iavf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (netif_running(netdev))
+		iavf_free_traffic_irqs(adapter);
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+	iavf_free_queues(adapter);
+
+	err = iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err;
+
+	netif_tx_stop_all_queues(netdev);
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err;
+
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+
+	iavf_map_rings_to_vectors(adapter);
+
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		err = iavf_init_rss(adapter);
+err:
+	return err;
+}
+
+/**
+ * iavf_process_aq_command - process aq_required flags and send an aq command
+ * @adapter: pointer to iavf adapter structure
+ *
+ * Returns 0 if a command was sent, or an error code if no command was
+ * pending or the command failed.
+ **/
+static int iavf_process_aq_command(struct iavf_adapter *adapter)
+{
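+	/* Handle one pending flag per invocation, in priority order; the
+	 * watchdog keeps rescheduling until aq_required is fully drained.
+	 */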
+	if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG)
+		return iavf_send_vf_config_msg(adapter);
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) {
+		iavf_disable_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) {
+		iavf_map_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_MAC_FILTER) {
+		iavf_add_ether_addrs(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_VLAN_FILTER) {
+		iavf_add_vlans(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_MAC_FILTER) {
+		iavf_del_ether_addrs(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_VLAN_FILTER) {
+		iavf_del_vlans(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) {
+		iavf_enable_vlan_stripping(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) {
+		iavf_disable_vlan_stripping(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) {
+		iavf_configure_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) {
+		iavf_enable_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_RSS) {
+		/* This message goes straight to the firmware, not the
+		 * PF, so we don't have to set current_op as we will
+		 * not get a response through the ARQ.
+		 */
+		adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_RSS;
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_GET_HENA) {
+		iavf_get_hena(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_SET_HENA) {
+		iavf_set_hena(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_KEY) {
+		iavf_set_rss_key(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_LUT) {
+		iavf_set_rss_lut(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) {
+		iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
+				       FLAG_VF_MULTICAST_PROMISC);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) {
+		iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
+		return 0;
+	}
+
+	if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) &&
+	    (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
+		iavf_set_promiscuous(adapter, 0);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CHANNELS) {
+		iavf_enable_channels(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CHANNELS) {
+		iavf_disable_channels(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+		iavf_add_cloud_filter(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+		iavf_del_cloud_filter(adapter);
+		return 0;
+	}
+
+	return -EAGAIN;
+}
+
+/**
+ * iavf_startup - first step of driver startup
+ * @adapter: board private structure
+ *
+ * Processes the __IAVF_STARTUP driver state. On success the state is
+ * changed to __IAVF_INIT_VERSION_CHECK; on failure an error is returned.
+ **/
+static int iavf_startup(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err;
+
+	WARN_ON(adapter->state != __IAVF_STARTUP);
+
+	/* driver loaded, probe complete */
+	adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+	err = iavf_set_mac_type(hw);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err);
+		goto err;
+	}
+
+	err = iavf_check_reset_complete(hw);
+	if (err) {
+		dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n",
+			 err);
+		goto err;
+	}
+	hw->aq.num_arq_entries = IAVF_AQ_LEN;
+	hw->aq.num_asq_entries = IAVF_AQ_LEN;
+	hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+	hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+
+	err = iavf_init_adminq(hw);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", err);
+		goto err;
+	}
+	err = iavf_send_api_ver(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err);
+		iavf_shutdown_adminq(hw);
+		goto err;
+	}
+	adapter->state = __IAVF_INIT_VERSION_CHECK;
+err:
+	return err;
+}
+
+/**
+ * iavf_init_version_check - second step of driver startup
+ * @adapter: board private structure
+ *
+ * Processes the __IAVF_INIT_VERSION_CHECK driver state. On success the
+ * state is changed to __IAVF_INIT_GET_RESOURCES; on failure -EAGAIN is
+ * returned.
+ **/
+static int iavf_init_version_check(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = -EAGAIN;
+
+	WARN_ON(adapter->state != __IAVF_INIT_VERSION_CHECK);
+
+	if (!iavf_asq_done(hw)) {
+		dev_err(&pdev->dev, "Admin queue command never completed\n");
+		iavf_shutdown_adminq(hw);
+		adapter->state = __IAVF_STARTUP;
+		goto err;
+	}
+
+	/* aq msg sent, awaiting reply */
+	err = iavf_verify_api_ver(adapter);
+	if (err) {
+		if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK)
+			err = iavf_send_api_ver(adapter);
+		else
+			dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n",
+				adapter->pf_version.major,
+				adapter->pf_version.minor,
+				VIRTCHNL_VERSION_MAJOR,
+				VIRTCHNL_VERSION_MINOR);
+		goto err;
+	}
+	err = iavf_send_vf_config_msg(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send config request (%d)\n",
+			err);
+		goto err;
+	}
+	adapter->state = __IAVF_INIT_GET_RESOURCES;
+
+err:
+	return err;
+}
+
+/**
+ * iavf_init_get_resources - third step of driver startup
+ * @adapter: board private structure
+ *
+ * Processes the __IAVF_INIT_GET_RESOURCES driver state and finishes the
+ * driver initialization procedure. On success the state is changed to
+ * __IAVF_DOWN; on failure -EAGAIN is returned.
+ **/
+static int iavf_init_get_resources(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = 0, bufsz;
+
+	WARN_ON(adapter->state != __IAVF_INIT_GET_RESOURCES);
+	/* aq msg sent, awaiting reply */
+	if (!adapter->vf_res) {
+		bufsz = sizeof(struct virtchnl_vf_resource) +
+			(IAVF_MAX_VF_VSI *
+			sizeof(struct virtchnl_vsi_resource));
+		adapter->vf_res = kzalloc(bufsz, GFP_KERNEL);
+		if (!adapter->vf_res) {
+			err = -ENOMEM;
+			goto err;
+		}
+	}
+	err = iavf_get_vf_config(adapter);
+	if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) {
+		err = iavf_send_vf_config_msg(adapter);
+		goto err;
+	} else if (err == IAVF_ERR_PARAM) {
+		/* We only get ERR_PARAM if the device is in a very bad
+		 * state or if we've been disabled for previous bad
+		 * behavior. Either way, we're done now.
+		 */
+		iavf_shutdown_adminq(hw);
+		dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
+		return 0;
+	}
+	if (err) {
+		dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err);
+		goto err_alloc;
+	}
+
+	if (iavf_process_config(adapter))
+		goto err_alloc;
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+
+	adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED;
+
+	netdev->netdev_ops = &iavf_netdev_ops;
+	iavf_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD;
+
+	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
+		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
+			 adapter->hw.mac.addr);
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+	} else {
+		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+	}
+
+	adapter->tx_desc_count = IAVF_DEFAULT_TXD;
+	adapter->rx_desc_count = IAVF_DEFAULT_RXD;
+	err = iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err_sw_init;
+	iavf_map_rings_to_vectors(adapter);
+	if (adapter->vf_res->vf_cap_flags &
+		VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE;
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err_sw_init;
+
+	netif_carrier_off(netdev);
+	adapter->link_up = false;
+
+	/* set the semaphore to prevent any callbacks after device registration
+	 * up to time when state of driver will be set to __IAVF_DOWN
+	 */
+	rtnl_lock();
+	if (!adapter->netdev_registered) {
+		err = register_netdevice(netdev);
+		if (err) {
+			rtnl_unlock();
+			goto err_register;
+		}
+	}
+
+	adapter->netdev_registered = true;
+
+	netif_tx_stop_all_queues(netdev);
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_add_device(adapter);
+		if (err) {
+			rtnl_unlock();
+			dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
+				 err);
+		}
+	}
+	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
+	if (netdev->features & NETIF_F_GRO)
+		dev_info(&pdev->dev, "GRO is enabled\n");
+
+	adapter->state = __IAVF_DOWN;
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+	rtnl_unlock();
+
+	iavf_misc_irq_enable(adapter);
+	wake_up(&adapter->down_waitqueue);
+
+	adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL);
+	adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL);
+	if (!adapter->rss_key || !adapter->rss_lut) {
+		err = -ENOMEM;
+		goto err_mem;
+	}
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		iavf_init_rss(adapter);
+
+	return err;
+err_mem:
+	iavf_free_rss(adapter);
+err_register:
+	iavf_free_misc_irq(adapter);
+err_sw_init:
+	iavf_reset_interrupt_capability(adapter);
+err_alloc:
+	kfree(adapter->vf_res);
+	adapter->vf_res = NULL;
+err:
+	return err;
+}
+
+/**
+ * iavf_watchdog_task - Periodic call-back task
+ * @work: pointer to work_struct
+ **/
+static void iavf_watchdog_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						    struct iavf_adapter,
+						    watchdog_task.work);
+	struct iavf_hw *hw = &adapter->hw;
+	u32 reg_val;
+
+	if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section))
+		goto restart_watchdog;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		adapter->state = __IAVF_COMM_FAILED;
+
+	switch (adapter->state) {
+	case __IAVF_COMM_FAILED:
+		reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+			  IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
+		    reg_val == VIRTCHNL_VFR_COMPLETED) {
+			/* A chance for redemption! */
+			dev_err(&adapter->pdev->dev,
+				"Hardware came out of reset. Attempting reinit.\n");
+			adapter->state = __IAVF_STARTUP;
+			adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+			queue_delayed_work(iavf_wq, &adapter->init_task, 10);
+			clear_bit(__IAVF_IN_CRITICAL_TASK,
+				  &adapter->crit_section);
+			/* Don't reschedule the watchdog, since we've restarted
+			 * the init task. When init_task contacts the PF and
+			 * gets everything set up again, it'll restart the
+			 * watchdog for us. Down, boy. Sit. Stay. Woof.
+			 */
+			return;
+		}
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		clear_bit(__IAVF_IN_CRITICAL_TASK,
+			  &adapter->crit_section);
+		queue_delayed_work(iavf_wq,
+				   &adapter->watchdog_task,
+				   msecs_to_jiffies(10));
+		goto watchdog_done;
+	case __IAVF_RESETTING:
+		clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+		return;
+	case __IAVF_DOWN:
+	case __IAVF_DOWN_PENDING:
+	case __IAVF_TESTING:
+	case __IAVF_RUNNING:
+		if (adapter->current_op) {
+			if (!iavf_asq_done(hw)) {
+				dev_dbg(&adapter->pdev->dev,
+					"Admin queue timeout\n");
+				iavf_send_api_ver(adapter);
+			}
+		} else {
+			if (!iavf_process_aq_command(adapter) &&
+			    adapter->state == __IAVF_RUNNING)
+				iavf_request_stats(adapter);
+		}
+		break;
+	case __IAVF_REMOVE:
+		clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		return;
+	default:
+		goto restart_watchdog;
+	}
+
+	/* check for hw reset */
+	reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+	if (!reg_val) {
+		adapter->state = __IAVF_RESETTING;
+		adapter->flags |= IAVF_FLAG_RESET_PENDING;
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+		queue_work(iavf_wq, &adapter->reset_task);
+		goto watchdog_done;
+	}
+
+	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
+watchdog_done:
+	if (adapter->state == __IAVF_RUNNING ||
+	    adapter->state == __IAVF_COMM_FAILED)
+		iavf_detect_recover_hung(&adapter->vsi);
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+restart_watchdog:
+	if (adapter->aq_required)
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(20));
+	else
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+	queue_work(iavf_wq, &adapter->adminq_task);
+}
+
+static void iavf_disable_vf(struct iavf_adapter *adapter)
+{
+	struct iavf_mac_filter *f, *ftmp;
+	struct iavf_vlan_filter *fv, *fvtmp;
+	struct iavf_cloud_filter *cf, *cftmp;
+
+	adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+
+	/* We don't use netif_running() because it may be true prior to
+	 * ndo_open() returning, so we can't assume it means all our open
+	 * tasks have finished, since we're not holding the rtnl_lock here.
+	 */
+	if (adapter->state == __IAVF_RUNNING) {
+		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+		netif_carrier_off(adapter->netdev);
+		netif_tx_disable(adapter->netdev);
+		adapter->link_up = false;
+		iavf_napi_disable_all(adapter);
+		iavf_irq_disable(adapter);
+		iavf_free_traffic_irqs(adapter);
+		iavf_free_all_tx_resources(adapter);
+		iavf_free_all_rx_resources(adapter);
+	}
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	/* Delete all of the filters */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+
+	list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) {
+		list_del(&fv->list);
+		kfree(fv);
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_queues(adapter);
+	iavf_free_q_vectors(adapter);
+	kfree(adapter->vf_res);
+	iavf_shutdown_adminq(&adapter->hw);
+	adapter->netdev->flags &= ~IFF_UP;
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+	adapter->state = __IAVF_DOWN;
+	wake_up(&adapter->down_waitqueue);
+	dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
+}
+
+#define IAVF_RESET_WAIT_MS 10
+#define IAVF_RESET_WAIT_COUNT 500
+/**
+ * iavf_reset_task - Call-back task to handle hardware reset
+ * @work: pointer to work_struct
+ *
+ * During reset we need to shut down and reinitialize the admin queue
+ * before we can use it to communicate with the PF again. We also clear
+ * and reinit the rings because that context is lost as well.
+ **/
+static void iavf_reset_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						      struct iavf_adapter,
+						      reset_task);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	struct net_device *netdev = adapter->netdev;
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_vlan_filter *vlf;
+	struct iavf_cloud_filter *cf;
+	struct iavf_mac_filter *f;
+	u32 reg_val;
+	int i = 0, err;
+	bool running;
+
+	/* When device is being removed it doesn't make sense to run the reset
+	 * task, just return in such a case.
+	 */
+	if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+		return;
+
+	while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
+				&adapter->crit_section))
+		usleep_range(500, 1000);
+	if (CLIENT_ENABLED(adapter)) {
+		adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN |
+				    IAVF_FLAG_CLIENT_NEEDS_CLOSE |
+				    IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS |
+				    IAVF_FLAG_SERVICE_CLIENT_REQUESTED);
+		cancel_delayed_work_sync(&adapter->client_task);
+		iavf_notify_client_close(&adapter->vsi, true);
+	}
+	iavf_misc_irq_disable(adapter);
+	if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
+		adapter->flags &= ~IAVF_FLAG_RESET_NEEDED;
+		/* Restart the AQ here. If we have been reset but didn't
+		 * detect it, or if the PF had to reinit, our AQ will be hosed.
+		 */
+		iavf_shutdown_adminq(hw);
+		iavf_init_adminq(hw);
+		iavf_request_reset(adapter);
+	}
+	adapter->flags |= IAVF_FLAG_RESET_PENDING;
+
+	/* poll until we see the reset actually happen */
+	for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) {
+		reg_val = rd32(hw, IAVF_VF_ARQLEN1) &
+			  IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+		if (!reg_val)
+			break;
+		usleep_range(5000, 10000);
+	}
+	if (i == IAVF_RESET_WAIT_COUNT) {
+		dev_info(&adapter->pdev->dev, "Never saw reset\n");
+		goto continue_reset; /* act like the reset happened */
+	}
+
+	/* wait until the reset is complete and the PF is responding to us */
+	for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) {
+		/* sleep first to make sure a minimum wait time is met */
+		msleep(IAVF_RESET_WAIT_MS);
+
+		reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+			  IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE)
+			break;
+	}
+
+	pci_set_master(adapter->pdev);
+
+	if (i == IAVF_RESET_WAIT_COUNT) {
+		dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
+			reg_val);
+		iavf_disable_vf(adapter);
+		clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+		return; /* Do not attempt to reinit. It's dead, Jim. */
+	}
+
+continue_reset:
+	/* We don't use netif_running() because it may be true prior to
+	 * ndo_open() returning, so we can't assume it means all our open
+	 * tasks have finished, since we're not holding the rtnl_lock here.
+	 */
+	running = ((adapter->state == __IAVF_RUNNING) ||
+		   (adapter->state == __IAVF_RESETTING));
+
+	if (running) {
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+		adapter->link_up = false;
+		iavf_napi_disable_all(adapter);
+	}
+	iavf_irq_disable(adapter);
+
+	adapter->state = __IAVF_RESETTING;
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+
+	/* free the Tx/Rx rings and descriptors, might be better to just
+	 * re-use them sometime in the future
+	 */
+	iavf_free_all_rx_resources(adapter);
+	iavf_free_all_tx_resources(adapter);
+
+	adapter->flags |= IAVF_FLAG_QUEUES_DISABLED;
+	/* kill and reinit the admin queue */
+	iavf_shutdown_adminq(hw);
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+	err = iavf_init_adminq(hw);
+	if (err)
+		dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n",
+			 err);
+	adapter->aq_required = 0;
+
+	if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) {
+		err = iavf_reinit_interrupt_scheme(adapter);
+		if (err)
+			goto reset_err;
+	}
+
+	adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG;
+	adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	/* re-add all MAC filters */
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		f->add = true;
+	}
+	/* re-add all VLAN filters */
+	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+		vlf->add = true;
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	/* check if TCs are running and re-add all cloud filters */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+			cf->add = true;
+		}
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+	iavf_misc_irq_enable(adapter);
+
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
+
+	/* We were running when the reset started, so we need to restore some
+	 * state here.
+	 */
+	if (running) {
+		/* allocate transmit descriptors */
+		err = iavf_setup_all_tx_resources(adapter);
+		if (err)
+			goto reset_err;
+
+		/* allocate receive descriptors */
+		err = iavf_setup_all_rx_resources(adapter);
+		if (err)
+			goto reset_err;
+
+		if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) {
+			err = iavf_request_traffic_irqs(adapter, netdev->name);
+			if (err)
+				goto reset_err;
+
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+		}
+
+		iavf_configure(adapter);
+
+		iavf_up_complete(adapter);
+
+		iavf_irq_enable(adapter, true);
+	} else {
+		adapter->state = __IAVF_DOWN;
+		wake_up(&adapter->down_waitqueue);
+	}
+	clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+
+	return;
+reset_err:
+	clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
+	iavf_close(netdev);
+}
+
+/**
+ * iavf_adminq_task - worker thread to clean the admin queue
+ * @work: pointer to work_struct containing our data
+ **/
+static void iavf_adminq_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter =
+		container_of(work, struct iavf_adapter, adminq_task);
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_arq_event_info event;
+	enum virtchnl_ops v_op;
+	enum iavf_status ret, v_ret;
+	u32 val, oldval;
+	u16 pending;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		goto out;
+
+	event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
+	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+	if (!event.msg_buf)
+		goto out;
+
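+	/* Drain the admin receive queue; each element carries a virtchnl
+	 * opcode and status in the descriptor cookie fields.
+	 */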
+	do {
+		ret = iavf_clean_arq_element(hw, &event, &pending);
+		v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+		v_ret = (enum iavf_status)le32_to_cpu(event.desc.cookie_low);
+
+		if (ret || !v_op)
+			break; /* No event to process or error cleaning ARQ */
+
+		iavf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf,
+					 event.msg_len);
+		if (pending != 0)
+			memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
+	} while (pending);
+
+	if ((adapter->flags &
+	     (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
+	    adapter->state == __IAVF_RESETTING)
+		goto freedom;
+
+	/* check for error indications */
+	val = rd32(hw, hw->aq.arq.len);
+	if (val == 0xdeadbeef) /* indicates device in reset */
+		goto freedom;
+	oldval = val;
+	if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ VF Error detected\n");
+		val &= ~IAVF_VF_ARQLEN1_ARQVFE_MASK;
+	}
+	if (val & IAVF_VF_ARQLEN1_ARQOVFL_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ Overflow Error detected\n");
+		val &= ~IAVF_VF_ARQLEN1_ARQOVFL_MASK;
+	}
+	if (val & IAVF_VF_ARQLEN1_ARQCRIT_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ Critical Error detected\n");
+		val &= ~IAVF_VF_ARQLEN1_ARQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(hw, hw->aq.arq.len, val);
+
+	val = rd32(hw, hw->aq.asq.len);
+	oldval = val;
+	if (val & IAVF_VF_ATQLEN1_ATQVFE_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ VF Error detected\n");
+		val &= ~IAVF_VF_ATQLEN1_ATQVFE_MASK;
+	}
+	if (val & IAVF_VF_ATQLEN1_ATQOVFL_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ Overflow Error detected\n");
+		val &= ~IAVF_VF_ATQLEN1_ATQOVFL_MASK;
+	}
+	if (val & IAVF_VF_ATQLEN1_ATQCRIT_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ Critical Error detected\n");
+		val &= ~IAVF_VF_ATQLEN1_ATQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(hw, hw->aq.asq.len, val);
+
+freedom:
+	kfree(event.msg_buf);
+out:
+	/* re-enable Admin queue interrupt cause */
+	iavf_misc_irq_enable(adapter);
+}
+
+/**
+ * iavf_client_task - worker thread to perform client work
+ * @work: pointer to work_struct containing our data
+ *
+ * This task handles client interactions. Because client calls can be
+ * reentrant, we can't handle them in the watchdog.
+ **/
+static void iavf_client_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter =
+		container_of(work, struct iavf_adapter, client_task.work);
+
+	/* If we can't get the client bit, just give up. We'll be rescheduled
+	 * later.
+	 */
+	if (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section))
+		return;
+
+	if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) {
+		iavf_client_subtask(adapter);
+		adapter->flags &= ~IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS) {
+		iavf_notify_client_l2_params(&adapter->vsi);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_CLOSE) {
+		iavf_notify_client_close(&adapter->vsi, false);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_CLOSE;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_OPEN) {
+		iavf_notify_client_open(&adapter->vsi);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN;
+	}
+out:
+	clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+}
+
+/**
+ * iavf_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+void iavf_free_all_tx_resources(struct iavf_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->tx_rings)
+		return;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		if (adapter->tx_rings[i].desc)
+			iavf_free_tx_resources(&adapter->tx_rings[i]);
+}
+
+/**
+ * iavf_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->tx_rings[i].count = adapter->tx_desc_count;
+		err = iavf_setup_tx_descriptors(&adapter->tx_rings[i]);
+		if (!err)
+			continue;
+		dev_err(&adapter->pdev->dev,
+			"Allocation for Tx Queue %u failed\n", i);
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * iavf_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->rx_rings[i].count = adapter->rx_desc_count;
+		err = iavf_setup_rx_descriptors(&adapter->rx_rings[i]);
+		if (!err)
+			continue;
+		dev_err(&adapter->pdev->dev,
+			"Allocation for Rx Queue %u failed\n", i);
+		break;
+	}
+	return err;
+}
+
+/**
+ * iavf_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+void iavf_free_all_rx_resources(struct iavf_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->rx_rings)
+		return;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		if (adapter->rx_rings[i].desc)
+			iavf_free_rx_resources(&adapter->rx_rings[i]);
+}
+
+/**
+ * iavf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter,
+				      u64 max_tx_rate)
+{
+	int speed = 0, ret = 0;
+
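+	/* Link speeds below are in Mbps, matching the units of max_tx_rate,
+	 * which the caller has already converted.
+	 */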
+	switch (adapter->link_speed) {
+	case IAVF_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case IAVF_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case IAVF_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case IAVF_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case IAVF_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case IAVF_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	default:
+		break;
+	}
+
+	if (max_tx_rate > speed) {
+		dev_err(&adapter->pdev->dev,
+			"Invalid tx rate specified\n");
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_validate_ch_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates if the config provided by the user to
+ * configure queue channels is valid or not. Returns 0 on a valid
+ * config.
+ **/
+static int iavf_validate_ch_config(struct iavf_adapter *adapter,
+				   struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	u64 total_max_rate = 0;
+	int i, num_qps = 0;
+	u64 tx_rate = 0;
+	int ret = 0;
+
+	if (mqprio_qopt->qopt.num_tc > IAVF_MAX_TRAFFIC_CLASS ||
+	    mqprio_qopt->qopt.num_tc < 1)
+		return -EINVAL;
+
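+	/* TCs must cover a contiguous queue range: each TC's offset has to
+	 * start exactly where the previous TC ended.
+	 */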
+	for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
+		if (!mqprio_qopt->qopt.count[i] ||
+		    mqprio_qopt->qopt.offset[i] != num_qps)
+			return -EINVAL;
+		if (mqprio_qopt->min_rate[i]) {
+			dev_err(&adapter->pdev->dev,
+				"Invalid min tx rate (greater than 0) specified\n");
+			return -EINVAL;
+		}
+		/* convert to Mbps */
+		tx_rate = div_u64(mqprio_qopt->max_rate[i],
+				  IAVF_MBPS_DIVISOR);
+		total_max_rate += tx_rate;
+		num_qps += mqprio_qopt->qopt.count[i];
+	}
+	if (num_qps > IAVF_MAX_REQ_QUEUES)
+		return -EINVAL;
+
+	ret = iavf_validate_tx_bandwidth(adapter, total_max_rate);
+	return ret;
+}
+
+/**
+ * iavf_del_all_cloud_filters - delete all cloud filters on the traffic classes
+ * @adapter: board private structure
+ **/
+static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_cloud_filter *cf, *cftmp;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+				 list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __iavf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_data: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	u8 num_tc = 0, total_qps = 0;
+	int ret = 0, netdev_tc = 0;
+	u64 max_tx_rate;
+	u16 mode;
+	int i;
+
+	num_tc = mqprio_qopt->qopt.num_tc;
+	mode = mqprio_qopt->mode;
+
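+	/* An illustrative (not normative) command that reaches this handler
+	 * in channel mode is:
+	 *
+	 *   tc qdisc add dev <iface> root mqprio num_tc 2 \
+	 *      map 0 0 0 0 1 1 1 1 queues 4@0 4@4 hw 1 mode channel
+	 */
+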
+	/* delete queue_channel */
+	if (!mqprio_qopt->qopt.hw) {
+		if (adapter->ch_config.state == __IAVF_TC_RUNNING) {
+			/* reset the tc configuration */
+			netdev_reset_tc(netdev);
+			adapter->num_tc = 0;
+			netif_tx_stop_all_queues(netdev);
+			netif_tx_disable(netdev);
+			iavf_del_all_cloud_filters(adapter);
+			adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS;
+			goto exit;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	/* add queue channel */
+	if (mode == TC_MQPRIO_MODE_CHANNEL) {
+		if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+			dev_err(&adapter->pdev->dev, "ADq not supported\n");
+			return -EOPNOTSUPP;
+		}
+		if (adapter->ch_config.state != __IAVF_TC_INVALID) {
+			dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+			return -EINVAL;
+		}
+
+		ret = iavf_validate_ch_config(adapter, mqprio_qopt);
+		if (ret)
+			return ret;
+		/* Return if same TC config is requested */
+		if (adapter->num_tc == num_tc)
+			return 0;
+		adapter->num_tc = num_tc;
+
+		for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) {
+			if (i < num_tc) {
+				adapter->ch_config.ch_info[i].count =
+					mqprio_qopt->qopt.count[i];
+				adapter->ch_config.ch_info[i].offset =
+					mqprio_qopt->qopt.offset[i];
+				total_qps += mqprio_qopt->qopt.count[i];
+				max_tx_rate = mqprio_qopt->max_rate[i];
+				/* convert to Mbps */
+				max_tx_rate = div_u64(max_tx_rate,
+						      IAVF_MBPS_DIVISOR);
+				adapter->ch_config.ch_info[i].max_tx_rate =
+					max_tx_rate;
+			} else {
+				adapter->ch_config.ch_info[i].count = 1;
+				adapter->ch_config.ch_info[i].offset = 0;
+			}
+		}
+		adapter->ch_config.total_qps = total_qps;
+		netif_tx_stop_all_queues(netdev);
+		netif_tx_disable(netdev);
+		adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS;
+		netdev_reset_tc(netdev);
+		/* Report the tc mapping up the stack */
+		netdev_set_num_tc(adapter->netdev, num_tc);
+		for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) {
+			u16 qcount = mqprio_qopt->qopt.count[i];
+			u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+			if (i < num_tc)
+				netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+						    qoffset);
+		}
+	}
+exit:
+	return ret;
+}
+
+/**
+ * iavf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @f: pointer to struct flow_cls_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+				 struct flow_cls_offload *f,
+				 struct iavf_cloud_filter *filter)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
+	u16 n_proto_mask = 0;
+	u16 n_proto_key = 0;
+	u8 field_flags = 0;
+	u16 addr_type = 0;
+	u16 n_proto = 0;
+	int i = 0;
+	struct virtchnl_filter *vf = &filter->f;
+
+	if (dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+		dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+			dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
+
+		flow_rule_match_enc_keyid(rule, &match);
+		if (match.mask->keyid != 0)
+			field_flags |= IAVF_CLOUD_FIELD_TEN_ID;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+		n_proto = n_proto_key & n_proto_mask;
+		if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+			return -EINVAL;
+		if (n_proto == ETH_P_IPV6) {
+			/* specify flow type as TCP IPv6 */
+			vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+		}
+
+		if (match.key->ip_proto != IPPROTO_TCP) {
+			dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+			return -EINVAL;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+
+		/* use is_broadcast and is_zero to check for all 0xff or all 0 */
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (is_broadcast_ether_addr(match.mask->dst)) {
+				field_flags |= IAVF_CLOUD_FIELD_OMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+					match.mask->dst);
+				return IAVF_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (is_broadcast_ether_addr(match.mask->src)) {
+				field_flags |= IAVF_CLOUD_FIELD_IMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+					match.mask->src);
+				return IAVF_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(match.key->dst))
+			if (is_valid_ether_addr(match.key->dst) ||
+			    is_multicast_ether_addr(match.key->dst)) {
+				/* set the mask if a valid dst_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.dst_mac,
+						match.key->dst);
+			}
+
+		if (!is_zero_ether_addr(match.key->src))
+			if (is_valid_ether_addr(match.key->src) ||
+			    is_multicast_ether_addr(match.key->src)) {
+				/* set the mask if a valid src_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					vf->mask.tcp_spec.src_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.src_mac,
+						match.key->src);
+			}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
+				field_flags |= IAVF_CLOUD_FIELD_IVLAN;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+					match.mask->vlan_id);
+				return IAVF_ERR_CONFIG;
+			}
+		}
+		vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+		vf->data.tcp_spec.vlan_id = cpu_to_be16(match.key->vlan_id);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+					be32_to_cpu(match.mask->dst));
+				return IAVF_ERR_CONFIG;
+			}
+		}
+
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be32(0xffffffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+					be32_to_cpu(match.mask->src));
+				return IAVF_ERR_CONFIG;
+			}
+		}
+
+		if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) {
+			dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+			return IAVF_ERR_CONFIG;
+		}
+		if (match.key->dst) {
+			vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.dst_ip[0] = match.key->dst;
+		}
+		if (match.key->src) {
+			vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.src_ip[0] = match.key->src;
+		}
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+
+		/* validate mask, make sure it is not IPV6_ADDR_ANY */
+		if (ipv6_addr_any(&match.mask->dst)) {
+			dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+				IPV6_ADDR_ANY);
+			return IAVF_ERR_CONFIG;
+		}
+
+		/* src and dest IPv6 address should not be LOOPBACK
+		 * (0:0:0:0:0:0:0:1) which can be represented as ::1
+		 */
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
+			dev_err(&adapter->pdev->dev,
+				"ipv6 addr should not be loopback\n");
+			return IAVF_ERR_CONFIG;
+		}
+		if (!ipv6_addr_any(&match.mask->dst) ||
+		    !ipv6_addr_any(&match.mask->src))
+			field_flags |= IAVF_CLOUD_FIELD_IIP;
+
+		for (i = 0; i < 4; i++)
+			vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+		memcpy(&vf->data.tcp_spec.dst_ip, &match.key->dst.s6_addr32,
+		       sizeof(vf->data.tcp_spec.dst_ip));
+		for (i = 0; i < 4; i++)
+			vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+		memcpy(&vf->data.tcp_spec.src_ip, &match.key->src.s6_addr32,
+		       sizeof(vf->data.tcp_spec.src_ip));
+	}
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be16(0xffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+					be16_to_cpu(match.mask->src));
+				return IAVF_ERR_CONFIG;
+			}
+		}
+
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be16(0xffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+					be16_to_cpu(match.mask->dst));
+				return IAVF_ERR_CONFIG;
+			}
+		}
+		if (match.key->dst) {
+			vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+			vf->data.tcp_spec.dst_port = match.key->dst;
+		}
+
+		if (match.key->src) {
+			vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+			vf->data.tcp_spec.src_port = match.key->src;
+		}
+	}
+	vf->field_flags = field_flags;
+
+	return 0;
+}
+
+/**
+ * iavf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int iavf_handle_tclass(struct iavf_adapter *adapter, u32 tc,
+			      struct iavf_cloud_filter *filter)
+{
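+	/* TC0 is the default traffic class; no redirect action is needed */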
+	if (tc == 0)
+		return 0;
+	if (tc < adapter->num_tc) {
+		if (!filter->f.data.tcp_spec.dst_port) {
+			dev_err(&adapter->pdev->dev,
+				"Specify destination port to redirect to traffic class other than TC0\n");
+			return -EINVAL;
+		}
+	}
+	/* redirect to a traffic class on the same device */
+	filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+	filter->f.action_meta = tc;
+	return 0;
+}
+
+/**
+ * iavf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct flow_cls_offload
+ */
+static int iavf_configure_clsflower(struct iavf_adapter *adapter,
+				    struct flow_cls_offload *cls_flower)
+{
+	int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+	struct iavf_cloud_filter *filter = NULL;
+	int err = -EINVAL, count = 50;
+
+	if (tc < 0) {
+		dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
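+	/* An illustrative (not normative) filter that exercises this path:
+	 *
+	 *   tc filter add dev <iface> protocol ip parent ffff: prio 1 flower \
+	 *      dst_ip 192.168.1.1/32 ip_proto tcp dst_port 80 skip_sw hw_tc 1
+	 */
+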
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section)) {
+		if (--count == 0)
+			goto err;
+		udelay(1);
+	}
+
+	filter->cookie = cls_flower->cookie;
+
+	/* set the mask to all zeroes to begin with */
+	memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+	/* start out with flow type and eth type IPv4 to begin with */
+	filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+	err = iavf_parse_cls_flower(adapter, cls_flower, filter);
+	if (err < 0)
+		goto err;
+
+	err = iavf_handle_tclass(adapter, tc, filter);
+	if (err < 0)
+		goto err;
+
+	/* add filter to the list */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_add_tail(&filter->list, &adapter->cloud_filter_list);
+	adapter->num_cloud_filters++;
+	filter->add = true;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+	if (err)
+		kfree(filter);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	return err;
+}
+
+/**
+ * iavf_find_cf - Find the cloud filter in the list
+ * @adapter: Board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct iavf_cloud_filter *iavf_find_cf(struct iavf_adapter *adapter,
+					      unsigned long *cookie)
+{
+	struct iavf_cloud_filter *filter = NULL;
+
+	if (!cookie)
+		return NULL;
+
+	list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct flow_cls_offload
+ */
+static int iavf_delete_clsflower(struct iavf_adapter *adapter,
+				 struct flow_cls_offload *cls_flower)
+{
+	struct iavf_cloud_filter *filter = NULL;
+	int err = 0;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	filter = iavf_find_cf(adapter, &cls_flower->cookie);
+	if (filter) {
+		filter->del = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+	} else {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	return err;
+}
+
+/**
+ * iavf_setup_tc_cls_flower - flower classifier offloads
+ * @adapter: board private structure
+ * @cls_flower: pointer to flow_cls_offload struct with flow info
+ */
+static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter,
+				    struct flow_cls_offload *cls_flower)
+{
+	if (cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return iavf_configure_clsflower(adapter, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return iavf_delete_clsflower(adapter, cls_flower);
+	case FLOW_CLS_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv: driver private data, the iavf adapter in this case
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
+{
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return iavf_setup_tc_cls_flower(cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
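+/* module-wide list of flow block callbacks, shared by all adapters */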
+static LIST_HEAD(iavf_block_cb_list);
+
+/**
+ * iavf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_data: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			 void *type_data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return __iavf_setup_tc(netdev, type_data);
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple(type_data,
+						  &iavf_block_cb_list,
+						  iavf_setup_tc_block_cb,
+						  adapter, adapter, true);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog is started,
+ * and the stack is notified that the interface is ready.
+ **/
+static int iavf_open(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int err;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) {
+		dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
+		return -EIO;
+	}
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section))
+		usleep_range(500, 1000);
+
+	if (adapter->state != __IAVF_DOWN) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	/* allocate transmit descriptors */
+	err = iavf_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = iavf_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	/* clear any pending interrupts, may auto mask */
+	err = iavf_request_traffic_irqs(adapter, netdev->name);
+	if (err)
+		goto err_req_irq;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
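+	/* make sure our own MAC address is installed as a filter */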
+	iavf_add_filter(adapter, adapter->hw.mac.addr);
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	iavf_configure(adapter);
+
+	iavf_up_complete(adapter);
+
+	iavf_irq_enable(adapter, true);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+
+	return 0;
+
+err_req_irq:
+	iavf_down(adapter);
+	iavf_free_traffic_irqs(adapter);
+err_setup_rx:
+	iavf_free_all_rx_resources(adapter);
+err_setup_tx:
+	iavf_free_all_tx_resources(adapter);
+err_unlock:
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+
+	return err;
+}
+
+/**
+ * iavf_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * needs to be disabled. All IRQs except vector 0 (reserved for admin queue)
+ * are freed, along with all transmit and receive resources.
+ **/
+static int iavf_close(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int status;
+
+	if (adapter->state <= __IAVF_DOWN_PENDING)
+		return 0;
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section))
+		usleep_range(500, 1000);
+
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+	if (CLIENT_ENABLED(adapter))
+		adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE;
+
+	iavf_down(adapter);
+	adapter->state = __IAVF_DOWN_PENDING;
+	iavf_free_traffic_irqs(adapter);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+
+	/* We explicitly don't free resources here because the hardware is
+	 * still active and can DMA into memory. Resources are cleared in
+	 * iavf_virtchnl_completion() after we get confirmation from the PF
+	 * driver that the rings have been stopped.
+	 *
+	 * Also, we wait for state to transition to __IAVF_DOWN before
+	 * returning. State change occurs in iavf_virtchnl_completion() after
+	 * VF resources are released (which occurs after the PF driver
+	 * processes and responds to our admin queue commands).
+	 */
+
+	status = wait_event_timeout(adapter->down_waitqueue,
+				    adapter->state == __IAVF_DOWN,
+				    msecs_to_jiffies(500));
+	if (!status)
+		netdev_warn(netdev, "Device resources not yet released\n");
+	return 0;
+}
+
+/**
+ * iavf_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	netdev->mtu = new_mtu;
+	if (CLIENT_ENABLED(adapter)) {
+		iavf_notify_client_l2_params(&adapter->vsi);
+		adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+	}
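+	/* schedule a reset so the rings are rebuilt for the new MTU */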
+	adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+	queue_work(iavf_wq, &adapter->reset_task);
+
+	return 0;
+}
+
+/**
+ * iavf_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ * Note: expects to be called while under rtnl_lock()
+ **/
+static int iavf_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* Don't allow changing VLAN_RX flag when adapter is not capable
+	 * of VLAN offload
+	 */
+	if (!VLAN_ALLOWED(adapter)) {
+		if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
+			return -EINVAL;
+	} else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
+		if (features & NETIF_F_HW_VLAN_CTAG_RX)
+			adapter->aq_required |=
+				IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+		else
+			adapter->aq_required |=
+				IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buff
+ * @dev: This physical port's netdev
+ * @features: Offload features that the stack believes apply
+ **/
+static netdev_features_t iavf_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
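+	/* the length checks below mirror the Tx descriptor header-length
+	 * field widths; exceeding any of them drops back to software
+	 * checksum and GSO via out_err
+	 */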
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* MACLEN can support at most 63 words */
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(63 * 2))
+		goto out_err;
+
+	/* IPLEN and EIPLEN can support at most 127 dwords */
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(127 * 4))
+		goto out_err;
+
+	if (skb->encapsulation) {
+		/* L4TUNLEN can support 127 words */
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(127 * 2))
+			goto out_err;
+
+		/* IPLEN can support at most 127 dwords */
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(127 * 4))
+			goto out_err;
+	}
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * flexible value, and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
+
+	return features;
+out_err:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+/**
+ * iavf_fix_features - fix up the netdev feature bits
+ * @netdev: our net device
+ * @features: desired feature bits
+ *
+ * Returns fixed-up features bits
+ **/
+static netdev_features_t iavf_fix_features(struct net_device *netdev,
+					   netdev_features_t features)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+		features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+			      NETIF_F_HW_VLAN_CTAG_RX |
+			      NETIF_F_HW_VLAN_CTAG_FILTER);
+
+	return features;
+}
+
+static const struct net_device_ops iavf_netdev_ops = {
+	.ndo_open		= iavf_open,
+	.ndo_stop		= iavf_close,
+	.ndo_start_xmit		= iavf_xmit_frame,
+	.ndo_set_rx_mode	= iavf_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= iavf_set_mac,
+	.ndo_change_mtu		= iavf_change_mtu,
+	.ndo_tx_timeout		= iavf_tx_timeout,
+	.ndo_vlan_rx_add_vid	= iavf_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= iavf_vlan_rx_kill_vid,
+	.ndo_features_check	= iavf_features_check,
+	.ndo_fix_features	= iavf_fix_features,
+	.ndo_set_features	= iavf_set_features,
+	.ndo_setup_tc		= iavf_setup_tc,
+};
+
+/**
+ * iavf_check_reset_complete - check that VF reset is complete
+ * @hw: pointer to hw struct
+ *
+ * Returns 0 if device is ready to use, or -EBUSY if it's in reset.
+ **/
+static int iavf_check_reset_complete(struct iavf_hw *hw)
+{
+	u32 rstat;
+	int i;
+
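+	/* poll for roughly 1-2 ms (100 iterations of 10-20 us each) */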
+	for (i = 0; i < 100; i++) {
+		rstat = rd32(hw, IAVF_VFGEN_RSTAT) &
+			     IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if ((rstat == VIRTCHNL_VFR_VFACTIVE) ||
+		    (rstat == VIRTCHNL_VFR_COMPLETED))
+			return 0;
+		usleep_range(10, 20);
+	}
+	return -EBUSY;
+}
+
+/**
+ * iavf_process_config - Process the config information we got from the PF
+ * @adapter: board private structure
+ *
+ * Verify that we have a valid config struct, and set up our netdev features
+ * and our VSI struct.
+ **/
+int iavf_process_config(struct iavf_adapter *adapter)
+{
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	int i, num_req_queues = adapter->num_req_queues;
+	struct net_device *netdev = adapter->netdev;
+	struct iavf_vsi *vsi = &adapter->vsi;
+	netdev_features_t hw_enc_features;
+	netdev_features_t hw_features;
+
+	/* got VF config message back from PF, now we can parse it */
+	for (i = 0; i < vfres->num_vsis; i++) {
+		if (vfres->vsi_res[i].vsi_type == VIRTCHNL_VSI_SRIOV)
+			adapter->vsi_res = &vfres->vsi_res[i];
+	}
+	if (!adapter->vsi_res) {
+		dev_err(&adapter->pdev->dev, "No LAN VSI found\n");
+		return -ENODEV;
+	}
+
+	if (num_req_queues &&
+	    num_req_queues != adapter->vsi_res->num_queue_pairs) {
+		/* Problem.  The PF gave us fewer queues than what we had
+		 * negotiated in our request.  Need a reset to try to get
+		 * back to a working state.
+		 */
+		dev_err(&adapter->pdev->dev,
+			"Requested %d queues, but PF only gave us %d.\n",
+			num_req_queues,
+			adapter->vsi_res->num_queue_pairs);
+		adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+		adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+		iavf_schedule_reset(adapter);
+		return -ENODEV;
+	}
+	adapter->num_req_queues = 0;
+
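+	/* base feature set; encap, VLAN and TC features are added below
+	 * only when the PF advertises the matching capability
+	 */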
+	hw_enc_features = NETIF_F_SG			|
+			  NETIF_F_IP_CSUM		|
+			  NETIF_F_IPV6_CSUM		|
+			  NETIF_F_HIGHDMA		|
+			  NETIF_F_SOFT_FEATURES	|
+			  NETIF_F_TSO			|
+			  NETIF_F_TSO_ECN		|
+			  NETIF_F_TSO6			|
+			  NETIF_F_SCTP_CRC		|
+			  NETIF_F_RXHASH		|
+			  NETIF_F_RXCSUM		|
+			  0;
+
+	/* advertise to stack only if offloads for encapsulated packets are
+	 * supported
+	 */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
+		hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL	|
+				   NETIF_F_GSO_GRE		|
+				   NETIF_F_GSO_GRE_CSUM		|
+				   NETIF_F_GSO_IPXIP4		|
+				   NETIF_F_GSO_IPXIP6		|
+				   NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+				   NETIF_F_GSO_PARTIAL		|
+				   0;
+
+		if (!(vfres->vf_cap_flags &
+		      VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
+			netdev->gso_partial_features |=
+				NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+		netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+		netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+		netdev->hw_enc_features |= hw_enc_features;
+	}
+	/* record features VLANs can make use of */
+	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+
+	/* Write features and hw_features separately to avoid polluting
+	 * with, or dropping, features that are set when we registered.
+	 */
+	hw_features = hw_enc_features;
+
+	/* Enable VLAN features if supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
+				NETIF_F_HW_VLAN_CTAG_RX);
+	/* Enable cloud filter if ADQ is supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+		hw_features |= NETIF_F_HW_TC;
+
+	netdev->hw_features |= hw_features;
+
+	netdev->features |= hw_features;
+
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* Do not turn on offloads when they are requested to be turned off.
+	 * TSO needs an MTU of at least 576 bytes to work correctly.
+	 */
+	if (netdev->wanted_features) {
+		if (!(netdev->wanted_features & NETIF_F_TSO) ||
+		    netdev->mtu < 576)
+			netdev->features &= ~NETIF_F_TSO;
+		if (!(netdev->wanted_features & NETIF_F_TSO6) ||
+		    netdev->mtu < 576)
+			netdev->features &= ~NETIF_F_TSO6;
+		if (!(netdev->wanted_features & NETIF_F_TSO_ECN))
+			netdev->features &= ~NETIF_F_TSO_ECN;
+		if (!(netdev->wanted_features & NETIF_F_GRO))
+			netdev->features &= ~NETIF_F_GRO;
+		if (!(netdev->wanted_features & NETIF_F_GSO))
+			netdev->features &= ~NETIF_F_GSO;
+	}
+
+	adapter->vsi.id = adapter->vsi_res->vsi_id;
+
+	adapter->vsi.back = adapter;
+	adapter->vsi.base_vector = 1;
+	adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK;
+	vsi->netdev = adapter->netdev;
+	vsi->qs_handle = adapter->vsi_res->qset_handle;
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		adapter->rss_key_size = vfres->rss_key_size;
+		adapter->rss_lut_size = vfres->rss_lut_size;
+	} else {
+		adapter->rss_key_size = IAVF_HKEY_ARRAY_SIZE;
+		adapter->rss_lut_size = IAVF_HLUT_ARRAY_SIZE;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_init_task - worker thread to perform delayed initialization
+ * @work: pointer to work_struct containing our data
+ *
+ * This task completes the work that was begun in probe. Due to the nature
+ * of VF-PF communications, we may need to wait tens of milliseconds to get
+ * responses back from the PF. Rather than busy-wait in probe and bog down the
+ * whole system, we'll do it in a task so we can sleep.
+ * This task only runs during driver init. Once we've established
+ * communications with the PF driver and set up our netdev, the watchdog
+ * takes over.
+ **/
+static void iavf_init_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						    struct iavf_adapter,
+						    init_task.work);
+	struct iavf_hw *hw = &adapter->hw;
+
+	switch (adapter->state) {
+	case __IAVF_STARTUP:
+		if (iavf_startup(adapter) < 0)
+			goto init_failed;
+		break;
+	case __IAVF_INIT_VERSION_CHECK:
+		if (iavf_init_version_check(adapter) < 0)
+			goto init_failed;
+		break;
+	case __IAVF_INIT_GET_RESOURCES:
+		if (iavf_init_get_resources(adapter) < 0)
+			goto init_failed;
+		return;
+	default:
+		goto init_failed;
+	}
+
+	queue_delayed_work(iavf_wq, &adapter->init_task,
+			   msecs_to_jiffies(30));
+	return;
+init_failed:
+	if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
+		dev_err(&adapter->pdev->dev,
+			"Failed to communicate with PF; waiting before retry\n");
+		adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+		iavf_shutdown_adminq(hw);
+		adapter->state = __IAVF_STARTUP;
+		queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
+		return;
+	}
+	queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
+}
+
+/**
+ * iavf_shutdown - Shutdown the device in preparation for a reboot
+ * @pdev: pci device structure
+ **/
+static void iavf_shutdown(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		iavf_close(netdev);
+
+	/* Prevent the watchdog from running. */
+	adapter->state = __IAVF_REMOVE;
+	adapter->aq_required = 0;
+
+#ifdef CONFIG_PM
+	pci_save_state(pdev);
+
+#endif
+	pci_disable_device(pdev);
+}
+
+/**
+ * iavf_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in iavf_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * iavf_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct iavf_adapter *adapter = NULL;
+	struct iavf_hw *hw = NULL;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev,
+				"DMA configuration failed: 0x%x\n", err);
+			goto err_dma;
+		}
+	}
+
+	err = pci_request_regions(pdev, iavf_driver_name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"pci_request_regions failed 0x%x\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_enable_pcie_error_reporting(pdev);
+
+	pci_set_master(pdev);
+
+	netdev = alloc_etherdev_mq(sizeof(struct iavf_adapter),
+				   IAVF_MAX_REQ_QUEUES);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+
+	hw = &adapter->hw;
+	hw->back = adapter;
+
+	adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+	adapter->state = __IAVF_STARTUP;
+
+	/* Call save state here because it relies on the adapter struct. */
+	pci_save_state(pdev);
+
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+			      pci_resource_len(pdev, 0));
+	if (!hw->hw_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	hw->bus.bus_id = pdev->bus->number;
+
+	/* set up the locks for the AQ, do this only once in probe
+	 * and destroy them only once in remove
+	 */
+	mutex_init(&hw->aq.asq_mutex);
+	mutex_init(&hw->aq.arq_mutex);
+
+	spin_lock_init(&adapter->mac_vlan_list_lock);
+	spin_lock_init(&adapter->cloud_filter_list_lock);
+
+	INIT_LIST_HEAD(&adapter->mac_filter_list);
+	INIT_LIST_HEAD(&adapter->vlan_filter_list);
+	INIT_LIST_HEAD(&adapter->cloud_filter_list);
+
+	INIT_WORK(&adapter->reset_task, iavf_reset_task);
+	INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
+	INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
+	INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
+	INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task);
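+	/* stagger the first init run by PCI function number so several
+	 * VFs do not hit the PF's admin queue at the same instant
+	 */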
+	queue_delayed_work(iavf_wq, &adapter->init_task,
+			   msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
+
+	/* Setup the wait queue for indicating transition to down status */
+	init_waitqueue_head(&adapter->down_waitqueue);
+
+	return 0;
+
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+#ifdef CONFIG_PM
+/**
+ * iavf_suspend - Power management suspend routine
+ * @pdev: PCI device information struct
+ * @state: unused
+ *
+ * Called when the system (VM) is entering sleep/suspend.
+ **/
+static int iavf_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int retval = 0;
+
+	netif_device_detach(netdev);
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section))
+		usleep_range(500, 1000);
+
+	if (netif_running(netdev)) {
+		rtnl_lock();
+		iavf_down(adapter);
+		rtnl_unlock();
+	}
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+
+	retval = pci_save_state(pdev);
+	if (retval)
+		return retval;
+
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+/**
+ * iavf_resume - Power management resume routine
+ * @pdev: PCI device information struct
+ *
+ * Called when the system (VM) is resumed from sleep/suspend.
+ **/
+static int iavf_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	/* pci_restore_state clears dev->state_saved so call
+	 * pci_save_state to restore it.
+	 */
+	pci_save_state(pdev);
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device from suspend.\n");
+		return err;
+	}
+	pci_set_master(pdev);
+
+	rtnl_lock();
+	err = iavf_set_interrupt_capability(adapter);
+	if (err) {
+		rtnl_unlock();
+		dev_err(&pdev->dev, "Cannot enable MSI-X interrupts.\n");
+		return err;
+	}
+	err = iavf_request_misc_irq(adapter);
+	rtnl_unlock();
+	if (err) {
+		dev_err(&pdev->dev, "Cannot get interrupt vector.\n");
+		return err;
+	}
+
+	queue_work(iavf_wq, &adapter->reset_task);
+
+	netif_device_attach(netdev);
+
+	return err;
+}
+
+#endif /* CONFIG_PM */
+/**
+ * iavf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * iavf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void iavf_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_vlan_filter *vlf, *vlftmp;
+	struct iavf_mac_filter *f, *ftmp;
+	struct iavf_cloud_filter *cf, *cftmp;
+	struct iavf_hw *hw = &adapter->hw;
+	int err;
+	/* Indicate we are in remove and not to run reset_task */
+	set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
+	cancel_delayed_work_sync(&adapter->init_task);
+	cancel_work_sync(&adapter->reset_task);
+	cancel_delayed_work_sync(&adapter->client_task);
+	if (adapter->netdev_registered) {
+		unregister_netdev(netdev);
+		adapter->netdev_registered = false;
+	}
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_del_device(adapter);
+		if (err)
+			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+				 err);
+	}
+
+	/* Shut down all the garbage mashers on the detention level */
+	adapter->state = __IAVF_REMOVE;
+	adapter->aq_required = 0;
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+	iavf_request_reset(adapter);
+	msleep(50);
+	/* If the FW isn't responding, kick it once, but only once. */
+	if (!iavf_asq_done(hw)) {
+		iavf_request_reset(adapter);
+		msleep(50);
+	}
+	iavf_free_all_tx_resources(adapter);
+	iavf_free_all_rx_resources(adapter);
+	iavf_misc_irq_disable(adapter);
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+
+	cancel_delayed_work_sync(&adapter->watchdog_task);
+
+	cancel_work_sync(&adapter->adminq_task);
+
+	iavf_free_rss(adapter);
+
+	if (hw->aq.asq.count)
+		iavf_shutdown_adminq(hw);
+
+	/* destroy the locks only once, here */
+	mutex_destroy(&hw->aq.arq_mutex);
+	mutex_destroy(&hw->aq.asq_mutex);
+
+	iounmap(hw->hw_addr);
+	pci_release_regions(pdev);
+	iavf_free_all_tx_resources(adapter);
+	iavf_free_all_rx_resources(adapter);
+	iavf_free_queues(adapter);
+	kfree(adapter->vf_res);
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	/* If we got removed before an up/down sequence, we've got a filter
+	 * hanging out there that we need to get rid of.
+	 */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+	list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+				 list) {
+		list_del(&vlf->list);
+		kfree(vlf);
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver iavf_driver = {
+	.name     = iavf_driver_name,
+	.id_table = iavf_pci_tbl,
+	.probe    = iavf_probe,
+	.remove   = iavf_remove,
+#ifdef CONFIG_PM
+	.suspend  = iavf_suspend,
+	.resume   = iavf_resume,
+#endif
+	.shutdown = iavf_shutdown,
+};
+
+/**
+ * iavf_init_module - Driver Registration Routine
+ *
+ * iavf_init_module is the first routine called when the driver is
+ * loaded. It creates the driver workqueue and then registers with the
+ * PCI subsystem.
+ **/
+static int __init iavf_init_module(void)
+{
+	int ret;
+
+	pr_info("iavf: %s - version %s\n", iavf_driver_string,
+		iavf_driver_version);
+
+	pr_info("%s\n", iavf_copyright);
+
+	iavf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
+				  iavf_driver_name);
+	if (!iavf_wq) {
+		pr_err("%s: Failed to create workqueue\n", iavf_driver_name);
+		return -ENOMEM;
+	}
+	ret = pci_register_driver(&iavf_driver);
+	if (ret) {
+		/* do not leak the workqueue on registration failure */
+		destroy_workqueue(iavf_wq);
+	}
+	return ret;
+}
+
+module_init(iavf_init_module);
+
+/**
+ * iavf_exit_module - Driver Exit Cleanup Routine
+ *
+ * iavf_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit iavf_exit_module(void)
+{
+	pci_unregister_driver(&iavf_driver);
+	destroy_workqueue(iavf_wq);
+}
+
+module_exit(iavf_exit_module);
+
+/* iavf_main.c */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h
new file mode 100644
index 0000000..a452ce9
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_OSDEP_H_
+#define _IAVF_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/tcp.h>
+#include <linux/pci.h>
+
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+/* This file provides the glue between the shared code and
+ * the actual OS primitives
+ */
+
+#define hw_dbg(hw, S, A...)	do {} while (0)
+
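+/* 32- and 64-bit MMIO accessors; reg is a byte offset from hw_addr */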
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+
+#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+#define iavf_flush(a)		readl((a)->hw_addr + IAVF_VFGEN_RSTAT)
+
+/* memory allocation tracking */
+struct iavf_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	u32 size;
+};
+
+#define iavf_allocate_dma_mem(h, m, unused, s, a) \
+	iavf_allocate_dma_mem_d(h, m, s, a)
+#define iavf_free_dma_mem(h, m) iavf_free_dma_mem_d(h, m)
+
+struct iavf_virt_mem {
+	void *va;
+	u32 size;
+};
+#define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s)
+#define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m)
+
+#define iavf_debug(h, m, s, ...)				\
+do {								\
+	if (((m) & (h)->debug_mask))				\
+		pr_info("iavf %02x:%02x.%x " s,			\
+			(h)->bus.bus_id, (h)->bus.device,	\
+			(h)->bus.func, ##__VA_ARGS__);		\
+} while (0)
+
+#endif /* _IAVF_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h
new file mode 100644
index 0000000..edebfbb
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_PROTOTYPE_H_
+#define _IAVF_PROTOTYPE_H_
+
+#include "iavf_type.h"
+#include "iavf_alloc.h"
+#include <linux/avf/virtchnl.h>
+
+/* Prototypes for shared code functions that are not in
+ * the standard function pointer structures.  They exist
+ * mostly because they are needed even before init has
+ * happened, and they assist in the early SW and FW
+ * setup.
+ */
+
+/* adminq functions */
+enum iavf_status iavf_init_adminq(struct iavf_hw *hw);
+enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw);
+void iavf_adminq_init_ring_data(struct iavf_hw *hw);
+enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw,
+					struct iavf_arq_event_info *e,
+					u16 *events_pending);
+enum iavf_status iavf_asq_send_command(struct iavf_hw *hw,
+				       struct iavf_aq_desc *desc,
+				       void *buff, /* can be NULL */
+				       u16 buff_size,
+				       struct iavf_asq_cmd_details *cmd_details);
+bool iavf_asq_done(struct iavf_hw *hw);
+
+/* debug function for adminq */
+void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask,
+		   void *desc, void *buffer, u16 buf_len);
+
+void iavf_idle_aq(struct iavf_hw *hw);
+void iavf_resume_aq(struct iavf_hw *hw);
+bool iavf_check_asq_alive(struct iavf_hw *hw);
+enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading);
+const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err);
+const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err);
+
+enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid,
+				     bool pf_lut, u8 *lut, u16 lut_size);
+enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid,
+				     bool pf_lut, u8 *lut, u16 lut_size);
+enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid,
+				     struct iavf_aqc_get_set_rss_key_data *key);
+enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid,
+				     struct iavf_aqc_get_set_rss_key_data *key);
+
+enum iavf_status iavf_set_mac_type(struct iavf_hw *hw);
+
+extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[];
+
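+/* look up the decoded packet type fields for an 8-bit hardware ptype */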
+static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+	return iavf_ptype_lookup[ptype];
+}
+
+void iavf_vf_parse_hw_config(struct iavf_hw *hw,
+			     struct virtchnl_vf_resource *msg);
+enum iavf_status iavf_vf_reset(struct iavf_hw *hw);
+enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw,
+					enum virtchnl_ops v_opcode,
+					enum iavf_status v_retval,
+					u8 *msg, u16 msglen,
+					struct iavf_asq_cmd_details *cmd_details);
+#endif /* _IAVF_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h
new file mode 100644
index 0000000..bf79333
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_register.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_REGISTER_H_
+#define _IAVF_REGISTER_H_
+
+#define IAVF_VF_ARQBAH1 0x00006000 /* Reset: EMPR */
+#define IAVF_VF_ARQBAL1 0x00006C00 /* Reset: EMPR */
+#define IAVF_VF_ARQH1 0x00007400 /* Reset: EMPR */
+#define IAVF_VF_ARQH1_ARQH_SHIFT 0
+#define IAVF_VF_ARQH1_ARQH_MASK IAVF_MASK(0x3FF, IAVF_VF_ARQH1_ARQH_SHIFT)
+#define IAVF_VF_ARQLEN1 0x00008000 /* Reset: EMPR */
+#define IAVF_VF_ARQLEN1_ARQVFE_SHIFT 28
+#define IAVF_VF_ARQLEN1_ARQVFE_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQVFE_SHIFT)
+#define IAVF_VF_ARQLEN1_ARQOVFL_SHIFT 29
+#define IAVF_VF_ARQLEN1_ARQOVFL_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQOVFL_SHIFT)
+#define IAVF_VF_ARQLEN1_ARQCRIT_SHIFT 30
+#define IAVF_VF_ARQLEN1_ARQCRIT_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQCRIT_SHIFT)
+#define IAVF_VF_ARQLEN1_ARQENABLE_SHIFT 31
+#define IAVF_VF_ARQLEN1_ARQENABLE_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define IAVF_VF_ARQT1 0x00007000 /* Reset: EMPR */
+#define IAVF_VF_ATQBAH1 0x00007800 /* Reset: EMPR */
+#define IAVF_VF_ATQBAL1 0x00007C00 /* Reset: EMPR */
+#define IAVF_VF_ATQH1 0x00006400 /* Reset: EMPR */
+#define IAVF_VF_ATQLEN1 0x00006800 /* Reset: EMPR */
+#define IAVF_VF_ATQLEN1_ATQVFE_SHIFT 28
+#define IAVF_VF_ATQLEN1_ATQVFE_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQVFE_SHIFT)
+#define IAVF_VF_ATQLEN1_ATQOVFL_SHIFT 29
+#define IAVF_VF_ATQLEN1_ATQOVFL_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQOVFL_SHIFT)
+#define IAVF_VF_ATQLEN1_ATQCRIT_SHIFT 30
+#define IAVF_VF_ATQLEN1_ATQCRIT_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQCRIT_SHIFT)
+#define IAVF_VF_ATQLEN1_ATQENABLE_SHIFT 31
+#define IAVF_VF_ATQLEN1_ATQENABLE_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define IAVF_VF_ATQT1 0x00008400 /* Reset: EMPR */
+#define IAVF_VFGEN_RSTAT 0x00008800 /* Reset: VFR */
+#define IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT 0
+#define IAVF_VFGEN_RSTAT_VFR_STATE_MASK IAVF_MASK(0x3, IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT)
+#define IAVF_VFINT_DYN_CTL01 0x00005C00 /* Reset: VFR */
+#define IAVF_VFINT_DYN_CTL01_INTENA_SHIFT 0
+#define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT)
+#define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
+#define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
+#define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0
+#define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
+#define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT 3
+#define IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT 5
+#define IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT 24
+#define IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT)
+#define IAVF_VFINT_ICR0_ENA1 0x00005000 /* Reset: CORER */
+#define IAVF_VFINT_ICR0_ENA1_ADMINQ_SHIFT 30
+#define IAVF_VFINT_ICR0_ENA1_ADMINQ_MASK IAVF_MASK(0x1, IAVF_VFINT_ICR0_ENA1_ADMINQ_SHIFT)
+#define IAVF_VFINT_ICR0_ENA1_RSVD_SHIFT 31
+#define IAVF_VFINT_ICR01 0x00004800 /* Reset: CORER */
+#define IAVF_VFINT_ITRN1(_i, _INTVF) (0x00002800 + ((_i) * 64 + (_INTVF) * 4)) /* _i=0...2, _INTVF=0...15 */ /* Reset: VFR */
+#define IAVF_QRX_TAIL1(_Q) (0x00002000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define IAVF_QTX_TAIL1(_Q) (0x00000000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: PFR */
+#define IAVF_VFQF_HENA(_i) (0x0000C400 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+#define IAVF_VFQF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) /* _i=0...12 */ /* Reset: CORER */
+#define IAVF_VFQF_HKEY_MAX_INDEX 12
+#define IAVF_VFQF_HLUT(_i) (0x0000D000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define IAVF_VFQF_HLUT_MAX_INDEX 15
+#define IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT 30
+#define IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT)
+#endif /* _IAVF_REGISTER_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_status.h b/drivers/net/ethernet/intel/iavf/iavf_status.h
new file mode 100644
index 0000000..46e3d1f
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_status.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_STATUS_H_
+#define _IAVF_STATUS_H_
+
+/* Error Codes */
+enum iavf_status {
+	IAVF_SUCCESS				= 0,
+	IAVF_ERR_NVM				= -1,
+	IAVF_ERR_NVM_CHECKSUM			= -2,
+	IAVF_ERR_PHY				= -3,
+	IAVF_ERR_CONFIG				= -4,
+	IAVF_ERR_PARAM				= -5,
+	IAVF_ERR_MAC_TYPE			= -6,
+	IAVF_ERR_UNKNOWN_PHY			= -7,
+	IAVF_ERR_LINK_SETUP			= -8,
+	IAVF_ERR_ADAPTER_STOPPED		= -9,
+	IAVF_ERR_INVALID_MAC_ADDR		= -10,
+	IAVF_ERR_DEVICE_NOT_SUPPORTED		= -11,
+	IAVF_ERR_MASTER_REQUESTS_PENDING	= -12,
+	IAVF_ERR_INVALID_LINK_SETTINGS		= -13,
+	IAVF_ERR_AUTONEG_NOT_COMPLETE		= -14,
+	IAVF_ERR_RESET_FAILED			= -15,
+	IAVF_ERR_SWFW_SYNC			= -16,
+	IAVF_ERR_NO_AVAILABLE_VSI		= -17,
+	IAVF_ERR_NO_MEMORY			= -18,
+	IAVF_ERR_BAD_PTR			= -19,
+	IAVF_ERR_RING_FULL			= -20,
+	IAVF_ERR_INVALID_PD_ID			= -21,
+	IAVF_ERR_INVALID_QP_ID			= -22,
+	IAVF_ERR_INVALID_CQ_ID			= -23,
+	IAVF_ERR_INVALID_CEQ_ID			= -24,
+	IAVF_ERR_INVALID_AEQ_ID			= -25,
+	IAVF_ERR_INVALID_SIZE			= -26,
+	IAVF_ERR_INVALID_ARP_INDEX		= -27,
+	IAVF_ERR_INVALID_FPM_FUNC_ID		= -28,
+	IAVF_ERR_QP_INVALID_MSG_SIZE		= -29,
+	IAVF_ERR_QP_TOOMANY_WRS_POSTED		= -30,
+	IAVF_ERR_INVALID_FRAG_COUNT		= -31,
+	IAVF_ERR_QUEUE_EMPTY			= -32,
+	IAVF_ERR_INVALID_ALIGNMENT		= -33,
+	IAVF_ERR_FLUSHED_QUEUE			= -34,
+	IAVF_ERR_INVALID_PUSH_PAGE_INDEX	= -35,
+	IAVF_ERR_INVALID_IMM_DATA_SIZE		= -36,
+	IAVF_ERR_TIMEOUT			= -37,
+	IAVF_ERR_OPCODE_MISMATCH		= -38,
+	IAVF_ERR_CQP_COMPL_ERROR		= -39,
+	IAVF_ERR_INVALID_VF_ID			= -40,
+	IAVF_ERR_INVALID_HMCFN_ID		= -41,
+	IAVF_ERR_BACKING_PAGE_ERROR		= -42,
+	IAVF_ERR_NO_PBLCHUNKS_AVAILABLE		= -43,
+	IAVF_ERR_INVALID_PBLE_INDEX		= -44,
+	IAVF_ERR_INVALID_SD_INDEX		= -45,
+	IAVF_ERR_INVALID_PAGE_DESC_INDEX	= -46,
+	IAVF_ERR_INVALID_SD_TYPE		= -47,
+	IAVF_ERR_MEMCPY_FAILED			= -48,
+	IAVF_ERR_INVALID_HMC_OBJ_INDEX		= -49,
+	IAVF_ERR_INVALID_HMC_OBJ_COUNT		= -50,
+	IAVF_ERR_INVALID_SRQ_ARM_LIMIT		= -51,
+	IAVF_ERR_SRQ_ENABLED			= -52,
+	IAVF_ERR_ADMIN_QUEUE_ERROR		= -53,
+	IAVF_ERR_ADMIN_QUEUE_TIMEOUT		= -54,
+	IAVF_ERR_BUF_TOO_SHORT			= -55,
+	IAVF_ERR_ADMIN_QUEUE_FULL		= -56,
+	IAVF_ERR_ADMIN_QUEUE_NO_WORK		= -57,
+	IAVF_ERR_BAD_IWARP_CQE			= -58,
+	IAVF_ERR_NVM_BLANK_MODE			= -59,
+	IAVF_ERR_NOT_IMPLEMENTED		= -60,
+	IAVF_ERR_PE_DOORBELL_NOT_ENABLED	= -61,
+	IAVF_ERR_DIAG_TEST_FAILED		= -62,
+	IAVF_ERR_NOT_READY			= -63,
+	IAVF_NOT_SUPPORTED			= -64,
+	IAVF_ERR_FIRMWARE_API_VERSION		= -65,
+	IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR	= -66,
+};
+
+#endif /* _IAVF_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h
similarity index 64%
rename from drivers/net/ethernet/intel/i40evf/i40e_trace.h
rename to drivers/net/ethernet/intel/iavf/iavf_trace.h
index d7a4e68..1058e68 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_trace.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h
@@ -3,34 +3,34 @@
 
 /* Modeled on trace-events-sample.h */
 
-/* The trace subsystem name for i40evf will be "i40evf".
+/* The trace subsystem name for iavf will be "iavf".
  *
- * This file is named i40e_trace.h.
+ * This file is named iavf_trace.h.
  *
  * Since this include file's name is different from the trace
  * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
  * of this file.
  */
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM i40evf
+#define TRACE_SYSTEM iavf
 
 /* See trace-events-sample.h for a detailed description of why this
  * guard clause is different from most normal include files.
  */
-#if !defined(_I40E_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
-#define _I40E_TRACE_H_
+#if !defined(_IAVF_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _IAVF_TRACE_H_
 
 #include <linux/tracepoint.h>
 
 /**
- * i40e_trace() macro enables shared code to refer to trace points
+ * iavf_trace() macro enables shared code to refer to trace points
  * like:
  *
- * trace_i40e{,vf}_example(args...)
+ * trace_iavf_example(args...)
  *
  * ... as:
  *
- * i40e_trace(example, args...)
+ * iavf_trace(example, args...)
  *
  * ... to resolve to the PF or VF version of the tracepoint without
  * ifdefs, and to allow tracepoints to be disabled entirely at build
@@ -39,29 +39,29 @@
  * Trace point should always be referred to in the driver via this
  * macro.
  *
- * Similarly, i40e_trace_enabled(trace_name) wraps references to
- * trace_i40e{,vf}_<trace_name>_enabled() functions.
+ * Similarly, iavf_trace_enabled(trace_name) wraps references to
+ * trace_iavf_<trace_name>_enabled() functions.
  */
-#define _I40E_TRACE_NAME(trace_name) (trace_ ## i40evf ## _ ## trace_name)
-#define I40E_TRACE_NAME(trace_name) _I40E_TRACE_NAME(trace_name)
+#define _IAVF_TRACE_NAME(trace_name) (trace_ ## iavf ## _ ## trace_name)
+#define IAVF_TRACE_NAME(trace_name) _IAVF_TRACE_NAME(trace_name)
 
-#define i40e_trace(trace_name, args...) I40E_TRACE_NAME(trace_name)(args)
+#define iavf_trace(trace_name, args...) IAVF_TRACE_NAME(trace_name)(args)
 
-#define i40e_trace_enabled(trace_name) I40E_TRACE_NAME(trace_name##_enabled)()
+#define iavf_trace_enabled(trace_name) IAVF_TRACE_NAME(trace_name##_enabled)()
 
 /* Events common to PF and VF. Corresponding versions will be defined
- * for both, named trace_i40e_* and trace_i40evf_*. The i40e_trace()
+ * for both, named trace_i40e_* and trace_iavf_*. The iavf_trace()
  * macro above will select the right trace point name for the driver
  * being built from shared code.
  */
 
 /* Events related to a vsi & ring */
 DECLARE_EVENT_CLASS(
-	i40evf_tx_template,
+	iavf_tx_template,
 
-	TP_PROTO(struct i40e_ring *ring,
-		 struct i40e_tx_desc *desc,
-		 struct i40e_tx_buffer *buf),
+	TP_PROTO(struct iavf_ring *ring,
+		 struct iavf_tx_desc *desc,
+		 struct iavf_tx_buffer *buf),
 
 	TP_ARGS(ring, desc, buf),
 
@@ -93,26 +93,26 @@
 );
 
 DEFINE_EVENT(
-	i40evf_tx_template, i40evf_clean_tx_irq,
-	TP_PROTO(struct i40e_ring *ring,
-		 struct i40e_tx_desc *desc,
-		 struct i40e_tx_buffer *buf),
+	iavf_tx_template, iavf_clean_tx_irq,
+	TP_PROTO(struct iavf_ring *ring,
+		 struct iavf_tx_desc *desc,
+		 struct iavf_tx_buffer *buf),
 
 	TP_ARGS(ring, desc, buf));
 
 DEFINE_EVENT(
-	i40evf_tx_template, i40evf_clean_tx_irq_unmap,
-	TP_PROTO(struct i40e_ring *ring,
-		 struct i40e_tx_desc *desc,
-		 struct i40e_tx_buffer *buf),
+	iavf_tx_template, iavf_clean_tx_irq_unmap,
+	TP_PROTO(struct iavf_ring *ring,
+		 struct iavf_tx_desc *desc,
+		 struct iavf_tx_buffer *buf),
 
 	TP_ARGS(ring, desc, buf));
 
 DECLARE_EVENT_CLASS(
-	i40evf_rx_template,
+	iavf_rx_template,
 
-	TP_PROTO(struct i40e_ring *ring,
-		 union i40e_32byte_rx_desc *desc,
+	TP_PROTO(struct iavf_ring *ring,
+		 union iavf_32byte_rx_desc *desc,
 		 struct sk_buff *skb),
 
 	TP_ARGS(ring, desc, skb),
@@ -138,26 +138,26 @@
 );
 
 DEFINE_EVENT(
-	i40evf_rx_template, i40evf_clean_rx_irq,
-	TP_PROTO(struct i40e_ring *ring,
-		 union i40e_32byte_rx_desc *desc,
+	iavf_rx_template, iavf_clean_rx_irq,
+	TP_PROTO(struct iavf_ring *ring,
+		 union iavf_32byte_rx_desc *desc,
 		 struct sk_buff *skb),
 
 	TP_ARGS(ring, desc, skb));
 
 DEFINE_EVENT(
-	i40evf_rx_template, i40evf_clean_rx_irq_rx,
-	TP_PROTO(struct i40e_ring *ring,
-		 union i40e_32byte_rx_desc *desc,
+	iavf_rx_template, iavf_clean_rx_irq_rx,
+	TP_PROTO(struct iavf_ring *ring,
+		 union iavf_32byte_rx_desc *desc,
 		 struct sk_buff *skb),
 
 	TP_ARGS(ring, desc, skb));
 
 DECLARE_EVENT_CLASS(
-	i40evf_xmit_template,
+	iavf_xmit_template,
 
 	TP_PROTO(struct sk_buff *skb,
-		 struct i40e_ring *ring),
+		 struct iavf_ring *ring),
 
 	TP_ARGS(skb, ring),
 
@@ -180,23 +180,23 @@
 );
 
 DEFINE_EVENT(
-	i40evf_xmit_template, i40evf_xmit_frame_ring,
+	iavf_xmit_template, iavf_xmit_frame_ring,
 	TP_PROTO(struct sk_buff *skb,
-		 struct i40e_ring *ring),
+		 struct iavf_ring *ring),
 
 	TP_ARGS(skb, ring));
 
 DEFINE_EVENT(
-	i40evf_xmit_template, i40evf_xmit_frame_ring_drop,
+	iavf_xmit_template, iavf_xmit_frame_ring_drop,
 	TP_PROTO(struct sk_buff *skb,
-		 struct i40e_ring *ring),
+		 struct iavf_ring *ring),
 
 	TP_ARGS(skb, ring));
 
 /* Events unique to the VF. */
 
-#endif /* _I40E_TRACE_H_ */
-/* This must be outside ifdef _I40E_TRACE_H */
+#endif /* _IAVF_TRACE_H_ */
+/* This must be outside ifdef _IAVF_TRACE_H */
 
 /* This trace include file is not located in the .../include/trace
  * with the kernel tracepoint definitions, because we're a loadable
@@ -205,5 +205,5 @@
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
 #undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE i40e_trace
+#define TRACE_INCLUDE_FILE iavf_trace
 #include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
similarity index 71%
rename from drivers/net/ethernet/intel/i40evf/i40e_txrx.c
rename to drivers/net/ethernet/intel/iavf/iavf_txrx.c
index a973071..7a30d5d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -2,34 +2,33 @@
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/prefetch.h>
-#include <net/busy_poll.h>
 
-#include "i40evf.h"
-#include "i40e_trace.h"
-#include "i40e_prototype.h"
+#include "iavf.h"
+#include "iavf_trace.h"
+#include "iavf_prototype.h"
 
 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 				u32 td_tag)
 {
-	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
-			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
-			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
-			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
-			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
+	return cpu_to_le64(IAVF_TX_DESC_DTYPE_DATA |
+			   ((u64)td_cmd  << IAVF_TXD_QW1_CMD_SHIFT) |
+			   ((u64)td_offset << IAVF_TXD_QW1_OFFSET_SHIFT) |
+			   ((u64)size  << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) |
+			   ((u64)td_tag  << IAVF_TXD_QW1_L2TAG1_SHIFT));
 }
 
-#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+#define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS)
 
 /**
- * i40e_unmap_and_free_tx_resource - Release a Tx buffer
+ * iavf_unmap_and_free_tx_resource - Release a Tx buffer
  * @ring:      the ring that owns the buffer
  * @tx_buffer: the buffer to free
  **/
-static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
-					    struct i40e_tx_buffer *tx_buffer)
+static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring,
+					    struct iavf_tx_buffer *tx_buffer)
 {
 	if (tx_buffer->skb) {
-		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+		if (tx_buffer->tx_flags & IAVF_TX_FLAGS_FD_SB)
 			kfree(tx_buffer->raw_buf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
@@ -52,10 +51,10 @@
 }
 
 /**
- * i40evf_clean_tx_ring - Free any empty Tx buffers
+ * iavf_clean_tx_ring - Free any empty Tx buffers
  * @tx_ring: ring to be cleaned
  **/
-void i40evf_clean_tx_ring(struct i40e_ring *tx_ring)
+void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
 {
 	unsigned long bi_size;
 	u16 i;
@@ -66,9 +65,9 @@
 
 	/* Free all the Tx ring sk_buffs */
 	for (i = 0; i < tx_ring->count; i++)
-		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+		iavf_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
 
-	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+	bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count;
 	memset(tx_ring->tx_bi, 0, bi_size);
 
 	/* Zero out the descriptor ring */
@@ -85,14 +84,14 @@
 }
 
 /**
- * i40evf_free_tx_resources - Free Tx resources per queue
+ * iavf_free_tx_resources - Free Tx resources per queue
  * @tx_ring: Tx descriptor ring for a specific queue
  *
  * Free all transmit software resources
  **/
-void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
+void iavf_free_tx_resources(struct iavf_ring *tx_ring)
 {
-	i40evf_clean_tx_ring(tx_ring);
+	iavf_clean_tx_ring(tx_ring);
 	kfree(tx_ring->tx_bi);
 	tx_ring->tx_bi = NULL;
 
@@ -104,14 +103,14 @@
 }
 
 /**
- * i40evf_get_tx_pending - how many Tx descriptors not processed
+ * iavf_get_tx_pending - how many Tx descriptors not processed
  * @ring: the ring of descriptors
  * @in_sw: is tx_pending being checked in SW or HW
  *
  * Since there is no access to the ring head register
  * in XL710, we need to use our local copies
  **/
-u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
+u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
 {
 	u32 head, tail;
 
@@ -126,15 +125,15 @@
 }
 
 /**
- * i40evf_detect_recover_hung - Function to detect and recover hung_queues
+ * iavf_detect_recover_hung - Function to detect and recover hung_queues
  * @vsi:  pointer to vsi struct with tx queues
  *
  * VSI has netdev and netdev has TX queues. This function is to check each of
  * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
  **/
-void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
+void iavf_detect_recover_hung(struct iavf_vsi *vsi)
 {
-	struct i40e_ring *tx_ring = NULL;
+	struct iavf_ring *tx_ring = NULL;
 	struct net_device *netdev;
 	unsigned int i;
 	int packets;
@@ -142,7 +141,7 @@
 	if (!vsi)
 		return;
 
-	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+	if (test_bit(__IAVF_VSI_DOWN, vsi->state))
 		return;
 
 	netdev = vsi->netdev;
@@ -164,16 +163,16 @@
 			 */
 			packets = tx_ring->stats.packets & INT_MAX;
 			if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
-				i40evf_force_wb(vsi, tx_ring->q_vector);
+				iavf_force_wb(vsi, tx_ring->q_vector);
 				continue;
 			}
 
 			/* Memory barrier between read of packet count and call
-			 * to i40evf_get_tx_pending()
+			 * to iavf_get_tx_pending()
 			 */
 			smp_rmb();
 			tx_ring->tx_stats.prev_pkt_ctr =
-			  i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
+			  iavf_get_tx_pending(tx_ring, true) ? packets : -1;
 		}
 	}
 }
@@ -181,28 +180,28 @@
 #define WB_STRIDE 4
 
 /**
- * i40e_clean_tx_irq - Reclaim resources after transmit completes
+ * iavf_clean_tx_irq - Reclaim resources after transmit completes
  * @vsi: the VSI we care about
  * @tx_ring: Tx ring to clean
  * @napi_budget: Used to determine if we are in netpoll
  *
  * Returns true if there's any budget left (e.g. the clean is finished)
  **/
-static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
-			      struct i40e_ring *tx_ring, int napi_budget)
+static bool iavf_clean_tx_irq(struct iavf_vsi *vsi,
+			      struct iavf_ring *tx_ring, int napi_budget)
 {
-	u16 i = tx_ring->next_to_clean;
-	struct i40e_tx_buffer *tx_buf;
-	struct i40e_tx_desc *tx_desc;
+	int i = tx_ring->next_to_clean;
+	struct iavf_tx_buffer *tx_buf;
+	struct iavf_tx_desc *tx_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
 	unsigned int budget = vsi->work_limit;
 
 	tx_buf = &tx_ring->tx_bi[i];
-	tx_desc = I40E_TX_DESC(tx_ring, i);
+	tx_desc = IAVF_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
 	do {
-		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
+		struct iavf_tx_desc *eop_desc = tx_buf->next_to_watch;
 
 		/* if next_to_watch is not set then there is no work pending */
 		if (!eop_desc)
@@ -211,10 +210,10 @@
 		/* prevent any other reads prior to eop_desc */
 		smp_rmb();
 
-		i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+		iavf_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
 		/* if the descriptor isn't done, no work yet to do */
 		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		      cpu_to_le64(IAVF_TX_DESC_DTYPE_DESC_DONE)))
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -239,7 +238,7 @@
 
 		/* unmap remaining buffers */
 		while (tx_desc != eop_desc) {
-			i40e_trace(clean_tx_irq_unmap,
+			iavf_trace(clean_tx_irq_unmap,
 				   tx_ring, tx_desc, tx_buf);
 
 			tx_buf++;
@@ -248,7 +247,7 @@
 			if (unlikely(!i)) {
 				i -= tx_ring->count;
 				tx_buf = tx_ring->tx_bi;
-				tx_desc = I40E_TX_DESC(tx_ring, 0);
+				tx_desc = IAVF_TX_DESC(tx_ring, 0);
 			}
 
 			/* unmap any remaining paged data */
@@ -268,7 +267,7 @@
 		if (unlikely(!i)) {
 			i -= tx_ring->count;
 			tx_buf = tx_ring->tx_bi;
-			tx_desc = I40E_TX_DESC(tx_ring, 0);
+			tx_desc = IAVF_TX_DESC(tx_ring, 0);
 		}
 
 		prefetch(tx_desc);
@@ -286,18 +285,18 @@
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
-	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+	if (tx_ring->flags & IAVF_TXR_FLAGS_WB_ON_ITR) {
 		/* check to see if there are < 4 descriptors
 		 * waiting to be written back, then kick the hardware to force
 		 * them to be written back in case we stay in NAPI.
 		 * In this mode on X722 we do not enable Interrupt.
 		 */
-		unsigned int j = i40evf_get_tx_pending(tx_ring, false);
+		unsigned int j = iavf_get_tx_pending(tx_ring, false);
 
 		if (budget &&
 		    ((j / WB_STRIDE) == 0) && (j > 0) &&
-		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
-		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+		    !test_bit(__IAVF_VSI_DOWN, vsi->state) &&
+		    (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count))
 			tx_ring->arm_wb = true;
 	}
 
@@ -307,14 +306,14 @@
 
 #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
-		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		     (IAVF_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
 		/* Make sure that anybody stopping the queue after this
 		 * sees the new next_to_clean.
 		 */
 		smp_mb();
 		if (__netif_subqueue_stopped(tx_ring->netdev,
 					     tx_ring->queue_index) &&
-		   !test_bit(__I40E_VSI_DOWN, vsi->state)) {
+		   !test_bit(__IAVF_VSI_DOWN, vsi->state)) {
 			netif_wake_subqueue(tx_ring->netdev,
 					    tx_ring->queue_index);
 			++tx_ring->tx_stats.restart_queue;
@@ -325,75 +324,75 @@
 }
 
 /**
- * i40evf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
+ * iavf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
  * @vsi: the VSI we care about
  * @q_vector: the vector on which to enable writeback
  *
  **/
-static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
-				  struct i40e_q_vector *q_vector)
+static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi,
+				  struct iavf_q_vector *q_vector)
 {
 	u16 flags = q_vector->tx.ring[0].flags;
 	u32 val;
 
-	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
+	if (!(flags & IAVF_TXR_FLAGS_WB_ON_ITR))
 		return;
 
 	if (q_vector->arm_wb_state)
 		return;
 
-	val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
-	      I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */
+	val = IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
+	      IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */
 
 	wr32(&vsi->back->hw,
-	     I40E_VFINT_DYN_CTLN1(q_vector->reg_idx), val);
+	     IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), val);
 	q_vector->arm_wb_state = true;
 }
 
 /**
- * i40evf_force_wb - Issue SW Interrupt so HW does a wb
+ * iavf_force_wb - Issue SW Interrupt so HW does a wb
  * @vsi: the VSI we care about
  * @q_vector: the vector  on which to force writeback
  *
  **/
-void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector)
 {
-	u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-		  I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
-		  I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
-		  I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
+	u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+		  IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
+		  IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
+		  IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
 		  /* allow 00 to be written to the index */;
 
 	wr32(&vsi->back->hw,
-	     I40E_VFINT_DYN_CTLN1(q_vector->reg_idx),
+	     IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx),
 	     val);
 }
 
-static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
-					struct i40e_ring_container *rc)
+static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector,
+					struct iavf_ring_container *rc)
 {
 	return &q_vector->rx == rc;
 }
 
-static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+static inline unsigned int iavf_itr_divisor(struct iavf_q_vector *q_vector)
 {
 	unsigned int divisor;
 
 	switch (q_vector->adapter->link_speed) {
-	case I40E_LINK_SPEED_40GB:
-		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+	case IAVF_LINK_SPEED_40GB:
+		divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 1024;
 		break;
-	case I40E_LINK_SPEED_25GB:
-	case I40E_LINK_SPEED_20GB:
-		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+	case IAVF_LINK_SPEED_25GB:
+	case IAVF_LINK_SPEED_20GB:
+		divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 512;
 		break;
 	default:
-	case I40E_LINK_SPEED_10GB:
-		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+	case IAVF_LINK_SPEED_10GB:
+		divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 256;
 		break;
-	case I40E_LINK_SPEED_1GB:
-	case I40E_LINK_SPEED_100MB:
-		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+	case IAVF_LINK_SPEED_1GB:
+	case IAVF_LINK_SPEED_100MB:
+		divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 32;
 		break;
 	}
 
@@ -401,7 +400,7 @@
 }
 
 /**
- * i40e_update_itr - update the dynamic ITR value based on statistics
+ * iavf_update_itr - update the dynamic ITR value based on statistics
  * @q_vector: structure containing interrupt and ring information
  * @rc: structure containing ring performance data
  *
@@ -413,8 +412,8 @@
  * on testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static void i40e_update_itr(struct i40e_q_vector *q_vector,
-			    struct i40e_ring_container *rc)
+static void iavf_update_itr(struct iavf_q_vector *q_vector,
+			    struct iavf_ring_container *rc)
 {
 	unsigned int avg_wire_size, packets, bytes, itr;
 	unsigned long next_update = jiffies;
@@ -428,9 +427,9 @@
 	/* For Rx we want to push the delay up and default to low latency.
-	 * for Tx we want to pull the delay down and default to high latency.
+	 * For Tx we want to pull the delay down and default to high latency.
 	 */
-	itr = i40e_container_is_rx(q_vector, rc) ?
-	      I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
-	      I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+	itr = iavf_container_is_rx(q_vector, rc) ?
+	      IAVF_ITR_ADAPTIVE_MIN_USECS | IAVF_ITR_ADAPTIVE_LATENCY :
+	      IAVF_ITR_ADAPTIVE_MAX_USECS | IAVF_ITR_ADAPTIVE_LATENCY;
 
 	/* If we didn't update within up to 1 - 2 jiffies we can assume
 	 * that either packets are coming in so slow there hasn't been
@@ -454,15 +453,15 @@
 	packets = rc->total_packets;
 	bytes = rc->total_bytes;
 
-	if (i40e_container_is_rx(q_vector, rc)) {
+	if (iavf_container_is_rx(q_vector, rc)) {
 		/* If Rx there are 1 to 4 packets and bytes are less than
 		 * 9000 assume insufficient data to use bulk rate limiting
 		 * approach unless Tx is already in bulk rate limiting. We
 		 * are likely latency driven.
 		 */
 		if (packets && packets < 4 && bytes < 9000 &&
-		    (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
-			itr = I40E_ITR_ADAPTIVE_LATENCY;
+		    (q_vector->tx.target_itr & IAVF_ITR_ADAPTIVE_LATENCY)) {
+			itr = IAVF_ITR_ADAPTIVE_LATENCY;
 			goto adjust_by_size;
 		}
 	} else if (packets < 4) {
@@ -471,15 +470,15 @@
 		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
 		 * that the Rx can relax.
 		 */
-		if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
-		    (q_vector->rx.target_itr & I40E_ITR_MASK) ==
-		     I40E_ITR_ADAPTIVE_MAX_USECS)
+		if (rc->target_itr == IAVF_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & IAVF_ITR_MASK) ==
+		     IAVF_ITR_ADAPTIVE_MAX_USECS)
 			goto clear_counts;
 	} else if (packets > 32) {
 		/* If we have processed over 32 packets in a single interrupt
 		 * for Tx assume we need to switch over to "bulk" mode.
 		 */
-		rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+		rc->target_itr &= ~IAVF_ITR_ADAPTIVE_LATENCY;
 	}
 
 	/* We have no packets to actually measure against. This means
@@ -491,17 +490,17 @@
 	 * fixed amount.
 	 */
 	if (packets < 56) {
-		itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
-		if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
-			itr &= I40E_ITR_ADAPTIVE_LATENCY;
-			itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+		itr = rc->target_itr + IAVF_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & IAVF_ITR_MASK) > IAVF_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= IAVF_ITR_ADAPTIVE_LATENCY;
+			itr += IAVF_ITR_ADAPTIVE_MAX_USECS;
 		}
 		goto clear_counts;
 	}
 
 	if (packets <= 256) {
 		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
-		itr &= I40E_ITR_MASK;
+		itr &= IAVF_ITR_MASK;
 
 		/* Between 56 and 112 is our "goldilocks" zone where we are
 		 * working out "just right". Just report that our current
@@ -516,9 +515,9 @@
 		 * in half per interrupt.
 		 */
 		itr /= 2;
-		itr &= I40E_ITR_MASK;
-		if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
-			itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+		itr &= IAVF_ITR_MASK;
+		if (itr < IAVF_ITR_ADAPTIVE_MIN_USECS)
+			itr = IAVF_ITR_ADAPTIVE_MIN_USECS;
 
 		goto clear_counts;
 	}
@@ -529,7 +528,7 @@
 	 * though for smaller packet sizes there isn't much we can do as
 	 * NAPI polling will likely be kicking in sooner rather than later.
 	 */
-	itr = I40E_ITR_ADAPTIVE_BULK;
+	itr = IAVF_ITR_ADAPTIVE_BULK;
 
 adjust_by_size:
 	/* If packet counts are 256 or greater we can assume we have a gross
@@ -577,7 +576,7 @@
 	/* If we are in low latency mode halve our delay which doubles the
 	 * rate to somewhere between 100K to 16K ints/sec
 	 */
-	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+	if (itr & IAVF_ITR_ADAPTIVE_LATENCY)
 		avg_wire_size /= 2;
 
 	/* Resultant value is 256 times larger than it needs to be. This
@@ -587,12 +586,12 @@
 	 * Use addition as we have already recorded the new latency flag
 	 * for the ITR value.
 	 */
-	itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
-	       I40E_ITR_ADAPTIVE_MIN_INC;
+	itr += DIV_ROUND_UP(avg_wire_size, iavf_itr_divisor(q_vector)) *
+	       IAVF_ITR_ADAPTIVE_MIN_INC;
 
-	if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
-		itr &= I40E_ITR_ADAPTIVE_LATENCY;
-		itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+	if ((itr & IAVF_ITR_MASK) > IAVF_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= IAVF_ITR_ADAPTIVE_LATENCY;
+		itr += IAVF_ITR_ADAPTIVE_MAX_USECS;
 	}
 
 clear_counts:
@@ -607,12 +606,12 @@
 }
 
 /**
- * i40evf_setup_tx_descriptors - Allocate the Tx descriptors
+ * iavf_setup_tx_descriptors - Allocate the Tx descriptors
  * @tx_ring: the tx ring to set up
  *
  * Return 0 on success, negative on error
  **/
-int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
+int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
 {
 	struct device *dev = tx_ring->dev;
 	int bi_size;
@@ -622,13 +621,13 @@
 
 	/* warn if we are about to overwrite the pointer */
 	WARN_ON(tx_ring->tx_bi);
-	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+	bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count;
 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
 	if (!tx_ring->tx_bi)
 		goto err;
 
 	/* round up to nearest 4K */
-	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	tx_ring->size = tx_ring->count * sizeof(struct iavf_tx_desc);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -650,10 +649,10 @@
 }
 
 /**
- * i40evf_clean_rx_ring - Free Rx buffers
+ * iavf_clean_rx_ring - Free Rx buffers
  * @rx_ring: ring to be cleaned
  **/
-void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
+void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
 {
 	unsigned long bi_size;
 	u16 i;
@@ -669,7 +668,7 @@
 
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
-		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+		struct iavf_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
 
 		if (!rx_bi->page)
 			continue;
@@ -685,9 +684,9 @@
 
 		/* free resources associated with mapping */
 		dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
-				     i40e_rx_pg_size(rx_ring),
+				     iavf_rx_pg_size(rx_ring),
 				     DMA_FROM_DEVICE,
-				     I40E_RX_DMA_ATTR);
+				     IAVF_RX_DMA_ATTR);
 
 		__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
 
@@ -695,7 +694,7 @@
 		rx_bi->page_offset = 0;
 	}
 
-	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
+	bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count;
 	memset(rx_ring->rx_bi, 0, bi_size);
 
 	/* Zero out the descriptor ring */
@@ -707,14 +706,14 @@
 }
 
 /**
- * i40evf_free_rx_resources - Free Rx resources
+ * iavf_free_rx_resources - Free Rx resources
  * @rx_ring: ring to clean the resources from
  *
  * Free all receive software resources
  **/
-void i40evf_free_rx_resources(struct i40e_ring *rx_ring)
+void iavf_free_rx_resources(struct iavf_ring *rx_ring)
 {
-	i40evf_clean_rx_ring(rx_ring);
+	iavf_clean_rx_ring(rx_ring);
 	kfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
 
@@ -726,19 +725,19 @@
 }
 
 /**
- * i40evf_setup_rx_descriptors - Allocate Rx descriptors
+ * iavf_setup_rx_descriptors - Allocate Rx descriptors
  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
  *
  * Returns 0 on success, negative on failure
  **/
-int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring)
+int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
 	int bi_size;
 
 	/* warn if we are about to overwrite the pointer */
 	WARN_ON(rx_ring->rx_bi);
-	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
+	bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count;
 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
 	if (!rx_ring->rx_bi)
 		goto err;
@@ -746,7 +745,7 @@
 	u64_stats_init(&rx_ring->syncp);
 
 	/* Round up to nearest 4K */
-	rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+	rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc);
 	rx_ring->size = ALIGN(rx_ring->size, 4096);
 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
 					   &rx_ring->dma, GFP_KERNEL);
@@ -769,11 +768,11 @@
 }
 
 /**
- * i40e_release_rx_desc - Store the new tail and head values
+ * iavf_release_rx_desc - Store the new tail and head values
  * @rx_ring: ring to bump
  * @val: new head index
  **/
-static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+static inline void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val)
 {
 	rx_ring->next_to_use = val;
 
@@ -790,26 +789,26 @@
 }
 
 /**
- * i40e_rx_offset - Return expected offset into page to access data
+ * iavf_rx_offset - Return expected offset into page to access data
  * @rx_ring: Ring we are requesting offset of
  *
  * Returns the offset value for ring into the data buffer.
  */
-static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
+static inline unsigned int iavf_rx_offset(struct iavf_ring *rx_ring)
 {
-	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
+	return ring_uses_build_skb(rx_ring) ? IAVF_SKB_PAD : 0;
 }
 
 /**
- * i40e_alloc_mapped_page - recycle or make a new page
+ * iavf_alloc_mapped_page - recycle or make a new page
  * @rx_ring: ring to use
  * @bi: rx_buffer struct to modify
  *
  * Returns true if the page was successfully allocated or
  * reused.
  **/
-static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
-				   struct i40e_rx_buffer *bi)
+static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring,
+				   struct iavf_rx_buffer *bi)
 {
 	struct page *page = bi->page;
 	dma_addr_t dma;
@@ -821,7 +820,7 @@
 	}
 
 	/* alloc new page for storage */
-	page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
+	page = dev_alloc_pages(iavf_rx_pg_order(rx_ring));
 	if (unlikely(!page)) {
 		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
@@ -829,22 +828,22 @@
 
 	/* map page for use */
 	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
-				 i40e_rx_pg_size(rx_ring),
+				 iavf_rx_pg_size(rx_ring),
 				 DMA_FROM_DEVICE,
-				 I40E_RX_DMA_ATTR);
+				 IAVF_RX_DMA_ATTR);
 
 	/* if mapping failed free memory back to system since
 	 * there isn't much point in holding memory we can't use
 	 */
 	if (dma_mapping_error(rx_ring->dev, dma)) {
-		__free_pages(page, i40e_rx_pg_order(rx_ring));
+		__free_pages(page, iavf_rx_pg_order(rx_ring));
 		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
 	}
 
 	bi->dma = dma;
 	bi->page = page;
-	bi->page_offset = i40e_rx_offset(rx_ring);
+	bi->page_offset = iavf_rx_offset(rx_ring);
 
 	/* initialize pagecnt_bias to 1 representing we fully own page */
 	bi->pagecnt_bias = 1;
@@ -853,15 +852,15 @@
 }
 
 /**
- * i40e_receive_skb - Send a completed packet up the stack
+ * iavf_receive_skb - Send a completed packet up the stack
  * @rx_ring:  rx ring in play
  * @skb: packet to send up
  * @vlan_tag: vlan tag for packet
  **/
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
+static void iavf_receive_skb(struct iavf_ring *rx_ring,
 			     struct sk_buff *skb, u16 vlan_tag)
 {
-	struct i40e_q_vector *q_vector = rx_ring->q_vector;
+	struct iavf_q_vector *q_vector = rx_ring->q_vector;
 
 	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    (vlan_tag & VLAN_VID_MASK))
@@ -871,27 +870,27 @@
 }
 
 /**
- * i40evf_alloc_rx_buffers - Replace used receive buffers
+ * iavf_alloc_rx_buffers - Replace used receive buffers
  * @rx_ring: ring to place buffers on
  * @cleaned_count: number of buffers to replace
  *
  * Returns false if all allocations were successful, true if any fail
  **/
-bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
 {
 	u16 ntu = rx_ring->next_to_use;
-	union i40e_rx_desc *rx_desc;
-	struct i40e_rx_buffer *bi;
+	union iavf_rx_desc *rx_desc;
+	struct iavf_rx_buffer *bi;
 
 	/* do nothing if no valid netdev defined */
 	if (!rx_ring->netdev || !cleaned_count)
 		return false;
 
-	rx_desc = I40E_RX_DESC(rx_ring, ntu);
+	rx_desc = IAVF_RX_DESC(rx_ring, ntu);
 	bi = &rx_ring->rx_bi[ntu];
 
 	do {
-		if (!i40e_alloc_mapped_page(rx_ring, bi))
+		if (!iavf_alloc_mapped_page(rx_ring, bi))
 			goto no_buffers;
 
 		/* sync the buffer for use by the device */
@@ -909,7 +908,7 @@
 		bi++;
 		ntu++;
 		if (unlikely(ntu == rx_ring->count)) {
-			rx_desc = I40E_RX_DESC(rx_ring, 0);
+			rx_desc = IAVF_RX_DESC(rx_ring, 0);
 			bi = rx_ring->rx_bi;
 			ntu = 0;
 		}
@@ -921,13 +920,13 @@
 	} while (cleaned_count);
 
 	if (rx_ring->next_to_use != ntu)
-		i40e_release_rx_desc(rx_ring, ntu);
+		iavf_release_rx_desc(rx_ring, ntu);
 
 	return false;
 
 no_buffers:
 	if (rx_ring->next_to_use != ntu)
-		i40e_release_rx_desc(rx_ring, ntu);
+		iavf_release_rx_desc(rx_ring, ntu);
 
 	/* make sure to come back via polling to try again after
 	 * allocation failure
@@ -936,27 +935,27 @@
 }
 
 /**
- * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
+ * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum
  * @vsi: the VSI we care about
  * @skb: skb currently being received and modified
  * @rx_desc: the receive descriptor
  **/
-static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
+static inline void iavf_rx_checksum(struct iavf_vsi *vsi,
 				    struct sk_buff *skb,
-				    union i40e_rx_desc *rx_desc)
+				    union iavf_rx_desc *rx_desc)
 {
-	struct i40e_rx_ptype_decoded decoded;
+	struct iavf_rx_ptype_decoded decoded;
 	u32 rx_error, rx_status;
 	bool ipv4, ipv6;
 	u8 ptype;
 	u64 qword;
 
 	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-	ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
-	rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
-		   I40E_RXD_QW1_ERROR_SHIFT;
-	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
-		    I40E_RXD_QW1_STATUS_SHIFT;
+	ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT;
+	rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >>
+		   IAVF_RXD_QW1_ERROR_SHIFT;
+	rx_status = (qword & IAVF_RXD_QW1_STATUS_MASK) >>
+		    IAVF_RXD_QW1_STATUS_SHIFT;
 	decoded = decode_rx_desc_ptype(ptype);
 
 	skb->ip_summed = CHECKSUM_NONE;
@@ -968,45 +967,45 @@
 		return;
 
 	/* did the hardware decode the packet and checksum? */
-	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+	if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
 		return;
 
 	/* both known and outer_ip must be set for the below code to work */
 	if (!(decoded.known && decoded.outer_ip))
 		return;
 
-	ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
-	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
-	ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
-	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
+	ipv4 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6);
 
 	if (ipv4 &&
-	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
-			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+	    (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) |
+			 BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT))))
 		goto checksum_fail;
 
 	/* likely incorrect csum if alternate IP extension headers found */
 	if (ipv6 &&
-	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+	    rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))
 		/* don't increment checksum err here, non-fatal err */
 		return;
 
 	/* there was some L4 error, count error and punt packet to the stack */
-	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
+	if (rx_error & BIT(IAVF_RX_DESC_ERROR_L4E_SHIFT))
 		goto checksum_fail;
 
 	/* handle packets that were not able to be checksummed due
 	 * to arrival speed, in this case the stack can compute
 	 * the csum.
 	 */
-	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
+	if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT))
 		return;
 
 	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
 	switch (decoded.inner_prot) {
-	case I40E_RX_PTYPE_INNER_PROT_TCP:
-	case I40E_RX_PTYPE_INNER_PROT_UDP:
-	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+	case IAVF_RX_PTYPE_INNER_PROT_TCP:
+	case IAVF_RX_PTYPE_INNER_PROT_UDP:
+	case IAVF_RX_PTYPE_INNER_PROT_SCTP:
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		/* fall though */
+		/* fall through */
 	default:
@@ -1020,56 +1019,56 @@
 }
 
 /**
- * i40e_ptype_to_htype - get a hash type
+ * iavf_ptype_to_htype - get a hash type
  * @ptype: the ptype value from the descriptor
  *
  * Returns a hash type to be used by skb_set_hash
  **/
-static inline int i40e_ptype_to_htype(u8 ptype)
+static inline int iavf_ptype_to_htype(u8 ptype)
 {
-	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
+	struct iavf_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
 
 	if (!decoded.known)
 		return PKT_HASH_TYPE_NONE;
 
-	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
-	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+	if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP &&
+	    decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4)
 		return PKT_HASH_TYPE_L4;
-	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
-		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+	else if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP &&
+		 decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3)
 		return PKT_HASH_TYPE_L3;
 	else
 		return PKT_HASH_TYPE_L2;
 }
 
 /**
- * i40e_rx_hash - set the hash value in the skb
+ * iavf_rx_hash - set the hash value in the skb
  * @ring: descriptor ring
  * @rx_desc: specific descriptor
  * @skb: skb currently being received and modified
  * @rx_ptype: Rx packet type
  **/
-static inline void i40e_rx_hash(struct i40e_ring *ring,
-				union i40e_rx_desc *rx_desc,
+static inline void iavf_rx_hash(struct iavf_ring *ring,
+				union iavf_rx_desc *rx_desc,
 				struct sk_buff *skb,
 				u8 rx_ptype)
 {
 	u32 hash;
 	const __le64 rss_mask =
-		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
-			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
+		cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH <<
+			    IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT);
 
-	if (ring->netdev->features & NETIF_F_RXHASH)
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
 		return;
 
 	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
 		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
-		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
+		skb_set_hash(skb, hash, iavf_ptype_to_htype(rx_ptype));
 	}
 }
 
 /**
- * i40evf_process_skb_fields - Populate skb header fields from Rx descriptor
+ * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
  * @rx_ring: rx descriptor ring packet is being transacted on
  * @rx_desc: pointer to the EOP Rx descriptor
  * @skb: pointer to current skb being populated
@@ -1080,13 +1079,13 @@
  * other fields within the skb.
  **/
 static inline
-void i40evf_process_skb_fields(struct i40e_ring *rx_ring,
-			       union i40e_rx_desc *rx_desc, struct sk_buff *skb,
-			       u8 rx_ptype)
+void iavf_process_skb_fields(struct iavf_ring *rx_ring,
+			     union iavf_rx_desc *rx_desc, struct sk_buff *skb,
+			     u8 rx_ptype)
 {
-	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+	iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 
-	i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
+	iavf_rx_checksum(rx_ring->vsi, skb, rx_desc);
 
 	skb_record_rx_queue(skb, rx_ring->queue_index);
 
@@ -1095,7 +1094,7 @@
 }
 
 /**
- * i40e_cleanup_headers - Correct empty headers
+ * iavf_cleanup_headers - Correct empty headers
  * @rx_ring: rx descriptor ring packet is being transacted on
  * @skb: pointer to current skb being fixed
  *
@@ -1107,7 +1106,7 @@
  *
  * Returns true if an error was encountered and skb was freed.
  **/
-static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
+static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb)
 {
 	/* if eth_skb_pad returns an error the skb was freed */
 	if (eth_skb_pad(skb))
@@ -1117,16 +1116,16 @@
 }
 
 /**
- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+ * iavf_reuse_rx_page - page flip buffer and store it back on the ring
  * @rx_ring: rx descriptor ring to store buffers on
  * @old_buff: donor buffer to have page reused
  *
  * Synchronizes page for reuse by the adapter
  **/
-static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
-			       struct i40e_rx_buffer *old_buff)
+static void iavf_reuse_rx_page(struct iavf_ring *rx_ring,
+			       struct iavf_rx_buffer *old_buff)
 {
-	struct i40e_rx_buffer *new_buff;
+	struct iavf_rx_buffer *new_buff;
 	u16 nta = rx_ring->next_to_alloc;
 
 	new_buff = &rx_ring->rx_bi[nta];
@@ -1143,20 +1142,20 @@
 }
 
 /**
- * i40e_page_is_reusable - check if any reuse is possible
+ * iavf_page_is_reusable - check if any reuse is possible
  * @page: page struct to check
  *
  * A page is not reusable if it was allocated under low memory
  * conditions, or it's not in the same NUMA node as this CPU.
  */
-static inline bool i40e_page_is_reusable(struct page *page)
+static inline bool iavf_page_is_reusable(struct page *page)
 {
 	return (page_to_nid(page) == numa_mem_id()) &&
 		!page_is_pfmemalloc(page);
 }
 
 /**
- * i40e_can_reuse_rx_page - Determine if this page can be reused by
+ * iavf_can_reuse_rx_page - Determine if this page can be reused by
  * the adapter for another receive
  *
  * @rx_buffer: buffer containing the page
@@ -1182,13 +1181,13 @@
  *
  * In either case, if the page is reusable its refcount is increased.
  **/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
+static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer)
 {
 	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
 	struct page *page = rx_buffer->page;
 
 	/* Is any reuse possible? */
-	if (unlikely(!i40e_page_is_reusable(page)))
+	if (unlikely(!iavf_page_is_reusable(page)))
 		return false;
 
 #if (PAGE_SIZE < 8192)
@@ -1196,9 +1195,9 @@
 	if (unlikely((page_count(page) - pagecnt_bias) > 1))
 		return false;
 #else
-#define I40E_LAST_OFFSET \
-	(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
-	if (rx_buffer->page_offset > I40E_LAST_OFFSET)
+#define IAVF_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IAVF_RXBUFFER_2048)
+	if (rx_buffer->page_offset > IAVF_LAST_OFFSET)
 		return false;
 #endif
 
@@ -1215,7 +1214,7 @@
 }
 
 /**
- * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
  * @skb: sk_buff to place the data into
@@ -1226,17 +1225,20 @@
  *
  * The function will then update the page offset.
  **/
-static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
-			     struct i40e_rx_buffer *rx_buffer,
+static void iavf_add_rx_frag(struct iavf_ring *rx_ring,
+			     struct iavf_rx_buffer *rx_buffer,
 			     struct sk_buff *skb,
 			     unsigned int size)
 {
 #if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+	unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
+	unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring));
 #endif
 
+	if (!size)
+		return;
+
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
 			rx_buffer->page_offset, size, truesize);
 
@@ -1249,17 +1251,20 @@
 }
 
 /**
- * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
+ * iavf_get_rx_buffer - Fetch Rx buffer and synchronize data for use
  * @rx_ring: rx descriptor ring to transact packets on
  * @size: size of buffer to add to skb
  *
  * This function will pull an Rx buffer from the ring and synchronize it
  * for use by the CPU.
  */
-static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
+static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
 						 const unsigned int size)
 {
-	struct i40e_rx_buffer *rx_buffer;
+	struct iavf_rx_buffer *rx_buffer;
+
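+	/* a zero-length descriptor has no buffer attached; return NULL
+	 * so that callers can skip it
+	 */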
+	if (!size)
+		return NULL;
 
 	rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
 	prefetchw(rx_buffer->page);
@@ -1278,7 +1283,7 @@
 }
 
 /**
- * i40e_construct_skb - Allocate skb and populate it
+ * iavf_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: rx buffer to pull data from
  * @size: size of buffer to add to skb
@@ -1287,20 +1292,23 @@
  * data from the current receive descriptor, taking care to set up the
  * skb correctly.
  */
-static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
-					  struct i40e_rx_buffer *rx_buffer,
+static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
+					  struct iavf_rx_buffer *rx_buffer,
 					  unsigned int size)
 {
-	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	void *va;
 #if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+	unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
 #else
 	unsigned int truesize = SKB_DATA_ALIGN(size);
 #endif
 	unsigned int headlen;
 	struct sk_buff *skb;
 
+	if (!rx_buffer)
+		return NULL;
 	/* prefetch first cache line of first page */
+	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
 	prefetch(va);
 #if L1_CACHE_BYTES < 128
 	prefetch(va + L1_CACHE_BYTES);
@@ -1308,15 +1316,15 @@
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-			       I40E_RX_HDR_SIZE,
+			       IAVF_RX_HDR_SIZE,
 			       GFP_ATOMIC | __GFP_NOWARN);
 	if (unlikely(!skb))
 		return NULL;
 
 	/* Determine available headroom for copy */
 	headlen = size;
-	if (headlen > I40E_RX_HDR_SIZE)
-		headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+	if (headlen > IAVF_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
 	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
@@ -1343,7 +1351,7 @@
 }
 
 /**
- * i40e_build_skb - Build skb around an existing buffer
+ * iavf_build_skb - Build skb around an existing buffer
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buffer: Rx buffer to pull data from
  * @size: size of buffer to add to skb
@@ -1351,31 +1359,34 @@
  * This function builds an skb around an existing Rx buffer, taking care
  * to set up the skb correctly and avoid any memcpy overhead.
  */
-static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
-				      struct i40e_rx_buffer *rx_buffer,
+static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
+				      struct iavf_rx_buffer *rx_buffer,
 				      unsigned int size)
 {
-	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	void *va;
 #if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+	unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
 #else
 	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
-				SKB_DATA_ALIGN(I40E_SKB_PAD + size);
+				SKB_DATA_ALIGN(IAVF_SKB_PAD + size);
 #endif
 	struct sk_buff *skb;
 
+	if (!rx_buffer)
+		return NULL;
 	/* prefetch first cache line of first page */
+	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
 	prefetch(va);
 #if L1_CACHE_BYTES < 128
 	prefetch(va + L1_CACHE_BYTES);
 #endif
 	/* build an skb around the page buffer */
-	skb = build_skb(va - I40E_SKB_PAD, truesize);
+	skb = build_skb(va - IAVF_SKB_PAD, truesize);
 	if (unlikely(!skb))
 		return NULL;
 
 	/* update pointers within the skb to store the data */
-	skb_reserve(skb, I40E_SKB_PAD);
+	skb_reserve(skb, IAVF_SKB_PAD);
 	__skb_put(skb, size);
 
 	/* buffer is used by skb, update page_offset */
@@ -1389,25 +1400,28 @@
 }
 
 /**
- * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
+ * iavf_put_rx_buffer - Clean up used buffer and either recycle or free
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: rx buffer to pull data from
  *
  * This function will clean up the contents of the rx_buffer.  It will
  * either recycle the buffer or unmap it and free the associated resources.
  */
-static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
-			       struct i40e_rx_buffer *rx_buffer)
+static void iavf_put_rx_buffer(struct iavf_ring *rx_ring,
+			       struct iavf_rx_buffer *rx_buffer)
 {
-	if (i40e_can_reuse_rx_page(rx_buffer)) {
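+	/* zero-length descriptors yield no buffer; nothing to recycle */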
+	if (!rx_buffer)
+		return;
+
+	if (iavf_can_reuse_rx_page(rx_buffer)) {
 		/* hand second half of page back to the ring */
-		i40e_reuse_rx_page(rx_ring, rx_buffer);
+		iavf_reuse_rx_page(rx_ring, rx_buffer);
 		rx_ring->rx_stats.page_reuse_count++;
 	} else {
 		/* we are not reusing the buffer so unmap it */
 		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-				     i40e_rx_pg_size(rx_ring),
-				     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+				     iavf_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buffer->page,
 					rx_buffer->pagecnt_bias);
 	}
@@ -1417,7 +1431,7 @@
 }
 
 /**
- * i40e_is_non_eop - process handling of non-EOP buffers
+ * iavf_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
  * @rx_desc: Rx descriptor for current buffer
  * @skb: Current socket buffer containing buffer in progress
@@ -1427,8 +1441,8 @@
  * sk_buff in the next buffer to be chained and return true indicating
  * that this is in fact a non-EOP buffer.
  **/
-static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
-			    union i40e_rx_desc *rx_desc,
+static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
+			    union iavf_rx_desc *rx_desc,
 			    struct sk_buff *skb)
 {
 	u32 ntc = rx_ring->next_to_clean + 1;
@@ -1437,11 +1451,11 @@
 	ntc = (ntc < rx_ring->count) ? ntc : 0;
 	rx_ring->next_to_clean = ntc;
 
-	prefetch(I40E_RX_DESC(rx_ring, ntc));
+	prefetch(IAVF_RX_DESC(rx_ring, ntc));
 
 	/* if we are the last buffer then there is nothing else to do */
-#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
-	if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
+#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT)
+	if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF)))
 		return false;
 
 	rx_ring->rx_stats.non_eop_descs++;
@@ -1450,7 +1464,7 @@
 }
 
 /**
- * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * iavf_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: rx descriptor ring to transact packets on
  * @budget: Total limit on number of packets to process
  *
@@ -1461,29 +1475,29 @@
  *
  * Returns amount of work completed
  **/
-static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
+static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	struct sk_buff *skb = rx_ring->skb;
-	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
+	u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring);
 	bool failure = false;
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
-		struct i40e_rx_buffer *rx_buffer;
-		union i40e_rx_desc *rx_desc;
+		struct iavf_rx_buffer *rx_buffer;
+		union iavf_rx_desc *rx_desc;
 		unsigned int size;
 		u16 vlan_tag;
 		u8 rx_ptype;
 		u64 qword;
 
 		/* return some buffers to hardware, one at a time is too slow */
-		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
+		if (cleaned_count >= IAVF_RX_BUFFER_WRITE) {
 			failure = failure ||
-				  i40evf_alloc_rx_buffers(rx_ring, cleaned_count);
+				  iavf_alloc_rx_buffers(rx_ring, cleaned_count);
 			cleaned_count = 0;
 		}
 
-		rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
 		/* status_error_len will always be zero for unused descriptors
 		 * because it's cleared in cleanup, and overlaps with hdr_addr
@@ -1497,48 +1511,50 @@
 		 * verified the descriptor has been written back.
 		 */
 		dma_rmb();
-
-		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-		if (!size)
+#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT)
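+		/* stop cleaning once we reach a descriptor the hardware
+		 * has not yet written back (DD bit still clear)
+		 */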
+		if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
 			break;
 
-		i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
-		rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+		size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
+		       IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;
+
+		iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb);
+		rx_buffer = iavf_get_rx_buffer(rx_ring, size);
 
 		/* retrieve a buffer from the ring */
 		if (skb)
-			i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
+			iavf_add_rx_frag(rx_ring, rx_buffer, skb, size);
 		else if (ring_uses_build_skb(rx_ring))
-			skb = i40e_build_skb(rx_ring, rx_buffer, size);
+			skb = iavf_build_skb(rx_ring, rx_buffer, size);
 		else
-			skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+			skb = iavf_construct_skb(rx_ring, rx_buffer, size);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
 			rx_ring->rx_stats.alloc_buff_failed++;
-			rx_buffer->pagecnt_bias++;
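+			/* undo the pagecnt_bias decrement taken when the
+			 * buffer was fetched, since we did not consume it
+			 */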
+			if (rx_buffer)
+				rx_buffer->pagecnt_bias++;
 			break;
 		}
 
-		i40e_put_rx_buffer(rx_ring, rx_buffer);
+		iavf_put_rx_buffer(rx_ring, rx_buffer);
 		cleaned_count++;
 
-		if (i40e_is_non_eop(rx_ring, rx_desc, skb))
+		if (iavf_is_non_eop(rx_ring, rx_desc, skb))
 			continue;
 
 		/* ERR_MASK will only have valid bits if EOP set, and
 		 * what we are doing here is actually checking
-		 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+		 * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
 		 * the error field
 		 */
-		if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
+		if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) {
 			dev_kfree_skb_any(skb);
 			skb = NULL;
 			continue;
 		}
 
-		if (i40e_cleanup_headers(rx_ring, skb)) {
+		if (iavf_cleanup_headers(rx_ring, skb)) {
 			skb = NULL;
 			continue;
 		}
@@ -1547,18 +1563,18 @@
 		total_rx_bytes += skb->len;
 
 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-			   I40E_RXD_QW1_PTYPE_SHIFT;
+		rx_ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >>
+			   IAVF_RXD_QW1_PTYPE_SHIFT;
 
 		/* populate checksum, VLAN, and protocol */
-		i40evf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+		iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
-
-		vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
+		vlan_tag = (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
 			   le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
 
-		i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
-		i40e_receive_skb(rx_ring, skb, vlan_tag);
+		iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
+		iavf_receive_skb(rx_ring, skb, vlan_tag);
 		skb = NULL;
 
 		/* update budget accounting */
@@ -1578,7 +1594,7 @@
 	return failure ? budget : (int)total_rx_packets;
 }
 
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+static inline u32 iavf_buildreg_itr(const int type, u16 itr)
 {
 	u32 val;
 
@@ -1597,17 +1613,17 @@
 	 * only need to shift by the interval shift - 1 instead of the
 	 * full value.
 	 */
-	itr &= I40E_ITR_MASK;
+	itr &= IAVF_ITR_MASK;
 
-	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
-	      (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
+	val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+	      (type << IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
+	      (itr << (IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
 
 	return val;
 }
 
 /* a small macro to shorten up some long lines */
-#define INTREG I40E_VFINT_DYN_CTLN1
+#define INTREG IAVF_VFINT_DYN_CTLN1
 
 /* The act of updating the ITR will cause it to immediately trigger. In order
  * to prevent this from throwing off adaptive update statistics we defer the
@@ -1619,20 +1635,20 @@
 #define ITR_COUNTDOWN_START 3
 
 /**
- * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
+ * iavf_update_enable_itr - Update itr and re-enable MSIX interrupt
  * @vsi: the VSI we care about
  * @q_vector: q_vector for which itr is being updated and interrupt enabled
  *
  **/
-static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
-					  struct i40e_q_vector *q_vector)
+static inline void iavf_update_enable_itr(struct iavf_vsi *vsi,
+					  struct iavf_q_vector *q_vector)
 {
-	struct i40e_hw *hw = &vsi->back->hw;
+	struct iavf_hw *hw = &vsi->back->hw;
 	u32 intval;
 
 	/* These will do nothing if dynamic updates are not enabled */
-	i40e_update_itr(q_vector, &q_vector->tx);
-	i40e_update_itr(q_vector, &q_vector->rx);
+	iavf_update_itr(q_vector, &q_vector->tx);
+	iavf_update_itr(q_vector, &q_vector->rx);
 
 	/* This block of logic allows us to get away with only updating
 	 * one ITR value with each interrupt. The idea is to perform a
@@ -1644,7 +1660,7 @@
 	 */
 	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
 		/* Rx ITR needs to be reduced, this is highest priority */
-		intval = i40e_buildreg_itr(I40E_RX_ITR,
+		intval = iavf_buildreg_itr(IAVF_RX_ITR,
 					   q_vector->rx.target_itr);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
@@ -1654,29 +1670,29 @@
 		/* Tx ITR needs to be reduced, this is second priority
 		 * Tx ITR needs to be increased more than Rx, fourth priority
 		 */
-		intval = i40e_buildreg_itr(I40E_TX_ITR,
+		intval = iavf_buildreg_itr(IAVF_TX_ITR,
 					   q_vector->tx.target_itr);
 		q_vector->tx.current_itr = q_vector->tx.target_itr;
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
 		/* Rx ITR needs to be increased, third priority */
-		intval = i40e_buildreg_itr(I40E_RX_ITR,
+		intval = iavf_buildreg_itr(IAVF_RX_ITR,
 					   q_vector->rx.target_itr);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	} else {
 		/* No ITR update, lowest priority */
-		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+		intval = iavf_buildreg_itr(IAVF_ITR_NONE, 0);
 		if (q_vector->itr_countdown)
 			q_vector->itr_countdown--;
 	}
 
-	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
+	if (!test_bit(__IAVF_VSI_DOWN, vsi->state))
 		wr32(hw, INTREG(q_vector->reg_idx), intval);
 }
 
 /**
- * i40evf_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * iavf_napi_poll - NAPI polling Rx/Tx cleanup routine
  * @napi: napi struct with our devices info in it
  * @budget: amount of work driver is allowed to do this pass, in packets
  *
@@ -1684,18 +1700,18 @@
  *
  * Returns the amount of work done
  **/
-int i40evf_napi_poll(struct napi_struct *napi, int budget)
+int iavf_napi_poll(struct napi_struct *napi, int budget)
 {
-	struct i40e_q_vector *q_vector =
-			       container_of(napi, struct i40e_q_vector, napi);
-	struct i40e_vsi *vsi = q_vector->vsi;
-	struct i40e_ring *ring;
+	struct iavf_q_vector *q_vector =
+			       container_of(napi, struct iavf_q_vector, napi);
+	struct iavf_vsi *vsi = q_vector->vsi;
+	struct iavf_ring *ring;
 	bool clean_complete = true;
 	bool arm_wb = false;
 	int budget_per_ring;
 	int work_done = 0;
 
-	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
+	if (test_bit(__IAVF_VSI_DOWN, vsi->state)) {
 		napi_complete(napi);
 		return 0;
 	}
@@ -1703,8 +1719,8 @@
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	i40e_for_each_ring(ring, q_vector->tx) {
-		if (!i40e_clean_tx_irq(vsi, ring, budget)) {
+	iavf_for_each_ring(ring, q_vector->tx) {
+		if (!iavf_clean_tx_irq(vsi, ring, budget)) {
 			clean_complete = false;
 			continue;
 		}
@@ -1721,8 +1737,8 @@
 	 */
 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
 
-	i40e_for_each_ring(ring, q_vector->rx) {
-		int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
+	iavf_for_each_ring(ring, q_vector->rx) {
+		int cleaned = iavf_clean_rx_irq(ring, budget_per_ring);
 
 		work_done += cleaned;
 		/* if we clean as many as budgeted, we must not be done */
@@ -1746,7 +1762,7 @@
 			napi_complete_done(napi, work_done);
 
 			/* Force an interrupt */
-			i40evf_force_wb(vsi, q_vector);
+			iavf_force_wb(vsi, q_vector);
 
 			/* Return budget-1 so that polling stops */
 			return budget - 1;
@@ -1754,24 +1770,25 @@
 tx_only:
 		if (arm_wb) {
 			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
-			i40e_enable_wb_on_itr(vsi, q_vector);
+			iavf_enable_wb_on_itr(vsi, q_vector);
 		}
 		return budget;
 	}
 
-	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
+	if (vsi->back->flags & IAVF_TXR_FLAGS_WB_ON_ITR)
 		q_vector->arm_wb_state = false;
 
-	/* Work is done so exit the polling mode and re-enable the interrupt */
-	napi_complete_done(napi, work_done);
-
-	i40e_update_enable_itr(vsi, q_vector);
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		iavf_update_enable_itr(vsi, q_vector);
 
 	return min(work_done, budget - 1);
 }
 
 /**
- * i40evf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * iavf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
  * @skb:     send buffer
  * @tx_ring: ring to send buffer on
  * @flags:   the tx flags to be set
@@ -1782,9 +1799,9 @@
- * Returns error code indicate the frame should be dropped upon error and the
- * otherwise  returns 0 to indicate the flags has been set properly.
+ * Returns an error code indicating the frame should be dropped upon error,
+ * and otherwise returns 0 to indicate the flags have been set properly.
  **/
-static inline int i40evf_tx_prepare_vlan_flags(struct sk_buff *skb,
-					       struct i40e_ring *tx_ring,
-					       u32 *flags)
+static inline int iavf_tx_prepare_vlan_flags(struct sk_buff *skb,
+					     struct iavf_ring *tx_ring,
+					     u32 *flags)
 {
 	__be16 protocol = skb->protocol;
 	u32  tx_flags = 0;
@@ -1804,8 +1821,8 @@
 
 	/* if we have a HW VLAN tag being added, default to the HW one */
 	if (skb_vlan_tag_present(skb)) {
-		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
-		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
+		tx_flags |= skb_vlan_tag_get(skb) << IAVF_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IAVF_TX_FLAGS_HW_VLAN;
 	/* else if it is a SW VLAN, check the next protocol and store the tag */
 	} else if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_hdr *vhdr, _vhdr;
@@ -1815,8 +1832,8 @@
 			return -EINVAL;
 
 		protocol = vhdr->h_vlan_encapsulated_proto;
-		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
-		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
+		tx_flags |= ntohs(vhdr->h_vlan_TCI) << IAVF_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IAVF_TX_FLAGS_SW_VLAN;
 	}
 
 out:
@@ -1825,14 +1842,14 @@
 }
 
 /**
- * i40e_tso - set up the tso context descriptor
+ * iavf_tso - set up the tso context descriptor
  * @first:    pointer to first Tx buffer for xmit
  * @hdr_len:  ptr to the size of the packet header
  * @cd_type_cmd_tso_mss: Quad Word 1
  *
  * Returns 0 if no TSO can happen, 1 if tso is going, or error
  **/
-static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
+static int iavf_tso(struct iavf_tx_buffer *first, u8 *hdr_len,
 		    u64 *cd_type_cmd_tso_mss)
 {
 	struct sk_buff *skb = first->skb;
@@ -1923,17 +1940,17 @@
 	first->bytecount += (first->gso_segs - 1) * *hdr_len;
 
 	/* find the field values */
-	cd_cmd = I40E_TX_CTX_DESC_TSO;
+	cd_cmd = IAVF_TX_CTX_DESC_TSO;
 	cd_tso_len = skb->len - *hdr_len;
 	cd_mss = gso_size;
-	*cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
-				(cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
-				(cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
+	*cd_type_cmd_tso_mss |= (cd_cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) |
+				(cd_tso_len << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+				(cd_mss << IAVF_TXD_CTX_QW1_MSS_SHIFT);
 	return 1;
 }
 
 /**
- * i40e_tx_enable_csum - Enable Tx checksum offloads
+ * iavf_tx_enable_csum - Enable Tx checksum offloads
  * @skb: send buffer
  * @tx_flags: pointer to Tx flags currently set
  * @td_cmd: Tx descriptor command bits to set
@@ -1941,9 +1958,9 @@
  * @tx_ring: Tx descriptor ring
  * @cd_tunneling: ptr to context desc bits
  **/
-static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
+static int iavf_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 			       u32 *td_cmd, u32 *td_offset,
-			       struct i40e_ring *tx_ring,
+			       struct iavf_ring *tx_ring,
 			       u32 *cd_tunneling)
 {
 	union {
@@ -1968,19 +1985,19 @@
 	l4.hdr = skb_transport_header(skb);
 
 	/* compute outer L2 header size */
-	offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+	offset = ((ip.hdr - skb->data) / 2) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
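+	/* (the MACLEN field counts 2-byte words, hence the divide by 2) */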
 
 	if (skb->encapsulation) {
 		u32 tunnel = 0;
 		/* define outer network header type */
-		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
-			tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
-				  I40E_TX_CTX_EXT_IP_IPV4 :
-				  I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+		if (*tx_flags & IAVF_TX_FLAGS_IPV4) {
+			tunnel |= (*tx_flags & IAVF_TX_FLAGS_TSO) ?
+				  IAVF_TX_CTX_EXT_IP_IPV4 :
+				  IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
 
 			l4_proto = ip.v4->protocol;
-		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
-			tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
+		} else if (*tx_flags & IAVF_TX_FLAGS_IPV6) {
+			tunnel |= IAVF_TX_CTX_EXT_IP_IPV6;
 
 			exthdr = ip.hdr + sizeof(*ip.v6);
 			l4_proto = ip.v6->nexthdr;
@@ -1992,20 +2009,20 @@
 		/* define outer transport */
 		switch (l4_proto) {
 		case IPPROTO_UDP:
-			tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
-			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
+			tunnel |= IAVF_TXD_CTX_UDP_TUNNELING;
+			*tx_flags |= IAVF_TX_FLAGS_VXLAN_TUNNEL;
 			break;
 		case IPPROTO_GRE:
-			tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
-			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
+			tunnel |= IAVF_TXD_CTX_GRE_TUNNELING;
+			*tx_flags |= IAVF_TX_FLAGS_VXLAN_TUNNEL;
 			break;
 		case IPPROTO_IPIP:
 		case IPPROTO_IPV6:
-			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
+			*tx_flags |= IAVF_TX_FLAGS_VXLAN_TUNNEL;
 			l4.hdr = skb_inner_network_header(skb);
 			break;
 		default:
-			if (*tx_flags & I40E_TX_FLAGS_TSO)
+			if (*tx_flags & IAVF_TX_FLAGS_TSO)
 				return -1;
 
 			skb_checksum_help(skb);
@@ -2014,20 +2031,20 @@
 
 		/* compute outer L3 header size */
 		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
-			  I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
+			  IAVF_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
 
 		/* switch IP header pointer from outer to inner header */
 		ip.hdr = skb_inner_network_header(skb);
 
 		/* compute tunnel header size */
 		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
-			  I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+			  IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
 
 		/* indicate if we need to offload outer UDP header */
-		if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
+		if ((*tx_flags & IAVF_TX_FLAGS_TSO) &&
 		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
 		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
-			tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
+			tunnel |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
 
 		/* record tunnel offload values */
 		*cd_tunneling |= tunnel;
@@ -2037,24 +2054,24 @@
 		l4_proto = 0;
 
 		/* reset type as we transition from outer to inner headers */
-		*tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
+		*tx_flags &= ~(IAVF_TX_FLAGS_IPV4 | IAVF_TX_FLAGS_IPV6);
 		if (ip.v4->version == 4)
-			*tx_flags |= I40E_TX_FLAGS_IPV4;
+			*tx_flags |= IAVF_TX_FLAGS_IPV4;
 		if (ip.v6->version == 6)
-			*tx_flags |= I40E_TX_FLAGS_IPV6;
+			*tx_flags |= IAVF_TX_FLAGS_IPV6;
 	}
 
 	/* Enable IP checksum offloads */
-	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
+	if (*tx_flags & IAVF_TX_FLAGS_IPV4) {
 		l4_proto = ip.v4->protocol;
 		/* the stack computes the IP header already, the only time we
 		 * need the hardware to recompute it is in the case of TSO.
 		 */
-		cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
-		       I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
-		       I40E_TX_DESC_CMD_IIPT_IPV4;
-	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
-		cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
+		cmd |= (*tx_flags & IAVF_TX_FLAGS_TSO) ?
+		       IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM :
+		       IAVF_TX_DESC_CMD_IIPT_IPV4;
+	} else if (*tx_flags & IAVF_TX_FLAGS_IPV6) {
+		cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
 
 		exthdr = ip.hdr + sizeof(*ip.v6);
 		l4_proto = ip.v6->nexthdr;
@@ -2064,29 +2081,29 @@
 	}
 
 	/* compute inner L3 header size */
-	offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+	offset |= ((l4.hdr - ip.hdr) / 4) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
 
 	/* Enable L4 checksum offloads */
 	switch (l4_proto) {
 	case IPPROTO_TCP:
 		/* enable checksum offloads */
-		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
-		offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
+		offset |= l4.tcp->doff << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 		break;
 	case IPPROTO_SCTP:
 		/* enable SCTP checksum offload */
-		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+		cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
 		offset |= (sizeof(struct sctphdr) >> 2) <<
-			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+			  IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 		break;
 	case IPPROTO_UDP:
 		/* enable UDP checksum offload */
-		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+		cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
 		offset |= (sizeof(struct udphdr) >> 2) <<
-			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+			  IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 		break;
 	default:
-		if (*tx_flags & I40E_TX_FLAGS_TSO)
+		if (*tx_flags & IAVF_TX_FLAGS_TSO)
 			return -1;
 		skb_checksum_help(skb);
 		return 0;
@@ -2099,25 +2116,25 @@
 }
 
 /**
- * i40e_create_tx_ctx Build the Tx context descriptor
+ * iavf_create_tx_ctx - Build the Tx context descriptor
  * @tx_ring:  ring to create the descriptor on
  * @cd_type_cmd_tso_mss: Quad Word 1
  * @cd_tunneling: Quad Word 0 - bits 0-31
  * @cd_l2tag2: Quad Word 0 - bits 32-63
  **/
-static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
+static void iavf_create_tx_ctx(struct iavf_ring *tx_ring,
 			       const u64 cd_type_cmd_tso_mss,
 			       const u32 cd_tunneling, const u32 cd_l2tag2)
 {
-	struct i40e_tx_context_desc *context_desc;
+	struct iavf_tx_context_desc *context_desc;
 	int i = tx_ring->next_to_use;
 
-	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
+	if ((cd_type_cmd_tso_mss == IAVF_TX_DESC_DTYPE_CONTEXT) &&
 	    !cd_tunneling && !cd_l2tag2)
 		return;
 
 	/* grab the next descriptor */
-	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
+	context_desc = IAVF_TX_CTXTDESC(tx_ring, i);
 
 	i++;
 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
@@ -2130,7 +2147,7 @@
 }
 
 /**
- * __i40evf_chk_linearize - Check if there are more than 8 buffers per packet
+ * __iavf_chk_linearize - Check if there are more than 8 buffers per packet
  * @skb:      send buffer
  *
  * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
@@ -2142,20 +2159,20 @@
  * the segment payload in the first descriptor, and another 7 for the
  * fragments.
  **/
-bool __i40evf_chk_linearize(struct sk_buff *skb)
+bool __iavf_chk_linearize(struct sk_buff *skb)
 {
-	const struct skb_frag_struct *frag, *stale;
+	const skb_frag_t *frag, *stale;
 	int nr_frags, sum;
 
 	/* no need to check if number of frags is less than 7 */
 	nr_frags = skb_shinfo(skb)->nr_frags;
-	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
+	if (nr_frags < (IAVF_MAX_BUFFER_TXD - 1))
 		return false;
 
 	/* We need to walk through the list and validate that each group
 	 * of 6 fragments totals at least gso_size.
 	 */
-	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
+	nr_frags -= IAVF_MAX_BUFFER_TXD - 2;
 	frag = &skb_shinfo(skb)->frags[0];
 
 	/* Initialize size to the negative value of gso_size minus 1.  We
@@ -2187,17 +2204,17 @@
 		 * figure out what the remainder would be in the last
 		 * descriptor associated with the fragment.
 		 */
-		if (stale_size > I40E_MAX_DATA_PER_TXD) {
-			int align_pad = -(stale->page_offset) &
-					(I40E_MAX_READ_REQ_SIZE - 1);
+		if (stale_size > IAVF_MAX_DATA_PER_TXD) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(IAVF_MAX_READ_REQ_SIZE - 1);
 
 			sum -= align_pad;
 			stale_size -= align_pad;
 
 			do {
-				sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
-				stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
-			} while (stale_size > I40E_MAX_DATA_PER_TXD);
+				sum -= IAVF_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= IAVF_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > IAVF_MAX_DATA_PER_TXD);
 		}
 
 		/* if sum is negative we failed to make sufficient progress */
@@ -2214,20 +2231,20 @@
 }
 
 /**
- * __i40evf_maybe_stop_tx - 2nd level check for tx stop conditions
+ * __iavf_maybe_stop_tx - 2nd level check for tx stop conditions
  * @tx_ring: the ring to be checked
  * @size:    the size buffer we want to assure is available
  *
  * Returns -EBUSY if a stop is needed, else 0
  **/
-int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size)
 {
 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
 	/* Memory barrier before checking head and tail */
 	smp_mb();
 
 	/* Check again in a case another CPU has just made room available. */
-	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
+	if (likely(IAVF_DESC_UNUSED(tx_ring) < size))
 		return -EBUSY;
 
 	/* A reprieve! - use start_queue because it doesn't call schedule */
@@ -2237,7 +2254,7 @@
 }
 
 /**
- * i40evf_tx_map - Build the Tx descriptor
+ * iavf_tx_map - Build the Tx descriptor
  * @tx_ring:  ring to send buffer on
  * @skb:      send buffer
  * @first:    first buffer info buffer to use
@@ -2246,34 +2263,34 @@
  * @td_cmd:   the command field in the descriptor
  * @td_offset: offset for checksum or crc
  **/
-static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
-				 struct i40e_tx_buffer *first, u32 tx_flags,
-				 const u8 hdr_len, u32 td_cmd, u32 td_offset)
+static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb,
+			       struct iavf_tx_buffer *first, u32 tx_flags,
+			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
 {
 	unsigned int data_len = skb->data_len;
 	unsigned int size = skb_headlen(skb);
-	struct skb_frag_struct *frag;
-	struct i40e_tx_buffer *tx_bi;
-	struct i40e_tx_desc *tx_desc;
+	skb_frag_t *frag;
+	struct iavf_tx_buffer *tx_bi;
+	struct iavf_tx_desc *tx_desc;
 	u16 i = tx_ring->next_to_use;
 	u32 td_tag = 0;
 	dma_addr_t dma;
 
-	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
-		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
-		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
-			 I40E_TX_FLAGS_VLAN_SHIFT;
+	if (tx_flags & IAVF_TX_FLAGS_HW_VLAN) {
+		td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
+		td_tag = (tx_flags & IAVF_TX_FLAGS_VLAN_MASK) >>
+			 IAVF_TX_FLAGS_VLAN_SHIFT;
 	}
 
 	first->tx_flags = tx_flags;
 
 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
 
-	tx_desc = I40E_TX_DESC(tx_ring, i);
+	tx_desc = IAVF_TX_DESC(tx_ring, i);
 	tx_bi = first;
 
 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
-		unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
+		unsigned int max_data = IAVF_MAX_DATA_PER_TXD_ALIGNED;
 
 		if (dma_mapping_error(tx_ring->dev, dma))
 			goto dma_error;
@@ -2283,10 +2300,10 @@
 		dma_unmap_addr_set(tx_bi, dma, dma);
 
 		/* align size to end of page */
-		max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1);
+		max_data += -dma & (IAVF_MAX_READ_REQ_SIZE - 1);
 		tx_desc->buffer_addr = cpu_to_le64(dma);
 
-		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
+		while (unlikely(size > IAVF_MAX_DATA_PER_TXD)) {
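+			/* a single data descriptor covers at most
+			 * IAVF_MAX_DATA_PER_TXD bytes; chain additional
+			 * descriptors for larger mappings
+			 */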
 			tx_desc->cmd_type_offset_bsz =
 				build_ctob(td_cmd, td_offset,
 					   max_data, td_tag);
@@ -2295,14 +2312,14 @@
 			i++;
 
 			if (i == tx_ring->count) {
-				tx_desc = I40E_TX_DESC(tx_ring, 0);
+				tx_desc = IAVF_TX_DESC(tx_ring, 0);
 				i = 0;
 			}
 
 			dma += max_data;
 			size -= max_data;
 
-			max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
+			max_data = IAVF_MAX_DATA_PER_TXD_ALIGNED;
 			tx_desc->buffer_addr = cpu_to_le64(dma);
 		}
 
@@ -2316,7 +2333,7 @@
 		i++;
 
 		if (i == tx_ring->count) {
-			tx_desc = I40E_TX_DESC(tx_ring, 0);
+			tx_desc = IAVF_TX_DESC(tx_ring, 0);
 			i = 0;
 		}
 
@@ -2337,13 +2354,15 @@
 
 	tx_ring->next_to_use = i;
 
-	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
+	iavf_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	/* write last descriptor with RS and EOP bits */
-	td_cmd |= I40E_TXD_CMD;
+	td_cmd |= IAVF_TXD_CMD;
 	tx_desc->cmd_type_offset_bsz =
 			build_ctob(td_cmd, td_offset, size, td_tag);
 
+	skb_tx_timestamp(skb);
+
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.
 	 *
@@ -2356,13 +2375,8 @@
 	first->next_to_watch = tx_desc;
 
 	/* notify HW of packet */
-	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
 		writel(i, tx_ring->tail);
-
-		/* we need this if more than one processor can write to our tail
-		 * at a time, it synchronizes IO on IA64/Altix systems
-		 */
-		mmiowb();
 	}
 
 	return;
@@ -2373,7 +2387,7 @@
 	/* clear dma mappings for failed tx_bi map */
 	for (;;) {
 		tx_bi = &tx_ring->tx_bi[i];
-		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
+		iavf_unmap_and_free_tx_resource(tx_ring, tx_bi);
 		if (tx_bi == first)
 			break;
 		if (i == 0)
@@ -2385,18 +2399,18 @@
 }
 
 /**
- * i40e_xmit_frame_ring - Sends buffer on Tx ring
+ * iavf_xmit_frame_ring - Sends buffer on Tx ring
  * @skb:     send buffer
  * @tx_ring: ring to send buffer on
  *
  * Returns NETDEV_TX_OK if sent, else an error code
  **/
-static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
-					struct i40e_ring *tx_ring)
+static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb,
+					struct iavf_ring *tx_ring)
 {
-	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
+	u64 cd_type_cmd_tso_mss = IAVF_TX_DESC_DTYPE_CONTEXT;
 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
-	struct i40e_tx_buffer *first;
+	struct iavf_tx_buffer *first;
 	u32 td_offset = 0;
 	u32 tx_flags = 0;
 	__be16 protocol;
@@ -2407,25 +2421,25 @@
 	/* prefetch the data, we'll need it later */
 	prefetch(skb->data);
 
-	i40e_trace(xmit_frame_ring, skb, tx_ring);
+	iavf_trace(xmit_frame_ring, skb, tx_ring);
 
-	count = i40e_xmit_descriptor_count(skb);
-	if (i40e_chk_linearize(skb, count)) {
+	count = iavf_xmit_descriptor_count(skb);
+	if (iavf_chk_linearize(skb, count)) {
 		if (__skb_linearize(skb)) {
 			dev_kfree_skb_any(skb);
 			return NETDEV_TX_OK;
 		}
-		count = i40e_txd_use_count(skb->len);
+		count = iavf_txd_use_count(skb->len);
 		tx_ring->tx_stats.tx_linearize++;
 	}
 
-	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
-	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
+	/* need: 1 descriptor per page * PAGE_SIZE/IAVF_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_head_len/IAVF_MAX_DATA_PER_TXD,
 	 *       + 4 desc gap to avoid the cache line where head is,
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
-	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+	if (iavf_maybe_stop_tx(tx_ring, count + 4 + 1)) {
 		tx_ring->tx_stats.tx_busy++;
 		return NETDEV_TX_BUSY;
 	}
@@ -2437,7 +2451,7 @@
 	first->gso_segs = 1;
 
 	/* prepare the xmit flags */
-	if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
+	if (iavf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
 		goto out_drop;
 
 	/* obtain protocol of skb */
@@ -2445,64 +2459,62 @@
 
 	/* setup IPv4/IPv6 offloads */
 	if (protocol == htons(ETH_P_IP))
-		tx_flags |= I40E_TX_FLAGS_IPV4;
+		tx_flags |= IAVF_TX_FLAGS_IPV4;
 	else if (protocol == htons(ETH_P_IPV6))
-		tx_flags |= I40E_TX_FLAGS_IPV6;
+		tx_flags |= IAVF_TX_FLAGS_IPV6;
 
-	tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss);
+	tso = iavf_tso(first, &hdr_len, &cd_type_cmd_tso_mss);
 
 	if (tso < 0)
 		goto out_drop;
 	else if (tso)
-		tx_flags |= I40E_TX_FLAGS_TSO;
+		tx_flags |= IAVF_TX_FLAGS_TSO;
 
 	/* Always offload the checksum, since it's in the data descriptor */
-	tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
+	tso = iavf_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
 				  tx_ring, &cd_tunneling);
 	if (tso < 0)
 		goto out_drop;
 
-	skb_tx_timestamp(skb);
-
 	/* always enable CRC insertion offload */
-	td_cmd |= I40E_TX_DESC_CMD_ICRC;
+	td_cmd |= IAVF_TX_DESC_CMD_ICRC;
 
-	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
+	iavf_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
 			   cd_tunneling, cd_l2tag2);
 
-	i40evf_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
-		      td_cmd, td_offset);
+	iavf_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
+		    td_cmd, td_offset);
 
 	return NETDEV_TX_OK;
 
 out_drop:
-	i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring);
+	iavf_trace(xmit_frame_ring_drop, first->skb, tx_ring);
 	dev_kfree_skb_any(first->skb);
 	first->skb = NULL;
 	return NETDEV_TX_OK;
 }
 
 /**
- * i40evf_xmit_frame - Selects the correct VSI and Tx queue to send buffer
+ * iavf_xmit_frame - Selects the correct VSI and Tx queue to send buffer
  * @skb:    send buffer
  * @netdev: network interface device structure
  *
  * Returns NETDEV_TX_OK if sent, else an error code
  **/
-netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
 
 	/* hardware can't handle really short frames, hardware padding works
 	 * beyond this point
 	 */
-	if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
-		if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
+	if (unlikely(skb->len < IAVF_MIN_TX_LEN)) {
+		if (skb_pad(skb, IAVF_MIN_TX_LEN - skb->len))
 			return NETDEV_TX_OK;
-		skb->len = I40E_MIN_TX_LEN;
-		skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
+		skb->len = IAVF_MIN_TX_LEN;
+		skb_set_tail_pointer(skb, IAVF_MIN_TX_LEN);
 	}
 
-	return i40e_xmit_frame_ring(skb, tx_ring);
+	return iavf_xmit_frame_ring(skb, tx_ring);
 }
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
new file mode 100644
index 0000000..dd3348f
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
@@ -0,0 +1,523 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_TXRX_H_
+#define _IAVF_TXRX_H_
+
+/* Interrupt Throttling and Rate Limiting Goodies */
+#define IAVF_DEFAULT_IRQ_WORK      256
+
+/* The datasheet for the X710 and XL710 indicates that the maximum value for
+ * the ITR is 8160 usec, which is called out as 0xFF0 at the register's 2 usec
+ * resolution; 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value, which is divided by 2, let's use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define IAVF_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define IAVF_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define IAVF_MIN_ITR		     2	/* reg uses 2 usec resolution */
+#define IAVF_ITR_100K		    10	/* all values below must be even */
+#define IAVF_ITR_50K		    20
+#define IAVF_ITR_20K		    50
+#define IAVF_ITR_18K		    60
+#define IAVF_ITR_8K		   122
+#define IAVF_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~IAVF_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~IAVF_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & IAVF_ITR_DYNAMIC))
+
+#define IAVF_ITR_RX_DEF		(IAVF_ITR_20K | IAVF_ITR_DYNAMIC)
+#define IAVF_ITR_TX_DEF		(IAVF_ITR_20K | IAVF_ITR_DYNAMIC)
+
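+/* Example: IAVF_ITR_RX_DEF is IAVF_ITR_20K | IAVF_ITR_DYNAMIC, i.e. 50 usec
+ * with the dynamic flag set, so ITR_IS_DYNAMIC() is true and ITR_TO_REG()
+ * strips the flag back to 50; at the 2 usec register resolution the hardware
+ * would then be programmed with 50 / 2 = 25.
+ */
+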
+/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
+ * the value of the rate limit is non-zero
+ */
+#define INTRL_ENA                  BIT(6)
+#define IAVF_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
+#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
+#define IAVF_INTRL_8K              125     /* 8000 ints/sec */
+#define IAVF_INTRL_62K             16      /* 62500 ints/sec */
+#define IAVF_INTRL_83K             12      /* 83333 ints/sec */
+
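+/* Example: INTRL_USEC_TO_REG(125) = (125 >> 2) | INTRL_ENA = 31 | 0x40, so
+ * IAVF_INTRL_8K programs a ~124 usec (8000 ints/sec) limit; converting back,
+ * INTRL_REG_TO_USEC() returns 124 because of the register's 4 usec
+ * resolution.
+ */
+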
+#define IAVF_QUEUE_END_OF_LIST 0x7FF
+
+/* This enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers, or more generally anywhere the manual
+ * mentions ITR_INDX. ITR_NONE cannot be used as an index 'n' into any
+ * register; instead it is a special value meaning "don't update" ITR0/1/2.
+ */
+enum iavf_dyn_idx_t {
+	IAVF_IDX_ITR0 = 0,
+	IAVF_IDX_ITR1 = 1,
+	IAVF_IDX_ITR2 = 2,
+	IAVF_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
+};
+
+/* these are indexes into ITRN registers */
+#define IAVF_RX_ITR    IAVF_IDX_ITR0
+#define IAVF_TX_ITR    IAVF_IDX_ITR1
+#define IAVF_PE_ITR    IAVF_IDX_ITR2
+
+/* Supported RSS offloads */
+#define IAVF_DEFAULT_RSS_HENA ( \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_SCTP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_TCP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_OTHER) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_FRAG_IPV4) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_TCP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_SCTP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_OTHER) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_FRAG_IPV6) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_L2_PAYLOAD))
+
+#define IAVF_DEFAULT_RSS_HENA_EXPANDED (IAVF_DEFAULT_RSS_HENA | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
+
+/* Supported Rx Buffer Sizes (a multiple of 128) */
+#define IAVF_RXBUFFER_256   256
+#define IAVF_RXBUFFER_1536  1536  /* 128B aligned standard Ethernet frame */
+#define IAVF_RXBUFFER_2048  2048
+#define IAVF_RXBUFFER_3072  3072  /* Used for large frames w/ padding */
+#define IAVF_MAX_RXBUFFER   9728  /* largest size for single descriptor */
+
+/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
+ * reserve 2 more, and skb_shared_info adds an additional 384 bytes; this
+ * adds up to 512 bytes of extra data, meaning the smallest allocation we
+ * could have is 1K.
+ * e.g. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
+ *      RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
+ */
+#define IAVF_RX_HDR_SIZE IAVF_RXBUFFER_256
+#define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+#define iavf_rx_desc iavf_32byte_rx_desc
+
+#define IAVF_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+/* Attempt to maximize the headroom available for incoming frames.  We
+ * use a 2K buffer for receives and need 1536/1534 to store the data for
+ * the frame.  This leaves us with 512 bytes of room.  From that we need
+ * to deduct the space needed for the shared info and the padding needed
+ * to IP align the frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the legacy
+ *	 receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define IAVF_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + IAVF_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IAVF_RXBUFFER_2048))
+
+static inline int iavf_compute_pad(int rx_buf_len)
+{
+	int page_size, pad_size;
+
+	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
+
+	return pad_size;
+}
+
+static inline int iavf_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (IAVF_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = IAVF_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = IAVF_RXBUFFER_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return iavf_compute_pad(rx_buf_len);
+}
+
+#define IAVF_SKB_PAD iavf_skb_pad()
+#else
+#define IAVF_2K_TOO_SMALL_WITH_PADDING false
+#define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
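+/* Worked example, assuming 4K pages, a 64 byte NET_SKB_PAD, a 2 byte
+ * NET_IP_ALIGN and the 384 byte shared info described above: a 2K buffer
+ * leaves SKB_WITH_OVERHEAD(2048) = 1664 bytes, 64 + 1536 = 1600 still fits,
+ * so IAVF_2K_TOO_SMALL_WITH_PADDING is false and iavf_skb_pad() returns
+ * 1664 - (1536 - 2) = 130 bytes of headroom.
+ */
+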
+/**
+ * iavf_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc,
+				     const u64 stat_err_bits)
+{
+	return !!(rx_desc->wb.qword1.status_error_len &
+		  cpu_to_le64(stat_err_bits));
+}
+
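+/* Typical use is testing the DD (descriptor done) bit before a descriptor
+ * is consumed, e.g.:
+ *
+ *	if (!iavf_test_staterr(rx_desc, BIT(IAVF_RX_DESC_STATUS_DD_SHIFT)))
+ *		break;
+ */
+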
+/* How many Rx Buffers do we bundle into one write to the hardware? */
+#define IAVF_RX_INCREMENT(r, i) \
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		r->next_to_clean = i;		\
+	} while (0)
+
+#define IAVF_RX_NEXT_DESC(r, i, n)		\
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		(n) = IAVF_RX_DESC((r), (i));	\
+	} while (0)
+
+#define IAVF_RX_NEXT_DESC_PREFETCH(r, i, n)		\
+	do {						\
+		IAVF_RX_NEXT_DESC((r), (i), (n));	\
+		prefetch((n));				\
+	} while (0)
+
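+/* Example: the helpers above wrap at the end of the ring; with
+ * r->count == 512 and i == 511, IAVF_RX_NEXT_DESC(r, i, n) leaves i == 0
+ * and n pointing back at the first descriptor.
+ */
+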
+#define IAVF_MAX_BUFFER_TXD	8
+#define IAVF_MIN_TX_LEN		17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * To align with PCIe read requests, we round that limit down to the
+ * nearest 4K, 4K being our maximum read request size.
+ */
+#define IAVF_MAX_READ_REQ_SIZE		4096
+#define IAVF_MAX_DATA_PER_TXD		(16 * 1024 - 1)
+#define IAVF_MAX_DATA_PER_TXD_ALIGNED \
+	(IAVF_MAX_DATA_PER_TXD & ~(IAVF_MAX_READ_REQ_SIZE - 1))
+
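+/* i.e. IAVF_MAX_DATA_PER_TXD_ALIGNED = 16383 & ~4095 = 12288 (12K), which
+ * is why the descriptor-count math below divides by 12K rather than 16K.
+ */
+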
+/**
+ * iavf_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment.  For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + 1;
+ */
+static inline unsigned int iavf_txd_use_count(unsigned int size)
+{
+	return ((size * 85) >> 20) + 1;
+}
+
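+/* Worked example: iavf_txd_use_count(61440) = ((61440 * 85) >> 20) + 1
+ * = 4 + 1 = 5, i.e. five descriptors for a 60K payload, matching the
+ * exact 61440 / 12288 = 5 full 12K segments.
+ */
+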
+/* Tx Descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + 6)
+#define IAVF_MIN_DESC_PENDING	4
+
+#define IAVF_TX_FLAGS_HW_VLAN		BIT(1)
+#define IAVF_TX_FLAGS_SW_VLAN		BIT(2)
+#define IAVF_TX_FLAGS_TSO		BIT(3)
+#define IAVF_TX_FLAGS_IPV4		BIT(4)
+#define IAVF_TX_FLAGS_IPV6		BIT(5)
+#define IAVF_TX_FLAGS_FCCRC		BIT(6)
+#define IAVF_TX_FLAGS_FSO		BIT(7)
+#define IAVF_TX_FLAGS_FD_SB		BIT(9)
+#define IAVF_TX_FLAGS_VXLAN_TUNNEL	BIT(10)
+#define IAVF_TX_FLAGS_VLAN_MASK		0xffff0000
+#define IAVF_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
+#define IAVF_TX_FLAGS_VLAN_PRIO_SHIFT	29
+#define IAVF_TX_FLAGS_VLAN_SHIFT	16
+
+struct iavf_tx_buffer {
+	struct iavf_tx_desc *next_to_watch;
+	union {
+		struct sk_buff *skb;
+		void *raw_buf;
+	};
+	unsigned int bytecount;
+	unsigned short gso_segs;
+
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct iavf_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 page_offset;
+#else
+	__u16 page_offset;
+#endif
+	__u16 pagecnt_bias;
+};
+
+struct iavf_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct iavf_tx_queue_stats {
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_done_old;
+	u64 tx_linearize;
+	u64 tx_force_wb;
+	int prev_pkt_ctr;
+	u64 tx_lost_interrupt;
+};
+
+struct iavf_rx_queue_stats {
+	u64 non_eop_descs;
+	u64 alloc_page_failed;
+	u64 alloc_buff_failed;
+	u64 page_reuse_count;
+	u64 realloc_count;
+};
+
+enum iavf_ring_state_t {
+	__IAVF_TX_FDIR_INIT_DONE,
+	__IAVF_TX_XPS_INIT_DONE,
+	__IAVF_RING_STATE_NBITS /* must be last */
+};
+
+/* some useful defines for virtchannel interface, which
+ * is the only remaining user of header split
+ */
+#define IAVF_RX_DTYPE_NO_SPLIT      0
+#define IAVF_RX_DTYPE_HEADER_SPLIT  1
+#define IAVF_RX_DTYPE_SPLIT_ALWAYS  2
+#define IAVF_RX_SPLIT_L2      0x1
+#define IAVF_RX_SPLIT_IP      0x2
+#define IAVF_RX_SPLIT_TCP_UDP 0x4
+#define IAVF_RX_SPLIT_SCTP    0x8
+
+/* struct that defines a descriptor ring, associated with a VSI */
+struct iavf_ring {
+	struct iavf_ring *next;		/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
+	struct device *dev;		/* Used for DMA mapping */
+	struct net_device *netdev;	/* netdev ring maps to */
+	union {
+		struct iavf_tx_buffer *tx_bi;
+		struct iavf_rx_buffer *rx_bi;
+	};
+	DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS);
+	u16 queue_index;		/* Queue number of ring */
+	u8 dcb_tc;			/* Traffic class of ring */
+	u8 __iomem *tail;
+
+	/* high bit set means dynamic, use accessor routines to read/write.
+	 * hardware only supports 2us resolution for the ITR registers.
+	 * these values always store the USER setting, and must be converted
+	 * before programming to a register.
+	 */
+	u16 itr_setting;
+
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+	u16 rx_buf_len;
+
+	/* used in interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	u8 atr_sample_rate;
+	u8 atr_count;
+
+	bool ring_active;		/* is ring online or not */
+	bool arm_wb;		/* do something to arm write back */
+	u8 packet_stride;
+
+	u16 flags;
+#define IAVF_TXR_FLAGS_WB_ON_ITR		BIT(0)
+#define IAVF_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)
+
+	/* stats structs */
+	struct iavf_queue_stats	stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct iavf_tx_queue_stats tx_stats;
+		struct iavf_rx_queue_stats rx_stats;
+	};
+
+	unsigned int size;		/* length of descriptor ring in bytes */
+	dma_addr_t dma;			/* physical address of ring */
+
+	struct iavf_vsi *vsi;		/* Backreference to associated VSI */
+	struct iavf_q_vector *q_vector;	/* Backreference to associated vector */
+
+	struct rcu_head rcu;		/* to avoid race on free */
+	u16 next_to_alloc;
+	struct sk_buff *skb;		/* When iavf_clean_rx_ring_irq() must
+					 * return before it sees the EOP for
+					 * the current packet, we save that skb
+					 * here and resume receiving this
+					 * packet the next time
+					 * iavf_clean_rx_ring_irq() is called
+					 * for this ring.
+					 */
+} ____cacheline_internodealigned_in_smp;
+
+static inline bool ring_uses_build_skb(struct iavf_ring *ring)
+{
+	return !!(ring->flags & IAVF_RXR_FLAGS_BUILD_SKB_ENABLED);
+}
+
+static inline void set_ring_build_skb_enabled(struct iavf_ring *ring)
+{
+	ring->flags |= IAVF_RXR_FLAGS_BUILD_SKB_ENABLED;
+}
+
+static inline void clear_ring_build_skb_enabled(struct iavf_ring *ring)
+{
+	ring->flags &= ~IAVF_RXR_FLAGS_BUILD_SKB_ENABLED;
+}
+
+#define IAVF_ITR_ADAPTIVE_MIN_INC	0x0002
+#define IAVF_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define IAVF_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define IAVF_ITR_ADAPTIVE_LATENCY	0x8000
+#define IAVF_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x) (!((x) & IAVF_ITR_ADAPTIVE_LATENCY))
+
+struct iavf_ring_container {
+	struct iavf_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 count;
+	u16 target_itr;			/* target ITR setting for ring(s) */
+	u16 current_itr;		/* current ITR setting for ring(s) */
+};
+
+/* iterator for handling rings in ring container */
+#define iavf_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
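+/* Example: with 4K pages, a 3K rx_buf_len exceeds half a page, so the
+ * order is 1 and iavf_rx_pg_size() below returns 8192; 2K buffers keep
+ * order 0 and use plain 4K pages.
+ */
+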
+#define iavf_rx_pg_size(_ring) (PAGE_SIZE << iavf_rx_pg_order(_ring))
+
+bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count);
+netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+void iavf_clean_tx_ring(struct iavf_ring *tx_ring);
+void iavf_clean_rx_ring(struct iavf_ring *rx_ring);
+int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring);
+int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring);
+void iavf_free_tx_resources(struct iavf_ring *tx_ring);
+void iavf_free_rx_resources(struct iavf_ring *rx_ring);
+int iavf_napi_poll(struct napi_struct *napi, int budget);
+void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector);
+u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw);
+void iavf_detect_recover_hung(struct iavf_vsi *vsi);
+int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size);
+bool __iavf_chk_linearize(struct sk_buff *skb);
+
+/**
+ * iavf_xmit_descriptor_count - calculate number of Tx descriptors needed
+ * @skb:     send buffer
+ *
+ * Returns the number of data descriptors needed for this skb. Returns 0 to
+ * indicate there are not enough descriptors available in this ring, since we
+ * need at least one descriptor.
+ **/
+static inline int iavf_xmit_descriptor_count(struct sk_buff *skb)
+{
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += iavf_txd_use_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * iavf_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ **/
+static inline int iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size)
+{
+	if (likely(IAVF_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+	return __iavf_maybe_stop_tx(tx_ring, size);
+}
+
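+/* The common case returns without touching the queue state; only when the
+ * ring looks full does __iavf_maybe_stop_tx() stop the queue, issue an
+ * smp_mb() and re-check, closing the race with a completion that frees
+ * descriptors between the two tests.
+ */
+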
+/**
+ * iavf_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb:      send buffer
+ * @count:    number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ **/
+static inline bool iavf_chk_linearize(struct sk_buff *skb, int count)
+{
+	/* Both TSO and single send will work if count is less than 8 */
+	if (likely(count < IAVF_MAX_BUFFER_TXD))
+		return false;
+
+	if (skb_is_gso(skb))
+		return __iavf_chk_linearize(skb);
+
+	/* we can support up to 8 data buffers for a single send */
+	return count != IAVF_MAX_BUFFER_TXD;
+}
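+
+/* Example: a non-GSO skb built from its head plus 8 page fragments spans
+ * 9 buffers, so count == 9 and the skb is linearized before transmit; at
+ * exactly IAVF_MAX_BUFFER_TXD (8) buffers it is still sent as-is.
+ */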
+
+/**
+ * txring_txq - Find the netdev queue that corresponds to a Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct iavf_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+#endif /* _IAVF_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
new file mode 100644
index 0000000..7190a40
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
@@ -0,0 +1,688 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_TYPE_H_
+#define _IAVF_TYPE_H_
+
+#include "iavf_status.h"
+#include "iavf_osdep.h"
+#include "iavf_register.h"
+#include "iavf_adminq.h"
+#include "iavf_devids.h"
+
+#define IAVF_RXQ_CTX_DBUFF_SHIFT 7
+
+/* IAVF_MASK is a macro used on 32 bit registers */
+#define IAVF_MASK(mask, shift) ((u32)(mask) << (shift))
+
+#define IAVF_MAX_VSI_QP			16
+#define IAVF_MAX_VF_VSI			3
+#define IAVF_MAX_CHAINED_RX_BUFFERS	5
+
+/* forward declaration */
+struct iavf_hw;
+typedef void (*IAVF_ADMINQ_CALLBACK)(struct iavf_hw *, struct iavf_aq_desc *);
+
+/* Data type manipulation macros. */
+
+#define IAVF_DESC_UNUSED(R)	\
+	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	(R)->next_to_clean - (R)->next_to_use - 1)
+
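+/* Example: with count == 512, next_to_use == 100 and next_to_clean == 40,
+ * IAVF_DESC_UNUSED gives 512 + 40 - 100 - 1 = 451 free descriptors; one
+ * slot is always held back so a full ring can be told apart from an
+ * empty one.
+ */
+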
+/* bitfields for Tx queue mapping in QTX_CTL */
+#define IAVF_QTX_CTL_VF_QUEUE	0x0
+#define IAVF_QTX_CTL_VM_QUEUE	0x1
+#define IAVF_QTX_CTL_PF_QUEUE	0x2
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+enum iavf_debug_mask {
+	IAVF_DEBUG_INIT			= 0x00000001,
+	IAVF_DEBUG_RELEASE		= 0x00000002,
+
+	IAVF_DEBUG_LINK			= 0x00000010,
+	IAVF_DEBUG_PHY			= 0x00000020,
+	IAVF_DEBUG_HMC			= 0x00000040,
+	IAVF_DEBUG_NVM			= 0x00000080,
+	IAVF_DEBUG_LAN			= 0x00000100,
+	IAVF_DEBUG_FLOW			= 0x00000200,
+	IAVF_DEBUG_DCB			= 0x00000400,
+	IAVF_DEBUG_DIAG			= 0x00000800,
+	IAVF_DEBUG_FD			= 0x00001000,
+	IAVF_DEBUG_PACKAGE		= 0x00002000,
+
+	IAVF_DEBUG_AQ_MESSAGE		= 0x01000000,
+	IAVF_DEBUG_AQ_DESCRIPTOR	= 0x02000000,
+	IAVF_DEBUG_AQ_DESC_BUFFER	= 0x04000000,
+	IAVF_DEBUG_AQ_COMMAND		= 0x06000000,
+	IAVF_DEBUG_AQ			= 0x0F000000,
+
+	IAVF_DEBUG_USER			= 0xF0000000,
+
+	IAVF_DEBUG_ALL			= 0xFFFFFFFF
+};
+
+/* These are structs for managing the hardware information and the operations.
+ * The structures of function pointers are filled out at init time when we
+ * know for sure exactly which hardware we're working with.  This gives us the
+ * flexibility of using the same main driver code but adapting to slightly
+ * different hardware needs as new parts are developed.  For this architecture,
+ * the Firmware and AdminQ are intended to insulate the driver from most of the
+ * future changes, but these structures will also do part of the job.
+ */
+enum iavf_mac_type {
+	IAVF_MAC_UNKNOWN = 0,
+	IAVF_MAC_XL710,
+	IAVF_MAC_VF,
+	IAVF_MAC_X722,
+	IAVF_MAC_X722_VF,
+	IAVF_MAC_GENERIC,
+};
+
+enum iavf_vsi_type {
+	IAVF_VSI_MAIN	= 0,
+	IAVF_VSI_VMDQ1	= 1,
+	IAVF_VSI_VMDQ2	= 2,
+	IAVF_VSI_CTRL	= 3,
+	IAVF_VSI_FCOE	= 4,
+	IAVF_VSI_MIRROR	= 5,
+	IAVF_VSI_SRIOV	= 6,
+	IAVF_VSI_FDIR	= 7,
+	IAVF_VSI_TYPE_UNKNOWN
+};
+
+enum iavf_queue_type {
+	IAVF_QUEUE_TYPE_RX = 0,
+	IAVF_QUEUE_TYPE_TX,
+	IAVF_QUEUE_TYPE_PE_CEQ,
+	IAVF_QUEUE_TYPE_UNKNOWN
+};
+
+#define IAVF_HW_CAP_MAX_GPIO		30
+/* Capabilities of a PF or a VF or the whole device */
+struct iavf_hw_capabilities {
+	bool dcb;
+	bool fcoe;
+	u32 num_vsis;
+	u32 num_rx_qp;
+	u32 num_tx_qp;
+	u32 base_queue;
+	u32 num_msix_vectors_vf;
+};
+
+struct iavf_mac_info {
+	enum iavf_mac_type type;
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+	u8 san_addr[ETH_ALEN];
+	u16 max_fcoeq;
+};
+
+/* PCI bus types */
+enum iavf_bus_type {
+	iavf_bus_type_unknown = 0,
+	iavf_bus_type_pci,
+	iavf_bus_type_pcix,
+	iavf_bus_type_pci_express,
+	iavf_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum iavf_bus_speed {
+	iavf_bus_speed_unknown	= 0,
+	iavf_bus_speed_33	= 33,
+	iavf_bus_speed_66	= 66,
+	iavf_bus_speed_100	= 100,
+	iavf_bus_speed_120	= 120,
+	iavf_bus_speed_133	= 133,
+	iavf_bus_speed_2500	= 2500,
+	iavf_bus_speed_5000	= 5000,
+	iavf_bus_speed_8000	= 8000,
+	iavf_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum iavf_bus_width {
+	iavf_bus_width_unknown	= 0,
+	iavf_bus_width_pcie_x1	= 1,
+	iavf_bus_width_pcie_x2	= 2,
+	iavf_bus_width_pcie_x4	= 4,
+	iavf_bus_width_pcie_x8	= 8,
+	iavf_bus_width_32	= 32,
+	iavf_bus_width_64	= 64,
+	iavf_bus_width_reserved
+};
+
+/* Bus parameters */
+struct iavf_bus_info {
+	enum iavf_bus_speed speed;
+	enum iavf_bus_width width;
+	enum iavf_bus_type type;
+
+	u16 func;
+	u16 device;
+	u16 lan_id;
+	u16 bus_id;
+};
+
+#define IAVF_MAX_USER_PRIORITY		8
+/* Port hardware description */
+struct iavf_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+
+	/* subsystem structs */
+	struct iavf_mac_info mac;
+	struct iavf_bus_info bus;
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+
+	/* capabilities for entire device and PCI func */
+	struct iavf_hw_capabilities dev_caps;
+
+	/* Admin Queue info */
+	struct iavf_adminq_info aq;
+
+	/* debug mask */
+	u32 debug_mask;
+	char err_str[16];
+};
+
+struct iavf_driver_version {
+	u8 major_version;
+	u8 minor_version;
+	u8 build_version;
+	u8 subbuild_version;
+	u8 driver_string[32];
+};
+
+/* RX Descriptors */
+union iavf_16byte_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			struct {
+				union {
+					__le16 mirroring_status;
+					__le16 fcoe_ctx_id;
+				} mirr_fcoe;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fd_id; /* Flow director filter id */
+				__le32 fcoe_param; /* FCoE DDP Context id */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* ext status/error/pktype/length */
+			__le64 status_error_len;
+		} qword1;
+	} wb;  /* writeback */
+};
+
+union iavf_32byte_rx_desc {
+	struct {
+		__le64  pkt_addr; /* Packet buffer address */
+		__le64  hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_buffer_addr is DD bit */
+		__le64  rsvd1;
+		__le64  rsvd2;
+	} read;
+	struct {
+		struct {
+			struct {
+				union {
+					__le16 mirroring_status;
+					__le16 fcoe_ctx_id;
+				} mirr_fcoe;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fcoe_param; /* FCoE DDP Context id */
+				/* Flow director filter id in case of
+				 * Programming status desc WB
+				 */
+				__le32 fd_id;
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/pktype/length */
+			__le64 status_error_len;
+		} qword1;
+		struct {
+			__le16 ext_status; /* extended status */
+			__le16 rsvd;
+			__le16 l2tag2_1;
+			__le16 l2tag2_2;
+		} qword2;
+		struct {
+			union {
+				__le32 flex_bytes_lo;
+				__le32 pe_status;
+			} lo_dword;
+			union {
+				__le32 flex_bytes_hi;
+				__le32 fd_id;
+			} hi_dword;
+		} qword3;
+	} wb;  /* writeback */
+};
+
+enum iavf_rx_desc_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_STATUS_DD_SHIFT		= 0,
+	IAVF_RX_DESC_STATUS_EOF_SHIFT		= 1,
+	IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT	= 2,
+	IAVF_RX_DESC_STATUS_L3L4P_SHIFT		= 3,
+	IAVF_RX_DESC_STATUS_CRCP_SHIFT		= 4,
+	IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT	= 5, /* 2 BITS */
+	IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT	= 7,
+	/* Note: Bit 8 is reserved in X710 and XL710 */
+	IAVF_RX_DESC_STATUS_EXT_UDP_0_SHIFT	= 8,
+	IAVF_RX_DESC_STATUS_UMBCAST_SHIFT	= 9, /* 2 BITS */
+	IAVF_RX_DESC_STATUS_FLM_SHIFT		= 11,
+	IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT	= 12, /* 2 BITS */
+	IAVF_RX_DESC_STATUS_LPBK_SHIFT		= 14,
+	IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT	= 15,
+	IAVF_RX_DESC_STATUS_RESERVED_SHIFT	= 16, /* 2 BITS */
+	/* Note: For non-tunnel packets, INT_UDP_0 is the right status for
+	 * the UDP header
+	 */
+	IAVF_RX_DESC_STATUS_INT_UDP_0_SHIFT	= 18,
+	IAVF_RX_DESC_STATUS_LAST /* this entry must be last!!! */
+};
+
+#define IAVF_RXD_QW1_STATUS_SHIFT	0
+#define IAVF_RXD_QW1_STATUS_MASK	((BIT(IAVF_RX_DESC_STATUS_LAST) - 1) \
+					 << IAVF_RXD_QW1_STATUS_SHIFT)
+
+#define IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT
+#define IAVF_RXD_QW1_STATUS_TSYNINDX_MASK  (0x3UL << \
+					    IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT)
+
+#define IAVF_RXD_QW1_STATUS_TSYNVALID_SHIFT IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT
+#define IAVF_RXD_QW1_STATUS_TSYNVALID_MASK \
+				    BIT_ULL(IAVF_RXD_QW1_STATUS_TSYNVALID_SHIFT)
+
+enum iavf_rx_desc_fltstat_values {
+	IAVF_RX_DESC_FLTSTAT_NO_DATA	= 0,
+	IAVF_RX_DESC_FLTSTAT_RSV_FD_ID	= 1, /* 16byte desc? FD_ID : RSV */
+	IAVF_RX_DESC_FLTSTAT_RSV	= 2,
+	IAVF_RX_DESC_FLTSTAT_RSS_HASH	= 3,
+};
+
+#define IAVF_RXD_QW1_ERROR_SHIFT	19
+#define IAVF_RXD_QW1_ERROR_MASK		(0xFFUL << IAVF_RXD_QW1_ERROR_SHIFT)
+
+enum iavf_rx_desc_error_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_ERROR_RXE_SHIFT		= 0,
+	IAVF_RX_DESC_ERROR_RECIPE_SHIFT		= 1,
+	IAVF_RX_DESC_ERROR_HBO_SHIFT		= 2,
+	IAVF_RX_DESC_ERROR_L3L4E_SHIFT		= 3, /* 3 BITS */
+	IAVF_RX_DESC_ERROR_IPE_SHIFT		= 3,
+	IAVF_RX_DESC_ERROR_L4E_SHIFT		= 4,
+	IAVF_RX_DESC_ERROR_EIPE_SHIFT		= 5,
+	IAVF_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6,
+	IAVF_RX_DESC_ERROR_PPRS_SHIFT		= 7
+};
+
+enum iavf_rx_desc_error_l3l4e_fcoe_masks {
+	IAVF_RX_DESC_ERROR_L3L4E_NONE		= 0,
+	IAVF_RX_DESC_ERROR_L3L4E_PROT		= 1,
+	IAVF_RX_DESC_ERROR_L3L4E_FC		= 2,
+	IAVF_RX_DESC_ERROR_L3L4E_DMAC_ERR	= 3,
+	IAVF_RX_DESC_ERROR_L3L4E_DMAC_WARN	= 4
+};
+
+#define IAVF_RXD_QW1_PTYPE_SHIFT	30
+#define IAVF_RXD_QW1_PTYPE_MASK		(0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT)
+
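+/* Example: fields are extracted from qword 1 by mask and shift, e.g.
+ *	ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT;
+ */
+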
+/* Packet type non-ip values */
+enum iavf_rx_l2_ptype {
+	IAVF_RX_PTYPE_L2_RESERVED			= 0,
+	IAVF_RX_PTYPE_L2_MAC_PAY2			= 1,
+	IAVF_RX_PTYPE_L2_TIMESYNC_PAY2			= 2,
+	IAVF_RX_PTYPE_L2_FIP_PAY2			= 3,
+	IAVF_RX_PTYPE_L2_OUI_PAY2			= 4,
+	IAVF_RX_PTYPE_L2_MACCNTRL_PAY2			= 5,
+	IAVF_RX_PTYPE_L2_LLDP_PAY2			= 6,
+	IAVF_RX_PTYPE_L2_ECP_PAY2			= 7,
+	IAVF_RX_PTYPE_L2_EVB_PAY2			= 8,
+	IAVF_RX_PTYPE_L2_QCN_PAY2			= 9,
+	IAVF_RX_PTYPE_L2_EAPOL_PAY2			= 10,
+	IAVF_RX_PTYPE_L2_ARP				= 11,
+	IAVF_RX_PTYPE_L2_FCOE_PAY3			= 12,
+	IAVF_RX_PTYPE_L2_FCOE_FCDATA_PAY3		= 13,
+	IAVF_RX_PTYPE_L2_FCOE_FCRDY_PAY3		= 14,
+	IAVF_RX_PTYPE_L2_FCOE_FCRSP_PAY3		= 15,
+	IAVF_RX_PTYPE_L2_FCOE_FCOTHER_PA		= 16,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_PAY3			= 17,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCDATA		= 18,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCRDY			= 19,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCRSP			= 20,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCOTHER		= 21,
+	IAVF_RX_PTYPE_GRENAT4_MAC_PAY3			= 58,
+	IAVF_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4	= 87,
+	IAVF_RX_PTYPE_GRENAT6_MAC_PAY3			= 124,
+	IAVF_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4	= 153
+};
+
+struct iavf_rx_ptype_decoded {
+	u32 ptype:8;
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:1;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+enum iavf_rx_ptype_outer_ip {
+	IAVF_RX_PTYPE_OUTER_L2	= 0,
+	IAVF_RX_PTYPE_OUTER_IP	= 1
+};
+
+enum iavf_rx_ptype_outer_ip_ver {
+	IAVF_RX_PTYPE_OUTER_NONE	= 0,
+	IAVF_RX_PTYPE_OUTER_IPV4	= 0,
+	IAVF_RX_PTYPE_OUTER_IPV6	= 1
+};
+
+enum iavf_rx_ptype_outer_fragmented {
+	IAVF_RX_PTYPE_NOT_FRAG	= 0,
+	IAVF_RX_PTYPE_FRAG	= 1
+};
+
+enum iavf_rx_ptype_tunnel_type {
+	IAVF_RX_PTYPE_TUNNEL_NONE		= 0,
+	IAVF_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	IAVF_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum iavf_rx_ptype_tunnel_end_prot {
+	IAVF_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	IAVF_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	IAVF_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum iavf_rx_ptype_inner_prot {
+	IAVF_RX_PTYPE_INNER_PROT_NONE		= 0,
+	IAVF_RX_PTYPE_INNER_PROT_UDP		= 1,
+	IAVF_RX_PTYPE_INNER_PROT_TCP		= 2,
+	IAVF_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	IAVF_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	IAVF_RX_PTYPE_INNER_PROT_TIMESYNC	= 5
+};
+
+enum iavf_rx_ptype_payload_layer {
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
+#define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT	38
+#define IAVF_RXD_QW1_LENGTH_PBUF_MASK	(0x3FFFULL << \
+					 IAVF_RXD_QW1_LENGTH_PBUF_SHIFT)
+
+#define IAVF_RXD_QW1_LENGTH_HBUF_SHIFT	52
+#define IAVF_RXD_QW1_LENGTH_HBUF_MASK	(0x7FFULL << \
+					 IAVF_RXD_QW1_LENGTH_HBUF_SHIFT)
+
+#define IAVF_RXD_QW1_LENGTH_SPH_SHIFT	63
+#define IAVF_RXD_QW1_LENGTH_SPH_MASK	BIT_ULL(IAVF_RXD_QW1_LENGTH_SPH_SHIFT)
+
+enum iavf_rx_desc_ext_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT	= 0,
+	IAVF_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT	= 1,
+	IAVF_RX_DESC_EXT_STATUS_FLEXBL_SHIFT	= 2, /* 2 BITS */
+	IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT	= 4, /* 2 BITS */
+	IAVF_RX_DESC_EXT_STATUS_FDLONGB_SHIFT	= 9,
+	IAVF_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT	= 10,
+	IAVF_RX_DESC_EXT_STATUS_PELONGB_SHIFT	= 11,
+};
+
+enum iavf_rx_desc_pe_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_PE_STATUS_QPID_SHIFT	= 0, /* 18 BITS */
+	IAVF_RX_DESC_PE_STATUS_L4PORT_SHIFT	= 0, /* 16 BITS */
+	IAVF_RX_DESC_PE_STATUS_IPINDEX_SHIFT	= 16, /* 8 BITS */
+	IAVF_RX_DESC_PE_STATUS_QPIDHIT_SHIFT	= 24,
+	IAVF_RX_DESC_PE_STATUS_APBVTHIT_SHIFT	= 25,
+	IAVF_RX_DESC_PE_STATUS_PORTV_SHIFT	= 26,
+	IAVF_RX_DESC_PE_STATUS_URG_SHIFT	= 27,
+	IAVF_RX_DESC_PE_STATUS_IPFRAG_SHIFT	= 28,
+	IAVF_RX_DESC_PE_STATUS_IPOPT_SHIFT	= 29
+};
+
+#define IAVF_RX_PROG_STATUS_DESC_LENGTH_SHIFT		38
+#define IAVF_RX_PROG_STATUS_DESC_LENGTH			0x2000000
+
+#define IAVF_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT	2
+#define IAVF_RX_PROG_STATUS_DESC_QW1_PROGID_MASK	(0x7UL << \
+				IAVF_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT)
+
+#define IAVF_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT	19
+#define IAVF_RX_PROG_STATUS_DESC_QW1_ERROR_MASK		(0x3FUL << \
+				IAVF_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT)
+
+enum iavf_rx_prog_status_desc_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_PROG_STATUS_DESC_DD_SHIFT	= 0,
+	IAVF_RX_PROG_STATUS_DESC_PROG_ID_SHIFT	= 2 /* 3 BITS */
+};
+
+enum iavf_rx_prog_status_desc_prog_id_masks {
+	IAVF_RX_PROG_STATUS_DESC_FD_FILTER_STATUS	= 1,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS	= 2,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS	= 4,
+};
+
+enum iavf_rx_prog_status_desc_error_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT	= 0,
+	IAVF_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT	= 1,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT	= 2,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT	= 3
+};
+
+/* TX Descriptor */
+struct iavf_tx_desc {
+	__le64 buffer_addr; /* Address of descriptor's data buf */
+	__le64 cmd_type_offset_bsz;
+};
+
+#define IAVF_TXD_QW1_DTYPE_SHIFT	0
+#define IAVF_TXD_QW1_DTYPE_MASK		(0xFUL << IAVF_TXD_QW1_DTYPE_SHIFT)
+
+enum iavf_tx_desc_dtype_value {
+	IAVF_TX_DESC_DTYPE_DATA		= 0x0,
+	IAVF_TX_DESC_DTYPE_NOP		= 0x1, /* same as Context desc */
+	IAVF_TX_DESC_DTYPE_CONTEXT	= 0x1,
+	IAVF_TX_DESC_DTYPE_FCOE_CTX	= 0x2,
+	IAVF_TX_DESC_DTYPE_FILTER_PROG	= 0x8,
+	IAVF_TX_DESC_DTYPE_DDP_CTX	= 0x9,
+	IAVF_TX_DESC_DTYPE_FLEX_DATA	= 0xB,
+	IAVF_TX_DESC_DTYPE_FLEX_CTX_1	= 0xC,
+	IAVF_TX_DESC_DTYPE_FLEX_CTX_2	= 0xD,
+	IAVF_TX_DESC_DTYPE_DESC_DONE	= 0xF
+};
+
+#define IAVF_TXD_QW1_CMD_SHIFT	4
+#define IAVF_TXD_QW1_CMD_MASK	(0x3FFUL << IAVF_TXD_QW1_CMD_SHIFT)
+
+enum iavf_tx_desc_cmd_bits {
+	IAVF_TX_DESC_CMD_EOP			= 0x0001,
+	IAVF_TX_DESC_CMD_RS			= 0x0002,
+	IAVF_TX_DESC_CMD_ICRC			= 0x0004,
+	IAVF_TX_DESC_CMD_IL2TAG1		= 0x0008,
+	IAVF_TX_DESC_CMD_DUMMY			= 0x0010,
+	IAVF_TX_DESC_CMD_IIPT_NONIP		= 0x0000, /* 2 BITS */
+	IAVF_TX_DESC_CMD_IIPT_IPV6		= 0x0020, /* 2 BITS */
+	IAVF_TX_DESC_CMD_IIPT_IPV4		= 0x0040, /* 2 BITS */
+	IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060, /* 2 BITS */
+	IAVF_TX_DESC_CMD_FCOET			= 0x0080,
+	IAVF_TX_DESC_CMD_L4T_EOFT_UNK		= 0x0000, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_N		= 0x0000, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_T		= 0x0100, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_NI	= 0x0200, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_A		= 0x0300, /* 2 BITS */
+};
+
+#define IAVF_TXD_QW1_OFFSET_SHIFT	16
+#define IAVF_TXD_QW1_OFFSET_MASK	(0x3FFFFULL << \
+					 IAVF_TXD_QW1_OFFSET_SHIFT)
+
+enum iavf_tx_desc_length_fields {
+	/* Note: These are predefined bit offsets */
+	IAVF_TX_DESC_LENGTH_MACLEN_SHIFT	= 0, /* 7 BITS */
+	IAVF_TX_DESC_LENGTH_IPLEN_SHIFT		= 7, /* 7 BITS */
+	IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT	= 14 /* 4 BITS */
+};
+
+#define IAVF_TXD_QW1_TX_BUF_SZ_SHIFT	34
+#define IAVF_TXD_QW1_TX_BUF_SZ_MASK	(0x3FFFULL << \
+					 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
+
+#define IAVF_TXD_QW1_L2TAG1_SHIFT	48
+#define IAVF_TXD_QW1_L2TAG1_MASK	(0xFFFFULL << IAVF_TXD_QW1_L2TAG1_SHIFT)
+
+/* Context descriptors */
+struct iavf_tx_context_desc {
+	__le32 tunneling_params;
+	__le16 l2tag2;
+	__le16 rsvd;
+	__le64 type_cmd_tso_mss;
+};
+
+#define IAVF_TXD_CTX_QW1_CMD_SHIFT	4
+#define IAVF_TXD_CTX_QW1_CMD_MASK	(0xFFFFUL << IAVF_TXD_CTX_QW1_CMD_SHIFT)
+
+enum iavf_tx_ctx_desc_cmd_bits {
+	IAVF_TX_CTX_DESC_TSO		= 0x01,
+	IAVF_TX_CTX_DESC_TSYN		= 0x02,
+	IAVF_TX_CTX_DESC_IL2TAG2	= 0x04,
+	IAVF_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
+	IAVF_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	IAVF_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	IAVF_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	IAVF_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	IAVF_TX_CTX_DESC_SWPE		= 0x40
+};
+
+/* Packet Classifier Types for filters */
+enum iavf_filter_pctype {
+	/* Note: Values 0-28 are reserved for future use.
+	 * Values 29, 30 and 32 are not supported on XL710 and X710.
+	 */
+	IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP	= 29,
+	IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP	= 30,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_UDP		= 31,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK	= 32,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_TCP		= 33,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_SCTP		= 34,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_OTHER		= 35,
+	IAVF_FILTER_PCTYPE_FRAG_IPV4			= 36,
+	/* Note: Values 37-38 are reserved for future use.
+	 * Values 39, 40 and 42 are not supported on XL710 and X710.
+	 */
+	IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP	= 39,
+	IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP	= 40,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_UDP		= 41,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK	= 42,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_TCP		= 43,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_SCTP		= 44,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_OTHER		= 45,
+	IAVF_FILTER_PCTYPE_FRAG_IPV6			= 46,
+	/* Note: Value 47 is reserved for future use */
+	IAVF_FILTER_PCTYPE_FCOE_OX			= 48,
+	IAVF_FILTER_PCTYPE_FCOE_RX			= 49,
+	IAVF_FILTER_PCTYPE_FCOE_OTHER			= 50,
+	/* Note: Values 51-62 are reserved for future use */
+	IAVF_FILTER_PCTYPE_L2_PAYLOAD			= 63,
+};
+
+#define IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT	30
+#define IAVF_TXD_CTX_QW1_TSO_LEN_MASK	(0x3FFFFULL << \
+					 IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT)
+
+#define IAVF_TXD_CTX_QW1_MSS_SHIFT	50
+#define IAVF_TXD_CTX_QW1_MSS_MASK	(0x3FFFULL << \
+					 IAVF_TXD_CTX_QW1_MSS_SHIFT)
+
+#define IAVF_TXD_CTX_QW1_VSI_SHIFT	50
+#define IAVF_TXD_CTX_QW1_VSI_MASK	(0x1FFULL << IAVF_TXD_CTX_QW1_VSI_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_EXT_IP_SHIFT	0
+#define IAVF_TXD_CTX_QW0_EXT_IP_MASK	(0x3ULL << \
+					 IAVF_TXD_CTX_QW0_EXT_IP_SHIFT)
+
+enum iavf_tx_ctx_desc_eipt_offload {
+	IAVF_TX_CTX_EXT_IP_NONE		= 0x0,
+	IAVF_TX_CTX_EXT_IP_IPV6		= 0x1,
+	IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM	= 0x2,
+	IAVF_TX_CTX_EXT_IP_IPV4		= 0x3
+};
+
+#define IAVF_TXD_CTX_QW0_EXT_IPLEN_SHIFT	2
+#define IAVF_TXD_CTX_QW0_EXT_IPLEN_MASK	(0x3FULL << \
+					 IAVF_TXD_CTX_QW0_EXT_IPLEN_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_NATT_SHIFT	9
+#define IAVF_TXD_CTX_QW0_NATT_MASK	(0x3ULL << IAVF_TXD_CTX_QW0_NATT_SHIFT)
+
+#define IAVF_TXD_CTX_UDP_TUNNELING	BIT_ULL(IAVF_TXD_CTX_QW0_NATT_SHIFT)
+#define IAVF_TXD_CTX_GRE_TUNNELING	(0x2ULL << IAVF_TXD_CTX_QW0_NATT_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_EIP_NOINC_SHIFT	11
+#define IAVF_TXD_CTX_QW0_EIP_NOINC_MASK \
+				       BIT_ULL(IAVF_TXD_CTX_QW0_EIP_NOINC_SHIFT)
+
+#define IAVF_TXD_CTX_EIP_NOINC_IPID_CONST	IAVF_TXD_CTX_QW0_EIP_NOINC_MASK
+
+#define IAVF_TXD_CTX_QW0_NATLEN_SHIFT	12
+#define IAVF_TXD_CTX_QW0_NATLEN_MASK	(0X7FULL << \
+					 IAVF_TXD_CTX_QW0_NATLEN_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_DECTTL_SHIFT	19
+#define IAVF_TXD_CTX_QW0_DECTTL_MASK	(0xFULL << \
+					 IAVF_TXD_CTX_QW0_DECTTL_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_L4T_CS_SHIFT	23
+#define IAVF_TXD_CTX_QW0_L4T_CS_MASK	BIT_ULL(IAVF_TXD_CTX_QW0_L4T_CS_SHIFT)
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct iavf_eth_stats {
+	u64 rx_bytes;			/* gorc */
+	u64 rx_unicast;			/* uprc */
+	u64 rx_multicast;		/* mprc */
+	u64 rx_broadcast;		/* bprc */
+	u64 rx_discards;		/* rdpc */
+	u64 rx_unknown_protocol;	/* rupp */
+	u64 tx_bytes;			/* gotc */
+	u64 tx_unicast;			/* uptc */
+	u64 tx_multicast;		/* mptc */
+	u64 tx_broadcast;		/* bptc */
+	u64 tx_discards;		/* tdpc */
+	u64 tx_errors;			/* tepc */
+};
+#endif /* _IAVF_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
similarity index 68%
rename from drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
rename to drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 565677d..c46770e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1,16 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
-#include "i40evf.h"
-#include "i40e_prototype.h"
-#include "i40evf_client.h"
+#include "iavf.h"
+#include "iavf_prototype.h"
+#include "iavf_client.h"
 
 /* busy wait delay in msec */
-#define I40EVF_BUSY_WAIT_DELAY 10
-#define I40EVF_BUSY_WAIT_COUNT 50
+#define IAVF_BUSY_WAIT_DELAY 10
+#define IAVF_BUSY_WAIT_COUNT 50
 
 /**
- * i40evf_send_pf_msg
+ * iavf_send_pf_msg
  * @adapter: adapter structure
  * @op: virtual channel opcode
  * @msg: pointer to message buffer
@@ -18,60 +18,60 @@
  *
  * Send message to PF and print status if failure.
  **/
-static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
-			      enum virtchnl_ops op, u8 *msg, u16 len)
+static int iavf_send_pf_msg(struct iavf_adapter *adapter,
+			    enum virtchnl_ops op, u8 *msg, u16 len)
 {
-	struct i40e_hw *hw = &adapter->hw;
-	i40e_status err;
+	struct iavf_hw *hw = &adapter->hw;
+	enum iavf_status err;
 
-	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
 		return 0; /* nothing to see here, move along */
 
-	err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
+	err = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
 	if (err)
 		dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n",
-			op, i40evf_stat_str(hw, err),
-			i40evf_aq_str(hw, hw->aq.asq_last_status));
+			op, iavf_stat_str(hw, err),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
 	return err;
 }
 
 /**
- * i40evf_send_api_ver
+ * iavf_send_api_ver
  * @adapter: adapter structure
  *
  * Send API version admin queue message to the PF. The reply is not checked
  * in this function. Returns 0 if the message was successfully
- * sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not.
+ * sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not.
  **/
-int i40evf_send_api_ver(struct i40evf_adapter *adapter)
+int iavf_send_api_ver(struct iavf_adapter *adapter)
 {
 	struct virtchnl_version_info vvi;
 
 	vvi.major = VIRTCHNL_VERSION_MAJOR;
 	vvi.minor = VIRTCHNL_VERSION_MINOR;
 
-	return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_VERSION, (u8 *)&vvi,
-				  sizeof(vvi));
+	return iavf_send_pf_msg(adapter, VIRTCHNL_OP_VERSION, (u8 *)&vvi,
+				sizeof(vvi));
 }
 
 /**
- * i40evf_verify_api_ver
+ * iavf_verify_api_ver
  * @adapter: adapter structure
  *
  * Compare API versions with the PF. Must be called after admin queue is
  * initialized. Returns 0 if API versions match, -EIO if they do not,
- * I40E_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors
+ * IAVF_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors
  * from the firmware are propagated.
  **/
-int i40evf_verify_api_ver(struct i40evf_adapter *adapter)
+int iavf_verify_api_ver(struct iavf_adapter *adapter)
 {
 	struct virtchnl_version_info *pf_vvi;
-	struct i40e_hw *hw = &adapter->hw;
-	struct i40e_arq_event_info event;
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_arq_event_info event;
 	enum virtchnl_ops op;
-	i40e_status err;
+	enum iavf_status err;
 
-	event.buf_len = I40EVF_MAX_AQ_BUF_SIZE;
+	event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
 	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
 	if (!event.msg_buf) {
 		err = -ENOMEM;
@@ -79,8 +79,8 @@
 	}
 
 	while (1) {
-		err = i40evf_clean_arq_element(hw, &event, NULL);
-		/* When the AQ is empty, i40evf_clean_arq_element will return
+		err = iavf_clean_arq_element(hw, &event, NULL);
+		/* When the AQ is empty, iavf_clean_arq_element will return
 		 * nonzero and this loop will terminate.
 		 */
 		if (err)
@@ -92,7 +92,7 @@
 	}
 
 
-	err = (i40e_status)le32_to_cpu(event.desc.cookie_low);
+	err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low);
 	if (err)
 		goto out_alloc;
 
@@ -118,14 +118,14 @@
 }
 
 /**
- * i40evf_send_vf_config_msg
+ * iavf_send_vf_config_msg
  * @adapter: adapter structure
  *
  * Send VF configuration request admin queue message to the PF. The reply
  * is not checked in this function. Returns 0 if the message was
- * successfully sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not.
+ * successfully sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not.
  **/
-int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
+int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
 {
 	u32 caps;
 
@@ -142,19 +142,43 @@
 	       VIRTCHNL_VF_OFFLOAD_ADQ;
 
 	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_GET_CONFIG;
 	if (PF_IS_V11(adapter))
-		return i40evf_send_pf_msg(adapter,
-					  VIRTCHNL_OP_GET_VF_RESOURCES,
-					  (u8 *)&caps, sizeof(caps));
+		return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES,
+					(u8 *)&caps, sizeof(caps));
 	else
-		return i40evf_send_pf_msg(adapter,
-					  VIRTCHNL_OP_GET_VF_RESOURCES,
-					  NULL, 0);
+		return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES,
+					NULL, 0);
 }
 
 /**
- * i40evf_get_vf_config
+ * iavf_validate_num_queues
+ * @adapter: adapter structure
+ *
+ * Validate that the number of queues the PF has sent in
+ * VIRTCHNL_OP_GET_VF_RESOURCES is not larger than the VF can handle.
+ **/
+static void iavf_validate_num_queues(struct iavf_adapter *adapter)
+{
+	if (adapter->vf_res->num_queue_pairs > IAVF_MAX_REQ_QUEUES) {
+		struct virtchnl_vsi_resource *vsi_res;
+		int i;
+
+		dev_info(&adapter->pdev->dev, "Received %d queues, but can only have a max of %d\n",
+			 adapter->vf_res->num_queue_pairs,
+			 IAVF_MAX_REQ_QUEUES);
+		dev_info(&adapter->pdev->dev, "Fixing by reducing queues to %d\n",
+			 IAVF_MAX_REQ_QUEUES);
+		adapter->vf_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES;
+		for (i = 0; i < adapter->vf_res->num_vsis; i++) {
+			vsi_res = &adapter->vf_res->vsi_res[i];
+			vsi_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES;
+		}
+	}
+}
+
+/**
+ * iavf_get_vf_config
  * @adapter: private adapter structure
  *
  * Get VF configuration from PF and populate hw structure. Must be called after
@@ -162,16 +186,16 @@
  * with maximum timeout. Response from PF is returned in the buffer for further
  * processing by the caller.
  **/
-int i40evf_get_vf_config(struct i40evf_adapter *adapter)
+int iavf_get_vf_config(struct iavf_adapter *adapter)
 {
-	struct i40e_hw *hw = &adapter->hw;
-	struct i40e_arq_event_info event;
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_arq_event_info event;
 	enum virtchnl_ops op;
-	i40e_status err;
+	enum iavf_status err;
 	u16 len;
 
 	len =  sizeof(struct virtchnl_vf_resource) +
-		I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource);
+		IAVF_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource);
 	event.buf_len = len;
 	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
 	if (!event.msg_buf) {
@@ -180,10 +204,10 @@
 	}
 
 	while (1) {
-		/* When the AQ is empty, i40evf_clean_arq_element will return
+		/* When the AQ is empty, iavf_clean_arq_element will return
 		 * nonzero and this loop will terminate.
 		 */
-		err = i40evf_clean_arq_element(hw, &event, NULL);
+		err = iavf_clean_arq_element(hw, &event, NULL);
 		if (err)
 			goto out_alloc;
 		op =
@@ -192,10 +216,15 @@
 			break;
 	}
 
-	err = (i40e_status)le32_to_cpu(event.desc.cookie_low);
+	err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low);
 	memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len));
 
-	i40e_vf_parse_hw_config(hw, adapter->vf_res);
+	/* some PFs send more queues than we should have so validate that
+	 * we aren't getting too many queues
+	 */
+	if (!err)
+		iavf_validate_num_queues(adapter);
+	iavf_vf_parse_hw_config(hw, adapter->vf_res);
 out_alloc:
 	kfree(event.msg_buf);
 out:
@@ -203,17 +232,18 @@
 }
 
 /**
- * i40evf_configure_queues
+ * iavf_configure_queues
  * @adapter: adapter structure
  *
  * Request that the PF set up our (previously allocated) queues.
  **/
-void i40evf_configure_queues(struct i40evf_adapter *adapter)
+void iavf_configure_queues(struct iavf_adapter *adapter)
 {
 	struct virtchnl_vsi_queue_config_info *vqci;
 	struct virtchnl_queue_pair_info *vqpi;
 	int pairs = adapter->num_active_queues;
-	int i, len, max_frame = I40E_MAX_RXBUFFER;
+	int i, max_frame = IAVF_MAX_RXBUFFER;
+	size_t len;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -222,16 +252,15 @@
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES;
-	len = sizeof(struct virtchnl_vsi_queue_config_info) +
-		       (sizeof(struct virtchnl_queue_pair_info) * pairs);
+	len = struct_size(vqci, qpair, pairs);
 	vqci = kzalloc(len, GFP_KERNEL);
 	if (!vqci)
 		return;
 
 	/* Limit maximum frame size when jumbo frames is not enabled */
-	if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX) &&
+	if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) &&
 	    (adapter->netdev->mtu <= ETH_DATA_LEN))
-		max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+		max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN;
 
 	vqci->vsi_id = adapter->vsi_res->vsi_id;
 	vqci->num_queue_pairs = pairs;
@@ -251,23 +280,23 @@
 		vqpi->rxq.max_pkt_size = max_frame;
 		vqpi->rxq.databuffer_size =
 			ALIGN(adapter->rx_rings[i].rx_buf_len,
-			      BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
+			      BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT));
 		vqpi++;
 	}
 
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_QUEUES;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
-			   (u8 *)vqci, len);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_QUEUES;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+			 (u8 *)vqci, len);
 	kfree(vqci);
 }
 
 /**
- * i40evf_enable_queues
+ * iavf_enable_queues
  * @adapter: adapter structure
  *
  * Request that the PF enable all of our queues.
  **/
-void i40evf_enable_queues(struct i40evf_adapter *adapter)
+void iavf_enable_queues(struct iavf_adapter *adapter)
 {
 	struct virtchnl_queue_select vqs;
 
@@ -281,18 +310,18 @@
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	vqs.tx_queues = BIT(adapter->num_active_queues) - 1;
 	vqs.rx_queues = vqs.tx_queues;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_QUEUES;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES,
-			   (u8 *)&vqs, sizeof(vqs));
+	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_QUEUES;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES,
+			 (u8 *)&vqs, sizeof(vqs));
 }
 
 /**
- * i40evf_disable_queues
+ * iavf_disable_queues
  * @adapter: adapter structure
  *
  * Request that the PF disable all of our queues.
  **/
-void i40evf_disable_queues(struct i40evf_adapter *adapter)
+void iavf_disable_queues(struct iavf_adapter *adapter)
 {
 	struct virtchnl_queue_select vqs;
 
@@ -306,24 +335,25 @@
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	vqs.tx_queues = BIT(adapter->num_active_queues) - 1;
 	vqs.rx_queues = vqs.tx_queues;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_QUEUES;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES,
-			   (u8 *)&vqs, sizeof(vqs));
+	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_QUEUES;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES,
+			 (u8 *)&vqs, sizeof(vqs));
 }
 
 /**
- * i40evf_map_queues
+ * iavf_map_queues
  * @adapter: adapter structure
  *
  * Request that the PF map queues to interrupt vectors. Misc causes, including
  * admin queue, are always mapped to vector 0.
  **/
-void i40evf_map_queues(struct i40evf_adapter *adapter)
+void iavf_map_queues(struct iavf_adapter *adapter)
 {
 	struct virtchnl_irq_map_info *vimi;
 	struct virtchnl_vector_map *vecmap;
-	int v_idx, q_vectors, len;
-	struct i40e_q_vector *q_vector;
+	struct iavf_q_vector *q_vector;
+	int v_idx, q_vectors;
+	size_t len;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -335,9 +365,7 @@
 
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
-	len = sizeof(struct virtchnl_irq_map_info) +
-	      (adapter->num_msix_vectors *
-		sizeof(struct virtchnl_vector_map));
+	len = struct_size(vimi, vecmap, adapter->num_msix_vectors);
 	vimi = kzalloc(len, GFP_KERNEL);
 	if (!vimi)
 		return;
@@ -352,8 +380,8 @@
 		vecmap->vector_id = v_idx + NONQ_VECS;
 		vecmap->txq_map = q_vector->ring_mask;
 		vecmap->rxq_map = q_vector->ring_mask;
-		vecmap->rxitr_idx = I40E_RX_ITR;
-		vecmap->txitr_idx = I40E_TX_ITR;
+		vecmap->rxitr_idx = IAVF_RX_ITR;
+		vecmap->txitr_idx = IAVF_TX_ITR;
 	}
 	/* Misc vector last - this is only for AdminQ messages */
 	vecmap = &vimi->vecmap[v_idx];
@@ -362,21 +390,21 @@
 	vecmap->txq_map = 0;
 	vecmap->rxq_map = 0;
 
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
-			   (u8 *)vimi, len);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_MAP_VECTORS;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+			 (u8 *)vimi, len);
 	kfree(vimi);
 }
 
 /**
- * i40evf_request_queues
+ * iavf_request_queues
  * @adapter: adapter structure
  * @num: number of requested queues
  *
  * We get a default number of queues from the PF.  This enables us to request a
  * different number.  Returns 0 on success, negative on failure
  **/
-int i40evf_request_queues(struct i40evf_adapter *adapter, int num)
+int iavf_request_queues(struct iavf_adapter *adapter, int num)
 {
 	struct virtchnl_vf_res_request vfres;
 
@@ -387,26 +415,27 @@
 		return -EBUSY;
 	}
 
-	vfres.num_queue_pairs = num;
+	vfres.num_queue_pairs = min_t(int, num, num_online_cpus());
 
 	adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES;
-	adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
-	return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES,
-				  (u8 *)&vfres, sizeof(vfres));
+	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+	return iavf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES,
+				(u8 *)&vfres, sizeof(vfres));
 }
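
The min_t() clamp added above keeps the VF from asking the PF for more queue
pairs than there are online CPUs, since queues beyond that count could not
each get a distinct vector anyway. min_t() is the type-explicit form of
min(); the sketch below shows the shape of the comparison:

#include <linux/kernel.h>
#include <linux/cpumask.h>

static int demo_clamp_queue_request(int requested)
{
	/* evaluate both operands as int to avoid signedness surprises */
	return min_t(int, requested, num_online_cpus());
}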
 
 /**
- * i40evf_add_ether_addrs
+ * iavf_add_ether_addrs
  * @adapter: adapter structure
  *
  * Request that the PF add one or more addresses to our filters.
  **/
-void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
+void iavf_add_ether_addrs(struct iavf_adapter *adapter)
 {
 	struct virtchnl_ether_addr_list *veal;
-	int len, i = 0, count = 0;
-	struct i40evf_mac_filter *f;
+	struct iavf_mac_filter *f;
+	int i = 0, count = 0;
 	bool more = false;
+	size_t len;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -422,21 +451,19 @@
 			count++;
 	}
 	if (!count) {
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER;
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR;
 
-	len = sizeof(struct virtchnl_ether_addr_list) +
-	      (count * sizeof(struct virtchnl_ether_addr));
-	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
+	len = struct_size(veal, list, count);
+	if (len > IAVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n");
-		count = (I40EVF_MAX_AQ_BUF_SIZE -
+		count = (IAVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct virtchnl_ether_addr_list)) /
 			sizeof(struct virtchnl_ether_addr);
-		len = sizeof(struct virtchnl_ether_addr_list) +
-		      (count * sizeof(struct virtchnl_ether_addr));
+		len = struct_size(veal, list, count);
 		more = true;
 	}
 
@@ -458,27 +485,27 @@
 		}
 	}
 	if (!more)
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER;
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_ETH_ADDR,
-			   (u8 *)veal, len);
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_ETH_ADDR, (u8 *)veal, len);
 	kfree(veal);
 }
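
When the filter list outgrows one admin-queue buffer, the code above trims
count to whatever fits and leaves the AQ_ADD_MAC_FILTER flag set (more =
true) so a later pass sends the remainder. As a worked example with assumed
sizes (a 4096-byte AQ buffer, a 4-byte list header, 8-byte MAC entries --
none of these constants are taken from this diff):

#define DEMO_AQ_BUF_SIZE	4096	/* assumed admin-queue buffer size */
#define DEMO_LIST_HDR		4	/* vsi_id + num_elements */
#define DEMO_MAC_ENTRY		8	/* 6-byte address plus padding */

/* (4096 - 4) / 8 = 511 addresses per message; the rest ride the next one */
#define DEMO_MACS_PER_MSG	((DEMO_AQ_BUF_SIZE - DEMO_LIST_HDR) / DEMO_MAC_ENTRY)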
 
 /**
- * i40evf_del_ether_addrs
+ * iavf_del_ether_addrs
  * @adapter: adapter structure
  *
  * Request that the PF remove one or more addresses from our filters.
  **/
-void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
+void iavf_del_ether_addrs(struct iavf_adapter *adapter)
 {
 	struct virtchnl_ether_addr_list *veal;
-	struct i40evf_mac_filter *f, *ftmp;
-	int len, i = 0, count = 0;
+	struct iavf_mac_filter *f, *ftmp;
+	int i = 0, count = 0;
 	bool more = false;
+	size_t len;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -494,21 +521,19 @@
 			count++;
 	}
 	if (!count) {
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER;
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR;
 
-	len = sizeof(struct virtchnl_ether_addr_list) +
-	      (count * sizeof(struct virtchnl_ether_addr));
-	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
+	len = struct_size(veal, list, count);
+	if (len > IAVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n");
-		count = (I40EVF_MAX_AQ_BUF_SIZE -
+		count = (IAVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct virtchnl_ether_addr_list)) /
 			sizeof(struct virtchnl_ether_addr);
-		len = sizeof(struct virtchnl_ether_addr_list) +
-		      (count * sizeof(struct virtchnl_ether_addr));
+		len = struct_size(veal, list, count);
 		more = true;
 	}
 	veal = kzalloc(len, GFP_ATOMIC);
@@ -530,26 +555,25 @@
 		}
 	}
 	if (!more)
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER;
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_ETH_ADDR,
-			   (u8 *)veal, len);
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_ETH_ADDR, (u8 *)veal, len);
 	kfree(veal);
 }
 
 /**
- * i40evf_add_vlans
+ * iavf_add_vlans
  * @adapter: adapter structure
  *
  * Request that the PF add one or more VLAN filters to our VSI.
  **/
-void i40evf_add_vlans(struct i40evf_adapter *adapter)
+void iavf_add_vlans(struct iavf_adapter *adapter)
 {
 	struct virtchnl_vlan_filter_list *vvfl;
 	int len, i = 0, count = 0;
-	struct i40evf_vlan_filter *f;
+	struct iavf_vlan_filter *f;
 	bool more = false;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
@@ -566,7 +590,7 @@
 			count++;
 	}
 	if (!count) {
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
 	}
@@ -574,9 +598,9 @@
 
 	len = sizeof(struct virtchnl_vlan_filter_list) +
 	      (count * sizeof(u16));
-	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
+	if (len > IAVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n");
-		count = (I40EVF_MAX_AQ_BUF_SIZE -
+		count = (IAVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct virtchnl_vlan_filter_list)) /
 			sizeof(u16);
 		len = sizeof(struct virtchnl_vlan_filter_list) +
@@ -601,24 +625,24 @@
 		}
 	}
 	if (!more)
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
 	kfree(vvfl);
 }
 
 /**
- * i40evf_del_vlans
+ * iavf_del_vlans
  * @adapter: adapter structure
  *
  * Request that the PF remove one or more VLAN filters from our VSI.
  **/
-void i40evf_del_vlans(struct i40evf_adapter *adapter)
+void iavf_del_vlans(struct iavf_adapter *adapter)
 {
 	struct virtchnl_vlan_filter_list *vvfl;
-	struct i40evf_vlan_filter *f, *ftmp;
+	struct iavf_vlan_filter *f, *ftmp;
 	int len, i = 0, count = 0;
 	bool more = false;
 
@@ -636,7 +660,7 @@
 			count++;
 	}
 	if (!count) {
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
 	}
@@ -644,9 +668,9 @@
 
 	len = sizeof(struct virtchnl_vlan_filter_list) +
 	      (count * sizeof(u16));
-	if (len > I40EVF_MAX_AQ_BUF_SIZE) {
+	if (len > IAVF_MAX_AQ_BUF_SIZE) {
 		dev_warn(&adapter->pdev->dev, "Too many delete VLAN changes in one request\n");
-		count = (I40EVF_MAX_AQ_BUF_SIZE -
+		count = (IAVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct virtchnl_vlan_filter_list)) /
 			sizeof(u16);
 		len = sizeof(struct virtchnl_vlan_filter_list) +
@@ -672,22 +696,22 @@
 		}
 	}
 	if (!more)
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len);
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len);
 	kfree(vvfl);
 }
 
 /**
- * i40evf_set_promiscuous
+ * iavf_set_promiscuous
  * @adapter: adapter structure
  * @flags: bitmask to control unicast/multicast promiscuous.
  *
  * Request that the PF enable promiscuous mode for our VSI.
  **/
-void i40evf_set_promiscuous(struct i40evf_adapter *adapter, int flags)
+void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags)
 {
 	struct virtchnl_promisc_info vpi;
 	int promisc_all;
@@ -702,39 +726,39 @@
 	promisc_all = FLAG_VF_UNICAST_PROMISC |
 		      FLAG_VF_MULTICAST_PROMISC;
 	if ((flags & promisc_all) == promisc_all) {
-		adapter->flags |= I40EVF_FLAG_PROMISC_ON;
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_REQUEST_PROMISC;
+		adapter->flags |= IAVF_FLAG_PROMISC_ON;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC;
 		dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
 	}
 
 	if (flags & FLAG_VF_MULTICAST_PROMISC) {
-		adapter->flags |= I40EVF_FLAG_ALLMULTI_ON;
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_REQUEST_ALLMULTI;
+		adapter->flags |= IAVF_FLAG_ALLMULTI_ON;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI;
 		dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
 	}
 
 	if (!flags) {
-		adapter->flags &= ~(I40EVF_FLAG_PROMISC_ON |
-				    I40EVF_FLAG_ALLMULTI_ON);
-		adapter->aq_required &= ~(I40EVF_FLAG_AQ_RELEASE_PROMISC |
-					  I40EVF_FLAG_AQ_RELEASE_ALLMULTI);
+		adapter->flags &= ~(IAVF_FLAG_PROMISC_ON |
+				    IAVF_FLAG_ALLMULTI_ON);
+		adapter->aq_required &= ~(IAVF_FLAG_AQ_RELEASE_PROMISC |
+					  IAVF_FLAG_AQ_RELEASE_ALLMULTI);
 		dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
 	}
 
 	adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
 	vpi.vsi_id = adapter->vsi_res->vsi_id;
 	vpi.flags = flags;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
-			   (u8 *)&vpi, sizeof(vpi));
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+			 (u8 *)&vpi, sizeof(vpi));
 }
 
 /**
- * i40evf_request_stats
+ * iavf_request_stats
  * @adapter: adapter structure
  *
  * Request VSI statistics from PF.
  **/
-void i40evf_request_stats(struct i40evf_adapter *adapter)
+void iavf_request_stats(struct iavf_adapter *adapter)
 {
 	struct virtchnl_queue_select vqs;
 
@@ -745,19 +769,19 @@
 	adapter->current_op = VIRTCHNL_OP_GET_STATS;
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	/* queue maps are ignored for this message - only the vsi is used */
-	if (i40evf_send_pf_msg(adapter, VIRTCHNL_OP_GET_STATS,
-			       (u8 *)&vqs, sizeof(vqs)))
+	if (iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_STATS, (u8 *)&vqs,
+			     sizeof(vqs)))
 		/* if the request failed, don't lock out others */
 		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 }
 
 /**
- * i40evf_get_hena
+ * iavf_get_hena
  * @adapter: adapter structure
  *
  * Request hash enable capabilities from PF
  **/
-void i40evf_get_hena(struct i40evf_adapter *adapter)
+void iavf_get_hena(struct iavf_adapter *adapter)
 {
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -766,18 +790,17 @@
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_GET_RSS_HENA_CAPS;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_HENA;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_GET_RSS_HENA_CAPS,
-			   NULL, 0);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_GET_HENA;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_RSS_HENA_CAPS, NULL, 0);
 }
 
 /**
- * i40evf_set_hena
+ * iavf_set_hena
  * @adapter: adapter structure
  *
  * Request the PF to set our RSS hash capabilities
  **/
-void i40evf_set_hena(struct i40evf_adapter *adapter)
+void iavf_set_hena(struct iavf_adapter *adapter)
 {
 	struct virtchnl_rss_hena vrh;
 
@@ -789,18 +812,18 @@
 	}
 	vrh.hena = adapter->hena;
 	adapter->current_op = VIRTCHNL_OP_SET_RSS_HENA;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_HENA;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_SET_RSS_HENA,
-			   (u8 *)&vrh, sizeof(vrh));
+	adapter->aq_required &= ~IAVF_FLAG_AQ_SET_HENA;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_SET_RSS_HENA, (u8 *)&vrh,
+			 sizeof(vrh));
 }
 
 /**
- * i40evf_set_rss_key
+ * iavf_set_rss_key
  * @adapter: adapter structure
  *
  * Request the PF to set our RSS hash key
  **/
-void i40evf_set_rss_key(struct i40evf_adapter *adapter)
+void iavf_set_rss_key(struct iavf_adapter *adapter)
 {
 	struct virtchnl_rss_key *vrk;
 	int len;
@@ -821,19 +844,18 @@
 	memcpy(vrk->key, adapter->rss_key, adapter->rss_key_size);
 
 	adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_KEY;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_RSS_KEY;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_KEY,
-			   (u8 *)vrk, len);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_KEY;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_KEY, (u8 *)vrk, len);
 	kfree(vrk);
 }
 
 /**
- * i40evf_set_rss_lut
+ * iavf_set_rss_lut
  * @adapter: adapter structure
  *
  * Request the PF to set our RSS lookup table
  **/
-void i40evf_set_rss_lut(struct i40evf_adapter *adapter)
+void iavf_set_rss_lut(struct iavf_adapter *adapter)
 {
 	struct virtchnl_rss_lut *vrl;
 	int len;
@@ -853,19 +875,18 @@
 	vrl->lut_entries = adapter->rss_lut_size;
 	memcpy(vrl->lut, adapter->rss_lut, adapter->rss_lut_size);
 	adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_LUT;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_SET_RSS_LUT;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_LUT,
-			   (u8 *)vrl, len);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_LUT;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_LUT, (u8 *)vrl, len);
 	kfree(vrl);
 }
 
 /**
- * i40evf_enable_vlan_stripping
+ * iavf_enable_vlan_stripping
  * @adapter: adapter structure
  *
  * Request VLAN header stripping to be enabled
  **/
-void i40evf_enable_vlan_stripping(struct i40evf_adapter *adapter)
+void iavf_enable_vlan_stripping(struct iavf_adapter *adapter)
 {
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -874,18 +895,17 @@
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
-			   NULL, 0);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, NULL, 0);
 }
 
 /**
- * i40evf_disable_vlan_stripping
+ * iavf_disable_vlan_stripping
  * @adapter: adapter structure
  *
  * Request VLAN header stripping to be disabled
  **/
-void i40evf_disable_vlan_stripping(struct i40evf_adapter *adapter)
+void iavf_disable_vlan_stripping(struct iavf_adapter *adapter)
 {
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -894,18 +914,17 @@
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
-			   NULL, 0);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, NULL, 0);
 }
 
 /**
- * i40evf_print_link_message - print link up or down
+ * iavf_print_link_message - print link up or down
  * @adapter: adapter structure
  *
 * Log a message telling the world of our wondrous link status
  */
-static void i40evf_print_link_message(struct i40evf_adapter *adapter)
+static void iavf_print_link_message(struct iavf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	char *speed = "Unknown ";
@@ -916,22 +935,22 @@
 	}
 
 	switch (adapter->link_speed) {
-	case I40E_LINK_SPEED_40GB:
+	case IAVF_LINK_SPEED_40GB:
 		speed = "40 G";
 		break;
-	case I40E_LINK_SPEED_25GB:
+	case IAVF_LINK_SPEED_25GB:
 		speed = "25 G";
 		break;
-	case I40E_LINK_SPEED_20GB:
+	case IAVF_LINK_SPEED_20GB:
 		speed = "20 G";
 		break;
-	case I40E_LINK_SPEED_10GB:
+	case IAVF_LINK_SPEED_10GB:
 		speed = "10 G";
 		break;
-	case I40E_LINK_SPEED_1GB:
+	case IAVF_LINK_SPEED_1GB:
 		speed = "1000 M";
 		break;
-	case I40E_LINK_SPEED_100MB:
+	case IAVF_LINK_SPEED_100MB:
 		speed = "100 M";
 		break;
 	default:
@@ -942,16 +961,16 @@
 }
 
 /**
- * i40evf_enable_channel
+ * iavf_enable_channels
  * @adapter: adapter structure
  *
  * Request that the PF enable channels as specified by
  * the user via tc tool.
  **/
-void i40evf_enable_channels(struct i40evf_adapter *adapter)
+void iavf_enable_channels(struct iavf_adapter *adapter)
 {
 	struct virtchnl_tc_info *vti = NULL;
-	u16 len;
+	size_t len;
 	int i;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
@@ -961,9 +980,7 @@
 		return;
 	}
 
-	len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
-	       sizeof(struct virtchnl_tc_info);
-
+	len = struct_size(vti, list, adapter->num_tc - 1);
 	vti = kzalloc(len, GFP_KERNEL);
 	if (!vti)
 		return;
@@ -976,22 +993,21 @@
 				adapter->ch_config.ch_info[i].max_tx_rate;
 	}
 
-	adapter->ch_config.state = __I40EVF_TC_RUNNING;
-	adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+	adapter->ch_config.state = __IAVF_TC_RUNNING;
+	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
 	adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
-			   (u8 *)vti, len);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_CHANNELS;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, (u8 *)vti, len);
 	kfree(vti);
 }
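
Unlike the other struct_size() conversions in this patch, this one passes
num_tc - 1: virtchnl_tc_info declares its trailing array as list[1] (the
pre-C99 one-element-array style), so sizeof() already covers the first
entry. A sketch of the difference, using hypothetical struct names:

#include <linux/overflow.h>

struct demo_old_style {		/* one-element array: sizeof() includes list[0] */
	u32 num;
	u32 list[1];
};

struct demo_flex {		/* true flexible array: sizeof() excludes it */
	u32 num;
	u32 list[];
};

static void demo_len_for_n(int n, size_t *old_len, size_t *flex_len)
{
	struct demo_old_style *a;
	struct demo_flex *b;

	/* sizeof is unevaluated, so a and b supply only their types here */
	*old_len = struct_size(a, list, n - 1);	/* first entry pre-counted */
	*flex_len = struct_size(b, list, n);	/* needs the full count */
}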
 
 /**
- * i40evf_disable_channel
+ * iavf_disable_channels
  * @adapter: adapter structure
  *
  * Request that the PF disable channels that are configured
  **/
-void i40evf_disable_channels(struct i40evf_adapter *adapter)
+void iavf_disable_channels(struct iavf_adapter *adapter)
 {
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -1000,23 +1016,22 @@
 		return;
 	}
 
-	adapter->ch_config.state = __I40EVF_TC_INVALID;
-	adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+	adapter->ch_config.state = __IAVF_TC_INVALID;
+	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
 	adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
-			   NULL, 0);
+	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_CHANNELS;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, NULL, 0);
 }
 
 /**
- * i40evf_print_cloud_filter
+ * iavf_print_cloud_filter
  * @adapter: adapter structure
  * @f: cloud filter to print
  *
  * Print the cloud filter
  **/
-static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
-				      struct virtchnl_filter *f)
+static void iavf_print_cloud_filter(struct iavf_adapter *adapter,
+				    struct virtchnl_filter *f)
 {
 	switch (f->flow_type) {
 	case VIRTCHNL_TCP_V4_FLOW:
@@ -1043,15 +1058,15 @@
 }
 
 /**
- * i40evf_add_cloud_filter
+ * iavf_add_cloud_filter
  * @adapter: adapter structure
  *
  * Request that the PF add cloud filters as specified
  * by the user via tc tool.
  **/
-void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
+void iavf_add_cloud_filter(struct iavf_adapter *adapter)
 {
-	struct i40evf_cloud_filter *cf;
+	struct iavf_cloud_filter *cf;
 	struct virtchnl_filter *f;
 	int len = 0, count = 0;
 
@@ -1068,7 +1083,7 @@
 		}
 	}
 	if (!count) {
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
@@ -1082,25 +1097,24 @@
 		if (cf->add) {
 			memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
 			cf->add = false;
-			cf->state = __I40EVF_CF_ADD_PENDING;
-			i40evf_send_pf_msg(adapter,
-					   VIRTCHNL_OP_ADD_CLOUD_FILTER,
-					   (u8 *)f, len);
+			cf->state = __IAVF_CF_ADD_PENDING;
+			iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+					 (u8 *)f, len);
 		}
 	}
 	kfree(f);
 }
 
 /**
- * i40evf_del_cloud_filter
+ * iavf_del_cloud_filter
  * @adapter: adapter structure
  *
  * Request that the PF delete cloud filters as specified
  * by the user via tc tool.
  **/
-void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
+void iavf_del_cloud_filter(struct iavf_adapter *adapter)
 {
-	struct i40evf_cloud_filter *cf, *cftmp;
+	struct iavf_cloud_filter *cf, *cftmp;
 	struct virtchnl_filter *f;
 	int len = 0, count = 0;
 
@@ -1117,7 +1131,7 @@
 		}
 	}
 	if (!count) {
-		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
 		return;
 	}
 	adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
@@ -1131,30 +1145,29 @@
 		if (cf->del) {
 			memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
 			cf->del = false;
-			cf->state = __I40EVF_CF_DEL_PENDING;
-			i40evf_send_pf_msg(adapter,
-					   VIRTCHNL_OP_DEL_CLOUD_FILTER,
-					   (u8 *)f, len);
+			cf->state = __IAVF_CF_DEL_PENDING;
+			iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+					 (u8 *)f, len);
 		}
 	}
 	kfree(f);
 }
 
 /**
- * i40evf_request_reset
+ * iavf_request_reset
  * @adapter: adapter structure
  *
  * Request that the PF reset this VF. No response is expected.
  **/
-void i40evf_request_reset(struct i40evf_adapter *adapter)
+void iavf_request_reset(struct iavf_adapter *adapter)
 {
 	/* Don't check CURRENT_OP - this is always higher priority */
-	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0);
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0);
 	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 }
 
 /**
- * i40evf_virtchnl_completion
+ * iavf_virtchnl_completion
  * @adapter: adapter structure
  * @v_opcode: opcode sent by PF
  * @v_retval: retval sent by PF
@@ -1165,10 +1178,9 @@
  * wait, we fire off our requests and assume that no errors will be returned.
  * This function handles the reply messages.
  **/
-void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
-				enum virtchnl_ops v_opcode,
-				i40e_status v_retval,
-				u8 *msg, u16 msglen)
+void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+			      enum virtchnl_ops v_opcode,
+			      enum iavf_status v_retval, u8 *msg, u16 msglen)
 {
 	struct net_device *netdev = adapter->netdev;
 
@@ -1176,6 +1188,7 @@
 		struct virtchnl_pf_event *vpe =
 			(struct virtchnl_pf_event *)msg;
 		bool link_up = vpe->event_data.link_event.link_status;
+
 		switch (vpe->event) {
 		case VIRTCHNL_EVENT_LINK_CHANGE:
 			adapter->link_speed =
@@ -1193,7 +1206,7 @@
 				 * after we enable queues and actually prepared
 				 * to send traffic.
 				 */
-				if (adapter->state != __I40EVF_RUNNING)
+				if (adapter->state != __IAVF_RUNNING)
 					break;
 
 				/* For ADq enabled VF, we reconfigure VSIs and
@@ -1201,7 +1214,7 @@
 				 * queues are enabled.
 				 */
 				if (adapter->flags &
-				    I40EVF_FLAG_QUEUES_DISABLED)
+				    IAVF_FLAG_QUEUES_DISABLED)
 					break;
 			}
 
@@ -1213,14 +1226,14 @@
 				netif_tx_stop_all_queues(netdev);
 				netif_carrier_off(netdev);
 			}
-			i40evf_print_link_message(adapter);
+			iavf_print_link_message(adapter);
 			break;
 		case VIRTCHNL_EVENT_RESET_IMPENDING:
 			dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
-			if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
-				adapter->flags |= I40EVF_FLAG_RESET_PENDING;
+			if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
+				adapter->flags |= IAVF_FLAG_RESET_PENDING;
 				dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
-				schedule_work(&adapter->reset_task);
+				queue_work(iavf_wq, &adapter->reset_task);
 			}
 			break;
 		default:
@@ -1234,48 +1247,50 @@
 		switch (v_opcode) {
 		case VIRTCHNL_OP_ADD_VLAN:
 			dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
-				i40evf_stat_str(&adapter->hw, v_retval));
+				iavf_stat_str(&adapter->hw, v_retval));
 			break;
 		case VIRTCHNL_OP_ADD_ETH_ADDR:
 			dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
-				i40evf_stat_str(&adapter->hw, v_retval));
+				iavf_stat_str(&adapter->hw, v_retval));
+			/* restore administratively set MAC address */
+			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
 			break;
 		case VIRTCHNL_OP_DEL_VLAN:
 			dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
-				i40evf_stat_str(&adapter->hw, v_retval));
+				iavf_stat_str(&adapter->hw, v_retval));
 			break;
 		case VIRTCHNL_OP_DEL_ETH_ADDR:
 			dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
-				i40evf_stat_str(&adapter->hw, v_retval));
+				iavf_stat_str(&adapter->hw, v_retval));
 			break;
 		case VIRTCHNL_OP_ENABLE_CHANNELS:
 			dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
-				i40evf_stat_str(&adapter->hw, v_retval));
-			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
-			adapter->ch_config.state = __I40EVF_TC_INVALID;
+				iavf_stat_str(&adapter->hw, v_retval));
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->ch_config.state = __IAVF_TC_INVALID;
 			netdev_reset_tc(netdev);
 			netif_tx_start_all_queues(netdev);
 			break;
 		case VIRTCHNL_OP_DISABLE_CHANNELS:
 			dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
-				i40evf_stat_str(&adapter->hw, v_retval));
-			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
-			adapter->ch_config.state = __I40EVF_TC_RUNNING;
+				iavf_stat_str(&adapter->hw, v_retval));
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->ch_config.state = __IAVF_TC_RUNNING;
 			netif_tx_start_all_queues(netdev);
 			break;
 		case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
-			struct i40evf_cloud_filter *cf, *cftmp;
+			struct iavf_cloud_filter *cf, *cftmp;
 
 			list_for_each_entry_safe(cf, cftmp,
 						 &adapter->cloud_filter_list,
 						 list) {
-				if (cf->state == __I40EVF_CF_ADD_PENDING) {
-					cf->state = __I40EVF_CF_INVALID;
+				if (cf->state == __IAVF_CF_ADD_PENDING) {
+					cf->state = __IAVF_CF_INVALID;
 					dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
-						 i40evf_stat_str(&adapter->hw,
-								 v_retval));
-					i40evf_print_cloud_filter(adapter,
-								  &cf->f);
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_cloud_filter(adapter,
+								&cf->f);
 					list_del(&cf->list);
 					kfree(cf);
 					adapter->num_cloud_filters--;
@@ -1284,32 +1299,36 @@
 			}
 			break;
 		case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
-			struct i40evf_cloud_filter *cf;
+			struct iavf_cloud_filter *cf;
 
 			list_for_each_entry(cf, &adapter->cloud_filter_list,
 					    list) {
-				if (cf->state == __I40EVF_CF_DEL_PENDING) {
-					cf->state = __I40EVF_CF_ACTIVE;
+				if (cf->state == __IAVF_CF_DEL_PENDING) {
+					cf->state = __IAVF_CF_ACTIVE;
 					dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
-						 i40evf_stat_str(&adapter->hw,
-								 v_retval));
-					i40evf_print_cloud_filter(adapter,
-								  &cf->f);
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_cloud_filter(adapter,
+								&cf->f);
 				}
 			}
 			}
 			break;
 		default:
 			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
-				v_retval,
-				i40evf_stat_str(&adapter->hw, v_retval),
+				v_retval, iavf_stat_str(&adapter->hw, v_retval),
 				v_opcode);
 		}
 	}
 	switch (v_opcode) {
+	case VIRTCHNL_OP_ADD_ETH_ADDR: {
+		if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
+			ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		}
+		break;
 	case VIRTCHNL_OP_GET_STATS: {
-		struct i40e_eth_stats *stats =
-			(struct i40e_eth_stats *)msg;
+		struct iavf_eth_stats *stats =
+			(struct iavf_eth_stats *)msg;
 		netdev->stats.rx_packets = stats->rx_unicast +
 					   stats->rx_multicast +
 					   stats->rx_broadcast;
@@ -1326,25 +1345,33 @@
 		break;
 	case VIRTCHNL_OP_GET_VF_RESOURCES: {
 		u16 len = sizeof(struct virtchnl_vf_resource) +
-			  I40E_MAX_VF_VSI *
+			  IAVF_MAX_VF_VSI *
 			  sizeof(struct virtchnl_vsi_resource);
 		memcpy(adapter->vf_res, msg, min(msglen, len));
-		i40e_vf_parse_hw_config(&adapter->hw, adapter->vf_res);
-		/* restore current mac address */
-		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
-		i40evf_process_config(adapter);
+		iavf_validate_num_queues(adapter);
+		iavf_vf_parse_hw_config(&adapter->hw, adapter->vf_res);
+		if (is_zero_ether_addr(adapter->hw.mac.addr)) {
+			/* restore current mac address */
+			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+		} else {
+			/* refresh current mac address if changed */
+			ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+			ether_addr_copy(netdev->perm_addr,
+					adapter->hw.mac.addr);
+		}
+		iavf_process_config(adapter);
 		}
 		break;
 	case VIRTCHNL_OP_ENABLE_QUEUES:
 		/* enable transmits */
-		i40evf_irq_enable(adapter, true);
-		adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
+		iavf_irq_enable(adapter, true);
+		adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED;
 		break;
 	case VIRTCHNL_OP_DISABLE_QUEUES:
-		i40evf_free_all_tx_resources(adapter);
-		i40evf_free_all_rx_resources(adapter);
-		if (adapter->state == __I40EVF_DOWN_PENDING) {
-			adapter->state = __I40EVF_DOWN;
+		iavf_free_all_tx_resources(adapter);
+		iavf_free_all_rx_resources(adapter);
+		if (adapter->state == __IAVF_DOWN_PENDING) {
+			adapter->state = __IAVF_DOWN;
 			wake_up(&adapter->down_waitqueue);
 		}
 		break;
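
The wake_up() above is the completion half of a handshake: the close path
sleeps on adapter->down_waitqueue until this handler confirms the PF has
disabled the queues. A hedged sketch of the waiting side (the helper name
and the 500 ms timeout are illustrative, not taken from this diff):

#include <linux/wait.h>
#include <linux/jiffies.h>

/* hypothetical: block the caller until the DISABLE_QUEUES reply lands */
static void demo_wait_for_down(struct iavf_adapter *adapter)
{
	wait_event_interruptible_timeout(adapter->down_waitqueue,
					 adapter->state == __IAVF_DOWN,
					 msecs_to_jiffies(500));
}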
@@ -1363,8 +1390,7 @@
 		 * care about that.
 		 */
 		if (msglen && CLIENT_ENABLED(adapter))
-			i40evf_notify_client_message(&adapter->vsi,
-						     msg, msglen);
+			iavf_notify_client_message(&adapter->vsi, msg, msglen);
 		break;
 
 	case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
@@ -1373,6 +1399,7 @@
 		break;
 	case VIRTCHNL_OP_GET_RSS_HENA_CAPS: {
 		struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg;
+
 		if (msglen == sizeof(*vrh))
 			adapter->hena = vrh->hena;
 		else
@@ -1383,32 +1410,33 @@
 	case VIRTCHNL_OP_REQUEST_QUEUES: {
 		struct virtchnl_vf_res_request *vfres =
 			(struct virtchnl_vf_res_request *)msg;
+
 		if (vfres->num_queue_pairs != adapter->num_req_queues) {
 			dev_info(&adapter->pdev->dev,
 				 "Requested %d queues, PF can support %d\n",
 				 adapter->num_req_queues,
 				 vfres->num_queue_pairs);
 			adapter->num_req_queues = 0;
-			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
 		}
 		}
 		break;
 	case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
-		struct i40evf_cloud_filter *cf;
+		struct iavf_cloud_filter *cf;
 
 		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
-			if (cf->state == __I40EVF_CF_ADD_PENDING)
-				cf->state = __I40EVF_CF_ACTIVE;
+			if (cf->state == __IAVF_CF_ADD_PENDING)
+				cf->state = __IAVF_CF_ACTIVE;
 		}
 		}
 		break;
 	case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
-		struct i40evf_cloud_filter *cf, *cftmp;
+		struct iavf_cloud_filter *cf, *cftmp;
 
 		list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
 					 list) {
-			if (cf->state == __I40EVF_CF_DEL_PENDING) {
-				cf->state = __I40EVF_CF_INVALID;
+			if (cf->state == __IAVF_CF_DEL_PENDING) {
+				cf->state = __IAVF_CF_INVALID;
 				list_del(&cf->list);
 				kfree(cf);
 				adapter->num_cloud_filters--;
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 4058673..9edde96 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -13,5 +13,9 @@
 	 ice_nvm.o	\
 	 ice_switch.o	\
 	 ice_sched.o	\
+	 ice_lib.o	\
 	 ice_txrx.o	\
+	 ice_flex_pipe.o	\
 	 ice_ethtool.o
+ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 6759172..45e1006 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -8,6 +8,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/firmware.h>
 #include <linux/netdevice.h>
 #include <linux/compiler.h>
 #include <linux/etherdevice.h>
@@ -26,31 +27,38 @@
 #include <linux/bitmap.h>
 #include <linux/log2.h>
 #include <linux/ip.h>
+#include <linux/sctp.h>
 #include <linux/ipv6.h>
 #include <linux/if_bridge.h>
+#include <linux/ctype.h>
+#include <linux/avf/virtchnl.h>
 #include <net/ipv6.h>
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
+#include "ice_dcb.h"
 #include "ice_switch.h"
 #include "ice_common.h"
 #include "ice_sched.h"
+#include "ice_virtchnl_pf.h"
+#include "ice_sriov.h"
 
 extern const char ice_drv_ver[];
 #define ICE_BAR0		0
-#define ICE_DFLT_NUM_DESC	128
 #define ICE_REQ_DESC_MULTIPLE	32
-#define ICE_MIN_NUM_DESC	ICE_REQ_DESC_MULTIPLE
+#define ICE_MIN_NUM_DESC	64
 #define ICE_MAX_NUM_DESC	8160
+#define ICE_DFLT_MIN_RX_DESC	512
+#define ICE_DFLT_NUM_TX_DESC	256
+#define ICE_DFLT_NUM_RX_DESC	2048
+
 #define ICE_DFLT_TRAFFIC_CLASS	BIT(0)
 #define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
-#define ICE_ETHTOOL_FWVER_LEN	32
 #define ICE_AQ_LEN		64
+#define ICE_MBXSQ_LEN		64
+#define ICE_MBXRQ_LEN		512
 #define ICE_MIN_MSIX		2
 #define ICE_NO_VSI		0xffff
-#define ICE_MAX_VSI_ALLOC	130
-#define ICE_MAX_TXQS		2048
-#define ICE_MAX_RXQS		2048
 #define ICE_VSI_MAP_CONTIG	0
 #define ICE_VSI_MAP_SCATTER	1
 #define ICE_MAX_SCATTER_TXQS	16
@@ -62,13 +70,16 @@
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
+#define ICE_INVAL_VFID		256
+
+#define ICE_MAX_RESET_WAIT		20
 
 #define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
 #define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
-			 ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN)
+			(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
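
The added parentheses change the arithmetic, not just the style: the old
expression subtracted only ETH_HLEN and then added ETH_FCS_LEN and
VLAN_HLEN, overstating the usable MTU, while the new one also budgets for
two stacked VLAN tags. With ETH_HLEN = 14, ETH_FCS_LEN = 4, VLAN_HLEN = 4
and F standing for ICE_AQ_SET_MAC_FRAME_SIZE_MAX:

	old: F - ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN       = F - 14 + 4 + 4 = F - 6
	new: F - (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN) = F - (14 + 4 + 8) = F - 26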
 
 #define ICE_UP_TABLE_TRANSLATE(val, i) \
 		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -82,37 +93,70 @@
 #define ice_for_each_vsi(pf, i) \
 	for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
 
-/* Macros for each tx/rx ring in a VSI */
+/* Macros for each Tx/Rx ring in a VSI */
 #define ice_for_each_txq(vsi, i) \
 	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
 
 #define ice_for_each_rxq(vsi, i) \
 	for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
 
-/* Macros for each allocated tx/rx ring whether used or not in a VSI */
+/* Macros for each allocated Tx/Rx ring whether used or not in a VSI */
 #define ice_for_each_alloc_txq(vsi, i) \
 	for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
 
 #define ice_for_each_alloc_rxq(vsi, i) \
 	for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
 
+#define ice_for_each_q_vector(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
+
+#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \
+				ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX)
+
+#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
+				     ICE_PROMISC_MCAST_TX | \
+				     ICE_PROMISC_UCAST_RX | \
+				     ICE_PROMISC_MCAST_RX | \
+				     ICE_PROMISC_VLAN_TX  | \
+				     ICE_PROMISC_VLAN_RX)
+
+#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX)
+
+#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \
+				     ICE_PROMISC_MCAST_RX | \
+				     ICE_PROMISC_VLAN_TX  | \
+				     ICE_PROMISC_VLAN_RX)
+
 struct ice_tc_info {
 	u16 qoffset;
-	u16 qcount;
+	u16 qcount_tx;
+	u16 qcount_rx;
+	u8 netdev_tc;
 };
 
 struct ice_tc_cfg {
 	u8 numtc; /* Total number of enabled TCs */
-	u8 ena_tc; /* TX map */
+	u8 ena_tc; /* Tx map */
 	struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
 };
 
 struct ice_res_tracker {
 	u16 num_entries;
-	u16 search_hint;
+	u16 end;
 	u16 list[1];
 };
 
+struct ice_qs_cfg {
+	struct mutex *qs_mutex;  /* will be assigned to &pf->avail_q_mutex */
+	unsigned long *pf_map;
+	unsigned long pf_map_size;
+	unsigned int q_count;
+	unsigned int scatter_count;
+	u16 *vsi_map;
+	u16 vsi_map_offset;
+	u8 mapping_mode;
+};
+
 struct ice_sw {
 	struct ice_pf *pf;
 	u16 sw_id;		/* switch ID for this switch */
@@ -120,9 +164,11 @@
 };
 
 enum ice_state {
+	__ICE_TESTING,
 	__ICE_DOWN,
 	__ICE_NEEDS_RESTART,
-	__ICE_RESET_RECOVERY_PENDING,	/* set by driver when reset starts */
+	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
+	__ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
 	__ICE_PFR_REQ,			/* set by driver and peers */
 	__ICE_CORER_REQ,		/* set by driver and peers */
 	__ICE_GLOBR_REQ,		/* set by driver and peers */
@@ -131,10 +177,25 @@
 	__ICE_EMPR_RECV,		/* set by OICR handler */
 	__ICE_SUSPENDED,		/* set on module remove path */
 	__ICE_RESET_FAILED,		/* set by reset/rebuild */
+	/* When checking for the PF to be in a nominal operating state, the
+	 * bits that are grouped at the beginning of the list need to be
+	 * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will
+	 * be checked. If you need to add a bit into consideration for nominal
+	 * operating state, it must be added before
+	 * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
+	 * without appropriate consideration.
+	 */
+	__ICE_STATE_NOMINAL_CHECK_BITS,
 	__ICE_ADMINQ_EVENT_PENDING,
+	__ICE_MAILBOXQ_EVENT_PENDING,
+	__ICE_MDD_EVENT_PENDING,
+	__ICE_VFLR_EVENT_PENDING,
 	__ICE_FLTR_OVERFLOW_PROMISC,
+	__ICE_VF_DIS,
 	__ICE_CFG_BUSY,
 	__ICE_SERVICE_SCHED,
+	__ICE_SERVICE_DIS,
+	__ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
 	__ICE_STATE_NBITS		/* must be last */
 };
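
The ordering rule in the comment above lets a "nominal state" test scan only
the bits below __ICE_STATE_NOMINAL_CHECK_BITS instead of the whole enum. A
hedged sketch of such a check (helper name hypothetical; the driver's real
implementation may differ in detail):

#include <linux/bitmap.h>

/* hypothetical: PF is nominal iff no bit below the marker is set */
static bool demo_pf_state_is_nominal(struct ice_pf *pf)
{
	DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };

	bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
	return !bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS);
}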
 
@@ -152,8 +213,8 @@
 	struct ice_sw *vsw;		 /* switch this VSI is on */
 	struct ice_pf *back;		 /* back pointer to PF */
 	struct ice_port_info *port_info; /* back pointer to port_info */
-	struct ice_ring **rx_rings;	 /* rx ring array */
-	struct ice_ring **tx_rings;	 /* tx ring array */
+	struct ice_ring **rx_rings;	 /* Rx ring array */
+	struct ice_ring **tx_rings;	 /* Tx ring array */
 	struct ice_q_vector **q_vectors; /* q_vector array */
 
 	irqreturn_t (*irq_handler)(int irq, void *data);
@@ -161,20 +222,20 @@
 	u64 tx_linearize;
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
 	DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	unsigned int current_netdev_flags;
 	u32 tx_restart;
 	u32 tx_busy;
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
 	int num_q_vectors;
-	int base_vector;
+	int base_vector;		/* IRQ base for OS reserved vectors */
 	enum ice_vsi_type type;
-	u16 vsi_num;			 /* HW (absolute) index of this VSI */
-	u16 idx;			 /* software index in pf->vsi[] */
+	u16 vsi_num;			/* HW (absolute) index of this VSI */
+	u16 idx;			/* software index in pf->vsi[] */
 
-	/* Interrupt thresholds */
-	u16 work_lmt;
+	s16 vf_id;			/* VF ID for SR-IOV VSIs */
+
+	u16 ethtype;			/* Ethernet protocol for pause frame */
 
 	/* RSS config */
 	u16 rss_table_size;	/* HW RSS table size */
@@ -196,54 +257,89 @@
 	struct list_head tmp_sync_list;		/* MAC filters to be synced */
 	struct list_head tmp_unsync_list;	/* MAC filters to be unsynced */
 
-	u8 irqs_ready;
-	u8 current_isup;		 /* Sync 'link up' logging */
-	u8 stat_offsets_loaded;
+	u8 irqs_ready:1;
+	u8 current_isup:1;		 /* Sync 'link up' logging */
+	u8 stat_offsets_loaded:1;
+	u8 vlan_ena:1;
 
 	/* queue information */
 	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
 	u8 rx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
-	u16 txq_map[ICE_MAX_TXQS];	 /* index in pf->avail_txqs */
-	u16 rxq_map[ICE_MAX_RXQS];	 /* index in pf->avail_rxqs */
+	u16 *txq_map;			 /* index in pf->avail_txqs */
+	u16 *rxq_map;			 /* index in pf->avail_rxqs */
 	u16 alloc_txq;			 /* Allocated Tx queues */
 	u16 num_txq;			 /* Used Tx queues */
 	u16 alloc_rxq;			 /* Allocated Rx queues */
 	u16 num_rxq;			 /* Used Rx queues */
-	u16 num_desc;
+	u16 num_rx_desc;
+	u16 num_tx_desc;
 	struct ice_tc_cfg tc_cfg;
 } ____cacheline_internodealigned_in_smp;
 
 /* struct that defines an interrupt vector */
 struct ice_q_vector {
 	struct ice_vsi *vsi;
-	cpumask_t affinity_mask;
+
+	u16 v_idx;			/* index in the vsi->q_vector array. */
+	u16 reg_idx;
+	u8 num_ring_rx;			/* total number of Rx rings in vector */
+	u8 num_ring_tx;			/* total number of Tx rings in vector */
+	u8 itr_countdown;		/* when 0 should adjust adaptive ITR */
+	/* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
+	 * value to the device
+	 */
+	u8 intrl;
+
 	struct napi_struct napi;
+
 	struct ice_ring_container rx;
 	struct ice_ring_container tx;
+
+	cpumask_t affinity_mask;
 	struct irq_affinity_notify affinity_notify;
-	u16 v_idx;			/* index in the vsi->q_vector array. */
-	u8 num_ring_tx;			/* total number of tx rings in vector */
-	u8 num_ring_rx;			/* total number of rx rings in vector */
+
 	char name[ICE_INT_NAME_STR_LEN];
 } ____cacheline_internodealigned_in_smp;
 
 enum ice_pf_flags {
-	ICE_FLAG_MSIX_ENA,
 	ICE_FLAG_FLTR_SYNC,
 	ICE_FLAG_RSS_ENA,
+	ICE_FLAG_SRIOV_ENA,
+	ICE_FLAG_SRIOV_CAPABLE,
+	ICE_FLAG_DCB_CAPABLE,
+	ICE_FLAG_DCB_ENA,
+	ICE_FLAG_ADV_FEATURES,
+	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+	ICE_FLAG_NO_MEDIA,
+	ICE_FLAG_FW_LLDP_AGENT,
+	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
 
 struct ice_pf {
 	struct pci_dev *pdev;
+
+	/* OS reserved IRQ details */
 	struct msix_entry *msix_entries;
 	struct ice_res_tracker *irq_tracker;
+	/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+	 * number of MSIX vectors needed for all SR-IOV VFs from the number of
+	 * MSIX vectors allowed on this PF.
+	 */
+	u16 sriov_base_vector;
+
 	struct ice_vsi **vsi;		/* VSIs created by the driver */
 	struct ice_sw *first_sw;	/* first switch created by firmware */
+	/* Virtchnl/SR-IOV config info */
+	struct ice_vf *vf;
+	int num_alloc_vfs;		/* actual number of VFs allocated */
+	u16 num_vfs_supported;		/* num VFs supported for this PF */
+	u16 num_vf_qps;			/* num queue pairs per VF */
+	u16 num_vf_msix;		/* num vectors per VF */
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
-	DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS);
-	DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS);
 	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+	unsigned long *avail_txqs;	/* bitmap to track PF Tx queue usage */
+	unsigned long *avail_rxqs;	/* bitmap to track PF Rx queue usage */
 	unsigned long serv_tmr_period;
 	unsigned long serv_tmr_prev;
 	struct timer_list serv_tmr;
@@ -252,13 +348,13 @@
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
 	u32 msg_enable;
 	u32 hw_csum_rx_error;
-	u32 oicr_idx;		/* Other interrupt cause vector index */
+	u32 oicr_idx;		/* Other interrupt cause MSIX vector index */
+	u32 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
+	u16 max_pf_txqs;	/* Total Tx queues PF wide */
+	u16 max_pf_rxqs;	/* Total Rx queues PF wide */
 	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
-	u32 num_avail_msix;	/* remaining MSIX vectors left unclaimed */
-	u16 num_lan_tx;		/* num lan tx queues setup */
-	u16 num_lan_rx;		/* num lan rx queues setup */
-	u16 q_left_tx;		/* remaining num tx queues left unclaimed */
-	u16 q_left_rx;		/* remaining num rx queues left unclaimed */
+	u16 num_lan_tx;		/* num LAN Tx queues setup */
+	u16 num_lan_rx;		/* num LAN Rx queues setup */
 	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
 	u16 num_alloc_vsi;
 	u16 corer_count;	/* Core reset count */
@@ -269,8 +365,15 @@
 	struct ice_hw_port_stats stats;
 	struct ice_hw_port_stats stats_prev;
 	struct ice_hw hw;
-	u8 stat_prev_loaded;	/* has previous stats been loaded */
+	u8 stat_prev_loaded:1; /* has previous stats been loaded */
+#ifdef CONFIG_DCB
+	u16 dcbx_cap;
+#endif /* CONFIG_DCB */
+	u32 tx_timeout_count;
+	unsigned long tx_timeout_last_recovery;
+	u32 tx_timeout_recovery_level;
 	char int_name[ICE_INT_NAME_STR_LEN];
+	u32 sw_int_count;
 };
 
 struct ice_netdev_priv {
@@ -279,15 +382,16 @@
 
 /**
  * ice_irq_dynamic_ena - Enable default interrupt generation settings
- * @hw: pointer to hw struct
- * @vsi: pointer to vsi struct, can be NULL
+ * @hw: pointer to HW struct
+ * @vsi: pointer to VSI struct, can be NULL
  * @q_vector: pointer to q_vector, can be NULL
  */
-static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
-				       struct ice_q_vector *q_vector)
+static inline void
+ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
+		    struct ice_q_vector *q_vector)
 {
-	u32 vector = (vsi && q_vector) ? vsi->base_vector + q_vector->v_idx :
-					((struct ice_pf *)hw->back)->oicr_idx;
+	u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
+				((struct ice_pf *)hw->back)->oicr_idx;
 	int itr = ICE_ITR_NONE;
 	u32 val;
 
@@ -302,18 +406,52 @@
 	wr32(hw, GLINT_DYN_CTL(vector), val);
 }
 
-static inline void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+/**
+ * ice_netdev_to_pf - Retrieve the PF struct associated with a netdev
+ * @netdev: pointer to the netdev struct
+ */
+static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
 {
-	vsi->tc_cfg.ena_tc =  ICE_DFLT_TRAFFIC_CLASS;
-	vsi->tc_cfg.numtc = 1;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vsi->back;
 }
 
+/**
+ * ice_get_main_vsi - Get the PF VSI
+ * @pf: PF instance
+ *
+ * returns pf->vsi[0], which by definition is the PF VSI
+ */
+static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
+{
+	if (pf->vsi)
+		return pf->vsi[0];
+
+	return NULL;
+}
+
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
 void ice_set_ethtool_ops(struct net_device *netdev);
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
+u16 ice_get_avail_txq_count(struct ice_pf *pf);
+u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+void ice_update_vsi_stats(struct ice_vsi *vsi);
+void ice_update_pf_stats(struct ice_pf *pf);
 int ice_up(struct ice_vsi *vsi);
 int ice_down(struct ice_vsi *vsi);
+int ice_vsi_cfg(struct ice_vsi *vsi);
+struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
 int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+#ifdef CONFIG_DCB
+int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
+void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
+#endif /* CONFIG_DCB */
+int ice_open(struct net_device *netdev);
+int ice_stop(struct net_device *netdev);
 
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index a0614f4..023e3d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -5,7 +5,7 @@
 #define _ICE_ADMINQ_CMD_H_
 
 /* This header file defines the Admin Queue commands, error codes and
- * descriptor format.  It is shared between Firmware and Software.
+ * descriptor format. It is shared between Firmware and Software.
  */
 
 #define ICE_MAX_VSI			768
@@ -33,11 +33,22 @@
 	u8 api_patch;
 };
 
+/* Send driver version (indirect 0x0002) */
+struct ice_aqc_driver_ver {
+	u8 major_ver;
+	u8 minor_ver;
+	u8 build_ver;
+	u8 subbuild_ver;
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* Queue Shutdown (direct 0x0003) */
 struct ice_aqc_q_shutdown {
+	u8 driver_unloading;
 #define ICE_AQC_DRIVER_UNLOADING	BIT(0)
-	__le32 driver_unloading;
-	u8 reserved[12];
+	u8 reserved[15];
 };
 
 /* Request resource ownership (direct 0x0008)
@@ -62,7 +73,7 @@
 #define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS	180000
 #define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS	1000
 #define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS	3000
-	/* For SDP: pin id of the SDP */
+	/* For SDP: pin ID of the SDP */
 	__le32 res_number;
 	/* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
 	__le16 status;
@@ -87,7 +98,11 @@
 /* Device/Function buffer entry, repeated per reported capability */
 struct ice_aqc_list_caps_elem {
 	__le16 cap;
+#define ICE_AQC_CAPS_VALID_FUNCTIONS			0x0005
+#define ICE_AQC_CAPS_SRIOV				0x0012
+#define ICE_AQC_CAPS_VF					0x0013
 #define ICE_AQC_CAPS_VSI				0x0017
+#define ICE_AQC_CAPS_DCB				0x0018
 #define ICE_AQC_CAPS_RSS				0x0040
 #define ICE_AQC_CAPS_RXQS				0x0041
 #define ICE_AQC_CAPS_TXQS				0x0042
@@ -117,11 +132,9 @@
 #define ICE_AQC_MAN_MAC_WOL_ADDR_VALID		BIT(7)
 #define ICE_AQC_MAN_MAC_READ_S			4
 #define ICE_AQC_MAN_MAC_READ_M			(0xF << ICE_AQC_MAN_MAC_READ_S)
-	u8 lport_num;
-	u8 lport_num_valid;
-#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID	BIT(0)
+	u8 rsvd[2];
 	u8 num_addr; /* Used in response */
-	u8 reserved[3];
+	u8 rsvd1[3];
 	__le32 addr_high;
 	__le32 addr_low;
 };
@@ -137,7 +150,7 @@
 
 /* Manage MAC address, write command - direct (0x0108) */
 struct ice_aqc_manage_mac_write {
-	u8 port_num;
+	u8 rsvd;
 	u8 flags;
 #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN		BIT(0)
 #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP	BIT(1)
@@ -443,6 +456,8 @@
 	u8 reserved[24];
 };
 
+#define ICE_MAX_NUM_RECIPES 64
+
 /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
  */
 struct ice_aqc_sw_rules {
@@ -458,7 +473,7 @@
 };
 
 /* Add/Update/Get/Remove lookup Rx/Tx command/response entry
- * This structures describes the lookup rules and associated actions.  "index"
+ * This structures describes the lookup rules and associated actions. "index"
  * is returned as part of a response to a successful Add command, and can be
  * used to identify the rule for Update/Get/Remove commands.
  */
@@ -652,8 +667,13 @@
 
 /* Update TSE (indirect 0x0403)
  * Get TSE (indirect 0x0404)
+ * Add TSE (indirect 0x0401)
+ * Delete TSE (indirect 0x040F)
+ * Move TSE (indirect 0x0408)
+ * Suspend Nodes (indirect 0x0409)
+ * Resume Nodes (indirect 0x040A)
  */
-struct ice_aqc_get_cfg_elem {
+struct ice_aqc_sched_elem_cmd {
 	__le16 num_elem_req;	/* Used by commands */
 	__le16 num_elem_resp;	/* Used by responses */
 	__le32 reserved;
@@ -669,18 +689,6 @@
 	__le32 teid[1];
 };
 
-/* Add TSE (indirect 0x0401)
- * Delete TSE (indirect 0x040F)
- * Move TSE (indirect 0x0408)
- */
-struct ice_aqc_add_move_delete_elem {
-	__le16 num_grps_req;
-	__le16 num_grps_updated;
-	__le32 reserved;
-	__le32 addr_high;
-	__le32 addr_low;
-};
-
 struct ice_aqc_elem_info_bw {
 	__le16 bw_profile_idx;
 	__le16 bw_alloc;
@@ -734,6 +742,10 @@
 	struct ice_aqc_txsched_elem_data generic[1];
 };
 
+struct ice_aqc_get_elem {
+	struct ice_aqc_txsched_elem_data generic[1];
+};
+
 struct ice_aqc_get_topo_elem {
 	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
 	struct ice_aqc_txsched_elem_data
@@ -745,6 +757,32 @@
 	__le32 teid[1];
 };
 
+/* Query Port ETS (indirect 0x040E)
+ *
+ * This indirect command is used to query port TC node configuration.
+ */
+struct ice_aqc_query_port_ets {
+	__le32 port_teid;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_port_ets_elem {
+	u8 tc_valid_bits;
+	u8 reserved[3];
+	/* 3 bits for UP per TC 0-7, 4th byte reserved */
+	__le32 up2tc;
+	u8 tc_bw_share[8];
+	__le32 port_eir_prof_id;
+	__le32 port_cir_prof_id;
+	/* 3 bits per Node priority to TC 0-7, 4th byte reserved */
+	__le32 tc_node_prio;
+#define ICE_TC_NODE_PRIO_S	0x4
+	u8 reserved1[4];
+	__le32 tc_node_teid[8]; /* Used for response, reserved in command */
+};
+
 /* Query Scheduler Resource Allocation (indirect 0x0412)
  * This indirect command retrieves the scheduler resources allocated by
  * EMP Firmware to the given PF.
@@ -771,9 +809,8 @@
 	u8 chunk_size;
 	__le16 max_device_nodes;
 	__le16 max_pf_nodes;
-	u8 rsvd0[2];
-	__le16 max_shared_rate_lmtr;
-	__le16 max_children;
+	u8 rsvd0[4];
+	__le16 max_sibl_grp_sz;
 	__le16 max_cir_rl_profiles;
 	__le16 max_eir_rl_profiles;
 	__le16 max_srl_profiles;
@@ -846,11 +883,46 @@
 #define ICE_PHY_TYPE_LOW_40GBASE_KR4		BIT_ULL(33)
 #define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC	BIT_ULL(34)
 #define ICE_PHY_TYPE_LOW_40G_XLAUI		BIT_ULL(35)
+#define ICE_PHY_TYPE_LOW_50GBASE_CR2		BIT_ULL(36)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR2		BIT_ULL(37)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR2		BIT_ULL(38)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR2		BIT_ULL(39)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC	BIT_ULL(40)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2		BIT_ULL(41)
+#define ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC	BIT_ULL(42)
+#define ICE_PHY_TYPE_LOW_50G_AUI2		BIT_ULL(43)
+#define ICE_PHY_TYPE_LOW_50GBASE_CP		BIT_ULL(44)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR		BIT_ULL(45)
+#define ICE_PHY_TYPE_LOW_50GBASE_FR		BIT_ULL(46)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR		BIT_ULL(47)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4	BIT_ULL(48)
+#define ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC	BIT_ULL(49)
+#define ICE_PHY_TYPE_LOW_50G_AUI1		BIT_ULL(50)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR4		BIT_ULL(51)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR4		BIT_ULL(52)
+#define ICE_PHY_TYPE_LOW_100GBASE_LR4		BIT_ULL(53)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR4		BIT_ULL(54)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC	BIT_ULL(55)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4		BIT_ULL(56)
+#define ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC	BIT_ULL(57)
+#define ICE_PHY_TYPE_LOW_100G_AUI4		BIT_ULL(58)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4	BIT_ULL(59)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4	BIT_ULL(60)
+#define ICE_PHY_TYPE_LOW_100GBASE_CP2		BIT_ULL(61)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR2		BIT_ULL(62)
+#define ICE_PHY_TYPE_LOW_100GBASE_DR		BIT_ULL(63)
 #define ICE_PHY_TYPE_LOW_MAX_INDEX		63
+/* The second set of defines is for phy_type_high. */
+#define ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4	BIT_ULL(0)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC	BIT_ULL(1)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2		BIT_ULL(2)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC	BIT_ULL(3)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2		BIT_ULL(4)
+#define ICE_PHY_TYPE_HIGH_MAX_INDEX		19
 
 struct ice_aqc_get_phy_caps_data {
 	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
-	__le64 reserved;
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
 	u8 caps;
 #define ICE_AQC_PHY_EN_TX_LINK_PAUSE			BIT(0)
 #define ICE_AQC_PHY_EN_RX_LINK_PAUSE			BIT(1)
@@ -858,6 +930,8 @@
 #define ICE_AQC_PHY_EN_LINK				BIT(3)
 #define ICE_AQC_PHY_AN_MODE				BIT(4)
 #define ICE_AQC_GET_PHY_EN_MOD_QUAL			BIT(5)
+#define ICE_AQC_PHY_EN_AUTO_FEC				BIT(7)
+#define ICE_AQC_PHY_CAPS_MASK				ICE_M(0xff, 0)
 	u8 low_power_ctrl;
 #define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG		BIT(0)
 	__le16 eee_cap;
@@ -870,6 +944,7 @@
 #define ICE_AQC_PHY_EEE_EN_40GBASE_KR4			BIT(6)
 	__le16 eeer_value;
 	u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+	u8 phy_fw_ver[8];
 	u8 link_fec_options;
 #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN		BIT(0)
 #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ		BIT(1)
@@ -878,6 +953,8 @@
 #define ICE_AQC_PHY_FEC_25G_RS_544_REQ			BIT(4)
 #define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN		BIT(6)
 #define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN		BIT(7)
+#define ICE_AQC_PHY_FEC_MASK				ICE_M(0xdf, 0)
+	u8 rsvd1;	/* Byte 35 reserved */
 	u8 extended_compliance_code;
 #define ICE_MODULE_TYPE_TOTAL_BYTE			3
 	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
@@ -892,13 +969,14 @@
 #define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS			0xA0
 #define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS		0x86
 	u8 qualified_module_count;
+	u8 rsvd2[7];	/* Bytes 47:41 reserved */
 #define ICE_AQC_QUAL_MOD_COUNT_MAX			16
 	struct {
 		u8 v_oui[3];
-		u8 rsvd1;
+		u8 rsvd3;
 		u8 v_part[16];
 		__le32 v_rev;
-		__le64 rsvd8;
+		__le64 rsvd4;
 	} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
 };
 
@@ -915,13 +993,16 @@
 /* Set PHY config command data structure */
 struct ice_aqc_set_phy_cfg_data {
 	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
-	__le64 rsvd0;
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
 	u8 caps;
-#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY		BIT(0)
-#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY		BIT(1)
-#define ICE_AQ_PHY_ENA_LOW_POWER		BIT(2)
-#define ICE_AQ_PHY_ENA_LINK			BIT(3)
-#define ICE_AQ_PHY_ENA_ATOMIC_LINK		BIT(5)
+#define ICE_AQ_PHY_ENA_VALID_MASK	ICE_M(0xef, 0)
+#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY	BIT(0)
+#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY	BIT(1)
+#define ICE_AQ_PHY_ENA_LOW_POWER	BIT(2)
+#define ICE_AQ_PHY_ENA_LINK		BIT(3)
+#define ICE_AQ_PHY_ENA_AUTO_LINK_UPDT	BIT(5)
+#define ICE_AQ_PHY_ENA_LESM		BIT(6)
+#define ICE_AQ_PHY_ENA_AUTO_FEC		BIT(7)
 	u8 low_power_ctrl;
 	__le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
 	__le16 eeer_value;
@@ -985,7 +1066,7 @@
 	u8 ext_info;
 #define ICE_AQ_LINK_PHY_TEMP_ALARM	BIT(0)
 #define ICE_AQ_LINK_EXCESSIVE_ERRORS	BIT(1)	/* Excessive Link Errors */
-	/* Port TX Suspended */
+	/* Port Tx Suspended */
 #define ICE_AQ_LINK_TX_S		2
 #define ICE_AQ_LINK_TX_M		(0x03 << ICE_AQ_LINK_TX_S)
 #define ICE_AQ_LINK_TX_ACTIVE		0
@@ -997,6 +1078,7 @@
 #define ICE_AQ_LINK_25G_KR_FEC_EN	BIT(0)
 #define ICE_AQ_LINK_25G_RS_528_FEC_EN	BIT(1)
 #define ICE_AQ_LINK_25G_RS_544_FEC_EN	BIT(2)
+#define ICE_AQ_FEC_MASK			ICE_M(0x7, 0)
 	/* Pacing Config */
 #define ICE_AQ_CFG_PACING_S		3
 #define ICE_AQ_CFG_PACING_M		(0xF << ICE_AQ_CFG_PACING_S)
@@ -1022,10 +1104,12 @@
 #define ICE_AQ_LINK_SPEED_20GB		BIT(6)
 #define ICE_AQ_LINK_SPEED_25GB		BIT(7)
 #define ICE_AQ_LINK_SPEED_40GB		BIT(8)
+#define ICE_AQ_LINK_SPEED_50GB		BIT(9)
+#define ICE_AQ_LINK_SPEED_100GB		BIT(10)
 #define ICE_AQ_LINK_SPEED_UNKNOWN	BIT(15)
 	__le32 reserved3; /* Aligns next field to 8-byte boundary */
 	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
-	__le64 reserved4;
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
 };
 
 /* Set event mask command (direct 0x0613) */
@@ -1045,6 +1129,24 @@
 	u8	reserved1[6];
 };
 
+/* Set MAC Loopback command (direct 0x0620) */
+struct ice_aqc_set_mac_lb {
+	u8 lb_mode;
+#define ICE_AQ_MAC_LB_EN		BIT(0)
+#define ICE_AQ_MAC_LB_OSC_CLK		BIT(1)
+	u8 reserved[15];
+};
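+/* Illustrative use of the bits above (not from the hardware spec): setting
+ * lb_mode = ICE_AQ_MAC_LB_EN requests local MAC loopback, and OR-ing in
+ * ICE_AQ_MAC_LB_OSC_CLK additionally selects, by its name, the internal
+ * oscillator as the loopback clock source.
+ */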
+
+/* Set Port Identification LED (direct, 0x06E9) */
+struct ice_aqc_set_port_id_led {
+	u8 lport_num;
+	u8 lport_num_valid;
+	u8 ident_mode;
+#define ICE_AQC_PORT_IDENT_LED_BLINK	BIT(0)
+#define ICE_AQC_PORT_IDENT_LED_ORIG	0
+	u8 rsvd[13];
+};
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
@@ -1056,10 +1158,10 @@
 #define ICE_AQC_NVM_LAST_CMD		BIT(0)
 #define ICE_AQC_NVM_PCIR_REQ		BIT(0)	/* Used by NVM Update reply */
 #define ICE_AQC_NVM_PRESERVATION_S	1
-#define ICE_AQC_NVM_PRESERVATION_M	(3 << CSR_AQ_NVM_PRESERVATION_S)
-#define ICE_AQC_NVM_NO_PRESERVATION	(0 << CSR_AQ_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVATION_M	(3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_NO_PRESERVATION	(0 << ICE_AQC_NVM_PRESERVATION_S)
 #define ICE_AQC_NVM_PRESERVE_ALL	BIT(1)
-#define ICE_AQC_NVM_PRESERVE_SELECTED	(3 << CSR_AQ_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_SELECTED	(3 << ICE_AQC_NVM_PRESERVATION_S)
 #define ICE_AQC_NVM_FLASH_ONLY		BIT(7)
 	__le16 module_typeid;
 	__le16 length;
@@ -1068,6 +1170,150 @@
 	__le32 addr_low;
 };
 
+/* NVM Checksum Command (direct, 0x0706) */
+struct ice_aqc_nvm_checksum {
+	u8 flags;
+#define ICE_AQC_NVM_CHECKSUM_VERIFY	BIT(0)
+#define ICE_AQC_NVM_CHECKSUM_RECALC	BIT(1)
+	u8 rsvd;
+	__le16 checksum; /* Used only by response */
+#define ICE_AQC_NVM_CHECKSUM_CORRECT	0xBABA
+	u8 rsvd2[12];
+};
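+/* Illustrative exchange based on the fields above: a request with
+ * flags = ICE_AQC_NVM_CHECKSUM_VERIFY asks FW to validate the NVM image;
+ * in the response, checksum == ICE_AQC_NVM_CHECKSUM_CORRECT (0xBABA)
+ * indicates the image checksummed correctly.
+ */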
+
+/* Send to PF command (indirect 0x0801); the ID is only used by the PF
+ *
+ * Send to VF command (indirect 0x0802); the ID is only used by the PF
+ */
+struct ice_aqc_pf_vf_msg {
+	__le32 id;
+	u32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get LLDP MIB (indirect 0x0A00)
+ * Note: This is also used by the LLDP MIB Change Event (0x0A01)
+ * as the format is the same.
+ */
+struct ice_aqc_lldp_get_mib {
+	u8 type;
+#define ICE_AQ_LLDP_MIB_TYPE_S			0
+#define ICE_AQ_LLDP_MIB_TYPE_M			(0x3 << ICE_AQ_LLDP_MIB_TYPE_S)
+#define ICE_AQ_LLDP_MIB_LOCAL			0
+#define ICE_AQ_LLDP_MIB_REMOTE			1
+#define ICE_AQ_LLDP_MIB_LOCAL_AND_REMOTE	2
+#define ICE_AQ_LLDP_BRID_TYPE_S			2
+#define ICE_AQ_LLDP_BRID_TYPE_M			(0x3 << ICE_AQ_LLDP_BRID_TYPE_S)
+#define ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID	0
+#define ICE_AQ_LLDP_BRID_TYPE_NON_TPMR		1
+/* Tx pause flags in the 0xA01 event use ICE_AQ_LLDP_TX_* */
+#define ICE_AQ_LLDP_TX_S			0x4
+#define ICE_AQ_LLDP_TX_M			(0x03 << ICE_AQ_LLDP_TX_S)
+#define ICE_AQ_LLDP_TX_ACTIVE			0
+#define ICE_AQ_LLDP_TX_SUSPENDED		1
+#define ICE_AQ_LLDP_TX_FLUSHED			3
+/* The following bytes are reserved for the Get LLDP MIB command (0x0A00)
+ * and the LLDP MIB Change Event (0x0A01); they are valid only in the
+ * Get LLDP MIB (0x0A00) response.
+ */
+	u8 reserved1;
+	__le16 local_len;
+	__le16 remote_len;
+	u8 reserved2[2];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Configure LLDP MIB Change Event (direct 0x0A01) */
+/* For MIB Change Event use ice_aqc_lldp_get_mib structure above */
+struct ice_aqc_lldp_set_mib_change {
+	u8 command;
+#define ICE_AQ_LLDP_MIB_UPDATE_ENABLE		0x0
+#define ICE_AQ_LLDP_MIB_UPDATE_DIS		0x1
+	u8 reserved[15];
+};
+
+/* Stop LLDP (direct 0x0A05) */
+struct ice_aqc_lldp_stop {
+	u8 command;
+#define ICE_AQ_LLDP_AGENT_STATE_MASK	BIT(0)
+#define ICE_AQ_LLDP_AGENT_STOP		0x0
+#define ICE_AQ_LLDP_AGENT_SHUTDOWN	ICE_AQ_LLDP_AGENT_STATE_MASK
+#define ICE_AQ_LLDP_AGENT_PERSIST_DIS	BIT(1)
+	u8 reserved[15];
+};
+
+/* Start LLDP (direct 0x0A06) */
+struct ice_aqc_lldp_start {
+	u8 command;
+#define ICE_AQ_LLDP_AGENT_START		BIT(0)
+#define ICE_AQ_LLDP_AGENT_PERSIST_ENA	BIT(1)
+	u8 reserved[15];
+};
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * The command uses the generic descriptor struct and
+ * returns the struct below as an indirect response.
+ */
+struct ice_aqc_get_cee_dcb_cfg_resp {
+	u8 oper_num_tc;
+	u8 oper_prio_tc[4];
+	u8 oper_tc_bw[8];
+	u8 oper_pfc_en;
+	__le16 oper_app_prio;
+#define ICE_AQC_CEE_APP_FCOE_S		0
+#define ICE_AQC_CEE_APP_FCOE_M		(0x7 << ICE_AQC_CEE_APP_FCOE_S)
+#define ICE_AQC_CEE_APP_ISCSI_S		3
+#define ICE_AQC_CEE_APP_ISCSI_M		(0x7 << ICE_AQC_CEE_APP_ISCSI_S)
+#define ICE_AQC_CEE_APP_FIP_S		8
+#define ICE_AQC_CEE_APP_FIP_M		(0x7 << ICE_AQC_CEE_APP_FIP_S)
+	__le32 tlv_status;
+#define ICE_AQC_CEE_PG_STATUS_S		0
+#define ICE_AQC_CEE_PG_STATUS_M		(0x7 << ICE_AQC_CEE_PG_STATUS_S)
+#define ICE_AQC_CEE_PFC_STATUS_S	3
+#define ICE_AQC_CEE_PFC_STATUS_M	(0x7 << ICE_AQC_CEE_PFC_STATUS_S)
+#define ICE_AQC_CEE_FCOE_STATUS_S	8
+#define ICE_AQC_CEE_FCOE_STATUS_M	(0x7 << ICE_AQC_CEE_FCOE_STATUS_S)
+#define ICE_AQC_CEE_ISCSI_STATUS_S	11
+#define ICE_AQC_CEE_ISCSI_STATUS_M	(0x7 << ICE_AQC_CEE_ISCSI_STATUS_S)
+#define ICE_AQC_CEE_FIP_STATUS_S	16
+#define ICE_AQC_CEE_FIP_STATUS_M	(0x7 << ICE_AQC_CEE_FIP_STATUS_S)
+	u8 reserved[12];
+};
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent, e.g. DCBX.
+ */
+struct ice_aqc_lldp_set_local_mib {
+	u8 type;
+#define SET_LOCAL_MIB_TYPE_DCBX_M		BIT(0)
+#define SET_LOCAL_MIB_TYPE_LOCAL_MIB		0
+#define SET_LOCAL_MIB_TYPE_CEE_M		BIT(1)
+#define SET_LOCAL_MIB_TYPE_CEE_WILLING		0
+#define SET_LOCAL_MIB_TYPE_CEE_NON_WILLING	SET_LOCAL_MIB_TYPE_CEE_M
+	u8 reserved0;
+	__le16 length;
+	u8 reserved1[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting a specific LLDP agent, e.g. DCBX.
+ * The same structure is used for the response, with the command field
+ * being used as the status field.
+ */
+struct ice_aqc_lldp_stop_start_specific_agent {
+	u8 command;
+#define ICE_AQC_START_STOP_AGENT_M		BIT(0)
+#define ICE_AQC_START_STOP_AGENT_STOP_DCBX	0
+#define ICE_AQC_START_STOP_AGENT_START_DCBX	ICE_AQC_START_STOP_AGENT_M
+	u8 reserved[15];
+};
+
 /* Get/Set RSS key (indirect 0x0B04/0x0B02) */
 struct ice_aqc_get_set_rss_key {
 #define ICE_AQC_GSET_RSS_KEY_VSI_VALID	BIT(15)
@@ -1081,6 +1327,9 @@
 
 #define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE	0x28
 #define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE	0xC
+#define ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE \
+				(ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE + \
+				 ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE)
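+/* Illustrative arithmetic: 0x28 + 0xC = 0x34, i.e. a 52-byte buffer when the
+ * standard RSS key and the extended hash key portions are written together.
+ */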
 
 struct ice_aqc_get_set_rss_keys {
 	u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
@@ -1088,7 +1337,7 @@
 };
 
 /* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */
-struct  ice_aqc_get_set_rss_lut {
+struct ice_aqc_get_set_rss_lut {
 #define ICE_AQC_GSET_RSS_LUT_VSI_VALID	BIT(15)
 #define ICE_AQC_GSET_RSS_LUT_VSI_ID_S	0
 #define ICE_AQC_GSET_RSS_LUT_VSI_ID_M	(0x1FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S)
@@ -1122,7 +1371,7 @@
 	__le32 addr_low;
 };
 
-/* Add TX LAN Queues (indirect 0x0C30) */
+/* Add Tx LAN Queues (indirect 0x0C30) */
 struct ice_aqc_add_txqs {
 	u8 num_qgrps;
 	u8 reserved[3];
@@ -1131,7 +1380,7 @@
 	__le32 addr_low;
 };
 
-/* This is the descriptor of each queue entry for the Add TX LAN Queues
+/* This is the descriptor of each queue entry for the Add Tx LAN Queues
  * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
  */
 struct ice_aqc_add_txqs_perq {
@@ -1143,7 +1392,7 @@
 	struct ice_aqc_txsched_elem info;
 };
 
-/* The format of the command buffer for Add TX LAN Queues (0x0C30)
+/* The format of the command buffer for Add Tx LAN Queues (0x0C30)
  * is an array of the following structs. Please note that the length of
  * each struct ice_aqc_add_tx_qgrp is variable due
  * to the variable number of queues in each group!
@@ -1155,7 +1404,7 @@
 	struct ice_aqc_add_txqs_perq txqs[1];
 };
 
-/* Disable TX LAN Queues (indirect 0x0C31) */
+/* Disable Tx LAN Queues (indirect 0x0C31) */
 struct ice_aqc_dis_txqs {
 	u8 cmd_type;
 #define ICE_AQC_Q_DIS_CMD_S		0
@@ -1177,7 +1426,7 @@
 	__le32 addr_low;
 };
 
-/* The buffer for Disable TX LAN Queues (indirect 0x0C31)
+/* The buffer for Disable Tx LAN Queues (indirect 0x0C31)
  * contains the following structures, arrayed one after the
  * other.
  * Note: Since the q_id is 16 bits wide, if the
@@ -1203,6 +1452,134 @@
 	struct ice_aqc_dis_txq_item qgrps[1];
 };
 
+/* Configure Firmware Logging Command (indirect 0xFF09)
+ * Logging Information Read Response (indirect 0xFF10)
+ * Note: The 0xFF10 command has no input parameters.
+ */
+struct ice_aqc_fw_logging {
+	u8 log_ctrl;
+#define ICE_AQC_FW_LOG_AQ_EN		BIT(0)
+#define ICE_AQC_FW_LOG_UART_EN		BIT(1)
+	u8 rsvd0;
+	u8 log_ctrl_valid; /* Not used by 0xFF10 Response */
+#define ICE_AQC_FW_LOG_AQ_VALID		BIT(0)
+#define ICE_AQC_FW_LOG_UART_VALID	BIT(1)
+	u8 rsvd1[5];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+enum ice_aqc_fw_logging_mod {
+	ICE_AQC_FW_LOG_ID_GENERAL = 0,
+	ICE_AQC_FW_LOG_ID_CTRL,
+	ICE_AQC_FW_LOG_ID_LINK,
+	ICE_AQC_FW_LOG_ID_LINK_TOPO,
+	ICE_AQC_FW_LOG_ID_DNL,
+	ICE_AQC_FW_LOG_ID_I2C,
+	ICE_AQC_FW_LOG_ID_SDP,
+	ICE_AQC_FW_LOG_ID_MDIO,
+	ICE_AQC_FW_LOG_ID_ADMINQ,
+	ICE_AQC_FW_LOG_ID_HDMA,
+	ICE_AQC_FW_LOG_ID_LLDP,
+	ICE_AQC_FW_LOG_ID_DCBX,
+	ICE_AQC_FW_LOG_ID_DCB,
+	ICE_AQC_FW_LOG_ID_NETPROXY,
+	ICE_AQC_FW_LOG_ID_NVM,
+	ICE_AQC_FW_LOG_ID_AUTH,
+	ICE_AQC_FW_LOG_ID_VPD,
+	ICE_AQC_FW_LOG_ID_IOSF,
+	ICE_AQC_FW_LOG_ID_PARSER,
+	ICE_AQC_FW_LOG_ID_SW,
+	ICE_AQC_FW_LOG_ID_SCHEDULER,
+	ICE_AQC_FW_LOG_ID_TXQ,
+	ICE_AQC_FW_LOG_ID_RSVD,
+	ICE_AQC_FW_LOG_ID_POST,
+	ICE_AQC_FW_LOG_ID_WATCHDOG,
+	ICE_AQC_FW_LOG_ID_TASK_DISPATCH,
+	ICE_AQC_FW_LOG_ID_MNG,
+	ICE_AQC_FW_LOG_ID_MAX,
+};
+
+/* This is the buffer for both of the logging commands.
+ * The entry array size depends on the datalen parameter in the descriptor.
+ * There will be a total of datalen / 2 entries.
+ */
+struct ice_aqc_fw_logging_data {
+	__le16 entry[1];
+#define ICE_AQC_FW_LOG_ID_S		0
+#define ICE_AQC_FW_LOG_ID_M		(0xFFF << ICE_AQC_FW_LOG_ID_S)
+
+#define ICE_AQC_FW_LOG_CONF_SUCCESS	0	/* Used by response */
+#define ICE_AQC_FW_LOG_CONF_BAD_INDX	BIT(12)	/* Used by response */
+
+#define ICE_AQC_FW_LOG_EN_S		12
+#define ICE_AQC_FW_LOG_EN_M		(0xF << ICE_AQC_FW_LOG_EN_S)
+#define ICE_AQC_FW_LOG_INFO_EN		BIT(12)	/* Used by command */
+#define ICE_AQC_FW_LOG_INIT_EN		BIT(13)	/* Used by command */
+#define ICE_AQC_FW_LOG_FLOW_EN		BIT(14)	/* Used by command */
+#define ICE_AQC_FW_LOG_ERR_EN		BIT(15)	/* Used by command */
+};
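+/* Illustrative sizing: a 0xFF10 response whose descriptor datalen is 54
+ * carries 54 / 2 = 27 entries, one per module in enum ice_aqc_fw_logging_mod
+ * (ICE_AQC_FW_LOG_ID_MAX).
+ */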
+
+/* Get/Clear FW Log (indirect 0xFF11) */
+struct ice_aqc_get_clear_fw_log {
+	u8 flags;
+#define ICE_AQC_FW_LOG_CLEAR		BIT(0)
+#define ICE_AQC_FW_LOG_MORE_DATA_AVAIL	BIT(1)
+	u8 rsvd1[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Download Package (indirect 0x0C40) */
+/* Also used for Update Package (indirect 0x0C42) */
+struct ice_aqc_download_pkg {
+	u8 flags;
+#define ICE_AQC_DOWNLOAD_PKG_LAST_BUF	0x01
+	u8 reserved[3];
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_download_pkg_resp {
+	__le32 error_offset;
+	__le32 error_info;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get Package Info List (indirect 0x0C43) */
+struct ice_aqc_get_pkg_info_list {
+	__le32 reserved1;
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Version format for packages */
+struct ice_pkg_ver {
+	u8 major;
+	u8 minor;
+	u8 update;
+	u8 draft;
+};
+
+#define ICE_PKG_NAME_SIZE	32
+
+struct ice_aqc_get_pkg_info {
+	struct ice_pkg_ver ver;
+	char name[ICE_PKG_NAME_SIZE];
+	u8 is_in_nvm;
+	u8 is_active;
+	u8 is_active_at_boot;
+	u8 is_modified;
+};
+
+/* Get Package Info List response buffer format (0x0C43) */
+struct ice_aqc_get_pkg_info_resp {
+	__le32 count;
+	struct ice_aqc_get_pkg_info pkg_info[1];
+};
+
 /**
  * struct ice_aq_desc - Admin Queue (AQ) descriptor
  * @flags: ICE_AQ_FLAG_* flags
@@ -1214,10 +1591,10 @@
  * @params: command-specific parameters
  *
  * Descriptor format for commands the driver posts on the Admin Transmit Queue
- * (ATQ).  The firmware writes back onto the command descriptor and returns
- * the result of the command.  Asynchronous events that are not an immediate
+ * (ATQ). The firmware writes back onto the command descriptor and returns
+ * the result of the command. Asynchronous events that are not an immediate
  * result of the command are written to the Admin Receive Queue (ARQ) using
- * the same descriptor format.  Descriptors are in little-endian notation with
+ * the same descriptor format. Descriptors are in little-endian notation with
  * 32-bit words.
  */
 struct ice_aq_desc {
@@ -1231,6 +1608,7 @@
 		u8 raw[16];
 		struct ice_aqc_generic generic;
 		struct ice_aqc_get_ver get_ver;
+		struct ice_aqc_driver_ver driver_ver;
 		struct ice_aqc_q_shutdown q_shutdown;
 		struct ice_aqc_req_res res_owner;
 		struct ice_aqc_manage_mac_read mac_read;
@@ -1240,18 +1618,32 @@
 		struct ice_aqc_get_phy_caps get_phy;
 		struct ice_aqc_set_phy_cfg set_phy;
 		struct ice_aqc_restart_an restart_an;
+		struct ice_aqc_set_port_id_led set_port_id_led;
 		struct ice_aqc_get_sw_cfg get_sw_conf;
 		struct ice_aqc_sw_rules sw_rules;
 		struct ice_aqc_get_topo get_topo;
-		struct ice_aqc_get_cfg_elem get_update_elem;
+		struct ice_aqc_sched_elem_cmd sched_elem_cmd;
 		struct ice_aqc_query_txsched_res query_sched_res;
-		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
+		struct ice_aqc_query_port_ets port_ets;
 		struct ice_aqc_nvm nvm;
+		struct ice_aqc_nvm_checksum nvm_checksum;
+		struct ice_aqc_pf_vf_msg virt;
+		struct ice_aqc_lldp_get_mib lldp_get_mib;
+		struct ice_aqc_lldp_set_mib_change lldp_set_event;
+		struct ice_aqc_lldp_stop lldp_stop;
+		struct ice_aqc_lldp_start lldp_start;
+		struct ice_aqc_lldp_set_local_mib lldp_set_mib;
+		struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
 		struct ice_aqc_get_set_rss_lut get_set_rss_lut;
 		struct ice_aqc_get_set_rss_key get_set_rss_key;
 		struct ice_aqc_add_txqs add_txqs;
 		struct ice_aqc_dis_txqs dis_txqs;
 		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
+		struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
+		struct ice_aqc_fw_logging fw_logging;
+		struct ice_aqc_get_clear_fw_log get_clear_fw_log;
+		struct ice_aqc_download_pkg download_pkg;
+		struct ice_aqc_set_mac_lb set_mac_lb;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
 		struct ice_aqc_set_event_mask set_event_mask;
 		struct ice_aqc_get_link_status get_link_status;
@@ -1275,17 +1667,26 @@
 
 /* error codes */
 enum ice_aq_err {
-	ICE_AQ_RC_OK		= 0,  /* success */
+	ICE_AQ_RC_OK		= 0,  /* Success */
+	ICE_AQ_RC_EPERM		= 1,  /* Operation not permitted */
+	ICE_AQ_RC_ENOENT	= 2,  /* No such element */
 	ICE_AQ_RC_ENOMEM	= 9,  /* Out of memory */
 	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
-	ICE_AQ_RC_EEXIST	= 13, /* object already exists */
+	ICE_AQ_RC_EEXIST	= 13, /* Object already exists */
 	ICE_AQ_RC_ENOSPC	= 16, /* No space left or allocation failure */
+	ICE_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	ICE_AQ_RC_ENOSEC	= 24, /* Missing security manifest */
+	ICE_AQ_RC_EBADSIG	= 25, /* Bad RSA signature */
+	ICE_AQ_RC_ESVN		= 26, /* SVN number prohibits this package */
+	ICE_AQ_RC_EBADMAN	= 27, /* Manifest hash mismatch */
+	ICE_AQ_RC_EBADBUF	= 28, /* Buffer hash mismatches manifest */
 };
 
 /* Admin Queue command opcodes */
 enum ice_adminq_opc {
 	/* AQ commands */
 	ice_aqc_opc_get_ver				= 0x0001,
+	ice_aqc_opc_driver_ver				= 0x0002,
 	ice_aqc_opc_q_shutdown				= 0x0003,
 
 	/* resource ownership */
@@ -1325,8 +1726,10 @@
 	/* transmit scheduler commands */
 	ice_aqc_opc_get_dflt_topo			= 0x0400,
 	ice_aqc_opc_add_sched_elems			= 0x0401,
+	ice_aqc_opc_get_sched_elems			= 0x0404,
 	ice_aqc_opc_suspend_sched_elems			= 0x0409,
 	ice_aqc_opc_resume_sched_elems			= 0x040A,
+	ice_aqc_opc_query_port_ets			= 0x040E,
 	ice_aqc_opc_delete_sched_elems			= 0x040F,
 	ice_aqc_opc_query_sched_res			= 0x0412,
 
@@ -1336,9 +1739,24 @@
 	ice_aqc_opc_restart_an				= 0x0605,
 	ice_aqc_opc_get_link_status			= 0x0607,
 	ice_aqc_opc_set_event_mask			= 0x0613,
+	ice_aqc_opc_set_mac_lb				= 0x0620,
+	ice_aqc_opc_set_port_id_led			= 0x06E9,
 
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
+	ice_aqc_opc_nvm_checksum			= 0x0706,
+
+	/* PF/VF mailbox commands */
+	ice_mbx_opc_send_msg_to_pf			= 0x0801,
+	ice_mbx_opc_send_msg_to_vf			= 0x0802,
+
+	/* LLDP commands */
+	ice_aqc_opc_lldp_get_mib			= 0x0A00,
+	ice_aqc_opc_lldp_set_mib_change			= 0x0A01,
+	ice_aqc_opc_lldp_stop				= 0x0A05,
+	ice_aqc_opc_lldp_start				= 0x0A06,
+	ice_aqc_opc_get_cee_dcb_cfg			= 0x0A07,
+	ice_aqc_opc_lldp_set_local_mib			= 0x0A08,
+	ice_aqc_opc_lldp_stop_start_specific_agent	= 0x0A09,
 
 	/* RSS commands */
 	ice_aqc_opc_set_rss_key				= 0x0B02,
@@ -1346,9 +1764,17 @@
 	ice_aqc_opc_get_rss_key				= 0x0B04,
 	ice_aqc_opc_get_rss_lut				= 0x0B05,
 
-	/* TX queue handling commands/events */
+	/* Tx queue handling commands/events */
 	ice_aqc_opc_add_txqs				= 0x0C30,
 	ice_aqc_opc_dis_txqs				= 0x0C31,
+
+	/* package commands */
+	ice_aqc_opc_download_pkg			= 0x0C40,
+	ice_aqc_opc_get_pkg_info_list			= 0x0C43,
+
+	/* debug commands */
+	ice_aqc_opc_fw_logging				= 0xFF09,
+	ice_aqc_opc_fw_logging_info			= 0xFF10,
 };
 
 #endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 661beea..3a6b395 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -7,16 +7,16 @@
 
 #define ICE_PF_RESET_WAIT_COUNT	200
 
-#define ICE_NIC_FLX_ENTRY(hw, mdid, idx) \
-	wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(ICE_RXDID_FLEX_NIC), \
+#define ICE_PROG_FLEX_ENTRY(hw, rxdid, mdid, idx) \
+	wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(rxdid), \
 	     ((ICE_RX_OPC_MDID << \
 	       GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \
 	      GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \
 	     (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \
 	      GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M))
 
-#define ICE_NIC_FLX_FLG_ENTRY(hw, flg_0, flg_1, flg_2, flg_3, idx) \
-	wr32((hw), GLFLXP_RXDID_FLAGS(ICE_RXDID_FLEX_NIC, idx), \
+#define ICE_PROG_FLG_ENTRY(hw, rxdid, flg_0, flg_1, flg_2, flg_3, idx) \
+	wr32((hw), GLFLXP_RXDID_FLAGS(rxdid, idx), \
 	     (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \
 	      GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \
 	     (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \
@@ -31,7 +31,7 @@
  * @hw: pointer to the HW structure
  *
  * This function sets the MAC type of the adapter based on the
- * vendor ID and device ID stored in the hw structure.
+ * vendor ID and device ID stored in the HW structure.
  */
 static enum ice_status ice_set_mac_type(struct ice_hw *hw)
 {
@@ -43,6 +43,20 @@
 }
 
 /**
+ * ice_dev_onetime_setup - Temporary HW/FW workarounds
+ * @hw: pointer to the HW structure
+ *
+ * This function provides temporary workarounds for certain issues
+ * that are expected to be fixed in the HW/FW.
+ */
+void ice_dev_onetime_setup(struct ice_hw *hw)
+{
+#define MBX_PF_VT_PFALLOC	0x00231E80
+	/* set VFs per PF */
+	wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF));
+}
+
+/**
  * ice_clear_pf_cfg - Clear PF configuration
  * @hw: pointer to the hardware structure
  *
@@ -60,7 +74,7 @@
 
 /**
  * ice_aq_manage_mac_read - manage MAC address read command
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @buf: a virtual buffer to hold the manage MAC read response
  * @buf_size: Size of the virtual buffer
  * @cd: pointer to command details structure or NULL
@@ -125,7 +139,7 @@
  *
  * Returns the various PHY capabilities supported on the Port (0x0600)
  */
-static enum ice_status
+enum ice_status
 ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
 		    struct ice_aqc_get_phy_caps_data *pcaps,
 		    struct ice_sq_cd *cd)
@@ -148,8 +162,10 @@
 	cmd->param0 |= cpu_to_le16(report_mode);
 	status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd);
 
-	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP)
+	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) {
 		pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
+		pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
+	}
 
 	return status;
 }
@@ -166,6 +182,9 @@
 		return ICE_MEDIA_UNKNOWN;
 
 	hw_link_info = &pi->phy.link_info;
+	if (hw_link_info->phy_type_low && hw_link_info->phy_type_high)
+		/* If more than one media type is selected, report unknown */
+		return ICE_MEDIA_UNKNOWN;
 
 	if (hw_link_info->phy_type_low) {
 		switch (hw_link_info->phy_type_low) {
@@ -179,6 +198,15 @@
 		case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
 		case ICE_PHY_TYPE_LOW_40GBASE_SR4:
 		case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_SR:
+		case ICE_PHY_TYPE_LOW_50GBASE_FR:
+		case ICE_PHY_TYPE_LOW_50GBASE_LR:
+		case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+		case ICE_PHY_TYPE_LOW_100GBASE_DR:
 			return ICE_MEDIA_FIBER;
 		case ICE_PHY_TYPE_LOW_100BASE_TX:
 		case ICE_PHY_TYPE_LOW_1000BASE_T:
@@ -192,6 +220,11 @@
 		case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
 		case ICE_PHY_TYPE_LOW_25GBASE_CR1:
 		case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_CP:
+		case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+		case ICE_PHY_TYPE_LOW_100GBASE_CP2:
 			return ICE_MEDIA_DA;
 		case ICE_PHY_TYPE_LOW_1000BASE_KX:
 		case ICE_PHY_TYPE_LOW_2500BASE_KX:
@@ -202,10 +235,18 @@
 		case ICE_PHY_TYPE_LOW_25GBASE_KR1:
 		case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
 		case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+		case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+		case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+			return ICE_MEDIA_BACKPLANE;
+		}
+	} else {
+		switch (hw_link_info->phy_type_high) {
+		case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
 			return ICE_MEDIA_BACKPLANE;
 		}
 	}
-
 	return ICE_MEDIA_UNKNOWN;
 }
 
@@ -222,21 +263,23 @@
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd)
 {
-	struct ice_link_status *hw_link_info_old, *hw_link_info;
 	struct ice_aqc_get_link_status_data link_data = { 0 };
 	struct ice_aqc_get_link_status *resp;
+	struct ice_link_status *li_old, *li;
 	enum ice_media_type *hw_media_type;
 	struct ice_fc_info *hw_fc_info;
 	bool tx_pause, rx_pause;
 	struct ice_aq_desc desc;
 	enum ice_status status;
+	struct ice_hw *hw;
 	u16 cmd_flags;
 
 	if (!pi)
 		return ICE_ERR_PARAM;
-	hw_link_info_old = &pi->phy.link_info_old;
+	hw = pi->hw;
+	li_old = &pi->phy.link_info_old;
 	hw_media_type = &pi->phy.media_type;
-	hw_link_info = &pi->phy.link_info;
+	li = &pi->phy.link_info;
 	hw_fc_info = &pi->fc;
 
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
@@ -245,24 +288,27 @@
 	resp->cmd_flags = cpu_to_le16(cmd_flags);
 	resp->lport_num = pi->lport;
 
-	status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data),
-				 cd);
+	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), cd);
 
 	if (status)
 		return status;
 
 	/* save off old link status information */
-	*hw_link_info_old = *hw_link_info;
+	*li_old = *li;
 
 	/* update current link status information */
-	hw_link_info->link_speed = le16_to_cpu(link_data.link_speed);
-	hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+	li->link_speed = le16_to_cpu(link_data.link_speed);
+	li->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+	li->phy_type_high = le64_to_cpu(link_data.phy_type_high);
 	*hw_media_type = ice_get_media_type(pi);
-	hw_link_info->link_info = link_data.link_info;
-	hw_link_info->an_info = link_data.an_info;
-	hw_link_info->ext_info = link_data.ext_info;
-	hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size);
-	hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M;
+	li->link_info = link_data.link_info;
+	li->an_info = link_data.an_info;
+	li->ext_info = link_data.ext_info;
+	li->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+	li->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+	li->topo_media_conflict = link_data.topo_media_conflict;
+	li->pacing = link_data.cfg & (ICE_AQ_CFG_PACING_M |
+				      ICE_AQ_CFG_PACING_TYPE_M);
 
 	/* update fc info */
 	tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
@@ -276,49 +322,116 @@
 	else
 		hw_fc_info->current_mode = ICE_FC_NONE;
 
-	hw_link_info->lse_ena =
-		!!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+	li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+	ice_debug(hw, ICE_DBG_LINK, "link_speed = 0x%x\n", li->link_speed);
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n",
+		  (unsigned long long)li->phy_type_low);
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n",
+		  (unsigned long long)li->phy_type_high);
+	ice_debug(hw, ICE_DBG_LINK, "media_type = 0x%x\n", *hw_media_type);
+	ice_debug(hw, ICE_DBG_LINK, "link_info = 0x%x\n", li->link_info);
+	ice_debug(hw, ICE_DBG_LINK, "an_info = 0x%x\n", li->an_info);
+	ice_debug(hw, ICE_DBG_LINK, "ext_info = 0x%x\n", li->ext_info);
+	ice_debug(hw, ICE_DBG_LINK, "lse_ena = 0x%x\n", li->lse_ena);
+	ice_debug(hw, ICE_DBG_LINK, "max_frame = 0x%x\n", li->max_frame_size);
+	ice_debug(hw, ICE_DBG_LINK, "pacing = 0x%x\n", li->pacing);
 
 	/* save link status information */
 	if (link)
-		*link = *hw_link_info;
+		*link = *li;
 
 	/* flag cleared so calling functions don't call AQ again */
 	pi->phy.get_link_info = false;
 
-	return status;
+	return 0;
 }
 
 /**
- * ice_init_flex_parser - initialize rx flex parser
+ * ice_init_flex_flags
  * @hw: pointer to the hardware structure
+ * @prof_id: Rx Descriptor Builder profile ID
  *
- * Function to initialize flex descriptors
+ * Function to initialize Rx flex flags
  */
-static void ice_init_flex_parser(struct ice_hw *hw)
+static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id)
 {
 	u8 idx = 0;
 
-	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_LOW, 0);
-	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_HIGH, 1);
-	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_LOWER, 2);
-	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_HIGH, 3);
-	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_FRG, ICE_RXFLG_UDP_GRE,
-			      ICE_RXFLG_PKT_DSI, ICE_RXFLG_FIN, idx++);
-	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_SYN, ICE_RXFLG_RST,
-			      ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++);
-	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI,
-			      ICE_RXFLG_EVLAN_x8100, ICE_RXFLG_EVLAN_x9100,
-			      idx++);
-	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_VLAN_x8100, ICE_RXFLG_TNL_VLAN,
-			      ICE_RXFLG_TNL_MAC, ICE_RXFLG_TNL0, idx++);
-	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2,
-			      ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx);
+	/* Flex-flag fields (0-2) are programmed with FLG64 bits with layout:
+	 * flexiflags0[5:0] - TCP flags, is_packet_fragmented, is_packet_UDP_GRE
+	 * flexiflags1[3:0] - Not used for flag programming
+	 * flexiflags2[7:0] - Tunnel and VLAN types
+	 * 2 invalid fields in last index
+	 */
+	switch (prof_id) {
+	/* Rx flex flags are currently programmed for the NIC profiles only.
+	 * Different flag bit programming configurations can be added per
+	 * profile as needed.
+	 */
+	case ICE_RXDID_FLEX_NIC:
+	case ICE_RXDID_FLEX_NIC_2:
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG,
+				   ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI,
+				   ICE_FLG_FIN, idx++);
+		/* flex flag 1 is not used for flexi-flag programming, skipping
+		 * these four FLG64 bits.
+		 */
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST,
+				   ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI,
+				   ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100,
+				   ICE_FLG_EVLAN_x9100, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100,
+				   ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC,
+				   ICE_FLG_TNL0, idx++);
+		ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2,
+				   ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx);
+		break;
+
+	default:
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Flag programming for profile ID %d not supported\n",
+			  prof_id);
+	}
+}
+
+/**
+ * ice_init_flex_flds
+ * @hw: pointer to the hardware structure
+ * @prof_id: Rx Descriptor Builder profile ID
+ *
+ * Function to initialize flex descriptors
+ */
+static void ice_init_flex_flds(struct ice_hw *hw, enum ice_rxdid prof_id)
+{
+	enum ice_flex_rx_mdid mdid;
+
+	switch (prof_id) {
+	case ICE_RXDID_FLEX_NIC:
+	case ICE_RXDID_FLEX_NIC_2:
+		ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_LOW, 0);
+		ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_HIGH, 1);
+		ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_FLOW_ID_LOWER, 2);
+
+		mdid = (prof_id == ICE_RXDID_FLEX_NIC_2) ?
+			ICE_RX_MDID_SRC_VSI : ICE_RX_MDID_FLOW_ID_HIGH;
+
+		ICE_PROG_FLEX_ENTRY(hw, prof_id, mdid, 3);
+
+		ice_init_flex_flags(hw, prof_id);
+		break;
+
+	default:
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Field init for profile ID %d not supported\n",
+			  prof_id);
+	}
 }
 
 /**
  * ice_init_fltr_mgmt_struct - initializes filter management list and locks
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  */
 static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
 {
@@ -333,49 +446,311 @@
 
 	INIT_LIST_HEAD(&sw->vsi_list_map_head);
 
-	mutex_init(&sw->mac_list_lock);
-	INIT_LIST_HEAD(&sw->mac_list_head);
-
-	mutex_init(&sw->vlan_list_lock);
-	INIT_LIST_HEAD(&sw->vlan_list_head);
-
-	mutex_init(&sw->eth_m_list_lock);
-	INIT_LIST_HEAD(&sw->eth_m_list_head);
-
-	mutex_init(&sw->promisc_list_lock);
-	INIT_LIST_HEAD(&sw->promisc_list_head);
-
-	mutex_init(&sw->mac_vlan_list_lock);
-	INIT_LIST_HEAD(&sw->mac_vlan_list_head);
-
-	return 0;
+	return ice_init_def_sw_recp(hw);
 }
 
 /**
  * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  */
 static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
 {
 	struct ice_switch_info *sw = hw->switch_info;
 	struct ice_vsi_list_map_info *v_pos_map;
 	struct ice_vsi_list_map_info *v_tmp_map;
+	struct ice_sw_recipe *recps;
+	u8 i;
 
 	list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head,
 				 list_entry) {
 		list_del(&v_pos_map->list_entry);
 		devm_kfree(ice_hw_to_dev(hw), v_pos_map);
 	}
+	recps = hw->switch_info->recp_list;
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry;
 
-	mutex_destroy(&sw->mac_list_lock);
-	mutex_destroy(&sw->vlan_list_lock);
-	mutex_destroy(&sw->eth_m_list_lock);
-	mutex_destroy(&sw->promisc_list_lock);
-	mutex_destroy(&sw->mac_vlan_list_lock);
-
+		recps[i].root_rid = i;
+		mutex_destroy(&recps[i].filt_rule_lock);
+		list_for_each_entry_safe(lst_itr, tmp_entry,
+					 &recps[i].filt_rules, list_entry) {
+			list_del(&lst_itr->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), lst_itr);
+		}
+	}
+	ice_rm_all_sw_replay_rule_info(hw);
+	devm_kfree(ice_hw_to_dev(hw), sw->recp_list);
 	devm_kfree(ice_hw_to_dev(hw), sw);
 }
 
+#define ICE_FW_LOG_DESC_SIZE(n)	(sizeof(struct ice_aqc_fw_logging_data) + \
+	(((n) - 1) * sizeof(((struct ice_aqc_fw_logging_data *)0)->entry)))
+#define ICE_FW_LOG_DESC_SIZE_MAX	\
+	ICE_FW_LOG_DESC_SIZE(ICE_AQC_FW_LOG_ID_MAX)
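+/* Illustrative sizing: ICE_FW_LOG_DESC_SIZE(3) evaluates to sizeof(struct
+ * ice_aqc_fw_logging_data) plus room for two more __le16 entries; the
+ * "(n) - 1" accounts for the one entry already declared inside the struct.
+ */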
+
+/**
+ * ice_get_fw_log_cfg - get FW logging configuration
+ * @hw: pointer to the HW struct
+ */
+static enum ice_status ice_get_fw_log_cfg(struct ice_hw *hw)
+{
+	struct ice_aqc_fw_logging_data *config;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 size;
+
+	size = ICE_FW_LOG_DESC_SIZE_MAX;
+	config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL);
+	if (!config)
+		return ICE_ERR_NO_MEMORY;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, config, size, NULL);
+	if (!status) {
+		u16 i;
+
+		/* Save FW logging information into the HW structure */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 v, m, flgs;
+
+			v = le16_to_cpu(config->entry[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S;
+
+			if (m < ICE_AQC_FW_LOG_ID_MAX)
+				hw->fw_log.evnts[m].cur = flgs;
+		}
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), config);
+
+	return status;
+}
+
+/**
+ * ice_cfg_fw_log - configure FW logging
+ * @hw: pointer to the HW struct
+ * @enable: enable certain FW logging events if true, disable all if false
+ *
+ * This function enables/disables the FW logging via Rx CQ events and a UART
+ * port based on predetermined configurations. FW logging via the Rx CQ can be
+ * enabled/disabled for individual PF's. However, FW logging via the UART can
+ * only be enabled/disabled for all PFs on the same device.
+ *
+ * To enable overall FW logging, the "cq_en" and "uart_en" enable bits in
+ * hw->fw_log need to be set accordingly, e.g. based on user-provided input,
+ * before initializing the device.
+ *
+ * When re/configuring FW logging, callers need to update the "cfg" elements of
+ * the hw->fw_log.evnts array with the desired logging event configurations for
+ * modules of interest. When disabling FW logging completely, the callers can
+ * just pass false in the "enable" parameter. On completion, the function will
+ * update the "cur" element of the hw->fw_log.evnts array with the resulting
+ * logging event configurations of the modules that are being re/configured. FW
+ * logging modules that are not part of a reconfiguration operation retain their
+ * previous states.
+ *
+ * Before resetting the device, it is recommended that the driver disables FW
+ * logging before shutting down the control queue. When disabling FW logging
+ * ("enable" = false), the latest configurations of FW logging events stored in
+ * hw->fw_log.evnts[] are not overridden to allow them to be reconfigured after
+ * a device reset.
+ *
+ * When enabling FW logging to emit log messages via the Rx CQ during the
+ * device's initialization phase, a mechanism alternative to interrupt handlers
+ * needs to be used to extract FW log messages from the Rx CQ periodically and
+ * to prevent the Rx CQ from being full and stalling other types of control
+ * messages from FW to SW. Interrupts are typically disabled during the device's
+ * initialization phase.
+ */
+static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable)
+{
+	struct ice_aqc_fw_logging_data *data = NULL;
+	struct ice_aqc_fw_logging *cmd;
+	enum ice_status status = 0;
+	u16 i, chgs = 0, len = 0;
+	struct ice_aq_desc desc;
+	u8 actv_evnts = 0;
+	void *buf = NULL;
+
+	if (!hw->fw_log.cq_en && !hw->fw_log.uart_en)
+		return 0;
+
+	/* Disable FW logging only when the control queue is still responsive */
+	if (!enable &&
+	    (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq)))
+		return 0;
+
+	/* Get current FW log settings */
+	status = ice_get_fw_log_cfg(hw);
+	if (status)
+		return status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging);
+	cmd = &desc.params.fw_logging;
+
+	/* Indicate which controls are valid */
+	if (hw->fw_log.cq_en)
+		cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_AQ_VALID;
+
+	if (hw->fw_log.uart_en)
+		cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_UART_VALID;
+
+	if (enable) {
+		/* Fill in an array of entries with FW logging modules and
+		 * logging events being reconfigured.
+		 */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 val;
+
+			/* Keep track of enabled event types */
+			actv_evnts |= hw->fw_log.evnts[i].cfg;
+
+			if (hw->fw_log.evnts[i].cfg == hw->fw_log.evnts[i].cur)
+				continue;
+
+			if (!data) {
+				data = devm_kzalloc(ice_hw_to_dev(hw),
+						    ICE_FW_LOG_DESC_SIZE_MAX,
+						    GFP_KERNEL);
+				if (!data)
+					return ICE_ERR_NO_MEMORY;
+			}
+
+			val = i << ICE_AQC_FW_LOG_ID_S;
+			val |= hw->fw_log.evnts[i].cfg << ICE_AQC_FW_LOG_EN_S;
+			data->entry[chgs++] = cpu_to_le16(val);
+		}
+
+		/* Only enable FW logging if at least one module is specified.
+		 * If FW logging is currently enabled but all modules are not
+		 * enabled to emit log messages, disable FW logging altogether.
+		 */
+		if (actv_evnts) {
+			/* Leave if there is effectively no change */
+			if (!chgs)
+				goto out;
+
+			if (hw->fw_log.cq_en)
+				cmd->log_ctrl |= ICE_AQC_FW_LOG_AQ_EN;
+
+			if (hw->fw_log.uart_en)
+				cmd->log_ctrl |= ICE_AQC_FW_LOG_UART_EN;
+
+			buf = data;
+			len = ICE_FW_LOG_DESC_SIZE(chgs);
+			desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+		}
+	}
+
+	status = ice_aq_send_cmd(hw, &desc, buf, len, NULL);
+	if (!status) {
+		/* Update the current configuration to reflect events enabled.
+		 * hw->fw_log.cq_en and hw->fw_log.uart_en indicate if the FW
+		 * logging mode is enabled for the device. They do not reflect
+		 * actual modules being enabled to emit log messages. So, their
+		 * values remain unchanged even when all modules are disabled.
+		 */
+		u16 cnt = enable ? chgs : (u16)ICE_AQC_FW_LOG_ID_MAX;
+
+		hw->fw_log.actv_evnts = actv_evnts;
+		for (i = 0; i < cnt; i++) {
+			u16 v, m;
+
+			if (!enable) {
+				/* When disabling all FW logging events as part
+				 * of device's de-initialization, the original
+				 * configurations are retained, and can be used
+				 * to reconfigure FW logging later if the device
+				 * is re-initialized.
+				 */
+				hw->fw_log.evnts[i].cur = 0;
+				continue;
+			}
+
+			v = le16_to_cpu(data->entry[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			hw->fw_log.evnts[m].cur = hw->fw_log.evnts[m].cfg;
+		}
+	}
+
+out:
+	if (data)
+		devm_kfree(ice_hw_to_dev(hw), data);
+
+	return status;
+}
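+/* Usage sketch (hypothetical caller, shown only to illustrate the contract
+ * described in the function comment above): before ice_init_hw() runs
+ * ice_cfg_fw_log(hw, true), a caller could request INFO and ERROR messages
+ * from the link module over the Rx CQ:
+ *
+ *	hw->fw_log.cq_en = true;
+ *	hw->fw_log.evnts[ICE_AQC_FW_LOG_ID_LINK].cfg =
+ *		(ICE_AQC_FW_LOG_INFO_EN | ICE_AQC_FW_LOG_ERR_EN) >>
+ *		ICE_AQC_FW_LOG_EN_S;
+ *
+ * Passing "enable" = false later clears every "cur" state while leaving the
+ * "cfg" values intact for a post-reset reconfiguration.
+ */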
+
+/**
+ * ice_output_fw_log
+ * @hw: pointer to the HW struct
+ * @desc: pointer to the AQ message descriptor
+ * @buf: pointer to the buffer accompanying the AQ message
+ *
+ * Formats a FW Log message and outputs it via the standard driver logs.
+ */
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
+{
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n");
+	ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf,
+			le16_to_cpu(desc->datalen));
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n");
+}
+
+/**
+ * ice_get_itr_intrl_gran - determine ITR/intrl granularity
+ * @hw: pointer to the HW struct
+ *
+ * Determines the ITR/intrl granularities based on the maximum aggregate
+ * bandwidth according to the device's configuration during power-on.
+ */
+static void ice_get_itr_intrl_gran(struct ice_hw *hw)
+{
+	u8 max_agg_bw = (rd32(hw, GL_PWR_MODE_CTL) &
+			 GL_PWR_MODE_CTL_CAR_MAX_BW_M) >>
+			GL_PWR_MODE_CTL_CAR_MAX_BW_S;
+
+	switch (max_agg_bw) {
+	case ICE_MAX_AGG_BW_200G:
+	case ICE_MAX_AGG_BW_100G:
+	case ICE_MAX_AGG_BW_50G:
+		hw->itr_gran = ICE_ITR_GRAN_ABOVE_25;
+		hw->intrl_gran = ICE_INTRL_GRAN_ABOVE_25;
+		break;
+	case ICE_MAX_AGG_BW_25G:
+		hw->itr_gran = ICE_ITR_GRAN_MAX_25;
+		hw->intrl_gran = ICE_INTRL_GRAN_MAX_25;
+		break;
+	}
+}
+
+/**
+ * ice_get_nvm_version - get cached NVM version data
+ * @hw: pointer to the hardware structure
+ * @oem_ver: 8 bit NVM version
+ * @oem_build: 16 bit NVM build number
+ * @oem_patch: 8 bit NVM patch number
+ * @ver_hi: high part of the NVM version (major version)
+ * @ver_lo: low part of the NVM version (minor version)
+ */
+void
+ice_get_nvm_version(struct ice_hw *hw, u8 *oem_ver, u16 *oem_build,
+		    u8 *oem_patch, u8 *ver_hi, u8 *ver_lo)
+{
+	struct ice_nvm_info *nvm = &hw->nvm;
+
+	*oem_ver = (u8)((nvm->oem_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT);
+	*oem_patch = (u8)(nvm->oem_ver & ICE_OEM_VER_PATCH_MASK);
+	*oem_build = (u16)((nvm->oem_ver & ICE_OEM_VER_BUILD_MASK) >>
+			   ICE_OEM_VER_BUILD_SHIFT);
+	*ver_hi = (nvm->ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT;
+	*ver_lo = (nvm->ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT;
+}
+
 /**
  * ice_init_hw - main hardware initialization routine
  * @hw: pointer to the hardware structure
@@ -400,16 +775,17 @@
 	if (status)
 		return status;
 
-	/* set these values to minimum allowed */
-	hw->itr_gran_200 = ICE_ITR_GRAN_MIN_200;
-	hw->itr_gran_100 = ICE_ITR_GRAN_MIN_100;
-	hw->itr_gran_50 = ICE_ITR_GRAN_MIN_50;
-	hw->itr_gran_25 = ICE_ITR_GRAN_MIN_25;
+	ice_get_itr_intrl_gran(hw);
 
-	status = ice_init_all_ctrlq(hw);
+	status = ice_create_all_ctrlq(hw);
 	if (status)
 		goto err_unroll_cqinit;
 
+	/* Enable FW logging. Not fatal if this fails. */
+	status = ice_cfg_fw_log(hw, true);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to enable FW logging.\n");
+
 	status = ice_clear_pf_cfg(hw);
 	if (status)
 		goto err_unroll_cqinit;
@@ -431,7 +807,7 @@
 		goto err_unroll_cqinit;
 	}
 
-	/* set the back pointer to hw */
+	/* set the back pointer to HW */
 	hw->port_info->hw = hw;
 
 	/* Initialize port_info struct with switch configuration data */
@@ -441,7 +817,7 @@
 
 	hw->evb_veb = true;
 
-	/* Query the allocated resources for tx scheduler */
+	/* Query the allocated resources for Tx scheduler */
 	status = ice_sched_query_res_alloc(hw);
 	if (status) {
 		ice_debug(hw, ICE_DBG_SCHED,
@@ -472,10 +848,20 @@
 	if (status)
 		goto err_unroll_sched;
 
+	/* need a valid SW entry point to build a Tx tree */
+	if (!hw->sw_entry_point_layer) {
+		ice_debug(hw, ICE_DBG_SCHED, "invalid sw entry point\n");
+		status = ICE_ERR_CFG;
+		goto err_unroll_sched;
+	}
+	INIT_LIST_HEAD(&hw->agg_list);
+
 	status = ice_init_fltr_mgmt_struct(hw);
 	if (status)
 		goto err_unroll_sched;
 
+	ice_dev_onetime_setup(hw);
+
 	/* Get MAC information */
 	/* A single port can report up to two (LAN and WoL) addresses */
 	mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
@@ -494,8 +880,11 @@
 	if (status)
 		goto err_unroll_fltr_mgmt_struct;
 
-	ice_init_flex_parser(hw);
-
+	ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC);
+	ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC_2);
+	status = ice_init_hw_tbls(hw);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
 	return 0;
 
 err_unroll_fltr_mgmt_struct:
@@ -505,25 +894,38 @@
 err_unroll_alloc:
 	devm_kfree(ice_hw_to_dev(hw), hw->port_info);
 err_unroll_cqinit:
-	ice_shutdown_all_ctrlq(hw);
+	ice_destroy_all_ctrlq(hw);
 	return status;
 }
 
 /**
  * ice_deinit_hw - unroll initialization operations done by ice_init_hw
  * @hw: pointer to the hardware structure
+ *
+ * This should be called only during nominal operation, not as a result of
+ * ice_init_hw() failing since ice_init_hw() will take care of unrolling
+ * applicable initializations if it fails for any reason.
  */
 void ice_deinit_hw(struct ice_hw *hw)
 {
+	ice_cleanup_fltr_mgmt_struct(hw);
+
 	ice_sched_cleanup_all(hw);
-	ice_shutdown_all_ctrlq(hw);
+	ice_sched_clear_agg(hw);
+	ice_free_seg(hw);
+	ice_free_hw_tbls(hw);
 
 	if (hw->port_info) {
 		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
 		hw->port_info = NULL;
 	}
 
-	ice_cleanup_fltr_mgmt_struct(hw);
+	/* Attempt to disable FW logging before shutting down control queues */
+	ice_cfg_fw_log(hw, false);
+	ice_destroy_all_ctrlq(hw);
+
+	/* Clear VSI contexts if not already cleared */
+	ice_clear_all_vsi_ctx(hw);
 }
 
 /**
@@ -652,6 +1054,8 @@
 		ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
 		val = GLGEN_RTRIG_GLOBR_M;
 		break;
+	default:
+		return ICE_ERR_PARAM;
 	}
 
 	val |= rd32(hw, GLGEN_RTRIG);
@@ -666,9 +1070,9 @@
  * ice_copy_rxq_ctx_to_hw
  * @hw: pointer to the hardware structure
  * @ice_rxq_ctx: pointer to the rxq context
- * @rxq_index: the index of the rx queue
+ * @rxq_index: the index of the Rx queue
  *
- * Copies rxq context from dense structure to hw register space
+ * Copies rxq context from dense structure to HW register space
  */
 static enum ice_status
 ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
@@ -681,7 +1085,7 @@
 	if (rxq_index > QRX_CTRL_MAX_INDEX)
 		return ICE_ERR_PARAM;
 
-	/* Copy each dword separately to hw */
+	/* Copy each dword separately to HW */
 	for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
 		wr32(hw, QRX_CONTEXT(i, rxq_index),
 		     *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
@@ -715,6 +1119,7 @@
 	ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena,	1,	195),
 	ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena,	1,	196),
 	ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh,		3,	198),
+	ICE_CTX_STORE(ice_rlan_ctx, prefena,		1,	201),
 	{ 0 }
 };
 
@@ -722,10 +1127,11 @@
  * ice_write_rxq_ctx
  * @hw: pointer to the hardware structure
  * @rlan_ctx: pointer to the rxq context
- * @rxq_index: the index of the rx queue
+ * @rxq_index: the index of the Rx queue
  *
  * Converts rxq context from sparse to dense structure and then writes
- * it to hw register space
+ * it to HW register space and enables the hardware to prefetch descriptors
+ * instead of only fetching them on demand
  */
 enum ice_status
 ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
@@ -733,6 +1139,11 @@
 {
 	u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
 
+	if (!rlan_ctx)
+		return ICE_ERR_BAD_PTR;
+
+	rlan_ctx->prefena = 1;
+
 	ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
 	return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
 }
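+/* Usage sketch ("ring_dma" and "rxq_index" are hypothetical, and the >> 7
+ * assumes the 128-byte base-address granularity used elsewhere in the
+ * driver): callers populate only the sparse fields they need and no longer
+ * set prefena themselves, since ice_write_rxq_ctx() now forces it on:
+ *
+ *	struct ice_rlan_ctx rlan_ctx = { 0 };
+ *	enum ice_status status;
+ *
+ *	rlan_ctx.base = ring_dma >> 7;
+ *	status = ice_write_rxq_ctx(hw, &rlan_ctx, rxq_index);
+ */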
@@ -748,6 +1159,7 @@
 	ICE_CTX_STORE(ice_tlan_ctx, vmvf_type,			2,	78),
 	ICE_CTX_STORE(ice_tlan_ctx, src_vsi,			10,	80),
 	ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena,			1,	90),
+	ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag,	1,	91),
 	ICE_CTX_STORE(ice_tlan_ctx, alt_vlan,			1,	92),
 	ICE_CTX_STORE(ice_tlan_ctx, cpuid,			8,	93),
 	ICE_CTX_STORE(ice_tlan_ctx, wb_mode,			1,	101),
@@ -766,7 +1178,7 @@
 	ICE_CTX_STORE(ice_tlan_ctx, drop_ena,			1,	165),
 	ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx,		2,	166),
 	ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx,	3,	168),
-	ICE_CTX_STORE(ice_tlan_ctx, int_q_state,		110,	171),
+	ICE_CTX_STORE(ice_tlan_ctx, int_q_state,		122,	171),
 	{ 0 }
 };
 
@@ -780,8 +1192,9 @@
  *
  * Dumps debug log about control command with descriptor contents.
  */
-void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc,
-		  void *buf, u16 buf_len)
+void
+ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf,
+	     u16 buf_len)
 {
 	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
 	u16 len;
@@ -821,9 +1234,15 @@
 
 /* FW Admin Queue command wrappers */
 
+/* Software lock/mutex that is meant to be held while the Global Config Lock
+ * in firmware is acquired by the software to prevent most (but not all) types
+ * of AQ commands from being sent to FW
+ */
+DEFINE_MUTEX(ice_global_cfg_lock_sw);
+
 /**
  * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @desc: descriptor describing the command
  * @buf: buffer to use for indirect commands (NULL for direct commands)
  * @buf_size: size of buffer for indirect commands (0 for direct commands)
@@ -835,12 +1254,43 @@
 ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
 		u16 buf_size, struct ice_sq_cd *cd)
 {
-	return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
+	struct ice_aqc_req_res *cmd = &desc->params.res_owner;
+	bool lock_acquired = false;
+	enum ice_status status;
+
+	/* When a package download is in process (i.e. when the firmware's
+	 * Global Configuration Lock resource is held), only the Download
+	 * Package, Get Version, Get Package Info List and Release Resource
+	 * (with resource ID set to Global Config Lock) AdminQ commands are
+	 * allowed; all others must block until the package download completes
+	 * and the Global Config Lock is released. See also
+	 * ice_acquire_global_cfg_lock().
+	 */
+	switch (le16_to_cpu(desc->opcode)) {
+	case ice_aqc_opc_download_pkg:
+	case ice_aqc_opc_get_pkg_info_list:
+	case ice_aqc_opc_get_ver:
+		break;
+	case ice_aqc_opc_release_res:
+		if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK)
+			break;
+		/* fall-through */
+	default:
+		mutex_lock(&ice_global_cfg_lock_sw);
+		lock_acquired = true;
+		break;
+	}
+
+	status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
+	if (lock_acquired)
+		mutex_unlock(&ice_global_cfg_lock_sw);
+
+	return status;
 }
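+/* Illustrative consequence of the serialization above: while a package
+ * download holds ice_global_cfg_lock_sw (see ice_acquire_global_cfg_lock()),
+ * a concurrent Get Link Status (0x0607) hits the default case, sleeps in
+ * mutex_lock(), and is sent only after the lock is released, whereas a
+ * Get Version (0x0001) matches an early case and bypasses the mutex.
+ */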
 
 /**
  * ice_aq_get_fw_ver
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @cd: pointer to command details structure or NULL
  *
  * Get the firmware version (0x0001) from the admin queue commands
@@ -873,8 +1323,45 @@
 }
 
 /**
+ * ice_aq_send_driver_ver
+ * @hw: pointer to the HW struct
+ * @dv: driver's major, minor version
+ * @cd: pointer to command details structure or NULL
+ *
+ * Send the driver version (0x0002) to the firmware
+ */
+enum ice_status
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_ver *cmd;
+	struct ice_aq_desc desc;
+	u16 len;
+
+	cmd = &desc.params.driver_ver;
+
+	if (!dv)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_ver);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->major_ver = dv->major_ver;
+	cmd->minor_ver = dv->minor_ver;
+	cmd->build_ver = dv->build_ver;
+	cmd->subbuild_ver = dv->subbuild_ver;
+
+	len = 0;
+	while (len < sizeof(dv->driver_string) &&
+	       isascii(dv->driver_string[len]) && dv->driver_string[len])
+		len++;
+
+	return ice_aq_send_cmd(hw, &desc, dv->driver_string, len, cd);
+}
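+/* Worked example: if dv->driver_string holds the NUL-terminated ASCII string
+ * "ice-0.8.1", the loop above stops at the NUL with len == 9, so exactly
+ * those nine bytes accompany the 0x0002 command as its indirect buffer.
+ */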
+
+/**
  * ice_aq_q_shutdown
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @unloading: is the driver unloading itself
  *
  * Tell the Firmware that we're shutting down the AdminQ and whether
@@ -890,21 +1377,36 @@
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
 
 	if (unloading)
-		cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING);
+		cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING;
 
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
 }
 
 /**
  * ice_aq_req_res
- * @hw: pointer to the hw struct
- * @res: resource id
+ * @hw: pointer to the HW struct
+ * @res: resource ID
  * @access: access type
  * @sdp_number: resource number
  * @timeout: the maximum time in ms that the driver may hold the resource
  * @cd: pointer to command details structure or NULL
  *
- * requests common resource using the admin queue commands (0x0008)
+ * Requests common resource using the admin queue commands (0x0008).
+ * When attempting to acquire the Global Config Lock, the driver can
+ * learn of three states:
+ *  1) ICE_SUCCESS -        acquired lock, and can perform download package
+ *  2) ICE_ERR_AQ_ERROR -   did not get lock, driver should fail to load
+ *  3) ICE_ERR_AQ_NO_WORK - did not get lock, but another driver has
+ *                          successfully downloaded the package; the driver does
+ *                          not have to download the package and can continue
+ *                          loading
+ *
+ * Note that if the caller is in an acquire lock, perform action, release lock
+ * phase of operation, it is possible that the FW may detect a timeout and issue
+ * a CORER. In this case, the driver will receive a CORER interrupt and will
+ * have to determine its cause. The calling thread that is handling this flow
+ * will likely get an error propagated back to it indicating the Download
+ * Package, Update Package or the Release Resource AQ commands timed out.
  */
 static enum ice_status
 ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
@@ -922,13 +1424,43 @@
 	cmd_resp->res_id = cpu_to_le16(res);
 	cmd_resp->access_type = cpu_to_le16(access);
 	cmd_resp->res_number = cpu_to_le32(sdp_number);
+	cmd_resp->timeout = cpu_to_le32(*timeout);
+	*timeout = 0;
 
 	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
 	/* The completion specifies the maximum time in ms that the driver
 	 * may hold the resource in the Timeout field.
-	 * If the resource is held by someone else, the command completes with
-	 * busy return value and the timeout field indicates the maximum time
-	 * the current owner of the resource has to free it.
+	 */
+
+	/* Global config lock response utilizes an additional status field.
+	 *
+	 * If the Global config lock resource is held by some other driver, the
+	 * command completes with ICE_AQ_RES_GLBL_IN_PROG in the status field
+	 * and the timeout field indicates the maximum time the current owner
+	 * of the resource has to free it.
+	 */
+	if (res == ICE_GLOBAL_CFG_LOCK_RES_ID) {
+		if (le16_to_cpu(cmd_resp->status) == ICE_AQ_RES_GLBL_SUCCESS) {
+			*timeout = le32_to_cpu(cmd_resp->timeout);
+			return 0;
+		} else if (le16_to_cpu(cmd_resp->status) ==
+			   ICE_AQ_RES_GLBL_IN_PROG) {
+			*timeout = le32_to_cpu(cmd_resp->timeout);
+			return ICE_ERR_AQ_ERROR;
+		} else if (le16_to_cpu(cmd_resp->status) ==
+			   ICE_AQ_RES_GLBL_DONE) {
+			return ICE_ERR_AQ_NO_WORK;
+		}
+
+		/* invalid FW response, force a timeout immediately */
+		*timeout = 0;
+		return ICE_ERR_AQ_ERROR;
+	}
+
+	/* If the resource is held by some other driver, the command completes
+	 * with a busy return value and the timeout field indicates the maximum
+	 * time the current owner of the resource has to free it.
 	 */
 	if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
 		*timeout = le32_to_cpu(cmd_resp->timeout);
@@ -938,8 +1470,8 @@
 
 /**
  * ice_aq_release_res
- * @hw: pointer to the hw struct
- * @res: resource id
+ * @hw: pointer to the HW struct
+ * @res: resource ID
  * @sdp_number: resource number
  * @cd: pointer to command details structure or NULL
  *
@@ -965,32 +1497,30 @@
 /**
  * ice_acquire_res
  * @hw: pointer to the HW structure
- * @res: resource id
+ * @res: resource ID
  * @access: access type (read or write)
+ * @timeout: timeout in milliseconds
  *
  * This function will attempt to acquire the ownership of a resource.
  */
 enum ice_status
 ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
-		enum ice_aq_res_access_type access)
+		enum ice_aq_res_access_type access, u32 timeout)
 {
 #define ICE_RES_POLLING_DELAY_MS	10
 	u32 delay = ICE_RES_POLLING_DELAY_MS;
+	u32 time_left = timeout;
 	enum ice_status status;
-	u32 time_left = 0;
-	u32 timeout;
 
 	status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
 
-	/* An admin queue return code of ICE_AQ_RC_EEXIST means that another
-	 * driver has previously acquired the resource and performed any
-	 * necessary updates; in this case the caller does not obtain the
-	 * resource and has no further work to do.
+	/* A return code of ICE_ERR_AQ_NO_WORK means that another driver has
+	 * previously acquired the resource and performed any necessary updates;
+	 * in this case the caller does not obtain the resource and has no
+	 * further work to do.
 	 */
-	if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
-		status = ICE_ERR_AQ_NO_WORK;
+	if (status == ICE_ERR_AQ_NO_WORK)
 		goto ice_acquire_res_exit;
-	}
 
 	if (status)
 		ice_debug(hw, ICE_DBG_RES,
@@ -1003,11 +1533,9 @@
 		timeout = (timeout > delay) ? timeout - delay : 0;
 		status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
 
-		if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
+		if (status == ICE_ERR_AQ_NO_WORK)
 			/* lock free, but no work to do */
-			status = ICE_ERR_AQ_NO_WORK;
 			break;
-		}
 
 		if (!status)
 			/* lock acquired */
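For illustration, a minimal sketch of how a caller might drive this
acquire/poll/release flow for the global config lock; the access enum value
ICE_RES_WRITE and the 5000 ms timeout are assumptions, not taken from this
patch:

/* Hypothetical caller: take the global config lock, do work, release. */
static enum ice_status example_do_global_cfg(struct ice_hw *hw)
{
	enum ice_status status;

	status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID,
				 ICE_RES_WRITE, 5000 /* ms, assumed */);
	if (status == ICE_ERR_AQ_NO_WORK)
		return 0;	/* another PF already did the work */
	if (status)
		return status;	/* lock not obtained within the timeout */

	/* ... download/update the package here ... */

	ice_release_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID);
	return 0;
}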
@@ -1031,7 +1559,7 @@
 /**
  * ice_release_res
  * @hw: pointer to the HW structure
- * @res: resource id
+ * @res: resource ID
  *
  * This function will release a resource using the proper Admin Command.
  */
@@ -1043,7 +1571,7 @@
 	status = ice_aq_release_res(hw, res, 0, NULL);
 
 	/* there are some rare cases when trying to release the resource
-	 * results in an admin Q timeout, so handle them correctly
+	 * results in an admin queue timeout, so handle them correctly
 	 */
 	while ((status == ICE_ERR_AQ_TIMEOUT) &&
 	       (total_delay < hw->adminq.sq_cmd_timeout)) {
@@ -1054,8 +1582,31 @@
 }
 
 /**
+ * ice_get_num_per_func - determine number of resources per PF
+ * @hw: pointer to the HW structure
+ * @max: value to be evenly split between each PF
+ *
+ * Determine the number of valid functions by going through the bitmap returned
+ * from parsing capabilities and use this to calculate the number of resources
+ * per PF based on the max value passed in.
+ */
+static u32 ice_get_num_per_func(struct ice_hw *hw, u32 max)
+{
+	u8 funcs;
+
+#define ICE_CAPS_VALID_FUNCS_M	0xFF
+	funcs = hweight8(hw->dev_caps.common_cap.valid_functions &
+			 ICE_CAPS_VALID_FUNCS_M);
+
+	if (!funcs)
+		return 0;
+
+	return max / funcs;
+}
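A quick worked example of the split above (all values hypothetical): a
valid_functions bitmap of 0x0F means four PFs, so a device-wide maximum of
768 yields 192 per PF. A standalone sketch of the same computation:

/* Userspace sketch with assumed inputs. */
#include <stdio.h>

int main(void)
{
	unsigned char valid_functions = 0x0F;	/* assumed: PFs 0-3 valid */
	unsigned int max = 768;			/* assumed device-wide max */
	unsigned int funcs = __builtin_popcount(valid_functions & 0xFF);

	if (funcs)
		printf("per-PF share = %u\n", max / funcs);	/* 192 */
	return 0;
}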
+
+/**
  * ice_parse_caps - parse function/device capabilities
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @buf: pointer to a buffer containing function/device capability records
  * @cap_count: number of capability records in the list
  * @opc: type of capabilities list to parse
@@ -1070,6 +1621,7 @@
 	struct ice_hw_func_caps *func_p = NULL;
 	struct ice_hw_dev_caps *dev_p = NULL;
 	struct ice_hw_common_caps *caps;
+	char const *prefix;
 	u32 i;
 
 	if (!buf)
@@ -1080,9 +1632,11 @@
 	if (opc == ice_aqc_opc_list_dev_caps) {
 		dev_p = &hw->dev_caps;
 		caps = &dev_p->common_cap;
+		prefix = "dev cap";
 	} else if (opc == ice_aqc_opc_list_func_caps) {
 		func_p = &hw->func_caps;
 		caps = &func_p->common_cap;
+		prefix = "func cap";
 	} else {
 		ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
 		return;
@@ -1095,72 +1649,114 @@
 		u16 cap = le16_to_cpu(cap_resp->cap);
 
 		switch (cap) {
+		case ICE_AQC_CAPS_VALID_FUNCTIONS:
+			caps->valid_functions = number;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "%s: valid_functions (bitmap) = %d\n", prefix,
+				  caps->valid_functions);
+			break;
+		case ICE_AQC_CAPS_SRIOV:
+			caps->sr_iov_1_1 = (number == 1);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "%s: sr_iov_1_1 = %d\n", prefix,
+				  caps->sr_iov_1_1);
+			break;
+		case ICE_AQC_CAPS_VF:
+			if (dev_p) {
+				dev_p->num_vfs_exposed = number;
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: num_vfs_exposed = %d\n", prefix,
+					  dev_p->num_vfs_exposed);
+			} else if (func_p) {
+				func_p->num_allocd_vfs = number;
+				func_p->vf_base_id = logical_id;
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: num_allocd_vfs = %d\n", prefix,
+					  func_p->num_allocd_vfs);
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: vf_base_id = %d\n", prefix,
+					  func_p->vf_base_id);
+			}
+			break;
 		case ICE_AQC_CAPS_VSI:
 			if (dev_p) {
 				dev_p->num_vsi_allocd_to_host = number;
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: Dev.VSI cnt = %d\n",
+					  "%s: num_vsi_allocd_to_host = %d\n",
+					  prefix,
 					  dev_p->num_vsi_allocd_to_host);
 			} else if (func_p) {
-				func_p->guaranteed_num_vsi = number;
+				func_p->guar_num_vsi =
+					ice_get_num_per_func(hw, ICE_MAX_VSI);
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: Func.VSI cnt = %d\n",
-					  func_p->guaranteed_num_vsi);
+					  "%s: guar_num_vsi (fw) = %d\n",
+					  prefix, number);
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: guar_num_vsi = %d\n",
+					  prefix, func_p->guar_num_vsi);
 			}
 			break;
+		case ICE_AQC_CAPS_DCB:
+			caps->dcb = (number == 1);
+			caps->active_tc_bitmap = logical_id;
+			caps->maxtc = phys_id;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "%s: dcb = %d\n", prefix, caps->dcb);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "%s: active_tc_bitmap = %d\n", prefix,
+				  caps->active_tc_bitmap);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "%s: maxtc = %d\n", prefix, caps->maxtc);
+			break;
 		case ICE_AQC_CAPS_RSS:
 			caps->rss_table_size = number;
 			caps->rss_table_entry_width = logical_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: RSS table size = %d\n",
+				  "%s: rss_table_size = %d\n", prefix,
 				  caps->rss_table_size);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: RSS table width = %d\n",
+				  "%s: rss_table_entry_width = %d\n", prefix,
 				  caps->rss_table_entry_width);
 			break;
 		case ICE_AQC_CAPS_RXQS:
 			caps->num_rxq = number;
 			caps->rxq_first_id = phys_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Num Rx Qs = %d\n", caps->num_rxq);
+				  "%s: num_rxq = %d\n", prefix,
+				  caps->num_rxq);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Rx first queue ID = %d\n",
+				  "%s: rxq_first_id = %d\n", prefix,
 				  caps->rxq_first_id);
 			break;
 		case ICE_AQC_CAPS_TXQS:
 			caps->num_txq = number;
 			caps->txq_first_id = phys_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Num Tx Qs = %d\n", caps->num_txq);
+				  "%s: num_txq = %d\n", prefix,
+				  caps->num_txq);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Tx first queue ID = %d\n",
+				  "%s: txq_first_id = %d\n", prefix,
 				  caps->txq_first_id);
 			break;
 		case ICE_AQC_CAPS_MSIX:
 			caps->num_msix_vectors = number;
 			caps->msix_vector_first_id = phys_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: MSIX vector count = %d\n",
+				  "%s: num_msix_vectors = %d\n", prefix,
 				  caps->num_msix_vectors);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: MSIX first vector index = %d\n",
+				  "%s: msix_vector_first_id = %d\n", prefix,
 				  caps->msix_vector_first_id);
 			break;
 		case ICE_AQC_CAPS_MAX_MTU:
 			caps->max_mtu = number;
-			if (dev_p)
-				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: Dev.MaxMTU = %d\n",
-					  caps->max_mtu);
-			else if (func_p)
-				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: func.MaxMTU = %d\n",
-					  caps->max_mtu);
+			ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
+				  prefix, caps->max_mtu);
 			break;
 		default:
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Unknown capability[%d]: 0x%x\n", i,
-				  cap);
+				  "%s: unknown capability[%d]: 0x%x\n", prefix,
+				  i, cap);
 			break;
 		}
 	}
@@ -1168,10 +1764,10 @@
 
 /**
  * ice_aq_discover_caps - query function/device capabilities
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @buf: a virtual buffer to hold the capabilities
  * @buf_size: Size of the virtual buffer
- * @data_size: Size of the returned data, or buf size needed if AQ err==ENOMEM
+ * @cap_count: cap count needed if AQ err==ENOMEM
  * @opc: capabilities type to discover - pass in the command opcode
  * @cd: pointer to command details structure or NULL
  *
@@ -1179,7 +1775,7 @@
  * the firmware.
  */
 static enum ice_status
-ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u16 *data_size,
+ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
 		     enum ice_adminq_opc opc, struct ice_sq_cd *cd)
 {
 	struct ice_aqc_list_caps *cmd;
@@ -1197,61 +1793,146 @@
 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
 	if (!status)
 		ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc);
-	*data_size = le16_to_cpu(desc.datalen);
+	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOMEM)
+		*cap_count = le32_to_cpu(cmd->count);
+	return status;
+}
+
+/**
+ * ice_discover_caps - get info about the HW
+ * @hw: pointer to the hardware structure
+ * @opc: capabilities type to discover - pass in the command opcode
+ */
+static enum ice_status
+ice_discover_caps(struct ice_hw *hw, enum ice_adminq_opc opc)
+{
+	enum ice_status status;
+	u32 cap_count;
+	u16 cbuf_len;
+	u8 retries;
+
+	/* The driver doesn't know how many capabilities the device will return
+	 * so the buffer size required isn't known ahead of time. The driver
+	 * starts with cbuf_len and if this turns out to be insufficient, the
+	 * device returns ICE_AQ_RC_ENOMEM and also the cap_count it needs.
+	 * The driver then allocates the buffer based on the count and retries
+	 * the operation. So it follows that the retry count is 2.
+	 */
+#define ICE_GET_CAP_BUF_COUNT	40
+#define ICE_GET_CAP_RETRY_COUNT	2
+
+	cap_count = ICE_GET_CAP_BUF_COUNT;
+	retries = ICE_GET_CAP_RETRY_COUNT;
+
+	do {
+		void *cbuf;
+
+		cbuf_len = (u16)(cap_count *
+				 sizeof(struct ice_aqc_list_caps_elem));
+		cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL);
+		if (!cbuf)
+			return ICE_ERR_NO_MEMORY;
+
+		status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &cap_count,
+					      opc, NULL);
+		devm_kfree(ice_hw_to_dev(hw), cbuf);
+
+		if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM)
+			break;
+
+		/* If ENOMEM is returned, try again with bigger buffer */
+	} while (--retries);
 
 	return status;
 }
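The two-pass sizing above ("guess, learn the real count on ENOMEM, retry
once") generalizes to any discovery-style command; a hedged sketch of the
shape, with query() standing in for the real AQ call:

/* Generic sketch: query with a guessed buffer, resize once on ENOMEM. */
#include <errno.h>
#include <stdlib.h>

extern int query(void *buf, size_t len, size_t *count); /* assumed stand-in */

int query_with_resize(size_t elem_size, size_t guessed_count)
{
	size_t count = guessed_count;
	int tries = 2, err;

	do {
		void *buf = calloc(count, elem_size);

		if (!buf)
			return -ENOMEM;
		/* on -ENOMEM, query() writes the needed count back */
		err = query(buf, count * elem_size, &count);
		free(buf);
	} while (err == -ENOMEM && --tries);

	return err;
}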
 
 /**
+ * ice_set_safe_mode_caps - Override dev/func capabilities when in safe mode
+ * @hw: pointer to the hardware structure
+ */
+void ice_set_safe_mode_caps(struct ice_hw *hw)
+{
+	struct ice_hw_func_caps *func_caps = &hw->func_caps;
+	struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
+	u32 valid_func, rxq_first_id, txq_first_id;
+	u32 msix_vector_first_id, max_mtu;
+	u32 num_func = 0;
+	u8 i;
+
+	/* cache some func_caps values that should be restored after memset */
+	valid_func = func_caps->common_cap.valid_functions;
+	txq_first_id = func_caps->common_cap.txq_first_id;
+	rxq_first_id = func_caps->common_cap.rxq_first_id;
+	msix_vector_first_id = func_caps->common_cap.msix_vector_first_id;
+	max_mtu = func_caps->common_cap.max_mtu;
+
+	/* unset func capabilities */
+	memset(func_caps, 0, sizeof(*func_caps));
+
+	/* restore cached values */
+	func_caps->common_cap.valid_functions = valid_func;
+	func_caps->common_cap.txq_first_id = txq_first_id;
+	func_caps->common_cap.rxq_first_id = rxq_first_id;
+	func_caps->common_cap.msix_vector_first_id = msix_vector_first_id;
+	func_caps->common_cap.max_mtu = max_mtu;
+
+	/* one Tx and one Rx queue in safe mode */
+	func_caps->common_cap.num_rxq = 1;
+	func_caps->common_cap.num_txq = 1;
+
+	/* two MSIX vectors, one for traffic and one for misc causes */
+	func_caps->common_cap.num_msix_vectors = 2;
+	func_caps->guar_num_vsi = 1;
+
+	/* cache some dev_caps values that should be restored after memset */
+	valid_func = dev_caps->common_cap.valid_functions;
+	txq_first_id = dev_caps->common_cap.txq_first_id;
+	rxq_first_id = dev_caps->common_cap.rxq_first_id;
+	msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id;
+	max_mtu = dev_caps->common_cap.max_mtu;
+
+	/* unset dev capabilities */
+	memset(dev_caps, 0, sizeof(*dev_caps));
+
+	/* restore cached values */
+	dev_caps->common_cap.valid_functions = valid_func;
+	dev_caps->common_cap.txq_first_id = txq_first_id;
+	dev_caps->common_cap.rxq_first_id = rxq_first_id;
+	dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id;
+	dev_caps->common_cap.max_mtu = max_mtu;
+
+	/* valid_func is a bitmap. get number of functions */
+#define ICE_MAX_FUNCS 8
+	for (i = 0; i < ICE_MAX_FUNCS; i++)
+		if (valid_func & BIT(i))
+			num_func++;
+
+	/* one Tx and one Rx queue per function in safe mode */
+	dev_caps->common_cap.num_rxq = num_func;
+	dev_caps->common_cap.num_txq = num_func;
+
+	/* two MSIX vectors per function */
+	dev_caps->common_cap.num_msix_vectors = 2 * num_func;
+}
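The BIT(i) loop above is just a population count over the 8-bit function
bitmap, i.e. the same operation ice_get_num_per_func() performs with
hweight8(); a standalone check of the equivalence (the test value is
arbitrary):

#include <assert.h>

static unsigned int count_bits_loop(unsigned char bitmap)
{
	unsigned int i, n = 0;

	for (i = 0; i < 8; i++)
		if (bitmap & (1u << i))
			n++;
	return n;
}

int main(void)
{
	/* 0x2D = 0b00101101 has four bits set */
	assert(count_bits_loop(0x2D) == __builtin_popcount(0x2D));
	return 0;
}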
+
+/**
  * ice_get_caps - get info about the HW
  * @hw: pointer to the hardware structure
  */
 enum ice_status ice_get_caps(struct ice_hw *hw)
 {
 	enum ice_status status;
-	u16 data_size = 0;
-	u16 cbuf_len;
-	u8 retries;
 
-	/* The driver doesn't know how many capabilities the device will return
-	 * so the buffer size required isn't known ahead of time. The driver
-	 * starts with cbuf_len and if this turns out to be insufficient, the
-	 * device returns ICE_AQ_RC_ENOMEM and also the buffer size it needs.
-	 * The driver then allocates the buffer of this size and retries the
-	 * operation. So it follows that the retry count is 2.
-	 */
-#define ICE_GET_CAP_BUF_COUNT	40
-#define ICE_GET_CAP_RETRY_COUNT	2
-
-	cbuf_len = ICE_GET_CAP_BUF_COUNT *
-		sizeof(struct ice_aqc_list_caps_elem);
-
-	retries = ICE_GET_CAP_RETRY_COUNT;
-
-	do {
-		void *cbuf;
-
-		cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL);
-		if (!cbuf)
-			return ICE_ERR_NO_MEMORY;
-
-		status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &data_size,
-					      ice_aqc_opc_list_func_caps, NULL);
-		devm_kfree(ice_hw_to_dev(hw), cbuf);
-
-		if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM)
-			break;
-
-		/* If ENOMEM is returned, try again with bigger buffer */
-		cbuf_len = data_size;
-	} while (--retries);
+	status = ice_discover_caps(hw, ice_aqc_opc_list_dev_caps);
+	if (!status)
+		status = ice_discover_caps(hw, ice_aqc_opc_list_func_caps);
 
 	return status;
 }
 
 /**
  * ice_aq_manage_mac_write - manage MAC address write command
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
  * @flags: flags to control write behavior
  * @cd: pointer to command details structure or NULL
@@ -1259,7 +1940,7 @@
  * This function is used to write MAC address to the NVM (0x0108).
  */
 enum ice_status
-ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
 			struct ice_sq_cd *cd)
 {
 	struct ice_aqc_manage_mac_write *cmd;
@@ -1271,15 +1952,15 @@
 	cmd->flags = flags;
 
 	/* Prep values for flags, sah, sal */
-	cmd->sah = htons(*((u16 *)mac_addr));
-	cmd->sal = htonl(*((u32 *)(mac_addr + 2)));
+	cmd->sah = htons(*((const u16 *)mac_addr));
+	cmd->sal = htonl(*((const u32 *)(mac_addr + 2)));
 
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
 }
 
 /**
  * ice_aq_clear_pxe_mode
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  *
  * Tell the firmware that the driver is taking over from PXE (0x0110).
  */
@@ -1295,7 +1976,7 @@
 
 /**
  * ice_clear_pxe_mode - clear pxe operations mode
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  *
  * Make sure all PXE mode settings are cleared, including things
  * like descriptor fetch/write-back mode.
@@ -1307,8 +1988,184 @@
 }
 
 /**
+ * ice_get_link_speed_based_on_phy_type - returns link speed
+ * @phy_type_low: lower part of phy_type
+ * @phy_type_high: higher part of phy_type
+ *
+ * This helper function will convert an entry in PHY type structure
+ * [phy_type_low, phy_type_high] to its corresponding link speed.
+ * Note: In the structure of [phy_type_low, phy_type_high], there should
+ * be one bit set, as this function will convert one PHY type to its
+ * speed.
+ * If no bit is set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned.
+ * If more than one bit is set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned.
+ */
+static u16
+ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
+{
+	u16 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+	u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+
+	switch (phy_type_low) {
+	case ICE_PHY_TYPE_LOW_100BASE_TX:
+	case ICE_PHY_TYPE_LOW_100M_SGMII:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_100MB;
+		break;
+	case ICE_PHY_TYPE_LOW_1000BASE_T:
+	case ICE_PHY_TYPE_LOW_1000BASE_SX:
+	case ICE_PHY_TYPE_LOW_1000BASE_LX:
+	case ICE_PHY_TYPE_LOW_1000BASE_KX:
+	case ICE_PHY_TYPE_LOW_1G_SGMII:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_1000MB;
+		break;
+	case ICE_PHY_TYPE_LOW_2500BASE_T:
+	case ICE_PHY_TYPE_LOW_2500BASE_X:
+	case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_2500MB;
+		break;
+	case ICE_PHY_TYPE_LOW_5GBASE_T:
+	case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_5GB;
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_T:
+	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+	case ICE_PHY_TYPE_LOW_10GBASE_SR:
+	case ICE_PHY_TYPE_LOW_10GBASE_LR:
+	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_10GB;
+		break;
+	case ICE_PHY_TYPE_LOW_25GBASE_T:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+	case ICE_PHY_TYPE_LOW_25GBASE_SR:
+	case ICE_PHY_TYPE_LOW_25GBASE_LR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_25GB;
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB;
+		break;
+	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2:
+	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI2:
+	case ICE_PHY_TYPE_LOW_50GBASE_CP:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR:
+	case ICE_PHY_TYPE_LOW_50GBASE_FR:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI1:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_50GB;
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4:
+	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_AUI4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_100GBASE_DR:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_100GB;
+		break;
+	default:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+		break;
+	}
+
+	switch (phy_type_high) {
+	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
+		break;
+	default:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+		break;
+	}
+
+	if (speed_phy_type_low == ICE_AQ_LINK_SPEED_UNKNOWN &&
+	    speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+		return ICE_AQ_LINK_SPEED_UNKNOWN;
+	else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+		 speed_phy_type_high != ICE_AQ_LINK_SPEED_UNKNOWN)
+		return ICE_AQ_LINK_SPEED_UNKNOWN;
+	else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+		 speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+		return speed_phy_type_low;
+	else
+		return speed_phy_type_high;
+}
+
+/**
+ * ice_update_phy_type
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @link_speeds_bitmap: targeted link speeds bitmap
+ *
+ * Note: For the link_speeds_bitmap structure, see
+ * [ice_aqc_get_link_status->link_speed]. The caller can pass in a
+ * link_speeds_bitmap that includes multiple speeds.
+ *
+ * Each entry in the [phy_type_low, phy_type_high] structure represents
+ * a certain link speed. This helper function turns on bits in
+ * [phy_type_low, phy_type_high] based on the value of the
+ * link_speeds_bitmap input parameter.
+ */
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+		    u16 link_speeds_bitmap)
+{
+	u64 pt_high;
+	u64 pt_low;
+	int index;
+	u16 speed;
+
+	/* We first check with low part of phy_type */
+	for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) {
+		pt_low = BIT_ULL(index);
+		speed = ice_get_link_speed_based_on_phy_type(pt_low, 0);
+
+		if (link_speeds_bitmap & speed)
+			*phy_type_low |= BIT_ULL(index);
+	}
+
+	/* We then check with high part of phy_type */
+	for (index = 0; index <= ICE_PHY_TYPE_HIGH_MAX_INDEX; index++) {
+		pt_high = BIT_ULL(index);
+		speed = ice_get_link_speed_based_on_phy_type(0, pt_high);
+
+		if (link_speeds_bitmap & speed)
+			*phy_type_high |= BIT_ULL(index);
+	}
+}
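A hedged usage sketch: building a PHY-type advertise mask for two requested
speeds before a set-PHY-config call. The speed constants are the ones
returned by the helper above; the surrounding setup is assumed:

u64 phy_type_low = 0;
u64 phy_type_high = 0;
u16 speeds = ICE_AQ_LINK_SPEED_10GB | ICE_AQ_LINK_SPEED_25GB;

ice_update_phy_type(&phy_type_low, &phy_type_high, speeds);
/* phy_type_low now has every 10G and 25G PHY type bit set (the
 * ICE_PHY_TYPE_LOW_10GBASE_* and _25GBASE_* cases above), while
 * phy_type_high stays zero because no 100G speed was requested.
 */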
+
+/**
  * ice_aq_set_phy_cfg
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @lport: logical port number
  * @cfg: structure with PHY configuration data to be set
  * @cd: pointer to command details structure or NULL
@@ -1318,19 +2175,38 @@
  * mode as the PF may not have the privilege to set some of the PHY Config
  * parameters. This status will be indicated by the command response (0x0601).
  */
-static enum ice_status
+enum ice_status
 ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
 		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
 {
-	struct ice_aqc_set_phy_cfg *cmd;
 	struct ice_aq_desc desc;
 
 	if (!cfg)
 		return ICE_ERR_PARAM;
 
-	cmd = &desc.params.set_phy;
+	/* Ensure that only valid bits of cfg->caps can be turned on. */
+	if (cfg->caps & ~ICE_AQ_PHY_ENA_VALID_MASK) {
+		ice_debug(hw, ICE_DBG_PHY,
+			  "Invalid bit is set in ice_aqc_set_phy_cfg_data->caps : 0x%x\n",
+			  cfg->caps);
+
+		cfg->caps &= ICE_AQ_PHY_ENA_VALID_MASK;
+	}
+
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
-	cmd->lport_num = lport;
+	desc.params.set_phy.lport_num = lport;
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_low));
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_high));
+	ice_debug(hw, ICE_DBG_LINK, "caps = 0x%x\n", cfg->caps);
+	ice_debug(hw, ICE_DBG_LINK, "low_power_ctrl = 0x%x\n",
+		  cfg->low_power_ctrl);
+	ice_debug(hw, ICE_DBG_LINK, "eee_cap = 0x%x\n", cfg->eee_cap);
+	ice_debug(hw, ICE_DBG_LINK, "eeer_value = 0x%x\n", cfg->eeer_value);
+	ice_debug(hw, ICE_DBG_LINK, "link_fec_opt = 0x%x\n", cfg->link_fec_opt);
 
 	return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
 }
@@ -1339,39 +2215,39 @@
  * ice_update_link_info - update status of the HW network link
  * @pi: port info structure of the interested logical port
  */
-static enum ice_status
-ice_update_link_info(struct ice_port_info *pi)
+enum ice_status ice_update_link_info(struct ice_port_info *pi)
 {
-	struct ice_aqc_get_phy_caps_data *pcaps;
-	struct ice_phy_info *phy_info;
+	struct ice_link_status *li;
 	enum ice_status status;
-	struct ice_hw *hw;
 
 	if (!pi)
 		return ICE_ERR_PARAM;
 
-	hw = pi->hw;
+	li = &pi->phy.link_info;
 
-	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
-	if (!pcaps)
-		return ICE_ERR_NO_MEMORY;
-
-	phy_info = &pi->phy;
 	status = ice_aq_get_link_info(pi, true, NULL, NULL);
 	if (status)
-		goto out;
+		return status;
 
-	if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
-		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+	if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		struct ice_aqc_get_phy_caps_data *pcaps;
+		struct ice_hw *hw;
+
+		hw = pi->hw;
+		pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
+				     GFP_KERNEL);
+		if (!pcaps)
+			return ICE_ERR_NO_MEMORY;
+
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
 					     pcaps, NULL);
-		if (status)
-			goto out;
+		if (!status)
+			memcpy(li->module_type, &pcaps->module_type,
+			       sizeof(li->module_type));
 
-		memcpy(phy_info->link_info.module_type, &pcaps->module_type,
-		       sizeof(phy_info->link_info.module_type));
+		devm_kfree(ice_hw_to_dev(hw), pcaps);
 	}
-out:
-	devm_kfree(ice_hw_to_dev(hw), pcaps);
+
 	return status;
 }
 
@@ -1379,12 +2255,12 @@
  * ice_set_fc
  * @pi: port information structure
  * @aq_failures: pointer to status code, specific to ice_set_fc routine
- * @atomic_restart: enable automatic link update
+ * @ena_auto_link_update: enable automatic link update
  *
  * Set the requested flow control mode.
  */
 enum ice_status
-ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart)
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
 {
 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
 	struct ice_aqc_get_phy_caps_data *pcaps;
@@ -1416,7 +2292,7 @@
 	if (!pcaps)
 		return ICE_ERR_NO_MEMORY;
 
-	/* Get the current phy config */
+	/* Get the current PHY config */
 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
 				     NULL);
 	if (status) {
@@ -1427,16 +2303,19 @@
 	/* clear the old pause settings */
 	cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
 				   ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+
 	/* set the new capabilities */
 	cfg.caps |= pause_mask;
+
 	/* If the capabilities have changed, then set the new config */
 	if (cfg.caps != pcaps->caps) {
 		int retry_count, retry_max = 10;
 
 		/* Auto restart link so settings take effect */
-		if (atomic_restart)
-			cfg.caps |= ICE_AQ_PHY_ENA_ATOMIC_LINK;
+		if (ena_auto_link_update)
+			cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
 		/* Copy over all the old settings */
+		cfg.phy_type_high = pcaps->phy_type_high;
 		cfg.phy_type_low = pcaps->phy_type_low;
 		cfg.low_power_ctrl = pcaps->low_power_ctrl;
 		cfg.eee_cap = pcaps->eee_cap;
@@ -1473,6 +2352,71 @@
 }
 
 /**
+ * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @caps: PHY ability structure to copy data from
+ * @cfg: PHY configuration structure to copy data to
+ *
+ * Helper function to copy AQC PHY get ability data to PHY set configuration
+ * data structure
+ */
+void
+ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg)
+{
+	if (!caps || !cfg)
+		return;
+
+	cfg->phy_type_low = caps->phy_type_low;
+	cfg->phy_type_high = caps->phy_type_high;
+	cfg->caps = caps->caps;
+	cfg->low_power_ctrl = caps->low_power_ctrl;
+	cfg->eee_cap = caps->eee_cap;
+	cfg->eeer_value = caps->eeer_value;
+	cfg->link_fec_opt = caps->link_fec_options;
+}
+
+/**
+ * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @cfg: PHY configuration data to set FEC mode
+ * @fec: FEC mode to configure
+ *
+ * The caller should copy the ice_aqc_get_phy_caps_data.caps bit
+ * ICE_AQC_PHY_EN_AUTO_FEC (bit 7) to the cfg.caps bit
+ * ICE_AQ_PHY_ENA_AUTO_FEC, and ice_aqc_get_phy_caps_data.link_fec_options
+ * to cfg.link_fec_opt, before calling this function.
+ */
+void
+ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec)
+{
+	switch (fec) {
+	case ICE_FEC_BASER:
+		/* Clear the RS bits; AND in the BASE-R ability
+		 * bits and OR in the request bits.
+		 */
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+				     ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+				     ICE_AQC_PHY_FEC_25G_KR_REQ;
+		break;
+	case ICE_FEC_RS:
+		/* Clear the BASE-R bits; AND in the RS ability
+		 * bits and OR in the request bits.
+		 */
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+				     ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+		break;
+	case ICE_FEC_NONE:
+		/* Clear all FEC option bits. */
+		cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK;
+		break;
+	case ICE_FEC_AUTO:
+		/* AND auto FEC bit, and all caps bits. */
+		cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+		break;
+	}
+}
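Putting the helpers together, a caller might request Reed-Solomon FEC
roughly as follows; this is a sketch assuming pcaps was already filled by
ice_aq_get_phy_caps(), as elsewhere in this file, and pi->lport is the
logical port from the kernel-doc above:

struct ice_aqc_set_phy_cfg_data cfg = { 0 };

ice_copy_phy_caps_to_cfg(pcaps, &cfg);	/* brings over caps + FEC options */
ice_cfg_phy_fec(&cfg, ICE_FEC_RS);	/* keep RS abilities, request RS */
status = ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL);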
+
+/**
  * ice_get_link_status - get status of the HW network link
  * @pi: port information structure
  * @link_up: pointer to bool (true/false = linkup/linkdown)
@@ -1536,7 +2480,7 @@
 
 /**
  * ice_aq_set_event_mask
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @port_num: port number of the physical function
  * @mask: event mask to be set
  * @cd: pointer to command details structure or NULL
@@ -1557,6 +2501,56 @@
 	cmd->lport_num = port_num;
 
 	cmd->event_mask = cpu_to_le16(mask);
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_lb *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_mac_lb;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb);
+	if (ena_lpbk)
+		cmd->lb_mode = ICE_AQ_MAC_LB_EN;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_port_id_led
+ * @pi: pointer to the port information
+ * @is_orig_mode: is this LED set to original mode (by the net-list)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set LED value for the given port (0x06e9)
+ */
+enum ice_status
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_port_id_led *cmd;
+	struct ice_hw *hw = pi->hw;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_port_id_led;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_id_led);
+
+	if (is_orig_mode)
+		cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_ORIG;
+	else
+		cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_BLINK;
 
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
 }
@@ -1654,7 +2648,7 @@
 /**
  * ice_aq_get_rss_lut
  * @hw: pointer to the hardware structure
- * @vsi_id: VSI FW index
+ * @vsi_handle: software VSI handle
  * @lut_type: LUT table type
  * @lut: pointer to the LUT buffer provided by the caller
  * @lut_size: size of the LUT buffer
@@ -1662,17 +2656,20 @@
  * get the RSS lookup table, PF or VSI type
  */
 enum ice_status
-ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
-		   u16 lut_size)
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
+		   u8 *lut, u16 lut_size)
 {
-	return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0,
-					false);
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					lut_type, lut, lut_size, 0, false);
 }
 
 /**
  * ice_aq_set_rss_lut
  * @hw: pointer to the hardware structure
- * @vsi_id: VSI FW index
+ * @vsi_handle: software VSI handle
  * @lut_type: LUT table type
  * @lut: pointer to the LUT buffer provided by the caller
  * @lut_size: size of the LUT buffer
@@ -1680,16 +2677,19 @@
  * set the RSS lookup table, PF or VSI type
  */
 enum ice_status
-ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
-		   u16 lut_size)
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
+		   u8 *lut, u16 lut_size)
 {
-	return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0,
-					true);
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					lut_type, lut, lut_size, 0, true);
 }
 
 /**
  * __ice_aq_get_set_rss_key
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @vsi_id: VSI FW index
  * @key: pointer to key info struct
  * @set: set true to set the key, false to get the key
@@ -1724,32 +2724,40 @@
 
 /**
  * ice_aq_get_rss_key
- * @hw: pointer to the hw struct
- * @vsi_id: VSI FW index
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
  * @key: pointer to key info struct
  *
  * get the RSS key per VSI
  */
 enum ice_status
-ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id,
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
 		   struct ice_aqc_get_set_rss_keys *key)
 {
-	return __ice_aq_get_set_rss_key(hw, vsi_id, key, false);
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !key)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					key, false);
 }
 
 /**
  * ice_aq_set_rss_key
- * @hw: pointer to the hw struct
- * @vsi_id: VSI FW index
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
  * @keys: pointer to key info struct
  *
  * set the RSS key per VSI
  */
 enum ice_status
-ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
 		   struct ice_aqc_get_set_rss_keys *keys)
 {
-	return __ice_aq_get_set_rss_key(hw, vsi_id, keys, true);
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !keys)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					keys, true);
 }
 
 /**
@@ -1820,6 +2828,8 @@
  * @num_qgrps: number of groups in the list
  * @qg_list: the list of groups to disable
  * @buf_size: the total size of the qg_list buffer in bytes
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
  * @cd: pointer to command details structure or NULL
  *
  * Disable LAN Tx queue (0x0C31)
@@ -1827,23 +2837,58 @@
 static enum ice_status
 ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps,
 		   struct ice_aqc_dis_txq_item *qg_list, u16 buf_size,
+		   enum ice_disq_rst_src rst_src, u16 vmvf_num,
 		   struct ice_sq_cd *cd)
 {
 	struct ice_aqc_dis_txqs *cmd;
 	struct ice_aq_desc desc;
+	enum ice_status status;
 	u16 i, sz = 0;
 
 	cmd = &desc.params.dis_txqs;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs);
 
-	if (!qg_list)
+	/* qg_list can be NULL only in VM/VF reset flow */
+	if (!qg_list && !rst_src)
 		return ICE_ERR_PARAM;
 
 	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
 		return ICE_ERR_PARAM;
-	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
 	cmd->num_entries = num_qgrps;
 
+	cmd->vmvf_and_timeout = cpu_to_le16((5 << ICE_AQC_Q_DIS_TIMEOUT_S) &
+					    ICE_AQC_Q_DIS_TIMEOUT_M);
+
+	switch (rst_src) {
+	case ICE_VM_RESET:
+		cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VM_RESET;
+		cmd->vmvf_and_timeout |=
+			cpu_to_le16(vmvf_num & ICE_AQC_Q_DIS_VMVF_NUM_M);
+		break;
+	case ICE_VF_RESET:
+		cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VF_RESET;
+		/* In this case, FW expects vmvf_num to be absolute VF ID */
+		cmd->vmvf_and_timeout |=
+			cpu_to_le16((vmvf_num + hw->func_caps.vf_base_id) &
+				    ICE_AQC_Q_DIS_VMVF_NUM_M);
+		break;
+	case ICE_NO_RESET:
+	default:
+		break;
+	}
+
+	/* flush pipe on timeout */
+	cmd->cmd_type |= ICE_AQC_Q_DIS_CMD_FLUSH_PIPE;
+	/* If no queue group info, we are in a reset flow. Issue the AQ */
+	if (!qg_list)
+		goto do_aq;
+
+	/* set RD bit to indicate that command buffer is provided by the driver
+	 * and it needs to be read by the firmware
+	 */
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
 	for (i = 0; i < num_qgrps; ++i) {
 		/* Calculate the size taken up by the queue IDs in this group */
 		sz += qg_list[i].num_qs * sizeof(qg_list[i].q_id);
@@ -1859,7 +2904,18 @@
 	if (buf_size != sz)
 		return ICE_ERR_PARAM;
 
-	return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+do_aq:
+	status = ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+	if (status) {
+		if (!qg_list)
+			ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n",
+				  vmvf_num, hw->adminq.sq_last_status);
+		else
+			ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n",
+				  le16_to_cpu(qg_list[0].q_id[0]),
+				  hw->adminq.sq_last_status);
+	}
+	return status;
 }
 
 /* End of FW Admin Queue command wrappers */
@@ -1870,8 +2926,8 @@
  * @dest_ctx: the context to be written to
  * @ce_info:  a description of the struct to be filled
  */
-static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx,
-			   const struct ice_ctx_ele *ce_info)
+static void
+ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
 {
 	u8 src_byte, dest_byte, mask;
 	u8 *from, *dest;
@@ -1909,8 +2965,8 @@
  * @dest_ctx: the context to be written to
  * @ce_info:  a description of the struct to be filled
  */
-static void ice_write_word(u8 *src_ctx, u8 *dest_ctx,
-			   const struct ice_ctx_ele *ce_info)
+static void
+ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
 {
 	u16 src_word, mask;
 	__le16 dest_word;
@@ -1952,8 +3008,8 @@
  * @dest_ctx: the context to be written to
  * @ce_info:  a description of the struct to be filled
  */
-static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx,
-			    const struct ice_ctx_ele *ce_info)
+static void
+ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
 {
 	u32 src_dword, mask;
 	__le32 dest_dword;
@@ -2003,8 +3059,8 @@
  * @dest_ctx: the context to be written to
  * @ce_info:  a description of the struct to be filled
  */
-static void ice_write_qword(u8 *src_ctx, u8 *dest_ctx,
-			    const struct ice_ctx_ele *ce_info)
+static void
+ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
 {
 	u64 src_qword, mask;
 	__le64 dest_qword;
@@ -2086,24 +3142,50 @@
 }
 
 /**
+ * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ */
+static struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
+{
+	struct ice_vsi_ctx *vsi;
+	struct ice_q_ctx *q_ctx;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi)
+		return NULL;
+	if (q_handle >= vsi->num_lan_q_entries[tc])
+		return NULL;
+	if (!vsi->lan_q_ctx[tc])
+		return NULL;
+	q_ctx = vsi->lan_q_ctx[tc];
+	return &q_ctx[q_handle];
+}
+
+/**
  * ice_ena_vsi_txq
  * @pi: port information structure
- * @vsi_id: VSI id
- * @tc: tc number
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
  * @num_qgrps: Number of added queue groups
  * @buf: list of queue groups to be added
  * @buf_size: size of buffer for indirect command
  * @cd: pointer to command details structure or NULL
  *
- * This function adds one lan q
+ * This function adds one LAN queue
  */
 enum ice_status
-ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
-		struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+		u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
 		struct ice_sq_cd *cd)
 {
 	struct ice_aqc_txsched_elem_data node = { 0 };
 	struct ice_sched_node *parent;
+	struct ice_q_ctx *q_ctx;
 	enum ice_status status;
 	struct ice_hw *hw;
 
@@ -2115,15 +3197,27 @@
 
 	hw = pi->hw;
 
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
 	mutex_lock(&pi->sched_lock);
 
+	q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handle);
+	if (!q_ctx) {
+		ice_debug(hw, ICE_DBG_SCHED, "Enaq: invalid queue handle %d\n",
+			  q_handle);
+		status = ICE_ERR_PARAM;
+		goto ena_txq_exit;
+	}
+
 	/* find a parent node */
-	parent = ice_sched_get_free_qparent(pi, vsi_id, tc,
+	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
 					    ICE_SCHED_NODE_OWNER_LAN);
 	if (!parent) {
 		status = ICE_ERR_PARAM;
 		goto ena_txq_exit;
 	}
+
 	buf->parent_teid = parent->info.node_teid;
 	node.parent_teid = parent->info.node_teid;
 	/* Mark the values in the "generic" section as valid. The default
@@ -2135,19 +3229,24 @@
 	 * Bit 5-6.
 	 * - Bit 7 is reserved.
 	 * Without setting the generic section as valid in valid_sections, the
-	 * Admin Q command will fail with error code ICE_AQ_RC_EINVAL.
+	 * Admin queue command will fail with error code ICE_AQ_RC_EINVAL.
 	 */
 	buf->txqs[0].info.valid_sections = ICE_AQC_ELEM_VALID_GENERIC;
 
-	/* add the lan q */
+	/* add the LAN queue */
 	status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
-	if (status)
+	if (status) {
+		ice_debug(hw, ICE_DBG_SCHED, "enable queue %d failed %d\n",
+			  le16_to_cpu(buf->txqs[0].txq_id),
+			  hw->adminq.sq_last_status);
 		goto ena_txq_exit;
+	}
 
 	node.node_teid = buf->txqs[0].q_teid;
 	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+	q_ctx->q_handle = q_handle;
 
-	/* add a leaf node into schduler tree q layer */
+	/* add a leaf node into scheduler tree queue layer */
 	status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
 
 ena_txq_exit:
@@ -2158,24 +3257,43 @@
 /**
  * ice_dis_vsi_txq
  * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
  * @num_queues: number of queues
+ * @q_handles: pointer to software queue handle array
  * @q_ids: pointer to the q_id array
  * @q_teids: pointer to queue node teids
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
  * @cd: pointer to command details structure or NULL
  *
  * This function removes queues and their corresponding nodes in SW DB
  */
 enum ice_status
-ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
-		u32 *q_teids, struct ice_sq_cd *cd)
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+		u16 *q_handles, u16 *q_ids, u32 *q_teids,
+		enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		struct ice_sq_cd *cd)
 {
 	enum ice_status status = ICE_ERR_DOES_NOT_EXIST;
 	struct ice_aqc_dis_txq_item qg_list;
+	struct ice_q_ctx *q_ctx;
 	u16 i;
 
 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
 		return ICE_ERR_CFG;
 
+	if (!num_queues) {
+		/* if the queues are already disabled but the disable queue
+		 * command still has to be sent to complete the VF reset,
+		 * then call ice_aq_dis_lan_txq without any queue information
+		 */
+		if (rst_src)
+			return ice_aq_dis_lan_txq(pi->hw, 0, NULL, 0, rst_src,
+						  vmvf_num, NULL);
+		return ICE_ERR_CFG;
+	}
+
 	mutex_lock(&pi->sched_lock);
 
 	for (i = 0; i < num_queues; i++) {
@@ -2184,32 +3302,45 @@
 		node = ice_sched_find_node_by_teid(pi->root, q_teids[i]);
 		if (!node)
 			continue;
+		q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handles[i]);
+		if (!q_ctx) {
+			ice_debug(pi->hw, ICE_DBG_SCHED, "invalid queue handle %d\n",
+				  q_handles[i]);
+			continue;
+		}
+		if (q_ctx->q_handle != q_handles[i]) {
+			ice_debug(pi->hw, ICE_DBG_SCHED, "Err:handles %d %d\n",
+				  q_ctx->q_handle, q_handles[i]);
+			continue;
+		}
 		qg_list.parent_teid = node->info.parent_teid;
 		qg_list.num_qs = 1;
 		qg_list.q_id[0] = cpu_to_le16(q_ids[i]);
 		status = ice_aq_dis_lan_txq(pi->hw, 1, &qg_list,
-					    sizeof(qg_list), cd);
+					    sizeof(qg_list), rst_src, vmvf_num,
+					    cd);
 
 		if (status)
 			break;
 		ice_free_sched_node(pi, node);
+		q_ctx->q_handle = ICE_INVAL_Q_HANDLE;
 	}
 	mutex_unlock(&pi->sched_lock);
 	return status;
 }
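Two call shapes fall out of this function: a normal per-queue teardown and
the queue-less flush used to complete a VM/VF reset. A hedged sketch of
both (handles, IDs, and TEIDs are illustrative):

/* normal teardown of one LAN queue on TC 0 */
u16 q_handle = 0, q_id = 12;
u32 q_teid = 0x1234;

status = ice_dis_vsi_txq(pi, vsi_handle, 0, 1, &q_handle, &q_id, &q_teid,
			 ICE_NO_RESET, 0, NULL);

/* VF reset: queues already disabled, but FW still expects the command */
status = ice_dis_vsi_txq(pi, vsi_handle, 0, 0, NULL, NULL, NULL,
			 ICE_VF_RESET, vf_id, NULL);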
 
 /**
- * ice_cfg_vsi_qs - configure the new/exisiting VSI queues
+ * ice_cfg_vsi_qs - configure the new/existing VSI queues
  * @pi: port information structure
- * @vsi_id: VSI Id
+ * @vsi_handle: software VSI handle
  * @tc_bitmap: TC bitmap
  * @maxqs: max queues array per TC
- * @owner: lan or rdma
+ * @owner: LAN or RDMA
  *
  * This function adds/updates the VSI queues per TC.
  */
 static enum ice_status
-ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
 	       u16 *maxqs, u8 owner)
 {
 	enum ice_status status = 0;
@@ -2218,14 +3349,17 @@
 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
 		return ICE_ERR_CFG;
 
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
 	mutex_lock(&pi->sched_lock);
 
-	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+	ice_for_each_traffic_class(i) {
 		/* configuration is possible only if TC node is present */
 		if (!ice_sched_get_tc_node(pi, i))
 			continue;
 
-		status = ice_sched_cfg_vsi(pi, vsi_id, i, maxqs[i], owner,
+		status = ice_sched_cfg_vsi(pi, vsi_handle, i, maxqs[i], owner,
 					   ice_is_tc_ena(tc_bitmap, i));
 		if (status)
 			break;
@@ -2236,18 +3370,182 @@
 }
 
 /**
- * ice_cfg_vsi_lan - configure VSI lan queues
+ * ice_cfg_vsi_lan - configure VSI LAN queues
  * @pi: port information structure
- * @vsi_id: VSI Id
+ * @vsi_handle: software VSI handle
  * @tc_bitmap: TC bitmap
- * @max_lanqs: max lan queues array per TC
+ * @max_lanqs: max LAN queues array per TC
  *
- * This function adds/updates the VSI lan queues per TC.
+ * This function adds/updates the VSI LAN queues per TC.
  */
 enum ice_status
-ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
 		u16 *max_lanqs)
 {
-	return ice_cfg_vsi_qs(pi, vsi_id, tc_bitmap, max_lanqs,
+	return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_lanqs,
 			      ICE_SCHED_NODE_OWNER_LAN);
 }
+
+/**
+ * ice_replay_pre_init - replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * Initializes required config data for VSI, FD, ACL, and RSS before replay.
+ */
+static enum ice_status ice_replay_pre_init(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	u8 i;
+
+	/* Delete old entries from the replay filter list head if there are any */
+	ice_rm_all_sw_replay_rule_info(hw);
+	/* At the start of replay, move entries into the replay_rules list;
+	 * this allows adding rule entries back to the filt_rules list,
+	 * which is the operational list.
+	 */
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++)
+		list_replace_init(&sw->recp_list[i].filt_rules,
+				  &sw->recp_list[i].filt_replay_rules);
+
+	return 0;
+}
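list_replace_init() moves each operational list wholesale into its replay
slot and leaves the operational head empty; schematically, for one recipe:

/* Before:  filt_rules -> [r1, r2, r3]    filt_replay_rules -> []
 * After:   filt_rules -> []              filt_replay_rules -> [r1, r2, r3]
 *
 * list_replace_init(&old, &new): `new` takes over `old`'s entries and
 * `old` is reinitialized as an empty list head.
 */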
+
+/**
+ * ice_replay_vsi - replay VSI configuration
+ * @hw: pointer to the HW struct
+ * @vsi_handle: driver VSI handle
+ *
+ * Restore all VSI configuration after reset. It is required to call this
+ * function with main VSI first.
+ */
+enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle)
+{
+	enum ice_status status;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	/* Replay pre-initialization if there is any */
+	if (vsi_handle == ICE_MAIN_VSI_HANDLE) {
+		status = ice_replay_pre_init(hw);
+		if (status)
+			return status;
+	}
+
+	/* Replay per VSI all filters */
+	status = ice_replay_vsi_all_fltr(hw, vsi_handle);
+	return status;
+}
+
+/**
+ * ice_replay_post - post replay configuration cleanup
+ * @hw: pointer to the HW struct
+ *
+ * Post replay cleanup.
+ */
+void ice_replay_post(struct ice_hw *hw)
+{
+	/* Delete old entries from replay filter list head */
+	ice_rm_all_sw_replay_rule_info(hw);
+}
+
+/**
+ * ice_stat_update40 - read 40 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of 64 bit HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat)
+{
+	u64 new_data = rd64(hw, reg) & (BIT_ULL(40) - 1);
+
+	/* device stats are not reset at PFR; they likely will not be zeroed
+	 * when the driver starts. Thus, save the value from the first read
+	 * without adding to the statistic value so that we report stats which
+	 * count up from zero.
+	 */
+	if (!prev_stat_loaded) {
+		*prev_stat = new_data;
+		return;
+	}
+
+	/* Calculate the difference between the new and old values, and then
+	 * add it to the software stat value.
+	 */
+	if (new_data >= *prev_stat)
+		*cur_stat += new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat += (new_data + BIT_ULL(40)) - *prev_stat;
+
+	/* Update the previously stored value to prepare for next read */
+	*prev_stat = new_data;
+}
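A worked example of the roll-over branch (numbers hypothetical): with
prev_stat = 0xFFFFFFFFF0 and a new masked read of 0x10, the 40-bit counter
wrapped, and the delta is (0x10 + 2^40) - 0xFFFFFFFFF0 = 0x20. A standalone
check of the arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t prev = 0xFFFFFFFFF0ULL;	/* near the 40-bit limit */
	uint64_t new_data = 0x10;		/* counter wrapped past zero */
	uint64_t delta = (new_data + (1ULL << 40)) - prev;

	assert(delta == 0x20);
	return 0;
}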
+
+/**
+ * ice_stat_update32 - read 32 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat)
+{
+	u32 new_data;
+
+	new_data = rd32(hw, reg);
+
+	/* device stats are not reset at PFR; they likely will not be zeroed
+	 * when the driver starts. Thus, save the value from the first read
+	 * without adding to the statistic value so that we report stats which
+	 * count up from zero.
+	 */
+	if (!prev_stat_loaded) {
+		*prev_stat = new_data;
+		return;
+	}
+
+	/* Calculate the difference between the new and old values, and then
+	 * add it to the software stat value.
+	 */
+	if (new_data >= *prev_stat)
+		*cur_stat += new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat += (new_data + BIT_ULL(32)) - *prev_stat;
+
+	/* Update the previously stored value to prepare for next read */
+	*prev_stat = new_data;
+}
+
+/**
+ * ice_sched_query_elem - query element information from HW
+ * @hw: pointer to the HW struct
+ * @node_teid: node TEID to be queried
+ * @buf: buffer to element information
+ *
+ * This function queries HW element information
+ */
+enum ice_status
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+		     struct ice_aqc_get_elem *buf)
+{
+	u16 buf_size, num_elem_ret = 0;
+	enum ice_status status;
+
+	buf_size = sizeof(*buf);
+	memset(buf, 0, buf_size);
+	buf->generic[0].node_teid = cpu_to_le32(node_teid);
+	status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
+					  NULL);
+	if (status || num_elem_ret != 1)
+		ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
+	return status;
+}
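A short usage sketch for the query helper above (node_teid is assumed to
come from the scheduler tree; process_elem() is hypothetical):

struct ice_aqc_get_elem buf;
enum ice_status status;

status = ice_sched_query_elem(hw, node_teid, &buf);
if (!status)
	/* buf.generic[0] now describes the queried scheduler node */
	process_elem(&buf.generic[0]);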
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 9a55191..c3df92f 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -6,64 +6,102 @@
 
 #include "ice.h"
 #include "ice_type.h"
+#include "ice_flex_pipe.h"
 #include "ice_switch.h"
+#include <linux/avf/virtchnl.h>
 
-void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
-		  u16 buf_len);
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
+
+void
+ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len);
 enum ice_status ice_init_hw(struct ice_hw *hw);
 void ice_deinit_hw(struct ice_hw *hw);
 enum ice_status ice_check_reset(struct ice_hw *hw);
 enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
+enum ice_status ice_create_all_ctrlq(struct ice_hw *hw);
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
 void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+void ice_destroy_all_ctrlq(struct ice_hw *hw);
 enum ice_status
 ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		  struct ice_rq_event_info *e, u16 *pending);
 enum ice_status
 ice_get_link_status(struct ice_port_info *pi, bool *link_up);
+enum ice_status ice_update_link_info(struct ice_port_info *pi);
 enum ice_status
 ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
-		enum ice_aq_res_access_type access);
+		enum ice_aq_res_access_type access, u32 timeout);
 void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
 enum ice_status ice_init_nvm(struct ice_hw *hw);
 enum ice_status
+ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data);
+enum ice_status
 ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		struct ice_aq_desc *desc, void *buf, u16 buf_size,
 		struct ice_sq_cd *cd);
 void ice_clear_pxe_mode(struct ice_hw *hw);
 enum ice_status ice_get_caps(struct ice_hw *hw);
+
+void ice_set_safe_mode_caps(struct ice_hw *hw);
+
+void ice_dev_onetime_setup(struct ice_hw *hw);
+
 enum ice_status
 ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 		  u32 rxq_index);
 
 enum ice_status
-ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut,
 		   u16 lut_size);
 enum ice_status
-ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut,
 		   u16 lut_size);
 enum ice_status
-ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id,
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
 		   struct ice_aqc_get_set_rss_keys *keys);
 enum ice_status
-ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
 		   struct ice_aqc_get_set_rss_keys *keys);
+
 bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
 enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
 void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
 extern const struct ice_ctx_ele ice_tlan_ctx_info[];
 enum ice_status
 ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info);
+
+extern struct mutex ice_global_cfg_lock_sw;
+
 enum ice_status
 ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
 		void *buf, u16 buf_size, struct ice_sq_cd *cd);
 enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+
 enum ice_status
-ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+		       struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+		    struct ice_aqc_get_phy_caps_data *caps,
+		    struct ice_sq_cd *cd);
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+		    u16 link_speeds_bitmap);
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
 			struct ice_sq_cd *cd);
 enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
 enum ice_status
-ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart);
+ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
+		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd);
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
+	   bool ena_auto_link_update);
+void
+ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec);
+void
+ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg);
 enum ice_status
 ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
 			   struct ice_sq_cd *cd);
@@ -74,13 +112,37 @@
 ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
 		      struct ice_sq_cd *cd);
 enum ice_status
-ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
-		u32 *q_teids, struct ice_sq_cd *cmd_details);
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
+
 enum ice_status
-ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+		       struct ice_sq_cd *cd);
+
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+		u16 *q_handle, u16 *q_ids, u32 *q_teids,
+		enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		struct ice_sq_cd *cd);
+enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
 		u16 *max_lanqs);
 enum ice_status
-ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
-		struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+		u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
 		struct ice_sq_cd *cd);
+enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
+void ice_replay_post(struct ice_hw *hw);
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat);
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat);
+void
+ice_get_nvm_version(struct ice_hw *hw, u8 *oem_ver, u16 *oem_build,
+		    u8 *oem_patch, u8 *ver_hi, u8 *ver_lo);
+enum ice_status
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+		     struct ice_aqc_get_elem *buf);
 #endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index e783976..2353166 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -3,6 +3,26 @@
 
 #include "ice_common.h"
 
+#define ICE_CQ_INIT_REGS(qinfo, prefix)				\
+do {								\
+	(qinfo)->sq.head = prefix##_ATQH;			\
+	(qinfo)->sq.tail = prefix##_ATQT;			\
+	(qinfo)->sq.len = prefix##_ATQLEN;			\
+	(qinfo)->sq.bah = prefix##_ATQBAH;			\
+	(qinfo)->sq.bal = prefix##_ATQBAL;			\
+	(qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;	\
+	(qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M;	\
+	(qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;		\
+	(qinfo)->rq.head = prefix##_ARQH;			\
+	(qinfo)->rq.tail = prefix##_ARQT;			\
+	(qinfo)->rq.len = prefix##_ARQLEN;			\
+	(qinfo)->rq.bah = prefix##_ARQBAH;			\
+	(qinfo)->rq.bal = prefix##_ARQBAL;			\
+	(qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;	\
+	(qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M;	\
+	(qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;		\
+} while (0)
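+
+/* Illustrative sketch (not part of the driver): the ## token pasting
+ * above means ICE_CQ_INIT_REGS(cq, PF_FW) expands to the same
+ * per-register assignments as the open-coded body it replaces, e.g.:
+ *
+ *	(cq)->sq.head = PF_FW_ATQH;
+ *	(cq)->sq.tail = PF_FW_ATQT;
+ *	...
+ *	(cq)->rq.head_mask = PF_FW_ARQH_ARQH_M;
+ *
+ * so supporting a new queue type only needs a new register prefix.
+ */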
+
 /**
  * ice_adminq_init_regs - Initialize AdminQ registers
  * @hw: pointer to the hardware structure
@@ -13,28 +33,25 @@
 {
 	struct ice_ctl_q_info *cq = &hw->adminq;
 
-	cq->sq.head = PF_FW_ATQH;
-	cq->sq.tail = PF_FW_ATQT;
-	cq->sq.len = PF_FW_ATQLEN;
-	cq->sq.bah = PF_FW_ATQBAH;
-	cq->sq.bal = PF_FW_ATQBAL;
-	cq->sq.len_mask = PF_FW_ATQLEN_ATQLEN_M;
-	cq->sq.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
-	cq->sq.head_mask = PF_FW_ATQH_ATQH_M;
+	ICE_CQ_INIT_REGS(cq, PF_FW);
+}
 
-	cq->rq.head = PF_FW_ARQH;
-	cq->rq.tail = PF_FW_ARQT;
-	cq->rq.len = PF_FW_ARQLEN;
-	cq->rq.bah = PF_FW_ARQBAH;
-	cq->rq.bal = PF_FW_ARQBAL;
-	cq->rq.len_mask = PF_FW_ARQLEN_ARQLEN_M;
-	cq->rq.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
-	cq->rq.head_mask = PF_FW_ARQH_ARQH_M;
+/**
+ * ice_mailbox_init_regs - Initialize Mailbox registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_mailbox_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->mailboxq;
+
+	ICE_CQ_INIT_REGS(cq, PF_MBX);
 }
 
 /**
  * ice_check_sq_alive
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
  *
  * Returns true if Queue is enabled else false.
@@ -101,37 +118,20 @@
 }
 
 /**
- * ice_free_ctrlq_sq_ring - Free Control Transmit Queue (ATQ) rings
+ * ice_free_cq_ring - Free control queue ring
  * @hw: pointer to the hardware structure
- * @cq: pointer to the specific Control queue
+ * @ring: pointer to the specific control queue ring
  *
- * This assumes the posted send buffers have already been cleaned
+ * This assumes the posted buffers have already been cleaned
  * and de-allocated
  */
-static void ice_free_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+static void ice_free_cq_ring(struct ice_hw *hw, struct ice_ctl_q_ring *ring)
 {
-	dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
-			   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
-	cq->sq.desc_buf.va = NULL;
-	cq->sq.desc_buf.pa = 0;
-	cq->sq.desc_buf.size = 0;
-}
-
-/**
- * ice_free_ctrlq_rq_ring - Free Control Receive Queue (ARQ) rings
- * @hw: pointer to the hardware structure
- * @cq: pointer to the specific Control queue
- *
- * This assumes the posted receive buffers have already been cleaned
- * and de-allocated
- */
-static void ice_free_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
-{
-	dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.desc_buf.size,
-			   cq->rq.desc_buf.va, cq->rq.desc_buf.pa);
-	cq->rq.desc_buf.va = NULL;
-	cq->rq.desc_buf.pa = 0;
-	cq->rq.desc_buf.size = 0;
+	dmam_free_coherent(ice_hw_to_dev(hw), ring->desc_buf.size,
+			   ring->desc_buf.va, ring->desc_buf.pa);
+	ring->desc_buf.va = NULL;
+	ring->desc_buf.pa = 0;
+	ring->desc_buf.size = 0;
 }
 
 /**
@@ -250,54 +250,23 @@
 	return ICE_ERR_NO_MEMORY;
 }
 
-/**
- * ice_free_rq_bufs - Free ARQ buffer info elements
- * @hw: pointer to the hardware structure
- * @cq: pointer to the specific Control queue
- */
-static void ice_free_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+static enum ice_status
+ice_cfg_cq_regs(struct ice_hw *hw, struct ice_ctl_q_ring *ring, u16 num_entries)
 {
-	int i;
+	/* Clear Head and Tail */
+	wr32(hw, ring->head, 0);
+	wr32(hw, ring->tail, 0);
 
-	/* free descriptors */
-	for (i = 0; i < cq->num_rq_entries; i++) {
-		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
-				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
-		cq->rq.r.rq_bi[i].va = NULL;
-		cq->rq.r.rq_bi[i].pa = 0;
-		cq->rq.r.rq_bi[i].size = 0;
-	}
+	/* set starting point */
+	wr32(hw, ring->len, (num_entries | ring->len_ena_mask));
+	wr32(hw, ring->bal, lower_32_bits(ring->desc_buf.pa));
+	wr32(hw, ring->bah, upper_32_bits(ring->desc_buf.pa));
 
-	/* free the dma header */
-	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
-}
+	/* Check one register to verify that config was applied */
+	if (rd32(hw, ring->bal) != lower_32_bits(ring->desc_buf.pa))
+		return ICE_ERR_AQ_ERROR;
 
-/**
- * ice_free_sq_bufs - Free ATQ buffer info elements
- * @hw: pointer to the hardware structure
- * @cq: pointer to the specific Control queue
- */
-static void ice_free_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
-{
-	int i;
-
-	/* only unmap if the address is non-NULL */
-	for (i = 0; i < cq->num_sq_entries; i++)
-		if (cq->sq.r.sq_bi[i].pa) {
-			dmam_free_coherent(ice_hw_to_dev(hw),
-					   cq->sq.r.sq_bi[i].size,
-					   cq->sq.r.sq_bi[i].va,
-					   cq->sq.r.sq_bi[i].pa);
-			cq->sq.r.sq_bi[i].va = NULL;
-			cq->sq.r.sq_bi[i].pa = 0;
-			cq->sq.r.sq_bi[i].size = 0;
-		}
-
-	/* free the buffer info list */
-	devm_kfree(ice_hw_to_dev(hw), cq->sq.cmd_buf);
-
-	/* free the dma header */
-	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+	return 0;
 }
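+
+/* Worked example of the BAL/BAH programming above (illustrative values):
+ * the 64-bit descriptor ring DMA address is split into 32-bit halves,
+ * so for pa = 0x0000123487654321:
+ *
+ *	lower_32_bits(pa) == 0x87654321  ->  written to ring->bal
+ *	upper_32_bits(pa) == 0x00001234  ->  written to ring->bah
+ *
+ * The single read-back of BAL doubles as a liveness check: a dead PCIe
+ * function typically reads as all-ones, so the compare fails and
+ * ICE_ERR_AQ_ERROR is returned instead of using an unlatched ring.
+ */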
 
 /**
@@ -310,23 +279,7 @@
 static enum ice_status
 ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 {
-	u32 reg = 0;
-
-	/* Clear Head and Tail */
-	wr32(hw, cq->sq.head, 0);
-	wr32(hw, cq->sq.tail, 0);
-
-	/* set starting point */
-	wr32(hw, cq->sq.len, (cq->num_sq_entries | cq->sq.len_ena_mask));
-	wr32(hw, cq->sq.bal, lower_32_bits(cq->sq.desc_buf.pa));
-	wr32(hw, cq->sq.bah, upper_32_bits(cq->sq.desc_buf.pa));
-
-	/* Check one register to verify that config was applied */
-	reg = rd32(hw, cq->sq.bal);
-	if (reg != lower_32_bits(cq->sq.desc_buf.pa))
-		return ICE_ERR_AQ_ERROR;
-
-	return 0;
+	return ice_cfg_cq_regs(hw, &cq->sq, cq->num_sq_entries);
 }
 
 /**
@@ -334,30 +287,20 @@
  * @hw: pointer to the hardware structure
  * @cq: pointer to the specific Control queue
  *
- * Configure base address and length registers for the receive (event q)
+ * Configure base address and length registers for the receive (event queue)
  */
 static enum ice_status
 ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 {
-	u32 reg = 0;
+	enum ice_status status;
 
-	/* Clear Head and Tail */
-	wr32(hw, cq->rq.head, 0);
-	wr32(hw, cq->rq.tail, 0);
-
-	/* set starting point */
-	wr32(hw, cq->rq.len, (cq->num_rq_entries | cq->rq.len_ena_mask));
-	wr32(hw, cq->rq.bal, lower_32_bits(cq->rq.desc_buf.pa));
-	wr32(hw, cq->rq.bah, upper_32_bits(cq->rq.desc_buf.pa));
+	status = ice_cfg_cq_regs(hw, &cq->rq, cq->num_rq_entries);
+	if (status)
+		return status;
 
 	/* Update tail in the HW to post pre-allocated buffers */
 	wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
 
-	/* Check one register to verify that config was applied */
-	reg = rd32(hw, cq->rq.bal);
-	if (reg != lower_32_bits(cq->rq.desc_buf.pa))
-		return ICE_ERR_AQ_ERROR;
-
 	return 0;
 }
 
@@ -367,7 +310,7 @@
  * @cq: pointer to the specific Control queue
  *
  * This is the main initialization routine for the Control Send Queue
- * Prior to calling this function, drivers *MUST* set the following fields
+ * Prior to calling this function, the driver *MUST* set the following fields
  * in the cq->structure:
  *     - cq->num_sq_entries
  *     - cq->sq_buf_size
@@ -414,7 +357,7 @@
 	goto init_ctrlq_exit;
 
 init_ctrlq_free_rings:
-	ice_free_ctrlq_sq_ring(hw, cq);
+	ice_free_cq_ring(hw, &cq->sq);
 
 init_ctrlq_exit:
 	return ret_code;
@@ -426,7 +369,7 @@
  * @cq: pointer to the specific Control queue
  *
  * The main initialization routine for the Admin Receive (Event) Queue.
- * Prior to calling this function, drivers *MUST* set the following fields
+ * Prior to calling this function, the driver *MUST* set the following fields
  * in the cq->structure:
  *     - cq->num_rq_entries
  *     - cq->rq_buf_size
@@ -473,12 +416,33 @@
 	goto init_ctrlq_exit;
 
 init_ctrlq_free_rings:
-	ice_free_ctrlq_rq_ring(hw, cq);
+	ice_free_cq_ring(hw, &cq->rq);
 
 init_ctrlq_exit:
 	return ret_code;
 }
 
+#define ICE_FREE_CQ_BUFS(hw, qi, ring)					\
+do {									\
+	int i;								\
+	/* free descriptors */						\
+	for (i = 0; i < (qi)->num_##ring##_entries; i++)		\
+		if ((qi)->ring.r.ring##_bi[i].pa) {			\
+			dmam_free_coherent(ice_hw_to_dev(hw),		\
+					   (qi)->ring.r.ring##_bi[i].size,\
+					   (qi)->ring.r.ring##_bi[i].va,\
+					   (qi)->ring.r.ring##_bi[i].pa);\
+			(qi)->ring.r.ring##_bi[i].va = NULL;		\
+			(qi)->ring.r.ring##_bi[i].pa = 0;		\
+			(qi)->ring.r.ring##_bi[i].size = 0;		\
+		}							\
+	/* free the buffer info list */					\
+	if ((qi)->ring.cmd_buf)						\
+		devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
+	/* free DMA head */						\
+	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);		\
+} while (0)
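+
+/* Illustrative expansion: with ring = sq the ## pasting resolves against
+ * the send queue fields, so ICE_FREE_CQ_BUFS(hw, cq, sq) expands along
+ * the lines of:
+ *
+ *	for (i = 0; i < (cq)->num_sq_entries; i++)
+ *		if ((cq)->sq.r.sq_bi[i].pa)
+ *			dmam_free_coherent(ice_hw_to_dev(hw), ...);
+ *	devm_kfree(ice_hw_to_dev(hw), (cq)->sq.cmd_buf);
+ *	devm_kfree(ice_hw_to_dev(hw), (cq)->sq.dma_head);
+ *
+ * which is what the two hand-written free routines it replaces did for
+ * the sq and rq cases.
+ */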
+
 /**
  * ice_shutdown_sq - shutdown the Control ATQ
  * @hw: pointer to the hardware structure
@@ -508,8 +472,8 @@
 	cq->sq.count = 0;	/* to indicate uninitialized queue */
 
 	/* free ring buffers and the ring itself */
-	ice_free_sq_bufs(hw, cq);
-	ice_free_ctrlq_sq_ring(hw, cq);
+	ICE_FREE_CQ_BUFS(hw, cq, sq);
+	ice_free_cq_ring(hw, &cq->sq);
 
 shutdown_sq_out:
 	mutex_unlock(&cq->sq_lock);
@@ -576,8 +540,8 @@
 	cq->rq.count = 0;
 
 	/* free ring buffers and the ring itself */
-	ice_free_rq_bufs(hw, cq);
-	ice_free_ctrlq_rq_ring(hw, cq);
+	ICE_FREE_CQ_BUFS(hw, cq, rq);
+	ice_free_cq_ring(hw, &cq->rq);
 
 shutdown_rq_out:
 	mutex_unlock(&cq->rq_lock);
@@ -605,14 +569,8 @@
 	return 0;
 
 init_ctrlq_free_rq:
-	if (cq->rq.head) {
-		ice_shutdown_rq(hw, cq);
-		mutex_destroy(&cq->rq_lock);
-	}
-	if (cq->sq.head) {
-		ice_shutdown_sq(hw, cq);
-		mutex_destroy(&cq->sq_lock);
-	}
+	ice_shutdown_rq(hw, cq);
+	ice_shutdown_sq(hw, cq);
 	return status;
 }
 
@@ -621,13 +579,14 @@
  * @hw: pointer to the hardware structure
  * @q_type: specific Control queue type
  *
- * Prior to calling this function, drivers *MUST* set the following fields
+ * Prior to calling this function, the driver *MUST* set the following fields
  * in the cq->structure:
  *     - cq->num_sq_entries
  *     - cq->num_rq_entries
  *     - cq->rq_buf_size
  *     - cq->sq_buf_size
  *
+ * NOTE: this function does not initialize the controlq locks.
  */
 static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
 {
@@ -639,6 +598,10 @@
 		ice_adminq_init_regs(hw);
 		cq = &hw->adminq;
 		break;
+	case ICE_CTL_Q_MAILBOX:
+		ice_mailbox_init_regs(hw);
+		cq = &hw->mailboxq;
+		break;
 	default:
 		return ICE_ERR_PARAM;
 	}
@@ -649,8 +612,6 @@
 	    !cq->rq_buf_size || !cq->sq_buf_size) {
 		return ICE_ERR_CFG;
 	}
-	mutex_init(&cq->sq_lock);
-	mutex_init(&cq->rq_lock);
 
 	/* setup SQ command write back timeout */
 	cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT;
@@ -658,7 +619,7 @@
 	/* allocate the ATQ */
 	ret_code = ice_init_sq(hw, cq);
 	if (ret_code)
-		goto init_ctrlq_destroy_locks;
+		return ret_code;
 
 	/* allocate the ARQ */
 	ret_code = ice_init_rq(hw, cq);
@@ -670,9 +631,6 @@
 
 init_ctrlq_free_sq:
 	ice_shutdown_sq(hw, cq);
-init_ctrlq_destroy_locks:
-	mutex_destroy(&cq->sq_lock);
-	mutex_destroy(&cq->rq_lock);
 	return ret_code;
 }
 
@@ -680,12 +638,14 @@
  * ice_init_all_ctrlq - main initialization routine for all control queues
  * @hw: pointer to the hardware structure
  *
- * Prior to calling this function, drivers *MUST* set the following fields
+ * Prior to calling this function, the driver *MUST* set the following fields
  * in the cq->structure for all control queues:
  *     - cq->num_sq_entries
  *     - cq->num_rq_entries
  *     - cq->rq_buf_size
  *     - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks.
  */
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
 {
@@ -696,13 +656,56 @@
 	if (ret_code)
 		return ret_code;
 
-	return ice_init_check_adminq(hw);
+	ret_code = ice_init_check_adminq(hw);
+	if (ret_code)
+		return ret_code;
+
+	/* Init Mailbox queue */
+	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_ctrlq_locks - Initialize locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Initializes the send and receive queue locks for a given control queue.
+ */
+static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+	mutex_init(&cq->sq_lock);
+	mutex_init(&cq->rq_lock);
+}
+
+/**
+ * ice_create_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * This function creates all the control queue locks and then calls
+ * ice_init_all_ctrlq. It should be called once during driver load. If the
+ * driver needs to re-initialize control queues at run time it should call
+ * ice_init_all_ctrlq instead.
+ */
+enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
+{
+	ice_init_ctrlq_locks(&hw->adminq);
+	ice_init_ctrlq_locks(&hw->mailboxq);
+
+	return ice_init_all_ctrlq(hw);
 }
 
 /**
  * ice_shutdown_ctrlq - shutdown routine for any control queue
  * @hw: pointer to the hardware structure
  * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
  */
 static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
 {
@@ -714,28 +717,62 @@
 		if (ice_check_sq_alive(hw, cq))
 			ice_aq_q_shutdown(hw, true);
 		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		break;
 	default:
 		return;
 	}
 
-	if (cq->sq.head) {
-		ice_shutdown_sq(hw, cq);
-		mutex_destroy(&cq->sq_lock);
-	}
-	if (cq->rq.head) {
-		ice_shutdown_rq(hw, cq);
-		mutex_destroy(&cq->rq_lock);
-	}
+	ice_shutdown_sq(hw, cq);
+	ice_shutdown_rq(hw, cq);
 }
 
 /**
  * ice_shutdown_all_ctrlq - shutdown routine for all control queues
  * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
  */
 void ice_shutdown_all_ctrlq(struct ice_hw *hw)
 {
 	/* Shutdown FW admin queue */
 	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	/* Shutdown PF-VF Mailbox */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_destroy_ctrlq_locks - Destroy locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Destroys the send and receive queue locks for a given control queue.
+ */
+static void
+ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+	mutex_destroy(&cq->sq_lock);
+	mutex_destroy(&cq->rq_lock);
+}
+
+/**
+ * ice_destroy_all_ctrlq - exit routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * This function shuts down all the control queues and then destroys the
+ * control queue locks. It should be called once during driver unload. The
+ * driver should call ice_shutdown_all_ctrlq if it needs to shut down and
+ * reinitialize control queues, such as in response to a reset event.
+ */
+void ice_destroy_all_ctrlq(struct ice_hw *hw)
+{
+	/* shut down all the control queues first */
+	ice_shutdown_all_ctrlq(hw);
+
+	ice_destroy_ctrlq_locks(&hw->adminq);
+	ice_destroy_ctrlq_locks(&hw->mailboxq);
 }
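+
+/* Sketch of the intended lifecycle after this split (the probe/reset/
+ * remove labels are illustrative; the ice_* calls are the real ones):
+ *
+ *	probe:   ice_create_all_ctrlq(hw);    // init locks + queues
+ *	reset:   ice_shutdown_all_ctrlq(hw);  // queues down, locks kept
+ *	         ice_init_all_ctrlq(hw);      // queues back up
+ *	remove:  ice_destroy_all_ctrlq(hw);   // queues down, locks freed
+ *
+ * Running mutex_init()/mutex_destroy() exactly once per driver load
+ * avoids re-initializing a lock that may be held across a reset.
+ */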
 
 /**
@@ -774,7 +811,7 @@
 
 /**
  * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
  *
  * Returns true if the firmware has processed all descriptors on the
@@ -790,14 +827,14 @@
 
 /**
  * ice_sq_send_cmd - send command to Control Queue (ATQ)
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
  * @desc: prefilled descriptor describing the command (non DMA mem)
  * @buf: buffer to use for indirect commands (or NULL for direct commands)
  * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
  * @cd: pointer to command details structure
  *
- * This is the main send command routine for the ATQ.  It runs the q,
+ * This is the main send command routine for the ATQ. It runs the queue,
  * cleans the queue, etc.
  */
 enum ice_status
@@ -814,6 +851,9 @@
 	u16 retval = 0;
 	u32 val = 0;
 
+	/* if reset is in progress return a soft error */
+	if (hw->reset_ongoing)
+		return ICE_ERR_RESET_ONGOING;
 	mutex_lock(&cq->sq_lock);
 
 	cq->sq_last_status = ICE_AQ_RC_OK;
@@ -855,7 +895,7 @@
 
 	details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
 	if (cd)
-		memcpy(details, cd, sizeof(*details));
+		*details = *cd;
 	else
 		memset(details, 0, sizeof(*details));
 
@@ -982,13 +1022,13 @@
 
 /**
  * ice_clean_rq_elem
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
  * @e: event info from the receive descriptor, includes any buffers
  * @pending: number of events that could be left to process
  *
  * This function cleans one Admin Receive Queue element and returns
- * the contents through e.  It can also return how many events are
+ * the contents through e. It can also return how many events are
  * left to process through 'pending'.
  */
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index ea02b89..44945c2 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -8,6 +8,7 @@
 
 /* Maximum buffer lengths for all control queue types */
 #define ICE_AQ_MAX_BUF_LEN 4096
+#define ICE_MBXQ_MAX_BUF_LEN 4096
 
 #define ICE_CTL_Q_DESC(R, i) \
 	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
@@ -18,23 +19,23 @@
 
 /* Defines that help manage the driver vs FW API checks.
  * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
- *
  */
 #define EXP_FW_API_VER_BRANCH		0x00
-#define EXP_FW_API_VER_MAJOR		0x00
-#define EXP_FW_API_VER_MINOR		0x01
+#define EXP_FW_API_VER_MAJOR		0x01
+#define EXP_FW_API_VER_MINOR		0x03
 
 /* Different control queue types: These are mainly for SW consumption. */
 enum ice_ctl_q {
 	ICE_CTL_Q_UNKNOWN = 0,
 	ICE_CTL_Q_ADMIN,
+	ICE_CTL_Q_MAILBOX,
 };
 
 /* Control Queue default settings */
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT	250  /* msecs */
 
 struct ice_ctl_q_ring {
-	void *dma_head;			/* Virtual address to dma head */
+	void *dma_head;			/* Virtual address to DMA head */
 	struct ice_dma_mem desc_buf;	/* descriptor ring memory */
 	void *cmd_buf;			/* command buffer memory */
 
@@ -78,6 +79,7 @@
 /* Control Queue information */
 struct ice_ctl_q_info {
 	enum ice_ctl_q qtype;
+	enum ice_aq_err rq_last_status;	/* last status on receive queue */
 	struct ice_ctl_q_ring rq;	/* receive queue */
 	struct ice_ctl_q_ring sq;	/* send queue */
 	u32 sq_cmd_timeout;		/* send queue cmd write back timeout */
@@ -85,10 +87,9 @@
 	u16 num_sq_entries;		/* send queue depth */
 	u16 rq_buf_size;		/* receive queue buffer size */
 	u16 sq_buf_size;		/* send queue buffer size */
+	enum ice_aq_err sq_last_status;	/* last status on send queue */
 	struct mutex sq_lock;		/* Send queue lock */
 	struct mutex rq_lock;		/* Receive queue lock */
-	enum ice_aq_err sq_last_status;	/* last status on send queue */
-	enum ice_aq_err rq_last_status;	/* last status on receive queue */
 };
 
 #endif /* _ICE_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
new file mode 100644
index 0000000..dd7efff
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -0,0 +1,1441 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_dcb.h"
+
+/**
+ * ice_aq_get_lldp_mib
+ * @hw: pointer to the HW struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @local_len: length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet). (0x0A00)
+ */
+static enum ice_status
+ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf,
+		    u16 buf_size, u16 *local_len, u16 *remote_len,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_get_mib *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.lldp_get_mib;
+
+	if (buf_size == 0 || !buf)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_get_mib);
+
+	cmd->type = mib_type & ICE_AQ_LLDP_MIB_TYPE_M;
+	cmd->type |= (bridge_type << ICE_AQ_LLDP_BRID_TYPE_S) &
+		ICE_AQ_LLDP_BRID_TYPE_M;
+
+	desc.datalen = cpu_to_le16(buf_size);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status) {
+		if (local_len)
+			*local_len = le16_to_cpu(cmd->local_len);
+		if (remote_len)
+			*remote_len = le16_to_cpu(cmd->remote_len);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_update: Enable or Disable event posting
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes (0x0A01)
+ */
+static enum ice_status
+ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_mib_change *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_set_event;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_mib_change);
+
+	if (!ena_update)
+		cmd->command |= ICE_AQ_LLDP_MIB_UPDATE_DIS;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_stop_lldp
+ * @hw: pointer to the HW struct
+ * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown
+ *			 False if LLDP Agent needs to be Stopped
+ * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across
+ *	     reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent (0x0A05)
+ */
+enum ice_status
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+		 struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_stop;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_stop);
+
+	if (shutdown_lldp_agent)
+		cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
+
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_start_lldp
+ * @hw: pointer to the HW struct
+ * @persist: True if Start of LLDP Agent needs to be persistent across reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports. (0x0A06)
+ */
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_start *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_start;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_start);
+
+	cmd->command = ICE_AQ_LLDP_AGENT_START;
+
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_lldp_mib - Set the LLDP MIB
+ * @hw: pointer to the HW struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the LLDP MIB. (0x0A08)
+ */
+static enum ice_status
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_local_mib *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_set_mib;
+
+	if (buf_size == 0 || !buf)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_local_mib);
+
+	desc.flags |= cpu_to_le16((u16)ICE_AQ_FLAG_RD);
+	desc.datalen = cpu_to_le16(buf_size);
+
+	cmd->type = mib_type;
+	cmd->length = cpu_to_le16(buf_size);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_get_dcbx_status
+ * @hw: pointer to the HW struct
+ *
+ * Get the DCBX status from the Firmware
+ */
+static u8 ice_get_dcbx_status(struct ice_hw *hw)
+{
+	u32 reg;
+
+	reg = rd32(hw, PRTDCB_GENS);
+	return (u8)((reg & PRTDCB_GENS_DCBX_STATUS_M) >>
+		    PRTDCB_GENS_DCBX_STATUS_S);
+}
+
+/**
+ * ice_parse_ieee_ets_common_tlv
+ * @buf: Data buffer to be parsed for ETS CFG/REC data
+ * @ets_cfg: Container to store parsed data
+ *
+ * Parses the common data of IEEE 802.1Qaz ETS CFG/REC TLV
+ */
+static void
+ice_parse_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 offset = 0;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		ets_cfg->prio_table[i * 2] =
+			((buf[offset] & ICE_IEEE_ETS_PRIO_1_M) >>
+			 ICE_IEEE_ETS_PRIO_1_S);
+		ets_cfg->prio_table[i * 2 + 1] =
+			((buf[offset] & ICE_IEEE_ETS_PRIO_0_M) >>
+			 ICE_IEEE_ETS_PRIO_0_S);
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 *
+	 * TSA Assignment Table (8 octets)
+	 * Octets:| 9 | 10| 11| 12| 13| 14| 15| 16|
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		ets_cfg->tcbwtable[i] = buf[offset];
+		ets_cfg->tsatable[i] = buf[ICE_MAX_TRAFFIC_CLASS + offset++];
+	}
+}
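+
+/* Worked example of the nibble unpacking above, assuming the usual
+ * shift/mask values from ice_dcb.h (ICE_IEEE_ETS_PRIO_1_S = 4,
+ * ICE_IEEE_ETS_PRIO_0_S = 0, 3-bit masks): for buf[offset] = 0x21,
+ *
+ *	prio_table[0] = (0x21 & 0x70) >> 4 = 2;   // pri0, high nibble
+ *	prio_table[1] = (0x21 & 0x07) >> 0 = 1;   // pri1, low nibble
+ *
+ * matching the "pri0|pri1" layout in the diagram.
+ */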
+
+/**
+ * ice_parse_ieee_etscfg_tlv
+ * @tlv: IEEE 802.1Qaz ETS CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_parse_ieee_etscfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	etscfg->willing = ((buf[0] & ICE_IEEE_ETS_WILLING_M) >>
+			   ICE_IEEE_ETS_WILLING_S);
+	etscfg->cbs = ((buf[0] & ICE_IEEE_ETS_CBS_M) >> ICE_IEEE_ETS_CBS_S);
+	etscfg->maxtcs = ((buf[0] & ICE_IEEE_ETS_MAXTC_M) >>
+			  ICE_IEEE_ETS_MAXTC_S);
+
+	/* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+	ice_parse_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_parse_ieee_etsrec_tlv
+ * @tlv: IEEE 802.1Qaz ETS REC TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Parses IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_parse_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+	ice_parse_ieee_ets_common_tlv(&buf[1], &dcbcfg->etsrec);
+}
+
+/**
+ * ice_parse_ieee_pfccfg_tlv
+ * @tlv: IEEE 802.1Qaz PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_parse_ieee_pfccfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	dcbcfg->pfc.willing = ((buf[0] & ICE_IEEE_PFC_WILLING_M) >>
+			       ICE_IEEE_PFC_WILLING_S);
+	dcbcfg->pfc.mbc = ((buf[0] & ICE_IEEE_PFC_MBC_M) >> ICE_IEEE_PFC_MBC_S);
+	dcbcfg->pfc.pfccap = ((buf[0] & ICE_IEEE_PFC_CAP_M) >>
+			      ICE_IEEE_PFC_CAP_S);
+	dcbcfg->pfc.pfcena = buf[1];
+}
+
+/**
+ * ice_parse_ieee_app_tlv
+ * @tlv: IEEE 802.1Qaz APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses IEEE 802.1Qaz APP PRIO TLV
+ */
+static void
+ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv,
+		       struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 offset = 0;
+	u16 typelen;
+	int i = 0;
+	u16 len;
+	u8 *buf;
+
+	typelen = ntohs(tlv->typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	buf = tlv->tlvinfo;
+
+	/* Removing sizeof(ouisubtype) and reserved byte from len.
+	 * Remaining len div 3 is number of APP TLVs.
+	 */
+	len -= (sizeof(tlv->ouisubtype) + 1);
+
+	/* Move offset to App Priority Table */
+	offset++;
+
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (offset < len) {
+		dcbcfg->app[i].priority = ((buf[offset] &
+					    ICE_IEEE_APP_PRIO_M) >>
+					   ICE_IEEE_APP_PRIO_S);
+		dcbcfg->app[i].selector = ((buf[offset] &
+					    ICE_IEEE_APP_SEL_M) >>
+					   ICE_IEEE_APP_SEL_S);
+		dcbcfg->app[i].prot_id = (buf[offset + 1] << 0x8) |
+			buf[offset + 2];
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+
+	dcbcfg->numapps = i;
+}
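+
+/* Worked example for one 3-octet APP entry, using the bit layout from
+ * the diagram (priority in bits 7:5 of the first octet, selector in
+ * bits 2:0; mask names assumed from ice_dcb.h). For the illustrative
+ * entry { 0x61, 0x89, 0x06 }:
+ *
+ *	priority = (0x61 & 0xE0) >> 5 = 3;
+ *	selector =  0x61 & 0x07       = 1;       // EtherType
+ *	prot_id  = (0x89 << 8) | 0x06 = 0x8906;  // e.g. FCoE
+ */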
+
+/**
+ * ice_parse_ieee_tlv
+ * @tlv: IEEE 802.1Qaz TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_ieee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 ouisubtype;
+	u8 subtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+		       ICE_LLDP_TLV_SUBTYPE_S);
+	switch (subtype) {
+	case ICE_IEEE_SUBTYPE_ETS_CFG:
+		ice_parse_ieee_etscfg_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_ETS_REC:
+		ice_parse_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_PFC_CFG:
+		ice_parse_ieee_pfccfg_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_APP_PRI:
+		ice_parse_ieee_app_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_parse_cee_pgcfg_tlv
+ * @tlv: CEE DCBX PG CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses CEE DCBX PG CFG TLV
+ */
+static void
+ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u16 offset = 0;
+	int i;
+
+	etscfg = &dcbcfg->etscfg;
+
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		etscfg->willing = 1;
+
+	etscfg->cbs = 0;
+	/* Priority Group Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		etscfg->prio_table[i * 2] =
+			((buf[offset] & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+		etscfg->prio_table[i * 2 + 1] =
+			((buf[offset] & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+		offset++;
+	}
+
+	/* PG Percentage Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		etscfg->tcbwtable[i] = buf[offset++];
+
+		if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT)
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+		else
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+	}
+
+	/* Number of TCs supported (1 octet) */
+	etscfg->maxtcs = buf[offset];
+}
+
+/**
+ * ice_parse_cee_pfccfg_tlv
+ * @tlv: CEE DCBX PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses CEE DCBX PFC CFG TLV
+ */
+static void
+ice_parse_cee_pfccfg_tlv(struct ice_cee_feat_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		dcbcfg->pfc.willing = 1;
+
+	/* ------------------------
+	 * | PFC Enable | PFC TCs |
+	 * ------------------------
+	 * | 1 octet    | 1 octet |
+	 */
+	dcbcfg->pfc.pfcena = buf[0];
+	dcbcfg->pfc.pfccap = buf[1];
+}
+
+/**
+ * ice_parse_cee_app_tlv
+ * @tlv: CEE DCBX APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses CEE DCBX APP PRIO TLV
+ */
+static void
+ice_parse_cee_app_tlv(struct ice_cee_feat_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 len, typelen, offset = 0;
+	struct ice_cee_app_prio *app;
+	u8 i;
+
+	typelen = ntohs(tlv->hdr.typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+
+	dcbcfg->numapps = len / sizeof(*app);
+	if (!dcbcfg->numapps)
+		return;
+	if (dcbcfg->numapps > ICE_DCBX_MAX_APPS)
+		dcbcfg->numapps = ICE_DCBX_MAX_APPS;
+
+	for (i = 0; i < dcbcfg->numapps; i++) {
+		u8 up, selector;
+
+		app = (struct ice_cee_app_prio *)(tlv->tlvinfo + offset);
+		for (up = 0; up < ICE_MAX_USER_PRIORITY; up++)
+			if (app->prio_map & BIT(up))
+				break;
+
+		dcbcfg->app[i].priority = up;
+
+		/* Get Selector from lower 2 bits, and convert to IEEE */
+		selector = (app->upper_oui_sel & ICE_CEE_APP_SELECTOR_M);
+		switch (selector) {
+		case ICE_CEE_APP_SEL_ETHTYPE:
+			dcbcfg->app[i].selector = ICE_APP_SEL_ETHTYPE;
+			break;
+		case ICE_CEE_APP_SEL_TCPIP:
+			dcbcfg->app[i].selector = ICE_APP_SEL_TCPIP;
+			break;
+		default:
+			/* Keep selector as it is for unknown types */
+			dcbcfg->app[i].selector = selector;
+		}
+
+		dcbcfg->app[i].prot_id = ntohs(app->protocol);
+		/* Move to next app */
+		offset += sizeof(*app);
+	}
+}
+
+/**
+ * ice_parse_cee_tlv
+ * @tlv: CEE DCBX TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_cee_feat_tlv *sub_tlv;
+	u8 subtype, feat_tlv_count = 0;
+	u16 len, tlvlen, typelen;
+	u32 ouisubtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+		       ICE_LLDP_TLV_SUBTYPE_S);
+	/* Return if not CEE DCBX */
+	if (subtype != ICE_CEE_DCBX_TYPE)
+		return;
+
+	typelen = ntohs(tlv->typelen);
+	tlvlen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	len = sizeof(tlv->typelen) + sizeof(ouisubtype) +
+		sizeof(struct ice_cee_ctrl_tlv);
+	/* Return if no CEE DCBX Feature TLVs */
+	if (tlvlen <= len)
+		return;
+
+	sub_tlv = (struct ice_cee_feat_tlv *)((char *)tlv + len);
+	while (feat_tlv_count < ICE_CEE_MAX_FEAT_TYPE) {
+		u16 sublen;
+
+		typelen = ntohs(sub_tlv->hdr.typelen);
+		sublen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+		subtype = (u8)((typelen & ICE_LLDP_TLV_TYPE_M) >>
+			       ICE_LLDP_TLV_TYPE_S);
+		switch (subtype) {
+		case ICE_CEE_SUBTYPE_PG_CFG:
+			ice_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case ICE_CEE_SUBTYPE_PFC_CFG:
+			ice_parse_cee_pfccfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case ICE_CEE_SUBTYPE_APP_PRI:
+			ice_parse_cee_app_tlv(sub_tlv, dcbcfg);
+			break;
+		default:
+			return;	/* Invalid sub-type; stop parsing */
+		}
+		feat_tlv_count++;
+		/* Move to next sub TLV */
+		sub_tlv = (struct ice_cee_feat_tlv *)
+			  ((char *)sub_tlv + sizeof(sub_tlv->hdr.typelen) +
+			   sublen);
+	}
+}
+
+/**
+ * ice_parse_org_tlv
+ * @tlv: Organization specific TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Currently only IEEE 802.1Qaz TLV is supported, all others
+ * will be returned
+ */
+static void
+ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 ouisubtype;
+	u32 oui;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	oui = ((ouisubtype & ICE_LLDP_TLV_OUI_M) >> ICE_LLDP_TLV_OUI_S);
+	switch (oui) {
+	case ICE_IEEE_8021QAZ_OUI:
+		ice_parse_ieee_tlv(tlv, dcbcfg);
+		break;
+	case ICE_CEE_DCBX_OUI:
+		ice_parse_cee_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_lldp_to_dcb_cfg
+ * @lldpmib: LLDPDU to be parsed
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Parse DCB configuration from the LLDPDU
+ */
+static enum ice_status
+ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_lldp_org_tlv *tlv;
+	enum ice_status ret = 0;
+	u16 offset = 0;
+	u16 typelen;
+	u16 type;
+	u16 len;
+
+	if (!lldpmib || !dcbcfg)
+		return ICE_ERR_PARAM;
+
+	/* set to the start of LLDPDU */
+	lldpmib += ETH_HLEN;
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	while (1) {
+		typelen = ntohs(tlv->typelen);
+		type = ((typelen & ICE_LLDP_TLV_TYPE_M) >> ICE_LLDP_TLV_TYPE_S);
+		len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+		offset += sizeof(typelen) + len;
+
+		/* END TLV or beyond LLDPDU size */
+		if (type == ICE_TLV_TYPE_END || offset > ICE_LLDPDU_SIZE)
+			break;
+
+		switch (type) {
+		case ICE_TLV_TYPE_ORG:
+			ice_parse_org_tlv(tlv, dcbcfg);
+			break;
+		default:
+			break;
+		}
+
+		/* Move to next TLV */
+		tlv = (struct ice_lldp_org_tlv *)
+		      ((char *)tlv + sizeof(tlv->typelen) + len);
+	}
+
+	return ret;
+}
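+
+/* Worked example of the typelen decode driving the walk above: LLDP
+ * packs a 7-bit type over a 9-bit length (assuming ICE_LLDP_TLV_TYPE_S
+ * = 9 and ICE_LLDP_TLV_LEN_M = 0x1FF), so for an organizationally
+ * specific TLV (type 127) with 25 payload bytes:
+ *
+ *	typelen = (127 << 9) | 25;          // value after ntohs()
+ *	type    = typelen >> 9;             // 127 == ICE_TLV_TYPE_ORG
+ *	len     = typelen & 0x1FF;          // 25
+ *	next    = (char *)tlv + 2 + len;    // sizeof(typelen) + len
+ */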
+
+/**
+ * ice_aq_get_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @mib_type: MIB type for the query
+ * @bridgetype: bridge type for the query (remote)
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Query DCB configuration from the firmware
+ */
+enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg)
+{
+	enum ice_status ret;
+	u8 *lldpmib;
+
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_get_lldp_mib(hw, bridgetype, mib_type, (void *)lldpmib,
+				  ICE_LLDPDU_SIZE, NULL, NULL, NULL);
+
+	if (!ret)
+		/* Parse LLDP MIB to get DCB configuration */
+		ret = ice_lldp_to_dcb_cfg(lldpmib, dcbcfg);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW
+ * @hw: pointer to the HW struct
+ * @start_dcbx_agent: True if DCBX Agent needs to be started
+ *		      False if DCBX Agent needs to be stopped
+ * @dcbx_agent_status: FW indicates back the DCBX agent status
+ *		       True if DCBX Agent is active
+ *		       False if DCBX Agent is stopped
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start/Stop the embedded DCBX Agent. Even when this wrapper function
+ * returns success, the caller must check whether the FW reports the same
+ * value in dcbx_agent_status, and react accordingly. (0x0A09)
+ */
+enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop_start_specific_agent *cmd;
+	enum ice_status status;
+	struct ice_aq_desc desc;
+	u16 opcode;
+
+	cmd = &desc.params.lldp_agent_ctrl;
+
+	opcode = ice_aqc_opc_lldp_stop_start_specific_agent;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+
+	if (start_dcbx_agent)
+		cmd->command = ICE_AQC_START_STOP_AGENT_START_DCBX;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	*dcbx_agent_status = false;
+
+	if (!status &&
+	    cmd->command == ICE_AQC_START_STOP_AGENT_START_DCBX)
+		*dcbx_agent_status = true;
+
+	return status;
+}
+
+/**
+ * ice_aq_get_cee_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware (0x0A07)
+ */
+static enum ice_status
+ice_aq_get_cee_dcb_cfg(struct ice_hw *hw,
+		       struct ice_aqc_get_cee_dcb_cfg_resp *buff,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cee_dcb_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, (void *)buff, sizeof(*buff), cd);
+}
+
+/**
+ * ice_cee_to_dcb_cfg
+ * @cee_cfg: pointer to CEE configuration struct
+ * @dcbcfg: DCB configuration struct
+ *
+ * Convert CEE configuration from firmware to DCB configuration
+ */
+static void
+ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
+		   struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
+	u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift;
+	u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+	u8 i, err, sync, oper, app_index, ice_app_sel_type;
+	u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift;
+	u16 ice_app_prot_id_type;
+
+	/* CEE PG data to ETS config */
+	dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+	/* Note that the FW creates the oper_prio_tc nibbles reversed
+	 * from those in the CEE Priority Group sub-TLV.
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		dcbcfg->etscfg.prio_table[i * 2] =
+			((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+		dcbcfg->etscfg.prio_table[i * 2 + 1] =
+			((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+	}
+
+	ice_for_each_traffic_class(i) {
+		dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+		if (dcbcfg->etscfg.prio_table[i] == ICE_CEE_PGID_STRICT) {
+			/* Map it to next empty TC */
+			dcbcfg->etscfg.prio_table[i] = cee_cfg->oper_num_tc - 1;
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+		} else {
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+		}
+	}
+
+	/* CEE PFC data to ETS config */
+	dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en;
+	dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+
+	app_index = 0;
+	for (i = 0; i < 3; i++) {
+		if (i == 0) {
+			/* FCoE APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_FCOE_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_FCOE_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S;
+			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+			ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE;
+		} else if (i == 1) {
+			/* iSCSI APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_ISCSI_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
+			ice_app_sel_type = ICE_APP_SEL_TCPIP;
+			ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI;
+		} else {
+			/* FIP APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_FIP_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S;
+			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+			ice_app_prot_id_type = ICE_APP_PROT_ID_FIP;
+		}
+
+		status = (tlv_status & ice_aqc_cee_status_mask) >>
+			 ice_aqc_cee_status_shift;
+		err = (status & ICE_TLV_STATUS_ERR) ? 1 : 0;
+		sync = (status & ICE_TLV_STATUS_SYNC) ? 1 : 0;
+		oper = (status & ICE_TLV_STATUS_OPER) ? 1 : 0;
+		/* Add FCoE/iSCSI/FIP APP if Error is False and
+		 * Oper/Sync is True
+		 */
+		if (!err && sync && oper) {
+			dcbcfg->app[app_index].priority =
+				(app_prio & ice_aqc_cee_app_mask) >>
+				ice_aqc_cee_app_shift;
+			dcbcfg->app[app_index].selector = ice_app_sel_type;
+			dcbcfg->app[app_index].prot_id = ice_app_prot_id_type;
+			app_index++;
+		}
+	}
+
+	dcbcfg->numapps = app_index;
+}
+
+/**
+ * ice_get_ieee_or_cee_dcb_cfg
+ * @pi: port information structure
+ * @dcbx_mode: mode of DCBX (IEEE or CEE)
+ *
+ * Get IEEE or CEE mode DCB configuration from the Firmware
+ */
+static enum ice_status
+ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
+{
+	struct ice_dcbx_cfg *dcbx_cfg = NULL;
+	enum ice_status ret;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	if (dcbx_mode == ICE_DCBX_MODE_IEEE)
+		dcbx_cfg = &pi->local_dcbx_cfg;
+	else if (dcbx_mode == ICE_DCBX_MODE_CEE)
+		dcbx_cfg = &pi->desired_dcbx_cfg;
+
+	/* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
+	 * or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE
+	 */
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_LOCAL,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	if (ret)
+		goto out;
+
+	/* Get Remote DCB Config */
+	dcbx_cfg = &pi->remote_dcbx_cfg;
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	/* Don't treat ENOENT as an error for Remote MIBs */
+	if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+		ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * ice_get_dcb_cfg
+ * @pi: port information structure
+ *
+ * Get DCB configuration from the Firmware
+ */
+enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	enum ice_status ret;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
+	if (!ret) {
+		/* CEE mode */
+		dcbx_cfg = &pi->local_dcbx_cfg;
+		dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE;
+		dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status);
+		ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg);
+		ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
+	} else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
+		/* CEE mode not enabled; try querying IEEE data */
+		dcbx_cfg = &pi->local_dcbx_cfg;
+		dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
+		ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_init_dcb
+ * @hw: pointer to the HW struct
+ * @enable_mib_change: enable MIB change event
+ *
+ * Update DCB configuration from the Firmware
+ */
+enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
+{
+	struct ice_port_info *pi = hw->port_info;
+	enum ice_status ret = 0;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	pi->is_sw_lldp = true;
+
+	/* Get DCBX status */
+	pi->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (pi->dcbx_status == ICE_DCBX_STATUS_DONE ||
+	    pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
+	    pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+		/* Get current DCBX configuration */
+		ret = ice_get_dcb_cfg(pi);
+		pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM);
+		if (ret)
+			return ret;
+	} else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
+		return ICE_ERR_NOT_READY;
+	}
+
+	/* Configure the LLDP MIB change event */
+	if (enable_mib_change) {
+		ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+		if (!ret)
+			pi->is_sw_lldp = false;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_mib: enable/disable MIB change event
+ *
+ * Enable or disable posting of the LLDP MIB change event
+ */
+enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
+{
+	struct ice_port_info *pi = hw->port_info;
+	enum ice_status ret;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	/* Get DCBX status */
+	pi->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (pi->dcbx_status == ICE_DCBX_STATUS_DIS)
+		return ICE_ERR_NOT_READY;
+
+	ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
+	if (!ret)
+		pi->is_sw_lldp = !ena_mib;
+
+	return ret;
+}
+
+/**
+ * ice_add_ieee_ets_common_tlv
+ * @buf: Data buffer to be populated with ice_dcb_ets_cfg data
+ * @ets_cfg: Container for ice_dcb_ets_cfg data
+ *
+ * Populate the TLV buffer with ice_dcb_ets_cfg data
+ */
+static void
+ice_add_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 priority0, priority1;
+	u8 offset = 0;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		priority0 = ets_cfg->prio_table[i * 2] & 0xF;
+		priority1 = ets_cfg->prio_table[i * 2 + 1] & 0xF;
+		buf[offset] = (priority0 << ICE_IEEE_ETS_PRIO_1_S) | priority1;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 *
+	 * TSA Assignment Table (8 octets)
+	 * Octets:| 9 | 10| 11| 12| 13| 14| 15| 16|
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		buf[offset] = ets_cfg->tcbwtable[i];
+		buf[ICE_MAX_TRAFFIC_CLASS + offset] = ets_cfg->tsatable[i];
+		offset++;
+	}
+}
+
+/**
+ * ice_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format
+ * @tlv: Fill the ETS config data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_add_ieee_ets_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u8 maxtcwilling = 0;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	if (etscfg->willing)
+		maxtcwilling = BIT(ICE_IEEE_ETS_WILLING_S);
+	maxtcwilling |= etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+	buf[0] = maxtcwilling;
+
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format
+ * @tlv: Fill ETS Recommended TLV in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_add_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etsrec;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	etsrec = &dcbcfg->etsrec;
+
+	/* First Octet is reserved */
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&buf[1], etsrec);
+}
+
+/**
+ * ice_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ *
+ * Prepare IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_add_ieee_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	if (dcbcfg->pfc.willing)
+		buf[0] = BIT(ICE_IEEE_PFC_WILLING_S);
+
+	if (dcbcfg->pfc.mbc)
+		buf[0] |= BIT(ICE_IEEE_PFC_MBC_S);
+
+	buf[0] |= dcbcfg->pfc.pfccap & 0xF;
+	buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_ieee_app_pri_tlv - Prepare APP TLV in IEEE format
+ * @tlv: Fill APP TLV in IEEE format
+ * @dcbcfg: Local store which holds the APP CFG data
+ *
+ * Prepare IEEE 802.1Qaz APP CFG TLV
+ */
+static void
+ice_add_ieee_app_pri_tlv(struct ice_lldp_org_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 typelen, len, offset = 0;
+	u8 priority, selector, i = 0;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+
+	/* No APP TLVs then just return */
+	if (dcbcfg->numapps == 0)
+		return;
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_APP_PRI);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Move offset to App Priority Table */
+	offset++;
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (i < dcbcfg->numapps) {
+		priority = dcbcfg->app[i].priority & 0x7;
+		selector = dcbcfg->app[i].selector & 0x7;
+		buf[offset] = (priority << ICE_IEEE_APP_PRIO_S) | selector;
+		buf[offset + 1] = (dcbcfg->app[i].prot_id >> 0x8) & 0xFF;
+		buf[offset + 2] = dcbcfg->app[i].prot_id & 0xFF;
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+	/* len includes size of ouisubtype + 1 reserved + 3*numapps */
+	len = sizeof(tlv->ouisubtype) + 1 + (i * 3);
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | (len & 0x1FF));
+	tlv->typelen = htons(typelen);
+}
+
+/**
+ * ice_add_dcb_tlv - Add all IEEE TLVs
+ * @tlv: Fill TLV data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ * @tlvid: Type of IEEE TLV
+ *
+ * Add tlv information
+ */
+static void
+ice_add_dcb_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg,
+		u16 tlvid)
+{
+	switch (tlvid) {
+	case ICE_IEEE_TLV_ID_ETS_CFG:
+		ice_add_ieee_ets_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_TLV_ID_ETS_REC:
+		ice_add_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_TLV_ID_PFC_CFG:
+		ice_add_ieee_pfc_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_TLV_ID_APP_PRI:
+		ice_add_ieee_app_pri_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_dcb_cfg_to_lldp - Convert DCB configuration to MIB format
+ * @lldpmib: pointer to the LLDP MIB buffer to be populated
+ * @miblen: returned length of the constructed LLDP MIB
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Convert the DCB configuration to MIB format
+ */
+static void
+ice_dcb_cfg_to_lldp(u8 *lldpmib, u16 *miblen, struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 len, offset = 0, tlvid = ICE_TLV_ID_START;
+	struct ice_lldp_org_tlv *tlv;
+	u16 typelen;
+
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	while (1) {
+		ice_add_dcb_tlv(tlv, dcbcfg, tlvid++);
+		typelen = ntohs(tlv->typelen);
+		len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+		if (len)
+			offset += len + 2;
+		/* END TLV or beyond LLDPDU size */
+		if (tlvid >= ICE_TLV_ID_END_OF_LLDPPDU ||
+		    offset > ICE_LLDPDU_SIZE)
+			break;
+		/* Move to next TLV */
+		if (len)
+			tlv = (struct ice_lldp_org_tlv *)
+				((char *)tlv + sizeof(tlv->typelen) + len);
+	}
+	*miblen = offset;
+}
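+
+/* Sizing example for the offset accounting above: each TLV contributes
+ * its payload length plus the 2-byte typelen header. With the
+ * conventional lengths (assumed: ICE_IEEE_ETS_TLV_LEN = 25,
+ * ICE_IEEE_PFC_TLV_LEN = 6), the running *miblen after the ETS CFG,
+ * ETS REC and PFC TLVs would be (25 + 2) + (25 + 2) + (6 + 2) = 62
+ * bytes, before any APP TLV is appended.
+ */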
+
+/**
+ * ice_set_dcb_cfg - Set the local LLDP MIB to FW
+ * @pi: port information structure
+ *
+ * Set DCB configuration to the Firmware
+ */
+enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi)
+{
+	u8 mib_type, *lldpmib = NULL;
+	struct ice_dcbx_cfg *dcbcfg;
+	enum ice_status ret;
+	struct ice_hw *hw;
+	u16 miblen;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	/* update the HW local config */
+	dcbcfg = &pi->local_dcbx_cfg;
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return ICE_ERR_NO_MEMORY;
+
+	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+		mib_type |= SET_LOCAL_MIB_TYPE_CEE_NON_WILLING;
+
+	ice_dcb_cfg_to_lldp(lldpmib, &miblen, dcbcfg);
+	ret = ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen,
+				  NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query the current port ETS configuration
+ */
+static enum ice_status
+ice_aq_query_port_ets(struct ice_port_info *pi,
+		      struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_query_port_ets *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	cmd = &desc.params.port_ets;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
+	cmd->port_teid = pi->root->info.node_teid;
+
+	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
+	return status;
+}
+
+/**
+ * ice_update_port_tc_tree_cfg - update TC tree configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ *
+ * update the SW DB with the new TC changes
+ */
+static enum ice_status
+ice_update_port_tc_tree_cfg(struct ice_port_info *pi,
+			    struct ice_aqc_port_ets_elem *buf)
+{
+	struct ice_sched_node *node, *tc_node;
+	struct ice_aqc_get_elem elem;
+	enum ice_status status = 0;
+	u32 teid1, teid2;
+	u8 i, j;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	/* suspend the missing TC nodes */
+	for (i = 0; i < pi->root->num_children; i++) {
+		teid1 = le32_to_cpu(pi->root->children[i]->info.node_teid);
+		ice_for_each_traffic_class(j) {
+			teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+			if (teid1 == teid2)
+				break;
+		}
+		if (j < ICE_MAX_TRAFFIC_CLASS)
+			continue;
+		/* TC is missing */
+		pi->root->children[i]->in_use = false;
+	}
+	/* add the new TC nodes */
+	ice_for_each_traffic_class(j) {
+		teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+		if (teid2 == ICE_INVAL_TEID)
+			continue;
+		/* Is it already present in the tree? */
+		for (i = 0; i < pi->root->num_children; i++) {
+			tc_node = pi->root->children[i];
+			if (!tc_node)
+				continue;
+			teid1 = le32_to_cpu(tc_node->info.node_teid);
+			if (teid1 == teid2) {
+				tc_node->tc_num = j;
+				tc_node->in_use = true;
+				break;
+			}
+		}
+		if (i < pi->root->num_children)
+			continue;
+		/* new TC */
+		status = ice_sched_query_elem(pi->hw, teid2, &elem);
+		if (!status)
+			status = ice_sched_add_node(pi, 1, &elem.generic[0]);
+		if (status)
+			break;
+		/* update the TC number */
+		node = ice_sched_find_node_by_teid(pi->root, teid2);
+		if (node)
+			node->tc_num = j;
+	}
+	return status;
+}
+
+/**
+ * ice_query_port_ets - query port ets configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query the current port ETS configuration and update the
+ * SW DB with the TC changes
+ */
+enum ice_status
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	enum ice_status status;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_aq_query_port_ets(pi, buf, buf_size, cd);
+	if (!status)
+		status = ice_update_port_tc_tree_cfg(pi, buf);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
new file mode 100644
index 0000000..ee138f9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_H_
+#define _ICE_DCB_H_
+
+#include "ice_type.h"
+
+#define ICE_DCBX_STATUS_NOT_STARTED	0
+#define ICE_DCBX_STATUS_IN_PROGRESS	1
+#define ICE_DCBX_STATUS_DONE		2
+#define ICE_DCBX_STATUS_DIS		7
+
+#define ICE_TLV_TYPE_END		0
+#define ICE_TLV_TYPE_ORG		127
+
+#define ICE_IEEE_8021QAZ_OUI		0x0080C2
+#define ICE_IEEE_SUBTYPE_ETS_CFG	9
+#define ICE_IEEE_SUBTYPE_ETS_REC	10
+#define ICE_IEEE_SUBTYPE_PFC_CFG	11
+#define ICE_IEEE_SUBTYPE_APP_PRI	12
+
+#define ICE_CEE_DCBX_OUI		0x001B21
+#define ICE_CEE_DCBX_TYPE		2
+#define ICE_CEE_SUBTYPE_PG_CFG		2
+#define ICE_CEE_SUBTYPE_PFC_CFG		3
+#define ICE_CEE_SUBTYPE_APP_PRI		4
+#define ICE_CEE_MAX_FEAT_TYPE		3
+/* Defines for LLDP TLV header */
+#define ICE_LLDP_TLV_LEN_S		0
+#define ICE_LLDP_TLV_LEN_M		(0x01FF << ICE_LLDP_TLV_LEN_S)
+#define ICE_LLDP_TLV_TYPE_S		9
+#define ICE_LLDP_TLV_TYPE_M		(0x7F << ICE_LLDP_TLV_TYPE_S)
+#define ICE_LLDP_TLV_SUBTYPE_S		0
+#define ICE_LLDP_TLV_SUBTYPE_M		(0xFF << ICE_LLDP_TLV_SUBTYPE_S)
+#define ICE_LLDP_TLV_OUI_S		8
+#define ICE_LLDP_TLV_OUI_M		(0xFFFFFFUL << ICE_LLDP_TLV_OUI_S)
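+/* The 7-bit type and 9-bit length share one __be16: e.g. the typelen of
+ * an Organizationally Specific TLV (type 127) with length 25 is
+ * (127 << ICE_LLDP_TLV_TYPE_S) | 25 = 0xFE19 on the wire.
+ */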
+
+/* Defines for IEEE ETS TLV */
+#define ICE_IEEE_ETS_MAXTC_S		0
+#define ICE_IEEE_ETS_MAXTC_M		(0x7 << ICE_IEEE_ETS_MAXTC_S)
+#define ICE_IEEE_ETS_CBS_S		6
+#define ICE_IEEE_ETS_CBS_M		BIT(ICE_IEEE_ETS_CBS_S)
+#define ICE_IEEE_ETS_WILLING_S		7
+#define ICE_IEEE_ETS_WILLING_M		BIT(ICE_IEEE_ETS_WILLING_S)
+#define ICE_IEEE_ETS_PRIO_0_S		0
+#define ICE_IEEE_ETS_PRIO_0_M		(0x7 << ICE_IEEE_ETS_PRIO_0_S)
+#define ICE_IEEE_ETS_PRIO_1_S		4
+#define ICE_IEEE_ETS_PRIO_1_M		(0x7 << ICE_IEEE_ETS_PRIO_1_S)
+#define ICE_CEE_PGID_PRIO_0_S		0
+#define ICE_CEE_PGID_PRIO_0_M		(0xF << ICE_CEE_PGID_PRIO_0_S)
+#define ICE_CEE_PGID_PRIO_1_S		4
+#define ICE_CEE_PGID_PRIO_1_M		(0xF << ICE_CEE_PGID_PRIO_1_S)
+#define ICE_CEE_PGID_STRICT		15
+
+/* Defines for IEEE TSA types */
+#define ICE_IEEE_TSA_STRICT		0
+#define ICE_IEEE_TSA_ETS		2
+
+/* Defines for IEEE PFC TLV */
+#define ICE_IEEE_PFC_CAP_S		0
+#define ICE_IEEE_PFC_CAP_M		(0xF << ICE_IEEE_PFC_CAP_S)
+#define ICE_IEEE_PFC_MBC_S		6
+#define ICE_IEEE_PFC_MBC_M		BIT(ICE_IEEE_PFC_MBC_S)
+#define ICE_IEEE_PFC_WILLING_S		7
+#define ICE_IEEE_PFC_WILLING_M		BIT(ICE_IEEE_PFC_WILLING_S)
+
+/* Defines for IEEE APP TLV */
+#define ICE_IEEE_APP_SEL_S		0
+#define ICE_IEEE_APP_SEL_M		(0x7 << ICE_IEEE_APP_SEL_S)
+#define ICE_IEEE_APP_PRIO_S		5
+#define ICE_IEEE_APP_PRIO_M		(0x7 << ICE_IEEE_APP_PRIO_S)
+
+/* TLV definitions for preparing MIB */
+#define ICE_IEEE_TLV_ID_ETS_CFG		3
+#define ICE_IEEE_TLV_ID_ETS_REC		4
+#define ICE_IEEE_TLV_ID_PFC_CFG		5
+#define ICE_IEEE_TLV_ID_APP_PRI		6
+#define ICE_TLV_ID_END_OF_LLDPPDU	7
+#define ICE_TLV_ID_START		ICE_IEEE_TLV_ID_ETS_CFG
+
+#define ICE_IEEE_ETS_TLV_LEN		25
+#define ICE_IEEE_PFC_TLV_LEN		6
+#define ICE_IEEE_APP_TLV_LEN		11
+
+/* IEEE 802.1AB LLDP Organization specific TLV */
+struct ice_lldp_org_tlv {
+	__be16 typelen;
+	__be32 ouisubtype;
+	u8 tlvinfo[1];
+} __packed;
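+/* On the wire: the 2-byte type/len header, then the 3-byte OUI plus
+ * 1-byte subtype packed into ouisubtype, then len - 4 bytes of payload.
+ */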
+
+struct ice_cee_tlv_hdr {
+	__be16 typelen;
+	u8 operver;
+	u8 maxver;
+};
+
+struct ice_cee_ctrl_tlv {
+	struct ice_cee_tlv_hdr hdr;
+	__be32 seqno;
+	__be32 ackno;
+};
+
+struct ice_cee_feat_tlv {
+	struct ice_cee_tlv_hdr hdr;
+	u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */
+#define ICE_CEE_FEAT_TLV_ENA_M		0x80
+#define ICE_CEE_FEAT_TLV_WILLING_M	0x40
+#define ICE_CEE_FEAT_TLV_ERR_M		0x20
+	u8 subtype;
+	u8 tlvinfo[1];
+};
+
+struct ice_cee_app_prio {
+	__be16 protocol;
+	u8 upper_oui_sel; /* Bits: |Upper OUI(6)|Selector(2)| */
+#define ICE_CEE_APP_SELECTOR_M	0x03
+	__be16 lower_oui;
+	u8 prio_map;
+} __packed;
+
+enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg);
+enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi);
+enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi);
+enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change);
+enum ice_status
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cmd_details);
+#ifdef CONFIG_DCB
+enum ice_status
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+		 struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd);
+enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib);
+#else /* CONFIG_DCB */
+static inline enum ice_status
+ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
+		 bool __always_unused shutdown_lldp_agent,
+		 bool __always_unused persist,
+		 struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline enum ice_status
+ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+		  bool __always_unused persist,
+		  struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw,
+		       bool __always_unused start_dcbx_agent,
+		       bool *dcbx_agent_status,
+		       struct ice_sq_cd __always_unused *cd)
+{
+	*dcbx_agent_status = false;
+
+	return 0;
+}
+
+static inline enum ice_status
+ice_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
+			bool __always_unused ena_mib)
+{
+	return 0;
+}
+
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
new file mode 100644
index 0000000..dd47869
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_dcb_lib.h"
+
+/**
+ * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @ena_tc: TC map to be enabled
+ */
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf = vsi->back;
+	struct ice_dcbx_cfg *dcbcfg;
+	u8 netdev_tc;
+	int i;
+
+	if (!netdev)
+		return;
+
+	if (!ena_tc) {
+		netdev_reset_tc(netdev);
+		return;
+	}
+
+	if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc))
+		return;
+
+	dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	ice_for_each_traffic_class(i)
+		if (vsi->tc_cfg.ena_tc & BIT(i))
+			netdev_set_tc_queue(netdev,
+					    vsi->tc_cfg.tc_info[i].netdev_tc,
+					    vsi->tc_cfg.tc_info[i].qcount_tx,
+					    vsi->tc_cfg.tc_info[i].qoffset);
+
+	for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+		u8 ets_tc = dcbcfg->etscfg.prio_table[i];
+
+		/* Get the mapped netdev TC# for the UP */
+		netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
+		netdev_set_prio_tc_map(netdev, i, netdev_tc);
+	}
+}
+
+/**
+ * ice_dcb_get_ena_tc - return bitmap of enabled TCs
+ * @dcbcfg: DCB config to evaluate for enabled TCs
+ */
+u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 i, num_tc, ena_tc = 1;
+
+	num_tc = ice_dcb_get_num_tc(dcbcfg);
+
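+	/* e.g. num_tc == 3 yields ena_tc == 0x7; TC0 (bit 0) is always set */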
+	for (i = 0; i < num_tc; i++)
+		ena_tc |= BIT(i);
+
+	return ena_tc;
+}
+
+/**
+ * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
+ * @dcbcfg: config to retrieve number of TCs from
+ */
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	bool tc_unused = false;
+	u8 num_tc = 0;
+	u8 ret = 0;
+	int i;
+
+	/* Scan the ETS Config Priority Table to find traffic classes
+	 * enabled and create a bitmask of enabled TCs
+	 */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		num_tc |= BIT(dcbcfg->etscfg.prio_table[i]);
+
+	/* Scan bitmask for contiguous TCs starting with TC0 */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (num_tc & BIT(i)) {
+			if (!tc_unused) {
+				ret++;
+			} else {
+				pr_err("Non-contiguous TCs - Disabling DCB\n");
+				return 1;
+			}
+		} else {
+			tc_unused = true;
+		}
+	}
+
+	/* There is always at least 1 TC */
+	if (!ret)
+		ret = 1;
+
+	return ret;
+}
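+/* Example: a priority table mapping the eight UPs as {0, 0, 1, 1, 2, 2,
+ * 2, 2} sets bits 0x7 in the scan mask and returns 3; a table that uses
+ * only TC0 and TC2 is non-contiguous and returns 1.
+ */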
+
+/**
+ * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: VSI owner of rings being updated
+ */
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
+{
+	struct ice_ring *tx_ring, *rx_ring;
+	u16 qoffset, qcount;
+	int i, n;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+		/* Reset the TC information */
+		for (i = 0; i < vsi->num_txq; i++) {
+			tx_ring = vsi->tx_rings[i];
+			tx_ring->dcb_tc = 0;
+		}
+		for (i = 0; i < vsi->num_rxq; i++) {
+			rx_ring = vsi->rx_rings[i];
+			rx_ring->dcb_tc = 0;
+		}
+		return;
+	}
+
+	ice_for_each_traffic_class(n) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(n)))
+			break;
+
+		qoffset = vsi->tc_cfg.tc_info[n].qoffset;
+		qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
+		for (i = qoffset; i < (qoffset + qcount); i++) {
+			tx_ring = vsi->tx_rings[i];
+			rx_ring = vsi->rx_rings[i];
+			tx_ring->dcb_tc = n;
+			rx_ring->dcb_tc = n;
+		}
+	}
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ *
+ * The caller is assumed to have already disabled all VSIs before
+ * calling this function. DCB is reconfigured based on
+ * local_dcbx_cfg.
+ */
+static void ice_pf_dcb_recfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+	u8 tc_map = 0;
+	int v, ret;
+
+	/* Update each VSI */
+	ice_for_each_vsi(pf, v) {
+		if (!pf->vsi[v])
+			continue;
+
+		if (pf->vsi[v]->type == ICE_VSI_PF)
+			tc_map = ice_dcb_get_ena_tc(dcbcfg);
+		else
+			tc_map = ICE_DFLT_TRAFFIC_CLASS;
+
+		ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"Failed to config TC for VSI index: %d\n",
+				pf->vsi[v]->idx);
+			continue;
+		}
+
+		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+	}
+}
+
+/**
+ * ice_pf_dcb_cfg - Apply new DCB configuration
+ * @pf: pointer to the PF struct
+ * @new_cfg: DCBX config to apply
+ * @locked: is the RTNL held
+ */
+static
+int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
+{
+	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	int ret = 0;
+
+	curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(new_cfg) > 1) {
+		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
+		dev_dbg(&pf->pdev->dev, "No change in DCB config required\n");
+		return ret;
+	}
+
+	/* Store old config in case FW config fails */
+	old_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*old_cfg), GFP_KERNEL);
+	if (!old_cfg)
+		return -ENOMEM;
+
+	memcpy(old_cfg, curr_cfg, sizeof(*old_cfg));
+
+	/* avoid race conditions by holding the lock while disabling and
+	 * re-enabling the VSI
+	 */
+	if (!locked)
+		rtnl_lock();
+	ice_pf_dis_all_vsi(pf, true);
+
+	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
+	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+
+	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
+	 * the new config came from the HW in the first place.
+	 */
+	if (pf->hw.port_info->is_sw_lldp) {
+		ret = ice_set_dcb_cfg(pf->hw.port_info);
+		if (ret) {
+			dev_err(&pf->pdev->dev, "Set DCB Config failed\n");
+			/* Restore previous settings to local config */
+			memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
+			goto out;
+		}
+	}
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		goto out;
+	}
+
+	ice_pf_dcb_recfg(pf);
+
+out:
+	ice_pf_ena_all_vsi(pf, true);
+	if (!locked)
+		rtnl_unlock();
+	devm_kfree(&pf->pdev->dev, old_cfg);
+	return ret;
+}
+
+/**
+ * ice_cfg_etsrec_defaults - Set default ETS recommended DCB config
+ * @pi: port information structure
+ */
+static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pi->local_dcbx_cfg;
+	u8 i;
+
+	/* Ensure ETS recommended DCB configuration is not already set */
+	if (dcbcfg->etsrec.maxtcs)
+		return;
+
+	/* In CEE mode, set the default to 1 TC */
+	dcbcfg->etsrec.maxtcs = 1;
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		dcbcfg->etsrec.tcbwtable[i] = i ? 0 : 100;
+		dcbcfg->etsrec.tsatable[i] = i ? ICE_IEEE_TSA_STRICT :
+						 ICE_IEEE_TSA_ETS;
+	}
+}
+
+/**
+ * ice_dcb_need_recfg - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ */
+static bool
+ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		   struct ice_dcbx_cfg *new_cfg)
+{
+	bool need_reconfig = false;
+
+	/* Check if ETS configuration has changed */
+	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
+		   sizeof(new_cfg->etscfg))) {
+		/* If Priority Table has changed reconfig is needed */
+		if (memcmp(&new_cfg->etscfg.prio_table,
+			   &old_cfg->etscfg.prio_table,
+			   sizeof(new_cfg->etscfg.prio_table))) {
+			need_reconfig = true;
+			dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
+		}
+
+		if (memcmp(&new_cfg->etscfg.tcbwtable,
+			   &old_cfg->etscfg.tcbwtable,
+			   sizeof(new_cfg->etscfg.tcbwtable)))
+			dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+
+		if (memcmp(&new_cfg->etscfg.tsatable,
+			   &old_cfg->etscfg.tsatable,
+			   sizeof(new_cfg->etscfg.tsatable)))
+			dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
+	}
+
+	/* Check if PFC configuration has changed */
+	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
+		need_reconfig = true;
+		dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
+	}
+
+	/* Check if APP Table has changed */
+	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
+		need_reconfig = true;
+		dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
+	}
+
+	dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
+	return need_reconfig;
+}
+
+/**
+ * ice_dcb_rebuild - rebuild DCB post reset
+ * @pf: physical function instance
+ */
+void ice_dcb_rebuild(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *local_dcbx_cfg, *desired_dcbx_cfg, *prev_cfg;
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	enum ice_status ret;
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	/* If DCB was not enabled previously, we are done */
+	if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return;
+
+	local_dcbx_cfg = &pf->hw.port_info->local_dcbx_cfg;
+	desired_dcbx_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	/* Force the local config to unwilling so the restored settings stick */
+	local_dcbx_cfg->etscfg.willing = 0x0;
+	local_dcbx_cfg->pfc.willing = 0x0;
+	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
+
+	ice_cfg_etsrec_defaults(pf->hw.port_info);
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n");
+		goto dcb_error;
+	}
+
+	/* Retrieve DCB config and ensure same as current in SW */
+	prev_cfg = devm_kmemdup(&pf->pdev->dev, local_dcbx_cfg,
+				sizeof(*prev_cfg), GFP_KERNEL);
+	if (!prev_cfg) {
+		dev_err(&pf->pdev->dev, "Failed to alloc space for DCB cfg\n");
+		goto dcb_error;
+	}
+
+	ice_init_dcb(&pf->hw, true);
+	if (pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS)
+		pf->hw.port_info->is_sw_lldp = true;
+	else
+		pf->hw.port_info->is_sw_lldp = false;
+
+	if (ice_dcb_need_recfg(pf, prev_cfg, local_dcbx_cfg)) {
+		/* difference in cfg detected - disable DCB till next MIB */
+		dev_err(&pf->pdev->dev, "Set local MIB not accurate\n");
+		goto dcb_error;
+	}
+
+	/* fetched config congruent to previous configuration */
+	devm_kfree(&pf->pdev->dev, prev_cfg);
+
+	/* Set the local desired config */
+	if (local_dcbx_cfg->dcbx_mode == ICE_DCBX_MODE_CEE)
+		memcpy(local_dcbx_cfg, desired_dcbx_cfg,
+		       sizeof(*local_dcbx_cfg));
+
+	ice_cfg_etsrec_defaults(pf->hw.port_info);
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Failed to set desired config\n");
+		goto dcb_error;
+	}
+	dev_info(&pf->pdev->dev, "DCB restored after reset\n");
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	return;
+
+dcb_error:
+	dev_err(&pf->pdev->dev, "Disabling DCB until new settings occur\n");
+	prev_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*prev_cfg), GFP_KERNEL);
+	if (!prev_cfg)
+		return;
+
+	prev_cfg->etscfg.willing = true;
+	prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+	prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+	memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
+	ice_pf_dcb_cfg(pf, prev_cfg, false);
+	devm_kfree(&pf->pdev->dev, prev_cfg);
+}
+
+/**
+ * ice_dcb_init_cfg - set the initial DCB config in SW
+ * @pf: PF to apply config to
+ * @locked: Is the RTNL held
+ */
+static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
+{
+	struct ice_dcbx_cfg *newcfg;
+	struct ice_port_info *pi;
+	int ret = 0;
+
+	pi = pf->hw.port_info;
+	newcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*newcfg), GFP_KERNEL);
+	if (!newcfg)
+		return -ENOMEM;
+
+	memcpy(newcfg, &pi->local_dcbx_cfg, sizeof(*newcfg));
+	memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
+
+	dev_info(&pf->pdev->dev, "Configuring initial DCB values\n");
+	if (ice_pf_dcb_cfg(pf, newcfg, locked))
+		ret = -EINVAL;
+
+	devm_kfree(&pf->pdev->dev, newcfg);
+
+	return ret;
+}
+
+/**
+ * ice_dcb_sw_dflt_cfg - Apply a default DCB config
+ * @pf: PF to apply config to
+ * @locked: was this function called with RTNL held
+ */
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *dcbcfg;
+	struct ice_port_info *pi;
+	struct ice_hw *hw;
+	int ret;
+
+	hw = &pf->hw;
+	pi = hw->port_info;
+	dcbcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*dcbcfg), GFP_KERNEL);
+	if (!dcbcfg)
+		return -ENOMEM;
+
+	memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
+
+	dcbcfg->etscfg.willing = 1;
+	dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
+	dcbcfg->etscfg.tcbwtable[0] = 100;
+	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+
+	memcpy(&dcbcfg->etsrec, &dcbcfg->etscfg,
+	       sizeof(dcbcfg->etsrec));
+	dcbcfg->etsrec.willing = 0;
+
+	dcbcfg->pfc.willing = 1;
+	dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc;
+
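+	/* Advertise a single application entry: FCoE (EtherType 0x8906) at
+	 * priority 3, the conventional DCB default for FCoE traffic.
+	 */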
+	dcbcfg->numapps = 1;
+	dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
+	dcbcfg->app[0].priority = 3;
+	dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
+
+	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
+	devm_kfree(&pf->pdev->dev, dcbcfg);
+	if (ret)
+		return ret;
+
+	return ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
+}
+
+/**
+ * ice_init_pf_dcb - initialize DCB for a PF
+ * @pf: PF to initialize DCB for
+ * @locked: Was function called with RTNL held
+ */
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
+{
+	struct device *dev = &pf->pdev->dev;
+	struct ice_port_info *port_info;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	port_info = hw->port_info;
+
+	err = ice_init_dcb(hw, false);
+	if (err && !port_info->is_sw_lldp) {
+		dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err);
+		goto dcb_init_err;
+	}
+
+	dev_info(dev,
+		 "DCB is enabled in the hardware, max number of TCs supported on this port is %d\n",
+		 pf->hw.func_caps.common_cap.maxtc);
+	if (err) {
+		/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
+		dev_info(dev,
+			 "FW LLDP is disabled, DCBX/LLDP in SW mode.\n");
+		clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+		err = ice_dcb_sw_dflt_cfg(pf, locked);
+		if (err) {
+			dev_err(dev,
+				"Failed to set local DCB config %d\n", err);
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+		return 0;
+	}
+
+	set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+
+	/* DCBX in FW and LLDP enabled in FW */
+	pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE;
+
+	err = ice_dcb_init_cfg(pf, locked);
+	if (err)
+		goto dcb_init_err;
+
+	return err;
+
+dcb_init_err:
+	dev_err(dev, "DCB init failed\n");
+	return err;
+}
+
+/**
+ * ice_update_dcb_stats - Update DCB stats counters
+ * @pf: PF whose stats need to be updated
+ */
+void ice_update_dcb_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *prev_ps, *cur_ps;
+	struct ice_hw *hw = &pf->hw;
+	u8 port;
+	int i;
+
+	port = hw->port_info->lport;
+	prev_ps = &pf->stats_prev;
+	cur_ps = &pf->stats;
+
+	for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+		ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xoff_rx[i],
+				  &cur_ps->priority_xoff_rx[i]);
+		ice_stat_update32(hw, GLPRT_PXONRXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_rx[i],
+				  &cur_ps->priority_xon_rx[i]);
+		ice_stat_update32(hw, GLPRT_PXONTXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_tx[i],
+				  &cur_ps->priority_xon_tx[i]);
+		ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xoff_tx[i],
+				  &cur_ps->priority_xoff_tx[i]);
+		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_2_xoff[i],
+				  &cur_ps->priority_xon_2_xoff[i]);
+	}
+}
+
+/**
+ * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ */
+int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
+			      struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
+		return 0;
+
+	/* Insert 802.1p priority into VLAN header */
+	if ((first->tx_flags & (ICE_TX_FLAGS_HW_VLAN | ICE_TX_FLAGS_SW_VLAN)) ||
+	    skb->priority != TC_PRIO_CONTROL) {
+		first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
+		/* Mask the lower 3 bits to set the 802.1p priority */
+		first->tx_flags |= (skb->priority & 0x7) <<
+				   ICE_TX_FLAGS_VLAN_PR_S;
+		if (first->tx_flags & ICE_TX_FLAGS_SW_VLAN) {
+			struct vlan_ethhdr *vhdr;
+			int rc;
+
+			rc = skb_cow_head(skb, 0);
+			if (rc < 0)
+				return rc;
+			vhdr = (struct vlan_ethhdr *)skb->data;
+			vhdr->h_vlan_TCI = htons(first->tx_flags >>
+						 ICE_TX_FLAGS_VLAN_S);
+		} else {
+			first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_dcb_process_lldp_set_mib_change - Process MIB change
+ * @pf: ptr to ice_pf
+ * @event: pointer to the admin queue receive event
+ */
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_aqc_lldp_get_mib *mib;
+	struct ice_dcbx_cfg tmp_dcbx_cfg;
+	bool need_reconfig = false;
+	struct ice_port_info *pi;
+	u8 type;
+	int ret;
+
+	/* Not DCB capable or capability disabled */
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
+		dev_dbg(&pf->pdev->dev, "MIB Change Event in HOST mode\n");
+		return;
+	}
+
+	pi = pf->hw.port_info;
+	mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+	/* Ignore if event is not for Nearest Bridge */
+	type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
+		ICE_AQ_LLDP_BRID_TYPE_M);
+	dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type);
+	if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
+		return;
+
+	/* Check the MIB type; a remote MIB update refreshes the cached
+	 * remote config before the local config is processed below
+	 */
+	type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
+	dev_dbg(&pf->pdev->dev,
+		"LLDP event mib type %s\n", type ? "remote" : "local");
+	if (type == ICE_AQ_LLDP_MIB_REMOTE) {
+		/* Update the cached remote instance */
+		ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+					 &pi->remote_dcbx_cfg);
+		if (ret) {
+			dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");
+			return;
+		}
+	}
+
+	/* store the old configuration */
+	tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg;
+
+	/* Reset the old DCBX configuration data */
+	memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg));
+
+	/* Get updated DCBX data from firmware */
+	ret = ice_get_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Failed to get DCB config\n");
+		return;
+	}
+
+	/* No change detected in DCBX configs */
+	if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
+		dev_dbg(&pf->pdev->dev,
+			"No change detected in DCBX configuration.\n");
+		return;
+	}
+
+	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
+					   &pi->local_dcbx_cfg);
+	if (!need_reconfig)
+		return;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
+		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	rtnl_lock();
+	ice_pf_dis_all_vsi(pf, true);
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		rtnl_unlock();
+		return;
+	}
+
+	/* changes in configuration update VSI */
+	ice_pf_dcb_recfg(pf);
+
+	ice_pf_ena_all_vsi(pf, true);
+	rtnl_unlock();
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
new file mode 100644
index 0000000..661a6f7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_LIB_H_
+#define _ICE_DCB_LIB_H_
+
+#include "ice.h"
+#include "ice_lib.h"
+
+#ifdef CONFIG_DCB
+#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+
+void ice_dcb_rebuild(struct ice_pf *pf);
+u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
+void ice_update_dcb_stats(struct ice_pf *pf);
+int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
+			      struct ice_tx_buf *first);
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event);
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
+static inline void
+ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring)
+{
+	tlan_ctx->cgd_num = ring->dcb_tc;
+}
+#else
+#define ice_dcb_rebuild(pf) do {} while (0)
+
+static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+	return ICE_DFLT_TRAFFIC_CLASS;
+}
+
+static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+	return 1;
+}
+
+static inline int
+ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
+{
+	dev_dbg(&pf->pdev->dev, "DCB not supported\n");
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
+			      struct ice_tx_buf __always_unused *first)
+{
+	return 0;
+}
+
+#define ice_update_dcb_stats(pf) do {} while (0)
+#define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
+#define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0)
+#define ice_set_cgd_num(tlan_ctx, ring) do {} while (0)
+#define ice_vsi_cfg_netdev_tc(vsi, ena_tc) do {} while (0)
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
index 0e14d72..f8d5c66 100644
--- a/drivers/net/ethernet/intel/ice/ice_devids.h
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -5,15 +5,11 @@
 #define _ICE_DEVIDS_H_
 
 /* Device IDs */
-/* Intel(R) Ethernet Controller C810 for backplane */
-#define ICE_DEV_ID_C810_BACKPLANE	0x1591
-/* Intel(R) Ethernet Controller C810 for QSFP */
-#define ICE_DEV_ID_C810_QSFP		0x1592
-/* Intel(R) Ethernet Controller C810 for SFP */
-#define ICE_DEV_ID_C810_SFP		0x1593
-/* Intel(R) Ethernet Controller C810/X557-AT 10GBASE-T */
-#define ICE_DEV_ID_C810_10G_BASE_T	0x1594
-/* Intel(R) Ethernet Controller C810 1GbE */
-#define ICE_DEV_ID_C810_SGMII		0x1595
+/* Intel(R) Ethernet Controller E810-C for backplane */
+#define ICE_DEV_ID_E810C_BACKPLANE	0x1591
+/* Intel(R) Ethernet Controller E810-C for QSFP */
+#define ICE_DEV_ID_E810C_QSFP		0x1592
+/* Intel(R) Ethernet Controller E810-C for SFP */
+#define ICE_DEV_ID_E810C_SFP		0x1593
 
 #endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 4c5c87b..7e23034 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -4,6 +4,8 @@
 /* ethtool support for ice */
 
 #include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
 
 struct ice_stats {
 	char stat_string[ETH_GSTRING_LEN];
@@ -33,26 +35,50 @@
 #define ICE_PF_STATS_LEN	ARRAY_SIZE(ice_gstrings_pf_stats)
 #define ICE_VSI_STATS_LEN	ARRAY_SIZE(ice_gstrings_vsi_stats)
 
-#define ICE_ALL_STATS_LEN(n)	(ICE_PF_STATS_LEN + ICE_VSI_STATS_LEN + \
-				 ice_q_stats_len(n))
+#define ICE_PFC_STATS_LEN ( \
+		(FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_rx) + \
+		 FIELD_SIZEOF(struct ice_pf, stats.priority_xon_rx) + \
+		 FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_tx) + \
+		 FIELD_SIZEOF(struct ice_pf, stats.priority_xon_tx)) \
+		 / sizeof(u64))
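+/* i.e. four arrays of eight u64 priority counters, so 32 PFC stats are
+ * appended for the port
+ */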
+#define ICE_ALL_STATS_LEN(n)	(ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
+				 ICE_VSI_STATS_LEN + ice_q_stats_len(n))
 
 static const struct ice_stats ice_gstrings_vsi_stats[] = {
-	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
 	ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
-	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
 	ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
-	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
 	ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
-	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
 	ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
-	ICE_VSI_STAT("rx_discards", eth_stats.rx_discards),
-	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
-	ICE_VSI_STAT("tx_linearize", tx_linearize),
+	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
 	ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
 	ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
 	ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+	ICE_VSI_STAT("tx_linearize", tx_linearize),
 };
 
+enum ice_ethtool_test_id {
+	ICE_ETH_TEST_REG = 0,
+	ICE_ETH_TEST_EEPROM,
+	ICE_ETH_TEST_INTR,
+	ICE_ETH_TEST_LOOP,
+	ICE_ETH_TEST_LINK,
+};
+
+static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)",
+	"EEPROM test    (offline)",
+	"Interrupt test (offline)",
+	"Loopback test  (offline)",
+	"Link test   (on/offline)",
+};
+
+#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
+
 /* These PF_STATs might look like duplicates of some NETDEV_STATs,
  * but they aren't. This device is capable of supporting multiple
  * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
@@ -62,49 +88,49 @@
  * The PF_STATs are appended to the netdev stats only when ethtool -S
  * is queried on the base PF netdev.
  */
-static struct ice_stats ice_gstrings_pf_stats[] = {
-	ICE_PF_STAT("tx_bytes", stats.eth.tx_bytes),
-	ICE_PF_STAT("rx_bytes", stats.eth.rx_bytes),
-	ICE_PF_STAT("tx_unicast", stats.eth.tx_unicast),
-	ICE_PF_STAT("rx_unicast", stats.eth.rx_unicast),
-	ICE_PF_STAT("tx_multicast", stats.eth.tx_multicast),
-	ICE_PF_STAT("rx_multicast", stats.eth.rx_multicast),
-	ICE_PF_STAT("tx_broadcast", stats.eth.tx_broadcast),
-	ICE_PF_STAT("rx_broadcast", stats.eth.rx_broadcast),
-	ICE_PF_STAT("tx_errors", stats.eth.tx_errors),
-	ICE_PF_STAT("tx_size_64", stats.tx_size_64),
-	ICE_PF_STAT("rx_size_64", stats.rx_size_64),
-	ICE_PF_STAT("tx_size_127", stats.tx_size_127),
-	ICE_PF_STAT("rx_size_127", stats.rx_size_127),
-	ICE_PF_STAT("tx_size_255", stats.tx_size_255),
-	ICE_PF_STAT("rx_size_255", stats.rx_size_255),
-	ICE_PF_STAT("tx_size_511", stats.tx_size_511),
-	ICE_PF_STAT("rx_size_511", stats.rx_size_511),
-	ICE_PF_STAT("tx_size_1023", stats.tx_size_1023),
-	ICE_PF_STAT("rx_size_1023", stats.rx_size_1023),
-	ICE_PF_STAT("tx_size_1522", stats.tx_size_1522),
-	ICE_PF_STAT("rx_size_1522", stats.rx_size_1522),
-	ICE_PF_STAT("tx_size_big", stats.tx_size_big),
-	ICE_PF_STAT("rx_size_big", stats.rx_size_big),
-	ICE_PF_STAT("link_xon_tx", stats.link_xon_tx),
-	ICE_PF_STAT("link_xon_rx", stats.link_xon_rx),
-	ICE_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
-	ICE_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
-	ICE_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down),
-	ICE_PF_STAT("rx_undersize", stats.rx_undersize),
-	ICE_PF_STAT("rx_fragments", stats.rx_fragments),
-	ICE_PF_STAT("rx_oversize", stats.rx_oversize),
-	ICE_PF_STAT("rx_jabber", stats.rx_jabber),
-	ICE_PF_STAT("rx_csum_bad", hw_csum_rx_error),
-	ICE_PF_STAT("rx_length_errors", stats.rx_len_errors),
-	ICE_PF_STAT("rx_dropped", stats.eth.rx_discards),
-	ICE_PF_STAT("rx_crc_errors", stats.crc_errors),
-	ICE_PF_STAT("illegal_bytes", stats.illegal_bytes),
-	ICE_PF_STAT("mac_local_faults", stats.mac_local_faults),
-	ICE_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
+static const struct ice_stats ice_gstrings_pf_stats[] = {
+	ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
+	ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
+	ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
+	ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
+	ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
+	ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
+	ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
+	ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
+	ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
+	ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
+	ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
+	ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
+	ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
+	ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
+	ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
+	ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
+	ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
+	ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
+	ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
+	ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
+	ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
+	ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
+	ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
+	ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
+	ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
+	ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
+	ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
+	ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
+	ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
+	ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
+	ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
+	ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
+	ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+	ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
+	ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
+	ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
+	ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
+	ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
+	ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
 };
 
-static u32 ice_regs_dump_list[] = {
+static const u32 ice_regs_dump_list[] = {
 	PFGEN_STATE,
 	PRTGEN_STATUS,
 	QRX_CTRL(0),
@@ -112,33 +138,28 @@
 	QINT_RQCTL(0),
 	PFINT_OICR_ENA,
 	QRX_ITR(0),
+	PF0INT_ITR_0(0),
+	PF0INT_ITR_1(0),
+	PF0INT_ITR_2(0),
 };
 
-/**
- * ice_nvm_version_str - format the NVM version strings
- * @hw: ptr to the hardware info
- */
-static char *ice_nvm_version_str(struct ice_hw *hw)
-{
-	static char buf[ICE_ETHTOOL_FWVER_LEN];
-	u8 ver, patch;
-	u32 full_ver;
-	u16 build;
+struct ice_priv_flag {
+	char name[ETH_GSTRING_LEN];
+	u32 bitno;			/* bit position in pf->flags */
+};
 
-	full_ver = hw->nvm.oem_ver;
-	ver = (u8)((full_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT);
-	build = (u16)((full_ver & ICE_OEM_VER_BUILD_MASK) >>
-		      ICE_OEM_VER_BUILD_SHIFT);
-	patch = (u8)(full_ver & ICE_OEM_VER_PATCH_MASK);
-
-	snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d",
-		 (hw->nvm.ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT,
-		 (hw->nvm.ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT,
-		 hw->nvm.eetrack, ver, build, patch);
-
-	return buf;
+#define ICE_PRIV_FLAG(_name, _bitno) { \
+	.name = _name, \
+	.bitno = _bitno, \
 }
 
+static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
+	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
+	ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+};
+
+#define ICE_PRIV_FLAG_ARRAY_SIZE	ARRAY_SIZE(ice_gstrings_priv_flags)
+
 static void
 ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 {
@@ -152,6 +173,7 @@
 		sizeof(drvinfo->fw_version));
 	strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
 		sizeof(drvinfo->bus_info));
+	drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
 }
 
 static int ice_get_regs_len(struct net_device __always_unused *netdev)
@@ -203,6 +225,620 @@
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 }
 
+static int ice_get_eeprom_len(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	return (int)(pf->hw.nvm.sr_words * sizeof(u16));
+}
+
+static int
+ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
+	       u8 *bytes)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	u16 first_word, last_word, nwords;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct device *dev;
+	int ret = 0;
+	u16 *buf;
+
+	dev = &pf->pdev->dev;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	nwords = last_word - first_word + 1;
+
+	buf = devm_kcalloc(dev, nwords, sizeof(u16), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	status = ice_read_sr_buf(hw, first_word, &nwords, buf);
+	if (status) {
+		dev_err(dev, "ice_read_sr_buf failed, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		eeprom->len = sizeof(u16) * nwords;
+		ret = -EIO;
+		goto out;
+	}
+
+	memcpy(bytes, (u8 *)buf + (eeprom->offset & 1), eeprom->len);
+out:
+	devm_kfree(dev, buf);
+	return ret;
+}
+
+/**
+ * ice_active_vfs - check if there are any active VFs
+ * @pf: board private structure
+ *
+ * Returns true if an active VF is found, otherwise returns false
+ */
+static bool ice_active_vfs(struct ice_pf *pf)
+{
+	struct ice_vf *vf = pf->vf;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++, vf++)
+		if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			return true;
+	return false;
+}
+
+/**
+ * ice_link_test - perform a link test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_link_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	enum ice_status status;
+	bool link_up = false;
+
+	netdev_info(netdev, "link test\n");
+	status = ice_get_link_status(np->vsi->port_info, &link_up);
+	if (status) {
+		netdev_err(netdev, "link query error, status = %d\n", status);
+		return 1;
+	}
+
+	if (!link_up)
+		return 2;
+
+	return 0;
+}
+
+/**
+ * ice_eeprom_test - perform an EEPROM test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	netdev_info(netdev, "EEPROM test\n");
+	return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test - test a register against a set of bit patterns
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5,
+		0x00000000, 0xFFFFFFFF
+	};
+	u32 val, orig_val;
+	int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+		u32 pattern = patterns[i] & mask;
+
+		wr32(hw, reg, pattern);
+		val = rd32(hw, reg);
+		if (val == pattern)
+			continue;
+		dev_err(&pf->pdev->dev,
+			"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n",
+			__func__, reg, pattern, val);
+		return 1;
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		dev_err(&pf->pdev->dev,
+			"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n",
+			__func__, reg, orig_val, val);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_hw *hw = np->vsi->port_info->hw;
+	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+	struct ice_diag_reg_test_info {
+		u32 address;
+		u32 mask;
+		u32 elem_num;
+		u32 elem_size;
+	} ice_reg_list[] = {
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
+			GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+		{GLINT_ITR(2, 0), 0x00000fff, int_elements,
+			GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+		{GLINT_CTL, 0xffff0001, 1, 0}
+	};
+	int i;
+
+	netdev_dbg(netdev, "Register test\n");
+	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+		u32 j;
+
+		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+			u32 mask = ice_reg_list[i].mask;
+			u32 reg = ice_reg_list[i].address +
+				(j * ice_reg_list[i].elem_size);
+
+			/* bail on failure (non-zero return) */
+			if (ice_reg_pattern_test(hw, reg, mask))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_setup_tx_rings(vsi);
+	if (status)
+		goto err_setup_tx_ring;
+
+	status = ice_vsi_setup_rx_rings(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_cfg(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_start_rx_rings(vsi);
+	if (status)
+		goto err_start_rx_ring;
+
+	return status;
+
+err_start_rx_ring:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_rx_ring:
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+err_setup_tx_ring:
+	ice_vsi_free_tx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
+ * @vsi: pointer to the VSI structure
+ *
+ * Function stops and frees VSI rings after a loopback test.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	status = ice_vsi_stop_rx_rings(vsi);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_create_frame - create test packet
+ * @pf: pointer to the PF structure
+ * @ret_data: allocated frame buffer
+ * @size: size of the packet data
+ *
+ * Function allocates a frame with a test pattern on specific offsets.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
+{
+	u8 *data;
+
+	if (!pf)
+		return -EINVAL;
+
+	data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Since the Ethernet test frame should always be at least
+	 * 64 bytes long, fill some octets in the payload with test data.
+	 */
+	memset(data, 0xFF, size);
+	data[32] = 0xDE;
+	data[42] = 0xAD;
+	data[44] = 0xBE;
+	data[46] = 0xEF;
+
+	*ret_data = data;
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_check_frame - verify received loopback frame
+ * @frame: pointer to the raw packet data
+ *
+ * Function verifies received test frame with a pattern.
+ * Returns true if frame matches the pattern, false otherwise.
+ */
+static bool ice_lbtest_check_frame(u8 *frame)
+{
+	/* Validate bytes of a frame under offsets chosen earlier */
+	if (frame[32] == 0xDE &&
+	    frame[42] == 0xAD &&
+	    frame[44] == 0xBE &&
+	    frame[46] == 0xEF &&
+	    frame[48] == 0xFF)
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_diag_send - send test frames to the test ring
+ * @tx_ring: pointer to the transmit ring
+ * @data: pointer to the raw packet data
+ * @size: size of the packet to send
+ *
+ * Function sends one loopback frame on a test Tx ring.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
+{
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+	u64 td_cmd;
+
+	tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
+		return -EINVAL;
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+
+	/* These flags are required for a descriptor to be pushed out */
+	td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+			    ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			    ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+	tx_buf->next_to_watch = tx_desc;
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use >= tx_ring->count)
+		tx_ring->next_to_use = 0;
+
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* Wait until the packets get transmitted to the receive queue. */
+	usleep_range(1000, 2000);
+	dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verifies their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+{
+	struct ice_rx_buf *rx_buf;
+	int valid_frames, i;
+	u8 *received_buf;
+
+	valid_frames = 0;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union ice_32b_rx_flex_desc *rx_desc;
+
+		rx_desc = ICE_RX_DESC(rx_ring, i);
+
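+		/* Skip descriptors not yet written back; the Tx EOP/RS
+		 * command values used in this mask coincide with the Rx
+		 * DD/EOF status bits (0x1 and 0x2).
+		 */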
+		if (!(rx_desc->wb.status_error0 &
+		    cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
+			continue;
+
+		rx_buf = &rx_ring->rx_buf[i];
+		received_buf = page_address(rx_buf->page);
+
+		if (ice_lbtest_check_frame(received_buf))
+			valid_frames++;
+	}
+
+	return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+	struct ice_pf *pf = orig_vsi->back;
+	struct ice_ring *tx_ring, *rx_ring;
+	u8 broadcast[ETH_ALEN], ret = 0;
+	int num_frames, valid_frames;
+	LIST_HEAD(tmp_list);
+	u8 *tx_frame;
+	int i;
+
+	netdev_info(netdev, "loopback test\n");
+
+	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+	if (!test_vsi) {
+		netdev_err(netdev, "Failed to create a VSI for the loopback test");
+		return 1;
+	}
+
+	test_vsi->netdev = netdev;
+	tx_ring = test_vsi->tx_rings[0];
+	rx_ring = test_vsi->rx_rings[0];
+
+	if (ice_lbtest_prepare_rings(test_vsi)) {
+		ret = 2;
+		goto lbtest_vsi_close;
+	}
+
+	if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+		ret = 3;
+		goto lbtest_rings_dis;
+	}
+
+	/* Enable MAC loopback in firmware */
+	if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+		ret = 4;
+		goto lbtest_mac_dis;
+	}
+
+	/* Test VSI needs to receive broadcast packets */
+	eth_broadcast_addr(broadcast);
+	if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
+		ret = 5;
+		goto lbtest_mac_dis;
+	}
+
+	if (ice_add_mac(&pf->hw, &tmp_list)) {
+		ret = 6;
+		goto free_mac_list;
+	}
+
+	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+		ret = 7;
+		goto remove_mac_filters;
+	}
+
+	num_frames = min_t(int, tx_ring->count, 32);
+	for (i = 0; i < num_frames; i++) {
+		if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+			ret = 8;
+			goto lbtest_free_frame;
+		}
+	}
+
+	valid_frames = ice_lbtest_receive_frames(rx_ring);
+	if (!valid_frames)
+		ret = 9;
+	else if (valid_frames != num_frames)
+		ret = 10;
+
+lbtest_free_frame:
+	devm_kfree(&pf->pdev->dev, tx_frame);
+remove_mac_filters:
+	if (ice_remove_mac(&pf->hw, &tmp_list))
+		netdev_err(netdev, "Could not remove MAC filter for the test VSI");
+free_mac_list:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+lbtest_mac_dis:
+	/* Disable MAC loopback after the test is completed. */
+	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+		netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+	if (ice_lbtest_disable_rings(test_vsi))
+		netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+	test_vsi->netdev = NULL;
+	if (ice_vsi_release(test_vsi))
+		netdev_err(netdev, "Failed to remove the test VSI");
+
+	return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	u16 swic_old = pf->sw_int_count;
+
+	netdev_info(netdev, "interrupt test\n");
+
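+	/* Trigger a software interrupt on the OICR vector and give it time
+	 * to fire; the test passes only if sw_int_count has advanced.
+	 */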
+	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M |
+	     GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_SWINT_TRIG_M);
+
+	usleep_range(1000, 2000);
+	return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+	      u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(netdev, "offline testing starting\n");
+
+		set_bit(__ICE_TESTING, pf->state);
+
+		if (ice_active_vfs(pf)) {
+			dev_warn(&pf->pdev->dev,
+				 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			data[ICE_ETH_TEST_REG] = 1;
+			data[ICE_ETH_TEST_EEPROM] = 1;
+			data[ICE_ETH_TEST_INTR] = 1;
+			data[ICE_ETH_TEST_LOOP] = 1;
+			data[ICE_ETH_TEST_LINK] = 1;
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+			clear_bit(__ICE_TESTING, pf->state);
+			goto skip_ol_tests;
+		}
+		/* If the device is online, take it offline for the tests */
+		if (if_running)
+			ice_stop(netdev);
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+		data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+		data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+		data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+		if (data[ICE_ETH_TEST_LINK] ||
+		    data[ICE_ETH_TEST_EEPROM] ||
+		    data[ICE_ETH_TEST_LOOP] ||
+		    data[ICE_ETH_TEST_INTR] ||
+		    data[ICE_ETH_TEST_REG])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__ICE_TESTING, pf->state);
+
+		if (if_running) {
+			int status = ice_open(netdev);
+
+			if (status) {
+				dev_err(&pf->pdev->dev,
+					"Could not open device %s, err %d",
+					pf->int_name, status);
+			}
+		}
+	} else {
+		/* Online tests */
+		netdev_info(netdev, "online testing starting\n");
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		if (data[ICE_ETH_TEST_LINK])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline-only tests are not run online; report them as passing */
+		data[ICE_ETH_TEST_REG] = 0;
+		data[ICE_ETH_TEST_EEPROM] = 0;
+		data[ICE_ETH_TEST_INTR] = 0;
+		data[ICE_ETH_TEST_LOOP] = 0;
+	}
+
+skip_ol_tests:
+	netdev_info(netdev, "testing finished\n");
+}
+
 static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -220,17 +856,17 @@
 
 		ice_for_each_alloc_txq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "tx-queue-%u.tx_packets", i);
+				 "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
 		ice_for_each_alloc_rxq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "rx-queue-%u.rx_packets", i);
+				 "rx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
@@ -238,17 +874,392 @@
 			return;
 
 		for (i = 0; i < ICE_PF_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "port.%s",
+			snprintf(p, ETH_GSTRING_LEN, "%s",
 				 ice_gstrings_pf_stats[i].stat_string);
 			p += ETH_GSTRING_LEN;
 		}
 
+		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "tx_priority_%u_xon.nic", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "tx_priority_%u_xoff.nic", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "rx_priority_%u_xon.nic", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "rx_priority_%u_xoff.nic", i);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	case ETH_SS_TEST:
+		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "%s",
+				 ice_gstrings_priv_flags[i].name);
+			p += ETH_GSTRING_LEN;
+		}
 		break;
 	default:
 		break;
 	}
 }
 
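+/**
+ * ice_set_phys_id - set the port identification LED via 'ethtool -p'
+ * @netdev: network interface device structure
+ * @state: ETHTOOL_ID_ACTIVE to light the LED, ETHTOOL_ID_INACTIVE to restore
+ */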
+static int
+ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool led_active;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		led_active = true;
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		led_active = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ice_aq_set_port_id_led(np->vsi->port_info, !led_active, NULL))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_set_phy_cfg_data config = { 0 };
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_vsi *vsi = np->vsi;
+	u8 sw_cfg_caps, sw_cfg_fec;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	/* Changing the FEC parameters is only supported on the PF VSI */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Get last SW configuration */
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Copy SW configuration returned from PHY caps to PHY config */
+	ice_copy_phy_caps_to_cfg(caps, &config);
+	sw_cfg_caps = caps->caps;
+	sw_cfg_fec = caps->link_fec_options;
+
+	/* Get topology caps, then copy PHY FEC topology caps to PHY config */
+	memset(caps, 0, sizeof(*caps));
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
+	config.link_fec_opt = caps->link_fec_options;
+
+	ice_cfg_phy_fec(&config, req_fec);
+
+	/* If FEC mode has changed, then set PHY configuration and enable AN. */
+	if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) !=
+	    (sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) ||
+	    config.link_fec_opt != sw_cfg_fec) {
+		if (caps->caps & ICE_AQC_PHY_AN_MODE)
+			config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+		status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL);
+
+		if (status)
+			err = -EAGAIN;
+	}
+
+done:
+	devm_kfree(&vsi->back->pdev->dev, caps);
+	return err;
+}
+
+/**
+ * ice_set_fecparam - Set FEC link options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure holding the requested FEC mode
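+ *
+ * Invoked via 'ethtool --set-fec <dev> encoding auto|off|rs|baser'.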
+ */
+static int
+ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_fec_mode fec;
+
+	switch (fecparam->fec) {
+	case ETHTOOL_FEC_AUTO:
+		fec = ICE_FEC_AUTO;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec = ICE_FEC_RS;
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec = ICE_FEC_BASER;
+		break;
+	case ETHTOOL_FEC_OFF:
+	case ETHTOOL_FEC_NONE:
+		fec = ICE_FEC_NONE;
+		break;
+	default:
+		dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n",
+			 fecparam->fec);
+		return -EINVAL;
+	}
+
+	return ice_set_fec_cfg(netdev, fec);
+}
+
+/**
+ * ice_get_fecparam - Get link FEC options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
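+ *
+ * Reported via 'ethtool --show-fec <dev>'.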
+ */
+static int
+ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+
+	if (!pi)
+		return -EOPNOTSUPP;
+	link_info = &pi->phy.link_info;
+
+	/* Set FEC mode based on negotiated link info */
+	switch (link_info->fec_info) {
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_BASER;
+		break;
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+		/* fall through */
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_RS;
+		break;
+	default:
+		fecparam->active_fec = ETHTOOL_FEC_OFF;
+		break;
+	}
+
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Set supported/configured FEC modes based on PHY capability */
+	if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
+		fecparam->fec |= ETHTOOL_FEC_AUTO;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fecparam->fec |= ETHTOOL_FEC_BASER;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		fecparam->fec |= ETHTOOL_FEC_RS;
+	if (caps->link_fec_options == 0)
+		fecparam->fec |= ETHTOOL_FEC_OFF;
+
+done:
+	devm_kfree(&vsi->back->pdev->dev, caps);
+	return err;
+}
+
+/**
+ * ice_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The string-set count reported by ice_get_sset_count() and the strings
+ * reported here must stay in sync for each flag. Add new strings for each
+ * flag to the ice_gstrings_priv_flags array.
+ *
+ * Returns a u32 bitmap of flags.
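+ *
+ * Reported via 'ethtool --show-priv-flags <dev>'.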
+ */
+static u32 ice_get_priv_flags(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u32 i, ret_flags = 0;
+
+	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+		const struct ice_priv_flag *priv_flag;
+
+		priv_flag = &ice_gstrings_priv_flags[i];
+
+		if (test_bit(priv_flag->bitno, pf->flags))
+			ret_flags |= BIT(i);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * ice_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
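+ *
+ * Invoked via 'ethtool --set-priv-flags <dev> <flag> on|off'.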
+ */
+static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	DECLARE_BITMAP(change_flags, ICE_PF_FLAGS_NBITS);
+	DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int ret = 0;
+	u32 i;
+
+	if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
+		return -EINVAL;
+
+	set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+
+	bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
+	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+		const struct ice_priv_flag *priv_flag;
+
+		priv_flag = &ice_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			set_bit(priv_flag->bitno, pf->flags);
+		else
+			clear_bit(priv_flag->bitno, pf->flags);
+	}
+
+	bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
+
+	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) {
+		if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) {
+			enum ice_status status;
+
+			/* Disable FW LLDP engine */
+			status = ice_cfg_lldp_mib_change(&pf->hw, false);
+
+			/* If unregistering for LLDP events fails, this is
+			 * not an error state, as there shouldn't be any
+			 * events to respond to.
+			 */
+			if (status)
+				dev_info(&pf->pdev->dev,
+					 "Failed to unreg for LLDP events\n");
+
+			/* The AQ call to stop the FW LLDP agent will generate
+			 * an error if the agent is already stopped.
+			 */
+			status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
+			if (status)
+				dev_warn(&pf->pdev->dev,
+					 "Failed to stop LLDP agent\n");
+			/* A use case that stops the FW LLDP agent will
+			 * likely not need DCB, so a DCB init failure is
+			 * not a concern of ethtool
+			 */
+			status = ice_init_pf_dcb(pf, true);
+			if (status)
+				dev_warn(&pf->pdev->dev, "Failed to init DCB\n");
+
+			/* Forward LLDP packets to default VSI so that they
+			 * are passed up the stack
+			 */
+			ice_cfg_sw_lldp(vsi, false, true);
+		} else {
+			enum ice_status status;
+			bool dcbx_agent_status;
+
+			/* AQ command to start FW LLDP agent will return an
+			 * error if the agent is already started
+			 */
+			status = ice_aq_start_lldp(&pf->hw, true, NULL);
+			if (status)
+				dev_warn(&pf->pdev->dev,
+					 "Failed to start LLDP agent\n");
+
+			/* AQ command to start FW DCBX agent will fail if
+			 * the agent is already started
+			 */
+			status = ice_aq_start_stop_dcbx(&pf->hw, true,
+							&dcbx_agent_status,
+							NULL);
+			if (status)
+				dev_dbg(&pf->pdev->dev,
+					"Failed to start FW DCBX\n");
+
+			dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n",
+				 dcbx_agent_status ? "ACTIVE" : "DISABLED");
+
+			/* Failure to configure MIB change or init DCB is not
+			 * relevant to ethtool.  Print notification that
+			 * registration/init failed but do not return error
+			 * state to ethtool
+			 */
+			status = ice_init_pf_dcb(pf, true);
+			if (status)
+				dev_dbg(&pf->pdev->dev, "Failed to init DCB\n");
+
+			/* Remove rule to direct LLDP packets to default VSI.
+			 * The FW LLDP engine will now be consuming them.
+			 */
+			ice_cfg_sw_lldp(vsi, false, false);
+
+			/* Register for MIB change events */
+			status = ice_cfg_lldp_mib_change(&pf->hw, true);
+			if (status)
+				dev_dbg(&pf->pdev->dev,
+					"Failed to enable MIB change events\n");
+		}
+	}
+	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+	return ret;
+}
+
 static int ice_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
@@ -260,10 +1271,10 @@
 		 * a private ethtool flag). This is due to the nature of the
 		 * ethtool stats API.
 		 *
-		 * User space programs such as ethtool must make 3 separate
+		 * Userspace programs such as ethtool must make 3 separate
 		 * ioctl requests, one for size, one for the strings, and
 		 * finally one for the stats. Since these cross into
-		 * user space, changes to the number or size could result in
+		 * userspace, changes to the number or size could result in
 		 * undefined memory access or incorrect string<->value
 		 * correlations for statistics.
 		 *
@@ -272,6 +1283,10 @@
 		 * not safe.
 		 */
 		return ICE_ALL_STATS_LEN(netdev);
+	case ETH_SS_TEST:
+		return ICE_TEST_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return ICE_PRIV_FLAG_ARRAY_SIZE;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -285,14 +1300,17 @@
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	struct ice_ring *ring;
-	unsigned int j = 0;
+	unsigned int j;
 	int i = 0;
 	char *p;
 
+	ice_update_pf_stats(pf);
+	ice_update_vsi_stats(vsi);
+
 	for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
 		p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
 		data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
-			    sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 
 	/* populate per queue stats */
@@ -330,60 +1348,765 @@
 		data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
 			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
+
+	for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_tx[j];
+		data[i++] = pf->stats.priority_xoff_tx[j];
+	}
+
+	for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_rx[j];
+		data[i++] = pf->stats.priority_xoff_rx[j];
+	}
 }
 
+/**
+ * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
+ * @netdev: network interface device structure
+ * @ks: ethtool link ksettings struct to fill out
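+ *
+ * phy_type_low and phy_type_high are 64-bit PHY capability bitmaps reported
+ * by firmware; each set bit maps to one or more ethtool link modes, and a
+ * mode is advertised only when no specific speeds were requested or when it
+ * matches one of the requested speeds.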
+ */
+static void
+ice_phy_type_to_ethtool(struct net_device *netdev,
+			struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_link_status *hw_link_info;
+	bool need_add_adv_mode = false;
+	struct ice_vsi *vsi = np->vsi;
+	u64 phy_types_high;
+	u64 phy_types_low;
+
+	hw_link_info = &vsi->port_info->phy.link_info;
+	phy_types_low = vsi->port_info->phy.phy_type_low;
+	phy_types_high = vsi->port_info->phy.phy_type_high;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseKX_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_SX ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseX_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     2500baseT_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_X ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseX_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     2500baseX_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     5000baseT_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_DA ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseKR_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseSR_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseLR_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseCR_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_SR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseSR_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseKR_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     40000baseKR4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     40000baseCR4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     40000baseSR4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     40000baseLR4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseCR2_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     50000baseCR2_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseKR2_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     50000baseKR2_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_FR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseSR2_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     50000baseSR2_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2 ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2 ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+			need_add_adv_mode = true;
+	}
+	if (need_add_adv_mode) {
+		need_add_adv_mode = false;
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseCR4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseSR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+			need_add_adv_mode = true;
+	}
+	if (need_add_adv_mode) {
+		need_add_adv_mode = false;
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseSR4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_LR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseLR4_ER4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+			need_add_adv_mode = true;
+	}
+	if (need_add_adv_mode) {
+		need_add_adv_mode = false;
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseLR4_ER4_Full);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseKR4_Full);
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+			need_add_adv_mode = true;
+	}
+	if (need_add_adv_mode)
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseKR4_Full);
+
+	/* Autoneg PHY types */
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Autoneg);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Autoneg);
+	}
+	if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
+	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Autoneg);
+	}
+}
+
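+/* Bounds for the __ICE_CFG_BUSY wait loop in ice_set_link_ksettings() */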
+#define TEST_SET_BITS_TIMEOUT	50
+#define TEST_SET_BITS_SLEEP_MAX	2000
+#define TEST_SET_BITS_SLEEP_MIN	1000
+
+/**
+ * ice_get_settings_link_up - Get Link settings for when link is up
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ */
+static void
+ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
+			 struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_port_info *pi = np->vsi->port_info;
+	struct ethtool_link_ksettings cap_ksettings;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+	bool unrecog_phy_high = false;
+	bool unrecog_phy_low = false;
+
+	link_info = &vsi->port_info->phy.link_info;
+
+	/* Initialize supported and advertised settings based on PHY settings */
+	switch (link_info->phy_type_low) {
+	case ICE_PHY_TYPE_LOW_100BASE_TX:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100M_SGMII:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_1000BASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_1G_SGMII:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_1000BASE_SX:
+	case ICE_PHY_TYPE_LOW_1000BASE_LX:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_1000BASE_KX:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseKX_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_2500BASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     2500baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_2500BASE_X:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseX_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     2500baseX_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_5GBASE_T:
+	case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     5000baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_SR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_LR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseKR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_25GBASE_T:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseCR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_25GBASE_SR:
+	case ICE_PHY_TYPE_LOW_25GBASE_LR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_25GBASE_KR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseKR_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseCR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseKR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_CP:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseCR2_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     50000baseCR2_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2:
+	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI2:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR:
+	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI1:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseCR2_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseKR2_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     50000baseKR2_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_FR:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseSR2_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseCR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4:
+	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_AUI4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseCR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseSR4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_DR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseLR4_ER4_Full);
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseKR4_Full);
+		break;
+	default:
+		unrecog_phy_low = true;
+	}
+
+	switch (link_info->phy_type_high) {
+	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     100000baseKR4_Full);
+		break;
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		break;
+	default:
+		unrecog_phy_high = true;
+	}
+
+	if (unrecog_phy_low && unrecog_phy_high) {
+		/* if we got here and link is up something bad is afoot */
+		netdev_info(netdev,
+			    "WARNING: Unrecognized PHY_Low (0x%llx).\n",
+			    (u64)link_info->phy_type_low);
+		netdev_info(netdev,
+			    "WARNING: Unrecognized PHY_High (0x%llx).\n",
+			    (u64)link_info->phy_type_high);
+	}
+
+	/* Now that we've worked out everything that could be supported by the
+	 * current PHY type, get what is supported by the NVM and intersect
+	 * them to get what is truly supported
+	 */
+	memset(&cap_ksettings, 0, sizeof(cap_ksettings));
+	ice_phy_type_to_ethtool(netdev, &cap_ksettings);
+	ethtool_intersect_link_masks(ks, &cap_ksettings);
+
+	switch (link_info->link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		ks->base.speed = SPEED_100000;
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		ks->base.speed = SPEED_50000;
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		ks->base.speed = SPEED_40000;
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		ks->base.speed = SPEED_25000;
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		ks->base.speed = SPEED_20000;
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		ks->base.speed = SPEED_10000;
+		break;
+	case ICE_AQ_LINK_SPEED_5GB:
+		ks->base.speed = SPEED_5000;
+		break;
+	case ICE_AQ_LINK_SPEED_2500MB:
+		ks->base.speed = SPEED_2500;
+		break;
+	case ICE_AQ_LINK_SPEED_1000MB:
+		ks->base.speed = SPEED_1000;
+		break;
+	case ICE_AQ_LINK_SPEED_100MB:
+		ks->base.speed = SPEED_100;
+		break;
+	default:
+		netdev_info(netdev,
+			    "WARNING: Unrecognized link_speed (0x%x).\n",
+			    link_info->link_speed);
+		break;
+	}
+	ks->base.duplex = DUPLEX_FULL;
+
+	if (link_info->an_info & ICE_AQ_AN_COMPLETED)
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Autoneg);
+
+	/* Set flow control negotiated Rx/Tx pause */
+	switch (pi->fc.current_mode) {
+	case ICE_FC_FULL:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+		break;
+	case ICE_FC_TX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_RX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_PFC:
+		/* fall through */
+	default:
+		ethtool_link_ksettings_del_link_mode(ks, lp_advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	}
+}
+
+/**
+ * ice_get_settings_link_down - Get the Link settings when link is down
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ *
+ * Reports link settings that can be determined when link is down
+ */
+static void
+ice_get_settings_link_down(struct ethtool_link_ksettings *ks,
+			   struct net_device *netdev)
+{
+	/* link is down and the driver needs to fall back on
+	 * supported PHY types to figure out what info to display
+	 */
+	ice_phy_type_to_ethtool(netdev, ks);
+
+	/* With no link, speed and duplex are unknown */
+	ks->base.speed = SPEED_UNKNOWN;
+	ks->base.duplex = DUPLEX_UNKNOWN;
+}
+
+/**
+ * ice_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Reports speed/duplex settings based on media_type
+ */
 static int
 ice_get_link_ksettings(struct net_device *netdev,
 		       struct ethtool_link_ksettings *ks)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
 	struct ice_link_status *hw_link_info;
 	struct ice_vsi *vsi = np->vsi;
-	bool link_up;
+	enum ice_status status;
+	int err = 0;
 
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+	ethtool_link_ksettings_zero_link_mode(ks, lp_advertising);
 	hw_link_info = &vsi->port_info->phy.link_info;
-	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
-
-	ethtool_link_ksettings_add_link_mode(ks, supported,
-					     10000baseT_Full);
-	ethtool_link_ksettings_add_link_mode(ks, advertising,
-					     10000baseT_Full);
 
 	/* set speed and duplex */
-	if (link_up) {
-		switch (hw_link_info->link_speed) {
-		case ICE_AQ_LINK_SPEED_100MB:
-			ks->base.speed = SPEED_100;
-			break;
-		case ICE_AQ_LINK_SPEED_2500MB:
-			ks->base.speed = SPEED_2500;
-			break;
-		case ICE_AQ_LINK_SPEED_5GB:
-			ks->base.speed = SPEED_5000;
-			break;
-		case ICE_AQ_LINK_SPEED_10GB:
-			ks->base.speed = SPEED_10000;
-			break;
-		case ICE_AQ_LINK_SPEED_25GB:
-			ks->base.speed = SPEED_25000;
-			break;
-		case ICE_AQ_LINK_SPEED_40GB:
-			ks->base.speed = SPEED_40000;
-			break;
-		default:
-			ks->base.speed = SPEED_UNKNOWN;
-			break;
-		}
-
-		ks->base.duplex = DUPLEX_FULL;
-	} else {
-		ks->base.speed = SPEED_UNKNOWN;
-		ks->base.duplex = DUPLEX_UNKNOWN;
-	}
+	if (hw_link_info->link_info & ICE_AQ_LINK_UP)
+		ice_get_settings_link_up(ks, netdev);
+	else
+		ice_get_settings_link_down(ks, netdev);
 
 	/* set autoneg settings */
-	ks->base.autoneg = ((hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
-			    AUTONEG_ENABLE : AUTONEG_DISABLE);
+	ks->base.autoneg = (hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
+		AUTONEG_ENABLE : AUTONEG_DISABLE;
 
 	/* set media type settings */
 	switch (vsi->port_info->phy.media_type) {
@@ -417,40 +2140,403 @@
 	/* flow control is symmetric and always supported */
 	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
 
-	switch (vsi->port_info->fc.req_mode) {
-	case ICE_FC_FULL:
-		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
-		break;
-	case ICE_FC_TX_PAUSE:
-		ethtool_link_ksettings_add_link_mode(ks, advertising,
-						     Asym_Pause);
-		break;
-	case ICE_FC_RX_PAUSE:
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
+	if (status) {
+		err = -EIO;
+		goto done;
+	}
+
+	/* Set the advertised flow control based on the PHY capability */
+	if ((caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
+	    (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) {
 		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     Asym_Pause);
-		break;
-	case ICE_FC_PFC:
-	default:
+	} else if (caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else if (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else {
 		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
 		ethtool_link_ksettings_del_link_mode(ks, advertising,
 						     Asym_Pause);
-		break;
 	}
 
-	return 0;
+	/* Set advertised FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_TOPO_CAP, caps, NULL);
+	if (status) {
+		err = -EIO;
+		goto done;
+	}
+
+	/* Set supported FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+
+done:
+	devm_kfree(&vsi->back->pdev->dev, caps);
+	return err;
 }
 
 /**
- * ice_get_rxnfc - command to get RX flow classification rules
+ * ice_ksettings_find_adv_link_speed - Find advertising link speed
+ * @ks: ethtool ksettings
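+ *
+ * Returns the ICE_AQ_LINK_SPEED_* bits that correspond to the link modes
+ * advertised in @ks.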
+ */
+static u16
+ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks)
+{
+	u16 adv_link_speed = 0;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_100MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  5000baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseLR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseKR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_25GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseLR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseKR4_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_40GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseCR2_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseKR2_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseSR2_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseLR4_ER4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseKR4_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_100GB;
+
+	return adv_link_speed;
+}
+
+/**
+ * ice_setup_autoneg
+ * @p: port info
+ * @ks: ethtool_link_ksettings
+ * @config: configuration that will be sent down to FW
+ * @autoneg_enabled: whether autonegotiation is enabled
+ * @autoneg_changed: set to 1 if the autonegotiation setting changes
+ * @netdev: network interface device structure
+ *
+ * Setup PHY autonegotiation feature
+ */
+static int
+ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
+		  struct ice_aqc_set_phy_cfg_data *config,
+		  u8 autoneg_enabled, u8 *autoneg_changed,
+		  struct net_device *netdev)
+{
+	int err = 0;
+
+	*autoneg_changed = 0;
+
+	/* Check autoneg */
+	if (autoneg_enabled == AUTONEG_ENABLE) {
+		/* If autoneg was not already enabled */
+		if (!(p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)) {
+			/* If autoneg is not supported, return error */
+			if (!ethtool_link_ksettings_test_link_mode(ks,
+								   supported,
+								   Autoneg)) {
+				netdev_info(netdev, "Autoneg not supported on this phy.\n");
+				err = -EINVAL;
+			} else {
+				/* Autoneg is allowed to change */
+				config->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+				*autoneg_changed = 1;
+			}
+		}
+	} else {
+		/* If autoneg is currently enabled */
+		if (p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) {
+			/* If autoneg is supported, 10GBASE_T is the only PHY
+			 * that can disable it, so return an error for any
+			 * other PHY
+			 */
+			if (ethtool_link_ksettings_test_link_mode(ks,
+								  supported,
+								  Autoneg)) {
+				netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
+				err = -EINVAL;
+			} else {
+				/* Autoneg is allowed to change */
+				config->caps &= ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+				*autoneg_changed = 1;
+			}
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ice_set_link_ksettings - Set Speed and Duplex
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Set speed/duplex per media_types advertised/forced
+ */
+static int
+ice_set_link_ksettings(struct net_device *netdev,
+		       const struct ethtool_link_ksettings *ks)
+{
+	u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT, lport = 0;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ethtool_link_ksettings safe_ks, copy_ks;
+	struct ice_aqc_get_phy_caps_data *abilities;
+	u16 adv_link_speed, curr_link_speed, idx;
+	struct ice_aqc_set_phy_cfg_data config;
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_port_info *p;
+	u8 autoneg_changed = 0;
+	enum ice_status status;
+	u64 phy_type_high;
+	u64 phy_type_low;
+	int err = 0;
+	bool linkup;
+
+	p = np->vsi->port_info;
+
+	if (!p)
+		return -EOPNOTSUPP;
+
+	/* Check if this is LAN VSI */
+	ice_for_each_vsi(pf, idx)
+		if (pf->vsi[idx]->type == ICE_VSI_PF) {
+			if (np->vsi != pf->vsi[idx])
+				return -EOPNOTSUPP;
+			break;
+		}
+
+	if (p->phy.media_type != ICE_MEDIA_BASET &&
+	    p->phy.media_type != ICE_MEDIA_FIBER &&
+	    p->phy.media_type != ICE_MEDIA_BACKPLANE &&
+	    p->phy.media_type != ICE_MEDIA_DA &&
+	    p->phy.link_info.link_info & ICE_AQ_LINK_UP)
+		return -EOPNOTSUPP;
+
+	/* copy the ksettings to copy_ks to avoid modifying the original */
+	memcpy(&copy_ks, ks, sizeof(copy_ks));
+
+	/* save autoneg out of ksettings */
+	autoneg = copy_ks.base.autoneg;
+
+	memset(&safe_ks, 0, sizeof(safe_ks));
+
+	/* Get link modes supported by hardware. */
+	ice_phy_type_to_ethtool(netdev, &safe_ks);
+
+	/* and check against modes requested by user.
+	 * Return an error if unsupported mode was set.
+	 */
+	if (!bitmap_subset(copy_ks.link_modes.advertising,
+			   safe_ks.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS))
+		return -EINVAL;
+
+	/* get our own copy of the bits to check against */
+	memset(&safe_ks, 0, sizeof(safe_ks));
+	safe_ks.base.cmd = copy_ks.base.cmd;
+	safe_ks.base.link_mode_masks_nwords =
+		copy_ks.base.link_mode_masks_nwords;
+	ice_get_link_ksettings(netdev, &safe_ks);
+
+	/* set autoneg back to what it currently is */
+	copy_ks.base.autoneg = safe_ks.base.autoneg;
+	/* we don't compare the speed */
+	copy_ks.base.speed = safe_ks.base.speed;
+
+	/* If copy_ks.base and safe_ks.base are not the same now, then they are
+	 * trying to set something that we do not support.
+	 */
+	if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base)))
+		return -EOPNOTSUPP;
+
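+	/* Serialize against concurrent configuration changes; give up
+	 * after TEST_SET_BITS_TIMEOUT sleeps of roughly 1-2 ms each
+	 */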
+	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
+	}
+
+	abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities),
+				 GFP_KERNEL);
+	if (!abilities) {
+		/* Don't leave the config-busy bit set on the error path */
+		clear_bit(__ICE_CFG_BUSY, pf->state);
+		return -ENOMEM;
+	}
+
+	/* Get the current PHY config */
+	status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_SW_CFG, abilities,
+				     NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Copy abilities to config in case autoneg is not set below */
+	memset(&config, 0, sizeof(config));
+	config.caps = abilities->caps & ~ICE_AQC_PHY_AN_MODE;
+	if (abilities->caps & ICE_AQC_PHY_AN_MODE)
+		config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	/* Check autoneg */
+	err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed,
+				netdev);
+
+	if (err)
+		goto done;
+
+	/* Call to get the current link speed */
+	p->phy.get_link_info = true;
+	status = ice_get_link_status(p, &linkup);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	curr_link_speed = p->phy.link_info.link_speed;
+	adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
+
+	/* If speed didn't get set, set it to what it currently is.
+	 * This is needed because if advertise is 0 (as it is when autoneg
+	 * is disabled) then speed won't get set.
+	 */
+	if (!adv_link_speed)
+		adv_link_speed = curr_link_speed;
+
+	/* Convert the advertise link speeds to their corresponded PHY_TYPE */
+	ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed);
+
+	if (!autoneg_changed && adv_link_speed == curr_link_speed) {
+		netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
+		goto done;
+	}
+
+	/* copy over the rest of the abilities */
+	config.low_power_ctrl = abilities->low_power_ctrl;
+	config.eee_cap = abilities->eee_cap;
+	config.eeer_value = abilities->eeer_value;
+	config.link_fec_opt = abilities->link_fec_options;
+
+	/* save the requested speeds */
+	p->phy.link_info.req_speeds = adv_link_speed;
+
+	/* set link and auto negotiation so changes take effect */
+	config.caps |= ICE_AQ_PHY_ENA_LINK;
+
+	if (phy_type_low || phy_type_high) {
+		config.phy_type_high = cpu_to_le64(phy_type_high) &
+			abilities->phy_type_high;
+		config.phy_type_low = cpu_to_le64(phy_type_low) &
+			abilities->phy_type_low;
+	} else {
+		err = -EAGAIN;
+		netdev_info(netdev, "Nothing changed. No PHY_TYPE is corresponded to advertised link speed.\n");
+		goto done;
+	}
+
+	/* If link is up, put link down */
+	if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) {
+		/* Tell the OS link is going down, the link will go
+		 * back up when fw says it is ready asynchronously
+		 */
+		ice_print_link_msg(np->vsi, false);
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+	}
+
+	/* make the aq call */
+	status = ice_aq_set_phy_cfg(&pf->hw, lport, &config, NULL);
+	if (status) {
+		netdev_info(netdev, "Set phy config failed,\n");
+		err = -EAGAIN;
+	}
+
+done:
+	devm_kfree(&pf->pdev->dev, abilities);
+	clear_bit(__ICE_CFG_BUSY, pf->state);
+
+	return err;
+}
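+
+/* Usage sketch (illustrative, not part of the driver): this handler services
+ * the ethtool ETHTOOL_SLINKSETTINGS request, reached from userspace with,
+ * e.g. ("<dev>" is a placeholder interface name):
+ *
+ *	ethtool -s <dev> speed 25000 duplex full autoneg off
+ */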
+
+/**
+ * ice_get_rxnfc - command to get Rx flow classification rules
  * @netdev: network interface device structure
  * @cmd: ethtool rxnfc command
  * @rule_locs: buffer to return Rx flow classification rules
  *
  * Returns Success if the command is supported.
  */
-static int ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
-			 u32 __always_unused *rule_locs)
+static int
+ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+	      u32 __always_unused *rule_locs)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
@@ -550,7 +2636,7 @@
 		    vsi->tx_rings[0]->count, new_tx_cnt);
 
 	tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
-				sizeof(struct ice_ring), GFP_KERNEL);
+				sizeof(*tx_rings), GFP_KERNEL);
 	if (!tx_rings) {
 		err = -ENOMEM;
 		goto done;
@@ -582,7 +2668,7 @@
 		    vsi->rx_rings[0]->count, new_rx_cnt);
 
 	rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
-				sizeof(struct ice_ring), GFP_KERNEL);
+				sizeof(*rx_rings), GFP_KERNEL);
 	if (!rx_rings) {
 		err = -ENOMEM;
 		goto done;
@@ -672,17 +2758,17 @@
 {
 	/* restart autonegotiation */
 	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_link_status *hw_link_info;
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_port_info *pi;
 	enum ice_status status;
-	bool link_up;
 
 	pi = vsi->port_info;
-	hw_link_info = &pi->phy.link_info;
-	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+	/* If VSI state is up, then restart autoneg with link up */
+	if (!test_bit(__ICE_DOWN, vsi->back->state))
+		status = ice_aq_set_link_restart_an(pi, true, NULL);
+	else
+		status = ice_aq_set_link_restart_an(pi, false, NULL);
 
-	status = ice_aq_set_link_restart_an(pi, link_up, NULL);
 	if (status) {
 		netdev_info(netdev, "link restart failed, err %d aq_err %d\n",
 			    status, pi->hw->adminq.sq_last_status);
@@ -696,39 +2782,68 @@
  * ice_get_pauseparam - Get Flow Control status
  * @netdev: network interface device structure
  * @pause: ethernet pause (flow control) parameters
+ *
+ * Get requested flow control status from PHY capability.
+ * If autoneg is true, then ethtool will send the ETHTOOL_GSET ioctl which
+ * is handled by ice_get_link_ksettings. ice_get_link_ksettings will report
+ * the negotiated Rx/Tx pause via lp_advertising.
  */
 static void
 ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_port_info *pi;
+	struct ice_port_info *pi = np->vsi->port_info;
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	enum ice_status status;
 
-	pi = np->vsi->port_info;
-	pause->autoneg =
-		((pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) ?
-		 AUTONEG_ENABLE : AUTONEG_DISABLE);
+	/* Initialize pause params */
+	pause->rx_pause = 0;
+	pause->tx_pause = 0;
 
-	if (pi->fc.current_mode == ICE_FC_RX_PAUSE) {
-		pause->rx_pause = 1;
-	} else if (pi->fc.current_mode == ICE_FC_TX_PAUSE) {
+	dcbx_cfg = &pi->local_dcbx_cfg;
+
+	pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps),
+			     GFP_KERNEL);
+	if (!pcaps)
+		return;
+
+	/* Get current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+				     NULL);
+	if (status)
+		goto out;
+
+	pause->autoneg = ((pcaps->caps & ICE_AQC_PHY_AN_MODE) ?
+			AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (dcbx_cfg->pfc.pfcena)
+		/* PFC enabled so report LFC as off */
+		goto out;
+
+	if (pcaps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
 		pause->tx_pause = 1;
-	} else if (pi->fc.current_mode == ICE_FC_FULL) {
+	if (pcaps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
 		pause->rx_pause = 1;
-		pause->tx_pause = 1;
-	}
+
+out:
+	devm_kfree(&vsi->back->pdev->dev, pcaps);
 }
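+
+/* Usage sketch (illustrative): the pause state reported by this handler can
+ * be queried from userspace with:
+ *
+ *	ethtool -a <dev>
+ */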
 
 /**
  * ice_set_pauseparam - Set Flow Control parameter
  * @netdev: network interface device structure
- * @pause: return tx/rx flow control status
+ * @pause: return Tx/Rx flow control status
  */
 static int
 ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *pcaps;
 	struct ice_link_status *hw_link_info;
 	struct ice_pf *pf = np->vsi->back;
+	struct ice_dcbx_cfg *dcbx_cfg;
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_hw *hw = &pf->hw;
 	struct ice_port_info *pi;
@@ -736,9 +2851,11 @@
 	u8 aq_failures;
 	bool link_up;
 	int err = 0;
+	u32 is_an;
 
 	pi = vsi->port_info;
 	hw_link_info = &pi->phy.link_info;
+	dcbx_cfg = &pi->local_dcbx_cfg;
 	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
 
 	/* Changing the port's flow control is not supported if this isn't the
@@ -749,7 +2866,30 @@
 		return -EOPNOTSUPP;
 	}
 
-	if (pause->autoneg != (hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+	/* When ETHTOOL_GLINKSETTINGS is defined, get_pauseparam reports the
+	 * configured and negotiated flow control pause, and pause->autoneg
+	 * reports the SW-configured setting. Compare pause->autoneg against
+	 * that SW-configured value to prevent the user from using
+	 * set_pauseparam to change autoneg.
+	 */
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	/* Get current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+				     NULL);
+	if (status) {
+		kfree(pcaps);
+		return -EIO;
+	}
+
+	is_an = ((pcaps->caps & ICE_AQC_PHY_AN_MODE) ?
+			AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	kfree(pcaps);
+
+	if (pause->autoneg != is_an) {
 		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
 		return -EOPNOTSUPP;
 	}
@@ -761,6 +2901,10 @@
 		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
 	}
 
+	if (dcbx_cfg->pfc.pfcena) {
+		netdev_info(netdev, "Priority flow control enabled. Cannot set link flow control.\n");
+		return -EOPNOTSUPP;
+	}
 	if (pause->rx_pause && pause->tx_pause)
 		pi->fc.req_mode = ICE_FC_FULL;
 	else if (pause->rx_pause && !pause->tx_pause)
@@ -823,7 +2967,7 @@
 }
 
 /**
- * ice_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * ice_get_rxfh_indir_size - get the Rx flow hash indirection table size
  * @netdev: network interface device structure
  *
  * Returns the table size.
@@ -836,7 +2980,7 @@
 }
 
 /**
- * ice_get_rxfh - get the rx flow hash indirection table
+ * ice_get_rxfh - get the Rx flow hash indirection table
  * @netdev: network interface device structure
  * @indir: indirection table
  * @key: hash key
@@ -883,17 +3027,18 @@
 }
 
 /**
- * ice_set_rxfh - set the rx flow hash indirection table
+ * ice_set_rxfh - set the Rx flow hash indirection table
  * @netdev: network interface device structure
  * @indir: indirection table
  * @key: hash key
  * @hfunc: hash function
  *
- * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * Returns -EINVAL if the table specifies an invalid queue ID, otherwise
  * returns 0 after programming the table.
  */
-static int ice_set_rxfh(struct net_device *netdev, const u32 *indir,
-			const u8 *key, const u8 hfunc)
+static int
+ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
+	     const u8 hfunc)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
@@ -947,16 +3092,331 @@
 	return 0;
 }
 
+enum ice_container_type {
+	ICE_RX_CONTAINER,
+	ICE_TX_CONTAINER,
+};
+
+/**
+ * ice_get_rc_coalesce - get ITR values for specific ring container
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @c_type: container type, Rx or Tx
+ * @rc: ring container that the ITR values will come from
+ *
+ * Query the device for ice_ring_container specific ITR values. This is
+ * done per ice_ring_container because each q_vector can have 1 or more rings
+ * and all of said ring(s) will have the same ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
+		    struct ice_ring_container *rc)
+{
+	struct ice_pf *pf;
+
+	if (!rc->ring)
+		return -EINVAL;
+
+	pf = rc->ring->vsi->back;
+
+	switch (c_type) {
+	case ICE_RX_CONTAINER:
+		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
+		ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+		ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl;
+		break;
+	case ICE_TX_CONTAINER:
+		ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
+		ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+		break;
+	default:
+		dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_q_coalesce - get a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue for getting ITR/INTRL (coalesce) settings
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Getting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+	if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+		if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
+					&vsi->rx_rings[q_num]->q_vector->rx))
+			return -EINVAL;
+		if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
+					&vsi->tx_rings[q_num]->q_vector->tx))
+			return -EINVAL;
+	} else if (q_num < vsi->num_rxq) {
+		if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
+					&vsi->rx_rings[q_num]->q_vector->rx))
+			return -EINVAL;
+	} else if (q_num < vsi->num_txq) {
+		if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
+					&vsi->tx_rings[q_num]->q_vector->tx))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
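+
+/* For example (illustrative), on a VSI with 4 Rx and 2 Tx queues: q_num 1
+ * reports both Rx and Tx settings, q_num 3 reports Rx settings only, and
+ * q_num 4 fails with -EINVAL.
+ */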
+
+/**
+ * __ice_get_coalesce - get ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q_num: queue number to get the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we return coalesce settings
+ * based on queue number 0, else use the actual q_num passed in.
+ */
+static int
+__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+		   int q_num)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (q_num < 0)
+		q_num = 0;
+
+	if (ice_get_q_coalesce(vsi, ec, q_num))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+{
+	return __ice_get_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+		       struct ethtool_coalesce *ec)
+{
+	return __ice_get_coalesce(netdev, ec, q_num);
+}
+
+/**
+ * ice_set_rc_coalesce - set ITR values for specific ring container
+ * @c_type: container type, Rx or Tx
+ * @ec: ethtool structure from user to update ITR settings
+ * @rc: ring container that the ITR values will come from
+ * @vsi: VSI associated to the ring container
+ *
+ * Set specific ITR values. This is done per ice_ring_container because each
+ * q_vector can have 1 or more rings and all of said ring(s) will have the same
+ * ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
+		    struct ice_ring_container *rc, struct ice_vsi *vsi)
+{
+	const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx";
+	u32 use_adaptive_coalesce, coalesce_usecs;
+	struct ice_pf *pf = vsi->back;
+	u16 itr_setting;
+
+	if (!rc->ring)
+		return -EINVAL;
+
+	switch (c_type) {
+	case ICE_RX_CONTAINER:
+		if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
+		    (ec->rx_coalesce_usecs_high &&
+		     ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
+			netdev_info(vsi->netdev,
+				    "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
+				    c_type_str, pf->hw.intrl_gran,
+				    ICE_MAX_INTRL);
+			return -EINVAL;
+		}
+		if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
+			rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
+			wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
+			     ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high,
+						   pf->hw.intrl_gran));
+		}
+
+		use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+		coalesce_usecs = ec->rx_coalesce_usecs;
+
+		break;
+	case ICE_TX_CONTAINER:
+		if (ec->tx_coalesce_usecs_high) {
+			netdev_info(vsi->netdev,
+				    "setting %s-usecs-high is not supported\n",
+				    c_type_str);
+			return -EINVAL;
+		}
+
+		use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+		coalesce_usecs = ec->tx_coalesce_usecs;
+
+		break;
+	default:
+		dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type);
+		return -EINVAL;
+	}
+
+	itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+	if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
+		netdev_info(vsi->netdev,
+			    "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
+			    c_type_str, c_type_str);
+		return -EINVAL;
+	}
+
+	if (coalesce_usecs > ICE_ITR_MAX) {
+		netdev_info(vsi->netdev,
+			    "Invalid value, %s-usecs range is 0-%d\n",
+			    c_type_str, ICE_ITR_MAX);
+		return -EINVAL;
+	}
+
+	/* hardware only supports an ITR granularity of 2us */
+	if (coalesce_usecs % 2 != 0) {
+		netdev_info(vsi->netdev,
+			    "Invalid value, %s-usecs must be even\n",
+			    c_type_str);
+		return -EINVAL;
+	}
+
+	if (use_adaptive_coalesce) {
+		rc->itr_setting |= ICE_ITR_DYNAMIC;
+	} else {
+		/* store the user-facing value as it was set */
+		rc->itr_setting = coalesce_usecs;
+		/* set to static and convert to value HW understands */
+		rc->target_itr =
+			ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting));
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_q_coalesce - set a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue that need updating
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Setting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+	if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+		if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
+					&vsi->rx_rings[q_num]->q_vector->rx,
+					vsi))
+			return -EINVAL;
+
+		if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
+					&vsi->tx_rings[q_num]->q_vector->tx,
+					vsi))
+			return -EINVAL;
+	} else if (q_num < vsi->num_rxq) {
+		if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
+					&vsi->rx_rings[q_num]->q_vector->rx,
+					vsi))
+			return -EINVAL;
+	} else if (q_num < vsi->num_txq) {
+		if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
+					&vsi->tx_rings[q_num]->q_vector->tx,
+					vsi))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * __ice_set_coalesce - set ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this request
+ * @ec: ethtool structure holding the coalesce settings to apply
+ * @q_num: queue number to set the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we set the coalesce settings
+ * for all Tx/Rx queues, else use the actual q_num passed in.
+ */
+static int
+__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+		   int q_num)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (q_num < 0) {
+		int i;
+
+		ice_for_each_q_vector(vsi, i) {
+			if (ice_set_q_coalesce(vsi, ec, i))
+				return -EINVAL;
+		}
+		goto set_complete;
+	}
+
+	if (ice_set_q_coalesce(vsi, ec, q_num))
+		return -EINVAL;
+
+set_complete:
+
+	return 0;
+}
+
+static int
+ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+{
+	return __ice_set_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
+		       struct ethtool_coalesce *ec)
+{
+	return __ice_set_coalesce(netdev, ec, q_num);
+}
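+
+/* Usage sketch (illustrative): these handlers back the ethtool coalesce
+ * interfaces, e.g. from userspace:
+ *
+ *	ethtool -C <dev> adaptive-rx off rx-usecs 50
+ *	ethtool --per-queue <dev> queue_mask 0x1 --coalesce tx-usecs 64
+ */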
+
 static const struct ethtool_ops ice_ethtool_ops = {
 	.get_link_ksettings	= ice_get_link_ksettings,
+	.set_link_ksettings	= ice_set_link_ksettings,
 	.get_drvinfo            = ice_get_drvinfo,
 	.get_regs_len           = ice_get_regs_len,
 	.get_regs               = ice_get_regs,
 	.get_msglevel           = ice_get_msglevel,
 	.set_msglevel           = ice_set_msglevel,
+	.self_test		= ice_self_test,
 	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= ice_get_eeprom_len,
+	.get_eeprom		= ice_get_eeprom,
+	.get_coalesce		= ice_get_coalesce,
+	.set_coalesce		= ice_set_coalesce,
 	.get_strings		= ice_get_strings,
+	.set_phys_id		= ice_set_phys_id,
 	.get_ethtool_stats      = ice_get_ethtool_stats,
+	.get_priv_flags		= ice_get_priv_flags,
+	.set_priv_flags		= ice_set_priv_flags,
 	.get_sset_count		= ice_get_sset_count,
 	.get_rxnfc		= ice_get_rxnfc,
 	.get_ringparam		= ice_get_ringparam,
@@ -968,8 +3428,40 @@
 	.get_rxfh_indir_size	= ice_get_rxfh_indir_size,
 	.get_rxfh		= ice_get_rxfh,
 	.set_rxfh		= ice_set_rxfh,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_per_queue_coalesce = ice_get_per_q_coalesce,
+	.set_per_queue_coalesce = ice_set_per_q_coalesce,
+	.get_fecparam		= ice_get_fecparam,
+	.set_fecparam		= ice_set_fecparam,
 };
 
+static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
+	.get_link_ksettings	= ice_get_link_ksettings,
+	.set_link_ksettings	= ice_set_link_ksettings,
+	.get_drvinfo		= ice_get_drvinfo,
+	.get_regs_len		= ice_get_regs_len,
+	.get_regs		= ice_get_regs,
+	.get_msglevel		= ice_get_msglevel,
+	.set_msglevel		= ice_set_msglevel,
+	.get_eeprom_len		= ice_get_eeprom_len,
+	.get_eeprom		= ice_get_eeprom,
+	.get_strings		= ice_get_strings,
+	.get_ethtool_stats	= ice_get_ethtool_stats,
+	.get_sset_count		= ice_get_sset_count,
+	.get_ringparam		= ice_get_ringparam,
+	.set_ringparam		= ice_set_ringparam,
+	.nway_reset		= ice_nway_reset,
+};
+
+/**
+ * ice_set_ethtool_safe_mode_ops - setup safe mode ethtool ops
+ * @netdev: network interface device structure
+ */
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_safe_mode_ops;
+}
+
 /**
  * ice_set_ethtool_ops - setup netdev ethtool ops
  * @netdev: network interface device structure
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
new file mode 100644
index 0000000..cbd53b5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -0,0 +1,1549 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flex_pipe.h"
+
+/**
+ * ice_pkg_val_buf
+ * @buf: pointer to the ice buffer
+ *
+ * This helper function validates a buffer's header.
+ */
+static struct ice_buf_hdr *ice_pkg_val_buf(struct ice_buf *buf)
+{
+	struct ice_buf_hdr *hdr;
+	u16 section_count;
+	u16 data_end;
+
+	hdr = (struct ice_buf_hdr *)buf->buf;
+	/* verify data */
+	section_count = le16_to_cpu(hdr->section_count);
+	if (section_count < ICE_MIN_S_COUNT || section_count > ICE_MAX_S_COUNT)
+		return NULL;
+
+	data_end = le16_to_cpu(hdr->data_end);
+	if (data_end < ICE_MIN_S_DATA_END || data_end > ICE_MAX_S_DATA_END)
+		return NULL;
+
+	return hdr;
+}
+
+/**
+ * ice_find_buf_table
+ * @ice_seg: pointer to the ice segment
+ *
+ * Returns the address of the buffer table within the ice segment.
+ */
+static struct ice_buf_table *ice_find_buf_table(struct ice_seg *ice_seg)
+{
+	struct ice_nvm_table *nvms;
+
+	nvms = (struct ice_nvm_table *)
+		(ice_seg->device_table +
+		 le32_to_cpu(ice_seg->device_table_count));
+
+	return (__force struct ice_buf_table *)
+		(nvms->vers + le32_to_cpu(nvms->table_count));
+}
+
+/**
+ * ice_pkg_enum_buf
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This function will enumerate all the buffers in the ice segment. The first
+ * call is made with the ice_seg parameter non-NULL; on subsequent calls,
+ * ice_seg is set to NULL which continues the enumeration. When the function
+ * returns a NULL pointer, then the end of the buffers has been reached, or an
+ * unexpected value has been detected (for example an invalid section count or
+ * an invalid buffer end value).
+ */
+static struct ice_buf_hdr *
+ice_pkg_enum_buf(struct ice_seg *ice_seg, struct ice_pkg_enum *state)
+{
+	if (ice_seg) {
+		state->buf_table = ice_find_buf_table(ice_seg);
+		if (!state->buf_table)
+			return NULL;
+
+		state->buf_idx = 0;
+		return ice_pkg_val_buf(state->buf_table->buf_array);
+	}
+
+	if (++state->buf_idx < le32_to_cpu(state->buf_table->buf_count))
+		return ice_pkg_val_buf(state->buf_table->buf_array +
+				       state->buf_idx);
+	else
+		return NULL;
+}
+
+/**
+ * ice_pkg_advance_sect
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This helper function will advance the section within the ice segment,
+ * also advancing the buffer if needed.
+ */
+static bool
+ice_pkg_advance_sect(struct ice_seg *ice_seg, struct ice_pkg_enum *state)
+{
+	if (!ice_seg && !state->buf)
+		return false;
+
+	if (!ice_seg && state->buf)
+		if (++state->sect_idx < le16_to_cpu(state->buf->section_count))
+			return true;
+
+	state->buf = ice_pkg_enum_buf(ice_seg, state);
+	if (!state->buf)
+		return false;
+
+	/* start of new buffer, reset section index */
+	state->sect_idx = 0;
+	return true;
+}
+
+/**
+ * ice_pkg_enum_section
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ *
+ * This function will enumerate all the sections of a particular type in the
+ * ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the matching
+ * sections has been reached.
+ */
+static void *
+ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+		     u32 sect_type)
+{
+	u16 offset, size;
+
+	if (ice_seg)
+		state->type = sect_type;
+
+	if (!ice_pkg_advance_sect(ice_seg, state))
+		return NULL;
+
+	/* scan for next matching section */
+	while (state->buf->section_entry[state->sect_idx].type !=
+	       cpu_to_le32(state->type))
+		if (!ice_pkg_advance_sect(NULL, state))
+			return NULL;
+
+	/* validate section */
+	offset = le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+	if (offset < ICE_MIN_S_OFF || offset > ICE_MAX_S_OFF)
+		return NULL;
+
+	size = le16_to_cpu(state->buf->section_entry[state->sect_idx].size);
+	if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ)
+		return NULL;
+
+	/* make sure the section fits in the buffer */
+	if (offset + size > ICE_PKG_BUF_SIZE)
+		return NULL;
+
+	state->sect_type =
+		le32_to_cpu(state->buf->section_entry[state->sect_idx].type);
+
+	/* calc pointer to this section */
+	state->sect = ((u8 *)state->buf) +
+		le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+
+	return state->sect;
+}
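+
+/* Typical enumeration pattern (illustrative sketch; the section type and the
+ * per-section handler are hypothetical), mirroring the use in ice_fill_tbl():
+ *
+ *	struct ice_pkg_enum state;
+ *	void *sect;
+ *
+ *	memset(&state, 0, sizeof(state));
+ *	sect = ice_pkg_enum_section(hw->seg, &state, sect_type);
+ *	while (sect) {
+ *		process_section(sect);
+ *		sect = ice_pkg_enum_section(NULL, &state, sect_type);
+ *	}
+ */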
+
+/**
+ * ice_acquire_global_cfg_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the global config lock for reading
+ * or writing of the package. When attempting to obtain write access, the
+ * caller must check for the following two return values:
+ *
+ * ICE_SUCCESS        - Means the caller has acquired the global config lock
+ *                      and can perform writing of the package.
+ * ICE_ERR_AQ_NO_WORK - Indicates another driver has already written the
+ *                      package or has found that no update was necessary; in
+ *                      this case, the caller can just skip performing any
+ *                      update of the package.
+ */
+static enum ice_status
+ice_acquire_global_cfg_lock(struct ice_hw *hw,
+			    enum ice_aq_res_access_type access)
+{
+	enum ice_status status;
+
+	status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, access,
+				 ICE_GLOBAL_CFG_LOCK_TIMEOUT);
+
+	if (!status)
+		mutex_lock(&ice_global_cfg_lock_sw);
+	else if (status == ICE_ERR_AQ_NO_WORK)
+		ice_debug(hw, ICE_DBG_PKG,
+			  "Global config lock: No work to do\n");
+
+	return status;
+}
+
+/**
+ * ice_release_global_cfg_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the global config lock.
+ */
+static void ice_release_global_cfg_lock(struct ice_hw *hw)
+{
+	mutex_unlock(&ice_global_cfg_lock_sw);
+	ice_release_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID);
+}
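+
+/* The two helpers above are used as a bracketing pair around package writes,
+ * as ice_dwnld_cfg_bufs() below does; a minimal sketch:
+ *
+ *	status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+ *	if (!status) {
+ *		... download the package buffers ...
+ *		ice_release_global_cfg_lock(hw);
+ *	}
+ */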
+
+/**
+ * ice_aq_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer to transfer
+ * @buf_size: the size of the package buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Download Package (0x0C40)
+ */
+static enum ice_status
+ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+		    u16 buf_size, bool last_buf, u32 *error_offset,
+		    u32 *error_info, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_download_pkg *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (error_offset)
+		*error_offset = 0;
+	if (error_info)
+		*error_info = 0;
+
+	cmd = &desc.params.download_pkg;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_download_pkg);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (last_buf)
+		cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+	status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+	if (status == ICE_ERR_AQ_ERROR) {
+		/* Read error from buffer only when the FW returned an error */
+		struct ice_aqc_download_pkg_resp *resp;
+
+		resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+		if (error_offset)
+			*error_offset = le32_to_cpu(resp->error_offset);
+		if (error_info)
+			*error_info = le32_to_cpu(resp->error_info);
+	}
+
+	return status;
+}
+
+/**
+ * ice_find_seg_in_pkg
+ * @hw: pointer to the hardware structure
+ * @seg_type: the segment type to search for (e.g., SEGMENT_TYPE_CPK)
+ * @pkg_hdr: pointer to the package header to be searched
+ *
+ * This function searches a package file for a particular segment type. On
+ * success it returns a pointer to the segment header, otherwise it will
+ * return NULL.
+ */
+static struct ice_generic_seg_hdr *
+ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type,
+		    struct ice_pkg_hdr *pkg_hdr)
+{
+	u32 i;
+
+	ice_debug(hw, ICE_DBG_PKG, "Package format version: %d.%d.%d.%d\n",
+		  pkg_hdr->format_ver.major, pkg_hdr->format_ver.minor,
+		  pkg_hdr->format_ver.update, pkg_hdr->format_ver.draft);
+
+	/* Search all package segments for the requested segment type */
+	for (i = 0; i < le32_to_cpu(pkg_hdr->seg_count); i++) {
+		struct ice_generic_seg_hdr *seg;
+
+		seg = (struct ice_generic_seg_hdr *)
+			((u8 *)pkg_hdr + le32_to_cpu(pkg_hdr->seg_offset[i]));
+
+		if (le32_to_cpu(seg->seg_type) == seg_type)
+			return seg;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware. Metadata buffers are skipped, and the first metadata buffer
+ * found indicates that the rest of the buffers are all metadata buffers.
+ */
+static enum ice_status
+ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+	enum ice_status status;
+	struct ice_buf_hdr *bh;
+	u32 offset, info, i;
+
+	if (!bufs || !count)
+		return ICE_ERR_PARAM;
+
+	/* If the first buffer's first section has its metadata bit set
+	 * then there are no buffers to be downloaded, and the operation is
+	 * considered a success.
+	 */
+	bh = (struct ice_buf_hdr *)bufs;
+	if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+		return 0;
+
+	/* reset pkg_dwnld_status in case this function is called in the
+	 * reset/rebuild flow
+	 */
+	hw->pkg_dwnld_status = ICE_AQ_RC_OK;
+
+	status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+	if (status) {
+		if (status == ICE_ERR_AQ_NO_WORK)
+			hw->pkg_dwnld_status = ICE_AQ_RC_EEXIST;
+		else
+			hw->pkg_dwnld_status = hw->adminq.sq_last_status;
+		return status;
+	}
+
+	for (i = 0; i < count; i++) {
+		bool last = ((i + 1) == count);
+
+		if (!last) {
+			/* check next buffer for metadata flag */
+			bh = (struct ice_buf_hdr *)(bufs + i + 1);
+
+			/* A set metadata flag in the next buffer will signal
+			 * that the current buffer will be the last buffer
+			 * downloaded
+			 */
+			if (le16_to_cpu(bh->section_count))
+				if (le32_to_cpu(bh->section_entry[0].type) &
+				    ICE_METADATA_BUF)
+					last = true;
+		}
+
+		bh = (struct ice_buf_hdr *)(bufs + i);
+
+		status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last,
+					     &offset, &info, NULL);
+
+		/* Save AQ status from download package */
+		hw->pkg_dwnld_status = hw->adminq.sq_last_status;
+		if (status) {
+			ice_debug(hw, ICE_DBG_PKG,
+				  "Pkg download failed: err %d off %d inf %d\n",
+				  status, offset, info);
+
+			break;
+		}
+
+		if (last)
+			break;
+	}
+
+	ice_release_global_cfg_lock(hw);
+
+	return status;
+}
+
+/**
+ * ice_aq_get_pkg_info_list
+ * @hw: pointer to the hardware structure
+ * @pkg_info: the buffer which will receive the information list
+ * @buf_size: the size of the pkg_info information buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Package Info List (0x0C43)
+ */
+static enum ice_status
+ice_aq_get_pkg_info_list(struct ice_hw *hw,
+			 struct ice_aqc_get_pkg_info_resp *pkg_info,
+			 u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_pkg_info_list);
+
+	return ice_aq_send_cmd(hw, &desc, pkg_info, buf_size, cd);
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_status
+ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+	struct ice_buf_table *ice_buf_tbl;
+
+	ice_debug(hw, ICE_DBG_PKG, "Segment version: %d.%d.%d.%d\n",
+		  ice_seg->hdr.seg_ver.major, ice_seg->hdr.seg_ver.minor,
+		  ice_seg->hdr.seg_ver.update, ice_seg->hdr.seg_ver.draft);
+
+	ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+		  le32_to_cpu(ice_seg->hdr.seg_type),
+		  le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_name);
+
+	ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+	ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+		  le32_to_cpu(ice_buf_tbl->buf_count));
+
+	return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+				  le32_to_cpu(ice_buf_tbl->buf_count));
+}
+
+/**
+ * ice_init_pkg_info
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to the driver's package hdr
+ *
+ * Saves off the package details into the HW structure.
+ */
+static enum ice_status
+ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
+{
+	struct ice_global_metadata_seg *meta_seg;
+	struct ice_generic_seg_hdr *seg_hdr;
+
+	if (!pkg_hdr)
+		return ICE_ERR_PARAM;
+
+	meta_seg = (struct ice_global_metadata_seg *)
+		   ice_find_seg_in_pkg(hw, SEGMENT_TYPE_METADATA, pkg_hdr);
+	if (meta_seg) {
+		hw->pkg_ver = meta_seg->pkg_ver;
+		memcpy(hw->pkg_name, meta_seg->pkg_name, sizeof(hw->pkg_name));
+
+		ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n",
+			  meta_seg->pkg_ver.major, meta_seg->pkg_ver.minor,
+			  meta_seg->pkg_ver.update, meta_seg->pkg_ver.draft,
+			  meta_seg->pkg_name);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Did not find metadata segment in driver package\n");
+		return ICE_ERR_CFG;
+	}
+
+	seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
+	if (seg_hdr) {
+		hw->ice_pkg_ver = seg_hdr->seg_ver;
+		memcpy(hw->ice_pkg_name, seg_hdr->seg_name,
+		       sizeof(hw->ice_pkg_name));
+
+		ice_debug(hw, ICE_DBG_PKG, "Ice Pkg: %d.%d.%d.%d, %s\n",
+			  seg_hdr->seg_ver.major, seg_hdr->seg_ver.minor,
+			  seg_hdr->seg_ver.update, seg_hdr->seg_ver.draft,
+			  seg_hdr->seg_name);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Did not find ice segment in driver package\n");
+		return ICE_ERR_CFG;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_pkg_info
+ * @hw: pointer to the hardware structure
+ *
+ * Store details of the package currently loaded in HW into the HW structure.
+ */
+static enum ice_status ice_get_pkg_info(struct ice_hw *hw)
+{
+	struct ice_aqc_get_pkg_info_resp *pkg_info;
+	enum ice_status status;
+	u16 size;
+	u32 i;
+
+	size = sizeof(*pkg_info) + (sizeof(pkg_info->pkg_info[0]) *
+				    (ICE_PKG_CNT - 1));
+	pkg_info = kzalloc(size, GFP_KERNEL);
+	if (!pkg_info)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_get_pkg_info_list(hw, pkg_info, size, NULL);
+	if (status)
+		goto init_pkg_free_alloc;
+
+	for (i = 0; i < le32_to_cpu(pkg_info->count); i++) {
+#define ICE_PKG_FLAG_COUNT	4
+		char flags[ICE_PKG_FLAG_COUNT + 1] = { 0 };
+		u8 place = 0;
+
+		if (pkg_info->pkg_info[i].is_active) {
+			flags[place++] = 'A';
+			hw->active_pkg_ver = pkg_info->pkg_info[i].ver;
+			memcpy(hw->active_pkg_name,
+			       pkg_info->pkg_info[i].name,
+			       sizeof(hw->active_pkg_name));
+			hw->active_pkg_in_nvm = pkg_info->pkg_info[i].is_in_nvm;
+		}
+		if (pkg_info->pkg_info[i].is_active_at_boot)
+			flags[place++] = 'B';
+		if (pkg_info->pkg_info[i].is_modified)
+			flags[place++] = 'M';
+		if (pkg_info->pkg_info[i].is_in_nvm)
+			flags[place++] = 'N';
+
+		ice_debug(hw, ICE_DBG_PKG, "Pkg[%d]: %d.%d.%d.%d,%s,%s\n",
+			  i, pkg_info->pkg_info[i].ver.major,
+			  pkg_info->pkg_info[i].ver.minor,
+			  pkg_info->pkg_info[i].ver.update,
+			  pkg_info->pkg_info[i].ver.draft,
+			  pkg_info->pkg_info[i].name, flags);
+	}
+
+init_pkg_free_alloc:
+	kfree(pkg_info);
+
+	return status;
+}
+
+/**
+ * ice_verify_pkg - verify package
+ * @pkg: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * Verifies various attributes of the package file, including length, format
+ * version, and the requirement of at least one segment.
+ */
+static enum ice_status ice_verify_pkg(struct ice_pkg_hdr *pkg, u32 len)
+{
+	u32 seg_count;
+	u32 i;
+
+	if (len < sizeof(*pkg))
+		return ICE_ERR_BUF_TOO_SHORT;
+
+	if (pkg->format_ver.major != ICE_PKG_FMT_VER_MAJ ||
+	    pkg->format_ver.minor != ICE_PKG_FMT_VER_MNR ||
+	    pkg->format_ver.update != ICE_PKG_FMT_VER_UPD ||
+	    pkg->format_ver.draft != ICE_PKG_FMT_VER_DFT)
+		return ICE_ERR_CFG;
+
+	/* pkg must have at least one segment */
+	seg_count = le32_to_cpu(pkg->seg_count);
+	if (seg_count < 1)
+		return ICE_ERR_CFG;
+
+	/* make sure segment array fits in package length */
+	if (len < sizeof(*pkg) + ((seg_count - 1) * sizeof(pkg->seg_offset)))
+		return ICE_ERR_BUF_TOO_SHORT;
+
+	/* all segments must fit within length */
+	for (i = 0; i < seg_count; i++) {
+		u32 off = le32_to_cpu(pkg->seg_offset[i]);
+		struct ice_generic_seg_hdr *seg;
+
+		/* segment header must fit */
+		if (len < off + sizeof(*seg))
+			return ICE_ERR_BUF_TOO_SHORT;
+
+		seg = (struct ice_generic_seg_hdr *)((u8 *)pkg + off);
+
+		/* segment body must fit */
+		if (len < off + le32_to_cpu(seg->seg_size))
+			return ICE_ERR_BUF_TOO_SHORT;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_free_seg - free package segment pointer
+ * @hw: pointer to the hardware structure
+ *
+ * Frees the package segment pointer in the proper manner, depending on
+ * whether the segment was allocated or just the passed-in pointer was stored.
+ */
+void ice_free_seg(struct ice_hw *hw)
+{
+	if (hw->pkg_copy) {
+		devm_kfree(ice_hw_to_dev(hw), hw->pkg_copy);
+		hw->pkg_copy = NULL;
+		hw->pkg_size = 0;
+	}
+	hw->seg = NULL;
+}
+
+/**
+ * ice_init_pkg_regs - initialize additional package registers
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_pkg_regs(struct ice_hw *hw)
+{
+#define ICE_SW_BLK_INP_MASK_L 0xFFFFFFFF
+#define ICE_SW_BLK_INP_MASK_H 0x0000FFFF
+#define ICE_SW_BLK_IDX	0
+
+	/* setup Switch block input mask, which is 48-bits in two parts */
+	wr32(hw, GL_PREEXT_L2_PMASK0(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_L);
+	wr32(hw, GL_PREEXT_L2_PMASK1(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_H);
+}
+
+/**
+ * ice_chk_pkg_version - check package version for compatibility with driver
+ * @pkg_ver: pointer to a version structure to check
+ *
+ * Check to make sure that the package about to be downloaded is compatible with
+ * the driver. To be compatible, the major and minor components of the package
+ * version must match our ICE_PKG_SUPP_VER_MAJ and ICE_PKG_SUPP_VER_MNR
+ * definitions.
+ */
+static enum ice_status ice_chk_pkg_version(struct ice_pkg_ver *pkg_ver)
+{
+	if (pkg_ver->major != ICE_PKG_SUPP_VER_MAJ ||
+	    pkg_ver->minor != ICE_PKG_SUPP_VER_MNR)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	return 0;
+}
+
+/**
+ * ice_init_pkg - initialize/download package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function initializes a package. The package contains HW tables
+ * required to do packet processing. First, the function extracts package
+ * information such as version. Then it finds the ice configuration segment
+ * within the package; this function then saves a copy of the segment pointer
+ * within the supplied package buffer. Next, the function will cache any hints
+ * from the package, followed by downloading the package itself. Note that if
+ * a previous PF driver has already downloaded the package successfully, then
+ * the current driver will not have to download the package again.
+ *
+ * The local package contents will be used to query default behavior and to
+ * update specific sections of the HW's version of the package (e.g. to update
+ * the parse graph to understand new protocols).
+ *
+ * This function stores a pointer to the package buffer memory, and it is
+ * expected that the supplied buffer will not be freed immediately. If the
+ * package buffer needs to be freed, such as when read from a file, use
+ * ice_copy_and_init_pkg() instead of calling ice_init_pkg() directly.
+ */
+enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
+{
+	struct ice_pkg_hdr *pkg;
+	enum ice_status status;
+	struct ice_seg *seg;
+
+	if (!buf || !len)
+		return ICE_ERR_PARAM;
+
+	pkg = (struct ice_pkg_hdr *)buf;
+	status = ice_verify_pkg(pkg, len);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "failed to verify pkg (err: %d)\n",
+			  status);
+		return status;
+	}
+
+	/* initialize package info */
+	status = ice_init_pkg_info(hw, pkg);
+	if (status)
+		return status;
+
+	/* before downloading the package, check package version for
+	 * compatibility with driver
+	 */
+	status = ice_chk_pkg_version(&hw->pkg_ver);
+	if (status)
+		return status;
+
+	/* find segment in given package */
+	seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg);
+	if (!seg) {
+		ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* download package */
+	status = ice_download_pkg(hw, seg);
+	if (status == ICE_ERR_AQ_NO_WORK) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "package previously loaded - no work.\n");
+		status = 0;
+	}
+
+	/* Get information on the package currently loaded in HW, then make sure
+	 * the driver is compatible with this version.
+	 */
+	if (!status) {
+		status = ice_get_pkg_info(hw);
+		if (!status)
+			status = ice_chk_pkg_version(&hw->active_pkg_ver);
+	}
+
+	if (!status) {
+		hw->seg = seg;
+		/* on successful package download update other required
+		 * registers to support the package and fill HW tables
+		 * with package content.
+		 */
+		ice_init_pkg_regs(hw);
+		ice_fill_blk_tbls(hw);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n",
+			  status);
+	}
+
+	return status;
+}
+
+/**
+ * ice_copy_and_init_pkg - initialize/download a copy of the package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function copies the package buffer, and then calls ice_init_pkg() to
+ * initialize the copied package contents.
+ *
+ * The copying is necessary if the package buffer supplied is constant, or if
+ * the memory may disappear shortly after calling this function.
+ *
+ * If the package buffer resides in the data segment and can be modified, the
+ * caller is free to use ice_init_pkg() instead of ice_copy_and_init_pkg().
+ *
+ * However, if the package buffer needs to be copied first, such as when being
+ * read from a file, the caller should use ice_copy_and_init_pkg().
+ *
+ * This function will first copy the package buffer, before calling
+ * ice_init_pkg(). The caller is free to immediately destroy the original
+ * package buffer, as the new copy will be managed by this function and
+ * related routines.
+ */
+enum ice_status ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len)
+{
+	enum ice_status status;
+	u8 *buf_copy;
+
+	if (!buf || !len)
+		return ICE_ERR_PARAM;
+
+	buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL);
+
+	status = ice_init_pkg(hw, buf_copy, len);
+	if (status) {
+		/* Free the copy, since we failed to initialize the package */
+		devm_kfree(ice_hw_to_dev(hw), buf_copy);
+	} else {
+		/* Track the copied pkg so we can free it later */
+		hw->pkg_copy = buf_copy;
+		hw->pkg_size = len;
+	}
+
+	return status;
+}
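+
+/* Usage sketch (illustrative; the file name and the dev/hw variables are
+ * placeholders): a caller that obtains the package via request_firmware()
+ * must use the copying variant, since the firmware memory is released
+ * afterwards:
+ *
+ *	const struct firmware *fw;
+ *
+ *	if (!request_firmware(&fw, "ice.pkg", dev)) {
+ *		ice_copy_and_init_pkg(hw, fw->data, fw->size);
+ *		release_firmware(fw);
+ *	}
+ */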
+
+/* PTG Management */
+
+/**
+ * ice_ptg_find_ptype - Search for packet type group using packet type (ptype)
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to search for
+ * @ptg: pointer to variable that receives the PTG
+ *
+ * This function will search the PTGs for a particular ptype, returning the
+ * PTG ID that contains it through the PTG parameter, with the value of
+ * ICE_DEFAULT_PTG (0) meaning it is part of the default PTG.
+ */
+static enum ice_status
+ice_ptg_find_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 *ptg)
+{
+	if (ptype >= ICE_XLT1_CNT || !ptg)
+		return ICE_ERR_PARAM;
+
+	*ptg = hw->blk[blk].xlt1.ptypes[ptype].ptg;
+	return 0;
+}
+
+/**
+ * ice_ptg_alloc_val - Allocates a new packet type group ID by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptg: the PTG to allocate
+ *
+ * This function allocates a given packet type group ID specified by the PTG
+ * parameter.
+ */
+static void ice_ptg_alloc_val(struct ice_hw *hw, enum ice_block blk, u8 ptg)
+{
+	hw->blk[blk].xlt1.ptg_tbl[ptg].in_use = true;
+}
+
+/**
+ * ice_ptg_remove_ptype - Removes ptype from a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to remove
+ * @ptg: the PTG to remove the ptype from
+ *
+ * This function will remove the ptype from the specific PTG, and move it to
+ * the default PTG (ICE_DEFAULT_PTG).
+ */
+static enum ice_status
+ice_ptg_remove_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+	struct ice_ptg_ptype **ch;
+	struct ice_ptg_ptype *p;
+
+	if (ptype > ICE_XLT1_CNT - 1)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	/* Should not happen if .in_use is set, bad config */
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype)
+		return ICE_ERR_CFG;
+
+	/* find the ptype within this PTG, and bypass the link over it */
+	p = hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	ch = &hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	while (p) {
+		if (ptype == (p - hw->blk[blk].xlt1.ptypes)) {
+			*ch = p->next_ptype;
+			break;
+		}
+
+		ch = &p->next_ptype;
+		p = p->next_ptype;
+	}
+
+	hw->blk[blk].xlt1.ptypes[ptype].ptg = ICE_DEFAULT_PTG;
+	hw->blk[blk].xlt1.ptypes[ptype].next_ptype = NULL;
+
+	return 0;
+}
+
+/**
+ * ice_ptg_add_mv_ptype - Adds/moves ptype to a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to add or move
+ * @ptg: the PTG to add or move the ptype to
+ *
+ * This function will either add or move a ptype to a particular PTG depending
+ * on whether the ptype is already part of another group. Note that using a
+ * destination PTG ID of ICE_DEFAULT_PTG (0) will move the ptype to the
+ * default PTG.
+ */
+static enum ice_status
+ice_ptg_add_mv_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+	enum ice_status status;
+	u8 original_ptg;
+
+	if (ptype > ICE_XLT1_CNT - 1)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use && ptg != ICE_DEFAULT_PTG)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	status = ice_ptg_find_ptype(hw, blk, ptype, &original_ptg);
+	if (status)
+		return status;
+
+	/* Is ptype already in the correct PTG? */
+	if (original_ptg == ptg)
+		return 0;
+
+	/* Remove from original PTG and move back to the default PTG */
+	if (original_ptg != ICE_DEFAULT_PTG)
+		ice_ptg_remove_ptype(hw, blk, ptype, original_ptg);
+
+	/* Moving to default PTG? Then we're done with this request */
+	if (ptg == ICE_DEFAULT_PTG)
+		return 0;
+
+	/* Add ptype to PTG at beginning of list */
+	hw->blk[blk].xlt1.ptypes[ptype].next_ptype =
+		hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype =
+		&hw->blk[blk].xlt1.ptypes[ptype];
+
+	hw->blk[blk].xlt1.ptypes[ptype].ptg = ptg;
+	hw->blk[blk].xlt1.t[ptype] = ptg;
+
+	return 0;
+}
+
+/* Block / table size info */
+struct ice_blk_size_details {
+	u16 xlt1;			/* # XLT1 entries */
+	u16 xlt2;			/* # XLT2 entries */
+	u16 prof_tcam;			/* # profile ID TCAM entries */
+	u16 prof_id;			/* # profile IDs */
+	u8 prof_cdid_bits;		/* # CDID one-hot bits used in key */
+	u16 prof_redir;			/* # profile redirection entries */
+	u16 es;				/* # extraction sequence entries */
+	u16 fvw;			/* # field vector words */
+	u8 overwrite;			/* overwrite existing entries allowed */
+	u8 reverse;			/* reverse FV order */
+};
+
+static const struct ice_blk_size_details blk_sizes[ICE_BLK_COUNT] = {
+	/**
+	 * Table Definitions
+	 * XLT1 - Number of entries in XLT1 table
+	 * XLT2 - Number of entries in XLT2 table
+	 * TCAM - Number of entries in the Profile ID TCAM table
+	 * CDID - Control Domain ID of the hardware block
+	 * PRED - Number of entries in the Profile Redirection Table
+	 * FV   - Number of entries in the Field Vector
+	 * FVW  - Width (in WORDs) of the Field Vector
+	 * OVR  - Overwrite existing table entries
+	 * REV  - Reverse FV
+	 */
+	/*          XLT1        , XLT2        ,TCAM, PID,CDID,PRED,   FV, FVW */
+	/*          Overwrite   , Reverse FV */
+	/* SW  */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 256,   0,  256, 256,  48,
+		    false, false },
+	/* ACL */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  32,
+		    false, false },
+	/* FD  */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  24,
+		    false, true  },
+	/* RSS */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  24,
+		    true,  true  },
+	/* PE  */ { ICE_XLT1_CNT, ICE_XLT2_CNT,  64,  32,   0,   32,  32,  24,
+		    false, false },
+};
+
+enum ice_sid_all {
+	ICE_SID_XLT1_OFF = 0,
+	ICE_SID_XLT2_OFF,
+	ICE_SID_PR_OFF,
+	ICE_SID_PR_REDIR_OFF,
+	ICE_SID_ES_OFF,
+	ICE_SID_OFF_COUNT,
+};
+
+/* VSIG Management */
+
+/**
+ * ice_vsig_find_vsi - find a VSIG that contains a specified VSI
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI of interest
+ * @vsig: pointer to receive the VSI group
+ *
+ * This function will look up the VSI entry in the XLT2 list and return
+ * the VSI group it is associated with.
+ */
+static enum ice_status
+ice_vsig_find_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 *vsig)
+{
+	if (!vsig || vsi >= ICE_MAX_VSI)
+		return ICE_ERR_PARAM;
+
+	/* As long as there's a default or valid VSIG associated with the input
+	 * VSI, the function returns success. Any handling of VSIGs will be
+	 * done by the following add, update or remove functions.
+	 */
+	*vsig = hw->blk[blk].xlt2.vsis[vsi].vsig;
+
+	return 0;
+}
+
+/**
+ * ice_vsig_alloc_val - allocate a new VSIG by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: the VSIG to allocate
+ *
+ * This function will allocate a given VSIG specified by the VSIG parameter.
+ */
+static u16 ice_vsig_alloc_val(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) {
+		INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+		hw->blk[blk].xlt2.vsig_tbl[idx].in_use = true;
+	}
+
+	return ICE_VSIG_VALUE(idx, hw->pf_id);
+}
+
+/**
+ * ice_vsig_remove_vsi - remove VSI from VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to remove
+ * @vsig: VSI group to remove from
+ *
+ * The function will remove the input VSI from its VSI group and move it
+ * to the DEFAULT_VSIG.
+ */
+static enum ice_status
+ice_vsig_remove_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+	struct ice_vsig_vsi **vsi_head, *vsi_cur, *vsi_tgt;
+	u16 idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	/* entry already in default VSIG, don't have to remove */
+	if (idx == ICE_DEFAULT_VSIG)
+		return 0;
+
+	vsi_head = &hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	if (!(*vsi_head))
+		return ICE_ERR_CFG;
+
+	vsi_tgt = &hw->blk[blk].xlt2.vsis[vsi];
+	vsi_cur = (*vsi_head);
+
+	/* iterate the VSI list, skip over the entry to be removed */
+	while (vsi_cur) {
+		if (vsi_tgt == vsi_cur) {
+			(*vsi_head) = vsi_cur->next_vsi;
+			break;
+		}
+		vsi_head = &vsi_cur->next_vsi;
+		vsi_cur = vsi_cur->next_vsi;
+	}
+
+	/* verify if VSI was removed from group list */
+	if (!vsi_cur)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	vsi_cur->vsig = ICE_DEFAULT_VSIG;
+	vsi_cur->changed = 1;
+	vsi_cur->next_vsi = NULL;
+
+	return 0;
+}
+
+/**
+ * ice_vsig_add_mv_vsi - add or move a VSI to a VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to move
+ * @vsig: destination VSI group
+ *
+ * This function will move or add the input VSI to the target VSIG.
+ * The function will find the original VSIG the VSI belongs to and
+ * move the entry to the DEFAULT_VSIG, update the original VSIG and
+ * then move entry to the new VSIG.
+ */
+static enum ice_status
+ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+	struct ice_vsig_vsi *tmp;
+	enum ice_status status;
+	u16 orig_vsig, idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+		return ICE_ERR_PARAM;
+
+	/* if the VSIG is not in use and is not the default VSIG, this VSIG
+	 * doesn't exist.
+	 */
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use &&
+	    vsig != ICE_DEFAULT_VSIG)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+	if (status)
+		return status;
+
+	/* no update required if VSIGs match */
+	if (orig_vsig == vsig)
+		return 0;
+
+	if (orig_vsig != ICE_DEFAULT_VSIG) {
+		/* remove entry from orig_vsig and add to default VSIG */
+		status = ice_vsig_remove_vsi(hw, blk, vsi, orig_vsig);
+		if (status)
+			return status;
+	}
+
+	if (idx == ICE_DEFAULT_VSIG)
+		return 0;
+
+	/* Create VSI entry and add VSIG and prop_mask values */
+	hw->blk[blk].xlt2.vsis[vsi].vsig = vsig;
+	hw->blk[blk].xlt2.vsis[vsi].changed = 1;
+
+	/* Add new entry to the head of the VSIG list */
+	tmp = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi =
+		&hw->blk[blk].xlt2.vsis[vsi];
+	hw->blk[blk].xlt2.vsis[vsi].next_vsi = tmp;
+	hw->blk[blk].xlt2.t[vsi] = vsig;
+
+	return 0;
+}
+
+/* Block / table section IDs */
+static const u32 ice_blk_sids[ICE_BLK_COUNT][ICE_SID_OFF_COUNT] = {
+	/* SWITCH */
+	{	ICE_SID_XLT1_SW,
+		ICE_SID_XLT2_SW,
+		ICE_SID_PROFID_TCAM_SW,
+		ICE_SID_PROFID_REDIR_SW,
+		ICE_SID_FLD_VEC_SW
+	},
+
+	/* ACL */
+	{	ICE_SID_XLT1_ACL,
+		ICE_SID_XLT2_ACL,
+		ICE_SID_PROFID_TCAM_ACL,
+		ICE_SID_PROFID_REDIR_ACL,
+		ICE_SID_FLD_VEC_ACL
+	},
+
+	/* FD */
+	{	ICE_SID_XLT1_FD,
+		ICE_SID_XLT2_FD,
+		ICE_SID_PROFID_TCAM_FD,
+		ICE_SID_PROFID_REDIR_FD,
+		ICE_SID_FLD_VEC_FD
+	},
+
+	/* RSS */
+	{	ICE_SID_XLT1_RSS,
+		ICE_SID_XLT2_RSS,
+		ICE_SID_PROFID_TCAM_RSS,
+		ICE_SID_PROFID_REDIR_RSS,
+		ICE_SID_FLD_VEC_RSS
+	},
+
+	/* PE */
+	{	ICE_SID_XLT1_PE,
+		ICE_SID_XLT2_PE,
+		ICE_SID_PROFID_TCAM_PE,
+		ICE_SID_PROFID_REDIR_PE,
+		ICE_SID_FLD_VEC_PE
+	}
+};
+
+/**
+ * ice_init_sw_xlt1_db - init software XLT1 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt1_db(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 pt;
+
+	for (pt = 0; pt < hw->blk[blk].xlt1.count; pt++) {
+		u8 ptg;
+
+		ptg = hw->blk[blk].xlt1.t[pt];
+		if (ptg != ICE_DEFAULT_PTG) {
+			ice_ptg_alloc_val(hw, blk, ptg);
+			ice_ptg_add_mv_ptype(hw, blk, pt, ptg);
+		}
+	}
+}
+
+/**
+ * ice_init_sw_xlt2_db - init software XLT2 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt2_db(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 vsi;
+
+	for (vsi = 0; vsi < hw->blk[blk].xlt2.count; vsi++) {
+		u16 vsig;
+
+		vsig = hw->blk[blk].xlt2.t[vsi];
+		if (vsig) {
+			ice_vsig_alloc_val(hw, blk, vsig);
+			ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+			/* no changes at this time, since this has been
+			 * initialized from the original package
+			 */
+			hw->blk[blk].xlt2.vsis[vsi].changed = 0;
+		}
+	}
+}
+
+/**
+ * ice_init_sw_db - init software database from HW tables
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_sw_db(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		ice_init_sw_xlt1_db(hw, (enum ice_block)i);
+		ice_init_sw_xlt2_db(hw, (enum ice_block)i);
+	}
+}
+
+/**
+ * ice_fill_tbl - Reads content of a single table type into database
+ * @hw: pointer to the hardware structure
+ * @block_id: Block ID of the table to copy
+ * @sid: Section ID of the table to copy
+ *
+ * Will attempt to read the entire content of a given table of a single block
+ * into the driver database. We assume that the buffer will always
+ * be as large or larger than the data contained in the package. If
+ * this condition is not met, there is most likely an error in the package
+ * contents.
+ */
+static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
+{
+	u32 dst_len, sect_len, offset = 0;
+	struct ice_prof_redir_section *pr;
+	struct ice_prof_id_section *pid;
+	struct ice_xlt1_section *xlt1;
+	struct ice_xlt2_section *xlt2;
+	struct ice_sw_fv_section *es;
+	struct ice_pkg_enum state;
+	u8 *src, *dst;
+	void *sect;
+
+	/* if the HW segment pointer is NULL then the first iteration of
+	 * ice_pkg_enum_section() will fail. In this case the HW tables will
+	 * not be filled and the function returns without error.
+	 */
+	if (!hw->seg) {
+		ice_debug(hw, ICE_DBG_PKG, "hw->seg is NULL, tables are not filled\n");
+		return;
+	}
+
+	memset(&state, 0, sizeof(state));
+
+	sect = ice_pkg_enum_section(hw->seg, &state, sid);
+
+	while (sect) {
+		switch (sid) {
+		case ICE_SID_XLT1_SW:
+		case ICE_SID_XLT1_FD:
+		case ICE_SID_XLT1_RSS:
+		case ICE_SID_XLT1_ACL:
+		case ICE_SID_XLT1_PE:
+			xlt1 = (struct ice_xlt1_section *)sect;
+			src = xlt1->value;
+			sect_len = le16_to_cpu(xlt1->count) *
+				sizeof(*hw->blk[block_id].xlt1.t);
+			dst = hw->blk[block_id].xlt1.t;
+			dst_len = hw->blk[block_id].xlt1.count *
+				sizeof(*hw->blk[block_id].xlt1.t);
+			break;
+		case ICE_SID_XLT2_SW:
+		case ICE_SID_XLT2_FD:
+		case ICE_SID_XLT2_RSS:
+		case ICE_SID_XLT2_ACL:
+		case ICE_SID_XLT2_PE:
+			xlt2 = (struct ice_xlt2_section *)sect;
+			src = (__force u8 *)xlt2->value;
+			sect_len = le16_to_cpu(xlt2->count) *
+				sizeof(*hw->blk[block_id].xlt2.t);
+			dst = (u8 *)hw->blk[block_id].xlt2.t;
+			dst_len = hw->blk[block_id].xlt2.count *
+				sizeof(*hw->blk[block_id].xlt2.t);
+			break;
+		case ICE_SID_PROFID_TCAM_SW:
+		case ICE_SID_PROFID_TCAM_FD:
+		case ICE_SID_PROFID_TCAM_RSS:
+		case ICE_SID_PROFID_TCAM_ACL:
+		case ICE_SID_PROFID_TCAM_PE:
+			pid = (struct ice_prof_id_section *)sect;
+			src = (u8 *)pid->entry;
+			sect_len = le16_to_cpu(pid->count) *
+				sizeof(*hw->blk[block_id].prof.t);
+			dst = (u8 *)hw->blk[block_id].prof.t;
+			dst_len = hw->blk[block_id].prof.count *
+				sizeof(*hw->blk[block_id].prof.t);
+			break;
+		case ICE_SID_PROFID_REDIR_SW:
+		case ICE_SID_PROFID_REDIR_FD:
+		case ICE_SID_PROFID_REDIR_RSS:
+		case ICE_SID_PROFID_REDIR_ACL:
+		case ICE_SID_PROFID_REDIR_PE:
+			pr = (struct ice_prof_redir_section *)sect;
+			src = pr->redir_value;
+			sect_len = le16_to_cpu(pr->count) *
+				sizeof(*hw->blk[block_id].prof_redir.t);
+			dst = hw->blk[block_id].prof_redir.t;
+			dst_len = hw->blk[block_id].prof_redir.count *
+				sizeof(*hw->blk[block_id].prof_redir.t);
+			break;
+		case ICE_SID_FLD_VEC_SW:
+		case ICE_SID_FLD_VEC_FD:
+		case ICE_SID_FLD_VEC_RSS:
+		case ICE_SID_FLD_VEC_ACL:
+		case ICE_SID_FLD_VEC_PE:
+			es = (struct ice_sw_fv_section *)sect;
+			src = (u8 *)es->fv;
+			sect_len = (u32)(le16_to_cpu(es->count) *
+					 hw->blk[block_id].es.fvw) *
+				sizeof(*hw->blk[block_id].es.t);
+			dst = (u8 *)hw->blk[block_id].es.t;
+			dst_len = (u32)(hw->blk[block_id].es.count *
+					hw->blk[block_id].es.fvw) *
+				sizeof(*hw->blk[block_id].es.t);
+			break;
+		default:
+			return;
+		}
+
+		/* if the section offset exceeds destination length, terminate
+		 * table fill.
+		 */
+		if (offset > dst_len)
+			return;
+
+		/* if the sum of section size and offset exceeds the destination
+		 * size then we are out of bounds of the HW table size for that
+		 * PF. Clamp the section length to fill only the remaining
+		 * table space of that PF.
+		 */
+		if ((offset + sect_len) > dst_len)
+			sect_len = dst_len - offset;
+
+		memcpy(dst + offset, src, sect_len);
+		offset += sect_len;
+		sect = ice_pkg_enum_section(NULL, &state, sid);
+	}
+}
+
+/**
+ * ice_fill_blk_tbls - Read package context for tables
+ * @hw: pointer to the hardware structure
+ *
+ * Reads the current package contents and populates the driver
+ * database with the data iteratively for all advanced feature
+ * blocks. Assumes that the HW tables have been allocated.
+ */
+void ice_fill_blk_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		enum ice_block blk_id = (enum ice_block)i;
+
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt1.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt2.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof_redir.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].es.sid);
+	}
+
+	ice_init_sw_db(hw);
+}
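+
+/* Illustrative usage sketch (not part of the driver source): the shadow
+ * tables are allocated once and then populated from the downloaded package,
+ * e.g.:
+ *
+ *	if (!ice_init_hw_tbls(hw))
+ *		ice_fill_blk_tbls(hw);
+ */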
+
+/**
+ * ice_free_hw_tbls - free hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+void ice_free_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		hw->blk[i].is_list_init = false;
+
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptypes);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptg_tbl);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsig_tbl);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsis);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof_redir.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written);
+	}
+
+	memset(hw->blk, 0, sizeof(hw->blk));
+}
+
+/**
+ * ice_clear_hw_tbls - clear HW tables and flow profiles
+ * @hw: pointer to the hardware structure
+ */
+void ice_clear_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+		struct ice_prof_tcam *prof = &hw->blk[i].prof;
+		struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+		struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+		struct ice_es *es = &hw->blk[i].es;
+
+		memset(xlt1->ptypes, 0, xlt1->count * sizeof(*xlt1->ptypes));
+		memset(xlt1->ptg_tbl, 0,
+		       ICE_MAX_PTGS * sizeof(*xlt1->ptg_tbl));
+		memset(xlt1->t, 0, xlt1->count * sizeof(*xlt1->t));
+
+		memset(xlt2->vsis, 0, xlt2->count * sizeof(*xlt2->vsis));
+		memset(xlt2->vsig_tbl, 0,
+		       xlt2->count * sizeof(*xlt2->vsig_tbl));
+		memset(xlt2->t, 0, xlt2->count * sizeof(*xlt2->t));
+
+		memset(prof->t, 0, prof->count * sizeof(*prof->t));
+		memset(prof_redir->t, 0,
+		       prof_redir->count * sizeof(*prof_redir->t));
+
+		memset(es->t, 0, es->count * sizeof(*es->t));
+		memset(es->ref_count, 0, es->count * sizeof(*es->ref_count));
+		memset(es->written, 0, es->count * sizeof(*es->written));
+	}
+}
+
+/**
+ * ice_init_hw_tbls - init hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_init_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+		struct ice_prof_tcam *prof = &hw->blk[i].prof;
+		struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+		struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+		struct ice_es *es = &hw->blk[i].es;
+		u16 j;
+
+		if (hw->blk[i].is_list_init)
+			continue;
+
+		hw->blk[i].is_list_init = true;
+
+		hw->blk[i].overwrite = blk_sizes[i].overwrite;
+		es->reverse = blk_sizes[i].reverse;
+
+		xlt1->sid = ice_blk_sids[i][ICE_SID_XLT1_OFF];
+		xlt1->count = blk_sizes[i].xlt1;
+
+		xlt1->ptypes = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+					    sizeof(*xlt1->ptypes), GFP_KERNEL);
+
+		if (!xlt1->ptypes)
+			goto err;
+
+		xlt1->ptg_tbl = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_PTGS,
+					     sizeof(*xlt1->ptg_tbl),
+					     GFP_KERNEL);
+
+		if (!xlt1->ptg_tbl)
+			goto err;
+
+		xlt1->t = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+				       sizeof(*xlt1->t), GFP_KERNEL);
+		if (!xlt1->t)
+			goto err;
+
+		xlt2->sid = ice_blk_sids[i][ICE_SID_XLT2_OFF];
+		xlt2->count = blk_sizes[i].xlt2;
+
+		xlt2->vsis = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+					  sizeof(*xlt2->vsis), GFP_KERNEL);
+
+		if (!xlt2->vsis)
+			goto err;
+
+		xlt2->vsig_tbl = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+					      sizeof(*xlt2->vsig_tbl),
+					      GFP_KERNEL);
+		if (!xlt2->vsig_tbl)
+			goto err;
+
+		for (j = 0; j < xlt2->count; j++)
+			INIT_LIST_HEAD(&xlt2->vsig_tbl[j].prop_lst);
+
+		xlt2->t = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+				       sizeof(*xlt2->t), GFP_KERNEL);
+		if (!xlt2->t)
+			goto err;
+
+		prof->sid = ice_blk_sids[i][ICE_SID_PR_OFF];
+		prof->count = blk_sizes[i].prof_tcam;
+		prof->max_prof_id = blk_sizes[i].prof_id;
+		prof->cdid_bits = blk_sizes[i].prof_cdid_bits;
+		prof->t = devm_kcalloc(ice_hw_to_dev(hw), prof->count,
+				       sizeof(*prof->t), GFP_KERNEL);
+
+		if (!prof->t)
+			goto err;
+
+		prof_redir->sid = ice_blk_sids[i][ICE_SID_PR_REDIR_OFF];
+		prof_redir->count = blk_sizes[i].prof_redir;
+		prof_redir->t = devm_kcalloc(ice_hw_to_dev(hw),
+					     prof_redir->count,
+					     sizeof(*prof_redir->t),
+					     GFP_KERNEL);
+
+		if (!prof_redir->t)
+			goto err;
+
+		es->sid = ice_blk_sids[i][ICE_SID_ES_OFF];
+		es->count = blk_sizes[i].es;
+		es->fvw = blk_sizes[i].fvw;
+		es->t = devm_kcalloc(ice_hw_to_dev(hw),
+				     (u32)(es->count * es->fvw),
+				     sizeof(*es->t), GFP_KERNEL);
+		if (!es->t)
+			goto err;
+
+		es->ref_count = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					     sizeof(*es->ref_count),
+					     GFP_KERNEL);
+
+		es->written = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					   sizeof(*es->written), GFP_KERNEL);
+		if (!es->ref_count || !es->written)
+			goto err;
+	}
+	return 0;
+
+err:
+	ice_free_hw_tbls(hw);
+	return ICE_ERR_NO_MEMORY;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
new file mode 100644
index 0000000..37eb282
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_PIPE_H_
+#define _ICE_FLEX_PIPE_H_
+
+#include "ice_type.h"
+
+/* Minimum package version supported */
+#define ICE_PKG_SUPP_VER_MAJ	1
+#define ICE_PKG_SUPP_VER_MNR	3
+
+/* Package format version */
+#define ICE_PKG_FMT_VER_MAJ	1
+#define ICE_PKG_FMT_VER_MNR	0
+#define ICE_PKG_FMT_VER_UPD	0
+#define ICE_PKG_FMT_VER_DFT	0
+
+#define ICE_PKG_CNT 4
+
+enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len);
+enum ice_status
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len);
+enum ice_status ice_init_hw_tbls(struct ice_hw *hw);
+void ice_free_seg(struct ice_hw *hw);
+void ice_fill_blk_tbls(struct ice_hw *hw);
+void ice_clear_hw_tbls(struct ice_hw *hw);
+void ice_free_hw_tbls(struct ice_hw *hw);
+#endif /* _ICE_FLEX_PIPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
new file mode 100644
index 0000000..5d5a7ea
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_TYPE_H_
+#define _ICE_FLEX_TYPE_H_
+/* Extraction Sequence (Field Vector) Table */
+struct ice_fv_word {
+	u8 prot_id;
+	u16 off;		/* Offset within the protocol header */
+	u8 resvrd;
+} __packed;
+
+#define ICE_MAX_FV_WORDS 48
+struct ice_fv {
+	struct ice_fv_word ew[ICE_MAX_FV_WORDS];
+};
+
+/* Package and segment headers and tables */
+struct ice_pkg_hdr {
+	struct ice_pkg_ver format_ver;
+	__le32 seg_count;
+	__le32 seg_offset[1];
+};
+
+/* generic segment */
+struct ice_generic_seg_hdr {
+#define SEGMENT_TYPE_METADATA	0x00000001
+#define SEGMENT_TYPE_ICE	0x00000010
+	__le32 seg_type;
+	struct ice_pkg_ver seg_ver;
+	__le32 seg_size;
+	char seg_name[ICE_PKG_NAME_SIZE];
+};
+
+/* ice specific segment */
+
+union ice_device_id {
+	struct {
+		__le16 device_id;
+		__le16 vendor_id;
+	} dev_vend_id;
+	__le32 id;
+};
+
+struct ice_device_id_entry {
+	union ice_device_id device;
+	union ice_device_id sub_device;
+};
+
+struct ice_seg {
+	struct ice_generic_seg_hdr hdr;
+	__le32 device_table_count;
+	struct ice_device_id_entry device_table[1];
+};
+
+struct ice_nvm_table {
+	__le32 table_count;
+	__le32 vers[1];
+};
+
+struct ice_buf {
+#define ICE_PKG_BUF_SIZE	4096
+	u8 buf[ICE_PKG_BUF_SIZE];
+};
+
+struct ice_buf_table {
+	__le32 buf_count;
+	struct ice_buf buf_array[1];
+};
+
+/* global metadata specific segment */
+struct ice_global_metadata_seg {
+	struct ice_generic_seg_hdr hdr;
+	struct ice_pkg_ver pkg_ver;
+	__le32 track_id;
+	char pkg_name[ICE_PKG_NAME_SIZE];
+};
+
+#define ICE_MIN_S_OFF		12
+#define ICE_MAX_S_OFF		4095
+#define ICE_MIN_S_SZ		1
+#define ICE_MAX_S_SZ		4084
+
+/* section information */
+struct ice_section_entry {
+	__le32 type;
+	__le16 offset;
+	__le16 size;
+};
+
+#define ICE_MIN_S_COUNT		1
+#define ICE_MAX_S_COUNT		511
+#define ICE_MIN_S_DATA_END	12
+#define ICE_MAX_S_DATA_END	4096
+
+#define ICE_METADATA_BUF	0x80000000
+
+struct ice_buf_hdr {
+	__le16 section_count;
+	__le16 data_end;
+	struct ice_section_entry section_entry[1];
+};
+
+#define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz) ((ICE_PKG_BUF_SIZE - \
+	sizeof(struct ice_buf_hdr) - (hd_sz)) / (ent_sz))
+
+/* ice package section IDs */
+#define ICE_SID_XLT1_SW			12
+#define ICE_SID_XLT2_SW			13
+#define ICE_SID_PROFID_TCAM_SW		14
+#define ICE_SID_PROFID_REDIR_SW		15
+#define ICE_SID_FLD_VEC_SW		16
+
+#define ICE_SID_XLT1_ACL		22
+#define ICE_SID_XLT2_ACL		23
+#define ICE_SID_PROFID_TCAM_ACL		24
+#define ICE_SID_PROFID_REDIR_ACL	25
+#define ICE_SID_FLD_VEC_ACL		26
+
+#define ICE_SID_XLT1_FD			32
+#define ICE_SID_XLT2_FD			33
+#define ICE_SID_PROFID_TCAM_FD		34
+#define ICE_SID_PROFID_REDIR_FD		35
+#define ICE_SID_FLD_VEC_FD		36
+
+#define ICE_SID_XLT1_RSS		42
+#define ICE_SID_XLT2_RSS		43
+#define ICE_SID_PROFID_TCAM_RSS		44
+#define ICE_SID_PROFID_REDIR_RSS	45
+#define ICE_SID_FLD_VEC_RSS		46
+
+#define ICE_SID_RXPARSER_BOOST_TCAM	56
+
+#define ICE_SID_XLT1_PE			82
+#define ICE_SID_XLT2_PE			83
+#define ICE_SID_PROFID_TCAM_PE		84
+#define ICE_SID_PROFID_REDIR_PE		85
+#define ICE_SID_FLD_VEC_PE		86
+
+/* Label Metadata section IDs */
+#define ICE_SID_LBL_FIRST		0x80000010
+#define ICE_SID_LBL_RXPARSER_TMEM	0x80000018
+/* The following define MUST be updated to reflect the last label section ID */
+#define ICE_SID_LBL_LAST		0x80000038
+
+enum ice_block {
+	ICE_BLK_SW = 0,
+	ICE_BLK_ACL,
+	ICE_BLK_FD,
+	ICE_BLK_RSS,
+	ICE_BLK_PE,
+	ICE_BLK_COUNT
+};
+
+/* package labels */
+struct ice_label {
+	__le16 value;
+#define ICE_PKG_LABEL_SIZE	64
+	char name[ICE_PKG_LABEL_SIZE];
+};
+
+struct ice_label_section {
+	__le16 count;
+	struct ice_label label[1];
+};
+
+#define ICE_MAX_LABELS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \
+	sizeof(struct ice_label_section) - sizeof(struct ice_label), \
+	sizeof(struct ice_label))
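+
+/* Worked example (illustrative, assuming the natural 2-byte alignment of
+ * these structures): sizeof(struct ice_label) is 66 bytes, the label section
+ * header contributes 2 bytes, and the buffer header is 12 bytes, so this
+ * evaluates to (4096 - 12 - 2) / 66 = 61 labels per buffer.
+ */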
+
+struct ice_sw_fv_section {
+	__le16 count;
+	__le16 base_offset;
+	struct ice_fv fv[1];
+};
+
+/* The BOOST TCAM stores the match packet header in reverse order, meaning
+ * the fields are reversed; as a consequence, the normally big endian
+ * fields of the packet become little endian.
+ */
+struct ice_boost_key_value {
+#define ICE_BOOST_REMAINING_HV_KEY	15
+	u8 remaining_hv_key[ICE_BOOST_REMAINING_HV_KEY];
+	__le16 hv_dst_port_key;
+	__le16 hv_src_port_key;
+	u8 tcam_search_key;
+} __packed;
+
+struct ice_boost_key {
+	struct ice_boost_key_value key;
+	struct ice_boost_key_value key2;
+};
+
+/* package Boost TCAM entry */
+struct ice_boost_tcam_entry {
+	__le16 addr;
+	__le16 reserved;
+	/* break up the 40 bytes of key into different fields */
+	struct ice_boost_key key;
+	u8 boost_hit_index_group;
+	/* The following contains bitfields which are not on byte boundaries.
+	 * These fields are currently unused by driver software.
+	 */
+#define ICE_BOOST_BIT_FIELDS		43
+	u8 bit_fields[ICE_BOOST_BIT_FIELDS];
+};
+
+struct ice_boost_tcam_section {
+	__le16 count;
+	__le16 reserved;
+	struct ice_boost_tcam_entry tcam[1];
+};
+
+#define ICE_MAX_BST_TCAMS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \
+	sizeof(struct ice_boost_tcam_section) - \
+	sizeof(struct ice_boost_tcam_entry), \
+	sizeof(struct ice_boost_tcam_entry))
+
+struct ice_xlt1_section {
+	__le16 count;
+	__le16 offset;
+	u8 value[1];
+} __packed;
+
+struct ice_xlt2_section {
+	__le16 count;
+	__le16 offset;
+	__le16 value[1];
+};
+
+struct ice_prof_redir_section {
+	__le16 count;
+	__le16 offset;
+	u8 redir_value[1];
+};
+
+struct ice_pkg_enum {
+	struct ice_buf_table *buf_table;
+	u32 buf_idx;
+
+	u32 type;
+	struct ice_buf_hdr *buf;
+	u32 sect_idx;
+	void *sect;
+	u32 sect_type;
+
+	u32 entry_idx;
+	void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset);
+};
+
+struct ice_es {
+	u32 sid;
+	u16 count;
+	u16 fvw;
+	u16 *ref_count;
+	struct list_head prof_map;
+	struct ice_fv_word *t;
+	struct mutex prof_map_lock;	/* protect access to profiles list */
+	u8 *written;
+	u8 reverse; /* set to true to reverse FV order */
+};
+
+/* PTYPE Group management */
+
+/* Note: The XLT1 table takes a 13-bit packet type as input and produces an
+ * 8-bit packet type group (PTG) ID as output.
+ *
+ * Note: PTG 0 is the default packet type group; all PTYPEs are assumed to be
+ * part of this group until moved to a new PTG.
+ */
+#define ICE_DEFAULT_PTG	0
+
+struct ice_ptg_entry {
+	struct ice_ptg_ptype *first_ptype;
+	u8 in_use;
+};
+
+struct ice_ptg_ptype {
+	struct ice_ptg_ptype *next_ptype;
+	u8 ptg;
+};
+
+struct ice_vsig_entry {
+	struct list_head prop_lst;
+	struct ice_vsig_vsi *first_vsi;
+	u8 in_use;
+};
+
+struct ice_vsig_vsi {
+	struct ice_vsig_vsi *next_vsi;
+	u32 prop_mask;
+	u16 changed;
+	u16 vsig;
+};
+
+#define ICE_XLT1_CNT	1024
+#define ICE_MAX_PTGS	256
+
+/* XLT1 Table */
+struct ice_xlt1 {
+	struct ice_ptg_entry *ptg_tbl;
+	struct ice_ptg_ptype *ptypes;
+	u8 *t;
+	u32 sid;
+	u16 count;
+};
+
+#define ICE_XLT2_CNT	768
+#define ICE_MAX_VSIGS	768
+
+/* VSIG bit layout:
+ * [0:12]: incremental VSIG index 1 to ICE_MAX_VSIGS
+ * [13:15]: PF number of device
+ */
+#define ICE_VSIG_IDX_M	(0x1FFF)
+#define ICE_PF_NUM_S	13
+#define ICE_PF_NUM_M	(0x07 << ICE_PF_NUM_S)
+#define ICE_VSIG_VALUE(vsig, pf_id) \
+	(u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \
+	      (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M))
+#define ICE_DEFAULT_VSIG	0
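+
+/* Worked example (illustrative): for VSIG index 5 owned by PF 2,
+ * ICE_VSIG_VALUE(5, 2) = (5 & 0x1FFF) | ((2 << 13) & 0xE000) = 0x4005
+ */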
+
+/* XLT2 Table */
+struct ice_xlt2 {
+	struct ice_vsig_entry *vsig_tbl;
+	struct ice_vsig_vsi *vsis;
+	u16 *t;
+	u32 sid;
+	u16 count;
+};
+
+/* Keys are made up of two values, each one-half the size of the key.
+ * For TCAM, the entire key is 80 bits wide (two 40-bit values).
+ */
+#define ICE_TCAM_KEY_VAL_SZ	5
+#define ICE_TCAM_KEY_SZ		(2 * ICE_TCAM_KEY_VAL_SZ)
+
+struct ice_prof_tcam_entry {
+	__le16 addr;
+	u8 key[ICE_TCAM_KEY_SZ];
+	u8 prof_id;
+} __packed;
+
+struct ice_prof_id_section {
+	__le16 count;
+	struct ice_prof_tcam_entry entry[1];
+} __packed;
+
+struct ice_prof_tcam {
+	u32 sid;
+	u16 count;
+	u16 max_prof_id;
+	struct ice_prof_tcam_entry *t;
+	u8 cdid_bits; /* # CDID bits to use in key, 0, 2, 4, or 8 */
+};
+
+struct ice_prof_redir {
+	u8 *t;
+	u32 sid;
+	u16 count;
+};
+
+/* Tables per block */
+struct ice_blk_info {
+	struct ice_xlt1 xlt1;
+	struct ice_xlt2 xlt2;
+	struct ice_prof_tcam prof;
+	struct ice_prof_redir prof_redir;
+	struct ice_es es;
+	u8 overwrite; /* set to true to allow overwrite of table entries */
+	u8 is_list_init;
+};
+
+#endif /* _ICE_FLEX_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 6076fc8..152fbd5 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -6,251 +6,342 @@
 #ifndef _ICE_HW_AUTOGEN_H_
 #define _ICE_HW_AUTOGEN_H_
 
-#define QTX_COMM_DBELL(_DBQM)		(0x002C0000 + ((_DBQM) * 4))
-#define PF_FW_ARQBAH			0x00080180
-#define PF_FW_ARQBAL			0x00080080
-#define PF_FW_ARQH			0x00080380
-#define PF_FW_ARQH_ARQH_S		0
-#define PF_FW_ARQH_ARQH_M		ICE_M(0x3FF, PF_FW_ARQH_ARQH_S)
-#define PF_FW_ARQLEN			0x00080280
-#define PF_FW_ARQLEN_ARQLEN_S		0
-#define PF_FW_ARQLEN_ARQLEN_M		ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S)
-#define PF_FW_ARQLEN_ARQVFE_S		28
-#define PF_FW_ARQLEN_ARQVFE_M		BIT(PF_FW_ARQLEN_ARQVFE_S)
-#define PF_FW_ARQLEN_ARQOVFL_S		29
-#define PF_FW_ARQLEN_ARQOVFL_M		BIT(PF_FW_ARQLEN_ARQOVFL_S)
-#define PF_FW_ARQLEN_ARQCRIT_S		30
-#define PF_FW_ARQLEN_ARQCRIT_M		BIT(PF_FW_ARQLEN_ARQCRIT_S)
-#define PF_FW_ARQLEN_ARQENABLE_S	31
-#define PF_FW_ARQLEN_ARQENABLE_M	BIT(PF_FW_ARQLEN_ARQENABLE_S)
-#define PF_FW_ARQT			0x00080480
-#define PF_FW_ATQBAH			0x00080100
-#define PF_FW_ATQBAL			0x00080000
-#define PF_FW_ATQH			0x00080300
-#define PF_FW_ATQH_ATQH_S		0
-#define PF_FW_ATQH_ATQH_M		ICE_M(0x3FF, PF_FW_ATQH_ATQH_S)
-#define PF_FW_ATQLEN			0x00080200
-#define PF_FW_ATQLEN_ATQLEN_S		0
-#define PF_FW_ATQLEN_ATQLEN_M		ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S)
-#define PF_FW_ATQLEN_ATQVFE_S		28
-#define PF_FW_ATQLEN_ATQVFE_M		BIT(PF_FW_ATQLEN_ATQVFE_S)
-#define PF_FW_ATQLEN_ATQOVFL_S		29
-#define PF_FW_ATQLEN_ATQOVFL_M		BIT(PF_FW_ATQLEN_ATQOVFL_S)
-#define PF_FW_ATQLEN_ATQCRIT_S		30
-#define PF_FW_ATQLEN_ATQCRIT_M		BIT(PF_FW_ATQLEN_ATQCRIT_S)
-#define PF_FW_ATQLEN_ATQENABLE_S	31
-#define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
-#define PF_FW_ATQT			0x00080400
-
+#define PF0INT_ITR_0(_i)			(0x03000004 + ((_i) * 4096))
+#define PF0INT_ITR_1(_i)			(0x03000008 + ((_i) * 4096))
+#define PF0INT_ITR_2(_i)			(0x0300000C + ((_i) * 4096))
+#define QTX_COMM_DBELL(_DBQM)			(0x002C0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD(_DBQM)			(0x000E0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD_HEAD_S			0
+#define QTX_COMM_HEAD_HEAD_M			ICE_M(0x1FFF, 0)
+#define PF_FW_ARQBAH				0x00080180
+#define PF_FW_ARQBAL				0x00080080
+#define PF_FW_ARQH				0x00080380
+#define PF_FW_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN				0x00080280
+#define PF_FW_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN_ARQVFE_M			BIT(28)
+#define PF_FW_ARQLEN_ARQOVFL_M			BIT(29)
+#define PF_FW_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_FW_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_FW_ARQT				0x00080480
+#define PF_FW_ATQBAH				0x00080100
+#define PF_FW_ATQBAL				0x00080000
+#define PF_FW_ATQH				0x00080300
+#define PF_FW_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN				0x00080200
+#define PF_FW_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN_ATQVFE_M			BIT(28)
+#define PF_FW_ATQLEN_ATQOVFL_M			BIT(29)
+#define PF_FW_ATQLEN_ATQCRIT_M			BIT(30)
+#define VF_MBX_ARQLEN(_VF)			(0x0022BC00 + ((_VF) * 4))
+#define PF_FW_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_FW_ATQT				0x00080400
+#define PF_MBX_ARQBAH				0x0022E400
+#define PF_MBX_ARQBAL				0x0022E380
+#define PF_MBX_ARQH				0x0022E500
+#define PF_MBX_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN				0x0022E480
+#define PF_MBX_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_MBX_ARQT				0x0022E580
+#define PF_MBX_ATQBAH				0x0022E180
+#define PF_MBX_ATQBAL				0x0022E100
+#define PF_MBX_ATQH				0x0022E280
+#define PF_MBX_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN				0x0022E200
+#define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_MBX_ATQT				0x0022E300
+#define PRTDCB_GENS				0x00083020
+#define PRTDCB_GENS_DCBX_STATUS_S		0
+#define PRTDCB_GENS_DCBX_STATUS_M		ICE_M(0x7, 0)
+#define GL_PREEXT_L2_PMASK0(_i)			(0x0020F0FC + ((_i) * 4))
+#define GL_PREEXT_L2_PMASK1(_i)			(0x0020F108 + ((_i) * 4))
 #define GLFLXP_RXDID_FLAGS(_i, _j)		(0x0045D000 + ((_i) * 4 + (_j) * 256))
 #define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S	0
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M	ICE_M(0x3F, 0)
 #define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S	8
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M	ICE_M(0x3F, 8)
 #define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S	16
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M	ICE_M(0x3F, 16)
 #define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S	24
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M	ICE_M(0x3F, 24)
 #define GLFLXP_RXDID_FLX_WRD_0(_i)		(0x0045c800 + ((_i) * 4))
 #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S	0
-#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M	ICE_M(0xFF, 0)
 #define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S	30
-#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M	ICE_M(0x3, 30)
 #define GLFLXP_RXDID_FLX_WRD_1(_i)		(0x0045c900 + ((_i) * 4))
 #define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S	0
-#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M	ICE_M(0xFF, 0)
 #define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S	30
-#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M	ICE_M(0x3, 30)
 #define GLFLXP_RXDID_FLX_WRD_2(_i)		(0x0045ca00 + ((_i) * 4))
 #define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S	0
-#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M	ICE_M(0xFF, 0)
 #define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S	30
-#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M	ICE_M(0x3, 30)
 #define GLFLXP_RXDID_FLX_WRD_3(_i)		(0x0045cb00 + ((_i) * 4))
 #define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S	0
-#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M	ICE_M(0xFF, 0)
 #define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S	30
-#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S)
-
-#define QRXFLXP_CNTXT(_QRX)		(0x00480000 + ((_QRX) * 4))
-#define QRXFLXP_CNTXT_RXDID_IDX_S	0
-#define QRXFLXP_CNTXT_RXDID_IDX_M	ICE_M(0x3F, QRXFLXP_CNTXT_RXDID_IDX_S)
-#define QRXFLXP_CNTXT_RXDID_PRIO_S	8
-#define QRXFLXP_CNTXT_RXDID_PRIO_M	ICE_M(0x7, QRXFLXP_CNTXT_RXDID_PRIO_S)
-#define QRXFLXP_CNTXT_TS_S		11
-#define QRXFLXP_CNTXT_TS_M		BIT(QRXFLXP_CNTXT_TS_S)
-#define GLGEN_RSTAT			0x000B8188
-#define GLGEN_RSTAT_DEVSTATE_S		0
-#define GLGEN_RSTAT_DEVSTATE_M		ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S)
-#define GLGEN_RSTCTL			0x000B8180
-#define GLGEN_RSTCTL_GRSTDEL_S		0
-#define GLGEN_RSTCTL_GRSTDEL_M		ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
-#define GLGEN_RSTAT_RESET_TYPE_S	2
-#define GLGEN_RSTAT_RESET_TYPE_M	ICE_M(0x3, GLGEN_RSTAT_RESET_TYPE_S)
-#define GLGEN_RTRIG			0x000B8190
-#define GLGEN_RTRIG_CORER_S		0
-#define GLGEN_RTRIG_CORER_M		BIT(GLGEN_RTRIG_CORER_S)
-#define GLGEN_RTRIG_GLOBR_S		1
-#define GLGEN_RTRIG_GLOBR_M		BIT(GLGEN_RTRIG_GLOBR_S)
-#define GLGEN_STAT			0x000B612C
-#define PFGEN_CTRL			0x00091000
-#define PFGEN_CTRL_PFSWR_S		0
-#define PFGEN_CTRL_PFSWR_M		BIT(PFGEN_CTRL_PFSWR_S)
-#define PFGEN_STATE			0x00088000
-#define PRTGEN_STATUS			0x000B8100
-#define PFHMC_ERRORDATA			0x00520500
-#define PFHMC_ERRORINFO			0x00520400
-#define GLINT_DYN_CTL(_INT)		(0x00160000 + ((_INT) * 4))
-#define GLINT_DYN_CTL_INTENA_S		0
-#define GLINT_DYN_CTL_INTENA_M		BIT(GLINT_DYN_CTL_INTENA_S)
-#define GLINT_DYN_CTL_CLEARPBA_S	1
-#define GLINT_DYN_CTL_CLEARPBA_M	BIT(GLINT_DYN_CTL_CLEARPBA_S)
-#define GLINT_DYN_CTL_SWINT_TRIG_S	2
-#define GLINT_DYN_CTL_SWINT_TRIG_M	BIT(GLINT_DYN_CTL_SWINT_TRIG_S)
-#define GLINT_DYN_CTL_ITR_INDX_S	3
-#define GLINT_DYN_CTL_SW_ITR_INDX_S	25
-#define GLINT_DYN_CTL_SW_ITR_INDX_M	ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S)
-#define GLINT_DYN_CTL_INTENA_MSK_S	31
-#define GLINT_DYN_CTL_INTENA_MSK_M	BIT(GLINT_DYN_CTL_INTENA_MSK_S)
-#define GLINT_ITR(_i, _INT)		(0x00154000 + ((_i) * 8192 + (_INT) * 4))
-#define PFINT_FW_CTL			0x0016C800
-#define PFINT_FW_CTL_MSIX_INDX_S	0
-#define PFINT_FW_CTL_MSIX_INDX_M	ICE_M(0x7FF, PFINT_FW_CTL_MSIX_INDX_S)
-#define PFINT_FW_CTL_ITR_INDX_S		11
-#define PFINT_FW_CTL_ITR_INDX_M		ICE_M(0x3, PFINT_FW_CTL_ITR_INDX_S)
-#define PFINT_FW_CTL_CAUSE_ENA_S	30
-#define PFINT_FW_CTL_CAUSE_ENA_M	BIT(PFINT_FW_CTL_CAUSE_ENA_S)
-#define PFINT_OICR			0x0016CA00
-#define PFINT_OICR_ECC_ERR_S		16
-#define PFINT_OICR_ECC_ERR_M		BIT(PFINT_OICR_ECC_ERR_S)
-#define PFINT_OICR_MAL_DETECT_S		19
-#define PFINT_OICR_MAL_DETECT_M		BIT(PFINT_OICR_MAL_DETECT_S)
-#define PFINT_OICR_GRST_S		20
-#define PFINT_OICR_GRST_M		BIT(PFINT_OICR_GRST_S)
-#define PFINT_OICR_PCI_EXCEPTION_S	21
-#define PFINT_OICR_PCI_EXCEPTION_M	BIT(PFINT_OICR_PCI_EXCEPTION_S)
-#define PFINT_OICR_HMC_ERR_S		26
-#define PFINT_OICR_HMC_ERR_M		BIT(PFINT_OICR_HMC_ERR_S)
-#define PFINT_OICR_PE_CRITERR_S		28
-#define PFINT_OICR_PE_CRITERR_M		BIT(PFINT_OICR_PE_CRITERR_S)
-#define PFINT_OICR_CTL			0x0016CA80
-#define PFINT_OICR_CTL_MSIX_INDX_S	0
-#define PFINT_OICR_CTL_MSIX_INDX_M	ICE_M(0x7FF, PFINT_OICR_CTL_MSIX_INDX_S)
-#define PFINT_OICR_CTL_ITR_INDX_S	11
-#define PFINT_OICR_CTL_ITR_INDX_M	ICE_M(0x3, PFINT_OICR_CTL_ITR_INDX_S)
-#define PFINT_OICR_CTL_CAUSE_ENA_S	30
-#define PFINT_OICR_CTL_CAUSE_ENA_M	BIT(PFINT_OICR_CTL_CAUSE_ENA_S)
-#define PFINT_OICR_ENA			0x0016C900
-#define QINT_RQCTL(_QRX)		(0x00150000 + ((_QRX) * 4))
-#define QINT_RQCTL_MSIX_INDX_S		0
-#define QINT_RQCTL_ITR_INDX_S		11
-#define QINT_RQCTL_CAUSE_ENA_S		30
-#define QINT_RQCTL_CAUSE_ENA_M		BIT(QINT_RQCTL_CAUSE_ENA_S)
-#define QINT_TQCTL(_DBQM)		(0x00140000 + ((_DBQM) * 4))
-#define QINT_TQCTL_MSIX_INDX_S		0
-#define QINT_TQCTL_ITR_INDX_S		11
-#define QINT_TQCTL_CAUSE_ENA_S		30
-#define QINT_TQCTL_CAUSE_ENA_M		BIT(QINT_TQCTL_CAUSE_ENA_S)
-#define GLLAN_RCTL_0			0x002941F8
-#define QRX_CONTEXT(_i, _QRX)		(0x00280000 + ((_i) * 8192 + (_QRX) * 4))
-#define QRX_CTRL(_QRX)			(0x00120000 + ((_QRX) * 4))
-#define QRX_CTRL_MAX_INDEX		2047
-#define QRX_CTRL_QENA_REQ_S		0
-#define QRX_CTRL_QENA_REQ_M		BIT(QRX_CTRL_QENA_REQ_S)
-#define QRX_CTRL_QENA_STAT_S		2
-#define QRX_CTRL_QENA_STAT_M		BIT(QRX_CTRL_QENA_STAT_S)
-#define QRX_ITR(_QRX)			(0x00292000 + ((_QRX) * 4))
-#define QRX_TAIL(_QRX)			(0x00290000 + ((_QRX) * 4))
-#define GLNVM_FLA			0x000B6108
-#define GLNVM_FLA_LOCKED_S		6
-#define GLNVM_FLA_LOCKED_M		BIT(GLNVM_FLA_LOCKED_S)
-#define GLNVM_GENS			0x000B6100
-#define GLNVM_GENS_SR_SIZE_S		5
-#define GLNVM_GENS_SR_SIZE_M		ICE_M(0x7, GLNVM_GENS_SR_SIZE_S)
-#define GLNVM_ULD			0x000B6008
-#define GLNVM_ULD_CORER_DONE_S		3
-#define GLNVM_ULD_CORER_DONE_M		BIT(GLNVM_ULD_CORER_DONE_S)
-#define GLNVM_ULD_GLOBR_DONE_S		4
-#define GLNVM_ULD_GLOBR_DONE_M		BIT(GLNVM_ULD_GLOBR_DONE_S)
-#define PF_FUNC_RID			0x0009E880
-#define PF_FUNC_RID_FUNC_NUM_S		0
-#define PF_FUNC_RID_FUNC_NUM_M		ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S)
-#define GLPRT_BPRCH(_i)			(0x00381384 + ((_i) * 8))
-#define GLPRT_BPRCL(_i)			(0x00381380 + ((_i) * 8))
-#define GLPRT_BPTCH(_i)			(0x00381244 + ((_i) * 8))
-#define GLPRT_BPTCL(_i)			(0x00381240 + ((_i) * 8))
-#define GLPRT_CRCERRS(_i)		(0x00380100 + ((_i) * 8))
-#define GLPRT_GORCH(_i)			(0x00380004 + ((_i) * 8))
-#define GLPRT_GORCL(_i)			(0x00380000 + ((_i) * 8))
-#define GLPRT_GOTCH(_i)			(0x00380B44 + ((_i) * 8))
-#define GLPRT_GOTCL(_i)			(0x00380B40 + ((_i) * 8))
-#define GLPRT_ILLERRC(_i)		(0x003801C0 + ((_i) * 8))
-#define GLPRT_LXOFFRXC(_i)		(0x003802C0 + ((_i) * 8))
-#define GLPRT_LXOFFTXC(_i)		(0x00381180 + ((_i) * 8))
-#define GLPRT_LXONRXC(_i)		(0x00380280 + ((_i) * 8))
-#define GLPRT_LXONTXC(_i)		(0x00381140 + ((_i) * 8))
-#define GLPRT_MLFC(_i)			(0x00380040 + ((_i) * 8))
-#define GLPRT_MPRCH(_i)			(0x00381344 + ((_i) * 8))
-#define GLPRT_MPRCL(_i)			(0x00381340 + ((_i) * 8))
-#define GLPRT_MPTCH(_i)			(0x00381204 + ((_i) * 8))
-#define GLPRT_MPTCL(_i)			(0x00381200 + ((_i) * 8))
-#define GLPRT_MRFC(_i)			(0x00380080 + ((_i) * 8))
-#define GLPRT_PRC1023H(_i)		(0x00380A04 + ((_i) * 8))
-#define GLPRT_PRC1023L(_i)		(0x00380A00 + ((_i) * 8))
-#define GLPRT_PRC127H(_i)		(0x00380944 + ((_i) * 8))
-#define GLPRT_PRC127L(_i)		(0x00380940 + ((_i) * 8))
-#define GLPRT_PRC1522H(_i)		(0x00380A44 + ((_i) * 8))
-#define GLPRT_PRC1522L(_i)		(0x00380A40 + ((_i) * 8))
-#define GLPRT_PRC255H(_i)		(0x00380984 + ((_i) * 8))
-#define GLPRT_PRC255L(_i)		(0x00380980 + ((_i) * 8))
-#define GLPRT_PRC511H(_i)		(0x003809C4 + ((_i) * 8))
-#define GLPRT_PRC511L(_i)		(0x003809C0 + ((_i) * 8))
-#define GLPRT_PRC64H(_i)		(0x00380904 + ((_i) * 8))
-#define GLPRT_PRC64L(_i)		(0x00380900 + ((_i) * 8))
-#define GLPRT_PRC9522H(_i)		(0x00380A84 + ((_i) * 8))
-#define GLPRT_PRC9522L(_i)		(0x00380A80 + ((_i) * 8))
-#define GLPRT_PTC1023H(_i)		(0x00380C84 + ((_i) * 8))
-#define GLPRT_PTC1023L(_i)		(0x00380C80 + ((_i) * 8))
-#define GLPRT_PTC127H(_i)		(0x00380BC4 + ((_i) * 8))
-#define GLPRT_PTC127L(_i)		(0x00380BC0 + ((_i) * 8))
-#define GLPRT_PTC1522H(_i)		(0x00380CC4 + ((_i) * 8))
-#define GLPRT_PTC1522L(_i)		(0x00380CC0 + ((_i) * 8))
-#define GLPRT_PTC255H(_i)		(0x00380C04 + ((_i) * 8))
-#define GLPRT_PTC255L(_i)		(0x00380C00 + ((_i) * 8))
-#define GLPRT_PTC511H(_i)		(0x00380C44 + ((_i) * 8))
-#define GLPRT_PTC511L(_i)		(0x00380C40 + ((_i) * 8))
-#define GLPRT_PTC64H(_i)		(0x00380B84 + ((_i) * 8))
-#define GLPRT_PTC64L(_i)		(0x00380B80 + ((_i) * 8))
-#define GLPRT_PTC9522H(_i)		(0x00380D04 + ((_i) * 8))
-#define GLPRT_PTC9522L(_i)		(0x00380D00 + ((_i) * 8))
-#define GLPRT_RFC(_i)			(0x00380AC0 + ((_i) * 8))
-#define GLPRT_RJC(_i)			(0x00380B00 + ((_i) * 8))
-#define GLPRT_RLEC(_i)			(0x00380140 + ((_i) * 8))
-#define GLPRT_ROC(_i)			(0x00380240 + ((_i) * 8))
-#define GLPRT_RUC(_i)			(0x00380200 + ((_i) * 8))
-#define GLPRT_TDOLD(_i)			(0x00381280 + ((_i) * 8))
-#define GLPRT_UPRCH(_i)			(0x00381304 + ((_i) * 8))
-#define GLPRT_UPRCL(_i)			(0x00381300 + ((_i) * 8))
-#define GLPRT_UPTCH(_i)			(0x003811C4 + ((_i) * 8))
-#define GLPRT_UPTCL(_i)			(0x003811C0 + ((_i) * 8))
-#define GLV_BPRCH(_i)			(0x003B6004 + ((_i) * 8))
-#define GLV_BPRCL(_i)			(0x003B6000 + ((_i) * 8))
-#define GLV_BPTCH(_i)			(0x0030E004 + ((_i) * 8))
-#define GLV_BPTCL(_i)			(0x0030E000 + ((_i) * 8))
-#define GLV_GORCH(_i)			(0x003B0004 + ((_i) * 8))
-#define GLV_GORCL(_i)			(0x003B0000 + ((_i) * 8))
-#define GLV_GOTCH(_i)			(0x00300004 + ((_i) * 8))
-#define GLV_GOTCL(_i)			(0x00300000 + ((_i) * 8))
-#define GLV_MPRCH(_i)			(0x003B4004 + ((_i) * 8))
-#define GLV_MPRCL(_i)			(0x003B4000 + ((_i) * 8))
-#define GLV_MPTCH(_i)			(0x0030C004 + ((_i) * 8))
-#define GLV_MPTCL(_i)			(0x0030C000 + ((_i) * 8))
-#define GLV_RDPC(_i)			(0x00294C04 + ((_i) * 4))
-#define GLV_TEPC(_VSI)			(0x00312000 + ((_VSI) * 4))
-#define GLV_UPRCH(_i)			(0x003B2004 + ((_i) * 8))
-#define GLV_UPRCL(_i)			(0x003B2000 + ((_i) * 8))
-#define GLV_UPTCH(_i)			(0x0030A004 + ((_i) * 8))
-#define GLV_UPTCL(_i)			(0x0030A000 + ((_i) * 8))
-#define VSIQF_HKEY_MAX_INDEX		12
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define QRXFLXP_CNTXT(_QRX)			(0x00480000 + ((_QRX) * 4))
+#define QRXFLXP_CNTXT_RXDID_IDX_S		0
+#define QRXFLXP_CNTXT_RXDID_IDX_M		ICE_M(0x3F, 0)
+#define QRXFLXP_CNTXT_RXDID_PRIO_S		8
+#define QRXFLXP_CNTXT_RXDID_PRIO_M		ICE_M(0x7, 8)
+#define GLGEN_RSTAT				0x000B8188
+#define GLGEN_RSTAT_DEVSTATE_M			ICE_M(0x3, 0)
+#define GLGEN_RSTCTL				0x000B8180
+#define GLGEN_RSTCTL_GRSTDEL_S			0
+#define GLGEN_RSTCTL_GRSTDEL_M			ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RSTAT_RESET_TYPE_S		2
+#define GLGEN_RSTAT_RESET_TYPE_M		ICE_M(0x3, 2)
+#define GLGEN_RTRIG				0x000B8190
+#define GLGEN_RTRIG_CORER_M			BIT(0)
+#define GLGEN_RTRIG_GLOBR_M			BIT(1)
+#define GLGEN_STAT				0x000B612C
+#define GLGEN_VFLRSTAT(_i)			(0x00093A04 + ((_i) * 4))
+#define PFGEN_CTRL				0x00091000
+#define PFGEN_CTRL_PFSWR_M			BIT(0)
+#define PFGEN_STATE				0x00088000
+#define PRTGEN_STATUS				0x000B8100
+#define VFGEN_RSTAT(_VF)			(0x00074000 + ((_VF) * 4))
+#define VPGEN_VFRSTAT(_VF)			(0x00090800 + ((_VF) * 4))
+#define VPGEN_VFRSTAT_VFRD_M			BIT(0)
+#define VPGEN_VFRTRIG(_VF)			(0x00090000 + ((_VF) * 4))
+#define VPGEN_VFRTRIG_VFSWR_M			BIT(0)
+#define PFHMC_ERRORDATA				0x00520500
+#define PFHMC_ERRORINFO				0x00520400
+#define GLINT_CTL				0x0016CC54
+#define GLINT_CTL_DIS_AUTOMASK_M		BIT(0)
+#define GLINT_CTL_ITR_GRAN_200_S		16
+#define GLINT_CTL_ITR_GRAN_200_M		ICE_M(0xF, 16)
+#define GLINT_CTL_ITR_GRAN_100_S		20
+#define GLINT_CTL_ITR_GRAN_100_M		ICE_M(0xF, 20)
+#define GLINT_CTL_ITR_GRAN_50_S			24
+#define GLINT_CTL_ITR_GRAN_50_M			ICE_M(0xF, 24)
+#define GLINT_CTL_ITR_GRAN_25_S			28
+#define GLINT_CTL_ITR_GRAN_25_M			ICE_M(0xF, 28)
+#define GLINT_DYN_CTL(_INT)			(0x00160000 + ((_INT) * 4))
+#define GLINT_DYN_CTL_INTENA_M			BIT(0)
+#define GLINT_DYN_CTL_CLEARPBA_M		BIT(1)
+#define GLINT_DYN_CTL_SWINT_TRIG_M		BIT(2)
+#define GLINT_DYN_CTL_ITR_INDX_S		3
+#define GLINT_DYN_CTL_ITR_INDX_M		ICE_M(0x3, 3)
+#define GLINT_DYN_CTL_INTERVAL_S		5
+#define GLINT_DYN_CTL_INTERVAL_M		ICE_M(0xFFF, 5)
+#define GLINT_DYN_CTL_SW_ITR_INDX_M		ICE_M(0x3, 25)
+#define GLINT_DYN_CTL_WB_ON_ITR_M		BIT(30)
+#define GLINT_DYN_CTL_INTENA_MSK_M		BIT(31)
+#define GLINT_ITR(_i, _INT)			(0x00154000 + ((_i) * 8192 + (_INT) * 4))
+#define GLINT_RATE(_INT)			(0x0015A000 + ((_INT) * 4))
+#define GLINT_RATE_INTRL_ENA_M			BIT(6)
+#define GLINT_VECT2FUNC(_INT)			(0x00162000 + ((_INT) * 4))
+#define GLINT_VECT2FUNC_VF_NUM_S		0
+#define GLINT_VECT2FUNC_VF_NUM_M		ICE_M(0xFF, 0)
+#define GLINT_VECT2FUNC_PF_NUM_S		12
+#define GLINT_VECT2FUNC_PF_NUM_M		ICE_M(0x7, 12)
+#define GLINT_VECT2FUNC_IS_PF_S			16
+#define GLINT_VECT2FUNC_IS_PF_M			BIT(16)
+#define PFINT_FW_CTL				0x0016C800
+#define PFINT_FW_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_FW_CTL_ITR_INDX_S			11
+#define PFINT_FW_CTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define PFINT_FW_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_MBX_CTL				0x0016B280
+#define PFINT_MBX_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_MBX_CTL_ITR_INDX_S		11
+#define PFINT_MBX_CTL_ITR_INDX_M		ICE_M(0x3, 11)
+#define PFINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_OICR				0x0016CA00
+#define PFINT_OICR_ECC_ERR_M			BIT(16)
+#define PFINT_OICR_MAL_DETECT_M			BIT(19)
+#define PFINT_OICR_GRST_M			BIT(20)
+#define PFINT_OICR_PCI_EXCEPTION_M		BIT(21)
+#define PFINT_OICR_HMC_ERR_M			BIT(26)
+#define PFINT_OICR_PE_CRITERR_M			BIT(28)
+#define PFINT_OICR_VFLR_M			BIT(29)
+#define PFINT_OICR_SWINT_M			BIT(31)
+#define PFINT_OICR_CTL				0x0016CA80
+#define PFINT_OICR_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_OICR_CTL_ITR_INDX_S		11
+#define PFINT_OICR_CTL_ITR_INDX_M		ICE_M(0x3, 11)
+#define PFINT_OICR_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_OICR_ENA				0x0016C900
+#define QINT_RQCTL(_QRX)			(0x00150000 + ((_QRX) * 4))
+#define QINT_RQCTL_MSIX_INDX_S			0
+#define QINT_RQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
+#define QINT_RQCTL_ITR_INDX_S			11
+#define QINT_RQCTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define QINT_RQCTL_CAUSE_ENA_M			BIT(30)
+#define QINT_TQCTL(_DBQM)			(0x00140000 + ((_DBQM) * 4))
+#define QINT_TQCTL_MSIX_INDX_S			0
+#define QINT_TQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
+#define QINT_TQCTL_ITR_INDX_S			11
+#define QINT_TQCTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define QINT_TQCTL_CAUSE_ENA_M			BIT(30)
+#define VPINT_ALLOC(_VF)			(0x001D1000 + ((_VF) * 4))
+#define VPINT_ALLOC_FIRST_S			0
+#define VPINT_ALLOC_FIRST_M			ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_LAST_S			12
+#define VPINT_ALLOC_LAST_M			ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_VALID_M			BIT(31)
+#define VPINT_ALLOC_PCI(_VF)			(0x0009D000 + ((_VF) * 4))
+#define VPINT_ALLOC_PCI_FIRST_S			0
+#define VPINT_ALLOC_PCI_FIRST_M			ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_PCI_LAST_S			12
+#define VPINT_ALLOC_PCI_LAST_M			ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_PCI_VALID_M			BIT(31)
+#define VPINT_MBX_CTL(_VSI)			(0x0016A000 + ((_VSI) * 4))
+#define VPINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define GLLAN_RCTL_0				0x002941F8
+#define QRX_CONTEXT(_i, _QRX)			(0x00280000 + ((_i) * 8192 + (_QRX) * 4))
+#define QRX_CTRL(_QRX)				(0x00120000 + ((_QRX) * 4))
+#define QRX_CTRL_MAX_INDEX			2047
+#define QRX_CTRL_QENA_REQ_S			0
+#define QRX_CTRL_QENA_REQ_M			BIT(0)
+#define QRX_CTRL_QENA_STAT_S			2
+#define QRX_CTRL_QENA_STAT_M			BIT(2)
+#define QRX_ITR(_QRX)				(0x00292000 + ((_QRX) * 4))
+#define QRX_TAIL(_QRX)				(0x00290000 + ((_QRX) * 4))
+#define QRX_TAIL_MAX_INDEX			2047
+#define QRX_TAIL_TAIL_S				0
+#define QRX_TAIL_TAIL_M				ICE_M(0x1FFF, 0)
+#define VPLAN_RX_QBASE(_VF)			(0x00072000 + ((_VF) * 4))
+#define VPLAN_RX_QBASE_VFFIRSTQ_S		0
+#define VPLAN_RX_QBASE_VFFIRSTQ_M		ICE_M(0x7FF, 0)
+#define VPLAN_RX_QBASE_VFNUMQ_S			16
+#define VPLAN_RX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
+#define VPLAN_RXQ_MAPENA(_VF)			(0x00073000 + ((_VF) * 4))
+#define VPLAN_RXQ_MAPENA_RX_ENA_M		BIT(0)
+#define VPLAN_TX_QBASE(_VF)			(0x001D1800 + ((_VF) * 4))
+#define VPLAN_TX_QBASE_VFFIRSTQ_S		0
+#define VPLAN_TX_QBASE_VFFIRSTQ_M		ICE_M(0x3FFF, 0)
+#define VPLAN_TX_QBASE_VFNUMQ_S			16
+#define VPLAN_TX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
+#define VPLAN_TXQ_MAPENA(_VF)			(0x00073800 + ((_VF) * 4))
+#define VPLAN_TXQ_MAPENA_TX_ENA_M		BIT(0)
+#define GL_MDET_RX				0x00294C00
+#define GL_MDET_RX_QNUM_S			0
+#define GL_MDET_RX_QNUM_M			ICE_M(0x7FFF, 0)
+#define GL_MDET_RX_VF_NUM_S			15
+#define GL_MDET_RX_VF_NUM_M			ICE_M(0xFF, 15)
+#define GL_MDET_RX_PF_NUM_S			23
+#define GL_MDET_RX_PF_NUM_M			ICE_M(0x7, 23)
+#define GL_MDET_RX_MAL_TYPE_S			26
+#define GL_MDET_RX_MAL_TYPE_M			ICE_M(0x1F, 26)
+#define GL_MDET_RX_VALID_M			BIT(31)
+#define GL_MDET_TX_PQM				0x002D2E00
+#define GL_MDET_TX_PQM_PF_NUM_S			0
+#define GL_MDET_TX_PQM_PF_NUM_M			ICE_M(0x7, 0)
+#define GL_MDET_TX_PQM_VF_NUM_S			4
+#define GL_MDET_TX_PQM_VF_NUM_M			ICE_M(0xFF, 4)
+#define GL_MDET_TX_PQM_QNUM_S			12
+#define GL_MDET_TX_PQM_QNUM_M			ICE_M(0x3FFF, 12)
+#define GL_MDET_TX_PQM_MAL_TYPE_S		26
+#define GL_MDET_TX_PQM_MAL_TYPE_M		ICE_M(0x1F, 26)
+#define GL_MDET_TX_PQM_VALID_M			BIT(31)
+#define GL_MDET_TX_TCLAN			0x000FC068
+#define GL_MDET_TX_TCLAN_QNUM_S			0
+#define GL_MDET_TX_TCLAN_QNUM_M			ICE_M(0x7FFF, 0)
+#define GL_MDET_TX_TCLAN_VF_NUM_S		15
+#define GL_MDET_TX_TCLAN_VF_NUM_M		ICE_M(0xFF, 15)
+#define GL_MDET_TX_TCLAN_PF_NUM_S		23
+#define GL_MDET_TX_TCLAN_PF_NUM_M		ICE_M(0x7, 23)
+#define GL_MDET_TX_TCLAN_MAL_TYPE_S		26
+#define GL_MDET_TX_TCLAN_MAL_TYPE_M		ICE_M(0x1F, 26)
+#define GL_MDET_TX_TCLAN_VALID_M		BIT(31)
+#define PF_MDET_RX				0x00294280
+#define PF_MDET_RX_VALID_M			BIT(0)
+#define PF_MDET_TX_PQM				0x002D2C80
+#define PF_MDET_TX_PQM_VALID_M			BIT(0)
+#define PF_MDET_TX_TCLAN			0x000FC000
+#define PF_MDET_TX_TCLAN_VALID_M		BIT(0)
+#define VP_MDET_RX(_VF)				(0x00294400 + ((_VF) * 4))
+#define VP_MDET_RX_VALID_M			BIT(0)
+#define VP_MDET_TX_PQM(_VF)			(0x002D2000 + ((_VF) * 4))
+#define VP_MDET_TX_PQM_VALID_M			BIT(0)
+#define VP_MDET_TX_TCLAN(_VF)			(0x000FB800 + ((_VF) * 4))
+#define VP_MDET_TX_TCLAN_VALID_M		BIT(0)
+#define VP_MDET_TX_TDPU(_VF)			(0x00040000 + ((_VF) * 4))
+#define VP_MDET_TX_TDPU_VALID_M			BIT(0)
+#define GLNVM_FLA				0x000B6108
+#define GLNVM_FLA_LOCKED_M			BIT(6)
+#define GLNVM_GENS				0x000B6100
+#define GLNVM_GENS_SR_SIZE_S			5
+#define GLNVM_GENS_SR_SIZE_M			ICE_M(0x7, 5)
+#define GLNVM_ULD				0x000B6008
+#define GLNVM_ULD_CORER_DONE_M			BIT(3)
+#define GLNVM_ULD_GLOBR_DONE_M			BIT(4)
+#define GLPCI_CNF2				0x000BE004
+#define GLPCI_CNF2_CACHELINE_SIZE_M		BIT(1)
+#define PF_FUNC_RID				0x0009E880
+#define PF_FUNC_RID_FUNC_NUM_S			0
+#define PF_FUNC_RID_FUNC_NUM_M			ICE_M(0x7, 0)
+#define PF_PCI_CIAA				0x0009E580
+#define PF_PCI_CIAA_VF_NUM_S			12
+#define PF_PCI_CIAD				0x0009E500
+#define GL_PWR_MODE_CTL				0x000B820C
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_S		30
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_M		ICE_M(0x3, 30)
+#define GLPRT_BPRCL(_i)				(0x00381380 + ((_i) * 8))
+#define GLPRT_BPTCL(_i)				(0x00381240 + ((_i) * 8))
+#define GLPRT_CRCERRS(_i)			(0x00380100 + ((_i) * 8))
+#define GLPRT_GORCL(_i)				(0x00380000 + ((_i) * 8))
+#define GLPRT_GOTCL(_i)				(0x00380B40 + ((_i) * 8))
+#define GLPRT_ILLERRC(_i)			(0x003801C0 + ((_i) * 8))
+#define GLPRT_LXOFFRXC(_i)			(0x003802C0 + ((_i) * 8))
+#define GLPRT_LXOFFTXC(_i)			(0x00381180 + ((_i) * 8))
+#define GLPRT_LXONRXC(_i)			(0x00380280 + ((_i) * 8))
+#define GLPRT_LXONTXC(_i)			(0x00381140 + ((_i) * 8))
+#define GLPRT_MLFC(_i)				(0x00380040 + ((_i) * 8))
+#define GLPRT_MPRCL(_i)				(0x00381340 + ((_i) * 8))
+#define GLPRT_MPTCL(_i)				(0x00381200 + ((_i) * 8))
+#define GLPRT_MRFC(_i)				(0x00380080 + ((_i) * 8))
+#define GLPRT_PRC1023L(_i)			(0x00380A00 + ((_i) * 8))
+#define GLPRT_PRC127L(_i)			(0x00380940 + ((_i) * 8))
+#define GLPRT_PRC1522L(_i)			(0x00380A40 + ((_i) * 8))
+#define GLPRT_PRC255L(_i)			(0x00380980 + ((_i) * 8))
+#define GLPRT_PRC511L(_i)			(0x003809C0 + ((_i) * 8))
+#define GLPRT_PRC64L(_i)			(0x00380900 + ((_i) * 8))
+#define GLPRT_PRC9522L(_i)			(0x00380A80 + ((_i) * 8))
+#define GLPRT_PTC1023L(_i)			(0x00380C80 + ((_i) * 8))
+#define GLPRT_PTC127L(_i)			(0x00380BC0 + ((_i) * 8))
+#define GLPRT_PTC1522L(_i)			(0x00380CC0 + ((_i) * 8))
+#define GLPRT_PTC255L(_i)			(0x00380C00 + ((_i) * 8))
+#define GLPRT_PTC511L(_i)			(0x00380C40 + ((_i) * 8))
+#define GLPRT_PTC64L(_i)			(0x00380B80 + ((_i) * 8))
+#define GLPRT_PTC9522L(_i)			(0x00380D00 + ((_i) * 8))
+#define GLPRT_PXOFFRXC(_i, _j)			(0x00380500 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXOFFTXC(_i, _j)			(0x00380F40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONRXC(_i, _j)			(0x00380300 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONTXC(_i, _j)			(0x00380D40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_RFC(_i)				(0x00380AC0 + ((_i) * 8))
+#define GLPRT_RJC(_i)				(0x00380B00 + ((_i) * 8))
+#define GLPRT_RLEC(_i)				(0x00380140 + ((_i) * 8))
+#define GLPRT_ROC(_i)				(0x00380240 + ((_i) * 8))
+#define GLPRT_RUC(_i)				(0x00380200 + ((_i) * 8))
+#define GLPRT_RXON2OFFCNT(_i, _j)		(0x00380700 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_TDOLD(_i)				(0x00381280 + ((_i) * 8))
+#define GLPRT_UPRCL(_i)				(0x00381300 + ((_i) * 8))
+#define GLPRT_UPTCL(_i)				(0x003811C0 + ((_i) * 8))
+#define GLV_BPRCL(_i)				(0x003B6000 + ((_i) * 8))
+#define GLV_BPTCL(_i)				(0x0030E000 + ((_i) * 8))
+#define GLV_GORCL(_i)				(0x003B0000 + ((_i) * 8))
+#define GLV_GOTCL(_i)				(0x00300000 + ((_i) * 8))
+#define GLV_MPRCL(_i)				(0x003B4000 + ((_i) * 8))
+#define GLV_MPTCL(_i)				(0x0030C000 + ((_i) * 8))
+#define GLV_RDPC(_i)				(0x00294C04 + ((_i) * 4))
+#define GLV_TEPC(_VSI)				(0x00312000 + ((_VSI) * 4))
+#define GLV_UPRCL(_i)				(0x003B2000 + ((_i) * 8))
+#define GLV_UPTCL(_i)				(0x0030A000 + ((_i) * 8))
+#define PF_VT_PFALLOC_HIF			0x0009DD80
+#define VSIQF_HKEY_MAX_INDEX			12
+#define VSIQF_HLUT_MAX_INDEX			15
+#define VFINT_DYN_CTLN(_i)			(0x00003800 + ((_i) * 4))
+#define VFINT_DYN_CTLN_CLEARPBA_M		BIT(1)
+#define PRTRPB_RDPC				0x000AC260
 
 #endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 068dbc7..2aac8f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -6,11 +6,11 @@
 
 union ice_32byte_rx_desc {
 	struct {
-		__le64  pkt_addr; /* Packet buffer address */
-		__le64  hdr_addr; /* Header buffer address */
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
 			/* bit 0 of hdr_addr is DD bit */
-		__le64  rsvd1;
-		__le64  rsvd2;
+		__le64 rsvd1;
+		__le64 rsvd2;
 	} read;
 	struct {
 		struct {
@@ -20,7 +20,7 @@
 			} lo_dword;
 			union {
 				__le32 rss; /* RSS Hash */
-				__le32 fd_id; /* Flow Director filter id */
+				__le32 fd_id; /* Flow Director filter ID */
 			} hi_dword;
 		} qword0;
 		struct {
@@ -99,21 +99,21 @@
 	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
 };
 
-/* RX Flex Descriptor
+/* Rx Flex Descriptor
  * This descriptor is used instead of the legacy version descriptor when
  * ice_rlan_ctx.adv_desc is set
  */
 union ice_32b_rx_flex_desc {
 	struct {
-		__le64  pkt_addr; /* Packet buffer address */
-		__le64  hdr_addr; /* Header buffer address */
-				  /* bit 0 of hdr_addr is DD bit */
-		__le64  rsvd1;
-		__le64  rsvd2;
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
 	} read;
 	struct {
 		/* Qword 0 */
-		u8 rxdid; /* descriptor builder profile id */
+		u8 rxdid; /* descriptor builder profile ID */
 		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
 		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
 		__le16 pkt_len; /* [15:14] are reserved */
@@ -149,7 +149,7 @@
 
 /* Rx Flex Descriptor NIC Profile
  * This descriptor corresponds to RxDID 2 which contains
- * metadata fields for RSS, flow id and timestamp info
+ * metadata fields for RSS, flow ID and timestamp info
  */
 struct ice_32b_rx_flex_desc_nic {
 	/* Qword 0 */
@@ -188,41 +188,43 @@
  * with a specific metadata (profile 7 reserved for HW)
  */
 enum ice_rxdid {
-	ICE_RXDID_START			= 0,
-	ICE_RXDID_LEGACY_0		= ICE_RXDID_START,
-	ICE_RXDID_LEGACY_1,
-	ICE_RXDID_FLX_START,
-	ICE_RXDID_FLEX_NIC		= ICE_RXDID_FLX_START,
-	ICE_RXDID_FLX_LAST		= 63,
-	ICE_RXDID_LAST			= ICE_RXDID_FLX_LAST
+	ICE_RXDID_LEGACY_0		= 0,
+	ICE_RXDID_LEGACY_1		= 1,
+	ICE_RXDID_FLEX_NIC		= 2,
+	ICE_RXDID_FLEX_NIC_2		= 6,
+	ICE_RXDID_HW			= 7,
+	ICE_RXDID_LAST			= 63,
 };
 
 /* Receive Flex Descriptor Rx opcode values */
 #define ICE_RX_OPC_MDID		0x01
 
 /* Receive Descriptor MDID values */
-#define ICE_RX_MDID_FLOW_ID_LOWER	5
-#define ICE_RX_MDID_FLOW_ID_HIGH	6
-#define ICE_RX_MDID_HASH_LOW		56
-#define ICE_RX_MDID_HASH_HIGH		57
+enum ice_flex_rx_mdid {
+	ICE_RX_MDID_FLOW_ID_LOWER	= 5,
+	ICE_RX_MDID_FLOW_ID_HIGH,
+	ICE_RX_MDID_SRC_VSI		= 19,
+	ICE_RX_MDID_HASH_LOW		= 56,
+	ICE_RX_MDID_HASH_HIGH,
+};
 
-/* Rx Flag64 packet flag bits */
-enum ice_rx_flg64_bits {
-	ICE_RXFLG_PKT_DSI	= 0,
-	ICE_RXFLG_EVLAN_x8100	= 15,
-	ICE_RXFLG_EVLAN_x9100,
-	ICE_RXFLG_VLAN_x8100,
-	ICE_RXFLG_TNL_MAC	= 22,
-	ICE_RXFLG_TNL_VLAN,
-	ICE_RXFLG_PKT_FRG,
-	ICE_RXFLG_FIN		= 32,
-	ICE_RXFLG_SYN,
-	ICE_RXFLG_RST,
-	ICE_RXFLG_TNL0		= 38,
-	ICE_RXFLG_TNL1,
-	ICE_RXFLG_TNL2,
-	ICE_RXFLG_UDP_GRE,
-	ICE_RXFLG_RSVD		= 63
+/* Rx/Tx Flag64 packet flag bits */
+enum ice_flg64_bits {
+	ICE_FLG_PKT_DSI		= 0,
+	ICE_FLG_EVLAN_x8100	= 15,
+	ICE_FLG_EVLAN_x9100,
+	ICE_FLG_VLAN_x8100,
+	ICE_FLG_TNL_MAC		= 22,
+	ICE_FLG_TNL_VLAN,
+	ICE_FLG_PKT_FRG,
+	ICE_FLG_FIN		= 32,
+	ICE_FLG_SYN,
+	ICE_FLG_RST,
+	ICE_FLG_TNL0		= 38,
+	ICE_FLG_TNL1,
+	ICE_FLG_TNL2,
+	ICE_FLG_UDP_GRE,
+	ICE_FLG_RSVD		= 63
 };
 
 /* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */
@@ -254,6 +256,9 @@
 
 #define ICE_RXQ_CTX_SIZE_DWORDS		8
 #define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+#define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS	22
+#define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS	5
+#define GLTCLAN_CQ_CNTX(i, CQ)		(GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
 
 /* RLAN Rx queue context data
  *
@@ -272,19 +277,20 @@
 	u16 dbuf; /* bigger than needed, see above for reason */
 #define ICE_RLAN_CTX_HBUF_S 6
 	u16 hbuf; /* bigger than needed, see above for reason */
-	u8  dtype;
-	u8  dsize;
-	u8  crcstrip;
-	u8  l2tsel;
-	u8  hsplit_0;
-	u8  hsplit_1;
-	u8  showiv;
+	u8 dtype;
+	u8 dsize;
+	u8 crcstrip;
+	u8 l2tsel;
+	u8 hsplit_0;
+	u8 hsplit_1;
+	u8 showiv;
 	u32 rxmax; /* bigger than needed, see above for reason */
-	u8  tphrdesc_ena;
-	u8  tphwdesc_ena;
-	u8  tphdata_ena;
-	u8  tphhead_ena;
+	u8 tphrdesc_ena;
+	u8 tphwdesc_ena;
+	u8 tphdata_ena;
+	u8 tphhead_ena;
 	u16 lrxqthresh; /* bigger than needed, see above for reason */
+	u8 prefena;	/* NOTE: normally must be set to 1 at init */
 };
 
 struct ice_ctx_ele {
@@ -317,7 +323,7 @@
 	ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS	= 2,
 };
 
-/* TX Descriptor */
+/* Tx Descriptor */
 struct ice_tx_desc {
 	__le64 buf_addr; /* Address of descriptor's data buf */
 	__le64 cmd_type_offset_bsz;
@@ -337,11 +343,12 @@
 	ICE_TX_DESC_CMD_EOP			= 0x0001,
 	ICE_TX_DESC_CMD_RS			= 0x0002,
 	ICE_TX_DESC_CMD_IL2TAG1			= 0x0008,
-	ICE_TX_DESC_CMD_IIPT_IPV6		= 0x0020, /* 2 BITS */
-	ICE_TX_DESC_CMD_IIPT_IPV4		= 0x0040, /* 2 BITS */
-	ICE_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060, /* 2 BITS */
-	ICE_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100, /* 2 BITS */
-	ICE_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300, /* 2 BITS */
+	ICE_TX_DESC_CMD_IIPT_IPV6		= 0x0020,
+	ICE_TX_DESC_CMD_IIPT_IPV4		= 0x0040,
+	ICE_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060,
+	ICE_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100,
+	ICE_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200,
+	ICE_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300,
 };
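+
+/* Illustrative note (not part of the driver source): these command bits are
+ * ORed into the descriptor's cmd_type_offset_bsz field; e.g. the last
+ * fragment of a packet that should report completion carries
+ * (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS) == 0x0003.
+ */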
 
 #define ICE_TXD_QW1_OFFSET_S	16
@@ -411,34 +418,36 @@
 struct ice_tlan_ctx {
 #define ICE_TLAN_CTX_BASE_S	7
 	u64 base;		/* base is defined in 128-byte units */
-	u8  port_num;
+	u8 port_num;
 	u16 cgd_num;		/* bigger than needed, see above for reason */
-	u8  pf_num;
+	u8 pf_num;
 	u16 vmvf_num;
-	u8  vmvf_type;
+	u8 vmvf_type;
+#define ICE_TLAN_CTX_VMVF_TYPE_VF	0
 #define ICE_TLAN_CTX_VMVF_TYPE_VMQ	1
 #define ICE_TLAN_CTX_VMVF_TYPE_PF	2
 	u16 src_vsi;
-	u8  tsyn_ena;
-	u8  alt_vlan;
+	u8 tsyn_ena;
+	u8 internal_usage_flag;
+	u8 alt_vlan;
 	u16 cpuid;		/* bigger than needed, see above for reason */
-	u8  wb_mode;
-	u8  tphrd_desc;
-	u8  tphrd;
-	u8  tphwr_desc;
+	u8 wb_mode;
+	u8 tphrd_desc;
+	u8 tphrd;
+	u8 tphwr_desc;
 	u16 cmpq_id;
 	u16 qnum_in_func;
-	u8  itr_notification_mode;
-	u8  adjust_prof_id;
+	u8 itr_notification_mode;
+	u8 adjust_prof_id;
 	u32 qlen;		/* bigger than needed, see above for reason */
-	u8  quanta_prof_idx;
-	u8  tso_ena;
+	u8 quanta_prof_idx;
+	u8 tso_ena;
 	u16 tso_qnum;
-	u8  legacy_int;
-	u8  drop_ena;
-	u8  cache_prof_idx;
-	u8  pkt_shaper_prof_idx;
-	u8  int_q_state;	/* width not needed - internal do not write */
+	u8 legacy_int;
+	u8 drop_ena;
+	u8 cache_prof_idx;
+	u8 pkt_shaper_prof_idx;
+	u8 int_q_state;	/* width not needed - internal do not write */
 };
 
 /* macro to make the table lines short */
@@ -471,4 +480,18 @@
 {
 	return ice_ptype_lkup[ptype];
 }
+
+#define ICE_LINK_SPEED_UNKNOWN		0
+#define ICE_LINK_SPEED_10MBPS		10
+#define ICE_LINK_SPEED_100MBPS		100
+#define ICE_LINK_SPEED_1000MBPS		1000
+#define ICE_LINK_SPEED_2500MBPS		2500
+#define ICE_LINK_SPEED_5000MBPS		5000
+#define ICE_LINK_SPEED_10000MBPS	10000
+#define ICE_LINK_SPEED_20000MBPS	20000
+#define ICE_LINK_SPEED_25000MBPS	25000
+#define ICE_LINK_SPEED_40000MBPS	40000
+#define ICE_LINK_SPEED_50000MBPS	50000
+#define ICE_LINK_SPEED_100000MBPS	100000
+
 #endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
new file mode 100644
index 0000000..cc75538
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -0,0 +1,3301 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+static int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 rxdid = ICE_RXDID_FLEX_NIC;
+	struct ice_rlan_ctx rlan_ctx;
+	u32 regval;
+	u16 pf_q;
+	int err;
+
+	/* the Rx queue number in the global space of 2K Rx queues */
+	pf_q = vsi->rxq_map[ring->q_index];
+
+	/* clear the context structure first */
+	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+	rlan_ctx.base = ring->dma >> 7;
+
+	rlan_ctx.qlen = ring->count;
+
+	/* Receive Packet Data Buffer Size.
+	 * The Packet Data Buffer Size is defined in 128 byte units.
+	 */
+	rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
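+	/* e.g. (illustrative, assuming ICE_RLAN_CTX_DBUF_S == 7): a 2048-byte
+	 * buffer is programmed as dbuf = 2048 >> 7 = 16
+	 */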
+
+	/* use 32 byte descriptors */
+	rlan_ctx.dsize = 1;
+
+	/* Strip the Ethernet CRC bytes before the packet is posted to host
+	 * memory.
+	 */
+	rlan_ctx.crcstrip = 1;
+
+	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+	rlan_ctx.l2tsel = 1;
+
+	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+	/* This controls whether the VLAN is stripped from inner headers.
+	 * If enabled by this flag, the VLAN in the inner L2 header is
+	 * stripped and reported in the receive descriptor.
+	 */
+	rlan_ctx.showiv = 0;
+
+	/* Max packet size for this queue - must not be set to a larger value
+	 * than 5 x DBUF
+	 */
+	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+			       ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
+
+	/* Rx queue threshold in units of 64 */
+	rlan_ctx.lrxqthresh = 1;
+
+	/* Enable Flexible Descriptors in the queue context, which
+	 * allows this driver to select a specific receive descriptor format
+	 */
+	if (vsi->type != ICE_VSI_VF) {
+		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+			QRXFLXP_CNTXT_RXDID_IDX_M;
+
+		/* increase the context priority to pick up the profile ID;
+		 * the default is 0x01; setting it to 0x03 ensures the profile
+		 * is programmed even if the previous context has the same
+		 * priority
+		 */
+		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+			QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+	}
+
+	/* the absolute queue number out of the 2K queues must be passed */
+	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+	if (err) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+			pf_q, err);
+		return -EIO;
+	}
+
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	/* init queue specific tail register */
+	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+	ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+
+	return 0;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+
+	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+	tlan_ctx->port_num = vsi->port_info->lport;
+
+	/* Transmit Queue Length */
+	tlan_ctx->qlen = ring->count;
+
+	ice_set_cgd_num(tlan_ctx, ring);
+
+	/* PF number */
+	tlan_ctx->pf_num = hw->pf_id;
+
+	/* the queue belongs to a specific VSI type
+	 * VF / VM index should be programmed per vmvf_type setting:
+	 * for vmvf_type = VF, it is the VF number between 0-255
+	 * for vmvf_type = VM, it is the VM number between 0-767
+	 * for PF or EMP this field should be set to zero
+	 */
+	switch (vsi->type) {
+	case ICE_VSI_LB:
+		/* fall through */
+	case ICE_VSI_PF:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* Firmware expects vmvf_num to be absolute VF ID */
+		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+		break;
+	default:
+		return;
+	}
+
+	/* make sure the context is associated with the right VSI */
+	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+	tlan_ctx->tso_ena = ICE_TX_LEGACY;
+	tlan_ctx->tso_qnum = pf_q;
+
+	/* Legacy or Advanced Host Interface:
+	 * 0: Advanced Host Interface
+	 * 1: Legacy Host Interface
+	 */
+	tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT if the queue fails to reach the requested state after
+ * multiple retries; otherwise returns 0.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+	int i;
+
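+	/* poll QRX_CTRL until the QENA_STAT bit matches the requested state;
+	 * each iteration sleeps 20-40 usecs, so the worst-case wait before
+	 * timing out is roughly ICE_Q_WAIT_MAX_RETRY * 40 usecs
+	 */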
+	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+			      QRX_CTRL_QENA_STAT_M))
+			return 0;
+
+		usleep_range(20, 40);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
+ */
+#ifndef CONFIG_PCI_IOV
+static
+#endif /* !CONFIG_PCI_IOV */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int ret = 0;
+	u32 rx_reg;
+
+	rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+	/* Skip if the queue is already in the requested state */
+	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+		return 0;
+
+	/* turn on/off the queue */
+	if (ena)
+		rx_reg |= QRX_CTRL_QENA_REQ_M;
+	else
+		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+	wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+	/* wait for the change to finish */
+	ret = ice_pf_rxq_wait(pf, pf_q, ena);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"VSI idx %d Rx ring %d %sable timeout\n",
+			vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+	return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_rings - Start or stop a VSI's Rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ */
+static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < vsi->num_rxq; i++) {
+		ret = ice_vsi_ctrl_rx_ring(vsi, ena, i);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the VSI
+ * @vsi: VSI pointer
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ */
+static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+
+	/* allocate memory for both Tx and Rx ring pointers */
+	vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+				     sizeof(*vsi->tx_rings), GFP_KERNEL);
+	if (!vsi->tx_rings)
+		return -ENOMEM;
+
+	vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+				     sizeof(*vsi->rx_rings), GFP_KERNEL);
+	if (!vsi->rx_rings)
+		goto err_rings;
+
+	vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+				    sizeof(*vsi->txq_map), GFP_KERNEL);
+	if (!vsi->txq_map)
+		goto err_txq_map;
+
+	vsi->rxq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+				    sizeof(*vsi->rxq_map), GFP_KERNEL);
+	if (!vsi->rxq_map)
+		goto err_rxq_map;
+
+	/* There is no need to allocate q_vectors for a loopback VSI. */
+	if (vsi->type == ICE_VSI_LB)
+		return 0;
+
+	/* allocate memory for q_vector pointers */
+	vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors,
+				      sizeof(*vsi->q_vectors), GFP_KERNEL);
+	if (!vsi->q_vectors)
+		goto err_vectors;
+
+	return 0;
+
+err_vectors:
+	devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+err_rxq_map:
+	devm_kfree(&pf->pdev->dev, vsi->txq_map);
+err_txq_map:
+	devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+err_rings:
+	devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_set_num_desc - Set number of descriptors for queues on this VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
+{
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* fall through */
+	case ICE_VSI_LB:
+		vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
+		vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
+		break;
+	default:
+		dev_dbg(&vsi->back->pdev->dev,
+			"Not setting number of Tx/Rx descriptors for VSI type %d\n",
+			vsi->type);
+		break;
+	}
+}
+
+/**
+ * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
+ * @vsi: the VSI being configured
+ * @vf_id: ID of the VF being configured
+ */
+static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf = NULL;
+
+	if (vsi->type == ICE_VSI_VF)
+		vsi->vf_id = vf_id;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf),
+				       num_online_cpus());
+
+		pf->num_lan_tx = vsi->alloc_txq;
+
+		/* only 1 Rx queue unless RSS is enabled */
+		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+			vsi->alloc_rxq = 1;
+		else
+			vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf),
+					       num_online_cpus());
+
+		pf->num_lan_rx = vsi->alloc_rxq;
+
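+		/* a single q_vector can service both a Tx and an Rx ring, so
+		 * only max(alloc_txq, alloc_rxq) vectors are needed
+		 */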
+		vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq);
+		break;
+	case ICE_VSI_VF:
+		vf = &pf->vf[vsi->vf_id];
+		vsi->alloc_txq = vf->num_vf_qs;
+		vsi->alloc_rxq = vf->num_vf_qs;
+		/* pf->num_vf_msix includes the VF miscellaneous vector plus
+		 * the data queue interrupts. Since vsi->num_q_vectors counts
+		 * only queue vectors, subtract 1 (ICE_NONQ_VECS_VF) from the
+		 * original vector count
+		 */
+		vsi->num_q_vectors = pf->num_vf_msix - ICE_NONQ_VECS_VF;
+		break;
+	case ICE_VSI_LB:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		break;
+	}
+
+	ice_vsi_set_num_desc(vsi);
+}
+
+/**
+ * ice_get_free_slot - get the next free (NULL) slot index in the array
+ * @array: array to search
+ * @size: size of the array
+ * @curr: last known occupied index to be used as a search hint
+ *
+ * void * is being used to keep the functionality generic. This lets us use this
+ * function on any array of pointers.
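+ *
+ * For example, with array = {A, B, NULL, C} and curr = 1, index 2 is
+ * returned; if no free slot exists, ICE_NO_VSI is returned instead.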
+ */
+static int ice_get_free_slot(void *array, int size, int curr)
+{
+	int **tmp_array = (int **)array;
+	int next;
+
+	if (curr < (size - 1) && !tmp_array[curr + 1]) {
+		next = curr + 1;
+	} else {
+		int i = 0;
+
+		while ((i < size) && (tmp_array[i]))
+			i++;
+		if (i == size)
+			next = ICE_NO_VSI;
+		else
+			next = i;
+	}
+	return next;
+}
+
+/**
+ * ice_vsi_delete - delete a VSI from the switch
+ * @vsi: pointer to VSI being removed
+ */
+void ice_vsi_delete(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+
+	ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return;
+
+	if (vsi->type == ICE_VSI_VF)
+		ctxt->vf_num = vsi->vf_id;
+	ctxt->vsi_num = vsi->vsi_num;
+
+	memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info));
+
+	status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
+	if (status)
+		dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n",
+			vsi->vsi_num);
+
+	devm_kfree(&pf->pdev->dev, ctxt);
+}
+
+/**
+ * ice_vsi_free_arrays - De-allocate queue and vector pointer arrays for the VSI
+ * @vsi: pointer to VSI being cleared
+ */
+static void ice_vsi_free_arrays(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+
+	/* free the ring and vector containers */
+	if (vsi->q_vectors) {
+		devm_kfree(&pf->pdev->dev, vsi->q_vectors);
+		vsi->q_vectors = NULL;
+	}
+	if (vsi->tx_rings) {
+		devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+		vsi->tx_rings = NULL;
+	}
+	if (vsi->rx_rings) {
+		devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+		vsi->rx_rings = NULL;
+	}
+	if (vsi->txq_map) {
+		devm_kfree(&pf->pdev->dev, vsi->txq_map);
+		vsi->txq_map = NULL;
+	}
+	if (vsi->rxq_map) {
+		devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+		vsi->rxq_map = NULL;
+	}
+}
+
+/**
+ * ice_vsi_clear - clean up and deallocate the provided VSI
+ * @vsi: pointer to VSI being cleared
+ *
+ * This deallocates the VSI's queue resources, removes it from the PF's
+ * VSI array if necessary, and deallocates the VSI
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_vsi_clear(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = NULL;
+
+	if (!vsi)
+		return 0;
+
+	if (!vsi->back)
+		return -EINVAL;
+
+	pf = vsi->back;
+
+	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
+		dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n",
+			vsi->idx);
+		return -EINVAL;
+	}
+
+	mutex_lock(&pf->sw_mutex);
+
+	/* update the PF for this cleared VSI */
+	pf->vsi[vsi->idx] = NULL;
+	if (vsi->idx < pf->next_vsi)
+		pf->next_vsi = vsi->idx;
+
+	ice_vsi_free_arrays(vsi);
+	mutex_unlock(&pf->sw_mutex);
+	devm_kfree(&pf->pdev->dev, vsi);
+
+	return 0;
+}
+
+/**
+ * ice_msix_clean_rings - MSI-X mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
+		return IRQ_HANDLED;
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ice_vsi_alloc - Allocates the next available struct ice_vsi in the PF
+ * @pf: board private structure
+ * @type: type of VSI
+ * @vf_id: ID of the VF being configured
+ *
+ * returns a pointer to a VSI on success, NULL on failure.
+ */
+static struct ice_vsi *
+ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
+{
+	struct ice_vsi *vsi = NULL;
+
+	/* Need to protect the allocation of the VSIs at the PF level */
+	mutex_lock(&pf->sw_mutex);
+
+	/* If we have already allocated our maximum number of VSIs,
+	 * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
+	 * is available to be populated
+	 */
+	if (pf->next_vsi == ICE_NO_VSI) {
+		dev_dbg(&pf->pdev->dev, "out of VSI slots!\n");
+		goto unlock_pf;
+	}
+
+	vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL);
+	if (!vsi)
+		goto unlock_pf;
+
+	vsi->type = type;
+	vsi->back = pf;
+	set_bit(__ICE_DOWN, vsi->state);
+
+	vsi->idx = pf->next_vsi;
+
+	if (type == ICE_VSI_VF)
+		ice_vsi_set_num_qs(vsi, vf_id);
+	else
+		ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+
+		/* Setup default MSIX irq handler for VSI */
+		vsi->irq_handler = ice_msix_clean_rings;
+		break;
+	case ICE_VSI_VF:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+		break;
+	case ICE_VSI_LB:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		goto unlock_pf;
+	}
+
+	/* fill VSI slot in the PF struct */
+	pf->vsi[pf->next_vsi] = vsi;
+
+	/* prepare pf->next_vsi for next use */
+	pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+					 pf->next_vsi);
+	goto unlock_pf;
+
+err_rings:
+	devm_kfree(&pf->pdev->dev, vsi);
+	vsi = NULL;
+unlock_pf:
+	mutex_unlock(&pf->sw_mutex);
+	return vsi;
+}
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM when there is no space left in the PF
+ * queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+	int offset, i;
+
+	mutex_lock(qs_cfg->qs_mutex);
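+	/* look for a run of q_count contiguous clear bits in the PF queue
+	 * bitmap; bitmap_find_next_zero_area() returns a start index >=
+	 * pf_map_size when no such run exists
+	 */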
+	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+					    0, qs_cfg->q_count, 0);
+	if (offset >= qs_cfg->pf_map_size) {
+		mutex_unlock(qs_cfg->qs_mutex);
+		return -ENOMEM;
+	}
+
+	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+	for (i = 0; i < qs_cfg->q_count; i++)
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM when there is no space left in the PF
+ * queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+	int i, index = 0;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	for (i = 0; i < qs_cfg->q_count; i++) {
+		index = find_next_zero_bit(qs_cfg->pf_map,
+					   qs_cfg->pf_map_size, index);
+		if (index >= qs_cfg->pf_map_size)
+			goto err_scatter;
+		set_bit(index, qs_cfg->pf_map);
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+err_scatter:
+	for (index = 0; index < i; index++) {
+		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return -ENOMEM;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM when there is no space left in the PF
+ * queue bitmap
+ */
+static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+	int ret = 0;
+
+	ret = __ice_vsi_get_qs_contig(qs_cfg);
+	if (ret) {
+		/* contig failed, so try with scatter approach */
+		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+		qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+					qs_cfg->scatter_count);
+		ret = __ice_vsi_get_qs_sc(qs_cfg);
+	}
+	return ret;
+}
+
+/**
+ * ice_vsi_get_qs - Assign queues from PF to VSI
+ * @vsi: the VSI to assign queues to
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg tx_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->alloc_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = 0,
+		.mapping_mode = vsi->tx_mapping_mode
+	};
+	struct ice_qs_cfg rx_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_rxqs,
+		.pf_map_size = pf->max_pf_rxqs,
+		.q_count = vsi->alloc_rxq,
+		.scatter_count = ICE_MAX_SCATTER_RXQS,
+		.vsi_map = vsi->rxq_map,
+		.vsi_map_offset = 0,
+		.mapping_mode = vsi->rx_mapping_mode
+	};
+	int ret = 0;
+
+	vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG;
+	vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG;
+
+	ret = __ice_vsi_get_qs(&tx_qs_cfg);
+	if (!ret)
+		ret = __ice_vsi_get_qs(&rx_qs_cfg);
+
+	return ret;
+}
+
+/**
+ * ice_vsi_put_qs - Release queues from VSI to PF
+ * @vsi: the VSI that is going to release queues
+ */
+void ice_vsi_put_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	mutex_lock(&pf->avail_q_mutex);
+
+	for (i = 0; i < vsi->alloc_txq; i++) {
+		clear_bit(vsi->txq_map[i], pf->avail_txqs);
+		vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
+		vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	mutex_unlock(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_is_safe_mode - report whether the driver is in safe mode
+ * @pf: pointer to the PF struct
+ *
+ * Returns true if the driver is in safe mode, false otherwise
+ */
+bool ice_is_safe_mode(struct ice_pf *pf)
+{
+	return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_rss_clean - Delete RSS related VSI structures that hold user inputs
+ * @vsi: the VSI being removed
+ */
+static void ice_rss_clean(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf;
+
+	pf = vsi->back;
+
+	if (vsi->rss_hkey_user)
+		devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user);
+	if (vsi->rss_lut_user)
+		devm_kfree(&pf->pdev->dev, vsi->rss_lut_user);
+}
+
+/**
+ * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
+{
+	struct ice_hw_common_caps *cap;
+	struct ice_pf *pf = vsi->back;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		vsi->rss_size = 1;
+		return;
+	}
+
+	cap = &pf->hw.func_caps.common_cap;
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		vsi->rss_table_size = cap->rss_table_size;
+		vsi->rss_size = min_t(int, num_online_cpus(),
+				      BIT(cap->rss_table_entry_width));
+		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* VF VSI will get a small RSS table.
+		 * For VSI_LUT, the LUT size should be set to 64 bytes
+		 */
+		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+		vsi->rss_size = min_t(int, num_online_cpus(),
+				      BIT(cap->rss_table_entry_width));
+		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
+		break;
+	case ICE_VSI_LB:
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n",
+			 vsi->type);
+		break;
+	}
+}
+
+/**
+ * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ *
+ * This initializes a default VSI context for all sections except the Queues.
+ */
+static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
+{
+	u32 table = 0;
+
+	memset(&ctxt->info, 0, sizeof(ctxt->info));
+	/* VSIs should be allocated from the shared pool */
+	ctxt->alloc_from_pool = true;
+	/* Src pruning enabled by default */
+	ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
+	/* Traffic from VSI can be sent to LAN */
+	ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+	/* By default, bits 3 and 4 in vlan_flags are 0, which results in legacy
+	 * behavior (show VLAN, DEI, and UP) in the descriptor. Also, allow all
+	 * untagged/tagged packets.
+	 */
+	ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL &
+				  ICE_AQ_VSI_VLAN_MODE_M) >>
+				 ICE_AQ_VSI_VLAN_MODE_S);
+	/* Have 1:1 UP mapping for both ingress/egress tables */
+	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
+	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
+	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
+	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
+	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
+	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
+	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
+	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
+	ctxt->info.ingress_table = cpu_to_le32(table);
+	ctxt->info.egress_table = cpu_to_le32(table);
+	/* Have 1:1 UP mapping for outer to inner UP table */
+	ctxt->info.outer_up_table = cpu_to_le32(table);
+	/* No outer tag support, so outer_tag_flags remains set to zero */
+}
+
+/**
+ * ice_vsi_setup_q_map - Setup a VSI queue map
+ * @vsi: the VSI being configured
+ * @ctxt: VSI context structure
+ */
+static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+	u16 offset = 0, qmap = 0, tx_count = 0;
+	u16 qcount_tx = vsi->alloc_txq;
+	u16 qcount_rx = vsi->alloc_rxq;
+	u16 tx_numq_tc, rx_numq_tc;
+	u16 pow = 0, max_rss = 0;
+	bool ena_tc0 = false;
+	u8 netdev_tc = 0;
+	int i;
+
+	/* at least TC0 should be enabled by default */
+	if (vsi->tc_cfg.numtc) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(0)))
+			ena_tc0 = true;
+	} else {
+		ena_tc0 = true;
+	}
+
+	if (ena_tc0) {
+		vsi->tc_cfg.numtc++;
+		vsi->tc_cfg.ena_tc |= 1;
+	}
+
+	rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc;
+	if (!rx_numq_tc)
+		rx_numq_tc = 1;
+	tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc;
+	if (!tx_numq_tc)
+		tx_numq_tc = 1;
+
+	/* TC mapping is a function of the number of Rx queues assigned to the
+	 * VSI for each traffic class and the offset of these queues.
+	 * The first 10 bits are the queue offset for TC0, and the next 4 bits
+	 * the number of queues allocated to TC0. The number of queues is
+	 * always a power-of-2.
+	 *
+	 * If a TC is not enabled, its queue offset is set to 0 and it is
+	 * allocated one queue; this way, traffic for that TC is sent to the
+	 * default queue.
+	 *
+	 * Setup number and offset of Rx queues for all TCs for the VSI
+	 */
+
+	qcount_rx = rx_numq_tc;
+
+	/* qcount will change if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
+		if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) {
+			if (vsi->type == ICE_VSI_PF)
+				max_rss = ICE_MAX_LG_RSS_QS;
+			else
+				max_rss = ICE_MAX_SMALL_RSS_QS;
+			qcount_rx = min_t(int, rx_numq_tc, max_rss);
+			qcount_rx = min_t(int, qcount_rx, vsi->rss_size);
+		}
+	}
+
+	/* find the (rounded up) power-of-2 of qcount */
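+	/* e.g. qcount_rx = 5 gives pow = 3, since 2^3 = 8 is the smallest
+	 * power-of-2 that is >= 5
+	 */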
+	pow = order_base_2(qcount_rx);
+
+	ice_for_each_traffic_class(i) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+			/* TC is not enabled */
+			vsi->tc_cfg.tc_info[i].qoffset = 0;
+			vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+			vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+			vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+			ctxt->info.tc_mapping[i] = 0;
+			continue;
+		}
+
+		/* TC is enabled */
+		vsi->tc_cfg.tc_info[i].qoffset = offset;
+		vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
+		vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc;
+		vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+
+		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+			ICE_AQ_VSI_TC_Q_OFFSET_M) |
+			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+			 ICE_AQ_VSI_TC_Q_NUM_M);
+		offset += qcount_rx;
+		tx_count += tx_numq_tc;
+		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+	}
+
+	/* A non-zero offset means it was calculated correctly from the
+	 * enabled TCs of this VSI. Otherwise fall back to qcount_rx, which is
+	 * always correct and non-zero because it is derived from the VSI's
+	 * allocated Rx queues, of which there is at least one.
+	 */
+	if (offset)
+		vsi->num_rxq = offset;
+	else
+		vsi->num_rxq = qcount_rx;
+
+	vsi->num_txq = tx_count;
+
+	if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
+		dev_dbg(&vsi->back->pdev->dev, "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
+		/* since there is a chance that num_rxq could have been changed
+		 * in the above for loop, make num_txq equal to num_rxq.
+		 */
+		vsi->num_txq = vsi->num_rxq;
+	}
+
+	/* Rx queue mapping */
+	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+	/* q_mapping buffer holds the info for the first queue allocated for
+	 * this VSI in the PF space and also the number of queues associated
+	 * with this VSI.
+	 */
+	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+}
+
+/**
+ * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 lut_type, hash_type;
+	struct ice_pf *pf;
+
+	pf = vsi->back;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	case ICE_VSI_VF:
+		/* VF VSI will get a small RSS table which is a VSI LUT type */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	case ICE_VSI_LB:
+		dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type);
+		return;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		return;
+	}
+
+	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+				ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
+				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+}
+
+/**
+ * ice_vsi_init - Create and initialize a VSI
+ * @vsi: the VSI being configured
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command to create a new VSI.
+ */
+static int ice_vsi_init(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vsi_ctx *ctxt;
+	int ret = 0;
+
+	ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+	switch (vsi->type) {
+	case ICE_VSI_LB:
+		/* fall through */
+	case ICE_VSI_PF:
+		ctxt->flags = ICE_AQ_VSI_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		ctxt->flags = ICE_AQ_VSI_TYPE_VF;
+		/* VF number here is the absolute VF number (0-255) */
+		ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
+		break;
+	default:
+		devm_kfree(&pf->pdev->dev, ctxt);
+		return -ENODEV;
+	}
+
+	ice_set_dflt_vsi_ctx(ctxt);
+	/* if the switch is in VEB mode, allow VSI loopback */
+	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
+		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+
+	/* Set LUT type and HASH type if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		ice_set_rss_vsi_ctx(ctxt, vsi);
+
+	ctxt->info.sw_id = vsi->port_info->sw_id;
+	ice_vsi_setup_q_map(vsi, ctxt);
+
+	/* Enable MAC Antispoof with new VSI being initialized or updated */
+	if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) {
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+		ctxt->info.sec_flags |=
+			ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+	}
+
+	/* Allow control frames out of main VSI */
+	if (vsi->type == ICE_VSI_PF) {
+		ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+	}
+
+	ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Add VSI failed, err %d\n", ret);
+		devm_kfree(&pf->pdev->dev, ctxt);
+		return -EIO;
+	}
+
+	/* keep context for update VSI operations */
+	vsi->info = ctxt->info;
+
+	/* record VSI number returned */
+	vsi->vsi_num = ctxt->vsi_num;
+
+	devm_kfree(&pf->pdev->dev, ctxt);
+	return ret;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_ring *ring;
+
+	if (!vsi->q_vectors[v_idx]) {
+		dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
+			v_idx);
+		return;
+	}
+	q_vector = vsi->q_vectors[v_idx];
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+	ice_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
+
+	/* only VSI with an associated netdev is set up with NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	devm_kfree(&pf->pdev->dev, q_vector);
+	vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	if (vsi->type == ICE_VSI_VF)
+		goto out;
+	/* only set affinity_mask if the CPU is online */
+	if (cpu_online(v_idx))
+		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+	/* This will not be called in the driver load path because the netdev
+	 * will not be created yet. All other cases will register the NAPI
+	 * handler here (i.e. resume, reset/rebuild, etc.)
+	 */
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+			       NAPI_POLL_WEIGHT);
+
+out:
+	/* tie q_vector and VSI together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int v_idx = 0, num_q_vectors;
+	int err;
+
+	if (vsi->q_vectors[0]) {
+		dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+			vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	num_q_vectors = vsi->num_q_vectors;
+
+	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+		err = ice_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		ice_free_q_vector(vsi, v_idx);
+
+	dev_err(&pf->pdev->dev,
+		"Failed to allocate %d q_vectors for VSI %d, ret=%d\n",
+		vsi->num_q_vectors, vsi->vsi_num, err);
+	vsi->num_q_vectors = 0;
+	return err;
+}
+
+/**
+ * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after ice_vsi_alloc() which allocates the
+ * corresponding SW VSI structure and initializes num_queue_pairs for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success or negative on failure
+ */
+static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	u16 num_q_vectors;
+
+	/* SRIOV doesn't grab irq_tracker entries for each VSI */
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	if (vsi->base_vector) {
+		dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+			vsi->vsi_num, vsi->base_vector);
+		return -EEXIST;
+	}
+
+	num_q_vectors = vsi->num_q_vectors;
+	/* reserve slots from OS requested IRQs */
+	vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+				       vsi->idx);
+	if (vsi->base_vector < 0) {
+		dev_err(&pf->pdev->dev,
+			"Failed to get tracking for %d vectors for VSI %d, err=%d\n",
+			num_q_vectors, vsi->vsi_num, vsi->base_vector);
+		return -ENOENT;
+	}
+	pf->num_avail_sw_msix -= num_q_vectors;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
+ * @vsi: the VSI having rings deallocated
+ */
+static void ice_vsi_clear_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (vsi->tx_rings) {
+		for (i = 0; i < vsi->alloc_txq; i++) {
+			if (vsi->tx_rings[i]) {
+				kfree_rcu(vsi->tx_rings[i], rcu);
+				vsi->tx_rings[i] = NULL;
+			}
+		}
+	}
+	if (vsi->rx_rings) {
+		for (i = 0; i < vsi->alloc_rxq; i++) {
+			if (vsi->rx_rings[i]) {
+				kfree_rcu(vsi->rx_rings[i], rcu);
+				vsi->rx_rings[i] = NULL;
+			}
+		}
+	}
+}
+
+/**
+ * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
+ * @vsi: VSI which is having rings allocated
+ */
+static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	/* Allocate Tx rings */
+	for (i = 0; i < vsi->alloc_txq; i++) {
+		struct ice_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->txq_map[i];
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_tx_desc;
+		vsi->tx_rings[i] = ring;
+	}
+
+	/* Allocate Rx rings */
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		struct ice_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->rxq_map[i];
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_rx_desc;
+		vsi->rx_rings[i] = ring;
+	}
+
+	return 0;
+
+err_out:
+	ice_vsi_clear_rings(vsi);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
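+ *
+ * Rings are spread evenly using DIV_ROUND_UP(): e.g. 5 Tx rings over 3
+ * vectors are assigned as 2/2/1.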
+ */
+#ifdef CONFIG_DCB
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+#else
+static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+#endif /* CONFIG_DCB */
+{
+	int q_vectors = vsi->num_q_vectors;
+	int tx_rings_rem, rx_rings_rem;
+	int v_id;
+
+	/* start with the VSI's full ring counts still to be assigned */
+	tx_rings_rem = vsi->num_txq;
+	rx_rings_rem = vsi->num_rxq;
+
+	for (v_id = 0; v_id < q_vectors; v_id++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+		/* Tx rings mapping to vector */
+		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_tx = tx_rings_per_v;
+		q_vector->tx.ring = NULL;
+		q_vector->tx.itr_idx = ICE_TX_ITR;
+		q_base = vsi->num_txq - tx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+			tx_ring->q_vector = q_vector;
+			tx_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = tx_ring;
+		}
+		tx_rings_rem -= tx_rings_per_v;
+
+		/* Rx rings mapping to vector */
+		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_rx = rx_rings_per_v;
+		q_vector->rx.ring = NULL;
+		q_vector->rx.itr_idx = ICE_RX_ITR;
+		q_base = vsi->num_rxq - rx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+			rx_ring->q_vector = q_vector;
+			rx_ring->next = q_vector->rx.ring;
+			q_vector->rx.ring = rx_ring;
+		}
+		rx_rings_rem -= rx_rings_per_v;
+	}
+}
+
+/**
+ * ice_vsi_manage_rss_lut - disable/enable RSS
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ *
+ * In the event of disable request for RSS, this function will zero out RSS
+ * LUT, while in the event of enable request for RSS, it will reconfigure RSS
+ * LUT.
+ */
+int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
+{
+	int err = 0;
+	u8 *lut;
+
+	lut = devm_kzalloc(&vsi->back->pdev->dev, vsi->rss_table_size,
+			   GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (ena) {
+		if (vsi->rss_lut_user)
+			memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+		else
+			ice_fill_rss_lut(lut, vsi->rss_table_size,
+					 vsi->rss_size);
+	}
+
+	err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size);
+	devm_kfree(&vsi->back->pdev->dev, lut);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
+ * @vsi: VSI to be configured
+ */
+static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
+{
+	struct ice_aqc_get_set_rss_keys *key;
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	int err = 0;
+	u8 *lut;
+
+	vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
+
+	lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+
+	status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut,
+				    vsi->rss_table_size);
+
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"set_rss_lut failed, error %d\n", status);
+		err = -EIO;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	key = devm_kzalloc(&pf->pdev->dev, sizeof(*key), GFP_KERNEL);
+	if (!key) {
+		err = -ENOMEM;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	if (vsi->rss_hkey_user)
+		memcpy(key,
+		       (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user,
+		       ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+	else
+		netdev_rss_key_fill((void *)key,
+				    ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+
+	status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
+
+	if (status) {
+		dev_err(&pf->pdev->dev, "set_rss_key failed, error %d\n",
+			status);
+		err = -EIO;
+	}
+
+	devm_kfree(&pf->pdev->dev, key);
+ice_vsi_cfg_rss_exit:
+	devm_kfree(&pf->pdev->dev, lut);
+	return err;
+}
+
+/**
+ * ice_add_mac_to_list - Add a MAC address filter entry to the list
+ * @vsi: the VSI to be forwarded to
+ * @add_list: pointer to the list which contains MAC filter entries
+ * @macaddr: the MAC address to be added.
+ *
+ * Adds MAC address filter entry to the temp list
+ *
+ * Returns 0 on success or -ENOMEM on failure.
+ */
+int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
+			const u8 *macaddr)
+{
+	struct ice_fltr_list_entry *tmp;
+	struct ice_pf *pf = vsi->back;
+
+	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->fltr_info.flag = ICE_FLTR_TX;
+	tmp->fltr_info.src_id = ICE_SRC_ID_VSI;
+	tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.vsi_handle = vsi->idx;
+	ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr);
+
+	INIT_LIST_HEAD(&tmp->list_entry);
+	list_add(&tmp->list_entry, add_list);
+
+	return 0;
+}
+
+/**
+ * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_eth_stats(struct ice_vsi *vsi)
+{
+	struct ice_eth_stats *prev_es, *cur_es;
+	struct ice_hw *hw = &vsi->back->hw;
+	u16 vsi_num = vsi->vsi_num;    /* HW absolute index of a VSI */
+
+	prev_es = &vsi->eth_stats_prev;
+	cur_es = &vsi->eth_stats;
+
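+	/* the ice_stat_update40/32 helpers read the 40 bit and 32 bit
+	 * hardware counters respectively and update cur_es relative to the
+	 * prev_es snapshot; stat_offsets_loaded tells them whether a valid
+	 * baseline has been captured yet
+	 */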
+	ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_bytes, &cur_es->rx_bytes);
+
+	ice_stat_update40(hw, GLV_UPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_unicast, &cur_es->rx_unicast);
+
+	ice_stat_update40(hw, GLV_MPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_multicast, &cur_es->rx_multicast);
+
+	ice_stat_update40(hw, GLV_BPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_broadcast, &cur_es->rx_broadcast);
+
+	ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_discards, &cur_es->rx_discards);
+
+	ice_stat_update40(hw, GLV_GOTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_bytes, &cur_es->tx_bytes);
+
+	ice_stat_update40(hw, GLV_UPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_unicast, &cur_es->tx_unicast);
+
+	ice_stat_update40(hw, GLV_MPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_multicast, &cur_es->tx_multicast);
+
+	ice_stat_update40(hw, GLV_BPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_broadcast, &cur_es->tx_broadcast);
+
+	ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_errors, &cur_es->tx_errors);
+
+	vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * ice_free_fltr_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_add_mac_to_list
+ */
+void ice_free_fltr_list(struct device *dev, struct list_head *h)
+{
+	struct ice_fltr_list_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, h, list_entry) {
+		list_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_vsi_add_vlan - Add VSI membership for given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN ID to be added
+ */
+int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
+{
+	struct ice_fltr_list_entry *tmp;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+	int err = 0;
+
+	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.flag = ICE_FLTR_TX;
+	tmp->fltr_info.src_id = ICE_SRC_ID_VSI;
+	tmp->fltr_info.vsi_handle = vsi->idx;
+	tmp->fltr_info.l_data.vlan.vlan_id = vid;
+
+	INIT_LIST_HEAD(&tmp->list_entry);
+	list_add(&tmp->list_entry, &tmp_add_list);
+
+	status = ice_add_vlan(&pf->hw, &tmp_add_list);
+	if (status) {
+		err = -ENODEV;
+		dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n",
+			vid, vsi->vsi_num);
+	}
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	return err;
+}
+
+/**
+ * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN ID to be removed
+ *
+ * Returns 0 on success and negative on failure
+ */
+int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
+{
+	struct ice_fltr_list_entry *list;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+	int err = 0;
+
+	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return -ENOMEM;
+
+	list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+	list->fltr_info.vsi_handle = vsi->idx;
+	list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	list->fltr_info.l_data.vlan.vlan_id = vid;
+	list->fltr_info.flag = ICE_FLTR_TX;
+	list->fltr_info.src_id = ICE_SRC_ID_VSI;
+
+	INIT_LIST_HEAD(&list->list_entry);
+	list_add(&list->list_entry, &tmp_add_list);
+
+	status = ice_remove_vlan(&pf->hw, &tmp_add_list);
+	if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(&pf->pdev->dev,
+			"Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n",
+			vid, vsi->vsi_num, status);
+	} else if (status) {
+		dev_err(&pf->pdev->dev,
+			"Error removing VLAN %d on vsi %i error: %d\n",
+			vid, vsi->vsi_num, status);
+		err = -EIO;
+	}
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Rx VSI for operation.
+ */
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+	u16 i;
+
+	if (vsi->type == ICE_VSI_VF)
+		goto setup_rings;
+
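+	/* e.g. an MTU of 9000 gives max_frame = 9000 + 14 (ETH_HLEN) +
+	 * 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 9022 bytes
+	 */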
+	if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
+		vsi->max_frame = vsi->netdev->mtu +
+			ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	else
+		vsi->max_frame = ICE_RXBUF_2048;
+
+	vsi->rx_buf_len = ICE_RXBUF_2048;
+setup_rings:
+	/* set up individual rings */
+	for (i = 0; i < vsi->num_rxq; i++) {
+		int err;
+
+		err = ice_setup_rx_ctx(vsi->rx_rings[i]);
+		if (err) {
+			dev_err(&vsi->back->pdev->dev,
+				"ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+				i, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that the queue belongs to
+ * @ring: Tx ring to be configured
+ * @tc_q_idx: queue index within given TC
+ * @qg_buf: queue group buffer
+ * @tc: TC that Tx ring belongs to
+ */
+static int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
+		struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
+{
+	struct ice_tlan_ctx tlan_ctx = { 0 };
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_pf *pf = vsi->back;
+	u8 buf_len = sizeof(*qg_buf);
+	enum ice_status status;
+	u16 pf_q;
+
+	pf_q = ring->reg_idx;
+	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+	/* copy context contents into the qg_buf */
+	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+		    ice_tlan_ctx_info);
+
+	/* init queue specific tail reg. It is referred to as the
+	 * transmit comm scheduler queue doorbell.
+	 */
+	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+	/* Store the Tx queue's unique, TC-relative software queue
+	 * handle in the VSI Tx ring
+	 */
+	ring->q_handle = tc_q_idx;
+
+	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+				 1, qg_buf, buf_len, NULL);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"Failed to set LAN Tx queue context, error: %d\n",
+			status);
+		return -ENODEV;
+	}
+
+	/* Add Tx Queue TEID into the VSI Tx ring from the
+	 * response. This will complete configuring and
+	 * enabling the queue.
+	 */
+	txq = &qg_buf->txqs[0];
+	if (pf_q == le16_to_cpu(txq->txq_id))
+		ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ * @rings: Tx ring array to be configured
+ * @offset: offset within vsi->txq_map
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+static int
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	struct ice_pf *pf = vsi->back;
+	u16 q_idx = 0, i;
+	int err = 0;
+	u8 tc;
+
+	qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	/* set up and configure the Tx queues for each enabled TC */
+	ice_for_each_traffic_class(tc) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
+			break;
+
+		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
+			err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
+					      qg_buf, tc);
+			if (err)
+				goto err_cfg_txqs;
+
+			q_idx++;
+		}
+	}
+err_cfg_txqs:
+	devm_kfree(&pf->pdev->dev, qg_buf);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+{
+	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
+}
+
+/**
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
+ *
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
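+ *
+ * For example, intrl = 8 usecs with gran = 2 usecs yields a register value
+ * of 4 with GLINT_RATE_INTRL_ENA_M set; intrl = 0 disables rate limiting.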
+ */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
+{
+	u32 val = intrl / gran;
+
+	if (val)
+		return val | GLINT_RATE_INTRL_ENA_M;
+	return 0;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+	u32 regval = rd32(hw, GLINT_CTL);
+
+	/* no need to update global register if ITR gran is already set */
+	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+		return;
+
+	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+		  GLINT_CTL_ITR_GRAN_200_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+		  GLINT_CTL_ITR_GRAN_100_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+		  GLINT_CTL_ITR_GRAN_50_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+		  GLINT_CTL_ITR_GRAN_25_M);
+	wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+static void
+ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	ice_cfg_itr_gran(hw);
+
+	if (q_vector->num_ring_rx) {
+		struct ice_ring_container *rc = &q_vector->rx;
+
+		/* if this value is set then don't overwrite with default */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_RX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
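+		/* the ITR value is converted from usecs to register units via
+		 * the ICE_ITR_GRAN_S shift, matching the 2 usec granularity
+		 * set up by ice_cfg_itr_gran() above
+		 */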
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+
+	if (q_vector->num_ring_tx) {
+		struct ice_ring_container *rc = &q_vector->tx;
+
+		/* if this value is set then don't overwrite with default */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_TX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+#else
+static void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+#endif /* CONFIG_PCI_IOV */
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+#else
+static void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+#endif /* CONFIG_PCI_IOV */
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_vsi_cfg_msix - MSI-X mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ *
+ * This configures MSI-X mode interrupts for the PF VSI, and should not be used
+ * for the VF VSI.
+ */
+void ice_vsi_cfg_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 txq = 0, rxq = 0;
+	int i, q;
+
+	for (i = 0; i < vsi->num_q_vectors; i++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+		u16 reg_idx = q_vector->reg_idx;
+
+		ice_cfg_itr(hw, q_vector);
+
+		wr32(hw, GLINT_RATE(reg_idx),
+		     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+		/* Both the Transmit Queue Interrupt Cause Control register
+		 * and the Receive Queue Interrupt Cause Control register
+		 * expect the MSIX_INDX field to be the vector index
+		 * within the function space and not the absolute
+		 * vector index across the PF or across the device.
+		 * For SR-IOV VF VSIs the queue vector index always starts
+		 * at 1, since the first vector index (0) is used for OICR
+		 * in the VF space. Since VMDq and other PF VSIs are within
+		 * the PF function space, use the vector index that is
+		 * tracked for this PF.
+		 */
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			ice_cfg_txq_interrupt(vsi, txq, reg_idx,
+					      q_vector->tx.itr_idx);
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
+					      q_vector->rx.itr_idx);
+			rxq++;
+		}
+	}
+}
+
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the VSI being changed
+ */
+int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	/* Here we are configuring the VSI to let the driver add VLAN tags by
+	 * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag
+	 * insertion happens in the Tx hot path, in ice_tx_map.
+	 */
+	ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL;
+
+	/* Preserve existing VLAN strip setting */
+	ctxt->info.vlan_flags |= (vsi->info.vlan_flags &
+				  ICE_AQ_VSI_VLAN_EMOD_M);
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi->info.vlan_flags = ctxt->info.vlan_flags;
+out:
+	devm_kfree(dev, ctxt);
+	return ret;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	/* Here we are configuring what the VSI should do with the VLAN tag in
+	 * the Rx packet. We can either leave the tag in the packet or put it in
+	 * the Rx descriptor.
+	 */
+	if (ena)
+		/* Strip VLAN tag from Rx packet and put it in the desc */
+		ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH;
+	else
+		/* Disable stripping. Leave tag in packet */
+		ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+
+	/* Allow all packets untagged/tagged */
+	ctxt->info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL;
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
+			ena, status, hw->adminq.sq_last_status);
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi->info.vlan_flags = ctxt->info.vlan_flags;
+out:
+	devm_kfree(dev, ctxt);
+	return ret;
+}
+
+/**
+ * ice_vsi_start_rx_rings - start VSI's Rx rings
+ * @vsi: the VSI whose rings are to be started
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_start_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_rx_rings(vsi, true);
+}
+
+/**
+ * ice_vsi_stop_rx_rings - stop VSI's Rx rings
+ * @vsi: the VSI
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_rx_rings(vsi, false);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+	     GLINT_DYN_CTL_SWINT_TRIG_M |
+	     GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+#ifndef CONFIG_PCI_IOV
+static
+#endif /* !CONFIG_PCI_IOV */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u32 val;
+
+	/* clear cause_ena bit for disabled queues */
+	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+	val &= ~QINT_TQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+	/* software is expected to wait for 100 ns */
+	ndelay(100);
+
+	/* trigger a software interrupt for the vector
+	 * associated to the queue to schedule NAPI handler
+	 */
+	q_vector = ring->q_vector;
+	if (q_vector)
+		ice_trigger_sw_intr(hw, q_vector);
+
+	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+				 txq_meta->tc, 1, &txq_meta->q_handle,
+				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+				 rel_vmvf_num, NULL);
+
+	/* if the disable queue command was issued during an active reset
+	 * flow, ICE_ERR_RESET_ONGOING is returned. This is not an error as
+	 * the reset operation disables queues at the hardware level anyway.
+	 */
+	if (status == ICE_ERR_RESET_ONGOING) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Reset in progress. LAN Tx queues already disabled\n");
+	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"LAN Tx queues do not exist, nothing to disable\n");
+	} else if (status) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to disable LAN Tx queues, error: %d\n", status);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+#ifndef CONFIG_PCI_IOV
+static
+#endif /* !CONFIG_PCI_IOV */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta)
+{
+	u8 tc = 0;
+
+#ifdef CONFIG_DCB
+	tc = ring->dcb_tc;
+#endif /* CONFIG_DCB */
+	txq_meta->q_id = ring->reg_idx;
+	txq_meta->q_teid = ring->txq_teid;
+	txq_meta->q_handle = ring->q_handle;
+	txq_meta->vsi_idx = vsi->idx;
+	txq_meta->tc = tc;
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @rings: Tx ring array to be stopped
+ */
+static int
+ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		      u16 rel_vmvf_num, struct ice_ring **rings)
+{
+	u16 i, q_idx = 0;
+	int status;
+	u8 tc;
+
+	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+		return -EINVAL;
+
+	/* set up the Tx queue list to be disabled for each enabled TC */
+	ice_for_each_traffic_class(tc) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
+			break;
+
+		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
+			struct ice_txq_meta txq_meta = { };
+
+			if (!rings || !rings[q_idx])
+				return -EINVAL;
+
+			ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+			status = ice_vsi_stop_tx_ring(vsi, rst_src,
+						      rel_vmvf_num,
+						      rings[q_idx], &txq_meta);
+
+			if (status)
+				return status;
+
+			q_idx++;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ */
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+			  u16 rel_vmvf_num)
+{
+	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
+}
+
+/**
+ * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
+ * @vsi: VSI to enable or disable VLAN pruning on
+ * @ena: set to true to enable VLAN pruning and false to disable it
+ * @vlan_promisc: true if the VSI is in VLAN promiscuous mode; when false,
+ *                the security and switch sections are marked valid
+ *
+ * returns 0 if VSI is updated, negative otherwise
+ */
+int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
+{
+	struct ice_vsi_ctx *ctxt;
+	struct device *dev;
+	struct ice_pf *pf;
+	int status;
+
+	if (!vsi)
+		return -EINVAL;
+
+	pf = vsi->back;
+	dev = &pf->pdev->dev;
+	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+
+	if (ena) {
+		ctxt->info.sec_flags |=
+			ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+			ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S;
+		ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	} else {
+		ctxt->info.sec_flags &=
+			~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+			  ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+		ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	}
+
+	if (!vlan_promisc)
+		ctxt->info.valid_sections =
+			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID |
+				    ICE_AQ_VSI_PROP_SW_VALID);
+
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %d\n",
+			   ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status,
+			   pf->hw.adminq.sq_last_status);
+		goto err_out;
+	}
+
+	vsi->info.sec_flags = ctxt->info.sec_flags;
+	vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+
+	devm_kfree(dev, ctxt);
+	return 0;
+
+err_out:
+	devm_kfree(dev, ctxt);
+	return -EIO;
+}
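+
+/* Usage sketch (hypothetical caller, for illustration): enable pruning when
+ * the first VLAN is configured on the VSI, e.g.:
+ *
+ *	err = ice_cfg_vlan_pruning(vsi, true, false);
+ *	if (err)
+ *		netdev_err(vsi->netdev, "enabling VLAN pruning failed\n");
+ */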
+
+static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+	struct ice_dcbx_cfg *cfg = &vsi->port_info->local_dcbx_cfg;
+
+	vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
+	vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
+}
+
+/**
+ * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors
+ * @vsi: VSI to set the q_vectors register index on
+ */
+static int
+ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
+{
+	u16 i;
+
+	if (!vsi || !vsi->q_vectors)
+		return -EINVAL;
+
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		if (!q_vector) {
+			dev_err(&vsi->back->pdev->dev,
+				"Failed to set reg_idx on q_vector %d VSI %d\n",
+				i, vsi->vsi_num);
+			goto clear_reg_idx;
+		}
+
+		if (vsi->type == ICE_VSI_VF) {
+			struct ice_vf *vf = &vsi->back->vf[vsi->vf_id];
+
+			q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
+		} else {
+			q_vector->reg_idx =
+				q_vector->v_idx + vsi->base_vector;
+		}
+	}
+
+	return 0;
+
+clear_reg_idx:
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		if (q_vector)
+			q_vector->reg_idx = 0;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * ice_vsi_add_rem_eth_mac - Program VSI ethertype based filter with rule
+ * @vsi: the VSI being configured
+ * @add_rule: boolean value to add or remove ethertype filter rule
+ */
+static void
+ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
+{
+	struct ice_fltr_list_entry *list;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+
+	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return;
+
+	list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+	list->fltr_info.fltr_act = ICE_DROP_PACKET;
+	list->fltr_info.flag = ICE_FLTR_TX;
+	list->fltr_info.src_id = ICE_SRC_ID_VSI;
+	list->fltr_info.vsi_handle = vsi->idx;
+	list->fltr_info.l_data.ethertype_mac.ethertype = vsi->ethtype;
+
+	INIT_LIST_HEAD(&list->list_entry);
+	list_add(&list->list_entry, &tmp_add_list);
+
+	if (add_rule)
+		status = ice_add_eth_mac(&pf->hw, &tmp_add_list);
+	else
+		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
+
+	if (status)
+		dev_err(&pf->pdev->dev,
+			"Failure Adding or Removing Ethertype on VSI %i error: %d\n",
+			vsi->vsi_num, status);
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+}
+
+/**
+ * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * @vsi: the VSI being configured
+ * @tx: bool to determine Tx or Rx rule
+ * @create: bool to determine create or remove Rule
+ */
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+{
+	struct ice_fltr_list_entry *list;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+
+	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return;
+
+	list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+	list->fltr_info.vsi_handle = vsi->idx;
+	list->fltr_info.l_data.ethertype_mac.ethertype = ETH_P_LLDP;
+
+	if (tx) {
+		list->fltr_info.fltr_act = ICE_DROP_PACKET;
+		list->fltr_info.flag = ICE_FLTR_TX;
+		list->fltr_info.src_id = ICE_SRC_ID_VSI;
+	} else {
+		list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		list->fltr_info.flag = ICE_FLTR_RX;
+		list->fltr_info.src_id = ICE_SRC_ID_LPORT;
+	}
+
+	INIT_LIST_HEAD(&list->list_entry);
+	list_add(&list->list_entry, &tmp_add_list);
+
+	if (create)
+		status = ice_add_eth_mac(&pf->hw, &tmp_add_list);
+	else
+		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
+
+	if (status)
+		dev_err(&pf->pdev->dev,
+			"Fail %s %s LLDP rule on VSI %i error: %d\n",
+			create ? "adding" : "removing", tx ? "TX" : "RX",
+			vsi->vsi_num, status);
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+}
+
+/**
+ * ice_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ * @type: VSI type
+ * @vf_id: defines VF ID to which this VSI connects. This field is meant to be
+ *         used only for the ICE_VSI_VF VSI type. For other VSI types, pass
+ *         ICE_INVAL_VFID as input.
+ *
+ * This allocates the sw VSI structure and its queue resources.
+ *
+ * Returns pointer to the successfully allocated and configured VSI sw struct on
+ * success, NULL on failure.
+ */
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+	      enum ice_vsi_type type, u16 vf_id)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct device *dev = &pf->pdev->dev;
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	int ret, i;
+
+	if (type == ICE_VSI_VF)
+		vsi = ice_vsi_alloc(pf, type, vf_id);
+	else
+		vsi = ice_vsi_alloc(pf, type, ICE_INVAL_VFID);
+
+	if (!vsi) {
+		dev_err(dev, "could not allocate VSI\n");
+		return NULL;
+	}
+
+	vsi->port_info = pi;
+	vsi->vsw = pf->first_sw;
+	if (vsi->type == ICE_VSI_PF)
+		vsi->ethtype = ETH_P_PAUSE;
+
+	if (vsi->type == ICE_VSI_VF)
+		vsi->vf_id = vf_id;
+
+	if (ice_vsi_get_qs(vsi)) {
+		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+			vsi->idx);
+		goto unroll_get_qs;
+	}
+
+	/* set RSS capabilities */
+	ice_vsi_set_rss_params(vsi);
+
+	/* set TC configuration */
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* create the VSI */
+	ret = ice_vsi_init(vsi);
+	if (ret)
+		goto unroll_get_qs;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_setup_vector_base(vsi);
+		if (ret)
+			goto unroll_alloc_q_vector;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+			ice_vsi_cfg_rss_lut_key(vsi);
+		break;
+	case ICE_VSI_VF:
+		/* The VF driver will take care of creating a netdev for this
+		 * VSI type and of mapping queues to vectors through virtchnl;
+		 * the PF driver only creates the VSI and the corresponding
+		 * structures for bookkeeping purposes
+		 */
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_alloc_q_vector;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+			ice_vsi_cfg_rss_lut_key(vsi);
+		break;
+	case ICE_VSI_LB:
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+		break;
+	default:
+		/* clean up the resources and exit */
+		goto unroll_vsi_init;
+	}
+
+	/* configure VSI nodes based on number of queues and TC's */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->alloc_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"VSI %d failed lan queue config, error %d\n",
+			vsi->vsi_num, status);
+		goto unroll_vector_base;
+	}
+
+	/* Add a switch rule to drop all Tx flow control frames of lookup
+	 * type ETHERTYPE from VSIs, restricting a malicious VF from sending
+	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
+	 * The rule is added once for the PF VSI in order to create the
+	 * appropriate recipe, since the VSI/VSI list is ignored with the
+	 * drop action. Also add rules to handle LLDP Tx and Rx packets:
+	 * Tx LLDP packets need to be dropped so that VFs cannot send LLDP
+	 * packets to reconfigure DCB settings in the HW, and if the FW DCBX
+	 * engine is not running, Rx LLDP packets need to be redirected up
+	 * the stack.
+	 */
+	if (!ice_is_safe_mode(pf)) {
+		if (vsi->type == ICE_VSI_PF) {
+			ice_vsi_add_rem_eth_mac(vsi, true);
+
+			/* Tx LLDP packets */
+			ice_cfg_sw_lldp(vsi, true, true);
+
+			/* Rx LLDP packets */
+			if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+				ice_cfg_sw_lldp(vsi, false, true);
+		}
+	}
+
+	return vsi;
+
+unroll_vector_base:
+	/* reclaim SW interrupts back to the common pool */
+	ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+	pf->num_avail_sw_msix += vsi->num_q_vectors;
+unroll_alloc_q_vector:
+	ice_vsi_free_q_vectors(vsi);
+unroll_vsi_init:
+	ice_vsi_delete(vsi);
+unroll_get_qs:
+	ice_vsi_put_qs(vsi);
+	ice_vsi_clear(vsi);
+
+	return NULL;
+}
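+
+/* Usage sketch (illustrative): the PF setup path creates its main VSI
+ * roughly as follows; error handling is omitted here.
+ *
+ *	vsi = ice_vsi_setup(pf, pf->hw.port_info, ICE_VSI_PF, ICE_INVAL_VFID);
+ *	if (!vsi)
+ *		return -ENOMEM;
+ */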
+
+/**
+ * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
+ * @vsi: the VSI being cleaned up
+ */
+static void ice_vsi_release_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 txq = 0;
+	u32 rxq = 0;
+	int i, q;
+
+	for (i = 0; i < vsi->num_q_vectors; i++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+		u16 reg_idx = q_vector->reg_idx;
+
+		wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0);
+		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
+			rxq++;
+		}
+	}
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_vsi_free_irq - Free the IRQ association with the OS
+ * @vsi: the VSI being configured
+ */
+void ice_vsi_free_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int base = vsi->base_vector;
+	int i;
+
+	if (!vsi->q_vectors || !vsi->irqs_ready)
+		return;
+
+	ice_vsi_release_msix(vsi);
+	if (vsi->type == ICE_VSI_VF)
+		return;
+
+	vsi->irqs_ready = false;
+	ice_for_each_q_vector(vsi, i) {
+		u16 vector = i + base;
+		int irq_num;
+
+		irq_num = pf->msix_entries[vector].vector;
+
+		/* free only the irqs that were actually requested */
+		if (!vsi->q_vectors[i] ||
+		    !(vsi->q_vectors[i]->num_ring_tx ||
+		      vsi->q_vectors[i]->num_ring_rx))
+			continue;
+
+		/* clear the affinity notifier in the IRQ descriptor */
+		irq_set_affinity_notifier(irq_num, NULL);
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(irq_num, NULL);
+		synchronize_irq(irq_num);
+		devm_free_irq(&pf->pdev->dev, irq_num,
+			      vsi->q_vectors[i]);
+	}
+}
+
+/**
+ * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->tx_rings)
+		return;
+
+	ice_for_each_txq(vsi, i)
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+			ice_free_tx_ring(vsi->tx_rings[i]);
+}
+
+/**
+ * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->rx_rings)
+		return;
+
+	ice_for_each_rxq(vsi, i)
+		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+			ice_free_rx_ring(vsi->rx_rings[i]);
+}
+
+/**
+ * ice_vsi_close - Shut down a VSI
+ * @vsi: the VSI being shut down
+ */
+void ice_vsi_close(struct ice_vsi *vsi)
+{
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state))
+		ice_down(vsi);
+
+	ice_vsi_free_irq(vsi);
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+}
+
+/**
+ * ice_free_res - free a block of resources
+ * @res: pointer to the resource
+ * @index: starting index previously returned by ice_get_res
+ * @id: identifier to track owner
+ *
+ * Returns number of resources freed
+ */
+int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
+{
+	int count = 0;
+	int i;
+
+	if (!res || index >= res->end)
+		return -EINVAL;
+
+	id |= ICE_RES_VALID_BIT;
+	for (i = index; i < res->end && res->list[i] == id; i++) {
+		res->list[i] = 0;
+		count++;
+	}
+
+	return count;
+}
+
+/**
+ * ice_search_res - Search the tracker for a block of resources
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or -ENOMEM for error
+ */
+static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
+{
+	int start = 0, end = 0;
+
+	if (needed > res->end)
+		return -ENOMEM;
+
+	id |= ICE_RES_VALID_BIT;
+
+	do {
+		/* skip already allocated entries */
+		if (res->list[end++] & ICE_RES_VALID_BIT) {
+			start = end;
+			if ((start + needed) > res->end)
+				break;
+		}
+
+		if (end == (start + needed)) {
+			int i = start;
+
+			/* there was enough, so assign it to the requestor */
+			while (i != end)
+				res->list[i++] = id;
+
+			return start;
+		}
+	} while (end < res->end);
+
+	return -ENOMEM;
+}
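+
+/* Worked example (illustrative): with res->end = 8 and
+ * res->list = { id, id, 0, 0, 0, 0, 0, 0 } (the first two entries have
+ * ICE_RES_VALID_BIT set), a request with needed = 2 restarts its window at
+ * each allocated entry, finds free entries 2 and 3, marks both with the
+ * caller's id and returns start = 2.
+ */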
+
+/**
+ * ice_get_res - get a block of resources
+ * @pf: board private structure
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or negative for error
+ */
+int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
+{
+	if (!res || !pf)
+		return -EINVAL;
+
+	if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
+		dev_err(&pf->pdev->dev,
+			"param err: needed=%d, num_entries = %d id=0x%04x\n",
+			needed, res->num_entries, id);
+		return -EINVAL;
+	}
+
+	return ice_search_res(res, needed, id);
+}
+
+/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+	int base = vsi->base_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+	int i;
+
+	/* disable interrupt causation from each queue */
+	if (vsi->tx_rings) {
+		ice_for_each_txq(vsi, i) {
+			if (vsi->tx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->tx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_TQCTL(reg));
+				val &= ~QINT_TQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_TQCTL(reg), val);
+			}
+		}
+	}
+
+	if (vsi->rx_rings) {
+		ice_for_each_rxq(vsi, i) {
+			if (vsi->rx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->rx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_RQCTL(reg));
+				val &= ~QINT_RQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_RQCTL(reg), val);
+			}
+		}
+	}
+
+	/* disable each interrupt */
+	ice_for_each_q_vector(vsi, i) {
+		if (!vsi->q_vectors[i])
+			continue;
+		wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
+	}
+
+	ice_flush(hw);
+
+	/* don't call synchronize_irq() for VF's from the host */
+	if (vsi->type == ICE_VSI_VF)
+		return;
+
+	ice_for_each_q_vector(vsi, i)
+		synchronize_irq(pf->msix_entries[i + base].vector);
+}
+
+/**
+ * ice_napi_del - Remove NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be removed
+ */
+void ice_napi_del(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		netif_napi_del(&vsi->q_vectors[v_idx]->napi);
+}
+
+/**
+ * ice_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ */
+int ice_vsi_release(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf;
+
+	if (!vsi->back)
+		return -ENODEV;
+	pf = vsi->back;
+
+	/* do not unregister while the driver is in the reset recovery pending
+	 * state. Since reset/rebuild happens through the PF service task
+	 * workqueue, it's not a good idea to unregister a netdev associated
+	 * with the PF that is currently running the work queue items. This
+	 * avoids a check_flush_dependency() warning on this wq
+	 */
+	if (vsi->netdev && !ice_is_reset_in_progress(pf->state))
+		unregister_netdev(vsi->netdev);
+
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		ice_rss_clean(vsi);
+
+	/* Disable VSI and free resources */
+	if (vsi->type != ICE_VSI_LB)
+		ice_vsi_dis_irq(vsi);
+	ice_vsi_close(vsi);
+
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
+	if (vsi->type != ICE_VSI_VF) {
+		/* reclaim SW interrupts back to the common pool */
+		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+		pf->num_avail_sw_msix += vsi->num_q_vectors;
+	}
+
+	if (!ice_is_safe_mode(pf)) {
+		if (vsi->type == ICE_VSI_PF) {
+			ice_vsi_add_rem_eth_mac(vsi, false);
+			ice_cfg_sw_lldp(vsi, true, false);
+			/* The Rx rule only exists (and thus only needs
+			 * removing) if the LLDP FW engine is currently
+			 * stopped
+			 */
+			if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+				ice_cfg_sw_lldp(vsi, false, false);
+		}
+	}
+
+	ice_remove_vsi_fltr(&pf->hw, vsi->idx);
+	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+	ice_vsi_delete(vsi);
+	ice_vsi_free_q_vectors(vsi);
+
+	/* make sure unregister_netdev() was called by checking __ICE_DOWN */
+	if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) {
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
+	ice_vsi_clear_rings(vsi);
+
+	ice_vsi_put_qs(vsi);
+
+	/* retain SW VSI data structure since it is needed to unregister and
+	 * free VSI netdev when PF is not in reset recovery pending state,
+	 * for example during rmmod.
+	 */
+	if (!ice_is_reset_in_progress(pf->state))
+		ice_vsi_clear(vsi);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_rebuild - Rebuild VSI after reset
+ * @vsi: VSI to be rebuilt
+ *
+ * Returns 0 on success and negative value on failure
+ */
+int ice_vsi_rebuild(struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_vf *vf = NULL;
+	enum ice_status status;
+	struct ice_pf *pf;
+	int ret, i;
+
+	if (!vsi)
+		return -EINVAL;
+
+	pf = vsi->back;
+	if (vsi->type == ICE_VSI_VF)
+		vf = &pf->vf[vsi->vf_id];
+
+	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+	ice_vsi_free_q_vectors(vsi);
+
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
+	if (vsi->type != ICE_VSI_VF) {
+		/* reclaim SW interrupts back to the common pool */
+		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+		pf->num_avail_sw_msix += vsi->num_q_vectors;
+		vsi->base_vector = 0;
+	}
+
+	ice_vsi_put_qs(vsi);
+	ice_vsi_clear_rings(vsi);
+	ice_vsi_free_arrays(vsi);
+	ice_dev_onetime_setup(&pf->hw);
+	if (vsi->type == ICE_VSI_VF)
+		ice_vsi_set_num_qs(vsi, vf->vf_id);
+	else
+		ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
+
+	ret = ice_vsi_alloc_arrays(vsi);
+	if (ret < 0)
+		goto err_vsi;
+
+	ice_vsi_get_qs(vsi);
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* Initialize VSI struct elements and create VSI in FW */
+	ret = ice_vsi_init(vsi);
+	if (ret < 0)
+		goto err_vsi;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto err_rings;
+
+		ret = ice_vsi_setup_vector_base(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+			ice_vsi_cfg_rss_lut_key(vsi);
+		break;
+	case ICE_VSI_VF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto err_rings;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto err_vectors;
+
+		break;
+	default:
+		break;
+	}
+
+	/* configure VSI nodes based on number of queues and TC's */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->alloc_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"VSI %d failed lan queue config, error %d\n",
+			vsi->vsi_num, status);
+		goto err_vectors;
+	}
+	return 0;
+
+err_vectors:
+	ice_vsi_free_q_vectors(vsi);
+err_rings:
+	if (vsi->netdev) {
+		vsi->current_netdev_flags = 0;
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+err_vsi:
+	ice_vsi_clear(vsi);
+	set_bit(__ICE_RESET_FAILED, pf->state);
+	return ret;
+}
+
+/**
+ * ice_is_reset_in_progress - check for a reset in progress
+ * @state: PF state field
+ */
+bool ice_is_reset_in_progress(unsigned long *state)
+{
+	return test_bit(__ICE_RESET_OICR_RECV, state) ||
+	       test_bit(__ICE_PFR_REQ, state) ||
+	       test_bit(__ICE_CORER_REQ, state) ||
+	       test_bit(__ICE_GLOBR_REQ, state);
+}
+
+#ifdef CONFIG_DCB
+/**
+ * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
+ * @vsi: VSI being configured
+ * @ctx: the context buffer returned from AQ VSI update command
+ */
+static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
+{
+	vsi->info.mapping_flags = ctx->info.mapping_flags;
+	memcpy(&vsi->info.q_mapping, &ctx->info.q_mapping,
+	       sizeof(vsi->info.q_mapping));
+	memcpy(&vsi->info.tc_mapping, ctx->info.tc_mapping,
+	       sizeof(vsi->info.tc_mapping));
+}
+
+/**
+ * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
+ * @vsi: VSI to be configured
+ * @ena_tc: TC bitmap
+ *
+ * VSI queues expected to be quiesced before calling this function
+ */
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_vsi_ctx *ctx;
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	int i, ret = 0;
+	u8 num_tc = 0;
+
+	ice_for_each_traffic_class(i) {
+		/* build bitmap of enabled TCs */
+		if (ena_tc & BIT(i))
+			num_tc++;
+		/* populate max_txqs per TC */
+		max_txqs[i] = vsi->alloc_txq;
+	}
+
+	vsi->tc_cfg.ena_tc = ena_tc;
+	vsi->tc_cfg.numtc = num_tc;
+
+	ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->vf_num = 0;
+	ctx->info = vsi->info;
+
+	ice_vsi_setup_q_map(vsi, ctx);
+
+	/* must indicate which sections of the VSI context are being modified */
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
+	if (status) {
+		dev_info(&pf->pdev->dev, "Failed VSI Update\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"VSI %d failed TC config, error %d\n",
+			vsi->vsi_num, status);
+		ret = -EIO;
+		goto out;
+	}
+	ice_vsi_update_q_map(vsi, ctx);
+	vsi->info.valid_sections = 0;
+
+	ice_vsi_cfg_netdev_tc(vsi, ena_tc);
+out:
+	devm_kfree(&pf->pdev->dev, ctx);
+	return ret;
+}
+#endif /* CONFIG_DCB */
+
+/**
+ * ice_nvm_version_str - format the NVM version strings
+ * @hw: ptr to the hardware info
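+ *
+ * Returns a pointer to a static buffer, so a subsequent call overwrites the
+ * previously returned string; callers must consume or copy the result
+ * before calling again.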
+ */
+char *ice_nvm_version_str(struct ice_hw *hw)
+{
+	u8 oem_ver, oem_patch, ver_hi, ver_lo;
+	static char buf[ICE_NVM_VER_LEN];
+	u16 oem_build;
+
+	ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, &ver_hi,
+			    &ver_lo);
+
+	snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", ver_hi, ver_lo,
+		 hw->nvm.eetrack, oem_ver, oem_build, oem_patch);
+
+	return buf;
+}
+
+/**
+ * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
+ * @vsi: the VSI whose MAC filter is being configured
+ * @macaddr: the MAC address to be added or removed
+ * @set: true to add the MAC filter, false to delete it
+ *
+ * Adds or removes MAC address filter entry for VF VSI
+ */
+enum ice_status
+ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set)
+{
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+
+	/* Update MAC filter list to be added or removed for a VSI */
+	if (ice_add_mac_to_list(vsi, &tmp_add_list, macaddr)) {
+		status = ICE_ERR_NO_MEMORY;
+		goto cfg_mac_fltr_exit;
+	}
+
+	if (set)
+		status = ice_add_mac(&vsi->back->hw, &tmp_add_list);
+	else
+		status = ice_remove_mac(&vsi->back->hw, &tmp_add_list);
+
+cfg_mac_fltr_exit:
+	ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list);
+	return status;
+}
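+
+/* Usage sketch (illustrative): add a unicast filter for the port's
+ * permanent MAC address, mirroring what the PF init path does:
+ *
+ *	status = ice_vsi_cfg_mac_fltr(vsi, vsi->port_info->mac.perm_addr,
+ *				      true);
+ */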
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
new file mode 100644
index 0000000..47bc033
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LIB_H_
+#define _ICE_LIB_H_
+
+#include "ice.h"
+
+struct ice_txq_meta {
+	/* Tx-scheduler element identifier */
+	u32 q_teid;
+	/* Entry in VSI's txq_map bitmap */
+	u16 q_id;
+	/* Relative index of Tx queue within TC */
+	u16 q_handle;
+	/* VSI index that Tx queue belongs to */
+	u16 vsi_idx;
+	/* TC number that Tx queue belongs to */
+	u8 tc;
+};
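+
+/* Illustrative usage (sketch): stop a single Tx ring with the meta helper;
+ * the variable names here are assumptions for illustration.
+ *
+ *	struct ice_txq_meta txq_meta = { };
+ *
+ *	ice_fill_txq_meta(vsi, ring, &txq_meta);
+ *	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, ring, &txq_meta);
+ */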
+
+int
+ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
+		    const u8 *macaddr);
+
+void ice_free_fltr_list(struct device *dev, struct list_head *h);
+
+void ice_update_eth_stats(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_msix(struct ice_vsi *vsi);
+
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta);
+
+void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		       struct ice_txq_meta *txq_meta);
+
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+#endif /* CONFIG_PCI_IOV */
+
+int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
+
+int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
+
+int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi);
+
+int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena);
+
+int ice_vsi_start_rx_rings(struct ice_vsi *vsi);
+
+int ice_vsi_stop_rx_rings(struct ice_vsi *vsi);
+
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+			  u16 rel_vmvf_num);
+
+int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
+
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+
+void ice_vsi_delete(struct ice_vsi *vsi);
+
+int ice_vsi_clear(struct ice_vsi *vsi);
+
+#ifdef CONFIG_DCB
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
+#endif /* CONFIG_DCB */
+
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+	      enum ice_vsi_type type, u16 vf_id);
+
+void ice_napi_del(struct ice_vsi *vsi);
+
+int ice_vsi_release(struct ice_vsi *vsi);
+
+void ice_vsi_close(struct ice_vsi *vsi);
+
+int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
+
+int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
+
+int ice_vsi_rebuild(struct ice_vsi *vsi);
+
+bool ice_is_reset_in_progress(unsigned long *state);
+
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+
+void ice_vsi_put_qs(struct ice_vsi *vsi);
+
+#ifdef CONFIG_DCB
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+#endif /* CONFIG_DCB */
+
+void ice_vsi_dis_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi);
+
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
+
+int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
+
+char *ice_nvm_version_str(struct ice_hw *hw);
+
+enum ice_status
+ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
+
+bool ice_is_safe_mode(struct ice_pf *pf);
+#endif /* !_ICE_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 3f047bb..214cd6e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,17 +6,30 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
 
-#define DRV_VERSION	"ice-0.7.0-k"
+#define DRV_VERSION_MAJOR 0
+#define DRV_VERSION_MINOR 8
+#define DRV_VERSION_BUILD 1
+
+#define DRV_VERSION	__stringify(DRV_VERSION_MAJOR) "." \
+			__stringify(DRV_VERSION_MINOR) "." \
+			__stringify(DRV_VERSION_BUILD) "-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
 const char ice_drv_ver[] = DRV_VERSION;
 static const char ice_driver_string[] = DRV_SUMMARY;
 static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
 
+/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
+#define ICE_DDP_PKG_PATH	"intel/ice/ddp/"
+#define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"
+
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION(DRV_SUMMARY);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
+MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
 
 static int debug = -1;
 module_param(debug, int, 0644);
@@ -27,188 +40,145 @@
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 
 static struct workqueue_struct *ice_wq;
+static const struct net_device_ops ice_netdev_safe_mode_ops;
 static const struct net_device_ops ice_netdev_ops;
 
-static void ice_pf_dis_all_vsi(struct ice_pf *pf);
-static void ice_rebuild(struct ice_pf *pf);
-static int ice_vsi_release(struct ice_vsi *vsi);
-static void ice_update_vsi_stats(struct ice_vsi *vsi);
-static void ice_update_pf_stats(struct ice_pf *pf);
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
+
+static void ice_vsi_release_all(struct ice_pf *pf);
 
 /**
- * ice_get_free_slot - get the next non-NULL location index in array
- * @array: array to search
- * @size: size of the array
- * @curr: last known occupied index to be used as a search hint
- *
- * void * is being used to keep the functionality generic. This lets us use this
- * function on any array of pointers.
+ * ice_get_tx_pending - returns number of Tx descriptors not processed
+ * @ring: the ring of descriptors
  */
-static int ice_get_free_slot(void *array, int size, int curr)
+static u16 ice_get_tx_pending(struct ice_ring *ring)
 {
-	int **tmp_array = (int **)array;
-	int next;
+	u16 head, tail;
 
-	if (curr < (size - 1) && !tmp_array[curr + 1]) {
-		next = curr + 1;
-	} else {
-		int i = 0;
+	head = ring->next_to_clean;
+	tail = ring->next_to_use;
 
-		while ((i < size) && (tmp_array[i]))
-			i++;
-		if (i == size)
-			next = ICE_NO_VSI;
-		else
-			next = i;
-	}
-	return next;
-}
-
-/**
- * ice_search_res - Search the tracker for a block of resources
- * @res: pointer to the resource
- * @needed: size of the block needed
- * @id: identifier to track owner
- * Returns the base item index of the block, or -ENOMEM for error
- */
-static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
-{
-	int start = res->search_hint;
-	int end = start;
-
-	id |= ICE_RES_VALID_BIT;
-
-	do {
-		/* skip already allocated entries */
-		if (res->list[end++] & ICE_RES_VALID_BIT) {
-			start = end;
-			if ((start + needed) > res->num_entries)
-				break;
-		}
-
-		if (end == (start + needed)) {
-			int i = start;
-
-			/* there was enough, so assign it to the requestor */
-			while (i != end)
-				res->list[i++] = id;
-
-			if (end == res->num_entries)
-				end = 0;
-
-			res->search_hint = end;
-			return start;
-		}
-	} while (1);
-
-	return -ENOMEM;
-}
-
-/**
- * ice_get_res - get a block of resources
- * @pf: board private structure
- * @res: pointer to the resource
- * @needed: size of the block needed
- * @id: identifier to track owner
- *
- * Returns the base item index of the block, or -ENOMEM for error
- * The search_hint trick and lack of advanced fit-finding only works
- * because we're highly likely to have all the same sized requests.
- * Linear search time and any fragmentation should be minimal.
- */
-static int
-ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
-{
-	int ret;
-
-	if (!res || !pf)
-		return -EINVAL;
-
-	if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
-		dev_err(&pf->pdev->dev,
-			"param err: needed=%d, num_entries = %d id=0x%04x\n",
-			needed, res->num_entries, id);
-		return -EINVAL;
-	}
-
-	/* search based on search_hint */
-	ret = ice_search_res(res, needed, id);
-
-	if (ret < 0) {
-		/* previous search failed. Reset search hint and try again */
-		res->search_hint = 0;
-		ret = ice_search_res(res, needed, id);
-	}
-
-	return ret;
-}
-
-/**
- * ice_free_res - free a block of resources
- * @res: pointer to the resource
- * @index: starting index previously returned by ice_get_res
- * @id: identifier to track owner
- * Returns number of resources freed
- */
-static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
-{
-	int count = 0;
-	int i;
-
-	if (!res || index >= res->num_entries)
-		return -EINVAL;
-
-	id |= ICE_RES_VALID_BIT;
-	for (i = index; i < res->num_entries && res->list[i] == id; i++) {
-		res->list[i] = 0;
-		count++;
-	}
-
-	return count;
-}
-
-/**
- * ice_add_mac_to_list - Add a mac address filter entry to the list
- * @vsi: the VSI to be forwarded to
- * @add_list: pointer to the list which contains MAC filter entries
- * @macaddr: the MAC address to be added.
- *
- * Adds mac address filter entry to the temp list
- *
- * Returns 0 on success or ENOMEM on failure.
- */
-static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
-			       const u8 *macaddr)
-{
-	struct ice_fltr_list_entry *tmp;
-	struct ice_pf *pf = vsi->back;
-
-	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC);
-	if (!tmp)
-		return -ENOMEM;
-
-	tmp->fltr_info.flag = ICE_FLTR_TX;
-	tmp->fltr_info.src = vsi->vsi_num;
-	tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
-	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-	tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
-	ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr);
-
-	INIT_LIST_HEAD(&tmp->list_entry);
-	list_add(&tmp->list_entry, add_list);
-
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
 	return 0;
 }
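+
+/* Example (illustrative): with ring->count = 512, next_to_clean = 500 and
+ * next_to_use = 10, the ring has wrapped, so the pending count is
+ * 10 + 512 - 500 = 22 descriptors.
+ */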
 
 /**
- * ice_add_mac_to_sync_list - creates list of mac addresses to be synced
+ * ice_check_for_hang_subtask - check for and recover hung queues
+ * @pf: pointer to PF struct
+ */
+static void ice_check_for_hang_subtask(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = NULL;
+	struct ice_hw *hw;
+	unsigned int i;
+	int packets;
+	u32 v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
+			vsi = pf->vsi[v];
+			break;
+		}
+
+	if (!vsi || test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
+		return;
+
+	hw = &vsi->back->hw;
+
+	for (i = 0; i < vsi->num_txq; i++) {
+		struct ice_ring *tx_ring = vsi->tx_rings[i];
+
+		if (tx_ring && tx_ring->desc) {
+			/* If packet counter has not changed the queue is
+			 * likely stalled, so force an interrupt for this
+			 * queue.
+			 *
+			 * prev_pkt would be negative if there was no
+			 * pending work.
+			 */
+			packets = tx_ring->stats.pkts & INT_MAX;
+			if (tx_ring->tx_stats.prev_pkt == packets) {
+				/* Trigger sw interrupt to revive the queue */
+				ice_trigger_sw_intr(hw, tx_ring->q_vector);
+				continue;
+			}
+
+			/* Memory barrier between read of packet count and call
+			 * to ice_get_tx_pending()
+			 */
+			smp_rmb();
+			tx_ring->tx_stats.prev_pkt =
+			    ice_get_tx_pending(tx_ring) ? packets : -1;
+		}
+	}
+}
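+
+/* Example (illustrative): if a queue reported 100 completed packets on the
+ * previous pass, latched prev_pkt = 100 because descriptors were still
+ * pending, and still reports 100 now, the queue is treated as stalled and a
+ * software interrupt is fired to reschedule its NAPI poll.
+ */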
+
+/**
+ * ice_init_mac_fltr - Set initial MAC filters
+ * @pf: board private structure
+ *
+ * Set the initial set of MAC filters for the PF VSI; configure filters for
+ * the permanent address and the broadcast address. If an error is
+ * encountered, the netdevice will be unregistered.
+ */
+static int ice_init_mac_fltr(struct ice_pf *pf)
+{
+	enum ice_status status;
+	u8 broadcast[ETH_ALEN];
+	struct ice_vsi *vsi;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return -EINVAL;
+
+	/* To add a MAC filter, first add the MAC to a list and then
+	 * pass the list to ice_add_mac.
+	 */
+
+	/* Add a unicast MAC filter so the VSI can get its packets */
+	status = ice_vsi_cfg_mac_fltr(vsi, vsi->port_info->mac.perm_addr, true);
+	if (status)
+		goto unregister;
+
+	/* VSI needs to receive broadcast traffic, so add the broadcast
+	 * MAC address to the list as well.
+	 */
+	eth_broadcast_addr(broadcast);
+	status = ice_vsi_cfg_mac_fltr(vsi, broadcast, true);
+	if (status)
+		goto unregister;
+
+	return 0;
+unregister:
+	/* We aren't useful with no MAC filters, so unregister if we
+	 * had an error
+	 */
+	if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
+		dev_err(&pf->pdev->dev,
+			"Could not add MAC filters error %d. Unregistering device\n",
+			status);
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
+	return -EIO;
+}
+
+/**
+ * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
  * @netdev: the net device on which the sync is happening
- * @addr: mac address to sync
+ * @addr: MAC address to sync
  *
  * This is a callback function which is called by the in kernel device sync
  * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
  * populates the tmp_sync_list, which is later used by ice_add_mac to add the
- * mac filters from the hardware.
+ * MAC filters from the hardware.
  */
 static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
 {
@@ -222,14 +192,14 @@
 }
 
 /**
- * ice_add_mac_to_unsync_list - creates list of mac addresses to be unsynced
+ * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
  * @netdev: the net device on which the unsync is happening
- * @addr: mac address to unsync
+ * @addr: MAC address to unsync
  *
  * This is a callback function which is called by the in kernel device unsync
  * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
  * populates the tmp_unsync_list, which is later used by ice_remove_mac to
- * delete the mac filters from the hardware.
+ * delete the MAC filters from the hardware.
  */
 static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
 {
@@ -243,24 +213,6 @@
 }
 
 /**
- * ice_free_fltr_list - free filter lists helper
- * @dev: pointer to the device struct
- * @h: pointer to the list head to be freed
- *
- * Helper function to free filter lists previously created using
- * ice_add_mac_to_list
- */
-static void ice_free_fltr_list(struct device *dev, struct list_head *h)
-{
-	struct ice_fltr_list_entry *e, *tmp;
-
-	list_for_each_entry_safe(e, tmp, h, list_entry) {
-		list_del(&e->list_entry);
-		devm_kfree(dev, e);
-	}
-}
-
-/**
  * ice_vsi_fltr_changed - check if filter state changed
  * @vsi: VSI to be checked
  *
@@ -274,6 +226,39 @@
 }
 
 /**
+ * ice_cfg_promisc - Enable or disable promiscuous mode for a given PF
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ * @set_promisc: enable or disable promisc flag request
+ *
+ */
+static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	enum ice_status status = 0;
+
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	if (vsi->vlan_ena) {
+		status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
+						  set_promisc);
+	} else {
+		if (set_promisc)
+			status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						     0);
+		else
+			status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						       0);
+	}
+
+	if (status)
+		return -EIO;
+
+	return 0;
+}
+
+/**
  * ice_vsi_sync_fltr - Update the VSI filter list to the HW
  * @vsi: ptr to the VSI
  *
@@ -288,6 +273,7 @@
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status status = 0;
 	u32 changed_flags = 0;
+	u8 promisc_m;
 	int err = 0;
 
 	if (!vsi->netdev)
@@ -317,7 +303,7 @@
 		netif_addr_unlock_bh(netdev);
 	}
 
-	/* Remove mac addresses in the unsync list */
+	/* Remove MAC addresses in the unsync list */
 	status = ice_remove_mac(hw, &vsi->tmp_unsync_list);
 	ice_free_fltr_list(dev, &vsi->tmp_unsync_list);
 	if (status) {
@@ -329,12 +315,16 @@
 		}
 	}
 
-	/* Add mac addresses in the sync list */
+	/* Add MAC addresses in the sync list */
 	status = ice_add_mac(hw, &vsi->tmp_sync_list);
 	ice_free_fltr_list(dev, &vsi->tmp_sync_list);
-	if (status) {
+	/* If the filter was added successfully or already exists, don't
+	 * treat it as an error; instead continue processing the rest of
+	 * the function.
+	 */
+	if (status && status != ICE_ERR_ALREADY_EXISTS) {
 		netdev_err(netdev, "Failed to add MAC filters\n");
-		/* If there is no more space for new umac filters, vsi
+		/* If there is no more space for new umac filters, VSI
 		 * should go into promiscuous mode. There should be some
 		 * space reserved for promiscuous filters.
 		 */
@@ -351,49 +341,56 @@
 		}
 	}
 	/* check for changes in promiscuous modes */
-	if (changed_flags & IFF_ALLMULTI)
-		netdev_warn(netdev, "Unsupported configuration\n");
+	if (changed_flags & IFF_ALLMULTI) {
+		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+			if (vsi->vlan_ena)
+				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_MCAST_PROMISC_BITS;
+
+			err = ice_cfg_promisc(vsi, promisc_m, true);
+			if (err) {
+				netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
+				goto out_promisc;
+			}
+		} else if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
+			if (vsi->vlan_ena)
+				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_MCAST_PROMISC_BITS;
+
+			err = ice_cfg_promisc(vsi, promisc_m, false);
+			if (err) {
+				netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags |= IFF_ALLMULTI;
+				goto out_promisc;
+			}
+		}
+	}
 
 	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
 	    test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
 		clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
 		if (vsi->current_netdev_flags & IFF_PROMISC) {
-			/* Apply TX filter rule to get traffic from VMs */
-			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
-						  ICE_FLTR_TX);
-			if (status) {
-				netdev_err(netdev, "Error setting default VSI %i tx rule\n",
-					   vsi->vsi_num);
-				vsi->current_netdev_flags &= ~IFF_PROMISC;
-				err = -EIO;
-				goto out_promisc;
-			}
-			/* Apply RX filter rule to get traffic from wire */
-			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+			/* Apply Rx filter rule to get traffic from wire */
+			status = ice_cfg_dflt_vsi(hw, vsi->idx, true,
 						  ICE_FLTR_RX);
 			if (status) {
-				netdev_err(netdev, "Error setting default VSI %i rx rule\n",
+				netdev_err(netdev, "Error setting default VSI %i Rx rule\n",
 					   vsi->vsi_num);
 				vsi->current_netdev_flags &= ~IFF_PROMISC;
 				err = -EIO;
 				goto out_promisc;
 			}
 		} else {
-			/* Clear TX filter rule to stop traffic from VMs */
-			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
-						  ICE_FLTR_TX);
-			if (status) {
-				netdev_err(netdev, "Error clearing default VSI %i tx rule\n",
-					   vsi->vsi_num);
-				vsi->current_netdev_flags |= IFF_PROMISC;
-				err = -EIO;
-				goto out_promisc;
-			}
-			/* Clear filter RX to remove traffic from wire */
-			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+			/* Clear Rx filter to remove traffic from wire */
+			status = ice_cfg_dflt_vsi(hw, vsi->idx, false,
 						  ICE_FLTR_RX);
 			if (status) {
-				netdev_err(netdev, "Error clearing default VSI %i rx rule\n",
+				netdev_err(netdev, "Error clearing default VSI %i Rx rule\n",
 					   vsi->vsi_num);
 				vsi->current_netdev_flags |= IFF_PROMISC;
 				err = -EIO;
@@ -428,7 +425,7 @@
 
 	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
 
-	for (v = 0; v < pf->num_alloc_vsi; v++)
+	ice_for_each_vsi(pf, v)
 		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
 		    ice_vsi_sync_fltr(pf->vsi[v])) {
 			/* come back and try again later */
@@ -438,12 +435,48 @@
 }
 
 /**
- * ice_is_reset_recovery_pending - schedule a reset
- * @state: pf state field
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
  */
-static bool ice_is_reset_recovery_pending(unsigned long int *state)
+static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
 {
-	return test_bit(__ICE_RESET_RECOVERY_PENDING, state);
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			ice_stop(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		} else {
+			ice_vsi_close(vsi);
+		}
+	}
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ * @locked: is the rtnl_lock already held
+ */
+#ifdef CONFIG_DCB
+void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+#else
+static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+#endif /* CONFIG_DCB */
+{
+	int v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			ice_dis_vsi(pf->vsi[v], locked);
 }
 
 /**
@@ -456,23 +489,31 @@
 ice_prepare_for_reset(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
-	u32 v;
+	int i;
 
-	ice_for_each_vsi(pf, v)
-		if (pf->vsi[v])
-			ice_remove_vsi_fltr(hw, pf->vsi[v]->vsi_num);
+	/* already prepared for reset */
+	if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
+		return;
 
-	dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
+	/* Notify VFs of impending reset */
+	if (ice_check_sq_alive(hw, &hw->mailboxq))
+		ice_vc_notify_reset(pf);
 
+	/* Disable VFs until reset is completed */
+	for (i = 0; i < pf->num_alloc_vfs; i++)
+		ice_set_vf_state_qs_dis(&pf->vf[i]);
+
+	/* clear SW filtering DB */
+	ice_clear_hw_tbls(hw);
 	/* disable the VSIs and their queues that are not already DOWN */
-	/* pf_dis_all_vsi modifies netdev structures -rtnl_lock needed */
-	ice_pf_dis_all_vsi(pf);
+	ice_pf_dis_all_vsi(pf, false);
 
-	ice_for_each_vsi(pf, v)
-		if (pf->vsi[v])
-			pf->vsi[v]->vsi_num = 0;
+	if (hw->port_info)
+		ice_sched_clear_port(hw->port_info);
 
 	ice_shutdown_all_ctrlq(hw);
+
+	set_bit(__ICE_PREPARED_FOR_RESET, pf->state);
 }
 
 /**
@@ -489,27 +530,30 @@
 	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
 	WARN_ON(in_interrupt());
 
-	/* PFR is a bit of a special case because it doesn't result in an OICR
-	 * interrupt. So for PFR, we prepare for reset, issue the reset and
-	 * rebuild sequentially.
-	 */
-	if (reset_type == ICE_RESET_PFR) {
-		set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
-		ice_prepare_for_reset(pf);
-	}
+	ice_prepare_for_reset(pf);
 
 	/* trigger the reset */
 	if (ice_reset(hw, reset_type)) {
 		dev_err(dev, "reset %d failed\n", reset_type);
 		set_bit(__ICE_RESET_FAILED, pf->state);
-		clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+		clear_bit(__ICE_RESET_OICR_RECV, pf->state);
+		clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(__ICE_PFR_REQ, pf->state);
+		clear_bit(__ICE_CORER_REQ, pf->state);
+		clear_bit(__ICE_GLOBR_REQ, pf->state);
 		return;
 	}
 
+	/* PFR is a bit of a special case because it doesn't result in an OICR
+	 * interrupt. So for PFR, rebuild after the reset and clear the reset-
+	 * associated state bits.
+	 */
 	if (reset_type == ICE_RESET_PFR) {
 		pf->pfr_count++;
-		ice_rebuild(pf);
-		clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+		ice_rebuild(pf, reset_type);
+		clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(__ICE_PFR_REQ, pf->state);
+		ice_reset_all_vfs(pf, true);
 	}
 }
 
@@ -519,77 +563,84 @@
  */
 static void ice_reset_subtask(struct ice_pf *pf)
 {
-	enum ice_reset_req reset_type;
-
-	rtnl_lock();
+	enum ice_reset_req reset_type = ICE_RESET_INVAL;
 
 	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
-	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what
-	 * type of reset happened and sets __ICE_RESET_RECOVERY_PENDING bit in
-	 * pf->state. So if reset/recovery is pending (as indicated by this bit)
-	 * we do a rebuild and return.
+	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
+	 * of reset is pending and sets bits in pf->state indicating the reset
+	 * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set,
+	 * prepare for the pending reset if not already prepared (for PF
+	 * software-initiated global resets the software should already be
+	 * prepared, as indicated by __ICE_PREPARED_FOR_RESET; for global
+	 * resets initiated by firmware or by software on other PFs, that
+	 * bit is not set, so prepare for the reset now), then poll for
+	 * reset completion, rebuild and return.
 	 */
-	if (ice_is_reset_recovery_pending(pf->state)) {
-		clear_bit(__ICE_GLOBR_RECV, pf->state);
-		clear_bit(__ICE_CORER_RECV, pf->state);
+	if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) {
+		/* Perform the largest reset requested */
+		if (test_and_clear_bit(__ICE_CORER_RECV, pf->state))
+			reset_type = ICE_RESET_CORER;
+		if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state))
+			reset_type = ICE_RESET_GLOBR;
+		if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state))
+			reset_type = ICE_RESET_EMPR;
+		/* return if no valid reset type requested */
+		if (reset_type == ICE_RESET_INVAL)
+			return;
 		ice_prepare_for_reset(pf);
 
 		/* make sure we are ready to rebuild */
-		if (ice_check_reset(&pf->hw))
+		if (ice_check_reset(&pf->hw)) {
 			set_bit(__ICE_RESET_FAILED, pf->state);
-		else
-			ice_rebuild(pf);
-		clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
-		goto unlock;
+		} else {
+			/* done with reset. start rebuild */
+			pf->hw.reset_ongoing = false;
+			ice_rebuild(pf, reset_type);
+			/* clear bit to resume normal operations, but
+			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
+			 */
+			clear_bit(__ICE_RESET_OICR_RECV, pf->state);
+			clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+			clear_bit(__ICE_PFR_REQ, pf->state);
+			clear_bit(__ICE_CORER_REQ, pf->state);
+			clear_bit(__ICE_GLOBR_REQ, pf->state);
+			ice_reset_all_vfs(pf, true);
+		}
+
+		return;
 	}
 
 	/* No pending resets to finish processing. Check for new resets */
-	if (test_and_clear_bit(__ICE_GLOBR_REQ, pf->state))
-		reset_type = ICE_RESET_GLOBR;
-	else if (test_and_clear_bit(__ICE_CORER_REQ, pf->state))
-		reset_type = ICE_RESET_CORER;
-	else if (test_and_clear_bit(__ICE_PFR_REQ, pf->state))
+	if (test_bit(__ICE_PFR_REQ, pf->state))
 		reset_type = ICE_RESET_PFR;
-	else
-		goto unlock;
+	if (test_bit(__ICE_CORER_REQ, pf->state))
+		reset_type = ICE_RESET_CORER;
+	if (test_bit(__ICE_GLOBR_REQ, pf->state))
+		reset_type = ICE_RESET_GLOBR;
+	/* If no valid reset type requested just return */
+	if (reset_type == ICE_RESET_INVAL)
+		return;
 
-	/* reset if not already down or resetting */
+	/* reset if not already down or busy */
 	if (!test_bit(__ICE_DOWN, pf->state) &&
 	    !test_bit(__ICE_CFG_BUSY, pf->state)) {
 		ice_do_reset(pf, reset_type);
 	}
-
-unlock:
-	rtnl_unlock();
 }
 
 /**
- * ice_watchdog_subtask - periodic tasks not using event driven scheduling
- * @pf: board private structure
+ * ice_print_topo_conflict - print topology conflict message
+ * @vsi: the VSI whose topology status is being checked
  */
-static void ice_watchdog_subtask(struct ice_pf *pf)
+static void ice_print_topo_conflict(struct ice_vsi *vsi)
 {
-	int i;
-
-	/* if interface is down do nothing */
-	if (test_bit(__ICE_DOWN, pf->state) ||
-	    test_bit(__ICE_CFG_BUSY, pf->state))
-		return;
-
-	/* make sure we don't do these things too often */
-	if (time_before(jiffies,
-			pf->serv_tmr_prev + pf->serv_tmr_period))
-		return;
-
-	pf->serv_tmr_prev = jiffies;
-
-	/* Update the stats for active netdevs so the network stack
-	 * can look at updated numbers whenever it cares to
-	 */
-	ice_update_pf_stats(pf);
-	for (i = 0; i < pf->num_alloc_vsi; i++)
-		if (pf->vsi[i] && pf->vsi[i]->netdev)
-			ice_update_vsi_stats(pf->vsi[i]);
+	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
+	case ICE_AQ_LINK_TOPO_CONFLICT:
+	case ICE_AQ_LINK_MEDIA_CONFLICT:
+		netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
+		break;
+	default:
+		break;
+	}
 }
 
 /**
@@ -599,8 +650,16 @@
  */
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 {
+	struct ice_aqc_get_phy_caps_data *caps;
+	enum ice_status status;
+	const char *fec_req;
 	const char *speed;
+	const char *fec;
 	const char *fc;
+	const char *an;
+
+	if (!vsi)
+		return;
 
 	if (vsi->current_isup == isup)
 		return;
@@ -613,6 +672,12 @@
 	}
 
 	switch (vsi->port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		speed = "100 G";
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		speed = "50 G";
+		break;
 	case ICE_AQ_LINK_SPEED_40GB:
 		speed = "40 G";
 		break;
@@ -644,21 +709,191 @@
 
 	switch (vsi->port_info->fc.current_mode) {
 	case ICE_FC_FULL:
-		fc = "RX/TX";
+		fc = "Rx/Tx";
 		break;
 	case ICE_FC_TX_PAUSE:
-		fc = "TX";
+		fc = "Tx";
 		break;
 	case ICE_FC_RX_PAUSE:
-		fc = "RX";
+		fc = "Rx";
+		break;
+	case ICE_FC_NONE:
+		fc = "None";
 		break;
 	default:
 		fc = "Unknown";
 		break;
 	}
 
-	netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n",
-		    speed, fc);
+	/* Get FEC mode based on negotiated link info */
+	switch (vsi->port_info->phy.link_info.fec_info) {
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+		/* fall through */
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fec = "RS-FEC";
+		break;
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fec = "FC-FEC/BASE-R";
+		break;
+	default:
+		fec = "NONE";
+		break;
+	}
+
+	/* check if autoneg completed; may be false if autoneg is not supported */
+	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
+		an = "True";
+	else
+		an = "False";
+
+	/* Get the requested FEC mode from the last SW-configured PHY caps */
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps) {
+		fec_req = "Unknown";
+		goto done;
+	}
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
+	if (status)
+		netdev_info(vsi->netdev, "Get phy capability failed.\n");
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		fec_req = "RS-FEC";
+	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fec_req = "FC-FEC/BASE-R";
+	else
+		fec_req = "NONE";
+
+	devm_kfree(&vsi->back->pdev->dev, caps);
+
+done:
+	netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n",
+		    speed, fec_req, fec, an, fc);
+	ice_print_topo_conflict(vsi);
+}
+
+/**
+ * ice_vsi_link_event - update the VSI's netdev
+ * @vsi: the VSI on which the link event occurred
+ * @link_up: whether or not the VSI needs to be set up or down
+ */
+static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
+{
+	if (!vsi)
+		return;
+
+	if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev)
+		return;
+
+	if (vsi->type == ICE_VSI_PF) {
+		if (link_up == netif_carrier_ok(vsi->netdev))
+			return;
+
+		if (link_up) {
+			netif_carrier_on(vsi->netdev);
+			netif_tx_wake_all_queues(vsi->netdev);
+		} else {
+			netif_carrier_off(vsi->netdev);
+			netif_tx_stop_all_queues(vsi->netdev);
+		}
+	}
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: PF that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ * @link_up: true if the physical link is up and false if it is down
+ * @link_speed: current link speed received from the link event
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
+	       u16 link_speed)
+{
+	struct ice_phy_info *phy_info;
+	struct ice_vsi *vsi;
+	u16 old_link_speed;
+	bool old_link;
+	int result;
+
+	phy_info = &pi->phy;
+	phy_info->link_info_old = phy_info->link_info;
+
+	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+	old_link_speed = phy_info->link_info_old.link_speed;
+
+	/* Update the link info structures and re-enable link events; don't
+	 * bail on failure since other bookkeeping below still needs to run.
+	 */
+	result = ice_update_link_info(pi);
+	if (result)
+		dev_dbg(&pf->pdev->dev,
+			"Failed to update link status and re-enable link events for port %d\n",
+			pi->lport);
+
+	/* if the old link state and speed match the new ones, nothing to do */
+	if (link_up == old_link && link_speed == old_link_speed)
+		return result;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi || !vsi->port_info)
+		return -EINVAL;
+
+	/* turn off PHY if media was removed */
+	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
+	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+		result = ice_aq_set_link_restart_an(pi, false, NULL);
+		if (result) {
+			dev_dbg(&pf->pdev->dev,
+				"Failed to set link down, VSI %d error %d\n",
+				vsi->vsi_num, result);
+			return result;
+		}
+	}
+
+	ice_vsi_link_event(vsi, link_up);
+	ice_print_link_msg(vsi, link_up);
+
+	if (pf->num_alloc_vfs)
+		ice_vc_notify_link_state(pf);
+
+	return result;
+}
+
+/**
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
+ */
+static void ice_watchdog_subtask(struct ice_pf *pf)
+{
+	int i;
+
+	/* if interface is down do nothing */
+	if (test_bit(__ICE_DOWN, pf->state) ||
+	    test_bit(__ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* make sure we don't do these things too often */
+	if (time_before(jiffies,
+			pf->serv_tmr_prev + pf->serv_tmr_period))
+		return;
+
+	pf->serv_tmr_prev = jiffies;
+
+	/* Update the stats for active netdevs so the network stack
+	 * can look at updated numbers whenever it cares to
+	 */
+	ice_update_pf_stats(pf);
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->netdev)
+			ice_update_vsi_stats(pf->vsi[i]);
 }
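
The watchdog throttles itself with the jiffies comparison above. Below is a userspace analogue of the same rate-limit pattern; note that the kernel's time_before() additionally handles jiffies wraparound, which the plain comparison here does not:

#include <stdio.h>
#include <time.h>

static time_t prev;
static const time_t period = 2;		/* seconds; plays serv_tmr_period */

/* run the expensive body at most once per period, like the
 * time_before(jiffies, serv_tmr_prev + serv_tmr_period) guard above
 */
static void watchdog(void)
{
	time_t now = time(NULL);

	if (now < prev + period)
		return;			/* called too soon; skip */
	prev = now;
	puts("updating stats");		/* stands in for ice_update_*_stats() */
}

int main(void)
{
	watchdog();	/* runs */
	watchdog();	/* skipped: period not yet elapsed */
	return 0;
}
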
 
 /**
@@ -692,106 +927,25 @@
 }
 
 /**
- * ice_vsi_link_event - update the vsi's netdev
- * @vsi: the vsi on which the link event occurred
- * @link_up: whether or not the vsi needs to be set up or down
- */
-static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
-{
-	if (!vsi || test_bit(__ICE_DOWN, vsi->state))
-		return;
-
-	if (vsi->type == ICE_VSI_PF) {
-		if (!vsi->netdev) {
-			dev_dbg(&vsi->back->pdev->dev,
-				"vsi->netdev is not initialized!\n");
-			return;
-		}
-		if (link_up) {
-			netif_carrier_on(vsi->netdev);
-			netif_tx_wake_all_queues(vsi->netdev);
-		} else {
-			netif_carrier_off(vsi->netdev);
-			netif_tx_stop_all_queues(vsi->netdev);
-		}
-	}
-}
-
-/**
- * ice_link_event - process the link event
- * @pf: pf that the link event is associated with
- * @pi: port_info for the port that the link event is associated with
- *
- * Returns -EIO if ice_get_link_status() fails
- * Returns 0 on success
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: PF that the link event is associated with
+ * @event: event structure containing link status info
  */
 static int
-ice_link_event(struct ice_pf *pf, struct ice_port_info *pi)
+ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
 {
-	u8 new_link_speed, old_link_speed;
-	struct ice_phy_info *phy_info;
-	bool new_link_same_as_old;
-	bool new_link, old_link;
-	u8 lport;
-	u16 v;
-
-	phy_info = &pi->phy;
-	phy_info->link_info_old = phy_info->link_info;
-	/* Force ice_get_link_status() to update link info */
-	phy_info->get_link_info = true;
-
-	old_link = (phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
-	old_link_speed = phy_info->link_info_old.link_speed;
-
-	lport = pi->lport;
-	if (ice_get_link_status(pi, &new_link)) {
-		dev_dbg(&pf->pdev->dev,
-			"Could not get link status for port %d\n", lport);
-		return -EIO;
-	}
-
-	new_link_speed = phy_info->link_info.link_speed;
-
-	new_link_same_as_old = (new_link == old_link &&
-				new_link_speed == old_link_speed);
-
-	ice_for_each_vsi(pf, v) {
-		struct ice_vsi *vsi = pf->vsi[v];
-
-		if (!vsi || !vsi->port_info)
-			continue;
-
-		if (new_link_same_as_old &&
-		    (test_bit(__ICE_DOWN, vsi->state) ||
-		    new_link == netif_carrier_ok(vsi->netdev)))
-			continue;
-
-		if (vsi->port_info->lport == lport) {
-			ice_print_link_msg(vsi, new_link);
-			ice_vsi_link_event(vsi, new_link);
-		}
-	}
-
-	return 0;
-}
-
-/**
- * ice_handle_link_event - handle link event via ARQ
- * @pf: pf that the link event is associated with
- *
- * Return -EINVAL if port_info is null
- * Return status on succes
- */
-static int ice_handle_link_event(struct ice_pf *pf)
-{
+	struct ice_aqc_get_link_status_data *link_data;
 	struct ice_port_info *port_info;
 	int status;
 
+	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
 	port_info = pf->hw.port_info;
 	if (!port_info)
 		return -EINVAL;
 
-	status = ice_link_event(pf, port_info);
+	status = ice_link_event(pf, port_info,
+				!!(link_data->link_info & ICE_AQ_LINK_UP),
+				le16_to_cpu(link_data->link_speed));
 	if (status)
 		dev_dbg(&pf->pdev->dev,
 			"Could not process link event, error %d\n", status);
@@ -822,6 +976,10 @@
 		cq = &hw->adminq;
 		qtype = "Admin";
 		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		qtype = "Mailbox";
+		break;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
 			 q_type);
@@ -899,10 +1057,19 @@
 
 		switch (opcode) {
 		case ice_aqc_opc_get_link_status:
-			if (ice_handle_link_event(pf))
+			if (ice_handle_link_event(pf, &event))
 				dev_err(&pf->pdev->dev,
 					"Could not handle link event\n");
 			break;
+		case ice_mbx_opc_send_msg_to_pf:
+			ice_vc_process_vf_msg(pf, &event);
+			break;
+		case ice_aqc_opc_fw_logging:
+			ice_output_fw_log(hw, &event.desc, event.msg_buf);
+			break;
+		case ice_aqc_opc_lldp_set_mib_change:
+			ice_dcb_process_lldp_set_mib_change(pf, &event);
+			break;
 		default:
 			dev_dbg(&pf->pdev->dev,
 				"%s Receive Queue unknown event 0x%04x ignored\n",
@@ -959,6 +1126,28 @@
 }
 
 /**
+ * ice_clean_mailboxq_subtask - clean the MailboxQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
+		return;
+
+	clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+
+	if (ice_ctrlq_pending(hw, &hw->mailboxq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
+
+	ice_flush(hw);
+}
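
Clearing __ICE_MAILBOXQ_EVENT_PENDING before the final ice_ctrlq_pending() check closes the window where an event arrives between the drain and the clear. A single-threaded sketch of that clear-then-recheck ordering (C11 atomics stand in for the pf->state bitops; the helpers are trivial stubs):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending;
static int ring_events;			/* pretend descriptor count */

static void clean_queue(void) { ring_events = 0; }

/* clear the pending flag only after a successful drain, then re-check
 * the ring: an event that raced in between the drain and the clear is
 * picked up by the second pass instead of being lost
 */
static void clean_subtask(void)
{
	if (!atomic_load(&pending))
		return;

	clean_queue();
	atomic_store(&pending, false);	/* clear before the final check */

	if (ring_events)		/* raced-in work */
		clean_queue();
}

int main(void)
{
	ring_events = 3;
	atomic_store(&pending, true);
	clean_subtask();
	printf("%d\n", ring_events);	/* 0: ring fully drained */
	return 0;
}
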
+
+/**
  * ice_service_task_schedule - schedule the service task to wake up
  * @pf: board private structure
  *
@@ -966,8 +1155,9 @@
  */
 static void ice_service_task_schedule(struct ice_pf *pf)
 {
-	if (!test_bit(__ICE_DOWN, pf->state) &&
-	    !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state))
+	if (!test_bit(__ICE_SERVICE_DIS, pf->state) &&
+	    !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) &&
+	    !test_bit(__ICE_NEEDS_RESTART, pf->state))
 		queue_work(ice_wq, &pf->serv_task);
 }
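
The test_and_set_bit() guard makes scheduling idempotent: only the caller that actually flips __ICE_SERVICE_SCHED queues the work. A standalone analogue using a C11 atomic flag (names are illustrative):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag sched = ATOMIC_FLAG_INIT;

/* only the caller that flips the flag from clear to set queues the
 * work, mirroring test_and_set_bit(__ICE_SERVICE_SCHED, ...) above
 */
static void schedule_service_task(void)
{
	if (atomic_flag_test_and_set(&sched))
		return;			/* already scheduled; no-op */
	puts("queued");			/* stands in for queue_work() */
}

int main(void)
{
	schedule_service_task();	/* prints "queued" */
	schedule_service_task();	/* silent: flag already set */
	return 0;
}
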
 
@@ -985,6 +1175,34 @@
 }
 
 /**
+ * ice_service_task_stop - stop service task and cancel works
+ * @pf: board private structure
+ */
+static void ice_service_task_stop(struct ice_pf *pf)
+{
+	set_bit(__ICE_SERVICE_DIS, pf->state);
+
+	if (pf->serv_tmr.function)
+		del_timer_sync(&pf->serv_tmr);
+	if (pf->serv_task.func)
+		cancel_work_sync(&pf->serv_task);
+
+	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_service_task_restart - restart service task and schedule works
+ * @pf: board private structure
+ *
+ * This function is needed for suspend and resume flows (e.g. the WoL scenario)
+ */
+static void ice_service_task_restart(struct ice_pf *pf)
+{
+	clear_bit(__ICE_SERVICE_DIS, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
  * ice_service_timer - timer callback to schedule service task
  * @t: pointer to timer_list
  */
@@ -997,6 +1215,274 @@
 }
 
 /**
+ * ice_handle_mdd_event - handle malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from service task. OICR interrupt handler indicates MDD event
+ */
+static void ice_handle_mdd_event(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	bool mdd_detected = false;
+	u32 reg;
+	int i;
+
+	if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state))
+		return;
+
+	/* find what triggered the MDD event */
+	reg = rd32(hw, GL_MDET_TX_PQM);
+	if (reg & GL_MDET_TX_PQM_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
+				GL_MDET_TX_PQM_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
+				GL_MDET_TX_PQM_VF_NUM_S;
+		u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
+				GL_MDET_TX_PQM_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
+				GL_MDET_TX_PQM_QNUM_S);
+
+		if (netif_msg_tx_err(pf))
+			dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
+		mdd_detected = true;
+	}
+
+	reg = rd32(hw, GL_MDET_TX_TCLAN);
+	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
+				GL_MDET_TX_TCLAN_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
+				GL_MDET_TX_TCLAN_VF_NUM_S;
+		u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
+				GL_MDET_TX_TCLAN_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
+				GL_MDET_TX_TCLAN_QNUM_S);
+
+		if (netif_msg_tx_err(pf))
+			dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
+		mdd_detected = true;
+	}
+
+	reg = rd32(hw, GL_MDET_RX);
+	if (reg & GL_MDET_RX_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
+				GL_MDET_RX_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
+				GL_MDET_RX_VF_NUM_S;
+		u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
+				GL_MDET_RX_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
+				GL_MDET_RX_QNUM_S);
+
+		if (netif_msg_rx_err(pf))
+			dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_RX, 0xffffffff);
+		mdd_detected = true;
+	}
+
+	if (mdd_detected) {
+		bool pf_mdd_detected = false;
+
+		reg = rd32(hw, PF_MDET_TX_PQM);
+		if (reg & PF_MDET_TX_PQM_VALID_M) {
+			wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
+			dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
+			pf_mdd_detected = true;
+		}
+
+		reg = rd32(hw, PF_MDET_TX_TCLAN);
+		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
+			wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
+			dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
+			pf_mdd_detected = true;
+		}
+
+		reg = rd32(hw, PF_MDET_RX);
+		if (reg & PF_MDET_RX_VALID_M) {
+			wr32(hw, PF_MDET_RX, 0xFFFF);
+			dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n");
+			pf_mdd_detected = true;
+		}
+		/* Queue belongs to the PF, initiate a reset */
+		if (pf_mdd_detected) {
+			set_bit(__ICE_NEEDS_RESTART, pf->state);
+			ice_service_task_schedule(pf);
+		}
+	}
+
+	/* check to see if one of the VFs caused the MDD */
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		bool vf_mdd_detected = false;
+
+		reg = rd32(hw, VP_MDET_TX_PQM(i));
+		if (reg & VP_MDET_TX_PQM_VALID_M) {
+			wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
+			vf_mdd_detected = true;
+			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+				 i);
+		}
+
+		reg = rd32(hw, VP_MDET_TX_TCLAN(i));
+		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
+			wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
+			vf_mdd_detected = true;
+			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+				 i);
+		}
+
+		reg = rd32(hw, VP_MDET_TX_TDPU(i));
+		if (reg & VP_MDET_TX_TDPU_VALID_M) {
+			wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
+			vf_mdd_detected = true;
+			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+				 i);
+		}
+
+		reg = rd32(hw, VP_MDET_RX(i));
+		if (reg & VP_MDET_RX_VALID_M) {
+			wr32(hw, VP_MDET_RX(i), 0xFFFF);
+			vf_mdd_detected = true;
+			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
+				 i);
+		}
+
+		if (vf_mdd_detected) {
+			vf->num_mdd_events++;
+			if (vf->num_mdd_events &&
+			    vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD)
+				dev_info(&pf->pdev->dev,
+					 "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n",
+					 i, vf->num_mdd_events);
+		}
+	}
+}
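
Each GL_MDET_* register above is decoded with the usual AND-then-shift idiom and acknowledged by writing all ones (write-1-to-clear). A compilable sketch with an invented field layout; the real masks and shifts live in the driver's register header:

#include <stdint.h>
#include <stdio.h>

/* field layout invented for illustration only */
#define PF_NUM_M	0x7u		/* bits 2:0 */
#define PF_NUM_S	0
#define QNUM_M		(0x3fffu << 3)	/* bits 16:3 */
#define QNUM_S		3
#define VALID_M		(1u << 31)

int main(void)
{
	uint32_t reg = VALID_M | 0x1a5;	/* pretend register read */

	if (reg & VALID_M) {
		uint8_t pf_num = (reg & PF_NUM_M) >> PF_NUM_S;
		uint16_t queue = (reg & QNUM_M) >> QNUM_S;

		printf("pf %u queue %u\n", pf_num, queue); /* pf 5 queue 52 */
		reg = 0xffffffff;	/* ack: write-1-to-clear, as above */
	}
	return 0;
}
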
+
+/**
+ * ice_force_phys_link_state - Force the physical link state
+ * @vsi: VSI to force the physical link state to up/down
+ * @link_up: true/false indicates to set the physical link to up/down
+ *
+ * Force the physical link state by getting the current PHY capabilities from
+ * hardware and setting the PHY config based on the determined capabilities. If
+ * the link changes, a link event will be triggered because both the Enable
+ * Automatic Link Update and LESM Enable bits are set when setting the PHY
+ * capabilities.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_port_info *pi;
+	struct device *dev;
+	int retcode;
+
+	if (!vsi || !vsi->port_info || !vsi->back)
+		return -EINVAL;
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	dev = &vsi->back->pdev->dev;
+
+	pi = vsi->port_info;
+
+	pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+				      NULL);
+	if (retcode) {
+		dev_err(dev,
+			"Failed to get phy capabilities, VSI %d error %d\n",
+			vsi->vsi_num, retcode);
+		retcode = -EIO;
+		goto out;
+	}
+
+	/* No change in link */
+	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
+	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
+		goto out;
+
+	cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL);
+	if (!cfg) {
+		retcode = -ENOMEM;
+		goto out;
+	}
+
+	cfg->phy_type_low = pcaps->phy_type_low;
+	cfg->phy_type_high = pcaps->phy_type_high;
+	cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+	cfg->low_power_ctrl = pcaps->low_power_ctrl;
+	cfg->eee_cap = pcaps->eee_cap;
+	cfg->eeer_value = pcaps->eeer_value;
+	cfg->link_fec_opt = pcaps->link_fec_options;
+	if (link_up)
+		cfg->caps |= ICE_AQ_PHY_ENA_LINK;
+	else
+		cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
+
+	retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL);
+	if (retcode) {
+		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
+			vsi->vsi_num, retcode);
+		retcode = -EIO;
+	}
+
+	devm_kfree(dev, cfg);
+out:
+	devm_kfree(dev, pcaps);
+	return retcode;
+}
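
Note the shape of the sequence above: the reported capabilities are copied verbatim into the config and only the link-enable bit is toggled, so the set is effectively a no-op apart from the forced field. A simplified sketch of that copy-and-toggle pattern, with invented struct fields:

#include <stdbool.h>
#include <stdio.h>

/* simplified caps/config shape, invented for illustration */
struct phy_caps { unsigned caps; unsigned fec; };
#define ENA_LINK	(1u << 0)

/* mirror the get->copy->toggle sequence: start from the reported caps
 * so only the link-enable bit actually changes
 */
static struct phy_caps make_cfg(const struct phy_caps *got, bool link_up)
{
	struct phy_caps cfg = *got;	/* copy everything verbatim */

	if (link_up)
		cfg.caps |= ENA_LINK;
	else
		cfg.caps &= ~ENA_LINK;
	return cfg;
}

int main(void)
{
	struct phy_caps got = { .caps = ENA_LINK | 0x8, .fec = 0x3 };
	struct phy_caps cfg = make_cfg(&got, false);

	printf("caps 0x%x fec 0x%x\n", cfg.caps, cfg.fec); /* caps 0x8 fec 0x3 */
	return 0;
}
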
+
+/**
+ * ice_check_media_subtask - Check for media; bring link up if detected.
+ * @pf: pointer to PF struct
+ */
+static void ice_check_media_subtask(struct ice_pf *pf)
+{
+	struct ice_port_info *pi;
+	struct ice_vsi *vsi;
+	int err;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* No need to check for media if it's already present or the interface
+	 * is down
+	 */
+	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) ||
+	    test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	/* Refresh link info and check if media is present */
+	pi = vsi->port_info;
+	err = ice_update_link_info(pi);
+	if (err)
+		return;
+
+	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		err = ice_force_phys_link_state(vsi, true);
+		if (err)
+			return;
+		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+		/* A Link Status Event will be generated; the event handler
+		 * will complete bringing the interface up
+		 */
+	}
+}
+
+/**
  * ice_service_task - manage and run subtasks
  * @work: pointer to work_struct contained by the PF struct
  */
@@ -1010,16 +1496,28 @@
 	/* process reset requests first */
 	ice_reset_subtask(pf);
 
-	/* bail if a reset/recovery cycle is pending */
-	if (ice_is_reset_recovery_pending(pf->state) ||
-	    test_bit(__ICE_SUSPENDED, pf->state)) {
+	/* bail if a reset/recovery cycle is pending or rebuild failed */
+	if (ice_is_reset_in_progress(pf->state) ||
+	    test_bit(__ICE_SUSPENDED, pf->state) ||
+	    test_bit(__ICE_NEEDS_RESTART, pf->state)) {
 		ice_service_task_complete(pf);
 		return;
 	}
 
-	ice_sync_fltr_subtask(pf);
-	ice_watchdog_subtask(pf);
 	ice_clean_adminq_subtask(pf);
+	ice_check_media_subtask(pf);
+	ice_check_for_hang_subtask(pf);
+	ice_sync_fltr_subtask(pf);
+	ice_handle_mdd_event(pf);
+	ice_watchdog_subtask(pf);
+
+	if (ice_is_safe_mode(pf)) {
+		ice_service_task_complete(pf);
+		return;
+	}
+
+	ice_process_vflr_event(pf);
+	ice_clean_mailboxq_subtask(pf);
 
 	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
 	ice_service_task_complete(pf);
@@ -1029,13 +1527,16 @@
 	 * schedule the service task now.
 	 */
 	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
+	    test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
 	    test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
 		mod_timer(&pf->serv_tmr, jiffies);
 }
 
 /**
  * ice_set_ctrlq_len - helper function to set controlq length
- * @hw: pointer to the hw instance
+ * @hw: pointer to the HW instance
  */
 static void ice_set_ctrlq_len(struct ice_hw *hw)
 {
@@ -1043,6 +1544,10 @@
 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
+	hw->mailboxq.num_rq_entries = ICE_MBXRQ_LEN;
+	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
+	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
 }
 
 /**
@@ -1053,8 +1558,9 @@
  * This is a callback function used by the irq_set_affinity_notifier function
  * so that we may register to receive changes to the irq affinity masks.
  */
-static void ice_irq_affinity_notify(struct irq_affinity_notify *notify,
-				    const cpumask_t *mask)
+static void
+ice_irq_affinity_notify(struct irq_affinity_notify *notify,
+			const cpumask_t *mask)
 {
 	struct ice_q_vector *q_vector =
 		container_of(notify, struct ice_q_vector, affinity_notify);
@@ -1073,97 +1579,22 @@
 static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
 
 /**
- * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
- * @vsi: the VSI being un-configured
- */
-static void ice_vsi_dis_irq(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	int base = vsi->base_vector;
-	u32 val;
-	int i;
-
-	/* disable interrupt causation from each queue */
-	if (vsi->tx_rings) {
-		ice_for_each_txq(vsi, i) {
-			if (vsi->tx_rings[i]) {
-				u16 reg;
-
-				reg = vsi->tx_rings[i]->reg_idx;
-				val = rd32(hw, QINT_TQCTL(reg));
-				val &= ~QINT_TQCTL_CAUSE_ENA_M;
-				wr32(hw, QINT_TQCTL(reg), val);
-			}
-		}
-	}
-
-	if (vsi->rx_rings) {
-		ice_for_each_rxq(vsi, i) {
-			if (vsi->rx_rings[i]) {
-				u16 reg;
-
-				reg = vsi->rx_rings[i]->reg_idx;
-				val = rd32(hw, QINT_RQCTL(reg));
-				val &= ~QINT_RQCTL_CAUSE_ENA_M;
-				wr32(hw, QINT_RQCTL(reg), val);
-			}
-		}
-	}
-
-	/* disable each interrupt */
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
-		for (i = vsi->base_vector;
-		     i < (vsi->num_q_vectors + vsi->base_vector); i++)
-			wr32(hw, GLINT_DYN_CTL(i), 0);
-
-		ice_flush(hw);
-		for (i = 0; i < vsi->num_q_vectors; i++)
-			synchronize_irq(pf->msix_entries[i + base].vector);
-	}
-}
-
-/**
  * ice_vsi_ena_irq - Enable IRQ for the given VSI
  * @vsi: the VSI being configured
  */
 static int ice_vsi_ena_irq(struct ice_vsi *vsi)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
+	struct ice_hw *hw = &vsi->back->hw;
+	int i;
 
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
-		int i;
-
-		for (i = 0; i < vsi->num_q_vectors; i++)
-			ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
-	}
+	ice_for_each_q_vector(vsi, i)
+		ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
 
 	ice_flush(hw);
 	return 0;
 }
 
 /**
- * ice_vsi_delete - delete a VSI from the switch
- * @vsi: pointer to VSI being removed
- */
-static void ice_vsi_delete(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_vsi_ctx ctxt;
-	enum ice_status status;
-
-	ctxt.vsi_num = vsi->vsi_num;
-
-	memcpy(&ctxt.info, &vsi->info, sizeof(struct ice_aqc_vsi_props));
-
-	status = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL);
-	if (status)
-		dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n",
-			vsi->vsi_num);
-}
-
-/**
  * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
  * @vsi: the VSI being configured
  * @basename: name for the vector
@@ -1197,10 +1628,9 @@
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = devm_request_irq(&pf->pdev->dev,
-				       pf->msix_entries[base + vector].vector,
-				       vsi->irq_handler, 0, q_vector->name,
-				       q_vector);
+		err = devm_request_irq(&pf->pdev->dev, irq_num,
+				       vsi->irq_handler, 0,
+				       q_vector->name, q_vector);
 		if (err) {
 			netdev_err(vsi->netdev,
 				   "MSIX request_irq failed, error: %d\n", err);
@@ -1231,467 +1661,6 @@
 }
 
 /**
- * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
- * @vsi: the VSI being configured
- */
-static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
-{
-	struct ice_hw_common_caps *cap;
-	struct ice_pf *pf = vsi->back;
-
-	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
-		vsi->rss_size = 1;
-		return;
-	}
-
-	cap = &pf->hw.func_caps.common_cap;
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		/* PF VSI will inherit RSS instance of PF */
-		vsi->rss_table_size = cap->rss_table_size;
-		vsi->rss_size = min_t(int, num_online_cpus(),
-				      BIT(cap->rss_table_entry_width));
-		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
-		break;
-	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
-		break;
-	}
-}
-
-/**
- * ice_vsi_setup_q_map - Setup a VSI queue map
- * @vsi: the VSI being configured
- * @ctxt: VSI context structure
- */
-static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
-{
-	u16 offset = 0, qmap = 0, numq_tc;
-	u16 pow = 0, max_rss = 0, qcount;
-	u16 qcount_tx = vsi->alloc_txq;
-	u16 qcount_rx = vsi->alloc_rxq;
-	bool ena_tc0 = false;
-	int i;
-
-	/* at least TC0 should be enabled by default */
-	if (vsi->tc_cfg.numtc) {
-		if (!(vsi->tc_cfg.ena_tc & BIT(0)))
-			ena_tc0 =  true;
-	} else {
-		ena_tc0 =  true;
-	}
-
-	if (ena_tc0) {
-		vsi->tc_cfg.numtc++;
-		vsi->tc_cfg.ena_tc |= 1;
-	}
-
-	numq_tc = qcount_rx / vsi->tc_cfg.numtc;
-
-	/* TC mapping is a function of the number of Rx queues assigned to the
-	 * VSI for each traffic class and the offset of these queues.
-	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
-	 * queues allocated to TC0. No:of queues is a power-of-2.
-	 *
-	 * If TC is not enabled, the queue offset is set to 0, and allocate one
-	 * queue, this way, traffic for the given TC will be sent to the default
-	 * queue.
-	 *
-	 * Setup number and offset of Rx queues for all TCs for the VSI
-	 */
-
-	/* qcount will change if RSS is enabled */
-	if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
-		if (vsi->type == ICE_VSI_PF)
-			max_rss = ICE_MAX_LG_RSS_QS;
-		else
-			max_rss = ICE_MAX_SMALL_RSS_QS;
-
-		qcount = min_t(int, numq_tc, max_rss);
-		qcount = min_t(int, qcount, vsi->rss_size);
-	} else {
-		qcount = numq_tc;
-	}
-
-	/* find the (rounded up) power-of-2 of qcount */
-	pow = order_base_2(qcount);
-
-	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
-		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
-			/* TC is not enabled */
-			vsi->tc_cfg.tc_info[i].qoffset = 0;
-			vsi->tc_cfg.tc_info[i].qcount = 1;
-			ctxt->info.tc_mapping[i] = 0;
-			continue;
-		}
-
-		/* TC is enabled */
-		vsi->tc_cfg.tc_info[i].qoffset = offset;
-		vsi->tc_cfg.tc_info[i].qcount = qcount;
-
-		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
-			ICE_AQ_VSI_TC_Q_OFFSET_M) |
-			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
-			 ICE_AQ_VSI_TC_Q_NUM_M);
-		offset += qcount;
-		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
-	}
-
-	vsi->num_txq = qcount_tx;
-	vsi->num_rxq = offset;
-
-	/* Rx queue mapping */
-	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
-	/* q_mapping buffer holds the info for the first queue allocated for
-	 * this VSI in the PF space and also the number of queues associated
-	 * with this VSI.
-	 */
-	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
-	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
-}
-
-/**
- * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
- * @ctxt: the VSI context being set
- *
- * This initializes a default VSI context for all sections except the Queues.
- */
-static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
-{
-	u32 table = 0;
-
-	memset(&ctxt->info, 0, sizeof(ctxt->info));
-	/* VSI's should be allocated from shared pool */
-	ctxt->alloc_from_pool = true;
-	/* Src pruning enabled by default */
-	ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
-	/* Traffic from VSI can be sent to LAN */
-	ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
-
-	/* By default bits 3 and 4 in vlan_flags are 0's which results in legacy
-	 * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all
-	 * packets untagged/tagged.
-	 */
-	ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL &
-				  ICE_AQ_VSI_VLAN_MODE_M) >>
-				 ICE_AQ_VSI_VLAN_MODE_S);
-
-	/* Have 1:1 UP mapping for both ingress/egress tables */
-	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
-	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
-	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
-	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
-	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
-	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
-	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
-	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
-	ctxt->info.ingress_table = cpu_to_le32(table);
-	ctxt->info.egress_table = cpu_to_le32(table);
-	/* Have 1:1 UP mapping for outer to inner UP table */
-	ctxt->info.outer_up_table = cpu_to_le32(table);
-	/* No Outer tag support outer_tag_flags remains to zero */
-}
-
-/**
- * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
- * @ctxt: the VSI context being set
- * @vsi: the VSI being configured
- */
-static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
-{
-	u8 lut_type, hash_type;
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		/* PF VSI will inherit RSS instance of PF */
-		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
-		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
-		break;
-	default:
-		dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
-			 vsi->type);
-		return;
-	}
-
-	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
-				ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
-				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
-				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
-}
-
-/**
- * ice_vsi_add - Create a new VSI or fetch preallocated VSI
- * @vsi: the VSI being configured
- *
- * This initializes a VSI context depending on the VSI type to be added and
- * passes it down to the add_vsi aq command to create a new VSI.
- */
-static int ice_vsi_add(struct ice_vsi *vsi)
-{
-	struct ice_vsi_ctx ctxt = { 0 };
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	int ret = 0;
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		ctxt.flags = ICE_AQ_VSI_TYPE_PF;
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	ice_set_dflt_vsi_ctx(&ctxt);
-	/* if the switch is in VEB mode, allow VSI loopback */
-	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
-		ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
-
-	/* Set LUT type and HASH type if RSS is enabled */
-	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
-		ice_set_rss_vsi_ctx(&ctxt, vsi);
-
-	ctxt.info.sw_id = vsi->port_info->sw_id;
-	ice_vsi_setup_q_map(vsi, &ctxt);
-
-	ret = ice_aq_add_vsi(hw, &ctxt, NULL);
-	if (ret) {
-		dev_err(&vsi->back->pdev->dev,
-			"Add VSI AQ call failed, err %d\n", ret);
-		return -EIO;
-	}
-	vsi->info = ctxt.info;
-	vsi->vsi_num = ctxt.vsi_num;
-
-	return ret;
-}
-
-/**
- * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
- * @vsi: the VSI being cleaned up
- */
-static void ice_vsi_release_msix(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	u16 vector = vsi->base_vector;
-	struct ice_hw *hw = &pf->hw;
-	u32 txq = 0;
-	u32 rxq = 0;
-	int i, q;
-
-	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[i];
-
-		wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), 0);
-		wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), 0);
-		for (q = 0; q < q_vector->num_ring_tx; q++) {
-			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
-			txq++;
-		}
-
-		for (q = 0; q < q_vector->num_ring_rx; q++) {
-			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
-			rxq++;
-		}
-	}
-
-	ice_flush(hw);
-}
-
-/**
- * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
- * @vsi: the VSI having rings deallocated
- */
-static void ice_vsi_clear_rings(struct ice_vsi *vsi)
-{
-	int i;
-
-	if (vsi->tx_rings) {
-		for (i = 0; i < vsi->alloc_txq; i++) {
-			if (vsi->tx_rings[i]) {
-				kfree_rcu(vsi->tx_rings[i], rcu);
-				vsi->tx_rings[i] = NULL;
-			}
-		}
-	}
-	if (vsi->rx_rings) {
-		for (i = 0; i < vsi->alloc_rxq; i++) {
-			if (vsi->rx_rings[i]) {
-				kfree_rcu(vsi->rx_rings[i], rcu);
-				vsi->rx_rings[i] = NULL;
-			}
-		}
-	}
-}
-
-/**
- * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
- * @vsi: VSI which is having rings allocated
- */
-static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int i;
-
-	/* Allocate tx_rings */
-	for (i = 0; i < vsi->alloc_txq; i++) {
-		struct ice_ring *ring;
-
-		/* allocate with kzalloc(), free with kfree_rcu() */
-		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-
-		if (!ring)
-			goto err_out;
-
-		ring->q_index = i;
-		ring->reg_idx = vsi->txq_map[i];
-		ring->ring_active = false;
-		ring->vsi = vsi;
-		ring->netdev = vsi->netdev;
-		ring->dev = &pf->pdev->dev;
-		ring->count = vsi->num_desc;
-
-		vsi->tx_rings[i] = ring;
-	}
-
-	/* Allocate rx_rings */
-	for (i = 0; i < vsi->alloc_rxq; i++) {
-		struct ice_ring *ring;
-
-		/* allocate with kzalloc(), free with kfree_rcu() */
-		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-		if (!ring)
-			goto err_out;
-
-		ring->q_index = i;
-		ring->reg_idx = vsi->rxq_map[i];
-		ring->ring_active = false;
-		ring->vsi = vsi;
-		ring->netdev = vsi->netdev;
-		ring->dev = &pf->pdev->dev;
-		ring->count = vsi->num_desc;
-		vsi->rx_rings[i] = ring;
-	}
-
-	return 0;
-
-err_out:
-	ice_vsi_clear_rings(vsi);
-	return -ENOMEM;
-}
-
-/**
- * ice_vsi_free_irq - Free the irq association with the OS
- * @vsi: the VSI being configured
- */
-static void ice_vsi_free_irq(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int base = vsi->base_vector;
-
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
-		int i;
-
-		if (!vsi->q_vectors || !vsi->irqs_ready)
-			return;
-
-		vsi->irqs_ready = false;
-		for (i = 0; i < vsi->num_q_vectors; i++) {
-			u16 vector = i + base;
-			int irq_num;
-
-			irq_num = pf->msix_entries[vector].vector;
-
-			/* free only the irqs that were actually requested */
-			if (!vsi->q_vectors[i] ||
-			    !(vsi->q_vectors[i]->num_ring_tx ||
-			      vsi->q_vectors[i]->num_ring_rx))
-				continue;
-
-			/* clear the affinity notifier in the IRQ descriptor */
-			irq_set_affinity_notifier(irq_num, NULL);
-
-			/* clear the affinity_mask in the IRQ descriptor */
-			irq_set_affinity_hint(irq_num, NULL);
-			synchronize_irq(irq_num);
-			devm_free_irq(&pf->pdev->dev, irq_num,
-				      vsi->q_vectors[i]);
-		}
-		ice_vsi_release_msix(vsi);
-	}
-}
-
-/**
- * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
- * @vsi: the VSI being configured
- */
-static void ice_vsi_cfg_msix(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	u16 vector = vsi->base_vector;
-	struct ice_hw *hw = &pf->hw;
-	u32 txq = 0, rxq = 0;
-	int i, q, itr;
-	u8 itr_gran;
-
-	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[i];
-
-		itr_gran = hw->itr_gran_200;
-
-		if (q_vector->num_ring_rx) {
-			q_vector->rx.itr =
-				ITR_TO_REG(vsi->rx_rings[rxq]->rx_itr_setting,
-					   itr_gran);
-			q_vector->rx.latency_range = ICE_LOW_LATENCY;
-		}
-
-		if (q_vector->num_ring_tx) {
-			q_vector->tx.itr =
-				ITR_TO_REG(vsi->tx_rings[txq]->tx_itr_setting,
-					   itr_gran);
-			q_vector->tx.latency_range = ICE_LOW_LATENCY;
-		}
-		wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), q_vector->rx.itr);
-		wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), q_vector->tx.itr);
-
-		/* Both Transmit Queue Interrupt Cause Control register
-		 * and Receive Queue Interrupt Cause control register
-		 * expects MSIX_INDX field to be the vector index
-		 * within the function space and not the absolute
-		 * vector index across PF or across device.
-		 * For SR-IOV VF VSIs queue vector index always starts
-		 * with 1 since first vector index(0) is used for OICR
-		 * in VF space. Since VMDq and other PF VSIs are withtin
-		 * the PF function space, use the vector index thats
-		 * tracked for this PF.
-		 */
-		for (q = 0; q < q_vector->num_ring_tx; q++) {
-			u32 val;
-
-			itr = ICE_TX_ITR;
-			val = QINT_TQCTL_CAUSE_ENA_M |
-			      (itr << QINT_TQCTL_ITR_INDX_S)  |
-			      (vector << QINT_TQCTL_MSIX_INDX_S);
-			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
-			txq++;
-		}
-
-		for (q = 0; q < q_vector->num_ring_rx; q++) {
-			u32 val;
-
-			itr = ICE_RX_ITR;
-			val = QINT_RQCTL_CAUSE_ENA_M |
-			      (itr << QINT_RQCTL_ITR_INDX_S)  |
-			      (vector << QINT_RQCTL_MSIX_INDX_S);
-			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
-			rxq++;
-		}
-	}
-
-	ice_flush(hw);
-}
-
-/**
  * ice_ena_misc_vector - enable the non-queue interrupts
  * @pf: board private structure
  */
@@ -1708,6 +1677,7 @@
 	       PFINT_OICR_MAL_DETECT_M |
 	       PFINT_OICR_GRST_M |
 	       PFINT_OICR_PCI_EXCEPTION_M |
+	       PFINT_OICR_VFLR_M |
 	       PFINT_OICR_HMC_ERR_M |
 	       PFINT_OICR_PE_CRITERR_M);
 
@@ -1731,12 +1701,28 @@
 	u32 oicr, ena_mask;
 
 	set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+	set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
 
 	oicr = rd32(hw, PFINT_OICR);
 	ena_mask = rd32(hw, PFINT_OICR_ENA);
 
+	if (oicr & PFINT_OICR_SWINT_M) {
+		ena_mask &= ~PFINT_OICR_SWINT_M;
+		pf->sw_int_count++;
+	}
+
+	if (oicr & PFINT_OICR_MAL_DETECT_M) {
+		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
+		set_bit(__ICE_MDD_EVENT_PENDING, pf->state);
+	}
+	if (oicr & PFINT_OICR_VFLR_M) {
+		ena_mask &= ~PFINT_OICR_VFLR_M;
+		set_bit(__ICE_VFLR_EVENT_PENDING, pf->state);
+	}
+
 	if (oicr & PFINT_OICR_GRST_M) {
 		u32 reset;
+
 		/* we have a reset warning */
 		ena_mask &= ~PFINT_OICR_GRST_M;
 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
@@ -1746,15 +1732,18 @@
 			pf->corer_count++;
 		else if (reset == ICE_RESET_GLOBR)
 			pf->globr_count++;
-		else
+		else if (reset == ICE_RESET_EMPR)
 			pf->empr_count++;
+		else
+			dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n",
+				reset);
 
 		/* If a reset cycle isn't already in progress, we set a bit in
 		 * pf->state so that the service task can start a reset/rebuild.
 		 * We also make note of which reset happened so that peer
 		 * devices/drivers can be informed.
 		 */
-		if (!test_bit(__ICE_RESET_RECOVERY_PENDING, pf->state)) {
+		if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) {
 			if (reset == ICE_RESET_CORER)
 				set_bit(__ICE_CORER_RECV, pf->state);
 			else if (reset == ICE_RESET_GLOBR)
@@ -1762,7 +1751,20 @@
 			else
 				set_bit(__ICE_EMPR_RECV, pf->state);
 
-			set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+			/* There are a couple of different bits at play here.
+			 * hw->reset_ongoing indicates whether the hardware is
+			 * in reset. This is set to true when a reset interrupt
+			 * is received and set back to false after the driver
+			 * has determined that the hardware is out of reset.
+			 *
+			 * __ICE_RESET_OICR_RECV in pf->state indicates
+			 * that a post reset rebuild is required before the
+			 * driver is operational again. This is set above.
+			 *
+			 * As this is the start of the reset/rebuild cycle, set
+			 * both to indicate that.
+			 */
+			hw->reset_ongoing = true;
 		}
 	}
 
@@ -1774,7 +1776,7 @@
 			rd32(hw, PFHMC_ERRORDATA));
 	}
 
-	/* Report and mask off any remaining unexpected interrupts */
+	/* Report any remaining unexpected interrupts */
 	oicr &= ena_mask;
 	if (oicr) {
 		dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n",
@@ -1788,12 +1790,9 @@
 			set_bit(__ICE_PFR_REQ, pf->state);
 			ice_service_task_schedule(pf);
 		}
-		ena_mask &= ~oicr;
 	}
 	ret = IRQ_HANDLED;
 
-	/* re-enable interrupt causes that are not handled during this pass */
-	wr32(hw, PFINT_OICR_ENA, ena_mask);
 	if (!test_bit(__ICE_DOWN, pf->state)) {
 		ice_service_task_schedule(pf);
 		ice_irq_dynamic_ena(hw, NULL, NULL);
@@ -1803,205 +1802,24 @@
 }
 
 /**
- * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
- * @vsi: the VSI being configured
- *
- * This function maps descriptor rings to the queue-specific vectors allotted
- * through the MSI-X enabling code. On a constrained vector budget, we map Tx
- * and Rx rings to the vector as "efficiently" as possible.
+ * ice_dis_ctrlq_interrupts - disable control queue interrupts
+ * @hw: pointer to HW structure
  */
-static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
 {
-	int q_vectors = vsi->num_q_vectors;
-	int tx_rings_rem, rx_rings_rem;
-	int v_id;
+	/* disable Admin queue Interrupt causes */
+	wr32(hw, PFINT_FW_CTL,
+	     rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
 
-	/* initially assigning remaining rings count to VSIs num queue value */
-	tx_rings_rem = vsi->num_txq;
-	rx_rings_rem = vsi->num_rxq;
+	/* disable Mailbox queue Interrupt causes */
+	wr32(hw, PFINT_MBX_CTL,
+	     rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
 
-	for (v_id = 0; v_id < q_vectors; v_id++) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
-		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+	/* disable Control queue Interrupt causes */
+	wr32(hw, PFINT_OICR_CTL,
+	     rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
 
-		/* Tx rings mapping to vector */
-		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
-		q_vector->num_ring_tx = tx_rings_per_v;
-		q_vector->tx.ring = NULL;
-		q_base = vsi->num_txq - tx_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
-			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
-
-			tx_ring->q_vector = q_vector;
-			tx_ring->next = q_vector->tx.ring;
-			q_vector->tx.ring = tx_ring;
-		}
-		tx_rings_rem -= tx_rings_per_v;
-
-		/* Rx rings mapping to vector */
-		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
-		q_vector->num_ring_rx = rx_rings_per_v;
-		q_vector->rx.ring = NULL;
-		q_base = vsi->num_rxq - rx_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
-			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
-
-			rx_ring->q_vector = q_vector;
-			rx_ring->next = q_vector->rx.ring;
-			q_vector->rx.ring = rx_ring;
-		}
-		rx_rings_rem -= rx_rings_per_v;
-	}
-}
-
-/**
- * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI
- * @vsi: the VSI being configured
- *
- * Return 0 on success and a negative value on error
- */
-static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		vsi->alloc_txq = pf->num_lan_tx;
-		vsi->alloc_rxq = pf->num_lan_rx;
-		vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE);
-		vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx);
-		break;
-	default:
-		dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
-			 vsi->type);
-		break;
-	}
-}
-
-/**
- * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
- * @vsi: VSI pointer
- * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
- *
- * On error: returns error code (negative)
- * On success: returns 0
- */
-static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors)
-{
-	struct ice_pf *pf = vsi->back;
-
-	/* allocate memory for both Tx and Rx ring pointers */
-	vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
-				     sizeof(struct ice_ring *), GFP_KERNEL);
-	if (!vsi->tx_rings)
-		goto err_txrings;
-
-	vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
-				     sizeof(struct ice_ring *), GFP_KERNEL);
-	if (!vsi->rx_rings)
-		goto err_rxrings;
-
-	if (alloc_qvectors) {
-		/* allocate memory for q_vector pointers */
-		vsi->q_vectors = devm_kcalloc(&pf->pdev->dev,
-					      vsi->num_q_vectors,
-					      sizeof(struct ice_q_vector *),
-					      GFP_KERNEL);
-		if (!vsi->q_vectors)
-			goto err_vectors;
-	}
-
-	return 0;
-
-err_vectors:
-	devm_kfree(&pf->pdev->dev, vsi->rx_rings);
-err_rxrings:
-	devm_kfree(&pf->pdev->dev, vsi->tx_rings);
-err_txrings:
-	return -ENOMEM;
-}
-
-/**
- * ice_msix_clean_rings - MSIX mode Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a q_vector
- */
-static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
-{
-	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
-
-	if (!q_vector->tx.ring && !q_vector->rx.ring)
-		return IRQ_HANDLED;
-
-	napi_schedule(&q_vector->napi);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * ice_vsi_alloc - Allocates the next available struct vsi in the PF
- * @pf: board private structure
- * @type: type of VSI
- *
- * returns a pointer to a VSI on success, NULL on failure.
- */
-static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type)
-{
-	struct ice_vsi *vsi = NULL;
-
-	/* Need to protect the allocation of the VSIs at the PF level */
-	mutex_lock(&pf->sw_mutex);
-
-	/* If we have already allocated our maximum number of VSIs,
-	 * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
-	 * is available to be populated
-	 */
-	if (pf->next_vsi == ICE_NO_VSI) {
-		dev_dbg(&pf->pdev->dev, "out of VSI slots!\n");
-		goto unlock_pf;
-	}
-
-	vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL);
-	if (!vsi)
-		goto unlock_pf;
-
-	vsi->type = type;
-	vsi->back = pf;
-	set_bit(__ICE_DOWN, vsi->state);
-	vsi->idx = pf->next_vsi;
-	vsi->work_lmt = ICE_DFLT_IRQ_WORK;
-
-	ice_vsi_set_num_qs(vsi);
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		if (ice_vsi_alloc_arrays(vsi, true))
-			goto err_rings;
-
-		/* Setup default MSIX irq handler for VSI */
-		vsi->irq_handler = ice_msix_clean_rings;
-		break;
-	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
-		goto unlock_pf;
-	}
-
-	/* fill VSI slot in the PF struct */
-	pf->vsi[pf->next_vsi] = vsi;
-
-	/* prepare pf->next_vsi for next use */
-	pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
-					 pf->next_vsi);
-	goto unlock_pf;
-
-err_rings:
-	devm_kfree(&pf->pdev->dev, vsi);
-	vsi = NULL;
-unlock_pf:
-	mutex_unlock(&pf->sw_mutex);
-	return vsi;
+	ice_flush(hw);
 }
 
 /**
@@ -2010,33 +1828,62 @@
  */
 static void ice_free_irq_msix_misc(struct ice_pf *pf)
 {
-	/* disable OICR interrupt */
-	wr32(&pf->hw, PFINT_OICR_ENA, 0);
-	ice_flush(&pf->hw);
+	struct ice_hw *hw = &pf->hw;
 
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
+	ice_dis_ctrlq_interrupts(hw);
+
+	/* disable OICR interrupt */
+	wr32(hw, PFINT_OICR_ENA, 0);
+	ice_flush(hw);
+
+	if (pf->msix_entries) {
 		synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
 		devm_free_irq(&pf->pdev->dev,
 			      pf->msix_entries[pf->oicr_idx].vector, pf);
 	}
 
+	pf->num_avail_sw_msix += 1;
 	ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
 }
 
 /**
+ * ice_ena_ctrlq_interrupts - enable control queue interrupts
+ * @hw: pointer to HW structure
+ * @reg_idx: HW vector index to associate the control queue interrupts with
+ */
+static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
+{
+	u32 val;
+
+	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+	       PFINT_OICR_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_OICR_CTL, val);
+
+	/* enable Admin queue Interrupt causes */
+	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+	       PFINT_FW_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_FW_CTL, val);
+
+	/* enable Mailbox queue Interrupt causes */
+	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
+	       PFINT_MBX_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_MBX_CTL, val);
+
+	ice_flush(hw);
+}
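
Enable composes a fresh register value, while the disable path earlier uses a read-modify-write so the programmed vector index survives. A small sketch of the contrast against a fake register (bit positions invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define CAUSE_ENA_M	(1u << 30)
#define MSIX_INDX_M	0x7ffu

static uint32_t fw_ctl;			/* stand-in for the MMIO register */

static uint32_t rd32(void) { return fw_ctl; }
static void wr32(uint32_t v) { fw_ctl = v; }

int main(void)
{
	/* enable: compose the whole value, as ice_ena_ctrlq_interrupts() does */
	wr32((5 & MSIX_INDX_M) | CAUSE_ENA_M);

	/* disable: read-modify-write clears only the enable bit, leaving
	 * the vector index intact, as ice_dis_ctrlq_interrupts() does
	 */
	wr32(rd32() & ~CAUSE_ENA_M);

	printf("0x%08x\n", fw_ctl);	/* 0x00000005: index preserved */
	return 0;
}
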
+
+/**
  * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
  * @pf: board private structure
  *
  * This sets up the handler for MSIX 0, which is used to manage the
- * non-queue interrupts, e.g. AdminQ and errors.  This is not used
+ * non-queue interrupts, e.g. AdminQ and errors. This is not used
  * when in MSI or Legacy interrupt mode.
  */
 static int ice_req_irq_msix_misc(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
 	int oicr_idx, err = 0;
-	u8 itr_gran;
-	u32 val;
 
 	if (!pf->int_name[0])
 		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
@@ -2047,7 +1894,7 @@
 	 * lost during reset. Note that this function is called only during
 	 * rebuild path and not while reset is in progress.
 	 */
-	if (ice_is_reset_recovery_pending(pf->state))
+	if (ice_is_reset_in_progress(pf->state))
 		goto skip_req_irq;
 
 	/* reserve one vector in irq_tracker for misc interrupts */
@@ -2055,6 +1902,7 @@
 	if (oicr_idx < 0)
 		return oicr_idx;
 
+	pf->num_avail_sw_msix -= 1;
 	pf->oicr_idx = oicr_idx;
 
 	err = devm_request_irq(&pf->pdev->dev,
@@ -2065,25 +1913,16 @@
 			"devm_request_irq for %s failed: %d\n",
 			pf->int_name, err);
 		ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+		pf->num_avail_sw_msix += 1;
 		return err;
 	}
 
 skip_req_irq:
 	ice_ena_misc_vector(pf);
 
-	val = ((pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
-	       PFINT_OICR_CTL_CAUSE_ENA_M);
-	wr32(hw, PFINT_OICR_CTL, val);
-
-	/* This enables Admin queue Interrupt causes */
-	val = ((pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) |
-	       PFINT_FW_CTL_CAUSE_ENA_M);
-	wr32(hw, PFINT_FW_CTL, val);
-
-	itr_gran = hw->itr_gran_200;
-
+	ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
 	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
-	     ITR_TO_REG(ICE_ITR_8K, itr_gran));
+	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
 
 	ice_flush(hw);
 	ice_irq_dynamic_ena(hw, NULL, NULL);
@@ -2092,230 +1931,61 @@
 }
 
 /**
- * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
- * @vsi: the VSI getting queues
+ * ice_napi_add - register NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be registered
  *
- * Return 0 on success and a negative value on error
+ * This function is only called in the driver's load path. In all other cases
+ * (e.g. resume, reset/rebuild) the NAPI handler is registered in
+ * ice_vsi_alloc_q_vector().
  */
-static int ice_vsi_get_qs_contig(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int offset, ret = 0;
-
-	mutex_lock(&pf->avail_q_mutex);
-	/* look for contiguous block of queues for tx */
-	offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS,
-					    0, vsi->alloc_txq, 0);
-	if (offset < ICE_MAX_TXQS) {
-		int i;
-
-		bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq);
-		for (i = 0; i < vsi->alloc_txq; i++)
-			vsi->txq_map[i] = i + offset;
-	} else {
-		ret = -ENOMEM;
-		vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER;
-	}
-
-	/* look for contiguous block of queues for rx */
-	offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS,
-					    0, vsi->alloc_rxq, 0);
-	if (offset < ICE_MAX_RXQS) {
-		int i;
-
-		bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq);
-		for (i = 0; i < vsi->alloc_rxq; i++)
-			vsi->rxq_map[i] = i + offset;
-	} else {
-		ret = -ENOMEM;
-		vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER;
-	}
-	mutex_unlock(&pf->avail_q_mutex);
-
-	return ret;
-}
-
-/**
- * ice_vsi_get_qs_scatter - Assign a scattered queues to VSI
- * @vsi: the VSI getting queues
- *
- * Return 0 on success and a negative value on error
- */
-static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int i, index = 0;
-
-	mutex_lock(&pf->avail_q_mutex);
-
-	if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) {
-		for (i = 0; i < vsi->alloc_txq; i++) {
-			index = find_next_zero_bit(pf->avail_txqs,
-						   ICE_MAX_TXQS, index);
-			if (index < ICE_MAX_TXQS) {
-				set_bit(index, pf->avail_txqs);
-				vsi->txq_map[i] = index;
-			} else {
-				goto err_scatter_tx;
-			}
-		}
-	}
-
-	if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) {
-		for (i = 0; i < vsi->alloc_rxq; i++) {
-			index = find_next_zero_bit(pf->avail_rxqs,
-						   ICE_MAX_RXQS, index);
-			if (index < ICE_MAX_RXQS) {
-				set_bit(index, pf->avail_rxqs);
-				vsi->rxq_map[i] = index;
-			} else {
-				goto err_scatter_rx;
-			}
-		}
-	}
-
-	mutex_unlock(&pf->avail_q_mutex);
-	return 0;
-
-err_scatter_rx:
-	/* unflag any queues we have grabbed (i is failed position) */
-	for (index = 0; index < i; index++) {
-		clear_bit(vsi->rxq_map[index], pf->avail_rxqs);
-		vsi->rxq_map[index] = 0;
-	}
-	i = vsi->alloc_txq;
-err_scatter_tx:
-	/* i is either position of failed attempt or vsi->alloc_txq */
-	for (index = 0; index < i; index++) {
-		clear_bit(vsi->txq_map[index], pf->avail_txqs);
-		vsi->txq_map[index] = 0;
-	}
-
-	mutex_unlock(&pf->avail_q_mutex);
-	return -ENOMEM;
-}
-
-/**
- * ice_vsi_get_qs - Assign queues from PF to VSI
- * @vsi: the VSI to assign queues to
- *
- * Returns 0 on success and a negative value on error
- */
-static int ice_vsi_get_qs(struct ice_vsi *vsi)
-{
-	int ret = 0;
-
-	vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG;
-	vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG;
-
-	/* NOTE: ice_vsi_get_qs_contig() will set the rx/tx mapping
-	 * modes individually to scatter if assigning contiguous queues
-	 * to rx or tx fails
-	 */
-	ret = ice_vsi_get_qs_contig(vsi);
-	if (ret < 0) {
-		if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER)
-			vsi->alloc_txq = max_t(u16, vsi->alloc_txq,
-					       ICE_MAX_SCATTER_TXQS);
-		if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER)
-			vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq,
-					       ICE_MAX_SCATTER_RXQS);
-		ret = ice_vsi_get_qs_scatter(vsi);
-	}
-
-	return ret;
-}
-
-/**
- * ice_vsi_put_qs - Release queues from VSI to PF
- * @vsi: the VSI thats going to release queues
- */
-static void ice_vsi_put_qs(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int i;
-
-	mutex_lock(&pf->avail_q_mutex);
-
-	for (i = 0; i < vsi->alloc_txq; i++) {
-		clear_bit(vsi->txq_map[i], pf->avail_txqs);
-		vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
-	}
-
-	for (i = 0; i < vsi->alloc_rxq; i++) {
-		clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
-		vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
-	}
-
-	mutex_unlock(&pf->avail_q_mutex);
-}
-
-/**
- * ice_free_q_vector - Free memory allocated for a specific interrupt vector
- * @vsi: VSI having the memory freed
- * @v_idx: index of the vector to be freed
- */
-static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-	struct ice_q_vector *q_vector;
-	struct ice_ring *ring;
-
-	if (!vsi->q_vectors[v_idx]) {
-		dev_dbg(&vsi->back->pdev->dev, "Queue vector at index %d not found\n",
-			v_idx);
-		return;
-	}
-	q_vector = vsi->q_vectors[v_idx];
-
-	ice_for_each_ring(ring, q_vector->tx)
-		ring->q_vector = NULL;
-	ice_for_each_ring(ring, q_vector->rx)
-		ring->q_vector = NULL;
-
-	/* only VSI with an associated netdev is set up with NAPI */
-	if (vsi->netdev)
-		netif_napi_del(&q_vector->napi);
-
-	devm_kfree(&vsi->back->pdev->dev, q_vector);
-	vsi->q_vectors[v_idx] = NULL;
-}
-
-/**
- * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
- * @vsi: the VSI having memory freed
- */
-static void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+static void ice_napi_add(struct ice_vsi *vsi)
 {
 	int v_idx;
 
-	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
-		ice_free_q_vector(vsi, v_idx);
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
+			       ice_napi_poll, NAPI_POLL_WEIGHT);
 }
 
 /**
- * ice_cfg_netdev - Setup the netdev flags
- * @vsi: the VSI being configured
- *
- * Returns 0 on success, negative value on failure
+ * ice_set_ops - set netdev and ethtool ops for the given netdev
+ * @netdev: netdev instance
  */
-static int ice_cfg_netdev(struct ice_vsi *vsi)
+static void ice_set_ops(struct net_device *netdev)
 {
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (ice_is_safe_mode(pf)) {
+		netdev->netdev_ops = &ice_netdev_safe_mode_ops;
+		ice_set_ethtool_safe_mode_ops(netdev);
+		return;
+	}
+
+	netdev->netdev_ops = &ice_netdev_ops;
+	ice_set_ethtool_ops(netdev);
+}
+
+/**
+ * ice_set_netdev_features - set features for the given netdev
+ * @netdev: netdev instance
+ */
+static void ice_set_netdev_features(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
 	netdev_features_t csumo_features;
 	netdev_features_t vlano_features;
 	netdev_features_t dflt_features;
 	netdev_features_t tso_features;
-	struct ice_netdev_priv *np;
-	struct net_device *netdev;
-	u8 mac_addr[ETH_ALEN];
 
-	netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv),
-				    vsi->alloc_txq, vsi->alloc_rxq);
-	if (!netdev)
-		return -ENOMEM;
-
-	vsi->netdev = netdev;
-	np = netdev_priv(netdev);
-	np->vsi = vsi;
+	if (ice_is_safe_mode(pf)) {
+		/* safe mode */
+		netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
+		netdev->hw_features = netdev->features;
+		return;
+	}
 
 	dflt_features = NETIF_F_SG	|
 			NETIF_F_HIGHDMA	|
@@ -2323,6 +1993,7 @@
 
 	csumo_features = NETIF_F_RXCSUM	  |
 			 NETIF_F_IP_CSUM  |
+			 NETIF_F_SCTP_CRC |
 			 NETIF_F_IPV6_CSUM;
 
 	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
@@ -2342,217 +2013,61 @@
 				   tso_features;
 	netdev->vlan_features |= dflt_features | csumo_features |
 				 tso_features;
+}
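+
+/* Sketch of how the split above is consumed (illustrative helper, not from
+ * the driver): bits set in hw_features are the ones user space may toggle
+ * with ethtool -K, while netdev->features is the currently-enabled set.
+ */
+static bool example_feature_is_toggleable(struct net_device *netdev,
+					  netdev_features_t feature)
+{
+	return !!(netdev->hw_features & feature);
+}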
+
+/**
+ * ice_cfg_netdev - Allocate, configure and register a netdev
+ * @vsi: the VSI associated with the new netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_cfg_netdev(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_netdev_priv *np;
+	struct net_device *netdev;
+	u8 mac_addr[ETH_ALEN];
+	int err;
+
+	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
+				    vsi->alloc_rxq);
+	if (!netdev)
+		return -ENOMEM;
+
+	vsi->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vsi = vsi;
+
+	ice_set_netdev_features(netdev);
+
+	ice_set_ops(netdev);
 
 	if (vsi->type == ICE_VSI_PF) {
-		SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev);
+		SET_NETDEV_DEV(netdev, &pf->pdev->dev);
 		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
-
 		ether_addr_copy(netdev->dev_addr, mac_addr);
 		ether_addr_copy(netdev->perm_addr, mac_addr);
 	}
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 
-	/* assign netdev_ops */
-	netdev->netdev_ops = &ice_netdev_ops;
+	/* Setup netdev TC information */
+	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
 
 	/* setup watchdog timeout value to be 5 second */
 	netdev->watchdog_timeo = 5 * HZ;
 
-	ice_set_ethtool_ops(netdev);
-
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = ICE_MAX_MTU;
 
-	return 0;
-}
+	err = register_netdev(vsi->netdev);
+	if (err)
+		return err;
 
-/**
- * ice_vsi_free_arrays - clean up vsi resources
- * @vsi: pointer to VSI being cleared
- * @free_qvectors: bool to specify if q_vectors should be deallocated
- */
-static void ice_vsi_free_arrays(struct ice_vsi *vsi, bool free_qvectors)
-{
-	struct ice_pf *pf = vsi->back;
+	netif_carrier_off(vsi->netdev);
 
-	/* free the ring and vector containers */
-	if (free_qvectors && vsi->q_vectors) {
-		devm_kfree(&pf->pdev->dev, vsi->q_vectors);
-		vsi->q_vectors = NULL;
-	}
-	if (vsi->tx_rings) {
-		devm_kfree(&pf->pdev->dev, vsi->tx_rings);
-		vsi->tx_rings = NULL;
-	}
-	if (vsi->rx_rings) {
-		devm_kfree(&pf->pdev->dev, vsi->rx_rings);
-		vsi->rx_rings = NULL;
-	}
-}
-
-/**
- * ice_vsi_clear - clean up and deallocate the provided vsi
- * @vsi: pointer to VSI being cleared
- *
- * This deallocates the vsi's queue resources, removes it from the PF's
- * VSI array if necessary, and deallocates the VSI
- *
- * Returns 0 on success, negative on failure
- */
-static int ice_vsi_clear(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = NULL;
-
-	if (!vsi)
-		return 0;
-
-	if (!vsi->back)
-		return -EINVAL;
-
-	pf = vsi->back;
-
-	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
-		dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n",
-			vsi->idx);
-		return -EINVAL;
-	}
-
-	mutex_lock(&pf->sw_mutex);
-	/* updates the PF for this cleared vsi */
-
-	pf->vsi[vsi->idx] = NULL;
-	if (vsi->idx < pf->next_vsi)
-		pf->next_vsi = vsi->idx;
-
-	ice_vsi_free_arrays(vsi, true);
-	mutex_unlock(&pf->sw_mutex);
-	devm_kfree(&pf->pdev->dev, vsi);
-
-	return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
- * @vsi: the VSI being configured
- * @v_idx: index of the vector in the vsi struct
- *
- * We allocate one q_vector.  If allocation fails we return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_q_vector *q_vector;
-
-	/* allocate q_vector */
-	q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
-	if (!q_vector)
-		return -ENOMEM;
-
-	q_vector->vsi = vsi;
-	q_vector->v_idx = v_idx;
-	/* only set affinity_mask if the CPU is online */
-	if (cpu_online(v_idx))
-		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
-	if (vsi->netdev)
-		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
-			       NAPI_POLL_WEIGHT);
-	/* tie q_vector and vsi together */
-	vsi->q_vectors[v_idx] = q_vector;
-
-	return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
- * @vsi: the VSI being configured
- *
- * We allocate one q_vector per queue interrupt.  If allocation fails we
- * return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int v_idx = 0, num_q_vectors;
-	int err;
-
-	if (vsi->q_vectors[0]) {
-		dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
-			vsi->vsi_num);
-		return -EEXIST;
-	}
-
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
-		num_q_vectors = vsi->num_q_vectors;
-	} else {
-		err = -EINVAL;
-		goto err_out;
-	}
-
-	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		err = ice_vsi_alloc_q_vector(vsi, v_idx);
-		if (err)
-			goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	while (v_idx--)
-		ice_free_q_vector(vsi, v_idx);
-
-	dev_err(&pf->pdev->dev,
-		"Failed to allocate %d q_vector for VSI %d, ret=%d\n",
-		vsi->num_q_vectors, vsi->vsi_num, err);
-	vsi->num_q_vectors = 0;
-	return err;
-}
-
-/**
- * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
- * @vsi: ptr to the VSI
- *
- * This should only be called after ice_vsi_alloc() which allocates the
- * corresponding SW VSI structure and initializes num_queue_pairs for the
- * newly allocated VSI.
- *
- * Returns 0 on success or negative on failure
- */
-static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int num_q_vectors = 0;
-
-	if (vsi->base_vector) {
-		dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
-			vsi->vsi_num, vsi->base_vector);
-		return -EEXIST;
-	}
-
-	if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-		return -ENOENT;
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		num_q_vectors = vsi->num_q_vectors;
-		break;
-	default:
-		dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
-			 vsi->type);
-		break;
-	}
-
-	if (num_q_vectors)
-		vsi->base_vector = ice_get_res(pf, pf->irq_tracker,
-					       num_q_vectors, vsi->idx);
-
-	if (vsi->base_vector < 0) {
-		dev_err(&pf->pdev->dev,
-			"Failed to get tracking for %d vectors for VSI %d, err=%d\n",
-			num_q_vectors, vsi->vsi_num, vsi->base_vector);
-		return -ENOENT;
-	}
+	/* make sure transmit queues start off as stopped */
+	netif_tx_stop_all_queues(vsi->netdev);
 
 	return 0;
 }
@@ -2572,343 +2087,48 @@
 }
 
 /**
- * ice_vsi_cfg_rss - Configure RSS params for a VSI
- * @vsi: VSI to be configured
- */
-static int ice_vsi_cfg_rss(struct ice_vsi *vsi)
-{
-	u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
-	struct ice_aqc_get_set_rss_keys *key;
-	struct ice_pf *pf = vsi->back;
-	enum ice_status status;
-	int err = 0;
-	u8 *lut;
-
-	vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
-
-	lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
-	if (!lut)
-		return -ENOMEM;
-
-	if (vsi->rss_lut_user)
-		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
-	else
-		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
-
-	status = ice_aq_set_rss_lut(&pf->hw, vsi->vsi_num, vsi->rss_lut_type,
-				    lut, vsi->rss_table_size);
-
-	if (status) {
-		dev_err(&vsi->back->pdev->dev,
-			"set_rss_lut failed, error %d\n", status);
-		err = -EIO;
-		goto ice_vsi_cfg_rss_exit;
-	}
-
-	key = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*key), GFP_KERNEL);
-	if (!key) {
-		err = -ENOMEM;
-		goto ice_vsi_cfg_rss_exit;
-	}
-
-	if (vsi->rss_hkey_user)
-		memcpy(seed, vsi->rss_hkey_user,
-		       ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
-	else
-		netdev_rss_key_fill((void *)seed,
-				    ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
-	memcpy(&key->standard_rss_key, seed,
-	       ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
-
-	status = ice_aq_set_rss_key(&pf->hw, vsi->vsi_num, key);
-
-	if (status) {
-		dev_err(&vsi->back->pdev->dev, "set_rss_key failed, error %d\n",
-			status);
-		err = -EIO;
-	}
-
-	devm_kfree(&pf->pdev->dev, key);
-ice_vsi_cfg_rss_exit:
-	devm_kfree(&pf->pdev->dev, lut);
-	return err;
-}
-
-/**
- * ice_vsi_reinit_setup - return resource and reallocate resource for a VSI
- * @vsi: pointer to the ice_vsi
- *
- * This reallocates the VSIs queue resources
- *
- * Returns 0 on success and negative value on failure
- */
-static int ice_vsi_reinit_setup(struct ice_vsi *vsi)
-{
-	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
-	int ret, i;
-
-	if (!vsi)
-		return -EINVAL;
-
-	ice_vsi_free_q_vectors(vsi);
-	ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
-	vsi->base_vector = 0;
-	ice_vsi_clear_rings(vsi);
-	ice_vsi_free_arrays(vsi, false);
-	ice_vsi_set_num_qs(vsi);
-
-	/* Initialize VSI struct elements and create VSI in FW */
-	ret = ice_vsi_add(vsi);
-	if (ret < 0)
-		goto err_vsi;
-
-	ret = ice_vsi_alloc_arrays(vsi, false);
-	if (ret < 0)
-		goto err_vsi;
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		if (!vsi->netdev) {
-			ret = ice_cfg_netdev(vsi);
-			if (ret)
-				goto err_rings;
-
-			ret = register_netdev(vsi->netdev);
-			if (ret)
-				goto err_rings;
-
-			netif_carrier_off(vsi->netdev);
-			netif_tx_stop_all_queues(vsi->netdev);
-		}
-
-		ret = ice_vsi_alloc_q_vectors(vsi);
-		if (ret)
-			goto err_rings;
-
-		ret = ice_vsi_setup_vector_base(vsi);
-		if (ret)
-			goto err_vectors;
-
-		ret = ice_vsi_alloc_rings(vsi);
-		if (ret)
-			goto err_vectors;
-
-		ice_vsi_map_rings_to_vectors(vsi);
-		break;
-	default:
-		break;
-	}
-
-	ice_vsi_set_tc_cfg(vsi);
-
-	/* configure VSI nodes based on number of queues and TC's */
-	for (i = 0; i < vsi->tc_cfg.numtc; i++)
-		max_txqs[i] = vsi->num_txq;
-
-	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num,
-			      vsi->tc_cfg.ena_tc, max_txqs);
-	if (ret) {
-		dev_info(&vsi->back->pdev->dev,
-			 "Failed VSI lan queue config\n");
-		goto err_vectors;
-	}
-	return 0;
-
-err_vectors:
-	ice_vsi_free_q_vectors(vsi);
-err_rings:
-	if (vsi->netdev) {
-		vsi->current_netdev_flags = 0;
-		unregister_netdev(vsi->netdev);
-		free_netdev(vsi->netdev);
-		vsi->netdev = NULL;
-	}
-err_vsi:
-	ice_vsi_clear(vsi);
-	set_bit(__ICE_RESET_FAILED, vsi->back->state);
-	return ret;
-}
-
-/**
- * ice_vsi_setup - Set up a VSI by a given type
+ * ice_pf_vsi_setup - Set up a PF VSI
  * @pf: board private structure
- * @type: VSI type
  * @pi: pointer to the port_info instance
  *
- * This allocates the sw VSI structure and its queue resources.
- *
- * Returns pointer to the successfully allocated and configure VSI sw struct on
- * success, otherwise returns NULL on failure.
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
  */
 static struct ice_vsi *
-ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
-	      struct ice_port_info *pi)
+ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 {
-	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
-	struct device *dev = &pf->pdev->dev;
-	struct ice_vsi_ctx ctxt = { 0 };
-	struct ice_vsi *vsi;
-	int ret, i;
-
-	vsi = ice_vsi_alloc(pf, type);
-	if (!vsi) {
-		dev_err(dev, "could not allocate VSI\n");
-		return NULL;
-	}
-
-	vsi->port_info = pi;
-	vsi->vsw = pf->first_sw;
-
-	if (ice_vsi_get_qs(vsi)) {
-		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
-			vsi->idx);
-		goto err_get_qs;
-	}
-
-	/* set RSS capabilities */
-	ice_vsi_set_rss_params(vsi);
-
-	/* create the VSI */
-	ret = ice_vsi_add(vsi);
-	if (ret)
-		goto err_vsi;
-
-	ctxt.vsi_num = vsi->vsi_num;
-
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		ret = ice_cfg_netdev(vsi);
-		if (ret)
-			goto err_cfg_netdev;
-
-		ret = register_netdev(vsi->netdev);
-		if (ret)
-			goto err_register_netdev;
-
-		netif_carrier_off(vsi->netdev);
-
-		/* make sure transmit queues start off as stopped */
-		netif_tx_stop_all_queues(vsi->netdev);
-		ret = ice_vsi_alloc_q_vectors(vsi);
-		if (ret)
-			goto err_msix;
-
-		ret = ice_vsi_setup_vector_base(vsi);
-		if (ret)
-			goto err_rings;
-
-		ret = ice_vsi_alloc_rings(vsi);
-		if (ret)
-			goto err_rings;
-
-		ice_vsi_map_rings_to_vectors(vsi);
-
-		/* Do not exit if configuring RSS had an issue, at least
-		 * receive traffic on first queue. Hence no need to capture
-		 * return value
-		 */
-		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
-			ice_vsi_cfg_rss(vsi);
-		break;
-	default:
-		/* if vsi type is not recognized, clean up the resources and
-		 * exit
-		 */
-		goto err_rings;
-	}
-
-	ice_vsi_set_tc_cfg(vsi);
-
-	/* configure VSI nodes based on number of queues and TC's */
-	for (i = 0; i < vsi->tc_cfg.numtc; i++)
-		max_txqs[i] = vsi->num_txq;
-
-	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num,
-			      vsi->tc_cfg.ena_tc, max_txqs);
-	if (ret) {
-		dev_info(&pf->pdev->dev, "Failed VSI lan queue config\n");
-		goto err_rings;
-	}
-
-	return vsi;
-
-err_rings:
-	ice_vsi_free_q_vectors(vsi);
-err_msix:
-	if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
-		unregister_netdev(vsi->netdev);
-err_register_netdev:
-	if (vsi->netdev) {
-		free_netdev(vsi->netdev);
-		vsi->netdev = NULL;
-	}
-err_cfg_netdev:
-	ret = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL);
-	if (ret)
-		dev_err(&vsi->back->pdev->dev,
-			"Free VSI AQ call failed, err %d\n", ret);
-err_vsi:
-	ice_vsi_put_qs(vsi);
-err_get_qs:
-	pf->q_left_tx += vsi->alloc_txq;
-	pf->q_left_rx += vsi->alloc_rxq;
-	ice_vsi_clear(vsi);
-
-	return NULL;
+	return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID);
 }
 
 /**
- * ice_vsi_add_vlan - Add vsi membership for given vlan
- * @vsi: the vsi being configured
- * @vid: vlan id to be added
+ * ice_lb_vsi_setup - Set up a loopback VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
  */
-static int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
+struct ice_vsi *
+ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 {
-	struct ice_fltr_list_entry *tmp;
-	struct ice_pf *pf = vsi->back;
-	LIST_HEAD(tmp_add_list);
-	enum ice_status status;
-	int err = 0;
-
-	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-
-	tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
-	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-	tmp->fltr_info.flag = ICE_FLTR_TX;
-	tmp->fltr_info.src = vsi->vsi_num;
-	tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
-	tmp->fltr_info.l_data.vlan.vlan_id = vid;
-
-	INIT_LIST_HEAD(&tmp->list_entry);
-	list_add(&tmp->list_entry, &tmp_add_list);
-
-	status = ice_add_vlan(&pf->hw, &tmp_add_list);
-	if (status) {
-		err = -ENODEV;
-		dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n",
-			vid, vsi->vsi_num);
-	}
-
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
-	return err;
+	return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID);
 }
 
 /**
- * ice_vlan_rx_add_vid - Add a vlan id filter to HW offload
+ * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
  * @netdev: network interface to be adjusted
  * @proto: unused protocol
- * @vid: vlan id to be added
+ * @vid: VLAN ID to be added
  *
- * net_device_ops implementation for adding vlan ids
+ * net_device_ops implementation for adding VLAN IDs
  */
-static int ice_vlan_rx_add_vid(struct net_device *netdev,
-			       __always_unused __be16 proto, u16 vid)
+static int
+ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
+		    u16 vid)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
-	int ret = 0;
+	int ret;
 
 	if (vid >= VLAN_N_VID) {
 		netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
@@ -2919,76 +2139,59 @@
 	if (vsi->info.pvid)
 		return -EINVAL;
 
-	/* Add all VLAN ids including 0 to the switch filter. VLAN id 0 is
+	/* Enable VLAN pruning when VLAN 0 is added */
+	if (unlikely(!vid)) {
+		ret = ice_cfg_vlan_pruning(vsi, true, false);
+		if (ret)
+			return ret;
+	}
+
+	/* Add all VLAN IDs including 0 to the switch filter. VLAN ID 0 is
 	 * needed to continue allowing all untagged packets since VLAN prune
 	 * list is applied to all packets by the switch
 	 */
 	ret = ice_vsi_add_vlan(vsi, vid);
-
-	if (!ret)
-		set_bit(vid, vsi->active_vlans);
+	if (!ret) {
+		vsi->vlan_ena = true;
+		set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+	}
 
 	return ret;
 }
 
 /**
- * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
- * @vsi: the VSI being configured
- * @vid: VLAN id to be removed
- */
-static void ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
-{
-	struct ice_fltr_list_entry *list;
-	struct ice_pf *pf = vsi->back;
-	LIST_HEAD(tmp_add_list);
-
-	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
-	if (!list)
-		return;
-
-	list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
-	list->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
-	list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-	list->fltr_info.l_data.vlan.vlan_id = vid;
-	list->fltr_info.flag = ICE_FLTR_TX;
-	list->fltr_info.src = vsi->vsi_num;
-
-	INIT_LIST_HEAD(&list->list_entry);
-	list_add(&list->list_entry, &tmp_add_list);
-
-	if (ice_remove_vlan(&pf->hw, &tmp_add_list))
-		dev_err(&pf->pdev->dev, "Error removing VLAN %d on vsi %i\n",
-			vid, vsi->vsi_num);
-
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
-}
-
-/**
- * ice_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
+ * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
  * @netdev: network interface to be adjusted
  * @proto: unused protocol
- * @vid: vlan id to be removed
+ * @vid: VLAN ID to be removed
  *
- * net_device_ops implementation for removing vlan ids
+ * net_device_ops implementation for removing VLAN IDs
  */
-static int ice_vlan_rx_kill_vid(struct net_device *netdev,
-				__always_unused __be16 proto, u16 vid)
+static int
+ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
+		     u16 vid)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
+	int ret;
 
 	if (vsi->info.pvid)
 		return -EINVAL;
 
-	/* return code is ignored as there is nothing a user
-	 * can do about failure to remove and a log message was
-	 * already printed from the other function
+	/* Make sure ice_vsi_kill_vlan is successful before updating VLAN
+	 * information
 	 */
-	ice_vsi_kill_vlan(vsi, vid);
+	ret = ice_vsi_kill_vlan(vsi, vid);
+	if (ret)
+		return ret;
 
-	clear_bit(vid, vsi->active_vlans);
+	/* Disable VLAN pruning when VLAN 0 is removed */
+	if (unlikely(!vid))
+		ret = ice_cfg_vlan_pruning(vsi, false, false);
 
-	return 0;
+	vsi->vlan_ena = false;
+	set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+	return ret;
 }
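+
+/* Sketch of the pruning pairing above, reduced to its shape; the
+ * ice_cfg_vlan_pruning() signature is taken from the calls above, the rest
+ * is illustrative. The 8021q core adds VID 0 when a CTAG-filtering netdev
+ * comes up, so in practice pruning tracks the interface lifetime.
+ */
+static int example_toggle_vlan0_pruning(struct ice_vsi *vsi, bool add)
+{
+	/* pruning on when VID 0 appears, off again when it is removed */
+	return ice_cfg_vlan_pruning(vsi, add, false);
+}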
 
 /**
@@ -2999,96 +2202,94 @@
  */
 static int ice_setup_pf_sw(struct ice_pf *pf)
 {
-	LIST_HEAD(tmp_add_list);
-	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
 	int status = 0;
 
-	if (!ice_is_reset_recovery_pending(pf->state)) {
-		vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info);
-		if (!vsi) {
-			status = -ENOMEM;
-			goto error_exit;
-		}
-	} else {
-		vsi = pf->vsi[0];
-		status = ice_vsi_reinit_setup(vsi);
-		if (status < 0)
-			return -EIO;
-	}
+	if (ice_is_reset_in_progress(pf->state))
+		return -EBUSY;
 
-	/* tmp_add_list contains a list of MAC addresses for which MAC
-	 * filters need to be programmed. Add the VSI's unicast MAC to
-	 * this list
-	 */
-	status = ice_add_mac_to_list(vsi, &tmp_add_list,
-				     vsi->port_info->mac.perm_addr);
-	if (status)
-		goto error_exit;
-
-	/* VSI needs to receive broadcast traffic, so add the broadcast
-	 * MAC address to the list.
-	 */
-	eth_broadcast_addr(broadcast);
-	status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
-	if (status)
-		goto error_exit;
-
-	/* program MAC filters for entries in tmp_add_list */
-	status = ice_add_mac(&pf->hw, &tmp_add_list);
-	if (status) {
-		dev_err(&pf->pdev->dev, "Could not add MAC filters\n");
+	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
+	if (!vsi) {
 		status = -ENOMEM;
-		goto error_exit;
+		goto unroll_vsi_setup;
 	}
 
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	status = ice_cfg_netdev(vsi);
+	if (status) {
+		status = -ENODEV;
+		goto unroll_vsi_setup;
+	}
+
+	/* registering the NAPI handler requires both the queues and
+	 * netdev to be created, which are done in ice_pf_vsi_setup()
+	 * and ice_cfg_netdev() respectively
+	 */
+	ice_napi_add(vsi);
+
+	status = ice_init_mac_fltr(pf);
+	if (status)
+		goto unroll_napi_add;
+
 	return status;
 
-error_exit:
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
-
+unroll_napi_add:
 	if (vsi) {
-		ice_vsi_free_q_vectors(vsi);
-		if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
-			unregister_netdev(vsi->netdev);
+		ice_napi_del(vsi);
 		if (vsi->netdev) {
+			if (vsi->netdev->reg_state == NETREG_REGISTERED)
+				unregister_netdev(vsi->netdev);
 			free_netdev(vsi->netdev);
 			vsi->netdev = NULL;
 		}
+	}
 
+unroll_vsi_setup:
+	if (vsi) {
+		ice_vsi_free_q_vectors(vsi);
 		ice_vsi_delete(vsi);
 		ice_vsi_put_qs(vsi);
-		pf->q_left_tx += vsi->alloc_txq;
-		pf->q_left_rx += vsi->alloc_rxq;
 		ice_vsi_clear(vsi);
 	}
 	return status;
 }
 
 /**
- * ice_determine_q_usage - Calculate queue distribution
- * @pf: board private structure
- *
- * Return -ENOMEM if we don't get enough queues for all ports
+ * ice_get_avail_q_count - Get count of queues available for use
+ * @pf_qmap: bitmap to get the available-queue count from
+ * @lock: pointer to a mutex that protects access to pf_qmap
+ * @size: size of the bitmap
  */
-static void ice_determine_q_usage(struct ice_pf *pf)
+static u16
+ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
 {
-	u16 q_left_tx, q_left_rx;
+	u16 count = 0, bit;
 
-	q_left_tx = pf->hw.func_caps.common_cap.num_txq;
-	q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
+	mutex_lock(lock);
+	for_each_clear_bit(bit, pf_qmap, size)
+		count++;
+	mutex_unlock(lock);
 
-	pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus());
+	return count;
+}
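+
+/* Equivalent sketch using bitmap_weight(), noted for comparison: set bits
+ * mark queues in use, so the available count is size minus the weight.
+ */
+static u16 example_avail_q_count(unsigned long *pf_qmap, struct mutex *lock,
+				 u16 size)
+{
+	u16 count;
+
+	mutex_lock(lock);
+	count = size - bitmap_weight(pf_qmap, size);
+	mutex_unlock(lock);
+
+	return count;
+}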
 
-	/* only 1 rx queue unless RSS is enabled */
-	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
-		pf->num_lan_rx = 1;
-	else
-		pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus());
+/**
+ * ice_get_avail_txq_count - Get count of Tx queues available for use
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_txq_count(struct ice_pf *pf)
+{
+	return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
+				     pf->max_pf_txqs);
+}
 
-	pf->q_left_tx = q_left_tx - pf->num_lan_tx;
-	pf->q_left_rx = q_left_rx - pf->num_lan_rx;
+/**
+ * ice_get_avail_rxq_count - Get count of Rx queues available for use
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_rxq_count(struct ice_pf *pf)
+{
+	return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
+				     pf->max_pf_rxqs);
 }
 
 /**
@@ -3097,40 +2298,77 @@
  */
 static void ice_deinit_pf(struct ice_pf *pf)
 {
-	if (pf->serv_tmr.function)
-		del_timer_sync(&pf->serv_tmr);
-	if (pf->serv_task.func)
-		cancel_work_sync(&pf->serv_task);
+	ice_service_task_stop(pf);
 	mutex_destroy(&pf->sw_mutex);
 	mutex_destroy(&pf->avail_q_mutex);
+
+	if (pf->avail_txqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+	}
+
+	if (pf->avail_rxqs) {
+		bitmap_free(pf->avail_rxqs);
+		pf->avail_rxqs = NULL;
+	}
+}
+
+/**
+ * ice_set_pf_caps - set PFs capability flags
+ * @pf: pointer to the PF instance
+ */
+static void ice_set_pf_caps(struct ice_pf *pf)
+{
+	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
+
+	clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+	if (func_caps->common_cap.dcb)
+		set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+#ifdef CONFIG_PCI_IOV
+	clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+	if (func_caps->common_cap.sr_iov_1_1) {
+		set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+		pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs,
+					      ICE_MAX_VF_COUNT);
+	}
+#endif /* CONFIG_PCI_IOV */
+	clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
+	if (func_caps->common_cap.rss_table_size)
+		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
+
+	pf->max_pf_txqs = func_caps->common_cap.num_txq;
+	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
 }
 
 /**
  * ice_init_pf - Initialize general software structures (struct ice_pf)
  * @pf: board private structure to initialize
  */
-static void ice_init_pf(struct ice_pf *pf)
+static int ice_init_pf(struct ice_pf *pf)
 {
-	bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS);
-	set_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+	ice_set_pf_caps(pf);
 
 	mutex_init(&pf->sw_mutex);
-	mutex_init(&pf->avail_q_mutex);
-
-	/* Clear avail_[t|r]x_qs bitmaps (set all to avail) */
-	mutex_lock(&pf->avail_q_mutex);
-	bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS);
-	bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS);
-	mutex_unlock(&pf->avail_q_mutex);
-
-	if (pf->hw.func_caps.common_cap.rss_table_size)
-		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
 
 	/* setup service timer and periodic service task */
 	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
 	pf->serv_tmr_period = HZ;
 	INIT_WORK(&pf->serv_task, ice_service_task);
 	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+
+	mutex_init(&pf->avail_q_mutex);
+	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
+	if (!pf->avail_txqs)
+		return -ENOMEM;
+
+	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
+	if (!pf->avail_rxqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 /**
@@ -3149,15 +2387,21 @@
 
 	/* reserve one vector for miscellaneous handler */
 	needed = 1;
+	if (v_left < needed)
+		goto no_hw_vecs_left_err;
 	v_budget += needed;
 	v_left -= needed;
 
 	/* reserve vectors for LAN traffic */
-	pf->num_lan_msix = min_t(int, num_online_cpus(), v_left);
-	v_budget += pf->num_lan_msix;
+	needed = min_t(int, num_online_cpus(), v_left);
+	if (v_left < needed)
+		goto no_hw_vecs_left_err;
+	pf->num_lan_msix = needed;
+	v_budget += needed;
+	v_left -= needed;
 
 	pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
-					sizeof(struct msix_entry), GFP_KERNEL);
+					sizeof(*pf->msix_entries), GFP_KERNEL);
 
 	if (!pf->msix_entries) {
 		err = -ENOMEM;
@@ -3179,17 +2423,18 @@
 
 	if (v_actual < v_budget) {
 		dev_warn(&pf->pdev->dev,
-			 "not enough vectors. requested = %d, obtained = %d\n",
+			 "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
 			 v_budget, v_actual);
-		if (v_actual >= (pf->num_lan_msix + 1)) {
-			pf->num_avail_msix = v_actual - (pf->num_lan_msix + 1);
-		} else if (v_actual >= 2) {
-			pf->num_lan_msix = 1;
-			pf->num_avail_msix = v_actual - 2;
-		} else {
+/* 2 vectors for LAN (traffic + OICR) */
+#define ICE_MIN_LAN_VECS 2
+
+		if (v_actual < ICE_MIN_LAN_VECS) {
+			/* error if we can't get minimum vectors */
 			pci_disable_msix(pf->pdev);
 			err = -ERANGE;
 			goto msix_err;
+		} else {
+			pf->num_lan_msix = ICE_MIN_LAN_VECS;
 		}
 	}
 
@@ -3199,9 +2444,13 @@
 	devm_kfree(&pf->pdev->dev, pf->msix_entries);
 	goto exit_err;
 
+no_hw_vecs_left_err:
+	dev_err(&pf->pdev->dev,
+		"not enough device MSI-X vectors. requested = %d, available = %d\n",
+		needed, v_left);
+	err = -ERANGE;
 exit_err:
 	pf->num_lan_msix = 0;
-	clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
 	return err;
 }
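+
+/* Worked example of the budgeting above (hypothetical numbers): with 8
+ * online CPUs and v_left = 16 device vectors, the misc vector takes 1 and
+ * LAN takes min(8, 15) = 8, so v_budget = 9. If the OS then grants only
+ * v_actual = 5, the fallback trims pf->num_lan_msix to ICE_MIN_LAN_VECS;
+ * anything below 2 granted vectors aborts with -ERANGE.
+ */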
 
@@ -3214,38 +2463,6 @@
 	pci_disable_msix(pf->pdev);
 	devm_kfree(&pf->pdev->dev, pf->msix_entries);
 	pf->msix_entries = NULL;
-	clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
-}
-
-/**
- * ice_init_interrupt_scheme - Determine proper interrupt scheme
- * @pf: board private structure to initialize
- */
-static int ice_init_interrupt_scheme(struct ice_pf *pf)
-{
-	int vectors = 0;
-	ssize_t size;
-
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-		vectors = ice_ena_msix_range(pf);
-	else
-		return -ENODEV;
-
-	if (vectors < 0)
-		return vectors;
-
-	/* set up vector assignment tracking */
-	size = sizeof(struct ice_res_tracker) + (sizeof(u16) * vectors);
-
-	pf->irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
-	if (!pf->irq_tracker) {
-		ice_dis_msix(pf);
-		return -ENOMEM;
-	}
-
-	pf->irq_tracker->num_entries = vectors;
-
-	return 0;
 }
 
 /**
@@ -3254,8 +2471,7 @@
  */
 static void ice_clear_interrupt_scheme(struct ice_pf *pf)
 {
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-		ice_dis_msix(pf);
+	ice_dis_msix(pf);
 
 	if (pf->irq_tracker) {
 		devm_kfree(&pf->pdev->dev, pf->irq_tracker);
@@ -3264,40 +2480,343 @@
 }
 
 /**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+static int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+	int vectors;
+
+	vectors = ice_ena_msix_range(pf);
+
+	if (vectors < 0)
+		return vectors;
+
+	/* set up vector assignment tracking */
+	pf->irq_tracker =
+		devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) +
+			     (sizeof(u16) * vectors), GFP_KERNEL);
+	if (!pf->irq_tracker) {
+		ice_dis_msix(pf);
+		return -ENOMEM;
+	}
+
+	/* populate the SW interrupt pool with the number of OS-granted IRQs */
+	pf->num_avail_sw_msix = vectors;
+	pf->irq_tracker->num_entries = vectors;
+	pf->irq_tracker->end = pf->irq_tracker->num_entries;
+
+	return 0;
+}
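+
+/* A minimal sketch of the same allocation via the overflow-safe helper from
+ * <linux/overflow.h>, assuming ice_res_tracker ends in a flexible u16 array
+ * (the member name "list" here is a placeholder):
+ *
+ *	pf->irq_tracker = devm_kzalloc(&pf->pdev->dev,
+ *				       struct_size(pf->irq_tracker, list,
+ *						   vectors), GFP_KERNEL);
+ */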
+
+/**
+ * ice_log_pkg_init - log result of DDP package load
+ * @hw: pointer to hardware info
+ * @status: status of package load
+ */
+static void
+ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	struct device *dev = &pf->pdev->dev;
+
+	switch (*status) {
+	case ICE_SUCCESS:
+		/* The package download AdminQ command returned success either
+		 * because this download succeeded or because it returned
+		 * ICE_ERR_AQ_NO_WORK, meaning a package is already loaded on
+		 * the device.
+		 */
+		if (hw->pkg_ver.major == hw->active_pkg_ver.major &&
+		    hw->pkg_ver.minor == hw->active_pkg_ver.minor &&
+		    hw->pkg_ver.update == hw->active_pkg_ver.update &&
+		    hw->pkg_ver.draft == hw->active_pkg_ver.draft &&
+		    !memcmp(hw->pkg_name, hw->active_pkg_name,
+			    sizeof(hw->pkg_name))) {
+			if (hw->pkg_dwnld_status == ICE_AQ_RC_EEXIST)
+				dev_info(dev,
+					 "DDP package already present on device: %s version %d.%d.%d.%d\n",
+					 hw->active_pkg_name,
+					 hw->active_pkg_ver.major,
+					 hw->active_pkg_ver.minor,
+					 hw->active_pkg_ver.update,
+					 hw->active_pkg_ver.draft);
+			else
+				dev_info(dev,
+					 "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
+					 hw->active_pkg_name,
+					 hw->active_pkg_ver.major,
+					 hw->active_pkg_ver.minor,
+					 hw->active_pkg_ver.update,
+					 hw->active_pkg_ver.draft);
+		} else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ ||
+			   hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) {
+			dev_err(dev,
+				"The device has a DDP package that is not supported by the driver.  The device has package '%s' version %d.%d.x.x.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
+				hw->active_pkg_name,
+				hw->active_pkg_ver.major,
+				hw->active_pkg_ver.minor,
+				ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+			*status = ICE_ERR_NOT_SUPPORTED;
+		} else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+			   hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) {
+			dev_info(dev,
+				 "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package '%s' version %d.%d.%d.%d.  The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
+				 hw->active_pkg_name,
+				 hw->active_pkg_ver.major,
+				 hw->active_pkg_ver.minor,
+				 hw->active_pkg_ver.update,
+				 hw->active_pkg_ver.draft,
+				 hw->pkg_name,
+				 hw->pkg_ver.major,
+				 hw->pkg_ver.minor,
+				 hw->pkg_ver.update,
+				 hw->pkg_ver.draft);
+		} else {
+			dev_err(dev,
+				"An unknown error occurred when loading the DDP package, please reboot the system.  If the problem persists, update the NVM.  Entering Safe Mode.\n");
+			*status = ICE_ERR_NOT_SUPPORTED;
+		}
+		break;
+	case ICE_ERR_BUF_TOO_SHORT:
+		/* fall-through */
+	case ICE_ERR_CFG:
+		dev_err(dev,
+			"The DDP package file is invalid. Entering Safe Mode.\n");
+		break;
+	case ICE_ERR_NOT_SUPPORTED:
+		/* Package File version not supported */
+		if (hw->pkg_ver.major > ICE_PKG_SUPP_VER_MAJ ||
+		    (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+		     hw->pkg_ver.minor > ICE_PKG_SUPP_VER_MNR))
+			dev_err(dev,
+				"The DDP package file version is higher than the driver supports.  Please use an updated driver.  Entering Safe Mode.\n");
+		else if (hw->pkg_ver.major < ICE_PKG_SUPP_VER_MAJ ||
+			 (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+			  hw->pkg_ver.minor < ICE_PKG_SUPP_VER_MNR))
+			dev_err(dev,
+				"The DDP package file version is lower than the driver supports.  The driver requires version %d.%d.x.x.  Please use an updated DDP Package file.  Entering Safe Mode.\n",
+				ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+		break;
+	case ICE_ERR_AQ_ERROR:
+		switch (hw->adminq.sq_last_status) {
+		case ICE_AQ_RC_ENOSEC:
+		case ICE_AQ_RC_EBADSIG:
+			dev_err(dev,
+				"The DDP package could not be loaded because its signature is not valid.  Please use a valid DDP Package.  Entering Safe Mode.\n");
+			return;
+		case ICE_AQ_RC_ESVN:
+			dev_err(dev,
+				"The DDP Package could not be loaded because its security revision is too low.  Please use an updated DDP Package.  Entering Safe Mode.\n");
+			return;
+		case ICE_AQ_RC_EBADMAN:
+		case ICE_AQ_RC_EBADBUF:
+			dev_err(dev,
+				"An error occurred on the device while loading the DDP package.  The device will be reset.\n");
+			return;
+		default:
+			break;
+		}
+		/* fall-through */
+	default:
+		dev_err(dev,
+			"An unknown error (%d) occurred when loading the DDP package.  Entering Safe Mode.\n",
+			*status);
+		break;
+	}
+}
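+
+/* Worked example of the version gate above (hypothetical numbers): with a
+ * driver that supports DDP version 1.3.x.x, a 1.2.0.0 package file hits the
+ * "lower than the driver supports" branch and a 2.0.0.0 file the "higher
+ * than" branch; only the major.minor pair is compared here, update/draft
+ * are not.
+ */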
+
+/**
+ * ice_load_pkg - load/reload the DDP Package file
+ * @firmware: firmware structure when firmware requested or NULL for reload
+ * @pf: pointer to the PF instance
+ *
+ * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
+ * initialize HW tables.
+ */
+static void
+ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct device *dev = &pf->pdev->dev;
+	struct ice_hw *hw = &pf->hw;
+
+	/* Load DDP Package */
+	if (firmware && !hw->pkg_copy) {
+		status = ice_copy_and_init_pkg(hw, firmware->data,
+					       firmware->size);
+		ice_log_pkg_init(hw, &status);
+	} else if (!firmware && hw->pkg_copy) {
+		/* Reload package during rebuild after CORER/GLOBR reset */
+		status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
+		ice_log_pkg_init(hw, &status);
+	} else {
+		dev_err(dev,
+			"The DDP package file failed to load. Entering Safe Mode.\n");
+	}
+
+	if (status) {
+		/* Safe Mode */
+		clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+		return;
+	}
+
+	/* A successful package download is the precondition for advanced
+	 * features, hence the ICE_FLAG_ADV_FEATURES flag is set here
+	 */
+	set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
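+
+/* For reference, the gate used throughout probe is effectively a test of
+ * this flag; a minimal sketch of the predicate (the real ice_is_safe_mode()
+ * helper lives in the driver headers):
+ */
+static bool example_is_safe_mode(struct ice_pf *pf)
+{
+	return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}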
+
+/**
+ * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
+ * @pf: pointer to the PF structure
+ *
+ * There is no error returned here because the driver should be able to handle
+ * 128 Byte cache lines, so we only print a warning in case issues are seen,
+ * specifically with Tx.
+ */
+static void ice_verify_cacheline_size(struct ice_pf *pf)
+{
+	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
+		dev_warn(&pf->pdev->dev,
+			 "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
+			 ICE_CACHE_LINE_BYTES);
+}
+
+/**
+ * ice_send_version - update firmware with driver version
+ * @pf: PF struct
+ *
+ * Returns ICE_SUCCESS on success, else error code
+ */
+static enum ice_status ice_send_version(struct ice_pf *pf)
+{
+	struct ice_driver_ver dv;
+
+	dv.major_ver = DRV_VERSION_MAJOR;
+	dv.minor_ver = DRV_VERSION_MINOR;
+	dv.build_ver = DRV_VERSION_BUILD;
+	dv.subbuild_ver = 0;
+	strscpy((char *)dv.driver_string, DRV_VERSION,
+		sizeof(dv.driver_string));
+	return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
+}
+
+/**
+ * ice_get_opt_fw_name - return optional firmware file name or NULL
+ * @pf: pointer to the PF instance
+ */
+static char *ice_get_opt_fw_name(struct ice_pf *pf)
+{
+	/* The optional firmware file name is the default name plus a dash
+	 * followed by an EUI-64 identifier (the PCIe Device Serial Number)
+	 */
+	struct pci_dev *pdev = pf->pdev;
+	char *opt_fw_filename = NULL;
+	u32 dword;
+	u8 dsn[8];
+	int pos;
+
+	/* Determine the name of the optional file using the DSN (two
+	 * dwords following the start of the DSN Capability).
+	 */
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
+	if (pos) {
+		opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
+		if (!opt_fw_filename)
+			return NULL;
+
+		pci_read_config_dword(pdev, pos + 4, &dword);
+		put_unaligned_le32(dword, &dsn[0]);
+		pci_read_config_dword(pdev, pos + 8, &dword);
+		put_unaligned_le32(dword, &dsn[4]);
+		snprintf(opt_fw_filename, NAME_MAX,
+			 "%sice-%02x%02x%02x%02x%02x%02x%02x%02x.pkg",
+			 ICE_DDP_PKG_PATH,
+			 dsn[7], dsn[6], dsn[5], dsn[4],
+			 dsn[3], dsn[2], dsn[1], dsn[0]);
+	}
+
+	return opt_fw_filename;
+}
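+
+/* Worked example (hypothetical DSN): if the two config dwords read back as
+ * 0x03020100 and 0x07060504, dsn[] holds 00 01 02 03 04 05 06 07 and the
+ * generated name is ICE_DDP_PKG_PATH "ice-0706050403020100.pkg", i.e. the
+ * serial number is printed most-significant byte first.
+ */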
+
+/**
+ * ice_request_fw - Device initialization routine
+ * @pf: pointer to the PF instance
+ */
+static void ice_request_fw(struct ice_pf *pf)
+{
+	char *opt_fw_filename = ice_get_opt_fw_name(pf);
+	const struct firmware *firmware = NULL;
+	struct device *dev = &pf->pdev->dev;
+	int err = 0;
+
+	/* An optional device-specific DDP package (if present) overrides the
+	 * default DDP package file. The kernel logs a debug message if the
+	 * file doesn't exist, and warning messages for other errors.
+	 */
+	if (opt_fw_filename) {
+		err = firmware_request_nowarn(&firmware, opt_fw_filename, dev);
+		if (err) {
+			kfree(opt_fw_filename);
+			goto dflt_pkg_load;
+		}
+
+		/* request for firmware was successful. Download to device */
+		ice_load_pkg(firmware, pf);
+		kfree(opt_fw_filename);
+		release_firmware(firmware);
+		return;
+	}
+
+dflt_pkg_load:
+	err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
+	if (err) {
+		dev_err(dev,
+			"The DDP package file was not found or could not be read. Entering Safe Mode\n");
+		return;
+	}
+
+	/* request for firmware was successful. Download to device */
+	ice_load_pkg(firmware, pf);
+	release_firmware(firmware);
+}
+
+/**
  * ice_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in ice_pci_tbl
  *
  * Returns 0 on success, negative on failure
  */
-static int ice_probe(struct pci_dev *pdev,
-		     const struct pci_device_id __always_unused *ent)
+static int
+ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 {
+	struct device *dev = &pdev->dev;
 	struct ice_pf *pf;
 	struct ice_hw *hw;
 	int err;
 
-	/* this driver uses devres, see Documentation/driver-model/devres.txt */
+	/* this driver uses devres, see Documentation/driver-api/driver-model/devres.rst */
 	err = pcim_enable_device(pdev);
 	if (err)
 		return err;
 
 	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
 	if (err) {
-		dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err);
+		dev_err(dev, "BAR0 I/O map error %d\n", err);
 		return err;
 	}
 
-	pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL);
+	pf = devm_kzalloc(dev, sizeof(*pf), GFP_KERNEL);
 	if (!pf)
 		return -ENOMEM;
 
-	/* set up for high or low dma */
-	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	/* set up for high or low DMA */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	if (err)
-		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
 	if (err) {
-		dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err);
+		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
 		return err;
 	}
 
@@ -3307,6 +2826,8 @@
 	pf->pdev = pdev;
 	pci_set_drvdata(pdev, pf);
 	set_bit(__ICE_DOWN, pf->state);
+	/* Disable service task until DOWN bit is cleared */
+	set_bit(__ICE_SERVICE_DIS, pf->state);
 
 	hw = &pf->hw;
 	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
@@ -3329,28 +2850,47 @@
 
 	err = ice_init_hw(hw);
 	if (err) {
-		dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err);
+		dev_err(dev, "ice_init_hw failed: %d\n", err);
 		err = -EIO;
 		goto err_exit_unroll;
 	}
 
-	dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n",
-		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
-		 hw->api_maj_ver, hw->api_min_ver);
+	dev_info(dev, "firmware %d.%d.%d api %d.%d.%d nvm %s build 0x%08x\n",
+		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
+		 hw->api_maj_ver, hw->api_min_ver, hw->api_patch,
+		 ice_nvm_version_str(hw), hw->fw_build);
 
-	ice_init_pf(pf);
+	ice_request_fw(pf);
 
-	ice_determine_q_usage(pf);
+	/* if ice_request_fw fails, the ICE_FLAG_ADV_FEATURES bit won't be
+	 * set in pf->flags, which will cause ice_is_safe_mode to return
+	 * true
+	 */
+	if (ice_is_safe_mode(pf)) {
+		dev_err(dev,
+			"Package download failed. Advanced features disabled - Device now in Safe Mode\n");
+		/* we already got function/device capabilities but these don't
+		 * reflect what the driver needs to do in safe mode. Instead of
+		 * adding conditional logic everywhere to ignore these
+		 * device/function capabilities, override them.
+		 */
+		ice_set_safe_mode_caps(hw);
+	}
 
-	pf->num_alloc_vsi = min_t(u16, ICE_MAX_VSI_ALLOC,
-				  hw->func_caps.guaranteed_num_vsi);
+	err = ice_init_pf(pf);
+	if (err) {
+		dev_err(dev, "ice_init_pf failed: %d\n", err);
+		goto err_init_pf_unroll;
+	}
+
+	pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
 	if (!pf->num_alloc_vsi) {
 		err = -EIO;
 		goto err_init_pf_unroll;
 	}
 
-	pf->vsi = devm_kcalloc(&pdev->dev, pf->num_alloc_vsi,
-			       sizeof(struct ice_vsi *), GFP_KERNEL);
+	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
+			       GFP_KERNEL);
 	if (!pf->vsi) {
 		err = -ENOMEM;
 		goto err_init_pf_unroll;
@@ -3358,35 +2898,37 @@
 
 	err = ice_init_interrupt_scheme(pf);
 	if (err) {
-		dev_err(&pdev->dev,
-			"ice_init_interrupt_scheme failed: %d\n", err);
+		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
 		err = -EIO;
 		goto err_init_interrupt_unroll;
 	}
 
+	/* Driver is mostly up */
+	clear_bit(__ICE_DOWN, pf->state);
+
 	/* In case of MSIX we are going to setup the misc vector right here
 	 * to handle admin queue events etc. In case of legacy and MSI
 	 * the misc functionality and queue processing is combined in
 	 * the same vector and that gets setup at open.
 	 */
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
-		err = ice_req_irq_msix_misc(pf);
-		if (err) {
-			dev_err(&pdev->dev,
-				"setup of misc vector failed: %d\n", err);
-			goto err_init_interrupt_unroll;
-		}
+	err = ice_req_irq_msix_misc(pf);
+	if (err) {
+		dev_err(dev, "setup of misc vector failed: %d\n", err);
+		goto err_init_interrupt_unroll;
 	}
 
 	/* create switch struct for the switch element created by FW on boot */
-	pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(struct ice_sw),
-				    GFP_KERNEL);
+	pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL);
 	if (!pf->first_sw) {
 		err = -ENOMEM;
 		goto err_msix_misc_unroll;
 	}
 
-	pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+	if (hw->evb_veb)
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
+
 	pf->first_sw->pf = pf;
 
 	/* record the sw_id available for later use */
@@ -3394,33 +2936,57 @@
 
 	err = ice_setup_pf_sw(pf);
 	if (err) {
-		dev_err(&pdev->dev,
-			"probe failed due to setup pf switch:%d\n", err);
+		dev_err(dev, "probe failed due to setup PF switch:%d\n", err);
 		goto err_alloc_sw_unroll;
 	}
 
-	/* Driver is mostly up */
-	clear_bit(__ICE_DOWN, pf->state);
+	clear_bit(__ICE_SERVICE_DIS, pf->state);
+
+	/* tell the firmware we are up */
+	err = ice_send_version(pf);
+	if (err) {
+		dev_err(dev,
+			"probe failed sending driver version %s. error: %d\n",
+			ice_drv_ver, err);
+		goto err_alloc_sw_unroll;
+	}
 
 	/* since everything is good, start the service timer */
 	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
 
 	err = ice_init_link_events(pf->hw.port_info);
 	if (err) {
-		dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err);
+		dev_err(dev, "ice_init_link_events failed: %d\n", err);
 		goto err_alloc_sw_unroll;
 	}
 
+	ice_verify_cacheline_size(pf);
+
+	/* If no DDP-driven features need to be set up, return here */
+	if (ice_is_safe_mode(pf))
+		return 0;
+
+	/* initialize DDP driven features */
+
+	/* Note: DCB init failure is non-fatal to load */
+	if (ice_init_pf_dcb(pf, false)) {
+		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		ice_cfg_lldp_mib_change(&pf->hw, true);
+	}
+
 	return 0;
 
 err_alloc_sw_unroll:
+	set_bit(__ICE_SERVICE_DIS, pf->state);
 	set_bit(__ICE_DOWN, pf->state);
 	devm_kfree(&pf->pdev->dev, pf->first_sw);
 err_msix_misc_unroll:
 	ice_free_irq_msix_misc(pf);
 err_init_interrupt_unroll:
 	ice_clear_interrupt_scheme(pf);
-	devm_kfree(&pdev->dev, pf->vsi);
+	devm_kfree(dev, pf->vsi);
 err_init_pf_unroll:
 	ice_deinit_pf(pf);
 	ice_deinit_hw(hw);
@@ -3436,31 +3002,170 @@
 static void ice_remove(struct pci_dev *pdev)
 {
 	struct ice_pf *pf = pci_get_drvdata(pdev);
-	int i = 0;
-	int err;
+	int i;
 
 	if (!pf)
 		return;
 
-	set_bit(__ICE_DOWN, pf->state);
-
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (!pf->vsi[i])
-			continue;
-
-		err = ice_vsi_release(pf->vsi[i]);
-		if (err)
-			dev_dbg(&pf->pdev->dev, "Failed to release VSI index %d (err %d)\n",
-				i, err);
+	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
+		if (!ice_is_reset_in_progress(pf->state))
+			break;
+		msleep(100);
 	}
 
+	set_bit(__ICE_DOWN, pf->state);
+	ice_service_task_stop(pf);
+
+	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags))
+		ice_free_vfs(pf);
+	ice_vsi_release_all(pf);
 	ice_free_irq_msix_misc(pf);
-	ice_clear_interrupt_scheme(pf);
+	ice_for_each_vsi(pf, i) {
+		if (!pf->vsi[i])
+			continue;
+		ice_vsi_free_q_vectors(pf->vsi[i]);
+	}
 	ice_deinit_pf(pf);
 	ice_deinit_hw(&pf->hw);
+	ice_clear_interrupt_scheme(pf);
+	/* Issue a PFR as part of the prescribed driver unload flow.  Do not
+	 * do it via ice_schedule_reset() since there is no need to rebuild
+	 * and the service task is already stopped.
+	 */
+	ice_reset(&pf->hw, ICE_RESET_PFR);
 	pci_disable_pcie_error_reporting(pdev);
 }
 
+/**
+ * ice_pci_err_detected - warning that PCI error has been detected
+ * @pdev: PCI device information struct
+ * @err: the type of PCI error
+ *
+ * Called to warn that something happened on the PCI bus and the error handling
+ * is in progress.  Allows the driver to gracefully prepare/handle PCI errors.
+ */
+static pci_ers_result_t
+ice_pci_err_detected(struct pci_dev *pdev, enum pci_channel_state err)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf) {
+		dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
+			__func__, err);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (!test_bit(__ICE_SUSPENDED, pf->state)) {
+		ice_service_task_stop(pf);
+
+		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(__ICE_PFR_REQ, pf->state);
+			ice_prepare_for_reset(pf);
+		}
+	}
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ice_pci_err_slot_reset - a PCI slot reset has just happened
+ * @pdev: PCI device information struct
+ *
+ * Called to determine if the driver can recover from the PCI slot reset by
+ * using a register read to determine if the device is recoverable.
+ */
+static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	pci_ers_result_t result;
+	int err;
+	u32 reg;
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset, error %d\n",
+			err);
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+		pci_wake_from_d3(pdev, false);
+
+		/* Check for life */
+		reg = rd32(&pf->hw, GLGEN_RTRIG);
+		if (!reg)
+			result = PCI_ERS_RESULT_RECOVERED;
+		else
+			result = PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	err = pci_cleanup_aer_uncorrect_error_status(pdev);
+	if (err)
+		/* non-fatal, continue */
+		dev_dbg(&pdev->dev,
+			"pci_cleanup_aer_uncorrect_error_status failed, error %d\n",
+			err);
+
+	return result;
+}
+
+/**
+ * ice_pci_err_resume - restart operations after PCI error recovery
+ * @pdev: PCI device information struct
+ *
+ * Called to allow the driver to bring things back up after PCI error and/or
+ * reset recovery have finished
+ */
+static void ice_pci_err_resume(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf) {
+		dev_err(&pdev->dev,
+			"%s failed, device is unrecoverable\n", __func__);
+		return;
+	}
+
+	if (test_bit(__ICE_SUSPENDED, pf->state)) {
+		dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
+			__func__);
+		return;
+	}
+
+	ice_do_reset(pf, ICE_RESET_PFR);
+	ice_service_task_restart(pf);
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+}
+
+/**
+ * ice_pci_err_reset_prepare - prepare device driver for PCI reset
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!test_bit(__ICE_SUSPENDED, pf->state)) {
+		ice_service_task_stop(pf);
+
+		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(__ICE_PFR_REQ, pf->state);
+			ice_prepare_for_reset(pf);
+		}
+	}
+}
+
+/**
+ * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_done(struct pci_dev *pdev)
+{
+	ice_pci_err_resume(pdev);
+}
+
 /* ice_pci_tbl - PCI Device ID Table
  *
  * Wildcard entries (PCI_ANY_ID) should come last
@@ -3470,21 +3175,29 @@
  *   Class, Class Mask, private data (not used) }
  */
 static const struct pci_device_id ice_pci_tbl[] = {
-	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 },
-	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 },
-	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 },
-	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_10G_BASE_T), 0 },
-	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
 	/* required last entry */
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
 
+static const struct pci_error_handlers ice_pci_err_handler = {
+	.error_detected = ice_pci_err_detected,
+	.slot_reset = ice_pci_err_slot_reset,
+	.reset_prepare = ice_pci_err_reset_prepare,
+	.reset_done = ice_pci_err_reset_done,
+	.resume = ice_pci_err_resume
+};
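+
+/* For orientation (editorial note): per the kernel's PCI error-recovery
+ * documentation, AER recovery walks error_detected() -> slot_reset() ->
+ * resume(), while reset_prepare()/reset_done() bracket PCI function resets
+ * such as an FLR initiated through sysfs.
+ */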
+
 static struct pci_driver ice_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = ice_pci_tbl,
 	.probe = ice_probe,
 	.remove = ice_remove,
+	.sriov_configure = ice_sriov_configure,
+	.err_handler = &ice_pci_err_handler
 };
 
 /**
@@ -3500,7 +3213,7 @@
 	pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver);
 	pr_info("%s\n", ice_copyright);
 
-	ice_wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, KBUILD_MODNAME);
+	ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME);
 	if (!ice_wq) {
 		pr_err("Failed to create workqueue\n");
 		return -ENOMEM;
@@ -3508,7 +3221,7 @@
 
 	status = pci_register_driver(&ice_driver);
 	if (status) {
-		pr_err("failed to register pci driver, err %d\n", status);
+		pr_err("failed to register PCI driver, err %d\n", status);
 		destroy_workqueue(ice_wq);
 	}
 
@@ -3531,7 +3244,7 @@
 module_exit(ice_module_exit);
 
 /**
- * ice_set_mac_address - NDO callback to set mac address
+ * ice_set_mac_address - NDO callback to set MAC address
  * @netdev: network interface device structure
  * @pi: pointer to an address structure
  *
@@ -3545,10 +3258,8 @@
 	struct ice_hw *hw = &pf->hw;
 	struct sockaddr *addr = pi;
 	enum ice_status status;
-	LIST_HEAD(a_mac_list);
-	LIST_HEAD(r_mac_list);
 	u8 flags = 0;
-	int err;
+	int err = 0;
 	u8 *mac;
 
 	mac = (u8 *)addr->sa_data;
@@ -3562,68 +3273,49 @@
 	}
 
 	if (test_bit(__ICE_DOWN, pf->state) ||
-	    ice_is_reset_recovery_pending(pf->state)) {
+	    ice_is_reset_in_progress(pf->state)) {
 		netdev_err(netdev, "can't set mac %pM. device not ready\n",
 			   mac);
 		return -EBUSY;
 	}
 
-	/* When we change the mac address we also have to change the mac address
-	 * based filter rules that were created previously for the old mac
+	/* When we change the MAC address we also have to change the MAC address
+	 * based filter rules that were created previously for the old MAC
 	 * address. So first, we remove the old filter rule using ice_remove_mac
-	 * and then create a new filter rule using ice_add_mac. Note that for
-	 * both these operations, we first need to form a "list" of mac
-	 * addresses (even though in this case, we have only 1 mac address to be
-	 * added/removed) and this done using ice_add_mac_to_list. Depending on
-	 * the ensuing operation this "list" of mac addresses is either to be
-	 * added or removed from the filter.
+	 * and then create a new filter rule using ice_add_mac; both the
+	 * remove and the add go through the ice_vsi_cfg_mac_fltr() helper.
 	 */
-	err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr);
-	if (err) {
-		err = -EADDRNOTAVAIL;
-		goto free_lists;
-	}
-
-	status = ice_remove_mac(hw, &r_mac_list);
+	status = ice_vsi_cfg_mac_fltr(vsi, netdev->dev_addr, false);
 	if (status) {
 		err = -EADDRNOTAVAIL;
-		goto free_lists;
+		goto err_update_filters;
 	}
 
-	err = ice_add_mac_to_list(vsi, &a_mac_list, mac);
-	if (err) {
-		err = -EADDRNOTAVAIL;
-		goto free_lists;
-	}
-
-	status = ice_add_mac(hw, &a_mac_list);
+	status = ice_vsi_cfg_mac_fltr(vsi, mac, true);
 	if (status) {
 		err = -EADDRNOTAVAIL;
-		goto free_lists;
+		goto err_update_filters;
 	}
 
-free_lists:
-	/* free list entries */
-	ice_free_fltr_list(&pf->pdev->dev, &r_mac_list);
-	ice_free_fltr_list(&pf->pdev->dev, &a_mac_list);
-
+err_update_filters:
 	if (err) {
-		netdev_err(netdev, "can't set mac %pM. filter update failed\n",
+		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
 			   mac);
 		return err;
 	}
 
-	/* change the netdev's mac address */
+	/* change the netdev's MAC address */
 	memcpy(netdev->dev_addr, mac, netdev->addr_len);
-	netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
+	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
 		   netdev->dev_addr);
 
-	/* write new mac address to the firmware */
+	/* write new MAC address to the firmware */
 	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
 	status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
 	if (status) {
-		netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n",
-			   mac);
+		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
+			   mac, status);
 	}
 	return 0;
 }
@@ -3660,12 +3352,14 @@
  * @tb: pointer to array of nladdr (unused)
  * @dev: the net device pointer
  * @addr: the MAC address entry being added
- * @vid: VLAN id
+ * @vid: VLAN ID
  * @flags: instructions from stack about fdb operation
+ * @extack: netlink extended ack
  */
-static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
-		       struct net_device *dev, const unsigned char *addr,
-		       u16 vid, u16 flags)
+static int
+ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
+	    struct net_device *dev, const unsigned char *addr, u16 vid,
+	    u16 flags, struct netlink_ext_ack __always_unused *extack)
 {
 	int err;
 
@@ -3698,11 +3392,12 @@
  * @tb: pointer to array of nladdr (unused)
  * @dev: the net device pointer
- * @addr: the MAC address entry being added
+ * @addr: the MAC address entry being removed
- * @vid: VLAN id
+ * @vid: VLAN ID
  */
-static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
-		       struct net_device *dev, const unsigned char *addr,
-		       __always_unused u16 vid)
+static int
+ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
+	    struct net_device *dev, const unsigned char *addr,
+	    __always_unused u16 vid)
 {
 	int err;
 
@@ -3722,108 +3417,60 @@
 }
 
 /**
- * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
- * @vsi: the vsi being changed
- */
-static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
-{
-	struct device *dev = &vsi->back->pdev->dev;
-	struct ice_hw *hw = &vsi->back->hw;
-	struct ice_vsi_ctx ctxt = { 0 };
-	enum ice_status status;
-
-	/* Here we are configuring the VSI to let the driver add VLAN tags by
-	 * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag
-	 * insertion happens in the Tx hot path, in ice_tx_map.
-	 */
-	ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL;
-
-	ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
-	ctxt.vsi_num = vsi->vsi_num;
-
-	status = ice_aq_update_vsi(hw, &ctxt, NULL);
-	if (status) {
-		dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
-			status, hw->adminq.sq_last_status);
-		return -EIO;
-	}
-
-	vsi->info.vlan_flags = ctxt.info.vlan_flags;
-	return 0;
-}
-
-/**
- * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
- * @vsi: the vsi being changed
- * @ena: boolean value indicating if this is a enable or disable request
- */
-static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
-{
-	struct device *dev = &vsi->back->pdev->dev;
-	struct ice_hw *hw = &vsi->back->hw;
-	struct ice_vsi_ctx ctxt = { 0 };
-	enum ice_status status;
-
-	/* Here we are configuring what the VSI should do with the VLAN tag in
-	 * the Rx packet. We can either leave the tag in the packet or put it in
-	 * the Rx descriptor.
-	 */
-	if (ena) {
-		/* Strip VLAN tag from Rx packet and put it in the desc */
-		ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH;
-	} else {
-		/* Disable stripping. Leave tag in packet */
-		ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING;
-	}
-
-	/* Allow all packets untagged/tagged */
-	ctxt.info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL;
-
-	ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
-	ctxt.vsi_num = vsi->vsi_num;
-
-	status = ice_aq_update_vsi(hw, &ctxt, NULL);
-	if (status) {
-		dev_err(dev, "update VSI for VALN strip failed, ena = %d err %d aq_err %d\n",
-			ena, status, hw->adminq.sq_last_status);
-		return -EIO;
-	}
-
-	vsi->info.vlan_flags = ctxt.info.vlan_flags;
-	return 0;
-}
-
-/**
  * ice_set_features - set the netdev feature flags
  * @netdev: ptr to the netdev being adjusted
  * @features: the feature set that the stack is suggesting
  */
-static int ice_set_features(struct net_device *netdev,
-			    netdev_features_t features)
+static int
+ice_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	int ret = 0;
 
+	/* Don't set any netdev advanced features with device in Safe Mode */
+	if (ice_is_safe_mode(vsi->back)) {
+		dev_err(&vsi->back->pdev->dev,
+			"Device is in Safe Mode - not enabling advanced netdev features\n");
+		return ret;
+	}
+
+	/* Multiple features can be changed in one call so keep features in
+	 * separate if/else statements to guarantee each feature is checked
+	 */
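+	/* NETIF_F_RXHASH toggles RSS: program or clear the RSS LUT */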
+	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
+		ret = ice_vsi_manage_rss_lut(vsi, true);
+	else if (!(features & NETIF_F_RXHASH) &&
+		 netdev->features & NETIF_F_RXHASH)
+		ret = ice_vsi_manage_rss_lut(vsi, false);
+
 	if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
 		ret = ice_vsi_manage_vlan_stripping(vsi, true);
 	else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) &&
 		 (netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
 		ret = ice_vsi_manage_vlan_stripping(vsi, false);
-	else if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
-		 !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
 		ret = ice_vsi_manage_vlan_insertion(vsi);
 	else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) &&
 		 (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
 		ret = ice_vsi_manage_vlan_insertion(vsi);
 
+	if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		ret = ice_cfg_vlan_pruning(vsi, true, false);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		ret = ice_cfg_vlan_pruning(vsi, false, false);
+
 	return ret;
 }
 
 /**
- * ice_vsi_vlan_setup - Setup vlan offload properties on a VSI
- * @vsi: VSI to setup vlan properties for
+ * ice_vsi_vlan_setup - Setup VLAN offload properties on a VSI
+ * @vsi: VSI to setup VLAN properties for
  */
 static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
 {
@@ -3838,290 +3485,26 @@
 }
 
 /**
- * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up
- * @vsi: the VSI being brought back up
- */
-static int ice_restore_vlan(struct ice_vsi *vsi)
-{
-	int err;
-	u16 vid;
-
-	if (!vsi->netdev)
-		return -EINVAL;
-
-	err = ice_vsi_vlan_setup(vsi);
-	if (err)
-		return err;
-
-	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) {
-		err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-/**
- * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
- * @ring: The Tx ring to configure
- * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
- * @pf_q: queue index in the PF space
- *
- * Configure the Tx descriptor ring in TLAN context.
- */
-static void
-ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
-{
-	struct ice_vsi *vsi = ring->vsi;
-	struct ice_hw *hw = &vsi->back->hw;
-
-	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
-
-	tlan_ctx->port_num = vsi->port_info->lport;
-
-	/* Transmit Queue Length */
-	tlan_ctx->qlen = ring->count;
-
-	/* PF number */
-	tlan_ctx->pf_num = hw->pf_id;
-
-	/* queue belongs to a specific VSI type
-	 * VF / VM index should be programmed per vmvf_type setting:
-	 * for vmvf_type = VF, it is VF number between 0-256
-	 * for vmvf_type = VM, it is VM number between 0-767
-	 * for PF or EMP this field should be set to zero
-	 */
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
-		break;
-	default:
-		return;
-	}
-
-	/* make sure the context is associated with the right VSI */
-	tlan_ctx->src_vsi = vsi->vsi_num;
-
-	tlan_ctx->tso_ena = ICE_TX_LEGACY;
-	tlan_ctx->tso_qnum = pf_q;
-
-	/* Legacy or Advanced Host Interface:
-	 * 0: Advanced Host Interface
-	 * 1: Legacy Host Interface
-	 */
-	tlan_ctx->legacy_int = ICE_TX_LEGACY;
-}
-
-/**
- * ice_vsi_cfg_txqs - Configure the VSI for Tx
- * @vsi: the VSI being configured
- *
- * Return 0 on success and a negative value on error
- * Configure the Tx VSI for operation.
- */
-static int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
-{
-	struct ice_aqc_add_tx_qgrp *qg_buf;
-	struct ice_aqc_add_txqs_perq *txq;
-	struct ice_pf *pf = vsi->back;
-	enum ice_status status;
-	u16 buf_len, i, pf_q;
-	int err = 0, tc = 0;
-	u8 num_q_grps;
-
-	buf_len = sizeof(struct ice_aqc_add_tx_qgrp);
-	qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL);
-	if (!qg_buf)
-		return -ENOMEM;
-
-	if (vsi->num_txq > ICE_MAX_TXQ_PER_TXQG) {
-		err = -EINVAL;
-		goto err_cfg_txqs;
-	}
-	qg_buf->num_txqs = 1;
-	num_q_grps = 1;
-
-	/* set up and configure the tx queues */
-	ice_for_each_txq(vsi, i) {
-		struct ice_tlan_ctx tlan_ctx = { 0 };
-
-		pf_q = vsi->txq_map[i];
-		ice_setup_tx_ctx(vsi->tx_rings[i], &tlan_ctx, pf_q);
-		/* copy context contents into the qg_buf */
-		qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
-		ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
-			    ice_tlan_ctx_info);
-
-		/* init queue specific tail reg. It is referred as transmit
-		 * comm scheduler queue doorbell.
-		 */
-		vsi->tx_rings[i]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-		status = ice_ena_vsi_txq(vsi->port_info, vsi->vsi_num, tc,
-					 num_q_grps, qg_buf, buf_len, NULL);
-		if (status) {
-			dev_err(&vsi->back->pdev->dev,
-				"Failed to set LAN Tx queue context, error: %d\n",
-				status);
-			err = -ENODEV;
-			goto err_cfg_txqs;
-		}
-
-		/* Add Tx Queue TEID into the VSI tx ring from the response
-		 * This will complete configuring and enabling the queue.
-		 */
-		txq = &qg_buf->txqs[0];
-		if (pf_q == le16_to_cpu(txq->txq_id))
-			vsi->tx_rings[i]->txq_teid =
-				le32_to_cpu(txq->q_teid);
-	}
-err_cfg_txqs:
-	devm_kfree(&pf->pdev->dev, qg_buf);
-	return err;
-}
-
-/**
- * ice_setup_rx_ctx - Configure a receive ring context
- * @ring: The Rx ring to configure
- *
- * Configure the Rx descriptor ring in RLAN context.
- */
-static int ice_setup_rx_ctx(struct ice_ring *ring)
-{
-	struct ice_vsi *vsi = ring->vsi;
-	struct ice_hw *hw = &vsi->back->hw;
-	u32 rxdid = ICE_RXDID_FLEX_NIC;
-	struct ice_rlan_ctx rlan_ctx;
-	u32 regval;
-	u16 pf_q;
-	int err;
-
-	/* what is RX queue number in global space of 2K rx queues */
-	pf_q = vsi->rxq_map[ring->q_index];
-
-	/* clear the context structure first */
-	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
-
-	rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S;
-
-	rlan_ctx.qlen = ring->count;
-
-	/* Receive Packet Data Buffer Size.
-	 * The Packet Data Buffer Size is defined in 128 byte units.
-	 */
-	rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
-
-	/* use 32 byte descriptors */
-	rlan_ctx.dsize = 1;
-
-	/* Strip the Ethernet CRC bytes before the packet is posted to host
-	 * memory.
-	 */
-	rlan_ctx.crcstrip = 1;
-
-	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
-	rlan_ctx.l2tsel = 1;
-
-	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
-	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
-	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
-
-	/* This controls whether VLAN is stripped from inner headers
-	 * The VLAN in the inner L2 header is stripped to the receive
-	 * descriptor if enabled by this flag.
-	 */
-	rlan_ctx.showiv = 0;
-
-	/* Max packet size for this queue - must not be set to a larger value
-	 * than 5 x DBUF
-	 */
-	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
-			       ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
-
-	/* Rx queue threshold in units of 64 */
-	rlan_ctx.lrxqthresh = 1;
-
-	 /* Enable Flexible Descriptors in the queue context which
-	  * allows this driver to select a specific receive descriptor format
-	  */
-	regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
-	regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
-		QRXFLXP_CNTXT_RXDID_IDX_M;
-
-	/* increasing context priority to pick up profile id;
-	 * default is 0x01; setting to 0x03 to ensure profile
-	 * is programming if prev context is of same priority
-	 */
-	regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
-		QRXFLXP_CNTXT_RXDID_PRIO_M;
-
-	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
-
-	/* Absolute queue number out of 2K needs to be passed */
-	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
-	if (err) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
-			pf_q, err);
-		return -EIO;
-	}
-
-	/* init queue specific tail register */
-	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
-	writel(0, ring->tail);
-	ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-
-	return 0;
-}
-
-/**
- * ice_vsi_cfg_rxqs - Configure the VSI for Rx
- * @vsi: the VSI being configured
- *
- * Return 0 on success and a negative value on error
- * Configure the Rx VSI for operation.
- */
-static int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
-{
-	int err = 0;
-	u16 i;
-
-	if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
-		vsi->max_frame = vsi->netdev->mtu +
-			ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-	else
-		vsi->max_frame = ICE_RXBUF_2048;
-
-	vsi->rx_buf_len = ICE_RXBUF_2048;
-	/* set up individual rings */
-	for (i = 0; i < vsi->num_rxq && !err; i++)
-		err = ice_setup_rx_ctx(vsi->rx_rings[i]);
-
-	if (err) {
-		dev_err(&vsi->back->pdev->dev, "ice_setup_rx_ctx failed\n");
-		return -EIO;
-	}
-	return err;
-}
-
-/**
  * ice_vsi_cfg - Setup the VSI
  * @vsi: the VSI being configured
  *
  * Return 0 on success and negative value on error
  */
-static int ice_vsi_cfg(struct ice_vsi *vsi)
+int ice_vsi_cfg(struct ice_vsi *vsi)
 {
 	int err;
 
 	if (vsi->netdev) {
 		ice_set_rx_mode(vsi->netdev);
-		err = ice_restore_vlan(vsi);
+
+		err = ice_vsi_vlan_setup(vsi);
+
 		if (err)
 			return err;
 	}
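+	/* map the current DCB traffic class config onto the VSI's rings */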
+	ice_vsi_cfg_dcb_rings(vsi);
 
-	err = ice_vsi_cfg_txqs(vsi);
+	err = ice_vsi_cfg_lan_txqs(vsi);
 	if (!err)
 		err = ice_vsi_cfg_rxqs(vsi);
 
@@ -4129,200 +3512,6 @@
 }
 
 /**
- * ice_vsi_stop_tx_rings - Disable Tx rings
- * @vsi: the VSI being configured
- */
-static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	enum ice_status status;
-	u32 *q_teids, val;
-	u16 *q_ids, i;
-	int err = 0;
-
-	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
-		return -EINVAL;
-
-	q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids),
-			       GFP_KERNEL);
-	if (!q_teids)
-		return -ENOMEM;
-
-	q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids),
-			     GFP_KERNEL);
-	if (!q_ids) {
-		err = -ENOMEM;
-		goto err_alloc_q_ids;
-	}
-
-	/* set up the tx queue list to be disabled */
-	ice_for_each_txq(vsi, i) {
-		u16 v_idx;
-
-		if (!vsi->tx_rings || !vsi->tx_rings[i]) {
-			err = -EINVAL;
-			goto err_out;
-		}
-
-		q_ids[i] = vsi->txq_map[i];
-		q_teids[i] = vsi->tx_rings[i]->txq_teid;
-
-		/* clear cause_ena bit for disabled queues */
-		val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx));
-		val &= ~QINT_TQCTL_CAUSE_ENA_M;
-		wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val);
-
-		/* software is expected to wait for 100 ns */
-		ndelay(100);
-
-		/* trigger a software interrupt for the vector associated to
-		 * the queue to schedule napi handler
-		 */
-		v_idx = vsi->tx_rings[i]->q_vector->v_idx;
-		wr32(hw, GLINT_DYN_CTL(vsi->base_vector + v_idx),
-		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
-	}
-	status = ice_dis_vsi_txq(vsi->port_info, vsi->num_txq, q_ids, q_teids,
-				 NULL);
-	if (status) {
-		dev_err(&pf->pdev->dev,
-			"Failed to disable LAN Tx queues, error: %d\n",
-			status);
-		err = -ENODEV;
-	}
-
-err_out:
-	devm_kfree(&pf->pdev->dev, q_ids);
-
-err_alloc_q_ids:
-	devm_kfree(&pf->pdev->dev, q_teids);
-
-	return err;
-}
-
-/**
- * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
- * @pf: the PF being configured
- * @pf_q: the PF queue
- * @ena: enable or disable state of the queue
- *
- * This routine will wait for the given Rx queue of the PF to reach the
- * enabled or disabled state.
- * Returns -ETIMEDOUT in case of failing to reach the requested state after
- * multiple retries; else will return 0 in case of success.
- */
-static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
-{
-	int i;
-
-	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
-		u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q));
-
-		if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
-			break;
-
-		usleep_range(10, 20);
-	}
-	if (i >= ICE_Q_WAIT_RETRY_LIMIT)
-		return -ETIMEDOUT;
-
-	return 0;
-}
-
-/**
- * ice_vsi_ctrl_rx_rings - Start or stop a VSI's rx rings
- * @vsi: the VSI being configured
- * @ena: start or stop the rx rings
- */
-static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	int i, j, ret = 0;
-
-	for (i = 0; i < vsi->num_rxq; i++) {
-		int pf_q = vsi->rxq_map[i];
-		u32 rx_reg;
-
-		for (j = 0; j < ICE_Q_WAIT_MAX_RETRY; j++) {
-			rx_reg = rd32(hw, QRX_CTRL(pf_q));
-			if (((rx_reg >> QRX_CTRL_QENA_REQ_S) & 1) ==
-			    ((rx_reg >> QRX_CTRL_QENA_STAT_S) & 1))
-				break;
-			usleep_range(1000, 2000);
-		}
-
-		/* Skip if the queue is already in the requested state */
-		if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
-			continue;
-
-		/* turn on/off the queue */
-		if (ena)
-			rx_reg |= QRX_CTRL_QENA_REQ_M;
-		else
-			rx_reg &= ~QRX_CTRL_QENA_REQ_M;
-		wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
-		/* wait for the change to finish */
-		ret = ice_pf_rxq_wait(pf, pf_q, ena);
-		if (ret) {
-			dev_err(&pf->pdev->dev,
-				"VSI idx %d Rx ring %d %sable timeout\n",
-				vsi->idx, pf_q, (ena ? "en" : "dis"));
-			break;
-		}
-	}
-
-	return ret;
-}
-
-/**
- * ice_vsi_start_rx_rings - start VSI's rx rings
- * @vsi: the VSI whose rings are to be started
- *
- * Returns 0 on success and a negative value on error
- */
-static int ice_vsi_start_rx_rings(struct ice_vsi *vsi)
-{
-	return ice_vsi_ctrl_rx_rings(vsi, true);
-}
-
-/**
- * ice_vsi_stop_rx_rings - stop VSI's rx rings
- * @vsi: the VSI
- *
- * Returns 0 on success and a negative value on error
- */
-static int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
-{
-	return ice_vsi_ctrl_rx_rings(vsi, false);
-}
-
-/**
- * ice_vsi_stop_tx_rx_rings - stop VSI's tx and rx rings
- * @vsi: the VSI
- * Returns 0 on success and a negative value on error
- */
-static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi)
-{
-	int err_tx, err_rx;
-
-	err_tx = ice_vsi_stop_tx_rings(vsi);
-	if (err_tx)
-		dev_dbg(&vsi->back->pdev->dev, "Failed to disable Tx rings\n");
-
-	err_rx = ice_vsi_stop_rx_rings(vsi);
-	if (err_rx)
-		dev_dbg(&vsi->back->pdev->dev, "Failed to disable Rx rings\n");
-
-	if (err_tx || err_rx)
-		return -EIO;
-
-	return 0;
-}
-
-/**
  * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
  * @vsi: the VSI being configured
  */
@@ -4333,8 +3522,12 @@
 	if (!vsi->netdev)
 		return;
 
-	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-		napi_enable(&vsi->q_vectors[q_idx]->napi);
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
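+		/* only enable NAPI on vectors that actually own a ring */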
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_enable(&q_vector->napi);
+	}
 }
 
 /**
@@ -4348,10 +3541,7 @@
 	struct ice_pf *pf = vsi->back;
 	int err;
 
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-		ice_vsi_cfg_msix(vsi);
-	else
-		return -ENOTSUPP;
+	ice_vsi_cfg_msix(vsi);
 
 	/* Enable only Rx rings, Tx rings were enabled by the FW when the
 	 * Tx queue group list was configured and the context bits were
@@ -4375,7 +3565,7 @@
 
 	ice_service_task_schedule(pf);
 
-	return err;
+	return 0;
 }
 
 /**
@@ -4402,8 +3592,8 @@
  * This function fetches stats from the ring considering the atomic operations
  * that needs to be performed to read u64 values in 32 bit machine.
  */
-static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts,
-					 u64 *bytes)
+static void
+ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes)
 {
 	unsigned int start;
 	*pkts = 0;
@@ -4419,122 +3609,6 @@
 }
 
 /**
- * ice_stat_update40 - read 40 bit stat from the chip and update stat values
- * @hw: ptr to the hardware info
- * @hireg: high 32 bit HW register to read from
- * @loreg: low 32 bit HW register to read from
- * @prev_stat_loaded: bool to specify if previous stats are loaded
- * @prev_stat: ptr to previous loaded stat value
- * @cur_stat: ptr to current stat value
- */
-static void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg,
-			      bool prev_stat_loaded, u64 *prev_stat,
-			      u64 *cur_stat)
-{
-	u64 new_data;
-
-	new_data = rd32(hw, loreg);
-	new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
-
-	/* device stats are not reset at PFR, they likely will not be zeroed
-	 * when the driver starts. So save the first values read and use them as
-	 * offsets to be subtracted from the raw values in order to report stats
-	 * that count from zero.
-	 */
-	if (!prev_stat_loaded)
-		*prev_stat = new_data;
-	if (likely(new_data >= *prev_stat))
-		*cur_stat = new_data - *prev_stat;
-	else
-		/* to manage the potential roll-over */
-		*cur_stat = (new_data + BIT_ULL(40)) - *prev_stat;
-	*cur_stat &= 0xFFFFFFFFFFULL;
-}
-
-/**
- * ice_stat_update32 - read 32 bit stat from the chip and update stat values
- * @hw: ptr to the hardware info
- * @reg: HW register to read from
- * @prev_stat_loaded: bool to specify if previous stats are loaded
- * @prev_stat: ptr to previous loaded stat value
- * @cur_stat: ptr to current stat value
- */
-static void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
-			      u64 *prev_stat, u64 *cur_stat)
-{
-	u32 new_data;
-
-	new_data = rd32(hw, reg);
-
-	/* device stats are not reset at PFR, they likely will not be zeroed
-	 * when the driver starts. So save the first values read and use them as
-	 * offsets to be subtracted from the raw values in order to report stats
-	 * that count from zero.
-	 */
-	if (!prev_stat_loaded)
-		*prev_stat = new_data;
-	if (likely(new_data >= *prev_stat))
-		*cur_stat = new_data - *prev_stat;
-	else
-		/* to manage the potential roll-over */
-		*cur_stat = (new_data + BIT_ULL(32)) - *prev_stat;
-}
-
-/**
- * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
- * @vsi: the VSI to be updated
- */
-static void ice_update_eth_stats(struct ice_vsi *vsi)
-{
-	struct ice_eth_stats *prev_es, *cur_es;
-	struct ice_hw *hw = &vsi->back->hw;
-	u16 vsi_num = vsi->vsi_num;    /* HW absolute index of a VSI */
-
-	prev_es = &vsi->eth_stats_prev;
-	cur_es = &vsi->eth_stats;
-
-	ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->rx_bytes,
-			  &cur_es->rx_bytes);
-
-	ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->rx_unicast,
-			  &cur_es->rx_unicast);
-
-	ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->rx_multicast,
-			  &cur_es->rx_multicast);
-
-	ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->rx_broadcast,
-			  &cur_es->rx_broadcast);
-
-	ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
-			  &prev_es->rx_discards, &cur_es->rx_discards);
-
-	ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->tx_bytes,
-			  &cur_es->tx_bytes);
-
-	ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->tx_unicast,
-			  &cur_es->tx_unicast);
-
-	ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->tx_multicast,
-			  &cur_es->tx_multicast);
-
-	ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num),
-			  vsi->stat_offsets_loaded, &prev_es->tx_broadcast,
-			  &cur_es->tx_broadcast);
-
-	ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
-			  &prev_es->tx_errors, &cur_es->tx_errors);
-
-	vsi->stat_offsets_loaded = true;
-}
-
-/**
  * ice_update_vsi_ring_stats - Update VSI stats counters
  * @vsi: the VSI to be updated
  */
@@ -4588,7 +3662,7 @@
  * ice_update_vsi_stats - Update VSI stats counters
  * @vsi: the VSI to be updated
  */
-static void ice_update_vsi_stats(struct ice_vsi *vsi)
+void ice_update_vsi_stats(struct ice_vsi *vsi)
 {
 	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
 	struct ice_eth_stats *cur_es = &vsi->eth_stats;
@@ -4615,6 +3689,8 @@
 		cur_ns->rx_errors = pf->stats.crc_errors +
 				    pf->stats.illegal_bytes;
 		cur_ns->rx_length_errors = pf->stats.rx_len_errors;
+		/* record drops from the port level */
+		cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
 	}
 }
 
@@ -4622,147 +3698,139 @@
  * ice_update_pf_stats - Update PF port stats counters
  * @pf: PF whose stats needs to be updated
  */
-static void ice_update_pf_stats(struct ice_pf *pf)
+void ice_update_pf_stats(struct ice_pf *pf)
 {
 	struct ice_hw_port_stats *prev_ps, *cur_ps;
 	struct ice_hw *hw = &pf->hw;
-	u8 pf_id;
+	u8 port;
 
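+	/* port-level stat registers are indexed by physical port (lport),
+	 * not by PF ID
+	 */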
+	port = hw->port_info->lport;
 	prev_ps = &pf->stats_prev;
 	cur_ps = &pf->stats;
-	pf_id = hw->pf_id;
 
-	ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.rx_bytes,
+	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_bytes,
 			  &cur_ps->eth.rx_bytes);
 
-	ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.rx_unicast,
+	ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_unicast,
 			  &cur_ps->eth.rx_unicast);
 
-	ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.rx_multicast,
+	ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_multicast,
 			  &cur_ps->eth.rx_multicast);
 
-	ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast,
+	ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_broadcast,
 			  &cur_ps->eth.rx_broadcast);
 
-	ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.tx_bytes,
+	ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_discards,
+			  &cur_ps->eth.rx_discards);
+
+	ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_bytes,
 			  &cur_ps->eth.tx_bytes);
 
-	ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.tx_unicast,
+	ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_unicast,
 			  &cur_ps->eth.tx_unicast);
 
-	ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.tx_multicast,
+	ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_multicast,
 			  &cur_ps->eth.tx_multicast);
 
-	ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast,
+	ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_broadcast,
 			  &cur_ps->eth.tx_broadcast);
 
-	ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_dropped_link_down,
 			  &cur_ps->tx_dropped_link_down);
 
-	ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->rx_size_64,
-			  &cur_ps->rx_size_64);
+	ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_64, &cur_ps->rx_size_64);
 
-	ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->rx_size_127,
-			  &cur_ps->rx_size_127);
+	ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_127, &cur_ps->rx_size_127);
 
-	ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->rx_size_255,
-			  &cur_ps->rx_size_255);
+	ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_255, &cur_ps->rx_size_255);
 
-	ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->rx_size_511,
-			  &cur_ps->rx_size_511);
+	ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_511, &cur_ps->rx_size_511);
 
-	ice_stat_update40(hw, GLPRT_PRC1023H(pf_id),
-			  GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
 
-	ice_stat_update40(hw, GLPRT_PRC1522H(pf_id),
-			  GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
 
-	ice_stat_update40(hw, GLPRT_PRC9522H(pf_id),
-			  GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);
 
-	ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->tx_size_64,
-			  &cur_ps->tx_size_64);
+	ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_64, &cur_ps->tx_size_64);
 
-	ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->tx_size_127,
-			  &cur_ps->tx_size_127);
+	ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_127, &cur_ps->tx_size_127);
 
-	ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->tx_size_255,
-			  &cur_ps->tx_size_255);
+	ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_255, &cur_ps->tx_size_255);
 
-	ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id),
-			  pf->stat_prev_loaded, &prev_ps->tx_size_511,
-			  &cur_ps->tx_size_511);
+	ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_511, &cur_ps->tx_size_511);
 
-	ice_stat_update40(hw, GLPRT_PTC1023H(pf_id),
-			  GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
 
-	ice_stat_update40(hw, GLPRT_PTC1522H(pf_id),
-			  GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
 
-	ice_stat_update40(hw, GLPRT_PTC9522H(pf_id),
-			  GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
 
-	ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
 
-	ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
 
-	ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
 
-	ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
 
-	ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded,
+	ice_update_dcb_stats(pf);
+
+	ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
 			  &prev_ps->crc_errors, &cur_ps->crc_errors);
 
-	ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
 			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
 
-	ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
 			  &prev_ps->mac_local_faults,
 			  &cur_ps->mac_local_faults);
 
-	ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
 			  &prev_ps->mac_remote_faults,
 			  &cur_ps->mac_remote_faults);
 
-	ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
 
-	ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);
 
-	ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);
 
-	ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);
 
-	ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
 
 	pf->stat_prev_loaded = true;
@@ -4782,12 +3850,16 @@
 
 	vsi_stats = &vsi->net_stats;
 
-	if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq)
+	if (!vsi->num_txq || !vsi->num_rxq)
 		return;
+
 	/* netdev packet/byte stats come from ring counter. These are obtained
 	 * by summing up ring counters (done by ice_update_vsi_ring_stats).
+	 * But, only call the update routine and read the registers if VSI is
+	 * not down.
 	 */
-	ice_update_vsi_ring_stats(vsi);
+	if (!test_bit(__ICE_DOWN, vsi->state))
+		ice_update_vsi_ring_stats(vsi);
 	stats->tx_packets = vsi_stats->tx_packets;
 	stats->tx_bytes = vsi_stats->tx_bytes;
 	stats->rx_packets = vsi_stats->rx_packets;
@@ -4817,8 +3889,12 @@
 	if (!vsi->netdev)
 		return;
 
-	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-		napi_disable(&vsi->q_vectors[q_idx]->napi);
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_disable(&q_vector->napi);
+	}
 }
 
 /**
@@ -4827,7 +3903,7 @@
  */
 int ice_down(struct ice_vsi *vsi)
 {
-	int i, err;
+	int i, tx_err, rx_err, link_err = 0;
 
 	/* Caller of this function is expected to set the
 	 * vsi->state __ICE_DOWN bit
@@ -4838,19 +3914,43 @@
 	}
 
 	ice_vsi_dis_irq(vsi);
-	err = ice_vsi_stop_tx_rx_rings(vsi);
+
+	tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (tx_err)
+		netdev_err(vsi->netdev,
+			   "Failed stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, tx_err);
+
+	rx_err = ice_vsi_stop_rx_rings(vsi);
+	if (rx_err)
+		netdev_err(vsi->netdev,
+			   "Failed stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, rx_err);
+
 	ice_napi_disable_all(vsi);
 
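+	/* optionally force the PHY link down too, controlled by the
+	 * link-down-on-close flag
+	 */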
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
+		link_err = ice_force_phys_link_state(vsi, false);
+		if (link_err)
+			netdev_err(vsi->netdev,
+				   "Failed to set physical link down, VSI %d error %d\n",
+				   vsi->vsi_num, link_err);
+	}
+
 	ice_for_each_txq(vsi, i)
 		ice_clean_tx_ring(vsi->tx_rings[i]);
 
 	ice_for_each_rxq(vsi, i)
 		ice_clean_rx_ring(vsi->rx_rings[i]);
 
-	if (err)
-		netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
+	if (tx_err || rx_err || link_err) {
+		netdev_err(vsi->netdev,
+			   "Failed to close VSI 0x%04X on switch 0x%04X\n",
 			   vsi->vsi_num, vsi->vsw->sw_id);
-	return err;
+		return -EIO;
+	}
+
+	return 0;
 }
 
 /**
@@ -4859,7 +3959,7 @@
  *
  * Return 0 on success, negative on failure
  */
-static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
 {
 	int i, err = 0;
 
@@ -4870,6 +3970,7 @@
 	}
 
 	ice_for_each_txq(vsi, i) {
+		vsi->tx_rings[i]->netdev = vsi->netdev;
 		err = ice_setup_tx_ring(vsi->tx_rings[i]);
 		if (err)
 			break;
@@ -4884,7 +3985,7 @@
  *
  * Return 0 on success, negative on failure
  */
-static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
 {
 	int i, err = 0;
 
@@ -4895,6 +3996,7 @@
 	}
 
 	ice_for_each_rxq(vsi, i) {
+		vsi->rx_rings[i]->netdev = vsi->netdev;
 		err = ice_setup_rx_ring(vsi->rx_rings[i]);
 		if (err)
 			break;
@@ -4904,56 +4006,6 @@
 }
 
 /**
- * ice_vsi_req_irq - Request IRQ from the OS
- * @vsi: The VSI IRQ is being requested for
- * @basename: name for the vector
- *
- * Return 0 on success and a negative value on error
- */
-static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename)
-{
-	struct ice_pf *pf = vsi->back;
-	int err = -EINVAL;
-
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-		err = ice_vsi_req_irq_msix(vsi, basename);
-
-	return err;
-}
-
-/**
- * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
- * @vsi: the VSI having resources freed
- */
-static void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
-{
-	int i;
-
-	if (!vsi->tx_rings)
-		return;
-
-	ice_for_each_txq(vsi, i)
-		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
-			ice_free_tx_ring(vsi->tx_rings[i]);
-}
-
-/**
- * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
- * @vsi: the VSI having resources freed
- */
-static void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
-{
-	int i;
-
-	if (!vsi->rx_rings)
-		return;
-
-	ice_for_each_rxq(vsi, i)
-		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
-			ice_free_rx_ring(vsi->rx_rings[i]);
-}
-
-/**
  * ice_vsi_open - Called when a network interface is made active
  * @vsi: the VSI to open
  *
@@ -4982,7 +4034,7 @@
 
 	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
 		 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
-	err = ice_vsi_req_irq(vsi, int_name);
+	err = ice_vsi_req_irq_msix(vsi, int_name);
 	if (err)
 		goto err_setup_rx;
 
@@ -5014,146 +4066,165 @@
 }
 
 /**
- * ice_vsi_close - Shut down a VSI
- * @vsi: the VSI being shut down
+ * ice_vsi_release_all - Delete all VSIs
+ * @pf: PF from which all VSIs are being removed
  */
-static void ice_vsi_close(struct ice_vsi *vsi)
+static void ice_vsi_release_all(struct ice_pf *pf)
 {
-	if (!test_and_set_bit(__ICE_DOWN, vsi->state))
-		ice_down(vsi);
+	int err, i;
 
-	ice_vsi_free_irq(vsi);
-	ice_vsi_free_tx_rings(vsi);
-	ice_vsi_free_rx_rings(vsi);
-}
-
-/**
- * ice_rss_clean - Delete RSS related VSI structures that hold user inputs
- * @vsi: the VSI being removed
- */
-static void ice_rss_clean(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf;
-
-	pf = vsi->back;
-
-	if (vsi->rss_hkey_user)
-		devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user);
-	if (vsi->rss_lut_user)
-		devm_kfree(&pf->pdev->dev, vsi->rss_lut_user);
-}
-
-/**
- * ice_vsi_release - Delete a VSI and free its resources
- * @vsi: the VSI being removed
- *
- * Returns 0 on success or < 0 on error
- */
-static int ice_vsi_release(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf;
-
-	if (!vsi->back)
-		return -ENODEV;
-	pf = vsi->back;
-
-	if (vsi->netdev) {
-		unregister_netdev(vsi->netdev);
-		free_netdev(vsi->netdev);
-		vsi->netdev = NULL;
-	}
-
-	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
-		ice_rss_clean(vsi);
-
-	/* Disable VSI and free resources */
-	ice_vsi_dis_irq(vsi);
-	ice_vsi_close(vsi);
-
-	/* reclaim interrupt vectors back to PF */
-	ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
-	pf->num_avail_msix += vsi->num_q_vectors;
-
-	ice_remove_vsi_fltr(&pf->hw, vsi->vsi_num);
-	ice_vsi_delete(vsi);
-	ice_vsi_free_q_vectors(vsi);
-	ice_vsi_clear_rings(vsi);
-
-	ice_vsi_put_qs(vsi);
-	pf->q_left_tx += vsi->alloc_txq;
-	pf->q_left_rx += vsi->alloc_rxq;
-
-	ice_vsi_clear(vsi);
-
-	return 0;
-}
-
-/**
- * ice_dis_vsi - pause a VSI
- * @vsi: the VSI being paused
- */
-static void ice_dis_vsi(struct ice_vsi *vsi)
-{
-	if (test_bit(__ICE_DOWN, vsi->state))
+	if (!pf->vsi)
 		return;
 
-	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+	ice_for_each_vsi(pf, i) {
+		if (!pf->vsi[i])
+			continue;
 
-	if (vsi->netdev && netif_running(vsi->netdev) &&
-	    vsi->type == ICE_VSI_PF)
-		vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
-
-	ice_vsi_close(vsi);
+		err = ice_vsi_release(pf->vsi[i]);
+		if (err)
+			dev_dbg(&pf->pdev->dev,
+				"Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
+				i, err, pf->vsi[i]->vsi_num);
+	}
 }
 
 /**
  * ice_ena_vsi - resume a VSI
- * @vsi: the VSI being resume
+ * @vsi: the VSI being resumed
+ * @locked: is the rtnl_lock already held
  */
-static void ice_ena_vsi(struct ice_vsi *vsi)
+static int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
 {
-	if (!test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state))
-		return;
+	int err = 0;
 
-	if (vsi->netdev && netif_running(vsi->netdev))
-		vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
-	else if (ice_vsi_open(vsi))
-		/* this clears the DOWN bit */
-		dev_dbg(&vsi->back->pdev->dev, "Failed open VSI 0x%04X on switch 0x%04X\n",
-			vsi->vsi_num, vsi->vsw->sw_id);
-}
+	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+		return 0;
 
-/**
- * ice_pf_dis_all_vsi - Pause all VSIs on a PF
- * @pf: the PF
- */
-static void ice_pf_dis_all_vsi(struct ice_pf *pf)
-{
-	int v;
+	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
 
-	ice_for_each_vsi(pf, v)
-		if (pf->vsi[v])
-			ice_dis_vsi(pf->vsi[v]);
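+	/* only a PF VSI has a netdev to reopen; take the RTNL lock unless
+	 * the caller already holds it
+	 */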
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			err = ice_open(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		}
+	}
+
+	return err;
 }
 
 /**
  * ice_pf_ena_all_vsi - Resume all VSIs on a PF
  * @pf: the PF
+ * @locked: is the rtnl_lock already held
  */
-static void ice_pf_ena_all_vsi(struct ice_pf *pf)
+#ifdef CONFIG_DCB
+int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
 {
 	int v;
 
 	ice_for_each_vsi(pf, v)
 		if (pf->vsi[v])
-			ice_ena_vsi(pf->vsi[v]);
+			if (ice_ena_vsi(pf->vsi[v], locked))
+				return -EIO;
+
+	return 0;
+}
+#endif /* CONFIG_DCB */
+
+/**
+ * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
+ * @pf: pointer to the PF instance
+ * @type: VSI type to rebuild
+ *
+ * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
+ */
+static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
+{
+	enum ice_status status;
+	int i, err;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi || vsi->type != type)
+			continue;
+
+		/* rebuild the VSI */
+		err = ice_vsi_rebuild(vsi);
+		if (err) {
+			dev_err(&pf->pdev->dev,
+				"rebuild VSI failed, err %d, VSI index %d, type %d\n",
+				err, vsi->idx, type);
+			return err;
+		}
+
+		/* replay filters for the VSI */
+		status = ice_replay_vsi(&pf->hw, vsi->idx);
+		if (status) {
+			dev_err(&pf->pdev->dev,
+				"replay VSI failed, status %d, VSI index %d, type %d\n",
+				status, vsi->idx, type);
+			return -EIO;
+		}
+
+		/* Re-map HW VSI number, using VSI handle that has been
+		 * previously validated in ice_replay_vsi() call above
+		 */
+		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+		/* enable the VSI */
+		err = ice_ena_vsi(vsi, false);
+		if (err) {
+			dev_err(&pf->pdev->dev,
+				"enable VSI failed, err %d, VSI index %d, type %d\n",
+				err, vsi->idx, type);
+			return err;
+		}
+
+		dev_info(&pf->pdev->dev, "VSI rebuilt. VSI index %d, type %d\n",
+			 vsi->idx, type);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_update_pf_netdev_link - Update PF netdev link status
+ * @pf: pointer to the PF instance
+ */
+static void ice_update_pf_netdev_link(struct ice_pf *pf)
+{
+	bool link_up;
+	int i;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi || vsi->type != ICE_VSI_PF)
+			return;
+
+		ice_get_link_status(pf->vsi[i]->port_info, &link_up);
+		if (link_up) {
+			netif_carrier_on(pf->vsi[i]->netdev);
+			netif_tx_wake_all_queues(pf->vsi[i]->netdev);
+		} else {
+			netif_carrier_off(pf->vsi[i]->netdev);
+			netif_tx_stop_all_queues(pf->vsi[i]->netdev);
+		}
+	}
 }
 
 /**
  * ice_rebuild - rebuild after reset
- * @pf: pf to rebuild
+ * @pf: PF to rebuild
+ * @reset_type: type of reset
  */
-static void ice_rebuild(struct ice_pf *pf)
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 {
 	struct device *dev = &pf->pdev->dev;
 	struct ice_hw *hw = &pf->hw;
@@ -5163,18 +4234,28 @@
 	if (test_bit(__ICE_DOWN, pf->state))
 		goto clear_recovery;
 
-	dev_dbg(dev, "rebuilding pf\n");
+	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
 
 	ret = ice_init_all_ctrlq(hw);
 	if (ret) {
 		dev_err(dev, "control queues init failed %d\n", ret);
-		goto fail_reset;
+		goto err_init_ctrlq;
+	}
+
+	/* if DDP was previously loaded successfully */
+	if (!ice_is_safe_mode(pf)) {
+		/* reload the SW DB of filter tables */
+		if (reset_type == ICE_RESET_PFR)
+			ice_fill_blk_tbls(hw);
+		else
+			/* Reload DDP Package after CORER/GLOBR reset */
+			ice_load_pkg(NULL, pf);
 	}
 
 	ret = ice_clear_pf_cfg(hw);
 	if (ret) {
 		dev_err(dev, "clear PF configuration failed %d\n", ret);
-		goto fail_reset;
+		goto err_init_ctrlq;
 	}
 
 	ice_clear_pxe_mode(hw);
@@ -5182,35 +4263,69 @@
 	ret = ice_get_caps(hw);
 	if (ret) {
 		dev_err(dev, "ice_get_caps failed %d\n", ret);
-		goto fail_reset;
+		goto err_init_ctrlq;
 	}
 
-	/* basic nic switch setup */
-	err = ice_setup_pf_sw(pf);
-	if (err) {
-		dev_err(dev, "ice_setup_pf_sw failed\n");
-		goto fail_reset;
-	}
+	err = ice_sched_init_port(hw->port_info);
+	if (err)
+		goto err_sched_init_port;
+
+	err = ice_update_link_info(hw->port_info);
+	if (err)
+		dev_err(&pf->pdev->dev, "Get link status error %d\n", err);
 
 	/* start misc vector */
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
-		err = ice_req_irq_msix_misc(pf);
+	err = ice_req_irq_msix_misc(pf);
+	if (err) {
+		dev_err(dev, "misc vector setup failed: %d\n", err);
+		goto err_sched_init_port;
+	}
+
+	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		ice_dcb_rebuild(pf);
+
+	/* rebuild PF VSI */
+	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
+	if (err) {
+		dev_err(dev, "PF VSI rebuild failed: %d\n", err);
+		goto err_vsi_rebuild;
+	}
+
+	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
+		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_VF);
 		if (err) {
-			dev_err(dev, "misc vector setup failed: %d\n", err);
-			goto fail_reset;
+			dev_err(dev, "VF VSI rebuild failed: %d\n", err);
+			goto err_vsi_rebuild;
 		}
 	}
 
-	/* restart the VSIs that were rebuilt and running before the reset */
-	ice_pf_ena_all_vsi(pf);
+	ice_update_pf_netdev_link(pf);
 
+	/* tell the firmware we are up */
+	ret = ice_send_version(pf);
+	if (ret) {
+		dev_err(dev,
+			"Rebuild failed due to error sending driver version: %d\n",
+			ret);
+		goto err_vsi_rebuild;
+	}
+
+	ice_replay_post(hw);
+
+	/* if we get here, reset flow is successful */
+	clear_bit(__ICE_RESET_FAILED, pf->state);
 	return;
 
-fail_reset:
+err_vsi_rebuild:
+err_sched_init_port:
+	ice_sched_cleanup_all(hw);
+err_init_ctrlq:
 	ice_shutdown_all_ctrlq(hw);
 	set_bit(__ICE_RESET_FAILED, pf->state);
 clear_recovery:
-	set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+	/* set this bit in PF state to control service task scheduling */
+	set_bit(__ICE_NEEDS_RESTART, pf->state);
+	dev_err(dev, "Rebuild failed, unload and reload driver\n");
 }
 
 /**
@@ -5228,22 +4343,22 @@
 	u8 count = 0;
 
 	if (new_mtu == netdev->mtu) {
-		netdev_warn(netdev, "mtu is already %u\n", netdev->mtu);
+		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
 		return 0;
 	}
 
 	if (new_mtu < netdev->min_mtu) {
-		netdev_err(netdev, "new mtu invalid. min_mtu is %d\n",
+		netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
 			   netdev->min_mtu);
 		return -EINVAL;
 	} else if (new_mtu > netdev->max_mtu) {
-		netdev_err(netdev, "new mtu invalid. max_mtu is %d\n",
+		netdev_err(netdev, "new MTU invalid. max_mtu is %d\n",
 			   netdev->min_mtu);
 		return -EINVAL;
 	}
 	/* if a reset is in progress, wait for some time for it to complete */
 	do {
-		if (ice_is_reset_recovery_pending(pf->state)) {
+		if (ice_is_reset_in_progress(pf->state)) {
 			count++;
 			usleep_range(1000, 2000);
 		} else {
@@ -5253,7 +4368,7 @@
 	} while (count < 100);
 
 	if (count == 100) {
-		netdev_err(netdev, "can't change mtu. Device is busy\n");
+		netdev_err(netdev, "can't change MTU. Device is busy\n");
 		return -EBUSY;
 	}
 
@@ -5265,18 +4380,18 @@
 
 		err = ice_down(vsi);
 		if (err) {
-			netdev_err(netdev, "change mtu if_up err %d\n", err);
+			netdev_err(netdev, "change MTU if_up err %d\n", err);
 			return err;
 		}
 
 		err = ice_up(vsi);
 		if (err) {
-			netdev_err(netdev, "change mtu if_up err %d\n", err);
+			netdev_err(netdev, "change MTU if_up err %d\n", err);
 			return err;
 		}
 	}
 
-	netdev_dbg(netdev, "changed mtu to %d\n", new_mtu);
+	netdev_info(netdev, "changed MTU to %d\n", new_mtu);
 	return 0;
 }
 
@@ -5299,7 +4414,7 @@
 		struct ice_aqc_get_set_rss_keys *buf =
 				  (struct ice_aqc_get_set_rss_keys *)seed;
 
-		status = ice_aq_set_rss_key(hw, vsi->vsi_num, buf);
+		status = ice_aq_set_rss_key(hw, vsi->idx, buf);
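+		/* the RSS AQ wrappers take the software VSI handle (idx)
+		 * rather than the absolute hardware VSI number
+		 */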
 
 		if (status) {
 			dev_err(&pf->pdev->dev,
@@ -5310,8 +4425,8 @@
 	}
 
 	if (lut) {
-		status = ice_aq_set_rss_lut(hw, vsi->vsi_num,
-					    vsi->rss_lut_type, lut, lut_size);
+		status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
+					    lut, lut_size);
 		if (status) {
 			dev_err(&pf->pdev->dev,
 				"Cannot set RSS lut, err %d aq_err %d\n",
@@ -5342,7 +4457,7 @@
 		struct ice_aqc_get_set_rss_keys *buf =
 				  (struct ice_aqc_get_set_rss_keys *)seed;
 
-		status = ice_aq_get_rss_key(hw, vsi->vsi_num, buf);
+		status = ice_aq_get_rss_key(hw, vsi->idx, buf);
 		if (status) {
 			dev_err(&pf->pdev->dev,
 				"Cannot get RSS key, err %d aq_err %d\n",
@@ -5352,8 +4467,8 @@
 	}
 
 	if (lut) {
-		status = ice_aq_get_rss_lut(hw, vsi->vsi_num,
-					    vsi->rss_lut_type, lut, lut_size);
+		status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
+					    lut, lut_size);
 		if (status) {
 			dev_err(&pf->pdev->dev,
 				"Cannot get RSS lut, err %d aq_err %d\n",
@@ -5366,27 +4481,292 @@
 }
 
 /**
+ * ice_bridge_getlink - Get the hardware bridge mode
+ * @skb: skb buff
+ * @pid: process ID
+ * @seq: RTNL message seq
+ * @dev: the netdev being configured
+ * @filter_mask: filter mask passed in
+ * @nlflags: netlink flags passed in
+ *
+ * Return the bridge mode (VEB/VEPA)
+ */
+static int
+ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+		   struct net_device *dev, u32 filter_mask, int nlflags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u16 bmode;
+
+	bmode = pf->first_sw->bridge_mode;
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
+				       filter_mask, NULL);
+}
+
+/**
+ * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
+ * @vsi: Pointer to VSI structure
+ * @bmode: Hardware bridge mode (VEB/VEPA)
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct ice_aqc_vsi_props *vsi_props;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	vsi_props = &vsi->info;
+
+	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+
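+	/* ALLOW_LB enables local loopback on the VSI; VEB switches locally
+	 * while VEPA hairpins traffic through the adjacent switch
+	 */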
+	if (bmode == BRIDGE_MODE_VEB)
+		/* change from VEPA to VEB mode */
+		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+	else
+		/* change from VEB to VEPA mode */
+		ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
+			bmode, status, hw->adminq.sq_last_status);
+		ret = -EIO;
+		goto out;
+	}
+	/* Update sw flags for bookkeeping */
+	vsi_props->sw_flags = ctxt->info.sw_flags;
+
+out:
+	devm_kfree(dev, ctxt);
+	return ret;
+}
+
+/**
+ * ice_bridge_setlink - Set the hardware bridge mode
+ * @dev: the netdev being configured
+ * @nlh: RTNL message
+ * @flags: bridge setlink flags
+ * @extack: netlink extended ack
+ *
+ * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
+ * hooked up. Iterates through the PF VSI list and sets the loopback mode (if
+ * not already set) for all VSIs connected to this switch, and also updates
+ * the unicast switch filter rules for the corresponding switch of the netdev.
+ */
+static int
+ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+		   u16 __always_unused flags,
+		   struct netlink_ext_ack __always_unused *extack)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_pf *pf = np->vsi->back;
+	struct nlattr *attr, *br_spec;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct ice_sw *pf_sw;
+	int rem, v, err = 0;
+
+	pf_sw = pf->first_sw;
+	/* find the attribute in the netlink message */
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (!br_spec)
+		return -EINVAL;
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		__u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+		mode = nla_get_u16(attr);
+		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
+			return -EINVAL;
+		/* Continue if bridge mode is not being flipped */
+		if (mode == pf_sw->bridge_mode)
+			continue;
+		/* Iterates through the PF VSI list and update the loopback
+		 * mode of the VSI
+		 */
+		ice_for_each_vsi(pf, v) {
+			if (!pf->vsi[v])
+				continue;
+			err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
+			if (err)
+				return err;
+		}
+
+		hw->evb_veb = (mode == BRIDGE_MODE_VEB);
+		/* Update the unicast switch filter rules for the corresponding
+		 * switch of the netdev
+		 */
+		status = ice_update_sw_rule_bridge_mode(hw);
+		if (status) {
+			netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %d\n",
+				   mode, status, hw->adminq.sq_last_status);
+			/* revert hw->evb_veb */
+			hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
+			return -EIO;
+		}
+
+		pf_sw->bridge_mode = mode;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ */
+static void ice_tx_timeout(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_ring *tx_ring = NULL;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int hung_queue = -1;
+	u32 i;
+
+	pf->tx_timeout_count++;
+
+	/* find the stopped queue the same way dev_watchdog() does */
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		unsigned long trans_start;
+		struct netdev_queue *q;
+
+		q = netdev_get_tx_queue(netdev, i);
+		trans_start = q->trans_start;
+		if (netif_xmit_stopped(q) &&
+		    time_after(jiffies,
+			       trans_start + netdev->watchdog_timeo)) {
+			hung_queue = i;
+			break;
+		}
+	}
+
+	if (i == netdev->num_tx_queues)
+		netdev_info(netdev, "tx_timeout: no netdev hung queue found\n");
+	else
+		/* now that we have an index, find the tx_ring struct */
+		for (i = 0; i < vsi->num_txq; i++)
+			if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+				if (hung_queue == vsi->tx_rings[i]->q_index) {
+					tx_ring = vsi->tx_rings[i];
+					break;
+				}
+
+	/* Reset recovery level if enough time has elapsed after last timeout.
+	 * Also ensure no new reset action happens before next timeout period.
+	 */
+	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
+		pf->tx_timeout_recovery_level = 1;
+	else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
+				       netdev->watchdog_timeo)))
+		return;
+
+	if (tx_ring) {
+		struct ice_hw *hw = &pf->hw;
+		u32 head, val = 0;
+
+		head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[hung_queue])) &
+			QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
+		/* Read interrupt register */
+		val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
+
+		netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %d, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
+			    vsi->vsi_num, hung_queue, tx_ring->next_to_clean,
+			    head, tx_ring->next_to_use, val);
+	}
+
+	pf->tx_timeout_last_recovery = jiffies;
+	netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n",
+		    pf->tx_timeout_recovery_level, hung_queue);
+
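+	/* escalate the requested reset on successive timeouts:
+	 * PF reset, then core reset, then global reset
+	 */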
+	switch (pf->tx_timeout_recovery_level) {
+	case 1:
+		set_bit(__ICE_PFR_REQ, pf->state);
+		break;
+	case 2:
+		set_bit(__ICE_CORER_REQ, pf->state);
+		break;
+	case 3:
+		set_bit(__ICE_GLOBR_REQ, pf->state);
+		break;
+	default:
+		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
+		set_bit(__ICE_DOWN, pf->state);
+		set_bit(__ICE_NEEDS_RESTART, vsi->state);
+		set_bit(__ICE_SERVICE_DIS, pf->state);
+		break;
+	}
+
+	ice_service_task_schedule(pf);
+	pf->tx_timeout_recovery_level++;
+}
+
+/**
  * ice_open - Called when a network interface becomes active
  * @netdev: network interface device structure
  *
  * The open entry point is called when a network interface is made
- * active by the system (IFF_UP).  At this point all resources needed
+ * active by the system (IFF_UP). At this point all resources needed
  * for transmit and receive operations are allocated, the interrupt
  * handler is registered with the OS, the netdev watchdog is enabled,
  * and the stack is notified that the interface is ready.
  *
  * Returns 0 on success, negative value on failure
  */
-static int ice_open(struct net_device *netdev)
+int ice_open(struct net_device *netdev)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
 	int err;
 
+	if (test_bit(__ICE_NEEDS_RESTART, vsi->back->state)) {
+		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
+		return -EIO;
+	}
+
 	netif_carrier_off(netdev);
 
-	err = ice_vsi_open(vsi);
+	pi = vsi->port_info;
+	err = ice_update_link_info(pi);
+	if (err) {
+		netdev_err(netdev, "Failed to get link info, error %d\n",
+			   err);
+		return err;
+	}
 
+	/* Set the PHY if there is media; otherwise turn off the PHY */
+	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		err = ice_force_phys_link_state(vsi, true);
+		if (err) {
+			netdev_err(netdev,
+				   "Failed to set physical link up, error %d\n",
+				   err);
+			return err;
+		}
+	} else {
+		err = ice_aq_set_link_restart_an(pi, false, NULL);
+		if (err) {
+			netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n",
+				   vsi->vsi_num, err);
+			return err;
+		}
+		set_bit(ICE_FLAG_NO_MEDIA, vsi->back->flags);
+	}
+
+	err = ice_vsi_open(vsi);
 	if (err)
 		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
 			   vsi->vsi_num, vsi->vsw->sw_id);
@@ -5398,12 +4778,12 @@
  * @netdev: network interface device structure
  *
  * The stop entry point is called when an interface is de-activated by the OS,
- * and the netdevice enters the DOWN state.  The hardware is still under the
+ * and the netdevice enters the DOWN state. The hardware is still under the
  * driver's control, but the netdev interface is disabled.
  *
  * Returns success only - not allowed to fail
  */
-static int ice_stop(struct net_device *netdev)
+int ice_stop(struct net_device *netdev)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
@@ -5427,14 +4807,14 @@
 	size_t len;
 
 	/* No point in doing any of this if neither checksum nor GSO are
-	 * being requested for this frame.  We can rule out both by just
+	 * being requested for this frame. We can rule out both by just
 	 * checking for CHECKSUM_PARTIAL
 	 */
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		return features;
 
 	/* We cannot support GSO if the MSS is going to be less than
-	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 * 64 bytes. If it is then we need to drop support for GSO.
 	 */
 	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
 		features &= ~NETIF_F_GSO_MASK;
@@ -5463,6 +4843,17 @@
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
+static const struct net_device_ops ice_netdev_safe_mode_ops = {
+	.ndo_open = ice_open,
+	.ndo_stop = ice_stop,
+	.ndo_start_xmit = ice_start_xmit,
+	.ndo_set_mac_address = ice_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ice_change_mtu,
+	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_tx_timeout = ice_tx_timeout,
+};
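+
+/* Editorial note (an assumption, not stated in this patch): the safe-mode
+ * ops table above deliberately omits the VLAN, bridge and SR-IOV callbacks
+ * found in ice_netdev_ops below, keeping only the minimal set needed to
+ * pass basic traffic when advanced features are unavailable.
+ */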
+
 static const struct net_device_ops ice_netdev_ops = {
 	.ndo_open = ice_open,
 	.ndo_stop = ice_stop,
@@ -5473,9 +4864,18 @@
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = ice_change_mtu,
 	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
+	.ndo_set_vf_mac = ice_set_vf_mac,
+	.ndo_get_vf_config = ice_get_vf_cfg,
+	.ndo_set_vf_trust = ice_set_vf_trust,
+	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
+	.ndo_set_vf_link_state = ice_set_vf_link_state,
 	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
 	.ndo_set_features = ice_set_features,
+	.ndo_bridge_getlink = ice_bridge_getlink,
+	.ndo_bridge_setlink = ice_bridge_setlink,
 	.ndo_fdb_add = ice_fdb_add,
 	.ndo_fdb_del = ice_fdb_del,
+	.ndo_tx_timeout = ice_tx_timeout,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 295a8cd..bcb431f 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -5,7 +5,7 @@
 
 /**
  * ice_aq_read_nvm
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @module_typeid: module pointer location in words from the NVM beginning
  * @offset: byte offset from the module beginning
  * @length: length of the section to be read (in bytes from the offset)
@@ -119,12 +119,69 @@
 
 	status = ice_read_sr_aq(hw, offset, 1, data, true);
 	if (!status)
-		*data = le16_to_cpu(*(__le16 *)data);
+		*data = le16_to_cpu(*(__force __le16 *)data);
 
 	return status;
 }
 
 /**
+ * ice_read_sr_buf_aq - Reads Shadow RAM buf via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16-bit words (data buf) from the SR using the ice_read_sr_aq
+ * method. Ownership of the NVM is taken before reading the buffer and later
+ * released.
+ */
+static enum ice_status
+ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
+{
+	enum ice_status status;
+	bool last_cmd = false;
+	u16 words_read = 0;
+	u16 i = 0;
+
+	do {
+		u16 read_size, off_w;
+
+		/* Calculate the number of words to read in this step.
+		 * It's not allowed to read more than one page at a time or
+		 * to cross page boundaries.
+		 */
+		off_w = offset % ICE_SR_SECTOR_SIZE_IN_WORDS;
+		read_size = off_w ?
+			min_t(u16, *words,
+			      (ICE_SR_SECTOR_SIZE_IN_WORDS - off_w)) :
+			min_t(u16, (*words - words_read),
+			      ICE_SR_SECTOR_SIZE_IN_WORDS);
+
+		/* Check if this is the last command; if so, set the proper flag */
+		if ((words_read + read_size) >= *words)
+			last_cmd = true;
+
+		status = ice_read_sr_aq(hw, offset, read_size,
+					data + words_read, last_cmd);
+		if (status)
+			goto read_nvm_buf_aq_exit;
+
+		/* Increment counter for words already read and move offset to
+		 * new read location
+		 */
+		words_read += read_size;
+		offset += read_size;
+	} while (words_read < *words);
+
+	for (i = 0; i < *words; i++)
+		data[i] = le16_to_cpu(((__force __le16 *)data)[i]);
+
+read_nvm_buf_aq_exit:
+	*words = words_read;
+	return status;
+}
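+
+/* Worked example (illustrative): assuming ICE_SR_SECTOR_SIZE_IN_WORDS is
+ * 0x800, a read of 0x900 words starting at offset 0x7F0 is split so that
+ * no request crosses a sector boundary:
+ *
+ *	step 1: off_w = 0x7F0, read_size = 0x010  (tops up the first sector)
+ *	step 2: off_w = 0x000, read_size = 0x800  (one full sector)
+ *	step 3: off_w = 0x000, read_size = 0x0F0, last_cmd = true
+ */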
+
+/**
  * ice_acquire_nvm - Generic request for acquiring the NVM ownership
  * @hw: pointer to the HW structure
  * @access: NVM access type (read or write)
@@ -137,7 +194,7 @@
 	if (hw->nvm.blank_nvm_mode)
 		return 0;
 
-	return ice_acquire_res(hw, ICE_NVM_RES_ID, access);
+	return ice_acquire_res(hw, ICE_NVM_RES_ID, access, ICE_NVM_TIMEOUT);
 }
 
 /**
@@ -178,7 +235,7 @@
 
 /**
  * ice_init_nvm - initializes NVM setting
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  *
  * This function reads and populates NVM settings such as Shadow RAM size,
  * max_timeout, and blank_nvm_mode
@@ -191,7 +248,7 @@
 	u32 fla, gens_stat;
 	u8 sr_size;
 
-	/* The SR size is stored regardless of the nvm programming mode
+	/* The SR size is stored regardless of the NVM programming mode
 	 * as the blank mode may be used in the factory line.
 	 */
 	gens_stat = rd32(hw, GLNVM_GENS);
@@ -234,3 +291,59 @@
 
 	return status;
 }
+
+/**
+ * ice_read_sr_buf - Reads Shadow RAM buf and acquire lock if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16-bit words (data buf) from the SR using the ice_read_sr_buf_aq
+ * method. The buffer read is preceded by taking NVM ownership and
+ * followed by its release.
+ */
+enum ice_status
+ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
+{
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (!status) {
+		status = ice_read_sr_buf_aq(hw, offset, words, data);
+		ice_release_nvm(hw);
+	}
+
+	return status;
+}
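+
+/* Usage sketch (illustrative; the offset and count are arbitrary values
+ * chosen for the example): read eight Shadow RAM words under the NVM lock:
+ *
+ *	u16 words = 8;
+ *	u16 data[8];
+ *
+ *	status = ice_read_sr_buf(hw, 0x0, &words, data);
+ *
+ * On success, words holds the count actually read and data[] is already
+ * converted to CPU byte order.
+ */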
+
+/**
+ * ice_nvm_validate_checksum
+ * @hw: pointer to the HW struct
+ *
+ * Verify NVM PFA checksum validity (0x0706)
+ */
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw)
+{
+	struct ice_aqc_nvm_checksum *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	cmd = &desc.params.nvm_checksum;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum);
+	cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	ice_release_nvm(hw);
+
+	if (!status)
+		if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT)
+			status = ICE_ERR_NVM_CHECKSUM;
+
+	return status;
+}
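+
+/* Usage sketch (illustrative; the caller and message are assumptions):
+ *
+ *	if (ice_nvm_validate_checksum(hw) == ICE_ERR_NVM_CHECKSUM)
+ *		dev_warn(dev, "NVM checksum invalid, update the NVM image\n");
+ */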
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index eeae199..2fde965 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -17,7 +17,6 @@
 {
 	struct ice_sched_node *root;
 	struct ice_hw *hw;
-	u16 max_children;
 
 	if (!pi)
 		return ICE_ERR_PARAM;
@@ -28,8 +27,8 @@
 	if (!root)
 		return ICE_ERR_NO_MEMORY;
 
-	max_children = le16_to_cpu(hw->layer_info[0].max_children);
-	root->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
+	/* coverity[suspicious_sizeof] */
+	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
 				      sizeof(*root), GFP_KERNEL);
 	if (!root->children) {
 		devm_kfree(ice_hw_to_dev(hw), root);
@@ -44,9 +43,9 @@
 /**
  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
- * @teid: node teid to search
+ * @teid: node TEID to search
  *
- * This function searches for a node matching the teid in the scheduling tree
+ * This function searches for a node matching the TEID in the scheduling tree
  * from the SW DB. The search is recursive and is restricted by the number of
  * layers it has searched through; stopping at the max supported layer.
  *
@@ -67,7 +66,7 @@
 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
 		return NULL;
 
-	/* Check if teid matches to any of the children nodes */
+	/* Check if TEID matches any of the child nodes */
 	for (i = 0; i < start_node->num_children; i++)
 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
 			return start_node->children[i];
@@ -86,6 +85,59 @@
 }
 
 /**
+ * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
+ * @hw: pointer to the HW struct
+ * @cmd_opc: cmd opcode
+ * @elems_req: number of elements to request
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_resp: returns total number of elements response
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function sends a scheduling elements cmd (cmd_opc)
+ */
+static enum ice_status
+ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
+			    u16 elems_req, void *buf, u16 buf_size,
+			    u16 *elems_resp, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sched_elem_cmd *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.sched_elem_cmd;
+	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
+	cmd->num_elem_req = cpu_to_le16(elems_req);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && elems_resp)
+		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
+
+	return status;
+}
+
+/**
+ * ice_aq_query_sched_elems - query scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to query
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements returned
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduling elements (0x0404)
+ */
+enum ice_status
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_get_elem *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
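+
+/* Usage sketch (illustrative): querying a single node by TEID, roughly
+ * what the ice_sched_query_elem() helper used below is assumed to do:
+ *
+ *	struct ice_aqc_get_elem buf = { 0 };
+ *	u16 num_ret = 0;
+ *
+ *	buf.generic[0].node_teid = cpu_to_le32(node_teid);
+ *	status = ice_aq_query_sched_elems(hw, 1, &buf, sizeof(buf),
+ *					  &num_ret, NULL);
+ */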
+
+/**
  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
  * @pi: port information structure
  * @layer: Scheduler layer of the node
@@ -98,9 +150,10 @@
 		   struct ice_aqc_txsched_elem_data *info)
 {
 	struct ice_sched_node *parent;
+	struct ice_aqc_get_elem elem;
 	struct ice_sched_node *node;
+	enum ice_status status;
 	struct ice_hw *hw;
-	u16 max_children;
 
 	if (!pi)
 		return ICE_ERR_PARAM;
@@ -117,12 +170,20 @@
 		return ICE_ERR_PARAM;
 	}
 
+	/* query the current node information from FW before adding it
+	 * to the SW DB
+	 */
+	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
+	if (status)
+		return status;
+
 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return ICE_ERR_NO_MEMORY;
-	max_children = le16_to_cpu(hw->layer_info[layer].max_children);
-	if (max_children) {
-		node->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
+	if (hw->max_children[layer]) {
+		/* coverity[suspicious_sizeof] */
+		node->children = devm_kcalloc(ice_hw_to_dev(hw),
+					      hw->max_children[layer],
 					      sizeof(*node), GFP_KERNEL);
 		if (!node->children) {
 			devm_kfree(ice_hw_to_dev(hw), node);
@@ -134,13 +195,13 @@
 	node->parent = parent;
 	node->tx_sched_layer = layer;
 	parent->children[parent->num_children++] = node;
-	memcpy(&node->info, info, sizeof(*info));
+	memcpy(&node->info, &elem.generic[0], sizeof(node->info));
 	return 0;
 }
 
 /**
  * ice_aq_delete_sched_elems - delete scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @grps_req: number of groups to delete
  * @buf: pointer to buffer
  * @buf_size: buffer size in bytes
@@ -154,30 +215,19 @@
 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
 			  u16 *grps_del, struct ice_sq_cd *cd)
 {
-	struct ice_aqc_add_move_delete_elem *cmd;
-	struct ice_aq_desc desc;
-	enum ice_status status;
-
-	cmd = &desc.params.add_move_delete_elem;
-	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
-	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
-	cmd->num_grps_req = cpu_to_le16(grps_req);
-
-	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
-	if (!status && grps_del)
-		*grps_del = le16_to_cpu(cmd->num_grps_updated);
-
-	return status;
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_del, cd);
 }
 
 /**
- * ice_sched_remove_elems - remove nodes from hw
- * @hw: pointer to the hw struct
+ * ice_sched_remove_elems - remove nodes from HW
+ * @hw: pointer to the HW struct
  * @parent: pointer to the parent node
  * @num_nodes: number of nodes
  * @node_teids: array of node teids to be deleted
  *
- * This function remove nodes from hw
+ * This function removes nodes from HW
  */
 static enum ice_status
 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
@@ -192,47 +242,35 @@
 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
 	if (!buf)
 		return ICE_ERR_NO_MEMORY;
+
 	buf->hdr.parent_teid = parent->info.node_teid;
 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
 	for (i = 0; i < num_nodes; i++)
 		buf->teid[i] = cpu_to_le32(node_teids[i]);
+
 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
 					   &num_groups_removed, NULL);
 	if (status || num_groups_removed != 1)
-		ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");
+		ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
+			  hw->adminq.sq_last_status);
+
 	devm_kfree(ice_hw_to_dev(hw), buf);
 	return status;
 }
 
 /**
  * ice_sched_get_first_node - get the first node of the given layer
- * @hw: pointer to the hw struct
+ * @pi: port information structure
  * @parent: pointer the base node of the subtree
  * @layer: layer number
  *
  * This function retrieves the first node of the given layer from the subtree
  */
 static struct ice_sched_node *
-ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
-			 u8 layer)
+ice_sched_get_first_node(struct ice_port_info *pi,
+			 struct ice_sched_node *parent, u8 layer)
 {
-	u8 i;
-
-	if (layer < hw->sw_entry_point_layer)
-		return NULL;
-	for (i = 0; i < parent->num_children; i++) {
-		struct ice_sched_node *node = parent->children[i];
-
-		if (node) {
-			if (node->tx_sched_layer == layer)
-				return node;
-			/* this recursion is intentional, and wouldn't
-			 * go more than 9 calls
-			 */
-			return ice_sched_get_first_node(hw, node, layer);
-		}
-	}
-	return NULL;
+	return pi->sib_head[parent->tc_num][layer];
 }
 
 /**
@@ -246,7 +284,7 @@
 {
 	u8 i;
 
-	if (!pi)
+	if (!pi || !pi->root)
 		return NULL;
 	for (i = 0; i < pi->root->num_children; i++)
 		if (pi->root->children[i]->tc_num == tc)
@@ -282,17 +320,13 @@
 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
 		u32 teid = le32_to_cpu(node->info.node_teid);
-		enum ice_status status;
 
-		status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
-		if (status)
-			ice_debug(hw, ICE_DBG_SCHED,
-				  "remove element failed %d\n", status);
+		ice_sched_remove_elems(hw, node->parent, 1, &teid);
 	}
 	parent = node->parent;
 	/* root has no parent */
 	if (parent) {
-		struct ice_sched_node *p, *tc_node;
+		struct ice_sched_node *p;
 
 		/* update the parent */
 		for (i = 0; i < parent->num_children; i++)
@@ -304,16 +338,7 @@
 				break;
 			}
 
-		/* search for previous sibling that points to this node and
-		 * remove the reference
-		 */
-		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
-		if (!tc_node) {
-			ice_debug(hw, ICE_DBG_SCHED,
-				  "Invalid TC number %d\n", node->tc_num);
-			goto err_exit;
-		}
-		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
+		p = ice_sched_get_first_node(pi, node, node->tx_sched_layer);
 		while (p) {
 			if (p->sibling == node) {
 				p->sibling = node->sibling;
@@ -321,8 +346,13 @@
 			}
 			p = p->sibling;
 		}
+
+		/* update the sibling head if head is getting removed */
+		if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node)
+			pi->sib_head[node->tc_num][node->tx_sched_layer] =
+				node->sibling;
 	}
-err_exit:
+
 	/* leaf nodes have no children */
 	if (node->children)
 		devm_kfree(ice_hw_to_dev(hw), node->children);
@@ -331,7 +361,7 @@
 
 /**
  * ice_aq_get_dflt_topo - gets default scheduler topology
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @lport: logical port number
  * @buf: pointer to buffer
  * @buf_size: buffer size in bytes
@@ -361,7 +391,7 @@
 
 /**
  * ice_aq_add_sched_elems - adds scheduling element
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @grps_req: the number of groups that are requested to be added
  * @buf: pointer to buffer
  * @buf_size: buffer size in bytes
@@ -375,57 +405,14 @@
 		       struct ice_aqc_add_elem *buf, u16 buf_size,
 		       u16 *grps_added, struct ice_sq_cd *cd)
 {
-	struct ice_aqc_add_move_delete_elem *cmd;
-	struct ice_aq_desc desc;
-	enum ice_status status;
-
-	cmd = &desc.params.add_move_delete_elem;
-	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
-	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
-
-	cmd->num_grps_req = cpu_to_le16(grps_req);
-	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
-	if (!status && grps_added)
-		*grps_added = le16_to_cpu(cmd->num_grps_updated);
-
-	return status;
-}
-
-/**
- * ice_suspend_resume_elems - suspend/resume scheduler elements
- * @hw: pointer to the hw struct
- * @elems_req: number of elements to suspend
- * @buf: pointer to buffer
- * @buf_size: buffer size in bytes
- * @elems_ret: returns total number of elements suspended
- * @cd: pointer to command details structure or NULL
- * @cmd_code: command code for suspend or resume
- *
- * suspend/resume scheduler elements
- */
-static enum ice_status
-ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
-			 struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
-			 u16 *elems_ret, struct ice_sq_cd *cd,
-			 enum ice_adminq_opc cmd_code)
-{
-	struct ice_aqc_get_cfg_elem *cmd;
-	struct ice_aq_desc desc;
-	enum ice_status status;
-
-	cmd = &desc.params.get_update_elem;
-	ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
-	cmd->num_elem_req = cpu_to_le16(elems_req);
-	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
-	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
-	if (!status && elems_ret)
-		*elems_ret = le16_to_cpu(cmd->num_elem_resp);
-	return status;
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_added, cd);
 }
 
 /**
  * ice_aq_suspend_sched_elems - suspend scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @elems_req: number of elements to suspend
  * @buf: pointer to buffer
  * @buf_size: buffer size in bytes
@@ -439,13 +426,14 @@
 			   struct ice_aqc_suspend_resume_elem *buf,
 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
 {
-	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
-					cd, ice_aqc_opc_suspend_sched_elems);
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
 }
 
 /**
  * ice_aq_resume_sched_elems - resume scheduler elements
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @elems_req: number of elements to resume
  * @buf: pointer to buffer
  * @buf_size: buffer size in bytes
@@ -459,13 +447,14 @@
 			  struct ice_aqc_suspend_resume_elem *buf,
 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
 {
-	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
-					cd, ice_aqc_opc_resume_sched_elems);
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
 }
 
 /**
  * ice_aq_query_sched_res - query scheduler resource
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @buf_size: buffer size in bytes
  * @buf: pointer to buffer
  * @cd: pointer to command details structure or NULL
@@ -484,13 +473,13 @@
 }
 
 /**
- * ice_sched_suspend_resume_elems - suspend or resume hw nodes
- * @hw: pointer to the hw struct
+ * ice_sched_suspend_resume_elems - suspend or resume HW nodes
+ * @hw: pointer to the HW struct
  * @num_nodes: number of nodes
  * @node_teids: array of node teids to be suspended or resumed
  * @suspend: true means suspend / false means resume
  *
- * This function suspends or resumes hw nodes
+ * This function suspends or resumes HW nodes
  */
 static enum ice_status
 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
@@ -524,25 +513,62 @@
 }
 
 /**
- * ice_sched_clear_tx_topo - clears the schduler tree nodes
- * @pi: port information structure
- *
- * This function removes all the nodes from HW as well as from SW DB.
+ * ice_alloc_lan_q_ctx - allocate LAN queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
  */
-static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+static enum ice_status
+ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_q_ctx *q_ctx;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return ICE_ERR_PARAM;
+	/* allocate LAN queue contexts */
+	if (!vsi_ctx->lan_q_ctx[tc]) {
+		vsi_ctx->lan_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
+						      new_numqs,
+						      sizeof(*q_ctx),
+						      GFP_KERNEL);
+		if (!vsi_ctx->lan_q_ctx[tc])
+			return ICE_ERR_NO_MEMORY;
+		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+		return 0;
+	}
+	/* the number of queues is increased, update the queue contexts */
+	if (new_numqs > vsi_ctx->num_lan_q_entries[tc]) {
+		u16 prev_num = vsi_ctx->num_lan_q_entries[tc];
+
+		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+				     sizeof(*q_ctx), GFP_KERNEL);
+		if (!q_ctx)
+			return ICE_ERR_NO_MEMORY;
+		memcpy(q_ctx, vsi_ctx->lan_q_ctx[tc],
+		       prev_num * sizeof(*q_ctx));
+		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->lan_q_ctx[tc]);
+		vsi_ctx->lan_q_ctx[tc] = q_ctx;
+		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+	}
+	return 0;
+}
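+
+/* Design note: the grow path above open-codes reallocation (allocate new,
+ * copy, free old) because the managed (devm) allocator used here has no
+ * krealloc() counterpart; the per-TC queue context array only ever grows.
+ */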
+
+/**
+ * ice_sched_clear_agg - clears the aggregator related information
+ * @hw: pointer to the hardware structure
+ *
+ * This function removes the aggregator list and frees up the aggregator
+ * related memory previously allocated.
+ */
+void ice_sched_clear_agg(struct ice_hw *hw)
 {
 	struct ice_sched_agg_info *agg_info;
-	struct ice_sched_vsi_info *vsi_elem;
 	struct ice_sched_agg_info *atmp;
-	struct ice_sched_vsi_info *tmp;
-	struct ice_hw *hw;
 
-	if (!pi)
-		return;
-
-	hw = pi->hw;
-
-	list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
+	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
 		struct ice_sched_agg_vsi_info *agg_vsi_info;
 		struct ice_sched_agg_vsi_info *vtmp;
 
@@ -551,15 +577,21 @@
 			list_del(&agg_vsi_info->list_entry);
 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
 		}
+		list_del(&agg_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), agg_info);
 	}
+}
 
-	/* remove the vsi list */
-	list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list,
-				 list_entry) {
-		list_del(&vsi_elem->list_entry);
-		devm_kfree(ice_hw_to_dev(hw), vsi_elem);
-	}
-
+/**
+ * ice_sched_clear_tx_topo - clears the scheduler tree nodes
+ * @pi: port information structure
+ *
+ * This function removes all the nodes from HW as well as from SW DB.
+ */
+static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+{
+	if (!pi)
+		return;
 	if (pi->root) {
 		ice_free_sched_node(pi, pi->root);
 		pi->root = NULL;
@@ -572,7 +604,7 @@
  *
  * Cleanup scheduling elements from SW DB
  */
-static void ice_sched_clear_port(struct ice_port_info *pi)
+void ice_sched_clear_port(struct ice_port_info *pi)
 {
 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
 		return;
@@ -586,19 +618,22 @@
 
 /**
  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  *
  * Cleanup scheduling elements from SW DB for all the ports
  */
 void ice_sched_cleanup_all(struct ice_hw *hw)
 {
-	if (!hw || !hw->port_info)
+	if (!hw)
 		return;
 
-	if (hw->layer_info)
+	if (hw->layer_info) {
 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+		hw->layer_info = NULL;
+	}
 
-	ice_sched_clear_port(hw->port_info);
+	if (hw->port_info)
+		ice_sched_clear_port(hw->port_info);
 
 	hw->num_tx_sched_layers = 0;
 	hw->num_tx_sched_phys_layers = 0;
@@ -607,41 +642,16 @@
 }
 
 /**
- * ice_sched_create_vsi_info_entry - create an empty new VSI entry
- * @pi: port information structure
- * @vsi_id: VSI Id
- *
- * This function creates a new VSI entry and adds it to list
- */
-static struct ice_sched_vsi_info *
-ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
-{
-	struct ice_sched_vsi_info *vsi_elem;
-
-	if (!pi)
-		return NULL;
-
-	vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem),
-				GFP_KERNEL);
-	if (!vsi_elem)
-		return NULL;
-
-	list_add(&vsi_elem->list_entry, &pi->vsi_info_list);
-	vsi_elem->vsi_id = vsi_id;
-	return vsi_elem;
-}
-
-/**
- * ice_sched_add_elems - add nodes to hw and SW DB
+ * ice_sched_add_elems - add nodes to HW and SW DB
  * @pi: port information structure
  * @tc_node: pointer to the branch node
  * @parent: pointer to the parent node
  * @layer: layer number to add nodes
  * @num_nodes: number of nodes
  * @num_nodes_added: pointer to num nodes added
- * @first_node_teid: if new nodes are added then return the teid of first node
+ * @first_node_teid: if new nodes are added then return the TEID of first node
  *
- * This function add nodes to hw as well as to SW DB for a given layer
+ * This function adds nodes to HW as well as to SW DB for a given layer
  */
 static enum ice_status
 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
@@ -653,10 +663,10 @@
 	u16 i, num_groups_added = 0;
 	enum ice_status status = 0;
 	struct ice_hw *hw = pi->hw;
-	u16 buf_size;
+	size_t buf_size;
 	u32 teid;
 
-	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
+	buf_size = struct_size(buf, generic, num_nodes - 1);
 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
 	if (!buf)
 		return ICE_ERR_NO_MEMORY;
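+
+	/* struct_size() above evaluates to
+	 *	sizeof(*buf) + (num_nodes - 1) * sizeof(buf->generic[0])
+	 * like the old open-coded expression, but saturates on overflow
+	 * instead of wrapping
+	 */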
@@ -671,15 +681,20 @@
 			ICE_AQC_ELEM_VALID_EIR;
 		buf->generic[i].data.generic = 0;
 		buf->generic[i].data.cir_bw.bw_profile_idx =
-			ICE_SCHED_DFLT_RL_PROF_ID;
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->generic[i].data.cir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
 		buf->generic[i].data.eir_bw.bw_profile_idx =
-			ICE_SCHED_DFLT_RL_PROF_ID;
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->generic[i].data.eir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
 	}
 
 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
 					&num_groups_added, NULL);
 	if (status || num_groups_added != 1) {
-		ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
+		ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
+			  hw->adminq.sq_last_status);
 		devm_kfree(ice_hw_to_dev(hw), buf);
 		return ICE_ERR_CFG;
 	}
@@ -697,7 +712,6 @@
 
 		teid = le32_to_cpu(buf->generic[i].node_teid);
 		new_node = ice_sched_find_node_by_teid(parent, teid);
-
 		if (!new_node) {
 			ice_debug(hw, ICE_DBG_SCHED,
 				  "Node is missing for teid =%d\n", teid);
@@ -709,14 +723,17 @@
 
 		/* add it to previous node sibling pointer */
 		/* Note: siblings are not linked across branches */
-		prev = ice_sched_get_first_node(hw, tc_node, layer);
-
+		prev = ice_sched_get_first_node(pi, tc_node, layer);
 		if (prev && prev != new_node) {
 			while (prev->sibling)
 				prev = prev->sibling;
 			prev->sibling = new_node;
 		}
 
+		/* initialize the sibling head */
+		if (!pi->sib_head[tc_node->tc_num][layer])
+			pi->sib_head[tc_node->tc_num][layer] = new_node;
+
 		if (i == 0)
 			*first_node_teid = teid;
 	}
@@ -732,7 +749,7 @@
  * @parent: pointer to parent node
  * @layer: layer number to add nodes
  * @num_nodes: number of nodes to be added
- * @first_node_teid: pointer to the first node teid
+ * @first_node_teid: pointer to the first node TEID
  * @num_nodes_added: pointer to number of nodes added
  *
 * This function adds nodes to a given layer.
@@ -760,8 +777,7 @@
 		return ICE_ERR_PARAM;
 
 	/* max children per node per layer */
-	max_child_nodes =
-	    le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children);
+	max_child_nodes = hw->max_children[parent->tx_sched_layer];
 
 	/* current number of children + required nodes exceed max children ? */
 	if ((parent->num_children + num_nodes) > max_child_nodes) {
@@ -785,7 +801,7 @@
 
 			*num_nodes_added += num_added;
 		}
-		/* Don't modify the first node teid memory if the first node was
+		/* Don't modify the first node TEID memory if the first node was
 		 * added already in the above call. Instead send some temp
 		 * memory for all other recursive calls.
 		 */
@@ -817,7 +833,7 @@
 
 /**
  * ice_sched_get_qgrp_layer - get the current queue group layer number
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  *
  * This function returns the current queue group layer number
  */
@@ -829,7 +845,7 @@
 
 /**
  * ice_sched_get_vsi_layer - get the current VSI layer number
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  *
  * This function returns the current VSI layer number
  */
@@ -840,7 +856,7 @@
 	 *     7               4
 	 *     5 or less       sw_entry_point_layer
 	 */
-	/* calculate the vsi layer based on number of layers. */
+	/* calculate the VSI layer based on number of layers. */
 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
 
@@ -851,86 +867,13 @@
 }
 
 /**
- * ice_sched_get_num_nodes_per_layer - Get the total number of nodes per layer
- * @pi: pointer to the port info struct
- * @layer: layer number
- *
- * This function calculates the number of nodes present in the scheduler tree
- * including all the branches for a given layer
- */
-static u16
-ice_sched_get_num_nodes_per_layer(struct ice_port_info *pi, u8 layer)
-{
-	struct ice_hw *hw;
-	u16 num_nodes = 0;
-	u8 i;
-
-	if (!pi)
-		return num_nodes;
-
-	hw = pi->hw;
-
-	/* Calculate the number of nodes for all TCs */
-	for (i = 0; i < pi->root->num_children; i++) {
-		struct ice_sched_node *tc_node, *node;
-
-		tc_node = pi->root->children[i];
-
-		/* Get the first node */
-		node = ice_sched_get_first_node(hw, tc_node, layer);
-		if (!node)
-			continue;
-
-		/* count the siblings */
-		while (node) {
-			num_nodes++;
-			node = node->sibling;
-		}
-	}
-
-	return num_nodes;
-}
-
-/**
- * ice_sched_val_max_nodes - check max number of nodes reached or not
- * @pi: port information structure
- * @new_num_nodes_per_layer: pointer to the new number of nodes array
- *
- * This function checks whether the scheduler tree layers have enough space to
- * add new nodes
- */
-static enum ice_status
-ice_sched_validate_for_max_nodes(struct ice_port_info *pi,
-				 u16 *new_num_nodes_per_layer)
-{
-	struct ice_hw *hw = pi->hw;
-	u8 i, qg_layer;
-	u16 num_nodes;
-
-	qg_layer = ice_sched_get_qgrp_layer(hw);
-
-	/* walk through all the layers from SW entry point to qgroup layer */
-	for (i = hw->sw_entry_point_layer; i <= qg_layer; i++) {
-		num_nodes = ice_sched_get_num_nodes_per_layer(pi, i);
-		if (num_nodes + new_num_nodes_per_layer[i] >
-		    le16_to_cpu(hw->layer_info[i].max_pf_nodes)) {
-			ice_debug(hw, ICE_DBG_SCHED,
-				  "max nodes reached for layer = %d\n", i);
-			return ICE_ERR_CFG;
-		}
-	}
-	return 0;
-}
-
-/**
  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
  * @pi: port information structure
  *
  * This function removes the leaf node that was created by the FW
  * during initialization
  */
-static void
-ice_rm_dflt_leaf_node(struct ice_port_info *pi)
+static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
 {
 	struct ice_sched_node *node;
 
@@ -958,8 +901,7 @@
  * This function frees all the nodes except root and TC that were created by
  * the FW during initialization
  */
-static void
-ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
+static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
 {
 	struct ice_sched_node *node;
 
@@ -1003,14 +945,12 @@
 	hw = pi->hw;
 
 	/* Query the Default Topology from FW */
-	buf = devm_kcalloc(ice_hw_to_dev(hw), ICE_TXSCHED_MAX_BRANCHES,
-			   sizeof(*buf), GFP_KERNEL);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
 	if (!buf)
 		return ICE_ERR_NO_MEMORY;
 
 	/* Query default scheduling tree topology */
-	status = ice_aq_get_dflt_topo(hw, pi->lport, buf,
-				      sizeof(*buf) * ICE_TXSCHED_MAX_BRANCHES,
+	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
 				      &num_branches, NULL);
 	if (status)
 		goto err_init_port;
@@ -1034,7 +974,7 @@
 		goto err_init_port;
 	}
 
-	/* If the last node is a leaf node then the index of the Q group
+	/* If the last node is a leaf node then the index of the queue group
 	 * layer is two less than the number of elements.
 	 */
 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
@@ -1074,8 +1014,6 @@
 	/* initialize the port for handling the scheduler tree */
 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
 	mutex_init(&pi->sched_lock);
-	INIT_LIST_HEAD(&pi->agg_list);
-	INIT_LIST_HEAD(&pi->vsi_info_list);
 
 err_init_port:
 	if (status && pi->root) {
@@ -1097,6 +1035,8 @@
 {
 	struct ice_aqc_query_txsched_res_resp *buf;
 	enum ice_status status = 0;
+	__le16 max_sibl;
+	u16 i;
 
 	if (hw->layer_info)
 		return status;
@@ -1115,10 +1055,22 @@
 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
 	hw->max_cgds = buf->sched_props.max_pf_cgds;
 
-	 hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
-				       (hw->num_tx_sched_layers *
-					sizeof(*hw->layer_info)),
-				       GFP_KERNEL);
+	/* The max sibling group size of the current layer refers to the max
+	 * children of the node in the layer below:
+	 * layer 1 node max children is layer 2 max sibling group size,
+	 * layer 2 node max children is layer 3 max sibling group size,
+	 * and so on. This array is populated from the root (index 0) down to
+	 * the qgroup layer 7. Leaf nodes have no children.
+	 */
+	for (i = 0; i < hw->num_tx_sched_layers; i++) {
+		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
+		hw->max_children[i] = le16_to_cpu(max_sibl);
+	}
+
+	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
+				      (hw->num_tx_sched_layers *
+				       sizeof(*hw->layer_info)),
+				      GFP_KERNEL);
 	if (!hw->layer_info) {
 		status = ICE_ERR_NO_MEMORY;
 		goto sched_query_out;
@@ -1130,29 +1082,8 @@
 }
 
 /**
- * ice_sched_get_vsi_info_entry - Get the vsi entry list for given vsi_id
- * @pi: port information structure
- * @vsi_id: vsi id
- *
- * This function retrieves the vsi list for the given vsi id
- */
-static struct ice_sched_vsi_info *
-ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
-{
-	struct ice_sched_vsi_info *list_elem;
-
-	if (!pi)
-		return NULL;
-
-	list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry)
-		if (list_elem->vsi_id == vsi_id)
-			return list_elem;
-	return NULL;
-}
-
-/**
  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @base: pointer to the base node
  * @node: pointer to the node to search
  *
@@ -1184,38 +1115,36 @@
 }
 
 /**
- * ice_sched_get_free_qparent - Get a free lan or rdma q group node
+ * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
  * @pi: port information structure
- * @vsi_id: vsi id
+ * @vsi_handle: software VSI handle
  * @tc: branch number
- * @owner: lan or rdma
+ * @owner: LAN or RDMA
  *
- * This function retrieves a free lan or rdma q group node
+ * This function retrieves a free LAN or RDMA queue group node
  */
 struct ice_sched_node *
-ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
 			   u8 owner)
 {
 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
-	struct ice_sched_vsi_info *list_elem;
+	struct ice_vsi_ctx *vsi_ctx;
 	u16 max_children;
 	u8 qgrp_layer;
 
 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
-	max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children);
+	max_children = pi->hw->max_children[qgrp_layer];
 
-	list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id);
-	if (!list_elem)
-		goto lan_q_exit;
-
-	vsi_node = list_elem->vsi_node[tc];
-
-	/* validate invalid VSI id */
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		return NULL;
+	vsi_node = vsi_ctx->sched.vsi_node[tc];
+	/* check for an invalid VSI ID */
 	if (!vsi_node)
 		goto lan_q_exit;
 
-	/* get the first q group node from VSI sub-tree */
-	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
+	/* get the first queue group node from VSI sub-tree */
+	qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
 	while (qgrp_node) {
 		/* make sure the qgroup node is part of the VSI subtree */
 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
@@ -1230,27 +1159,27 @@
 }
 
 /**
- * ice_sched_get_vsi_node - Get a VSI node based on VSI id
- * @hw: pointer to the hw struct
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
+ * @hw: pointer to the HW struct
  * @tc_node: pointer to the TC node
- * @vsi_id: VSI id
+ * @vsi_handle: software VSI handle
  *
- * This function retrieves a VSI node for a given VSI id from a given
+ * This function retrieves a VSI node for a given VSI ID from a given
  * TC branch
  */
 static struct ice_sched_node *
 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
-		       u16 vsi_id)
+		       u16 vsi_handle)
 {
 	struct ice_sched_node *node;
 	u8 vsi_layer;
 
 	vsi_layer = ice_sched_get_vsi_layer(hw);
-	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
+	node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer);
 
 	/* Check whether it already exists */
 	while (node) {
-		if (node->vsi_id == vsi_id)
+		if (node->vsi_handle == vsi_handle)
 			return node;
 		node = node->sibling;
 	}
@@ -1260,7 +1189,7 @@
 
 /**
  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @num_qs: number of queues
  * @num_nodes: num nodes array
  *
@@ -1276,12 +1205,10 @@
 	qgl = ice_sched_get_qgrp_layer(hw);
 	vsil = ice_sched_get_vsi_layer(hw);
 
-	/* calculate num nodes from q group to VSI layer */
+	/* calculate num nodes from queue group to VSI layer */
 	for (i = qgl; i > vsil; i--) {
-		u16 max_children = le16_to_cpu(hw->layer_info[i].max_children);
-
 		/* round to the next integer if there is a remainder */
-		num = DIV_ROUND_UP(num, max_children);
+		num = DIV_ROUND_UP(num, hw->max_children[i]);
 
 		/* need at least one node */
 		num_nodes[i] = num ? num : 1;
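+		/* e.g. (illustrative): with 128 queues and max_children of 8
+		 * at every layer, this produces 16 queue-group nodes, then 2,
+		 * then 1 at each successive layer up toward the VSI layer
+		 */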
@@ -1291,16 +1218,16 @@
 /**
  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
  * @pi: port information structure
- * @vsi_id: VSI id
+ * @vsi_handle: software VSI handle
  * @tc_node: pointer to the TC node
  * @num_nodes: pointer to the num nodes that needs to be added per layer
- * @owner: node owner (lan or rdma)
+ * @owner: node owner (LAN or RDMA)
  *
  * This function adds the VSI child nodes to tree. It gets called for
- * lan and rdma separately.
+ * LAN and RDMA separately.
  */
 static enum ice_status
-ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id,
+ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 			      struct ice_sched_node *tc_node, u16 *num_nodes,
 			      u8 owner)
 {
@@ -1311,16 +1238,13 @@
 	u16 num_added = 0;
 	u8 i, qgl, vsil;
 
-	status = ice_sched_validate_for_max_nodes(pi, num_nodes);
-	if (status)
-		return status;
-
 	qgl = ice_sched_get_qgrp_layer(hw);
 	vsil = ice_sched_get_vsi_layer(hw);
-	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
 	for (i = vsil + 1; i <= qgl; i++) {
 		if (!parent)
 			return ICE_ERR_CFG;
+
 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
 						      num_nodes[i],
 						      &first_node_teid,
@@ -1348,49 +1272,13 @@
 }
 
 /**
- * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
- * @pi: port information structure
- * @vsi_node: pointer to the VSI node
- * @num_nodes: pointer to the num nodes that needs to be removed per layer
- * @owner: node owner (lan or rdma)
- *
- * This function removes the VSI child nodes from the tree. It gets called for
- * lan and rdma separately.
- */
-static void
-ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
-			     struct ice_sched_node *vsi_node, u16 *num_nodes,
-			     u8 owner)
-{
-	struct ice_sched_node *node, *next;
-	u8 i, qgl, vsil;
-	u16 num;
-
-	qgl = ice_sched_get_qgrp_layer(pi->hw);
-	vsil = ice_sched_get_vsi_layer(pi->hw);
-
-	for (i = qgl; i > vsil; i--) {
-		num = num_nodes[i];
-		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
-		while (node && num) {
-			next = node->sibling;
-			if (node->owner == owner && !node->num_children) {
-				ice_free_sched_node(pi, node);
-				num--;
-			}
-			node = next;
-		}
-	}
-}
-
-/**
  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @tc_node: pointer to TC node
  * @num_nodes: pointer to num nodes array
  *
  * This function calculates the number of supported nodes needed to add this
- * VSI into tx tree including the VSI, parent and intermediate nodes in below
+ * VSI into Tx tree including the VSI, parent and intermediate nodes in below
  * layers
  */
 static void
@@ -1398,8 +1286,8 @@
 				 struct ice_sched_node *tc_node, u16 *num_nodes)
 {
 	struct ice_sched_node *node;
-	u16 max_child;
-	u8 i, vsil;
+	u8 vsil;
+	int i;
 
 	vsil = ice_sched_get_vsi_layer(hw);
 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
@@ -1412,34 +1300,38 @@
 			/* If intermediate nodes are reached max children
 			 * then add a new one.
 			 */
-			node = ice_sched_get_first_node(hw, tc_node, i);
-			max_child = le16_to_cpu(hw->layer_info[i].max_children);
-
+			node = ice_sched_get_first_node(hw->port_info, tc_node,
+							(u8)i);
 			/* scan all the siblings */
 			while (node) {
-				if (node->num_children < max_child)
+				if (node->num_children < hw->max_children[i])
 					break;
 				node = node->sibling;
 			}
 
+			/* the tree has an intermediate node with room for
+			 * this new VSI, so there is no need to calculate
+			 * supported nodes for the layers below
+			 */
+			if (node)
+				break;
 			/* all the nodes are full, allocate a new one */
-			if (!node)
-				num_nodes[i]++;
+			num_nodes[i]++;
 		}
 }
 
 /**
- * ice_sched_add_vsi_support_nodes - add VSI supported nodes into tx tree
+ * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
  * @pi: port information structure
- * @vsi_id: VSI Id
+ * @vsi_handle: software VSI handle
  * @tc_node: pointer to TC node
  * @num_nodes: pointer to num nodes array
  *
- * This function adds the VSI supported nodes into tx tree including the
+ * This function adds the VSI supported nodes into Tx tree including the
  * VSI, its parent and intermediate nodes in below layers
  */
 static enum ice_status
-ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id,
+ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
 				struct ice_sched_node *tc_node, u16 *num_nodes)
 {
 	struct ice_sched_node *parent = tc_node;
@@ -1451,10 +1343,6 @@
 	if (!pi)
 		return ICE_ERR_PARAM;
 
-	status = ice_sched_validate_for_max_nodes(pi, num_nodes);
-	if (status)
-		return status;
-
 	vsil = ice_sched_get_vsi_layer(pi->hw);
 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
@@ -1477,21 +1365,22 @@
 			return ICE_ERR_CFG;
 
 		if (i == vsil)
-			parent->vsi_id = vsi_id;
+			parent->vsi_handle = vsi_handle;
 	}
+
 	return 0;
 }
 
 /**
  * ice_sched_add_vsi_to_topo - add a new VSI into tree
  * @pi: port information structure
- * @vsi_id: VSI Id
+ * @vsi_handle: software VSI handle
  * @tc: TC number
  *
  * This function adds a new VSI into scheduler tree
  */
 static enum ice_status
-ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc)
+ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
 {
 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
 	struct ice_sched_node *tc_node;
@@ -1504,14 +1393,15 @@
 	/* calculate number of supported nodes needed for this VSI */
 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
 
-	/* add vsi supported nodes to tc subtree */
-	return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes);
+	/* add VSI supported nodes to TC subtree */
+	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+					       num_nodes);
 }
 
 /**
  * ice_sched_update_vsi_child_nodes - update VSI child nodes
  * @pi: port information structure
- * @vsi_id: VSI Id
+ * @vsi_handle: software VSI handle
  * @tc: TC number
  * @new_numqs: new number of max queues
  * @owner: owner of this subtree
@@ -1519,75 +1409,62 @@
  * This function updates the VSI child nodes based on the number of queues
  */
 static enum ice_status
-ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc,
-				 u16 new_numqs, u8 owner)
+ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+				 u8 tc, u16 new_numqs, u8 owner)
 {
-	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
 	struct ice_sched_node *vsi_node;
 	struct ice_sched_node *tc_node;
-	struct ice_sched_vsi_info *vsi;
+	struct ice_vsi_ctx *vsi_ctx;
 	enum ice_status status = 0;
 	struct ice_hw *hw = pi->hw;
 	u16 prev_numqs;
-	u8 i;
 
 	tc_node = ice_sched_get_tc_node(pi, tc);
 	if (!tc_node)
 		return ICE_ERR_CFG;
 
-	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
 	if (!vsi_node)
 		return ICE_ERR_CFG;
 
-	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
-	if (!vsi)
-		return ICE_ERR_CFG;
-
-	if (owner == ICE_SCHED_NODE_OWNER_LAN)
-		prev_numqs = vsi->max_lanq[tc];
-	else
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
 		return ICE_ERR_PARAM;
 
-	/* num queues are not changed */
-	if (prev_numqs == new_numqs)
+	prev_numqs = vsi_ctx->sched.max_lanq[tc];
+	/* number of queues is unchanged or less than the previous number */
+	if (new_numqs <= prev_numqs)
 		return status;
-
-	/* calculate number of nodes based on prev/new number of qs */
-	if (prev_numqs)
-		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
+	status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
+	if (status)
+		return status;
 
 	if (new_numqs)
 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
+	/* Keep the maximum queue configuration all the time. Update the tree
+	 * only if the number of queues is greater than the previous number.
+	 * This may leave some extra nodes in the tree if the number of queues
+	 * is less than the previous number, but that wouldn't harm anything.
+	 * Removing those extra nodes may complicate the code if those nodes
+	 * are part of SRL or individually rate limited.
+	 */
+	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
+					       new_num_nodes, owner);
+	if (status)
+		return status;
+	vsi_ctx->sched.max_lanq[tc] = new_numqs;
 
-	if (prev_numqs > new_numqs) {
-		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
-			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
-
-		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
-					     owner);
-	} else {
-		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
-			new_num_nodes[i] -= prev_num_nodes[i];
-
-		status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node,
-						       new_num_nodes, owner);
-		if (status)
-			return status;
-	}
-
-	vsi->max_lanq[tc] = new_numqs;
-
-	return status;
+	return 0;
 }
 
 /**
- * ice_sched_cfg_vsi - configure the new/exisiting VSI
+ * ice_sched_cfg_vsi - configure the new/existing VSI
  * @pi: port information structure
- * @vsi_id: VSI Id
+ * @vsi_handle: software VSI handle
  * @tc: TC number
  * @maxqs: max number of queues
- * @owner: lan or rdma
+ * @owner: LAN or RDMA
  * @enable: TC enabled or disabled
  *
  * This function adds/updates VSI nodes based on the number of queues. If TC is
@@ -1595,27 +1472,24 @@
  * disabled then suspend the VSI if it is not already.
  */
 enum ice_status
-ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 		  u8 owner, bool enable)
 {
 	struct ice_sched_node *vsi_node, *tc_node;
-	struct ice_sched_vsi_info *vsi;
+	struct ice_vsi_ctx *vsi_ctx;
 	enum ice_status status = 0;
 	struct ice_hw *hw = pi->hw;
 
+	ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
 	tc_node = ice_sched_get_tc_node(pi, tc);
 	if (!tc_node)
 		return ICE_ERR_PARAM;
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return ICE_ERR_PARAM;
+	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
 
-	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
-	if (!vsi)
-		vsi = ice_sched_create_vsi_info_entry(pi, vsi_id);
-	if (!vsi)
-		return ICE_ERR_NO_MEMORY;
-
-	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
-
-	/* suspend the VSI if tc is not enabled */
+	/* suspend the VSI if TC is not enabled */
 	if (!enable) {
 		if (vsi_node && vsi_node->in_use) {
 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
@@ -1630,18 +1504,26 @@
 
 	/* TC is enabled, if it is a new VSI then add it to the tree */
 	if (!vsi_node) {
-		status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc);
+		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
 		if (status)
 			return status;
-		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+
+		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
 		if (!vsi_node)
 			return ICE_ERR_CFG;
-		vsi->vsi_node[tc] = vsi_node;
+
+		vsi_ctx->sched.vsi_node[tc] = vsi_node;
 		vsi_node->in_use = true;
+		/* invalidate the max queues whenever the VSI is added to the
+		 * scheduler tree for the first time (boot or after reset),
+		 * since the child nodes must be recreated in these cases
+		 */
+		vsi_ctx->sched.max_lanq[tc] = 0;
 	}
 
 	/* update the VSI child nodes */
-	status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner);
+	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
+						  owner);
 	if (status)
 		return status;
 
@@ -1656,3 +1538,135 @@
 
 	return status;
 }
+
+/**
+ * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function removes a single aggregator VSI info entry from the
+ * aggregator list.
+ */
+static void
+ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_agg_info *atmp;
+
+	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
+				 list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(agg_vsi_info, vtmp,
+					 &agg_info->agg_vsi_list, list_entry)
+			if (agg_vsi_info->vsi_handle == vsi_handle) {
+				list_del(&agg_vsi_info->list_entry);
+				devm_kfree(ice_hw_to_dev(pi->hw),
+					   agg_vsi_info);
+				return;
+			}
+	}
+}
+
+/**
+ * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
+ * @node: pointer to the sub-tree node
+ *
+ * This function checks for a leaf node presence in a given sub-tree node.
+ */
+static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
+{
+	u8 i;
+
+	for (i = 0; i < node->num_children; i++)
+		if (ice_sched_is_leaf_node_present(node->children[i]))
+			return true;
+	/* check for a leaf node */
+	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
+}
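+
+/* Note: the recursion above is bounded by the scheduler tree depth (at
+ * most ICE_AQC_TOPO_MAX_LEVEL_NUM layers), so stack usage stays small.
+ */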
+
+/**
+ * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @owner: LAN or RDMA
+ *
+ * This function removes the VSI and its LAN or RDMA children nodes from the
+ * scheduler tree.
+ */
+static enum ice_status
+ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_vsi_ctx *vsi_ctx;
+	u8 i;
+
+	ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return status;
+	mutex_lock(&pi->sched_lock);
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		goto exit_sched_rm_vsi_cfg;
+
+	ice_for_each_traffic_class(i) {
+		struct ice_sched_node *vsi_node, *tc_node;
+		u8 j = 0;
+
+		tc_node = ice_sched_get_tc_node(pi, i);
+		if (!tc_node)
+			continue;
+
+		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
+		if (!vsi_node)
+			continue;
+
+		if (ice_sched_is_leaf_node_present(vsi_node)) {
+			ice_debug(pi->hw, ICE_DBG_SCHED,
+				  "VSI has leaf nodes in TC %d\n", i);
+			status = ICE_ERR_IN_USE;
+			goto exit_sched_rm_vsi_cfg;
+		}
+		while (j < vsi_node->num_children) {
+			if (vsi_node->children[j]->owner == owner) {
+				ice_free_sched_node(pi, vsi_node->children[j]);
+
+				/* reset the counter again since num_children
+				 * is updated after node removal
+				 */
+				j = 0;
+			} else {
+				j++;
+			}
+		}
+		/* remove the VSI if it has no children */
+		if (!vsi_node->num_children) {
+			ice_free_sched_node(pi, vsi_node);
+			vsi_ctx->sched.vsi_node[i] = NULL;
+
+			/* clean up aggregator related VSI info if any */
+			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+		}
+		if (owner == ICE_SCHED_NODE_OWNER_LAN)
+			vsi_ctx->sched.max_lanq[i] = 0;
+	}
+	status = 0;
+
+exit_sched_rm_vsi_cfg:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its LAN children nodes from scheduler tree
+ * for all TCs.
+ */
+enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
+}
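+
+/* Usage sketch (illustrative; the caller shown is an assumption): a VSI
+ * teardown path frees its Tx queues first, then prunes the tree:
+ *
+ *	status = ice_rm_vsi_lan_cfg(pi, vsi_handle);
+ *	if (status == ICE_ERR_IN_USE)
+ *		return status;	(leaf nodes remain: free the queues first)
+ */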
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index badadcc..3902a8a 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -12,7 +12,7 @@
 struct ice_sched_agg_vsi_info {
 	struct list_head list_entry;
 	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
-	u16 vsi_id;
+	u16 vsi_handle;
 };
 
 struct ice_sched_agg_info {
@@ -24,9 +24,16 @@
 };
 
 /* FW AQ command calls */
+enum ice_status
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_get_elem *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd);
 enum ice_status ice_sched_init_port(struct ice_port_info *pi);
 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_clear_port(struct ice_port_info *pi);
 void ice_sched_cleanup_all(struct ice_hw *hw);
+void ice_sched_clear_agg(struct ice_hw *hw);
+
 struct ice_sched_node *
 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
 enum ice_status
@@ -35,9 +42,10 @@
 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
 struct ice_sched_node *
-ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
 			   u8 owner);
 enum ice_status
-ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 		  u8 owner, bool enable);
+enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
 #endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
new file mode 100644
index 0000000..d2db0d0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_adminq_cmd.h"
+#include "ice_sriov.h"
+
+/**
+ * ice_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: VF ID to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send a message to the VF driver (0x0802) over the mailbox queue; the
+ * message is posted asynchronously via ice_sq_send_cmd()
+ */
+enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_pf_vf_msg *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf);
+
+	cmd = &desc.params.virt;
+	cmd->id = cpu_to_le32(vfid);
+
+	desc.cookie_high = cpu_to_le32(v_opcode);
+	desc.cookie_low = cpu_to_le32(v_retval);
+
+	if (msglen)
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
+}
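
A sketch of a PF-side reply built on this helper; the opcode and payload values are illustrative, and the zero v_retval stands in for a success code:

/* Hypothetical reply path: opcode and return value travel in the
 * descriptor cookies, the payload (if any) in the indirect buffer.
 */
static enum ice_status
example_reply_to_vf(struct ice_hw *hw, u16 vf_id, u32 v_opcode,
		    u8 *payload, u16 payload_len)
{
	return ice_aq_send_msg_to_vf(hw, vf_id, v_opcode, 0 /* success */,
				     payload, payload_len, NULL);
}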
+
+/**
+ * ice_conv_link_speed_to_virtchnl
+ * @adv_link_support: determines the format of the returned link speed
+ * @link_speed: variable containing the link_speed to be converted
+ *
+ * Convert link speed supported by HW to link speed supported by virtchnl.
+ * If adv_link_support is true, then return link speed in Mbps. Else return
+ * link speed as a VIRTCHNL_LINK_SPEED_* cast to a u32. Note that the caller
+ * needs to cast back to an enum virtchnl_link_speed in the case where
+ * adv_link_support is false, but when adv_link_support is true the caller can
+ * expect the speed in Mbps.
+ */
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
+{
+	u32 speed;
+
+	if (adv_link_support)
+		switch (link_speed) {
+		case ICE_AQ_LINK_SPEED_10MB:
+			speed = ICE_LINK_SPEED_10MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_100MB:
+			speed = ICE_LINK_SPEED_100MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_1000MB:
+			speed = ICE_LINK_SPEED_1000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_2500MB:
+			speed = ICE_LINK_SPEED_2500MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_5GB:
+			speed = ICE_LINK_SPEED_5000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_10GB:
+			speed = ICE_LINK_SPEED_10000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_20GB:
+			speed = ICE_LINK_SPEED_20000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_25GB:
+			speed = ICE_LINK_SPEED_25000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_40GB:
+			speed = ICE_LINK_SPEED_40000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_50GB:
+			speed = ICE_LINK_SPEED_50000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_100GB:
+			speed = ICE_LINK_SPEED_100000MBPS;
+			break;
+		default:
+			speed = ICE_LINK_SPEED_UNKNOWN;
+			break;
+		}
+	else
+		/* Virtchnl speeds are not defined for every speed supported in
+		 * the hardware. To maintain compatibility with older AVF
+		 * drivers, newer speed values are reported as the closest
+		 * known virtchnl speed.
+		 */
+		switch (link_speed) {
+		case ICE_AQ_LINK_SPEED_10MB:
+		case ICE_AQ_LINK_SPEED_100MB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_100MB;
+			break;
+		case ICE_AQ_LINK_SPEED_1000MB:
+		case ICE_AQ_LINK_SPEED_2500MB:
+		case ICE_AQ_LINK_SPEED_5GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_1GB;
+			break;
+		case ICE_AQ_LINK_SPEED_10GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_10GB;
+			break;
+		case ICE_AQ_LINK_SPEED_20GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_20GB;
+			break;
+		case ICE_AQ_LINK_SPEED_25GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_25GB;
+			break;
+		case ICE_AQ_LINK_SPEED_40GB:
+			/* fall through */
+		case ICE_AQ_LINK_SPEED_50GB:
+			/* fall through */
+		case ICE_AQ_LINK_SPEED_100GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_40GB;
+			break;
+		default:
+			speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN;
+			break;
+		}
+
+	return speed;
+}
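
Two sketches of the cast contract described in the kernel-doc above (the wrapper names are illustrative):

/* adv_link_support == false: the returned u32 is really an enum value
 * and must be cast back before use.
 */
static enum virtchnl_link_speed example_legacy_speed(u16 aq_link_speed)
{
	return (enum virtchnl_link_speed)
		ice_conv_link_speed_to_virtchnl(false, aq_link_speed);
}

/* adv_link_support == true: the returned u32 is a plain Mbps value */
static u32 example_adv_speed_mbps(u16 aq_link_speed)
{
	return ice_conv_link_speed_to_virtchnl(true, aq_link_speed);
}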
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
new file mode 100644
index 0000000..3d78a07
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SRIOV_H_
+#define _ICE_SRIOV_H_
+
+#include "ice_common.h"
+
+#ifdef CONFIG_PCI_IOV
+enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd);
+
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
+#else /* CONFIG_PCI_IOV */
+static inline enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw,
+		      u16 __always_unused vfid, u32 __always_unused v_opcode,
+		      u32 __always_unused v_retval, u8 __always_unused *msg,
+		      u16 __always_unused msglen,
+		      struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline u32
+ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support,
+				u16 __always_unused link_speed)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_SRIOV_H_ */
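
The !CONFIG_PCI_IOV stubs exist so common code can call these helpers without #ifdef guards; a sketch of the idiom (the call site is hypothetical):

/* Compiles on both IOV and non-IOV builds: with CONFIG_PCI_IOV unset,
 * the inline stub above turns this into a no-op that returns 0.
 */
static void example_ping_vf(struct ice_hw *hw, u16 vf_id)
{
	ice_aq_send_msg_to_vf(hw, vf_id, 0, 0, NULL, 0, NULL);
}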
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 9a95c4f..c015978 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -6,9 +6,13 @@
 
 /* Error Codes */
 enum ice_status {
+	ICE_SUCCESS				= 0,
+
+	/* Generic codes : Range -1..-49 */
 	ICE_ERR_PARAM				= -1,
 	ICE_ERR_NOT_IMPL			= -2,
 	ICE_ERR_NOT_READY			= -3,
+	ICE_ERR_NOT_SUPPORTED			= -4,
 	ICE_ERR_BAD_PTR				= -5,
 	ICE_ERR_INVAL_SIZE			= -6,
 	ICE_ERR_DEVICE_NOT_SUPPORTED		= -8,
@@ -19,7 +23,10 @@
 	ICE_ERR_OUT_OF_RANGE			= -13,
 	ICE_ERR_ALREADY_EXISTS			= -14,
 	ICE_ERR_DOES_NOT_EXIST			= -15,
+	ICE_ERR_IN_USE				= -16,
 	ICE_ERR_MAX_LIMIT			= -17,
+	ICE_ERR_RESET_ONGOING			= -18,
+	ICE_ERR_NVM_CHECKSUM			= -51,
 	ICE_ERR_BUF_TOO_SHORT			= -52,
 	ICE_ERR_NVM_BLANK_MODE			= -53,
 	ICE_ERR_AQ_ERROR			= -100,
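
These codes keep the driver's 0-means-success convention, so call sites can test the enum directly; a representative pattern (the errno mapping below is illustrative, not from this patch):

static int example_status_to_errno(enum ice_status status)
{
	switch (status) {
	case ICE_SUCCESS:
		return 0;
	case ICE_ERR_PARAM:
		return -EINVAL;
	case ICE_ERR_IN_USE:
		return -EBUSY;
	default:
		return -EIO;
	}
}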
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 6b7ec2a..1acdd43 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -19,7 +19,7 @@
  * byte 6 = 0x2: to identify it as locally administered SA MAC
  * byte 12 = 0x81 & byte 13 = 0x00:
  *	In case of VLAN filter first two bytes defines ether type (0x8100)
- *	and remaining two bytes are placeholder for programming a given VLAN id
+ *	and remaining two bytes are placeholder for programming a given VLAN ID
  *	In case of Ether type filter it is treated as header without VLAN tag
  *	and byte 12 and 13 is used to program a given Ether type instead
  */
@@ -51,7 +51,7 @@
 
 /**
  * ice_aq_alloc_free_res - command to allocate/free resources
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @num_entries: number of resource entries in buffer
  * @buf: Indirect buffer to hold data parameters and response
  * @buf_size: size of buffer for indirect commands
@@ -86,6 +86,35 @@
 }
 
 /**
+ * ice_init_def_sw_recp - initialize the recipe bookkeeping tables
+ * @hw: pointer to the HW struct
+ *
+ * Allocate memory for the entire recipe table and initialize the structures/
+ * entries corresponding to basic recipes.
+ */
+enum ice_status ice_init_def_sw_recp(struct ice_hw *hw)
+{
+	struct ice_sw_recipe *recps;
+	u8 i;
+
+	recps = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_NUM_RECIPES,
+			     sizeof(*recps), GFP_KERNEL);
+	if (!recps)
+		return ICE_ERR_NO_MEMORY;
+
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		recps[i].root_rid = i;
+		INIT_LIST_HEAD(&recps[i].filt_rules);
+		INIT_LIST_HEAD(&recps[i].filt_replay_rules);
+		mutex_init(&recps[i].filt_rule_lock);
+	}
+
+	hw->switch_info->recp_list = recps;
+
+	return 0;
+}
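
With the recipe table in place, per-lookup-type bookkeeping is reached uniformly through recp_list[]; a sketch of a walk over the MAC rules, locking as the rest of this patch does:

static void example_walk_mac_rules(struct ice_hw *hw)
{
	struct ice_sw_recipe *recp =
		&hw->switch_info->recp_list[ICE_SW_LKUP_MAC];
	struct ice_fltr_mgmt_list_entry *itr;

	mutex_lock(&recp->filt_rule_lock);
	list_for_each_entry(itr, &recp->filt_rules, list_entry)
		;	/* inspect itr->fltr_info here */
	mutex_unlock(&recp->filt_rule_lock);
}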
+
+/**
  * ice_aq_get_sw_cfg - get switch configuration
  * @hw: pointer to the hardware structure
  * @buf: pointer to the result buffer
@@ -100,7 +129,7 @@
  *
  * NOTE: *req_desc is both an input/output parameter.
  * The caller of this function first calls this function with *request_desc set
- * to 0.  If the response from f/w has *req_desc set to 0, all the switch
+ * to 0. If the response from f/w has *req_desc set to 0, all the switch
  * configuration information has been returned; if non-zero (meaning not all
  * the information was returned), the caller should call this function again
  * with *req_desc set to the previous value returned by f/w to get the
@@ -134,29 +163,30 @@
 
 /**
  * ice_aq_add_vsi
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @vsi_ctx: pointer to a VSI context struct
  * @cd: pointer to command details structure or NULL
  *
  * Add a VSI context to the hardware (0x0210)
  */
-enum ice_status
+static enum ice_status
 ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
 	       struct ice_sq_cd *cd)
 {
 	struct ice_aqc_add_update_free_vsi_resp *res;
 	struct ice_aqc_add_get_update_free_vsi *cmd;
-	enum ice_status status;
 	struct ice_aq_desc desc;
+	enum ice_status status;
 
 	cmd = &desc.params.vsi_cmd;
-	res = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+	res = &desc.params.add_update_free_vsi_res;
 
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi);
 
 	if (!vsi_ctx->alloc_from_pool)
 		cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num |
 					   ICE_AQ_VSI_IS_VALID);
+	cmd->vf_id = vsi_ctx->vf_num;
 
 	cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
 
@@ -175,14 +205,50 @@
 }
 
 /**
+ * ice_aq_free_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware (0x0213)
+ */
+static enum ice_status
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+	if (keep_vsi_alloc)
+		cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
+/**
  * ice_aq_update_vsi
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @vsi_ctx: pointer to a VSI context struct
  * @cd: pointer to command details structure or NULL
  *
  * Update VSI context in the hardware (0x0211)
  */
-enum ice_status
+static enum ice_status
 ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
 		  struct ice_sq_cd *cd)
 {
@@ -192,7 +258,7 @@
 	enum ice_status status;
 
 	cmd = &desc.params.vsi_cmd;
-	resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+	resp = &desc.params.add_update_free_vsi_res;
 
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi);
 
@@ -212,45 +278,199 @@
 }
 
 /**
- * ice_aq_free_vsi
- * @hw: pointer to the hw struct
+ * ice_is_vsi_valid - check whether the VSI is valid or not
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * Check whether the given VSI handle refers to a valid, saved VSI context
+ */
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle)
+{
+	return vsi_handle < ICE_MAX_VSI && hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_get_hw_vsi_num - return the HW VSI number
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the HW VSI number
+ * Caution: call this function only if VSI is valid (ice_is_vsi_valid)
+ */
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle)
+{
+	return hw->vsi_ctx[vsi_handle]->vsi_num;
+}
+
+/**
+ * ice_get_vsi_ctx - return the VSI context entry for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the VSI context entry for a given VSI handle
+ */
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	return (vsi_handle >= ICE_MAX_VSI) ? NULL : hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_save_vsi_ctx - save the VSI context for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @vsi: VSI context pointer
+ *
+ * save the VSI context entry for a given VSI handle
+ */
+static void
+ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi)
+{
+	hw->vsi_ctx[vsi_handle] = vsi;
+}
+
+/**
+ * ice_clear_vsi_q_ctx - clear VSI queue contexts for all TCs
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ */
+static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_vsi_ctx *vsi;
+	u8 i;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi)
+		return;
+	ice_for_each_traffic_class(i) {
+		if (vsi->lan_q_ctx[i]) {
+			devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
+			vsi->lan_q_ctx[i] = NULL;
+		}
+	}
+}
+
+/**
+ * ice_clear_vsi_ctx - clear the VSI context entry
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * clear the VSI context entry
+ */
+static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_vsi_ctx *vsi;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (vsi) {
+		ice_clear_vsi_q_ctx(hw, vsi_handle);
+		devm_kfree(ice_hw_to_dev(hw), vsi);
+		hw->vsi_ctx[vsi_handle] = NULL;
+	}
+}
+
+/**
+ * ice_clear_all_vsi_ctx - clear all the VSI context entries
+ * @hw: pointer to the HW struct
+ */
+void ice_clear_all_vsi_ctx(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_MAX_VSI; i++)
+		ice_clear_vsi_ctx(hw, i);
+}
+
+/**
+ * ice_add_vsi - add VSI context to the hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle provided by drivers
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware and also add it to the VSI handle list.
+ * If this function gets called after reset for an existing VSI, it updates
+ * the corresponding VSI handle list entry with the new HW VSI number.
+ */
+enum ice_status
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	    struct ice_sq_cd *cd)
+{
+	struct ice_vsi_ctx *tmp_vsi_ctx;
+	enum ice_status status;
+
+	if (vsi_handle >= ICE_MAX_VSI)
+		return ICE_ERR_PARAM;
+	status = ice_aq_add_vsi(hw, vsi_ctx, cd);
+	if (status)
+		return status;
+	tmp_vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!tmp_vsi_ctx) {
+		/* Create a new VSI context */
+		tmp_vsi_ctx = devm_kzalloc(ice_hw_to_dev(hw),
+					   sizeof(*tmp_vsi_ctx), GFP_KERNEL);
+		if (!tmp_vsi_ctx) {
+			ice_aq_free_vsi(hw, vsi_ctx, false, cd);
+			return ICE_ERR_NO_MEMORY;
+		}
+		*tmp_vsi_ctx = *vsi_ctx;
+		ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
+	} else {
+		/* update with new HW VSI num */
+		if (tmp_vsi_ctx->vsi_num != vsi_ctx->vsi_num)
+			tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+	}
+
+	return 0;
+}
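
A lifecycle sketch for the handle-based API introduced here; the caller picks a software handle below ICE_MAX_VSI and reuses it for every later call (the helper name is illustrative):

static enum ice_status
example_vsi_lifecycle(struct ice_hw *hw, u16 vsi_handle,
		      struct ice_vsi_ctx *ctx)
{
	enum ice_status status;

	status = ice_add_vsi(hw, vsi_handle, ctx, NULL);
	if (status)
		return status;

	/* ... queue, filter and scheduler setup keyed by vsi_handle ... */

	return ice_free_vsi(hw, vsi_handle, ctx, false, NULL);
}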
+
+/**
+ * ice_free_vsi - free VSI context from hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
  * @vsi_ctx: pointer to a VSI context struct
  * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
  * @cd: pointer to command details structure or NULL
  *
- * Get VSI context info from hardware (0x0213)
+ * Free VSI context info from hardware as well as from VSI handle list
  */
 enum ice_status
-ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
-		bool keep_vsi_alloc, struct ice_sq_cd *cd)
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	     bool keep_vsi_alloc, struct ice_sq_cd *cd)
 {
-	struct ice_aqc_add_update_free_vsi_resp *resp;
-	struct ice_aqc_add_get_update_free_vsi *cmd;
-	struct ice_aq_desc desc;
 	enum ice_status status;
 
-	cmd = &desc.params.vsi_cmd;
-	resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
-
-	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
-
-	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
-	if (keep_vsi_alloc)
-		cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
-
-	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
-	if (!status) {
-		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
-		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
-	}
-
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+	status = ice_aq_free_vsi(hw, vsi_ctx, keep_vsi_alloc, cd);
+	if (!status)
+		ice_clear_vsi_ctx(hw, vsi_handle);
 	return status;
 }
 
 /**
+ * ice_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware
+ */
+enum ice_status
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+	return ice_aq_update_vsi(hw, vsi_ctx, cd);
+}
+
+/**
  * ice_aq_alloc_free_vsi_list
- * @hw: pointer to the hw struct
- * @vsi_list_id: VSI list id returned or used for lookup
+ * @hw: pointer to the HW struct
+ * @vsi_list_id: VSI list ID returned or used for lookup
  * @lkup_type: switch rule filter lookup type
  * @opc: switch rules population command type - pass in the command opcode
  *
@@ -306,7 +526,7 @@
 
 /**
  * ice_aq_sw_rules - add/update/remove switch rules
- * @hw: pointer to the hw struct
+ * @hw: pointer to the HW struct
  * @rule_list: pointer to switch rule population list
  * @rule_list_sz: total size of the rule list in bytes
  * @num_rules: number of switch rules in the rule_list
@@ -430,25 +650,58 @@
 /**
  * ice_fill_sw_info - Helper function to populate lb_en and lan_en
  * @hw: pointer to the hardware structure
- * @f_info: filter info structure to fill/update
+ * @fi: filter info structure to fill/update
  *
  * This helper function populates the lb_en and lan_en elements of the provided
  * ice_fltr_info struct using the switch's type and characteristics of the
  * switch rule being configured.
  */
-static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *f_info)
+static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi)
 {
-	f_info->lb_en = false;
-	f_info->lan_en = false;
-	if ((f_info->flag & ICE_FLTR_TX) &&
-	    (f_info->fltr_act == ICE_FWD_TO_VSI ||
-	     f_info->fltr_act == ICE_FWD_TO_VSI_LIST ||
-	     f_info->fltr_act == ICE_FWD_TO_Q ||
-	     f_info->fltr_act == ICE_FWD_TO_QGRP)) {
-		f_info->lb_en = true;
-		if (!(hw->evb_veb && f_info->lkup_type == ICE_SW_LKUP_MAC &&
-		      is_unicast_ether_addr(f_info->l_data.mac.mac_addr)))
-			f_info->lan_en = true;
+	fi->lb_en = false;
+	fi->lan_en = false;
+	if ((fi->flag & ICE_FLTR_TX) &&
+	    (fi->fltr_act == ICE_FWD_TO_VSI ||
+	     fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+	     fi->fltr_act == ICE_FWD_TO_Q ||
+	     fi->fltr_act == ICE_FWD_TO_QGRP)) {
+		/* Setting LB for prune actions will result in replicated
+		 * packets to the internal switch that will be dropped.
+		 */
+		if (fi->lkup_type != ICE_SW_LKUP_VLAN)
+			fi->lb_en = true;
+
+		/* Set lan_en to TRUE if
+		 * 1. The switch is a VEB AND
+		 * 2. any of the following is true:
+		 * 2.1 The lookup is a directional lookup like ethertype,
+		 * promiscuous, ethertype-MAC, promiscuous-VLAN
+		 * and default-port OR
+		 * 2.2 The lookup is VLAN, OR
+		 * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR
+		 * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC.
+		 *
+		 * OR
+		 *
+		 * The switch is a VEPA.
+		 *
+		 * In all other cases, the LAN enable has to be set to false.
+		 */
+		if (hw->evb_veb) {
+			if (fi->lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+			    fi->lkup_type == ICE_SW_LKUP_PROMISC ||
+			    fi->lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+			    fi->lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+			    fi->lkup_type == ICE_SW_LKUP_DFLT ||
+			    fi->lkup_type == ICE_SW_LKUP_VLAN ||
+			    (fi->lkup_type == ICE_SW_LKUP_MAC &&
+			     !is_unicast_ether_addr(fi->l_data.mac.mac_addr)) ||
+			    (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN &&
+			     !is_unicast_ether_addr(fi->l_data.mac.mac_addr)))
+				fi->lan_en = true;
+		} else {
+			fi->lan_en = true;
+		}
 	}
 }
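
Concrete outcomes of the rules above, as a worked example:

/* For a Tx ICE_FWD_TO_VSI filter:
 *  - VEB, unicast ICE_SW_LKUP_MAC:  lb_en = true,  lan_en = false
 *    (frames stay on the internal switch)
 *  - VEB, ICE_SW_LKUP_VLAN:         lb_en = false, lan_en = true
 *    (LB is skipped so prune actions do not replicate packets)
 *  - VEPA, any lookup type:         lan_en = true
 */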
 
@@ -464,10 +717,12 @@
 		 struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc)
 {
 	u16 vlan_id = ICE_MAX_VLAN_ID + 1;
-	u8 eth_hdr[DUMMY_ETH_HDR_LEN];
 	void *daddr = NULL;
+	u16 eth_hdr_sz;
+	u8 *eth_hdr;
 	u32 act = 0;
 	__be16 *off;
+	u8 q_rgn;
 
 	if (opc == ice_aqc_opc_remove_sw_rules) {
 		s_rule->pdata.lkup_tx_rx.act = 0;
@@ -477,13 +732,16 @@
 		return;
 	}
 
+	eth_hdr_sz = sizeof(dummy_eth_header);
+	eth_hdr = s_rule->pdata.lkup_tx_rx.hdr;
+
 	/* initialize the ether header with a dummy header */
-	memcpy(eth_hdr, dummy_eth_header, sizeof(dummy_eth_header));
+	memcpy(eth_hdr, dummy_eth_header, eth_hdr_sz);
 	ice_fill_sw_info(hw, f_info);
 
 	switch (f_info->fltr_act) {
 	case ICE_FWD_TO_VSI:
-		act |= (f_info->fwd_id.vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
+		act |= (f_info->fwd_id.hw_vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
 			ICE_SINGLE_ACT_VSI_ID_M;
 		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
 			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
@@ -503,13 +761,18 @@
 		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
 			ICE_SINGLE_ACT_Q_INDEX_M;
 		break;
-	case ICE_FWD_TO_QGRP:
-		act |= ICE_SINGLE_ACT_TO_Q;
-		act |= (f_info->qgrp_size << ICE_SINGLE_ACT_Q_REGION_S) &
-			ICE_SINGLE_ACT_Q_REGION_M;
-		break;
 	case ICE_DROP_PACKET:
-		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP;
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
+			ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_QGRP:
+		q_rgn = f_info->qgrp_size > 0 ?
+			(u8)ilog2(f_info->qgrp_size) : 0;
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+			ICE_SINGLE_ACT_Q_INDEX_M;
+		act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+			ICE_SINGLE_ACT_Q_REGION_M;
 		break;
 	default:
 		return;
@@ -536,7 +799,7 @@
 		daddr = f_info->l_data.ethertype_mac.mac_addr;
 		/* fall-through */
 	case ICE_SW_LKUP_ETHERTYPE:
-		off = (__be16 *)&eth_hdr[ICE_ETH_ETHTYPE_OFFSET];
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
 		*off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
 		break;
 	case ICE_SW_LKUP_MAC_VLAN:
@@ -563,18 +826,16 @@
 	s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
 
 	if (daddr)
-		ether_addr_copy(&eth_hdr[ICE_ETH_DA_OFFSET], daddr);
+		ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr);
 
 	if (!(vlan_id > ICE_MAX_VLAN_ID)) {
-		off = (__be16 *)&eth_hdr[ICE_ETH_VLAN_TCI_OFFSET];
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
 		*off = cpu_to_be16(vlan_id);
 	}
 
 	/* Create the switch rule with the final dummy Ethernet header */
 	if (opc != ice_aqc_opc_update_sw_rules)
-		s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(sizeof(eth_hdr));
-
-	memcpy(s_rule->pdata.lkup_tx_rx.hdr, eth_hdr, sizeof(eth_hdr));
+		s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(eth_hdr_sz);
 }
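
A sketch of the surrounding usage: one rule buffer is sized for the dummy Ethernet header, filled here, and submitted via ice_aq_sw_rules(); this mirrors ice_create_pkt_fwd_rule() earlier in this file, and the helper name is illustrative:

static enum ice_status
example_add_one_rule(struct ice_hw *hw, struct ice_fltr_info *fi)
{
	struct ice_aqc_sw_rules_elem *s_rule;
	enum ice_status status;

	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
	if (!s_rule)
		return ICE_ERR_NO_MEMORY;

	ice_fill_sw_rule(hw, fi, s_rule, ice_aqc_opc_add_sw_rules);
	status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE,
				 1, ice_aqc_opc_add_sw_rules, NULL);
	if (!status)	/* FW assigns the rule ID on add */
		fi->fltr_rule_id =
			le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);

	devm_kfree(ice_hw_to_dev(hw), s_rule);
	return status;
}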
 
 /**
@@ -582,7 +843,7 @@
  * @hw: pointer to the hardware structure
  * @m_ent: the management entry for which sw marker needs to be added
  * @sw_marker: sw marker to tag the Rx descriptor with
- * @l_id: large action resource id
+ * @l_id: large action resource ID
  *
  * Create a large action to hold software marker and update the switch rule
  * entry pointed by m_ent with newly created large action
@@ -594,15 +855,15 @@
 	struct ice_aqc_sw_rules_elem *lg_act, *rx_tx;
 	/* For software marker we need 3 large actions
 	 * 1. FWD action: FWD TO VSI or VSI LIST
-	 * 2. GENERIC VALUE action to hold the profile id
-	 * 3. GENERIC VALUE action to hold the software marker id
+	 * 2. GENERIC VALUE action to hold the profile ID
+	 * 3. GENERIC VALUE action to hold the software marker ID
 	 */
 	const u16 num_lg_acts = 3;
 	enum ice_status status;
 	u16 lg_act_size;
 	u16 rules_size;
-	u16 vsi_info;
 	u32 act;
+	u16 id;
 
 	if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
 		return ICE_ERR_PARAM;
@@ -610,7 +871,7 @@
 	/* Create two back-to-back switch rules and submit them to the HW using
 	 * one memory buffer:
 	 *    1. Large Action
-	 *    2. Look up tx rx
+	 *    2. Look up Tx Rx
 	 */
 	lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts);
 	rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
@@ -628,12 +889,11 @@
 	/* First action VSI forwarding or VSI list forwarding depending on how
 	 * many VSIs
 	 */
-	vsi_info = (m_ent->vsi_count > 1) ?
-		m_ent->fltr_info.fwd_id.vsi_list_id :
-		m_ent->fltr_info.fwd_id.vsi_id;
+	id = (m_ent->vsi_count > 1) ? m_ent->fltr_info.fwd_id.vsi_list_id :
+		m_ent->fltr_info.fwd_id.hw_vsi_id;
 
 	act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT;
-	act |= (vsi_info << ICE_LG_ACT_VSI_LIST_ID_S) &
+	act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) &
 		ICE_LG_ACT_VSI_LIST_ID_M;
 	if (m_ent->vsi_count > 1)
 		act |= ICE_LG_ACT_VSI_LIST;
@@ -655,17 +915,17 @@
 
 	lg_act->pdata.lg_act.act[2] = cpu_to_le32(act);
 
-	/* call the fill switch rule to fill the lookup tx rx structure */
+	/* call the fill switch rule to fill the lookup Tx Rx structure */
 	ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
 			 ice_aqc_opc_update_sw_rules);
 
-	/* Update the action to point to the large action id */
+	/* Update the action to point to the large action ID */
 	rx_tx->pdata.lkup_tx_rx.act =
 		cpu_to_le32(ICE_SINGLE_ACT_PTR |
 			    ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
 			     ICE_SINGLE_ACT_PTR_VAL_M));
 
-	/* Use the filter rule id of the previously created rule with single
+	/* Use the filter rule ID of the previously created rule with single
 	 * act. Once the update happens, hardware will treat this as large
 	 * action
 	 */
@@ -686,15 +946,15 @@
 /**
  * ice_create_vsi_list_map
  * @hw: pointer to the hardware structure
- * @vsi_array: array of VSIs to form a VSI list
- * @num_vsi: num VSI in the array
- * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @vsi_handle_arr: array of VSI handles to set in the VSI mapping
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
  *
- * Helper function to create a new entry of VSI list id to VSI mapping
- * using the given VSI list id
+ * Helper function to create a new entry of VSI list ID to VSI mapping
+ * using the given VSI list ID
  */
 static struct ice_vsi_list_map_info *
-ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
 			u16 vsi_list_id)
 {
 	struct ice_switch_info *sw = hw->switch_info;
@@ -706,9 +966,9 @@
 		return NULL;
 
 	v_map->vsi_list_id = vsi_list_id;
-
+	v_map->ref_cnt = 1;
 	for (i = 0; i < num_vsi; i++)
-		set_bit(vsi_array[i], v_map->vsi_map);
+		set_bit(vsi_handle_arr[i], v_map->vsi_map);
 
 	list_add(&v_map->list_entry, &sw->vsi_list_map_head);
 	return v_map;
@@ -717,18 +977,18 @@
 /**
  * ice_update_vsi_list_rule
  * @hw: pointer to the hardware structure
- * @vsi_array: array of VSIs to form a VSI list
- * @num_vsi: num VSI in the array
- * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
  * @remove: Boolean value to indicate if this is a remove action
  * @opc: switch rules population command type - pass in the command opcode
  * @lkup_type: lookup type of the filter
  *
  * Call AQ command to add a new switch rule or update existing switch rule
- * using the given VSI list id
+ * using the given VSI list ID
  */
 static enum ice_status
-ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
 			 u16 vsi_list_id, bool remove, enum ice_adminq_opc opc,
 			 enum ice_sw_lkup_type lkup_type)
 {
@@ -759,9 +1019,15 @@
 	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
 	if (!s_rule)
 		return ICE_ERR_NO_MEMORY;
-
-	for (i = 0; i < num_vsi; i++)
-		s_rule->pdata.vsi_list.vsi[i] = cpu_to_le16(vsi_array[i]);
+	for (i = 0; i < num_vsi; i++) {
+		if (!ice_is_vsi_valid(hw, vsi_handle_arr[i])) {
+			status = ICE_ERR_PARAM;
+			goto exit;
+		}
+		/* AQ call requires hw_vsi_id(s) */
+		s_rule->pdata.vsi_list.vsi[i] =
+			cpu_to_le16(ice_get_hw_vsi_num(hw, vsi_handle_arr[i]));
+	}
 
 	s_rule->type = cpu_to_le16(type);
 	s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi);
@@ -769,28 +1035,24 @@
 
 	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL);
 
+exit:
 	devm_kfree(ice_hw_to_dev(hw), s_rule);
 	return status;
 }
 
 /**
  * ice_create_vsi_list_rule - Creates and populates a VSI list rule
- * @hw: pointer to the hw struct
- * @vsi_array: array of VSIs to form a VSI list
- * @num_vsi: number of VSIs in the array
+ * @hw: pointer to the HW struct
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
  * @vsi_list_id: stores the ID of the VSI list to be created
  * @lkup_type: switch rule filter's lookup type
  */
 static enum ice_status
-ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
 			 u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type)
 {
 	enum ice_status status;
-	int i;
-
-	for (i = 0; i < num_vsi; i++)
-		if (vsi_array[i] >= ICE_MAX_VSI)
-			return ICE_ERR_OUT_OF_RANGE;
 
 	status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type,
 					    ice_aqc_opc_alloc_res);
@@ -798,9 +1060,9 @@
 		return status;
 
 	/* Update the newly created VSI list to include the specified VSIs */
-	return ice_update_vsi_list_rule(hw, vsi_array, num_vsi, *vsi_list_id,
-					false, ice_aqc_opc_add_sw_rules,
-					lkup_type);
+	return ice_update_vsi_list_rule(hw, vsi_handle_arr, num_vsi,
+					*vsi_list_id, false,
+					ice_aqc_opc_add_sw_rules, lkup_type);
 }
 
 /**
@@ -816,10 +1078,10 @@
 ice_create_pkt_fwd_rule(struct ice_hw *hw,
 			struct ice_fltr_list_entry *f_entry)
 {
-	struct ice_switch_info *sw = hw->switch_info;
 	struct ice_fltr_mgmt_list_entry *fm_entry;
 	struct ice_aqc_sw_rules_elem *s_rule;
 	enum ice_sw_lkup_type l_type;
+	struct ice_sw_recipe *recp;
 	enum ice_status status;
 
 	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
@@ -860,31 +1122,9 @@
 	 * calls remove filter AQ command
 	 */
 	l_type = fm_entry->fltr_info.lkup_type;
-	if (l_type == ICE_SW_LKUP_MAC) {
-		mutex_lock(&sw->mac_list_lock);
-		list_add(&fm_entry->list_entry, &sw->mac_list_head);
-		mutex_unlock(&sw->mac_list_lock);
-	} else if (l_type == ICE_SW_LKUP_VLAN) {
-		mutex_lock(&sw->vlan_list_lock);
-		list_add(&fm_entry->list_entry, &sw->vlan_list_head);
-		mutex_unlock(&sw->vlan_list_lock);
-	} else if (l_type == ICE_SW_LKUP_ETHERTYPE ||
-		   l_type == ICE_SW_LKUP_ETHERTYPE_MAC) {
-		mutex_lock(&sw->eth_m_list_lock);
-		list_add(&fm_entry->list_entry, &sw->eth_m_list_head);
-		mutex_unlock(&sw->eth_m_list_lock);
-	} else if (l_type == ICE_SW_LKUP_PROMISC ||
-		   l_type == ICE_SW_LKUP_PROMISC_VLAN) {
-		mutex_lock(&sw->promisc_list_lock);
-		list_add(&fm_entry->list_entry, &sw->promisc_list_head);
-		mutex_unlock(&sw->promisc_list_lock);
-	} else if (fm_entry->fltr_info.lkup_type == ICE_SW_LKUP_MAC_VLAN) {
-		mutex_lock(&sw->mac_vlan_list_lock);
-		list_add(&fm_entry->list_entry, &sw->mac_vlan_list_head);
-		mutex_unlock(&sw->mac_vlan_list_lock);
-	} else {
-		status = ICE_ERR_NOT_IMPL;
-	}
+	recp = &hw->switch_info->recp_list[l_type];
+	list_add(&fm_entry->list_entry, &recp->filt_rules);
+
 ice_create_pkt_fwd_rule_exit:
 	devm_kfree(ice_hw_to_dev(hw), s_rule);
 	return status;
@@ -893,19 +1133,15 @@
 /**
  * ice_update_pkt_fwd_rule
  * @hw: pointer to the hardware structure
- * @rule_id: rule of previously created switch rule to update
- * @vsi_list_id: VSI list id to be updated with
- * @f_info: ice_fltr_info to pull other information for switch rule
+ * @f_info: filter information for switch rule
  *
  * Call AQ command to update a previously created switch rule with a
- * VSI list id
+ * VSI list ID
  */
 static enum ice_status
-ice_update_pkt_fwd_rule(struct ice_hw *hw, u16 rule_id, u16 vsi_list_id,
-			struct ice_fltr_info f_info)
+ice_update_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_info *f_info)
 {
 	struct ice_aqc_sw_rules_elem *s_rule;
-	struct ice_fltr_info tmp_fltr;
 	enum ice_status status;
 
 	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
@@ -913,14 +1149,9 @@
 	if (!s_rule)
 		return ICE_ERR_NO_MEMORY;
 
-	tmp_fltr = f_info;
-	tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
-	tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+	ice_fill_sw_rule(hw, f_info, s_rule, ice_aqc_opc_update_sw_rules);
 
-	ice_fill_sw_rule(hw, &tmp_fltr, s_rule,
-			 ice_aqc_opc_update_sw_rules);
-
-	s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id);
+	s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(f_info->fltr_rule_id);
 
 	/* Update switch rule with new rule set to forward VSI list */
 	status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
@@ -931,7 +1162,48 @@
 }
 
 /**
- * ice_handle_vsi_list_mgmt
+ * ice_update_sw_rule_bridge_mode
+ * @hw: pointer to the HW struct
+ *
+ * Updates unicast switch filter rules based on VEB/VEPA mode
+ */
+enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	enum ice_status status = 0;
+	struct list_head *rule_head;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(fm_entry, rule_head, list_entry) {
+		struct ice_fltr_info *fi = &fm_entry->fltr_info;
+		u8 *addr = fi->l_data.mac.mac_addr;
+
+		/* Update unicast Tx rules to reflect the selected
+		 * VEB/VEPA mode
+		 */
+		if ((fi->flag & ICE_FLTR_TX) && is_unicast_ether_addr(addr) &&
+		    (fi->fltr_act == ICE_FWD_TO_VSI ||
+		     fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+		     fi->fltr_act == ICE_FWD_TO_Q ||
+		     fi->fltr_act == ICE_FWD_TO_QGRP)) {
+			status = ice_update_pkt_fwd_rule(hw, fi);
+			if (status)
+				break;
+		}
+	}
+
+	mutex_unlock(rule_lock);
+
+	return status;
+}
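
A sketch of the intended trigger: flip hw->evb_veb first, since ice_fill_sw_info() reads it while each unicast Tx rule is re-filled (the call site is hypothetical):

static enum ice_status example_set_bridge_mode(struct ice_hw *hw, bool veb)
{
	hw->evb_veb = veb;	/* consulted by ice_fill_sw_info() */
	return ice_update_sw_rule_bridge_mode(hw);
}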
+
+/**
+ * ice_add_update_vsi_list
  * @hw: pointer to the hardware structure
  * @m_entry: pointer to current filter management list entry
  * @cur_fltr: filter information from the book keeping entry
@@ -940,22 +1212,22 @@
  * Call AQ command to add or update previously created VSI list with new VSI.
  *
  * Helper function to do book keeping associated with adding filter information
- * The algorithm to do the booking keeping is described below :
- * When a VSI needs to subscribe to a given filter( MAC/VLAN/Ethtype etc.)
+ * The algorithm to do the bookkeeping is described below:
+ * When a VSI needs to subscribe to a given filter (MAC/VLAN/Ethtype etc.)
  *	if only one VSI has been added till now
  *		Allocate a new VSI list and add two VSIs
  *		to this list using switch rule command
  *		Update the previously created switch rule with the
- *		newly created VSI list id
+ *		newly created VSI list ID
  *	if a VSI list was previously created
  *		Add the new VSI to the previously created VSI list set
  *		using the update switch rule command
  */
 static enum ice_status
-ice_handle_vsi_list_mgmt(struct ice_hw *hw,
-			 struct ice_fltr_mgmt_list_entry *m_entry,
-			 struct ice_fltr_info *cur_fltr,
-			 struct ice_fltr_info *new_fltr)
+ice_add_update_vsi_list(struct ice_hw *hw,
+			struct ice_fltr_mgmt_list_entry *m_entry,
+			struct ice_fltr_info *cur_fltr,
+			struct ice_fltr_info *new_fltr)
 {
 	enum ice_status status = 0;
 	u16 vsi_list_id = 0;
@@ -975,34 +1247,36 @@
 		 * a part of a VSI list. So, create a VSI list with the old and
 		 * new VSIs.
 		 */
-		u16 vsi_id_arr[2];
-		u16 fltr_rule;
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
 
 		/* A rule already exists with the new VSI being added */
-		if (cur_fltr->fwd_id.vsi_id == new_fltr->fwd_id.vsi_id)
+		if (cur_fltr->fwd_id.hw_vsi_id == new_fltr->fwd_id.hw_vsi_id)
 			return ICE_ERR_ALREADY_EXISTS;
 
-		vsi_id_arr[0] = cur_fltr->fwd_id.vsi_id;
-		vsi_id_arr[1] = new_fltr->fwd_id.vsi_id;
-		status = ice_create_vsi_list_rule(hw, &vsi_id_arr[0], 2,
+		vsi_handle_arr[0] = cur_fltr->vsi_handle;
+		vsi_handle_arr[1] = new_fltr->vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
 						  &vsi_list_id,
 						  new_fltr->lkup_type);
 		if (status)
 			return status;
 
-		fltr_rule = cur_fltr->fltr_rule_id;
+		tmp_fltr = *new_fltr;
+		tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
 		/* Update the previous switch rule of "MAC forward to VSI" to
 		 * "MAC fwd to VSI list"
 		 */
-		status = ice_update_pkt_fwd_rule(hw, fltr_rule, vsi_list_id,
-						 *new_fltr);
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
 		if (status)
 			return status;
 
 		cur_fltr->fwd_id.vsi_list_id = vsi_list_id;
 		cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
 		m_entry->vsi_list_info =
-			ice_create_vsi_list_map(hw, &vsi_id_arr[0], 2,
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
 						vsi_list_id);
 
 		/* If this entry was large action then the large action needs
@@ -1014,25 +1288,28 @@
 					       m_entry->sw_marker_id,
 					       m_entry->lg_act_idx);
 	} else {
-		u16 vsi_id = new_fltr->fwd_id.vsi_id;
+		u16 vsi_handle = new_fltr->vsi_handle;
 		enum ice_adminq_opc opcode;
 
+		if (!m_entry->vsi_list_info)
+			return ICE_ERR_CFG;
+
 		/* A rule already exists with the new VSI being added */
-		if (test_bit(vsi_id, m_entry->vsi_list_info->vsi_map))
+		if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
 			return 0;
 
 		/* Update the previously created VSI list set with
-		 * the new VSI id passed in
+		 * the new VSI ID passed in
 		 */
 		vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
 		opcode = ice_aqc_opc_update_sw_rules;
 
-		status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id,
-						  false, opcode,
+		status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
+						  vsi_list_id, false, opcode,
 						  new_fltr->lkup_type);
-		/* update VSI list mapping info with new VSI id */
+		/* update VSI list mapping info with new VSI ID */
 		if (!status)
-			set_bit(vsi_id, m_entry->vsi_list_info->vsi_map);
+			set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
 	}
 	if (!status)
 		m_entry->vsi_count++;
@@ -1040,54 +1317,325 @@
 }
 
 /**
- * ice_find_mac_entry
+ * ice_find_rule_entry - Search a rule entry
  * @hw: pointer to the hardware structure
- * @mac_addr: MAC address to search for
+ * @recp_id: lookup type for which the specified rule needs to be searched
+ * @f_info: rule information
  *
- * Helper function to search for a MAC entry using a given MAC address
- * Returns pointer to the entry if found.
+ * Helper function to search for a given rule entry
+ * Returns pointer to entry storing the rule if found
  */
 static struct ice_fltr_mgmt_list_entry *
-ice_find_mac_entry(struct ice_hw *hw, u8 *mac_addr)
+ice_find_rule_entry(struct ice_hw *hw, u8 recp_id, struct ice_fltr_info *f_info)
 {
-	struct ice_fltr_mgmt_list_entry *m_list_itr, *mac_ret = NULL;
+	struct ice_fltr_mgmt_list_entry *list_itr, *ret = NULL;
 	struct ice_switch_info *sw = hw->switch_info;
+	struct list_head *list_head;
 
-	mutex_lock(&sw->mac_list_lock);
-	list_for_each_entry(m_list_itr, &sw->mac_list_head, list_entry) {
-		u8 *buf = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
-
-		if (ether_addr_equal(buf, mac_addr)) {
-			mac_ret = m_list_itr;
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
+			    sizeof(f_info->l_data)) &&
+		    f_info->flag == list_itr->fltr_info.flag) {
+			ret = list_itr;
 			break;
 		}
 	}
-	mutex_unlock(&sw->mac_list_lock);
-	return mac_ret;
+	return ret;
 }
 
 /**
- * ice_add_shared_mac - Add one MAC shared filter rule
+ * ice_find_vsi_list_entry - Search VSI list map with VSI count 1
  * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which VSI lists need to be searched
+ * @vsi_handle: VSI handle to be found in VSI list
+ * @vsi_list_id: VSI list ID found containing vsi_handle
+ *
+ * Helper function to search for a VSI list with a single entry that contains
+ * the given VSI handle. This can be extended to search VSI lists with more
+ * than one vsi_count. Returns a pointer to the VSI list entry if found.
+ */
+static struct ice_vsi_list_map_info *
+ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+			u16 *vsi_list_id)
+{
+	struct ice_vsi_list_map_info *map_info = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *list_head;
+
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
+			map_info = list_itr->vsi_list_info;
+			if (test_bit(vsi_handle, map_info->vsi_map)) {
+				*vsi_list_id = map_info->vsi_list_id;
+				return map_info;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * ice_add_rule_internal - add rule for a given lookup type
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type (recipe ID) for which rule has to be added
  * @f_entry: structure containing MAC forwarding information
  *
- * Adds or updates the book keeping list for the MAC addresses
+ * Adds or updates the rule lists for a given recipe
  */
 static enum ice_status
-ice_add_shared_mac(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+ice_add_rule_internal(struct ice_hw *hw, u8 recp_id,
+		      struct ice_fltr_list_entry *f_entry)
 {
+	struct ice_switch_info *sw = hw->switch_info;
 	struct ice_fltr_info *new_fltr, *cur_fltr;
 	struct ice_fltr_mgmt_list_entry *m_entry;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
 
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return ICE_ERR_PARAM;
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+	rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+
+	mutex_lock(rule_lock);
 	new_fltr = &f_entry->fltr_info;
+	if (new_fltr->flag & ICE_FLTR_RX)
+		new_fltr->src = hw->port_info->lport;
+	else if (new_fltr->flag & ICE_FLTR_TX)
+		new_fltr->src = f_entry->fltr_info.fwd_id.hw_vsi_id;
 
-	m_entry = ice_find_mac_entry(hw, &new_fltr->l_data.mac.mac_addr[0]);
-	if (!m_entry)
+	m_entry = ice_find_rule_entry(hw, recp_id, new_fltr);
+	if (!m_entry) {
+		mutex_unlock(rule_lock);
 		return ice_create_pkt_fwd_rule(hw, f_entry);
+	}
 
 	cur_fltr = &m_entry->fltr_info;
+	status = ice_add_update_vsi_list(hw, m_entry, cur_fltr, new_fltr);
+	mutex_unlock(rule_lock);
 
-	return ice_handle_vsi_list_mgmt(hw, m_entry, cur_fltr, new_fltr);
+	return status;
+}
+
+/**
+ * ice_remove_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @lkup_type: switch rule filter lookup type
+ *
+ * The VSI list should be emptied before this function is called to remove it.
+ */
+static enum ice_status
+ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_status status;
+	u16 s_rule_size;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
+	s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+
+	/* Free the vsi_list resource that we allocated. It is assumed that the
+	 * list is empty at this point.
+	 */
+	status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
+					    ice_aqc_opc_free_res);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_rem_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle of the VSI to remove
+ * @fm_list: filter management entry for which the VSI list management needs to
+ *           be done
+ */
+static enum ice_status
+ice_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle,
+			struct ice_fltr_mgmt_list_entry *fm_list)
+{
+	enum ice_sw_lkup_type lkup_type;
+	enum ice_status status = 0;
+	u16 vsi_list_id;
+
+	if (fm_list->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST ||
+	    fm_list->vsi_count == 0)
+		return ICE_ERR_PARAM;
+
+	/* A rule with the VSI being removed does not exist */
+	if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map))
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	lkup_type = fm_list->fltr_info.lkup_type;
+	vsi_list_id = fm_list->fltr_info.fwd_id.vsi_list_id;
+	status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true,
+					  ice_aqc_opc_update_sw_rules,
+					  lkup_type);
+	if (status)
+		return status;
+
+	fm_list->vsi_count--;
+	clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map);
+
+	if (fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) {
+		struct ice_fltr_info tmp_fltr_info = fm_list->fltr_info;
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list->vsi_list_info;
+		u16 rem_vsi_handle;
+
+		rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map,
+						ICE_MAX_VSI);
+		if (!ice_is_vsi_valid(hw, rem_vsi_handle))
+			return ICE_ERR_OUT_OF_RANGE;
+
+		/* Make sure VSI list is empty before removing it below */
+		status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1,
+						  vsi_list_id, true,
+						  ice_aqc_opc_update_sw_rules,
+						  lkup_type);
+		if (status)
+			return status;
+
+		tmp_fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		tmp_fltr_info.fwd_id.hw_vsi_id =
+			ice_get_hw_vsi_num(hw, rem_vsi_handle);
+		tmp_fltr_info.vsi_handle = rem_vsi_handle;
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr_info);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW,
+				  "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n",
+				  tmp_fltr_info.fwd_id.hw_vsi_id, status);
+			return status;
+		}
+
+		fm_list->fltr_info = tmp_fltr_info;
+	}
+
+	if ((fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) ||
+	    (fm_list->vsi_count == 0 && lkup_type == ICE_SW_LKUP_VLAN)) {
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list->vsi_list_info;
+
+		/* Remove the VSI list since it is no longer used */
+		status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW,
+				  "Failed to remove VSI list %d, error %d\n",
+				  vsi_list_id, status);
+			return status;
+		}
+
+		list_del(&vsi_list_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+		fm_list->vsi_list_info = NULL;
+	}
+
+	return status;
+}
+
+/**
+ * ice_remove_rule_internal - Remove a filter rule of a given type
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to be removed
+ * @f_entry: rule entry containing filter information
+ */
+static enum ice_status
+ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id,
+			 struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_elem;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
+	bool remove_rule = false;
+	u16 vsi_handle;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return ICE_ERR_PARAM;
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+	rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+	mutex_lock(rule_lock);
+	list_elem = ice_find_rule_entry(hw, recp_id, &f_entry->fltr_info);
+	if (!list_elem) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto exit;
+	}
+
+	if (list_elem->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST) {
+		remove_rule = true;
+	} else if (!list_elem->vsi_list_info) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto exit;
+	} else if (list_elem->vsi_list_info->ref_cnt > 1) {
+		/* a ref_cnt > 1 indicates that the vsi_list is being
+		 * shared by multiple rules. Decrement the ref_cnt and
+		 * remove this rule, but do not modify the list, as it
+		 * is in-use by other rules.
+		 */
+		list_elem->vsi_list_info->ref_cnt--;
+		remove_rule = true;
+	} else {
+		/* a ref_cnt of 1 indicates the vsi_list is only used
+		 * by one rule. However, the original removal request is only
+		 * for a single VSI. Update the vsi_list first, and only
+		 * remove the rule if there are no further VSIs in this list.
+		 */
+		vsi_handle = f_entry->fltr_info.vsi_handle;
+		status = ice_rem_update_vsi_list(hw, vsi_handle, list_elem);
+		if (status)
+			goto exit;
+		/* if VSI count goes to zero after updating the VSI list */
+		if (list_elem->vsi_count == 0)
+			remove_rule = true;
+	}
+
+	if (remove_rule) {
+		/* Remove the lookup rule */
+		struct ice_aqc_sw_rules_elem *s_rule;
+
+		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+				      ICE_SW_RULE_RX_TX_NO_HDR_SIZE,
+				      GFP_KERNEL);
+		if (!s_rule) {
+			status = ICE_ERR_NO_MEMORY;
+			goto exit;
+		}
+
+		ice_fill_sw_rule(hw, &list_elem->fltr_info, s_rule,
+				 ice_aqc_opc_remove_sw_rules);
+
+		status = ice_aq_sw_rules(hw, s_rule,
+					 ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
+					 ice_aqc_opc_remove_sw_rules, NULL);
+
+		/* Free the AQ buffer; the bookkeeping entry is removed below */
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
+
+		if (status)
+			goto exit;
+
+		list_del(&list_elem->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), list_elem);
+	}
+exit:
+	mutex_unlock(rule_lock);
+	return status;
 }
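
A worked example of the branches above, for a MAC rule shared by VSI handles 3 and 5 through one VSI list (ref_cnt == 1):

/* remove(handle 3): vsi_count 2 -> 1; ice_rem_update_vsi_list()
 *   rewrites the rule back to ICE_FWD_TO_VSI for handle 5 and frees
 *   the now-empty VSI list.
 * remove(handle 5): the action is plain ICE_FWD_TO_VSI, so the lookup
 *   rule itself is removed and the bookkeeping entry is freed.
 */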
 
 /**
@@ -1106,7 +1654,10 @@
 {
 	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
 	struct ice_fltr_list_entry *m_list_itr;
+	struct list_head *rule_head;
 	u16 elem_sent, total_elem_left;
+	struct ice_switch_info *sw;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
 	enum ice_status status = 0;
 	u16 num_unicast = 0;
 	u16 s_rule_size;
@@ -1114,48 +1665,73 @@
 	if (!m_list || !hw)
 		return ICE_ERR_PARAM;
 
+	s_rule = NULL;
+	sw = hw->switch_info;
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
 	list_for_each_entry(m_list_itr, m_list, list_entry) {
 		u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+		u16 vsi_handle;
+		u16 hw_vsi_id;
 
-		if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+		m_list_itr->fltr_info.flag = ICE_FLTR_TX;
+		vsi_handle = m_list_itr->fltr_info.vsi_handle;
+		if (!ice_is_vsi_valid(hw, vsi_handle))
 			return ICE_ERR_PARAM;
-		if (is_zero_ether_addr(add))
+		hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+		m_list_itr->fltr_info.fwd_id.hw_vsi_id = hw_vsi_id;
+		/* update the src in case it is VSI num */
+		if (m_list_itr->fltr_info.src_id != ICE_SRC_ID_VSI)
+			return ICE_ERR_PARAM;
+		m_list_itr->fltr_info.src = hw_vsi_id;
+		if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC ||
+		    is_zero_ether_addr(add))
 			return ICE_ERR_PARAM;
 		if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
 			/* Don't overwrite the unicast address */
-			if (ice_find_mac_entry(hw, add))
+			mutex_lock(rule_lock);
+			if (ice_find_rule_entry(hw, ICE_SW_LKUP_MAC,
+						&m_list_itr->fltr_info)) {
+				mutex_unlock(rule_lock);
 				return ICE_ERR_ALREADY_EXISTS;
+			}
+			mutex_unlock(rule_lock);
 			num_unicast++;
 		} else if (is_multicast_ether_addr(add) ||
 			   (is_unicast_ether_addr(add) && hw->ucast_shared)) {
-			status = ice_add_shared_mac(hw, m_list_itr);
-			if (status) {
-				m_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
-				return status;
-			}
-			m_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+			m_list_itr->status =
+				ice_add_rule_internal(hw, ICE_SW_LKUP_MAC,
+						      m_list_itr);
+			if (m_list_itr->status)
+				return m_list_itr->status;
 		}
 	}
 
+	mutex_lock(rule_lock);
 	/* Exit if no suitable entries were found for adding bulk switch rule */
-	if (!num_unicast)
-		return 0;
+	if (!num_unicast) {
+		status = 0;
+		goto ice_add_mac_exit;
+	}
+
+	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
 
 	/* Allocate switch rule buffer for the bulk update for unicast */
 	s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
 	s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
 			      GFP_KERNEL);
-	if (!s_rule)
-		return ICE_ERR_NO_MEMORY;
+	if (!s_rule) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_add_mac_exit;
+	}
 
 	r_iter = s_rule;
 	list_for_each_entry(m_list_itr, m_list, list_entry) {
 		struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
-		u8 *addr = &f_info->l_data.mac.mac_addr[0];
+		u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
 
-		if (is_unicast_ether_addr(addr)) {
-			ice_fill_sw_rule(hw, &m_list_itr->fltr_info,
-					 r_iter, ice_aqc_opc_add_sw_rules);
+		if (is_unicast_ether_addr(mac_addr)) {
+			ice_fill_sw_rule(hw, &m_list_itr->fltr_info, r_iter,
+					 ice_aqc_opc_add_sw_rules);
 			r_iter = (struct ice_aqc_sw_rules_elem *)
 				((u8 *)r_iter + s_rule_size);
 		}
@@ -1179,15 +1755,14 @@
 			((u8 *)r_iter + (elem_sent * s_rule_size));
 	}
 
-	/* Fill up rule id based on the value returned from FW */
+	/* Fill up rule ID based on the value returned from FW */
 	r_iter = s_rule;
 	list_for_each_entry(m_list_itr, m_list, list_entry) {
 		struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
-		u8 *addr = &f_info->l_data.mac.mac_addr[0];
-		struct ice_switch_info *sw = hw->switch_info;
+		u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
 		struct ice_fltr_mgmt_list_entry *fm_entry;
 
-		if (is_unicast_ether_addr(addr)) {
+		if (is_unicast_ether_addr(mac_addr)) {
 			f_info->fltr_rule_id =
 				le16_to_cpu(r_iter->pdata.lkup_tx_rx.index);
 			f_info->fltr_act = ICE_FWD_TO_VSI;
@@ -1203,46 +1778,21 @@
 			/* The book keeping entries will get removed when
 			 * base driver calls remove filter AQ command
 			 */
-			mutex_lock(&sw->mac_list_lock);
-			list_add(&fm_entry->list_entry, &sw->mac_list_head);
-			mutex_unlock(&sw->mac_list_lock);
 
+			list_add(&fm_entry->list_entry, rule_head);
 			r_iter = (struct ice_aqc_sw_rules_elem *)
 				((u8 *)r_iter + s_rule_size);
 		}
 	}
 
 ice_add_mac_exit:
-	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	mutex_unlock(rule_lock);
+	if (s_rule)
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
 	return status;
 }
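
A sketch of calling the list-based API for a single unicast address; the field setup follows the validation inside ice_add_mac() above, and the helper plus its temporary list are illustrative:

static enum ice_status
example_add_one_mac(struct ice_hw *hw, u16 vsi_handle, const u8 *mac)
{
	struct ice_fltr_list_entry entry = {};
	LIST_HEAD(tmp_add_list);

	entry.fltr_info.lkup_type = ICE_SW_LKUP_MAC;
	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
	entry.fltr_info.src_id = ICE_SRC_ID_VSI;	/* required above */
	entry.fltr_info.vsi_handle = vsi_handle;
	ether_addr_copy(entry.fltr_info.l_data.mac.mac_addr, mac);

	list_add(&entry.list_entry, &tmp_add_list);
	return ice_add_mac(hw, &tmp_add_list);
}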
 
 /**
- * ice_find_vlan_entry
- * @hw: pointer to the hardware structure
- * @vlan_id: VLAN id to search for
- *
- * Helper function to search for a VLAN entry using a given VLAN id
- * Returns pointer to the entry if found.
- */
-static struct ice_fltr_mgmt_list_entry *
-ice_find_vlan_entry(struct ice_hw *hw, u16 vlan_id)
-{
-	struct ice_fltr_mgmt_list_entry *vlan_list_itr, *vlan_ret = NULL;
-	struct ice_switch_info *sw = hw->switch_info;
-
-	mutex_lock(&sw->vlan_list_lock);
-	list_for_each_entry(vlan_list_itr, &sw->vlan_list_head, list_entry)
-		if (vlan_list_itr->fltr_info.l_data.vlan.vlan_id == vlan_id) {
-			vlan_ret = vlan_list_itr;
-			break;
-		}
-
-	mutex_unlock(&sw->vlan_list_lock);
-	return vlan_ret;
-}
-
-/**
  * ice_add_vlan_internal - Add one VLAN based filter rule
  * @hw: pointer to the hardware structure
  * @f_entry: filter entry containing one VLAN information
@@ -1250,53 +1800,150 @@
 static enum ice_status
 ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
 {
-	struct ice_fltr_info *new_fltr, *cur_fltr;
+	struct ice_switch_info *sw = hw->switch_info;
 	struct ice_fltr_mgmt_list_entry *v_list_itr;
-	u16 vlan_id;
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	enum ice_sw_lkup_type lkup_type;
+	u16 vsi_list_id = 0, vsi_handle;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
 
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return ICE_ERR_PARAM;
+
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
 	new_fltr = &f_entry->fltr_info;
-	/* VLAN id should only be 12 bits */
+
+	/* VLAN ID should only be 12 bits */
 	if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
 		return ICE_ERR_PARAM;
 
-	vlan_id = new_fltr->l_data.vlan.vlan_id;
-	v_list_itr = ice_find_vlan_entry(hw, vlan_id);
+	if (new_fltr->src_id != ICE_SRC_ID_VSI)
+		return ICE_ERR_PARAM;
+
+	new_fltr->src = new_fltr->fwd_id.hw_vsi_id;
+	lkup_type = new_fltr->lkup_type;
+	vsi_handle = new_fltr->vsi_handle;
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	mutex_lock(rule_lock);
+	v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN, new_fltr);
 	if (!v_list_itr) {
-		u16 vsi_id = ICE_VSI_INVAL_ID;
-		enum ice_status status;
-		u16 vsi_list_id = 0;
+		struct ice_vsi_list_map_info *map_info = NULL;
 
 		if (new_fltr->fltr_act == ICE_FWD_TO_VSI) {
-			enum ice_sw_lkup_type lkup_type = new_fltr->lkup_type;
-
-			/* All VLAN pruning rules use a VSI list.
-			 * Convert the action to forwarding to a VSI list.
+			/* All VLAN pruning rules use a VSI list. Check if
+			 * there is already a VSI list containing the VSI that
+			 * we want to add. If found, use the same vsi_list_id
+			 * for this new VLAN rule; otherwise create a new list.
 			 */
-			vsi_id = new_fltr->fwd_id.vsi_id;
-			status = ice_create_vsi_list_rule(hw, &vsi_id, 1,
-							  &vsi_list_id,
-							  lkup_type);
-			if (status)
-				return status;
+			map_info = ice_find_vsi_list_entry(hw, ICE_SW_LKUP_VLAN,
+							   vsi_handle,
+							   &vsi_list_id);
+			if (!map_info) {
+				status = ice_create_vsi_list_rule(hw,
+								  &vsi_handle,
+								  1,
+								  &vsi_list_id,
+								  lkup_type);
+				if (status)
+					goto exit;
+			}
+			/* Convert the action to forwarding to a VSI list. */
 			new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
 			new_fltr->fwd_id.vsi_list_id = vsi_list_id;
 		}
 
 		status = ice_create_pkt_fwd_rule(hw, f_entry);
-		if (!status && vsi_id != ICE_VSI_INVAL_ID) {
-			v_list_itr = ice_find_vlan_entry(hw, vlan_id);
-			if (!v_list_itr)
-				return ICE_ERR_DOES_NOT_EXIST;
-			v_list_itr->vsi_list_info =
-				ice_create_vsi_list_map(hw, &vsi_id, 1,
-							vsi_list_id);
+		if (!status) {
+			v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN,
+							 new_fltr);
+			if (!v_list_itr) {
+				status = ICE_ERR_DOES_NOT_EXIST;
+				goto exit;
+			}
+			/* reuse VSI list for new rule and increment ref_cnt */
+			if (map_info) {
+				v_list_itr->vsi_list_info = map_info;
+				map_info->ref_cnt++;
+			} else {
+				v_list_itr->vsi_list_info =
+					ice_create_vsi_list_map(hw, &vsi_handle,
+								1, vsi_list_id);
+			}
+		}
+	} else if (v_list_itr->vsi_list_info->ref_cnt == 1) {
+		/* Update the existing VSI list to add the new VSI ID only if
+		 * it is used by exactly one VLAN rule.
+		 */
+		cur_fltr = &v_list_itr->fltr_info;
+		status = ice_add_update_vsi_list(hw, v_list_itr, cur_fltr,
+						 new_fltr);
+	} else {
+		/* The VLAN rule exists, but the VSI list it uses is referenced
+		 * by more than one VLAN rule. Create a new VSI list holding
+		 * both the previous VSI and the new one, and update the
+		 * existing VLAN rule to point to the new VSI list ID.
+		 */
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
+		u16 cur_handle;
+
+		/* The current implementation only supports reusing a VSI list
+		 * that contains a single VSI, so the condition below should
+		 * never be hit.
+		 */
+		if (v_list_itr->vsi_count > 1 &&
+		    v_list_itr->vsi_list_info->ref_cnt > 1) {
+			ice_debug(hw, ICE_DBG_SW,
+				  "Invalid configuration: Optimization to reuse VSI list with more than one VSI is not being done yet\n");
+			status = ICE_ERR_CFG;
+			goto exit;
 		}
 
-		return status;
+		cur_handle =
+			find_first_bit(v_list_itr->vsi_list_info->vsi_map,
+				       ICE_MAX_VSI);
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_handle == vsi_handle) {
+			status = ICE_ERR_ALREADY_EXISTS;
+			goto exit;
+		}
+
+		vsi_handle_arr[0] = cur_handle;
+		vsi_handle_arr[1] = vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+						  &vsi_list_id, lkup_type);
+		if (status)
+			goto exit;
+
+		tmp_fltr = v_list_itr->fltr_info;
+		tmp_fltr.fltr_rule_id = v_list_itr->fltr_info.fltr_rule_id;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		/* Update the previous switch rule to a new VSI list which
+		 * includes current VSI that is requested
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status)
+			goto exit;
+
+		/* Before overriding the VSI list map info, decrement the
+		 * ref_cnt of the previous VSI list.
+		 */
+		v_list_itr->vsi_list_info->ref_cnt--;
+
+		/* now update to newly created list */
+		v_list_itr->fltr_info.fwd_id.vsi_list_id = vsi_list_id;
+		v_list_itr->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+						vsi_list_id);
+		v_list_itr->vsi_count++;
 	}
 
-	cur_fltr = &v_list_itr->fltr_info;
-	return ice_handle_vsi_list_mgmt(hw, v_list_itr, cur_fltr, new_fltr);
+exit:
+	mutex_unlock(rule_lock);
+	return status;
 }
 
 /**
@@ -1313,335 +1960,120 @@
 		return ICE_ERR_PARAM;
 
 	list_for_each_entry(v_list_itr, v_list, list_entry) {
-		enum ice_status status;
-
 		if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN)
 			return ICE_ERR_PARAM;
-
-		status = ice_add_vlan_internal(hw, v_list_itr);
-		if (status) {
-			v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
-			return status;
-		}
-		v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+		v_list_itr->fltr_info.flag = ICE_FLTR_TX;
+		v_list_itr->status = ice_add_vlan_internal(hw, v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
 	}
 	return 0;
 }
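
A hedged sketch of the corresponding VLAN usage: ice_add_vlan() sets the ICE_FLTR_TX flag itself, so a caller only populates the lookup type, action, source ID, VSI handle, and VLAN ID. The helper name example_add_vlan is a placeholder.

static enum ice_status example_add_vlan(struct ice_hw *hw, u16 vsi_handle,
					u16 vid)
{
	struct ice_fltr_list_entry entry = {};
	LIST_HEAD(v_list);

	entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
	entry.fltr_info.src_id = ICE_SRC_ID_VSI;
	entry.fltr_info.vsi_handle = vsi_handle;
	/* ice_add_vlan_internal() rejects IDs above ICE_MAX_VLAN_ID */
	entry.fltr_info.l_data.vlan.vlan_id = vid;
	list_add(&entry.list_entry, &v_list);

	return ice_add_vlan(hw, &v_list);
}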
 
 /**
- * ice_remove_vsi_list_rule
+ * ice_add_eth_mac - Add ethertype and MAC based filter rule
  * @hw: pointer to the hardware structure
- * @vsi_list_id: VSI list id generated as part of allocate resource
- * @lkup_type: switch rule filter lookup type
- */
-static enum ice_status
-ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
-			 enum ice_sw_lkup_type lkup_type)
-{
-	struct ice_aqc_sw_rules_elem *s_rule;
-	enum ice_status status;
-	u16 s_rule_size;
-
-	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0);
-	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
-	if (!s_rule)
-		return ICE_ERR_NO_MEMORY;
-
-	s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
-	s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
-	/* FW expects number of VSIs in vsi_list resource to be 0 for clear
-	 * command. Since memory is zero'ed out during initialization, it's not
-	 * necessary to explicitly initialize the variable to 0.
-	 */
-
-	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1,
-				 ice_aqc_opc_remove_sw_rules, NULL);
-	if (!status)
-		/* Free the vsi_list resource that we allocated */
-		status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
-						    ice_aqc_opc_free_res);
-
-	devm_kfree(ice_hw_to_dev(hw), s_rule);
-	return status;
-}
-
-/**
- * ice_handle_rem_vsi_list_mgmt
- * @hw: pointer to the hardware structure
- * @vsi_id: ID of the VSI to remove
- * @fm_list_itr: filter management entry for which the VSI list management
- * needs to be done
- */
-static enum ice_status
-ice_handle_rem_vsi_list_mgmt(struct ice_hw *hw, u16 vsi_id,
-			     struct ice_fltr_mgmt_list_entry *fm_list_itr)
-{
-	struct ice_switch_info *sw = hw->switch_info;
-	enum ice_status status = 0;
-	enum ice_sw_lkup_type lkup_type;
-	bool is_last_elem = true;
-	bool conv_list = false;
-	bool del_list = false;
-	u16 vsi_list_id;
-
-	lkup_type = fm_list_itr->fltr_info.lkup_type;
-	vsi_list_id = fm_list_itr->fltr_info.fwd_id.vsi_list_id;
-
-	if (fm_list_itr->vsi_count > 1) {
-		status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id,
-						  true,
-						  ice_aqc_opc_update_sw_rules,
-						  lkup_type);
-		if (status)
-			return status;
-		fm_list_itr->vsi_count--;
-		is_last_elem = false;
-		clear_bit(vsi_id, fm_list_itr->vsi_list_info->vsi_map);
-	}
-
-	/* For non-VLAN rules that forward packets to a VSI list, convert them
-	 * to forwarding packets to a VSI if there is only one VSI left in the
-	 * list.  Unused lists are then removed.
-	 * VLAN rules need to use VSI lists even with only one VSI.
-	 */
-	if (fm_list_itr->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST) {
-		if (lkup_type == ICE_SW_LKUP_VLAN) {
-			del_list = is_last_elem;
-		} else if (fm_list_itr->vsi_count == 1) {
-			conv_list = true;
-			del_list = true;
-		}
-	}
-
-	if (del_list) {
-		/* Remove the VSI list since it is no longer used */
-		struct ice_vsi_list_map_info *vsi_list_info =
-			fm_list_itr->vsi_list_info;
-
-		status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
-		if (status)
-			return status;
-
-		if (conv_list) {
-			u16 rem_vsi_id;
-
-			rem_vsi_id = find_first_bit(vsi_list_info->vsi_map,
-						    ICE_MAX_VSI);
-
-			/* Error out when the expected last element is not in
-			 * the VSI list map
-			 */
-			if (rem_vsi_id == ICE_MAX_VSI)
-				return ICE_ERR_OUT_OF_RANGE;
-
-			/* Change the list entry action from VSI_LIST to VSI */
-			fm_list_itr->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-			fm_list_itr->fltr_info.fwd_id.vsi_id = rem_vsi_id;
-		}
-
-		list_del(&vsi_list_info->list_entry);
-		devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
-		fm_list_itr->vsi_list_info = NULL;
-	}
-
-	if (conv_list) {
-		/* Convert the rule's forward action to forwarding packets to
-		 * a VSI
-		 */
-		struct ice_aqc_sw_rules_elem *s_rule;
-
-		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
-				      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE,
-				      GFP_KERNEL);
-		if (!s_rule)
-			return ICE_ERR_NO_MEMORY;
-
-		ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule,
-				 ice_aqc_opc_update_sw_rules);
-
-		s_rule->pdata.lkup_tx_rx.index =
-			cpu_to_le16(fm_list_itr->fltr_info.fltr_rule_id);
-
-		status = ice_aq_sw_rules(hw, s_rule,
-					 ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
-					 ice_aqc_opc_update_sw_rules, NULL);
-		devm_kfree(ice_hw_to_dev(hw), s_rule);
-		if (status)
-			return status;
-	}
-
-	if (is_last_elem) {
-		/* Remove the lookup rule */
-		struct ice_aqc_sw_rules_elem *s_rule;
-
-		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
-				      ICE_SW_RULE_RX_TX_NO_HDR_SIZE,
-				      GFP_KERNEL);
-		if (!s_rule)
-			return ICE_ERR_NO_MEMORY;
-
-		ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule,
-				 ice_aqc_opc_remove_sw_rules);
-
-		status = ice_aq_sw_rules(hw, s_rule,
-					 ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
-					 ice_aqc_opc_remove_sw_rules, NULL);
-		if (status)
-			return status;
-
-		/* Remove a book keeping entry from the MAC address list */
-		mutex_lock(&sw->mac_list_lock);
-		list_del(&fm_list_itr->list_entry);
-		mutex_unlock(&sw->mac_list_lock);
-		devm_kfree(ice_hw_to_dev(hw), fm_list_itr);
-		devm_kfree(ice_hw_to_dev(hw), s_rule);
-	}
-	return status;
-}
-
-/**
- * ice_remove_mac_entry
- * @hw: pointer to the hardware structure
- * @f_entry: structure containing MAC forwarding information
- */
-static enum ice_status
-ice_remove_mac_entry(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
-{
-	struct ice_fltr_mgmt_list_entry *m_entry;
-	u16 vsi_id;
-	u8 *add;
-
-	add = &f_entry->fltr_info.l_data.mac.mac_addr[0];
-
-	m_entry = ice_find_mac_entry(hw, add);
-	if (!m_entry)
-		return ICE_ERR_PARAM;
-
-	vsi_id = f_entry->fltr_info.fwd_id.vsi_id;
-	return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, m_entry);
-}
-
-/**
- * ice_remove_mac - remove a MAC address based filter rule
- * @hw: pointer to the hardware structure
- * @m_list: list of MAC addresses and forwarding information
+ * @em_list: list of ethertype/MAC filters; the MAC portion is optional
  *
- * This function removes either a MAC filter rule or a specific VSI from a
- * VSI list for a multicast MAC address.
- *
- * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by
- * ice_add_mac. Caller should be aware that this call will only work if all
- * the entries passed into m_list were added previously. It will not attempt to
- * do a partial remove of entries that were found.
+ * This function requires the caller to populate the entries in
+ * the filter list with the necessary fields (including flags to
+ * indicate Tx or Rx rules).
  */
 enum ice_status
-ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
 {
-	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
-	u8 s_rule_size = ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
-	struct ice_switch_info *sw = hw->switch_info;
-	struct ice_fltr_mgmt_list_entry *m_entry;
-	struct ice_fltr_list_entry *m_list_itr;
-	u16 elem_sent, total_elem_left;
-	enum ice_status status = 0;
-	u16 num_unicast = 0;
+	struct ice_fltr_list_entry *em_list_itr;
 
-	if (!m_list)
+	if (!em_list || !hw)
 		return ICE_ERR_PARAM;
 
-	list_for_each_entry(m_list_itr, m_list, list_entry) {
-		u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+	list_for_each_entry(em_list_itr, em_list, list_entry) {
+		enum ice_sw_lkup_type l_type =
+			em_list_itr->fltr_info.lkup_type;
 
-		if (is_unicast_ether_addr(addr) && !hw->ucast_shared)
-			num_unicast++;
-		else if (is_multicast_ether_addr(addr) ||
-			 (is_unicast_ether_addr(addr) && hw->ucast_shared))
-			ice_remove_mac_entry(hw, m_list_itr);
+		if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+		    l_type != ICE_SW_LKUP_ETHERTYPE)
+			return ICE_ERR_PARAM;
+
+		em_list_itr->status = ice_add_rule_internal(hw, l_type,
+							    em_list_itr);
+		if (em_list_itr->status)
+			return em_list_itr->status;
 	}
-
-	/* Exit if no unicast addresses found. Multicast switch rules
-	 * were added individually
-	 */
-	if (!num_unicast)
-		return 0;
-
-	/* Allocate switch rule buffer for the bulk update for unicast */
-	s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
-			      GFP_KERNEL);
-	if (!s_rule)
-		return ICE_ERR_NO_MEMORY;
-
-	r_iter = s_rule;
-	list_for_each_entry(m_list_itr, m_list, list_entry) {
-		u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
-
-		if (is_unicast_ether_addr(addr)) {
-			m_entry = ice_find_mac_entry(hw, addr);
-			if (!m_entry) {
-				status = ICE_ERR_DOES_NOT_EXIST;
-				goto ice_remove_mac_exit;
-			}
-
-			ice_fill_sw_rule(hw, &m_entry->fltr_info,
-					 r_iter, ice_aqc_opc_remove_sw_rules);
-			r_iter = (struct ice_aqc_sw_rules_elem *)
-				((u8 *)r_iter + s_rule_size);
-		}
-	}
-
-	/* Call AQ bulk switch rule update for all unicast addresses */
-	r_iter = s_rule;
-	/* Call AQ switch rule in AQ_MAX chunk */
-	for (total_elem_left = num_unicast; total_elem_left > 0;
-	     total_elem_left -= elem_sent) {
-		struct ice_aqc_sw_rules_elem *entry = r_iter;
-
-		elem_sent = min(total_elem_left,
-				(u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size));
-		status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
-					 elem_sent, ice_aqc_opc_remove_sw_rules,
-					 NULL);
-		if (status)
-			break;
-		r_iter = (struct ice_aqc_sw_rules_elem *)
-			((u8 *)r_iter + s_rule_size);
-	}
-
-	list_for_each_entry(m_list_itr, m_list, list_entry) {
-		u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
-
-		if (is_unicast_ether_addr(addr)) {
-			m_entry = ice_find_mac_entry(hw, addr);
-			if (!m_entry)
-				return ICE_ERR_OUT_OF_RANGE;
-			mutex_lock(&sw->mac_list_lock);
-			list_del(&m_entry->list_entry);
-			mutex_unlock(&sw->mac_list_lock);
-			devm_kfree(ice_hw_to_dev(hw), m_entry);
-		}
-	}
-
-ice_remove_mac_exit:
-	devm_kfree(ice_hw_to_dev(hw), s_rule);
-	return status;
+	return 0;
 }
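
A sketch of an ethertype-only entry for ice_add_eth_mac(), assuming a forward-to-VSI action; the MAC portion of l_data stays zero since it is optional for ICE_SW_LKUP_ETHERTYPE. Helper and parameter names are placeholders.

static enum ice_status example_add_ethertype(struct ice_hw *hw,
					     u16 vsi_handle, u16 ethertype)
{
	struct ice_fltr_list_entry entry = {};
	LIST_HEAD(em_list);

	entry.fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
	entry.fltr_info.flag = ICE_FLTR_TX;
	entry.fltr_info.src_id = ICE_SRC_ID_VSI;
	entry.fltr_info.vsi_handle = vsi_handle;
	entry.fltr_info.l_data.ethertype_mac.ethertype = ethertype;
	list_add(&entry.list_entry, &em_list);

	return ice_add_eth_mac(hw, &em_list);
}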
 
 /**
- * ice_cfg_dflt_vsi - add filter rule to set/unset given VSI as default
- * VSI for the switch (represented by swid)
+ * ice_remove_eth_mac - Remove an ethertype (or MAC) based filter rule
  * @hw: pointer to the hardware structure
- * @vsi_id: number of VSI to set as default
+ * @em_list: list of ethertype or ethertype MAC entries
+ */
+enum ice_status
+ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+	struct ice_fltr_list_entry *em_list_itr, *tmp;
+
+	if (!em_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry_safe(em_list_itr, tmp, em_list, list_entry) {
+		enum ice_sw_lkup_type l_type =
+			em_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+		    l_type != ICE_SW_LKUP_ETHERTYPE)
+			return ICE_ERR_PARAM;
+
+		em_list_itr->status = ice_remove_rule_internal(hw, l_type,
+							       em_list_itr);
+		if (em_list_itr->status)
+			return em_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_rem_sw_rule_info
+ * @hw: pointer to the hardware structure
+ * @rule_head: pointer to the switch list structure that we want to delete
+ */
+static void
+ice_rem_sw_rule_info(struct ice_hw *hw, struct list_head *rule_head)
+{
+	if (!list_empty(rule_head)) {
+		struct ice_fltr_mgmt_list_entry *entry;
+		struct ice_fltr_mgmt_list_entry *tmp;
+
+		list_for_each_entry_safe(entry, tmp, rule_head, list_entry) {
+			list_del(&entry->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), entry);
+		}
+	}
+}
+
+/**
+ * ice_cfg_dflt_vsi - change state of VSI to set/clear default
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to set as default
  * @set: true to add the above mentioned switch rule, false to remove it
  * @direction: ICE_FLTR_RX or ICE_FLTR_TX
+ *
+ * Add a filter rule to set or unset the given VSI as the default VSI for
+ * the switch (represented by the swid).
  */
 enum ice_status
-ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction)
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction)
 {
 	struct ice_aqc_sw_rules_elem *s_rule;
 	struct ice_fltr_info f_info;
 	enum ice_adminq_opc opcode;
 	enum ice_status status;
 	u16 s_rule_size;
+	u16 hw_vsi_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
 
 	s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE :
 			    ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
@@ -1654,15 +2086,17 @@
 	f_info.lkup_type = ICE_SW_LKUP_DFLT;
 	f_info.flag = direction;
 	f_info.fltr_act = ICE_FWD_TO_VSI;
-	f_info.fwd_id.vsi_id = vsi_id;
+	f_info.fwd_id.hw_vsi_id = hw_vsi_id;
 
 	if (f_info.flag & ICE_FLTR_RX) {
 		f_info.src = hw->port_info->lport;
+		f_info.src_id = ICE_SRC_ID_LPORT;
 		if (!set)
 			f_info.fltr_rule_id =
 				hw->port_info->dflt_rx_vsi_rule_id;
 	} else if (f_info.flag & ICE_FLTR_TX) {
-		f_info.src = vsi_id;
+		f_info.src_id = ICE_SRC_ID_VSI;
+		f_info.src = hw_vsi_id;
 		if (!set)
 			f_info.fltr_rule_id =
 				hw->port_info->dflt_tx_vsi_rule_id;
@@ -1682,10 +2116,10 @@
 		u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
 
 		if (f_info.flag & ICE_FLTR_TX) {
-			hw->port_info->dflt_tx_vsi_num = vsi_id;
+			hw->port_info->dflt_tx_vsi_num = hw_vsi_id;
 			hw->port_info->dflt_tx_vsi_rule_id = index;
 		} else if (f_info.flag & ICE_FLTR_RX) {
-			hw->port_info->dflt_rx_vsi_num = vsi_id;
+			hw->port_info->dflt_rx_vsi_num = hw_vsi_id;
 			hw->port_info->dflt_rx_vsi_rule_id = index;
 		}
 	} else {
@@ -1704,26 +2138,94 @@
 }
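
By way of illustration, claiming and later releasing the default-VSI role for Rx traffic would look roughly like this; the pairing is inferred from the set/direction parameters above, not taken from the patch.

	/* make this VSI the default destination for unmatched Rx traffic */
	status = ice_cfg_dflt_vsi(hw, vsi_handle, true, ICE_FLTR_RX);

	/* later, remove the default-VSI rule again */
	status = ice_cfg_dflt_vsi(hw, vsi_handle, false, ICE_FLTR_RX);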
 
 /**
- * ice_remove_vlan_internal - Remove one VLAN based filter rule
+ * ice_find_ucast_rule_entry - Search for a unicast MAC filter rule entry
  * @hw: pointer to the hardware structure
- * @f_entry: filter entry containing one VLAN information
+ * @recp_id: lookup type for which the specified rule needs to be searched
+ * @f_info: rule information
+ *
+ * Helper function to search for a unicast rule entry - this is to be used
+ * to remove a unicast MAC filter that is not shared with other VSIs on the
+ * PF switch.
+ *
+ * Returns a pointer to the entry storing the rule, if found
  */
-static enum ice_status
-ice_remove_vlan_internal(struct ice_hw *hw,
-			 struct ice_fltr_list_entry *f_entry)
+static struct ice_fltr_mgmt_list_entry *
+ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id,
+			  struct ice_fltr_info *f_info)
 {
-	struct ice_fltr_info *new_fltr;
-	struct ice_fltr_mgmt_list_entry *v_list_elem;
-	u16 vsi_id;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *list_head;
 
-	new_fltr = &f_entry->fltr_info;
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
+			    sizeof(f_info->l_data)) &&
+		    f_info->fwd_id.hw_vsi_id ==
+		    list_itr->fltr_info.fwd_id.hw_vsi_id &&
+		    f_info->flag == list_itr->fltr_info.flag)
+			return list_itr;
+	}
+	return NULL;
+}
 
-	v_list_elem = ice_find_vlan_entry(hw, new_fltr->l_data.vlan.vlan_id);
-	if (!v_list_elem)
+/**
+ * ice_remove_mac - remove a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * This function removes either a MAC filter rule or a specific VSI from a
+ * VSI list for a multicast MAC address.
+ *
+ * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by
+ * ice_add_mac. Caller should be aware that this call will only work if all
+ * the entries passed into m_list were added previously. It will not attempt to
+ * do a partial remove of entries that were found.
+ */
+enum ice_status
+ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_fltr_list_entry *list_itr, *tmp;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+
+	if (!m_list)
 		return ICE_ERR_PARAM;
 
-	vsi_id = f_entry->fltr_info.fwd_id.vsi_id;
-	return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, v_list_elem);
+	rule_lock = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+	list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) {
+		enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type;
+		u8 *add = &list_itr->fltr_info.l_data.mac.mac_addr[0];
+		u16 vsi_handle;
+
+		if (l_type != ICE_SW_LKUP_MAC)
+			return ICE_ERR_PARAM;
+
+		vsi_handle = list_itr->fltr_info.vsi_handle;
+		if (!ice_is_vsi_valid(hw, vsi_handle))
+			return ICE_ERR_PARAM;
+
+		list_itr->fltr_info.fwd_id.hw_vsi_id =
+					ice_get_hw_vsi_num(hw, vsi_handle);
+		if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
+			/* Don't remove the unicast address that belongs to
+			 * another VSI on the switch, since it is not being
+			 * shared...
+			 */
+			mutex_lock(rule_lock);
+			if (!ice_find_ucast_rule_entry(hw, ICE_SW_LKUP_MAC,
+						       &list_itr->fltr_info)) {
+				mutex_unlock(rule_lock);
+				return ICE_ERR_DOES_NOT_EXIST;
+			}
+			mutex_unlock(rule_lock);
+		}
+		list_itr->status = ice_remove_rule_internal(hw,
+							    ICE_SW_LKUP_MAC,
+							    list_itr);
+		if (list_itr->status)
+			return list_itr->status;
+	}
+	return 0;
 }
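
Removal expects list entries populated with the same key fields used when the filter was added, since ice_find_ucast_rule_entry() above matches on l_data, hw_vsi_id, and flag. A minimal sketch (hw, vsi_handle, macaddr, and status are assumed to be in scope):

	struct ice_fltr_list_entry entry = {};
	LIST_HEAD(m_list);

	entry.fltr_info.lkup_type = ICE_SW_LKUP_MAC;
	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
	entry.fltr_info.flag = ICE_FLTR_TX;
	entry.fltr_info.vsi_handle = vsi_handle;
	ether_addr_copy(entry.fltr_info.l_data.mac.mac_addr, macaddr);
	list_add(&entry.list_entry, &m_list);
	status = ice_remove_mac(hw, &m_list);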
 
 /**
@@ -1734,131 +2236,456 @@
 enum ice_status
 ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list)
 {
-	struct ice_fltr_list_entry *v_list_itr;
-	enum ice_status status = 0;
+	struct ice_fltr_list_entry *v_list_itr, *tmp;
 
 	if (!v_list || !hw)
 		return ICE_ERR_PARAM;
 
-	list_for_each_entry(v_list_itr, v_list, list_entry) {
-		status = ice_remove_vlan_internal(hw, v_list_itr);
-		if (status) {
-			v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
-			return status;
-		}
-		v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+	list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+		enum ice_sw_lkup_type l_type = v_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_VLAN)
+			return ICE_ERR_PARAM;
+		v_list_itr->status = ice_remove_rule_internal(hw,
+							      ICE_SW_LKUP_VLAN,
+							      v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
 	}
-	return status;
+	return 0;
+}
+
+/**
+ * ice_vsi_uses_fltr - Determine if given VSI uses specified filter
+ * @fm_entry: filter entry to inspect
+ * @vsi_handle: VSI handle to compare with filter info
+ */
+static bool
+ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle)
+{
+	return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI &&
+		 fm_entry->fltr_info.vsi_handle == vsi_handle) ||
+		(fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST &&
+		 (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map))));
+}
+
+/**
+ * ice_add_entry_to_vsi_fltr_list - Add copy of fltr_list_entry to remove list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @vsi_list_head: pointer to the list to add entry to
+ * @fi: pointer to fltr_info of filter entry to copy & add
+ *
+ * Helper function, used when creating a list of filters to remove from
+ * a specific VSI. The entry added to vsi_list_head is a COPY of the
+ * original filter entry, with the exception of the fltr_info.fltr_act and
+ * fltr_info.fwd_id fields. These are set such that later logic can
+ * extract which VSI to remove the filter from, and pass on that information.
+ */
+static enum ice_status
+ice_add_entry_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+			       struct list_head *vsi_list_head,
+			       struct ice_fltr_info *fi)
+{
+	struct ice_fltr_list_entry *tmp;
+
+	/* this memory is freed up in the caller function
+	 * once filters for this VSI are removed
+	 */
+	tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return ICE_ERR_NO_MEMORY;
+
+	tmp->fltr_info = *fi;
+
+	/* Overwrite these fields to indicate which VSI to remove the filter
+	 * from, so the find-and-remove logic can extract the information from
+	 * the list entries. Note that the original entries will still have
+	 * proper values.
+	 */
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.vsi_handle = vsi_handle;
+	tmp->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	list_add(&tmp->list_entry, vsi_list_head);
+
+	return 0;
 }
 
 /**
  * ice_add_to_vsi_fltr_list - Add VSI filters to the list
  * @hw: pointer to the hardware structure
- * @vsi_id: ID of VSI to remove filters from
+ * @vsi_handle: VSI handle to remove filters from
  * @lkup_list_head: pointer to the list that has certain lookup type filters
- * @vsi_list_head: pointer to the list pertaining to VSI with vsi_id
+ * @vsi_list_head: pointer to the list pertaining to VSI with vsi_handle
+ *
+ * Locates all filters in lkup_list_head that are used by the given VSI,
+ * and adds COPIES of those entries to vsi_list_head (intended to be used
+ * to remove the listed filters).
+ * Note that this means all entries in vsi_list_head must be explicitly
+ * deallocated by the caller when it is done with the list.
  */
 static enum ice_status
-ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_id,
+ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
 			 struct list_head *lkup_list_head,
 			 struct list_head *vsi_list_head)
 {
 	struct ice_fltr_mgmt_list_entry *fm_entry;
+	enum ice_status status = 0;
 
-	/* check to make sure VSI id is valid and within boundary */
-	if (vsi_id >=
-	    (sizeof(fm_entry->vsi_list_info->vsi_map) * BITS_PER_BYTE - 1))
+	/* check to make sure VSI ID is valid and within boundary */
+	if (!ice_is_vsi_valid(hw, vsi_handle))
 		return ICE_ERR_PARAM;
 
 	list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
 		struct ice_fltr_info *fi;
 
 		fi = &fm_entry->fltr_info;
-		if ((fi->fltr_act == ICE_FWD_TO_VSI &&
-		     fi->fwd_id.vsi_id == vsi_id) ||
-		    (fi->fltr_act == ICE_FWD_TO_VSI_LIST &&
-		     (test_bit(vsi_id, fm_entry->vsi_list_info->vsi_map)))) {
-			struct ice_fltr_list_entry *tmp;
+		if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle))
+			continue;
 
-			/* this memory is freed up in the caller function
-			 * ice_remove_vsi_lkup_fltr() once filters for
-			 * this VSI are removed
-			 */
-			tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp),
-					   GFP_KERNEL);
-			if (!tmp)
-				return ICE_ERR_NO_MEMORY;
+		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+							vsi_list_head, fi);
+		if (status)
+			return status;
+	}
+	return status;
+}
 
-			memcpy(&tmp->fltr_info, fi, sizeof(*fi));
+/**
+ * ice_determine_promisc_mask
+ * @fi: filter info to parse
+ *
+ * Helper function to determine which ICE_PROMISC_ mask corresponds
+ * to the given filter info.
+ */
+static u8 ice_determine_promisc_mask(struct ice_fltr_info *fi)
+{
+	u16 vid = fi->l_data.mac_vlan.vlan_id;
+	u8 *macaddr = fi->l_data.mac.mac_addr;
+	bool is_tx_fltr = false;
+	u8 promisc_mask = 0;
 
-			/* Expected below fields to be set to ICE_FWD_TO_VSI and
-			 * the particular VSI id since we are only removing this
-			 * one VSI
-			 */
-			if (fi->fltr_act == ICE_FWD_TO_VSI_LIST) {
-				tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-				tmp->fltr_info.fwd_id.vsi_id = vsi_id;
-			}
+	if (fi->flag == ICE_FLTR_TX)
+		is_tx_fltr = true;
 
-			list_add(&tmp->list_entry, vsi_list_head);
-		}
+	if (is_broadcast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_BCAST_TX : ICE_PROMISC_BCAST_RX;
+	else if (is_multicast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_MCAST_TX : ICE_PROMISC_MCAST_RX;
+	else if (is_unicast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_UCAST_TX : ICE_PROMISC_UCAST_RX;
+	if (vid)
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_VLAN_TX : ICE_PROMISC_VLAN_RX;
+
+	return promisc_mask;
+}
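
As a worked example of the mapping above, a Tx-direction filter whose stored DA has the multicast bit set and whose mac_vlan.vlan_id is non-zero comes back as:

	promisc_mask == (ICE_PROMISC_MCAST_TX | ICE_PROMISC_VLAN_TX);
	/* 0x8 | 0x80 == 0x88 */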
+
+/**
+ * ice_remove_promisc - Remove promisc based filter rules
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to be removed
+ * @v_list: list of promisc entries
+ */
+static enum ice_status
+ice_remove_promisc(struct ice_hw *hw, u8 recp_id,
+		   struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+	list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+		v_list_itr->status =
+			ice_remove_rule_internal(hw, recp_id, v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
 	}
 	return 0;
 }
 
 /**
+ * ice_clear_vsi_promisc - clear specified promiscuous mode(s) for given VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to clear mode
+ * @promisc_mask: mask of promiscuous config bits to clear
+ * @vid: VLAN ID to clear VLAN promiscuous
+ */
+enum ice_status
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		      u16 vid)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *fm_entry, *tmp;
+	struct list_head remove_list_head;
+	struct ice_fltr_mgmt_list_entry *itr;
+	struct list_head *rule_head;
+	struct mutex *rule_lock;	/* Lock to protect filter rule list */
+	enum ice_status status = 0;
+	u8 recipe_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	if (vid)
+		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+	else
+		recipe_id = ICE_SW_LKUP_PROMISC;
+
+	rule_head = &sw->recp_list[recipe_id].filt_rules;
+	rule_lock = &sw->recp_list[recipe_id].filt_rule_lock;
+
+	INIT_LIST_HEAD(&remove_list_head);
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(itr, rule_head, list_entry) {
+		u8 fltr_promisc_mask = 0;
+
+		if (!ice_vsi_uses_fltr(itr, vsi_handle))
+			continue;
+
+		fltr_promisc_mask |=
+			ice_determine_promisc_mask(&itr->fltr_info);
+
+		/* Skip if the filter is not fully specified by the given mask */
+		if (fltr_promisc_mask & ~promisc_mask)
+			continue;
+
+		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+							&remove_list_head,
+							&itr->fltr_info);
+		if (status) {
+			mutex_unlock(rule_lock);
+			goto free_fltr_list;
+		}
+	}
+	mutex_unlock(rule_lock);
+
+	status = ice_remove_promisc(hw, recipe_id, &remove_list_head);
+
+free_fltr_list:
+	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+		list_del(&fm_entry->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+	}
+
+	return status;
+}
+
+/**
+ * ice_set_vsi_promisc - set given VSI to given promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @vid: VLAN ID to set VLAN promiscuous
+ */
+enum ice_status
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid)
+{
+	enum { UCAST_FLTR = 1, MCAST_FLTR, BCAST_FLTR };
+	struct ice_fltr_list_entry f_list_entry;
+	struct ice_fltr_info new_fltr;
+	enum ice_status status = 0;
+	bool is_tx_fltr;
+	u16 hw_vsi_id;
+	int pkt_type;
+	u8 recipe_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	memset(&new_fltr, 0, sizeof(new_fltr));
+
+	if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) {
+		new_fltr.lkup_type = ICE_SW_LKUP_PROMISC_VLAN;
+		new_fltr.l_data.mac_vlan.vlan_id = vid;
+		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+	} else {
+		new_fltr.lkup_type = ICE_SW_LKUP_PROMISC;
+		recipe_id = ICE_SW_LKUP_PROMISC;
+	}
+
+	/* Separate filters must be set for each direction/packet type
+	 * combination, so we will loop over the mask value, store the
+	 * individual type, and clear it out in the input mask as it
+	 * is found.
+	 */
+	while (promisc_mask) {
+		u8 *mac_addr;
+
+		pkt_type = 0;
+		is_tx_fltr = false;
+
+		if (promisc_mask & ICE_PROMISC_UCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_UCAST_RX;
+			pkt_type = UCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_UCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_UCAST_TX;
+			pkt_type = UCAST_FLTR;
+			is_tx_fltr = true;
+		} else if (promisc_mask & ICE_PROMISC_MCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_MCAST_RX;
+			pkt_type = MCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_MCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_MCAST_TX;
+			pkt_type = MCAST_FLTR;
+			is_tx_fltr = true;
+		} else if (promisc_mask & ICE_PROMISC_BCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_BCAST_RX;
+			pkt_type = BCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_BCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_BCAST_TX;
+			pkt_type = BCAST_FLTR;
+			is_tx_fltr = true;
+		}
+
+		/* Check for VLAN promiscuous flag */
+		if (promisc_mask & ICE_PROMISC_VLAN_RX) {
+			promisc_mask &= ~ICE_PROMISC_VLAN_RX;
+		} else if (promisc_mask & ICE_PROMISC_VLAN_TX) {
+			promisc_mask &= ~ICE_PROMISC_VLAN_TX;
+			is_tx_fltr = true;
+		}
+
+		/* Set filter DA based on packet type */
+		mac_addr = new_fltr.l_data.mac.mac_addr;
+		if (pkt_type == BCAST_FLTR) {
+			eth_broadcast_addr(mac_addr);
+		} else if (pkt_type == MCAST_FLTR ||
+			   pkt_type == UCAST_FLTR) {
+			/* Use the dummy ether header DA */
+			ether_addr_copy(mac_addr, dummy_eth_header);
+			if (pkt_type == MCAST_FLTR)
+				mac_addr[0] |= 0x1;	/* Set multicast bit */
+		}
+
+		/* Need to reset this to zero for all iterations */
+		new_fltr.flag = 0;
+		if (is_tx_fltr) {
+			new_fltr.flag |= ICE_FLTR_TX;
+			new_fltr.src = hw_vsi_id;
+		} else {
+			new_fltr.flag |= ICE_FLTR_RX;
+			new_fltr.src = hw->port_info->lport;
+		}
+
+		new_fltr.fltr_act = ICE_FWD_TO_VSI;
+		new_fltr.vsi_handle = vsi_handle;
+		new_fltr.fwd_id.hw_vsi_id = hw_vsi_id;
+		f_list_entry.fltr_info = new_fltr;
+
+		status = ice_add_rule_internal(hw, recipe_id, &f_list_entry);
+		if (status)
+			goto set_promisc_exit;
+	}
+
+set_promisc_exit:
+	return status;
+}
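
A plausible call site, sketched from the parameters above (a vid of 0 selects the plain promiscuous recipe rather than the VLAN one):

	/* receive all multicast and broadcast frames on this VSI */
	status = ice_set_vsi_promisc(hw, vsi_handle,
				     ICE_PROMISC_MCAST_RX |
				     ICE_PROMISC_BCAST_RX, 0);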
+
+/**
+ * ice_set_vlan_vsi_promisc
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @rm_vlan_promisc: clear (rather than set) VLAN promiscuous mode
+ *
+ * Configure VSI with all associated VLANs to given promiscuous mode(s)
+ */
+enum ice_status
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 bool rm_vlan_promisc)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *list_itr, *tmp;
+	struct list_head vsi_list_head;
+	struct list_head *vlan_head;
+	struct mutex *vlan_lock; /* Lock to protect filter rule list */
+	enum ice_status status;
+	u16 vlan_id;
+
+	INIT_LIST_HEAD(&vsi_list_head);
+	vlan_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	vlan_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+	mutex_lock(vlan_lock);
+	status = ice_add_to_vsi_fltr_list(hw, vsi_handle, vlan_head,
+					  &vsi_list_head);
+	mutex_unlock(vlan_lock);
+	if (status)
+		goto free_fltr_list;
+
+	list_for_each_entry(list_itr, &vsi_list_head, list_entry) {
+		vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id;
+		if (rm_vlan_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi_handle,
+						       promisc_mask, vlan_id);
+		else
+			status = ice_set_vsi_promisc(hw, vsi_handle,
+						     promisc_mask, vlan_id);
+		if (status)
+			break;
+	}
+
+free_fltr_list:
+	list_for_each_entry_safe(list_itr, tmp, &vsi_list_head, list_entry) {
+		list_del(&list_itr->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), list_itr);
+	}
+	return status;
+}
+
+/**
  * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI
  * @hw: pointer to the hardware structure
- * @vsi_id: ID of VSI to remove filters from
+ * @vsi_handle: VSI handle to remove filters from
  * @lkup: switch rule filter lookup type
  */
 static void
-ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id,
+ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle,
 			 enum ice_sw_lkup_type lkup)
 {
 	struct ice_switch_info *sw = hw->switch_info;
 	struct ice_fltr_list_entry *fm_entry;
 	struct list_head remove_list_head;
+	struct list_head *rule_head;
 	struct ice_fltr_list_entry *tmp;
+	struct mutex *rule_lock;	/* Lock to protect filter rule list */
 	enum ice_status status;
 
 	INIT_LIST_HEAD(&remove_list_head);
+	rule_lock = &sw->recp_list[lkup].filt_rule_lock;
+	rule_head = &sw->recp_list[lkup].filt_rules;
+	mutex_lock(rule_lock);
+	status = ice_add_to_vsi_fltr_list(hw, vsi_handle, rule_head,
+					  &remove_list_head);
+	mutex_unlock(rule_lock);
+	if (status)
+		return;
+
 	switch (lkup) {
 	case ICE_SW_LKUP_MAC:
-		mutex_lock(&sw->mac_list_lock);
-		status = ice_add_to_vsi_fltr_list(hw, vsi_id,
-						  &sw->mac_list_head,
-						  &remove_list_head);
-		mutex_unlock(&sw->mac_list_lock);
-		if (!status) {
-			ice_remove_mac(hw, &remove_list_head);
-			goto free_fltr_list;
-		}
+		ice_remove_mac(hw, &remove_list_head);
 		break;
 	case ICE_SW_LKUP_VLAN:
-		mutex_lock(&sw->vlan_list_lock);
-		status = ice_add_to_vsi_fltr_list(hw, vsi_id,
-						  &sw->vlan_list_head,
-						  &remove_list_head);
-		mutex_unlock(&sw->vlan_list_lock);
-		if (!status) {
-			ice_remove_vlan(hw, &remove_list_head);
-			goto free_fltr_list;
-		}
+		ice_remove_vlan(hw, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_PROMISC:
+	case ICE_SW_LKUP_PROMISC_VLAN:
+		ice_remove_promisc(hw, lkup, &remove_list_head);
 		break;
 	case ICE_SW_LKUP_MAC_VLAN:
 	case ICE_SW_LKUP_ETHERTYPE:
 	case ICE_SW_LKUP_ETHERTYPE_MAC:
-	case ICE_SW_LKUP_PROMISC:
-	case ICE_SW_LKUP_PROMISC_VLAN:
 	case ICE_SW_LKUP_DFLT:
-		ice_debug(hw, ICE_DBG_SW,
-			  "Remove filters for this lookup type hasn't been implemented yet\n");
+	case ICE_SW_LKUP_LAST:
+	default:
+		ice_debug(hw, ICE_DBG_SW, "Unsupported lookup type %d\n", lkup);
 		break;
 	}
 
-	return;
-free_fltr_list:
 	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
 		list_del(&fm_entry->list_entry);
 		devm_kfree(ice_hw_to_dev(hw), fm_entry);
@@ -1868,16 +2695,121 @@
 /**
  * ice_remove_vsi_fltr - Remove all filters for a VSI
  * @hw: pointer to the hardware structure
- * @vsi_id: ID of VSI to remove filters from
+ * @vsi_handle: VSI handle to remove filters from
  */
-void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id)
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle)
 {
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC_VLAN);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_VLAN);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_DFLT);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE_MAC);
-	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_DFLT);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC_VLAN);
+}
+
+/**
+ * ice_replay_vsi_fltr - Replay filters for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @recp_id: Recipe ID for which rules need to be replayed
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replays the filter of recipe recp_id for a VSI represented via vsi_handle.
+ * It is required to pass a valid VSI handle.
+ */
+static enum ice_status
+ice_replay_vsi_fltr(struct ice_hw *hw, u16 vsi_handle, u8 recp_id,
+		    struct list_head *list_head)
+{
+	struct ice_fltr_mgmt_list_entry *itr;
+	enum ice_status status = 0;
+	u16 hw_vsi_id;
+
+	if (list_empty(list_head))
+		return status;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	list_for_each_entry(itr, list_head, list_entry) {
+		struct ice_fltr_list_entry f_entry;
+
+		f_entry.fltr_info = itr->fltr_info;
+		if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN &&
+		    itr->fltr_info.vsi_handle == vsi_handle) {
+			/* update the src in case it is a VSI number */
+			if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+				f_entry.fltr_info.src = hw_vsi_id;
+			status = ice_add_rule_internal(hw, recp_id, &f_entry);
+			if (status)
+				goto end;
+			continue;
+		}
+		if (!itr->vsi_list_info ||
+		    !test_bit(vsi_handle, itr->vsi_list_info->vsi_map))
+			continue;
+		/* Clear it so that the replay logic can add it back */
+		clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
+		f_entry.fltr_info.vsi_handle = vsi_handle;
+		f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		/* update the src in case it is a VSI number */
+		if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+			f_entry.fltr_info.src = hw_vsi_id;
+		if (recp_id == ICE_SW_LKUP_VLAN)
+			status = ice_add_vlan_internal(hw, &f_entry);
+		else
+			status = ice_add_rule_internal(hw, recp_id, &f_entry);
+		if (status)
+			goto end;
+	}
+end:
+	return status;
+}
+
+/**
+ * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ *
+ * Replays filters for requested VSI via vsi_handle.
+ */
+enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	enum ice_status status = 0;
+	u8 i;
+
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		struct list_head *head;
+
+		head = &sw->recp_list[i].filt_replay_rules;
+		status = ice_replay_vsi_fltr(hw, vsi_handle, i, head);
+		if (status)
+			return status;
+	}
+	return status;
+}
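
In a rebuild path one would expect a per-VSI replay loop followed by a one-time teardown of the replay bookkeeping, along these lines; vsi_cnt and vsi_handles are hypothetical names, not from the patch.

	for (i = 0; i < vsi_cnt; i++) {
		status = ice_replay_vsi_all_fltr(hw, vsi_handles[i]);
		if (status)
			break;
	}
	/* drop the saved replay rules once they are no longer needed */
	ice_rm_all_sw_replay_rule_info(hw);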
+
+/**
+ * ice_rm_all_sw_replay_rule_info - deletes filter replay rules
+ * @hw: pointer to the HW struct
+ *
+ * Deletes the filter replay rules.
+ */
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	u8 i;
+
+	if (!sw)
+		return;
+
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		if (!list_empty(&sw->recp_list[i].filt_replay_rules)) {
+			struct list_head *l_head;
+
+			l_head = &sw->recp_list[i].filt_replay_rules;
+			ice_rem_sw_rule_info(hw, l_head);
+		}
+	}
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 9b8ec12..cb123fb 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -8,7 +8,16 @@
 
 #define ICE_SW_CFG_MAX_BUF_LEN 2048
 #define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
 #define ICE_VSI_INVAL_ID 0xffff
+#define ICE_INVAL_Q_HANDLE 0xFFFF
+
+/* VSI queue context structure */
+struct ice_q_ctx {
+	u16  q_handle;
+};
 
 /* VSI context structure for add/get/update/free operations */
 struct ice_vsi_ctx {
@@ -17,7 +26,11 @@
 	u16 vsis_unallocated;
 	u16 flags;
 	struct ice_aqc_vsi_props info;
+	struct ice_sched_vsi_info sched;
 	u8 alloc_from_pool;
+	u8 vf_num;
+	u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
 };
 
 enum ice_sw_fwd_act_type {
@@ -39,6 +52,15 @@
 	ICE_SW_LKUP_DFLT = 5,
 	ICE_SW_LKUP_ETHERTYPE_MAC = 8,
 	ICE_SW_LKUP_PROMISC_VLAN = 9,
+	ICE_SW_LKUP_LAST
+};
+
+/* type of filter src ID */
+enum ice_src_id {
+	ICE_SRC_ID_UNKNOWN = 0,
+	ICE_SRC_ID_VSI,
+	ICE_SRC_ID_QUEUE,
+	ICE_SRC_ID_LPORT,
 };
 
 struct ice_fltr_info {
@@ -49,12 +71,10 @@
 	/* rule ID returned by firmware once filter rule is created */
 	u16 fltr_rule_id;
 	u16 flag;
-#define ICE_FLTR_RX		BIT(0)
-#define ICE_FLTR_TX		BIT(1)
-#define ICE_FLTR_TX_RX		(ICE_FLTR_RX | ICE_FLTR_TX)
 
 	/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
 	u16 src;
+	enum ice_src_id src_id;
 
 	union {
 		struct {
@@ -76,20 +96,27 @@
 			u16 ethertype;
 			u8 mac_addr[ETH_ALEN]; /* optional */
 		} ethertype_mac;
-	} l_data;
+	} l_data; /* Make sure to zero out the memory of l_data before using
+		   * it, or set only the data associated with the lookup
+		   * match; everything else should remain zero.
+		   */
 
 	/* Depending on filter action */
 	union {
-		/* queue id in case of ICE_FWD_TO_Q and starting
-		 * queue id in case of ICE_FWD_TO_QGRP.
+		/* queue ID in case of ICE_FWD_TO_Q and starting
+		 * queue ID in case of ICE_FWD_TO_QGRP.
 		 */
 		u16 q_id:11;
-		u16 vsi_id:10;
+		u16 hw_vsi_id:10;
 		u16 vsi_list_id:10;
 	} fwd_id;
 
+	/* Sw VSI handle */
+	u16 vsi_handle;
+
 	/* Set to num_queues if action is ICE_FWD_TO_QGRP. This field
-	 * determines the range of queues the packet needs to be forwarded to
+	 * determines the range of queues the packet needs to be forwarded to.
+	 * Note that qgrp_size must be set to a power of 2.
 	 */
 	u8 qgrp_size;
 
@@ -98,32 +125,55 @@
 	u8 lan_en;	/* Indicate if packet can be forwarded to the uplink */
 };
 
-/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */
+struct ice_sw_recipe {
+	struct list_head l_entry;
+
+	/* To protect modification of filt_rule list
+	 * defined below
+	 */
+	struct mutex filt_rule_lock;
+
+	/* List of type ice_fltr_mgmt_list_entry */
+	struct list_head filt_rules;
+	struct list_head filt_replay_rules;
+
+	/* linked list of type recipe_list_entry */
+	struct list_head rg_list;
+	/* linked list of type ice_sw_fv_list_entry */
+	struct list_head fv_list;
+	struct ice_aqc_recipe_data_elem *r_buf;
+	u8 recp_count;
+	u8 root_rid;
+	u8 num_profs;
+	u8 *prof_ids;
+
+	/* recipe bitmap: which recipes make up this recipe */
+	DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+};
+
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */
 struct ice_vsi_list_map_info {
 	struct list_head list_entry;
 	DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
 	u16 vsi_list_id;
-};
-
-enum ice_sw_fltr_status {
-	ICE_FLTR_STATUS_NEW = 0,
-	ICE_FLTR_STATUS_FW_SUCCESS,
-	ICE_FLTR_STATUS_FW_FAIL,
+	/* counter to track how many rules are reusing this VSI list */
+	u16 ref_cnt;
 };
 
 struct ice_fltr_list_entry {
 	struct list_head list_entry;
-	enum ice_sw_fltr_status status;
+	enum ice_status status;
 	struct ice_fltr_info fltr_info;
 };
 
 /* This defines an entry in the list that maintains MAC or VLAN membership
  * to HW list mapping, since multiple VSIs can subscribe to the same MAC or
  * VLAN. As an optimization the VSI list should be created only when a
- * second VSI becomes a subscriber to the VLAN address.
+ * second VSI becomes a subscriber to the same MAC address. VSI lists are always
+ * used for VLAN membership.
  */
 struct ice_fltr_mgmt_list_entry {
-	/* back pointer to VSI list id to VSI list mapping */
+	/* back pointer to VSI list ID to VSI list mapping */
 	struct ice_vsi_list_map_info *vsi_list_info;
 	u16 vsi_count;
 #define ICE_INVAL_LG_ACT_INDEX 0xffff
@@ -136,26 +186,64 @@
 	u8 counter_index;
 };
 
+enum ice_promisc_flags {
+	ICE_PROMISC_UCAST_RX = 0x1,
+	ICE_PROMISC_UCAST_TX = 0x2,
+	ICE_PROMISC_MCAST_RX = 0x4,
+	ICE_PROMISC_MCAST_TX = 0x8,
+	ICE_PROMISC_BCAST_RX = 0x10,
+	ICE_PROMISC_BCAST_TX = 0x20,
+	ICE_PROMISC_VLAN_RX = 0x40,
+	ICE_PROMISC_VLAN_TX = 0x80,
+};
+
 /* VSI related commands */
 enum ice_status
-ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	    struct ice_sq_cd *cd);
+enum ice_status
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	     bool keep_vsi_alloc, struct ice_sq_cd *cd);
+enum ice_status
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
 	       struct ice_sq_cd *cd);
-enum ice_status
-ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
-		  struct ice_sq_cd *cd);
-enum ice_status
-ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
-		bool keep_vsi_alloc, struct ice_sq_cd *cd);
-
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle);
+void ice_clear_all_vsi_ctx(struct ice_hw *hw);
+/* Switch config */
 enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
 
 /* Switch/bridge related commands */
+enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw);
 enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
 enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
-void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id);
-enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
-enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
 enum ice_status
-ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction);
+ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+enum ice_status
+ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle);
+enum ice_status
+ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
+enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+
+/* Promisc/defport setup for VSIs */
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction);
+enum ice_status
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		    u16 vid);
+enum ice_status
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		      u16 vid);
+enum ice_status
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 bool rm_vlan_promisc);
+
+enum ice_status ice_init_def_sw_recp(struct ice_hw *hw);
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle);
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
+
+enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle);
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw);
 
 #endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 0c95c8f..33dd103 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -6,6 +6,7 @@
 #include <linux/prefetch.h>
 #include <linux/mm.h>
 #include "ice.h"
+#include "ice_dcb_lib.h"
 
 #define ICE_RX_HDR_SIZE		256
 
@@ -48,19 +49,17 @@
  */
 void ice_clean_tx_ring(struct ice_ring *tx_ring)
 {
-	unsigned long size;
 	u16 i;
 
 	/* ring already cleared, nothing to do */
 	if (!tx_ring->tx_buf)
 		return;
 
-	/* Free all the Tx ring sk_bufss */
+	/* Free all the Tx ring sk_buffs */
 	for (i = 0; i < tx_ring->count; i++)
 		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
 
-	size = sizeof(struct ice_tx_buf) * tx_ring->count;
-	memset(tx_ring->tx_buf, 0, size);
+	memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
 
 	/* Zero out the descriptor ring */
 	memset(tx_ring->desc, 0, tx_ring->size);
@@ -96,17 +95,16 @@
 
 /**
  * ice_clean_tx_irq - Reclaim resources after transmit completes
- * @vsi: the VSI we care about
  * @tx_ring: Tx ring to clean
  * @napi_budget: Used to determine if we are in netpoll
  *
  * Returns true if there's any budget left (e.g. the clean is finished)
  */
-static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring,
-			     int napi_budget)
+static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 {
 	unsigned int total_bytes = 0, total_pkts = 0;
-	unsigned int budget = vsi->work_lmt;
+	unsigned int budget = ICE_DFLT_IRQ_WORK;
+	struct ice_vsi *vsi = tx_ring->vsi;
 	s16 i = tx_ring->next_to_clean;
 	struct ice_tx_desc *tx_desc;
 	struct ice_tx_buf *tx_buf;
@@ -115,6 +113,8 @@
 	tx_desc = ICE_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	prefetch(&vsi->state);
+
 	do {
 		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -207,7 +207,7 @@
 		smp_mb();
 		if (__netif_subqueue_stopped(tx_ring->netdev,
 					     tx_ring->q_index) &&
-		   !test_bit(__ICE_DOWN, vsi->state)) {
+		    !test_bit(__ICE_DOWN, vsi->state)) {
 			netif_wake_subqueue(tx_ring->netdev,
 					    tx_ring->q_index);
 			++tx_ring->tx_stats.restart_q;
@@ -219,28 +219,28 @@
 
 /**
  * ice_setup_tx_ring - Allocate the Tx descriptors
- * @tx_ring: the tx ring to set up
+ * @tx_ring: the Tx ring to set up
  *
  * Return 0 on success, negative on error
  */
 int ice_setup_tx_ring(struct ice_ring *tx_ring)
 {
 	struct device *dev = tx_ring->dev;
-	int bi_size;
 
 	if (!dev)
 		return -ENOMEM;
 
 	/* warn if we are about to overwrite the pointer */
 	WARN_ON(tx_ring->tx_buf);
-	bi_size = sizeof(struct ice_tx_buf) * tx_ring->count;
-	tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
+	tx_ring->tx_buf =
+		devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count,
+			     GFP_KERNEL);
 	if (!tx_ring->tx_buf)
 		return -ENOMEM;
 
-	/* round up to nearest 4K */
-	tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc);
-	tx_ring->size = ALIGN(tx_ring->size, 4096);
+	/* round up to nearest page */
+	tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+			      PAGE_SIZE);
 	tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
 					    GFP_KERNEL);
 	if (!tx_ring->desc) {
@@ -251,6 +251,7 @@
 
 	tx_ring->next_to_use = 0;
 	tx_ring->next_to_clean = 0;
+	tx_ring->tx_stats.prev_pkt = -1;
 	return 0;
 
 err:
@@ -266,7 +267,6 @@
 void ice_clean_rx_ring(struct ice_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
-	unsigned long size;
 	u16 i;
 
 	/* ring already cleared, nothing to do */
@@ -284,15 +284,23 @@
 		if (!rx_buf->page)
 			continue;
 
-		dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
-		__free_pages(rx_buf->page, 0);
+		/* Invalidate cache lines that may have been written to by the
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(dev, rx_buf->dma,
+					      rx_buf->page_offset,
+					      ICE_RXBUF_2048, DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 
 		rx_buf->page = NULL;
 		rx_buf->page_offset = 0;
 	}
 
-	size = sizeof(struct ice_rx_buf) * rx_ring->count;
-	memset(rx_ring->rx_buf, 0, size);
+	memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
 
 	/* Zero out the descriptor ring */
 	memset(rx_ring->desc, 0, rx_ring->size);
@@ -323,28 +331,28 @@
 
 /**
  * ice_setup_rx_ring - Allocate the Rx descriptors
- * @rx_ring: the rx ring to set up
+ * @rx_ring: the Rx ring to set up
  *
  * Return 0 on success, negative on error
  */
 int ice_setup_rx_ring(struct ice_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
-	int bi_size;
 
 	if (!dev)
 		return -ENOMEM;
 
 	/* warn if we are about to overwrite the pointer */
 	WARN_ON(rx_ring->rx_buf);
-	bi_size = sizeof(struct ice_rx_buf) * rx_ring->count;
-	rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
+	rx_ring->rx_buf =
+		devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count,
+			     GFP_KERNEL);
 	if (!rx_ring->rx_buf)
 		return -ENOMEM;
 
-	/* round up to nearest 4K */
-	rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc);
-	rx_ring->size = ALIGN(rx_ring->size, 4096);
+	/* round up to nearest page */
+	rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+			      PAGE_SIZE);
 	rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
 					    GFP_KERNEL);
 	if (!rx_ring->desc) {
@@ -370,18 +378,28 @@
  */
 static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
 {
+	u16 prev_ntu = rx_ring->next_to_use;
+
 	rx_ring->next_to_use = val;
 
 	/* update next to alloc since we have filled the ring */
 	rx_ring->next_to_alloc = val;
 
-	/* Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch.  (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64).
+	/* QRX_TAIL will be updated with any tail value, but hardware ignores
+	 * the lower 3 bits. This makes it so we only bump tail on meaningful
+	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+	 * the budget depending on the current traffic load.
 	 */
-	wmb();
-	writel(val, rx_ring->tail);
+	val &= ~0x7;
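+	/* Illustrative example (values assumed): if the previous tail write
+	 * left next_to_use at 16, values 17-23 mask back down to 16 and are
+	 * skipped; once val reaches 24 the masked value changes and the
+	 * write below is issued.
+	 */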
+	if (prev_ntu != val) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(val, rx_ring->tail);
+	}
 }
 
 /**
@@ -392,8 +410,8 @@
  * Returns true if the page was successfully allocated or
  * reused.
  */
-static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
-				  struct ice_rx_buf *bi)
+static bool
+ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
 {
 	struct page *page = bi->page;
 	dma_addr_t dma;
@@ -412,7 +430,8 @@
 	}
 
 	/* map page for use */
-	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 
 	/* if mapping failed free memory back to system since
 	 * there isn't much point in holding memory we can't use
@@ -426,6 +445,8 @@
 	bi->dma = dma;
 	bi->page = page;
 	bi->page_offset = 0;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
 
 	return true;
 }
@@ -435,7 +456,13 @@
  * @rx_ring: ring to place buffers on
  * @cleaned_count: number of buffers to replace
  *
- * Returns false if all allocations were successful, true if any fail
+ * Returns false if all allocations were successful, true if any fail. Returning
+ * true signals to the caller that we didn't replace cleaned_count buffers and
+ * there is more work to do.
+ *
+ * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
+ * buffers. Then bump tail at most one time. Grouping like this lets us avoid
+ * multiple tail writes per call.
  */
 bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
 {
@@ -447,13 +474,20 @@
 	if (!rx_ring->netdev || !cleaned_count)
 		return false;
 
-	/* get the RX descriptor and buffer based on next_to_use */
+	/* get the Rx descriptor and buffer based on next_to_use */
 	rx_desc = ICE_RX_DESC(rx_ring, ntu);
 	bi = &rx_ring->rx_buf[ntu];
 
 	do {
+		/* if we fail here, we have work remaining */
 		if (!ice_alloc_mapped_page(rx_ring, bi))
-			goto no_bufs;
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 ICE_RXBUF_2048,
+						 DMA_FROM_DEVICE);
 
 		/* Refresh the desc even if buffer_addrs didn't change
 		 * because each write-back erases this info.
@@ -478,16 +512,7 @@
 	if (rx_ring->next_to_use != ntu)
 		ice_release_rx_desc(rx_ring, ntu);
 
-	return false;
-
-no_bufs:
-	if (rx_ring->next_to_use != ntu)
-		ice_release_rx_desc(rx_ring, ntu);
-
-	/* make sure to come back via polling to try again after
-	 * allocation failure
-	 */
-	return true;
+	return !!cleaned_count;
 }
 
 /**
@@ -500,61 +525,43 @@
 }
 
 /**
- * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
- * @rx_buf: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
- * @skb: sk_buf to place the data into
+ * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
+ * @rx_buf: Rx buffer to adjust
+ * @size: Size of adjustment
  *
- * This function will add the data contained in rx_buf->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
+ * Update the offset within page so that Rx buf will be ready to be reused.
+ * For systems with PAGE_SIZE < 8192 this function will flip the page offset
+ * so the second half of page assigned to Rx buffer will be used, otherwise
+ * the offset is moved by @size bytes.
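+ * (e.g., with 4 KB pages and 2 KB buffers, the XOR with 2048 simply
+ * toggles the offset between 0 and 2048; sizes assumed for illustration)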
  */
-static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf,
-			    union ice_32b_rx_flex_desc *rx_desc,
-			    struct sk_buff *skb)
+static void
+ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
 {
 #if (PAGE_SIZE < 8192)
-	unsigned int truesize = ICE_RXBUF_2048;
+	/* flip page offset to other buffer */
+	rx_buf->page_offset ^= size;
 #else
-	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-	unsigned int truesize;
-#endif /* PAGE_SIZE < 8192) */
+	/* move offset up to the next cache line */
+	rx_buf->page_offset += size;
+#endif
+}
 
-	struct page *page;
-	unsigned int size;
-
-	size = le16_to_cpu(rx_desc->wb.pkt_len) &
-		ICE_RX_FLX_DESC_PKT_LEN_M;
-
-	page = rx_buf->page;
-
+/**
+ * ice_can_reuse_rx_page - Determine if page can be reused for another Rx buffer
+ * @rx_buf: buffer containing the page
+ *
+ * If page is reusable, we have a green light for calling ice_reuse_rx_page,
+ * which will assign the current buffer to the buffer that next_to_alloc is
+ * pointing to; otherwise, the DMA mapping needs to be destroyed and
+ * the page freed.
+ */
+static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
+{
 #if (PAGE_SIZE >= 8192)
-	truesize = ALIGN(size, L1_CACHE_BYTES);
-#endif /* PAGE_SIZE >= 8192) */
-
-	/* will the data fit in the skb we allocated? if so, just
-	 * copy it as it is pretty small anyway
-	 */
-	if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
-		unsigned char *va = page_address(page) + rx_buf->page_offset;
-
-		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-		/* page is not reserved, we can reuse buffer as-is */
-		if (likely(!ice_page_is_reserved(page)))
-			return true;
-
-		/* this page cannot be reused so discard it */
-		__free_pages(page, 0);
-		return false;
-	}
-
-	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-			rx_buf->page_offset, size, truesize);
+	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
+#endif
+	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
+	struct page *page = rx_buf->page;
 
 	/* avoid re-using remote pages */
 	if (unlikely(ice_page_is_reserved(page)))
@@ -562,36 +569,63 @@
 
 #if (PAGE_SIZE < 8192)
 	/* if we are only owner of page we can reuse it */
-	if (unlikely(page_count(page) != 1))
+	if (unlikely((page_count(page) - pagecnt_bias) > 1))
 		return false;
-
-	/* flip page offset to other buffer */
-	rx_buf->page_offset ^= truesize;
 #else
-	/* move offset up to the next cache line */
-	rx_buf->page_offset += truesize;
-
 	if (rx_buf->page_offset > last_offset)
 		return false;
 #endif /* PAGE_SIZE < 8192 */
 
-	/* Even if we own the page, we are not allowed to use atomic_set()
-	 * This would break get_page_unless_zero() users.
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
 	 */
-	get_page(rx_buf->page);
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buf->pagecnt_bias = USHRT_MAX;
+	}
 
 	return true;
 }
 
 /**
+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_buf: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * It will just attach the page as a frag to the skb.
+ * The function will then update the page offset.
+ */
+static void
+ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
+		unsigned int size)
+{
+#if (PAGE_SIZE >= 8192)
+	unsigned int truesize = SKB_DATA_ALIGN(size);
+#else
+	unsigned int truesize = ICE_RXBUF_2048;
+#endif
+
+	if (!size)
+		return;
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
+			rx_buf->page_offset, size, truesize);
+
+	/* page is being used so we must update the page offset */
+	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+}
+
+/**
  * ice_reuse_rx_page - page flip buffer and store it back on the ring
- * @rx_ring: rx descriptor ring to store buffers on
+ * @rx_ring: Rx descriptor ring to store buffers on
  * @old_buf: donor buffer to have page reused
  *
  * Synchronizes page for reuse by the adapter
  */
-static void ice_reuse_rx_page(struct ice_ring *rx_ring,
-			      struct ice_rx_buf *old_buf)
+static void
+ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
 {
 	u16 nta = rx_ring->next_to_alloc;
 	struct ice_rx_buf *new_buf;
@@ -602,121 +636,137 @@
 	nta++;
 	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
-	/* transfer page from old buffer to new buffer */
-	*new_buf = *old_buf;
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls and unnecessary copy of skb.
+	 */
+	new_buf->dma = old_buf->dma;
+	new_buf->page = old_buf->page;
+	new_buf->page_offset = old_buf->page_offset;
+	new_buf->pagecnt_bias = old_buf->pagecnt_bias;
 }
 
 /**
- * ice_fetch_rx_buf - Allocate skb and populate it
- * @rx_ring: rx descriptor ring to transact packets on
- * @rx_desc: descriptor containing info written by hardware
+ * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @skb: skb to be used
+ * @size: size of buffer to add to skb
  *
- * This function allocates an skb on the fly, and populates it with the page
- * data from the current receive descriptor, taking care to set up the skb
- * correctly, as well as handling calling the page recycle function if
- * necessary.
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
  */
-static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring,
-					union ice_32b_rx_flex_desc *rx_desc)
+static struct ice_rx_buf *
+ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
+	       const unsigned int size)
 {
 	struct ice_rx_buf *rx_buf;
-	struct sk_buff *skb;
-	struct page *page;
 
 	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
-	page = rx_buf->page;
-	prefetchw(page);
+	prefetchw(rx_buf->page);
+	*skb = rx_buf->skb;
 
-	skb = rx_buf->skb;
+	if (!size)
+		return rx_buf;
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+				      rx_buf->page_offset, size,
+				      DMA_FROM_DEVICE);
 
-	if (likely(!skb)) {
-		u8 *page_addr = page_address(page) + rx_buf->page_offset;
+	/* We have pulled a buffer for use, so decrement pagecnt_bias */
+	rx_buf->pagecnt_bias--;
 
-		/* prefetch first cache line of first page */
-		prefetch(page_addr);
+	return rx_buf;
+}
+
+/**
+ * ice_construct_skb - Allocate skb and populate it
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @size: the length of the packet
+ *
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+static struct sk_buff *
+ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		  unsigned int size)
+{
+	void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	prefetch(va);
 #if L1_CACHE_BYTES < 128
-		prefetch((void *)(page_addr + L1_CACHE_BYTES));
+	prefetch((u8 *)va + L1_CACHE_BYTES);
 #endif /* L1_CACHE_BYTES */
 
-		/* allocate a skb to store the frags */
-		skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-				       ICE_RX_HDR_SIZE,
-				       GFP_ATOMIC | __GFP_NOWARN);
-		if (unlikely(!skb)) {
-			rx_ring->rx_stats.alloc_buf_failed++;
-			return NULL;
-		}
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
 
-		/* we will be copying header into skb->data in
-		 * pskb_may_pull so it is in our interest to prefetch
-		 * it now to avoid a possible cache miss
+	skb_record_rx_queue(skb, rx_ring->q_index);
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > ICE_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+	/* if we exhaust the linear part then add what is left as a frag */
+	size -= headlen;
+	if (size) {
+#if (PAGE_SIZE >= 8192)
+		unsigned int truesize = SKB_DATA_ALIGN(size);
+#else
+		unsigned int truesize = ICE_RXBUF_2048;
+#endif
+		skb_add_rx_frag(skb, 0, rx_buf->page,
+				rx_buf->page_offset + headlen, size, truesize);
+		/* buffer is used by skb, update page_offset */
+		ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+	} else {
+		/* buffer is unused, reset bias back to rx_buf; data was copied
+		 * onto skb's linear part so there's no need for adjusting
+		 * page offset and we can reuse this buffer as-is
 		 */
-		prefetchw(skb->data);
-
-		skb_record_rx_queue(skb, rx_ring->q_index);
-	} else {
-		/* we are reusing so sync this buffer for CPU use */
-		dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
-					      rx_buf->page_offset,
-					      ICE_RXBUF_2048,
-					      DMA_FROM_DEVICE);
-
-		rx_buf->skb = NULL;
+		rx_buf->pagecnt_bias++;
 	}
 
-	/* pull page into skb */
-	if (ice_add_rx_frag(rx_buf, rx_desc, skb)) {
-		/* hand second half of page back to the ring */
-		ice_reuse_rx_page(rx_ring, rx_buf);
-		rx_ring->rx_stats.page_reuse_count++;
-	} else {
-		/* we are not reusing the buffer so unmap it */
-		dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
-			       DMA_FROM_DEVICE);
-	}
-
-	/* clear contents of buffer_info */
-	rx_buf->page = NULL;
-
 	return skb;
 }
 
 /**
- * ice_pull_tail - ice specific version of skb_pull_tail
- * @skb: pointer to current skb being adjusted
+ * ice_put_rx_buf - Clean up used buffer and either recycle or free
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to clean up
  *
- * This function is an ice specific version of __pskb_pull_tail.  The
- * main difference between this version and the original function is that
- * this function can make several assumptions about the state of things
- * that allow for significant optimizations versus the standard function.
- * As a result we can do things like drop a frag and maintain an accurate
- * truesize for the skb.
+ * This function will clean up the contents of the rx_buf. It will
+ * either recycle the buffer or unmap it and free the associated resources.
  */
-static void ice_pull_tail(struct sk_buff *skb)
+static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 {
-	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
-	unsigned int pull_len;
-	unsigned char *va;
+	if (!rx_buf)
+		return;
 
-	/* it is valid to use page_address instead of kmap since we are
-	 * working with pages allocated out of the lomem pool per
-	 * alloc_page(GFP_ATOMIC)
-	 */
-	va = skb_frag_address(frag);
+	if (ice_can_reuse_rx_page(rx_buf)) {
+		/* hand second half of page back to the ring */
+		ice_reuse_rx_page(rx_ring, rx_buf);
+		rx_ring->rx_stats.page_reuse_count++;
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
+				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+	}
 
-	/* we need the header to contain the greater of either ETH_HLEN or
-	 * 60 bytes if the skb->len is less than 60 for skb_pad.
-	 */
-	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
-
-	/* align pull length to size of long to optimize memcpy performance */
-	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
-
-	/* update all of the pointers */
-	skb_frag_size_sub(frag, pull_len);
-	frag->page_offset += pull_len;
-	skb->data_len -= pull_len;
-	skb->tail += pull_len;
+	/* clear contents of buffer_info */
+	rx_buf->page = NULL;
+	rx_buf->skb = NULL;
 }
 
 /**
@@ -733,10 +783,6 @@
  */
 static bool ice_cleanup_headers(struct sk_buff *skb)
 {
-	/* place header in linear portion of buffer */
-	if (skb_is_nonlinear(skb))
-		ice_pull_tail(skb);
-
 	/* if eth_skb_pad returns an error the skb was freed */
 	if (eth_skb_pad(skb))
 		return true;
@@ -754,8 +800,8 @@
  * The status_error_len doesn't need to be shifted because it begins
  * at offset zero.
  */
-static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc,
-			     const u16 stat_err_bits)
+static bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
 {
 	return !!(rx_desc->wb.status_error0 &
 		  cpu_to_le16(stat_err_bits));
@@ -767,14 +813,14 @@
  * @rx_desc: Rx descriptor for current buffer
  * @skb: Current socket buffer containing buffer in progress
  *
- * This function updates next to clean.  If the buffer is an EOP buffer
+ * This function updates next to clean. If the buffer is an EOP buffer
  * this function exits returning false, otherwise it will place the
  * sk_buff in the next buffer to be chained and return true indicating
  * that this is in fact a non-EOP buffer.
  */
-static bool ice_is_non_eop(struct ice_ring *rx_ring,
-			   union ice_32b_rx_flex_desc *rx_desc,
-			   struct sk_buff *skb)
+static bool
+ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	       struct sk_buff *skb)
 {
 	u32 ntc = rx_ring->next_to_clean + 1;
 
@@ -834,15 +880,16 @@
 
 /**
  * ice_rx_csum - Indicate in skb if checksum is good
- * @vsi: the VSI we care about
+ * @ring: the ring we care about
  * @skb: skb currently being received and modified
  * @rx_desc: the receive descriptor
  * @ptype: the packet type decoded by hardware
  *
  * skb->protocol must be set before this function is called
  */
-static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
-			union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
 {
 	struct ice_rx_ptype_decoded decoded;
 	u32 rx_error, rx_status;
@@ -858,7 +905,7 @@
 	skb_checksum_none_assert(skb);
 
 	/* check if Rx checksum is enabled */
-	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
 		return;
 
 	/* check if HW has decoded the packet and checksum */
@@ -898,12 +945,12 @@
 	return;
 
 checksum_fail:
-	vsi->back->hw_csum_rx_error++;
+	ring->vsi->back->hw_csum_rx_error++;
 }
 
 /**
  * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_ring: Rx descriptor ring packet is being transacted on
  * @rx_desc: pointer to the EOP Rx descriptor
  * @skb: pointer to current skb being populated
  * @ptype: the packet type decoded by hardware
@@ -912,44 +959,44 @@
  * order to populate the hash, checksum, VLAN, protocol, and
  * other fields within the skb.
  */
-static void ice_process_skb_fields(struct ice_ring *rx_ring,
-				   union ice_32b_rx_flex_desc *rx_desc,
-				   struct sk_buff *skb, u8 ptype)
+static void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype)
 {
 	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
 
 	/* modifies the skb - consumes the enet header */
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 
-	ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype);
+	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
 }
 
 /**
  * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: rx ring in play
+ * @rx_ring: Rx ring in play
  * @skb: packet to send up
- * @vlan_tag: vlan tag for packet
+ * @vlan_tag: VLAN tag for packet
  *
  * This function sends the completed packet (via skb) up the stack using
- * gro receive functions (with/without vlan tag)
+ * gro receive functions (with/without VLAN tag)
  */
-static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb,
-			    u16 vlan_tag)
+static void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
 {
 	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-	    (vlan_tag & VLAN_VID_MASK)) {
+	    (vlan_tag & VLAN_VID_MASK))
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-	}
 	napi_gro_receive(&rx_ring->q_vector->napi, skb);
 }
 
 /**
  * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
- * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_ring: Rx descriptor ring to transact packets on
  * @budget: Total limit on number of packets to process
  *
  * This function provides a "bounce buffer" approach to Rx interrupt
- * processing.  The advantage to this is that on systems that have
+ * processing. The advantage to this is that on systems that have
  * expensive overhead for IOMMU access this provides a means of avoiding
  * it by maintaining the mapping of the page to the system.
  *
@@ -959,24 +1006,19 @@
 {
 	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
 	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
-	bool failure = false;
+	bool failure;
 
-	/* start the loop to process RX packets bounded by 'budget' */
+	/* start the loop to process Rx packets bounded by 'budget' */
 	while (likely(total_rx_pkts < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
+		struct ice_rx_buf *rx_buf;
 		struct sk_buff *skb;
+		unsigned int size;
 		u16 stat_err_bits;
 		u16 vlan_tag = 0;
 		u8 rx_ptype;
 
-		/* return some buffers to hardware, one at a time is too slow */
-		if (cleaned_count >= ICE_RX_BUF_WRITE) {
-			failure = failure ||
-				  ice_alloc_rx_bufs(rx_ring, cleaned_count);
-			cleaned_count = 0;
-		}
-
-		/* get the RX desc from RX ring based on 'next_to_clean' */
+		/* get the Rx desc from Rx ring based on 'next_to_clean' */
 		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
 		/* status_error_len will always be zero for unused descriptors
@@ -994,11 +1036,26 @@
 		 */
 		dma_rmb();
 
-		/* allocate (if needed) and populate skb */
-		skb = ice_fetch_rx_buf(rx_ring, rx_desc);
-		if (!skb)
-			break;
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+			ICE_RX_FLX_DESC_PKT_LEN_M;
 
+		/* retrieve a buffer from the ring */
+		rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
+
+		if (skb)
+			ice_add_rx_frag(rx_buf, skb, size);
+		else
+			skb = ice_construct_skb(rx_ring, rx_buf, size);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			if (rx_buf)
+				rx_buf->pagecnt_bias++;
+			break;
+		}
+
+		ice_put_rx_buf(rx_ring, rx_buf);
 		cleaned_count++;
 
 		/* skip if it is NOP desc */
@@ -1011,9 +1068,6 @@
 			continue;
 		}
 
-		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
-			ICE_RX_FLEX_DESC_PTYPE_M;
-
 		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
 		if (ice_test_staterr(rx_desc, stat_err_bits))
 			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
@@ -1030,6 +1084,9 @@
 		total_rx_bytes += skb->len;
 
 		/* populate checksum, VLAN, and protocol */
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+			ICE_RX_FLEX_DESC_PTYPE_M;
+
 		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
 		/* send completed skb up the stack */
@@ -1039,6 +1096,9 @@
 		total_rx_pkts++;
 	}
 
+	/* return up to cleaned_count buffers to hardware */
+	failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
+
 	/* update queue and vector specific stats */
 	u64_stats_update_begin(&rx_ring->syncp);
 	rx_ring->stats.pkts += total_rx_pkts;
@@ -1052,6 +1112,357 @@
 }
 
 /**
+ * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic
+ * @port_info: port_info structure containing the current link speed
+ * @avg_pkt_size: average size of Tx or Rx packets based on clean routine
+ * @itr: ITR value to update
+ *
+ * Calculate how big of an increment should be applied to the ITR value passed
+ * in based on wmem_default, SKB overhead, Ethernet overhead, and the current
+ * link speed.
+ *
+ * The following is a calculation derived from:
+ *  wmem_default / (size + overhead) = desired_pkts_per_int
+ *  rate / bits_per_byte / (size + Ethernet overhead) = pkt_rate
+ *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet (256 for the skb, 64 headroom, 320 shared info), we can reduce
+ * the formula down to:
+ *
+ *	 wmem_default * bits_per_byte * usecs_per_sec   pkt_size + 24
+ * ITR = -------------------------------------------- * --------------
+ *			     rate			pkt_size + 640
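+ *
+ * As a rough worked example (figures assumed for illustration): at 10 Gb
+ * the leading term reduces to the constant 170 used below, so an average
+ * packet size of 1500 bytes gives an increment of
+ * DIV_ROUND_UP(170 * 1524, 2140) = 122 usecs; at 100 Gb the constant is 17
+ * and the same packet size yields roughly 13 usecs.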
+ */
+static unsigned int
+ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info,
+				 unsigned int avg_pkt_size,
+				 unsigned int itr)
+{
+	switch (port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		/* fall through */
+	default:
+		itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	}
+
+	if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= ICE_ITR_ADAPTIVE_LATENCY;
+		itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+	return itr;
+}
+
+/**
+ * ice_update_itr - update the adaptive ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per-interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ */
+static void
+ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
+{
+	unsigned long next_update = jiffies;
+	unsigned int packets, bytes, itr;
+	bool container_is_rx;
+
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
+		return;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
+	}
+
+	container_is_rx = (&q_vector->rx == rc);
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
+	 */
+	itr = container_is_rx ?
+		ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY :
+		ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY;
+
+	/* If we haven't updated within the last 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slowly there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	prefetch(q_vector->vsi->port_info);
+
+	packets = rc->total_pkts;
+	bytes = rc->total_bytes;
+
+	if (container_is_rx) {
+		/* If there are 1 to 4 Rx packets and bytes are less than
+		 * 9000, assume insufficient data to use the bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
+			itr = ICE_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size_and_speed;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & ICE_ITR_MASK) ==
+		    ICE_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 56) {
+		itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= ICE_ITR_ADAPTIVE_LATENCY;
+			itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
+	}
+
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= ICE_ITR_MASK;
+
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr >>= 1;
+		itr &= ICE_ITR_MASK;
+		if (itr < ICE_ITR_ADAPTIVE_MIN_USECS)
+			itr = ICE_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = ICE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size_and_speed:
+
+	/* based on checks above packets cannot be 0 so division is safe */
+	itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info,
+					       bytes / packets, itr);
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
+	rc->total_bytes = 0;
+	rc->total_pkts = 0;
+}
+
+/**
+ * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
+ * @itr_idx: interrupt throttling index
+ * @itr: interrupt throttling value in usecs
+ */
+static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
+{
+	/* The ITR value is reported in microseconds, and the register value is
+	 * recorded in 2 microsecond units. For this reason we only need to
+	 * shift by GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+	 * granularity as a shift instead of division. The mask makes sure the
+	 * ITR value is never odd so we don't accidentally write into the field
+	 * prior to the ITR field.
+	 */
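+	/* Example (illustrative): an ITR of 50 usecs lands as 25 in the 2-usec
+	 * INTERVAL field, since the shift by GLINT_DYN_CTL_INTERVAL_S -
+	 * ICE_ITR_GRAN_S effectively halves the usec value while placing it.
+	 */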
+	itr &= ICE_ITR_MASK;
+
+	return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+		(itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
+		(itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
+}
+
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either the Tx or Rx
+ * ITR is updated we make the adaptive scheme wait until either the ITR
+ * completely expires via the next_update expiration or we have been through
+ * at least 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
+/**
+ * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
+ * @q_vector: q_vector for which ITR is being updated and interrupt enabled
+ */
+static void ice_update_ena_itr(struct ice_q_vector *q_vector)
+{
+	struct ice_ring_container *tx = &q_vector->tx;
+	struct ice_ring_container *rx = &q_vector->rx;
+	struct ice_vsi *vsi = q_vector->vsi;
+	u32 itr_val;
+
+	/* When exiting WB_ON_ITR, set a low ITR value and trigger
+	 * interrupts to expire right away in case we have more work ready to go
+	 * already
+	 */
+	if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) {
+		itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS);
+		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
+		/* set target back to last user set value */
+		rx->target_itr = rx->itr_setting;
+		/* set current to what we just wrote and dynamic if needed */
+		rx->current_itr = ICE_WB_ON_ITR_USECS |
+			(rx->itr_setting & ICE_ITR_DYNAMIC);
+		/* allow normal interrupt flow to start */
+		q_vector->itr_countdown = 0;
+		return;
+	}
+
+	/* This will do nothing if dynamic updates are not enabled */
+	ice_update_itr(q_vector, tx);
+	ice_update_itr(q_vector, rx);
+
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
+	 */
+	if (rx->target_itr < rx->current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
+		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+		rx->current_itr = rx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((tx->target_itr < tx->current_itr) ||
+		   ((rx->target_itr - rx->current_itr) <
+		    (tx->target_itr - tx->current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr);
+		tx->current_itr = tx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (rx->current_itr != rx->target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+		rx->current_itr = rx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else {
+		/* Still have to re-enable the interrupts */
+		itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
+	}
+
+	if (!test_bit(__ICE_DOWN, q_vector->vsi->state))
+		wr32(&q_vector->vsi->back->hw,
+		     GLINT_DYN_CTL(q_vector->reg_idx),
+		     itr_val);
+}
+
+/**
+ * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
+ * @q_vector: q_vector to set WB_ON_ITR on
+ *
+ * We need to tell hardware to write-back completed descriptors even when
+ * interrupts are disabled. Without WB_ON_ITR, descriptors are only written
+ * back once they fill a complete cache line; descriptors that only partially
+ * fill a cache line might otherwise not be written back until the next
+ * interrupt.
+ *
+ * This sets the write-back frequency to 2 microseconds as that is the minimum
+ * value that's not 0 given the ITR granularity. It also sets the INTENA_MSK
+ * bit to make sure hardware knows we aren't meddling with the INTENA_M bit.
+ */
+static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
+{
+	struct ice_vsi *vsi = q_vector->vsi;
+
+	/* already in WB_ON_ITR mode no need to change it */
+	if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
+		return;
+
+	if (q_vector->num_ring_rx)
+		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+		     ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
+						 ICE_RX_ITR));
+
+	if (q_vector->num_ring_tx)
+		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+		     ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
+						 ICE_TX_ITR));
+
+	q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
+}
+
+/**
  * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
  * @napi: napi struct with our devices info in it
  * @budget: amount of work driver is allowed to do this pass, in packets
@@ -1064,29 +1475,32 @@
 {
 	struct ice_q_vector *q_vector =
 				container_of(napi, struct ice_q_vector, napi);
-	struct ice_vsi *vsi = q_vector->vsi;
-	struct ice_pf *pf = vsi->back;
 	bool clean_complete = true;
-	int budget_per_ring = 0;
 	struct ice_ring *ring;
+	int budget_per_ring;
 	int work_done = 0;
 
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
 	ice_for_each_ring(ring, q_vector->tx)
-		if (!ice_clean_tx_irq(vsi, ring, budget))
+		if (!ice_clean_tx_irq(ring, budget))
 			clean_complete = false;
 
 	/* Handle case where we are called by netpoll with a budget of 0 */
-	if (budget <= 0)
+	if (unlikely(budget <= 0))
 		return budget;
 
-	/* We attempt to distribute budget to each Rx queue fairly, but don't
-	 * allow the budget to go below 1 because that would exit polling early.
-	 */
-	if (q_vector->num_ring_rx)
+	/* normally we have 1 Rx ring per q_vector */
+	if (unlikely(q_vector->num_ring_rx > 1))
+		/* We attempt to distribute budget to each Rx queue fairly, but
+		 * don't allow the budget to go below 1 because that would exit
+		 * polling early.
+		 */
 		budget_per_ring = max(budget / q_vector->num_ring_rx, 1);
+	else
+		/* Max of 1 Rx ring in this q_vector so give it the budget */
+		budget_per_ring = budget;
 
 	ice_for_each_ring(ring, q_vector->rx) {
 		int cleaned;
@@ -1102,11 +1516,15 @@
 	if (!clean_complete)
 		return budget;
 
-	/* Work is done so exit the polling mode and re-enable the interrupt */
-	napi_complete_done(napi, work_done);
-	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
-		ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
-	return 0;
+	/* Exit the polling mode, but don't re-enable interrupts if the stack
+	 * might poll us again due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		ice_update_ena_itr(q_vector);
+	else
+		ice_set_wb_on_itr(q_vector);
+
+	return min_t(int, work_done, budget - 1);
 }
 
 /* helper function for building cmd/type/offset */
@@ -1121,7 +1539,7 @@
 }
 
 /**
- * __ice_maybe_stop_tx - 2nd level check for tx stop conditions
+ * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
  * @tx_ring: the ring to be checked
  * @size: the size buffer we want to assure is available
  *
@@ -1144,7 +1562,7 @@
 }
 
 /**
- * ice_maybe_stop_tx - 1st level check for tx stop conditions
+ * ice_maybe_stop_tx - 1st level check for Tx stop conditions
  * @tx_ring: the ring to be checked
  * @size:    the size buffer we want to assure is available
  *
@@ -1154,6 +1572,7 @@
 {
 	if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
 		return 0;
+
 	return __ice_maybe_stop_tx(tx_ring, size);
 }
 
@@ -1173,7 +1592,7 @@
 {
 	u64 td_offset, td_tag, td_cmd;
 	u16 i = tx_ring->next_to_use;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	unsigned int data_len, size;
 	struct ice_tx_desc *tx_desc;
 	struct ice_tx_buf *tx_buf;
@@ -1290,19 +1709,14 @@
 	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	/* notify HW of packet */
-	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
 		writel(i, tx_ring->tail);
-
-		/* we need this if more than one processor can write to our tail
-		 * at a time, it synchronizes IO on IA64/Altix systems
-		 */
-		mmiowb();
 	}
 
 	return;
 
 dma_error:
-	/* clear dma mappings for failed tx_buf map */
+	/* clear DMA mappings for failed tx_buf map */
 	for (;;) {
 		tx_buf = &tx_ring->tx_buf[i];
 		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
@@ -1397,6 +1811,12 @@
 		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
 		break;
 	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+		l4_len = sizeof(struct sctphdr) >> 2;
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+
 	default:
 		if (first->tx_flags & ICE_TX_FLAGS_TSO)
 			return -1;
@@ -1410,7 +1830,7 @@
 }
 
 /**
- * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
  * @tx_ring: ring to send buffer on
  * @first: pointer to struct ice_tx_buf
  *
@@ -1436,7 +1856,7 @@
 		 * to the encapsulated ethertype.
 		 */
 		skb->protocol = vlan_get_protocol(skb);
-		goto out;
+		return 0;
 	}
 
 	/* if we have a HW VLAN tag being added, default to the HW one */
@@ -1458,8 +1878,7 @@
 		first->tx_flags |= ICE_TX_FLAGS_SW_VLAN;
 	}
 
-out:
-	return 0;
+	return ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
 }
 
 /**
@@ -1496,6 +1915,7 @@
 	if (err < 0)
 		return err;
 
+	/* cppcheck-suppress unreadVariable */
 	ip.hdr = skb_network_header(skb);
 	l4.hdr = skb_transport_header(skb);
 
@@ -1525,10 +1945,10 @@
 	cd_mss = skb_shinfo(skb)->gso_size;
 
 	/* record cdesc_qw1 with TSO parameters */
-	off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX |
-			 (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
-			 (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
-			 (cd_mss << ICE_TXD_CTX_QW1_MSS_S);
+	off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+			     (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+			     (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+			     (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
 	first->tx_flags |= ICE_TX_FLAGS_TSO;
 	return 1;
 }
@@ -1551,30 +1971,30 @@
  * Finally, we add one to round up. Because 256 isn't an exact multiple of
  * 3, we'll underestimate near each multiple of 12K. This is actually more
  * accurate as we have 4K - 1 of wiggle room that we can fit into the last
- * segment.  For our purposes this is accurate out to 1M which is orders of
+ * segment. For our purposes this is accurate out to 1M which is orders of
  * magnitude greater than our largest possible GSO size.
  *
  * This would then be implemented as:
- *     return (((size >> 12) * 85) >> 8) + 1;
+ *     return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
  *
  * Since multiplication and division are commutative, we can reorder
  * operations into:
- *     return ((size * 85) >> 20) + 1;
+ *     return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
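+ *
+ * As an illustrative check: a 32768 byte fragment gives
+ * (32768 * 85) >> 20 = 2, plus the one data pointer descriptor, for three
+ * descriptors total, matching the ceiling of 32768 / 12288.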
  */
 static unsigned int ice_txd_use_count(unsigned int size)
 {
-	return ((size * 85) >> 20) + 1;
+	return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
 }
 
 /**
- * ice_xmit_desc_count - calculate number of tx descriptors needed
+ * ice_xmit_desc_count - calculate number of Tx descriptors needed
  * @skb: send buffer
  *
  * Returns number of data descriptors needed for this skb.
  */
 static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
 {
-	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
 	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
 	unsigned int count = 0, size = skb_headlen(skb);
 
@@ -1605,7 +2025,7 @@
  */
 static bool __ice_chk_linearize(struct sk_buff *skb)
 {
-	const struct skb_frag_struct *frag, *stale;
+	const skb_frag_t *frag, *stale;
 	int nr_frags, sum;
 
 	/* no need to check if number of frags is less than 7 */
@@ -1619,7 +2039,7 @@
 	nr_frags -= ICE_MAX_BUF_TXD - 2;
 	frag = &skb_shinfo(skb)->frags[0];
 
-	/* Initialize size to the negative value of gso_size minus 1.  We
+	/* Initialize size to the negative value of gso_size minus 1. We
 	 * use this as the worst case scenario in which the frag ahead
 	 * of us only provides one byte which is why we are limited to 6
 	 * descriptors for a single transmit as the header and previous
@@ -1687,6 +2107,7 @@
 ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 {
 	struct ice_tx_offload_params offload = { 0 };
+	struct ice_vsi *vsi = tx_ring->vsi;
 	struct ice_tx_buf *first;
 	unsigned int count;
 	int tso, csum;
@@ -1705,7 +2126,8 @@
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
-	if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+	if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
+			      ICE_DESCS_FOR_CTX_DESC)) {
 		tx_ring->tx_stats.tx_busy++;
 		return NETDEV_TX_BUSY;
 	}
@@ -1733,7 +2155,15 @@
 	if (csum < 0)
 		goto out_drop;
 
-	if (tso || offload.cd_tunnel_params) {
+	/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
+	if (unlikely(skb->priority == TC_PRIO_CONTROL &&
+		     vsi->type == ICE_VSI_PF &&
+		     vsi->port_info->is_sw_lldp))
+		offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+					ICE_TX_CTX_DESC_SWTCH_UPLINK <<
+					ICE_TXD_CTX_QW1_CMD_S);
+
+	if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
 		struct ice_tx_ctx_desc *cdesc;
 		int i = tx_ring->next_to_use;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 31bc998..94a9280 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -22,8 +22,21 @@
 #define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
 #define ICE_MAX_TXQ_PER_TXQG	128
 
-/* Tx Descriptors needed, worst case */
-#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+/* We are assuming that the cache line is always 64 bytes here for ice.
+ * In order to make sure that is a correct assumption there is a check in probe
+ * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
+ * size is 128 bytes. We do it this way because we do not want to read the
+ * GLPCI_CNF2 register or a variable containing the value on every pass through
+ * the Tx path.
+ */
+#define ICE_CACHE_LINE_BYTES		64
+#define ICE_DESCS_PER_CACHE_LINE	(ICE_CACHE_LINE_BYTES / \
+					 sizeof(struct ice_tx_desc))
+#define ICE_DESCS_FOR_CTX_DESC		1
+#define ICE_DESCS_FOR_SKB_DATA_PTR	1
+/* Tx descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \
+		     ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR)
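+/* With 16 byte Tx descriptors and the 64 byte cache line assumed above,
+ * ICE_DESCS_PER_CACHE_LINE works out to 4; with a typical MAX_SKB_FRAGS of
+ * 17, DESC_NEEDED is 17 + 1 + 4 + 1 = 23 (figures for illustration).
+ */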
 #define ICE_DESC_UNUSED(R)	\
 	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 	(R)->next_to_clean - (R)->next_to_use - 1)
@@ -32,27 +45,32 @@
 #define ICE_TX_FLAGS_HW_VLAN	BIT(1)
 #define ICE_TX_FLAGS_SW_VLAN	BIT(2)
 #define ICE_TX_FLAGS_VLAN_M	0xffff0000
+#define ICE_TX_FLAGS_VLAN_PR_M	0xe0000000
+#define ICE_TX_FLAGS_VLAN_PR_S	29
 #define ICE_TX_FLAGS_VLAN_S	16
 
+#define ICE_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
 struct ice_tx_buf {
 	struct ice_tx_desc *next_to_watch;
 	struct sk_buff *skb;
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	u32 tx_flags;
-	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
 };
 
 struct ice_tx_offload_params {
-	u8 header_len;
+	u64 cd_qw1;
+	struct ice_ring *tx_ring;
 	u32 td_cmd;
 	u32 td_offset;
 	u32 td_l2tag1;
-	u16 cd_l2tag2;
 	u32 cd_tunnel_params;
-	u64 cd_qw1;
-	struct ice_ring *tx_ring;
+	u16 cd_l2tag2;
+	u8 header_len;
 };
 
 struct ice_rx_buf {
@@ -60,6 +78,7 @@
 	dma_addr_t dma;
 	struct page *page;
 	unsigned int page_offset;
+	u16 pagecnt_bias;
 };
 
 struct ice_q_stats {
@@ -71,6 +90,7 @@
 	u64 restart_q;
 	u64 tx_busy;
 	u64 tx_linearize;
+	int prev_pkt; /* negative if no pending Tx descriptors */
 };
 
 struct ice_rxq_stats {
@@ -102,11 +122,40 @@
 /* indices into GLINT_ITR registers */
 #define ICE_RX_ITR	ICE_IDX_ITR0
 #define ICE_TX_ITR	ICE_IDX_ITR1
-#define ICE_ITR_DYNAMIC	0x8000  /* use top bit as a flag */
-#define ICE_ITR_8K	0x003E
+#define ICE_ITR_8K	124
+#define ICE_ITR_20K	50
+#define ICE_ITR_MAX	8160
+#define ICE_DFLT_TX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
+#define ICE_DFLT_RX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
+#define ICE_ITR_DYNAMIC	0x8000  /* used as flag for itr_setting */
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC))
+#define ITR_TO_REG(setting)	((setting) & ~ICE_ITR_DYNAMIC)
+#define ICE_ITR_GRAN_S		1	/* ITR granularity is always 2us */
+#define ICE_ITR_GRAN_US		BIT(ICE_ITR_GRAN_S)
+#define ICE_ITR_MASK		0x1FFE	/* ITR register value alignment mask */
+#define ITR_REG_ALIGN(setting)	__ALIGN_MASK(setting, ~ICE_ITR_MASK)
 
-/* apply ITR HW granularity translation to program the HW registers */
-#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran))
+#define ICE_ITR_ADAPTIVE_MIN_INC	0x0002
+#define ICE_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define ICE_ITR_ADAPTIVE_MAX_USECS	0x00FA
+#define ICE_ITR_ADAPTIVE_LATENCY	0x8000
+#define ICE_ITR_ADAPTIVE_BULK		0x0000
+
+#define ICE_DFLT_INTRL	0
+#define ICE_MAX_INTRL	236
+
+#define ICE_WB_ON_ITR_USECS	2
+#define ICE_IN_WB_ON_ITR_MODE	255
+/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows
+ * setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. It also
+ * sets the write-back latency to the usecs passed in.
+ */
+#define ICE_GLINT_DYN_CTL_WB_ON_ITR(usecs, itr_idx)	\
+	((((usecs) << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)) & \
+	  GLINT_DYN_CTL_INTERVAL_M) | \
+	 (((itr_idx) << GLINT_DYN_CTL_ITR_INDX_S) & \
+	  GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | \
+	 GLINT_DYN_CTL_WB_ON_ITR_M)
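+/* e.g., ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, ICE_RX_ITR) places
+ * a value of 1 in the 2-usec INTERVAL field (illustrative expansion).
+ */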
 
 /* Legacy or Advanced Mode Queue */
 #define ICE_TX_ADVANCED	0
@@ -114,6 +163,7 @@
 
 /* descriptor ring, associated with a VSI */
 struct ice_ring {
+	/* CL1 - 1st cacheline starts here */
 	struct ice_ring *next;		/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
@@ -125,16 +175,11 @@
 		struct ice_tx_buf *tx_buf;
 		struct ice_rx_buf *rx_buf;
 	};
+	/* CL2 - 2nd cacheline starts here */
 	u16 q_index;			/* Queue number of ring */
-	u32 txq_teid;			/* Added Tx queue TEID */
+	u16 q_handle;			/* Queue handle per TC */
 
-	/* high bit set means dynamic, use accessor routines to read/write.
-	 * hardware supports 2us/1us resolution for the ITR registers.
-	 * these values always store the USER setting, and must be converted
-	 * before programming to a register.
-	 */
-	u16 rx_itr_setting;
-	u16 tx_itr_setting;
+	u8 ring_active:1;		/* is ring online or not */
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
@@ -142,8 +187,7 @@
 	/* used in interrupt processing */
 	u16 next_to_use;
 	u16 next_to_clean;
-
-	u8 ring_active;			/* is ring online or not */
+	u16 next_to_alloc;
 
 	/* stats structs */
 	struct ice_q_stats	stats;
@@ -153,26 +197,33 @@
 		struct ice_rxq_stats rx_stats;
 	};
 
-	unsigned int size;		/* length of descriptor ring in bytes */
-	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
+	/* CLX - the items below are only accessed infrequently and should be
+	 * in their own cache line if possible
+	 */
+	dma_addr_t dma;			/* physical address of ring */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	u32 txq_teid;			/* Added Tx queue TEID */
+	u16 rx_buf_len;
+#ifdef CONFIG_DCB
+	u8 dcb_tc;			/* Traffic class of ring */
+#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;
 
-enum ice_latency_range {
-	ICE_LOWEST_LATENCY = 0,
-	ICE_LOW_LATENCY = 1,
-	ICE_BULK_LATENCY = 2,
-	ICE_ULTRA_LATENCY = 3,
-};
-
 struct ice_ring_container {
-	/* array of pointers to rings */
+	/* head of linked-list of rings */
 	struct ice_ring *ring;
+	unsigned long next_update;	/* jiffies value of next queue update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_pkts;	/* total packets processed this int */
-	enum ice_latency_range latency_range;
-	u16 itr;
+	u16 itr_idx;		/* index in the interrupt vector */
+	u16 target_itr;		/* value in usecs divided by the hw->itr_gran */
+	u16 current_itr;	/* value in usecs divided by the hw->itr_gran */
+	/* high bit set means dynamic ITR; the rest stores the user-readable
+	 * ITR value in usecs, which must be converted before programming
+	 * to a register.
+	 */
+	u16 itr_setting;
 };
 
 /* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index ba11b58..6667d17 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -4,28 +4,35 @@
 #ifndef _ICE_TYPE_H_
 #define _ICE_TYPE_H_
 
+#define ICE_BYTES_PER_WORD	2
+#define ICE_BYTES_PER_DWORD	4
+
 #include "ice_status.h"
 #include "ice_hw_autogen.h"
 #include "ice_osdep.h"
 #include "ice_controlq.h"
 #include "ice_lan_tx_rx.h"
+#include "ice_flex_type.h"
 
-#define ICE_BYTES_PER_WORD	2
-#define ICE_BYTES_PER_DWORD	4
-
-static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
+static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 {
-	return test_bit(tc, (unsigned long *)&bitmap);
+	return test_bit(tc, &bitmap);
 }
 
+/* Driver always calls main vsi_handle first */
+#define ICE_MAIN_VSI_HANDLE		0
+
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_FW_LOG		BIT_ULL(3)
 #define ICE_DBG_LINK		BIT_ULL(4)
+#define ICE_DBG_PHY		BIT_ULL(5)
 #define ICE_DBG_QCTX		BIT_ULL(6)
 #define ICE_DBG_NVM		BIT_ULL(7)
 #define ICE_DBG_LAN		BIT_ULL(8)
 #define ICE_DBG_SW		BIT_ULL(13)
 #define ICE_DBG_SCHED		BIT_ULL(14)
+#define ICE_DBG_PKG		BIT_ULL(16)
 #define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
 #define ICE_DBG_AQ_CMD		BIT_ULL(27)
@@ -34,15 +41,28 @@
 enum ice_aq_res_ids {
 	ICE_NVM_RES_ID = 1,
 	ICE_SPD_RES_ID,
-	ICE_GLOBAL_CFG_LOCK_RES_ID,
-	ICE_CHANGE_LOCK_RES_ID
+	ICE_CHANGE_LOCK_RES_ID,
+	ICE_GLOBAL_CFG_LOCK_RES_ID
 };
 
+/* FW update timeout definitions are in milliseconds */
+#define ICE_NVM_TIMEOUT			180000
+#define ICE_CHANGE_LOCK_TIMEOUT		1000
+#define ICE_GLOBAL_CFG_LOCK_TIMEOUT	3000
+
 enum ice_aq_res_access_type {
 	ICE_RES_READ = 1,
 	ICE_RES_WRITE
 };
 
+struct ice_driver_ver {
+	u8 major_ver;
+	u8 minor_ver;
+	u8 build_ver;
+	u8 subbuild_ver;
+	u8 driver_string[32];
+};
+
 enum ice_fc_mode {
 	ICE_FC_NONE = 0,
 	ICE_FC_RX_PAUSE,
@@ -52,6 +72,13 @@
 	ICE_FC_DFLT
 };
 
+enum ice_fec_mode {
+	ICE_FEC_NONE = 0,
+	ICE_FEC_RS,
+	ICE_FEC_BASER,
+	ICE_FEC_AUTO
+};
+
 enum ice_set_fc_aq_failures {
 	ICE_SET_FC_AQ_FAIL_NONE = 0,
 	ICE_SET_FC_AQ_FAIL_GET,
@@ -76,11 +103,15 @@
 
 enum ice_vsi_type {
 	ICE_VSI_PF = 0,
+	ICE_VSI_VF,
+	ICE_VSI_LB = 6,
 };
 
 struct ice_link_status {
 	/* Refer to ice_aq_phy_type for bits definition */
 	u64 phy_type_low;
+	u64 phy_type_high;
+	u8 topo_media_conflict;
 	u16 max_frame_size;
 	u16 link_speed;
 	u16 req_speeds;
@@ -88,6 +119,7 @@
 	u8 link_info;
 	u8 an_info;
 	u8 ext_info;
+	u8 fec_info;
 	u8 pacing;
 	/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
 	 * ice_aqc_get_phy_caps structure
@@ -95,22 +127,37 @@
 	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
 };
 
+/* Different reset sources for which a disable queue AQ call has to be made in
+ * order to clean the Tx scheduler as a part of the reset
+ */
+enum ice_disq_rst_src {
+	ICE_NO_RESET = 0,
+	ICE_VM_RESET,
+	ICE_VF_RESET,
+};
+
 /* PHY info such as phy_type, etc... */
 struct ice_phy_info {
 	struct ice_link_status link_info;
 	struct ice_link_status link_info_old;
 	u64 phy_type_low;
+	u64 phy_type_high;
 	enum ice_media_type media_type;
 	u8 get_link_info;
 };
 
 /* Common HW capabilities for SW use */
 struct ice_hw_common_caps {
-	/* TX/RX queues */
-	u16 num_rxq;		/* Number/Total RX queues */
-	u16 rxq_first_id;	/* First queue ID for RX queues */
-	u16 num_txq;		/* Number/Total TX queues */
-	u16 txq_first_id;	/* First queue ID for TX queues */
+	u32 valid_functions;
+	/* DCB capabilities */
+	u32 active_tc_bitmap;
+	u32 maxtc;
+
+	/* Tx/Rx queues */
+	u16 num_rxq;		/* Number/Total Rx queues */
+	u16 rxq_first_id;	/* First queue ID for Rx queues */
+	u16 num_txq;		/* Number/Total Tx queues */
+	u16 txq_first_id;	/* First queue ID for Tx queues */
 
 	/* MSI-X vectors */
 	u16 num_msix_vectors;
@@ -119,20 +166,28 @@
 	/* Max MTU for function or device */
 	u16 max_mtu;
 
+	/* Virtualization support */
+	u8 sr_iov_1_1;			/* SR-IOV enabled */
+
 	/* RSS related capabilities */
 	u16 rss_table_size;		/* 512 for PFs and 64 for VFs */
 	u8 rss_table_entry_width;	/* RSS Entry width in bits */
+
+	u8 dcb;
 };
 
 /* Function specific capabilities */
 struct ice_hw_func_caps {
 	struct ice_hw_common_caps common_cap;
-	u32 guaranteed_num_vsi;
+	u32 num_allocd_vfs;		/* Number of allocated VFs */
+	u32 vf_base_id;			/* Logical ID of the first VF */
+	u32 guar_num_vsi;
 };
 
 /* Device wide capabilities */
 struct ice_hw_dev_caps {
 	struct ice_hw_common_caps common_cap;
+	u32 num_vfs_exposed;		/* Total number of VFs exposed */
 	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
 };
 
@@ -142,11 +197,18 @@
 	u8 perm_addr[ETH_ALEN];
 };
 
-/* Various RESET request, These are not tied with HW reset types */
+/* Reset types used to determine which kind of reset was requested. These
+ * defines match the RESET_TYPE field of the GLGEN_RSTAT register.
+ * ICE_RESET_PFR does not match any RESET_TYPE field in the GLGEN_RSTAT
+ * register because its reset source is different from that of the other
+ * types listed.
+ */
 enum ice_reset_req {
-	ICE_RESET_PFR	= 0,
+	ICE_RESET_POR	= 0,
+	ICE_RESET_INVAL	= 0,
 	ICE_RESET_CORER	= 1,
 	ICE_RESET_GLOBR	= 2,
+	ICE_RESET_EMPR	= 3,
+	ICE_RESET_PFR	= 4,
 };
 
 /* Bus parameters */
@@ -170,17 +232,24 @@
 	u8 blank_nvm_mode;        /* is NVM empty (no FW present) */
 };
 
+#define ICE_NVM_VER_LEN	32
+
 /* Max number of port to queue branches w.r.t topology */
 #define ICE_MAX_TRAFFIC_CLASS 8
 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
 
+#define ice_for_each_traffic_class(_i)	\
+	for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+
+#define ICE_INVAL_TEID 0xFFFFFFFF
+
 struct ice_sched_node {
 	struct ice_sched_node *parent;
 	struct ice_sched_node *sibling; /* next sibling in the same layer */
 	struct ice_sched_node **children;
 	struct ice_aqc_txsched_elem_data info;
-	u32 agg_id;			/* aggregator group id */
-	u16 vsi_id;
+	u32 agg_id;			/* aggregator group ID */
+	u16 vsi_handle;
 	u8 in_use;			/* suspended or in use */
 	u8 tx_sched_layer;		/* Logical Layer (1-9) */
 	u8 num_children;
@@ -204,14 +273,14 @@
 };
 
 #define ICE_SCHED_DFLT_RL_PROF_ID	0
+#define ICE_SCHED_DFLT_BW_WT		1
 
-/* vsi type list entry to locate corresponding vsi/ag nodes */
+/* VSI type list entry to locate corresponding VSI/ag nodes */
 struct ice_sched_vsi_info {
 	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
 	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
 	struct list_head list_entry;
 	u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
-	u16 vsi_id;
 };
 
 /* driver defines the policy */
@@ -221,15 +290,70 @@
 	u8 rdma_ena;
 };
 
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct ice_dcb_ets_cfg {
+	u8 willing;
+	u8 cbs;
+	u8 maxtcs;
+	u8 prio_table[ICE_MAX_TRAFFIC_CLASS];
+	u8 tcbwtable[ICE_MAX_TRAFFIC_CLASS];
+	u8 tsatable[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct ice_dcb_pfc_cfg {
+	u8 willing;
+	u8 mbc;
+	u8 pfccap;
+	u8 pfcena;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct ice_dcb_app_priority_table {
+	u16 prot_id;
+	u8 priority;
+	u8 selector;
+};
+
+#define ICE_MAX_USER_PRIORITY	8
+#define ICE_DCBX_MAX_APPS	32
+#define ICE_LLDPDU_SIZE		1500
+#define ICE_TLV_STATUS_OPER	0x1
+#define ICE_TLV_STATUS_SYNC	0x2
+#define ICE_TLV_STATUS_ERR	0x4
+#define ICE_APP_PROT_ID_FCOE	0x8906
+#define ICE_APP_PROT_ID_ISCSI	0x0cbc
+#define ICE_APP_PROT_ID_FIP	0x8914
+#define ICE_APP_SEL_ETHTYPE	0x1
+#define ICE_APP_SEL_TCPIP	0x2
+#define ICE_CEE_APP_SEL_ETHTYPE	0x0
+#define ICE_CEE_APP_SEL_TCPIP	0x1
+
+struct ice_dcbx_cfg {
+	u32 numapps;
+	u32 tlv_status; /* CEE mode TLV status */
+	struct ice_dcb_ets_cfg etscfg;
+	struct ice_dcb_ets_cfg etsrec;
+	struct ice_dcb_pfc_cfg pfc;
+	struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS];
+	u8 dcbx_mode;
+#define ICE_DCBX_MODE_CEE	0x1
+#define ICE_DCBX_MODE_IEEE	0x2
+	u8 app_mode;
+#define ICE_DCBX_APPS_NON_WILLING	0x1
+};
+
 struct ice_port_info {
 	struct ice_sched_node *root;	/* Root Node per Port */
-	struct ice_hw *hw;		/* back pointer to hw instance */
+	struct ice_hw *hw;		/* back pointer to HW instance */
 	u32 last_node_teid;		/* scheduler last node info */
 	u16 sw_id;			/* Initial switch ID belongs to port */
 	u16 pf_vf_num;
 	u8 port_state;
 #define ICE_SCHED_PORT_STATE_INIT	0x0
 #define ICE_SCHED_PORT_STATE_READY	0x1
+	u8 lport;
+#define ICE_LPORT_MASK			0xff
 	u16 dflt_tx_vsi_rule_id;
 	u16 dflt_tx_vsi_num;
 	u16 dflt_rx_vsi_rule_id;
@@ -238,28 +362,39 @@
 	struct ice_mac_info mac;
 	struct ice_phy_info phy;
 	struct mutex sched_lock;	/* protect access to TXSched tree */
-	struct ice_sched_tx_policy sched_policy;
-	struct list_head vsi_info_list;
-	struct list_head agg_list;	/* lists all aggregator */
-	u8 lport;
-#define ICE_LPORT_MASK		0xff
-	u8 is_vf;
+	struct ice_sched_node *
+		sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	struct ice_dcbx_cfg local_dcbx_cfg;	/* Oper/Local Cfg */
+	/* DCBX info */
+	struct ice_dcbx_cfg remote_dcbx_cfg;	/* Peer Cfg */
+	struct ice_dcbx_cfg desired_dcbx_cfg;	/* CEE Desired Cfg */
+	/* LLDP/DCBX Status */
+	u8 dcbx_status:3;		/* see ICE_DCBX_STATUS_DIS */
+	u8 is_sw_lldp:1;
+	u8 is_vf:1;
 };
 
 struct ice_switch_info {
-	/* Switch VSI lists to MAC/VLAN translation */
-	struct mutex mac_list_lock;		/* protect MAC list */
-	struct list_head mac_list_head;
-	struct mutex vlan_list_lock;		/* protect VLAN list */
-	struct list_head vlan_list_head;
-	struct mutex eth_m_list_lock;	/* protect ethtype list */
-	struct list_head eth_m_list_head;
-	struct mutex promisc_list_lock;	/* protect promisc mode list */
-	struct list_head promisc_list_head;
-	struct mutex mac_vlan_list_lock;	/* protect MAC-VLAN list */
-	struct list_head mac_vlan_list_head;
-
 	struct list_head vsi_list_map_head;
+	struct ice_sw_recipe *recp_list;
+};
+
+/* FW logging configuration */
+struct ice_fw_log_evnt {
+	u8 cfg : 4;	/* New event enables to configure */
+	u8 cur : 4;	/* Current/active event enables */
+};
+
+struct ice_fw_log_cfg {
+	u8 cq_en : 1;    /* FW logging is enabled via the control queue */
+	u8 uart_en : 1;  /* FW logging is enabled via UART for all PFs */
+	u8 actv_evnts;   /* Accumulation of currently enabled log events */
+
+#define ICE_FW_LOG_EVNT_INFO	(ICE_AQC_FW_LOG_INFO_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_INIT	(ICE_AQC_FW_LOG_INIT_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_FLOW	(ICE_AQC_FW_LOG_FLOW_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_ERR	(ICE_AQC_FW_LOG_ERR_EN >> ICE_AQC_FW_LOG_EN_S)
+	struct ice_fw_log_evnt evnts[ICE_AQC_FW_LOG_ID_MAX];
 };
 
 /* Port hardware description */
@@ -280,14 +415,18 @@
 
 	u8 pf_id;		/* device profile info */
 
-	/* TX Scheduler values */
+	/* Tx Scheduler values */
 	u16 num_tx_sched_layers;
 	u16 num_tx_sched_phys_layers;
 	u8 flattened_layers;
 	u8 max_cgds;
 	u8 sw_entry_point_layer;
+	u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	struct list_head agg_list;	/* lists all aggregators */
 
+	struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI];
 	u8 evb_veb;		/* true for VEB, false for VEPA */
+	u8 reset_ongoing;	/* true if HW is in reset, false otherwise */
 	struct ice_bus_info bus;
 	struct ice_nvm_info nvm;
 	struct ice_hw_dev_caps dev_caps;	/* device capabilities */
@@ -297,6 +436,7 @@
 
 	/* Control Queue info */
 	struct ice_ctl_q_info adminq;
+	struct ice_ctl_q_info mailboxq;
 
 	u8 api_branch;		/* API branch version */
 	u8 api_maj_ver;		/* API major version */
@@ -308,18 +448,53 @@
 	u8 fw_patch;		/* firmware patch version */
 	u32 fw_build;		/* firmware build number */
 
-	/* minimum allowed value for different speeds */
-#define ICE_ITR_GRAN_MIN_200	1
-#define ICE_ITR_GRAN_MIN_100	1
-#define ICE_ITR_GRAN_MIN_50	2
-#define ICE_ITR_GRAN_MIN_25	4
+	struct ice_fw_log_cfg fw_log;
+
+/* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
+ * register. Used for determining the ITR/intrl granularity during
+ * initialization.
+ */
+#define ICE_MAX_AGG_BW_200G	0x0
+#define ICE_MAX_AGG_BW_100G	0x1
+#define ICE_MAX_AGG_BW_50G	0x2
+#define ICE_MAX_AGG_BW_25G	0x3
+	/* ITR granularity for different speeds */
+#define ICE_ITR_GRAN_ABOVE_25	2
+#define ICE_ITR_GRAN_MAX_25	4
 	/* ITR granularity in 1 us */
-	u8 itr_gran_200;
-	u8 itr_gran_100;
-	u8 itr_gran_50;
-	u8 itr_gran_25;
+	u8 itr_gran;
+	/* INTRL granularity for different speeds */
+#define ICE_INTRL_GRAN_ABOVE_25	4
+#define ICE_INTRL_GRAN_MAX_25	8
+	/* INTRL granularity in 1 us */
+	u8 intrl_gran;
+
 	u8 ucast_shared;	/* true if VSIs can share unicast addr */
 
+	/* Active package version (currently active) */
+	struct ice_pkg_ver active_pkg_ver;
+	u8 active_pkg_name[ICE_PKG_NAME_SIZE];
+	u8 active_pkg_in_nvm;
+
+	enum ice_aq_err pkg_dwnld_status;
+
+	/* Driver's package ver - (from the Metadata seg) */
+	struct ice_pkg_ver pkg_ver;
+	u8 pkg_name[ICE_PKG_NAME_SIZE];
+
+	/* Driver's Ice package version (from the Ice seg) */
+	struct ice_pkg_ver ice_pkg_ver;
+	u8 ice_pkg_name[ICE_PKG_NAME_SIZE];
+
+	/* Pointer to the ice segment */
+	struct ice_seg *seg;
+
+	/* Pointer to allocated copy of pkg memory */
+	u8 *pkg_copy;
+	u32 pkg_size;
+
+	/* HW block tables */
+	struct ice_blk_info blk[ICE_BLK_COUNT];
 };
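
A sketch of how initialization might consume the granularity defines above, assuming the caller has already extracted the max-aggregate-bandwidth field from GL_PWR_MODE_CTL (the helper name and parameter are hypothetical):

static void ice_ex_set_gran(struct ice_hw *hw, u8 agg_bw_mode)
{
	switch (agg_bw_mode) {
	case ICE_MAX_AGG_BW_200G:
	case ICE_MAX_AGG_BW_100G:
	case ICE_MAX_AGG_BW_50G:
		hw->itr_gran = ICE_ITR_GRAN_ABOVE_25;
		hw->intrl_gran = ICE_INTRL_GRAN_ABOVE_25;
		break;
	case ICE_MAX_AGG_BW_25G:
		hw->itr_gran = ICE_ITR_GRAN_MAX_25;
		hw->intrl_gran = ICE_INTRL_GRAN_MAX_25;
		break;
	}
}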
 
 /* Statistics collected by each port, VSI, VEB, and S-channel */
@@ -354,6 +529,11 @@
 	u64 link_xoff_rx;		/* lxoffrxc */
 	u64 link_xon_tx;		/* lxontxc */
 	u64 link_xoff_tx;		/* lxofftxc */
+	u64 priority_xon_rx[8];		/* pxonrxc[8] */
+	u64 priority_xoff_rx[8];	/* pxoffrxc[8] */
+	u64 priority_xon_tx[8];		/* pxontxc[8] */
+	u64 priority_xoff_tx[8];	/* pxofftxc[8] */
+	u64 priority_xon_2_xoff[8];	/* pxon2offc[8] */
 	u64 rx_size_64;			/* prc64 */
 	u64 rx_size_127;		/* prc127 */
 	u64 rx_size_255;		/* prc255 */
@@ -391,4 +571,7 @@
 #define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
 #define ICE_SR_WORDS_IN_1KB		512
 
+/* Hash redirection LUT for VSI - maximum array size */
+#define ICE_VSIQF_HLUT_ARRAY_SIZE	((VSIQF_HLUT_MAX_INDEX + 1) * 4)
+
 #endif /* _ICE_TYPE_H_ */
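
For scale: each VSIQF_HLUT register holds four one-byte LUT entries, so if VSIQF_HLUT_MAX_INDEX is 15 (an assumption consistent with the 64-entry VF RSS table noted in ice_hw_common_caps), the array size works out to (15 + 1) * 4 = 64 bytes.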
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
new file mode 100644
index 0000000..b45797f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -0,0 +1,3275 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+
+/**
+ * ice_err_to_virt_err - translate errors for VF return code
+ * @ice_err: error return code
+ */
+static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err)
+{
+	switch (ice_err) {
+	case ICE_SUCCESS:
+		return VIRTCHNL_STATUS_SUCCESS;
+	case ICE_ERR_BAD_PTR:
+	case ICE_ERR_INVAL_SIZE:
+	case ICE_ERR_DEVICE_NOT_SUPPORTED:
+	case ICE_ERR_PARAM:
+	case ICE_ERR_CFG:
+		return VIRTCHNL_STATUS_ERR_PARAM;
+	case ICE_ERR_NO_MEMORY:
+		return VIRTCHNL_STATUS_ERR_NO_MEMORY;
+	case ICE_ERR_NOT_READY:
+	case ICE_ERR_RESET_FAILED:
+	case ICE_ERR_FW_API_VER:
+	case ICE_ERR_AQ_ERROR:
+	case ICE_ERR_AQ_TIMEOUT:
+	case ICE_ERR_AQ_FULL:
+	case ICE_ERR_AQ_NO_WORK:
+	case ICE_ERR_AQ_EMPTY:
+		return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+	default:
+		return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+	}
+}
+
+/**
+ * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
+ * @pf: pointer to the PF structure
+ * @v_opcode: operation code
+ * @v_retval: return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ */
+static void
+ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
+		    enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf = pf->vf;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++, vf++) {
+		/* Not all VFs are enabled so skip the ones that are not */
+		if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+		    !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		/* Ignore return value on purpose - a given VF may fail, but
+		 * we need to keep going and send to all of them
+		 */
+		ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg,
+				      msglen, NULL);
+	}
+}
+
+/**
+ * ice_set_pfe_link - Set the link speed/status of the virtchnl_pf_event
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
+ * @ice_link_speed: link speed specified by ICE_AQ_LINK_SPEED_*
+ * @link_up: whether or not to set the link up/down
+ */
+static void
+ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
+		 int ice_link_speed, bool link_up)
+{
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+		pfe->event_data.link_event_adv.link_status = link_up;
+		/* Speed in Mbps */
+		pfe->event_data.link_event_adv.link_speed =
+			ice_conv_link_speed_to_virtchnl(true, ice_link_speed);
+	} else {
+		pfe->event_data.link_event.link_status = link_up;
+		/* Legacy method for virtchnl link speeds */
+		pfe->event_data.link_event.link_speed =
+			(enum virtchnl_link_speed)
+			ice_conv_link_speed_to_virtchnl(false, ice_link_speed);
+	}
+}
+
+/**
+ * ice_set_pfe_link_forced - Force the virtchnl_pf_event link speed/status
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
+ * @link_up: whether or not to set the link up/down
+ */
+static void
+ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
+			bool link_up)
+{
+	u16 link_speed;
+
+	if (link_up)
+		link_speed = ICE_AQ_LINK_SPEED_100GB;
+	else
+		link_speed = ICE_AQ_LINK_SPEED_UNKNOWN;
+
+	ice_set_pfe_link(vf, pfe, link_speed, link_up);
+}
+
+/**
+ * ice_vc_notify_vf_link_state - Inform a VF of link status
+ * @vf: pointer to the VF structure
+ *
+ * send a link status message to a single VF
+ */
+static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
+{
+	struct virtchnl_pf_event pfe = { 0 };
+	struct ice_link_status *ls;
+	struct ice_pf *pf = vf->pf;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	ls = &hw->port_info->phy.link_info;
+
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+	/* Always report link is down if the VF queues aren't enabled */
+	if (!vf->num_qs_ena)
+		ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
+	else if (vf->link_forced)
+		ice_set_pfe_link_forced(vf, &pfe, vf->link_up);
+	else
+		ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info &
+				 ICE_AQ_LINK_UP);
+
+	ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
+			      sizeof(pfe), NULL);
+}
+
+/**
+ * ice_free_vf_res - Free a VF's resources
+ * @vf: pointer to the VF info
+ */
+static void ice_free_vf_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	int i, last_vector_idx;
+
+	/* First, disable VF's configuration API to prevent OS from
+	 * accessing the VF's VSI after it's freed or invalidated.
+	 */
+	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+	/* free VSI and disconnect it from the parent uplink */
+	if (vf->lan_vsi_idx) {
+		ice_vsi_release(pf->vsi[vf->lan_vsi_idx]);
+		vf->lan_vsi_idx = 0;
+		vf->lan_vsi_num = 0;
+		vf->num_mac = 0;
+	}
+
+	last_vector_idx = vf->first_vector_idx + pf->num_vf_msix - 1;
+	/* Disable interrupts so that VF starts in a known state */
+	for (i = vf->first_vector_idx; i <= last_vector_idx; i++) {
+		wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M);
+		ice_flush(&pf->hw);
+	}
+	/* reset some of the state variables keeping track of the resources */
+	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_mappings
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_mappings(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int first, last, v;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
+
+	first = vf->first_vector_idx;
+	last = first + pf->num_vf_msix - 1;
+	for (v = first; v <= last; v++) {
+		u32 reg;
+
+		reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) &
+			GLINT_VECT2FUNC_IS_PF_M) |
+		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+			GLINT_VECT2FUNC_PF_NUM_M));
+		wr32(hw, GLINT_VECT2FUNC(v), reg);
+	}
+
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
+		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
+	else
+		dev_err(&pf->pdev->dev,
+			"Scattered mode for VF Tx queues is not yet implemented\n");
+
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
+		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
+	else
+		dev_err(&pf->pdev->dev,
+			"Scattered mode for VF Rx queues is not yet implemented\n");
+}
+
+/**
+ * ice_sriov_free_msix_res - Reset/free any used MSIX resources
+ * @pf: pointer to the PF structure
+ *
+ * If MSIX entries from the pf->irq_tracker were needed, then reset
+ * irq_tracker->end and give those entries back to num_avail_sw_msix.
+ *
+ * If no MSIX entries were taken from the pf->irq_tracker then just clear
+ * the pf->sriov_base_vector.
+ *
+ * Returns 0 on success, and -EINVAL on error.
+ */
+static int ice_sriov_free_msix_res(struct ice_pf *pf)
+{
+	struct ice_res_tracker *res;
+
+	if (!pf)
+		return -EINVAL;
+
+	res = pf->irq_tracker;
+	if (!res)
+		return -EINVAL;
+
+	/* give back irq_tracker resources used */
+	if (pf->sriov_base_vector < res->num_entries) {
+		res->end = res->num_entries;
+		pf->num_avail_sw_msix +=
+			res->num_entries - pf->sriov_base_vector;
+	}
+
+	pf->sriov_base_vector = 0;
+
+	return 0;
+}
+
+/**
+ * ice_set_vf_state_qs_dis - Set VF queues state to disabled
+ * @vf: pointer to the VF structure
+ */
+void ice_set_vf_state_qs_dis(struct ice_vf *vf)
+{
+	/* Clear Rx/Tx enabled queues flag */
+	bitmap_zero(vf->txq_ena, ICE_MAX_BASE_QS_PER_VF);
+	bitmap_zero(vf->rxq_ena, ICE_MAX_BASE_QS_PER_VF);
+	vf->num_qs_ena = 0;
+	clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_qs - Disable the VF queues
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_qs(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+	ice_vsi_stop_rx_rings(vsi);
+	ice_set_vf_state_qs_dis(vf);
+}
+
+/**
+ * ice_free_vfs - Free all VFs
+ * @pf: pointer to the PF structure
+ */
+void ice_free_vfs(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	int tmp, i;
+
+	if (!pf->vf)
+		return;
+
+	while (test_and_set_bit(__ICE_VF_DIS, pf->state))
+		usleep_range(1000, 2000);
+
+	/* Avoid wait time by stopping all VFs at the same time */
+	for (i = 0; i < pf->num_alloc_vfs; i++)
+		if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states))
+			ice_dis_vf_qs(&pf->vf[i]);
+
+	/* Disable IOV before freeing resources. This lets any VF drivers
+	 * running in the host get themselves cleaned up before we yank
+	 * the carpet out from underneath their feet.
+	 */
+	if (!pci_vfs_assigned(pf->pdev))
+		pci_disable_sriov(pf->pdev);
+	else
+		dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+
+	tmp = pf->num_alloc_vfs;
+	pf->num_vf_qps = 0;
+	pf->num_alloc_vfs = 0;
+	for (i = 0; i < tmp; i++) {
+		if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
+			/* disable VF qp mappings */
+			ice_dis_vf_mappings(&pf->vf[i]);
+			ice_free_vf_res(&pf->vf[i]);
+		}
+	}
+
+	if (ice_sriov_free_msix_res(pf))
+		dev_err(&pf->pdev->dev,
+			"Failed to free MSIX resources used by SR-IOV\n");
+
+	devm_kfree(&pf->pdev->dev, pf->vf);
+	pf->vf = NULL;
+
+	/* This check is for when the driver is unloaded while VFs are
+	 * assigned. Setting the number of VFs to 0 through sysfs is caught
+	 * before this function ever gets called.
+	 */
+	if (!pci_vfs_assigned(pf->pdev)) {
+		int vf_id;
+
+		/* Acknowledge VFLR for all VFs. Without this, VFs will fail to
+		 * work correctly when SR-IOV gets re-enabled.
+		 */
+		for (vf_id = 0; vf_id < tmp; vf_id++) {
+			u32 reg_idx, bit_idx;
+
+			reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+			bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+			wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+		}
+	}
+	clear_bit(__ICE_VF_DIS, pf->state);
+	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+}
+
+/**
+ * ice_trigger_vf_reset - Reset a VF on HW
+ * @vf: pointer to the VF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ * @is_pfr: true if the reset was triggered due to a previous PFR
+ *
+ * Trigger hardware to start a reset for a particular VF. Expects the caller
+ * to wait the proper amount of time to allow hardware to reset the VF before
+ * it cleans up and restores VF functionality.
+ */
+static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
+{
+	struct ice_pf *pf = vf->pf;
+	u32 reg, reg_idx, bit_idx;
+	struct ice_hw *hw;
+	int vf_abs_id, i;
+
+	hw = &pf->hw;
+	vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	/* Inform VF that it is no longer active, as a warning */
+	clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+	/* Disable VF's configuration API during reset. The flag is re-enabled
+	 * in ice_alloc_vf_res(), when it's safe again to access VF's VSI.
+	 * It's normally disabled in ice_free_vf_res(), but it's safer to do
+	 * it earlier, to give any VF configuration functions that may still
+	 * be running time to finish.
+	 */
+	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+	/* VF_MBX_ARQLEN is cleared by PFR, so the driver needs to clear it
+	 * in the case of VFR. If this is done for PFR, it can mess up VF
+	 * resets because the VF driver may already have started cleanup
+	 * by the time we get here.
+	 */
+	if (!is_pfr)
+		wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0);
+
+	/* In the case of a VFLR, the HW has already reset the VF and we
+	 * just need to clean up, so don't hit the VFRTRIG register.
+	 */
+	if (!is_vflr) {
+		/* reset VF using VPGEN_VFRTRIG reg */
+		reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+		reg |= VPGEN_VFRTRIG_VFSWR_M;
+		wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	}
+	/* clear the VFLR bit in GLGEN_VFLRSTAT */
+	reg_idx = (vf_abs_id) / 32;
+	bit_idx = (vf_abs_id) % 32;
+	wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+	ice_flush(hw);
+
+	wr32(hw, PF_PCI_CIAA,
+	     VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S));
+	for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) {
+		reg = rd32(hw, PF_PCI_CIAD);
+		/* no transactions pending so stop polling */
+		if ((reg & VF_TRANS_PENDING_M) == 0)
+			break;
+
+		dev_err(&pf->pdev->dev,
+			"VF %d PCI transactions stuck\n", vf->vf_id);
+		udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
+	}
+}
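
The VFLR bookkeeping above packs 32 VFs per GLGEN_VFLRSTAT register. With illustrative numbers, vf_base_id = 64 and vf_id = 5 give an absolute ID of 69, so reg_idx = 69 / 32 = 2 and bit_idx = 69 % 32 = 5, and the write clears bit 5 of GLGEN_VFLRSTAT(2).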
+
+/**
+ * ice_vsi_set_pvid_fill_ctxt - Set VSI ctxt for adding a PVID
+ * @ctxt: the VSI ctxt to fill
+ * @vid: the VLAN ID to set as a PVID
+ */
+static void ice_vsi_set_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt, u16 vid)
+{
+	ctxt->info.vlan_flags = (ICE_AQ_VSI_VLAN_MODE_UNTAGGED |
+				 ICE_AQ_VSI_PVLAN_INSERT_PVID |
+				 ICE_AQ_VSI_VLAN_EMOD_STR);
+	ctxt->info.pvid = cpu_to_le16(vid);
+	ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+						ICE_AQ_VSI_PROP_SW_VALID);
+}
+
+/**
+ * ice_vsi_kill_pvid_fill_ctxt - Set VSI ctxt for removing the PVID
+ * @ctxt: the VSI ctxt to fill
+ */
+static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt)
+{
+	ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+	ctxt->info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL;
+	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+						ICE_AQ_VSI_PROP_SW_VALID);
+}
+
+/**
+ * ice_vsi_manage_pvid - Enable or disable port VLAN for VSI
+ * @vsi: the VSI to update
+ * @vid: the VLAN ID to set as a PVID
+ * @enable: true to enable the PVID, false to disable it
+ */
+static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+	if (enable)
+		ice_vsi_set_pvid_fill_ctxt(ctxt, vid);
+	else
+		ice_vsi_kill_pvid_fill_ctxt(ctxt);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_info(dev, "update VSI for port VLAN failed, err %d aq_err %d\n",
+			 status, hw->adminq.sq_last_status);
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi->info = ctxt->info;
+out:
+	devm_kfree(dev, ctxt);
+	return ret;
+}
+
+/**
+ * ice_vf_vsi_setup - Set up a VF VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ * @vf_id: defines VF ID to which this VSI connects.
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id)
+{
+	return ice_vsi_setup(pf, pi, ICE_VSI_VF, vf_id);
+}
+
+/**
+ * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
+ * @pf: pointer to PF structure
+ * @vf: pointer to VF that the first MSIX vector index is being calculated for
+ *
+ * This returns the first MSIX vector index in PF space that is used by this VF.
+ * This index is used when accessing PF relative registers such as
+ * GLINT_VECT2FUNC and GLINT_DYN_CTL.
+ * This will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration will have to increment by
+ * 1 to avoid meddling with the OICR index.
+ */
+static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
+{
+	return pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix;
+}
+
+/**
+ * ice_alloc_vsi_res - Setup VF VSI and its resources
+ * @vf: pointer to the VF structure
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_alloc_vsi_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	LIST_HEAD(tmp_add_list);
+	u8 broadcast[ETH_ALEN];
+	struct ice_vsi *vsi;
+	int status = 0;
+
+	/* first vector index is the VFs OICR index */
+	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
+
+	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
+	if (!vsi) {
+		dev_err(&pf->pdev->dev, "Failed to create VF VSI\n");
+		return -ENOMEM;
+	}
+
+	vf->lan_vsi_idx = vsi->idx;
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	/* Check if a port VLAN existed before, and restore it accordingly */
+	if (vf->port_vlan_id) {
+		ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true);
+		ice_vsi_add_vlan(vsi, vf->port_vlan_id & ICE_VLAN_M);
+	}
+
+	eth_broadcast_addr(broadcast);
+
+	status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
+	if (status)
+		goto ice_alloc_vsi_res_exit;
+
+	if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) {
+		status = ice_add_mac_to_list(vsi, &tmp_add_list,
+					     vf->dflt_lan_addr.addr);
+		if (status)
+			goto ice_alloc_vsi_res_exit;
+	}
+
+	status = ice_add_mac(&pf->hw, &tmp_add_list);
+	if (status)
+		dev_err(&pf->pdev->dev,
+			"could not add mac filters error %d\n", status);
+	else
+		vf->num_mac = 1;
+
+	/* Clear this bit after VF initialization since we shouldn't reclaim
+	 * and reassign interrupts for synchronous or asynchronous VFR events.
+	 * We don't want to reconfigure interrupts since AVF driver doesn't
+	 * expect vector assignment to be changed unless there is a request for
+	 * more vectors.
+	 */
+ice_alloc_vsi_res_exit:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	return status;
+}
+
+/**
+ * ice_alloc_vf_res - Allocate VF resources
+ * @vf: pointer to the VF structure
+ */
+static int ice_alloc_vf_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	int tx_rx_queue_left;
+	int status;
+
+	/* Update the number of VF queues, in case the VF had requested queue
+	 * changes
+	 */
+	tx_rx_queue_left = min_t(int, ice_get_avail_txq_count(pf),
+				 ice_get_avail_rxq_count(pf));
+	tx_rx_queue_left += ICE_DFLT_QS_PER_VF;
+	if (vf->num_req_qs && vf->num_req_qs <= tx_rx_queue_left &&
+	    vf->num_req_qs != vf->num_vf_qs)
+		vf->num_vf_qs = vf->num_req_qs;
+
+	/* setup VF VSI and necessary resources */
+	status = ice_alloc_vsi_res(vf);
+	if (status)
+		goto ice_alloc_vf_res_exit;
+
+	if (vf->trusted)
+		set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+	else
+		clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+
+	/* VF is now completely initialized */
+	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+	return status;
+
+ice_alloc_vf_res_exit:
+	ice_free_vf_res(vf);
+	return status;
+}
+
+/**
+ * ice_ena_vf_mappings
+ * @vf: pointer to the VF structure
+ *
+ * Enable VF vectors and queues allocation by writing the details into
+ * respective registers.
+ */
+static void ice_ena_vf_mappings(struct ice_vf *vf)
+{
+	int abs_vf_id, abs_first, abs_last;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int first, last, v;
+	struct ice_hw *hw;
+	u32 reg;
+
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	first = vf->first_vector_idx;
+	last = (first + pf->num_vf_msix) - 1;
+	abs_first = first + pf->hw.func_caps.common_cap.msix_vector_first_id;
+	abs_last = (abs_first + pf->num_vf_msix) - 1;
+	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	/* VF Vector allocation */
+	reg = (((abs_first << VPINT_ALLOC_FIRST_S) & VPINT_ALLOC_FIRST_M) |
+	       ((abs_last << VPINT_ALLOC_LAST_S) & VPINT_ALLOC_LAST_M) |
+	       VPINT_ALLOC_VALID_M);
+	wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
+
+	reg = (((abs_first << VPINT_ALLOC_PCI_FIRST_S)
+		 & VPINT_ALLOC_PCI_FIRST_M) |
+	       ((abs_last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) |
+	       VPINT_ALLOC_PCI_VALID_M);
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
+	/* map the interrupts to its functions */
+	for (v = first; v <= last; v++) {
+		reg = (((abs_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
+			GLINT_VECT2FUNC_VF_NUM_M) |
+		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+			GLINT_VECT2FUNC_PF_NUM_M));
+		wr32(hw, GLINT_VECT2FUNC(v), reg);
+	}
+
+	/* Map mailbox interrupt. We put an explicit 0 here to remind us that
+	 * VF admin queue interrupts will go to VF MSI-X vector 0.
+	 */
+	wr32(hw, VPINT_MBX_CTL(abs_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M | 0);
+	/* set regardless of mapping mode */
+	wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
+	/* VF Tx queues allocation */
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+		/* set the VF PF Tx queue range
+		 * VFNUMQ value should be set to (number of queues - 1). A value
+		 * of 0 means 1 queue and a value of 255 means 256 queues
+		 */
+		reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) &
+			VPLAN_TX_QBASE_VFFIRSTQ_M) |
+		       (((vsi->alloc_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
+			VPLAN_TX_QBASE_VFNUMQ_M));
+		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"Scattered mode for VF Tx queues is not yet implemented\n");
+	}
+
+	/* set regardless of mapping mode */
+	wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);
+
+	/* VF Rx queues allocation */
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+		/* set the VF PF Rx queue range
+		 * VFNUMQ value should be set to (number of queues - 1). A value
+		 * of 0 means 1 queue and a value of 255 means 256 queues
+		 */
+		reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) &
+			VPLAN_RX_QBASE_VFFIRSTQ_M) |
+		       (((vsi->alloc_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
+			VPLAN_RX_QBASE_VFNUMQ_M));
+		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"Scattered mode for VF Rx queues is not yet implemented\n");
+	}
+}
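
A worked example of the contiguous-mode encoding: a VF whose first queue in the PF space is 16 and which owns 4 queues is programmed with VFFIRSTQ = 16 and VFNUMQ = 4 - 1 = 3, since the field is biased so that 0 means one queue and 255 means 256.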
+
+/**
+ * ice_determine_res
+ * @pf: pointer to the PF structure
+ * @avail_res: available resources in the PF structure
+ * @max_res: maximum resources that can be given per VF
+ * @min_res: minimum resources that can be given per VF
+ *
+ * Returns a non-zero value if resources (queues/vectors) are available, or
+ * zero if the PF cannot accommodate all num_alloc_vfs.
+ */
+static int
+ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
+{
+	bool checked_min_res = false;
+	int res;
+
+	/* Start by checking if the PF can assign the max number of resources
+	 * for all num_alloc_vfs. If yes, return that number per VF. If no,
+	 * divide by 2 and round up, then check again. Repeat the loop until
+	 * even the minimum resources are not available; in that case return 0.
+	 */
+	res = max_res;
+	while ((res >= min_res) && !checked_min_res) {
+		int num_all_res;
+
+		num_all_res = pf->num_alloc_vfs * res;
+		if (num_all_res <= avail_res)
+			return res;
+
+		if (res == min_res)
+			checked_min_res = true;
+
+		res = DIV_ROUND_UP(res, 2);
+	}
+	return 0;
+}
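
Worked example of the halving loop: with 8 VFs, avail_res = 100, max_res = 65 and min_res = 5 (illustrative numbers), the loop tries 65 (520 needed), 33 (264), 17 (136), then 9, and since 8 * 9 = 72 <= 100 each VF is granted 9.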
+
+/**
+ * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
+ * @vf: VF to calculate the register index for
+ * @q_vector: a q_vector associated to the VF
+ */
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf;
+
+	if (!vf || !q_vector)
+		return -EINVAL;
+
+	pf = vf->pf;
+
+	/* always add one to account for the OICR being the first MSIX */
+	return pf->sriov_base_vector + pf->num_vf_msix * vf->vf_id +
+		q_vector->v_idx + 1;
+}
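
Both vector-index helpers are simple affine maps into the PF's MSI-X space. With illustrative numbers, sriov_base_vector = 200 and num_vf_msix = 5: VF 3 gets first_vector_idx = 200 + 3 * 5 = 215 (its OICR/mailbox vector), and its q_vector with v_idx 0 lands on register index 215 + 0 + 1 = 216.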
+
+/**
+ * ice_get_max_valid_res_idx - Get the max valid resource index
+ * @res: pointer to the resource to find the max valid index for
+ *
+ * Start from the end of the ice_res_tracker and return right when we find the
+ * first res->list entry with the ICE_RES_VALID_BIT set. This function is only
+ * valid for SR-IOV because it is the only consumer that manipulates the
+ * res->end and this is always called when res->end is set to res->num_entries.
+ */
+static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
+{
+	int i;
+
+	if (!res)
+		return -EINVAL;
+
+	for (i = res->num_entries - 1; i >= 0; i--)
+		if (res->list[i] & ICE_RES_VALID_BIT)
+			return i;
+
+	return 0;
+}
+
+/**
+ * ice_sriov_set_msix_res - Set any used MSIX resources
+ * @pf: pointer to PF structure
+ * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
+ *
+ * This function allows SR-IOV resources to be taken from the end of the PF's
+ * allowed HW MSIX vectors, so in many cases the irq_tracker will not be
+ * needed. In these cases we just set the pf->sriov_base_vector and return
+ * success.
+ *
+ * If SR-IOV needs to use any pf->irq_tracker entries it updates the
+ * irq_tracker->end based on the first entry needed for SR-IOV. This makes it
+ * so any calls to ice_get_res() using the irq_tracker will not try to use
+ * resources at or beyond the newly set value.
+ *
+ * Return 0 on success, and -EINVAL when there are not enough MSIX vectors
+ * available in the PF's space for SR-IOV.
+ */
+static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
+{
+	int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+	u16 pf_total_msix_vectors =
+		pf->hw.func_caps.common_cap.num_msix_vectors;
+	struct ice_res_tracker *res = pf->irq_tracker;
+	int sriov_base_vector;
+
+	if (max_valid_res_idx < 0)
+		return max_valid_res_idx;
+
+	sriov_base_vector = pf_total_msix_vectors - num_msix_needed;
+
+	/* make sure we only grab irq_tracker entries from the list end and
+	 * that we have enough available MSIX vectors
+	 */
+	if (sriov_base_vector <= max_valid_res_idx)
+		return -EINVAL;
+
+	pf->sriov_base_vector = sriov_base_vector;
+
+	/* dip into irq_tracker entries and update used resources */
+	if (num_msix_needed > (pf_total_msix_vectors - res->num_entries)) {
+		pf->num_avail_sw_msix -=
+			res->num_entries - pf->sriov_base_vector;
+		res->end = pf->sriov_base_vector;
+	}
+
+	return 0;
+}
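
Illustrative arithmetic: if the PF owns 1024 MSIX vectors and the irq_tracker has 256 entries, a request for 80 SR-IOV vectors sets sriov_base_vector = 1024 - 80 = 944 and leaves the tracker untouched, since 80 <= 1024 - 256. A request for 800 vectors instead gives sriov_base_vector = 224, clips res->end to 224, and deducts 256 - 224 = 32 entries from num_avail_sw_msix.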
+
+/**
+ * ice_check_avail_res - check if vectors and queues are available
+ * @pf: pointer to the PF structure
+ *
+ * This function is where we calculate the actual number of resources for
+ * VF VSIs; we don't reserve them ahead of time during probe. Returns success
+ * if vector and queue resources are available; otherwise returns an error
+ * code.
+ */
+static int ice_check_avail_res(struct ice_pf *pf)
+{
+	int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+	u16 num_msix, num_txq, num_rxq, num_avail_msix;
+
+	if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
+		return -EINVAL;
+
+	/* add 1 to max_valid_res_idx to account for it being 0-based */
+	num_avail_msix = pf->hw.func_caps.common_cap.num_msix_vectors -
+		(max_valid_res_idx + 1);
+
+	/* Grab from the HW interrupts common pool.
+	 * Note: By the time the user decides it needs more vectors in a VF
+	 * it's already too late, since one must decide this prior to creating
+	 * the VF interface. So the best we can do is take a guess as to what
+	 * the user might want.
+	 *
+	 * We have two policies for vector allocation:
+	 * 1. if num_alloc_vfs is from 1 to 16, then we consider this a small
+	 * number of NFV VFs used for NFV appliances; since this is a special
+	 * case, we try to assign the maximum number of vectors per VF (65) as
+	 * far as possible, based on the ice_determine_res algorithm.
+	 * 2. if num_alloc_vfs is from 17 to 256, then it's a large number of
+	 * regular VFs which are not used for any special purpose. Hence try
+	 * to grab default interrupt vectors (5, as supported by the AVF
+	 * driver).
+	 */
+	if (pf->num_alloc_vfs <= 16) {
+		num_msix = ice_determine_res(pf, num_avail_msix,
+					     ICE_MAX_INTR_PER_VF,
+					     ICE_MIN_INTR_PER_VF);
+	} else if (pf->num_alloc_vfs <= ICE_MAX_VF_COUNT) {
+		num_msix = ice_determine_res(pf, num_avail_msix,
+					     ICE_DFLT_INTR_PER_VF,
+					     ICE_MIN_INTR_PER_VF);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"Number of VFs %d exceeds max VF count %d\n",
+			pf->num_alloc_vfs, ICE_MAX_VF_COUNT);
+		return -EIO;
+	}
+
+	if (!num_msix)
+		return -EIO;
+
+	/* Grab from the common pool.
+	 * Start by requesting the default number of queues (4, as supported
+	 * by the AVF driver). Note that the main difference between queues
+	 * and vectors is that the latter can only be reserved at init time,
+	 * while queues can be requested by the VF at runtime through
+	 * virtchnl; that is the reason we start by reserving only a few
+	 * queues.
+	 */
+	num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf),
+				    ICE_DFLT_QS_PER_VF, ICE_MIN_QS_PER_VF);
+
+	num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf),
+				    ICE_DFLT_QS_PER_VF, ICE_MIN_QS_PER_VF);
+
+	if (!num_txq || !num_rxq)
+		return -EIO;
+
+	if (ice_sriov_set_msix_res(pf, num_msix * pf->num_alloc_vfs))
+		return -EINVAL;
+
+	/* since the AVF driver works only with queue pairs, which means it
+	 * expects to have an equal number of Rx and Tx queues, take the
+	 * minimum of the available Tx and Rx queues
+	 */
+	pf->num_vf_qps = min_t(int, num_txq, num_rxq);
+	pf->num_vf_msix = num_msix;
+
+	return 0;
+}
+
+/**
+ * ice_cleanup_and_realloc_vf - Clean up VF and reallocate resources after reset
+ * @vf: pointer to the VF structure
+ *
+ * Cleanup a VF after the hardware reset is finished. Expects the caller to
+ * have verified whether the reset is finished properly, and ensure the
+ * minimum amount of wait time has passed. Reallocates VF resources to make
+ * the VF state active.
+ */
+static void ice_cleanup_and_realloc_vf(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_hw *hw;
+	u32 reg;
+
+	hw = &pf->hw;
+
+	/* PF software completes the flow by notifying VF that reset flow is
+	 * completed. This is done by enabling hardware by clearing the reset
+	 * bit in the VPGEN_VFRTRIG reg and setting VFR_STATE in the VFGEN_RSTAT
+	 * register to VFR completed (done at the end of this function)
+	 * By doing this we allow HW to access VF memory at any point. If we
+	 * did it any sooner, HW could access memory while it was being freed
+	 * in ice_free_vf_res(), causing an IOMMU fault.
+	 *
+	 * On the other hand, this needs to be done ASAP, because the VF driver
+	 * is waiting for this to happen and may report a timeout. It's
+	 * harmless, but it gets logged into Guest OS kernel log, so best avoid
+	 * it.
+	 */
+	reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+	reg &= ~VPGEN_VFRTRIG_VFSWR_M;
+	wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+
+	/* reallocate VF resources to finish resetting the VSI state */
+	if (!ice_alloc_vf_res(vf)) {
+		ice_ena_vf_mappings(vf);
+		set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+		clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
+		vf->num_vlan = 0;
+	}
+
+	/* Tell the VF driver the reset is done. This needs to be done only
+	 * after VF has been fully initialized, because the VF driver may
+	 * request resources immediately after setting this flag.
+	 */
+	wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+}
+
+/**
+ * ice_vf_set_vsi_promisc - set given VF VSI to given promiscuous mode(s)
+ * @vf: pointer to the VF info
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ * @rm_promisc: promisc flag request from the VF to remove or add filter
+ *
+ * This function configures VF VSI promiscuous mode, based on the VF requests,
+ * for Unicast, Multicast and VLAN
+ */
+static enum ice_status
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
+		       bool rm_promisc)
+{
+	struct ice_pf *pf = vf->pf;
+	enum ice_status status = 0;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	if (vf->num_vlan) {
+		status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
+						  rm_promisc);
+	} else if (vf->port_vlan_id) {
+		if (rm_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						       vf->port_vlan_id);
+		else
+			status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						     vf->port_vlan_id);
+	} else {
+		if (rm_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						       0);
+		else
+			status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						     0);
+	}
+
+	return status;
+}
+
+/**
+ * ice_config_res_vfs - Finalize allocation of VFs resources in one go
+ * @pf: pointer to the PF structure
+ *
+ * This function is called as the last part of resetting all VFs, or when
+ * configuring VFs for the first time, where there is no resource to be freed.
+ * Returns true if resources were properly allocated for all VFs, and false
+ * otherwise.
+ */
+static bool ice_config_res_vfs(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	int v;
+
+	if (ice_check_avail_res(pf)) {
+		dev_err(&pf->pdev->dev,
+			"Cannot allocate VF resources, try with fewer number of VFs\n");
+		return false;
+	}
+
+	/* rearm global interrupts */
+	if (test_and_clear_bit(__ICE_OICR_INTR_DIS, pf->state))
+		ice_irq_dynamic_ena(hw, NULL, NULL);
+
+	/* Finish resetting each VF and allocate resources */
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		struct ice_vf *vf = &pf->vf[v];
+
+		vf->num_vf_qs = pf->num_vf_qps;
+		dev_dbg(&pf->pdev->dev,
+			"VF-id %d has %d queues configured\n",
+			vf->vf_id, vf->num_vf_qs);
+		ice_cleanup_and_realloc_vf(vf);
+	}
+
+	ice_flush(hw);
+	clear_bit(__ICE_VF_DIS, pf->state);
+
+	return true;
+}
+
+/**
+ * ice_reset_all_vfs - reset all allocated VFs in one go
+ * @pf: pointer to the PF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ *
+ * First, tell the hardware to reset each VF, then do all the waiting in one
+ * chunk, and finally finish restoring each VF after the wait. This is useful
+ * during PF routines which need to reset all VFs, as they would otherwise
+ * have to perform these resets in a serialized fashion.
+ *
+ * Returns true if any VFs were reset, and false otherwise.
+ */
+bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	int v, i;
+
+	/* If we don't have any VFs, then there is nothing to reset */
+	if (!pf->num_alloc_vfs)
+		return false;
+
+	/* If VFs have been disabled, there is no need to reset */
+	if (test_and_set_bit(__ICE_VF_DIS, pf->state))
+		return false;
+
+	/* Begin reset on all VFs at once */
+	for (v = 0; v < pf->num_alloc_vfs; v++)
+		ice_trigger_vf_reset(&pf->vf[v], is_vflr, true);
+
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		struct ice_vsi *vsi;
+
+		vf = &pf->vf[v];
+		vsi = pf->vsi[vf->lan_vsi_idx];
+		if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
+			ice_dis_vf_qs(vf);
+		ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+				NULL, ICE_VF_RESET, vf->vf_id, NULL);
+	}
+
+	/* HW requires some time to make sure it can flush the FIFO for a VF
+	 * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
+	 * sequence to make sure that it has completed. We'll keep track of
+	 * the VFs using a simple iterator that increments once that VF has
+	 * finished resetting.
+	 */
+	for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+
+		/* Check each VF in sequence */
+		while (v < pf->num_alloc_vfs) {
+			u32 reg;
+
+			vf = &pf->vf[v];
+			reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id));
+			if (!(reg & VPGEN_VFRSTAT_VFRD_M)) {
+				/* only delay if the check failed */
+				usleep_range(10, 20);
+				break;
+			}
+
+			/* If the current VF has finished resetting, move on
+			 * to the next VF in sequence.
+			 */
+			v++;
+		}
+	}
+
+	/* Display a warning if at least one VF didn't manage to reset in
+	 * time, but continue on with the operation.
+	 */
+	if (v < pf->num_alloc_vfs)
+		dev_warn(&pf->pdev->dev, "VF reset check timeout\n");
+
+	/* free VF resources to begin resetting the VSI state */
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		vf = &pf->vf[v];
+
+		ice_free_vf_res(vf);
+
+		/* Free VF queues as well, and reallocate later.
+		 * If a given VF has different number of queues
+		 * configured, the request for update will come
+		 * via mailbox communication.
+		 */
+		vf->num_vf_qs = 0;
+	}
+
+	if (ice_sriov_free_msix_res(pf))
+		dev_err(&pf->pdev->dev,
+			"Failed to free MSIX resources used by SR-IOV\n");
+
+	if (!ice_config_res_vfs(pf))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_reset_vf - Reset a particular VF
+ * @vf: pointer to the VF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ *
+ * Returns true if the VF is reset, false otherwise.
+ */
+static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct ice_hw *hw;
+	bool rsd = false;
+	u8 promisc_m;
+	u32 reg;
+	int i;
+
+	/* If the PF has been disabled, there is no need to reset the VF
+	 * until the PF is active again.
+	 */
+	if (test_bit(__ICE_VF_DIS, pf->state))
+		return false;
+
+	/* If the VF has been disabled, this means something else is
+	 * resetting the VF, so we shouldn't continue. Otherwise, set the
+	 * disabled VF state bit for the actual reset, and continue.
+	 */
+	if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states))
+		return false;
+
+	ice_trigger_vf_reset(vf, is_vflr, false);
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
+		ice_dis_vf_qs(vf);
+
+	/* Call Disable LAN Tx queue AQ whether or not queues are
+	 * enabled. This is needed for successful completion of VFR.
+	 */
+	ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+			NULL, ICE_VF_RESET, vf->vf_id, NULL);
+
+	hw = &pf->hw;
+	/* poll VPGEN_VFRSTAT reg to make sure
+	 * that reset is complete
+	 */
+	for (i = 0; i < 10; i++) {
+		/* VF reset requires driver to first reset the VF and then
+		 * poll the status register to make sure that the reset
+		 * completed successfully.
+		 */
+		reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id));
+		if (reg & VPGEN_VFRSTAT_VFRD_M) {
+			rsd = true;
+			break;
+		}
+
+		/* only sleep if the reset is not done */
+		usleep_range(10, 20);
+	}
+
+	/* Display a warning if the VF didn't manage to reset in time, but we
+	 * need to continue on with the operation.
+	 */
+	if (!rsd)
+		dev_warn(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
+			 vf->vf_id);
+
+	/* disable promiscuous modes in case they were enabled
+	 * ignore any error if disabling process failed
+	 */
+	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+	    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
+		if (vf->port_vlan_id || vf->num_vlan)
+			promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+		else
+			promisc_m = ICE_UCAST_PROMISC_BITS;
+
+		vsi = pf->vsi[vf->lan_vsi_idx];
+		if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
+			dev_err(&pf->pdev->dev, "disabling promiscuous mode failed\n");
+	}
+
+	/* free VF resources to begin resetting the VSI state */
+	ice_free_vf_res(vf);
+
+	ice_cleanup_and_realloc_vf(vf);
+
+	ice_flush(hw);
+
+	return true;
+}
+
+/**
+ * ice_vc_notify_link_state - Inform all VFs on a PF of link status
+ * @pf: pointer to the PF structure
+ */
+void ice_vc_notify_link_state(struct ice_pf *pf)
+{
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++)
+		ice_vc_notify_vf_link_state(&pf->vf[i]);
+}
+
+/**
+ * ice_vc_notify_reset - Send pending reset message to all VFs
+ * @pf: pointer to the PF structure
+ *
+ * indicate a pending reset to all VFs on a given PF
+ */
+void ice_vc_notify_reset(struct ice_pf *pf)
+{
+	struct virtchnl_pf_event pfe;
+
+	if (!pf->num_alloc_vfs)
+		return;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS,
+			    (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
+}
+
+/**
+ * ice_vc_notify_vf_reset - Notify VF of a reset event
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_notify_vf_reset(struct ice_vf *vf)
+{
+	struct virtchnl_pf_event pfe;
+
+	/* validate the request */
+	if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+		return;
+
+	/* verify if the VF is in either init or active before proceeding */
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+	    !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		return;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
+			      NULL);
+}
+
+/**
+ * ice_alloc_vfs - Allocate and set up VFs resources
+ * @pf: pointer to the PF structure
+ * @num_alloc_vfs: number of VFs to allocate
+ */
+static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vfs;
+	int i, ret;
+
+	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
+	set_bit(__ICE_OICR_INTR_DIS, pf->state);
+	ice_flush(hw);
+
+	ret = pci_enable_sriov(pf->pdev, num_alloc_vfs);
+	if (ret) {
+		pf->num_alloc_vfs = 0;
+		goto err_unroll_intr;
+	}
+	/* allocate memory */
+	vfs = devm_kcalloc(&pf->pdev->dev, num_alloc_vfs, sizeof(*vfs),
+			   GFP_KERNEL);
+	if (!vfs) {
+		ret = -ENOMEM;
+		goto err_pci_disable_sriov;
+	}
+	pf->vf = vfs;
+
+	/* apply default profile */
+	for (i = 0; i < num_alloc_vfs; i++) {
+		vfs[i].pf = pf;
+		vfs[i].vf_sw_id = pf->first_sw;
+		vfs[i].vf_id = i;
+
+		/* assign default capabilities */
+		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
+		vfs[i].spoofchk = true;
+	}
+	pf->num_alloc_vfs = num_alloc_vfs;
+
+	/* VF resources get allocated with initialization */
+	if (!ice_config_res_vfs(pf)) {
+		ret = -EIO;
+		goto err_unroll_sriov;
+	}
+
+	return ret;
+
+err_unroll_sriov:
+	pf->vf = NULL;
+	devm_kfree(&pf->pdev->dev, vfs);
+	vfs = NULL;
+	pf->num_alloc_vfs = 0;
+err_pci_disable_sriov:
+	pci_disable_sriov(pf->pdev);
+err_unroll_intr:
+	/* rearm interrupts here */
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+	clear_bit(__ICE_OICR_INTR_DIS, pf->state);
+	return ret;
+}
+
+/**
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
+ *
+ * Check the PF's state for a collection of bits that would indicate
+ * the PF is in a state that would inhibit normal operation for
+ * driver functionality.
+ *
+ * Returns true if PF is in a nominal state.
+ * Returns false otherwise
+ */
+static bool ice_pf_state_is_nominal(struct ice_pf *pf)
+{
+	DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };
+
+	if (!pf)
+		return false;
+
+	bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
+	if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_pci_sriov_ena - Enable or change number of VFs
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to allocate
+ */
+static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
+{
+	int pre_existing_vfs = pci_num_vf(pf->pdev);
+	struct device *dev = &pf->pdev->dev;
+	int err;
+
+	if (!ice_pf_state_is_nominal(pf)) {
+		dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
+		return -EBUSY;
+	}
+
+	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
+		dev_err(dev, "This device is not capable of SR-IOV\n");
+		return -ENODEV;
+	}
+
+	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+		ice_free_vfs(pf);
+	else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+		return num_vfs;
+
+	if (num_vfs > pf->num_vfs_supported) {
+		dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",
+			num_vfs, pf->num_vfs_supported);
+		return -ENOTSUPP;
+	}
+
+	dev_info(dev, "Allocating %d VFs\n", num_vfs);
+	err = ice_alloc_vfs(pf, num_vfs);
+	if (err) {
+		dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
+		return err;
+	}
+
+	set_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+	return num_vfs;
+}
+
+/**
+ * ice_sriov_configure - Enable or change number of VFs via sysfs
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * This function is called when the user updates the number of VFs in sysfs.
+ */
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (ice_is_safe_mode(pf)) {
+		dev_err(&pf->pdev->dev,
+			"SR-IOV cannot be configured - Device is in Safe Mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (num_vfs)
+		return ice_pci_sriov_ena(pf, num_vfs);
+
+	if (!pci_vfs_assigned(pdev)) {
+		ice_free_vfs(pf);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"can't free VFs because some are assigned to VMs.\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
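
This handler backs the standard PCI sysfs knob; for instance, echo 8 > /sys/bus/pci/devices/<BDF>/sriov_numvfs allocates eight VFs, and writing 0 frees them again, provided none are currently assigned to a VM.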
+
+/**
+ * ice_process_vflr_event - Free VF resources via IRQ calls
+ * @pf: pointer to the PF structure
+ *
+ * called from the VFLR IRQ handler to
+ * free up VF resources and state variables
+ */
+void ice_process_vflr_event(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	int vf_id;
+	u32 reg;
+
+	if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    !pf->num_alloc_vfs)
+		return;
+
+	for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
+		struct ice_vf *vf = &pf->vf[vf_id];
+		u32 reg_idx, bit_idx;
+
+		reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+		bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+		/* read GLGEN_VFLRSTAT register to find out which VFs got a VFLR */
+		reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+		if (reg & BIT(bit_idx))
+			/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
+			ice_reset_vf(vf, true);
+	}
+}
+
+/**
+ * ice_vc_dis_vf - Disable a given VF via SW reset
+ * @vf: pointer to the VF info
+ *
+ * Disable the VF through a SW reset
+ */
+static void ice_vc_dis_vf(struct ice_vf *vf)
+{
+	ice_vc_notify_vf_reset(vf);
+	ice_reset_vf(vf, false);
+}
+
+/**
+ * ice_vc_send_msg_to_vf - Send message to VF
+ * @vf: pointer to the VF info
+ * @v_opcode: virtual channel opcode
+ * @v_retval: virtual channel return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send msg to VF
+ */
+static int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	enum ice_status aq_ret;
+	struct ice_pf *pf;
+
+	/* validate the request */
+	if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+		return -EINVAL;
+
+	pf = vf->pf;
+
+	/* single place to detect unsuccessful return values */
+	if (v_retval) {
+		vf->num_inval_msgs++;
+		dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
+			 vf->vf_id, v_opcode, v_retval);
+		if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
+			dev_err(&pf->pdev->dev,
+				"Number of invalid messages exceeded for VF %d\n",
+				vf->vf_id);
+			dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
+			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+			return -EIO;
+		}
+	} else {
+		vf->num_valid_msgs++;
+		/* reset the invalid counter when a valid message arrives */
+		vf->num_inval_msgs = 0;
+	}
+
+	aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
+				       msg, msglen, NULL);
+	if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
+		dev_info(&pf->pdev->dev,
+			 "Unable to send the message to VF %d ret %d aq_err %d\n",
+			 vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_get_ver_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request the API version used by the PF
+ */
+static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_version_info info = {
+		VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
+	};
+
+	vf->vf_ver = *(struct virtchnl_version_info *)msg;
+	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
+	if (VF_IS_V10(&vf->vf_ver))
+		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
+				     VIRTCHNL_STATUS_SUCCESS, (u8 *)&info,
+				     sizeof(struct virtchnl_version_info));
+}
+
+/**
+ * ice_vc_get_vf_res_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request its resources
+ */
+static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vf_resource *vfres = NULL;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int len = 0;
+	int ret;
+
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	len = sizeof(struct virtchnl_vf_resource);
+
+	vfres = devm_kzalloc(&pf->pdev->dev, len, GFP_KERNEL);
+	if (!vfres) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		len = 0;
+		goto err;
+	}
+	if (VF_IS_V11(&vf->vf_ver))
+		vf->driver_caps = *(u32 *)msg;
+	else
+		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
+				  VIRTCHNL_VF_OFFLOAD_RSS_REG |
+				  VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (!vsi->info.pvid)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
+	} else {
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+		else
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
+	}
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+
+	vfres->num_vsis = 1;
+	/* Tx and Rx queue counts are equal for a VF */
+	vfres->num_queue_pairs = vsi->num_txq;
+	vfres->max_vectors = pf->num_vf_msix;
+	vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE;
+	vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+
+	vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
+	vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
+	vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
+	ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+			vf->dflt_lan_addr.addr);
+
+	set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+err:
+	/* send the response back to the VF */
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
+				    (u8 *)vfres, len);
+
+	devm_kfree(&pf->pdev->dev, vfres);
+	return ret;
+}
+
+/**
+ * ice_vc_reset_vf_msg
+ * @vf: pointer to the VF info
+ *
+ * called from the VF to reset itself; unlike other virtchnl messages,
+ * the PF driver doesn't send a response back to the VF
+ */
+static void ice_vc_reset_vf_msg(struct ice_vf *vf)
+{
+	if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		ice_reset_vf(vf, false);
+}
+
+/**
+ * ice_find_vsi_from_id
+ * @pf: the PF structure to search for the VSI
+ * @id: ID of the VSI to search for
+ *
+ * searches for the VSI with the given ID
+ */
+static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id)
+{
+	int i;
+
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->vsi_num == id)
+			return pf->vsi[i];
+
+	return NULL;
+}
+
+/**
+ * ice_vc_isvalid_vsi_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VF relative VSI ID
+ *
+ * check that the VSI ID is valid and belongs to this VF
+ */
+static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_find_vsi_from_id(pf, vsi_id);
+
+	return (vsi && (vsi->vf_id == vf->vf_id));
+}
+
+/**
+ * ice_vc_isvalid_q_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VSI ID
+ * @qid: VSI relative queue ID
+ *
+ * check that the queue ID is valid for the given VSI
+ */
+static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
+{
+	struct ice_vsi *vsi = ice_find_vsi_from_id(vf->pf, vsi_id);
+	/* allocated Tx and Rx queues are always equal for a VF VSI */
+	return (vsi && (qid < vsi->alloc_txq));
+}
+
+/**
+ * ice_vc_isvalid_ring_len
+ * @ring_len: length of ring
+ *
+ * check that the ring length is zero, or lies within the supported range
+ * and is a multiple of ICE_REQ_DESC_MULTIPLE
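+ * (illustrative, assuming ICE_REQ_DESC_MULTIPLE is 32: lengths 0, 64 and
+ * 512 pass, while 100 is rejected)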
+ */
+static bool ice_vc_isvalid_ring_len(u16 ring_len)
+{
+	return ring_len == 0 ||
+	       (ring_len >= ICE_MIN_NUM_DESC &&
+		ring_len <= ICE_MAX_NUM_DESC &&
+		!(ring_len % ICE_REQ_DESC_MULTIPLE));
+}
+
+/**
+ * ice_vc_config_rss_key
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS key
+ */
+static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_rss_key *vrk =
+		(struct virtchnl_rss_key *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi = NULL;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_set_rss(vsi, vrk->key, NULL, 0))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_lut
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS LUT
+ */
+static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi = NULL;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_get_stats_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to get VSI stats
+ */
+static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+		(struct virtchnl_queue_select *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_eth_stats stats = { 0 };
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	memset(&stats, 0, sizeof(struct ice_eth_stats));
+	ice_update_eth_stats(vsi);
+
+	stats = vsi->eth_stats;
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, v_ret,
+				     (u8 *)&stats, sizeof(stats));
+}
+
+/**
+ * ice_vc_ena_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to enable all or specific queue(s)
+ */
+static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!vqs->rx_queues && !vqs->tx_queues) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vqs->rx_queues > ICE_MAX_BASE_QS_PER_VF ||
+	    vqs->tx_queues > ICE_MAX_BASE_QS_PER_VF) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	/* Enable only Rx rings; Tx rings were enabled by the FW when the
+	 * Tx queue group list was configured and the context bits were
+	 * programmed using ice_vsi_cfg_txqs
+	 */
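+	/* rx_queues and tx_queues are bitmaps: bit N selects queue N, e.g. a
+	 * value of 0x5 selects queues 0 and 2
+	 */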
+	q_map = vqs->rx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if already enabled */
+		if (test_bit(vf_q_id, vf->rxq_ena))
+			continue;
+
+		if (ice_vsi_ctrl_rx_ring(vsi, true, vf_q_id)) {
+			dev_err(&vsi->back->pdev->dev,
+				"Failed to enable Rx ring %d on VSI %d\n",
+				vf_q_id, vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		set_bit(vf_q_id, vf->rxq_ena);
+		vf->num_qs_ena++;
+	}
+
+	q_map = vqs->tx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if already enabled */
+		if (test_bit(vf_q_id, vf->txq_ena))
+			continue;
+
+		set_bit(vf_q_id, vf->txq_ena);
+		vf->num_qs_ena++;
+	}
+
+	/* Set flag to indicate that queues are enabled */
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS)
+		set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_dis_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to disable all or specific queue(s)
+ */
+static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
+	    !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!vqs->rx_queues && !vqs->tx_queues) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vqs->rx_queues > ICE_MAX_BASE_QS_PER_VF ||
+	    vqs->tx_queues > ICE_MAX_BASE_QS_PER_VF) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vqs->tx_queues) {
+		q_map = vqs->tx_queues;
+
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+			struct ice_ring *ring = vsi->tx_rings[vf_q_id];
+			struct ice_txq_meta txq_meta = { 0 };
+
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Skip queue if not enabled */
+			if (!test_bit(vf_q_id, vf->txq_ena))
+				continue;
+
+			ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+			if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
+						 ring, &txq_meta)) {
+				dev_err(&vsi->back->pdev->dev,
+					"Failed to stop Tx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->txq_ena);
+			vf->num_qs_ena--;
+		}
+	}
+
+	if (vqs->rx_queues) {
+		q_map = vqs->rx_queues;
+
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Skip queue if not enabled */
+			if (!test_bit(vf_q_id, vf->rxq_ena))
+				continue;
+
+			if (ice_vsi_ctrl_rx_ring(vsi, false, vf_q_id)) {
+				dev_err(&vsi->back->pdev->dev,
+					"Failed to stop Rx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->rxq_ena);
+			vf->num_qs_ena--;
+		}
+	}
+
+	/* Clear the queues-enabled state flag once no queues remain enabled */
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS && !vf->num_qs_ena)
+		clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_irq_map_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the IRQ to queue map
+ */
+static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_irq_map_info *irqmap_info;
+	u16 vsi_id, vsi_q_id, vector_id;
+	struct virtchnl_vector_map *map;
+	struct ice_pf *pf = vf->pf;
+	u16 num_q_vectors_mapped;
+	struct ice_vsi *vsi;
+	unsigned long qmap;
+	int i;
+
+	irqmap_info = (struct virtchnl_irq_map_info *)msg;
+	num_q_vectors_mapped = irqmap_info->num_vectors;
+
+	/* Make sure the number of VF vectors being mapped is not greater
+	 * than the number of VF vectors originally allocated, and that at
+	 * least one queue vector is actually mapped
+	 */
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    pf->num_vf_msix < num_q_vectors_mapped ||
+	    !irqmap_info->num_vectors) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < num_q_vectors_mapped; i++) {
+		struct ice_q_vector *q_vector;
+
+		map = &irqmap_info->vecmap[i];
+
+		vector_id = map->vector_id;
+		vsi_id = map->vsi_id;
+		/* validate msg params */
+		if (vector_id >= pf->hw.func_caps.common_cap.num_msix_vectors ||
+		    !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+		    (!vector_id && (map->rxq_map || map->txq_map))) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* No need to map VF miscellaneous or rogue vector */
+		if (!vector_id)
+			continue;
+
+		/* Subtract the non-queue vector from the vector_id passed by
+		 * the VF to get the actual VSI queue vector array index
+		 */
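+		/* ICE_NONQ_VECS_VF is 1 (the mailbox vector), so VF
+		 * vector_id 1 maps to vsi->q_vectors[0], and so on
+		 */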
+		q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+		if (!q_vector) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* look out for invalid queue indices */
+		qmap = map->rxq_map;
+		q_vector->num_ring_rx = 0;
+		for_each_set_bit(vsi_q_id, &qmap, ICE_MAX_BASE_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vsi_id, vsi_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+			q_vector->num_ring_rx++;
+			q_vector->rx.itr_idx = map->rxitr_idx;
+			vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+			ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id,
+					      q_vector->rx.itr_idx);
+		}
+
+		qmap = map->txq_map;
+		q_vector->num_ring_tx = 0;
+		for_each_set_bit(vsi_q_id, &qmap, ICE_MAX_BASE_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vsi_id, vsi_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+			q_vector->num_ring_tx++;
+			q_vector->tx.itr_idx = map->txitr_idx;
+			vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+			ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id,
+					      q_vector->tx.itr_idx);
+		}
+	}
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the Rx/Tx queues
+ */
+static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vsi_queue_config_info *qci =
+	    (struct virtchnl_vsi_queue_config_info *)msg;
+	struct virtchnl_queue_pair_info *qpi;
+	u16 num_rxq = 0, num_txq = 0;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int i;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF ||
+	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+		dev_err(&pf->pdev->dev,
+			"VF-%d requesting more than supported number of queues: %d\n",
+			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
+		qpi = &qci->qpair[i];
+		if (qpi->txq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.queue_id != qpi->txq.queue_id ||
+		    qpi->txq.headwb_enabled ||
+		    !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+		    !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+		    !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+		/* copy Tx queue info from VF into VSI */
+		if (qpi->txq.ring_len > 0) {
+			num_txq++;
+			vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
+			vsi->tx_rings[i]->count = qpi->txq.ring_len;
+		}
+
+		/* copy Rx queue info from VF into VSI */
+		if (qpi->rxq.ring_len > 0) {
+			num_rxq++;
+			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
+			vsi->rx_rings[i]->count = qpi->rxq.ring_len;
+
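+			/* a databuffer_size of 0 bypasses the bounds check;
+			 * otherwise it must lie between 1024 bytes and
+			 * 16 KB - 128 bytes
+			 */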
+			if (qpi->rxq.databuffer_size != 0 &&
+			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+			     qpi->rxq.databuffer_size < 1024)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+			vsi->rx_buf_len = qpi->rxq.databuffer_size;
+			vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
+			if (qpi->rxq.max_pkt_size >= (16 * 1024) ||
+			    qpi->rxq.max_pkt_size < 64) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+		}
+
+		vsi->max_frame = qpi->rxq.max_pkt_size;
+	}
+
+	/* The VF may configure fewer queues than were allocated to it by
+	 * default, so update the VSI with the new numbers
+	 */
+	vsi->num_txq = num_txq;
+	vsi->num_rxq = num_rxq;
+	/* All queues of VF VSI are in TC 0 */
+	vsi->tc_cfg.tc_info[0].qcount_tx = num_txq;
+	vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq;
+
+	if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_is_vf_trusted
+ * @vf: pointer to the VF info
+ */
+static bool ice_is_vf_trusted(struct ice_vf *vf)
+{
+	return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
+/**
+ * ice_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool ice_can_vf_change_mac(struct ice_vf *vf)
+{
+	/* If the VF MAC address has been set administratively (via the
+	 * ndo_set_vf_mac command), then deny permission to the VF to
+	 * add/delete unicast MAC addresses, unless the VF is trusted
+	 */
+	if (vf->pf_set_mac && !ice_is_vf_trusted(vf))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_handle_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @set: true if MAC filters are being set, false otherwise
+ *
+ * add guest MAC address filter
+ */
+static int
+ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
+	struct ice_pf *pf = vf->pf;
+	enum virtchnl_ops vc_op;
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	int mac_count = 0;
+	int i;
+
+	if (set)
+		vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
+	else
+		vc_op = VIRTCHNL_OP_DEL_ETH_ADDR;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	if (set && !ice_is_vf_trusted(vf) &&
+	    (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
+		dev_err(&pf->pdev->dev,
+			"Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
+			vf->vf_id);
+		/* An untrusted VF is not allowed to add more MAC addresses,
+		 * so reject the request
+		 */
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	for (i = 0; i < al->num_elements; i++) {
+		u8 *maddr = al->list[i].addr;
+
+		if (ether_addr_equal(maddr, vf->dflt_lan_addr.addr) ||
+		    is_broadcast_ether_addr(maddr)) {
+			if (set) {
+				/* VF is trying to add filters that the PF
+				 * already added. Just continue.
+				 */
+				dev_info(&pf->pdev->dev,
+					 "MAC %pM already set for VF %d\n",
+					 maddr, vf->vf_id);
+				continue;
+			} else {
+				/* VF can't remove dflt_lan_addr/bcast MAC */
+				dev_err(&pf->pdev->dev,
+					"VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n",
+					maddr, vf->vf_id);
+				continue;
+			}
+		}
+
+		/* check for the invalid cases and bail if necessary */
+		if (is_zero_ether_addr(maddr)) {
+			dev_err(&pf->pdev->dev,
+				"invalid MAC %pM provided for VF %d\n",
+				maddr, vf->vf_id);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto handle_mac_exit;
+		}
+
+		if (is_unicast_ether_addr(maddr) &&
+		    !ice_can_vf_change_mac(vf)) {
+			dev_err(&pf->pdev->dev,
+				"can't change unicast MAC for untrusted VF %d\n",
+				vf->vf_id);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto handle_mac_exit;
+		}
+
+		/* program the updated filter list */
+		status = ice_vsi_cfg_mac_fltr(vsi, maddr, set);
+		if (status == ICE_ERR_DOES_NOT_EXIST ||
+		    status == ICE_ERR_ALREADY_EXISTS) {
+			dev_info(&pf->pdev->dev,
+				 "can't %s MAC filters %pM for VF %d, error %d\n",
+				 set ? "add" : "remove", maddr, vf->vf_id,
+				 status);
+		} else if (status) {
+			dev_err(&pf->pdev->dev,
+				"can't %s MAC filters for VF %d, error %d\n",
+				set ? "add" : "remove", vf->vf_id, status);
+			v_ret = ice_err_to_virt_err(status);
+			goto handle_mac_exit;
+		}
+
+		mac_count++;
+	}
+
+	/* Track number of MAC filters programmed for the VF VSI */
+	if (set)
+		vf->num_mac += mac_count;
+	else
+		vf->num_mac -= mac_count;
+
+handle_mac_exit:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_add_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * add guest MAC address filter
+ */
+static int ice_vc_add_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_handle_mac_addr_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_del_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove guest MAC address filter
+ */
+static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_handle_mac_addr_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_request_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number. If the request is successful, PF will reset the VF and
+ * return 0. If unsuccessful, the PF will inform the VF of the number of
+ * available queue pairs via a virtchnl response.
+ */
+static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vf_res_request *vfres =
+		(struct virtchnl_vf_res_request *)msg;
+	u16 req_queues = vfres->num_queue_pairs;
+	struct ice_pf *pf = vf->pf;
+	u16 max_allowed_vf_queues;
+	u16 tx_rx_queue_left;
+	u16 cur_queues;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	cur_queues = vf->num_vf_qs;
+	tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
+				 ice_get_avail_rxq_count(pf));
+	max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
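+	/* illustrative example: with 4 current queues and 12 Tx/Rx queue
+	 * pairs still free, up to 16 queue pairs may be granted, subject to
+	 * the ICE_MAX_BASE_QS_PER_VF cap below
+	 */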
+	if (!req_queues) {
+		dev_err(&pf->pdev->dev,
+			"VF %d tried to request 0 queues. Ignoring.\n",
+			vf->vf_id);
+	} else if (req_queues > ICE_MAX_BASE_QS_PER_VF) {
+		dev_err(&pf->pdev->dev,
+			"VF %d tried to request more than %d queues.\n",
+			vf->vf_id, ICE_MAX_BASE_QS_PER_VF);
+		vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF;
+	} else if (req_queues > cur_queues &&
+		   req_queues - cur_queues > tx_rx_queue_left) {
+		dev_warn(&pf->pdev->dev,
+			 "VF %d requested %u more queues, but only %u left.\n",
+			 vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
+		vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
+					       ICE_MAX_BASE_QS_PER_VF);
+	} else {
+		/* request is successful, then reset VF */
+		vf->num_req_qs = req_queues;
+		ice_vc_dis_vf(vf);
+		dev_info(&pf->pdev->dev,
+			 "VF %d granted request of %u queues.\n",
+			 vf->vf_id, req_queues);
+		return 0;
+	}
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
+				     v_ret, (u8 *)vfres, sizeof(*vfres));
+}
+
+/**
+ * ice_set_vf_port_vlan
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @vlan_id: VLAN ID being set
+ * @qos: priority setting
+ * @vlan_proto: VLAN protocol
+ *
+ * program VF Port VLAN ID and/or QoS
+ */
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto)
+{
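+	/* the PVID word packs the 3-bit QoS priority above the 12-bit VLAN
+	 * ID (ICE_VLAN_PRIORITY_S is 12), e.g. vlan_id 5 with qos 3 yields
+	 * vlanprio 0x3005
+	 */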
+	u16 vlanprio = vlan_id | (qos << ICE_VLAN_PRIORITY_S);
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+	int ret = 0;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	if (vlan_id > ICE_MAX_VLANID || qos > 7) {
+		dev_err(&pf->pdev->dev, "Invalid VF Parameters\n");
+		return -EINVAL;
+	}
+
+	if (vlan_proto != htons(ETH_P_8021Q)) {
+		dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+		return -EPROTONOSUPPORT;
+	}
+
+	vf = &pf->vf[vf_id];
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	if (le16_to_cpu(vsi->info.pvid) == vlanprio) {
+		/* duplicate request, so just return success */
+		dev_info(&pf->pdev->dev,
+			 "Duplicate pvid %d request\n", vlanprio);
+		return ret;
+	}
+
+	/* If PVID, then remove all filters on the old VLAN */
+	if (vsi->info.pvid)
+		ice_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) &
+				  VLAN_VID_MASK));
+
+	if (vlan_id || qos) {
+		ret = ice_vsi_manage_pvid(vsi, vlanprio, true);
+		if (ret)
+			goto error_set_pvid;
+	} else {
+		ice_vsi_manage_pvid(vsi, 0, false);
+		vsi->info.pvid = 0;
+	}
+
+	if (vlan_id) {
+		dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
+			 vlan_id, qos, vf_id);
+
+		/* add new VLAN filter for each MAC */
+		ret = ice_vsi_add_vlan(vsi, vlan_id);
+		if (ret)
+			goto error_set_pvid;
+	}
+
+	/* The Port VLAN needs to be saved across resets the same as the
+	 * default LAN MAC address.
+	 */
+	vf->port_vlan_id = le16_to_cpu(vsi->info.pvid);
+
+error_set_pvid:
+	return ret;
+}
+
+/**
+ * ice_vc_process_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @add_v: Add VLAN if true, otherwise delete VLAN
+ *
+ * Process virtchnl op to add or remove programmed guest VLAN ID
+ */
+static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
+	struct ice_pf *pf = vf->pf;
+	bool vlan_promisc = false;
+	struct ice_vsi *vsi;
+	struct ice_hw *hw;
+	int status = 0;
+	u8 promisc_m;
+	int i;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (add_v && !ice_is_vf_trusted(vf) &&
+	    vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+		dev_info(&pf->pdev->dev,
+			 "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+			 vf->vf_id);
+		/* There is no need to let the VF know it is not trusted, so
+		 * just return a success message here
+		 */
+		goto error_param;
+	}
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		if (vfl->vlan_id[i] > ICE_MAX_VLANID) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			dev_err(&pf->pdev->dev,
+				"invalid VF VLAN id %d\n", vfl->vlan_id[i]);
+			goto error_param;
+		}
+	}
+
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vsi->info.pvid) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_vsi_manage_vlan_stripping(vsi, add_v)) {
+		dev_err(&pf->pdev->dev,
+			"%sable VLAN stripping failed for VSI %i\n",
+			 add_v ? "en" : "dis", vsi->vsi_num);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+	    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+		vlan_promisc = true;
+
+	if (add_v) {
+		for (i = 0; i < vfl->num_elements; i++) {
+			u16 vid = vfl->vlan_id[i];
+
+			if (!ice_is_vf_trusted(vf) &&
+			    vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+				dev_info(&pf->pdev->dev,
+					 "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+					 vf->vf_id);
+				/* There is no need to let the VF know it is
+				 * not trusted, so just return a success
+				 * message here as well
+				 */
+				goto error_param;
+			}
+
+			if (ice_vsi_add_vlan(vsi, vid)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			vf->num_vlan++;
+			/* Enable VLAN pruning when VLAN is added */
+			if (!vlan_promisc) {
+				status = ice_cfg_vlan_pruning(vsi, true, false);
+				if (status) {
+					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+					dev_err(&pf->pdev->dev,
+						"Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
+						vid, status);
+					goto error_param;
+				}
+			} else {
+				/* Enable Ucast/Mcast VLAN promiscuous mode */
+				promisc_m = ICE_PROMISC_VLAN_TX |
+					    ICE_PROMISC_VLAN_RX;
+
+				status = ice_set_vsi_promisc(hw, vsi->idx,
+							     promisc_m, vid);
+				if (status) {
+					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+					dev_err(&pf->pdev->dev,
+						"Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
+						vid, status);
+				}
+			}
+		}
+	} else {
+		/* For an untrusted VF, the number of VLAN elements passed to
+		 * the PF for removal might exceed the number of VLAN filters
+		 * actually programmed for that VF, so cap the loop at the
+		 * number of VLANs added earlier with the add-VLAN opcode.
+		 * This avoids removing a VLAN that doesn't exist, which would
+		 * send an erroneous failure message back to the VF
+		 */
+		int num_vf_vlan;
+
+		num_vf_vlan = vf->num_vlan;
+		for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
+			u16 vid = vfl->vlan_id[i];
+
+			/* Make sure ice_vsi_kill_vlan is successful before
+			 * updating VLAN information
+			 */
+			if (ice_vsi_kill_vlan(vsi, vid)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			vf->num_vlan--;
+			/* Disable VLAN pruning when the last VLAN is removed */
+			if (!vf->num_vlan)
+				ice_cfg_vlan_pruning(vsi, false, false);
+
+			/* Disable Unicast/Multicast VLAN promiscuous mode */
+			if (vlan_promisc) {
+				promisc_m = ICE_PROMISC_VLAN_TX |
+					    ICE_PROMISC_VLAN_RX;
+
+				ice_clear_vsi_promisc(hw, vsi->idx,
+						      promisc_m, vid);
+			}
+		}
+	}
+
+error_param:
+	/* send the response to the VF */
+	if (add_v)
+		return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, v_ret,
+					     NULL, 0);
+	else
+		return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, v_ret,
+					     NULL, 0);
+}
+
+/**
+ * ice_vc_add_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Add and program guest VLAN ID
+ */
+static int ice_vc_add_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_process_vlan_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_remove_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove programmed guest VLAN ID
+ */
+static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_process_vlan_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_ena_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Enable VLAN header stripping for a given VF
+ */
+static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_vsi_manage_vlan_stripping(vsi, true))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Disable VLAN header stripping for a given VF
+ */
+static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_vsi_manage_vlan_stripping(vsi, false))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_process_vf_msg - Process request from VF
+ * @pf: pointer to the PF structure
+ * @event: pointer to the AQ event
+ *
+ * called from the common asq/arq handler to
+ * process request from VF
+ */
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+	u32 v_opcode = le32_to_cpu(event->desc.cookie_high);
+	s16 vf_id = le16_to_cpu(event->desc.retval);
+	u16 msglen = event->msg_len;
+	u8 *msg = event->msg_buf;
+	struct ice_vf *vf = NULL;
+	int err = 0;
+
+	if (vf_id >= pf->num_alloc_vfs) {
+		err = -EINVAL;
+		goto error_handler;
+	}
+
+	vf = &pf->vf[vf_id];
+
+	/* Check if VF is disabled. */
+	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
+		err = -EPERM;
+		goto error_handler;
+	}
+
+	/* Perform basic checks on the msg */
+	err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
+	if (err) {
+		if (err == VIRTCHNL_STATUS_ERR_PARAM)
+			err = -EPERM;
+		else
+			err = -EINVAL;
+	}
+
+error_handler:
+	if (err) {
+		ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
+				      NULL, 0);
+		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
+			vf_id, v_opcode, msglen, err);
+		return;
+	}
+
+	switch (v_opcode) {
+	case VIRTCHNL_OP_VERSION:
+		err = ice_vc_get_ver_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
+		err = ice_vc_get_vf_res_msg(vf, msg);
+		ice_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_RESET_VF:
+		ice_vc_reset_vf_msg(vf);
+		break;
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+		err = ice_vc_add_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		err = ice_vc_del_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		err = ice_vc_cfg_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+		err = ice_vc_ena_qs_msg(vf, msg);
+		ice_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		err = ice_vc_dis_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES:
+		err = ice_vc_request_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		err = ice_vc_cfg_irq_map_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		err = ice_vc_config_rss_key(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		err = ice_vc_config_rss_lut(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_STATS:
+		err = ice_vc_get_stats_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_VLAN:
+		err = ice_vc_add_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_VLAN:
+		err = ice_vc_remove_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		err = ice_vc_ena_vlan_stripping(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+		err = ice_vc_dis_vlan_stripping(vf);
+		break;
+	case VIRTCHNL_OP_UNKNOWN:
+	default:
+		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
+			v_opcode, vf_id);
+		err = ice_vc_send_msg_to_vf(vf, v_opcode,
+					    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+					    NULL, 0);
+		break;
+	}
+	if (err) {
+		/* The opcode handlers above report their own errors; just
+		 * log here that the request could not be honored
+		 */
+		dev_info(&pf->pdev->dev,
+			 "PF failed to honor VF %d, opcode %d, error %d\n",
+			 vf_id, v_opcode, err);
+	}
+}
+
+/**
+ * ice_get_vf_cfg
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ivi: VF configuration structure
+ *
+ * return VF configuration
+ */
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	vf = &pf->vf[vf_id];
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	ivi->vf = vf_id;
+	ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr);
+
+	/* VF configuration for VLAN and applicable QoS */
+	ivi->vlan = le16_to_cpu(vsi->info.pvid) & ICE_VLAN_M;
+	ivi->qos = (le16_to_cpu(vsi->info.pvid) & ICE_PRIORITY_M) >>
+		    ICE_VLAN_PRIORITY_S;
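+	/* e.g. a stored pvid of 0x6064 decodes to VLAN ID 100, QoS 6 */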
+
+	ivi->trusted = vf->trusted;
+	ivi->spoofchk = vf->spoofchk;
+	if (!vf->link_forced)
+		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+	else if (vf->link_up)
+		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+	else
+		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+	ivi->max_tx_rate = vf->tx_rate;
+	ivi->min_tx_rate = 0;
+	return 0;
+}
+
+/**
+ * ice_set_vf_spoofchk
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ena: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking
+ */
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_vsi_ctx *ctx;
+	enum ice_status status;
+	struct ice_vf *vf;
+	int ret = 0;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	vf = &pf->vf[vf_id];
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	if (ena == vf->spoofchk) {
+		dev_dbg(&pf->pdev->dev, "VF spoofchk already %s\n",
+			ena ? "ON" : "OFF");
+		return 0;
+	}
+
+	ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+	if (ena) {
+		ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+		ctx->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M;
+	}
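+	/* when disabling, the flags are simply left cleared in the zeroed
+	 * ctx passed to the VSI update below
+	 */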
+
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
+	if (status) {
+		dev_dbg(&pf->pdev->dev,
+			"Error %d, failed to update VSI* parameters\n", status);
+		ret = -EIO;
+		goto out;
+	}
+
+	vf->spoofchk = ena;
+	vsi->info.sec_flags = ctx->info.sec_flags;
+	vsi->info.sw_flags2 = ctx->info.sw_flags2;
+out:
+	devm_kfree(&pf->pdev->dev, ctx);
+	return ret;
+}
+
+/**
+ * ice_set_vf_mac
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @mac: MAC address
+ *
+ * program VF MAC address
+ */
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf;
+	int ret = 0;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	vf = &pf->vf[vf_id];
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) {
+		netdev_err(netdev, "%pM not a valid unicast address\n", mac);
+		return -EINVAL;
+	}
+
+	/* copy MAC into dflt_lan_addr and trigger a VF reset. The reset
+	 * flow will use the updated dflt_lan_addr and add a MAC filter
+	 * using ice_add_mac. Also set pf_set_mac to indicate that the PF has
+	 * set the MAC address for this VF.
+	 */
+	ether_addr_copy(vf->dflt_lan_addr.addr, mac);
+	vf->pf_set_mac = true;
+	netdev_info(netdev,
+		    "MAC on VF %d set to %pM. VF driver will be reinitialized\n",
+		    vf_id, mac);
+
+	ice_vc_dis_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_set_vf_trust
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @trusted: Boolean value to enable/disable trusted VF
+ *
+ * Enable or disable a given VF as trusted
+ */
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf;
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	vf = &pf->vf[vf_id];
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	/* Check if already trusted */
+	if (trusted == vf->trusted)
+		return 0;
+
+	vf->trusted = trusted;
+	ice_vc_dis_vf(vf);
+	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
+		 vf_id, trusted ? "" : "un");
+
+	return 0;
+}
+
+/**
+ * ice_set_vf_link_state
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @link_state: required link state
+ *
+ * Set VF's link state, irrespective of physical link state status
+ */
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct virtchnl_pf_event pfe = { 0 };
+	struct ice_link_status *ls;
+	struct ice_vf *vf;
+	struct ice_hw *hw;
+
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	vf = &pf->vf[vf_id];
+	hw = &pf->hw;
+	ls = &pf->hw.port_info->phy.link_info;
+
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		dev_err(&pf->pdev->dev, "vf %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+	switch (link_state) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		vf->link_forced = false;
+		vf->link_up = ls->link_info & ICE_AQ_LINK_UP;
+		break;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		vf->link_forced = true;
+		vf->link_up = true;
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		vf->link_forced = true;
+		vf->link_up = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (vf->link_forced)
+		ice_set_pfe_link_forced(vf, &pfe, vf->link_up);
+	else
+		ice_set_pfe_link(vf, &pfe, ls->link_speed, vf->link_up);
+
+	/* Notify the VF of its new link state */
+	ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
+			      sizeof(pfe), NULL);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
new file mode 100644
index 0000000..0d9880c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_PF_H_
+#define _ICE_VIRTCHNL_PF_H_
+#include "ice.h"
+
+#define ICE_MAX_VLANID			4095
+#define ICE_VLAN_PRIORITY_S		12
+#define ICE_VLAN_M			0xFFF
+#define ICE_PRIORITY_M			0x7000
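+/* layout of a port VLAN word: bits 0-11 carry the VLAN ID (ICE_VLAN_M),
+ * bits 12-14 the QoS priority (ICE_PRIORITY_M)
+ */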
+
+/* Limit the MAC addresses and VLANs that a non-trusted VF can program */
+#define ICE_MAX_VLAN_PER_VF		8
+#define ICE_MAX_MACADDR_PER_VF		12
+
+/* Malicious Driver Detection */
+#define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED		10
+#define ICE_MDD_EVENTS_THRESHOLD		30
+
+/* Static VF transaction/status register def */
+#define VF_DEVICE_STATUS		0xAA
+#define VF_TRANS_PENDING_M		0x20
+
+/* wait defines for polling PF_PCI_CIAD register status */
+#define ICE_PCI_CIAD_WAIT_COUNT		100
+#define ICE_PCI_CIAD_WAIT_DELAY_US	1
+
+/* VF resources default values and limitation */
+#define ICE_MAX_VF_COUNT		256
+#define ICE_MAX_QS_PER_VF		256
+#define ICE_MIN_QS_PER_VF		1
+#define ICE_DFLT_QS_PER_VF		4
+#define ICE_NONQ_VECS_VF		1
+#define ICE_MAX_SCATTER_QS_PER_VF	16
+#define ICE_MAX_BASE_QS_PER_VF		16
+#define ICE_MAX_INTR_PER_VF		65
+#define ICE_MAX_POLICY_INTR_PER_VF	33
+#define ICE_MIN_INTR_PER_VF		(ICE_MIN_QS_PER_VF + 1)
+#define ICE_DFLT_INTR_PER_VF		(ICE_DFLT_QS_PER_VF + 1)
+
+/* Specific VF states */
+enum ice_vf_states {
+	ICE_VF_STATE_INIT = 0,		/* PF is initializing VF */
+	ICE_VF_STATE_ACTIVE,		/* VF resources are allocated for use */
+	ICE_VF_STATE_QS_ENA,		/* VF queue(s) enabled */
+	ICE_VF_STATE_DIS,
+	ICE_VF_STATE_MC_PROMISC,
+	ICE_VF_STATE_UC_PROMISC,
+	ICE_VF_STATES_NBITS
+};
+
+/* VF capabilities */
+enum ice_virtchnl_cap {
+	ICE_VIRTCHNL_VF_CAP_L2 = 0,
+	ICE_VIRTCHNL_VF_CAP_PRIVILEGE,
+};
+
+/* VF information structure */
+struct ice_vf {
+	struct ice_pf *pf;
+
+	s16 vf_id;			/* VF ID in the PF space */
+	u16 lan_vsi_idx;		/* index into PF struct */
+	/* first vector index of this VF in the PF space */
+	int first_vector_idx;
+	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
+	struct virtchnl_version_info vf_ver;
+	u32 driver_caps;		/* reported by VF driver */
+	struct virtchnl_ether_addr dflt_lan_addr;
+	DECLARE_BITMAP(txq_ena, ICE_MAX_BASE_QS_PER_VF);
+	DECLARE_BITMAP(rxq_ena, ICE_MAX_BASE_QS_PER_VF);
+	u16 port_vlan_id;
+	u8 pf_set_mac:1;		/* VF MAC address set by VMM admin */
+	u8 trusted:1;
+	u8 spoofchk:1;
+	u8 link_forced:1;
+	u8 link_up:1;			/* only valid if VF link is forced */
+	/* VSI indices - actual VSI pointers are maintained in the PF structure
+	 * When assigned, these will be non-zero, because VSI 0 is always
+	 * the main LAN VSI for the PF.
+	 */
+	u16 lan_vsi_num;		/* ID as used by firmware */
+	unsigned int tx_rate;		/* Tx bandwidth limit in Mbps */
+	DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS);	/* VF runtime states */
+
+	u64 num_mdd_events;		/* number of MDD events detected */
+	u64 num_inval_msgs;		/* number of continuous invalid msgs */
+	u64 num_valid_msgs;		/* number of valid msgs detected */
+	unsigned long vf_caps;		/* VF's adv. capabilities */
+	u8 num_req_qs;			/* num of queue pairs requested by VF */
+	u16 num_mac;
+	u16 num_vlan;
+	u16 num_vf_qs;			/* num of queues configured per VF */
+	u16 num_qs_ena;			/* total num of Tx/Rx queues enabled */
+};
+
+#ifdef CONFIG_PCI_IOV
+void ice_process_vflr_event(struct ice_pf *pf);
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi);
+
+void ice_free_vfs(struct ice_pf *pf);
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event);
+void ice_vc_notify_link_state(struct ice_pf *pf);
+void ice_vc_notify_reset(struct ice_pf *pf);
+bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr);
+
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto);
+
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
+
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
+
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
+
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
+
+void ice_set_vf_state_qs_dis(struct ice_vf *vf);
+#else /* CONFIG_PCI_IOV */
+#define ice_process_vflr_event(pf) do {} while (0)
+#define ice_free_vfs(pf) do {} while (0)
+#define ice_vc_process_vf_msg(pf, event) do {} while (0)
+#define ice_vc_notify_link_state(pf) do {} while (0)
+#define ice_vc_notify_reset(pf) do {} while (0)
+#define ice_set_vf_state_qs_dis(vf) do {} while (0)
+
+static inline bool
+ice_reset_all_vfs(struct ice_pf __always_unused *pf,
+		  bool __always_unused is_vflr)
+{
+	return true;
+}
+
+static inline int
+ice_sriov_configure(struct pci_dev __always_unused *pdev,
+		    int __always_unused num_vfs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_mac(struct net_device __always_unused *netdev,
+	       int __always_unused vf_id, u8 __always_unused *mac)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_get_vf_cfg(struct net_device __always_unused *netdev,
+	       int __always_unused vf_id,
+	       struct ifla_vf_info __always_unused *ivi)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_trust(struct net_device __always_unused *netdev,
+		 int __always_unused vf_id, bool __always_unused trusted)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_port_vlan(struct net_device __always_unused *netdev,
+		     int __always_unused vf_id, u16 __always_unused vid,
+		     u8 __always_unused qos, __be16 __always_unused v_proto)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_spoofchk(struct net_device __always_unused *netdev,
+		    int __always_unused vf_id, bool __always_unused ena)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_link_state(struct net_device __always_unused *netdev,
+		      int __always_unused vf_id, int __always_unused link_state)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
+		    struct ice_q_vector __always_unused *q_vector)
+{
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index bafdcf7..8a6ef35 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -466,7 +466,7 @@
 			? igb_setup_copper_link_82575
 			: igb_setup_serdes_link_82575;
 
-	if (mac->type == e1000_82580) {
+	if (mac->type == e1000_82580 || mac->type == e1000_i350) {
 		switch (hw->device_id) {
 		/* feature not supported on these id's */
 		case E1000_DEV_ID_DH89XXCC_SGMII:
@@ -638,7 +638,7 @@
 			dev_spec->sgmii_active = true;
 			break;
 		}
-		/* fall through for I2C based SGMII */
+		/* fall through - for I2C based SGMII */
 	case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
 		/* read media type from SFP EEPROM */
 		ret_val = igb_set_sfp_media_type_82575(hw);
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 8a28f33..d2e2c50 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -194,6 +194,8 @@
 /* enable link status from external LINK_0 and LINK_1 pins */
 #define E1000_CTRL_SWDPIN0  0x00040000  /* SWDPIN 0 value */
 #define E1000_CTRL_SWDPIN1  0x00080000  /* SWDPIN 1 value */
+#define E1000_CTRL_ADVD3WUC 0x00100000  /* D3 WUC */
+#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */
 #define E1000_CTRL_SDP0_DIR 0x00400000  /* SDP0 Data direction */
 #define E1000_CTRL_SDP1_DIR 0x00800000  /* SDP1 Data direction */
 #define E1000_CTRL_RST      0x04000000  /* Global reset */
@@ -334,6 +336,7 @@
 
 #define I210_RXPBSIZE_DEFAULT		0x000000A2 /* RXPBSIZE default */
 #define I210_RXPBSIZE_MASK		0x0000003F
+#define I210_RXPBSIZE_PB_30KB		0x0000001E
 #define I210_RXPBSIZE_PB_32KB		0x00000020
 #define I210_TXPBSIZE_DEFAULT		0x04000014 /* TXPBSIZE default */
 #define I210_TXPBSIZE_MASK		0xC0FFFFFF
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 0ad737d..9cb4998 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -409,6 +409,8 @@
 #define E1000_I210_TQAVCC(_n)	(0x3004 + ((_n) * 0x40))
 #define E1000_I210_TQAVHC(_n)	(0x300C + ((_n) * 0x40))
 
+#define E1000_I210_RR2DCDELAY	0x5BF4
+
 #define E1000_INVM_DATA_REG(_n)	(0x12120 + 4*(_n))
 #define E1000_INVM_SIZE		64 /* Number of INVM Data Registers */
 
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 5acf3b7..3182b05 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -448,7 +448,7 @@
 
 static int igb_get_regs_len(struct net_device *netdev)
 {
-#define IGB_REGS_LEN 739
+#define IGB_REGS_LEN 740
 	return IGB_REGS_LEN * sizeof(u32);
 }
 
@@ -675,41 +675,44 @@
 		regs_buff[554] = adapter->stats.b2ogprc;
 	}
 
-	if (hw->mac.type != e1000_82576)
-		return;
-	for (i = 0; i < 12; i++)
-		regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
-	for (i = 0; i < 4; i++)
-		regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
+	if (hw->mac.type == e1000_82576) {
+		for (i = 0; i < 12; i++)
+			regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
+		for (i = 0; i < 4; i++)
+			regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
 
-	for (i = 0; i < 12; i++)
-		regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
-	for (i = 0; i < 12; i++)
-		regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+	}
+
+	if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211)
+		regs_buff[739] = rd32(E1000_I210_RR2DCDELAY);
 }
 
 static int igb_get_eeprom_len(struct net_device *netdev)
@@ -2113,7 +2116,7 @@
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 
-	if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE))
+	if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE | WAKE_FILTER))
 		return -EOPNOTSUPP;
 
 	if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
@@ -3158,8 +3161,8 @@
 	} else if (!edata->eee_enabled) {
 		dev_err(&adapter->pdev->dev,
 			"Setting EEE options are not supported with EEE disabled\n");
-			return -EINVAL;
-		}
+		return -EINVAL;
+	}
 
 	adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
 	if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) {
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 0796cef..ed7e667 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -39,7 +39,7 @@
 #include "igb.h"
 
 #define MAJ 5
-#define MIN 4
+#define MIN 6
 #define BUILD 0
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
@@ -239,7 +239,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
@@ -753,6 +753,8 @@
 		struct net_device *netdev = igb->netdev;
 		hw->hw_addr = NULL;
 		netdev_err(netdev, "PCIe link lost\n");
+		WARN(pci_device_is_present(igb->pdev),
+		     "igb: Failed to read reg 0x%x!\n", reg);
 	}
 
 	return value;
@@ -1189,15 +1191,15 @@
 {
 	struct igb_q_vector *q_vector;
 	struct igb_ring *ring;
-	int ring_count, size;
+	int ring_count;
+	size_t size;
 
 	/* igb only supports 1 Tx and/or 1 Rx queue per vector */
 	if (txr_count > 1 || rxr_count > 1)
 		return -ENOMEM;
 
 	ring_count = txr_count + rxr_count;
-	size = sizeof(struct igb_q_vector) +
-	       (sizeof(struct igb_ring) * ring_count);
+	size = struct_size(q_vector, ring, ring_count);
 
 	/* allocate q_vector and rings */
 	q_vector = adapter->q_vector[v_idx];
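The struct_size() conversion above replaces open-coded "sizeof(struct) +
n * sizeof(element)" arithmetic for a trailing flexible array with the
overflow-checked helper from <linux/overflow.h>: on overflow it saturates to
SIZE_MAX, so the allocation fails cleanly instead of returning an undersized
buffer. A minimal sketch of the pattern, with hypothetical structure names:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct item {
		unsigned long a, b;
	};

	struct example {
		int count;
		struct item elems[];	/* trailing flexible array */
	};

	/* Equivalent to sizeof(struct example) + n * sizeof(struct item),
	 * but saturating to SIZE_MAX on overflow, so kzalloc() returns
	 * NULL rather than an undersized buffer.
	 */
	struct example *p = kzalloc(struct_size(p, elems, n), GFP_KERNEL);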
@@ -1850,13 +1852,12 @@
 	 * configuration' in respect to these parameters.
 	 */
 
-	netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d \
-			    idleslope %d sendslope %d hiCredit %d \
-			    locredit %d\n",
-		   (ring->cbs_enable) ? "enabled" : "disabled",
-		   (ring->launchtime_enable) ? "enabled" : "disabled", queue,
-		   ring->idleslope, ring->sendslope, ring->hicredit,
-		   ring->locredit);
+	netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+		   ring->cbs_enable ? "enabled" : "disabled",
+		   ring->launchtime_enable ? "enabled" : "disabled",
+		   queue,
+		   ring->idleslope, ring->sendslope,
+		   ring->hicredit, ring->locredit);
 }
 
 static int igb_save_txtime_params(struct igb_adapter *adapter, int queue,
@@ -1935,7 +1936,7 @@
 
 		val = rd32(E1000_RXPBS);
 		val &= ~I210_RXPBSIZE_MASK;
-		val |= I210_RXPBSIZE_PB_32KB;
+		val |= I210_RXPBSIZE_PB_30KB;
 		wr32(E1000_RXPBS, val);
 
 		/* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
@@ -2064,7 +2065,8 @@
 	if ((hw->phy.media_type == e1000_media_type_copper) &&
 	    (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
 		swap_now = true;
-	} else if (!(connsw & E1000_CONNSW_SERDESD)) {
+	} else if ((hw->phy.media_type != e1000_media_type_copper) &&
+		   !(connsw & E1000_CONNSW_SERDESD)) {
 		/* copper signal takes time to appear */
 		if (adapter->copper_tries < 4) {
 			adapter->copper_tries++;
@@ -2370,7 +2372,7 @@
 		adapter->ei.get_invariants(hw);
 		adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
 	}
-	if ((mac->type == e1000_82575) &&
+	if ((mac->type == e1000_82575 || mac->type == e1000_i350) &&
 	    (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
 		igb_enable_mas(adapter);
 	}
@@ -2481,13 +2483,14 @@
 	else
 		igb_reset(adapter);
 
-	return 0;
+	return 1;
 }
 
 static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr, u16 vid,
-			   u16 flags)
+			   u16 flags,
+			   struct netlink_ext_ack *extack)
 {
 	/* guarantee we can provide a unique filter for the unicast address */
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
@@ -2577,13 +2580,15 @@
 #define VLAN_PRIO_FULL_MASK (0x07)
 
 static int igb_parse_cls_flower(struct igb_adapter *adapter,
-				struct tc_cls_flower_offload *f,
+				struct flow_cls_offload *f,
 				int traffic_class,
 				struct igb_nfc_filter *input)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	struct netlink_ext_ack *extack = f->common.extack;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -2593,78 +2598,60 @@
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						 f->mask);
-
-		if (!is_zero_ether_addr(mask->dst)) {
-			if (!is_broadcast_ether_addr(mask->dst)) {
+		flow_rule_match_eth_addrs(rule, &match);
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (!is_broadcast_ether_addr(match.mask->dst)) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |=
 				IGB_FILTER_FLAG_DST_MAC_ADDR;
-			ether_addr_copy(input->filter.dst_addr, key->dst);
+			ether_addr_copy(input->filter.dst_addr, match.key->dst);
 		}
 
-		if (!is_zero_ether_addr(mask->src)) {
-			if (!is_broadcast_ether_addr(mask->src)) {
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (!is_broadcast_ether_addr(match.mask->src)) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |=
 				IGB_FILTER_FLAG_SRC_MAC_ADDR;
-			ether_addr_copy(input->filter.src_addr, key->src);
+			ether_addr_copy(input->filter.src_addr, match.key->src);
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_BASIC,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_BASIC,
-						 f->mask);
-
-		if (mask->n_proto) {
-			if (mask->n_proto != ETHER_TYPE_FULL_MASK) {
+		flow_rule_match_basic(rule, &match);
+		if (match.mask->n_proto) {
+			if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
-			input->filter.etype = key->n_proto;
+			input->filter.etype = match.key->n_proto;
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_VLAN,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_VLAN,
-						 f->mask);
-
-		if (mask->vlan_priority) {
-			if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_priority) {
+			if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
-			input->filter.vlan_tci = key->vlan_priority;
+			input->filter.vlan_tci = match.key->vlan_priority;
 		}
 	}
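The tc_cls_flower_offload to flow_cls_offload rename is part of the move to
the common flow offload infrastructure: rather than calling
skb_flow_dissector_target() twice for key and mask, a driver pulls a struct
flow_rule out of the offload request and uses per-key helpers that fill a
match structure with both pointers. A minimal sketch of the consumer side,
assuming only the basic key is of interest (example_parse is hypothetical):

	#include <net/flow_offload.h>

	static int example_parse(struct flow_cls_offload *f)
	{
		struct flow_rule *rule = flow_cls_offload_flow_rule(f);

		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
			struct flow_match_basic match;

			flow_rule_match_basic(rule, &match);
			/* match.key holds values, match.mask the valid bits */
			if (match.mask->n_proto)
				pr_debug("n_proto 0x%04x\n",
					 ntohs(match.key->n_proto));
		}
		return 0;
	}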
 
@@ -2675,7 +2662,7 @@
 }
 
 static int igb_configure_clsflower(struct igb_adapter *adapter,
-				   struct tc_cls_flower_offload *cls_flower)
+				   struct flow_cls_offload *cls_flower)
 {
 	struct netlink_ext_ack *extack = cls_flower->common.extack;
 	struct igb_nfc_filter *filter, *f;
@@ -2737,7 +2724,7 @@
 }
 
 static int igb_delete_clsflower(struct igb_adapter *adapter,
-				struct tc_cls_flower_offload *cls_flower)
+				struct flow_cls_offload *cls_flower)
 {
 	struct igb_nfc_filter *filter;
 	int err;
@@ -2767,14 +2754,14 @@
 }
 
 static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
-				   struct tc_cls_flower_offload *cls_flower)
+				   struct flow_cls_offload *cls_flower)
 {
 	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
+	case FLOW_CLS_REPLACE:
 		return igb_configure_clsflower(adapter, cls_flower);
-	case TC_CLSFLOWER_DESTROY:
+	case FLOW_CLS_DESTROY:
 		return igb_delete_clsflower(adapter, cls_flower);
-	case TC_CLSFLOWER_STATS:
+	case FLOW_CLS_STATS:
 		return -EOPNOTSUPP;
 	default:
 		return -EOPNOTSUPP;
@@ -2798,25 +2785,6 @@
 	}
 }
 
-static int igb_setup_tc_block(struct igb_adapter *adapter,
-			      struct tc_block_offload *f)
-{
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
-					     adapter, adapter, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
-					adapter);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
 static int igb_offload_txtime(struct igb_adapter *adapter,
 			      struct tc_etf_qopt_offload *qopt)
 {
@@ -2840,6 +2808,8 @@
 	return 0;
 }
 
+static LIST_HEAD(igb_block_cb_list);
+
 static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			void *type_data)
 {
@@ -2849,7 +2819,11 @@
 	case TC_SETUP_QDISC_CBS:
 		return igb_offload_cbs(adapter, type_data);
 	case TC_SETUP_BLOCK:
-		return igb_setup_tc_block(adapter, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &igb_block_cb_list,
+						  igb_setup_tc_block_cb,
+						  adapter, adapter, true);
+
 	case TC_SETUP_QDISC_ETF:
 		return igb_offload_txtime(adapter, type_data);
 
@@ -3468,6 +3442,9 @@
 			break;
 		}
 	}
+
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+
 	pm_runtime_put_noidle(&pdev->dev);
 	return 0;
 
@@ -4756,8 +4733,7 @@
 {
 	u16 i = rx_ring->next_to_clean;
 
-	if (rx_ring->skb)
-		dev_kfree_skb(rx_ring->skb);
+	dev_kfree_skb(rx_ring->skb);
 	rx_ring->skb = NULL;
 
 	/* Free all the Rx ring sk_buffs */
@@ -5699,7 +5675,8 @@
 	 * should have been handled by the upper layers.
 	 */
 	if (tx_ring->launchtime_enable) {
-		ts = ns_to_timespec64(first->skb->tstamp);
+		ts = ktime_to_timespec64(first->skb->tstamp);
+		first->skb->tstamp = ktime_set(0, 0);
 		context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
 	} else {
 		context_desc->seqnum_seed = 0;
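ktime_to_timespec64() is the right conversion now that skb->tstamp is a
ktime_t, and the timestamp is cleared after it is consumed, so that the
stale launch time is not left behind to be misread as an ordinary timestamp
later. The descriptor field itself is expressed in 32 ns units within the
current second, hence the division; a sketch of the conversion:

	/* seqnum_seed is in 32 ns units within the current second */
	struct timespec64 ts = ktime_to_timespec64(skb->tstamp);
	u32 seed = ts.tv_nsec / 32;	/* tv_nsec = 640  ->  seed = 20 */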
@@ -5942,7 +5919,7 @@
 	struct sk_buff *skb = first->skb;
 	struct igb_tx_buffer *tx_buffer;
 	union e1000_adv_tx_desc *tx_desc;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	dma_addr_t dma;
 	unsigned int data_len, size;
 	u32 tx_flags = first->tx_flags;
@@ -6019,6 +5996,8 @@
 	/* set the timestamp */
 	first->time_stamp = jiffies;
 
+	skb_tx_timestamp(skb);
+
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.  (Only applicable for weak-ordered
 	 * memory model archs, such as IA-64).
@@ -6040,13 +6019,8 @@
 	/* Make sure there is space in the ring for the next send. */
 	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
-	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
 		writel(i, tx_ring->tail);
-
-		/* we need this if more than one processor can write to our tail
-		 * at a time, it synchronizes IO on IA64/Altix systems
-		 */
-		mmiowb();
 	}
 	return 0;
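skb->xmit_more was replaced by a per-CPU hint read through
netdev_xmit_more(), and the explicit mmiowb() is gone because the 5.2
tree-wide removal folded that barrier into the spinlock release path on the
architectures that needed it. The doorbell-batching idea, as a sketch:

	/* Ring the tail doorbell only when the stack signals the end of a
	 * transmit burst, or when the queue was stopped; doorbells skipped
	 * for earlier skbs are covered by the write for the last one.
	 */
	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
		writel(tx_ring->next_to_use, tx_ring->tail);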
 
@@ -6101,7 +6075,8 @@
 	 * otherwise try next time
 	 */
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
-		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+		count += TXD_USE_COUNT(skb_frag_size(
+						&skb_shinfo(skb)->frags[f]));
 
 	if (igb_maybe_stop_tx(tx_ring, count + 3)) {
 		/* this is a hard error */
@@ -6147,8 +6122,6 @@
 	else if (!tso)
 		igb_tx_csum(tx_ring, first);
 
-	skb_tx_timestamp(skb);
-
 	if (igb_tx_map(tx_ring, first, hdr_len))
 		goto cleanup_tx_tstamp;
 
@@ -6713,7 +6686,7 @@
 			igb_setup_dca(adapter);
 			break;
 		}
-		/* Fall Through since DCA is disabled. */
+		/* Fall Through - since DCA is disabled. */
 	case DCA_PROVIDER_REMOVE:
 		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
 			/* without this a class_device is left
@@ -7753,11 +7726,13 @@
 	if (!clean_complete)
 		return budget;
 
-	/* If not enough Rx work done, exit the polling mode */
-	napi_complete_done(napi, work_done);
-	igb_ring_irq_enable(q_vector);
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		igb_ring_irq_enable(q_vector);
 
-	return 0;
+	return min(work_done, budget - 1);
 }
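Two NAPI conventions are at work in this hunk: napi_complete_done() returns
false while the busy-polling machinery still owns the context, in which case
interrupts must stay masked; and a poll routine that has completed must
report strictly less than the full budget, hence min(work_done, budget - 1).
A sketch of the pattern, with hypothetical helpers:

	static int example_poll(struct napi_struct *napi, int budget)
	{
		int work_done = example_clean_rx(napi, budget);	/* hypothetical */

		if (work_done == budget)
			return budget;	/* more work: stay in polling mode */

		/* Re-arm interrupts only if NAPI really completed */
		if (likely(napi_complete_done(napi, work_done)))
			example_irq_enable(napi);	/* hypothetical */

		return min(work_done, budget - 1);
	}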
 
 /**
@@ -8062,7 +8037,7 @@
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > IGB_RX_HDR_LEN)
-		headlen = eth_get_headlen(va, IGB_RX_HDR_LEN);
+		headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN);
 
 	/* align pull length to size of long to optimize memcpy performance */
 	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
@@ -8754,9 +8729,7 @@
 	struct e1000_hw *hw = &adapter->hw;
 	u32 ctrl, rctl, status;
 	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
-#ifdef CONFIG_PM
-	int retval = 0;
-#endif
+	bool wake;
 
 	rtnl_lock();
 	netif_device_detach(netdev);
@@ -8769,12 +8742,6 @@
 	igb_clear_interrupt_scheme(adapter);
 	rtnl_unlock();
 
-#ifdef CONFIG_PM
-	retval = pci_save_state(pdev);
-	if (retval)
-		return retval;
-#endif
-
 	status = rd32(E1000_STATUS);
 	if (status & E1000_STATUS_LU)
 		wufc &= ~E1000_WUFC_LNKC;
@@ -8791,10 +8758,6 @@
 		}
 
 		ctrl = rd32(E1000_CTRL);
-		/* advertise wake from D3Cold */
-		#define E1000_CTRL_ADVD3WUC 0x00100000
-		/* phy power management enable */
-		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
 		ctrl |= E1000_CTRL_ADVD3WUC;
 		wr32(E1000_CTRL, ctrl);
 
@@ -8808,12 +8771,15 @@
 		wr32(E1000_WUFC, 0);
 	}
 
-	*enable_wake = wufc || adapter->en_mng_pt;
-	if (!*enable_wake)
+	wake = wufc || adapter->en_mng_pt;
+	if (!wake)
 		igb_power_down_link(adapter);
 	else
 		igb_power_up_link(adapter);
 
+	if (enable_wake)
+		*enable_wake = wake;
+
 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
 	 * would have already happened in close and is redundant.
 	 */
@@ -8856,22 +8822,7 @@
 
 static int __maybe_unused igb_suspend(struct device *dev)
 {
-	int retval;
-	bool wake;
-	struct pci_dev *pdev = to_pci_dev(dev);
-
-	retval = __igb_shutdown(pdev, &wake, 0);
-	if (retval)
-		return retval;
-
-	if (wake) {
-		pci_prepare_to_sleep(pdev);
-	} else {
-		pci_wake_from_d3(pdev, false);
-		pci_set_power_state(pdev, PCI_D3hot);
-	}
-
-	return 0;
+	return __igb_shutdown(to_pci_dev(dev), NULL, 0);
 }
 
 static int __maybe_unused igb_resume(struct device *dev)
@@ -8930,8 +8881,7 @@
 
 static int __maybe_unused igb_runtime_idle(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct igb_adapter *adapter = netdev_priv(netdev);
 
 	if (!igb_has_link(adapter))
@@ -8942,22 +8892,7 @@
 
 static int __maybe_unused igb_runtime_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int retval;
-	bool wake;
-
-	retval = __igb_shutdown(pdev, &wake, 1);
-	if (retval)
-		return retval;
-
-	if (wake) {
-		pci_prepare_to_sleep(pdev);
-	} else {
-		pci_wake_from_d3(pdev, false);
-		pci_set_power_state(pdev, PCI_D3hot);
-	}
-
-	return 0;
+	return __igb_shutdown(to_pci_dev(dev), NULL, 1);
 }
 
 static int __maybe_unused igb_runtime_resume(struct device *dev)
@@ -9086,7 +9021,6 @@
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 	pci_ers_result_t result;
-	int err;
 
 	if (pci_enable_device_mem(pdev)) {
 		dev_err(&pdev->dev,
@@ -9110,14 +9044,6 @@
 		result = PCI_ERS_RESULT_RECOVERED;
 	}
 
-	err = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (err) {
-		dev_err(&pdev->dev,
-			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
-			err);
-		/* non-fatal, continue */
-	}
-
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 9f4d700..c39e921 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -51,9 +51,17 @@
  *
  * The 40 bit 82580 SYSTIM overflows every
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter needs
+ * to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, and also the NIC
+ * clock can be adjusted to run up to 6% faster and the system clock
+ * up to 10% slower, so we aim for 6 minutes to be sure the actual
+ * interval in the NIC time is shorter than 9.16 minutes.
  */
 
-#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 9)
+#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 6)
 #define IGB_PTP_TX_TIMEOUT		(HZ * 15)
 #define INCPERIOD_82576			BIT(E1000_TIMINCA_16NS_SHIFT)
 #define INCVALUE_82576_MASK		GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
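The arithmetic behind the shorter period checks out:

	2^40 ns ~= 1099.5 s ~= 18.32 min	(SYSTIM wrap on the 82580)
	18.32 / 2 ~= 9.16 min			(timecounter update bound)
	6 / 0.9 * 1.06 ~= 7.07 min		(worst case: system clock 10%
						 slow, NIC clock 6% fast)

so even a maximally skewed 6 minute timer stays inside the 9.16 minute bound.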
@@ -269,17 +277,25 @@
 	return 0;
 }
 
-static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
-				 struct timespec64 *ts)
+static int igb_ptp_gettimex_82576(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
 {
 	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
 					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
 	unsigned long flags;
+	u32 lo, hi;
 	u64 ns;
 
 	spin_lock_irqsave(&igb->tmreg_lock, flags);
 
-	ns = timecounter_read(&igb->tc);
+	ptp_read_system_prets(sts);
+	lo = rd32(E1000_SYSTIML);
+	ptp_read_system_postts(sts);
+	hi = rd32(E1000_SYSTIMH);
+
+	ns = timecounter_cyc2time(&igb->tc, ((u64)hi << 32) | lo);
 
 	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
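gettimex64 supersedes gettime64 so the PTP core can bracket the device read
with system clock snapshots (consumed by the PTP_SYS_OFFSET_EXTENDED ioctl);
taking the snapshots immediately around the single register read that latches
the device time keeps the uncertainty window small. The driver-side shape, as
a sketch with a hypothetical latching read:

	static int example_gettimex(struct ptp_clock_info *ptp,
				    struct timespec64 *ts,
				    struct ptp_system_timestamp *sts)
	{
		u64 ns;

		ptp_read_system_prets(sts);	/* system time just before */
		ns = example_read_clock();	/* hypothetical device read */
		ptp_read_system_postts(sts);	/* ... and just after */

		*ts = ns_to_timespec64(ns);
		return 0;
	}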
 
@@ -288,16 +304,50 @@
 	return 0;
 }
 
-static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
-				struct timespec64 *ts)
+static int igb_ptp_gettimex_82580(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
 {
 	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
 					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	unsigned long flags;
+	u32 lo, hi;
+	u64 ns;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	ptp_read_system_prets(sts);
+	rd32(E1000_SYSTIMR);
+	ptp_read_system_postts(sts);
+	lo = rd32(E1000_SYSTIML);
+	hi = rd32(E1000_SYSTIMH);
+
+	ns = timecounter_cyc2time(&igb->tc, ((u64)hi << 32) | lo);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int igb_ptp_gettimex_i210(struct ptp_clock_info *ptp,
+				 struct timespec64 *ts,
+				 struct ptp_system_timestamp *sts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
 	unsigned long flags;
 
 	spin_lock_irqsave(&igb->tmreg_lock, flags);
 
-	igb_ptp_read_i210(igb, ts);
+	ptp_read_system_prets(sts);
+	rd32(E1000_SYSTIMR);
+	ptp_read_system_postts(sts);
+	ts->tv_nsec = rd32(E1000_SYSTIML);
+	ts->tv_sec = rd32(E1000_SYSTIMH);
 
 	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
 
@@ -471,6 +521,19 @@
 
 	switch (rq->type) {
 	case PTP_CLK_REQ_EXTTS:
+		/* Reject requests with unsupported flags */
+		if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+					PTP_RISING_EDGE |
+					PTP_FALLING_EDGE |
+					PTP_STRICT_FLAGS))
+			return -EOPNOTSUPP;
+
+		/* Reject requests failing to enable both edges. */
+		if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+		    (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+		    (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+			return -EOPNOTSUPP;
+
 		if (on) {
 			pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS,
 					   rq->extts.index);
@@ -501,6 +564,10 @@
 		return 0;
 
 	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
 		if (on) {
 			pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT,
 					   rq->perout.index);
@@ -650,9 +717,12 @@
 	struct igb_adapter *igb =
 		container_of(work, struct igb_adapter, ptp_overflow_work.work);
 	struct timespec64 ts;
+	u64 ns;
 
-	igb->ptp_caps.gettime64(&igb->ptp_caps, &ts);
+	/* Update the timecounter */
+	ns = timecounter_read(&igb->tc);
 
+	ts = ns_to_timespec64(ns);
 	pr_debug("igb overflow check at %lld.%09lu\n",
 		 (long long) ts.tv_sec, ts.tv_nsec);
 
@@ -1118,7 +1188,7 @@
 		adapter->ptp_caps.pps = 0;
 		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-		adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
+		adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82576;
 		adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
 		adapter->ptp_caps.enable = igb_ptp_feature_enable;
 		adapter->cc.read = igb_ptp_read_82576;
@@ -1137,7 +1207,7 @@
 		adapter->ptp_caps.pps = 0;
 		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-		adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
+		adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82580;
 		adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
 		adapter->ptp_caps.enable = igb_ptp_feature_enable;
 		adapter->cc.read = igb_ptp_read_82580;
@@ -1165,7 +1235,7 @@
 		adapter->ptp_caps.pin_config = adapter->sdp_config;
 		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
-		adapter->ptp_caps.gettime64 = igb_ptp_gettime_i210;
+		adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_i210;
 		adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
 		adapter->ptp_caps.enable = igb_ptp_feature_enable_i210;
 		adapter->ptp_caps.verify = igb_ptp_verify_pin;
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
index 163e583..a3cd7ac 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.c
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -241,7 +241,7 @@
 	s32 err;
 	u16 i;
 
-	WARN_ON_ONCE(!spin_is_locked(&hw->mbx_lock));
+	lockdep_assert_held(&hw->mbx_lock);
 
 	/* lock the mailbox to prevent pf/vf race condition */
 	err = e1000_obtain_mbx_lock_vf(hw);
@@ -279,7 +279,7 @@
 	s32 err;
 	u16 i;
 
-	WARN_ON_ONCE(!spin_is_locked(&hw->mbx_lock));
+	lockdep_assert_held(&hw->mbx_lock);
 
 	/* lock the mailbox to prevent pf/vf race condition */
 	err = e1000_obtain_mbx_lock_vf(hw);
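lockdep_assert_held() is the better assertion here: spin_is_locked() only
reports that *someone* holds the lock (and is constant false on uniprocessor
builds, where spinlocks compile away), while lockdep_assert_held() checks
that the current context holds it and costs nothing without CONFIG_LOCKDEP:

	/* Splats under lockdep if the caller forgot to take the lock;
	 * compiles to nothing on non-lockdep builds.
	 */
	lockdep_assert_held(&hw->mbx_lock);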
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index e0c989f..0f2b68f 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1186,10 +1186,13 @@
 
 	igbvf_clean_rx_irq(adapter, &work_done, budget);
 
-	/* If not enough Rx work done, exit the polling mode */
-	if (work_done < budget) {
-		napi_complete_done(napi, work_done);
+	if (work_done == budget)
+		return budget;
 
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
 		if (adapter->requested_itr & 3)
 			igbvf_set_itr(adapter);
 
@@ -2171,7 +2174,7 @@
 		goto dma_error;
 
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag;
 
 		count++;
 		i++;
@@ -2276,10 +2279,6 @@
 	tx_ring->buffer_info[first].next_to_watch = tx_desc;
 	tx_ring->next_to_use = i;
 	writel(i, adapter->hw.hw_addr + tx_ring->tail);
-	/* we need this if more than one processor can write to our tail
-	 * at a time, it synchronizes IO on IA64/Altix systems
-	 */
-	mmiowb();
 }
 
 static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
@@ -3011,7 +3010,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 MODULE_DESCRIPTION("Intel(R) Gigabit Virtual Function Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 /* netdev.c */
diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
new file mode 100644
index 0000000..88c6f88
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c)  2018 Intel Corporation
+
+#
+# Intel(R) I225-LM/I225-V 2.5G Ethernet Controller
+#
+
+obj-$(CONFIG_IGC) += igc.o
+
+igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
+igc_ethtool.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
new file mode 100644
index 0000000..7e16345
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -0,0 +1,528 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_H_
+#define _IGC_H_
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/ethtool.h>
+#include <linux/sctp.h>
+
+#include "igc_hw.h"
+
+/* forward declaration */
+void igc_set_ethtool_ops(struct net_device *);
+
+struct igc_adapter;
+struct igc_ring;
+
+void igc_up(struct igc_adapter *adapter);
+void igc_down(struct igc_adapter *adapter);
+int igc_setup_tx_resources(struct igc_ring *ring);
+int igc_setup_rx_resources(struct igc_ring *ring);
+void igc_free_tx_resources(struct igc_ring *ring);
+void igc_free_rx_resources(struct igc_ring *ring);
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter);
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+			      const u32 max_rss_queues);
+int igc_reinit_queues(struct igc_adapter *adapter);
+void igc_write_rss_indir_tbl(struct igc_adapter *adapter);
+bool igc_has_link(struct igc_adapter *adapter);
+void igc_reset(struct igc_adapter *adapter);
+int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
+int igc_add_mac_steering_filter(struct igc_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags);
+int igc_del_mac_steering_filter(struct igc_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags);
+void igc_update_stats(struct igc_adapter *adapter);
+
+extern char igc_driver_name[];
+extern char igc_driver_version[];
+
+#define IGC_REGS_LEN			740
+#define IGC_RETA_SIZE			128
+
+/* Interrupt defines */
+#define IGC_START_ITR			648 /* ~6000 ints/sec */
+#define IGC_FLAG_HAS_MSI		BIT(0)
+#define IGC_FLAG_QUEUE_PAIRS		BIT(3)
+#define IGC_FLAG_DMAC			BIT(4)
+#define IGC_FLAG_NEED_LINK_UPDATE	BIT(9)
+#define IGC_FLAG_MEDIA_RESET		BIT(10)
+#define IGC_FLAG_MAS_ENABLE		BIT(12)
+#define IGC_FLAG_HAS_MSIX		BIT(13)
+#define IGC_FLAG_VLAN_PROMISC		BIT(15)
+#define IGC_FLAG_RX_LEGACY		BIT(16)
+
+#define IGC_FLAG_RSS_FIELD_IPV4_UDP	BIT(6)
+#define IGC_FLAG_RSS_FIELD_IPV6_UDP	BIT(7)
+
+#define IGC_MRQC_ENABLE_RSS_MQ		0x00000002
+#define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
+#define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
+
+#define IGC_4K_ITR			980
+#define IGC_20K_ITR			196
+#define IGC_70K_ITR			56
+
+#define IGC_DEFAULT_ITR		3 /* dynamic */
+#define IGC_MAX_ITR_USECS	10000
+#define IGC_MIN_ITR_USECS	10
+#define NON_Q_VECTORS		1
+#define MAX_MSIX_ENTRIES	10
+
+/* TX/RX descriptor defines */
+#define IGC_DEFAULT_TXD		256
+#define IGC_DEFAULT_TX_WORK	128
+#define IGC_MIN_TXD		80
+#define IGC_MAX_TXD		4096
+
+#define IGC_DEFAULT_RXD		256
+#define IGC_MIN_RXD		80
+#define IGC_MAX_RXD		4096
+
+/* Transmit and receive queues */
+#define IGC_MAX_RX_QUEUES		4
+#define IGC_MAX_TX_QUEUES		4
+
+#define MAX_Q_VECTORS			8
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
+
+/* Supported Rx Buffer Sizes */
+#define IGC_RXBUFFER_256		256
+#define IGC_RXBUFFER_2048		2048
+#define IGC_RXBUFFER_3072		3072
+
+#define AUTO_ALL_MODES		0
+#define IGC_RX_HDR_LEN			IGC_RXBUFFER_256
+
+/* RX and TX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ *           descriptors available in its onboard memory.
+ *           Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ *           available in host memory.
+ *           If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ *           descriptors until either it has this many to write back, or the
+ *           ITR timer expires.
+ */
+#define IGC_RX_PTHRESH			8
+#define IGC_RX_HTHRESH			8
+#define IGC_TX_PTHRESH			8
+#define IGC_TX_HTHRESH			1
+#define IGC_RX_WTHRESH			4
+#define IGC_TX_WTHRESH			16
+
+#define IGC_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define IGC_TS_HDR_LEN			16
+
+#define IGC_SKB_PAD			(NET_SKB_PAD + NET_IP_ALIGN)
+
+#if (PAGE_SIZE < 8192)
+#define IGC_MAX_FRAME_BUILD_SKB \
+	(SKB_WITH_OVERHEAD(IGC_RXBUFFER_2048) - IGC_SKB_PAD - IGC_TS_HDR_LEN)
+#else
+#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN)
+#endif
+
+/* How many Rx Buffers do we bundle into one write to the hardware? */
+#define IGC_RX_BUFFER_WRITE	16 /* Must be power of 2 */
+
+/* VLAN info */
+#define IGC_TX_FLAGS_VLAN_MASK	0xffff0000
+
+/* igc_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc,
+				      const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+enum igc_state_t {
+	__IGC_TESTING,
+	__IGC_RESETTING,
+	__IGC_DOWN,
+	__IGC_PTP_TX_IN_PROGRESS,
+};
+
+enum igc_tx_flags {
+	/* cmd_type flags */
+	IGC_TX_FLAGS_VLAN	= 0x01,
+	IGC_TX_FLAGS_TSO	= 0x02,
+	IGC_TX_FLAGS_TSTAMP	= 0x04,
+
+	/* olinfo flags */
+	IGC_TX_FLAGS_IPV4	= 0x10,
+	IGC_TX_FLAGS_CSUM	= 0x20,
+};
+
+enum igc_boards {
+	board_base,
+};
+
+/* The largest size we can write to the descriptor is 65535.  In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGC_MAX_TXD_PWR		15
+#define IGC_MAX_DATA_PER_TXD	BIT(IGC_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD)
+#define DESC_NEEDED	(MAX_SKB_FRAGS + 4)
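+/* For illustration, with IGC_MAX_DATA_PER_TXD = 32768:
+ *   TXD_USE_COUNT(1500)  -> 1 descriptor
+ *   TXD_USE_COUNT(65535) -> 2 descriptors (worst case for one fragment)
+ */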
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct igc_tx_buffer {
+	union igc_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	struct sk_buff *skb;
+	unsigned int bytecount;
+	u16 gso_segs;
+	__be16 protocol;
+
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct igc_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 page_offset;
+#else
+	__u16 page_offset;
+#endif
+	__u16 pagecnt_bias;
+};
+
+struct igc_tx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 restart_queue;
+	u64 restart_queue2;
+};
+
+struct igc_rx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 drops;
+	u64 csum_err;
+	u64 alloc_failed;
+};
+
+struct igc_rx_packet_stats {
+	u64 ipv4_packets;      /* IPv4 headers processed */
+	u64 ipv4e_packets;     /* IPv4E headers with extensions processed */
+	u64 ipv6_packets;      /* IPv6 headers processed */
+	u64 ipv6e_packets;     /* IPv6E headers with extensions processed */
+	u64 tcp_packets;       /* TCP headers processed */
+	u64 udp_packets;       /* UDP headers processed */
+	u64 sctp_packets;      /* SCTP headers processed */
+	u64 nfs_packets;       /* NFS headers processed */
+	u64 other_packets;
+};
+
+struct igc_ring_container {
+	struct igc_ring *ring;          /* pointer to linked list of rings */
+	unsigned int total_bytes;       /* total bytes processed this int */
+	unsigned int total_packets;     /* total packets processed this int */
+	u16 work_limit;                 /* total work allowed per interrupt */
+	u8 count;                       /* total number of rings in vector */
+	u8 itr;                         /* current ITR setting for ring */
+};
+
+struct igc_ring {
+	struct igc_q_vector *q_vector;  /* backlink to q_vector */
+	struct net_device *netdev;      /* back pointer to net_device */
+	struct device *dev;             /* device for dma mapping */
+	union {                         /* array of buffer info structs */
+		struct igc_tx_buffer *tx_buffer_info;
+		struct igc_rx_buffer *rx_buffer_info;
+	};
+	void *desc;                     /* descriptor ring memory */
+	unsigned long flags;            /* ring specific flags */
+	void __iomem *tail;             /* pointer to ring tail register */
+	dma_addr_t dma;                 /* phys address of the ring */
+	unsigned int size;              /* length of desc. ring in bytes */
+
+	u16 count;                      /* number of desc. in the ring */
+	u8 queue_index;                 /* logical index of the ring */
+	u8 reg_idx;                     /* physical index of the ring */
+	bool launchtime_enable;		/* true if LaunchTime is enabled */
+
+	/* everything past this point are written often */
+	u16 next_to_clean;
+	u16 next_to_use;
+	u16 next_to_alloc;
+
+	union {
+		/* TX */
+		struct {
+			struct igc_tx_queue_stats tx_stats;
+			struct u64_stats_sync tx_syncp;
+			struct u64_stats_sync tx_syncp2;
+		};
+		/* RX */
+		struct {
+			struct igc_rx_queue_stats rx_stats;
+			struct igc_rx_packet_stats pkt_stats;
+			struct u64_stats_sync rx_syncp;
+			struct sk_buff *skb;
+		};
+	};
+} ____cacheline_internodealigned_in_smp;
+
+struct igc_q_vector {
+	struct igc_adapter *adapter;    /* backlink */
+	void __iomem *itr_register;
+	u32 eims_value;                 /* EIMS mask value */
+
+	u16 itr_val;
+	u8 set_itr;
+
+	struct igc_ring_container rx, tx;
+
+	struct napi_struct napi;
+
+	struct rcu_head rcu;    /* to avoid race with update stats on free */
+	char name[IFNAMSIZ + 9];
+	struct net_device poll_dev;
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct igc_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+#define MAX_ETYPE_FILTER		(4 - 1)
+
+enum igc_filter_match_flags {
+	IGC_FILTER_FLAG_ETHER_TYPE =	0x1,
+	IGC_FILTER_FLAG_VLAN_TCI   =	0x2,
+	IGC_FILTER_FLAG_SRC_MAC_ADDR =	0x4,
+	IGC_FILTER_FLAG_DST_MAC_ADDR =	0x8,
+};
+
+/* RX network flow classification data structure */
+struct igc_nfc_input {
+	/* Byte layout in order, all values with MSB first:
+	 * match_flags - 1 byte
+	 * etype - 2 bytes
+	 * vlan_tci - 2 bytes
+	 */
+	u8 match_flags;
+	__be16 etype;
+	__be16 vlan_tci;
+	u8 src_addr[ETH_ALEN];
+	u8 dst_addr[ETH_ALEN];
+};
+
+struct igc_nfc_filter {
+	struct hlist_node nfc_node;
+	struct igc_nfc_input filter;
+	unsigned long cookie;
+	u16 etype_reg_index;
+	u16 sw_idx;
+	u16 action;
+};
+
+struct igc_mac_addr {
+	u8 addr[ETH_ALEN];
+	u8 queue;
+	u8 state; /* bitmask */
+};
+
+#define IGC_MAC_STATE_DEFAULT		0x1
+#define IGC_MAC_STATE_IN_USE		0x2
+#define IGC_MAC_STATE_SRC_ADDR		0x4
+#define IGC_MAC_STATE_QUEUE_STEERING	0x8
+
+#define IGC_MAX_RXNFC_FILTERS		16
+
+/* Board specific private data structure */
+struct igc_adapter {
+	struct net_device *netdev;
+
+	unsigned long state;
+	unsigned int flags;
+	unsigned int num_q_vectors;
+
+	struct msix_entry *msix_entries;
+
+	/* TX */
+	u16 tx_work_limit;
+	u32 tx_timeout_count;
+	int num_tx_queues;
+	struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES];
+
+	/* RX */
+	int num_rx_queues;
+	struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES];
+
+	struct timer_list watchdog_timer;
+	struct timer_list dma_err_timer;
+	struct timer_list phy_info_timer;
+
+	u16 link_speed;
+	u16 link_duplex;
+
+	u8 port_num;
+
+	u8 __iomem *io_addr;
+	/* Interrupt Throttle Rate */
+	u32 rx_itr_setting;
+	u32 tx_itr_setting;
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+	struct work_struct dma_err_task;
+	bool fc_autoneg;
+
+	u8 tx_timeout_factor;
+
+	int msg_enable;
+	u32 max_frame_size;
+	u32 min_frame_size;
+
+	/* OS defined structs */
+	struct pci_dev *pdev;
+	/* lock for statistics */
+	spinlock_t stats64_lock;
+	struct rtnl_link_stats64 stats64;
+
+	/* structs defined in igc_hw.h */
+	struct igc_hw hw;
+	struct igc_hw_stats stats;
+
+	struct igc_q_vector *q_vector[MAX_Q_VECTORS];
+	u32 eims_enable_mask;
+	u32 eims_other;
+
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+	u32 rx_hwtstamp_cleared;
+	u32 *shadow_vfta;
+
+	u32 rss_queues;
+	u32 rss_indir_tbl_init;
+
+	/* RX network flow classification support */
+	struct hlist_head nfc_filter_list;
+	struct hlist_head cls_flower_list;
+	unsigned int nfc_filter_count;
+
+	/* lock for RX network flow classification filter */
+	spinlock_t nfc_lock;
+	bool etype_bitmap[MAX_ETYPE_FILTER];
+
+	struct igc_mac_addr *mac_table;
+
+	u8 rss_indir_tbl[IGC_RETA_SIZE];
+
+	unsigned long link_check_timeout;
+	struct igc_info ei;
+};
+
+/* igc_desc_unused - calculate if we have unused descriptors */
+static inline u16 igc_desc_unused(const struct igc_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
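+	/* One slot is always left empty so that next_to_use ==
+	 * next_to_clean unambiguously means "ring empty".  E.g. with
+	 * count = 256:
+	 *   ntc = 10,  ntu = 200  ->  256 + 10 - 200 - 1 = 65 free
+	 *   ntc = 200, ntu = 10   ->    0 + 200 - 10 - 1 = 189 free
+	 */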
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+static inline s32 igc_get_phy_info(struct igc_hw *hw)
+{
+	if (hw->phy.ops.get_phy_info)
+		return hw->phy.ops.get_phy_info(hw);
+
+	return 0;
+}
+
+static inline s32 igc_reset_phy(struct igc_hw *hw)
+{
+	if (hw->phy.ops.reset)
+		return hw->phy.ops.reset(hw);
+
+	return 0;
+}
+
+static inline struct netdev_queue *txring_txq(const struct igc_ring *tx_ring)
+{
+	return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+
+enum igc_ring_flags_t {
+	IGC_RING_FLAG_RX_3K_BUFFER,
+	IGC_RING_FLAG_RX_BUILD_SKB_ENABLED,
+	IGC_RING_FLAG_RX_SCTP_CSUM,
+	IGC_RING_FLAG_RX_LB_VLAN_BSWAP,
+	IGC_RING_FLAG_TX_CTX_IDX,
+	IGC_RING_FLAG_TX_DETECT_HANG
+};
+
+#define ring_uses_large_buffer(ring) \
+	test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+
+#define ring_uses_build_skb(ring) \
+	test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
+static inline unsigned int igc_rx_bufsz(struct igc_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return IGC_RXBUFFER_3072;
+
+	if (ring_uses_build_skb(ring))
+		return IGC_MAX_FRAME_BUILD_SKB + IGC_TS_HDR_LEN;
+#endif
+	return IGC_RXBUFFER_2048;
+}
+
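+/* Receive pages are split in half between two buffers, so on 4 KiB page
+ * systems a 3072-byte buffer needs an order-1 (8 KiB) page while the default
+ * 2048-byte buffer fits in half an order-0 page; on pages of 8 KiB and up,
+ * order 0 always suffices.
+ */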
+static inline unsigned int igc_rx_pg_order(struct igc_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return 1;
+#endif
+	return 0;
+}
+
+static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data)
+{
+	if (hw->phy.ops.read_reg)
+		return hw->phy.ops.read_reg(hw, offset, data);
+
+	return 0;
+}
+
+/* forward declaration */
+void igc_reinit_locked(struct igc_adapter *);
+int igc_add_filter(struct igc_adapter *adapter,
+		   struct igc_nfc_filter *input);
+int igc_erase_filter(struct igc_adapter *adapter,
+		     struct igc_nfc_filter *input);
+
+#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
+
+#define IGC_TXD_DCMD	(IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
+
+#define IGC_RX_DESC(R, i)       \
+	(&(((union igc_adv_rx_desc *)((R)->desc))[i]))
+#define IGC_TX_DESC(R, i)       \
+	(&(((union igc_adv_tx_desc *)((R)->desc))[i]))
+#define IGC_TX_CTXTDESC(R, i)   \
+	(&(((struct igc_adv_tx_context_desc *)((R)->desc))[i]))
+
+#endif /* _IGC_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
new file mode 100644
index 0000000..db289bc
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+#include "igc_i225.h"
+#include "igc_mac.h"
+#include "igc_base.h"
+#include "igc.h"
+
+/**
+ * igc_reset_hw_base - Reset hardware
+ * @hw: pointer to the HW structure
+ *
+ * This resets the hardware into a known state.  This is a
+ * function pointer entry point called by the API module.
+ */
+static s32 igc_reset_hw_base(struct igc_hw *hw)
+{
+	s32 ret_val;
+	u32 ctrl;
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = igc_disable_pcie_master(hw);
+	if (ret_val)
+		hw_dbg("PCI-E Master disable polling has failed.\n");
+
+	hw_dbg("Masking off all interrupts\n");
+	wr32(IGC_IMC, 0xffffffff);
+
+	wr32(IGC_RCTL, 0);
+	wr32(IGC_TCTL, IGC_TCTL_PSP);
+	wrfl();
+
+	usleep_range(10000, 20000);
+
+	ctrl = rd32(IGC_CTRL);
+
+	hw_dbg("Issuing a global reset to MAC\n");
+	wr32(IGC_CTRL, ctrl | IGC_CTRL_DEV_RST);
+
+	ret_val = igc_get_auto_rd_done(hw);
+	if (ret_val) {
+		/* When auto config read does not complete, do not
+		 * return with an error. This can happen in situations
+		 * where there is no eeprom and prevents getting link.
+		 */
+		hw_dbg("Auto Read Done did not complete\n");
+	}
+
+	/* Clear any pending interrupt events. */
+	wr32(IGC_IMC, 0xffffffff);
+	rd32(IGC_ICR);
+
+	return ret_val;
+}
+
+/**
+ * igc_init_nvm_params_base - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_nvm_params_base(struct igc_hw *hw)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 eecd = rd32(IGC_EECD);
+	u16 size;
+
+	size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >>
+		     IGC_EECD_SIZE_EX_SHIFT);
+
+	/* Added to a constant, "size" becomes the left-shift value
+	 * for setting word_size.
+	 */
+	size += NVM_WORD_SIZE_BASE_SHIFT;
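+	/* e.g. a SIZE_EX field of 2 with NVM_WORD_SIZE_BASE_SHIFT == 6 (its
+	 * value elsewhere in the e1000 family) gives size = 8, so word_size
+	 * below becomes BIT(8) = 256 16-bit words.
+	 */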
+
+	/* Just in case size is out of range, cap it to the largest
+	 * EEPROM size supported
+	 */
+	if (size > 15)
+		size = 15;
+
+	nvm->type = igc_nvm_eeprom_spi;
+	nvm->word_size = BIT(size);
+	nvm->opcode_bits = 8;
+	nvm->delay_usec = 1;
+
+	nvm->page_size = eecd & IGC_EECD_ADDR_BITS ? 32 : 8;
+	nvm->address_bits = eecd & IGC_EECD_ADDR_BITS ?
+			    16 : 8;
+
+	if (nvm->word_size == BIT(15))
+		nvm->page_size = 128;
+
+	return 0;
+}
+
+/**
+ * igc_setup_copper_link_base - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Configures the link for auto-neg or forced speed and duplex.  Then we check
+ * for link; once link is established, collision distance and flow control are
+ * configured.
+ */
+static s32 igc_setup_copper_link_base(struct igc_hw *hw)
+{
+	s32  ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl |= IGC_CTRL_SLU;
+	ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX);
+	wr32(IGC_CTRL, ctrl);
+
+	ret_val = igc_setup_copper_link(hw);
+
+	return ret_val;
+}
+
+/**
+ * igc_init_mac_params_base - Init MAC func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_mac_params_base(struct igc_hw *hw)
+{
+	struct igc_dev_spec_base *dev_spec = &hw->dev_spec._base;
+	struct igc_mac_info *mac = &hw->mac;
+
+	/* Set mta register count */
+	mac->mta_reg_count = 128;
+	mac->rar_entry_count = IGC_RAR_ENTRIES;
+
+	/* reset */
+	mac->ops.reset_hw = igc_reset_hw_base;
+
+	mac->ops.acquire_swfw_sync = igc_acquire_swfw_sync_i225;
+	mac->ops.release_swfw_sync = igc_release_swfw_sync_i225;
+
+	/* Allow a single clear of the SW semaphore on I225 */
+	if (mac->type == igc_i225)
+		dev_spec->clear_semaphore_once = true;
+
+	/* physical interface link setup */
+	mac->ops.setup_physical_interface = igc_setup_copper_link_base;
+
+	return 0;
+}
+
+/**
+ * igc_init_phy_params_base - Init PHY func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_phy_params_base(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+
+	if (hw->phy.media_type != igc_media_type_copper) {
+		phy->type = igc_phy_none;
+		goto out;
+	}
+
+	phy->autoneg_mask	= AUTONEG_ADVERTISE_SPEED_DEFAULT_2500;
+	phy->reset_delay_us	= 100;
+
+	/* set lan id */
+	hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >>
+			IGC_STATUS_FUNC_SHIFT;
+
+	/* Make sure the PHY is in a good state. Several people have reported
+	 * firmware leaving the PHY's page select register set to something
+	 * other than the default of zero, which causes the PHY ID read to
+	 * access something other than the intended register.
+	 */
+	ret_val = hw->phy.ops.reset(hw);
+	if (ret_val) {
+		hw_dbg("Error resetting the PHY.\n");
+		goto out;
+	}
+
+	ret_val = igc_get_phy_id(hw);
+	if (ret_val)
+		return ret_val;
+
+	igc_check_for_copper_link(hw);
+
+	/* Verify phy id and set remaining function pointers */
+	switch (phy->id) {
+	case I225_I_PHY_ID:
+		phy->type	= igc_phy_i225;
+		break;
+	default:
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+static s32 igc_get_invariants_base(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
+
+	switch (hw->device_id) {
+	case IGC_DEV_ID_I225_LM:
+	case IGC_DEV_ID_I225_V:
+	case IGC_DEV_ID_I225_I:
+	case IGC_DEV_ID_I220_V:
+	case IGC_DEV_ID_I225_K:
+		mac->type = igc_i225;
+		break;
+	default:
+		return -IGC_ERR_MAC_INIT;
+	}
+
+	hw->phy.media_type = igc_media_type_copper;
+
+	/* mac initialization and operations */
+	ret_val = igc_init_mac_params_base(hw);
+	if (ret_val)
+		goto out;
+
+	/* NVM initialization */
+	ret_val = igc_init_nvm_params_base(hw);
+	switch (hw->mac.type) {
+	case igc_i225:
+		ret_val = igc_init_nvm_params_i225(hw);
+		break;
+	default:
+		break;
+	}
+
+	/* setup PHY parameters */
+	ret_val = igc_init_phy_params_base(hw);
+	if (ret_val)
+		goto out;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_acquire_phy_base - Acquire rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * Acquire access rights to the correct PHY.  This is a
+ * function pointer entry point called by the API module.
+ */
+static s32 igc_acquire_phy_base(struct igc_hw *hw)
+{
+	u16 mask = IGC_SWFW_PHY0_SM;
+
+	return hw->mac.ops.acquire_swfw_sync(hw, mask);
+}
+
+/**
+ * igc_release_phy_base - Release rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * A wrapper to release access rights to the correct PHY.  This is a
+ * function pointer entry point called by the API module.
+ */
+static void igc_release_phy_base(struct igc_hw *hw)
+{
+	u16 mask = IGC_SWFW_PHY0_SM;
+
+	hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+/**
+ * igc_init_hw_base - Initialize hardware
+ * @hw: pointer to the HW structure
+ *
+ * This inits the hardware readying it for operation.
+ */
+static s32 igc_init_hw_base(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	u16 i, rar_count = mac->rar_entry_count;
+	s32 ret_val = 0;
+
+	/* Setup the receive address */
+	igc_init_rx_addrs(hw, rar_count);
+
+	/* Zero out the Multicast HASH table */
+	hw_dbg("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		array_wr32(IGC_MTA, i, 0);
+
+	/* Zero out the Unicast HASH table */
+	hw_dbg("Zeroing the UTA\n");
+	for (i = 0; i < mac->uta_reg_count; i++)
+		array_wr32(IGC_UTA, i, 0);
+
+	/* Setup link and flow control */
+	ret_val = igc_setup_link(hw);
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	igc_clear_hw_cntrs_base(hw);
+
+	return ret_val;
+}
+
+/**
+ * igc_power_down_phy_copper_base - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, to turn off the link during
+ * a driver unload, or when Wake-on-LAN is not enabled, remove the link.
+ */
+void igc_power_down_phy_copper_base(struct igc_hw *hw)
+{
+	/* If the management interface is not enabled, then power down */
+	if (!(igc_enable_mng_pass_thru(hw) || igc_check_reset_block(hw)))
+		igc_power_down_phy_copper(hw);
+}
+
+/**
+ * igc_rx_fifo_flush_base - Clean rx fifo after Rx enable
+ * @hw: pointer to the HW structure
+ *
+ * After Rx enable, if manageability is enabled then there is likely some
+ * bad data at the start of the fifo and possibly in the DMA fifo.  This
+ * function clears the fifos and flushes any packets that came in as rx was
+ * being enabled.
+ */
+void igc_rx_fifo_flush_base(struct igc_hw *hw)
+{
+	u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
+	int i, ms_wait;
+
+	/* disable IPv6 options as per hardware errata */
+	rfctl = rd32(IGC_RFCTL);
+	rfctl |= IGC_RFCTL_IPV6_EX_DIS;
+	wr32(IGC_RFCTL, rfctl);
+
+	if (!(rd32(IGC_MANC) & IGC_MANC_RCV_TCO_EN))
+		return;
+
+	/* Disable all Rx queues */
+	for (i = 0; i < 4; i++) {
+		rxdctl[i] = rd32(IGC_RXDCTL(i));
+		wr32(IGC_RXDCTL(i),
+		     rxdctl[i] & ~IGC_RXDCTL_QUEUE_ENABLE);
+	}
+	/* Poll all queues to verify they have shut down */
+	for (ms_wait = 0; ms_wait < 10; ms_wait++) {
+		usleep_range(1000, 2000);
+		rx_enabled = 0;
+		for (i = 0; i < 4; i++)
+			rx_enabled |= rd32(IGC_RXDCTL(i));
+		if (!(rx_enabled & IGC_RXDCTL_QUEUE_ENABLE))
+			break;
+	}
+
+	if (ms_wait == 10)
+		pr_debug("Queue disable timed out after 10ms\n");
+
+	/* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
+	 * incoming packets are rejected.  Set enable and wait 2ms so that
+	 * any packet that was coming in as RCTL.EN was set is flushed
+	 */
+	wr32(IGC_RFCTL, rfctl & ~IGC_RFCTL_LEF);
+
+	rlpml = rd32(IGC_RLPML);
+	wr32(IGC_RLPML, 0);
+
+	rctl = rd32(IGC_RCTL);
+	temp_rctl = rctl & ~(IGC_RCTL_EN | IGC_RCTL_SBP);
+	temp_rctl |= IGC_RCTL_LPE;
+
+	wr32(IGC_RCTL, temp_rctl);
+	wr32(IGC_RCTL, temp_rctl | IGC_RCTL_EN);
+	wrfl();
+	usleep_range(2000, 3000);
+
+	/* Enable Rx queues that were previously enabled and restore our
+	 * previous state
+	 */
+	for (i = 0; i < 4; i++)
+		wr32(IGC_RXDCTL(i), rxdctl[i]);
+	wr32(IGC_RCTL, rctl);
+	wrfl();
+
+	wr32(IGC_RLPML, rlpml);
+	wr32(IGC_RFCTL, rfctl);
+
+	/* Flush receive errors generated by workaround */
+	rd32(IGC_ROC);
+	rd32(IGC_RNBC);
+	rd32(IGC_MPC);
+}
+
+static struct igc_mac_operations igc_mac_ops_base = {
+	.init_hw		= igc_init_hw_base,
+	.check_for_link		= igc_check_for_copper_link,
+	.rar_set		= igc_rar_set,
+	.read_mac_addr		= igc_read_mac_addr,
+	.get_speed_and_duplex	= igc_get_speed_and_duplex_copper,
+};
+
+static const struct igc_phy_operations igc_phy_ops_base = {
+	.acquire		= igc_acquire_phy_base,
+	.release		= igc_release_phy_base,
+	.reset			= igc_phy_hw_reset,
+	.read_reg		= igc_read_phy_reg_gpy,
+	.write_reg		= igc_write_phy_reg_gpy,
+};
+
+const struct igc_info igc_base_info = {
+	.get_invariants		= igc_get_invariants_base,
+	.mac_ops		= &igc_mac_ops_base,
+	.phy_ops		= &igc_phy_ops_base,
+};
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
new file mode 100644
index 0000000..ea627ce
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_BASE_H_
+#define _IGC_BASE_H_
+
+/* forward declaration */
+void igc_rx_fifo_flush_base(struct igc_hw *hw);
+void igc_power_down_phy_copper_base(struct igc_hw *hw);
+
+/* Transmit Descriptor - Advanced */
+union igc_adv_tx_desc {
+	struct {
+		__le64 buffer_addr;    /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd;       /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Context descriptors */
+struct igc_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 launch_time;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IGC_ADVTXD_MAC_TSTAMP	0x00080000 /* IEEE1588 Timestamp packet */
+#define IGC_ADVTXD_DTYP_CTXT	0x00200000 /* Advanced Context Descriptor */
+#define IGC_ADVTXD_DTYP_DATA	0x00300000 /* Advanced Data Descriptor */
+#define IGC_ADVTXD_DCMD_EOP	0x01000000 /* End of Packet */
+#define IGC_ADVTXD_DCMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_ADVTXD_DCMD_RS	0x08000000 /* Report Status */
+#define IGC_ADVTXD_DCMD_DEXT	0x20000000 /* Descriptor extension (1=Adv) */
+#define IGC_ADVTXD_DCMD_VLE	0x40000000 /* VLAN pkt enable */
+#define IGC_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
+#define IGC_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
+
+#define IGC_RAR_ENTRIES		16
+
+/* Receive Descriptor - Advanced */
+union igc_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /*RSS type, Pkt type*/
+					/* Split Header, header buffer len */
+					__le16 hdr_info;
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+/* Additional Transmit Descriptor Control definitions */
+#define IGC_TXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Tx Queue */
+
+/* Additional Receive Descriptor Control definitions */
+#define IGC_RXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Rx Queue */
+
+/* SRRCTL bit definitions */
+#define IGC_SRRCTL_BSIZEPKT_SHIFT		10 /* Shift _right_ */
+#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT		2  /* Shift _left_ */
+#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF	0x02000000
+
+#endif /* _IGC_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
new file mode 100644
index 0000000..f3f2325
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_DEFINES_H_
+#define _IGC_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define REQ_RX_DESCRIPTOR_MULTIPLE	8
+
+#define IGC_CTRL_EXT_DRV_LOAD	0x10000000 /* Drv loaded bit for FW */
+
+/* Physical Func Reset Done Indication */
+#define IGC_CTRL_EXT_LINK_MODE_MASK	0x00C00000
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define COPPER_LINK_UP_LIMIT		10
+#define PHY_AUTO_NEG_LIMIT		45
+#define PHY_FORCE_LIMIT			20
+
+/* Number of 100 microsecond intervals we wait for PCIe master disable */
+#define MASTER_DISABLE_TIMEOUT		800
+/* Blocks new Master requests */
+#define IGC_CTRL_GIO_MASTER_DISABLE	0x00000004
+/* Status of Master requests. */
+#define IGC_STATUS_GIO_MASTER_ENABLE	0x00080000
+
+/* Receive Address
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots.  However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define IGC_RAH_AV		0x80000000 /* Receive address valid */
+#define IGC_RAH_POOL_1		0x00040000
+#define IGC_RAL_MAC_ADDR_LEN	4
+#define IGC_RAH_MAC_ADDR_LEN	2
+
+/* Error Codes */
+#define IGC_SUCCESS			0
+#define IGC_ERR_NVM			1
+#define IGC_ERR_PHY			2
+#define IGC_ERR_CONFIG			3
+#define IGC_ERR_PARAM			4
+#define IGC_ERR_MAC_INIT		5
+#define IGC_ERR_RESET			9
+#define IGC_ERR_MASTER_REQUESTS_PENDING	10
+#define IGC_ERR_BLK_PHY_RESET		12
+#define IGC_ERR_SWFW_SYNC		13
+
+/* Device Control */
+#define IGC_CTRL_DEV_RST	0x20000000  /* Device reset */
+
+#define IGC_CTRL_PHY_RST	0x80000000  /* PHY Reset */
+#define IGC_CTRL_SLU		0x00000040  /* Set link up (Force Link) */
+#define IGC_CTRL_FRCSPD		0x00000800  /* Force Speed */
+#define IGC_CTRL_FRCDPX		0x00001000  /* Force Duplex */
+
+#define IGC_CTRL_RFCE		0x08000000  /* Receive Flow Control enable */
+#define IGC_CTRL_TFCE		0x10000000  /* Transmit flow control enable */
+
+#define IGC_CONNSW_AUTOSENSE_EN	0x1
+
+/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
+#define MAX_JUMBO_FRAME_SIZE	0x2600
+
+/* PBA constants */
+#define IGC_PBA_34K		0x0022
+
+/* SW Semaphore Register */
+#define IGC_SWSM_SMBI		0x00000001 /* Driver Semaphore bit */
+#define IGC_SWSM_SWESMBI	0x00000002 /* FW Semaphore bit */
+
+/* SWFW_SYNC Definitions */
+#define IGC_SWFW_EEP_SM		0x1
+#define IGC_SWFW_PHY0_SM	0x2
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_10T_HD_CAPS	0x0020   /* 10T   Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS	0x0040   /* 10T   Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS	0x0080   /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS	0x0100   /* 100TX Full Duplex Capable */
+#define NWAY_AR_PAUSE		0x0400   /* Pause operation desired */
+#define NWAY_AR_ASM_DIR		0x0800   /* Asymmetric Pause Direction bit */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_PAUSE		0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR	0x0800 /* LP Asymmetric Pause Direction bit */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE	0x0080 /* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS	0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS	0x0200 /* Advertise 1000T FD capability  */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_REMOTE_RX_STATUS	0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS	0x2000 /* Local receiver OK */
+
+/* PHY GPY 211 registers */
+#define STANDARD_AN_REG_MASK	0x0007 /* MMD */
+#define ANEG_MULTIGBT_AN_CTRL	0x0020 /* MULTI GBT AN Control Register */
+#define MMD_DEVADDR_SHIFT	16     /* Shift MMD to higher bits */
+#define CR_2500T_FD_CAPS	0x0080 /* Advertise 2500T FD capability */
+
+/* NVM Control */
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT		10
+#define IGC_EECD_AUTO_RD		0x00000200  /* NVM Auto Read done */
+#define IGC_EECD_REQ		0x00000040 /* NVM Access Request */
+#define IGC_EECD_GNT		0x00000080 /* NVM Access Grant */
+/* NVM Addressing bits based on type 0=small, 1=large */
+#define IGC_EECD_ADDR_BITS		0x00000400
+#define IGC_NVM_GRANT_ATTEMPTS		1000 /* NVM # attempts to gain grant */
+#define IGC_EECD_SIZE_EX_MASK		0x00007800  /* NVM Size */
+#define IGC_EECD_SIZE_EX_SHIFT		11
+#define IGC_EECD_FLUPD_I225		0x00800000 /* Update FLASH */
+#define IGC_EECD_FLUDONE_I225		0x04000000 /* Update FLASH done */
+#define IGC_EECD_FLASH_DETECTED_I225	0x00080000 /* FLASH detected */
+#define IGC_FLUDONE_ATTEMPTS		20000
+#define IGC_EERD_EEWR_MAX_COUNT		512 /* buffered EEPROM words rw */
+
+/* Offset to data in NVM read/write registers */
+#define IGC_NVM_RW_REG_DATA	16
+#define IGC_NVM_RW_REG_DONE	2    /* Offset to READ/WRITE done bit */
+#define IGC_NVM_RW_REG_START	1    /* Start operation */
+#define IGC_NVM_RW_ADDR_SHIFT	2    /* Shift to the address bits */
+#define IGC_NVM_POLL_READ	0    /* Flag for polling for read complete */
+
+/* NVM Word Offsets */
+#define NVM_CHECKSUM_REG		0x003F
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM				0xBABA
+
+#define NVM_PBA_OFFSET_0		8
+#define NVM_PBA_OFFSET_1		9
+#define NVM_RESERVED_WORD		0xFFFF
+#define NVM_PBA_PTR_GUARD		0xFAFA
+#define NVM_WORD_SIZE_BASE_SHIFT	6
+
+/* Collision related configuration parameters */
+#define IGC_COLLISION_THRESHOLD		15
+#define IGC_CT_SHIFT			4
+#define IGC_COLLISION_DISTANCE		63
+#define IGC_COLD_SHIFT			12
+
+/* Device Status */
+#define IGC_STATUS_FD		0x00000001      /* Full duplex: 0=half, 1=full */
+#define IGC_STATUS_LU		0x00000002      /* Link up: 0=no, 1=link */
+#define IGC_STATUS_FUNC_MASK	0x0000000C      /* PCI Function Mask */
+#define IGC_STATUS_FUNC_SHIFT	2
+#define IGC_STATUS_FUNC_1	0x00000004      /* Function 1 */
+#define IGC_STATUS_TXOFF	0x00000010      /* transmission paused */
+#define IGC_STATUS_SPEED_100	0x00000040      /* Speed 100Mb/s */
+#define IGC_STATUS_SPEED_1000	0x00000080      /* Speed 1000Mb/s */
+#define IGC_STATUS_SPEED_2500	0x00400000	/* Speed 2.5Gb/s */
+
+#define SPEED_10		10
+#define SPEED_100		100
+#define SPEED_1000		1000
+#define SPEED_2500		2500
+#define HALF_DUPLEX		1
+#define FULL_DUPLEX		2
+
+/* 1Gbps and 2.5Gbps half duplex are neither supported nor spec-compliant. */
+#define ADVERTISE_10_HALF		0x0001
+#define ADVERTISE_10_FULL		0x0002
+#define ADVERTISE_100_HALF		0x0004
+#define ADVERTISE_100_FULL		0x0008
+#define ADVERTISE_1000_HALF		0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL		0x0020
+#define ADVERTISE_2500_HALF		0x0040 /* Not used, just FYI */
+#define ADVERTISE_2500_FULL		0x0080
+
+#define IGC_ALL_SPEED_DUPLEX_2500 ( \
+	ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+	ADVERTISE_100_FULL | ADVERTISE_1000_FULL | ADVERTISE_2500_FULL)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT_2500	IGC_ALL_SPEED_DUPLEX_2500
+
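Taken together, the STATUS bits and the speed/duplex constants above are enough to decode the current link, roughly what igc_get_speed_and_duplex_copper() does. A minimal sketch, with status standing in for rd32(IGC_STATUS):

static void example_decode_link(u32 status, u16 *speed, u16 *duplex)
{
	if (status & IGC_STATUS_SPEED_2500)
		*speed = SPEED_2500;
	else if (status & IGC_STATUS_SPEED_1000)
		*speed = SPEED_1000;
	else if (status & IGC_STATUS_SPEED_100)
		*speed = SPEED_100;
	else
		*speed = SPEED_10;

	*duplex = (status & IGC_STATUS_FD) ? FULL_DUPLEX : HALF_DUPLEX;
}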
+/* Interrupt Cause Read */
+#define IGC_ICR_TXDW		BIT(0)	/* Transmit desc written back */
+#define IGC_ICR_TXQE		BIT(1)	/* Transmit Queue empty */
+#define IGC_ICR_LSC		BIT(2)	/* Link Status Change */
+#define IGC_ICR_RXSEQ		BIT(3)	/* Rx sequence error */
+#define IGC_ICR_RXDMT0		BIT(4)	/* Rx desc min. threshold (0) */
+#define IGC_ICR_RXO		BIT(6)	/* Rx overrun */
+#define IGC_ICR_RXT0		BIT(7)	/* Rx timer intr (ring 0) */
+#define IGC_ICR_DRSTA		BIT(30)	/* Device Reset Asserted */
+
+/* If this bit is asserted, the driver should claim the interrupt */
+#define IGC_ICR_INT_ASSERTED	BIT(31)
+
+#define IGC_ICS_RXT0		IGC_ICR_RXT0 /* Rx timer intr */
+
+#define IMS_ENABLE_MASK ( \
+	IGC_IMS_RXT0   |    \
+	IGC_IMS_TXDW   |    \
+	IGC_IMS_RXDMT0 |    \
+	IGC_IMS_RXSEQ  |    \
+	IGC_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define IGC_IMS_TXDW		IGC_ICR_TXDW	/* Tx desc written back */
+#define IGC_IMS_RXSEQ		IGC_ICR_RXSEQ	/* Rx sequence error */
+#define IGC_IMS_LSC		IGC_ICR_LSC	/* Link Status Change */
+#define IGC_IMS_DOUTSYNC	IGC_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define IGC_IMS_DRSTA		IGC_ICR_DRSTA	/* Device Reset Asserted */
+#define IGC_IMS_RXT0		IGC_ICR_RXT0	/* Rx timer intr */
+#define IGC_IMS_RXDMT0		IGC_ICR_RXDMT0	/* Rx desc min. threshold */
+
+#define IGC_QVECTOR_MASK	0x7FFC		/* Q-vector mask */
+#define IGC_ITR_VAL_MASK	0x04		/* ITR value mask */
+
+/* Interrupt Cause Set */
+#define IGC_ICS_LSC		IGC_ICR_LSC       /* Link Status Change */
+#define IGC_ICS_RXDMT0		IGC_ICR_RXDMT0    /* rx desc min. threshold */
+#define IGC_ICS_DRSTA		IGC_ICR_DRSTA     /* Device Reset Asserted */
+
+#define IGC_ICR_DOUTSYNC	0x10000000 /* NIC DMA out of sync */
+#define IGC_EITR_CNT_IGNR	0x80000000 /* Don't reset counters on write */
+#define IGC_IVAR_VALID		0x80
+#define IGC_GPIE_NSICR		0x00000001
+#define IGC_GPIE_MSIX_MODE	0x00000010
+#define IGC_GPIE_EIAME		0x40000000
+#define IGC_GPIE_PBA		0x80000000
+
+/* Transmit Descriptor bit definitions */
+#define IGC_TXD_DTYP_D		0x00100000 /* Data Descriptor */
+#define IGC_TXD_DTYP_C		0x00000000 /* Context Descriptor */
+#define IGC_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define IGC_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define IGC_TXD_CMD_EOP		0x01000000 /* End of Packet */
+#define IGC_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_TXD_CMD_IC		0x04000000 /* Insert Checksum */
+#define IGC_TXD_CMD_RS		0x08000000 /* Report Status */
+#define IGC_TXD_CMD_RPS		0x10000000 /* Report Packet Sent */
+#define IGC_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define IGC_TXD_CMD_VLE		0x40000000 /* Add VLAN tag */
+#define IGC_TXD_CMD_IDE		0x80000000 /* Enable Tidv register */
+#define IGC_TXD_STAT_DD		0x00000001 /* Descriptor Done */
+#define IGC_TXD_STAT_EC		0x00000002 /* Excess Collisions */
+#define IGC_TXD_STAT_LC		0x00000004 /* Late Collisions */
+#define IGC_TXD_STAT_TU		0x00000008 /* Transmit underrun */
+#define IGC_TXD_CMD_TCP		0x01000000 /* TCP packet */
+#define IGC_TXD_CMD_IP		0x02000000 /* IP packet */
+#define IGC_TXD_CMD_TSE		0x04000000 /* TCP Seg enable */
+#define IGC_TXD_STAT_TC		0x00000004 /* Tx Underrun */
+#define IGC_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define IGC_TCTL_EN		0x00000002 /* enable Tx */
+#define IGC_TCTL_PSP		0x00000008 /* pad short packets */
+#define IGC_TCTL_CT		0x00000ff0 /* collision threshold */
+#define IGC_TCTL_COLD		0x003ff000 /* collision distance */
+#define IGC_TCTL_RTLC		0x01000000 /* Re-transmit on late collision */
+#define IGC_TCTL_MULR		0x10000000 /* Multiple request support */
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW	0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH	0x00000100
+#define FLOW_CONTROL_TYPE		0x8808
+/* Enable XON frame transmission */
+#define IGC_FCRTL_XONE			0x80000000
+
+/* Management Control */
+#define IGC_MANC_RCV_TCO_EN	0x00020000 /* Receive TCO Packets Enabled */
+#define IGC_MANC_BLK_PHY_RST_ON_IDE	0x00040000 /* Block phy resets */
+
+/* Receive Control */
+#define IGC_RCTL_RST		0x00000001 /* Software reset */
+#define IGC_RCTL_EN		0x00000002 /* enable */
+#define IGC_RCTL_SBP		0x00000004 /* store bad packet */
+#define IGC_RCTL_UPE		0x00000008 /* unicast promisc enable */
+#define IGC_RCTL_MPE		0x00000010 /* multicast promisc enable */
+#define IGC_RCTL_LPE		0x00000020 /* long packet enable */
+#define IGC_RCTL_LBM_MAC	0x00000040 /* MAC loopback mode */
+#define IGC_RCTL_LBM_TCVR	0x000000C0 /* tcvr loopback mode */
+
+#define IGC_RCTL_RDMTS_HALF	0x00000000 /* Rx desc min thresh size */
+#define IGC_RCTL_BAM		0x00008000 /* broadcast enable */
+
+/* Receive Descriptor bit definitions */
+#define IGC_RXD_STAT_EOP	0x02    /* End of Packet */
+
+#define IGC_RXDEXT_STATERR_CE		0x01000000
+#define IGC_RXDEXT_STATERR_SE		0x02000000
+#define IGC_RXDEXT_STATERR_SEQ		0x04000000
+#define IGC_RXDEXT_STATERR_CXE		0x10000000
+#define IGC_RXDEXT_STATERR_TCPE		0x20000000
+#define IGC_RXDEXT_STATERR_IPE		0x40000000
+#define IGC_RXDEXT_STATERR_RXE		0x80000000
+
+/* Same mask, but for extended and packet split descriptors */
+#define IGC_RXDEXT_ERR_FRAME_ERR_MASK ( \
+	IGC_RXDEXT_STATERR_CE  |	\
+	IGC_RXDEXT_STATERR_SE  |	\
+	IGC_RXDEXT_STATERR_SEQ |	\
+	IGC_RXDEXT_STATERR_CXE |	\
+	IGC_RXDEXT_STATERR_RXE)
+
+#define IGC_MRQC_RSS_FIELD_IPV4_TCP	0x00010000
+#define IGC_MRQC_RSS_FIELD_IPV4		0x00020000
+#define IGC_MRQC_RSS_FIELD_IPV6_TCP_EX	0x00040000
+#define IGC_MRQC_RSS_FIELD_IPV6		0x00100000
+#define IGC_MRQC_RSS_FIELD_IPV6_TCP	0x00200000
+
+/* Header split receive */
+#define IGC_RFCTL_IPV6_EX_DIS	0x00010000
+#define IGC_RFCTL_LEF		0x00040000
+
+#define IGC_RCTL_SZ_256		0x00030000 /* Rx buffer size 256 */
+
+#define IGC_RCTL_MO_SHIFT	12 /* multicast offset shift */
+#define IGC_RCTL_CFIEN		0x00080000 /* canonical form enable */
+#define IGC_RCTL_DPF		0x00400000 /* discard pause frames */
+#define IGC_RCTL_PMCF		0x00800000 /* pass MAC control frames */
+#define IGC_RCTL_SECRC		0x04000000 /* Strip Ethernet CRC */
+
+#define I225_RXPBSIZE_DEFAULT	0x000000A2 /* RXPBSIZE default */
+#define I225_TXPBSIZE_DEFAULT	0x04000014 /* TXPBSIZE default */
+
+/* Receive Checksum Control */
+#define IGC_RXCSUM_CRCOFL	0x00000800   /* CRC32 offload enable */
+#define IGC_RXCSUM_PCSD		0x00002000   /* packet checksum disabled */
+
+/* GPY211 - I225 defines */
+#define GPY_MMD_MASK		0xFFFF0000
+#define GPY_MMD_SHIFT		16
+#define GPY_REG_MASK		0x0000FFFF
+
+#define IGC_MMDAC_FUNC_DATA	0x4000 /* Data, no post increment */
+
+/* MAC definitions */
+#define IGC_FACTPS_MNGCG	0x20000000
+#define IGC_FWSM_MODE_MASK	0xE
+#define IGC_FWSM_MODE_SHIFT	1
+
+/* Management Control */
+#define IGC_MANC_SMBUS_EN	0x00000001 /* SMBus Enabled - RO */
+#define IGC_MANC_ASF_EN		0x00000002 /* ASF Enabled - RO */
+
+/* PHY */
+#define PHY_REVISION_MASK	0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS	0x1F  /* 5 bit address bus (0-0x1F) */
+#define IGC_GEN_POLL_TIMEOUT	1920
+
+/* PHY Control Register */
+#define MII_CR_FULL_DUPLEX	0x0100  /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG	0x0200  /* Restart auto negotiation */
+#define MII_CR_POWER_DOWN	0x0800  /* Power down */
+#define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
+#define MII_CR_LOOPBACK		0x4000  /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET		0x8000  /* 0 = normal, 1 = PHY reset */
+#define MII_CR_SPEED_1000	0x0040
+#define MII_CR_SPEED_100	0x2000
+#define MII_CR_SPEED_10		0x0000
+
+/* PHY Status Register */
+#define MII_SR_LINK_STATUS	0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_COMPLETE	0x0020 /* Auto Neg Complete */
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CONTROL		0x00 /* Control Register */
+#define PHY_STATUS		0x01 /* Status Register */
+#define PHY_ID1			0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2			0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV		0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY		0x05 /* Link Partner Ability (Base Page) */
+#define PHY_1000T_CTRL		0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS	0x0A /* 1000Base-T Status Reg */
+
+/* Bit definitions for valid PHY IDs. I = Integrated E = External */
+#define I225_I_PHY_ID		0x67C9DC00
+
+/* MDI Control */
+#define IGC_MDIC_DATA_MASK	0x0000FFFF
+#define IGC_MDIC_REG_MASK	0x001F0000
+#define IGC_MDIC_REG_SHIFT	16
+#define IGC_MDIC_PHY_MASK	0x03E00000
+#define IGC_MDIC_PHY_SHIFT	21
+#define IGC_MDIC_OP_WRITE	0x04000000
+#define IGC_MDIC_OP_READ	0x08000000
+#define IGC_MDIC_READY		0x10000000
+#define IGC_MDIC_INT_EN		0x20000000
+#define IGC_MDIC_ERROR		0x40000000
+#define IGC_MDIC_DEST		0x80000000
+
+#define IGC_N0_QUEUE		-1
+
+#define IGC_MAX_MAC_HDR_LEN	127
+#define IGC_MAX_NETWORK_HDR_LEN	511
+
+#define IGC_VLAPQF_QUEUE_SEL(_n, q_idx) ((q_idx) << ((_n) * 4))
+#define IGC_VLAPQF_P_VALID(_n)	(0x1 << (3 + (_n) * 4))
+#define IGC_VLAPQF_QUEUE_MASK	0x03
+
+#define IGC_ADVTXD_MACLEN_SHIFT		9  /* Adv ctxt desc mac len shift */
+#define IGC_ADVTXD_TUCMD_IPV4		0x00000400  /* IP Packet Type:1=IPv4 */
+#define IGC_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet Type of TCP */
+#define IGC_ADVTXD_TUCMD_L4T_SCTP	0x00001000 /* L4 packet TYPE of SCTP */
+
+#endif /* _IGC_DEFINES_H_ */
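The MDIC bits near the end of the file encode one MDIO transaction per register write: the register and PHY addresses go into their fields along with an opcode, then software polls READY and checks ERROR. A hedged sketch of a read, assuming the driver's rd32()/wr32() accessors and udelay(); the real igc_read_phy_reg_mdic() additionally holds the SW/FW semaphore:

static s32 example_mdic_read(struct igc_hw *hw, u32 phy_addr, u32 reg, u16 *data)
{
	u32 mdic, i;

	mdic = (reg << IGC_MDIC_REG_SHIFT) |
	       (phy_addr << IGC_MDIC_PHY_SHIFT) |
	       IGC_MDIC_OP_READ;
	wr32(IGC_MDIC, mdic);

	/* poll for completion; hardware sets READY when the transaction is done */
	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
		udelay(50);
		mdic = rd32(IGC_MDIC);
		if (mdic & IGC_MDIC_READY)
			break;
	}
	if (!(mdic & IGC_MDIC_READY) || (mdic & IGC_MDIC_ERROR))
		return -IGC_ERR_PHY;

	*data = mdic & IGC_MDIC_DATA_MASK;
	return 0;
}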
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
new file mode 100644
index 0000000..ac98f1d
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -0,0 +1,1863 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+/* ethtool support for igc */
+#include <linux/if_vlan.h>
+#include <linux/pm_runtime.h>
+
+#include "igc.h"
+
+/* per-entry layout for the ethtool statistics tables below */
+struct igc_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IGC_STAT(_name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(struct igc_adapter, _stat), \
+	.stat_offset = offsetof(struct igc_adapter, _stat) \
+}
+
+static const struct igc_stats igc_gstrings_stats[] = {
+	IGC_STAT("rx_packets", stats.gprc),
+	IGC_STAT("tx_packets", stats.gptc),
+	IGC_STAT("rx_bytes", stats.gorc),
+	IGC_STAT("tx_bytes", stats.gotc),
+	IGC_STAT("rx_broadcast", stats.bprc),
+	IGC_STAT("tx_broadcast", stats.bptc),
+	IGC_STAT("rx_multicast", stats.mprc),
+	IGC_STAT("tx_multicast", stats.mptc),
+	IGC_STAT("multicast", stats.mprc),
+	IGC_STAT("collisions", stats.colc),
+	IGC_STAT("rx_crc_errors", stats.crcerrs),
+	IGC_STAT("rx_no_buffer_count", stats.rnbc),
+	IGC_STAT("rx_missed_errors", stats.mpc),
+	IGC_STAT("tx_aborted_errors", stats.ecol),
+	IGC_STAT("tx_carrier_errors", stats.tncrs),
+	IGC_STAT("tx_window_errors", stats.latecol),
+	IGC_STAT("tx_abort_late_coll", stats.latecol),
+	IGC_STAT("tx_deferred_ok", stats.dc),
+	IGC_STAT("tx_single_coll_ok", stats.scc),
+	IGC_STAT("tx_multi_coll_ok", stats.mcc),
+	IGC_STAT("tx_timeout_count", tx_timeout_count),
+	IGC_STAT("rx_long_length_errors", stats.roc),
+	IGC_STAT("rx_short_length_errors", stats.ruc),
+	IGC_STAT("rx_align_errors", stats.algnerrc),
+	IGC_STAT("tx_tcp_seg_good", stats.tsctc),
+	IGC_STAT("tx_tcp_seg_failed", stats.tsctfc),
+	IGC_STAT("rx_flow_control_xon", stats.xonrxc),
+	IGC_STAT("rx_flow_control_xoff", stats.xoffrxc),
+	IGC_STAT("tx_flow_control_xon", stats.xontxc),
+	IGC_STAT("tx_flow_control_xoff", stats.xofftxc),
+	IGC_STAT("rx_long_byte_count", stats.gorc),
+	IGC_STAT("tx_dma_out_of_sync", stats.doosync),
+	IGC_STAT("tx_smbus", stats.mgptc),
+	IGC_STAT("rx_smbus", stats.mgprc),
+	IGC_STAT("dropped_smbus", stats.mgpdc),
+	IGC_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+	IGC_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+	IGC_STAT("os2bmc_tx_by_host", stats.o2bspc),
+	IGC_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+	IGC_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	IGC_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped),
+	IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+};
+
+#define IGC_NETDEV_STAT(_net_stat) { \
+	.stat_string = __stringify(_net_stat), \
+	.sizeof_stat = FIELD_SIZEOF(struct rtnl_link_stats64, _net_stat), \
+	.stat_offset = offsetof(struct rtnl_link_stats64, _net_stat) \
+}
+
+static const struct igc_stats igc_gstrings_net_stats[] = {
+	IGC_NETDEV_STAT(rx_errors),
+	IGC_NETDEV_STAT(tx_errors),
+	IGC_NETDEV_STAT(tx_dropped),
+	IGC_NETDEV_STAT(rx_length_errors),
+	IGC_NETDEV_STAT(rx_over_errors),
+	IGC_NETDEV_STAT(rx_frame_errors),
+	IGC_NETDEV_STAT(rx_fifo_errors),
+	IGC_NETDEV_STAT(tx_fifo_errors),
+	IGC_NETDEV_STAT(tx_heartbeat_errors)
+};
+
+enum igc_diagnostics_results {
+	TEST_REG = 0,
+	TEST_EEP,
+	TEST_IRQ,
+	TEST_LOOP,
+	TEST_LINK
+};
+
+static const char igc_gstrings_test[][ETH_GSTRING_LEN] = {
+	[TEST_REG]  = "Register test  (offline)",
+	[TEST_EEP]  = "Eeprom test    (offline)",
+	[TEST_IRQ]  = "Interrupt test (offline)",
+	[TEST_LOOP] = "Loopback test  (offline)",
+	[TEST_LINK] = "Link test   (on/offline)"
+};
+
+#define IGC_TEST_LEN (sizeof(igc_gstrings_test) / ETH_GSTRING_LEN)
+
+#define IGC_GLOBAL_STATS_LEN	\
+	(sizeof(igc_gstrings_stats) / sizeof(struct igc_stats))
+#define IGC_NETDEV_STATS_LEN	\
+	(sizeof(igc_gstrings_net_stats) / sizeof(struct igc_stats))
+#define IGC_RX_QUEUE_STATS_LEN \
+	(sizeof(struct igc_rx_queue_stats) / sizeof(u64))
+#define IGC_TX_QUEUE_STATS_LEN 3 /* packets, bytes, restart_queue */
+#define IGC_QUEUE_STATS_LEN \
+	((((struct igc_adapter *)netdev_priv(netdev))->num_rx_queues * \
+	  IGC_RX_QUEUE_STATS_LEN) + \
+	 (((struct igc_adapter *)netdev_priv(netdev))->num_tx_queues * \
+	  IGC_TX_QUEUE_STATS_LEN))
+#define IGC_STATS_LEN \
+	(IGC_GLOBAL_STATS_LEN + IGC_NETDEV_STATS_LEN + IGC_QUEUE_STATS_LEN)
+
+static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IGC_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+};
+
+#define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings)
+
+static void igc_get_drvinfo(struct net_device *netdev,
+			    struct ethtool_drvinfo *drvinfo)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	strlcpy(drvinfo->driver,  igc_driver_name, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, igc_driver_version, sizeof(drvinfo->version));
+
+	/* add fw_version here */
+	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN;
+}
+
+static int igc_get_regs_len(struct net_device *netdev)
+{
+	return IGC_REGS_LEN * sizeof(u32);
+}
+
+static void igc_get_regs(struct net_device *netdev,
+			 struct ethtool_regs *regs, void *p)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	memset(p, 0, IGC_REGS_LEN * sizeof(u32));
+
+	regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = rd32(IGC_CTRL);
+	regs_buff[1] = rd32(IGC_STATUS);
+	regs_buff[2] = rd32(IGC_CTRL_EXT);
+	regs_buff[3] = rd32(IGC_MDIC);
+	regs_buff[4] = rd32(IGC_CONNSW);
+
+	/* NVM Register */
+	regs_buff[5] = rd32(IGC_EECD);
+
+	/* Interrupt */
+	/* Reading EICS for EICR because they read the
+	 * same but EICS does not clear on read
+	 */
+	regs_buff[6] = rd32(IGC_EICS);
+	regs_buff[7] = rd32(IGC_EICS);
+	regs_buff[8] = rd32(IGC_EIMS);
+	regs_buff[9] = rd32(IGC_EIMC);
+	regs_buff[10] = rd32(IGC_EIAC);
+	regs_buff[11] = rd32(IGC_EIAM);
+	/* Reading ICS for ICR because they read the
+	 * same but ICS does not clear on read
+	 */
+	regs_buff[12] = rd32(IGC_ICS);
+	regs_buff[13] = rd32(IGC_ICS);
+	regs_buff[14] = rd32(IGC_IMS);
+	regs_buff[15] = rd32(IGC_IMC);
+	regs_buff[16] = rd32(IGC_IAC);
+	regs_buff[17] = rd32(IGC_IAM);
+
+	/* Flow Control */
+	regs_buff[18] = rd32(IGC_FCAL);
+	regs_buff[19] = rd32(IGC_FCAH);
+	regs_buff[20] = rd32(IGC_FCTTV);
+	regs_buff[21] = rd32(IGC_FCRTL);
+	regs_buff[22] = rd32(IGC_FCRTH);
+	regs_buff[23] = rd32(IGC_FCRTV);
+
+	/* Receive */
+	regs_buff[24] = rd32(IGC_RCTL);
+	regs_buff[25] = rd32(IGC_RXCSUM);
+	regs_buff[26] = rd32(IGC_RLPML);
+	regs_buff[27] = rd32(IGC_RFCTL);
+
+	/* Transmit */
+	regs_buff[28] = rd32(IGC_TCTL);
+	regs_buff[29] = rd32(IGC_TIPG);
+
+	/* Wake Up */
+
+	/* MAC */
+
+	/* Statistics */
+	regs_buff[30] = adapter->stats.crcerrs;
+	regs_buff[31] = adapter->stats.algnerrc;
+	regs_buff[32] = adapter->stats.symerrs;
+	regs_buff[33] = adapter->stats.rxerrc;
+	regs_buff[34] = adapter->stats.mpc;
+	regs_buff[35] = adapter->stats.scc;
+	regs_buff[36] = adapter->stats.ecol;
+	regs_buff[37] = adapter->stats.mcc;
+	regs_buff[38] = adapter->stats.latecol;
+	regs_buff[39] = adapter->stats.colc;
+	regs_buff[40] = adapter->stats.dc;
+	regs_buff[41] = adapter->stats.tncrs;
+	regs_buff[42] = adapter->stats.sec;
+	regs_buff[43] = adapter->stats.htdpmc;
+	regs_buff[44] = adapter->stats.rlec;
+	regs_buff[45] = adapter->stats.xonrxc;
+	regs_buff[46] = adapter->stats.xontxc;
+	regs_buff[47] = adapter->stats.xoffrxc;
+	regs_buff[48] = adapter->stats.xofftxc;
+	regs_buff[49] = adapter->stats.fcruc;
+	regs_buff[50] = adapter->stats.prc64;
+	regs_buff[51] = adapter->stats.prc127;
+	regs_buff[52] = adapter->stats.prc255;
+	regs_buff[53] = adapter->stats.prc511;
+	regs_buff[54] = adapter->stats.prc1023;
+	regs_buff[55] = adapter->stats.prc1522;
+	regs_buff[56] = adapter->stats.gprc;
+	regs_buff[57] = adapter->stats.bprc;
+	regs_buff[58] = adapter->stats.mprc;
+	regs_buff[59] = adapter->stats.gptc;
+	regs_buff[60] = adapter->stats.gorc;
+	regs_buff[61] = adapter->stats.gotc;
+	regs_buff[62] = adapter->stats.rnbc;
+	regs_buff[63] = adapter->stats.ruc;
+	regs_buff[64] = adapter->stats.rfc;
+	regs_buff[65] = adapter->stats.roc;
+	regs_buff[66] = adapter->stats.rjc;
+	regs_buff[67] = adapter->stats.mgprc;
+	regs_buff[68] = adapter->stats.mgpdc;
+	regs_buff[69] = adapter->stats.mgptc;
+	regs_buff[70] = adapter->stats.tor;
+	regs_buff[71] = adapter->stats.tot;
+	regs_buff[72] = adapter->stats.tpr;
+	regs_buff[73] = adapter->stats.tpt;
+	regs_buff[74] = adapter->stats.ptc64;
+	regs_buff[75] = adapter->stats.ptc127;
+	regs_buff[76] = adapter->stats.ptc255;
+	regs_buff[77] = adapter->stats.ptc511;
+	regs_buff[78] = adapter->stats.ptc1023;
+	regs_buff[79] = adapter->stats.ptc1522;
+	regs_buff[80] = adapter->stats.mptc;
+	regs_buff[81] = adapter->stats.bptc;
+	regs_buff[82] = adapter->stats.tsctc;
+	regs_buff[83] = adapter->stats.iac;
+	regs_buff[84] = adapter->stats.rpthc;
+	regs_buff[85] = adapter->stats.hgptc;
+	regs_buff[86] = adapter->stats.hgorc;
+	regs_buff[87] = adapter->stats.hgotc;
+	regs_buff[88] = adapter->stats.lenerrs;
+	regs_buff[89] = adapter->stats.scvpc;
+	regs_buff[90] = adapter->stats.hrmpc;
+
+	for (i = 0; i < 4; i++)
+		regs_buff[91 + i] = rd32(IGC_SRRCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[95 + i] = rd32(IGC_PSRTYPE(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[99 + i] = rd32(IGC_RDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[103 + i] = rd32(IGC_RDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[107 + i] = rd32(IGC_RDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[111 + i] = rd32(IGC_RDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[115 + i] = rd32(IGC_RDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[119 + i] = rd32(IGC_RXDCTL(i));
+
+	for (i = 0; i < 10; i++)
+		regs_buff[123 + i] = rd32(IGC_EITR(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[139 + i] = rd32(IGC_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[145 + i] = rd32(IGC_RAH(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[149 + i] = rd32(IGC_TDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[152 + i] = rd32(IGC_TDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[156 + i] = rd32(IGC_TDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[160 + i] = rd32(IGC_TDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[164 + i] = rd32(IGC_TDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[168 + i] = rd32(IGC_TXDCTL(i));
+}
+
+static u32 igc_get_msglevel(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void igc_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = data;
+}
+
+static int igc_nway_reset(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		igc_reinit_locked(adapter);
+	return 0;
+}
+
+static u32 igc_get_link(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_mac_info *mac = &adapter->hw.mac;
+
+	/* If the link is not reported up to netdev, interrupts are disabled,
+	 * and so the physical link state may have changed since we last
+	 * looked. Set get_link_status to make sure that the true link
+	 * state is interrogated, rather than pulling a cached and possibly
+	 * stale link state from the driver.
+	 */
+	if (!netif_carrier_ok(netdev))
+		mac->get_link_status = 1;
+
+	return igc_has_link(adapter);
+}
+
+static int igc_get_eeprom_len(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->hw.nvm.word_size * 2;
+}
+
+static int igc_get_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int first_word, last_word;
+	u16 *eeprom_buff;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	if (hw->nvm.type == igc_nvm_eeprom_spi) {
+		ret_val = hw->nvm.ops.read(hw, first_word,
+					   last_word - first_word + 1,
+					   eeprom_buff);
+	} else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = hw->nvm.ops.read(hw, first_word + i, 1,
+						   &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+	       eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
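As a worked example of the word arithmetic in igc_get_eeprom(): a request with eeprom->offset = 3 and eeprom->len = 4 covers bytes 3..6, so first_word = 3 >> 1 = 1 and last_word = (3 + 4 - 1) >> 1 = 3; three little-endian words are read and byte-swapped in place, and the final memcpy() starts at (u8 *)eeprom_buff + 1 so the unrequested leading byte of word 1 is skipped.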
+
+static int igc_set_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 *eeprom_buff;
+	void *ptr;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if (hw->mac.type >= igc_i225 &&
+	    !igc_get_flash_presence_i225(hw)) {
+		return -EOPNOTSUPP;
+	}
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = hw->nvm.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word;
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, first_word, 1,
+					    &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && ret_val == 0) {
+		/* need read/modify/write of last changed EEPROM word;
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, last_word, 1,
+				   &eeprom_buff[last_word - first_word]);
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]);
+
+	ret_val = hw->nvm.ops.write(hw, first_word,
+				    last_word - first_word + 1, eeprom_buff);
+
+	/* Update the checksum if nvm write succeeded */
+	if (ret_val == 0)
+		hw->nvm.ops.update(hw);
+
+	/* TODO: check whether igc_set_fw_version(adapter) is needed here */
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void igc_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IGC_MAX_RXD;
+	ring->tx_max_pending = IGC_MAX_TXD;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+}
+
+static int igc_set_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_ring *temp_ring;
+	u16 new_rx_count, new_tx_count;
+	int i, err = 0;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+		return -EINVAL;
+
+	new_rx_count = min_t(u32, ring->rx_pending, IGC_MAX_RXD);
+	new_rx_count = max_t(u16, new_rx_count, IGC_MIN_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = min_t(u32, ring->tx_pending, IGC_MAX_TXD);
+	new_tx_count = max_t(u16, new_tx_count, IGC_MIN_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if (new_tx_count == adapter->tx_ring_count &&
+	    new_rx_count == adapter->rx_ring_count) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	if (adapter->num_tx_queues > adapter->num_rx_queues)
+		temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
+					       adapter->num_tx_queues));
+	else
+		temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
+					       adapter->num_rx_queues));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	igc_down(adapter);
+
+	/* We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the Tx and Rx ring structs.
+	 */
+	if (new_tx_count != adapter->tx_ring_count) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->tx_ring[i],
+			       sizeof(struct igc_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = igc_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igc_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			igc_free_tx_resources(adapter->tx_ring[i]);
+
+			memcpy(adapter->tx_ring[i], &temp_ring[i],
+			       sizeof(struct igc_ring));
+		}
+
+		adapter->tx_ring_count = new_tx_count;
+	}
+
+	if (new_rx_count != adapter->rx_ring_count) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->rx_ring[i],
+			       sizeof(struct igc_ring));
+
+			temp_ring[i].count = new_rx_count;
+			err = igc_setup_rx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igc_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			igc_free_rx_resources(adapter->rx_ring[i]);
+
+			memcpy(adapter->rx_ring[i], &temp_ring[i],
+			       sizeof(struct igc_ring));
+		}
+
+		adapter->rx_ring_count = new_rx_count;
+	}
+err_setup:
+	igc_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IGC_RESETTING, &adapter->state);
+	return err;
+}
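As a worked example of the clamping above: a request of ring->rx_pending = 100 is first bounded into [IGC_MIN_RXD, IGC_MAX_RXD] (both defined in igc.h, outside this hunk) and then rounded up by ALIGN(100, REQ_RX_DESCRIPTOR_MULTIPLE) to 104, the next multiple of 8; only then is it compared against the current ring count to decide whether any reallocation is needed.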
+
+static void igc_get_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+		(adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc.current_mode == igc_fc_rx_pause) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == igc_fc_tx_pause) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == igc_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int igc_set_pauseparam(struct net_device *netdev,
+			      struct ethtool_pauseparam *pause)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		hw->fc.requested_mode = igc_fc_default;
+		if (netif_running(adapter->netdev)) {
+			igc_down(adapter);
+			igc_up(adapter);
+		} else {
+			igc_reset(adapter);
+		}
+	} else {
+		if (pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_full;
+		else if (pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_rx_pause;
+		else if (!pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_tx_pause;
+		else if (!pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_none;
+
+		hw->fc.current_mode = hw->fc.requested_mode;
+
+		retval = ((hw->phy.media_type == igc_media_type_copper) ?
+			  igc_force_mac_fc(hw) : igc_setup_link(hw));
+	}
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+	return retval;
+}
+
+static void igc_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, *igc_gstrings_test,
+		       IGC_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, igc_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < IGC_NETDEV_STATS_LEN; i++) {
+			memcpy(p, igc_gstrings_net_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			sprintf(p, "tx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "tx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "tx_queue_%u_restart", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			sprintf(p, "rx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_drops", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_csum_err", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_alloc_failed", i);
+			p += ETH_GSTRING_LEN;
+		}
+		/* BUG_ON(p - data != IGC_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, igc_priv_flags_strings,
+		       IGC_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static int igc_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return IGC_STATS_LEN;
+	case ETH_SS_TEST:
+		return IGC_TEST_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return IGC_PRIV_FLAGS_STR_LEN;
+	default:
+		return -ENOTSUPP;
+	}
+}
+
+static void igc_get_ethtool_stats(struct net_device *netdev,
+				  struct ethtool_stats *stats, u64 *data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
+	unsigned int start;
+	struct igc_ring *ring;
+	int i, j;
+	char *p;
+
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+
+	for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) {
+		p = (char *)adapter + igc_gstrings_stats[i].stat_offset;
+		data[i] = (igc_gstrings_stats[i].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < IGC_NETDEV_STATS_LEN; j++, i++) {
+		p = (char *)net_stats + igc_gstrings_net_stats[j].stat_offset;
+		data[i] = (igc_gstrings_net_stats[j].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		u64	restart2;
+
+		ring = adapter->tx_ring[j];
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
+			data[i]   = ring->tx_stats.packets;
+			data[i + 1] = ring->tx_stats.bytes;
+			data[i + 2] = ring->tx_stats.restart_queue;
+		} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->tx_syncp2);
+			restart2  = ring->tx_stats.restart_queue2;
+		} while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start));
+		data[i + 2] += restart2;
+
+		i += IGC_TX_QUEUE_STATS_LEN;
+	}
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		ring = adapter->rx_ring[j];
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
+			data[i]   = ring->rx_stats.packets;
+			data[i + 1] = ring->rx_stats.bytes;
+			data[i + 2] = ring->rx_stats.drops;
+			data[i + 3] = ring->rx_stats.csum_err;
+			data[i + 4] = ring->rx_stats.alloc_failed;
+		} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
+		i += IGC_RX_QUEUE_STATS_LEN;
+	}
+	spin_unlock(&adapter->stats64_lock);
+}
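The begin/retry pairs above are the usual u64_stats seqcount snapshot: the copy repeats until no writer updated the counters mid-read, which is what makes 64-bit counters safe to read on 32-bit machines. The pattern in isolation, as a minimal sketch:

static u64 example_read_tx_packets(struct igc_ring *ring)
{
	unsigned int start;
	u64 packets;

	do {
		start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
		packets = ring->tx_stats.packets;
	} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));

	return packets;
}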
+
+static int igc_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->rx_itr_setting <= 3)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) {
+		if (adapter->tx_itr_setting <= 3)
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+		else
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+	}
+
+	return 0;
+}
+
+static int igc_set_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (ec->rx_max_coalesced_frames ||
+	    ec->rx_coalesce_usecs_irq ||
+	    ec->rx_max_coalesced_frames_irq ||
+	    ec->tx_max_coalesced_frames ||
+	    ec->tx_coalesce_usecs_irq ||
+	    ec->stats_block_coalesce_usecs ||
+	    ec->use_adaptive_rx_coalesce ||
+	    ec->use_adaptive_tx_coalesce ||
+	    ec->pkt_rate_low ||
+	    ec->rx_coalesce_usecs_low ||
+	    ec->rx_max_coalesced_frames_low ||
+	    ec->tx_coalesce_usecs_low ||
+	    ec->tx_max_coalesced_frames_low ||
+	    ec->pkt_rate_high ||
+	    ec->rx_coalesce_usecs_high ||
+	    ec->rx_max_coalesced_frames_high ||
+	    ec->tx_coalesce_usecs_high ||
+	    ec->tx_max_coalesced_frames_high ||
+	    ec->rate_sample_interval)
+		return -ENOTSUPP;
+
+	if (ec->rx_coalesce_usecs > IGC_MAX_ITR_USECS ||
+	    (ec->rx_coalesce_usecs > 3 &&
+	     ec->rx_coalesce_usecs < IGC_MIN_ITR_USECS) ||
+	    ec->rx_coalesce_usecs == 2)
+		return -EINVAL;
+
+	if (ec->tx_coalesce_usecs > IGC_MAX_ITR_USECS ||
+	    (ec->tx_coalesce_usecs > 3 &&
+	     ec->tx_coalesce_usecs < IGC_MIN_ITR_USECS) ||
+	    ec->tx_coalesce_usecs == 2)
+		return -EINVAL;
+
+	if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
+		return -EINVAL;
+
+	/* If ITR is disabled, disable DMAC */
+	if (ec->rx_coalesce_usecs == 0) {
+		if (adapter->flags & IGC_FLAG_DMAC)
+			adapter->flags &= ~IGC_FLAG_DMAC;
+	}
+
+	/* 0-3 are special modes; real usec values are stored in ITR units (<< 2) */
+	if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+
+	/* with queue pairs, Tx shares the Rx setting; otherwise encode as above */
+	if (adapter->flags & IGC_FLAG_QUEUE_PAIRS)
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+	else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igc_q_vector *q_vector = adapter->q_vector[i];
+
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		if (q_vector->rx.ring)
+			q_vector->itr_val = adapter->rx_itr_setting;
+		else
+			q_vector->itr_val = adapter->tx_itr_setting;
+		if (q_vector->itr_val && q_vector->itr_val <= 3)
+			q_vector->itr_val = IGC_START_ITR;
+		q_vector->set_itr = 1;
+	}
+
+	return 0;
+}
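As a worked example of the encoding used above: rx_coalesce_usecs values 0-3 are stored verbatim as special modes (0 disables moderation entirely, hence the DMAC flag clearing), while a real value such as 100 usecs is stored as rx_itr_setting = 100 << 2 = 400 and shifted back down in igc_get_coalesce(); storing shifted values keeps the low two bits clear, which is what lets the <= 3 checks distinguish the special modes from encoded microsecond values.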
+
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct igc_nfc_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = IGC_MAX_RXNFC_FILTERS;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	if (rule->filter.match_flags) {
+		fsp->flow_type = ETHER_FLOW;
+		fsp->ring_cookie = rule->action;
+		if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+			fsp->h_u.ether_spec.h_proto = rule->filter.etype;
+			fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
+		}
+		if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+			fsp->flow_type |= FLOW_EXT;
+			fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
+			fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
+		}
+		if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+			ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+					rule->filter.dst_addr);
+			/* As we only support matching by the full
+			 * mask, return the mask to userspace
+			 */
+			eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+		}
+		if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+			ether_addr_copy(fsp->h_u.ether_spec.h_source,
+					rule->filter.src_addr);
+			/* As we only support matching by the full
+			 * mask, return the mask to userspace
+			 */
+			eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+		}
+
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter,
+				   struct ethtool_rxnfc *cmd,
+				   u32 *rule_locs)
+{
+	struct igc_nfc_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = IGC_MAX_RXNFC_FILTERS;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+static int igc_get_rss_hash_opts(struct igc_adapter *adapter,
+				 struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* Report default options for RSS on igc */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
+	case UDP_V4_FLOW:
+		if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
+	case SCTP_V4_FLOW:
+		/* Fall through */
+	case AH_ESP_V4_FLOW:
+		/* Fall through */
+	case AH_V4_FLOW:
+		/* Fall through */
+	case ESP_V4_FLOW:
+		/* Fall through */
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
+	case UDP_V6_FLOW:
+		if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
+	case SCTP_V6_FLOW:
+		/* Fall through */
+	case AH_ESP_V6_FLOW:
+		/* Fall through */
+	case AH_V6_FLOW:
+		/* Fall through */
+	case ESP_V6_FLOW:
+		/* Fall through */
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			 u32 *rule_locs)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->nfc_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = igc_get_ethtool_nfc_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = igc_get_ethtool_nfc_all(adapter, cmd, rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ret = igc_get_rss_hash_opts(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+#define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \
+		       IGC_FLAG_RSS_FIELD_IPV6_UDP)
+static int igc_set_rss_hash_opt(struct igc_adapter *adapter,
+				struct ethtool_rxnfc *nfc)
+{
+	u32 flags = adapter->flags;
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGC_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGC_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGC_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGC_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags != adapter->flags) {
+		struct igc_hw *hw = &adapter->hw;
+		u32 mrqc = rd32(IGC_MRQC);
+
+		if ((flags & UDP_RSS_FLAGS) &&
+		    !(adapter->flags & UDP_RSS_FLAGS))
+			dev_err(&adapter->pdev->dev,
+				"enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+		adapter->flags = flags;
+
+		/* Perform hash on these packet types */
+		mrqc |= IGC_MRQC_RSS_FIELD_IPV4 |
+			IGC_MRQC_RSS_FIELD_IPV4_TCP |
+			IGC_MRQC_RSS_FIELD_IPV6 |
+			IGC_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(IGC_MRQC_RSS_FIELD_IPV4_UDP |
+			  IGC_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
+			mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
+			mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
+
+		wr32(IGC_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
+static int igc_rxnfc_write_etype_filter(struct igc_adapter *adapter,
+					struct igc_nfc_filter *input)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u8 i;
+	u32 etqf;
+	u16 etype;
+
+	/* find an empty etype filter register */
+	for (i = 0; i < MAX_ETYPE_FILTER; ++i) {
+		if (!adapter->etype_bitmap[i])
+			break;
+	}
+	if (i == MAX_ETYPE_FILTER) {
+		dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n");
+		return -EINVAL;
+	}
+
+	adapter->etype_bitmap[i] = true;
+
+	etqf = rd32(IGC_ETQF(i));
+	etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK);
+
+	etqf |= IGC_ETQF_FILTER_ENABLE;
+	etqf &= ~IGC_ETQF_ETYPE_MASK;
+	etqf |= (etype & IGC_ETQF_ETYPE_MASK);
+
+	etqf &= ~IGC_ETQF_QUEUE_MASK;
+	etqf |= ((input->action << IGC_ETQF_QUEUE_SHIFT)
+		& IGC_ETQF_QUEUE_MASK);
+	etqf |= IGC_ETQF_QUEUE_ENABLE;
+
+	wr32(IGC_ETQF(i), etqf);
+
+	input->etype_reg_index = i;
+
+	return 0;
+}
+
+static int igc_rxnfc_write_vlan_prio_filter(struct igc_adapter *adapter,
+					    struct igc_nfc_filter *input)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u8 vlan_priority;
+	u16 queue_index;
+	u32 vlapqf;
+
+	vlapqf = rd32(IGC_VLAPQF);
+	vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
+				>> VLAN_PRIO_SHIFT;
+	queue_index = (vlapqf >> (vlan_priority * 4)) & IGC_VLAPQF_QUEUE_MASK;
+
+	/* check whether this vlan prio is already set */
+	if (vlapqf & IGC_VLAPQF_P_VALID(vlan_priority) &&
+	    queue_index != input->action) {
+		dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n");
+		return -EEXIST;
+	}
+
+	vlapqf |= IGC_VLAPQF_P_VALID(vlan_priority);
+	vlapqf |= IGC_VLAPQF_QUEUE_SEL(vlan_priority, input->action);
+
+	wr32(IGC_VLAPQF, vlapqf);
+
+	return 0;
+}
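As a worked example of the register packing handled here: each VLAN priority owns a 4-bit field in VLAPQF, with IGC_VLAPQF_QUEUE_SEL(n, q) placing the queue index in the low bits of priority n's nibble and IGC_VLAPQF_P_VALID(n) setting that nibble's top bit (bit n*4 + 3), so steering priority 5 to queue 2 ORs in (2 << 20) | (1 << 23).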
+
+int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int err = -EINVAL;
+
+	if (hw->mac.type == igc_i225 &&
+	    !(input->filter.match_flags & ~IGC_FILTER_FLAG_SRC_MAC_ADDR)) {
+		dev_err(&adapter->pdev->dev,
+			"i225 doesn't support flow classification rules specifying only source addresses.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		err = igc_rxnfc_write_etype_filter(adapter, input);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+		err = igc_add_mac_steering_filter(adapter,
+						  input->filter.dst_addr,
+						  input->action, 0);
+		/* a non-negative return is the assigned filter index; clamp to 0 */
+		err = min_t(int, err, 0);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		err = igc_add_mac_steering_filter(adapter,
+						  input->filter.src_addr,
+						  input->action,
+						  IGC_MAC_STATE_SRC_ADDR);
+		/* a non-negative return is the assigned filter index; clamp to 0 */
+		err = min_t(int, err, 0);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
+		err = igc_rxnfc_write_vlan_prio_filter(adapter, input);
+
+	return err;
+}
+
+static void igc_clear_etype_filter_regs(struct igc_adapter *adapter,
+					u16 reg_index)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 etqf = rd32(IGC_ETQF(reg_index));
+
+	etqf &= ~IGC_ETQF_QUEUE_ENABLE;
+	etqf &= ~IGC_ETQF_QUEUE_MASK;
+	etqf &= ~IGC_ETQF_FILTER_ENABLE;
+
+	wr32(IGC_ETQF(reg_index), etqf);
+
+	adapter->etype_bitmap[reg_index] = false;
+}
+
+static void igc_clear_vlan_prio_filter(struct igc_adapter *adapter,
+				       u16 vlan_tci)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u8 vlan_priority;
+	u32 vlapqf;
+
+	vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+
+	vlapqf = rd32(IGC_VLAPQF);
+	vlapqf &= ~IGC_VLAPQF_P_VALID(vlan_priority);
+	vlapqf &= ~IGC_VLAPQF_QUEUE_SEL(vlan_priority,
+						IGC_VLAPQF_QUEUE_MASK);
+
+	wr32(IGC_VLAPQF, vlapqf);
+}
+
+int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
+{
+	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
+		igc_clear_etype_filter_regs(adapter,
+					    input->etype_reg_index);
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
+		igc_clear_vlan_prio_filter(adapter,
+					   ntohs(input->filter.vlan_tci));
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+		igc_del_mac_steering_filter(adapter, input->filter.src_addr,
+					    input->action,
+					    IGC_MAC_STATE_SRC_ADDR);
+
+	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+		igc_del_mac_steering_filter(adapter, input->filter.dst_addr,
+					    input->action, 0);
+
+	return 0;
+}
+
+static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter,
+					struct igc_nfc_filter *input,
+					u16 sw_idx)
+{
+	struct igc_nfc_filter *rule, *parent;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = rule;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && rule->sw_idx == sw_idx) {
+		if (!input)
+			err = igc_erase_filter(adapter, rule);
+
+		hlist_del(&rule->nfc_node);
+		kfree(rule);
+		adapter->nfc_filter_count--;
+	}
+
+	/* If there is no input this was a delete; err is 0 if a rule was
+	 * found and removed from the list, else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node */
+	INIT_HLIST_NODE(&input->nfc_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_behind(&input->nfc_node, &parent->nfc_node);
+	else
+		hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
+
+	/* update counts */
+	adapter->nfc_filter_count++;
+
+	return 0;
+}
+
+static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct igc_nfc_filter *input, *rule;
+	int err = 0;
+
+	if (!(netdev->hw_features & NETIF_F_NTUPLE))
+		return -EOPNOTSUPP;
+
+	/* Don't allow programming if the action is a queue greater than
+	 * the number of online Rx queues.
+	 */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC ||
+	    fsp->ring_cookie >= adapter->num_rx_queues) {
+		dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n");
+		return -EINVAL;
+	}
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= IGC_MAX_RXNFC_FILTERS) {
+		dev_err(&adapter->pdev->dev, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
+		return -EINVAL;
+
+	input = kzalloc(sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
+		input->filter.etype = fsp->h_u.ether_spec.h_proto;
+		input->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
+	}
+
+	/* Only support matching addresses by the full mask */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
+		input->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR;
+		ether_addr_copy(input->filter.src_addr,
+				fsp->h_u.ether_spec.h_source);
+	}
+
+	/* Only support matching addresses by the full mask */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+		input->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR;
+		ether_addr_copy(input->filter.dst_addr,
+				fsp->h_u.ether_spec.h_dest);
+	}
+
+	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
+			err = -EINVAL;
+			goto err_out;
+		}
+		input->filter.vlan_tci = fsp->h_ext.vlan_tci;
+		input->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
+	}
+
+	input->action = fsp->ring_cookie;
+	input->sw_idx = fsp->location;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (!memcmp(&input->filter, &rule->filter,
+			    sizeof(input->filter))) {
+			err = -EEXIST;
+			dev_err(&adapter->pdev->dev,
+				"ethtool: this filter is already set\n");
+			goto err_out_w_lock;
+		}
+	}
+
+	err = igc_add_filter(adapter, input);
+	if (err)
+		goto err_out_w_lock;
+
+	igc_update_ethtool_nfc_entry(adapter, input, input->sw_idx);
+
+	spin_unlock(&adapter->nfc_lock);
+	return 0;
+
+err_out_w_lock:
+	spin_unlock(&adapter->nfc_lock);
+err_out:
+	kfree(input);
+	return err;
+}
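
This is the handler behind the ETHTOOL_SRXCLSRLINS ioctl that ethtool's
ntuple interface issues. Assuming an igc interface named eth0 (the name,
queue and location values here are illustrative), a destination-MAC steering
rule of the kind parsed above could be installed with:

    ethtool -N eth0 flow-type ether dst 01:02:03:04:05:06 action 2 loc 1

Per the checks above, the MAC match must use the full mask, the action must
name an online Rx queue, and loc must be below IGC_MAX_RXNFC_FILTERS.
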
+
+static int igc_del_ethtool_nfc_entry(struct igc_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->nfc_lock);
+	err = igc_update_ethtool_nfc_entry(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->nfc_lock);
+
+	return err;
+}
+
+static int igc_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = igc_set_rss_hash_opt(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLINS:
+		ret = igc_add_ethtool_nfc_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = igc_del_ethtool_nfc_entry(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+void igc_write_rss_indir_tbl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 reg = IGC_RETA(0);
+	u32 shift = 0;
+	int i = 0;
+
+	while (i < IGC_RETA_SIZE) {
+		u32 val = 0;
+		int j;
+
+		for (j = 3; j >= 0; j--) {
+			val <<= 8;
+			val |= adapter->rss_indir_tbl[i + j];
+		}
+
+		wr32(reg, val << shift);
+		reg += 4;
+		i += 4;
+	}
+}
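
Each 32-bit RETA register holds four one-byte indirection entries, with the
lowest-indexed entry in the least significant byte; that is why the inner
loop walks j from 3 down to 0 while shifting. A standalone sketch of the
packing (pack_reta_word is an illustrative name, not a driver helper):

    #include <stdio.h>
    #include <stdint.h>

    /* Pack four consecutive 8-bit table entries into one register word,
     * with tbl[0] ending up in the least significant byte.
     */
    static uint32_t pack_reta_word(const uint8_t *tbl)
    {
            uint32_t val = 0;
            int j;

            for (j = 3; j >= 0; j--) {
                    val <<= 8;
                    val |= tbl[j];
            }
            return val;
    }

    int main(void)
    {
            uint8_t tbl[4] = { 0x01, 0x02, 0x03, 0x04 };

            printf("0x%08x\n", pack_reta_word(tbl));  /* 0x04030201 */
            return 0;
    }
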
+
+static u32 igc_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return IGC_RETA_SIZE;
+}
+
+static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (!indir)
+		return 0;
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		indir[i] = adapter->rss_indir_tbl[i];
+
+	return 0;
+}
+
+static int igc_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 num_queues;
+	int i;
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	num_queues = adapter->rss_queues;
+
+	/* Verify user input. */
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		if (indir[i] >= num_queues)
+			return -EINVAL;
+
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		adapter->rss_indir_tbl[i] = indir[i];
+
+	igc_write_rss_indir_tbl(adapter);
+
+	return 0;
+}
+
+static unsigned int igc_max_channels(struct igc_adapter *adapter)
+{
+	return igc_get_max_rss_queues(adapter);
+}
+
+static void igc_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = igc_max_channels(adapter);
+
+	/* Report info for other vector */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		ch->max_other = NON_Q_VECTORS;
+		ch->other_count = NON_Q_VECTORS;
+	}
+
+	ch->combined_count = adapter->rss_queues;
+}
+
+static int igc_set_channels(struct net_device *netdev,
+			    struct ethtool_channels *ch)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	unsigned int count = ch->combined_count;
+	unsigned int max_combined = 0;
+
+	/* Verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* Verify other_count is valid and has not been changed */
+	if (ch->other_count != NON_Q_VECTORS)
+		return -EINVAL;
+
+	/* Verify the number of channels doesn't exceed hw limits */
+	max_combined = igc_max_channels(adapter);
+	if (count > max_combined)
+		return -EINVAL;
+
+	if (count != adapter->rss_queues) {
+		adapter->rss_queues = count;
+		igc_set_flag_queue_pairs(adapter, max_combined);
+
+		/* Hardware has to reinitialize queues and interrupts to
+		 * match the new configuration.
+		 */
+		return igc_reinit_queues(adapter);
+	}
+
+	return 0;
+}
+
+static u32 igc_get_priv_flags(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags & IGC_FLAG_RX_LEGACY)
+		priv_flags |= IGC_PRIV_FLAGS_LEGACY_RX;
+
+	return priv_flags;
+}
+
+static int igc_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags = adapter->flags;
+
+	flags &= ~IGC_FLAG_RX_LEGACY;
+	if (priv_flags & IGC_PRIV_FLAGS_LEGACY_RX)
+		flags |= IGC_FLAG_RX_LEGACY;
+
+	if (flags != adapter->flags) {
+		adapter->flags = flags;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			igc_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_begin(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_get_sync(&adapter->pdev->dev);
+	return 0;
+}
+
+static void igc_ethtool_complete(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_put(&adapter->pdev->dev);
+}
+
+static int igc_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 status;
+	u32 speed;
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+
+	/* supported link modes */
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 2500baseT_Full);
+
+	/* twisted pair */
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = hw->phy.addr;
+
+	/* advertising link modes */
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full);
+
+	/* set autoneg settings */
+	if (hw->mac.autoneg == 1) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Autoneg);
+	}
+
+	switch (hw->fc.requested_mode) {
+	case igc_fc_full:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		break;
+	case igc_fc_rx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	case igc_fc_tx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	default:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+	}
+
+	status = rd32(IGC_STATUS);
+
+	if (status & IGC_STATUS_LU) {
+		if (status & IGC_STATUS_SPEED_1000) {
+			/* For I225, STATUS will indicate 1G speed in both
+			 * 1 Gbps and 2.5 Gbps link modes. An additional
+			 * bit is used to differentiate between 1 Gbps
+			 * and 2.5 Gbps.
+			 */
+			if (hw->mac.type == igc_i225 &&
+			    (status & IGC_STATUS_SPEED_2500)) {
+				speed = SPEED_2500;
+			} else {
+				speed = SPEED_1000;
+			}
+		} else if (status & IGC_STATUS_SPEED_100) {
+			speed = SPEED_100;
+		} else {
+			speed = SPEED_10;
+		}
+		if ((status & IGC_STATUS_FD) ||
+		    hw->phy.media_type != igc_media_type_copper)
+			cmd->base.duplex = DUPLEX_FULL;
+		else
+			cmd->base.duplex = DUPLEX_HALF;
+	} else {
+		speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+	cmd->base.speed = speed;
+	if (hw->mac.autoneg)
+		cmd->base.autoneg = AUTONEG_ENABLE;
+	else
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI => 1; Invalid => 0 */
+	if (hw->phy.media_type == igc_media_type_copper)
+		cmd->base.eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+						      ETH_TP_MDI;
+	else
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
+
+	return 0;
+}
+
+static int igc_set_link_ksettings(struct net_device *netdev,
+				  const struct ethtool_link_ksettings *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 advertising;
+
+	/* When adapter in resetting mode, autoneg/speed/duplex
+	 * cannot be changed
+	 */
+	if (igc_check_reset_block(hw)) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot change link characteristics when reset is active.\n");
+		return -EINVAL;
+	}
+
+	/* MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO &&
+		    cmd->base.autoneg != AUTONEG_ENABLE) {
+			dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		hw->mac.autoneg = 1;
+		hw->phy.autoneg_advertised = advertising;
+		if (adapter->fc_autoneg)
+			hw->fc.requested_mode = igc_fc_default;
+	} else {
+		/* calling this overrides forced MDI setting */
+		dev_info(&adapter->pdev->dev,
+			 "Force mode currently not supported\n");
+	}
+
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		/* fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
+	}
+
+	/* reset the link */
+	if (netif_running(adapter->netdev)) {
+		igc_down(adapter);
+		igc_up(adapter);
+	} else {
+		igc_reset(adapter);
+	}
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+static const struct ethtool_ops igc_ethtool_ops = {
+	.get_drvinfo		= igc_get_drvinfo,
+	.get_regs_len		= igc_get_regs_len,
+	.get_regs		= igc_get_regs,
+	.get_msglevel		= igc_get_msglevel,
+	.set_msglevel		= igc_set_msglevel,
+	.nway_reset		= igc_nway_reset,
+	.get_link		= igc_get_link,
+	.get_eeprom_len		= igc_get_eeprom_len,
+	.get_eeprom		= igc_get_eeprom,
+	.set_eeprom		= igc_set_eeprom,
+	.get_ringparam		= igc_get_ringparam,
+	.set_ringparam		= igc_set_ringparam,
+	.get_pauseparam		= igc_get_pauseparam,
+	.set_pauseparam		= igc_set_pauseparam,
+	.get_strings		= igc_get_strings,
+	.get_sset_count		= igc_get_sset_count,
+	.get_ethtool_stats	= igc_get_ethtool_stats,
+	.get_coalesce		= igc_get_coalesce,
+	.set_coalesce		= igc_set_coalesce,
+	.get_rxnfc		= igc_get_rxnfc,
+	.set_rxnfc		= igc_set_rxnfc,
+	.get_rxfh_indir_size	= igc_get_rxfh_indir_size,
+	.get_rxfh		= igc_get_rxfh,
+	.set_rxfh		= igc_set_rxfh,
+	.get_channels		= igc_get_channels,
+	.set_channels		= igc_set_channels,
+	.get_priv_flags		= igc_get_priv_flags,
+	.set_priv_flags		= igc_set_priv_flags,
+	.begin			= igc_ethtool_begin,
+	.complete		= igc_ethtool_complete,
+	.get_link_ksettings	= igc_get_link_ksettings,
+	.set_link_ksettings	= igc_set_link_ksettings,
+};
+
+void igc_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &igc_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
new file mode 100644
index 0000000..abb2d72
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_HW_H_
+#define _IGC_HW_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+
+#include "igc_regs.h"
+#include "igc_defines.h"
+#include "igc_mac.h"
+#include "igc_phy.h"
+#include "igc_nvm.h"
+#include "igc_i225.h"
+#include "igc_base.h"
+
+#define IGC_DEV_ID_I225_LM			0x15F2
+#define IGC_DEV_ID_I225_V			0x15F3
+#define IGC_DEV_ID_I225_I			0x15F8
+#define IGC_DEV_ID_I220_V			0x15F7
+#define IGC_DEV_ID_I225_K			0x3100
+
+#define IGC_FUNC_0				0
+
+/* Function pointers for the MAC. */
+struct igc_mac_operations {
+	s32 (*check_for_link)(struct igc_hw *hw);
+	s32 (*reset_hw)(struct igc_hw *hw);
+	s32 (*init_hw)(struct igc_hw *hw);
+	s32 (*setup_physical_interface)(struct igc_hw *hw);
+	void (*rar_set)(struct igc_hw *hw, u8 *address, u32 index);
+	s32 (*read_mac_addr)(struct igc_hw *hw);
+	s32 (*get_speed_and_duplex)(struct igc_hw *hw, u16 *speed,
+				    u16 *duplex);
+	s32 (*acquire_swfw_sync)(struct igc_hw *hw, u16 mask);
+	void (*release_swfw_sync)(struct igc_hw *hw, u16 mask);
+};
+
+enum igc_mac_type {
+	igc_undefined = 0,
+	igc_i225,
+	igc_num_macs  /* List is 1-based, so subtract 1 for true count. */
+};
+
+enum igc_phy_type {
+	igc_phy_unknown = 0,
+	igc_phy_none,
+	igc_phy_i225,
+};
+
+enum igc_media_type {
+	igc_media_type_unknown = 0,
+	igc_media_type_copper = 1,
+	igc_num_media_types
+};
+
+enum igc_nvm_type {
+	igc_nvm_unknown = 0,
+	igc_nvm_eeprom_spi,
+	igc_nvm_flash_hw,
+	igc_nvm_invm,
+};
+
+struct igc_info {
+	s32 (*get_invariants)(struct igc_hw *hw);
+	struct igc_mac_operations *mac_ops;
+	const struct igc_phy_operations *phy_ops;
+	struct igc_nvm_operations *nvm_ops;
+};
+
+extern const struct igc_info igc_base_info;
+
+struct igc_mac_info {
+	struct igc_mac_operations ops;
+
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+
+	enum igc_mac_type type;
+
+	u32 collision_delta;
+	u32 ledctl_default;
+	u32 ledctl_mode1;
+	u32 ledctl_mode2;
+	u32 mc_filter_type;
+	u32 tx_packet_delta;
+	u32 txcw;
+
+	u16 mta_reg_count;
+	u16 uta_reg_count;
+
+	u16 rar_entry_count;
+
+	u8 forced_speed_duplex;
+
+	bool adaptive_ifs;
+	bool has_fwsm;
+	bool asf_firmware_present;
+	bool arc_subsystem_valid;
+
+	bool autoneg;
+	bool autoneg_failed;
+	bool get_link_status;
+};
+
+struct igc_nvm_operations {
+	s32 (*acquire)(struct igc_hw *hw);
+	s32 (*read)(struct igc_hw *hw, u16 offset, u16 i, u16 *data);
+	void (*release)(struct igc_hw *hw);
+	s32 (*write)(struct igc_hw *hw, u16 offset, u16 i, u16 *data);
+	s32 (*update)(struct igc_hw *hw);
+	s32 (*validate)(struct igc_hw *hw);
+	s32 (*valid_led_default)(struct igc_hw *hw, u16 *data);
+};
+
+struct igc_phy_operations {
+	s32 (*acquire)(struct igc_hw *hw);
+	s32 (*check_reset_block)(struct igc_hw *hw);
+	s32 (*force_speed_duplex)(struct igc_hw *hw);
+	s32 (*get_phy_info)(struct igc_hw *hw);
+	s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data);
+	void (*release)(struct igc_hw *hw);
+	s32 (*reset)(struct igc_hw *hw);
+	s32 (*write_reg)(struct igc_hw *hw, u32 address, u16 data);
+};
+
+struct igc_nvm_info {
+	struct igc_nvm_operations ops;
+	enum igc_nvm_type type;
+
+	u32 flash_bank_size;
+	u32 flash_base_addr;
+
+	u16 word_size;
+	u16 delay_usec;
+	u16 address_bits;
+	u16 opcode_bits;
+	u16 page_size;
+};
+
+struct igc_phy_info {
+	struct igc_phy_operations ops;
+
+	enum igc_phy_type type;
+
+	u32 addr;
+	u32 id;
+	u32 reset_delay_us; /* in usec */
+	u32 revision;
+
+	enum igc_media_type media_type;
+
+	u16 autoneg_advertised;
+	u16 autoneg_mask;
+
+	u8 mdix;
+
+	bool is_mdix;
+	bool reset_disable;
+	bool speed_downgraded;
+	bool autoneg_wait_to_complete;
+};
+
+struct igc_bus_info {
+	u16 func;
+	u16 pci_cmd_word;
+};
+
+enum igc_fc_mode {
+	igc_fc_none = 0,
+	igc_fc_rx_pause,
+	igc_fc_tx_pause,
+	igc_fc_full,
+	igc_fc_default = 0xFF
+};
+
+struct igc_fc_info {
+	u32 high_water;     /* Flow control high-water mark */
+	u32 low_water;      /* Flow control low-water mark */
+	u16 pause_time;     /* Flow control pause timer */
+	bool send_xon;      /* Flow control send XON */
+	bool strict_ieee;   /* Strict IEEE mode */
+	enum igc_fc_mode current_mode; /* Type of flow control */
+	enum igc_fc_mode requested_mode;
+};
+
+struct igc_dev_spec_base {
+	bool clear_semaphore_once;
+};
+
+struct igc_hw {
+	void *back;
+
+	u8 __iomem *hw_addr;
+	unsigned long io_base;
+
+	struct igc_mac_info  mac;
+	struct igc_fc_info   fc;
+	struct igc_nvm_info  nvm;
+	struct igc_phy_info  phy;
+
+	struct igc_bus_info bus;
+
+	union {
+		struct igc_dev_spec_base	_base;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8 revision_id;
+};
+
+/* Statistics counters collected by the MAC */
+struct igc_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+	u64 cbtmpc;
+	u64 htdpmc;
+	u64 cbrdpc;
+	u64 cbrmpc;
+	u64 rpthc;
+	u64 hgptc;
+	u64 htcbdpc;
+	u64 hgorc;
+	u64 hgotc;
+	u64 lenerrs;
+	u64 scvpc;
+	u64 hrmpc;
+	u64 doosync;
+	u64 o2bgptc;
+	u64 o2bspc;
+	u64 b2ospc;
+	u64 b2ogprc;
+};
+
+struct net_device *igc_get_hw_dev(struct igc_hw *hw);
+#define hw_dbg(format, arg...) \
+	netdev_dbg(igc_get_hw_dev(hw), format, ##arg)
+
+s32  igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+s32  igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+
+#endif /* _IGC_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
new file mode 100644
index 0000000..c25f555
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+
+/**
+ * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+static s32 igc_acquire_nvm_i225(struct igc_hw *hw)
+{
+	return igc_acquire_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_release_nvm_i225 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
+ */
+static void igc_release_nvm_i225(struct igc_hw *hw)
+{
+	igc_release_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_get_hw_semaphore_i225 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ */
+static s32 igc_get_hw_semaphore_i225(struct igc_hw *hw)
+{
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+	u32 swsm;
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = rd32(IGC_SWSM);
+		if (!(swsm & IGC_SWSM_SMBI))
+			break;
+
+		usleep_range(500, 600);
+		i++;
+	}
+
+	if (i == timeout) {
+		/* In rare circumstances, the SW semaphore may already be held
+		 * unintentionally. Clear the semaphore once before giving up.
+		 */
+		if (hw->dev_spec._base.clear_semaphore_once) {
+			hw->dev_spec._base.clear_semaphore_once = false;
+			igc_put_hw_semaphore(hw);
+			for (i = 0; i < timeout; i++) {
+				swsm = rd32(IGC_SWSM);
+				if (!(swsm & IGC_SWSM_SMBI))
+					break;
+
+				usleep_range(500, 600);
+			}
+		}
+
+		/* If we do not have the semaphore here, we have to give up. */
+		if (i == timeout) {
+			hw_dbg("Driver can't access device - SMBI bit is set.\n");
+			return -IGC_ERR_NVM;
+		}
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = rd32(IGC_SWSM);
+		wr32(IGC_SWSM, swsm | IGC_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (rd32(IGC_SWSM) & IGC_SWSM_SWESMBI)
+			break;
+
+		usleep_range(500, 600);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		igc_put_hw_semaphore(hw);
+		hw_dbg("Driver can't access the NVM\n");
+		return -IGC_ERR_NVM;
+	}
+
+	return 0;
+}
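
The function above is a two-stage handshake: poll until the SMBI (software)
bit reads clear, then set SWESMBI (software/firmware) and read back to
confirm it latched, releasing everything on timeout. A simplified user-space
model of those two stages; in real hardware the bits are granted and refused
by the device, which a plain memory word cannot reproduce:

    #include <stdbool.h>
    #include <stdint.h>

    #define SMBI    0x1     /* software semaphore bit */
    #define SWESMBI 0x2     /* software/firmware semaphore bit */

    static bool demo_get_semaphore(volatile uint32_t *reg, int timeout)
    {
            int i;

            /* stage 1: wait for the SW semaphore to be free */
            for (i = 0; i < timeout; i++)
                    if (!(*reg & SMBI))
                            break;
            if (i == timeout)
                    return false;

            /* stage 2: claim the FW semaphore and verify the bit latched */
            for (i = 0; i < timeout; i++) {
                    *reg |= SWESMBI;
                    if (*reg & SWESMBI)
                            return true;
            }
            return false;
    }
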
+
+/**
+ * igc_acquire_swfw_sync_i225 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ * will also specify which port we're acquiring the lock for.
+ */
+s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+{
+	s32 i = 0, timeout = 200;
+	u32 fwmask = mask << 16;
+	u32 swmask = mask;
+	s32 ret_val = 0;
+	u32 swfw_sync;
+
+	while (i < timeout) {
+		if (igc_get_hw_semaphore_i225(hw)) {
+			ret_val = -IGC_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = rd32(IGC_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/* Firmware currently using resource (fwmask) */
+		igc_put_hw_semaphore(hw);
+		mdelay(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -IGC_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	wr32(IGC_SW_FW_SYNC, swfw_sync);
+
+	igc_put_hw_semaphore(hw);
+out:
+	return ret_val;
+}
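
The SW_FW_SYNC register pairs each resource bit in the low 16 bits with a
firmware mirror of the same resource 16 bits higher, so a single read tests
both owners at once. The ownership test reduces to a small predicate (a
sketch, not a driver API):

    #include <stdbool.h>
    #include <stdint.h>

    static bool resource_free(uint32_t swfw_sync, uint16_t mask)
    {
            uint32_t swmask = mask;                  /* software claims */
            uint32_t fwmask = (uint32_t)mask << 16;  /* firmware claims */

            return !(swfw_sync & (fwmask | swmask));
    }
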
+
+/**
+ * igc_release_swfw_sync_i225 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ * will also specify which port we're releasing the lock for.
+ */
+void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	while (igc_get_hw_semaphore_i225(hw))
+		; /* Empty */
+
+	swfw_sync = rd32(IGC_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	wr32(IGC_SW_FW_SYNC, swfw_sync);
+
+	igc_put_hw_semaphore(hw);
+}
+
+/**
+ * igc_read_nvm_srrd_i225 - Reads Shadow Ram using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the Shadow Ram to read
+ * @words: number of words to read
+ * @data: word read from the Shadow Ram
+ *
+ * Reads a 16 bit word from the Shadow Ram using the EERD register.
+ * Uses necessary synchronization semaphores.
+ */
+static s32 igc_read_nvm_srrd_i225(struct igc_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of the forceful takeover procedure. However, it is more
+	 * efficient to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+			IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+		status = hw->nvm.ops.acquire(hw);
+		if (status)
+			break;
+
+		status = igc_read_nvm_eerd(hw, offset, count, data + i);
+		hw->nvm.ops.release(hw);
+		if (status)
+			break;
+	}
+
+	return status;
+}
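
The loop splits the transfer into bursts of at most IGC_EERD_EEWR_MAX_COUNT
words, dropping and re-taking the semaphore between bursts so firmware can
take the NVM over if it needs to; the ternary is just a verbose min(). A
compact model of the chunking (the 512-word burst size is assumed for
illustration only):

    #include <stdio.h>

    #define MAX_COUNT 512   /* stands in for IGC_EERD_EEWR_MAX_COUNT */

    static void demo_burst(unsigned int words)
    {
            unsigned int i, count;

            for (i = 0; i < words; i += MAX_COUNT) {
                    count = words - i;
                    if (count > MAX_COUNT)
                            count = MAX_COUNT;
                    /* acquire semaphore, transfer 'count' words at
                     * offset 'i', release semaphore
                     */
                    printf("chunk at %u, %u words\n", i, count);
            }
    }

    int main(void)
    {
            demo_burst(1030);  /* 512 + 512 + 6 */
            return 0;
    }
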
+
+/**
+ * igc_write_nvm_srwr - Write to Shadow Ram using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow Ram to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ * Writes data to Shadow Ram at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function, the
+ * Shadow Ram will most likely contain an invalid checksum.
+ */
+static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 attempts = 100000;
+	u32 i, k, eewr = 0;
+	s32 ret_val = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+	    words == 0) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) |
+			(data[i] << IGC_NVM_RW_REG_DATA) |
+			IGC_NVM_RW_REG_START;
+
+		wr32(IGC_SRWR, eewr);
+
+		for (k = 0; k < attempts; k++) {
+			if (IGC_NVM_RW_REG_DONE &
+			    rd32(IGC_SRWR)) {
+				ret_val = 0;
+				break;
+			}
+			udelay(5);
+		}
+
+		if (ret_val) {
+			hw_dbg("Shadow RAM write EEWR timed out\n");
+			break;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_write_nvm_srwr_i225 - Write to Shadow RAM using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow RAM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ * Writes data to Shadow RAM at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function, the
+ * data will not be committed to FLASH and also Shadow RAM will most likely
+ * contain an invalid checksum.
+ *
+ * If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ * partially written.
+ */
+static s32 igc_write_nvm_srwr_i225(struct igc_hw *hw, u16 offset, u16 words,
+				   u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of the forceful takeover procedure. However, it is more
+	 * efficient to write in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+			IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+		status = hw->nvm.ops.acquire(hw);
+		if (status)
+			break;
+
+		status = igc_write_nvm_srwr(hw, offset, count, data + i);
+		hw->nvm.ops.release(hw);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * igc_validate_nvm_checksum_i225 - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ */
+static s32 igc_validate_nvm_checksum_i225(struct igc_hw *hw)
+{
+	s32 (*read_op_ptr)(struct igc_hw *hw, u16 offset, u16 count,
+			   u16 *data);
+	s32 status = 0;
+
+	status = hw->nvm.ops.acquire(hw);
+	if (status)
+		goto out;
+
+	/* Temporarily replace the read function that grabs the semaphore
+	 * with one that skips it; we already hold the semaphore here.
+	 */
+	read_op_ptr = hw->nvm.ops.read;
+	hw->nvm.ops.read = igc_read_nvm_eerd;
+
+	status = igc_validate_nvm_checksum(hw);
+
+	/* Revert original read operation. */
+	hw->nvm.ops.read = read_op_ptr;
+
+	hw->nvm.ops.release(hw);
+
+out:
+	return status;
+}
+
+/**
+ * igc_pool_flash_update_done_i225 - Poll FLUDONE status
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_pool_flash_update_done_i225(struct igc_hw *hw)
+{
+	s32 ret_val = -IGC_ERR_NVM;
+	u32 i, reg;
+
+	for (i = 0; i < IGC_FLUDONE_ATTEMPTS; i++) {
+		reg = rd32(IGC_EECD);
+		if (reg & IGC_EECD_FLUDONE_I225) {
+			ret_val = 0;
+			break;
+		}
+		udelay(5);
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_update_flash_i225 - Commit EEPROM to the flash
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_update_flash_i225(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 flup;
+
+	ret_val = igc_pool_flash_update_done_i225(hw);
+	if (ret_val == -IGC_ERR_NVM) {
+		hw_dbg("Flash update time out\n");
+		goto out;
+	}
+
+	flup = rd32(IGC_EECD) | IGC_EECD_FLUPD_I225;
+	wr32(IGC_EECD, flup);
+
+	ret_val = igc_pool_flash_update_done_i225(hw);
+	if (ret_val)
+		hw_dbg("Flash update time out\n");
+	else
+		hw_dbg("Flash update complete\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_update_nvm_checksum_i225 - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum.  Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM. Next, the EEPROM data is committed onto the flash.
+ */
+static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw)
+{
+	u16 checksum = 0;
+	s32 ret_val = 0;
+	u16 i, nvm_data;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	ret_val = igc_read_nvm_eerd(hw, 0, 1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("EEPROM read failed\n");
+		goto out;
+	}
+
+	ret_val = hw->nvm.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	/* Do not use hw->nvm.ops.write, hw->nvm.ops.read
+	 * because we do not want to take the synchronization
+	 * semaphores twice here.
+	 */
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = igc_read_nvm_eerd(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw->nvm.ops.release(hw);
+			hw_dbg("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16)NVM_SUM - checksum;
+	ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
+				     &checksum);
+	if (ret_val) {
+		hw->nvm.ops.release(hw);
+		hw_dbg("NVM Write Error while updating checksum.\n");
+		goto out;
+	}
+
+	hw->nvm.ops.release(hw);
+
+	ret_val = igc_update_flash_i225(hw);
+
+out:
+	return ret_val;
+}
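
The invariant maintained here is that the 16-bit sum of NVM words 0 through
NVM_CHECKSUM_REG equals NVM_SUM (0xBABA, per the validate routine above), so
the stored checksum word is NVM_SUM minus the sum of the preceding words. A
self-contained model, with the constant values assumed from the e1000-family
layout:

    #include <stdint.h>
    #include <stdio.h>

    #define NVM_CHECKSUM_REG 0x3F
    #define NVM_SUM          0xBABA

    static uint16_t demo_compute_checksum(const uint16_t *nvm)
    {
            uint16_t sum = 0;
            int i;

            for (i = 0; i < NVM_CHECKSUM_REG; i++)
                    sum += nvm[i];
            return (uint16_t)(NVM_SUM - sum);
    }

    int main(void)
    {
            uint16_t nvm[NVM_CHECKSUM_REG + 1] = { 0x8086, 0x1234 };
            uint16_t total = 0;
            int i;

            nvm[NVM_CHECKSUM_REG] = demo_compute_checksum(nvm);

            for (i = 0; i <= NVM_CHECKSUM_REG; i++)
                    total += nvm[i];
            printf("sum=0x%04x (expect 0xbaba)\n", total);
            return 0;
    }
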
+
+/**
+ * igc_get_flash_presence_i225 - Check if flash device is detected
+ * @hw: pointer to the HW structure
+ */
+bool igc_get_flash_presence_i225(struct igc_hw *hw)
+{
+	bool ret_val = false;
+	u32 eec = 0;
+
+	eec = rd32(IGC_EECD);
+	if (eec & IGC_EECD_FLASH_DETECTED_I225)
+		ret_val = true;
+
+	return ret_val;
+}
+
+/**
+ * igc_init_nvm_params_i225 - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ */
+s32 igc_init_nvm_params_i225(struct igc_hw *hw)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+
+	nvm->ops.acquire = igc_acquire_nvm_i225;
+	nvm->ops.release = igc_release_nvm_i225;
+
+	/* NVM Function Pointers */
+	if (igc_get_flash_presence_i225(hw)) {
+		hw->nvm.type = igc_nvm_flash_hw;
+		nvm->ops.read = igc_read_nvm_srrd_i225;
+		nvm->ops.write = igc_write_nvm_srwr_i225;
+		nvm->ops.validate = igc_validate_nvm_checksum_i225;
+		nvm->ops.update = igc_update_nvm_checksum_i225;
+	} else {
+		hw->nvm.type = igc_nvm_invm;
+		nvm->ops.read = igc_read_nvm_eerd;
+		nvm->ops.write = NULL;
+		nvm->ops.validate = NULL;
+		nvm->ops.update = NULL;
+	}
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.h b/drivers/net/ethernet/intel/igc/igc_i225.h
new file mode 100644
index 0000000..7b66e1f
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_I225_H_
+#define _IGC_I225_H_
+
+s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask);
+void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask);
+
+s32 igc_init_nvm_params_i225(struct igc_hw *hw);
+bool igc_get_flash_presence_i225(struct igc_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
new file mode 100644
index 0000000..5eeb4c8
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "igc_mac.h"
+#include "igc_hw.h"
+
+/**
+ * igc_disable_pcie_master - Disables PCI-express master access
+ * @hw: pointer to the HW structure
+ *
+ * Returns 0 if successful, else returns -10
+ * (-IGC_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ * the master requests to be disabled.
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests.
+ */
+s32 igc_disable_pcie_master(struct igc_hw *hw)
+{
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl |= IGC_CTRL_GIO_MASTER_DISABLE;
+	wr32(IGC_CTRL, ctrl);
+
+	while (timeout) {
+		if (!(rd32(IGC_STATUS) &
+		    IGC_STATUS_GIO_MASTER_ENABLE))
+			break;
+		usleep_range(2000, 3000);
+		timeout--;
+	}
+
+	if (!timeout) {
+		hw_dbg("Master requests are pending.\n");
+		ret_val = -IGC_ERR_MASTER_REQUESTS_PENDING;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_init_rx_addrs - Initialize receive addresses
+ * @hw: pointer to the HW structure
+ * @rar_count: receive address registers
+ *
+ * Setup the receive address registers by setting the base receive address
+ * register to the device's MAC address and clearing all the other receive
+ * address registers to 0.
+ */
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count)
+{
+	u8 mac_addr[ETH_ALEN] = {0};
+	u32 i;
+
+	/* Setup the receive address */
+	hw_dbg("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	hw_dbg("Clearing RAR[1-%u]\n", rar_count - 1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ * igc_set_fc_watermarks - Set flow control high/low watermarks
+ * @hw: pointer to the HW structure
+ *
+ * Sets the flow control high/low threshold (watermark) registers.  If
+ * flow control XON frame transmission is enabled, then set XON frame
+ * transmission as well.
+ */
+static s32 igc_set_fc_watermarks(struct igc_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	/* Set the flow control receive threshold registers.  Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code.  However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & igc_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= IGC_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	wr32(IGC_FCRTL, fcrtl);
+	wr32(IGC_FCRTH, fcrth);
+
+	return 0;
+}
+
+/**
+ * igc_setup_link - Setup flow control and link settings
+ * @hw: pointer to the HW structure
+ *
+ * Determines which flow control settings to use, then configures flow
+ * control.  Calls the appropriate media-specific link configuration
+ * function.  Assuming the adapter has a valid link partner, a valid link
+ * should be established.  Assumes the hardware has previously been reset
+ * and the transmitter and receiver are not enabled.
+ */
+s32 igc_setup_link(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (igc_check_reset_block(hw))
+		goto out;
+
+	/* If requested flow control is set to default, set flow control
+	 * to both 'rx' and 'tx' pause frames.
+	 */
+	if (hw->fc.requested_mode == igc_fc_default)
+		hw->fc.requested_mode = igc_fc_full;
+
+	/* We want to save off the original Flow Control configuration just
+	 * in case we get disconnected and then reconnected into a different
+	 * hub or switch with different Flow Control capabilities.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		goto out;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values.  This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	hw_dbg("Initializing the Flow Control address, type and timer regs\n");
+	wr32(IGC_FCT, FLOW_CONTROL_TYPE);
+	wr32(IGC_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	wr32(IGC_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	wr32(IGC_FCTTV, hw->fc.pause_time);
+
+	ret_val = igc_set_fc_watermarks(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_force_mac_fc - Force the MAC's flow control settings
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings.  Sets the TFCE and RFCE bits in the
+ * device control register to reflect the adapter settings.  TFCE and RFCE
+ * need to be explicitly set by software when a copper PHY is used because
+ * autonegotiation is managed by the PHY rather than the MAC.  Software must
+ * also configure these bits when link is forced on a fiber connection.
+ */
+s32 igc_force_mac_fc(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit and
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disables flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause
+	 *          frames but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause
+	 *          frames but we do not receive pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) is enabled.
+	 *  other:  No other values should be possible at this point.
+	 */
+	hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case igc_fc_none:
+		ctrl &= (~(IGC_CTRL_TFCE | IGC_CTRL_RFCE));
+		break;
+	case igc_fc_rx_pause:
+		ctrl &= (~IGC_CTRL_TFCE);
+		ctrl |= IGC_CTRL_RFCE;
+		break;
+	case igc_fc_tx_pause:
+		ctrl &= (~IGC_CTRL_RFCE);
+		ctrl |= IGC_CTRL_TFCE;
+		break;
+	case igc_fc_full:
+		ctrl |= (IGC_CTRL_TFCE | IGC_CTRL_RFCE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		ret_val = -IGC_ERR_CONFIG;
+		goto out;
+	}
+
+	wr32(IGC_CTRL, ctrl);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_clear_hw_cntrs_base - Clear base hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the base hardware counters by reading the counter registers.
+ */
+void igc_clear_hw_cntrs_base(struct igc_hw *hw)
+{
+	rd32(IGC_CRCERRS);
+	rd32(IGC_SYMERRS);
+	rd32(IGC_MPC);
+	rd32(IGC_SCC);
+	rd32(IGC_ECOL);
+	rd32(IGC_MCC);
+	rd32(IGC_LATECOL);
+	rd32(IGC_COLC);
+	rd32(IGC_DC);
+	rd32(IGC_SEC);
+	rd32(IGC_RLEC);
+	rd32(IGC_XONRXC);
+	rd32(IGC_XONTXC);
+	rd32(IGC_XOFFRXC);
+	rd32(IGC_XOFFTXC);
+	rd32(IGC_FCRUC);
+	rd32(IGC_GPRC);
+	rd32(IGC_BPRC);
+	rd32(IGC_MPRC);
+	rd32(IGC_GPTC);
+	rd32(IGC_GORCL);
+	rd32(IGC_GORCH);
+	rd32(IGC_GOTCL);
+	rd32(IGC_GOTCH);
+	rd32(IGC_RNBC);
+	rd32(IGC_RUC);
+	rd32(IGC_RFC);
+	rd32(IGC_ROC);
+	rd32(IGC_RJC);
+	rd32(IGC_TORL);
+	rd32(IGC_TORH);
+	rd32(IGC_TOTL);
+	rd32(IGC_TOTH);
+	rd32(IGC_TPR);
+	rd32(IGC_TPT);
+	rd32(IGC_MPTC);
+	rd32(IGC_BPTC);
+
+	rd32(IGC_PRC64);
+	rd32(IGC_PRC127);
+	rd32(IGC_PRC255);
+	rd32(IGC_PRC511);
+	rd32(IGC_PRC1023);
+	rd32(IGC_PRC1522);
+	rd32(IGC_PTC64);
+	rd32(IGC_PTC127);
+	rd32(IGC_PTC255);
+	rd32(IGC_PTC511);
+	rd32(IGC_PTC1023);
+	rd32(IGC_PTC1522);
+
+	rd32(IGC_ALGNERRC);
+	rd32(IGC_RXERRC);
+	rd32(IGC_TNCRS);
+	rd32(IGC_CEXTERR);
+	rd32(IGC_TSCTC);
+	rd32(IGC_TSCTFC);
+
+	rd32(IGC_MGTPRC);
+	rd32(IGC_MGTPDC);
+	rd32(IGC_MGTPTC);
+
+	rd32(IGC_IAC);
+	rd32(IGC_ICRXOC);
+
+	rd32(IGC_ICRXPTC);
+	rd32(IGC_ICRXATC);
+	rd32(IGC_ICTXPTC);
+	rd32(IGC_ICTXATC);
+	rd32(IGC_ICTXQEC);
+	rd32(IGC_ICTXQMTC);
+	rd32(IGC_ICRXDMTC);
+
+	rd32(IGC_CBTMPC);
+	rd32(IGC_HTDPMC);
+	rd32(IGC_CBRMPC);
+	rd32(IGC_RPTHC);
+	rd32(IGC_HGPTC);
+	rd32(IGC_HTCBDPC);
+	rd32(IGC_HGORCL);
+	rd32(IGC_HGORCH);
+	rd32(IGC_HGOTCL);
+	rd32(IGC_HGOTCH);
+	rd32(IGC_LENERRS);
+}
+
+/**
+ * igc_rar_set - Set receive address register
+ * @hw: pointer to the HW structure
+ * @addr: pointer to the receive address
+ * @index: receive address array register
+ *
+ * Sets the receive address array register at index to the address passed
+ * in by addr.
+ */
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= IGC_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	wr32(IGC_RAL(index), rar_low);
+	wrfl();
+	wr32(IGC_RAH(index), rar_high);
+	wrfl();
+}
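
As a standalone function, the packing lays the six address bytes (which
arrive in network, i.e. big-endian, order) little-endian across the low
register and the bottom half of the high register; the AV valid bit and the
write flushes are not modeled here:

    #include <stdint.h>
    #include <stdio.h>

    static void demo_pack_rar(const uint8_t addr[6], uint32_t *lo,
                              uint32_t *hi)
    {
            *lo = (uint32_t)addr[0] |
                  ((uint32_t)addr[1] << 8) |
                  ((uint32_t)addr[2] << 16) |
                  ((uint32_t)addr[3] << 24);
            *hi = (uint32_t)addr[4] | ((uint32_t)addr[5] << 8);
    }

    int main(void)
    {
            uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
            uint32_t lo, hi;

            demo_pack_rar(mac, &lo, &hi);
            printf("RAL=0x%08x RAH=0x%08x\n", lo, hi);
            /* RAL=0x33221100 RAH=0x00005544 */
            return 0;
    }
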
+
+/**
+ * igc_check_for_copper_link - Check for link (Copper)
+ * @hw: pointer to the HW structure
+ *
+ * Checks to see if the link status of the hardware has changed.  If a
+ * change in link status has been detected, then we read the PHY registers
+ * to get the current speed/duplex if link exists.
+ */
+s32 igc_check_for_copper_link(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	bool link;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed.  The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status) {
+		ret_val = 0;
+		goto out;
+	}
+
+	/* First we want to see if the MII Status Register reports
+	 * link.  If so, then we want to get the current speed/duplex
+	 * of the PHY.
+	 */
+	ret_val = igc_phy_has_link(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (!link)
+		goto out; /* No link detected */
+
+	mac->get_link_status = false;
+
+	/* Check if there was DownShift, must be checked
+	 * immediately after link-up
+	 */
+	igc_check_downshift(hw);
+
+	/* If we are forcing speed/duplex, then we simply return since
+	 * we have already determined whether we have link or not.
+	 */
+	if (!mac->autoneg) {
+		ret_val = -IGC_ERR_CONFIG;
+		goto out;
+	}
+
+	/* Auto-Neg is enabled.  Auto Speed Detection takes care
+	 * of MAC speed/duplex configuration.  So we only need to
+	 * configure Collision Distance in the MAC.
+	 */
+	igc_config_collision_dist(hw);
+
+	/* Configure Flow Control now that Auto-Neg has completed.
+	 * First, we need to restore the desired flow control
+	 * settings because we may have had to re-autoneg with a
+	 * different link partner.
+	 */
+	ret_val = igc_config_fc_after_link_up(hw);
+	if (ret_val)
+		hw_dbg("Error configuring flow control\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_config_collision_dist - Configure collision distance
+ * @hw: pointer to the HW structure
+ *
+ * Configures the collision distance to the default value and is used
+ * during link setup. Currently no func pointer exists and all
+ * implementations are handled in the generic version of this function.
+ */
+void igc_config_collision_dist(struct igc_hw *hw)
+{
+	u32 tctl;
+
+	tctl = rd32(IGC_TCTL);
+
+	tctl &= ~IGC_TCTL_COLD;
+	tctl |= IGC_COLLISION_DISTANCE << IGC_COLD_SHIFT;
+
+	wr32(IGC_TCTL, tctl);
+	wrfl();
+}
+
+/**
+ * igc_config_fc_after_link_up - Configures flow control after link
+ * @hw: pointer to the HW structure
+ *
+ * Checks the status of auto-negotiation after link up to ensure that the
+ * speed and duplex were not forced.  If the link needed to be forced, then
+ * flow control needs to be forced also.  If auto-negotiation is enabled
+ * and did not fail, then we configure flow control based on our link
+ * partner.
+ */
+s32 igc_config_fc_after_link_up(struct igc_hw *hw)
+{
+	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+	struct igc_mac_info *mac = &hw->mac;
+	u16 speed, duplex;
+	s32 ret_val = 0;
+
+	/* Check for the case where auto-neg failed and we had to force
+	 * link.  In this case, we need to force the configuration of the
+	 * MAC to match the "fc" parameter.
+	 */
+	if (mac->autoneg_failed) {
+		if (hw->phy.media_type == igc_media_type_copper)
+			ret_val = igc_force_mac_fc(hw);
+	}
+
+	if (ret_val) {
+		hw_dbg("Error forcing flow control settings\n");
+		goto out;
+	}
+
+	/* Check for the case where we have copper media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if (hw->phy.media_type == igc_media_type_copper && mac->autoneg) {
+		/* Read the MII Status Register and check to see if AutoNeg
+		 * has completed.  We read this twice because this reg has
+		 * some "sticky" (latched) bits.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+					       &mii_status_reg);
+		if (ret_val)
+			goto out;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+					       &mii_status_reg);
+		if (ret_val)
+			goto out;
+
+		if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
+			hw_dbg("Copper PHY and Auto Neg has not completed.\n");
+			goto out;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (Address 4) and the Auto Negotiation Base
+		 * Page Ability Register (Address 5) to determine how
+		 * flow control was negotiated.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
+					       &mii_nway_adv_reg);
+		if (ret_val)
+			goto out;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
+					       &mii_nway_lp_ability_reg);
+		if (ret_val)
+			goto out;
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (Address 4) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (Address 5) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | igc_fc_none
+		 *   0   |    1    |   0   |   DC    | igc_fc_none
+		 *   0   |    1    |   1   |    0    | igc_fc_none
+		 *   0   |    1    |   1   |    1    | igc_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | igc_fc_none
+		 *   1   |   DC    |   1   |   DC    | igc_fc_full
+		 *   1   |    1    |   0   |    0    | igc_fc_none
+		 *   1   |    1    |   0   |    1    | igc_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | igc_fc_full
+		 *
+		 */
+		if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+		    (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+			/* Now we need to check if the user selected Rx ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise Rx
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == igc_fc_full) {
+				hw->fc.current_mode = igc_fc_full;
+				hw_dbg("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = igc_fc_rx_pause;
+				hw_dbg("Flow Control = RX PAUSE frames only.\n");
+			}
+		}
+
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | igc_fc_tx_pause
+		 */
+		else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = igc_fc_tx_pause;
+			hw_dbg("Flow Control = TX PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | igc_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+		/* Per the IEEE spec, at this point flow control should be
+		 * disabled.  However, we want to consider that we could
+		 * be connected to a legacy switch that doesn't advertise
+		 * desired flow control, but can be forced on the link
+		 * partner.  So if we advertised no flow control, that is
+		 * what we will resolve to.  If we advertised some kind of
+		 * receive capability (Rx Pause Only or Full Flow Control)
+		 * and the link partner advertised none, we will configure
+		 * ourselves to enable Rx Flow Control only.  We can do
+		 * this safely for two reasons:  If the link partner really
+		 * didn't want flow control enabled, and we enable Rx, no
+		 * harm done since we won't be receiving any PAUSE frames
+		 * anyway.  If the intent on the link partner was to have
+		 * flow control enabled, then by us enabling RX only, we
+		 * can at least receive pause frames and process them.
+		 * This is a good idea because in most cases, since we are
+		 * predominantly a server NIC, more times than not we will
+		 * be asked to delay transmission of packets than asking
+		 * our link partner to pause transmission of frames.
+		 */
+		else if ((hw->fc.requested_mode == igc_fc_none) ||
+			 (hw->fc.requested_mode == igc_fc_tx_pause) ||
+			 (hw->fc.strict_ieee)) {
+			hw->fc.current_mode = igc_fc_none;
+			hw_dbg("Flow Control = NONE.\n");
+		} else {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+
+		/* Now we need to do one last check...  If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+		if (ret_val) {
+			hw_dbg("Error getting link speed and duplex\n");
+			goto out;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = igc_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = igc_force_mac_fc(hw);
+		if (ret_val) {
+			hw_dbg("Error forcing flow control settings\n");
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
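
The PAUSE/ASM_DIR truth tables in the comments reduce to a short resolution
function. A sketch of that reduction (the enum names mirror the driver's
igc_fc_* modes; the driver additionally downgrades the symmetric case to
rx-pause when the user requested less than full flow control, as shown
above):

    #include <stdbool.h>
    #include <stdio.h>

    enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

    static enum fc_mode resolve_fc(bool l_pause, bool l_asm,
                                   bool p_pause, bool p_asm)
    {
            if (l_pause && p_pause)
                    return FC_FULL;         /* both advertise PAUSE */
            if (!l_pause && l_asm && p_pause && p_asm)
                    return FC_TX_PAUSE;     /* we may send, not honor */
            if (l_pause && l_asm && !p_pause && p_asm)
                    return FC_RX_PAUSE;     /* we may honor, not send */
            return FC_NONE;
    }

    int main(void)
    {
            /* local: PAUSE+ASM_DIR, partner: ASM_DIR only -> 1 (rx pause) */
            printf("%d\n", resolve_fc(true, true, false, true));
            return 0;
    }
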
+
+/**
+ * igc_get_auto_rd_done - Check for auto read completion
+ * @hw: pointer to the HW structure
+ *
+ * Check EEPROM for Auto Read done bit.
+ */
+s32 igc_get_auto_rd_done(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	s32 i = 0;
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (rd32(IGC_EECD) & IGC_EECD_AUTO_RD)
+			break;
+		usleep_range(1000, 2000);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		hw_dbg("Auto read by HW from NVM has not completed.\n");
+		ret_val = -IGC_ERR_RESET;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Read the status register for the current speed/duplex and store the current
+ * speed and duplex for copper connections.
+ */
+s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
+				    u16 *duplex)
+{
+	u32 status;
+
+	status = rd32(IGC_STATUS);
+	if (status & IGC_STATUS_SPEED_1000) {
+		/* For I225, STATUS will indicate 1G speed in both 1 Gbps
+		 * and 2.5 Gbps link modes. An additional bit is used
+		 * to differentiate between 1 Gbps and 2.5 Gbps.
+		 */
+		if (hw->mac.type == igc_i225 &&
+		    (status & IGC_STATUS_SPEED_2500)) {
+			*speed = SPEED_2500;
+			hw_dbg("2500 Mbs, ");
+		} else {
+			*speed = SPEED_1000;
+			hw_dbg("1000 Mbs, ");
+		}
+	} else if (status & IGC_STATUS_SPEED_100) {
+		*speed = SPEED_100;
+		hw_dbg("100 Mbs, ");
+	} else {
+		*speed = SPEED_10;
+		hw_dbg("10 Mbs, ");
+	}
+
+	if (status & IGC_STATUS_FD) {
+		*duplex = FULL_DUPLEX;
+		hw_dbg("Full Duplex\n");
+	} else {
+		*duplex = HALF_DUPLEX;
+		hw_dbg("Half Duplex\n");
+	}
+
+	return 0;
+}
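
The same disambiguation appears in igc_get_link_ksettings above: the 1000
bit is set for both 1 Gbps and 2.5 Gbps links, and a separate 2500 bit tells
them apart. Isolated, the decode looks like this (the bit values are
placeholders, not the real STATUS register layout):

    #include <stdint.h>
    #include <stdio.h>

    #define ST_SPEED_1000 0x1
    #define ST_SPEED_2500 0x2
    #define ST_SPEED_100  0x4

    static int decode_speed(uint32_t status)
    {
            if (status & ST_SPEED_1000)
                    return (status & ST_SPEED_2500) ? 2500 : 1000;
            if (status & ST_SPEED_100)
                    return 100;
            return 10;
    }

    int main(void)
    {
            printf("%d\n", decode_speed(ST_SPEED_1000 | ST_SPEED_2500));
            /* prints 2500 */
            return 0;
    }
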
+
+/**
+ * igc_put_hw_semaphore - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ */
+void igc_put_hw_semaphore(struct igc_hw *hw)
+{
+	u32 swsm;
+
+	swsm = rd32(IGC_SWSM);
+
+	swsm &= ~(IGC_SWSM_SMBI | IGC_SWSM_SWESMBI);
+
+	wr32(IGC_SWSM, swsm);
+}
+
+/**
+ * igc_enable_mng_pass_thru - Enable processing of ARPs
+ * @hw: pointer to the HW structure
+ *
+ * Verifies the hardware needs to leave the interface enabled so that frames
+ * can be directed to and from the management interface.
+ */
+bool igc_enable_mng_pass_thru(struct igc_hw *hw)
+{
+	bool ret_val = false;
+	u32 fwsm, factps;
+	u32 manc;
+
+	if (!hw->mac.asf_firmware_present)
+		goto out;
+
+	manc = rd32(IGC_MANC);
+
+	if (!(manc & IGC_MANC_RCV_TCO_EN))
+		goto out;
+
+	if (hw->mac.arc_subsystem_valid) {
+		fwsm = rd32(IGC_FWSM);
+		factps = rd32(IGC_FACTPS);
+
+		if (!(factps & IGC_FACTPS_MNGCG) &&
+		    ((fwsm & IGC_FWSM_MODE_MASK) ==
+		    (igc_mng_mode_pt << IGC_FWSM_MODE_SHIFT))) {
+			ret_val = true;
+			goto out;
+		}
+	} else {
+		if ((manc & IGC_MANC_SMBUS_EN) &&
+		    !(manc & IGC_MANC_ASF_EN)) {
+			ret_val = true;
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h
new file mode 100644
index 0000000..782bc99
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_mac.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_MAC_H_
+#define _IGC_MAC_H_
+
+#include "igc_hw.h"
+#include "igc_phy.h"
+#include "igc_defines.h"
+
+#ifndef IGC_REMOVED
+#define IGC_REMOVED(a) (0)
+#endif /* IGC_REMOVED */
+
+/* forward declaration */
+s32 igc_disable_pcie_master(struct igc_hw *hw);
+s32 igc_check_for_copper_link(struct igc_hw *hw);
+s32 igc_config_fc_after_link_up(struct igc_hw *hw);
+s32 igc_force_mac_fc(struct igc_hw *hw);
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count);
+s32 igc_setup_link(struct igc_hw *hw);
+void igc_clear_hw_cntrs_base(struct igc_hw *hw);
+s32 igc_get_auto_rd_done(struct igc_hw *hw);
+void igc_put_hw_semaphore(struct igc_hw *hw);
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index);
+void igc_config_collision_dist(struct igc_hw *hw);
+
+s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
+				    u16 *duplex);
+
+bool igc_enable_mng_pass_thru(struct igc_hw *hw);
+
+enum igc_mng_mode {
+	igc_mng_mode_none = 0,
+	igc_mng_mode_asf,
+	igc_mng_mode_pt,
+	igc_mng_mode_ipmi,
+	igc_mng_mode_host_if_only
+};
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
new file mode 100644
index 0000000..2488867
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -0,0 +1,4531 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/if_vlan.h>
+#include <linux/aer.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+
+#include <net/ipv6.h>
+
+#include "igc.h"
+#include "igc_hw.h"
+
+#define DRV_VERSION	"0.0.1-k"
+#define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+static int debug = -1;
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+char igc_driver_name[] = "igc";
+char igc_driver_version[] = DRV_VERSION;
+static const char igc_driver_string[] = DRV_SUMMARY;
+static const char igc_copyright[] =
+	"Copyright(c) 2018 Intel Corporation.";
+
+static const struct igc_info *igc_info_tbl[] = {
+	[board_base] = &igc_base_info,
+};
+
+static const struct pci_device_id igc_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
+
+/* forward declaration */
+static void igc_clean_tx_ring(struct igc_ring *tx_ring);
+static int igc_sw_init(struct igc_adapter *);
+static void igc_configure(struct igc_adapter *adapter);
+static void igc_power_down_link(struct igc_adapter *adapter);
+static void igc_set_default_mac_filter(struct igc_adapter *adapter);
+static void igc_set_rx_mode(struct net_device *netdev);
+static void igc_write_itr(struct igc_q_vector *q_vector);
+static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector);
+static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx);
+static void igc_set_interrupt_capability(struct igc_adapter *adapter,
+					 bool msix);
+static void igc_free_q_vectors(struct igc_adapter *adapter);
+static void igc_irq_disable(struct igc_adapter *adapter);
+static void igc_irq_enable(struct igc_adapter *adapter);
+static void igc_configure_msix(struct igc_adapter *adapter);
+static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
+				  struct igc_rx_buffer *bi);
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+void igc_reset(struct igc_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_fc_info *fc = &hw->fc;
+	u32 pba, hwm;
+
+	/* Repartition PBA for greater than 9k MTU if required */
+	pba = IGC_PBA_34K;
+
+	/* flow control settings
+	 * The high water mark must be low enough to fit one full frame
+	 * after transmitting the pause frame.  As such we must have enough
+	 * space to allow for us to complete our current transmit and then
+	 * receive the frame that is in progress from the link partner.
+	 * Set it to:
+	 * - the full Rx FIFO size minus one full Tx plus one full Rx frame
+	 */
+	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
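+	/* Worked example (assuming IGC_PBA_34K selects a 34 KB Rx packet
+	 * buffer and MAX_JUMBO_FRAME_SIZE is 9216 bytes): with a 1522-byte
+	 * max_frame_size, hwm = 34 * 1024 - (1522 + 9216) = 24078, which
+	 * the 16-byte alignment below rounds down to 24064.
+	 */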
+
+	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
+	fc->low_water = fc->high_water - 16;
+	fc->pause_time = 0xFFFF;
+	fc->send_xon = 1;
+	fc->current_mode = fc->requested_mode;
+
+	hw->mac.ops.reset_hw(hw);
+
+	if (hw->mac.ops.init_hw(hw))
+		dev_err(&pdev->dev, "Hardware Error\n");
+
+	if (!netif_running(adapter->netdev))
+		igc_power_down_link(adapter);
+
+	igc_get_phy_info(hw);
+}
+
+/**
+ * igc_power_up_link - Power up the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igc_power_up_link(struct igc_adapter *adapter)
+{
+	igc_reset_phy(&adapter->hw);
+
+	if (adapter->hw.phy.media_type == igc_media_type_copper)
+		igc_power_up_phy_copper(&adapter->hw);
+
+	igc_setup_link(&adapter->hw);
+}
+
+/**
+ * igc_power_down_link - Power down the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igc_power_down_link(struct igc_adapter *adapter)
+{
+	if (adapter->hw.phy.media_type == igc_media_type_copper)
+		igc_power_down_phy_copper_base(&adapter->hw);
+}
+
+/**
+ * igc_release_hw_control - release control of the h/w to f/w
+ * @adapter: address of board private structure
+ *
+ * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that the
+ * driver is no longer loaded.
+ */
+static void igc_release_hw_control(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware take over control of h/w */
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	wr32(IGC_CTRL_EXT,
+	     ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igc_get_hw_control - get control of the h/w from f/w
+ * @adapter: address of board private structure
+ *
+ * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that
+ * the driver is loaded.
+ */
+static void igc_get_hw_control(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware know the driver has taken over */
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	wr32(IGC_CTRL_EXT,
+	     ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igc_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void igc_free_tx_resources(struct igc_ring *tx_ring)
+{
+	igc_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void igc_free_all_tx_resources(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igc_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ * igc_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ */
+static void igc_clean_tx_ring(struct igc_ring *tx_ring)
+{
+	u16 i = tx_ring->next_to_clean;
+	struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	while (i != tx_ring->next_to_use) {
+		union igc_adv_tx_desc *eop_desc, *tx_desc;
+
+		/* Free all the Tx ring sk_buffs */
+		dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* check for eop_desc to determine the end of the packet */
+		eop_desc = tx_buffer->next_to_watch;
+		tx_desc = IGC_TX_DESC(tx_ring, i);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGC_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		i++;
+		if (unlikely(i == tx_ring->count)) {
+			i = 0;
+			tx_buffer = tx_ring->tx_buffer_info;
+		}
+	}
+
+	/* reset BQL for queue */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	/* reset next_to_use and next_to_clean */
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ * igc_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ */
+static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i])
+			igc_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+/**
+ * igc_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ */
+int igc_setup_tx_resources(struct igc_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int size = 0;
+
+	size = sizeof(struct igc_tx_buffer) * tx_ring->count;
+	tx_ring->tx_buffer_info = vzalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+
+	if (!tx_ring->desc)
+		goto err;
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	dev_err(dev,
+		"Unable to allocate memory for the transmit descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = igc_setup_tx_resources(adapter->tx_ring[i]);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Allocation for Tx Queue %u failed\n", i);
+			for (i--; i >= 0; i--)
+				igc_free_tx_resources(adapter->tx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * igc_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ */
+static void igc_clean_rx_ring(struct igc_ring *rx_ring)
+{
+	u16 i = rx_ring->next_to_clean;
+
+	dev_kfree_skb(rx_ring->skb);
+	rx_ring->skb = NULL;
+
+	/* Free all the Rx ring sk_buffs */
+	while (i != rx_ring->next_to_alloc) {
+		struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      buffer_info->dma,
+					      buffer_info->page_offset,
+					      igc_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev,
+				     buffer_info->dma,
+				     igc_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IGC_RX_DMA_ATTR);
+		__page_frag_cache_drain(buffer_info->page,
+					buffer_info->pagecnt_bias);
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+	}
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * igc_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ */
+static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i])
+			igc_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * igc_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void igc_free_rx_resources(struct igc_ring *rx_ring)
+{
+	igc_clean_rx_ring(rx_ring);
+
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void igc_free_all_rx_resources(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igc_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * igc_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ */
+int igc_setup_rx_resources(struct igc_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int size, desc_len;
+
+	size = sizeof(struct igc_rx_buffer) * rx_ring->count;
+	rx_ring->rx_buffer_info = vzalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	desc_len = sizeof(union igc_adv_rx_desc);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * desc_len;
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+
+	if (!rx_ring->desc)
+		goto err;
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	return 0;
+
+err:
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(dev,
+		"Unable to allocate memory for the receive descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igc_setup_all_rx_resources - wrapper to allocate Rx resources
+ *                                (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = igc_setup_rx_resources(adapter->rx_ring[i]);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Allocation for Rx Queue %u failed\n", i);
+			for (i--; i >= 0; i--)
+				igc_free_rx_resources(adapter->rx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * igc_configure_rx_ring - Configure a receive ring after Reset
+ * @adapter: board private structure
+ * @ring: receive ring to be configured
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ */
+static void igc_configure_rx_ring(struct igc_adapter *adapter,
+				  struct igc_ring *ring)
+{
+	struct igc_hw *hw = &adapter->hw;
+	union igc_adv_rx_desc *rx_desc;
+	int reg_idx = ring->reg_idx;
+	u32 srrctl = 0, rxdctl = 0;
+	u64 rdba = ring->dma;
+
+	/* disable the queue */
+	wr32(IGC_RXDCTL(reg_idx), 0);
+
+	/* Set DMA base address registers */
+	wr32(IGC_RDBAL(reg_idx),
+	     rdba & 0x00000000ffffffffULL);
+	wr32(IGC_RDBAH(reg_idx), rdba >> 32);
+	wr32(IGC_RDLEN(reg_idx),
+	     ring->count * sizeof(union igc_adv_rx_desc));
+
+	/* initialize head and tail */
+	ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
+	wr32(IGC_RDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	/* reset next_to_use/next_to_clean to place SW in sync with hardware */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+
+	/* set descriptor configuration */
+	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
+	if (ring_uses_large_buffer(ring))
+		srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+	else
+		srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+	wr32(IGC_SRRCTL(reg_idx), srrctl);
+
+	rxdctl |= IGC_RX_PTHRESH;
+	rxdctl |= IGC_RX_HTHRESH << 8;
+	rxdctl |= IGC_RX_WTHRESH << 16;
+
+	/* initialize rx_buffer_info */
+	memset(ring->rx_buffer_info, 0,
+	       sizeof(struct igc_rx_buffer) * ring->count);
+
+	/* initialize Rx descriptor 0 */
+	rx_desc = IGC_RX_DESC(ring, 0);
+	rx_desc->wb.upper.length = 0;
+
+	/* enable receive descriptor fetching */
+	rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
+
+	wr32(IGC_RXDCTL(reg_idx), rxdctl);
+}
+
+/**
+ * igc_configure_rx - Configure receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ */
+static void igc_configure_rx(struct igc_adapter *adapter)
+{
+	int i;
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
+}
+
+/**
+ * igc_configure_tx_ring - Configure transmit ring after Reset
+ * @adapter: board private structure
+ * @ring: tx ring to configure
+ *
+ * Configure a transmit ring after a reset.
+ */
+static void igc_configure_tx_ring(struct igc_adapter *adapter,
+				  struct igc_ring *ring)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int reg_idx = ring->reg_idx;
+	u64 tdba = ring->dma;
+	u32 txdctl = 0;
+
+	/* disable the queue */
+	wr32(IGC_TXDCTL(reg_idx), 0);
+	wrfl();
+	mdelay(10);
+
+	wr32(IGC_TDLEN(reg_idx),
+	     ring->count * sizeof(union igc_adv_tx_desc));
+	wr32(IGC_TDBAL(reg_idx),
+	     tdba & 0x00000000ffffffffULL);
+	wr32(IGC_TDBAH(reg_idx), tdba >> 32);
+
+	ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
+	wr32(IGC_TDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	txdctl |= IGC_TX_PTHRESH;
+	txdctl |= IGC_TX_HTHRESH << 8;
+	txdctl |= IGC_TX_WTHRESH << 16;
+
+	txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
+	wr32(IGC_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ * igc_configure_tx - Configure transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ */
+static void igc_configure_tx(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
+}
+
+/**
+ * igc_setup_mrqc - configure the multiple receive queue control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_mrqc(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 j, num_rx_queues;
+	u32 mrqc, rxcsum;
+	u32 rss_key[10];
+
+	netdev_rss_key_fill(rss_key, sizeof(rss_key));
+	for (j = 0; j < 10; j++)
+		wr32(IGC_RSSRK(j), rss_key[j]);
+
+	num_rx_queues = adapter->rss_queues;
+
+	if (adapter->rss_indir_tbl_init != num_rx_queues) {
+		for (j = 0; j < IGC_RETA_SIZE; j++)
+			adapter->rss_indir_tbl[j] =
+			(j * num_rx_queues) / IGC_RETA_SIZE;
+		adapter->rss_indir_tbl_init = num_rx_queues;
+	}
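+	/* Example of the even spread above, assuming IGC_RETA_SIZE is 128:
+	 * with rss_queues == 4, (j * 4) / 128 == j / 32, so indirection
+	 * entries 0..31 select queue 0, 32..63 queue 1, and so on.
+	 */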
+	igc_write_rss_indir_tbl(adapter);
+
+	/* Disable raw packet checksumming so that RSS hash is placed in
+	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
+	 * offloads as they are enabled by default
+	 */
+	rxcsum = rd32(IGC_RXCSUM);
+	rxcsum |= IGC_RXCSUM_PCSD;
+
+	/* Enable Receive Checksum Offload for SCTP */
+	rxcsum |= IGC_RXCSUM_CRCOFL;
+
+	/* Don't need to set TUOFL or IPOFL, they default to 1 */
+	wr32(IGC_RXCSUM, rxcsum);
+
+	/* Generate RSS hash based on packet types, TCP/UDP
+	 * port numbers and/or IPv4/v6 src and dst addresses
+	 */
+	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
+	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
+	       IGC_MRQC_RSS_FIELD_IPV6 |
+	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
+	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
+
+	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
+		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
+	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
+		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
+
+	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
+
+	wr32(IGC_MRQC, mrqc);
+}
+
+/**
+ * igc_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_rctl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	rctl = rd32(IGC_RCTL);
+
+	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
+	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
+
+	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
+		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
+
+	/* enable stripping of CRC. Newer features require
+	 * that the HW strips the CRC.
+	 */
+	rctl |= IGC_RCTL_SECRC;
+
+	/* disable store bad packets and clear size bits. */
+	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
+
+	/* enable LPE to allow for reception of jumbo frames */
+	rctl |= IGC_RCTL_LPE;
+
+	/* disable queue 0 to prevent tail write w/o re-config */
+	wr32(IGC_RXDCTL(0), 0);
+
+	/* This is useful for sniffing bad packets. */
+	if (adapter->netdev->features & NETIF_F_RXALL) {
+		/* UPE and MPE will be handled by normal PROMISC logic
+		 * in set_rx_mode
+		 */
+		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
+			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
+			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
+
+		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
+			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
+	}
+
+	wr32(IGC_RCTL, rctl);
+}
+
+/**
+ * igc_setup_tctl - configure the transmit control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_tctl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 tctl;
+
+	/* disable queue 0, which could be enabled by default */
+	wr32(IGC_TXDCTL(0), 0);
+
+	/* Program the Transmit Control Register */
+	tctl = rd32(IGC_TCTL);
+	tctl &= ~IGC_TCTL_CT;
+	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
+		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
+
+	/* Enable transmits */
+	tctl |= IGC_TCTL_EN;
+
+	wr32(IGC_TCTL, tctl);
+}
+
+/**
+ * igc_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_set_mac(struct net_device *netdev, void *p)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+	/* set the correct pool for the new PF MAC address in entry 0 */
+	igc_set_default_mac_filter(adapter);
+
+	return 0;
+}
+
+static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
+			    struct igc_tx_buffer *first,
+			    u32 vlan_macip_lens, u32 type_tucmd,
+			    u32 mss_l4len_idx)
+{
+	struct igc_adv_tx_context_desc *context_desc;
+	u16 i = tx_ring->next_to_use;
+	struct timespec64 ts;
+
+	context_desc = IGC_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
+
+	/* For 82575, context index must be unique per ring. */
+	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		mss_l4len_idx |= tx_ring->reg_idx << 4;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+
+	/* We assume there is always a valid Tx time available. Invalid times
+	 * should have been handled by the upper layers.
+	 */
+	if (tx_ring->launchtime_enable) {
+		ts = ktime_to_timespec64(first->skb->tstamp);
+		first->skb->tstamp = ktime_set(0, 0);
+		context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32);
+	} else {
+		context_desc->launch_time = 0;
+	}
+}
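+
+/* The ts.tv_nsec / 32 above converts the requested launch time into the
+ * 32 ns units the advanced context descriptor expects (stated as our
+ * reading of the i225 LaunchTime granularity; only the nanosecond
+ * portion of the timestamp is programmed).
+ */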
+
+static inline bool igc_ipv6_csum_is_sctp(struct sk_buff *skb)
+{
+	unsigned int offset = 0;
+
+	ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL);
+
+	return offset == skb_checksum_start_offset(skb);
+}
+
+static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+csum_failed:
+		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
+		    !tx_ring->launchtime_enable)
+			return;
+		goto no_csum;
+	}
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
+		/* fall through */
+	case offsetof(struct udphdr, check):
+		break;
+	case offsetof(struct sctphdr, checksum):
+		/* validate that this is actually an SCTP request */
+		if ((first->protocol == htons(ETH_P_IP) &&
+		     (ip_hdr(skb)->protocol == IPPROTO_SCTP)) ||
+		    (first->protocol == htons(ETH_P_IPV6) &&
+		     igc_ipv6_csum_is_sctp(skb))) {
+			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
+			break;
+		}
+		/* fall through */
+	default:
+		skb_checksum_help(skb);
+		goto csum_failed;
+	}
+
+	/* update TX checksum flag */
+	first->tx_flags |= IGC_TX_FLAGS_CSUM;
+	vlan_macip_lens = skb_checksum_start_offset(skb) -
+			  skb_network_offset(skb);
+no_csum:
+	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
+
+	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
+}
+
+static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+{
+	struct net_device *netdev = tx_ring->netdev;
+
+	netif_stop_subqueue(netdev, tx_ring->queue_index);
+
+	/* Memory barrier: make the stopped queue visible to the completion
+	 * path before re-checking for free descriptors below.
+	 */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (igc_desc_unused(tx_ring) < size)
+		return -EBUSY;
+
+	/* A reprieve! */
+	netif_wake_subqueue(netdev, tx_ring->queue_index);
+
+	u64_stats_update_begin(&tx_ring->tx_syncp2);
+	tx_ring->tx_stats.restart_queue2++;
+	u64_stats_update_end(&tx_ring->tx_syncp2);
+
+	return 0;
+}
+
+static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+{
+	if (igc_desc_unused(tx_ring) >= size)
+		return 0;
+	return __igc_maybe_stop_tx(tx_ring, size);
+}
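+
+/* The stop-then-recheck sequence in __igc_maybe_stop_tx() is the usual
+ * lock-free producer/consumer pattern: the xmit path stops the queue,
+ * the smp_mb() pairs with the barrier in igc_clean_tx_irq(), and the
+ * second igc_desc_unused() check catches descriptors freed by the
+ * cleanup path between the first check and the stop.
+ */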
+
+static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
+		       IGC_ADVTXD_DCMD_DEXT |
+		       IGC_ADVTXD_DCMD_IFCS;
+
+	return cmd_type;
+}
+
+static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
+				 union igc_adv_tx_desc *tx_desc,
+				 u32 tx_flags, unsigned int paylen)
+{
+	u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
+
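+	/* The multiply/divide pairs below are a branchless conditional:
+	 * (tx_flags & FLAG) is either 0 or FLAG, so multiplying by
+	 * (OPTION / FLAG) yields either 0 or OPTION with no unpredictable
+	 * branch (the constants are chosen so the division is exact).
+	 */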
+	/* insert L4 checksum */
+	olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
+			  ((IGC_TXD_POPTS_TXSM << 8) /
+			  IGC_TX_FLAGS_CSUM);
+
+	/* insert IPv4 checksum */
+	olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
+			  (((IGC_TXD_POPTS_IXSM << 8)) /
+			  IGC_TX_FLAGS_IPV4);
+
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static int igc_tx_map(struct igc_ring *tx_ring,
+		      struct igc_tx_buffer *first,
+		      const u8 hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct igc_tx_buffer *tx_buffer;
+	union igc_adv_tx_desc *tx_desc;
+	u32 tx_flags = first->tx_flags;
+	skb_frag_t *frag;
+	u16 i = tx_ring->next_to_use;
+	unsigned int data_len, size;
+	dma_addr_t dma;
+	u32 cmd_type = igc_tx_cmd_type(skb, tx_flags);
+
+	tx_desc = IGC_TX_DESC(tx_ring, i);
+
+	igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IGC_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += IGC_MAX_DATA_PER_TXD;
+			size -= IGC_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IGC_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+				       size, DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= size | IGC_TXD_DCMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* set the timestamp */
+	first->time_stamp = jiffies;
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	/* Make sure there is space in the ring for the next send. */
+	igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
+		writel(i, tx_ring->tail);
+
+	return 0;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	while (tx_buffer != first) {
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_page(tx_ring->dev,
+				       dma_unmap_addr(tx_buffer, dma),
+				       dma_unmap_len(tx_buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		if (i-- == 0)
+			i += tx_ring->count;
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	if (dma_unmap_len(tx_buffer, len))
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buffer, len, 0);
+
+	dev_kfree_skb_any(tx_buffer->skb);
+	tx_buffer->skb = NULL;
+
+	tx_ring->next_to_use = i;
+
+	return -1;
+}
+
+static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+				       struct igc_ring *tx_ring)
+{
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	__be16 protocol = vlan_get_protocol(skb);
+	struct igc_tx_buffer *first;
+	u32 tx_flags = 0;
+	unsigned short f;
+	u8 hdr_len = 0;
+
+	/* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
+	 *	+ 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
+	 *	+ 2 desc gap to keep tail from touching head,
+	 *	+ 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_frag_size(
+						&skb_shinfo(skb)->frags[f]));
+
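+	/* Worked example (assuming IGC_MAX_DATA_PER_TXD is at least 4 KB):
+	 * a 1514-byte linear skb with two 4 KB frags needs three data
+	 * descriptors, so the check below reserves count + 3 = 6 slots:
+	 * one context descriptor plus the two-descriptor gap noted above.
+	 */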
+	if (igc_maybe_stop_tx(tx_ring, count + 3)) {
+		/* this is a hard error */
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = protocol;
+
+	igc_tx_csum(tx_ring, first);
+
+	igc_tx_map(tx_ring, first, hdr_len);
+
+	return NETDEV_TX_OK;
+}
+
+static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
+						    struct sk_buff *skb)
+{
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= adapter->num_tx_queues)
+		r_idx = r_idx % adapter->num_tx_queues;
+
+	return adapter->tx_ring[r_idx];
+}
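+
+/* Example: with four Tx queues, a stack queue_mapping of 6 is folded to
+ * ring 2 (6 % 4); values below num_tx_queues pass through unchanged.
+ */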
+
+static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb->len < 17) {
+		if (skb_padto(skb, 17))
+			return NETDEV_TX_OK;
+		skb->len = 17;
+	}
+
+	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
+}
+
+static inline void igc_rx_hash(struct igc_ring *ring,
+			       union igc_adv_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	if (ring->netdev->features & NETIF_F_RXHASH)
+		skb_set_hash(skb,
+			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+			     PKT_HASH_TYPE_L3);
+}
+
+/**
+ * igc_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, Rx queue index, and protocol fields
+ * within the skb.
+ */
+static void igc_process_skb_fields(struct igc_ring *rx_ring,
+				   union igc_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	igc_rx_hash(rx_ring, rx_desc, skb);
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
+					       const unsigned int size)
+{
+	struct igc_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	prefetchw(rx_buffer->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+/**
+ * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ */
+static void igc_add_rx_frag(struct igc_ring *rx_ring,
+			    struct igc_rx_buffer *rx_buffer,
+			    struct sk_buff *skb,
+			    unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+	rx_buffer->page_offset ^= truesize;
+#else
+	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+				SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+				SKB_DATA_ALIGN(size);
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+	rx_buffer->page_offset += truesize;
+#endif
+}
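+
+/* On 4 KB pages the driver ping-pongs between the two halves of each
+ * page: with truesize == 2048, page_offset ^= truesize alternates
+ * between offsets 0 and 2048, so the half just attached to an skb can
+ * drain while the other half is posted back to hardware. On larger
+ * pages the offset advances instead, and igc_can_reuse_rx_page()
+ * decides when the page is exhausted.
+ */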
+
+static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
+				     struct igc_rx_buffer *rx_buffer,
+				     union igc_adv_rx_desc *rx_desc,
+				     unsigned int size)
+{
+	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(IGC_SKB_PAD + size);
+#endif
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	prefetch(va);
+#if L1_CACHE_BYTES < 128
+	prefetch(va + L1_CACHE_BYTES);
+#endif
+
+	/* build an skb around the page buffer */
+	skb = build_skb(va - IGC_SKB_PAD, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, IGC_SKB_PAD);
+	__skb_put(skb, size);
+
+	/* update buffer offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
+
+static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
+					 struct igc_rx_buffer *rx_buffer,
+					 union igc_adv_rx_desc *rx_desc,
+					 unsigned int size)
+{
+	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	prefetch(va);
+#if L1_CACHE_BYTES < 128
+	prefetch(va + L1_CACHE_BYTES);
+#endif
+
+	/* allocate a skb to store the frags */
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IGC_RX_HDR_LEN)
+		headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				(va + headlen) - page_address(rx_buffer->page),
+				size, truesize);
+#if (PAGE_SIZE < 8192)
+		rx_buffer->page_offset ^= truesize;
+#else
+		rx_buffer->page_offset += truesize;
+#endif
+	} else {
+		rx_buffer->pagecnt_bias++;
+	}
+
+	return skb;
+}
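+
+/* Two Rx skb strategies coexist above: igc_build_skb() wraps the DMA
+ * buffer in place (no payload copy, at the cost of IGC_SKB_PAD
+ * headroom in the buffer), while igc_construct_skb() allocates a fresh
+ * skb and copies up to IGC_RX_HDR_LEN of headers, leaving the
+ * remainder attached as a page fragment.
+ */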
+
+/**
+ * igc_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void igc_reuse_rx_page(struct igc_ring *rx_ring,
+			      struct igc_rx_buffer *old_buff)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct igc_rx_buffer *new_buff;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls.
+	 */
+	new_buff->dma		= old_buff->dma;
+	new_buff->page		= old_buff->page;
+	new_buff->page_offset	= old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+}
+
+static inline bool igc_page_is_reserved(struct page *page)
+{
+	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* avoid re-using remote pages */
+	if (unlikely(igc_page_is_reserved(page)))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+		return false;
+#else
+#define IGC_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
+
+	if (rx_buffer->page_offset > IGC_LAST_OFFSET)
+		return false;
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(!pagecnt_bias)) {
+		page_ref_add(page, USHRT_MAX);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
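+
+/* pagecnt_bias tracks how many references the driver still owns on the
+ * page, so page_ref_count(page) - pagecnt_bias asks "does anyone else
+ * hold this page?" without an extra atomic per frame; the USHRT_MAX
+ * top-up above batches page_ref_add() to roughly once per 64 K reuses.
+ */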
+
+/**
+ * igc_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it returns true to
+ * indicate that the frame continues in the next descriptor.
+ */
+static bool igc_is_non_eop(struct igc_ring *rx_ring,
+			   union igc_adv_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IGC_RX_DESC(rx_ring, ntc));
+
+	if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
+		return false;
+
+	return true;
+}
+
+/**
+ * igc_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool igc_cleanup_headers(struct igc_ring *rx_ring,
+				union igc_adv_rx_desc *rx_desc,
+				struct sk_buff *skb)
+{
+	if (unlikely((igc_test_staterr(rx_desc,
+				       IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) {
+		struct net_device *netdev = rx_ring->netdev;
+
+		if (!(netdev->features & NETIF_F_RXALL)) {
+			dev_kfree_skb_any(skb);
+			return true;
+		}
+	}
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+static void igc_put_rx_buffer(struct igc_ring *rx_ring,
+			      struct igc_rx_buffer *rx_buffer)
+{
+	if (igc_can_reuse_rx_page(rx_buffer)) {
+		/* hand second half of page back to the ring */
+		igc_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* We are not reusing the buffer so unmap it and free
+		 * any references we are holding to it
+		 */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     IGC_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+}
+
+/**
+ * igc_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @rx_ring: Rx descriptor ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ */
+static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
+{
+	union igc_adv_rx_desc *rx_desc;
+	u16 i = rx_ring->next_to_use;
+	struct igc_rx_buffer *bi;
+	u16 bufsz;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = IGC_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	bufsz = igc_rx_bufsz(rx_ring);
+
+	do {
+		if (!igc_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset, bufsz,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IGC_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+}
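+
+/* Note the biased-index idiom in igc_alloc_rx_buffers(): i starts at
+ * next_to_use - count, so the hot loop can detect wraparound with
+ * "if (unlikely(!i))" instead of comparing against the ring size on
+ * each iteration; the bias is undone before next_to_use is written
+ * back.
+ */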
+
+static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+{
+	unsigned int total_bytes = 0, total_packets = 0;
+	struct igc_ring *rx_ring = q_vector->rx.ring;
+	struct sk_buff *skb = rx_ring->skb;
+	u16 cleaned_count = igc_desc_unused(rx_ring);
+
+	while (likely(total_packets < budget)) {
+		union igc_adv_rx_desc *rx_desc;
+		struct igc_rx_buffer *rx_buffer;
+		unsigned int size;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
+			igc_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		rx_buffer = igc_get_rx_buffer(rx_ring, size);
+
+		/* retrieve a buffer from the ring */
+		if (skb)
+			igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else if (ring_uses_build_skb(rx_ring))
+			skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
+		else
+			skb = igc_construct_skb(rx_ring, rx_buffer,
+						rx_desc, size);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_failed++;
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		igc_put_rx_buffer(rx_ring, rx_buffer);
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (igc_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_bytes += skb->len;
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		igc_process_skb_fields(rx_ring, rx_desc, skb);
+
+		napi_gro_receive(&q_vector->napi, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	}
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	u64_stats_update_begin(&rx_ring->rx_syncp);
+	rx_ring->rx_stats.packets += total_packets;
+	rx_ring->rx_stats.bytes += total_bytes;
+	u64_stats_update_end(&rx_ring->rx_syncp);
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	if (cleaned_count)
+		igc_alloc_rx_buffers(rx_ring, cleaned_count);
+
+	return total_packets;
+}
+
+static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
+}
+
+static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
+				  struct igc_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 igc_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE,
+				 IGC_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_page(page);
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = igc_rx_offset(rx_ring);
+	bi->pagecnt_bias = 1;
+
+	return true;
+}
+
+/**
+ * igc_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: pointer to q_vector containing needed info
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * returns true if ring is completely cleaned
+ */
+static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	struct igc_ring *tx_ring = q_vector->tx.ring;
+	unsigned int i = tx_ring->next_to_clean;
+	struct igc_tx_buffer *tx_buffer;
+	union igc_adv_tx_desc *tx_desc;
+
+	if (test_bit(__IGC_DOWN, &adapter->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IGC_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		napi_consume_skb(tx_buffer->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* clear last DMA location and unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGC_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IGC_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->tx_syncp);
+	tx_ring->tx_stats.bytes += total_bytes;
+	tx_ring->tx_stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->tx_syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
+		struct igc_hw *hw = &adapter->hw;
+
+		/* Detect a transmit hang in hardware; this serializes the
+		 * check with the clearing of time_stamp and the movement of i.
+		 */
+		clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+		if (tx_buffer->next_to_watch &&
+		    time_after(jiffies, tx_buffer->time_stamp +
+		    (adapter->tx_timeout_factor * HZ)) &&
+		    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
+			/* detected Tx unit hang */
+			dev_err(tx_ring->dev,
+				"Detected Tx Unit Hang\n"
+				"  Tx Queue             <%d>\n"
+				"  TDH                  <%x>\n"
+				"  TDT                  <%x>\n"
+				"  next_to_use          <%x>\n"
+				"  next_to_clean        <%x>\n"
+				"buffer_info[next_to_clean]\n"
+				"  time_stamp           <%lx>\n"
+				"  next_to_watch        <%p>\n"
+				"  jiffies              <%lx>\n"
+				"  desc.status          <%x>\n",
+				tx_ring->queue_index,
+				rd32(IGC_TDH(tx_ring->reg_idx)),
+				readl(tx_ring->tail),
+				tx_ring->next_to_use,
+				tx_ring->next_to_clean,
+				tx_buffer->time_stamp,
+				tx_buffer->next_to_watch,
+				jiffies,
+				tx_buffer->next_to_watch->wb.status);
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			/* we are about to reset, no point in enabling stuff */
+			return true;
+		}
+	}
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	if (unlikely(total_packets &&
+		     netif_carrier_ok(tx_ring->netdev) &&
+		     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !(test_bit(__IGC_DOWN, &adapter->state))) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			u64_stats_update_begin(&tx_ring->tx_syncp);
+			tx_ring->tx_stats.restart_queue++;
+			u64_stats_update_end(&tx_ring->tx_syncp);
+		}
+	}
+
+	return !!budget;
+}
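+
+/* The !!budget return value tells the caller (typically the NAPI poll
+ * loop) whether the Tx work limit was exhausted: false means more
+ * completions may be pending, so the ring should be polled again
+ * rather than re-arming interrupts.
+ */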
+
+/**
+ * igc_up - Open the interface and prepare it to handle traffic
+ * @adapter: board private structure
+ */
+void igc_up(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i = 0;
+
+	/* hardware has been reset, we need to reload some things */
+	igc_configure(adapter);
+
+	clear_bit(__IGC_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&adapter->q_vector[i]->napi);
+
+	if (adapter->msix_entries)
+		igc_configure_msix(adapter);
+	else
+		igc_assign_vector(adapter->q_vector[0], 0);
+
+	/* Clear any pending interrupts. */
+	rd32(IGC_ICR);
+	igc_irq_enable(adapter);
+
+	netif_tx_start_all_queues(adapter->netdev);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = 1;
+	schedule_work(&adapter->watchdog_task);
+}
+
+/**
+ * igc_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ */
+void igc_update_stats(struct igc_adapter *adapter)
+{
+	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_hw *hw = &adapter->hw;
+	u64 _bytes, _packets;
+	u64 bytes, packets;
+	unsigned int start;
+	u32 mpc;
+	int i;
+
+	/* Prevent stats update while adapter is being reset, or if the pci
+	 * connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+	if (pci_channel_offline(pdev))
+		return;
+
+	packets = 0;
+	bytes = 0;
+
+	rcu_read_lock();
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igc_ring *ring = adapter->rx_ring[i];
+		u32 rqdpc = rd32(IGC_RQDPC(i));
+
+		if (hw->mac.type >= igc_i225)
+			wr32(IGC_RQDPC(i), 0);
+
+		if (rqdpc) {
+			ring->rx_stats.drops += rqdpc;
+			net_stats->rx_fifo_errors += rqdpc;
+		}
+
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
+			_bytes = ring->rx_stats.bytes;
+			_packets = ring->rx_stats.packets;
+		} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
+		bytes += _bytes;
+		packets += _packets;
+	}
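+	/* The u64_stats_fetch_begin/retry pairs above and below are
+	 * seqcount reads: if a ring's writer raced with the copy, the
+	 * loop re-reads, yielding a tear-free 64-bit snapshot even on
+	 * 32-bit hosts without locking the datapath.
+	 */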
+
+	net_stats->rx_bytes = bytes;
+	net_stats->rx_packets = packets;
+
+	packets = 0;
+	bytes = 0;
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
+			_bytes = ring->tx_stats.bytes;
+			_packets = ring->tx_stats.packets;
+		} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
+		bytes += _bytes;
+		packets += _packets;
+	}
+	net_stats->tx_bytes = bytes;
+	net_stats->tx_packets = packets;
+	rcu_read_unlock();
+
+	/* read stats registers */
+	adapter->stats.crcerrs += rd32(IGC_CRCERRS);
+	adapter->stats.gprc += rd32(IGC_GPRC);
+	adapter->stats.gorc += rd32(IGC_GORCL);
+	rd32(IGC_GORCH); /* clear GORCL */
+	adapter->stats.bprc += rd32(IGC_BPRC);
+	adapter->stats.mprc += rd32(IGC_MPRC);
+	adapter->stats.roc += rd32(IGC_ROC);
+
+	adapter->stats.prc64 += rd32(IGC_PRC64);
+	adapter->stats.prc127 += rd32(IGC_PRC127);
+	adapter->stats.prc255 += rd32(IGC_PRC255);
+	adapter->stats.prc511 += rd32(IGC_PRC511);
+	adapter->stats.prc1023 += rd32(IGC_PRC1023);
+	adapter->stats.prc1522 += rd32(IGC_PRC1522);
+	adapter->stats.symerrs += rd32(IGC_SYMERRS);
+	adapter->stats.sec += rd32(IGC_SEC);
+
+	mpc = rd32(IGC_MPC);
+	adapter->stats.mpc += mpc;
+	net_stats->rx_fifo_errors += mpc;
+	adapter->stats.scc += rd32(IGC_SCC);
+	adapter->stats.ecol += rd32(IGC_ECOL);
+	adapter->stats.mcc += rd32(IGC_MCC);
+	adapter->stats.latecol += rd32(IGC_LATECOL);
+	adapter->stats.dc += rd32(IGC_DC);
+	adapter->stats.rlec += rd32(IGC_RLEC);
+	adapter->stats.xonrxc += rd32(IGC_XONRXC);
+	adapter->stats.xontxc += rd32(IGC_XONTXC);
+	adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
+	adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
+	adapter->stats.fcruc += rd32(IGC_FCRUC);
+	adapter->stats.gptc += rd32(IGC_GPTC);
+	adapter->stats.gotc += rd32(IGC_GOTCL);
+	rd32(IGC_GOTCH); /* clear GOTCL */
+	adapter->stats.rnbc += rd32(IGC_RNBC);
+	adapter->stats.ruc += rd32(IGC_RUC);
+	adapter->stats.rfc += rd32(IGC_RFC);
+	adapter->stats.rjc += rd32(IGC_RJC);
+	adapter->stats.tor += rd32(IGC_TORH);
+	adapter->stats.tot += rd32(IGC_TOTH);
+	adapter->stats.tpr += rd32(IGC_TPR);
+
+	adapter->stats.ptc64 += rd32(IGC_PTC64);
+	adapter->stats.ptc127 += rd32(IGC_PTC127);
+	adapter->stats.ptc255 += rd32(IGC_PTC255);
+	adapter->stats.ptc511 += rd32(IGC_PTC511);
+	adapter->stats.ptc1023 += rd32(IGC_PTC1023);
+	adapter->stats.ptc1522 += rd32(IGC_PTC1522);
+
+	adapter->stats.mptc += rd32(IGC_MPTC);
+	adapter->stats.bptc += rd32(IGC_BPTC);
+
+	adapter->stats.tpt += rd32(IGC_TPT);
+	adapter->stats.colc += rd32(IGC_COLC);
+
+	adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
+
+	adapter->stats.tsctc += rd32(IGC_TSCTC);
+	adapter->stats.tsctfc += rd32(IGC_TSCTFC);
+
+	adapter->stats.iac += rd32(IGC_IAC);
+	adapter->stats.icrxoc += rd32(IGC_ICRXOC);
+	adapter->stats.icrxptc += rd32(IGC_ICRXPTC);
+	adapter->stats.icrxatc += rd32(IGC_ICRXATC);
+	adapter->stats.ictxptc += rd32(IGC_ICTXPTC);
+	adapter->stats.ictxatc += rd32(IGC_ICTXATC);
+	adapter->stats.ictxqec += rd32(IGC_ICTXQEC);
+	adapter->stats.ictxqmtc += rd32(IGC_ICTXQMTC);
+	adapter->stats.icrxdmtc += rd32(IGC_ICRXDMTC);
+
+	/* Fill out the OS statistics structure */
+	net_stats->multicast = adapter->stats.mprc;
+	net_stats->collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC
+	 */
+	net_stats->rx_errors = adapter->stats.rxerrc +
+		adapter->stats.crcerrs + adapter->stats.algnerrc +
+		adapter->stats.ruc + adapter->stats.roc +
+		adapter->stats.cexterr;
+	net_stats->rx_length_errors = adapter->stats.ruc +
+				      adapter->stats.roc;
+	net_stats->rx_crc_errors = adapter->stats.crcerrs;
+	net_stats->rx_frame_errors = adapter->stats.algnerrc;
+	net_stats->rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	net_stats->tx_errors = adapter->stats.ecol +
+			       adapter->stats.latecol;
+	net_stats->tx_aborted_errors = adapter->stats.ecol;
+	net_stats->tx_window_errors = adapter->stats.latecol;
+	net_stats->tx_carrier_errors = adapter->stats.tncrs;
+
+	/* Tx Dropped needs to be maintained elsewhere */
+
+	/* Management Stats */
+	adapter->stats.mgptc += rd32(IGC_MGTPTC);
+	adapter->stats.mgprc += rd32(IGC_MGTPRC);
+	adapter->stats.mgpdc += rd32(IGC_MGTPDC);
+}
+
+static void igc_nfc_filter_exit(struct igc_adapter *adapter)
+{
+	struct igc_nfc_filter *rule;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+		igc_erase_filter(adapter, rule);
+
+	hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node)
+		igc_erase_filter(adapter, rule);
+
+	spin_unlock(&adapter->nfc_lock);
+}
+
+static void igc_nfc_filter_restore(struct igc_adapter *adapter)
+{
+	struct igc_nfc_filter *rule;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+		igc_add_filter(adapter, rule);
+
+	spin_unlock(&adapter->nfc_lock);
+}
+
+/**
+ * igc_down - Close the interface
+ * @adapter: board private structure
+ */
+void igc_down(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	u32 tctl, rctl;
+	int i = 0;
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	/* disable receives in the hardware */
+	rctl = rd32(IGC_RCTL);
+	wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
+	/* flush and sleep below */
+
+	igc_nfc_filter_exit(adapter);
+
+	/* set trans_start so we don't get spurious watchdogs during reset */
+	netif_trans_update(netdev);
+
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	/* disable transmits in the hardware */
+	tctl = rd32(IGC_TCTL);
+	tctl &= ~IGC_TCTL_EN;
+	wr32(IGC_TCTL, tctl);
+	/* flush both disables and wait for them to finish */
+	wrfl();
+	usleep_range(10000, 20000);
+
+	igc_irq_disable(adapter);
+
+	adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		if (adapter->q_vector[i]) {
+			napi_synchronize(&adapter->q_vector[i]->napi);
+			napi_disable(&adapter->q_vector[i]->napi);
+		}
+	}
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	/* record the stats before reset */
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
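+	/* skip the reset if PCI error recovery has taken the channel offline */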
+	if (!pci_channel_offline(adapter->pdev))
+		igc_reset(adapter);
+
+	/* clear VLAN promisc flag so VFTA will be updated if necessary */
+	adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
+
+	igc_clean_all_tx_rings(adapter);
+	igc_clean_all_rx_rings(adapter);
+}
+
+void igc_reinit_locked(struct igc_adapter *adapter)
+{
+	WARN_ON(in_interrupt());
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	igc_down(adapter);
+	igc_up(adapter);
+	clear_bit(__IGC_RESETTING, &adapter->state);
+}
+
+static void igc_reset_task(struct work_struct *work)
+{
+	struct igc_adapter *adapter;
+
+	adapter = container_of(work, struct igc_adapter, reset_task);
+
+	netdev_err(adapter->netdev, "Reset adapter\n");
+	igc_reinit_locked(adapter);
+}
+
+/**
+ * igc_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* adjust max frame to be at least the size of a standard frame */
+	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	/* igc_down has a dependency on max_frame_size */
+	adapter->max_frame_size = max_frame;
+
+	if (netif_running(netdev))
+		igc_down(adapter);
+
+	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
+		 netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		igc_up(adapter);
+	else
+		igc_reset(adapter);
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+/**
+ * igc_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are updated here and also from the timer callback.
+ */
+static struct net_device_stats *igc_get_stats(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (!test_bit(__IGC_RESETTING, &adapter->state))
+		igc_update_stats(adapter);
+
+	/* only return the current stats */
+	return &netdev->stats;
+}
+
+static netdev_features_t igc_fix_features(struct net_device *netdev,
+					  netdev_features_t features)
+{
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable, make sure the Tx flag is always in the same
+	 * state as the Rx flag.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+	return features;
+}
+
+static int igc_set_features(struct net_device *netdev,
+			    netdev_features_t features)
+{
+	netdev_features_t changed = netdev->features ^ features;
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	/* Add VLAN support */
+	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
+		return 0;
+
+	if (!(features & NETIF_F_NTUPLE)) {
+		struct hlist_node *node2;
+		struct igc_nfc_filter *rule;
+
+		spin_lock(&adapter->nfc_lock);
+		hlist_for_each_entry_safe(rule, node2,
+					  &adapter->nfc_filter_list, nfc_node) {
+			igc_erase_filter(adapter, rule);
+			hlist_del(&rule->nfc_node);
+			kfree(rule);
+		}
+		spin_unlock(&adapter->nfc_lock);
+		adapter->nfc_filter_count = 0;
+	}
+
+	netdev->features = features;
+
+	if (netif_running(netdev))
+		igc_reinit_locked(adapter);
+	else
+		igc_reset(adapter);
+
+	return 1;
+}
+
+static netdev_features_t
+igc_features_check(struct sk_buff *skb, struct net_device *dev,
+		   netdev_features_t features)
+{
+	unsigned int network_hdr_len, mac_hdr_len;
+
+	/* Make certain the headers can be described by a context descriptor */
+	mac_hdr_len = skb_network_header(skb) - skb->data;
+	if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
+	if (unlikely(network_hdr_len >  IGC_MAX_NETWORK_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	/* We can only support IPv4 TSO in tunnels if we can mangle the
+	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 */
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
+		features &= ~NETIF_F_TSO;
+
+	return features;
+}
+
+/**
+ * igc_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ */
+static void igc_configure(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i = 0;
+
+	igc_get_hw_control(adapter);
+	igc_set_rx_mode(netdev);
+
+	igc_setup_tctl(adapter);
+	igc_setup_mrqc(adapter);
+	igc_setup_rctl(adapter);
+
+	igc_nfc_filter_restore(adapter);
+	igc_configure_tx(adapter);
+	igc_configure_rx(adapter);
+
+	igc_rx_fifo_flush_base(&adapter->hw);
+
+	/* call igc_desc_unused which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igc_ring *ring = adapter->rx_ring[i];
+
+		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+	}
+}
+
+/**
+ * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
+ * @adapter: address of board private structure
+ * @index: Index of the RAR entry which need to be synced with MAC table
+ */
+static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
+{
+	u8 *addr = adapter->mac_table[index].addr;
+	struct igc_hw *hw = &adapter->hw;
+	u32 rar_low, rar_high;
+
+	/* HW expects these to be in network order when they are plugged
+	 * into the registers which are little endian.  In order to guarantee
+	 * that ordering we need to do an leXX_to_cpup here in order to be
+	 * ready for the byteswap that occurs with writel
+	 */
+	rar_low = le32_to_cpup((__le32 *)(addr));
+	rar_high = le16_to_cpup((__le16 *)(addr + 4));
+
+	/* Indicate to hardware the Address is Valid. */
+	if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
+		if (is_valid_ether_addr(addr))
+			rar_high |= IGC_RAH_AV;
+
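+		/* encode the target queue in the RAH pool bits */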
+		rar_high |= IGC_RAH_POOL_1 <<
+			adapter->mac_table[index].queue;
+	}
+
+	wr32(IGC_RAL(index), rar_low);
+	wrfl();
+	wr32(IGC_RAH(index), rar_high);
+	wrfl();
+}
+
+/* Set default MAC address for the PF in the first RAR entry */
+static void igc_set_default_mac_filter(struct igc_adapter *adapter)
+{
+	struct igc_mac_addr *mac_table = &adapter->mac_table[0];
+
+	ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
+	mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+
+	igc_rar_set_index(adapter, 0);
+}
+
+/* If the filter to be added and an already existing filter express
+ * the same address and address type, it should be possible to simply
+ * override the other configuration, for example the queue the
+ * traffic should be steered to.
+ */
+static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
+				      const u8 *addr, const u8 flags)
+{
+	if (!(entry->state & IGC_MAC_STATE_IN_USE))
+		return true;
+
+	if ((entry->state & IGC_MAC_STATE_SRC_ADDR) !=
+	    (flags & IGC_MAC_STATE_SRC_ADDR))
+		return false;
+
+	if (!ether_addr_equal(addr, entry->addr))
+		return false;
+
+	return true;
+}
+
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue'.
+ * 'flags' indicates what kind of match is made: by default the
+ * destination address is matched; to match by source address instead,
+ * use the IGC_MAC_STATE_SRC_ADDR flag.
+ */
+static int igc_add_mac_filter_flags(struct igc_adapter *adapter,
+				    const u8 *addr, const u8 queue,
+				    const u8 flags)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int rar_entries = hw->mac.rar_entry_count;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* Search for the first empty entry in the MAC table.
+	 * Do not touch entries at the end of the table reserved for the VF MAC
+	 * addresses.
+	 */
+	for (i = 0; i < rar_entries; i++) {
+		if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
+					       addr, flags))
+			continue;
+
+		ether_addr_copy(adapter->mac_table[i].addr, addr);
+		adapter->mac_table[i].queue = queue;
+		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags;
+
+		igc_rar_set_index(adapter, i);
+		return i;
+	}
+
+	return -ENOSPC;
+}
+
+int igc_add_mac_steering_filter(struct igc_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags)
+{
+	return igc_add_mac_filter_flags(adapter, addr, queue,
+					IGC_MAC_STATE_QUEUE_STEERING | flags);
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue'. 'flags' indicates what kind of match needs to be removed:
+ * by default a destination-address match is removed; to remove a
+ * source-address match, use the IGC_MAC_STATE_SRC_ADDR flag.
+ */
+static int igc_del_mac_filter_flags(struct igc_adapter *adapter,
+				    const u8 *addr, const u8 queue,
+				    const u8 flags)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int rar_entries = hw->mac.rar_entry_count;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* Search for matching entry in the MAC table based on given address
+	 * and queue. Do not touch entries at the end of the table reserved
+	 * for the VF MAC addresses.
+	 */
+	for (i = 0; i < rar_entries; i++) {
+		if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
+			continue;
+		if ((adapter->mac_table[i].state & flags) != flags)
+			continue;
+		if (adapter->mac_table[i].queue != queue)
+			continue;
+		if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
+			continue;
+
+		/* When a filter for the default address is "deleted",
+		 * we return it to its initial configuration
+		 */
+		if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
+			adapter->mac_table[i].state =
+				IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+		} else {
+			adapter->mac_table[i].state = 0;
+			adapter->mac_table[i].queue = 0;
+			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+		}
+
+		igc_rar_set_index(adapter, i);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+int igc_del_mac_steering_filter(struct igc_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags)
+{
+	return igc_del_mac_filter_flags(adapter, addr, queue,
+					IGC_MAC_STATE_QUEUE_STEERING | flags);
+}
+
+/**
+ * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ */
+static void igc_set_rx_mode(struct net_device *netdev)
+{
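+	/* not yet implemented; unicast/multicast lists and flag changes
+	 * are currently ignored by this driver
+	 */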
+}
+
+/**
+ * igc_msix_other - msix other interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to the adapter private structure
+ */
+static irqreturn_t igc_msix_other(int irq, void *data)
+{
+	struct igc_adapter *adapter = data;
+	struct igc_hw *hw = &adapter->hw;
+	u32 icr = rd32(IGC_ICR);
+
+	/* reading ICR causes bit 31 of EICR to be cleared */
+	if (icr & IGC_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & IGC_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & IGC_ICR_LSC) {
+		hw->mac.get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGC_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
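+	/* unmask the other-causes vector so it can fire again */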
+	wr32(IGC_EIMS, adapter->eims_other);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * igc_write_ivar - configure ivar for given MSI-X vector
+ * @hw: pointer to the HW structure
+ * @msix_vector: vector number we are allocating to a given ring
+ * @index: row index of IVAR register to write within IVAR table
+ * @offset: column offset in IVAR, should be a multiple of 8
+ *
+ * The IVAR table consists of 2 columns,
+ * each containing a cause allocation for an Rx and Tx ring, and a
+ * variable number of rows depending on the number of queues supported.
+ */
+static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
+			   int index, int offset)
+{
+	u32 ivar = array_rd32(IGC_IVAR0, index);
+
+	/* clear any bits that are currently set */
+	ivar &= ~((u32)0xFF << offset);
+
+	/* write vector and valid bit */
+	ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
+
+	array_wr32(IGC_IVAR0, index, ivar);
+}
+
+static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_hw *hw = &adapter->hw;
+	int rx_queue = IGC_N0_QUEUE;
+	int tx_queue = IGC_N0_QUEUE;
+
+	if (q_vector->rx.ring)
+		rx_queue = q_vector->rx.ring->reg_idx;
+	if (q_vector->tx.ring)
+		tx_queue = q_vector->tx.ring->reg_idx;
+
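+	/* each IVAR row holds the cause allocation for two queues:
+	 * queue >> 1 selects the row, (queue & 1) << 4 selects the Rx
+	 * byte within it, and Tx bytes sit 8 bits above their Rx peers
+	 */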
+	switch (hw->mac.type) {
+	case igc_i225:
+		if (rx_queue > IGC_N0_QUEUE)
+			igc_write_ivar(hw, msix_vector,
+				       rx_queue >> 1,
+				       (rx_queue & 0x1) << 4);
+		if (tx_queue > IGC_N0_QUEUE)
+			igc_write_ivar(hw, msix_vector,
+				       tx_queue >> 1,
+				       ((tx_queue & 0x1) << 4) + 8);
+		q_vector->eims_value = BIT(msix_vector);
+		break;
+	default:
+		WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
+		break;
+	}
+
+	/* add q_vector eims value to global eims_enable_mask */
+	adapter->eims_enable_mask |= q_vector->eims_value;
+
+	/* configure q_vector to set itr on first interrupt */
+	q_vector->set_itr = 1;
+}
+
+/**
+ * igc_configure_msix - Configure MSI-X hardware
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ */
+static void igc_configure_msix(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i, vector = 0;
+	u32 tmp;
+
+	adapter->eims_enable_mask = 0;
+
+	/* set vector for other causes, i.e. link changes */
+	switch (hw->mac.type) {
+	case igc_i225:
+		/* Turn on MSI-X capability first, or our settings
+		 * won't stick.  And it will take days to debug.
+		 */
+		wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
+		     IGC_GPIE_PBA | IGC_GPIE_EIAME |
+		     IGC_GPIE_NSICR);
+
+		/* enable msix_other interrupt */
+		adapter->eims_other = BIT(vector);
+		tmp = (vector++ | IGC_IVAR_VALID) << 8;
+
+		wr32(IGC_IVAR_MISC, tmp);
+		break;
+	default:
+		/* do nothing, since nothing else supports MSI-X */
+		break;
+	} /* switch (hw->mac.type) */
+
+	adapter->eims_enable_mask |= adapter->eims_other;
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		igc_assign_vector(adapter->q_vector[i], vector++);
+
+	wrfl();
+}
+
+static irqreturn_t igc_msix_ring(int irq, void *data)
+{
+	struct igc_q_vector *q_vector = data;
+
+	/* Write the ITR value calculated from the previous interrupt. */
+	igc_write_itr(q_vector);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * igc_request_msix - Initialize MSI-X interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ */
+static int igc_request_msix(struct igc_adapter *adapter)
+{
+	int i = 0, err = 0, vector = 0, free_vector = 0;
+	struct net_device *netdev = adapter->netdev;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  &igc_msix_other, 0, netdev->name, adapter);
+	if (err)
+		goto err_out;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igc_q_vector *q_vector = adapter->q_vector[i];
+
+		vector++;
+
+		q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
+
+		if (q_vector->rx.ring && q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
+				q_vector->rx.ring->queue_index);
+		else if (q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
+				q_vector->tx.ring->queue_index);
+		else if (q_vector->rx.ring)
+			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
+				q_vector->rx.ring->queue_index);
+		else
+			sprintf(q_vector->name, "%s-unused", netdev->name);
+
+		err = request_irq(adapter->msix_entries[vector].vector,
+				  igc_msix_ring, 0, q_vector->name,
+				  q_vector);
+		if (err)
+			goto err_free;
+	}
+
+	igc_configure_msix(adapter);
+	return 0;
+
+err_free:
+	/* free already assigned IRQs */
+	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+	vector--;
+	for (i = 0; i < vector; i++) {
+		free_irq(adapter->msix_entries[free_vector++].vector,
+			 adapter->q_vector[i]);
+	}
+err_out:
+	return err;
+}
+
+/**
+ * igc_reset_q_vector - Reset config for interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be reset
+ *
+ * If NAPI is enabled it will delete any references to the
+ * NAPI struct. This is preparation for igc_free_q_vector.
+ */
+static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	/* if we're coming from igc_set_interrupt_capability, the vectors are
+	 * not yet allocated
+	 */
+	if (!q_vector)
+		return;
+
+	if (q_vector->tx.ring)
+		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+	if (q_vector->rx.ring)
+		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+	netif_napi_del(&q_vector->napi);
+}
+
+static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	if (adapter->msix_entries) {
+		pci_disable_msix(adapter->pdev);
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+	} else if (adapter->flags & IGC_FLAG_HAS_MSI) {
+		pci_disable_msi(adapter->pdev);
+	}
+
+	while (v_idx--)
+		igc_reset_q_vector(adapter, v_idx);
+}
+
+/**
+ * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * This function resets the device so that it has 0 rx queues, tx queues, and
+ * MSI-X interrupts allocated.
+ */
+static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
+{
+	igc_free_q_vectors(adapter);
+	igc_reset_interrupt_capability(adapter);
+}
+
+/**
+ * igc_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ */
+static void igc_free_q_vectors(struct igc_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--) {
+		igc_reset_q_vector(adapter, v_idx);
+		igc_free_q_vector(adapter, v_idx);
+	}
+}
+
+/**
+ * igc_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.
+ */
+static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	adapter->q_vector[v_idx] = NULL;
+
+	/* igc_get_stats64() might access the rings on this vector,
+	 * we must wait a grace period before freeing it.
+	 */
+	if (q_vector)
+		kfree_rcu(q_vector, rcu);
+}
+
+/* Need to wait a few seconds after link up to get diagnostic information from
+ * the phy
+ */
+static void igc_update_phy_info(struct timer_list *t)
+{
+	struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
+
+	igc_get_phy_info(&adapter->hw);
+}
+
+/**
+ * igc_has_link - check shared code for link and determine up/down
+ * @adapter: pointer to driver private info
+ */
+bool igc_has_link(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	bool link_active = false;
+
+	/* get_link_status is set on an LSC (link status change) or Rx
+	 * sequence error interrupt and stays set until igc_check_for_link
+	 * establishes link, for copper adapters ONLY
+	 */
+	switch (hw->phy.media_type) {
+	case igc_media_type_copper:
+		if (!hw->mac.get_link_status)
+			return true;
+		hw->mac.ops.check_for_link(hw);
+		link_active = !hw->mac.get_link_status;
+		break;
+	default:
+	case igc_media_type_unknown:
+		break;
+	}
+
+	if (hw->mac.type == igc_i225 &&
+	    hw->phy.id == I225_I_PHY_ID) {
+		if (!netif_carrier_ok(adapter->netdev)) {
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
+			adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
+			adapter->link_check_timeout = jiffies;
+		}
+	}
+
+	return link_active;
+}
+
+/**
+ * igc_watchdog - Timer Call-back
+ * @t: pointer to the watchdog timer embedded in the adapter
+ */
+static void igc_watchdog(struct timer_list *t)
+{
+	struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igc_watchdog_task(struct work_struct *work)
+{
+	struct igc_adapter *adapter = container_of(work,
+						   struct igc_adapter,
+						   watchdog_task);
+	struct net_device *netdev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_data, retry_count = 20;
+	u32 connsw;
+	u32 link;
+	int i;
+
+	link = igc_has_link(adapter);
+
+	if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
+		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		else
+			link = false;
+	}
+
+	/* Force link down if we have fiber to swap to */
+	if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+		if (hw->phy.media_type == igc_media_type_copper) {
+			connsw = rd32(IGC_CONNSW);
+			if (!(connsw & IGC_CONNSW_AUTOSENSE_EN))
+				link = 0;
+		}
+	}
+	if (link) {
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+
+			hw->mac.ops.get_speed_and_duplex(hw,
+							 &adapter->link_speed,
+							 &adapter->link_duplex);
+
+			ctrl = rd32(IGC_CTRL);
+			/* Link status message must follow this format */
+			netdev_info(netdev,
+				    "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+				    netdev->name,
+				    adapter->link_speed,
+				    adapter->link_duplex == FULL_DUPLEX ?
+				    "Full" : "Half",
+				    (ctrl & IGC_CTRL_TFCE) &&
+				    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
+				    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
+				    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
+
+			/* check if SmartSpeed worked */
+			igc_check_downshift(hw);
+			if (phy->speed_downgraded)
+				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 14;
+				break;
+			case SPEED_100:
+				/* maybe add some timeout factor? */
+				break;
+			}
+
+			if (adapter->link_speed != SPEED_1000)
+				goto no_wait;
+
+			/* wait for Remote receiver status OK */
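+			/* poll up to retry_count times at 100 ms intervals,
+			 * about two seconds in total
+			 */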
+retry_read_status:
+			if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
+					      &phy_data)) {
+				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+				    retry_count) {
+					msleep(100);
+					retry_count--;
+					goto retry_read_status;
+				} else if (!retry_count) {
+					dev_err(&adapter->pdev->dev, "exceeded max 2 second wait on remote receiver status\n");
+				}
+			} else {
+				dev_err(&adapter->pdev->dev, "failed to read 1000Base-T Status register\n");
+			}
+no_wait:
+			netif_carrier_on(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+
+			/* Link status message must follow this format */
+			netdev_info(netdev, "igc: %s NIC Link is Down\n",
+				    netdev->name);
+			netif_carrier_off(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+
+			/* link is down, time to check for alternate media */
+			if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+				if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+					schedule_work(&adapter->reset_task);
+					/* return immediately */
+					return;
+				}
+			}
+
+		/* also check for alternate media here */
+		} else if (!netif_carrier_ok(netdev) &&
+			   (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
+			if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+				schedule_work(&adapter->reset_task);
+				/* return immediately */
+				return;
+			}
+		}
+	}
+
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		if (!netif_carrier_ok(netdev)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
+				adapter->tx_timeout_count++;
+				schedule_work(&adapter->reset_task);
+				/* return immediately since reset is imminent */
+				return;
+			}
+		}
+
+		/* Force detection of hung controller every watchdog period */
+		set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		u32 eics = 0;
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
+		wr32(IGC_EICS, eics);
+	} else {
+		wr32(IGC_ICS, IGC_ICS_RXDMT0);
+	}
+
+	/* Reset the timer */
+	if (!test_bit(__IGC_DOWN, &adapter->state)) {
+		if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies +  HZ));
+		else
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + 2 * HZ));
+	}
+}
+
+/**
+ * igc_update_ring_itr - update the dynamic ITR value based on packet size
+ * @q_vector: pointer to q_vector
+ *
+ * Stores a new ITR value based strictly on packet size.  This
+ * algorithm is less sophisticated than that used in igc_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings.  The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
+ * NOTE: This function is called only when operating in a multiqueue
+ * receive environment.
+ */
+static void igc_update_ring_itr(struct igc_q_vector *q_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	int new_val = q_vector->itr_val;
+	int avg_wire_size = 0;
+	unsigned int packets;
+
+	/* For non-gigabit speeds, just fix the interrupt rate at 4000
+	 * ints/sec - ITR timer value of 120 ticks.
+	 */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		new_val = IGC_4K_ITR;
+		goto set_itr_val;
+	default:
+		break;
+	}
+
+	packets = q_vector->rx.total_packets;
+	if (packets)
+		avg_wire_size = q_vector->rx.total_bytes / packets;
+
+	packets = q_vector->tx.total_packets;
+	if (packets)
+		avg_wire_size = max_t(u32, avg_wire_size,
+				      q_vector->tx.total_bytes / packets);
+
+	/* if avg_wire_size isn't set no work was done */
+	if (!avg_wire_size)
+		goto clear_counts;
+
+	/* Add 24 bytes to size to account for CRC, preamble, and gap */
+	avg_wire_size += 24;
+
+	/* Don't starve jumbo frames */
+	avg_wire_size = min(avg_wire_size, 3000);
+
+	/* Give a little boost to mid-size frames */
+	if (avg_wire_size > 300 && avg_wire_size < 1200)
+		new_val = avg_wire_size / 3;
+	else
+		new_val = avg_wire_size / 2;
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (new_val < IGC_20K_ITR &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		new_val = IGC_20K_ITR;
+
+set_itr_val:
+	if (new_val != q_vector->itr_val) {
+		q_vector->itr_val = new_val;
+		q_vector->set_itr = 1;
+	}
+clear_counts:
+	q_vector->rx.total_bytes = 0;
+	q_vector->rx.total_packets = 0;
+	q_vector->tx.total_bytes = 0;
+	q_vector->tx.total_packets = 0;
+}
+
+/**
+ * igc_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ * NOTE: These calculations are only valid when operating in a single-
+ * queue environment.
+ */
+static void igc_update_itr(struct igc_q_vector *q_vector,
+			   struct igc_ring_container *ring_container)
+{
+	unsigned int packets = ring_container->total_packets;
+	unsigned int bytes = ring_container->total_bytes;
+	u8 itrval = ring_container->itr;
+
+	/* no packets, exit with status unchanged */
+	if (packets == 0)
+		return;
+
+	switch (itrval) {
+	case lowest_latency:
+		/* handle TSO and jumbo frames */
+		if (bytes / packets > 8000)
+			itrval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			itrval = low_latency;
+		break;
+	case low_latency:  /* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* this if handles the TSO accounting */
+			if (bytes / packets > 8000)
+				itrval = bulk_latency;
+			else if ((packets < 10) || ((bytes / packets) > 1200))
+				itrval = bulk_latency;
+			else if (packets > 35)
+				itrval = lowest_latency;
+		} else if (bytes / packets > 2000) {
+			itrval = bulk_latency;
+		} else if (packets <= 2 && bytes < 512) {
+			itrval = lowest_latency;
+		}
+		break;
+	case bulk_latency: /* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				itrval = low_latency;
+		} else if (bytes < 1500) {
+			itrval = low_latency;
+		}
+		break;
+	}
+
+	/* clear work counters since we have the values we need */
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+
+	/* write updated itr to ring container */
+	ring_container->itr = itrval;
+}
+
+/**
+ * igc_intr_msi - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ */
+static irqreturn_t igc_intr_msi(int irq, void *data)
+{
+	struct igc_adapter *adapter = data;
+	struct igc_q_vector *q_vector = adapter->q_vector[0];
+	struct igc_hw *hw = &adapter->hw;
+	/* read ICR disables interrupts using IAM */
+	u32 icr = rd32(IGC_ICR);
+
+	igc_write_itr(q_vector);
+
+	if (icr & IGC_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & IGC_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
+		hw->mac.get_link_status = 1;
+		if (!test_bit(__IGC_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * igc_intr - Legacy Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ */
+static irqreturn_t igc_intr(int irq, void *data)
+{
+	struct igc_adapter *adapter = data;
+	struct igc_q_vector *q_vector = adapter->q_vector[0];
+	struct igc_hw *hw = &adapter->hw;
+	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
+	 * need for the IMC write
+	 */
+	u32 icr = rd32(IGC_ICR);
+
+	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+	 * not set, then the adapter didn't send an interrupt
+	 */
+	if (!(icr & IGC_ICR_INT_ASSERTED))
+		return IRQ_NONE;
+
+	igc_write_itr(q_vector);
+
+	if (icr & IGC_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & IGC_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
+		hw->mac.get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGC_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+static void igc_set_itr(struct igc_q_vector *q_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	u32 new_itr = q_vector->itr_val;
+	u8 current_itr = 0;
+
+	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		current_itr = 0;
+		new_itr = IGC_4K_ITR;
+		goto set_itr_now;
+	default:
+		break;
+	}
+
+	igc_update_itr(q_vector, &q_vector->tx);
+	igc_update_itr(q_vector, &q_vector->rx);
+
+	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (current_itr == lowest_latency &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		current_itr = low_latency;
+
+	switch (current_itr) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
+		break;
+	case low_latency:
+		new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
+		break;
+	case bulk_latency:
+		new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
+		break;
+	default:
+		break;
+	}
+
+set_itr_now:
+	if (new_itr != q_vector->itr_val) {
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing
+		 */
+		new_itr = new_itr > q_vector->itr_val ?
+			  max((new_itr * q_vector->itr_val) /
+			  (new_itr + (q_vector->itr_val >> 2)),
+			  new_itr) : new_itr;
+		/* Don't write the value here; it resets the adapter's
+		 * internal timer, and causes us to delay far longer than
+		 * we should between interrupts.  Instead, we write the ITR
+		 * value at the beginning of the next interrupt so the timing
+		 * ends up being correct.
+		 */
+		q_vector->itr_val = new_itr;
+		q_vector->set_itr = 1;
+	}
+}
+
+static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_hw *hw = &adapter->hw;
+
+	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+		if (adapter->num_q_vectors == 1)
+			igc_set_itr(q_vector);
+		else
+			igc_update_ring_itr(q_vector);
+	}
+
+	if (!test_bit(__IGC_DOWN, &adapter->state)) {
+		if (adapter->msix_entries)
+			wr32(IGC_EIMS, q_vector->eims_value);
+		else
+			igc_irq_enable(adapter);
+	}
+}
+
+/**
+ * igc_poll - NAPI Rx polling callback
+ * @napi: napi polling structure
+ * @budget: count of how many packets we should handle
+ */
+static int igc_poll(struct napi_struct *napi, int budget)
+{
+	struct igc_q_vector *q_vector = container_of(napi,
+						     struct igc_q_vector,
+						     napi);
+	bool clean_complete = true;
+	int work_done = 0;
+
+	if (q_vector->tx.ring)
+		clean_complete = igc_clean_tx_irq(q_vector, budget);
+
+	if (q_vector->rx.ring) {
+		int cleaned = igc_clean_rx_irq(q_vector, budget);
+
+		work_done += cleaned;
+		if (cleaned >= budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		igc_ring_irq_enable(q_vector);
+
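+	/* report strictly less than budget to tell the stack we are done */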
+	return min(work_done, budget - 1);
+}
+
+/**
+ * igc_set_interrupt_capability - set MSI or MSI-X if supported
+ * @adapter: Pointer to adapter structure
+ * @msix: boolean indicating whether MSI-X should be attempted
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static void igc_set_interrupt_capability(struct igc_adapter *adapter,
+					 bool msix)
+{
+	int numvecs, i;
+	int err;
+
+	if (!msix)
+		goto msi_only;
+	adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+	/* Number of supported queues. */
+	adapter->num_rx_queues = adapter->rss_queues;
+
+	adapter->num_tx_queues = adapter->rss_queues;
+
+	/* start with one vector for every Rx queue */
+	numvecs = adapter->num_rx_queues;
+
+	/* if Tx handler is separate add 1 for every Tx queue */
+	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
+		numvecs += adapter->num_tx_queues;
+
+	/* store the number of vectors reserved for queues */
+	adapter->num_q_vectors = numvecs;
+
+	/* add 1 vector for link status interrupts */
+	numvecs++;
+
+	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
+					GFP_KERNEL);
+
+	if (!adapter->msix_entries)
+		return;
+
+	/* populate entry values */
+	for (i = 0; i < numvecs; i++)
+		adapter->msix_entries[i].entry = i;
+
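+	/* request exactly numvecs vectors; on failure fall back to MSI */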
+	err = pci_enable_msix_range(adapter->pdev,
+				    adapter->msix_entries,
+				    numvecs,
+				    numvecs);
+	if (err > 0)
+		return;
+
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+
+	igc_reset_interrupt_capability(adapter);
+
+msi_only:
+	adapter->flags &= ~IGC_FLAG_HAS_MSIX;
+
+	adapter->rss_queues = 1;
+	adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 1;
+	adapter->num_q_vectors = 1;
+	if (!pci_enable_msi(adapter->pdev))
+		adapter->flags |= IGC_FLAG_HAS_MSI;
+}
+
+static void igc_add_ring(struct igc_ring *ring,
+			 struct igc_ring_container *head)
+{
+	head->ring = ring;
+	head->count++;
+}
+
+/**
+ * igc_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ */
+static int igc_alloc_q_vector(struct igc_adapter *adapter,
+			      unsigned int v_count, unsigned int v_idx,
+			      unsigned int txr_count, unsigned int txr_idx,
+			      unsigned int rxr_count, unsigned int rxr_idx)
+{
+	struct igc_q_vector *q_vector;
+	struct igc_ring *ring;
+	int ring_count;
+
+	/* igc only supports 1 Tx and/or 1 Rx queue per vector */
+	if (txr_count > 1 || rxr_count > 1)
+		return -ENOMEM;
+
+	ring_count = txr_count + rxr_count;
+
+	/* allocate q_vector and rings */
+	q_vector = adapter->q_vector[v_idx];
+	if (!q_vector)
+		q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+				   GFP_KERNEL);
+	else
+		memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi,
+		       igc_poll, 64);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+
+	/* initialize work limits */
+	q_vector->tx.work_limit = adapter->tx_work_limit;
+
+	/* initialize ITR configuration */
+	q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
+	q_vector->itr_val = IGC_START_ITR;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	/* initialize ITR */
+	if (rxr_count) {
+		/* rx or rx/tx vector */
+		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+			q_vector->itr_val = adapter->rx_itr_setting;
+	} else {
+		/* tx only vector */
+		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+			q_vector->itr_val = adapter->tx_itr_setting;
+	}
+
+	if (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		igc_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+
+		/* assign ring to adapter */
+		adapter->tx_ring[txr_idx] = ring;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	if (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		igc_add_ring(ring, &q_vector->rx);
+
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		/* assign ring to adapter */
+		adapter->rx_ring[rxr_idx] = ring;
+	}
+
+	return 0;
+}
+
+/**
+ * igc_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ */
+static int igc_alloc_q_vectors(struct igc_adapter *adapter)
+{
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int q_vectors = adapter->num_q_vectors;
+	int err;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+		for (; rxr_remaining; v_idx++) {
+			err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+						 0, 0, 1, rxr_idx);
+
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining--;
+			rxr_idx++;
+		}
+	}
+
+	for (; v_idx < q_vectors; v_idx++) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
+		err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+					 tqpv, txr_idx, rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		rxr_idx++;
+		txr_idx++;
+	}
+
+	return 0;
+
+err_out:
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		igc_free_q_vector(adapter, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ * igc_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ */
+static void igc_cache_ring_register(struct igc_adapter *adapter)
+{
+	int i = 0, j = 0;
+
+	switch (adapter->hw.mac.type) {
+	case igc_i225:
+	/* Fall through */
+	default:
+		for (; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->reg_idx = i;
+		for (; j < adapter->num_tx_queues; j++)
+			adapter->tx_ring[j]->reg_idx = j;
+		break;
+	}
+}
+
+/**
+ * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ * @adapter: Pointer to adapter structure
+ * @msix: boolean indicating whether MSI-X should be attempted
+ *
+ * This function initializes the interrupts and allocates all of the queues.
+ */
+static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	igc_set_interrupt_capability(adapter, msix);
+
+	err = igc_alloc_q_vectors(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	igc_cache_ring_register(adapter);
+
+	return 0;
+
+err_alloc_q_vectors:
+	igc_reset_interrupt_capability(adapter);
+	return err;
+}
+
+static void igc_free_irq(struct igc_adapter *adapter)
+{
+	if (adapter->msix_entries) {
+		int vector = 0, i;
+
+		free_irq(adapter->msix_entries[vector++].vector, adapter);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			free_irq(adapter->msix_entries[vector++].vector,
+				 adapter->q_vector[i]);
+	} else {
+		free_irq(adapter->pdev->irq, adapter);
+	}
+}
+
+/**
+ * igc_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ */
+static void igc_irq_disable(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries) {
+		u32 regval = rd32(IGC_EIAM);
+
+		wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
+		wr32(IGC_EIMC, adapter->eims_enable_mask);
+		regval = rd32(IGC_EIAC);
+		wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
+	}
+
+	wr32(IGC_IAM, 0);
+	wr32(IGC_IMC, ~0);
+	wrfl();
+
+	if (adapter->msix_entries) {
+		int vector = 0, i;
+
+		synchronize_irq(adapter->msix_entries[vector++].vector);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			synchronize_irq(adapter->msix_entries[vector++].vector);
+	} else {
+		synchronize_irq(adapter->pdev->irq);
+	}
+}
+
+/**
+ * igc_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ */
+static void igc_irq_enable(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries) {
+		u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
+		u32 regval = rd32(IGC_EIAC);
+
+		wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
+		regval = rd32(IGC_EIAM);
+		wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
+		wr32(IGC_EIMS, adapter->eims_enable_mask);
+		wr32(IGC_IMS, ims);
+	} else {
+		wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+		wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+	}
+}
+
+/**
+ * igc_request_irq - initialize interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static int igc_request_irq(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		err = igc_request_msix(adapter);
+		if (!err)
+			goto request_done;
+		/* fall back to MSI */
+		igc_free_all_tx_resources(adapter);
+		igc_free_all_rx_resources(adapter);
+
+		igc_clear_interrupt_scheme(adapter);
+		err = igc_init_interrupt_scheme(adapter, false);
+		if (err)
+			goto request_done;
+		igc_setup_all_tx_resources(adapter);
+		igc_setup_all_rx_resources(adapter);
+		igc_configure(adapter);
+	}
+
+	igc_assign_vector(adapter->q_vector[0], 0);
+
+	if (adapter->flags & IGC_FLAG_HAS_MSI) {
+		err = request_irq(pdev->irq, &igc_intr_msi, 0,
+				  netdev->name, adapter);
+		if (!err)
+			goto request_done;
+
+		/* fall back to legacy interrupts */
+		igc_reset_interrupt_capability(adapter);
+		adapter->flags &= ~IGC_FLAG_HAS_MSI;
+	}
+
+	err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
+			  netdev->name, adapter);
+
+	if (err)
+		dev_err(&pdev->dev, "Error %d getting interrupt\n",
+			err);
+
+request_done:
+	return err;
+}
+
+static void igc_write_itr(struct igc_q_vector *q_vector)
+{
+	u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
+
+	if (!q_vector->set_itr)
+		return;
+
+	if (!itr_val)
+		itr_val = IGC_ITR_VAL_MASK;
+
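+	/* CNT_IGNR is assumed to load the new interval immediately,
+	 * without waiting for the current countdown to expire
+	 */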
+	itr_val |= IGC_EITR_CNT_IGNR;
+
+	writel(itr_val, q_vector->itr_register);
+	q_vector->set_itr = 0;
+}
+
+/**
+ * __igc_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ * @resuming: boolean indicating whether the interface is being resumed
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int __igc_open(struct net_device *netdev, bool resuming)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int err = 0;
+	int i = 0;
+
+	/* disallow open during test */
+
+	if (test_bit(__IGC_TESTING, &adapter->state)) {
+		WARN_ON(resuming);
+		return -EBUSY;
+	}
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = igc_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igc_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	igc_power_up_link(adapter);
+
+	igc_configure(adapter);
+
+	err = igc_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	clear_bit(__IGC_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&adapter->q_vector[i]->napi);
+
+	/* Clear any pending interrupts. */
+	rd32(IGC_ICR);
+	igc_irq_enable(adapter);
+
+	netif_tx_start_all_queues(netdev);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = 1;
+	schedule_work(&adapter->watchdog_task);
+
+	return IGC_SUCCESS;
+
+err_set_queues:
+	igc_free_irq(adapter);
+err_req_irq:
+	igc_release_hw_control(adapter);
+	igc_power_down_link(adapter);
+	igc_free_all_rx_resources(adapter);
+err_setup_rx:
+	igc_free_all_tx_resources(adapter);
+err_setup_tx:
+	igc_reset(adapter);
+
+	return err;
+}
+
+static int igc_open(struct net_device *netdev)
+{
+	return __igc_open(netdev, false);
+}
+
+/**
+ * __igc_close - Disables a network interface
+ * @netdev: network interface device structure
+ * @suspending: boolean indicating whether the interface is being suspended
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int __igc_close(struct net_device *netdev, bool suspending)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
+
+	igc_down(adapter);
+
+	igc_release_hw_control(adapter);
+
+	igc_free_irq(adapter);
+
+	igc_free_all_tx_resources(adapter);
+	igc_free_all_rx_resources(adapter);
+
+	return 0;
+}
+
+static int igc_close(struct net_device *netdev)
+{
+	if (netif_device_present(netdev) || netdev->dismantle)
+		return __igc_close(netdev, false);
+	return 0;
+}
+
+static const struct net_device_ops igc_netdev_ops = {
+	.ndo_open		= igc_open,
+	.ndo_stop		= igc_close,
+	.ndo_start_xmit		= igc_xmit_frame,
+	.ndo_set_mac_address	= igc_set_mac,
+	.ndo_change_mtu		= igc_change_mtu,
+	.ndo_get_stats		= igc_get_stats,
+	.ndo_fix_features	= igc_fix_features,
+	.ndo_set_features	= igc_set_features,
+	.ndo_features_check	= igc_features_check,
+};
+
+/* PCIe configuration access */
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	pci_read_config_word(adapter->pdev, reg, value);
+}
+
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	pci_write_config_word(adapter->pdev, reg, *value);
+}
+
+s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	if (!pci_is_pcie(adapter->pdev))
+		return -IGC_ERR_CONFIG;
+
+	pcie_capability_read_word(adapter->pdev, reg, value);
+
+	return IGC_SUCCESS;
+}
+
+s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	if (!pci_is_pcie(adapter->pdev))
+		return -IGC_ERR_CONFIG;
+
+	pcie_capability_write_word(adapter->pdev, reg, *value);
+
+	return IGC_SUCCESS;
+}
+
+u32 igc_rd32(struct igc_hw *hw, u32 reg)
+{
+	struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
+	u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+	u32 value = 0;
+
+	if (IGC_REMOVED(hw_addr))
+		return ~value;
+
+	value = readl(&hw_addr[reg]);
+
+	/* reads should not return all F's */
+	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
+		struct net_device *netdev = igc->netdev;
+
+		hw->hw_addr = NULL;
+		netif_device_detach(netdev);
+		netdev_err(netdev, "PCIe link lost, device now detached\n");
+		WARN(pci_device_is_present(igc->pdev),
+		     "igc: Failed to read reg 0x%x!\n", reg);
+	}
+
+	return value;
+}
+
+int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_mac_info *mac = &adapter->hw.mac;
+
+	mac->autoneg = 0;
+
+	/* Make sure dplx is at most 1 bit and lsb of speed is not set
+	 * for the switch() below to work
+	 */
+	if ((spd & 1) || (dplx & ~1))
+		goto err_inval;
+
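+	/* with the low bit of spd clear and dplx at most 1, each supported
+	 * speed/duplex pair yields a unique sum for the cases below
+	 */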
+	switch (spd + dplx) {
+	case SPEED_10 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_10_HALF;
+		break;
+	case SPEED_10 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_10_FULL;
+		break;
+	case SPEED_100 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_100_HALF;
+		break;
+	case SPEED_100 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_100_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_FULL:
+		mac->autoneg = 1;
+		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_HALF: /* not supported */
+		goto err_inval;
+	case SPEED_2500 + DUPLEX_FULL:
+		mac->autoneg = 1;
+		adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
+		break;
+	case SPEED_2500 + DUPLEX_HALF: /* not supported */
+	default:
+		goto err_inval;
+	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
+	return 0;
+
+err_inval:
+	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
+	return -EINVAL;
+}
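+
+/* The switch above works because the ethtool SPEED_* values are even and
+ * DUPLEX_HALF/DUPLEX_FULL are 0/1, so each supported spd + dplx sum is
+ * unique (10, 11, 100, 101, 1001, 2501).  A hypothetical caller forcing
+ * 100 Mb/s full duplex would do:
+ *
+ *	if (igc_set_spd_dplx(adapter, SPEED_100, DUPLEX_FULL))
+ *		return -EINVAL;
+ */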
+
+/**
+ * igc_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igc_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igc_probe initializes an adapter identified by a pci_dev structure.
+ * OS initialization, configuration of the adapter private structure,
+ * and a hardware reset all happen here.
+ */
+static int igc_probe(struct pci_dev *pdev,
+		     const struct pci_device_id *ent)
+{
+	struct igc_adapter *adapter;
+	struct net_device *netdev;
+	struct igc_hw *hw;
+	const struct igc_info *ei = igc_info_tbl[ent->driver_data];
+	int err;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		/* the 32-bit fallback must set both masks too */
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "igc: Wrong DMA config\n");
+			goto err_dma;
+		}
+	}
+
+	err = pci_request_selected_regions(pdev,
+					   pci_select_bars(pdev,
+							   IORESOURCE_MEM),
+					   igc_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_enable_pcie_error_reporting(pdev);
+
+	pci_set_master(pdev);
+
+	err = -ENOMEM;
+	netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
+				   IGC_MAX_TX_QUEUES);
+
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->port_num = hw->bus.func;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	err = pci_save_state(pdev);
+	if (err)
+		goto err_ioremap;
+
+	err = -EIO;
+	adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
+				   pci_resource_len(pdev, 0));
+	if (!adapter->io_addr)
+		goto err_ioremap;
+
+	/* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
+	hw->hw_addr = adapter->io_addr;
+
+	netdev->netdev_ops = &igc_netdev_ops;
+	igc_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	netdev->mem_start = pci_resource_start(pdev, 0);
+	netdev->mem_end = pci_resource_end(pdev, 0);
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->revision_id = pdev->revision;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	/* Copy the default MAC and PHY function pointers */
+	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
+	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
+
+	/* Initialize skew-specific constants */
+	err = ei->get_invariants(hw);
+	if (err)
+		goto err_sw_init;
+
+	/* Add supported features to the features list */
+	netdev->features |= NETIF_F_HW_CSUM;
+
+	/* setup the private structure */
+	err = igc_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= NETIF_F_NTUPLE;
+	netdev->hw_features |= netdev->features;
+
+	/* MTU range: 68 - 9216 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
+	/* before reading the NVM, reset the controller to put the device in a
+	 * known good starting state
+	 */
+	hw->mac.ops.reset_hw(hw);
+
+	if (igc_get_flash_presence_i225(hw)) {
+		if (hw->nvm.ops.validate(hw) < 0) {
+			dev_err(&pdev->dev,
+				"The NVM Checksum Is Not Valid\n");
+			err = -EIO;
+			goto err_eeprom;
+		}
+	}
+
+	if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
+		/* copy the MAC address out of the NVM */
+		if (hw->mac.ops.read_mac_addr(hw))
+			dev_err(&pdev->dev, "NVM Read Error\n");
+	}
+
+	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		dev_err(&pdev->dev, "Invalid MAC Address\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	/* configure RXPBSIZE and TXPBSIZE */
+	wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
+	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+
+	timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
+	timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
+
+	INIT_WORK(&adapter->reset_task, igc_reset_task);
+	INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
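+	/* 0xaf below assumes the igc ADVERTISE_* bit layout: 10 half/full
+	 * (0x01/0x02), 100 half/full (0x04/0x08), 1000 full (0x20) and
+	 * 2500 full (0x80); 0x01|0x02|0x04|0x08|0x20|0x80 == 0xaf, i.e.
+	 * advertise every supported speed/duplex pair.
+	 */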
+	hw->phy.autoneg_advertised = 0xaf;
+
+	hw->fc.requested_mode = igc_fc_default;
+	hw->fc.current_mode = igc_fc_default;
+
+	/* reset the hardware with the new settings */
+	igc_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igc_get_hw_control(adapter);
+
+	strncpy(netdev->name, "eth%d", IFNAMSIZ);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	/* keep a copy of the board-specific info */
+	adapter->ei = *ei;
+
+	/* print pcie link status and MAC address */
+	pcie_print_link_status(pdev);
+	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
+
+	return 0;
+
+err_register:
+	igc_release_hw_control(adapter);
+err_eeprom:
+	if (!igc_check_reset_block(hw))
+		igc_reset_phy(hw);
+err_sw_init:
+	igc_clear_interrupt_scheme(adapter);
+	iounmap(adapter->io_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_selected_regions(pdev,
+				     pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * igc_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igc_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ */
+static void igc_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igc_release_hw_control(adapter);
+	unregister_netdev(netdev);
+
+	igc_clear_interrupt_scheme(adapter);
+	pci_iounmap(pdev, adapter->io_addr);
+	pci_release_mem_regions(pdev);
+
+	kfree(adapter->mac_table);
+	kfree(adapter->shadow_vfta);
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver igc_driver = {
+	.name     = igc_driver_name,
+	.id_table = igc_pci_tbl,
+	.probe    = igc_probe,
+	.remove   = igc_remove,
+};
+
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+			      const u32 max_rss_queues)
+{
+	/* Determine if we need to pair queues: if rss_queues > half of
+	 * max_rss_queues, pair them to conserve the limited supply of
+	 * interrupt vectors.
+	 */
+	if (adapter->rss_queues > (max_rss_queues / 2))
+		adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+	else
+		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
+}
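+
+/* Worked example: assuming the i225 maximum of 4 RSS queues (see
+ * igc_get_max_rss_queues() below), a system using 3 or 4 queues exceeds
+ * max_rss_queues / 2 == 2 and gets IGC_FLAG_QUEUE_PAIRS set, so each
+ * Tx/Rx queue pair shares one interrupt vector; with 1 or 2 queues the
+ * flag is cleared and each queue keeps its own vector.
+ */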
+
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+{
+	unsigned int max_rss_queues;
+
+	/* Determine the maximum number of RSS queues supported. */
+	max_rss_queues = IGC_MAX_RX_QUEUES;
+
+	return max_rss_queues;
+}
+
+static void igc_init_queue_configuration(struct igc_adapter *adapter)
+{
+	u32 max_rss_queues;
+
+	max_rss_queues = igc_get_max_rss_queues(adapter);
+	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
+
+	igc_set_flag_queue_pairs(adapter, max_rss_queues);
+}
+
+/**
+ * igc_sw_init - Initialize general software structures (struct igc_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igc_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ */
+static int igc_sw_init(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_hw *hw = &adapter->hw;
+
+	int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IGC_DEFAULT_TXD;
+	adapter->rx_ring_count = IGC_DEFAULT_RXD;
+
+	/* set default ITR values */
+	adapter->rx_itr_setting = IGC_DEFAULT_ITR;
+	adapter->tx_itr_setting = IGC_DEFAULT_ITR;
+
+	/* set default work limits */
+	adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
+
+	/* adjust max frame to be at least the size of a standard frame */
+	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+				VLAN_HLEN;
+	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+
+	spin_lock_init(&adapter->nfc_lock);
+	spin_lock_init(&adapter->stats64_lock);
+	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
+	adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+	adapter->mac_table = kzalloc(size, GFP_ATOMIC);
+	if (!adapter->mac_table)
+		return -ENOMEM;
+
+	igc_init_queue_configuration(adapter);
+
+	/* This call may decrease the number of queues */
+	if (igc_init_interrupt_scheme(adapter, true)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	igc_irq_disable(adapter);
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	return 0;
+}
+
+/**
+ * igc_reinit_queues - reinitialize the interrupt scheme and queues
+ * @adapter: pointer to adapter structure
+ */
+int igc_reinit_queues(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (netif_running(netdev))
+		igc_close(netdev);
+
+	igc_reset_interrupt_capability(adapter);
+
+	if (igc_init_interrupt_scheme(adapter, true)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	if (netif_running(netdev))
+		err = igc_open(netdev);
+
+	return err;
+}
+
+/**
+ * igc_get_hw_dev - return device
+ * @hw: pointer to hardware structure
+ *
+ * Used by the hardware layer to print debugging information.
+ */
+struct net_device *igc_get_hw_dev(struct igc_hw *hw)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	return adapter->netdev;
+}
+
+/**
+ * igc_init_module - Driver Registration Routine
+ *
+ * igc_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init igc_init_module(void)
+{
+	int ret;
+
+	pr_info("%s - version %s\n",
+		igc_driver_string, igc_driver_version);
+
+	pr_info("%s\n", igc_copyright);
+
+	ret = pci_register_driver(&igc_driver);
+	return ret;
+}
+
+module_init(igc_init_module);
+
+/**
+ * igc_exit_module - Driver Exit Cleanup Routine
+ *
+ * igc_exit_module is called just before the driver is removed
+ * from memory.
+ */
+static void __exit igc_exit_module(void)
+{
+	pci_unregister_driver(&igc_driver);
+}
+
+module_exit(igc_exit_module);
+/* igc_main.c */
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c
new file mode 100644
index 0000000..58f81ab
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include "igc_mac.h"
+#include "igc_nvm.h"
+
+/**
+ * igc_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ * @hw: pointer to the HW structure
+ * @ee_reg: EEPROM flag for polling
+ *
+ * Polls the EEPROM status bit for either read or write completion based
+ * upon the value of 'ee_reg'.
+ */
+static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg)
+{
+	s32 ret_val = -IGC_ERR_NVM;
+	u32 attempts = 100000;
+	u32 i, reg = 0;
+
+	for (i = 0; i < attempts; i++) {
+		if (ee_reg == IGC_NVM_POLL_READ)
+			reg = rd32(IGC_EERD);
+		else
+			reg = rd32(IGC_EEWR);
+
+		if (reg & IGC_NVM_RW_REG_DONE) {
+			ret_val = 0;
+			break;
+		}
+
+		udelay(5);
+	}
+
+	return ret_val;
+}
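+
+/* Worst-case poll budget for the loop above: 100000 attempts * 5 us per
+ * udelay() is roughly 500 ms before the poll gives up with -IGC_ERR_NVM.
+ */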
+
+/**
+ * igc_acquire_nvm - Generic request for access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+s32 igc_acquire_nvm(struct igc_hw *hw)
+{
+	s32 timeout = IGC_NVM_GRANT_ATTEMPTS;
+	u32 eecd = rd32(IGC_EECD);
+	s32 ret_val = 0;
+
+	wr32(IGC_EECD, eecd | IGC_EECD_REQ);
+	eecd = rd32(IGC_EECD);
+
+	while (timeout) {
+		if (eecd & IGC_EECD_GNT)
+			break;
+		udelay(5);
+		eecd = rd32(IGC_EECD);
+		timeout--;
+	}
+
+	if (!timeout) {
+		eecd &= ~IGC_EECD_REQ;
+		wr32(IGC_EECD, eecd);
+		hw_dbg("Could not acquire NVM grant\n");
+		ret_val = -IGC_ERR_NVM;
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_release_nvm - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ */
+void igc_release_nvm(struct igc_hw *hw)
+{
+	u32 eecd;
+
+	eecd = rd32(IGC_EECD);
+	eecd &= ~IGC_EECD_REQ;
+	wr32(IGC_EECD, eecd);
+}
+
+/**
+ * igc_read_nvm_eerd - Reads EEPROM using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the EERD register.
+ */
+s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 i, eerd = 0;
+	s32 ret_val = 0;
+
+	/* A check for invalid values: offset too large, too many words,
+	 * or zero words requested.
+	 */
+	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+	    words == 0) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) +
+			IGC_NVM_RW_REG_START;
+
+		wr32(IGC_EERD, eerd);
+		ret_val = igc_poll_eerd_eewr_done(hw, IGC_NVM_POLL_READ);
+		if (ret_val)
+			break;
+
+		data[i] = (rd32(IGC_EERD) >> IGC_NVM_RW_REG_DATA);
+	}
+
+out:
+	return ret_val;
+}
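+
+/* EERD usage sketch: each iteration above writes the word address into
+ * the bits at IGC_NVM_RW_ADDR_SHIFT together with the START bit, polls
+ * DONE, then pulls the 16-bit result from above IGC_NVM_RW_REG_DATA.
+ * A hypothetical caller reading the first three NVM words:
+ *
+ *	u16 words[3];
+ *	ret_val = igc_read_nvm_eerd(hw, 0, 3, words);
+ */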
+
+/**
+ * igc_read_mac_addr - Read device MAC address
+ * @hw: pointer to the HW structure
+ */
+s32 igc_read_mac_addr(struct igc_hw *hw)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = rd32(IGC_RAH(0));
+	rar_low = rd32(IGC_RAL(0));
+
+	for (i = 0; i < IGC_RAL_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8));
+
+	for (i = 0; i < IGC_RAH_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8));
+
+	for (i = 0; i < ETH_ALEN; i++)
+		hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+	return 0;
+}
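+
+/* Byte-order example for the loops above: with rar_low == 0x33221100 and
+ * rar_high == 0x00005544, the permanent address becomes 00:11:22:33:44:55;
+ * each register is unpacked least-significant byte first.
+ */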
+
+/**
+ * igc_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ */
+s32 igc_validate_nvm_checksum(struct igc_hw *hw)
+{
+	u16 checksum = 0;
+	u16 i, nvm_data;
+	s32 ret_val = 0;
+
+	for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16)NVM_SUM) {
+		hw_dbg("NVM Checksum Invalid\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
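+
+/* Checksum arithmetic: every word up to and including NVM_CHECKSUM_REG
+ * must sum to NVM_SUM (0xBABA) in 16-bit arithmetic.  For example, if
+ * the words before the checksum word sum to 0xA000, a valid image stores
+ * 0xBABA - 0xA000 = 0x1ABA in the checksum word - which is exactly what
+ * igc_update_nvm_checksum() below computes.
+ */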
+
+/**
+ * igc_update_nvm_checksum - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum.  Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM.
+ */
+s32 igc_update_nvm_checksum(struct igc_hw *hw)
+{
+	u16 checksum = 0;
+	u16 i, nvm_data;
+	s32  ret_val;
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16)NVM_SUM - checksum;
+	ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
+	if (ret_val)
+		hw_dbg("NVM Write Error while updating checksum.\n");
+
+out:
+	return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h
new file mode 100644
index 0000000..f9fc2e9
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_NVM_H_
+#define _IGC_NVM_H_
+
+s32 igc_acquire_nvm(struct igc_hw *hw);
+void igc_release_nvm(struct igc_hw *hw);
+s32 igc_read_mac_addr(struct igc_hw *hw);
+s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data);
+s32 igc_validate_nvm_checksum(struct igc_hw *hw);
+s32 igc_update_nvm_checksum(struct igc_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
new file mode 100644
index 0000000..f4b05af
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include "igc_phy.h"
+
+/**
+ * igc_check_reset_block - Check if PHY reset is blocked
+ * @hw: pointer to the HW structure
+ *
+ * Read the PHY management control register and check whether a PHY reset
+ * is blocked.  If a reset is not blocked return 0, otherwise
+ * return IGC_ERR_BLK_PHY_RESET (12).
+ */
+s32 igc_check_reset_block(struct igc_hw *hw)
+{
+	u32 manc;
+
+	manc = rd32(IGC_MANC);
+
+	return (manc & IGC_MANC_BLK_PHY_RST_ON_IDE) ?
+		IGC_ERR_BLK_PHY_RESET : 0;
+}
+
+/**
+ * igc_get_phy_id - Retrieve the PHY ID and revision
+ * @hw: pointer to the HW structure
+ *
+ * Reads the PHY registers and stores the PHY ID and possibly the PHY
+ * revision in the hardware structure.
+ */
+s32 igc_get_phy_id(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_id;
+
+	ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
+	if (ret_val)
+		goto out;
+
+	phy->id = (u32)(phy_id << 16);
+	usleep_range(200, 500);
+	ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
+	if (ret_val)
+		goto out;
+
+	phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+	phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_phy_has_link - Polls PHY for link
+ * @hw: pointer to the HW structure
+ * @iterations: number of times to poll for link
+ * @usec_interval: delay between polling attempts
+ * @success: pointer to whether polling was successful or not
+ *
+ * Polls the PHY status register for link, 'iterations' number of times.
+ */
+s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations,
+		     u32 usec_interval, bool *success)
+{
+	u16 i, phy_status;
+	s32 ret_val = 0;
+
+	for (i = 0; i < iterations; i++) {
+		/* Some PHYs require the PHY_STATUS register to be read
+		 * twice due to the link bit being sticky.  No harm doing
+		 * it across the board.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val && usec_interval > 0) {
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			if (usec_interval >= 1000)
+				mdelay(usec_interval / 1000);
+			else
+				udelay(usec_interval);
+		}
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_LINK_STATUS)
+			break;
+		if (usec_interval >= 1000)
+			mdelay(usec_interval / 1000);
+		else
+			udelay(usec_interval);
+	}
+
+	*success = i < iterations;
+
+	return ret_val;
+}
+
+/**
+ * igc_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, restore the link to previous settings.
+ */
+void igc_power_up_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg &= ~MII_CR_POWER_DOWN;
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * igc_power_down_phy_copper - Power down copper PHY
+ * @hw: pointer to the HW structure
+ *
+ * Power down PHY to save power when interface is down and wake on lan
+ * is not enabled.
+ */
+void igc_power_down_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg |= MII_CR_POWER_DOWN;
+
+	/* Temporary workaround - should be removed once the PHY implements
+	 * the IEEE registers properly
+	 */
+	/* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+	usleep_range(1000, 2000);
+}
+
+/**
+ * igc_check_downshift - Checks whether a downshift in speed occurred
+ * @hw: pointer to the HW structure
+ *
+ * Always returns 0.  Speed downshift is not supported on the i225 PHY,
+ * so phy->speed_downgraded is simply cleared.
+ */
+s32 igc_check_downshift(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	switch (phy->type) {
+	case igc_phy_i225:
+	default:
+		/* speed downshift not supported */
+		phy->speed_downgraded = false;
+		ret_val = 0;
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_phy_hw_reset - PHY hardware reset
+ * @hw: pointer to the HW structure
+ *
+ * Verify the reset block is not blocking us from resetting.  Acquire
+ * semaphore (if necessary) and read/set/write the device control reset
+ * bit in the PHY.  Wait the appropriate delay time for the device to
+ * reset and release the semaphore (if necessary).
+ */
+s32 igc_phy_hw_reset(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32  ret_val;
+	u32 ctrl;
+
+	ret_val = igc_check_reset_block(hw);
+	if (ret_val) {
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ctrl = rd32(IGC_CTRL);
+	wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST);
+	wrfl();
+
+	udelay(phy->reset_delay_us);
+
+	wr32(IGC_CTRL, ctrl);
+	wrfl();
+
+	usleep_range(1500, 2000);
+
+	phy->ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_phy_setup_autoneg - Configure PHY for auto-negotiation
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MII auto-neg advertisement register and/or the 1000T control
+ * register, then sets up the advertisement and flow control bits to the
+ * appropriate values for the desired auto-negotiation.
+ */
+static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 aneg_multigbt_an_ctrl = 0;
+	u16 mii_1000t_ctrl_reg = 0;
+	u16 mii_autoneg_adv_reg;
+	s32 ret_val;
+
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* Read the MII Auto-Neg Advertisement Register (Address 4). */
+	ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+		/* Read the MII 1000Base-T Control Register (Address 9). */
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
+					    &mii_1000t_ctrl_reg);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
+	    hw->phy.id == I225_I_PHY_ID) {
+		/* Read the MULTI GBT AN Control Register - reg 7.32 */
+		ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK <<
+					    MMD_DEVADDR_SHIFT) |
+					    ANEG_MULTIGBT_AN_CTRL,
+					    &aneg_multigbt_an_ctrl);
+
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Need to parse both autoneg_advertised and fc and set up
+	 * the appropriate PHY registers.  First we will parse for
+	 * autoneg_advertised software override.  Since we can advertise
+	 * a plethora of combinations, we need to check each bit
+	 * individually.
+	 */
+
+	/* First we clear all the 10/100 mb speed bits in the Auto-Neg
+	 * Advertisement Register (Address 4) and the 1000 mb speed bits in
+	 * the  1000Base-T Control Register (Address 9).
+	 */
+	mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
+				 NWAY_AR_100TX_HD_CAPS |
+				 NWAY_AR_10T_FD_CAPS   |
+				 NWAY_AR_10T_HD_CAPS);
+	mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
+
+	hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+	/* Do we want to advertise 10 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+		hw_dbg("Advertise 10mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+	}
+
+	/* Do we want to advertise 10 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+		hw_dbg("Advertise 10mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+		hw_dbg("Advertise 100mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+		hw_dbg("Advertise 100mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+		hw_dbg("Advertise 1000mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 1000 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+		hw_dbg("Advertise 1000mb Full duplex\n");
+		mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 2500 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_2500_HALF)
+		hw_dbg("Advertise 2500mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 2500 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_2500_FULL) {
+		hw_dbg("Advertise 2500mb Full duplex\n");
+		aneg_multigbt_an_ctrl |= CR_2500T_FD_CAPS;
+	} else {
+		aneg_multigbt_an_ctrl &= ~CR_2500T_FD_CAPS;
+	}
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the PHY advertisement registers accordingly.  If
+	 * auto-negotiation is enabled, then software will have to set the
+	 * "PAUSE" bits to the correct value in the Auto-Negotiation
+	 * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
+	 * negotiation.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not support receiving pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) are enabled.
+	 *  other:  No software override.  The flow control configuration
+	 *          in the EEPROM is used.
+	 */
+	switch (hw->fc.current_mode) {
+	case igc_fc_none:
+		/* Flow control (Rx & Tx) is completely disabled by a
+		 * software over-ride.
+		 */
+		mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case igc_fc_rx_pause:
+		/* Rx Flow control is enabled, and Tx Flow control is
+		 * disabled, by a software over-ride.
+		 *
+		 * Since there really isn't a way to advertise that we are
+		 * capable of Rx Pause ONLY, we will advertise that we
+		 * support both symmetric and asymmetric Rx PAUSE.  Later
+		 * (in igc_config_fc_after_link_up) we will disable the
+		 * hw's ability to send PAUSE frames.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case igc_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+		mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+		break;
+	case igc_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		return -IGC_ERR_CONFIG;
+	}
+
+	ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL)
+		ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
+					     mii_1000t_ctrl_reg);
+
+	if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
+	    hw->phy.id == I225_I_PHY_ID)
+		ret_val = phy->ops.write_reg(hw,
+					     (STANDARD_AN_REG_MASK <<
+					     MMD_DEVADDR_SHIFT) |
+					     ANEG_MULTIGBT_AN_CTRL,
+					     aneg_multigbt_an_ctrl);
+
+	return ret_val;
+}
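+
+/* Pause-bit summary for the flow control switch above:
+ *
+ *	fc mode			NWAY_AR_PAUSE	NWAY_AR_ASM_DIR
+ *	igc_fc_none		0		0
+ *	igc_fc_rx_pause		1		1
+ *	igc_fc_tx_pause		0		1
+ *	igc_fc_full		1		1
+ *
+ * rx_pause and full advertise the same bits; the Rx-only case is trimmed
+ * after link-up by disabling the MAC's ability to send PAUSE frames (see
+ * the comment in the igc_fc_rx_pause case).
+ */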
+
+/**
+ * igc_wait_autoneg - Wait for auto-neg completion
+ * @hw: pointer to the HW structure
+ *
+ * Waits for auto-negotiation to complete or for the auto-negotiation time
+ * limit to expire, whichever happens first.
+ */
+static s32 igc_wait_autoneg(struct igc_hw *hw)
+{
+	u16 i, phy_status;
+	s32 ret_val = 0;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_AUTONEG_COMPLETE)
+			break;
+		msleep(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ * igc_copper_link_autoneg - Setup/Enable autoneg for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Performs initial bounds checking on the autoneg advertisement parameter,
+ * then configures the PHY to advertise the full capability.  Sets up the
+ * PHY for autoneg and restarts negotiation with the link partner.  If
+ * autoneg_wait_to_complete is set, waits for autoneg to complete before
+ * exiting.
+ */
+static s32 igc_copper_link_autoneg(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_ctrl;
+	s32 ret_val;
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code, so we advertise the full capability.
+	 */
+	if (phy->autoneg_advertised == 0)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	hw_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = igc_phy_setup_autoneg(hw);
+	if (ret_val) {
+		hw_dbg("Error Setting up Auto-Negotiation\n");
+		goto out;
+	}
+	hw_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, callback routine).
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = igc_wait_autoneg(hw);
+		if (ret_val) {
+			hw_dbg("Error while waiting for autoneg to complete\n");
+			goto out;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_setup_copper_link - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Calls the appropriate function to configure the link for auto-neg or forced
+ * speed and duplex.  Then we check for link, once link is established calls
+ * to configure collision distance and flow control are called.  If link is
+ * not established, we return -IGC_ERR_PHY (-2).
+ */
+s32 igc_setup_copper_link(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	bool link;
+
+	if (hw->mac.autoneg) {
+		/* Setup autoneg and flow control advertisement and perform
+		 * autonegotiation.
+		 */
+		ret_val = igc_copper_link_autoneg(hw);
+		if (ret_val)
+			goto out;
+	} else {
+		/* PHY will be set to 10H, 10F, 100H or 100F
+		 * depending on user settings.
+		 */
+		hw_dbg("Forcing Speed and Duplex\n");
+		ret_val = hw->phy.ops.force_speed_duplex(hw);
+		if (ret_val) {
+			hw_dbg("Error Forcing Speed and Duplex\n");
+			goto out;
+		}
+	}
+
+	/* Check link status. Wait up to 100 microseconds for link to become
+	 * valid.
+	 */
+	ret_val = igc_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link);
+	if (ret_val)
+		goto out;
+
+	if (link) {
+		hw_dbg("Valid link established!!!\n");
+		igc_config_collision_dist(hw);
+		ret_val = igc_config_fc_after_link_up(hw);
+	} else {
+		hw_dbg("Unable to establish link!!!\n");
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_read_phy_reg_mdic - Read MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the MDI control register in the PHY at offset and stores the
+ * information read to data.
+ */
+static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+	s32 ret_val = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		ret_val = -IGC_ERR_PARAM;
+		goto out;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = ((offset << IGC_MDIC_REG_SHIFT) |
+		(phy->addr << IGC_MDIC_PHY_SHIFT) |
+		(IGC_MDIC_OP_READ));
+
+	wr32(IGC_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed.  The
+	 * timeout is generous because testing showed failures with a
+	 * shorter one.
+	 */
+	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+		usleep_range(500, 1000);
+		mdic = rd32(IGC_MDIC);
+		if (mdic & IGC_MDIC_READY)
+			break;
+	}
+	if (!(mdic & IGC_MDIC_READY)) {
+		hw_dbg("MDI Read did not complete\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+	if (mdic & IGC_MDIC_ERROR) {
+		hw_dbg("MDI Error\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+	*data = (u16)mdic;
+
+out:
+	return ret_val;
+}
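+
+/* MDIC word sketch for the helpers here: a read of PHY register 2 at PHY
+ * address 1 is issued as
+ *
+ *	(2 << IGC_MDIC_REG_SHIFT) | (1 << IGC_MDIC_PHY_SHIFT) | IGC_MDIC_OP_READ
+ *
+ * and the 16-bit result lands in the low bits of IGC_MDIC once
+ * IGC_MDIC_READY is set.
+ */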
+
+/**
+ * igc_write_phy_reg_mdic - Write MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write to register at offset
+ *
+ * Writes data to MDI control register in the PHY at offset.
+ */
+static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+	s32 ret_val = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		ret_val = -IGC_ERR_PARAM;
+		goto out;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to write the desired data.
+	 */
+	mdic = (((u32)data) |
+		(offset << IGC_MDIC_REG_SHIFT) |
+		(phy->addr << IGC_MDIC_PHY_SHIFT) |
+		(IGC_MDIC_OP_WRITE));
+
+	wr32(IGC_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI write completed.  The
+	 * timeout is generous because testing showed failures with a
+	 * shorter one.
+	 */
+	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+		usleep_range(500, 1000);
+		mdic = rd32(IGC_MDIC);
+		if (mdic & IGC_MDIC_READY)
+			break;
+	}
+	if (!(mdic & IGC_MDIC_READY)) {
+		hw_dbg("MDI Write did not complete\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+	if (mdic & IGC_MDIC_ERROR) {
+		hw_dbg("MDI Error\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * __igc_access_xmdio_reg - Read/write XMDIO register
+ * @hw: pointer to the HW structure
+ * @address: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: pointer to value to read/write from/to the XMDIO address
+ * @read: boolean flag to indicate read or write
+ */
+static s32 __igc_access_xmdio_reg(struct igc_hw *hw, u16 address,
+				  u8 dev_addr, u16 *data, bool read)
+{
+	s32 ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, address);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, IGC_MMDAC_FUNC_DATA |
+					dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = hw->phy.ops.read_reg(hw, IGC_MMDAAD, data);
+	else
+		ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, *data);
+	if (ret_val)
+		return ret_val;
+
+	/* Set the MMD access control register back to 0 */
+	return hw->phy.ops.write_reg(hw, IGC_MMDAC, 0);
+}
+
+/**
+ * igc_read_xmdio_reg - Read XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be read from the EMI address
+ */
+static s32 igc_read_xmdio_reg(struct igc_hw *hw, u16 addr,
+			      u8 dev_addr, u16 *data)
+{
+	return __igc_access_xmdio_reg(hw, addr, dev_addr, data, true);
+}
+
+/**
+ * igc_write_xmdio_reg - Write XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be written to the XMDIO address
+ */
+static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr,
+			       u8 dev_addr, u16 data)
+{
+	return __igc_access_xmdio_reg(hw, addr, dev_addr, &data, false);
+}
+
+/**
+ * igc_write_phy_reg_gpy - Write GPY PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore, if necessary, then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ */
+s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data)
+{
+	u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT;
+	s32 ret_val;
+
+	offset = offset & GPY_REG_MASK;
+
+	if (!dev_addr) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+		ret_val = igc_write_phy_reg_mdic(hw, offset, data);
+		/* release the semaphore even if the write failed */
+		hw->phy.ops.release(hw);
+	} else {
+		ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr,
+					      data);
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_read_phy_reg_gpy - Read GPY PHY register
+ * @hw: pointer to the HW structure
+ * @offset: lower half is the register offset to read, upper half is the
+ * MMD to use
+ * @data: data read from the register at offset
+ *
+ * Acquires semaphore, if necessary, then reads the data in the PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ */
+s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data)
+{
+	u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT;
+	s32 ret_val;
+
+	offset = offset & GPY_REG_MASK;
+
+	if (!dev_addr) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+		ret_val = igc_read_phy_reg_mdic(hw, offset, data);
+		/* release the semaphore even if the read failed */
+		hw->phy.ops.release(hw);
+	} else {
+		ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr,
+					     data);
+	}
+
+	return ret_val;
+}
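+
+/* GPY offset encoding used by the two helpers above, assuming GPY_MMD_MASK
+ * selects the upper 16 bits: the caller packs the MMD device address into
+ * the high half and the register number into the low half, e.g.
+ *
+ *	igc_read_phy_reg_gpy(hw, (dev_addr << GPY_MMD_SHIFT) | reg, &val);
+ *
+ * A zero device address falls back to a plain Clause 22 MDIC access.
+ */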
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h
new file mode 100644
index 0000000..25cba33
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_phy.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_PHY_H_
+#define _IGC_PHY_H_
+
+#include "igc_mac.h"
+
+s32 igc_check_reset_block(struct igc_hw *hw);
+s32 igc_phy_hw_reset(struct igc_hw *hw);
+s32 igc_get_phy_id(struct igc_hw *hw);
+s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations,
+		     u32 usec_interval, bool *success);
+s32 igc_check_downshift(struct igc_hw *hw);
+s32 igc_setup_copper_link(struct igc_hw *hw);
+void igc_power_up_phy_copper(struct igc_hw *hw);
+void igc_power_down_phy_copper(struct igc_hw *hw);
+s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data);
+s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
new file mode 100644
index 0000000..50d7c04
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_REGS_H_
+#define _IGC_REGS_H_
+
+/* General Register Descriptions */
+#define IGC_CTRL		0x00000  /* Device Control - RW */
+#define IGC_STATUS		0x00008  /* Device Status - RO */
+#define IGC_EECD		0x00010  /* EEPROM/Flash Control - RW */
+#define IGC_CTRL_EXT		0x00018  /* Extended Device Control - RW */
+#define IGC_MDIC		0x00020  /* MDI Control - RW */
+#define IGC_MDICNFG		0x00E04  /* MDC/MDIO Configuration - RW */
+#define IGC_CONNSW		0x00034  /* Copper/Fiber switch control - RW */
+
+/* Internal Packet Buffer Size Registers */
+#define IGC_RXPBS		0x02404  /* Rx Packet Buffer Size - RW */
+#define IGC_TXPBS		0x03404  /* Tx Packet Buffer Size - RW */
+
+/* NVM  Register Descriptions */
+#define IGC_EERD		0x12014  /* EEPROM mode read - RW */
+#define IGC_EEWR		0x12018  /* EEPROM mode write - RW */
+
+/* Flow Control Register Descriptions */
+#define IGC_FCAL		0x00028  /* FC Address Low - RW */
+#define IGC_FCAH		0x0002C  /* FC Address High - RW */
+#define IGC_FCT			0x00030  /* FC Type - RW */
+#define IGC_FCTTV		0x00170  /* FC Transmit Timer - RW */
+#define IGC_FCRTL		0x02160  /* FC Receive Threshold Low - RW */
+#define IGC_FCRTH		0x02168  /* FC Receive Threshold High - RW */
+#define IGC_FCRTV		0x02460  /* FC Refresh Timer Value - RW */
+#define IGC_FCSTS		0x02464  /* FC Status - RO */
+
+/* PCIe Register Description */
+#define IGC_GCR			0x05B00  /* PCIe control- RW */
+
+/* Semaphore registers */
+#define IGC_SW_FW_SYNC		0x05B5C  /* SW-FW Synchronization - RW */
+#define IGC_SWSM		0x05B50  /* SW Semaphore */
+#define IGC_FWSM		0x05B54  /* FW Semaphore */
+
+/* Function Active and Power State to MNG */
+#define IGC_FACTPS		0x05B30
+
+/* Interrupt Register Description */
+#define IGC_EICS		0x01520  /* Ext. Interrupt Cause Set - W0 */
+#define IGC_EIMS		0x01524  /* Ext. Interrupt Mask Set/Read - RW */
+#define IGC_EIMC		0x01528  /* Ext. Interrupt Mask Clear - WO */
+#define IGC_EIAC		0x0152C  /* Ext. Interrupt Auto Clear - RW */
+#define IGC_EIAM		0x01530  /* Ext. Interrupt Auto Mask - RW */
+#define IGC_ICR			0x01500  /* Intr Cause Read - RC/W1C */
+#define IGC_ICS			0x01504  /* Intr Cause Set - WO */
+#define IGC_IMS			0x01508  /* Intr Mask Set/Read - RW */
+#define IGC_IMC			0x0150C  /* Intr Mask Clear - WO */
+#define IGC_IAM			0x01510  /* Intr Ack Auto Mask- RW */
+/* Intr Throttle - RW */
+#define IGC_EITR(_n)		(0x01680 + (0x4 * (_n)))
+/* Interrupt Vector Allocation - RW */
+#define IGC_IVAR0		0x01700
+#define IGC_IVAR_MISC		0x01740  /* IVAR for "other" causes - RW */
+#define IGC_GPIE		0x01514  /* General Purpose Intr Enable - RW */
+
+/* Interrupt Cause */
+#define IGC_ICRXPTC		0x04104  /* Rx Packet Timer Expire Count */
+#define IGC_ICRXATC		0x04108  /* Rx Absolute Timer Expire Count */
+#define IGC_ICTXPTC		0x0410C  /* Tx Packet Timer Expire Count */
+#define IGC_ICTXATC		0x04110  /* Tx Absolute Timer Expire Count */
+#define IGC_ICTXQEC		0x04118  /* Tx Queue Empty Count */
+#define IGC_ICTXQMTC		0x0411C  /* Tx Queue Min Threshold Count */
+#define IGC_ICRXDMTC		0x04120  /* Rx Descriptor Min Threshold Count */
+#define IGC_ICRXOC		0x04124  /* Receiver Overrun Count */
+
+#define IGC_CBTMPC		0x0402C  /* Circuit Breaker TX Packet Count */
+#define IGC_HTDPMC		0x0403C  /* Host Transmit Discarded Packets */
+#define IGC_CBRMPC		0x040FC  /* Circuit Breaker RX Packet Count */
+#define IGC_RPTHC		0x04104  /* Rx Packets To Host */
+#define IGC_HGPTC		0x04118  /* Host Good Packets TX Count */
+#define IGC_HTCBDPC		0x04124  /* Host TX Circ.Breaker Drop Count */
+
+/* MSI-X Table Register Descriptions */
+#define IGC_PBACL		0x05B68  /* MSIx PBA Clear - R/W 1 to clear */
+
+/* RSS registers */
+#define IGC_MRQC		0x05818 /* Multiple Receive Control - RW */
+
+/* Filtering Registers */
+#define IGC_ETQF(_n)		(0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+
+/* ETQF register bit definitions */
+#define IGC_ETQF_FILTER_ENABLE	BIT(26)
+#define IGC_ETQF_QUEUE_ENABLE	BIT(31)
+#define IGC_ETQF_QUEUE_SHIFT	16
+#define IGC_ETQF_QUEUE_MASK	0x00070000
+#define IGC_ETQF_ETYPE_MASK	0x0000FFFF
+
+/* Redirection Table - RW Array */
+#define IGC_RETA(_i)		(0x05C00 + ((_i) * 4))
+/* RSS Random Key - RW Array */
+#define IGC_RSSRK(_i)		(0x05C80 + ((_i) * 4))
+
+/* Receive Register Descriptions */
+#define IGC_RCTL		0x00100  /* Rx Control - RW */
+#define IGC_SRRCTL(_n)		(0x0C00C + ((_n) * 0x40))
+#define IGC_PSRTYPE(_i)		(0x05480 + ((_i) * 4))
+#define IGC_RDBAL(_n)		(0x0C000 + ((_n) * 0x40))
+#define IGC_RDBAH(_n)		(0x0C004 + ((_n) * 0x40))
+#define IGC_RDLEN(_n)		(0x0C008 + ((_n) * 0x40))
+#define IGC_RDH(_n)		(0x0C010 + ((_n) * 0x40))
+#define IGC_RDT(_n)		(0x0C018 + ((_n) * 0x40))
+#define IGC_RXDCTL(_n)		(0x0C028 + ((_n) * 0x40))
+#define IGC_RQDPC(_n)		(0x0C030 + ((_n) * 0x40))
+#define IGC_RXCSUM		0x05000  /* Rx Checksum Control - RW */
+#define IGC_RLPML		0x05004  /* Rx Long Packet Max Length */
+#define IGC_RFCTL		0x05008  /* Receive Filter Control*/
+#define IGC_MTA			0x05200  /* Multicast Table Array - RW Array */
+#define IGC_UTA			0x0A000  /* Unicast Table Array - RW */
+#define IGC_RAL(_n)		(0x05400 + ((_n) * 0x08))
+#define IGC_RAH(_n)		(0x05404 + ((_n) * 0x08))
+#define IGC_VLAPQF		0x055B0  /* VLAN Priority Queue Filter VLAPQF */
+
+/* Transmit Register Descriptions */
+#define IGC_TCTL		0x00400  /* Tx Control - RW */
+#define IGC_TIPG		0x00410  /* Tx Inter-packet gap - RW */
+#define IGC_TDBAL(_n)		(0x0E000 + ((_n) * 0x40))
+#define IGC_TDBAH(_n)		(0x0E004 + ((_n) * 0x40))
+#define IGC_TDLEN(_n)		(0x0E008 + ((_n) * 0x40))
+#define IGC_TDH(_n)		(0x0E010 + ((_n) * 0x40))
+#define IGC_TDT(_n)		(0x0E018 + ((_n) * 0x40))
+#define IGC_TXDCTL(_n)		(0x0E028 + ((_n) * 0x40))
+
+/* MMD Register Descriptions */
+#define IGC_MMDAC		13 /* MMD Access Control */
+#define IGC_MMDAAD		14 /* MMD Access Address/Data */
+
+/* Good transmitted packets counter registers */
+#define IGC_PQGPTC(_n)		(0x010014 + (0x100 * (_n)))
+
+/* Statistics Register Descriptions */
+#define IGC_CRCERRS	0x04000  /* CRC Error Count - R/clr */
+#define IGC_ALGNERRC	0x04004  /* Alignment Error Count - R/clr */
+#define IGC_SYMERRS	0x04008  /* Symbol Error Count - R/clr */
+#define IGC_RXERRC	0x0400C  /* Receive Error Count - R/clr */
+#define IGC_MPC		0x04010  /* Missed Packet Count - R/clr */
+#define IGC_SCC		0x04014  /* Single Collision Count - R/clr */
+#define IGC_ECOL	0x04018  /* Excessive Collision Count - R/clr */
+#define IGC_MCC		0x0401C  /* Multiple Collision Count - R/clr */
+#define IGC_LATECOL	0x04020  /* Late Collision Count - R/clr */
+#define IGC_COLC	0x04028  /* Collision Count - R/clr */
+#define IGC_DC		0x04030  /* Defer Count - R/clr */
+#define IGC_TNCRS	0x04034  /* Tx-No CRS - R/clr */
+#define IGC_SEC		0x04038  /* Sequence Error Count - R/clr */
+#define IGC_CEXTERR	0x0403C  /* Carrier Extension Error Count - R/clr */
+#define IGC_RLEC	0x04040  /* Receive Length Error Count - R/clr */
+#define IGC_XONRXC	0x04048  /* XON Rx Count - R/clr */
+#define IGC_XONTXC	0x0404C  /* XON Tx Count - R/clr */
+#define IGC_XOFFRXC	0x04050  /* XOFF Rx Count - R/clr */
+#define IGC_XOFFTXC	0x04054  /* XOFF Tx Count - R/clr */
+#define IGC_FCRUC	0x04058  /* Flow Control Rx Unsupported Count- R/clr */
+#define IGC_PRC64	0x0405C  /* Packets Rx (64 bytes) - R/clr */
+#define IGC_PRC127	0x04060  /* Packets Rx (65-127 bytes) - R/clr */
+#define IGC_PRC255	0x04064  /* Packets Rx (128-255 bytes) - R/clr */
+#define IGC_PRC511	0x04068  /* Packets Rx (256-511 bytes) - R/clr */
+#define IGC_PRC1023	0x0406C  /* Packets Rx (512-1023 bytes) - R/clr */
+#define IGC_PRC1522	0x04070  /* Packets Rx (1024-1522 bytes) - R/clr */
+#define IGC_GPRC	0x04074  /* Good Packets Rx Count - R/clr */
+#define IGC_BPRC	0x04078  /* Broadcast Packets Rx Count - R/clr */
+#define IGC_MPRC	0x0407C  /* Multicast Packets Rx Count - R/clr */
+#define IGC_GPTC	0x04080  /* Good Packets Tx Count - R/clr */
+#define IGC_GORCL	0x04088  /* Good Octets Rx Count Low - R/clr */
+#define IGC_GORCH	0x0408C  /* Good Octets Rx Count High - R/clr */
+#define IGC_GOTCL	0x04090  /* Good Octets Tx Count Low - R/clr */
+#define IGC_GOTCH	0x04094  /* Good Octets Tx Count High - R/clr */
+#define IGC_RNBC	0x040A0  /* Rx No Buffers Count - R/clr */
+#define IGC_RUC		0x040A4  /* Rx Undersize Count - R/clr */
+#define IGC_RFC		0x040A8  /* Rx Fragment Count - R/clr */
+#define IGC_ROC		0x040AC  /* Rx Oversize Count - R/clr */
+#define IGC_RJC		0x040B0  /* Rx Jabber Count - R/clr */
+#define IGC_MGTPRC	0x040B4  /* Management Packets Rx Count - R/clr */
+#define IGC_MGTPDC	0x040B8  /* Management Packets Dropped Count - R/clr */
+#define IGC_MGTPTC	0x040BC  /* Management Packets Tx Count - R/clr */
+#define IGC_TORL	0x040C0  /* Total Octets Rx Low - R/clr */
+#define IGC_TORH	0x040C4  /* Total Octets Rx High - R/clr */
+#define IGC_TOTL	0x040C8  /* Total Octets Tx Low - R/clr */
+#define IGC_TOTH	0x040CC  /* Total Octets Tx High - R/clr */
+#define IGC_TPR		0x040D0  /* Total Packets Rx - R/clr */
+#define IGC_TPT		0x040D4  /* Total Packets Tx - R/clr */
+#define IGC_PTC64	0x040D8  /* Packets Tx (64 bytes) - R/clr */
+#define IGC_PTC127	0x040DC  /* Packets Tx (65-127 bytes) - R/clr */
+#define IGC_PTC255	0x040E0  /* Packets Tx (128-255 bytes) - R/clr */
+#define IGC_PTC511	0x040E4  /* Packets Tx (256-511 bytes) - R/clr */
+#define IGC_PTC1023	0x040E8  /* Packets Tx (512-1023 bytes) - R/clr */
+#define IGC_PTC1522	0x040EC  /* Packets Tx (1024-1522 Bytes) - R/clr */
+#define IGC_MPTC	0x040F0  /* Multicast Packets Tx Count - R/clr */
+#define IGC_BPTC	0x040F4  /* Broadcast Packets Tx Count - R/clr */
+#define IGC_TSCTC	0x040F8  /* TCP Segmentation Context Tx - R/clr */
+#define IGC_TSCTFC	0x040FC  /* TCP Segmentation Context Tx Fail - R/clr */
+#define IGC_IAC		0x04100  /* Interrupt Assertion Count */
+#define IGC_ICTXPTC	0x0410C  /* Interrupt Cause Tx Pkt Timer Expire Count */
+#define IGC_ICTXATC	0x04110  /* Interrupt Cause Tx Abs Timer Expire Count */
+#define IGC_ICTXQEC	0x04118  /* Interrupt Cause Tx Queue Empty Count */
+#define IGC_ICTXQMTC	0x0411C  /* Interrupt Cause Tx Queue Min Thresh Count */
+#define IGC_RPTHC	0x04104  /* Rx Packets To Host */
+#define IGC_HGPTC	0x04118  /* Host Good Packets Tx Count */
+#define IGC_RXDMTC	0x04120  /* Rx Descriptor Minimum Threshold Count */
+#define IGC_HGORCL	0x04128  /* Host Good Octets Received Count Low */
+#define IGC_HGORCH	0x0412C  /* Host Good Octets Received Count High */
+#define IGC_HGOTCL	0x04130  /* Host Good Octets Transmit Count Low */
+#define IGC_HGOTCH	0x04134  /* Host Good Octets Transmit Count High */
+#define IGC_LENERRS	0x04138  /* Length Errors Count */
+#define IGC_HRMPC	0x0A018  /* Header Redirection Missed Packet Count */
+
+/* Management registers */
+#define IGC_MANC	0x05820  /* Management Control - RW */
+
+/* Shadow Ram Write Register - RW */
+#define IGC_SRWR	0x12018
+
+/* forward declaration */
+struct igc_hw;
+u32 igc_rd32(struct igc_hw *hw, u32 reg);
+
+/* write operations, indexed using DWORDS */
+#define wr32(reg, val) \
+do { \
+	u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+	if (!IGC_REMOVED(hw_addr)) \
+		writel((val), &hw_addr[(reg)]); \
+} while (0)
+
+#define rd32(reg) (igc_rd32(hw, reg))
+
+#define wrfl() ((void)rd32(IGC_STATUS))
+
+#define array_wr32(reg, offset, value) \
+	wr32((reg) + ((offset) << 2), (value))
+
+#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
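+
+/* The array accessors index in DWORDs: array_wr32(IGC_RETA(0), 2, v)
+ * expands to wr32(IGC_RETA(0) + (2 << 2), v), i.e. 8 bytes past the table
+ * base - the same register as IGC_RETA(2), since the RETA stride is 4.
+ */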
+
+#endif
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 7722153..0940a0d 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -102,7 +102,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION("Intel(R) PRO/10GbE Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
@@ -680,8 +680,8 @@
 	txdr->size = txdr->count * sizeof(struct ixgb_tx_desc);
 	txdr->size = ALIGN(txdr->size, 4096);
 
-	txdr->desc = dma_zalloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
-					 GFP_KERNEL);
+	txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
+					GFP_KERNEL);
 	if (!txdr->desc) {
 		vfree(txdr->buffer_info);
 		return -ENOMEM;
@@ -763,8 +763,8 @@
 	rxdr->size = rxdr->count * sizeof(struct ixgb_rx_desc);
 	rxdr->size = ALIGN(rxdr->size, 4096);
 
-	rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
-					 GFP_KERNEL);
+	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
+					GFP_KERNEL);
 
 	if (!rxdr->desc) {
 		vfree(rxdr->buffer_info);
@@ -1331,9 +1331,7 @@
 	}
 
 	for (f = 0; f < nr_frags; f++) {
-		const struct skb_frag_struct *frag;
-
-		frag = &skb_shinfo(skb)->frags[f];
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 		len = skb_frag_size(frag);
 		offset = 0;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 5414685..4fb0d9e 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -8,7 +8,8 @@
 
 ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
               ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \
-              ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o
+              ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o \
+              ixgbe_xsk.o
 
 ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
                               ixgbe_dcb_82599.o ixgbe_dcb_nl.o
@@ -16,4 +17,4 @@
 ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
 ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
 ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
-ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
+ixgbe-$(CONFIG_IXGBE_IPSEC) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 4fc906c..39e73ad 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -12,6 +12,7 @@
 #include <linux/aer.h>
 #include <linux/if_vlan.h>
 #include <linux/jiffies.h>
+#include <linux/phy.h>
 
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
@@ -30,7 +31,6 @@
 #include "ixgbe_ipsec.h"
 
 #include <net/xdp.h>
-#include <net/busy_poll.h>
 
 /* common prefix used by pr_<> macros */
 #undef pr_fmt
@@ -50,8 +50,6 @@
 #define IXGBE_MAX_RXD			   4096
 #define IXGBE_MIN_RXD			     64
 
-#define IXGBE_ETH_P_LLDP		 0x88CC
-
 /* flow control */
 #define IXGBE_MIN_FCRTL			   0x40
 #define IXGBE_MAX_FCRTL			0x7FF80
@@ -228,13 +226,17 @@
 struct ixgbe_rx_buffer {
 	struct sk_buff *skb;
 	dma_addr_t dma;
-	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 page_offset;
-#else
-	__u16 page_offset;
-#endif
-	__u16 pagecnt_bias;
+	union {
+		struct {
+			struct page *page;
+			__u32 page_offset;
+			__u16 pagecnt_bias;
+		};
+		struct {
+			void *addr;
+			u64 handle;
+		};
+	};
 };
 
 struct ixgbe_queue_stats {
@@ -271,6 +273,7 @@
 	__IXGBE_TX_DETECT_HANG,
 	__IXGBE_HANG_CHECK_ARMED,
 	__IXGBE_TX_XDP_RING,
+	__IXGBE_TX_DISABLED,
 };
 
 #define ring_uses_build_skb(ring) \
@@ -347,6 +350,10 @@
 		struct ixgbe_rx_queue_stats rx_stats;
 	};
 	struct xdp_rxq_info xdp_rxq;
+	struct xdp_umem *xsk_umem;
+	struct zero_copy_allocator zca; /* ZC allocator anchor */
+	u16 ring_idx;		/* {rx,tx,xdp}_ring back reference idx */
+	u16 rx_buf_len;
 } ____cacheline_internodealigned_in_smp;
 
 enum ixgbe_ring_f_enum {
@@ -553,6 +560,7 @@
 	struct net_device *netdev;
 	struct bpf_prog *xdp_prog;
 	struct pci_dev *pdev;
+	struct mii_bus *mii_bus;
 
 	unsigned long state;
 
@@ -605,6 +613,7 @@
 #define IXGBE_FLAG2_EEE_ENABLED			BIT(15)
 #define IXGBE_FLAG2_RX_LEGACY			BIT(16)
 #define IXGBE_FLAG2_IPSEC_ENABLED		BIT(17)
+#define IXGBE_FLAG2_VF_IPSEC_ENABLED		BIT(18)
 
 	/* Tx fast path data */
 	int num_tx_queues;
@@ -624,6 +633,7 @@
 	/* XDP */
 	int num_xdp_queues;
 	struct ixgbe_ring *xdp_ring[MAX_XDP_QUEUES];
+	unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled rings */
 
 	/* TX */
 	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
@@ -760,9 +770,9 @@
 #define IXGBE_RSS_KEY_SIZE     40  /* size of RSS Hash Key in bytes */
 	u32 *rss_key;
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 	struct ixgbe_ipsec *ipsec;
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
 };
 
 static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
@@ -994,7 +1004,7 @@
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
 s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
@@ -1003,15 +1013,30 @@
 		    struct sk_buff *skb);
 int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
 		   struct ixgbe_ipsec_tx_data *itd);
+void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf);
+int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf);
+int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf);
 #else
-static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
-static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
-static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { }
 static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
 				  union ixgbe_adv_rx_desc *rx_desc,
-				  struct sk_buff *skb) { };
+				  struct sk_buff *skb) { }
 static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 				 struct ixgbe_tx_buffer *first,
-				 struct ixgbe_ipsec_tx_data *itd) { return 0; };
-#endif /* CONFIG_XFRM_OFFLOAD */
+				 struct ixgbe_ipsec_tx_data *itd) { return 0; }
+static inline void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter,
+					u32 vf) { }
+static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter,
+					u32 *mbuf, u32 vf) { return -EACCES; }
+static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter,
+					u32 *mbuf, u32 vf) { return -EACCES; }
+#endif /* CONFIG_IXGBE_IPSEC */
+
+static inline bool ixgbe_enabled_xdp_adapter(struct ixgbe_adapter *adapter)
+{
+	return !!adapter->xdp_prog;
+}
+
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 1e49716..109f8de 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1048,7 +1048,7 @@
 	 * clear the multicast table.  Also reset num_rar_entries to 128,
 	 * since we modify this value when programming the SAN MAC address.
 	 */
-	hw->mac.num_rar_entries = 128;
+	hw->mac.num_rar_entries = IXGBE_82599_RAR_ENTRIES;
 	hw->mac.ops.init_rx_addrs(hw);
 
 	/* Store the permanent SAN mac address */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
index 50dfb02..171cdc5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
@@ -190,22 +190,12 @@
 void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter)
 {
 	const char *name = pci_name(adapter->pdev);
-	struct dentry *pfile;
+
 	adapter->ixgbe_dbg_adapter = debugfs_create_dir(name, ixgbe_dbg_root);
-	if (adapter->ixgbe_dbg_adapter) {
-		pfile = debugfs_create_file("reg_ops", 0600,
-					    adapter->ixgbe_dbg_adapter, adapter,
-					    &ixgbe_dbg_reg_ops_fops);
-		if (!pfile)
-			e_dev_err("debugfs reg_ops for %s failed\n", name);
-		pfile = debugfs_create_file("netdev_ops", 0600,
-					    adapter->ixgbe_dbg_adapter, adapter,
-					    &ixgbe_dbg_netdev_ops_fops);
-		if (!pfile)
-			e_dev_err("debugfs netdev_ops for %s failed\n", name);
-	} else {
-		e_dev_err("debugfs entry for %s failed\n", name);
-	}
+	debugfs_create_file("reg_ops", 0600, adapter->ixgbe_dbg_adapter,
+			    adapter, &ixgbe_dbg_reg_ops_fops);
+	debugfs_create_file("netdev_ops", 0600, adapter->ixgbe_dbg_adapter,
+			    adapter, &ixgbe_dbg_netdev_ops_fops);
 }
 
 /**
@@ -224,8 +214,6 @@
 void ixgbe_dbg_init(void)
 {
 	ixgbe_dbg_root = debugfs_create_dir(ixgbe_driver_name, NULL);
-	if (ixgbe_dbg_root == NULL)
-		pr_err("init of debugfs failed\n");
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index e5a8461..7c52ae8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -136,6 +136,8 @@
 static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = {
 #define IXGBE_PRIV_FLAGS_LEGACY_RX	BIT(0)
 	"legacy-rx",
+#define IXGBE_PRIV_FLAGS_VF_IPSEC_EN	BIT(1)
+	"vf-ipsec",
 };
 
 #define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings)
@@ -2204,7 +2206,8 @@
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
-	if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE))
+	if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE |
+			    WAKE_FILTER))
 		return -EOPNOTSUPP;
 
 	if (ixgbe_wol_exclusion(adapter, wol))
@@ -3223,7 +3226,8 @@
 		page_swap = true;
 	}
 
-	if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap) {
+	if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap ||
+	    !(addr_mode & IXGBE_SFF_DDM_IMPLEMENTED)) {
 		/* We have a SFP, but it does not support SFF-8472 */
 		modinfo->type = ETH_MODULE_SFF_8079;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
@@ -3409,6 +3413,9 @@
 	if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
 		priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX;
 
+	if (adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)
+		priv_flags |= IXGBE_PRIV_FLAGS_VF_IPSEC_EN;
+
 	return priv_flags;
 }
 
@@ -3421,6 +3428,10 @@
 	if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX)
 		flags2 |= IXGBE_FLAG2_RX_LEGACY;
 
+	flags2 &= ~IXGBE_FLAG2_VF_IPSEC_ENABLED;
+	if (priv_flags & IXGBE_PRIV_FLAGS_VF_IPSEC_EN)
+		flags2 |= IXGBE_FLAG2_VF_IPSEC_ENABLED;
+
 	if (flags2 != adapter->flags2) {
 		adapter->flags2 = flags2;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index add124e..113f608 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -4,6 +4,12 @@
 #include "ixgbe.h"
 #include <net/xfrm.h>
 #include <crypto/aead.h>
+#include <linux/if_bridge.h>
+
+#define IXGBE_IPSEC_KEY_BITS  160
+static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs);
 
 /**
  * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
@@ -113,7 +119,6 @@
  **/
 static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 {
-	struct ixgbe_ipsec *ipsec = adapter->ipsec;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 buf[4] = {0, 0, 0, 0};
 	u16 idx;
@@ -132,9 +137,6 @@
 		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
 		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
 	}
-
-	ipsec->num_rx_sa = 0;
-	ipsec->num_tx_sa = 0;
 }
 
 /**
@@ -290,6 +292,13 @@
 /**
  * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
  * @adapter: board private structure
+ *
+ * Reload the HW tables from the SW tables after they've been bashed
+ * by a chip reset.
+ *
+ * Any VF entries are removed from the SW and HW tables since either
+ * (a) the VF also gets reset on PF reset and will ask again for the
+ * offloads, or (b) the VF has been removed by a change in the num_vfs.
  **/
 void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
 {
@@ -305,6 +314,28 @@
 	ixgbe_ipsec_clear_hw_tables(adapter);
 	ixgbe_ipsec_start_engine(adapter);
 
+	/* reload the Rx and Tx keys */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *r = &ipsec->rx_tbl[i];
+		struct tx_sa *t = &ipsec->tx_tbl[i];
+
+		if (r->used) {
+			if (r->mode & IXGBE_RXTXMOD_VF)
+				ixgbe_ipsec_del_sa(r->xs);
+			else
+				ixgbe_ipsec_set_rx_sa(hw, i, r->xs->id.spi,
+						      r->key, r->salt,
+						      r->mode, r->iptbl_ind);
+		}
+
+		if (t->used) {
+			if (t->mode & IXGBE_RXTXMOD_VF)
+				ixgbe_ipsec_del_sa(t->xs);
+			else
+				ixgbe_ipsec_set_tx_sa(hw, i, t->key, t->salt);
+		}
+	}
+
 	/* reload the IP addrs */
 	for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
 		struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
@@ -312,20 +343,6 @@
 		if (ipsa->used)
 			ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
 	}
-
-	/* reload the Rx and Tx keys */
-	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
-		struct rx_sa *rsa = &ipsec->rx_tbl[i];
-		struct tx_sa *tsa = &ipsec->tx_tbl[i];
-
-		if (rsa->used)
-			ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi,
-					      rsa->key, rsa->salt,
-					      rsa->mode, rsa->iptbl_ind);
-
-		if (tsa->used)
-			ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
-	}
 }
 
 /**
@@ -382,6 +399,8 @@
 	rcu_read_lock();
 	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
 				   (__force u32)spi) {
+		if (rsa->mode & IXGBE_RXTXMOD_VF)
+			continue;
 		if (spi == rsa->xs->id.spi &&
 		    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
 		      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
@@ -411,7 +430,6 @@
 	struct net_device *dev = xs->xso.dev;
 	unsigned char *key_data;
 	char *alg_name = NULL;
-	const char aes_gcm_name[] = "rfc4106(gcm(aes))";
 	int key_len;
 
 	if (!xs->aead) {
@@ -439,9 +457,9 @@
 	 * we don't need to do any byteswapping.
 	 * 160 accounts for 16 byte key and 4 byte salt
 	 */
-	if (key_len == 160) {
+	if (key_len == IXGBE_IPSEC_KEY_BITS) {
 		*mysalt = ((u32 *)key_data)[4];
-	} else if (key_len != 128) {
+	} else if (key_len != (IXGBE_IPSEC_KEY_BITS - (sizeof(*mysalt) * 8))) {
 		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
 		return -EINVAL;
 	} else {
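The arithmetic behind the two branches: IXGBE_IPSEC_KEY_BITS is 160, i.e. a 128-bit AES-GCM key plus a 32-bit salt, and sizeof(*mysalt) * 8 is 32, so the else-if tests for 160 - 32 = 128 bits (a key supplied with no salt). Laid out in bytes:

    /* key_data[0..15]  : 128-bit key -> mykey
     * key_data[16..19] : 32-bit salt -> *mysalt = ((u32 *)key_data)[4]
     */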
@@ -676,7 +694,8 @@
 	} else {
 		struct tx_sa tsa;
 
-		if (adapter->num_vfs)
+		if (adapter->num_vfs &&
+		    adapter->bridge_mode != BRIDGE_MODE_VEPA)
 			return -EOPNOTSUPP;
 
 		/* find the first unused index */
@@ -814,6 +833,226 @@
 };
 
 /**
+ * ixgbe_ipsec_vf_clear - clear the tables of data for a VF
+ * @adapter: board private structure
+ * @vf: VF id to be removed
+ **/
+void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	int i;
+
+	if (!ipsec)
+		return;
+
+	/* search rx sa table */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) {
+		if (!ipsec->rx_tbl[i].used)
+			continue;
+		if (ipsec->rx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
+		    ipsec->rx_tbl[i].vf == vf)
+			ixgbe_ipsec_del_sa(ipsec->rx_tbl[i].xs);
+	}
+
+	/* search tx sa table */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_tx_sa; i++) {
+		if (!ipsec->tx_tbl[i].used)
+			continue;
+		if (ipsec->tx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
+		    ipsec->tx_tbl[i].vf == vf)
+			ixgbe_ipsec_del_sa(ipsec->tx_tbl[i].xs);
+	}
+}
+
+/**
+ * ixgbe_ipsec_vf_add_sa - translate VF request to SA add
+ * @adapter: board private structure
+ * @msgbuf: The message buffer
+ * @vf: the VF index
+ *
+ * Make up a new xs and algorithm info from the data sent by the VF.
+ * We only need to sketch in just enough to set up the HW offload.
+ * Put the resulting offload_handle into the return message to the VF.
+ *
+ * Returns 0 or error value
+ **/
+int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_algo_desc *algo;
+	struct sa_mbx_msg *sam;
+	struct xfrm_state *xs;
+	size_t aead_len;
+	u16 sa_idx;
+	u32 pfsa;
+	int err;
+
+	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
+	if (!adapter->vfinfo[vf].trusted ||
+	    !(adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)) {
+		e_warn(drv, "VF %d attempted to add an IPsec SA\n", vf);
+		err = -EACCES;
+		goto err_out;
+	}
+
+	/* Tx IPsec offload doesn't seem to work on this
+	 * device, so block these requests for now.
+	 */
+	if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) {
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
+	xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+	if (unlikely(!xs)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	xs->xso.flags = sam->flags;
+	xs->id.spi = sam->spi;
+	xs->id.proto = sam->proto;
+	xs->props.family = sam->family;
+	if (xs->props.family == AF_INET6)
+		memcpy(&xs->id.daddr.a6, sam->addr, sizeof(xs->id.daddr.a6));
+	else
+		memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
+	xs->xso.dev = adapter->netdev;
+
+	algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+	if (unlikely(!algo)) {
+		err = -ENOENT;
+		goto err_xs;
+	}
+
+	aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
+	xs->aead = kzalloc(aead_len, GFP_KERNEL);
+	if (unlikely(!xs->aead)) {
+		err = -ENOMEM;
+		goto err_xs;
+	}
+
+	xs->props.ealgo = algo->desc.sadb_alg_id;
+	xs->geniv = algo->uinfo.aead.geniv;
+	xs->aead->alg_icv_len = IXGBE_IPSEC_AUTH_BITS;
+	xs->aead->alg_key_len = IXGBE_IPSEC_KEY_BITS;
+	memcpy(xs->aead->alg_key, sam->key, sizeof(sam->key));
+	memcpy(xs->aead->alg_name, aes_gcm_name, sizeof(aes_gcm_name));
+
+	/* set up the HW offload */
+	err = ixgbe_ipsec_add_sa(xs);
+	if (err)
+		goto err_aead;
+
+	pfsa = xs->xso.offload_handle;
+	if (pfsa < IXGBE_IPSEC_BASE_TX_INDEX) {
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_RX_INDEX;
+		ipsec->rx_tbl[sa_idx].vf = vf;
+		ipsec->rx_tbl[sa_idx].mode |= IXGBE_RXTXMOD_VF;
+	} else {
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+		ipsec->tx_tbl[sa_idx].vf = vf;
+		ipsec->tx_tbl[sa_idx].mode |= IXGBE_RXTXMOD_VF;
+	}
+
+	msgbuf[1] = xs->xso.offload_handle;
+
+	return 0;
+
+err_aead:
+	kzfree(xs->aead);
+err_xs:
+	kzfree(xs);
+err_out:
+	msgbuf[1] = err;
+	return err;
+}
+
+/**
+ * ixgbe_ipsec_vf_del_sa - translate VF request to SA delete
+ * @adapter: board private structure
+ * @msgbuf: The message buffer
+ * @vf: the VF index
+ *
+ * Given the offload_handle sent by the VF, look for the related SA table
+ * entry and use its xs field to call for a delete of the SA.
+ *
+ * Note: We silently ignore requests to delete entries that are already
+ *       set to unused because when a VF is set to "DOWN", the PF first
+ *       gets a reset and clears all the VF's entries; then the VF's
+ *       XFRM stack sends individual deletes for each entry, which the
+ *       reset already removed.  In the future it might be good to
+ *       optimize this so that fewer unnecessary delete messages are sent.
+ *
+ * Returns 0 or error value
+ **/
+int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	u32 pfsa = msgbuf[1];
+	u16 sa_idx;
+
+	if (!adapter->vfinfo[vf].trusted) {
+		e_err(drv, "vf %d attempted to delete an SA\n", vf);
+		return -EPERM;
+	}
+
+	if (pfsa < IXGBE_IPSEC_BASE_TX_INDEX) {
+		struct rx_sa *rsa;
+
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_RX_INDEX;
+		if (sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT) {
+			e_err(drv, "vf %d SA index %d out of range\n",
+			      vf, sa_idx);
+			return -EINVAL;
+		}
+
+		rsa = &ipsec->rx_tbl[sa_idx];
+
+		if (!rsa->used)
+			return 0;
+
+		if (!(rsa->mode & IXGBE_RXTXMOD_VF) ||
+		    rsa->vf != vf) {
+			e_err(drv, "vf %d bad Rx SA index %d\n", vf, sa_idx);
+			return -ENOENT;
+		}
+
+		xs = ipsec->rx_tbl[sa_idx].xs;
+	} else {
+		struct tx_sa *tsa;
+
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+		if (sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT) {
+			e_err(drv, "vf %d SA index %d out of range\n",
+			      vf, sa_idx);
+			return -EINVAL;
+		}
+
+		tsa = &ipsec->tx_tbl[sa_idx];
+
+		if (!tsa->used)
+			return 0;
+
+		if (!(tsa->mode & IXGBE_RXTXMOD_VF) ||
+		    tsa->vf != vf) {
+			e_err(drv, "vf %d bad Tx SA index %d\n", vf, sa_idx);
+			return -ENOENT;
+		}
+
+		xs = ipsec->tx_tbl[sa_idx].xs;
+	}
+
+	ixgbe_ipsec_del_sa(xs);
+
+	/* remove the xs that was made-up in the add request */
+	kzfree(xs);
+
+	return 0;
+}
+
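For reference, a hedged sketch of the VF-side counterpart of this request, laid out per struct sa_mbx_msg (added to ixgbe_ipsec.h below). The mailbox array size and the use of msgbuf[0] for the IXGBE_VF_IPSEC_ADD opcode are assumptions modeled on the existing mailbox API, not code from this patch:

    u32 msgbuf[IXGBE_VFMAILBOX_SIZE] = { IXGBE_VF_IPSEC_ADD };
    struct sa_mbx_msg *sam = (struct sa_mbx_msg *)&msgbuf[1];

    sam->spi = xs->id.spi;
    sam->flags = xs->xso.flags;
    sam->proto = xs->id.proto;
    sam->family = xs->props.family;
    memcpy(sam->addr, &xs->id.daddr, sizeof(sam->addr));
    memcpy(sam->key, xs->aead->alg_key, sizeof(sam->key));
    /* the VF then posts msgbuf and, on success, reads the
     * offload handle back from msgbuf[1]
     */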
+/**
  * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
  * @tx_ring: outgoing context
  * @first: current data packet
@@ -826,11 +1065,13 @@
 	struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
 	struct ixgbe_ipsec *ipsec = adapter->ipsec;
 	struct xfrm_state *xs;
+	struct sec_path *sp;
 	struct tx_sa *tsa;
 
-	if (unlikely(!first->skb->sp->len)) {
+	sp = skb_sec_path(first->skb);
+	if (unlikely(!sp->len)) {
 		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
-			   __func__, first->skb->sp->len);
+			   __func__, sp->len);
 		return 0;
 	}
 
@@ -920,6 +1161,7 @@
 	struct xfrm_state *xs = NULL;
 	struct ipv6hdr *ip6 = NULL;
 	struct iphdr *ip4 = NULL;
+	struct sec_path *sp;
 	void *daddr;
 	__be32 spi;
 	u8 *c_hdr;
@@ -959,12 +1201,12 @@
 	if (unlikely(!xs))
 		return;
 
-	skb->sp = secpath_dup(skb->sp);
-	if (unlikely(!skb->sp))
+	sp = secpath_set(skb);
+	if (unlikely(!sp))
 		return;
 
-	skb->sp->xvec[skb->sp->len++] = xs;
-	skb->sp->olen++;
+	sp->xvec[sp->len++] = xs;
+	sp->olen++;
 	xo = xfrm_offload(skb);
 	xo->flags = CRYPTO_DONE;
 	xo->status = CRYPTO_SUCCESS;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index 9ef7faa..d2b64ff 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -26,6 +26,7 @@
 #define IXGBE_RXMOD_PROTO_ESP		0x00000004
 #define IXGBE_RXMOD_DECRYPT		0x00000008
 #define IXGBE_RXMOD_IPV6		0x00000010
+#define IXGBE_RXTXMOD_VF		0x00000020
 
 struct rx_sa {
 	struct hlist_node hlist;
@@ -37,6 +38,7 @@
 	u8  iptbl_ind;
 	bool used;
 	bool decrypt;
+	u32 vf;
 };
 
 struct rx_ip_sa {
@@ -49,8 +51,10 @@
 	struct xfrm_state *xs;
 	u32 key[4];
 	u32 salt;
+	u32 mode;
 	bool encrypt;
 	bool used;
+	u32 vf;
 };
 
 struct ixgbe_ipsec_tx_data {
@@ -67,4 +71,13 @@
 	struct tx_sa *tx_tbl;
 	DECLARE_HASHTABLE(rx_sa_list, 10);
 };
+
+struct sa_mbx_msg {
+	__be32 spi;
+	u8 flags;
+	u8 proto;
+	u16 family;
+	__be32 addr[4];
+	u32 key[5];
+};
 #endif /* _IXGBE_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index d361f57..cc3196a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -836,12 +836,10 @@
 	struct ixgbe_ring *ring;
 	int node = NUMA_NO_NODE;
 	int cpu = -1;
-	int ring_count, size;
+	int ring_count;
 	u8 tcs = adapter->hw_tcs;
 
 	ring_count = txr_count + rxr_count + xdp_count;
-	size = sizeof(struct ixgbe_q_vector) +
-	       (sizeof(struct ixgbe_ring) * ring_count);
 
 	/* customize cpu for Flow Director mapping */
 	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
@@ -855,9 +853,11 @@
 	}
 
 	/* allocate q_vector and rings */
-	q_vector = kzalloc_node(size, GFP_KERNEL, node);
+	q_vector = kzalloc_node(struct_size(q_vector, ring, ring_count),
+				GFP_KERNEL, node);
 	if (!q_vector)
-		q_vector = kzalloc(size, GFP_KERNEL);
+		q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+				   GFP_KERNEL);
 	if (!q_vector)
 		return -ENOMEM;
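struct_size() (from <linux/overflow.h>) computes the size of a struct with a trailing flexible array, saturating to SIZE_MAX on arithmetic overflow so the allocation fails cleanly instead of being undersized. It replaces the open-coded form removed above:

    size = sizeof(struct ixgbe_q_vector) +
           (sizeof(struct ixgbe_ring) * ring_count);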
 
@@ -1055,7 +1055,7 @@
 	int txr_remaining = adapter->num_tx_queues;
 	int xdp_remaining = adapter->num_xdp_queues;
 	int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
-	int err;
+	int err, i;
 
 	/* only one q_vector if MSI-X is disabled. */
 	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
@@ -1097,6 +1097,21 @@
 		xdp_idx += xqpv;
 	}
 
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		if (adapter->rx_ring[i])
+			adapter->rx_ring[i]->ring_idx = i;
+	}
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		if (adapter->tx_ring[i])
+			adapter->tx_ring[i]->ring_idx = i;
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		if (adapter->xdp_ring[i])
+			adapter->xdp_ring[i]->ring_idx = i;
+	}
+
 	return 0;
 
 err_out:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6cdd58d..91b3780 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -27,6 +27,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/atomic.h>
+#include <linux/numa.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <net/udp_tunnel.h>
 #include <net/pkt_cls.h>
@@ -34,12 +35,16 @@
 #include <net/tc_act/tc_mirred.h>
 #include <net/vxlan.h>
 #include <net/mpls.h>
+#include <net/xdp_sock.h>
+#include <net/xfrm.h>
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
 #include "ixgbe_dcb_82599.h"
+#include "ixgbe_phy.h"
 #include "ixgbe_sriov.h"
 #include "ixgbe_model.h"
+#include "ixgbe_txrx_common.h"
 
 char ixgbe_driver_name[] = "ixgbe";
 static const char ixgbe_driver_string[] =
@@ -159,7 +164,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 static struct workqueue_struct *ixgbe_wq;
@@ -893,8 +898,8 @@
 	}
 }
 
-static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter,
-					  u64 qmask)
+void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter,
+			    u64 qmask)
 {
 	u32 mask;
 
@@ -1673,9 +1678,9 @@
  * order to populate the hash, checksum, VLAN, timestamp, protocol, and
  * other fields within the skb.
  **/
-static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
-				     union ixgbe_adv_rx_desc *rx_desc,
-				     struct sk_buff *skb)
+void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
+			      union ixgbe_adv_rx_desc *rx_desc,
+			      struct sk_buff *skb)
 {
 	struct net_device *dev = rx_ring->netdev;
 	u32 flags = rx_ring->q_vector->adapter->flags;
@@ -1708,8 +1713,8 @@
 	skb->protocol = eth_type_trans(skb, dev);
 }
 
-static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
-			 struct sk_buff *skb)
+void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
+		  struct sk_buff *skb)
 {
 	napi_gro_receive(&q_vector->napi, skb);
 }
@@ -1781,7 +1786,7 @@
 static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
 			    struct sk_buff *skb)
 {
-	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
 	unsigned char *va;
 	unsigned int pull_len;
 
@@ -1796,14 +1801,14 @@
 	 * we need the header to contain the greater of either ETH_HLEN or
 	 * 60 bytes if the skb->len is less than 60 for skb_pad.
 	 */
-	pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE);
+	pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
 	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
 
 	/* update all of the pointers */
 	skb_frag_size_sub(frag, pull_len);
-	frag->page_offset += pull_len;
+	skb_frag_off_add(frag, pull_len);
 	skb->data_len -= pull_len;
 	skb->tail += pull_len;
 }
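The skb_frag_t accessors used here arrived when v5.4 converted skb_frag_t to a bio_vec; they map one-to-one onto the old field accesses:

    skb_frag_off(frag);         /* was: frag->page_offset      */
    skb_frag_off_add(frag, n);  /* was: frag->page_offset += n */
    skb_frag_size(frag);        /* unchanged helper            */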
@@ -1821,13 +1826,7 @@
 static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
 				struct sk_buff *skb)
 {
-	/* if the page was released unmap it, else just sync our portion */
-	if (unlikely(IXGBE_CB(skb)->page_released)) {
-		dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
-				     ixgbe_rx_pg_size(rx_ring),
-				     DMA_FROM_DEVICE,
-				     IXGBE_RX_DMA_ATTR);
-	} else if (ring_uses_build_skb(rx_ring)) {
+	if (ring_uses_build_skb(rx_ring)) {
 		unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
 
 		dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -1836,14 +1835,22 @@
 					      skb_headlen(skb),
 					      DMA_FROM_DEVICE);
 	} else {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
 
 		dma_sync_single_range_for_cpu(rx_ring->dev,
 					      IXGBE_CB(skb)->dma,
-					      frag->page_offset,
+					      skb_frag_off(frag),
 					      skb_frag_size(frag),
 					      DMA_FROM_DEVICE);
 	}
+
+	/* If the page was released, just unmap it. */
+	if (unlikely(IXGBE_CB(skb)->page_released)) {
+		dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
+				     ixgbe_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IXGBE_RX_DMA_ATTR);
+	}
 }
 
 /**
@@ -1868,9 +1875,9 @@
  *
  * Returns true if an error was encountered and skb was freed.
  **/
-static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
-				  union ixgbe_adv_rx_desc *rx_desc,
-				  struct sk_buff *skb)
+bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
+			   union ixgbe_adv_rx_desc *rx_desc,
+			   struct sk_buff *skb)
 {
 	struct net_device *netdev = rx_ring->netdev;
 
@@ -2186,14 +2193,6 @@
 	return skb;
 }
 
-#define IXGBE_XDP_PASS		0
-#define IXGBE_XDP_CONSUMED	BIT(0)
-#define IXGBE_XDP_TX		BIT(1)
-#define IXGBE_XDP_REDIR		BIT(2)
-
-static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
-			       struct xdp_frame *xdpf);
-
 static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 				     struct ixgbe_ring *rx_ring,
 				     struct xdp_buff *xdp)
@@ -2625,7 +2624,7 @@
 		/* 16K ints/sec to 9.2K ints/sec */
 		avg_wire_size *= 15;
 		avg_wire_size += 11452;
-	} else if (avg_wire_size <= 1980) {
+	} else if (avg_wire_size < 1968) {
 		/* 9.2K ints/sec to 8K ints/sec */
 		avg_wire_size *= 5;
 		avg_wire_size += 22420;
@@ -2658,6 +2657,8 @@
 	case IXGBE_LINK_SPEED_2_5GB_FULL:
 	case IXGBE_LINK_SPEED_1GB_FULL:
 	case IXGBE_LINK_SPEED_10_FULL:
+		if (avg_wire_size > 8064)
+			avg_wire_size = 8064;
 		itr += DIV_ROUND_UP(avg_wire_size,
 				    IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
 		       IXGBE_ITR_ADAPTIVE_MIN_INC;
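Assuming the adaptive-ITR constants used elsewhere in this driver (IXGBE_ITR_ADAPTIVE_MIN_INC of 2 usecs per 64 bytes and a 126 usec ceiling), the new 8064-byte clamp pins the low-speed case exactly at that ceiling instead of letting itr overflow it:

    DIV_ROUND_UP(8064, 2 * 64) * 2 = 63 * 2 = 126 usecs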
@@ -3167,7 +3168,11 @@
 #endif
 
 	ixgbe_for_each_ring(ring, q_vector->tx) {
-		if (!ixgbe_clean_tx_irq(q_vector, ring, budget))
+		bool wd = ring->xsk_umem ?
+			  ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) :
+			  ixgbe_clean_tx_irq(q_vector, ring, budget);
+
+		if (!wd)
 			clean_complete = false;
 	}
 
@@ -3183,7 +3188,10 @@
 		per_ring_budget = budget;
 
 	ixgbe_for_each_ring(ring, q_vector->rx) {
-		int cleaned = ixgbe_clean_rx_irq(q_vector, ring,
+		int cleaned = ring->xsk_umem ?
+			      ixgbe_clean_rx_irq_zc(q_vector, ring,
+						    per_ring_budget) :
+			      ixgbe_clean_rx_irq(q_vector, ring,
 						 per_ring_budget);
 
 		work_done += cleaned;
@@ -3475,6 +3483,10 @@
 	u32 txdctl = IXGBE_TXDCTL_ENABLE;
 	u8 reg_idx = ring->reg_idx;
 
+	ring->xsk_umem = NULL;
+	if (ring_is_xdp(ring))
+		ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
+
 	/* disable queue to avoid issues while updating state */
 	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0);
 	IXGBE_WRITE_FLUSH(hw);
@@ -3579,12 +3591,18 @@
 		else
 			mtqc |= IXGBE_MTQC_64VF;
 	} else {
-		if (tcs > 4)
+		if (tcs > 4) {
 			mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
-		else if (tcs > 1)
+		} else if (tcs > 1) {
 			mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
-		else
-			mtqc = IXGBE_MTQC_64Q_1PB;
+		} else {
+			u8 max_txq = adapter->num_tx_queues +
+				adapter->num_xdp_queues;
+			if (max_txq > 63)
+				mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
+			else
+				mtqc = IXGBE_MTQC_64Q_1PB;
+		}
 	}
 
 	IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
@@ -3707,10 +3725,27 @@
 	srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
 
 	/* configure the packet buffer length */
-	if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state))
+	if (rx_ring->xsk_umem) {
+		u32 xsk_buf_len = rx_ring->xsk_umem->chunk_size_nohr -
+				  XDP_PACKET_HEADROOM;
+
+		/* If the MAC supports setting RXDCTL.RLPML, then
+		 * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
+		 * RXDCTL.RLPML is set to the actual UMEM buffer
+		 * size. If not, we are stuck with a 1k buffer
+		 * size resolution, and frames larger than the
+		 * UMEM buffer size, viewed at that 1k resolution,
+		 * will be dropped.
+		 */
+		if (hw->mac.type != ixgbe_mac_82599EB)
+			srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+		else
+			srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	} else if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) {
 		srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-	else
+	} else {
 		srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	}
 
 	/* configure descriptor type */
 	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
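SRRCTL.BSIZEPKT is expressed in 1 KB units (IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10), which is the 1k resolution the comment above refers to. Worked through for a typical 2 KB UMEM chunk on 4 KB pages:

    PAGE_SIZE >> 10                    = 4096 >> 10 = 4   /* 4 KB */
    (2048 - XDP_PACKET_HEADROOM) >> 10 = 1792 >> 10 = 1   /* 1 KB */

so on 82599 any frame longer than 1 KB would be dropped, while MACs with RXDCTL.RLPML get the exact 1792-byte limit.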
@@ -3924,8 +3959,11 @@
 			else
 				mrqc = IXGBE_MRQC_VMDQRSS64EN;
 
-			/* Enable L3/L4 for Tx Switched packets */
-			mrqc |= IXGBE_MRQC_L3L4TXSWEN;
+			/* Enable L3/L4 for Tx Switched packets only for X550,
+			 * older devices do not support this feature
+			 */
+			if (hw->mac.type >= ixgbe_mac_X550)
+				mrqc |= IXGBE_MRQC_L3L4TXSWEN;
 		} else {
 			if (tcs > 4)
 				mrqc = IXGBE_MRQC_RTRSS8TCEN;
@@ -4033,6 +4071,19 @@
 	u32 rxdctl;
 	u8 reg_idx = ring->reg_idx;
 
+	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
+	if (ring->xsk_umem) {
+		ring->zca.free = ixgbe_zca_free;
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_ZERO_COPY,
+						   &ring->zca));
+
+	} else {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_PAGE_SHARED, NULL));
+	}
+
 	/* disable queue to avoid use of these values while updating state */
 	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
 	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
@@ -4082,6 +4133,17 @@
 #endif
 	}
 
+	if (ring->xsk_umem && hw->mac.type != ixgbe_mac_82599EB) {
+		u32 xsk_buf_len = ring->xsk_umem->chunk_size_nohr -
+				  XDP_PACKET_HEADROOM;
+
+		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+			    IXGBE_RXDCTL_RLPML_EN);
+		rxdctl |= xsk_buf_len | IXGBE_RXDCTL_RLPML_EN;
+
+		ring->rx_buf_len = xsk_buf_len;
+	}
+
 	/* initialize rx_buffer_info */
 	memset(ring->rx_buffer_info, 0,
 	       sizeof(struct ixgbe_rx_buffer) * ring->count);
@@ -4095,7 +4157,10 @@
 	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
 
 	ixgbe_rx_desc_queue_enable(adapter, ring);
-	ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
+	if (ring->xsk_umem)
+		ixgbe_alloc_rx_buffers_zc(ring, ixgbe_desc_unused(ring));
+	else
+		ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
 }
 
 static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
@@ -4245,7 +4310,6 @@
 		if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state))
 			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
 
-		clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
 		if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
 			continue;
 
@@ -5175,6 +5239,7 @@
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *filter;
+	u64 action;
 
 	spin_lock(&adapter->fdir_perfect_lock);
 
@@ -5183,12 +5248,17 @@
 
 	hlist_for_each_entry_safe(filter, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
+		action = filter->action;
+		if (action != IXGBE_FDIR_DROP_QUEUE && action != 0)
+			action = (action >>
+				  ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF) - 1;
+
 		ixgbe_fdir_write_perfect_filter_82599(hw,
 				&filter->filter,
 				filter->sw_idx,
-				(filter->action == IXGBE_FDIR_DROP_QUEUE) ?
+				(action == IXGBE_FDIR_DROP_QUEUE) ?
 				IXGBE_FDIR_DROP_QUEUE :
-				adapter->rx_ring[filter->action]->reg_idx);
+				adapter->rx_ring[action]->reg_idx);
 	}
 
 	spin_unlock(&adapter->fdir_perfect_lock);
@@ -5203,6 +5273,11 @@
 	u16 i = rx_ring->next_to_clean;
 	struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
 
+	if (rx_ring->xsk_umem) {
+		ixgbe_xsk_clean_rx_ring(rx_ring);
+		goto skip_free;
+	}
+
 	/* Free all the Rx ring sk_buffs */
 	while (i != rx_ring->next_to_alloc) {
 		if (rx_buffer->skb) {
@@ -5241,6 +5316,7 @@
 		}
 	}
 
+skip_free:
 	rx_ring->next_to_alloc = 0;
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
@@ -5885,6 +5961,11 @@
 	u16 i = tx_ring->next_to_clean;
 	struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
 
+	if (tx_ring->xsk_umem) {
+		ixgbe_xsk_clean_tx_ring(tx_ring);
+		goto out;
+	}
+
 	while (i != tx_ring->next_to_use) {
 		union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
 
@@ -5936,6 +6017,7 @@
 	if (!ring_is_xdp(tx_ring))
 		netdev_tx_reset_queue(txring_txq(tx_ring));
 
+out:
 	/* reset next_to_use and next_to_clean */
 	tx_ring->next_to_use = 0;
 	tx_ring->next_to_clean = 0;
@@ -6004,9 +6086,9 @@
 	/* Disable Rx */
 	ixgbe_disable_rx(adapter);
 
-	/* synchronize_sched() needed for pending XDP buffers to drain */
+	/* synchronize_rcu() needed for pending XDP buffers to drain */
 	if (adapter->xdp_ring[0])
-		synchronize_sched();
+		synchronize_rcu();
 
 	ixgbe_irq_disable(adapter);
 
@@ -6210,6 +6292,10 @@
 	if (ixgbe_init_rss_key(adapter))
 		return -ENOMEM;
 
+	adapter->af_xdp_zc_qps = bitmap_zalloc(MAX_XDP_QUEUES, GFP_KERNEL);
+	if (!adapter->af_xdp_zc_qps)
+		return -ENOMEM;
+
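The bitmap gives O(1) per-queue lookup of whether AF_XDP zero-copy is active; the XSK setup paths flip bits with the standard helpers. A minimal sketch of the intended usage (queue id qid assumed valid):

    set_bit(qid, adapter->af_xdp_zc_qps);   /* UMEM attached, ZC on   */
    if (test_bit(qid, adapter->af_xdp_zc_qps))
            /* take the zero-copy Rx/Tx path */
    clear_bit(qid, adapter->af_xdp_zc_qps); /* UMEM detached, ZC off  */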
 	/* Set MAC specific capability flags and exceptions */
 	switch (hw->mac.type) {
 	case ixgbe_mac_82598EB:
@@ -6341,7 +6427,7 @@
 {
 	struct device *dev = tx_ring->dev;
 	int orig_node = dev_to_node(dev);
-	int ring_node = -1;
+	int ring_node = NUMA_NO_NODE;
 	int size;
 
 	size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
@@ -6435,8 +6521,8 @@
 {
 	struct device *dev = rx_ring->dev;
 	int orig_node = dev_to_node(dev);
-	int ring_node = -1;
-	int size, err;
+	int ring_node = NUMA_NO_NODE;
+	int size;
 
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 
@@ -6473,13 +6559,6 @@
 			     rx_ring->queue_index) < 0)
 		goto err;
 
-	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
-					 MEM_TYPE_PAGE_SHARED, NULL);
-	if (err) {
-		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
-		goto err;
-	}
-
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;
@@ -7777,6 +7856,33 @@
 }
 
 /**
+ * ixgbe_check_fw_error - Check firmware for errors
+ * @adapter: the adapter private structure
+ *
+ * Check for firmware errors reported in the FWSM register
+ */
+static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 fwsm;
+
+	/* read fwsm.ext_err_ind register and log errors */
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+
+	if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK ||
+	    !(fwsm & IXGBE_FWSM_FW_VAL_BIT))
+		e_dev_warn("Warning firmware error detected FWSM: 0x%08X\n",
+			   fwsm);
+
+	if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) {
+		e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
+		return true;
+	}
+
+	return false;
+}
+
+/**
  * ixgbe_service_task - manages and runs subtasks
  * @work: pointer to work_struct containing our data
  **/
@@ -7794,6 +7900,12 @@
 		ixgbe_service_event_complete(adapter);
 		return;
 	}
+	if (ixgbe_check_fw_error(adapter)) {
+		if (!test_bit(__IXGBE_DOWN, &adapter->state))
+			unregister_netdev(adapter->netdev);
+		ixgbe_service_event_complete(adapter);
+		return;
+	}
 	if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) {
 		rtnl_lock();
 		adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
@@ -8068,9 +8180,6 @@
 	return __ixgbe_maybe_stop_tx(tx_ring, size);
 }
 
-#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
-		       IXGBE_TXD_CMD_RS)
-
 static int ixgbe_tx_map(struct ixgbe_ring *tx_ring,
 			struct ixgbe_tx_buffer *first,
 			const u8 hdr_len)
@@ -8078,7 +8187,7 @@
 	struct sk_buff *skb = first->skb;
 	struct ixgbe_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	dma_addr_t dma;
 	unsigned int data_len, size;
 	u32 tx_flags = first->tx_flags;
@@ -8170,6 +8279,8 @@
 	/* set the timestamp */
 	first->time_stamp = jiffies;
 
+	skb_tx_timestamp(skb);
+
 	/*
 	 * Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.  (Only applicable for weak-ordered
@@ -8191,13 +8302,8 @@
 
 	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
-	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
 		writel(i, tx_ring->tail);
-
-		/* we need this if more than one processor can write to our tail
-		 * at a time, it synchronizes IO on IA64/Altix systems
-		 */
-		mmiowb();
 	}
 
 	return 0;
@@ -8377,8 +8483,7 @@
 
 #ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      struct net_device *sb_dev,
-			      select_queue_fallback_t fallback)
+			      struct net_device *sb_dev)
 {
 	struct ixgbe_adapter *adapter;
 	struct ixgbe_ring_feature *f;
@@ -8408,7 +8513,7 @@
 			break;
 		/* fall through */
 	default:
-		return fallback(dev, skb, sb_dev);
+		return netdev_pick_tx(dev, skb, sb_dev);
 	}
 
 	f = &adapter->ring_feature[RING_F_FCOE];
@@ -8423,8 +8528,8 @@
 }
 
 #endif
-static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
-			       struct xdp_frame *xdpf)
+int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
+			struct xdp_frame *xdpf)
 {
 	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 	struct ixgbe_tx_buffer *tx_buffer;
@@ -8501,7 +8606,8 @@
 	 * otherwise try next time
 	 */
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
-		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+		count += TXD_USE_COUNT(skb_frag_size(
+						&skb_shinfo(skb)->frags[f]));
 
 	if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
 		tx_ring->tx_stats.tx_busy++;
@@ -8547,8 +8653,6 @@
 		}
 	}
 
-	skb_tx_timestamp(skb);
-
 #ifdef CONFIG_PCI_IOV
 	/*
 	 * Use the l2switch_enable flag - would be false if the DMA
@@ -8595,8 +8699,9 @@
 
 #endif /* IXGBE_FCOE */
 
-#ifdef CONFIG_XFRM_OFFLOAD
-	if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
+#ifdef CONFIG_IXGBE_IPSEC
+	if (xfrm_offload(skb) &&
+	    !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
 		goto out_drop;
 #endif
 	tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
@@ -8645,7 +8750,9 @@
 	if (skb_put_padto(skb, 17))
 		return NETDEV_TX_OK;
 
-	tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];
+	tx_ring = ring ? ring : adapter->tx_ring[skb_get_queue_mapping(skb)];
+	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &tx_ring->state)))
+		return NETDEV_TX_BUSY;
 
 	return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
 }
@@ -8688,6 +8795,15 @@
 	u16 value;
 	int rc;
 
+	if (adapter->mii_bus) {
+		int regnum = addr;
+
+		if (devad != MDIO_DEVAD_NONE)
+			regnum |= (devad << 16) | MII_ADDR_C45;
+
+		return mdiobus_read(adapter->mii_bus, prtad, regnum);
+	}
+
 	if (prtad != hw->phy.mdio.prtad)
 		return -EINVAL;
 	rc = hw->phy.ops.read_reg(hw, addr, devad, &value);
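mdiobus_read()/mdiobus_write() take a single regnum in which clause 45 accesses are flagged by MII_ADDR_C45 (bit 30, <linux/mdio.h>) and carry the MMD device address in bits 20:16. The composition used above, spelled out:

    regnum = addr;                          /* clause 22: 5-bit register */
    if (devad != MDIO_DEVAD_NONE)           /* clause 45 */
            regnum = MII_ADDR_C45 | (devad << 16) | (addr & 0xffff);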
@@ -8702,6 +8818,15 @@
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 
+	if (adapter->mii_bus) {
+		int regnum = addr;
+
+		if (devad != MDIO_DEVAD_NONE)
+			regnum |= (devad << 16) | MII_ADDR_C45;
+
+		return mdiobus_write(adapter->mii_bus, prtad, regnum, value);
+	}
+
 	if (prtad != hw->phy.mdio.prtad)
 		return -EINVAL;
 	return hw->phy.ops.write_reg(hw, addr, devad, value);
@@ -9367,6 +9492,10 @@
 				jump->mat = nexthdr[i].jump;
 				adapter->jump_tables[link_uhtid] = jump;
 				break;
+			} else {
+				kfree(mask);
+				kfree(input);
+				kfree(jump);
 			}
 		}
 		return 0;
@@ -9484,27 +9613,6 @@
 	}
 }
 
-static int ixgbe_setup_tc_block(struct net_device *dev,
-				struct tc_block_offload *f)
-{
-	struct ixgbe_adapter *adapter = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
-					     adapter, adapter, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
-					adapter);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
 static int ixgbe_setup_tc_mqprio(struct net_device *dev,
 				 struct tc_mqprio_qopt *mqprio)
 {
@@ -9512,12 +9620,19 @@
 	return ixgbe_setup_tc(dev, mqprio->num_tc);
 }
 
+static LIST_HEAD(ixgbe_block_cb_list);
+
 static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			    void *type_data)
 {
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return ixgbe_setup_tc_block(dev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &ixgbe_block_cb_list,
+						  ixgbe_setup_tc_block_cb,
+						  adapter, adapter, true);
 	case TC_SETUP_QDISC_MQPRIO:
 		return ixgbe_setup_tc_mqprio(dev, type_data);
 	default:
@@ -9671,7 +9786,7 @@
 			    NETIF_F_HW_VLAN_CTAG_FILTER))
 		ixgbe_set_rx_mode(netdev);
 
-	return 0;
+	return 1;
 }
 
 /**
@@ -9789,7 +9904,8 @@
 static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			     struct net_device *dev,
 			     const unsigned char *addr, u16 vid,
-			     u16 flags)
+			     u16 flags,
+			     struct netlink_ext_ack *extack)
 {
 	/* guarantee we can provide a unique filter for the unicast address */
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
@@ -9878,7 +9994,8 @@
 }
 
 static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
-				    struct nlmsghdr *nlh, u16 flags)
+				    struct nlmsghdr *nlh, u16 flags,
+				    struct netlink_ext_ack *extack)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct nlattr *attr, *br_spec;
@@ -10089,8 +10206,8 @@
 	 * the TSO, so it's the exception.
 	 */
 	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
-#ifdef CONFIG_XFRM_OFFLOAD
-		if (!skb->sp)
+#ifdef CONFIG_IXGBE_IPSEC
+		if (!secpath_exists(skb))
 #endif
 			features &= ~NETIF_F_TSO;
 	}
@@ -10103,6 +10220,7 @@
 	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct bpf_prog *old_prog;
+	bool need_reset;
 
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
 		return -EINVAL;
@@ -10125,9 +10243,10 @@
 		return -ENOMEM;
 
 	old_prog = xchg(&adapter->xdp_prog, prog);
+	need_reset = (!!prog != !!old_prog);
 
 	/* If transitioning XDP modes reconfigure rings */
-	if (!!prog != !!old_prog) {
+	if (need_reset) {
 		int err = ixgbe_setup_tc(dev, adapter->hw_tcs);
 
 		if (err) {
@@ -10143,6 +10262,15 @@
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
+	/* Kick start the NAPI context if there is an AF_XDP socket open
+	 * on that queue id, so that receiving will start.
+	 */
+	if (need_reset && prog)
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			if (adapter->xdp_ring[i]->xsk_umem)
+				(void)ixgbe_xsk_wakeup(adapter->netdev, i,
+						       XDP_WAKEUP_RX);
+
 	return 0;
 }
 
@@ -10157,12 +10285,16 @@
 		xdp->prog_id = adapter->xdp_prog ?
 			adapter->xdp_prog->aux->id : 0;
 		return 0;
+	case XDP_SETUP_XSK_UMEM:
+		return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
+					    xdp->xsk.queue_id);
+
 	default:
 		return -EINVAL;
 	}
 }
 
-static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
+void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
 {
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.
@@ -10192,6 +10324,9 @@
 	if (unlikely(!ring))
 		return -ENXIO;
 
+	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state)))
+		return -ENXIO;
+
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 		int err;
@@ -10253,8 +10388,163 @@
 	.ndo_features_check	= ixgbe_features_check,
 	.ndo_bpf		= ixgbe_xdp,
 	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
+	.ndo_xsk_wakeup         = ixgbe_xsk_wakeup,
 };
 
+static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
+				 struct ixgbe_ring *tx_ring)
+{
+	unsigned long wait_delay, delay_interval;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 reg_idx = tx_ring->reg_idx;
+	int wait_loop;
+	u32 txdctl;
+
+	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+
+	/* delay mechanism from ixgbe_disable_tx */
+	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+	wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	wait_delay = delay_interval;
+
+	while (wait_loop--) {
+		usleep_range(wait_delay, wait_delay + 10);
+		wait_delay += delay_interval * 2;
+		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+
+		if (!(txdctl & IXGBE_TXDCTL_ENABLE))
+			return;
+	}
+
+	e_err(drv, "TXDCTL.ENABLE not cleared within the polling period\n");
+}
+
+static void ixgbe_disable_txr(struct ixgbe_adapter *adapter,
+			      struct ixgbe_ring *tx_ring)
+{
+	set_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
+	ixgbe_disable_txr_hw(adapter, tx_ring);
+}
+
+static void ixgbe_disable_rxr_hw(struct ixgbe_adapter *adapter,
+				 struct ixgbe_ring *rx_ring)
+{
+	unsigned long wait_delay, delay_interval;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 reg_idx = rx_ring->reg_idx;
+	int wait_loop;
+	u32 rxdctl;
+
+	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+	rxdctl |= IXGBE_RXDCTL_SWFLSH;
+
+	/* write value back with RXDCTL.ENABLE bit cleared */
+	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+
+	/* RXDCTL.EN may not change on 82598 if link is down, so skip it */
+	if (hw->mac.type == ixgbe_mac_82598EB &&
+	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+		return;
+
+	/* delay mechanism from ixgbe_disable_rx */
+	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+	wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	wait_delay = delay_interval;
+
+	while (wait_loop--) {
+		usleep_range(wait_delay, wait_delay + 10);
+		wait_delay += delay_interval * 2;
+		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+
+		if (!(rxdctl & IXGBE_RXDCTL_ENABLE))
+			return;
+	}
+
+	e_err(drv, "RXDCTL.ENABLE not cleared within the polling period\n");
+}
+
+static void ixgbe_reset_txr_stats(struct ixgbe_ring *tx_ring)
+{
+	memset(&tx_ring->stats, 0, sizeof(tx_ring->stats));
+	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring)
+{
+	memset(&rx_ring->stats, 0, sizeof(rx_ring->stats));
+	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+}
+
+/**
+ * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings
+ * @adapter: adapter structure
+ * @ring: ring index
+ *
+ * This function disables a certain Rx/Tx/XDP Tx ring. The function
+ * assumes that the netdev is running.
+ **/
+void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring)
+{
+	struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
+
+	rx_ring = adapter->rx_ring[ring];
+	tx_ring = adapter->tx_ring[ring];
+	xdp_ring = adapter->xdp_ring[ring];
+
+	ixgbe_disable_txr(adapter, tx_ring);
+	if (xdp_ring)
+		ixgbe_disable_txr(adapter, xdp_ring);
+	ixgbe_disable_rxr_hw(adapter, rx_ring);
+
+	if (xdp_ring)
+		synchronize_rcu();
+
+	/* Rx/Tx/XDP Tx share the same napi context. */
+	napi_disable(&rx_ring->q_vector->napi);
+
+	ixgbe_clean_tx_ring(tx_ring);
+	if (xdp_ring)
+		ixgbe_clean_tx_ring(xdp_ring);
+	ixgbe_clean_rx_ring(rx_ring);
+
+	ixgbe_reset_txr_stats(tx_ring);
+	if (xdp_ring)
+		ixgbe_reset_txr_stats(xdp_ring);
+	ixgbe_reset_rxr_stats(rx_ring);
+}
+
+/**
+ * ixgbe_txrx_ring_enable - Enable Rx/Tx/XDP Tx rings
+ * @adapter: adapter structure
+ * @ring: ring index
+ *
+ * This function enables a certain Rx/Tx/XDP Tx ring. The function
+ * assumes that the netdev is running.
+ **/
+void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring)
+{
+	struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
+
+	rx_ring = adapter->rx_ring[ring];
+	tx_ring = adapter->tx_ring[ring];
+	xdp_ring = adapter->xdp_ring[ring];
+
+	/* Rx/Tx/XDP Tx share the same napi context. */
+	napi_enable(&rx_ring->q_vector->napi);
+
+	ixgbe_configure_tx_ring(adapter, tx_ring);
+	if (xdp_ring)
+		ixgbe_configure_tx_ring(adapter, xdp_ring);
+	ixgbe_configure_rx_ring(adapter, rx_ring);
+
+	clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
+	if (xdp_ring)
+		clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state);
+}
+
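This enable/disable pair exists so the AF_XDP code can quiesce a single queue triple without a full interface reset. A hedged sketch of the expected call pattern around a UMEM change:

    if (netif_running(adapter->netdev))
            ixgbe_txrx_ring_disable(adapter, qid);
    /* attach or detach the UMEM for queue qid */
    if (netif_running(adapter->netdev))
            ixgbe_txrx_ring_enable(adapter, qid);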
 /**
  * ixgbe_enumerate_functions - Get the number of ports this device has
  * @adapter: adapter structure
@@ -10618,7 +10908,7 @@
 	if (hw->mac.type >= ixgbe_mac_82599EB)
 		netdev->features |= NETIF_F_SCTP_CRC;
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 #define IXGBE_ESP_FEATURES	(NETIF_F_HW_ESP | \
 				 NETIF_F_HW_ESP_TX_CSUM | \
 				 NETIF_F_GSO_ESP)
@@ -10693,6 +10983,11 @@
 	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
 		netdev->features |= NETIF_F_LRO;
 
+	if (ixgbe_check_fw_error(adapter)) {
+		err = -EIO;
+		goto err_sw_init;
+	}
+
 	/* make sure the EEPROM is good */
 	if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
 		e_dev_err("The EEPROM Checksum Is Not Valid\n");
@@ -10849,6 +11144,8 @@
 			IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
 			true);
 
+	ixgbe_mii_bus_init(hw);
+
 	return 0;
 
 err_register:
@@ -10861,6 +11158,7 @@
 	kfree(adapter->jump_tables[0]);
 	kfree(adapter->mac_table);
 	kfree(adapter->rss_key);
+	bitmap_free(adapter->af_xdp_zc_qps);
 err_ioremap:
 	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
 	free_netdev(netdev);
@@ -10899,6 +11197,8 @@
 	set_bit(__IXGBE_REMOVING, &adapter->state);
 	cancel_work_sync(&adapter->service_task);
 
+	if (adapter->mii_bus)
+		mdiobus_unregister(adapter->mii_bus);
 
 #ifdef CONFIG_IXGBE_DCA
 	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
@@ -10947,6 +11247,7 @@
 
 	kfree(adapter->mac_table);
 	kfree(adapter->rss_key);
+	bitmap_free(adapter->af_xdp_zc_qps);
 	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
 	free_netdev(netdev);
 
@@ -11052,8 +11353,6 @@
 			/* Free device reference count */
 			pci_dev_put(vfdev);
 		}
-
-		pci_cleanup_aer_uncorrect_error_status(pdev);
 	}
 
 	/*
@@ -11103,7 +11402,6 @@
 {
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	pci_ers_result_t result;
-	int err;
 
 	if (pci_enable_device_mem(pdev)) {
 		e_err(probe, "Cannot re-enable PCI device after reset.\n");
@@ -11123,13 +11421,6 @@
 		result = PCI_ERS_RESULT_RECOVERED;
 	}
 
-	err = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (err) {
-		e_dev_err("pci_cleanup_aer_uncorrect_error_status "
-			  "failed 0x%0x\n", err);
-		/* non-fatal, continue */
-	}
-
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index e085b65..a148534 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -50,6 +50,7 @@
 	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
 	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
 	ixgbe_mbox_api_13,	/* API version 1.3, linux/freebsd VF driver */
+	ixgbe_mbox_api_14,	/* API version 1.4, linux/freebsd VF driver */
 	/* This value should always be last */
 	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
 };
@@ -80,6 +81,10 @@
 
 #define IXGBE_VF_UPDATE_XCAST_MODE	0x0c
 
+/* mailbox API, version 1.4 VF requests */
+#define IXGBE_VF_IPSEC_ADD	0x0d
+#define IXGBE_VF_IPSEC_DEL	0x0e
+
 /* length of permanent address message returned from PF */
 #define IXGBE_VF_PERMADDR_MSG_LEN 4
 /* word in permanent address message with the current multicast type */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 919a7af..2fb9796 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -3,6 +3,7 @@
 
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/iopoll.h>
 #include <linux/sched.h>
 
 #include "ixgbe.h"
@@ -658,6 +659,306 @@
 	return status;
 }
 
+#define IXGBE_HW_READ_REG(addr) IXGBE_READ_REG(hw, addr)
+
+/**
+ *  ixgbe_msca_cmd - Write the command register and poll for completion/timeout
+ *  @hw: pointer to hardware structure
+ *  @cmd: command register value to write
+ **/
+static s32 ixgbe_msca_cmd(struct ixgbe_hw *hw, u32 cmd)
+{
+	IXGBE_WRITE_REG(hw, IXGBE_MSCA, cmd);
+
+	return readx_poll_timeout(IXGBE_HW_READ_REG, IXGBE_MSCA, cmd,
+				  !(cmd & IXGBE_MSCA_MDI_COMMAND), 10,
+				  10 * IXGBE_MDIO_COMMAND_TIMEOUT);
+}
+
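The IXGBE_HW_READ_REG wrapper above exists only so the two-argument IXGBE_READ_REG(hw, addr) can be handed to readx_poll_timeout() as a single-argument accessor; it relies on a variable named hw being in scope at the expansion site. readx_poll_timeout(op, args, val, cond, sleep_us, timeout_us) from <linux/iopoll.h> re-evaluates val = op(args) until cond holds or timeout_us elapses, returning 0 on success and -ETIMEDOUT on timeout. Roughly (a sketch, not the macro's exact body):

    for (;;) {
            cmd = IXGBE_READ_REG(hw, IXGBE_MSCA);
            if (!(cmd & IXGBE_MSCA_MDI_COMMAND))
                    return 0;       /* MDI command completed */
            /* give up after 10 * IXGBE_MDIO_COMMAND_TIMEOUT usecs */
            if (ktime_compare(ktime_get(), timeout) > 0)
                    return -ETIMEDOUT;
            usleep_range(10, 20);   /* from the sleep_us argument */
    }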
+/**
+ *  ixgbe_mii_bus_read_generic - Read a clause 22/45 register with gssr flags
+ *  @hw: pointer to hardware structure
+ *  @addr: address
+ *  @regnum: register number
+ *  @gssr: semaphore flags to acquire
+ **/
+static s32 ixgbe_mii_bus_read_generic(struct ixgbe_hw *hw, int addr,
+				      int regnum, u32 gssr)
+{
+	u32 hwaddr, cmd;
+	s32 data;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
+		return -EBUSY;
+
+	hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT;
+	if (regnum & MII_ADDR_C45) {
+		hwaddr |= regnum & GENMASK(21, 0);
+		cmd = hwaddr | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND;
+	} else {
+		hwaddr |= (regnum & GENMASK(5, 0)) << IXGBE_MSCA_DEV_TYPE_SHIFT;
+		cmd = hwaddr | IXGBE_MSCA_OLD_PROTOCOL |
+			IXGBE_MSCA_READ_AUTOINC | IXGBE_MSCA_MDI_COMMAND;
+	}
+
+	data = ixgbe_msca_cmd(hw, cmd);
+	if (data < 0)
+		goto mii_bus_read_done;
+
+	/* For a clause 45 access the address cycle has just completed; we
+	 * still need to issue the read command.  For clause 22 just fetch
+	 * the data.
+	 */
+	if (!(regnum & MII_ADDR_C45))
+		goto do_mii_bus_read;
+
+	cmd = hwaddr | IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND;
+	data = ixgbe_msca_cmd(hw, cmd);
+	if (data < 0)
+		goto mii_bus_read_done;
+
+do_mii_bus_read:
+	data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+	data = (data >> IXGBE_MSRWD_READ_DATA_SHIFT) & GENMASK(16, 0);
+
+mii_bus_read_done:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return data;
+}
+
+/**
+ *  ixgbe_mii_bus_write_generic - Write a clause 22/45 register with gssr flags
+ *  @hw: pointer to hardware structure
+ *  @addr: address
+ *  @regnum: register number
+ *  @val: value to write
+ *  @gssr: semaphore flags to acquire
+ **/
+static s32 ixgbe_mii_bus_write_generic(struct ixgbe_hw *hw, int addr,
+				       int regnum, u16 val, u32 gssr)
+{
+	u32 hwaddr, cmd;
+	s32 err;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
+		return -EBUSY;
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)val);
+
+	hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT;
+	if (regnum & MII_ADDR_C45) {
+		hwaddr |= regnum & GENMASK(21, 0);
+		cmd = hwaddr | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND;
+	} else {
+		hwaddr |= (regnum & GENMASK(5, 0)) << IXGBE_MSCA_DEV_TYPE_SHIFT;
+		cmd = hwaddr | IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE |
+			IXGBE_MSCA_MDI_COMMAND;
+	}
+
+	/* For clause 45 this is an address cycle, for clause 22 this is the
+	 * entire transaction
+	 */
+	err = ixgbe_msca_cmd(hw, cmd);
+	if (err < 0 || !(regnum & MII_ADDR_C45))
+		goto mii_bus_write_done;
+
+	cmd = hwaddr | IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND;
+	err = ixgbe_msca_cmd(hw, cmd);
+
+mii_bus_write_done:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return err;
+}
+
+/**
+ *  ixgbe_mii_bus_read - Read a clause 22/45 register
+ *  @bus: pointer to mii_bus structure
+ *  @addr: address
+ *  @regnum: register number
+ **/
+static s32 ixgbe_mii_bus_read(struct mii_bus *bus, int addr, int regnum)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	return ixgbe_mii_bus_read_generic(hw, addr, regnum, gssr);
+}
+
+/**
+ *  ixgbe_mii_bus_write - Write a clause 22/45 register
+ *  @bus: pointer to mii_bus structure
+ *  @addr: address
+ *  @regnum: register number
+ *  @val: value to write
+ **/
+static s32 ixgbe_mii_bus_write(struct mii_bus *bus, int addr, int regnum,
+			       u16 val)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	return ixgbe_mii_bus_write_generic(hw, addr, regnum, val, gssr);
+}
+
+/**
+ *  ixgbe_x550em_a_mii_bus_read - Read a clause 22/45 register on x550em_a
+ *  @bus: pointer to mii_bus structure
+ *  @addr: address
+ *  @regnum: register number
+ **/
+static s32 ixgbe_x550em_a_mii_bus_read(struct mii_bus *bus, int addr,
+				       int regnum)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM;
+	return ixgbe_mii_bus_read_generic(hw, addr, regnum, gssr);
+}
+
+/**
+ *  ixgbe_x550em_a_mii_bus_write - Write a clause 22/45 register on x550em_a
+ *  @bus: pointer to mii_bus structure
+ *  @addr: address
+ *  @regnum: register number
+ *  @val: value to write
+ **/
+static s32 ixgbe_x550em_a_mii_bus_write(struct mii_bus *bus, int addr,
+					int regnum, u16 val)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM;
+	return ixgbe_mii_bus_write_generic(hw, addr, regnum, val, gssr);
+}
+
+/**
+ * ixgbe_get_first_secondary_devfn - get first device downstream of root port
+ * @devfn: PCI_DEVFN of root port on domain 0, bus 0
+ *
+ * Returns a pci_dev pointer to the PCI_DEVFN(0, 0) device on the subordinate
+ * side of the root port at domain 0, bus 0, devfn 'devfn', or NULL if none
+ * is found
+ **/
+static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn)
+{
+	struct pci_dev *rp_pdev;
+	int bus;
+
+	rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn);
+	if (rp_pdev && rp_pdev->subordinate) {
+		bus = rp_pdev->subordinate->number;
+		return pci_get_domain_bus_and_slot(0, bus, 0);
+	}
+
+	return NULL;
+}
+
+/**
+ * ixgbe_x550em_a_has_mii - is this the first ixgbe x550em_a PCI function?
+ * @hw: pointer to hardware structure
+ *
+ * Returns true if hw points to the lowest numbered PCI B:D.F x550em_a device
+ * in the SoC.  There are up to 4 MACs sharing a single MDIO bus on the
+ * x550em_a, but we only want to register one MDIO bus.
+ **/
+static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	struct pci_dev *pdev = adapter->pdev;
+	struct pci_dev *func0_pdev;
+
+	/* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices
+	 * are always downstream of the root ports at 0000:00:16.0 and
+	 * 0000:00:17.0.  It's not valid for function 0 to be disabled while
+	 * function 1 is up, so the lowest numbered ixgbe device will be
+	 * device 0 function 0 on one of those two root ports.
+	 */
+	func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0));
+	if (func0_pdev)
+		return func0_pdev == pdev;
+
+	func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0));
+	return func0_pdev == pdev;
+}
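
The devfn values probed here decode with the standard 5-bit-slot/3-bit-function encoding; a trivial sketch, with PCI_DEVFN reproduced as defined in linux/pci.h:

#include <stdio.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))

int main(void)
{
	/* the two root ports checked by ixgbe_x550em_a_has_mii() */
	printf("00:16.0 -> devfn 0x%02x\n", PCI_DEVFN(0x16, 0));	/* 0xb0 */
	printf("00:17.0 -> devfn 0x%02x\n", PCI_DEVFN(0x17, 0));	/* 0xb8 */
	return 0;
}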
+
+/**
+ * ixgbe_mii_bus_init - mii_bus structure setup
+ * @hw: pointer to hardware structure
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ixgbe_mii_bus_init initializes and registers a mii_bus structure in the adapter
+ **/
+s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	struct pci_dev *pdev = adapter->pdev;
+	struct device *dev = &adapter->netdev->dev;
+	struct mii_bus *bus;
+	int err = -ENODEV;
+
+	bus = devm_mdiobus_alloc(dev);
+	if (!bus)
+		return -ENOMEM;
+
+	switch (hw->device_id) {
+	/* C3000 SoCs */
+	case IXGBE_DEV_ID_X550EM_A_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR_L:
+	case IXGBE_DEV_ID_X550EM_A_SFP_N:
+	case IXGBE_DEV_ID_X550EM_A_SGMII:
+	case IXGBE_DEV_ID_X550EM_A_SGMII_L:
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_SFP:
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		if (!ixgbe_x550em_a_has_mii(hw))
+			goto ixgbe_no_mii_bus;
+		bus->read = &ixgbe_x550em_a_mii_bus_read;
+		bus->write = &ixgbe_x550em_a_mii_bus_write;
+		break;
+	default:
+		bus->read = &ixgbe_mii_bus_read;
+		bus->write = &ixgbe_mii_bus_write;
+		break;
+	}
+
+	/* Use the position of the device in the PCI hierarchy as the id */
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mdio-%s", ixgbe_driver_name,
+		 pci_name(pdev));
+
+	bus->name = "ixgbe-mdio";
+	bus->priv = adapter;
+	bus->parent = dev;
+	bus->phy_mask = GENMASK(31, 0);
+
+	/* Support clause 22/45 natively.  ixgbe_probe() sets MDIO_EMULATE_C22,
+	 * but unfortunately that causes some clause 22 frames to be sent with
+	 * clause 45 addressing.  We don't want that.
+	 */
+	hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22;
+
+	err = mdiobus_register(bus);
+	if (!err) {
+		adapter->mii_bus = bus;
+		return 0;
+	}
+
+ixgbe_no_mii_bus:
+	devm_mdiobus_free(dev, bus);
+	return err;
+}
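
A sketch of the phy_mask semantics assumed above: during registration the MDIO core auto-probes only those addresses whose mask bit is clear, so GENMASK(31, 0) suppresses auto-probing entirely while leaving the PHYs reachable through explicit bus->read/bus->write calls.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t phy_mask = 0xffffffffu;	/* GENMASK(31, 0) */
	int probed = 0;

	for (int addr = 0; addr < 32; addr++)
		if (!(phy_mask & (1u << addr)))	/* clear bit => probe it */
			probed++;

	printf("addresses auto-probed: %d\n", probed);	/* prints 0 */
	return 0;
}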
+
 /**
  *  ixgbe_setup_phy_link_generic - Set and restart autoneg
  *  @hw: pointer to hardware structure
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index 64e44e0..6544c45 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -45,6 +45,7 @@
 #define IXGBE_SFF_SOFT_RS_SELECT_10G		0x8
 #define IXGBE_SFF_SOFT_RS_SELECT_1G		0x0
 #define IXGBE_SFF_ADDRESSING_MODE		0x4
+#define IXGBE_SFF_DDM_IMPLEMENTED		0x40
 #define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE		0x1
 #define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE		0x8
 #define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE	0x23
@@ -120,6 +121,8 @@
 /* SFP+ SFF-8472 Compliance code */
 #define IXGBE_SFF_SFF_8472_UNSUP      0x00
 
+s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw);
+
 s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
 s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
 s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index b3e0d8b..0be13a9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -72,13 +72,13 @@
 #define IXGBE_INCPER_SHIFT_82599 24
 
 #define IXGBE_OVERFLOW_PERIOD    (HZ * 30)
-#define IXGBE_PTP_TX_TIMEOUT     (HZ * 15)
+#define IXGBE_PTP_TX_TIMEOUT     (HZ)
 
-/* half of a one second clock period, for use with PPS signal. We have to use
- * this instead of something pre-defined like IXGBE_PTP_PPS_HALF_SECOND, in
- * order to force at least 64bits of precision for shifting
+/* We use our own definitions instead of NSEC_PER_SEC because we want to mark
+ * the value as a ULL to force precision when bit shifting.
  */
-#define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL
+#define NS_PER_SEC      1000000000ULL
+#define NS_PER_HALF_SEC  500000000ULL
 
 /* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL
  * which contain measurements of seconds and nanoseconds respectively. This
@@ -141,23 +141,26 @@
 #define MAX_TIMADJ	0x7FFFFFFF
 
 /**
- * ixgbe_ptp_setup_sdp_x540
+ * ixgbe_ptp_setup_sdp_X540
  * @adapter: private adapter structure
  *
  * this function enables or disables the clock out feature on SDP0 for
- * the X540 device. It will create a 1second periodic output that can
+ * the X540 device. It will create a 1 second periodic output that can
  * be used as the PPS (via an interrupt).
  *
- * It calculates when the systime will be on an exact second, and then
- * aligns the start of the PPS signal to that value. The shift is
- * necessary because it can change based on the link speed.
+ * It calculates when the system time will be on an exact second, and then
+ * aligns the start of the PPS signal to that value.
+ *
+ * This works by using the cycle counter shift and mult values in reverse, and
+ * assumes that the values we're shifting will not overflow.
  */
-static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter)
+static void ixgbe_ptp_setup_sdp_X540(struct ixgbe_adapter *adapter)
 {
+	struct cyclecounter *cc = &adapter->hw_cc;
 	struct ixgbe_hw *hw = &adapter->hw;
-	int shift = adapter->hw_cc.shift;
 	u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem;
-	u64 ns = 0, clock_edge = 0;
+	u64 ns = 0, clock_edge = 0, clock_period;
+	unsigned long flags;
 
 	/* disable the pin first */
 	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
@@ -177,26 +180,33 @@
 	/* enable the Clock Out feature on SDP0, and allow
 	 * interrupts to occur when the pin changes
 	 */
-	tsauxc = IXGBE_TSAUXC_EN_CLK |
-		 IXGBE_TSAUXC_SYNCLK |
-		 IXGBE_TSAUXC_SDP0_INT;
+	tsauxc = (IXGBE_TSAUXC_EN_CLK |
+		  IXGBE_TSAUXC_SYNCLK |
+		  IXGBE_TSAUXC_SDP0_INT);
 
-	/* clock period (or pulse length) */
-	clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift);
-	clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32);
-
-	/* Account for the cyclecounter wrap-around value by
-	 * using the converted ns value of the current time to
-	 * check for when the next aligned second would occur.
+	/* Determine the clock time period to use. This assumes that the
+	 * cycle counter shift is small enough to avoid overflow.
 	 */
-	clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
-	clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
-	ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge);
+	clock_period = div_u64((NS_PER_HALF_SEC << cc->shift), cc->mult);
+	clktiml = (u32)(clock_period);
+	clktimh = (u32)(clock_period >> 32);
 
-	div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem);
-	clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift);
+	/* Read the current clock time, and save the cycle counter value */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	ns = timecounter_read(&adapter->hw_tc);
+	clock_edge = adapter->hw_tc.cycle_last;
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 
-	/* specify the initial clock start time */
+	/* Figure out how many seconds to add in order to round up */
+	div_u64_rem(ns, NS_PER_SEC, &rem);
+
+	/* Figure out how many nanoseconds to add to round the clock edge up
+	 * to the next full second
+	 */
+	rem = (NS_PER_SEC - rem);
+
+	/* Adjust the clock edge to align with the next full second. */
+	clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
 	trgttiml = (u32)clock_edge;
 	trgttimh = (u32)(clock_edge >> 32);
 
@@ -212,8 +222,100 @@
 }
 
 /**
+ * ixgbe_ptp_setup_sdp_X550
+ * @adapter: private adapter structure
+ *
+ * Enable or disable a clock output signal on SDP 0 for X550 hardware.
+ *
+ * Use the target time feature to align the output signal on the next full
+ * second.
+ *
+ * This works by using the cycle counter shift and mult values in reverse, and
+ * assumes that the values we're shifting will not overflow.
+ */
+static void ixgbe_ptp_setup_sdp_X550(struct ixgbe_adapter *adapter)
+{
+	u32 esdp, tsauxc, freqout, trgttiml, trgttimh, rem, tssdp;
+	struct cyclecounter *cc = &adapter->hw_cc;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 ns = 0, clock_edge = 0;
+	struct timespec64 ts;
+	unsigned long flags;
+
+	/* disable the pin first */
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
+	IXGBE_WRITE_FLUSH(hw);
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED))
+		return;
+
+	esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* enable the SDP0 pin as output, and connected to the
+	 * native function for Timesync (ClockOut)
+	 */
+	esdp |= IXGBE_ESDP_SDP0_DIR |
+		IXGBE_ESDP_SDP0_NATIVE;
+
+	/* enable the Clock Out feature on SDP0, and use Target Time 0 to
+	 * enable generation of interrupts on the clock change.
+	 */
+#define IXGBE_TSAUXC_DIS_TS_CLEAR 0x40000000
+	tsauxc = (IXGBE_TSAUXC_EN_CLK | IXGBE_TSAUXC_ST0 |
+		  IXGBE_TSAUXC_EN_TT0 | IXGBE_TSAUXC_SDP0_INT |
+		  IXGBE_TSAUXC_DIS_TS_CLEAR);
+
+	tssdp = (IXGBE_TSSDP_TS_SDP0_EN |
+		 IXGBE_TSSDP_TS_SDP0_CLK0);
+
+	/* Determine the clock time period to use. This assumes that the
+	 * cycle counter shift is small enough to avoid overflowing a 32bit
+	 * value.
+	 */
+	freqout = div_u64(NS_PER_HALF_SEC << cc->shift,  cc->mult);
+
+	/* Read the current clock time, and save the cycle counter value */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	ns = timecounter_read(&adapter->hw_tc);
+	clock_edge = adapter->hw_tc.cycle_last;
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	/* Figure out how far past the next second we are */
+	div_u64_rem(ns, NS_PER_SEC, &rem);
+
+	/* Figure out how many nanoseconds to add to round the clock edge up
+	 * to the next full second
+	 */
+	rem = (NS_PER_SEC - rem);
+
+	/* Adjust the clock edge to align with the next full second. */
+	clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
+
+	/* X550 hardware stores the time in 32bits of 'billions of cycles' and
+	 * 32bits of 'cycles'. There's no guarantee that cycles represents
+	 * nanoseconds. However, we can use the math from a timespec64 to
+	 * convert into the hardware representation.
+	 *
+	 * See ixgbe_ptp_read_X550() for more details.
+	 */
+	ts = ns_to_timespec64(clock_edge);
+	trgttiml = (u32)ts.tv_nsec;
+	trgttimh = (u32)ts.tv_sec;
+
+	IXGBE_WRITE_REG(hw, IXGBE_FREQOUT0, freqout);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
+
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+	IXGBE_WRITE_REG(hw, IXGBE_TSSDP, tssdp);
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
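
Both setup_sdp variants lean on the same inversion: the timecounter converts cycles to nanoseconds as ns = (cycles * mult) >> shift, so nanoseconds convert back to cycles as (ns << shift) / mult. A runnable sketch of the edge alignment; the shift/mult pair models a hypothetical 800 MHz clock and every value here is invented:

#include <stdint.h>
#include <stdio.h>

#define NS_PER_SEC	1000000000ULL

int main(void)
{
	uint32_t shift = 24;			/* hypothetical cyclecounter */
	uint32_t mult = 20971520;		/* 1.25 ns/cycle * 2^24 */
	uint64_t ns = 5123456789ULL;		/* current time, ~5.123 s */
	uint64_t cycle_last = 4098765431ULL;	/* counter at that reading */

	/* nanoseconds left until the next full second */
	uint64_t rem = NS_PER_SEC - (ns % NS_PER_SEC);

	/* apply shift/mult in reverse to get back to cycle units */
	uint64_t clock_edge = cycle_last + (rem << shift) / mult;

	printf("arm target time at cycle %llu\n",
	       (unsigned long long)clock_edge);
	return 0;
}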
+
+/**
  * ixgbe_ptp_read_X550 - read cycle counter value
- * @hw_cc: cyclecounter structure
+ * @cc: cyclecounter structure
  *
  * This function reads SYSTIME registers. It is called by the cyclecounter
  * structure to convert from internal representation into nanoseconds. We need
@@ -221,10 +323,10 @@
  * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of
  * "cycles", rather than seconds and nanoseconds.
  */
-static u64 ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc)
+static u64 ixgbe_ptp_read_X550(const struct cyclecounter *cc)
 {
 	struct ixgbe_adapter *adapter =
-			container_of(hw_cc, struct ixgbe_adapter, hw_cc);
+		container_of(cc, struct ixgbe_adapter, hw_cc);
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct timespec64 ts;
 
@@ -443,22 +545,52 @@
 }
 
 /**
- * ixgbe_ptp_gettime
+ * ixgbe_ptp_gettimex
  * @ptp: the ptp clock structure
- * @ts: timespec structure to hold the current time value
+ * @ts: timespec to hold the PHC timestamp
+ * @sts: structure to hold the system time before and after reading the PHC
  *
  * read the timecounter and return the correct value in ns,
  * after converting it into a struct timespec64.
  */
-static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int ixgbe_ptp_gettimex(struct ptp_clock_info *ptp,
+			      struct timespec64 *ts,
+			      struct ptp_system_timestamp *sts)
 {
 	struct ixgbe_adapter *adapter =
 		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct ixgbe_hw *hw = &adapter->hw;
 	unsigned long flags;
-	u64 ns;
+	u64 ns, stamp;
 
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-	ns = timecounter_read(&adapter->hw_tc);
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		/* Upper 32 bits represent billions of cycles, lower 32 bits
+		 * represent cycles. However, we use timespec64_to_ns for the
+		 * correct math even though the units haven't been corrected
+		 * yet.
+		 */
+		ptp_read_system_prets(sts);
+		IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+		ptp_read_system_postts(sts);
+		ts->tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+		ts->tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+		stamp = timespec64_to_ns(ts);
+		break;
+	default:
+		ptp_read_system_prets(sts);
+		stamp = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+		ptp_read_system_postts(sts);
+		stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+		break;
+	}
+
+	ns = timecounter_cyc2time(&adapter->hw_tc, stamp);
+
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 
 	*ts = ns_to_timespec64(ns);
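
A sketch of the X550 branch above: SYSTIMH holds "billions of cycles" and SYSTIML "cycles", so treating the pair as (sec, nsec) and applying the timespec64_to_ns() math, sec * 1e9 + nsec, recovers the flat cycle count that timecounter_cyc2time() expects. The register values below are invented:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t systimh = 7;		/* 7 "billions of cycles" */
	uint32_t systiml = 250000000;	/* plus 250 million cycles */

	/* same arithmetic as timespec64_to_ns() */
	uint64_t stamp = (uint64_t)systimh * 1000000000ULL + systiml;

	printf("flat cycle count: %llu\n", (unsigned long long)stamp);
	return 0;
}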
@@ -567,10 +699,14 @@
 {
 	bool timeout = time_is_before_jiffies(adapter->last_overflow_check +
 					     IXGBE_OVERFLOW_PERIOD);
-	struct timespec64 ts;
+	unsigned long flags;
 
 	if (timeout) {
-		ixgbe_ptp_gettime(&adapter->ptp_caps, &ts);
+		/* Update the timecounter */
+		spin_lock_irqsave(&adapter->tmreg_lock, flags);
+		timecounter_read(&adapter->hw_tc);
+		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
 		adapter->last_overflow_check = jiffies;
 	}
 }
@@ -804,6 +940,15 @@
 	ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
 }
 
+/**
+ * ixgbe_ptp_get_ts_config - get current hardware timestamping configuration
+ * @adapter: pointer to adapter structure
+ * @ifr: ioctl data
+ *
+ * This function returns the current timestamping settings. Rather than
+ * attempt to deconstruct registers to fill in the values, simply keep a copy
+ * of the old settings around, and return a copy when requested.
+ */
 int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
 {
 	struct hwtstamp_config *config = &adapter->tstamp_config;
@@ -1216,10 +1361,10 @@
 		adapter->ptp_caps.pps = 1;
 		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
-		adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+		adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
 		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
 		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
-		adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540;
+		adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X540;
 		break;
 	case ixgbe_mac_82599EB:
 		snprintf(adapter->ptp_caps.name,
@@ -1233,7 +1378,7 @@
 		adapter->ptp_caps.pps = 0;
 		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
-		adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+		adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
 		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
 		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
 		break;
@@ -1246,13 +1391,13 @@
 		adapter->ptp_caps.n_alarm = 0;
 		adapter->ptp_caps.n_ext_ts = 0;
 		adapter->ptp_caps.n_per_out = 0;
-		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.pps = 1;
 		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550;
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
-		adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+		adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
 		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
 		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
-		adapter->ptp_setup_sdp = NULL;
+		adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X550;
 		break;
 	default:
 		adapter->ptp_clock = NULL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index eea63a9..537dfff 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -496,6 +496,7 @@
 		case ixgbe_mbox_api_11:
 		case ixgbe_mbox_api_12:
 		case ixgbe_mbox_api_13:
+		case ixgbe_mbox_api_14:
 			/* Version 1.1 supports jumbo frames on VFs if PF has
 			 * jumbo frames enabled which means legacy VFs are
 			 * disabled
@@ -699,7 +700,6 @@
 	u8 num_tcs = adapter->hw_tcs;
 	u32 reg_val;
 	u32 queue;
-	u32 word;
 
 	/* remove VLAN filters belonging to this VF */
 	ixgbe_clear_vf_vlans(adapter, vf);
@@ -730,6 +730,9 @@
 	/* reset multicast table array for vf */
 	adapter->vfinfo[vf].num_vf_mc_hashes = 0;
 
+	/* clear any ipsec table info */
+	ixgbe_ipsec_vf_clear(adapter, vf);
+
 	/* Flush and reset the mta with the new values */
 	ixgbe_set_rx_mode(adapter->netdev);
 
@@ -754,6 +757,14 @@
 		}
 	}
 
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+static void ixgbe_vf_clear_mbx(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 word;
+
 	/* Clear VF's mailbox memory */
 	for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++)
 		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0);
@@ -827,6 +838,8 @@
 	/* reset the filters for the device */
 	ixgbe_vf_reset_event(adapter, vf);
 
+	ixgbe_vf_clear_mbx(adapter, vf);
+
 	/* set vf mac address */
 	if (!is_zero_ether_addr(vf_mac))
 		ixgbe_set_vf_mac(adapter, vf, vf_mac);
@@ -1002,6 +1015,7 @@
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		adapter->vfinfo[vf].vf_api = api;
 		return 0;
 	default:
@@ -1027,6 +1041,7 @@
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		break;
 	default:
 		return -1;
@@ -1067,6 +1082,7 @@
 
 	/* verify the PF is supporting the correct API */
 	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		break;
@@ -1099,6 +1115,7 @@
 
 	/* verify the PF is supporting the correct API */
 	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		break;
@@ -1124,8 +1141,9 @@
 		/* promisc introduced in 1.3 version */
 		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
 			return -EOPNOTSUPP;
-		/* Fall threw */
+		/* Fall through */
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -1251,6 +1269,12 @@
 	case IXGBE_VF_UPDATE_XCAST_MODE:
 		retval = ixgbe_update_vf_xcast_mode(adapter, msgbuf, vf);
 		break;
+	case IXGBE_VF_IPSEC_ADD:
+		retval = ixgbe_ipsec_vf_add_sa(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_IPSEC_DEL:
+		retval = ixgbe_ipsec_vf_del_sa(adapter, msgbuf, vf);
+		break;
 	default:
 		e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
@@ -1621,7 +1645,7 @@
 		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_LLDP),
 				(IXGBE_ETQF_FILTER_EN    |
 				 IXGBE_ETQF_TX_ANTISPOOF |
-				 IXGBE_ETH_P_LLDP));
+				 ETH_P_LLDP));
 
 		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FC),
 				(IXGBE_ETQF_FILTER_EN |
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
new file mode 100644
index 0000000..6d01700
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Intel Corporation. */
+
+#ifndef _IXGBE_TXRX_COMMON_H_
+#define _IXGBE_TXRX_COMMON_H_
+
+#define IXGBE_XDP_PASS		0
+#define IXGBE_XDP_CONSUMED	BIT(0)
+#define IXGBE_XDP_TX		BIT(1)
+#define IXGBE_XDP_REDIR		BIT(2)
+
+#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
+		       IXGBE_TXD_CMD_RS)
+
+int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
+			struct xdp_frame *xdpf);
+bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
+			   union ixgbe_adv_rx_desc *rx_desc,
+			   struct sk_buff *skb);
+void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
+			      union ixgbe_adv_rx_desc *rx_desc,
+			      struct sk_buff *skb);
+void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
+		  struct sk_buff *skb);
+void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring);
+void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, u64 qmask);
+
+void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring);
+void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
+
+struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
+				struct ixgbe_ring *ring);
+int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
+			 u16 qid);
+
+void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
+
+void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
+int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+			  struct ixgbe_ring *rx_ring,
+			  const int budget);
+void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+			    struct ixgbe_ring *tx_ring, int napi_budget);
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
+
+#endif /* _IXGBE_TXRX_COMMON_H_ */
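
The verdicts are distinct bits rather than an enum so the Rx loop can OR per-packet results into one summary word and act once per batch, which is exactly how ixgbe_clean_rx_irq_zc() below handles xdp_xmit. A standalone sketch of the pattern:

#include <stdio.h>

#define IXGBE_XDP_PASS		0
#define IXGBE_XDP_CONSUMED	(1u << 0)
#define IXGBE_XDP_TX		(1u << 1)
#define IXGBE_XDP_REDIR		(1u << 2)

int main(void)
{
	unsigned int verdicts[] = { IXGBE_XDP_TX, IXGBE_XDP_CONSUMED,
				    IXGBE_XDP_REDIR, IXGBE_XDP_TX };
	unsigned int xdp_xmit = 0;

	for (unsigned int i = 0; i < 4; i++)
		xdp_xmit |= verdicts[i];	/* accumulate per packet */

	if (xdp_xmit & IXGBE_XDP_TX)
		printf("bump the XDP Tx tail once\n");
	if (xdp_xmit & IXGBE_XDP_REDIR)
		printf("flush the redirect maps once\n");
	return 0;
}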
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 41bcbb3..2be1c4c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -924,6 +924,9 @@
 /* Firmware Semaphore Register */
 #define IXGBE_FWSM_MODE_MASK	0xE
 #define IXGBE_FWSM_FW_MODE_PT	0x4
+#define IXGBE_FWSM_FW_NVM_RECOVERY_MODE	BIT(5)
+#define IXGBE_FWSM_EXT_ERR_IND_MASK	0x01F80000
+#define IXGBE_FWSM_FW_VAL_BIT	BIT(15)
 
 /* ARC Subsystem registers */
 #define IXGBE_HICR      0x15F00
@@ -1064,6 +1067,7 @@
 #define IXGBE_AUXSTMPL1  0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
 #define IXGBE_AUXSTMPH1  0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
 #define IXGBE_TSIM       0x08C68 /* TimeSync Interrupt Mask Register - RW */
+#define IXGBE_TSSDP      0x0003C /* TimeSync SDP Configuration Register - RW */
 
 /* Diagnostic Registers */
 #define IXGBE_RDSTATCTL   0x02C20
@@ -2237,11 +2241,18 @@
 #define IXGBE_RXDCTL_RLPML_EN   0x00008000
 #define IXGBE_RXDCTL_VME        0x40000000  /* VLAN mode enable */
 
-#define IXGBE_TSAUXC_EN_CLK   0x00000004
-#define IXGBE_TSAUXC_SYNCLK   0x00000008
-#define IXGBE_TSAUXC_SDP0_INT 0x00000040
+#define IXGBE_TSAUXC_EN_CLK		0x00000004
+#define IXGBE_TSAUXC_SYNCLK		0x00000008
+#define IXGBE_TSAUXC_SDP0_INT		0x00000040
+#define IXGBE_TSAUXC_EN_TT0		0x00000001
+#define IXGBE_TSAUXC_EN_TT1		0x00000002
+#define IXGBE_TSAUXC_ST0		0x00000010
 #define IXGBE_TSAUXC_DISABLE_SYSTIME	0x80000000
 
+#define IXGBE_TSSDP_TS_SDP0_SEL_MASK	0x000000C0
+#define IXGBE_TSSDP_TS_SDP0_CLK0	0x00000080
+#define IXGBE_TSSDP_TS_SDP0_EN		0x00000100
+
 #define IXGBE_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
 #define IXGBE_TSYNCTXCTL_ENABLED	0x00000010 /* Tx timestamping enabled */
 
@@ -3461,6 +3472,7 @@
 			      const char *);
 	s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
 	s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
+	bool (*fw_recovery_mode)(struct ixgbe_hw *hw);
 	void (*disable_rx)(struct ixgbe_hw *hw);
 	void (*enable_rx)(struct ixgbe_hw *hw);
 	void (*set_source_address_pruning)(struct ixgbe_hw *, bool,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 9772016..9c42f74 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1247,6 +1247,20 @@
 	return 0;
 }
 
+/**
+ * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode
+ * @hw: pointer to hardware structure
+ *
+ * Returns true if in FW NVM recovery mode.
+ */
+static bool ixgbe_fw_recovery_mode_X550(struct ixgbe_hw *hw)
+{
+	u32 fwsm;
+
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+	return !!(fwsm & IXGBE_FWSM_FW_NVM_RECOVERY_MODE);
+}
+
 /** ixgbe_disable_rx_x550 - Disable RX unit
  *
  *  Disables the Rx DMA unit for x550
@@ -3818,6 +3832,7 @@
 	.enable_rx_buff			= &ixgbe_enable_rx_buff_generic, \
 	.get_thermal_sensor_data	= NULL, \
 	.init_thermal_sensor_thresh	= NULL, \
+	.fw_recovery_mode		= &ixgbe_fw_recovery_mode_X550, \
 	.enable_rx			= &ixgbe_enable_rx_generic, \
 	.disable_rx			= &ixgbe_disable_rx_x550, \
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
new file mode 100644
index 0000000..d6feaac
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -0,0 +1,749 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+
+#include "ixgbe.h"
+#include "ixgbe_txrx_common.h"
+
+struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
+				struct ixgbe_ring *ring)
+{
+	bool xdp_on = READ_ONCE(adapter->xdp_prog);
+	int qid = ring->ring_idx;
+
+	if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps))
+		return NULL;
+
+	return xdp_get_umem_from_qid(adapter->netdev, qid);
+}
+
+static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter,
+				  struct xdp_umem *umem)
+{
+	struct device *dev = &adapter->pdev->dev;
+	unsigned int i, j;
+	dma_addr_t dma;
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
+					 DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
+		if (dma_mapping_error(dev, dma))
+			goto out_unmap;
+
+		umem->pages[i].dma = dma;
+	}
+
+	return 0;
+
+out_unmap:
+	for (j = 0; j < i; j++) {
+		dma_unmap_page_attrs(dev, umem->pages[j].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
+		umem->pages[j].dma = 0;
+	}
+
+	return -1;
+}
+
+static void ixgbe_xsk_umem_dma_unmap(struct ixgbe_adapter *adapter,
+				     struct xdp_umem *umem)
+{
+	struct device *dev = &adapter->pdev->dev;
+	unsigned int i;
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
+
+		umem->pages[i].dma = 0;
+	}
+}
+
+static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
+				 struct xdp_umem *umem,
+				 u16 qid)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct xdp_umem_fq_reuse *reuseq;
+	bool if_running;
+	int err;
+
+	if (qid >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	if (qid >= netdev->real_num_rx_queues ||
+	    qid >= netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count);
+	if (!reuseq)
+		return -ENOMEM;
+
+	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+	err = ixgbe_xsk_umem_dma_map(adapter, umem);
+	if (err)
+		return err;
+
+	if_running = netif_running(adapter->netdev) &&
+		     ixgbe_enabled_xdp_adapter(adapter);
+
+	if (if_running)
+		ixgbe_txrx_ring_disable(adapter, qid);
+
+	set_bit(qid, adapter->af_xdp_zc_qps);
+
+	if (if_running) {
+		ixgbe_txrx_ring_enable(adapter, qid);
+
+		/* Kick start the NAPI context so that receiving will start */
+		err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
+{
+	struct xdp_umem *umem;
+	bool if_running;
+
+	umem = xdp_get_umem_from_qid(adapter->netdev, qid);
+	if (!umem)
+		return -EINVAL;
+
+	if_running = netif_running(adapter->netdev) &&
+		     ixgbe_enabled_xdp_adapter(adapter);
+
+	if (if_running)
+		ixgbe_txrx_ring_disable(adapter, qid);
+
+	clear_bit(qid, adapter->af_xdp_zc_qps);
+	ixgbe_xsk_umem_dma_unmap(adapter, umem);
+
+	if (if_running)
+		ixgbe_txrx_ring_enable(adapter, qid);
+
+	return 0;
+}
+
+int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
+			 u16 qid)
+{
+	return umem ? ixgbe_xsk_umem_enable(adapter, umem, qid) :
+		ixgbe_xsk_umem_disable(adapter, qid);
+}
+
+static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
+			    struct ixgbe_ring *rx_ring,
+			    struct xdp_buff *xdp)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	int err, result = IXGBE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
+	u64 offset;
+	u32 act;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	offset = xdp->data - xdp->data_hard_start;
+
+	xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdpf = convert_to_xdp_frame(xdp);
+		if (unlikely(!xdpf)) {
+			result = IXGBE_XDP_CONSUMED;
+			break;
+		}
+		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fallthrough */
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping packet */
+	case XDP_DROP:
+		result = IXGBE_XDP_CONSUMED;
+		break;
+	}
+	rcu_read_unlock();
+	return result;
+}
+
+static struct
+ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
+					unsigned int size)
+{
+	struct ixgbe_rx_buffer *bi;
+
+	bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      bi->dma, 0,
+				      size,
+				      DMA_BIDIRECTIONAL);
+
+	return bi;
+}
+
+static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
+				     struct ixgbe_rx_buffer *obi)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct ixgbe_rx_buffer *nbi;
+
+	nbi = &rx_ring->rx_buffer_info[rx_ring->next_to_alloc];
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	nbi->dma = obi->dma;
+	nbi->addr = obi->addr;
+	nbi->handle = obi->handle;
+
+	obi->addr = NULL;
+	obi->skb = NULL;
+}
+
+void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
+{
+	struct ixgbe_rx_buffer *bi;
+	struct ixgbe_ring *rx_ring;
+	u64 hr, mask;
+	u16 nta;
+
+	rx_ring = container_of(alloc, struct ixgbe_ring, zca);
+	hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+	mask = rx_ring->xsk_umem->chunk_mask;
+
+	nta = rx_ring->next_to_alloc;
+	bi = rx_ring->rx_buffer_info;
+
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	handle &= mask;
+
+	bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
+	bi->dma += hr;
+
+	bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
+	bi->addr += hr;
+
+	bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
+					    rx_ring->xsk_umem->headroom);
+}
+
+static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
+				  struct ixgbe_rx_buffer *bi)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	void *addr = bi->addr;
+	u64 handle, hr;
+
+	if (addr)
+		return true;
+
+	if (!xsk_umem_peek_addr(umem, &handle)) {
+		rx_ring->rx_stats.alloc_rx_page_failed++;
+		return false;
+	}
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	bi->dma = xdp_umem_get_dma(umem, handle);
+	bi->dma += hr;
+
+	bi->addr = xdp_umem_get_data(umem, handle);
+	bi->addr += hr;
+
+	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
+
+	xsk_umem_discard_addr(umem);
+	return true;
+}
+
+static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
+				       struct ixgbe_rx_buffer *bi)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	u64 handle, hr;
+
+	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+		rx_ring->rx_stats.alloc_rx_page_failed++;
+		return false;
+	}
+
+	handle &= rx_ring->xsk_umem->chunk_mask;
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	bi->dma = xdp_umem_get_dma(umem, handle);
+	bi->dma += hr;
+
+	bi->addr = xdp_umem_get_data(umem, handle);
+	bi->addr += hr;
+
+	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
+
+	xsk_umem_discard_addr_rq(umem);
+	return true;
+}
+
+static __always_inline bool
+__ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
+			    bool alloc(struct ixgbe_ring *rx_ring,
+				       struct ixgbe_rx_buffer *bi))
+{
+	union ixgbe_adv_rx_desc *rx_desc;
+	struct ixgbe_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+	bool ok = true;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return true;
+
+	rx_desc = IXGBE_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	do {
+		if (!alloc(rx_ring, bi)) {
+			ok = false;
+			break;
+		}
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 rx_ring->rx_buf_len,
+						 DMA_BIDIRECTIONAL);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+
+	return ok;
+}
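
The early i -= rx_ring->count is the usual ixgbe descriptor-loop trick: i runs as a negative offset in unsigned 16-bit arithmetic, so detecting the ring wrap is a cheap !i test instead of a compare against count on every iteration. A standalone demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t count = 512, next_to_use = 510;
	uint16_t i = next_to_use;

	i -= count;	/* now a negative offset, modulo 2^16 */

	for (int n = 0; n < 4; n++) {
		i++;
		if (!i) {	/* reached the ring end exactly at i == 0 */
			printf("wrapped after slot %u\n", count - 1u);
			i -= count;	/* restart the negative offset */
		}
	}

	/* undo the bias to recover the real ring index (prints 2) */
	printf("final ring index: %u\n", (unsigned)(uint16_t)(i + count));
	return 0;
}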
+
+void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
+{
+	__ixgbe_alloc_rx_buffers_zc(rx_ring, count,
+				    ixgbe_alloc_buffer_slow_zc);
+}
+
+static bool ixgbe_alloc_rx_buffers_fast_zc(struct ixgbe_ring *rx_ring,
+					   u16 count)
+{
+	return __ixgbe_alloc_rx_buffers_zc(rx_ring, count,
+					   ixgbe_alloc_buffer_zc);
+}
+
+static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
+					      struct ixgbe_rx_buffer *bi,
+					      struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int datasize = xdp->data_end - xdp->data;
+	struct sk_buff *skb;
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+			       xdp->data_end - xdp->data_hard_start,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+	return skb;
+}
+
+static void ixgbe_inc_ntc(struct ixgbe_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
+}
+
+int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+			  struct ixgbe_ring *rx_ring,
+			  const int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+	unsigned int xdp_res, xdp_xmit = 0;
+	bool failure = false;
+	struct sk_buff *skb;
+	struct xdp_buff xdp;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
+
+	while (likely(total_rx_packets < budget)) {
+		union ixgbe_adv_rx_desc *rx_desc;
+		struct ixgbe_rx_buffer *bi;
+		unsigned int size;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
+			failure = failure ||
+				  !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
+								 cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		bi = ixgbe_get_rx_buffer_zc(rx_ring, size);
+
+		if (unlikely(!ixgbe_test_staterr(rx_desc,
+						 IXGBE_RXD_STAT_EOP))) {
+			struct ixgbe_rx_buffer *next_bi;
+
+			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+			ixgbe_inc_ntc(rx_ring);
+			next_bi =
+			       &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+			next_bi->skb = ERR_PTR(-EINVAL);
+			continue;
+		}
+
+		if (unlikely(bi->skb)) {
+			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+			ixgbe_inc_ntc(rx_ring);
+			continue;
+		}
+
+		xdp.data = bi->addr;
+		xdp.data_meta = xdp.data;
+		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+		xdp.data_end = xdp.data + size;
+		xdp.handle = bi->handle;
+
+		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);
+
+		if (xdp_res) {
+			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
+				bi->addr = NULL;
+				bi->skb = NULL;
+			} else {
+				ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+			}
+			total_rx_packets++;
+			total_rx_bytes += size;
+
+			cleaned_count++;
+			ixgbe_inc_ntc(rx_ring);
+			continue;
+		}
+
+		/* XDP_PASS path */
+		skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			break;
+		}
+
+		cleaned_count++;
+		ixgbe_inc_ntc(rx_ring);
+
+		if (eth_skb_pad(skb))
+			continue;
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
+		ixgbe_rx_skb(q_vector, skb);
+	}
+
+	if (xdp_xmit & IXGBE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_xmit & IXGBE_XDP_TX) {
+		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.
+		 */
+		wmb();
+		writel(ring->next_to_use, ring->tail);
+	}
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
+
+	if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+		else
+			xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+
+		return (int)total_rx_packets;
+	}
+	return failure ? budget : (int)total_rx_packets;
+}
+
+void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
+{
+	u16 i = rx_ring->next_to_clean;
+	struct ixgbe_rx_buffer *bi = &rx_ring->rx_buffer_info[i];
+
+	while (i != rx_ring->next_to_alloc) {
+		xsk_umem_fq_reuse(rx_ring->xsk_umem, bi->handle);
+		i++;
+		bi++;
+		if (i == rx_ring->count) {
+			i = 0;
+			bi = rx_ring->rx_buffer_info;
+		}
+	}
+}
+
+static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+{
+	union ixgbe_adv_tx_desc *tx_desc = NULL;
+	struct ixgbe_tx_buffer *tx_bi;
+	bool work_done = true;
+	struct xdp_desc desc;
+	dma_addr_t dma;
+	u32 cmd_type;
+
+	while (budget-- > 0) {
+		if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
+		    !netif_carrier_ok(xdp_ring->netdev)) {
+			work_done = false;
+			break;
+		}
+
+		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+			break;
+
+		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+					   DMA_BIDIRECTIONAL);
+
+		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
+		tx_bi->bytecount = desc.len;
+		tx_bi->xdpf = NULL;
+		tx_bi->gso_segs = 1;
+
+		tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		/* put descriptor type bits */
+		cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+			   IXGBE_ADVTXD_DCMD_DEXT |
+			   IXGBE_ADVTXD_DCMD_IFCS;
+		cmd_type |= desc.len | IXGBE_TXD_CMD;
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.olinfo_status =
+			cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		ixgbe_xdp_ring_update_tail(xdp_ring);
+		xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+	}
+
+	return !!budget && work_done;
+}
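
A sketch of the field packing above: the command and descriptor-type bits occupy the high bits of cmd_type_len, the buffer length the low 16 bits, and olinfo_status carries the payload length shifted up. The constant values are reproduced from ixgbe_type.h as best I recall and should be treated as illustrative:

#include <stdint.h>
#include <stdio.h>

#define IXGBE_ADVTXD_DTYP_DATA		0x00300000u
#define IXGBE_ADVTXD_DCMD_DEXT		0x20000000u
#define IXGBE_ADVTXD_DCMD_IFCS		0x02000000u
#define IXGBE_TXD_CMD_EOP		0x01000000u
#define IXGBE_TXD_CMD_RS		0x08000000u
#define IXGBE_ADVTXD_PAYLEN_SHIFT	14

int main(void)
{
	uint32_t len = 1514;	/* one full-sized frame */
	uint32_t cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT |
			    IXGBE_ADVTXD_DCMD_IFCS |
			    IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS | len;

	printf("cmd_type_len  = 0x%08x\n", cmd_type);
	printf("olinfo_status = 0x%08x\n", len << IXGBE_ADVTXD_PAYLEN_SHIFT);
	return 0;
}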
+
+static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
+				      struct ixgbe_tx_buffer *tx_bi)
+{
+	xdp_return_frame(tx_bi->xdpf);
+	dma_unmap_single(tx_ring->dev,
+			 dma_unmap_addr(tx_bi, dma),
+			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_bi, len, 0);
+}
+
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+			    struct ixgbe_ring *tx_ring, int napi_budget)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	unsigned int total_packets = 0, total_bytes = 0;
+	struct xdp_umem *umem = tx_ring->xsk_umem;
+	union ixgbe_adv_tx_desc *tx_desc;
+	struct ixgbe_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	tx_bi = &tx_ring->tx_buffer_info[ntc];
+	tx_desc = IXGBE_TX_DESC(tx_ring, ntc);
+
+	while (ntc != ntu) {
+		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		total_bytes += tx_bi->bytecount;
+		total_packets += tx_bi->gso_segs;
+
+		if (tx_bi->xdpf)
+			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		tx_bi++;
+		tx_desc++;
+		ntc++;
+		if (unlikely(ntc == tx_ring->count)) {
+			ntc = 0;
+			tx_bi = tx_ring->tx_buffer_info;
+			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+	}
+
+	tx_ring->next_to_clean = ntc;
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(umem, xsk_frames);
+
+	if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
+		xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+
+	return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
+}
+
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ring *ring;
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!READ_ONCE(adapter->xdp_prog))
+		return -ENXIO;
+
+	if (qid >= adapter->num_xdp_queues)
+		return -ENXIO;
+
+	if (!adapter->xdp_ring[qid]->xsk_umem)
+		return -ENXIO;
+
+	ring = adapter->xdp_ring[qid];
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+		u64 eics = BIT_ULL(ring->q_vector->v_idx);
+
+		ixgbe_irq_rearm_queues(adapter, eics);
+	}
+
+	return 0;
+}
+
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	struct xdp_umem *umem = tx_ring->xsk_umem;
+	struct ixgbe_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		tx_bi = &tx_ring->tx_buffer_info[ntc];
+
+		if (tx_bi->xdpf)
+			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		ntc++;
+		if (ntc == tx_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index aba1e6a..186a4bb 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -10,4 +10,5 @@
                 mbx.o \
                 ethtool.o \
                 ixgbevf_main.o
+ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 700d8eb..6bace74 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -133,9 +133,14 @@
 #define IXGBE_RXDADV_STAT_FCSTAT_NODDP	0x00000010 /* 01: Ctxt w/o DDP */
 #define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP	0x00000020 /* 10: Recv. FCP_RSP */
 #define IXGBE_RXDADV_STAT_FCSTAT_DDP	0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_SECP		0x00020000 /* IPsec/MACsec pkt found */
 
 #define IXGBE_RXDADV_RSSTYPE_MASK	0x0000000F
 #define IXGBE_RXDADV_PKTTYPE_MASK	0x0000FFF0
+#define IXGBE_RXDADV_PKTTYPE_IPV4	0x00000010 /* IPv4 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV6	0x00000040 /* IPv6 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP	0x00001000 /* IPSec ESP */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH	0x00002000 /* IPSec AH */
 #define IXGBE_RXDADV_PKTTYPE_MASK_EX	0x0001FFF0
 #define IXGBE_RXDADV_HDRBUFLEN_MASK	0x00007FE0
 #define IXGBE_RXDADV_RSCCNT_MASK	0x001E0000
@@ -229,7 +234,7 @@
 /* Context descriptors */
 struct ixgbe_adv_tx_context_desc {
 	__le32 vlan_macip_lens;
-	__le32 seqnum_seed;
+	__le32 fceof_saidx;
 	__le32 type_tucmd_mlhl;
 	__le32 mss_l4len_idx;
 };
@@ -250,9 +255,12 @@
 #define IXGBE_ADVTXD_TUCMD_L4T_UDP	0x00000000  /* L4 Packet TYPE of UDP */
 #define IXGBE_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet TYPE of TCP */
 #define IXGBE_ADVTXD_TUCMD_L4T_SCTP	0x00001000  /* L4 Packet TYPE of SCTP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP   0x00002000 /* IPSec Type ESP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 /* ESP Encrypt Enable */
 #define IXGBE_ADVTXD_IDX_SHIFT	4 /* Adv desc Index shift */
 #define IXGBE_ADVTXD_CC		0x00000080 /* Check Context */
 #define IXGBE_ADVTXD_POPTS_SHIFT	8  /* Adv desc POPTS shift */
+#define IXGBE_ADVTXD_POPTS_IPSEC	0x00000400 /* IPSec offload request */
 #define IXGBE_ADVTXD_POPTS_IXSM	(IXGBE_TXD_POPTS_IXSM << \
 				 IXGBE_ADVTXD_POPTS_SHIFT)
 #define IXGBE_ADVTXD_POPTS_TXSM	(IXGBE_TXD_POPTS_TXSM << \
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 631c910..54459b6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -55,6 +55,8 @@
 	IXGBEVF_STAT("alloc_rx_page", alloc_rx_page),
 	IXGBEVF_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
 	IXGBEVF_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+	IXGBEVF_STAT("tx_ipsec", tx_ipsec),
+	IXGBEVF_STAT("rx_ipsec", rx_ipsec),
 };
 
 #define IXGBEVF_QUEUE_STATS_LEN ( \
@@ -83,22 +85,16 @@
 				      struct ethtool_link_ksettings *cmd)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
-	struct ixgbe_hw *hw = &adapter->hw;
-	u32 link_speed = 0;
-	bool link_up;
 
 	ethtool_link_ksettings_zero_link_mode(cmd, supported);
 	ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full);
 	cmd->base.autoneg = AUTONEG_DISABLE;
 	cmd->base.port = -1;
 
-	hw->mac.get_link_status = 1;
-	hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
-
-	if (link_up) {
+	if (adapter->link_up) {
 		__u32 speed = SPEED_10000;
 
-		switch (link_speed) {
+		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			speed = SPEED_10000;
 			break;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
new file mode 100644
index 0000000..5170dd9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#include "ixgbevf.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
+
+#define IXGBE_IPSEC_KEY_BITS  160
+static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+
+/**
+ * ixgbevf_ipsec_set_pf_sa - ask the PF to set up an SA
+ * @adapter: board private structure
+ * @xs: xfrm info to be sent to the PF
+ *
+ * Returns: positive offload handle from the PF, or negative error code
+ **/
+static int ixgbevf_ipsec_set_pf_sa(struct ixgbevf_adapter *adapter,
+				   struct xfrm_state *xs)
+{
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE] = { 0 };
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct sa_mbx_msg *sam;
+	int ret;
+
+	/* send the important bits to the PF */
+	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
+	sam->flags = xs->xso.flags;
+	sam->spi = xs->id.spi;
+	sam->proto = xs->id.proto;
+	sam->family = xs->props.family;
+
+	if (xs->props.family == AF_INET6)
+		memcpy(sam->addr, &xs->id.daddr.a6, sizeof(xs->id.daddr.a6));
+	else
+		memcpy(sam->addr, &xs->id.daddr.a4, sizeof(xs->id.daddr.a4));
+	memcpy(sam->key, xs->aead->alg_key, sizeof(sam->key));
+
+	msgbuf[0] = IXGBE_VF_IPSEC_ADD;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	ret = hw->mbx.ops.write_posted(hw, msgbuf, IXGBE_VFMAILBOX_SIZE);
+	if (ret)
+		goto out;
+
+	ret = hw->mbx.ops.read_posted(hw, msgbuf, 2);
+	if (ret)
+		goto out;
+
+	ret = (int)msgbuf[1];
+	if (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK && ret >= 0)
+		ret = -1;
+
+out:
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	return ret;
+}
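
A userspace sketch of the reply decoding above: word 0 echoes the opcode with an ACK or NACK bit folded in, and word 1 carries the offload handle. The NACK bit value follows the usual ixgbe mailbox convention as I recall it, and the opcode here is hypothetical:

#include <stdint.h>
#include <stdio.h>

#define IXGBE_VT_MSGTYPE_NACK	0x40000000u	/* assumed, from mbx.h */

int main(void)
{
	/* hypothetical PF reply: opcode 0x0d NACKed, handle word is 0 */
	uint32_t msgbuf[2] = { 0x0d | IXGBE_VT_MSGTYPE_NACK, 0 };
	int ret = (int)msgbuf[1];

	/* mirror the driver: a NACK with a non-negative handle becomes -1 */
	if ((msgbuf[0] & IXGBE_VT_MSGTYPE_NACK) && ret >= 0)
		ret = -1;

	printf("offload handle or error: %d\n", ret);
	return 0;
}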
+
+/**
+ * ixgbevf_ipsec_del_pf_sa - ask the PF to delete an SA
+ * @adapter: board private structure
+ * @pfsa: sa index returned from PF when created, -1 for all
+ *
+ * Returns: 0 on success, or negative error code
+ **/
+static int ixgbevf_ipsec_del_pf_sa(struct ixgbevf_adapter *adapter, int pfsa)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 msgbuf[2];
+	int err;
+
+	memset(msgbuf, 0, sizeof(msgbuf));
+	msgbuf[0] = IXGBE_VF_IPSEC_DEL;
+	msgbuf[1] = (u32)pfsa;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	err = hw->mbx.ops.write_posted(hw, msgbuf, 2);
+	if (err)
+		goto out;
+
+	err = hw->mbx.ops.read_posted(hw, msgbuf, 2);
+
+out:
+	spin_unlock_bh(&adapter->mbx_lock);
+	return err;
+}
+
+/**
+ * ixgbevf_ipsec_restore - restore the IPsec HW settings after a reset
+ * @adapter: board private structure
+ *
+ * Reload the HW tables from the SW tables after they've been bashed
+ * by a chip reset.  While we're here, make sure any stale VF data is
+ * removed, since we go through reset when num_vfs changes.
+ **/
+void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	if (!(adapter->netdev->features & NETIF_F_HW_ESP))
+		return;
+
+	/* reload the Rx and Tx keys */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *r = &ipsec->rx_tbl[i];
+		struct tx_sa *t = &ipsec->tx_tbl[i];
+		int ret;
+
+		if (r->used) {
+			ret = ixgbevf_ipsec_set_pf_sa(adapter, r->xs);
+			if (ret < 0)
+				netdev_err(netdev, "reload rx_tbl[%d] failed = %d\n",
+					   i, ret);
+		}
+
+		if (t->used) {
+			ret = ixgbevf_ipsec_set_pf_sa(adapter, t->xs);
+			if (ret < 0)
+				netdev_err(netdev, "reload tx_tbl[%d] failed = %d\n",
+					   i, ret);
+		}
+	}
+}
+
+/**
+ * ixgbevf_ipsec_find_empty_idx - find the first unused SA table index
+ * @ipsec: pointer to IPsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static
+int ixgbevf_ipsec_find_empty_idx(struct ixgbevf_ipsec *ipsec, bool rxtable)
+{
+	u32 i;
+
+	if (rxtable) {
+		if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search rx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->rx_tbl[i].used)
+				return i;
+		}
+	} else {
+		if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search tx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->tx_tbl[i].used)
+				return i;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/**
+ * ixgbevf_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to IPsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ * @ip4: true if using an IPv4 address
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static
+struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec,
+					       __be32 *daddr, u8 proto,
+					       __be32 spi, bool ip4)
+{
+	struct xfrm_state *ret = NULL;
+	struct rx_sa *rsa;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
+				   (__force u32)spi) {
+		if (spi == rsa->xs->id.spi &&
+		    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
+		      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
+				       sizeof(rsa->xs->id.daddr.a6)))) &&
+		    proto == rsa->xs->id.proto) {
+			ret = rsa->xs;
+			xfrm_state_hold(ret);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * ixgbevf_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables.  The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs,
+					  u32 *mykey, u32 *mysalt)
+{
+	struct net_device *dev = xs->xso.dev;
+	unsigned char *key_data;
+	char *alg_name = NULL;
+	int key_len;
+
+	if (!xs->aead) {
+		netdev_err(dev, "Unsupported IPsec algorithm\n");
+		return -EINVAL;
+	}
+
+	if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+		netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+			   IXGBE_IPSEC_AUTH_BITS);
+		return -EINVAL;
+	}
+
+	key_data = &xs->aead->alg_key[0];
+	key_len = xs->aead->alg_key_len;
+	alg_name = xs->aead->alg_name;
+
+	if (strcmp(alg_name, aes_gcm_name)) {
+		netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+			   aes_gcm_name);
+		return -EINVAL;
+	}
+
+	/* The key bytes come down in a big-endian array of bytes, so
+	 * we don't need to do any byte swapping.
+	 * 160 bits accounts for the 16-byte key plus the 4-byte salt.
+	 */
+	if (key_len > IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = ((u32 *)key_data)[4];
+	} else if (key_len == IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = 0;
+	} else {
+		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+		return -EINVAL;
+	}
+	memcpy(mykey, key_data, 16);
+
+	return 0;
+}
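
A sketch of the blob layout the indexing above assumes: rfc4106(gcm(aes)) hands the driver alg_key as 16 AES key bytes followed by a 4-byte salt, so the salt is the fifth 32-bit word, exactly what ((u32 *)key_data)[4] picks out. The key bytes here are arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* 20-byte blob: 128-bit AES key, then the 32-bit salt */
	uint8_t alg_key[20] = {
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
		0xde, 0xad, 0xbe, 0xef,
	};
	uint32_t key[4], salt;

	memcpy(key, alg_key, 16);	/* copied as-is, no byte swapping */
	memcpy(&salt, &alg_key[16], 4);	/* the trailing word is the salt */

	printf("salt word: 0x%08x\n", salt);
	return 0;
}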
+
+/**
+ * ixgbevf_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ **/
+static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.dev;
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	u16 sa_idx;
+	int ret;
+
+	if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+		netdev_err(dev, "Unsupported protocol 0x%04x for IPsec offload\n",
+			   xs->id.proto);
+		return -EINVAL;
+	}
+
+	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		struct rx_sa rsa;
+
+		if (xs->calg) {
+			netdev_err(dev, "Compression offload not supported\n");
+			return -EINVAL;
+		}
+
+		/* find the first unused index */
+		ret = ixgbevf_ipsec_find_empty_idx(ipsec, true);
+		if (ret < 0) {
+			netdev_err(dev, "No space for SA in Rx table!\n");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&rsa, 0, sizeof(rsa));
+		rsa.used = true;
+		rsa.xs = xs;
+
+		if (rsa.xs->id.proto == IPPROTO_ESP)
+			rsa.decrypt = xs->ealg || xs->aead;
+
+		/* get the key and salt */
+		ret = ixgbevf_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+		if (ret) {
+			netdev_err(dev, "Failed to get key data for Rx SA table\n");
+			return ret;
+		}
+
+		/* get ip for rx sa table */
+		if (xs->props.family == AF_INET6)
+			memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+		else
+			memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+		rsa.mode = IXGBE_RXMOD_VALID;
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+		if (rsa.decrypt)
+			rsa.mode |= IXGBE_RXMOD_DECRYPT;
+		if (rsa.xs->props.family == AF_INET6)
+			rsa.mode |= IXGBE_RXMOD_IPV6;
+
+		ret = ixgbevf_ipsec_set_pf_sa(adapter, xs);
+		if (ret < 0)
+			return ret;
+		rsa.pfsa = ret;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+		ipsec->num_rx_sa++;
+
+		/* hash the new entry for faster search in Rx path */
+		hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+			     (__force u32)rsa.xs->id.spi);
+	} else {
+		struct tx_sa tsa;
+
+		/* find the first unused index */
+		ret = ixgbevf_ipsec_find_empty_idx(ipsec, false);
+		if (ret < 0) {
+			netdev_err(dev, "No space for SA in Tx table\n");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&tsa, 0, sizeof(tsa));
+		tsa.used = true;
+		tsa.xs = xs;
+
+		if (xs->id.proto & IPPROTO_ESP)
+			tsa.encrypt = xs->ealg || xs->aead;
+
+		ret = ixgbevf_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+		if (ret) {
+			netdev_err(dev, "Failed to get key data for Tx SA table\n");
+			memset(&tsa, 0, sizeof(tsa));
+			return ret;
+		}
+
+		ret = ixgbevf_ipsec_set_pf_sa(adapter, xs);
+		if (ret < 0)
+			return ret;
+		tsa.pfsa = ret;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+		ipsec->num_tx_sa++;
+	}
+
+	return 0;
+}
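
The offload_handle stored above encodes both the direction and the table slot; a sketch of the arithmetic (illustrative helpers, using the constants from ipsec.h below):

	/* Rx handles occupy [0, 1023] (IXGBE_IPSEC_BASE_RX_INDEX == 0) and
	 * Tx handles [1024, 2047] (IXGBE_IPSEC_BASE_TX_INDEX == 1024), so a
	 * handle maps back to its table slot by subtracting the base:
	 */
	static inline u16 rx_handle_to_idx(unsigned long handle)
	{
		return handle - IXGBE_IPSEC_BASE_RX_INDEX;
	}

	static inline u16 tx_handle_to_idx(unsigned long handle)
	{
		return handle - IXGBE_IPSEC_BASE_TX_INDEX;
	}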
+
+/**
+ * ixgbevf_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbevf_ipsec_del_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.dev;
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	u16 sa_idx;
+
+	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+
+		if (!ipsec->rx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbevf_ipsec_del_pf_sa(adapter, ipsec->rx_tbl[sa_idx].pfsa);
+		hash_del_rcu(&ipsec->rx_tbl[sa_idx].hlist);
+		memset(&ipsec->rx_tbl[sa_idx], 0, sizeof(struct rx_sa));
+		ipsec->num_rx_sa--;
+	} else {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+		if (!ipsec->tx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbevf_ipsec_del_pf_sa(adapter, ipsec->tx_tbl[sa_idx].pfsa);
+		memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+		ipsec->num_tx_sa--;
+	}
+}
+
+/**
+ * ixgbevf_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbevf_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+	if (xs->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl != 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not supported yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
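
The ihl test above works because the IPv4 header length field counts 32-bit words; a minimal sketch:

	/* Illustrative: iphdr.ihl == 5 means a bare 5 * 4 == 20-byte header;
	 * any larger value carries IPv4 options, which this offload declines.
	 */
	static bool ip4_has_options(const struct iphdr *iph)
	{
		return iph->ihl != 5;
	}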
+
+static const struct xfrmdev_ops ixgbevf_xfrmdev_ops = {
+	.xdo_dev_state_add = ixgbevf_ipsec_add_sa,
+	.xdo_dev_state_delete = ixgbevf_ipsec_del_sa,
+	.xdo_dev_offload_ok = ixgbevf_ipsec_offload_ok,
+};
+
+/**
+ * ixgbevf_ipsec_tx - setup Tx flags for IPsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+		     struct ixgbevf_tx_buffer *first,
+		     struct ixgbevf_ipsec_tx_data *itd)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	struct sec_path *sp;
+	struct tx_sa *tsa;
+	u16 sa_idx;
+
+	sp = skb_sec_path(first->skb);
+	if (unlikely(!sp->len)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+			   __func__, sp->len);
+		return 0;
+	}
+
+	xs = xfrm_input_state(first->skb);
+	if (unlikely(!xs)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+			   __func__, xs);
+		return 0;
+	}
+
+	sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+	if (unlikely(sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT)) {
+		netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+			   __func__, sa_idx, xs->xso.offload_handle);
+		return 0;
+	}
+
+	tsa = &ipsec->tx_tbl[sa_idx];
+	if (unlikely(!tsa->used)) {
+		netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+			   __func__, sa_idx);
+		return 0;
+	}
+
+	itd->pfsa = tsa->pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+
+	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CSUM;
+
+	if (xs->id.proto == IPPROTO_ESP) {
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		if (first->protocol == htons(ETH_P_IP))
+			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+		/* The actual trailer length is authlen (16 bytes) plus
+		 * 2 bytes for the proto and the padlen values, plus
+		 * padlen bytes of padding.  This ends up not the same
+		 * as the static value found in xs->props.trailer_len (21).
+		 *
+		 * ... but if we're doing GSO, don't bother as the stack
+		 * doesn't add a trailer for those.
+		 */
+		if (!skb_is_gso(first->skb)) {
+			/* The "correct" way to get the auth length would be
+			 * to use
+			 *    authlen = crypto_aead_authsize(xs->data);
+			 * but since we know we only have one size to worry
+			 * about, we can let the compiler use the constant
+			 * and save us a few CPU cycles.
+			 */
+			const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+			struct sk_buff *skb = first->skb;
+			u8 padlen;
+			int ret;
+
+			ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+					    &padlen, 1);
+			if (unlikely(ret))
+				return 0;
+			itd->trailer_len = authlen + 2 + padlen;
+		}
+	}
+	if (tsa->encrypt)
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+	return 1;
+}
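
A worked instance of the trailer arithmetic above, as a sketch (assuming a typical 2-byte ESP pad):

	/* ESP trailer = ICV + 2 bytes {pad length, next header} + padding.
	 * With the 16-byte ICV required here and padlen == 2:
	 *   16 + 2 + 2 = 20 bytes,
	 * which is why the static xs->props.trailer_len (21) is not used.
	 */
	static u16 esp_trailer_len(u8 padlen)
	{
		const int authlen = 128 / 8;	/* IXGBE_IPSEC_AUTH_BITS / 8 */

		return authlen + 2 + padlen;
	}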
+
+/**
+ * ixgbevf_ipsec_rx - decode IPsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an IPsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+		      union ixgbe_adv_rx_desc *rx_desc,
+		      struct sk_buff *skb)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(rx_ring->netdev);
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+	__le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+					     IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_offload *xo = NULL;
+	struct xfrm_state *xs = NULL;
+	struct ipv6hdr *ip6 = NULL;
+	struct iphdr *ip4 = NULL;
+	struct sec_path *sp;
+	void *daddr;
+	__be32 spi;
+	u8 *c_hdr;
+	u8 proto;
+
+	/* Find the IP and crypto headers in the data.
+	 * We can assume no VLAN header is in the way because the
+	 * hw won't recognize the IPsec packet and, in any case,
+	 * the VLAN device doesn't currently support xfrm offload.
+	 */
+	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) {
+		ip4 = (struct iphdr *)(skb->data + ETH_HLEN);
+		daddr = &ip4->daddr;
+		c_hdr = (u8 *)ip4 + ip4->ihl * 4;
+	} else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) {
+		ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+		daddr = &ip6->daddr;
+		c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr);
+	} else {
+		return;
+	}
+
+	switch (pkt_info & ipsec_pkt_types) {
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH):
+		spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+		proto = IPPROTO_AH;
+		break;
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP):
+		spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+		proto = IPPROTO_ESP;
+		break;
+	default:
+		return;
+	}
+
+	xs = ixgbevf_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4);
+	if (unlikely(!xs))
+		return;
+
+	sp = secpath_set(skb);
+	if (unlikely(!sp))
+		return;
+
+	sp->xvec[sp->len++] = xs;
+	sp->olen++;
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+	xo->status = CRYPTO_SUCCESS;
+
+	adapter->rx_ipsec++;
+}
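
As a sketch of the header walk above, the crypto header offset is fixed for IPv6 and derived from ihl for IPv4 (offsets below are illustrative):

	/* For an IPv4 ESP frame with no options:
	 *   skb->data + 0       Ethernet header (ETH_HLEN == 14)
	 *   skb->data + 14      struct iphdr (ihl == 5 -> 20 bytes)
	 *   skb->data + 34      struct ip_esp_hdr, SPI in the first 4 bytes
	 * For IPv6 the crypto header sits at 14 + sizeof(struct ipv6hdr) == 54.
	 */
	static unsigned int esp_hdr_offset_ipv4(const struct iphdr *iph)
	{
		return ETH_HLEN + iph->ihl * 4;	/* e.g. 14 + 20 == 34 */
	}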
+
+/**
+ * ixgbevf_init_ipsec_offload - initialize registers for IPsec operation
+ * @adapter: board private structure
+ **/
+void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec;
+	size_t size;
+
+	switch (adapter->hw.api_version) {
+	case ixgbe_mbox_api_14:
+		break;
+	default:
+		return;
+	}
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		goto err1;
+	hash_init(ipsec->rx_sa_list);
+
+	size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->rx_tbl)
+		goto err2;
+
+	size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->tx_tbl)
+		goto err2;
+
+	ipsec->num_rx_sa = 0;
+	ipsec->num_tx_sa = 0;
+
+	adapter->ipsec = ipsec;
+
+	adapter->netdev->xfrmdev_ops = &ixgbevf_xfrmdev_ops;
+
+#define IXGBEVF_ESP_FEATURES	(NETIF_F_HW_ESP | \
+				 NETIF_F_HW_ESP_TX_CSUM | \
+				 NETIF_F_GSO_ESP)
+
+	adapter->netdev->features |= IXGBEVF_ESP_FEATURES;
+	adapter->netdev->hw_enc_features |= IXGBEVF_ESP_FEATURES;
+
+	return;
+
+err2:
+	kfree(ipsec->rx_tbl);
+	kfree(ipsec->tx_tbl);
+	kfree(ipsec);
+err1:
+	netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
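
Note that kfree(NULL) is a no-op, so the shared err2 unwind above is safe to reach even when rx_tbl was the allocation that failed.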
+
+/**
+ * ixgbevf_stop_ipsec_offload - tear down the IPsec offload
+ * @adapter: board private structure
+ **/
+void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+
+	adapter->ipsec = NULL;
+	if (ipsec) {
+		kfree(ipsec->rx_tbl);
+		kfree(ipsec->tx_tbl);
+		kfree(ipsec);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.h b/drivers/net/ethernet/intel/ixgbevf/ipsec.h
new file mode 100644
index 0000000..3740725
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#ifndef _IXGBEVF_IPSEC_H_
+#define _IXGBEVF_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT	1024
+#define IXGBE_IPSEC_BASE_RX_INDEX	0
+#define IXGBE_IPSEC_BASE_TX_INDEX	IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS		128
+
+#define IXGBE_RXMOD_VALID		0x00000001
+#define IXGBE_RXMOD_PROTO_ESP		0x00000004
+#define IXGBE_RXMOD_DECRYPT		0x00000008
+#define IXGBE_RXMOD_IPV6		0x00000010
+
+struct rx_sa {
+	struct hlist_node hlist;
+	struct xfrm_state *xs;
+	__be32 ipaddr[4];
+	u32 key[4];
+	u32 salt;
+	u32 mode;
+	u32 pfsa;
+	bool used;
+	bool decrypt;
+};
+
+struct rx_ip_sa {
+	__be32 ipaddr[4];
+	u32 ref_cnt;
+	bool used;
+};
+
+struct tx_sa {
+	struct xfrm_state *xs;
+	u32 key[4];
+	u32 salt;
+	u32 pfsa;
+	bool encrypt;
+	bool used;
+};
+
+struct ixgbevf_ipsec_tx_data {
+	u32 flags;
+	u16 trailer_len;
+	u16 pfsa;
+};
+
+struct ixgbevf_ipsec {
+	u16 num_rx_sa;
+	u16 num_tx_sa;
+	struct rx_sa *rx_tbl;
+	struct tx_sa *tx_tbl;
+	DECLARE_HASHTABLE(rx_sa_list, 10);
+};
+
+struct sa_mbx_msg {
+	__be32 spi;
+	u8 flags;
+	u8 proto;
+	u16 family;
+	__be32 addr[4];
+	u32 key[5];
+};
+#endif /* _IXGBEVF_IPSEC_H_ */
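
The bucket count of the Rx SA hash above follows from the second argument of DECLARE_HASHTABLE; a sketch of the sizing (generic hashtable.h behavior, not driver-specific):

	/* DECLARE_HASHTABLE(name, bits) creates 1 << bits buckets, so
	 * rx_sa_list gets 1 << 10 == 1024 buckets, one per possible SA
	 * (IXGBE_IPSEC_MAX_SA_COUNT), keeping the SPI-keyed lookup in
	 * ixgbevf_ipsec_find_rx_state() close to O(1) on average.
	 */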
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 56a1031..ecab686 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -14,6 +14,7 @@
 #include <net/xdp.h>
 
 #include "vf.h"
+#include "ipsec.h"
 
 #define IXGBE_MAX_TXD_PWR	14
 #define IXGBE_MAX_DATA_PER_TXD	BIT(IXGBE_MAX_TXD_PWR)
@@ -163,6 +164,7 @@
 #define IXGBE_TX_FLAGS_VLAN		BIT(1)
 #define IXGBE_TX_FLAGS_TSO		BIT(2)
 #define IXGBE_TX_FLAGS_IPV4		BIT(3)
+#define IXGBE_TX_FLAGS_IPSEC		BIT(4)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT	16
@@ -338,6 +340,7 @@
 	struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
 	u64 restart_queue;
 	u32 tx_timeout_count;
+	u64 tx_ipsec;
 
 	/* RX */
 	int num_rx_queues;
@@ -348,6 +351,7 @@
 	u64 alloc_rx_page_failed;
 	u64 alloc_rx_buff_failed;
 	u64 alloc_rx_page;
+	u64 rx_ipsec;
 
 	struct msix_entry *msix_entries;
 
@@ -384,6 +388,10 @@
 	u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
 	u32 flags;
 #define IXGBEVF_FLAGS_LEGACY_RX		BIT(1)
+
+#ifdef CONFIG_XFRM
+	struct ixgbevf_ipsec *ipsec;
+#endif /* CONFIG_XFRM */
 };
 
 enum ixbgevf_state_t {
@@ -451,6 +459,31 @@
 
 extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
 
+#ifdef CONFIG_IXGBEVF_IPSEC
+void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter);
+void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter);
+void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter);
+void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+		      union ixgbe_adv_rx_desc *rx_desc,
+		      struct sk_buff *skb);
+int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+		     struct ixgbevf_tx_buffer *first,
+		     struct ixgbevf_ipsec_tx_data *itd);
+#else
+static inline void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter)
+{ }
+static inline void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter)
+{ }
+static inline void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter) { }
+static inline void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+				    union ixgbe_adv_rx_desc *rx_desc,
+				    struct sk_buff *skb) { }
+static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+				   struct ixgbevf_tx_buffer *first,
+				   struct ixgbevf_ipsec_tx_data *itd)
+{ return 0; }
+#endif /* CONFIG_IXGBEVF_IPSEC */
+
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4093a9c..076f2da 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -30,6 +30,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/atomic.h>
+#include <net/xfrm.h>
 
 #include "ixgbevf.h"
 
@@ -40,7 +41,7 @@
 #define DRV_VERSION "4.1.0-k"
 const char ixgbevf_driver_version[] = DRV_VERSION;
 static char ixgbevf_copyright[] =
-	"Copyright (c) 2009 - 2015 Intel Corporation.";
+	"Copyright (c) 2009 - 2018 Intel Corporation.";
 
 static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
 	[board_82599_vf]	= &ixgbevf_82599_vf_info,
@@ -79,7 +80,7 @@
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_VERSION(DRV_VERSION);
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
@@ -268,7 +269,7 @@
 	struct ixgbevf_adapter *adapter = q_vector->adapter;
 	struct ixgbevf_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
-	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
 	unsigned int budget = tx_ring->count / 2;
 	unsigned int i = tx_ring->next_to_clean;
 
@@ -299,6 +300,8 @@
 		/* update the statistics for this packet */
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
+		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+			total_ipsec++;
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
@@ -361,6 +364,7 @@
 	u64_stats_update_end(&tx_ring->syncp);
 	q_vector->tx.total_bytes += total_bytes;
 	q_vector->tx.total_packets += total_packets;
+	adapter->tx_ipsec += total_ipsec;
 
 	if (check_for_tx_hang(tx_ring) && ixgbevf_check_tx_hang(tx_ring)) {
 		struct ixgbe_hw *hw = &adapter->hw;
@@ -516,6 +520,9 @@
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
 	}
 
+	if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+		ixgbevf_ipsec_rx(rx_ring, rx_desc, skb);
+
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
@@ -889,7 +896,8 @@
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > IXGBEVF_RX_HDR_SIZE)
-		headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
+		headlen = eth_get_headlen(skb->dev, xdp->data,
+					  IXGBEVF_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
 	memcpy(__skb_put(skb, headlen), xdp->data,
@@ -1012,7 +1020,7 @@
 		context_desc = IXGBEVF_TX_CTXTDESC(ring, 0);
 		context_desc->vlan_macip_lens	=
 			cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT);
-		context_desc->seqnum_seed	= 0;
+		context_desc->fceof_saidx	= 0;
 		context_desc->type_tucmd_mlhl	=
 			cpu_to_le32(IXGBE_TXD_CMD_DEXT |
 				    IXGBE_ADVTXD_DTYP_CTXT);
@@ -1287,16 +1295,20 @@
 	/* If all work not completed, return budget and keep polling */
 	if (!clean_complete)
 		return budget;
-	/* all work done, exit the polling mode */
-	napi_complete_done(napi, work_done);
-	if (adapter->rx_itr_setting == 1)
-		ixgbevf_set_itr(q_vector);
-	if (!test_bit(__IXGBEVF_DOWN, &adapter->state) &&
-	    !test_bit(__IXGBEVF_REMOVING, &adapter->state))
-		ixgbevf_irq_enable_queues(adapter,
-					  BIT(q_vector->v_idx));
 
-	return 0;
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (adapter->rx_itr_setting == 1)
+			ixgbevf_set_itr(q_vector);
+		if (!test_bit(__IXGBEVF_DOWN, &adapter->state) &&
+		    !test_bit(__IXGBEVF_REMOVING, &adapter->state))
+			ixgbevf_irq_enable_queues(adapter,
+						  BIT(q_vector->v_idx));
+	}
+
+	return min(work_done, budget - 1);
 }
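
This hunk moves the driver to the busy-poll-aware NAPI completion idiom; a generic sketch, where do_rx_work() and reenable_irqs() are hypothetical stand-ins:

	static int example_poll(struct napi_struct *napi, int budget)
	{
		int work_done = do_rx_work(napi, budget);	/* hypothetical */

		if (work_done == budget)
			return budget;	/* more work pending: stay in polling */

		/* napi_complete_done() returns false when the stack is busy
		 * polling this context; re-arm device interrupts only when it
		 * returns true, and never report a full budget once complete.
		 */
		if (likely(napi_complete_done(napi, work_done)))
			reenable_irqs(napi);			/* hypothetical */

		return min(work_done, budget - 1);
	}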
 
 /**
@@ -1412,6 +1424,9 @@
 	 */
 	/* what was last interrupt timeslice? */
 	timepassed_us = q_vector->itr >> 2;
+	if (timepassed_us == 0)
+		return;
+
 	bytes_perint = bytes / timepassed_us; /* bytes/usec */
 
 	switch (itr_setting) {
@@ -2200,6 +2215,7 @@
 	ixgbevf_set_rx_mode(adapter->netdev);
 
 	ixgbevf_restore_vlan(adapter);
+	ixgbevf_ipsec_restore(adapter);
 
 	ixgbevf_configure_tx(adapter);
 	ixgbevf_configure_rx(adapter);
@@ -2246,11 +2262,14 @@
 static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int api[] = { ixgbe_mbox_api_13,
-		      ixgbe_mbox_api_12,
-		      ixgbe_mbox_api_11,
-		      ixgbe_mbox_api_10,
-		      ixgbe_mbox_api_unknown };
+	static const int api[] = {
+		ixgbe_mbox_api_14,
+		ixgbe_mbox_api_13,
+		ixgbe_mbox_api_12,
+		ixgbe_mbox_api_11,
+		ixgbe_mbox_api_10,
+		ixgbe_mbox_api_unknown
+	};
 	int err, idx = 0;
 
 	spin_lock_bh(&adapter->mbx_lock);
@@ -2501,6 +2520,7 @@
 		msleep(1);
 
 	ixgbevf_down(adapter);
+	pci_set_master(adapter->pdev);
 	ixgbevf_up(adapter);
 
 	clear_bit(__IXGBEVF_RESETTING, &adapter->state);
@@ -2605,6 +2625,7 @@
 		case ixgbe_mbox_api_11:
 		case ixgbe_mbox_api_12:
 		case ixgbe_mbox_api_13:
+		case ixgbe_mbox_api_14:
 			if (adapter->xdp_prog &&
 			    hw->mac.max_tx_queues == rss)
 				rss = rss > 3 ? 2 : 1;
@@ -3700,8 +3721,8 @@
 }
 
 static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring,
-				u32 vlan_macip_lens, u32 type_tucmd,
-				u32 mss_l4len_idx)
+				u32 vlan_macip_lens, u32 fceof_saidx,
+				u32 type_tucmd, u32 mss_l4len_idx)
 {
 	struct ixgbe_adv_tx_context_desc *context_desc;
 	u16 i = tx_ring->next_to_use;
@@ -3715,14 +3736,15 @@
 	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
 
 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
-	context_desc->seqnum_seed	= 0;
+	context_desc->fceof_saidx	= cpu_to_le32(fceof_saidx);
 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
 }
 
 static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 		       struct ixgbevf_tx_buffer *first,
-		       u8 *hdr_len)
+		       u8 *hdr_len,
+		       struct ixgbevf_ipsec_tx_data *itd)
 {
 	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
 	struct sk_buff *skb = first->skb;
@@ -3736,6 +3758,7 @@
 		unsigned char *hdr;
 	} l4;
 	u32 paylen, l4_offset;
+	u32 fceof_saidx = 0;
 	int err;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3761,13 +3784,15 @@
 	if (ip.v4->version == 4) {
 		unsigned char *csum_start = skb_checksum_start(skb);
 		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+		int len = csum_start - trans_start;
 
 		/* IP header will have to cancel out any data that
-		 * is not a part of the outer IP header
+		 * is not a part of the outer IP header, so set to
+		 * a reverse csum if needed, else init check to 0.
 		 */
-		ip.v4->check = csum_fold(csum_partial(trans_start,
-						      csum_start - trans_start,
-						      0));
+		ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+					   csum_fold(csum_partial(trans_start,
+								  len, 0)) : 0;
 		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -3799,13 +3824,16 @@
 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
 	mss_l4len_idx |= (1u << IXGBE_ADVTXD_IDX_SHIFT);
 
+	fceof_saidx |= itd->pfsa;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
 	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
 	vlan_macip_lens = l4.hdr - ip.hdr;
 	vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens,
-			    type_tucmd, mss_l4len_idx);
+	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
+			    mss_l4len_idx);
 
 	return 1;
 }
@@ -3820,10 +3848,12 @@
 }
 
 static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
-			    struct ixgbevf_tx_buffer *first)
+			    struct ixgbevf_tx_buffer *first,
+			    struct ixgbevf_ipsec_tx_data *itd)
 {
 	struct sk_buff *skb = first->skb;
 	u32 vlan_macip_lens = 0;
+	u32 fceof_saidx = 0;
 	u32 type_tucmd = 0;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3862,7 +3892,11 @@
 	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
+	fceof_saidx |= itd->pfsa;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
+	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens,
+			    fceof_saidx, type_tucmd, 0);
 }
 
 static __le32 ixgbevf_tx_cmd_type(u32 tx_flags)
@@ -3896,8 +3930,12 @@
 	if (tx_flags & IXGBE_TX_FLAGS_IPV4)
 		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM);
 
-	/* use index 1 context for TSO/FSO/FCOE */
-	if (tx_flags & IXGBE_TX_FLAGS_TSO)
+	/* enable IPsec */
+	if (tx_flags & IXGBE_TX_FLAGS_IPSEC)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IPSEC);
+
+	/* use index 1 context for TSO/FSO/FCOE/IPSEC */
+	if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_IPSEC))
 		olinfo_status |= cpu_to_le32(1u << IXGBE_ADVTXD_IDX_SHIFT);
 
 	/* Check Context must be set if Tx switch is enabled, which it
@@ -3915,7 +3953,7 @@
 	struct sk_buff *skb = first->skb;
 	struct ixgbevf_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	dma_addr_t dma;
 	unsigned int data_len, size;
 	u32 tx_flags = first->tx_flags;
@@ -3990,6 +4028,8 @@
 	/* set the timestamp */
 	first->time_stamp = jiffies;
 
+	skb_tx_timestamp(skb);
+
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.  (Only applicable for weak-ordered
 	 * memory model archs, such as IA-64).
@@ -4079,6 +4119,7 @@
 	int tso;
 	u32 tx_flags = 0;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct ixgbevf_ipsec_tx_data ipsec_tx = { 0 };
 #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
 	unsigned short f;
 #endif
@@ -4097,8 +4138,11 @@
 	 * otherwise try next time
 	 */
 #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
-	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
-		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		count += TXD_USE_COUNT(skb_frag_size(frag));
+	}
 #else
 	count += skb_shinfo(skb)->nr_frags;
 #endif
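
These frag changes are part of a tree-wide move from open-coded struct skb_frag_struct fields to accessors; the same pattern recurs in the jme, mv643xx and mvneta hunks below. A generic sketch:

	/* Old field               Accessor after this update
	 *   frag->size        ->  skb_frag_size(frag)
	 *   frag->page_offset ->  skb_frag_off(frag)
	 *   frag->page.p      ->  skb_frag_page(frag)
	 * e.g. an illustrative helper mapping one fragment for DMA:
	 */
	static dma_addr_t map_one_frag(struct device *dev, const skb_frag_t *frag)
	{
		return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag),
				    skb_frag_size(frag), DMA_TO_DEVICE);
	}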
@@ -4123,11 +4167,15 @@
 	first->tx_flags = tx_flags;
 	first->protocol = vlan_get_protocol(skb);
 
-	tso = ixgbevf_tso(tx_ring, first, &hdr_len);
+#ifdef CONFIG_IXGBEVF_IPSEC
+	if (xfrm_offload(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
+		goto out_drop;
+#endif
+	tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx);
 	if (tso < 0)
 		goto out_drop;
 	else if (!tso)
-		ixgbevf_tx_csum(tx_ring, first);
+		ixgbevf_tx_csum(tx_ring, first, &ipsec_tx);
 
 	ixgbevf_tx_map(tx_ring, first, hdr_len);
 
@@ -4617,6 +4665,7 @@
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
 				  (ETH_HLEN + ETH_FCS_LEN);
 		break;
@@ -4652,6 +4701,7 @@
 
 	pci_set_drvdata(pdev, netdev);
 	netif_carrier_off(netdev);
+	ixgbevf_init_ipsec_offload(adapter);
 
 	ixgbevf_init_last_counter_stats(adapter);
 
@@ -4718,6 +4768,7 @@
 	if (netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(netdev);
 
+	ixgbevf_stop_ipsec_offload(adapter);
 	ixgbevf_clear_interrupt_scheme(adapter);
 	ixgbevf_reset_interrupt_capability(adapter);
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index bfd9ae1..853796c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -62,6 +62,7 @@
 	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
 	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
 	ixgbe_mbox_api_13,	/* API version 1.3, linux/freebsd VF driver */
+	ixgbe_mbox_api_14,	/* API version 1.4, linux/freebsd VF driver */
 	/* This value should always be last */
 	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
 };
@@ -92,6 +93,10 @@
 
 #define IXGBE_VF_UPDATE_XCAST_MODE	0x0c
 
+/* mailbox API, version 1.4 VF requests */
+#define IXGBE_VF_IPSEC_ADD	0x0d
+#define IXGBE_VF_IPSEC_DEL	0x0e
+
 /* length of permanent address message returned from PF */
 #define IXGBE_VF_PERMADDR_MSG_LEN	4
 /* word in permanent address message with the current multicast type */
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index bf0577e..d5ce496 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -309,6 +309,7 @@
 	 * is not supported for this device type.
 	 */
 	switch (hw->api_version) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		if (hw->mac.type < ixgbe_mac_X550_vf)
@@ -376,6 +377,7 @@
 	 * or if the operation is not supported for this device type.
 	 */
 	switch (hw->api_version) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		if (hw->mac.type < ixgbe_mac_X550_vf)
@@ -506,9 +508,8 @@
 		vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr);
 	}
 
-	ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, IXGBE_VFMAILBOX_SIZE);
-
-	return 0;
+	return ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+			IXGBE_VFMAILBOX_SIZE);
 }
 
 /**
@@ -540,6 +541,7 @@
 		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
 			return -EOPNOTSUPP;
 		/* Fall through */
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 		break;
 	default:
@@ -890,6 +892,7 @@
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		break;
 	default:
 		return 0;
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index a5ab6f3..25aa400 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * JMicron JMC2x0 series PCIe Ethernet Linux Device Driver
  *
@@ -6,20 +7,6 @@
  * Copyright (c) 2009 - 2010 Guo-Fu Tseng <cooldavid@cooldavid.org>
  *
  * Author: Guo-Fu Tseng <cooldavid@cooldavid.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -27,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -595,11 +581,6 @@
 	if (unlikely(!(txring->bufinf)))
 		goto err_free_txring;
 
-	/*
-	 * Initialize Transmit Descriptors
-	 */
-	memset(txring->alloc, 0, TX_RING_ALLOC_SIZE(jme->tx_ring_size));
-
 	return 0;
 
 err_free_txring:
@@ -2034,10 +2015,9 @@
 				ctxbi->len,
 				PCI_DMA_TODEVICE);
 
-				ctxbi->mapping = 0;
-				ctxbi->len = 0;
+		ctxbi->mapping = 0;
+		ctxbi->len = 0;
 	}
-
 }
 
 static int
@@ -2049,23 +2029,22 @@
 	bool hidma = jme->dev->features & NETIF_F_HIGHDMA;
 	int i, nr_frags = skb_shinfo(skb)->nr_frags;
 	int mask = jme->tx_ring_mask;
-	const struct skb_frag_struct *frag;
 	u32 len;
 	int ret = 0;
 
 	for (i = 0 ; i < nr_frags ; ++i) {
-		frag = &skb_shinfo(skb)->frags[i];
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
 		ctxdesc = txdesc + ((idx + i + 2) & (mask));
 		ctxbi = txbi + ((idx + i + 2) & (mask));
 
 		ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi,
-				skb_frag_page(frag),
-				frag->page_offset, skb_frag_size(frag), hidma);
+				      skb_frag_page(frag), skb_frag_off(frag),
+				      skb_frag_size(frag), hidma);
 		if (ret) {
 			jme_drop_tx_map(jme, idx, i);
 			goto out;
 		}
-
 	}
 
 	len = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
@@ -3212,8 +3191,7 @@
 static int
 jme_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct jme_adapter *jme = netdev_priv(netdev);
 
 	if (!netif_running(netdev))
@@ -3255,8 +3233,7 @@
 static int
 jme_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct jme_adapter *jme = netdev_priv(netdev);
 
 	if (!netif_running(netdev))
diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h
index 89535c0..2bba5ce 100644
--- a/drivers/net/ethernet/jme.h
+++ b/drivers/net/ethernet/jme.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * JMicron JMC2x0 series PCIe Ethernet Linux Device Driver
  *
@@ -6,20 +7,6 @@
  * Copyright (c) 2009 - 2010 Guo-Fu Tseng <cooldavid@cooldavid.org>
  *
  * Author: Guo-Fu Tseng <cooldavid@cooldavid.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  */
 
 #ifndef __JME_H_INCLUDED__
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index e08301d..6e73ffe 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -1,15 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- *   This program is free software; you can redistribute it and/or modify it
- *   under the terms of the GNU General Public License version 2 as published
- *   by the Free Software Foundation.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
  */
@@ -112,10 +102,12 @@
 static int
 ltq_etop_alloc_skb(struct ltq_etop_chan *ch)
 {
+	struct ltq_etop_priv *priv = netdev_priv(ch->netdev);
+
 	ch->skb[ch->dma.desc] = netdev_alloc_skb(ch->netdev, MAX_DMA_DATA_LEN);
 	if (!ch->skb[ch->dma.desc])
 		return -ENOMEM;
-	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(NULL,
+	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(&priv->pdev->dev,
 		ch->skb[ch->dma.desc]->data, MAX_DMA_DATA_LEN,
 		DMA_FROM_DEVICE);
 	ch->dma.desc_base[ch->dma.desc].addr =
@@ -365,15 +357,8 @@
 		return PTR_ERR(phydev);
 	}
 
-	phydev->supported &= (SUPPORTED_10baseT_Half
-			      | SUPPORTED_10baseT_Full
-			      | SUPPORTED_100baseT_Half
-			      | SUPPORTED_100baseT_Full
-			      | SUPPORTED_Autoneg
-			      | SUPPORTED_MII
-			      | SUPPORTED_TP);
+	phy_set_max_speed(phydev, SPEED_100);
 
-	phydev->advertising = phydev->supported;
 	phy_attached_info(phydev);
 
 	return 0;
@@ -439,6 +424,7 @@
 		if (!IS_TX(i) && (!IS_RX(i)))
 			continue;
 		ltq_dma_open(&ch->dma);
+		ltq_dma_enable_irq(&ch->dma);
 		napi_enable(&ch->napi);
 	}
 	phy_start(dev->phydev);
@@ -493,7 +479,7 @@
 	netif_trans_update(dev);
 
 	spin_lock_irqsave(&priv->lock, flags);
-	desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len,
+	desc->addr = ((unsigned int) dma_map_single(&priv->pdev->dev, skb->data, len,
 						DMA_TO_DEVICE)) - byte_offset;
 	wmb();
 	desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
new file mode 100644
index 0000000..900affb
--- /dev/null
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -0,0 +1,561 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lantiq / Intel PMAC driver for XRX200 SoCs
+ *
+ * Copyright (C) 2010 Lantiq Deutschland
+ * Copyright (C) 2012 John Crispin <john@phrozen.org>
+ * Copyright (C) 2017 - 2018 Hauke Mehrtens <hauke@hauke-m.de>
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+
+#include <xway_dma.h>
+
+/* DMA */
+#define XRX200_DMA_DATA_LEN	0x600
+#define XRX200_DMA_RX		0
+#define XRX200_DMA_TX		1
+
+/* cpu port mac */
+#define PMAC_RX_IPG		0x0024
+#define PMAC_RX_IPG_MASK	0xf
+
+#define PMAC_HD_CTL		0x0000
+/* Add Ethernet header to packets from DMA to PMAC */
+#define PMAC_HD_CTL_ADD		BIT(0)
+/* Add VLAN tag to Packets from DMA to PMAC */
+#define PMAC_HD_CTL_TAG		BIT(1)
+/* Add CRC to packets from DMA to PMAC */
+#define PMAC_HD_CTL_AC		BIT(2)
+/* Add status header to packets from PMAC to DMA */
+#define PMAC_HD_CTL_AS		BIT(3)
+/* Remove CRC from packets from PMAC to DMA */
+#define PMAC_HD_CTL_RC		BIT(4)
+/* Remove Layer-2 header from packets from PMAC to DMA */
+#define PMAC_HD_CTL_RL2		BIT(5)
+/* Status header is present from DMA to PMAC */
+#define PMAC_HD_CTL_RXSH	BIT(6)
+/* Add special tag from PMAC to switch */
+#define PMAC_HD_CTL_AST		BIT(7)
+/* Remove special tag from PMAC to DMA */
+#define PMAC_HD_CTL_RST		BIT(8)
+/* Check CRC from DMA to PMAC */
+#define PMAC_HD_CTL_CCRC	BIT(9)
+/* Enable reaction to Pause frames in the PMAC */
+#define PMAC_HD_CTL_FC		BIT(10)
+
+struct xrx200_chan {
+	int tx_free;
+
+	struct napi_struct napi;
+	struct ltq_dma_channel dma;
+	struct sk_buff *skb[LTQ_DESC_NUM];
+
+	struct xrx200_priv *priv;
+};
+
+struct xrx200_priv {
+	struct clk *clk;
+
+	struct xrx200_chan chan_tx;
+	struct xrx200_chan chan_rx;
+
+	struct net_device *net_dev;
+	struct device *dev;
+
+	__iomem void *pmac_reg;
+};
+
+static u32 xrx200_pmac_r32(struct xrx200_priv *priv, u32 offset)
+{
+	return __raw_readl(priv->pmac_reg + offset);
+}
+
+static void xrx200_pmac_w32(struct xrx200_priv *priv, u32 val, u32 offset)
+{
+	__raw_writel(val, priv->pmac_reg + offset);
+}
+
+static void xrx200_pmac_mask(struct xrx200_priv *priv, u32 clear, u32 set,
+			     u32 offset)
+{
+	u32 val = xrx200_pmac_r32(priv, offset);
+
+	val &= ~(clear);
+	val |= set;
+	xrx200_pmac_w32(priv, val, offset);
+}
+
+/* drop all the packets from the DMA ring */
+static void xrx200_flush_dma(struct xrx200_chan *ch)
+{
+	int i;
+
+	for (i = 0; i < LTQ_DESC_NUM; i++) {
+		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+
+		if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) != LTQ_DMA_C)
+			break;
+
+		desc->ctl = LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
+			    XRX200_DMA_DATA_LEN;
+		ch->dma.desc++;
+		ch->dma.desc %= LTQ_DESC_NUM;
+	}
+}
+
+static int xrx200_open(struct net_device *net_dev)
+{
+	struct xrx200_priv *priv = netdev_priv(net_dev);
+
+	napi_enable(&priv->chan_tx.napi);
+	ltq_dma_open(&priv->chan_tx.dma);
+	ltq_dma_enable_irq(&priv->chan_tx.dma);
+
+	napi_enable(&priv->chan_rx.napi);
+	ltq_dma_open(&priv->chan_rx.dma);
+	/* The boot loader does not always stop the ports from receiving
+	 * frames, so some packets can queue up in the PPE buffers.
+	 * They already passed the PMAC and so lack the tags configured
+	 * here. Read these packets and drop them.
+	 * The HW should have written them into memory after 10us
+	 */
+	usleep_range(20, 40);
+	xrx200_flush_dma(&priv->chan_rx);
+	ltq_dma_enable_irq(&priv->chan_rx.dma);
+
+	netif_wake_queue(net_dev);
+
+	return 0;
+}
+
+static int xrx200_close(struct net_device *net_dev)
+{
+	struct xrx200_priv *priv = netdev_priv(net_dev);
+
+	netif_stop_queue(net_dev);
+
+	napi_disable(&priv->chan_rx.napi);
+	ltq_dma_close(&priv->chan_rx.dma);
+
+	napi_disable(&priv->chan_tx.napi);
+	ltq_dma_close(&priv->chan_tx.dma);
+
+	return 0;
+}
+
+static int xrx200_alloc_skb(struct xrx200_chan *ch)
+{
+	int ret = 0;
+
+	ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(ch->priv->net_dev,
+							  XRX200_DMA_DATA_LEN);
+	if (!ch->skb[ch->dma.desc]) {
+		ret = -ENOMEM;
+		goto skip;
+	}
+
+	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(ch->priv->dev,
+			ch->skb[ch->dma.desc]->data, XRX200_DMA_DATA_LEN,
+			DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(ch->priv->dev,
+				       ch->dma.desc_base[ch->dma.desc].addr))) {
+		dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+		ret = -ENOMEM;
+		goto skip;
+	}
+
+skip:
+	ch->dma.desc_base[ch->dma.desc].ctl =
+		LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
+		XRX200_DMA_DATA_LEN;
+
+	return ret;
+}
+
+static int xrx200_hw_receive(struct xrx200_chan *ch)
+{
+	struct xrx200_priv *priv = ch->priv;
+	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+	struct sk_buff *skb = ch->skb[ch->dma.desc];
+	int len = (desc->ctl & LTQ_DMA_SIZE_MASK);
+	struct net_device *net_dev = priv->net_dev;
+	int ret;
+
+	ret = xrx200_alloc_skb(ch);
+
+	ch->dma.desc++;
+	ch->dma.desc %= LTQ_DESC_NUM;
+
+	if (ret) {
+		netdev_err(net_dev, "failed to allocate new rx buffer\n");
+		return ret;
+	}
+
+	skb_put(skb, len);
+	skb->protocol = eth_type_trans(skb, net_dev);
+	netif_receive_skb(skb);
+	net_dev->stats.rx_packets++;
+	net_dev->stats.rx_bytes += len - ETH_FCS_LEN;
+
+	return 0;
+}
+
+static int xrx200_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct xrx200_chan *ch = container_of(napi,
+				struct xrx200_chan, napi);
+	int rx = 0;
+	int ret;
+
+	while (rx < budget) {
+		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+
+		if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) == LTQ_DMA_C) {
+			ret = xrx200_hw_receive(ch);
+			if (ret)
+				return ret;
+			rx++;
+		} else {
+			break;
+		}
+	}
+
+	if (rx < budget) {
+		napi_complete(&ch->napi);
+		ltq_dma_enable_irq(&ch->dma);
+	}
+
+	return rx;
+}
+
+static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget)
+{
+	struct xrx200_chan *ch = container_of(napi,
+				struct xrx200_chan, napi);
+	struct net_device *net_dev = ch->priv->net_dev;
+	int pkts = 0;
+	int bytes = 0;
+
+	while (pkts < budget) {
+		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->tx_free];
+
+		if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) == LTQ_DMA_C) {
+			struct sk_buff *skb = ch->skb[ch->tx_free];
+
+			pkts++;
+			bytes += skb->len;
+			ch->skb[ch->tx_free] = NULL;
+			consume_skb(skb);
+			memset(&ch->dma.desc_base[ch->tx_free], 0,
+			       sizeof(struct ltq_dma_desc));
+			ch->tx_free++;
+			ch->tx_free %= LTQ_DESC_NUM;
+		} else {
+			break;
+		}
+	}
+
+	net_dev->stats.tx_packets += pkts;
+	net_dev->stats.tx_bytes += bytes;
+	netdev_completed_queue(ch->priv->net_dev, pkts, bytes);
+
+	if (pkts < budget) {
+		napi_complete(&ch->napi);
+		ltq_dma_enable_irq(&ch->dma);
+	}
+
+	return pkts;
+}
+
+static int xrx200_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+{
+	struct xrx200_priv *priv = netdev_priv(net_dev);
+	struct xrx200_chan *ch = &priv->chan_tx;
+	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+	u32 byte_offset;
+	dma_addr_t mapping;
+	int len;
+
+	skb->dev = net_dev;
+	if (skb_put_padto(skb, ETH_ZLEN)) {
+		net_dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	len = skb->len;
+
+	if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) {
+		netdev_err(net_dev, "tx ring full\n");
+		netif_stop_queue(net_dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	ch->skb[ch->dma.desc] = skb;
+
+	mapping = dma_map_single(priv->dev, skb->data, len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(priv->dev, mapping)))
+		goto err_drop;
+
+	/* dma needs to start on a 16 byte aligned address */
+	byte_offset = mapping % 16;
+
+	desc->addr = mapping - byte_offset;
+	/* Make sure the address is written before we give it to HW */
+	wmb();
+	desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
+		LTQ_DMA_TX_OFFSET(byte_offset) | (len & LTQ_DMA_SIZE_MASK);
+	ch->dma.desc++;
+	ch->dma.desc %= LTQ_DESC_NUM;
+	if (ch->dma.desc == ch->tx_free)
+		netif_stop_queue(net_dev);
+
+	netdev_sent_queue(net_dev, len);
+
+	return NETDEV_TX_OK;
+
+err_drop:
+	dev_kfree_skb(skb);
+	net_dev->stats.tx_dropped++;
+	net_dev->stats.tx_errors++;
+	return NETDEV_TX_OK;
+}
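
A worked sketch of the 16-byte alignment handling above (addresses illustrative):

	/* The DMA engine wants a 16-byte aligned start address, so the
	 * descriptor carries the aligned-down address plus a byte offset:
	 *   mapping     = 0x1000000a
	 *   byte_offset = 0x1000000a % 16 == 10
	 *   desc->addr  = 0x10000000
	 * and LTQ_DMA_TX_OFFSET(10) tells the engine to skip 10 bytes.
	 */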
+
+static const struct net_device_ops xrx200_netdev_ops = {
+	.ndo_open		= xrx200_open,
+	.ndo_stop		= xrx200_close,
+	.ndo_start_xmit		= xrx200_start_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
+{
+	struct xrx200_chan *ch = ptr;
+
+	ltq_dma_disable_irq(&ch->dma);
+	ltq_dma_ack_irq(&ch->dma);
+
+	napi_schedule(&ch->napi);
+
+	return IRQ_HANDLED;
+}
+
+static int xrx200_dma_init(struct xrx200_priv *priv)
+{
+	struct xrx200_chan *ch_rx = &priv->chan_rx;
+	struct xrx200_chan *ch_tx = &priv->chan_tx;
+	int ret = 0;
+	int i;
+
+	ltq_dma_init_port(DMA_PORT_ETOP);
+
+	ch_rx->dma.nr = XRX200_DMA_RX;
+	ch_rx->dma.dev = priv->dev;
+	ch_rx->priv = priv;
+
+	ltq_dma_alloc_rx(&ch_rx->dma);
+	for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
+	     ch_rx->dma.desc++) {
+		ret = xrx200_alloc_skb(ch_rx);
+		if (ret)
+			goto rx_free;
+	}
+	ch_rx->dma.desc = 0;
+	ret = devm_request_irq(priv->dev, ch_rx->dma.irq, xrx200_dma_irq, 0,
+			       "xrx200_net_rx", &priv->chan_rx);
+	if (ret) {
+		dev_err(priv->dev, "failed to request RX irq %d\n",
+			ch_rx->dma.irq);
+		goto rx_ring_free;
+	}
+
+	ch_tx->dma.nr = XRX200_DMA_TX;
+	ch_tx->dma.dev = priv->dev;
+	ch_tx->priv = priv;
+
+	ltq_dma_alloc_tx(&ch_tx->dma);
+	ret = devm_request_irq(priv->dev, ch_tx->dma.irq, xrx200_dma_irq, 0,
+			       "xrx200_net_tx", &priv->chan_tx);
+	if (ret) {
+		dev_err(priv->dev, "failed to request TX irq %d\n",
+			ch_tx->dma.irq);
+		goto tx_free;
+	}
+
+	return ret;
+
+tx_free:
+	ltq_dma_free(&ch_tx->dma);
+
+rx_ring_free:
+	/* free the allocated RX ring */
+	for (i = 0; i < LTQ_DESC_NUM; i++) {
+		if (priv->chan_rx.skb[i])
+			dev_kfree_skb_any(priv->chan_rx.skb[i]);
+	}
+
+rx_free:
+	ltq_dma_free(&ch_rx->dma);
+	return ret;
+}
+
+static void xrx200_hw_cleanup(struct xrx200_priv *priv)
+{
+	int i;
+
+	ltq_dma_free(&priv->chan_tx.dma);
+	ltq_dma_free(&priv->chan_rx.dma);
+
+	/* free the allocated RX ring */
+	for (i = 0; i < LTQ_DESC_NUM; i++)
+		dev_kfree_skb_any(priv->chan_rx.skb[i]);
+}
+
+static int xrx200_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	struct xrx200_priv *priv;
+	struct net_device *net_dev;
+	const u8 *mac;
+	int err;
+
+	/* alloc the network device */
+	net_dev = devm_alloc_etherdev(dev, sizeof(struct xrx200_priv));
+	if (!net_dev)
+		return -ENOMEM;
+
+	priv = netdev_priv(net_dev);
+	priv->net_dev = net_dev;
+	priv->dev = dev;
+
+	net_dev->netdev_ops = &xrx200_netdev_ops;
+	SET_NETDEV_DEV(net_dev, dev);
+	net_dev->min_mtu = ETH_ZLEN;
+	net_dev->max_mtu = XRX200_DMA_DATA_LEN;
+
+	/* load the memory ranges */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "failed to get resources\n");
+		return -ENOENT;
+	}
+
+	priv->pmac_reg = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->pmac_reg)) {
+		dev_err(dev, "failed to request and remap io ranges\n");
+		return PTR_ERR(priv->pmac_reg);
+	}
+
+	priv->chan_rx.dma.irq = platform_get_irq_byname(pdev, "rx");
+	if (priv->chan_rx.dma.irq < 0)
+		return -ENOENT;
+	priv->chan_tx.dma.irq = platform_get_irq_byname(pdev, "tx");
+	if (priv->chan_tx.dma.irq < 0)
+		return -ENOENT;
+
+	/* get the clock */
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(dev, "failed to get clock\n");
+		return PTR_ERR(priv->clk);
+	}
+
+	mac = of_get_mac_address(np);
+	if (!IS_ERR(mac))
+		ether_addr_copy(net_dev->dev_addr, mac);
+	else
+		eth_hw_addr_random(net_dev);
+
+	/* bring up the dma engine and IP core */
+	err = xrx200_dma_init(priv);
+	if (err)
+		return err;
+
+	/* enable clock gate */
+	err = clk_prepare_enable(priv->clk);
+	if (err)
+		goto err_uninit_dma;
+
+	/* set IPG to 12 */
+	xrx200_pmac_mask(priv, PMAC_RX_IPG_MASK, 0xb, PMAC_RX_IPG);
+
+	/* enable status header, enable CRC */
+	xrx200_pmac_mask(priv, 0,
+			 PMAC_HD_CTL_RST | PMAC_HD_CTL_AST | PMAC_HD_CTL_RXSH |
+			 PMAC_HD_CTL_AS | PMAC_HD_CTL_AC | PMAC_HD_CTL_RC,
+			 PMAC_HD_CTL);
+
+	/* setup NAPI */
+	netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32);
+	netif_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32);
+
+	platform_set_drvdata(pdev, priv);
+
+	err = register_netdev(net_dev);
+	if (err)
+		goto err_unprepare_clk;
+
+	return 0;
+
+err_unprepare_clk:
+	clk_disable_unprepare(priv->clk);
+
+err_uninit_dma:
+	xrx200_hw_cleanup(priv);
+
+	return err;
+}
+
+static int xrx200_remove(struct platform_device *pdev)
+{
+	struct xrx200_priv *priv = platform_get_drvdata(pdev);
+	struct net_device *net_dev = priv->net_dev;
+
+	/* free stack related instances */
+	netif_stop_queue(net_dev);
+	netif_napi_del(&priv->chan_tx.napi);
+	netif_napi_del(&priv->chan_rx.napi);
+
+	/* remove the actual device */
+	unregister_netdev(net_dev);
+
+	/* release the clock */
+	clk_disable_unprepare(priv->clk);
+
+	/* shut down hardware */
+	xrx200_hw_cleanup(priv);
+
+	return 0;
+}
+
+static const struct of_device_id xrx200_match[] = {
+	{ .compatible = "lantiq,xrx200-net" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xrx200_match);
+
+static struct platform_driver xrx200_driver = {
+	.probe = xrx200_probe,
+	.remove = xrx200_remove,
+	.driver = {
+		.name = "lantiq,xrx200-net",
+		.of_match_table = xrx200_match,
+	},
+};
+
+module_platform_driver(xrx200_driver);
+
+MODULE_AUTHOR("John Crispin <john@phrozen.org>");
+MODULE_DESCRIPTION("Lantiq SoC XRX200 ethernet");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index f33fd22..fb94216 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Marvell device configuration
 #
@@ -167,4 +168,7 @@
 
 	  If unsure, say N.
 
+
+source "drivers/net/ethernet/marvell/octeontx2/Kconfig"
+
 endif # NET_VENDOR_MARVELL
diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile
index 55d4d10..89dea72 100644
--- a/drivers/net/ethernet/marvell/Makefile
+++ b/drivers/net/ethernet/marvell/Makefile
@@ -11,3 +11,4 @@
 obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o
 obj-$(CONFIG_SKGE) += skge.o
 obj-$(CONFIG_SKY2) += sky2.o
+obj-y		+= octeontx2/
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 62f204f..82ea55a 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Driver for Marvell Discovery (MV643XX) and Marvell Orion ethernet ports
  * Copyright (C) 2002 Matthew Dharm <mdharm@momenco.com>
@@ -21,19 +22,6 @@
  *			   Lennert Buytenhek <buytenh@marvell.com>
  *
  * Copyright (C) 2013 Michael Stapelberg <michael@stapelberg.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -671,7 +659,7 @@
 	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
 		const skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag];
 
-		if (skb_frag_size(fragp) <= 8 && fragp->page_offset & 7)
+		if (skb_frag_size(fragp) <= 8 && skb_frag_off(fragp) & 7)
 			return 1;
 	}
 
@@ -1499,23 +1487,16 @@
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct net_device *dev = mp->dev;
-	u32 supported, advertising;
 
 	phy_ethtool_ksettings_get(dev->phydev, cmd);
 
 	/*
 	 * The MAC does not support 1000baseT_Half.
 	 */
-	ethtool_convert_link_mode_to_legacy_u32(&supported,
-						cmd->link_modes.supported);
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						cmd->link_modes.advertising);
-	supported &= ~SUPPORTED_1000baseT_Half;
-	advertising &= ~ADVERTISED_1000baseT_Half;
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+			   cmd->link_modes.supported);
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+			   cmd->link_modes.advertising);
 
 	return 0;
 }
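
The replacement above uses the linkmode bitmap helpers, which unlike the legacy u32 masks can express modes past bit 31; a minimal generic sketch:

	static void linkmode_sketch(void)
	{
		__ETHTOOL_DECLARE_LINK_MODE_MASK(modes);

		linkmode_zero(modes);
		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, modes);
		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, modes);
	}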
@@ -2733,17 +2714,17 @@
 
 	memset(&res, 0, sizeof(res));
 	if (of_irq_to_resource(pnp, 0, &res) <= 0) {
-		dev_err(&pdev->dev, "missing interrupt on %s\n", pnp->name);
+		dev_err(&pdev->dev, "missing interrupt on %pOFn\n", pnp);
 		return -EINVAL;
 	}
 
 	if (of_property_read_u32(pnp, "reg", &ppd.port_number)) {
-		dev_err(&pdev->dev, "missing reg property on %s\n", pnp->name);
+		dev_err(&pdev->dev, "missing reg property on %pOFn\n", pnp);
 		return -EINVAL;
 	}
 
 	if (ppd.port_number >= 3) {
-		dev_err(&pdev->dev, "invalid reg property on %s\n", pnp->name);
+		dev_err(&pdev->dev, "invalid reg property on %pOFn\n", pnp);
 		return -EINVAL;
 	}
 
@@ -2756,8 +2737,8 @@
 	}
 
 	mac_addr = of_get_mac_address(pnp);
-	if (mac_addr)
-		memcpy(ppd.mac_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(ppd.mac_addr, mac_addr);
 
 	mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size);
 	mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr);
@@ -2886,7 +2867,7 @@
 
 	ret = mv643xx_eth_shared_of_probe(pdev);
 	if (ret)
-		return ret;
+		goto err_put_clk;
 	pd = dev_get_platdata(&pdev->dev);
 
 	msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ?
@@ -2894,6 +2875,11 @@
 	infer_hw_params(msp);
 
 	return 0;
+
+err_put_clk:
+	if (!IS_ERR(msp->clk))
+		clk_disable_unprepare(msp->clk);
+	return ret;
 }
 
 static int mv643xx_eth_shared_remove(struct platform_device *pdev)
@@ -3031,10 +3017,12 @@
 		phy->autoneg = AUTONEG_ENABLE;
 		phy->speed = 0;
 		phy->duplex = 0;
-		phy->advertising = phy->supported | ADVERTISED_Autoneg;
+		linkmode_copy(phy->advertising, phy->supported);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+				 phy->advertising);
 	} else {
 		phy->autoneg = AUTONEG_DISABLE;
-		phy->advertising = 0;
+		linkmode_zero(phy->advertising);
 		phy->speed = speed;
 		phy->duplex = duplex;
 	}
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index c5dac6b..0b9e851 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -64,7 +64,7 @@
 
 struct orion_mdio_dev {
 	void __iomem *regs;
-	struct clk *clk[3];
+	struct clk *clk[4];
 	/*
 	 * If we have access to the error interrupt pin (which is
 	 * somewhat misnamed as it not only reflects internal errors
@@ -319,13 +319,34 @@
 
 	init_waitqueue_head(&dev->smi_busy_wait);
 
-	for (i = 0; i < ARRAY_SIZE(dev->clk); i++) {
-		dev->clk[i] = of_clk_get(pdev->dev.of_node, i);
-		if (IS_ERR(dev->clk[i]))
-			break;
-		clk_prepare_enable(dev->clk[i]);
+	if (pdev->dev.of_node) {
+		for (i = 0; i < ARRAY_SIZE(dev->clk); i++) {
+			dev->clk[i] = of_clk_get(pdev->dev.of_node, i);
+			if (PTR_ERR(dev->clk[i]) == -EPROBE_DEFER) {
+				ret = -EPROBE_DEFER;
+				goto out_clk;
+			}
+			if (IS_ERR(dev->clk[i]))
+				break;
+			clk_prepare_enable(dev->clk[i]);
+		}
+
+		if (!IS_ERR(of_clk_get(pdev->dev.of_node,
+				       ARRAY_SIZE(dev->clk))))
+			dev_warn(&pdev->dev,
+				 "unsupported number of clocks, limiting to the first "
+				 __stringify(ARRAY_SIZE(dev->clk)) "\n");
+	} else {
+		dev->clk[0] = clk_get(&pdev->dev, NULL);
+		if (PTR_ERR(dev->clk[0]) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto out_clk;
+		}
+		if (!IS_ERR(dev->clk[0]))
+			clk_prepare_enable(dev->clk[0]);
 	}
 
 	dev->err_interrupt = platform_get_irq(pdev, 0);
 	if (dev->err_interrupt > 0 &&
 	    resource_size(r) < MVMDIO_ERR_INT_MASK + 4) {
@@ -362,6 +383,7 @@
 	if (dev->err_interrupt > 0)
 		writel(0, dev->regs + MVMDIO_ERR_INT_MASK);
 
+out_clk:
 	for (i = 0; i < ARRAY_SIZE(dev->clk); i++) {
 		if (IS_ERR(dev->clk[i]))
 			break;
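
A sketch of the deferral pattern this hunk adopts (generic clk-consumer code; np is an assumed device node):

	static int clk_get_sketch(struct device_node *np, struct clk **out)
	{
		struct clk *clk = of_clk_get(np, 0);

		/* A provider that has not probed yet yields -EPROBE_DEFER;
		 * propagate it so probe is retried later instead of treating
		 * the device as having no clock.
		 */
		if (PTR_ERR(clk) == -EPROBE_DEFER)
			return -EPROBE_DEFER;
		if (!IS_ERR(clk))
			clk_prepare_enable(clk);
		*out = clk;
		return 0;
	}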
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index b4ed7d3..e498206 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -27,6 +27,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
+#include <linux/phy/phy.h>
 #include <linux/phy.h>
 #include <linux/phylink.h>
 #include <linux/platform_device.h>
@@ -221,6 +222,8 @@
 #define      MVNETA_GMAC_AN_FLOW_CTRL_EN         BIT(11)
 #define      MVNETA_GMAC_CONFIG_FULL_DUPLEX      BIT(12)
 #define      MVNETA_GMAC_AN_DUPLEX_EN            BIT(13)
+#define MVNETA_GMAC_CTRL_4                       0x2c90
+#define      MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE  BIT(1)
 #define MVNETA_MIB_COUNTERS_BASE                 0x3000
 #define      MVNETA_MIB_LATE_COLLISION           0x7c
 #define MVNETA_DA_FILT_SPEC_MCAST                0x3400
@@ -406,7 +409,6 @@
 	struct mvneta_pcpu_stats __percpu	*stats;
 
 	int pkt_size;
-	unsigned int frag_size;
 	void __iomem *base;
 	struct mvneta_rx_queue *rxqs;
 	struct mvneta_tx_queue *txqs;
@@ -435,6 +437,8 @@
 	struct device_node *dn;
 	unsigned int tx_csum_limit;
 	struct phylink *phylink;
+	struct phylink_config phylink_config;
+	struct phy *comphy;
 
 	struct mvneta_bm *bm_priv;
 	struct mvneta_bm_pool *pool_long;
@@ -492,7 +496,7 @@
 #if defined(__LITTLE_ENDIAN)
 struct mvneta_tx_desc {
 	u32  command;		/* Options used by HW for packet transmitting.*/
-	u16  reserverd1;	/* csum_l4 (for future use)		*/
+	u16  reserved1;		/* csum_l4 (for future use)		*/
 	u16  data_size;		/* Data size of transmitted packet in bytes */
 	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
 	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
@@ -517,7 +521,7 @@
 #else
 struct mvneta_tx_desc {
 	u16  data_size;		/* Data size of transmitted packet in bytes */
-	u16  reserverd1;	/* csum_l4 (for future use)		*/
+	u16  reserved1;		/* csum_l4 (for future use)		*/
 	u32  command;		/* Options used by HW for packet transmitting.*/
 	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
 	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
@@ -1115,7 +1119,7 @@
 			SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
 
 	/* Fill entire long pool */
-	num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+	num = hwbm_pool_add(hwbm_pool, hwbm_pool->size);
 	if (num != hwbm_pool->size) {
 		WARN(1, "pool %d: %d of %d allocated\n",
 		     bm_pool->id, num, hwbm_pool->size);
@@ -2064,10 +2068,7 @@
 		/* Linux processing */
 		rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
 
-		if (dev->features & NETIF_F_GRO)
-			napi_gro_receive(napi, rxq->skb);
-		else
-			netif_receive_skb(rxq->skb);
+		napi_gro_receive(napi, rxq->skb);
 
 		/* clean incomplete skb pointer in queue */
 		rxq->skb = NULL;
@@ -2148,7 +2149,7 @@
 			if (unlikely(!skb))
 				goto err_drop_frame_ret_pool;
 
-			dma_sync_single_range_for_cpu(dev->dev.parent,
+			dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev,
 			                              rx_desc->buf_phys_addr,
 			                              MVNETA_MH_SIZE + NET_SKB_PAD,
 			                              rx_bytes,
@@ -2349,10 +2350,10 @@
 
 	for (i = 0; i < nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		void *addr = page_address(frag->page.p) + frag->page_offset;
+		void *addr = skb_frag_address(frag);
 
 		tx_desc = mvneta_txq_next_desc_get(txq);
-		tx_desc->data_size = frag->size;
+		tx_desc->data_size = skb_frag_size(frag);
 
 		tx_desc->buf_phys_addr =
 			dma_map_single(pp->dev->dev.parent, addr,
@@ -2395,7 +2396,7 @@
 }
 
 /* Main tx processing */
-static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	u16 txq_id = skb_get_queue_mapping(skb);
@@ -2467,7 +2468,7 @@
 		if (txq->count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
 
-		if (!skb->xmit_more || netif_xmit_stopped(nq) ||
+		if (!netdev_xmit_more() || netif_xmit_stopped(nq) ||
 		    txq->pending + frags > MVNETA_TXQ_DEC_SENT_MASK)
 			mvneta_txq_pend_desc_add(pp, txq, frags);
 		else
@@ -2509,12 +2510,13 @@
 {
 	struct mvneta_tx_queue *txq;
 	struct netdev_queue *nq;
+	int cpu = smp_processor_id();
 
 	while (cause_tx_done) {
 		txq = mvneta_tx_done_policy(pp, cause_tx_done);
 
 		nq = netdev_get_tx_queue(pp->dev, txq->id);
-		__netif_tx_lock(nq, smp_processor_id());
+		__netif_tx_lock(nq, cpu);
 
 		if (txq->count)
 			mvneta_txq_done(pp, txq);
@@ -2905,7 +2907,9 @@
 	if (!pp->bm_priv) {
 		/* Set Offset */
 		mvneta_rxq_offset_set(pp, rxq, 0);
-		mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size);
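+		/* Without HWBM, use a full page per buffer on small-page
+		 * systems; with 64K pages, derive the buffer size from the
+		 * MTU instead.
+		 */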
+		mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ?
+					PAGE_SIZE :
+					MVNETA_RX_BUF_SIZE(pp->pkt_size));
 		mvneta_rxq_bm_disable(pp, rxq);
 		mvneta_rxq_fill(pp, rxq, rxq->size);
 	} else {
@@ -3146,10 +3150,27 @@
 	return 0;
 }
 
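+/* Set the common PHY (SerDes lane) to the current interface mode and power
+ * it on.  This is a no-op on ports without a comphy.
+ */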
+static int mvneta_comphy_init(struct mvneta_port *pp)
+{
+	int ret;
+
+	if (!pp->comphy)
+		return 0;
+
+	ret = phy_set_mode_ext(pp->comphy, PHY_MODE_ETHERNET,
+			       pp->phy_interface);
+	if (ret)
+		return ret;
+
+	return phy_power_on(pp->comphy);
+}
+
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
 	int cpu;
 
+	WARN_ON(mvneta_comphy_init(pp));
+
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
 
@@ -3212,6 +3233,8 @@
 
 	mvneta_tx_reset(pp);
 	mvneta_rx_reset(pp);
+
+	WARN_ON(phy_power_off(pp->comphy));
 }
 
 static void mvneta_percpu_enable(void *arg)
@@ -3334,9 +3357,12 @@
 	return 0;
 }
 
-static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
+static void mvneta_validate(struct phylink_config *config,
+			    unsigned long *supported,
 			    struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct mvneta_port *pp = netdev_priv(ndev);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
 	/* We only support QSGMII, SGMII, 802.3z and RGMII modes */
@@ -3355,9 +3381,16 @@
 
 	/* Asymmetric pause is unsupported */
 	phylink_set(mask, Pause);
+
 	/* Half-duplex at speeds higher than 100Mbit is unsupported */
-	phylink_set(mask, 1000baseT_Full);
-	phylink_set(mask, 1000baseX_Full);
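+	/* With a comphy the SerDes can be retuned between 1G and 2.5G at
+	 * runtime, so both families remain selectable; without one, only
+	 * the speeds matching the fixed interface mode are advertised.
+	 */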
+	if (pp->comphy || state->interface != PHY_INTERFACE_MODE_2500BASEX) {
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseX_Full);
+	}
+	if (pp->comphy || state->interface == PHY_INTERFACE_MODE_2500BASEX) {
+		phylink_set(mask, 2500baseT_Full);
+		phylink_set(mask, 2500baseX_Full);
+	}
 
 	if (!phy_interface_mode_is_8023z(state->interface)) {
 		/* 10M and 100M are only supported in non-802.3z mode */
@@ -3371,18 +3404,26 @@
 		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 	bitmap_and(state->advertising, state->advertising, mask,
 		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	/* We can only operate at 2500BaseX or 1000BaseX.  If requested
+	 * to advertise both, only report advertising at 2500BaseX.
+	 */
+	phylink_helper_basex_speed(state);
 }
 
-static int mvneta_mac_link_state(struct net_device *ndev,
+static int mvneta_mac_link_state(struct phylink_config *config,
 				 struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 gmac_stat;
 
 	gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS);
 
 	if (gmac_stat & MVNETA_GMAC_SPEED_1000)
-		state->speed = SPEED_1000;
+		state->speed =
+			state->interface == PHY_INTERFACE_MODE_2500BASEX ?
+			SPEED_2500 : SPEED_1000;
 	else if (gmac_stat & MVNETA_GMAC_SPEED_100)
 		state->speed = SPEED_100;
 	else
@@ -3401,8 +3442,9 @@
 	return 1;
 }
 
-static void mvneta_mac_an_restart(struct net_device *ndev)
+static void mvneta_mac_an_restart(struct phylink_config *config)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
 
@@ -3412,18 +3454,21 @@
 		    gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN);
 }
 
-static void mvneta_mac_config(struct net_device *ndev, unsigned int mode,
-	const struct phylink_link_state *state)
+static void mvneta_mac_config(struct phylink_config *config, unsigned int mode,
+			      const struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0);
 	u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+	u32 new_ctrl4, gmac_ctrl4 = mvreg_read(pp, MVNETA_GMAC_CTRL_4);
 	u32 new_clk, gmac_clk = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
 	u32 new_an, gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
 
 	new_ctrl0 = gmac_ctrl0 & ~MVNETA_GMAC0_PORT_1000BASE_X;
 	new_ctrl2 = gmac_ctrl2 & ~(MVNETA_GMAC2_INBAND_AN_ENABLE |
 				   MVNETA_GMAC2_PORT_RESET);
+	new_ctrl4 = gmac_ctrl4 & ~(MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE);
 	new_clk = gmac_clk & ~MVNETA_GMAC_1MS_CLOCK_ENABLE;
 	new_an = gmac_an & ~(MVNETA_GMAC_INBAND_AN_ENABLE |
 			     MVNETA_GMAC_INBAND_RESTART_AN |
@@ -3456,7 +3501,7 @@
 		if (state->duplex)
 			new_an |= MVNETA_GMAC_CONFIG_FULL_DUPLEX;
 
-		if (state->speed == SPEED_1000)
+		if (state->speed == SPEED_1000 || state->speed == SPEED_2500)
 			new_an |= MVNETA_GMAC_CONFIG_GMII_SPEED;
 		else if (state->speed == SPEED_100)
 			new_an |= MVNETA_GMAC_CONFIG_MII_SPEED;
@@ -3495,10 +3540,29 @@
 			    MVNETA_GMAC_FORCE_LINK_DOWN);
 	}
 
+
+	/* When at 2.5G, the link partner can send frames with shortened
+	 * preambles.
+	 */
+	if (state->speed == SPEED_2500)
+		new_ctrl4 |= MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE;
+
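+	/* Switching between SGMII, 1000BASE-X and 2500BASE-X requires the
+	 * comphy to be power cycled so the SerDes retrains for the new
+	 * mode.
+	 */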
+	if (pp->comphy && pp->phy_interface != state->interface &&
+	    (state->interface == PHY_INTERFACE_MODE_SGMII ||
+	     state->interface == PHY_INTERFACE_MODE_1000BASEX ||
+	     state->interface == PHY_INTERFACE_MODE_2500BASEX)) {
+		pp->phy_interface = state->interface;
+
+		WARN_ON(phy_power_off(pp->comphy));
+		WARN_ON(mvneta_comphy_init(pp));
+	}
+
 	if (new_ctrl0 != gmac_ctrl0)
 		mvreg_write(pp, MVNETA_GMAC_CTRL_0, new_ctrl0);
 	if (new_ctrl2 != gmac_ctrl2)
 		mvreg_write(pp, MVNETA_GMAC_CTRL_2, new_ctrl2);
+	if (new_ctrl4 != gmac_ctrl4)
+		mvreg_write(pp, MVNETA_GMAC_CTRL_4, new_ctrl4);
 	if (new_clk != gmac_clk)
 		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, new_clk);
 	if (new_an != gmac_an)
@@ -3523,9 +3587,10 @@
 	mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1);
 }
 
-static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode,
-				 phy_interface_t interface)
+static void mvneta_mac_link_down(struct phylink_config *config,
+				 unsigned int mode, phy_interface_t interface)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 val;
 
@@ -3542,10 +3607,11 @@
 	mvneta_set_eee(pp, false);
 }
 
-static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode,
+static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode,
 			       phy_interface_t interface,
 			       struct phy_device *phy)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 val;
 
@@ -3749,7 +3815,6 @@
 	int ret;
 
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
-	pp->frag_size = PAGE_SIZE;
 
 	ret = mvneta_setup_rxqs(pp);
 	if (ret)
@@ -3792,9 +3857,6 @@
 			goto err_free_online_hp;
 	}
 
-	/* In default link is down */
-	netif_carrier_off(pp->dev);
-
 	ret = mvneta_mdio_probe(pp);
 	if (ret < 0) {
 		netdev_err(dev, "cannot probe MDIO bus\n");
@@ -4240,8 +4302,7 @@
 
 	/* The Armada 37x documents do not give limits for this other than
 	 * it being an 8-bit register. */
-	if (eee->tx_lpi_enabled &&
-	    (eee->tx_lpi_timer < 0 || eee->tx_lpi_timer > 255))
+	if (eee->tx_lpi_enabled && eee->tx_lpi_timer > 255)
 		return -EINVAL;
 
 	lpi_ctl0 = mvreg_read(pp, MVNETA_LPI_CTRL_0);
@@ -4397,7 +4458,7 @@
 	if (phy_mode == PHY_INTERFACE_MODE_QSGMII)
 		mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_QSGMII_SERDES_PROTO);
 	else if (phy_mode == PHY_INTERFACE_MODE_SGMII ||
-		 phy_mode == PHY_INTERFACE_MODE_1000BASEX)
+		 phy_interface_mode_is_8023z(phy_mode))
 		mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO);
 	else if (!phy_interface_mode_is_rgmii(phy_mode))
 		return -EINVAL;
@@ -4408,12 +4469,12 @@
 /* Device initialization routine */
 static int mvneta_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	struct device_node *dn = pdev->dev.of_node;
 	struct device_node *bm_node;
 	struct mvneta_port *pp;
 	struct net_device *dev;
 	struct phylink *phylink;
+	struct phy *comphy;
 	const char *dt_mac_addr;
 	char hw_mac_addr[ETH_ALEN];
 	const char *mac_from;
@@ -4422,15 +4483,14 @@
 	int err;
 	int cpu;
 
-	dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number);
+	dev = devm_alloc_etherdev_mqs(&pdev->dev, sizeof(struct mvneta_port),
+				      txq_number, rxq_number);
 	if (!dev)
 		return -ENOMEM;
 
 	dev->irq = irq_of_parse_and_map(dn, 0);
-	if (dev->irq == 0) {
-		err = -EINVAL;
-		goto err_free_netdev;
-	}
+	if (dev->irq == 0)
+		return -EINVAL;
 
 	phy_mode = of_get_phy_mode(dn);
 	if (phy_mode < 0) {
@@ -4439,8 +4499,22 @@
 		goto err_free_irq;
 	}
 
-	phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode,
-				 &mvneta_phylink_ops);
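+	/* The comphy is optional: defer while its provider is not ready,
+	 * and treat any other lookup error as "no comphy".
+	 */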
+	comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
+	if (comphy == ERR_PTR(-EPROBE_DEFER)) {
+		err = -EPROBE_DEFER;
+		goto err_free_irq;
+	} else if (IS_ERR(comphy)) {
+		comphy = NULL;
+	}
+
+	pp = netdev_priv(dev);
+	spin_lock_init(&pp->lock);
+
+	pp->phylink_config.dev = &dev->dev;
+	pp->phylink_config.type = PHYLINK_NETDEV;
+
+	phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode,
+				 phy_mode, &mvneta_phylink_ops);
 	if (IS_ERR(phylink)) {
 		err = PTR_ERR(phylink);
 		goto err_free_irq;
@@ -4452,9 +4526,8 @@
 
 	dev->ethtool_ops = &mvneta_eth_tool_ops;
 
-	pp = netdev_priv(dev);
-	spin_lock_init(&pp->lock);
 	pp->phylink = phylink;
+	pp->comphy = comphy;
 	pp->phy_interface = phy_mode;
 	pp->dn = dn;
 
@@ -4479,8 +4552,7 @@
 	if (!IS_ERR(pp->clk_bus))
 		clk_prepare_enable(pp->clk_bus);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pp->base = devm_ioremap_resource(&pdev->dev, res);
+	pp->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pp->base)) {
 		err = PTR_ERR(pp->base);
 		goto err_clk;
@@ -4501,9 +4573,9 @@
 	}
 
 	dt_mac_addr = of_get_mac_address(dn);
-	if (dt_mac_addr) {
+	if (!IS_ERR(dt_mac_addr)) {
 		mac_from = "device tree";
-		memcpy(dev->dev_addr, dt_mac_addr, ETH_ALEN);
+		ether_addr_copy(dev->dev_addr, dt_mac_addr);
 	} else {
 		mvneta_get_mac_addr(pp, hw_mac_addr);
 		if (is_valid_ether_addr(hw_mac_addr)) {
@@ -4597,7 +4669,8 @@
 		}
 	}
 
-	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO;
+	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+			NETIF_F_TSO | NETIF_F_RXCSUM;
 	dev->hw_features |= dev->features;
 	dev->vlan_features |= dev->features;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -4611,7 +4684,7 @@
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register\n");
-		goto err_free_stats;
+		goto err_netdev;
 	}
 
 	netdev_info(dev, "Using %s mac address %pM\n", mac_from,
@@ -4622,14 +4695,12 @@
 	return 0;
 
 err_netdev:
-	unregister_netdev(dev);
 	if (pp->bm_priv) {
 		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
 		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
 				       1 << pp->id);
 		mvneta_bm_put(pp->bm_priv);
 	}
-err_free_stats:
 	free_percpu(pp->stats);
 err_free_ports:
 	free_percpu(pp->ports);
@@ -4641,8 +4712,6 @@
 		phylink_destroy(pp->phylink);
 err_free_irq:
 	irq_dispose_mapping(dev->irq);
-err_free_netdev:
-	free_netdev(dev);
 	return err;
 }
 
@@ -4659,7 +4728,6 @@
 	free_percpu(pp->stats);
 	irq_dispose_mapping(dev->irq);
 	phylink_destroy(pp->phylink);
-	free_netdev(dev);
 
 	if (pp->bm_priv) {
 		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c
index de468e1..46c942e 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.c
+++ b/drivers/net/ethernet/marvell/mvneta_bm.c
@@ -190,7 +190,7 @@
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		hwbm_pool->construct = mvneta_bm_construct;
 		hwbm_pool->priv = new_pool;
-		spin_lock_init(&hwbm_pool->lock);
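+		/* The pool is only refilled from process context, so a
+		 * sleepable mutex replaces the old spinlock.
+		 */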
+		mutex_init(&hwbm_pool->buf_lock);
 
 		/* Create new pool */
 		err = mvneta_bm_pool_create(priv, new_pool);
@@ -201,7 +201,7 @@
 		}
 
 		/* Allocate buffers for this pool */
-		num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+		num = hwbm_pool_add(hwbm_pool, hwbm_pool->size);
 		if (num != hwbm_pool->size) {
 			WARN(1, "pool %d: %d of %d allocated\n",
 			     new_pool->id, num, hwbm_pool->size);
@@ -411,15 +411,13 @@
 {
 	struct device_node *dn = pdev->dev.of_node;
 	struct mvneta_bm *priv;
-	struct resource *res;
 	int err;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(struct mvneta_bm), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->reg_base = devm_ioremap_resource(&pdev->dev, res);
+	priv->reg_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->reg_base))
 		return PTR_ERR(priv->reg_base);
 
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h
index c8425d3..e47783c 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.h
+++ b/drivers/net/ethernet/marvell/mvneta_bm.h
@@ -160,16 +160,23 @@
 			     (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS));
 }
 #else
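+/* The !CONFIG_MVNETA_BM stubs must be static inline: plain definitions in
+ * a header are emitted in every includer and clash at link time.
+ */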
-void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
-			    struct mvneta_bm_pool *bm_pool, u8 port_map) {}
-void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
-			 u8 port_map) {}
-int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) { return 0; }
-int mvneta_bm_pool_refill(struct mvneta_bm *priv,
-			  struct mvneta_bm_pool *bm_pool) {return 0; }
-struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
-					  enum mvneta_bm_type type, u8 port_id,
-					  int pkt_size) { return NULL; }
+static inline void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
+					  struct mvneta_bm_pool *bm_pool,
+					  u8 port_map) {}
+static inline void mvneta_bm_bufs_free(struct mvneta_bm *priv,
+				       struct mvneta_bm_pool *bm_pool,
+				       u8 port_map) {}
+static inline int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf)
+{ return 0; }
+static inline int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+					struct mvneta_bm_pool *bm_pool)
+{ return 0; }
+static inline struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv,
+							u8 pool_id,
+							enum mvneta_bm_type type,
+							u8 port_id,
+							int pkt_size)
+{ return NULL; }
 
 static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
 					 struct mvneta_bm_pool *bm_pool,
@@ -178,7 +185,8 @@
 static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
 					struct mvneta_bm_pool *bm_pool)
 { return 0; }
-struct mvneta_bm *mvneta_bm_get(struct device_node *node) { return NULL; }
-void mvneta_bm_put(struct mvneta_bm *priv) {}
+static inline struct mvneta_bm *mvneta_bm_get(struct device_node *node)
+{ return NULL; }
+static inline void mvneta_bm_put(struct mvneta_bm *priv) {}
 #endif /* CONFIG_MVNETA_BM */
 #endif
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 67b9e81..543a310 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/phy.h>
 #include <linux/phylink.h>
+#include <net/flow_offload.h>
 
 /* Fifo Registers */
 #define MVPP2_RX_DATA_FIFO_SIZE_REG(port)	(0x00 + 4 * (port))
@@ -101,6 +102,7 @@
 #define MVPP2_CLS_FLOW_TBL1_REG			0x1828
 #define     MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK	0x7
 #define     MVPP2_CLS_FLOW_TBL1_N_FIELDS(x)	(x)
+#define     MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu)	(((lu) & 0x3f) << 3)
 #define     MVPP2_CLS_FLOW_TBL1_PRIO_MASK	0x3f
 #define     MVPP2_CLS_FLOW_TBL1_PRIO(x)		((x) << 9)
 #define     MVPP2_CLS_FLOW_TBL1_SEQ_MASK	0x7
@@ -123,13 +125,18 @@
 #define MVPP22_CLS_C2_TCAM_DATA2		0x1b18
 #define MVPP22_CLS_C2_TCAM_DATA3		0x1b1c
 #define MVPP22_CLS_C2_TCAM_DATA4		0x1b20
+#define     MVPP22_CLS_C2_LU_TYPE(lu)		((lu) & 0x3f)
 #define     MVPP22_CLS_C2_PORT_ID(port)		((port) << 8)
+#define     MVPP22_CLS_C2_PORT_MASK		(0xff << 8)
+#define MVPP22_CLS_C2_TCAM_INV			0x1b24
+#define     MVPP22_CLS_C2_TCAM_INV_BIT		BIT(31)
 #define MVPP22_CLS_C2_HIT_CTR			0x1b50
 #define MVPP22_CLS_C2_ACT			0x1b60
 #define     MVPP22_CLS_C2_ACT_RSS_EN(act)	(((act) & 0x3) << 19)
 #define     MVPP22_CLS_C2_ACT_FWD(act)		(((act) & 0x7) << 13)
 #define     MVPP22_CLS_C2_ACT_QHIGH(act)	(((act) & 0x3) << 11)
 #define     MVPP22_CLS_C2_ACT_QLOW(act)		(((act) & 0x3) << 9)
+#define     MVPP22_CLS_C2_ACT_COLOR(act)	((act) & 0x7)
 #define MVPP22_CLS_C2_ATTR0			0x1b64
 #define     MVPP22_CLS_C2_ATTR0_QHIGH(qh)	(((qh) & 0x1f) << 24)
 #define     MVPP22_CLS_C2_ATTR0_QHIGH_MASK	0x1f
@@ -141,6 +148,8 @@
 #define MVPP22_CLS_C2_ATTR2			0x1b6c
 #define     MVPP22_CLS_C2_ATTR2_RSS_EN		BIT(30)
 #define MVPP22_CLS_C2_ATTR3			0x1b70
+#define MVPP22_CLS_C2_TCAM_CTRL			0x1b90
+#define     MVPP22_CLS_C2_TCAM_BYPASS_FIFO	BIT(0)
 
 /* Descriptor Manager Top Registers */
 #define MVPP2_RXQ_NUM_REG			0x2040
@@ -253,7 +262,8 @@
 #define     MVPP2_ISR_ENABLE_INTERRUPT(mask)	((mask) & 0xffff)
 #define     MVPP2_ISR_DISABLE_INTERRUPT(mask)	(((mask) << 16) & 0xffff0000)
 #define MVPP2_ISR_RX_TX_CAUSE_REG(port)		(0x5480 + 4 * (port))
-#define     MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK	0xffff
+#define     MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK(version) \
+					((version) == MVPP21 ? 0xffff : 0xff)
 #define     MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK	0xff0000
 #define     MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET	16
 #define     MVPP2_CAUSE_RX_FIFO_OVERRUN_MASK	BIT(24)
@@ -319,8 +329,26 @@
 #define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK	0xff00
 #define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT	8
 
+/* Packet Processor per-port counters */
+#define MVPP2_OVERRUN_ETH_DROP			0x7000
+#define MVPP2_CLS_ETH_DROP			0x7020
+
 /* Hit counters registers */
 #define MVPP2_CTRS_IDX				0x7040
+#define     MVPP22_CTRS_TX_CTR(port, txq)	((txq) | ((port) << 3) | BIT(7))
+#define MVPP2_TX_DESC_ENQ_CTR			0x7100
+#define MVPP2_TX_DESC_ENQ_TO_DDR_CTR		0x7104
+#define MVPP2_TX_BUFF_ENQ_TO_DDR_CTR		0x7108
+#define MVPP2_TX_DESC_ENQ_HW_FWD_CTR		0x710c
+#define MVPP2_RX_DESC_ENQ_CTR			0x7120
+#define MVPP2_TX_PKTS_DEQ_CTR			0x7130
+#define MVPP2_TX_PKTS_FULL_QUEUE_DROP_CTR	0x7200
+#define MVPP2_TX_PKTS_EARLY_DROP_CTR		0x7204
+#define MVPP2_TX_PKTS_BM_DROP_CTR		0x7208
+#define MVPP2_TX_PKTS_BM_MC_DROP_CTR		0x720c
+#define MVPP2_RX_PKTS_FULL_QUEUE_DROP_CTR	0x7220
+#define MVPP2_RX_PKTS_EARLY_DROP_CTR		0x7224
+#define MVPP2_RX_PKTS_BM_DROP_CTR		0x7228
 #define MVPP2_CLS_DEC_TBL_HIT_CTR		0x7700
 #define MVPP2_CLS_FLOW_TBL_HIT_CTR		0x7704
 
@@ -330,6 +358,7 @@
 #define     MVPP2_TXP_SCHED_ENQ_MASK		0xff
 #define     MVPP2_TXP_SCHED_DISQ_OFFSET		8
 #define MVPP2_TXP_SCHED_CMD_1_REG		0x8010
+#define MVPP2_TXP_SCHED_FIXED_PRIO_REG		0x8014
 #define MVPP2_TXP_SCHED_PERIOD_REG		0x8018
 #define MVPP2_TXP_SCHED_MTU_REG			0x801c
 #define     MVPP2_TXP_MTU_MAX			0x7FFFF
@@ -387,7 +416,7 @@
 #define     MVPP2_GMAC_IN_BAND_AUTONEG		BIT(2)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS	BIT(3)
 #define     MVPP2_GMAC_IN_BAND_RESTART_AN	BIT(4)
-#define     MVPP2_GMAC_CONFIG_MII_SPEED	BIT(5)
+#define     MVPP2_GMAC_CONFIG_MII_SPEED		BIT(5)
 #define     MVPP2_GMAC_CONFIG_GMII_SPEED	BIT(6)
 #define     MVPP2_GMAC_AN_SPEED_EN		BIT(7)
 #define     MVPP2_GMAC_FC_ADV_EN		BIT(9)
@@ -400,8 +429,8 @@
 #define     MVPP2_GMAC_STATUS0_GMII_SPEED	BIT(1)
 #define     MVPP2_GMAC_STATUS0_MII_SPEED	BIT(2)
 #define     MVPP2_GMAC_STATUS0_FULL_DUPLEX	BIT(3)
-#define     MVPP2_GMAC_STATUS0_RX_PAUSE		BIT(6)
-#define     MVPP2_GMAC_STATUS0_TX_PAUSE		BIT(7)
+#define     MVPP2_GMAC_STATUS0_RX_PAUSE		BIT(4)
+#define     MVPP2_GMAC_STATUS0_TX_PAUSE		BIT(5)
 #define     MVPP2_GMAC_STATUS0_AN_COMPLETE	BIT(11)
 #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG		0x1c
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS	6
@@ -428,6 +457,8 @@
 #define MVPP22_XLG_CTRL0_REG			0x100
 #define     MVPP22_XLG_CTRL0_PORT_EN		BIT(0)
 #define     MVPP22_XLG_CTRL0_MAC_RESET_DIS	BIT(1)
+#define     MVPP22_XLG_CTRL0_FORCE_LINK_DOWN	BIT(2)
+#define     MVPP22_XLG_CTRL0_FORCE_LINK_PASS	BIT(3)
 #define     MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN	BIT(7)
 #define     MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN	BIT(8)
 #define     MVPP22_XLG_CTRL0_MIB_CNT_DIS	BIT(14)
@@ -479,6 +510,7 @@
 /* XPCS registers. PPv2.2 only */
 #define MVPP22_XPCS_BASE(port)			(0x7400 + (port) * 0x1000)
 #define MVPP22_XPCS_CFG0			0x0
+#define     MVPP22_XPCS_CFG0_RESET_DIS		BIT(0)
 #define     MVPP22_XPCS_CFG0_PCS_MODE(n)	((n) << 3)
 #define     MVPP22_XPCS_CFG0_ACTIVE_LANE(n)	((n) << 5)
 
@@ -547,8 +579,8 @@
 #define MVPP2_MAX_TSO_SEGS		300
 #define MVPP2_MAX_SKB_DESCS		(MVPP2_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
 
-/* Dfault number of RXQs in use */
-#define MVPP2_DEFAULT_RXQ		1
+/* Max number of RXQs per port */
+#define MVPP2_PORT_MAX_RXQ		32
 
 /* Max number of Rx descriptors */
 #define MVPP2_MAX_RXD_MAX		1024
@@ -605,7 +637,14 @@
 #define MVPP2_BIT_TO_WORD(bit)		((bit) / 32)
 #define MVPP2_BIT_IN_WORD(bit)		((bit) % 32)
 
+#define MVPP2_N_PRS_FLOWS		52
+#define MVPP2_N_RFS_ENTRIES_PER_FLOW	4
+
+/* There are 7 supported high-level flows */
+#define MVPP2_N_RFS_RULES		(MVPP2_N_RFS_ENTRIES_PER_FLOW * 7)
+
 /* RSS constants */
+#define MVPP22_N_RSS_TABLES		8
 #define MVPP22_RSS_TABLE_ENTRIES	32
 
 /* IPv6 max L3 address size */
@@ -613,6 +652,7 @@
 
 /* Port flags */
 #define MVPP2_F_LOOPBACK		BIT(0)
+#define MVPP2_F_DT_COMPAT		BIT(1)
 
 /* Marvell tag types */
 enum mvpp2_tag_type {
@@ -643,6 +683,7 @@
 #define MVPP2_BM_SHORT_BUF_NUM		2048
 #define MVPP2_BM_POOL_SIZE_MAX		(16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
 #define MVPP2_BM_POOL_PTR_ALIGN		128
+#define MVPP2_BM_MAX_POOLS		8
 
 /* BM cookie (32 bits) definition */
 #define MVPP2_BM_COOKIE_POOL_OFFS	8
@@ -662,7 +703,7 @@
 #define MVPP21_ADDR_SPACE_SZ		0
 #define MVPP22_ADDR_SPACE_SZ		SZ_64K
 
-#define MVPP2_MAX_THREADS		8
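+/* The PPv2.2 core has 9 internal threads, each with its own address space */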
+#define MVPP2_MAX_THREADS		9
 #define MVPP2_MAX_QVECS			MVPP2_MAX_THREADS
 
 /* GMAC MIB Counters register definitions */
@@ -704,6 +745,11 @@
 #define MVPP2_DESC_DMA_MASK	DMA_BIT_MASK(40)
 
 /* Definitions */
+struct mvpp2_dbgfs_entries;
+
+struct mvpp2_rss_table {
+	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
+};
 
 /* Shared Packet Processor resources */
 struct mvpp2 {
@@ -734,9 +780,17 @@
 	int port_count;
 	struct mvpp2_port *port_list[MVPP2_MAX_PORTS];
 
+	/* Number of Tx threads used */
+	unsigned int nthreads;
+	/* Map of threads needing locking */
+	unsigned long lock_map;
+
 	/* Aggregated TXQs */
 	struct mvpp2_tx_queue *aggr_txqs;
 
+	/* Are we using page_pool with per-cpu pools? */
+	int percpu_pools;
+
 	/* BM pools */
 	struct mvpp2_bm_pool *bm_pools;
 
@@ -760,6 +814,12 @@
 
 	/* Debugfs root entry */
 	struct dentry *dbgfs_dir;
+
+	/* Debugfs entries private data */
+	struct mvpp2_dbgfs_entries *dbgfs_entries;
+
+	/* RSS Indirection tables */
+	struct mvpp2_rss_table *rss_tables[MVPP22_N_RSS_TABLES];
 };
 
 struct mvpp2_pcpu_stats {
@@ -773,9 +833,8 @@
 /* Per-CPU port control */
 struct mvpp2_port_pcpu {
 	struct hrtimer tx_done_timer;
+	struct net_device *dev;
 	bool timer_scheduled;
-	/* Tasklet for egress finalization */
-	struct tasklet_struct tx_done_tasklet;
 };
 
 struct mvpp2_queue_vector {
@@ -788,13 +847,45 @@
 	int nrxqs;
 	u32 pending_cause_rx;
 	struct mvpp2_port *port;
+	struct cpumask *mask;
+};
+
+/* Internal representation of a Flow Steering rule */
+struct mvpp2_rfs_rule {
+	/* Rule location inside the flow */
+	int loc;
+
+	/* Flow type, such as TCP_V4_FLOW, IP6_FLOW, etc. */
+	int flow_type;
+
+	/* Index of the C2 TCAM entry handling this rule */
+	int c2_index;
+
+	/* Header fields that need to be extracted to match this flow */
+	u16 hek_fields;
+
+	/* CLS engine: only c2 is supported for now. */
+	u8 engine;
+
+	/* TCAM key and mask for C2-based steering. These fields should be
+	 * encapsulated in a union should we add more engines.
+	 */
+	u64 c2_tcam;
+	u64 c2_tcam_mask;
+
+	struct flow_rule *flow;
+};
+
+struct mvpp2_ethtool_fs {
+	struct mvpp2_rfs_rule rule;
+	struct ethtool_rxnfc rxnfc;
 };
 
 struct mvpp2_port {
 	u8 id;
 
 	/* Index of the port from the "group of ports" complex point
-	 * of view
+	 * of view. This is specific to PPv2.2.
 	 */
 	int gop_id;
 
@@ -823,6 +914,12 @@
 	/* Per-CPU port control */
 	struct mvpp2_port_pcpu __percpu *pcpu;
 
+	/* Protect the BM refills and the Tx paths when a thread is used on more
+	 * than a single CPU.
+	 */
+	spinlock_t bm_lock[MVPP2_MAX_THREADS];
+	spinlock_t tx_lock[MVPP2_MAX_THREADS];
+
 	/* Flags */
 	unsigned long flags;
 
@@ -839,6 +936,7 @@
 
 	phy_interface_t phy_interface;
 	struct phylink *phylink;
+	struct phylink_config phylink_config;
 	struct phy *comphy;
 
 	struct mvpp2_bm_pool *pool_long;
@@ -853,8 +951,14 @@
 
 	u32 tx_time_coal;
 
-	/* RSS indirection table */
-	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
+	/* List of steering rules active on that port */
+	struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW];
+	int n_rfs_rules;
+
+	/* Each port has its own view of the RSS contexts, so that it can
+	 * number them from 0.
+	 */
+	int rss_ctx[MVPP22_N_RSS_TABLES];
 };
 
 /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
@@ -969,7 +1073,7 @@
 
 /* Per-CPU Tx queue control */
 struct mvpp2_txq_pcpu {
-	int cpu;
+	unsigned int thread;
 
 	/* Number of Tx DMA descriptors in the descriptor ring */
 	int size;
@@ -1095,14 +1199,6 @@
 void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data);
 u32 mvpp2_read(struct mvpp2 *priv, u32 offset);
 
-u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset);
-
-void mvpp2_percpu_write(struct mvpp2 *priv, int cpu, u32 offset, u32 data);
-u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu, u32 offset);
-
-void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu, u32 offset,
-				u32 data);
-
 void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name);
 
 void mvpp2_dbgfs_cleanup(struct mvpp2 *priv);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index efdb7a6..35478cb 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -22,302 +22,302 @@
 	}							\
 }
 
-static struct mvpp2_cls_flow cls_flows[MVPP2_N_FLOWS] = {
+static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
 	/* TCP over IPv4 flows, Not fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP4_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP4_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP4_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* TCP over IPv4 flows, Not fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
-		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
-		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
-		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* TCP over IPv4 flows, fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* TCP over IPv4 flows, fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* UDP over IPv4 flows, Not fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP4_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP4_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP4_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* UDP over IPv4 flows, Not fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
-		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
-		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
-		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* UDP over IPv4 flows, fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* UDP over IPv4 flows, fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* TCP over IPv6 flows, not fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP6_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP6_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* TCP over IPv6 flows, not fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_TAG,
-		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_TAG,
-		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* TCP over IPv6 flows, fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP6_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
 		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP6_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
 		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* TCP over IPv6 flows, fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
 		       MVPP2_PRS_RI_L4_TCP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* UDP over IPv6 flows, not fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP6_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_UNTAG,
 		       MVPP22_CLS_HEK_IP6_5T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* UDP over IPv6 flows, not fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_TAG,
-		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_TAG,
-		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_TAG,
+		       MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* UDP over IPv6 flows, fragmented, no vlan tag */
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP6_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
 		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
 		       MVPP22_CLS_HEK_IP6_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
 		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
 
 	/* UDP over IPv6 flows, fragmented, with vlan tag */
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
-	MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_TAG,
-		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
 		       MVPP2_PRS_RI_L4_UDP,
 		       MVPP2_PRS_IP_MASK),
 
 	/* IPv4 flows, no vlan tag */
-	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4,
 		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
-	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT,
 		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
-	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_UNTAG,
 		       MVPP22_CLS_HEK_IP4_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER,
 		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
 
 	/* IPv4 flows, with vlan tag */
-	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4,
 		       MVPP2_PRS_RI_L3_PROTO_MASK),
-	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OPT,
 		       MVPP2_PRS_RI_L3_PROTO_MASK),
-	MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
-		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
+		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP4_OTHER,
 		       MVPP2_PRS_RI_L3_PROTO_MASK),
 
 	/* IPv6 flows, no vlan tag */
-	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_UNTAG,
 		       MVPP22_CLS_HEK_IP6_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6,
 		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
-	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_UNTAG,
 		       MVPP22_CLS_HEK_IP6_2T,
 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6,
 		       MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
 
 	/* IPv6 flows, with vlan tag */
-	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_TAG,
-		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6,
 		       MVPP2_PRS_RI_L3_PROTO_MASK),
-	MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_TAG,
-		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_TAG,
+		       MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
 		       MVPP2_PRS_RI_L3_IP6,
 		       MVPP2_PRS_RI_L3_PROTO_MASK),
 
 	/* Non IP flow, no vlan tag */
-	MVPP2_DEF_FLOW(ETHER_FLOW, MVPP2_FL_NON_IP_UNTAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_ETHERNET, MVPP2_FL_NON_IP_UNTAG,
 		       0,
 		       MVPP2_PRS_RI_VLAN_NONE,
 		       MVPP2_PRS_RI_VLAN_MASK),
 	/* Non IP flow, with vlan tag */
-	MVPP2_DEF_FLOW(ETHER_FLOW, MVPP2_FL_NON_IP_TAG,
+	MVPP2_DEF_FLOW(MVPP22_FLOW_ETHERNET, MVPP2_FL_NON_IP_TAG,
 		       MVPP22_CLS_HEK_OPT_VLAN,
 		       0, 0),
 };
@@ -344,9 +344,9 @@
 				 struct mvpp2_cls_flow_entry *fe)
 {
 	mvpp2_write(priv, MVPP2_CLS_FLOW_INDEX_REG, fe->index);
-	mvpp2_write(priv, MVPP2_CLS_FLOW_TBL0_REG,  fe->data[0]);
-	mvpp2_write(priv, MVPP2_CLS_FLOW_TBL1_REG,  fe->data[1]);
-	mvpp2_write(priv, MVPP2_CLS_FLOW_TBL2_REG,  fe->data[2]);
+	mvpp2_write(priv, MVPP2_CLS_FLOW_TBL0_REG, fe->data[0]);
+	mvpp2_write(priv, MVPP2_CLS_FLOW_TBL1_REG, fe->data[1]);
+	mvpp2_write(priv, MVPP2_CLS_FLOW_TBL2_REG, fe->data[2]);
 }
 
 u32 mvpp2_cls_lookup_hits(struct mvpp2 *priv, int index)
@@ -429,12 +429,6 @@
 		fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
 }
 
-static void mvpp2_cls_flow_seq_set(struct mvpp2_cls_flow_entry *fe, u32 seq)
-{
-	fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_SEQ(MVPP2_CLS_FLOW_TBL1_SEQ_MASK);
-	fe->data[1] |= MVPP2_CLS_FLOW_TBL1_SEQ(seq);
-}
-
 static void mvpp2_cls_flow_last_set(struct mvpp2_cls_flow_entry *fe,
 				    bool is_last)
 {
@@ -454,9 +448,22 @@
 	fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID(port);
 }
 
+static void mvpp2_cls_flow_port_remove(struct mvpp2_cls_flow_entry *fe,
+				       u32 port)
+{
+	fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID(port);
+}
+
+static void mvpp2_cls_flow_lu_type_set(struct mvpp2_cls_flow_entry *fe,
+				       u8 lu_type)
+{
+	fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK);
+	fe->data[1] |= MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu_type);
+}
+
 /* Initialize the parser entry for the given flow */
 static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
-				    struct mvpp2_cls_flow *flow)
+				    const struct mvpp2_cls_flow *flow)
 {
 	mvpp2_prs_add_flow(priv, flow->flow_id, flow->prs_ri.ri,
 			   flow->prs_ri.ri_mask);
@@ -464,7 +471,7 @@
 
 /* Initialize the Lookup Id table entry for the given flow */
 static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
-				    struct mvpp2_cls_flow *flow)
+				    const struct mvpp2_cls_flow *flow)
 {
 	struct mvpp2_cls_lookup_entry le;
 
@@ -477,7 +484,7 @@
 	/* We point to the first lookup in the sequence for the flow, that is
 	 * the C2 lookup.
 	 */
-	le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_FLOW_C2_ENTRY(flow->flow_id));
+	le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_CLS_FLT_FIRST(flow->flow_id));
 
 	/* CLS is always enabled, RSS is enabled/disabled in C2 lookup */
 	le.data |= MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK;
@@ -485,21 +492,113 @@
 	mvpp2_cls_lookup_write(priv, &le);
 }
 
+static void mvpp2_cls_c2_write(struct mvpp2 *priv,
+			       struct mvpp2_cls_c2_entry *c2)
+{
+	u32 val;
+
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
+
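+	/* The INV bit marks the entry invalid; like the TCAM data, it is
+	 * only latched into HW by the TCAM_DATA4 write below.
+	 */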
+	val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV);
+	if (c2->valid)
+		val &= ~MVPP22_CLS_C2_TCAM_INV_BIT;
+	else
+		val |= MVPP22_CLS_C2_TCAM_INV_BIT;
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_INV, val);
+
+	mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
+
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
+	mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
+
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
+	/* Writing TCAM_DATA4 flushes writes to TCAM_DATA0-4 and INV to HW */
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
+}
+
+void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
+		       struct mvpp2_cls_c2_entry *c2)
+{
+	u32 val;
+
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
+
+	c2->index = index;
+
+	c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
+	c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
+	c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
+	c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
+	c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
+
+	c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
+
+	c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
+	c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
+	c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
+	c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
+
+	val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV);
+	c2->valid = !(val & MVPP22_CLS_C2_TCAM_INV_BIT);
+}
+
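+/* Map an ethtool flow type onto the driver's internal flow class, ignoring
+ * the FLOW_EXT/FLOW_MAC_EXT/FLOW_RSS modifier bits.
+ */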
+static int mvpp2_cls_ethtool_flow_to_type(int flow_type)
+{
+	switch (flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+	case ETHER_FLOW:
+		return MVPP22_FLOW_ETHERNET;
+	case TCP_V4_FLOW:
+		return MVPP22_FLOW_TCP4;
+	case TCP_V6_FLOW:
+		return MVPP22_FLOW_TCP6;
+	case UDP_V4_FLOW:
+		return MVPP22_FLOW_UDP4;
+	case UDP_V6_FLOW:
+		return MVPP22_FLOW_UDP6;
+	case IPV4_FLOW:
+		return MVPP22_FLOW_IP4;
+	case IPV6_FLOW:
+		return MVPP22_FLOW_IP6;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mvpp2_cls_c2_port_flow_index(struct mvpp2_port *port, int loc)
+{
+	return MVPP22_CLS_C2_RFS_LOC(port->id, loc);
+}
+
 /* Initialize the flow table entries for the given flow */
-static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
+static void mvpp2_cls_flow_init(struct mvpp2 *priv,
+				const struct mvpp2_cls_flow *flow)
 {
 	struct mvpp2_cls_flow_entry fe;
-	int i;
+	int i, pri = 0;
 
-	/* C2 lookup */
-	memset(&fe, 0, sizeof(fe));
-	fe.index = MVPP2_FLOW_C2_ENTRY(flow->flow_id);
+	/* Assign default values to all entries in the flow */
+	for (i = MVPP2_CLS_FLT_FIRST(flow->flow_id);
+	     i <= MVPP2_CLS_FLT_LAST(flow->flow_id); i++) {
+		memset(&fe, 0, sizeof(fe));
+		fe.index = i;
+		mvpp2_cls_flow_pri_set(&fe, pri++);
+
+		if (i == MVPP2_CLS_FLT_LAST(flow->flow_id))
+			mvpp2_cls_flow_last_set(&fe, 1);
+
+		mvpp2_cls_flow_write(priv, &fe);
+	}
+
+	/* RSS config C2 lookup */
+	mvpp2_cls_flow_read(priv, MVPP2_CLS_FLT_C2_RSS_ENTRY(flow->flow_id),
+			    &fe);
 
 	mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
 	mvpp2_cls_flow_port_id_sel(&fe, true);
-	mvpp2_cls_flow_last_set(&fe, 0);
-	mvpp2_cls_flow_pri_set(&fe, 0);
-	mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_FIRST1);
+	mvpp2_cls_flow_lu_type_set(&fe, MVPP22_CLS_LU_TYPE_ALL);
 
 	/* Add all ports */
 	for (i = 0; i < MVPP2_MAX_PORTS; i++)
@@ -509,22 +608,19 @@
 
 	/* C3Hx lookups */
 	for (i = 0; i < MVPP2_MAX_PORTS; i++) {
-		memset(&fe, 0, sizeof(fe));
-		fe.index = MVPP2_PORT_FLOW_HASH_ENTRY(i, flow->flow_id);
+		mvpp2_cls_flow_read(priv,
+				    MVPP2_CLS_FLT_HASH_ENTRY(i, flow->flow_id),
+				    &fe);
 
+		/* Set a default engine. Will be overwritten when setting the
+		 * real HEK parameters
+		 */
+		mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C3HA);
 		mvpp2_cls_flow_port_id_sel(&fe, true);
-		mvpp2_cls_flow_pri_set(&fe, i + 1);
-		mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_MIDDLE);
 		mvpp2_cls_flow_port_add(&fe, BIT(i));
 
 		mvpp2_cls_flow_write(priv, &fe);
 	}
-
-	/* Update the last entry */
-	mvpp2_cls_flow_last_set(&fe, 1);
-	mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_LAST);
-
-	mvpp2_cls_flow_write(priv, &fe);
 }
 
 /* Adds a field to the Header Extracted Key generation parameters */
@@ -555,9 +651,15 @@
 
 	for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
 		switch (BIT(i)) {
+		case MVPP22_CLS_HEK_OPT_MAC_DA:
+			field_id = MVPP22_CLS_FIELD_MAC_DA;
+			break;
 		case MVPP22_CLS_HEK_OPT_VLAN:
 			field_id = MVPP22_CLS_FIELD_VLAN;
 			break;
+		case MVPP22_CLS_HEK_OPT_VLAN_PRI:
+			field_id = MVPP22_CLS_FIELD_VLAN_PRI;
+			break;
 		case MVPP22_CLS_HEK_OPT_IP4SA:
 			field_id = MVPP22_CLS_FIELD_IP4SA;
 			break;
@@ -586,9 +688,33 @@
 	return 0;
 }
 
-struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
+/* Returns the size, in bits, of the corresponding HEK field */
+static int mvpp2_cls_hek_field_size(u32 field)
 {
-	if (flow >= MVPP2_N_FLOWS)
+	switch (field) {
+	case MVPP22_CLS_HEK_OPT_MAC_DA:
+		return 48;
+	case MVPP22_CLS_HEK_OPT_VLAN:
+		return 12;
+	case MVPP22_CLS_HEK_OPT_VLAN_PRI:
+		return 3;
+	case MVPP22_CLS_HEK_OPT_IP4SA:
+	case MVPP22_CLS_HEK_OPT_IP4DA:
+		return 32;
+	case MVPP22_CLS_HEK_OPT_IP6SA:
+	case MVPP22_CLS_HEK_OPT_IP6DA:
+		return 128;
+	case MVPP22_CLS_HEK_OPT_L4SIP:
+	case MVPP22_CLS_HEK_OPT_L4DIP:
+		return 16;
+	default:
+		return -1;
+	}
+}
+
+const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
+{
+	if (flow >= MVPP2_N_PRS_FLOWS)
 		return NULL;
 
 	return &cls_flows[flow];
@@ -608,21 +734,17 @@
 static int mvpp2_port_rss_hash_opts_set(struct mvpp2_port *port, int flow_type,
 					u16 requested_opts)
 {
+	const struct mvpp2_cls_flow *flow;
 	struct mvpp2_cls_flow_entry fe;
-	struct mvpp2_cls_flow *flow;
 	int i, engine, flow_index;
 	u16 hash_opts;
 
-	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+	for_each_cls_flow_id_with_type(i, flow_type) {
 		flow = mvpp2_cls_flow_get(i);
 		if (!flow)
 			return -EINVAL;
 
-		if (flow->flow_type != flow_type)
-			continue;
-
-		flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
-							flow->flow_id);
+		flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id);
 
 		mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -664,6 +786,9 @@
 		case MVPP22_CLS_FIELD_VLAN:
 			hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
 			break;
+		case MVPP22_CLS_FIELD_VLAN_PRI:
+			hash_opts |= MVPP22_CLS_HEK_OPT_VLAN_PRI;
+			break;
 		case MVPP22_CLS_FIELD_L3_PROTO:
 			hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
 			break;
@@ -697,21 +822,17 @@
  */
 static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
 {
+	const struct mvpp2_cls_flow *flow;
 	struct mvpp2_cls_flow_entry fe;
-	struct mvpp2_cls_flow *flow;
 	int i, flow_index;
 	u16 hash_opts = 0;
 
-	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+	for_each_cls_flow_id_with_type(i, flow_type) {
 		flow = mvpp2_cls_flow_get(i);
 		if (!flow)
 			return 0;
 
-		if (flow->flow_type != flow_type)
-			continue;
-
-		flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
-							flow->flow_id);
+		flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id);
 
 		mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -723,10 +844,10 @@
 
 static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
 {
-	struct mvpp2_cls_flow *flow;
+	const struct mvpp2_cls_flow *flow;
 	int i;
 
-	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+	for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) {
 		flow = mvpp2_cls_flow_get(i);
 		if (!flow)
 			break;
@@ -737,47 +858,6 @@
 	}
 }
 
-static void mvpp2_cls_c2_write(struct mvpp2 *priv,
-			       struct mvpp2_cls_c2_entry *c2)
-{
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
-
-	/* Write TCAM */
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
-
-	mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
-
-	mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
-	mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
-	mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
-	mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
-}
-
-void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
-		       struct mvpp2_cls_c2_entry *c2)
-{
-	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
-
-	c2->index = index;
-
-	c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
-	c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
-	c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
-	c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
-	c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
-
-	c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
-
-	c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
-	c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
-	c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
-	c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
-}
-
 static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
 {
 	struct mvpp2_cls_c2_entry c2;
@@ -791,6 +871,10 @@
 	c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap);
 	c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap));
 
+	/* Match on Lookup Type */
+	c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK));
+	c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP22_CLS_LU_TYPE_ALL);
+
 	/* Update RSS status after matching this entry */
 	c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
 
@@ -809,6 +893,8 @@
 	c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
 		      MVPP22_CLS_C2_ATTR0_QLOW(ql);
 
+	c2.valid = true;
+
 	mvpp2_cls_c2_write(port->priv, &c2);
 }
 
@@ -817,6 +903,7 @@
 {
 	struct mvpp2_cls_lookup_entry le;
 	struct mvpp2_cls_flow_entry fe;
+	struct mvpp2_cls_c2_entry c2;
 	int index;
 
 	/* Enable classifier */
@@ -840,6 +927,20 @@
 		mvpp2_cls_lookup_write(priv, &le);
 	}
 
+	/* Clear C2 TCAM engine table */
+	memset(&c2, 0, sizeof(c2));
+	c2.valid = false;
+	for (index = 0; index < MVPP22_CLS_C2_N_ENTRIES; index++) {
+		c2.index = index;
+		mvpp2_cls_c2_write(priv, &c2);
+	}
+
+	/* Disable the FIFO stages in C2 engine, which are only used in BIST
+	 * mode
+	 */
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_CTRL,
+		    MVPP22_CLS_C2_TCAM_BYPASS_FIFO);
+
 	mvpp2_cls_port_init_flows(priv);
 }
 
@@ -880,12 +981,22 @@
 	return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
 }
 
-static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port, u32 ctx)
 {
 	struct mvpp2_cls_c2_entry c2;
+	u8 qh, ql;
 
 	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+	/* The RxQ number is used to select the RSS table. In that case, we set
+	 * it to be the ctx number.
+	 */
+	qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+	ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+	c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+		     MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
 	c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
 
 	mvpp2_cls_c2_write(port->priv, &c2);
@@ -894,22 +1005,57 @@
 static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
 {
 	struct mvpp2_cls_c2_entry c2;
+	u8 qh, ql;
 
 	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+	/* Reset the default destination RxQ to the port's first rx queue. */
+	qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+	ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+	c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+		      MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
 	c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
 
 	mvpp2_cls_c2_write(port->priv, &c2);
 }
 
-void mvpp22_rss_enable(struct mvpp2_port *port)
+static inline int mvpp22_rss_ctx(struct mvpp2_port *port, int port_rss_ctx)
 {
-	mvpp2_rss_port_c2_enable(port);
+	return port->rss_ctx[port_rss_ctx];
 }
 
-void mvpp22_rss_disable(struct mvpp2_port *port)
+int mvpp22_port_rss_enable(struct mvpp2_port *port)
 {
+	if (mvpp22_rss_ctx(port, 0) < 0)
+		return -EINVAL;
+
+	mvpp2_rss_port_c2_enable(port, mvpp22_rss_ctx(port, 0));
+
+	return 0;
+}
+
+int mvpp22_port_rss_disable(struct mvpp2_port *port)
+{
+	if (mvpp22_rss_ctx(port, 0) < 0)
+		return -EINVAL;
+
 	mvpp2_rss_port_c2_disable(port);
+
+	return 0;
+}
+
+static void mvpp22_port_c2_lookup_disable(struct mvpp2_port *port, int entry)
+{
+	struct mvpp2_cls_c2_entry c2;
+
+	mvpp2_cls_c2_read(port->priv, entry, &c2);
+
+	/* Clear the port map so that the entry doesn't match anymore */
+	c2.tcam[4] &= ~(MVPP22_CLS_C2_PORT_ID(BIT(port->id)));
+
+	mvpp2_cls_c2_write(port->priv, &c2);
 }
 
 /* Set CPU queue number for oversize packets */
@@ -928,6 +1074,370 @@
 	mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
 }
 
+static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
+				       struct mvpp2_rfs_rule *rule)
+{
+	struct flow_action_entry *act;
+	struct mvpp2_cls_c2_entry c2;
+	u8 qh, ql, pmap;
+	int index, ctx;
+
+	memset(&c2, 0, sizeof(c2));
+
+	index = mvpp2_cls_c2_port_flow_index(port, rule->loc);
+	if (index < 0)
+		return -EINVAL;
+	c2.index = index;
+
+	act = &rule->flow->action.entries[0];
+
+	rule->c2_index = c2.index;
+
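+	/* Each TCAM data word holds 16 bits of lookup key in its low half and
+	 * the corresponding 16 bits of mask in its high half; tcam[0] gets
+	 * the most significant bits of the 64-bit HEK.
+	 */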
+	c2.tcam[3] = (rule->c2_tcam & 0xffff) |
+		     ((rule->c2_tcam_mask & 0xffff) << 16);
+	c2.tcam[2] = ((rule->c2_tcam >> 16) & 0xffff) |
+		     (((rule->c2_tcam_mask >> 16) & 0xffff) << 16);
+	c2.tcam[1] = ((rule->c2_tcam >> 32) & 0xffff) |
+		     (((rule->c2_tcam_mask >> 32) & 0xffff) << 16);
+	c2.tcam[0] = ((rule->c2_tcam >> 48) & 0xffff) |
+		     (((rule->c2_tcam_mask >> 48) & 0xffff) << 16);
+
+	pmap = BIT(port->id);
+	c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap);
+	c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap));
+
+	/* Match on Lookup Type */
+	c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK));
+	c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(rule->loc);
+
+	if (act->id == FLOW_ACTION_DROP) {
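+		/* A locked "red" color marks the packet for dropping */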
+		c2.act = MVPP22_CLS_C2_ACT_COLOR(MVPP22_C2_COL_RED_LOCK);
+	} else {
+		/* We want to keep the default color derived from the Header
+		 * Parser drop entries, for VLAN and MAC filtering. This will
+		 * assign a default color of Green or Red, and we want matches
+		 * with a non-drop action to keep that color.
+		 */
+		c2.act = MVPP22_CLS_C2_ACT_COLOR(MVPP22_C2_COL_NO_UPD_LOCK);
+
+		/* Update RSS status after matching this entry */
+		if (act->queue.ctx)
+			c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+		/* Always lock the RSS_EN decision. We might have high-prio
+		 * rules steering to an RXQ and a lower one steering to RSS;
+		 * we don't want the low-prio RSS rule overwriting this flag.
+		 */
+		c2.act |= MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
+
+		/* Mark packet as "forwarded to software", needed for RSS */
+		c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK);
+
+		c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD_LOCK) |
+			   MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD_LOCK);
+
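+		/* The destination queue number is split across two attr[0]
+		 * fields: the 3 low bits go in QLOW, the remaining bits in
+		 * QHIGH.
+		 */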
+		if (act->queue.ctx) {
+			/* Get the global ctx number */
+			ctx = mvpp22_rss_ctx(port, act->queue.ctx);
+			if (ctx < 0)
+				return -EINVAL;
+
+			qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+			ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+		} else {
+			qh = ((act->queue.index + port->first_rxq) >> 3) &
+			      MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+			ql = (act->queue.index + port->first_rxq) &
+			      MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+		}
+
+		c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+			      MVPP22_CLS_C2_ATTR0_QLOW(ql);
+	}
+
+	c2.valid = true;
+
+	mvpp2_cls_c2_write(port->priv, &c2);
+
+	return 0;
+}
+
+static int mvpp2_port_c2_rfs_rule_insert(struct mvpp2_port *port,
+					 struct mvpp2_rfs_rule *rule)
+{
+	return mvpp2_port_c2_tcam_rule_add(port, rule);
+}
+
+static int mvpp2_port_cls_rfs_rule_remove(struct mvpp2_port *port,
+					  struct mvpp2_rfs_rule *rule)
+{
+	const struct mvpp2_cls_flow *flow;
+	struct mvpp2_cls_flow_entry fe;
+	int index, i;
+
+	for_each_cls_flow_id_containing_type(i, rule->flow_type) {
+		flow = mvpp2_cls_flow_get(i);
+		if (!flow)
+			return 0;
+
+		index = MVPP2_CLS_FLT_C2_RFS(port->id, flow->flow_id, rule->loc);
+
+		mvpp2_cls_flow_read(port->priv, index, &fe);
+		mvpp2_cls_flow_port_remove(&fe, BIT(port->id));
+		mvpp2_cls_flow_write(port->priv, &fe);
+	}
+
+	if (rule->c2_index >= 0)
+		mvpp22_port_c2_lookup_disable(port, rule->c2_index);
+
+	return 0;
+}
+
+static int mvpp2_port_flt_rfs_rule_insert(struct mvpp2_port *port,
+					  struct mvpp2_rfs_rule *rule)
+{
+	const struct mvpp2_cls_flow *flow;
+	struct mvpp2 *priv = port->priv;
+	struct mvpp2_cls_flow_entry fe;
+	int index, ret, i;
+
+	if (rule->engine != MVPP22_CLS_ENGINE_C2)
+		return -EOPNOTSUPP;
+
+	ret = mvpp2_port_c2_rfs_rule_insert(port, rule);
+	if (ret)
+		return ret;
+
+	for_each_cls_flow_id_containing_type(i, rule->flow_type) {
+		flow = mvpp2_cls_flow_get(i);
+		if (!flow)
+			return 0;
+
+		if ((rule->hek_fields & flow->supported_hash_opts) != rule->hek_fields)
+			continue;
+
+		index = MVPP2_CLS_FLT_C2_RFS(port->id, flow->flow_id, rule->loc);
+
+		mvpp2_cls_flow_read(priv, index, &fe);
+		mvpp2_cls_flow_eng_set(&fe, rule->engine);
+		mvpp2_cls_flow_port_id_sel(&fe, true);
+		mvpp2_flow_set_hek_fields(&fe, rule->hek_fields);
+		mvpp2_cls_flow_lu_type_set(&fe, rule->loc);
+		mvpp2_cls_flow_port_add(&fe, 0xf);
+
+		mvpp2_cls_flow_write(priv, &fe);
+	}
+
+	return 0;
+}
+
+static int mvpp2_cls_c2_build_match(struct mvpp2_rfs_rule *rule)
+{
+	struct flow_rule *flow = rule->flow;
+	int offs = 0;
+
+	/* The order of insertion in C2 tcam must match the order in which
+	 * the fields are found in the header
+	 */
+	if (flow_rule_match_key(flow, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(flow, &match);
+		if (match.mask->vlan_id) {
+			rule->hek_fields |= MVPP22_CLS_HEK_OPT_VLAN;
+
+			rule->c2_tcam |= ((u64)match.key->vlan_id) << offs;
+			rule->c2_tcam_mask |= ((u64)match.mask->vlan_id) << offs;
+
+			/* Don't update the offset yet */
+		}
+
+		if (match.mask->vlan_priority) {
+			rule->hek_fields |= MVPP22_CLS_HEK_OPT_VLAN_PRI;
+
+			/* VLAN pri is always at offset 13 relative to the
+			 * current offset
+			 */
+			rule->c2_tcam |= ((u64)match.key->vlan_priority) <<
+				(offs + 13);
+			rule->c2_tcam_mask |= ((u64)match.mask->vlan_priority) <<
+				(offs + 13);
+		}
+
+		if (match.mask->vlan_dei)
+			return -EOPNOTSUPP;
+
+		/* vlan id and prio always seem to take a full 16-bit slot in
+		 * the Header Extracted Key.
+		 */
+		offs += 16;
+	}
+
+	if (flow_rule_match_key(flow, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(flow, &match);
+		if (match.mask->src) {
+			rule->hek_fields |= MVPP22_CLS_HEK_OPT_L4SIP;
+
+			rule->c2_tcam |= ((u64)ntohs(match.key->src)) << offs;
+			rule->c2_tcam_mask |= ((u64)ntohs(match.mask->src)) << offs;
+			offs += mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4SIP);
+		}
+
+		if (match.mask->dst) {
+			rule->hek_fields |= MVPP22_CLS_HEK_OPT_L4DIP;
+
+			rule->c2_tcam |= ((u64)ntohs(match.key->dst)) << offs;
+			rule->c2_tcam_mask |= ((u64)ntohs(match.mask->dst)) << offs;
+			offs += mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4DIP);
+		}
+	}
+
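+	/* A single flow table entry can only extract up to
+	 * MVPP2_FLOW_N_FIELDS header fields
+	 */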
+	if (hweight16(rule->hek_fields) > MVPP2_FLOW_N_FIELDS)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule)
+{
+	struct flow_rule *flow = rule->flow;
+	struct flow_action_entry *act;
+
+	act = &flow->action.entries[0];
+	if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP)
+		return -EOPNOTSUPP;
+
+	/* When both an RSS context and a queue index are set, the index is
+	 * treated as an offset to be added to the indirection table entries.
+	 * We don't support this, so reject such rules.
+	 */
+	if (act->queue.ctx && act->queue.index)
+		return -EOPNOTSUPP;
+
+	/* For now, only use the C2 engine which has a HEK size limited to 64
+	 * bits for TCAM matching.
+	 */
+	rule->engine = MVPP22_CLS_ENGINE_C2;
+
+	if (mvpp2_cls_c2_build_match(rule))
+		return -EINVAL;
+
+	return 0;
+}
+
+int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port,
+			       struct ethtool_rxnfc *rxnfc)
+{
+	struct mvpp2_ethtool_fs *efs;
+
+	if (rxnfc->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
+		return -EINVAL;
+
+	efs = port->rfs_rules[rxnfc->fs.location];
+	if (!efs)
+		return -ENOENT;
+
+	memcpy(rxnfc, &efs->rxnfc, sizeof(efs->rxnfc));
+
+	return 0;
+}
+
+int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
+			       struct ethtool_rxnfc *info)
+{
+	struct ethtool_rx_flow_spec_input input = {};
+	struct ethtool_rx_flow_rule *ethtool_rule;
+	struct mvpp2_ethtool_fs *efs, *old_efs;
+	int ret = 0;
+
+	if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
+		return -EINVAL;
+
+	efs = kzalloc(sizeof(*efs), GFP_KERNEL);
+	if (!efs)
+		return -ENOMEM;
+
+	input.fs = &info->fs;
+
+	/* We need to manually set the rss_ctx, since this info isn't present
+	 * in info->fs
+	 */
+	if (info->fs.flow_type & FLOW_RSS)
+		input.rss_ctx = info->rss_context;
+
+	ethtool_rule = ethtool_rx_flow_rule_create(&input);
+	if (IS_ERR(ethtool_rule)) {
+		ret = PTR_ERR(ethtool_rule);
+		goto clean_rule;
+	}
+
+	efs->rule.flow = ethtool_rule->rule;
+	efs->rule.flow_type = mvpp2_cls_ethtool_flow_to_type(info->fs.flow_type);
+	if (efs->rule.flow_type < 0) {
+		ret = efs->rule.flow_type;
+		goto clean_rule;
+	}
+
+	ret = mvpp2_cls_rfs_parse_rule(&efs->rule);
+	if (ret)
+		goto clean_eth_rule;
+
+	efs->rule.loc = info->fs.location;
+
+	/* Replace an already existing rule */
+	if (port->rfs_rules[efs->rule.loc]) {
+		old_efs = port->rfs_rules[efs->rule.loc];
+		ret = mvpp2_port_cls_rfs_rule_remove(port, &old_efs->rule);
+		if (ret)
+			goto clean_eth_rule;
+		kfree(old_efs);
+		port->n_rfs_rules--;
+	}
+
+	ret = mvpp2_port_flt_rfs_rule_insert(port, &efs->rule);
+	if (ret)
+		goto clean_eth_rule;
+
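+	/* The flow_rule representation is only needed during insertion, so
+	 * release it now and keep only the ethtool flow spec.
+	 */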
+	ethtool_rx_flow_rule_destroy(ethtool_rule);
+	efs->rule.flow = NULL;
+
+	memcpy(&efs->rxnfc, info, sizeof(*info));
+	port->rfs_rules[efs->rule.loc] = efs;
+	port->n_rfs_rules++;
+
+	return ret;
+
+clean_eth_rule:
+	ethtool_rx_flow_rule_destroy(ethtool_rule);
+clean_rule:
+	kfree(efs);
+	return ret;
+}
+
+int mvpp2_ethtool_cls_rule_del(struct mvpp2_port *port,
+			       struct ethtool_rxnfc *info)
+{
+	struct mvpp2_ethtool_fs *efs;
+	int ret;
+
+	efs = port->rfs_rules[info->fs.location];
+	if (!efs)
+		return -EINVAL;
+
+	/* Remove the rule from the engines. */
+	ret = mvpp2_port_cls_rfs_rule_remove(port, &efs->rule);
+	if (ret)
+		return ret;
+
+	port->n_rfs_rules--;
+	port->rfs_rules[info->fs.location] = NULL;
+	kfree(efs);
+
+	return 0;
+}
+
 static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq)
 {
 	int nrxqs, cpu, cpus = num_possible_cpus();
@@ -947,37 +1457,181 @@
 	return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs);
 }
 
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table)
+static void mvpp22_rss_fill_table(struct mvpp2_port *port,
+				  struct mvpp2_rss_table *table,
+				  u32 rss_ctx)
 {
 	struct mvpp2 *priv = port->priv;
 	int i;
 
 	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
-		u32 sel = MVPP22_RSS_INDEX_TABLE(table) |
+		u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
 			  MVPP22_RSS_INDEX_TABLE_ENTRY(i);
 		mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
 
 		mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY,
-			    mvpp22_rxfh_indir(port, port->indir[i]));
+			    mvpp22_rxfh_indir(port, table->indir[i]));
 	}
 }
 
+static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx)
+{
+	struct mvpp2 *priv = port->priv;
+	u32 ctx;
+
+	/* Find the first free RSS table */
+	for (ctx = 0; ctx < MVPP22_N_RSS_TABLES; ctx++) {
+		if (!priv->rss_tables[ctx])
+			break;
+	}
+
+	if (ctx == MVPP22_N_RSS_TABLES)
+		return -EINVAL;
+
+	priv->rss_tables[ctx] = kzalloc(sizeof(*priv->rss_tables[ctx]),
+					GFP_KERNEL);
+	if (!priv->rss_tables[ctx])
+		return -ENOMEM;
+
+	*rss_ctx = ctx;
+
+	/* Set the table width: replace the whole classifier Rx queue number
+	 * with the ones configured in RSS table entries.
+	 */
+	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(ctx));
+	mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
+	mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
+
+	return 0;
+}
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx)
+{
+	u32 rss_ctx;
+	int ret, i;
+
+	ret = mvpp22_rss_context_create(port, &rss_ctx);
+	if (ret)
+		return ret;
+
+	/* Find the first available context number in the port, starting from 1.
+	 * Context 0 on each port is reserved for the default context.
+	 */
+	for (i = 1; i < MVPP22_N_RSS_TABLES; i++) {
+		if (port->rss_ctx[i] < 0)
+			break;
+	}
+
+	if (i == MVPP22_N_RSS_TABLES)
+		return -EINVAL;
+
+	port->rss_ctx[i] = rss_ctx;
+	*port_ctx = i;
+
+	return 0;
+}
+
+static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
+						    int rss_ctx)
+{
+	if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+		return NULL;
+
+	return priv->rss_tables[rss_ctx];
+}
+
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
+{
+	struct mvpp2 *priv = port->priv;
+	struct ethtool_rxnfc *rxnfc;
+	int i, rss_ctx, ret;
+
+	rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+
+	if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+		return -EINVAL;
+
+	/* Invalidate any active classification rules that use this context */
+	for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+		if (!port->rfs_rules[i])
+			continue;
+
+		rxnfc = &port->rfs_rules[i]->rxnfc;
+		if (!(rxnfc->fs.flow_type & FLOW_RSS) ||
+		    rxnfc->rss_context != port_ctx)
+			continue;
+
+		ret = mvpp2_ethtool_cls_rule_del(port, rxnfc);
+		if (ret) {
+			netdev_warn(port->dev,
+				    "couldn't remove classification rule %d associated with this context",
+				    rxnfc->fs.location);
+		}
+	}
+
+	kfree(priv->rss_tables[rss_ctx]);
+
+	priv->rss_tables[rss_ctx] = NULL;
+	port->rss_ctx[port_ctx] = -1;
+
+	return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 port_ctx,
+				  const u32 *indir)
+{
+	int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+	struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+								 rss_ctx);
+
+	if (!rss_table)
+		return -EINVAL;
+
+	memcpy(rss_table->indir, indir,
+	       MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+	mvpp22_rss_fill_table(port, rss_table, rss_ctx);
+
+	return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 port_ctx,
+				  u32 *indir)
+{
+	int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+	struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+								 rss_ctx);
+
+	if (!rss_table)
+		return -EINVAL;
+
+	memcpy(indir, rss_table->indir,
+	       MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+	return 0;
+}
+
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
 {
 	u16 hash_opts = 0;
+	u32 flow_type;
 
-	switch (info->flow_type) {
-	case TCP_V4_FLOW:
-	case UDP_V4_FLOW:
-	case TCP_V6_FLOW:
-	case UDP_V6_FLOW:
+	flow_type = mvpp2_cls_ethtool_flow_to_type(info->flow_type);
+
+	switch (flow_type) {
+	case MVPP22_FLOW_TCP4:
+	case MVPP22_FLOW_UDP4:
+	case MVPP22_FLOW_TCP6:
+	case MVPP22_FLOW_UDP6:
 		if (info->data & RXH_L4_B_0_1)
 			hash_opts |= MVPP22_CLS_HEK_OPT_L4SIP;
 		if (info->data & RXH_L4_B_2_3)
 			hash_opts |= MVPP22_CLS_HEK_OPT_L4DIP;
 		/* Fallthrough */
-	case IPV4_FLOW:
-	case IPV6_FLOW:
+	case MVPP22_FLOW_IP4:
+	case MVPP22_FLOW_IP6:
 		if (info->data & RXH_L2DA)
 			hash_opts |= MVPP22_CLS_HEK_OPT_MAC_DA;
 		if (info->data & RXH_VLAN)
@@ -994,15 +1648,18 @@
 	default: return -EOPNOTSUPP;
 	}
 
-	return mvpp2_port_rss_hash_opts_set(port, info->flow_type, hash_opts);
+	return mvpp2_port_rss_hash_opts_set(port, flow_type, hash_opts);
 }
 
 int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
 {
 	unsigned long hash_opts;
+	u32 flow_type;
 	int i;
 
-	hash_opts = mvpp2_port_rss_hash_opts_get(port, info->flow_type);
+	flow_type = mvpp2_cls_ethtool_flow_to_type(info->flow_type);
+
+	hash_opts = mvpp2_port_rss_hash_opts_get(port, flow_type);
 	info->data = 0;
 
 	for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
@@ -1037,38 +1694,40 @@
 	return 0;
 }
 
-void mvpp22_rss_port_init(struct mvpp2_port *port)
+int mvpp22_port_rss_init(struct mvpp2_port *port)
 {
-	struct mvpp2 *priv = port->priv;
-	int i;
+	struct mvpp2_rss_table *table;
+	u32 context = 0;
+	int i, ret;
 
-	/* Set the table width: replace the whole classifier Rx queue number
-	 * with the ones configured in RSS table entries.
-	 */
-	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
-	mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+	for (i = 0; i < MVPP22_N_RSS_TABLES; i++)
+		port->rss_ctx[i] = -1;
 
-	/* The default RxQ is used as a key to select the RSS table to use.
-	 * We use one RSS table per port.
-	 */
-	mvpp2_write(priv, MVPP22_RSS_INDEX,
-		    MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
-	mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
-		    MVPP22_RSS_TABLE_POINTER(port->id));
+	ret = mvpp22_rss_context_create(port, &context);
+	if (ret)
+		return ret;
+
+	table = mvpp22_rss_table_get(port->priv, context);
+	if (!table)
+		return -EINVAL;
+
+	port->rss_ctx[0] = context;
 
 	/* Configure the first table to evenly distribute the packets across
 	 * real Rx Queues. The table entries map a hash to a port Rx Queue.
 	 */
 	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
-		port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
+		table->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
 
-	mvpp22_rss_fill_table(port, port->id);
+	mvpp22_rss_fill_table(port, table, mvpp22_rss_ctx(port, 0));
 
 	/* Configure default flows */
-	mvpp2_port_rss_hash_opts_set(port, IPV4_FLOW, MVPP22_CLS_HEK_IP4_2T);
-	mvpp2_port_rss_hash_opts_set(port, IPV6_FLOW, MVPP22_CLS_HEK_IP6_2T);
-	mvpp2_port_rss_hash_opts_set(port, TCP_V4_FLOW, MVPP22_CLS_HEK_IP4_5T);
-	mvpp2_port_rss_hash_opts_set(port, TCP_V6_FLOW, MVPP22_CLS_HEK_IP6_5T);
-	mvpp2_port_rss_hash_opts_set(port, UDP_V4_FLOW, MVPP22_CLS_HEK_IP4_5T);
-	mvpp2_port_rss_hash_opts_set(port, UDP_V6_FLOW, MVPP22_CLS_HEK_IP6_5T);
+	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP4, MVPP22_CLS_HEK_IP4_2T);
+	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP6, MVPP22_CLS_HEK_IP6_2T);
+	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP4, MVPP22_CLS_HEK_IP4_5T);
+	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP6, MVPP22_CLS_HEK_IP6_5T);
+	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP4, MVPP22_CLS_HEK_IP4_5T);
+	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP6, MVPP22_CLS_HEK_IP6_5T);
+
+	return 0;
 }
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 089f05f..8867f25 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -33,15 +33,16 @@
 };
 
 #define MVPP22_CLS_HEK_OPT_MAC_DA	BIT(0)
-#define MVPP22_CLS_HEK_OPT_VLAN		BIT(1)
-#define MVPP22_CLS_HEK_OPT_L3_PROTO	BIT(2)
-#define MVPP22_CLS_HEK_OPT_IP4SA	BIT(3)
-#define MVPP22_CLS_HEK_OPT_IP4DA	BIT(4)
-#define MVPP22_CLS_HEK_OPT_IP6SA	BIT(5)
-#define MVPP22_CLS_HEK_OPT_IP6DA	BIT(6)
-#define MVPP22_CLS_HEK_OPT_L4SIP	BIT(7)
-#define MVPP22_CLS_HEK_OPT_L4DIP	BIT(8)
-#define MVPP22_CLS_HEK_N_FIELDS		9
+#define MVPP22_CLS_HEK_OPT_VLAN_PRI	BIT(1)
+#define MVPP22_CLS_HEK_OPT_VLAN		BIT(2)
+#define MVPP22_CLS_HEK_OPT_L3_PROTO	BIT(3)
+#define MVPP22_CLS_HEK_OPT_IP4SA	BIT(4)
+#define MVPP22_CLS_HEK_OPT_IP4DA	BIT(5)
+#define MVPP22_CLS_HEK_OPT_IP6SA	BIT(6)
+#define MVPP22_CLS_HEK_OPT_IP6DA	BIT(7)
+#define MVPP22_CLS_HEK_OPT_L4SIP	BIT(8)
+#define MVPP22_CLS_HEK_OPT_L4DIP	BIT(9)
+#define MVPP22_CLS_HEK_N_FIELDS		10
 
 #define MVPP22_CLS_HEK_L4_OPTS	(MVPP22_CLS_HEK_OPT_L4SIP | \
 				 MVPP22_CLS_HEK_OPT_L4DIP)
@@ -59,8 +60,12 @@
 #define MVPP22_CLS_HEK_IP6_5T	(MVPP22_CLS_HEK_IP6_2T | \
 				 MVPP22_CLS_HEK_L4_OPTS)
 
+#define MVPP22_CLS_HEK_TAGGED	(MVPP22_CLS_HEK_OPT_VLAN | \
+				 MVPP22_CLS_HEK_OPT_VLAN_PRI)
+
 enum mvpp2_cls_field_id {
 	MVPP22_CLS_FIELD_MAC_DA = 0x03,
+	MVPP22_CLS_FIELD_VLAN_PRI = 0x05,
 	MVPP22_CLS_FIELD_VLAN = 0x06,
 	MVPP22_CLS_FIELD_L3_PROTO = 0x0f,
 	MVPP22_CLS_FIELD_IP4SA = 0x10,
@@ -71,14 +76,6 @@
 	MVPP22_CLS_FIELD_L4DIP = 0x1e,
 };
 
-enum mvpp2_cls_flow_seq {
-	MVPP2_CLS_FLOW_SEQ_NORMAL = 0,
-	MVPP2_CLS_FLOW_SEQ_FIRST1,
-	MVPP2_CLS_FLOW_SEQ_FIRST2,
-	MVPP2_CLS_FLOW_SEQ_LAST,
-	MVPP2_CLS_FLOW_SEQ_MIDDLE
-};
-
 /* Classifier C2 engine constants */
 #define MVPP22_CLS_C2_TCAM_EN(data)		((data) << 16)
 
@@ -100,39 +97,62 @@
 	MVPP22_C2_FWD_HW_LOW_LAT_LOCK,
 };
 
+enum mvpp22_cls_c2_color_action {
+	MVPP22_C2_COL_NO_UPD = 0,
+	MVPP22_C2_COL_NO_UPD_LOCK,
+	MVPP22_C2_COL_GREEN,
+	MVPP22_C2_COL_GREEN_LOCK,
+	MVPP22_C2_COL_YELLOW,
+	MVPP22_C2_COL_YELLOW_LOCK,
+	MVPP22_C2_COL_RED,		/* Drop */
+	MVPP22_C2_COL_RED_LOCK,		/* Drop */
+};
+
 #define MVPP2_CLS_C2_TCAM_WORDS			5
 #define MVPP2_CLS_C2_ATTR_WORDS			5
 
 struct mvpp2_cls_c2_entry {
 	u32 index;
+	/* TCAM lookup key */
 	u32 tcam[MVPP2_CLS_C2_TCAM_WORDS];
+	/* Actions to perform upon TCAM match */
 	u32 act;
+	/* Attributes relative to the actions to perform */
 	u32 attr[MVPP2_CLS_C2_ATTR_WORDS];
+	/* Entry validity */
+	u8 valid;
 };
 
+#define MVPP22_FLOW_ETHER_BIT	BIT(0)
+#define MVPP22_FLOW_IP4_BIT	BIT(1)
+#define MVPP22_FLOW_IP6_BIT	BIT(2)
+#define MVPP22_FLOW_TCP_BIT	BIT(3)
+#define MVPP22_FLOW_UDP_BIT	BIT(4)
+
+#define MVPP22_FLOW_TCP4	(MVPP22_FLOW_ETHER_BIT | MVPP22_FLOW_IP4_BIT | MVPP22_FLOW_TCP_BIT)
+#define MVPP22_FLOW_TCP6	(MVPP22_FLOW_ETHER_BIT | MVPP22_FLOW_IP6_BIT | MVPP22_FLOW_TCP_BIT)
+#define MVPP22_FLOW_UDP4	(MVPP22_FLOW_ETHER_BIT | MVPP22_FLOW_IP4_BIT | MVPP22_FLOW_UDP_BIT)
+#define MVPP22_FLOW_UDP6	(MVPP22_FLOW_ETHER_BIT | MVPP22_FLOW_IP6_BIT | MVPP22_FLOW_UDP_BIT)
+#define MVPP22_FLOW_IP4		(MVPP22_FLOW_ETHER_BIT | MVPP22_FLOW_IP4_BIT)
+#define MVPP22_FLOW_IP6		(MVPP22_FLOW_ETHER_BIT | MVPP22_FLOW_IP6_BIT)
+#define MVPP22_FLOW_ETHERNET	(MVPP22_FLOW_ETHER_BIT)
+
 /* Classifier C2 engine entries */
-#define MVPP22_CLS_C2_RSS_ENTRY(port)	(port)
-#define MVPP22_CLS_C2_N_ENTRIES		MVPP2_MAX_PORTS
+#define MVPP22_CLS_C2_N_ENTRIES		256
 
-/* RSS flow entries in the flow table. We have 2 entries per port for RSS.
- *
- * The first performs a lookup using the C2 TCAM engine, to tag the
- * packet for software forwarding (needed for RSS), enable or disable RSS, and
- * assign the default rx queue.
- *
- * The second configures the hash generation, by specifying which fields of the
- * packet header are used to generate the hash, and specifies the relevant hash
- * engine to use.
+/* Number of per-port dedicated entries in the C2 TCAM */
+#define MVPP22_CLS_C2_PORT_N_FLOWS	MVPP2_N_RFS_ENTRIES_PER_FLOW
+
+/* Each port has one range per flow type, plus one entry controlling the
+ * global RSS setting and the default rx queue
+ */
-#define MVPP22_RSS_FLOW_C2_OFFS		0
-#define MVPP22_RSS_FLOW_HASH_OFFS	1
-#define MVPP22_RSS_FLOW_SIZE		(MVPP22_RSS_FLOW_HASH_OFFS + 1)
+#define MVPP22_CLS_C2_PORT_RANGE	(MVPP22_CLS_C2_PORT_N_FLOWS + 1)
+#define MVPP22_CLS_C2_PORT_FIRST(p)	((p) * MVPP22_CLS_C2_PORT_RANGE)
+#define MVPP22_CLS_C2_RSS_ENTRY(p)	(MVPP22_CLS_C2_PORT_FIRST((p) + 1) - 1)
 
-#define MVPP22_RSS_FLOW_C2(port)	((port) * MVPP22_RSS_FLOW_SIZE + \
-					 MVPP22_RSS_FLOW_C2_OFFS)
-#define MVPP22_RSS_FLOW_HASH(port)	((port) * MVPP22_RSS_FLOW_SIZE + \
-					 MVPP22_RSS_FLOW_HASH_OFFS)
-#define MVPP22_RSS_FLOW_FIRST(port)	MVPP22_RSS_FLOW_C2(port)
+#define MVPP22_CLS_C2_PORT_FLOW_FIRST(p)	(MVPP22_CLS_C2_PORT_FIRST(p))
+
+#define MVPP22_CLS_C2_RFS_LOC(p, loc)	(MVPP22_CLS_C2_PORT_FLOW_FIRST(p) + (loc))
 
 /* Packet flow ID */
 enum mvpp2_prs_flow {
@@ -162,6 +182,16 @@
 	MVPP2_FL_LAST,
 };
 
+/* LU Type defined for all engines, and specified in the flow table */
+#define MVPP2_CLS_LU_TYPE_MASK			0x3f
+
+enum mvpp2_cls_lu_type {
+	/* rule->loc is used as a lu-type for the entries 0 - 62. */
+	MVPP22_CLS_LU_TYPE_ALL = 63,
+};
+
+#define MVPP2_N_FLOWS		(MVPP2_FL_LAST - MVPP2_FL_START)
+
 struct mvpp2_cls_flow {
 	/* The L2-L4 traffic flow type */
 	int flow_type;
@@ -176,12 +206,48 @@
 	struct mvpp2_prs_result_info prs_ri;
 };
 
-#define MVPP2_N_FLOWS	52
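+/* Each flow id owns a contiguous block of flow table entries: the per-port
+ * C2 RFS entries come first, then a single C2 RSS entry, then one hash
+ * entry per port.
+ */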
+#define MVPP2_CLS_FLT_ENTRIES_PER_FLOW		(MVPP2_MAX_PORTS + 1 + 16)
+#define MVPP2_CLS_FLT_FIRST(id)			(((id) - MVPP2_FL_START) * \
+						 MVPP2_CLS_FLT_ENTRIES_PER_FLOW)
 
-#define MVPP2_ENTRIES_PER_FLOW			(MVPP2_MAX_PORTS + 1)
-#define MVPP2_FLOW_C2_ENTRY(id)			((id) * MVPP2_ENTRIES_PER_FLOW)
-#define MVPP2_PORT_FLOW_HASH_ENTRY(port, id)	((id) * MVPP2_ENTRIES_PER_FLOW + \
-						(port) + 1)
+#define MVPP2_CLS_FLT_C2_RFS(port, id, rfs_n)	(MVPP2_CLS_FLT_FIRST(id) + \
+						 ((port) * MVPP2_MAX_PORTS) + \
+						 (rfs_n))
+
+#define MVPP2_CLS_FLT_C2_RSS_ENTRY(id)		(MVPP2_CLS_FLT_C2_RFS(MVPP2_MAX_PORTS, id, 0))
+#define MVPP2_CLS_FLT_HASH_ENTRY(port, id)	(MVPP2_CLS_FLT_C2_RSS_ENTRY(id) + 1 + (port))
+#define MVPP2_CLS_FLT_LAST(id)			(MVPP2_CLS_FLT_FIRST(id) + \
+						 MVPP2_CLS_FLT_ENTRIES_PER_FLOW - 1)
+
+/* Iterate on each classifier flow id. Sets 'i' to be the index of the first
+ * entry in the cls_flows table for each different flow_id.
+ * This relies on entries having the same flow_id in the cls_flows table being
+ * contiguous.
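+ * The if/else-continue construct keeps the macro usable like a regular for
+ * loop, with the caller's statement binding to the trailing 'else'.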
+ */
+#define for_each_cls_flow_id(i)						      \
+	for ((i) = 0; (i) < MVPP2_N_PRS_FLOWS; (i)++)			      \
+		if ((i) > 0 &&						      \
+		    cls_flows[(i)].flow_id == cls_flows[(i) - 1].flow_id)       \
+			continue;					      \
+		else
+
+/* Iterate on each classifier flow that has a given flow_type. Sets 'i' to be
+ * the index of the first entry in the cls_flows table for each different
+ * flow_id that has the given flow_type. This allows operating on all flows
+ * that match a given ethtool flow type.
+ */
+ */
+#define for_each_cls_flow_id_with_type(i, type)				      \
+	for_each_cls_flow_id((i))					      \
+		if (cls_flows[(i)].flow_type != (type))			      \
+			continue;					      \
+		else
+
+#define for_each_cls_flow_id_containing_type(i, type)			      \
+	for_each_cls_flow_id((i))					      \
+		if ((cls_flows[(i)].flow_type & (type)) != (type))	      \
+			continue;					      \
+		else
+
 struct mvpp2_cls_flow_entry {
 	u32 index;
 	u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS];
@@ -193,12 +259,18 @@
 	u32 data;
 };
 
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
+int mvpp22_port_rss_init(struct mvpp2_port *port);
 
-void mvpp22_rss_port_init(struct mvpp2_port *port);
+int mvpp22_port_rss_enable(struct mvpp2_port *port);
+int mvpp22_port_rss_disable(struct mvpp2_port *port);
 
-void mvpp22_rss_enable(struct mvpp2_port *port);
-void mvpp22_rss_disable(struct mvpp2_port *port);
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx);
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx);
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx,
+				  const u32 *indir);
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 rss_ctx,
+				  u32 *indir);
 
 int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
@@ -213,7 +285,7 @@
 
 u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe);
 
-struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
+const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
 
 u32 mvpp2_cls_flow_hits(struct mvpp2 *priv, int index);
 
@@ -230,4 +302,13 @@
 void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
 		       struct mvpp2_cls_c2_entry *c2);
 
+int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port,
+			       struct ethtool_rxnfc *rxnfc);
+
+int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
+			       struct ethtool_rxnfc *info);
+
+int mvpp2_ethtool_cls_rule_del(struct mvpp2_port *port,
+			       struct ethtool_rxnfc *info);
+
 #endif
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
index f9744a6..4a3baa7 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
@@ -18,22 +18,48 @@
 	struct mvpp2 *priv;
 };
 
+struct mvpp2_dbgfs_c2_entry {
+	int id;
+	struct mvpp2 *priv;
+};
+
 struct mvpp2_dbgfs_flow_entry {
 	int flow;
 	struct mvpp2 *priv;
 };
 
+struct mvpp2_dbgfs_flow_tbl_entry {
+	int id;
+	struct mvpp2 *priv;
+};
+
 struct mvpp2_dbgfs_port_flow_entry {
 	struct mvpp2_port *port;
 	struct mvpp2_dbgfs_flow_entry *dbg_fe;
 };
 
+struct mvpp2_dbgfs_entries {
+	/* Entries for Header Parser debug info */
+	struct mvpp2_dbgfs_prs_entry prs_entries[MVPP2_PRS_TCAM_SRAM_SIZE];
+
+	/* Entries for Classifier C2 engine debug info */
+	struct mvpp2_dbgfs_c2_entry c2_entries[MVPP22_CLS_C2_N_ENTRIES];
+
+	/* Entries for Classifier Flow Table debug info */
+	struct mvpp2_dbgfs_flow_tbl_entry flt_entries[MVPP2_CLS_FLOWS_TBL_SIZE];
+
+	/* Entries for Classifier flows debug info */
+	struct mvpp2_dbgfs_flow_entry flow_entries[MVPP2_N_PRS_FLOWS];
+
+	/* Entries for per-port flows debug info */
+	struct mvpp2_dbgfs_port_flow_entry port_flow_entries[MVPP2_MAX_PORTS];
+};
+
 static int mvpp2_dbgfs_flow_flt_hits_show(struct seq_file *s, void *unused)
 {
-	struct mvpp2_dbgfs_flow_entry *entry = s->private;
-	int id = MVPP2_FLOW_C2_ENTRY(entry->flow);
+	struct mvpp2_dbgfs_flow_tbl_entry *entry = s->private;
 
-	u32 hits = mvpp2_cls_flow_hits(entry->priv, id);
+	u32 hits = mvpp2_cls_flow_hits(entry->priv, entry->id);
 
 	seq_printf(s, "%u\n", hits);
 
@@ -58,7 +84,7 @@
 static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused)
 {
 	struct mvpp2_dbgfs_flow_entry *entry = s->private;
-	struct mvpp2_cls_flow *f;
+	const struct mvpp2_cls_flow *f;
 	const char *flow_name;
 
 	f = mvpp2_cls_flow_get(entry->flow);
@@ -93,30 +119,12 @@
 	return 0;
 }
 
-static int mvpp2_dbgfs_flow_type_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mvpp2_dbgfs_flow_type_show, inode->i_private);
-}
-
-static int mvpp2_dbgfs_flow_type_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct mvpp2_dbgfs_flow_entry *flow_entry = seq->private;
-
-	kfree(flow_entry);
-	return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_flow_type_fops = {
-	.open = mvpp2_dbgfs_flow_type_open,
-	.read = seq_read,
-	.release = mvpp2_dbgfs_flow_type_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_type);
 
 static int mvpp2_dbgfs_flow_id_show(struct seq_file *s, void *unused)
 {
-	struct mvpp2_dbgfs_flow_entry *entry = s->private;
-	struct mvpp2_cls_flow *f;
+	const struct mvpp2_dbgfs_flow_entry *entry = s->private;
+	const struct mvpp2_cls_flow *f;
 
 	f = mvpp2_cls_flow_get(entry->flow);
 	if (!f)
@@ -134,7 +142,7 @@
 	struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
 	struct mvpp2_port *port = entry->port;
 	struct mvpp2_cls_flow_entry fe;
-	struct mvpp2_cls_flow *f;
+	const struct mvpp2_cls_flow *f;
 	int flow_index;
 	u16 hash_opts;
 
@@ -142,7 +150,7 @@
 	if (!f)
 		return -EINVAL;
 
-	flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+	flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id);
 
 	mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -153,42 +161,21 @@
 	return 0;
 }
 
-static int mvpp2_dbgfs_port_flow_hash_opt_open(struct inode *inode,
-					       struct file *file)
-{
-	return single_open(file, mvpp2_dbgfs_port_flow_hash_opt_show,
-			   inode->i_private);
-}
-
-static int mvpp2_dbgfs_port_flow_hash_opt_release(struct inode *inode,
-						  struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct mvpp2_dbgfs_port_flow_entry *flow_entry = seq->private;
-
-	kfree(flow_entry);
-	return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_port_flow_hash_opt_fops = {
-	.open = mvpp2_dbgfs_port_flow_hash_opt_open,
-	.read = seq_read,
-	.release = mvpp2_dbgfs_port_flow_hash_opt_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_hash_opt);
 
 static int mvpp2_dbgfs_port_flow_engine_show(struct seq_file *s, void *unused)
 {
 	struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
 	struct mvpp2_port *port = entry->port;
 	struct mvpp2_cls_flow_entry fe;
-	struct mvpp2_cls_flow *f;
+	const struct mvpp2_cls_flow *f;
 	int flow_index, engine;
 
 	f = mvpp2_cls_flow_get(entry->dbg_fe->flow);
 	if (!f)
 		return -EINVAL;
 
-	flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+	flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id);
 
 	mvpp2_cls_flow_read(port->priv, flow_index, &fe);
 
@@ -203,11 +190,10 @@
 
 static int mvpp2_dbgfs_flow_c2_hits_show(struct seq_file *s, void *unused)
 {
-	struct mvpp2_port *port = s->private;
+	struct mvpp2_dbgfs_c2_entry *entry = s->private;
 	u32 hits;
 
-	hits = mvpp2_cls_c2_hit_count(port->priv,
-				      MVPP22_CLS_C2_RSS_ENTRY(port->id));
+	hits = mvpp2_cls_c2_hit_count(entry->priv, entry->id);
 
 	seq_printf(s, "%u\n", hits);
 
@@ -218,11 +204,11 @@
 
 static int mvpp2_dbgfs_flow_c2_rxq_show(struct seq_file *s, void *unused)
 {
-	struct mvpp2_port *port = s->private;
+	struct mvpp2_dbgfs_c2_entry *entry = s->private;
 	struct mvpp2_cls_c2_entry c2;
 	u8 qh, ql;
 
-	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+	mvpp2_cls_c2_read(entry->priv, entry->id, &c2);
 
 	qh = (c2.attr[0] >> MVPP22_CLS_C2_ATTR0_QHIGH_OFFS) &
 	     MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
@@ -239,11 +225,11 @@
 
 static int mvpp2_dbgfs_flow_c2_enable_show(struct seq_file *s, void *unused)
 {
-	struct mvpp2_port *port = s->private;
+	struct mvpp2_dbgfs_c2_entry *entry = s->private;
 	struct mvpp2_cls_c2_entry c2;
 	int enabled;
 
-	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+	mvpp2_cls_c2_read(entry->priv, entry->id, &c2);
 
 	enabled = !!(c2.attr[2] & MVPP22_CLS_C2_ATTR2_RSS_EN);
 
@@ -456,25 +442,7 @@
 	return 0;
 }
 
-static int mvpp2_dbgfs_prs_valid_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mvpp2_dbgfs_prs_valid_show, inode->i_private);
-}
-
-static int mvpp2_dbgfs_prs_valid_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct mvpp2_dbgfs_prs_entry *entry = seq->private;
-
-	kfree(entry);
-	return single_release(inode, file);
-}
-
-static const struct file_operations mvpp2_dbgfs_prs_valid_fops = {
-	.open = mvpp2_dbgfs_prs_valid_open,
-	.read = seq_read,
-	.release = mvpp2_dbgfs_prs_valid_release,
-};
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_valid);
 
 static int mvpp2_dbgfs_flow_port_init(struct dentry *parent,
 				      struct mvpp2_port *port,
@@ -484,13 +452,8 @@
 	struct dentry *port_dir;
 
 	port_dir = debugfs_create_dir(port->dev->name, parent);
-	if (IS_ERR(port_dir))
-		return PTR_ERR(port_dir);
 
-	/* This will be freed by 'hash_opts' release op */
-	port_entry = kmalloc(sizeof(*port_entry), GFP_KERNEL);
-	if (!port_entry)
-		return -ENOMEM;
+	port_entry = &port->priv->dbgfs_entries->port_flow_entries[port->id];
 
 	port_entry->port = port;
 	port_entry->dbg_fe = entry;
@@ -515,20 +478,12 @@
 	sprintf(flow_entry_name, "%02d", flow);
 
 	flow_entry_dir = debugfs_create_dir(flow_entry_name, parent);
-	if (!flow_entry_dir)
-		return -ENOMEM;
 
-	/* This will be freed by 'type' release op */
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
+	entry = &priv->dbgfs_entries->flow_entries[flow];
 
 	entry->flow = flow;
 	entry->priv = priv;
 
-	debugfs_create_file("flow_hits", 0444, flow_entry_dir, entry,
-			    &mvpp2_dbgfs_flow_flt_hits_fops);
-
 	debugfs_create_file("dec_hits", 0444, flow_entry_dir, entry,
 			    &mvpp2_dbgfs_flow_dec_hits_fops);
 
@@ -545,6 +500,7 @@
 		if (ret)
 			return ret;
 	}
+
 	return 0;
 }
 
@@ -554,10 +510,8 @@
 	int i, ret;
 
 	flow_dir = debugfs_create_dir("flows", parent);
-	if (!flow_dir)
-		return -ENOMEM;
 
-	for (i = 0; i < MVPP2_N_FLOWS; i++) {
+	for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) {
 		ret = mvpp2_dbgfs_flow_entry_init(flow_dir, priv, i);
 		if (ret)
 			return ret;
@@ -579,13 +533,8 @@
 	sprintf(prs_entry_name, "%03d", tid);
 
 	prs_entry_dir = debugfs_create_dir(prs_entry_name, parent);
-	if (!prs_entry_dir)
-		return -ENOMEM;
 
-	/* The 'valid' entry's ops will free that */
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
+	entry = &priv->dbgfs_entries->prs_entries[tid];
 
 	entry->tid = tid;
 	entry->priv = priv;
@@ -609,6 +558,9 @@
 	debugfs_create_file("hits", 0444, prs_entry_dir, entry,
 			    &mvpp2_dbgfs_prs_hits_fops);
 
+	debugfs_create_file("pmap", 0444, prs_entry_dir, entry,
+			     &mvpp2_dbgfs_prs_pmap_fops);
+
 	return 0;
 }
 
@@ -618,8 +570,6 @@
 	int i, ret;
 
 	prs_dir = debugfs_create_dir("parser", parent);
-	if (!prs_dir)
-		return -ENOMEM;
 
 	for (i = 0; i < MVPP2_PRS_TCAM_SRAM_SIZE; i++) {
 		ret = mvpp2_dbgfs_prs_entry_init(prs_dir, priv, i);
@@ -630,14 +580,104 @@
 	return 0;
 }
 
+static int mvpp2_dbgfs_c2_entry_init(struct dentry *parent,
+				     struct mvpp2 *priv, int id)
+{
+	struct mvpp2_dbgfs_c2_entry *entry;
+	struct dentry *c2_entry_dir;
+	char c2_entry_name[10];
+
+	if (id >= MVPP22_CLS_C2_N_ENTRIES)
+		return -EINVAL;
+
+	sprintf(c2_entry_name, "%03d", id);
+
+	c2_entry_dir = debugfs_create_dir(c2_entry_name, parent);
+	if (!c2_entry_dir)
+		return -ENOMEM;
+
+	entry = &priv->dbgfs_entries->c2_entries[id];
+
+	entry->id = id;
+	entry->priv = priv;
+
+	debugfs_create_file("hits", 0444, c2_entry_dir, entry,
+			    &mvpp2_dbgfs_flow_c2_hits_fops);
+
+	debugfs_create_file("default_rxq", 0444, c2_entry_dir, entry,
+			    &mvpp2_dbgfs_flow_c2_rxq_fops);
+
+	debugfs_create_file("rss_enable", 0444, c2_entry_dir, entry,
+			    &mvpp2_dbgfs_flow_c2_enable_fops);
+
+	return 0;
+}
+
+static int mvpp2_dbgfs_flow_tbl_entry_init(struct dentry *parent,
+					   struct mvpp2 *priv, int id)
+{
+	struct mvpp2_dbgfs_flow_tbl_entry *entry;
+	struct dentry *flow_tbl_entry_dir;
+	char flow_tbl_entry_name[10];
+
+	if (id >= MVPP2_CLS_FLOWS_TBL_SIZE)
+		return -EINVAL;
+
+	sprintf(flow_tbl_entry_name, "%03d", id);
+
+	flow_tbl_entry_dir = debugfs_create_dir(flow_tbl_entry_name, parent);
+	if (!flow_tbl_entry_dir)
+		return -ENOMEM;
+
+	entry = &priv->dbgfs_entries->flt_entries[id];
+
+	entry->id = id;
+	entry->priv = priv;
+
+	debugfs_create_file("hits", 0444, flow_tbl_entry_dir, entry,
+			    &mvpp2_dbgfs_flow_flt_hits_fops);
+
+	return 0;
+}
+
+static int mvpp2_dbgfs_cls_init(struct dentry *parent, struct mvpp2 *priv)
+{
+	struct dentry *cls_dir, *c2_dir, *flow_tbl_dir;
+	int i, ret;
+
+	cls_dir = debugfs_create_dir("classifier", parent);
+	if (!cls_dir)
+		return -ENOMEM;
+
+	c2_dir = debugfs_create_dir("c2", cls_dir);
+	if (!c2_dir)
+		return -ENOMEM;
+
+	for (i = 0; i < MVPP22_CLS_C2_N_ENTRIES; i++) {
+		ret = mvpp2_dbgfs_c2_entry_init(c2_dir, priv, i);
+		if (ret)
+			return ret;
+	}
+
+	flow_tbl_dir = debugfs_create_dir("flow_table", cls_dir);
+	if (!flow_tbl_dir)
+		return -ENOMEM;
+
+	for (i = 0; i < MVPP2_CLS_FLOWS_TBL_SIZE; i++) {
+		ret = mvpp2_dbgfs_flow_tbl_entry_init(flow_tbl_dir, priv, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int mvpp2_dbgfs_port_init(struct dentry *parent,
 				 struct mvpp2_port *port)
 {
 	struct dentry *port_dir;
 
 	port_dir = debugfs_create_dir(port->dev->name, parent);
-	if (IS_ERR(port_dir))
-		return PTR_ERR(port_dir);
 
 	debugfs_create_file("parser_entries", 0444, port_dir, port,
 			    &mvpp2_dbgfs_port_parser_fops);
@@ -648,21 +688,14 @@
 	debugfs_create_file("vid_filter", 0444, port_dir, port,
 			    &mvpp2_dbgfs_port_vid_fops);
 
-	debugfs_create_file("c2_hits", 0444, port_dir, port,
-			    &mvpp2_dbgfs_flow_c2_hits_fops);
-
-	debugfs_create_file("default_rxq", 0444, port_dir, port,
-			    &mvpp2_dbgfs_flow_c2_rxq_fops);
-
-	debugfs_create_file("rss_enable", 0444, port_dir, port,
-			    &mvpp2_dbgfs_flow_c2_enable_fops);
-
 	return 0;
 }
 
 void mvpp2_dbgfs_cleanup(struct mvpp2 *priv)
 {
 	debugfs_remove_recursive(priv->dbgfs_dir);
+
+	kfree(priv->dbgfs_entries);
 }
 
 void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
@@ -671,22 +704,24 @@
 	int ret, i;
 
 	mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL);
-	if (!mvpp2_root) {
+	if (!mvpp2_root)
 		mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL);
-		if (IS_ERR(mvpp2_root))
-			return;
-	}
 
 	mvpp2_dir = debugfs_create_dir(name, mvpp2_root);
-	if (IS_ERR(mvpp2_dir))
-		return;
 
 	priv->dbgfs_dir = mvpp2_dir;
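+	/* All debugfs entry private data is backed by a single allocation,
+	 * freed in mvpp2_dbgfs_cleanup()
+	 */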
+	priv->dbgfs_entries = kzalloc(sizeof(*priv->dbgfs_entries), GFP_KERNEL);
+	if (!priv->dbgfs_entries)
+		goto err;
 
 	ret = mvpp2_dbgfs_prs_init(mvpp2_dir, priv);
 	if (ret)
 		goto err;
 
+	ret = mvpp2_dbgfs_cls_init(mvpp2_dir, priv);
+	if (ret)
+		goto err;
+
 	for (i = 0; i < priv->port_count; i++) {
 		ret = mvpp2_dbgfs_port_init(mvpp2_dir, priv->port_list[i]);
 		if (ret)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 6320e08..111b3b8 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -56,9 +56,9 @@
 /* The prototype is added here to be used in start_dev when using ACPI. This
  * will be removed once phylink is used for all modes (dt+ACPI).
  */
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
 			     const struct phylink_link_state *state);
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
 			      phy_interface_t interface, struct phy_device *phy);
 
 /* Queue modes */
@@ -82,13 +82,19 @@
 	return readl(priv->swth_base[0] + offset);
 }
 
-u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset)
+static u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset)
 {
 	return readl_relaxed(priv->swth_base[0] + offset);
 }
+
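+/* Map a CPU onto one of the available hardware register threads */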
+static inline u32 mvpp2_cpu_to_thread(struct mvpp2 *priv, int cpu)
+{
+	return cpu % priv->nthreads;
+}
+
 /* These accessors should be used to access:
  *
- * - per-CPU registers, where each CPU has its own copy of the
+ * - per-thread registers, where each thread has its own copy of the
  *   register.
  *
  *   MVPP2_BM_VIRT_ALLOC_REG
@@ -104,8 +110,8 @@
  *   MVPP2_TXQ_SENT_REG
  *   MVPP2_RXQ_NUM_REG
  *
- * - global registers that must be accessed through a specific CPU
- *   window, because they are related to an access to a per-CPU
+ * - global registers that must be accessed through a specific thread
+ *   window, because they are related to an access to a per-thread
  *   register
  *
  *   MVPP2_BM_PHY_ALLOC_REG    (related to MVPP2_BM_VIRT_ALLOC_REG)
@@ -122,28 +128,28 @@
  *   MVPP2_TXQ_PREF_BUF_REG    (related to MVPP2_TXQ_NUM_REG)
  *   MVPP2_TXQ_PREF_BUF_REG    (related to MVPP2_TXQ_NUM_REG)
  */
-void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
+static void mvpp2_thread_write(struct mvpp2 *priv, unsigned int thread,
 			       u32 offset, u32 data)
 {
-	writel(data, priv->swth_base[cpu] + offset);
+	writel(data, priv->swth_base[thread] + offset);
 }
 
-u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
+static u32 mvpp2_thread_read(struct mvpp2 *priv, unsigned int thread,
 			     u32 offset)
 {
-	return readl(priv->swth_base[cpu] + offset);
+	return readl(priv->swth_base[thread] + offset);
 }
 
-void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu,
+static void mvpp2_thread_write_relaxed(struct mvpp2 *priv, unsigned int thread,
 				       u32 offset, u32 data)
 {
-	writel_relaxed(data, priv->swth_base[cpu] + offset);
+	writel_relaxed(data, priv->swth_base[thread] + offset);
 }
 
-static u32 mvpp2_percpu_read_relaxed(struct mvpp2 *priv, int cpu,
+static u32 mvpp2_thread_read_relaxed(struct mvpp2 *priv, unsigned int thread,
 				     u32 offset)
 {
-	return readl_relaxed(priv->swth_base[cpu] + offset);
+	return readl_relaxed(priv->swth_base[thread] + offset);
 }
 
 static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
@@ -286,6 +292,26 @@
 		txq_pcpu->txq_put_index = 0;
 }
 
+/* Get the maximum number of RX queues */
+static int mvpp2_get_nrxqs(struct mvpp2 *priv)
+{
+	unsigned int nrxqs;
+
+	if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE)
+		return 1;
+
+	/* According to the PPv2.2 datasheet and our experiments on
+	 * PPv2.1, RX queues have an allocation granularity of 4 (when
+	 * more than a single queue is used on PPv2.2).
+	 * Round up to the nearest multiple of 4.
+	 */
+	nrxqs = (num_possible_cpus() + 3) & ~0x3;
+	if (nrxqs > MVPP2_PORT_MAX_RXQ)
+		nrxqs = MVPP2_PORT_MAX_RXQ;
+
+	return nrxqs;
+}
+
 /* Get number of physical egress port */
 static inline int mvpp2_egress_port(struct mvpp2_port *port)
 {
@@ -317,8 +343,7 @@
 /* Buffer Manager configuration routines */
 
 /* Create pool */
-static int mvpp2_bm_pool_create(struct platform_device *pdev,
-				struct mvpp2 *priv,
+static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv,
 				struct mvpp2_bm_pool *bm_pool, int size)
 {
 	u32 val;
@@ -337,7 +362,7 @@
 	else
 		bm_pool->size_bytes = 2 * sizeof(u64) * size;
 
-	bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes,
+	bm_pool->virt_addr = dma_alloc_coherent(dev, bm_pool->size_bytes,
 						&bm_pool->dma_addr,
 						GFP_KERNEL);
 	if (!bm_pool->virt_addr)
@@ -345,9 +370,9 @@
 
 	if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr,
 			MVPP2_BM_POOL_PTR_ALIGN)) {
-		dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+		dma_free_coherent(dev, bm_pool->size_bytes,
 				  bm_pool->virt_addr, bm_pool->dma_addr);
-		dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
+		dev_err(dev, "BM pool %d is not %d bytes aligned\n",
 			bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
 		return -ENOMEM;
 	}
@@ -385,17 +410,17 @@
 				    dma_addr_t *dma_addr,
 				    phys_addr_t *phys_addr)
 {
-	int cpu = get_cpu();
+	unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu());
 
-	*dma_addr = mvpp2_percpu_read(priv, cpu,
+	*dma_addr = mvpp2_thread_read(priv, thread,
 				      MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
-	*phys_addr = mvpp2_percpu_read(priv, cpu, MVPP2_BM_VIRT_ALLOC_REG);
+	*phys_addr = mvpp2_thread_read(priv, thread, MVPP2_BM_VIRT_ALLOC_REG);
 
 	if (priv->hw_version == MVPP22) {
 		u32 val;
 		u32 dma_addr_highbits, phys_addr_highbits;
 
-		val = mvpp2_percpu_read(priv, cpu, MVPP22_BM_ADDR_HIGH_ALLOC);
+		val = mvpp2_thread_read(priv, thread, MVPP22_BM_ADDR_HIGH_ALLOC);
 		dma_addr_highbits = (val & MVPP22_BM_ADDR_HIGH_PHYS_MASK);
 		phys_addr_highbits = (val & MVPP22_BM_ADDR_HIGH_VIRT_MASK) >>
 			MVPP22_BM_ADDR_HIGH_VIRT_SHIFT;
@@ -462,15 +487,14 @@
 }
 
 /* Cleanup pool */
-static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
-				 struct mvpp2 *priv,
+static int mvpp2_bm_pool_destroy(struct device *dev, struct mvpp2 *priv,
 				 struct mvpp2_bm_pool *bm_pool)
 {
 	int buf_num;
 	u32 val;
 
 	buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
-	mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+	mvpp2_bm_bufs_free(dev, priv, bm_pool, buf_num);
 
 	/* Check buffer counters after free */
 	buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
@@ -484,24 +508,26 @@
 	val |= MVPP2_BM_STOP_MASK;
 	mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
 
-	dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
+	dma_free_coherent(dev, bm_pool->size_bytes,
 			  bm_pool->virt_addr,
 			  bm_pool->dma_addr);
 	return 0;
 }
 
-static int mvpp2_bm_pools_init(struct platform_device *pdev,
-			       struct mvpp2 *priv)
+static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv)
 {
-	int i, err, size;
+	int i, err, size, poolnum = MVPP2_BM_POOLS_NUM;
 	struct mvpp2_bm_pool *bm_pool;
 
+	if (priv->percpu_pools)
+		poolnum = mvpp2_get_nrxqs(priv) * 2;
+
 	/* Create all pools with maximum size */
 	size = MVPP2_BM_POOL_SIZE_MAX;
-	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+	for (i = 0; i < poolnum; i++) {
 		bm_pool = &priv->bm_pools[i];
 		bm_pool->id = i;
-		err = mvpp2_bm_pool_create(pdev, priv, bm_pool, size);
+		err = mvpp2_bm_pool_create(dev, priv, bm_pool, size);
 		if (err)
 			goto err_unroll_pools;
 		mvpp2_bm_pool_bufsize_set(priv, bm_pool, 0);
@@ -509,17 +535,23 @@
 	return 0;
 
 err_unroll_pools:
-	dev_err(&pdev->dev, "failed to create BM pool %d, size %d\n", i, size);
+	dev_err(dev, "failed to create BM pool %d, size %d\n", i, size);
 	for (i = i - 1; i >= 0; i--)
-		mvpp2_bm_pool_destroy(pdev, priv, &priv->bm_pools[i]);
+		mvpp2_bm_pool_destroy(dev, priv, &priv->bm_pools[i]);
 	return err;
 }
 
-static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
+static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
 {
-	int i, err;
+	int i, err, poolnum = MVPP2_BM_POOLS_NUM;
 
-	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
+	if (priv->percpu_pools)
+		poolnum = mvpp2_get_nrxqs(priv) * 2;
+
+	dev_info(dev, "using %d %s buffers\n", poolnum,
+		 priv->percpu_pools ? "per-cpu" : "shared");
+
+	for (i = 0; i < poolnum; i++) {
 		/* Mask BM all interrupts */
 		mvpp2_write(priv, MVPP2_BM_INTR_MASK_REG(i), 0);
 		/* Clear BM cause register */
@@ -527,12 +559,12 @@
 	}
 
 	/* Allocate and initialize BM pools */
-	priv->bm_pools = devm_kcalloc(&pdev->dev, MVPP2_BM_POOLS_NUM,
+	priv->bm_pools = devm_kcalloc(dev, poolnum,
 				      sizeof(*priv->bm_pools), GFP_KERNEL);
 	if (!priv->bm_pools)
 		return -ENOMEM;
 
-	err = mvpp2_bm_pools_init(pdev, priv);
+	err = mvpp2_bm_pools_init(dev, priv);
 	if (err < 0)
 		return err;
 	return 0;
@@ -626,7 +658,11 @@
 				     dma_addr_t buf_dma_addr,
 				     phys_addr_t buf_phys_addr)
 {
-	int cpu = get_cpu();
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
+	unsigned long flags = 0;
+
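+	/* lock_map flags the register thread windows that are shared by
+	 * several CPUs; buffer releases through such a window must be
+	 * serialized.
+	 */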
+	if (test_bit(thread, &port->priv->lock_map))
+		spin_lock_irqsave(&port->bm_lock[thread], flags);
 
 	if (port->priv->hw_version == MVPP22) {
 		u32 val = 0;
@@ -640,7 +676,7 @@
 				<< MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
 				MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;
 
-		mvpp2_percpu_write_relaxed(port->priv, cpu,
+		mvpp2_thread_write_relaxed(port->priv, thread,
 					   MVPP22_BM_ADDR_HIGH_RLS_REG, val);
 	}
 
@@ -649,11 +685,14 @@
 	 * descriptor. Instead of storing the virtual address, we
 	 * store the physical address
 	 */
-	mvpp2_percpu_write_relaxed(port->priv, cpu,
+	mvpp2_thread_write_relaxed(port->priv, thread,
 				   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
-	mvpp2_percpu_write_relaxed(port->priv, cpu,
+	mvpp2_thread_write_relaxed(port->priv, thread,
 				   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
 
+	if (test_bit(thread, &port->priv->lock_map))
+		spin_unlock_irqrestore(&port->bm_lock[thread], flags);
+
 	put_cpu();
 }
 
@@ -666,6 +705,13 @@
 	phys_addr_t phys_addr;
 	void *buf;
 
+	if (port->priv->percpu_pools &&
+	    bm_pool->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+		netdev_err(port->dev,
+			   "attempted to use jumbo frames with per-cpu pools");
+		return 0;
+	}
+
 	buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
 	total_size = MVPP2_RX_TOTAL_SIZE(buf_size);
 
@@ -709,7 +755,64 @@
 	struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
 	int num;
 
-	if (pool >= MVPP2_BM_POOLS_NUM) {
+	if ((port->priv->percpu_pools && pool >= mvpp2_get_nrxqs(port->priv) * 2) ||
+	    (!port->priv->percpu_pools && pool >= MVPP2_BM_POOLS_NUM)) {
+		netdev_err(port->dev, "Invalid pool %d\n", pool);
+		return NULL;
+	}
+
+	/* Allocate buffers in case BM pool is used as long pool, but packet
+	 * size doesn't match MTU or BM pool hasn't been used yet
+	 */
+	if (new_pool->pkt_size == 0) {
+		int pkts_num;
+
+		/* Set default buffer number or free all the buffers in case
+		 * the pool is not empty
+		 */
+		pkts_num = new_pool->buf_num;
+		if (pkts_num == 0) {
+			if (port->priv->percpu_pools) {
+				if (pool < port->nrxqs)
+					pkts_num = mvpp2_pools[MVPP2_BM_SHORT].buf_num;
+				else
+					pkts_num = mvpp2_pools[MVPP2_BM_LONG].buf_num;
+			} else {
+				pkts_num = mvpp2_pools[pool].buf_num;
+			}
+		} else {
+			mvpp2_bm_bufs_free(port->dev->dev.parent,
+					   port->priv, new_pool, pkts_num);
+		}
+
+		new_pool->pkt_size = pkt_size;
+		new_pool->frag_size =
+			SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
+			MVPP2_SKB_SHINFO_SIZE;
+
+		/* Allocate buffers for this pool */
+		num = mvpp2_bm_bufs_add(port, new_pool, pkts_num);
+		if (num != pkts_num) {
+			WARN(1, "pool %d: %d of %d allocated\n",
+			     new_pool->id, num, pkts_num);
+			return NULL;
+		}
+	}
+
+	mvpp2_bm_pool_bufsize_set(port->priv, new_pool,
+				  MVPP2_RX_BUF_SIZE(new_pool->pkt_size));
+
+	return new_pool;
+}
+
+static struct mvpp2_bm_pool *
+mvpp2_bm_pool_use_percpu(struct mvpp2_port *port, int type,
+			 unsigned int pool, int pkt_size)
+{
+	struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
+	int num;
+
+	if (pool > port->nrxqs * 2) {
 		netdev_err(port->dev, "Invalid pool %d\n", pool);
 		return NULL;
 	}
@@ -725,7 +828,7 @@
 		 */
 		pkts_num = new_pool->buf_num;
 		if (pkts_num == 0)
-			pkts_num = mvpp2_pools[pool].buf_num;
+			pkts_num = mvpp2_pools[type].buf_num;
 		else
 			mvpp2_bm_bufs_free(port->dev->dev.parent,
 					   port->priv, new_pool, pkts_num);
@@ -750,11 +853,11 @@
 	return new_pool;
 }
 
-/* Initialize pools for swf */
-static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
+/* Initialize pools for swf, shared buffers variant */
+static int mvpp2_swf_bm_pool_init_shared(struct mvpp2_port *port)
 {
-	int rxq;
 	enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+	int rxq;
 
 	/* If port pkt_size is higher than 1518B:
 	 * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
@@ -798,12 +901,76 @@
 	return 0;
 }
 
+/* Initialize pools for swf, percpu buffers variant */
+static int mvpp2_swf_bm_pool_init_percpu(struct mvpp2_port *port)
+{
+	struct mvpp2_bm_pool *p;
+	int i;
+
+	for (i = 0; i < port->nrxqs; i++) {
+		p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_SHORT, i,
+					     mvpp2_pools[MVPP2_BM_SHORT].pkt_size);
+		if (!p)
+			return -ENOMEM;
+
+		port->priv->bm_pools[i].port_map |= BIT(port->id);
+		mvpp2_rxq_short_pool_set(port, i, port->priv->bm_pools[i].id);
+	}
+
+	for (i = 0; i < port->nrxqs; i++) {
+		p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_LONG, i + port->nrxqs,
+					     mvpp2_pools[MVPP2_BM_LONG].pkt_size);
+		if (!p)
+			return -ENOMEM;
+
+		port->priv->bm_pools[i + port->nrxqs].port_map |= BIT(port->id);
+		mvpp2_rxq_long_pool_set(port, i,
+					port->priv->bm_pools[i + port->nrxqs].id);
+	}
+
+	port->pool_long = NULL;
+	port->pool_short = NULL;
+
+	return 0;
+}
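+
+With per-cpu pools, the pool array is laid out as nrxqs short pools followed by nrxqs long pools, so rxq i is paired with short pool i and long pool i + nrxqs. A stand-alone illustration of that indexing (the queue count is hypothetical; the real value comes from the port):

	#include <stdio.h>

	int main(void)
	{
		int nrxqs = 4, rxq;	/* assumed queue count */

		for (rxq = 0; rxq < nrxqs; rxq++)
			printf("rxq %d -> short pool %d, long pool %d\n",
			       rxq, rxq, rxq + nrxqs);
		return 0;
	}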
+
+static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
+{
+	if (port->priv->percpu_pools)
+		return mvpp2_swf_bm_pool_init_percpu(port);
+	else
+		return mvpp2_swf_bm_pool_init_shared(port);
+}
+
+static void mvpp2_set_hw_csum(struct mvpp2_port *port,
+			      enum mvpp2_bm_pool_log_num new_long_pool)
+{
+	const netdev_features_t csums = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+	/* Update L4 checksum when jumbo enable/disable on port.
+	 * Only port 0 supports hardware checksum offload due to
+	 * the Tx FIFO size limitation.
+	 * Also, don't set NETIF_F_HW_CSUM because L3_offset in TX descriptor
+	 * has 7 bits, so the maximum L3 offset is 128.
+	 */
+	if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) {
+		port->dev->features &= ~csums;
+		port->dev->hw_features &= ~csums;
+	} else {
+		port->dev->features |= csums;
+		port->dev->hw_features |= csums;
+	}
+}
+
 static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	enum mvpp2_bm_pool_log_num new_long_pool;
 	int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
 
+	if (port->priv->percpu_pools)
+		goto out_set;
+
 	/* If port MTU is higher than 1518B:
 	 * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
 	 * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
@@ -830,17 +997,10 @@
 		/* Add port to new short & long pool */
 		mvpp2_swf_bm_pool_init(port);
 
-		/* Update L4 checksum when jumbo enable/disable on port */
-		if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) {
-			dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-			dev->hw_features &= ~(NETIF_F_IP_CSUM |
-					      NETIF_F_IPV6_CSUM);
-		} else {
-			dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-			dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-		}
+		mvpp2_set_hw_csum(port, new_long_pool);
 	}
 
+out_set:
 	dev->mtu = mtu;
 	dev->wanted_features = dev->features;
 
@@ -886,7 +1046,7 @@
 		    MVPP2_ISR_DISABLE_INTERRUPT(qvec->sw_thread_mask));
 }
 
-/* Mask the current CPU's Rx/Tx interrupts
+/* Mask the current thread's Rx/Tx interrupts
  * Called by on_each_cpu(), guaranteed to run with migration disabled,
  * using smp_processor_id() is OK.
  */
@@ -894,11 +1054,16 @@
 {
 	struct mvpp2_port *port = arg;
 
-	mvpp2_percpu_write(port->priv, smp_processor_id(),
+	/* If the thread isn't used, don't do anything */
+	if (smp_processor_id() > port->priv->nthreads)
+		return;
+
+	mvpp2_thread_write(port->priv,
+			   mvpp2_cpu_to_thread(port->priv, smp_processor_id()),
 			   MVPP2_ISR_RX_TX_MASK_REG(port->id), 0);
 }
 
-/* Unmask the current CPU's Rx/Tx interrupts.
+/* Unmask the current thread's Rx/Tx interrupts.
  * Called by on_each_cpu(), guaranteed to run with migration disabled,
  * using smp_processor_id() is OK.
  */
@@ -907,12 +1072,17 @@
 	struct mvpp2_port *port = arg;
 	u32 val;
 
+	/* If the thread isn't used, don't do anything */
+	if (smp_processor_id() > port->priv->nthreads)
+		return;
+
 	val = MVPP2_CAUSE_MISC_SUM_MASK |
-		MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+		MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK(port->priv->hw_version);
 	if (port->has_tx_irqs)
 		val |= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
 
-	mvpp2_percpu_write(port->priv, smp_processor_id(),
+	mvpp2_thread_write(port->priv,
+			   mvpp2_cpu_to_thread(port->priv, smp_processor_id()),
 			   MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
 }
 
@@ -928,7 +1098,7 @@
 	if (mask)
 		val = 0;
 	else
-		val = MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+		val = MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK(MVPP22);
 
 	for (i = 0; i < port->nqvecs; i++) {
 		struct mvpp2_queue_vector *v = port->qvecs + i;
@@ -936,12 +1106,17 @@
 		if (v->type != MVPP2_QUEUE_VECTOR_SHARED)
 			continue;
 
-		mvpp2_percpu_write(port->priv, v->sw_thread_id,
+		mvpp2_thread_write(port->priv, v->sw_thread_id,
 				   MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
 	}
 }
 
 /* Port configuration routines */
+static bool mvpp2_is_xlg(phy_interface_t interface)
+{
+	return interface == PHY_INTERFACE_MODE_10GKR ||
+	       interface == PHY_INTERFACE_MODE_XAUI;
+}
 
 static void mvpp22_gop_init_rgmii(struct mvpp2_port *port)
 {
@@ -987,27 +1162,20 @@
 	void __iomem *xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
 	u32 val;
 
-	/* XPCS */
 	val = readl(xpcs + MVPP22_XPCS_CFG0);
 	val &= ~(MVPP22_XPCS_CFG0_PCS_MODE(0x3) |
 		 MVPP22_XPCS_CFG0_ACTIVE_LANE(0x3));
 	val |= MVPP22_XPCS_CFG0_ACTIVE_LANE(2);
 	writel(val, xpcs + MVPP22_XPCS_CFG0);
 
-	/* MPCS */
 	val = readl(mpcs + MVPP22_MPCS_CTRL);
 	val &= ~MVPP22_MPCS_CTRL_FWD_ERR_CONN;
 	writel(val, mpcs + MVPP22_MPCS_CTRL);
 
 	val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
-	val &= ~(MVPP22_MPCS_CLK_RESET_DIV_RATIO(0x7) | MAC_CLK_RESET_MAC |
-		 MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX);
+	val &= ~MVPP22_MPCS_CLK_RESET_DIV_RATIO(0x7);
 	val |= MVPP22_MPCS_CLK_RESET_DIV_RATIO(1);
 	writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
-
-	val &= ~MVPP22_MPCS_CLK_RESET_DIV_SET;
-	val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX;
-	writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
 }
 
 static int mvpp22_gop_init(struct mvpp2_port *port)
@@ -1067,9 +1235,8 @@
 	u32 val;
 
 	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
-	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
-	    port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
+	    phy_interface_mode_is_8023z(port->phy_interface) ||
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 		/* Enable the GMAC link status irq for this port */
 		val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
 		val |= MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
@@ -1079,7 +1246,7 @@
 	if (port->gop_id == 0) {
 		/* Enable the XLG/GIG irqs for this port */
 		val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
-		if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+		if (mvpp2_is_xlg(port->phy_interface))
 			val |= MVPP22_XLG_EXT_INT_MASK_XLG;
 		else
 			val |= MVPP22_XLG_EXT_INT_MASK_GIG;
@@ -1099,9 +1266,8 @@
 	}
 
 	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
-	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
-	    port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
+	    phy_interface_mode_is_8023z(port->phy_interface) ||
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 		val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
 		val &= ~MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
 		writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
@@ -1112,10 +1278,10 @@
 {
 	u32 val;
 
-	if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-	    port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
-	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
-	    port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
+	if (port->phylink ||
+	    phy_interface_mode_is_rgmii(port->phy_interface) ||
+	    phy_interface_mode_is_8023z(port->phy_interface) ||
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 		val = readl(port->base + MVPP22_GMAC_INT_MASK);
 		val |= MVPP22_GMAC_INT_MASK_LINK_STAT;
 		writel(val, port->base + MVPP22_GMAC_INT_MASK);
@@ -1142,28 +1308,13 @@
  */
 static int mvpp22_comphy_init(struct mvpp2_port *port)
 {
-	enum phy_mode mode;
 	int ret;
 
 	if (!port->comphy)
 		return 0;
 
-	switch (port->phy_interface) {
-	case PHY_INTERFACE_MODE_SGMII:
-	case PHY_INTERFACE_MODE_1000BASEX:
-		mode = PHY_MODE_SGMII;
-		break;
-	case PHY_INTERFACE_MODE_2500BASEX:
-		mode = PHY_MODE_2500SGMII;
-		break;
-	case PHY_INTERFACE_MODE_10GKR:
-		mode = PHY_MODE_10GKR;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ret = phy_set_mode(port->comphy, mode);
+	ret = phy_set_mode_ext(port->comphy, PHY_MODE_ETHERNET,
+			       port->phy_interface);
 	if (ret)
 		return ret;
 
@@ -1175,12 +1326,9 @@
 	u32 val;
 
 	/* Only GOP port 0 has an XLG MAC */
-	if (port->gop_id == 0 &&
-	    (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-	     port->phy_interface == PHY_INTERFACE_MODE_10GKR)) {
+	if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) {
 		val = readl(port->base + MVPP22_XLG_CTRL0_REG);
-		val |= MVPP22_XLG_CTRL0_PORT_EN |
-		       MVPP22_XLG_CTRL0_MAC_RESET_DIS;
+		val |= MVPP22_XLG_CTRL0_PORT_EN;
 		val &= ~MVPP22_XLG_CTRL0_MIB_CNT_DIS;
 		writel(val, port->base + MVPP22_XLG_CTRL0_REG);
 	} else {
@@ -1196,21 +1344,15 @@
 	u32 val;
 
 	/* Only GOP port 0 has an XLG MAC */
-	if (port->gop_id == 0 &&
-	    (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-	     port->phy_interface == PHY_INTERFACE_MODE_10GKR)) {
+	if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) {
 		val = readl(port->base + MVPP22_XLG_CTRL0_REG);
 		val &= ~MVPP22_XLG_CTRL0_PORT_EN;
 		writel(val, port->base + MVPP22_XLG_CTRL0_REG);
-
-		/* Disable & reset should be done separately */
-		val &= ~MVPP22_XLG_CTRL0_MAC_RESET_DIS;
-		writel(val, port->base + MVPP22_XLG_CTRL0_REG);
-	} else {
-		val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
-		val &= ~(MVPP2_GMAC_PORT_EN_MASK);
-		writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
 	}
+
+	val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
+	val &= ~(MVPP2_GMAC_PORT_EN_MASK);
+	writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
 }
 
 /* Set IEEE 802.3x Flow Control Xon Packet Transmission Mode */
@@ -1236,9 +1378,8 @@
 	else
 		val &= ~MVPP2_GMAC_GMII_LB_EN_MASK;
 
-	if (port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
-	    port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
-	    port->phy_interface == PHY_INTERFACE_MODE_2500BASEX)
+	if (phy_interface_mode_is_8023z(port->phy_interface) ||
+	    port->phy_interface == PHY_INTERFACE_MODE_SGMII)
 		val |= MVPP2_GMAC_PCS_LB_EN_MASK;
 	else
 		val &= ~MVPP2_GMAC_PCS_LB_EN_MASK;
@@ -1264,6 +1405,17 @@
 	return val;
 }
 
+/* Some counters are accessed indirectly by first writing an index to
+ * MVPP2_CTRS_IDX. The index can represent various resources depending on the
+ * register we access: it can be a hit counter for some classification tables,
+ * a counter specific to an Rx queue, a Tx queue, or a buffer pool.
+ */
+static u32 mvpp2_read_index(struct mvpp2 *priv, u32 index, u32 reg)
+{
+	mvpp2_write(priv, MVPP2_CTRS_IDX, index);
+	return mvpp2_read(priv, reg);
+}
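+
+The statistics code added below uses this helper for every queue counter; for instance, reading the descriptor-enqueue counter of a port's first Tx queue encodes the port and queue into the index before reading the counter register (both macros appear elsewhere in this patch):

	u32 hits = mvpp2_read_index(port->priv,
				    MVPP22_CTRS_TX_CTR(port->id, 0),
				    MVPP2_TX_DESC_ENQ_CTR);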
+
 /* Due to the fact that software statistics and hardware statistics are, by
  * design, incremented at different moments in the chain of packet processing,
  * it is very likely that incoming packets could have been dropped after being
@@ -1273,7 +1425,7 @@
  * Hence, statistics gathered from userspace with ifconfig (software) and
  * ethtool (hardware) cannot be compared.
  */
-static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = {
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_mib_regs[] = {
 	{ MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true },
 	{ MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
 	{ MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
@@ -1303,16 +1455,103 @@
 	{ MVPP2_MIB_LATE_COLLISION, "late_collision" },
 };
 
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_port_regs[] = {
+	{ MVPP2_OVERRUN_ETH_DROP, "rx_fifo_or_parser_overrun_drops" },
+	{ MVPP2_CLS_ETH_DROP, "rx_classifier_drops" },
+};
+
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_txq_regs[] = {
+	{ MVPP2_TX_DESC_ENQ_CTR, "txq_%d_desc_enqueue" },
+	{ MVPP2_TX_DESC_ENQ_TO_DDR_CTR, "txq_%d_desc_enqueue_to_ddr" },
+	{ MVPP2_TX_BUFF_ENQ_TO_DDR_CTR, "txq_%d_buff_enqueue_to_ddr" },
+	{ MVPP2_TX_DESC_ENQ_HW_FWD_CTR, "txq_%d_desc_hardware_forwarded" },
+	{ MVPP2_TX_PKTS_DEQ_CTR, "txq_%d_packets_dequeued" },
+	{ MVPP2_TX_PKTS_FULL_QUEUE_DROP_CTR, "txq_%d_queue_full_drops" },
+	{ MVPP2_TX_PKTS_EARLY_DROP_CTR, "txq_%d_packets_early_drops" },
+	{ MVPP2_TX_PKTS_BM_DROP_CTR, "txq_%d_packets_bm_drops" },
+	{ MVPP2_TX_PKTS_BM_MC_DROP_CTR, "txq_%d_packets_rep_bm_drops" },
+};
+
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_rxq_regs[] = {
+	{ MVPP2_RX_DESC_ENQ_CTR, "rxq_%d_desc_enqueue" },
+	{ MVPP2_RX_PKTS_FULL_QUEUE_DROP_CTR, "rxq_%d_queue_full_drops" },
+	{ MVPP2_RX_PKTS_EARLY_DROP_CTR, "rxq_%d_packets_early_drops" },
+	{ MVPP2_RX_PKTS_BM_DROP_CTR, "rxq_%d_packets_bm_drops" },
+};
+
+#define MVPP2_N_ETHTOOL_STATS(ntxqs, nrxqs)	(ARRAY_SIZE(mvpp2_ethtool_mib_regs) + \
+						 ARRAY_SIZE(mvpp2_ethtool_port_regs) + \
+						 (ARRAY_SIZE(mvpp2_ethtool_txq_regs) * (ntxqs)) + \
+						 (ARRAY_SIZE(mvpp2_ethtool_rxq_regs) * (nrxqs)))
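+
+Worked example: with M = ARRAY_SIZE(mvpp2_ethtool_mib_regs) MIB counters plus the 2 per-port, 9 per-txq and 4 per-rxq counters above, a port with ntxqs = 8 and nrxqs = 4 exposes M + 2 + 9 * 8 + 4 * 4 = M + 90 ethtool statistics, which is also the size of the port->ethtool_stats array copied out in mvpp2_ethtool_get_stats().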
+
 static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
 				      u8 *data)
 {
-	if (sset == ETH_SS_STATS) {
-		int i;
+	struct mvpp2_port *port = netdev_priv(netdev);
+	int i, q;
 
-		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-			memcpy(data + i * ETH_GSTRING_LEN,
-			       &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN);
+	if (sset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_mib_regs); i++) {
+		strscpy(data, mvpp2_ethtool_mib_regs[i].string,
+			ETH_GSTRING_LEN);
+		data += ETH_GSTRING_LEN;
 	}
+
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_port_regs); i++) {
+		strscpy(data, mvpp2_ethtool_port_regs[i].string,
+			ETH_GSTRING_LEN);
+		data += ETH_GSTRING_LEN;
+	}
+
+	for (q = 0; q < port->ntxqs; q++) {
+		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++) {
+			snprintf(data, ETH_GSTRING_LEN,
+				 mvpp2_ethtool_txq_regs[i].string, q);
+			data += ETH_GSTRING_LEN;
+		}
+	}
+
+	for (q = 0; q < port->nrxqs; q++) {
+		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++) {
+			snprintf(data, ETH_GSTRING_LEN,
+				 mvpp2_ethtool_rxq_regs[i].string,
+				 q);
+			data += ETH_GSTRING_LEN;
+		}
+	}
+}
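+
+The per-queue tables store printf-style templates, and the %d is filled in here at string-collection time. A stand-alone sketch of the expansion (templates and queue count are reduced for brevity; ETH_GSTRING_LEN really is 32):

	#include <stdio.h>

	#define ETH_GSTRING_LEN	32

	int main(void)
	{
		static const char * const tmpl[] = {
			"txq_%d_desc_enqueue", "txq_%d_queue_full_drops",
		};
		char data[2 * 2 * ETH_GSTRING_LEN], *p = data;
		int q, i;

		for (q = 0; q < 2; q++)		/* two hypothetical txqs */
			for (i = 0; i < 2; i++) {
				snprintf(p, ETH_GSTRING_LEN, tmpl[i], q);
				p += ETH_GSTRING_LEN;
			}

		for (p = data; p < data + sizeof(data); p += ETH_GSTRING_LEN)
			puts(p);
		return 0;
	}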
+
+static void mvpp2_read_stats(struct mvpp2_port *port)
+{
+	u64 *pstats;
+	int i, q;
+
+	pstats = port->ethtool_stats;
+
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_mib_regs); i++)
+		*pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_mib_regs[i]);
+
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_port_regs); i++)
+		*pstats++ += mvpp2_read(port->priv,
+					mvpp2_ethtool_port_regs[i].offset +
+					4 * port->id);
+
+	for (q = 0; q < port->ntxqs; q++)
+		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++)
+			*pstats++ += mvpp2_read_index(port->priv,
+						      MVPP22_CTRS_TX_CTR(port->id, i),
+						      mvpp2_ethtool_txq_regs[i].offset);
+
+	/* Rxqs are numbered from 0 from the user standpoint, but not from the
+	 * driver's. We need to add the port->first_rxq offset.
+	 */
+	for (q = 0; q < port->nrxqs; q++)
+		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++)
+			*pstats++ += mvpp2_read_index(port->priv,
+						      port->first_rxq + i,
+						      mvpp2_ethtool_rxq_regs[i].offset);
 }
 
 static void mvpp2_gather_hw_statistics(struct work_struct *work)
@@ -1320,14 +1559,10 @@
 	struct delayed_work *del_work = to_delayed_work(work);
 	struct mvpp2_port *port = container_of(del_work, struct mvpp2_port,
 					       stats_work);
-	u64 *pstats;
-	int i;
 
 	mutex_lock(&port->gather_stats_lock);
 
-	pstats = port->ethtool_stats;
-	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-		*pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
+	mvpp2_read_stats(port);
 
 	/* No need to read again the counters right after this function if it
 	 * was called asynchronously by the user (ie. use of ethtool).
@@ -1351,34 +1586,84 @@
 
 	mutex_lock(&port->gather_stats_lock);
 	memcpy(data, port->ethtool_stats,
-	       sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));
+	       sizeof(u64) * MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs));
 	mutex_unlock(&port->gather_stats_lock);
 }
 
 static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
 {
+	struct mvpp2_port *port = netdev_priv(dev);
+
 	if (sset == ETH_SS_STATS)
-		return ARRAY_SIZE(mvpp2_ethtool_regs);
+		return MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs);
 
 	return -EOPNOTSUPP;
 }
 
-static void mvpp2_port_reset(struct mvpp2_port *port)
+static void mvpp2_mac_reset_assert(struct mvpp2_port *port)
 {
 	u32 val;
-	unsigned int i;
 
-	/* Read the GOP statistics to reset the hardware counters */
-	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-		mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
-
-	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
-		    ~MVPP2_GMAC_PORT_RESET_MASK;
+	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) |
+	      MVPP2_GMAC_PORT_RESET_MASK;
 	writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
 
-	while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
-	       MVPP2_GMAC_PORT_RESET_MASK)
-		continue;
+	if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
+		val = readl(port->base + MVPP22_XLG_CTRL0_REG) &
+		      ~MVPP22_XLG_CTRL0_MAC_RESET_DIS;
+		writel(val, port->base + MVPP22_XLG_CTRL0_REG);
+	}
+}
+
+static void mvpp22_pcs_reset_assert(struct mvpp2_port *port)
+{
+	struct mvpp2 *priv = port->priv;
+	void __iomem *mpcs, *xpcs;
+	u32 val;
+
+	if (port->priv->hw_version != MVPP22 || port->gop_id != 0)
+		return;
+
+	mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id);
+	xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
+
+	val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
+	val &= ~(MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX);
+	val |= MVPP22_MPCS_CLK_RESET_DIV_SET;
+	writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
+
+	val = readl(xpcs + MVPP22_XPCS_CFG0);
+	writel(val & ~MVPP22_XPCS_CFG0_RESET_DIS, xpcs + MVPP22_XPCS_CFG0);
+}
+
+static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port)
+{
+	struct mvpp2 *priv = port->priv;
+	void __iomem *mpcs, *xpcs;
+	u32 val;
+
+	if (port->priv->hw_version != MVPP22 || port->gop_id != 0)
+		return;
+
+	mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id);
+	xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
+
+	switch (port->phy_interface) {
+	case PHY_INTERFACE_MODE_10GKR:
+		val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
+		val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX |
+		       MAC_CLK_RESET_SD_TX;
+		val &= ~MVPP22_MPCS_CLK_RESET_DIV_SET;
+		writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
+		break;
+	case PHY_INTERFACE_MODE_XAUI:
+	case PHY_INTERFACE_MODE_RXAUI:
+		val = readl(xpcs + MVPP22_XPCS_CFG0);
+		writel(val | MVPP22_XPCS_CFG0_RESET_DIS, xpcs + MVPP22_XPCS_CFG0);
+		break;
+	default:
+		break;
+	}
 }
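
These two helpers make the PCS reset handling symmetric with the MAC reset above, instead of toggling the reset bits inline in the 10G GoP init earlier in this patch. The ordering the driver relies on is visible in mvpp22_mode_reconfigure() further down, roughly:

	mvpp2_mac_reset_assert(port);		/* GMAC + XLG MAC into reset */
	mvpp22_pcs_reset_assert(port);		/* MPCS + XPCS into reset */
	mvpp22_comphy_init(port);		/* reprogram the serdes */
	mvpp22_gop_init(port);			/* reprogram the GoP */
	mvpp22_pcs_reset_deassert(port);	/* release the PCS for the new mode */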
 
 /* Change maximum receive size of the port */
@@ -1408,7 +1693,7 @@
 /* Set defaults to the MVPP2 port */
 static void mvpp2_defaults_set(struct mvpp2_port *port)
 {
-	int tx_port_num, val, queue, ptxq, lrxq;
+	int tx_port_num, val, queue, lrxq;
 
 	if (port->priv->hw_version == MVPP21) {
 		/* Update TX FIFO MIN Threshold */
@@ -1425,12 +1710,13 @@
 		    tx_port_num);
 	mvpp2_write(port->priv, MVPP2_TXP_SCHED_CMD_1_REG, 0);
 
+	/* Set TXQ scheduling to Round-Robin */
+	mvpp2_write(port->priv, MVPP2_TXP_SCHED_FIXED_PRIO_REG, 0);
+
 	/* Close bandwidth for all queues */
-	for (queue = 0; queue < MVPP2_MAX_TXQ; queue++) {
-		ptxq = mvpp2_txq_phys(port->id, queue);
+	for (queue = 0; queue < MVPP2_MAX_TXQ; queue++)
 		mvpp2_write(port->priv,
-			    MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(ptxq), 0);
-	}
+			    MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(queue), 0);
 
 	/* Set refill period to 1 usec, refill tokens
 	 * and bucket size to maximum
@@ -1624,7 +1910,8 @@
 static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending)
 {
 	/* aggregated access - relevant TXQ number is written in TX desc */
-	mvpp2_percpu_write(port->priv, smp_processor_id(),
+	mvpp2_thread_write(port->priv,
+			   mvpp2_cpu_to_thread(port->priv, smp_processor_id()),
 			   MVPP2_AGGR_TXQ_UPDATE_REG, pending);
 }
 
@@ -1634,14 +1921,15 @@
  * Called only from mvpp2_tx(), so migration is disabled, using
  * smp_processor_id() is OK.
  */
-static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
+static int mvpp2_aggr_desc_num_check(struct mvpp2_port *port,
 				     struct mvpp2_tx_queue *aggr_txq, int num)
 {
 	if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) {
 		/* Update number of occupied aggregated Tx descriptors */
-		int cpu = smp_processor_id();
-		u32 val = mvpp2_read_relaxed(priv,
-					     MVPP2_AGGR_TXQ_STATUS_REG(cpu));
+		unsigned int thread =
+			mvpp2_cpu_to_thread(port->priv, smp_processor_id());
+		u32 val = mvpp2_read_relaxed(port->priv,
+					     MVPP2_AGGR_TXQ_STATUS_REG(thread));
 
 		aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
 
@@ -1657,16 +1945,17 @@
  * only by mvpp2_tx(), so migration is disabled, using
  * smp_processor_id() is OK.
  */
-static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
+static int mvpp2_txq_alloc_reserved_desc(struct mvpp2_port *port,
 					 struct mvpp2_tx_queue *txq, int num)
 {
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, smp_processor_id());
+	struct mvpp2 *priv = port->priv;
 	u32 val;
-	int cpu = smp_processor_id();
 
 	val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
-	mvpp2_percpu_write_relaxed(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
+	mvpp2_thread_write_relaxed(priv, thread, MVPP2_TXQ_RSVD_REQ_REG, val);
 
-	val = mvpp2_percpu_read_relaxed(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
+	val = mvpp2_thread_read_relaxed(priv, thread, MVPP2_TXQ_RSVD_RSLT_REG);
 
 	return val & MVPP2_TXQ_RSVD_RSLT_MASK;
 }
@@ -1674,12 +1963,13 @@
 /* Check if there are enough reserved descriptors for transmission.
  * If not, request chunk of reserved descriptors and check again.
  */
-static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2 *priv,
+static int mvpp2_txq_reserved_desc_num_proc(struct mvpp2_port *port,
 					    struct mvpp2_tx_queue *txq,
 					    struct mvpp2_txq_pcpu *txq_pcpu,
 					    int num)
 {
-	int req, cpu, desc_count;
+	int req, desc_count;
+	unsigned int thread;
 
 	if (txq_pcpu->reserved_num >= num)
 		return 0;
@@ -1690,10 +1980,10 @@
 
 	desc_count = 0;
 	/* Compute total of used descriptors */
-	for_each_present_cpu(cpu) {
+	for (thread = 0; thread < port->priv->nthreads; thread++) {
 		struct mvpp2_txq_pcpu *txq_pcpu_aux;
 
-		txq_pcpu_aux = per_cpu_ptr(txq->pcpu, cpu);
+		txq_pcpu_aux = per_cpu_ptr(txq->pcpu, thread);
 		desc_count += txq_pcpu_aux->count;
 		desc_count += txq_pcpu_aux->reserved_num;
 	}
@@ -1702,10 +1992,10 @@
 	desc_count += req;
 
 	if (desc_count >
-	   (txq->size - (num_present_cpus() * MVPP2_CPU_DESC_CHUNK)))
+	   (txq->size - (MVPP2_MAX_THREADS * MVPP2_CPU_DESC_CHUNK)))
 		return -ENOMEM;
 
-	txq_pcpu->reserved_num += mvpp2_txq_alloc_reserved_desc(priv, txq, req);
+	txq_pcpu->reserved_num += mvpp2_txq_alloc_reserved_desc(port, txq, req);
 
 	/* OK, the descriptor could have been updated: check again. */
 	if (txq_pcpu->reserved_num < num)
@@ -1759,7 +2049,7 @@
 
 /* Get number of sent descriptors and decrement counter.
  * The number of sent descriptors is returned.
- * Per-CPU access
+ * Per-thread access
  *
  * Called only from mvpp2_txq_done(), called from mvpp2_tx()
  * (migration disabled) and from the TX completion tasklet (migration
@@ -1771,7 +2061,8 @@
 	u32 val;
 
 	/* Reading status reg resets transmitted descriptor counter */
-	val = mvpp2_percpu_read_relaxed(port->priv, smp_processor_id(),
+	val = mvpp2_thread_read_relaxed(port->priv,
+					mvpp2_cpu_to_thread(port->priv, smp_processor_id()),
 					MVPP2_TXQ_SENT_REG(txq->id));
 
 	return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
@@ -1786,10 +2077,15 @@
 	struct mvpp2_port *port = arg;
 	int queue;
 
+	/* If the thread isn't used, don't do anything */
+	if (smp_processor_id() > port->priv->nthreads)
+		return;
+
 	for (queue = 0; queue < port->ntxqs; queue++) {
 		int id = port->txqs[queue]->id;
 
-		mvpp2_percpu_read(port->priv, smp_processor_id(),
+		mvpp2_thread_read(port->priv,
+				  mvpp2_cpu_to_thread(port->priv, smp_processor_id()),
 				  MVPP2_TXQ_SENT_REG(id));
 	}
 }
@@ -1849,13 +2145,13 @@
 static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 				   struct mvpp2_rx_queue *rxq)
 {
-	int cpu = get_cpu();
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
 
 	if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
 		rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
 
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_NUM_REG, rxq->id);
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_THRESH_REG,
 			   rxq->pkts_coal);
 
 	put_cpu();
@@ -1865,15 +2161,15 @@
 static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port,
 				   struct mvpp2_tx_queue *txq)
 {
-	int cpu = get_cpu();
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
 	u32 val;
 
 	if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK)
 		txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK;
 
 	val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_THRESH_REG, val);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val);
 
 	put_cpu();
 }
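
Both coalescing setters follow the indirect-access idiom used throughout this patch: pin the task with get_cpu(), translate the CPU to a HW thread, select the queue via the NUM register, then touch the per-thread register window before put_cpu(). Condensed:

	unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu());

	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_NUM_REG, rxq->id);
	/* ... per-thread reads/writes for the selected queue ... */
	put_cpu();
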
@@ -1974,7 +2270,7 @@
 	struct netdev_queue *nq = netdev_get_tx_queue(port->dev, txq->log_id);
 	int tx_done;
 
-	if (txq_pcpu->cpu != smp_processor_id())
+	if (txq_pcpu->thread != mvpp2_cpu_to_thread(port->priv, smp_processor_id()))
 		netdev_err(port->dev, "wrong cpu on the end of Tx processing\n");
 
 	tx_done = mvpp2_txq_sent_desc_proc(port, txq);
@@ -1990,7 +2286,7 @@
 }
 
 static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause,
-				  int cpu)
+				  unsigned int thread)
 {
 	struct mvpp2_tx_queue *txq;
 	struct mvpp2_txq_pcpu *txq_pcpu;
@@ -2001,7 +2297,7 @@
 		if (!txq)
 			break;
 
-		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+		txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 
 		if (txq_pcpu->count) {
 			mvpp2_txq_done(port, txq, txq_pcpu);
@@ -2017,15 +2313,15 @@
 
 /* Allocate and initialize descriptors for aggr TXQ */
 static int mvpp2_aggr_txq_init(struct platform_device *pdev,
-			       struct mvpp2_tx_queue *aggr_txq, int cpu,
-			       struct mvpp2 *priv)
+			       struct mvpp2_tx_queue *aggr_txq,
+			       unsigned int thread, struct mvpp2 *priv)
 {
 	u32 txq_dma;
 
 	/* Allocate memory for TX descriptors */
-	aggr_txq->descs = dma_zalloc_coherent(&pdev->dev,
-				MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
-				&aggr_txq->descs_dma, GFP_KERNEL);
+	aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
+					     MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
+					     &aggr_txq->descs_dma, GFP_KERNEL);
 	if (!aggr_txq->descs)
 		return -ENOMEM;
 
@@ -2033,7 +2329,7 @@
 
 	/* Aggr TXQ no reset WA */
 	aggr_txq->next_desc_to_proc = mvpp2_read(priv,
-						 MVPP2_AGGR_TXQ_INDEX_REG(cpu));
+						 MVPP2_AGGR_TXQ_INDEX_REG(thread));
 
 	/* Set Tx descriptors queue starting address indirect
 	 * access
@@ -2044,8 +2340,8 @@
 		txq_dma = aggr_txq->descs_dma >>
 			MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
 
-	mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
-	mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu),
+	mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(thread), txq_dma);
+	mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(thread),
 		    MVPP2_AGGR_TXQ_SIZE);
 
 	return 0;
@@ -2056,8 +2352,8 @@
 			  struct mvpp2_rx_queue *rxq)
 
 {
+	unsigned int thread;
 	u32 rxq_dma;
-	int cpu;
 
 	rxq->size = port->rx_ring_size;
 
@@ -2074,15 +2370,15 @@
 	mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
 
 	/* Set Rx descriptors queue starting address - indirect access */
-	cpu = get_cpu();
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
+	thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_NUM_REG, rxq->id);
 	if (port->priv->hw_version == MVPP21)
 		rxq_dma = rxq->descs_dma;
 	else
 		rxq_dma = rxq->descs_dma >> MVPP22_DESC_ADDR_OFFS;
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_INDEX_REG, 0);
 	put_cpu();
 
 	/* Set Offset */
@@ -2127,7 +2423,7 @@
 static void mvpp2_rxq_deinit(struct mvpp2_port *port,
 			     struct mvpp2_rx_queue *rxq)
 {
-	int cpu;
+	unsigned int thread;
 
 	mvpp2_rxq_drop_pkts(port, rxq);
 
@@ -2146,10 +2442,10 @@
 	 * free descriptor number
 	 */
 	mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
-	cpu = get_cpu();
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
+	thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_NUM_REG, rxq->id);
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_DESC_ADDR_REG, 0);
+	mvpp2_thread_write(port->priv, thread, MVPP2_RXQ_DESC_SIZE_REG, 0);
 	put_cpu();
 }
 
@@ -2158,7 +2454,8 @@
 			  struct mvpp2_tx_queue *txq)
 {
 	u32 val;
-	int cpu, desc, desc_per_txq, tx_port_num;
+	unsigned int thread;
+	int desc, desc_per_txq, tx_port_num;
 	struct mvpp2_txq_pcpu *txq_pcpu;
 
 	txq->size = port->tx_ring_size;
@@ -2173,18 +2470,18 @@
 	txq->last_desc = txq->size - 1;
 
 	/* Set Tx descriptors queue starting address - indirect access */
-	cpu = get_cpu();
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
+	thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_DESC_ADDR_REG,
 			   txq->descs_dma);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG,
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_DESC_SIZE_REG,
 			   txq->size & MVPP2_TXQ_DESC_SIZE_MASK);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_INDEX_REG, 0);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_RSVD_CLR_REG,
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_INDEX_REG, 0);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_RSVD_CLR_REG,
 			   txq->id << MVPP2_TXQ_RSVD_CLR_OFFSET);
-	val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PENDING_REG);
+	val = mvpp2_thread_read(port->priv, thread, MVPP2_TXQ_PENDING_REG);
 	val &= ~MVPP2_TXQ_PENDING_MASK;
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PENDING_REG, val);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_PENDING_REG, val);
 
 	/* Calculate base address in prefetch buffer. We reserve 16 descriptors
 	 * for each existing TXQ.
@@ -2195,7 +2492,7 @@
 	desc = (port->id * MVPP2_MAX_TXQ * desc_per_txq) +
 	       (txq->log_id * desc_per_txq);
 
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_PREF_BUF_REG,
 			   MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
 			   MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
 	put_cpu();
@@ -2214,8 +2511,8 @@
 	mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq->log_id),
 		    val);
 
-	for_each_present_cpu(cpu) {
-		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+	for (thread = 0; thread < port->priv->nthreads; thread++) {
+		txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 		txq_pcpu->size = txq->size;
 		txq_pcpu->buffs = kmalloc_array(txq_pcpu->size,
 						sizeof(*txq_pcpu->buffs),
@@ -2249,10 +2546,10 @@
 			     struct mvpp2_tx_queue *txq)
 {
 	struct mvpp2_txq_pcpu *txq_pcpu;
-	int cpu;
+	unsigned int thread;
 
-	for_each_present_cpu(cpu) {
-		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+	for (thread = 0; thread < port->priv->nthreads; thread++) {
+		txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 		kfree(txq_pcpu->buffs);
 
 		if (txq_pcpu->tso_headers)
@@ -2275,13 +2572,13 @@
 	txq->descs_dma         = 0;
 
 	/* Set minimum bandwidth for disabled TXQs */
-	mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
+	mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->log_id), 0);
 
 	/* Set Tx descriptors queue starting address and size */
-	cpu = get_cpu();
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
+	thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_DESC_ADDR_REG, 0);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_DESC_SIZE_REG, 0);
 	put_cpu();
 }
 
@@ -2289,14 +2586,14 @@
 static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
 {
 	struct mvpp2_txq_pcpu *txq_pcpu;
-	int delay, pending, cpu;
+	int delay, pending;
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
 	u32 val;
 
-	cpu = get_cpu();
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
-	val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
+	val = mvpp2_thread_read(port->priv, thread, MVPP2_TXQ_PREF_BUF_REG);
 	val |= MVPP2_TXQ_DRAIN_EN_MASK;
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_PREF_BUF_REG, val);
 
 	/* The napi queue has been stopped so wait for all packets
 	 * to be transmitted.
@@ -2312,17 +2609,17 @@
 		mdelay(1);
 		delay++;
 
-		pending = mvpp2_percpu_read(port->priv, cpu,
+		pending = mvpp2_thread_read(port->priv, thread,
 					    MVPP2_TXQ_PENDING_REG);
 		pending &= MVPP2_TXQ_PENDING_MASK;
 	} while (pending);
 
 	val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
-	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
+	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_PREF_BUF_REG, val);
 	put_cpu();
 
-	for_each_present_cpu(cpu) {
-		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
+	for (thread = 0; thread < port->priv->nthreads; thread++) {
+		txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 
 		/* Release all packets */
 		mvpp2_txq_bufs_free(port, txq, txq_pcpu, txq_pcpu->count);
@@ -2389,13 +2686,17 @@
 static int mvpp2_setup_txqs(struct mvpp2_port *port)
 {
 	struct mvpp2_tx_queue *txq;
-	int queue, err;
+	int queue, err, cpu;
 
 	for (queue = 0; queue < port->ntxqs; queue++) {
 		txq = port->txqs[queue];
 		err = mvpp2_txq_init(port, txq);
 		if (err)
 			goto err_cleanup;
+
+		/* Assign this queue to a CPU */
+		cpu = queue % num_present_cpus();
+		netif_set_xps_queue(port->dev, cpumask_of(cpu), queue);
 	}
 
 	if (port->has_tx_irqs) {
@@ -2436,8 +2737,7 @@
 
 	mvpp22_gop_mask_irq(port);
 
-	if (port->gop_id == 0 &&
-	    port->phy_interface == PHY_INTERFACE_MODE_10GKR) {
+	if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) {
 		val = readl(port->base + MVPP22_XLG_INT_STAT);
 		if (val & MVPP22_XLG_INT_STAT_LINK) {
 			event = true;
@@ -2446,9 +2746,8 @@
 				link = true;
 		}
 	} else if (phy_interface_mode_is_rgmii(port->phy_interface) ||
-		   port->phy_interface == PHY_INTERFACE_MODE_SGMII ||
-		   port->phy_interface == PHY_INTERFACE_MODE_1000BASEX ||
-		   port->phy_interface == PHY_INTERFACE_MODE_2500BASEX) {
+		   phy_interface_mode_is_8023z(port->phy_interface) ||
+		   port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 		val = readl(port->base + MVPP22_GMAC_INT_STAT);
 		if (val & MVPP22_GMAC_INT_STAT_LINK) {
 			event = true;
@@ -2487,46 +2786,35 @@
 	return IRQ_HANDLED;
 }
 
-static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu)
+static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer)
 {
-	ktime_t interval;
-
-	if (!port_pcpu->timer_scheduled) {
-		port_pcpu->timer_scheduled = true;
-		interval = MVPP2_TXDONE_HRTIMER_PERIOD_NS;
-		hrtimer_start(&port_pcpu->tx_done_timer, interval,
-			      HRTIMER_MODE_REL_PINNED);
-	}
-}
-
-static void mvpp2_tx_proc_cb(unsigned long data)
-{
-	struct net_device *dev = (struct net_device *)data;
-	struct mvpp2_port *port = netdev_priv(dev);
-	struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
+	struct net_device *dev;
+	struct mvpp2_port *port;
+	struct mvpp2_port_pcpu *port_pcpu;
 	unsigned int tx_todo, cause;
 
+	port_pcpu = container_of(timer, struct mvpp2_port_pcpu, tx_done_timer);
+	dev = port_pcpu->dev;
+
 	if (!netif_running(dev))
-		return;
+		return HRTIMER_NORESTART;
+
 	port_pcpu->timer_scheduled = false;
+	port = netdev_priv(dev);
 
 	/* Process all the Tx queues */
 	cause = (1 << port->ntxqs) - 1;
-	tx_todo = mvpp2_tx_done(port, cause, smp_processor_id());
+	tx_todo = mvpp2_tx_done(port, cause,
+				mvpp2_cpu_to_thread(port->priv, smp_processor_id()));
 
 	/* Set the timer in case not all the packets were processed */
-	if (tx_todo)
-		mvpp2_timer_set(port_pcpu);
-}
+	if (tx_todo && !port_pcpu->timer_scheduled) {
+		port_pcpu->timer_scheduled = true;
+		hrtimer_forward_now(&port_pcpu->tx_done_timer,
+				    MVPP2_TXDONE_HRTIMER_PERIOD_NS);
 
-static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer)
-{
-	struct mvpp2_port_pcpu *port_pcpu = container_of(timer,
-							 struct mvpp2_port_pcpu,
-							 tx_done_timer);
-
-	tasklet_schedule(&port_pcpu->tx_done_tasklet);
-
+		return HRTIMER_RESTART;
+	}
 	return HRTIMER_NORESTART;
 }
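
The tx-done tasklet is folded into the hrtimer callback, which now re-arms itself. The idiom matters: an hrtimer callback must not call hrtimer_start() on its own timer; it advances the expiry with hrtimer_forward_now() and returns HRTIMER_RESTART. A minimal sketch of the pattern, with struct foo and foo_work_pending() as hypothetical stand-ins:

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	struct foo {
		struct hrtimer tx_done_timer;
		/* ... driver state ... */
	};

	static bool foo_work_pending(struct foo *f);	/* hypothetical */

	static enum hrtimer_restart foo_timer_cb(struct hrtimer *timer)
	{
		struct foo *f = container_of(timer, struct foo, tx_done_timer);

		if (foo_work_pending(f)) {
			/* push the expiry forward; the core re-arms on
			 * HRTIMER_RESTART, so no hrtimer_start() here
			 */
			hrtimer_forward_now(timer, ns_to_ktime(NSEC_PER_MSEC));
			return HRTIMER_RESTART;
		}
		return HRTIMER_NORESTART;
	}

The first arming still happens outside the callback, as in mvpp2_tx() below, via hrtimer_start(..., HRTIMER_MODE_REL_PINNED_SOFT).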
 
@@ -2729,7 +3017,8 @@
 tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
 		  struct mvpp2_tx_desc *desc)
 {
-	struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, smp_processor_id());
+	struct mvpp2_txq_pcpu *txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 
 	dma_addr_t buf_dma_addr =
 		mvpp2_txdesc_dma_addr_get(port, desc);
@@ -2746,21 +3035,23 @@
 				 struct mvpp2_tx_queue *aggr_txq,
 				 struct mvpp2_tx_queue *txq)
 {
-	struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, smp_processor_id());
+	struct mvpp2_txq_pcpu *txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
 	struct mvpp2_tx_desc *tx_desc;
 	int i;
 	dma_addr_t buf_dma_addr;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		void *addr = page_address(frag->page.p) + frag->page_offset;
+		void *addr = skb_frag_address(frag);
 
 		tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
 		mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
-		mvpp2_txdesc_size_set(port, tx_desc, frag->size);
+		mvpp2_txdesc_size_set(port, tx_desc, skb_frag_size(frag));
 
 		buf_dma_addr = dma_map_single(port->dev->dev.parent, addr,
-					      frag->size, DMA_TO_DEVICE);
+					      skb_frag_size(frag),
+					      DMA_TO_DEVICE);
 		if (dma_mapping_error(port->dev->dev.parent, buf_dma_addr)) {
 			mvpp2_txq_desc_put(txq);
 			goto cleanup;
@@ -2865,9 +3156,8 @@
 	int i, len, descs = 0;
 
 	/* Check number of available descriptors */
-	if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
-				      tso_count_descs(skb)) ||
-	    mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
+	if (mvpp2_aggr_desc_num_check(port, aggr_txq, tso_count_descs(skb)) ||
+	    mvpp2_txq_reserved_desc_num_proc(port, txq, txq_pcpu,
 					     tso_count_descs(skb)))
 		return 0;
 
@@ -2907,21 +3197,28 @@
 }
 
 /* Main tx processing */
-static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	struct mvpp2_tx_queue *txq, *aggr_txq;
 	struct mvpp2_txq_pcpu *txq_pcpu;
 	struct mvpp2_tx_desc *tx_desc;
 	dma_addr_t buf_dma_addr;
+	unsigned long flags = 0;
+	unsigned int thread;
 	int frags = 0;
 	u16 txq_id;
 	u32 tx_cmd;
 
+	thread = mvpp2_cpu_to_thread(port->priv, smp_processor_id());
+
 	txq_id = skb_get_queue_mapping(skb);
 	txq = port->txqs[txq_id];
-	txq_pcpu = this_cpu_ptr(txq->pcpu);
-	aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
+	txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
+	aggr_txq = &port->priv->aggr_txqs[thread];
+
+	if (test_bit(thread, &port->priv->lock_map))
+		spin_lock_irqsave(&port->tx_lock[thread], flags);
 
 	if (skb_is_gso(skb)) {
 		frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
@@ -2930,9 +3227,8 @@
 	frags = skb_shinfo(skb)->nr_frags + 1;
 
 	/* Check number of available descriptors */
-	if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq, frags) ||
-	    mvpp2_txq_reserved_desc_num_proc(port->priv, txq,
-					     txq_pcpu, frags)) {
+	if (mvpp2_aggr_desc_num_check(port, aggr_txq, frags) ||
+	    mvpp2_txq_reserved_desc_num_proc(port, txq, txq_pcpu, frags)) {
 		frags = 0;
 		goto out;
 	}
@@ -2974,7 +3270,7 @@
 
 out:
 	if (frags > 0) {
-		struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
+		struct mvpp2_pcpu_stats *stats = per_cpu_ptr(port->stats, thread);
 		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 
 		txq_pcpu->reserved_num -= frags;
@@ -3004,11 +3300,19 @@
 	/* Set the timer in case not all frags were processed */
 	if (!port->has_tx_irqs && txq_pcpu->count <= frags &&
 	    txq_pcpu->count > 0) {
-		struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
+		struct mvpp2_port_pcpu *port_pcpu = per_cpu_ptr(port->pcpu, thread);
 
-		mvpp2_timer_set(port_pcpu);
+		if (!port_pcpu->timer_scheduled) {
+			port_pcpu->timer_scheduled = true;
+			hrtimer_start(&port_pcpu->tx_done_timer,
+				      MVPP2_TXDONE_HRTIMER_PERIOD_NS,
+				      HRTIMER_MODE_REL_PINNED_SOFT);
+		}
 	}
 
+	if (test_bit(thread, &port->priv->lock_map))
+		spin_unlock_irqrestore(&port->tx_lock[thread], flags);
+
 	return NETDEV_TX_OK;
 }
 
@@ -3028,7 +3332,7 @@
 	int rx_done = 0;
 	struct mvpp2_port *port = netdev_priv(napi->dev);
 	struct mvpp2_queue_vector *qv;
-	int cpu = smp_processor_id();
+	unsigned int thread = mvpp2_cpu_to_thread(port->priv, smp_processor_id());
 
 	qv = container_of(napi, struct mvpp2_queue_vector, napi);
 
@@ -3042,7 +3346,7 @@
 	 *
 	 * Each CPU has its own Rx/Tx cause register
 	 */
-	cause_rx_tx = mvpp2_percpu_read_relaxed(port->priv, qv->sw_thread_id,
+	cause_rx_tx = mvpp2_thread_read_relaxed(port->priv, qv->sw_thread_id,
 						MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
 
 	cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
@@ -3051,7 +3355,7 @@
 
 		/* Clear the cause register */
 		mvpp2_write(port->priv, MVPP2_ISR_MISC_CAUSE_REG, 0);
-		mvpp2_percpu_write(port->priv, cpu,
+		mvpp2_thread_write(port->priv, thread,
 				   MVPP2_ISR_RX_TX_CAUSE_REG(port->id),
 				   cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
 	}
@@ -3065,7 +3369,8 @@
 	}
 
 	/* Process RX packets */
-	cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+	cause_rx = cause_rx_tx &
+		   MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK(port->priv->hw_version);
 	cause_rx <<= qv->first_rxq;
 	cause_rx |= qv->pending_cause_rx;
 	while (cause_rx && budget > 0) {
@@ -3102,19 +3407,26 @@
 {
 	u32 ctrl3;
 
+	/* Set the GMAC & XLG MAC in reset */
+	mvpp2_mac_reset_assert(port);
+
+	/* Set the MPCS and XPCS in reset */
+	mvpp22_pcs_reset_assert(port);
+
 	/* comphy reconfiguration */
 	mvpp22_comphy_init(port);
 
 	/* gop reconfiguration */
 	mvpp22_gop_init(port);
 
+	mvpp22_pcs_reset_deassert(port);
+
 	/* Only GOP port 0 has an XLG MAC */
 	if (port->gop_id == 0) {
 		ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG);
 		ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
 
-		if (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-		    port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+		if (mvpp2_is_xlg(port->phy_interface))
 			ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
 		else
 			ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
@@ -3122,9 +3434,7 @@
 		writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG);
 	}
 
-	if (port->gop_id == 0 &&
-	    (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-	     port->phy_interface == PHY_INTERFACE_MODE_10GKR))
+	if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface))
 		mvpp2_xlg_max_rx_size_set(port);
 	else
 		mvpp2_gmac_max_rx_size_set(port);
@@ -3140,14 +3450,13 @@
 	for (i = 0; i < port->nqvecs; i++)
 		napi_enable(&port->qvecs[i].napi);
 
-	/* Enable interrupts on all CPUs */
+	/* Enable interrupts on all threads */
 	mvpp2_interrupts_enable(port);
 
 	if (port->priv->hw_version == MVPP22)
 		mvpp22_mode_reconfigure(port);
 
 	if (port->phylink) {
-		netif_carrier_off(port->dev);
 		phylink_start(port->phylink);
 	} else {
 		/* Phylink isn't used as of now for ACPI, so the MAC has to be
@@ -3157,9 +3466,9 @@
 		struct phylink_link_state state = {
 			.interface = port->phy_interface,
 		};
-		mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
-		mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface,
-				  NULL);
+		mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
+		mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND,
+				  port->phy_interface, NULL);
 	}
 
 	netif_tx_start_all_queues(port->dev);
@@ -3170,7 +3479,7 @@
 {
 	int i;
 
-	/* Disable interrupts on all CPUs */
+	/* Disable interrupts on all threads */
 	mvpp2_interrupts_disable(port);
 
 	for (i = 0; i < port->nqvecs; i++)
@@ -3243,16 +3552,31 @@
 	for (i = 0; i < port->nqvecs; i++) {
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
-		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
+			qv->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+			if (!qv->mask) {
+				err = -ENOMEM;
+				goto err;
+			}
+
 			irq_set_status_flags(qv->irq, IRQ_NO_BALANCING);
+		}
 
 		err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
 		if (err)
 			goto err;
 
-		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
-			irq_set_affinity_hint(qv->irq,
-					      cpumask_of(qv->sw_thread_id));
+		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
+			unsigned int cpu;
+
+			for_each_present_cpu(cpu) {
+				if (mvpp2_cpu_to_thread(port->priv, cpu) ==
+				    qv->sw_thread_id)
+					cpumask_set_cpu(cpu, qv->mask);
+			}
+
+			irq_set_affinity_hint(qv->irq, qv->mask);
+		}
 	}
 
 	return 0;
@@ -3261,6 +3585,8 @@
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
 		irq_set_affinity_hint(qv->irq, NULL);
+		kfree(qv->mask);
+		qv->mask = NULL;
 		free_irq(qv->irq, qv);
 	}
 
@@ -3275,6 +3601,8 @@
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
 		irq_set_affinity_hint(qv->irq, NULL);
+		kfree(qv->mask);
+		qv->mask = NULL;
 		irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING);
 		free_irq(qv->irq, qv);
 	}
@@ -3396,11 +3724,11 @@
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	struct mvpp2_port_pcpu *port_pcpu;
-	int cpu;
+	unsigned int thread;
 
 	mvpp2_stop_dev(port);
 
-	/* Mask interrupts on all CPUs */
+	/* Mask interrupts on all threads */
 	on_each_cpu(mvpp2_interrupts_mask, port, 1);
 	mvpp2_shared_interrupt_mask_unmask(port, true);
 
@@ -3411,12 +3739,11 @@
 
 	mvpp2_irqs_deinit(port);
 	if (!port->has_tx_irqs) {
-		for_each_present_cpu(cpu) {
-			port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+		for (thread = 0; thread < port->priv->nthreads; thread++) {
+			port_pcpu = per_cpu_ptr(port->pcpu, thread);
 
 			hrtimer_cancel(&port_pcpu->tx_done_timer);
 			port_pcpu->timer_scheduled = false;
-			tasklet_kill(&port_pcpu->tx_done_tasklet);
 		}
 	}
 	mvpp2_cleanup_rxqs(port);
@@ -3424,6 +3751,9 @@
 
 	cancel_delayed_work_sync(&port->stats_work);
 
+	mvpp2_mac_reset_assert(port);
+	mvpp22_pcs_reset_assert(port);
+
 	return 0;
 }
 
@@ -3504,9 +3834,48 @@
 	return err;
 }
 
+/* Shut down all the ports, reconfigure the pools as percpu or shared,
+ * then bring up again all ports.
+ */
+static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu)
+{
+	int numbufs = MVPP2_BM_POOLS_NUM, i;
+	struct mvpp2_port *port = NULL;
+	bool status[MVPP2_MAX_PORTS];
+
+	for (i = 0; i < priv->port_count; i++) {
+		port = priv->port_list[i];
+		status[i] = netif_running(port->dev);
+		if (status[i])
+			mvpp2_stop(port->dev);
+	}
+
+	/* nrxqs is the same for all ports */
+	if (priv->percpu_pools)
+		numbufs = port->nrxqs * 2;
+
+	for (i = 0; i < numbufs; i++)
+		mvpp2_bm_pool_destroy(port->dev->dev.parent, priv, &priv->bm_pools[i]);
+
+	devm_kfree(port->dev->dev.parent, priv->bm_pools);
+	priv->percpu_pools = percpu;
+	mvpp2_bm_init(port->dev->dev.parent, priv);
+
+	for (i = 0; i < priv->port_count; i++) {
+		port = priv->port_list[i];
+		mvpp2_swf_bm_pool_init(port);
+		if (status[i])
+			mvpp2_open(port->dev);
+	}
+
+	return 0;
+}
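+
+Its only caller is mvpp2_change_mtu() below: the driver falls back to shared pools as soon as any port needs jumbo frames, and returns to per-cpu pools once none does. Condensed from the following hunk (the jumbo check across the other ports is reduced to a hypothetical flag):

	if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
		if (priv->percpu_pools)
			mvpp2_bm_switch_buffers(priv, false);	/* go shared */
	} else if (!other_port_uses_jumbo) {			/* hypothetical */
		mvpp2_bm_switch_buffers(priv, true);		/* go per-cpu */
	}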
+
 static int mvpp2_change_mtu(struct net_device *dev, int mtu)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
+	bool running = netif_running(dev);
+	struct mvpp2 *priv = port->priv;
 	int err;
 
 	if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) {
@@ -3515,40 +3884,49 @@
 		mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
 	}
 
-	if (!netif_running(dev)) {
-		err = mvpp2_bm_update_mtu(dev, mtu);
-		if (!err) {
-			port->pkt_size =  MVPP2_RX_PKT_SIZE(mtu);
-			return 0;
+	if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
+		if (priv->percpu_pools) {
+			netdev_warn(dev, "mtu %d too high, switching to shared buffers\n", mtu);
+			mvpp2_bm_switch_buffers(priv, false);
 		}
+	} else {
+		bool jumbo = false;
+		int i;
 
-		/* Reconfigure BM to the original MTU */
-		err = mvpp2_bm_update_mtu(dev, dev->mtu);
-		if (err)
-			goto log_error;
+		for (i = 0; i < priv->port_count; i++)
+			if (priv->port_list[i] != port &&
+			    MVPP2_RX_PKT_SIZE(priv->port_list[i]->dev->mtu) >
+			    MVPP2_BM_LONG_PKT_SIZE) {
+				jumbo = true;
+				break;
+			}
+
+		/* No port is using jumbo frames */
+		if (!jumbo) {
+			dev_info(port->dev->dev.parent,
+				 "all ports have a low MTU, switching to per-cpu buffers\n");
+			mvpp2_bm_switch_buffers(priv, true);
+		}
 	}
 
-	mvpp2_stop_dev(port);
+	if (running)
+		mvpp2_stop_dev(port);
 
 	err = mvpp2_bm_update_mtu(dev, mtu);
-	if (!err) {
+	if (err) {
+		netdev_err(dev, "failed to change MTU\n");
+		/* Reconfigure BM to the original MTU */
+		mvpp2_bm_update_mtu(dev, dev->mtu);
+	} else {
 		port->pkt_size =  MVPP2_RX_PKT_SIZE(mtu);
-		goto out_start;
 	}
 
-	/* Reconfigure BM to the original MTU */
-	err = mvpp2_bm_update_mtu(dev, dev->mtu);
-	if (err)
-		goto log_error;
+	if (running) {
+		mvpp2_start_dev(port);
+		mvpp2_egress_enable(port);
+		mvpp2_ingress_enable(port);
+	}
 
-out_start:
-	mvpp2_start_dev(port);
-	mvpp2_egress_enable(port);
-	mvpp2_ingress_enable(port);
-
-	return 0;
-log_error:
-	netdev_err(dev, "failed to change MTU\n");
 	return err;
 }
 
@@ -3557,7 +3935,7 @@
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	unsigned int start;
-	int cpu;
+	unsigned int cpu;
 
 	for_each_possible_cpu(cpu) {
 		struct mvpp2_pcpu_stats *cpu_stats;
@@ -3637,9 +4015,9 @@
 
 	if (changed & NETIF_F_RXHASH) {
 		if (features & NETIF_F_RXHASH)
-			mvpp22_rss_enable(port);
+			mvpp22_port_rss_enable(port);
 		else
-			mvpp22_rss_disable(port);
+			mvpp22_port_rss_disable(port);
 	}
 
 	return 0;
@@ -3833,7 +4211,7 @@
 				   struct ethtool_rxnfc *info, u32 *rules)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
-	int ret = 0;
+	int ret = 0, i, loc = 0;
 
 	if (!mvpp22_rss_is_supported())
 		return -EOPNOTSUPP;
@@ -3845,6 +4223,18 @@
 	case ETHTOOL_GRXRINGS:
 		info->data = port->nrxqs;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		info->rule_cnt = port->n_rfs_rules;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = mvpp2_ethtool_cls_rule_get(port, info);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+			if (port->rfs_rules[i])
+				rules[loc++] = i;
+		}
+		break;
 	default:
 		return -ENOTSUPP;
 	}
@@ -3865,6 +4255,12 @@
 	case ETHTOOL_SRXFH:
 		ret = mvpp2_ethtool_rxfh_set(port, info);
 		break;
+	case ETHTOOL_SRXCLSRLINS:
+		ret = mvpp2_ethtool_cls_rule_ins(port, info);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = mvpp2_ethtool_cls_rule_del(port, info);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -3880,24 +4276,25 @@
 				  u8 *hfunc)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
 
 	if (!mvpp22_rss_is_supported())
 		return -EOPNOTSUPP;
 
 	if (indir)
-		memcpy(indir, port->indir,
-		       ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+		ret = mvpp22_port_rss_ctx_indir_get(port, 0, indir);
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_CRC32;
 
-	return 0;
+	return ret;
 }
 
 static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
 				  const u8 *key, const u8 hfunc)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
 
 	if (!mvpp22_rss_is_supported())
 		return -EOPNOTSUPP;
@@ -3908,15 +4305,58 @@
 	if (key)
 		return -EOPNOTSUPP;
 
-	if (indir) {
-		memcpy(port->indir, indir,
-		       ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
-		mvpp22_rss_fill_table(port, port->id);
-	}
+	if (indir)
+		ret = mvpp22_port_rss_ctx_indir_set(port, 0, indir);
 
-	return 0;
+	return ret;
 }
 
+static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
+					  u8 *key, u8 *hfunc, u32 rss_context)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_CRC32;
+
+	if (indir)
+		ret = mvpp22_port_rss_ctx_indir_get(port, rss_context, indir);
+
+	return ret;
+}
+
+static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
+					  const u32 *indir, const u8 *key,
+					  const u8 hfunc, u32 *rss_context,
+					  bool delete)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret;
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+		return -EOPNOTSUPP;
+
+	if (key)
+		return -EOPNOTSUPP;
+
+	if (delete)
+		return mvpp22_port_rss_ctx_delete(port, *rss_context);
+
+	if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+		ret = mvpp22_port_rss_ctx_create(port, rss_context);
+		if (ret)
+			return ret;
+	}
+
+	return mvpp22_port_rss_ctx_indir_set(port, *rss_context, indir);
+}
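
Together with .get_rxfh_context, this wires the port into the ethtool RSS-context API: *rss_context == ETH_RXFH_CONTEXT_ALLOC asks for a fresh context, delete tears one down, and anything else reprograms an existing context's indirection table. The create-then-program path, linearized:

	u32 ctx = ETH_RXFH_CONTEXT_ALLOC;

	ret = mvpp22_port_rss_ctx_create(port, &ctx);	/* ctx now holds the new id */
	if (!ret)
		ret = mvpp22_port_rss_ctx_indir_set(port, ctx, indir);
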
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -3953,7 +4393,8 @@
 	.get_rxfh_indir_size	= mvpp2_ethtool_get_rxfh_indir_size,
 	.get_rxfh		= mvpp2_ethtool_get_rxfh,
 	.set_rxfh		= mvpp2_ethtool_set_rxfh,
-
+	.get_rxfh_context	= mvpp2_ethtool_get_rxfh_context,
+	.set_rxfh_context	= mvpp2_ethtool_set_rxfh_context,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -3984,12 +4425,18 @@
 static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port,
 					  struct device_node *port_node)
 {
+	struct mvpp2 *priv = port->priv;
 	struct mvpp2_queue_vector *v;
 	int i, ret;
 
-	port->nqvecs = num_possible_cpus();
-	if (queue_mode == MVPP2_QDIST_SINGLE_MODE)
-		port->nqvecs += 1;
+	switch (queue_mode) {
+	case MVPP2_QDIST_SINGLE_MODE:
+		port->nqvecs = priv->nthreads + 1;
+		break;
+	case MVPP2_QDIST_MULTI_MODE:
+		port->nqvecs = priv->nthreads;
+		break;
+	}
 
 	for (i = 0; i < port->nqvecs; i++) {
 		char irqname[16];
@@ -4001,17 +4448,22 @@
 		v->sw_thread_id = i;
 		v->sw_thread_mask = BIT(i);
 
-		snprintf(irqname, sizeof(irqname), "tx-cpu%d", i);
+		if (port->flags & MVPP2_F_DT_COMPAT)
+			snprintf(irqname, sizeof(irqname), "tx-cpu%d", i);
+		else
+			snprintf(irqname, sizeof(irqname), "hif%d", i);
 
 		if (queue_mode == MVPP2_QDIST_MULTI_MODE) {
-			v->first_rxq = i * MVPP2_DEFAULT_RXQ;
-			v->nrxqs = MVPP2_DEFAULT_RXQ;
+			v->first_rxq = i;
+			v->nrxqs = 1;
 		} else if (queue_mode == MVPP2_QDIST_SINGLE_MODE &&
 			   i == (port->nqvecs - 1)) {
 			v->first_rxq = 0;
 			v->nrxqs = port->nrxqs;
 			v->type = MVPP2_QUEUE_VECTOR_SHARED;
-			strncpy(irqname, "rx-shared", sizeof(irqname));
+
+			if (port->flags & MVPP2_F_DT_COMPAT)
+				strncpy(irqname, "rx-shared", sizeof(irqname));
 		}
 
 		if (port_node)
@@ -4088,15 +4540,15 @@
 	struct device *dev = port->dev->dev.parent;
 	struct mvpp2 *priv = port->priv;
 	struct mvpp2_txq_pcpu *txq_pcpu;
-	int queue, cpu, err;
+	unsigned int thread;
+	int queue, err;
 
 	/* Checks for hardware constraints */
 	if (port->first_rxq + port->nrxqs >
 	    MVPP2_MAX_PORTS * priv->max_port_rxqs)
 		return -EINVAL;
 
-	if (port->nrxqs % MVPP2_DEFAULT_RXQ ||
-	    port->nrxqs > priv->max_port_rxqs || port->ntxqs > MVPP2_MAX_TXQ)
+	if (port->nrxqs > priv->max_port_rxqs || port->ntxqs > MVPP2_MAX_TXQ)
 		return -EINVAL;
 
 	/* Disable port */
@@ -4132,9 +4584,9 @@
 		txq->id = queue_phy_id;
 		txq->log_id = queue;
 		txq->done_pkts_coal = MVPP2_TXDONE_COAL_PKTS_THRESH;
-		for_each_present_cpu(cpu) {
-			txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
-			txq_pcpu->cpu = cpu;
+		for (thread = 0; thread < priv->nthreads; thread++) {
+			txq_pcpu = per_cpu_ptr(txq->pcpu, thread);
+			txq_pcpu->thread = thread;
 		}
 
 		port->txqs[queue] = txq;
@@ -4186,7 +4638,7 @@
 	mvpp2_cls_port_config(port);
 
 	if (mvpp22_rss_is_supported())
-		mvpp22_rss_port_init(port);
+		mvpp22_port_rss_init(port);
 
 	/* Provide an initial Rx packet size */
 	port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu);
@@ -4196,6 +4648,11 @@
 	if (err)
 		goto err_free_percpu;
 
+	/* Clear all port stats */
+	mvpp2_read_stats(port);
+	memset(port->ethtool_stats, 0,
+	       MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs) * sizeof(u64));
+
 	return 0;
 
 err_free_percpu:
@@ -4207,24 +4664,51 @@
 	return err;
 }
 
-/* Checks if the port DT description has the TX interrupts
- * described. On PPv2.1, there are no such interrupts. On PPv2.2,
- * there are available, but we need to keep support for old DTs.
- */
-static bool mvpp2_port_has_tx_irqs(struct mvpp2 *priv,
-				   struct device_node *port_node)
+static bool mvpp22_port_has_legacy_tx_irqs(struct device_node *port_node,
+					   unsigned long *flags)
 {
-	char *irqs[5] = { "rx-shared", "tx-cpu0", "tx-cpu1",
-			  "tx-cpu2", "tx-cpu3" };
-	int ret, i;
+	char *irqs[5] = { "rx-shared", "tx-cpu0", "tx-cpu1", "tx-cpu2",
+			  "tx-cpu3" };
+	int i;
+
+	for (i = 0; i < 5; i++)
+		if (of_property_match_string(port_node, "interrupt-names",
+					     irqs[i]) < 0)
+			return false;
+
+	*flags |= MVPP2_F_DT_COMPAT;
+	return true;
+}
+
+/* Checks if the port DT description has the required Tx interrupts:
+ * - PPv2.1: there are no such interrupts.
+ * - PPv2.2:
+ *   - The old DTs have: "rx-shared", "tx-cpuX" with X in [0..3]
+ *   - The new ones have: "hifX" with X in [0..8]
+ *
+ * All these variants are supported to preserve backward compatibility.
+ */
+static bool mvpp2_port_has_irqs(struct mvpp2 *priv,
+				struct device_node *port_node,
+				unsigned long *flags)
+{
+	char name[5];
+	int i;
+
+	/* ACPI */
+	if (!port_node)
+		return true;
 
 	if (priv->hw_version == MVPP21)
 		return false;
 
-	for (i = 0; i < 5; i++) {
-		ret = of_property_match_string(port_node, "interrupt-names",
-					       irqs[i]);
-		if (ret < 0)
+	if (mvpp22_port_has_legacy_tx_irqs(port_node, flags))
+		return true;
+
+	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
+		snprintf(name, 5, "hif%d", i);
+		if (of_property_match_string(port_node, "interrupt-names",
+					     name) < 0)
 			return false;
 	}
 
@@ -4258,11 +4742,12 @@
 	eth_hw_addr_random(dev);
 }
 
-static void mvpp2_phylink_validate(struct net_device *dev,
+static void mvpp2_phylink_validate(struct phylink_config *config,
 				   unsigned long *supported,
 				   struct phylink_link_state *state)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
+	struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+					       phylink_config);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
 	/* Invalid combinations */
@@ -4276,7 +4761,7 @@
 	case PHY_INTERFACE_MODE_RGMII_ID:
 	case PHY_INTERFACE_MODE_RGMII_RXID:
 	case PHY_INTERFACE_MODE_RGMII_TXID:
-		if (port->gop_id == 0)
+		if (port->priv->hw_version == MVPP22 && port->gop_id == 0)
 			goto empty_set;
 		break;
 	default:
@@ -4292,12 +4777,15 @@
 	case PHY_INTERFACE_MODE_10GKR:
 	case PHY_INTERFACE_MODE_XAUI:
 	case PHY_INTERFACE_MODE_NA:
-		phylink_set(mask, 10000baseCR_Full);
-		phylink_set(mask, 10000baseSR_Full);
-		phylink_set(mask, 10000baseLR_Full);
-		phylink_set(mask, 10000baseLRM_Full);
-		phylink_set(mask, 10000baseER_Full);
-		phylink_set(mask, 10000baseKR_Full);
+		if (port->gop_id == 0) {
+			phylink_set(mask, 10000baseT_Full);
+			phylink_set(mask, 10000baseCR_Full);
+			phylink_set(mask, 10000baseSR_Full);
+			phylink_set(mask, 10000baseLR_Full);
+			phylink_set(mask, 10000baseLRM_Full);
+			phylink_set(mask, 10000baseER_Full);
+			phylink_set(mask, 10000baseKR_Full);
+		}
 		/* Fall-through */
 	case PHY_INTERFACE_MODE_RGMII:
 	case PHY_INTERFACE_MODE_RGMII_ID:
@@ -4308,12 +4796,12 @@
 		phylink_set(mask, 10baseT_Full);
 		phylink_set(mask, 100baseT_Half);
 		phylink_set(mask, 100baseT_Full);
-		phylink_set(mask, 10000baseT_Full);
 		/* Fall-through */
 	case PHY_INTERFACE_MODE_1000BASEX:
 	case PHY_INTERFACE_MODE_2500BASEX:
 		phylink_set(mask, 1000baseT_Full);
 		phylink_set(mask, 1000baseX_Full);
+		phylink_set(mask, 2500baseT_Full);
 		phylink_set(mask, 2500baseX_Full);
 		break;
 	default:
@@ -4383,10 +4871,11 @@
 		state->pause |= MLO_PAUSE_TX;
 }
 
-static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+static int mvpp2_phylink_mac_link_state(struct phylink_config *config,
 					struct phylink_link_state *state)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
+	struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+					       phylink_config);
 
 	if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
 		u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4402,134 +4891,203 @@
 	return 1;
 }
 
-static void mvpp2_mac_an_restart(struct net_device *dev)
+static void mvpp2_mac_an_restart(struct phylink_config *config)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
-	u32 val;
+	struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+					       phylink_config);
+	u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 
-	if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
-		return;
-
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	/* The RESTART_AN bit is cleared by the h/w after restarting the AN
-	 * process.
-	 */
-	val |= MVPP2_GMAC_IN_BAND_RESTART_AN | MVPP2_GMAC_IN_BAND_AUTONEG;
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
+	       port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN,
+	       port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 }
 
 static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
 			     const struct phylink_link_state *state)
 {
-	u32 ctrl0, ctrl4;
+	u32 old_ctrl0, ctrl0;
+	u32 old_ctrl4, ctrl4;
 
-	ctrl0 = readl(port->base + MVPP22_XLG_CTRL0_REG);
-	ctrl4 = readl(port->base + MVPP22_XLG_CTRL4_REG);
+	old_ctrl0 = ctrl0 = readl(port->base + MVPP22_XLG_CTRL0_REG);
+	old_ctrl4 = ctrl4 = readl(port->base + MVPP22_XLG_CTRL4_REG);
+
+	ctrl0 |= MVPP22_XLG_CTRL0_MAC_RESET_DIS;
 
 	if (state->pause & MLO_PAUSE_TX)
 		ctrl0 |= MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN;
+	else
+		ctrl0 &= ~MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN;
+
 	if (state->pause & MLO_PAUSE_RX)
 		ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
+	else
+		ctrl0 &= ~MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
 
-	ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
-	ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC |
-		 MVPP22_XLG_CTRL4_EN_IDLE_CHECK;
+	ctrl4 &= ~(MVPP22_XLG_CTRL4_MACMODSELECT_GMAC |
+		   MVPP22_XLG_CTRL4_EN_IDLE_CHECK);
+	ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC;
 
-	writel(ctrl0, port->base + MVPP22_XLG_CTRL0_REG);
-	writel(ctrl4, port->base + MVPP22_XLG_CTRL4_REG);
+	if (old_ctrl0 != ctrl0)
+		writel(ctrl0, port->base + MVPP22_XLG_CTRL0_REG);
+	if (old_ctrl4 != ctrl4)
+		writel(ctrl4, port->base + MVPP22_XLG_CTRL4_REG);
+
+	if (!(old_ctrl0 & MVPP22_XLG_CTRL0_MAC_RESET_DIS)) {
+		while (!(readl(port->base + MVPP22_XLG_CTRL0_REG) &
+			 MVPP22_XLG_CTRL0_MAC_RESET_DIS))
+			continue;
+	}
 }
 
 static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
 			      const struct phylink_link_state *state)
 {
-	u32 an, ctrl0, ctrl2, ctrl4;
+	u32 old_an, an;
+	u32 old_ctrl0, ctrl0;
+	u32 old_ctrl2, ctrl2;
+	u32 old_ctrl4, ctrl4;
 
-	an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
-	ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-	ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
-
-	/* Force link down */
-	an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
-	an |= MVPP2_GMAC_FORCE_LINK_DOWN;
-	writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-
-	/* Set the GMAC in a reset state */
-	ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
-	writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+	old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	old_ctrl0 = ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
+	old_ctrl2 = ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+	old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
 
 	an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED |
 		MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN |
 		MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
 		MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN |
-		MVPP2_GMAC_FORCE_LINK_DOWN);
+		MVPP2_GMAC_IN_BAND_AUTONEG | MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS);
 	ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK;
-	ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK);
+	ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK |
+		   MVPP2_GMAC_PCS_ENABLE_MASK);
+	ctrl4 &= ~(MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN);
 
-	if (state->interface == PHY_INTERFACE_MODE_1000BASEX ||
-	    state->interface == PHY_INTERFACE_MODE_2500BASEX) {
-		/* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can
-		 * they negotiate duplex: they are always operating with a fixed
-		 * speed of 1000/2500Mbps in full duplex, so force 1000/2500
-		 * speed and full duplex here.
-		 */
-		ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
-		an |= MVPP2_GMAC_CONFIG_GMII_SPEED |
-		      MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-	} else if (!phy_interface_mode_is_rgmii(state->interface)) {
-		an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG;
-	}
-
-	if (state->duplex)
-		an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-	if (phylink_test(state->advertising, Pause))
-		an |= MVPP2_GMAC_FC_ADV_EN;
-	if (phylink_test(state->advertising, Asym_Pause))
-		an |= MVPP2_GMAC_FC_ADV_ASM_EN;
-
-	if (state->interface == PHY_INTERFACE_MODE_SGMII ||
-	    state->interface == PHY_INTERFACE_MODE_1000BASEX ||
-	    state->interface == PHY_INTERFACE_MODE_2500BASEX) {
-		an |= MVPP2_GMAC_IN_BAND_AUTONEG;
-		ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
-
-		ctrl4 &= ~(MVPP22_CTRL4_EXT_PIN_GMII_SEL |
-			   MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN);
+	/* Configure port type */
+	if (phy_interface_mode_is_8023z(state->interface)) {
+		ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK;
+		ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
 		ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
 			 MVPP22_CTRL4_DP_CLK_SEL |
 			 MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
-
-		if (state->pause & MLO_PAUSE_TX)
-			ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
-		if (state->pause & MLO_PAUSE_RX)
-			ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+	} else if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+		ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK | MVPP2_GMAC_INBAND_AN_MASK;
+		ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+		ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
+			 MVPP22_CTRL4_DP_CLK_SEL |
+			 MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
 	} else if (phy_interface_mode_is_rgmii(state->interface)) {
-		an |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS;
-
-		if (state->speed == SPEED_1000)
-			an |= MVPP2_GMAC_CONFIG_GMII_SPEED;
-		else if (state->speed == SPEED_100)
-			an |= MVPP2_GMAC_CONFIG_MII_SPEED;
-
 		ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL;
 		ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
 			 MVPP22_CTRL4_SYNC_BYPASS_DIS |
 			 MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
 	}
 
-	writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG);
-	writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
-	writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
-	writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	/* Configure advertisement bits */
+	if (phylink_test(state->advertising, Pause))
+		an |= MVPP2_GMAC_FC_ADV_EN;
+	if (phylink_test(state->advertising, Asym_Pause))
+		an |= MVPP2_GMAC_FC_ADV_ASM_EN;
+
+	/* Configure negotiation style */
+	if (!phylink_autoneg_inband(mode)) {
+		/* Phy or fixed speed - no in-band AN */
+		if (state->duplex)
+			an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+		if (state->speed == SPEED_1000 || state->speed == SPEED_2500)
+			an |= MVPP2_GMAC_CONFIG_GMII_SPEED;
+		else if (state->speed == SPEED_100)
+			an |= MVPP2_GMAC_CONFIG_MII_SPEED;
+
+		if (state->pause & MLO_PAUSE_TX)
+			ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+		if (state->pause & MLO_PAUSE_RX)
+			ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+	} else if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+		/* SGMII in-band mode receives the speed and duplex from
+		 * the PHY. Flow control information is not received.
+		 */
+		an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS);
+		an |= MVPP2_GMAC_IN_BAND_AUTONEG |
+		      MVPP2_GMAC_AN_SPEED_EN |
+		      MVPP2_GMAC_AN_DUPLEX_EN;
+
+		if (state->pause & MLO_PAUSE_TX)
+			ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+		if (state->pause & MLO_PAUSE_RX)
+			ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+	} else if (phy_interface_mode_is_8023z(state->interface)) {
+		/* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can
+		 * they negotiate duplex: they are always operating with a fixed
+		 * speed of 1000/2500Mbps in full duplex, so force 1000/2500
+		 * speed and full duplex here.
+		 */
+		ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
+		an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS);
+		an |= MVPP2_GMAC_IN_BAND_AUTONEG |
+		      MVPP2_GMAC_CONFIG_GMII_SPEED |
+		      MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+		if (state->pause & MLO_PAUSE_AN && state->an_enabled) {
+			an |= MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+		} else {
+			if (state->pause & MLO_PAUSE_TX)
+				ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+			if (state->pause & MLO_PAUSE_RX)
+				ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+		}
+	}
+
+/* Some fields of the auto-negotiation register require the port to be down when
+ * their value is updated.
+ */
+#define MVPP2_GMAC_AN_PORT_DOWN_MASK	\
+		(MVPP2_GMAC_IN_BAND_AUTONEG | \
+		 MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS | \
+		 MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | \
+		 MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_CONFIG_FULL_DUPLEX | \
+		 MVPP2_GMAC_AN_DUPLEX_EN)
+
+	if ((old_ctrl0 ^ ctrl0) & MVPP2_GMAC_PORT_TYPE_MASK ||
+	    (old_ctrl2 ^ ctrl2) & MVPP2_GMAC_INBAND_AN_MASK ||
+	    (old_an ^ an) & MVPP2_GMAC_AN_PORT_DOWN_MASK) {
+		/* Force link down */
+		old_an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+		old_an |= MVPP2_GMAC_FORCE_LINK_DOWN;
+		writel(old_an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+		/* Set the GMAC in a reset state - do this in a way that
+		 * ensures we clear it below.
+		 */
+		old_ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
+		writel(old_ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+	}
+
+	if (old_ctrl0 != ctrl0)
+		writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG);
+	if (old_ctrl2 != ctrl2)
+		writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+	if (old_ctrl4 != ctrl4)
+		writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
+	if (old_an != an)
+		writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+	if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) {
+		while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
+		       MVPP2_GMAC_PORT_RESET_MASK)
+			continue;
+	}
 }
 
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
 			     const struct phylink_link_state *state)
 {
+	struct net_device *dev = to_net_dev(config->dev);
 	struct mvpp2_port *port = netdev_priv(dev);
+	bool change_interface = port->phy_interface != state->interface;
 
 	/* Check for invalid configuration */
-	if (state->interface == PHY_INTERFACE_MODE_10GKR && port->gop_id != 0) {
+	if (mvpp2_is_xlg(state->interface) && port->gop_id != 0) {
 		netdev_err(dev, "Invalid mode on %s\n", dev->name);
 		return;
 	}
@@ -4537,8 +5095,9 @@
 	/* Make sure the port is disabled when reconfiguring the mode */
 	mvpp2_port_disable(port);
 
-	if (port->priv->hw_version == MVPP22 &&
-	    port->phy_interface != state->interface) {
+	if (port->priv->hw_version == MVPP22 && change_interface) {
+		mvpp22_gop_mask_irq(port);
+
 		port->phy_interface = state->interface;
 
 		/* Reconfigure the serdes lanes */
@@ -4547,33 +5106,41 @@
 	}
 
 	/* mac (re)configuration */
-	if (state->interface == PHY_INTERFACE_MODE_10GKR)
+	if (mvpp2_is_xlg(state->interface))
 		mvpp2_xlg_config(port, mode, state);
 	else if (phy_interface_mode_is_rgmii(state->interface) ||
-		 state->interface == PHY_INTERFACE_MODE_SGMII ||
-		 state->interface == PHY_INTERFACE_MODE_1000BASEX ||
-		 state->interface == PHY_INTERFACE_MODE_2500BASEX)
+		 phy_interface_mode_is_8023z(state->interface) ||
+		 state->interface == PHY_INTERFACE_MODE_SGMII)
 		mvpp2_gmac_config(port, mode, state);
 
 	if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
 		mvpp2_port_loopback_set(port, state);
 
+	if (port->priv->hw_version == MVPP22 && change_interface)
+		mvpp22_gop_unmask_irq(port);
+
 	mvpp2_port_enable(port);
 }
 
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
 			      phy_interface_t interface, struct phy_device *phy)
 {
+	struct net_device *dev = to_net_dev(config->dev);
 	struct mvpp2_port *port = netdev_priv(dev);
 	u32 val;
 
-	if (!phylink_autoneg_inband(mode) &&
-	    interface != PHY_INTERFACE_MODE_10GKR) {
-		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-		val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
-		if (phy_interface_mode_is_rgmii(interface))
+	if (!phylink_autoneg_inband(mode)) {
+		if (mvpp2_is_xlg(interface)) {
+			val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+			val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_DOWN;
+			val |= MVPP22_XLG_CTRL0_FORCE_LINK_PASS;
+			writel(val, port->base + MVPP22_XLG_CTRL0_REG);
+		} else {
+			val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+			val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
 			val |= MVPP2_GMAC_FORCE_LINK_PASS;
-		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+			writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		}
 	}
 
 	mvpp2_port_enable(port);
@@ -4583,31 +5150,31 @@
 	netif_tx_wake_all_queues(dev);
 }
 
-static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
-				phy_interface_t interface)
+static void mvpp2_mac_link_down(struct phylink_config *config,
+				unsigned int mode, phy_interface_t interface)
 {
+	struct net_device *dev = to_net_dev(config->dev);
 	struct mvpp2_port *port = netdev_priv(dev);
 	u32 val;
 
-	if (!phylink_autoneg_inband(mode) &&
-	    interface != PHY_INTERFACE_MODE_10GKR) {
-		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-		val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
-		val |= MVPP2_GMAC_FORCE_LINK_DOWN;
-		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	if (!phylink_autoneg_inband(mode)) {
+		if (mvpp2_is_xlg(interface)) {
+			val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+			val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_PASS;
+			val |= MVPP22_XLG_CTRL0_FORCE_LINK_DOWN;
+			writel(val, port->base + MVPP22_XLG_CTRL0_REG);
+		} else {
+			val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+			val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+			val |= MVPP2_GMAC_FORCE_LINK_DOWN;
+			writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		}
 	}
 
 	netif_tx_stop_all_queues(dev);
 	mvpp2_egress_disable(port);
 	mvpp2_ingress_disable(port);
 
-	/* When using link interrupts to notify phylink of a MAC state change,
-	 * we do not want the port to be disabled (we want to receive further
-	 * interrupts, to be notified when the port will have a link later).
-	 */
-	if (!port->has_phy)
-		return;
-
 	mvpp2_port_disable(port);
 }
 
@@ -4629,32 +5196,26 @@
 	struct mvpp2_port *port;
 	struct mvpp2_port_pcpu *port_pcpu;
 	struct device_node *port_node = to_of_node(port_fwnode);
+	netdev_features_t features;
 	struct net_device *dev;
-	struct resource *res;
 	struct phylink *phylink;
 	char *mac_from = "";
-	unsigned int ntxqs, nrxqs;
+	unsigned int ntxqs, nrxqs, thread;
+	unsigned long flags = 0;
 	bool has_tx_irqs;
 	u32 id;
-	int features;
 	int phy_mode;
-	int err, i, cpu;
+	int err, i;
 
-	if (port_node) {
-		has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node);
-	} else {
-		has_tx_irqs = true;
-		queue_mode = MVPP2_QDIST_MULTI_MODE;
+	has_tx_irqs = mvpp2_port_has_irqs(priv, port_node, &flags);
+	if (!has_tx_irqs && queue_mode == MVPP2_QDIST_MULTI_MODE) {
+		dev_err(&pdev->dev,
+			"not enough IRQs to support multi queue mode\n");
+		return -EINVAL;
 	}
 
-	if (!has_tx_irqs)
-		queue_mode = MVPP2_QDIST_SINGLE_MODE;
-
 	ntxqs = MVPP2_MAX_TXQ;
-	if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_MULTI_MODE)
-		nrxqs = MVPP2_DEFAULT_RXQ * num_possible_cpus();
-	else
-		nrxqs = MVPP2_DEFAULT_RXQ;
+	nrxqs = mvpp2_get_nrxqs(priv);
 
 	dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
 	if (!dev)
@@ -4697,6 +5258,7 @@
 	port->nrxqs = nrxqs;
 	port->priv = priv;
 	port->has_tx_irqs = has_tx_irqs;
+	port->flags = flags;
 
 	err = mvpp2_queue_vectors_init(port, port_node);
 	if (err)
@@ -4728,8 +5290,7 @@
 	port->comphy = comphy;
 
 	if (priv->hw_version == MVPP21) {
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
-		port->base = devm_ioremap_resource(&pdev->dev, res);
+		port->base = devm_platform_ioremap_resource(pdev, 2 + id);
 		if (IS_ERR(port->base)) {
 			err = PTR_ERR(port->base);
 			goto err_free_irq;
@@ -4760,7 +5321,7 @@
 	}
 
 	port->ethtool_stats = devm_kcalloc(&pdev->dev,
-					   ARRAY_SIZE(mvpp2_ethtool_regs),
+					   MVPP2_N_ETHTOOL_STATS(ntxqs, nrxqs),
 					   sizeof(u64), GFP_KERNEL);
 	if (!port->ethtool_stats) {
 		err = -ENOMEM;
@@ -4784,7 +5345,8 @@
 
 	mvpp2_port_periodic_xon_disable(port);
 
-	mvpp2_port_reset(port);
+	mvpp2_mac_reset_assert(port);
+	mvpp22_pcs_reset_assert(port);
 
 	port->pcpu = alloc_percpu(struct mvpp2_port_pcpu);
 	if (!port->pcpu) {
@@ -4793,17 +5355,14 @@
 	}
 
 	if (!port->has_tx_irqs) {
-		for_each_present_cpu(cpu) {
-			port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+		for (thread = 0; thread < priv->nthreads; thread++) {
+			port_pcpu = per_cpu_ptr(port->pcpu, thread);
 
 			hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
-				     HRTIMER_MODE_REL_PINNED);
+				     HRTIMER_MODE_REL_PINNED_SOFT);
 			port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
 			port_pcpu->timer_scheduled = false;
-
-			tasklet_init(&port_pcpu->tx_done_tasklet,
-				     mvpp2_tx_proc_cb,
-				     (unsigned long)dev);
+			port_pcpu->dev = dev;
 		}
 	}
 
@@ -4813,14 +5372,14 @@
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
 			    NETIF_F_HW_VLAN_CTAG_FILTER;
 
-	if (mvpp22_rss_is_supported())
+	if (mvpp22_rss_is_supported()) {
 		dev->hw_features |= NETIF_F_RXHASH;
-
-	if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-		dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+		dev->features |= NETIF_F_NTUPLE;
 	}
 
+	if (!port->priv->percpu_pools)
+		mvpp2_set_hw_csum(port, port->pool_long->id);
+
 	dev->vlan_features |= features;
 	dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
 	dev->priv_flags |= IFF_UNICAST_FLT;
@@ -4833,8 +5392,11 @@
 
 	/* Phylink isn't used w/ ACPI as of now */
 	if (port_node) {
-		phylink = phylink_create(dev, port_fwnode, phy_mode,
-					 &mvpp2_phylink_ops);
+		port->phylink_config.dev = &dev->dev;
+		port->phylink_config.type = PHYLINK_NETDEV;
+
+		phylink = phylink_create(&port->phylink_config, port_fwnode,
+					 phy_mode, &mvpp2_phylink_ops);
 		if (IS_ERR(phylink)) {
 			err = PTR_ERR(phylink);
 			goto err_free_port_pcpu;
@@ -5078,13 +5640,13 @@
 	}
 
 	/* Allocate and initialize aggregated TXQs */
-	priv->aggr_txqs = devm_kcalloc(&pdev->dev, num_present_cpus(),
+	priv->aggr_txqs = devm_kcalloc(&pdev->dev, MVPP2_MAX_THREADS,
 				       sizeof(*priv->aggr_txqs),
 				       GFP_KERNEL);
 	if (!priv->aggr_txqs)
 		return -ENOMEM;
 
-	for_each_present_cpu(i) {
+	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
 		priv->aggr_txqs[i].id = i;
 		priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE;
 		err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv);
@@ -5108,7 +5670,7 @@
 	mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1);
 
 	/* Buffer Manager initialization */
-	err = mvpp2_bm_init(pdev, priv);
+	err = mvpp2_bm_init(&pdev->dev, priv);
 	if (err < 0)
 		return err;
 
@@ -5131,7 +5693,7 @@
 	struct mvpp2 *priv;
 	struct resource *res;
 	void __iomem *base;
-	int i;
+	int i, shared;
 	int err;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -5141,6 +5703,8 @@
 	if (has_acpi_companion(&pdev->dev)) {
 		acpi_id = acpi_match_device(pdev->dev.driver->acpi_match_table,
 					    &pdev->dev);
+		if (!acpi_id)
+			return -EINVAL;
 		priv->hw_version = (unsigned long)acpi_id->driver_data;
 	} else {
 		priv->hw_version =
@@ -5153,14 +5717,12 @@
 	if (priv->hw_version == MVPP21)
 		queue_mode = MVPP2_QDIST_SINGLE_MODE;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
 	if (priv->hw_version == MVPP21) {
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
+		priv->lms_base = devm_platform_ioremap_resource(pdev, 1);
 		if (IS_ERR(priv->lms_base))
 			return PTR_ERR(priv->lms_base);
 	} else {
@@ -5194,8 +5756,21 @@
 			priv->sysctrl_base = NULL;
 	}
 
+	if (priv->hw_version == MVPP22 &&
+	    mvpp2_get_nrxqs(priv) * 2 <= MVPP2_BM_MAX_POOLS)
+		priv->percpu_pools = 1;
+
 	mvpp2_setup_bm_pool();
 
+	priv->nthreads = min_t(unsigned int, num_present_cpus(),
+			       MVPP2_MAX_THREADS);
+
+	shared = num_present_cpus() - priv->nthreads;
+	if (shared > 0)
+		bitmap_fill(&priv->lock_map,
+			    min_t(int, shared, MVPP2_MAX_THREADS));
+
 	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
 		u32 addr_space_sz;
 
@@ -5353,9 +5928,6 @@
 
 	mvpp2_dbgfs_cleanup(priv);
 
-	flush_workqueue(priv->stats_queue);
-	destroy_workqueue(priv->stats_queue);
-
 	fwnode_for_each_available_child_node(fwnode, port_fwnode) {
 		if (priv->port_list[i]) {
 			mutex_destroy(&priv->port_list[i]->gather_stats_lock);
@@ -5364,13 +5936,15 @@
 		i++;
 	}
 
+	destroy_workqueue(priv->stats_queue);
+
 	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
 		struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i];
 
-		mvpp2_bm_pool_destroy(pdev, priv, bm_pool);
+		mvpp2_bm_pool_destroy(&pdev->dev, priv, bm_pool);
 	}
 
-	for_each_present_cpu(i) {
+	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
 		struct mvpp2_tx_queue *aggr_txq = &priv->aggr_txqs[i];
 
 		dma_free_coherent(&pdev->dev,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index 392fd89..5692c60 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -312,7 +312,8 @@
 	}
 
 	/* Set value */
-	pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] = shift & MVPP2_PRS_SRAM_SHIFT_MASK;
+	pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] |=
+		shift & MVPP2_PRS_SRAM_SHIFT_MASK;
 
 	/* Reset and set operation */
 	mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS,
@@ -1905,8 +1906,7 @@
 }
 
 /* Find tcam entry with matched pair <vid,port> */
-static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
-				    u16 mask)
+static int mvpp2_prs_vid_range_find(struct mvpp2_port *port, u16 vid, u16 mask)
 {
 	unsigned char byte[2], enable[2];
 	struct mvpp2_prs_entry pe;
@@ -1914,13 +1914,13 @@
 	int tid;
 
 	/* Go through the all entries with MVPP2_PRS_LU_VID */
-	for (tid = MVPP2_PE_VID_FILT_RANGE_START;
-	     tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
-		if (!priv->prs_shadow[tid].valid ||
-		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+	for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+	     tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+		if (!port->priv->prs_shadow[tid].valid ||
+		    port->priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
 			continue;
 
-		mvpp2_prs_init_from_hw(priv, &pe, tid);
+		mvpp2_prs_init_from_hw(port->priv, &pe, tid);
 
 		mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
 		mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
@@ -1950,7 +1950,7 @@
 	memset(&pe, 0, sizeof(pe));
 
 	/* Scan TCAM and see if entry with this <vid,port> already exist */
-	tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+	tid = mvpp2_prs_vid_range_find(port, vid, mask);
 
 	reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
 	if (reg_val & MVPP2_DSA_EXTENDED)
@@ -2008,7 +2008,7 @@
 	int tid;
 
 	/* Scan TCAM and see if entry with this <vid,port> already exist */
-	tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+	tid = mvpp2_prs_vid_range_find(port, vid, 0xfff);
 
 	/* No such entry */
 	if (tid < 0)
@@ -2026,8 +2026,10 @@
 
 	for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
 	     tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
-		if (priv->prs_shadow[tid].valid)
-			mvpp2_prs_vid_entry_remove(port, tid);
+		if (priv->prs_shadow[tid].valid) {
+			mvpp2_prs_hw_inv(priv, tid);
+			priv->prs_shadow[tid].valid = false;
+		}
 	}
 }
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig
new file mode 100644
index 0000000..711ada7
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Marvell OcteonTX2 drivers configuration
+#
+
+config OCTEONTX2_MBOX
+	tristate
+
+config OCTEONTX2_AF
+	tristate "Marvell OcteonTX2 RVU Admin Function driver"
+	select OCTEONTX2_MBOX
+	depends on (64BIT && COMPILE_TEST) || ARM64
+	depends on PCI
+	help
+	  This driver supports Marvell's OcteonTX2 Resource Virtualization
+	  Unit's admin function manager, which manages all RVU HW resources
+	  and provides an interface for the PFs/VFs to configure the HW. It
+	  must be enabled for the other RVU device drivers to work.
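
A minimal configuration fragment could look like the following (assuming an
arm64 target with PCI, per the dependencies above; OCTEONTX2_MBOX has no
prompt and is selected automatically when the AF driver is enabled):

    CONFIG_OCTEONTX2_AF=m
    CONFIG_OCTEONTX2_MBOX=m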
diff --git a/drivers/net/ethernet/marvell/octeontx2/Makefile b/drivers/net/ethernet/marvell/octeontx2/Makefile
new file mode 100644
index 0000000..e579dcd
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Marvell OcteonTX2 device drivers.
+#
+
+obj-$(CONFIG_OCTEONTX2_AF) += af/
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
new file mode 100644
index 0000000..06329ac
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Marvell's OcteonTX2 RVU Admin Function driver
+#
+
+obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o
+obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o
+
+octeontx2_mbox-y := mbox.o
+octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
+		  rvu_reg.o rvu_npc.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
new file mode 100644
index 0000000..6d55e3d
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -0,0 +1,871 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 CGX driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "cgx.h"
+
+#define DRV_NAME	"octeontx2-cgx"
+#define DRV_STRING      "Marvell OcteonTX2 CGX/MAC Driver"
+
+/**
+ * struct lmac
+ * @wq_cmd_cmplt:	waitq to keep the process blocked until cmd completion
+ * @cmd_lock:		Lock to serialize the command interface
+ * @resp:		command response
+ * @link_info:		link related information
+ * @event_cb:		callback for linkchange events
+ * @event_cb_lock:	lock for serializing callback with unregister
+ * @cmd_pend:		flag set before new command is started
+ *			flag cleared after command response is received
+ * @cgx:		parent cgx port
+ * @lmac_id:		lmac port id
+ * @name:		lmac port name
+ */
+struct lmac {
+	wait_queue_head_t wq_cmd_cmplt;
+	struct mutex cmd_lock;
+	u64 resp;
+	struct cgx_link_user_info link_info;
+	struct cgx_event_cb event_cb;
+	spinlock_t event_cb_lock;
+	bool cmd_pend;
+	struct cgx *cgx;
+	u8 lmac_id;
+	char *name;
+};
+
+struct cgx {
+	void __iomem		*reg_base;
+	struct pci_dev		*pdev;
+	u8			cgx_id;
+	u8			lmac_count;
+	struct lmac		*lmac_idmap[MAX_LMAC_PER_CGX];
+	struct			work_struct cgx_cmd_work;
+	struct			workqueue_struct *cgx_cmd_workq;
+	struct list_head	cgx_list;
+};
+
+static LIST_HEAD(cgx_list);
+
+/* Convert firmware speed encoding to user format (Mbps) */
+static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX];
+
+/* Convert firmware lmac type encoding to string */
+static char *cgx_lmactype_string[LMAC_MODE_MAX];
+
+/* CGX PHY management internal APIs */
+static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en);
+
+/* Supported devices */
+static const struct pci_device_id cgx_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) },
+	{ 0, }  /* end of table */
+};
+
+MODULE_DEVICE_TABLE(pci, cgx_id_table);
+
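+/* Per-LMAC CSR accessors: each LMAC's register block within the CGX BAR
+ * sits at a 1 << 18 byte stride, hence the (lmac << 18) offset below.
+ */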
+static void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val)
+{
+	writeq(val, cgx->reg_base + (lmac << 18) + offset);
+}
+
+static u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset)
+{
+	return readq(cgx->reg_base + (lmac << 18) + offset);
+}
+
+static inline struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx)
+{
+	if (!cgx || lmac_id >= MAX_LMAC_PER_CGX)
+		return NULL;
+
+	return cgx->lmac_idmap[lmac_id];
+}
+
+int cgx_get_cgxcnt_max(void)
+{
+	struct cgx *cgx_dev;
+	int idmax = -ENODEV;
+
+	list_for_each_entry(cgx_dev, &cgx_list, cgx_list)
+		if (cgx_dev->cgx_id > idmax)
+			idmax = cgx_dev->cgx_id;
+
+	if (idmax < 0)
+		return 0;
+
+	return idmax + 1;
+}
+EXPORT_SYMBOL(cgx_get_cgxcnt_max);
+
+int cgx_get_lmac_cnt(void *cgxd)
+{
+	struct cgx *cgx = cgxd;
+
+	if (!cgx)
+		return -ENODEV;
+
+	return cgx->lmac_count;
+}
+EXPORT_SYMBOL(cgx_get_lmac_cnt);
+
+void *cgx_get_pdata(int cgx_id)
+{
+	struct cgx *cgx_dev;
+
+	list_for_each_entry(cgx_dev, &cgx_list, cgx_list) {
+		if (cgx_dev->cgx_id == cgx_id)
+			return cgx_dev;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(cgx_get_pdata);
+
+/* Callers must hold the lock that serializes the event queue (where
+ * asynchronous events are posted) before calling this API. Otherwise an
+ * asynchronous event carrying a newer link status may reach its consumer
+ * before this function returns, making the reported link status appear
+ * wrong.
+ */
+int cgx_get_link_info(void *cgxd, int lmac_id,
+		      struct cgx_link_user_info *linfo)
+{
+	struct lmac *lmac = lmac_pdata(lmac_id, cgxd);
+
+	if (!lmac)
+		return -ENODEV;
+
+	*linfo = lmac->link_info;
+	return 0;
+}
+EXPORT_SYMBOL(cgx_get_link_info);
+
+static u64 mac2u64(u8 *mac_addr)
+{
+	u64 mac = 0;
+	int index;
+
+	for (index = ETH_ALEN - 1; index >= 0; index--)
+		mac |= ((u64)*mac_addr++) << (8 * index);
+	return mac;
+}
+
+int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr)
+{
+	struct cgx *cgx_dev = cgx_get_pdata(cgx_id);
+	u64 cfg;
+
+	/* Pack the six bytes of the MAC address into the low 48 bits */
+	cfg = mac2u64(mac_addr);
+
+	cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (lmac_id * 0x8)),
+		  cfg | CGX_DMAC_CAM_ADDR_ENABLE | ((u64)lmac_id << 49));
+
+	cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0);
+	cfg |= CGX_DMAC_CTL0_CAM_ENABLE;
+	cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg);
+
+	return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_addr_set);
+
+u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id)
+{
+	struct cgx *cgx_dev = cgx_get_pdata(cgx_id);
+	u64 cfg;
+
+	cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8);
+	return cfg & CGX_RX_DMAC_ADR_MASK;
+}
+EXPORT_SYMBOL(cgx_lmac_addr_get);
+
+int cgx_set_pkind(void *cgxd, u8 lmac_id, int pkind)
+{
+	struct cgx *cgx = cgxd;
+
+	if (!cgx || lmac_id >= cgx->lmac_count)
+		return -ENODEV;
+
+	cgx_write(cgx, lmac_id, CGXX_CMRX_RX_ID_MAP, (pkind & 0x3F));
+	return 0;
+}
+EXPORT_SYMBOL(cgx_set_pkind);
+
+static inline u8 cgx_get_lmac_type(struct cgx *cgx, int lmac_id)
+{
+	u64 cfg;
+
+	cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
+	return (cfg >> CGX_LMAC_TYPE_SHIFT) & CGX_LMAC_TYPE_MASK;
+}
+
+/* Configure CGX LMAC in internal loopback mode */
+int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable)
+{
+	struct cgx *cgx = cgxd;
+	u8 lmac_type;
+	u64 cfg;
+
+	if (!cgx || lmac_id >= cgx->lmac_count)
+		return -ENODEV;
+
+	lmac_type = cgx_get_lmac_type(cgx, lmac_id);
+	if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) {
+		cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL);
+		if (enable)
+			cfg |= CGXX_GMP_PCS_MRX_CTL_LBK;
+		else
+			cfg &= ~CGXX_GMP_PCS_MRX_CTL_LBK;
+		cgx_write(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL, cfg);
+	} else {
+		cfg = cgx_read(cgx, lmac_id, CGXX_SPUX_CONTROL1);
+		if (enable)
+			cfg |= CGXX_SPUX_CONTROL1_LBK;
+		else
+			cfg &= ~CGXX_SPUX_CONTROL1_LBK;
+		cgx_write(cgx, lmac_id, CGXX_SPUX_CONTROL1, cfg);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_internal_loopback);
+
+void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable)
+{
+	struct cgx *cgx = cgx_get_pdata(cgx_id);
+	u64 cfg = 0;
+
+	if (!cgx)
+		return;
+
+	if (enable) {
+		/* Enable promiscuous mode on LMAC */
+		cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0);
+		cfg &= ~(CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE);
+		cfg |= CGX_DMAC_BCAST_MODE;
+		cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg);
+
+		cfg = cgx_read(cgx, 0,
+			       (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8));
+		cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE;
+		cgx_write(cgx, 0,
+			  (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg);
+	} else {
+		/* Disable promiscuous mode */
+		cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0);
+		cfg |= CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE;
+		cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg);
+		cfg = cgx_read(cgx, 0,
+			       (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8));
+		cfg |= CGX_DMAC_CAM_ADDR_ENABLE;
+		cgx_write(cgx, 0,
+			  (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg);
+	}
+}
+EXPORT_SYMBOL(cgx_lmac_promisc_config);
+
+int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat)
+{
+	struct cgx *cgx = cgxd;
+
+	if (!cgx || lmac_id >= cgx->lmac_count)
+		return -ENODEV;
+	*rx_stat =  cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8));
+	return 0;
+}
+EXPORT_SYMBOL(cgx_get_rx_stats);
+
+int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat)
+{
+	struct cgx *cgx = cgxd;
+
+	if (!cgx || lmac_id >= cgx->lmac_count)
+		return -ENODEV;
+	*tx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (idx * 8));
+	return 0;
+}
+EXPORT_SYMBOL(cgx_get_tx_stats);
+
+int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable)
+{
+	struct cgx *cgx = cgxd;
+	u64 cfg;
+
+	if (!cgx || lmac_id >= cgx->lmac_count)
+		return -ENODEV;
+
+	cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
+	if (enable)
+		cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN;
+	else
+		cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN);
+	cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg);
+	return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_rx_tx_enable);
+
+/* CGX firmware interface low-level support */
+static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac)
+{
+	struct cgx *cgx = lmac->cgx;
+	struct device *dev;
+	int err = 0;
+	u64 cmd;
+
+	/* Ensure no other command is in progress */
+	err = mutex_lock_interruptible(&lmac->cmd_lock);
+	if (err)
+		return err;
+
+	/* Ensure command register is free */
+	cmd = cgx_read(cgx, lmac->lmac_id,  CGX_COMMAND_REG);
+	if (FIELD_GET(CMDREG_OWN, cmd) != CGX_CMD_OWN_NS) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	/* Update ownership in command request */
+	req = FIELD_SET(CMDREG_OWN, CGX_CMD_OWN_FIRMWARE, req);
+
+	/* Mark this lmac as pending, before we start */
+	lmac->cmd_pend = true;
+
+	/* Start command in hardware */
+	cgx_write(cgx, lmac->lmac_id, CGX_COMMAND_REG, req);
+
+	/* Ensure command is completed without errors */
+	if (!wait_event_timeout(lmac->wq_cmd_cmplt, !lmac->cmd_pend,
+				msecs_to_jiffies(CGX_CMD_TIMEOUT))) {
+		dev = &cgx->pdev->dev;
+		dev_err(dev, "cgx port %d:%d cmd timeout\n",
+			cgx->cgx_id, lmac->lmac_id);
+		err = -EIO;
+		goto unlock;
+	}
+
+	/* we have a valid command response */
+	smp_rmb(); /* Ensure the latest updates are visible */
+	*resp = lmac->resp;
+
+unlock:
+	mutex_unlock(&lmac->cmd_lock);
+
+	return err;
+}
+
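+/* The handshake above, summarised: the caller marks cmd_pend, writes the
+ * request to CGX_COMMAND_REG and sleeps on wq_cmd_cmplt; the firmware
+ * posts a CGX_EVT_CMD_RESP event; the IRQ handler saves it in lmac->resp,
+ * clears cmd_pend (with an smp_wmb() pairing with the smp_rmb() above)
+ * and wakes the waiter.
+ */
+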
+static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp,
+				      struct cgx *cgx, int lmac_id)
+{
+	struct lmac *lmac;
+	int err;
+
+	lmac = lmac_pdata(lmac_id, cgx);
+	if (!lmac)
+		return -ENODEV;
+
+	err = cgx_fwi_cmd_send(req, resp, lmac);
+
+	/* Check for valid response */
+	if (!err) {
+		if (FIELD_GET(EVTREG_STAT, *resp) == CGX_STAT_FAIL)
+			return -EIO;
+		else
+			return 0;
+	}
+
+	return err;
+}
+
+static inline void cgx_link_usertable_init(void)
+{
+	cgx_speed_mbps[CGX_LINK_NONE] = 0;
+	cgx_speed_mbps[CGX_LINK_10M] = 10;
+	cgx_speed_mbps[CGX_LINK_100M] = 100;
+	cgx_speed_mbps[CGX_LINK_1G] = 1000;
+	cgx_speed_mbps[CGX_LINK_2HG] = 2500;
+	cgx_speed_mbps[CGX_LINK_5G] = 5000;
+	cgx_speed_mbps[CGX_LINK_10G] = 10000;
+	cgx_speed_mbps[CGX_LINK_20G] = 20000;
+	cgx_speed_mbps[CGX_LINK_25G] = 25000;
+	cgx_speed_mbps[CGX_LINK_40G] = 40000;
+	cgx_speed_mbps[CGX_LINK_50G] = 50000;
+	cgx_speed_mbps[CGX_LINK_100G] = 100000;
+
+	cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII";
+	cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI";
+	cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI";
+	cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R";
+	cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R";
+	cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII";
+	cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R";
+	cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R";
+	cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R";
+	cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII";
+}
+
+static inline void link_status_user_format(u64 lstat,
+					   struct cgx_link_user_info *linfo,
+					   struct cgx *cgx, u8 lmac_id)
+{
+	char *lmac_string;
+
+	linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
+	linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
+	linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)];
+	linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id);
+	lmac_string = cgx_lmactype_string[linfo->lmac_type_id];
+	strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1);
+}
+
+/* Hardware event handlers */
+static inline void cgx_link_change_handler(u64 lstat,
+					   struct lmac *lmac)
+{
+	struct cgx_link_user_info *linfo;
+	struct cgx *cgx = lmac->cgx;
+	struct cgx_link_event event;
+	struct device *dev;
+	int err_type;
+
+	dev = &cgx->pdev->dev;
+
+	link_status_user_format(lstat, &event.link_uinfo, cgx, lmac->lmac_id);
+	err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat);
+
+	event.cgx_id = cgx->cgx_id;
+	event.lmac_id = lmac->lmac_id;
+
+	/* update the local copy of link status */
+	lmac->link_info = event.link_uinfo;
+	linfo = &lmac->link_info;
+
+	/* Ensure callback doesn't get unregistered until we finish it */
+	spin_lock(&lmac->event_cb_lock);
+
+	if (!lmac->event_cb.notify_link_chg) {
+		dev_dbg(dev, "cgx port %d:%d Link change handler null",
+			cgx->cgx_id, lmac->lmac_id);
+		if (err_type != CGX_ERR_NONE) {
+			dev_err(dev, "cgx port %d:%d Link error %d\n",
+				cgx->cgx_id, lmac->lmac_id, err_type);
+		}
+		dev_info(dev, "cgx port %d:%d Link is %s %d Mbps\n",
+			 cgx->cgx_id, lmac->lmac_id,
+			 linfo->link_up ? "UP" : "DOWN", linfo->speed);
+		goto err;
+	}
+
+	if (lmac->event_cb.notify_link_chg(&event, lmac->event_cb.data))
+		dev_err(dev, "event notification failure\n");
+err:
+	spin_unlock(&lmac->event_cb_lock);
+}
+
+static inline bool cgx_cmdresp_is_linkevent(u64 event)
+{
+	u8 id;
+
+	id = FIELD_GET(EVTREG_ID, event);
+	return id == CGX_CMD_LINK_BRING_UP ||
+	       id == CGX_CMD_LINK_BRING_DOWN;
+}
+
+static inline bool cgx_event_is_linkevent(u64 event)
+{
+	return FIELD_GET(EVTREG_ID, event) == CGX_EVT_LINK_CHANGE;
+}
+
+static inline int cgx_fwi_get_mkex_prfl_sz(u64 *prfl_sz,
+					   struct cgx *cgx)
+{
+	u64 req = 0;
+	u64 resp;
+	int err;
+
+	req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_MKEX_PRFL_SIZE, req);
+	err = cgx_fwi_cmd_generic(req, &resp, cgx, 0);
+	if (!err)
+		*prfl_sz = FIELD_GET(RESP_MKEX_PRFL_SIZE, resp);
+
+	return err;
+}
+
+static inline int cgx_fwi_get_mkex_prfl_addr(u64 *prfl_addr,
+					     struct cgx *cgx)
+{
+	u64 req = 0;
+	u64 resp;
+	int err;
+
+	req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_MKEX_PRFL_ADDR, req);
+	err = cgx_fwi_cmd_generic(req, &resp, cgx, 0);
+	if (!err)
+		*prfl_addr = FIELD_GET(RESP_MKEX_PRFL_ADDR, resp);
+
+	return err;
+}
+
+int cgx_get_mkex_prfl_info(u64 *addr, u64 *size)
+{
+	struct cgx *cgx_dev;
+	int err;
+
+	if (!addr || !size)
+		return -EINVAL;
+
+	cgx_dev = list_first_entry(&cgx_list, struct cgx, cgx_list);
+	if (!cgx_dev)
+		return -ENXIO;
+
+	err = cgx_fwi_get_mkex_prfl_sz(size, cgx_dev);
+	if (err)
+		return -EIO;
+
+	err = cgx_fwi_get_mkex_prfl_addr(addr, cgx_dev);
+	if (err)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL(cgx_get_mkex_prfl_info);
+
+static irqreturn_t cgx_fwi_event_handler(int irq, void *data)
+{
+	struct lmac *lmac = data;
+	struct cgx *cgx;
+	u64 event;
+
+	cgx = lmac->cgx;
+
+	event = cgx_read(cgx, lmac->lmac_id, CGX_EVENT_REG);
+
+	if (!FIELD_GET(EVTREG_ACK, event))
+		return IRQ_NONE;
+
+	switch (FIELD_GET(EVTREG_EVT_TYPE, event)) {
+	case CGX_EVT_CMD_RESP:
+		/* Copy the response. Since only one command is active at a
+		 * time, there is no way a response can get overwritten
+		 */
+		lmac->resp = event;
+		/* Ensure response is updated before thread context starts */
+		smp_wmb();
+
+		/* There won't be separate events for link changes initiated
+		 * from software; hence report such command responses as
+		 * events.
+		 */
+		if (cgx_cmdresp_is_linkevent(event))
+			cgx_link_change_handler(event, lmac);
+
+		/* Release thread waiting for completion  */
+		lmac->cmd_pend = false;
+		wake_up_interruptible(&lmac->wq_cmd_cmplt);
+		break;
+	case CGX_EVT_ASYNC:
+		if (cgx_event_is_linkevent(event))
+			cgx_link_change_handler(event, lmac);
+		break;
+	}
+
+	/* Any new event or command response will be posted by firmware
+	 * only after the current status is acked.
+	 * Ack the interrupt register as well.
+	 */
+	cgx_write(lmac->cgx, lmac->lmac_id, CGX_EVENT_REG, 0);
+	cgx_write(lmac->cgx, lmac->lmac_id, CGXX_CMRX_INT, FW_CGX_INT);
+
+	return IRQ_HANDLED;
+}
+
+/* APIs for PHY management using CGX firmware interface */
+
+/* callback registration for hardware events like link change */
+int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id)
+{
+	struct cgx *cgx = cgxd;
+	struct lmac *lmac;
+
+	lmac = lmac_pdata(lmac_id, cgx);
+	if (!lmac)
+		return -ENODEV;
+
+	lmac->event_cb = *cb;
+
+	return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_evh_register);
+
+int cgx_lmac_evh_unregister(void *cgxd, int lmac_id)
+{
+	struct lmac *lmac;
+	unsigned long flags;
+	struct cgx *cgx = cgxd;
+
+	lmac = lmac_pdata(lmac_id, cgx);
+	if (!lmac)
+		return -ENODEV;
+
+	spin_lock_irqsave(&lmac->event_cb_lock, flags);
+	lmac->event_cb.notify_link_chg = NULL;
+	lmac->event_cb.data = NULL;
+	spin_unlock_irqrestore(&lmac->event_cb_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_evh_unregister);
+
+static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable)
+{
+	u64 req = 0;
+	u64 resp;
+
+	if (enable)
+		req = FIELD_SET(CMDREG_ID, CGX_CMD_LINK_BRING_UP, req);
+	else
+		req = FIELD_SET(CMDREG_ID, CGX_CMD_LINK_BRING_DOWN, req);
+
+	return cgx_fwi_cmd_generic(req, &resp, cgx, lmac_id);
+}
+
+static inline int cgx_fwi_read_version(u64 *resp, struct cgx *cgx)
+{
+	u64 req = 0;
+
+	req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FW_VER, req);
+	return cgx_fwi_cmd_generic(req, resp, cgx, 0);
+}
+
+static int cgx_lmac_verify_fwi_version(struct cgx *cgx)
+{
+	struct device *dev = &cgx->pdev->dev;
+	int major_ver, minor_ver;
+	u64 resp;
+	int err;
+
+	if (!cgx->lmac_count)
+		return 0;
+
+	err = cgx_fwi_read_version(&resp, cgx);
+	if (err)
+		return err;
+
+	major_ver = FIELD_GET(RESP_MAJOR_VER, resp);
+	minor_ver = FIELD_GET(RESP_MINOR_VER, resp);
+	dev_dbg(dev, "Firmware command interface version = %d.%d\n",
+		major_ver, minor_ver);
+	if (major_ver != CGX_FIRMWARE_MAJOR_VER ||
+	    minor_ver != CGX_FIRMWARE_MINOR_VER)
+		return -EIO;
+	else
+		return 0;
+}
+
+static void cgx_lmac_linkup_work(struct work_struct *work)
+{
+	struct cgx *cgx = container_of(work, struct cgx, cgx_cmd_work);
+	struct device *dev = &cgx->pdev->dev;
+	int i, err;
+
+	/* Do Link up for all the lmacs */
+	for (i = 0; i < cgx->lmac_count; i++) {
+		err = cgx_fwi_link_change(cgx, i, true);
+		if (err)
+			dev_info(dev, "cgx port %d:%d Link up command failed\n",
+				 cgx->cgx_id, i);
+	}
+}
+
+int cgx_lmac_linkup_start(void *cgxd)
+{
+	struct cgx *cgx = cgxd;
+
+	if (!cgx)
+		return -ENODEV;
+
+	queue_work(cgx->cgx_cmd_workq, &cgx->cgx_cmd_work);
+
+	return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_linkup_start);
+
+static int cgx_lmac_init(struct cgx *cgx)
+{
+	struct lmac *lmac;
+	int i, err;
+
+	cgx->lmac_count = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0x7;
+	if (cgx->lmac_count > MAX_LMAC_PER_CGX)
+		cgx->lmac_count = MAX_LMAC_PER_CGX;
+
+	for (i = 0; i < cgx->lmac_count; i++) {
+		lmac = kzalloc(sizeof(struct lmac), GFP_KERNEL);
+		if (!lmac)
+			return -ENOMEM;
+		lmac->name = kzalloc(sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL);
+		if (!lmac->name) {
+			kfree(lmac);
+			return -ENOMEM;
+		}
+		sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i);
+		lmac->lmac_id = i;
+		lmac->cgx = cgx;
+		init_waitqueue_head(&lmac->wq_cmd_cmplt);
+		mutex_init(&lmac->cmd_lock);
+		spin_lock_init(&lmac->event_cb_lock);
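+		/* The FWI vector for LMAC i lives at CGX_LMAC_FWI + i * 9 in
+		 * the MSI-X table, i.e. the vectors are laid out in
+		 * per-LMAC blocks of nine.
+		 */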
+		err = request_irq(pci_irq_vector(cgx->pdev,
+						 CGX_LMAC_FWI + i * 9),
+				   cgx_fwi_event_handler, 0, lmac->name, lmac);
+		if (err)
+			return err;
+
+		/* Enable interrupt */
+		cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S,
+			  FW_CGX_INT);
+
+		/* Add reference */
+		cgx->lmac_idmap[i] = lmac;
+	}
+
+	return cgx_lmac_verify_fwi_version(cgx);
+}
+
+static int cgx_lmac_exit(struct cgx *cgx)
+{
+	struct lmac *lmac;
+	int i;
+
+	if (cgx->cgx_cmd_workq) {
+		flush_workqueue(cgx->cgx_cmd_workq);
+		destroy_workqueue(cgx->cgx_cmd_workq);
+		cgx->cgx_cmd_workq = NULL;
+	}
+
+	/* Free all lmac related resources */
+	for (i = 0; i < cgx->lmac_count; i++) {
+		lmac = cgx->lmac_idmap[i];
+		if (!lmac)
+			continue;
+		free_irq(pci_irq_vector(cgx->pdev, CGX_LMAC_FWI + i * 9), lmac);
+		kfree(lmac->name);
+		kfree(lmac);
+	}
+
+	return 0;
+}
+
+static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct cgx *cgx;
+	int err, nvec;
+
+	cgx = devm_kzalloc(dev, sizeof(*cgx), GFP_KERNEL);
+	if (!cgx)
+		return -ENOMEM;
+	cgx->pdev = pdev;
+
+	pci_set_drvdata(pdev, cgx);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		pci_set_drvdata(pdev, NULL);
+		return err;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(dev, "PCI request regions failed 0x%x\n", err);
+		goto err_disable_device;
+	}
+
+	/* MAP configuration registers */
+	cgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+	if (!cgx->reg_base) {
+		dev_err(dev, "CGX: Cannot map CSR memory space, aborting\n");
+		err = -ENOMEM;
+		goto err_release_regions;
+	}
+
+	nvec = CGX_NVEC;
+	err = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+	if (err < 0 || err != nvec) {
+		dev_err(dev, "Request for %d msix vectors failed, err %d\n",
+			nvec, err);
+		goto err_release_regions;
+	}
+
+	cgx->cgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24)
+		& CGX_ID_MASK;
+
+	/* init wq for processing linkup requests */
+	INIT_WORK(&cgx->cgx_cmd_work, cgx_lmac_linkup_work);
+	cgx->cgx_cmd_workq = alloc_workqueue("cgx_cmd_workq", 0, 0);
+	if (!cgx->cgx_cmd_workq) {
+		dev_err(dev, "alloc workqueue failed for cgx cmd\n");
+		err = -ENOMEM;
+		goto err_free_irq_vectors;
+	}
+
+	list_add(&cgx->cgx_list, &cgx_list);
+
+	cgx_link_usertable_init();
+
+	err = cgx_lmac_init(cgx);
+	if (err)
+		goto err_release_lmac;
+
+	return 0;
+
+err_release_lmac:
+	cgx_lmac_exit(cgx);
+	list_del(&cgx->cgx_list);
+err_free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+err_release_regions:
+	pci_release_regions(pdev);
+err_disable_device:
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+	return err;
+}
+
+static void cgx_remove(struct pci_dev *pdev)
+{
+	struct cgx *cgx = pci_get_drvdata(pdev);
+
+	cgx_lmac_exit(cgx);
+	list_del(&cgx->cgx_list);
+	pci_free_irq_vectors(pdev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+struct pci_driver cgx_driver = {
+	.name = DRV_NAME,
+	.id_table = cgx_id_table,
+	.probe = cgx_probe,
+	.remove = cgx_remove,
+};
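
As orientation for readers of the exported interface, a minimal consumer of
the link-event API might look like the sketch below; handle_link_change and
example_attach are invented names, while the helpers called are the ones
exported by this file:

	static int handle_link_change(struct cgx_link_event *event, void *data)
	{
		/* event->link_uinfo carries link_up, speed and duplex */
		return 0;
	}

	static void example_attach(void *cgxd, int lmac_id)
	{
		struct cgx_event_cb cb = {
			.notify_link_chg = handle_link_change,
			.data = NULL,
		};
		struct cgx_link_user_info linfo;

		cgx_lmac_evh_register(&cb, cgxd, lmac_id);
		cgx_get_link_info(cgxd, lmac_id, &linfo);
	}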
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
new file mode 100644
index 0000000..5c1f389
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 CGX driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CGX_H
+#define CGX_H
+
+#include "mbox.h"
+#include "cgx_fw_if.h"
+
+/* PCI device IDs */
+#define	PCI_DEVID_OCTEONTX2_CGX		0xA059
+
+/* PCI BAR nos */
+#define PCI_CFG_REG_BAR_NUM		0
+
+#define CGX_ID_MASK			0x7
+#define MAX_LMAC_PER_CGX		4
+#define CGX_FIFO_LEN			65536 /* 64K for both Rx & Tx */
+#define CGX_OFFSET(x)			((x) * MAX_LMAC_PER_CGX)
+
+/* Registers */
+#define CGXX_CMRX_CFG			0x00
+#define CMR_EN				BIT_ULL(55)
+#define DATA_PKT_TX_EN			BIT_ULL(53)
+#define DATA_PKT_RX_EN			BIT_ULL(54)
+#define CGX_LMAC_TYPE_SHIFT		40
+#define CGX_LMAC_TYPE_MASK		0xF
+#define CGXX_CMRX_INT			0x040
+#define FW_CGX_INT			BIT_ULL(1)
+#define CGXX_CMRX_INT_ENA_W1S		0x058
+#define CGXX_CMRX_RX_ID_MAP		0x060
+#define CGXX_CMRX_RX_STAT0		0x070
+#define CGXX_CMRX_RX_LMACS		0x128
+#define CGXX_CMRX_RX_DMAC_CTL0		0x1F8
+#define CGX_DMAC_CTL0_CAM_ENABLE	BIT_ULL(3)
+#define CGX_DMAC_CAM_ACCEPT		BIT_ULL(3)
+#define CGX_DMAC_MCAST_MODE		BIT_ULL(1)
+#define CGX_DMAC_BCAST_MODE		BIT_ULL(0)
+#define CGXX_CMRX_RX_DMAC_CAM0		0x200
+#define CGX_DMAC_CAM_ADDR_ENABLE	BIT_ULL(48)
+#define CGXX_CMRX_RX_DMAC_CAM1		0x400
+#define CGX_RX_DMAC_ADR_MASK		GENMASK_ULL(47, 0)
+#define CGXX_CMRX_TX_STAT0		0x700
+#define CGXX_SCRATCH0_REG		0x1050
+#define CGXX_SCRATCH1_REG		0x1058
+#define CGX_CONST			0x2000
+#define CGXX_SPUX_CONTROL1		0x10000
+#define CGXX_SPUX_CONTROL1_LBK		BIT_ULL(14)
+#define CGXX_GMP_PCS_MRX_CTL		0x30000
+#define CGXX_GMP_PCS_MRX_CTL_LBK	BIT_ULL(14)
+
+#define CGX_COMMAND_REG			CGXX_SCRATCH1_REG
+#define CGX_EVENT_REG			CGXX_SCRATCH0_REG
+#define CGX_CMD_TIMEOUT			2200 /* msecs */
+
+#define CGX_NVEC			37
+#define CGX_LMAC_FWI			0
+
+enum LMAC_TYPE {
+	LMAC_MODE_SGMII		= 0,
+	LMAC_MODE_XAUI		= 1,
+	LMAC_MODE_RXAUI		= 2,
+	LMAC_MODE_10G_R		= 3,
+	LMAC_MODE_40G_R		= 4,
+	LMAC_MODE_QSGMII	= 6,
+	LMAC_MODE_25G_R		= 7,
+	LMAC_MODE_50G_R		= 8,
+	LMAC_MODE_100G_R	= 9,
+	LMAC_MODE_USXGMII	= 10,
+	LMAC_MODE_MAX,
+};
+
+struct cgx_link_event {
+	struct cgx_link_user_info link_uinfo;
+	u8 cgx_id;
+	u8 lmac_id;
+};
+
+/**
+ * struct cgx_event_cb
+ * @notify_link_chg:	callback for link change notification
+ * @data:	data passed to callback function
+ */
+struct cgx_event_cb {
+	int (*notify_link_chg)(struct cgx_link_event *event, void *data);
+	void *data;
+};
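+
+/* Illustrative use (sketch): a consumer fills in a cgx_event_cb and
+ * attaches it to an LMAC with cgx_lmac_evh_register() declared below;
+ * 'my_handler' and 'priv' are hypothetical names:
+ *
+ *	struct cgx_event_cb cb = { .notify_link_chg = my_handler, .data = priv };
+ *	err = cgx_lmac_evh_register(&cb, cgxd, lmac_id);
+ */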
+
+extern struct pci_driver cgx_driver;
+
+int cgx_get_cgxcnt_max(void);
+int cgx_get_lmac_cnt(void *cgxd);
+void *cgx_get_pdata(int cgx_id);
+int cgx_set_pkind(void *cgxd, u8 lmac_id, int pkind);
+int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id);
+int cgx_lmac_evh_unregister(void *cgxd, int lmac_id);
+int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat);
+int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat);
+int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable);
+int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr);
+u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id);
+void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable);
+int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable);
+int cgx_get_link_info(void *cgxd, int lmac_id,
+		      struct cgx_link_user_info *linfo);
+int cgx_lmac_linkup_start(void *cgxd);
+int cgx_get_mkex_prfl_info(u64 *addr, u64 *size);
+#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
new file mode 100644
index 0000000..473d975
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 CGX driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CGX_FW_INTF_H__
+#define __CGX_FW_INTF_H__
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+
+#define CGX_FIRMWARE_MAJOR_VER		1
+#define CGX_FIRMWARE_MINOR_VER		0
+
+#define CGX_EVENT_ACK                   1UL
+
+/* CGX error types. Set in the cmd response when status is CGX_STAT_FAIL */
+enum cgx_error_type {
+	CGX_ERR_NONE,
+	CGX_ERR_LMAC_NOT_ENABLED,
+	CGX_ERR_LMAC_MODE_INVALID,
+	CGX_ERR_REQUEST_ID_INVALID,
+	CGX_ERR_PREV_ACK_NOT_CLEAR,
+	CGX_ERR_PHY_LINK_DOWN,
+	CGX_ERR_PCS_RESET_FAIL,
+	CGX_ERR_AN_CPT_FAIL,
+	CGX_ERR_TX_NOT_IDLE,
+	CGX_ERR_RX_NOT_IDLE,
+	CGX_ERR_SPUX_BR_BLKLOCK_FAIL,
+	CGX_ERR_SPUX_RX_ALIGN_FAIL,
+	CGX_ERR_SPUX_TX_FAULT,
+	CGX_ERR_SPUX_RX_FAULT,
+	CGX_ERR_SPUX_RESET_FAIL,
+	CGX_ERR_SPUX_AN_RESET_FAIL,
+	CGX_ERR_SPUX_USX_AN_RESET_FAIL,
+	CGX_ERR_SMUX_RX_LINK_NOT_OK,
+	CGX_ERR_PCS_RECV_LINK_FAIL,
+	CGX_ERR_TRAINING_FAIL,
+	CGX_ERR_RX_EQU_FAIL,
+	CGX_ERR_SPUX_BER_FAIL,
+	CGX_ERR_SPUX_RSFEC_ALGN_FAIL,   /* = 22 */
+};
+
+/* LINK speed types */
+enum cgx_link_speed {
+	CGX_LINK_NONE,
+	CGX_LINK_10M,
+	CGX_LINK_100M,
+	CGX_LINK_1G,
+	CGX_LINK_2HG,
+	CGX_LINK_5G,
+	CGX_LINK_10G,
+	CGX_LINK_20G,
+	CGX_LINK_25G,
+	CGX_LINK_40G,
+	CGX_LINK_50G,
+	CGX_LINK_100G,
+	CGX_LINK_SPEED_MAX,
+};
+
+/* REQUEST ID types. Input to firmware */
+enum cgx_cmd_id {
+	CGX_CMD_NONE,
+	CGX_CMD_GET_FW_VER,
+	CGX_CMD_GET_MAC_ADDR,
+	CGX_CMD_SET_MTU,
+	CGX_CMD_GET_LINK_STS,		/* optional to user */
+	CGX_CMD_LINK_BRING_UP,
+	CGX_CMD_LINK_BRING_DOWN,
+	CGX_CMD_INTERNAL_LBK,
+	CGX_CMD_EXTERNAL_LBK,
+	CGX_CMD_HIGIG,
+	CGX_CMD_LINK_STATE_CHANGE,
+	CGX_CMD_MODE_CHANGE,		/* hot plug support */
+	CGX_CMD_INTF_SHUTDOWN,
+	CGX_CMD_GET_MKEX_PRFL_SIZE,
+	CGX_CMD_GET_MKEX_PRFL_ADDR
+};
+
+/* async event ids */
+enum cgx_evt_id {
+	CGX_EVT_NONE,
+	CGX_EVT_LINK_CHANGE,
+};
+
+/* event types - cause of interrupt */
+enum cgx_evt_type {
+	CGX_EVT_ASYNC,
+	CGX_EVT_CMD_RESP
+};
+
+enum cgx_stat {
+	CGX_STAT_SUCCESS,
+	CGX_STAT_FAIL
+};
+
+enum cgx_cmd_own {
+	CGX_CMD_OWN_NS,
+	CGX_CMD_OWN_FIRMWARE,
+};
+
+/* m - bit mask
+ * y - value to be written in the bitrange
+ * x - input value whose bitrange is to be modified
+ */
+#define FIELD_SET(m, y, x)		\
+	(((x) & ~(m)) |			\
+	FIELD_PREP((m), (y)))
+
+/* scratchx(0) CSR used for ATF->non-secure SW communication.
+ * This acts as the status register and provides details on
+ * command ack/status, command response and error details.
+ */
+#define EVTREG_ACK		BIT_ULL(0)
+#define EVTREG_EVT_TYPE		BIT_ULL(1)
+#define EVTREG_STAT		BIT_ULL(2)
+#define EVTREG_ID		GENMASK_ULL(8, 3)
+
+/* Response to command IDs with command status as CGX_STAT_FAIL
+ *
+ * Not applicable for these commands:
+ * CGX_CMD_LINK_BRING_UP/DOWN/CGX_EVT_LINK_CHANGE
+ */
+#define EVTREG_ERRTYPE		GENMASK_ULL(18, 9)
+
+/* Response to cmd ID as CGX_CMD_GET_FW_VER with cmd status as
+ * CGX_STAT_SUCCESS
+ */
+#define RESP_MAJOR_VER		GENMASK_ULL(12, 9)
+#define RESP_MINOR_VER		GENMASK_ULL(16, 13)
+
+/* Response to cmd ID as CGX_CMD_GET_MAC_ADDR with cmd status as
+ * CGX_STAT_SUCCESS
+ */
+#define RESP_MAC_ADDR		GENMASK_ULL(56, 9)
+
+/* Response to cmd ID as CGX_CMD_GET_MKEX_PRFL_SIZE with cmd status as
+ * CGX_STAT_SUCCESS
+ */
+#define RESP_MKEX_PRFL_SIZE		GENMASK_ULL(63, 9)
+
+/* Response to cmd ID as CGX_CMD_GET_MKEX_PRFL_ADDR with cmd status as
+ * CGX_STAT_SUCCESS
+ */
+#define RESP_MKEX_PRFL_ADDR		GENMASK_ULL(63, 9)
+
+/* Response to cmd ID - CGX_CMD_LINK_BRING_UP/DOWN, event ID CGX_EVT_LINK_CHANGE
+ * status can be either CGX_STAT_FAIL or CGX_STAT_SUCCESS
+ *
+ * In case of CGX_STAT_FAIL, it indicates CGX configuration failed
+ * when processing link up/down/change command.
+ * Both err_type and current link status will be updated
+ *
+ * In case of CGX_STAT_SUCCESS, err_type will be CGX_ERR_NONE and current
+ * link status will be updated
+ */
+struct cgx_lnk_sts {
+	uint64_t reserved1:9;
+	uint64_t link_up:1;
+	uint64_t full_duplex:1;
+	uint64_t speed:4;		/* cgx_link_speed */
+	uint64_t err_type:10;
+	uint64_t reserved2:39;
+};
+
+#define RESP_LINKSTAT_UP		GENMASK_ULL(9, 9)
+#define RESP_LINKSTAT_FDUPLEX		GENMASK_ULL(10, 10)
+#define RESP_LINKSTAT_SPEED		GENMASK_ULL(14, 11)
+#define RESP_LINKSTAT_ERRTYPE		GENMASK_ULL(24, 15)
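+/* Note: the RESP_LINKSTAT_* masks above mirror the bit layout of
+ * struct cgx_lnk_sts (9 reserved bits, then link_up, full_duplex,
+ * speed and err_type).
+ */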
+
+/* scratchx(1) CSR used for non-secure SW->ATF communication
+ * This CSR acts as a command register
+ */
+#define CMDREG_OWN	BIT_ULL(0)
+#define CMDREG_ID	GENMASK_ULL(7, 2)
+
+/* Any command using enable/disable as an argument needs
+ * to set this bitfield.
+ * Ex: Loopback, HiGig...
+ */
+#define CMDREG_ENABLE	BIT_ULL(8)
+
+/* command argument to be passed for cmd ID - CGX_CMD_SET_MTU */
+#define CMDMTU_SIZE	GENMASK_ULL(23, 8)
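+/* e.g. (sketch, with 'mtu' a caller-supplied value): a complete SET_MTU
+ * request word for CGX_COMMAND_REG can be built with the FIELD_SET()
+ * helper defined above:
+ *	u64 req = 0;
+ *	req = FIELD_SET(CMDREG_ID, CGX_CMD_SET_MTU, req);
+ *	req = FIELD_SET(CMDMTU_SIZE, mtu, req);
+ *	req = FIELD_SET(CMDREG_OWN, CGX_CMD_OWN_FIRMWARE, req);
+ */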
+
+/* command argument to be passed for cmd ID - CGX_CMD_LINK_CHANGE */
+#define CMDLINKCHANGE_LINKUP	BIT_ULL(8)
+#define CMDLINKCHANGE_FULLDPLX	BIT_ULL(9)
+#define CMDLINKCHANGE_SPEED	GENMASK_ULL(13, 10)
+
+#endif /* __CGX_FW_INTF_H__ */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
new file mode 100644
index 0000000..413c3f2
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "rvu_struct.h"
+
+#define OTX2_ALIGN			128  /* Align to cacheline */
+
+#define Q_SIZE_16		0ULL /* 16 entries */
+#define Q_SIZE_64		1ULL /* 64 entries */
+#define Q_SIZE_256		2ULL
+#define Q_SIZE_1K		3ULL
+#define Q_SIZE_4K		4ULL
+#define Q_SIZE_16K		5ULL
+#define Q_SIZE_64K		6ULL
+#define Q_SIZE_256K		7ULL
+#define Q_SIZE_1M		8ULL /* Million entries */
+#define Q_SIZE_MIN		Q_SIZE_16
+#define Q_SIZE_MAX		Q_SIZE_1M
+
+#define Q_COUNT(x)		(16ULL << (2 * (x)))
+#define Q_SIZE(x, n)		((ilog2(x) - (n)) / 2)
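+/* Q_COUNT() and Q_SIZE() are inverses of each other for this 16-entry
+ * base, e.g. Q_COUNT(Q_SIZE_256) = 16 << 4 = 256 and
+ * Q_SIZE(256, 4) = (ilog2(256) - 4) / 2 = Q_SIZE_256.
+ */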
+
+/* Admin queue info */
+
+/* Since we intend to add only one instruction at a time,
+ * keep queue size to its minimum.
+ */
+#define AQ_SIZE			Q_SIZE_16
+/* HW head & tail pointer mask */
+#define AQ_PTR_MASK		0xFFFFF
+
+struct qmem {
+	void            *base;
+	dma_addr_t	iova;
+	int		alloc_sz;
+	u8		entry_sz;
+	u8		align;
+	u32		qsize;
+};
+
+static inline int qmem_alloc(struct device *dev, struct qmem **q,
+			     int qsize, int entry_sz)
+{
+	struct qmem *qmem;
+	u64 aligned_addr;
+
+	if (!qsize)
+		return -EINVAL;
+
+	*q = devm_kzalloc(dev, sizeof(*qmem), GFP_KERNEL);
+	if (!*q)
+		return -ENOMEM;
+	qmem = *q;
+
+	qmem->entry_sz = entry_sz;
+	qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN;
+	qmem->base = dma_alloc_coherent(dev, qmem->alloc_sz,
+					 &qmem->iova, GFP_KERNEL);
+	if (!qmem->base)
+		return -ENOMEM;
+
+	qmem->qsize = qsize;
+
+	aligned_addr = ALIGN((u64)qmem->iova, OTX2_ALIGN);
+	qmem->align = (aligned_addr - qmem->iova);
+	qmem->base += qmem->align;
+	qmem->iova += qmem->align;
+	return 0;
+}
+
+static inline void qmem_free(struct device *dev, struct qmem *qmem)
+{
+	if (!qmem)
+		return;
+
+	if (qmem->base)
+		dma_free_coherent(dev, qmem->alloc_sz,
+				  qmem->base - qmem->align,
+				  qmem->iova - qmem->align);
+	devm_kfree(dev, qmem);
+}
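+
+/* Typical usage (sketch; 'entry_size' is a placeholder): size the queue
+ * with Q_COUNT() and pair every successful qmem_alloc() with qmem_free():
+ *
+ *	struct qmem *q;
+ *	int err;
+ *
+ *	err = qmem_alloc(dev, &q, Q_COUNT(AQ_SIZE), entry_size);
+ *	if (!err)
+ *		qmem_free(dev, q);
+ */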
+
+struct admin_queue {
+	struct qmem	*inst;
+	struct qmem	*res;
+	spinlock_t	lock; /* Serialize inst enqueue from PFs */
+};
+
+/* NPA aura count */
+enum npa_aura_sz {
+	NPA_AURA_SZ_0,
+	NPA_AURA_SZ_128,
+	NPA_AURA_SZ_256,
+	NPA_AURA_SZ_512,
+	NPA_AURA_SZ_1K,
+	NPA_AURA_SZ_2K,
+	NPA_AURA_SZ_4K,
+	NPA_AURA_SZ_8K,
+	NPA_AURA_SZ_16K,
+	NPA_AURA_SZ_32K,
+	NPA_AURA_SZ_64K,
+	NPA_AURA_SZ_128K,
+	NPA_AURA_SZ_256K,
+	NPA_AURA_SZ_512K,
+	NPA_AURA_SZ_1M,
+	NPA_AURA_SZ_MAX,
+};
+
+#define NPA_AURA_COUNT(x)	(1ULL << ((x) + 6))
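+/* e.g. NPA_AURA_COUNT(NPA_AURA_SZ_128) = 1ULL << (1 + 6) = 128 */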
+
+/* NPA AQ result structure for init/read/write of aura HW contexts */
+struct npa_aq_aura_res {
+	struct	npa_aq_res_s	res;
+	struct	npa_aura_s	aura_ctx;
+	struct	npa_aura_s	ctx_mask;
+};
+
+/* NPA AQ result structure for init/read/write of pool HW contexts */
+struct npa_aq_pool_res {
+	struct	npa_aq_res_s	res;
+	struct	npa_pool_s	pool_ctx;
+	struct	npa_pool_s	ctx_mask;
+};
+
+/* NIX Transmit schedulers */
+enum nix_scheduler {
+	NIX_TXSCH_LVL_SMQ = 0x0,
+	NIX_TXSCH_LVL_MDQ = 0x0,
+	NIX_TXSCH_LVL_TL4 = 0x1,
+	NIX_TXSCH_LVL_TL3 = 0x2,
+	NIX_TXSCH_LVL_TL2 = 0x3,
+	NIX_TXSCH_LVL_TL1 = 0x4,
+	NIX_TXSCH_LVL_CNT = 0x5,
+};
+
+#define TXSCH_TL1_DFLT_RR_QTM      ((1 << 24) - 1)
+#define TXSCH_TL1_DFLT_RR_PRIO     (0x1ull)
+
+/* Min/Max packet sizes, excluding FCS */
+#define	NIC_HW_MIN_FRS			40
+#define	NIC_HW_MAX_FRS			9212
+#define	SDP_HW_MAX_FRS			65535
+
+/* NIX RX action operation */
+#define NIX_RX_ACTIONOP_DROP		(0x0ull)
+#define NIX_RX_ACTIONOP_UCAST		(0x1ull)
+#define NIX_RX_ACTIONOP_UCAST_IPSEC	(0x2ull)
+#define NIX_RX_ACTIONOP_MCAST		(0x3ull)
+#define NIX_RX_ACTIONOP_RSS		(0x4ull)
+
+/* NIX TX action operation */
+#define NIX_TX_ACTIONOP_DROP		(0x0ull)
+#define NIX_TX_ACTIONOP_UCAST_DEFAULT	(0x1ull)
+#define NIX_TX_ACTIONOP_UCAST_CHAN	(0x2ull)
+#define NIX_TX_ACTIONOP_MCAST		(0x3ull)
+#define NIX_TX_ACTIONOP_DROP_VIOL	(0x5ull)
+
+#define NPC_MCAM_KEY_X1			0
+#define NPC_MCAM_KEY_X2			1
+#define NPC_MCAM_KEY_X4			2
+
+#define NIX_INTF_RX			0
+#define NIX_INTF_TX			1
+
+#define NIX_INTF_TYPE_CGX		0
+#define NIX_INTF_TYPE_LBK		1
+
+#define MAX_LMAC_PKIND			12
+#define NIX_LINK_CGX_LMAC(a, b)		(0 + 4 * (a) + (b))
+#define NIX_LINK_LBK(a)			(12 + (a))
+#define NIX_CHAN_CGX_LMAC_CHX(a, b, c)	(0x800 + 0x100 * (a) + 0x10 * (b) + (c))
+#define NIX_CHAN_LBK_CHX(a, b)		(0 + 0x100 * (a) + (b))
+
+/* NIX LSO format indices.
+ * As of now TSO is the only user, so indices are statically assigned.
+ */
+#define NIX_LSO_FORMAT_IDX_TSOV4	0
+#define NIX_LSO_FORMAT_IDX_TSOV6	1
+
+/* RSS info */
+#define MAX_RSS_GROUPS			8
+/* Group 0 has to be used in the default pkt forwarding MCAM entries
+ * reserved for NIXLFs. Groups 1-7 can be used for RSS with ntuple
+ * filters.
+ */
+#define DEFAULT_RSS_CONTEXT_GROUP	0
+#define MAX_RSS_INDIR_TBL_SIZE		256 /* 1 << Max addr bits */
+
+#endif /* COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
new file mode 100644
index 0000000..d6f9ed8
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include "rvu_reg.h"
+#include "mbox.h"
+
+static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+
+void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	struct mbox_hdr *tx_hdr, *rx_hdr;
+
+	tx_hdr = mdev->mbase + mbox->tx_start;
+	rx_hdr = mdev->mbase + mbox->rx_start;
+
+	spin_lock(&mdev->mbox_lock);
+	mdev->msg_size = 0;
+	mdev->rsp_size = 0;
+	tx_hdr->num_msgs = 0;
+	rx_hdr->num_msgs = 0;
+	spin_unlock(&mdev->mbox_lock);
+}
+EXPORT_SYMBOL(otx2_mbox_reset);
+
+void otx2_mbox_destroy(struct otx2_mbox *mbox)
+{
+	mbox->reg_base = NULL;
+	mbox->hwbase = NULL;
+
+	kfree(mbox->dev);
+	mbox->dev = NULL;
+}
+EXPORT_SYMBOL(otx2_mbox_destroy);
+
+int otx2_mbox_init(struct otx2_mbox *mbox, void *hwbase, struct pci_dev *pdev,
+		   void *reg_base, int direction, int ndevs)
+{
+	struct otx2_mbox_dev *mdev;
+	int devid;
+
+	switch (direction) {
+	case MBOX_DIR_AFPF:
+	case MBOX_DIR_PFVF:
+		mbox->tx_start = MBOX_DOWN_TX_START;
+		mbox->rx_start = MBOX_DOWN_RX_START;
+		mbox->tx_size  = MBOX_DOWN_TX_SIZE;
+		mbox->rx_size  = MBOX_DOWN_RX_SIZE;
+		break;
+	case MBOX_DIR_PFAF:
+	case MBOX_DIR_VFPF:
+		mbox->tx_start = MBOX_DOWN_RX_START;
+		mbox->rx_start = MBOX_DOWN_TX_START;
+		mbox->tx_size  = MBOX_DOWN_RX_SIZE;
+		mbox->rx_size  = MBOX_DOWN_TX_SIZE;
+		break;
+	case MBOX_DIR_AFPF_UP:
+	case MBOX_DIR_PFVF_UP:
+		mbox->tx_start = MBOX_UP_TX_START;
+		mbox->rx_start = MBOX_UP_RX_START;
+		mbox->tx_size  = MBOX_UP_TX_SIZE;
+		mbox->rx_size  = MBOX_UP_RX_SIZE;
+		break;
+	case MBOX_DIR_PFAF_UP:
+	case MBOX_DIR_VFPF_UP:
+		mbox->tx_start = MBOX_UP_RX_START;
+		mbox->rx_start = MBOX_UP_TX_START;
+		mbox->tx_size  = MBOX_UP_RX_SIZE;
+		mbox->rx_size  = MBOX_UP_TX_SIZE;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	switch (direction) {
+	case MBOX_DIR_AFPF:
+	case MBOX_DIR_AFPF_UP:
+		mbox->trigger = RVU_AF_AFPF_MBOX0;
+		mbox->tr_shift = 4;
+		break;
+	case MBOX_DIR_PFAF:
+	case MBOX_DIR_PFAF_UP:
+		mbox->trigger = RVU_PF_PFAF_MBOX1;
+		mbox->tr_shift = 0;
+		break;
+	case MBOX_DIR_PFVF:
+	case MBOX_DIR_PFVF_UP:
+		mbox->trigger = RVU_PF_VFX_PFVF_MBOX0;
+		mbox->tr_shift = 12;
+		break;
+	case MBOX_DIR_VFPF:
+	case MBOX_DIR_VFPF_UP:
+		mbox->trigger = RVU_VF_VFPF_MBOX1;
+		mbox->tr_shift = 0;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	mbox->reg_base = reg_base;
+	mbox->hwbase = hwbase;
+	mbox->pdev = pdev;
+
+	mbox->dev = kcalloc(ndevs, sizeof(struct otx2_mbox_dev), GFP_KERNEL);
+	if (!mbox->dev) {
+		otx2_mbox_destroy(mbox);
+		return -ENOMEM;
+	}
+
+	mbox->ndevs = ndevs;
+	for (devid = 0; devid < ndevs; devid++) {
+		mdev = &mbox->dev[devid];
+		mdev->mbase = mbox->hwbase + (devid * MBOX_SIZE);
+		spin_lock_init(&mdev->mbox_lock);
+		/* Init header to reset value */
+		otx2_mbox_reset(mbox, devid);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(otx2_mbox_init);
+
+int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	int timeout = 0, sleep = 1;
+
+	while (mdev->num_msgs != mdev->msgs_acked) {
+		msleep(sleep);
+		timeout += sleep;
+		if (timeout >= MBOX_RSP_TIMEOUT)
+			return -EIO;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(otx2_mbox_wait_for_rsp);
+
+int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	unsigned long timeout = jiffies + 1 * HZ;
+
+	while (!time_after(jiffies, timeout)) {
+		if (mdev->num_msgs == mdev->msgs_acked)
+			return 0;
+		cpu_relax();
+	}
+	return -EIO;
+}
+EXPORT_SYMBOL(otx2_mbox_busy_poll_for_rsp);
+
+void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	struct mbox_hdr *tx_hdr, *rx_hdr;
+
+	tx_hdr = mdev->mbase + mbox->tx_start;
+	rx_hdr = mdev->mbase + mbox->rx_start;
+
+	spin_lock(&mdev->mbox_lock);
+	/* Reset header for next messages */
+	mdev->msg_size = 0;
+	mdev->rsp_size = 0;
+	mdev->msgs_acked = 0;
+
+	/* Sync mbox data into memory */
+	smp_wmb();
+
+	/* num_msgs != 0 signals to the peer that the buffer has messages
+	 * pending.  So this should be written only after writing all the
+	 * messages to the shared memory.
+	 */
+	tx_hdr->num_msgs = mdev->num_msgs;
+	rx_hdr->num_msgs = 0;
+	spin_unlock(&mdev->mbox_lock);
+
+	/* The interrupt should be fired after num_msgs is written
+	 * to the shared memory
+	 */
+	writeq(1, (void __iomem *)mbox->reg_base +
+	       (mbox->trigger | (devid << mbox->tr_shift)));
+}
+EXPORT_SYMBOL(otx2_mbox_msg_send);
+
+struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid,
+					    int size, int size_rsp)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	struct mbox_msghdr *msghdr = NULL;
+
+	spin_lock(&mdev->mbox_lock);
+	size = ALIGN(size, MBOX_MSG_ALIGN);
+	size_rsp = ALIGN(size_rsp, MBOX_MSG_ALIGN);
+	/* Check if there is space in mailbox */
+	if ((mdev->msg_size + size) > mbox->tx_size - msgs_offset)
+		goto exit;
+	if ((mdev->rsp_size + size_rsp) > mbox->rx_size - msgs_offset)
+		goto exit;
+
+	if (mdev->msg_size == 0)
+		mdev->num_msgs = 0;
+	mdev->num_msgs++;
+
+	msghdr = mdev->mbase + mbox->tx_start + msgs_offset + mdev->msg_size;
+
+	/* Clear the whole msg region */
+	memset(msghdr, 0, sizeof(*msghdr) + size);
+	/* Init message header with reset values */
+	msghdr->ver = OTX2_MBOX_VERSION;
+	mdev->msg_size += size;
+	mdev->rsp_size += size_rsp;
+	msghdr->next_msgoff = mdev->msg_size + msgs_offset;
+exit:
+	spin_unlock(&mdev->mbox_lock);
+
+	return msghdr;
+}
+EXPORT_SYMBOL(otx2_mbox_alloc_msg_rsp);
+
+struct mbox_msghdr *otx2_mbox_get_rsp(struct otx2_mbox *mbox, int devid,
+				      struct mbox_msghdr *msg)
+{
+	unsigned long imsg = mbox->tx_start + msgs_offset;
+	unsigned long irsp = mbox->rx_start + msgs_offset;
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	u16 msgs;
+
+	if (mdev->num_msgs != mdev->msgs_acked)
+		return ERR_PTR(-ENODEV);
+
+	for (msgs = 0; msgs < mdev->msgs_acked; msgs++) {
+		struct mbox_msghdr *pmsg = mdev->mbase + imsg;
+		struct mbox_msghdr *prsp = mdev->mbase + irsp;
+
+		if (msg == pmsg) {
+			if (pmsg->id != prsp->id)
+				return ERR_PTR(-ENODEV);
+			return prsp;
+		}
+
+		imsg = pmsg->next_msgoff;
+		irsp = prsp->next_msgoff;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL(otx2_mbox_get_rsp);
+
+int
+otx2_reply_invalid_msg(struct otx2_mbox *mbox, int devid, u16 pcifunc, u16 id)
+{
+	struct msg_rsp *rsp;
+
+	rsp = (struct msg_rsp *)
+	       otx2_mbox_alloc_msg(mbox, devid, sizeof(*rsp));
+	if (!rsp)
+		return -ENOMEM;
+	rsp->hdr.id = id;
+	rsp->hdr.sig = OTX2_MBOX_RSP_SIG;
+	rsp->hdr.rc = MBOX_MSG_INVALID;
+	rsp->hdr.pcifunc = pcifunc;
+	return 0;
+}
+EXPORT_SYMBOL(otx2_reply_invalid_msg);
+
+bool otx2_mbox_nonempty(struct otx2_mbox *mbox, int devid)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	bool ret;
+
+	spin_lock(&mdev->mbox_lock);
+	ret = mdev->num_msgs != 0;
+	spin_unlock(&mdev->mbox_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(otx2_mbox_nonempty);
+
+const char *otx2_mbox_id2name(u16 id)
+{
+	switch (id) {
+#define M(_name, _id, _1, _2, _3) case _id: return # _name;
+	MBOX_MESSAGES
+#undef M
+	default:
+		return "INVALID ID";
+	}
+}
+EXPORT_SYMBOL(otx2_mbox_id2name);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
new file mode 100644
index 0000000..75439fc
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -0,0 +1,815 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MBOX_H
+#define MBOX_H
+
+#include <linux/etherdevice.h>
+#include <linux/sizes.h>
+
+#include "rvu_struct.h"
+#include "common.h"
+
+#define MBOX_SIZE		SZ_64K
+
+/* AF/PF mbox: PF initiated; PF/VF mbox: VF initiated */
+#define MBOX_DOWN_RX_START	0
+#define MBOX_DOWN_RX_SIZE	(46 * SZ_1K)
+#define MBOX_DOWN_TX_START	(MBOX_DOWN_RX_START + MBOX_DOWN_RX_SIZE)
+#define MBOX_DOWN_TX_SIZE	(16 * SZ_1K)
+/* AF/PF mbox: AF initiated; PF/VF mbox: PF initiated */
+#define MBOX_UP_RX_START	(MBOX_DOWN_TX_START + MBOX_DOWN_TX_SIZE)
+#define MBOX_UP_RX_SIZE		SZ_1K
+#define MBOX_UP_TX_START	(MBOX_UP_RX_START + MBOX_UP_RX_SIZE)
+#define MBOX_UP_TX_SIZE		SZ_1K
+
+#if MBOX_UP_TX_SIZE + MBOX_UP_TX_START != MBOX_SIZE
+# error "incorrect mailbox area sizes"
+#endif
+
+#define INTR_MASK(pfvfs) (((pfvfs) < 64) ? (BIT_ULL(pfvfs) - 1) : (~0ull))
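+/* e.g. INTR_MASK(4) = BIT_ULL(4) - 1 = 0xF, one bit per PF/VF */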
+
+#define MBOX_RSP_TIMEOUT	1000 /* in ms, Time to wait for mbox response */
+
+#define MBOX_MSG_ALIGN		16  /* Align mbox msg start to 16 bytes */
+
+/* Mailbox directions */
+#define MBOX_DIR_AFPF		0  /* AF replies to PF */
+#define MBOX_DIR_PFAF		1  /* PF sends messages to AF */
+#define MBOX_DIR_PFVF		2  /* PF replies to VF */
+#define MBOX_DIR_VFPF		3  /* VF sends messages to PF */
+#define MBOX_DIR_AFPF_UP	4  /* AF sends messages to PF */
+#define MBOX_DIR_PFAF_UP	5  /* PF replies to AF */
+#define MBOX_DIR_PFVF_UP	6  /* PF sends messages to VF */
+#define MBOX_DIR_VFPF_UP	7  /* VF replies to PF */
+
+struct otx2_mbox_dev {
+	void	    *mbase;   /* This dev's mbox region */
+	spinlock_t  mbox_lock;
+	u16         msg_size; /* Total msg size to be sent */
+	u16         rsp_size; /* Total rsp size, used to validate replies */
+	u16         num_msgs; /* No of msgs sent or waiting for response */
+	u16         msgs_acked; /* No of msgs for which response is received */
+};
+
+struct otx2_mbox {
+	struct pci_dev *pdev;
+	void   *hwbase;  /* Mbox region advertised by HW */
+	void   *reg_base;/* CSR base for this dev */
+	u64    trigger;  /* Trigger mbox notification */
+	u16    tr_shift; /* Mbox trigger shift */
+	u64    rx_start; /* Offset of Rx region in mbox memory */
+	u64    tx_start; /* Offset of Tx region in mbox memory */
+	u16    rx_size;  /* Size of Rx region */
+	u16    tx_size;  /* Size of Tx region */
+	u16    ndevs;    /* The number of peers */
+	struct otx2_mbox_dev *dev;
+};
+
+/* Header which precedes all mbox messages */
+struct mbox_hdr {
+	u16  num_msgs;   /* No of msgs embedded */
+};
+
+/* Header which precedes every msg and is also part of it */
+struct mbox_msghdr {
+	u16 pcifunc;     /* Who's sending this msg */
+	u16 id;          /* Mbox message ID */
+#define OTX2_MBOX_REQ_SIG (0xdead)
+#define OTX2_MBOX_RSP_SIG (0xbeef)
+	u16 sig;         /* Signature, for validating corrupted msgs */
+#define OTX2_MBOX_VERSION (0x0001)
+	u16 ver;         /* Version of msg's structure for this ID */
+	u16 next_msgoff; /* Offset of next msg within mailbox region */
+	int rc;          /* Msg processed response code */
+};
+
+void otx2_mbox_reset(struct otx2_mbox *mbox, int devid);
+void otx2_mbox_destroy(struct otx2_mbox *mbox);
+int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase,
+		   struct pci_dev *pdev, void __force *reg_base,
+		   int direction, int ndevs);
+void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid);
+int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid);
+int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid);
+struct mbox_msghdr *otx2_mbox_alloc_msg_rsp(struct otx2_mbox *mbox, int devid,
+					    int size, int size_rsp);
+struct mbox_msghdr *otx2_mbox_get_rsp(struct otx2_mbox *mbox, int devid,
+				      struct mbox_msghdr *msg);
+int otx2_reply_invalid_msg(struct otx2_mbox *mbox, int devid,
+			   u16 pcifunc, u16 id);
+bool otx2_mbox_nonempty(struct otx2_mbox *mbox, int devid);
+const char *otx2_mbox_id2name(u16 id);
+static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox,
+						      int devid, int size)
+{
+	return otx2_mbox_alloc_msg_rsp(mbox, devid, size, 0);
+}
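+
+/* Typical request/response flow (sketch; e.g. a PF sending the READY msg
+ * defined below to the AF on devid 0):
+ *
+ *	struct mbox_msghdr *rsp;
+ *	struct msg_req *req;
+ *
+ *	req = (struct msg_req *)otx2_mbox_alloc_msg(mbox, 0, sizeof(*req));
+ *	req->hdr.id = MBOX_MSG_READY;
+ *	req->hdr.sig = OTX2_MBOX_REQ_SIG;
+ *	otx2_mbox_msg_send(mbox, 0);
+ *	if (!otx2_mbox_wait_for_rsp(mbox, 0))
+ *		rsp = otx2_mbox_get_rsp(mbox, 0, &req->hdr);
+ */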
+
+/* Mailbox message types */
+#define MBOX_MSG_MASK				0xFFFF
+#define MBOX_MSG_INVALID			0xFFFE
+#define MBOX_MSG_MAX				0xFFFF
+
+#define MBOX_MESSAGES							\
+/* Generic mbox IDs (range 0x000 - 0x1FF) */				\
+M(READY,		0x001, ready, msg_req, ready_msg_rsp)		\
+M(ATTACH_RESOURCES,	0x002, attach_resources, rsrc_attach, msg_rsp)	\
+M(DETACH_RESOURCES,	0x003, detach_resources, rsrc_detach, msg_rsp)	\
+M(MSIX_OFFSET,		0x004, msix_offset, msg_req, msix_offset_rsp)	\
+M(VF_FLR,		0x006, vf_flr, msg_req, msg_rsp)		\
+/* CGX mbox IDs (range 0x200 - 0x3FF) */				\
+M(CGX_START_RXTX,	0x200, cgx_start_rxtx, msg_req, msg_rsp)	\
+M(CGX_STOP_RXTX,	0x201, cgx_stop_rxtx, msg_req, msg_rsp)		\
+M(CGX_STATS,		0x202, cgx_stats, msg_req, cgx_stats_rsp)	\
+M(CGX_MAC_ADDR_SET,	0x203, cgx_mac_addr_set, cgx_mac_addr_set_or_get,    \
+				cgx_mac_addr_set_or_get)		\
+M(CGX_MAC_ADDR_GET,	0x204, cgx_mac_addr_get, cgx_mac_addr_set_or_get,    \
+				cgx_mac_addr_set_or_get)		\
+M(CGX_PROMISC_ENABLE,	0x205, cgx_promisc_enable, msg_req, msg_rsp)	\
+M(CGX_PROMISC_DISABLE,	0x206, cgx_promisc_disable, msg_req, msg_rsp)	\
+M(CGX_START_LINKEVENTS, 0x207, cgx_start_linkevents, msg_req, msg_rsp)	\
+M(CGX_STOP_LINKEVENTS,	0x208, cgx_stop_linkevents, msg_req, msg_rsp)	\
+M(CGX_GET_LINKINFO,	0x209, cgx_get_linkinfo, msg_req, cgx_link_info_msg) \
+M(CGX_INTLBK_ENABLE,	0x20A, cgx_intlbk_enable, msg_req, msg_rsp)	\
+M(CGX_INTLBK_DISABLE,	0x20B, cgx_intlbk_disable, msg_req, msg_rsp)	\
+/* NPA mbox IDs (range 0x400 - 0x5FF) */				\
+M(NPA_LF_ALLOC,		0x400, npa_lf_alloc,				\
+				npa_lf_alloc_req, npa_lf_alloc_rsp)	\
+M(NPA_LF_FREE,		0x401, npa_lf_free, msg_req, msg_rsp)		\
+M(NPA_AQ_ENQ,		0x402, npa_aq_enq, npa_aq_enq_req, npa_aq_enq_rsp)   \
+M(NPA_HWCTX_DISABLE,	0x403, npa_hwctx_disable, hwctx_disable_req, msg_rsp)\
+/* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */				\
+/* TIM mbox IDs (range 0x800 - 0x9FF) */				\
+/* CPT mbox IDs (range 0xA00 - 0xBFF) */				\
+/* NPC mbox IDs (range 0x6000 - 0x7FFF) */				\
+M(NPC_MCAM_ALLOC_ENTRY,	0x6000, npc_mcam_alloc_entry, npc_mcam_alloc_entry_req,\
+				npc_mcam_alloc_entry_rsp)		\
+M(NPC_MCAM_FREE_ENTRY,	0x6001, npc_mcam_free_entry,			\
+				 npc_mcam_free_entry_req, msg_rsp)	\
+M(NPC_MCAM_WRITE_ENTRY,	0x6002, npc_mcam_write_entry,			\
+				 npc_mcam_write_entry_req, msg_rsp)	\
+M(NPC_MCAM_ENA_ENTRY,   0x6003, npc_mcam_ena_entry,			\
+				 npc_mcam_ena_dis_entry_req, msg_rsp)	\
+M(NPC_MCAM_DIS_ENTRY,   0x6004, npc_mcam_dis_entry,			\
+				 npc_mcam_ena_dis_entry_req, msg_rsp)	\
+M(NPC_MCAM_SHIFT_ENTRY, 0x6005, npc_mcam_shift_entry, npc_mcam_shift_entry_req,\
+				npc_mcam_shift_entry_rsp)		\
+M(NPC_MCAM_ALLOC_COUNTER, 0x6006, npc_mcam_alloc_counter,		\
+					npc_mcam_alloc_counter_req,	\
+					npc_mcam_alloc_counter_rsp)	\
+M(NPC_MCAM_FREE_COUNTER,  0x6007, npc_mcam_free_counter,		\
+				    npc_mcam_oper_counter_req, msg_rsp)	\
+M(NPC_MCAM_UNMAP_COUNTER, 0x6008, npc_mcam_unmap_counter,		\
+				   npc_mcam_unmap_counter_req, msg_rsp)	\
+M(NPC_MCAM_CLEAR_COUNTER, 0x6009, npc_mcam_clear_counter,		\
+				   npc_mcam_oper_counter_req, msg_rsp)	\
+M(NPC_MCAM_COUNTER_STATS, 0x600a, npc_mcam_counter_stats,		\
+				   npc_mcam_oper_counter_req,		\
+				   npc_mcam_oper_counter_rsp)		\
+M(NPC_MCAM_ALLOC_AND_WRITE_ENTRY, 0x600b, npc_mcam_alloc_and_write_entry,      \
+					  npc_mcam_alloc_and_write_entry_req,  \
+					  npc_mcam_alloc_and_write_entry_rsp)  \
+M(NPC_GET_KEX_CFG,	  0x600c, npc_get_kex_cfg,			\
+				   msg_req, npc_get_kex_cfg_rsp)	\
+/* NIX mbox IDs (range 0x8000 - 0xFFFF) */				\
+M(NIX_LF_ALLOC,		0x8000, nix_lf_alloc,				\
+				 nix_lf_alloc_req, nix_lf_alloc_rsp)	\
+M(NIX_LF_FREE,		0x8001, nix_lf_free, msg_req, msg_rsp)		\
+M(NIX_AQ_ENQ,		0x8002, nix_aq_enq, nix_aq_enq_req, nix_aq_enq_rsp)  \
+M(NIX_HWCTX_DISABLE,	0x8003, nix_hwctx_disable,			\
+				 hwctx_disable_req, msg_rsp)		\
+M(NIX_TXSCH_ALLOC,	0x8004, nix_txsch_alloc,			\
+				 nix_txsch_alloc_req, nix_txsch_alloc_rsp)   \
+M(NIX_TXSCH_FREE,	0x8005, nix_txsch_free, nix_txsch_free_req, msg_rsp) \
+M(NIX_TXSCHQ_CFG,	0x8006, nix_txschq_cfg, nix_txschq_config, msg_rsp)  \
+M(NIX_STATS_RST,	0x8007, nix_stats_rst, msg_req, msg_rsp)	\
+M(NIX_VTAG_CFG,		0x8008, nix_vtag_cfg, nix_vtag_config, msg_rsp)	\
+M(NIX_RSS_FLOWKEY_CFG,  0x8009, nix_rss_flowkey_cfg,			\
+				 nix_rss_flowkey_cfg,			\
+				 nix_rss_flowkey_cfg_rsp)		\
+M(NIX_SET_MAC_ADDR,	0x800a, nix_set_mac_addr, nix_set_mac_addr, msg_rsp) \
+M(NIX_SET_RX_MODE,	0x800b, nix_set_rx_mode, nix_rx_mode, msg_rsp)	\
+M(NIX_SET_HW_FRS,	0x800c, nix_set_hw_frs, nix_frs_cfg, msg_rsp)	\
+M(NIX_LF_START_RX,	0x800d, nix_lf_start_rx, msg_req, msg_rsp)	\
+M(NIX_LF_STOP_RX,	0x800e, nix_lf_stop_rx, msg_req, msg_rsp)	\
+M(NIX_MARK_FORMAT_CFG,	0x800f, nix_mark_format_cfg,			\
+				 nix_mark_format_cfg,			\
+				 nix_mark_format_cfg_rsp)		\
+M(NIX_SET_RX_CFG,	0x8010, nix_set_rx_cfg, nix_rx_cfg, msg_rsp)	\
+M(NIX_LSO_FORMAT_CFG,	0x8011, nix_lso_format_cfg,			\
+				 nix_lso_format_cfg,			\
+				 nix_lso_format_cfg_rsp)		\
+M(NIX_RXVLAN_ALLOC,	0x8012, nix_rxvlan_alloc, msg_req, msg_rsp)
+
+/* Messages initiated by AF (range 0xC00 - 0xDFF) */
+#define MBOX_UP_CGX_MESSAGES						\
+M(CGX_LINK_EVENT,	0xC00, cgx_link_event, cgx_link_info_msg, msg_rsp)
+
+enum {
+#define M(_name, _id, _1, _2, _3) MBOX_MSG_ ## _name = _id,
+MBOX_MESSAGES
+MBOX_UP_CGX_MESSAGES
+#undef M
+};
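+
+/* The M() X-macro above turns each table entry into an enum value, e.g.
+ * M(READY, 0x001, ...) expands to MBOX_MSG_READY = 0x001.
+ */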
+
+/* Mailbox message formats */
+
+#define RVU_DEFAULT_PF_FUNC     0xFFFF
+
+/* Generic request msg used for those mbox messages which
+ * don't send any data in the request.
+ */
+struct msg_req {
+	struct mbox_msghdr hdr;
+};
+
+/* Generic response msg used as an ack or response for those mbox
+ * messages which don't have a specific rsp msg format.
+ */
+struct msg_rsp {
+	struct mbox_msghdr hdr;
+};
+
+/* RVU mailbox error codes
+ * Range 256 - 300.
+ */
+enum rvu_af_status {
+	RVU_INVALID_VF_ID           = -256,
+};
+
+struct ready_msg_rsp {
+	struct mbox_msghdr hdr;
+	u16    sclk_feq;	/* SCLK frequency */
+};
+
+/* Structure for requesting resource provisioning.
+ * The 'modify' flag is to be used when either requesting more
+ * resources or detaching part of a certain resource type.
+ * The rest of the fields specify how many of which type are
+ * to be attached.
+ */
+struct rsrc_attach {
+	struct mbox_msghdr hdr;
+	u8   modify:1;
+	u8   npalf:1;
+	u8   nixlf:1;
+	u16  sso;
+	u16  ssow;
+	u16  timlfs;
+	u16  cptlfs;
+};
+
+/* Structure for relinquishing resources.
+ * 'partial' flag to be used when relinquishing all resources
+ * but only of a certain type. If not set, all resources of all
+ * types provisioned to the RVU function will be detached.
+ */
+struct rsrc_detach {
+	struct mbox_msghdr hdr;
+	u8 partial:1;
+	u8 npalf:1;
+	u8 nixlf:1;
+	u8 sso:1;
+	u8 ssow:1;
+	u8 timlfs:1;
+	u8 cptlfs:1;
+};
+
+#define MSIX_VECTOR_INVALID	0xFFFF
+#define MAX_RVU_BLKLF_CNT	256
+
+struct msix_offset_rsp {
+	struct mbox_msghdr hdr;
+	u16  npa_msixoff;
+	u16  nix_msixoff;
+	u8   sso;
+	u8   ssow;
+	u8   timlfs;
+	u8   cptlfs;
+	u16  sso_msixoff[MAX_RVU_BLKLF_CNT];
+	u16  ssow_msixoff[MAX_RVU_BLKLF_CNT];
+	u16  timlf_msixoff[MAX_RVU_BLKLF_CNT];
+	u16  cptlf_msixoff[MAX_RVU_BLKLF_CNT];
+};
+
+/* CGX mbox message formats */
+
+struct cgx_stats_rsp {
+	struct mbox_msghdr hdr;
+#define CGX_RX_STATS_COUNT	13
+#define CGX_TX_STATS_COUNT	18
+	u64 rx_stats[CGX_RX_STATS_COUNT];
+	u64 tx_stats[CGX_TX_STATS_COUNT];
+};
+
+/* Structure for requesting the setting or getting of the
+ * MAC address of a CGX interface
+ */
+struct cgx_mac_addr_set_or_get {
+	struct mbox_msghdr hdr;
+	u8 mac_addr[ETH_ALEN];
+};
+
+struct cgx_link_user_info {
+	uint64_t link_up:1;
+	uint64_t full_duplex:1;
+	uint64_t lmac_type_id:4;
+	uint64_t speed:20; /* speed in Mbps */
+#define LMACTYPE_STR_LEN 16
+	char lmac_type[LMACTYPE_STR_LEN];
+};
+
+struct cgx_link_info_msg {
+	struct mbox_msghdr hdr;
+	struct cgx_link_user_info link_info;
+};
+
+/* NPA mbox message formats */
+
+/* NPA mailbox error codes
+ * Range 301 - 400.
+ */
+enum npa_af_status {
+	NPA_AF_ERR_PARAM            = -301,
+	NPA_AF_ERR_AQ_FULL          = -302,
+	NPA_AF_ERR_AQ_ENQUEUE       = -303,
+	NPA_AF_ERR_AF_LF_INVALID    = -304,
+	NPA_AF_ERR_AF_LF_ALLOC      = -305,
+	NPA_AF_ERR_LF_RESET         = -306,
+};
+
+/* For NPA LF context alloc and init */
+struct npa_lf_alloc_req {
+	struct mbox_msghdr hdr;
+	int node;
+	int aura_sz;  /* No of auras, see enum npa_aura_sz */
+	u32 nr_pools; /* No of pools */
+};
+
+struct npa_lf_alloc_rsp {
+	struct mbox_msghdr hdr;
+	u32 stack_pg_ptrs;  /* No of ptrs per stack page */
+	u32 stack_pg_bytes; /* Size of stack page */
+	u16 qints; /* NPA_AF_CONST::QINTS */
+};
+
+/* NPA AQ enqueue msg */
+struct npa_aq_enq_req {
+	struct mbox_msghdr hdr;
+	u32 aura_id;
+	u8 ctype;
+	u8 op;
+	union {
+		/* Valid when op == WRITE/INIT and ctype == AURA.
+		 * LF fills the pool_id in aura.pool_addr. AF will translate
+		 * the pool_id to pool context pointer.
+		 */
+		struct npa_aura_s aura;
+		/* Valid when op == WRITE/INIT and ctype == POOL */
+		struct npa_pool_s pool;
+	};
+	/* Mask data when op == WRITE (1=write, 0=don't write) */
+	union {
+		/* Valid when op == WRITE and ctype == AURA */
+		struct npa_aura_s aura_mask;
+		/* Valid when op == WRITE and ctype == POOL */
+		struct npa_pool_s pool_mask;
+	};
+};
+
+struct npa_aq_enq_rsp {
+	struct mbox_msghdr hdr;
+	union {
+		/* Valid when op == READ and ctype == AURA */
+		struct npa_aura_s aura;
+		/* Valid when op == READ and ctype == POOL */
+		struct npa_pool_s pool;
+	};
+};
+
+/* Disable all contexts of type 'ctype' */
+struct hwctx_disable_req {
+	struct mbox_msghdr hdr;
+	u8 ctype;
+};
+
+/* NIX mbox message formats */
+
+/* NIX mailbox error codes
+ * Range 401 - 500.
+ */
+enum nix_af_status {
+	NIX_AF_ERR_PARAM            = -401,
+	NIX_AF_ERR_AQ_FULL          = -402,
+	NIX_AF_ERR_AQ_ENQUEUE       = -403,
+	NIX_AF_ERR_AF_LF_INVALID    = -404,
+	NIX_AF_ERR_AF_LF_ALLOC      = -405,
+	NIX_AF_ERR_TLX_ALLOC_FAIL   = -406,
+	NIX_AF_ERR_TLX_INVALID      = -407,
+	NIX_AF_ERR_RSS_SIZE_INVALID = -408,
+	NIX_AF_ERR_RSS_GRPS_INVALID = -409,
+	NIX_AF_ERR_FRS_INVALID      = -410,
+	NIX_AF_ERR_RX_LINK_INVALID  = -411,
+	NIX_AF_INVAL_TXSCHQ_CFG     = -412,
+	NIX_AF_SMQ_FLUSH_FAILED     = -413,
+	NIX_AF_ERR_LF_RESET         = -414,
+	NIX_AF_ERR_RSS_NOSPC_FIELD  = -415,
+	NIX_AF_ERR_RSS_NOSPC_ALGO   = -416,
+	NIX_AF_ERR_MARK_CFG_FAIL    = -417,
+	NIX_AF_ERR_LSO_CFG_FAIL     = -418,
+	NIX_AF_INVAL_NPA_PF_FUNC    = -419,
+	NIX_AF_INVAL_SSO_PF_FUNC    = -420,
+};
+
+/* For NIX LF context alloc and init */
+struct nix_lf_alloc_req {
+	struct mbox_msghdr hdr;
+	int node;
+	u32 rq_cnt;   /* No of receive queues */
+	u32 sq_cnt;   /* No of send queues */
+	u32 cq_cnt;   /* No of completion queues */
+	u8  xqe_sz;
+	u16 rss_sz;
+	u8  rss_grps;
+	u16 npa_func;
+	u16 sso_func;
+	u64 rx_cfg;   /* See NIX_AF_LF(0..127)_RX_CFG */
+};
+
+struct nix_lf_alloc_rsp {
+	struct mbox_msghdr hdr;
+	u16	sqb_size;
+	u16	rx_chan_base;
+	u16	tx_chan_base;
+	u8      rx_chan_cnt; /* total number of RX channels */
+	u8      tx_chan_cnt; /* total number of TX channels */
+	u8	lso_tsov4_idx;
+	u8	lso_tsov6_idx;
+	u8      mac_addr[ETH_ALEN];
+	u8	lf_rx_stats; /* NIX_AF_CONST1::LF_RX_STATS */
+	u8	lf_tx_stats; /* NIX_AF_CONST1::LF_TX_STATS */
+	u16	cints; /* NIX_AF_CONST2::CINTS */
+	u16	qints; /* NIX_AF_CONST2::QINTS */
+};
+
+/* NIX AQ enqueue msg */
+struct nix_aq_enq_req {
+	struct mbox_msghdr hdr;
+	u32  qidx;
+	u8 ctype;
+	u8 op;
+	union {
+		struct nix_rq_ctx_s rq;
+		struct nix_sq_ctx_s sq;
+		struct nix_cq_ctx_s cq;
+		struct nix_rsse_s   rss;
+		struct nix_rx_mce_s mce;
+	};
+	union {
+		struct nix_rq_ctx_s rq_mask;
+		struct nix_sq_ctx_s sq_mask;
+		struct nix_cq_ctx_s cq_mask;
+		struct nix_rsse_s   rss_mask;
+		struct nix_rx_mce_s mce_mask;
+	};
+};
+
+struct nix_aq_enq_rsp {
+	struct mbox_msghdr hdr;
+	union {
+		struct nix_rq_ctx_s rq;
+		struct nix_sq_ctx_s sq;
+		struct nix_cq_ctx_s cq;
+		struct nix_rsse_s   rss;
+		struct nix_rx_mce_s mce;
+	};
+};
+
+/* Tx scheduler/shaper mailbox messages */
+
+#define MAX_TXSCHQ_PER_FUNC		128
+
+struct nix_txsch_alloc_req {
+	struct mbox_msghdr hdr;
+	/* Scheduler queue count request at each level */
+	u16 schq_contig[NIX_TXSCH_LVL_CNT]; /* No of contiguous queues */
+	u16 schq[NIX_TXSCH_LVL_CNT]; /* No of non-contiguous queues */
+};
+
+struct nix_txsch_alloc_rsp {
+	struct mbox_msghdr hdr;
+	/* Scheduler queue count allocated at each level */
+	u16 schq_contig[NIX_TXSCH_LVL_CNT];
+	u16 schq[NIX_TXSCH_LVL_CNT];
+	/* Scheduler queue list allocated at each level */
+	u16 schq_contig_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+	u16 schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+};
+
+struct nix_txsch_free_req {
+	struct mbox_msghdr hdr;
+#define TXSCHQ_FREE_ALL BIT_ULL(0)
+	u16 flags;
+	/* Scheduler queue level to be freed */
+	u16 schq_lvl;
+	/* List of scheduler queues to be freed */
+	u16 schq;
+};
+
+struct nix_txschq_config {
+	struct mbox_msghdr hdr;
+	u8 lvl;	/* SMQ/MDQ/TL4/TL3/TL2/TL1 */
+#define TXSCHQ_IDX_SHIFT	16
+#define TXSCHQ_IDX_MASK		(BIT_ULL(10) - 1)
+#define TXSCHQ_IDX(reg, shift)	(((reg) >> (shift)) & TXSCHQ_IDX_MASK)
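+/* e.g. TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT) extracts bits 25:16 of 'reg' */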
+	u8 num_regs;
+#define MAX_REGS_PER_MBOX_MSG	20
+	u64 reg[MAX_REGS_PER_MBOX_MSG];
+	u64 regval[MAX_REGS_PER_MBOX_MSG];
+};
+
+struct nix_vtag_config {
+	struct mbox_msghdr hdr;
+	/* '0' for 4 octet VTAG, '1' for 8 octet VTAG */
+	u8 vtag_size;
+	/* cfg_type is '0' for tx vlan cfg
+	 * cfg_type is '1' for rx vlan cfg
+	 */
+	u8 cfg_type;
+	union {
+		/* valid when cfg_type is '0' */
+		struct {
+			/* tx vlan0 tag(C-VLAN) */
+			u64 vlan0;
+			/* tx vlan1 tag(S-VLAN) */
+			u64 vlan1;
+			/* insert tx vlan tag */
+			u8 insert_vlan :1;
+			/* insert tx double vlan tag */
+			u8 double_vlan :1;
+		} tx;
+
+		/* valid when cfg_type is '1' */
+		struct {
+			/* rx vtag type index, valid values are in 0..7 range */
+			u8 vtag_type;
+			/* rx vtag strip */
+			u8 strip_vtag :1;
+			/* rx vtag capture */
+			u8 capture_vtag :1;
+		} rx;
+	};
+};
+
+struct nix_rss_flowkey_cfg {
+	struct mbox_msghdr hdr;
+	int	mcam_index;  /* MCAM entry index to modify */
+#define NIX_FLOW_KEY_TYPE_PORT	BIT(0)
+#define NIX_FLOW_KEY_TYPE_IPV4	BIT(1)
+#define NIX_FLOW_KEY_TYPE_IPV6	BIT(2)
+#define NIX_FLOW_KEY_TYPE_TCP	BIT(3)
+#define NIX_FLOW_KEY_TYPE_UDP	BIT(4)
+#define NIX_FLOW_KEY_TYPE_SCTP	BIT(5)
+	u32	flowkey_cfg; /* Flowkey types selected */
+	u8	group;       /* RSS context or group */
+};
+
+struct nix_rss_flowkey_cfg_rsp {
+	struct mbox_msghdr hdr;
+	u8	alg_idx; /* Selected algo index */
+};
+
+struct nix_set_mac_addr {
+	struct mbox_msghdr hdr;
+	u8 mac_addr[ETH_ALEN]; /* MAC address to be set for this pcifunc */
+};
+
+struct nix_mark_format_cfg {
+	struct mbox_msghdr hdr;
+	u8 offset;
+	u8 y_mask;
+	u8 y_val;
+	u8 r_mask;
+	u8 r_val;
+};
+
+struct nix_mark_format_cfg_rsp {
+	struct mbox_msghdr hdr;
+	u8 mark_format_idx;
+};
+
+struct nix_rx_mode {
+	struct mbox_msghdr hdr;
+#define NIX_RX_MODE_UCAST	BIT(0)
+#define NIX_RX_MODE_PROMISC	BIT(1)
+#define NIX_RX_MODE_ALLMULTI	BIT(2)
+	u16	mode;
+};
+
+struct nix_rx_cfg {
+	struct mbox_msghdr hdr;
+#define NIX_RX_OL3_VERIFY   BIT(0)
+#define NIX_RX_OL4_VERIFY   BIT(1)
+	u8 len_verify; /* Outer L3/L4 len check */
+#define NIX_RX_CSUM_OL4_VERIFY  BIT(0)
+	u8 csum_verify; /* Outer L4 checksum verification */
+};
+
+struct nix_frs_cfg {
+	struct mbox_msghdr hdr;
+	u8	update_smq;    /* Update SMQ's min/max lens */
+	u8	update_minlen; /* Set minlen also */
+	u8	sdp_link;      /* Set SDP RX link */
+	u16	maxlen;
+	u16	minlen;
+};
+
+struct nix_lso_format_cfg {
+	struct mbox_msghdr hdr;
+	u64 field_mask;
+#define NIX_LSO_FIELD_MAX	8
+	u64 fields[NIX_LSO_FIELD_MAX];
+};
+
+struct nix_lso_format_cfg_rsp {
+	struct mbox_msghdr hdr;
+	u8 lso_format_idx;
+};
+
+/* NPC mbox message structs */
+
+#define NPC_MCAM_ENTRY_INVALID	0xFFFF
+#define NPC_MCAM_INVALID_MAP	0xFFFF
+
+/* NPC mailbox error codes
+ * Range 701 - 800.
+ */
+enum npc_af_status {
+	NPC_MCAM_INVALID_REQ	= -701,
+	NPC_MCAM_ALLOC_DENIED	= -702,
+	NPC_MCAM_ALLOC_FAILED	= -703,
+	NPC_MCAM_PERM_DENIED	= -704,
+};
+
+struct npc_mcam_alloc_entry_req {
+	struct mbox_msghdr hdr;
+#define NPC_MAX_NONCONTIG_ENTRIES	256
+	u8  contig;   /* Contiguous entries ? */
+#define NPC_MCAM_ANY_PRIO		0
+#define NPC_MCAM_LOWER_PRIO		1
+#define NPC_MCAM_HIGHER_PRIO		2
+	u8  priority; /* Lower or higher w.r.t ref_entry */
+	u16 ref_entry;
+	u16 count;    /* Number of entries requested */
+};
+
+struct npc_mcam_alloc_entry_rsp {
+	struct mbox_msghdr hdr;
+	u16 entry; /* Entry allocated or start index if contiguous.
+		    * Invalid in case of non-contiguous.
+		    */
+	u16 count; /* Number of entries allocated */
+	u16 free_count; /* Number of entries available */
+	u16 entry_list[NPC_MAX_NONCONTIG_ENTRIES];
+};
+
+struct npc_mcam_free_entry_req {
+	struct mbox_msghdr hdr;
+	u16 entry; /* Entry index to be freed */
+	u8  all;   /* If all entries allocated to this PFVF to be freed */
+};
+
+struct mcam_entry {
+#define NPC_MAX_KWS_IN_KEY	7 /* Number of keywords in max keywidth */
+	u64	kw[NPC_MAX_KWS_IN_KEY];
+	u64	kw_mask[NPC_MAX_KWS_IN_KEY];
+	u64	action;
+	u64	vtag_action;
+};
+
+struct npc_mcam_write_entry_req {
+	struct mbox_msghdr hdr;
+	struct mcam_entry entry_data;
+	u16 entry;	 /* MCAM entry to write this match key */
+	u16 cntr;	 /* Counter for this MCAM entry */
+	u8  intf;	 /* Rx or Tx interface */
+	u8  enable_entry; /* Enable this MCAM entry ? */
+	u8  set_cntr;    /* Set counter for this entry ? */
+};
+
+/* Enable/Disable a given entry */
+struct npc_mcam_ena_dis_entry_req {
+	struct mbox_msghdr hdr;
+	u16 entry;
+};
+
+struct npc_mcam_shift_entry_req {
+	struct mbox_msghdr hdr;
+#define NPC_MCAM_MAX_SHIFTS	64
+	u16 curr_entry[NPC_MCAM_MAX_SHIFTS];
+	u16 new_entry[NPC_MCAM_MAX_SHIFTS];
+	u16 shift_count; /* Number of entries to shift */
+};
+
+struct npc_mcam_shift_entry_rsp {
+	struct mbox_msghdr hdr;
+	u16 failed_entry_idx; /* Index in 'curr_entry', not entry itself */
+};
+
+struct npc_mcam_alloc_counter_req {
+	struct mbox_msghdr hdr;
+	u8  contig;	/* Contiguous counters ? */
+#define NPC_MAX_NONCONTIG_COUNTERS       64
+	u16 count;	/* Number of counters requested */
+};
+
+struct npc_mcam_alloc_counter_rsp {
+	struct mbox_msghdr hdr;
+	u16 cntr;   /* Counter allocated or start index if contiguous.
+		     * Invalid in case of non-contiguous.
+		     */
+	u16 count;  /* Number of counters allocated */
+	u16 cntr_list[NPC_MAX_NONCONTIG_COUNTERS];
+};
+
+struct npc_mcam_oper_counter_req {
+	struct mbox_msghdr hdr;
+	u16 cntr;   /* Free a counter or clear/fetch its stats */
+};
+
+struct npc_mcam_oper_counter_rsp {
+	struct mbox_msghdr hdr;
+	u64 stat;  /* valid only while fetching counter's stats */
+};
+
+struct npc_mcam_unmap_counter_req {
+	struct mbox_msghdr hdr;
+	u16 cntr;
+	u16 entry; /* Entry and counter to be unmapped */
+	u8  all;   /* Unmap all entries using this counter ? */
+};
+
+struct npc_mcam_alloc_and_write_entry_req {
+	struct mbox_msghdr hdr;
+	struct mcam_entry entry_data;
+	u16 ref_entry;
+	u8  priority;    /* Lower or higher w.r.t ref_entry */
+	u8  intf;	 /* Rx or Tx interface */
+	u8  enable_entry; /* Enable this MCAM entry ? */
+	u8  alloc_cntr;  /* Allocate counter and map ? */
+};
+
+struct npc_mcam_alloc_and_write_entry_rsp {
+	struct mbox_msghdr hdr;
+	u16 entry;
+	u16 cntr;
+};
+
+struct npc_get_kex_cfg_rsp {
+	struct mbox_msghdr hdr;
+	u64 rx_keyx_cfg;   /* NPC_AF_INTF(0)_KEX_CFG */
+	u64 tx_keyx_cfg;   /* NPC_AF_INTF(1)_KEX_CFG */
+#define NPC_MAX_INTF	2
+#define NPC_MAX_LID	8
+#define NPC_MAX_LT	16
+#define NPC_MAX_LD	2
+#define NPC_MAX_LFL	16
+	/* NPC_AF_KEX_LDATA(0..1)_FLAGS_CFG */
+	u64 kex_ld_flags[NPC_MAX_LD];
+	/* NPC_AF_INTF(0..1)_LID(0..7)_LT(0..15)_LD(0..1)_CFG */
+	u64 intf_lid_lt_ld[NPC_MAX_INTF][NPC_MAX_LID][NPC_MAX_LT][NPC_MAX_LD];
+	/* NPC_AF_INTF(0..1)_LDATA(0..1)_FLAGS(0..15)_CFG */
+	u64 intf_ld_flags[NPC_MAX_INTF][NPC_MAX_LD][NPC_MAX_LFL];
+#define MKEX_NAME_LEN 128
+	u8 mkex_pfl_name[MKEX_NAME_LEN];
+};
+
+#endif /* MBOX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
new file mode 100644
index 0000000..5d4df31
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef NPC_H
+#define NPC_H
+
+enum NPC_LID_E {
+	NPC_LID_LA = 0,
+	NPC_LID_LB,
+	NPC_LID_LC,
+	NPC_LID_LD,
+	NPC_LID_LE,
+	NPC_LID_LF,
+	NPC_LID_LG,
+	NPC_LID_LH,
+};
+
+#define NPC_LT_NA 0
+
+enum npc_kpu_la_ltype {
+	NPC_LT_LA_8023 = 1,
+	NPC_LT_LA_ETHER,
+};
+
+enum npc_kpu_lb_ltype {
+	NPC_LT_LB_ETAG = 1,
+	NPC_LT_LB_CTAG,
+	NPC_LT_LB_STAG,
+	NPC_LT_LB_BTAG,
+	NPC_LT_LB_QINQ,
+	NPC_LT_LB_ITAG,
+};
+
+enum npc_kpu_lc_ltype {
+	NPC_LT_LC_IP = 1,
+	NPC_LT_LC_IP6,
+	NPC_LT_LC_ARP,
+	NPC_LT_LC_RARP,
+	NPC_LT_LC_MPLS,
+	NPC_LT_LC_NSH,
+	NPC_LT_LC_PTP,
+	NPC_LT_LC_FCOE,
+};
+
+/* Don't modify Ltypes up to SCTP, otherwise it will
+ * affect flow tag calculation and thus RSS.
+ */
+enum npc_kpu_ld_ltype {
+	NPC_LT_LD_TCP = 1,
+	NPC_LT_LD_UDP,
+	NPC_LT_LD_ICMP,
+	NPC_LT_LD_SCTP,
+	NPC_LT_LD_IGMP,
+	NPC_LT_LD_ICMP6,
+	NPC_LT_LD_ESP,
+	NPC_LT_LD_AH,
+	NPC_LT_LD_GRE,
+	NPC_LT_LD_GRE_MPLS,
+	NPC_LT_LD_GRE_NSH,
+	NPC_LT_LD_TU_MPLS,
+};
+
+enum npc_kpu_le_ltype {
+	NPC_LT_LE_TU_ETHER = 1,
+	NPC_LT_LE_TU_PPP,
+	NPC_LT_LE_TU_MPLS_IN_NSH,
+	NPC_LT_LE_TU_3RD_NSH,
+};
+
+enum npc_kpu_lf_ltype {
+	NPC_LT_LF_TU_IP = 1,
+	NPC_LT_LF_TU_IP6,
+	NPC_LT_LF_TU_ARP,
+	NPC_LT_LF_TU_MPLS_IP,
+	NPC_LT_LF_TU_MPLS_IP6,
+	NPC_LT_LF_TU_MPLS_ETHER,
+};
+
+enum npc_kpu_lg_ltype {
+	NPC_LT_LG_TU_TCP = 1,
+	NPC_LT_LG_TU_UDP,
+	NPC_LT_LG_TU_SCTP,
+	NPC_LT_LG_TU_ICMP,
+	NPC_LT_LG_TU_IGMP,
+	NPC_LT_LG_TU_ICMP6,
+	NPC_LT_LG_TU_ESP,
+	NPC_LT_LG_TU_AH,
+};
+
+enum npc_kpu_lh_ltype {
+	NPC_LT_LH_TCP_DATA = 1,
+	NPC_LT_LH_HTTP_DATA,
+	NPC_LT_LH_HTTPS_DATA,
+	NPC_LT_LH_PPTP_DATA,
+	NPC_LT_LH_UDP_DATA,
+};
+
+struct npc_kpu_profile_cam {
+	u8 state;
+	u8 state_mask;
+	u16 dp0;
+	u16 dp0_mask;
+	u16 dp1;
+	u16 dp1_mask;
+	u16 dp2;
+	u16 dp2_mask;
+};
+
+struct npc_kpu_profile_action {
+	u8 errlev;
+	u8 errcode;
+	u8 dp0_offset;
+	u8 dp1_offset;
+	u8 dp2_offset;
+	u8 bypass_count;
+	u8 parse_done;
+	u8 next_state;
+	u8 ptr_advance;
+	u8 cap_ena;
+	u8 lid;
+	u8 ltype;
+	u8 flags;
+	u8 offset;
+	u8 mask;
+	u8 right;
+	u8 shift;
+};
+
+struct npc_kpu_profile {
+	int cam_entries;
+	int action_entries;
+	struct npc_kpu_profile_cam *cam;
+	struct npc_kpu_profile_action *action;
+};
+
+/* NPC KPU register formats */
+struct npc_kpu_cam {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 rsvd_63_56     : 8;
+	u64 state          : 8;
+	u64 dp2_data       : 16;
+	u64 dp1_data       : 16;
+	u64 dp0_data       : 16;
+#else
+	u64 dp0_data       : 16;
+	u64 dp1_data       : 16;
+	u64 dp2_data       : 16;
+	u64 state          : 8;
+	u64 rsvd_63_56     : 8;
+#endif
+};
+
+struct npc_kpu_action0 {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 rsvd_63_57     : 7;
+	u64 byp_count      : 3;
+	u64 capture_ena    : 1;
+	u64 parse_done     : 1;
+	u64 next_state     : 8;
+	u64 rsvd_43        : 1;
+	u64 capture_lid    : 3;
+	u64 capture_ltype  : 4;
+	u64 capture_flags  : 8;
+	u64 ptr_advance    : 8;
+	u64 var_len_offset : 8;
+	u64 var_len_mask   : 8;
+	u64 var_len_right  : 1;
+	u64 var_len_shift  : 3;
+#else
+	u64 var_len_shift  : 3;
+	u64 var_len_right  : 1;
+	u64 var_len_mask   : 8;
+	u64 var_len_offset : 8;
+	u64 ptr_advance    : 8;
+	u64 capture_flags  : 8;
+	u64 capture_ltype  : 4;
+	u64 capture_lid    : 3;
+	u64 rsvd_43        : 1;
+	u64 next_state     : 8;
+	u64 parse_done     : 1;
+	u64 capture_ena    : 1;
+	u64 byp_count      : 3;
+	u64 rsvd_63_57     : 7;
+#endif
+};
+
+struct npc_kpu_action1 {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 rsvd_63_36     : 28;
+	u64 errlev         : 4;
+	u64 errcode        : 8;
+	u64 dp2_offset     : 8;
+	u64 dp1_offset     : 8;
+	u64 dp0_offset     : 8;
+#else
+	u64 dp0_offset     : 8;
+	u64 dp1_offset     : 8;
+	u64 dp2_offset     : 8;
+	u64 errcode        : 8;
+	u64 errlev         : 4;
+	u64 rsvd_63_36     : 28;
+#endif
+};
+
+struct npc_kpu_pkind_cpi_def {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 ena            : 1;
+	u64 rsvd_62_59     : 4;
+	u64 lid            : 3;
+	u64 ltype_match    : 4;
+	u64 ltype_mask     : 4;
+	u64 flags_match    : 8;
+	u64 flags_mask     : 8;
+	u64 add_offset     : 8;
+	u64 add_mask       : 8;
+	u64 rsvd_15        : 1;
+	u64 add_shift      : 3;
+	u64 rsvd_11_10     : 2;
+	u64 cpi_base       : 10;
+#else
+	u64 cpi_base       : 10;
+	u64 rsvd_11_10     : 2;
+	u64 add_shift      : 3;
+	u64 rsvd_15        : 1;
+	u64 add_mask       : 8;
+	u64 add_offset     : 8;
+	u64 flags_mask     : 8;
+	u64 flags_match    : 8;
+	u64 ltype_mask     : 4;
+	u64 ltype_match    : 4;
+	u64 lid            : 3;
+	u64 rsvd_62_59     : 4;
+	u64 ena            : 1;
+#endif
+};
+
+struct nix_rx_action {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64	rsvd_63_61	:3;
+	u64	flow_key_alg	:5;
+	u64	match_id	:16;
+	u64	index		:20;
+	u64	pf_func		:16;
+	u64	op		:4;
+#else
+	u64	op		:4;
+	u64	pf_func		:16;
+	u64	index		:20;
+	u64	match_id	:16;
+	u64	flow_key_alg	:5;
+	u64	rsvd_63_61	:3;
+#endif
+};
+
+/* NIX Receive Vtag Action Structure */
+#define VTAG0_VALID_BIT		BIT_ULL(15)
+#define VTAG0_TYPE_MASK		GENMASK_ULL(14, 12)
+#define VTAG0_LID_MASK		GENMASK_ULL(10, 8)
+#define VTAG0_RELPTR_MASK	GENMASK_ULL(7, 0)
+
+struct npc_mcam_kex {
+	/* MKEX Profile Header */
+	u64 mkex_sign; /* "mcam-kex-profile" (8 bytes/ASCII characters) */
+	u8 name[MKEX_NAME_LEN];   /* MKEX Profile name */
+	u64 cpu_model;   /* Format as profiled by CPU hardware */
+	u64 kpu_version; /* KPU firmware/profile version */
+	u64 reserved; /* Reserved for extension */
+
+	/* MKEX Profile Data */
+	u64 keyx_cfg[NPC_MAX_INTF]; /* NPC_AF_INTF(0..1)_KEX_CFG */
+	/* NPC_AF_KEX_LDATA(0..1)_FLAGS_CFG */
+	u64 kex_ld_flags[NPC_MAX_LD];
+	/* NPC_AF_INTF(0..1)_LID(0..7)_LT(0..15)_LD(0..1)_CFG */
+	u64 intf_lid_lt_ld[NPC_MAX_INTF][NPC_MAX_LID][NPC_MAX_LT][NPC_MAX_LD];
+	/* NPC_AF_INTF(0..1)_LDATA(0..1)_FLAGS(0..15)_CFG */
+	u64 intf_ld_flags[NPC_MAX_INTF][NPC_MAX_LD][NPC_MAX_LFL];
+} __packed;
+
+#endif /* NPC_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
new file mode 100644
index 0000000..da649f6
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -0,0 +1,5709 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef NPC_PROFILE_H
+#define NPC_PROFILE_H
+
+#define NPC_ETYPE_IP		0x0800
+#define NPC_ETYPE_IP6		0x86dd
+#define NPC_ETYPE_ARP		0x0806
+#define NPC_ETYPE_RARP		0x8035
+#define NPC_ETYPE_MPLSU		0x8847
+#define NPC_ETYPE_MPLSM		0x8848
+#define NPC_ETYPE_ETAG		0x893f
+#define NPC_ETYPE_CTAG		0x8100
+#define NPC_ETYPE_SBTAG		0x88a8
+#define NPC_ETYPE_ITAG		0x88e7
+#define NPC_ETYPE_PTP		0x88f7
+#define NPC_ETYPE_FCOE		0x8906
+#define NPC_ETYPE_QINQ		0x9100
+#define NPC_ETYPE_TRANS_ETH_BR	0x6558
+#define NPC_ETYPE_PPP		0x880b
+#define NPC_ETYPE_NSH		0x894f
+
+#define NPC_IPNH_HOP		0
+#define NPC_IPNH_ICMP		1
+#define NPC_IPNH_IGMP		2
+#define NPC_IPNH_IP		4
+#define NPC_IPNH_TCP		6
+#define NPC_IPNH_UDP		17
+#define NPC_IPNH_IP6		41
+#define NPC_IPNH_ROUT		43
+#define NPC_IPNH_FRAG		44
+#define NPC_IPNH_GRE		47
+#define NPC_IPNH_ESP		50
+#define NPC_IPNH_AH		51
+#define NPC_IPNH_ICMP6		58
+#define NPC_IPNH_NONH		59
+#define NPC_IPNH_DEST		60
+#define NPC_IPNH_SCTP		132
+#define NPC_IPNH_MPLS		137
+
+#define NPC_UDP_PORT_GTPC	2123
+#define NPC_UDP_PORT_GTPU	2152
+#define NPC_UDP_PORT_VXLAN	4789
+#define NPC_UDP_PORT_VXLANGPE	4790
+#define NPC_UDP_PORT_GENEVE	6081
+
+#define NPC_VXLANGPE_NP_IP	0x1
+#define NPC_VXLANGPE_NP_IP6	0x2
+#define NPC_VXLANGPE_NP_ETH	0x3
+#define NPC_VXLANGPE_NP_NSH	0x4
+#define NPC_VXLANGPE_NP_MPLS	0x5
+#define NPC_VXLANGPE_NP_GBP	0x6
+#define NPC_VXLANGPE_NP_VBNG	0x7
+
+#define NPC_NSH_NP_IP		0x1
+#define NPC_NSH_NP_IP6		0x2
+#define NPC_NSH_NP_ETH		0x3
+#define NPC_NSH_NP_NSH		0x4
+#define NPC_NSH_NP_MPLS		0x5
+
+#define NPC_TCP_PORT_HTTP	80
+#define NPC_TCP_PORT_HTTPS	443
+#define NPC_TCP_PORT_PPTP	1723
+
+#define NPC_MPLS_S		0x0100
+
+#define NPC_IP_VER_4		0x4000
+#define NPC_IP_VER_6		0x6000
+#define NPC_IP_VER_MASK		0xf000
+#define NPC_IP_HDR_LEN_5	0x0500
+#define NPC_IP_HDR_LEN_MASK	0x0f00
+
+#define NPC_GRE_F_CSUM		(0x1 << 15)
+#define NPC_GRE_F_ROUTE		(0x1 << 14)
+#define NPC_GRE_F_KEY		(0x1 << 13)
+#define NPC_GRE_F_SEQ		(0x1 << 12)
+#define NPC_GRE_F_ACK		(0x1 << 7)
+#define NPC_GRE_FLAG_MASK	(NPC_GRE_F_CSUM | NPC_GRE_F_ROUTE | \
+				 NPC_GRE_F_KEY | NPC_GRE_F_SEQ | NPC_GRE_F_ACK)
+#define NPC_GRE_VER_MASK	0x0003
+#define NPC_GRE_VER_1		0x0001
+
+#define NPC_VXLAN_I		0x0800
+
+#define NPC_VXLANGPE_VER	(0x3 << 12)
+#define NPC_VXLANGPE_I		(0x1 << 11)
+#define NPC_VXLANGPE_P		(0x1 << 10)
+#define NPC_VXLANGPE_B		(0x1 << 9)
+#define NPC_VXLANGPE_NP_MASK	0x00ff
+
+#define NPC_NSH_NP_MASK		0x00ff
+
+#define NPC_GENEVE_F_OAM	(0x1 << 7)
+#define NPC_GENEVE_F_CRI_OPT	(0x1 << 6)
+
+#define NPC_GTP_PT_GTP		(0x1 << 12)
+#define NPC_GTP_PT_MASK		(0x1 << 12)
+#define NPC_GTP_VER1		(0x1 << 13)
+#define NPC_GTP_VER_MASK	(0x7 << 13)
+#define NPC_GTP_MT_G_PDU	0xff
+#define NPC_GTP_MT_MASK		0xff
+
+#define NPC_TCP_DATA_OFFSET_5		0x5000
+#define NPC_TCP_DATA_OFFSET_MASK	0xf000
+
+enum npc_kpu_parser_state {
+	NPC_S_NA = 0,
+	NPC_S_KPU1_ETHER,
+	NPC_S_KPU1_PKI,
+	NPC_S_KPU2_CTAG,
+	NPC_S_KPU2_SBTAG,
+	NPC_S_KPU2_QINQ,
+	NPC_S_KPU2_ETAG,
+	NPC_S_KPU2_ITAG,
+	NPC_S_KPU3_CTAG,
+	NPC_S_KPU3_STAG,
+	NPC_S_KPU3_QINQ,
+	NPC_S_KPU3_ITAG,
+	NPC_S_KPU4_MPLS,
+	NPC_S_KPU4_NSH,
+	NPC_S_KPU5_IP,
+	NPC_S_KPU5_IP6,
+	NPC_S_KPU5_ARP,
+	NPC_S_KPU5_RARP,
+	NPC_S_KPU5_PTP,
+	NPC_S_KPU5_FCOE,
+	NPC_S_KPU5_MPLS,
+	NPC_S_KPU5_MPLS_PL,
+	NPC_S_KPU5_NSH,
+	NPC_S_KPU6_IP6_EXT,
+	NPC_S_KPU7_IP6_EXT,
+	NPC_S_KPU8_TCP,
+	NPC_S_KPU8_UDP,
+	NPC_S_KPU8_SCTP,
+	NPC_S_KPU8_ICMP,
+	NPC_S_KPU8_IGMP,
+	NPC_S_KPU8_ICMP6,
+	NPC_S_KPU8_GRE,
+	NPC_S_KPU8_ESP,
+	NPC_S_KPU8_AH,
+	NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN,
+	NPC_S_KPU9_TU_MPLS,
+	NPC_S_KPU9_TU_NSH,
+	NPC_S_KPU10_TU_MPLS_PL,
+	NPC_S_KPU10_TU_MPLS,
+	NPC_S_KPU10_TU_NSH,
+	NPC_S_KPU11_TU_ETHER,
+	NPC_S_KPU11_TU_PPP,
+	NPC_S_KPU11_TU_MPLS_IN_NSH,
+	NPC_S_KPU11_TU_3RD_NSH,
+	NPC_S_KPU12_TU_IP,
+	NPC_S_KPU12_TU_IP6,
+	NPC_S_KPU12_TU_ARP,
+	NPC_S_KPU13_TU_IP6_EXT,
+	NPC_S_KPU14_TU_IP6_EXT,
+	NPC_S_KPU15_TU_TCP,
+	NPC_S_KPU15_TU_UDP,
+	NPC_S_KPU15_TU_SCTP,
+	NPC_S_KPU15_TU_ICMP,
+	NPC_S_KPU15_TU_IGMP,
+	NPC_S_KPU15_TU_ICMP6,
+	NPC_S_KPU15_TU_ESP,
+	NPC_S_KPU15_TU_AH,
+	NPC_S_KPU16_HTTP_DATA,
+	NPC_S_KPU16_HTTPS_DATA,
+	NPC_S_KPU16_PPTP_DATA,
+	NPC_S_KPU16_TCP_DATA,
+	NPC_S_KPU16_UDP_DATA,
+	NPC_S_LAST /* has to be the last item */
+};
+
+enum npc_kpu_parser_flag {
+	NPC_F_NA = 0,
+	NPC_F_PKI,
+	NPC_F_PKI_VLAN,
+	NPC_F_PKI_ETAG,
+	NPC_F_PKI_ITAG,
+	NPC_F_PKI_MPLS,
+	NPC_F_PKI_NSH,
+	NPC_F_ETYPE_UNK,
+	NPC_F_ETHER_VLAN,
+	NPC_F_ETHER_ETAG,
+	NPC_F_ETHER_ITAG,
+	NPC_F_ETHER_MPLS,
+	NPC_F_ETHER_NSH,
+	NPC_F_STAG_CTAG,
+	NPC_F_STAG_CTAG_UNK,
+	NPC_F_STAG_STAG_CTAG,
+	NPC_F_STAG_STAG_STAG,
+	NPC_F_QINQ_CTAG,
+	NPC_F_QINQ_CTAG_UNK,
+	NPC_F_QINQ_QINQ_CTAG,
+	NPC_F_QINQ_QINQ_QINQ,
+	NPC_F_BTAG_ITAG,
+	NPC_F_BTAG_ITAG_STAG,
+	NPC_F_BTAG_ITAG_CTAG,
+	NPC_F_BTAG_ITAG_UNK,
+	NPC_F_ETAG_CTAG,
+	NPC_F_ETAG_BTAG_ITAG,
+	NPC_F_ETAG_STAG,
+	NPC_F_ETAG_QINQ,
+	NPC_F_ETAG_ITAG,
+	NPC_F_ETAG_ITAG_STAG,
+	NPC_F_ETAG_ITAG_CTAG,
+	NPC_F_ETAG_ITAG_UNK,
+	NPC_F_ITAG_STAG_CTAG,
+	NPC_F_ITAG_STAG,
+	NPC_F_ITAG_CTAG,
+	NPC_F_MPLS_4_LABELS,
+	NPC_F_MPLS_3_LABELS,
+	NPC_F_MPLS_2_LABELS,
+	NPC_F_IP_HAS_OPTIONS,
+	NPC_F_IP_IP_IN_IP,
+	NPC_F_IP_6TO4,
+	NPC_F_IP_MPLS_IN_IP,
+	NPC_F_IP_UNK_PROTO,
+	NPC_F_IP_IP_IN_IP_HAS_OPTIONS,
+	NPC_F_IP_6TO4_HAS_OPTIONS,
+	NPC_F_IP_MPLS_IN_IP_HAS_OPTIONS,
+	NPC_F_IP_UNK_PROTO_HAS_OPTIONS,
+	NPC_F_IP6_HAS_EXT,
+	NPC_F_IP6_TUN_IP6,
+	NPC_F_IP6_MPLS_IN_IP,
+	NPC_F_TCP_HAS_OPTIONS,
+	NPC_F_TCP_HTTP,
+	NPC_F_TCP_HTTPS,
+	NPC_F_TCP_PPTP,
+	NPC_F_TCP_UNK_PORT,
+	NPC_F_TCP_HTTP_HAS_OPTIONS,
+	NPC_F_TCP_HTTPS_HAS_OPTIONS,
+	NPC_F_TCP_PPTP_HAS_OPTIONS,
+	NPC_F_TCP_UNK_PORT_HAS_OPTIONS,
+	NPC_F_UDP_VXLAN,
+	NPC_F_UDP_VXLAN_NOVNI,
+	NPC_F_UDP_VXLAN_NOVNI_NSH,
+	NPC_F_UDP_VXLANGPE,
+	NPC_F_UDP_VXLANGPE_NSH,
+	NPC_F_UDP_VXLANGPE_MPLS,
+	NPC_F_UDP_VXLANGPE_NOVNI,
+	NPC_F_UDP_VXLANGPE_NOVNI_NSH,
+	NPC_F_UDP_VXLANGPE_NOVNI_MPLS,
+	NPC_F_UDP_VXLANGPE_UNK,
+	NPC_F_UDP_VXLANGPE_NONP,
+	NPC_F_UDP_GTP_GTPC,
+	NPC_F_UDP_GTP_GTPU_G_PDU,
+	NPC_F_UDP_GTP_GTPU_UNK,
+	NPC_F_UDP_UNK_PORT,
+	NPC_F_UDP_GENEVE,
+	NPC_F_UDP_GENEVE_OAM,
+	NPC_F_UDP_GENEVE_CRI_OPT,
+	NPC_F_UDP_GENEVE_OAM_CRI_OPT,
+	NPC_F_GRE_NVGRE,
+	NPC_F_GRE_HAS_SRE,
+	NPC_F_GRE_HAS_CSUM,
+	NPC_F_GRE_HAS_KEY,
+	NPC_F_GRE_HAS_SEQ,
+	NPC_F_GRE_HAS_CSUM_KEY,
+	NPC_F_GRE_HAS_CSUM_SEQ,
+	NPC_F_GRE_HAS_KEY_SEQ,
+	NPC_F_GRE_HAS_CSUM_KEY_SEQ,
+	NPC_F_GRE_HAS_ROUTE,
+	NPC_F_GRE_UNK_PROTO,
+	NPC_F_GRE_VER1,
+	NPC_F_GRE_VER1_HAS_SEQ,
+	NPC_F_GRE_VER1_HAS_ACK,
+	NPC_F_GRE_VER1_HAS_SEQ_ACK,
+	NPC_F_GRE_VER1_UNK_PROTO,
+	NPC_F_TU_ETHER_UNK,
+	NPC_F_TU_ETHER_CTAG,
+	NPC_F_TU_ETHER_CTAG_UNK,
+	NPC_F_TU_ETHER_STAG_CTAG,
+	NPC_F_TU_ETHER_STAG_CTAG_UNK,
+	NPC_F_TU_ETHER_STAG,
+	NPC_F_TU_ETHER_STAG_UNK,
+	NPC_F_TU_ETHER_QINQ_CTAG,
+	NPC_F_TU_ETHER_QINQ_CTAG_UNK,
+	NPC_F_TU_ETHER_QINQ,
+	NPC_F_TU_ETHER_QINQ_UNK,
+	NPC_F_LAST /* has to be the last item */
+};
+
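+/* Error codes reported when a packet fails to match a valid entry at
+ * some parser stage.
+ */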
+enum npc_kpu_err_code {
+	NPC_EC_NOERR = 0, /* has to be zero */
+	NPC_EC_UNK,
+	NPC_EC_L2_K1,
+	NPC_EC_L2_K2,
+	NPC_EC_L2_K3,
+	NPC_EC_L2_K3_ETYPE_UNK,
+	NPC_EC_L2_MPLS_2MANY,
+	NPC_EC_L2_K4,
+	NPC_EC_IP_VER,
+	NPC_EC_IP6_VER,
+	NPC_EC_VXLAN,
+	NPC_EC_NVGRE,
+	NPC_EC_GRE,
+	NPC_EC_GRE_VER1,
+	NPC_EC_L4,
+	NPC_EC_LAST /* has to be the last item */
+};
+
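+/* Error levels identifying where an error was raised; LA-LH appear to
+ * correspond to the eight NPC parse layers, with RE and NIX covering the
+ * blocks before and after the parser.
+ */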
+enum NPC_ERRLEV_E {
+	NPC_ERRLEV_RE = 0,
+	NPC_ERRLEV_LA = 1,
+	NPC_ERRLEV_LB = 2,
+	NPC_ERRLEV_LC = 3,
+	NPC_ERRLEV_LD = 4,
+	NPC_ERRLEV_LE = 5,
+	NPC_ERRLEV_LF = 6,
+	NPC_ERRLEV_LG = 7,
+	NPC_ERRLEV_LH = 8,
+	NPC_ERRLEV_R9 = 9,
+	NPC_ERRLEV_R10 = 10,
+	NPC_ERRLEV_R11 = 11,
+	NPC_ERRLEV_R12 = 12,
+	NPC_ERRLEV_R13 = 13,
+	NPC_ERRLEV_R14 = 14,
+	NPC_ERRLEV_NIX = 15,
+	NPC_ERRLEV_ENUM_LAST = 16,
+};
+
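+/* Initial KPU (IKPU) action table, one entry per port kind.  Every entry
+ * starts parsing in NPC_S_KPU1_ETHER; the values 12, 14 and 16 are
+ * evidently the byte offsets of the three 16-bit match windows handed to
+ * KPU1, i.e. the EtherType word and the two words that follow it.  Only
+ * the last entry differs, in its trailing mask fields.
+ */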
+static struct npc_kpu_profile_action ikpu_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 14, 16,
+		0, 0, NPC_S_KPU1_ETHER, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 1, 0xff,
+		0, 0,
+	},
+};
+
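+/* Per-stage CAM match tables.  Each entry pairs the current parser state
+ * (with a state mask) against three masked 16-bit data windows; an entry
+ * hits when all four comparisons match.  Entries are presumably priority
+ * ordered, first hit wins, and each table ends with an all-wildcard
+ * NPC_S_NA catch-all.  KPU1 classifies the outer EtherType, with
+ * NPC_S_KPU1_PKI as a second entry point.
+ */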
+static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_ETAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, 0x0000, 0xfc00,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, 0x0400, 0xfe00,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_ETHER, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_ETAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0010, 0x0010, 0x0000, 0xffff,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0010, 0x0010, 0x0000, 0xffff,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU1_PKI, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
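+/* KPU2 continues after one VLAN/E-TAG/I-TAG, matching the tag's inner
+ * EtherType and, for S-tag/QinQ/I-TAG, up to two further tags via the
+ * second and third windows.  KPU3 handles the same combinations one tag
+ * deeper.
+ */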
+static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_RARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_PTP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_FCOE, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSU, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_NSH, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_SBTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_ARP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_RARP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_PTP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_FCOE, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_MPLSU, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_MPLSM, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_NSH, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_SBTAG, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_CTAG, 0xffff,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_SBTAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_RARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_PTP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_FCOE, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSU, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_NSH, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_QINQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_QINQ, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_ITAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_ARP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_SBTAG, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, NPC_ETYPE_CTAG, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, NPC_ETYPE_ITAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ETAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_ARP, 0xffff,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU2_ITAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
+static struct npc_kpu_profile_cam kpu3_cam_entries[] = {
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_CTAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_RARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_PTP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_FCOE, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSU, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_NSH, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_STAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_RARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_PTP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_FCOE, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSU, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_MPLSM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_NSH, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_PTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_FCOE, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_QINQ, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_RARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_ARP, 0xffff,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU3_ITAG, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
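+/* KPU4 walks an outer MPLS label stack, sampling the bottom-of-stack bit
+ * of up to four labels to classify 1-, 2-, 3- and 4+-label stacks, and
+ * dispatches NSH on its next-protocol field.
+ */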
+static struct npc_kpu_profile_cam kpu4_cam_entries[] = {
+	{
+		NPC_S_KPU4_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU4_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		NPC_MPLS_S, NPC_MPLS_S, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU4_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, NPC_MPLS_S, NPC_MPLS_S, NPC_MPLS_S,
+	},
+	{
+		NPC_S_KPU4_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, NPC_MPLS_S, 0x0000, NPC_MPLS_S,
+	},
+	{
+		NPC_S_KPU4_NSH, 0xff, NPC_NSH_NP_IP, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU4_NSH, 0xff, NPC_NSH_NP_IP6, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU4_NSH, 0xff, NPC_NSH_NP_ETH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU4_NSH, 0xff, NPC_NSH_NP_NSH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU4_NSH, 0xff, NPC_NSH_NP_MPLS, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
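+/* KPU5 classifies the outer L3 header.  IPv4 entries match the protocol
+ * byte (low byte of the first window) together with the version/IHL
+ * word; the first block requires IHL 5, so packets carrying IP options
+ * fall through to the version-only block after it.  IPv6 entries match
+ * the next-header byte in the high byte of the first window.  Separate
+ * states cover ARP, RARP, PTP, FCoE, MPLS and NSH.
+ */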
+static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_TCP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_UDP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_SCTP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_ICMP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_IGMP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_ESP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_AH, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_GRE, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_IP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_IP6, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_MPLS, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, 0x0000, 0x0000,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_TCP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_UDP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_SCTP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_ICMP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_IGMP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_ESP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_AH, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_GRE, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_IP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_IP6, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, NPC_IPNH_MPLS, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, 0x0000, 0x0000,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_ARP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_RARP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_PTP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_FCOE, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_TCP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_UDP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_SCTP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_ICMP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_ICMP6 << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_ESP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_AH << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_GRE << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_IP6 << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, NPC_IPNH_MPLS << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, 0x0000, 0x0000,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_IP6, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS_PL, 0xff, NPC_IP_VER_4, NPC_IP_VER_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS_PL, 0xff, NPC_IP_VER_6, NPC_IP_VER_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS_PL, 0xff, 0x0000, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_MPLS_PL, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_NSH, 0xff, NPC_NSH_NP_IP, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_NSH, 0xff, NPC_NSH_NP_IP6, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_NSH, 0xff, NPC_NSH_NP_ETH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_NSH, 0xff, NPC_NSH_NP_NSH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU5_NSH, 0xff, NPC_NSH_NP_MPLS, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
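+/* KPU6 and KPU7 each appear dedicated to consuming one IPv6 extension
+ * header, matched with a single wildcard entry.
+ */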
+static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
+	{
+		NPC_S_KPU6_IP6_EXT, 0xff, 0x0000, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000,
+	},
+};
+
+static struct npc_kpu_profile_cam kpu7_cam_entries[] = {
+	{
+		NPC_S_KPU7_IP6_EXT, 0xff, 0x0000, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000,
+	},
+};
+
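+/* KPU8 classifies the outer L4 header.  TCP entries pair a well-known
+ * port with the data-offset nibble (5 = no options); UDP entries key on
+ * tunnel destination ports (VXLAN, VXLAN-GPE, GENEVE, GTP-C/GTP-U) and
+ * the tunnel's flag word; GRE entries enumerate the C/K/S flag
+ * combinations, which determine the optional-field layout, plus the
+ * RFC 2637 version 1 form used by PPTP.
+ */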
+static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
+	{
+		NPC_S_KPU8_TCP, 0xff, NPC_TCP_PORT_HTTP, 0xffff,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, NPC_TCP_PORT_HTTPS, 0xffff,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, NPC_TCP_PORT_PPTP, 0xffff,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, 0x0000, 0x0000,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, NPC_TCP_PORT_HTTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, NPC_TCP_PORT_HTTPS, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, NPC_TCP_PORT_PPTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_TCP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLAN, 0xffff,
+		NPC_VXLAN_I, NPC_VXLAN_I, 0x0000, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLAN, 0xffff,
+		0x0000, 0xffff, 0x0000, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLAN, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_IP, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_IP6, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_ETH, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_NSH, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_MPLS, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P, NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_IP, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P, NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_IP6, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P, NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_ETH, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P, NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_NSH, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P, NPC_VXLANGPE_P | NPC_VXLANGPE_I,
+		NPC_VXLANGPE_NP_MPLS, NPC_VXLANGPE_NP_MASK,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		NPC_VXLANGPE_P, NPC_VXLANGPE_P, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_VXLANGPE, 0xffff,
+		0x0000, NPC_VXLANGPE_P, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		0x0000, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_TRANS_ETH_BR, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_OAM, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_TRANS_ETH_BR, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_CRI_OPT, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_TRANS_ETH_BR, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_TRANS_ETH_BR, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		0x0000, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_OAM, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_CRI_OPT, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		0x0000, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_OAM, NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_CRI_OPT,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GENEVE, 0xffff,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT,
+		NPC_GENEVE_F_OAM | NPC_GENEVE_F_CRI_OPT, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GTPC, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GTPU, 0xffff,
+		NPC_GTP_PT_GTP | NPC_GTP_VER1 | NPC_GTP_MT_G_PDU,
+		NPC_GTP_PT_MASK | NPC_GTP_VER_MASK | NPC_GTP_MT_MASK,
+		0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, NPC_UDP_PORT_GTPU, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_UDP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_SCTP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_ICMP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_IGMP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_ICMP6, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_ESP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_AH, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_TRANS_ETH_BR, 0xffff,
+		NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_TRANS_ETH_BR, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_CSUM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSU, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_CSUM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_MPLSM, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_CSUM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_NSH, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_CSUM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_CSUM, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_IP6, 0xffff,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, 0x0000, 0xffff,
+		NPC_GRE_F_ROUTE, 0x4fff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, 0x0000, 0xffff,
+		0x0000, 0x4fff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, 0x0000, 0xffff,
+		0x0000, 0x0003, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_PPP, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_VER_1, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_PPP, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ | NPC_GRE_VER_1,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_PPP, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_ACK | NPC_GRE_VER_1,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, NPC_ETYPE_PPP, 0xffff,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ | NPC_GRE_F_ACK | NPC_GRE_VER_1,
+		0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, 0x0000, 0xffff,
+		0x2001, 0xef7f, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU8_GRE, 0xff, 0x0000, 0xffff,
+		0x0001, 0x0003, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
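+/* KPU9 repeats KPU4's MPLS stack walk and NSH dispatch for stacks
+ * carried inside GRE or VXLAN-GPE tunnels.
+ */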
+static struct npc_kpu_profile_cam kpu9_cam_entries[] = {
+	{
+		NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 0xff, 0x0000, NPC_MPLS_S,
+		NPC_MPLS_S, NPC_MPLS_S, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, NPC_MPLS_S, NPC_MPLS_S, NPC_MPLS_S,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, NPC_MPLS_S, 0x0000, NPC_MPLS_S,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		NPC_MPLS_S, NPC_MPLS_S, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, NPC_MPLS_S, NPC_MPLS_S, NPC_MPLS_S,
+	},
+	{
+		NPC_S_KPU9_TU_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, NPC_MPLS_S, 0x0000, NPC_MPLS_S,
+	},
+	{
+		NPC_S_KPU9_TU_NSH, 0xff, NPC_NSH_NP_IP, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_NSH, 0xff, NPC_NSH_NP_IP6, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_NSH, 0xff, NPC_NSH_NP_ETH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_NSH, 0xff, NPC_NSH_NP_NSH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU9_TU_NSH, 0xff, NPC_NSH_NP_MPLS, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
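+/* KPU10 finishes inner MPLS stacks; the MPLS_PL states peek at the first
+ * nibble of the MPLS payload (0x4/0x6) to pick the inner L3 state.
+ */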
+static struct npc_kpu_profile_cam kpu10_cam_entries[] = {
+	{
+		NPC_S_KPU10_TU_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS, 0xff, NPC_MPLS_S, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS, 0xff, 0x0000, NPC_MPLS_S,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS_PL, 0xff, NPC_IP_VER_4, NPC_IP_VER_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS_PL, 0xff, NPC_IP_VER_6, NPC_IP_VER_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS_PL, 0xff, 0x0000, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_MPLS_PL, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_NSH, 0xff, NPC_NSH_NP_IP, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_NSH, 0xff, NPC_NSH_NP_IP6, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_NSH, 0xff, NPC_NSH_NP_ETH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_NSH, 0xff, NPC_NSH_NP_NSH, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU10_TU_NSH, 0xff, NPC_NSH_NP_MPLS, NPC_NSH_NP_MASK,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
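+/* KPU11 parses the inner Ethernet header of Ethernet-carrying tunnels,
+ * including single-, double- and QinQ-tagged frames, plus PPP for GRE
+ * version 1 (PPTP) and the nested NSH cases.
+ */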
+static struct npc_kpu_profile_cam kpu11_cam_entries[] = {
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_IP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_IP6, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_ARP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_CTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_ARP, 0xffff,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_SBTAG, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP, 0xffff,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_IP6, 0xffff,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, NPC_ETYPE_ARP, 0xffff,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_CTAG, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_IP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_IP6, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		NPC_ETYPE_ARP, 0xffff, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, NPC_ETYPE_QINQ, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_ETHER, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_PPP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_MPLS_IN_NSH, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU11_TU_3RD_NSH, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
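+/* KPU12 classifies the tunneled L3 header, mirroring KPU5's IPv4 and
+ * IPv6 matching.
+ */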
+static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_TCP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_UDP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_SCTP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_ICMP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_IGMP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_ESP, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_AH, 0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, 0x0000, 0x0000,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_TCP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_UDP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_SCTP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_ICMP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_IGMP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_ESP, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, NPC_IPNH_AH, 0x00ff,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, 0x0000, 0x0000,
+		NPC_IP_VER_4, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_ARP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_TCP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_UDP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_SCTP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_ICMP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_ICMP6 << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_ESP << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, NPC_IPNH_AH << 8, 0xff00,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, 0x0000, 0x0000,
+		NPC_IP_VER_6, NPC_IP_VER_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU12_TU_IP6, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
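+/* KPU13 CAM entries: single catch-all for the inner IPv6 extension header
+ * state.
+ */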
+static struct npc_kpu_profile_cam kpu13_cam_entries[] = {
+	{
+		NPC_S_KPU13_TU_IP6_EXT, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
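+/* KPU14 CAM entries: catch-all for further inner IPv6 extension headers. */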
+static struct npc_kpu_profile_cam kpu14_cam_entries[] = {
+	{
+		NPC_S_KPU14_TU_IP6_EXT, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
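+/* KPU15 CAM entries: inner L4 - TCP by well-known port and data offset,
+ * then UDP, SCTP, ICMP, IGMP, ICMP6, ESP and AH.
+ */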
+static struct npc_kpu_profile_cam kpu15_cam_entries[] = {
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, NPC_TCP_PORT_HTTP, 0xffff,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, NPC_TCP_PORT_HTTPS, 0xffff,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, NPC_TCP_PORT_PPTP, 0xffff,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, 0x0000, 0x0000,
+		NPC_TCP_DATA_OFFSET_5, NPC_TCP_DATA_OFFSET_MASK, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, NPC_TCP_PORT_HTTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, NPC_TCP_PORT_HTTPS, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, NPC_TCP_PORT_PPTP, 0xffff,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_TCP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_UDP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_SCTP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_ICMP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_IGMP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_ICMP6, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_ESP, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU15_TU_AH, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_NA, 0x00, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
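+/* KPU16 CAM entries: terminal payload states (TCP/HTTP/HTTPS/PPTP and UDP
+ * data).
+ */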
+static struct npc_kpu_profile_cam kpu16_cam_entries[] = {
+	{
+		NPC_S_KPU16_TCP_DATA, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU16_HTTP_DATA, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU16_HTTPS_DATA, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU16_PPTP_DATA, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+	{
+		NPC_S_KPU16_UDP_DATA, 0xff, 0x0000, 0x0000,
+		0x0000, 0x0000, 0x0000, 0x0000,
+	},
+};
+
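+/* Each kpuN_action_entries[] table pairs index-for-index with the same KPU's
+ * kpuN_cam_entries[] table: the action at index i applies when CAM entry i
+ * matches.
+ */
+
+/* KPU1: outer Ethernet - dispatch to L3, VLAN/E-TAG/I-TAG, MPLS or NSH. */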
+static struct npc_kpu_profile_action kpu1_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU5_IP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU5_IP6, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_ARP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_RARP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_PTP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_FCOE, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU2_CTAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_VLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 20,
+		0, 0, NPC_S_KPU2_SBTAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_VLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU2_QINQ, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_VLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 10, 24,
+		0, 0, NPC_S_KPU2_ETAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_ETAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 16, 20, 24,
+		0, 0, NPC_S_KPU2_ITAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		2, 0, NPC_S_KPU4_MPLS, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_MPLS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		2, 0, NPC_S_KPU4_MPLS, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_MPLS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		2, 0, NPC_S_KPU4_NSH, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETHER_NSH, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_8023, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_8023, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETYPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU5_IP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU5_IP6, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_ARP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_RARP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_PTP, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU5_FCOE, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU2_CTAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_VLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 20,
+		0, 0, NPC_S_KPU2_SBTAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_VLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU2_QINQ, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_VLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 10, 24,
+		0, 0, NPC_S_KPU2_ETAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_ETAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 16, 20, 24,
+		0, 0, NPC_S_KPU2_ITAG, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		2, 0, NPC_S_KPU4_MPLS, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_MPLS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		2, 0, NPC_S_KPU4_MPLS, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_MPLS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		2, 0, NPC_S_KPU4_NSH, 14, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_PKI_NSH, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_ETYPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LA, NPC_EC_L2_K1, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LA, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
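+/* KPU2: first tag layer - CTAG, STAG, B-TAG/I-TAG, QinQ and E-TAG handling. */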
+static struct npc_kpu_profile_action kpu2_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		1, 0, NPC_S_KPU4_NSH, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_CTAG, NPC_F_ETYPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		1, 0, NPC_S_KPU4_NSH, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_CTAG_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU3_CTAG, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU3_STAG, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_STAG_STAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		1, 0, NPC_S_KPU4_NSH, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU3_STAG, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU3_CTAG, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_BTAG, NPC_F_BTAG_ITAG_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		1, 0, NPC_S_KPU4_NSH, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_STAG, NPC_F_ETYPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		1, 0, NPC_S_KPU4_NSH, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_CTAG_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU3_CTAG, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU3_QINQ, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_QINQ_QINQ_QINQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		1, 0, NPC_S_KPU4_NSH, 4, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_QINQ, NPC_F_ETYPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_PTP, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_FCOE, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 1, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_MPLS, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 2, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		1, 0, NPC_S_KPU4_NSH, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, 2, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU3_CTAG, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 16, 20, 24,
+		0, 0, NPC_S_KPU3_ITAG, 12, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_BTAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU3_STAG, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 0,
+		0, 0, NPC_S_KPU3_QINQ, 8, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_QINQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_ITAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU3_STAG, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_ITAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU3_CTAG, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_ITAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETAG_ITAG_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LB, NPC_LT_LB_ETAG, NPC_F_ETYPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 18, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 18, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 18, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 18, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 26, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 22, 1,
+		NPC_LID_LB, NPC_LT_LB_ITAG, NPC_F_ITAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
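+/* KPU3: inner tags reached from KPU2 (CTAG/STAG/QinQ/ITAG states). */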
+static struct npc_kpu_profile_action kpu3_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_RARP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_PTP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_FCOE, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU4_NSH, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_RARP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_PTP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_FCOE, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU4_NSH, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_RARP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU4_NSH, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_RARP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_PTP, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_FCOE, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU4_NSH, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_RARP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_PTP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_FCOE, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU4_MPLS, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU4_NSH, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU5_IP, 18, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU5_IP6, 18, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_ARP, 18, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU5_RARP, 18, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 26, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 26, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 26, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 22, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 22, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 22, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU5_IP, 22, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU5_IP6, 22, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU5_ARP, 22, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K3, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
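+/* KPU4: MPLS label stack (one to four labels) and NSH next-protocol
+ * dispatch.
+ */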
+static struct npc_kpu_profile_action kpu4_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU5_MPLS_PL, 4, 1,
+		NPC_LID_LC, NPC_LT_LC_MPLS, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU5_MPLS_PL, 8, 1,
+		NPC_LID_LC, NPC_LT_LC_MPLS, NPC_F_MPLS_2_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU5_MPLS_PL, 12, 1,
+		NPC_LID_LC, NPC_LT_LC_MPLS, NPC_F_MPLS_3_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 4, 0,
+		0, 0, NPC_S_KPU5_MPLS, 12, 1,
+		NPC_LID_LC, NPC_LT_LC_MPLS, NPC_F_MPLS_4_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		7, 0, NPC_S_KPU12_TU_IP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_NSH, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		7, 0, NPC_S_KPU12_TU_IP6, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_NSH, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		6, 0, NPC_S_KPU11_TU_ETHER, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_NSH, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU5_NSH, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_NSH, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		4, 0, NPC_S_KPU9_TU_MPLS, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_NSH, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_K4, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
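+/* KPU5: outer L3 - IPv4 with and without options, ARP, RARP, PTP, FCoE,
+ * IPv6, and MPLS/NSH payload resolution.
+ */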
+static struct npc_kpu_profile_action kpu5_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0,
+		2, 0, NPC_S_KPU8_TCP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 8, 10,
+		2, 0, NPC_S_KPU8_UDP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_SCTP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_ICMP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_IGMP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU8_ESP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU8_AH, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		2, 0, NPC_S_KPU8_GRE, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_IP_IN_IP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP6, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_6TO4, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		3, 0, NPC_S_KPU9_TU_MPLS, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_MPLS_IN_IP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_UNK_PROTO, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0,
+		2, 0, NPC_S_KPU8_TCP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 8, 10,
+		2, 0, NPC_S_KPU8_UDP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_SCTP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_ICMP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_IGMP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU8_ESP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU8_AH, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		2, 0, NPC_S_KPU8_GRE, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_IP_IN_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP6, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_6TO4_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		3, 0, NPC_S_KPU9_TU_MPLS, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_MPLS_IN_IP_HAS_OPTIONS,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, NPC_F_IP_UNK_PROTO_HAS_OPTIONS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP_VER, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_ARP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_RARP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_PTP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_FCOE, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0,
+		2, 0, NPC_S_KPU8_TCP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 8, 10,
+		2, 0, NPC_S_KPU8_UDP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_SCTP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_ICMP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_ICMP6, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_ESP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_AH, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU8_GRE, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP6, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, NPC_F_IP6_TUN_IP6, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		3, 0, NPC_S_KPU9_TU_MPLS, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, NPC_F_IP6_MPLS_IN_IP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU6_IP6_EXT, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, NPC_F_IP6_HAS_EXT, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP6_VER, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP6, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		5, 0, NPC_S_KPU11_TU_ETHER, 8, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		5, 0, NPC_S_KPU11_TU_ETHER, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_MPLS_2MANY, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP6, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		5, 0, NPC_S_KPU11_TU_ETHER, 4, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		5, 0, NPC_S_KPU11_TU_ETHER, 0, 0,
+		NPC_LID_LB, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		6, 0, NPC_S_KPU12_TU_IP6, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		5, 0, NPC_S_KPU11_TU_ETHER, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		5, 0, NPC_S_KPU11_TU_3RD_NSH, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		3, 0, NPC_S_KPU9_TU_MPLS, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
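+/* KPU6: outer IPv6 extension header state; its single entry ends parsing. */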
+static struct npc_kpu_profile_action kpu6_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
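+/* KPU7: single catch-all entry that ends parsing for this stage. */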
+static struct npc_kpu_profile_action kpu7_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
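+/* KPU8: outer L4 - TCP port dispatch, UDP tunnels (VXLAN, VXLAN-GPE, Geneve,
+ * GTP), SCTP/ICMP/IGMP/ESP/AH, and GRE variants incl. NVGRE and GRE v1 (PPTP).
+ */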
+static struct npc_kpu_profile_action kpu8_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_HTTP_DATA, 20, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_HTTP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_HTTPS_DATA, 20, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_HTTPS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_PPTP_DATA, 20, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_PPTP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_TCP_DATA, 20, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_UNK_PORT, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_HTTP_DATA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_HTTP_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_HTTPS_DATA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_HTTPS_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_PPTP_DATA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_PPTP_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_TCP_DATA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_TCP, NPC_F_TCP_UNK_PORT_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLAN, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLAN_NOVNI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LD, NPC_EC_VXLAN, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NSH, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_MPLS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NOVNI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NOVNI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NOVNI, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NOVNI_NSH, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NOVNI_MPLS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_VXLANGPE_NONP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE, 8, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_OAM, 8, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_CRI_OPT, 8, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_OAM_CRI_OPT,
+		8, 0x3f, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE, 8, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_OAM,
+		8, 0x3f, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_CRI_OPT,
+		8, 0x3f, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_OAM_CRI_OPT,
+		8, 0x3f, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE, 8, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_OAM, 8, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_CRI_OPT,
+		8, 0x3f, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GENEVE_OAM_CRI_OPT,
+		8, 0x3f, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GTP_GTPC, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GTP_GTPU_G_PDU, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_GTP_GTPU_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		7, 0, NPC_S_KPU16_UDP_DATA, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_UDP, NPC_F_UDP_UNK_PORT, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_SCTP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_ICMP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_IGMP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_ICMP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_ESP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_AH, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		2, 0, NPC_S_KPU11_TU_ETHER, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_NVGRE, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LD, NPC_EC_NVGRE, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 4, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM_KEY_SEQ,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 4, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 6, 10,
+		0, 0, NPC_S_KPU9_TU_MPLS_IN_GRE_VXLAN, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_MPLS, NPC_F_GRE_HAS_CSUM_KEY_SEQ,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 4, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_CSUM, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_CSUM_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_CSUM_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU9_TU_NSH, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE_NSH, NPC_F_GRE_HAS_CSUM_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 4, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 4, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM_KEY, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		3, 0, NPC_S_KPU12_TU_IP6, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_CSUM_KEY_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_HAS_ROUTE, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_UNK_PROTO, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LD, NPC_EC_GRE, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU11_TU_PPP, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_VER1, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU11_TU_PPP, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_VER1_HAS_SEQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU11_TU_PPP, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_VER1_HAS_ACK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU11_TU_PPP, 16, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_VER1_HAS_SEQ_ACK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LD, NPC_LT_LD_GRE, NPC_F_GRE_VER1_UNK_PROTO, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LD, NPC_EC_GRE_VER1, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LD, NPC_EC_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
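+/* KPU9: tunnel MPLS label stack and NSH next-protocol dispatch. */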
+static struct npc_kpu_profile_action kpu9_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS_PL, 4, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS_PL, 8, 0,
+		NPC_LID_LD, NPC_LT_NA, NPC_F_MPLS_2_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS_PL, 12, 0,
+		NPC_LID_LD, NPC_LT_NA, NPC_F_MPLS_3_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 4, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS, 12, 0,
+		NPC_LID_LD, NPC_LT_NA, NPC_F_MPLS_4_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS_PL, 4, 1,
+		NPC_LID_LD, NPC_LT_LD_TU_MPLS, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS_PL, 8, 1,
+		NPC_LID_LD, NPC_LT_LD_TU_MPLS, NPC_F_MPLS_2_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS_PL, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_TU_MPLS, NPC_F_MPLS_3_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 4, 0,
+		0, 0, NPC_S_KPU10_TU_MPLS, 12, 1,
+		NPC_LID_LD, NPC_LT_LD_TU_MPLS, NPC_F_MPLS_4_LABELS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		2, 0, NPC_S_KPU12_TU_IP, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		2, 0, NPC_S_KPU12_TU_IP6, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		1, 0, NPC_S_KPU11_TU_ETHER, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU10_TU_NSH, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		1, 0, NPC_S_KPU11_TU_MPLS_IN_NSH, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_LE, NPC_EC_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu10_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU12_TU_IP, 4, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU12_TU_IP6, 4, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		0, 0, NPC_S_KPU11_TU_ETHER, 8, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		0, 0, NPC_S_KPU11_TU_ETHER, 4, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LB, NPC_EC_L2_MPLS_2MANY, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU12_TU_IP, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU12_TU_IP6, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		0, 0, NPC_S_KPU11_TU_ETHER, 4, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		0, 0, NPC_S_KPU11_TU_ETHER, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		1, 0, NPC_S_KPU12_TU_IP, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		1, 0, NPC_S_KPU12_TU_IP6, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20,
+		0, 0, NPC_S_KPU11_TU_ETHER, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU11_TU_3RD_NSH, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU11_TU_MPLS_IN_NSH, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 1, 0x3f,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_LE, NPC_EC_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LD, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu11_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP, 14, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP6, 14, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU12_TU_ARP, 14, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP6, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU12_TU_ARP, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_CTAG_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP, 22, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP6, 22, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU12_TU_ARP, 22, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER,
+		NPC_F_TU_ETHER_STAG_CTAG_UNK, 0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP6, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU12_TU_ARP, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_STAG_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP, 22, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP6, 22, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU12_TU_ARP, 22, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ_CTAG, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER,
+		NPC_F_TU_ETHER_QINQ_CTAG_UNK, 0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 6, 0, 0,
+		0, 0, NPC_S_KPU12_TU_IP6, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU12_TU_ARP, 18, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_QINQ_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_ETHER, NPC_F_TU_ETHER_UNK, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_PPP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_MPLS_IN_NSH, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LE, NPC_LT_LE_TU_3RD_NSH, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LE, NPC_EC_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LE, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu12_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0,
+		2, 0, NPC_S_KPU15_TU_TCP, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		2, 0, NPC_S_KPU15_TU_UDP, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_SCTP, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ICMP, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_IGMP, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ESP, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_AH, 20, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_UNK_PROTO, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0,
+		2, 0, NPC_S_KPU15_TU_TCP, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		2, 0, NPC_S_KPU15_TU_UDP, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_SCTP, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ICMP, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_IGMP, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ESP, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_AH, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, NPC_F_IP_HAS_OPTIONS, 0, 0xf,
+		0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP,
+		NPC_F_IP_UNK_PROTO_HAS_OPTIONS, 0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_LF, NPC_EC_IP_VER, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_ARP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 12, 0,
+		2, 0, NPC_S_KPU15_TU_TCP, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		2, 0, NPC_S_KPU15_TU_UDP, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_SCTP, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ICMP, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ICMP6, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_ESP, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		2, 0, NPC_S_KPU15_TU_AH, 40, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 2, 0, 0,
+		0, 0, NPC_S_KPU13_TU_IP6_EXT, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, NPC_F_IP6_HAS_EXT, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LF, NPC_EC_IP6_VER, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LF, NPC_LT_LF_TU_IP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LF, NPC_EC_UNK, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LF, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu13_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu14_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu15_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_HTTP_DATA, 20, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_HTTP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_HTTPS_DATA, 20, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_HTTPS, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_PPTP_DATA, 20, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_PPTP, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_TCP_DATA, 20, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_UNK_PORT, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_HTTP_DATA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_HTTP_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_HTTPS_DATA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_HTTPS_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_PPTP_DATA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_PPTP_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_TCP_DATA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_TCP, NPC_F_TCP_UNK_PORT_HAS_OPTIONS,
+		12, 0xf0, 1, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 0, NPC_S_KPU16_UDP_DATA, 8, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_UDP, NPC_F_UDP_UNK_PORT, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_SCTP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_ICMP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_IGMP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_ICMP6, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_ESP, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LG, NPC_LT_LG_TU_AH, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_LG, NPC_EC_L4, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 0,
+		NPC_LID_LG, NPC_LT_NA, 0, 0, 0,
+		0, 0,
+	},
+};
+
+static struct npc_kpu_profile_action kpu16_action_entries[] = {
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LH, NPC_LT_LH_TCP_DATA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LH, NPC_LT_LH_HTTP_DATA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LH, NPC_LT_LH_HTTPS_DATA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LH, NPC_LT_LH_PPTP_DATA, 0, 0, 0,
+		0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR, 0, 0, 0,
+		0, 1, NPC_S_NA, 0, 1,
+		NPC_LID_LH, NPC_LT_LH_UDP_DATA, 0, 0, 0,
+		0, 0,
+	},
+};
+
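+/* Each KPU's CAM table and action table have matching entry counts,
+ * suggesting a one-to-one mapping: when CAM entry N of a KPU matches,
+ * action entry N of the same KPU is applied. The table below records,
+ * per KPU, each table's size and base pointer for the profile loader
+ * (which lives elsewhere in the driver) to consume.
+ */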
+static struct npc_kpu_profile npc_kpu_profiles[] = {
+	{
+		ARRAY_SIZE(kpu1_cam_entries),
+		ARRAY_SIZE(kpu1_action_entries),
+		&kpu1_cam_entries[0],
+		&kpu1_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu2_cam_entries),
+		ARRAY_SIZE(kpu2_action_entries),
+		&kpu2_cam_entries[0],
+		&kpu2_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu3_cam_entries),
+		ARRAY_SIZE(kpu3_action_entries),
+		&kpu3_cam_entries[0],
+		&kpu3_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu4_cam_entries),
+		ARRAY_SIZE(kpu4_action_entries),
+		&kpu4_cam_entries[0],
+		&kpu4_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu5_cam_entries),
+		ARRAY_SIZE(kpu5_action_entries),
+		&kpu5_cam_entries[0],
+		&kpu5_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu6_cam_entries),
+		ARRAY_SIZE(kpu6_action_entries),
+		&kpu6_cam_entries[0],
+		&kpu6_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu7_cam_entries),
+		ARRAY_SIZE(kpu7_action_entries),
+		&kpu7_cam_entries[0],
+		&kpu7_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu8_cam_entries),
+		ARRAY_SIZE(kpu8_action_entries),
+		&kpu8_cam_entries[0],
+		&kpu8_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu9_cam_entries),
+		ARRAY_SIZE(kpu9_action_entries),
+		&kpu9_cam_entries[0],
+		&kpu9_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu10_cam_entries),
+		ARRAY_SIZE(kpu10_action_entries),
+		&kpu10_cam_entries[0],
+		&kpu10_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu11_cam_entries),
+		ARRAY_SIZE(kpu11_action_entries),
+		&kpu11_cam_entries[0],
+		&kpu11_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu12_cam_entries),
+		ARRAY_SIZE(kpu12_action_entries),
+		&kpu12_cam_entries[0],
+		&kpu12_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu13_cam_entries),
+		ARRAY_SIZE(kpu13_action_entries),
+		&kpu13_cam_entries[0],
+		&kpu13_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu14_cam_entries),
+		ARRAY_SIZE(kpu14_action_entries),
+		&kpu14_cam_entries[0],
+		&kpu14_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu15_cam_entries),
+		ARRAY_SIZE(kpu15_action_entries),
+		&kpu15_cam_entries[0],
+		&kpu15_action_entries[0],
+	},
+	{
+		ARRAY_SIZE(kpu16_cam_entries),
+		ARRAY_SIZE(kpu16_action_entries),
+		&kpu16_cam_entries[0],
+		&kpu16_action_entries[0],
+	},
+};
+
+#endif /* NPC_PROFILE_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
new file mode 100644
index 0000000..e581091
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -0,0 +1,2532 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+
+#include "cgx.h"
+#include "rvu.h"
+#include "rvu_reg.h"
+
+#define DRV_NAME	"octeontx2-af"
+#define DRV_STRING	"Marvell OcteonTX2 RVU Admin Function Driver"
+#define DRV_VERSION	"1.0"
+
+static int rvu_get_hwvf(struct rvu *rvu, int pcifunc);
+
+static void rvu_set_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
+				struct rvu_block *block, int lf);
+static void rvu_clear_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
+				  struct rvu_block *block, int lf);
+static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc);
+
+static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+			 int type, int num,
+			 void (mbox_handler)(struct work_struct *),
+			 void (mbox_up_handler)(struct work_struct *));
+enum {
+	TYPE_AFVF,
+	TYPE_AFPF,
+};
+
+/* Supported devices */
+static const struct pci_device_id rvu_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_AF) },
+	{ 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION(DRV_STRING);
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, rvu_id_table);
+
+static char *mkex_profile; /* MKEX profile name */
+module_param(mkex_profile, charp, 0000);
+MODULE_PARM_DESC(mkex_profile, "MKEX profile name string");
+
+/* Poll an RVU block's register 'offset' for a 'zero'
+ * or 'nonzero' at the bits specified by 'mask'.
+ */
+int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero)
+{
+	unsigned long timeout = jiffies + usecs_to_jiffies(100);
+	void __iomem *reg;
+	u64 reg_val;
+
+	reg = rvu->afreg_base + ((block << 28) | offset);
+	while (time_before(jiffies, timeout)) {
+		reg_val = readq(reg);
+		if (zero && !(reg_val & mask))
+			return 0;
+		if (!zero && (reg_val & mask))
+			return 0;
+		usleep_range(1, 5);
+	}
+	return -EBUSY;
+}
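+
+/* Typical caller pattern (see rvu_lf_reset() later in this file): write a
+ * register that sets a busy/reset bit, then poll for that bit to clear:
+ *
+ *	rvu_write64(rvu, block->addr, block->lfreset_reg, lf | BIT_ULL(12));
+ *	err = rvu_poll_reg(rvu, block->addr, block->lfreset_reg,
+ *			   BIT_ULL(12), true);
+ */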
+
+int rvu_alloc_rsrc(struct rsrc_bmap *rsrc)
+{
+	int id;
+
+	if (!rsrc->bmap)
+		return -EINVAL;
+
+	id = find_first_zero_bit(rsrc->bmap, rsrc->max);
+	if (id >= rsrc->max)
+		return -ENOSPC;
+
+	__set_bit(id, rsrc->bmap);
+
+	return id;
+}
+
+int rvu_alloc_rsrc_contig(struct rsrc_bmap *rsrc, int nrsrc)
+{
+	int start;
+
+	if (!rsrc->bmap)
+		return -EINVAL;
+
+	start = bitmap_find_next_zero_area(rsrc->bmap, rsrc->max, 0, nrsrc, 0);
+	if (start >= rsrc->max)
+		return -ENOSPC;
+
+	bitmap_set(rsrc->bmap, start, nrsrc);
+	return start;
+}
+
+static void rvu_free_rsrc_contig(struct rsrc_bmap *rsrc, int nrsrc, int start)
+{
+	if (!rsrc->bmap)
+		return;
+	if (start >= rsrc->max)
+		return;
+
+	bitmap_clear(rsrc->bmap, start, nrsrc);
+}
+
+bool rvu_rsrc_check_contig(struct rsrc_bmap *rsrc, int nrsrc)
+{
+	int start;
+
+	if (!rsrc->bmap)
+		return false;
+
+	start = bitmap_find_next_zero_area(rsrc->bmap, rsrc->max, 0, nrsrc, 0);
+	if (start >= rsrc->max)
+		return false;
+
+	return true;
+}
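+
+/* Callers are expected to pair this check with rvu_alloc_rsrc_contig()
+ * (as rvu_set_msix_offset() does below), since an LF's MSIX vectors must
+ * be allocated as one contiguous range.
+ */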
+
+void rvu_free_rsrc(struct rsrc_bmap *rsrc, int id)
+{
+	if (!rsrc->bmap)
+		return;
+
+	__clear_bit(id, rsrc->bmap);
+}
+
+int rvu_rsrc_free_count(struct rsrc_bmap *rsrc)
+{
+	int used;
+
+	if (!rsrc->bmap)
+		return 0;
+
+	used = bitmap_weight(rsrc->bmap, rsrc->max);
+	return (rsrc->max - used);
+}
+
+int rvu_alloc_bitmap(struct rsrc_bmap *rsrc)
+{
+	rsrc->bmap = kcalloc(BITS_TO_LONGS(rsrc->max),
+			     sizeof(long), GFP_KERNEL);
+	if (!rsrc->bmap)
+		return -ENOMEM;
+	return 0;
+}
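+
+/* Usage sketch for the rsrc_bmap helpers above: set 'max' first, then
+ * allocate the bitmap and hand out IDs from it, as the block setup code
+ * later in this file does, e.g.
+ *
+ *	block->lf.max = cfg & 0xFFF;
+ *	err = rvu_alloc_bitmap(&block->lf);
+ *	lf = rvu_alloc_rsrc(&block->lf);
+ *	...
+ *	rvu_free_rsrc(&block->lf, lf);
+ */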
+
+/* Get block LF's HW index from a PF_FUNC's block slot number */
+int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot)
+{
+	u16 match = 0;
+	int lf;
+
+	mutex_lock(&rvu->rsrc_lock);
+	for (lf = 0; lf < block->lf.max; lf++) {
+		if (block->fn_map[lf] == pcifunc) {
+			if (slot == match) {
+				mutex_unlock(&rvu->rsrc_lock);
+				return lf;
+			}
+			match++;
+		}
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+	return -ENODEV;
+}
+
+/* Convert BLOCK_TYPE_E to a BLOCK_ADDR_E.
+ * Some silicon variants of OcteonTX2 support
+ * multiple blocks of the same type.
+ *
+ * @pcifunc has to be zero when no LF is yet attached.
+ */
+int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc)
+{
+	int devnum, blkaddr = -ENODEV;
+	u64 cfg, reg;
+	bool is_pf;
+
+	switch (blktype) {
+	case BLKTYPE_NPC:
+		blkaddr = BLKADDR_NPC;
+		goto exit;
+	case BLKTYPE_NPA:
+		blkaddr = BLKADDR_NPA;
+		goto exit;
+	case BLKTYPE_NIX:
+		/* For now assume NIX0 */
+		if (!pcifunc) {
+			blkaddr = BLKADDR_NIX0;
+			goto exit;
+		}
+		break;
+	case BLKTYPE_SSO:
+		blkaddr = BLKADDR_SSO;
+		goto exit;
+	case BLKTYPE_SSOW:
+		blkaddr = BLKADDR_SSOW;
+		goto exit;
+	case BLKTYPE_TIM:
+		blkaddr = BLKADDR_TIM;
+		goto exit;
+	case BLKTYPE_CPT:
+		/* For now assume CPT0 */
+		if (!pcifunc) {
+			blkaddr = BLKADDR_CPT0;
+			goto exit;
+		}
+		break;
+	}
+
+	/* Check if this is a RVU PF or VF */
+	if (pcifunc & RVU_PFVF_FUNC_MASK) {
+		is_pf = false;
+		devnum = rvu_get_hwvf(rvu, pcifunc);
+	} else {
+		is_pf = true;
+		devnum = rvu_get_pf(pcifunc);
+	}
+
+	/* Check if the 'pcifunc' has a NIX LF from 'BLKADDR_NIX0' */
+	if (blktype == BLKTYPE_NIX) {
+		reg = is_pf ? RVU_PRIV_PFX_NIX0_CFG : RVU_PRIV_HWVFX_NIX0_CFG;
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, reg | (devnum << 16));
+		if (cfg)
+			blkaddr = BLKADDR_NIX0;
+	}
+
+	/* Check if the 'pcifunc' has a CPT LF from 'BLKADDR_CPT0' */
+	if (blktype == BLKTYPE_CPT) {
+		reg = is_pf ? RVU_PRIV_PFX_CPT0_CFG : RVU_PRIV_HWVFX_CPT0_CFG;
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, reg | (devnum << 16));
+		if (cfg)
+			blkaddr = BLKADDR_CPT0;
+	}
+
+exit:
+	if (is_block_implemented(rvu->hw, blkaddr))
+		return blkaddr;
+	return -ENODEV;
+}
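+
+/* Note on 'pcifunc' encoding, as used above: the PF number sits in the
+ * upper bits (RVU_PFVF_PF_SHIFT/RVU_PFVF_PF_MASK) and a 1-based VF
+ * function number in the low bits (RVU_PFVF_FUNC_MASK); a func value of
+ * zero denotes the PF itself. rvu_get_pf()/rvu_get_hwvf() below decode it.
+ */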
+
+static void rvu_update_rsrc_map(struct rvu *rvu, struct rvu_pfvf *pfvf,
+				struct rvu_block *block, u16 pcifunc,
+				u16 lf, bool attach)
+{
+	int devnum, num_lfs = 0;
+	bool is_pf;
+	u64 reg;
+
+	if (lf >= block->lf.max) {
+		dev_err(&rvu->pdev->dev,
+			"%s: FATAL: LF %d is >= %s's max lfs i.e %d\n",
+			__func__, lf, block->name, block->lf.max);
+		return;
+	}
+
+	/* Check if this is for a RVU PF or VF */
+	if (pcifunc & RVU_PFVF_FUNC_MASK) {
+		is_pf = false;
+		devnum = rvu_get_hwvf(rvu, pcifunc);
+	} else {
+		is_pf = true;
+		devnum = rvu_get_pf(pcifunc);
+	}
+
+	block->fn_map[lf] = attach ? pcifunc : 0;
+
+	switch (block->type) {
+	case BLKTYPE_NPA:
+		pfvf->npalf = attach ? true : false;
+		num_lfs = pfvf->npalf;
+		break;
+	case BLKTYPE_NIX:
+		pfvf->nixlf = attach ? true : false;
+		num_lfs = pfvf->nixlf;
+		break;
+	case BLKTYPE_SSO:
+		attach ? pfvf->sso++ : pfvf->sso--;
+		num_lfs = pfvf->sso;
+		break;
+	case BLKTYPE_SSOW:
+		attach ? pfvf->ssow++ : pfvf->ssow--;
+		num_lfs = pfvf->ssow;
+		break;
+	case BLKTYPE_TIM:
+		attach ? pfvf->timlfs++ : pfvf->timlfs--;
+		num_lfs = pfvf->timlfs;
+		break;
+	case BLKTYPE_CPT:
+		attach ? pfvf->cptlfs++ : pfvf->cptlfs--;
+		num_lfs = pfvf->cptlfs;
+		break;
+	}
+
+	reg = is_pf ? block->pf_lfcnt_reg : block->vf_lfcnt_reg;
+	rvu_write64(rvu, BLKADDR_RVUM, reg | (devnum << 16), num_lfs);
+}
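+
+/* rvu_update_rsrc_map() keeps three views consistent: the per-LF owner
+ * map (block->fn_map), the per-PF/VF LF counters in 'pfvf', and the HW's
+ * per-device LF count register (pf_lfcnt_reg/vf_lfcnt_reg) written above.
+ */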
+
+inline int rvu_get_pf(u16 pcifunc)
+{
+	return (pcifunc >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
+}
+
+void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf)
+{
+	u64 cfg;
+
+	/* Get numVFs attached to this PF and first HWVF */
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+	*numvfs = (cfg >> 12) & 0xFF;
+	*hwvf = cfg & 0xFFF;
+}
+
+static int rvu_get_hwvf(struct rvu *rvu, int pcifunc)
+{
+	int pf, func;
+	u64 cfg;
+
+	pf = rvu_get_pf(pcifunc);
+	func = pcifunc & RVU_PFVF_FUNC_MASK;
+
+	/* Get first HWVF attached to this PF */
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+
+	return ((cfg & 0xFFF) + func - 1);
+}
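+
+/* Worked example: if RVU_PRIV_PFX_CFG(pf) reports the PF's first HWVF as
+ * 8 and 'pcifunc' carries func = 3 (VF2 of this PF, func being 1-based),
+ * the global HWVF index is 8 + 3 - 1 = 10.
+ */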
+
+struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc)
+{
+	/* Check if it is a PF or VF */
+	if (pcifunc & RVU_PFVF_FUNC_MASK)
+		return &rvu->hwvf[rvu_get_hwvf(rvu, pcifunc)];
+	else
+		return &rvu->pf[rvu_get_pf(pcifunc)];
+}
+
+static bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc)
+{
+	int pf, vf, nvfs;
+	u64 cfg;
+
+	pf = rvu_get_pf(pcifunc);
+	if (pf >= rvu->hw->total_pfs)
+		return false;
+
+	if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+		return true;
+
+	/* Check if VF is within number of VFs attached to this PF */
+	vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+	nvfs = (cfg >> 12) & 0xFF;
+	if (vf >= nvfs)
+		return false;
+
+	return true;
+}
+
+bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr)
+{
+	struct rvu_block *block;
+
+	if (blkaddr < BLKADDR_RVUM || blkaddr >= BLK_COUNT)
+		return false;
+
+	block = &hw->block[blkaddr];
+	return block->implemented;
+}
+
+static void rvu_check_block_implemented(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int blkid;
+	u64 cfg;
+
+	/* For each block check if 'implemented' bit is set */
+	for (blkid = 0; blkid < BLK_COUNT; blkid++) {
+		block = &hw->block[blkid];
+		cfg = rvupf_read64(rvu, RVU_PF_BLOCK_ADDRX_DISC(blkid));
+		if (cfg & BIT_ULL(11))
+			block->implemented = true;
+	}
+}
+
+int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf)
+{
+	int err;
+
+	if (!block->implemented)
+		return 0;
+
+	rvu_write64(rvu, block->addr, block->lfreset_reg, lf | BIT_ULL(12));
+	err = rvu_poll_reg(rvu, block->addr, block->lfreset_reg, BIT_ULL(12),
+			   true);
+	return err;
+}
+
+static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
+{
+	struct rvu_block *block = &rvu->hw->block[blkaddr];
+
+	if (!block->implemented)
+		return;
+
+	rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
+	rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+}
+
+static void rvu_reset_all_blocks(struct rvu *rvu)
+{
+	/* Do a HW reset of all RVU blocks */
+	rvu_block_reset(rvu, BLKADDR_NPA, NPA_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_NIX0, NIX_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_NPC, NPC_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_SSO, SSO_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_TIM, TIM_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_CPT0, CPT_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_NDC0, NDC_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_NDC1, NDC_AF_BLK_RST);
+	rvu_block_reset(rvu, BLKADDR_NDC2, NDC_AF_BLK_RST);
+}
+
+static void rvu_scan_block(struct rvu *rvu, struct rvu_block *block)
+{
+	struct rvu_pfvf *pfvf;
+	u64 cfg;
+	int lf;
+
+	for (lf = 0; lf < block->lf.max; lf++) {
+		cfg = rvu_read64(rvu, block->addr,
+				 block->lfcfg_reg | (lf << block->lfshift));
+		if (!(cfg & BIT_ULL(63)))
+			continue;
+
+		/* Set this resource as being used */
+		__set_bit(lf, block->lf.bmap);
+
+		/* Find out to whom this LF is attached */
+		pfvf = rvu_get_pfvf(rvu, (cfg >> 8) & 0xFFFF);
+		rvu_update_rsrc_map(rvu, pfvf, block,
+				    (cfg >> 8) & 0xFFFF, lf, true);
+
+		/* Set start MSIX vector for this LF within this PF/VF */
+		rvu_set_msix_offset(rvu, pfvf, block, lf);
+	}
+}
+
+static void rvu_check_min_msix_vec(struct rvu *rvu, int nvecs, int pf, int vf)
+{
+	int min_vecs;
+
+	if (!vf)
+		goto check_pf;
+
+	if (!nvecs) {
+		dev_warn(rvu->dev,
+			 "PF%d:VF%d is configured with zero msix vectors, %d\n",
+			 pf, vf - 1, nvecs);
+	}
+	return;
+
+check_pf:
+	if (pf == 0)
+		min_vecs = RVU_AF_INT_VEC_CNT + RVU_PF_INT_VEC_CNT;
+	else
+		min_vecs = RVU_PF_INT_VEC_CNT;
+
+	if (!(nvecs < min_vecs))
+		return;
+	dev_warn(rvu->dev,
+		 "PF%d is configured with too few vectors, %d, min is %d\n",
+		 pf, nvecs, min_vecs);
+}
+
+static int rvu_setup_msix_resources(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	int pf, vf, numvfs, hwvf, err;
+	int nvecs, offset, max_msix;
+	struct rvu_pfvf *pfvf;
+	u64 cfg, phy_addr;
+	dma_addr_t iova;
+
+	for (pf = 0; pf < hw->total_pfs; pf++) {
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+		/* If PF is not enabled, nothing to do */
+		if (!((cfg >> 20) & 0x01))
+			continue;
+
+		rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
+
+		pfvf = &rvu->pf[pf];
+		/* Get num of MSIX vectors attached to this PF */
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_MSIX_CFG(pf));
+		pfvf->msix.max = ((cfg >> 32) & 0xFFF) + 1;
+		rvu_check_min_msix_vec(rvu, pfvf->msix.max, pf, 0);
+
+		/* Alloc msix bitmap for this PF */
+		err = rvu_alloc_bitmap(&pfvf->msix);
+		if (err)
+			return err;
+
+		/* Allocate memory for MSIX vector to RVU block LF mapping */
+		pfvf->msix_lfmap = devm_kcalloc(rvu->dev, pfvf->msix.max,
+						sizeof(u16), GFP_KERNEL);
+		if (!pfvf->msix_lfmap)
+			return -ENOMEM;
+
+		/* For PF0 (AF), firmware will set MSIX vector offsets for
+		 * AF, block AF and PF0_INT vectors, so jump to VFs.
+		 */
+		if (!pf)
+			goto setup_vfmsix;
+
+		/* Set MSIX offset for PF's 'RVU_PF_INT_VEC' vectors.
+		 * These are allocated on driver init and never freed,
+		 * so no need to set 'msix_lfmap' for these.
+		 */
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_INT_CFG(pf));
+		nvecs = (cfg >> 12) & 0xFF;
+		cfg &= ~0x7FFULL;
+		offset = rvu_alloc_rsrc_contig(&pfvf->msix, nvecs);
+		rvu_write64(rvu, BLKADDR_RVUM,
+			    RVU_PRIV_PFX_INT_CFG(pf), cfg | offset);
+setup_vfmsix:
+		/* Alloc msix bitmap for VFs */
+		for (vf = 0; vf < numvfs; vf++) {
+			pfvf =  &rvu->hwvf[hwvf + vf];
+			/* Get num of MSIX vectors attached to this VF */
+			cfg = rvu_read64(rvu, BLKADDR_RVUM,
+					 RVU_PRIV_PFX_MSIX_CFG(pf));
+			pfvf->msix.max = (cfg & 0xFFF) + 1;
+			rvu_check_min_msix_vec(rvu, pfvf->msix.max, pf, vf + 1);
+
+			/* Alloc msix bitmap for this VF */
+			err = rvu_alloc_bitmap(&pfvf->msix);
+			if (err)
+				return err;
+
+			pfvf->msix_lfmap =
+				devm_kcalloc(rvu->dev, pfvf->msix.max,
+					     sizeof(u16), GFP_KERNEL);
+			if (!pfvf->msix_lfmap)
+				return -ENOMEM;
+
+			/* Set MSIX offset for HWVF's 'RVU_VF_INT_VEC' vectors.
+			 * These are allocated on driver init and never freed,
+			 * so no need to set 'msix_lfmap' for these.
+			 */
+			cfg = rvu_read64(rvu, BLKADDR_RVUM,
+					 RVU_PRIV_HWVFX_INT_CFG(hwvf + vf));
+			nvecs = (cfg >> 12) & 0xFF;
+			cfg &= ~0x7FFULL;
+			offset = rvu_alloc_rsrc_contig(&pfvf->msix, nvecs);
+			rvu_write64(rvu, BLKADDR_RVUM,
+				    RVU_PRIV_HWVFX_INT_CFG(hwvf + vf),
+				    cfg | offset);
+		}
+	}
+
+	/* HW interprets RVU_AF_MSIXTR_BASE address as an IOVA, hence
+	 * create an IOMMU mapping for the physical address configured by
+	 * firmware and reconfigure RVU_AF_MSIXTR_BASE with the IOVA.
+	 */
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_CONST);
+	max_msix = cfg & 0xFFFFF;
+	phy_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_MSIXTR_BASE);
+	iova = dma_map_resource(rvu->dev, phy_addr,
+				max_msix * PCI_MSIX_ENTRY_SIZE,
+				DMA_BIDIRECTIONAL, 0);
+
+	if (dma_mapping_error(rvu->dev, iova))
+		return -ENOMEM;
+
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_MSIXTR_BASE, (u64)iova);
+	rvu->msix_base_iova = iova;
+
+	return 0;
+}
+
+static void rvu_free_hw_resources(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	struct rvu_pfvf  *pfvf;
+	int id, max_msix;
+	u64 cfg;
+
+	rvu_npa_freemem(rvu);
+	rvu_npc_freemem(rvu);
+	rvu_nix_freemem(rvu);
+
+	/* Free block LF bitmaps */
+	for (id = 0; id < BLK_COUNT; id++) {
+		block = &hw->block[id];
+		kfree(block->lf.bmap);
+	}
+
+	/* Free MSIX bitmaps */
+	for (id = 0; id < hw->total_pfs; id++) {
+		pfvf = &rvu->pf[id];
+		kfree(pfvf->msix.bmap);
+	}
+
+	for (id = 0; id < hw->total_vfs; id++) {
+		pfvf = &rvu->hwvf[id];
+		kfree(pfvf->msix.bmap);
+	}
+
+	/* Unmap MSIX vector base IOVA mapping */
+	if (!rvu->msix_base_iova)
+		return;
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_CONST);
+	max_msix = cfg & 0xFFFFF;
+	dma_unmap_resource(rvu->dev, rvu->msix_base_iova,
+			   max_msix * PCI_MSIX_ENTRY_SIZE,
+			   DMA_BIDIRECTIONAL, 0);
+
+	mutex_destroy(&rvu->rsrc_lock);
+}
+
+static int rvu_setup_hw_resources(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int blkid, err;
+	u64 cfg;
+
+	/* Get HW supported max RVU PF & VF count */
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_CONST);
+	hw->total_pfs = (cfg >> 32) & 0xFF;
+	hw->total_vfs = (cfg >> 20) & 0xFFF;
+	hw->max_vfs_per_pf = (cfg >> 40) & 0xFF;
+
+	/* Init NPA LF's bitmap */
+	block = &hw->block[BLKADDR_NPA];
+	if (!block->implemented)
+		goto nix;
+	cfg = rvu_read64(rvu, BLKADDR_NPA, NPA_AF_CONST);
+	block->lf.max = (cfg >> 16) & 0xFFF;
+	block->addr = BLKADDR_NPA;
+	block->type = BLKTYPE_NPA;
+	block->lfshift = 8;
+	block->lookup_reg = NPA_AF_RVU_LF_CFG_DEBUG;
+	block->pf_lfcnt_reg = RVU_PRIV_PFX_NPA_CFG;
+	block->vf_lfcnt_reg = RVU_PRIV_HWVFX_NPA_CFG;
+	block->lfcfg_reg = NPA_PRIV_LFX_CFG;
+	block->msixcfg_reg = NPA_PRIV_LFX_INT_CFG;
+	block->lfreset_reg = NPA_AF_LF_RST;
+	sprintf(block->name, "NPA");
+	err = rvu_alloc_bitmap(&block->lf);
+	if (err)
+		return err;
+
+nix:
+	/* Init NIX LF's bitmap */
+	block = &hw->block[BLKADDR_NIX0];
+	if (!block->implemented)
+		goto sso;
+	cfg = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_CONST2);
+	block->lf.max = cfg & 0xFFF;
+	block->addr = BLKADDR_NIX0;
+	block->type = BLKTYPE_NIX;
+	block->lfshift = 8;
+	block->lookup_reg = NIX_AF_RVU_LF_CFG_DEBUG;
+	block->pf_lfcnt_reg = RVU_PRIV_PFX_NIX0_CFG;
+	block->vf_lfcnt_reg = RVU_PRIV_HWVFX_NIX0_CFG;
+	block->lfcfg_reg = NIX_PRIV_LFX_CFG;
+	block->msixcfg_reg = NIX_PRIV_LFX_INT_CFG;
+	block->lfreset_reg = NIX_AF_LF_RST;
+	sprintf(block->name, "NIX");
+	err = rvu_alloc_bitmap(&block->lf);
+	if (err)
+		return err;
+
+sso:
+	/* Init SSO group's bitmap */
+	block = &hw->block[BLKADDR_SSO];
+	if (!block->implemented)
+		goto ssow;
+	cfg = rvu_read64(rvu, BLKADDR_SSO, SSO_AF_CONST);
+	block->lf.max = cfg & 0xFFFF;
+	block->addr = BLKADDR_SSO;
+	block->type = BLKTYPE_SSO;
+	block->multislot = true;
+	block->lfshift = 3;
+	block->lookup_reg = SSO_AF_RVU_LF_CFG_DEBUG;
+	block->pf_lfcnt_reg = RVU_PRIV_PFX_SSO_CFG;
+	block->vf_lfcnt_reg = RVU_PRIV_HWVFX_SSO_CFG;
+	block->lfcfg_reg = SSO_PRIV_LFX_HWGRP_CFG;
+	block->msixcfg_reg = SSO_PRIV_LFX_HWGRP_INT_CFG;
+	block->lfreset_reg = SSO_AF_LF_HWGRP_RST;
+	sprintf(block->name, "SSO GROUP");
+	err = rvu_alloc_bitmap(&block->lf);
+	if (err)
+		return err;
+
+ssow:
+	/* Init SSO workslot's bitmap */
+	block = &hw->block[BLKADDR_SSOW];
+	if (!block->implemented)
+		goto tim;
+	block->lf.max = (cfg >> 56) & 0xFF;
+	block->addr = BLKADDR_SSOW;
+	block->type = BLKTYPE_SSOW;
+	block->multislot = true;
+	block->lfshift = 3;
+	block->lookup_reg = SSOW_AF_RVU_LF_HWS_CFG_DEBUG;
+	block->pf_lfcnt_reg = RVU_PRIV_PFX_SSOW_CFG;
+	block->vf_lfcnt_reg = RVU_PRIV_HWVFX_SSOW_CFG;
+	block->lfcfg_reg = SSOW_PRIV_LFX_HWS_CFG;
+	block->msixcfg_reg = SSOW_PRIV_LFX_HWS_INT_CFG;
+	block->lfreset_reg = SSOW_AF_LF_HWS_RST;
+	sprintf(block->name, "SSOWS");
+	err = rvu_alloc_bitmap(&block->lf);
+	if (err)
+		return err;
+
+tim:
+	/* Init TIM LF's bitmap */
+	block = &hw->block[BLKADDR_TIM];
+	if (!block->implemented)
+		goto cpt;
+	cfg = rvu_read64(rvu, BLKADDR_TIM, TIM_AF_CONST);
+	block->lf.max = cfg & 0xFFFF;
+	block->addr = BLKADDR_TIM;
+	block->type = BLKTYPE_TIM;
+	block->multislot = true;
+	block->lfshift = 3;
+	block->lookup_reg = TIM_AF_RVU_LF_CFG_DEBUG;
+	block->pf_lfcnt_reg = RVU_PRIV_PFX_TIM_CFG;
+	block->vf_lfcnt_reg = RVU_PRIV_HWVFX_TIM_CFG;
+	block->lfcfg_reg = TIM_PRIV_LFX_CFG;
+	block->msixcfg_reg = TIM_PRIV_LFX_INT_CFG;
+	block->lfreset_reg = TIM_AF_LF_RST;
+	sprintf(block->name, "TIM");
+	err = rvu_alloc_bitmap(&block->lf);
+	if (err)
+		return err;
+
+cpt:
+	/* Init CPT LF's bitmap */
+	block = &hw->block[BLKADDR_CPT0];
+	if (!block->implemented)
+		goto init;
+	cfg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS0);
+	block->lf.max = cfg & 0xFF;
+	block->addr = BLKADDR_CPT0;
+	block->type = BLKTYPE_CPT;
+	block->multislot = true;
+	block->lfshift = 3;
+	block->lookup_reg = CPT_AF_RVU_LF_CFG_DEBUG;
+	block->pf_lfcnt_reg = RVU_PRIV_PFX_CPT0_CFG;
+	block->vf_lfcnt_reg = RVU_PRIV_HWVFX_CPT0_CFG;
+	block->lfcfg_reg = CPT_PRIV_LFX_CFG;
+	block->msixcfg_reg = CPT_PRIV_LFX_INT_CFG;
+	block->lfreset_reg = CPT_AF_LF_RST;
+	sprintf(block->name, "CPT");
+	err = rvu_alloc_bitmap(&block->lf);
+	if (err)
+		return err;
+
+init:
+	/* Allocate memory for PFVF data */
+	rvu->pf = devm_kcalloc(rvu->dev, hw->total_pfs,
+			       sizeof(struct rvu_pfvf), GFP_KERNEL);
+	if (!rvu->pf)
+		return -ENOMEM;
+
+	rvu->hwvf = devm_kcalloc(rvu->dev, hw->total_vfs,
+				 sizeof(struct rvu_pfvf), GFP_KERNEL);
+	if (!rvu->hwvf)
+		return -ENOMEM;
+
+	mutex_init(&rvu->rsrc_lock);
+
+	err = rvu_setup_msix_resources(rvu);
+	if (err)
+		return err;
+
+	for (blkid = 0; blkid < BLK_COUNT; blkid++) {
+		block = &hw->block[blkid];
+		if (!block->lf.bmap)
+			continue;
+
+		/* Allocate memory for block LF/slot to pcifunc mapping info */
+		block->fn_map = devm_kcalloc(rvu->dev, block->lf.max,
+					     sizeof(u16), GFP_KERNEL);
+		if (!block->fn_map)
+			return -ENOMEM;
+
+		/* Scan all blocks to check if low level firmware has
+		 * already provisioned any of the resources to a PF/VF.
+		 */
+		rvu_scan_block(rvu, block);
+	}
+
+	err = rvu_npc_init(rvu);
+	if (err)
+		goto exit;
+
+	err = rvu_cgx_init(rvu);
+	if (err)
+		goto exit;
+
+	err = rvu_npa_init(rvu);
+	if (err)
+		goto cgx_err;
+
+	err = rvu_nix_init(rvu);
+	if (err)
+		goto cgx_err;
+
+	return 0;
+
+cgx_err:
+	rvu_cgx_exit(rvu);
+exit:
+	return err;
+}
+
+/* NPA and NIX admin queue APIs */
+void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq)
+{
+	if (!aq)
+		return;
+
+	qmem_free(rvu->dev, aq->inst);
+	qmem_free(rvu->dev, aq->res);
+	devm_kfree(rvu->dev, aq);
+}
+
+int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue,
+		 int qsize, int inst_size, int res_size)
+{
+	struct admin_queue *aq;
+	int err;
+
+	*ad_queue = devm_kzalloc(rvu->dev, sizeof(*aq), GFP_KERNEL);
+	if (!*ad_queue)
+		return -ENOMEM;
+	aq = *ad_queue;
+
+	/* Alloc memory for instructions, i.e. the AQ */
+	err = qmem_alloc(rvu->dev, &aq->inst, qsize, inst_size);
+	if (err) {
+		devm_kfree(rvu->dev, aq);
+		return err;
+	}
+
+	/* Alloc memory for results */
+	err = qmem_alloc(rvu->dev, &aq->res, qsize, res_size);
+	if (err) {
+		rvu_aq_free(rvu, aq);
+		return err;
+	}
+
+	spin_lock_init(&aq->lock);
+	return 0;
+}
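+
+/* Usage sketch (callers are the NPA/NIX admin queue init paths elsewhere
+ * in the driver): allocate an AQ of 'qsize' instructions of 'inst_size'
+ * bytes each, plus a parallel result ring, and free it on teardown:
+ *
+ *	err = rvu_aq_alloc(rvu, &aq, qsize, inst_size, res_size);
+ *	if (err)
+ *		return err;
+ *	...
+ *	rvu_aq_free(rvu, aq);
+ */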
+
+static int rvu_mbox_handler_ready(struct rvu *rvu, struct msg_req *req,
+				  struct ready_msg_rsp *rsp)
+{
+	return 0;
+}
+
+/* Get current count of an RVU block's LFs/slots
+ * provisioned to a given RVU func.
+ */
+static u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blktype)
+{
+	switch (blktype) {
+	case BLKTYPE_NPA:
+		return pfvf->npalf ? 1 : 0;
+	case BLKTYPE_NIX:
+		return pfvf->nixlf ? 1 : 0;
+	case BLKTYPE_SSO:
+		return pfvf->sso;
+	case BLKTYPE_SSOW:
+		return pfvf->ssow;
+	case BLKTYPE_TIM:
+		return pfvf->timlfs;
+	case BLKTYPE_CPT:
+		return pfvf->cptlfs;
+	}
+	return 0;
+}
+
+bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype)
+{
+	struct rvu_pfvf *pfvf;
+
+	if (!is_pf_func_valid(rvu, pcifunc))
+		return false;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+
+	/* Check if this PFFUNC has a LF of type blktype attached */
+	if (!rvu_get_rsrc_mapcount(pfvf, blktype))
+		return false;
+
+	return true;
+}
+
+static int rvu_lookup_rsrc(struct rvu *rvu, struct rvu_block *block,
+			   int pcifunc, int slot)
+{
+	u64 val;
+
+	val = ((u64)pcifunc << 24) | (slot << 16) | (1ULL << 13);
+	rvu_write64(rvu, block->addr, block->lookup_reg, val);
+	/* Wait for the lookup to finish */
+	/* TODO: put some timeout here */
+	while (rvu_read64(rvu, block->addr, block->lookup_reg) & (1ULL << 13))
+		;
+
+	val = rvu_read64(rvu, block->addr, block->lookup_reg);
+
+	/* Check LF valid bit */
+	if (!(val & (1ULL << 12)))
+		return -1;
+
+	return (val & 0xFFF);
+}
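+
+/* Lookup register layout as used above: 'pcifunc' at bits 24 and up,
+ * 'slot' at bits 16 and up, bit 13 triggers the lookup (and reads back
+ * as busy), bit 12 reports 'LF valid' and bits 11:0 hold the LF index.
+ */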
+
+static void rvu_detach_block(struct rvu *rvu, int pcifunc, int blktype)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int slot, lf, num_lfs;
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, blktype, pcifunc);
+	if (blkaddr < 0)
+		return;
+
+	block = &hw->block[blkaddr];
+
+	num_lfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+	if (!num_lfs)
+		return;
+
+	for (slot = 0; slot < num_lfs; slot++) {
+		lf = rvu_lookup_rsrc(rvu, block, pcifunc, slot);
+		if (lf < 0) /* This should never happen */
+			continue;
+
+		/* Disable the LF */
+		rvu_write64(rvu, blkaddr, block->lfcfg_reg |
+			    (lf << block->lfshift), 0x00ULL);
+
+		/* Update SW maintained mapping info as well */
+		rvu_update_rsrc_map(rvu, pfvf, block,
+				    pcifunc, lf, false);
+
+		/* Free the resource */
+		rvu_free_rsrc(&block->lf, lf);
+
+		/* Clear MSIX vector offset for this LF */
+		rvu_clear_msix_offset(rvu, pfvf, block, lf);
+	}
+}
+
+static int rvu_detach_rsrcs(struct rvu *rvu, struct rsrc_detach *detach,
+			    u16 pcifunc)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	bool detach_all = true;
+	struct rvu_block *block;
+	int blkid;
+
+	mutex_lock(&rvu->rsrc_lock);
+
+	/* Check for partial resource detach */
+	if (detach && detach->partial)
+		detach_all = false;
+
+	/* Check each RVU block for LFs attached to this func;
+	 * if any, detach them.
+	 */
+	for (blkid = 0; blkid < BLK_COUNT; blkid++) {
+		block = &hw->block[blkid];
+		if (!block->lf.bmap)
+			continue;
+		if (!detach_all && detach) {
+			if (blkid == BLKADDR_NPA && !detach->npalf)
+				continue;
+			else if ((blkid == BLKADDR_NIX0) && !detach->nixlf)
+				continue;
+			else if ((blkid == BLKADDR_SSO) && !detach->sso)
+				continue;
+			else if ((blkid == BLKADDR_SSOW) && !detach->ssow)
+				continue;
+			else if ((blkid == BLKADDR_TIM) && !detach->timlfs)
+				continue;
+			else if ((blkid == BLKADDR_CPT0) && !detach->cptlfs)
+				continue;
+		}
+		rvu_detach_block(rvu, pcifunc, block->type);
+	}
+
+	mutex_unlock(&rvu->rsrc_lock);
+	return 0;
+}
+
+static int rvu_mbox_handler_detach_resources(struct rvu *rvu,
+					     struct rsrc_detach *detach,
+					     struct msg_rsp *rsp)
+{
+	return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc);
+}
+
+static void rvu_attach_block(struct rvu *rvu, int pcifunc,
+			     int blktype, int num_lfs)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int slot, lf;
+	int blkaddr;
+	u64 cfg;
+
+	if (!num_lfs)
+		return;
+
+	blkaddr = rvu_get_blkaddr(rvu, blktype, 0);
+	if (blkaddr < 0)
+		return;
+
+	block = &hw->block[blkaddr];
+	if (!block->lf.bmap)
+		return;
+
+	for (slot = 0; slot < num_lfs; slot++) {
+		/* Allocate the resource */
+		lf = rvu_alloc_rsrc(&block->lf);
+		if (lf < 0)
+			return;
+
+		cfg = (1ULL << 63) | (pcifunc << 8) | slot;
+		rvu_write64(rvu, blkaddr, block->lfcfg_reg |
+			    (lf << block->lfshift), cfg);
+		rvu_update_rsrc_map(rvu, pfvf, block,
+				    pcifunc, lf, true);
+
+		/* Set start MSIX vector for this LF within this PF/VF */
+		rvu_set_msix_offset(rvu, pfvf, block, lf);
+	}
+}
+
+static int rvu_check_rsrc_availability(struct rvu *rvu,
+				       struct rsrc_attach *req, u16 pcifunc)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int free_lfs, mappedlfs;
+
+	/* Only one NPA LF can be attached */
+	if (req->npalf && !rvu_get_rsrc_mapcount(pfvf, BLKTYPE_NPA)) {
+		block = &hw->block[BLKADDR_NPA];
+		free_lfs = rvu_rsrc_free_count(&block->lf);
+		if (!free_lfs)
+			goto fail;
+	} else if (req->npalf) {
+		dev_err(&rvu->pdev->dev,
+			"Func 0x%x: Invalid req, already has NPA\n",
+			 pcifunc);
+		return -EINVAL;
+	}
+
+	/* Only one NIX LF can be attached */
+	if (req->nixlf && !rvu_get_rsrc_mapcount(pfvf, BLKTYPE_NIX)) {
+		block = &hw->block[BLKADDR_NIX0];
+		free_lfs = rvu_rsrc_free_count(&block->lf);
+		if (!free_lfs)
+			goto fail;
+	} else if (req->nixlf) {
+		dev_err(&rvu->pdev->dev,
+			"Func 0x%x: Invalid req, already has NIX\n",
+			pcifunc);
+		return -EINVAL;
+	}
+
+	if (req->sso) {
+		block = &hw->block[BLKADDR_SSO];
+		/* Is the request within limits? */
+		if (req->sso > block->lf.max) {
+			dev_err(&rvu->pdev->dev,
+				"Func 0x%x: Invalid SSO req, %d > max %d\n",
+				 pcifunc, req->sso, block->lf.max);
+			return -EINVAL;
+		}
+		mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+		free_lfs = rvu_rsrc_free_count(&block->lf);
+		/* Check if additional resources are available */
+		if (req->sso > mappedlfs &&
+		    ((req->sso - mappedlfs) > free_lfs))
+			goto fail;
+	}
+
+	if (req->ssow) {
+		block = &hw->block[BLKADDR_SSOW];
+		if (req->ssow > block->lf.max) {
+			dev_err(&rvu->pdev->dev,
+				"Func 0x%x: Invalid SSOW req, %d > max %d\n",
+				 pcifunc, req->ssow, block->lf.max);
+			return -EINVAL;
+		}
+		mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+		free_lfs = rvu_rsrc_free_count(&block->lf);
+		if (req->ssow > mappedlfs &&
+		    ((req->ssow - mappedlfs) > free_lfs))
+			goto fail;
+	}
+
+	if (req->timlfs) {
+		block = &hw->block[BLKADDR_TIM];
+		if (req->timlfs > block->lf.max) {
+			dev_err(&rvu->pdev->dev,
+				"Func 0x%x: Invalid TIMLF req, %d > max %d\n",
+				 pcifunc, req->timlfs, block->lf.max);
+			return -EINVAL;
+		}
+		mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+		free_lfs = rvu_rsrc_free_count(&block->lf);
+		if (req->timlfs > mappedlfs &&
+		    ((req->timlfs - mappedlfs) > free_lfs))
+			goto fail;
+	}
+
+	if (req->cptlfs) {
+		block = &hw->block[BLKADDR_CPT0];
+		if (req->cptlfs > block->lf.max) {
+			dev_err(&rvu->pdev->dev,
+				"Func 0x%x: Invalid CPTLF req, %d > max %d\n",
+				 pcifunc, req->cptlfs, block->lf.max);
+			return -EINVAL;
+		}
+		mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+		free_lfs = rvu_rsrc_free_count(&block->lf);
+		if (req->cptlfs > mappedlfs &&
+		    ((req->cptlfs - mappedlfs) > free_lfs))
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	dev_info(rvu->dev, "Request for %s failed\n", block->name);
+	return -ENOSPC;
+}
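+
+/* The rule applied above for multi-slot blocks: a request for N LFs is
+ * satisfiable if N <= mappedlfs (already attached, nothing new needed) or
+ * if the shortfall (N - mappedlfs) fits within the block's free LFs.
+ */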
+
+static int rvu_mbox_handler_attach_resources(struct rvu *rvu,
+					     struct rsrc_attach *attach,
+					     struct msg_rsp *rsp)
+{
+	u16 pcifunc = attach->hdr.pcifunc;
+	int err;
+
+	/* If first request, detach all existing attached resources */
+	if (!attach->modify)
+		rvu_detach_rsrcs(rvu, NULL, pcifunc);
+
+	mutex_lock(&rvu->rsrc_lock);
+
+	/* Check if the request can be accommodated */
+	err = rvu_check_rsrc_availability(rvu, attach, pcifunc);
+	if (err)
+		goto exit;
+
+	/* Now attach the requested resources */
+	if (attach->npalf)
+		rvu_attach_block(rvu, pcifunc, BLKTYPE_NPA, 1);
+
+	if (attach->nixlf)
+		rvu_attach_block(rvu, pcifunc, BLKTYPE_NIX, 1);
+
+	if (attach->sso) {
+		/* An RVU func doesn't know which exact LF or slot is attached
+		 * to it; it always sees them as slots 0, 1, 2. So for a
+		 * 'modify' request, simply detach all existing attached
+		 * LFs/slots and attach afresh.
+		 */
+		if (attach->modify)
+			rvu_detach_block(rvu, pcifunc, BLKTYPE_SSO);
+		rvu_attach_block(rvu, pcifunc, BLKTYPE_SSO, attach->sso);
+	}
+
+	if (attach->ssow) {
+		if (attach->modify)
+			rvu_detach_block(rvu, pcifunc, BLKTYPE_SSOW);
+		rvu_attach_block(rvu, pcifunc, BLKTYPE_SSOW, attach->ssow);
+	}
+
+	if (attach->timlfs) {
+		if (attach->modify)
+			rvu_detach_block(rvu, pcifunc, BLKTYPE_TIM);
+		rvu_attach_block(rvu, pcifunc, BLKTYPE_TIM, attach->timlfs);
+	}
+
+	if (attach->cptlfs) {
+		if (attach->modify)
+			rvu_detach_block(rvu, pcifunc, BLKTYPE_CPT);
+		rvu_attach_block(rvu, pcifunc, BLKTYPE_CPT, attach->cptlfs);
+	}
+
+exit:
+	mutex_unlock(&rvu->rsrc_lock);
+	return err;
+}
+
+static u16 rvu_get_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
+			       int blkaddr, int lf)
+{
+	u16 vec;
+
+	if (lf < 0)
+		return MSIX_VECTOR_INVALID;
+
+	for (vec = 0; vec < pfvf->msix.max; vec++) {
+		if (pfvf->msix_lfmap[vec] == MSIX_BLKLF(blkaddr, lf))
+			return vec;
+	}
+	return MSIX_VECTOR_INVALID;
+}
+
+static void rvu_set_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
+				struct rvu_block *block, int lf)
+{
+	u16 nvecs, vec, offset;
+	u64 cfg;
+
+	cfg = rvu_read64(rvu, block->addr, block->msixcfg_reg |
+			 (lf << block->lfshift));
+	nvecs = (cfg >> 12) & 0xFF;
+
+	/* Check and alloc MSIX vectors, must be contiguous */
+	if (!rvu_rsrc_check_contig(&pfvf->msix, nvecs))
+		return;
+
+	offset = rvu_alloc_rsrc_contig(&pfvf->msix, nvecs);
+
+	/* Config MSIX offset in LF */
+	rvu_write64(rvu, block->addr, block->msixcfg_reg |
+		    (lf << block->lfshift), (cfg & ~0x7FFULL) | offset);
+
+	/* Update the bitmap as well */
+	for (vec = 0; vec < nvecs; vec++)
+		pfvf->msix_lfmap[offset + vec] = MSIX_BLKLF(block->addr, lf);
+}
+
+static void rvu_clear_msix_offset(struct rvu *rvu, struct rvu_pfvf *pfvf,
+				  struct rvu_block *block, int lf)
+{
+	u16 nvecs, vec, offset;
+	u64 cfg;
+
+	cfg = rvu_read64(rvu, block->addr, block->msixcfg_reg |
+			 (lf << block->lfshift));
+	nvecs = (cfg >> 12) & 0xFF;
+
+	/* Clear MSIX offset in LF */
+	rvu_write64(rvu, block->addr, block->msixcfg_reg |
+		    (lf << block->lfshift), cfg & ~0x7FFULL);
+
+	offset = rvu_get_msix_offset(rvu, pfvf, block->addr, lf);
+
+	/* Update the mapping */
+	for (vec = 0; vec < nvecs; vec++)
+		pfvf->msix_lfmap[offset + vec] = 0;
+
+	/* Free the same in MSIX bitmap */
+	rvu_free_rsrc_contig(&pfvf->msix, nvecs, offset);
+}
+
+static int rvu_mbox_handler_msix_offset(struct rvu *rvu, struct msg_req *req,
+					struct msix_offset_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_pfvf *pfvf;
+	int lf, slot;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	if (!pfvf->msix.bmap)
+		return 0;
+
+	/* Set MSIX offsets for each block's LFs attached to this PF/VF */
+	lf = rvu_get_lf(rvu, &hw->block[BLKADDR_NPA], pcifunc, 0);
+	rsp->npa_msixoff = rvu_get_msix_offset(rvu, pfvf, BLKADDR_NPA, lf);
+
+	lf = rvu_get_lf(rvu, &hw->block[BLKADDR_NIX0], pcifunc, 0);
+	rsp->nix_msixoff = rvu_get_msix_offset(rvu, pfvf, BLKADDR_NIX0, lf);
+
+	rsp->sso = pfvf->sso;
+	for (slot = 0; slot < rsp->sso; slot++) {
+		lf = rvu_get_lf(rvu, &hw->block[BLKADDR_SSO], pcifunc, slot);
+		rsp->sso_msixoff[slot] =
+			rvu_get_msix_offset(rvu, pfvf, BLKADDR_SSO, lf);
+	}
+
+	rsp->ssow = pfvf->ssow;
+	for (slot = 0; slot < rsp->ssow; slot++) {
+		lf = rvu_get_lf(rvu, &hw->block[BLKADDR_SSOW], pcifunc, slot);
+		rsp->ssow_msixoff[slot] =
+			rvu_get_msix_offset(rvu, pfvf, BLKADDR_SSOW, lf);
+	}
+
+	rsp->timlfs = pfvf->timlfs;
+	for (slot = 0; slot < rsp->timlfs; slot++) {
+		lf = rvu_get_lf(rvu, &hw->block[BLKADDR_TIM], pcifunc, slot);
+		rsp->timlf_msixoff[slot] =
+			rvu_get_msix_offset(rvu, pfvf, BLKADDR_TIM, lf);
+	}
+
+	rsp->cptlfs = pfvf->cptlfs;
+	for (slot = 0; slot < rsp->cptlfs; slot++) {
+		lf = rvu_get_lf(rvu, &hw->block[BLKADDR_CPT0], pcifunc, slot);
+		rsp->cptlf_msixoff[slot] =
+			rvu_get_msix_offset(rvu, pfvf, BLKADDR_CPT0, lf);
+	}
+	return 0;
+}
+
+static int rvu_mbox_handler_vf_flr(struct rvu *rvu, struct msg_req *req,
+				   struct msg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	u16 vf, numvfs;
+	u64 cfg;
+
+	vf = pcifunc & RVU_PFVF_FUNC_MASK;
+	cfg = rvu_read64(rvu, BLKADDR_RVUM,
+			 RVU_PRIV_PFX_CFG(rvu_get_pf(pcifunc)));
+	numvfs = (cfg >> 12) & 0xFF;
+
+	if (vf && vf <= numvfs)
+		__rvu_flr_handler(rvu, pcifunc);
+	else
+		return RVU_INVALID_VF_ID;
+
+	return 0;
+}
+
+static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid,
+				struct mbox_msghdr *req)
+{
+	struct rvu *rvu = pci_get_drvdata(mbox->pdev);
+
+	/* Check if valid; if not, reply with an invalid msg */
+	if (req->sig != OTX2_MBOX_REQ_SIG)
+		goto bad_message;
+
+	switch (req->id) {
+#define M(_name, _id, _fn_name, _req_type, _rsp_type)			\
+	case _id: {							\
+		struct _rsp_type *rsp;					\
+		int err;						\
+									\
+		rsp = (struct _rsp_type *)otx2_mbox_alloc_msg(		\
+			mbox, devid,					\
+			sizeof(struct _rsp_type));			\
+		/* some handlers should complete even if reply */	\
+		/* could not be allocated */				\
+		if (!rsp &&						\
+		    _id != MBOX_MSG_DETACH_RESOURCES &&			\
+		    _id != MBOX_MSG_NIX_TXSCH_FREE &&			\
+		    _id != MBOX_MSG_VF_FLR)				\
+			return -ENOMEM;					\
+		if (rsp) {						\
+			rsp->hdr.id = _id;				\
+			rsp->hdr.sig = OTX2_MBOX_RSP_SIG;		\
+			rsp->hdr.pcifunc = req->pcifunc;		\
+			rsp->hdr.rc = 0;				\
+		}							\
+									\
+		err = rvu_mbox_handler_ ## _fn_name(rvu,		\
+						    (struct _req_type *)req, \
+						    rsp);		\
+		if (rsp && err)						\
+			rsp->hdr.rc = err;				\
+									\
+		return rsp ? err : -ENOMEM;				\
+	}
+MBOX_MESSAGES
+#undef M
+
+bad_message:
+	default:
+		otx2_reply_invalid_msg(mbox, devid, req->pcifunc, req->id);
+		return -ENODEV;
+	}
+}
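+
+/* For reference, each M() line in MBOX_MESSAGES expands to one 'case'
+ * above. E.g., assuming the list contains
+ * M(READY, 0x001, ready, msg_req, ready_msg_rsp), the generated case
+ * allocates a ready_msg_rsp, stamps its header and dispatches to
+ * rvu_mbox_handler_ready() defined earlier in this file.
+ */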
+
+static void __rvu_mbox_handler(struct rvu_work *mwork, int type)
+{
+	struct rvu *rvu = mwork->rvu;
+	int offset, err, id, devid;
+	struct otx2_mbox_dev *mdev;
+	struct mbox_hdr *req_hdr;
+	struct mbox_msghdr *msg;
+	struct mbox_wq_info *mw;
+	struct otx2_mbox *mbox;
+
+	switch (type) {
+	case TYPE_AFPF:
+		mw = &rvu->afpf_wq_info;
+		break;
+	case TYPE_AFVF:
+		mw = &rvu->afvf_wq_info;
+		break;
+	default:
+		return;
+	}
+
+	devid = mwork - mw->mbox_wrk;
+	mbox = &mw->mbox;
+	mdev = &mbox->dev[devid];
+
+	/* Process received mbox messages */
+	req_hdr = mdev->mbase + mbox->rx_start;
+	if (req_hdr->num_msgs == 0)
+		return;
+
+	offset = mbox->rx_start + ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
+
+	for (id = 0; id < req_hdr->num_msgs; id++) {
+		msg = mdev->mbase + offset;
+
+		/* Set which PF/VF sent this message based on mbox IRQ */
+		switch (type) {
+		case TYPE_AFPF:
+			msg->pcifunc &=
+				~(RVU_PFVF_PF_MASK << RVU_PFVF_PF_SHIFT);
+			msg->pcifunc |= (devid << RVU_PFVF_PF_SHIFT);
+			break;
+		case TYPE_AFVF:
+			msg->pcifunc &=
+				~(RVU_PFVF_FUNC_MASK << RVU_PFVF_FUNC_SHIFT);
+			msg->pcifunc |= (devid << RVU_PFVF_FUNC_SHIFT) + 1;
+			break;
+		}
+
+		err = rvu_process_mbox_msg(mbox, devid, msg);
+		if (!err) {
+			offset = mbox->rx_start + msg->next_msgoff;
+			continue;
+		}
+
+		if (msg->pcifunc & RVU_PFVF_FUNC_MASK)
+			dev_warn(rvu->dev, "Error %d when processing message %s (0x%x) from PF%d:VF%d\n",
+				 err, otx2_mbox_id2name(msg->id),
+				 msg->id, devid,
+				 (msg->pcifunc & RVU_PFVF_FUNC_MASK) - 1);
+		else
+			dev_warn(rvu->dev, "Error %d when processing message %s (0x%x) from PF%d\n",
+				 err, otx2_mbox_id2name(msg->id),
+				 msg->id, devid);
+	}
+
+	/* Send mbox responses to VF/PF */
+	otx2_mbox_msg_send(mbox, devid);
+}
+
+static inline void rvu_afpf_mbox_handler(struct work_struct *work)
+{
+	struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+	__rvu_mbox_handler(mwork, TYPE_AFPF);
+}
+
+static inline void rvu_afvf_mbox_handler(struct work_struct *work)
+{
+	struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+	__rvu_mbox_handler(mwork, TYPE_AFVF);
+}
+
+static void __rvu_mbox_up_handler(struct rvu_work *mwork, int type)
+{
+	struct rvu *rvu = mwork->rvu;
+	struct otx2_mbox_dev *mdev;
+	struct mbox_hdr *rsp_hdr;
+	struct mbox_msghdr *msg;
+	struct mbox_wq_info *mw;
+	struct otx2_mbox *mbox;
+	int offset, id, devid;
+
+	switch (type) {
+	case TYPE_AFPF:
+		mw = &rvu->afpf_wq_info;
+		break;
+	case TYPE_AFVF:
+		mw = &rvu->afvf_wq_info;
+		break;
+	default:
+		return;
+	}
+
+	devid = mwork - mw->mbox_wrk_up;
+	mbox = &mw->mbox_up;
+	mdev = &mbox->dev[devid];
+
+	rsp_hdr = mdev->mbase + mbox->rx_start;
+	if (rsp_hdr->num_msgs == 0) {
+		dev_warn(rvu->dev, "mbox up handler: num_msgs = 0\n");
+		return;
+	}
+
+	offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN);
+
+	for (id = 0; id < rsp_hdr->num_msgs; id++) {
+		msg = mdev->mbase + offset;
+
+		if (msg->id >= MBOX_MSG_MAX) {
+			dev_err(rvu->dev,
+				"Mbox msg with unknown ID 0x%x\n", msg->id);
+			goto end;
+		}
+
+		if (msg->sig != OTX2_MBOX_RSP_SIG) {
+			dev_err(rvu->dev,
+				"Mbox msg with wrong signature %x, ID 0x%x\n",
+				msg->sig, msg->id);
+			goto end;
+		}
+
+		switch (msg->id) {
+		case MBOX_MSG_CGX_LINK_EVENT:
+			break;
+		default:
+			if (msg->rc)
+				dev_err(rvu->dev,
+					"Mbox msg response has err %d, ID 0x%x\n",
+					msg->rc, msg->id);
+			break;
+		}
+end:
+		offset = mbox->rx_start + msg->next_msgoff;
+		mdev->msgs_acked++;
+	}
+
+	otx2_mbox_reset(mbox, devid);
+}
+
+static inline void rvu_afpf_mbox_up_handler(struct work_struct *work)
+{
+	struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+	__rvu_mbox_up_handler(mwork, TYPE_AFPF);
+}
+
+static inline void rvu_afvf_mbox_up_handler(struct work_struct *work)
+{
+	struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+
+	__rvu_mbox_up_handler(mwork, TYPE_AFVF);
+}
+
+static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+			 int type, int num,
+			 void (mbox_handler)(struct work_struct *),
+			 void (mbox_up_handler)(struct work_struct *))
+{
+	void __iomem *hwbase = NULL, *reg_base;
+	int err, i, dir, dir_up;
+	struct rvu_work *mwork;
+	const char *name;
+	u64 bar4_addr;
+
+	switch (type) {
+	case TYPE_AFPF:
+		name = "rvu_afpf_mailbox";
+		bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR);
+		dir = MBOX_DIR_AFPF;
+		dir_up = MBOX_DIR_AFPF_UP;
+		reg_base = rvu->afreg_base;
+		break;
+	case TYPE_AFVF:
+		name = "rvu_afvf_mailbox";
+		bar4_addr = rvupf_read64(rvu, RVU_PF_VF_BAR4_ADDR);
+		dir = MBOX_DIR_PFVF;
+		dir_up = MBOX_DIR_PFVF_UP;
+		reg_base = rvu->pfreg_base;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	mw->mbox_wq = alloc_workqueue(name,
+				      WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+				      num);
+	if (!mw->mbox_wq)
+		return -ENOMEM;
+
+	mw->mbox_wrk = devm_kcalloc(rvu->dev, num,
+				    sizeof(struct rvu_work), GFP_KERNEL);
+	if (!mw->mbox_wrk) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	mw->mbox_wrk_up = devm_kcalloc(rvu->dev, num,
+				       sizeof(struct rvu_work), GFP_KERNEL);
+	if (!mw->mbox_wrk_up) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	/* Mailbox is a reserved memory (in RAM) region shared between
+	 * RVU devices; it shouldn't be mapped as device memory, so that
+	 * unaligned accesses are allowed.
+	 */
+	hwbase = ioremap_wc(bar4_addr, MBOX_SIZE * num);
+	if (!hwbase) {
+		dev_err(rvu->dev, "Unable to map mailbox region\n");
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	err = otx2_mbox_init(&mw->mbox, hwbase, rvu->pdev, reg_base, dir, num);
+	if (err)
+		goto exit;
+
+	err = otx2_mbox_init(&mw->mbox_up, hwbase, rvu->pdev,
+			     reg_base, dir_up, num);
+	if (err)
+		goto exit;
+
+	for (i = 0; i < num; i++) {
+		mwork = &mw->mbox_wrk[i];
+		mwork->rvu = rvu;
+		INIT_WORK(&mwork->work, mbox_handler);
+
+		mwork = &mw->mbox_wrk_up[i];
+		mwork->rvu = rvu;
+		INIT_WORK(&mwork->work, mbox_up_handler);
+	}
+
+	return 0;
+exit:
+	if (hwbase)
+		iounmap((void __iomem *)hwbase);
+	destroy_workqueue(mw->mbox_wq);
+	return err;
+}
+
+static void rvu_mbox_destroy(struct mbox_wq_info *mw)
+{
+	if (mw->mbox_wq) {
+		flush_workqueue(mw->mbox_wq);
+		destroy_workqueue(mw->mbox_wq);
+		mw->mbox_wq = NULL;
+	}
+
+	if (mw->mbox.hwbase)
+		iounmap((void __iomem *)mw->mbox.hwbase);
+
+	otx2_mbox_destroy(&mw->mbox);
+	otx2_mbox_destroy(&mw->mbox_up);
+}
+
+static void rvu_queue_work(struct mbox_wq_info *mw, int first,
+			   int mdevs, u64 intr)
+{
+	struct otx2_mbox_dev *mdev;
+	struct otx2_mbox *mbox;
+	struct mbox_hdr *hdr;
+	int i;
+
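+	/* Bit 0 of 'intr' corresponds to device 'first'. Worked example
+	 * (hedged): for the second VF bank this is called with first = 64,
+	 * so bit 0 of 'intr' queues work for VF64's mailbox.
+	 */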
+	for (i = first; i < mdevs; i++) {
+		/* Bits in 'intr' start from 0 relative to 'first' */
+		if (!(intr & BIT_ULL(i - first)))
+			continue;
+
+		mbox = &mw->mbox;
+		mdev = &mbox->dev[i];
+		hdr = mdev->mbase + mbox->rx_start;
+		if (hdr->num_msgs)
+			queue_work(mw->mbox_wq, &mw->mbox_wrk[i].work);
+
+		mbox = &mw->mbox_up;
+		mdev = &mbox->dev[i];
+		hdr = mdev->mbase + mbox->rx_start;
+		if (hdr->num_msgs)
+			queue_work(mw->mbox_wq, &mw->mbox_wrk_up[i].work);
+	}
+}
+
+static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
+{
+	struct rvu *rvu = (struct rvu *)rvu_irq;
+	int vfs = rvu->vfs;
+	u64 intr;
+
+	intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT);
+	/* Clear interrupts */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT, intr);
+
+	/* Sync with mbox memory region */
+	rmb();
+
+	rvu_queue_work(&rvu->afpf_wq_info, 0, rvu->hw->total_pfs, intr);
+
+	/* Handle VF interrupts */
+	if (vfs > 64) {
+		intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(1));
+		rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(1), intr);
+
+		rvu_queue_work(&rvu->afvf_wq_info, 64, vfs, intr);
+		vfs -= 64;
+	}
+
+	intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(0));
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), intr);
+
+	rvu_queue_work(&rvu->afvf_wq_info, 0, vfs, intr);
+
+	return IRQ_HANDLED;
+}
+
+static void rvu_enable_mbox_intr(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+
+	/* Clear spurious irqs, if any */
+	rvu_write64(rvu, BLKADDR_RVUM,
+		    RVU_AF_PFAF_MBOX_INT, INTR_MASK(hw->total_pfs));
+
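+	/* Hedged illustration, assuming INTR_MASK(n) yields an n-bit mask
+	 * of ones: with 4 PFs, INTR_MASK(4) & ~1ULL == 0xE, which enables
+	 * PF1-PF3 and leaves PF0 masked.
+	 */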
+	/* Enable mailbox interrupt for all PFs except PF0, i.e. the AF itself */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT_ENA_W1S,
+		    INTR_MASK(hw->total_pfs) & ~1ULL);
+}
+
+static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr)
+{
+	struct rvu_block *block;
+	int slot, lf, num_lfs;
+	int err;
+
+	block = &rvu->hw->block[blkaddr];
+	num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc),
+					block->type);
+	if (!num_lfs)
+		return;
+	for (slot = 0; slot < num_lfs; slot++) {
+		lf = rvu_get_lf(rvu, block, pcifunc, slot);
+		if (lf < 0)
+			continue;
+
+		/* Cleanup LF and reset it */
+		if (block->addr == BLKADDR_NIX0)
+			rvu_nix_lf_teardown(rvu, pcifunc, block->addr, lf);
+		else if (block->addr == BLKADDR_NPA)
+			rvu_npa_lf_teardown(rvu, pcifunc, lf);
+
+		err = rvu_lf_reset(rvu, block, lf);
+		if (err) {
+			dev_err(rvu->dev, "Failed to reset blkaddr %d LF%d\n",
+				block->addr, lf);
+		}
+	}
+}
+
+static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
+{
+	mutex_lock(&rvu->flr_lock);
+	/* Reset order should reflect inter-block dependencies:
+	 * 1. Reset any packet/work sources (NIX, CPT, TIM)
+	 * 2. Flush and reset SSO/SSOW
+	 * 3. Cleanup pools (NPA)
+	 */
+	rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NIX0);
+	rvu_blklf_teardown(rvu, pcifunc, BLKADDR_CPT0);
+	rvu_blklf_teardown(rvu, pcifunc, BLKADDR_TIM);
+	rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSOW);
+	rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSO);
+	rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NPA);
+	rvu_detach_rsrcs(rvu, NULL, pcifunc);
+	mutex_unlock(&rvu->flr_lock);
+}
+
+static void rvu_afvf_flr_handler(struct rvu *rvu, int vf)
+{
+	int reg = 0;
+
+	/* AF's VFs belong to PF0, so pcifunc = 0 | (vf + 1) */
+	__rvu_flr_handler(rvu, vf + 1);
+
+	if (vf >= 64) {
+		reg = 1;
+		vf = vf - 64;
+	}
+
+	/* Signal FLR finish and enable IRQ */
+	rvupf_write64(rvu, RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+	rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
+}
+
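+/* flr_wrk[] holds one entry per PF followed by one entry per AF VF.
+ * Illustrative indexing (with a hypothetical total_pfs of 16): flr_wrk[3]
+ * handles PF3's FLR, while flr_wrk[18] maps to AF's VF2 and is routed to
+ * rvu_afvf_flr_handler(rvu, 18 - 16) below.
+ */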
+static void rvu_flr_handler(struct work_struct *work)
+{
+	struct rvu_work *flrwork = container_of(work, struct rvu_work, work);
+	struct rvu *rvu = flrwork->rvu;
+	u16 pcifunc, numvfs, vf;
+	u64 cfg;
+	int pf;
+
+	pf = flrwork - rvu->flr_wrk;
+	if (pf >= rvu->hw->total_pfs) {
+		rvu_afvf_flr_handler(rvu, pf - rvu->hw->total_pfs);
+		return;
+	}
+
+	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+	numvfs = (cfg >> 12) & 0xFF;
+	pcifunc  = pf << RVU_PFVF_PF_SHIFT;
+
+	for (vf = 0; vf < numvfs; vf++)
+		__rvu_flr_handler(rvu, (pcifunc | (vf + 1)));
+
+	__rvu_flr_handler(rvu, pcifunc);
+
+	/* Signal FLR finish */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFTRPEND, BIT_ULL(pf));
+
+	/* Enable interrupt */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S,  BIT_ULL(pf));
+}
+
+static void rvu_afvf_queue_flr_work(struct rvu *rvu, int start_vf, int numvfs)
+{
+	int dev, vf, reg = 0;
+	u64 intr;
+
+	if (start_vf >= 64)
+		reg = 1;
+
+	intr = rvupf_read64(rvu, RVU_PF_VFFLR_INTX(reg));
+	if (!intr)
+		return;
+
+	for (vf = 0; vf < numvfs; vf++) {
+		if (!(intr & BIT_ULL(vf)))
+			continue;
+		dev = vf + start_vf + rvu->hw->total_pfs;
+		queue_work(rvu->flr_wq, &rvu->flr_wrk[dev].work);
+		/* Clear and disable the interrupt */
+		rvupf_write64(rvu, RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf));
+		rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(reg), BIT_ULL(vf));
+	}
+}
+
+static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
+{
+	struct rvu *rvu = (struct rvu *)rvu_irq;
+	u64 intr;
+	u8  pf;
+
+	intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT);
+	if (!intr)
+		goto afvf_flr;
+
+	for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+		if (intr & (1ULL << pf)) {
+			/* PF is already dead, do only AF related operations */
+			queue_work(rvu->flr_wq, &rvu->flr_wrk[pf].work);
+			/* clear interrupt */
+			rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT,
+				    BIT_ULL(pf));
+			/* Disable the interrupt */
+			rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
+				    BIT_ULL(pf));
+		}
+	}
+
+afvf_flr:
+	rvu_afvf_queue_flr_work(rvu, 0, 64);
+	if (rvu->vfs > 64)
+		rvu_afvf_queue_flr_work(rvu, 64, rvu->vfs - 64);
+
+	return IRQ_HANDLED;
+}
+
+static void rvu_me_handle_vfset(struct rvu *rvu, int idx, u64 intr)
+{
+	int vf;
+
+	/* Nothing to be done here other than clearing the
+	 * TRPEND bit.
+	 */
+	for (vf = 0; vf < 64; vf++) {
+		if (intr & (1ULL << vf)) {
+			/* clear the trpend due to ME(master enable) */
+			rvupf_write64(rvu, RVU_PF_VFTRPENDX(idx), BIT_ULL(vf));
+			/* clear interrupt */
+			rvupf_write64(rvu, RVU_PF_VFME_INTX(idx), BIT_ULL(vf));
+		}
+	}
+}
+
+/* Handles ME interrupts from VFs of AF */
+static irqreturn_t rvu_me_vf_intr_handler(int irq, void *rvu_irq)
+{
+	struct rvu *rvu = (struct rvu *)rvu_irq;
+	int vfset;
+	u64 intr;
+
+	intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT);
+
+	for (vfset = 0; vfset <= 1; vfset++) {
+		intr = rvupf_read64(rvu, RVU_PF_VFME_INTX(vfset));
+		if (intr)
+			rvu_me_handle_vfset(rvu, vfset, intr);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Handles ME interrupts from PFs */
+static irqreturn_t rvu_me_pf_intr_handler(int irq, void *rvu_irq)
+{
+	struct rvu *rvu = (struct rvu *)rvu_irq;
+	u64 intr;
+	u8  pf;
+
+	intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT);
+
+	/* Nothing to be done here other than clearing the
+	 * TRPEND bit.
+	 */
+	for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+		if (intr & (1ULL << pf)) {
+			/* clear the trpend due to ME(master enable) */
+			rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFTRPEND,
+				    BIT_ULL(pf));
+			/* clear interrupt */
+			rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT,
+				    BIT_ULL(pf));
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void rvu_unregister_interrupts(struct rvu *rvu)
+{
+	int irq;
+
+	/* Disable the Mbox interrupt */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT_ENA_W1C,
+		    INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
+	/* Disable the PF FLR interrupt */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
+		    INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
+	/* Disable the PF ME interrupt */
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT_ENA_W1C,
+		    INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
+	for (irq = 0; irq < rvu->num_vec; irq++) {
+		if (rvu->irq_allocated[irq])
+			free_irq(pci_irq_vector(rvu->pdev, irq), rvu);
+	}
+
+	pci_free_irq_vectors(rvu->pdev);
+	rvu->num_vec = 0;
+}
+
+static int rvu_afvf_msix_vectors_num_ok(struct rvu *rvu)
+{
+	struct rvu_pfvf *pfvf = &rvu->pf[0];
+	int offset;
+
+	offset = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_INT_CFG(0)) & 0x3ff;
+
+	/* Make sure there are enough MSIX vectors configured so that
+	 * VF interrupts can be handled. An offset of zero means that the
+	 * PF vectors are not configured and would overlap the AF vectors.
+	 */
+	return (pfvf->msix.max >= RVU_AF_INT_VEC_CNT + RVU_PF_INT_VEC_CNT) &&
+	       offset;
+}
+
+static int rvu_register_interrupts(struct rvu *rvu)
+{
+	int ret, offset, pf_vec_start;
+
+	rvu->num_vec = pci_msix_vec_count(rvu->pdev);
+
+	rvu->irq_name = devm_kmalloc_array(rvu->dev, rvu->num_vec,
+					   NAME_SIZE, GFP_KERNEL);
+	if (!rvu->irq_name)
+		return -ENOMEM;
+
+	rvu->irq_allocated = devm_kcalloc(rvu->dev, rvu->num_vec,
+					  sizeof(bool), GFP_KERNEL);
+	if (!rvu->irq_allocated)
+		return -ENOMEM;
+
+	/* Enable MSI-X */
+	ret = pci_alloc_irq_vectors(rvu->pdev, rvu->num_vec,
+				    rvu->num_vec, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		dev_err(rvu->dev,
+			"RVUAF: Request for %d msix vectors failed, ret %d\n",
+			rvu->num_vec, ret);
+		return ret;
+	}
+
+	/* Register mailbox interrupt handler */
+	sprintf(&rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], "RVUAF Mbox");
+	ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_MBOX),
+			  rvu_mbox_intr_handler, 0,
+			  &rvu->irq_name[RVU_AF_INT_VEC_MBOX * NAME_SIZE], rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for mbox irq\n");
+		goto fail;
+	}
+
+	rvu->irq_allocated[RVU_AF_INT_VEC_MBOX] = true;
+
+	/* Enable mailbox interrupts from all PFs */
+	rvu_enable_mbox_intr(rvu);
+
+	/* Register FLR interrupt handler */
+	sprintf(&rvu->irq_name[RVU_AF_INT_VEC_PFFLR * NAME_SIZE],
+		"RVUAF FLR");
+	ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_PFFLR),
+			  rvu_flr_intr_handler, 0,
+			  &rvu->irq_name[RVU_AF_INT_VEC_PFFLR * NAME_SIZE],
+			  rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for FLR\n");
+		goto fail;
+	}
+	rvu->irq_allocated[RVU_AF_INT_VEC_PFFLR] = true;
+
+	/* Enable FLR interrupt for all PFs */
+	rvu_write64(rvu, BLKADDR_RVUM,
+		    RVU_AF_PFFLR_INT, INTR_MASK(rvu->hw->total_pfs));
+
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1S,
+		    INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
+	/* Register ME interrupt handler */
+	sprintf(&rvu->irq_name[RVU_AF_INT_VEC_PFME * NAME_SIZE],
+		"RVUAF ME");
+	ret = request_irq(pci_irq_vector(rvu->pdev, RVU_AF_INT_VEC_PFME),
+			  rvu_me_pf_intr_handler, 0,
+			  &rvu->irq_name[RVU_AF_INT_VEC_PFME * NAME_SIZE],
+			  rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for ME\n");
+		goto fail;
+	}
+	rvu->irq_allocated[RVU_AF_INT_VEC_PFME] = true;
+
+	/* Enable ME interrupt for all PFs */
+	rvu_write64(rvu, BLKADDR_RVUM,
+		    RVU_AF_PFME_INT, INTR_MASK(rvu->hw->total_pfs));
+
+	rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFME_INT_ENA_W1S,
+		    INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
+
+	if (!rvu_afvf_msix_vectors_num_ok(rvu))
+		return 0;
+
+	/* Get PF MSIX vectors offset. */
+	pf_vec_start = rvu_read64(rvu, BLKADDR_RVUM,
+				  RVU_PRIV_PFX_INT_CFG(0)) & 0x3ff;
+
+	/* Register MBOX0 interrupt. */
+	offset = pf_vec_start + RVU_PF_INT_VEC_VFPF_MBOX0;
+	sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF Mbox0");
+	ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+			  rvu_mbox_intr_handler, 0,
+			  &rvu->irq_name[offset * NAME_SIZE],
+			  rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for Mbox0\n");
+		goto fail;
+	}
+
+	rvu->irq_allocated[offset] = true;
+
+	/* Register MBOX1 interrupt. MBOX1 IRQ number follows MBOX0 so
+	 * simply increment current offset by 1.
+	 */
+	offset = pf_vec_start + RVU_PF_INT_VEC_VFPF_MBOX1;
+	sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF Mbox1");
+	ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+			  rvu_mbox_intr_handler, 0,
+			  &rvu->irq_name[offset * NAME_SIZE],
+			  rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for Mbox1\n");
+		goto fail;
+	}
+
+	rvu->irq_allocated[offset] = true;
+
+	/* Register FLR interrupt handler for AF's VFs */
+	offset = pf_vec_start + RVU_PF_INT_VEC_VFFLR0;
+	sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF FLR0");
+	ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+			  rvu_flr_intr_handler, 0,
+			  &rvu->irq_name[offset * NAME_SIZE], rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for RVUAFVF FLR0\n");
+		goto fail;
+	}
+	rvu->irq_allocated[offset] = true;
+
+	offset = pf_vec_start + RVU_PF_INT_VEC_VFFLR1;
+	sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF FLR1");
+	ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+			  rvu_flr_intr_handler, 0,
+			  &rvu->irq_name[offset * NAME_SIZE], rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for RVUAFVF FLR1\n");
+		goto fail;
+	}
+	rvu->irq_allocated[offset] = true;
+
+	/* Register ME interrupt handler for AF's VFs */
+	offset = pf_vec_start + RVU_PF_INT_VEC_VFME0;
+	sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF ME0");
+	ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+			  rvu_me_vf_intr_handler, 0,
+			  &rvu->irq_name[offset * NAME_SIZE], rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for RVUAFVF ME0\n");
+		goto fail;
+	}
+	rvu->irq_allocated[offset] = true;
+
+	offset = pf_vec_start + RVU_PF_INT_VEC_VFME1;
+	sprintf(&rvu->irq_name[offset * NAME_SIZE], "RVUAFVF ME1");
+	ret = request_irq(pci_irq_vector(rvu->pdev, offset),
+			  rvu_me_vf_intr_handler, 0,
+			  &rvu->irq_name[offset * NAME_SIZE], rvu);
+	if (ret) {
+		dev_err(rvu->dev,
+			"RVUAF: IRQ registration failed for RVUAFVF ME1\n");
+		goto fail;
+	}
+	rvu->irq_allocated[offset] = true;
+	return 0;
+
+fail:
+	rvu_unregister_interrupts(rvu);
+	return ret;
+}
+
+static void rvu_flr_wq_destroy(struct rvu *rvu)
+{
+	if (rvu->flr_wq) {
+		flush_workqueue(rvu->flr_wq);
+		destroy_workqueue(rvu->flr_wq);
+		rvu->flr_wq = NULL;
+	}
+}
+
+static int rvu_flr_init(struct rvu *rvu)
+{
+	int dev, num_devs;
+	u64 cfg;
+	int pf;
+
+	/* Enable FLR for all PFs */
+	for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+		rvu_write64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf),
+			    cfg | BIT_ULL(22));
+	}
+
+	rvu->flr_wq = alloc_workqueue("rvu_afpf_flr",
+				      WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+				       1);
+	if (!rvu->flr_wq)
+		return -ENOMEM;
+
+	num_devs = rvu->hw->total_pfs + pci_sriov_get_totalvfs(rvu->pdev);
+	rvu->flr_wrk = devm_kcalloc(rvu->dev, num_devs,
+				    sizeof(struct rvu_work), GFP_KERNEL);
+	if (!rvu->flr_wrk) {
+		destroy_workqueue(rvu->flr_wq);
+		return -ENOMEM;
+	}
+
+	for (dev = 0; dev < num_devs; dev++) {
+		rvu->flr_wrk[dev].rvu = rvu;
+		INIT_WORK(&rvu->flr_wrk[dev].work, rvu_flr_handler);
+	}
+
+	mutex_init(&rvu->flr_lock);
+
+	return 0;
+}
+
+static void rvu_disable_afvf_intr(struct rvu *rvu)
+{
+	int vfs = rvu->vfs;
+
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), INTR_MASK(vfs));
+	rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(vfs));
+	rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(vfs));
+	if (vfs <= 64)
+		return;
+
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1),
+		      INTR_MASK(vfs - 64));
+	rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(vfs - 64));
+	rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(vfs - 64));
+}
+
+static void rvu_enable_afvf_intr(struct rvu *rvu)
+{
+	int vfs = rvu->vfs;
+
+	/* Clear any pending interrupts and enable AF VF interrupts for
+	 * the first 64 VFs.
+	 */
+	/* Mbox */
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), INTR_MASK(vfs));
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(0), INTR_MASK(vfs));
+
+	/* FLR */
+	rvupf_write64(rvu, RVU_PF_VFFLR_INTX(0), INTR_MASK(vfs));
+	rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(vfs));
+	rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(vfs));
+
+	/* Same for remaining VFs, if any. */
+	if (vfs <= 64)
+		return;
+
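+	/* Worked example (hypothetical): with vfs = 96 the bank 0 writes
+	 * above cover VFs 0-63 (assuming INTR_MASK() saturates to a full
+	 * 64-bit mask when its argument exceeds 64), while the bank 1
+	 * writes below use INTR_MASK(96 - 64) = 0xFFFFFFFF for VFs 64-95.
+	 */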
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(1), INTR_MASK(vfs - 64));
+	rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INT_ENA_W1SX(1),
+		      INTR_MASK(vfs - 64));
+
+	rvupf_write64(rvu, RVU_PF_VFFLR_INTX(1), INTR_MASK(vfs - 64));
+	rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(vfs - 64));
+	rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(vfs - 64));
+}
+
+#define PCI_DEVID_OCTEONTX2_LBK 0xA061
+
+static int lbk_get_num_chans(void)
+{
+	struct pci_dev *pdev;
+	void __iomem *base;
+	int ret = -EIO;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_LBK,
+			      NULL);
+	if (!pdev)
+		goto err;
+
+	base = pci_ioremap_bar(pdev, 0);
+	if (!base)
+		goto err_put;
+
+	/* Read number of available LBK channels from LBK(0)_CONST register. */
+	ret = (readq(base + 0x10) >> 32) & 0xffff;
+	iounmap(base);
+err_put:
+	pci_dev_put(pdev);
+err:
+	return ret;
+}
+
+static int rvu_enable_sriov(struct rvu *rvu)
+{
+	struct pci_dev *pdev = rvu->pdev;
+	int err, chans, vfs;
+
+	if (!rvu_afvf_msix_vectors_num_ok(rvu)) {
+		dev_warn(&pdev->dev,
+			 "Skipping SRIOV enablement since not enough IRQs are available\n");
+		return 0;
+	}
+
+	chans = lbk_get_num_chans();
+	if (chans < 0)
+		return chans;
+
+	vfs = pci_sriov_get_totalvfs(pdev);
+
+	/* Limit VFs in case we have more VFs than LBK channels available. */
+	if (vfs > chans)
+		vfs = chans;
+
+	/* AF's VFs work in pairs and talk over consecutive loopback channels.
+	 * Thus we want to enable the maximum even number of VFs. If an
+	 * odd number of VFs is available, the last VF on the list
+	 * remains disabled.
+	 */
+	if (vfs & 0x1) {
+		dev_warn(&pdev->dev,
+			 "Number of VFs should be even. Enabling %d out of %d.\n",
+			 vfs - 1, vfs);
+		vfs--;
+	}
+
+	if (!vfs)
+		return 0;
+
+	/* Save the VF count for reference in the VF interrupt handlers.
+	 * Since interrupts might start arriving during SR-IOV enablement,
+	 * the ordinary API cannot be used to get the number of enabled VFs.
+	 */
+	rvu->vfs = vfs;
+
+	err = rvu_mbox_init(rvu, &rvu->afvf_wq_info, TYPE_AFVF, vfs,
+			    rvu_afvf_mbox_handler, rvu_afvf_mbox_up_handler);
+	if (err)
+		return err;
+
+	rvu_enable_afvf_intr(rvu);
+	/* Make sure IRQs are enabled before SRIOV. */
+	mb();
+
+	err = pci_enable_sriov(pdev, vfs);
+	if (err) {
+		rvu_disable_afvf_intr(rvu);
+		rvu_mbox_destroy(&rvu->afvf_wq_info);
+		return err;
+	}
+
+	return 0;
+}
+
+static void rvu_disable_sriov(struct rvu *rvu)
+{
+	rvu_disable_afvf_intr(rvu);
+	rvu_mbox_destroy(&rvu->afvf_wq_info);
+	pci_disable_sriov(rvu->pdev);
+}
+
+static void rvu_update_module_params(struct rvu *rvu)
+{
+	const char *default_pfl_name = "default";
+
+	strscpy(rvu->mkex_pfl_name,
+		mkex_profile ? mkex_profile : default_pfl_name, MKEX_NAME_LEN);
+}
+
+static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct rvu *rvu;
+	int    err;
+
+	rvu = devm_kzalloc(dev, sizeof(*rvu), GFP_KERNEL);
+	if (!rvu)
+		return -ENOMEM;
+
+	rvu->hw = devm_kzalloc(dev, sizeof(struct rvu_hwinfo), GFP_KERNEL);
+	if (!rvu->hw) {
+		devm_kfree(dev, rvu);
+		return -ENOMEM;
+	}
+
+	pci_set_drvdata(pdev, rvu);
+	rvu->pdev = pdev;
+	rvu->dev = &pdev->dev;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		goto err_freemem;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(dev, "PCI request regions failed 0x%x\n", err);
+		goto err_disable_device;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (err) {
+		dev_err(dev, "Unable to set DMA mask\n");
+		goto err_release_regions;
+	}
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (err) {
+		dev_err(dev, "Unable to set consistent DMA mask\n");
+		goto err_release_regions;
+	}
+
+	/* Map Admin function CSRs */
+	rvu->afreg_base = pcim_iomap(pdev, PCI_AF_REG_BAR_NUM, 0);
+	rvu->pfreg_base = pcim_iomap(pdev, PCI_PF_REG_BAR_NUM, 0);
+	if (!rvu->afreg_base || !rvu->pfreg_base) {
+		dev_err(dev, "Unable to map admin function CSRs, aborting\n");
+		err = -ENOMEM;
+		goto err_release_regions;
+	}
+
+	/* Store module params in rvu structure */
+	rvu_update_module_params(rvu);
+
+	/* Check which blocks the HW supports */
+	rvu_check_block_implemented(rvu);
+
+	rvu_reset_all_blocks(rvu);
+
+	err = rvu_setup_hw_resources(rvu);
+	if (err)
+		goto err_release_regions;
+
+	/* Init mailbox btw AF and PFs */
+	err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF,
+			    rvu->hw->total_pfs, rvu_afpf_mbox_handler,
+			    rvu_afpf_mbox_up_handler);
+	if (err)
+		goto err_hwsetup;
+
+	err = rvu_flr_init(rvu);
+	if (err)
+		goto err_mbox;
+
+	err = rvu_register_interrupts(rvu);
+	if (err)
+		goto err_flr;
+
+	/* Enable AF's VFs (if any) */
+	err = rvu_enable_sriov(rvu);
+	if (err)
+		goto err_irq;
+
+	return 0;
+err_irq:
+	rvu_unregister_interrupts(rvu);
+err_flr:
+	rvu_flr_wq_destroy(rvu);
+err_mbox:
+	rvu_mbox_destroy(&rvu->afpf_wq_info);
+err_hwsetup:
+	rvu_cgx_exit(rvu);
+	rvu_reset_all_blocks(rvu);
+	rvu_free_hw_resources(rvu);
+err_release_regions:
+	pci_release_regions(pdev);
+err_disable_device:
+	pci_disable_device(pdev);
+err_freemem:
+	pci_set_drvdata(pdev, NULL);
+	devm_kfree(&pdev->dev, rvu->hw);
+	devm_kfree(dev, rvu);
+	return err;
+}
+
+static void rvu_remove(struct pci_dev *pdev)
+{
+	struct rvu *rvu = pci_get_drvdata(pdev);
+
+	rvu_unregister_interrupts(rvu);
+	rvu_flr_wq_destroy(rvu);
+	rvu_cgx_exit(rvu);
+	rvu_mbox_destroy(&rvu->afpf_wq_info);
+	rvu_disable_sriov(rvu);
+	rvu_reset_all_blocks(rvu);
+	rvu_free_hw_resources(rvu);
+
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+
+	devm_kfree(&pdev->dev, rvu->hw);
+	devm_kfree(&pdev->dev, rvu);
+}
+
+static struct pci_driver rvu_driver = {
+	.name = DRV_NAME,
+	.id_table = rvu_id_table,
+	.probe = rvu_probe,
+	.remove = rvu_remove,
+};
+
+static int __init rvu_init_module(void)
+{
+	int err;
+
+	pr_info("%s: %s\n", DRV_NAME, DRV_STRING);
+
+	err = pci_register_driver(&cgx_driver);
+	if (err < 0)
+		return err;
+
+	err =  pci_register_driver(&rvu_driver);
+	if (err < 0)
+		pci_unregister_driver(&cgx_driver);
+
+	return err;
+}
+
+static void __exit rvu_cleanup_module(void)
+{
+	pci_unregister_driver(&rvu_driver);
+	pci_unregister_driver(&cgx_driver);
+}
+
+module_init(rvu_init_module);
+module_exit(rvu_cleanup_module);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
new file mode 100644
index 0000000..5222e42
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef RVU_H
+#define RVU_H
+
+#include <linux/pci.h>
+#include "rvu_struct.h"
+#include "common.h"
+#include "mbox.h"
+
+/* PCI device IDs */
+#define	PCI_DEVID_OCTEONTX2_RVU_AF		0xA065
+
+/* Subsystem Device ID */
+#define PCI_SUBSYS_DEVID_96XX                  0xB200
+
+/* PCI BAR nos */
+#define	PCI_AF_REG_BAR_NUM			0
+#define	PCI_PF_REG_BAR_NUM			2
+#define	PCI_MBOX_BAR_NUM			4
+
+#define NAME_SIZE				32
+
+/* PF_FUNC */
+#define RVU_PFVF_PF_SHIFT	10
+#define RVU_PFVF_PF_MASK	0x3F
+#define RVU_PFVF_FUNC_SHIFT	0
+#define RVU_PFVF_FUNC_MASK	0x3FF
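+
+/* Illustrative encoding: the FUNC field is 1-based for VFs, so PF2/VF0
+ * has pcifunc = (2 << RVU_PFVF_PF_SHIFT) | (0 + 1) = 0x801, while a bare
+ * PF has FUNC == 0, e.g. PF3 alone is 0xC00.
+ */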
+
+struct rvu_work {
+	struct	work_struct work;
+	struct	rvu *rvu;
+};
+
+struct rsrc_bmap {
+	unsigned long *bmap;	/* Pointer to resource bitmap */
+	u16  max;		/* Max resource id or count */
+};
+
+struct rvu_block {
+	struct rsrc_bmap	lf;
+	struct admin_queue	*aq; /* NIX/NPA AQ */
+	u16  *fn_map; /* LF to pcifunc mapping */
+	bool multislot;
+	bool implemented;
+	u8   addr;  /* RVU_BLOCK_ADDR_E */
+	u8   type;  /* RVU_BLOCK_TYPE_E */
+	u8   lfshift;
+	u64  lookup_reg;
+	u64  pf_lfcnt_reg;
+	u64  vf_lfcnt_reg;
+	u64  lfcfg_reg;
+	u64  msixcfg_reg;
+	u64  lfreset_reg;
+	unsigned char name[NAME_SIZE];
+};
+
+struct nix_mcast {
+	struct qmem	*mce_ctx;
+	struct qmem	*mcast_buf;
+	int		replay_pkind;
+	int		next_free_mce;
+	struct mutex	mce_lock; /* Serialize MCE updates */
+};
+
+struct nix_mce_list {
+	struct hlist_head	head;
+	int			count;
+	int			max;
+};
+
+struct npc_mcam {
+	struct rsrc_bmap counters;
+	struct mutex	lock;	/* MCAM entries and counters update lock */
+	unsigned long	*bmap;		/* bitmap, 0 => bmap_entries */
+	unsigned long	*bmap_reverse;	/* Reverse bitmap, bmap_entries => 0 */
+	u16	bmap_entries;	/* Number of unreserved MCAM entries */
+	u16	bmap_fcnt;	/* MCAM entries free count */
+	u16	*entry2pfvf_map;
+	u16	*entry2cntr_map;
+	u16	*cntr2pfvf_map;
+	u16	*cntr_refcnt;
+	u8	keysize;	/* MCAM keysize 112/224/448 bits */
+	u8	banks;		/* Number of MCAM banks */
+	u8	banks_per_entry;/* Number of keywords in key */
+	u16	banksize;	/* Number of MCAM entries in each bank */
+	u16	total_entries;	/* Total number of MCAM entries */
+	u16	nixlf_offset;	/* Offset of nixlf rsvd uncast entries */
+	u16	pf_offset;	/* Offset of PF's rsvd bcast, promisc entries */
+	u16	lprio_count;
+	u16	lprio_start;
+	u16	hprio_count;
+	u16	hprio_end;
+};
+
+/* Structure for per RVU func info ie PF/VF */
+struct rvu_pfvf {
+	bool		npalf; /* Only one NPALF per RVU_FUNC */
+	bool		nixlf; /* Only one NIXLF per RVU_FUNC */
+	u16		sso;
+	u16		ssow;
+	u16		cptlfs;
+	u16		timlfs;
+	u8		cgx_lmac;
+
+	/* Block LF's MSIX vector info */
+	struct rsrc_bmap msix;      /* Bitmap for MSIX vector alloc */
+#define MSIX_BLKLF(blkaddr, lf) (((blkaddr) << 8) | ((lf) & 0xFF))
+	u16		 *msix_lfmap; /* Vector to block LF mapping */
+
+	/* NPA contexts */
+	struct qmem	*aura_ctx;
+	struct qmem	*pool_ctx;
+	struct qmem	*npa_qints_ctx;
+	unsigned long	*aura_bmap;
+	unsigned long	*pool_bmap;
+
+	/* NIX contexts */
+	struct qmem	*rq_ctx;
+	struct qmem	*sq_ctx;
+	struct qmem	*cq_ctx;
+	struct qmem	*rss_ctx;
+	struct qmem	*cq_ints_ctx;
+	struct qmem	*nix_qints_ctx;
+	unsigned long	*sq_bmap;
+	unsigned long	*rq_bmap;
+	unsigned long	*cq_bmap;
+
+	u16		rx_chan_base;
+	u16		tx_chan_base;
+	u8              rx_chan_cnt; /* total number of RX channels */
+	u8              tx_chan_cnt; /* total number of TX channels */
+	u16		maxlen;
+	u16		minlen;
+
+	u8		mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */
+
+	/* Broadcast pkt replication info */
+	u16			bcast_mce_idx;
+	struct nix_mce_list	bcast_mce_list;
+
+	/* VLAN offload */
+	struct mcam_entry entry;
+	int rxvlan_index;
+	bool rxvlan;
+};
+
+struct nix_txsch {
+	struct rsrc_bmap schq;
+	u8   lvl;
+#define NIX_TXSCHQ_TL1_CFG_DONE       BIT_ULL(0)
+#define TXSCH_MAP_FUNC(__pfvf_map)    ((__pfvf_map) & 0xFFFF)
+#define TXSCH_MAP_FLAGS(__pfvf_map)   ((__pfvf_map) >> 16)
+#define TXSCH_MAP(__func, __flags)    (((__func) & 0xFFFF) | ((__flags) << 16))
+	u32  *pfvf_map;
+};
+
+struct nix_mark_format {
+	u8 total;
+	u8 in_use;
+	u32 *cfg;
+};
+
+struct npc_pkind {
+	struct rsrc_bmap rsrc;
+	u32	*pfchan_map;
+};
+
+struct nix_flowkey {
+#define NIX_FLOW_KEY_ALG_MAX 32
+	u32 flowkey[NIX_FLOW_KEY_ALG_MAX];
+	int in_use;
+};
+
+struct nix_lso {
+	u8 total;
+	u8 in_use;
+};
+
+struct nix_hw {
+	struct nix_txsch txsch[NIX_TXSCH_LVL_CNT]; /* Tx schedulers */
+	struct nix_mcast mcast;
+	struct nix_flowkey flowkey;
+	struct nix_mark_format mark_format;
+	struct nix_lso lso;
+};
+
+struct rvu_hwinfo {
+	u8	total_pfs;   /* MAX RVU PFs HW supports */
+	u16	total_vfs;   /* Max RVU VFs HW supports */
+	u16	max_vfs_per_pf; /* Max VFs that can be attached to a PF */
+	u8	cgx;
+	u8	lmac_per_cgx;
+	u8	cgx_links;
+	u8	lbk_links;
+	u8	sdp_links;
+	u8	npc_kpus;          /* No of parser units */
+
+	struct rvu_block block[BLK_COUNT]; /* Block info */
+	struct nix_hw    *nix0;
+	struct npc_pkind pkind;
+	struct npc_mcam  mcam;
+};
+
+struct mbox_wq_info {
+	struct otx2_mbox mbox;
+	struct rvu_work *mbox_wrk;
+
+	struct otx2_mbox mbox_up;
+	struct rvu_work *mbox_wrk_up;
+
+	struct workqueue_struct *mbox_wq;
+};
+
+struct rvu {
+	void __iomem		*afreg_base;
+	void __iomem		*pfreg_base;
+	struct pci_dev		*pdev;
+	struct device		*dev;
+	struct rvu_hwinfo       *hw;
+	struct rvu_pfvf		*pf;
+	struct rvu_pfvf		*hwvf;
+	struct mutex		rsrc_lock; /* Serialize resource alloc/free */
+	int			vfs; /* Number of VFs attached to RVU */
+
+	/* Mbox */
+	struct mbox_wq_info	afpf_wq_info;
+	struct mbox_wq_info	afvf_wq_info;
+
+	/* PF FLR */
+	struct rvu_work		*flr_wrk;
+	struct workqueue_struct *flr_wq;
+	struct mutex		flr_lock; /* Serialize FLRs */
+
+	/* MSI-X */
+	u16			num_vec;
+	char			*irq_name;
+	bool			*irq_allocated;
+	dma_addr_t		msix_base_iova;
+
+	/* CGX */
+#define PF_CGXMAP_BASE		1 /* PF0 is reserved for the RVU AF */
+	u8			cgx_mapped_pfs;
+	u8			cgx_cnt_max;	 /* CGX port count max */
+	u8			*pf2cgxlmac_map; /* pf to cgx_lmac map */
+	u16			*cgxlmac2pf_map; /* bitmap of mapped pfs for
+						  * every cgx lmac port
+						  */
+	unsigned long		pf_notify_bmap; /* Flags for PF notification */
+	void			**cgx_idmap; /* cgx id to cgx data map table */
+	struct			work_struct cgx_evh_work;
+	struct			workqueue_struct *cgx_evh_wq;
+	spinlock_t		cgx_evq_lock; /* cgx event queue lock */
+	struct list_head	cgx_evq_head; /* cgx event queue head */
+
+	char mkex_pfl_name[MKEX_NAME_LEN]; /* Configured MKEX profile name */
+};
+
+static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val)
+{
+	writeq(val, rvu->afreg_base + ((block << 28) | offset));
+}
+
+static inline u64 rvu_read64(struct rvu *rvu, u64 block, u64 offset)
+{
+	return readq(rvu->afreg_base + ((block << 28) | offset));
+}
+
+static inline void rvupf_write64(struct rvu *rvu, u64 offset, u64 val)
+{
+	writeq(val, rvu->pfreg_base + offset);
+}
+
+static inline u64 rvupf_read64(struct rvu *rvu, u64 offset)
+{
+	return readq(rvu->pfreg_base + offset);
+}
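+
+/* Address composition example: rvu_read64(rvu, 0x1, 0x10) reads
+ * afreg_base + ((0x1ULL << 28) | 0x10) = afreg_base + 0x10000010
+ * (the block number here is purely illustrative).
+ */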
+
+static inline bool is_rvu_9xxx_A0(struct rvu *rvu)
+{
+	struct pci_dev *pdev = rvu->pdev;
+
+	return (pdev->revision == 0x00) &&
+		(pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX);
+}
+
+/* Function Prototypes
+ * RVU
+ */
+static inline int is_afvf(u16 pcifunc)
+{
+	return !(pcifunc & ~RVU_PFVF_FUNC_MASK);
+}
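+
+/* e.g. is_afvf(0x001) is true (PF bits clear, FUNC = 1, i.e. AF's VF0),
+ * while is_afvf(0x801) is false (that is PF2/VF0).
+ */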
+
+int rvu_alloc_bitmap(struct rsrc_bmap *rsrc);
+int rvu_alloc_rsrc(struct rsrc_bmap *rsrc);
+void rvu_free_rsrc(struct rsrc_bmap *rsrc, int id);
+int rvu_rsrc_free_count(struct rsrc_bmap *rsrc);
+int rvu_alloc_rsrc_contig(struct rsrc_bmap *rsrc, int nrsrc);
+bool rvu_rsrc_check_contig(struct rsrc_bmap *rsrc, int nrsrc);
+int rvu_get_pf(u16 pcifunc);
+struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc);
+void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf);
+bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr);
+bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype);
+int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot);
+int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf);
+int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc);
+int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero);
+
+/* RVU HW reg validation */
+enum regmap_block {
+	TXSCHQ_HWREGMAP = 0,
+	MAX_HWREGMAP,
+};
+
+bool rvu_check_valid_reg(int regmap, int regblk, u64 reg);
+
+/* NPA/NIX AQ APIs */
+int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue,
+		 int qsize, int inst_size, int res_size);
+void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq);
+
+/* CGX APIs */
+static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf)
+{
+	return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs);
+}
+
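+/* A map byte packs cgx_id into the high nibble and lmac_id into the low
+ * nibble (the inverse of cgxlmac_id_to_bmap() in rvu_cgx.c): e.g. a map
+ * value of 0x23 decodes to cgx_id 2, lmac_id 3.
+ */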
+static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id)
+{
+	*cgx_id = (map >> 4) & 0xF;
+	*lmac_id = (map & 0xF);
+}
+
+int rvu_cgx_init(struct rvu *rvu);
+int rvu_cgx_exit(struct rvu *rvu);
+void *rvu_cgx_pdata(u8 cgx_id, struct rvu *rvu);
+int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start);
+int rvu_mbox_handler_cgx_start_rxtx(struct rvu *rvu, struct msg_req *req,
+				    struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_stop_rxtx(struct rvu *rvu, struct msg_req *req,
+				   struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_stats(struct rvu *rvu, struct msg_req *req,
+			       struct cgx_stats_rsp *rsp);
+int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu,
+				      struct cgx_mac_addr_set_or_get *req,
+				      struct cgx_mac_addr_set_or_get *rsp);
+int rvu_mbox_handler_cgx_mac_addr_get(struct rvu *rvu,
+				      struct cgx_mac_addr_set_or_get *req,
+				      struct cgx_mac_addr_set_or_get *rsp);
+int rvu_mbox_handler_cgx_promisc_enable(struct rvu *rvu, struct msg_req *req,
+					struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_promisc_disable(struct rvu *rvu, struct msg_req *req,
+					 struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_start_linkevents(struct rvu *rvu, struct msg_req *req,
+					  struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_stop_linkevents(struct rvu *rvu, struct msg_req *req,
+					 struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_get_linkinfo(struct rvu *rvu, struct msg_req *req,
+				      struct cgx_link_info_msg *rsp);
+int rvu_mbox_handler_cgx_intlbk_enable(struct rvu *rvu, struct msg_req *req,
+				       struct msg_rsp *rsp);
+int rvu_mbox_handler_cgx_intlbk_disable(struct rvu *rvu, struct msg_req *req,
+					struct msg_rsp *rsp);
+
+/* NPA APIs */
+int rvu_npa_init(struct rvu *rvu);
+void rvu_npa_freemem(struct rvu *rvu);
+void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf);
+int rvu_mbox_handler_npa_aq_enq(struct rvu *rvu,
+				struct npa_aq_enq_req *req,
+				struct npa_aq_enq_rsp *rsp);
+int rvu_mbox_handler_npa_hwctx_disable(struct rvu *rvu,
+				       struct hwctx_disable_req *req,
+				       struct msg_rsp *rsp);
+int rvu_mbox_handler_npa_lf_alloc(struct rvu *rvu,
+				  struct npa_lf_alloc_req *req,
+				  struct npa_lf_alloc_rsp *rsp);
+int rvu_mbox_handler_npa_lf_free(struct rvu *rvu, struct msg_req *req,
+				 struct msg_rsp *rsp);
+
+/* NIX APIs */
+bool is_nixlf_attached(struct rvu *rvu, u16 pcifunc);
+int rvu_nix_init(struct rvu *rvu);
+int rvu_nix_reserve_mark_format(struct rvu *rvu, struct nix_hw *nix_hw,
+				int blkaddr, u32 cfg);
+void rvu_nix_freemem(struct rvu *rvu);
+int rvu_get_nixlf_count(struct rvu *rvu);
+void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf);
+int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
+				  struct nix_lf_alloc_req *req,
+				  struct nix_lf_alloc_rsp *rsp);
+int rvu_mbox_handler_nix_lf_free(struct rvu *rvu, struct msg_req *req,
+				 struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_aq_enq(struct rvu *rvu,
+				struct nix_aq_enq_req *req,
+				struct nix_aq_enq_rsp *rsp);
+int rvu_mbox_handler_nix_hwctx_disable(struct rvu *rvu,
+				       struct hwctx_disable_req *req,
+				       struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
+				     struct nix_txsch_alloc_req *req,
+				     struct nix_txsch_alloc_rsp *rsp);
+int rvu_mbox_handler_nix_txsch_free(struct rvu *rvu,
+				    struct nix_txsch_free_req *req,
+				    struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
+				    struct nix_txschq_config *req,
+				    struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_stats_rst(struct rvu *rvu, struct msg_req *req,
+				   struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_vtag_cfg(struct rvu *rvu,
+				  struct nix_vtag_config *req,
+				  struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_rxvlan_alloc(struct rvu *rvu, struct msg_req *req,
+				      struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_rss_flowkey_cfg(struct rvu *rvu,
+					 struct nix_rss_flowkey_cfg *req,
+					 struct nix_rss_flowkey_cfg_rsp *rsp);
+int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu,
+				      struct nix_set_mac_addr *req,
+				      struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
+				     struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
+				    struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req,
+				     struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
+				    struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu,
+					 struct nix_mark_format_cfg  *req,
+					 struct nix_mark_format_cfg_rsp *rsp);
+int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
+				    struct msg_rsp *rsp);
+int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu,
+					struct nix_lso_format_cfg *req,
+					struct nix_lso_format_cfg_rsp *rsp);
+
+/* NPC APIs */
+int rvu_npc_init(struct rvu *rvu);
+void rvu_npc_freemem(struct rvu *rvu);
+int rvu_npc_get_pkind(struct rvu *rvu, u16 pf);
+void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf);
+void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
+				 int nixlf, u64 chan, u8 *mac_addr);
+void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+				   int nixlf, u64 chan, bool allmulti);
+void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
+void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
+void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
+				       int nixlf, u64 chan);
+int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf);
+void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
+void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
+void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
+void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
+				    int group, int alg_idx, int mcam_index);
+int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu,
+					  struct npc_mcam_alloc_entry_req *req,
+					  struct npc_mcam_alloc_entry_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu,
+					 struct npc_mcam_free_entry_req *req,
+					 struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
+					  struct npc_mcam_write_entry_req *req,
+					  struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_ena_entry(struct rvu *rvu,
+					struct npc_mcam_ena_dis_entry_req *req,
+					struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_dis_entry(struct rvu *rvu,
+					struct npc_mcam_ena_dis_entry_req *req,
+					struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu,
+					  struct npc_mcam_shift_entry_req *req,
+					  struct npc_mcam_shift_entry_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu,
+				struct npc_mcam_alloc_counter_req *req,
+				struct npc_mcam_alloc_counter_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_free_counter(struct rvu *rvu,
+		   struct npc_mcam_oper_counter_req *req, struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_clear_counter(struct rvu *rvu,
+		struct npc_mcam_oper_counter_req *req, struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_unmap_counter(struct rvu *rvu,
+		struct npc_mcam_unmap_counter_req *req, struct msg_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_counter_stats(struct rvu *rvu,
+			struct npc_mcam_oper_counter_req *req,
+			struct npc_mcam_oper_counter_rsp *rsp);
+int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
+			  struct npc_mcam_alloc_and_write_entry_req *req,
+			  struct npc_mcam_alloc_and_write_entry_rsp *rsp);
+int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req,
+				     struct npc_get_kex_cfg_rsp *rsp);
+#endif /* RVU_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
new file mode 100644
index 0000000..7d7133c
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu.h"
+#include "cgx.h"
+
+struct cgx_evq_entry {
+	struct list_head evq_node;
+	struct cgx_link_event link_event;
+};
+
+#define M(_name, _id, _fn_name, _req_type, _rsp_type)			\
+static struct _req_type __maybe_unused					\
+*otx2_mbox_alloc_msg_ ## _fn_name(struct rvu *rvu, int devid)		\
+{									\
+	struct _req_type *req;						\
+									\
+	req = (struct _req_type *)otx2_mbox_alloc_msg_rsp(		\
+		&rvu->afpf_wq_info.mbox_up, devid, sizeof(struct _req_type), \
+		sizeof(struct _rsp_type));				\
+	if (!req)							\
+		return NULL;						\
+	req->hdr.sig = OTX2_MBOX_REQ_SIG;				\
+	req->hdr.id = _id;						\
+	return req;							\
+}
+
+MBOX_UP_CGX_MESSAGES
+#undef M
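+
+/* Hedged illustration: a hypothetical table entry
+ *   M(BAR, 0x456, bar, bar_req, bar_rsp)
+ * would generate otx2_mbox_alloc_msg_bar(), which reserves a bar_req
+ * slot in the AF->PF "up" mailbox and stamps its request header with
+ * OTX2_MBOX_REQ_SIG and id 0x456.
+ */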
+
+/* Returns bitmap of mapped PFs */
+static inline u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id)
+{
+	return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id];
+}
+
+static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id)
+{
+	return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF);
+}
+
+void *rvu_cgx_pdata(u8 cgx_id, struct rvu *rvu)
+{
+	if (cgx_id >= rvu->cgx_cnt_max)
+		return NULL;
+
+	return rvu->cgx_idmap[cgx_id];
+}
+
+static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
+{
+	struct npc_pkind *pkind = &rvu->hw->pkind;
+	int cgx_cnt_max = rvu->cgx_cnt_max;
+	int cgx, lmac_cnt, lmac;
+	int pf = PF_CGXMAP_BASE;
+	int size, free_pkind;
+
+	if (!cgx_cnt_max)
+		return 0;
+
+	if (cgx_cnt_max > 0xF || MAX_LMAC_PER_CGX > 0xF)
+		return -EINVAL;
+
+	/* Alloc map table
+	 * An additional entry is required since PF id starts from 1 and
+	 * hence entry at offset 0 is invalid.
+	 */
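+	/* Illustration with hypothetical counts (2 CGXs, 4 LMACs each):
+	 * PF1 maps to CGX0/LMAC0 ... PF8 to CGX1/LMAC3, so
+	 * pf2cgxlmac_map[1] == 0x00 and cgxlmac2pf_map[0] == BIT(1).
+	 */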
+	size = (cgx_cnt_max * MAX_LMAC_PER_CGX + 1) * sizeof(u8);
+	rvu->pf2cgxlmac_map = devm_kmalloc(rvu->dev, size, GFP_KERNEL);
+	if (!rvu->pf2cgxlmac_map)
+		return -ENOMEM;
+
+	/* Initialize all entries with an invalid cgx and lmac id */
+	memset(rvu->pf2cgxlmac_map, 0xFF, size);
+
+	/* Reverse map table */
+	rvu->cgxlmac2pf_map = devm_kzalloc(rvu->dev,
+				  cgx_cnt_max * MAX_LMAC_PER_CGX * sizeof(u16),
+				  GFP_KERNEL);
+	if (!rvu->cgxlmac2pf_map)
+		return -ENOMEM;
+
+	rvu->cgx_mapped_pfs = 0;
+	for (cgx = 0; cgx < cgx_cnt_max; cgx++) {
+		if (!rvu_cgx_pdata(cgx, rvu))
+			continue;
+		lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
+		for (lmac = 0; lmac < lmac_cnt; lmac++, pf++) {
+			rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
+			rvu->cgxlmac2pf_map[CGX_OFFSET(cgx) + lmac] = 1 << pf;
+			free_pkind = rvu_alloc_rsrc(&pkind->rsrc);
+			pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16;
+			rvu->cgx_mapped_pfs++;
+		}
+	}
+	return 0;
+}
+
+static int rvu_cgx_send_link_info(int cgx_id, int lmac_id, struct rvu *rvu)
+{
+	struct cgx_evq_entry *qentry;
+	unsigned long flags;
+	int err;
+
+	qentry = kmalloc(sizeof(*qentry), GFP_KERNEL);
+	if (!qentry)
+		return -ENOMEM;
+
+	/* Lock the event queue before we read the local link status */
+	spin_lock_irqsave(&rvu->cgx_evq_lock, flags);
+	err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id,
+				&qentry->link_event.link_uinfo);
+	qentry->link_event.cgx_id = cgx_id;
+	qentry->link_event.lmac_id = lmac_id;
+	if (err)
+		goto skip_add;
+	list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head);
+skip_add:
+	spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags);
+
+	/* start worker to process the events */
+	queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work);
+
+	return 0;
+}
+
+/* This is called from interrupt context and is expected to be atomic */
+static int cgx_lmac_postevent(struct cgx_link_event *event, void *data)
+{
+	struct cgx_evq_entry *qentry;
+	struct rvu *rvu = data;
+
+	/* post event to the event queue */
+	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
+	if (!qentry)
+		return -ENOMEM;
+	qentry->link_event = *event;
+	spin_lock(&rvu->cgx_evq_lock);
+	list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head);
+	spin_unlock(&rvu->cgx_evq_lock);
+
+	/* start worker to process the events */
+	queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work);
+
+	return 0;
+}
+
+static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu)
+{
+	struct cgx_link_user_info *linfo;
+	struct cgx_link_info_msg *msg;
+	unsigned long pfmap;
+	int err, pfid;
+
+	linfo = &event->link_uinfo;
+	pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id);
+
+	do {
+		pfid = find_first_bit(&pfmap, 16);
+		clear_bit(pfid, &pfmap);
+
+		/* check if notification is enabled */
+		if (!test_bit(pfid, &rvu->pf_notify_bmap)) {
+			dev_info(rvu->dev, "cgx %d: lmac %d Link status %s\n",
+				 event->cgx_id, event->lmac_id,
+				 linfo->link_up ? "UP" : "DOWN");
+			continue;
+		}
+
+		/* Send mbox message to PF */
+		msg = otx2_mbox_alloc_msg_cgx_link_event(rvu, pfid);
+		if (!msg)
+			continue;
+		msg->link_info = *linfo;
+		otx2_mbox_msg_send(&rvu->afpf_wq_info.mbox_up, pfid);
+		err = otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid);
+		if (err)
+			dev_warn(rvu->dev, "notification to pf %d failed\n",
+				 pfid);
+	} while (pfmap);
+}
+
+static void cgx_evhandler_task(struct work_struct *work)
+{
+	struct rvu *rvu = container_of(work, struct rvu, cgx_evh_work);
+	struct cgx_evq_entry *qentry;
+	struct cgx_link_event *event;
+	unsigned long flags;
+
+	do {
+		/* Dequeue an event */
+		spin_lock_irqsave(&rvu->cgx_evq_lock, flags);
+		qentry = list_first_entry_or_null(&rvu->cgx_evq_head,
+						  struct cgx_evq_entry,
+						  evq_node);
+		if (qentry)
+			list_del(&qentry->evq_node);
+		spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags);
+		if (!qentry)
+			break; /* nothing more to process */
+
+		event = &qentry->link_event;
+
+		/* process event */
+		cgx_notify_pfs(event, rvu);
+		kfree(qentry);
+	} while (1);
+}
+
+static int cgx_lmac_event_handler_init(struct rvu *rvu)
+{
+	struct cgx_event_cb cb;
+	int cgx, lmac, err;
+	void *cgxd;
+
+	spin_lock_init(&rvu->cgx_evq_lock);
+	INIT_LIST_HEAD(&rvu->cgx_evq_head);
+	INIT_WORK(&rvu->cgx_evh_work, cgx_evhandler_task);
+	rvu->cgx_evh_wq = alloc_workqueue("rvu_evh_wq", 0, 0);
+	if (!rvu->cgx_evh_wq) {
+		dev_err(rvu->dev, "alloc workqueue failed");
+		return -ENOMEM;
+	}
+
+	cb.notify_link_chg = cgx_lmac_postevent; /* link change call back */
+	cb.data = rvu;
+
+	for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) {
+		cgxd = rvu_cgx_pdata(cgx, rvu);
+		if (!cgxd)
+			continue;
+		for (lmac = 0; lmac < cgx_get_lmac_cnt(cgxd); lmac++) {
+			err = cgx_lmac_evh_register(&cb, cgxd, lmac);
+			if (err)
+				dev_err(rvu->dev,
+					"%d:%d handler register failed\n",
+					cgx, lmac);
+		}
+	}
+
+	return 0;
+}
+
+static void rvu_cgx_wq_destroy(struct rvu *rvu)
+{
+	if (rvu->cgx_evh_wq) {
+		flush_workqueue(rvu->cgx_evh_wq);
+		destroy_workqueue(rvu->cgx_evh_wq);
+		rvu->cgx_evh_wq = NULL;
+	}
+}
+
+int rvu_cgx_init(struct rvu *rvu)
+{
+	int cgx, err;
+	void *cgxd;
+
+	/* CGX port ids start from 0 and are not necessarily contiguous.
+	 * Hence we allocate resources based on the maximum port id value.
+	 */
+	rvu->cgx_cnt_max = cgx_get_cgxcnt_max();
+	if (!rvu->cgx_cnt_max) {
+		dev_info(rvu->dev, "No CGX devices found!\n");
+		return -ENODEV;
+	}
+
+	rvu->cgx_idmap = devm_kzalloc(rvu->dev, rvu->cgx_cnt_max *
+				      sizeof(void *), GFP_KERNEL);
+	if (!rvu->cgx_idmap)
+		return -ENOMEM;
+
+	/* Initialize the cgxdata table */
+	for (cgx = 0; cgx < rvu->cgx_cnt_max; cgx++)
+		rvu->cgx_idmap[cgx] = cgx_get_pdata(cgx);
+
+	/* Map CGX LMAC interfaces to RVU PFs */
+	err = rvu_map_cgx_lmac_pf(rvu);
+	if (err)
+		return err;
+
+	/* Register for CGX events */
+	err = cgx_lmac_event_handler_init(rvu);
+	if (err)
+		return err;
+
+	/* Ensure event handler registration is completed before
+	 * we turn on the links
+	 */
+	mb();
+
+	/* Do link up for all CGX ports */
+	for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) {
+		cgxd = rvu_cgx_pdata(cgx, rvu);
+		if (!cgxd)
+			continue;
+		err = cgx_lmac_linkup_start(cgxd);
+		if (err)
+			dev_err(rvu->dev,
+				"Link up process failed to start on cgx %d\n",
+				cgx);
+	}
+
+	return 0;
+}
+
+int rvu_cgx_exit(struct rvu *rvu)
+{
+	int cgx, lmac;
+	void *cgxd;
+
+	for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) {
+		cgxd = rvu_cgx_pdata(cgx, rvu);
+		if (!cgxd)
+			continue;
+		for (lmac = 0; lmac < cgx_get_lmac_cnt(cgxd); lmac++)
+			cgx_lmac_evh_unregister(cgxd, lmac);
+	}
+
+	/* Ensure event handler unregister is completed */
+	mb();
+
+	rvu_cgx_wq_destroy(rvu);
+	return 0;
+}
+
+int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
+{
+	int pf = rvu_get_pf(pcifunc);
+	u8 cgx_id, lmac_id;
+
+	/* This msg is expected only from PFs that are mapped to CGX LMACs;
+	 * if received from another PF/VF, simply ACK, nothing to do.
+	 */
+	if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start);
+
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_start_rxtx(struct rvu *rvu, struct msg_req *req,
+				    struct msg_rsp *rsp)
+{
+	rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, true);
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_stop_rxtx(struct rvu *rvu, struct msg_req *req,
+				   struct msg_rsp *rsp)
+{
+	rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, false);
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_stats(struct rvu *rvu, struct msg_req *req,
+			       struct cgx_stats_rsp *rsp)
+{
+	int pf = rvu_get_pf(req->hdr.pcifunc);
+	int stat = 0, err = 0;
+	u64 tx_stat, rx_stat;
+	u8 cgx_idx, lmac;
+	void *cgxd;
+
+	if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) ||
+	    !is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
+	cgxd = rvu_cgx_pdata(cgx_idx, rvu);
+
+	/* Rx stats */
+	while (stat < CGX_RX_STATS_COUNT) {
+		err = cgx_get_rx_stats(cgxd, lmac, stat, &rx_stat);
+		if (err)
+			return err;
+		rsp->rx_stats[stat] = rx_stat;
+		stat++;
+	}
+
+	/* Tx stats */
+	stat = 0;
+	while (stat < CGX_TX_STATS_COUNT) {
+		err = cgx_get_tx_stats(cgxd, lmac, stat, &tx_stat);
+		if (err)
+			return err;
+		rsp->tx_stats[stat] = tx_stat;
+		stat++;
+	}
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu,
+				      struct cgx_mac_addr_set_or_get *req,
+				      struct cgx_mac_addr_set_or_get *rsp)
+{
+	int pf = rvu_get_pf(req->hdr.pcifunc);
+	u8 cgx_id, lmac_id;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr);
+
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_mac_addr_get(struct rvu *rvu,
+				      struct cgx_mac_addr_set_or_get *req,
+				      struct cgx_mac_addr_set_or_get *rsp)
+{
+	int pf = rvu_get_pf(req->hdr.pcifunc);
+	u8 cgx_id, lmac_id;
+	int rc = 0, i;
+	u64 cfg;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	rsp->hdr.rc = rc;
+	cfg = cgx_lmac_addr_get(cgx_id, lmac_id);
+	/* copy 48 bit mac address to rsp->mac_addr */
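+	/* Worked example (hedged): cfg = 0x001122334455 yields
+	 * rsp->mac_addr = 00:11:22:33:44:55, byte 0 coming from
+	 * cfg bits 47:40.
+	 */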
+	for (i = 0; i < ETH_ALEN; i++)
+		rsp->mac_addr[i] = cfg >> (ETH_ALEN - 1 - i) * 8;
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_promisc_enable(struct rvu *rvu, struct msg_req *req,
+					struct msg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	int pf = rvu_get_pf(pcifunc);
+	u8 cgx_id, lmac_id;
+
+	/* This msg is expected only from PFs that are mapped to CGX LMACs;
+	 * if received from another PF/VF, simply ACK, nothing to do.
+	 */
+	if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) ||
+	    !is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	cgx_lmac_promisc_config(cgx_id, lmac_id, true);
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_promisc_disable(struct rvu *rvu, struct msg_req *req,
+					 struct msg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	int pf = rvu_get_pf(pcifunc);
+	u8 cgx_id, lmac_id;
+
+	/* This msg is expected only from PFs that are mapped to CGX LMACs;
+	 * if received from another PF/VF, simply ACK, nothing to do.
+	 */
+	if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) ||
+	    !is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	cgx_lmac_promisc_config(cgx_id, lmac_id, false);
+	return 0;
+}
+
+static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en)
+{
+	int pf = rvu_get_pf(pcifunc);
+	u8 cgx_id, lmac_id;
+
+	/* This msg is expected only from PFs that are mapped to CGX LMACs;
+	 * if received from any other PF/VF, simply ACK, nothing to do.
+	 */
+	if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	if (en) {
+		set_bit(pf, &rvu->pf_notify_bmap);
+		/* Send the current link status to PF */
+		rvu_cgx_send_link_info(cgx_id, lmac_id, rvu);
+	} else {
+		clear_bit(pf, &rvu->pf_notify_bmap);
+	}
+
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_start_linkevents(struct rvu *rvu, struct msg_req *req,
+					  struct msg_rsp *rsp)
+{
+	rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, true);
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_stop_linkevents(struct rvu *rvu, struct msg_req *req,
+					 struct msg_rsp *rsp)
+{
+	rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, false);
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_get_linkinfo(struct rvu *rvu, struct msg_req *req,
+				      struct cgx_link_info_msg *rsp)
+{
+	u8 cgx_id, lmac_id;
+	int pf, err;
+
+	pf = rvu_get_pf(req->hdr.pcifunc);
+
+	if (!is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id,
+				&rsp->link_info);
+	return err;
+}
+
+static int rvu_cgx_config_intlbk(struct rvu *rvu, u16 pcifunc, bool en)
+{
+	int pf = rvu_get_pf(pcifunc);
+	u8 cgx_id, lmac_id;
+
+	/* This msg is expected only from PFs that are mapped to CGX LMACs;
+	 * if received from any other PF/VF, simply ACK, nothing to do.
+	 */
+	if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf))
+		return -ENODEV;
+
+	rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+	return cgx_lmac_internal_loopback(rvu_cgx_pdata(cgx_id, rvu),
+					  lmac_id, en);
+}
+
+int rvu_mbox_handler_cgx_intlbk_enable(struct rvu *rvu, struct msg_req *req,
+				       struct msg_rsp *rsp)
+{
+	rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, true);
+	return 0;
+}
+
+int rvu_mbox_handler_cgx_intlbk_disable(struct rvu *rvu, struct msg_req *req,
+					struct msg_rsp *rsp)
+{
+	rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, false);
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
new file mode 100644
index 0000000..4a7609f
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -0,0 +1,2959 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu_struct.h"
+#include "rvu_reg.h"
+#include "rvu.h"
+#include "npc.h"
+#include "cgx.h"
+
+static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add);
+
+enum mc_tbl_sz {
+	MC_TBL_SZ_256,
+	MC_TBL_SZ_512,
+	MC_TBL_SZ_1K,
+	MC_TBL_SZ_2K,
+	MC_TBL_SZ_4K,
+	MC_TBL_SZ_8K,
+	MC_TBL_SZ_16K,
+	MC_TBL_SZ_32K,
+	MC_TBL_SZ_64K,
+};
+
+enum mc_buf_cnt {
+	MC_BUF_CNT_8,
+	MC_BUF_CNT_16,
+	MC_BUF_CNT_32,
+	MC_BUF_CNT_64,
+	MC_BUF_CNT_128,
+	MC_BUF_CNT_256,
+	MC_BUF_CNT_512,
+	MC_BUF_CNT_1024,
+	MC_BUF_CNT_2048,
+};
+
+enum nix_mark_fmt_indexes {
+	NIX_MARK_CFG_IP_DSCP_RED,
+	NIX_MARK_CFG_IP_DSCP_YELLOW,
+	NIX_MARK_CFG_IP_DSCP_YELLOW_RED,
+	NIX_MARK_CFG_IP_ECN_RED,
+	NIX_MARK_CFG_IP_ECN_YELLOW,
+	NIX_MARK_CFG_IP_ECN_YELLOW_RED,
+	NIX_MARK_CFG_VLAN_DEI_RED,
+	NIX_MARK_CFG_VLAN_DEI_YELLOW,
+	NIX_MARK_CFG_VLAN_DEI_YELLOW_RED,
+	NIX_MARK_CFG_MAX,
+};
+
+/* For now considering MC resources needed for broadcast
+ * pkt replication only, i.e. 256 HWVFs + 12 PFs.
+ */
+#define MC_TBL_SIZE	MC_TBL_SZ_512
+#define MC_BUF_CNT	MC_BUF_CNT_128
+
+struct mce {
+	struct hlist_node	node;
+	u16			idx;
+	u16			pcifunc;
+};
+
+bool is_nixlf_attached(struct rvu *rvu, u16 pcifunc)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return false;
+	return true;
+}
+
+int rvu_get_nixlf_count(struct rvu *rvu)
+{
+	struct rvu_block *block;
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+	if (blkaddr < 0)
+		return 0;
+	block = &rvu->hw->block[blkaddr];
+	return block->lf.max;
+}
+
+static void nix_mce_list_init(struct nix_mce_list *list, int max)
+{
+	INIT_HLIST_HEAD(&list->head);
+	list->count = 0;
+	list->max = max;
+}
+
+static u16 nix_alloc_mce_list(struct nix_mcast *mcast, int count)
+{
+	int idx;
+
+	if (!mcast)
+		return 0;
+
+	idx = mcast->next_free_mce;
+	mcast->next_free_mce += count;
+	return idx;
+}
+
+static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
+{
+	if (blkaddr == BLKADDR_NIX0 && hw->nix0)
+		return hw->nix0;
+
+	return NULL;
+}
+
+static void nix_rx_sync(struct rvu *rvu, int blkaddr)
+{
+	int err;
+
+	/* Sync all in-flight RX packets to LLC/DRAM */
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
+	err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
+	if (err)
+		dev_err(rvu->dev, "NIX RX software sync failed\n");
+
+	/* As per a HW erratum in 9xxx A0 silicon, HW may clear the
+	 * SW_SYNC[ENA] bit too early. Hence wait 50us more.
+	 */
+	if (is_rvu_9xxx_A0(rvu))
+		usleep_range(50, 60);
+}
+
+static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
+			    int lvl, u16 pcifunc, u16 schq)
+{
+	struct nix_txsch *txsch;
+	struct nix_hw *nix_hw;
+	u16 map_func;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return false;
+
+	txsch = &nix_hw->txsch[lvl];
+	/* Check out of bounds */
+	if (schq >= txsch->schq.max)
+		return false;
+
+	mutex_lock(&rvu->rsrc_lock);
+	map_func = TXSCH_MAP_FUNC(txsch->pfvf_map[schq]);
+	mutex_unlock(&rvu->rsrc_lock);
+
+	/* For TL1 schq, sharing across VFs of the same PF is ok */
+	if (lvl == NIX_TXSCH_LVL_TL1 &&
+	    rvu_get_pf(map_func) != rvu_get_pf(pcifunc))
+		return false;
+
+	if (lvl != NIX_TXSCH_LVL_TL1 &&
+	    map_func != pcifunc)
+		return false;
+
+	return true;
+}
+
+static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	u8 cgx_id, lmac_id;
+	int pkind, pf, vf;
+	int err;
+
+	pf = rvu_get_pf(pcifunc);
+	if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK)
+		return 0;
+
+	switch (type) {
+	case NIX_INTF_TYPE_CGX:
+		pfvf->cgx_lmac = rvu->pf2cgxlmac_map[pf];
+		rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id);
+
+		pkind = rvu_npc_get_pkind(rvu, pf);
+		if (pkind < 0) {
+			dev_err(rvu->dev,
+				"PF_Func 0x%x: Invalid pkind\n", pcifunc);
+			return -EINVAL;
+		}
+		pfvf->rx_chan_base = NIX_CHAN_CGX_LMAC_CHX(cgx_id, lmac_id, 0);
+		pfvf->tx_chan_base = pfvf->rx_chan_base;
+		pfvf->rx_chan_cnt = 1;
+		pfvf->tx_chan_cnt = 1;
+		cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind);
+		rvu_npc_set_pkind(rvu, pkind, pfvf);
+		break;
+	case NIX_INTF_TYPE_LBK:
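+		/* AF's VFs are paired over loopback: each VF transmits on
+		 * the channel its partner VF receives on (VF0<->VF1,
+		 * VF2<->VF3, ...), which the +/-1 arithmetic below encodes.
+		 */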
+		vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
+		pfvf->rx_chan_base = NIX_CHAN_LBK_CHX(0, vf);
+		pfvf->tx_chan_base = vf & 0x1 ? NIX_CHAN_LBK_CHX(0, vf - 1) :
+						NIX_CHAN_LBK_CHX(0, vf + 1);
+		pfvf->rx_chan_cnt = 1;
+		pfvf->tx_chan_cnt = 1;
+		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
+					      pfvf->rx_chan_base, false);
+		break;
+	}
+
+	/* Add a UCAST forwarding rule in MCAM with this NIXLF attached
+	 * RVU PF/VF's MAC address.
+	 */
+	rvu_npc_install_ucast_entry(rvu, pcifunc, nixlf,
+				    pfvf->rx_chan_base, pfvf->mac_addr);
+
+	/* Add this PF_FUNC to bcast pkt replication list */
+	err = nix_update_bcast_mce_list(rvu, pcifunc, true);
+	if (err) {
+		dev_err(rvu->dev,
+			"Bcast list, failed to enable PF_FUNC 0x%x\n",
+			pcifunc);
+		return err;
+	}
+
+	rvu_npc_install_bcast_match_entry(rvu, pcifunc,
+					  nixlf, pfvf->rx_chan_base);
+	pfvf->maxlen = NIC_HW_MIN_FRS;
+	pfvf->minlen = NIC_HW_MIN_FRS;
+
+	return 0;
+}
+
+static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	int err;
+
+	pfvf->maxlen = 0;
+	pfvf->minlen = 0;
+	pfvf->rxvlan = false;
+
+	/* Remove this PF_FUNC from bcast pkt replication list */
+	err = nix_update_bcast_mce_list(rvu, pcifunc, false);
+	if (err) {
+		dev_err(rvu->dev,
+			"Bcast list, failed to disable PF_FUNC 0x%x\n",
+			pcifunc);
+	}
+
+	/* Free and disable any MCAM entries used by this NIX LF */
+	rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf);
+}
+
+static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr,
+				 u64 format, bool v4, u64 *fidx)
+{
+	struct nix_lso_format field = {0};
+
+	/* IP's Length field */
+	field.layer = NIX_TXLAYER_OL3;
+	/* In IPv4 the Total Length field is at byte offset 2; in IPv6
+	 * the Payload Length field is at byte offset 4.
+	 */
+	field.offset = v4 ? 2 : 4;
+	field.sizem1 = 1; /* i.e. 2 bytes */
+	field.alg = NIX_LSOALG_ADD_PAYLEN;
+	rvu_write64(rvu, blkaddr,
+		    NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+		    *(u64 *)&field);
+
+	/* No ID field in IPv6 header */
+	if (!v4)
+		return;
+
+	/* IP's ID field */
+	field.layer = NIX_TXLAYER_OL3;
+	field.offset = 4;
+	field.sizem1 = 1; /* i.e. 2 bytes */
+	field.alg = NIX_LSOALG_ADD_SEGNUM;
+	rvu_write64(rvu, blkaddr,
+		    NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+		    *(u64 *)&field);
+}
+
+static void nix_setup_lso_tso_l4(struct rvu *rvu, int blkaddr,
+				 u64 format, u64 *fidx)
+{
+	struct nix_lso_format field = {0};
+
+	/* TCP's sequence number field */
+	field.layer = NIX_TXLAYER_OL4;
+	field.offset = 4;
+	field.sizem1 = 3; /* i.e. 4 bytes */
+	field.alg = NIX_LSOALG_ADD_OFFSET;
+	rvu_write64(rvu, blkaddr,
+		    NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+		    *(u64 *)&field);
+
+	/* TCP's flags field */
+	field.layer = NIX_TXLAYER_OL4;
+	field.offset = 12;
+	field.sizem1 = 1; /* 2 bytes */
+	field.alg = NIX_LSOALG_TCP_FLAGS;
+	rvu_write64(rvu, blkaddr,
+		    NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+		    *(u64 *)&field);
+}
+
+static void nix_setup_lso(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
+{
+	u64 cfg, idx, fidx = 0;
+
+	/* Get max HW supported format indices */
+	cfg = (rvu_read64(rvu, blkaddr, NIX_AF_CONST1) >> 48) & 0xFF;
+	nix_hw->lso.total = cfg;
+
+	/* Enable LSO */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_LSO_CFG);
+	/* For TSO, set first and middle segment flags to
+	 * mask out PSH, RST & FIN flags in TCP packet
+	 */
+	cfg &= ~((0xFFFFULL << 32) | (0xFFFFULL << 16));
+	cfg |= (0xFFF2ULL << 32) | (0xFFF2ULL << 16);
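+	/* 0xFFF2 clears FIN (bit 0), RST (bit 2) and PSH (bit 3); SYN and ACK pass through */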
+	rvu_write64(rvu, blkaddr, NIX_AF_LSO_CFG, cfg | BIT_ULL(63));
+
+	/* Setup default static LSO formats
+	 *
+	 * Configure format fields for TCPv4 segmentation offload
+	 */
+	idx = NIX_LSO_FORMAT_IDX_TSOV4;
+	nix_setup_lso_tso_l3(rvu, blkaddr, idx, true, &fidx);
+	nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx);
+
+	/* Set rest of the fields to NOP */
+	for (; fidx < 8; fidx++) {
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL);
+	}
+	nix_hw->lso.in_use++;
+
+	/* Configure format fields for TCPv6 segmentation offload */
+	idx = NIX_LSO_FORMAT_IDX_TSOV6;
+	fidx = 0;
+	nix_setup_lso_tso_l3(rvu, blkaddr, idx, false, &fidx);
+	nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx);
+
+	/* Set rest of the fields to NOP */
+	for (; fidx < 8; fidx++) {
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL);
+	}
+	nix_hw->lso.in_use++;
+}
+
+static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf)
+{
+	kfree(pfvf->rq_bmap);
+	kfree(pfvf->sq_bmap);
+	kfree(pfvf->cq_bmap);
+	if (pfvf->rq_ctx)
+		qmem_free(rvu->dev, pfvf->rq_ctx);
+	if (pfvf->sq_ctx)
+		qmem_free(rvu->dev, pfvf->sq_ctx);
+	if (pfvf->cq_ctx)
+		qmem_free(rvu->dev, pfvf->cq_ctx);
+	if (pfvf->rss_ctx)
+		qmem_free(rvu->dev, pfvf->rss_ctx);
+	if (pfvf->nix_qints_ctx)
+		qmem_free(rvu->dev, pfvf->nix_qints_ctx);
+	if (pfvf->cq_ints_ctx)
+		qmem_free(rvu->dev, pfvf->cq_ints_ctx);
+
+	pfvf->rq_bmap = NULL;
+	pfvf->cq_bmap = NULL;
+	pfvf->sq_bmap = NULL;
+	pfvf->rq_ctx = NULL;
+	pfvf->sq_ctx = NULL;
+	pfvf->cq_ctx = NULL;
+	pfvf->rss_ctx = NULL;
+	pfvf->nix_qints_ctx = NULL;
+	pfvf->cq_ints_ctx = NULL;
+}
+
+static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr,
+			      struct rvu_pfvf *pfvf, int nixlf,
+			      int rss_sz, int rss_grps, int hwctx_size)
+{
+	int err, grp, num_indices;
+
+	/* RSS is not requested for this NIXLF */
+	if (!rss_sz)
+		return 0;
+	num_indices = rss_sz * rss_grps;
+
+	/* Alloc NIX RSS HW context memory and config the base */
+	err = qmem_alloc(rvu->dev, &pfvf->rss_ctx, num_indices, hwctx_size);
+	if (err)
+		return err;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_BASE(nixlf),
+		    (u64)pfvf->rss_ctx->iova);
+
+	/* Config full RSS table size, enable RSS and caching:
+	 * BIT(36) = context caching, BIT(4) = RSS enable, low bits
+	 * encode ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE).
+	 */
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf),
+		    BIT_ULL(36) | BIT_ULL(4) |
+		    ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE));
+	/* Config RSS group offset and sizes */
+	for (grp = 0; grp < rss_grps; grp++)
+		rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp),
+			    ((ilog2(rss_sz) - 1) << 16) | (rss_sz * grp));
+	return 0;
+}
+
+static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+			       struct nix_aq_inst_s *inst)
+{
+	struct admin_queue *aq = block->aq;
+	struct nix_aq_res_s *result;
+	int timeout = 1000;
+	u64 reg, head;
+
+	result = (struct nix_aq_res_s *)aq->res->base;
+
+	/* Get current head pointer where to append this instruction */
+	reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS);
+	head = (reg >> 4) & AQ_PTR_MASK;
+
+	memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)),
+	       (void *)inst, aq->inst->entry_sz);
+	memset(result, 0, sizeof(*result));
+	/* Ensure instruction and cleared result are visible to HW before ringing the doorbell */
+	wmb();
+
+	/* Ring the doorbell and wait for result */
+	rvu_write64(rvu, block->addr, NIX_AF_AQ_DOOR, 1);
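+	/* Poll up to ~1ms (1000 iterations x 1us) for the instruction to complete */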
+	while (result->compcode == NIX_AQ_COMP_NOTDONE) {
+		cpu_relax();
+		udelay(1);
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+	}
+
+	if (result->compcode != NIX_AQ_COMP_GOOD)
+		/* TODO: Replace this with some error code */
+		return -EBUSY;
+
+	return 0;
+}
+
+static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+			       struct nix_aq_enq_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int nixlf, blkaddr, rc = 0;
+	struct nix_aq_inst_s inst;
+	struct rvu_block *block;
+	struct admin_queue *aq;
+	struct rvu_pfvf *pfvf;
+	void *ctx, *mask;
+	bool ena;
+	u64 cfg;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	aq = block->aq;
+	if (!aq) {
+		dev_warn(rvu->dev, "%s: NIX AQ not initialized\n", __func__);
+		return NIX_AF_ERR_AQ_ENQUEUE;
+	}
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+
+	/* Skip NIXLF check for broadcast MCE entry init */
+	if (rsp || req->ctype != NIX_AQ_CTYPE_MCE) {
+		if (!pfvf->nixlf || nixlf < 0)
+			return NIX_AF_ERR_AF_LF_INVALID;
+	}
+
+	switch (req->ctype) {
+	case NIX_AQ_CTYPE_RQ:
+		/* Check if index exceeds max no of queues */
+		if (!pfvf->rq_ctx || req->qidx >= pfvf->rq_ctx->qsize)
+			rc = NIX_AF_ERR_AQ_ENQUEUE;
+		break;
+	case NIX_AQ_CTYPE_SQ:
+		if (!pfvf->sq_ctx || req->qidx >= pfvf->sq_ctx->qsize)
+			rc = NIX_AF_ERR_AQ_ENQUEUE;
+		break;
+	case NIX_AQ_CTYPE_CQ:
+		if (!pfvf->cq_ctx || req->qidx >= pfvf->cq_ctx->qsize)
+			rc = NIX_AF_ERR_AQ_ENQUEUE;
+		break;
+	case NIX_AQ_CTYPE_RSS:
+		/* Check if RSS is enabled and qidx is within range */
+		cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf));
+		if (!(cfg & BIT_ULL(4)) || !pfvf->rss_ctx ||
+		    (req->qidx >= (256UL << (cfg & 0xF))))
+			rc = NIX_AF_ERR_AQ_ENQUEUE;
+		break;
+	case NIX_AQ_CTYPE_MCE:
+		cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_MCAST_CFG);
+		/* Check if index exceeds MCE list length */
+		if (!hw->nix0->mcast.mce_ctx ||
+		    (req->qidx >= (256UL << (cfg & 0xF))))
+			rc = NIX_AF_ERR_AQ_ENQUEUE;
+
+		/* Adding multicast lists for requests from PF/VFs is not
+		 * yet supported, so ignore this.
+		 */
+		if (rsp)
+			rc = NIX_AF_ERR_AQ_ENQUEUE;
+		break;
+	default:
+		rc = NIX_AF_ERR_AQ_ENQUEUE;
+	}
+
+	if (rc)
+		return rc;
+
+	/* Check if SQ pointed SMQ belongs to this PF/VF or not */
+	if (req->ctype == NIX_AQ_CTYPE_SQ &&
+	    ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
+	     (req->op == NIX_AQ_INSTOP_WRITE &&
+	      req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
+		if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
+				     pcifunc, req->sq.smq))
+			return NIX_AF_ERR_AQ_ENQUEUE;
+	}
+
+	memset(&inst, 0, sizeof(struct nix_aq_inst_s));
+	inst.lf = nixlf;
+	inst.cindex = req->qidx;
+	inst.ctype = req->ctype;
+	inst.op = req->op;
+	/* Currently we are not supporting enqueuing multiple instructions,
+	 * so always choose first entry in result memory.
+	 */
+	inst.res_addr = (u64)aq->res->iova;
+
+	/* Clean result + context memory */
+	memset(aq->res->base, 0, aq->res->entry_sz);
+	/* Context needs to be written at RES_ADDR + 128 */
+	ctx = aq->res->base + 128;
+	/* Mask needs to be written at RES_ADDR + 256 */
+	mask = aq->res->base + 256;
+
+	switch (req->op) {
+	case NIX_AQ_INSTOP_WRITE:
+		if (req->ctype == NIX_AQ_CTYPE_RQ)
+			memcpy(mask, &req->rq_mask,
+			       sizeof(struct nix_rq_ctx_s));
+		else if (req->ctype == NIX_AQ_CTYPE_SQ)
+			memcpy(mask, &req->sq_mask,
+			       sizeof(struct nix_sq_ctx_s));
+		else if (req->ctype == NIX_AQ_CTYPE_CQ)
+			memcpy(mask, &req->cq_mask,
+			       sizeof(struct nix_cq_ctx_s));
+		else if (req->ctype == NIX_AQ_CTYPE_RSS)
+			memcpy(mask, &req->rss_mask,
+			       sizeof(struct nix_rsse_s));
+		else if (req->ctype == NIX_AQ_CTYPE_MCE)
+			memcpy(mask, &req->mce_mask,
+			       sizeof(struct nix_rx_mce_s));
+		/* fall through */
+	case NIX_AQ_INSTOP_INIT:
+		if (req->ctype == NIX_AQ_CTYPE_RQ)
+			memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s));
+		else if (req->ctype == NIX_AQ_CTYPE_SQ)
+			memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s));
+		else if (req->ctype == NIX_AQ_CTYPE_CQ)
+			memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s));
+		else if (req->ctype == NIX_AQ_CTYPE_RSS)
+			memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s));
+		else if (req->ctype == NIX_AQ_CTYPE_MCE)
+			memcpy(ctx, &req->mce, sizeof(struct nix_rx_mce_s));
+		break;
+	case NIX_AQ_INSTOP_NOP:
+	case NIX_AQ_INSTOP_READ:
+	case NIX_AQ_INSTOP_LOCK:
+	case NIX_AQ_INSTOP_UNLOCK:
+		break;
+	default:
+		rc = NIX_AF_ERR_AQ_ENQUEUE;
+		return rc;
+	}
+
+	spin_lock(&aq->lock);
+
+	/* Submit the instruction to AQ */
+	rc = nix_aq_enqueue_wait(rvu, block, &inst);
+	if (rc) {
+		spin_unlock(&aq->lock);
+		return rc;
+	}
+
+	/* Set RQ/SQ/CQ bitmap if respective queue hw context is enabled */
+	if (req->op == NIX_AQ_INSTOP_INIT) {
+		if (req->ctype == NIX_AQ_CTYPE_RQ && req->rq.ena)
+			__set_bit(req->qidx, pfvf->rq_bmap);
+		if (req->ctype == NIX_AQ_CTYPE_SQ && req->sq.ena)
+			__set_bit(req->qidx, pfvf->sq_bmap);
+		if (req->ctype == NIX_AQ_CTYPE_CQ && req->cq.ena)
+			__set_bit(req->qidx, pfvf->cq_bmap);
+	}
+
+	if (req->op == NIX_AQ_INSTOP_WRITE) {
+		if (req->ctype == NIX_AQ_CTYPE_RQ) {
+			ena = (req->rq.ena & req->rq_mask.ena) |
+				(test_bit(req->qidx, pfvf->rq_bmap) &
+				~req->rq_mask.ena);
+			if (ena)
+				__set_bit(req->qidx, pfvf->rq_bmap);
+			else
+				__clear_bit(req->qidx, pfvf->rq_bmap);
+		}
+		if (req->ctype == NIX_AQ_CTYPE_SQ) {
+			ena = (req->sq.ena & req->sq_mask.ena) |
+				(test_bit(req->qidx, pfvf->sq_bmap) &
+				~req->sq_mask.ena);
+			if (ena)
+				__set_bit(req->qidx, pfvf->sq_bmap);
+			else
+				__clear_bit(req->qidx, pfvf->sq_bmap);
+		}
+		if (req->ctype == NIX_AQ_CTYPE_CQ) {
+			ena = (req->cq.ena & req->cq_mask.ena) |
+				(test_bit(req->qidx, pfvf->cq_bmap) &
+				~req->cq_mask.ena);
+			if (ena)
+				__set_bit(req->qidx, pfvf->cq_bmap);
+			else
+				__clear_bit(req->qidx, pfvf->cq_bmap);
+		}
+	}
+
+	if (rsp) {
+		/* Copy read context into mailbox */
+		if (req->op == NIX_AQ_INSTOP_READ) {
+			if (req->ctype == NIX_AQ_CTYPE_RQ)
+				memcpy(&rsp->rq, ctx,
+				       sizeof(struct nix_rq_ctx_s));
+			else if (req->ctype == NIX_AQ_CTYPE_SQ)
+				memcpy(&rsp->sq, ctx,
+				       sizeof(struct nix_sq_ctx_s));
+			else if (req->ctype == NIX_AQ_CTYPE_CQ)
+				memcpy(&rsp->cq, ctx,
+				       sizeof(struct nix_cq_ctx_s));
+			else if (req->ctype == NIX_AQ_CTYPE_RSS)
+				memcpy(&rsp->rss, ctx,
+				       sizeof(struct nix_rsse_s));
+			else if (req->ctype == NIX_AQ_CTYPE_MCE)
+				memcpy(&rsp->mce, ctx,
+				       sizeof(struct nix_rx_mce_s));
+		}
+	}
+
+	spin_unlock(&aq->lock);
+	return 0;
+}
+
+static int nix_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+	struct nix_aq_enq_req aq_req;
+	unsigned long *bmap;
+	int qidx, q_cnt = 0;
+	int err = 0, rc;
+
+	if (!pfvf->cq_ctx || !pfvf->sq_ctx || !pfvf->rq_ctx)
+		return NIX_AF_ERR_AQ_ENQUEUE;
+
+	memset(&aq_req, 0, sizeof(struct nix_aq_enq_req));
+	aq_req.hdr.pcifunc = req->hdr.pcifunc;
+
+	if (req->ctype == NIX_AQ_CTYPE_CQ) {
+		aq_req.cq.ena = 0;
+		aq_req.cq_mask.ena = 1;
+		q_cnt = pfvf->cq_ctx->qsize;
+		bmap = pfvf->cq_bmap;
+	}
+	if (req->ctype == NIX_AQ_CTYPE_SQ) {
+		aq_req.sq.ena = 0;
+		aq_req.sq_mask.ena = 1;
+		q_cnt = pfvf->sq_ctx->qsize;
+		bmap = pfvf->sq_bmap;
+	}
+	if (req->ctype == NIX_AQ_CTYPE_RQ) {
+		aq_req.rq.ena = 0;
+		aq_req.rq_mask.ena = 1;
+		q_cnt = pfvf->rq_ctx->qsize;
+		bmap = pfvf->rq_bmap;
+	}
+
+	aq_req.ctype = req->ctype;
+	aq_req.op = NIX_AQ_INSTOP_WRITE;
+
+	for (qidx = 0; qidx < q_cnt; qidx++) {
+		if (!test_bit(qidx, bmap))
+			continue;
+		aq_req.qidx = qidx;
+		rc = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL);
+		if (rc) {
+			err = rc;
+			dev_err(rvu->dev, "Failed to disable %s:%d context\n",
+				(req->ctype == NIX_AQ_CTYPE_CQ) ?
+				"CQ" : ((req->ctype == NIX_AQ_CTYPE_RQ) ?
+				"RQ" : "SQ"), qidx);
+		}
+	}
+
+	return err;
+}
+
+int rvu_mbox_handler_nix_aq_enq(struct rvu *rvu,
+				struct nix_aq_enq_req *req,
+				struct nix_aq_enq_rsp *rsp)
+{
+	return rvu_nix_aq_enq_inst(rvu, req, rsp);
+}
+
+int rvu_mbox_handler_nix_hwctx_disable(struct rvu *rvu,
+				       struct hwctx_disable_req *req,
+				       struct msg_rsp *rsp)
+{
+	return nix_lf_hwctx_disable(rvu, req);
+}
+
+int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
+				  struct nix_lf_alloc_req *req,
+				  struct nix_lf_alloc_rsp *rsp)
+{
+	int nixlf, qints, hwctx_size, intf, err, rc = 0;
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_block *block;
+	struct rvu_pfvf *pfvf;
+	u64 cfg, ctx_cfg;
+	int blkaddr;
+
+	if (!req->rq_cnt || !req->sq_cnt || !req->cq_cnt)
+		return NIX_AF_ERR_PARAM;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	/* Check if requested 'NIXLF <=> NPALF' mapping is valid */
+	if (req->npa_func) {
+		/* If default, use 'this' NIXLF's PFFUNC */
+		if (req->npa_func == RVU_DEFAULT_PF_FUNC)
+			req->npa_func = pcifunc;
+		if (!is_pffunc_map_valid(rvu, req->npa_func, BLKTYPE_NPA))
+			return NIX_AF_INVAL_NPA_PF_FUNC;
+	}
+
+	/* Check if requested 'NIXLF <=> SSOLF' mapping is valid */
+	if (req->sso_func) {
+		/* If default, use 'this' NIXLF's PFFUNC */
+		if (req->sso_func == RVU_DEFAULT_PF_FUNC)
+			req->sso_func = pcifunc;
+		if (!is_pffunc_map_valid(rvu, req->sso_func, BLKTYPE_SSO))
+			return NIX_AF_INVAL_SSO_PF_FUNC;
+	}
+
+	/* If RSS is being enabled, check if requested config is valid.
+	 * RSS table size should be power of two, otherwise
+	 * RSS_GRP::OFFSET + adder might go beyond that group or
+	 * won't be able to use entire table.
+	 */
+	if (req->rss_sz && (req->rss_sz > MAX_RSS_INDIR_TBL_SIZE ||
+			    !is_power_of_2(req->rss_sz)))
+		return NIX_AF_ERR_RSS_SIZE_INVALID;
+
+	if (req->rss_sz &&
+	    (!req->rss_grps || req->rss_grps > MAX_RSS_GROUPS))
+		return NIX_AF_ERR_RSS_GRPS_INVALID;
+
+	/* Reset this NIX LF */
+	err = rvu_lf_reset(rvu, block, nixlf);
+	if (err) {
+		dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n",
+			block->addr - BLKADDR_NIX0, nixlf);
+		return NIX_AF_ERR_LF_RESET;
+	}
+
+	ctx_cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST3);
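+	/* NIX_AF_CONST3 packs log2 of each HW context size: SQ size at
+	 * bits [3:0], RQ at [7:4], CQ at [11:8], RSS at [15:12], QINT at
+	 * [23:20] and CINT at [27:24], as extracted below.
+	 */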
+
+	/* Alloc NIX RQ HW context memory and config the base */
+	hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->rq_ctx, req->rq_cnt, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	pfvf->rq_bmap = kcalloc(req->rq_cnt, sizeof(long), GFP_KERNEL);
+	if (!pfvf->rq_bmap)
+		goto free_mem;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_BASE(nixlf),
+		    (u64)pfvf->rq_ctx->iova);
+
+	/* Set caching and queue count in HW */
+	cfg = BIT_ULL(36) | (req->rq_cnt - 1);
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_CFG(nixlf), cfg);
+
+	/* Alloc NIX SQ HW context memory and config the base */
+	hwctx_size = 1UL << (ctx_cfg & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->sq_ctx, req->sq_cnt, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	pfvf->sq_bmap = kcalloc(req->sq_cnt, sizeof(long), GFP_KERNEL);
+	if (!pfvf->sq_bmap)
+		goto free_mem;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_BASE(nixlf),
+		    (u64)pfvf->sq_ctx->iova);
+	cfg = BIT_ULL(36) | (req->sq_cnt - 1);
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_CFG(nixlf), cfg);
+
+	/* Alloc NIX CQ HW context memory and config the base */
+	hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->cq_ctx, req->cq_cnt, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	pfvf->cq_bmap = kcalloc(req->cq_cnt, sizeof(long), GFP_KERNEL);
+	if (!pfvf->cq_bmap)
+		goto free_mem;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_BASE(nixlf),
+		    (u64)pfvf->cq_ctx->iova);
+	cfg = BIT_ULL(36) | (req->cq_cnt - 1);
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_CFG(nixlf), cfg);
+
+	/* Initialize receive side scaling (RSS) */
+	hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF);
+	err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf,
+				 req->rss_sz, req->rss_grps, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	/* Alloc memory for CQINT's HW contexts */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2);
+	qints = (cfg >> 24) & 0xFFF;
+	hwctx_size = 1UL << ((ctx_cfg >> 24) & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->cq_ints_ctx, qints, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_BASE(nixlf),
+		    (u64)pfvf->cq_ints_ctx->iova);
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_CFG(nixlf), BIT_ULL(36));
+
+	/* Alloc memory for QINT's HW contexts */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2);
+	qints = (cfg >> 12) & 0xFFF;
+	hwctx_size = 1UL << ((ctx_cfg >> 20) & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->nix_qints_ctx, qints, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_BASE(nixlf),
+		    (u64)pfvf->nix_qints_ctx->iova);
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_CFG(nixlf), BIT_ULL(36));
+
+	/* Setup VLANX TPID's.
+	 * Use VLAN1 for 802.1Q
+	 * and VLAN0 for 802.1AD.
+	 */
+	cfg = (0x8100ULL << 16) | 0x88A8ULL;
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG(nixlf), cfg);
+
+	/* Enable LMTST for this NIX LF */
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG2(nixlf), BIT_ULL(0));
+
+	/* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC.
+	 * Start from zero so the VLAN TPID value still in 'cfg' from
+	 * above cannot leak into this register when npa_func is unset.
+	 */
+	cfg = 0;
+	if (req->npa_func)
+		cfg = req->npa_func;
+	if (req->sso_func)
+		cfg |= (u64)req->sso_func << 16;
+
+	cfg |= (u64)req->xqe_sz << 33;
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf), cfg);
+
+	/* Config Rx pkt length, csum checks and apad  enable / disable */
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg);
+
+	intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+	err = nix_interface_init(rvu, pcifunc, intf, nixlf);
+	if (err)
+		goto free_mem;
+
+	/* Disable NPC entries as NIXLF's contexts are not initialized yet */
+	rvu_npc_disable_default_entries(rvu, pcifunc, nixlf);
+
+	goto exit;
+
+free_mem:
+	nix_ctx_free(rvu, pfvf);
+	rc = -ENOMEM;
+
+exit:
+	/* Set macaddr of this PF/VF */
+	ether_addr_copy(rsp->mac_addr, pfvf->mac_addr);
+
+	/* set SQB size info */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQ_CONST);
+	rsp->sqb_size = (cfg >> 34) & 0xFFFF;
+	rsp->rx_chan_base = pfvf->rx_chan_base;
+	rsp->tx_chan_base = pfvf->tx_chan_base;
+	rsp->rx_chan_cnt = pfvf->rx_chan_cnt;
+	rsp->tx_chan_cnt = pfvf->tx_chan_cnt;
+	rsp->lso_tsov4_idx = NIX_LSO_FORMAT_IDX_TSOV4;
+	rsp->lso_tsov6_idx = NIX_LSO_FORMAT_IDX_TSOV6;
+	/* Get HW supported stat count */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+	rsp->lf_rx_stats = ((cfg >> 32) & 0xFF);
+	rsp->lf_tx_stats = ((cfg >> 24) & 0xFF);
+	/* Get count of CQ IRQs and error IRQs supported per LF */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2);
+	rsp->qints = ((cfg >> 12) & 0xFFF);
+	rsp->cints = ((cfg >> 24) & 0xFFF);
+	return rc;
+}
+
+int rvu_mbox_handler_nix_lf_free(struct rvu *rvu, struct msg_req *req,
+				 struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_block *block;
+	int blkaddr, nixlf, err;
+	struct rvu_pfvf *pfvf;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_interface_deinit(rvu, pcifunc, nixlf);
+
+	/* Reset this NIX LF */
+	err = rvu_lf_reset(rvu, block, nixlf);
+	if (err) {
+		dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n",
+			block->addr - BLKADDR_NIX0, nixlf);
+		return NIX_AF_ERR_LF_RESET;
+	}
+
+	nix_ctx_free(rvu, pfvf);
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu,
+					 struct nix_mark_format_cfg  *req,
+					 struct nix_mark_format_cfg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	struct nix_hw *nix_hw;
+	struct rvu_pfvf *pfvf;
+	int blkaddr, rc;
+	u32 cfg;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	cfg = (((u32)req->offset & 0x7) << 16) |
+	      (((u32)req->y_mask & 0xF) << 12) |
+	      (((u32)req->y_val & 0xF) << 8) |
+	      (((u32)req->r_mask & 0xF) << 4) | ((u32)req->r_val & 0xF);
+
+	rc = rvu_nix_reserve_mark_format(rvu, nix_hw, blkaddr, cfg);
+	if (rc < 0) {
+		dev_err(rvu->dev, "No mark_format_ctl for (pf:%d, vf:%d)",
+			rvu_get_pf(pcifunc), pcifunc & RVU_PFVF_FUNC_MASK);
+		return NIX_AF_ERR_MARK_CFG_FAIL;
+	}
+
+	rsp->mark_format_idx = rc;
+	return 0;
+}
+
+/* Disable shaping of pkts by a scheduler queue
+ * at a given scheduler level.
+ */
+static void nix_reset_tx_shaping(struct rvu *rvu, int blkaddr,
+				 int lvl, int schq)
+{
+	u64  cir_reg = 0, pir_reg = 0;
+	u64  cfg;
+
+	switch (lvl) {
+	case NIX_TXSCH_LVL_TL1:
+		cir_reg = NIX_AF_TL1X_CIR(schq);
+		pir_reg = 0; /* PIR not available at TL1 */
+		break;
+	case NIX_TXSCH_LVL_TL2:
+		cir_reg = NIX_AF_TL2X_CIR(schq);
+		pir_reg = NIX_AF_TL2X_PIR(schq);
+		break;
+	case NIX_TXSCH_LVL_TL3:
+		cir_reg = NIX_AF_TL3X_CIR(schq);
+		pir_reg = NIX_AF_TL3X_PIR(schq);
+		break;
+	case NIX_TXSCH_LVL_TL4:
+		cir_reg = NIX_AF_TL4X_CIR(schq);
+		pir_reg = NIX_AF_TL4X_PIR(schq);
+		break;
+	}
+
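+	/* Shaping is disabled by clearing bit 0 of the CIR (and PIR) register */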
+	if (!cir_reg)
+		return;
+	cfg = rvu_read64(rvu, blkaddr, cir_reg);
+	rvu_write64(rvu, blkaddr, cir_reg, cfg & ~BIT_ULL(0));
+
+	if (!pir_reg)
+		return;
+	cfg = rvu_read64(rvu, blkaddr, pir_reg);
+	rvu_write64(rvu, blkaddr, pir_reg, cfg & ~BIT_ULL(0));
+}
+
+static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
+				 int lvl, int schq)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	int link;
+
+	/* Reset TL4's SDP link config */
+	if (lvl == NIX_TXSCH_LVL_TL4)
+		rvu_write64(rvu, blkaddr, NIX_AF_TL4X_SDP_LINK_CFG(schq), 0x00);
+
+	if (lvl != NIX_TXSCH_LVL_TL2)
+		return;
+
+	/* Reset TL2's CGX or LBK link config */
+	for (link = 0; link < (hw->cgx_links + hw->lbk_links); link++)
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_TL3_TL2X_LINKX_CFG(schq, link), 0x00);
+}
+
+static int
+rvu_get_tl1_schqs(struct rvu *rvu, int blkaddr, u16 pcifunc,
+		  u16 *schq_list, u16 *schq_cnt)
+{
+	struct nix_txsch *txsch;
+	struct nix_hw *nix_hw;
+	struct rvu_pfvf *pfvf;
+	u8 cgx_id, lmac_id;
+	u16 schq_base;
+	u32 *pfvf_map;
+	int pf, intf;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -ENODEV;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL1];
+	pfvf_map = txsch->pfvf_map;
+	pf = rvu_get_pf(pcifunc);
+
+	/* static allocation as two TL1's per link */
+	intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+
+	switch (intf) {
+	case NIX_INTF_TYPE_CGX:
+		rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id);
+		schq_base = (cgx_id * MAX_LMAC_PER_CGX + lmac_id) * 2;
+		break;
+	case NIX_INTF_TYPE_LBK:
+		schq_base = rvu->cgx_cnt_max * MAX_LMAC_PER_CGX * 2;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	/* Both schq_base and schq_base + 1 are used below */
+	if (schq_base + 2 > txsch->schq.max)
+		return -ENODEV;
+
+	/* init pfvf_map as we store flags */
+	if (pfvf_map[schq_base] == U32_MAX) {
+		pfvf_map[schq_base] =
+			TXSCH_MAP((pf << RVU_PFVF_PF_SHIFT), 0);
+		pfvf_map[schq_base + 1] =
+			TXSCH_MAP((pf << RVU_PFVF_PF_SHIFT), 0);
+
+		/* Onetime reset for TL1 */
+		nix_reset_tx_linkcfg(rvu, blkaddr,
+				     NIX_TXSCH_LVL_TL1, schq_base);
+		nix_reset_tx_shaping(rvu, blkaddr,
+				     NIX_TXSCH_LVL_TL1, schq_base);
+
+		nix_reset_tx_linkcfg(rvu, blkaddr,
+				     NIX_TXSCH_LVL_TL1, schq_base + 1);
+		nix_reset_tx_shaping(rvu, blkaddr,
+				     NIX_TXSCH_LVL_TL1, schq_base + 1);
+	}
+
+	if (schq_list && schq_cnt) {
+		schq_list[0] = schq_base;
+		schq_list[1] = schq_base + 1;
+		*schq_cnt = 2;
+	}
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
+				     struct nix_txsch_alloc_req *req,
+				     struct nix_txsch_alloc_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	struct nix_txsch *txsch;
+	int lvl, idx, req_schq;
+	struct rvu_pfvf *pfvf;
+	struct nix_hw *nix_hw;
+	int blkaddr, rc = 0;
+	u32 *pfvf_map;
+	u16 schq;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	mutex_lock(&rvu->rsrc_lock);
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+		txsch = &nix_hw->txsch[lvl];
+		req_schq = req->schq_contig[lvl] + req->schq[lvl];
+		pfvf_map = txsch->pfvf_map;
+
+		if (!req_schq)
+			continue;
+
+		/* There are only 28 TL1s */
+		if (lvl == NIX_TXSCH_LVL_TL1) {
+			if (req->schq_contig[lvl] ||
+			    req->schq[lvl] > 2 ||
+			    rvu_get_tl1_schqs(rvu, blkaddr,
+					      pcifunc, NULL, NULL))
+				goto err;
+			continue;
+		}
+
+		/* Check if request is valid */
+		if (req_schq > MAX_TXSCHQ_PER_FUNC)
+			goto err;
+
+		/* If contiguous queues are needed, check for availability */
+		if (req->schq_contig[lvl] &&
+		    !rvu_rsrc_check_contig(&txsch->schq, req->schq_contig[lvl]))
+			goto err;
+
+		/* Check if full request can be accommodated */
+		if (req_schq >= rvu_rsrc_free_count(&txsch->schq))
+			goto err;
+	}
+
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+		txsch = &nix_hw->txsch[lvl];
+		rsp->schq_contig[lvl] = req->schq_contig[lvl];
+		pfvf_map = txsch->pfvf_map;
+		rsp->schq[lvl] = req->schq[lvl];
+
+		if (!req->schq[lvl] && !req->schq_contig[lvl])
+			continue;
+
+		/* Handle TL1 specially as its allocation is
+		 * restricted to 2 TL1s per link
+		 */
+		if (lvl == NIX_TXSCH_LVL_TL1) {
+			rsp->schq_contig[lvl] = 0;
+			rvu_get_tl1_schqs(rvu, blkaddr, pcifunc,
+					  &rsp->schq_list[lvl][0],
+					  &rsp->schq[lvl]);
+			continue;
+		}
+
+		/* Alloc contiguous queues first */
+		if (req->schq_contig[lvl]) {
+			schq = rvu_alloc_rsrc_contig(&txsch->schq,
+						     req->schq_contig[lvl]);
+
+			for (idx = 0; idx < req->schq_contig[lvl]; idx++) {
+				pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
+				nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
+				nix_reset_tx_shaping(rvu, blkaddr, lvl, schq);
+				rsp->schq_contig_list[lvl][idx] = schq;
+				schq++;
+			}
+		}
+
+		/* Alloc non-contiguous queues */
+		for (idx = 0; idx < req->schq[lvl]; idx++) {
+			schq = rvu_alloc_rsrc(&txsch->schq);
+			pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
+			nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
+			nix_reset_tx_shaping(rvu, blkaddr, lvl, schq);
+			rsp->schq_list[lvl][idx] = schq;
+		}
+	}
+	goto exit;
+err:
+	rc = NIX_AF_ERR_TLX_ALLOC_FAIL;
+exit:
+	mutex_unlock(&rvu->rsrc_lock);
+	return rc;
+}
+
+static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
+{
+	int blkaddr, nixlf, lvl, schq, err;
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct nix_txsch *txsch;
+	struct nix_hw *nix_hw;
+	u64 cfg;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	/* Disable TL2/3 queue links before SMQ flush */
+	mutex_lock(&rvu->rsrc_lock);
+	for (lvl = NIX_TXSCH_LVL_TL4; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+		if (lvl != NIX_TXSCH_LVL_TL2 && lvl != NIX_TXSCH_LVL_TL4)
+			continue;
+
+		txsch = &nix_hw->txsch[lvl];
+		for (schq = 0; schq < txsch->schq.max; schq++) {
+			if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
+				continue;
+			nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
+		}
+	}
+
+	/* Flush SMQs */
+	txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+	for (schq = 0; schq < txsch->schq.max; schq++) {
+		if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
+			continue;
+		cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq));
+		/* Do SMQ flush and set enqueue xoff */
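+		/* BIT(49) is the flush bit polled for completion below; BIT(50) is enqueue xoff */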
+		cfg |= BIT_ULL(50) | BIT_ULL(49);
+		rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq), cfg);
+
+		/* Wait for flush to complete */
+		err = rvu_poll_reg(rvu, blkaddr,
+				   NIX_AF_SMQX_CFG(schq), BIT_ULL(49), true);
+		if (err) {
+			dev_err(rvu->dev,
+				"NIXLF%d: SMQ%d flush failed\n", nixlf, schq);
+		}
+	}
+
+	/* Now free scheduler queues to free pool */
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+		/* Free all SCHQs except TL1 as
+		 * TL1 is shared across all VFs of an RVU PF
+		 */
+		if (lvl == NIX_TXSCH_LVL_TL1)
+			continue;
+
+		txsch = &nix_hw->txsch[lvl];
+		for (schq = 0; schq < txsch->schq.max; schq++) {
+			if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
+				continue;
+			rvu_free_rsrc(&txsch->schq, schq);
+			txsch->pfvf_map[schq] = 0;
+		}
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+
+	/* Sync cached info for this LF in NDC-TX to LLC/DRAM */
+	rvu_write64(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12) | nixlf);
+	err = rvu_poll_reg(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12), true);
+	if (err)
+		dev_err(rvu->dev, "NDC-TX sync failed for NIXLF %d\n", nixlf);
+
+	return 0;
+}
+
+static int nix_txschq_free_one(struct rvu *rvu,
+			       struct nix_txsch_free_req *req)
+{
+	int lvl, schq, nixlf, blkaddr, rc;
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct nix_txsch *txsch;
+	struct nix_hw *nix_hw;
+	u32 *pfvf_map;
+	u64 cfg;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	lvl = req->schq_lvl;
+	schq = req->schq;
+	txsch = &nix_hw->txsch[lvl];
+
+	/* Don't allow freeing TL1 */
+	if (lvl > NIX_TXSCH_LVL_TL2 ||
+	    schq >= txsch->schq.max)
+		goto err;
+
+	pfvf_map = txsch->pfvf_map;
+	mutex_lock(&rvu->rsrc_lock);
+
+	if (TXSCH_MAP_FUNC(pfvf_map[schq]) != pcifunc) {
+		mutex_unlock(&rvu->rsrc_lock);
+		goto err;
+	}
+
+	/* Flush if it is an SMQ. The onus of disabling
+	 * TL2/3 queue links before SMQ flush is on the user.
+	 */
+	if (lvl == NIX_TXSCH_LVL_SMQ) {
+		cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq));
+		/* Do SMQ flush and set enqueue xoff */
+		cfg |= BIT_ULL(50) | BIT_ULL(49);
+		rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq), cfg);
+
+		/* Wait for flush to complete */
+		rc = rvu_poll_reg(rvu, blkaddr,
+				  NIX_AF_SMQX_CFG(schq), BIT_ULL(49), true);
+		if (rc) {
+			dev_err(rvu->dev,
+				"NIXLF%d: SMQ%d flush failed\n", nixlf, schq);
+		}
+	}
+
+	/* Free the resource */
+	rvu_free_rsrc(&txsch->schq, schq);
+	txsch->pfvf_map[schq] = 0;
+	mutex_unlock(&rvu->rsrc_lock);
+	return 0;
+err:
+	return NIX_AF_ERR_TLX_INVALID;
+}
+
+int rvu_mbox_handler_nix_txsch_free(struct rvu *rvu,
+				    struct nix_txsch_free_req *req,
+				    struct msg_rsp *rsp)
+{
+	if (req->flags & TXSCHQ_FREE_ALL)
+		return nix_txschq_free(rvu, req->hdr.pcifunc);
+
+	return nix_txschq_free_one(rvu, req);
+}
+
+static bool is_txschq_config_valid(struct rvu *rvu, u16 pcifunc, int blkaddr,
+				   int lvl, u64 reg, u64 regval)
+{
+	u64 regbase = reg & 0xFFFF;
+	u16 schq, parent;
+
+	if (!rvu_check_valid_reg(TXSCHQ_HWREGMAP, lvl, reg))
+		return false;
+
+	schq = TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT);
+	/* Check if this schq belongs to this PF/VF or not */
+	if (!is_valid_txschq(rvu, blkaddr, lvl, pcifunc, schq))
+		return false;
+
+	parent = (regval >> 16) & 0x1FF;
+	/* Validate MDQ's TL4 parent */
+	if (regbase == NIX_AF_MDQX_PARENT(0) &&
+	    !is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_TL4, pcifunc, parent))
+		return false;
+
+	/* Validate TL4's TL3 parent */
+	if (regbase == NIX_AF_TL4X_PARENT(0) &&
+	    !is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_TL3, pcifunc, parent))
+		return false;
+
+	/* Validate TL3's TL2 parent */
+	if (regbase == NIX_AF_TL3X_PARENT(0) &&
+	    !is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_TL2, pcifunc, parent))
+		return false;
+
+	/* Validate TL2's TL1 parent */
+	if (regbase == NIX_AF_TL2X_PARENT(0) &&
+	    !is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_TL1, pcifunc, parent))
+		return false;
+
+	return true;
+}
+
+static int
+nix_tl1_default_cfg(struct rvu *rvu, u16 pcifunc)
+{
+	u16 schq_list[2], schq_cnt, schq;
+	int blkaddr, idx, err = 0;
+	u16 map_func, map_flags;
+	struct nix_hw *nix_hw;
+	u64 reg, regval;
+	u32 *pfvf_map;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	pfvf_map = nix_hw->txsch[NIX_TXSCH_LVL_TL1].pfvf_map;
+
+	mutex_lock(&rvu->rsrc_lock);
+
+	err = rvu_get_tl1_schqs(rvu, blkaddr,
+				pcifunc, schq_list, &schq_cnt);
+	if (err)
+		goto unlock;
+
+	for (idx = 0; idx < schq_cnt; idx++) {
+		schq = schq_list[idx];
+		map_func = TXSCH_MAP_FUNC(pfvf_map[schq]);
+		map_flags = TXSCH_MAP_FLAGS(pfvf_map[schq]);
+
+		/* Skip if the default config was already applied (e.g. by the PF) */
+		if (map_flags & NIX_TXSCHQ_TL1_CFG_DONE)
+			continue;
+
+		/* default configuration */
+		reg = NIX_AF_TL1X_TOPOLOGY(schq);
+		regval = (TXSCH_TL1_DFLT_RR_PRIO << 1);
+		rvu_write64(rvu, blkaddr, reg, regval);
+		reg = NIX_AF_TL1X_SCHEDULE(schq);
+		regval = TXSCH_TL1_DFLT_RR_QTM;
+		rvu_write64(rvu, blkaddr, reg, regval);
+		reg = NIX_AF_TL1X_CIR(schq);
+		regval = 0;
+		rvu_write64(rvu, blkaddr, reg, regval);
+
+		map_flags |= NIX_TXSCHQ_TL1_CFG_DONE;
+		pfvf_map[schq] = TXSCH_MAP(map_func, map_flags);
+	}
+unlock:
+	mutex_unlock(&rvu->rsrc_lock);
+	return err;
+}
+
+int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
+				    struct nix_txschq_config *req,
+				    struct msg_rsp *rsp)
+{
+	u16 schq, pcifunc = req->hdr.pcifunc;
+	struct rvu_hwinfo *hw = rvu->hw;
+	u64 reg, regval, schq_regbase;
+	struct nix_txsch *txsch;
+	u16 map_func, map_flags;
+	struct nix_hw *nix_hw;
+	int blkaddr, idx, err;
+	u32 *pfvf_map;
+	int nixlf;
+
+	if (req->lvl >= NIX_TXSCH_LVL_CNT ||
+	    req->num_regs > MAX_REGS_PER_MBOX_MSG)
+		return NIX_AF_INVAL_TXSCHQ_CFG;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	txsch = &nix_hw->txsch[req->lvl];
+	pfvf_map = txsch->pfvf_map;
+
+	/* A VF is only allowed to trigger
+	 * setting the default cfg on TL1
+	 */
+	if (pcifunc & RVU_PFVF_FUNC_MASK &&
+	    req->lvl == NIX_TXSCH_LVL_TL1)
+		return nix_tl1_default_cfg(rvu, pcifunc);
+
+	for (idx = 0; idx < req->num_regs; idx++) {
+		reg = req->reg[idx];
+		regval = req->regval[idx];
+		schq_regbase = reg & 0xFFFF;
+
+		if (!is_txschq_config_valid(rvu, pcifunc, blkaddr,
+					    txsch->lvl, reg, regval))
+			return NIX_AF_INVAL_TXSCHQ_CFG;
+
+		/* Replace PF/VF visible NIXLF slot with HW NIXLF id */
+		if (schq_regbase == NIX_AF_SMQX_CFG(0)) {
+			nixlf = rvu_get_lf(rvu, &hw->block[blkaddr],
+					   pcifunc, 0);
+			regval &= ~(0x7FULL << 24);
+			regval |= ((u64)nixlf << 24);
+		}
+
+		/* Mark config as done for TL1 by PF */
+		if (schq_regbase >= NIX_AF_TL1X_SCHEDULE(0) &&
+		    schq_regbase <= NIX_AF_TL1X_GREEN_BYTES(0)) {
+			schq = TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT);
+
+			mutex_lock(&rvu->rsrc_lock);
+
+			map_func = TXSCH_MAP_FUNC(pfvf_map[schq]);
+			map_flags = TXSCH_MAP_FLAGS(pfvf_map[schq]);
+
+			map_flags |= NIX_TXSCHQ_TL1_CFG_DONE;
+			pfvf_map[schq] = TXSCH_MAP(map_func, map_flags);
+			mutex_unlock(&rvu->rsrc_lock);
+		}
+
+		rvu_write64(rvu, blkaddr, reg, regval);
+
+		/* Check for SMQ flush, if so, poll for its completion */
+		if (schq_regbase == NIX_AF_SMQX_CFG(0) &&
+		    (regval & BIT_ULL(49))) {
+			err = rvu_poll_reg(rvu, blkaddr,
+					   reg, BIT_ULL(49), true);
+			if (err)
+				return NIX_AF_SMQ_FLUSH_FAILED;
+		}
+	}
+	return 0;
+}
+
+static int nix_rx_vtag_cfg(struct rvu *rvu, int nixlf, int blkaddr,
+			   struct nix_vtag_config *req)
+{
+	u64 regval = req->vtag_size;
+
+	if (req->rx.vtag_type > 7 || req->vtag_size > VTAGSIZE_T8)
+		return -EINVAL;
+
+	if (req->rx.capture_vtag)
+		regval |= BIT_ULL(5);
+	if (req->rx.strip_vtag)
+		regval |= BIT_ULL(4);
+
+	rvu_write64(rvu, blkaddr,
+		    NIX_AF_LFX_RX_VTAG_TYPEX(nixlf, req->rx.vtag_type), regval);
+	return 0;
+}
+
+int rvu_mbox_handler_nix_vtag_cfg(struct rvu *rvu,
+				  struct nix_vtag_config *req,
+				  struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, nixlf, err;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	if (req->cfg_type) {
+		err = nix_rx_vtag_cfg(rvu, nixlf, blkaddr, req);
+		if (err)
+			return NIX_AF_ERR_PARAM;
+	} else {
+		/* TODO: handle tx vtag configuration */
+		return 0;
+	}
+
+	return 0;
+}
+
+static int nix_setup_mce(struct rvu *rvu, int mce, u8 op,
+			 u16 pcifunc, int next, bool eol)
+{
+	struct nix_aq_enq_req aq_req;
+	int err;
+
+	memset(&aq_req, 0, sizeof(struct nix_aq_enq_req));
+	aq_req.hdr.pcifunc = 0;
+	aq_req.ctype = NIX_AQ_CTYPE_MCE;
+	aq_req.op = op;
+	aq_req.qidx = mce;
+
+	/* Forward bcast pkts to RQ0, RSS not needed */
+	aq_req.mce.op = 0;
+	aq_req.mce.index = 0;
+	aq_req.mce.eol = eol;
+	aq_req.mce.pf_func = pcifunc;
+	aq_req.mce.next = next;
+
+	/* All fields valid */
+	*(u64 *)(&aq_req.mce_mask) = ~0ULL;
+
+	err = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL);
+	if (err) {
+		dev_err(rvu->dev, "Failed to setup Bcast MCE for PF%d:VF%d\n",
+			rvu_get_pf(pcifunc), pcifunc & RVU_PFVF_FUNC_MASK);
+		return err;
+	}
+	return 0;
+}
+
+static int nix_update_mce_list(struct nix_mce_list *mce_list,
+			       u16 pcifunc, int idx, bool add)
+{
+	struct mce *mce, *tail = NULL;
+	bool delete = false;
+
+	/* Scan through the current list */
+	hlist_for_each_entry(mce, &mce_list->head, node) {
+		/* If already exists, then delete */
+		if (mce->pcifunc == pcifunc && !add) {
+			delete = true;
+			break;
+		}
+		tail = mce;
+	}
+
+	if (delete) {
+		hlist_del(&mce->node);
+		kfree(mce);
+		mce_list->count--;
+		return 0;
+	}
+
+	if (!add)
+		return 0;
+
+	/* Add a new one to the list, at the tail */
+	mce = kzalloc(sizeof(*mce), GFP_KERNEL);
+	if (!mce)
+		return -ENOMEM;
+	mce->idx = idx;
+	mce->pcifunc = pcifunc;
+	if (!tail)
+		hlist_add_head(&mce->node, &mce_list->head);
+	else
+		hlist_add_behind(&mce->node, &tail->node);
+	mce_list->count++;
+	return 0;
+}
+
+static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
+{
+	int err = 0, idx, next_idx, count;
+	struct nix_mce_list *mce_list;
+	struct mce *mce, *next_mce;
+	struct nix_mcast *mcast;
+	struct nix_hw *nix_hw;
+	struct rvu_pfvf *pfvf;
+	int blkaddr;
+
+	/* Broadcast pkt replication is not needed for AF's VFs, hence skip */
+	if (is_afvf(pcifunc))
+		return 0;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return 0;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return 0;
+
+	mcast = &nix_hw->mcast;
+
+	/* Get this PF/VF func's MCE index */
+	pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+	idx = pfvf->bcast_mce_idx + (pcifunc & RVU_PFVF_FUNC_MASK);
+
+	mce_list = &pfvf->bcast_mce_list;
+	if (idx >= (pfvf->bcast_mce_idx + mce_list->max)) {
+		dev_err(rvu->dev,
+			"%s: Idx %d > max MCE idx %d, for PF%d bcast list\n",
+			__func__, idx, mce_list->max,
+			pcifunc >> RVU_PFVF_PF_SHIFT);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mcast->mce_lock);
+
+	err = nix_update_mce_list(mce_list, pcifunc, idx, add);
+	if (err)
+		goto end;
+
+	/* If the list is now empty there is nothing to sync to HW */
+	if (!mce_list->count)
+		goto end;
+	count = mce_list->count;
+
+	/* Dump the updated list to HW */
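+	/* 'next' is dereferenced only while count > 0, so the tail
+	 * entry never reads past the end of the list.
+	 */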
+	hlist_for_each_entry(mce, &mce_list->head, node) {
+		next_idx = 0;
+		count--;
+		if (count) {
+			next_mce = hlist_entry(mce->node.next,
+					       struct mce, node);
+			next_idx = next_mce->idx;
+		}
+		/* EOL should be set in last MCE */
+		err = nix_setup_mce(rvu, mce->idx,
+				    NIX_AQ_INSTOP_WRITE, mce->pcifunc,
+				    next_idx, count ? false : true);
+		if (err)
+			goto end;
+	}
+
+end:
+	mutex_unlock(&mcast->mce_lock);
+	return err;
+}
+
+static int nix_setup_bcast_tables(struct rvu *rvu, struct nix_hw *nix_hw)
+{
+	struct nix_mcast *mcast = &nix_hw->mcast;
+	int err, pf, numvfs, idx;
+	struct rvu_pfvf *pfvf;
+	u16 pcifunc;
+	u64 cfg;
+
+	/* Skip PF0 (i.e AF) */
+	for (pf = 1; pf < (rvu->cgx_mapped_pfs + 1); pf++) {
+		cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
+		/* If PF is not enabled, nothing to do */
+		if (!((cfg >> 20) & 0x01))
+			continue;
+		/* Get numVFs attached to this PF */
+		numvfs = (cfg >> 12) & 0xFF;
+
+		pfvf = &rvu->pf[pf];
+		/* Save the start MCE */
+		pfvf->bcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1);
+
+		nix_mce_list_init(&pfvf->bcast_mce_list, numvfs + 1);
+
+		for (idx = 0; idx < (numvfs + 1); idx++) {
+			/* idx-0 is for PF, followed by VFs */
+			pcifunc = (pf << RVU_PFVF_PF_SHIFT);
+			pcifunc |= idx;
+			/* Add dummy entries now, so that we don't have to check
+			 * for whether AQ_OP should be INIT/WRITE later on.
+			 * Will be updated when a NIXLF is attached/detached to
+			 * these PF/VFs.
+			 */
+			err = nix_setup_mce(rvu, pfvf->bcast_mce_idx + idx,
+					    NIX_AQ_INSTOP_INIT,
+					    pcifunc, 0, true);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
+{
+	struct nix_mcast *mcast = &nix_hw->mcast;
+	struct rvu_hwinfo *hw = rvu->hw;
+	int err, size;
+
+	size = (rvu_read64(rvu, blkaddr, NIX_AF_CONST3) >> 16) & 0x0F;
+	size = (1ULL << size);
+
+	/* Alloc memory for multicast/mirror replication entries */
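+	/* 256 entries << MC_TBL_SIZE (MC_TBL_SZ_512) = 512 MCEs, sized
+	 * for the 256 HWVFs + 12 PFs noted at the top of this file.
+	 */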
+	err = qmem_alloc(rvu->dev, &mcast->mce_ctx,
+			 (256UL << MC_TBL_SIZE), size);
+	if (err)
+		return -ENOMEM;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_BASE,
+		    (u64)mcast->mce_ctx->iova);
+
+	/* Set max list length equal to max no of VFs per PF  + PF itself */
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_CFG,
+		    BIT_ULL(36) | (hw->max_vfs_per_pf << 4) | MC_TBL_SIZE);
+
+	/* Alloc memory for multicast replication buffers */
+	size = rvu_read64(rvu, blkaddr, NIX_AF_MC_MIRROR_CONST) & 0xFFFF;
+	err = qmem_alloc(rvu->dev, &mcast->mcast_buf,
+			 (8UL << MC_BUF_CNT), size);
+	if (err)
+		return -ENOMEM;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_BUF_BASE,
+		    (u64)mcast->mcast_buf->iova);
+
+	/* Alloc pkind for NIX internal RX multicast/mirror replay */
+	mcast->replay_pkind = rvu_alloc_rsrc(&hw->pkind.rsrc);
+
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_BUF_CFG,
+		    BIT_ULL(63) | (mcast->replay_pkind << 24) |
+		    BIT_ULL(20) | MC_BUF_CNT);
+
+	mutex_init(&mcast->mce_lock);
+
+	return nix_setup_bcast_tables(rvu, nix_hw);
+}
+
+static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
+{
+	struct nix_txsch *txsch;
+	u64 cfg, reg;
+	int err, lvl;
+
+	/* Get scheduler queue count of each type and alloc
+	 * bitmap for each for alloc/free/attach operations.
+	 */
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+		txsch = &nix_hw->txsch[lvl];
+		txsch->lvl = lvl;
+		switch (lvl) {
+		case NIX_TXSCH_LVL_SMQ:
+			reg = NIX_AF_MDQ_CONST;
+			break;
+		case NIX_TXSCH_LVL_TL4:
+			reg = NIX_AF_TL4_CONST;
+			break;
+		case NIX_TXSCH_LVL_TL3:
+			reg = NIX_AF_TL3_CONST;
+			break;
+		case NIX_TXSCH_LVL_TL2:
+			reg = NIX_AF_TL2_CONST;
+			break;
+		case NIX_TXSCH_LVL_TL1:
+			reg = NIX_AF_TL1_CONST;
+			break;
+		}
+		cfg = rvu_read64(rvu, blkaddr, reg);
+		txsch->schq.max = cfg & 0xFFFF;
+		err = rvu_alloc_bitmap(&txsch->schq);
+		if (err)
+			return err;
+
+		/* Allocate memory for scheduler queues to
+		 * PF/VF pcifunc mapping info.
+		 */
+		txsch->pfvf_map = devm_kcalloc(rvu->dev, txsch->schq.max,
+					       sizeof(u32), GFP_KERNEL);
+		if (!txsch->pfvf_map)
+			return -ENOMEM;
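+		/* Filling with U8_MAX bytes makes each 32-bit map entry
+		 * U32_MAX, the "unallocated" marker checked in
+		 * rvu_get_tl1_schqs().
+		 */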
+		memset(txsch->pfvf_map, U8_MAX, txsch->schq.max * sizeof(u32));
+	}
+	return 0;
+}
+
+int rvu_nix_reserve_mark_format(struct rvu *rvu, struct nix_hw *nix_hw,
+				int blkaddr, u32 cfg)
+{
+	int fmt_idx;
+
+	for (fmt_idx = 0; fmt_idx < nix_hw->mark_format.in_use; fmt_idx++) {
+		if (nix_hw->mark_format.cfg[fmt_idx] == cfg)
+			return fmt_idx;
+	}
+	if (fmt_idx >= nix_hw->mark_format.total)
+		return -ERANGE;
+
+	rvu_write64(rvu, blkaddr, NIX_AF_MARK_FORMATX_CTL(fmt_idx), cfg);
+	nix_hw->mark_format.cfg[fmt_idx] = cfg;
+	nix_hw->mark_format.in_use++;
+	return fmt_idx;
+}
+
+static int nix_af_mark_format_setup(struct rvu *rvu, struct nix_hw *nix_hw,
+				    int blkaddr)
+{
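+	/* Each entry encodes (offset << 16) | (y_mask << 12) |
+	 * (y_val << 8) | (r_mask << 4) | r_val, the same layout built in
+	 * rvu_mbox_handler_nix_mark_format_cfg().
+	 */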
+	u64 cfgs[] = {
+		[NIX_MARK_CFG_IP_DSCP_RED]         = 0x10003,
+		[NIX_MARK_CFG_IP_DSCP_YELLOW]      = 0x11200,
+		[NIX_MARK_CFG_IP_DSCP_YELLOW_RED]  = 0x11203,
+		[NIX_MARK_CFG_IP_ECN_RED]          = 0x6000c,
+		[NIX_MARK_CFG_IP_ECN_YELLOW]       = 0x60c00,
+		[NIX_MARK_CFG_IP_ECN_YELLOW_RED]   = 0x60c0c,
+		[NIX_MARK_CFG_VLAN_DEI_RED]        = 0x30008,
+		[NIX_MARK_CFG_VLAN_DEI_YELLOW]     = 0x30800,
+		[NIX_MARK_CFG_VLAN_DEI_YELLOW_RED] = 0x30808,
+	};
+	int i, rc;
+	u64 total;
+
+	total = (rvu_read64(rvu, blkaddr, NIX_AF_PSE_CONST) & 0xFF00) >> 8;
+	nix_hw->mark_format.total = (u8)total;
+	nix_hw->mark_format.cfg = devm_kcalloc(rvu->dev, total, sizeof(u32),
+					       GFP_KERNEL);
+	if (!nix_hw->mark_format.cfg)
+		return -ENOMEM;
+	for (i = 0; i < NIX_MARK_CFG_MAX; i++) {
+		rc = rvu_nix_reserve_mark_format(rvu, nix_hw, blkaddr, cfgs[i]);
+		if (rc < 0)
+			dev_err(rvu->dev, "Err %d in setup mark format %d\n",
+				rc, i);
+	}
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_stats_rst(struct rvu *rvu, struct msg_req *req,
+				   struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int i, nixlf, blkaddr;
+	u64 stats;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	/* Get stats count supported by HW */
+	stats = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+
+	/* Reset tx stats */
+	for (i = 0; i < ((stats >> 24) & 0xFF); i++)
+		rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_STATX(nixlf, i), 0);
+
+	/* Reset rx stats */
+	for (i = 0; i < ((stats >> 32) & 0xFF); i++)
+		rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_STATX(nixlf, i), 0);
+
+	return 0;
+}
+
+/* Returns the ALG index to be set into NPC_RX_ACTION */
+static int get_flowkey_alg_idx(struct nix_hw *nix_hw, u32 flow_cfg)
+{
+	int i;
+
+	/* Scan over existing algo entries to find a match */
+	for (i = 0; i < nix_hw->flowkey.in_use; i++)
+		if (nix_hw->flowkey.flowkey[i] == flow_cfg)
+			return i;
+
+	return -ERANGE;
+}
+
+static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
+{
+	int idx, nr_field, key_off, field_marker, keyoff_marker;
+	int max_key_off, max_bit_pos, group_member;
+	struct nix_rx_flowkey_alg *field;
+	struct nix_rx_flowkey_alg tmp;
+	u32 key_type, valid_key;
+
+	if (!alg)
+		return -EINVAL;
+
+#define FIELDS_PER_ALG  5
+#define MAX_KEY_OFF	40
+	/* Clear all fields */
+	memset(alg, 0, sizeof(u64) * FIELDS_PER_ALG);
+
+	/* Each of the 32 possible flow key algorithm definitions should
+	 * fall into the above incremental config (except ALG0). Otherwise a
+	 * single NPC MCAM entry is not sufficient for supporting RSS.
+	 *
+	 * If a different definition or combination is needed then the NPC
+	 * MCAM has to be programmed to filter such pkts and its action
+	 * should point to this definition to calculate the flowtag or hash.
+	 *
+	 * The for loop below goes over _all_ protocol fields; the following
+	 * variables depict the state machine's forward progress:
+	 *
+	 * keyoff_marker - Enabled when the hash byte length needs to be
+	 * accounted for in the field->key_offset update.
+	 * field_marker - Enabled when a new field needs to be selected.
+	 * group_member - Enabled when a protocol is part of a group.
+	 */
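+	/* Worked example (a sketch of the state machine below, assuming the
+	 * flow key type bit order PORT/IPV4/IPV6/TCP/UDP/SCTP from the mbox
+	 * definitions): flow_cfg = IPV4 | IPV6 | TCP | UDP | SCTP yields
+	 * three fields:
+	 *   alg[0]: IPv4 SIP+DIP,   key_offset 0,  8 bytes
+	 *   alg[1]: IPv6 SIP+DIP,   key_offset 0, 32 bytes (overlaid with
+	 *           IPv4, since IPV4 leaves keyoff_marker false)
+	 *   alg[2]: L4 SPORT+DPORT, key_offset 32, 4 bytes, shared by
+	 *           TCP/UDP/SCTP via the accumulated ltype_match
+	 */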
+
+	keyoff_marker = 0; max_key_off = 0; group_member = 0;
+	nr_field = 0; key_off = 0; field_marker = 1;
+	field = &tmp; max_bit_pos = fls(flow_cfg);
+	for (idx = 0;
+	     idx < max_bit_pos && nr_field < FIELDS_PER_ALG &&
+	     key_off < MAX_KEY_OFF; idx++) {
+		key_type = BIT(idx);
+		valid_key = flow_cfg & key_type;
+		/* Found a field marker, reset the field values */
+		if (field_marker)
+			memset(&tmp, 0, sizeof(tmp));
+
+		switch (key_type) {
+		case NIX_FLOW_KEY_TYPE_PORT:
+			field->sel_chan = true;
+			/* This should be set to 1, when SEL_CHAN is set */
+			field->bytesm1 = 1;
+			field_marker = true;
+			keyoff_marker = true;
+			break;
+		case NIX_FLOW_KEY_TYPE_IPV4:
+			field->lid = NPC_LID_LC;
+			field->ltype_match = NPC_LT_LC_IP;
+			field->hdr_offset = 12; /* SIP offset */
+			field->bytesm1 = 7; /* SIP + DIP, 8 bytes */
+			field->ltype_mask = 0xF; /* Match only IPv4 */
+			field_marker = true;
+			keyoff_marker = false;
+			break;
+		case NIX_FLOW_KEY_TYPE_IPV6:
+			field->lid = NPC_LID_LC;
+			field->ltype_match = NPC_LT_LC_IP6;
+			field->hdr_offset = 8; /* SIP offset */
+			field->bytesm1 = 31; /* SIP + DIP, 32 bytes */
+			field->ltype_mask = 0xF; /* Match only IPv6 */
+			field_marker = true;
+			keyoff_marker = true;
+			break;
+		case NIX_FLOW_KEY_TYPE_TCP:
+		case NIX_FLOW_KEY_TYPE_UDP:
+		case NIX_FLOW_KEY_TYPE_SCTP:
+			field->lid = NPC_LID_LD;
+			field->bytesm1 = 3; /* Sport + Dport, 4 bytes */
+			if (key_type == NIX_FLOW_KEY_TYPE_TCP && valid_key) {
+				field->ltype_match |= NPC_LT_LD_TCP;
+				group_member = true;
+			} else if (key_type == NIX_FLOW_KEY_TYPE_UDP &&
+				   valid_key) {
+				field->ltype_match |= NPC_LT_LD_UDP;
+				group_member = true;
+			} else if (key_type == NIX_FLOW_KEY_TYPE_SCTP &&
+				   valid_key) {
+				field->ltype_match |= NPC_LT_LD_SCTP;
+				group_member = true;
+			}
+			field->ltype_mask = ~field->ltype_match;
+			if (key_type == NIX_FLOW_KEY_TYPE_SCTP) {
+				/* Handle the case where any item in the
+				 * group was enabled but not the final one
+				 */
+				if (group_member) {
+					valid_key = true;
+					group_member = false;
+				}
+				field_marker = true;
+				keyoff_marker = true;
+			} else {
+				field_marker = false;
+				keyoff_marker = false;
+			}
+			break;
+		}
+		field->ena = 1;
+
+		/* Found a valid flow key type */
+		if (valid_key) {
+			field->key_offset = key_off;
+			memcpy(&alg[nr_field], field, sizeof(*field));
+			max_key_off = max(max_key_off, field->bytesm1 + 1);
+
+			/* Found a field marker, get the next field */
+			if (field_marker)
+				nr_field++;
+		}
+
+		/* Found a keyoff marker, update the new key_off */
+		if (keyoff_marker) {
+			key_off += max_key_off;
+			max_key_off = 0;
+		}
+	}
+	/* Processed all the flow key types */
+	if (idx == max_bit_pos && key_off <= MAX_KEY_OFF)
+		return 0;
+
+	return NIX_AF_ERR_RSS_NOSPC_FIELD;
+}
+
+static int reserve_flowkey_alg_idx(struct rvu *rvu, int blkaddr, u32 flow_cfg)
+{
+	u64 field[FIELDS_PER_ALG];
+	struct nix_hw *hw;
+	int fid, rc;
+
+	hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!hw)
+		return -EINVAL;
+
+	/* No room to add a new flow hash algorithm */
+	if (hw->flowkey.in_use >= NIX_FLOW_KEY_ALG_MAX)
+		return NIX_AF_ERR_RSS_NOSPC_ALGO;
+
+	/* Generate algo fields for the given flow_cfg */
+	rc = set_flowkey_fields((struct nix_rx_flowkey_alg *)field, flow_cfg);
+	if (rc)
+		return rc;
+
+	/* Update ALGX_FIELDX register with generated fields */
+	for (fid = 0; fid < FIELDS_PER_ALG; fid++)
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_RX_FLOW_KEY_ALGX_FIELDX(hw->flowkey.in_use,
+							   fid), field[fid]);
+
+	/* Store the flow_cfg for further lookup */
+	rc = hw->flowkey.in_use;
+	hw->flowkey.flowkey[rc] = flow_cfg;
+	hw->flowkey.in_use++;
+
+	return rc;
+}
+
+int rvu_mbox_handler_nix_rss_flowkey_cfg(struct rvu *rvu,
+					 struct nix_rss_flowkey_cfg *req,
+					 struct nix_rss_flowkey_cfg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int alg_idx, nixlf, blkaddr;
+	struct nix_hw *nix_hw;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	alg_idx = get_flowkey_alg_idx(nix_hw, req->flowkey_cfg);
+	/* Failed to get an algo index from the existing list, reserve a new one */
+	if (alg_idx < 0) {
+		alg_idx = reserve_flowkey_alg_idx(rvu, blkaddr,
+						  req->flowkey_cfg);
+		if (alg_idx < 0)
+			return alg_idx;
+	}
+	rsp->alg_idx = alg_idx;
+	rvu_npc_update_flowkey_alg_idx(rvu, pcifunc, nixlf, req->group,
+				       alg_idx, req->mcam_index);
+	return 0;
+}
+
+static int nix_rx_flowkey_alg_cfg(struct rvu *rvu, int blkaddr)
+{
+	u32 flowkey_cfg, minkey_cfg;
+	int alg, fid, rc;
+
+	/* Disable all flow key algx fieldx */
+	for (alg = 0; alg < NIX_FLOW_KEY_ALG_MAX; alg++) {
+		for (fid = 0; fid < FIELDS_PER_ALG; fid++)
+			rvu_write64(rvu, blkaddr,
+				    NIX_AF_RX_FLOW_KEY_ALGX_FIELDX(alg, fid),
+				    0);
+	}
+
+	/* IPv4/IPv6 SIP/DIPs */
+	flowkey_cfg = NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* TCPv4/v6 4-tuple, SIP, DIP, Sport, Dport */
+	minkey_cfg = flowkey_cfg;
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_TCP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* UDPv4/v6 4-tuple, SIP, DIP, Sport, Dport */
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_UDP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* SCTPv4/v6 4-tuple, SIP, DIP, Sport, Dport */
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_SCTP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* TCP/UDP v4/v6 4-tuple, rest IP pkts 2-tuple */
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_TCP |
+			NIX_FLOW_KEY_TYPE_UDP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* TCP/SCTP v4/v6 4-tuple, rest IP pkts 2-tuple */
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_TCP |
+			NIX_FLOW_KEY_TYPE_SCTP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* UDP/SCTP v4/v6 4-tuple, rest IP pkts 2-tuple */
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_UDP |
+			NIX_FLOW_KEY_TYPE_SCTP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	/* TCP/UDP/SCTP v4/v6 4-tuple, rest IP pkts 2-tuple */
+	flowkey_cfg = minkey_cfg | NIX_FLOW_KEY_TYPE_TCP |
+		      NIX_FLOW_KEY_TYPE_UDP | NIX_FLOW_KEY_TYPE_SCTP;
+	rc = reserve_flowkey_alg_idx(rvu, blkaddr, flowkey_cfg);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu,
+				      struct nix_set_mac_addr *req,
+				      struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_pfvf *pfvf;
+	int blkaddr, nixlf;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	ether_addr_copy(pfvf->mac_addr, req->mac_addr);
+
+	rvu_npc_install_ucast_entry(rvu, pcifunc, nixlf,
+				    pfvf->rx_chan_base, req->mac_addr);
+
+	rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
+				     struct msg_rsp *rsp)
+{
+	bool allmulti = false, disable_promisc = false;
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_pfvf *pfvf;
+	int blkaddr, nixlf;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	if (req->mode & NIX_RX_MODE_PROMISC)
+		allmulti = false;
+	else if (req->mode & NIX_RX_MODE_ALLMULTI)
+		allmulti = true;
+	else
+		disable_promisc = true;
+
+	if (disable_promisc)
+		rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
+	else
+		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
+					      pfvf->rx_chan_base, allmulti);
+
+	rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+
+	return 0;
+}
+
+static void nix_find_link_frs(struct rvu *rvu,
+			      struct nix_frs_cfg *req, u16 pcifunc)
+{
+	int pf = rvu_get_pf(pcifunc);
+	struct rvu_pfvf *pfvf;
+	int maxlen, minlen;
+	int numvfs, hwvf;
+	int vf;
+
+	/* Update with requester's min/max lengths */
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	pfvf->maxlen = req->maxlen;
+	if (req->update_minlen)
+		pfvf->minlen = req->minlen;
+
+	maxlen = req->maxlen;
+	minlen = req->update_minlen ? req->minlen : 0;
+
+	/* Get this PF's numVFs and starting hwvf */
+	rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
+
+	/* For each VF, compare requested max/minlen */
+	for (vf = 0; vf < numvfs; vf++) {
+		pfvf =  &rvu->hwvf[hwvf + vf];
+		if (pfvf->maxlen > maxlen)
+			maxlen = pfvf->maxlen;
+		if (req->update_minlen &&
+		    pfvf->minlen && pfvf->minlen < minlen)
+			minlen = pfvf->minlen;
+	}
+
+	/* Compare requested max/minlen with PF's max/minlen */
+	pfvf = &rvu->pf[pf];
+	if (pfvf->maxlen > maxlen)
+		maxlen = pfvf->maxlen;
+	if (req->update_minlen &&
+	    pfvf->minlen && pfvf->minlen < minlen)
+		minlen = pfvf->minlen;
+
+	/* Update the request with the max/min of the PF and its VFs */
+	req->maxlen = maxlen;
+	if (req->update_minlen)
+		req->minlen = minlen;
+}
+
+int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
+				    struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int pf = rvu_get_pf(pcifunc);
+	int blkaddr, schq, link = -1;
+	struct nix_txsch *txsch;
+	u64 cfg, lmac_fifo_len;
+	struct nix_hw *nix_hw;
+	u8 cgx = 0, lmac = 0;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	if (!req->sdp_link && req->maxlen > NIC_HW_MAX_FRS)
+		return NIX_AF_ERR_FRS_INVALID;
+
+	if (req->update_minlen && req->minlen < NIC_HW_MIN_FRS)
+		return NIX_AF_ERR_FRS_INVALID;
+
+	/* Check if the requester wants to update SMQs */
+	if (!req->update_smq)
+		goto rx_frscfg;
+
+	/* Update min/maxlen in each of the SMQ attached to this PF/VF */
+	txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+	mutex_lock(&rvu->rsrc_lock);
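+	/* NIX_AF_SMQX_CFG layout, as used below: bits [6:0] hold minlen
+	 * and bits [23:8] hold maxlen.
+	 */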
+	for (schq = 0; schq < txsch->schq.max; schq++) {
+		if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
+			continue;
+		cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq));
+		cfg = (cfg & ~(0xFFFFULL << 8)) | ((u64)req->maxlen << 8);
+		if (req->update_minlen)
+			cfg = (cfg & ~0x7FULL) | ((u64)req->minlen & 0x7F);
+		rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(schq), cfg);
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+
+rx_frscfg:
+	/* Check if config is for SDP link */
+	if (req->sdp_link) {
+		if (!hw->sdp_links)
+			return NIX_AF_ERR_RX_LINK_INVALID;
+		link = hw->cgx_links + hw->lbk_links;
+		goto linkcfg;
+	}
+
+	/* Check if the request is from CGX mapped RVU PF */
+	if (is_pf_cgxmapped(rvu, pf)) {
+		/* Get CGX and LMAC to which this PF is mapped and find link */
+		rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx, &lmac);
+		link = (cgx * hw->lmac_per_cgx) + lmac;
+	} else if (pf == 0) {
+		/* For VFs of PF0, ingress is the LBK port, so config the LBK link */
+		link = hw->cgx_links;
+	}
+
+	if (link < 0)
+		return NIX_AF_ERR_RX_LINK_INVALID;
+
+	nix_find_link_frs(rvu, req, pcifunc);
+
+linkcfg:
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link));
+	cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16);
+	if (req->update_minlen)
+		cfg = (cfg & ~0xFFFFULL) | req->minlen;
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), cfg);
+
+	if (req->sdp_link || pf == 0)
+		return 0;
+
+	/* Update transmit credits for CGX links */
+	lmac_fifo_len =
+		CGX_FIFO_LEN / cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
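+	/* Illustrative numbers only: with a 64 KB FIFO shared by 4 LMACs
+	 * and maxlen 9212, the credit below is (16384 - 9212) / 16 = 448
+	 * units of 16 bytes.
+	 */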
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link));
+	cfg &= ~(0xFFFFFULL << 12);
+	cfg |=  ((lmac_fifo_len - req->maxlen) / 16) << 12;
+	rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
+	rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_EXPR_CREDIT(link), cfg);
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_rxvlan_alloc(struct rvu *rvu, struct msg_req *req,
+				      struct msg_rsp *rsp)
+{
+	struct npc_mcam_alloc_entry_req alloc_req = { };
+	struct npc_mcam_alloc_entry_rsp alloc_rsp = { };
+	struct npc_mcam_free_entry_req free_req = { };
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, nixlf, err;
+	struct rvu_pfvf *pfvf;
+
+	/* LBK VFs do not have a separate MCAM UCAST entry, hence
+	 * skip allocating rxvlan for them
+	 */
+	if (is_afvf(pcifunc))
+		return 0;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	if (pfvf->rxvlan)
+		return 0;
+
+	/* alloc new mcam entry */
+	alloc_req.hdr.pcifunc = pcifunc;
+	alloc_req.count = 1;
+
+	err = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req,
+						    &alloc_rsp);
+	if (err)
+		return err;
+
+	/* update entry to enable rxvlan offload */
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0) {
+		err = NIX_AF_ERR_AF_LF_INVALID;
+		goto free_entry;
+	}
+
+	nixlf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0) {
+		err = NIX_AF_ERR_AF_LF_INVALID;
+		goto free_entry;
+	}
+
+	pfvf->rxvlan_index = alloc_rsp.entry_list[0];
+	/* all it means is that rxvlan_index is valid */
+	pfvf->rxvlan = true;
+
+	err = rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+	if (err)
+		goto free_entry;
+
+	return 0;
+free_entry:
+	free_req.hdr.pcifunc = pcifunc;
+	free_req.entry = alloc_rsp.entry_list[0];
+	rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, rsp);
+	pfvf->rxvlan = false;
+	return err;
+}
+
+int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
+				    struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_block *block;
+	struct rvu_pfvf *pfvf;
+	int nixlf, blkaddr;
+	u64 cfg;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf));
+	/* Set the interface configuration */
+	if (req->len_verify & BIT(0))
+		cfg |= BIT_ULL(41);
+	else
+		cfg &= ~BIT_ULL(41);
+
+	if (req->len_verify & BIT(1))
+		cfg |= BIT_ULL(40);
+	else
+		cfg &= ~BIT_ULL(40);
+
+	if (req->csum_verify & BIT(0))
+		cfg |= BIT_ULL(37);
+	else
+		cfg &= ~BIT_ULL(37);
+
+	rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), cfg);
+
+	return 0;
+}
+
+static void nix_link_config(struct rvu *rvu, int blkaddr)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	int cgx, lmac_cnt, slink, link;
+	u64 tx_credits;
+
+	/* Set default min/max packet lengths allowed on NIX Rx links.
+	 *
+	 * With the HW reset minlen value of 60 bytes, HW treats ARP pkts
+	 * as undersized and reports them to SW as error pkts; hence
+	 * set it to 40 bytes.
+	 */
+	for (link = 0; link < (hw->cgx_links + hw->lbk_links); link++) {
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link),
+			    NIC_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS);
+	}
+
+	if (hw->sdp_links) {
+		link = hw->cgx_links + hw->lbk_links;
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link),
+			    SDP_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS);
+	}
+
+	/* Set credits for Tx links assuming max packet length allowed.
+	 * This will be reconfigured based on MTU set for PF/VF.
+	 */
+	for (cgx = 0; cgx < hw->cgx; cgx++) {
+		lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
+		tx_credits = ((CGX_FIFO_LEN / lmac_cnt) - NIC_HW_MAX_FRS) / 16;
+		/* Enable credits and set credit pkt count to max allowed */
+		tx_credits =  (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
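+		/* Per the encoding above: BIT(1) enables credits, bits
+		 * [10:2] carry the packet-count credit (0x1FF = max) and
+		 * the byte credit, in 16-byte units, starts at bit 12.
+		 */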
+		slink = cgx * hw->lmac_per_cgx;
+		for (link = slink; link < (slink + lmac_cnt); link++) {
+			rvu_write64(rvu, blkaddr,
+				    NIX_AF_TX_LINKX_NORM_CREDIT(link),
+				    tx_credits);
+			rvu_write64(rvu, blkaddr,
+				    NIX_AF_TX_LINKX_EXPR_CREDIT(link),
+				    tx_credits);
+		}
+	}
+
+	/* Set Tx credits for LBK link */
+	slink = hw->cgx_links;
+	for (link = slink; link < (slink + hw->lbk_links); link++) {
+		tx_credits = 1000; /* 10 * max LBK datarate = 10 * 100Gbps */
+		/* Enable credits and set credit pkt count to max allowed */
+		tx_credits =  (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_TX_LINKX_NORM_CREDIT(link), tx_credits);
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_TX_LINKX_EXPR_CREDIT(link), tx_credits);
+	}
+}
+
+static int nix_calibrate_x2p(struct rvu *rvu, int blkaddr)
+{
+	int idx, err;
+	u64 status;
+
+	/* Start X2P bus calibration */
+	rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+		    rvu_read64(rvu, blkaddr, NIX_AF_CFG) | BIT_ULL(9));
+	/* Wait for calibration to complete */
+	err = rvu_poll_reg(rvu, blkaddr,
+			   NIX_AF_STATUS, BIT_ULL(10), false);
+	if (err) {
+		dev_err(rvu->dev, "NIX X2P bus calibration failed\n");
+		return err;
+	}
+
+	status = rvu_read64(rvu, blkaddr, NIX_AF_STATUS);
+	/* Check if CGX devices are ready */
+	for (idx = 0; idx < rvu->cgx_cnt_max; idx++) {
+		/* Skip when cgx port is not available */
+		if (!rvu_cgx_pdata(idx, rvu) ||
+		    (status & (BIT_ULL(16 + idx))))
+			continue;
+		dev_err(rvu->dev,
+			"CGX%d didn't respond to NIX X2P calibration\n", idx);
+		err = -EBUSY;
+	}
+
+	/* Check if LBK is ready */
+	if (!(status & BIT_ULL(19))) {
+		dev_err(rvu->dev,
+			"LBK didn't respond to NIX X2P calibration\n");
+		err = -EBUSY;
+	}
+
+	/* Clear 'calibrate_x2p' bit */
+	rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+		    rvu_read64(rvu, blkaddr, NIX_AF_CFG) & ~BIT_ULL(9));
+	if (err || (status & 0x3FFULL))
+		dev_err(rvu->dev,
+			"NIX X2P calibration failed, status 0x%llx\n", status);
+	if (err)
+		return err;
+	return 0;
+}
+
+static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
+{
+	u64 cfg;
+	int err;
+
+	/* Set admin queue endianness */
+	cfg = rvu_read64(rvu, block->addr, NIX_AF_CFG);
+#ifdef __BIG_ENDIAN
+	cfg |= BIT_ULL(8);
+	rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg);
+#else
+	cfg &= ~BIT_ULL(8);
+	rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg);
+#endif
+
+	/* Do not bypass NDC cache */
+	cfg = rvu_read64(rvu, block->addr, NIX_AF_NDC_CFG);
+	cfg &= ~0x3FFEULL;
+	rvu_write64(rvu, block->addr, NIX_AF_NDC_CFG, cfg);
+
+	/* The result structure can be followed by RQ/SQ/CQ context at
+	 * RES + 128 bytes and a write mask at RES + 256 bytes, depending on
+	 * the operation type. Alloc sufficient result memory for all
+	 * operations.
+	 */
+	err = rvu_aq_alloc(rvu, &block->aq,
+			   Q_COUNT(AQ_SIZE), sizeof(struct nix_aq_inst_s),
+			   ALIGN(sizeof(struct nix_aq_res_s), 128) + 256);
+	if (err)
+		return err;
+
+	rvu_write64(rvu, block->addr, NIX_AF_AQ_CFG, AQ_SIZE);
+	rvu_write64(rvu, block->addr,
+		    NIX_AF_AQ_BASE, (u64)block->aq->inst->iova);
+	return 0;
+}
+
+int rvu_nix_init(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int blkaddr, err;
+	u64 cfg;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+	if (blkaddr < 0)
+		return 0;
+	block = &hw->block[blkaddr];
+
+	/* As per a HW erratum in 9xxx A0 silicon, NIX may corrupt
+	 * internal state when conditional clocks are turned off.
+	 * Hence enable them.
+	 */
+	if (is_rvu_9xxx_A0(rvu))
+		rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+			    rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x5EULL);
+
+	/* Calibrate X2P bus to check if CGX/LBK links are fine */
+	err = nix_calibrate_x2p(rvu, blkaddr);
+	if (err)
+		return err;
+
+	/* Set num of links of each type */
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+	hw->cgx = (cfg >> 12) & 0xF;
+	hw->lmac_per_cgx = (cfg >> 8) & 0xF;
+	hw->cgx_links = hw->cgx * hw->lmac_per_cgx;
+	hw->lbk_links = 1;
+	hw->sdp_links = 1;
+
+	/* Initialize admin queue */
+	err = nix_aq_init(rvu, block);
+	if (err)
+		return err;
+
+	/* Restore CINT timer delay to HW reset values */
+	rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL);
+
+	if (blkaddr == BLKADDR_NIX0) {
+		hw->nix0 = devm_kzalloc(rvu->dev,
+					sizeof(struct nix_hw), GFP_KERNEL);
+		if (!hw->nix0)
+			return -ENOMEM;
+
+		err = nix_setup_txschq(rvu, hw->nix0, blkaddr);
+		if (err)
+			return err;
+
+		err = nix_af_mark_format_setup(rvu, hw->nix0, blkaddr);
+		if (err)
+			return err;
+
+		err = nix_setup_mcast(rvu, hw->nix0, blkaddr);
+		if (err)
+			return err;
+
+		/* Configure segmentation offload formats */
+		nix_setup_lso(rvu, hw->nix0, blkaddr);
+
+		/* Config Outer/Inner L2, IP, TCP, UDP and SCTP NPC layer info.
+		 * This helps HW protocol checker to identify headers
+		 * and validate length and checksums.
+		 */
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OL2,
+			    (NPC_LID_LA << 8) | (NPC_LT_LA_ETHER << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OIP4,
+			    (NPC_LID_LC << 8) | (NPC_LT_LC_IP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IIP4,
+			    (NPC_LID_LF << 8) | (NPC_LT_LF_TU_IP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OIP6,
+			    (NPC_LID_LC << 8) | (NPC_LT_LC_IP6 << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IIP6,
+			    (NPC_LID_LF << 8) | (NPC_LT_LF_TU_IP6 << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OTCP,
+			    (NPC_LID_LD << 8) | (NPC_LT_LD_TCP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_ITCP,
+			    (NPC_LID_LG << 8) | (NPC_LT_LG_TU_TCP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OUDP,
+			    (NPC_LID_LD << 8) | (NPC_LT_LD_UDP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IUDP,
+			    (NPC_LID_LG << 8) | (NPC_LT_LG_TU_UDP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OSCTP,
+			    (NPC_LID_LD << 8) | (NPC_LT_LD_SCTP << 4) | 0x0F);
+		rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_ISCTP,
+			    (NPC_LID_LG << 8) | (NPC_LT_LG_TU_SCTP << 4) |
+			    0x0F);
+
+		err = nix_rx_flowkey_alg_cfg(rvu, blkaddr);
+		if (err)
+			return err;
+
+		/* Initialize CGX/LBK/SDP link credits, min/max pkt lengths */
+		nix_link_config(rvu, blkaddr);
+	}
+	return 0;
+}
+
+void rvu_nix_freemem(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	struct nix_txsch *txsch;
+	struct nix_mcast *mcast;
+	struct nix_hw *nix_hw;
+	int blkaddr, lvl;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+	if (blkaddr < 0)
+		return;
+
+	block = &hw->block[blkaddr];
+	rvu_aq_free(rvu, block->aq);
+
+	if (blkaddr == BLKADDR_NIX0) {
+		nix_hw = get_nix_hw(rvu->hw, blkaddr);
+		if (!nix_hw)
+			return;
+
+		for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+			txsch = &nix_hw->txsch[lvl];
+			kfree(txsch->schq.bmap);
+		}
+
+		mcast = &nix_hw->mcast;
+		qmem_free(rvu->dev, mcast->mce_ctx);
+		qmem_free(rvu->dev, mcast->mcast_buf);
+		mutex_destroy(&mcast->mce_lock);
+	}
+}
+
+static int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct rvu_hwinfo *hw = rvu->hw;
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	*nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (*nixlf < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req,
+				     struct msg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	int nixlf, err;
+
+	err = nix_get_nixlf(rvu, pcifunc, &nixlf);
+	if (err)
+		return err;
+
+	rvu_npc_enable_default_entries(rvu, pcifunc, nixlf);
+	return 0;
+}
+
+int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
+				    struct msg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	int nixlf, err;
+
+	err = nix_get_nixlf(rvu, pcifunc, &nixlf);
+	if (err)
+		return err;
+
+	rvu_npc_disable_default_entries(rvu, pcifunc, nixlf);
+	return 0;
+}
+
+void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct hwctx_disable_req ctx_req;
+	int err;
+
+	ctx_req.hdr.pcifunc = pcifunc;
+
+	/* Cleanup NPC MCAM entries, free Tx scheduler queues being used */
+	nix_interface_deinit(rvu, pcifunc, nixlf);
+	nix_rx_sync(rvu, blkaddr);
+	nix_txschq_free(rvu, pcifunc);
+
+	if (pfvf->sq_ctx) {
+		ctx_req.ctype = NIX_AQ_CTYPE_SQ;
+		err = nix_lf_hwctx_disable(rvu, &ctx_req);
+		if (err)
+			dev_err(rvu->dev, "SQ ctx disable failed\n");
+	}
+
+	if (pfvf->rq_ctx) {
+		ctx_req.ctype = NIX_AQ_CTYPE_RQ;
+		err = nix_lf_hwctx_disable(rvu, &ctx_req);
+		if (err)
+			dev_err(rvu->dev, "RQ ctx disable failed\n");
+	}
+
+	if (pfvf->cq_ctx) {
+		ctx_req.ctype = NIX_AQ_CTYPE_CQ;
+		err = nix_lf_hwctx_disable(rvu, &ctx_req);
+		if (err)
+			dev_err(rvu->dev, "CQ ctx disable failed\n");
+	}
+
+	nix_ctx_free(rvu, pfvf);
+}
+
+int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu,
+					struct nix_lso_format_cfg *req,
+					struct nix_lso_format_cfg_rsp *rsp)
+{
+	u16 pcifunc = req->hdr.pcifunc;
+	struct nix_hw *nix_hw;
+	struct rvu_pfvf *pfvf;
+	int blkaddr, idx, f;
+	u64 reg;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	nix_hw = get_nix_hw(rvu->hw, blkaddr);
+	if (!nix_hw)
+		return -EINVAL;
+
+	/* Find existing matching LSO format, if any */
+	for (idx = 0; idx < nix_hw->lso.in_use; idx++) {
+		for (f = 0; f < NIX_LSO_FIELD_MAX; f++) {
+			reg = rvu_read64(rvu, blkaddr,
+					 NIX_AF_LSO_FORMATX_FIELDX(idx, f));
+			if (req->fields[f] != (reg & req->field_mask))
+				break;
+		}
+
+		if (f == NIX_LSO_FIELD_MAX)
+			break;
+	}
+
+	if (idx < nix_hw->lso.in_use) {
+		/* Match found */
+		rsp->lso_format_idx = idx;
+		return 0;
+	}
+
+	if (nix_hw->lso.in_use == nix_hw->lso.total)
+		return NIX_AF_ERR_LSO_CFG_FAIL;
+
+	rsp->lso_format_idx = nix_hw->lso.in_use++;
+
+	for (f = 0; f < NIX_LSO_FIELD_MAX; f++)
+		rvu_write64(rvu, blkaddr,
+			    NIX_AF_LSO_FORMATX_FIELDX(rsp->lso_format_idx, f),
+			    req->fields[f]);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
new file mode 100644
index 0000000..c0e165d
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu_struct.h"
+#include "rvu_reg.h"
+#include "rvu.h"
+
+static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+			       struct npa_aq_inst_s *inst)
+{
+	struct admin_queue *aq = block->aq;
+	struct npa_aq_res_s *result;
+	int timeout = 1000;
+	u64 reg, head;
+
+	result = (struct npa_aq_res_s *)aq->res->base;
+
+	/* Get current head pointer where to append this instruction */
+	reg = rvu_read64(rvu, block->addr, NPA_AF_AQ_STATUS);
+	head = (reg >> 4) & AQ_PTR_MASK;
+
+	memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)),
+	       (void *)inst, aq->inst->entry_sz);
+	memset(result, 0, sizeof(*result));
+	/* sync into memory */
+	wmb();
+
+	/* Ring the doorbell and wait for result */
+	rvu_write64(rvu, block->addr, NPA_AF_AQ_DOOR, 1);
+	while (result->compcode == NPA_AQ_COMP_NOTDONE) {
+		cpu_relax();
+		udelay(1);
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+	}
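+	/* The poll above bounds the wait to roughly a millisecond
+	 * (1000 iterations x udelay(1)).
+	 */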
+
+	if (result->compcode != NPA_AQ_COMP_GOOD)
+		/* TODO: Replace this with some error code */
+		return -EBUSY;
+
+	return 0;
+}
+
+static int rvu_npa_aq_enq_inst(struct rvu *rvu, struct npa_aq_enq_req *req,
+			       struct npa_aq_enq_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, npalf, rc = 0;
+	struct npa_aq_inst_s inst;
+	struct rvu_block *block;
+	struct admin_queue *aq;
+	struct rvu_pfvf *pfvf;
+	void *ctx, *mask;
+	bool ena;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	if (!pfvf->aura_ctx || req->aura_id >= pfvf->aura_ctx->qsize)
+		return NPA_AF_ERR_AQ_ENQUEUE;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+	if (!pfvf->npalf || blkaddr < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	aq = block->aq;
+	if (!aq) {
+		dev_warn(rvu->dev, "%s: NPA AQ not initialized\n", __func__);
+		return NPA_AF_ERR_AQ_ENQUEUE;
+	}
+
+	npalf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (npalf < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	memset(&inst, 0, sizeof(struct npa_aq_inst_s));
+	inst.cindex = req->aura_id;
+	inst.lf = npalf;
+	inst.ctype = req->ctype;
+	inst.op = req->op;
+	/* Enqueuing multiple instructions is currently not supported,
+	 * so always choose the first entry in result memory.
+	 */
+	inst.res_addr = (u64)aq->res->iova;
+
+	/* Clean result + context memory */
+	memset(aq->res->base, 0, aq->res->entry_sz);
+	/* Context needs to be written at RES_ADDR + 128 */
+	ctx = aq->res->base + 128;
+	/* Mask needs to be written at RES_ADDR + 256 */
+	mask = aq->res->base + 256;
+
+	switch (req->op) {
+	case NPA_AQ_INSTOP_WRITE:
+		/* Copy context and write mask */
+		if (req->ctype == NPA_AQ_CTYPE_AURA) {
+			memcpy(mask, &req->aura_mask,
+			       sizeof(struct npa_aura_s));
+			memcpy(ctx, &req->aura, sizeof(struct npa_aura_s));
+		} else {
+			memcpy(mask, &req->pool_mask,
+			       sizeof(struct npa_pool_s));
+			memcpy(ctx, &req->pool, sizeof(struct npa_pool_s));
+		}
+		break;
+	case NPA_AQ_INSTOP_INIT:
+		if (req->ctype == NPA_AQ_CTYPE_AURA) {
+			if (req->aura.pool_addr >= pfvf->pool_ctx->qsize) {
+				rc = NPA_AF_ERR_AQ_FULL;
+				break;
+			}
+			/* Set pool's context address */
+			req->aura.pool_addr = pfvf->pool_ctx->iova +
+			(req->aura.pool_addr * pfvf->pool_ctx->entry_sz);
+			memcpy(ctx, &req->aura, sizeof(struct npa_aura_s));
+		} else { /* POOL's context */
+			memcpy(ctx, &req->pool, sizeof(struct npa_pool_s));
+		}
+		break;
+	case NPA_AQ_INSTOP_NOP:
+	case NPA_AQ_INSTOP_READ:
+	case NPA_AQ_INSTOP_LOCK:
+	case NPA_AQ_INSTOP_UNLOCK:
+		break;
+	default:
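+		/* Unknown instruction op; NPA_AF_ERR_AQ_FULL doubles as a
+		 * generic enqueue-failure code here.
+		 */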
+		rc = NPA_AF_ERR_AQ_FULL;
+		break;
+	}
+
+	if (rc)
+		return rc;
+
+	spin_lock(&aq->lock);
+
+	/* Submit the instruction to AQ */
+	rc = npa_aq_enqueue_wait(rvu, block, &inst);
+	if (rc) {
+		spin_unlock(&aq->lock);
+		return rc;
+	}
+
+	/* Set aura bitmap if aura hw context is enabled */
+	if (req->ctype == NPA_AQ_CTYPE_AURA) {
+		if (req->op == NPA_AQ_INSTOP_INIT && req->aura.ena)
+			__set_bit(req->aura_id, pfvf->aura_bmap);
+		if (req->op == NPA_AQ_INSTOP_WRITE) {
+			ena = (req->aura.ena & req->aura_mask.ena) |
+				(test_bit(req->aura_id, pfvf->aura_bmap) &
+				~req->aura_mask.ena);
+			if (ena)
+				__set_bit(req->aura_id, pfvf->aura_bmap);
+			else
+				__clear_bit(req->aura_id, pfvf->aura_bmap);
+		}
+	}
+
+	/* Set pool bitmap if pool hw context is enabled */
+	if (req->ctype == NPA_AQ_CTYPE_POOL) {
+		if (req->op == NPA_AQ_INSTOP_INIT && req->pool.ena)
+			__set_bit(req->aura_id, pfvf->pool_bmap);
+		if (req->op == NPA_AQ_INSTOP_WRITE) {
+			ena = (req->pool.ena & req->pool_mask.ena) |
+				(test_bit(req->aura_id, pfvf->pool_bmap) &
+				~req->pool_mask.ena);
+			if (ena)
+				__set_bit(req->aura_id, pfvf->pool_bmap);
+			else
+				__clear_bit(req->aura_id, pfvf->pool_bmap);
+		}
+	}
+	spin_unlock(&aq->lock);
+
+	if (rsp) {
+		/* Copy read context into mailbox */
+		if (req->op == NPA_AQ_INSTOP_READ) {
+			if (req->ctype == NPA_AQ_CTYPE_AURA)
+				memcpy(&rsp->aura, ctx,
+				       sizeof(struct npa_aura_s));
+			else
+				memcpy(&rsp->pool, ctx,
+				       sizeof(struct npa_pool_s));
+		}
+	}
+
+	return 0;
+}
+
+static int npa_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+	struct npa_aq_enq_req aq_req;
+	unsigned long *bmap;
+	int id, cnt = 0;
+	int err = 0, rc;
+
+	if (!pfvf->pool_ctx || !pfvf->aura_ctx)
+		return NPA_AF_ERR_AQ_ENQUEUE;
+
+	memset(&aq_req, 0, sizeof(struct npa_aq_enq_req));
+	aq_req.hdr.pcifunc = req->hdr.pcifunc;
+
+	if (req->ctype == NPA_AQ_CTYPE_POOL) {
+		aq_req.pool.ena = 0;
+		aq_req.pool_mask.ena = 1;
+		cnt = pfvf->pool_ctx->qsize;
+		bmap = pfvf->pool_bmap;
+	} else if (req->ctype == NPA_AQ_CTYPE_AURA) {
+		aq_req.aura.ena = 0;
+		aq_req.aura_mask.ena = 1;
+		cnt = pfvf->aura_ctx->qsize;
+		bmap = pfvf->aura_bmap;
+	}
+
+	aq_req.ctype = req->ctype;
+	aq_req.op = NPA_AQ_INSTOP_WRITE;
+
+	for (id = 0; id < cnt; id++) {
+		if (!test_bit(id, bmap))
+			continue;
+		aq_req.aura_id = id;
+		rc = rvu_npa_aq_enq_inst(rvu, &aq_req, NULL);
+		if (rc) {
+			err = rc;
+			dev_err(rvu->dev, "Failed to disable %s:%d context\n",
+				(req->ctype == NPA_AQ_CTYPE_AURA) ?
+				"Aura" : "Pool", id);
+		}
+	}
+
+	return err;
+}
+
+int rvu_mbox_handler_npa_aq_enq(struct rvu *rvu,
+				struct npa_aq_enq_req *req,
+				struct npa_aq_enq_rsp *rsp)
+{
+	return rvu_npa_aq_enq_inst(rvu, req, rsp);
+}
+
+int rvu_mbox_handler_npa_hwctx_disable(struct rvu *rvu,
+				       struct hwctx_disable_req *req,
+				       struct msg_rsp *rsp)
+{
+	return npa_lf_hwctx_disable(rvu, req);
+}
+
+static void npa_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf)
+{
+	kfree(pfvf->aura_bmap);
+	pfvf->aura_bmap = NULL;
+
+	qmem_free(rvu->dev, pfvf->aura_ctx);
+	pfvf->aura_ctx = NULL;
+
+	kfree(pfvf->pool_bmap);
+	pfvf->pool_bmap = NULL;
+
+	qmem_free(rvu->dev, pfvf->pool_ctx);
+	pfvf->pool_ctx = NULL;
+
+	qmem_free(rvu->dev, pfvf->npa_qints_ctx);
+	pfvf->npa_qints_ctx = NULL;
+}
+
+int rvu_mbox_handler_npa_lf_alloc(struct rvu *rvu,
+				  struct npa_lf_alloc_req *req,
+				  struct npa_lf_alloc_rsp *rsp)
+{
+	int npalf, qints, hwctx_size, err, rc = 0;
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_block *block;
+	struct rvu_pfvf *pfvf;
+	u64 cfg, ctx_cfg;
+	int blkaddr;
+
+	if (req->aura_sz > NPA_AURA_SZ_MAX ||
+	    req->aura_sz == NPA_AURA_SZ_0 || !req->nr_pools)
+		return NPA_AF_ERR_PARAM;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+	if (!pfvf->npalf || blkaddr < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	npalf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (npalf < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	/* Reset this NPA LF */
+	err = rvu_lf_reset(rvu, block, npalf);
+	if (err) {
+		dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf);
+		return NPA_AF_ERR_LF_RESET;
+	}
+
+	ctx_cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST1);
+
+	/* Alloc memory for aura HW contexts */
+	hwctx_size = 1UL << (ctx_cfg & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->aura_ctx,
+			 NPA_AURA_COUNT(req->aura_sz), hwctx_size);
+	if (err)
+		goto free_mem;
+
+	pfvf->aura_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long),
+				  GFP_KERNEL);
+	if (!pfvf->aura_bmap)
+		goto free_mem;
+
+	/* Alloc memory for pool HW contexts */
+	hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->pool_ctx, req->nr_pools, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	pfvf->pool_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long),
+				  GFP_KERNEL);
+	if (!pfvf->pool_bmap)
+		goto free_mem;
+
+	/* Get the number of queue interrupts supported */
+	cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST);
+	qints = (cfg >> 28) & 0xFFF;
+
+	/* Alloc memory for Qints HW contexts */
+	hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF);
+	err = qmem_alloc(rvu->dev, &pfvf->npa_qints_ctx, qints, hwctx_size);
+	if (err)
+		goto free_mem;
+
+	cfg = rvu_read64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf));
+	/* Clear way partition mask and set aura offset to '0' */
+	cfg &= ~(BIT_ULL(34) - 1);
+	/* Set aura size & enable caching of contexts */
+	cfg |= (req->aura_sz << 16) | BIT_ULL(34);
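+	/* Per the two updates above: bits [33:0] are cleared, the aura
+	 * size is encoded starting at bit 16 and BIT(34) enables context
+	 * caching.
+	 */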
+	rvu_write64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf), cfg);
+
+	/* Configure aura HW context's base */
+	rvu_write64(rvu, blkaddr, NPA_AF_LFX_LOC_AURAS_BASE(npalf),
+		    (u64)pfvf->aura_ctx->iova);
+
+	/* Enable caching of qints hw context */
+	rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_CFG(npalf), BIT_ULL(36));
+	rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_BASE(npalf),
+		    (u64)pfvf->npa_qints_ctx->iova);
+
+	goto exit;
+
+free_mem:
+	npa_ctx_free(rvu, pfvf);
+	rc = -ENOMEM;
+
+exit:
+	/* set stack page info */
+	cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST);
+	rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF;
+	rsp->stack_pg_bytes = cfg & 0xFF;
+	rsp->qints = (cfg >> 28) & 0xFFF;
+	return rc;
+}
+
+int rvu_mbox_handler_npa_lf_free(struct rvu *rvu, struct msg_req *req,
+				 struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_block *block;
+	struct rvu_pfvf *pfvf;
+	int npalf, err;
+	int blkaddr;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+	if (!pfvf->npalf || blkaddr < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	block = &hw->block[blkaddr];
+	npalf = rvu_get_lf(rvu, block, pcifunc, 0);
+	if (npalf < 0)
+		return NPA_AF_ERR_AF_LF_INVALID;
+
+	/* Reset this NPA LF */
+	err = rvu_lf_reset(rvu, block, npalf);
+	if (err) {
+		dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf);
+		return NPA_AF_ERR_LF_RESET;
+	}
+
+	npa_ctx_free(rvu, pfvf);
+
+	return 0;
+}
+
+static int npa_aq_init(struct rvu *rvu, struct rvu_block *block)
+{
+	u64 cfg;
+	int err;
+
+	/* Set admin queue endianness */
+	cfg = rvu_read64(rvu, block->addr, NPA_AF_GEN_CFG);
+#ifdef __BIG_ENDIAN
+	cfg |= BIT_ULL(1);
+	rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg);
+#else
+	cfg &= ~BIT_ULL(1);
+	rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg);
+#endif
+
+	/* Do not bypass NDC cache */
+	cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG);
+	cfg &= ~0x03DULL;
+	rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg);
+
+	/* The result structure can be followed by Aura/Pool context at
+	 * RES + 128 bytes and a write mask at RES + 256 bytes, depending on
+	 * the operation type. Alloc sufficient result memory for all
+	 * operations.
+	 */
+	err = rvu_aq_alloc(rvu, &block->aq,
+			   Q_COUNT(AQ_SIZE), sizeof(struct npa_aq_inst_s),
+			   ALIGN(sizeof(struct npa_aq_res_s), 128) + 256);
+	if (err)
+		return err;
+
+	rvu_write64(rvu, block->addr, NPA_AF_AQ_CFG, AQ_SIZE);
+	rvu_write64(rvu, block->addr,
+		    NPA_AF_AQ_BASE, (u64)block->aq->inst->iova);
+	return 0;
+}
+
+int rvu_npa_init(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	int blkaddr, err;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
+	if (blkaddr < 0)
+		return 0;
+
+	/* Initialize admin queue */
+	err = npa_aq_init(rvu, &hw->block[blkaddr]);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void rvu_npa_freemem(struct rvu *rvu)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_block *block;
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
+	if (blkaddr < 0)
+		return;
+
+	block = &hw->block[blkaddr];
+	rvu_aq_free(rvu, block->aq);
+}
+
+void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct hwctx_disable_req ctx_req;
+
+	/* Disable all pools */
+	ctx_req.hdr.pcifunc = pcifunc;
+	ctx_req.ctype = NPA_AQ_CTYPE_POOL;
+	npa_lf_hwctx_disable(rvu, &ctx_req);
+
+	/* Disable all auras */
+	ctx_req.ctype = NPA_AQ_CTYPE_AURA;
+	npa_lf_hwctx_disable(rvu, &ctx_req);
+
+	npa_ctx_free(rvu, pfvf);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
new file mode 100644
index 0000000..15f7027
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -0,0 +1,2212 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu_struct.h"
+#include "rvu_reg.h"
+#include "rvu.h"
+#include "npc.h"
+#include "cgx.h"
+#include "npc_profile.h"
+
+#define RSVD_MCAM_ENTRIES_PER_PF	2 /* Bcast & Promisc */
+#define RSVD_MCAM_ENTRIES_PER_NIXLF	1 /* Ucast for LFs */
+
+#define NIXLF_UCAST_ENTRY	0
+#define NIXLF_BCAST_ENTRY	1
+#define NIXLF_PROMISC_ENTRY	2
+
+#define NPC_PARSE_RESULT_DMAC_OFFSET	8
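+
+/* Reserved MCAM layout implied by the defines above: every PF other than
+ * PF0 gets two reserved entries at mcam->pf_offset ([bcast, promisc] in
+ * that order), and each NIX LF gets one reserved ucast entry at
+ * mcam->nixlf_offset; see npc_get_nixlf_mcam_index().
+ */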
+
+static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam,
+				      int blkaddr, u16 pcifunc);
+static void npc_mcam_free_all_counters(struct rvu *rvu, struct npc_mcam *mcam,
+				       u16 pcifunc);
+
+void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf)
+{
+	int blkaddr;
+	u64 val = 0;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Config CPI base for the PKIND */
+	val = pkind | 1ULL << 62;
+	rvu_write64(rvu, blkaddr, NPC_AF_PKINDX_CPI_DEFX(pkind, 0), val);
+}
+
+int rvu_npc_get_pkind(struct rvu *rvu, u16 pf)
+{
+	struct npc_pkind *pkind = &rvu->hw->pkind;
+	u32 map;
+	int i;
+
+	for (i = 0; i < pkind->rsrc.max; i++) {
+		map = pkind->pfchan_map[i];
+		if (((map >> 16) & 0x3F) == pf)
+			return i;
+	}
+	return -1;
+}
+
+static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
+				    u16 pcifunc, int nixlf, int type)
+{
+	int pf = rvu_get_pf(pcifunc);
+	int index;
+
+	/* Check if this is for a PF */
+	if (pf && !(pcifunc & RVU_PFVF_FUNC_MASK)) {
+		/* Reserved entries exclude PF0 */
+		pf--;
+		index = mcam->pf_offset + (pf * RSVD_MCAM_ENTRIES_PER_PF);
+		/* Broadcast address matching entry should be first so
+		 * that the packet can be replicated to all VFs.
+		 */
+		if (type == NIXLF_BCAST_ENTRY)
+			return index;
+		else if (type == NIXLF_PROMISC_ENTRY)
+			return index + 1;
+	}
+
+	return (mcam->nixlf_offset + (nixlf * RSVD_MCAM_ENTRIES_PER_NIXLF));
+}
+
+static int npc_get_bank(struct npc_mcam *mcam, int index)
+{
+	int bank = index / mcam->banksize;
+
+	/* 0,1 & 2,3 banks are combined for this keysize */
+	if (mcam->keysize == NPC_MCAM_KEY_X2)
+		return bank ? 2 : 0;
+
+	return bank;
+}
+
+static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam,
+				  int blkaddr, int index)
+{
+	int bank = npc_get_bank(mcam, index);
+	u64 cfg;
+
+	index &= (mcam->banksize - 1);
+	cfg = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CFG(index, bank));
+	return (cfg & 1);
+}
+
+static void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+				  int blkaddr, int index, bool enable)
+{
+	int bank = npc_get_bank(mcam, index);
+	int actbank = bank;
+
+	index &= (mcam->banksize - 1);
+	for (; bank < (actbank + mcam->banks_per_entry); bank++) {
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CFG(index, bank),
+			    enable ? 1 : 0);
+	}
+}
+
+static void npc_get_keyword(struct mcam_entry *entry, int idx,
+			    u64 *cam0, u64 *cam1)
+{
+	u64 kw_mask = 0x00;
+
+#define CAM_MASK(n)	(BIT_ULL(n) - 1)
+
+	/* 0, 2, 4, 6 indices refer to BANKX_CAMX_W0 and
+	 * 1, 3, 5, 7 indices refer to BANKX_CAMX_W1.
+	 *
+	 * Also, only 48 bits of BANKX_CAMX_W1 are valid.
+	 */
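+	/* On return, kw_mask selects the bits that take part in the match:
+	 * *cam1 carries the bits that must be '1', *cam0 the bits that
+	 * must be '0', and masked-out bits are don't-care (both zero).
+	 */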
+	switch (idx) {
+	case 0:
+		/* BANK(X)_CAM_W0<63:0> = MCAM_KEY[KW0]<63:0> */
+		*cam1 = entry->kw[0];
+		kw_mask = entry->kw_mask[0];
+		break;
+	case 1:
+		/* BANK(X)_CAM_W1<47:0> = MCAM_KEY[KW1]<47:0> */
+		*cam1 = entry->kw[1] & CAM_MASK(48);
+		kw_mask = entry->kw_mask[1] & CAM_MASK(48);
+		break;
+	case 2:
+		/* BANK(X + 1)_CAM_W0<15:0> = MCAM_KEY[KW1]<63:48>
+		 * BANK(X + 1)_CAM_W0<63:16> = MCAM_KEY[KW2]<47:0>
+		 */
+		*cam1 = (entry->kw[1] >> 48) & CAM_MASK(16);
+		*cam1 |= ((entry->kw[2] & CAM_MASK(48)) << 16);
+		kw_mask = (entry->kw_mask[1] >> 48) & CAM_MASK(16);
+		kw_mask |= ((entry->kw_mask[2] & CAM_MASK(48)) << 16);
+		break;
+	case 3:
+		/* BANK(X + 1)_CAM_W1<15:0> = MCAM_KEY[KW2]<63:48>
+		 * BANK(X + 1)_CAM_W1<47:16> = MCAM_KEY[KW3]<31:0>
+		 */
+		*cam1 = (entry->kw[2] >> 48) & CAM_MASK(16);
+		*cam1 |= ((entry->kw[3] & CAM_MASK(32)) << 16);
+		kw_mask = (entry->kw_mask[2] >> 48) & CAM_MASK(16);
+		kw_mask |= ((entry->kw_mask[3] & CAM_MASK(32)) << 16);
+		break;
+	case 4:
+		/* BANK(X + 2)_CAM_W0<31:0> = MCAM_KEY[KW3]<63:32>
+		 * BANK(X + 2)_CAM_W0<63:32> = MCAM_KEY[KW4]<31:0>
+		 */
+		*cam1 = (entry->kw[3] >> 32) & CAM_MASK(32);
+		*cam1 |= ((entry->kw[4] & CAM_MASK(32)) << 32);
+		kw_mask = (entry->kw_mask[3] >> 32) & CAM_MASK(32);
+		kw_mask |= ((entry->kw_mask[4] & CAM_MASK(32)) << 32);
+		break;
+	case 5:
+		/* BANK(X + 2)_CAM_W1<31:0> = MCAM_KEY[KW4]<63:32>
+		 * BANK(X + 2)_CAM_W1<47:32> = MCAM_KEY[KW5]<15:0>
+		 */
+		*cam1 = (entry->kw[4] >> 32) & CAM_MASK(32);
+		*cam1 |= ((entry->kw[5] & CAM_MASK(16)) << 32);
+		kw_mask = (entry->kw_mask[4] >> 32) & CAM_MASK(32);
+		kw_mask |= ((entry->kw_mask[5] & CAM_MASK(16)) << 32);
+		break;
+	case 6:
+		/* BANK(X + 3)_CAM_W0<47:0> = MCAM_KEY[KW5]<63:16>
+		 * BANK(X + 3)_CAM_W0<63:48> = MCAM_KEY[KW6]<15:0>
+		 */
+		*cam1 = (entry->kw[5] >> 16) & CAM_MASK(48);
+		*cam1 |= ((entry->kw[6] & CAM_MASK(16)) << 48);
+		kw_mask = (entry->kw_mask[5] >> 16) & CAM_MASK(48);
+		kw_mask |= ((entry->kw_mask[6] & CAM_MASK(16)) << 48);
+		break;
+	case 7:
+		/* BANK(X + 3)_CAM_W1<47:0> = MCAM_KEY[KW6]<63:16> */
+		*cam1 = (entry->kw[6] >> 16) & CAM_MASK(48);
+		kw_mask = (entry->kw_mask[6] >> 16) & CAM_MASK(48);
+		break;
+	}
+
+	*cam1 &= kw_mask;
+	*cam0 = ~*cam1 & kw_mask;
+}
+
+static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+				  int blkaddr, int index, u8 intf,
+				  struct mcam_entry *entry, bool enable)
+{
+	int bank = npc_get_bank(mcam, index);
+	int kw = 0, actbank, actindex;
+	u64 cam0, cam1;
+
+	actbank = bank; /* Save bank id, to set action later on */
+	actindex = index;
+	index &= (mcam->banksize - 1);
+
+	/* CAM1 takes the comparison value and
+	 * CAM0 specifies match for a bit in key being '0' or '1' or 'dontcare'.
+	 * CAM1<n> = 0 & CAM0<n> = 1 => match if key<n> = 0
+	 * CAM1<n> = 1 & CAM0<n> = 0 => match if key<n> = 1
+	 * CAM1<n> = 0 & CAM0<n> = 0 => always match i.e dontcare.
+	 */
+	for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) {
+		/* Interface should be set in all banks */
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1),
+			    intf);
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0),
+			    ~intf & 0x3);
+
+		/* Set the match key */
+		npc_get_keyword(entry, kw, &cam0, &cam1);
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CAMX_W0(index, bank, 1), cam1);
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CAMX_W0(index, bank, 0), cam0);
+
+		npc_get_keyword(entry, kw + 1, &cam0, &cam1);
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CAMX_W1(index, bank, 1), cam1);
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_CAMX_W1(index, bank, 0), cam0);
+	}
+
+	/* Set 'action' */
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_ACTION(index, actbank), entry->action);
+
+	/* Set TAG 'action' */
+	rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_TAG_ACT(index, actbank),
+		    entry->vtag_action);
+
+	/* Enable or disable the entry */
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, actindex, enable);
+}
+
+static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+				int blkaddr, u16 src, u16 dest)
+{
+	int dbank = npc_get_bank(mcam, dest);
+	int sbank = npc_get_bank(mcam, src);
+	u64 cfg, sreg, dreg;
+	int bank, i;
+
+	src &= (mcam->banksize - 1);
+	dest &= (mcam->banksize - 1);
+
+	/* Copy INTF's, W0's, W1's CAM0 and CAM1 configuration */
+	for (bank = 0; bank < mcam->banks_per_entry; bank++) {
+		sreg = NPC_AF_MCAMEX_BANKX_CAMX_INTF(src, sbank + bank, 0);
+		dreg = NPC_AF_MCAMEX_BANKX_CAMX_INTF(dest, dbank + bank, 0);
+		for (i = 0; i < 6; i++) {
+			cfg = rvu_read64(rvu, blkaddr, sreg + (i * 8));
+			rvu_write64(rvu, blkaddr, dreg + (i * 8), cfg);
+		}
+	}
+
+	/* Copy action */
+	cfg = rvu_read64(rvu, blkaddr,
+			 NPC_AF_MCAMEX_BANKX_ACTION(src, sbank));
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_ACTION(dest, dbank), cfg);
+
+	/* Copy TAG action */
+	cfg = rvu_read64(rvu, blkaddr,
+			 NPC_AF_MCAMEX_BANKX_TAG_ACT(src, sbank));
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_TAG_ACT(dest, dbank), cfg);
+
+	/* Enable or disable */
+	cfg = rvu_read64(rvu, blkaddr,
+			 NPC_AF_MCAMEX_BANKX_CFG(src, sbank));
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_CFG(dest, dbank), cfg);
+}
+
+static u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam,
+			       int blkaddr, int index)
+{
+	int bank = npc_get_bank(mcam, index);
+
+	index &= (mcam->banksize - 1);
+	return rvu_read64(rvu, blkaddr,
+			  NPC_AF_MCAMEX_BANKX_ACTION(index, bank));
+}
+
+void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
+				 int nixlf, u64 chan, u8 *mac_addr)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct mcam_entry entry = { {0} };
+	struct nix_rx_action action;
+	int blkaddr, index, kwi;
+	u64 mac = 0;
+
+	/* AF's VFs work in promiscuous mode */
+	if (is_afvf(pcifunc))
+		return;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	for (index = ETH_ALEN - 1; index >= 0; index--)
+		mac |= ((u64)*mac_addr++) << (8 * index);
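+	/* e.g. mac_addr 00:11:22:33:44:55 packs to mac = 0x001122334455 */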
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_UCAST_ENTRY);
+
+	/* Match ingress channel and DMAC */
+	entry.kw[0] = chan;
+	entry.kw_mask[0] = 0xFFFULL;
+
+	kwi = NPC_PARSE_RESULT_DMAC_OFFSET / sizeof(u64);
+	entry.kw[kwi] = mac;
+	entry.kw_mask[kwi] = BIT_ULL(48) - 1;
+
+	/* Don't change the action if entry is already enabled
+	 * Otherwise RSS action may get overwritten.
+	 */
+	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) {
+		*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
+						      blkaddr, index);
+	} else {
+		*(u64 *)&action = 0x00;
+		action.op = NIX_RX_ACTIONOP_UCAST;
+		action.pf_func = pcifunc;
+	}
+
+	entry.action = *(u64 *)&action;
+	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
+			      NIX_INTF_RX, &entry, true);
+
+	/* add VLAN matching, setup action and save entry back for later */
+	entry.kw[0] |= (NPC_LT_LB_STAG | NPC_LT_LB_CTAG) << 20;
+	entry.kw_mask[0] |= (NPC_LT_LB_STAG & NPC_LT_LB_CTAG) << 20;
+
+	entry.vtag_action = VTAG0_VALID_BIT |
+			    FIELD_PREP(VTAG0_TYPE_MASK, 0) |
+			    FIELD_PREP(VTAG0_LID_MASK, NPC_LID_LA) |
+			    FIELD_PREP(VTAG0_RELPTR_MASK, 12);
+
+	memcpy(&pfvf->entry, &entry, sizeof(entry));
+}
+
+void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+				   int nixlf, u64 chan, bool allmulti)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr, ucast_idx, index, kwi;
+	struct mcam_entry entry = { {0} };
+	struct nix_rx_action action = { };
+
+	/* Only PF or AF VF can add a promiscuous entry */
+	if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc))
+		return;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_PROMISC_ENTRY);
+
+	entry.kw[0] = chan;
+	entry.kw_mask[0] = 0xFFFULL;
+
+	if (allmulti) {
+		kwi = NPC_PARSE_RESULT_DMAC_OFFSET / sizeof(u64);
+		entry.kw[kwi] = BIT_ULL(40); /* LSB of 1st byte in DMAC (mcast bit) */
+		entry.kw_mask[kwi] = BIT_ULL(40);
+	}
+
+	ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					     nixlf, NIXLF_UCAST_ENTRY);
+
+	/* If the corresponding PF's ucast action is RSS,
+	 * use the same action for promisc also
+	 */
+	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
+		*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
+							blkaddr, ucast_idx);
+
+	if (action.op != NIX_RX_ACTIONOP_RSS) {
+		*(u64 *)&action = 0x00;
+		action.op = NIX_RX_ACTIONOP_UCAST;
+		action.pf_func = pcifunc;
+	}
+
+	entry.action = *(u64 *)&action;
+	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
+			      NIX_INTF_RX, &entry, true);
+}
+
+static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc,
+				     int nixlf, bool enable)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr, index;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Only PFs have a promiscuous entry */
+	if (pcifunc & RVU_PFVF_FUNC_MASK)
+		return;
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_PROMISC_ENTRY);
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
+}
+
+void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+	npc_enadis_promisc_entry(rvu, pcifunc, nixlf, false);
+}
+
+void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+	npc_enadis_promisc_entry(rvu, pcifunc, nixlf, true);
+}
+
+void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
+				       int nixlf, u64 chan)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct mcam_entry entry = { {0} };
+	struct nix_rx_action action;
+#ifdef MCAST_MCE
+	struct rvu_pfvf *pfvf;
+#endif
+	int blkaddr, index;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Only PF can add a bcast match entry */
+	if (pcifunc & RVU_PFVF_FUNC_MASK)
+		return;
+#ifdef MCAST_MCE
+	pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+#endif
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_BCAST_ENTRY);
+
+	/* Check for the L2B bit and LMAC channel.
+	 * NOTE: the MKEX default profile is a reduced version (intended to
+	 * accommodate more capability while ignoring a few bits) used as a
+	 * stop-gap approach.
+	 * We care about L2B, which per the HRM sits in NPC_PARSE_KEX_S at
+	 * BIT_POS[25] but is moved here to BIT_POS[13]; ERRCODE and ERRLEV
+	 * are ignored, otherwise we would lose capability features needed
+	 * for CoS (from an ODP PoV), e.g. VLAN and DSCP.
+	 *
+	 * Reduced layout of the MKEX default profile
+	 * (i.e. CHAN, L2/3{B/M}, LA, LB, LC, LD):
+	 *
+	 * BIT_POS[31:28] : LD
+	 * BIT_POS[27:24] : LC
+	 * BIT_POS[23:20] : LB
+	 * BIT_POS[19:16] : LA
+	 * BIT_POS[15:12] : L3B, L3M, L2B, L2M
+	 * BIT_POS[11:00] : CHAN
+	 */
+	entry.kw[0] = BIT_ULL(13) | chan;
+	entry.kw_mask[0] = BIT_ULL(13) | 0xFFFULL;
+
+	*(u64 *)&action = 0x00;
+#ifdef MCAST_MCE
+	action.op = NIX_RX_ACTIONOP_MCAST;
+	action.pf_func = pcifunc;
+	action.index = pfvf->bcast_mce_idx;
+#else
+	/* Early silicon doesn't support pkt replication,
+	 * so install the entry with a UCAST action so that
+	 * the PF receives all broadcast packets.
+	 */
+	action.op = NIX_RX_ACTIONOP_UCAST;
+	action.pf_func = pcifunc;
+#endif
+
+	entry.action = *(u64 *)&action;
+	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
+			      NIX_INTF_RX, &entry, true);
+}
+
+void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
+				    int group, int alg_idx, int mcam_index)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct nix_rx_action action;
+	int blkaddr, index, bank;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Check if this is for reserved default entry */
+	if (mcam_index < 0) {
+		if (group != DEFAULT_RSS_CONTEXT_GROUP)
+			return;
+		index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+						 nixlf, NIXLF_UCAST_ENTRY);
+	} else {
+		/* TODO: validate this mcam index */
+		index = mcam_index;
+	}
+
+	if (index >= mcam->total_entries)
+		return;
+
+	bank = npc_get_bank(mcam, index);
+	index &= (mcam->banksize - 1);
+
+	*(u64 *)&action = rvu_read64(rvu, blkaddr,
+				     NPC_AF_MCAMEX_BANKX_ACTION(index, bank));
+	/* Ignore if no action was set earlier */
+	if (!*(u64 *)&action)
+		return;
+
+	action.op = NIX_RX_ACTIONOP_RSS;
+	action.pf_func = pcifunc;
+	action.index = group;
+	action.flow_key_alg = alg_idx;
+
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action);
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_PROMISC_ENTRY);
+
+	/* If the PF's promiscuous entry is enabled,
+	 * set the RSS action for that entry as well.
+	 */
+	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) {
+		bank = npc_get_bank(mcam, index);
+		index &= (mcam->banksize - 1);
+
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_MCAMEX_BANKX_ACTION(index, bank),
+			    *(u64 *)&action);
+	}
+
+	rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+}
+
+static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
+				       int nixlf, bool enable)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct nix_rx_action action;
+	int index, bank, blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Ucast MCAM match entry of this PF/VF */
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_UCAST_ENTRY);
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
+
+	/* For PF, ena/dis promisc and bcast MCAM match entries */
+	if (pcifunc & RVU_PFVF_FUNC_MASK)
+		return;
+
+	/* For bcast, enable/disable only if its action is not
+	 * packet replication; in case the action is replication,
+	 * this PF's nixlf is instead removed from the bcast
+	 * replication list.
+	 */
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_BCAST_ENTRY);
+	bank = npc_get_bank(mcam, index);
+	*(u64 *)&action = rvu_read64(rvu, blkaddr,
+	     NPC_AF_MCAMEX_BANKX_ACTION(index & (mcam->banksize - 1), bank));
+	if (action.op != NIX_RX_ACTIONOP_MCAST)
+		npc_enable_mcam_entry(rvu, mcam,
+				      blkaddr, index, enable);
+	if (enable)
+		rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf);
+	else
+		rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
+
+	rvu_npc_update_rxvlan(rvu, pcifunc, nixlf);
+}
+
+void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+	npc_enadis_default_entries(rvu, pcifunc, nixlf, false);
+}
+
+void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+	npc_enadis_default_entries(rvu, pcifunc, nixlf, true);
+}
+
+void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	mutex_lock(&mcam->lock);
+
+	/* Disable and free all MCAM entries mapped to this 'pcifunc' */
+	npc_mcam_free_all_entries(rvu, mcam, blkaddr, pcifunc);
+
+	/* Free all MCAM counters mapped to this 'pcifunc' */
+	npc_mcam_free_all_counters(rvu, mcam, pcifunc);
+
+	mutex_unlock(&mcam->lock);
+
+	rvu_npc_disable_default_entries(rvu, pcifunc, nixlf);
+}
+
+#define SET_KEX_LD(intf, lid, ltype, ld, cfg)	\
+	rvu_write64(rvu, blkaddr,			\
+		NPC_AF_INTFX_LIDX_LTX_LDX_CFG(intf, lid, ltype, ld), cfg)
+
+#define SET_KEX_LDFLAGS(intf, ld, flags, cfg)	\
+	rvu_write64(rvu, blkaddr,			\
+		NPC_AF_INTFX_LDATAX_FLAGSX_CFG(intf, ld, flags), cfg)
+
+#define KEX_LD_CFG(bytesm1, hdr_ofs, ena, flags_ena, key_ofs)		\
+			(((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \
+			 ((flags_ena) << 6) | ((key_ofs) & 0x3F))
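+/* e.g. KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, ofs) encodes: extract
+ * bytesm1 + 1 = 6 bytes from header offset 0 into key offset 'ofs',
+ * with LDATA extraction enabled and flags based extraction disabled.
+ */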
+
+static void npc_config_ldata_extract(struct rvu *rvu, int blkaddr)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int lid, ltype;
+	int lid_count;
+	u64 cfg;
+
+	cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST);
+	lid_count = (cfg >> 4) & 0xF;
+
+	/* First clear any existing config, i.e.
+	 * disable LDATA and FLAGS extraction.
+	 */
+	for (lid = 0; lid < lid_count; lid++) {
+		for (ltype = 0; ltype < 16; ltype++) {
+			SET_KEX_LD(NIX_INTF_RX, lid, ltype, 0, 0ULL);
+			SET_KEX_LD(NIX_INTF_RX, lid, ltype, 1, 0ULL);
+			SET_KEX_LD(NIX_INTF_TX, lid, ltype, 0, 0ULL);
+			SET_KEX_LD(NIX_INTF_TX, lid, ltype, 1, 0ULL);
+
+			SET_KEX_LDFLAGS(NIX_INTF_RX, 0, ltype, 0ULL);
+			SET_KEX_LDFLAGS(NIX_INTF_RX, 1, ltype, 0ULL);
+			SET_KEX_LDFLAGS(NIX_INTF_TX, 0, ltype, 0ULL);
+			SET_KEX_LDFLAGS(NIX_INTF_TX, 1, ltype, 0ULL);
+		}
+	}
+
+	if (mcam->keysize != NPC_MCAM_KEY_X2)
+		return;
+
+	/* Default MCAM KEX profile */
+	/* Layer A: Ethernet: */
+
+	/* DMAC: 6 bytes, KW1[47:0] */
+	cfg = KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, NPC_PARSE_RESULT_DMAC_OFFSET);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LA, NPC_LT_LA_ETHER, 0, cfg);
+
+	/* Ethertype: 2 bytes, KW0[47:32] */
+	cfg = KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x4);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LA, NPC_LT_LA_ETHER, 1, cfg);
+
+	/* Layer B: Single VLAN (CTAG) */
+	/* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */
+	cfg = KEX_LD_CFG(0x03, 0x0, 0x1, 0x0, 0x4);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LB, NPC_LT_LB_CTAG, 0, cfg);
+
+	/* Layer B: Stacked VLAN (STAG|QinQ) */
+	/* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */
+	cfg = KEX_LD_CFG(0x03, 0x4, 0x1, 0x0, 0x4);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LB, NPC_LT_LB_STAG, 0, cfg);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LB, NPC_LT_LB_QINQ, 0, cfg);
+
+	/* Layer C: IPv4 */
+	/* SIP+DIP: 8 bytes, KW2[63:0] */
+	cfg = KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LC, NPC_LT_LC_IP, 0, cfg);
+	/* TOS: 1 byte, KW1[63:56] */
+	cfg = KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LC, NPC_LT_LC_IP, 1, cfg);
+
+	/* Layer D:UDP */
+	/* SPORT: 2 bytes, KW3[15:0] */
+	cfg = KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_UDP, 0, cfg);
+	/* DPORT: 2 bytes, KW3[31:16] */
+	cfg = KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_UDP, 1, cfg);
+
+	/* Layer D:TCP */
+	/* SPORT: 2 bytes, KW3[15:0] */
+	cfg = KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_TCP, 0, cfg);
+	/* DPORT: 2 bytes, KW3[31:16] */
+	cfg = KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a);
+	SET_KEX_LD(NIX_INTF_RX, NPC_LID_LD, NPC_LT_LD_TCP, 1, cfg);
+}
+
+static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr,
+				     struct npc_mcam_kex *mkex)
+{
+	int lid, lt, ld, fl;
+
+	rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX),
+		    mkex->keyx_cfg[NIX_INTF_RX]);
+	rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX),
+		    mkex->keyx_cfg[NIX_INTF_TX]);
+
+	for (ld = 0; ld < NPC_MAX_LD; ld++)
+		rvu_write64(rvu, blkaddr, NPC_AF_KEX_LDATAX_FLAGS_CFG(ld),
+			    mkex->kex_ld_flags[ld]);
+
+	for (lid = 0; lid < NPC_MAX_LID; lid++) {
+		for (lt = 0; lt < NPC_MAX_LT; lt++) {
+			for (ld = 0; ld < NPC_MAX_LD; ld++) {
+				SET_KEX_LD(NIX_INTF_RX, lid, lt, ld,
+					   mkex->intf_lid_lt_ld[NIX_INTF_RX]
+					   [lid][lt][ld]);
+
+				SET_KEX_LD(NIX_INTF_TX, lid, lt, ld,
+					   mkex->intf_lid_lt_ld[NIX_INTF_TX]
+					   [lid][lt][ld]);
+			}
+		}
+	}
+
+	for (ld = 0; ld < NPC_MAX_LD; ld++) {
+		for (fl = 0; fl < NPC_MAX_LFL; fl++) {
+			SET_KEX_LDFLAGS(NIX_INTF_RX, ld, fl,
+					mkex->intf_ld_flags[NIX_INTF_RX]
+					[ld][fl]);
+
+			SET_KEX_LDFLAGS(NIX_INTF_TX, ld, fl,
+					mkex->intf_ld_flags[NIX_INTF_TX]
+					[ld][fl]);
+		}
+	}
+}
+
+/* strtoull of "mkexprof" with base:36 */
+#define MKEX_SIGN      0x19bbfdbd15f
+#define MKEX_END_SIGN  0xdeadbeef
+
+static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr)
+{
+	const char *mkex_profile = rvu->mkex_pfl_name;
+	struct device *dev = &rvu->pdev->dev;
+	void __iomem *mkex_prfl_addr = NULL;
+	struct npc_mcam_kex *mcam_kex;
+	u64 prfl_addr;
+	u64 prfl_sz;
+
+	/* If the user has not selected an mkex profile */
+	if (!strncmp(mkex_profile, "default", MKEX_NAME_LEN))
+		goto load_default;
+
+	if (cgx_get_mkex_prfl_info(&prfl_addr, &prfl_sz))
+		goto load_default;
+
+	if (!prfl_addr || !prfl_sz)
+		goto load_default;
+
+	mkex_prfl_addr = ioremap_wc(prfl_addr, prfl_sz);
+	if (!mkex_prfl_addr)
+		goto load_default;
+
+	mcam_kex = (struct npc_mcam_kex *)mkex_prfl_addr;
+
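+	/* MKEX profiles are laid out back to back in the firmware
+	 * region; scan until the end-of-list signature is seen or
+	 * the region is exhausted.
+	 */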
+	while (((s64)prfl_sz > 0) && (mcam_kex->mkex_sign != MKEX_END_SIGN)) {
+		/* Compare with mkex mod_param name string */
+		if (mcam_kex->mkex_sign == MKEX_SIGN &&
+		    !strncmp(mcam_kex->name, mkex_profile, MKEX_NAME_LEN)) {
+			/* Due to an erratum (35786) in A0 pass silicon,
+			 * parse nibble enable configuration has to be
+			 * identical for both Rx and Tx interfaces.
+			 */
+			if (is_rvu_9xxx_A0(rvu) &&
+			    mcam_kex->keyx_cfg[NIX_INTF_RX] !=
+			    mcam_kex->keyx_cfg[NIX_INTF_TX])
+				goto load_default;
+
+			/* Program selected mkex profile */
+			npc_program_mkex_profile(rvu, blkaddr, mcam_kex);
+
+			goto unmap;
+		}
+
+		mcam_kex++;
+		prfl_sz -= sizeof(struct npc_mcam_kex);
+	}
+	dev_warn(dev, "Failed to load requested profile: %s\n",
+		 rvu->mkex_pfl_name);
+
+load_default:
+	dev_info(rvu->dev, "Using default mkex profile\n");
+	/* Config packet data and flags extraction into PARSE result */
+	npc_config_ldata_extract(rvu, blkaddr);
+
+unmap:
+	if (mkex_prfl_addr)
+		iounmap(mkex_prfl_addr);
+}
+
+static void npc_config_kpuaction(struct rvu *rvu, int blkaddr,
+				 struct npc_kpu_profile_action *kpuaction,
+				 int kpu, int entry, bool pkind)
+{
+	struct npc_kpu_action0 action0 = {0};
+	struct npc_kpu_action1 action1 = {0};
+	u64 reg;
+
+	action1.errlev = kpuaction->errlev;
+	action1.errcode = kpuaction->errcode;
+	action1.dp0_offset = kpuaction->dp0_offset;
+	action1.dp1_offset = kpuaction->dp1_offset;
+	action1.dp2_offset = kpuaction->dp2_offset;
+
+	if (pkind)
+		reg = NPC_AF_PKINDX_ACTION1(entry);
+	else
+		reg = NPC_AF_KPUX_ENTRYX_ACTION1(kpu, entry);
+
+	rvu_write64(rvu, blkaddr, reg, *(u64 *)&action1);
+
+	action0.byp_count = kpuaction->bypass_count;
+	action0.capture_ena = kpuaction->cap_ena;
+	action0.parse_done = kpuaction->parse_done;
+	action0.next_state = kpuaction->next_state;
+	action0.capture_lid = kpuaction->lid;
+	action0.capture_ltype = kpuaction->ltype;
+	action0.capture_flags = kpuaction->flags;
+	action0.ptr_advance = kpuaction->ptr_advance;
+	action0.var_len_offset = kpuaction->offset;
+	action0.var_len_mask = kpuaction->mask;
+	action0.var_len_right = kpuaction->right;
+	action0.var_len_shift = kpuaction->shift;
+
+	if (pkind)
+		reg = NPC_AF_PKINDX_ACTION0(entry);
+	else
+		reg = NPC_AF_KPUX_ENTRYX_ACTION0(kpu, entry);
+
+	rvu_write64(rvu, blkaddr, reg, *(u64 *)&action0);
+}
+
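+/* A KPU CAM entry is programmed as a (CAM0, CAM1) word pair: for each
+ * bit position, CAM1 carries the match value and CAM0 its complement,
+ * while don't-care bits (mask cleared) are left as zero in both words.
+ */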
+static void npc_config_kpucam(struct rvu *rvu, int blkaddr,
+			      struct npc_kpu_profile_cam *kpucam,
+			      int kpu, int entry)
+{
+	struct npc_kpu_cam cam0 = {0};
+	struct npc_kpu_cam cam1 = {0};
+
+	cam1.state = kpucam->state & kpucam->state_mask;
+	cam1.dp0_data = kpucam->dp0 & kpucam->dp0_mask;
+	cam1.dp1_data = kpucam->dp1 & kpucam->dp1_mask;
+	cam1.dp2_data = kpucam->dp2 & kpucam->dp2_mask;
+
+	cam0.state = ~kpucam->state & kpucam->state_mask;
+	cam0.dp0_data = ~kpucam->dp0 & kpucam->dp0_mask;
+	cam0.dp1_data = ~kpucam->dp1 & kpucam->dp1_mask;
+	cam0.dp2_data = ~kpucam->dp2 & kpucam->dp2_mask;
+
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_KPUX_ENTRYX_CAMX(kpu, entry, 0), *(u64 *)&cam0);
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_KPUX_ENTRYX_CAMX(kpu, entry, 1), *(u64 *)&cam1);
+}
+
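+/* Returns a KPU entry-disable mask for one 64-entry register: bits for
+ * entries [0..count-1] are cleared (enabled) and the remaining bits
+ * stay set (disabled); e.g. enable_mask(3) == ~0x7ULL.
+ */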
+static inline u64 enable_mask(int count)
+{
+	return (((count) < 64) ? ~(BIT_ULL(count) - 1) : (0x00ULL));
+}
+
+static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu,
+				    struct npc_kpu_profile *profile)
+{
+	int entry, num_entries, max_entries;
+
+	if (profile->cam_entries != profile->action_entries) {
+		dev_err(rvu->dev,
+			"KPU%d: CAM and action entries [%d != %d] not equal\n",
+			kpu, profile->cam_entries, profile->action_entries);
+	}
+
+	max_entries = rvu_read64(rvu, blkaddr, NPC_AF_CONST1) & 0xFFF;
+
+	/* Program CAM match entries for previous KPU extracted data */
+	num_entries = min_t(int, profile->cam_entries, max_entries);
+	for (entry = 0; entry < num_entries; entry++)
+		npc_config_kpucam(rvu, blkaddr,
+				  &profile->cam[entry], kpu, entry);
+
+	/* Program this KPU's actions */
+	num_entries = min_t(int, profile->action_entries, max_entries);
+	for (entry = 0; entry < num_entries; entry++)
+		npc_config_kpuaction(rvu, blkaddr, &profile->action[entry],
+				     kpu, entry, false);
+
+	/* Enable all programmed entries */
+	num_entries = min_t(int, profile->action_entries, profile->cam_entries);
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_KPUX_ENTRY_DISX(kpu, 0), enable_mask(num_entries));
+	if (num_entries > 64) {
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_KPUX_ENTRY_DISX(kpu, 1),
+			    enable_mask(num_entries - 64));
+	}
+
+	/* Enable this KPU */
+	rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(kpu), 0x01);
+}
+
+static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	int num_pkinds, num_kpus, idx;
+	struct npc_pkind *pkind;
+
+	/* Get HW limits */
+	hw->npc_kpus = (rvu_read64(rvu, blkaddr, NPC_AF_CONST) >> 8) & 0x1F;
+
+	/* Disable all KPUs and their entries */
+	for (idx = 0; idx < hw->npc_kpus; idx++) {
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_KPUX_ENTRY_DISX(idx, 0), ~0ULL);
+		rvu_write64(rvu, blkaddr,
+			    NPC_AF_KPUX_ENTRY_DISX(idx, 1), ~0ULL);
+		rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(idx), 0x00);
+	}
+
+	/* First program the IKPU profile, i.e. the PKIND configs.
+	 * Check the HW max count to avoid configuring junk or
+	 * writing to unsupported CSR addresses.
+	 */
+	pkind = &hw->pkind;
+	num_pkinds = ARRAY_SIZE(ikpu_action_entries);
+	num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds);
+
+	for (idx = 0; idx < num_pkinds; idx++)
+		npc_config_kpuaction(rvu, blkaddr,
+				     &ikpu_action_entries[idx], 0, idx, true);
+
+	/* Program KPU CAM and Action profiles */
+	num_kpus = ARRAY_SIZE(npc_kpu_profiles);
+	num_kpus = min_t(int, hw->npc_kpus, num_kpus);
+
+	for (idx = 0; idx < num_kpus; idx++)
+		npc_program_kpu_profile(rvu, blkaddr,
+					idx, &npc_kpu_profiles[idx]);
+}
+
+static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
+{
+	int nixlf_count = rvu_get_nixlf_count(rvu);
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int rsvd, err;
+	u64 cfg;
+
+	/* Get HW limits */
+	cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST);
+	mcam->banks = (cfg >> 44) & 0xF;
+	mcam->banksize = (cfg >> 28) & 0xFFFF;
+	mcam->counters.max = (cfg >> 48) & 0xFFFF;
+
+	/* Actual number of MCAM entries varies with entry size */
+	cfg = (rvu_read64(rvu, blkaddr,
+			  NPC_AF_INTFX_KEX_CFG(0)) >> 32) & 0x07;
+	mcam->total_entries = (mcam->banks / BIT_ULL(cfg)) * mcam->banksize;
+	mcam->keysize = cfg;
+
+	/* Number of banks combined per MCAM entry */
+	if (cfg == NPC_MCAM_KEY_X4)
+		mcam->banks_per_entry = 4;
+	else if (cfg == NPC_MCAM_KEY_X2)
+		mcam->banks_per_entry = 2;
+	else
+		mcam->banks_per_entry = 1;
+
+	/* Reserve one MCAM entry for each NIX LF to guarantee
+	 * space for installing the default matching DMAC rule.
+	 * Also reserve 2 MCAM entries for each PF, for default
+	 * channel based matching, i.e. 'bcast & promisc' matching,
+	 * to support the BCAST and PROMISC modes of operation.
+	 * PF0 is excluded.
+	 */
+	rsvd = (nixlf_count * RSVD_MCAM_ENTRIES_PER_NIXLF) +
+		((rvu->hw->total_pfs - 1) * RSVD_MCAM_ENTRIES_PER_PF);
+	if (mcam->total_entries <= rsvd) {
+		dev_warn(rvu->dev,
+			 "Insufficient NPC MCAM size %d for pkt I/O, exiting\n",
+			 mcam->total_entries);
+		return -ENOMEM;
+	}
+
+	mcam->bmap_entries = mcam->total_entries - rsvd;
+	mcam->nixlf_offset = mcam->bmap_entries;
+	mcam->pf_offset = mcam->nixlf_offset + nixlf_count;
+
+	/* Allocate bitmaps for managing MCAM entries */
+	mcam->bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(mcam->bmap_entries),
+				  sizeof(long), GFP_KERNEL);
+	if (!mcam->bmap)
+		return -ENOMEM;
+
+	mcam->bmap_reverse = devm_kcalloc(rvu->dev,
+					  BITS_TO_LONGS(mcam->bmap_entries),
+					  sizeof(long), GFP_KERNEL);
+	if (!mcam->bmap_reverse)
+		return -ENOMEM;
+
+	mcam->bmap_fcnt = mcam->bmap_entries;
+
+	/* Alloc memory for saving entry to RVU PFFUNC allocation mapping */
+	mcam->entry2pfvf_map = devm_kcalloc(rvu->dev, mcam->bmap_entries,
+					    sizeof(u16), GFP_KERNEL);
+	if (!mcam->entry2pfvf_map)
+		return -ENOMEM;
+
+	/* Reserve 1/8th of MCAM entries at the bottom for low priority
+	 * allocations and another 1/8th at the top for high priority
+	 * allocations.
+	 */
+	mcam->lprio_count = mcam->bmap_entries / 8;
+	if (mcam->lprio_count > BITS_PER_LONG)
+		mcam->lprio_count = round_down(mcam->lprio_count,
+					       BITS_PER_LONG);
+	mcam->lprio_start = mcam->bmap_entries - mcam->lprio_count;
+	mcam->hprio_count = mcam->lprio_count;
+	mcam->hprio_end = mcam->hprio_count;
+
+	/* Allocate bitmap for managing MCAM counters and memory
+	 * for saving counter to RVU PFFUNC allocation mapping.
+	 */
+	err = rvu_alloc_bitmap(&mcam->counters);
+	if (err)
+		return err;
+
+	mcam->cntr2pfvf_map = devm_kcalloc(rvu->dev, mcam->counters.max,
+					   sizeof(u16), GFP_KERNEL);
+	if (!mcam->cntr2pfvf_map)
+		goto free_mem;
+
+	/* Alloc memory for MCAM entry to counter mapping and for tracking
+	 * counter's reference count.
+	 */
+	mcam->entry2cntr_map = devm_kcalloc(rvu->dev, mcam->bmap_entries,
+					    sizeof(u16), GFP_KERNEL);
+	if (!mcam->entry2cntr_map)
+		goto free_mem;
+
+	mcam->cntr_refcnt = devm_kcalloc(rvu->dev, mcam->counters.max,
+					 sizeof(u16), GFP_KERNEL);
+	if (!mcam->cntr_refcnt)
+		goto free_mem;
+
+	mutex_init(&mcam->lock);
+
+	return 0;
+
+free_mem:
+	kfree(mcam->counters.bmap);
+	return -ENOMEM;
+}
+
+int rvu_npc_init(struct rvu *rvu)
+{
+	struct npc_pkind *pkind = &rvu->hw->pkind;
+	u64 keyz = NPC_MCAM_KEY_X2;
+	int blkaddr, entry, bank, err;
+	u64 cfg, nibble_ena;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0) {
+		dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__);
+		return -ENODEV;
+	}
+
+	/* First disable all MCAM entries, to stop traffic towards NIXLFs */
+	cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST);
+	for (bank = 0; bank < ((cfg >> 44) & 0xF); bank++) {
+		for (entry = 0; entry < ((cfg >> 28) & 0xFFFF); entry++)
+			rvu_write64(rvu, blkaddr,
+				    NPC_AF_MCAMEX_BANKX_CFG(entry, bank), 0);
+	}
+
+	/* Allocate resource bitmap for pkind */
+	pkind->rsrc.max = (rvu_read64(rvu, blkaddr,
+				      NPC_AF_CONST1) >> 12) & 0xFF;
+	err = rvu_alloc_bitmap(&pkind->rsrc);
+	if (err)
+		return err;
+
+	/* Allocate mem for pkind to PF and channel mapping info */
+	pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max,
+					 sizeof(u32), GFP_KERNEL);
+	if (!pkind->pfchan_map)
+		return -ENOMEM;
+
+	/* Configure KPU profile */
+	npc_parser_profile_init(rvu, blkaddr);
+
+	/* Config Outer L2, IPv4's NPC layer info */
+	rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_OL2,
+		    (NPC_LID_LA << 8) | (NPC_LT_LA_ETHER << 4) | 0x0F);
+	rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_OIP4,
+		    (NPC_LID_LC << 8) | (NPC_LT_LC_IP << 4) | 0x0F);
+
+	/* Config Inner IPV4 NPC layer info */
+	rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_IIP4,
+		    (NPC_LID_LF << 8) | (NPC_LT_LF_TU_IP << 4) | 0x0F);
+
+	/* Enable below for Rx pkts.
+	 * - Outer IPv4 header checksum validation.
+	 * - Detect outer L2 broadcast address and set NPC_RESULT_S[L2M].
+	 * - Inner IPv4 header checksum validation.
+	 * - Set a non-zero checksum error code value
+	 */
+	rvu_write64(rvu, blkaddr, NPC_AF_PCK_CFG,
+		    rvu_read64(rvu, blkaddr, NPC_AF_PCK_CFG) |
+		    BIT_ULL(32) | BIT_ULL(24) | BIT_ULL(6) |
+		    BIT_ULL(2) | BIT_ULL(1));
+
+	/* Set RX and TX side MCAM search key size.
+	 * LA..LD (ltype only) + Channel
+	 */
+	nibble_ena = 0x49247;
+	rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX),
+			((keyz & 0x3) << 32) | nibble_ena);
+	/* Due to an erratum (35786) in A0 pass silicon, parse nibble enable
+	 * configuration has to be identical for both Rx and Tx interfaces.
+	 */
+	if (!is_rvu_9xxx_A0(rvu))
+		nibble_ena = (1ULL << 19) - 1;
+	rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX),
+			((keyz & 0x3) << 32) | nibble_ena);
+
+	err = npc_mcam_rsrcs_init(rvu, blkaddr);
+	if (err)
+		return err;
+
+	/* Configure MKEX profile */
+	npc_load_mkex_profile(rvu, blkaddr);
+
+	/* Set TX miss action to UCAST_DEFAULT, i.e.
+	 * transmit the packet on the NIX LF SQ's default channel.
+	 */
+	rvu_write64(rvu, blkaddr, NPC_AF_INTFX_MISS_ACT(NIX_INTF_TX),
+		    NIX_TX_ACTIONOP_UCAST_DEFAULT);
+
+	/* If MCAM lookup doesn't result in a match, drop the received packet */
+	rvu_write64(rvu, blkaddr, NPC_AF_INTFX_MISS_ACT(NIX_INTF_RX),
+		    NIX_RX_ACTIONOP_DROP);
+
+	return 0;
+}
+
+void rvu_npc_freemem(struct rvu *rvu)
+{
+	struct npc_pkind *pkind = &rvu->hw->pkind;
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+
+	kfree(pkind->rsrc.bmap);
+	kfree(mcam->counters.bmap);
+	mutex_destroy(&mcam->lock);
+}
+
+static int npc_mcam_verify_entry(struct npc_mcam *mcam,
+				 u16 pcifunc, int entry)
+{
+	/* Verify if entry is valid and if it is indeed
+	 * allocated to the requesting PFFUNC.
+	 */
+	if (entry >= mcam->bmap_entries)
+		return NPC_MCAM_INVALID_REQ;
+
+	if (pcifunc != mcam->entry2pfvf_map[entry])
+		return NPC_MCAM_PERM_DENIED;
+
+	return 0;
+}
+
+static int npc_mcam_verify_counter(struct npc_mcam *mcam,
+				   u16 pcifunc, int cntr)
+{
+	/* Verify if counter is valid and if it is indeed
+	 * allocated to the requesting PFFUNC.
+	 */
+	if (cntr >= mcam->counters.max)
+		return NPC_MCAM_INVALID_REQ;
+
+	if (pcifunc != mcam->cntr2pfvf_map[cntr])
+		return NPC_MCAM_PERM_DENIED;
+
+	return 0;
+}
+
+static void npc_map_mcam_entry_and_cntr(struct rvu *rvu, struct npc_mcam *mcam,
+					int blkaddr, u16 entry, u16 cntr)
+{
+	u16 index = entry & (mcam->banksize - 1);
+	u16 bank = npc_get_bank(mcam, entry);
+
+	/* Set mapping and increment counter's refcnt */
+	mcam->entry2cntr_map[entry] = cntr;
+	mcam->cntr_refcnt[cntr]++;
+	/* Enable stats */
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank),
+		    BIT_ULL(9) | cntr);
+}
+
+static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu,
+					  struct npc_mcam *mcam,
+					  int blkaddr, u16 entry, u16 cntr)
+{
+	u16 index = entry & (mcam->banksize - 1);
+	u16 bank = npc_get_bank(mcam, entry);
+
+	/* Remove mapping and reduce counter's refcnt */
+	mcam->entry2cntr_map[entry] = NPC_MCAM_INVALID_MAP;
+	mcam->cntr_refcnt[cntr]--;
+	/* Disable stats */
+	rvu_write64(rvu, blkaddr,
+		    NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank), 0x00);
+}
+
+/* Sets MCAM entry in bitmap as used. Update
+ * reverse bitmap too. Should be called with
+ * 'mcam->lock' held.
+ */
+static void npc_mcam_set_bit(struct npc_mcam *mcam, u16 index)
+{
+	u16 entry, rentry;
+
+	entry = index;
+	rentry = mcam->bmap_entries - index - 1;
+
+	__set_bit(entry, mcam->bmap);
+	__set_bit(rentry, mcam->bmap_reverse);
+	mcam->bmap_fcnt--;
+}
+
+/* Sets MCAM entry in bitmap as free. Update
+ * reverse bitmap too. Should be called with
+ * 'mcam->lock' held.
+ */
+static void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index)
+{
+	u16 entry, rentry;
+
+	entry = index;
+	rentry = mcam->bmap_entries - index - 1;
+
+	__clear_bit(entry, mcam->bmap);
+	__clear_bit(rentry, mcam->bmap_reverse);
+	mcam->bmap_fcnt++;
+}
+
+static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam,
+				      int blkaddr, u16 pcifunc)
+{
+	u16 index, cntr;
+
+	/* Scan all MCAM entries and free the ones mapped to 'pcifunc' */
+	for (index = 0; index < mcam->bmap_entries; index++) {
+		if (mcam->entry2pfvf_map[index] == pcifunc) {
+			mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP;
+			/* Free the entry in bitmap */
+			npc_mcam_clear_bit(mcam, index);
+			/* Disable the entry */
+			npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false);
+
+			/* Update entry2counter mapping */
+			cntr = mcam->entry2cntr_map[index];
+			if (cntr != NPC_MCAM_INVALID_MAP)
+				npc_unmap_mcam_entry_and_cntr(rvu, mcam,
+							      blkaddr, index,
+							      cntr);
+		}
+	}
+}
+
+static void npc_mcam_free_all_counters(struct rvu *rvu, struct npc_mcam *mcam,
+				       u16 pcifunc)
+{
+	u16 cntr;
+
+	/* Scan all MCAM counters and free the ones mapped to 'pcifunc' */
+	for (cntr = 0; cntr < mcam->counters.max; cntr++) {
+		if (mcam->cntr2pfvf_map[cntr] == pcifunc) {
+			mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP;
+			mcam->cntr_refcnt[cntr] = 0;
+			rvu_free_rsrc(&mcam->counters, cntr);
+			/* This API is expected to be called after freeing
+			 * MCAM entries, which in turn removes the
+			 * 'entry to counter' mapping.
+			 * No need to do it again.
+			 */
+		}
+	}
+}
+
+/* Find an area of contiguous free entries of size 'nr'.
+ * If none is found, return the start of the largest contiguous
+ * free area and report its size via 'max_area'.
+ */
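+/* e.g. with map = 0b11001001 (bit 0 = entry 0), size = 8, start = 0 and
+ * nr = 3: the free runs are {1,2} and {4,5}; no run of 3 exists, so
+ * *max_area is set to 2 and index 1, the start of the first such run,
+ * is returned.
+ */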
+static u16 npc_mcam_find_zero_area(unsigned long *map, u16 size, u16 start,
+				   u16 nr, u16 *max_area)
+{
+	u16 max_area_start = 0;
+	u16 index, next, end;
+
+	*max_area = 0;
+
+again:
+	index = find_next_zero_bit(map, size, start);
+	if (index >= size)
+		return max_area_start;
+
+	end = ((index + nr) >= size) ? size : index + nr;
+	next = find_next_bit(map, end, index);
+	if (*max_area < (next - index)) {
+		*max_area = next - index;
+		max_area_start = index;
+	}
+
+	if (next < end) {
+		start = next + 1;
+		goto again;
+	}
+
+	return max_area_start;
+}
+
+/* Find number of free MCAM entries available
+ * within a range, i.e. between 'start' and 'end'.
+ */
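+/* e.g. with map = 0b0110 (bit 0 = entry 0), start = 0 and end = 4:
+ * entries 0 and 3 are free, so 2 is returned.
+ */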
+static u16 npc_mcam_get_free_count(unsigned long *map, u16 start, u16 end)
+{
+	u16 index, next;
+	u16 fcnt = 0;
+
+again:
+	if (start >= end)
+		return fcnt;
+
+	index = find_next_zero_bit(map, end, start);
+	if (index >= end)
+		return fcnt;
+
+	next = find_next_bit(map, end, index);
+	if (next <= end) {
+		fcnt += next - index;
+		start = next + 1;
+		goto again;
+	}
+
+	fcnt += end - index;
+	return fcnt;
+}
+
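+/* MCAM bitmap zones, in forward search order:
+ *
+ *   [0 .. hprio_end)              - high priority zone
+ *   [hprio_end .. lprio_start)    - middle zone
+ *   [lprio_start .. bmap_entries) - low priority zone
+ */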
+static void
+npc_get_mcam_search_range_priority(struct npc_mcam *mcam,
+				   struct npc_mcam_alloc_entry_req *req,
+				   u16 *start, u16 *end, bool *reverse)
+{
+	u16 fcnt;
+
+	if (req->priority == NPC_MCAM_HIGHER_PRIO)
+		goto hprio;
+
+	/* For a low priority entry allocation
+	 * - If the reference entry is not in the hprio zone then
+	 *      search range: ref_entry to end.
+	 * - If the reference entry is in the hprio zone and the
+	 *   request can be accommodated in the non-hprio zone then
+	 *      search range: 'start of middle zone' to 'end'.
+	 * - Else search in reverse, so that fewer hprio zone
+	 *   entries are allocated.
+	 */
+
+	*reverse = false;
+	*start = req->ref_entry + 1;
+	*end = mcam->bmap_entries;
+
+	if (req->ref_entry >= mcam->hprio_end)
+		return;
+
+	fcnt = npc_mcam_get_free_count(mcam->bmap,
+				       mcam->hprio_end, mcam->bmap_entries);
+	if (fcnt > req->count)
+		*start = mcam->hprio_end;
+	else
+		*reverse = true;
+	return;
+
+hprio:
+	/* For a high priority entry allocation, search is always
+	 * in reverse to preserve hprio zone entries.
+	 * - If the reference entry is not in the lprio zone then
+	 *      search range: 0 to ref_entry.
+	 * - If the reference entry is in the lprio zone and the
+	 *   request can be accommodated in the middle zone then
+	 *      search range: 'hprio_end' to 'lprio_start'.
+	 */
+
+	*reverse = true;
+	*start = 0;
+	*end = req->ref_entry;
+
+	if (req->ref_entry <= mcam->lprio_start)
+		return;
+
+	fcnt = npc_mcam_get_free_count(mcam->bmap,
+				       mcam->hprio_end, mcam->lprio_start);
+	if (fcnt < req->count)
+		return;
+	*start = mcam->hprio_end;
+	*end = mcam->lprio_start;
+}
+
+static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
+				  struct npc_mcam_alloc_entry_req *req,
+				  struct npc_mcam_alloc_entry_rsp *rsp)
+{
+	u16 entry_list[NPC_MAX_NONCONTIG_ENTRIES];
+	u16 fcnt, hp_fcnt, lp_fcnt;
+	u16 start, end, index;
+	int entry, next_start;
+	bool reverse = false;
+	unsigned long *bmap;
+	u16 max_contig;
+
+	mutex_lock(&mcam->lock);
+
+	/* Check if there are any free entries */
+	if (!mcam->bmap_fcnt) {
+		mutex_unlock(&mcam->lock);
+		return NPC_MCAM_ALLOC_FAILED;
+	}
+
+	/* MCAM entries are divided into high priority, middle and
+	 * low priority zones. The idea is to avoid allocating the
+	 * top-most and bottom-most entries as far as possible, to
+	 * increase the probability of honouring priority allocation
+	 * requests.
+	 *
+	 * Two bitmaps are used for MCAM entry management:
+	 * mcam->bmap for forward search, i.e. '0 to mcam->bmap_entries',
+	 * mcam->bmap_reverse for reverse search, i.e. 'mcam->bmap_entries
+	 * to 0'.
+	 *
+	 * The reverse bitmap is used to allocate entries
+	 * - when a higher priority entry is requested
+	 * - when available free entries are few.
+	 * When a 'high vs low' question arises, the lower priority
+	 * ones among the available free entries are always chosen.
+	 */
+
+	/* Get the search range for priority allocation request */
+	if (req->priority) {
+		npc_get_mcam_search_range_priority(mcam, req,
+						   &start, &end, &reverse);
+		goto alloc;
+	}
+
+	/* Find the search range for a non-priority allocation request.
+	 *
+	 * Get the MCAM free entry count in the middle zone.
+	 */
+	lp_fcnt = npc_mcam_get_free_count(mcam->bmap,
+					  mcam->lprio_start,
+					  mcam->bmap_entries);
+	hp_fcnt = npc_mcam_get_free_count(mcam->bmap, 0, mcam->hprio_end);
+	fcnt = mcam->bmap_fcnt - lp_fcnt - hp_fcnt;
+
+	/* Check if the request can be accommodated in the middle zone */
+	if (fcnt > req->count) {
+		start = mcam->hprio_end;
+		end = mcam->lprio_start;
+	} else if ((fcnt + (hp_fcnt / 2) + (lp_fcnt / 2)) > req->count) {
+		/* Expand search zone from half of hprio zone to
+		 * half of lprio zone.
+		 */
+		start = mcam->hprio_end / 2;
+		end = mcam->bmap_entries - (mcam->lprio_count / 2);
+		reverse = true;
+	} else {
+		/* Not enough free entries, search all entries in reverse,
+		 * so that low priority ones will get used up.
+		 */
+		reverse = true;
+		start = 0;
+		end = mcam->bmap_entries;
+	}
+
+alloc:
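+	/* A reverse bitmap index i mirrors forward entry
+	 * (bmap_entries - 1 - i), so the search window bounds are
+	 * mirrored and swapped when searching in reverse.
+	 */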
+	if (reverse) {
+		bmap = mcam->bmap_reverse;
+		start = mcam->bmap_entries - start;
+		end = mcam->bmap_entries - end;
+		index = start;
+		start = end;
+		end = index;
+	} else {
+		bmap = mcam->bmap;
+	}
+
+	if (req->contig) {
+		/* Allocate requested number of contiguous entries, if
+		 * unsuccessful find max contiguous entries available.
+		 */
+		index = npc_mcam_find_zero_area(bmap, end, start,
+						req->count, &max_contig);
+		rsp->count = max_contig;
+		if (reverse)
+			rsp->entry = mcam->bmap_entries - index - max_contig;
+		else
+			rsp->entry = index;
+	} else {
+		/* Allocate requested number of non-contiguous entries,
+		 * if unsuccessful allocate as many as possible.
+		 */
+		rsp->count = 0;
+		next_start = start;
+		for (entry = 0; entry < req->count; entry++) {
+			index = find_next_zero_bit(bmap, end, next_start);
+			if (index >= end)
+				break;
+
+			next_start = start + (index - start) + 1;
+
+			/* Save the entry's index */
+			if (reverse)
+				index = mcam->bmap_entries - index - 1;
+			entry_list[entry] = index;
+			rsp->count++;
+		}
+	}
+
+	/* If allocating the requested number of entries is unsuccessful,
+	 * expand the search range to the full bitmap length and retry.
+	 */
+	if (!req->priority && (rsp->count < req->count) &&
+	    ((end - start) != mcam->bmap_entries)) {
+		reverse = true;
+		start = 0;
+		end = mcam->bmap_entries;
+		goto alloc;
+	}
+
+	/* For priority entry allocation requests, if allocation
+	 * failed then expand the search to the max possible range
+	 * and retry.
+	 */
+	if (req->priority && rsp->count < req->count) {
+		if (req->priority == NPC_MCAM_LOWER_PRIO &&
+		    (start != (req->ref_entry + 1))) {
+			start = req->ref_entry + 1;
+			end = mcam->bmap_entries;
+			reverse = false;
+			goto alloc;
+		} else if ((req->priority == NPC_MCAM_HIGHER_PRIO) &&
+			   ((end - start) != req->ref_entry)) {
+			start = 0;
+			end = req->ref_entry;
+			reverse = true;
+			goto alloc;
+		}
+	}
+
+	/* Copy MCAM entry indices into the mbox response entry_list.
+	 * The requester always expects indices in ascending order,
+	 * so reverse the list if the reverse bitmap was used for
+	 * allocation.
+	 */
+	if (!req->contig && rsp->count) {
+		index = 0;
+		for (entry = rsp->count - 1; entry >= 0; entry--) {
+			if (reverse)
+				rsp->entry_list[index++] = entry_list[entry];
+			else
+				rsp->entry_list[entry] = entry_list[entry];
+		}
+	}
+
+	/* Mark the allocated entries as used and map them to this PFFUNC */
+	for (entry = 0; entry < rsp->count; entry++) {
+		index = req->contig ?
+			(rsp->entry + entry) : rsp->entry_list[entry];
+		npc_mcam_set_bit(mcam, index);
+		mcam->entry2pfvf_map[index] = pcifunc;
+		mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP;
+	}
+
+	/* Update available free count in mbox response */
+	rsp->free_count = mcam->bmap_fcnt;
+
+	mutex_unlock(&mcam->lock);
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu,
+					  struct npc_mcam_alloc_entry_req *req,
+					  struct npc_mcam_alloc_entry_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	rsp->entry = NPC_MCAM_ENTRY_INVALID;
+	rsp->free_count = 0;
+
+	/* Check if ref_entry is within range */
+	if (req->priority && req->ref_entry >= mcam->bmap_entries)
+		return NPC_MCAM_INVALID_REQ;
+
+	/* ref_entry can't be '0' if the requested priority is high.
+	 * It can't be the last entry if the requested priority is low.
+	 */
+	if ((!req->ref_entry && req->priority == NPC_MCAM_HIGHER_PRIO) ||
+	    ((req->ref_entry == (mcam->bmap_entries - 1)) &&
+	     req->priority == NPC_MCAM_LOWER_PRIO))
+		return NPC_MCAM_INVALID_REQ;
+
+	/* Since list of allocated indices needs to be sent to requester,
+	 * max number of non-contiguous entries per mbox msg is limited.
+	 */
+	if (!req->contig && req->count > NPC_MAX_NONCONTIG_ENTRIES)
+		return NPC_MCAM_INVALID_REQ;
+
+	/* Alloc request from PFFUNC with no NIXLF attached should be denied */
+	if (!is_nixlf_attached(rvu, pcifunc))
+		return NPC_MCAM_ALLOC_DENIED;
+
+	return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp);
+}
+
+int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu,
+					 struct npc_mcam_free_entry_req *req,
+					 struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, rc = 0;
+	u16 cntr;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	/* Free request from PFFUNC with no NIXLF attached, ignore */
+	if (!is_nixlf_attached(rvu, pcifunc))
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+
+	if (req->all)
+		goto free_all;
+
+	rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
+	if (rc)
+		goto exit;
+
+	mcam->entry2pfvf_map[req->entry] = 0;
+	npc_mcam_clear_bit(mcam, req->entry);
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false);
+
+	/* Update entry2counter mapping */
+	cntr = mcam->entry2cntr_map[req->entry];
+	if (cntr != NPC_MCAM_INVALID_MAP)
+		npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+					      req->entry, cntr);
+
+	goto exit;
+
+free_all:
+	/* Free up all entries allocated to requesting PFFUNC */
+	npc_mcam_free_all_entries(rvu, mcam, blkaddr, pcifunc);
+exit:
+	mutex_unlock(&mcam->lock);
+	return rc;
+}
+
+int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
+					  struct npc_mcam_write_entry_req *req,
+					  struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, rc;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
+	if (rc)
+		goto exit;
+
+	if (req->set_cntr &&
+	    npc_mcam_verify_counter(mcam, pcifunc, req->cntr)) {
+		rc = NPC_MCAM_INVALID_REQ;
+		goto exit;
+	}
+
+	if (req->intf != NIX_INTF_RX && req->intf != NIX_INTF_TX) {
+		rc = NPC_MCAM_INVALID_REQ;
+		goto exit;
+	}
+
+	npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, req->intf,
+			      &req->entry_data, req->enable_entry);
+
+	if (req->set_cntr)
+		npc_map_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+					    req->entry, req->cntr);
+
+	rc = 0;
+exit:
+	mutex_unlock(&mcam->lock);
+	return rc;
+}
+
+int rvu_mbox_handler_npc_mcam_ena_entry(struct rvu *rvu,
+					struct npc_mcam_ena_dis_entry_req *req,
+					struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, rc;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
+	mutex_unlock(&mcam->lock);
+	if (rc)
+		return rc;
+
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, true);
+
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_dis_entry(struct rvu *rvu,
+					struct npc_mcam_ena_dis_entry_req *req,
+					struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	int blkaddr, rc;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
+	mutex_unlock(&mcam->lock);
+	if (rc)
+		return rc;
+
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false);
+
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu,
+					  struct npc_mcam_shift_entry_req *req,
+					  struct npc_mcam_shift_entry_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	u16 old_entry, new_entry;
+	u16 index, cntr;
+	int blkaddr, rc = 0;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	if (req->shift_count > NPC_MCAM_MAX_SHIFTS)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	for (index = 0; index < req->shift_count; index++) {
+		old_entry = req->curr_entry[index];
+		new_entry = req->new_entry[index];
+
+		/* Check whether both the old and new entries are valid
+		 * and belong to this PFFUNC.
+		 */
+		rc = npc_mcam_verify_entry(mcam, pcifunc, old_entry);
+		if (rc)
+			break;
+
+		rc = npc_mcam_verify_entry(mcam, pcifunc, new_entry);
+		if (rc)
+			break;
+
+		/* new_entry should not have a counter mapped */
+		if (mcam->entry2cntr_map[new_entry] != NPC_MCAM_INVALID_MAP) {
+			rc = NPC_MCAM_PERM_DENIED;
+			break;
+		}
+
+		/* Disable the new_entry */
+		npc_enable_mcam_entry(rvu, mcam, blkaddr, new_entry, false);
+
+		/* Copy rule from old entry to new entry */
+		npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry);
+
+		/* Copy counter mapping, if any */
+		cntr = mcam->entry2cntr_map[old_entry];
+		if (cntr != NPC_MCAM_INVALID_MAP) {
+			npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+						      old_entry, cntr);
+			npc_map_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+						    new_entry, cntr);
+		}
+
+		/* Enable new_entry and disable old_entry */
+		npc_enable_mcam_entry(rvu, mcam, blkaddr, new_entry, true);
+		npc_enable_mcam_entry(rvu, mcam, blkaddr, old_entry, false);
+	}
+
+	/* If shift has failed then report the failed index */
+	if (index != req->shift_count) {
+		rc = NPC_MCAM_PERM_DENIED;
+		rsp->failed_entry_idx = index;
+	}
+
+	mutex_unlock(&mcam->lock);
+	return rc;
+}
+
+int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu,
+			struct npc_mcam_alloc_counter_req *req,
+			struct npc_mcam_alloc_counter_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 pcifunc = req->hdr.pcifunc;
+	u16 max_contig, cntr;
+	int blkaddr, index;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	/* If the request is from a PFFUNC with no NIXLF attached, ignore */
+	if (!is_nixlf_attached(rvu, pcifunc))
+		return NPC_MCAM_INVALID_REQ;
+
+	/* Since list of allocated counter IDs needs to be sent to requester,
+	 * max number of non-contiguous counters per mbox msg is limited.
+	 */
+	if (!req->contig && req->count > NPC_MAX_NONCONTIG_COUNTERS)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+
+	/* Check if unused counters are available or not */
+	if (!rvu_rsrc_free_count(&mcam->counters)) {
+		mutex_unlock(&mcam->lock);
+		return NPC_MCAM_ALLOC_FAILED;
+	}
+
+	rsp->count = 0;
+
+	if (req->contig) {
+		/* Allocate requested number of contiguous counters, if
+		 * unsuccessful find max contiguous entries available.
+		 */
+		index = npc_mcam_find_zero_area(mcam->counters.bmap,
+						mcam->counters.max, 0,
+						req->count, &max_contig);
+		rsp->count = max_contig;
+		rsp->cntr = index;
+		for (cntr = index; cntr < (index + max_contig); cntr++) {
+			__set_bit(cntr, mcam->counters.bmap);
+			mcam->cntr2pfvf_map[cntr] = pcifunc;
+		}
+	} else {
+		/* Allocate requested number of non-contiguous counters,
+		 * if unsuccessful allocate as many as possible.
+		 */
+		for (cntr = 0; cntr < req->count; cntr++) {
+			index = rvu_alloc_rsrc(&mcam->counters);
+			if (index < 0)
+				break;
+			rsp->cntr_list[cntr] = index;
+			rsp->count++;
+			mcam->cntr2pfvf_map[index] = pcifunc;
+		}
+	}
+
+	mutex_unlock(&mcam->lock);
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_free_counter(struct rvu *rvu,
+		struct npc_mcam_oper_counter_req *req, struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 index, entry = 0;
+	int blkaddr, err;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	err = npc_mcam_verify_counter(mcam, req->hdr.pcifunc, req->cntr);
+	if (err) {
+		mutex_unlock(&mcam->lock);
+		return err;
+	}
+
+	/* Mark counter as free/unused */
+	mcam->cntr2pfvf_map[req->cntr] = NPC_MCAM_INVALID_MAP;
+	rvu_free_rsrc(&mcam->counters, req->cntr);
+
+	/* Disable stats for all MCAM entries using this counter */
+	while (entry < mcam->bmap_entries) {
+		if (!mcam->cntr_refcnt[req->cntr])
+			break;
+
+		index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry);
+		if (index >= mcam->bmap_entries)
+			break;
+		entry = index + 1;
+		if (mcam->entry2cntr_map[index] != req->cntr)
+			continue;
+
+		npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+					      index, req->cntr);
+	}
+
+	mutex_unlock(&mcam->lock);
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_unmap_counter(struct rvu *rvu,
+		struct npc_mcam_unmap_counter_req *req, struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 index, entry = 0;
+	int blkaddr, rc;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	rc = npc_mcam_verify_counter(mcam, req->hdr.pcifunc, req->cntr);
+	if (rc)
+		goto exit;
+
+	/* Unmap the MCAM entry and counter */
+	if (!req->all) {
+		rc = npc_mcam_verify_entry(mcam, req->hdr.pcifunc, req->entry);
+		if (rc)
+			goto exit;
+		npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+					      req->entry, req->cntr);
+		goto exit;
+	}
+
+	/* Disable stats for all MCAM entries using this counter */
+	while (entry < mcam->bmap_entries) {
+		if (!mcam->cntr_refcnt[req->cntr])
+			break;
+
+		index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry);
+		if (index >= mcam->bmap_entries)
+			break;
+		entry = index + 1;
+		if (mcam->entry2cntr_map[index] != req->cntr)
+			continue;
+
+		npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
+					      index, req->cntr);
+	}
+exit:
+	mutex_unlock(&mcam->lock);
+	return rc;
+}
+
+int rvu_mbox_handler_npc_mcam_clear_counter(struct rvu *rvu,
+		struct npc_mcam_oper_counter_req *req, struct msg_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr, err;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	err = npc_mcam_verify_counter(mcam, req->hdr.pcifunc, req->cntr);
+	mutex_unlock(&mcam->lock);
+	if (err)
+		return err;
+
+	rvu_write64(rvu, blkaddr, NPC_AF_MATCH_STATX(req->cntr), 0x00);
+
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_counter_stats(struct rvu *rvu,
+			struct npc_mcam_oper_counter_req *req,
+			struct npc_mcam_oper_counter_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr, err;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+	err = npc_mcam_verify_counter(mcam, req->hdr.pcifunc, req->cntr);
+	mutex_unlock(&mcam->lock);
+	if (err)
+		return err;
+
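+	/* Return only the 48-bit counter value */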
+	rsp->stat = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(req->cntr));
+	rsp->stat &= BIT_ULL(48) - 1;
+
+	return 0;
+}
+
+int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
+			  struct npc_mcam_alloc_and_write_entry_req *req,
+			  struct npc_mcam_alloc_and_write_entry_rsp *rsp)
+{
+	struct npc_mcam_alloc_counter_req cntr_req;
+	struct npc_mcam_alloc_counter_rsp cntr_rsp;
+	struct npc_mcam_alloc_entry_req entry_req;
+	struct npc_mcam_alloc_entry_rsp entry_rsp;
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 entry = NPC_MCAM_ENTRY_INVALID;
+	u16 cntr = NPC_MCAM_ENTRY_INVALID;
+	int blkaddr, rc;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	if (req->intf != NIX_INTF_RX && req->intf != NIX_INTF_TX)
+		return NPC_MCAM_INVALID_REQ;
+
+	/* Try to allocate a MCAM entry */
+	entry_req.hdr.pcifunc = req->hdr.pcifunc;
+	entry_req.contig = true;
+	entry_req.priority = req->priority;
+	entry_req.ref_entry = req->ref_entry;
+	entry_req.count = 1;
+
+	rc = rvu_mbox_handler_npc_mcam_alloc_entry(rvu,
+						   &entry_req, &entry_rsp);
+	if (rc)
+		return rc;
+
+	if (!entry_rsp.count)
+		return NPC_MCAM_ALLOC_FAILED;
+
+	entry = entry_rsp.entry;
+
+	if (!req->alloc_cntr)
+		goto write_entry;
+
+	/* Now allocate counter */
+	cntr_req.hdr.pcifunc = req->hdr.pcifunc;
+	cntr_req.contig = true;
+	cntr_req.count = 1;
+
+	rc = rvu_mbox_handler_npc_mcam_alloc_counter(rvu, &cntr_req, &cntr_rsp);
+	if (rc) {
+		/* Free allocated MCAM entry */
+		mutex_lock(&mcam->lock);
+		mcam->entry2pfvf_map[entry] = 0;
+		npc_mcam_clear_bit(mcam, entry);
+		mutex_unlock(&mcam->lock);
+		return rc;
+	}
+
+	cntr = cntr_rsp.cntr;
+
+write_entry:
+	mutex_lock(&mcam->lock);
+	npc_config_mcam_entry(rvu, mcam, blkaddr, entry, req->intf,
+			      &req->entry_data, req->enable_entry);
+
+	if (req->alloc_cntr)
+		npc_map_mcam_entry_and_cntr(rvu, mcam, blkaddr, entry, cntr);
+	mutex_unlock(&mcam->lock);
+
+	rsp->entry = entry;
+	rsp->cntr = cntr;
+
+	return 0;
+}
+
+#define GET_KEX_CFG(intf) \
+	rvu_read64(rvu, BLKADDR_NPC, NPC_AF_INTFX_KEX_CFG(intf))
+
+#define GET_KEX_FLAGS(ld) \
+	rvu_read64(rvu, BLKADDR_NPC, NPC_AF_KEX_LDATAX_FLAGS_CFG(ld))
+
+#define GET_KEX_LD(intf, lid, lt, ld)	\
+	rvu_read64(rvu, BLKADDR_NPC,	\
+		NPC_AF_INTFX_LIDX_LTX_LDX_CFG(intf, lid, lt, ld))
+
+#define GET_KEX_LDFLAGS(intf, ld, fl)	\
+	rvu_read64(rvu, BLKADDR_NPC,	\
+		NPC_AF_INTFX_LDATAX_FLAGSX_CFG(intf, ld, fl))
+
+int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req,
+				     struct npc_get_kex_cfg_rsp *rsp)
+{
+	int lid, lt, ld, fl;
+
+	rsp->rx_keyx_cfg = GET_KEX_CFG(NIX_INTF_RX);
+	rsp->tx_keyx_cfg = GET_KEX_CFG(NIX_INTF_TX);
+	for (lid = 0; lid < NPC_MAX_LID; lid++) {
+		for (lt = 0; lt < NPC_MAX_LT; lt++) {
+			for (ld = 0; ld < NPC_MAX_LD; ld++) {
+				rsp->intf_lid_lt_ld[NIX_INTF_RX][lid][lt][ld] =
+					GET_KEX_LD(NIX_INTF_RX, lid, lt, ld);
+				rsp->intf_lid_lt_ld[NIX_INTF_TX][lid][lt][ld] =
+					GET_KEX_LD(NIX_INTF_TX, lid, lt, ld);
+			}
+		}
+	}
+	for (ld = 0; ld < NPC_MAX_LD; ld++)
+		rsp->kex_ld_flags[ld] = GET_KEX_FLAGS(ld);
+
+	for (ld = 0; ld < NPC_MAX_LD; ld++) {
+		for (fl = 0; fl < NPC_MAX_LFL; fl++) {
+			rsp->intf_ld_flags[NIX_INTF_RX][ld][fl] =
+					GET_KEX_LDFLAGS(NIX_INTF_RX, ld, fl);
+			rsp->intf_ld_flags[NIX_INTF_TX][ld][fl] =
+					GET_KEX_LDFLAGS(NIX_INTF_TX, ld, fl);
+		}
+	}
+	memcpy(rsp->mkex_pfl_name, rvu->mkex_pfl_name, MKEX_NAME_LEN);
+	return 0;
+}
+
+int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf)
+{
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr, index;
+	bool enable;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	if (!pfvf->rxvlan)
+		return 0;
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf,
+					 NIXLF_UCAST_ENTRY);
+	pfvf->entry.action = npc_get_mcam_action(rvu, mcam, blkaddr, index);
+	enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, index);
+	npc_config_mcam_entry(rvu, mcam, blkaddr, pfvf->rxvlan_index,
+			      NIX_INTF_RX, &pfvf->entry, enable);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
new file mode 100644
index 0000000..9d7c135
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu_struct.h"
+#include "common.h"
+#include "mbox.h"
+#include "rvu.h"
+
+struct reg_range {
+	u64  start;
+	u64  end;
+};
+
+struct hw_reg_map {
+	u8	regblk;
+	u8	num_ranges;
+	u64	mask;
+#define	 MAX_REG_RANGES	8
+	struct reg_range range[MAX_REG_RANGES];
+};
+
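+/* Ranges of NIX transmit scheduler queue register offsets that are
+ * valid at each scheduling level; used by rvu_check_valid_reg() below
+ * to vet requested register offsets.
+ */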
+static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = {
+	{NIX_TXSCH_LVL_SMQ, 2, 0xFFFF, {{0x0700, 0x0708}, {0x1400, 0x14C8} } },
+	{NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18},
+			      {0x1200, 0x12E0} } },
+	{NIX_TXSCH_LVL_TL3, 3, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608},
+			      {0x1610, 0x1618} } },
+	{NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x1768} } },
+	{NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } },
+};
+
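+/* Check that a 64-bit register offset falls within one of the
+ * whitelisted ranges of the given register map and block.
+ */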
+bool rvu_check_valid_reg(int regmap, int regblk, u64 reg)
+{
+	int idx;
+	struct hw_reg_map *map;
+
+	/* Only 64bit offsets */
+	if (reg & 0x07)
+		return false;
+
+	if (regmap == TXSCHQ_HWREGMAP) {
+		if (regblk >= NIX_TXSCH_LVL_CNT)
+			return false;
+		map = &txsch_reg_map[regblk];
+	} else {
+		return false;
+	}
+
+	/* Should never happen */
+	if (map->regblk != regblk)
+		return false;
+
+	reg &= map->mask;
+
+	for (idx = 0; idx < map->num_ranges; idx++) {
+		if (reg >= map->range[idx].start &&
+		    reg < map->range[idx].end)
+			return true;
+	}
+	return false;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
new file mode 100644
index 0000000..1ea92a2
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -0,0 +1,502 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef RVU_REG_H
+#define RVU_REG_H
+
+/* Admin function registers */
+#define RVU_AF_MSIXTR_BASE                  (0x10)
+#define RVU_AF_ECO                          (0x20)
+#define RVU_AF_BLK_RST                      (0x30)
+#define RVU_AF_PF_BAR4_ADDR                 (0x40)
+#define RVU_AF_RAS                          (0x100)
+#define RVU_AF_RAS_W1S                      (0x108)
+#define RVU_AF_RAS_ENA_W1S                  (0x110)
+#define RVU_AF_RAS_ENA_W1C                  (0x118)
+#define RVU_AF_GEN_INT                      (0x120)
+#define RVU_AF_GEN_INT_W1S                  (0x128)
+#define RVU_AF_GEN_INT_ENA_W1S              (0x130)
+#define RVU_AF_GEN_INT_ENA_W1C              (0x138)
+#define	RVU_AF_AFPF_MBOX0		    (0x02000)
+#define	RVU_AF_AFPF_MBOX1		    (0x02008)
+#define RVU_AF_AFPFX_MBOXX(a, b)            (0x2000 | (a) << 4 | (b) << 3)
+#define RVU_AF_PFME_STATUS                  (0x2800)
+#define RVU_AF_PFTRPEND                     (0x2810)
+#define RVU_AF_PFTRPEND_W1S                 (0x2820)
+#define RVU_AF_PF_RST                       (0x2840)
+#define RVU_AF_HWVF_RST                     (0x2850)
+#define RVU_AF_PFAF_MBOX_INT                (0x2880)
+#define RVU_AF_PFAF_MBOX_INT_W1S            (0x2888)
+#define RVU_AF_PFAF_MBOX_INT_ENA_W1S        (0x2890)
+#define RVU_AF_PFAF_MBOX_INT_ENA_W1C        (0x2898)
+#define RVU_AF_PFFLR_INT                    (0x28a0)
+#define RVU_AF_PFFLR_INT_W1S                (0x28a8)
+#define RVU_AF_PFFLR_INT_ENA_W1S            (0x28b0)
+#define RVU_AF_PFFLR_INT_ENA_W1C            (0x28b8)
+#define RVU_AF_PFME_INT                     (0x28c0)
+#define RVU_AF_PFME_INT_W1S                 (0x28c8)
+#define RVU_AF_PFME_INT_ENA_W1S             (0x28d0)
+#define RVU_AF_PFME_INT_ENA_W1C             (0x28d8)
+
+/* Admin function's privileged PF/VF registers */
+#define RVU_PRIV_CONST                      (0x8000000)
+#define RVU_PRIV_GEN_CFG                    (0x8000010)
+#define RVU_PRIV_CLK_CFG                    (0x8000020)
+#define RVU_PRIV_ACTIVE_PC                  (0x8000030)
+#define RVU_PRIV_PFX_CFG(a)                 (0x8000100 | (a) << 16)
+#define RVU_PRIV_PFX_MSIX_CFG(a)            (0x8000110 | (a) << 16)
+#define RVU_PRIV_PFX_ID_CFG(a)              (0x8000120 | (a) << 16)
+#define RVU_PRIV_PFX_INT_CFG(a)             (0x8000200 | (a) << 16)
+#define RVU_PRIV_PFX_NIX0_CFG               (0x8000300)
+#define RVU_PRIV_PFX_NPA_CFG		    (0x8000310)
+#define RVU_PRIV_PFX_SSO_CFG                (0x8000320)
+#define RVU_PRIV_PFX_SSOW_CFG               (0x8000330)
+#define RVU_PRIV_PFX_TIM_CFG                (0x8000340)
+#define RVU_PRIV_PFX_CPT0_CFG               (0x8000350)
+#define RVU_PRIV_BLOCK_TYPEX_REV(a)         (0x8000400 | (a) << 3)
+#define RVU_PRIV_HWVFX_INT_CFG(a)           (0x8001280 | (a) << 16)
+#define RVU_PRIV_HWVFX_NIX0_CFG             (0x8001300)
+#define RVU_PRIV_HWVFX_NPA_CFG              (0x8001310)
+#define RVU_PRIV_HWVFX_SSO_CFG              (0x8001320)
+#define RVU_PRIV_HWVFX_SSOW_CFG             (0x8001330)
+#define RVU_PRIV_HWVFX_TIM_CFG              (0x8001340)
+#define RVU_PRIV_HWVFX_CPT0_CFG             (0x8001350)
+
+/* RVU PF registers */
+#define	RVU_PF_VFX_PFVF_MBOX0		    (0x00000)
+#define	RVU_PF_VFX_PFVF_MBOX1		    (0x00008)
+#define RVU_PF_VFX_PFVF_MBOXX(a, b)         (0x0 | (a) << 12 | (b) << 3)
+#define RVU_PF_VF_BAR4_ADDR                 (0x10)
+#define RVU_PF_BLOCK_ADDRX_DISC(a)          (0x200 | (a) << 3)
+#define RVU_PF_VFME_STATUSX(a)              (0x800 | (a) << 3)
+#define RVU_PF_VFTRPENDX(a)                 (0x820 | (a) << 3)
+#define RVU_PF_VFTRPEND_W1SX(a)             (0x840 | (a) << 3)
+#define RVU_PF_VFPF_MBOX_INTX(a)            (0x880 | (a) << 3)
+#define RVU_PF_VFPF_MBOX_INT_W1SX(a)        (0x8A0 | (a) << 3)
+#define RVU_PF_VFPF_MBOX_INT_ENA_W1SX(a)    (0x8C0 | (a) << 3)
+#define RVU_PF_VFPF_MBOX_INT_ENA_W1CX(a)    (0x8E0 | (a) << 3)
+#define RVU_PF_VFFLR_INTX(a)                (0x900 | (a) << 3)
+#define RVU_PF_VFFLR_INT_W1SX(a)            (0x920 | (a) << 3)
+#define RVU_PF_VFFLR_INT_ENA_W1SX(a)        (0x940 | (a) << 3)
+#define RVU_PF_VFFLR_INT_ENA_W1CX(a)        (0x960 | (a) << 3)
+#define RVU_PF_VFME_INTX(a)                 (0x980 | (a) << 3)
+#define RVU_PF_VFME_INT_W1SX(a)             (0x9A0 | (a) << 3)
+#define RVU_PF_VFME_INT_ENA_W1SX(a)         (0x9C0 | (a) << 3)
+#define RVU_PF_VFME_INT_ENA_W1CX(a)         (0x9E0 | (a) << 3)
+#define RVU_PF_PFAF_MBOX0                   (0xC00)
+#define RVU_PF_PFAF_MBOX1                   (0xC08)
+#define RVU_PF_PFAF_MBOXX(a)                (0xC00 | (a) << 3)
+#define RVU_PF_INT                          (0xc20)
+#define RVU_PF_INT_W1S                      (0xc28)
+#define RVU_PF_INT_ENA_W1S                  (0xc30)
+#define RVU_PF_INT_ENA_W1C                  (0xc38)
+#define RVU_PF_MSIX_VECX_ADDR(a)            (0x000 | (a) << 4)
+#define RVU_PF_MSIX_VECX_CTL(a)             (0x008 | (a) << 4)
+#define RVU_PF_MSIX_PBAX(a)                 (0xF0000 | (a) << 3)
+
+/* RVU VF registers */
+#define	RVU_VF_VFPF_MBOX0		    (0x00000)
+#define	RVU_VF_VFPF_MBOX1		    (0x00008)
+
+/* NPA block's admin function registers */
+#define NPA_AF_BLK_RST                  (0x0000)
+#define NPA_AF_CONST                    (0x0010)
+#define NPA_AF_CONST1                   (0x0018)
+#define NPA_AF_LF_RST                   (0x0020)
+#define NPA_AF_GEN_CFG                  (0x0030)
+#define NPA_AF_NDC_CFG                  (0x0040)
+#define NPA_AF_INP_CTL                  (0x00D0)
+#define NPA_AF_ACTIVE_CYCLES_PC         (0x00F0)
+#define NPA_AF_AVG_DELAY                (0x0100)
+#define NPA_AF_GEN_INT                  (0x0140)
+#define NPA_AF_GEN_INT_W1S              (0x0148)
+#define NPA_AF_GEN_INT_ENA_W1S          (0x0150)
+#define NPA_AF_GEN_INT_ENA_W1C          (0x0158)
+#define NPA_AF_RVU_INT                  (0x0160)
+#define NPA_AF_RVU_INT_W1S              (0x0168)
+#define NPA_AF_RVU_INT_ENA_W1S          (0x0170)
+#define NPA_AF_RVU_INT_ENA_W1C          (0x0178)
+#define NPA_AF_ERR_INT			(0x0180)
+#define NPA_AF_ERR_INT_W1S		(0x0188)
+#define NPA_AF_ERR_INT_ENA_W1S		(0x0190)
+#define NPA_AF_ERR_INT_ENA_W1C		(0x0198)
+#define NPA_AF_RAS			(0x01A0)
+#define NPA_AF_RAS_W1S			(0x01A8)
+#define NPA_AF_RAS_ENA_W1S		(0x01B0)
+#define NPA_AF_RAS_ENA_W1C		(0x01B8)
+#define NPA_AF_BP_TEST                  (0x0200)
+#define NPA_AF_ECO                      (0x0300)
+#define NPA_AF_AQ_CFG                   (0x0600)
+#define NPA_AF_AQ_BASE                  (0x0610)
+#define NPA_AF_AQ_STATUS		(0x0620)
+#define NPA_AF_AQ_DOOR                  (0x0630)
+#define NPA_AF_AQ_DONE_WAIT             (0x0640)
+#define NPA_AF_AQ_DONE                  (0x0650)
+#define NPA_AF_AQ_DONE_ACK              (0x0660)
+#define NPA_AF_AQ_DONE_INT              (0x0680)
+#define NPA_AF_AQ_DONE_INT_W1S          (0x0688)
+#define NPA_AF_AQ_DONE_ENA_W1S          (0x0690)
+#define NPA_AF_AQ_DONE_ENA_W1C          (0x0698)
+#define NPA_AF_LFX_AURAS_CFG(a)         (0x4000 | (a) << 18)
+#define NPA_AF_LFX_LOC_AURAS_BASE(a)    (0x4010 | (a) << 18)
+#define NPA_AF_LFX_QINTS_CFG(a)         (0x4100 | (a) << 18)
+#define NPA_AF_LFX_QINTS_BASE(a)        (0x4110 | (a) << 18)
+#define NPA_PRIV_AF_INT_CFG             (0x10000)
+#define NPA_PRIV_LFX_CFG		(0x10010)
+#define NPA_PRIV_LFX_INT_CFG		(0x10020)
+#define NPA_AF_RVU_LF_CFG_DEBUG         (0x10030)
+
+/* NIX block's admin function registers */
+#define NIX_AF_CFG			(0x0000)
+#define NIX_AF_STATUS			(0x0010)
+#define NIX_AF_NDC_CFG			(0x0018)
+#define NIX_AF_CONST			(0x0020)
+#define NIX_AF_CONST1			(0x0028)
+#define NIX_AF_CONST2			(0x0030)
+#define NIX_AF_CONST3			(0x0038)
+#define NIX_AF_SQ_CONST			(0x0040)
+#define NIX_AF_CQ_CONST			(0x0048)
+#define NIX_AF_RQ_CONST			(0x0050)
+#define NIX_AF_PSE_CONST		(0x0060)
+#define NIX_AF_TL1_CONST		(0x0070)
+#define NIX_AF_TL2_CONST		(0x0078)
+#define NIX_AF_TL3_CONST		(0x0080)
+#define NIX_AF_TL4_CONST		(0x0088)
+#define NIX_AF_MDQ_CONST		(0x0090)
+#define NIX_AF_MC_MIRROR_CONST		(0x0098)
+#define NIX_AF_LSO_CFG			(0x00A8)
+#define NIX_AF_BLK_RST			(0x00B0)
+#define NIX_AF_TX_TSTMP_CFG		(0x00C0)
+#define NIX_AF_RX_CFG			(0x00D0)
+#define NIX_AF_AVG_DELAY		(0x00E0)
+#define NIX_AF_CINT_DELAY		(0x00F0)
+#define NIX_AF_RX_MCAST_BASE		(0x0100)
+#define NIX_AF_RX_MCAST_CFG		(0x0110)
+#define NIX_AF_RX_MCAST_BUF_BASE	(0x0120)
+#define NIX_AF_RX_MCAST_BUF_CFG		(0x0130)
+#define NIX_AF_RX_MIRROR_BUF_BASE	(0x0140)
+#define NIX_AF_RX_MIRROR_BUF_CFG	(0x0148)
+#define NIX_AF_LF_RST			(0x0150)
+#define NIX_AF_GEN_INT			(0x0160)
+#define NIX_AF_GEN_INT_W1S		(0x0168)
+#define NIX_AF_GEN_INT_ENA_W1S		(0x0170)
+#define NIX_AF_GEN_INT_ENA_W1C		(0x0178)
+#define NIX_AF_ERR_INT			(0x0180)
+#define NIX_AF_ERR_INT_W1S		(0x0188)
+#define NIX_AF_ERR_INT_ENA_W1S		(0x0190)
+#define NIX_AF_ERR_INT_ENA_W1C		(0x0198)
+#define NIX_AF_RAS			(0x01A0)
+#define NIX_AF_RAS_W1S			(0x01A8)
+#define NIX_AF_RAS_ENA_W1S		(0x01B0)
+#define NIX_AF_RAS_ENA_W1C		(0x01B8)
+#define NIX_AF_RVU_INT			(0x01C0)
+#define NIX_AF_RVU_INT_W1S		(0x01C8)
+#define NIX_AF_RVU_INT_ENA_W1S		(0x01D0)
+#define NIX_AF_RVU_INT_ENA_W1C		(0x01D8)
+#define NIX_AF_TCP_TIMER		(0x01E0)
+#define NIX_AF_RX_WQE_TAG_CTL		(0x01F0)
+#define NIX_AF_RX_DEF_OL2		(0x0200)
+#define NIX_AF_RX_DEF_OIP4		(0x0210)
+#define NIX_AF_RX_DEF_IIP4		(0x0220)
+#define NIX_AF_RX_DEF_OIP6		(0x0230)
+#define NIX_AF_RX_DEF_IIP6		(0x0240)
+#define NIX_AF_RX_DEF_OTCP		(0x0250)
+#define NIX_AF_RX_DEF_ITCP		(0x0260)
+#define NIX_AF_RX_DEF_OUDP		(0x0270)
+#define NIX_AF_RX_DEF_IUDP		(0x0280)
+#define NIX_AF_RX_DEF_OSCTP		(0x0290)
+#define NIX_AF_RX_DEF_ISCTP		(0x02A0)
+#define NIX_AF_RX_DEF_IPSECX		(0x02B0)
+#define NIX_AF_RX_IPSEC_GEN_CFG		(0x0300)
+#define NIX_AF_RX_CPTX_INST_ADDR	(0x0310)
+#define NIX_AF_NDC_TX_SYNC		(0x03F0)
+#define NIX_AF_AQ_CFG			(0x0400)
+#define NIX_AF_AQ_BASE			(0x0410)
+#define NIX_AF_AQ_STATUS		(0x0420)
+#define NIX_AF_AQ_DOOR			(0x0430)
+#define NIX_AF_AQ_DONE_WAIT		(0x0440)
+#define NIX_AF_AQ_DONE			(0x0450)
+#define NIX_AF_AQ_DONE_ACK		(0x0460)
+#define NIX_AF_AQ_DONE_TIMER		(0x0470)
+#define NIX_AF_AQ_DONE_INT		(0x0480)
+#define NIX_AF_AQ_DONE_INT_W1S		(0x0488)
+#define NIX_AF_AQ_DONE_ENA_W1S		(0x0490)
+#define NIX_AF_AQ_DONE_ENA_W1C		(0x0498)
+#define NIX_AF_RX_LINKX_SLX_SPKT_CNT	(0x0500)
+#define NIX_AF_RX_LINKX_SLX_SXQE_CNT	(0x0510)
+#define NIX_AF_RX_MCAST_JOBSX_SW_CNT	(0x0520)
+#define NIX_AF_RX_MIRROR_JOBSX_SW_CNT	(0x0530)
+#define NIX_AF_RX_LINKX_CFG(a)		(0x0540 | (a) << 16)
+#define NIX_AF_RX_SW_SYNC		(0x0550)
+#define NIX_AF_RX_SW_SYNC_DONE		(0x0560)
+#define NIX_AF_SEB_ECO			(0x0600)
+#define NIX_AF_SEB_TEST_BP		(0x0610)
+#define NIX_AF_NORM_TX_FIFO_STATUS	(0x0620)
+#define NIX_AF_EXPR_TX_FIFO_STATUS	(0x0630)
+#define NIX_AF_SDP_TX_FIFO_STATUS	(0x0640)
+#define NIX_AF_TX_NPC_CAPTURE_CONFIG	(0x0660)
+#define NIX_AF_TX_NPC_CAPTURE_INFO	(0x0670)
+
+#define NIX_AF_DEBUG_NPC_RESP_DATAX(a)          (0x680 | (a) << 3)
+#define NIX_AF_SMQX_CFG(a)                      (0x700 | (a) << 16)
+#define NIX_AF_PSE_CHANNEL_LEVEL                (0x800)
+#define NIX_AF_PSE_SHAPER_CFG                   (0x810)
+#define NIX_AF_TX_EXPR_CREDIT			(0x830)
+#define NIX_AF_MARK_FORMATX_CTL(a)              (0x900 | (a) << 18)
+#define NIX_AF_TX_LINKX_NORM_CREDIT(a)		(0xA00 | (a) << 16)
+#define NIX_AF_TX_LINKX_EXPR_CREDIT(a)		(0xA10 | (a) << 16)
+#define NIX_AF_TX_LINKX_SW_XOFF(a)              (0xA20 | (a) << 16)
+#define NIX_AF_TX_LINKX_HW_XOFF(a)              (0xA30 | (a) << 16)
+#define NIX_AF_SDP_LINK_CREDIT                  (0xa40)
+#define NIX_AF_SDP_SW_XOFFX(a)                  (0xA60 | (a) << 3)
+#define NIX_AF_SDP_HW_XOFFX(a)                  (0xAC0 | (a) << 3)
+#define NIX_AF_TL4X_BP_STATUS(a)                (0xB00 | (a) << 16)
+#define NIX_AF_TL4X_SDP_LINK_CFG(a)             (0xB10 | (a) << 16)
+#define NIX_AF_TL1X_SCHEDULE(a)                 (0xC00 | (a) << 16)
+#define NIX_AF_TL1X_SHAPE(a)                    (0xC10 | (a) << 16)
+#define NIX_AF_TL1X_CIR(a)                      (0xC20 | (a) << 16)
+#define NIX_AF_TL1X_SHAPE_STATE(a)              (0xC50 | (a) << 16)
+#define NIX_AF_TL1X_SW_XOFF(a)                  (0xC70 | (a) << 16)
+#define NIX_AF_TL1X_TOPOLOGY(a)                 (0xC80 | (a) << 16)
+#define NIX_AF_TL1X_GREEN(a)                    (0xC90 | (a) << 16)
+#define NIX_AF_TL1X_YELLOW(a)                   (0xCA0 | (a) << 16)
+#define NIX_AF_TL1X_RED(a)                      (0xCB0 | (a) << 16)
+#define NIX_AF_TL1X_MD_DEBUG0(a)                (0xCC0 | (a) << 16)
+#define NIX_AF_TL1X_MD_DEBUG1(a)                (0xCC8 | (a) << 16)
+#define NIX_AF_TL1X_MD_DEBUG2(a)                (0xCD0 | (a) << 16)
+#define NIX_AF_TL1X_MD_DEBUG3(a)                (0xCD8 | (a) << 16)
+#define NIX_AF_TL1A_DEBUG                       (0xce0)
+#define NIX_AF_TL1B_DEBUG                       (0xcf0)
+#define NIX_AF_TL1_DEBUG_GREEN                  (0xd00)
+#define NIX_AF_TL1_DEBUG_NODE                   (0xd10)
+#define NIX_AF_TL1X_DROPPED_PACKETS(a)          (0xD20 | (a) << 16)
+#define NIX_AF_TL1X_DROPPED_BYTES(a)            (0xD30 | (a) << 16)
+#define NIX_AF_TL1X_RED_PACKETS(a)              (0xD40 | (a) << 16)
+#define NIX_AF_TL1X_RED_BYTES(a)                (0xD50 | (a) << 16)
+#define NIX_AF_TL1X_YELLOW_PACKETS(a)           (0xD60 | (a) << 16)
+#define NIX_AF_TL1X_YELLOW_BYTES(a)             (0xD70 | (a) << 16)
+#define NIX_AF_TL1X_GREEN_PACKETS(a)            (0xD80 | (a) << 16)
+#define NIX_AF_TL1X_GREEN_BYTES(a)              (0xD90 | (a) << 16)
+#define NIX_AF_TL2X_SCHEDULE(a)                 (0xE00 | (a) << 16)
+#define NIX_AF_TL2X_SHAPE(a)                    (0xE10 | (a) << 16)
+#define NIX_AF_TL2X_CIR(a)                      (0xE20 | (a) << 16)
+#define NIX_AF_TL2X_PIR(a)                      (0xE30 | (a) << 16)
+#define NIX_AF_TL2X_SCHED_STATE(a)              (0xE40 | (a) << 16)
+#define NIX_AF_TL2X_SHAPE_STATE(a)              (0xE50 | (a) << 16)
+#define NIX_AF_TL2X_POINTERS(a)                 (0xE60 | (a) << 16)
+#define NIX_AF_TL2X_SW_XOFF(a)                  (0xE70 | (a) << 16)
+#define NIX_AF_TL2X_TOPOLOGY(a)                 (0xE80 | (a) << 16)
+#define NIX_AF_TL2X_PARENT(a)                   (0xE88 | (a) << 16)
+#define NIX_AF_TL2X_GREEN(a)                    (0xE90 | (a) << 16)
+#define NIX_AF_TL2X_YELLOW(a)                   (0xEA0 | (a) << 16)
+#define NIX_AF_TL2X_RED(a)                      (0xEB0 | (a) << 16)
+#define NIX_AF_TL2X_MD_DEBUG0(a)                (0xEC0 | (a) << 16)
+#define NIX_AF_TL2X_MD_DEBUG1(a)                (0xEC8 | (a) << 16)
+#define NIX_AF_TL2X_MD_DEBUG2(a)                (0xED0 | (a) << 16)
+#define NIX_AF_TL2X_MD_DEBUG3(a)                (0xED8 | (a) << 16)
+#define NIX_AF_TL2A_DEBUG                       (0xee0)
+#define NIX_AF_TL2B_DEBUG                       (0xef0)
+#define NIX_AF_TL3X_SCHEDULE(a)                 (0x1000 | (a) << 16)
+#define NIX_AF_TL3X_SHAPE(a)                    (0x1010 | (a) << 16)
+#define NIX_AF_TL3X_CIR(a)                      (0x1020 | (a) << 16)
+#define NIX_AF_TL3X_PIR(a)                      (0x1030 | (a) << 16)
+#define NIX_AF_TL3X_SCHED_STATE(a)              (0x1040 | (a) << 16)
+#define NIX_AF_TL3X_SHAPE_STATE(a)              (0x1050 | (a) << 16)
+#define NIX_AF_TL3X_POINTERS(a)                 (0x1060 | (a) << 16)
+#define NIX_AF_TL3X_SW_XOFF(a)                  (0x1070 | (a) << 16)
+#define NIX_AF_TL3X_TOPOLOGY(a)                 (0x1080 | (a) << 16)
+#define NIX_AF_TL3X_PARENT(a)                   (0x1088 | (a) << 16)
+#define NIX_AF_TL3X_GREEN(a)                    (0x1090 | (a) << 16)
+#define NIX_AF_TL3X_YELLOW(a)                   (0x10A0 | (a) << 16)
+#define NIX_AF_TL3X_RED(a)                      (0x10B0 | (a) << 16)
+#define NIX_AF_TL3X_MD_DEBUG0(a)                (0x10C0 | (a) << 16)
+#define NIX_AF_TL3X_MD_DEBUG1(a)                (0x10C8 | (a) << 16)
+#define NIX_AF_TL3X_MD_DEBUG2(a)                (0x10D0 | (a) << 16)
+#define NIX_AF_TL3X_MD_DEBUG3(a)                (0x10D8 | (a) << 16)
+#define NIX_AF_TL3A_DEBUG                       (0x10e0)
+#define NIX_AF_TL3B_DEBUG                       (0x10f0)
+#define NIX_AF_TL4X_SCHEDULE(a)                 (0x1200 | (a) << 16)
+#define NIX_AF_TL4X_SHAPE(a)                    (0x1210 | (a) << 16)
+#define NIX_AF_TL4X_CIR(a)                      (0x1220 | (a) << 16)
+#define NIX_AF_TL4X_PIR(a)                      (0x1230 | (a) << 16)
+#define NIX_AF_TL4X_SCHED_STATE(a)              (0x1240 | (a) << 16)
+#define NIX_AF_TL4X_SHAPE_STATE(a)              (0x1250 | (a) << 16)
+#define NIX_AF_TL4X_POINTERS(a)                 (0x1260 | (a) << 16)
+#define NIX_AF_TL4X_SW_XOFF(a)                  (0x1270 | (a) << 16)
+#define NIX_AF_TL4X_TOPOLOGY(a)                 (0x1280 | (a) << 16)
+#define NIX_AF_TL4X_PARENT(a)                   (0x1288 | (a) << 16)
+#define NIX_AF_TL4X_GREEN(a)                    (0x1290 | (a) << 16)
+#define NIX_AF_TL4X_YELLOW(a)                   (0x12A0 | (a) << 16)
+#define NIX_AF_TL4X_RED(a)                      (0x12B0 | (a) << 16)
+#define NIX_AF_TL4X_MD_DEBUG0(a)                (0x12C0 | (a) << 16)
+#define NIX_AF_TL4X_MD_DEBUG1(a)                (0x12C8 | (a) << 16)
+#define NIX_AF_TL4X_MD_DEBUG2(a)                (0x12D0 | (a) << 16)
+#define NIX_AF_TL4X_MD_DEBUG3(a)                (0x12D8 | (a) << 16)
+#define NIX_AF_TL4A_DEBUG                       (0x12e0)
+#define NIX_AF_TL4B_DEBUG                       (0x12f0)
+#define NIX_AF_MDQX_SCHEDULE(a)                 (0x1400 | (a) << 16)
+#define NIX_AF_MDQX_SHAPE(a)                    (0x1410 | (a) << 16)
+#define NIX_AF_MDQX_CIR(a)                      (0x1420 | (a) << 16)
+#define NIX_AF_MDQX_PIR(a)                      (0x1430 | (a) << 16)
+#define NIX_AF_MDQX_SCHED_STATE(a)              (0x1440 | (a) << 16)
+#define NIX_AF_MDQX_SHAPE_STATE(a)              (0x1450 | (a) << 16)
+#define NIX_AF_MDQX_POINTERS(a)                 (0x1460 | (a) << 16)
+#define NIX_AF_MDQX_SW_XOFF(a)                  (0x1470 | (a) << 16)
+#define NIX_AF_MDQX_PARENT(a)                   (0x1480 | (a) << 16)
+#define NIX_AF_MDQX_MD_DEBUG(a)                 (0x14C0 | (a) << 16)
+#define NIX_AF_MDQX_PTR_FIFO(a)                 (0x14D0 | (a) << 16)
+#define NIX_AF_MDQA_DEBUG                       (0x14e0)
+#define NIX_AF_MDQB_DEBUG                       (0x14f0)
+#define NIX_AF_TL3_TL2X_CFG(a)                  (0x1600 | (a) << 18)
+#define NIX_AF_TL3_TL2X_BP_STATUS(a)            (0x1610 | (a) << 16)
+#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b)         (0x1700 | (a) << 16 | (b) << 3)
+#define NIX_AF_RX_FLOW_KEY_ALGX_FIELDX(a, b)    (0x1800 | (a) << 18 | (b) << 3)
+#define NIX_AF_TX_MCASTX(a)                     (0x1900 | (a) << 15)
+#define NIX_AF_TX_VTAG_DEFX_CTL(a)              (0x1A00 | (a) << 16)
+#define NIX_AF_TX_VTAG_DEFX_DATA(a)             (0x1A10 | (a) << 16)
+#define NIX_AF_RX_BPIDX_STATUS(a)               (0x1A20 | (a) << 17)
+#define NIX_AF_RX_CHANX_CFG(a)                  (0x1A30 | (a) << 15)
+#define NIX_AF_CINT_TIMERX(a)                   (0x1A40 | (a) << 18)
+#define NIX_AF_LSO_FORMATX_FIELDX(a, b)         (0x1B00 | (a) << 16 | (b) << 3)
+#define NIX_AF_LFX_CFG(a)		(0x4000 | (a) << 17)
+#define NIX_AF_LFX_SQS_CFG(a)		(0x4020 | (a) << 17)
+#define NIX_AF_LFX_TX_CFG2(a)		(0x4028 | (a) << 17)
+#define NIX_AF_LFX_SQS_BASE(a)		(0x4030 | (a) << 17)
+#define NIX_AF_LFX_RQS_CFG(a)		(0x4040 | (a) << 17)
+#define NIX_AF_LFX_RQS_BASE(a)		(0x4050 | (a) << 17)
+#define NIX_AF_LFX_CQS_CFG(a)		(0x4060 | (a) << 17)
+#define NIX_AF_LFX_CQS_BASE(a)		(0x4070 | (a) << 17)
+#define NIX_AF_LFX_TX_CFG(a)		(0x4080 | (a) << 17)
+#define NIX_AF_LFX_TX_PARSE_CFG(a)	(0x4090 | (a) << 17)
+#define NIX_AF_LFX_RX_CFG(a)		(0x40A0 | (a) << 17)
+#define NIX_AF_LFX_RSS_CFG(a)		(0x40C0 | (a) << 17)
+#define NIX_AF_LFX_RSS_BASE(a)		(0x40D0 | (a) << 17)
+#define NIX_AF_LFX_QINTS_CFG(a)		(0x4100 | (a) << 17)
+#define NIX_AF_LFX_QINTS_BASE(a)	(0x4110 | (a) << 17)
+#define NIX_AF_LFX_CINTS_CFG(a)		(0x4120 | (a) << 17)
+#define NIX_AF_LFX_CINTS_BASE(a)	(0x4130 | (a) << 17)
+#define NIX_AF_LFX_RX_IPSEC_CFG0(a)	(0x4140 | (a) << 17)
+#define NIX_AF_LFX_RX_IPSEC_CFG1(a)	(0x4148 | (a) << 17)
+#define NIX_AF_LFX_RX_IPSEC_DYNO_CFG(a)	(0x4150 | (a) << 17)
+#define NIX_AF_LFX_RX_IPSEC_DYNO_BASE(a)	(0x4158 | (a) << 17)
+#define NIX_AF_LFX_RX_IPSEC_SA_BASE(a)	(0x4170 | (a) << 17)
+#define NIX_AF_LFX_TX_STATUS(a)		(0x4180 | (a) << 17)
+#define NIX_AF_LFX_RX_VTAG_TYPEX(a, b)	(0x4200 | (a) << 17 | (b) << 3)
+#define NIX_AF_LFX_LOCKX(a, b)		(0x4300 | (a) << 17 | (b) << 3)
+#define NIX_AF_LFX_TX_STATX(a, b)	(0x4400 | (a) << 17 | (b) << 3)
+#define NIX_AF_LFX_RX_STATX(a, b)	(0x4500 | (a) << 17 | (b) << 3)
+#define NIX_AF_LFX_RSS_GRPX(a, b)	(0x4600 | (a) << 17 | (b) << 3)
+#define NIX_AF_RX_NPC_MC_RCV		(0x4700)
+#define NIX_AF_RX_NPC_MC_DROP		(0x4710)
+#define NIX_AF_RX_NPC_MIRROR_RCV	(0x4720)
+#define NIX_AF_RX_NPC_MIRROR_DROP	(0x4730)
+#define NIX_AF_RX_ACTIVE_CYCLES_PCX(a)	(0x4800 | (a) << 16)
+
+#define NIX_PRIV_AF_INT_CFG		(0x8000000)
+#define NIX_PRIV_LFX_CFG		(0x8000010)
+#define NIX_PRIV_LFX_INT_CFG		(0x8000020)
+#define NIX_AF_RVU_LF_CFG_DEBUG		(0x8000030)
+
+/* SSO */
+#define SSO_AF_CONST			(0x1000)
+#define SSO_AF_CONST1			(0x1008)
+#define SSO_AF_BLK_RST			(0x10f8)
+#define SSO_AF_LF_HWGRP_RST		(0x10e0)
+#define SSO_AF_RVU_LF_CFG_DEBUG		(0x3800)
+#define SSO_PRIV_LFX_HWGRP_CFG		(0x10000)
+#define SSO_PRIV_LFX_HWGRP_INT_CFG	(0x20000)
+
+/* SSOW */
+#define SSOW_AF_RVU_LF_HWS_CFG_DEBUG	(0x0010)
+#define SSOW_AF_LF_HWS_RST		(0x0030)
+#define SSOW_PRIV_LFX_HWS_CFG		(0x1000)
+#define SSOW_PRIV_LFX_HWS_INT_CFG	(0x2000)
+
+/* TIM */
+#define TIM_AF_CONST			(0x90)
+#define TIM_PRIV_LFX_CFG		(0x20000)
+#define TIM_PRIV_LFX_INT_CFG		(0x24000)
+#define TIM_AF_RVU_LF_CFG_DEBUG		(0x30000)
+#define TIM_AF_BLK_RST			(0x10)
+#define TIM_AF_LF_RST			(0x20)
+
+/* CPT */
+#define CPT_AF_CONSTANTS0		(0x0000)
+#define CPT_PRIV_LFX_CFG		(0x41000)
+#define CPT_PRIV_LFX_INT_CFG		(0x43000)
+#define CPT_AF_RVU_LF_CFG_DEBUG		(0x45000)
+#define CPT_AF_LF_RST			(0x44000)
+#define CPT_AF_BLK_RST			(0x46000)
+
+#define NDC_AF_BLK_RST                  (0x002F0)
+#define NPC_AF_BLK_RST                  (0x00040)
+
+/* NPC */
+#define NPC_AF_CFG			(0x00000)
+#define NPC_AF_ACTIVE_PC		(0x00010)
+#define NPC_AF_CONST			(0x00020)
+#define NPC_AF_CONST1			(0x00030)
+#define NPC_AF_BLK_RST			(0x00040)
+#define NPC_AF_MCAM_SCRUB_CTL		(0x000a0)
+#define NPC_AF_KCAM_SCRUB_CTL		(0x000b0)
+#define NPC_AF_KPUX_CFG(a)		(0x00500 | (a) << 3)
+#define NPC_AF_PCK_CFG			(0x00600)
+#define NPC_AF_PCK_DEF_OL2		(0x00610)
+#define NPC_AF_PCK_DEF_OIP4		(0x00620)
+#define NPC_AF_PCK_DEF_OIP6		(0x00630)
+#define NPC_AF_PCK_DEF_IIP4		(0x00640)
+#define NPC_AF_KEX_LDATAX_FLAGS_CFG(a)	(0x00800 | (a) << 3)
+#define NPC_AF_INTFX_KEX_CFG(a)		(0x01010 | (a) << 8)
+#define NPC_AF_PKINDX_ACTION0(a)	(0x80000ull | (a) << 6)
+#define NPC_AF_PKINDX_ACTION1(a)	(0x80008ull | (a) << 6)
+#define NPC_AF_PKINDX_CPI_DEFX(a, b)	(0x80020ull | (a) << 6 | (b) << 3)
+#define NPC_AF_KPUX_ENTRYX_CAMX(a, b, c) \
+		(0x100000 | (a) << 14 | (b) << 6 | (c) << 3)
+#define NPC_AF_KPUX_ENTRYX_ACTION0(a, b) \
+		(0x100020 | (a) << 14 | (b) << 6)
+#define NPC_AF_KPUX_ENTRYX_ACTION1(a, b) \
+		(0x100028 | (a) << 14 | (b) << 6)
+#define NPC_AF_KPUX_ENTRY_DISX(a, b)	(0x180000 | (a) << 6 | (b) << 3)
+#define NPC_AF_CPIX_CFG(a)		(0x200000 | (a) << 3)
+#define NPC_AF_INTFX_LIDX_LTX_LDX_CFG(a, b, c, d) \
+		(0x900000 | (a) << 16 | (b) << 12 | (c) << 5 | (d) << 3)
+#define NPC_AF_INTFX_LDATAX_FLAGSX_CFG(a, b, c) \
+		(0x980000 | (a) << 16 | (b) << 12 | (c) << 3)
+#define NPC_AF_MCAMEX_BANKX_CAMX_INTF(a, b, c)       \
+		(0x1000000ull | (a) << 10 | (b) << 6 | (c) << 3)
+#define NPC_AF_MCAMEX_BANKX_CAMX_W0(a, b, c)         \
+		(0x1000010ull | (a) << 10 | (b) << 6 | (c) << 3)
+#define NPC_AF_MCAMEX_BANKX_CAMX_W1(a, b, c)         \
+		(0x1000020ull | (a) << 10 | (b) << 6 | (c) << 3)
+#define NPC_AF_MCAMEX_BANKX_CFG(a, b)	 (0x1800000ull | (a) << 8 | (b) << 4)
+#define NPC_AF_MCAMEX_BANKX_STAT_ACT(a, b) \
+		(0x1880000 | (a) << 8 | (b) << 4)
+#define NPC_AF_MATCH_STATX(a)		(0x1880008 | (a) << 8)
+#define NPC_AF_INTFX_MISS_STAT_ACT(a)	(0x1880040 + (a) * 0x8)
+#define NPC_AF_MCAMEX_BANKX_ACTION(a, b) (0x1900000ull | (a) << 8 | (b) << 4)
+#define NPC_AF_MCAMEX_BANKX_TAG_ACT(a, b) \
+		(0x1900008 | (a) << 8 | (b) << 4)
+#define NPC_AF_INTFX_MISS_ACT(a)	(0x1a00000 | (a) << 4)
+#define NPC_AF_INTFX_MISS_TAG_ACT(a)	(0x1b00008 | (a) << 4)
+#define NPC_AF_MCAM_BANKX_HITX(a, b)	(0x1c80000 | (a) << 8 | (b) << 4)
+#define NPC_AF_LKUP_CTL			(0x2000000)
+#define NPC_AF_LKUP_DATAX(a)		(0x2000200 | (a) << 4)
+#define NPC_AF_LKUP_RESULTX(a)		(0x2000400 | (a) << 4)
+#define NPC_AF_INTFX_STAT(a)		(0x2000800 | (a) << 4)
+#define NPC_AF_DBG_CTL			(0x3000000)
+#define NPC_AF_DBG_STATUS		(0x3000010)
+#define NPC_AF_KPUX_DBG(a)		(0x3000020 | (a) << 8)
+#define NPC_AF_IKPU_ERR_CTL		(0x3000080)
+#define NPC_AF_KPUX_ERR_CTL(a)		(0x30000a0 | (a) << 8)
+#define NPC_AF_MCAM_DBG			(0x3001000)
+#define NPC_AF_DBG_DATAX(a)		(0x3001400 | (a) << 4)
+#define NPC_AF_DBG_RESULTX(a)		(0x3001800 | (a) << 4)
+
+#endif /* RVU_REG_H */
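
The parameterized offsets above fold an instance index into spare address
bits: NIX_AF_TL2X_SCHEDULE(a) is 0xE00 | (a) << 16, so each TL2 queue's
register sits 64KB apart. A minimal hedged sketch of how such a macro is
typically consumed (readq() is the generic MMIO accessor; "nix_af_base" and
the helper name are illustrative assumptions, not names from this driver):

#include <linux/io.h>

/* Illustrative only: read the TL2 scheduler word for one queue. */
static inline u64 read_tl2_schedule(void __iomem *nix_af_base, int queue)
{
	/* e.g. queue 3 resolves to offset 0x30E00 in the NIX AF region */
	return readq(nix_af_base + NIX_AF_TL2X_SCHEDULE(queue));
}
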
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
new file mode 100644
index 0000000..84a3906
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -0,0 +1,917 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef RVU_STRUCT_H
+#define RVU_STRUCT_H
+
+/* RVU Block Address Enumeration */
+enum rvu_block_addr_e {
+	BLKADDR_RVUM    = 0x0ULL,
+	BLKADDR_LMT     = 0x1ULL,
+	BLKADDR_MSIX    = 0x2ULL,
+	BLKADDR_NPA     = 0x3ULL,
+	BLKADDR_NIX0    = 0x4ULL,
+	BLKADDR_NIX1    = 0x5ULL,
+	BLKADDR_NPC     = 0x6ULL,
+	BLKADDR_SSO     = 0x7ULL,
+	BLKADDR_SSOW    = 0x8ULL,
+	BLKADDR_TIM     = 0x9ULL,
+	BLKADDR_CPT0    = 0xaULL,
+	BLKADDR_CPT1    = 0xbULL,
+	BLKADDR_NDC0    = 0xcULL,
+	BLKADDR_NDC1    = 0xdULL,
+	BLKADDR_NDC2    = 0xeULL,
+	BLK_COUNT	= 0xfULL,
+};
+
+/* RVU Block Type Enumeration */
+enum rvu_block_type_e {
+	BLKTYPE_RVUM = 0x0,
+	BLKTYPE_MSIX = 0x1,
+	BLKTYPE_LMT  = 0x2,
+	BLKTYPE_NIX  = 0x3,
+	BLKTYPE_NPA  = 0x4,
+	BLKTYPE_NPC  = 0x5,
+	BLKTYPE_SSO  = 0x6,
+	BLKTYPE_SSOW = 0x7,
+	BLKTYPE_TIM  = 0x8,
+	BLKTYPE_CPT  = 0x9,
+	BLKTYPE_NDC  = 0xa,
+	BLKTYPE_MAX  = 0xa,
+};
+
+/* RVU Admin function Interrupt Vector Enumeration */
+enum rvu_af_int_vec_e {
+	RVU_AF_INT_VEC_POISON = 0x0,
+	RVU_AF_INT_VEC_PFFLR  = 0x1,
+	RVU_AF_INT_VEC_PFME   = 0x2,
+	RVU_AF_INT_VEC_GEN    = 0x3,
+	RVU_AF_INT_VEC_MBOX   = 0x4,
+	RVU_AF_INT_VEC_CNT    = 0x5,
+};
+
+/* RVU PF Interrupt Vector Enumeration */
+enum rvu_pf_int_vec_e {
+	RVU_PF_INT_VEC_VFFLR0     = 0x0,
+	RVU_PF_INT_VEC_VFFLR1     = 0x1,
+	RVU_PF_INT_VEC_VFME0      = 0x2,
+	RVU_PF_INT_VEC_VFME1      = 0x3,
+	RVU_PF_INT_VEC_VFPF_MBOX0 = 0x4,
+	RVU_PF_INT_VEC_VFPF_MBOX1 = 0x5,
+	RVU_PF_INT_VEC_AFPF_MBOX  = 0x6,
+	RVU_PF_INT_VEC_CNT	  = 0x7,
+};
+
+/* NPA admin queue completion enumeration */
+enum npa_aq_comp {
+	NPA_AQ_COMP_NOTDONE    = 0x0,
+	NPA_AQ_COMP_GOOD       = 0x1,
+	NPA_AQ_COMP_SWERR      = 0x2,
+	NPA_AQ_COMP_CTX_POISON = 0x3,
+	NPA_AQ_COMP_CTX_FAULT  = 0x4,
+	NPA_AQ_COMP_LOCKERR    = 0x5,
+};
+
+/* NPA admin queue context types */
+enum npa_aq_ctype {
+	NPA_AQ_CTYPE_AURA = 0x0,
+	NPA_AQ_CTYPE_POOL = 0x1,
+};
+
+/* NPA admin queue instruction opcodes */
+enum npa_aq_instop {
+	NPA_AQ_INSTOP_NOP    = 0x0,
+	NPA_AQ_INSTOP_INIT   = 0x1,
+	NPA_AQ_INSTOP_WRITE  = 0x2,
+	NPA_AQ_INSTOP_READ   = 0x3,
+	NPA_AQ_INSTOP_LOCK   = 0x4,
+	NPA_AQ_INSTOP_UNLOCK = 0x5,
+};
+
+/* NPA admin queue instruction structure */
+struct npa_aq_inst_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 doneint               : 1;	/* W0 */
+	u64 reserved_44_62        : 19;
+	u64 cindex                : 20;
+	u64 reserved_17_23        : 7;
+	u64 lf                    : 9;
+	u64 ctype                 : 4;
+	u64 op                    : 4;
+#else
+	u64 op                    : 4;
+	u64 ctype                 : 4;
+	u64 lf                    : 9;
+	u64 reserved_17_23        : 7;
+	u64 cindex                : 20;
+	u64 reserved_44_62        : 19;
+	u64 doneint               : 1;
+#endif
+	u64 res_addr;			/* W1 */
+};
+
+/* NPA admin queue result structure */
+struct npa_aq_res_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 reserved_17_63        : 47; /* W0 */
+	u64 doneint               : 1;
+	u64 compcode              : 8;
+	u64 ctype                 : 4;
+	u64 op                    : 4;
+#else
+	u64 op                    : 4;
+	u64 ctype                 : 4;
+	u64 compcode              : 8;
+	u64 doneint               : 1;
+	u64 reserved_17_63        : 47;
+#endif
+	u64 reserved_64_127;		/* W1 */
+};
+
+struct npa_aura_s {
+	u64 pool_addr;			/* W0 */
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W1 */
+	u64 avg_level             : 8;
+	u64 reserved_118_119      : 2;
+	u64 shift                 : 6;
+	u64 aura_drop             : 8;
+	u64 reserved_98_103       : 6;
+	u64 bp_ena                : 2;
+	u64 aura_drop_ena         : 1;
+	u64 pool_drop_ena         : 1;
+	u64 reserved_93           : 1;
+	u64 avg_con               : 9;
+	u64 pool_way_mask         : 16;
+	u64 pool_caching          : 1;
+	u64 reserved_65           : 2;
+	u64 ena                   : 1;
+#else
+	u64 ena                   : 1;
+	u64 reserved_65           : 2;
+	u64 pool_caching          : 1;
+	u64 pool_way_mask         : 16;
+	u64 avg_con               : 9;
+	u64 reserved_93           : 1;
+	u64 pool_drop_ena         : 1;
+	u64 aura_drop_ena         : 1;
+	u64 bp_ena                : 2;
+	u64 reserved_98_103       : 6;
+	u64 aura_drop             : 8;
+	u64 shift                 : 6;
+	u64 reserved_118_119      : 2;
+	u64 avg_level             : 8;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W2 */
+	u64 reserved_189_191      : 3;
+	u64 nix1_bpid             : 9;
+	u64 reserved_177_179      : 3;
+	u64 nix0_bpid             : 9;
+	u64 reserved_164_167      : 4;
+	u64 count                 : 36;
+#else
+	u64 count                 : 36;
+	u64 reserved_164_167      : 4;
+	u64 nix0_bpid             : 9;
+	u64 reserved_177_179      : 3;
+	u64 nix1_bpid             : 9;
+	u64 reserved_189_191      : 3;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W3 */
+	u64 reserved_252_255      : 4;
+	u64 fc_hyst_bits          : 4;
+	u64 fc_stype              : 2;
+	u64 fc_up_crossing        : 1;
+	u64 fc_ena                : 1;
+	u64 reserved_240_243      : 4;
+	u64 bp                    : 8;
+	u64 reserved_228_231      : 4;
+	u64 limit                 : 36;
+#else
+	u64 limit                 : 36;
+	u64 reserved_228_231      : 4;
+	u64 bp                    : 8;
+	u64 reserved_240_243      : 4;
+	u64 fc_ena                : 1;
+	u64 fc_up_crossing        : 1;
+	u64 fc_stype              : 2;
+	u64 fc_hyst_bits          : 4;
+	u64 reserved_252_255      : 4;
+#endif
+	u64 fc_addr;			/* W4 */
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W5 */
+	u64 reserved_379_383      : 5;
+	u64 err_qint_idx          : 7;
+	u64 reserved_371          : 1;
+	u64 thresh_qint_idx       : 7;
+	u64 reserved_363          : 1;
+	u64 thresh_up             : 1;
+	u64 thresh_int_ena        : 1;
+	u64 thresh_int            : 1;
+	u64 err_int_ena           : 8;
+	u64 err_int               : 8;
+	u64 update_time           : 16;
+	u64 pool_drop             : 8;
+#else
+	u64 pool_drop             : 8;
+	u64 update_time           : 16;
+	u64 err_int               : 8;
+	u64 err_int_ena           : 8;
+	u64 thresh_int            : 1;
+	u64 thresh_int_ena        : 1;
+	u64 thresh_up             : 1;
+	u64 reserved_363          : 1;
+	u64 thresh_qint_idx       : 7;
+	u64 reserved_371          : 1;
+	u64 err_qint_idx          : 7;
+	u64 reserved_379_383      : 5;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W6 */
+	u64 reserved_420_447      : 28;
+	u64 thresh                : 36;
+#else
+	u64 thresh                : 36;
+	u64 reserved_420_447      : 28;
+#endif
+	u64 reserved_448_511;		/* W7 */
+};
+
+struct npa_pool_s {
+	u64 stack_base;			/* W0 */
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W1 */
+	u64 reserved_115_127      : 13;
+	u64 buf_size              : 11;
+	u64 reserved_100_103      : 4;
+	u64 buf_offset            : 12;
+	u64 stack_way_mask        : 16;
+	u64 reserved_70_71        : 3;
+	u64 stack_caching         : 1;
+	u64 reserved_66_67        : 2;
+	u64 nat_align             : 1;
+	u64 ena                   : 1;
+#else
+	u64 ena                   : 1;
+	u64 nat_align             : 1;
+	u64 reserved_66_67        : 2;
+	u64 stack_caching         : 1;
+	u64 reserved_70_71        : 3;
+	u64 stack_way_mask        : 16;
+	u64 buf_offset            : 12;
+	u64 reserved_100_103      : 4;
+	u64 buf_size              : 11;
+	u64 reserved_115_127      : 13;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W2 */
+	u64 stack_pages           : 32;
+	u64 stack_max_pages       : 32;
+#else
+	u64 stack_max_pages       : 32;
+	u64 stack_pages           : 32;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W3 */
+	u64 reserved_240_255      : 16;
+	u64 op_pc                 : 48;
+#else
+	u64 op_pc                 : 48;
+	u64 reserved_240_255      : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W4 */
+	u64 reserved_316_319      : 4;
+	u64 update_time           : 16;
+	u64 reserved_297_299      : 3;
+	u64 fc_up_crossing        : 1;
+	u64 fc_hyst_bits          : 4;
+	u64 fc_stype              : 2;
+	u64 fc_ena                : 1;
+	u64 avg_con               : 9;
+	u64 avg_level             : 8;
+	u64 reserved_270_271      : 2;
+	u64 shift                 : 6;
+	u64 reserved_260_263      : 4;
+	u64 stack_offset          : 4;
+#else
+	u64 stack_offset          : 4;
+	u64 reserved_260_263      : 4;
+	u64 shift                 : 6;
+	u64 reserved_270_271      : 2;
+	u64 avg_level             : 8;
+	u64 avg_con               : 9;
+	u64 fc_ena                : 1;
+	u64 fc_stype              : 2;
+	u64 fc_hyst_bits          : 4;
+	u64 fc_up_crossing        : 1;
+	u64 reserved_297_299      : 3;
+	u64 update_time           : 16;
+	u64 reserved_316_319      : 4;
+#endif
+	u64 fc_addr;			/* W5 */
+	u64 ptr_start;			/* W6 */
+	u64 ptr_end;			/* W7 */
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W8 */
+	u64 reserved_571_575      : 5;
+	u64 err_qint_idx          : 7;
+	u64 reserved_563          : 1;
+	u64 thresh_qint_idx       : 7;
+	u64 reserved_555          : 1;
+	u64 thresh_up             : 1;
+	u64 thresh_int_ena        : 1;
+	u64 thresh_int            : 1;
+	u64 err_int_ena           : 8;
+	u64 err_int               : 8;
+	u64 reserved_512_535      : 24;
+#else
+	u64 reserved_512_535      : 24;
+	u64 err_int               : 8;
+	u64 err_int_ena           : 8;
+	u64 thresh_int            : 1;
+	u64 thresh_int_ena        : 1;
+	u64 thresh_up             : 1;
+	u64 reserved_555          : 1;
+	u64 thresh_qint_idx       : 7;
+	u64 reserved_563          : 1;
+	u64 err_qint_idx          : 7;
+	u64 reserved_571_575      : 5;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W9 */
+	u64 reserved_612_639      : 28;
+	u64 thresh                : 36;
+#else
+	u64 thresh                : 36;
+	u64 reserved_612_639      : 28;
+#endif
+	u64 reserved_640_703;		/* W10 */
+	u64 reserved_704_767;		/* W11 */
+	u64 reserved_768_831;		/* W12 */
+	u64 reserved_832_895;		/* W13 */
+	u64 reserved_896_959;		/* W14 */
+	u64 reserved_960_1023;		/* W15 */
+};
+
+/* NIX admin queue completion status */
+enum nix_aq_comp {
+	NIX_AQ_COMP_NOTDONE        = 0x0,
+	NIX_AQ_COMP_GOOD           = 0x1,
+	NIX_AQ_COMP_SWERR          = 0x2,
+	NIX_AQ_COMP_CTX_POISON     = 0x3,
+	NIX_AQ_COMP_CTX_FAULT      = 0x4,
+	NIX_AQ_COMP_LOCKERR        = 0x5,
+	NIX_AQ_COMP_SQB_ALLOC_FAIL = 0x6,
+};
+
+/* NIX admin queue context types */
+enum nix_aq_ctype {
+	NIX_AQ_CTYPE_RQ   = 0x0,
+	NIX_AQ_CTYPE_SQ   = 0x1,
+	NIX_AQ_CTYPE_CQ   = 0x2,
+	NIX_AQ_CTYPE_MCE  = 0x3,
+	NIX_AQ_CTYPE_RSS  = 0x4,
+	NIX_AQ_CTYPE_DYNO = 0x5,
+};
+
+/* NIX admin queue instruction opcodes */
+enum nix_aq_instop {
+	NIX_AQ_INSTOP_NOP    = 0x0,
+	NIX_AQ_INSTOP_INIT   = 0x1,
+	NIX_AQ_INSTOP_WRITE  = 0x2,
+	NIX_AQ_INSTOP_READ   = 0x3,
+	NIX_AQ_INSTOP_LOCK   = 0x4,
+	NIX_AQ_INSTOP_UNLOCK = 0x5,
+};
+
+/* NIX admin queue instruction structure */
+struct nix_aq_inst_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 doneint		: 1;	/* W0 */
+	u64 reserved_44_62	: 19;
+	u64 cindex		: 20;
+	u64 reserved_15_23	: 9;
+	u64 lf			: 7;
+	u64 ctype		: 4;
+	u64 op			: 4;
+#else
+	u64 op			: 4;
+	u64 ctype		: 4;
+	u64 lf			: 7;
+	u64 reserved_15_23	: 9;
+	u64 cindex		: 20;
+	u64 reserved_44_62	: 19;
+	u64 doneint		: 1;
+#endif
+	u64 res_addr;			/* W1 */
+};
+
+/* NIX admin queue result structure */
+struct nix_aq_res_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 reserved_17_63	: 47;	/* W0 */
+	u64 doneint		: 1;
+	u64 compcode		: 8;
+	u64 ctype		: 4;
+	u64 op			: 4;
+#else
+	u64 op			: 4;
+	u64 ctype		: 4;
+	u64 compcode		: 8;
+	u64 doneint		: 1;
+	u64 reserved_17_63	: 47;
+#endif
+	u64 reserved_64_127;		/* W1 */
+};
+
+/* NIX Completion queue context structure */
+struct nix_cq_ctx_s {
+	u64 base;
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W1 */
+	u64 wrptr		: 20;
+	u64 avg_con		: 9;
+	u64 cint_idx		: 7;
+	u64 cq_err		: 1;
+	u64 qint_idx		: 7;
+	u64 rsvd_81_83		: 3;
+	u64 bpid		: 9;
+	u64 rsvd_69_71		: 3;
+	u64 bp_ena		: 1;
+	u64 rsvd_64_67		: 4;
+#else
+	u64 rsvd_64_67		: 4;
+	u64 bp_ena		: 1;
+	u64 rsvd_69_71		: 3;
+	u64 bpid		: 9;
+	u64 rsvd_81_83		: 3;
+	u64 qint_idx		: 7;
+	u64 cq_err		: 1;
+	u64 cint_idx		: 7;
+	u64 avg_con		: 9;
+	u64 wrptr		: 20;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W2 */
+	u64 update_time		: 16;
+	u64 avg_level		: 8;
+	u64 head		: 20;
+	u64 tail		: 20;
+#else
+	u64 tail		: 20;
+	u64 head		: 20;
+	u64 avg_level		: 8;
+	u64 update_time		: 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W3 */
+	u64 cq_err_int_ena	: 8;
+	u64 cq_err_int		: 8;
+	u64 qsize		: 4;
+	u64 rsvd_233_235	: 3;
+	u64 caching		: 1;
+	u64 substream		: 20;
+	u64 rsvd_210_211	: 2;
+	u64 ena			: 1;
+	u64 drop_ena		: 1;
+	u64 drop		: 8;
+	u64 dp			: 8;
+#else
+	u64 dp			: 8;
+	u64 drop		: 8;
+	u64 drop_ena		: 1;
+	u64 ena			: 1;
+	u64 rsvd_210_211	: 2;
+	u64 substream		: 20;
+	u64 caching		: 1;
+	u64 rsvd_233_235	: 3;
+	u64 qsize		: 4;
+	u64 cq_err_int		: 8;
+	u64 cq_err_int_ena	: 8;
+#endif
+};
+
+/* NIX Receive queue context structure */
+struct nix_rq_ctx_s {
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W0 */
+	u64 wqe_aura      : 20;
+	u64 substream     : 20;
+	u64 cq            : 20;
+	u64 ena_wqwd      : 1;
+	u64 ipsech_ena    : 1;
+	u64 sso_ena       : 1;
+	u64 ena           : 1;
+#else
+	u64 ena           : 1;
+	u64 sso_ena       : 1;
+	u64 ipsech_ena    : 1;
+	u64 ena_wqwd      : 1;
+	u64 cq            : 20;
+	u64 substream     : 20;
+	u64 wqe_aura      : 20;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W1 */
+	u64 rsvd_127_122  : 6;
+	u64 lpb_drop_ena  : 1;
+	u64 spb_drop_ena  : 1;
+	u64 xqe_drop_ena  : 1;
+	u64 wqe_caching   : 1;
+	u64 pb_caching    : 2;
+	u64 sso_tt        : 2;
+	u64 sso_grp       : 10;
+	u64 lpb_aura      : 20;
+	u64 spb_aura      : 20;
+#else
+	u64 spb_aura      : 20;
+	u64 lpb_aura      : 20;
+	u64 sso_grp       : 10;
+	u64 sso_tt        : 2;
+	u64 pb_caching    : 2;
+	u64 wqe_caching   : 1;
+	u64 xqe_drop_ena  : 1;
+	u64 spb_drop_ena  : 1;
+	u64 lpb_drop_ena  : 1;
+	u64 rsvd_127_122  : 6;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W2 */
+	u64 xqe_hdr_split : 1;
+	u64 xqe_imm_copy  : 1;
+	u64 rsvd_189_184  : 6;
+	u64 xqe_imm_size  : 6;
+	u64 later_skip    : 6;
+	u64 rsvd_171      : 1;
+	u64 first_skip    : 7;
+	u64 lpb_sizem1    : 12;
+	u64 spb_ena       : 1;
+	u64 rsvd_150_148  : 3;
+	u64 wqe_skip      : 2;
+	u64 spb_sizem1    : 6;
+	u64 rsvd_139_128  : 12;
+#else
+	u64 rsvd_139_128  : 12;
+	u64 spb_sizem1    : 6;
+	u64 wqe_skip      : 2;
+	u64 rsvd_150_148  : 3;
+	u64 spb_ena       : 1;
+	u64 lpb_sizem1    : 12;
+	u64 first_skip    : 7;
+	u64 rsvd_171      : 1;
+	u64 later_skip    : 6;
+	u64 xqe_imm_size  : 6;
+	u64 rsvd_189_184  : 6;
+	u64 xqe_imm_copy  : 1;
+	u64 xqe_hdr_split : 1;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W3 */
+	u64 spb_pool_pass : 8;
+	u64 spb_pool_drop : 8;
+	u64 spb_aura_pass : 8;
+	u64 spb_aura_drop : 8;
+	u64 wqe_pool_pass : 8;
+	u64 wqe_pool_drop : 8;
+	u64 xqe_pass      : 8;
+	u64 xqe_drop      : 8;
+#else
+	u64 xqe_drop      : 8;
+	u64 xqe_pass      : 8;
+	u64 wqe_pool_drop : 8;
+	u64 wqe_pool_pass : 8;
+	u64 spb_aura_drop : 8;
+	u64 spb_aura_pass : 8;
+	u64 spb_pool_drop : 8;
+	u64 spb_pool_pass : 8;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W4 */
+	u64 rsvd_319_315  : 5;
+	u64 qint_idx      : 7;
+	u64 rq_int_ena    : 8;
+	u64 rq_int        : 8;
+	u64 rsvd_291_288  : 4;
+	u64 lpb_pool_pass : 8;
+	u64 lpb_pool_drop : 8;
+	u64 lpb_aura_pass : 8;
+	u64 lpb_aura_drop : 8;
+#else
+	u64 lpb_aura_drop : 8;
+	u64 lpb_aura_pass : 8;
+	u64 lpb_pool_drop : 8;
+	u64 lpb_pool_pass : 8;
+	u64 rsvd_291_288  : 4;
+	u64 rq_int        : 8;
+	u64 rq_int_ena    : 8;
+	u64 qint_idx      : 7;
+	u64 rsvd_319_315  : 5;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W5 */
+	u64 rsvd_383_366  : 18;
+	u64 flow_tagw     : 6;
+	u64 bad_utag      : 8;
+	u64 good_utag     : 8;
+	u64 ltag          : 24;
+#else
+	u64 ltag          : 24;
+	u64 good_utag     : 8;
+	u64 bad_utag      : 8;
+	u64 flow_tagw     : 6;
+	u64 rsvd_383_366  : 18;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W6 */
+	u64 rsvd_447_432  : 16;
+	u64 octs          : 48;
+#else
+	u64 octs          : 48;
+	u64 rsvd_447_432  : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W7 */
+	u64 rsvd_511_496  : 16;
+	u64 pkts          : 48;
+#else
+	u64 pkts          : 48;
+	u64 rsvd_511_496  : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W8 */
+	u64 rsvd_575_560  : 16;
+	u64 drop_octs     : 48;
+#else
+	u64 drop_octs     : 48;
+	u64 rsvd_575_560  : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W9 */
+	u64 rsvd_639_624  : 16;
+	u64 drop_pkts     : 48;
+#else
+	u64 drop_pkts     : 48;
+	u64 rsvd_639_624  : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)	/* W10 */
+	u64 rsvd_703_688  : 16;
+	u64 re_pkts       : 48;
+#else
+	u64 re_pkts       : 48;
+	u64 rsvd_703_688  : 16;
+#endif
+	u64 rsvd_767_704;		/* W11 */
+	u64 rsvd_831_768;		/* W12 */
+	u64 rsvd_895_832;		/* W13 */
+	u64 rsvd_959_896;		/* W14 */
+	u64 rsvd_1023_960;		/* W15 */
+};
+
+/* NIX sqe sizes */
+enum nix_maxsqesz {
+	NIX_MAXSQESZ_W16 = 0x0,
+	NIX_MAXSQESZ_W8  = 0x1,
+};
+
+/* NIX SQB caching type */
+enum nix_stype {
+	NIX_STYPE_STF = 0x0,
+	NIX_STYPE_STT = 0x1,
+	NIX_STYPE_STP = 0x2,
+};
+
+/* NIX Send queue context structure */
+struct nix_sq_ctx_s {
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W0 */
+	u64 sqe_way_mask          : 16;
+	u64 cq                    : 20;
+	u64 sdp_mcast             : 1;
+	u64 substream             : 20;
+	u64 qint_idx              : 6;
+	u64 ena                   : 1;
+#else
+	u64 ena                   : 1;
+	u64 qint_idx              : 6;
+	u64 substream             : 20;
+	u64 sdp_mcast             : 1;
+	u64 cq                    : 20;
+	u64 sqe_way_mask          : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W1 */
+	u64 sqb_count             : 16;
+	u64 default_chan          : 12;
+	u64 smq_rr_quantum        : 24;
+	u64 sso_ena               : 1;
+	u64 xoff                  : 1;
+	u64 cq_ena                : 1;
+	u64 smq                   : 9;
+#else
+	u64 smq                   : 9;
+	u64 cq_ena                : 1;
+	u64 xoff                  : 1;
+	u64 sso_ena               : 1;
+	u64 smq_rr_quantum        : 24;
+	u64 default_chan          : 12;
+	u64 sqb_count             : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W2 */
+	u64 rsvd_191              : 1;
+	u64 sqe_stype             : 2;
+	u64 sq_int_ena            : 8;
+	u64 sq_int                : 8;
+	u64 sqb_aura              : 20;
+	u64 smq_rr_count          : 25;
+#else
+	u64 smq_rr_count          : 25;
+	u64 sqb_aura              : 20;
+	u64 sq_int                : 8;
+	u64 sq_int_ena            : 8;
+	u64 sqe_stype             : 2;
+	u64 rsvd_191              : 1;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W3 */
+	u64 rsvd_255_253          : 3;
+	u64 smq_next_sq_vld       : 1;
+	u64 smq_pend              : 1;
+	u64 smenq_next_sqb_vld    : 1;
+	u64 head_offset           : 6;
+	u64 smenq_offset          : 6;
+	u64 tail_offset           : 6;
+	u64 smq_lso_segnum        : 8;
+	u64 smq_next_sq           : 20;
+	u64 mnq_dis               : 1;
+	u64 lmt_dis               : 1;
+	u64 cq_limit              : 8;
+	u64 max_sqe_size          : 2;
+#else
+	u64 max_sqe_size          : 2;
+	u64 cq_limit              : 8;
+	u64 lmt_dis               : 1;
+	u64 mnq_dis               : 1;
+	u64 smq_next_sq           : 20;
+	u64 smq_lso_segnum        : 8;
+	u64 tail_offset           : 6;
+	u64 smenq_offset          : 6;
+	u64 head_offset           : 6;
+	u64 smenq_next_sqb_vld    : 1;
+	u64 smq_pend              : 1;
+	u64 smq_next_sq_vld       : 1;
+	u64 rsvd_255_253          : 3;
+#endif
+	u64 next_sqb              : 64;/* W4 */
+	u64 tail_sqb              : 64;/* W5 */
+	u64 smenq_sqb             : 64;/* W6 */
+	u64 smenq_next_sqb        : 64;/* W7 */
+	u64 head_sqb              : 64;/* W8 */
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W9 */
+	u64 rsvd_639_630          : 10;
+	u64 vfi_lso_vld           : 1;
+	u64 vfi_lso_vlan1_ins_ena : 1;
+	u64 vfi_lso_vlan0_ins_ena : 1;
+	u64 vfi_lso_mps           : 14;
+	u64 vfi_lso_sb            : 8;
+	u64 vfi_lso_sizem1        : 3;
+	u64 vfi_lso_total         : 18;
+	u64 rsvd_583_576          : 8;
+#else
+	u64 rsvd_583_576          : 8;
+	u64 vfi_lso_total         : 18;
+	u64 vfi_lso_sizem1        : 3;
+	u64 vfi_lso_sb            : 8;
+	u64 vfi_lso_mps           : 14;
+	u64 vfi_lso_vlan0_ins_ena : 1;
+	u64 vfi_lso_vlan1_ins_ena : 1;
+	u64 vfi_lso_vld           : 1;
+	u64 rsvd_639_630          : 10;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */
+	u64 rsvd_703_658          : 46;
+	u64 scm_lso_rem           : 18;
+#else
+	u64 scm_lso_rem           : 18;
+	u64 rsvd_703_658          : 46;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W11 */
+	u64 rsvd_767_752          : 16;
+	u64 octs                  : 48;
+#else
+	u64 octs                  : 48;
+	u64 rsvd_767_752          : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W12 */
+	u64 rsvd_831_816          : 16;
+	u64 pkts                  : 48;
+#else
+	u64 pkts                  : 48;
+	u64 rsvd_831_816          : 16;
+#endif
+	u64 rsvd_895_832          : 64;/* W13 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W14 */
+	u64 rsvd_959_944          : 16;
+	u64 dropped_octs          : 48;
+#else
+	u64 dropped_octs          : 48;
+	u64 rsvd_959_944          : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W15 */
+	u64 rsvd_1023_1008        : 16;
+	u64 dropped_pkts          : 48;
+#else
+	u64 dropped_pkts          : 48;
+	u64 rsvd_1023_1008        : 16;
+#endif
+};
+
+/* NIX Receive side scaling entry structure */
+struct nix_rsse_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u32 reserved_20_31	: 12;
+	u32 rq			: 20;
+#else
+	u32 rq			: 20;
+	u32 reserved_20_31	: 12;
+#endif
+};
+
+/* NIX receive multicast/mirror entry structure */
+struct nix_rx_mce_s {
+#if defined(__BIG_ENDIAN_BITFIELD)  /* W0 */
+	u64 next       : 16;
+	u64 pf_func    : 16;
+	u64 rsvd_31_24 : 8;
+	u64 index      : 20;
+	u64 eol        : 1;
+	u64 rsvd_2     : 1;
+	u64 op         : 2;
+#else
+	u64 op         : 2;
+	u64 rsvd_2     : 1;
+	u64 eol        : 1;
+	u64 index      : 20;
+	u64 rsvd_31_24 : 8;
+	u64 pf_func    : 16;
+	u64 next       : 16;
+#endif
+};
+
+enum nix_lsoalg {
+	NIX_LSOALG_NOP,
+	NIX_LSOALG_ADD_SEGNUM,
+	NIX_LSOALG_ADD_PAYLEN,
+	NIX_LSOALG_ADD_OFFSET,
+	NIX_LSOALG_TCP_FLAGS,
+};
+
+enum nix_txlayer {
+	NIX_TXLAYER_OL3,
+	NIX_TXLAYER_OL4,
+	NIX_TXLAYER_IL3,
+	NIX_TXLAYER_IL4,
+};
+
+struct nix_lso_format {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 rsvd_19_63		: 45;
+	u64 alg			: 3;
+	u64 rsvd_14_15		: 2;
+	u64 sizem1		: 2;
+	u64 rsvd_10_11		: 2;
+	u64 layer		: 2;
+	u64 offset		: 8;
+#else
+	u64 offset		: 8;
+	u64 layer		: 2;
+	u64 rsvd_10_11		: 2;
+	u64 sizem1		: 2;
+	u64 rsvd_14_15		: 2;
+	u64 alg			: 3;
+	u64 rsvd_19_63		: 45;
+#endif
+};
+
+struct nix_rx_flowkey_alg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u64 reserved_35_63	: 29;
+	u64 ltype_match		: 4;
+	u64 ltype_mask		: 4;
+	u64 sel_chan		: 1;
+	u64 ena			: 1;
+	u64 reserved_24_24	: 1;
+	u64 lid			: 3;
+	u64 bytesm1		: 5;
+	u64 hdr_offset		: 8;
+	u64 fn_mask		: 1;
+	u64 ln_mask		: 1;
+	u64 key_offset		: 6;
+#else
+	u64 key_offset		: 6;
+	u64 ln_mask		: 1;
+	u64 fn_mask		: 1;
+	u64 hdr_offset		: 8;
+	u64 bytesm1		: 5;
+	u64 lid			: 3;
+	u64 reserved_24_24	: 1;
+	u64 ena			: 1;
+	u64 sel_chan		: 1;
+	u64 ltype_mask		: 4;
+	u64 ltype_match		: 4;
+	u64 reserved_35_63	: 29;
+#endif
+};
+
+/* NIX VTAG size */
+enum nix_vtag_size {
+	VTAGSIZE_T4   = 0x0,
+	VTAGSIZE_T8   = 0x1,
+};
+#endif /* RVU_STRUCT_H */
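
Every multi-word context structure above is declared twice per word, once
for each bitfield order, so the layout the hardware sees is identical on
big- and little-endian hosts and software can fill fields by name. A hedged
sketch of composing one NPA admin-queue READ instruction from these
definitions (the helper name and the omitted submit/doorbell path are
illustrative assumptions, not code from this driver):

#include <linux/string.h>	/* the rvu_struct.h above is also assumed */

static void npa_aq_inst_read_aura(struct npa_aq_inst_s *inst, int lf,
				  u32 aura_idx, u64 res_iova)
{
	memset(inst, 0, sizeof(*inst));
	inst->op = NPA_AQ_INSTOP_READ;	 /* opcode from enum npa_aq_instop */
	inst->ctype = NPA_AQ_CTYPE_AURA; /* aura context, not pool */
	inst->lf = lf;			 /* local function owning the context */
	inst->cindex = aura_idx;	 /* context index within that LF */
	inst->res_addr = res_iova;	 /* W1: IOVA where npa_aq_res_s lands */
}
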
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 3a97306..51b77c2 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PXA168 ethernet driver.
  * Most of the code is derived from mv643xx ethernet driver.
@@ -7,19 +8,6 @@
  *		Zhangfei Gao <zgao6@marvell.com>
  *		Philip Rakity <prakity@marvell.com>
  *		Mark Brown <markb@marvell.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/bitops.h>
@@ -201,6 +189,7 @@
 };
 
 struct pxa168_eth_private {
+	struct platform_device *pdev;
 	int port_num;		/* User Ethernet port number    */
 	int phy_addr;
 	int phy_speed;
@@ -331,7 +320,7 @@
 		used_rx_desc = pep->rx_used_desc_q;
 		p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc];
 		size = skb_end_pointer(skb) - skb->data;
-		p_used_rx_desc->buf_ptr = dma_map_single(NULL,
+		p_used_rx_desc->buf_ptr = dma_map_single(&pep->pdev->dev,
 							 skb->data,
 							 size,
 							 DMA_FROM_DEVICE);
@@ -557,9 +546,9 @@
 	 * table is full.
 	 */
 	if (!pep->htpr) {
-		pep->htpr = dma_zalloc_coherent(pep->dev->dev.parent,
-						HASH_ADDR_TABLE_SIZE,
-						&pep->htpr_dma, GFP_KERNEL);
+		pep->htpr = dma_alloc_coherent(pep->dev->dev.parent,
+					       HASH_ADDR_TABLE_SIZE,
+					       &pep->htpr_dma, GFP_KERNEL);
 		if (!pep->htpr)
 			return -ENOMEM;
 	} else {
@@ -743,7 +732,7 @@
 				netdev_err(dev, "Error in TX\n");
 			dev->stats.tx_errors++;
 		}
-		dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE);
+		dma_unmap_single(&pep->pdev->dev, addr, count, DMA_TO_DEVICE);
 		if (skb)
 			dev_kfree_skb_irq(skb);
 		released++;
@@ -805,7 +794,7 @@
 		if (rx_next_curr_desc == rx_used_desc)
 			pep->rx_resource_err = 1;
 		pep->rx_desc_count--;
-		dma_unmap_single(NULL, rx_desc->buf_ptr,
+		dma_unmap_single(&pep->pdev->dev, rx_desc->buf_ptr,
 				 rx_desc->buf_size,
 				 DMA_FROM_DEVICE);
 		received_packets++;
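
The pxa168 hunks above replace the long-deprecated NULL first argument to
the streaming DMA API with the platform device's struct device: current
kernels pick DMA ops and masks from that device, and a NULL dev oopses. A
hedged sketch of the resulting pattern, with an explicit mapping-error
check added purely for illustration (the wrapper name is an assumption;
this diff itself does not add the check):

#include <linux/dma-mapping.h>

static int pxa168_map_tx(struct pxa168_eth_private *pep,
			 struct sk_buff *skb, dma_addr_t *addr)
{
	*addr = dma_map_single(&pep->pdev->dev, skb->data, skb->len,
			       DMA_TO_DEVICE);
	/* dma_map_single() can fail; 0 is not a reliable failure marker */
	if (dma_mapping_error(&pep->pdev->dev, *addr))
		return -ENOMEM;
	return 0;
}
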
@@ -988,8 +977,8 @@
 	cmd.base.phy_address = pep->phy_addr;
 	cmd.base.speed = pep->phy_speed;
 	cmd.base.duplex = pep->phy_duplex;
-	ethtool_convert_legacy_u32_to_link_mode(cmd.link_modes.advertising,
-						PHY_BASIC_FEATURES);
+	bitmap_copy(cmd.link_modes.advertising, PHY_BASIC_FEATURES,
+		    __ETHTOOL_LINK_MODE_MASK_NBITS);
 	cmd.base.autoneg = AUTONEG_ENABLE;
 
 	if (cmd.base.speed != 0)
@@ -1044,9 +1033,9 @@
 	pep->rx_desc_count = 0;
 	size = pep->rx_ring_size * sizeof(struct rx_desc);
 	pep->rx_desc_area_size = size;
-	pep->p_rx_desc_area = dma_zalloc_coherent(pep->dev->dev.parent, size,
-						  &pep->rx_desc_dma,
-						  GFP_KERNEL);
+	pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size,
+						 &pep->rx_desc_dma,
+						 GFP_KERNEL);
 	if (!pep->p_rx_desc_area)
 		goto out;
 
@@ -1103,9 +1092,9 @@
 	pep->tx_desc_count = 0;
 	size = pep->tx_ring_size * sizeof(struct tx_desc);
 	pep->tx_desc_area_size = size;
-	pep->p_tx_desc_area = dma_zalloc_coherent(pep->dev->dev.parent, size,
-						  &pep->tx_desc_dma,
-						  GFP_KERNEL);
+	pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size,
+						 &pep->tx_desc_dma,
+						 GFP_KERNEL);
 	if (!pep->p_tx_desc_area)
 		goto out;
 	/* Initialize the next_desc_ptr links in the Tx descriptors ring */
@@ -1260,7 +1249,8 @@
 	return work_done;
 }
 
-static int pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct pxa168_eth_private *pep = netdev_priv(dev);
 	struct net_device_stats *stats = &dev->stats;
@@ -1273,7 +1263,8 @@
 	length = skb->len;
 	pep->tx_skb[tx_index] = skb;
 	desc->byte_cnt = length;
-	desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
+	desc->buf_ptr = dma_map_single(&pep->pdev->dev, skb->data, length,
+					DMA_TO_DEVICE);
 
 	skb_tx_timestamp(skb);
 
@@ -1434,8 +1425,7 @@
 	pep->dev = dev;
 	pep->clk = clk;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pep->base = devm_ioremap_resource(&pdev->dev, res);
+	pep->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pep->base)) {
 		err = -ENOMEM;
 		goto err_netdev;
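
devm_platform_ioremap_resource() folds the removed two-step lookup-and-map
sequence into a single call; simplified from the kernel's helper, it is
roughly equivalent to:

struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
void __iomem *base = devm_ioremap_resource(&pdev->dev, res);
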
@@ -1458,7 +1448,7 @@
 	if (pdev->dev.of_node)
 		mac_addr = of_get_mac_address(pdev->dev.of_node);
 
-	if (mac_addr && is_valid_ether_addr(mac_addr)) {
+	if (!IS_ERR_OR_NULL(mac_addr)) {
 		ether_addr_copy(dev->dev_addr, mac_addr);
 	} else {
 		/* try reading the mac address, if set by the bootloader */
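
As of v5.2, of_get_mac_address() reports failure through ERR_PTR() values
(it can return -EPROBE_DEFER when the address lives in an nvmem cell), so
callers test with IS_ERR()/IS_ERR_OR_NULL() rather than NULL. A hedged
sketch of the new convention ("np" and the random fallback are
illustrative, not this driver's actual fallback path):

const void *mac = of_get_mac_address(np);

if (!IS_ERR_OR_NULL(mac))	/* valid address from DT or nvmem */
	ether_addr_copy(dev->dev_addr, mac);
else				/* e.g. fall back to a random address */
	eth_hw_addr_random(dev);
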
@@ -1527,6 +1517,7 @@
 	if (err)
 		goto err_free_mdio;
 
+	pep->pdev = pdev;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	pxa168_init_hw(pep);
 	err = register_netdev(dev);
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 9c08c36..095f6c7 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * New driver for Marvell Yukon chipset and SysKonnect Gigabit
  * Ethernet adapters. Based on earlier sk98lin, e100 and
@@ -8,19 +9,6 @@
  * those should be done at higher levels.
  *
  * Copyright (C) 2004, 2005 Stephen Hemminger <shemminger@osdl.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -152,8 +140,10 @@
 	memset(p, 0, regs->len);
 	memcpy_fromio(p, io, B3_RAM_ADDR);
 
-	memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1,
-		      regs->len - B3_RI_WTO_R1);
+	if (regs->len > B3_RI_WTO_R1) {
+		memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1,
+			      regs->len - B3_RI_WTO_R1);
+	}
 }
 
 /* Wake on Lan only supported on Yukon chips with rev 1 or above */
@@ -2568,8 +2558,6 @@
 		goto free_pci_mem;
 	}
 
-	memset(skge->mem, 0, skge->mem_size);
-
 	err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma);
 	if (err)
 		goto free_pci_mem;
@@ -3120,7 +3108,7 @@
 	skb_put(skb, len);
 
 	if (dev->features & NETIF_F_RXCSUM) {
-		skb->csum = csum;
+		skb->csum = le16_to_cpu(csum);
 		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 
@@ -3732,19 +3720,7 @@
 
 	return 0;
 }
-
-static int skge_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, skge_debug_show, inode->i_private);
-}
-
-static const struct file_operations skge_debug_fops = {
-	.owner		= THIS_MODULE,
-	.open		= skge_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(skge_debug);
 
 /*
  * Use network device events to create/remove/rename
@@ -3755,7 +3731,6 @@
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct skge_port *skge;
-	struct dentry *d;
 
 	if (dev->netdev_ops->ndo_open != &skge_up || !skge_debug)
 		goto done;
@@ -3763,33 +3738,20 @@
 	skge = netdev_priv(dev);
 	switch (event) {
 	case NETDEV_CHANGENAME:
-		if (skge->debugfs) {
-			d = debugfs_rename(skge_debug, skge->debugfs,
-					   skge_debug, dev->name);
-			if (d)
-				skge->debugfs = d;
-			else {
-				netdev_info(dev, "rename failed\n");
-				debugfs_remove(skge->debugfs);
-			}
-		}
+		if (skge->debugfs)
+			skge->debugfs = debugfs_rename(skge_debug,
+						       skge->debugfs,
+						       skge_debug, dev->name);
 		break;
 
 	case NETDEV_GOING_DOWN:
-		if (skge->debugfs) {
-			debugfs_remove(skge->debugfs);
-			skge->debugfs = NULL;
-		}
+		debugfs_remove(skge->debugfs);
+		skge->debugfs = NULL;
 		break;
 
 	case NETDEV_UP:
-		d = debugfs_create_file(dev->name, 0444,
-					skge_debug, dev,
-					&skge_debug_fops);
-		if (!d || IS_ERR(d))
-			netdev_info(dev, "debugfs create failed\n");
-		else
-			skge->debugfs = d;
+		skge->debugfs = debugfs_create_file(dev->name, 0444, skge_debug,
+						    dev, &skge_debug_fops);
 		break;
 	}
 
@@ -3804,15 +3766,8 @@
 
 static __init void skge_debug_init(void)
 {
-	struct dentry *ent;
+	skge_debug = debugfs_create_dir("skge", NULL);
 
-	ent = debugfs_create_dir("skge", NULL);
-	if (!ent || IS_ERR(ent)) {
-		pr_info("debugfs create directory failed\n");
-		return;
-	}
-
-	skge_debug = ent;
 	register_netdevice_notifier(&skge_notifier);
 }
 
@@ -4102,8 +4057,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int skge_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct skge_hw *hw  = pci_get_drvdata(pdev);
+	struct skge_hw *hw  = dev_get_drvdata(dev);
 	int i;
 
 	if (!hw)
@@ -4127,8 +4081,7 @@
 
 static int skge_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct skge_hw *hw  = pci_get_drvdata(pdev);
+	struct skge_hw *hw  = dev_get_drvdata(dev);
 	int i, err;
 
 	if (!hw)
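
DEFINE_SHOW_ATTRIBUTE(skge_debug) generates the open callback and
file_operations that the deleted lines spelled out by hand; it expects a
function named skge_debug_show(struct seq_file *, void *) and emits
skge_debug_fops. Roughly, simplified from include/linux/seq_file.h, it
expands to:

static int skge_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, skge_debug_show, inode->i_private);
}

static const struct file_operations skge_debug_fops = {
	.owner   = THIS_MODULE,
	.open    = skge_debug_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
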
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 697d9b3..5f56ee8 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * New driver for Marvell Yukon 2 chipset.
  * Based on earlier sk98lin, and skge driver.
@@ -7,19 +8,6 @@
  * those should be done at higher levels.
  *
  * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -46,6 +34,7 @@
 #include <linux/mii.h>
 #include <linux/of_device.h>
 #include <linux/of_net.h>
+#include <linux/dmi.h>
 
 #include <asm/irq.h>
 
@@ -93,7 +82,7 @@
 module_param(copybreak, int, 0);
 MODULE_PARM_DESC(copybreak, "Receive copy threshold");
 
-static int disable_msi = 0;
+static int disable_msi = -1;
 module_param(disable_msi, int, 0);
 MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
 
@@ -1138,9 +1127,6 @@
 	/* Make sure writes to descriptors are complete before we tell hardware */
 	wmb();
 	sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx);
-
-	/* Synchronize I/O on since next processor may write to tail */
-	mmiowb();
 }
 
 
@@ -1353,7 +1339,6 @@
 
 	/* reset the Rx prefetch unit */
 	sky2_write32(hw, Y2_QADDR(rxq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET);
-	mmiowb();
 }
 
 /* Clean out receive buffer area, assumes receiver hardware stopped */
@@ -2485,13 +2470,11 @@
 		skb->ip_summed = re->skb->ip_summed;
 		skb->csum = re->skb->csum;
 		skb_copy_hash(skb, re->skb);
-		skb->vlan_proto = re->skb->vlan_proto;
-		skb->vlan_tci = re->skb->vlan_tci;
+		__vlan_hwaccel_copy_tag(skb, re->skb);
 
 		pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr,
 					       length, PCI_DMA_FROMDEVICE);
-		re->skb->vlan_proto = 0;
-		re->skb->vlan_tci = 0;
+		__vlan_hwaccel_clear_tag(re->skb);
 		skb_clear_hash(re->skb);
 		re->skb->ip_summed = CHECKSUM_NONE;
 		skb_put(skb, length);
@@ -4623,19 +4606,7 @@
 	napi_enable(&hw->napi);
 	return 0;
 }
-
-static int sky2_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sky2_debug_show, inode->i_private);
-}
-
-static const struct file_operations sky2_debug_fops = {
-	.owner		= THIS_MODULE,
-	.open		= sky2_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sky2_debug);
 
 /*
  * Use network device events to create/remove/rename
@@ -4821,8 +4792,8 @@
 	 * 2) from internal registers set by bootloader
 	 */
 	iap = of_get_mac_address(hw->pdev->dev.of_node);
-	if (iap)
-		memcpy(dev->dev_addr, iap, ETH_ALEN);
+	if (!IS_ERR(iap))
+		ether_addr_copy(dev->dev_addr, iap);
 	else
 		memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8,
 			      ETH_ALEN);
@@ -4931,6 +4902,45 @@
 	return buf;
 }
 
+static const struct dmi_system_id msi_blacklist[] = {
+	{
+		.ident = "Dell Inspiron 1545",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1545"),
+		},
+	},
+	{
+		.ident = "Gateway P-79",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Gateway"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "P-79"),
+		},
+	},
+	{
+		.ident = "ASUS P5W DH Deluxe",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTEK COMPUTER INC"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "P5W DH Deluxe"),
+		},
+	},
+	{
+		.ident = "ASUS P6T",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "P6T"),
+		},
+	},
+	{
+		.ident = "ASUS P6X",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "P6X"),
+		},
+	},
+	{}
+};
+
 static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct net_device *dev, *dev1;
@@ -5042,6 +5052,9 @@
 		goto err_out_free_pci;
 	}
 
+	if (disable_msi == -1)
+		disable_msi = !!dmi_check_system(msi_blacklist);
+
 	if (!disable_msi && pci_enable_msi(pdev) == 0) {
 		err = sky2_test_msi(hw);
 		if (err) {
@@ -5087,7 +5100,7 @@
 	INIT_WORK(&hw->restart_work, sky2_restart);
 
 	pci_set_drvdata(pdev, hw);
-	pdev->d3_delay = 200;
+	pdev->d3_delay = 300;
 
 	return 0;
 
@@ -5161,8 +5174,7 @@
 
 static int sky2_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct sky2_hw *hw = pci_get_drvdata(pdev);
+	struct sky2_hw *hw = dev_get_drvdata(dev);
 	int i;
 
 	if (!hw)
diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index f9149d2..4968352 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -1,6 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_VENDOR_MEDIATEK
 	bool "MediaTek ethernet driver"
-	depends on ARCH_MEDIATEK
+	depends on ARCH_MEDIATEK || SOC_MT7621 || SOC_MT7620
 	---help---
 	  If you have a Mediatek SoC with ethernet, say Y.
 
@@ -8,8 +9,7 @@
 
 config NET_MEDIATEK_SOC
 	tristate "MediaTek SoC Gigabit Ethernet support"
-	depends on NET_VENDOR_MEDIATEK
-	select PHYLIB
+	select PHYLINK
 	---help---
 	  This driver supports the gigabit ethernet MACs in the
 	  MediaTek SoC family.
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index aa3f1c8..2d8362f 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -1,5 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Mediatek SoCs built-in ethernet macs
 #
 
-obj-$(CONFIG_NET_MEDIATEK_SOC)			+= mtk_eth_soc.o
+obj-$(CONFIG_NET_MEDIATEK_SOC)			+= mtk_eth.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
new file mode 100644
index 0000000..ef11cf3
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018-2019 MediaTek Inc.
+
+/* A library for configuring the path from GMAC/GDM to the target PHY
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ */
+
+#include <linux/phy.h>
+#include <linux/regmap.h>
+
+#include "mtk_eth_soc.h"
+
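+/* Table entry tying a SoC capability bit to the callback that programs
+ * the corresponding mux; see mtk_eth_muxc[] below.
+ */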
+struct mtk_eth_muxc {
+	const char	*name;
+	int		cap_bit;
+	int		(*set_path)(struct mtk_eth *eth, int path);
+};
+
+static const char *mtk_eth_path_name(int path)
+{
+	switch (path) {
+	case MTK_ETH_PATH_GMAC1_RGMII:
+		return "gmac1_rgmii";
+	case MTK_ETH_PATH_GMAC1_TRGMII:
+		return "gmac1_trgmii";
+	case MTK_ETH_PATH_GMAC1_SGMII:
+		return "gmac1_sgmii";
+	case MTK_ETH_PATH_GMAC2_RGMII:
+		return "gmac2_rgmii";
+	case MTK_ETH_PATH_GMAC2_SGMII:
+		return "gmac2_sgmii";
+	case MTK_ETH_PATH_GMAC2_GEPHY:
+		return "gmac2_gephy";
+	case MTK_ETH_PATH_GDM1_ESW:
+		return "gdm1_esw";
+	default:
+		return "unknown path";
+	}
+}
+
+static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
+{
+	bool updated = true;
+	u32 val, mask, set;
+
+	switch (path) {
+	case MTK_ETH_PATH_GMAC1_SGMII:
+		mask = ~(u32)MTK_MUX_TO_ESW;
+		set = 0;
+		break;
+	case MTK_ETH_PATH_GDM1_ESW:
+		mask = ~(u32)MTK_MUX_TO_ESW;
+		set = MTK_MUX_TO_ESW;
+		break;
+	default:
+		updated = false;
+		break;
+	}
+
+	if (updated) {
+		val = mtk_r32(eth, MTK_MAC_MISC);
+		val = (val & mask) | set;
+		mtk_w32(eth, val, MTK_MAC_MISC);
+	}
+
+	dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+		mtk_eth_path_name(path), __func__, updated);
+
+	return 0;
+}
+
+static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, int path)
+{
+	unsigned int val = 0;
+	bool updated = true;
+
+	switch (path) {
+	case MTK_ETH_PATH_GMAC2_GEPHY:
+		val = ~(u32)GEPHY_MAC_SEL;
+		break;
+	default:
+		updated = false;
+		break;
+	}
+
+	if (updated)
+		regmap_update_bits(eth->infra, INFRA_MISC2, GEPHY_MAC_SEL, val);
+
+	dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+		mtk_eth_path_name(path), __func__, updated);
+
+	return 0;
+}
+
+static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, int path)
+{
+	unsigned int val = 0;
+	bool updated = true;
+
+	switch (path) {
+	case MTK_ETH_PATH_GMAC2_SGMII:
+		val = CO_QPHY_SEL;
+		break;
+	default:
+		updated = false;
+		break;
+	}
+
+	if (updated)
+		regmap_update_bits(eth->infra, INFRA_MISC2, CO_QPHY_SEL, val);
+
+	dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+		mtk_eth_path_name(path), __func__, updated);
+
+	return 0;
+}
+
+static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
+{
+	unsigned int val = 0;
+	bool updated = true;
+
+	switch (path) {
+	case MTK_ETH_PATH_GMAC1_SGMII:
+		val = SYSCFG0_SGMII_GMAC1;
+		break;
+	case MTK_ETH_PATH_GMAC2_SGMII:
+		val = SYSCFG0_SGMII_GMAC2;
+		break;
+	case MTK_ETH_PATH_GMAC1_RGMII:
+	case MTK_ETH_PATH_GMAC2_RGMII:
+		regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+		val &= SYSCFG0_SGMII_MASK;
+
+		if ((path == MTK_ETH_PATH_GMAC1_RGMII &&
+		     val == SYSCFG0_SGMII_GMAC1) ||
+		    (path == MTK_ETH_PATH_GMAC2_RGMII &&
+		     val == SYSCFG0_SGMII_GMAC2))
+			val = 0;
+		else
+			updated = false;
+		break;
+	default:
+		updated = false;
+		break;
+	}
+
+	if (updated)
+		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+				   SYSCFG0_SGMII_MASK, val);
+
+	dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+		mtk_eth_path_name(path), __func__, updated);
+
+	return 0;
+}
+
+static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
+{
+	unsigned int val = 0;
+	bool updated = true;
+
+	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+
+	switch (path) {
+	case MTK_ETH_PATH_GMAC1_SGMII:
+		val |= SYSCFG0_SGMII_GMAC1_V2;
+		break;
+	case MTK_ETH_PATH_GMAC2_GEPHY:
+		val &= ~(u32)SYSCFG0_SGMII_GMAC2_V2;
+		break;
+	case MTK_ETH_PATH_GMAC2_SGMII:
+		val |= SYSCFG0_SGMII_GMAC2_V2;
+		break;
+	default:
+		updated = false;
+	}
+
+	if (updated)
+		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+				   SYSCFG0_SGMII_MASK, val);
+
+	dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+		mtk_eth_path_name(path), __func__, updated);
+
+	return 0;
+}
+
+static const struct mtk_eth_muxc mtk_eth_muxc[] = {
+	{
+		.name = "mux_gdm1_to_gmac1_esw",
+		.cap_bit = MTK_ETH_MUX_GDM1_TO_GMAC1_ESW,
+		.set_path = set_mux_gdm1_to_gmac1_esw,
+	}, {
+		.name = "mux_gmac2_gmac0_to_gephy",
+		.cap_bit = MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY,
+		.set_path = set_mux_gmac2_gmac0_to_gephy,
+	}, {
+		.name = "mux_u3_gmac2_to_qphy",
+		.cap_bit = MTK_ETH_MUX_U3_GMAC2_TO_QPHY,
+		.set_path = set_mux_u3_gmac2_to_qphy,
+	}, {
+		.name = "mux_gmac1_gmac2_to_sgmii_rgmii",
+		.cap_bit = MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII,
+		.set_path = set_mux_gmac1_gmac2_to_sgmii_rgmii,
+	}, {
+		.name = "mux_gmac12_to_gephy_sgmii",
+		.cap_bit = MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII,
+		.set_path = set_mux_gmac12_to_gephy_sgmii,
+	},
+};
+
+static int mtk_eth_mux_setup(struct mtk_eth *eth, int path)
+{
+	int i, err = 0;
+
+	if (!MTK_HAS_CAPS(eth->soc->caps, path)) {
+		dev_err(eth->dev, "path %s isn't supported on the SoC\n",
+			mtk_eth_path_name(path));
+		return -EINVAL;
+	}
+
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_MUX))
+		return 0;
+
+	/* Setup MUX in path fabric */
+	for (i = 0; i < ARRAY_SIZE(mtk_eth_muxc); i++) {
+		if (MTK_HAS_CAPS(eth->soc->caps, mtk_eth_muxc[i].cap_bit)) {
+			err = mtk_eth_muxc[i].set_path(eth, path);
+			if (err)
+				goto out;
+		} else {
+			dev_dbg(eth->dev, "mux %s isn't present on the SoC\n",
+				mtk_eth_muxc[i].name);
+		}
+	}
+
+out:
+	return err;
+}
+
+int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id)
+{
+	int err, path;
+
+	path = (mac_id == 0) ?  MTK_ETH_PATH_GMAC1_SGMII :
+				MTK_ETH_PATH_GMAC2_SGMII;
+
+	/* Setup proper MUXes along the path */
+	err = mtk_eth_mux_setup(eth, path);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id)
+{
+	int err, path = 0;
+
+	if (mac_id == 1)
+		path = MTK_ETH_PATH_GMAC2_GEPHY;
+
+	if (!path)
+		return -EINVAL;
+
+	/* Setup proper MUXes along the path */
+	err = mtk_eth_mux_setup(eth, path);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id)
+{
+	int err, path;
+
+	path = (mac_id == 0) ?  MTK_ETH_PATH_GMAC1_RGMII :
+				MTK_ETH_PATH_GMAC2_RGMII;
+
+	/* Setup proper MUXes along the path */
+	err = mtk_eth_mux_setup(eth, path);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6e6abdc..703adb9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1,11 +1,5 @@
-/*   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; version 2 of the License
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  *
  *   Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
  *   Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
@@ -24,6 +18,7 @@
 #include <linux/tcp.h>
 #include <linux/interrupt.h>
 #include <linux/pinctrl/devinfo.h>
+#include <linux/phylink.h>
 
 #include "mtk_eth_soc.h"
 
@@ -54,8 +49,10 @@
 };
 
 static const char * const mtk_clks_source_name[] = {
-	"ethif", "esw", "gp0", "gp1", "gp2", "trgpll", "sgmii_tx250m",
-	"sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll"
+	"ethif", "sgmiitop", "esw", "gp0", "gp1", "gp2", "fe", "trgpll",
+	"sgmii_tx250m", "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb",
+	"sgmii2_tx250m", "sgmii2_rx250m", "sgmii2_cdr_ref", "sgmii2_cdr_fb",
+	"sgmii_ck", "eth2pll",
 };
 
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -138,6 +135,31 @@
 	return _mtk_mdio_read(eth, phy_addr, phy_reg);
 }
 
+static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
+				     phy_interface_t interface)
+{
+	u32 val;
+
+	/* Check DDR memory type.
+	 * Currently TRGMII mode with DDR2 memory is not supported.
+	 */
+	regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val);
+	if (interface == PHY_INTERFACE_MODE_TRGMII &&
+	    val & SYSCFG_DRAM_TYPE_DDR2) {
+		dev_err(eth->dev,
+			"TRGMII mode with DDR2 memory is not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	val = (interface == PHY_INTERFACE_MODE_TRGMII) ?
+		ETHSYS_TRGMII_MT7621_DDR_PLL : 0;
+
+	regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
+			   ETHSYS_TRGMII_MT7621_MASK, val);
+
+	return 0;
+}
+
 static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
 {
 	u32 val;
@@ -165,218 +187,340 @@
 	mtk_w32(eth, val, TRGMII_TCK_CTRL);
 }
 
-static void mtk_gmac_sgmii_hw_setup(struct mtk_eth *eth, int mac_id)
+static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
+			   const struct phylink_link_state *state)
 {
-	u32 val;
+	struct mtk_mac *mac = container_of(config, struct mtk_mac,
+					   phylink_config);
+	struct mtk_eth *eth = mac->hw;
+	u32 mcr_cur, mcr_new, sid;
+	int val, ge_mode, err;
 
-	/* Setup the link timer and QPHY power up inside SGMIISYS */
-	regmap_write(eth->sgmiisys, SGMSYS_PCS_LINK_TIMER,
-		     SGMII_LINK_TIMER_DEFAULT);
+	/* MT76x8 has no hardware settings for the MAC */
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
+	    mac->interface != state->interface) {
+		/* Setup soc pin functions */
+		switch (state->interface) {
+		case PHY_INTERFACE_MODE_TRGMII:
+			if (mac->id)
+				goto err_phy;
+			if (!MTK_HAS_CAPS(mac->hw->soc->caps,
+					  MTK_GMAC1_TRGMII))
+				goto err_phy;
+			/* fall through */
+		case PHY_INTERFACE_MODE_RGMII_TXID:
+		case PHY_INTERFACE_MODE_RGMII_RXID:
+		case PHY_INTERFACE_MODE_RGMII_ID:
+		case PHY_INTERFACE_MODE_RGMII:
+		case PHY_INTERFACE_MODE_MII:
+		case PHY_INTERFACE_MODE_REVMII:
+		case PHY_INTERFACE_MODE_RMII:
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_RGMII)) {
+				err = mtk_gmac_rgmii_path_setup(eth, mac->id);
+				if (err)
+					goto init_err;
+			}
+			break;
+		case PHY_INTERFACE_MODE_1000BASEX:
+		case PHY_INTERFACE_MODE_2500BASEX:
+		case PHY_INTERFACE_MODE_SGMII:
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+				err = mtk_gmac_sgmii_path_setup(eth, mac->id);
+				if (err)
+					goto init_err;
+			}
+			break;
+		case PHY_INTERFACE_MODE_GMII:
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_GEPHY)) {
+				err = mtk_gmac_gephy_path_setup(eth, mac->id);
+				if (err)
+					goto init_err;
+			}
+			break;
+		default:
+			goto err_phy;
+		}
 
-	regmap_read(eth->sgmiisys, SGMSYS_SGMII_MODE, &val);
-	val |= SGMII_REMOTE_FAULT_DIS;
-	regmap_write(eth->sgmiisys, SGMSYS_SGMII_MODE, val);
+		/* Setup clock for 1st gmac */
+		if (!mac->id && state->interface != PHY_INTERFACE_MODE_SGMII &&
+		    !phy_interface_mode_is_8023z(state->interface) &&
+		    MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII)) {
+			if (MTK_HAS_CAPS(mac->hw->soc->caps,
+					 MTK_TRGMII_MT7621_CLK)) {
+				if (mt7621_gmac0_rgmii_adjust(mac->hw,
+							      state->interface))
+					goto err_phy;
+			} else {
+				if (state->interface !=
+				    PHY_INTERFACE_MODE_TRGMII)
+					mtk_gmac0_rgmii_adjust(mac->hw,
+							       state->speed);
+			}
+		}
 
-	regmap_read(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, &val);
-	val |= SGMII_AN_RESTART;
-	regmap_write(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, val);
+		ge_mode = 0;
+		switch (state->interface) {
+		case PHY_INTERFACE_MODE_MII:
+		case PHY_INTERFACE_MODE_GMII:
+			ge_mode = 1;
+			break;
+		case PHY_INTERFACE_MODE_REVMII:
+			ge_mode = 2;
+			break;
+		case PHY_INTERFACE_MODE_RMII:
+			if (mac->id)
+				goto err_phy;
+			ge_mode = 3;
+			break;
+		default:
+			break;
+		}
 
-	regmap_read(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, &val);
-	val &= ~SGMII_PHYA_PWD;
-	regmap_write(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, val);
-
-	/* Determine MUX for which GMAC uses the SGMII interface */
-	if (MTK_HAS_CAPS(eth->soc->caps, MTK_DUAL_GMAC_SHARED_SGMII)) {
+		/* put the gmac into the right mode */
 		regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
-		val &= ~SYSCFG0_SGMII_MASK;
-		val |= !mac_id ? SYSCFG0_SGMII_GMAC1 : SYSCFG0_SGMII_GMAC2;
+		val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
+		val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
 		regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
-		dev_info(eth->dev, "setup shared sgmii for gmac=%d\n",
-			 mac_id);
+		mac->interface = state->interface;
 	}
 
-	/* Setup the GMAC1 going through SGMII path when SoC also support
-	 * ESW on GMAC1
-	 */
-	if (MTK_HAS_CAPS(eth->soc->caps, MTK_GMAC1_ESW | MTK_GMAC1_SGMII) &&
-	    !mac_id) {
-		mtk_w32(eth, 0, MTK_MAC_MISC);
-		dev_info(eth->dev, "setup gmac1 going through sgmii");
-	}
-}
+	/* SGMII */
+	if (state->interface == PHY_INTERFACE_MODE_SGMII ||
+	    phy_interface_mode_is_8023z(state->interface)) {
+		/* The GMAC-to-SGMII path will be enabled once the SGMIISYS
+		 * setup is done.
+		 */
+		regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
 
-static void mtk_phy_link_adjust(struct net_device *dev)
-{
-	struct mtk_mac *mac = netdev_priv(dev);
-	u16 lcl_adv = 0, rmt_adv = 0;
-	u8 flowctrl;
-	u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG |
-		  MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN |
-		  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
-		  MAC_MCR_BACKPR_EN;
+		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+				   SYSCFG0_SGMII_MASK,
+				   ~(u32)SYSCFG0_SGMII_MASK);
 
-	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		/* Decide how GMAC and SGMIISYS should be mapped */
+		sid = (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_SGMII)) ?
+		       0 : mac->id;
+
+		/* Setup SGMIISYS with the determined property */
+		if (state->interface != PHY_INTERFACE_MODE_SGMII)
+			err = mtk_sgmii_setup_mode_force(eth->sgmii, sid,
+							 state);
+		else if (phylink_autoneg_inband(mode))
+			err = mtk_sgmii_setup_mode_an(eth->sgmii, sid);
+
+		if (err)
+			goto init_err;
+
+		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+				   SYSCFG0_SGMII_MASK, val);
+	} else if (phylink_autoneg_inband(mode)) {
+		dev_err(eth->dev,
+			"In-band mode not supported in non-SGMII mode!\n");
 		return;
+	}
 
-	switch (dev->phydev->speed) {
+	/* Setup gmac */
+	mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+	mcr_new = mcr_cur;
+	mcr_new &= ~(MAC_MCR_SPEED_100 | MAC_MCR_SPEED_1000 |
+		     MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_TX_FC |
+		     MAC_MCR_FORCE_RX_FC);
+	mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
+		   MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
+
+	switch (state->speed) {
+	case SPEED_2500:
 	case SPEED_1000:
-		mcr |= MAC_MCR_SPEED_1000;
+		mcr_new |= MAC_MCR_SPEED_1000;
 		break;
 	case SPEED_100:
-		mcr |= MAC_MCR_SPEED_100;
+		mcr_new |= MAC_MCR_SPEED_100;
 		break;
-	};
-
-	if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) &&
-	    !mac->id && !mac->trgmii)
-		mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
-
-	if (dev->phydev->link)
-		mcr |= MAC_MCR_FORCE_LINK;
-
-	if (dev->phydev->duplex) {
-		mcr |= MAC_MCR_FORCE_DPX;
-
-		if (dev->phydev->pause)
-			rmt_adv = LPA_PAUSE_CAP;
-		if (dev->phydev->asym_pause)
-			rmt_adv |= LPA_PAUSE_ASYM;
-
-		if (dev->phydev->advertising & ADVERTISED_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (dev->phydev->advertising & ADVERTISED_Asym_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_ASYM;
-
-		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
-
-		if (flowctrl & FLOW_CTRL_TX)
-			mcr |= MAC_MCR_FORCE_TX_FC;
-		if (flowctrl & FLOW_CTRL_RX)
-			mcr |= MAC_MCR_FORCE_RX_FC;
-
-		netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n",
-			  flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled",
-			  flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled");
+	}
+	if (state->duplex == DUPLEX_FULL) {
+		mcr_new |= MAC_MCR_FORCE_DPX;
+		if (state->pause & MLO_PAUSE_TX)
+			mcr_new |= MAC_MCR_FORCE_TX_FC;
+		if (state->pause & MLO_PAUSE_RX)
+			mcr_new |= MAC_MCR_FORCE_RX_FC;
 	}
 
-	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
+	/* Only update control register when needed! */
+	if (mcr_new != mcr_cur)
+		mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id));
 
-	if (dev->phydev->link)
-		netif_carrier_on(dev);
-	else
-		netif_carrier_off(dev);
-
-	if (!of_phy_is_fixed_link(mac->of_node))
-		phy_print_status(dev->phydev);
-}
-
-static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
-				struct device_node *phy_node)
-{
-	struct phy_device *phydev;
-	int phy_mode;
-
-	phy_mode = of_get_phy_mode(phy_node);
-	if (phy_mode < 0) {
-		dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
-		return -EINVAL;
-	}
-
-	phydev = of_phy_connect(eth->netdev[mac->id], phy_node,
-				mtk_phy_link_adjust, 0, phy_mode);
-	if (!phydev) {
-		dev_err(eth->dev, "could not connect to PHY\n");
-		return -ENODEV;
-	}
-
-	dev_info(eth->dev,
-		 "connected mac %d to PHY at %s [uid=%08x, driver=%s]\n",
-		 mac->id, phydev_name(phydev), phydev->phy_id,
-		 phydev->drv->name);
-
-	return 0;
-}
-
-static int mtk_phy_connect(struct net_device *dev)
-{
-	struct mtk_mac *mac = netdev_priv(dev);
-	struct mtk_eth *eth;
-	struct device_node *np;
-	u32 val;
-
-	eth = mac->hw;
-	np = of_parse_phandle(mac->of_node, "phy-handle", 0);
-	if (!np && of_phy_is_fixed_link(mac->of_node))
-		if (!of_phy_register_fixed_link(mac->of_node))
-			np = of_node_get(mac->of_node);
-	if (!np)
-		return -ENODEV;
-
-	mac->ge_mode = 0;
-	switch (of_get_phy_mode(np)) {
-	case PHY_INTERFACE_MODE_TRGMII:
-		mac->trgmii = true;
-	case PHY_INTERFACE_MODE_RGMII_TXID:
-	case PHY_INTERFACE_MODE_RGMII_RXID:
-	case PHY_INTERFACE_MODE_RGMII_ID:
-	case PHY_INTERFACE_MODE_RGMII:
-		break;
-	case PHY_INTERFACE_MODE_SGMII:
-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII))
-			mtk_gmac_sgmii_hw_setup(eth, mac->id);
-		break;
-	case PHY_INTERFACE_MODE_MII:
-		mac->ge_mode = 1;
-		break;
-	case PHY_INTERFACE_MODE_REVMII:
-		mac->ge_mode = 2;
-		break;
-	case PHY_INTERFACE_MODE_RMII:
-		if (!mac->id)
-			goto err_phy;
-		mac->ge_mode = 3;
-		break;
-	default:
-		goto err_phy;
-	}
-
-	/* put the gmac into the right mode */
-	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
-	val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
-	val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
-	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
-
-	/* couple phydev to net_device */
-	if (mtk_phy_connect_node(eth, mac, np))
-		goto err_phy;
-
-	dev->phydev->autoneg = AUTONEG_ENABLE;
-	dev->phydev->speed = 0;
-	dev->phydev->duplex = 0;
-
-	if (of_phy_is_fixed_link(mac->of_node))
-		dev->phydev->supported |=
-		SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-
-	dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
-				   SUPPORTED_Asym_Pause;
-	dev->phydev->advertising = dev->phydev->supported |
-				    ADVERTISED_Autoneg;
-	phy_start_aneg(dev->phydev);
-
-	of_node_put(np);
-
-	return 0;
+	return;
 
 err_phy:
-	if (of_phy_is_fixed_link(mac->of_node))
-		of_phy_deregister_fixed_link(mac->of_node);
-	of_node_put(np);
-	dev_err(eth->dev, "%s: invalid phy\n", __func__);
-	return -EINVAL;
+	dev_err(eth->dev, "%s: GMAC%d mode %s not supported!\n", __func__,
+		mac->id, phy_modes(state->interface));
+	return;
+
+init_err:
+	dev_err(eth->dev, "%s: GMAC%d mode %s err: %d!\n", __func__,
+		mac->id, phy_modes(state->interface), err);
 }
 
+static int mtk_mac_link_state(struct phylink_config *config,
+			      struct phylink_link_state *state)
+{
+	struct mtk_mac *mac = container_of(config, struct mtk_mac,
+					   phylink_config);
+	u32 pmsr = mtk_r32(mac->hw, MTK_MAC_MSR(mac->id));
+
+	state->link = (pmsr & MAC_MSR_LINK);
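+	/* Assuming MAC_MSR_DPX is bit 1, the shift maps it to 0/1, i.e.
+	 * DUPLEX_HALF/DUPLEX_FULL.
+	 */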
+	state->duplex = (pmsr & MAC_MSR_DPX) >> 1;
+
+	switch (pmsr & (MAC_MSR_SPEED_1000 | MAC_MSR_SPEED_100)) {
+	case 0:
+		state->speed = SPEED_10;
+		break;
+	case MAC_MSR_SPEED_100:
+		state->speed = SPEED_100;
+		break;
+	case MAC_MSR_SPEED_1000:
+		state->speed = SPEED_1000;
+		break;
+	default:
+		state->speed = SPEED_UNKNOWN;
+		break;
+	}
+
+	state->pause &= (MLO_PAUSE_RX | MLO_PAUSE_TX);
+	if (pmsr & MAC_MSR_RX_FC)
+		state->pause |= MLO_PAUSE_RX;
+	if (pmsr & MAC_MSR_TX_FC)
+		state->pause |= MLO_PAUSE_TX;
+
+	return 1;
+}
+
+static void mtk_mac_an_restart(struct phylink_config *config)
+{
+	struct mtk_mac *mac = container_of(config, struct mtk_mac,
+					   phylink_config);
+
+	mtk_sgmii_restart_an(mac->hw, mac->id);
+}
+
+static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode,
+			      phy_interface_t interface)
+{
+	struct mtk_mac *mac = container_of(config, struct mtk_mac,
+					   phylink_config);
+	u32 mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+
+	mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN);
+	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
+}
+
+static void mtk_mac_link_up(struct phylink_config *config, unsigned int mode,
+			    phy_interface_t interface,
+			    struct phy_device *phy)
+{
+	struct mtk_mac *mac = container_of(config, struct mtk_mac,
+					   phylink_config);
+	u32 mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+
+	mcr |= MAC_MCR_TX_EN | MAC_MCR_RX_EN;
+	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
+}
+
+static void mtk_validate(struct phylink_config *config,
+			 unsigned long *supported,
+			 struct phylink_link_state *state)
+{
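+	/* Drop interface modes the SoC has no hardware for, then build the
+	 * link-mode mask valid for the remaining ones.
+	 */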
+	struct mtk_mac *mac = container_of(config, struct mtk_mac,
+					   phylink_config);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	if (state->interface != PHY_INTERFACE_MODE_NA &&
+	    state->interface != PHY_INTERFACE_MODE_MII &&
+	    state->interface != PHY_INTERFACE_MODE_GMII &&
+	    !(MTK_HAS_CAPS(mac->hw->soc->caps, MTK_RGMII) &&
+	      phy_interface_mode_is_rgmii(state->interface)) &&
+	    !(MTK_HAS_CAPS(mac->hw->soc->caps, MTK_TRGMII) &&
+	      !mac->id && state->interface == PHY_INTERFACE_MODE_TRGMII) &&
+	    !(MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII) &&
+	      (state->interface == PHY_INTERFACE_MODE_SGMII ||
+	       phy_interface_mode_is_8023z(state->interface)))) {
+		linkmode_zero(supported);
+		return;
+	}
+
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Autoneg);
+
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_TRGMII:
+		phylink_set(mask, 1000baseT_Full);
+		break;
+	case PHY_INTERFACE_MODE_1000BASEX:
+	case PHY_INTERFACE_MODE_2500BASEX:
+		phylink_set(mask, 1000baseX_Full);
+		phylink_set(mask, 2500baseX_Full);
+		break;
+	case PHY_INTERFACE_MODE_GMII:
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		phylink_set(mask, 1000baseT_Half);
+		/* fall through */
+	case PHY_INTERFACE_MODE_SGMII:
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseX_Full);
+		/* fall through */
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_RMII:
+	case PHY_INTERFACE_MODE_REVMII:
+	case PHY_INTERFACE_MODE_NA:
+	default:
+		phylink_set(mask, 10baseT_Half);
+		phylink_set(mask, 10baseT_Full);
+		phylink_set(mask, 100baseT_Half);
+		phylink_set(mask, 100baseT_Full);
+		break;
+	}
+
+	if (state->interface == PHY_INTERFACE_MODE_NA) {
+		if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII)) {
+			phylink_set(mask, 1000baseT_Full);
+			phylink_set(mask, 1000baseX_Full);
+			phylink_set(mask, 2500baseX_Full);
+		}
+		if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_RGMII)) {
+			phylink_set(mask, 1000baseT_Full);
+			phylink_set(mask, 1000baseT_Half);
+			phylink_set(mask, 1000baseX_Full);
+		}
+		if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GEPHY)) {
+			phylink_set(mask, 1000baseT_Full);
+			phylink_set(mask, 1000baseT_Half);
+		}
+	}
+
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	linkmode_and(supported, supported, mask);
+	linkmode_and(state->advertising, state->advertising, mask);
+
+	/* We can only operate at 2500BaseX or 1000BaseX. If requested
+	 * to advertise both, only report advertising at 2500BaseX.
+	 */
+	phylink_helper_basex_speed(state);
+}
+
+static const struct phylink_mac_ops mtk_phylink_ops = {
+	.validate = mtk_validate,
+	.mac_link_state = mtk_mac_link_state,
+	.mac_an_restart = mtk_mac_an_restart,
+	.mac_config = mtk_mac_config,
+	.mac_link_down = mtk_mac_link_down,
+	.mac_link_up = mtk_mac_link_up,
+};
+
 static int mtk_mdio_init(struct mtk_eth *eth)
 {
 	struct device_node *mii_np;
@@ -405,7 +549,7 @@
 	eth->mii_bus->priv = eth;
 	eth->mii_bus->parent = eth->dev;
 
-	snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
+	snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%pOFn", mii_np);
 	ret = of_mdiobus_register(eth->mii_bus, mii_np);
 
 err_put_node:
@@ -427,8 +571,8 @@
 	u32 val;
 
 	spin_lock_irqsave(&eth->tx_irq_lock, flags);
-	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
-	mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+	val = mtk_r32(eth, eth->tx_int_mask_reg);
+	mtk_w32(eth, val & ~mask, eth->tx_int_mask_reg);
 	spin_unlock_irqrestore(&eth->tx_irq_lock, flags);
 }
 
@@ -438,8 +582,8 @@
 	u32 val;
 
 	spin_lock_irqsave(&eth->tx_irq_lock, flags);
-	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
-	mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+	val = mtk_r32(eth, eth->tx_int_mask_reg);
+	mtk_w32(eth, val | mask, eth->tx_int_mask_reg);
 	spin_unlock_irqrestore(&eth->tx_irq_lock, flags);
 }
 
@@ -469,6 +613,7 @@
 {
 	int ret = eth_mac_addr(dev, p);
 	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
 	const char *macaddr = dev->dev_addr;
 
 	if (ret)
@@ -478,11 +623,19 @@
 		return -EBUSY;
 
 	spin_lock_bh(&mac->hw->page_lock);
-	mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
-		MTK_GDMA_MAC_ADRH(mac->id));
-	mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
-		(macaddr[4] << 8) | macaddr[5],
-		MTK_GDMA_MAC_ADRL(mac->id));
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
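+		/* MT7628/88 exposes the MAC address through SDM registers
+		 * instead of the per-GMAC GDMA ones.
+		 */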
+		mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
+			MT7628_SDM_MAC_ADRH);
+		mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
+			(macaddr[4] << 8) | macaddr[5],
+			MT7628_SDM_MAC_ADRL);
+	} else {
+		mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
+			MTK_GDMA_MAC_ADRH(mac->id));
+		mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
+			(macaddr[4] << 8) | macaddr[5],
+			MTK_GDMA_MAC_ADRL(mac->id));
+	}
 	spin_unlock_bh(&mac->hw->page_lock);
 
 	return 0;
@@ -605,10 +758,10 @@
 	dma_addr_t dma_addr;
 	int i;
 
-	eth->scratch_ring = dma_zalloc_coherent(eth->dev,
-						cnt * sizeof(struct mtk_tx_dma),
-						&eth->phy_scratch_ring,
-						GFP_ATOMIC);
+	eth->scratch_ring = dma_alloc_coherent(eth->dev,
+					       cnt * sizeof(struct mtk_tx_dma),
+					       &eth->phy_scratch_ring,
+					       GFP_ATOMIC);
 	if (unlikely(!eth->scratch_ring))
 		return -ENOMEM;
 
@@ -658,19 +811,47 @@
 	return &ring->buf[idx];
 }
 
+static struct mtk_tx_dma *qdma_to_pdma(struct mtk_tx_ring *ring,
+				       struct mtk_tx_dma *dma)
+{
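+	/* ring->dma_pdma mirrors ring->dma one-to-one, so the PDMA twin of
+	 * a QDMA descriptor lives at the same offset in the shadow ring.
+	 */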
+	return ring->dma_pdma - ring->dma + dma;
+}
+
+static int txd_to_idx(struct mtk_tx_ring *ring, struct mtk_tx_dma *dma)
+{
+	return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
+}
+
 static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
 {
-	if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
-		dma_unmap_single(eth->dev,
-				 dma_unmap_addr(tx_buf, dma_addr0),
-				 dma_unmap_len(tx_buf, dma_len0),
-				 DMA_TO_DEVICE);
-	} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
-		dma_unmap_page(eth->dev,
-			       dma_unmap_addr(tx_buf, dma_addr0),
-			       dma_unmap_len(tx_buf, dma_len0),
-			       DMA_TO_DEVICE);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+			dma_unmap_single(eth->dev,
+					 dma_unmap_addr(tx_buf, dma_addr0),
+					 dma_unmap_len(tx_buf, dma_len0),
+					 DMA_TO_DEVICE);
+		} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
+			dma_unmap_page(eth->dev,
+				       dma_unmap_addr(tx_buf, dma_addr0),
+				       dma_unmap_len(tx_buf, dma_len0),
+				       DMA_TO_DEVICE);
+		}
+	} else {
+		if (dma_unmap_len(tx_buf, dma_len0)) {
+			dma_unmap_page(eth->dev,
+				       dma_unmap_addr(tx_buf, dma_addr0),
+				       dma_unmap_len(tx_buf, dma_len0),
+				       DMA_TO_DEVICE);
+		}
+
+		if (dma_unmap_len(tx_buf, dma_len1)) {
+			dma_unmap_page(eth->dev,
+				       dma_unmap_addr(tx_buf, dma_addr1),
+				       dma_unmap_len(tx_buf, dma_len1),
+				       DMA_TO_DEVICE);
+		}
 	}
+
 	tx_buf->flags = 0;
 	if (tx_buf->skb &&
 	    (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
@@ -678,19 +859,45 @@
 	tx_buf->skb = NULL;
 }
 
+static void setup_tx_buf(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
+			 struct mtk_tx_dma *txd, dma_addr_t mapped_addr,
+			 size_t size, int idx)
+{
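+	/* QDMA: one buffer per descriptor. PDMA: two buffers per descriptor;
+	 * even idx fills slot 0 (txd1/PLEN0), odd idx slot 1 (txd3/PLEN1).
+	 */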
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+		dma_unmap_len_set(tx_buf, dma_len0, size);
+	} else {
+		if (idx & 1) {
+			txd->txd3 = mapped_addr;
+			txd->txd2 |= TX_DMA_PLEN1(size);
+			dma_unmap_addr_set(tx_buf, dma_addr1, mapped_addr);
+			dma_unmap_len_set(tx_buf, dma_len1, size);
+		} else {
+			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
+			txd->txd1 = mapped_addr;
+			txd->txd2 = TX_DMA_PLEN0(size);
+			dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+			dma_unmap_len_set(tx_buf, dma_len0, size);
+		}
+	}
+}
+
 static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 		      int tx_num, struct mtk_tx_ring *ring, bool gso)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_dma *itxd, *txd;
+	struct mtk_tx_dma *itxd_pdma, *txd_pdma;
 	struct mtk_tx_buf *itx_buf, *tx_buf;
 	dma_addr_t mapped_addr;
 	unsigned int nr_frags;
 	int i, n_desc = 1;
 	u32 txd4 = 0, fport;
+	int k = 0;
 
 	itxd = ring->next_free;
+	itxd_pdma = qdma_to_pdma(ring, itxd);
 	if (itxd == ring->last_free)
 		return -ENOMEM;
 
@@ -721,26 +928,37 @@
 	itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
 	itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
 			  MTK_TX_FLAGS_FPORT1;
-	dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
-	dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
+	setup_tx_buf(eth, itx_buf, itxd_pdma, mapped_addr, skb_headlen(skb),
+		     k++);
 
 	/* TX SG offload */
 	txd = itxd;
+	txd_pdma = qdma_to_pdma(ring, txd);
 	nr_frags = skb_shinfo(skb)->nr_frags;
+
 	for (i = 0; i < nr_frags; i++) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		unsigned int offset = 0;
 		int frag_size = skb_frag_size(frag);
 
 		while (frag_size) {
 			bool last_frag = false;
 			unsigned int frag_map_size;
+			bool new_desc = true;
 
-			txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
-			if (txd == ring->last_free)
-				goto err_dma;
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA) ||
+			    (i & 0x1)) {
+				txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
+				txd_pdma = qdma_to_pdma(ring, txd);
+				if (txd == ring->last_free)
+					goto err_dma;
 
-			n_desc++;
+				n_desc++;
+			} else {
+				new_desc = false;
+			}
+
 			frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
 			mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
 						       frag_map_size,
@@ -759,14 +977,16 @@
 			WRITE_ONCE(txd->txd4, fport);
 
 			tx_buf = mtk_desc_to_tx_buf(ring, txd);
-			memset(tx_buf, 0, sizeof(*tx_buf));
+			if (new_desc)
+				memset(tx_buf, 0, sizeof(*tx_buf));
 			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
 			tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
 					 MTK_TX_FLAGS_FPORT1;
 
-			dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
-			dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
+			setup_tx_buf(eth, tx_buf, txd_pdma, mapped_addr,
+				     frag_map_size, k++);
+
 			frag_size -= frag_map_size;
 			offset += frag_map_size;
 		}
@@ -778,6 +998,12 @@
 	WRITE_ONCE(itxd->txd4, txd4);
 	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
 				(!nr_frags * TX_DMA_LS0)));
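+	/* PDMA: flag the last segment in whichever per-descriptor buffer
+	 * slot the final fragment landed in (k counted the slots used).
+	 */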
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		if (k & 0x1)
+			txd_pdma->txd2 |= TX_DMA_LS0;
+		else
+			txd_pdma->txd2 |= TX_DMA_LS1;
+	}
 
 	netdev_sent_queue(dev, skb->len);
 	skb_tx_timestamp(skb);
@@ -790,8 +1016,15 @@
 	 */
 	wmb();
 
-	if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
-		mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) ||
+		    !netdev_xmit_more())
+			mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
+	} else {
+		int next_idx = NEXT_DESP_IDX(txd_to_idx(ring, txd),
+					     ring->dma_size);
+		mtk_w32(eth, next_idx, MT7628_TX_CTX_IDX0);
+	}
 
 	return 0;
 
@@ -803,7 +1036,11 @@
 		mtk_tx_unmap(eth, tx_buf);
 
 		itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+		if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			itxd_pdma->txd2 = TX_DMA_DESP2_DEF;
+
 		itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
+		itxd_pdma = qdma_to_pdma(ring, itxd);
 	} while (itxd != txd);
 
 	return -ENOMEM;
@@ -812,13 +1049,14 @@
 static inline int mtk_cal_txd_req(struct sk_buff *skb)
 {
 	int i, nfrags;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 
 	nfrags = 1;
 	if (skb_is_gso(skb)) {
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			frag = &skb_shinfo(skb)->frags[i];
-			nfrags += DIV_ROUND_UP(frag->size, MTK_TX_DMA_BUF_LEN);
+			nfrags += DIV_ROUND_UP(skb_frag_size(frag),
+						MTK_TX_DMA_BUF_LEN);
 		}
 	} else {
 		nfrags += skb_shinfo(skb)->nr_frags;
@@ -933,7 +1171,7 @@
 
 	for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
 		ring = &eth->rx_ring[i];
-		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+		idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size);
 		if (ring->dma[idx].rxd2 & RX_DMA_DONE) {
 			ring->calc_idx_update = true;
 			return ring;
@@ -976,13 +1214,13 @@
 		struct net_device *netdev;
 		unsigned int pktlen;
 		dma_addr_t dma_addr;
-		int mac = 0;
+		int mac;
 
 		ring = mtk_get_rx_ring(eth);
 		if (unlikely(!ring))
 			goto rx_done;
 
-		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+		idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size);
 		rxd = &ring->dma[idx];
 		data = ring->data[idx];
 
@@ -991,9 +1229,13 @@
 			break;
 
 		/* find out which mac the packet comes from. values start at 1 */
-		mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
-		      RX_DMA_FPORT_MASK;
-		mac--;
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+			mac = 0;
+		} else {
+			mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
+				RX_DMA_FPORT_MASK;
+			mac--;
+		}
 
 		if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
 			     !eth->netdev[mac]))
@@ -1011,7 +1253,8 @@
 			goto release_desc;
 		}
 		dma_addr = dma_map_single(eth->dev,
-					  new_data + NET_SKB_PAD,
+					  new_data + NET_SKB_PAD +
+					  eth->ip_align,
 					  ring->buf_size,
 					  DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
@@ -1034,7 +1277,7 @@
 		pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
 		skb->dev = netdev;
 		skb_put(skb, pktlen);
-		if (trxd.rxd4 & RX_DMA_L4_VALID)
+		if (trxd.rxd4 & eth->rx_dma_l4_valid)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 		else
 			skb_checksum_none_assert(skb);
@@ -1051,7 +1294,10 @@
 		rxd->rxd1 = (unsigned int)dma_addr;
 
 release_desc:
-		rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
+			rxd->rxd2 = RX_DMA_LSO;
+		else
+			rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
 
 		ring->calc_idx = idx;
 
@@ -1070,19 +1316,14 @@
 	return done;
 }
 
-static int mtk_poll_tx(struct mtk_eth *eth, int budget)
+static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
+			    unsigned int *done, unsigned int *bytes)
 {
 	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct mtk_tx_dma *desc;
 	struct sk_buff *skb;
 	struct mtk_tx_buf *tx_buf;
-	unsigned int done[MTK_MAX_DEVS];
-	unsigned int bytes[MTK_MAX_DEVS];
 	u32 cpu, dma;
-	int total = 0, i;
-
-	memset(done, 0, sizeof(done));
-	memset(bytes, 0, sizeof(bytes));
 
 	cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
 	dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
@@ -1120,6 +1361,62 @@
 
 	mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
 
+	return budget;
+}
+
+static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
+			    unsigned int *done, unsigned int *bytes)
+{
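+	/* Reclaim completed buffers between the software index
+	 * (ring->cpu_idx) and the hardware index (MT7628_TX_DTX_IDX0);
+	 * only real skbs, not dummy-descriptor slots, consume budget.
+	 */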
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct mtk_tx_dma *desc;
+	struct sk_buff *skb;
+	struct mtk_tx_buf *tx_buf;
+	u32 cpu, dma;
+
+	cpu = ring->cpu_idx;
+	dma = mtk_r32(eth, MT7628_TX_DTX_IDX0);
+
+	while ((cpu != dma) && budget) {
+		tx_buf = &ring->buf[cpu];
+		skb = tx_buf->skb;
+		if (!skb)
+			break;
+
+		if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
+			bytes[0] += skb->len;
+			done[0]++;
+			budget--;
+		}
+
+		mtk_tx_unmap(eth, tx_buf);
+
+		desc = &ring->dma[cpu];
+		ring->last_free = desc;
+		atomic_inc(&ring->free_count);
+
+		cpu = NEXT_DESP_IDX(cpu, ring->dma_size);
+	}
+
+	ring->cpu_idx = cpu;
+
+	return budget;
+}
+
+static int mtk_poll_tx(struct mtk_eth *eth, int budget)
+{
+	struct mtk_tx_ring *ring = &eth->tx_ring;
+	unsigned int done[MTK_MAX_DEVS];
+	unsigned int bytes[MTK_MAX_DEVS];
+	int total = 0, i;
+
+	memset(done, 0, sizeof(done));
+	memset(bytes, 0, sizeof(bytes));
+
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		budget = mtk_poll_tx_qdma(eth, budget, done, bytes);
+	else
+		budget = mtk_poll_tx_pdma(eth, budget, done, bytes);
+
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		if (!eth->netdev[i] || !done[i])
 			continue;
@@ -1151,13 +1448,14 @@
 	u32 status, mask;
 	int tx_done = 0;
 
-	mtk_handle_status_irq(eth);
-	mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		mtk_handle_status_irq(eth);
+	mtk_w32(eth, MTK_TX_DONE_INT, eth->tx_int_status_reg);
 	tx_done = mtk_poll_tx(eth, budget);
 
 	if (unlikely(netif_msg_intr(eth))) {
-		status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-		mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+		status = mtk_r32(eth, eth->tx_int_status_reg);
+		mask = mtk_r32(eth, eth->tx_int_mask_reg);
 		dev_info(eth->dev,
 			 "done tx %d, intr 0x%08x/0x%x\n",
 			 tx_done, status, mask);
@@ -1166,7 +1464,7 @@
 	if (tx_done == budget)
 		return budget;
 
-	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+	status = mtk_r32(eth, eth->tx_int_status_reg);
 	if (status & MTK_TX_DONE_INT)
 		return budget;
 
@@ -1220,8 +1518,8 @@
 	if (!ring->buf)
 		goto no_tx_mem;
 
-	ring->dma = dma_zalloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
-					&ring->phys, GFP_ATOMIC);
+	ring->dma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
+				       &ring->phys, GFP_ATOMIC);
 	if (!ring->dma)
 		goto no_tx_mem;
 
@@ -1233,6 +1531,24 @@
 		ring->dma[i].txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
 	}
 
+	/* On MT7688 (PDMA only) this driver uses the ring->dma structs
+	 * only as the framework. The real HW descriptors are the PDMA
+	 * descriptors in ring->dma_pdma.
+	 */
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		ring->dma_pdma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
+						    &ring->phys_pdma,
+						    GFP_ATOMIC);
+		if (!ring->dma_pdma)
+			goto no_tx_mem;
+
+		for (i = 0; i < MTK_DMA_SIZE; i++) {
+			ring->dma_pdma[i].txd2 = TX_DMA_DESP2_DEF;
+			ring->dma_pdma[i].txd4 = 0;
+		}
+	}
+
+	ring->dma_size = MTK_DMA_SIZE;
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
 	ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
@@ -1243,15 +1559,23 @@
 	 */
 	wmb();
 
-	mtk_w32(eth, ring->phys, MTK_QTX_CTX_PTR);
-	mtk_w32(eth, ring->phys, MTK_QTX_DTX_PTR);
-	mtk_w32(eth,
-		ring->phys + ((MTK_DMA_SIZE - 1) * sz),
-		MTK_QTX_CRX_PTR);
-	mtk_w32(eth,
-		ring->phys + ((MTK_DMA_SIZE - 1) * sz),
-		MTK_QTX_DRX_PTR);
-	mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		mtk_w32(eth, ring->phys, MTK_QTX_CTX_PTR);
+		mtk_w32(eth, ring->phys, MTK_QTX_DTX_PTR);
+		mtk_w32(eth,
+			ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+			MTK_QTX_CRX_PTR);
+		mtk_w32(eth,
+			ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+			MTK_QTX_DRX_PTR);
+		mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
+			MTK_QTX_CFG(0));
+	} else {
+		mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
+		mtk_w32(eth, MTK_DMA_SIZE, MT7628_TX_MAX_CNT0);
+		mtk_w32(eth, 0, MT7628_TX_CTX_IDX0);
+		mtk_w32(eth, MT7628_PST_DTX_IDX0, MTK_PDMA_RST_IDX);
+	}
 
 	return 0;
 
@@ -1278,6 +1602,14 @@
 				  ring->phys);
 		ring->dma = NULL;
 	}
+
+	if (ring->dma_pdma) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(*ring->dma_pdma),
+				  ring->dma_pdma,
+				  ring->phys_pdma);
+		ring->dma_pdma = NULL;
+	}
 }
 
 static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
@@ -1317,22 +1649,25 @@
 			return -ENOMEM;
 	}
 
-	ring->dma = dma_zalloc_coherent(eth->dev,
-					rx_dma_size * sizeof(*ring->dma),
-					&ring->phys, GFP_ATOMIC);
+	ring->dma = dma_alloc_coherent(eth->dev,
+				       rx_dma_size * sizeof(*ring->dma),
+				       &ring->phys, GFP_ATOMIC);
 	if (!ring->dma)
 		return -ENOMEM;
 
 	for (i = 0; i < rx_dma_size; i++) {
 		dma_addr_t dma_addr = dma_map_single(eth->dev,
-				ring->data[i] + NET_SKB_PAD,
+				ring->data[i] + NET_SKB_PAD + eth->ip_align,
 				ring->buf_size,
 				DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
 			return -ENOMEM;
 		ring->dma[i].rxd1 = (unsigned int)dma_addr;
 
-		ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
+			ring->dma[i].rxd2 = RX_DMA_LSO;
+		else
+			ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
 	}
 	ring->dma_size = rx_dma_size;
 	ring->calc_idx_update = false;
@@ -1648,9 +1983,16 @@
 	unsigned long t_start = jiffies;
 
 	while (1) {
-		if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
-		      (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
-			return 0;
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+			if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
+			      (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
+				return 0;
+		} else {
+			if (!(mtk_r32(eth, MTK_PDMA_GLO_CFG) &
+			      (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
+				return 0;
+		}
+
 		if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
 			break;
 	}
@@ -1667,20 +2009,24 @@
 	if (mtk_dma_busy_wait(eth))
 		return -EBUSY;
 
-	/* QDMA needs scratch memory for internal reordering of the
-	 * descriptors
-	 */
-	err = mtk_init_fq_dma(eth);
-	if (err)
-		return err;
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		/* QDMA needs scratch memory for internal reordering of the
+		 * descriptors
+		 */
+		err = mtk_init_fq_dma(eth);
+		if (err)
+			return err;
+	}
 
 	err = mtk_tx_alloc(eth);
 	if (err)
 		return err;
 
-	err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
-	if (err)
-		return err;
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
+		if (err)
+			return err;
+	}
 
 	err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
 	if (err)
@@ -1697,10 +2043,14 @@
 			return err;
 	}
 
-	/* Enable random early drop and set drop threshold automatically */
-	mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
-		MTK_QDMA_FC_THRES);
-	mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		/* Enable random early drop and set drop threshold
+		 * automatically
+		 */
+		mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN |
+			FC_THRES_MIN, MTK_QDMA_FC_THRES);
+		mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
+	}
 
 	return 0;
 }
@@ -1768,6 +2118,22 @@
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t mtk_handle_irq(int irq, void *_eth)
+{
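+	/* Shared IRQ line: demux by checking which of the RX/TX done
+	 * interrupts is both unmasked and pending.
+	 */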
+	struct mtk_eth *eth = _eth;
+
+	if (mtk_r32(eth, MTK_PDMA_INT_MASK) & MTK_RX_DONE_INT) {
+		if (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT)
+			mtk_handle_irq_rx(irq, _eth);
+	}
+	if (mtk_r32(eth, eth->tx_int_mask_reg) & MTK_TX_DONE_INT) {
+		if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT)
+			mtk_handle_irq_tx(irq, _eth);
+	}
+
+	return IRQ_HANDLED;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void mtk_poll_controller(struct net_device *dev)
 {
@@ -1784,6 +2150,7 @@
 
 static int mtk_start_dma(struct mtk_eth *eth)
 {
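+	/* Offset RX data by 2 bytes only when the arch wants IP headers
+	 * word-aligned (NET_IP_ALIGN == 2).
+	 */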
+	u32 rx_2b_offset = (NET_IP_ALIGN == 2) ? MTK_RX_2B_OFFSET : 0;
 	int err;
 
 	err = mtk_dma_init(eth);
@@ -1792,17 +2159,23 @@
 		return err;
 	}
 
-	mtk_w32(eth,
-		MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
-		MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
-		MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
-		MTK_RX_BT_32DWORDS,
-		MTK_QDMA_GLO_CFG);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		mtk_w32(eth,
+			MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+			MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
+			MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+			MTK_RX_BT_32DWORDS,
+			MTK_QDMA_GLO_CFG);
 
-	mtk_w32(eth,
-		MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
-		MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
-		MTK_PDMA_GLO_CFG);
+		mtk_w32(eth,
+			MTK_RX_DMA_EN | rx_2b_offset |
+			MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
+			MTK_PDMA_GLO_CFG);
+	} else {
+		mtk_w32(eth, MTK_TX_WB_DDONE | MTK_TX_DMA_EN | MTK_RX_DMA_EN |
+			MTK_MULTI_EN | MTK_PDMA_SIZE_8DWORDS,
+			MTK_PDMA_GLO_CFG);
+	}
 
 	return 0;
 }
@@ -1811,6 +2184,14 @@
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
+	int err;
+
+	err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
+	if (err) {
+		netdev_err(dev, "%s: could not attach PHY: %d\n", __func__,
+			   err);
+		return err;
+	}
 
 	/* we run 2 netdevs on the same dma ring so we only bring it up once */
 	if (!refcount_read(&eth->dma_refcnt)) {
@@ -1828,9 +2209,8 @@
 	else
 		refcount_inc(&eth->dma_refcnt);
 
-	phy_start(dev->phydev);
+	phylink_start(mac->phylink);
 	netif_start_queue(dev);
-
 	return 0;
 }
 
@@ -1862,8 +2242,11 @@
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
 
+	phylink_stop(mac->phylink);
+
 	netif_tx_disable(dev);
-	phy_stop(dev->phydev);
+
+	phylink_disconnect_phy(mac->phylink);
 
 	/* only shutdown DMA if this is the last user */
 	if (!refcount_dec_and_test(&eth->dma_refcnt))
@@ -1874,7 +2257,8 @@
 	napi_disable(&eth->tx_napi);
 	napi_disable(&eth->rx_napi);
 
-	mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
 	mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
 
 	mtk_dma_free(eth);
@@ -1936,18 +2320,27 @@
 	if (ret)
 		goto err_disable_pm;
 
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		ret = device_reset(eth->dev);
+		if (ret) {
+			dev_err(eth->dev, "MAC reset failed!\n");
+			goto err_disable_pm;
+		}
+
+		/* enable interrupt delay for RX */
+		mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
+
+		/* disable delay and normal interrupt */
+		mtk_tx_irq_disable(eth, ~0);
+		mtk_rx_irq_disable(eth, ~0);
+
+		return 0;
+	}
+
+	/* The remaining init applies to non-MT7628 SoCs only */
 	ethsys_reset(eth, RSTCTRL_FE);
 	ethsys_reset(eth, RSTCTRL_PPE);
 
-	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
-	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->mac[i])
-			continue;
-		val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id);
-		val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id);
-	}
-	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
-
 	if (eth->pctl) {
 		/* Set GE2 driving and slew rate */
 		regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
@@ -1960,11 +2353,11 @@
 	}
 
 	/* Set linkdown as the default for each GMAC. Its own MCR would be set
-	 * up with the more appropriate value when mtk_phy_link_adjust call is
-	 * being invoked.
+	 * up with the more appropriate value when the mtk_mac_config call is
+	 * invoked.
 	 */
 	for (i = 0; i < MTK_MAC_COUNT; i++)
-		mtk_w32(eth, 0, MTK_MAC_MCR(i));
+		mtk_w32(eth, MAC_MCR_FORCE_LINK_DOWN, MTK_MAC_MCR(i));
 
 	/* Indicate to the CDM that it should parse the MTK special tag from
 	 * the CPU, which also works for untagged packets.
@@ -1992,7 +2385,7 @@
 	mtk_w32(eth, MTK_RX_DONE_INT, MTK_QDMA_INT_GRP2);
 	mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
 
 		/* setup the forward port to send frame to PDMA */
@@ -2034,7 +2427,7 @@
 	const char *mac_addr;
 
 	mac_addr = of_get_mac_address(mac->of_node);
-	if (mac_addr)
+	if (!IS_ERR(mac_addr))
 		ether_addr_copy(dev->dev_addr, mac_addr);
 
 	/* If the mac address is invalid, use random mac address  */
@@ -2044,7 +2437,7 @@
 			dev->dev_addr);
 	}
 
-	return mtk_phy_connect(dev);
+	return 0;
 }
 
 static void mtk_uninit(struct net_device *dev)
@@ -2052,20 +2445,20 @@
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
 
-	phy_disconnect(dev->phydev);
-	if (of_phy_is_fixed_link(mac->of_node))
-		of_phy_deregister_fixed_link(mac->of_node);
+	phylink_disconnect_phy(mac->phylink);
 	mtk_tx_irq_disable(eth, ~0);
 	mtk_rx_irq_disable(eth, ~0);
 }
 
 static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+	struct mtk_mac *mac = netdev_priv(dev);
+
 	switch (cmd) {
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		return phy_mii_ioctl(dev->phydev, ifr, cmd);
+		return phylink_mii_ioctl(mac->phylink, ifr, cmd);
 	default:
 		break;
 	}
@@ -2106,16 +2499,6 @@
 				     eth->dev->pins->default_state);
 	mtk_hw_init(eth);
 
-	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->mac[i] ||
-		    of_phy_is_fixed_link(eth->mac[i]->of_node))
-			continue;
-		err = phy_init_hw(eth->netdev[i]->phydev);
-		if (err)
-			dev_err(eth->dev, "%s: PHY init failed.\n",
-				eth->netdev[i]->name);
-	}
-
 	/* restart DMA and enable IRQs */
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		if (!test_bit(i, &restart))
@@ -2178,9 +2561,7 @@
 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
 		return -EBUSY;
 
-	phy_ethtool_ksettings_get(ndev->phydev, cmd);
-
-	return 0;
+	return phylink_ethtool_ksettings_get(mac->phylink, cmd);
 }
 
 static int mtk_set_link_ksettings(struct net_device *ndev,
@@ -2191,7 +2572,7 @@
 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
 		return -EBUSY;
 
-	return phy_ethtool_ksettings_set(ndev->phydev, cmd);
+	return phylink_ethtool_ksettings_set(mac->phylink, cmd);
 }
 
 static void mtk_get_drvinfo(struct net_device *dev,
@@ -2225,22 +2606,10 @@
 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
 		return -EBUSY;
 
-	return genphy_restart_aneg(dev->phydev);
-}
+	if (!mac->phylink)
+		return -ENOTSUPP;
 
-static u32 mtk_get_link(struct net_device *dev)
-{
-	struct mtk_mac *mac = netdev_priv(dev);
-	int err;
-
-	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
-		return -EBUSY;
-
-	err = genphy_update_link(dev->phydev);
-	if (err)
-		return ethtool_op_get_link(dev);
-
-	return dev->phydev->link;
+	return phylink_ethtool_nway_reset(mac->phylink);
 }
 
 static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -2304,13 +2673,13 @@
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
-		if (dev->features & NETIF_F_LRO) {
+		if (dev->hw_features & NETIF_F_LRO) {
 			cmd->data = MTK_MAX_RX_RING_NUM;
 			ret = 0;
 		}
 		break;
 	case ETHTOOL_GRXCLSRLCNT:
-		if (dev->features & NETIF_F_LRO) {
+		if (dev->hw_features & NETIF_F_LRO) {
 			struct mtk_mac *mac = netdev_priv(dev);
 
 			cmd->rule_cnt = mac->hwlro_ip_cnt;
@@ -2318,11 +2687,11 @@
 		}
 		break;
 	case ETHTOOL_GRXCLSRULE:
-		if (dev->features & NETIF_F_LRO)
+		if (dev->hw_features & NETIF_F_LRO)
 			ret = mtk_hwlro_get_fdir_entry(dev, cmd);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
-		if (dev->features & NETIF_F_LRO)
+		if (dev->hw_features & NETIF_F_LRO)
 			ret = mtk_hwlro_get_fdir_all(dev, cmd,
 						     rule_locs);
 		break;
@@ -2339,11 +2708,11 @@
 
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXCLSRLINS:
-		if (dev->features & NETIF_F_LRO)
+		if (dev->hw_features & NETIF_F_LRO)
 			ret = mtk_hwlro_add_ipaddr(dev, cmd);
 		break;
 	case ETHTOOL_SRXCLSRLDEL:
-		if (dev->features & NETIF_F_LRO)
+		if (dev->hw_features & NETIF_F_LRO)
 			ret = mtk_hwlro_del_ipaddr(dev, cmd);
 		break;
 	default:
@@ -2360,7 +2729,7 @@
 	.get_msglevel		= mtk_get_msglevel,
 	.set_msglevel		= mtk_set_msglevel,
 	.nway_reset		= mtk_nway_reset,
-	.get_link		= mtk_get_link,
+	.get_link		= ethtool_op_get_link,
 	.get_strings		= mtk_get_strings,
 	.get_sset_count		= mtk_get_sset_count,
 	.get_ethtool_stats	= mtk_get_ethtool_stats,
@@ -2388,9 +2757,10 @@
 
 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 {
-	struct mtk_mac *mac;
 	const __be32 *_id = of_get_property(np, "reg", NULL);
-	int id, err;
+	struct phylink *phylink;
+	int phy_mode, id, err;
+	struct mtk_mac *mac;
 
 	if (!_id) {
 		dev_err(eth->dev, "missing mac id\n");
@@ -2434,18 +2804,44 @@
 	u64_stats_init(&mac->hw_stats->syncp);
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
+	/* phylink create */
+	phy_mode = of_get_phy_mode(np);
+	if (phy_mode < 0) {
+		dev_err(eth->dev, "incorrect phy-mode\n");
+		err = -EINVAL;
+		goto free_netdev;
+	}
+
+	/* mac config is not set */
+	mac->interface = PHY_INTERFACE_MODE_NA;
+	mac->mode = MLO_AN_PHY;
+	mac->speed = SPEED_UNKNOWN;
+
+	mac->phylink_config.dev = &eth->netdev[id]->dev;
+	mac->phylink_config.type = PHYLINK_NETDEV;
+
+	phylink = phylink_create(&mac->phylink_config,
+				 of_fwnode_handle(mac->of_node),
+				 phy_mode, &mtk_phylink_ops);
+	if (IS_ERR(phylink)) {
+		err = PTR_ERR(phylink);
+		goto free_netdev;
+	}
+
+	mac->phylink = phylink;
+
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
 	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 
-	eth->netdev[id]->hw_features = MTK_HW_FEATURES;
+	eth->netdev[id]->hw_features = eth->soc->hw_features;
 	if (eth->hwlro)
 		eth->netdev[id]->hw_features |= NETIF_F_LRO;
 
-	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
+	eth->netdev[id]->vlan_features = eth->soc->hw_features &
 		~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
-	eth->netdev[id]->features |= MTK_HW_FEATURES;
+	eth->netdev[id]->features |= eth->soc->hw_features;
 	eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
 
 	eth->netdev[id]->irq = eth->irq[0];
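
For context, a hedged sketch of the phylink lifecycle that mtk_add_mac() now sets up; the connect/start half happens in the driver's open path, which is outside this hunk, so the exact calls below are assumptions based on the usual phylink pattern:

	/* probe: create the instance (as in the hunk above) */
	phylink = phylink_create(&mac->phylink_config,
				 of_fwnode_handle(mac->of_node),
				 phy_mode, &mtk_phylink_ops);

	/* ndo_open (not shown in this diff): attach and start */
	err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
	if (!err)
		phylink_start(mac->phylink);
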
@@ -2460,11 +2856,9 @@
 
 static int mtk_probe(struct platform_device *pdev)
 {
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	struct device_node *mac_np;
 	struct mtk_eth *eth;
-	int err;
-	int i;
+	int err, i;
 
 	eth = devm_kzalloc(&pdev->dev, sizeof(*eth), GFP_KERNEL);
 	if (!eth)
@@ -2473,29 +2867,58 @@
 	eth->soc = of_device_get_match_data(&pdev->dev);
 
 	eth->dev = &pdev->dev;
-	eth->base = devm_ioremap_resource(&pdev->dev, res);
+	eth->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(eth->base))
 		return PTR_ERR(eth->base);
 
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		eth->tx_int_mask_reg = MTK_QDMA_INT_MASK;
+		eth->tx_int_status_reg = MTK_QDMA_INT_STATUS;
+	} else {
+		eth->tx_int_mask_reg = MTK_PDMA_INT_MASK;
+		eth->tx_int_status_reg = MTK_PDMA_INT_STATUS;
+	}
+
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		eth->rx_dma_l4_valid = RX_DMA_L4_VALID_PDMA;
+		eth->ip_align = NET_IP_ALIGN;
+	} else {
+		eth->rx_dma_l4_valid = RX_DMA_L4_VALID;
+	}
+
 	spin_lock_init(&eth->page_lock);
 	spin_lock_init(&eth->tx_irq_lock);
 	spin_lock_init(&eth->rx_irq_lock);
 
-	eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-						      "mediatek,ethsys");
-	if (IS_ERR(eth->ethsys)) {
-		dev_err(&pdev->dev, "no ethsys regmap found\n");
-		return PTR_ERR(eth->ethsys);
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							      "mediatek,ethsys");
+		if (IS_ERR(eth->ethsys)) {
+			dev_err(&pdev->dev, "no ethsys regmap found\n");
+			return PTR_ERR(eth->ethsys);
+		}
+	}
+
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_INFRA)) {
+		eth->infra = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							     "mediatek,infracfg");
+		if (IS_ERR(eth->infra)) {
+			dev_err(&pdev->dev, "no infracfg regmap found\n");
+			return PTR_ERR(eth->infra);
+		}
 	}
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
-		eth->sgmiisys =
-		syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-						"mediatek,sgmiisys");
-		if (IS_ERR(eth->sgmiisys)) {
-			dev_err(&pdev->dev, "no sgmiisys regmap found\n");
-			return PTR_ERR(eth->sgmiisys);
-		}
+		eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii),
+					  GFP_KERNEL);
+		if (!eth->sgmii)
+			return -ENOMEM;
+
+		err = mtk_sgmii_init(eth->sgmii, pdev->dev.of_node,
+				     eth->soc->ana_rgc3);
+
+		if (err)
+			return err;
 	}
 
 	if (eth->soc->required_pctl) {
@@ -2508,7 +2931,10 @@
 	}
 
 	for (i = 0; i < 3; i++) {
-		eth->irq[i] = platform_get_irq(pdev, i);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0)
+			eth->irq[i] = eth->irq[0];
+		else
+			eth->irq[i] = platform_get_irq(pdev, i);
 		if (eth->irq[i] < 0) {
 			dev_err(&pdev->dev, "no IRQ%d resource found\n", i);
 			return -ENXIO;
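
On MTK_SHARED_INT SoCs all three IRQ slots alias line 0, so a single handler has to demultiplex TX and RX completions. mtk_handle_irq()'s body is not part of this diff; a hedged sketch of what such a combined handler could look like, with MTK_TX_DONE_INT/MTK_RX_DONE_INT assumed from the driver's headers:

	static irqreturn_t example_shared_irq(int irq, void *_eth)
	{
		struct mtk_eth *eth = _eth;

		/* poll each engine's status and reuse the dedicated
		 * handlers that non-shared SoCs register directly */
		if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT)
			mtk_handle_irq_tx(irq, _eth);
		if (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT)
			mtk_handle_irq_rx(irq, _eth);

		return IRQ_HANDLED;
	}
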
@@ -2547,23 +2973,36 @@
 			continue;
 
 		err = mtk_add_mac(eth, mac_np);
-		if (err)
+		if (err) {
+			of_node_put(mac_np);
 			goto err_deinit_hw;
+		}
 	}
 
-	err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
-			       dev_name(eth->dev), eth);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT)) {
+		err = devm_request_irq(eth->dev, eth->irq[0],
+				       mtk_handle_irq, 0,
+				       dev_name(eth->dev), eth);
+	} else {
+		err = devm_request_irq(eth->dev, eth->irq[1],
+				       mtk_handle_irq_tx, 0,
+				       dev_name(eth->dev), eth);
+		if (err)
+			goto err_free_dev;
+
+		err = devm_request_irq(eth->dev, eth->irq[2],
+				       mtk_handle_irq_rx, 0,
+				       dev_name(eth->dev), eth);
+	}
 	if (err)
 		goto err_free_dev;
 
-	err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
-			       dev_name(eth->dev), eth);
-	if (err)
-		goto err_free_dev;
-
-	err = mtk_mdio_init(eth);
-	if (err)
-		goto err_free_dev;
+	/* No MT7628/88 support yet */
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		err = mtk_mdio_init(eth);
+		if (err)
+			goto err_free_dev;
+	}
 
 	for (i = 0; i < MTK_MAX_DEVS; i++) {
 		if (!eth->netdev[i])
@@ -2605,6 +3044,7 @@
 static int mtk_remove(struct platform_device *pdev)
 {
 	struct mtk_eth *eth = platform_get_drvdata(pdev);
+	struct mtk_mac *mac;
 	int i;
 
 	/* stop all devices to make sure that dma is properly shut down */
@@ -2612,6 +3052,8 @@
 		if (!eth->netdev[i])
 			continue;
 		mtk_stop(eth->netdev[i]);
+		mac = netdev_priv(eth->netdev[i]);
+		phylink_disconnect_phy(mac->phylink);
 	}
 
 	mtk_hw_deinit(eth);
@@ -2625,27 +3067,56 @@
 }
 
 static const struct mtk_soc_data mt2701_data = {
-	.caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
+	.caps = MT7623_CAPS | MTK_HWLRO,
+	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7623_CLKS_BITMAP,
 	.required_pctl = true,
 };
 
+static const struct mtk_soc_data mt7621_data = {
+	.caps = MT7621_CAPS,
+	.hw_features = MTK_HW_FEATURES,
+	.required_clks = MT7621_CLKS_BITMAP,
+	.required_pctl = false,
+};
+
 static const struct mtk_soc_data mt7622_data = {
-	.caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW | MTK_HWLRO,
+	.ana_rgc3 = 0x2028,
+	.caps = MT7622_CAPS | MTK_HWLRO,
+	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7622_CLKS_BITMAP,
 	.required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7623_data = {
-	.caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
+	.caps = MT7623_CAPS | MTK_HWLRO,
+	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7623_CLKS_BITMAP,
 	.required_pctl = true,
 };
 
+static const struct mtk_soc_data mt7629_data = {
+	.ana_rgc3 = 0x128,
+	.caps = MT7629_CAPS | MTK_HWLRO,
+	.hw_features = MTK_HW_FEATURES,
+	.required_clks = MT7629_CLKS_BITMAP,
+	.required_pctl = false,
+};
+
+static const struct mtk_soc_data rt5350_data = {
+	.caps = MT7628_CAPS,
+	.hw_features = MTK_HW_FEATURES_MT7628,
+	.required_clks = MT7628_CLKS_BITMAP,
+	.required_pctl = false,
+};
+
 const struct of_device_id of_mtk_match[] = {
 	{ .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
+	{ .compatible = "mediatek,mt7621-eth", .data = &mt7621_data},
 	{ .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
 	{ .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
+	{ .compatible = "mediatek,mt7629-eth", .data = &mt7629_data},
+	{ .compatible = "ralink,rt5350-eth", .data = &rt5350_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, of_mtk_match);
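
Each compatible string binds one mtk_soc_data instance; mtk_probe() fetches it with of_device_get_match_data() and every per-SoC branch then goes through MTK_HAS_CAPS(). Condensed from the probe hunk earlier in this patch:

	eth->soc = of_device_get_match_data(&pdev->dev);

	/* e.g. MT7621 advertises MTK_QDMA, so: */
	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
		eth->tx_int_status_reg = MTK_QDMA_INT_STATUS;	/* 0x1A18 */
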
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 4681929..76bd12c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -1,11 +1,5 @@
-/*   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; version 2 of the License
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
  *
  *   Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
  *   Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
@@ -15,7 +9,12 @@
 #ifndef MTK_ETH_H
 #define MTK_ETH_H
 
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/of_net.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
+#include <linux/phylink.h>
 
 #define MTK_QDMA_PAGE_SIZE	2048
 #define	MTK_MAX_RX_LENGTH	1536
@@ -41,7 +40,8 @@
 				 NETIF_F_SG | NETIF_F_TSO | \
 				 NETIF_F_TSO6 | \
 				 NETIF_F_IPV6_CSUM)
-#define NEXT_RX_DESP_IDX(X, Y)	(((X) + 1) & ((Y) - 1))
+#define MTK_HW_FEATURES_MT7628	(NETIF_F_SG | NETIF_F_RXCSUM)
+#define NEXT_DESP_IDX(X, Y)	(((X) + 1) & ((Y) - 1))
 
 #define MTK_MAX_RX_RING_NUM	4
 #define MTK_HW_LRO_DMA_SIZE	8
@@ -120,6 +120,7 @@
 /* PDMA Global Configuration Register */
 #define MTK_PDMA_GLO_CFG	0xa04
 #define MTK_MULTI_EN		BIT(10)
+#define MTK_PDMA_SIZE_8DWORDS	(1 << 4)
 
 /* PDMA Reset Index Register */
 #define MTK_PDMA_RST_IDX	0xa08
@@ -214,7 +215,7 @@
 #define FC_THRES_MIN		0x4444
 
 /* QDMA Interrupt Status Register */
-#define MTK_QMTK_INT_STATUS	0x1A18
+#define MTK_QDMA_INT_STATUS	0x1A18
 #define MTK_RX_DONE_DLY		BIT(30)
 #define MTK_RX_DONE_INT3	BIT(19)
 #define MTK_RX_DONE_INT2	BIT(18)
@@ -278,11 +279,18 @@
 #define TX_DMA_OWNER_CPU	BIT(31)
 #define TX_DMA_LS0		BIT(30)
 #define TX_DMA_PLEN0(_x)	(((_x) & MTK_TX_DMA_BUF_LEN) << 16)
+#define TX_DMA_PLEN1(_x)	((_x) & MTK_TX_DMA_BUF_LEN)
 #define TX_DMA_SWC		BIT(14)
 #define TX_DMA_SDL(_x)		(((_x) & 0x3fff) << 16)
 
+/* PDMA on MT7628 */
+#define TX_DMA_DONE		BIT(31)
+#define TX_DMA_LS1		BIT(14)
+#define TX_DMA_DESP2_DEF	(TX_DMA_LS0 | TX_DMA_DONE)
+
 /* QDMA descriptor rxd2 */
 #define RX_DMA_DONE		BIT(31)
+#define RX_DMA_LSO		BIT(30)
 #define RX_DMA_PLEN0(_x)	(((_x) & 0x3fff) << 16)
 #define RX_DMA_GET_PLEN0(_x)	(((_x) >> 16) & 0x3fff)
 
@@ -291,6 +299,7 @@
 
 /* QDMA descriptor rxd4 */
 #define RX_DMA_L4_VALID		BIT(24)
+#define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
 #define RX_DMA_FPORT_SHIFT	19
 #define RX_DMA_FPORT_MASK	0x7
 
@@ -322,12 +331,19 @@
 #define MAC_MCR_SPEED_100	BIT(2)
 #define MAC_MCR_FORCE_DPX	BIT(1)
 #define MAC_MCR_FORCE_LINK	BIT(0)
-#define MAC_MCR_FIXED_LINK	(MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | \
-				 MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | \
-				 MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | \
-				 MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_RX_FC | \
-				 MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \
-				 MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK)
+#define MAC_MCR_FORCE_LINK_DOWN	(MAC_MCR_FORCE_MODE)
+
+/* MAC status registers */
+#define MTK_MAC_MSR(x)		(0x10108 + (x * 0x100))
+#define MAC_MSR_EEE1G		BIT(7)
+#define MAC_MSR_EEE100M		BIT(6)
+#define MAC_MSR_RX_FC		BIT(5)
+#define MAC_MSR_TX_FC		BIT(4)
+#define MAC_MSR_SPEED_1000	BIT(3)
+#define MAC_MSR_SPEED_100	BIT(2)
+#define MAC_MSR_SPEED_MASK	(MAC_MSR_SPEED_1000 | MAC_MSR_SPEED_100)
+#define MAC_MSR_DPX		BIT(1)
+#define MAC_MSR_LINK		BIT(0)
 
 /* TRGMII RXC control register */
 #define TRGMII_RCK_CTRL		0x10300
@@ -363,18 +379,29 @@
 #define ETHSYS_CHIPID4_7	0x4
 #define MT7623_ETH		7623
 #define MT7622_ETH		7622
+#define MT7621_ETH		7621
+
+/* ethernet system control register */
+#define ETHSYS_SYSCFG		0x10
+#define SYSCFG_DRAM_TYPE_DDR2	BIT(4)
 
 /* ethernet subsystem config register */
 #define ETHSYS_SYSCFG0		0x14
 #define SYSCFG0_GE_MASK		0x3
 #define SYSCFG0_GE_MODE(x, y)	(x << (12 + (y * 2)))
-#define SYSCFG0_SGMII_MASK	(3 << 8)
-#define SYSCFG0_SGMII_GMAC1	((2 << 8) & GENMASK(9, 8))
-#define SYSCFG0_SGMII_GMAC2	((3 << 8) & GENMASK(9, 8))
+#define SYSCFG0_SGMII_MASK     GENMASK(9, 8)
+#define SYSCFG0_SGMII_GMAC1    ((2 << 8) & SYSCFG0_SGMII_MASK)
+#define SYSCFG0_SGMII_GMAC2    ((3 << 8) & SYSCFG0_SGMII_MASK)
+#define SYSCFG0_SGMII_GMAC1_V2 BIT(9)
+#define SYSCFG0_SGMII_GMAC2_V2 BIT(8)
 
 /* ethernet subsystem clock register */
 #define ETHSYS_CLKCFG0		0x2c
 #define ETHSYS_TRGMII_CLK_SEL362_5	BIT(11)
+#define ETHSYS_TRGMII_MT7621_MASK	(BIT(5) | BIT(6))
+#define ETHSYS_TRGMII_MT7621_APLL	BIT(6)
+#define ETHSYS_TRGMII_MT7621_DDR_PLL	BIT(5)
 
 /* ethernet reset control register */
 #define ETHSYS_RSTCTRL		0x34
@@ -385,19 +412,61 @@
 /* Register to auto-negotiation restart */
 #define SGMSYS_PCS_CONTROL_1	0x0
 #define SGMII_AN_RESTART	BIT(9)
+#define SGMII_ISOLATE		BIT(10)
+#define SGMII_AN_ENABLE		BIT(12)
+#define SGMII_LINK_STATYS	BIT(18)
+#define SGMII_AN_ABILITY	BIT(19)
+#define SGMII_AN_COMPLETE	BIT(21)
+#define SGMII_PCS_FAULT		BIT(23)
+#define SGMII_AN_EXPANSION_CLR	BIT(30)
 
 /* Register for the programmable link timer; the unit is 2 * 8 ns */
 #define SGMSYS_PCS_LINK_TIMER	0x18
 #define SGMII_LINK_TIMER_DEFAULT	(0x186a0 & GENMASK(19, 0))
 
 /* Register to control remote fault */
-#define SGMSYS_SGMII_MODE	0x20
-#define SGMII_REMOTE_FAULT_DIS	BIT(8)
+#define SGMSYS_SGMII_MODE		0x20
+#define SGMII_IF_MODE_BIT0		BIT(0)
+#define SGMII_SPEED_DUPLEX_AN		BIT(1)
+#define SGMII_SPEED_10			0x0
+#define SGMII_SPEED_100			BIT(2)
+#define SGMII_SPEED_1000		BIT(3)
+#define SGMII_DUPLEX_FULL		BIT(4)
+#define SGMII_IF_MODE_BIT5		BIT(5)
+#define SGMII_REMOTE_FAULT_DIS		BIT(8)
+#define SGMII_CODE_SYNC_SET_VAL		BIT(9)
+#define SGMII_CODE_SYNC_SET_EN		BIT(10)
+#define SGMII_SEND_AN_ERROR_EN		BIT(11)
+#define SGMII_IF_MODE_MASK		GENMASK(5, 1)
+
+/* Register to set SGMII speed, ANA RG_ Control Signals III */
+#define SGMSYS_ANA_RG_CS3	0x2028
+#define RG_PHY_SPEED_MASK	(BIT(2) | BIT(3))
+#define RG_PHY_SPEED_1_25G	0x0
+#define RG_PHY_SPEED_3_125G	BIT(2)
 
 /* Register to power up QPHY */
 #define SGMSYS_QPHY_PWR_STATE_CTRL 0xe8
 #define	SGMII_PHYA_PWD		BIT(4)
 
+/* Infrasys subsystem config registers */
+#define INFRA_MISC2            0x70c
+#define CO_QPHY_SEL            BIT(0)
+#define GEPHY_MAC_SEL          BIT(1)
+
+/* MT7628/88 specific registers */
+#define MT7628_PDMA_OFFSET	0x0800
+#define MT7628_SDM_OFFSET	0x0c00
+
+#define MT7628_TX_BASE_PTR0	(MT7628_PDMA_OFFSET + 0x00)
+#define MT7628_TX_MAX_CNT0	(MT7628_PDMA_OFFSET + 0x04)
+#define MT7628_TX_CTX_IDX0	(MT7628_PDMA_OFFSET + 0x08)
+#define MT7628_TX_DTX_IDX0	(MT7628_PDMA_OFFSET + 0x0c)
+#define MT7628_PST_DTX_IDX0	BIT(0)
+
+#define MT7628_SDM_MAC_ADRL	(MT7628_SDM_OFFSET + 0x0c)
+#define MT7628_SDM_MAC_ADRH	(MT7628_SDM_OFFSET + 0x10)
+
 struct mtk_rx_dma {
 	unsigned int rxd1;
 	unsigned int rxd2;
@@ -462,15 +531,21 @@
  */
 enum mtk_clks_map {
 	MTK_CLK_ETHIF,
+	MTK_CLK_SGMIITOP,
 	MTK_CLK_ESW,
 	MTK_CLK_GP0,
 	MTK_CLK_GP1,
 	MTK_CLK_GP2,
+	MTK_CLK_FE,
 	MTK_CLK_TRGPLL,
 	MTK_CLK_SGMII_TX_250M,
 	MTK_CLK_SGMII_RX_250M,
 	MTK_CLK_SGMII_CDR_REF,
 	MTK_CLK_SGMII_CDR_FB,
+	MTK_CLK_SGMII2_TX_250M,
+	MTK_CLK_SGMII2_RX_250M,
+	MTK_CLK_SGMII2_CDR_REF,
+	MTK_CLK_SGMII2_CDR_FB,
 	MTK_CLK_SGMII_CK,
 	MTK_CLK_ETH2PLL,
 	MTK_CLK_MAX
@@ -488,6 +563,22 @@
 				 BIT(MTK_CLK_SGMII_CDR_FB) | \
 				 BIT(MTK_CLK_SGMII_CK) | \
 				 BIT(MTK_CLK_ETH2PLL))
+#define MT7621_CLKS_BITMAP	(0)
+#define MT7628_CLKS_BITMAP	(0)
+#define MT7629_CLKS_BITMAP	(BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) |  \
+				 BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \
+				 BIT(MTK_CLK_GP2) | BIT(MTK_CLK_FE) | \
+				 BIT(MTK_CLK_SGMII_TX_250M) | \
+				 BIT(MTK_CLK_SGMII_RX_250M) | \
+				 BIT(MTK_CLK_SGMII_CDR_REF) | \
+				 BIT(MTK_CLK_SGMII_CDR_FB) | \
+				 BIT(MTK_CLK_SGMII2_TX_250M) | \
+				 BIT(MTK_CLK_SGMII2_RX_250M) | \
+				 BIT(MTK_CLK_SGMII2_CDR_REF) | \
+				 BIT(MTK_CLK_SGMII2_CDR_FB) | \
+				 BIT(MTK_CLK_SGMII_CK) | \
+				 BIT(MTK_CLK_ETH2PLL) | BIT(MTK_CLK_SGMIITOP))
+
 enum mtk_dev_state {
 	MTK_HW_INIT,
 	MTK_RESETTING
@@ -528,6 +619,10 @@
 	struct mtk_tx_dma *last_free;
 	u16 thresh;
 	atomic_t free_count;
+	int dma_size;
+	struct mtk_tx_dma *dma_pdma;	/* For MT7628/88 PDMA handling */
+	dma_addr_t phys_pdma;
+	int cpu_idx;
 };
 
 /* PDMA rx ring mode */
@@ -557,35 +652,166 @@
 	u32 crx_idx_reg;
 };
 
-#define MTK_TRGMII			BIT(0)
-#define MTK_GMAC1_TRGMII		(BIT(1) | MTK_TRGMII)
-#define MTK_ESW				BIT(4)
-#define MTK_GMAC1_ESW			(BIT(5) | MTK_ESW)
-#define MTK_SGMII			BIT(8)
-#define MTK_GMAC1_SGMII			(BIT(9) | MTK_SGMII)
-#define MTK_GMAC2_SGMII			(BIT(10) | MTK_SGMII)
-#define MTK_DUAL_GMAC_SHARED_SGMII	(BIT(11) | MTK_GMAC1_SGMII | \
-					 MTK_GMAC2_SGMII)
-#define MTK_HWLRO			BIT(12)
+enum mkt_eth_capabilities {
+	MTK_RGMII_BIT = 0,
+	MTK_TRGMII_BIT,
+	MTK_SGMII_BIT,
+	MTK_ESW_BIT,
+	MTK_GEPHY_BIT,
+	MTK_MUX_BIT,
+	MTK_INFRA_BIT,
+	MTK_SHARED_SGMII_BIT,
+	MTK_HWLRO_BIT,
+	MTK_SHARED_INT_BIT,
+	MTK_TRGMII_MT7621_CLK_BIT,
+	MTK_QDMA_BIT,
+	MTK_SOC_MT7628_BIT,
+
+	/* MUX BITS */
+	MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT,
+	MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT,
+	MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT,
+	MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT,
+	MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT,
+
+	/* PATH BITS */
+	MTK_ETH_PATH_GMAC1_RGMII_BIT,
+	MTK_ETH_PATH_GMAC1_TRGMII_BIT,
+	MTK_ETH_PATH_GMAC1_SGMII_BIT,
+	MTK_ETH_PATH_GMAC2_RGMII_BIT,
+	MTK_ETH_PATH_GMAC2_SGMII_BIT,
+	MTK_ETH_PATH_GMAC2_GEPHY_BIT,
+	MTK_ETH_PATH_GDM1_ESW_BIT,
+};
+
+/* Supported hardware groups on SoCs */
+#define MTK_RGMII		BIT(MTK_RGMII_BIT)
+#define MTK_TRGMII		BIT(MTK_TRGMII_BIT)
+#define MTK_SGMII		BIT(MTK_SGMII_BIT)
+#define MTK_ESW			BIT(MTK_ESW_BIT)
+#define MTK_GEPHY		BIT(MTK_GEPHY_BIT)
+#define MTK_MUX			BIT(MTK_MUX_BIT)
+#define MTK_INFRA		BIT(MTK_INFRA_BIT)
+#define MTK_SHARED_SGMII	BIT(MTK_SHARED_SGMII_BIT)
+#define MTK_HWLRO		BIT(MTK_HWLRO_BIT)
+#define MTK_SHARED_INT		BIT(MTK_SHARED_INT_BIT)
+#define MTK_TRGMII_MT7621_CLK	BIT(MTK_TRGMII_MT7621_CLK_BIT)
+#define MTK_QDMA		BIT(MTK_QDMA_BIT)
+#define MTK_SOC_MT7628		BIT(MTK_SOC_MT7628_BIT)
+
+#define MTK_ETH_MUX_GDM1_TO_GMAC1_ESW		\
+	BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT)
+#define MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY	\
+	BIT(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT)
+#define MTK_ETH_MUX_U3_GMAC2_TO_QPHY		\
+	BIT(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT)
+#define MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII	\
+	BIT(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT)
+#define MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII	\
+	BIT(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT)
+
+/* Supported paths present on SoCs */
+#define MTK_ETH_PATH_GMAC1_RGMII	BIT(MTK_ETH_PATH_GMAC1_RGMII_BIT)
+#define MTK_ETH_PATH_GMAC1_TRGMII	BIT(MTK_ETH_PATH_GMAC1_TRGMII_BIT)
+#define MTK_ETH_PATH_GMAC1_SGMII	BIT(MTK_ETH_PATH_GMAC1_SGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_RGMII	BIT(MTK_ETH_PATH_GMAC2_RGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_SGMII	BIT(MTK_ETH_PATH_GMAC2_SGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_GEPHY	BIT(MTK_ETH_PATH_GMAC2_GEPHY_BIT)
+#define MTK_ETH_PATH_GDM1_ESW		BIT(MTK_ETH_PATH_GDM1_ESW_BIT)
+
+#define MTK_GMAC1_RGMII		(MTK_ETH_PATH_GMAC1_RGMII | MTK_RGMII)
+#define MTK_GMAC1_TRGMII	(MTK_ETH_PATH_GMAC1_TRGMII | MTK_TRGMII)
+#define MTK_GMAC1_SGMII		(MTK_ETH_PATH_GMAC1_SGMII | MTK_SGMII)
+#define MTK_GMAC2_RGMII		(MTK_ETH_PATH_GMAC2_RGMII | MTK_RGMII)
+#define MTK_GMAC2_SGMII		(MTK_ETH_PATH_GMAC2_SGMII | MTK_SGMII)
+#define MTK_GMAC2_GEPHY		(MTK_ETH_PATH_GMAC2_GEPHY | MTK_GEPHY)
+#define MTK_GDM1_ESW		(MTK_ETH_PATH_GDM1_ESW | MTK_ESW)
+
+/* MUXes present on SoCs */
+/* 0: GDM1 -> GMAC1, 1: GDM1 -> ESW */
+#define MTK_MUX_GDM1_TO_GMAC1_ESW (MTK_ETH_MUX_GDM1_TO_GMAC1_ESW | MTK_MUX)
+
+/* 0: GMAC2 -> GEPHY, 1: GMAC0 -> GePHY */
+#define MTK_MUX_GMAC2_GMAC0_TO_GEPHY    \
+	(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY | MTK_MUX | MTK_INFRA)
+
+/* 0: U3 -> QPHY, 1: GMAC2 -> QPHY */
+#define MTK_MUX_U3_GMAC2_TO_QPHY        \
+	(MTK_ETH_MUX_U3_GMAC2_TO_QPHY | MTK_MUX | MTK_INFRA)
+
+/* 2: GMAC1 -> SGMII, 3: GMAC2 -> SGMII */
+#define MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII      \
+	(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_MUX | \
+	MTK_SHARED_SGMII)
+
+/* 0: GMACx -> GEPHY, 1: GMACx -> SGMII where x is 1 or 2 */
+#define MTK_MUX_GMAC12_TO_GEPHY_SGMII   \
+	(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX)
+
 #define MTK_HAS_CAPS(caps, _x)		(((caps) & (_x)) == (_x))
 
+#define MT7621_CAPS  (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | \
+		      MTK_GMAC2_RGMII | MTK_SHARED_INT | \
+		      MTK_TRGMII_MT7621_CLK | MTK_QDMA)
+
+#define MT7622_CAPS  (MTK_GMAC1_RGMII | MTK_GMAC1_SGMII | MTK_GMAC2_RGMII | \
+		      MTK_GMAC2_SGMII | MTK_GDM1_ESW | \
+		      MTK_MUX_GDM1_TO_GMAC1_ESW | \
+		      MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_QDMA)
+
+#define MT7623_CAPS  (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | MTK_GMAC2_RGMII | \
+		      MTK_QDMA)
+
+#define MT7628_CAPS  (MTK_SHARED_INT | MTK_SOC_MT7628)
+
+#define MT7629_CAPS  (MTK_GMAC1_SGMII | MTK_GMAC2_SGMII | MTK_GMAC2_GEPHY | \
+		      MTK_GDM1_ESW | MTK_MUX_GDM1_TO_GMAC1_ESW | \
+		      MTK_MUX_GMAC2_GMAC0_TO_GEPHY | \
+		      MTK_MUX_U3_GMAC2_TO_QPHY | \
+		      MTK_MUX_GMAC12_TO_GEPHY_SGMII | MTK_QDMA)
+
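MTK_HAS_CAPS() is an all-bits test, which is what makes the composite flags above work: a path macro only matches when the PATH bit and its hardware-group bit are both present. A small illustration using the values defined here:

	u32 caps = MT7622_CAPS;

	MTK_HAS_CAPS(caps, MTK_SGMII);		/* true */
	MTK_HAS_CAPS(caps, MTK_GMAC1_SGMII);	/* true: path + group bit */
	MTK_HAS_CAPS(caps, MTK_GMAC1_TRGMII);	/* false: MT7622 lacks TRGMII */
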
 /* struct mtk_soc_data -	This is the structure holding all differences
  *				among various platforms
+ * @ana_rgc3:                   The offset of register ANA_RGC3 within the
+ *				sgmiisys syscon
  * @caps			Flags showing the extra capabilities of the SoC
+ * @hw_features			Flags showing the HW features the SoC supports
  * @required_clks		Bitmap of the clocks required on the target
  *				SoC
  * @required_pctl		A bool value to show whether the SoC requires
  *				the extra setup for those pins used by GMAC.
  */
 struct mtk_soc_data {
+	u32             ana_rgc3;
 	u32		caps;
 	u32		required_clks;
 	bool		required_pctl;
+	netdev_features_t hw_features;
 };
 
 /* currently no SoC has more than 2 macs */
 #define MTK_MAX_DEVS			2
 
+#define MTK_SGMII_PHYSPEED_AN          BIT(31)
+#define MTK_SGMII_PHYSPEED_MASK        GENMASK(2, 0)
+#define MTK_SGMII_PHYSPEED_1000        BIT(0)
+#define MTK_SGMII_PHYSPEED_2500        BIT(1)
+#define MTK_HAS_FLAGS(flags, _x)       (((flags) & (_x)) == (_x))
+
+/* struct mtk_sgmii -  This is the structure holding the SGMII regmaps and
+ *                     their characteristics
+ * @regmap:            The register maps pointing at the ranges used to set up
+ *                     SGMII modes
+ * @flags:             Flags selecting the mode each SGMII instance runs in
+ * @ana_rgc3:          The offset of register ANA_RGC3 within each regmap
+ */
+
+struct mtk_sgmii {
+	struct regmap   *regmap[MTK_MAX_DEVS];
+	u32             flags[MTK_MAX_DEVS];
+	u32             ana_rgc3;
+};
+
 /* struct mtk_eth -	This is the main data structure for holding the state
  *			of the driver
  * @dev:		The device pointer
@@ -601,8 +827,8 @@
  * @msg_enable:		Ethtool msg level
  * @ethsys:		The register map pointing at the range used to setup
  *			MII modes
- * @sgmiisys:		The register map pointing at the range used to setup
- *			SGMII modes
+ * @infra:              The register map pointing at the range used to set up
+ *                      the SGMII and GePHY paths
  * @pctl:		The register map pointing at the range used to setup
  *			GMAC port drive/slew values
  * @dma_refcnt:		track how many netdevs are using the DMA engine
@@ -634,7 +860,8 @@
 	u32				msg_enable;
 	unsigned long			sysclk;
 	struct regmap			*ethsys;
-	struct regmap			*sgmiisys;
+	struct regmap                   *infra;
+	struct mtk_sgmii                *sgmii;
 	struct regmap			*pctl;
 	bool				hwlro;
 	refcount_t			dma_refcnt;
@@ -653,27 +880,33 @@
 	unsigned long			state;
 
 	const struct mtk_soc_data	*soc;
+
+	u32				tx_int_mask_reg;
+	u32				tx_int_status_reg;
+	u32				rx_dma_l4_valid;
+	int				ip_align;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
  *			SoC
  * @id:			The number of the MAC
- * @ge_mode:            Interface mode kept for setup restoring
+ * @interface:		Interface mode kept for detecting change in hw settings
  * @of_node:		Our devicetree node
  * @hw:			Backpointer to our main data structure
  * @hw_stats:		Packet statistics counter
- * @trgmii		Indicate if the MAC uses TRGMII connected to internal
-			switch
  */
 struct mtk_mac {
 	int				id;
-	int				ge_mode;
+	phy_interface_t			interface;
+	unsigned int			mode;
+	int				speed;
 	struct device_node		*of_node;
+	struct phylink			*phylink;
+	struct phylink_config		phylink_config;
 	struct mtk_eth			*hw;
 	struct mtk_hw_stats		*hw_stats;
 	__be32				hwlro_ip[MTK_MAX_LRO_IP_CNT];
 	int				hwlro_ip_cnt;
-	bool				trgmii;
 };
 
 /* the struct describing the SoC. these are declared in the soc_xyz.c files */
@@ -685,4 +918,15 @@
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg);
 u32 mtk_r32(struct mtk_eth *eth, unsigned reg);
 
+int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *np,
+		   u32 ana_rgc3);
+int mtk_sgmii_setup_mode_an(struct mtk_sgmii *ss, int id);
+int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id,
+			       const struct phylink_link_state *state);
+void mtk_sgmii_restart_an(struct mtk_eth *eth, int mac_id);
+
+int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id);
+int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
+int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
+
 #endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
new file mode 100644
index 0000000..4db27df
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018-2019 MediaTek Inc.
+
+/* A library for the MediaTek SGMII circuit
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+
+#include "mtk_eth_soc.h"
+
+int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3)
+{
+	struct device_node *np;
+	int i;
+
+	ss->ana_rgc3 = ana_rgc3;
+
+	for (i = 0; i < MTK_MAX_DEVS; i++) {
+		np = of_parse_phandle(r, "mediatek,sgmiisys", i);
+		if (!np)
+			break;
+
+		ss->regmap[i] = syscon_node_to_regmap(np);
+		if (IS_ERR(ss->regmap[i]))
+			return PTR_ERR(ss->regmap[i]);
+	}
+
+	return 0;
+}
+
+int mtk_sgmii_setup_mode_an(struct mtk_sgmii *ss, int id)
+{
+	unsigned int val;
+
+	if (!ss->regmap[id])
+		return -EINVAL;
+
+	/* Setup the link timer and QPHY power up inside SGMIISYS */
+	regmap_write(ss->regmap[id], SGMSYS_PCS_LINK_TIMER,
+		     SGMII_LINK_TIMER_DEFAULT);
+
+	regmap_read(ss->regmap[id], SGMSYS_SGMII_MODE, &val);
+	val |= SGMII_REMOTE_FAULT_DIS;
+	regmap_write(ss->regmap[id], SGMSYS_SGMII_MODE, val);
+
+	regmap_read(ss->regmap[id], SGMSYS_PCS_CONTROL_1, &val);
+	val |= SGMII_AN_RESTART;
+	regmap_write(ss->regmap[id], SGMSYS_PCS_CONTROL_1, val);
+
+	regmap_read(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, &val);
+	val &= ~SGMII_PHYA_PWD;
+	regmap_write(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, val);
+
+	return 0;
+}
+
+int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id,
+			       const struct phylink_link_state *state)
+{
+	unsigned int val;
+
+	if (!ss->regmap[id])
+		return -EINVAL;
+
+	regmap_read(ss->regmap[id], ss->ana_rgc3, &val);
+	val &= ~RG_PHY_SPEED_MASK;
+	if (state->interface == PHY_INTERFACE_MODE_2500BASEX)
+		val |= RG_PHY_SPEED_3_125G;
+	regmap_write(ss->regmap[id], ss->ana_rgc3, val);
+
+	/* Disable SGMII AN */
+	regmap_read(ss->regmap[id], SGMSYS_PCS_CONTROL_1, &val);
+	val &= ~SGMII_AN_ENABLE;
+	regmap_write(ss->regmap[id], SGMSYS_PCS_CONTROL_1, val);
+
+	/* SGMII force mode setting */
+	regmap_read(ss->regmap[id], SGMSYS_SGMII_MODE, &val);
+	val &= ~SGMII_IF_MODE_MASK;
+
+	switch (state->speed) {
+	case SPEED_10:
+		val |= SGMII_SPEED_10;
+		break;
+	case SPEED_100:
+		val |= SGMII_SPEED_100;
+		break;
+	case SPEED_2500:
+	case SPEED_1000:
+		val |= SGMII_SPEED_1000;
+		break;
+	}
+
+	if (state->duplex == DUPLEX_FULL)
+		val |= SGMII_DUPLEX_FULL;
+
+	regmap_write(ss->regmap[id], SGMSYS_SGMII_MODE, val);
+
+	/* Release PHYA power down state */
+	regmap_read(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, &val);
+	val &= ~SGMII_PHYA_PWD;
+	regmap_write(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, val);
+
+	return 0;
+}
+
+void mtk_sgmii_restart_an(struct mtk_eth *eth, int mac_id)
+{
+	struct mtk_sgmii *ss = eth->sgmii;
+	unsigned int val, sid;
+
+	/* Decide how GMAC and SGMIISYS are mapped */
+	sid = (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_SGMII)) ?
+	       0 : mac_id;
+
+	if (!ss->regmap[sid])
+		return;
+
+	regmap_read(ss->regmap[sid], SGMSYS_PCS_CONTROL_1, &val);
+	val |= SGMII_AN_RESTART;
+	regmap_write(ss->regmap[sid], SGMSYS_PCS_CONTROL_1, val);
+}
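
The library stays SoC-agnostic by taking the ANA_RGC3 offset from mtk_soc_data (0x2028 on MT7622, 0x128 on MT7629, per the soc tables earlier in this patch). The probe-side wiring, reproduced from mtk_probe() above:

	err = mtk_sgmii_init(eth->sgmii, pdev->dev.of_node,
			     eth->soc->ana_rgc3);
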
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
index 872548c..23cf791 100644
--- a/drivers/net/ethernet/mellanox/Kconfig
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Mellanox driver configuration
 #
diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile
index 016aa26..79773ac 100644
--- a/drivers/net/ethernet/mellanox/Makefile
+++ b/drivers/net/ethernet/mellanox/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Mellanox device drivers.
 #
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index f200b8c..e69c3c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -1,10 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Mellanox driver configuration
 #
 
 config MLX4_EN
 	tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
-	depends on MAY_USE_DEVLINK
 	depends on PCI && NETDEVICES && ETHERNET && INET
 	select MLX4_CORE
 	imply PTP_1588_CLOCK
@@ -27,6 +27,7 @@
 config MLX4_CORE
 	tristate
 	depends on PCI
+	select NET_DEVLINK
 	default n
 
 config MLX4_DEBUG
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 21788d4..b330020 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -185,8 +185,7 @@
 	bitmap->avail = num - reserved_top - reserved_bot;
 	bitmap->effective_len = bitmap->avail;
 	spin_lock_init(&bitmap->lock);
-	bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long),
-				GFP_KERNEL);
+	bitmap->table = bitmap_zalloc(bitmap->max, GFP_KERNEL);
 	if (!bitmap->table)
 		return -ENOMEM;
 
@@ -197,7 +196,7 @@
 
 void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
 {
-	kfree(bitmap->table);
+	bitmap_free(bitmap->table);
 }
 
 struct mlx4_zone_allocator {
@@ -584,8 +583,8 @@
 	buf->npages       = 1;
 	buf->page_shift   = get_order(size) + PAGE_SHIFT;
 	buf->direct.buf   =
-		dma_zalloc_coherent(&dev->persist->pdev->dev,
-				    size, &t, GFP_KERNEL);
+		dma_alloc_coherent(&dev->persist->pdev->dev, size, &t,
+				   GFP_KERNEL);
 	if (!buf->direct.buf)
 		return -ENOMEM;
 
@@ -614,7 +613,7 @@
 		int i;
 
 		buf->direct.buf = NULL;
-		buf->nbufs	= (size + PAGE_SIZE - 1) / PAGE_SIZE;
+		buf->nbufs      = DIV_ROUND_UP(size, PAGE_SIZE);
 		buf->npages	= buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
@@ -624,8 +623,8 @@
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
-				dma_zalloc_coherent(&dev->persist->pdev->dev,
-						    PAGE_SIZE, &t, GFP_KERNEL);
+				dma_alloc_coherent(&dev->persist->pdev->dev,
+						   PAGE_SIZE, &t, GFP_KERNEL);
 			if (!buf->page_list[i].buf)
 				goto err_free;
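
bitmap_zalloc()/bitmap_free() (available since v4.19) encapsulate the BITS_TO_LONGS sizing that was open-coded before; allocation size and zeroing are unchanged. Before and after, with error handling elided:

	/* before */
	bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long),
				GFP_KERNEL);
	kfree(bitmap->table);

	/* after */
	bitmap->table = bitmap_zalloc(bitmap->max, GFP_KERNEL);
	bitmap_free(bitmap->table);
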
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index c81d15b..5b11557 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -129,10 +129,6 @@
 	comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
 	__raw_writel((__force u32)cpu_to_be32(comm_flags),
 		     (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
-	/* Make sure that our comm channel write doesn't
-	 * get mixed in with writes from another CPU.
-	 */
-	mmiowb();
 
 	end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
 	while (time_before(jiffies, end)) {
@@ -214,7 +210,7 @@
 	mutex_lock(&persist->interface_state_mutex);
 	if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
 	    !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
-		err = mlx4_restart_one(persist->pdev, false, NULL);
+		err = mlx4_restart_one(persist->pdev);
 		mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
 			  err);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e65bc3c..c678344 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -281,7 +281,6 @@
 	val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
 	__raw_writel((__force u32) cpu_to_be32(val),
 		     &priv->mfunc.comm->slave_write);
-	mmiowb();
 	mutex_unlock(&dev->persist->device_state_mutex);
 	return 0;
 }
@@ -496,12 +495,6 @@
 					       (op_modifier << HCR_OPMOD_SHIFT) |
 					       op), hcr + 6);
 
-	/*
-	 * Make sure that our HCR writes don't get mixed in with
-	 * writes from another CPU starting a FW command.
-	 */
-	mmiowb();
-
 	cmd->toggle = cmd->toggle ^ 1;
 
 	ret = 0;
@@ -2206,7 +2199,6 @@
 	}
 	__raw_writel((__force u32) cpu_to_be32(reply),
 		     &priv->mfunc.comm[slave].slave_read);
-	mmiowb();
 
 	return;
 
@@ -2410,7 +2402,6 @@
 				     &priv->mfunc.comm[i].slave_write);
 			__raw_writel((__force u32) 0,
 				     &priv->mfunc.comm[i].slave_read);
-			mmiowb();
 			for (port = 1; port <= MLX4_MAX_PORTS; port++) {
 				struct mlx4_vport_state *admin_vport;
 				struct mlx4_vport_state *oper_vport;
@@ -2576,10 +2567,6 @@
 		slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
 		__raw_writel((__force u32)cpu_to_be32(slave_read),
 			     &priv->mfunc.comm[slave].slave_read);
-		/* Make sure that our comm channel write doesn't
-		 * get mixed in with writes from another CPU.
-		 */
-		mmiowb();
 	}
 }
 
@@ -2645,6 +2632,8 @@
 	if (!priv->cmd.context)
 		return -ENOMEM;
 
+	if (mlx4_is_mfunc(dev))
+		mutex_lock(&priv->cmd.slave_cmd_mutex);
 	down_write(&priv->cmd.switch_sem);
 	for (i = 0; i < priv->cmd.max_cmds; ++i) {
 		priv->cmd.context[i].token = i;
@@ -2670,6 +2659,8 @@
 	down(&priv->cmd.poll_sem);
 	priv->cmd.use_events = 1;
 	up_write(&priv->cmd.switch_sem);
+	if (mlx4_is_mfunc(dev))
+		mutex_unlock(&priv->cmd.slave_cmd_mutex);
 
 	return err;
 }
@@ -2682,6 +2673,8 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int i;
 
+	if (mlx4_is_mfunc(dev))
+		mutex_lock(&priv->cmd.slave_cmd_mutex);
 	down_write(&priv->cmd.switch_sem);
 	priv->cmd.use_events = 0;
 
@@ -2689,9 +2682,12 @@
 		down(&priv->cmd.event_sem);
 
 	kfree(priv->cmd.context);
+	priv->cmd.context = NULL;
 
 	up(&priv->cmd.poll_sem);
 	up_write(&priv->cmd.switch_sem);
+	if (mlx4_is_mfunc(dev))
+		mutex_unlock(&priv->cmd.slave_cmd_mutex);
 }
 
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
@@ -3274,7 +3270,7 @@
 		mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n",
 			  link_state, slave, port);
 		return -EINVAL;
-	};
+	}
 	s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
 	s_info->link_state = link_state;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index d8e9a32..65f8a4b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -144,9 +144,9 @@
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
-			 int cq_num)
+			 int cq_num, u8 opmod)
 {
-	return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+	return mlx4_cmd(dev, mailbox->dma, cq_num, opmod,
 			MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
 			MLX4_CMD_WRAPPED);
 }
@@ -287,11 +287,63 @@
 		__mlx4_cq_free_icm(dev, cqn);
 }
 
+static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size)
+{
+	int entries_per_copy = PAGE_SIZE / cqe_size;
+	void *init_ents;
+	int err = 0;
+	int i;
+
+	init_ents = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!init_ents)
+		return -ENOMEM;
+
+	/* Populate a list of CQ entries to reduce the number of
+	 * copy_to_user calls. 0xcc is the initialization value
+	 * required by the FW.
+	 */
+	memset(init_ents, 0xcc, PAGE_SIZE);
+
+	if (entries_per_copy < entries) {
+		for (i = 0; i < entries / entries_per_copy; i++) {
+			err = copy_to_user((void __user *)buf, init_ents, PAGE_SIZE) ?
+				-EFAULT : 0;
+			if (err)
+				goto out;
+
+			buf += PAGE_SIZE;
+		}
+	} else {
+		err = copy_to_user((void __user *)buf, init_ents, entries * cqe_size) ?
+			-EFAULT : 0;
+	}
+
+out:
+	kfree(init_ents);
+
+	return err;
+}
+
+static void mlx4_init_kernel_cqes(struct mlx4_buf *buf,
+				  int entries,
+				  int cqe_size)
+{
+	int i;
+
+	if (buf->nbufs == 1)
+		memset(buf->direct.buf, 0xcc, entries * cqe_size);
+	else
+		for (i = 0; i < buf->npages; i++)
+			memset(buf->page_list[i].buf, 0xcc,
+			       1UL << buf->page_shift);
+}
+
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 		  struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
 		  struct mlx4_cq *cq, unsigned vector, int collapsed,
-		  int timestamp_en)
+		  int timestamp_en, void *buf_addr, bool user_cq)
 {
+	bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
 	struct mlx4_cmd_mailbox *mailbox;
@@ -336,7 +388,20 @@
 	cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
 	cq_context->db_rec_addr     = cpu_to_be64(db_rec);
 
-	err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
+	if (sw_cq_init) {
+		if (user_cq) {
+			err = mlx4_init_user_cqes(buf_addr, nent,
+						  dev->caps.cqe_size);
+			if (err)
+				sw_cq_init = false;
+		} else {
+			mlx4_init_kernel_cqes(buf_addr, nent,
+					      dev->caps.cqe_size);
+		}
+	}
+
+	err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init);
+
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	if (err)
 		goto err_radix;
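
The new opmod bit passed to SW2HW_CQ tells firmware that software already wrote the 0xcc pattern into the CQE buffer, presumably letting FW skip its own initialization on devices advertising MLX4_DEV_CAP_FLAG2_SW_CQ_INIT. Condensed from the hunk above:

	bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;

	if (sw_cq_init) {
		if (user_cq && mlx4_init_user_cqes(buf_addr, nent,
						   dev->caps.cqe_size))
			sw_cq_init = false;	/* copy_to_user failed */
		else if (!user_cq)
			mlx4_init_kernel_cqes(buf_addr, nent,
					      dev->caps.cqe_size);
	}
	err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init);
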
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index 88316c7..eaf08f7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -99,8 +99,7 @@
 					readl(cr_space + offset);
 
 		err = devlink_region_snapshot_create(crdump->region_crspace,
-						     cr_res_size, crspace_data,
-						     id, &kvfree);
+						     crspace_data, id, &kvfree);
 		if (err) {
 			kvfree(crspace_data);
 			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
@@ -139,9 +138,7 @@
 					readl(health_buf_start + offset);
 
 		err = devlink_region_snapshot_create(crdump->region_fw_health,
-						     HEALTH_BUFFER_SIZE,
-						     health_data,
-						     id, &kvfree);
+						     health_data, id, &kvfree);
 		if (err) {
 			kvfree(health_data);
 			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 1e487ac..74d4667 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -54,11 +54,8 @@
 
 	cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
 	if (!cq) {
-		cq = kzalloc(sizeof(*cq), GFP_KERNEL);
-		if (!cq) {
-			en_err(priv, "Failed to allocate CQ structure\n");
-			return -ENOMEM;
-		}
+		en_err(priv, "Failed to allocate CQ structure\n");
+		return -ENOMEM;
 	}
 
 	cq->size = entries;
@@ -143,7 +140,7 @@
 	cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
 			    &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
-			    cq->vector, 0, timestamp_en);
+			    cq->vector, 0, timestamp_en, &cq->wqres.buf, false);
 	if (err)
 		goto free_eq;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index d290f07..a1202e5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -639,7 +639,7 @@
 #define MLX4_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...)		\
 	({								\
 		struct ptys2ethtool_config *cfg;			\
-		const unsigned int modes[] = { __VA_ARGS__ };		\
+		static const unsigned int modes[] = { __VA_ARGS__ };	\
 		unsigned int i;						\
 		cfg = &ptys2ethtool_map[reg_];				\
 		cfg->speed = speed_;					\
@@ -1745,6 +1745,7 @@
 		err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
+		cmd->data = MAX_NUM_OF_FS_RULES;
 		while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) {
 			err = mlx4_en_get_flow(dev, cmd, i);
 			if (!err)
@@ -1811,6 +1812,7 @@
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_port_profile new_prof;
 	struct mlx4_en_priv *tmp;
+	int total_tx_count;
 	int port_up = 0;
 	int xdp_count;
 	int err = 0;
@@ -1825,13 +1827,12 @@
 
 	mutex_lock(&mdev->state_lock);
 	xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
-	if (channel->tx_count * priv->prof->num_up + xdp_count >
-	    priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) {
+	total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count;
+	if (total_tx_count > MAX_TX_RINGS) {
 		err = -EINVAL;
 		en_err(priv,
 		       "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
-		       channel->tx_count * priv->prof->num_up  + xdp_count,
-		       MAX_TX_RINGS);
+		       total_tx_count, MAX_TX_RINGS);
 		goto out;
 	}
 
@@ -2010,6 +2011,8 @@
 	return ret;
 }
 
+#define MLX4_EEPROM_PAGE_LEN 256
+
 static int mlx4_en_get_module_info(struct net_device *dev,
 				   struct ethtool_modinfo *modinfo)
 {
@@ -2044,7 +2047,7 @@
 		break;
 	case MLX4_MODULE_ID_SFP:
 		modinfo->type = ETH_MODULE_SFF_8472;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		modinfo->eeprom_len = MLX4_EEPROM_PAGE_LEN;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index d25e16d..109472d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -167,8 +167,13 @@
 		params->prof[i].rx_ppp = pfcrx;
 		params->prof[i].tx_pause = !(pfcrx || pfctx);
 		params->prof[i].tx_ppp = pfctx;
-		params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
-		params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+		if (mlx4_low_memory_profile()) {
+			params->prof[i].tx_ring_size = MLX4_EN_MIN_TX_SIZE;
+			params->prof[i].rx_ring_size = MLX4_EN_MIN_RX_SIZE;
+		} else {
+			params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
+			params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+		}
 		params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
 		params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up;
 		params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up *
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0d7fd3f..70fd246 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -39,7 +39,6 @@
 #include <linux/slab.h>
 #include <linux/hash.h>
 #include <net/ip.h>
-#include <net/busy_poll.h>
 #include <net/vxlan.h>
 #include <net/devlink.h>
 
@@ -92,6 +91,7 @@
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_port_profile new_prof;
 	struct mlx4_en_priv *tmp;
+	int total_count;
 	int port_up = 0;
 	int err = 0;
 
@@ -105,6 +105,14 @@
 				      MLX4_EN_NUM_UP_HIGH;
 	new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up *
 				   new_prof.num_up;
+	total_count = new_prof.tx_ring_num[TX] + new_prof.tx_ring_num[TX_XDP];
+	if (total_count > MAX_TX_RINGS) {
+		err = -EINVAL;
+		en_err(priv,
+		       "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
+		       total_count, MAX_TX_RINGS);
+		goto out;
+	}
 	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
 	if (err)
 		goto out;
@@ -2646,14 +2654,6 @@
 		en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret);
 		return;
 	}
-
-	/* set offloads */
-	priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-				      NETIF_F_RXCSUM |
-				      NETIF_F_TSO | NETIF_F_TSO6 |
-				      NETIF_F_GSO_UDP_TUNNEL |
-				      NETIF_F_GSO_UDP_TUNNEL_CSUM |
-				      NETIF_F_GSO_PARTIAL;
 }
 
 static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
@@ -2661,14 +2661,6 @@
 	int ret;
 	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
 						 vxlan_del_task);
-	/* unset offloads */
-	priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-					NETIF_F_RXCSUM |
-					NETIF_F_TSO | NETIF_F_TSO6 |
-					NETIF_F_GSO_UDP_TUNNEL |
-					NETIF_F_GSO_UDP_TUNNEL_CSUM |
-					NETIF_F_GSO_PARTIAL);
-
 	ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
 				  VXLAN_STEER_BY_OUTER_MAC, 0);
 	if (ret)
@@ -3361,7 +3353,7 @@
 	dev->addr_len = ETH_ALEN;
 	mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]);
 	if (!is_valid_ether_addr(dev->dev_addr)) {
-		en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n",
+		en_err(priv, "Port: %d, invalid mac burned: %pM, quitting\n",
 		       priv->port, dev->dev_addr);
 		err = -EINVAL;
 		goto out;
@@ -3416,6 +3408,23 @@
 	if (mdev->LSO_support)
 		dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
 
+	if (mdev->dev->caps.tunnel_offload_mode ==
+	    MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+		dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+				    NETIF_F_GSO_UDP_TUNNEL_CSUM |
+				    NETIF_F_GSO_PARTIAL;
+		dev->features    |= NETIF_F_GSO_UDP_TUNNEL |
+				    NETIF_F_GSO_UDP_TUNNEL_CSUM |
+				    NETIF_F_GSO_PARTIAL;
+		dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+		dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+				       NETIF_F_RXCSUM |
+				       NETIF_F_TSO | NETIF_F_TSO6 |
+				       NETIF_F_GSO_UDP_TUNNEL |
+				       NETIF_F_GSO_UDP_TUNNEL_CSUM |
+				       NETIF_F_GSO_PARTIAL;
+	}
+
 	dev->vlan_features = dev->hw_features;
 
 	dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH;
@@ -3484,16 +3493,6 @@
 		priv->rss_hash_fn = ETH_RSS_HASH_TOP;
 	}
 
-	if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
-		dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
-				    NETIF_F_GSO_UDP_TUNNEL_CSUM |
-				    NETIF_F_GSO_PARTIAL;
-		dev->features    |= NETIF_F_GSO_UDP_TUNNEL |
-				    NETIF_F_GSO_UDP_TUNNEL_CSUM |
-				    NETIF_F_GSO_PARTIAL;
-		dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
-	}
-
 	/* MTU range: 68 - hw-specific max */
 	dev->min_mtu = ETH_MIN_MTU;
 	dev->max_mtu = priv->max_mtu;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a1aeeb8..db3552f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -31,7 +31,6 @@
  *
  */
 
-#include <net/busy_poll.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/mlx4/cq.h>
@@ -44,6 +43,7 @@
 #include <linux/vmalloc.h>
 #include <linux/irq.h>
 
+#include <net/ip.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_checksum.h>
 #endif
@@ -271,11 +271,8 @@
 
 	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
 	if (!ring) {
-		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-		if (!ring) {
-			en_err(priv, "Failed to allocate RX ring structure\n");
-			return -ENOMEM;
-		}
+		en_err(priv, "Failed to allocate RX ring structure\n");
+		return -ENOMEM;
 	}
 
 	ring->prod = 0;
@@ -620,6 +617,8 @@
 }
 #endif
 
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
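For reference, ETH_ZLEN is 60 and ETH_FCS_LEN is 4, so short_frame() flags anything at or below the 64-byte minimum Ethernet frame, exactly the frames where the hardware checksum may have covered padding octets:

	short_frame(60);	/* true: minimal padded payload */
	short_frame(64);	/* true: minimal frame incl. FCS */
	short_frame(65);	/* false: cannot contain pad octets */
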
 /* We reach this function only after checking that any of
  * the (IPv4 | IPv6) bits are set in cqe->status.
  */
@@ -627,9 +626,20 @@
 		      netdev_features_t dev_features)
 {
 	__wsum hw_checksum = 0;
+	void *hdr;
 
-	void *hdr = (u8 *)va + sizeof(struct ethhdr);
+	/* CQE csum doesn't cover padding octets in short ethernet
+	 * frames. And the pad field is appended prior to calculating
+	 * and appending the FCS field.
+	 *
+	 * Detecting these padded frames requires verifying and parsing
+	 * IP headers, so we simply force all those small frames to skip
+	 * checksum complete.
+	 */
+	if (short_frame(skb->len))
+		return -EINVAL;
 
+	hdr = (u8 *)va + sizeof(struct ethhdr);
 	hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
 
 	if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) &&
@@ -822,6 +832,11 @@
 		skb_record_rx_queue(skb, cq_ring);
 
 		if (likely(dev->features & NETIF_F_RXCSUM)) {
+			/* TODO: For IP non-TCP/UDP packets, when checksum
+			 * complete is not an option (unsupported or any other
+			 * reason), we could check the CQE's IPOK status bit and
+			 * report CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE
+			 */
 			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
 						       MLX4_CQE_STATUS_UDP)) &&
 			    (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
@@ -875,7 +890,7 @@
 			skb->data_len = length;
 			napi_gro_frags(&cq->napi);
 		} else {
-			skb->vlan_tci = 0;
+			__vlan_hwaccel_clear_tag(skb);
 			skb_clear_hash(skb);
 		}
 next:
@@ -1172,7 +1187,7 @@
 	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
-		goto rss_err;
+		goto qp_alloc_err;
 	}
 
 	rss_map->indir_qp->event = mlx4_en_sqp_event;
@@ -1226,6 +1241,7 @@
 		       MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp);
 	mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
 	mlx4_qp_free(mdev->dev, rss_map->indir_qp);
+qp_alloc_err:
 	kfree(rss_map->indir_qp);
 	rss_map->indir_qp = NULL;
 rss_err:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 1857ee0..4d5ca30 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -57,11 +57,8 @@
 
 	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
 	if (!ring) {
-		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-		if (!ring) {
-			en_err(priv, "Failed allocating TX ring\n");
-			return -ENOMEM;
-		}
+		en_err(priv, "Failed allocating TX ring\n");
+		return -ENOMEM;
 	}
 
 	ring->size = size;
@@ -688,16 +685,15 @@
 }
 
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 struct net_device *sb_dev,
-			 select_queue_fallback_t fallback)
+			 struct net_device *sb_dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
 
 	if (netdev_get_num_tc(dev))
-		return fallback(dev, skb, NULL);
+		return netdev_pick_tx(dev, skb, NULL);
 
-	return fallback(dev, skb, NULL) % rings_p_up;
+	return netdev_pick_tx(dev, skb, NULL) % rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, const void *src,
@@ -776,9 +772,7 @@
 
 	/* Map fragments if any */
 	for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
-		const struct skb_frag_struct *frag;
-
-		frag = &shinfo->frags[i_frag];
+		const skb_frag_t *frag = &shinfo->frags[i_frag];
 		byte_count = skb_frag_size(frag);
 		dma = skb_frag_dma_map(ddev, frag,
 				       0, byte_count,
@@ -1006,7 +1000,6 @@
 		ring->packets++;
 	}
 	ring->bytes += tx_info->nr_bytes;
-	netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
 	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
 
 	if (tx_info->inl)
@@ -1044,7 +1037,10 @@
 		netif_tx_stop_queue(ring->tx_queue);
 		ring->queue_stopped++;
 	}
-	send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);
+
+	send_doorbell = __netdev_tx_sent_queue(ring->tx_queue,
+					       tx_info->nr_bytes,
+					       netdev_xmit_more());
 
 	real_size = (real_size / 16) & 0x3f;
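
__netdev_tx_sent_queue() (available since v4.20) folds the BQL byte accounting and the xmit-more decision into one call: it returns true when the doorbell must ring, i.e. when no further packet is pending or BQL just stopped the queue. The resulting pattern, with the hardware kick shown as a hypothetical helper:

	bool kick = __netdev_tx_sent_queue(ring->tx_queue,
					   tx_info->nr_bytes,
					   netdev_xmit_more());
	if (kick)
		ring_hw_doorbell(ring);	/* hypothetical helper */
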
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 2df92db..c790a5f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -100,7 +100,7 @@
 					       req_not << 31),
 		     eq->doorbell);
 	/* We still want ordering, just not swabbing, so add a barrier */
-	mb();
+	wmb();
 }
 
 static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor,
@@ -558,6 +558,7 @@
 			mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n",
 				 __func__, be32_to_cpu(eqe->event.srq.srqn),
 				 eq->eqn);
+			/* fall through */
 		case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
 			if (mlx4_is_master(dev)) {
 				/* forward only to slave owning the SRQ */
@@ -820,7 +821,7 @@
 				  !!(eqe->owner & 0x80) ^
 				  !!(eq->cons_index & eq->nent) ? "HW" : "SW");
 			break;
-		};
+		}
 
 		++eq->cons_index;
 		eqes_found = 1;
@@ -1012,8 +1013,6 @@
 
 		dma_list[i] = t;
 		eq->page_list[i].map = t;
-
-		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
 	}
 
 	eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index babcfd9..6e501af 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -166,6 +166,7 @@
 		[37] = "sl to vl mapping table change event support",
 		[38] = "user MAC support",
 		[39] = "Report driver version to FW support",
+		[40] = "SW CQ initialization support",
 	};
 	int i;
 
@@ -1098,6 +1099,8 @@
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM;
 	if (field32 & (1 << 21))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS;
+	if (field32 & (1 << 23))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
 
 	for (i = 1; i <= dev_cap->num_ports; i++) {
 		err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i);
@@ -2064,9 +2067,11 @@
 {
 	struct mlx4_cmd_mailbox *mailbox;
 	__be32 *outbox;
+	u64 qword_field;
 	u32 dword_field;
-	int err;
+	u16 word_field;
 	u8 byte_field;
+	int err;
 	static const u8 a0_dmfs_query_hw_steering[] =  {
 		[0] = MLX4_STEERING_DMFS_A0_DEFAULT,
 		[1] = MLX4_STEERING_DMFS_A0_DYNAMIC,
@@ -2094,19 +2099,32 @@
 
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
-	MLX4_GET(param->qpc_base,      outbox, INIT_HCA_QPC_BASE_OFFSET);
-	MLX4_GET(param->log_num_qps,   outbox, INIT_HCA_LOG_QP_OFFSET);
-	MLX4_GET(param->srqc_base,     outbox, INIT_HCA_SRQC_BASE_OFFSET);
-	MLX4_GET(param->log_num_srqs,  outbox, INIT_HCA_LOG_SRQ_OFFSET);
-	MLX4_GET(param->cqc_base,      outbox, INIT_HCA_CQC_BASE_OFFSET);
-	MLX4_GET(param->log_num_cqs,   outbox, INIT_HCA_LOG_CQ_OFFSET);
-	MLX4_GET(param->altc_base,     outbox, INIT_HCA_ALTC_BASE_OFFSET);
-	MLX4_GET(param->auxc_base,     outbox, INIT_HCA_AUXC_BASE_OFFSET);
-	MLX4_GET(param->eqc_base,      outbox, INIT_HCA_EQC_BASE_OFFSET);
-	MLX4_GET(param->log_num_eqs,   outbox, INIT_HCA_LOG_EQ_OFFSET);
-	MLX4_GET(param->num_sys_eqs,   outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
-	MLX4_GET(param->rdmarc_base,   outbox, INIT_HCA_RDMARC_BASE_OFFSET);
-	MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
+	MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET);
+	param->qpc_base = qword_field & ~((u64)0x1f);
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET);
+	param->log_num_qps = byte_field & 0x1f;
+	MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET);
+	param->srqc_base = qword_field & ~((u64)0x1f);
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET);
+	param->log_num_srqs = byte_field & 0x1f;
+	MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET);
+	param->cqc_base = qword_field & ~((u64)0x1f);
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET);
+	param->log_num_cqs = byte_field & 0x1f;
+	MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET);
+	param->altc_base = qword_field;
+	MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET);
+	param->auxc_base = qword_field;
+	MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET);
+	param->eqc_base = qword_field & ~((u64)0x1f);
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET);
+	param->log_num_eqs = byte_field & 0x1f;
+	MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
+	param->num_sys_eqs = word_field & 0xfff;
+	MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+	param->rdmarc_base = qword_field & ~((u64)0x1f);
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET);
+	param->log_rd_per_qp = byte_field & 0x7;
 
 	MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET);
 	if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) {
@@ -2125,22 +2143,21 @@
 	/* steering attributes */
 	if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
 		MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
-		MLX4_GET(param->log_mc_entry_sz, outbox,
-			 INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
-		MLX4_GET(param->log_mc_table_sz, outbox,
-			 INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
-		MLX4_GET(byte_field, outbox,
-			 INIT_HCA_FS_A0_OFFSET);
+		MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+		param->log_mc_entry_sz = byte_field & 0x1f;
+		MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+		param->log_mc_table_sz = byte_field & 0x1f;
+		MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET);
 		param->dmfs_high_steer_mode =
 			a0_dmfs_query_hw_steering[(byte_field >> 6) & 3];
 	} else {
 		MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
-		MLX4_GET(param->log_mc_entry_sz, outbox,
-			 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
-		MLX4_GET(param->log_mc_hash_sz,  outbox,
-			 INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
-		MLX4_GET(param->log_mc_table_sz, outbox,
-			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+		MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+		param->log_mc_entry_sz = byte_field & 0x1f;
+		MLX4_GET(byte_field,  outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+		param->log_mc_hash_sz = byte_field & 0x1f;
+		MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+		param->log_mc_table_sz = byte_field & 0x1f;
 	}
 
 	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
@@ -2164,15 +2181,18 @@
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
-	MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET);
-	MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+	MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET);
+	param->mw_enabled = byte_field >> 7;
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+	param->log_mpt_sz = byte_field & 0x3f;
 	MLX4_GET(param->mtt_base,   outbox, INIT_HCA_MTT_BASE_OFFSET);
 	MLX4_GET(param->cmpt_base,  outbox, INIT_HCA_CMPT_BASE_OFFSET);
 
 	/* UAR attributes */
 
 	MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
-	MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+	MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+	param->log_uar_sz = byte_field & 0xf;
 
 	/* phv_check enable */
 	MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 7262c63..d89a3da 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -57,12 +57,12 @@
 	int i;
 
 	if (chunk->nsg > 0)
-		pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages,
-			     PCI_DMA_BIDIRECTIONAL);
+		dma_unmap_sg(&dev->persist->pdev->dev, chunk->sg, chunk->npages,
+			     DMA_BIDIRECTIONAL);
 
 	for (i = 0; i < chunk->npages; ++i)
-		__free_pages(sg_page(&chunk->mem[i]),
-			     get_order(chunk->mem[i].length));
+		__free_pages(sg_page(&chunk->sg[i]),
+			     get_order(chunk->sg[i].length));
 }
 
 static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
@@ -71,9 +71,9 @@
 
 	for (i = 0; i < chunk->npages; ++i)
 		dma_free_coherent(&dev->persist->pdev->dev,
-				  chunk->mem[i].length,
-				  lowmem_page_address(sg_page(&chunk->mem[i])),
-				  sg_dma_address(&chunk->mem[i]));
+				  chunk->buf[i].size,
+				  chunk->buf[i].addr,
+				  chunk->buf[i].dma_addr);
 }
 
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
@@ -111,22 +111,21 @@
 	return 0;
 }
 
-static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
-				    int order, gfp_t gfp_mask)
+static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf,
+				   int order, gfp_t gfp_mask)
 {
-	void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order,
-				       &sg_dma_address(mem), gfp_mask);
-	if (!buf)
+	buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order,
+				       &buf->dma_addr, gfp_mask);
+	if (!buf->addr)
 		return -ENOMEM;
 
-	if (offset_in_page(buf)) {
-		dma_free_coherent(dev, PAGE_SIZE << order,
-				  buf, sg_dma_address(mem));
+	if (offset_in_page(buf->addr)) {
+		dma_free_coherent(dev, PAGE_SIZE << order, buf->addr,
+				  buf->dma_addr);
 		return -ENOMEM;
 	}
 
-	sg_set_buf(mem, buf, PAGE_SIZE << order);
-	sg_dma_len(mem) = PAGE_SIZE << order;
+	buf->size = PAGE_SIZE << order;
 	return 0;
 }
 
@@ -159,21 +158,21 @@
 
 	while (npages > 0) {
 		if (!chunk) {
-			chunk = kmalloc_node(sizeof(*chunk),
+			chunk = kzalloc_node(sizeof(*chunk),
 					     gfp_mask & ~(__GFP_HIGHMEM |
 							  __GFP_NOWARN),
 					     dev->numa_node);
 			if (!chunk) {
-				chunk = kmalloc(sizeof(*chunk),
+				chunk = kzalloc(sizeof(*chunk),
 						gfp_mask & ~(__GFP_HIGHMEM |
 							     __GFP_NOWARN));
 				if (!chunk)
 					goto fail;
 			}
+			chunk->coherent = coherent;
 
-			sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
-			chunk->npages = 0;
-			chunk->nsg    = 0;
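+			/* kzalloc already zeroed npages and nsg; sg[] and
+			 * buf[] share a union, so only the scatterlist
+			 * path needs explicit initialization.
+			 */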
+			if (!coherent)
+				sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN);
 			list_add_tail(&chunk->list, &icm->chunk_list);
 		}
 
@@ -186,10 +185,10 @@
 
 		if (coherent)
 			ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
-						      &chunk->mem[chunk->npages],
-						      cur_order, mask);
+						&chunk->buf[chunk->npages],
+						cur_order, mask);
 		else
-			ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
+			ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages],
 						   cur_order, mask,
 						   dev->numa_node);
 
@@ -205,9 +204,9 @@
 		if (coherent)
 			++chunk->nsg;
 		else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
-			chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
-						chunk->npages,
-						PCI_DMA_BIDIRECTIONAL);
+			chunk->nsg = dma_map_sg(&dev->persist->pdev->dev,
+						chunk->sg, chunk->npages,
+						DMA_BIDIRECTIONAL);
 
 			if (chunk->nsg <= 0)
 				goto fail;
@@ -220,9 +219,8 @@
 	}
 
 	if (!coherent && chunk) {
-		chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
-					chunk->npages,
-					PCI_DMA_BIDIRECTIONAL);
+		chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, chunk->sg,
+					chunk->npages, DMA_BIDIRECTIONAL);
 
 		if (chunk->nsg <= 0)
 			goto fail;
@@ -320,7 +318,7 @@
 	u64 idx;
 	struct mlx4_icm_chunk *chunk;
 	struct mlx4_icm *icm;
-	struct page *page = NULL;
+	void *addr = NULL;
 
 	if (!table->lowmem)
 		return NULL;
@@ -336,28 +334,49 @@
 
 	list_for_each_entry(chunk, &icm->chunk_list, list) {
 		for (i = 0; i < chunk->npages; ++i) {
-			if (dma_handle && dma_offset >= 0) {
-				if (sg_dma_len(&chunk->mem[i]) > dma_offset)
-					*dma_handle = sg_dma_address(&chunk->mem[i]) +
-						dma_offset;
-				dma_offset -= sg_dma_len(&chunk->mem[i]);
+			dma_addr_t dma_addr;
+			size_t len;
+
+			if (table->coherent) {
+				len = chunk->buf[i].size;
+				dma_addr = chunk->buf[i].dma_addr;
+				addr = chunk->buf[i].addr;
+			} else {
+				struct page *page;
+
+				len = sg_dma_len(&chunk->sg[i]);
+				dma_addr = sg_dma_address(&chunk->sg[i]);
+
+				/* XXX: we should never do this for highmem
+				 * allocation.  This function either needs
+				 * to be split, or the kernel virtual address
+				 * return needs to be made optional.
+				 */
+				page = sg_page(&chunk->sg[i]);
+				addr = lowmem_page_address(page);
 			}
+
+			if (dma_handle && dma_offset >= 0) {
+				if (len > dma_offset)
+					*dma_handle = dma_addr + dma_offset;
+				dma_offset -= len;
+			}
+
 			/*
 			 * DMA mapping can merge pages but not split them,
 			 * so if we found the page, dma_handle has already
 			 * been assigned to.
 			 */
-			if (chunk->mem[i].length > offset) {
-				page = sg_page(&chunk->mem[i]);
+			if (len > offset)
 				goto out;
-			}
-			offset -= chunk->mem[i].length;
+			offset -= len;
 		}
 	}
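+	/* Fell off the end of the chunk list without reaching 'offset'. */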
 
+	addr = NULL;
 out:
 	mutex_unlock(&table->mutex);
-	return page ? lowmem_page_address(page) + offset : NULL;
+	return addr ? addr + offset : NULL;
 }
 
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
@@ -406,7 +425,7 @@
 	obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
 	if (WARN_ON(!obj_per_chunk))
 		return -EINVAL;
-	num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
+	num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
 
 	table->icm      = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
 	if (!table->icm)
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index c9169a4..d199874 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -47,11 +47,21 @@
 	MLX4_ICM_PAGE_SIZE	= 1 << MLX4_ICM_PAGE_SHIFT,
 };
 
+struct mlx4_icm_buf {
+	void			*addr;
+	size_t			size;
+	dma_addr_t		dma_addr;
+};
+
 struct mlx4_icm_chunk {
 	struct list_head	list;
 	int			npages;
 	int			nsg;
-	struct scatterlist	mem[MLX4_ICM_CHUNK_LEN];
+	bool			coherent;
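+	/* coherent is true when buf[] is used instead of sg[] */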
+	union {
+		struct scatterlist	sg[MLX4_ICM_CHUNK_LEN];
+		struct mlx4_icm_buf	buf[MLX4_ICM_CHUNK_LEN];
+	};
 };
 
 struct mlx4_icm {
@@ -114,12 +124,18 @@
 
 static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter)
 {
-	return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
+	if (iter->chunk->coherent)
+		return iter->chunk->buf[iter->page_idx].dma_addr;
+	else
+		return sg_dma_address(&iter->chunk->sg[iter->page_idx]);
 }
 
 static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
 {
-	return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
+	if (iter->chunk->coherent)
+		return iter->chunk->buf[iter->page_idx].size;
+	else
+		return sg_dma_len(&iter->chunk->sg[iter->page_idx]);
 }
 
 int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 6a04603..d44ac66 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -63,7 +63,7 @@
 
 #ifdef CONFIG_MLX4_DEBUG
 
-int mlx4_debug_level = 0;
+int mlx4_debug_level; /* 0 by default */
 module_param_named(debug_level, mlx4_debug_level, int, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 
@@ -83,7 +83,7 @@
 
 static uint8_t num_vfs[3] = {0, 0, 0};
 static int num_vfs_argc;
-module_param_array(num_vfs, byte , &num_vfs_argc, 0444);
+module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
 MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
 			  "num_vfs=port1,port2,port1+2");
 
@@ -313,7 +313,7 @@
 		for (i = 0; i < dev->caps.num_ports - 1; i++) {
 			if (port_type[i] != port_type[i + 1]) {
 				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
-				return -EINVAL;
+				return -EOPNOTSUPP;
 			}
 		}
 	}
@@ -322,7 +322,7 @@
 		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
 			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
 				 i + 1);
-			return -EINVAL;
+			return -EOPNOTSUPP;
 		}
 	}
 	return 0;
@@ -514,8 +514,7 @@
 	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
 	/*
 	 * Subtract 1 from the limit because we need to allocate a
-	 * spare CQE so the HCA HW can tell the difference between an
-	 * empty CQ and a full CQ.
+	 * spare CQE to enable resizing the CQ.
 	 */
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
@@ -1188,8 +1187,7 @@
 		mlx4_err(mdev,
 			 "Requested port type for port %d is not supported on this HCA\n",
 			 info->port);
-		err = -EINVAL;
-		goto err_sup;
+		return -EOPNOTSUPP;
 	}
 
 	mlx4_stop_sense(mdev);
@@ -1211,7 +1209,7 @@
 		for (i = 1; i <= mdev->caps.num_ports; i++) {
 			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
 				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
-				err = -EINVAL;
+				err = -EOPNOTSUPP;
 			}
 		}
 	}
@@ -1237,7 +1235,7 @@
 out:
 	mlx4_start_sense(mdev);
 	mutex_unlock(&priv->port_mutex);
-err_sup:
+
 	return err;
 }
 
@@ -2241,7 +2239,7 @@
 	for (i = 1; i <= dev->caps.num_ports; i++) {
 		if (mlx4_dev_port(dev, i, &port_cap)) {
 			mlx4_err(dev,
-				 "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n");
+				 "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n");
 		} else if ((dev->caps.dmfs_high_steer_mode !=
 			    MLX4_STEERING_DMFS_A0_DEFAULT) &&
 			   (port_cap.dmfs_optimized_state ==
@@ -2293,23 +2291,31 @@
 static int mlx4_init_hca(struct mlx4_dev *dev)
 {
 	struct mlx4_priv	  *priv = mlx4_priv(dev);
+	struct mlx4_init_hca_param *init_hca = NULL;
+	struct mlx4_dev_cap	  *dev_cap = NULL;
 	struct mlx4_adapter	   adapter;
-	struct mlx4_dev_cap	   dev_cap;
 	struct mlx4_profile	   profile;
-	struct mlx4_init_hca_param init_hca;
 	u64 icm_size;
 	struct mlx4_config_dev_params params;
 	int err;
 
 	if (!mlx4_is_slave(dev)) {
-		err = mlx4_dev_cap(dev, &dev_cap);
-		if (err) {
-			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
-			return err;
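+		/* dev_cap and init_hca are large structures; allocate them
+		 * from the heap rather than this function's stack frame.
+		 */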
+		dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+		init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL);
+
+		if (!dev_cap || !init_hca) {
+			err = -ENOMEM;
+			goto out_free;
 		}
 
-		choose_steering_mode(dev, &dev_cap);
-		choose_tunnel_offload_mode(dev, &dev_cap);
+		err = mlx4_dev_cap(dev, dev_cap);
+		if (err) {
+			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
+			goto out_free;
+		}
+
+		choose_steering_mode(dev, dev_cap);
+		choose_tunnel_offload_mode(dev, dev_cap);
 
 		if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
 		    mlx4_is_master(dev))
@@ -2332,48 +2338,48 @@
 		    MLX4_STEERING_MODE_DEVICE_MANAGED)
 			profile.num_mcg = MLX4_FS_NUM_MCG;
 
-		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
-					     &init_hca);
+		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
+					     init_hca);
 		if ((long long) icm_size < 0) {
 			err = icm_size;
-			return err;
+			goto out_free;
 		}
 
 		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
 
 		if (enable_4k_uar || !dev->persist->num_vfs) {
-			init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+			init_hca->log_uar_sz = ilog2(dev->caps.num_uars) +
 						    PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
-			init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+			init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
 		} else {
-			init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-			init_hca.uar_page_sz = PAGE_SHIFT - 12;
+			init_hca->log_uar_sz = ilog2(dev->caps.num_uars);
+			init_hca->uar_page_sz = PAGE_SHIFT - 12;
 		}
 
-		init_hca.mw_enabled = 0;
+		init_hca->mw_enabled = 0;
 		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
 		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
-			init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
+			init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE;
 
-		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
+		err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size);
 		if (err)
-			return err;
+			goto out_free;
 
-		err = mlx4_INIT_HCA(dev, &init_hca);
+		err = mlx4_INIT_HCA(dev, init_hca);
 		if (err) {
 			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
 			goto err_free_icm;
 		}
 
-		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
-			err = mlx4_query_func(dev, &dev_cap);
+		if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+			err = mlx4_query_func(dev, dev_cap);
 			if (err < 0) {
 				mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
 				goto err_close;
 			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
-				dev->caps.num_eqs = dev_cap.max_eqs;
-				dev->caps.reserved_eqs = dev_cap.reserved_eqs;
-				dev->caps.reserved_uars = dev_cap.reserved_uars;
+				dev->caps.num_eqs = dev_cap->max_eqs;
+				dev->caps.reserved_eqs = dev_cap->reserved_eqs;
+				dev->caps.reserved_uars = dev_cap->reserved_uars;
 			}
 		}
 
@@ -2382,14 +2388,13 @@
 		 * read HCA frequency by QUERY_HCA command
 		 */
 		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
-			memset(&init_hca, 0, sizeof(init_hca));
-			err = mlx4_QUERY_HCA(dev, &init_hca);
+			err = mlx4_QUERY_HCA(dev, init_hca);
 			if (err) {
 				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
 				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
 			} else {
 				dev->caps.hca_core_clock =
-					init_hca.hca_core_clock;
+					init_hca->hca_core_clock;
 			}
 
 			/* In case we got HCA frequency 0 - disable timestamping
@@ -2465,7 +2470,8 @@
 	priv->eq_table.inta_pin = adapter.inta_pin;
 	memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
 
-	return 0;
+	err = 0;
+	goto out_free;
 
 unmap_bf:
 	unmap_internal_clock(dev);
@@ -2484,6 +2490,10 @@
 	if (!mlx4_is_slave(dev))
 		mlx4_free_icms(dev);
 
+out_free:
+	kfree(dev_cap);
+	kfree(init_hca);
+
 	return err;
 }
 
@@ -3252,7 +3262,7 @@
 free_mem:
 	dev->persist->num_vfs = 0;
 	kfree(dev->dev_vfs);
-        dev->dev_vfs = NULL;
+	dev->dev_vfs = NULL;
 	return dev_flags & ~MLX4_FLAG_MASTER;
 }
 
@@ -3920,26 +3930,43 @@
 	}
 }
 
-static int mlx4_devlink_reload(struct devlink *devlink,
-			       struct netlink_ext_ack *extack)
+static void mlx4_restart_one_down(struct pci_dev *pdev);
+static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
+			       struct devlink *devlink);
+
+static int mlx4_devlink_reload_down(struct devlink *devlink,
+				    struct netlink_ext_ack *extack)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+	struct mlx4_dev_persistent *persist = dev->persist;
+
+	if (persist->num_vfs)
+		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
+	mlx4_restart_one_down(persist->pdev);
+	return 0;
+}
+
+static int mlx4_devlink_reload_up(struct devlink *devlink,
+				  struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
 	struct mlx4_dev *dev = &priv->dev;
 	struct mlx4_dev_persistent *persist = dev->persist;
 	int err;
 
-	if (persist->num_vfs)
-		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
-	err = mlx4_restart_one(persist->pdev, true, devlink);
+	err = mlx4_restart_one_up(persist->pdev, true, devlink);
 	if (err)
-		mlx4_err(persist->dev, "mlx4_restart_one failed, ret=%d\n", err);
+		mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n",
+			 err);
 
 	return err;
 }
 
 static const struct devlink_ops mlx4_devlink_ops = {
 	.port_type_set	= mlx4_devlink_port_type_set,
-	.reload		= mlx4_devlink_reload,
+	.reload_down	= mlx4_devlink_reload_down,
+	.reload_up	= mlx4_devlink_reload_up,
 };
 
 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -3982,6 +4009,8 @@
 	if (ret)
 		goto err_params_unregister;
 
+	devlink_params_publish(devlink);
+	devlink_reload_enable(devlink);
 	pci_save_state(pdev);
 	return 0;
 
@@ -4093,6 +4122,8 @@
 	struct devlink *devlink = priv_to_devlink(priv);
 	int active_vfs = 0;
 
+	devlink_reload_disable(devlink);
+
 	if (mlx4_is_slave(dev))
 		persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
 
@@ -4151,7 +4182,13 @@
 	return err;
 }
 
-int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
+static void mlx4_restart_one_down(struct pci_dev *pdev)
+{
+	mlx4_unload_one(pdev);
+}
+
+static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
+			       struct devlink *devlink)
 {
 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
 	struct mlx4_dev	 *dev  = persist->dev;
@@ -4163,7 +4200,6 @@
 	total_vfs = dev->persist->num_vfs;
 	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
 
-	mlx4_unload_one(pdev);
 	if (reload)
 		mlx4_devlink_param_load_driverinit_values(devlink);
 	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
@@ -4182,6 +4218,12 @@
 	return err;
 }
 
+int mlx4_restart_one(struct pci_dev *pdev)
+{
+	mlx4_restart_one_down(pdev);
+	return mlx4_restart_one_up(pdev, false, NULL);
+}
+
 #define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT }
 #define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF }
 #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index ffed2d4..9c48182 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -1492,7 +1492,7 @@
 	rule.port = port;
 	rule.qpn = qpn;
 	INIT_LIST_HEAD(&rule.list);
-	mlx4_err(dev, "going promisc on %x\n", port);
+	mlx4_info(dev, "going promisc on %x\n", port);
 
 	return mlx4_flow_attach(dev, &rule, regid_p);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 23f1b5b..527b52e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1043,8 +1043,7 @@
 void mlx4_catas_end(struct mlx4_dev *dev);
 int mlx4_crdump_init(struct mlx4_dev *dev);
 void mlx4_crdump_end(struct mlx4_dev *dev);
-int mlx4_restart_one(struct pci_dev *pdev, bool reload,
-		     struct devlink *devlink);
+int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
 void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 240f9c9..630f159 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -72,7 +72,7 @@
 #define MLX4_EN_PAGE_SIZE	(1 << MLX4_EN_PAGE_SHIFT)
 #define DEF_RX_RINGS		16
 #define MAX_RX_RINGS		128
-#define MIN_RX_RINGS		4
+#define MIN_RX_RINGS		1
 #define LOG_TXBB_SIZE		6
 #define TXBB_SIZE		BIT(LOG_TXBB_SIZE)
 #define HEADROOM		(2048 / TXBB_SIZE + 1)
@@ -698,8 +698,7 @@
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 struct net_device *sb_dev,
-			 select_queue_fallback_t fallback);
+			 struct net_device *sb_dev);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 10fcc22..ba6ac31 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -2077,11 +2077,6 @@
 		size -= offset + size - I2C_PAGE_SIZE;
 
 	i2c_addr = I2C_ADDR_LOW;
-	if (offset >= I2C_PAGE_SIZE) {
-		/* Reset offset to high page */
-		i2c_addr = I2C_ADDR_HIGH;
-		offset -= I2C_PAGE_SIZE;
-	}
 
 	cable_info = (struct mlx4_cable_info *)inmad->data;
 	cable_info->dev_mem_address = cpu_to_be16(offset);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 31bd567..1187ef1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -471,12 +471,31 @@
 		priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
 }
 
-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
+static int
+mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
+				 struct resource_allocator *res_alloc,
+				 int vf)
 {
-	/* reduce the sink counter */
-	return (dev->caps.max_counters - 1 -
-		(MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
-		/ MLX4_MAX_PORTS;
+	struct mlx4_active_ports actv_ports;
+	int ports, counters_guaranteed;
+
+	/* For master, only allocate according to the number of phys ports */
+	if (vf == mlx4_master_func_num(dev))
+		return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
+
+	/* calculate real number of ports for the VF */
+	actv_ports = mlx4_get_active_ports(dev, vf);
+	ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+	counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
+
+	/* If we do not have enough counters for this VF, do not
+	 * allocate any for it. The '-1' accounts for the reserved sink counter.
+	 */
+	if ((res_alloc->res_reserved + counters_guaranteed) >
+	    (dev->caps.max_counters - 1))
+		return 0;
+
+	return counters_guaranteed;
 }
 
 int mlx4_init_resource_tracker(struct mlx4_dev *dev)
@@ -484,7 +503,6 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int i, j;
 	int t;
-	int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
 
 	priv->mfunc.master.res_tracker.slave_list =
 		kcalloc(dev->num_slaves, sizeof(struct slave_list),
@@ -603,16 +621,8 @@
 				break;
 			case RES_COUNTER:
 				res_alloc->quota[t] = dev->caps.max_counters;
-				if (t == mlx4_master_func_num(dev))
-					res_alloc->guaranteed[t] =
-						MLX4_PF_COUNTERS_PER_PORT *
-						MLX4_MAX_PORTS;
-				else if (t <= max_vfs_guarantee_counter)
-					res_alloc->guaranteed[t] =
-						MLX4_VF_COUNTERS_PER_PORT *
-						MLX4_MAX_PORTS;
-				else
-					res_alloc->guaranteed[t] = 0;
+				res_alloc->guaranteed[t] =
+					mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
 				break;
 			default:
 				break;
@@ -2719,13 +2729,13 @@
 	int total_pages;
 	int total_mem;
 	int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f;
+	int tot;
 
 	sq_size = 1 << (log_sq_size + log_sq_sride + 4);
 	rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4));
 	total_mem = sq_size + rq_size;
-	total_pages =
-		roundup_pow_of_two((total_mem + (page_offset << 6)) >>
-				   page_shift);
+	tot = (total_mem + (page_offset << 6)) >> page_shift;
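+	/* roundup_pow_of_two(0) is undefined, so clamp to at least one page */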
+	total_pages = !tot ? 1 : roundup_pow_of_two(tot);
 
 	return total_pages;
 }
@@ -4729,7 +4739,6 @@
 	struct res_srq *tmp;
 	int state;
 	u64 in_param;
-	LIST_HEAD(tlist);
 	int srqn;
 	int err;
 
@@ -4795,7 +4804,6 @@
 	struct res_cq *tmp;
 	int state;
 	u64 in_param;
-	LIST_HEAD(tlist);
 	int cqn;
 	int err;
 
@@ -4858,7 +4866,6 @@
 	struct res_mpt *tmp;
 	int state;
 	u64 in_param;
-	LIST_HEAD(tlist);
 	int mptn;
 	int err;
 
@@ -4926,7 +4933,6 @@
 	struct res_mtt *mtt;
 	struct res_mtt *tmp;
 	int state;
-	LIST_HEAD(tlist);
 	int base;
 	int err;
 
@@ -5115,7 +5121,6 @@
 	struct res_eq *tmp;
 	int err;
 	int state;
-	LIST_HEAD(tlist);
 	int eqn;
 
 	err = move_all_busy(dev, slave, RES_EQ);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 37a5514..a1f20b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -1,13 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Mellanox driver configuration
 #
 
 config MLX5_CORE
 	tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
-	depends on MAY_USE_DEVLINK
 	depends on PCI
+	select NET_DEVLINK
 	imply PTP_1588_CLOCK
 	imply VXLAN
+	imply MLXFW
+	imply PCI_HYPERV_INTERFACE
 	default n
 	---help---
 	  Core driver for low level functionality of the ConnectX-4 and
@@ -17,21 +20,21 @@
 	bool
 
 config MLX5_FPGA
-        bool "Mellanox Technologies Innova support"
-        depends on MLX5_CORE
+	bool "Mellanox Technologies Innova support"
+	depends on MLX5_CORE
 	select MLX5_ACCEL
-        ---help---
-          Build support for the Innova family of network cards by Mellanox
-          Technologies. Innova network cards are comprised of a ConnectX chip
-          and an FPGA chip on one board. If you select this option, the
-          mlx5_core driver will include the Innova FPGA core and allow building
-          sandbox-specific client drivers.
+	---help---
+	  Build support for the Innova family of network cards by Mellanox
+	  Technologies. Innova network cards are comprised of a ConnectX chip
+	  and an FPGA chip on one board. If you select this option, the
+	  mlx5_core driver will include the Innova FPGA core and allow building
+	  sandbox-specific client drivers.
 
 config MLX5_CORE_EN
 	bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
 	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
-	depends on IPV6=y || IPV6=n || MLX5_CORE=m
 	select PAGE_POOL
+	select DIMLIB
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
@@ -55,14 +58,14 @@
 	  API.
 
 config MLX5_MPFS
-        bool "Mellanox Technologies MLX5 MPFS support"
-        depends on MLX5_CORE_EN
+	bool "Mellanox Technologies MLX5 MPFS support"
+	depends on MLX5_CORE_EN
 	default y
-        ---help---
+	---help---
 	  Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
-          support in ConnectX NIC. MPFs is required for when multi-PF configuration
-          is enabled to allow passing user configured unicast MAC addresses to the
-          requesting PF.
+	  support in ConnectX NIC. MPFS is required when a multi-PF configuration
+	  is enabled, to allow passing user-configured unicast MAC addresses to
+	  the requesting PF.
 
 config MLX5_ESWITCH
 	bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
@@ -70,10 +73,10 @@
 	default y
 	---help---
 	  Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
-          E-Switch provides internal SRIOV packet steering and switching for the
-          enabled VFs and PF in two available modes:
-                Legacy SRIOV mode (L2 mac vlan steering based).
-                Switchdev mode (eswitch offloads).
+	  E-Switch provides internal SRIOV packet steering and switching for the
+	  enabled VFs and PF in two available modes:
+	        Legacy SRIOV mode (L2 mac vlan steering based).
+	        Switchdev mode (eswitch offloads).
 
 config MLX5_CORE_EN_DCB
 	bool "Data Center Bridging (DCB) Support"
@@ -94,26 +97,67 @@
 	---help---
 	  MLX5 IPoIB offloads & acceleration support.
 
+config MLX5_FPGA_IPSEC
+	bool "Mellanox Technologies IPsec Innova support"
+	depends on MLX5_CORE
+	depends on MLX5_FPGA
+	default n
+	help
+	Build IPsec support for the Innova family of network cards by Mellanox
+	Technologies. Innova network cards are comprised of a ConnectX chip
+	and an FPGA chip on one board. If you select this option, the
+	mlx5_core driver will include the Innova FPGA core and allow building
+	sandbox-specific client drivers.
+
 config MLX5_EN_IPSEC
 	bool "IPSec XFRM cryptography-offload accelaration"
-	depends on MLX5_ACCEL
 	depends on MLX5_CORE_EN
 	depends on XFRM_OFFLOAD
 	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+	depends on MLX5_FPGA_IPSEC
 	default n
-	---help---
+	help
 	  Build support for IPsec cryptography-offload acceleration in the NIC.
 	  Note: Support for hardware with this capability needs to be selected
 	  for this option to become available.
 
-config MLX5_EN_TLS
-	bool "TLS cryptography-offload accelaration"
+config MLX5_FPGA_TLS
+	bool "Mellanox Technologies TLS Innova support"
+	depends on TLS_DEVICE
+	depends on TLS=y || MLX5_CORE=m
+	depends on MLX5_FPGA
+	default n
+	help
+	Build TLS support for the Innova family of network cards by Mellanox
+	Technologies. Innova network cards are comprised of a ConnectX chip
+	and an FPGA chip on one board. If you select this option, the
+	mlx5_core driver will include the Innova FPGA core and allow building
+	sandbox-specific client drivers.
+
+config MLX5_TLS
+	bool "Mellanox Technologies TLS Connect-X support"
 	depends on MLX5_CORE_EN
 	depends on TLS_DEVICE
 	depends on TLS=y || MLX5_CORE=m
-	depends on MLX5_ACCEL
+	select MLX5_ACCEL
 	default n
-	---help---
-	  Build support for TLS cryptography-offload accelaration in the NIC.
-	  Note: Support for hardware with this capability needs to be selected
-	  for this option to become available.
+	help
+	Build TLS support for the ConnectX family of network cards by Mellanox
+	Technologies.
+
+config MLX5_EN_TLS
+	bool "TLS cryptography-offload accelaration"
+	depends on MLX5_CORE_EN
+	depends on MLX5_FPGA_TLS || MLX5_TLS
+	default y
+	help
+	Build support for TLS cryptography-offload acceleration in the NIC.
+	Note: Support for hardware with this capability needs to be selected
+	for this option to become available.
+
+config MLX5_SW_STEERING
+	bool "Mellanox Technologies software-managed steering"
+	depends on MLX5_CORE_EN && MLX5_ESWITCH
+	default y
+	help
+	Build support for software-managed steering in the NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d324a38..5708fcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,17 +12,20 @@
 # mlx5 core basic
 #
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
-		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
-		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o  \
-		diag/fs_tracepoint.o diag/fw_tracer.o
+		health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
+		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
+		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
+		lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
+		diag/fw_tracer.o diag/crdump.o devlink.o
 
 #
 # Netdev basic
 #
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
-		en_selftest.o en/port.o
+		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
+		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
+		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
 
 #
 # Netdev extra
@@ -30,15 +33,20 @@
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
+					lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
+					en/tc_tun_geneve.o diag/en_tc_tracepoint.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
 
 #
 # Core extra
 #
-mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
+				      ecpf.o rdma.o
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
 
 #
 # Ipoib netdev
@@ -48,14 +56,21 @@
 #
 # Accelerations & FPGA
 #
-mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
+mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o
+mlx5_core-$(CONFIG_MLX5_FPGA_TLS)   += fpga/tls.o
+mlx5_core-$(CONFIG_MLX5_ACCEL)      += lib/crypto.o accel/tls.o accel/ipsec.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
-				 fpga/ipsec.o fpga/tls.o
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
 
 mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
 				     en_accel/ipsec_stats.o
 
-mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
+				   en_accel/ktls.o en_accel/ktls_tx.o
 
-CFLAGS_tracepoint.o := -I$(src)
+mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
+					steering/dr_matcher.o steering/dr_rule.o \
+					steering/dr_icm_pool.o steering/dr_crc32.o \
+					steering/dr_ste.o steering/dr_send.o \
+					steering/dr_cmd.o steering/dr_fw.o \
+					steering/dr_action.o steering/fs_dr.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 9f1b193..eddc34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -31,6 +31,8 @@
  *
  */
 
+#ifdef CONFIG_MLX5_FPGA_IPSEC
+
 #include <linux/mlx5/device.h>
 
 #include "accel/ipsec.h"
@@ -74,6 +76,11 @@
 	return mlx5_fpga_ipsec_init(mdev);
 }
 
+void mlx5_accel_ipsec_build_fs_cmds(void)
+{
+	mlx5_fpga_ipsec_build_fs_cmds();
+}
+
 void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 	mlx5_fpga_ipsec_cleanup(mdev);
@@ -107,3 +114,5 @@
 	return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
 }
 EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 024dbd2..530e428 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -37,7 +37,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/accel.h>
 
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_FPGA_IPSEC
 
 #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
 			      MLX5_ACCEL_IPSEC_CAP_DEVICE)
@@ -54,6 +54,7 @@
 void mlx5_accel_esp_free_hw_context(void *context);
 
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_build_fs_cmds(void);
 void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
 #else
@@ -79,6 +80,10 @@
 	return 0;
 }
 
+static inline void mlx5_accel_ipsec_build_fs_cmds(void)
+{
+}
+
 static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
index da7bd26..cab708a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -35,6 +35,9 @@
 
 #include "accel/tls.h"
 #include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+#ifdef CONFIG_MLX5_FPGA_TLS
 #include "fpga/tls.h"
 
 int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
@@ -61,7 +64,8 @@
 
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
 {
-	return mlx5_fpga_is_tls_device(mdev);
+	return mlx5_fpga_is_tls_device(mdev) ||
+		mlx5_accel_is_ktls_device(mdev);
 }
 
 u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
@@ -78,3 +82,42 @@
 {
 	mlx5_fpga_tls_cleanup(mdev);
 }
+#endif
+
+#ifdef CONFIG_MLX5_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+			 struct tls_crypto_info *crypto_info,
+			 u32 *p_key_id)
+{
+	u32 sz_bytes;
+	void *key;
+
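+	/* Both supported ciphers hand the raw key material to the
+	 * encryption-key object; only the key length differs.
+	 */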
+	switch (crypto_info->cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		struct tls12_crypto_info_aes_gcm_128 *info =
+			(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+		key      = info->key;
+		sz_bytes = sizeof(info->key);
+		break;
+	}
+	case TLS_CIPHER_AES_GCM_256: {
+		struct tls12_crypto_info_aes_gcm_256 *info =
+			(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+		key      = info->key;
+		sz_bytes = sizeof(info->key);
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id);
+}
+
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+	mlx5_destroy_encryption_key(mdev, key_id);
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index def4093..d787bc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -37,7 +37,49 @@
 #include <linux/mlx5/driver.h>
 #include <linux/tls.h>
 
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+			 struct tls_crypto_info *crypto_info,
+			 u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+	if (!MLX5_CAP_GEN(mdev, tls))
+		return false;
+
+	if (!MLX5_CAP_GEN(mdev, log_max_dek))
+		return false;
+
+	return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+}
+
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+					 struct tls_crypto_info *crypto_info)
+{
+	switch (crypto_info->cipher_type) {
+	case TLS_CIPHER_AES_GCM_128:
+		if (crypto_info->version == TLS_1_2_VERSION)
+			return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+		break;
+	}
+
+	return false;
+}
+#else
+static inline int
+mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+		     struct tls_crypto_info *crypto_info,
+		     u32 *p_key_id) { return -ENOTSUPP; }
+static inline void
+mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {}
+
+static inline bool
+mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool
+mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+		      struct tls_crypto_info *crypto_info) { return false; }
+#endif
 
 enum {
 	MLX5_ACCEL_TLS_TX = BIT(0),
@@ -60,6 +102,7 @@
 	u8         reserved_at_2[0x1e];
 };
 
+#ifdef CONFIG_MLX5_FPGA_TLS
 int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
 			    struct tls_crypto_info *crypto_info,
 			    u32 start_offload_tcp_sn, u32 *p_swid,
@@ -84,11 +127,13 @@
 					   bool direction_sx) { }
 static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
 					   u32 seq, u64 rcd_sn) { return 0; }
-static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+	return mlx5_accel_is_ktls_device(mdev);
+}
 static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
 static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
 static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
-
 #endif
 
 #endif	/* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 456f300..549f962 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -57,15 +57,16 @@
 					   int node)
 {
 	struct mlx5_priv *priv = &dev->priv;
+	struct device *device = dev->device;
 	int original_node;
 	void *cpu_handle;
 
 	mutex_lock(&priv->alloc_mutex);
-	original_node = dev_to_node(&dev->pdev->dev);
-	set_dev_node(&dev->pdev->dev, node);
-	cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size,
-					 dma_handle, GFP_KERNEL);
-	set_dev_node(&dev->pdev->dev, original_node);
+	original_node = dev_to_node(device);
+	set_dev_node(device, node);
+	cpu_handle = dma_alloc_coherent(device, size, dma_handle,
+					GFP_KERNEL);
+	set_dev_node(device, original_node);
 	mutex_unlock(&priv->alloc_mutex);
 	return cpu_handle;
 }
@@ -110,7 +111,7 @@
 
 void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 {
-	dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+	dma_free_coherent(dev->device, buf->size, buf->frags->buf,
 			  buf->frags->map);
 
 	kfree(buf->frags);
@@ -139,7 +140,7 @@
 		if (!frag->buf)
 			goto err_free_buf;
 		if (frag->map & ((1 << buf->page_shift) - 1)) {
-			dma_free_coherent(&dev->pdev->dev, frag_sz,
+			dma_free_coherent(dev->device, frag_sz,
 					  buf->frags[i].buf, buf->frags[i].map);
 			mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
 				       &frag->map, buf->page_shift);
@@ -152,7 +153,7 @@
 
 err_free_buf:
 	while (i--)
-		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+		dma_free_coherent(dev->device, PAGE_SIZE, buf->frags[i].buf,
 				  buf->frags[i].map);
 	kfree(buf->frags);
 err_out:
@@ -168,7 +169,7 @@
 	for (i = 0; i < buf->npages; i++) {
 		int frag_sz = min_t(int, size, PAGE_SIZE);
 
-		dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf,
+		dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf,
 				  buf->frags[i].map);
 		size -= frag_sz;
 	}
@@ -186,10 +187,7 @@
 	if (!pgdir)
 		return NULL;
 
-	pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page),
-				sizeof(unsigned long),
-				GFP_KERNEL);
-
+	pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL);
 	if (!pgdir->bitmap) {
 		kfree(pgdir);
 		return NULL;
@@ -200,7 +198,7 @@
 	pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
 						       &pgdir->db_dma, node);
 	if (!pgdir->db_page) {
-		kfree(pgdir->bitmap);
+		bitmap_free(pgdir->bitmap);
 		kfree(pgdir);
 		return NULL;
 	}
@@ -277,10 +275,10 @@
 	__set_bit(db->index, db->u.pgdir->bitmap);
 
 	if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
-		dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+		dma_free_coherent(dev->device, PAGE_SIZE,
 				  db->u.pgdir->db_page, db->u.pgdir->db_dma);
 		list_del(&db->u.pgdir->list);
-		kfree(db->u.pgdir->bitmap);
+		bitmap_free(db->u.pgdir->bitmap);
 		kfree(db->u.pgdir);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a53736c..ea934cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -40,9 +40,11 @@
 #include <linux/random.h>
 #include <linux/io-mapping.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
 #include <linux/debugfs.h>
 
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
 	CMD_IF_REV = 5,
@@ -308,10 +310,13 @@
 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
-	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT:
 	case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_FPGA_DESTROY_QP:
 	case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_DEALLOC_MEMIC:
+	case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+	case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -325,7 +330,6 @@
 	case MLX5_CMD_OP_CREATE_MKEY:
 	case MLX5_CMD_OP_QUERY_MKEY:
 	case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
-	case MLX5_CMD_OP_PAGE_FAULT_RESUME:
 	case MLX5_CMD_OP_CREATE_EQ:
 	case MLX5_CMD_OP_QUERY_EQ:
 	case MLX5_CMD_OP_GEN_EQE:
@@ -370,6 +374,8 @@
 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_SET_MONITOR_COUNTER:
+	case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
 	case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
 	case MLX5_CMD_OP_QUERY_RATE_LIMIT:
 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
@@ -426,7 +432,7 @@
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
-	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_FPGA_CREATE_QP:
 	case MLX5_CMD_OP_FPGA_MODIFY_QP:
@@ -435,6 +441,13 @@
 	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
 	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_CREATE_UCTX:
+	case MLX5_CMD_OP_DESTROY_UCTX:
+	case MLX5_CMD_OP_CREATE_UMEM:
+	case MLX5_CMD_OP_DESTROY_UMEM:
+	case MLX5_CMD_OP_ALLOC_MEMIC:
+	case MLX5_CMD_OP_MODIFY_XRQ:
+	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -518,6 +531,8 @@
 	MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+	MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
+	MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
 	MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
 	MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
 	MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
@@ -599,8 +614,8 @@
 	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
-	MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
-	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+	MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT);
 	MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
@@ -617,6 +632,15 @@
 	MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
 	MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
 	MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
+	MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
+	MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
+	MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS);
+	MLX5_COMMAND_STR_CASE(CREATE_UCTX);
+	MLX5_COMMAND_STR_CASE(DESTROY_UCTX);
+	MLX5_COMMAND_STR_CASE(CREATE_UMEM);
+	MLX5_COMMAND_STR_CASE(DESTROY_UMEM);
+	MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR);
+	MLX5_COMMAND_STR_CASE(MODIFY_XRQ);
 	default: return "unknown command opcode";
 	}
 }
@@ -801,6 +825,8 @@
 	return MLX5_GET(mbox_in, in->first.data, opcode);
 }
 
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
 static void cb_timeout_handler(struct work_struct *work)
 {
 	struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -903,7 +929,6 @@
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
 	iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
-	mmiowb();
 	/* if not in polling don't use ent after this point */
 	if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
 		poll_timeout(ent);
@@ -1333,7 +1358,7 @@
 	struct mlx5_cmd *cmd = &dev->cmd;
 
 	snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
-		 dev_name(&dev->pdev->dev));
+		 dev_name(dev->device));
 }
 
 static void clean_debug_files(struct mlx5_core_dev *dev)
@@ -1347,49 +1372,19 @@
 	debugfs_remove_recursive(dbg->dbg_root);
 }
 
-static int create_debugfs_files(struct mlx5_core_dev *dev)
+static void create_debugfs_files(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-	int err = -ENOMEM;
-
-	if (!mlx5_debugfs_root)
-		return 0;
 
 	dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
-	if (!dbg->dbg_root)
-		return err;
 
-	dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
-					  dev, &dfops);
-	if (!dbg->dbg_in)
-		goto err_dbg;
-
-	dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
-					   dev, &dfops);
-	if (!dbg->dbg_out)
-		goto err_dbg;
-
-	dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
-					      dev, &olfops);
-	if (!dbg->dbg_outlen)
-		goto err_dbg;
-
-	dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
-					    &dbg->status);
-	if (!dbg->dbg_status)
-		goto err_dbg;
-
-	dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
-	if (!dbg->dbg_run)
-		goto err_dbg;
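+	/* Return values are deliberately ignored: debugfs creation
+	 * failures are not fatal and need no unwinding.
+	 */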
+	debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops);
+	debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops);
+	debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops);
+	debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status);
+	debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
 
 	mlx5_cmdif_debugfs_init(dev);
-
-	return 0;
-
-err_dbg:
-	clean_debug_files(dev);
-	return err;
 }
 
 static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
@@ -1408,14 +1403,32 @@
 		up(&cmd->sem);
 }
 
+static int cmd_comp_notifier(struct notifier_block *nb,
+			     unsigned long type, void *data)
+{
+	struct mlx5_core_dev *dev;
+	struct mlx5_cmd *cmd;
+	struct mlx5_eqe *eqe;
+
+	cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+	dev = container_of(cmd, struct mlx5_core_dev, cmd);
+	eqe = data;
+
+	mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+	return NOTIFY_OK;
+}
+
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
+	MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+	mlx5_eq_notifier_register(dev, &dev->cmd.nb);
 	mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
 }
 
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
 {
 	mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+	mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1431,7 +1444,7 @@
 	}
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -1529,11 +1542,71 @@
 		}
 	}
 }
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+	unsigned long flags;
+	u64 vector;
+
+	/* wait for pending handlers to complete */
+	mlx5_eq_synchronize_cmd_irq(dev);
+	spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
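+	/* cmd.bitmask has a bit set for every free slot, so its inverse
+	 * covers the entries that may still hold a command in flight.
+	 */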
+	vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+	if (!vector)
+		goto no_trig;
+
+	vector |= MLX5_TRIGGERED_CMD_COMP;
+	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+	mlx5_cmd_comp_handler(dev, vector, true);
+	return;
+
+no_trig:
+	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+void mlx5_cmd_flush(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	int i;
+
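+	/* Take every command slot; any slot that cannot be taken
+	 * immediately still holds a command, so force its completion.
+	 */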
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		while (down_trylock(&cmd->sem))
+			mlx5_cmd_trigger_completions(dev);
+
+	while (down_trylock(&cmd->pages_sem))
+		mlx5_cmd_trigger_completions(dev);
+
+	/* Unlock cmdif */
+	up(&cmd->pages_sem);
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		up(&cmd->sem);
+}
 
 static int status_to_err(u8 status)
 {
-	return status ? -1 : 0; /* TBD more meaningful codes */
+	switch (status) {
+	case MLX5_CMD_DELIVERY_STAT_OK:
+	case MLX5_DRIVER_STATUS_ABORTED:
+		return 0;
+	case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+	case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+		return -EBADR;
+	case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+	case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+	case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+		return -EFAULT; /* Bad address */
+	case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+	case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+	case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+	case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+		return -ENOMSG;
+	case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+		return -EIO;
+	default:
+		return -EINVAL;
+	}
 }
 
 static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
@@ -1659,12 +1732,57 @@
 }
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-		     void *out, int out_size, mlx5_cmd_cbk_t callback,
-		     void *context)
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+			     struct mlx5_async_ctx *ctx)
 {
-	return cmd_exec(dev, in, in_size, out, out_size, callback, context,
-			false);
+	ctx->dev = dev;
+	/* Starts at 1 to avoid doing wake_up if we are not cleaning up */
+	atomic_set(&ctx->num_inflight, 1);
+	init_waitqueue_head(&ctx->wait);
+}
+EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+
+/**
+ * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx
+ * @ctx: The ctx to clean
+ *
+ * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The
+ * caller must ensure that mlx5_cmd_exec_cb() is not called during or after
+ * the call to mlx5_cmd_cleanup_async_ctx().
+ */
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
+{
+	atomic_dec(&ctx->num_inflight);
+	wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
+
+static void mlx5_cmd_exec_cb_handler(int status, void *_work)
+{
+	struct mlx5_async_work *work = _work;
+	struct mlx5_async_ctx *ctx = work->ctx;
+
+	work->user_callback(status, work);
+	if (atomic_dec_and_test(&ctx->num_inflight))
+		wake_up(&ctx->wait);
+}
+
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+		     void *out, int out_size, mlx5_async_cbk_t callback,
+		     struct mlx5_async_work *work)
+{
+	int ret;
+
+	work->ctx = ctx;
+	work->user_callback = callback;
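+	/* Refuse new work once cleanup has dropped num_inflight to zero. */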
+	if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
+		return -EIO;
+	ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
+		       mlx5_cmd_exec_cb_handler, work, false);
+	if (ret && atomic_dec_and_test(&ctx->num_inflight))
+		wake_up(&ctx->wait);
+
+	return ret;
 }
 EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
@@ -1735,10 +1853,10 @@
 
 static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
 {
-	struct device *ddev = &dev->pdev->dev;
+	struct device *ddev = dev->device;
 
-	cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
-						 &cmd->alloc_dma, GFP_KERNEL);
+	cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+						&cmd->alloc_dma, GFP_KERNEL);
 	if (!cmd->cmd_alloc_buf)
 		return -ENOMEM;
 
@@ -1752,9 +1870,9 @@
 
 	dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
 			  cmd->alloc_dma);
-	cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev,
-						 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
-						 &cmd->alloc_dma, GFP_KERNEL);
+	cmd->cmd_alloc_buf = dma_alloc_coherent(ddev,
+						2 * MLX5_ADAPTER_PAGE_SIZE - 1,
+						&cmd->alloc_dma, GFP_KERNEL);
 	if (!cmd->cmd_alloc_buf)
 		return -ENOMEM;
 
@@ -1766,7 +1884,7 @@
 
 static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
 {
-	struct device *ddev = &dev->pdev->dev;
+	struct device *ddev = dev->device;
 
 	dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf,
 			  cmd->alloc_dma);
@@ -1785,14 +1903,13 @@
 	memset(cmd, 0, sizeof(*cmd));
 	cmd_if_rev = cmdif_rev(dev);
 	if (cmd_if_rev != CMD_IF_REV) {
-		dev_err(&dev->pdev->dev,
-			"Driver cmdif rev(%d) differs from firmware's(%d)\n",
-			CMD_IF_REV, cmd_if_rev);
+		mlx5_core_err(dev,
+			      "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+			      CMD_IF_REV, cmd_if_rev);
 		return -EINVAL;
 	}
 
-	cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align,
-				    0);
+	cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0);
 	if (!cmd->pool)
 		return -ENOMEM;
 
@@ -1804,14 +1921,14 @@
 	cmd->log_sz = cmd_l >> 4 & 0xf;
 	cmd->log_stride = cmd_l & 0xf;
 	if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
-		dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
-			1 << cmd->log_sz);
+		mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n",
+			      1 << cmd->log_sz);
 		err = -EINVAL;
 		goto err_free_page;
 	}
 
 	if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
-		dev_err(&dev->pdev->dev, "command queue size overflow\n");
+		mlx5_core_err(dev, "command queue size overflow\n");
 		err = -EINVAL;
 		goto err_free_page;
 	}
@@ -1822,8 +1939,8 @@
 
 	cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
 	if (cmd->cmdif_rev > CMD_IF_REV) {
-		dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
-			CMD_IF_REV, cmd->cmdif_rev);
+		mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n",
+			      CMD_IF_REV, cmd->cmdif_rev);
 		err = -EOPNOTSUPP;
 		goto err_free_page;
 	}
@@ -1839,7 +1956,7 @@
 	cmd_h = (u32)((u64)(cmd->dma) >> 32);
 	cmd_l = (u32)(cmd->dma);
 	if (cmd_l & 0xfff) {
-		dev_err(&dev->pdev->dev, "invalid command queue address\n");
+		mlx5_core_err(dev, "invalid command queue address\n");
 		err = -ENOMEM;
 		goto err_free_page;
 	}
@@ -1859,22 +1976,15 @@
 	set_wqname(dev);
 	cmd->wq = create_singlethread_workqueue(cmd->wq_name);
 	if (!cmd->wq) {
-		dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+		mlx5_core_err(dev, "failed to create command workqueue\n");
 		err = -ENOMEM;
 		goto err_cache;
 	}
 
-	err = create_debugfs_files(dev);
-	if (err) {
-		err = -ENOMEM;
-		goto err_wq;
-	}
+	create_debugfs_files(dev);
 
 	return 0;
 
-err_wq:
-	destroy_workqueue(cmd->wq);
-
 err_cache:
 	destroy_msg_cache(dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index a417912..818edc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -38,6 +38,7 @@
 #include <rdma/ib_verbs.h>
 #include <linux/mlx5/cq.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 #define TASKLET_MAX_TIME 2
 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -57,7 +58,7 @@
 	list_for_each_entry_safe(mcq, temp, &ctx->process_list,
 				 tasklet_ctx.list) {
 		list_del_init(&mcq->tasklet_ctx.list);
-		mcq->tasklet_ctx.comp(mcq);
+		mcq->tasklet_ctx.comp(mcq, NULL);
 		mlx5_cq_put(mcq);
 		if (time_after(jiffies, end))
 			break;
@@ -67,7 +68,8 @@
 		tasklet_schedule(&ctx->task);
 }
 
-static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
+				   struct mlx5_eqe *eqe)
 {
 	unsigned long flags;
 	struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
@@ -86,22 +88,21 @@
 }
 
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			u32 *in, int inlen)
+			u32 *in, int inlen, u32 *out, int outlen)
 {
 	int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
 	u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
-	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 	u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
-	struct mlx5_eq *eq;
+	struct mlx5_eq_comp *eq;
 	int err;
 
-	eq = mlx5_eqn2eq(dev, eqn);
+	eq = mlx5_eqn2comp_eq(dev, eqn);
 	if (IS_ERR(eq))
 		return PTR_ERR(eq);
 
-	memset(out, 0, sizeof(out));
+	memset(out, 0, outlen);
 	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
 	if (err)
 		return err;
 
@@ -109,6 +110,7 @@
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
 	cq->eq         = eq;
+	cq->uid = MLX5_GET(create_cq_in, in, uid);
 	refcount_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 	if (!cq->comp)
@@ -118,12 +120,12 @@
 	INIT_LIST_HEAD(&cq->tasklet_ctx.list);
 
 	/* Add to comp EQ CQ tree to recv comp events */
-	err = mlx5_eq_add_cq(eq, cq);
+	err = mlx5_eq_add_cq(&eq->core, cq);
 	if (err)
 		goto err_cmd;
 
 	/* Add to async EQ CQ tree to recv async events */
-	err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+	err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
 	if (err)
 		goto err_cq_add;
 
@@ -138,12 +140,13 @@
 	return 0;
 
 err_cq_add:
-	mlx5_eq_del_cq(eq, cq);
+	mlx5_eq_del_cq(&eq->core, cq);
 err_cmd:
 	memset(din, 0, sizeof(din));
 	memset(dout, 0, sizeof(dout));
 	MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+	MLX5_SET(destroy_cq_in, din, uid, cq->uid);
 	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
@@ -155,16 +158,12 @@
 	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
 	int err;
 
-	err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
-	if (err)
-		return err;
-
-	err = mlx5_eq_del_cq(cq->eq, cq);
-	if (err)
-		return err;
+	mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
+	mlx5_eq_del_cq(&cq->eq->core, cq);
 
 	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+	MLX5_SET(destroy_cq_in, in, uid, cq->uid);
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
@@ -196,6 +195,7 @@
 	u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
 
 	MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+	MLX5_SET(modify_cq_in, in, uid, cq->uid);
 	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_cq);
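
With the signature change above, the caller owns the create_cq out mailbox; a
minimal caller sketch (the function name is illustrative, not from the patch):

static int create_cq_example(struct mlx5_core_dev *mdev,
			     struct mlx5_core_cq *mcq, u32 *in, int inlen)
{
	u32 out[MLX5_ST_SZ_DW(create_cq_out)] = {};
	int err;

	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
	if (err)
		return err;
	/* the mailbox stays valid here, e.g. to read the assigned cqn */
	mlx5_core_dbg(mdev, "created cqn 0x%x\n",
		      MLX5_GET(create_cq_out, out, cqn));
	return 0;
}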
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 90fabd6..04854e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -36,6 +36,7 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
 	QP_PID,
@@ -91,8 +92,6 @@
 void mlx5_register_debugfs(void)
 {
 	mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
-	if (IS_ERR_OR_NULL(mlx5_debugfs_root))
-		mlx5_debugfs_root = NULL;
 }
 
 void mlx5_unregister_debugfs(void)
@@ -100,45 +99,25 @@
 	debugfs_remove(mlx5_debugfs_root);
 }
 
-int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	atomic_set(&dev->num_qps, 0);
 
 	dev->priv.qp_debugfs = debugfs_create_dir("QPs",  dev->priv.dbg_root);
-	if (!dev->priv.qp_debugfs)
-		return -ENOMEM;
-
-	return 0;
 }
 
 void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.qp_debugfs);
 }
 
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	dev->priv.eq_debugfs = debugfs_create_dir("EQs",  dev->priv.dbg_root);
-	if (!dev->priv.eq_debugfs)
-		return -ENOMEM;
-
-	return 0;
 }
 
 void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.eq_debugfs);
 }
 
@@ -182,85 +161,41 @@
 	.write	= average_write,
 };
 
-int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_stats *stats;
 	struct dentry **cmd;
 	const char *namep;
-	int err;
 	int i;
 
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	cmd = &dev->priv.cmdif_debugfs;
 	*cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
-	if (!*cmd)
-		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
 		stats = &dev->cmd.stats[i];
 		namep = mlx5_command_str(i);
 		if (strcmp(namep, "unknown command opcode")) {
 			stats->root = debugfs_create_dir(namep, *cmd);
-			if (!stats->root) {
-				mlx5_core_warn(dev, "failed adding command %d\n",
-					       i);
-				err = -ENOMEM;
-				goto out;
-			}
 
-			stats->avg = debugfs_create_file("average", 0400,
-							 stats->root, stats,
-							 &stats_fops);
-			if (!stats->avg) {
-				mlx5_core_warn(dev, "failed creating debugfs file\n");
-				err = -ENOMEM;
-				goto out;
-			}
-
-			stats->count = debugfs_create_u64("n", 0400,
-							  stats->root,
-							  &stats->n);
-			if (!stats->count) {
-				mlx5_core_warn(dev, "failed creating debugfs file\n");
-				err = -ENOMEM;
-				goto out;
-			}
+			debugfs_create_file("average", 0400, stats->root, stats,
+					    &stats_fops);
+			debugfs_create_u64("n", 0400, stats->root, &stats->n);
 		}
 	}
-
-	return 0;
-out:
-	debugfs_remove_recursive(dev->priv.cmdif_debugfs);
-	return err;
 }
 
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.cmdif_debugfs);
 }
 
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	dev->priv.cq_debugfs = debugfs_create_dir("CQs",  dev->priv.dbg_root);
-	if (!dev->priv.cq_debugfs)
-		return -ENOMEM;
-
-	return 0;
 }
 
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.cq_debugfs);
 }
 
@@ -349,6 +284,16 @@
 	return param;
 }
 
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+			      u32 *out, int outlen)
+{
+	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
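
Fields of the returned EQ context are then read back with the usual
MLX5_ADDR_OF/MLX5_GET pair, as eq_read_field() below does; a reduced sketch
(log_eq_size is one eqc field per mlx5_ifc, assumed here for illustration):

static u8 eq_log_size_example(u32 *out)
{
	void *ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);

	return MLX5_GET(eqc, ctx, log_eq_size);
}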
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 			 int index)
 {
@@ -473,7 +418,6 @@
 {
 	struct mlx5_rsc_debug *d;
 	char resn[32];
-	int err;
 	int i;
 
 	d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
@@ -485,30 +429,15 @@
 	d->type = type;
 	sprintf(resn, "0x%x", rsn);
 	d->root = debugfs_create_dir(resn,  root);
-	if (!d->root) {
-		err = -ENOMEM;
-		goto out_free;
-	}
 
 	for (i = 0; i < nfile; i++) {
 		d->fields[i].i = i;
-		d->fields[i].dent = debugfs_create_file(field[i], 0400,
-							d->root, &d->fields[i],
-							&fops);
-		if (!d->fields[i].dent) {
-			err = -ENOMEM;
-			goto out_rem;
-		}
+		debugfs_create_file(field[i], 0400, d->root, &d->fields[i],
+				    &fops);
 	}
 	*dbg = d;
 
 	return 0;
-out_rem:
-	debugfs_remove_recursive(d->root);
-
-out_free:
-	kfree(d);
-	return err;
 }
 
 static void rem_res_tree(struct mlx5_rsc_debug *d)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 37ba7c7..5086227 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -45,75 +45,11 @@
 	unsigned long		state;
 };
 
-struct mlx5_delayed_event {
-	struct list_head	list;
-	struct mlx5_core_dev	*dev;
-	enum mlx5_dev_event	event;
-	unsigned long		param;
-};
-
 enum {
 	MLX5_INTERFACE_ADDED,
 	MLX5_INTERFACE_ATTACHED,
 };
 
-static void add_delayed_event(struct mlx5_priv *priv,
-			      struct mlx5_core_dev *dev,
-			      enum mlx5_dev_event event,
-			      unsigned long param)
-{
-	struct mlx5_delayed_event *delayed_event;
-
-	delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
-	if (!delayed_event) {
-		mlx5_core_err(dev, "event %d is missed\n", event);
-		return;
-	}
-
-	mlx5_core_dbg(dev, "Accumulating event %d\n", event);
-	delayed_event->dev = dev;
-	delayed_event->event = event;
-	delayed_event->param = param;
-	list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
-				  struct mlx5_priv *priv)
-{
-	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-	struct mlx5_delayed_event *de;
-	struct mlx5_delayed_event *n;
-	struct list_head temp;
-
-	INIT_LIST_HEAD(&temp);
-
-	spin_lock_irq(&priv->ctx_lock);
-
-	priv->is_accum_events = false;
-	list_splice_init(&priv->waiting_events_list, &temp);
-	if (!dev_ctx->context)
-		goto out;
-	list_for_each_entry_safe(de, n, &temp, list)
-		dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
-	spin_unlock_irq(&priv->ctx_lock);
-
-	list_for_each_entry_safe(de, n, &temp, list) {
-		list_del(&de->list);
-		kfree(de);
-	}
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
-	spin_lock_irq(&priv->ctx_lock);
-	priv->is_accum_events = true;
-	spin_unlock_irq(&priv->ctx_lock);
-}
 
 void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 {
@@ -129,8 +65,6 @@
 
 	dev_ctx->intf = intf;
 
-	delayed_event_start(priv);
-
 	dev_ctx->context = intf->add(dev);
 	if (dev_ctx->context) {
 		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@
 
 		spin_lock_irq(&priv->ctx_lock);
 		list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-		if (dev_ctx->intf->pfault) {
-			if (priv->pfault) {
-				mlx5_core_err(dev, "multiple page fault handlers not supported");
-			} else {
-				priv->pfault_ctx = dev_ctx->context;
-				priv->pfault = dev_ctx->intf->pfault;
-			}
-		}
-#endif
 		spin_unlock_irq(&priv->ctx_lock);
 	}
 
-	delayed_event_release(dev_ctx, priv);
-
 	if (!dev_ctx->context)
 		kfree(dev_ctx);
 }
@@ -179,15 +100,6 @@
 	if (!dev_ctx)
 		return;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	spin_lock_irq(&priv->ctx_lock);
-	if (priv->pfault == dev_ctx->intf->pfault)
-		priv->pfault = NULL;
-	spin_unlock_irq(&priv->ctx_lock);
-
-	synchronize_srcu(&priv->pfault_srcu);
-#endif
-
 	spin_lock_irq(&priv->ctx_lock);
 	list_del(&dev_ctx->list);
 	spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@
 	if (!dev_ctx)
 		return;
 
-	delayed_event_start(priv);
 	if (intf->attach) {
 		if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
-			goto out;
+			return;
 		if (intf->attach(dev, dev_ctx->context))
-			goto out;
-
+			return;
 		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
 	} else {
 		if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
-			goto out;
+			return;
 		dev_ctx->context = intf->add(dev);
 		if (!dev_ctx->context)
-			goto out;
-
+			return;
 		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
 	}
-
-out:
-	delayed_event_release(dev_ctx, priv);
 }
 
 void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -307,7 +213,7 @@
 	struct mlx5_interface *intf;
 
 	mutex_lock(&mlx5_intf_mutex);
-	list_for_each_entry(intf, &intf_list, list)
+	list_for_each_entry_reverse(intf, &intf_list, list)
 		mlx5_remove_device(intf, priv);
 	list_del(&priv->dev_list);
 	mutex_unlock(&mlx5_intf_mutex);
@@ -342,36 +248,35 @@
 }
 EXPORT_SYMBOL(mlx5_unregister_interface);
 
+/* Must be called with intf_mutex held */
+static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol)
+{
+	struct mlx5_device_context *dev_ctx;
+	struct mlx5_interface *intf;
+	bool found = false;
+
+	list_for_each_entry(intf, &intf_list, list) {
+		if (intf->protocol == protocol) {
+			dev_ctx = mlx5_get_device(intf, &mdev->priv);
+			if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+				found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
 void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
 {
 	mutex_lock(&mlx5_intf_mutex);
-	mlx5_remove_dev_by_protocol(mdev, protocol);
-	mlx5_add_dev_by_protocol(mdev, protocol);
+	if (mlx5_has_added_dev_by_protocol(mdev, protocol)) {
+		mlx5_remove_dev_by_protocol(mdev, protocol);
+		mlx5_add_dev_by_protocol(mdev, protocol);
+	}
 	mutex_unlock(&mlx5_intf_mutex);
 }
 
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
-	struct mlx5_priv *priv = &mdev->priv;
-	struct mlx5_device_context *dev_ctx;
-	unsigned long flags;
-	void *result = NULL;
-
-	spin_lock_irqsave(&priv->ctx_lock, flags);
-
-	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
-		if ((dev_ctx->intf->protocol == protocol) &&
-		    dev_ctx->intf->get_dev) {
-			result = dev_ctx->intf->get_dev(dev_ctx->context);
-			break;
-		}
-
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
-	return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
-
 /* Must be called with intf_mutex held */
 void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
 {
@@ -406,13 +311,20 @@
 /* Must be called with intf_mutex held */
 struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
 {
-	u32 pci_id = mlx5_gen_pci_id(dev);
 	struct mlx5_core_dev *res = NULL;
 	struct mlx5_core_dev *tmp_dev;
 	struct mlx5_priv *priv;
+	u32 pci_id;
 
+	if (!mlx5_core_is_pf(dev))
+		return NULL;
+
+	pci_id = mlx5_gen_pci_id(dev);
 	list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
 		tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+		if (!mlx5_core_is_pf(tmp_dev))
+			continue;
+
 		if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
 			res = tmp_dev;
 			break;
@@ -422,44 +334,6 @@
 	return res;
 }
 
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-		     unsigned long param)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_device_context *dev_ctx;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->ctx_lock, flags);
-
-	if (priv->is_accum_events)
-		add_delayed_event(priv, dev, event, param);
-
-	/* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
-	 * still in priv->ctx_list. In this case, only notify the dev_ctx if its
-	 * ADDED or ATTACHED bit are set.
-	 */
-	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf->event &&
-		    (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
-		     test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
-			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
-			  struct mlx5_pagefault *pfault)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	int srcu_idx;
-
-	srcu_idx = srcu_read_lock(&priv->pfault_srcu);
-	if (priv->pfault)
-		priv->pfault(dev, priv->pfault_ctx, pfault);
-	srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
 
 void mlx5_dev_list_lock(void)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
new file mode 100644
index 0000000..381925c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <devlink.h>
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "eswitch.h"
+
+static int mlx5_devlink_flash_update(struct devlink *devlink,
+				     const char *file_name,
+				     const char *component,
+				     struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	const struct firmware *fw;
+	int err;
+
+	if (component)
+		return -EOPNOTSUPP;
+
+	err = request_firmware_direct(&fw, file_name, &dev->pdev->dev);
+	if (err)
+		return err;
+
+	err = mlx5_firmware_flash(dev, fw, extack);
+	release_firmware(fw);	/* balances request_firmware_direct() above */
+	return err;
+}
+
+static u8 mlx5_fw_ver_major(u32 version)
+{
+	return (version >> 24) & 0xff;
+}
+
+static u8 mlx5_fw_ver_minor(u32 version)
+{
+	return (version >> 16) & 0xff;
+}
+
+static u16 mlx5_fw_ver_subminor(u32 version)
+{
+	return version & 0xffff;
+}
+
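
A worked example of the 8/8/16 unpacking above (the raw value is made up for
illustration):

/*
 *   raw      = 0x10190abc
 *   major    = raw >> 24          = 0x10   -> 16
 *   minor    = (raw >> 16) & 0xff = 0x19   -> 25
 *   subminor = raw & 0xffff       = 0x0abc -> 2748
 *
 * so the "%d.%d.%04d" template below renders "16.25.2748".
 */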
+#define DEVLINK_FW_STRING_LEN 32
+
+static int
+mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+		      struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	char version_str[DEVLINK_FW_STRING_LEN];
+	u32 running_fw, stored_fw;
+	int err;
+
+	err = devlink_info_driver_name_put(req, DRIVER_NAME);
+	if (err)
+		return err;
+
+	err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
+	if (err)
+		return err;
+
+	err = mlx5_fw_version_query(dev, &running_fw, &stored_fw);
+	if (err)
+		return err;
+
+	snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+		 mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw),
+		 mlx5_fw_ver_subminor(running_fw));
+	err = devlink_info_version_running_put(req, "fw.version", version_str);
+	if (err)
+		return err;
+
+	/* no pending (stored) image: report the running version as stored too */
+	if (stored_fw == 0)
+		stored_fw = running_fw;
+
+	snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+		 mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw),
+		 mlx5_fw_ver_subminor(stored_fw));
+	err = devlink_info_version_stored_put(req, "fw.version", version_str);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_ESWITCH
+	.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+	.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+	.eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+	.eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
+	.eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
+	.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
+#endif
+	.flash_update = mlx5_devlink_flash_update,
+	.info_get = mlx5_devlink_info_get,
+};
+
+struct devlink *mlx5_devlink_alloc(void)
+{
+	return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+}
+
+void mlx5_devlink_free(struct devlink *devlink)
+{
+	devlink_free(devlink);
+}
+
+static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id,
+					 union devlink_param_value val,
+					 struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	char *value = val.vstr;
+	int err = 0;
+
+	if (!strcmp(value, "dmfs")) {
+		return 0;
+	} else if (!strcmp(value, "smfs")) {
+		u8 eswitch_mode;
+		bool smfs_cap;
+
+		eswitch_mode = mlx5_eswitch_mode(dev->priv.eswitch);
+		smfs_cap = mlx5_fs_dr_is_supported(dev);
+
+		if (!smfs_cap) {
+			err = -EOPNOTSUPP;
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Software managed steering is not supported by current device");
+		} else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Software managed steering is not supported when eswitch offloads are enabled.");
+			err = -EOPNOTSUPP;
+		}
+	} else {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Bad parameter: supported values are [\"dmfs\", \"smfs\"]");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id,
+				    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	enum mlx5_flow_steering_mode mode;
+
+	if (!strcmp(ctx->val.vstr, "smfs"))
+		mode = MLX5_FLOW_STEERING_MODE_SMFS;
+	else
+		mode = MLX5_FLOW_STEERING_MODE_DMFS;
+	dev->priv.steering->mode = mode;
+
+	return 0;
+}
+
+static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id,
+				    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+	if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS)
+		strcpy(ctx->val.vstr, "smfs");
+	else
+		strcpy(ctx->val.vstr, "dmfs");
+	return 0;
+}
+
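
The get/set/validate trio above backs a runtime string parameter; from
userspace it is driven with the standard devlink parameter commands (the PCI
address below is an assumed example):

/*
 *   devlink dev param set pci/0000:06:00.0 name flow_steering_mode \
 *           value "smfs" cmode runtime
 *   devlink dev param show pci/0000:06:00.0 name flow_steering_mode
 */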
+enum mlx5_devlink_param_id {
+	MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+	MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+};
+
+static const struct devlink_param mlx5_devlink_params[] = {
+	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+			     "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
+			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			     mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set,
+			     mlx5_devlink_fs_mode_validate),
+};
+
+static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	union devlink_param_value value;
+
+	if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS)
+		strcpy(value.vstr, "dmfs");
+	else
+		strcpy(value.vstr, "smfs");
+	devlink_param_driverinit_value_set(devlink,
+					   MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+					   value);
+}
+
+int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+{
+	int err;
+
+	err = devlink_register(devlink, dev);
+	if (err)
+		return err;
+
+	err = devlink_params_register(devlink, mlx5_devlink_params,
+				      ARRAY_SIZE(mlx5_devlink_params));
+	if (err)
+		goto params_reg_err;
+	mlx5_devlink_set_params_init_values(devlink);
+	devlink_params_publish(devlink);
+	return 0;
+
+params_reg_err:
+	devlink_unregister(devlink);
+	return err;
+}
+
+void mlx5_devlink_unregister(struct devlink *devlink)
+{
+	devlink_params_unregister(devlink, mlx5_devlink_params,
+				  ARRAY_SIZE(mlx5_devlink_params));
+	devlink_unregister(devlink);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
new file mode 100644
index 0000000..d0ba037
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef __MLX5_DEVLINK_H__
+#define __MLX5_DEVLINK_H__
+
+#include <net/devlink.h>
+
+struct devlink *mlx5_devlink_alloc(void);
+void mlx5_devlink_free(struct devlink *devlink);
+int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+void mlx5_devlink_unregister(struct devlink *devlink);
+
+#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
new file mode 100644
index 0000000..28d0274
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/pci_vsc.h"
+#include "lib/mlx5.h"
+
+#define BAD_ACCESS			0xBADACCE5
+#define MLX5_PROTECTED_CR_SCAN_CRSPACE	0x7
+
+static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev)
+{
+	return !!dev->priv.health.crdump_size;
+}
+
+static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+	u32 crdump_size = dev->priv.health.crdump_size;
+	int i, ret;
+
+	for (i = 0; i < (crdump_size / 4); i++)
+		cr_data[i] = BAD_ACCESS;
+
+	ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size);
+	if (ret <= 0) {
+		if (ret == 0)
+			return -EIO;
+		return ret;
+	}
+
+	if (crdump_size != ret) {
+		mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
+			       ret, crdump_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+	int ret;
+
+	if (!mlx5_crdump_enabled(dev))
+		return -ENODEV;
+
+	ret = mlx5_vsc_gw_lock(dev);
+	if (ret) {
+		mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
+			       ret);
+		return ret;
+	}
+	/* Verify no other PF is running cr-dump or sw reset */
+	ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET,
+				     MLX5_VSC_LOCK);
+	if (ret) {
+		mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+		goto unlock_gw;
+	}
+
+	ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
+	if (ret)
+		goto unlock_sem;
+
+	ret = mlx5_crdump_fill(dev, cr_data);
+
+unlock_sem:
+	mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK);
+unlock_gw:
+	mlx5_vsc_gw_unlock(dev);
+	return ret;
+}
+
+int mlx5_crdump_enable(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	u32 space_size;
+	int ret;
+
+	if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
+	    mlx5_crdump_enabled(dev))
+		return 0;
+
+	ret = mlx5_vsc_gw_lock(dev);
+	if (ret)
+		return ret;
+
+	/* Check if space is supported and get space size */
+	ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
+				    &space_size);
+	if (ret) {
+		/* Unlock and mask error since space is not supported */
+		mlx5_vsc_gw_unlock(dev);
+		return 0;
+	}
+
+	if (!space_size) {
+		mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
+		mlx5_vsc_gw_unlock(dev);
+		return -EINVAL;
+	}
+
+	ret = mlx5_vsc_gw_unlock(dev);
+	if (ret)
+		return ret;
+
+	priv->health.crdump_size = space_size;
+	return 0;
+}
+
+void mlx5_crdump_disable(struct mlx5_core_dev *dev)
+{
+	dev->priv.health.crdump_size = 0;
+}
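
A minimal consumer sketch for the collect API (the function name and the
kvmalloc/hex-dump choices are illustrative; the BAD_ACCESS pre-fill in
mlx5_crdump_fill() makes any words the VSC read did not reach identifiable):

static int crdump_collect_example(struct mlx5_core_dev *dev)
{
	u32 size = dev->priv.health.crdump_size;
	u32 *cr_data;
	int err;

	if (!size)
		return -ENODEV;		/* crdump not enabled */

	cr_data = kvmalloc(size, GFP_KERNEL);
	if (!cr_data)
		return -ENOMEM;

	err = mlx5_crdump_collect(dev, cr_data);
	if (!err)
		print_hex_dump_bytes("crdump: ", DUMP_PREFIX_OFFSET,
				     cr_data, min_t(u32, size, 64));
	kvfree(cr_data);
	return err;
}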
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
new file mode 100644
index 0000000..1177860
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_EN_REP_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_EN_REP_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "en_rep.h"
+
+TRACE_EVENT(mlx5e_rep_neigh_update,
+	    TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha,
+		     bool neigh_connected),
+	    TP_ARGS(nhe, ha, neigh_connected),
+	    TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+			     __array(u8, ha, ETH_ALEN)
+			     __array(u8, v4, 4)
+			     __array(u8, v6, 16)
+			     __field(bool, neigh_connected)
+			     ),
+	    TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh;
+			struct in6_addr *pin6;
+			__be32 *p32;
+
+			__assign_str(devname, mn->dev->name);
+			__entry->neigh_connected = neigh_connected;
+			memcpy(__entry->ha, ha, ETH_ALEN);
+
+			p32 = (__be32 *)__entry->v4;
+			pin6 = (struct in6_addr *)__entry->v6;
+			if (mn->family == AF_INET) {
+				*p32 = mn->dst_ip.v4;
+				ipv6_addr_set_v4mapped(*p32, pin6);
+			} else if (mn->family == AF_INET6) {
+				*pin6 = mn->dst_ip.v6;
+			}
+			),
+	    TP_printk("netdev: %s MAC: %pM IPv4: %pI4 IPv6: %pI6c neigh_connected=%d\n",
+		      __get_str(devname), __entry->ha,
+		      __entry->v4, __entry->v6, __entry->neigh_connected
+		      )
+);
+
+#endif /* _MLX5_EN_REP_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_rep_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
new file mode 100644
index 0000000..c5dc6c5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#define CREATE_TRACE_POINTS
+#include "en_tc_tracepoint.h"
+
+void put_ids_to_array(int *ids,
+		      const struct flow_action_entry *entries,
+		      unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		ids[i] = entries[i].id;
+}
+
+#define NAME_SIZE 16
+
+static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = {
+	[FLOW_ACTION_ACCEPT]	= "ACCEPT",
+	[FLOW_ACTION_DROP]	= "DROP",
+	[FLOW_ACTION_TRAP]	= "TRAP",
+	[FLOW_ACTION_GOTO]	= "GOTO",
+	[FLOW_ACTION_REDIRECT]	= "REDIRECT",
+	[FLOW_ACTION_MIRRED]	= "MIRRED",
+	[FLOW_ACTION_VLAN_PUSH]	= "VLAN_PUSH",
+	[FLOW_ACTION_VLAN_POP]	= "VLAN_POP",
+	[FLOW_ACTION_VLAN_MANGLE]	= "VLAN_MANGLE",
+	[FLOW_ACTION_TUNNEL_ENCAP]	= "TUNNEL_ENCAP",
+	[FLOW_ACTION_TUNNEL_DECAP]	= "TUNNEL_DECAP",
+	[FLOW_ACTION_MANGLE]	= "MANGLE",
+	[FLOW_ACTION_ADD]	= "ADD",
+	[FLOW_ACTION_CSUM]	= "CSUM",
+	[FLOW_ACTION_MARK]	= "MARK",
+	[FLOW_ACTION_WAKE]	= "WAKE",
+	[FLOW_ACTION_QUEUE]	= "QUEUE",
+	[FLOW_ACTION_SAMPLE]	= "SAMPLE",
+	[FLOW_ACTION_POLICE]	= "POLICE",
+	[FLOW_ACTION_CT]	= "CT",
+};
+
+const char *parse_action(struct trace_seq *p,
+			 int *ids,
+			 unsigned int num)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		if (ids[i] < NUM_FLOW_ACTIONS)
+			trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]);
+		else
+			trace_seq_printf(p, "UNKNOWN ");
+	}
+
+	trace_seq_putc(p, 0);
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
new file mode 100644
index 0000000..d4e6cfa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_TC_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include <net/flow_offload.h>
+#include "en_rep.h"
+
+#define __parse_action(ids, num) parse_action(p, ids, num)
+
+void put_ids_to_array(int *ids,
+		      const struct flow_action_entry *entries,
+		      unsigned int num);
+
+const char *parse_action(struct trace_seq *p,
+			 int *ids,
+			 unsigned int num);
+
+DECLARE_EVENT_CLASS(mlx5e_flower_template,
+		    TP_PROTO(const struct flow_cls_offload *f),
+		    TP_ARGS(f),
+		    TP_STRUCT__entry(__field(void *, cookie)
+				     __field(unsigned int, num)
+				     __dynamic_array(int, ids, f->rule ?
+					     f->rule->action.num_entries : 0)
+				     ),
+		    TP_fast_assign(__entry->cookie = (void *)f->cookie;
+			__entry->num = (f->rule ?
+				f->rule->action.num_entries : 0);
+			if (__entry->num)
+				put_ids_to_array(__get_dynamic_array(ids),
+						 f->rule->action.entries,
+						 f->rule->action.num_entries);
+			),
+		    TP_printk("cookie=%p actions= %s\n",
+			      __entry->cookie, __entry->num ?
+				      __parse_action(__get_dynamic_array(ids),
+						     __entry->num) : "NULL"
+			      )
+);
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower,
+	     TP_PROTO(const struct flow_cls_offload *f),
+	     TP_ARGS(f)
+	     );
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower,
+	     TP_PROTO(const struct flow_cls_offload *f),
+	     TP_ARGS(f)
+	     );
+
+TRACE_EVENT(mlx5e_stats_flower,
+	    TP_PROTO(const struct flow_cls_offload *f),
+	    TP_ARGS(f),
+	    TP_STRUCT__entry(__field(void *, cookie)
+			     __field(u64, bytes)
+			     __field(u64, packets)
+			     __field(u64, lastused)
+			     ),
+	    TP_fast_assign(__entry->cookie = (void *)f->cookie;
+		__entry->bytes = f->stats.bytes;
+		__entry->packets = f->stats.pkts;
+		__entry->lastused = f->stats.lastused;
+		),
+	    TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n",
+		      __entry->cookie, __entry->bytes,
+		      __entry->packets, __entry->lastused
+		      )
+);
+
+TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
+	    TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used),
+	    TP_ARGS(nhe, neigh_used),
+	    TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+			     __array(u8, v4, 4)
+			     __array(u8, v6, 16)
+			     __field(bool, neigh_used)
+			     ),
+	    TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh;
+			struct in6_addr *pin6;
+			__be32 *p32;
+
+			__assign_str(devname, mn->dev->name);
+			__entry->neigh_used = neigh_used;
+
+			p32 = (__be32 *)__entry->v4;
+			pin6 = (struct in6_addr *)__entry->v6;
+			if (mn->family == AF_INET) {
+				*p32 = mn->dst_ip.v4;
+				ipv6_addr_set_v4mapped(*p32, pin6);
+			} else if (mn->family == AF_INET6) {
+				*pin6 = mn->dst_ip.v6;
+			}
+			),
+	    TP_printk("netdev: %s IPv4: %pI4 IPv6: %pI6c neigh_used=%d\n",
+		      __get_str(devname), __entry->v4, __entry->v6,
+		      __entry->neigh_used
+		      )
+);
+
+#endif /* _MLX5_TC_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_tc_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 0f11fff..8ecac81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -161,10 +161,10 @@
 	PRINT_MASKED_VAL(name, p, format);		   \
 }
 	DECLARE_MASK_VAL(u64, gre_key) = {
-		.m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
-		     MLX5_GET(fte_match_set_misc, mask, gre_key_l),
-		.v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
-		     MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+		.m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
+		     MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
+		.v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
+		     MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
 
 	PRINT_MASKED_VAL(gre_key, p, "%llu");
 	PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
@@ -258,6 +258,8 @@
 	return ret;
 }
 
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft);
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg);
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg);
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 0240aee..ddf1b87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -61,6 +61,41 @@
 			 const struct mlx5_flow_destination *dst,
 			 u32 counter_id);
 
+TRACE_EVENT(mlx5_fs_add_ft,
+	    TP_PROTO(const struct mlx5_flow_table *ft),
+	    TP_ARGS(ft),
+	    TP_STRUCT__entry(
+		__field(const struct mlx5_flow_table *, ft)
+		__field(u32, id)
+		__field(u32, level)
+		__field(u32, type)
+	    ),
+	    TP_fast_assign(
+			   __entry->ft = ft;
+			   __entry->id = ft->id;
+			   __entry->level = ft->level;
+			   __entry->type = ft->type;
+	    ),
+	    TP_printk("ft=%p id=%u level=%u type=%u\n",
+		      __entry->ft, __entry->id, __entry->level, __entry->type)
+	    );
+
+TRACE_EVENT(mlx5_fs_del_ft,
+	    TP_PROTO(const struct mlx5_flow_table *ft),
+	    TP_ARGS(ft),
+	    TP_STRUCT__entry(
+		__field(const struct mlx5_flow_table *, ft)
+		__field(u32, id)
+	    ),
+	    TP_fast_assign(
+			   __entry->ft = ft;
+			   __entry->id = ft->id;
+	    ),
+	    TP_printk("ft=%p id=%u\n",
+		      __entry->ft, __entry->id)
+	    );
+
 TRACE_EVENT(mlx5_fs_add_fg,
 	    TP_PROTO(const struct mlx5_flow_group *fg),
 	    TP_ARGS(fg),
@@ -133,7 +168,7 @@
 	{MLX5_FLOW_CONTEXT_ACTION_DROP,		 "DROP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,	 "FWD"},\
 	{MLX5_FLOW_CONTEXT_ACTION_COUNT,	 "CNT"},\
-	{MLX5_FLOW_CONTEXT_ACTION_ENCAP,	 "ENCAP"},\
+	{MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\
 	{MLX5_FLOW_CONTEXT_ACTION_DECAP,	 "DECAP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,	 "MOD_HDR"},\
 	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH,	 "VLAN_PUSH"},\
@@ -152,6 +187,7 @@
 		__field(u32, index)
 		__field(u32, action)
 		__field(u32, flow_tag)
+		__field(u32, flow_source)
 		__field(u8,  mask_enable)
 		__field(int, new_fte)
 		__array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
@@ -169,7 +205,8 @@
 			   __entry->index = fte->index;
 			   __entry->action = fte->action.action;
 			   __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
-			   __entry->flow_tag = fte->action.flow_tag;
+			   __entry->flow_tag = fte->flow_context.flow_tag;
+			   __entry->flow_source = fte->flow_context.flow_source;
 			   memcpy(__entry->mask_outer,
 				  MLX5_ADDR_OF(fte_match_param,
 					       &__entry->fg->mask.match_criteria,
@@ -252,10 +289,10 @@
 			   memcpy(__entry->destination,
 				  &rule->dest_attr,
 				  sizeof(__entry->destination));
-			   if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
-			       rule->dest_attr.counter)
+			   if (rule->dest_attr.type &
+			       MLX5_FLOW_DESTINATION_TYPE_COUNTER)
 				__entry->counter_id =
-				rule->dest_attr.counter->id;
+					rule->dest_attr.counter_id;
 	    ),
 	    TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
 		      __entry->rule, __entry->fte, __entry->index,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d4ec93b..94d7b69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 #define CREATE_TRACE_POINTS
+#include "lib/eq.h"
 #include "fw_tracer.h"
 #include "fw_tracer_tracepoint.h"
 
@@ -242,6 +243,19 @@
 	return -ENOMEM;
 }
 
+static void
+mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+	tracer->st_arr.saved_traces_index = 0;
+	mutex_init(&tracer->st_arr.lock);
+}
+
+static void
+mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+	mutex_destroy(&tracer->st_arr.lock);
+}
+
 static void mlx5_tracer_read_strings_db(struct work_struct *work)
 {
 	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
@@ -521,9 +535,28 @@
 		list_del(&str_frmt->list);
 }
 
-static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
-				    struct mlx5_core_dev *dev,
-				    u64 trace_timestamp)
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
+				      u64 timestamp, bool lost,
+				      u8 event_id, char *msg)
+{
+	struct mlx5_fw_trace_data *trace_data;
+
+	mutex_lock(&tracer->st_arr.lock);
+	trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index];
+	trace_data->timestamp = timestamp;
+	trace_data->lost = lost;
+	trace_data->event_id = event_id;
+	strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG);
+
+	tracer->st_arr.saved_traces_index =
+		(tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
+	mutex_unlock(&tracer->st_arr.lock);
+}
+
+static noinline
+void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
+			     struct mlx5_core_dev *dev,
+			     u64 trace_timestamp)
 {
 	char	tmp[512];
 
@@ -539,6 +572,9 @@
 	trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
 		      str_frmt->event_id, tmp);
 
+	mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
+				  str_frmt->lost, str_frmt->event_id, tmp);
+
 	/* remove it from hash */
 	mlx5_tracer_clean_message(str_frmt);
 }
@@ -785,6 +821,109 @@
 	mlx5_fw_tracer_start(tracer);
 }
 
+static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev,
+					    u32 *in, int size_in)
+{
+	u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+
+	if (!MLX5_CAP_DEBUG(dev, core_dump_general) &&
+	    !MLX5_CAP_DEBUG(dev, core_dump_qp))
+		return -EOPNOTSUPP;
+
+	return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out),
+				    MLX5_REG_CORE_DUMP, 0, 1);
+}
+
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_tracer *tracer = dev->tracer;
+	u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+	int err;
+
+	if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer)
+		return -EOPNOTSUPP;
+	if (!tracer->owner)
+		return -EPERM;
+
+	MLX5_SET(core_dump_reg, in, core_dump_type, 0x0);
+
+	err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in));
+	if (err)
+		return err;
+	queue_work(tracer->work_queue, &tracer->handle_traces_work);
+	flush_workqueue(tracer->work_queue);
+	return 0;
+}
+
+static int
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
+			     struct mlx5_fw_trace_data *trace_data)
+{
+	int err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+	return 0;
+}
+
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+					    struct devlink_fmsg *fmsg)
+{
+	struct mlx5_fw_trace_data *straces = tracer->st_arr.straces;
+	u32 index, start_index, end_index;
+	u32 saved_traces_index;
+	int err;
+
+	if (!straces[0].timestamp)
+		return -ENOMSG;
+
+	mutex_lock(&tracer->st_arr.lock);
+	saved_traces_index = tracer->st_arr.saved_traces_index;
+	if (straces[saved_traces_index].timestamp)
+		start_index = saved_traces_index;
+	else
+		start_index = 0;
+	end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1);
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces");
+	if (err)
+		goto unlock;
+	index = start_index;
+	while (index != end_index) {
+		err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]);
+		if (err)
+			goto unlock;
+
+		index = (index + 1) & (SAVED_TRACES_NUM - 1);
+	}
+
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+	mutex_unlock(&tracer->st_arr.lock);
+	return err;
+}
+
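
Both the writer (mlx5_fw_tracer_save_trace()) and the reader above wrap ring
indices by masking, which is equivalent to a modulo only because
SAVED_TRACES_NUM (8192) is a power of two, e.g. (8191 + 1) & 8191 == 0. A
compile-time guard in that spirit could look like this (an assumption, not in
the patch; is_power_of_2() comes from <linux/log2.h>):

static inline void saved_traces_ring_check(void)
{
	BUILD_BUG_ON(!is_power_of_2(SAVED_TRACES_NUM));
}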
 /* Create software resources (Buffers, etc ..) */
 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 {
@@ -832,6 +971,7 @@
 		goto free_log_buf;
 	}
 
+	mlx5_fw_tracer_init_saved_traces_array(tracer);
 	mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
 
 	return tracer;
@@ -846,9 +986,9 @@
 	return ERR_PTR(err);
 }
 
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 {
 	struct mlx5_core_dev *dev;
@@ -874,6 +1014,9 @@
 		goto err_dealloc_pd;
 	}
 
+	MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+	mlx5_eq_notifier_register(dev, &tracer->nb);
+
 	mlx5_fw_tracer_start(tracer);
 
 	return 0;
@@ -883,9 +1026,7 @@
 	return err;
 }
 
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 {
 	if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +1034,7 @@
 
 	mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
 		      tracer->owner);
-
+	mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
 	cancel_work_sync(&tracer->ownership_change_work);
 	cancel_work_sync(&tracer->handle_traces_work);
 
@@ -915,6 +1056,7 @@
 	cancel_work_sync(&tracer->read_fw_strings_work);
 	mlx5_fw_tracer_clean_ready_list(tracer);
 	mlx5_fw_tracer_clean_print_hash(tracer);
+	mlx5_fw_tracer_clean_saved_traces_array(tracer);
 	mlx5_fw_tracer_free_strings_db(tracer);
 	mlx5_fw_tracer_destroy_log_buf(tracer);
 	flush_workqueue(tracer->work_queue);
@@ -922,12 +1064,11 @@
 	kfree(tracer);
 }
 
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
 {
-	struct mlx5_fw_tracer *tracer = dev->tracer;
-
-	if (!tracer)
-		return;
+	struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+	struct mlx5_core_dev *dev = tracer->dev;
+	struct mlx5_eqe *eqe = data;
 
 	switch (eqe->sub_type) {
 	case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +1083,8 @@
 		mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
 			      eqe->sub_type);
 	}
+
+	return NOTIFY_OK;
 }
 
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 0347f2d..40601fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -46,6 +46,9 @@
 #define TRACER_BLOCK_SIZE_BYTE 256
 #define TRACES_PER_BLOCK 32
 
+#define TRACE_STR_MSG 256
+#define SAVED_TRACES_NUM 8192
+
 #define TRACER_MAX_PARAMS 7
 #define MESSAGE_HASH_BITS 6
 #define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
@@ -53,8 +56,16 @@
 #define MASK_52_7 (0x1FFFFFFFFFFF80)
 #define MASK_6_0  (0x7F)
 
+struct mlx5_fw_trace_data {
+	u64 timestamp;
+	bool lost;
+	u8 event_id;
+	char msg[TRACE_STR_MSG];
+};
+
 struct mlx5_fw_tracer {
 	struct mlx5_core_dev *dev;
+	struct mlx5_nb        nb;
 	bool owner;
 	u8   trc_ver;
 	struct workqueue_struct *work_queue;
@@ -82,6 +93,13 @@
 		u32 consumer_index;
 	} buff;
 
+	/* Saved Traces Array */
+	struct {
+		struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM];
+		u32 saved_traces_index;
+		struct mutex lock; /* Protect st_arr access */
+	} st_arr;
+
 	u64 last_timestamp;
 	struct work_struct handle_traces_work;
 	struct hlist_head hash[MESSAGE_HASH_SIZE];
@@ -170,6 +188,8 @@
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+					    struct devlink_fmsg *fmsg);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
index 83f90e9..3038be5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
@@ -47,7 +47,7 @@
 	TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
 
 	TP_STRUCT__entry(
-		__string(dev_name, dev_name(&tracer->dev->pdev->dev))
+		__string(dev_name, dev_name(tracer->dev->device))
 		__field(u64, trace_timestamp)
 		__field(bool, lost)
 		__field(u8, event_id)
@@ -55,7 +55,8 @@
 	),
 
 	TP_fast_assign(
-		__assign_str(dev_name, dev_name(&tracer->dev->pdev->dev));
+		__assign_str(dev_name,
+			     dev_name(tracer->dev->device));
 		__entry->trace_timestamp = trace_timestamp;
 		__entry->lost = lost;
 		__entry->event_id = event_id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
new file mode 100644
index 0000000..d2228e3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "ecpf.h"
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
+{
+	return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
+}
+
+static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {};
+
+	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+	MLX5_SET(enable_hca_in, in, function_id, 0);
+	MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {};
+
+	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+	MLX5_SET(disable_hca_in, in, function_id, 0);
+	MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = mlx5_peer_pf_enable_hca(dev);
+	if (err)
+		mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n",
+			      err);
+
+	return err;
+}
+
+static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = mlx5_peer_pf_disable_hca(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n",
+			      err);
+		return;
+	}
+
+	err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages);
+	if (err)
+		mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n",
+			       err);
+}
+
+int mlx5_ec_init(struct mlx5_core_dev *dev)
+{
+	int err = 0;
+
+	if (!mlx5_core_is_ecpf(dev))
+		return 0;
+
+	/* ECPF shall enable HCA for peer PF in the same way a PF
+	 * does this for its VFs.
+	 */
+	err = mlx5_peer_pf_init(dev);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_ecpf(dev))
+		return;
+
+	mlx5_peer_pf_cleanup(dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
new file mode 100644
index 0000000..d3d7a00
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_ECPF_H__
+#define __MLX5_ECPF_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+	MLX5_ECPU_BIT_NUM = 23,
+};
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
+int mlx5_ec_init(struct mlx5_core_dev *dev);
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
+
+#else  /* CONFIG_MLX5_ESWITCH */
+
+static inline bool
+mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
+static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ECPF_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 16ceeb1..f1a7bc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -48,11 +48,13 @@
 #include <linux/rhashtable.h>
 #include <net/switchdev.h>
 #include <net/xdp.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
+#include <linux/bits.h>
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
 #include "en/fs.h"
+#include "lib/hv_vhca.h"
 
 extern const struct net_device_ops mlx5e_netdev_ops;
 struct page_pool;
@@ -75,15 +77,14 @@
 #define MLX5_SKB_FRAG_SZ(len)	(SKB_DATA_ALIGN(len) +	\
 				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
+#define MLX5E_RX_MAX_HEAD (256)
+
 #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
 	(6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
 	max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
-#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
-#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
-#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
-	(cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
-	MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
+	MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ			18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -118,8 +119,6 @@
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
 
-#define MLX5E_RX_MAX_HEAD (256)
-
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
 #define MLX5E_LRO_TIMEOUT_ARR_SIZE                      4
@@ -139,6 +138,7 @@
 #define MLX5E_MAX_NUM_CHANNELS         (MLX5E_INDIR_RQT_SIZE >> 1)
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
 #define MLX5E_TX_CQ_POLL_BUDGET        128
+#define MLX5E_TX_XSK_POLL_BUDGET       64
 #define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
 #define MLX5E_UMR_WQE_INLINE_SZ \
@@ -147,9 +147,6 @@
 	       MLX5_UMR_MTT_ALIGNMENT))
 #define MLX5E_UMR_WQEBBS \
 	(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
-#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
-
-#define MLX5E_NUM_MAIN_GROUPS 9
 
 #define MLX5E_MSG_LEVEL			NETIF_MSG_LINK
 
@@ -160,6 +157,19 @@
 			    ##__VA_ARGS__);                     \
 } while (0)
 
+enum mlx5e_rq_group {
+	MLX5E_RQ_GROUP_REGULAR,
+	MLX5E_RQ_GROUP_XSK,
+#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
+};
+
+static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
+{
+	if (mlx5_lag_is_lacp_owner(mdev))
+		return 1;
+
+	return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS);
+}
 
 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
 {
@@ -173,18 +183,23 @@
 	}
 }
 
+/* Use this function to get max num channels (rxqs/txqs) only to create netdev */
 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 {
 	return is_kdump_kernel() ?
 		MLX5E_MIN_NUM_CHANNELS :
-		min_t(int, mdev->priv.eq_table.num_comp_vectors,
-		      MLX5E_MAX_NUM_CHANNELS);
+		min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
 }
 
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
-	struct mlx5_wqe_eth_seg  eth;
-	struct mlx5_wqe_data_seg data[0];
+	union {
+		struct {
+			struct mlx5_wqe_eth_seg  eth;
+			struct mlx5_wqe_data_seg data[0];
+		};
+		u8 tls_progress_params_ctx[0];
+	};
 };
 
 struct mlx5e_rx_wqe_ll {
@@ -200,34 +215,33 @@
 	struct mlx5_wqe_ctrl_seg       ctrl;
 	struct mlx5_wqe_umr_ctrl_seg   uctrl;
 	struct mlx5_mkey_seg           mkc;
-	struct mlx5_mtt                inline_mtts[0];
+	union {
+		struct mlx5_mtt        inline_mtts[0];
+		u8                     tls_static_params_ctx[0];
+	};
 };
 
 extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
 
-static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
-	"rx_cqe_moder",
-	"tx_cqe_moder",
-	"rx_cqe_compress",
-	"rx_striding_rq",
-};
-
 enum mlx5e_priv_flag {
-	MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
-	MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
-	MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
-	MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
+	MLX5E_PFLAG_RX_CQE_BASED_MODER,
+	MLX5E_PFLAG_TX_CQE_BASED_MODER,
+	MLX5E_PFLAG_RX_CQE_COMPRESS,
+	MLX5E_PFLAG_RX_STRIDING_RQ,
+	MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+	MLX5E_PFLAG_XDP_TX_MPWQE,
+	MLX5E_NUM_PFLAGS, /* Keep last */
 };
 
 #define MLX5E_SET_PFLAG(params, pflag, enable)			\
 	do {							\
 		if (enable)					\
-			(params)->pflags |= (pflag);		\
+			(params)->pflags |= BIT(pflag);		\
 		else						\
-			(params)->pflags &= ~(pflag);		\
+			(params)->pflags &= ~(BIT(pflag));	\
 	} while (0)
 
-#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
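
The private flags are now plain enum indices rather than pre-shifted masks; BIT() is applied inside the accessor macros, and MLX5E_NUM_PFLAGS lets string tables be sized from the enum. A hypothetical caller sketch, assuming the params live at the usual priv->channels.params location:

	struct mlx5e_params *params = &priv->channels.params;

	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, true);
	/* bit 2 of params->pflags is now set */
	WARN_ON(!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
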
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
@@ -240,14 +254,11 @@
 	u16 num_channels;
 	u8  num_tc;
 	bool rx_cqe_compress_def;
-	struct net_dim_cq_moder rx_cq_moderation;
-	struct net_dim_cq_moder tx_cq_moderation;
+	bool tunneled_offload_en;
+	struct dim_cq_moder rx_cq_moderation;
+	struct dim_cq_moder tx_cq_moderation;
 	bool lro_en;
-	u32 lro_wqe_sz;
 	u8  tx_min_inline_mode;
-	u8  rss_hfunc;
-	u8  toeplitz_hash_key[40];
-	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
 	bool vlan_strip_disable;
 	bool scatter_fcs_en;
 	bool rx_dim_enabled;
@@ -255,6 +266,7 @@
 	u32 lro_timeout;
 	u32 pflags;
 	struct bpf_prog *xdp_prog;
+	struct mlx5e_xsk *xsk;
 	unsigned int sw_mtu;
 	int hard_mtu;
 };
@@ -297,7 +309,10 @@
 
 enum {
 	MLX5E_RQ_STATE_ENABLED,
+	MLX5E_RQ_STATE_RECOVERING,
 	MLX5E_RQ_STATE_AM,
+	MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
+	MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
 };
 
 struct mlx5e_cq {
@@ -310,16 +325,18 @@
 	struct mlx5_core_cq        mcq;
 	struct mlx5e_channel      *channel;
 
+	/* control */
+	struct mlx5_core_dev      *mdev;
+	struct mlx5_wq_ctrl        wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_cq_decomp {
 	/* cqe decompression */
 	struct mlx5_cqe64          title;
 	struct mlx5_mini_cqe8      mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
 	u8                         mini_arr_idx;
-	u16                        decmprs_left;
-	u16                        decmprs_wqe_counter;
-
-	/* control */
-	struct mlx5_core_dev      *mdev;
-	struct mlx5_wq_ctrl        wq_ctrl;
+	u16                        left;
+	u16                        wqe_counter;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_tx_wqe_info {
@@ -327,6 +344,9 @@
 	u32 num_bytes;
 	u8  num_wqebbs;
 	u8  num_dma;
+#ifdef CONFIG_MLX5_EN_TLS
+	struct page *resync_dump_frag_page;
+#endif
 };
 
 enum mlx5e_dma_map_type {
@@ -346,11 +366,18 @@
 	MLX5E_SQ_STATE_IPSEC,
 	MLX5E_SQ_STATE_AM,
 	MLX5E_SQ_STATE_TLS,
-	MLX5E_SQ_STATE_REDIRECT,
+	MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
 };
 
 struct mlx5e_sq_wqe_info {
 	u8  opcode;
+
+	/* Auxiliary data for different opcodes. */
+	union {
+		struct {
+			struct mlx5e_rq *rq;
+		} umr;
+	};
 };
 
 struct mlx5e_txqsq {
@@ -359,7 +386,7 @@
 	/* dirtied @completion */
 	u16                        cc;
 	u32                        dma_fifo_cc;
-	struct net_dim             dim; /* Adaptive Moderation */
+	struct dim                 dim; /* Adaptive Moderation */
 
 	/* dirtied @xmit */
 	u16                        pc ____cacheline_aligned_in_smp;
@@ -378,53 +405,127 @@
 	void __iomem              *uar_map;
 	struct netdev_queue       *txq;
 	u32                        sqn;
+	u16                        stop_room;
 	u8                         min_inline_mode;
 	struct device             *pdev;
 	__be32                     mkey_be;
 	unsigned long              state;
+	unsigned int               hw_mtu;
 	struct hwtstamp_config    *tstamp;
 	struct mlx5_clock         *clock;
 
 	/* control path */
 	struct mlx5_wq_ctrl        wq_ctrl;
 	struct mlx5e_channel      *channel;
+	int                        ch_ix;
 	int                        txq_ix;
 	u32                        rate_limit;
-	struct mlx5e_txqsq_recover {
-		struct work_struct         recover_work;
-		u64                        last_recover;
-	} recover;
+	struct work_struct         recover_work;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_dma_info {
-	struct page     *page;
-	dma_addr_t      addr;
+	dma_addr_t addr;
+	union {
+		struct page *page;
+		struct {
+			u64 handle;
+			void *data;
+		} xsk;
+	};
+};
+
+/* XDP packets can be transmitted in different ways. On completion, we need to
+ * distinguish between them to clean things up properly.
+ */
+enum mlx5e_xdp_xmit_mode {
+	/* An xdp_frame was transmitted due to either XDP_REDIRECT from another
+	 * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
+	 * returned.
+	 */
+	MLX5E_XDP_XMIT_MODE_FRAME,
+
+	/* The xdp_frame was created in place as a result of XDP_TX from a
+	 * regular RQ. No DMA remapping happened, and the page belongs to us.
+	 */
+	MLX5E_XDP_XMIT_MODE_PAGE,
+
+	/* No xdp_frame was created at all, the transmit happened from a UMEM
+	 * page. The UMEM Completion Ring producer pointer has to be increased.
+	 */
+	MLX5E_XDP_XMIT_MODE_XSK,
 };
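
A simplified sketch of how a completion handler can dispatch on these modes (not the driver's exact completion path; it assumes the xdpsq keeps a pdev pointer for DMA unmapping, as the txqsq does):

	static void sketch_xdpi_complete(struct mlx5e_xdpsq *sq,
					 struct mlx5e_xdp_info *xdpi)
	{
		switch (xdpi->mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* Unmap and return the frame to its owner. */
			dma_unmap_single(sq->pdev, xdpi->frame.dma_addr,
					 xdpi->frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame(xdpi->frame.xdpf);
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* The page is ours; recycle it via the RQ page pool. */
			mlx5e_page_release_dynamic(xdpi->page.rq,
						   &xdpi->page.di, true);
			break;
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* Advance the UMEM Completion Ring producer by one. */
			xsk_umem_complete_tx(sq->umem, 1);
			break;
		}
	}
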
 
 struct mlx5e_xdp_info {
-	struct xdp_frame      *xdpf;
-	dma_addr_t            dma_addr;
-	struct mlx5e_dma_info di;
+	enum mlx5e_xdp_xmit_mode mode;
+	union {
+		struct {
+			struct xdp_frame *xdpf;
+			dma_addr_t dma_addr;
+		} frame;
+		struct {
+			struct mlx5e_rq *rq;
+			struct mlx5e_dma_info di;
+		} page;
+	};
 };
 
+struct mlx5e_xdp_xmit_data {
+	dma_addr_t  dma_addr;
+	void       *data;
+	u32         len;
+};
+
+struct mlx5e_xdp_info_fifo {
+	struct mlx5e_xdp_info *xi;
+	u32 *cc;
+	u32 *pc;
+	u32 mask;
+};
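
This fifo is a power-of-two ring addressed by free-running producer/consumer counters; mask is size - 1 and wraps the index. A minimal sketch of the accessors (illustrative names, not the driver's):

	static inline void xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
					  struct mlx5e_xdp_info *xi)
	{
		fifo->xi[(*fifo->pc)++ & fifo->mask] = *xi;
	}

	static inline struct mlx5e_xdp_info
	xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
	{
		return fifo->xi[(*fifo->cc)++ & fifo->mask];
	}

The cc/pc pointers point at the xdpsq's xdpi_fifo_cc/xdpi_fifo_pc fields (see struct mlx5e_xdpsq below), so the xmit and completion paths share one pair of counters.
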
+
+struct mlx5e_xdp_wqe_info {
+	u8 num_wqebbs;
+	u8 num_pkts;
+};
+
+struct mlx5e_xdp_mpwqe {
+	/* Current MPWQE session */
+	struct mlx5e_tx_wqe *wqe;
+	u8                   ds_count;
+	u8                   pkt_count;
+	u8                   inline_on;
+};
+
+struct mlx5e_xdpsq;
+typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
+					struct mlx5e_xdp_xmit_data *,
+					struct mlx5e_xdp_info *,
+					int);
+
 struct mlx5e_xdpsq {
 	/* data path */
 
 	/* dirtied @completion */
+	u32                        xdpi_fifo_cc;
 	u16                        cc;
-	bool                       redirect_flush;
 
 	/* dirtied @xmit */
-	u16                        pc ____cacheline_aligned_in_smp;
-	bool                       doorbell;
+	u32                        xdpi_fifo_pc ____cacheline_aligned_in_smp;
+	u16                        pc;
+	struct mlx5_wqe_ctrl_seg   *doorbell_cseg;
+	struct mlx5e_xdp_mpwqe     mpwqe;
 
 	struct mlx5e_cq            cq;
 
 	/* read only */
+	struct xdp_umem           *umem;
 	struct mlx5_wq_cyc         wq;
 	struct mlx5e_xdpsq_stats  *stats;
+	mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
+	mlx5e_fp_xmit_xdp_frame    xmit_xdp_frame;
 	struct {
-		struct mlx5e_xdp_info     *xdpi;
+		struct mlx5e_xdp_wqe_info *wqe_info;
+		struct mlx5e_xdp_info_fifo xdpi_fifo;
 	} db;
 	void __iomem              *uar_map;
 	u32                        sqn;
@@ -441,10 +542,10 @@
 
 struct mlx5e_icosq {
 	/* data path */
+	u16                        cc;
+	u16                        pc;
 
-	/* dirtied @xmit */
-	u16                        pc ____cacheline_aligned_in_smp;
-
+	struct mlx5_wqe_ctrl_seg  *doorbell_cseg;
 	struct mlx5e_cq            cq;
 
 	/* write@xmit, read@completion */
@@ -461,13 +562,9 @@
 	/* control path */
 	struct mlx5_wq_ctrl        wq_ctrl;
 	struct mlx5e_channel      *channel;
-} ____cacheline_aligned_in_smp;
 
-static inline bool
-mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
-{
-	return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
-}
+	struct work_struct         recover_work;
+} ____cacheline_aligned_in_smp;
 
 struct mlx5e_wqe_frag_info {
 	struct mlx5e_dma_info *di;
@@ -511,7 +608,8 @@
 typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
 
 enum mlx5e_rq_flag {
-	MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
+	MLX5E_RQ_FLAG_XDP_XMIT,
+	MLX5E_RQ_FLAG_XDP_REDIRECT,
 };
 
 struct mlx5e_rq_frag_info {
@@ -542,11 +640,15 @@
 			struct mlx5e_mpw_info *info;
 			mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
 			u16                    num_strides;
+			u16                    actual_wq_head;
 			u8                     log_stride_sz;
-			bool                   umr_in_progress;
+			u8                     umr_in_progress;
+			u8                     umr_last_bulk;
+			u8                     umr_completed;
 		} mpwqe;
 	};
 	struct {
+		u16            umem_headroom;
 		u16            headroom;
 		u8             map_dir;   /* dma map direction */
 	} buff;
@@ -556,6 +658,7 @@
 	struct net_device     *netdev;
 	struct mlx5e_rq_stats *stats;
 	struct mlx5e_cq        cq;
+	struct mlx5e_cq_decomp cqd;
 	struct mlx5e_page_cache page_cache;
 	struct hwtstamp_config *tstamp;
 	struct mlx5_clock      *clock;
@@ -568,14 +671,20 @@
 	int                    ix;
 	unsigned int           hw_mtu;
 
-	struct net_dim         dim; /* Dynamic Interrupt Moderation */
+	struct dim         dim; /* Dynamic Interrupt Moderation */
 
 	/* XDP */
 	struct bpf_prog       *xdp_prog;
-	struct mlx5e_xdpsq     xdpsq;
+	struct mlx5e_xdpsq    *xdpsq;
 	DECLARE_BITMAP(flags, 8);
 	struct page_pool      *page_pool;
 
+	/* AF_XDP zero-copy */
+	struct zero_copy_allocator zca;
+	struct xdp_umem       *umem;
+
+	struct work_struct     recover_work;
+
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
 	__be32                 mkey_be;
@@ -588,9 +697,15 @@
 	struct xdp_rxq_info    xdp_rxq;
 } ____cacheline_aligned_in_smp;
 
+enum mlx5e_channel_state {
+	MLX5E_CHANNEL_STATE_XSK,
+	MLX5E_CHANNEL_NUM_STATES
+};
+
 struct mlx5e_channel {
 	/* data path */
 	struct mlx5e_rq            rq;
+	struct mlx5e_xdpsq         rq_xdpsq;
 	struct mlx5e_txqsq         sq[MLX5E_MAX_NUM_TC];
 	struct mlx5e_icosq         icosq;   /* internal control operations */
 	bool                       xdp;
@@ -599,10 +714,18 @@
 	struct net_device         *netdev;
 	__be32                     mkey_be;
 	u8                         num_tc;
+	u8                         lag_port;
 
 	/* XDP_REDIRECT */
 	struct mlx5e_xdpsq         xdpsq;
 
+	/* AF_XDP zero-copy */
+	struct mlx5e_rq            xskrq;
+	struct mlx5e_xdpsq         xsksq;
+	struct mlx5e_icosq         xskicosq;
+	/* xskicosq can be accessed from any CPU - the spinlock protects it. */
+	spinlock_t                 xskicosq_lock;
+
 	/* data path - accessed per napi poll */
 	struct irq_desc *irq_desc;
 	struct mlx5e_ch_stats     *stats;
@@ -611,8 +734,10 @@
 	struct mlx5e_priv         *priv;
 	struct mlx5_core_dev      *mdev;
 	struct hwtstamp_config    *tstamp;
+	DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
 	int                        ix;
 	int                        cpu;
+	cpumask_var_t              xps_cpumask;
 };
 
 struct mlx5e_channels {
@@ -625,14 +750,17 @@
 	struct mlx5e_ch_stats ch;
 	struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
 	struct mlx5e_rq_stats rq;
+	struct mlx5e_rq_stats xskrq;
 	struct mlx5e_xdpsq_stats rq_xdpsq;
 	struct mlx5e_xdpsq_stats xdpsq;
+	struct mlx5e_xdpsq_stats xsksq;
 } ____cacheline_aligned_in_smp;
 
 enum {
-	MLX5E_STATE_ASYNC_EVENTS_ENABLED,
 	MLX5E_STATE_OPENED,
 	MLX5E_STATE_DESTROYING,
+	MLX5E_STATE_XDP_TX_ENABLED,
+	MLX5E_STATE_XDP_OPEN,
 };
 
 struct mlx5e_rqt {
@@ -651,6 +779,40 @@
 	MLX5E_NIC_PRIO
 };
 
+struct mlx5e_rss_params {
+	u32	indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+	u32	rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+	u8	toeplitz_hash_key[40];
+	u8	hfunc;
+};
+
+struct mlx5e_modify_sq_param {
+	int curr_state;
+	int next_state;
+	int rl_update;
+	int rl_index;
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+struct mlx5e_hv_vhca_stats_agent {
+	struct mlx5_hv_vhca_agent *agent;
+	struct delayed_work        work;
+	u16                        delay;
+	void                      *buf;
+};
+#endif
+
+struct mlx5e_xsk {
+	/* UMEMs are stored separately from channels, because we don't want to
+	 * lose them when channels are recreated. The kernel also stores UMEMs,
+	 * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
+	 * so we rely on our own mechanism.
+	 */
+	struct xdp_umem **umems;
+	u16 refcnt;
+	bool ever_used;
+};
+
 struct mlx5e_priv {
 	/* priv data path fields - start */
 	struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -666,11 +828,13 @@
 	struct mlx5e_rq            drop_rq;
 
 	struct mlx5e_channels      channels;
-	u32                        tisn[MLX5E_MAX_NUM_TC];
+	u32                        tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
 	struct mlx5e_rqt           indir_rqt;
 	struct mlx5e_tir           indir_tir[MLX5E_NUM_INDIR_TIRS];
 	struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
 	struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
+	struct mlx5e_tir           xsk_tir[MLX5E_MAX_NUM_CHANNELS];
+	struct mlx5e_rss_params    rss_params;
 	u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
 	struct mlx5e_flow_steering fs;
@@ -679,16 +843,21 @@
 	struct work_struct         update_carrier_work;
 	struct work_struct         set_rx_mode_work;
 	struct work_struct         tx_timeout_work;
-	struct delayed_work        update_stats_work;
+	struct work_struct         update_stats_work;
+	struct work_struct         monitor_counters_work;
+	struct mlx5_nb             monitor_counters_nb;
 
 	struct mlx5_core_dev      *mdev;
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
 	struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+	u16                        max_nch;
 	u8                         max_opened_tc;
 	struct hwtstamp_config     tstamp;
 	u16                        q_counter;
 	u16                        drop_rq_q_counter;
+	struct notifier_block      events_nb;
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	struct mlx5e_dcbx          dcbx;
 #endif
@@ -701,10 +870,16 @@
 #ifdef CONFIG_MLX5_EN_TLS
 	struct mlx5e_tls          *tls;
 #endif
+	struct devlink_health_reporter *tx_reporter;
+	struct devlink_health_reporter *rx_reporter;
+	struct mlx5e_xsk           xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+	struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
 };
 
 struct mlx5e_profile {
-	void	(*init)(struct mlx5_core_dev *mdev,
+	int	(*init)(struct mlx5_core_dev *mdev,
 			struct net_device *netdev,
 			const struct mlx5e_profile *profile, void *ppriv);
 	void	(*cleanup)(struct mlx5e_priv *priv);
@@ -714,42 +889,65 @@
 	void	(*cleanup_tx)(struct mlx5e_priv *priv);
 	void	(*enable)(struct mlx5e_priv *priv);
 	void	(*disable)(struct mlx5e_priv *priv);
+	int	(*update_rx)(struct mlx5e_priv *priv);
 	void	(*update_stats)(struct mlx5e_priv *priv);
 	void	(*update_carrier)(struct mlx5e_priv *priv);
-	int	(*max_nch)(struct mlx5_core_dev *mdev);
 	struct {
 		mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 		mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
 	} rx_handlers;
 	int	max_tc;
+	u8	rq_groups;
 };
 
 void mlx5e_build_ptys2ethtool_map(void);
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       struct net_device *sb_dev,
-		       select_queue_fallback_t fallback);
+		       struct net_device *sb_dev);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5e_tx_wqe *wqe, u16 pi);
+			  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
 
-void mlx5e_completion_event(struct mlx5_core_cq *mcq);
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
 void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
 
+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+	switch (rq->wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+	default:
+		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+	}
+}
+
+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+	switch (rq->wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return rq->mpwqe.wq.cur_sz;
+	default:
+		return rq->wqe.wq.cur_sz;
+	}
+}
+
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params);
 
 void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
-			bool recycle);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+				struct mlx5e_dma_info *dma_info,
+				bool recycle);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
+void mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
 bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
@@ -767,6 +965,8 @@
 			     struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
 
 void mlx5e_update_stats(struct mlx5e_priv *priv);
+void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
+void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
 
 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
 int mlx5e_self_test_num(struct mlx5e_priv *priv);
@@ -797,9 +997,35 @@
 
 int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
 		       struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
-				    enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+				    const struct mlx5e_tirc_config *ttconfig,
 				    void *tirc, bool inner);
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
+
+struct mlx5e_xsk_param;
+
+struct mlx5e_rq_param;
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+		  struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+		  struct xdp_umem *umem, struct mlx5e_rq *rq);
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_close_rq(struct mlx5e_rq *rq);
+
+struct mlx5e_sq_param;
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+		     struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
+void mlx5e_close_icosq(struct mlx5e_icosq *sq);
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+		     struct mlx5e_sq_param *param, struct xdp_umem *umem,
+		     struct mlx5e_xdpsq *sq, bool is_redirect);
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
+
+struct mlx5e_cq_param;
+int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
+		  struct mlx5e_cq_param *param, struct mlx5e_cq *cq);
+void mlx5e_close_cq(struct mlx5e_cq *cq);
 
 int mlx5e_open_locked(struct net_device *netdev);
 int mlx5e_close_locked(struct net_device *netdev);
@@ -812,9 +1038,10 @@
  * switching channels
  */
 typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
-void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
-				struct mlx5e_channels *new_chs,
-				mlx5e_fp_hw_modify hw_modify);
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
+int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
+			       struct mlx5e_channels *new_chs,
+			       mlx5e_fp_hw_modify hw_modify);
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 
@@ -827,66 +1054,22 @@
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params);
+int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
+void mlx5e_activate_rq(struct mlx5e_rq *rq);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
 
-static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+		    struct mlx5e_modify_sq_param *p);
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_tx_disable_queue(struct netdev_queue *txq);
+
+static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
 {
-	return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
-		MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
-				      struct mlx5e_tx_wqe **wqe,
-				      u16 *pi)
-{
-	struct mlx5_wq_cyc *wq = &sq->wq;
-
-	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
-	memset(*wqe, 0, sizeof(**wqe));
-}
-
-static inline
-struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
-{
-	u16                         pi   = mlx5_wq_cyc_ctr2ix(wq, *pc);
-	struct mlx5e_tx_wqe        *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-	struct mlx5_wqe_ctrl_seg   *cseg = &wqe->ctrl;
-
-	memset(cseg, 0, sizeof(*cseg));
-
-	cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
-	cseg->qpn_ds           = cpu_to_be32((sqn << 8) | 0x01);
-
-	(*pc)++;
-
-	return wqe;
-}
-
-static inline
-void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
-		     void __iomem *uar_map,
-		     struct mlx5_wqe_ctrl_seg *ctrl)
-{
-	ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
-	/* ensure wqe is visible to device before updating doorbell record */
-	dma_wmb();
-
-	*wq->db = cpu_to_be32(pc);
-
-	/* ensure doorbell record is visible to device before ringing the
-	 * doorbell
-	 */
-	wmb();
-
-	mlx5_write64((__be32 *)ctrl, uar_map, NULL);
-}
-
-static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
-{
-	struct mlx5_core_cq *mcq;
-
-	mcq = &cq->mcq;
-	mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+	return MLX5_CAP_ETH(mdev, swp) &&
+		MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso);
 }
 
 extern const struct ethtool_ops mlx5e_ethtool_ops;
@@ -907,30 +1090,39 @@
 int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
 
 /* common netdev helpers */
+void mlx5e_create_q_counters(struct mlx5e_priv *priv);
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+		       struct mlx5e_rq *drop_rq);
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
+
 int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
 
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
 
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
 
-int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
-		     u32 underlay_qpn, u32 *tisn);
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
 
 int mlx5e_create_tises(struct mlx5e_priv *priv);
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+void mlx5e_destroy_tises(struct mlx5e_priv *priv);
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
+void mlx5e_update_carrier(struct mlx5e_priv *priv);
 int mlx5e_close(struct net_device *netdev);
 int mlx5e_open(struct net_device *netdev);
-void mlx5e_update_stats_work(struct work_struct *work);
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);
 
+void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
 int mlx5e_bits_invert(unsigned long a, int size);
 
 typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 		     change_hw_mtu_cb set_mtu_cb);
 
@@ -954,22 +1146,57 @@
 			       struct ethtool_coalesce *coal);
 int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 			       struct ethtool_coalesce *coal);
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+				     struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+				     const struct ethtool_link_ksettings *link_ksettings);
+u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
+u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
 int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 			      struct ethtool_ts_info *info);
 int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
 			       struct ethtool_flash *flash);
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+				  struct ethtool_pauseparam *pauseparam);
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+				 struct ethtool_pauseparam *pauseparam);
 
 /* mlx5e generic netdev management API */
+int mlx5e_netdev_init(struct net_device *netdev,
+		      struct mlx5e_priv *priv,
+		      struct mlx5_core_dev *mdev,
+		      const struct mlx5e_profile *profile,
+		      void *ppriv);
+void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv);
 struct net_device*
 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
-		    void *ppriv);
+		    int nch, void *ppriv);
 int mlx5e_attach_netdev(struct mlx5e_priv *priv);
 void mlx5e_detach_netdev(struct mlx5e_priv *priv);
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+			    struct mlx5e_xsk *xsk,
+			    struct mlx5e_rss_params *rss_params,
 			    struct mlx5e_params *params,
 			    u16 max_channels, u16 mtu);
-u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+			   struct mlx5e_params *params);
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+			    u16 num_channels);
 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
+
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+				       struct net_device *netdev,
+				       netdev_features_t features);
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features);
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
+int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi);
+int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats);
+#endif
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 1431232..68d5930 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -10,11 +10,14 @@
 };
 
 struct mlx5e_tc_table {
+	/* protects flow table */
+	struct mutex			t_lock;
 	struct mlx5_flow_table		*t;
 
 	struct rhashtable               ht;
 
-	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+	struct mod_hdr_tbl mod_hdr;
+	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
 	DECLARE_HASHTABLE(hairpin_tbl, 8);
 
 	struct notifier_block     netdevice_nb;
@@ -73,12 +76,34 @@
 	MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
 };
 
+struct mlx5e_tirc_config {
+	u8 l3_prot_type;
+	u8 l4_prot_type;
+	u32 rx_hash_fields;
+};
+
+#define MLX5_HASH_IP		(MLX5_HASH_FIELD_SEL_SRC_IP   |\
+				 MLX5_HASH_FIELD_SEL_DST_IP)
+#define MLX5_HASH_IP_L4PORTS	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
+				 MLX5_HASH_FIELD_SEL_DST_IP   |\
+				 MLX5_HASH_FIELD_SEL_L4_SPORT |\
+				 MLX5_HASH_FIELD_SEL_L4_DPORT)
+#define MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
+				 MLX5_HASH_FIELD_SEL_DST_IP   |\
+				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
 enum mlx5e_tunnel_types {
 	MLX5E_TT_IPV4_GRE,
 	MLX5E_TT_IPV6_GRE,
+	MLX5E_TT_IPV4_IPIP,
+	MLX5E_TT_IPV6_IPIP,
+	MLX5E_TT_IPV4_IPV6,
+	MLX5E_TT_IPV6_IPV6,
 	MLX5E_NUM_TUNNEL_TT,
 };
 
+bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+
 /* L3/L4 traffic type classifier */
 struct mlx5e_ttc_table {
 	struct mlx5e_flow_table  ft;
@@ -116,12 +141,17 @@
 
 void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
 void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
-int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
-int mlx5e_get_rxnfc(struct net_device *dev,
-		    struct ethtool_rxnfc *info, u32 *rule_locs);
+int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
+			    struct ethtool_rxnfc *info, u32 *rule_locs);
 #else
 static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)    { }
 static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
+					  struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
 #endif /* CONFIG_MLX5_EN_RXNFC */
 
 #ifdef CONFIG_MLX5_EN_ARFS
@@ -208,5 +238,8 @@
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 
+bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type);
+bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev);
+
 #endif /* __MLX5E_FLOW_STEER_H__ */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
new file mode 100644
index 0000000..1d6b588
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "lib/eq.h"
+
+int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = cq->channel->priv;
+	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
+	u8 hw_status;
+	void *cqc;
+	int err;
+
+	err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
+	if (err)
+		return err;
+
+	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+	hw_status = MLX5_GET(cqc, cqc, status);
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+	u8 cq_log_stride;
+	u32 cq_sz;
+	int err;
+
+	cq_sz = mlx5_cqwq_get_size(&cq->wq);
+	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+{
+	int err;
+
+	err = mlx5e_reporter_tx_create(priv);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_rx_create(priv);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
+{
+	mlx5e_reporter_rx_destroy(priv);
+	mlx5e_reporter_tx_destroy(priv);
+}
+
+void mlx5e_health_channels_update(struct mlx5e_priv *priv)
+{
+	if (priv->tx_reporter)
+		devlink_health_reporter_state_update(priv->tx_reporter,
+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+	if (priv->rx_reporter)
+		devlink_health_reporter_state_update(priv->rx_reporter,
+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+}
+
+int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
+{
+	struct mlx5_core_dev *mdev = channel->mdev;
+	struct net_device *dev = channel->netdev;
+	struct mlx5e_modify_sq_param msp = {};
+	int err;
+
+	msp.curr_state = MLX5_SQC_STATE_ERR;
+	msp.next_state = MLX5_SQC_STATE_RST;
+
+	err = mlx5e_modify_sq(mdev, sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
+		return err;
+	}
+
+	memset(&msp, 0, sizeof(msp));
+	msp.curr_state = MLX5_SQC_STATE_RST;
+	msp.next_state = MLX5_SQC_STATE_RDY;
+
+	err = mlx5e_modify_sq(mdev, sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
+		return err;
+	}
+
+	return 0;
+}
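
Firmware does not allow an SQ to jump from ERR straight to RDY, hence the two mlx5e_modify_sq() calls above (ERR -> RST, then RST -> RDY). A hypothetical recovery flow built on this helper; the counter reset and reactivation details are assumptions, not the reporter's exact code:

	static int sketch_tx_recover(struct mlx5e_txqsq *sq)
	{
		int err;

		err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
		if (err)
			return err;

		/* Assumed cleanup: drop descriptors flushed while in ERR,
		 * then restart the producer/consumer counters from zero.
		 */
		mlx5e_free_txqsq_descs(sq);
		sq->cc = sq->pc = 0;
		mlx5e_activate_txqsq(sq);
		return 0;
	}
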
+
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
+{
+	int err = 0;
+
+	rtnl_lock();
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto out;
+
+	err = mlx5e_safe_reopen_channels(priv);
+
+out:
+	mutex_unlock(&priv->state_lock);
+	rtnl_unlock();
+
+	return err;
+}
+
+int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
+{
+	u32 eqe_count;
+
+	netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+	eqe_count = mlx5_eq_poll_irq_disabled(eq);
+	if (!eqe_count)
+		return -EIO;
+
+	netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
+		   eqe_count, eq->core.eqn);
+
+	channel->stats->eq_rearm++;
+	return 0;
+}
+
+int mlx5e_health_report(struct mlx5e_priv *priv,
+			struct devlink_health_reporter *reporter, char *err_str,
+			struct mlx5e_err_ctx *err_ctx)
+{
+	if (!reporter) {
+		netdev_err(priv->netdev, "%s", err_str);
+		return err_ctx->recover(&err_ctx->ctx);
+	}
+	return devlink_health_report(reporter, err_str, err_ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
new file mode 100644
index 0000000..d3693fa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_EN_HEALTH_H
+#define __MLX5E_EN_HEALTH_H
+
+#include "en.h"
+
+#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+
+static inline bool cqe_syndrome_needs_recover(u8 syndrome)
+{
+	return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
+	       syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+	       syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+	       syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+}
+
+int mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
+
+int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
+int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
+
+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+
+#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+
+struct mlx5e_err_ctx {
+	int (*recover)(void *ctx);
+	void *ctx;
+};
+
+int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn);
+int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel);
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
+int mlx5e_health_report(struct mlx5e_priv *priv,
+			struct devlink_health_reporter *reporter, char *err_str,
+			struct mlx5e_err_ctx *err_ctx);
+int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_channels_update(struct mlx5e_priv *priv);
+
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 0000000..ac44bbe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+			      struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+	struct mlx5e_channel_stats *stats;
+	int tc;
+
+	stats = &priv->channel_stats[ch];
+	data->rx_packets = stats->rq.packets;
+	data->rx_bytes   = stats->rq.bytes;
+
+	for (tc = 0; tc < priv->max_opened_tc; tc++) {
+		data->tx_packets += stats->sq[tc].packets;
+		data->tx_bytes   += stats->sq[tc].bytes;
+	}
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
+				     int buf_len)
+{
+	int ch, i = 0;
+
+	for (ch = 0; ch < priv->max_nch; ch++) {
+		void *buf = data + i;
+
+		if (WARN_ON_ONCE(buf +
+				 sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+				 data + buf_len))
+			return;
+
+		mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf);
+		i += sizeof(struct mlx5e_hv_vhca_per_ring_stats);
+	}
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+	return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+		priv->max_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+	struct mlx5e_hv_vhca_stats_agent *sagent;
+	struct mlx5_hv_vhca_agent *agent;
+	struct delayed_work *dwork;
+	struct mlx5e_priv *priv;
+	int buf_len, rc;
+	void *buf;
+
+	dwork = to_delayed_work(work);
+	sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+	priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+	buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+	agent = sagent->agent;
+	buf = sagent->buf;
+
+	memset(buf, 0, buf_len);
+	mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+	rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+	if (rc) {
+		mlx5_core_err(priv->mdev,
+			      "%s: Failed to write stats, err = %d\n",
+			      __func__, rc);
+		return;
+	}
+
+	if (sagent->delay)
+		queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+	MLX5_HV_VHCA_STATS_VERSION     = 1,
+	MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+					struct mlx5_hv_vhca_control_block *block)
+{
+	struct mlx5e_hv_vhca_stats_agent *sagent;
+	struct mlx5e_priv *priv;
+
+	priv = mlx5_hv_vhca_agent_priv(agent);
+	sagent = &priv->stats_agent;
+
+	block->version = MLX5_HV_VHCA_STATS_VERSION;
+	block->rings   = priv->max_nch;
+
+	if (!block->command) {
+		cancel_delayed_work_sync(&priv->stats_agent.work);
+		return;
+	}
+
+	sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+			msecs_to_jiffies(block->command * 100);
+
+	queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
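
The command field doubles as the reporting period in units of 100 ms: command == 10 yields delay = msecs_to_jiffies(1000), i.e. one stats push per second; command == MLX5_HV_VHCA_STATS_UPDATE_ONCE (0xFFFF) yields delay = 0, which the work function treats as single-shot; and command == 0 cancels the agent's work entirely.
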
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+	struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+	cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+	int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+	struct mlx5_hv_vhca_agent *agent;
+
+	priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+	if (!priv->stats_agent.buf)
+		return -ENOMEM;
+
+	agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+					  MLX5_HV_VHCA_AGENT_STATS,
+					  mlx5e_hv_vhca_stats_control, NULL,
+					  mlx5e_hv_vhca_stats_cleanup,
+					  priv);
+
+	if (IS_ERR_OR_NULL(agent)) {
+		if (IS_ERR(agent))
+			netdev_warn(priv->netdev,
+				    "Failed to create hv vhca stats agent, err = %ld\n",
+				    PTR_ERR(agent));
+
+		kvfree(priv->stats_agent.buf);
+		return IS_ERR_OR_NULL(agent);
+	}
+
+	priv->stats_agent.agent = agent;
+	INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
+	return 0;
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+	if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+		return;
+
+	mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+	kvfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 0000000..664463f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+
+static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+	return 0;
+}
+
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
new file mode 100644
index 0000000..7cd5b02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include "en.h"
+#include "monitor_stats.h"
+#include "lib/eq.h"
+
+/* The driver sets the following list of watch counters:
+ * Ppcnt.802_3:
+ * a_in_range_length_errors      Type: 0x0, Counter:  0x0, group_id = N/A
+ * a_out_of_range_length_field   Type: 0x0, Counter:  0x1, group_id = N/A
+ * a_frame_too_long_errors       Type: 0x0, Counter:  0x2, group_id = N/A
+ * a_frame_check_sequence_errors Type: 0x0, Counter:  0x3, group_id = N/A
+ * a_alignment_errors            Type: 0x0, Counter:  0x4, group_id = N/A
+ * if_out_discards               Type: 0x0, Counter:  0x5, group_id = N/A
+ * Q_Counters:
+ * Q[index].rx_out_of_buffer   Type: 0x1, Counter:  0x4, group_id = counter_ix
+ */
+
+#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
+#define NUM_REQ_Q_COUNTERS_S1    MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
+		return false;
+	if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
+	    MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) <
+	    NUM_REQ_PPCNT_COUNTER_S1)
+		return false;
+	if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) <
+	    NUM_REQ_Q_COUNTERS_S1)
+		return false;
+	return true;
+}
+
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+{
+	u32  in[MLX5_ST_SZ_DW(arm_monitor_counter_in)]  = {};
+	u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+
+	MLX5_SET(arm_monitor_counter_in, in, opcode,
+		 MLX5_CMD_OP_ARM_MONITOR_COUNTER);
+	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_monitor_counters_work(struct work_struct *work)
+{
+	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+					       monitor_counters_work);
+
+	mutex_lock(&priv->state_lock);
+	mlx5e_update_ndo_stats(priv);
+	mutex_unlock(&priv->state_lock);
+	mlx5e_monitor_counter_arm(priv);
+}
+
+static int mlx5e_monitor_event_handler(struct notifier_block *nb,
+				       unsigned long event, void *eqe)
+{
+	struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv,
+					      monitor_counters_nb);
+	queue_work(priv->wq, &priv->monitor_counters_work);
+	return NOTIFY_OK;
+}
+
+static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
+{
+	MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+		     MONITOR_COUNTER);
+	mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+}
+
+static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
+{
+	mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+	cancel_work_sync(&priv->monitor_counters_work);
+}
+
+static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
+{
+	enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
+
+	for (ppcnt_cnt = 0;
+	     ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1;
+	     ppcnt_cnt++, cnt++) {
+		MLX5_SET(set_monitor_counter_in, in,
+			 monitor_counter[cnt].type,
+			 MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT);
+		MLX5_SET(set_monitor_counter_in, in,
+			 monitor_counter[cnt].counter,
+			 ppcnt_cnt);
+	}
+	return ppcnt_cnt;
+}
+
+static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
+{
+	MLX5_SET(set_monitor_counter_in, in,
+		 monitor_counter[cnt].type,
+		 MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER);
+	MLX5_SET(set_monitor_counter_in, in,
+		 monitor_counter[cnt].counter,
+		 MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER);
+	MLX5_SET(set_monitor_counter_in, in,
+		 monitor_counter[cnt].counter_group_id,
+		 q_counter);
+	return 1;
+}
+
+/* Check mlx5e_monitor_counter_supported() before calling this function. */
+static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
+	int num_q_counters      = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
+	int num_ppcnt_counters  = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
+				  MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
+	u32  in[MLX5_ST_SZ_DW(set_monitor_counter_in)]  = {};
+	u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+	int q_counter = priv->q_counter;
+	int cnt	= 0;
+
+	if (num_ppcnt_counters  >=  NUM_REQ_PPCNT_COUNTER_S1 &&
+	    max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt))
+		cnt += fill_monitor_counter_ppcnt_set1(cnt, in);
+
+	if (num_q_counters      >=  NUM_REQ_Q_COUNTERS_S1 &&
+	    max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) &&
+	    q_counter)
+		cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in);
+
+	MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt);
+	MLX5_SET(set_monitor_counter_in, in, opcode,
+		 MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* Check mlx5e_monitor_counter_supported() before calling this function. */
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
+{
+	INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
+	mlx5e_monitor_counter_start(priv);
+	mlx5e_set_monitor_counter(priv);
+	mlx5e_monitor_counter_arm(priv);
+	queue_work(priv->wq, &priv->update_stats_work);
+}
+
+static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+{
+	u32  in[MLX5_ST_SZ_DW(set_monitor_counter_in)]  = {};
+	u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+
+	MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
+	MLX5_SET(set_monitor_counter_in, in, opcode,
+		 MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* Check mlx5e_monitor_counter_supported() before calling this function. */
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
+{
+	mlx5e_monitor_counter_disable(priv);
+	mlx5e_monitor_counter_stop(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
new file mode 100644
index 0000000..e1ac4b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_MONITOR_H__
+#define __MLX5_MONITOR_H__
+
+int  mlx5e_monitor_counter_supported(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_MONITOR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
new file mode 100644
index 0000000..eb2e1f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "en/params.h"
+
+static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
+				   struct mlx5e_xsk_param *xsk)
+{
+	return params->xdp_prog || xsk;
+}
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+				 struct mlx5e_xsk_param *xsk)
+{
+	u16 headroom = NET_IP_ALIGN;
+
+	if (mlx5e_rx_is_xdp(params, xsk)) {
+		headroom += XDP_PACKET_HEADROOM;
+		if (xsk)
+			headroom += xsk->headroom;
+	} else {
+		headroom += MLX5_RX_HEADROOM;
+	}
+
+	return headroom;
+}
+
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+			     struct mlx5e_xsk_param *xsk)
+{
+	u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+
+	return linear_rq_headroom + hw_mtu;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+				struct mlx5e_xsk_param *xsk)
+{
+	u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
+
+	/* AF_XDP doesn't build SKBs in place. */
+	if (!xsk)
+		frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
+
+	/* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
+	 * special case. It can run with frames smaller than a page, as it
+	 * doesn't allocate pages dynamically. However, here we pretend that
+	 * fragments are page-sized: this lets us treat XSK frames like pages
+	 * by redirecting alloc and free operations to the XSK rings and by
+	 * relying on there never being more than one packet per "page" (which
+	 * is a frame). The latter is important, because frames may arrive in
+	 * a random order, and we would have trouble assembling a real page
+	 * out of multiple frames.
+	 */
+	if (mlx5e_rx_is_xdp(params, xsk))
+		frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
+
+	/* Even if we can go with a smaller fragment size, we must not put
+	 * multiple packets into a single frame.
+	 */
+	if (xsk)
+		frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
+
+	return frag_sz;
+}
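
A worked example for the non-XDP, non-XSK path, assuming typical x86_64 defaults (NET_IP_ALIGN = 0, MLX5_RX_HEADROOM = NET_SKB_PAD = 64, 64-byte SKB alignment, and 22 bytes of hard_mtu overhead for Ethernet + VLAN + FCS): a 1500-byte sw_mtu gives hw_mtu = 1522 and a minimum fragment of 64 + 1522 = 1586 bytes, which MLX5_SKB_FRAG_SZ() rounds up to 1600 and then pads with the aligned struct skb_shared_info (roughly 320 bytes), for a 1920-byte fragment that fits a 4 KiB page with room to spare.
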
+
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+				struct mlx5e_xsk_param *xsk)
+{
+	u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
+
+	return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+}
+
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+			    struct mlx5e_xsk_param *xsk)
+{
+	/* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
+	 * than one page. For this, check both with and without xsk.
+	 */
+	u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+				 mlx5e_rx_get_linear_frag_sz(params, NULL));
+
+	return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
+}
+
+#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
+					  MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+				  struct mlx5e_params *params,
+				  struct mlx5e_xsk_param *xsk)
+{
+	u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
+	s8 signed_log_num_strides_param;
+	u8 log_num_strides;
+
+	if (!mlx5e_rx_is_linear_skb(params, xsk))
+		return false;
+
+	if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+		return false;
+
+	if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+		return true;
+
+	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+	signed_log_num_strides_param =
+		(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+
+	return signed_log_num_strides_param >= 0;
+}
+
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+			       struct mlx5e_xsk_param *xsk)
+{
+	u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
+
+	/* Numbers are unsigned, don't subtract to avoid underflow. */
+	if (params->log_rq_mtu_frames <
+	    log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+		return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+	return params->log_rq_mtu_frames - log_pkts_per_wqe;
+}
+
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params,
+				   struct mlx5e_xsk_param *xsk)
+{
+	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+		return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
+
+	return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+}
+
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params,
+				   struct mlx5e_xsk_param *xsk)
+{
+	return MLX5_MPWRQ_LOG_WQE_SZ -
+		mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+}
+
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+			  struct mlx5e_params *params,
+			  struct mlx5e_xsk_param *xsk)
+{
+	bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+		mlx5e_rx_is_linear_skb(params, xsk) :
+		mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+
+	return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
new file mode 100644
index 0000000..989d8f4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_PARAMS_H__
+#define __MLX5_EN_PARAMS_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param {
+	u16 headroom;
+	u16 chunk_size;
+};
+
+struct mlx5e_rq_param {
+	u32                        rqc[MLX5_ST_SZ_DW(rqc)];
+	struct mlx5_wq_param       wq;
+	struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+	u32                        sqc[MLX5_ST_SZ_DW(sqc)];
+	struct mlx5_wq_param       wq;
+	bool                       is_mpw;
+};
+
+struct mlx5e_cq_param {
+	u32                        cqc[MLX5_ST_SZ_DW(cqc)];
+	struct mlx5_wq_param       wq;
+	u16                        eq_ix;
+	u8                         cq_period_mode;
+};
+
+struct mlx5e_channel_param {
+	struct mlx5e_rq_param      rq;
+	struct mlx5e_sq_param      sq;
+	struct mlx5e_sq_param      xdp_sq;
+	struct mlx5e_sq_param      icosq;
+	struct mlx5e_cq_param      rx_cq;
+	struct mlx5e_cq_param      tx_cq;
+	struct mlx5e_cq_param      icosq_cq;
+};
+
+static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
+						u16 qid,
+						enum mlx5e_rq_group group,
+						u16 *ix)
+{
+	int nch = params->num_channels;
+	int ch = qid - nch * group;
+
+	if (ch < 0 || ch >= nch)
+		return false;
+
+	*ix = ch;
+	return true;
+}
+
+static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
+					      u16 qid,
+					      u16 *ix,
+					      enum mlx5e_rq_group *group)
+{
+	u16 nch = params->num_channels;
+
+	*ix = qid % nch;
+	*group = qid / nch;
+}
+
+static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
+				      struct mlx5e_params *params, u64 qid)
+{
+	return qid < params->num_channels * profile->rq_groups;
+}
+
+/* Parameter calculations */
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+				 struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+			     struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+				struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+				struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+			    struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+				  struct mlx5e_params *params,
+				  struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+			       struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params,
+				   struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params,
+				   struct mlx5e_xsk_param *xsk);
+u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+			  struct mlx5e_params *params,
+			  struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+			  struct mlx5e_params *params,
+			  struct mlx5e_xsk_param *xsk,
+			  struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+				 struct mlx5e_sq_param *param);
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_xsk_param *xsk,
+			     struct mlx5e_cq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_cq_param *param);
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+			      u8 log_wq_size,
+			      struct mlx5e_cq_param *param);
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+			     u8 log_wq_size,
+			     struct mlx5e_sq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_sq_param *param);
+
+#endif /* __MLX5_EN_PARAMS_H__ */
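
The qid helpers above encode a simple layout: queue ids are consecutive blocks of num_channels, one block per RQ group (group 0 holds the regular RQs and group 1 the XSK RQs in this driver). A minimal userspace sketch of the mapping, with num_channels assumed to be 8 for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int nch = 8;	/* params->num_channels, assumed */
	unsigned int qid;

	for (qid = 0; qid < 2 * nch; qid++) {
		/* as in mlx5e_qid_get_ch_and_group() */
		unsigned int ix = qid % nch;	/* channel index */
		unsigned int group = qid / nch;	/* RQ group */

		printf("qid %2u -> channel %u, group %u\n", qid, ix, group);
	}
	return 0;
}
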
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 12e1682..f777994 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -63,66 +63,174 @@
 	[MLX5E_50GBASE_KR2]       = 50000,
 };
 
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+	[MLX5E_SGMII_100M]			= 100,
+	[MLX5E_1000BASE_X_SGMII]		= 1000,
+	[MLX5E_5GBASE_R]			= 5000,
+	[MLX5E_10GBASE_XFI_XAUI_1]		= 10000,
+	[MLX5E_40GBASE_XLAUI_4_XLPPI_4]		= 40000,
+	[MLX5E_25GAUI_1_25GBASE_CR_KR]		= 25000,
+	[MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2]	= 50000,
+	[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR]	= 50000,
+	[MLX5E_CAUI_4_100GBASE_CR4_KR4]		= 100000,
+	[MLX5E_200GAUI_4_200GBASE_CR4_KR4]	= 200000,
+	[MLX5E_400GAUI_8]			= 400000,
+};
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+				     const u32 **arr, u32 *size,
+				     bool force_legacy)
+{
+	bool ext = force_legacy ? false : MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+
+	*size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+		      ARRAY_SIZE(mlx5e_link_speed);
+	*arr  = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+			      struct mlx5e_port_eth_proto *eproto)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	int err;
+
+	if (!eproto)
+		return -EINVAL;
+
+	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+	if (err)
+		return err;
+
+	eproto->cap   = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+					   eth_proto_capability);
+	eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+	eproto->oper  = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+	return 0;
+}
+
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+				 u8 *an_disable_cap, u8 *an_disable_admin)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+	*an_status = 0;
+	*an_disable_cap = 0;
+	*an_disable_admin = 0;
+
+	if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1))
+		return;
+
+	*an_status = MLX5_GET(ptys_reg, out, an_status);
+	*an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+	*an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+			   u32 proto_admin, bool ext)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+	u8 an_disable_admin;
+	u8 an_disable_cap;
+	u8 an_status;
+
+	mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap,
+				    &an_disable_admin);
+	if (!an_disable_cap && an_disable)
+		return -EPERM;
+
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(ptys_reg, in, local_port, 1);
+	MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+	MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
+	if (ext)
+		MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin);
+	else
+		MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out,
+				    sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+			  bool force_legacy)
 {
 	unsigned long temp = eth_proto_oper;
+	const u32 *table;
 	u32 speed = 0;
+	u32 max_size;
 	int i;
 
-	i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
-	if (i < MLX5E_LINK_MODES_NUMBER)
-		speed = mlx5e_link_speed[i];
-
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+	i = find_first_bit(&temp, max_size);
+	if (i < max_size)
+		speed = table[i];
 	return speed;
 }
 
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
-	u32 eth_proto_oper;
+	struct mlx5e_port_eth_proto eproto;
+	bool force_legacy = false;
+	bool ext;
 	int err;
 
-	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+	ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err)
-		return err;
-
-	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-	*speed = mlx5e_port_ptys2speed(eth_proto_oper);
+		goto out;
+	if (ext && !eproto.admin) {
+		force_legacy = true;
+		err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto);
+		if (err)
+			goto out;
+	}
+	*speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy);
 	if (!(*speed))
 		err = -EINVAL;
 
+out:
 	return err;
 }
 
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
+	struct mlx5e_port_eth_proto eproto;
 	u32 max_speed = 0;
-	u32 proto_cap;
+	const u32 *table;
+	u32 max_size;
+	bool ext;
 	int err;
 	int i;
 
-	err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+	ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err)
 		return err;
 
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
-		if (proto_cap & MLX5E_PROT_MASK(i))
-			max_speed = max(max_speed, mlx5e_link_speed[i]);
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
+	for (i = 0; i < max_size; ++i)
+		if (eproto.cap & MLX5E_PROT_MASK(i))
+			max_speed = max(max_speed, table[i]);
 
 	*speed = max_speed;
 	return 0;
 }
 
-u32 mlx5e_port_speed2linkmodes(u32 speed)
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+			       bool force_legacy)
 {
 	u32 link_modes = 0;
+	const u32 *table;
+	u32 max_size;
 	int i;
 
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
-		if (mlx5e_link_speed[i] == speed)
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+	for (i = 0; i < max_size; ++i) {
+		if (table[i] == speed)
 			link_modes |= MLX5E_PROT_MASK(i);
 	}
-
 	return link_modes;
 }
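
mlx5e_port_ptys2speed() and mlx5e_port_speed2linkmodes() are inverse table walks: one takes the first set bit of a PTYS protocol bitmask and indexes the per-link-mode speed table, the other collects every mode whose table entry equals the requested speed. A minimal userspace sketch of the forward direction, using a truncated, purely illustrative speed table:

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* illustrative subset of a per-link-mode speed table, in Mbps */
static const unsigned int link_speed[] = {
	[0] = 1000,	/* e.g. a 1G SGMII mode */
	[1] = 1000,	/* e.g. a 1G KX mode */
	[2] = 10000,	/* e.g. a 10G CX4 mode */
	[3] = 10000,	/* e.g. a 10G KX4 mode */
};

int main(void)
{
	unsigned int eth_proto_oper = 0x4;	/* PTYS bitmask, bit 2 set */
	int i = ffs(eth_proto_oper) - 1;	/* first set bit, as in find_first_bit() */
	unsigned int speed = 0;

	if (i >= 0 && i < (int)(sizeof(link_speed) / sizeof(link_speed[0])))
		speed = link_speed[i];
	printf("oper speed: %u Mbps\n", speed);	/* 10000 */
	return 0;
}
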
 
@@ -233,3 +341,207 @@
 	kfree(out);
 	return err;
 }
+
+static u32 fec_supported_speeds[] = {
+	10000,
+	40000,
+	25000,
+	50000,
+	56000,
+	100000
+};
+
+#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds)
+
+/* get/set FEC admin field for a given speed */
+static int mlx5e_fec_admin_field(u32 *pplm,
+				 u8 *fec_policy,
+				 bool write,
+				 u32 speed)
+{
+	switch (speed) {
+	case 10000:
+	case 40000:
+		if (!write)
+			*fec_policy = MLX5_GET(pplm_reg, pplm,
+					       fec_override_admin_10g_40g);
+		else
+			MLX5_SET(pplm_reg, pplm,
+				 fec_override_admin_10g_40g, *fec_policy);
+		break;
+	case 25000:
+		if (!write)
+			*fec_policy = MLX5_GET(pplm_reg, pplm,
+					       fec_override_admin_25g);
+		else
+			MLX5_SET(pplm_reg, pplm,
+				 fec_override_admin_25g, *fec_policy);
+		break;
+	case 50000:
+		if (!write)
+			*fec_policy = MLX5_GET(pplm_reg, pplm,
+					       fec_override_admin_50g);
+		else
+			MLX5_SET(pplm_reg, pplm,
+				 fec_override_admin_50g, *fec_policy);
+		break;
+	case 56000:
+		if (!write)
+			*fec_policy = MLX5_GET(pplm_reg, pplm,
+					       fec_override_admin_56g);
+		else
+			MLX5_SET(pplm_reg, pplm,
+				 fec_override_admin_56g, *fec_policy);
+		break;
+	case 100000:
+		if (!write)
+			*fec_policy = MLX5_GET(pplm_reg, pplm,
+					       fec_override_admin_100g);
+		else
+			MLX5_SET(pplm_reg, pplm,
+				 fec_override_admin_100g, *fec_policy);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* returns FEC capabilities for a given speed */
+static int mlx5e_get_fec_cap_field(u32 *pplm,
+				   u8 *fec_cap,
+				   u32 speed)
+{
+	switch (speed) {
+	case 10000:
+	case 40000:
+		*fec_cap = MLX5_GET(pplm_reg, pplm,
+				    fec_override_cap_10g_40g);
+		break;
+	case 25000:
+		*fec_cap = MLX5_GET(pplm_reg, pplm,
+				    fec_override_cap_25g);
+		break;
+	case 50000:
+		*fec_cap = MLX5_GET(pplm_reg, pplm,
+				    fec_override_cap_50g);
+		break;
+	case 56000:
+		*fec_cap = MLX5_GET(pplm_reg, pplm,
+				    fec_override_cap_56g);
+		break;
+	case 100000:
+		*fec_cap = MLX5_GET(pplm_reg, pplm,
+				    fec_override_cap_100g);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps)
+{
+	u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+	u32 current_fec_speed;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, pcam_reg))
+		return -EOPNOTSUPP;
+
+	if (!MLX5_CAP_PCAM_REG(dev, pplm))
+		return -EOPNOTSUPP;
+
+	MLX5_SET(pplm_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+	if (err)
+		return err;
+
+	err = mlx5e_port_linkspeed(dev, &current_fec_speed);
+	if (err)
+		return err;
+
+	return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed);
+}
+
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+		       u8 *fec_configured_mode)
+{
+	u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+	u32 link_speed;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, pcam_reg))
+		return -EOPNOTSUPP;
+
+	if (!MLX5_CAP_PCAM_REG(dev, pplm))
+		return -EOPNOTSUPP;
+
+	MLX5_SET(pplm_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+	if (err)
+		return err;
+
+	*fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active);
+
+	if (!fec_configured_mode)
+		return 0;
+
+	err = mlx5e_port_linkspeed(dev, &link_speed);
+	if (err)
+		return err;
+
+	return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed);
+}
+
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
+{
+	u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC);
+	bool fec_mode_not_supp_in_speed = false;
+	u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+	u8 fec_policy_auto = 0;
+	u8 fec_caps = 0;
+	int err;
+	int i;
+
+	if (!MLX5_CAP_GEN(dev, pcam_reg))
+		return -EOPNOTSUPP;
+
+	if (!MLX5_CAP_PCAM_REG(dev, pplm))
+		return -EOPNOTSUPP;
+
+	MLX5_SET(pplm_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+	if (err)
+		return err;
+
+	MLX5_SET(pplm_reg, out, local_port, 1);
+
+	for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) {
+		mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]);
+		/* policy supported for link speed, or policy is auto */
+		if (fec_caps & fec_policy || fec_policy == fec_policy_auto) {
+			mlx5e_fec_admin_field(out, &fec_policy, 1,
+					      fec_supported_speeds[i]);
+		} else {
+			/* turn off FEC if supported; otherwise leave it unchanged */
+			if (fec_caps & fec_policy_nofec)
+				mlx5e_fec_admin_field(out, &fec_policy_nofec, 1,
+						      fec_supported_speeds[i]);
+			fec_mode_not_supp_in_speed = true;
+		}
+	}
+
+	if (fec_mode_not_supp_in_speed)
+		mlx5_core_dbg(dev,
+			      "FEC policy 0x%x is not supported for some speeds",
+			      fec_policy);
+
+	return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1);
+}
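
The per-speed loop in mlx5e_set_fec_mode() boils down to a bit test: the requested policy is applied for a speed only if the device's capability mask for that speed contains it (or the policy is auto, i.e. 0); otherwise the driver falls back to no FEC when that is supported. A minimal sketch of that decision, reusing the MLX5E_FEC_* bit positions declared in port.h:

#include <stdio.h>

enum {
	MLX5E_FEC_NOFEC,
	MLX5E_FEC_FIRECODE,
	MLX5E_FEC_RS_528_514,
};

int main(void)
{
	/* capability mask for one speed: no-FEC and FireCode supported */
	unsigned char fec_caps = (1 << MLX5E_FEC_NOFEC) |
				 (1 << MLX5E_FEC_FIRECODE);
	unsigned char fec_policy = 1 << MLX5E_FEC_RS_528_514;	/* requested */
	unsigned char fec_policy_auto = 0;

	if ((fec_caps & fec_policy) || fec_policy == fec_policy_auto)
		printf("apply requested policy for this speed\n");
	else if (fec_caps & (1 << MLX5E_FEC_NOFEC))
		printf("unsupported here - fall back to no FEC\n");	/* taken */
	else
		printf("leave the speed's FEC setting unchanged\n");
	return 0;
}
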
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index f8cbd81..4a7f449 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -36,13 +36,39 @@
 #include <linux/mlx5/driver.h>
 #include "en.h"
 
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+struct mlx5e_port_eth_proto {
+	u32 cap;
+	u32 admin;
+	u32 oper;
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+			      struct mlx5e_port_eth_proto *eproto);
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+				 u8 *an_disable_cap, u8 *an_disable_admin);
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+			   u32 proto_admin, bool ext);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+			  bool force_legacy);
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(u32 speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+			       bool force_legacy);
 
 int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
 int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
 int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
 int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+
+int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps);
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+		       u8 *fec_configured_mode);
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy);
+
+enum {
+	MLX5E_FEC_NOFEC,
+	MLX5E_FEC_FIRECODE,
+	MLX5E_FEC_RS_528_514,
+};
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index eac245a..633b117 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -122,7 +122,9 @@
 	return err;
 }
 
-/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */
+/* xoff = ((301 + 2.16 * len [m]) * speed [Gbps] + 2.72 * MTU [B])
+ * The minimum speed value used is 40Gbps.
+ */
 static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
 {
 	u32 speed;
@@ -130,10 +132,9 @@
 	int err;
 
 	err = mlx5e_port_linkspeed(priv->mdev, &speed);
-	if (err) {
-		mlx5_core_warn(priv->mdev, "cannot get port speed\n");
-		return 0;
-	}
+	if (err)
+		speed = SPEED_40000;
+	speed = max_t(u32, speed, SPEED_40000);
 
 	xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
 
@@ -142,7 +143,7 @@
 }
 
 static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
-				 u32 xoff, unsigned int mtu)
+				 u32 xoff, unsigned int max_mtu)
 {
 	int i;
 
@@ -154,36 +155,37 @@
 		}
 
 		if (port_buffer->buffer[i].size <
-		    (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
+		    (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
 			return -ENOMEM;
 
 		port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
-		port_buffer->buffer[i].xon  = port_buffer->buffer[i].xoff - mtu;
+		port_buffer->buffer[i].xon  =
+			port_buffer->buffer[i].xoff - max_mtu;
 	}
 
 	return 0;
 }
 
 /**
- * update_buffer_lossy()
- *   mtu: device's MTU
- *   pfc_en: <input> current pfc configuration
- *   buffer: <input> current prio to buffer mapping
- *   xoff:   <input> xoff value
- *   port_buffer: <output> port receive buffer configuration
- *   change: <output>
+ *	update_buffer_lossy	- Update buffer configuration based on pfc
+ *	@max_mtu: netdev's max_mtu
+ *	@pfc_en: <input> current pfc configuration
+ *	@buffer: <input> current prio to buffer mapping
+ *	@xoff:   <input> xoff value
+ *	@port_buffer: <output> port receive buffer configuration
+ *	@change: <output>
  *
- *   Update buffer configuration based on pfc configuraiton and priority
- *   to buffer mapping.
- *   Buffer's lossy bit is changed to:
- *     lossless if there is at least one PFC enabled priority mapped to this buffer
- *     lossy if all priorities mapped to this buffer are PFC disabled
+ *	Update buffer configuration based on pfc configuration and
+ *	priority to buffer mapping.
+ *	Buffer's lossy bit is changed to:
+ *		lossless if there is at least one PFC enabled priority
+ *		mapped to this buffer;
+ *		lossy if all priorities mapped to this buffer are PFC
+ *		disabled.
  *
- *   Return:
- *     Return 0 if no error.
- *     Set change to true if buffer configuration is modified.
+ *	Return: 0 if no error.
+ *	Sets change to true if the buffer configuration was modified.
  */
-static int update_buffer_lossy(unsigned int mtu,
+static int update_buffer_lossy(unsigned int max_mtu,
 			       u8 pfc_en, u8 *buffer, u32 xoff,
 			       struct mlx5e_port_buffer *port_buffer,
 			       bool *change)
@@ -220,7 +222,7 @@
 	}
 
 	if (changed) {
-		err = update_xoff_threshold(port_buffer, xoff, mtu);
+		err = update_xoff_threshold(port_buffer, xoff, max_mtu);
 		if (err)
 			return err;
 
@@ -230,6 +232,7 @@
 	return 0;
 }
 
+#define MINIMUM_MAX_MTU 9216
 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
 				    u32 change, unsigned int mtu,
 				    struct ieee_pfc *pfc,
@@ -241,12 +244,14 @@
 	bool update_prio2buffer = false;
 	u8 buffer[MLX5E_MAX_PRIORITY];
 	bool update_buffer = false;
+	unsigned int max_mtu;
 	u32 total_used = 0;
 	u8 curr_pfc_en;
 	int err;
 	int i;
 
 	mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+	max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
 
 	err = mlx5e_port_query_buffer(priv, &port_buffer);
 	if (err)
@@ -254,7 +259,7 @@
 
 	if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
 		update_buffer = true;
-		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
 		if (err)
 			return err;
 	}
@@ -264,7 +269,7 @@
 		if (err)
 			return err;
 
-		err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff,
+		err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff,
 					  &port_buffer, &update_buffer);
 		if (err)
 			return err;
@@ -276,8 +281,8 @@
 		if (err)
 			return err;
 
-		err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff,
-					  &port_buffer, &update_buffer);
+		err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer,
+					  xoff, &port_buffer, &update_buffer);
 		if (err)
 			return err;
 	}
@@ -301,7 +306,7 @@
 			return -EINVAL;
 
 		update_buffer = true;
-		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
 		if (err)
 			return err;
 	}
@@ -309,7 +314,7 @@
 	/* Need to update buffer configuration if xoff value is changed */
 	if (!update_buffer && xoff != priv->dcbx.xoff) {
 		update_buffer = true;
-		err = update_xoff_threshold(&port_buffer, xoff, mtu);
+		err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
 		if (err)
 			return err;
 	}
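
The xoff threshold computed by calculate_xoff() earlier in this file is the comment's formula with the fractional factors integer-scaled (216/100 for 2.16, 272/100 for 2.72) and speed kept in Mbps. A worked example as a minimal userspace sketch, with the cable length and MTU values assumed for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int speed = 40000;	/* 40 Gbps in Mbps, the enforced minimum */
	unsigned int cable_len = 7;	/* meters, assumed */
	unsigned int mtu = 9216;	/* bytes, assumed */

	/* same integer scaling as calculate_xoff() */
	unsigned int xoff = (301 + 216 * cable_len / 100) * speed / 1000 +
			    272 * mtu / 100;

	printf("xoff threshold: %u bytes\n", xoff);	/* 37707 */
	return 0;
}
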
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
new file mode 100644
index 0000000..b860569
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "params.h"
+
+static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+	void *out;
+	void *rqc;
+	int err;
+
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_core_query_rq(dev, rqn, out);
+	if (err)
+		goto out;
+
+	rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+	*state = MLX5_GET(rqc, rqc, state);
+
+out:
+	kvfree(out);
+	return err;
+}
+
+static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
+{
+	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+	while (time_before(jiffies, exp_time)) {
+		if (icosq->cc == icosq->pc)
+			return 0;
+
+		msleep(20);
+	}
+
+	netdev_err(icosq->channel->netdev,
+		   "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n",
+		   icosq->sqn, icosq->cc, icosq->pc);
+
+	return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
+{
+	WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+		  icosq->sqn, icosq->cc, icosq->pc);
+	icosq->cc = 0;
+	icosq->pc = 0;
+}
+
+static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+{
+	struct mlx5_core_dev *mdev;
+	struct mlx5e_icosq *icosq;
+	struct net_device *dev;
+	struct mlx5e_rq *rq;
+	u8 state;
+	int err;
+
+	icosq = ctx;
+	rq = &icosq->channel->rq;
+	mdev = icosq->channel->mdev;
+	dev = icosq->channel->netdev;
+	err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n",
+			   icosq->sqn, err);
+		goto out;
+	}
+
+	if (state != MLX5_SQC_STATE_ERR)
+		goto out;
+
+	mlx5e_deactivate_rq(rq);
+	err = mlx5e_wait_for_icosq_flush(icosq);
+	if (err)
+		goto out;
+
+	mlx5e_deactivate_icosq(icosq);
+
+	/* At this point, both the rq and the icosq are disabled */
+
+	err = mlx5e_health_sq_to_ready(icosq->channel, icosq->sqn);
+	if (err)
+		goto out;
+
+	mlx5e_reset_icosq_cc_pc(icosq);
+	mlx5e_free_rx_descs(rq);
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+	mlx5e_activate_icosq(icosq);
+	mlx5e_activate_rq(rq);
+
+	rq->stats->recover++;
+	return 0;
+out:
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+	return err;
+}
+
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+{
+	struct mlx5e_priv *priv = icosq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = icosq;
+	err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
+	sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+{
+	struct net_device *dev = rq->netdev;
+	int err;
+
+	err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
+	if (err) {
+		netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+		return err;
+	}
+	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+	if (err) {
+		netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+		return err;
+	}
+
+	return 0;
+}
+
+static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
+{
+	struct mlx5_core_dev *mdev;
+	struct net_device *dev;
+	struct mlx5e_rq *rq;
+	u8 state;
+	int err;
+
+	rq = ctx;
+	mdev = rq->mdev;
+	dev = rq->netdev;
+	err = mlx5e_query_rq_state(mdev, rq->rqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query RQ 0x%x state. err = %d\n",
+			   rq->rqn, err);
+		goto out;
+	}
+
+	if (state != MLX5_RQC_STATE_ERR)
+		goto out;
+
+	mlx5e_deactivate_rq(rq);
+	mlx5e_free_rx_descs(rq);
+
+	err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
+	if (err)
+		goto out;
+
+	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+	mlx5e_activate_rq(rq);
+	rq->stats->recover++;
+	return 0;
+out:
+	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+	return err;
+}
+
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
+{
+	struct mlx5e_priv *priv = rq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = rq;
+	err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
+	sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+	struct mlx5e_icosq *icosq;
+	struct mlx5_eq_comp *eq;
+	struct mlx5e_rq *rq;
+	int err;
+
+	rq = ctx;
+	icosq = &rq->channel->icosq;
+	eq = rq->cq.mcq.eq;
+	err = mlx5e_health_channel_eq_recover(eq, rq->channel);
+	if (err)
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+
+	return err;
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
+	struct mlx5e_priv *priv = rq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = rq;
+	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+	sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
+		icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
+static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+	return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
+				     void *context)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_err_ctx *err_ctx = context;
+
+	return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) :
+			 mlx5e_health_recover_channels(priv);
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
+						   struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = rq->channel->priv;
+	struct mlx5e_params *params;
+	struct mlx5e_icosq *icosq;
+	u8 icosq_hw_state;
+	int wqes_sz;
+	u8 hw_state;
+	u16 wq_head;
+	int err;
+
+	params = &priv->channels.params;
+	icosq = &rq->channel->icosq;
+	err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state);
+	if (err)
+		return err;
+
+	err = mlx5_core_query_sq_state(priv->mdev, icosq->sqn, &icosq_hw_state);
+	if (err)
+		return err;
+
+	wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
+	wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+		  rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq);
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->channel->ix);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
+				      struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_params *params = &priv->channels.params;
+	struct mlx5e_rq *generic_rq;
+	u32 rq_stride, rq_sz;
+	int i, err = 0;
+
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+
+	generic_rq = &priv->channels.c[0]->rq;
+	rq_sz = mlx5e_rqwq_get_size(generic_rq);
+	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config");
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ");
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
+	if (err)
+		goto unlock;
+
+	for (i = 0; i < priv->channels.num; i++) {
+		struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
+
+		err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
+		if (err)
+			goto unlock;
+	}
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+	if (err)
+		goto unlock;
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return err;
+}
+
+static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
+	.name = "rx",
+	.recover = mlx5e_rx_reporter_recover,
+	.diagnose = mlx5e_rx_reporter_diagnose,
+};
+
+#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
+
+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
+{
+	struct devlink *devlink = priv_to_devlink(priv->mdev);
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_create(devlink,
+						  &mlx5_rx_reporter_ops,
+						  MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
+						  true, priv);
+	if (IS_ERR(reporter)) {
+		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
+			    PTR_ERR(reporter));
+		return PTR_ERR(reporter);
+	}
+	priv->rx_reporter = reporter;
+	return 0;
+}
+
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
+{
+	if (!priv->rx_reporter)
+		return;
+
+	devlink_health_reporter_destroy(priv->rx_reporter);
+}
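
Both health reporters rely on the same flush idiom: poll until the queue's consumer counter (cc) catches up with its producer counter (pc), giving up after a bounded wait, as in mlx5e_wait_for_icosq_flush() above. A minimal userspace sketch of the pattern, with clock_gettime() standing in for jiffies and a fake producer standing in for hardware progress:

#include <stdio.h>
#include <time.h>
#include <errno.h>

struct sq { unsigned int cc, pc; };

static int wait_for_sq_flush(struct sq *sq, int timeout_ms)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (sq->cc == sq->pc)	/* all posted work consumed */
			return 0;

		clock_gettime(CLOCK_MONOTONIC, &now);
		if ((now.tv_sec - start.tv_sec) * 1000 +
		    (now.tv_nsec - start.tv_nsec) / 1000000 >= timeout_ms)
			return -ETIMEDOUT;

		sq->cc++;	/* stand-in for hardware making progress */
	}
}

int main(void)
{
	struct sq sq = { .cc = 0, .pc = 5 };

	printf("flush: %d\n", wait_for_sq_flush(&sq, 2000));	/* 0 */
	return 0;
}
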
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
new file mode 100644
index 0000000..bfed558
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "health.h"
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+	while (time_before(jiffies, exp_time)) {
+		if (sq->cc == sq->pc)
+			return 0;
+
+		msleep(20);
+	}
+
+	netdev_err(sq->channel->netdev,
+		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+		   sq->sqn, sq->cc, sq->pc);
+
+	return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+	WARN_ONCE(sq->cc != sq->pc,
+		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+		  sq->sqn, sq->cc, sq->pc);
+	sq->cc = 0;
+	sq->dma_fifo_cc = 0;
+	sq->pc = 0;
+}
+
+static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
+{
+	struct mlx5_core_dev *mdev;
+	struct net_device *dev;
+	struct mlx5e_txqsq *sq;
+	u8 state;
+	int err;
+
+	sq = ctx;
+	mdev = sq->channel->mdev;
+	dev = sq->channel->netdev;
+
+	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+		return 0;
+
+	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+			   sq->sqn, err);
+		goto out;
+	}
+
+	if (state != MLX5_SQC_STATE_ERR)
+		goto out;
+
+	mlx5e_tx_disable_queue(sq->txq);
+
+	err = mlx5e_wait_for_sq_flush(sq);
+	if (err)
+		goto out;
+
+	/* At this point, no new packets will arrive from the stack as TXQ is
+	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+	 * pending WQEs, so it is now safe to reset the SQ.
+	 */
+
+	err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
+	if (err)
+		goto out;
+
+	mlx5e_reset_txqsq_cc_pc(sq);
+	sq->stats->recover++;
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+	mlx5e_activate_txqsq(sq);
+
+	return 0;
+out:
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+	return err;
+}
+
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
+{
+	struct mlx5e_priv *priv = sq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {0};
+
+	err_ctx.ctx = sq;
+	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+	sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
+
+	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+}
+
+static int mlx5e_tx_reporter_timeout_recover(void *ctx)
+{
+	struct mlx5_eq_comp *eq;
+	struct mlx5e_txqsq *sq;
+	int err;
+
+	sq = ctx;
+	eq = sq->cq.mcq.eq;
+	err = mlx5e_health_channel_eq_recover(eq, sq->channel);
+	if (err)
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+
+	return err;
+}
+
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
+{
+	struct mlx5e_priv *priv = sq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx;
+
+	err_ctx.ctx = sq;
+	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+	sprintf(err_str,
+		"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+		sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+		jiffies_to_usecs(jiffies - sq->txq->trans_start));
+
+	return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
+}
+
+/* The state lock cannot be grabbed within this function.
+ * It can cause a deadlock or a read-after-free.
+ */
+static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
+{
+	return err_ctx->recover(err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
+				     void *context)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_err_ctx *err_ctx = context;
+
+	return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
+			 mlx5e_health_recover_channels(priv);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
+					struct mlx5e_txqsq *sq, int tc)
+{
+	struct mlx5e_priv *priv = sq->channel->priv;
+	bool stopped = netif_xmit_stopped(sq->txq);
+	u8 state;
+	int err;
+
+	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
+				      struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+	u32 sq_stride, sq_sz;
+
+	int i, tc, err = 0;
+
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+
+	sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq);
+	sq_stride = MLX5_SEND_WQE_BB;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config");
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+	if (err)
+		goto unlock;
+
+	for (i = 0; i < priv->channels.num; i++) {
+		struct mlx5e_channel *c = priv->channels.c[i];
+
+		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+			struct mlx5e_txqsq *sq = &c->sq[tc];
+
+			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
+			if (err)
+				goto unlock;
+		}
+	}
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+	if (err)
+		goto unlock;
+
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return err;
+}
+
+static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
+	.name = "tx",
+	.recover = mlx5e_tx_reporter_recover,
+	.diagnose = mlx5e_tx_reporter_diagnose,
+};
+
+#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
+
+int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
+{
+	struct devlink_health_reporter *reporter;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct devlink *devlink;
+
+	devlink = priv_to_devlink(mdev);
+	reporter =
+		devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
+					       MLX5_REPORTER_TX_GRACEFUL_PERIOD,
+					       true, priv);
+	if (IS_ERR(reporter)) {
+		netdev_warn(priv->netdev,
+			    "Failed to create tx reporter, err = %ld\n",
+			    PTR_ERR(reporter));
+		return PTR_ERR(reporter);
+	}
+	priv->tx_reporter = reporter;
+	return 0;
+}
+
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
+{
+	if (!priv->tx_reporter)
+		return;
+
+	devlink_health_reporter_destroy(priv->tx_reporter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
new file mode 100644
index 0000000..745ab6c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -0,0 +1,562 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include <net/gre.h>
+#include <net/geneve.h>
+#include "en/tc_tun.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
+{
+	if (netif_is_vxlan(tunnel_dev))
+		return &vxlan_tunnel;
+	else if (netif_is_geneve(tunnel_dev))
+		return &geneve_tunnel;
+	else if (netif_is_gretap(tunnel_dev) ||
+		 netif_is_ip6gretap(tunnel_dev))
+		return &gre_tunnel;
+	else
+		return NULL;
+}
+
+static int get_route_and_out_devs(struct mlx5e_priv *priv,
+				  struct net_device *dev,
+				  struct net_device **route_dev,
+				  struct net_device **out_dev)
+{
+	struct net_device *uplink_dev, *uplink_upper, *real_dev;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	bool dst_is_lag_dev;
+
+	real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
+	uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+
+	rcu_read_lock();
+	uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+	/* mlx5_lag_is_sriov() is a blocking function which can't be called
+	 * while holding the RCU read lock. Hold a reference on the
+	 * net_device for correctness' sake.
+	 */
+	if (uplink_upper)
+		dev_hold(uplink_upper);
+	rcu_read_unlock();
+
+	dst_is_lag_dev = (uplink_upper &&
+			  netif_is_lag_master(uplink_upper) &&
+			  real_dev == uplink_upper &&
+			  mlx5_lag_is_sriov(priv->mdev));
+	if (uplink_upper)
+		dev_put(uplink_upper);
+
+	/* if the egress device isn't on the same HW e-switch or
+	 * it's a LAG device, use the uplink
+	 */
+	*route_dev = dev;
+	if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
+	    dst_is_lag_dev || is_vlan_dev(*route_dev))
+		*out_dev = uplink_dev;
+	else if (mlx5e_eswitch_rep(dev) &&
+		 mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
+		*out_dev = *route_dev;
+	else
+		return -EOPNOTSUPP;
+
+	if (!(mlx5e_eswitch_rep(*out_dev) &&
+	      mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+				   struct net_device *mirred_dev,
+				   struct net_device **out_dev,
+				   struct net_device **route_dev,
+				   struct flowi4 *fl4,
+				   struct neighbour **out_n,
+				   u8 *out_ttl)
+{
+	struct rtable *rt;
+	struct neighbour *n = NULL;
+
+#if IS_ENABLED(CONFIG_INET)
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct net_device *uplink_dev;
+	int ret;
+
+	if (mlx5_lag_is_multipath(mdev)) {
+		struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+		uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+		fl4->flowi4_oif = uplink_dev->ifindex;
+	}
+
+	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+	ret = PTR_ERR_OR_ZERO(rt);
+	if (ret)
+		return ret;
+
+	if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
+		ip_rt_put(rt);
+		return -ENETUNREACH;
+	}
+#else
+	return -EOPNOTSUPP;
+#endif
+
+	ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
+	if (ret < 0) {
+		ip_rt_put(rt);
+		return ret;
+	}
+
+	if (!(*out_ttl))
+		*out_ttl = ip4_dst_hoplimit(&rt->dst);
+	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+	ip_rt_put(rt);
+	if (!n)
+		return -ENOMEM;
+
+	*out_n = n;
+	return 0;
+}
+
+static const char *mlx5e_netdev_kind(struct net_device *dev)
+{
+	if (dev->rtnl_link_ops)
+		return dev->rtnl_link_ops->kind;
+	else
+		return "unknown";
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+				   struct net_device *mirred_dev,
+				   struct net_device **out_dev,
+				   struct net_device **route_dev,
+				   struct flowi6 *fl6,
+				   struct neighbour **out_n,
+				   u8 *out_ttl)
+{
+	struct neighbour *n = NULL;
+	struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+	int ret;
+
+	ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+					 fl6);
+	if (ret < 0)
+		return ret;
+
+	if (!(*out_ttl))
+		*out_ttl = ip6_dst_hoplimit(dst);
+
+	ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
+	if (ret < 0) {
+		dst_release(dst);
+		return ret;
+	}
+#else
+	return -EOPNOTSUPP;
+#endif
+
+	n = dst_neigh_lookup(dst, &fl6->daddr);
+	dst_release(dst);
+	if (!n)
+		return -ENOMEM;
+
+	*out_n = n;
+	return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
+				      struct mlx5e_encap_entry *e)
+{
+	if (!e->tunnel) {
+		pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
+		return -EOPNOTSUPP;
+	}
+
+	return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
+}
+
+static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
+			     struct mlx5e_encap_entry *e,
+			     u16 proto)
+{
+	struct ethhdr *eth = (struct ethhdr *)buf;
+	char *ip;
+
+	ether_addr_copy(eth->h_dest, e->h_dest);
+	ether_addr_copy(eth->h_source, dev->dev_addr);
+	if (is_vlan_dev(dev)) {
+		struct vlan_hdr *vlan = (struct vlan_hdr *)
+					((char *)eth + ETH_HLEN);
+		ip = (char *)vlan + VLAN_HLEN;
+		eth->h_proto = vlan_dev_vlan_proto(dev);
+		vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
+		vlan->h_vlan_encapsulated_proto = htons(proto);
+	} else {
+		eth->h_proto = htons(proto);
+		ip = (char *)eth + ETH_HLEN;
+	}
+
+	return ip;
+}
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+				    struct net_device *mirred_dev,
+				    struct mlx5e_encap_entry *e)
+{
+	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	struct net_device *out_dev, *route_dev;
+	struct neighbour *n = NULL;
+	struct flowi4 fl4 = {};
+	int ipv4_encap_size;
+	char *encap_header;
+	u8 nud_state, ttl;
+	struct iphdr *ip;
+	int err;
+
+	/* add the IP fields */
+	fl4.flowi4_tos = tun_key->tos;
+	fl4.daddr = tun_key->u.ipv4.dst;
+	fl4.saddr = tun_key->u.ipv4.src;
+	ttl = tun_key->ttl;
+
+	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
+				      &fl4, &n, &ttl);
+	if (err)
+		return err;
+
+	ipv4_encap_size =
+		(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+		sizeof(struct iphdr) +
+		e->tunnel->calc_hlen(e);
+
+	if (max_encap_size < ipv4_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv4_encap_size, max_encap_size);
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+	if (!encap_header) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* used by mlx5e_detach_encap to look up this encap entry in
+	 * the neigh hash table when a user deletes a rule
+	 */
+	e->m_neigh.dev = n->dev;
+	e->m_neigh.family = n->ops->family;
+	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+	e->out_dev = out_dev;
+	e->route_dev = route_dev;
+
+	/* It's important to add the neigh to the hash table before checking
+	 * its validity state, so that if a notification arrives after the
+	 * neigh changes its validity state, we still find the relevant neigh
+	 * in the hash.
+	 */
+	err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+	if (err)
+		goto free_encap;
+
+	read_lock_bh(&n->lock);
+	nud_state = n->nud_state;
+	ether_addr_copy(e->h_dest, n->ha);
+	read_unlock_bh(&n->lock);
+
+	/* add ethernet header */
+	ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+					     ETH_P_IP);
+
+	/* add ip header */
+	ip->tos = tun_key->tos;
+	ip->version = 0x4;
+	ip->ihl = 0x5;
+	ip->ttl = ttl;
+	ip->daddr = fl4.daddr;
+	ip->saddr = fl4.saddr;
+
+	/* add tunneling protocol header */
+	err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+					 &ip->protocol, e);
+	if (err)
+		goto destroy_neigh_entry;
+
+	e->encap_size = ipv4_encap_size;
+	e->encap_header = encap_header;
+
+	if (!(nud_state & NUD_VALID)) {
+		neigh_event_send(n, NULL);
+		/* the encap entry will be made valid on neigh update event
+		 * and not used before that.
+		 */
+		goto out;
+	}
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     ipv4_encap_size, encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		err = PTR_ERR(e->pkt_reformat);
+		goto destroy_neigh_entry;
+	}
+
+	e->flags |= MLX5_ENCAP_ENTRY_VALID;
+	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+	neigh_release(n);
+	return err;
+
+destroy_neigh_entry:
+	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+	kfree(encap_header);
+out:
+	if (n)
+		neigh_release(n);
+	return err;
+}
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+				    struct net_device *mirred_dev,
+				    struct mlx5e_encap_entry *e)
+{
+	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	struct net_device *out_dev, *route_dev;
+	struct neighbour *n = NULL;
+	struct flowi6 fl6 = {};
+	struct ipv6hdr *ip6h;
+	int ipv6_encap_size;
+	char *encap_header;
+	u8 nud_state, ttl;
+	int err;
+
+	ttl = tun_key->ttl;
+
+	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+	fl6.daddr = tun_key->u.ipv6.dst;
+	fl6.saddr = tun_key->u.ipv6.src;
+
+	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
+				      &fl6, &n, &ttl);
+	if (err)
+		return err;
+
+	ipv6_encap_size =
+		(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+		sizeof(struct ipv6hdr) +
+		e->tunnel->calc_hlen(e);
+
+	if (max_encap_size < ipv6_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv6_encap_size, max_encap_size);
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+	if (!encap_header) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* used by mlx5e_detach_encap to look up this encap entry in
+	 * the neigh hash table when a user deletes a rule
+	 */
+	e->m_neigh.dev = n->dev;
+	e->m_neigh.family = n->ops->family;
+	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+	e->out_dev = out_dev;
+	e->route_dev = route_dev;
+
+	/* It's important to add the neigh to the hash table before checking
+	 * its validity state, so that if a notification arrives after the
+	 * neigh changes its validity state, we still find the relevant neigh
+	 * in the hash.
+	 */
+	err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+	if (err)
+		goto free_encap;
+
+	read_lock_bh(&n->lock);
+	nud_state = n->nud_state;
+	ether_addr_copy(e->h_dest, n->ha);
+	read_unlock_bh(&n->lock);
+
+	/* add ethernet header */
+	ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+						 ETH_P_IPV6);
+
+	/* add ip header */
+	ip6_flow_hdr(ip6h, tun_key->tos, 0);
+	/* the HW fills in the IPv6 payload length */
+	ip6h->hop_limit   = ttl;
+	ip6h->daddr	  = fl6.daddr;
+	ip6h->saddr	  = fl6.saddr;
+
+	/* add tunneling protocol header */
+	err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+					 &ip6h->nexthdr, e);
+	if (err)
+		goto destroy_neigh_entry;
+
+	e->encap_size = ipv6_encap_size;
+	e->encap_header = encap_header;
+
+	if (!(nud_state & NUD_VALID)) {
+		neigh_event_send(n, NULL);
+		/* the encap entry will be made valid on neigh update event
+		 * and not used before that.
+		 */
+		goto out;
+	}
+
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     ipv6_encap_size, encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		err = PTR_ERR(e->pkt_reformat);
+		goto destroy_neigh_entry;
+	}
+
+	e->flags |= MLX5_ENCAP_ENTRY_VALID;
+	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+	neigh_release(n);
+	return err;
+
+destroy_neigh_entry:
+	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+	kfree(encap_header);
+out:
+	if (n)
+		neigh_release(n);
+	return err;
+}
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+				    struct net_device *netdev)
+{
+	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
+
+	if (tunnel && tunnel->can_offload(priv))
+		return true;
+	else
+		return false;
+}
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+				 struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e,
+				 struct netlink_ext_ack *extack)
+{
+	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
+
+	if (!tunnel) {
+		e->reformat_type = -1;
+		return -EOPNOTSUPP;
+	}
+
+	return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+		       struct mlx5e_priv *priv,
+		       struct mlx5_flow_spec *spec,
+		       struct flow_cls_offload *f,
+		       void *headers_c,
+		       void *headers_v, u8 *match_level)
+{
+	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+	int err = 0;
+
+	if (!tunnel) {
+		netdev_warn(priv->netdev,
+			    "decapsulation offload is not supported for %s net device\n",
+			    mlx5e_netdev_kind(filter_dev));
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	*match_level = tunnel->match_level;
+
+	if (tunnel->parse_udp_ports) {
+		err = tunnel->parse_udp_ports(priv, spec, f,
+					      headers_c, headers_v);
+		if (err)
+			goto out;
+	}
+
+	if (tunnel->parse_tunnel) {
+		err = tunnel->parse_tunnel(priv, spec, f,
+					   headers_c, headers_v);
+		if (err)
+			goto out;
+	}
+
+out:
+	return err;
+}
+
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+				 struct mlx5_flow_spec *spec,
+				 struct flow_cls_offload *f,
+				 void *headers_c,
+				 void *headers_v)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct flow_match_ports enc_ports;
+
+	/* The full UDP dst port must be given */
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "UDP tunnel decap filter must include enc_dst_port condition");
+		netdev_warn(priv->netdev,
+			    "UDP tunnel decap filter must include enc_dst_port condition\n");
+		return -EOPNOTSUPP;
+	}
+
+	flow_rule_match_enc_ports(rule, &enc_ports);
+
+	if (memchr_inv(&enc_ports.mask->dst, 0xff,
+		       sizeof(enc_ports.mask->dst))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "UDP tunnel decap filter must match enc_dst_port fully");
+		netdev_warn(priv->netdev,
+			    "UDP tunnel decap filter must match enc_dst_port fully\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* match on UDP protocol and dst port number */
+
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+		 ntohs(enc_ports.mask->dst));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+		 ntohs(enc_ports.key->dst));
+
+	/* The UDP src port in the outer header is generated by HW,
+	 * so it is probably a bad idea to request matching it.
+	 * Nonetheless, it is allowed.
+	 */
+
+	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+		 ntohs(enc_ports.mask->src));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+		 ntohs(enc_ports.key->src));
+
+	return 0;
+}
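
The memchr_inv() check in mlx5e_tc_tun_parse_udp_ports() above rejects partially-masked ports: it scans the enc_dst_port mask and fails unless every byte is 0xff. memchr_inv() is a kernel helper; a minimal userspace sketch with an equivalent follows:

#include <stdio.h>
#include <string.h>

/* returns pointer to the first byte differing from c, or NULL if none */
static const void *memchr_inv(const void *p, int c, size_t size)
{
	const unsigned char *s = p;
	size_t i;

	for (i = 0; i < size; i++)
		if (s[i] != (unsigned char)c)
			return s + i;
	return NULL;
}

int main(void)
{
	unsigned short full_mask = 0xffff;	/* fully-specified port */
	unsigned short partial_mask = 0xff00;	/* prefix match only */

	printf("full: %s\n",
	       memchr_inv(&full_mask, 0xff, sizeof(full_mask)) ?
	       "rejected" : "accepted");	/* accepted */
	printf("partial: %s\n",
	       memchr_inv(&partial_mask, 0xff, sizeof(partial_mask)) ?
	       "rejected" : "accepted");	/* rejected */
	return 0;
}
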
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
new file mode 100644
index 0000000..c362b92
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUNNEL_H__
+#define __MLX5_EN_TC_TUNNEL_H__
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/fs.h>
+#include <net/pkt_cls.h>
+#include <linux/netlink.h>
+#include "en.h"
+#include "en_rep.h"
+
+enum {
+	MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
+	MLX5E_TC_TUNNEL_TYPE_VXLAN,
+	MLX5E_TC_TUNNEL_TYPE_GENEVE,
+	MLX5E_TC_TUNNEL_TYPE_GRETAP,
+};
+
+struct mlx5e_tc_tunnel {
+	int tunnel_type;
+	enum mlx5_flow_match_level match_level;
+
+	bool (*can_offload)(struct mlx5e_priv *priv);
+	int (*calc_hlen)(struct mlx5e_encap_entry *e);
+	int (*init_encap_attr)(struct net_device *tunnel_dev,
+			       struct mlx5e_priv *priv,
+			       struct mlx5e_encap_entry *e,
+			       struct netlink_ext_ack *extack);
+	int (*generate_ip_tun_hdr)(char buf[],
+				   __u8 *ip_proto,
+				   struct mlx5e_encap_entry *e);
+	int (*parse_udp_ports)(struct mlx5e_priv *priv,
+			       struct mlx5_flow_spec *spec,
+			       struct flow_cls_offload *f,
+			       void *headers_c,
+			       void *headers_v);
+	int (*parse_tunnel)(struct mlx5e_priv *priv,
+			    struct mlx5_flow_spec *spec,
+			    struct flow_cls_offload *f,
+			    void *headers_c,
+			    void *headers_v);
+};
+
+extern struct mlx5e_tc_tunnel vxlan_tunnel;
+extern struct mlx5e_tc_tunnel geneve_tunnel;
+extern struct mlx5e_tc_tunnel gre_tunnel;
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+				 struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e,
+				 struct netlink_ext_ack *extack);
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+				    struct net_device *mirred_dev,
+				    struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+				    struct net_device *mirred_dev,
+				    struct mlx5e_encap_entry *e);
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+				    struct net_device *netdev);
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+		       struct mlx5e_priv *priv,
+		       struct mlx5_flow_spec *spec,
+		       struct flow_cls_offload *f,
+		       void *headers_c,
+		       void *headers_v, u8 *match_level);
+
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+				 struct mlx5_flow_spec *spec,
+				 struct flow_cls_offload *f,
+				 void *headers_c,
+				 void *headers_v);
+
+#endif /* __MLX5_EN_TC_TUNNEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
new file mode 100644
index 0000000..951ea26
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/geneve.h>
+#include "lib/geneve.h"
+#include "en/tc_tun.h"
+
+#define MLX5E_GENEVE_VER 0
+
+static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv)
+{
+	return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE);
+}
+
+static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e)
+{
+	return sizeof(struct udphdr) +
+	       sizeof(struct genevehdr) +
+	       e->tun_info->options_len;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv,
+					       struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct flow_match_ports enc_ports;
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+		return -EOPNOTSUPP;
+
+	flow_rule_match_enc_ports(rule, &enc_ports);
+
+	/* Currently we support only default GENEVE
+	 * port, so udp dst port must match.
+	 */
+	if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matched UDP dst port is not registered as a GENEVE port");
+		netdev_warn(priv->netdev,
+			    "UDP port %d is not registered as a GENEVE port\n",
+			    be16_to_cpu(enc_ports.key->dst));
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv,
+					       struct mlx5_flow_spec *spec,
+					       struct flow_cls_offload *f,
+					       void *headers_c,
+					       void *headers_v)
+{
+	int err;
+
+	err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+	if (err)
+		return err;
+
+	return mlx5e_tc_tun_check_udp_dport_geneve(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev,
+					       struct mlx5e_priv *priv,
+					       struct mlx5e_encap_entry *e,
+					       struct netlink_ext_ack *extack)
+{
+	e->tunnel = &geneve_tunnel;
+
+	/* Reformat type for GENEVE encap is similar to VXLAN:
+	 * in both cases the HW adds in the same place a
+	 * defined encapsulation header that the SW provides.
+	 */
+	e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+	return 0;
+}
+
+static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
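+	/* The VNI is the low 24 bits of the 64-bit tunnel ID. tun_id is
+	 * __be64, so a big-endian host can shift it directly; on a
+	 * little-endian host the same three bytes sit in the upper half of
+	 * the raw value, hence the 40/48/56-bit shifts.
+	 */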
+#ifdef __BIG_ENDIAN
+	vni[0] = (__force __u8)(tun_id >> 16);
+	vni[1] = (__force __u8)(tun_id >> 8);
+	vni[2] = (__force __u8)tun_id;
+#else
+	vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+	vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+	vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+static int mlx5e_gen_ip_tunnel_header_geneve(char buf[],
+					     __u8 *ip_proto,
+					     struct mlx5e_encap_entry *e)
+{
+	const struct ip_tunnel_info *tun_info = e->tun_info;
+	struct udphdr *udp = (struct udphdr *)(buf);
+	struct genevehdr *geneveh;
+
+	geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr));
+
+	*ip_proto = IPPROTO_UDP;
+
+	udp->dest = tun_info->key.tp_dst;
+
+	memset(geneveh, 0, sizeof(*geneveh));
+	geneveh->ver = MLX5E_GENEVE_VER;
+	geneveh->opt_len = tun_info->options_len / 4;
+	geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM);
+	geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT);
+	mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni);
+	geneveh->proto_type = htons(ETH_P_TEB);
+
+	if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+		if (!geneveh->opt_len)
+			return -EOPNOTSUPP;
+		ip_tunnel_info_opts_get(geneveh->options, tun_info);
+	}
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv,
+					 struct mlx5_flow_spec *spec,
+					 struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct flow_match_enc_keyid enc_keyid;
+	void *misc_c, *misc_v;
+
+	misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+		return 0;
+
+	flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+	if (!enc_keyid.mask->keyid)
+		return 0;
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) {
+		NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported");
+		netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid));
+	MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid));
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
+					     struct mlx5_flow_spec *spec,
+					     struct flow_cls_offload *f)
+{
+	u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len);
+	u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	void *misc_c, *misc_v, *misc_3_c, *misc_3_v;
+	struct geneve_opt *option_key, *option_mask;
+	__be32 opt_data_key = 0, opt_data_mask = 0;
+	struct flow_match_enc_opts enc_opts;
+	int res = 0;
+
+	misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3);
+	misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3);
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+		return 0;
+
+	flow_rule_match_enc_opts(rule, &enc_opts);
+
+	if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) &&
+	    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+					ft_field_support.geneve_tlv_option_0_data)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on GENEVE options is not supported");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* make sure that we're talking about GENEVE options */
+
+	if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on GENEVE options: option type is not GENEVE");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options: option type is not GENEVE\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (enc_opts.mask->len &&
+	    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+					ft_field_support.outer_geneve_opt_len)) {
+		NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options len is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it
+	 * doesn't include the TLV option header. 'geneve_opt_len' is the total
+	 * length of all the options, including the headers, also in multiples
+	 * of 4 bytes. The length that comes from the dissector is in bytes.
+	 */
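+	/* Illustrative example (both limits are reported by the device): with
+	 * max_tlv_options == 8 and max_tlv_option_data_len == 1 (4 data bytes
+	 * per option), anything above 8 * (1 + 1) * 4 == 64 bytes of options
+	 * is rejected below.
+	 */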
+
+	if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on GENEVE options: unsupported options len");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options: unsupported options len (len=%d)\n",
+			    enc_opts.key->len);
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4);
+	MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4);
+
+	/* we support matching on one option only, so just get it */
+	option_key = (struct geneve_opt *)&enc_opts.key->data[0];
+	option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
+
+	if (option_key->length > max_tlv_option_data_len) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on GENEVE options: unsupported option len");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n",
+			    option_key->length, option_mask->length);
+		return -EOPNOTSUPP;
+	}
+
+	/* data can't be all 0 - fail to offload such rule */
+	if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on GENEVE options: can't match on 0 data field");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options: can't match on 0 data field\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* add new GENEVE TLV options object */
+	res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key);
+	if (res) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on GENEVE options: failed creating TLV opt object");
+		netdev_warn(priv->netdev,
+			    "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n",
+			    be16_to_cpu(option_key->opt_class),
+			    option_key->type, option_key->length);
+		return res;
+	}
+
+	/* In general, after creating the object, the driver would need to
+	 * query it in order to check which option data to set in misc3.
+	 * But we support only geneve_tlv_option_0_data, so there is no
+	 * point in querying at this stage.
+	 */
+
+	memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4);
+	memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4);
+	MLX5_SET(fte_match_set_misc3, misc_3_v,
+		 geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
+	MLX5_SET(fte_match_set_misc3, misc_3_c,
+		 geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
+
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
+					    struct mlx5_flow_spec *spec,
+					    struct flow_cls_offload *f)
+{
+	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,  misc_parameters);
+	struct netlink_ext_ack *extack = f->common.extack;
+
+	/* match on OAM - packets with OAM bit on should NOT be offloaded */
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) {
+		NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported");
+		netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n");
+		return -EOPNOTSUPP;
+	}
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam);
+	MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0);
+
+	/* Match on GENEVE protocol. We support only Transparent Eth Bridge. */
+
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+				       ft_field_support.outer_geneve_protocol_type)) {
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type);
+		MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
+	}
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
+				     struct mlx5_flow_spec *spec,
+				     struct flow_cls_offload *f,
+				     void *headers_c,
+				     void *headers_v)
+{
+	int err;
+
+	err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f);
+	if (err)
+		return err;
+
+	err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f);
+	if (err)
+		return err;
+
+	return mlx5e_tc_tun_parse_geneve_options(priv, spec, f);
+}
+
+struct mlx5e_tc_tunnel geneve_tunnel = {
+	.tunnel_type          = MLX5E_TC_TUNNEL_TYPE_GENEVE,
+	.match_level          = MLX5_MATCH_L4,
+	.can_offload          = mlx5e_tc_tun_can_offload_geneve,
+	.calc_hlen            = mlx5e_tc_tun_calc_hlen_geneve,
+	.init_encap_attr      = mlx5e_tc_tun_init_encap_attr_geneve,
+	.generate_ip_tun_hdr  = mlx5e_gen_ip_tunnel_header_geneve,
+	.parse_udp_ports      = mlx5e_tc_tun_parse_udp_ports_geneve,
+	.parse_tunnel         = mlx5e_tc_tun_parse_geneve,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
new file mode 100644
index 0000000..58b1319
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/gre.h>
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv)
+{
+	return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e)
+{
+	return gre_calc_hlen(e->tun_info->key.tun_flags);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev,
+					       struct mlx5e_priv *priv,
+					       struct mlx5e_encap_entry *e,
+					       struct netlink_ext_ack *extack)
+{
+	e->tunnel = &gre_tunnel;
+	e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+	return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_gretap(char buf[],
+					     __u8 *ip_proto,
+					     struct mlx5e_encap_entry *e)
+{
+	const struct ip_tunnel_key *tun_key  = &e->tun_info->key;
+	struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+	__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+	int hdr_len;
+
+	*ip_proto = IPPROTO_GRE;
+
+	/* the HW does not calculate GRE csum or sequences */
+	if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+		return -EOPNOTSUPP;
+
+	greh->protocol = htons(ETH_P_TEB);
+
+	/* GRE key */
+	hdr_len	= mlx5e_tc_tun_calc_hlen_gretap(e);
+	greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+	if (tun_key->tun_flags & TUNNEL_KEY) {
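+		/* CSUM and SEQ were rejected above, so the key is the only
+		 * optional field and occupies the last 4 bytes of the header.
+		 */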
+		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+		*ptr = tun_id;
+	}
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+				     struct mlx5_flow_spec *spec,
+				     struct flow_cls_offload *f,
+				     void *headers_c,
+				     void *headers_v)
+{
+	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+
+	/* gre protocol */
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+	MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+	/* gre key */
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid enc_keyid;
+
+		flow_rule_match_enc_keyid(rule, &enc_keyid);
+		MLX5_SET(fte_match_set_misc, misc_c,
+			 gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
+		MLX5_SET(fte_match_set_misc, misc_v,
+			 gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
+	}
+
+	return 0;
+}
+
+struct mlx5e_tc_tunnel gre_tunnel = {
+	.tunnel_type          = MLX5E_TC_TUNNEL_TYPE_GRETAP,
+	.match_level          = MLX5_MATCH_L3,
+	.can_offload          = mlx5e_tc_tun_can_offload_gretap,
+	.calc_hlen            = mlx5e_tc_tun_calc_hlen_gretap,
+	.init_encap_attr      = mlx5e_tc_tun_init_encap_attr_gretap,
+	.generate_ip_tun_hdr  = mlx5e_gen_ip_tunnel_header_gretap,
+	.parse_udp_ports      = NULL,
+	.parse_tunnel         = mlx5e_tc_tun_parse_gretap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
new file mode 100644
index 0000000..37b1768
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv)
+{
+	return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e)
+{
+	return VXLAN_HLEN;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv,
+					      struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct flow_match_ports enc_ports;
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+		return -EOPNOTSUPP;
+
+	flow_rule_match_enc_ports(rule, &enc_ports);
+
+	/* check the UDP destination port validity */
+
+	if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan,
+				    be16_to_cpu(enc_ports.key->dst))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matched UDP dst port is not registered as a VXLAN port");
+		netdev_warn(priv->netdev,
+			    "UDP port %d is not registered as a VXLAN port\n",
+			    be16_to_cpu(enc_ports.key->dst));
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv,
+					      struct mlx5_flow_spec *spec,
+					      struct flow_cls_offload *f,
+					      void *headers_c,
+					      void *headers_v)
+{
+	int err;
+
+	err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+	if (err)
+		return err;
+
+	return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev,
+					      struct mlx5e_priv *priv,
+					      struct mlx5e_encap_entry *e,
+					      struct netlink_ext_ack *extack)
+{
+	int dst_port = be16_to_cpu(e->tun_info->key.tp_dst);
+
+	e->tunnel = &vxlan_tunnel;
+
+	if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "vxlan udp dport was not registered with the HW");
+		netdev_warn(priv->netdev,
+			    "%d isn't an offloaded vxlan udp dport\n",
+			    dst_port);
+		return -EOPNOTSUPP;
+	}
+
+	e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+	return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+					    __u8 *ip_proto,
+					    struct mlx5e_encap_entry *e)
+{
+	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+	struct udphdr *udp = (struct udphdr *)(buf);
+	struct vxlanhdr *vxh;
+
+	vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+	*ip_proto = IPPROTO_UDP;
+
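+	/* Only the UDP destination port needs to be set here; the outer UDP
+	 * source port is generated by the HW, as noted in
+	 * mlx5e_tc_tun_parse_udp_ports.
+	 */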
+	udp->dest = tun_key->tp_dst;
+	vxh->vx_flags = VXLAN_HF_VNI;
+	vxh->vx_vni = vxlan_vni_field(tun_id);
+
+	return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+				    struct mlx5_flow_spec *spec,
+				    struct flow_cls_offload *f,
+				    void *headers_c,
+				    void *headers_v)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct flow_match_enc_keyid enc_keyid;
+	void *misc_c, *misc_v;
+
+	misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+		return 0;
+
+	flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+	if (!enc_keyid.mask->keyid)
+		return 0;
+
+	/* match on VNI is required */
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+					ft_field_support.outer_vxlan_vni)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Matching on VXLAN VNI is not supported");
+		netdev_warn(priv->netdev,
+			    "Matching on VXLAN VNI is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+		 be32_to_cpu(enc_keyid.mask->keyid));
+	MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+		 be32_to_cpu(enc_keyid.key->keyid));
+
+	return 0;
+}
+
+struct mlx5e_tc_tunnel vxlan_tunnel = {
+	.tunnel_type          = MLX5E_TC_TUNNEL_TYPE_VXLAN,
+	.match_level          = MLX5_MATCH_L4,
+	.can_offload          = mlx5e_tc_tun_can_offload_vxlan,
+	.calc_hlen            = mlx5e_tc_tun_calc_hlen_vxlan,
+	.init_encap_attr      = mlx5e_tc_tun_init_encap_attr_vxlan,
+	.generate_ip_tun_hdr  = mlx5e_gen_ip_tunnel_header_vxlan,
+	.parse_udp_ports      = mlx5e_tc_tun_parse_udp_ports_vxlan,
+	.parse_tunnel         = mlx5e_tc_tun_parse_vxlan,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
new file mode 100644
index 0000000..7c8796d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TXRX_H___
+#define __MLX5_EN_TXRX_H___
+
+#include "en.h"
+
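+/* Stop room reserves space for the largest possible WQE plus the NOPs that
+ * may be needed to pad the SQ to a fragment edge, so that a WQE never wraps.
+ */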
+#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+			    MLX5E_SQ_NOPS_ROOM)
+
+#ifndef CONFIG_MLX5_EN_TLS
+#define MLX5E_SQ_TLS_ROOM (0)
+#else
+/* TLS offload requires additional stop_room for:
+ * - a resync SKB.
+ * kTLS offload requires fixed additional stop_room for:
+ * - a static params WQE, and a progress params WQE.
+ * The additional MTU-dependent room for the resync DUMP WQEs
+ * will be calculated and added at runtime.
+ */
+#define MLX5E_SQ_TLS_ROOM  \
+	(MLX5_SEND_WQE_MAX_WQEBBS + \
+	 MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
+#endif
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
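+	/* pc - cc is the number of in-flight WQEBBs, so cc - pc, folded into
+	 * the ring by ctr2ix, is the number of free ones; cc == pc (empty
+	 * queue) is special-cased because the modulo expression yields 0.
+	 */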
+	return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+static inline void *
+mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	void *wqe;
+
+	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+	memset(wqe, 0, size);
+
+	return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+	u16                         pi   = mlx5_wq_cyc_ctr2ix(wq, *pc);
+	struct mlx5e_tx_wqe        *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+	struct mlx5_wqe_ctrl_seg   *cseg = &wqe->ctrl;
+
+	memset(cseg, 0, sizeof(*cseg));
+
+	cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+	cseg->qpn_ds           = cpu_to_be32((sqn << 8) | 0x01);
+
+	(*pc)++;
+
+	return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+	u16                         pi   = mlx5_wq_cyc_ctr2ix(wq, *pc);
+	struct mlx5e_tx_wqe        *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+	struct mlx5_wqe_ctrl_seg   *cseg = &wqe->ctrl;
+
+	memset(cseg, 0, sizeof(*cseg));
+
+	cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+	cseg->qpn_ds           = cpu_to_be32((sqn << 8) | 0x01);
+	cseg->fm_ce_se         = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+	(*pc)++;
+
+	return wqe;
+}
+
+static inline void
+mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
+			u16 pi, u16 nnops)
+{
+	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+	edge_wi = wi + nnops;
+
+	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
+	for (; wi < edge_wi; wi++) {
+		memset(wi, 0, sizeof(*wi));
+		wi->num_wqebbs = 1;
+		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+	}
+	sq->stats->nop += nnops;
+}
+
+static inline void
+mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
+		struct mlx5_wqe_ctrl_seg *ctrl)
+{
+	ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+	/* ensure wqe is visible to device before updating doorbell record */
+	dma_wmb();
+
+	*wq->db = cpu_to_be32(pc);
+
+	/* ensure doorbell record is visible to device before ringing the
+	 * doorbell
+	 */
+	wmb();
+
+	mlx5_write64((__be32 *)ctrl, uar_map);
+}
+
+static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
+{
+	return cseg && !!cseg->tisn;
+}
+
+static inline u8
+mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
+			 struct sk_buff *skb)
+{
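+	/* Transport (TIS-bound, e.g. TLS) WQEs must inline headers up to
+	 * TCP/UDP; otherwise start from the SQ minimum and raise it to L2
+	 * when a VLAN tag needs to be inlined.
+	 */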
+	u8 mode;
+
+	if (mlx5e_transport_inline_tx_wqe(cseg))
+		return MLX5_INLINE_MODE_TCP_UDP;
+
+	mode = sq->min_inline_mode;
+
+	if (skb_vlan_tag_present(skb) &&
+	    test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
+		mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
+
+	return mode;
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+	struct mlx5_core_cq *mcq;
+
+	mcq = &cq->mcq;
+	mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+static inline struct mlx5e_sq_dma *
+mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+	return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void
+mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
+	       enum mlx5e_dma_map_type map_type)
+{
+	struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+	dma->addr = addr;
+	dma->size = size;
+	dma->type = map_type;
+}
+
+static inline void
+mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
+{
+	switch (dma->type) {
+	case MLX5E_DMA_MAP_SINGLE:
+		dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+		break;
+	case MLX5E_DMA_MAP_PAGE:
+		dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+		break;
+	default:
+		WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+	}
+}
+
+/* SW parser related functions */
+
+struct mlx5e_swp_spec {
+	__be16 l3_proto;
+	u8 l4_proto;
+	u8 is_tun;
+	__be16 tun_l3_proto;
+	u8 tun_l4_proto;
+};
+
+static inline void
+mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
+		   struct mlx5e_swp_spec *swp_spec)
+{
+	/* SWP offsets are in 2-bytes words */
+	eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+	if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+	if (swp_spec->l4_proto) {
+		eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
+		if (swp_spec->l4_proto == IPPROTO_UDP)
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+	}
+
+	if (swp_spec->is_tun) {
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+	} else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
+		eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+		if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+	}
+	switch (swp_spec->tun_l4_proto) {
+	case IPPROTO_UDP:
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+		/* fall through */
+	case IPPROTO_TCP:
+		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+		break;
+	}
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ad6d471..f049e0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,30 +31,98 @@
  */
 
 #include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
 #include "en/xdp.h"
+#include "en/params.h"
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
+{
+	int hr = mlx5e_get_linear_rq_headroom(params, xsk);
+
+	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+	 * The condition checked in mlx5e_rx_is_linear_skb is:
+	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
+	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
+	 * What is returned from this function is:
+	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
+	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+	 * because both PAGE_SIZE and S are already aligned. Any number greater
+	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+	 * so max_mtu is the maximum MTU allowed.
+	 */
+
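+	/* Illustrative numbers (config-dependent): SKB_MAX_HEAD(hr) expands to
+	 * PAGE_SIZE - hr - S, so with 4 KB pages and hr == 256 this returns
+	 * 3840 - S, minus the HW2SW (hard_mtu) adjustment.
+	 */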
+	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
 
 static inline bool
-mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
-		    struct xdp_buff *xdp)
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
+		    struct mlx5e_dma_info *di, struct xdp_buff *xdp)
 {
+	struct mlx5e_xdp_xmit_data xdptxd;
 	struct mlx5e_xdp_info xdpi;
+	struct xdp_frame *xdpf;
+	dma_addr_t dma_addr;
 
-	xdpi.xdpf = convert_to_xdp_frame(xdp);
-	if (unlikely(!xdpi.xdpf))
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
 		return false;
-	xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
-	dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
-				   xdpi.xdpf->len, PCI_DMA_TODEVICE);
-	xdpi.di = *di;
 
-	return mlx5e_xmit_xdp_frame(sq, &xdpi);
+	xdptxd.data = xdpf->data;
+	xdptxd.len  = xdpf->len;
+
+	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+		/* The xdp_buff was in the UMEM and was copied into a newly
+		 * allocated page. The UMEM page was returned via the ZCA, and
+		 * this new page has to be mapped at this point and has to be
+		 * unmapped and returned via xdp_return_frame on completion.
+		 */
+
+		/* Prevent double recycling of the UMEM page. Even in case this
+		 * function returns false, the xdp_buff shouldn't be recycled,
+		 * as it was already done in xdp_convert_zc_to_xdp_frame.
+		 */
+		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+
+		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+
+		dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+					  DMA_TO_DEVICE);
+		if (dma_mapping_error(sq->pdev, dma_addr)) {
+			xdp_return_frame(xdpf);
+			return false;
+		}
+
+		xdptxd.dma_addr     = dma_addr;
+		xdpi.frame.xdpf     = xdpf;
+		xdpi.frame.dma_addr = dma_addr;
+	} else {
+		/* The driver assumes that convert_to_xdp_frame returns an
+		 * xdp_frame pointing to the same memory region as the original
+		 * xdp_buff, which allows mapping the memory only once and
+		 * using the DMA_BIDIRECTIONAL mode.
+		 */
+
+		xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+
+		dma_addr = di->addr + (xdpf->data - (void *)xdpf);
+		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
+					   DMA_TO_DEVICE);
+
+		xdptxd.dma_addr = dma_addr;
+		xdpi.page.rq    = rq;
+		xdpi.page.di    = *di;
+	}
+
+	return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
 }
 
 /* returns true if packet was consumed by xdp */
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      void *va, u16 *rx_headroom, u32 *len)
+		      void *va, u16 *rx_headroom, u32 *len, bool xsk)
 {
 	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+	struct xdp_umem *umem = rq->umem;
 	struct xdp_buff xdp;
 	u32 act;
 	int err;
@@ -66,16 +134,23 @@
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
 	xdp.data_hard_start = va;
+	if (xsk)
+		xdp.handle = di->xsk.handle;
 	xdp.rxq = &rq->xdp_rxq;
 
 	act = bpf_prog_run_xdp(prog, &xdp);
+	if (xsk) {
+		u64 off = xdp.data - xdp.data_hard_start;
+
+		xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
+	}
 	switch (act) {
 	case XDP_PASS:
 		*rx_headroom = xdp.data - xdp.data_hard_start;
 		*len = xdp.data_end - xdp.data;
 		return false;
 	case XDP_TX:
-		if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
 		return true;
@@ -85,8 +160,9 @@
 		if (unlikely(err))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
-		rq->xdpsq.redirect_flush = true;
-		mlx5e_page_dma_unmap(rq, di);
+		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
+		if (!xsk)
+			mlx5e_page_dma_unmap(rq, di);
 		rq->stats->xdp_redirect++;
 		return true;
 	default:
@@ -102,7 +178,128 @@
 	}
 }
 
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+	struct mlx5e_xdpsq_stats *stats = sq->stats;
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi, contig_wqebbs;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+
+	if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
+		mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
+
+	session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi);
+
+	prefetchw(session->wqe->data);
+	session->ds_count  = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+	session->pkt_count = 0;
+
+	mlx5e_xdp_update_inline_state(sq);
+
+	stats->mpwqe++;
+}
+
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+	struct mlx5_wq_cyc       *wq    = &sq->wq;
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+	u16 ds_count = session->ds_count;
+	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+	cseg->opmod_idx_opcode =
+		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+	wi->num_pkts   = session->pkt_count;
+
+	sq->pc += wi->num_wqebbs;
+
+	sq->doorbell_cseg = cseg;
+
+	session->wqe = NULL; /* Close session */
+}
+
+enum {
+	MLX5E_XDP_CHECK_OK = 1,
+	MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
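+/* Positive results: CHECK_OK means a packet can be added right away;
+ * CHECK_START_MPWQE means there is room, but a new MPWQE session must be
+ * opened first. Failures are returned as negative errno values (-EBUSY).
+ */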
+
+static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+	if (unlikely(!sq->mpwqe.wqe)) {
+		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+						     MLX5E_XDPSQ_STOP_ROOM))) {
+			/* SQ is full, ring doorbell */
+			mlx5e_xmit_xdp_doorbell(sq);
+			sq->stats->full++;
+			return -EBUSY;
+		}
+
+		return MLX5E_XDP_CHECK_START_MPWQE;
+	}
+
+	return MLX5E_XDP_CHECK_OK;
+}
+
+static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+				       struct mlx5e_xdp_xmit_data *xdptxd,
+				       struct mlx5e_xdp_info *xdpi,
+				       int check_result)
+{
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+	struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+	if (unlikely(xdptxd->len > sq->hw_mtu)) {
+		stats->err++;
+		return false;
+	}
+
+	if (!check_result)
+		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+	if (unlikely(check_result < 0))
+		return false;
+
+	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+		/* Start the session when nothing can fail, so it's guaranteed
+		 * that if there is an active session, it has at least one dseg,
+		 * and it's safe to complete it at any time.
+		 */
+		mlx5e_xdp_mpwqe_session_start(sq);
+	}
+
+	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
+
+	if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
+		     session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
+		mlx5e_xdp_mpwqe_complete(sq);
+
+	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+	stats->xmit++;
+	return true;
+}
+
+static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+		/* SQ is full, ring doorbell */
+		mlx5e_xmit_xdp_doorbell(sq);
+		sq->stats->full++;
+		return -EBUSY;
+	}
+
+	return MLX5E_XDP_CHECK_OK;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+				 struct mlx5e_xdp_xmit_data *xdptxd,
+				 struct mlx5e_xdp_info *xdpi,
+				 int check_result)
 {
 	struct mlx5_wq_cyc       *wq   = &sq->wq;
 	u16                       pi   = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -112,9 +309,8 @@
 	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
 	struct mlx5_wqe_data_seg *dseg = wqe->data;
 
-	struct xdp_frame *xdpf = xdpi->xdpf;
-	dma_addr_t dma_addr  = xdpi->dma_addr;
-	unsigned int dma_len = xdpf->len;
+	dma_addr_t dma_addr = xdptxd->dma_addr;
+	u32 dma_len = xdptxd->len;
 
 	struct mlx5e_xdpsq_stats *stats = sq->stats;
 
@@ -125,21 +321,16 @@
 		return false;
 	}
 
-	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
-		if (sq->doorbell) {
-			/* SQ is full, ring doorbell */
-			mlx5e_xmit_xdp_doorbell(sq);
-			sq->doorbell = false;
-		}
-		stats->full++;
+	if (!check_result)
+		check_result = mlx5e_xmit_xdp_frame_check(sq);
+	if (unlikely(check_result < 0))
 		return false;
-	}
 
 	cseg->fm_ce_se = 0;
 
 	/* copy the inline part if required */
 	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
-		memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+		memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
 		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
 		dma_len  -= MLX5E_XDP_MIN_INLINE;
 		dma_addr += MLX5E_XDP_MIN_INLINE;
@@ -152,24 +343,52 @@
 
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
 
-	/* move page to reference to sq responsibility,
-	 * and mark so it's not put back in page-cache.
-	 */
-	sq->db.xdpi[pi] = *xdpi;
 	sq->pc++;
 
-	sq->doorbell = true;
+	sq->doorbell_cseg = cseg;
 
+	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
 	stats->xmit++;
 	return true;
 }
 
+static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
+				  struct mlx5e_xdp_wqe_info *wi,
+				  u32 *xsk_frames,
+				  bool recycle)
+{
+	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+	u16 i;
+
+	for (i = 0; i < wi->num_pkts; i++) {
+		struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+		switch (xdpi.mode) {
+		case MLX5E_XDP_XMIT_MODE_FRAME:
+			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
+			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
+					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
+			xdp_return_frame(xdpi.frame.xdpf);
+			break;
+		case MLX5E_XDP_XMIT_MODE_PAGE:
+			/* XDP_TX from the regular RQ */
+			mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+			break;
+		case MLX5E_XDP_XMIT_MODE_XSK:
+			/* AF_XDP send */
+			(*xsk_frames)++;
+			break;
+		default:
+			WARN_ON_ONCE(true);
+		}
+	}
+}
+
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 {
 	struct mlx5e_xdpsq *sq;
 	struct mlx5_cqe64 *cqe;
-	struct mlx5e_rq *rq;
-	bool is_redirect;
+	u32 xsk_frames = 0;
 	u16 sqcc;
 	int i;
 
@@ -182,9 +401,6 @@
 	if (!cqe)
 		return false;
 
-	is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
-	rq = container_of(sq, struct mlx5e_rq, xdpsq);
-
 	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
 	 * otherwise a cq overrun may occur
 	 */
@@ -199,24 +415,28 @@
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
+		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+			netdev_WARN_ONCE(sq->channel->netdev,
+					 "Bad OP in XDPSQ CQE: 0x%x\n",
+					 get_cqe_opcode(cqe));
+
 		do {
-			u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
-			struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+			struct mlx5e_xdp_wqe_info *wi;
+			u16 ci;
 
 			last_wqe = (sqcc == wqe_counter);
-			sqcc++;
+			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+			wi = &sq->db.wqe_info[ci];
 
-			if (is_redirect) {
-				xdp_return_frame(xdpi->xdpf);
-				dma_unmap_single(sq->pdev, xdpi->dma_addr,
-						 xdpi->xdpf->len, DMA_TO_DEVICE);
-			} else {
-				/* Recycle RX page */
-				mlx5e_page_release(rq, &xdpi->di, true);
-			}
+			sqcc += wi->num_wqebbs;
+
+			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
 		} while (!last_wqe);
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
+	if (xsk_frames)
+		xsk_umem_complete_tx(sq->umem, xsk_frames);
+
 	sq->stats->cqes += i;
 
 	mlx5_cqwq_update_db_record(&cq->wq);
@@ -230,27 +450,22 @@
 
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 {
-	struct mlx5e_rq *rq;
-	bool is_redirect;
-
-	is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
-	rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+	u32 xsk_frames = 0;
 
 	while (sq->cc != sq->pc) {
-		u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
-		struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+		struct mlx5e_xdp_wqe_info *wi;
+		u16 ci;
 
-		sq->cc++;
+		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+		wi = &sq->db.wqe_info[ci];
 
-		if (is_redirect) {
-			xdp_return_frame(xdpi->xdpf);
-			dma_unmap_single(sq->pdev, xdpi->dma_addr,
-					 xdpi->xdpf->len, DMA_TO_DEVICE);
-		} else {
-			/* Recycle RX page */
-			mlx5e_page_release(rq, &xdpi->di, false);
-		}
+		sq->cc += wi->num_wqebbs;
+
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
 	}
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(sq->umem, xsk_frames);
 }
 
 int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -262,7 +477,8 @@
 	int sq_num;
 	int i;
 
-	if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state)))
+	/* this flag is sufficient, no need to test internal sq state */
+	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
 		return -ENETDOWN;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
@@ -275,33 +491,63 @@
 
 	sq = &priv->channels.c[sq_num]->xdpsq;
 
-	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
-		return -ENETDOWN;
-
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
+		struct mlx5e_xdp_xmit_data xdptxd;
 		struct mlx5e_xdp_info xdpi;
 
-		xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
-					       DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+		xdptxd.data = xdpf->data;
+		xdptxd.len = xdpf->len;
+		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
+						 xdptxd.len, DMA_TO_DEVICE);
+
+		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
 			xdp_return_frame_rx_napi(xdpf);
 			drops++;
 			continue;
 		}
 
-		xdpi.xdpf = xdpf;
+		xdpi.mode           = MLX5E_XDP_XMIT_MODE_FRAME;
+		xdpi.frame.xdpf     = xdpf;
+		xdpi.frame.dma_addr = xdptxd.dma_addr;
 
-		if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
-			dma_unmap_single(sq->pdev, xdpi.dma_addr,
-					 xdpf->len, DMA_TO_DEVICE);
+		if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
+			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
+					 xdptxd.len, DMA_TO_DEVICE);
 			xdp_return_frame_rx_napi(xdpf);
 			drops++;
 		}
 	}
 
-	if (flags & XDP_XMIT_FLUSH)
+	if (flags & XDP_XMIT_FLUSH) {
+		if (sq->mpwqe.wqe)
+			mlx5e_xdp_mpwqe_complete(sq);
 		mlx5e_xmit_xdp_doorbell(sq);
+	}
 
 	return n - drops;
 }
+
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
+{
+	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
+
+	if (xdpsq->mpwqe.wqe)
+		mlx5e_xdp_mpwqe_complete(xdpsq);
+
+	mlx5e_xmit_xdp_doorbell(xdpsq);
+
+	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
+		xdp_do_flush_map();
+		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
+	}
+}
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+	sq->xmit_xdp_frame_check = is_mpw ?
+		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
+	sq->xmit_xdp_frame = is_mpw ?
+		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 6dfab04..36ac1e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -33,31 +33,188 @@
 #define __MLX5_EN_XDP_H__
 
 #include "en.h"
+#include "en/txrx.h"
 
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
-				 MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
-	((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
+	(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
 
+#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
+
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
+	DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
+
+/* The product of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of the Ctrl Segment.
+ * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+#define MLX5E_XDP_MPW_MAX_NUM_DS \
+	(MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+
+struct mlx5e_xsk_param;
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      void *va, u16 *rx_headroom, u32 *len);
+		      void *va, u16 *rx_headroom, u32 *len, bool xsk);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
-
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
 int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		   u32 flags);
 
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+	set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+	clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+	/* let other device's napi(s) see our new state */
+	synchronize_rcu();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+	return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
+{
+	set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
+{
+	clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
+{
+	return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
 static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
 {
+	if (sq->doorbell_cseg) {
+		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+		sq->doorbell_cseg = NULL;
+	}
+}
+
+/* Enable inline WQEs to shift some load from a congested HCA (HW) to
+ * a less congested cpu (SW).
+ */
+static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
+{
+	u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+
+#define MLX5E_XDP_INLINE_WATERMARK_LOW	10
+#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
+
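+	/* The gap between the watermarks provides hysteresis: inlining turns
+	 * on only once the SQ is deeply backlogged and turns off only after
+	 * it has largely drained, so the mode doesn't flap per packet.
+	 */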
+	if (session->inline_on) {
+		if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+			session->inline_on = 0;
+		return;
+	}
+
+	/* inline is off: turn it on once the SQ is sufficiently backlogged */
+	if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+		session->inline_on = 1;
+}
+
+static inline bool
+mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
+{
+	return session->inline_on &&
+	       session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
+}
+
+static inline void
+mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
+			   u16 pi, u16 nnops)
+{
+	struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+	edge_wi = wi + nnops;
+	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
+	for (; wi < edge_wi; wi++) {
+		wi->num_wqebbs = 1;
+		wi->num_pkts   = 0;
+		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+	}
+
+	sq->stats->nops += nnops;
+}
+
+static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
+			 struct mlx5e_xdp_xmit_data *xdptxd,
+			 struct mlx5e_xdpsq_stats *stats)
+{
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+	struct mlx5_wqe_data_seg *dseg =
+		(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+	u32 dma_len = xdptxd->len;
+
+	session->pkt_count++;
+
+	if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
+		struct mlx5_wqe_inline_seg *inline_dseg =
+			(struct mlx5_wqe_inline_seg *)dseg;
+		u16 ds_len = sizeof(*inline_dseg) + dma_len;
+		u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS);
+
+		inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
+		memcpy(inline_dseg->data, xdptxd->data, dma_len);
+
+		session->ds_count += ds_cnt;
+		stats->inlnw++;
+		return;
+	}
+
+	dseg->addr       = cpu_to_be64(xdptxd->dma_addr);
+	dseg->byte_count = cpu_to_be32(dma_len);
+	dseg->lkey       = sq->mkey_be;
+	session->ds_count++;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi)
+{
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_tx_wqe *wqe;
-	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
 
-	wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+	*pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+	memset(wqe, 0, sizeof(*wqe));
 
-	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+	return wqe;
 }
 
+static inline void
+mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
+		     struct mlx5e_xdp_info *xi)
+{
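+	/* pc and cc are free-running counters; the mask folds them into a
+	 * power-of-two ring, so no explicit wrap handling is needed.
+	 */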
+	u32 i = (*fifo->pc)++ & fifo->mask;
+
+	fifo->xi[i] = *xi;
+}
+
+static inline struct mlx5e_xdp_info
+mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
+{
+	return fifo->xi[(*fifo->cc)++ & fifo->mask];
+}
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 0000000..475b6bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{
+	/* Check in advance that we have enough frames, instead of allocating
+	 * one-by-one, failing and moving frames to the Reuse Ring.
+	 */
+	return xsk_umem_has_addrs_rq(rq->umem, count);
+}
+
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+			      struct mlx5e_dma_info *dma_info)
+{
+	struct xdp_umem *umem = rq->umem;
+	u64 handle;
+
+	if (!xsk_umem_peek_addr_rq(umem, &handle))
+		return -ENOMEM;
+
+	dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
+						      rq->buff.umem_headroom);
+	dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+
+	/* No need to add headroom to the DMA address. In striding RQ case, we
+	 * just provide pages for UMR, and headroom is counted at the setup
+	 * stage when creating a WQE. In non-striding RQ case, headroom is
+	 * accounted in mlx5e_alloc_rx_wqe.
+	 */
+	dma_info->addr = xdp_umem_get_dma(umem, handle);
+
+	xsk_umem_discard_addr_rq(umem);
+
+	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+				   DMA_BIDIRECTIONAL);
+
+	return 0;
+}
+
+static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
+{
+	xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+}
+
+/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
+ * the userspace if possible, and if not, this function is called to reuse them
+ * in the driver.
+ */
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+			    struct mlx5e_dma_info *dma_info)
+{
+	mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+}
+
+/* Return a frame back to the hardware to fill in again. It is used by XDP
+ * when the XDP program returns XDP_TX, or XDP_REDIRECT to a target other
+ * than an XSKMAP.
+ */
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+	struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+
+	mlx5e_xsk_recycle_frame(rq, handle);
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
+					       u32 cqe_bcnt)
+{
+	struct sk_buff *skb;
+
+	skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+	if (unlikely(!skb)) {
+		rq->stats->buff_alloc_err++;
+		return NULL;
+	}
+
+	skb_put_data(skb, data, cqe_bcnt);
+
+	return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+						    struct mlx5e_mpw_info *wi,
+						    u16 cqe_bcnt,
+						    u32 head_offset,
+						    u32 page_idx)
+{
+	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+	u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+	u32 cqe_bcnt32 = cqe_bcnt;
+	void *va, *data;
+	u32 frag_size;
+	bool consumed;
+
+	/* Check packet size. Note LRO doesn't use linear SKB */
+	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+		rq->stats->oversize_pkts_sw_drop++;
+		return NULL;
+	}
+
+	/* head_offset is not used in this function, because di->xsk.data and
+	 * di->addr point directly to the necessary place. Furthermore, in the
+	 * current implementation, UMR pages are mapped to XSK frames, so
+	 * head_offset should always be 0.
+	 */
+	WARN_ON_ONCE(head_offset);
+
+	va             = di->xsk.data;
+	data           = va + rx_headroom;
+	frag_size      = rq->buff.headroom + cqe_bcnt32;
+
+	dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+	prefetch(data);
+
+	rcu_read_lock();
+	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+	rcu_read_unlock();
+
+	/* Possible flows:
+	 * - XDP_REDIRECT to XSKMAP:
+	 *   The page is owned by the userspace from now.
+	 * - XDP_TX and other XDP_REDIRECTs:
+	 *   The page was returned by ZCA and recycled.
+	 * - XDP_DROP:
+	 *   Recycle the page.
+	 * - XDP_PASS:
+	 *   Allocate an SKB, copy the data and recycle the page.
+	 *
+	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
+	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
+	 * allocated first from the Reuse Ring, so it has enough space.
+	 */
+
+	if (likely(consumed)) {
+		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+		return NULL; /* page/packet was consumed by XDP */
+	}
+
+	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
+	 * frame. On SKB allocation failure, NULL is returned.
+	 */
+	return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+					      struct mlx5_cqe64 *cqe,
+					      struct mlx5e_wqe_frag_info *wi,
+					      u32 cqe_bcnt)
+{
+	struct mlx5e_dma_info *di = wi->di;
+	u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+	void *va, *data;
+	bool consumed;
+	u32 frag_size;
+
+	/* wi->offset is not used in this function, because di->xsk.data and
+	 * di->addr point directly to the necessary place. Furthermore, in the
+	 * current implementation, one page = one packet = one frame, so
+	 * wi->offset should always be 0.
+	 */
+	WARN_ON_ONCE(wi->offset);
+
+	va             = di->xsk.data;
+	data           = va + rx_headroom;
+	frag_size      = rq->buff.headroom + cqe_bcnt;
+
+	dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+	prefetch(data);
+
+	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
+		rq->stats->wqe_err++;
+		return NULL;
+	}
+
+	rcu_read_lock();
+	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+	rcu_read_unlock();
+
+	if (likely(consumed))
+		return NULL; /* page/packet was consumed by XDP */
+
+	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+	 * will be handled by mlx5e_put_rx_frag.
+	 * On SKB allocation failure, NULL is returned.
+	 */
+	return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
new file mode 100644
index 0000000..cab0e93
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_RX_H__
+#define __MLX5_EN_XSK_RX_H__
+
+#include "en.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+			      struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+			    struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+						    struct mlx5e_mpw_info *wi,
+						    u16 cqe_bcnt,
+						    u32 head_offset,
+						    u32 page_idx);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+					      struct mlx5_cqe64 *cqe,
+					      struct mlx5e_wqe_frag_info *wi,
+					      u32 cqe_bcnt);
+
+static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
+{
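+	/* Without the need_wakeup feature, an allocation failure is returned
+	 * so that the driver keeps polling for free frames. With it, the
+	 * failure is instead signalled to the application via the need_wakeup
+	 * flag, and the driver can stop polling.
+	 */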
+	if (!xsk_umem_uses_need_wakeup(rq->umem))
+		return alloc_err;
+
+	if (unlikely(alloc_err))
+		xsk_set_rx_need_wakeup(rq->umem);
+	else
+		xsk_clear_rx_need_wakeup(rq->umem);
+
+	return false;
+}
+
+#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
new file mode 100644
index 0000000..631af8d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "setup.h"
+#include "en/params.h"
+
+/* This matches XDP_UMEM_MIN_CHUNK_SIZE, but that constant is private and may
+ * change unexpectedly. Also, mlx5e has its own minimum valid stride size for
+ * striding RQ, so keep this check in the driver.
+ */
+#define MLX5E_MIN_XSK_CHUNK_SIZE 2048
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+			      struct mlx5e_xsk_param *xsk,
+			      struct mlx5_core_dev *mdev)
+{
+	/* AF_XDP doesn't support frames larger than PAGE_SIZE. */
+	if (xsk->chunk_size > PAGE_SIZE ||
+			xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
+		return false;
+
+	/* With the current MTU and XSK headroom, packets don't fit in the frames. */
+	if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size)
+		return false;
+
+	/* frag_sz is different for regular and XSK RQs, so ensure that linear
+	 * SKB mode is possible.
+	 */
+	switch (params->rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		return mlx5e_rx_is_linear_skb(params, xsk);
+	}
+}
+
+static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv,
+				       u8 log_wq_size,
+				       struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+
+	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+}
+
+static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
+				   struct mlx5e_params *params,
+				   struct mlx5e_xsk_param *xsk,
+				   struct mlx5e_channel_param *cparam)
+{
+	const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+	mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
+	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+	mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq);
+	mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq);
+	mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+	mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq);
+}
+
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		   struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+		   struct mlx5e_channel *c)
+{
+	struct mlx5e_channel_param *cparam;
+	struct dim_cq_moder icocq_moder = {};
+	int err;
+
+	if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+		return -EINVAL;
+
+	cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
+	if (!cparam)
+		return -ENOMEM;
+
+	mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
+
+	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq);
+	if (unlikely(err))
+		goto err_free_cparam;
+
+	err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
+	if (unlikely(err))
+		goto err_close_rx_cq;
+
+	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq);
+	if (unlikely(err))
+		goto err_close_rq;
+
+	/* Create a separate SQ, so that when the UMEM is disabled, we can
+	 * close this SQ safely and stop receiving CQEs. Otherwise, e.g., if
+	 * the XDPSQ were used instead, we might run into trouble when the
+	 * UMEM is disabled and then re-enabled, but the SQ keeps receiving
+	 * CQEs from the old UMEM.
+	 */
+	err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
+	if (unlikely(err))
+		goto err_close_tx_cq;
+
+	err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
+	if (unlikely(err))
+		goto err_close_sq;
+
+	/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+	 * triggered and NAPI to be called on the correct CPU.
+	 */
+	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
+	if (unlikely(err))
+		goto err_close_icocq;
+
+	kvfree(cparam);
+
+	spin_lock_init(&c->xskicosq_lock);
+
+	set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+	return 0;
+
+err_close_icocq:
+	mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_sq:
+	mlx5e_close_xdpsq(&c->xsksq);
+
+err_close_tx_cq:
+	mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+	mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+	mlx5e_close_cq(&c->xskrq.cq);
+
+err_free_cparam:
+	kvfree(cparam);
+
+	return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+	clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+	napi_synchronize(&c->napi);
+
+	mlx5e_close_rq(&c->xskrq);
+	mlx5e_close_cq(&c->xskrq.cq);
+	mlx5e_close_icosq(&c->xskicosq);
+	mlx5e_close_cq(&c->xskicosq.cq);
+	mlx5e_close_xdpsq(&c->xsksq);
+	mlx5e_close_cq(&c->xsksq.cq);
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+	mlx5e_activate_icosq(&c->xskicosq);
+	set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+	/* TX queue is created active. */
+
+	spin_lock(&c->xskicosq_lock);
+	mlx5e_trigger_irq(&c->xskicosq);
+	spin_unlock(&c->xskicosq_lock);
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+	mlx5e_deactivate_rq(&c->xskrq);
+	/* TX queue is disabled on close. */
+	mlx5e_deactivate_icosq(&c->xskicosq);
+}
+
+static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
+{
+	struct mlx5e_redirect_rqt_param direct_rrp = {
+		.is_rss = false,
+		{
+			.rqn = rqn,
+		},
+	};
+
+	u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
+
+	return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+}
+
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
+{
+	return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
+{
+	return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+	int err, i;
+
+	if (!priv->xsk.refcnt)
+		return 0;
+
+	for (i = 0; i < chs->num; i++) {
+		struct mlx5e_channel *c = chs->c[i];
+
+		if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+			continue;
+
+		err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
+		if (unlikely(err))
+			goto err_stop;
+	}
+
+	return 0;
+
+err_stop:
+	for (i--; i >= 0; i--) {
+		if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+			continue;
+
+		mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+	}
+
+	return err;
+}
+
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+	int i;
+
+	if (!priv->xsk.refcnt)
+		return;
+
+	for (i = 0; i < chs->num; i++) {
+		if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+			continue;
+
+		mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+	}
+}
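As a quick illustration of what mlx5e_validate_xsk_param() accepts, the chunk-size checks reduce to the following on a 4 KiB-page system (a sketch, not driver code; the power-of-two constraint comes from the aligned UMEM mode, not from this function):

	/* Mirrors the two range checks above for PAGE_SIZE == 4096. */
	static bool xsk_chunk_size_ok(unsigned int chunk_size)
	{
		return chunk_size >= 2048 &&	/* MLX5E_MIN_XSK_CHUNK_SIZE */
		       chunk_size <= 4096;	/* PAGE_SIZE */
	}

With power-of-two chunks, only 2048 and 4096 pass, and the MTU check then additionally requires the packet plus headroom to fit in one chunk.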
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
new file mode 100644
index 0000000..0dd11b8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_SETUP_H__
+#define __MLX5_EN_XSK_SETUP_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param;
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+			      struct mlx5e_xsk_param *xsk,
+			      struct mlx5_core_dev *mdev);
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		   struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+		   struct mlx5e_channel *c);
+void mlx5e_close_xsk(struct mlx5e_channel *c);
+void mlx5e_activate_xsk(struct mlx5e_channel *c);
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
new file mode 100644
index 0000000..8782747
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "tx.h"
+#include "umem.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include <net/xdp_sock.h>
+
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_params *params = &priv->channels.params;
+	struct mlx5e_channel *c;
+	u16 ix;
+
+	if (unlikely(!mlx5e_xdp_is_open(priv)))
+		return -ENETDOWN;
+
+	if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+		return -EINVAL;
+
+	c = priv->channels.c[ix];
+
+	if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
+		return -ENXIO;
+
+	if (!napi_if_scheduled_mark_missed(&c->napi)) {
+		/* To avoid WQE overrun, don't post a NOP if XSKICOSQ is not
+		 * active and not polled by NAPI. Return 0, because the upcoming
+		 * activate will trigger the IRQ for us.
+		 */
+		if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state)))
+			return 0;
+
+		spin_lock(&c->xskicosq_lock);
+		mlx5e_trigger_irq(&c->xskicosq);
+		spin_unlock(&c->xskicosq_lock);
+	}
+
+	return 0;
+}
+
+/* When TX fails (because of the size of the packet), we need to get completions
+ * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish
+ * between successful TX and errors, the handling in mlx5e_poll_xdpsq_cq is
+ * the same.
+ */
+static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
+				  struct mlx5e_xdp_info *xdpi)
+{
+	u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+	struct mlx5e_tx_wqe *nopwqe;
+
+	wi->num_wqebbs = 1;
+	wi->num_pkts = 1;
+
+	nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+	sq->doorbell_cseg = &nopwqe->ctrl;
+}
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
+{
+	struct xdp_umem *umem = sq->umem;
+	struct mlx5e_xdp_info xdpi;
+	struct mlx5e_xdp_xmit_data xdptxd;
+	bool work_done = true;
+	bool flush = false;
+
+	xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
+
+	for (; budget; budget--) {
+		int check_result = sq->xmit_xdp_frame_check(sq);
+		struct xdp_desc desc;
+
+		if (unlikely(check_result < 0)) {
+			work_done = false;
+			break;
+		}
+
+		if (!xsk_umem_consume_tx(umem, &desc)) {
+			/* TX will get stuck until something wakes it up by
+			 * triggering NAPI. Currently it's expected that the
+			 * application calls sendto() if there are consumed
+			 * but not yet completed frames.
+			 */
+			break;
+		}
+
+		xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
+		xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+		xdptxd.len = desc.len;
+
+		dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
+					   xdptxd.len, DMA_BIDIRECTIONAL);
+
+		if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
+			if (sq->mpwqe.wqe)
+				mlx5e_xdp_mpwqe_complete(sq);
+
+			mlx5e_xsk_tx_post_err(sq, &xdpi);
+		}
+
+		flush = true;
+	}
+
+	if (flush) {
+		if (sq->mpwqe.wqe)
+			mlx5e_xdp_mpwqe_complete(sq);
+		mlx5e_xmit_xdp_doorbell(sq);
+
+		xsk_umem_consume_tx_done(umem);
+	}
+
+	return !(budget && work_done);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
new file mode 100644
index 0000000..79b487d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_TX_H__
+#define __MLX5_EN_XSK_TX_H__
+
+#include "en.h"
+#include <net/xdp_sock.h>
+
+/* TX data path */
+
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+
+static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
+{
+	if (!xsk_umem_uses_need_wakeup(sq->umem))
+		return;
+
+	if (sq->pc != sq->cc)
+		xsk_clear_tx_need_wakeup(sq->umem);
+	else
+		xsk_set_tx_need_wakeup(sq->umem);
+}
+
+#endif /* __MLX5_EN_XSK_TX_H__ */
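mlx5e_xsk_update_tx_wakeup() is the driver half of the same protocol on the TX side. A hedged sketch of the user-space half, modeled on the kick_tx() helper in the kernel's xdpsock sample; the xsk and tx variables are illustrative:

	#include <sys/socket.h>
	#include <bpf/xsk.h>

	static void kick_tx_if_needed(struct xsk_socket *xsk,
				      struct xsk_ring_prod *tx)
	{
		/* Flag clear means sq->pc != sq->cc: the driver still has
		 * outstanding work and will reap completions on its own.
		 */
		if (!xsk_ring_prod__needs_wakeup(tx))
			return;

		/* A zero-length sendto() is the conventional kick; it lands
		 * in mlx5e_xsk_wakeup() and triggers NAPI via the XSKICOSQ.
		 */
		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
	}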
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
new file mode 100644
index 0000000..4baaa57
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/xdp_sock.h>
+#include "umem.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
+			      struct xdp_umem *umem)
+{
+	struct device *dev = priv->mdev->device;
+	u32 i;
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
+					      DMA_BIDIRECTIONAL);
+
+		if (unlikely(dma_mapping_error(dev, dma)))
+			goto err_unmap;
+		umem->pages[i].dma = dma;
+	}
+
+	return 0;
+
+err_unmap:
+	while (i--) {
+		dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
+		umem->pages[i].dma = 0;
+	}
+
+	return -ENOMEM;
+}
+
+static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
+				 struct xdp_umem *umem)
+{
+	struct device *dev = priv->mdev->device;
+	u32 i;
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
+		umem->pages[i].dma = 0;
+	}
+}
+
+static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
+{
+	if (!xsk->umems) {
+		xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+				     sizeof(*xsk->umems), GFP_KERNEL);
+		if (unlikely(!xsk->umems))
+			return -ENOMEM;
+	}
+
+	xsk->refcnt++;
+	xsk->ever_used = true;
+
+	return 0;
+}
+
+static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
+{
+	if (!--xsk->refcnt) {
+		kfree(xsk->umems);
+		xsk->umems = NULL;
+	}
+}
+
+static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
+{
+	int err;
+
+	err = mlx5e_xsk_get_umems(xsk);
+	if (unlikely(err))
+		return err;
+
+	xsk->umems[ix] = umem;
+	return 0;
+}
+
+static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
+{
+	xsk->umems[ix] = NULL;
+
+	mlx5e_xsk_put_umems(xsk);
+}
+
+static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
+{
+	return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
+{
+	xsk->headroom = umem->headroom;
+	xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+				   struct xdp_umem *umem, u16 ix)
+{
+	struct mlx5e_params *params = &priv->channels.params;
+	struct mlx5e_xsk_param xsk;
+	struct mlx5e_channel *c;
+	int err;
+
+	if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
+		return -EBUSY;
+
+	if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
+		return -EINVAL;
+
+	err = mlx5e_xsk_map_umem(priv, umem);
+	if (unlikely(err))
+		return err;
+
+	err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
+	if (unlikely(err))
+		goto err_unmap_umem;
+
+	mlx5e_build_xsk_param(umem, &xsk);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		/* XSK objects will be created on open. */
+		goto validate_closed;
+	}
+
+	if (!params->xdp_prog) {
+		/* XSK objects will be created when an XDP program is set,
+		 * and the channels are reopened.
+		 */
+		goto validate_closed;
+	}
+
+	c = priv->channels.c[ix];
+
+	err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+	if (unlikely(err))
+		goto err_remove_umem;
+
+	mlx5e_activate_xsk(c);
+
+	/* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+	 * any Fill Ring entries at the setup stage.
+	 */
+
+	err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+	if (unlikely(err))
+		goto err_deactivate;
+
+	return 0;
+
+err_deactivate:
+	mlx5e_deactivate_xsk(c);
+	mlx5e_close_xsk(c);
+
+err_remove_umem:
+	mlx5e_xsk_remove_umem(&priv->xsk, ix);
+
+err_unmap_umem:
+	mlx5e_xsk_unmap_umem(priv, umem);
+
+	return err;
+
+validate_closed:
+	/* Check the configuration in advance, rather than failing at a later
+	 * stage (in mlx5e_xdp_set or on open) and ending up with no channels.
+	 */
+	if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+		err = -EINVAL;
+		goto err_remove_umem;
+	}
+
+	return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+	struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
+						   &priv->xsk, ix);
+	struct mlx5e_channel *c;
+
+	if (unlikely(!umem))
+		return -EINVAL;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto remove_umem;
+
+	/* XSK RQ and SQ are only created if XDP program is set. */
+	if (!priv->channels.params.xdp_prog)
+		goto remove_umem;
+
+	c = priv->channels.c[ix];
+	mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+	mlx5e_deactivate_xsk(c);
+	mlx5e_close_xsk(c);
+
+remove_umem:
+	mlx5e_xsk_remove_umem(&priv->xsk, ix);
+	mlx5e_xsk_unmap_umem(priv, umem);
+
+	return 0;
+}
+
+static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
+				 u16 ix)
+{
+	int err;
+
+	mutex_lock(&priv->state_lock);
+	err = mlx5e_xsk_enable_locked(priv, umem, ix);
+	mutex_unlock(&priv->state_lock);
+
+	return err;
+}
+
+static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
+{
+	int err;
+
+	mutex_lock(&priv->state_lock);
+	err = mlx5e_xsk_disable_locked(priv, ix);
+	mutex_unlock(&priv->state_lock);
+
+	return err;
+}
+
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_params *params = &priv->channels.params;
+	u16 ix;
+
+	if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+		return -EINVAL;
+
+	return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
+		      mlx5e_xsk_disable_umem(priv, ix);
+}
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
+{
+	struct xdp_umem_fq_reuse *reuseq;
+
+	reuseq = xsk_reuseq_prepare(nentries);
+	if (unlikely(!reuseq))
+		return -ENOMEM;
+	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+	return 0;
+}
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
+{
+	u16 res = xsk->refcnt ? params->num_channels : 0;
+
+	while (res) {
+		if (mlx5e_xsk_get_umem(params, xsk, res - 1))
+			break;
+		--res;
+	}
+
+	return res;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
new file mode 100644
index 0000000..25b4cbe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_UMEM_H__
+#define __MLX5_EN_XSK_UMEM_H__
+
+#include "en.h"
+
+static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
+						  struct mlx5e_xsk *xsk, u16 ix)
+{
+	if (!xsk || !xsk->umems)
+		return NULL;
+
+	if (unlikely(ix >= params->num_channels))
+		return NULL;
+
+	return xsk->umems[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk);
+
+#endif /* __MLX5_EN_XSK_UMEM_H__ */
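For reference, the UMEM that mlx5e_xsk_setup_umem() eventually receives is created in user space roughly as follows (a sketch using the libbpf helpers; buffer size and ring depths are illustrative). The UMEM is registered at socket setup and handed to the driver once the socket is bound to a queue:

	#include <stdlib.h>
	#include <bpf/xsk.h>

	#define NUM_FRAMES 4096
	#define FRAME_SIZE 2048	/* chunk size; cf. MLX5E_MIN_XSK_CHUNK_SIZE */

	static struct xsk_umem *create_umem(struct xsk_ring_prod *fq,
					    struct xsk_ring_cons *cq)
	{
		struct xsk_umem_config cfg = {
			.fill_size      = XSK_RING_PROD__DEFAULT_NUM_DESCS,
			.comp_size      = XSK_RING_CONS__DEFAULT_NUM_DESCS,
			.frame_size     = FRAME_SIZE,
			.frame_headroom = 0,
		};
		struct xsk_umem *umem;
		void *buf;

		if (posix_memalign(&buf, 4096, NUM_FRAMES * FRAME_SIZE))
			return NULL;
		if (xsk_umem__create(&umem, buf, NUM_FRAMES * FRAME_SIZE,
				     fq, cq, &cfg))
			return NULL;
		return umem;
	}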
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 1dd2253..3022463 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -39,6 +39,58 @@
 #include "en_accel/ipsec_rxtx.h"
 #include "en_accel/tls_rxtx.h"
 #include "en.h"
+#include "en/txrx.h"
+
+#if IS_ENABLED(CONFIG_GENEVE)
+static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
+{
+	return mlx5_tx_swp_supported(mdev);
+}
+
+static inline void
+mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+{
+	struct mlx5e_swp_spec swp_spec = {};
+	unsigned int offset = 0;
+	__be16 l3_proto;
+	u8 l4_proto;
+
+	l3_proto = vlan_get_protocol(skb);
+	switch (l3_proto) {
+	case htons(ETH_P_IP):
+		l4_proto = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
+		break;
+	default:
+		return;
+	}
+
+	if (l4_proto != IPPROTO_UDP ||
+	    udp_hdr(skb)->dest != cpu_to_be16(GENEVE_UDP_PORT))
+		return;
+	swp_spec.l3_proto = l3_proto;
+	swp_spec.l4_proto = l4_proto;
+	swp_spec.is_tun = true;
+	if (inner_ip_hdr(skb)->version == 6) {
+		swp_spec.tun_l3_proto = htons(ETH_P_IPV6);
+		swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr;
+	} else {
+		swp_spec.tun_l3_proto = htons(ETH_P_IP);
+		swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol;
+	}
+
+	mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
+}
+
+#else
+static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
+{
+	return false;
+}
+
+#endif /* CONFIG_GENEVE */
 
 static inline void
 mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 128a82b..0dd1751 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -136,7 +136,7 @@
 				struct mlx5_wqe_eth_seg *eseg, u8 mode,
 				struct xfrm_offload *xo)
 {
-	u8 proto;
+	struct mlx5e_swp_spec swp_spec = {};
 
 	/* Tunnel Mode:
 	 * SWP:      OutL3       InL3  InL4
@@ -146,35 +146,23 @@
 	 * SWP:      OutL3       InL4
 	 *           InL3
 	 * Pkt: MAC  IP     ESP  L4
-	 *
-	 * Offsets are in 2-byte words, counting from start of frame
 	 */
-	eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
-	if (skb->protocol == htons(ETH_P_IPV6))
-		eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
-
-	if (mode == XFRM_MODE_TUNNEL) {
-		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+	swp_spec.l3_proto = skb->protocol;
+	swp_spec.is_tun = mode == XFRM_MODE_TUNNEL;
+	if (swp_spec.is_tun) {
 		if (xo->proto == IPPROTO_IPV6) {
-			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
-			proto = inner_ipv6_hdr(skb)->nexthdr;
+			swp_spec.tun_l3_proto = htons(ETH_P_IPV6);
+			swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr;
 		} else {
-			proto = inner_ip_hdr(skb)->protocol;
+			swp_spec.tun_l3_proto = htons(ETH_P_IP);
+			swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol;
 		}
 	} else {
-		eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
-		if (skb->protocol == htons(ETH_P_IPV6))
-			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
-		proto = xo->proto;
+		swp_spec.tun_l3_proto = skb->protocol;
+		swp_spec.tun_l4_proto = xo->proto;
 	}
-	switch (proto) {
-	case IPPROTO_UDP:
-		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
-		/* Fall through */
-	case IPPROTO_TCP:
-		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
-		break;
-	}
+
+	mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
 }
 
 void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
@@ -254,11 +242,13 @@
 	struct mlx5e_ipsec_metadata *mdata;
 	struct mlx5e_ipsec_sa_entry *sa_entry;
 	struct xfrm_state *x;
+	struct sec_path *sp;
 
 	if (!xo)
 		return skb;
 
-	if (unlikely(skb->sp->len != 1)) {
+	sp = skb_sec_path(skb);
+	if (unlikely(sp->len != 1)) {
 		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
 		goto drop;
 	}
@@ -305,10 +295,11 @@
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct xfrm_offload *xo;
 	struct xfrm_state *xs;
+	struct sec_path *sp;
 	u32 sa_handle;
 
-	skb->sp = secpath_dup(skb->sp);
-	if (unlikely(!skb->sp)) {
+	sp = secpath_set(skb);
+	if (unlikely(!sp)) {
 		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
 		return NULL;
 	}
@@ -320,8 +311,9 @@
 		return NULL;
 	}
 
-	skb->sp->xvec[skb->sp->len++] = xs;
-	skb->sp->olen++;
+	sp = skb_sec_path(skb);
+	sp->xvec[sp->len++] = xs;
+	sp->olen++;
 
 	xo = xfrm_offload(skb);
 	xo->flags = CRYPTO_DONE;
@@ -372,10 +364,11 @@
 bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
 			       netdev_features_t features)
 {
+	struct sec_path *sp = skb_sec_path(skb);
 	struct xfrm_state *x;
 
-	if (skb->sp && skb->sp->len) {
-		x = skb->sp->xvec[0];
+	if (sp && sp->len) {
+		x = sp->xvec[0];
 		if (x && x->xso.offload_handle)
 			return true;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index ca47c05..db84500 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -39,6 +39,7 @@
 #include <linux/skbuff.h>
 #include <net/xfrm.h>
 #include "en.h"
+#include "en/txrx.h"
 
 struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
 					  struct sk_buff *skb, u32 *cqe_bcnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
new file mode 100644
index 0000000..46725cd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "en.h"
+#include "en_accel/ktls.h"
+
+static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
+{
+	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+	void *tisc;
+
+	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+	MLX5_SET(tisc, tisc, tls_en, 1);
+
+	return mlx5e_create_tis(mdev, in, tisn);
+}
+
+static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
+			  enum tls_offload_ctx_dir direction,
+			  struct tls_crypto_info *crypto_info,
+			  u32 start_offload_tcp_sn)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_ktls_offload_context_tx *tx_priv;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int err;
+
+	if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX))
+		return -EINVAL;
+
+	if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info)))
+		return -EOPNOTSUPP;
+
+	tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL);
+	if (!tx_priv)
+		return -ENOMEM;
+
+	tx_priv->expected_seq = start_offload_tcp_sn;
+	tx_priv->crypto_info  = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+	mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv);
+
+	/* tc and underlay_qpn values are not in use for tls tis */
+	err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn);
+	if (err)
+		goto create_tis_fail;
+
+	err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id);
+	if (err)
+		goto encryption_key_create_fail;
+
+	mlx5e_ktls_tx_offload_set_pending(tx_priv);
+
+	return 0;
+
+encryption_key_create_fail:
+	mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
+create_tis_fail:
+	kvfree(tx_priv);
+	return err;
+}
+
+static void mlx5e_ktls_del(struct net_device *netdev,
+			   struct tls_context *tls_ctx,
+			   enum tls_offload_ctx_dir direction)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_ktls_offload_context_tx *tx_priv =
+		mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+	mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id);
+	mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
+	kvfree(tx_priv);
+}
+
+static const struct tlsdev_ops mlx5e_ktls_ops = {
+	.tls_dev_add = mlx5e_ktls_add,
+	.tls_dev_del = mlx5e_ktls_del,
+};
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+	struct net_device *netdev = priv->netdev;
+
+	if (!mlx5_accel_is_ktls_device(priv->mdev))
+		return;
+
+	netdev->hw_features |= NETIF_F_HW_TLS_TX;
+	netdev->features    |= NETIF_F_HW_TLS_TX;
+
+	netdev->tlsdev_ops = &mlx5e_ktls_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
new file mode 100644
index 0000000..a3efa29
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_KTLS_H__
+#define __MLX5E_KTLS_H__
+
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+#include <net/tls.h>
+#include "accel/tls.h"
+
+#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
+	(offsetof(struct mlx5e_umr_wqe, tls_static_params_ctx) + \
+	 MLX5_ST_SZ_BYTES(tls_static_params))
+#define MLX5E_KTLS_STATIC_WQEBBS \
+	(DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_PROGRESS_WQE_SZ \
+	(offsetof(struct mlx5e_tx_wqe, tls_progress_params_ctx) + \
+	 MLX5_ST_SZ_BYTES(tls_progress_params))
+#define MLX5E_KTLS_PROGRESS_WQEBBS \
+	(DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
+
+struct mlx5e_dump_wqe {
+	struct mlx5_wqe_ctrl_seg ctrl;
+	struct mlx5_wqe_data_seg data;
+};
+
+#define MLX5E_KTLS_DUMP_WQEBBS \
+	(DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
+
+enum {
+	MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD     = 0,
+	MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD        = 1,
+	MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+	MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START     = 0,
+	MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
+	MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING  = 2,
+};
+
+struct mlx5e_ktls_offload_context_tx {
+	struct tls_offload_context_tx *tx_ctx;
+	struct tls12_crypto_info_aes_gcm_128 crypto_info;
+	u32 expected_seq;
+	u32 tisn;
+	u32 key_id;
+	bool ctx_post_pending;
+};
+
+struct mlx5e_ktls_offload_context_tx_shadow {
+	struct tls_offload_context_tx         tx_ctx;
+	struct mlx5e_ktls_offload_context_tx *priv_tx;
+};
+
+static inline void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+			   struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+	struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+	BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+	shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+	shadow->priv_tx = priv_tx;
+	priv_tx->tx_ctx = tx_ctx;
+}
+
+static inline struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+	struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+	struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+	BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+	shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+	return shadow->priv_tx;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+					 struct mlx5e_txqsq *sq,
+					 struct sk_buff *skb,
+					 struct mlx5e_tx_wqe **wqe, u16 *pi);
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+					   struct mlx5e_tx_wqe_info *wi,
+					   u32 *dma_fifo_cc);
+static inline u8
+mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
+			    unsigned int sync_len)
+{
+	/* Given the MTU and sync_len, calculates an upper bound for the
+	 * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+	 */
+	return MLX5E_KTLS_DUMP_WQEBBS *
+		(nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+}
+#else
+
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline void
+mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+				      struct mlx5e_tx_wqe_info *wi,
+				      u32 *dma_fifo_cc) {}
+
+#endif
+
+#endif /* __MLX5E_KTLS_H__ */
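The TX offload implemented by mlx5e_ktls_add() is driven from user space through the kernel TLS ULP. A minimal sketch of that side, assuming an established TCP connection and AES-GCM-128 material from the handshake; the key material arguments are placeholders, and the TCP_ULP/SOL_TLS fallbacks cover older libc headers:

	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/tcp.h>
	#include <linux/tls.h>

	#ifndef TCP_ULP
	#define TCP_ULP 31
	#endif
	#ifndef SOL_TLS
	#define SOL_TLS 282
	#endif

	static int enable_ktls_tx(int sock, const unsigned char *key,
				  const unsigned char *iv,
				  const unsigned char *salt,
				  const unsigned char *rec_seq)
	{
		struct tls12_crypto_info_aes_gcm_128 ci = {
			.info.version     = TLS_1_2_VERSION,
			.info.cipher_type = TLS_CIPHER_AES_GCM_128,
		};

		memcpy(ci.key, key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
		memcpy(ci.iv, iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
		memcpy(ci.salt, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
		memcpy(ci.rec_seq, rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);

		if (setsockopt(sock, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
			return -1;
		return setsockopt(sock, SOL_TLS, TLS_TX, &ci, sizeof(ci));
	}

When the netdev advertises NETIF_F_HW_TLS_TX (as mlx5e_ktls_build_netdev() does above), the TLS_TX setsockopt reaches the tls_dev_add callback, i.e. mlx5e_ktls_add().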
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 0000000..778dab1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <linux/tls.h>
+#include "en.h"
+#include "en/txrx.h"
+#include "en_accel/ktls.h"
+
+enum {
+	MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2,
+};
+
+enum {
+	MLX5E_ENCRYPTION_STANDARD_TLS = 0x1,
+};
+
+#define EXTRACT_INFO_FIELDS do { \
+	salt    = info->salt;    \
+	rec_seq = info->rec_seq; \
+	salt_sz    = sizeof(info->salt);    \
+	rec_seq_sz = sizeof(info->rec_seq); \
+} while (0)
+
+static void
+fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
+	char *initial_rn, *gcm_iv;
+	u16 salt_sz, rec_seq_sz;
+	char *salt, *rec_seq;
+	u8 tls_version;
+
+	EXTRACT_INFO_FIELDS;
+
+	gcm_iv      = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
+	initial_rn  = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
+
+	memcpy(gcm_iv,      salt,    salt_sz);
+	memcpy(initial_rn,  rec_seq, rec_seq_sz);
+
+	tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2;
+
+	MLX5_SET(tls_static_params, ctx, tls_version, tls_version);
+	MLX5_SET(tls_static_params, ctx, const_1, 1);
+	MLX5_SET(tls_static_params, ctx, const_2, 2);
+	MLX5_SET(tls_static_params, ctx, encryption_standard,
+		 MLX5E_ENCRYPTION_STANDARD_TLS);
+	MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id);
+}
+
+static void
+build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn,
+		    struct mlx5e_ktls_offload_context_tx *priv_tx,
+		    bool fence)
+{
+	struct mlx5_wqe_ctrl_seg     *cseg  = &wqe->ctrl;
+	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+
+#define STATIC_PARAMS_DS_CNT \
+	DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS)
+
+	cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR |
+					     (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24));
+	cseg->qpn_ds           = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+					     STATIC_PARAMS_DS_CNT);
+	cseg->fm_ce_se         = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+	cseg->tisn             = cpu_to_be32(priv_tx->tisn << 8);
+
+	ucseg->flags = MLX5_UMR_INLINE;
+	ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
+
+	fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx);
+}
+
+static void
+fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	MLX5_SET(tls_progress_params, ctx, tisn, priv_tx->tisn);
+	MLX5_SET(tls_progress_params, ctx, record_tracker_state,
+		 MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
+	MLX5_SET(tls_progress_params, ctx, auth_state,
+		 MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
+}
+
+static void
+build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
+		      struct mlx5e_ktls_offload_context_tx *priv_tx,
+		      bool fence)
+{
+	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+#define PROGRESS_PARAMS_DS_CNT \
+	DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS)
+
+	cseg->opmod_idx_opcode =
+		cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV |
+			    (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24));
+	cseg->qpn_ds           = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+					     PROGRESS_PARAMS_DS_CNT);
+	cseg->fm_ce_se         = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+	fill_progress_params_ctx(wqe->tls_progress_params_ctx, priv_tx);
+}
+
+static void tx_fill_wi(struct mlx5e_txqsq *sq,
+		       u16 pi, u8 num_wqebbs, u32 num_bytes,
+		       struct page *page)
+{
+	struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
+
+	memset(wi, 0, sizeof(*wi));
+	wi->num_wqebbs = num_wqebbs;
+	wi->num_bytes  = num_bytes;
+	wi->resync_dump_frag_page = page;
+}
+
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	priv_tx->ctx_post_pending = true;
+}
+
+static bool
+mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	bool ret = priv_tx->ctx_post_pending;
+
+	priv_tx->ctx_post_pending = false;
+
+	return ret;
+}
+
+static void
+post_static_params(struct mlx5e_txqsq *sq,
+		   struct mlx5e_ktls_offload_context_tx *priv_tx,
+		   bool fence)
+{
+	struct mlx5e_umr_wqe *umr_wqe;
+	u16 pi;
+
+	umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
+	build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
+	tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL);
+	sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
+}
+
+static void
+post_progress_params(struct mlx5e_txqsq *sq,
+		     struct mlx5e_ktls_offload_context_tx *priv_tx,
+		     bool fence)
+{
+	struct mlx5e_tx_wqe *wqe;
+	u16 pi;
+
+	wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
+	build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
+	tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL);
+	sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
+}
+
+static void
+mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
+			      struct mlx5e_ktls_offload_context_tx *priv_tx,
+			      bool skip_static_post, bool fence_first_post)
+{
+	bool progress_fence = skip_static_post || !fence_first_post;
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 contig_wqebbs_room, pi;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room <
+		     MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+
+	if (!skip_static_post)
+		post_static_params(sq, priv_tx, fence_first_post);
+
+	post_progress_params(sq, priv_tx, progress_fence);
+}
+
+struct tx_sync_info {
+	u64 rcd_sn;
+	s32 sync_len;
+	int nr_frags;
+	skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+enum mlx5e_ktls_sync_retval {
+	MLX5E_KTLS_SYNC_DONE,
+	MLX5E_KTLS_SYNC_FAIL,
+	MLX5E_KTLS_SYNC_SKIP_NO_DATA,
+};
+
+static enum mlx5e_ktls_sync_retval
+tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+		 u32 tcp_seq, struct tx_sync_info *info)
+{
+	struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+	enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE;
+	struct tls_record_info *record;
+	int remaining, i = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tx_ctx->lock, flags);
+	record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
+
+	if (unlikely(!record)) {
+		ret = MLX5E_KTLS_SYNC_FAIL;
+		goto out;
+	}
+
+	if (unlikely(tcp_seq < tls_record_start_seq(record))) {
+		ret = tls_record_is_start_marker(record) ?
+			MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
+		goto out;
+	}
+
+	info->sync_len = tcp_seq - tls_record_start_seq(record);
+	remaining = info->sync_len;
+	while (remaining > 0) {
+		skb_frag_t *frag = &record->frags[i];
+
+		get_page(skb_frag_page(frag));
+		remaining -= skb_frag_size(frag);
+		info->frags[i++] = *frag;
+	}
+	/* reduce the part which will be sent with the original SKB */
+	if (remaining < 0)
+		skb_frag_size_add(&info->frags[i - 1], remaining);
+	info->nr_frags = i;
+out:
+	spin_unlock_irqrestore(&tx_ctx->lock, flags);
+	return ret;
+}
+
+static void
+tx_post_resync_params(struct mlx5e_txqsq *sq,
+		      struct mlx5e_ktls_offload_context_tx *priv_tx,
+		      u64 rcd_sn)
+{
+	struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
+	__be64 rn_be = cpu_to_be64(rcd_sn);
+	bool skip_static_post;
+	u16 rec_seq_sz;
+	char *rec_seq;
+
+	rec_seq = info->rec_seq;
+	rec_seq_sz = sizeof(info->rec_seq);
+
+	skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
+	if (!skip_static_post)
+		memcpy(rec_seq, &rn_be, rec_seq_sz);
+
+	mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
+}
+
+static int
+tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first)
+{
+	struct mlx5_wqe_ctrl_seg *cseg;
+	struct mlx5_wqe_data_seg *dseg;
+	struct mlx5e_dump_wqe *wqe;
+	dma_addr_t dma_addr = 0;
+	u16 ds_cnt;
+	int fsz;
+	u16 pi;
+
+	wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+
+	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+
+	cseg = &wqe->ctrl;
+	dseg = &wqe->data;
+
+	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8)  | MLX5_OPCODE_DUMP);
+	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+	cseg->tisn             = cpu_to_be32(tisn << 8);
+	cseg->fm_ce_se         = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+	fsz = skb_frag_size(frag);
+	dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+				    DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+		return -ENOMEM;
+
+	dseg->addr       = cpu_to_be64(dma_addr);
+	dseg->lkey       = sq->mkey_be;
+	dseg->byte_count = cpu_to_be32(fsz);
+	mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+
+	tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag));
+	sq->pc += MLX5E_KTLS_DUMP_WQEBBS;
+
+	return 0;
+}
+
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+					   struct mlx5e_tx_wqe_info *wi,
+					   u32 *dma_fifo_cc)
+{
+	struct mlx5e_sq_stats *stats;
+	struct mlx5e_sq_dma *dma;
+
+	if (!wi->resync_dump_frag_page)
+		return;
+
+	dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+	stats = sq->stats;
+
+	mlx5e_tx_dma_unmap(sq->pdev, dma);
+	put_page(wi->resync_dump_frag_page);
+	stats->tls_dump_packets++;
+	stats->tls_dump_bytes += wi->num_bytes;
+}
+
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+	tx_fill_wi(sq, pi, 1, 0, NULL);
+
+	mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
+static enum mlx5e_ktls_sync_retval
+mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
+			 struct mlx5e_txqsq *sq,
+			 int datalen,
+			 u32 seq)
+{
+	struct mlx5e_sq_stats *stats = sq->stats;
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	enum mlx5e_ktls_sync_retval ret;
+	struct tx_sync_info info = {};
+	u16 contig_wqebbs_room, pi;
+	u8 num_wqebbs;
+	int i = 0;
+
+	ret = tx_sync_info_get(priv_tx, seq, &info);
+	if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) {
+		if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) {
+			stats->tls_skip_no_sync_data++;
+			return MLX5E_KTLS_SYNC_SKIP_NO_DATA;
+		}
+		/* We might get here if a retransmission reaches the driver
+		 * after the relevant record is acked.
+		 * It should be safe to drop the packet in this case.
+		 */
+		stats->tls_drop_no_sync_data++;
+		goto err_out;
+	}
+
+	if (unlikely(info.sync_len < 0)) {
+		if (likely(datalen <= -info.sync_len))
+			return MLX5E_KTLS_SYNC_DONE;
+
+		stats->tls_drop_bypass_req++;
+		goto err_out;
+	}
+
+	stats->tls_ooo++;
+
+	tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+	/* If no dump WQE was sent, we need to have a fence NOP WQE before the
+	 * actual data xmit.
+	 */
+	if (!info.nr_frags) {
+		tx_post_fence_nop(sq);
+		return MLX5E_KTLS_SYNC_DONE;
+	}
+
+	num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+
+	if (unlikely(contig_wqebbs_room < num_wqebbs))
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+
+	tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+	for (; i < info.nr_frags; i++) {
+		unsigned int orig_fsz, frag_offset = 0, n = 0;
+		skb_frag_t *f = &info.frags[i];
+
+		orig_fsz = skb_frag_size(f);
+
+		do {
+			bool fence = !(i || frag_offset);
+			unsigned int fsz;
+
+			n++;
+			fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
+			skb_frag_size_set(f, fsz);
+			if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+				page_ref_add(skb_frag_page(f), n - 1);
+				goto err_out;
+			}
+
+			skb_frag_off_add(f, fsz);
+			frag_offset += fsz;
+		} while (frag_offset < orig_fsz);
+
+		page_ref_add(skb_frag_page(f), n - 1);
+	}
+
+	return MLX5E_KTLS_SYNC_DONE;
+
+err_out:
+	for (; i < info.nr_frags; i++)
+		/* The put_page() here undoes the page ref obtained in tx_sync_info_get().
+		 * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be
+		 * released only upon their completions (or in mlx5e_free_txqsq_descs,
+		 * if the channel closes).
+		 */
+		put_page(skb_frag_page(&info.frags[i]));
+
+	return MLX5E_KTLS_SYNC_FAIL;
+}
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+					 struct mlx5e_txqsq *sq,
+					 struct sk_buff *skb,
+					 struct mlx5e_tx_wqe **wqe, u16 *pi)
+{
+	struct mlx5e_ktls_offload_context_tx *priv_tx;
+	struct mlx5e_sq_stats *stats = sq->stats;
+	struct mlx5_wqe_ctrl_seg *cseg;
+	struct tls_context *tls_ctx;
+	int datalen;
+	u32 seq;
+
+	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+		goto out;
+
+	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+	if (!datalen)
+		goto out;
+
+	tls_ctx = tls_get_ctx(skb->sk);
+	if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
+		goto err_out;
+
+	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+	if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
+		mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
+		*wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+		stats->tls_ctx++;
+	}
+
+	seq = ntohl(tcp_hdr(skb)->seq);
+	if (unlikely(priv_tx->expected_seq != seq)) {
+		enum mlx5e_ktls_sync_retval ret =
+			mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq);
+
+		if (likely(ret == MLX5E_KTLS_SYNC_DONE))
+			*wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+		else if (ret == MLX5E_KTLS_SYNC_FAIL)
+			goto err_out;
+		else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */
+			goto out;
+	}
+
+	priv_tx->expected_seq = seq + datalen;
+
+	cseg = &(*wqe)->ctrl;
+	cseg->tisn = cpu_to_be32(priv_tx->tisn << 8);
+
+	stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+	stats->tls_encrypted_bytes   += datalen;
+
+out:
+	return skb;
+
+err_out:
+	dev_kfree_skb_any(skb);
+	return NULL;
+}
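To make the mlx5e_ktls_dumps_num_wqebbs() bound used by the OOO path above concrete, here is a worked instance; the 16-byte ctrl/data segments and the 64-byte MLX5_SEND_WQE_BB are the usual mlx5 sizes, and the traffic numbers are made up:

	MLX5E_KTLS_DUMP_WQEBBS = DIV_ROUND_UP(16 + 16, 64) = 1
	nfrags = 3, sync_len = 4000, sq->hw_mtu = 1500
	DIV_ROUND_UP(sync_len, hw_mtu) = DIV_ROUND_UP(4000, 1500) = 3
	upper bound = 1 * (3 + 3) = 6 WQEBBs

The inner loop of mlx5e_ktls_tx_handle_ooo() splits each fragment into hw_mtu-sized DUMP WQEs; rounding up per fragment costs at most one extra WQE each, which is why nfrags is added to the MTU-divided sync length rather than multiplied.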
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
index e88340e..fba561f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -160,25 +160,31 @@
 				direction == TLS_OFFLOAD_CTX_DIR_TX);
 }
 
-static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk,
-				u32 seq, u64 rcd_sn)
+static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk,
+			    u32 seq, u8 *rcd_sn_data,
+			    enum tls_offload_ctx_dir direction)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_tls_offload_context_rx *rx_ctx;
+	u64 rcd_sn = *(u64 *)rcd_sn_data;
 
+	if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX))
+		return -EINVAL;
 	rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
 
 	netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
 		    be64_to_cpu(rcd_sn));
 	mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
 	atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
+
+	return 0;
 }
 
 static const struct tlsdev_ops mlx5e_tls_ops = {
 	.tls_dev_add = mlx5e_tls_add,
 	.tls_dev_del = mlx5e_tls_del,
-	.tls_dev_resync_rx = mlx5e_tls_resync_rx,
+	.tls_dev_resync = mlx5e_tls_resync,
 };
 
 void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
@@ -186,6 +192,11 @@
 	struct net_device *netdev = priv->netdev;
 	u32 caps;
 
+	if (mlx5_accel_is_ktls_device(priv->mdev)) {
+		mlx5e_ktls_build_netdev(priv);
+		return;
+	}
+
 	if (!mlx5_accel_is_tls_device(priv->mdev))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index 3f5d721..9015f3f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -33,8 +33,10 @@
 #ifndef __MLX5E_TLS_H__
 #define __MLX5E_TLS_H__
 
-#ifdef CONFIG_MLX5_EN_TLS
+#include "accel/tls.h"
+#include "en_accel/ktls.h"
 
+#ifdef CONFIG_MLX5_EN_TLS
 #include <net/tls.h>
 #include "en.h"
 
@@ -94,7 +96,12 @@
 
 #else
 
-static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+	if (mlx5_accel_is_ktls_device(priv->mdev))
+		mlx5e_ktls_build_netdev(priv);
+}
+
 static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
 static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index be137d4..71384ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -181,7 +181,6 @@
 	 */
 	nskb->ip_summed = CHECKSUM_PARTIAL;
 
-	nskb->xmit_more = 1;
 	nskb->queue_mapping = skb->queue_mapping;
 }
 
@@ -248,8 +247,8 @@
 	sq->stats->tls_resync_bytes += nskb->len;
 	mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
 				    cpu_to_be64(info.rcd_sn));
-	mlx5e_sq_xmit(sq, nskb, *wqe, *pi);
-	mlx5e_sq_fetch_wqe(sq, wqe, pi);
+	mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
+	*wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
 	return skb;
 
 err_out:
@@ -270,6 +269,11 @@
 	int datalen;
 	u32 skb_seq;
 
+	if (MLX5_CAP_GEN(sq->channel->mdev, tls)) {
+		skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
+		goto out;
+	}
+
 	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 311667e..90bc1f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -38,6 +38,7 @@
 
 #include <linux/skbuff.h>
 #include "en.h"
+#include "en/txrx.h"
 
 struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
 					struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 45cdde6..2c75b27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -437,12 +437,6 @@
 	return &arfs_t->rules_hash[bucket_idx];
 }
 
-static u8 arfs_get_ip_proto(const struct sk_buff *skb)
-{
-	return (skb->protocol == htons(ETH_P_IP)) ?
-		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
-}
-
 static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
 					 u8 ip_proto, __be16 etype)
 {
@@ -543,8 +537,11 @@
 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
-		netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
-			   __func__, arfs_rule->filter_id, arfs_rule->rxq, err);
+		priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
+		mlx5e_dbg(HW, priv,
+			  "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
+			  __func__, arfs_rule->filter_id, arfs_rule->rxq,
+			  tuple->ip_proto, err);
 	}
 
 out:
@@ -599,31 +596,9 @@
 	arfs_may_expire_flow(priv);
 }
 
-/* return L4 destination port from ip4/6 packets */
-static __be16 arfs_get_dst_port(const struct sk_buff *skb)
-{
-	char *transport_header;
-
-	transport_header = skb_transport_header(skb);
-	if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
-		return ((struct tcphdr *)transport_header)->dest;
-	return ((struct udphdr *)transport_header)->dest;
-}
-
-/* return L4 source port from ip4/6 packets */
-static __be16 arfs_get_src_port(const struct sk_buff *skb)
-{
-	char *transport_header;
-
-	transport_header = skb_transport_header(skb);
-	if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
-		return ((struct tcphdr *)transport_header)->source;
-	return ((struct udphdr *)transport_header)->source;
-}
-
 static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
 					 struct arfs_table *arfs_t,
-					 const struct sk_buff *skb,
+					 const struct flow_keys *fk,
 					 u16 rxq, u32 flow_id)
 {
 	struct arfs_rule *rule;
@@ -638,19 +613,19 @@
 	INIT_WORK(&rule->arfs_work, arfs_handle_work);
 
 	tuple = &rule->tuple;
-	tuple->etype = skb->protocol;
+	tuple->etype = fk->basic.n_proto;
+	tuple->ip_proto = fk->basic.ip_proto;
 	if (tuple->etype == htons(ETH_P_IP)) {
-		tuple->src_ipv4 = ip_hdr(skb)->saddr;
-		tuple->dst_ipv4 = ip_hdr(skb)->daddr;
+		tuple->src_ipv4 = fk->addrs.v4addrs.src;
+		tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
 	} else {
-		memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
+		memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
 		       sizeof(struct in6_addr));
-		memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
+		memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
 		       sizeof(struct in6_addr));
 	}
-	tuple->ip_proto = arfs_get_ip_proto(skb);
-	tuple->src_port = arfs_get_src_port(skb);
-	tuple->dst_port = arfs_get_dst_port(skb);
+	tuple->src_port = fk->ports.src;
+	tuple->dst_port = fk->ports.dst;
 
 	rule->flow_id = flow_id;
 	rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
@@ -661,37 +636,33 @@
 	return rule;
 }
 
-static bool arfs_cmp_ips(struct arfs_tuple *tuple,
-			 const struct sk_buff *skb)
+static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
 {
-	if (tuple->etype == htons(ETH_P_IP) &&
-	    tuple->src_ipv4 == ip_hdr(skb)->saddr &&
-	    tuple->dst_ipv4 == ip_hdr(skb)->daddr)
-		return true;
-	if (tuple->etype == htons(ETH_P_IPV6) &&
-	    (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
-		     sizeof(struct in6_addr))) &&
-	    (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
-		     sizeof(struct in6_addr))))
-		return true;
+	if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
+		return false;
+	if (tuple->etype != fk->basic.n_proto)
+		return false;
+	if (tuple->etype == htons(ETH_P_IP))
+		return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
+		       tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
+	if (tuple->etype == htons(ETH_P_IPV6))
+		return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+			       sizeof(struct in6_addr)) &&
+		       !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+			       sizeof(struct in6_addr));
 	return false;
 }
 
 static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
-					const struct sk_buff *skb)
+					const struct flow_keys *fk)
 {
 	struct arfs_rule *arfs_rule;
 	struct hlist_head *head;
-	__be16 src_port = arfs_get_src_port(skb);
-	__be16 dst_port = arfs_get_dst_port(skb);
 
-	head = arfs_hash_bucket(arfs_t, src_port, dst_port);
+	head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
 	hlist_for_each_entry(arfs_rule, head, hlist) {
-		if (arfs_rule->tuple.src_port == src_port &&
-		    arfs_rule->tuple.dst_port == dst_port &&
-		    arfs_cmp_ips(&arfs_rule->tuple, skb)) {
+		if (arfs_cmp(&arfs_rule->tuple, fk))
 			return arfs_rule;
-		}
 	}
 
 	return NULL;
@@ -704,20 +675,24 @@
 	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
 	struct arfs_table *arfs_t;
 	struct arfs_rule *arfs_rule;
+	struct flow_keys fk;
 
-	if (skb->protocol != htons(ETH_P_IP) &&
-	    skb->protocol != htons(ETH_P_IPV6))
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+		return -EPROTONOSUPPORT;
+
+	if (fk.basic.n_proto != htons(ETH_P_IP) &&
+	    fk.basic.n_proto != htons(ETH_P_IPV6))
 		return -EPROTONOSUPPORT;
 
 	if (skb->encapsulation)
 		return -EPROTONOSUPPORT;
 
-	arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol);
+	arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
 	if (!arfs_t)
 		return -EPROTONOSUPPORT;
 
 	spin_lock_bh(&arfs->arfs_lock);
-	arfs_rule = arfs_find_rule(arfs_t, skb);
+	arfs_rule = arfs_find_rule(arfs_t, &fk);
 	if (arfs_rule) {
 		if (arfs_rule->rxq == rxq_index) {
 			spin_unlock_bh(&arfs->arfs_lock);
@@ -725,8 +700,7 @@
 		}
 		arfs_rule->rxq = rxq_index;
 	} else {
-		arfs_rule = arfs_alloc_rule(priv, arfs_t, skb,
-					    rxq_index, flow_id);
+		arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
 		if (!arfs_rule) {
 			spin_unlock_bh(&arfs->arfs_lock);
 			return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index db3278c..f7890e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -45,7 +45,9 @@
 	if (err)
 		return err;
 
+	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
 
 	return 0;
 }
@@ -53,8 +55,10 @@
 void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir)
 {
+	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	mlx5_core_destroy_tir(mdev, tir->tirn);
 	list_del(&tir->list);
+	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
 }
 
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
@@ -114,6 +118,7 @@
 	}
 
 	INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+	mutex_init(&mdev->mlx5e_res.td.list_lock);
 
 	return 0;
 
@@ -141,22 +146,25 @@
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_tir *tir;
-	int err  = -ENOMEM;
+	int err  = 0;
 	u32 tirn = 0;
 	int inlen;
 	void *in;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
 	in = kvzalloc(inlen, GFP_KERNEL);
-	if (!in)
+	if (!in) {
+		err = -ENOMEM;
 		goto out;
+	}
 
 	if (enable_uc_lb)
 		MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
 
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
+	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
 		tirn = tir->tirn;
 		err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
@@ -168,18 +176,7 @@
 	kvfree(in);
 	if (err)
 		netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
 
 	return err;
 }
-
-u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
-{
-	u8 min_inline_mode;
-
-	mlx5_query_min_inline(mdev, &min_inline_mode);
-	if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
-	    !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
-		min_inline_mode = MLX5_INLINE_MODE_L2;
-
-	return min_inline_mode;
-}
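
The list_lock additions above close a race between mlx5e_create_tir()/mlx5e_destroy_tir() and the refresh loop walking tirs_list. A minimal sketch of the pattern, with my_ names standing in for the driver's structures:

#include <linux/list.h>
#include <linux/mutex.h>

struct my_td {
	struct mutex     list_lock;	/* protects tirs_list and traversal */
	struct list_head tirs_list;
};

static void my_td_init(struct my_td *td)
{
	mutex_init(&td->list_lock);
	INIT_LIST_HEAD(&td->tirs_list);
}

static void my_td_add(struct my_td *td, struct list_head *entry)
{
	mutex_lock(&td->list_lock);
	list_add(entry, &td->tirs_list);	/* never races with a walker */
	mutex_unlock(&td->list_lock);
}
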
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 722998d..01f2918 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -680,7 +680,7 @@
 
 	memset(perm_addr, 0xff, MAX_ADDR_LEN);
 
-	mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+	mlx5_query_mac_address(priv->mdev, perm_addr);
 }
 
 static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
@@ -1101,7 +1101,7 @@
 static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv,
 						  struct mlx5e_params *params)
 {
-	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev);
+	mlx5_query_min_inline(priv->mdev, &params->tx_min_inline_mode);
 	if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP &&
 	    params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
 		params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
@@ -1126,9 +1126,7 @@
 	    priv->channels.params.tx_min_inline_mode)
 		goto out;
 
-	if (mlx5e_open_channels(priv, &new_channels))
-		goto out;
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 
 out:
 	mutex_unlock(&priv->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
index d67adf7..ca9cfbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -30,22 +30,22 @@
  * SOFTWARE.
  */
 
-#include <linux/net_dim.h>
+#include <linux/dim.h>
 #include "en.h"
 
 static void
-mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder,
 			struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
 {
 	mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
-	dim->state = NET_DIM_START_MEASURE;
+	dim->state = DIM_START_MEASURE;
 }
 
 void mlx5e_rx_dim_work(struct work_struct *work)
 {
-	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct dim *dim = container_of(work, struct dim, work);
 	struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
-	struct net_dim_cq_moder cur_moder =
+	struct dim_cq_moder cur_moder =
 		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
 	mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
@@ -53,9 +53,9 @@
 
 void mlx5e_tx_dim_work(struct work_struct *work)
 {
-	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct dim *dim = container_of(work, struct dim, work);
 	struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
-	struct net_dim_cq_moder cur_moder =
+	struct dim_cq_moder cur_moder =
 		net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
 
 	mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 98dd3e0..9560126 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -32,6 +32,7 @@
 
 #include "en.h"
 #include "en/port.h"
+#include "en/xsk/umem.h"
 #include "lib/clock.h"
 
 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@ -46,7 +47,7 @@
 		 "%d.%d.%04d (%.16s)",
 		 fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
 		 mdev->board_id);
-	strlcpy(drvinfo->bus_info, pci_name(mdev->pdev),
+	strlcpy(drvinfo->bus_info, dev_name(mdev->device),
 		sizeof(drvinfo->bus_info));
 }
 
@@ -63,78 +64,158 @@
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
 };
 
-static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER];
+static
+struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER];
+static
+struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER];
 
-#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...)                       \
+#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...)                  \
 	({                                                              \
 		struct ptys2ethtool_config *cfg;                        \
 		const unsigned int modes[] = { __VA_ARGS__ };           \
-		unsigned int i;                                         \
-		cfg = &ptys2ethtool_table[reg_];                        \
+		unsigned int i, bit, idx;                               \
+		cfg = &ptys2##table##_ethtool_table[reg_];		\
 		bitmap_zero(cfg->supported,                             \
 			    __ETHTOOL_LINK_MODE_MASK_NBITS);            \
 		bitmap_zero(cfg->advertised,                            \
 			    __ETHTOOL_LINK_MODE_MASK_NBITS);            \
 		for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) {             \
-			__set_bit(modes[i], cfg->supported);            \
-			__set_bit(modes[i], cfg->advertised);           \
+			bit = modes[i] % 64;                            \
+			idx = modes[i] / 64;                            \
+			__set_bit(bit, &cfg->supported[idx]);           \
+			__set_bit(bit, &cfg->advertised[idx]);          \
 		}                                                       \
 	})
 
 void mlx5e_build_ptys2ethtool_map(void)
 {
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII,
+	memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table));
+	memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table));
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy,
 				       ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy,
 				       ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy,
 				       ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy,
 				       ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy,
 				       ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy,
 				       ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy,
 				       ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy,
 				       ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy,
 				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy,
 				       ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy,
 				       ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy,
 				       ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy,
 				       ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy,
 				       ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy,
 				       ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
 				       ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
 				       ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
 				       ETHTOOL_LINK_MODE_25000baseCR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy,
 				       ETHTOOL_LINK_MODE_25000baseKR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy,
 				       ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy,
 				       ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT);
-	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2,
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy,
 				       ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext,
+				       ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext,
+				       ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				       ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+				       ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext,
+				       ETHTOOL_LINK_MODE_5000baseT_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext,
+				       ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+				       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+				       ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
+				       ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+				       ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+				       ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+				       ETHTOOL_LINK_MODE_10000baseER_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext,
+				       ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext,
+				       ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+				       ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+				       ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2,
+				       ext,
+				       ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+				       ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+				       ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext,
+				       ETHTOOL_LINK_MODE_50000baseKR_Full_BIT,
+				       ETHTOOL_LINK_MODE_50000baseSR_Full_BIT,
+				       ETHTOOL_LINK_MODE_50000baseCR_Full_BIT,
+				       ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+				       ETHTOOL_LINK_MODE_50000baseDR_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext,
+				       ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext,
+				       ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT,
+				       ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT);
+	MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext,
+				       ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
+				       ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
 }
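
The reworked macro above splits each link-mode number into a 64-bit word index and an in-word bit offset before calling __set_bit(), which is what lets the extended table address mode bits above 63. A sketch of the equivalence, assuming 64-bit longs (the % 64 and / 64 constants hard-code that):

#include <linux/bitops.h>

/* On a 64-bit kernel both forms set the same bit of a multi-word
 * ethtool link-mode mask; the split form makes the word layout
 * explicit. Illustrative only, not the driver's helper. */
static void my_set_link_mode(unsigned long *mask, unsigned int mode)
{
	unsigned int idx = mode / 64;	/* which unsigned long word */
	unsigned int bit = mode % 64;	/* bit inside that word */

	__set_bit(bit, &mask[idx]);	/* same as __set_bit(mode, mask) */
}
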
 
+static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
+					struct ptys2ethtool_config **arr,
+					u32 *size)
+{
+	bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+
+	*arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+	*size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+		      ARRAY_SIZE(ptys2legacy_ethtool_table);
+}
+
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+struct pflag_desc {
+	char name[ETH_GSTRING_LEN];
+	mlx5e_pflag_handler handler;
+};
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS];
+
 int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
 {
 	int i, num_stats = 0;
@@ -145,7 +226,7 @@
 			num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
 		return num_stats;
 	case ETH_SS_PRIV_FLAGS:
-		return ARRAY_SIZE(mlx5e_priv_flags);
+		return MLX5E_NUM_PFLAGS;
 	case ETH_SS_TEST:
 		return mlx5e_self_test_num(priv);
 	/* fallthrough */
@@ -175,8 +256,9 @@
 
 	switch (stringset) {
 	case ETH_SS_PRIV_FLAGS:
-		for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++)
-			strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]);
+		for (i = 0; i < MLX5E_NUM_PFLAGS; i++)
+			strcpy(data + i * ETH_GSTRING_LEN,
+			       mlx5e_priv_flags[i].name);
 		break;
 
 	case ETH_SS_TEST:
@@ -288,11 +370,7 @@
 		goto unlock;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err)
-		goto unlock;
-
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 
 unlock:
 	mutex_unlock(&priv->state_lock);
@@ -311,8 +389,17 @@
 void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
 				struct ethtool_channels *ch)
 {
-	ch->max_combined   = priv->profile->max_nch(priv->mdev);
+	mutex_lock(&priv->state_lock);
+
+	ch->max_combined   = priv->max_nch;
 	ch->combined_count = priv->channels.params.num_channels;
+	if (priv->xsk.refcnt) {
+		/* The upper half are XSK queues. */
+		ch->max_combined *= 2;
+		ch->combined_count *= 2;
+	}
+
+	mutex_unlock(&priv->state_lock);
 }
 
 static void mlx5e_get_channels(struct net_device *dev,
@@ -326,6 +413,7 @@
 int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 			       struct ethtool_channels *ch)
 {
+	struct mlx5e_params *cur_params = &priv->channels.params;
 	unsigned int count = ch->combined_count;
 	struct mlx5e_channels new_channels = {};
 	bool arfs_enabled;
@@ -337,39 +425,49 @@
 		return -EINVAL;
 	}
 
-	if (priv->channels.params.num_channels == count)
+	if (cur_params->num_channels == count)
 		return 0;
 
 	mutex_lock(&priv->state_lock);
 
-	new_channels.params = priv->channels.params;
-	new_channels.params.num_channels = count;
-	if (!netif_is_rxfh_configured(priv->netdev))
-		mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
-					      MLX5E_INDIR_RQT_SIZE, count);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
+	/* Don't allow changing the number of channels if there is an active
+	 * XSK, because the numbering of the XSK and regular RQs will change.
+	 */
+	if (priv->xsk.refcnt) {
+		err = -EINVAL;
+		netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n",
+			   __func__);
 		goto out;
 	}
 
-	/* Create fresh channels with new parameters */
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err)
+	new_channels.params = priv->channels.params;
+	new_channels.params.num_channels = count;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		*cur_params = new_channels.params;
+		if (!netif_is_rxfh_configured(priv->netdev))
+			mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
+						      MLX5E_INDIR_RQT_SIZE, count);
 		goto out;
+	}
 
 	arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE;
 	if (arfs_enabled)
 		mlx5e_arfs_disable(priv);
 
+	if (!netif_is_rxfh_configured(priv->netdev))
+		mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
+					      MLX5E_INDIR_RQT_SIZE, count);
+
 	/* Switch to new channels, set new parameters and close old ones */
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 
 	if (arfs_enabled) {
-		err = mlx5e_arfs_enable(priv);
-		if (err)
+		int err2 = mlx5e_arfs_enable(priv);
+
+		if (err2)
 			netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
-				   __func__, err);
+				   __func__, err2);
 	}
 
 out:
@@ -389,7 +487,7 @@
 int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
 			       struct ethtool_coalesce *coal)
 {
-	struct net_dim_cq_moder *rx_moder, *tx_moder;
+	struct dim_cq_moder *rx_moder, *tx_moder;
 
 	if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
 		return -EOPNOTSUPP;
@@ -444,7 +542,7 @@
 int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 			       struct ethtool_coalesce *coal)
 {
-	struct net_dim_cq_moder *rx_moder, *tx_moder;
+	struct dim_cq_moder *rx_moder, *tx_moder;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
 	int err = 0;
@@ -495,12 +593,7 @@
 		goto out;
 	}
 
-	/* open fresh channels with new coal parameters */
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err)
-		goto out;
-
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 
 out:
 	mutex_unlock(&priv->state_lock);
@@ -515,35 +608,109 @@
 	return mlx5e_ethtool_set_coalesce(priv, coal);
 }
 
-static void ptys2ethtool_supported_link(unsigned long *supported_modes,
+static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev,
+					unsigned long *supported_modes,
 					u32 eth_proto_cap)
 {
 	unsigned long proto_cap = eth_proto_cap;
+	struct ptys2ethtool_config *table;
+	u32 max_size;
 	int proto;
 
-	for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+	mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size);
+	for_each_set_bit(proto, &proto_cap, max_size)
 		bitmap_or(supported_modes, supported_modes,
-			  ptys2ethtool_table[proto].supported,
+			  table[proto].supported,
 			  __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
 static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
-				    u32 eth_proto_cap)
+				    u32 eth_proto_cap, bool ext)
 {
 	unsigned long proto_cap = eth_proto_cap;
+	struct ptys2ethtool_config *table;
+	u32 max_size;
 	int proto;
 
-	for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+	table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+	max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+			 ARRAY_SIZE(ptys2legacy_ethtool_table);
+
+	for_each_set_bit(proto, &proto_cap, max_size)
 		bitmap_or(advertising_modes, advertising_modes,
-			  ptys2ethtool_table[proto].advertised,
+			  table[proto].advertised,
 			  __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
+static const u32 pplm_fec_2_ethtool[] = {
+	[MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF,
+	[MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER,
+	[MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
+};
+
+static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
+{
+	int mode = 0;
+
+	if (!fec_mode)
+		return ETHTOOL_FEC_AUTO;
+
+	mode = find_first_bit(&fec_mode, size);
+
+	if (mode < ARRAY_SIZE(pplm_fec_2_ethtool))
+		return pplm_fec_2_ethtool[mode];
+
+	return 0;
+}
+
+/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */
+static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code)
+{
+	u32 offset;
+
+	offset = find_first_bit(&ethtool_fec_code, sizeof(u32));
+	offset -= ETHTOOL_FEC_OFF_BIT;
+	offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT;
+
+	return offset;
+}
+
+static int get_fec_supported_advertised(struct mlx5_core_dev *dev,
+					struct ethtool_link_ksettings *link_ksettings)
+{
+	u_long fec_caps = 0;
+	u32 active_fec = 0;
+	u32 offset;
+	u32 bitn;
+	int err;
+
+	err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps);
+	if (err)
+		return (err == -EOPNOTSUPP) ? 0 : err;
+
+	err = mlx5e_get_fec_mode(dev, &active_fec, NULL);
+	if (err)
+		return err;
+
+	for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) {
+		u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn];
+
+		offset = ethtool_fec2ethtool_caps(ethtool_bitmask);
+		__set_bit(offset, link_ksettings->link_modes.supported);
+	}
+
+	active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE);
+	offset = ethtool_fec2ethtool_caps(active_fec);
+	__set_bit(offset, link_ksettings->link_modes.advertising);
+
+	return 0;
+}
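
The helpers above translate the one-hot FEC mask reported by the PPLM register (bit 0 = no FEC, bit 1 = FireCode, bit 2 = Reed-Solomon) into ethtool constants. A condensed sketch of that mapping; my_pplm_to_ethtool is illustrative, not the driver's helper:

#include <linux/kernel.h>
#include <linux/bitops.h>
#include <uapi/linux/ethtool.h>

static u32 my_pplm_to_ethtool(unsigned long pplm_mask)
{
	static const u32 map[] = {
		ETHTOOL_FEC_OFF,	/* PPLM bit 0: no FEC */
		ETHTOOL_FEC_BASER,	/* PPLM bit 1: FireCode */
		ETHTOOL_FEC_RS,		/* PPLM bit 2: Reed-Solomon */
	};
	unsigned int mode;

	if (!pplm_mask)
		return ETHTOOL_FEC_AUTO;	/* nothing configured */
	mode = find_first_bit(&pplm_mask, BITS_PER_LONG);
	return mode < ARRAY_SIZE(map) ? map[mode] : 0;
}
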
+
 static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
 						   u32 eth_proto_cap,
-						   u8 connector_type)
+						   u8 connector_type, bool ext)
 {
-	if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
+	if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
 		if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
 				   | MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
 				   | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
@@ -618,16 +785,17 @@
 }
 
 static void get_speed_duplex(struct net_device *netdev,
-			     u32 eth_proto_oper,
+			     u32 eth_proto_oper, bool force_legacy,
 			     struct ethtool_link_ksettings *link_ksettings)
 {
+	struct mlx5e_priv *priv = netdev_priv(netdev);
 	u32 speed = SPEED_UNKNOWN;
 	u8 duplex = DUPLEX_UNKNOWN;
 
 	if (!netif_carrier_ok(netdev))
 		goto out;
 
-	speed = mlx5e_port_ptys2speed(eth_proto_oper);
+	speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
 	if (!speed) {
 		speed = SPEED_UNKNOWN;
 		goto out;
@@ -640,22 +808,22 @@
 	link_ksettings->base.duplex = duplex;
 }
 
-static void get_supported(u32 eth_proto_cap,
+static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap,
 			  struct ethtool_link_ksettings *link_ksettings)
 {
 	unsigned long *supported = link_ksettings->link_modes.supported;
+	ptys2ethtool_supported_link(mdev, supported, eth_proto_cap);
 
-	ptys2ethtool_supported_link(supported, eth_proto_cap);
 	ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
 }
 
-static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
-			    u8 rx_pause,
-			    struct ethtool_link_ksettings *link_ksettings)
+static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause,
+			    struct ethtool_link_ksettings *link_ksettings,
+			    bool ext)
 {
 	unsigned long *advertising = link_ksettings->link_modes.advertising;
+	ptys2ethtool_adver_link(advertising, eth_proto_cap, ext);
 
-	ptys2ethtool_adver_link(advertising, eth_proto_cap);
 	if (rx_pause)
 		ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
 	if (tx_pause ^ rx_pause)
@@ -674,9 +842,9 @@
 		[MLX5E_PORT_OTHER]              = PORT_OTHER,
 	};
 
-static u8 get_connector_port(u32 eth_proto, u8 connector_type)
+static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext)
 {
-	if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+	if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
 		return ptys2connector_type[connector_type];
 
 	if (eth_proto &
@@ -705,18 +873,18 @@
 	return PORT_OTHER;
 }
 
-static void get_lp_advertising(u32 eth_proto_lp,
+static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
 			       struct ethtool_link_ksettings *link_ksettings)
 {
 	unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
+	bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
 
-	ptys2ethtool_adver_link(lp_advertising, eth_proto_lp);
+	ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
 }
 
-static int mlx5e_get_link_ksettings(struct net_device *netdev,
-				    struct ethtool_link_ksettings *link_ksettings)
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+				     struct ethtool_link_ksettings *link_ksettings)
 {
-	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
 	u32 rx_pause = 0;
@@ -728,39 +896,59 @@
 	u8 an_disable_admin;
 	u8 an_status;
 	u8 connector_type;
+	bool admin_ext;
+	bool ext;
 	int err;
 
 	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
 	if (err) {
-		netdev_err(netdev, "%s: query port ptys failed: %d\n",
+		netdev_err(priv->netdev, "%s: query port ptys failed: %d\n",
 			   __func__, err);
-		goto err_query_ptys;
+		goto err_query_regs;
+	}
+	ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	eth_proto_cap    = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+					      eth_proto_capability);
+	eth_proto_admin  = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+					      eth_proto_admin);
+	/* Fields: eth_proto_admin and ext_eth_proto_admin are
+	 * mutually exclusive. Hence try reading legacy advertising
+	 * when extended advertising is zero.
+	 * admin_ext indicates which proto_admin (ext vs. legacy)
+	 * should be read and interpreted.
+	 */
+	admin_ext = ext;
+	if (ext && !eth_proto_admin) {
+		eth_proto_admin  = MLX5_GET_ETH_PROTO(ptys_reg, out, false,
+						      eth_proto_admin);
+		admin_ext = false;
 	}
 
-	eth_proto_cap    = MLX5_GET(ptys_reg, out, eth_proto_capability);
-	eth_proto_admin  = MLX5_GET(ptys_reg, out, eth_proto_admin);
-	eth_proto_oper   = MLX5_GET(ptys_reg, out, eth_proto_oper);
-	eth_proto_lp     = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
-	an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
-	an_status        = MLX5_GET(ptys_reg, out, an_status);
-	connector_type   = MLX5_GET(ptys_reg, out, connector_type);
+	eth_proto_oper   = MLX5_GET_ETH_PROTO(ptys_reg, out, admin_ext,
+					      eth_proto_oper);
+	eth_proto_lp	    = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
+	an_disable_admin    = MLX5_GET(ptys_reg, out, an_disable_admin);
+	an_status	    = MLX5_GET(ptys_reg, out, an_status);
+	connector_type	    = MLX5_GET(ptys_reg, out, connector_type);
 
 	mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
 
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
 
-	get_supported(eth_proto_cap, link_ksettings);
-	get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
-	get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+	get_supported(mdev, eth_proto_cap, link_ksettings);
+	get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings,
+			admin_ext);
+	get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext,
+			 link_ksettings);
 
 	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
 
 	link_ksettings->base.port = get_connector_port(eth_proto_oper,
-						       connector_type);
+						       connector_type, ext);
 	ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
-					       connector_type);
-	get_lp_advertising(eth_proto_lp, link_ksettings);
+					       connector_type, ext);
+	get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
 
 	if (an_status == MLX5_AN_COMPLETE)
 		ethtool_link_ksettings_add_link_mode(link_ksettings,
@@ -770,20 +958,38 @@
 							  AUTONEG_ENABLE;
 	ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
 					     Autoneg);
+
+	err = get_fec_supported_advertised(mdev, link_ksettings);
+	if (err) {
+		netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n",
+			   __func__, err);
+		err = 0; /* don't fail caps query because of FEC error */
+	}
+
 	if (!an_disable_admin)
 		ethtool_link_ksettings_add_link_mode(link_ksettings,
 						     advertising, Autoneg);
 
-err_query_ptys:
+err_query_regs:
 	return err;
 }
 
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *link_ksettings)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
 static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
 {
 	u32 i, ptys_modes = 0;
 
 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
-		if (bitmap_intersects(ptys2ethtool_table[i].advertised,
+		if (*ptys2legacy_ethtool_table[i].advertised == 0)
+			continue;
+		if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised,
 				      link_modes,
 				      __ETHTOOL_LINK_MODE_MASK_NBITS))
 			ptys_modes |= MLX5E_PROT_MASK(i);
@@ -792,126 +998,179 @@
 	return ptys_modes;
 }
 
-static int mlx5e_set_link_ksettings(struct net_device *netdev,
-				    const struct ethtool_link_ksettings *link_ksettings)
+static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes)
 {
-	struct mlx5e_priv *priv    = netdev_priv(netdev);
+	u32 i, ptys_modes = 0;
+	unsigned long modes[2];
+
+	for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) {
+		if (*ptys2ext_ethtool_table[i].advertised == 0)
+			continue;
+		memset(modes, 0, sizeof(modes));
+		bitmap_and(modes, ptys2ext_ethtool_table[i].advertised,
+			   link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+		if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] &&
+		    modes[1] == ptys2ext_ethtool_table[i].advertised[1])
+			ptys_modes |= MLX5E_PROT_MASK(i);
+	}
+	return ptys_modes;
+}
+
+static bool ext_link_mode_requested(const unsigned long *adver)
+{
+#define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT
+	int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,};
+
+	bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size);
+	return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static bool ext_speed_requested(u32 speed)
+{
+#define MLX5E_MAX_PTYS_LEGACY_SPEED 100000
+	return !!(speed > MLX5E_MAX_PTYS_LEGACY_SPEED);
+}
+
+static bool ext_requested(u8 autoneg, const unsigned long *adver, u32 speed)
+{
+	bool ext_link_mode = ext_link_mode_requested(adver);
+	bool ext_speed = ext_speed_requested(speed);
+
+	return  autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_speed;
+}
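
A usage sketch of the decision ext_requested() encodes: with autoneg on, the advertised modes pick the PTYS namespace (everything from ETHTOOL_LINK_MODE_50000baseKR_Full_BIT up is extended-only); with autoneg off, the plain speed decides. The bit values come from uapi/linux/ethtool.h; the function itself is illustrative.

static void my_ext_requested_examples(void)
{
	__ETHTOOL_DECLARE_LINK_MODE_MASK(adver) = { 0, };

	/* 100GBASE-KR4 (bit 36) is a legacy mode: no extended PTYS needed. */
	__set_bit(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, adver);
	WARN_ON(ext_requested(AUTONEG_ENABLE, adver, 0));

	/* 200GBASE-KR4 (bit 62) only exists in the extended namespace. */
	__set_bit(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, adver);
	WARN_ON(!ext_requested(AUTONEG_ENABLE, adver, 0));

	/* Autoneg off: only the speed matters, and >100G is ext-only. */
	WARN_ON(!ext_requested(AUTONEG_DISABLE, adver, 200000));
}
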
+
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+				     const struct ethtool_link_ksettings *link_ksettings)
+{
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u32 eth_proto_cap, eth_proto_admin;
+	struct mlx5e_port_eth_proto eproto;
+	const unsigned long *adver;
 	bool an_changes = false;
 	u8 an_disable_admin;
+	bool ext_supported;
 	u8 an_disable_cap;
 	bool an_disable;
 	u32 link_modes;
 	u8 an_status;
+	u8 autoneg;
 	u32 speed;
+	bool ext;
 	int err;
 
+	u32 (*ethtool2ptys_adver_func)(const unsigned long *adver);
+
+	adver = link_ksettings->link_modes.advertising;
+	autoneg = link_ksettings->base.autoneg;
 	speed = link_ksettings->base.speed;
 
-	link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
-		mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
-		mlx5e_port_speed2linkmodes(speed);
+	ext = ext_requested(autoneg, adver, speed);
+	ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	if (!ext_supported && ext)
+		return -EOPNOTSUPP;
 
-	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+	ethtool2ptys_adver_func = ext ? mlx5e_ethtool2ptys_ext_adver_link :
+				  mlx5e_ethtool2ptys_adver_link;
+	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err) {
-		netdev_err(netdev, "%s: query port eth proto cap failed: %d\n",
+		netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
 			   __func__, err);
 		goto out;
 	}
+	link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
+		mlx5e_port_speed2linkmodes(mdev, speed, !ext);
 
-	link_modes = link_modes & eth_proto_cap;
-	if (!link_modes) {
-		netdev_err(netdev, "%s: Not supported link mode(s) requested",
+	if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+	    autoneg != AUTONEG_ENABLE) {
+		netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n",
 			   __func__);
 		err = -EINVAL;
 		goto out;
 	}
 
-	err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
-	if (err) {
-		netdev_err(netdev, "%s: query port eth proto admin failed: %d\n",
-			   __func__, err);
+	link_modes = link_modes & eproto.cap;
+	if (!link_modes) {
+		netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
+			   __func__);
+		err = -EINVAL;
 		goto out;
 	}
 
-	mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status,
-				&an_disable_cap, &an_disable_admin);
+	mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap,
+				    &an_disable_admin);
 
-	an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
+	an_disable = autoneg == AUTONEG_DISABLE;
 	an_changes = ((!an_disable && an_disable_admin) ||
 		      (an_disable && !an_disable_admin));
 
-	if (!an_changes && link_modes == eth_proto_admin)
+	if (!an_changes && link_modes == eproto.admin)
 		goto out;
 
-	mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN);
+	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext);
 	mlx5_toggle_port_link(mdev);
 
 out:
 	return err;
 }
 
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *link_ksettings)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
+u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
+{
+	return sizeof(priv->rss_params.toeplitz_hash_key);
+}
+
 static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	return sizeof(priv->channels.params.toeplitz_hash_key);
+	return mlx5e_ethtool_get_rxfh_key_size(priv);
+}
+
+u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv)
+{
+	return MLX5E_INDIR_RQT_SIZE;
 }
 
 static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
 {
-	return MLX5E_INDIR_RQT_SIZE;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_get_rxfh_indir_size(priv);
 }
 
 static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 			  u8 *hfunc)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_rss_params *rss = &priv->rss_params;
 
 	if (indir)
-		memcpy(indir, priv->channels.params.indirection_rqt,
-		       sizeof(priv->channels.params.indirection_rqt));
+		memcpy(indir, rss->indirection_rqt,
+		       sizeof(rss->indirection_rqt));
 
 	if (key)
-		memcpy(key, priv->channels.params.toeplitz_hash_key,
-		       sizeof(priv->channels.params.toeplitz_hash_key));
+		memcpy(key, rss->toeplitz_hash_key,
+		       sizeof(rss->toeplitz_hash_key));
 
 	if (hfunc)
-		*hfunc = priv->channels.params.rss_hfunc;
+		*hfunc = rss->hfunc;
 
 	return 0;
 }
 
-static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
-{
-	void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
-	struct mlx5_core_dev *mdev = priv->mdev;
-	int ctxlen = MLX5_ST_SZ_BYTES(tirc);
-	int tt;
-
-	MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-
-	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-		memset(tirc, 0, ctxlen);
-		mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
-		mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
-	}
-
-	if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
-		return;
-
-	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-		memset(tirc, 0, ctxlen);
-		mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
-		mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
-	}
-}
-
 static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 			  const u8 *key, const u8 hfunc)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_rss_params *rss = &priv->rss_params;
 	int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
 	bool hash_changed = false;
 	void *in;
@@ -927,15 +1186,14 @@
 
 	mutex_lock(&priv->state_lock);
 
-	if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
-	    hfunc != priv->channels.params.rss_hfunc) {
-		priv->channels.params.rss_hfunc = hfunc;
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
+		rss->hfunc = hfunc;
 		hash_changed = true;
 	}
 
 	if (indir) {
-		memcpy(priv->channels.params.indirection_rqt, indir,
-		       sizeof(priv->channels.params.indirection_rqt));
+		memcpy(rss->indirection_rqt, indir,
+		       sizeof(rss->indirection_rqt));
 
 		if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 			u32 rqtn = priv->indir_rqt.rqtn;
@@ -943,7 +1201,7 @@
 				.is_rss = true,
 				{
 					.rss = {
-						.hfunc = priv->channels.params.rss_hfunc,
+						.hfunc = rss->hfunc,
 						.channels  = &priv->channels,
 					},
 				},
@@ -954,10 +1212,9 @@
 	}
 
 	if (key) {
-		memcpy(priv->channels.params.toeplitz_hash_key, key,
-		       sizeof(priv->channels.params.toeplitz_hash_key));
-		hash_changed = hash_changed ||
-			       priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
+		memcpy(rss->toeplitz_hash_key, key,
+		       sizeof(rss->toeplitz_hash_key));
+		hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP;
 	}
 
 	if (hash_changed)
@@ -1061,28 +1318,37 @@
 	return err;
 }
 
-static void mlx5e_get_pauseparam(struct net_device *netdev,
-				 struct ethtool_pauseparam *pauseparam)
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+				  struct ethtool_pauseparam *pauseparam)
 {
-	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
 	err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
 				    &pauseparam->tx_pause);
 	if (err) {
-		netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+		netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
 			   __func__, err);
 	}
 }
 
-static int mlx5e_set_pauseparam(struct net_device *netdev,
-				struct ethtool_pauseparam *pauseparam)
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pauseparam)
 {
-	struct mlx5e_priv *priv    = netdev_priv(netdev);
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+				 struct ethtool_pauseparam *pauseparam)
+{
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
+	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+		return -EOPNOTSUPP;
+
 	if (pauseparam->autoneg)
 		return -EINVAL;
 
@@ -1090,22 +1356,25 @@
 				  pauseparam->rx_pause ? 1 : 0,
 				  pauseparam->tx_pause ? 1 : 0);
 	if (err) {
-		netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+		netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
 			   __func__, err);
 	}
 
 	return err;
 }
 
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pauseparam)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
 int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 			      struct ethtool_ts_info *info)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	int ret;
-
-	ret = ethtool_op_get_ts_info(priv->netdev, info);
-	if (ret)
-		return ret;
 
 	info->phc_index = mlx5_clock_get_ptp_index(mdev);
 
@@ -1113,9 +1382,9 @@
 	    info->phc_index == -1)
 		return 0;
 
-	info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE |
-				 SOF_TIMESTAMPING_RX_HARDWARE |
-				 SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
 
 	info->tx_types = BIT(HWTSTAMP_TX_OFF) |
 			 BIT(HWTSTAMP_TX_ON);
@@ -1162,7 +1431,7 @@
 	return ret;
 }
 
-static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode)
+static __u32 mlx5e_reformat_wol_mode_mlx5_to_linux(u8 mode)
 {
 	__u32 ret = 0;
 
@@ -1190,7 +1459,7 @@
 	return ret;
 }
 
-static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode)
+static u8 mlx5e_reformat_wol_mode_linux_to_mlx5(__u32 mode)
 {
 	u8 ret = 0;
 
@@ -1236,7 +1505,7 @@
 	if (err)
 		return;
 
-	wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode);
+	wol->wolopts = mlx5e_reformat_wol_mode_mlx5_to_linux(mlx5_wol_mode);
 }
 
 static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
@@ -1252,11 +1521,63 @@
 	if (wol->wolopts & ~wol_supported)
 		return -EINVAL;
 
-	mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts);
+	mlx5_wol_mode = mlx5e_reformat_wol_mode_linux_to_mlx5(wol->wolopts);
 
 	return mlx5_set_port_wol(mdev, mlx5_wol_mode);
 }
 
+static int mlx5e_get_fecparam(struct net_device *netdev,
+			      struct ethtool_fecparam *fecparam)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 fec_configured = 0;
+	u32 fec_active = 0;
+	int err;
+
+	err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured);
+
+	if (err)
+		return err;
+
+	fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active,
+						sizeof(u32) * BITS_PER_BYTE);
+
+	if (!fecparam->active_fec)
+		return -EOPNOTSUPP;
+
+	fecparam->fec = pplm2ethtool_fec((u_long)fec_configured,
+					 sizeof(u8) * BITS_PER_BYTE);
+
+	return 0;
+}
+
+static int mlx5e_set_fecparam(struct net_device *netdev,
+			      struct ethtool_fecparam *fecparam)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 fec_policy = 0;
+	int mode;
+	int err;
+
+	for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
+		if (!(pplm_fec_2_ethtool[mode] & fecparam->fec))
+			continue;
+		fec_policy |= (1 << mode);
+		break;
+	}
+
+	err = mlx5e_set_fec_mode(mdev, fec_policy);
+
+	if (err)
+		return err;
+
+	mlx5_toggle_port_link(mdev);
+
+	return 0;
+}
+
 static u32 mlx5e_get_msglevel(struct net_device *dev)
 {
 	return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
@@ -1297,7 +1618,7 @@
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *dev = priv->mdev;
 	int size_read = 0;
-	u8 data[4];
+	u8 data[4] = {0};
 
 	size_read = mlx5_query_module_eeprom(dev, 0, 2, data);
 	if (size_read < 2)
@@ -1307,22 +1628,22 @@
 	switch (data[0]) {
 	case MLX5_MODULE_ID_QSFP:
 		modinfo->type       = ETH_MODULE_SFF_8436;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
 		break;
 	case MLX5_MODULE_ID_QSFP_PLUS:
 	case MLX5_MODULE_ID_QSFP28:
 		/* data[1] = revision id */
 		if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) {
 			modinfo->type       = ETH_MODULE_SFF_8636;
-			modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
 		} else {
 			modinfo->type       = ETH_MODULE_SFF_8436;
-			modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
 		}
 		break;
 	case MLX5_MODULE_ID_SFP:
 		modinfo->type       = ETH_MODULE_SFF_8472;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
 		break;
 	default:
 		netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
@@ -1369,7 +1690,39 @@
 	return 0;
 }
 
-typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+			       struct ethtool_flash *flash)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct net_device *dev = priv->netdev;
+	const struct firmware *fw;
+	int err;
+
+	if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+		return -EOPNOTSUPP;
+
+	err = request_firmware_direct(&fw, flash->data, &dev->dev);
+	if (err)
+		return err;
+
+	dev_hold(dev);
+	rtnl_unlock();
+
+	err = mlx5_firmware_flash(mdev, fw, NULL);
+	release_firmware(fw);
+
+	rtnl_lock();
+	dev_put(dev);
+	return err;
+}
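
mlx5e_ethtool_flash_device() above uses a locking dance worth spelling out: ethtool callbacks run under rtnl_lock, but flashing firmware can take a long time, so the driver drops RTNL for the duration and pins the netdev with dev_hold() so it cannot go away meanwhile. An illustrative skeleton of the pattern; my_slow_firmware_write is a hypothetical stand-in for mlx5_firmware_flash():

static int my_slow_firmware_write(void)
{
	return 0;	/* hypothetical long-running operation */
}

static int my_long_op_under_rtnl(struct net_device *dev)
{
	int err;

	dev_hold(dev);		/* keep the netdev alive without RTNL */
	rtnl_unlock();		/* let other control-path work proceed */

	err = my_slow_firmware_write();

	rtnl_lock();		/* ethtool core expects RTNL held on return */
	dev_put(dev);
	return err;
}
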
+
+static int mlx5e_flash_device(struct net_device *dev,
+			      struct ethtool_flash *flash)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	return mlx5e_ethtool_flash_device(priv, flash);
+}
 
 static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
 				     bool is_rx_cq)
@@ -1379,7 +1732,6 @@
 	struct mlx5e_channels new_channels = {};
 	bool mode_changed;
 	u8 cq_period_mode, current_cq_period_mode;
-	int err = 0;
 
 	cq_period_mode = enable ?
 		MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
@@ -1407,12 +1759,7 @@
 		return 0;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err)
-		return err;
-
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
-	return 0;
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 }
 
 static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
@@ -1445,11 +1792,10 @@
 		return 0;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 	if (err)
 		return err;
 
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 	mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
 		  MLX5E_GET_PFLAG(&priv->channels.params,
 				  MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
@@ -1482,7 +1828,6 @@
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
-	int err;
 
 	if (enable) {
 		if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
@@ -1504,31 +1849,79 @@
 		return 0;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err)
-		return err;
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+}
 
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_channels *channels = &priv->channels;
+	struct mlx5e_channel *c;
+	int i;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+	    priv->channels.params.xdp_prog)
+		return 0;
+
+	for (i = 0; i < channels->num; i++) {
+		c = channels->c[i];
+		if (enable)
+			__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+		else
+			__clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+	}
+
 	return 0;
 }
 
-static int mlx5e_handle_pflag(struct net_device *netdev,
-			      u32 wanted_flags,
-			      enum mlx5e_priv_flag flag,
-			      mlx5e_pflag_handler pflag_handler)
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	bool enable = !!(wanted_flags & flag);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_channels new_channels = {};
+	int err;
+
+	if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+		return -EOPNOTSUPP;
+
+	new_channels.params = priv->channels.params;
+
+	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		priv->channels.params = new_channels.params;
+		return 0;
+	}
+
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+	return err;
+}
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
+	{ "rx_cqe_moder",        set_pflag_rx_cqe_based_moder },
+	{ "tx_cqe_moder",        set_pflag_tx_cqe_based_moder },
+	{ "rx_cqe_compress",     set_pflag_rx_cqe_compress },
+	{ "rx_striding_rq",      set_pflag_rx_striding_rq },
+	{ "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
+	{ "xdp_tx_mpwqe",        set_pflag_xdp_tx_mpwqe },
+};
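
With this table, string reporting (ETH_SS_PRIV_FLAGS) and handler dispatch share one source of truth: adding a flag is one enum value plus one table row. A sketch of the two consumers, mirroring the real ones elsewhere in this file (my_ names are illustrative):

/* Both consumers index the same array, so the ethtool strings and the
 * handlers cannot drift apart. */
static void my_report_strings(u8 *data)
{
	int i;

	for (i = 0; i < MLX5E_NUM_PFLAGS; i++)
		strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i].name);
}

static int my_toggle(struct net_device *netdev, enum mlx5e_priv_flag f,
		     bool enable)
{
	return mlx5e_priv_flags[f].handler(netdev, enable);
}
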
+
+static int mlx5e_handle_pflag(struct net_device *netdev,
+			      u32 wanted_flags,
+			      enum mlx5e_priv_flag flag)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	bool enable = !!(wanted_flags & BIT(flag));
 	u32 changes = wanted_flags ^ priv->channels.params.pflags;
 	int err;
 
-	if (!(changes & flag))
+	if (!(changes & BIT(flag)))
 		return 0;
 
-	err = pflag_handler(netdev, enable);
+	err = mlx5e_priv_flags[flag].handler(netdev, enable);
 	if (err) {
-		netdev_err(netdev, "%s private flag 0x%x failed err %d\n",
-			   enable ? "Enable" : "Disable", flag, err);
+		netdev_err(netdev, "%s private flag '%s' failed err %d\n",
+			   enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err);
 		return err;
 	}
 
@@ -1539,32 +1932,17 @@
 static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	enum mlx5e_priv_flag pflag;
 	int err;
 
 	mutex_lock(&priv->state_lock);
-	err = mlx5e_handle_pflag(netdev, pflags,
-				 MLX5E_PFLAG_RX_CQE_BASED_MODER,
-				 set_pflag_rx_cqe_based_moder);
-	if (err)
-		goto out;
 
-	err = mlx5e_handle_pflag(netdev, pflags,
-				 MLX5E_PFLAG_TX_CQE_BASED_MODER,
-				 set_pflag_tx_cqe_based_moder);
-	if (err)
-		goto out;
+	for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) {
+		err = mlx5e_handle_pflag(netdev, pflags, pflag);
+		if (err)
+			break;
+	}
 
-	err = mlx5e_handle_pflag(netdev, pflags,
-				 MLX5E_PFLAG_RX_CQE_COMPRESS,
-				 set_pflag_rx_cqe_compress);
-	if (err)
-		goto out;
-
-	err = mlx5e_handle_pflag(netdev, pflags,
-				 MLX5E_PFLAG_RX_STRIDING_RQ,
-				 set_pflag_rx_striding_rq);
-
-out:
 	mutex_unlock(&priv->state_lock);
 
 	/* Need to fix some features.. */
@@ -1580,38 +1958,26 @@
 	return priv->channels.params.pflags;
 }
 
-int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
-			       struct ethtool_flash *flash)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct net_device *dev = priv->netdev;
-	const struct firmware *fw;
-	int err;
-
-	if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
-		return -EOPNOTSUPP;
-
-	err = request_firmware_direct(&fw, flash->data, &dev->dev);
-	if (err)
-		return err;
-
-	dev_hold(dev);
-	rtnl_unlock();
-
-	err = mlx5_firmware_flash(mdev, fw);
-	release_firmware(fw);
-
-	rtnl_lock();
-	dev_put(dev);
-	return err;
-}
-
-static int mlx5e_flash_device(struct net_device *dev,
-			      struct ethtool_flash *flash)
+static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	return mlx5e_ethtool_flash_device(priv, flash);
+	/* ETHTOOL_GRXRINGS is needed by "ethtool -x", which is not part
+	 * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc
+	 * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc
+	 * is compiled out via CONFIG_MLX5_EN_RXNFC=n.
+	 */
+	if (info->cmd == ETHTOOL_GRXRINGS) {
+		info->data = priv->channels.params.num_channels;
+		return 0;
+	}
+
+	return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs);
+}
+
+static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	return mlx5e_ethtool_set_rxnfc(dev, cmd);
 }
 
 const struct ethtool_ops mlx5e_ethtool_ops = {
@@ -1632,11 +1998,8 @@
 	.get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
 	.get_rxfh          = mlx5e_get_rxfh,
 	.set_rxfh          = mlx5e_set_rxfh,
-#ifdef CONFIG_MLX5_EN_RXNFC
 	.get_rxnfc         = mlx5e_get_rxnfc,
 	.set_rxnfc         = mlx5e_set_rxnfc,
-#endif
-	.flash_device      = mlx5e_flash_device,
 	.get_tunable       = mlx5e_get_tunable,
 	.set_tunable       = mlx5e_set_tunable,
 	.get_pauseparam    = mlx5e_get_pauseparam,
@@ -1647,9 +2010,12 @@
 	.set_wol	   = mlx5e_set_wol,
 	.get_module_info   = mlx5e_get_module_info,
 	.get_module_eeprom = mlx5e_get_module_eeprom,
+	.flash_device      = mlx5e_flash_device,
 	.get_priv_flags    = mlx5e_get_priv_flags,
 	.set_priv_flags    = mlx5e_set_priv_flags,
 	.self_test         = mlx5e_self_test,
 	.get_msglevel      = mlx5e_get_msglevel,
 	.set_msglevel      = mlx5e_set_msglevel,
+	.get_fecparam      = mlx5e_get_fecparam,
+	.set_fecparam      = mlx5e_set_fecparam,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 76cc10e..15b7f0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -747,8 +747,55 @@
 		.etype = ETH_P_IPV6,
 		.proto = IPPROTO_GRE,
 	},
+	[MLX5E_TT_IPV4_IPIP] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_IPIP,
+	},
+	[MLX5E_TT_IPV6_IPIP] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_IPIP,
+	},
+	[MLX5E_TT_IPV4_IPV6] = {
+		.etype = ETH_P_IP,
+		.proto = IPPROTO_IPV6,
+	},
+	[MLX5E_TT_IPV6_IPV6] = {
+		.etype = ETH_P_IPV6,
+		.proto = IPPROTO_IPV6,
+	},
+
 };
 
+bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type)
+{
+	switch (proto_type) {
+	case IPPROTO_GRE:
+		return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
+		return MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip);
+	default:
+		return false;
+	}
+}
+
+bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev)
+{
+	int tt;
+
+	for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
+		if (mlx5e_tunnel_proto_supported(mdev, ttc_tunnel_rules[tt].proto))
+			return true;
+	}
+	return false;
+}
+
+bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+	return (mlx5e_any_tunnel_proto_supported(mdev) &&
+		MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
+}
+
 static u8 mlx5e_etype_to_ipv(u16 ethertype)
 {
 	if (ethertype == ETH_P_IP)
@@ -838,6 +885,9 @@
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dest.ft   = params->inner_ttc->ft.t;
 	for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
+		if (!mlx5e_tunnel_proto_supported(priv->mdev,
+						  ttc_tunnel_rules[tt].proto))
+			continue;
 		rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
 						    ttc_tunnel_rules[tt].etype,
 						    ttc_tunnel_rules[tt].proto);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 41cde92..acd946f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -32,6 +32,8 @@
 
 #include <linux/mlx5/fs.h>
 #include "en.h"
+#include "en/params.h"
+#include "en/xsk/umem.h"
 
 struct mlx5e_ethtool_rule {
 	struct list_head             list;
@@ -131,14 +133,14 @@
 	if (ip4src_m) {
 		memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4),
 		       &ip4src_v, sizeof(ip4src_v));
-		memset(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       0xff, sizeof(ip4src_m));
+		memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+		       &ip4src_m, sizeof(ip4src_m));
 	}
 	if (ip4dst_m) {
 		memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &ip4dst_v, sizeof(ip4dst_v));
-		memset(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       0xff, sizeof(ip4dst_m));
+		memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+		       &ip4dst_m, sizeof(ip4dst_m));
 	}
 
 	MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
@@ -173,11 +175,11 @@
 	__be16 pdst_m, __be16 pdst_v)
 {
 	if (psrc_m) {
-		MLX5E_FTE_SET(headers_c, tcp_sport, 0xffff);
+		MLX5E_FTE_SET(headers_c, tcp_sport, ntohs(psrc_m));
 		MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v));
 	}
 	if (pdst_m) {
-		MLX5E_FTE_SET(headers_c, tcp_dport, 0xffff);
+		MLX5E_FTE_SET(headers_c, tcp_dport, ntohs(pdst_m));
 		MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v));
 	}
 
@@ -190,12 +192,12 @@
 	__be16 pdst_m, __be16 pdst_v)
 {
 	if (psrc_m) {
-		MLX5E_FTE_SET(headers_c, udp_sport, 0xffff);
+		MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_m));
 		MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v));
 	}
 
 	if (pdst_m) {
-		MLX5E_FTE_SET(headers_c, udp_dport, 0xffff);
+		MLX5E_FTE_SET(headers_c, udp_dport, ntohs(pdst_m));
 		MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v));
 	}
 
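Propagating the user-supplied mask (memcpy of the IP mask, ntohs() of the
port mask) instead of forcing an all-ones match means partially masked
fields now reach the flow-table match criteria. A worked example with
illustrative values:

    /* pdst_v = htons(0x1380), pdst_m = htons(0xfff0): the rule now
     * matches UDP dst ports 0x1380..0x138f (4992..5007), where the
     * old memset(0xff) code could only express an exact match. */
    MLX5E_FTE_SET(headers_c, udp_dport, 0xfff0);
    MLX5E_FTE_SET(headers_v, udp_dport, 0x1380);
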
@@ -397,10 +399,10 @@
 		      struct mlx5_flow_table *ft,
 		      struct ethtool_rx_flow_spec *fs)
 {
+	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
 	struct mlx5_flow_destination *dst = NULL;
-	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
 	int err = 0;
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
@@ -414,6 +416,14 @@
 	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
 		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	} else {
+		struct mlx5e_params *params = &priv->channels.params;
+		enum mlx5e_rq_group group;
+		struct mlx5e_tir *tir;
+		u16 ix;
+
+		mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
+		tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir;
+
 		dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 		if (!dst) {
 			err = -ENOMEM;
@@ -421,12 +431,12 @@
 		}
 
 		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-		dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
+		dst->tir_num = tir[ix].tirn;
 		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
 	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
-	flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+	spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
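
mlx5e_qid_get_ch_and_group() is added in en/params.h elsewhere in this
update; the layout it decodes is that the first num_channels queue ids
address the regular RQs and the next num_channels address the XSK RQs of
the same channels. A simplified sketch of that mapping (assumption: two
RQ groups, REGULAR = 0 and XSK = 1):

    static void qid_to_ch_and_group(u16 nch, u16 qid,
    				    u16 *ix, bool *is_xsk)
    {
    	*is_xsk = qid >= nch;	/* group = qid / nch */
    	*ix     = qid % nch;	/* channel index inside the group */
    }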
@@ -508,26 +518,14 @@
 	if (l4_mask->tos)
 		return -EINVAL;
 
-	if (l4_mask->ip4src) {
-		if (!all_ones(l4_mask->ip4src))
-			return -EINVAL;
+	if (l4_mask->ip4src)
 		ntuples++;
-	}
-	if (l4_mask->ip4dst) {
-		if (!all_ones(l4_mask->ip4dst))
-			return -EINVAL;
+	if (l4_mask->ip4dst)
 		ntuples++;
-	}
-	if (l4_mask->psrc) {
-		if (!all_ones(l4_mask->psrc))
-			return -EINVAL;
+	if (l4_mask->psrc)
 		ntuples++;
-	}
-	if (l4_mask->pdst) {
-		if (!all_ones(l4_mask->pdst))
-			return -EINVAL;
+	if (l4_mask->pdst)
 		ntuples++;
-	}
 	/* Flow is TCP/UDP */
 	return ++ntuples;
 }
@@ -540,16 +538,10 @@
 	if (l3_mask->l4_4_bytes || l3_mask->tos ||
 	    fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4)
 		return -EINVAL;
-	if (l3_mask->ip4src) {
-		if (!all_ones(l3_mask->ip4src))
-			return -EINVAL;
+	if (l3_mask->ip4src)
 		ntuples++;
-	}
-	if (l3_mask->ip4dst) {
-		if (!all_ones(l3_mask->ip4dst))
-			return -EINVAL;
+	if (l3_mask->ip4dst)
 		ntuples++;
-	}
 	if (l3_mask->proto)
 		ntuples++;
 	/* Flow is IPv4 */
@@ -588,16 +580,10 @@
 	if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst))
 		ntuples++;
 
-	if (l4_mask->psrc) {
-		if (!all_ones(l4_mask->psrc))
-			return -EINVAL;
+	if (l4_mask->psrc)
 		ntuples++;
-	}
-	if (l4_mask->pdst) {
-		if (!all_ones(l4_mask->pdst))
-			return -EINVAL;
+	if (l4_mask->pdst)
 		ntuples++;
-	}
 	/* Flow is TCP/UDP */
 	return ++ntuples;
 }
@@ -624,9 +610,10 @@
 	if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
 		return -ENOSPC;
 
-	if (fs->ring_cookie >= priv->channels.params.num_channels &&
-	    fs->ring_cookie != RX_CLS_FLOW_DISC)
-		return -EINVAL;
+	if (fs->ring_cookie != RX_CLS_FLOW_DISC)
+		if (!mlx5e_qid_validate(priv->profile, &priv->channels.params,
+					fs->ring_cookie))
+			return -EINVAL;
 
 	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
 	case ETHER_FLOW:
@@ -795,10 +782,116 @@
 	INIT_LIST_HEAD(&priv->fs.ethtool.rules);
 }
 
-int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
 {
-	int err = 0;
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+		return MLX5E_TT_IPV4_TCP;
+	case TCP_V6_FLOW:
+		return MLX5E_TT_IPV6_TCP;
+	case UDP_V4_FLOW:
+		return MLX5E_TT_IPV4_UDP;
+	case UDP_V6_FLOW:
+		return MLX5E_TT_IPV6_UDP;
+	case AH_V4_FLOW:
+		return MLX5E_TT_IPV4_IPSEC_AH;
+	case AH_V6_FLOW:
+		return MLX5E_TT_IPV6_IPSEC_AH;
+	case ESP_V4_FLOW:
+		return MLX5E_TT_IPV4_IPSEC_ESP;
+	case ESP_V6_FLOW:
+		return MLX5E_TT_IPV6_IPSEC_ESP;
+	case IPV4_FLOW:
+		return MLX5E_TT_IPV4;
+	case IPV6_FLOW:
+		return MLX5E_TT_IPV6;
+	default:
+		return MLX5E_NUM_INDIR_TIRS;
+	}
+}
+
+static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
+				  struct ethtool_rxnfc *nfc)
+{
+	int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	enum mlx5e_traffic_types tt;
+	u8 rx_hash_field = 0;
+	void *in;
+
+	tt = flow_type_to_traffic_type(nfc->flow_type);
+	if (tt == MLX5E_NUM_INDIR_TIRS)
+		return -EINVAL;
+
+	/*  RSS does not support anything other than hashing to queues
+	 *  on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
+	 *  port.
+	 */
+	if (nfc->flow_type != TCP_V4_FLOW &&
+	    nfc->flow_type != TCP_V6_FLOW &&
+	    nfc->flow_type != UDP_V4_FLOW &&
+	    nfc->flow_type != UDP_V6_FLOW)
+		return -EOPNOTSUPP;
+
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EOPNOTSUPP;
+
+	if (nfc->data & RXH_IP_SRC)
+		rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP;
+	if (nfc->data & RXH_IP_DST)
+		rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP;
+	if (nfc->data & RXH_L4_B_0_1)
+		rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+	if (nfc->data & RXH_L4_B_2_3)
+		rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	mutex_lock(&priv->state_lock);
+
+	if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
+		goto out;
+
+	priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
+	mlx5e_modify_tirs_hash(priv, in, inlen);
+
+out:
+	mutex_unlock(&priv->state_lock);
+	kvfree(in);
+	return 0;
+}
+
+static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
+				  struct ethtool_rxnfc *nfc)
+{
+	enum mlx5e_traffic_types tt;
+	u32 hash_field = 0;
+
+	tt = flow_type_to_traffic_type(nfc->flow_type);
+	if (tt == MLX5E_NUM_INDIR_TIRS)
+		return -EINVAL;
+
+	hash_field = priv->rss_params.rx_hash_fields[tt];
+	nfc->data = 0;
+
+	if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
+		nfc->data |= RXH_IP_SRC;
+	if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP)
+		nfc->data |= RXH_IP_DST;
+	if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT)
+		nfc->data |= RXH_L4_B_0_1;
+	if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT)
+		nfc->data |= RXH_L4_B_2_3;
+
+	return 0;
+}
+
+int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	int err = 0;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXCLSRLINS:
@@ -807,6 +900,9 @@
 	case ETHTOOL_SRXCLSRLDEL:
 		err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
 		break;
+	case ETHTOOL_SRXFH:
+		err = mlx5e_set_rss_hash_opt(priv, cmd);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -815,16 +911,13 @@
 	return err;
 }
 
-int mlx5e_get_rxnfc(struct net_device *dev,
-		    struct ethtool_rxnfc *info, u32 *rule_locs)
+int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
+			    struct ethtool_rxnfc *info, u32 *rule_locs)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int err = 0;
 
 	switch (info->cmd) {
-	case ETHTOOL_GRXRINGS:
-		info->data = priv->channels.params.num_channels;
-		break;
 	case ETHTOOL_GRXCLSRLCNT:
 		info->rule_cnt = priv->fs.ethtool.tot_num_rules;
 		break;
@@ -834,6 +927,9 @@
 	case ETHTOOL_GRXCLSRLALL:
 		err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
 		break;
+	case ETHTOOL_GRXFH:
+		err = mlx5e_get_rss_hash_opt(priv, info);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
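
The new ETHTOOL_SRXFH/ETHTOOL_GRXFH cases expose the per-traffic-type hash
fields kept in priv->rss_params.rx_hash_fields. A minimal userspace sketch
driving the set path (interface name and root privileges are assumptions;
equivalent to "ethtool -N eth0 rx-flow-hash tcp4 sdfn"):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <net/if.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    int main(void)
    {
    	struct ethtool_rxnfc nfc = {
    		.cmd       = ETHTOOL_SRXFH,
    		.flow_type = TCP_V4_FLOW,
    		/* hash on src/dst IP and TCP src/dst port */
    		.data      = RXH_IP_SRC | RXH_IP_DST |
    			     RXH_L4_B_0_1 | RXH_L4_B_2_3,
    	};
    	struct ifreq ifr = { 0 };
    	int fd = socket(AF_INET, SOCK_DGRAM, 0);

    	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
    	ifr.ifr_data = (char *)&nfc;
    	if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) < 0)
    		perror("ETHTOOL_SRXFH");
    	if (fd >= 0)
    		close(fd);
    	return 0;
    }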
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index faa84b4..2a56e66 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -34,14 +34,19 @@
 #include <net/pkt_cls.h>
 #include <linux/mlx5/fs.h>
 #include <net/vxlan.h>
+#include <net/geneve.h>
 #include <linux/bpf.h>
+#include <linux/if_bridge.h>
 #include <net/page_pool.h>
+#include <net/xdp_sock.h>
 #include "eswitch.h"
 #include "en.h"
+#include "en/txrx.h"
 #include "en_tc.h"
 #include "en_rep.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/ipsec_rxtx.h"
+#include "en_accel/en_accel.h"
 #include "en_accel/tls.h"
 #include "accel/ipsec.h"
 #include "accel/tls.h"
@@ -49,34 +54,16 @@
 #include "lib/clock.h"
 #include "en/port.h"
 #include "en/xdp.h"
+#include "lib/eq.h"
+#include "en/monitor_stats.h"
+#include "en/health.h"
+#include "en/params.h"
+#include "en/xsk/umem.h"
+#include "en/xsk/setup.h"
+#include "en/xsk/rx.h"
+#include "en/xsk/tx.h"
+#include "en/hv_vhca_stats.h"
 
-struct mlx5e_rq_param {
-	u32			rqc[MLX5_ST_SZ_DW(rqc)];
-	struct mlx5_wq_param	wq;
-	struct mlx5e_rq_frags_info frags_info;
-};
-
-struct mlx5e_sq_param {
-	u32                        sqc[MLX5_ST_SZ_DW(sqc)];
-	struct mlx5_wq_param       wq;
-};
-
-struct mlx5e_cq_param {
-	u32                        cqc[MLX5_ST_SZ_DW(cqc)];
-	struct mlx5_wq_param       wq;
-	u16                        eq_ix;
-	u8                         cq_period_mode;
-};
-
-struct mlx5e_channel_param {
-	struct mlx5e_rq_param      rq;
-	struct mlx5e_sq_param      sq;
-	struct mlx5e_sq_param      xdp_sq;
-	struct mlx5e_sq_param      icosq;
-	struct mlx5e_cq_param      rx_cq;
-	struct mlx5e_cq_param      tx_cq;
-	struct mlx5e_cq_param      icosq_cq;
-};
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
@@ -96,104 +83,9 @@
 	return true;
 }
 
-static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
-{
-	u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	u16 linear_rq_headroom = params->xdp_prog ?
-		XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
-	u32 frag_sz;
-
-	linear_rq_headroom += NET_IP_ALIGN;
-
-	frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
-
-	if (params->xdp_prog && frag_sz < PAGE_SIZE)
-		frag_sz = PAGE_SIZE;
-
-	return frag_sz;
-}
-
-static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
-{
-	u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
-
-	return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
-}
-
-static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params)
-{
-	u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
-
-	return !params->lro_en && frag_sz <= PAGE_SIZE;
-}
-
-static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
-					 struct mlx5e_params *params)
-{
-	u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
-	s8 signed_log_num_strides_param;
-	u8 log_num_strides;
-
-	if (!mlx5e_rx_is_linear_skb(mdev, params))
-		return false;
-
-	if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
-		return true;
-
-	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
-	signed_log_num_strides_param =
-		(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
-
-	return signed_log_num_strides_param >= 0;
-}
-
-static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
-{
-	if (params->log_rq_mtu_frames <
-	    mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
-		return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
-
-	return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params);
-}
-
-static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
-					  struct mlx5e_params *params)
-{
-	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
-		return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
-
-	return MLX5E_MPWQE_STRIDE_SZ(mdev,
-		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
-}
-
-static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
-					  struct mlx5e_params *params)
-{
-	return MLX5_MPWRQ_LOG_WQE_SZ -
-		mlx5e_mpwqe_get_log_stride_size(mdev, params);
-}
-
-static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
-				 struct mlx5e_params *params)
-{
-	u16 linear_rq_headroom = params->xdp_prog ?
-		XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
-	bool is_linear_skb;
-
-	linear_rq_headroom += NET_IP_ALIGN;
-
-	is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
-		mlx5e_rx_is_linear_skb(mdev, params) :
-		mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
-
-	return is_linear_skb ? linear_rq_headroom : 0;
-}
-
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params)
 {
-	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
 	params->log_rq_mtu_frames = is_kdump_kernel() ?
 		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
 		MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
@@ -201,18 +93,31 @@
 	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
 		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
 		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
-		       BIT(mlx5e_mpwqe_get_log_rq_size(params)) :
+		       BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
 		       BIT(params->log_rq_mtu_frames),
-		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
+		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params)
 {
-	return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-		!MLX5_IPSEC_DEV(mdev) &&
-		!(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
+	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+		return false;
+
+	if (MLX5_IPSEC_DEV(mdev))
+		return false;
+
+	if (params->xdp_prog) {
+		/* XSK params are not considered here. If striding RQ is in use,
+		 * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
+		 * be called with the known XSK params.
+		 */
+		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+			return false;
+	}
+
+	return true;
 }
 
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
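
The RX parameter helpers deleted above move to en/params.c in this update
and gain a struct mlx5e_xsk_param argument; passing NULL, as the remaining
callers here do, keeps the old non-XSK behaviour. A worked example of the
arithmetic they encapsulate (assumes MLX5_MPWRQ_LOG_WQE_SZ == 18 and a
typical x86_64 layout, both assumptions about the surrounding headers):

    /* MTU 1500 -> hw_mtu ~1522 bytes; adding the RX headroom and the
     * cache-aligned skb_shared_info gives a linear frag of ~1920
     * bytes, which order_base_2() rounds to 2048 (2^11). One 256 KB
     * striding-RQ WQE then carries 2^(18 - 11) = 128 packets. */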
@@ -223,7 +128,7 @@
 		MLX5_WQ_TYPE_CYCLIC;
 }
 
-static void mlx5e_update_carrier(struct mlx5e_priv *priv)
+void mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u8 port_state;
@@ -262,7 +167,7 @@
 			mlx5e_stats_grps[i].update_stats(priv);
 }
 
-static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
 {
 	int i;
 
@@ -272,10 +177,9 @@
 			mlx5e_stats_grps[i].update_stats(priv);
 }
 
-void mlx5e_update_stats_work(struct work_struct *work)
+static void mlx5e_update_stats_work(struct work_struct *work)
 {
-	struct delayed_work *dwork = to_delayed_work(work);
-	struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv,
+	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 					       update_stats_work);
 
 	mutex_lock(&priv->state_lock);
@@ -283,33 +187,46 @@
 	mutex_unlock(&priv->state_lock);
 }
 
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
-			      enum mlx5_dev_event event, unsigned long param)
+void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
 {
-	struct mlx5e_priv *priv = vpriv;
-
-	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
+	if (!priv->profile->update_stats)
 		return;
 
-	switch (event) {
-	case MLX5_DEV_EVENT_PORT_UP:
-	case MLX5_DEV_EVENT_PORT_DOWN:
+	if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
+		return;
+
+	queue_work(priv->wq, &priv->update_stats_work);
+}
+
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+	struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+	struct mlx5_eqe   *eqe = data;
+
+	if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+		return NOTIFY_DONE;
+
+	switch (eqe->sub_type) {
+	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
 		queue_work(priv->wq, &priv->update_carrier_work);
 		break;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
+
+	return NOTIFY_OK;
 }
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+	priv->events_nb.notifier_call = async_event;
+	mlx5_notifier_register(priv->mdev, &priv->events_nb);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
-	synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+	mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
 }
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
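
Port events now arrive through the generic mlx5 notifier chain rather than
the removed mlx5e_async_event() callback, so any number of consumers can
subscribe independently. A hedged sketch of a second subscriber (my_nb and
my_handler are hypothetical names):

    static int my_handler(struct notifier_block *nb,
    			  unsigned long event, void *data)
    {
    	if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
    		return NOTIFY_DONE;	/* not ours, keep walking */
    	/* ... react to eqe->sub_type here ... */
    	return NOTIFY_OK;
    }

    /* at probe/remove time, mirroring mlx5e_enable_async_events(): */
    my_nb.notifier_call = my_handler;
    mlx5_notifier_register(mdev, &my_nb);
    ...
    mlx5_notifier_unregister(mdev, &my_nb);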
@@ -331,26 +248,6 @@
 	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
-static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
-{
-	switch (rq->wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
-	default:
-		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
-	}
-}
-
-static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
-{
-	switch (rq->wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return rq->mpwqe.wq.cur_sz;
-	default:
-		return rq->wqe.wq.cur_sz;
-	}
-}
-
 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 				     struct mlx5e_channel *c)
 {
@@ -415,12 +312,11 @@
 
 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
 {
-	struct mlx5e_wqe_frag_info next_frag, *prev;
+	struct mlx5e_wqe_frag_info next_frag = {};
+	struct mlx5e_wqe_frag_info *prev = NULL;
 	int i;
 
 	next_frag.di = &rq->wqe.di[0];
-	next_frag.offset = 0;
-	prev = NULL;
 
 	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
 		struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
@@ -448,7 +344,6 @@
 }
 
 static int mlx5e_init_di_list(struct mlx5e_rq *rq,
-			      struct mlx5e_params *params,
 			      int wq_sz, int cpu)
 {
 	int len = wq_sz << rq->wqe.info.log_num_frags;
@@ -468,8 +363,17 @@
 	kvfree(rq->wqe.di);
 }
 
+static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
+{
+	struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
+
+	mlx5e_reporter_rq_cqe_err(rq);
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
+			  struct mlx5e_xsk_param *xsk,
+			  struct xdp_umem *umem,
 			  struct mlx5e_rq_param *rqp,
 			  struct mlx5e_rq *rq)
 {
@@ -477,6 +381,8 @@
 	struct mlx5_core_dev *mdev = c->mdev;
 	void *rqc = rqp->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+	u32 num_xsk_frames = 0;
+	u32 rq_xdp_ix;
 	u32 pool_size;
 	int wq_sz;
 	int err;
@@ -493,7 +399,14 @@
 	rq->ix      = c->ix;
 	rq->mdev    = mdev;
 	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	rq->stats   = &c->priv->channel_stats[c->ix].rq;
+	rq->xdpsq   = &c->rq_xdpsq;
+	rq->umem    = umem;
+
+	if (rq->umem)
+		rq->stats = &c->priv->channel_stats[c->ix].xskrq;
+	else
+		rq->stats = &c->priv->channel_stats[c->ix].rq;
+	INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
 
 	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
 	if (IS_ERR(rq->xdp_prog)) {
@@ -502,12 +415,16 @@
 		goto err_rq_wq_destroy;
 	}
 
-	err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix);
+	rq_xdp_ix = rq->ix;
+	if (xsk)
+		rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
+	err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
 	if (err < 0)
 		goto err_rq_wq_destroy;
 
 	rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
+	rq->buff.umem_headroom = xsk ? xsk->headroom : 0;
 	pool_size = 1 << params->log_rq_mtu_frames;
 
 	switch (rq->wq_type) {
@@ -521,7 +438,12 @@
 
 		wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 
-		pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
+		if (xsk)
+			num_xsk_frames = wq_sz <<
+				mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+
+		pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
+			mlx5e_mpwqe_get_log_rq_size(params, xsk);
 
 		rq->post_wqes = mlx5e_post_rx_mpwqes;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
@@ -540,12 +462,15 @@
 			goto err_rq_wq_destroy;
 		}
 
-		rq->mpwqe.skb_from_cqe_mpwrq =
-			mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ?
-			mlx5e_skb_from_cqe_mpwrq_linear :
-			mlx5e_skb_from_cqe_mpwrq_nonlinear;
-		rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
-		rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
+		rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
+			mlx5e_xsk_skb_from_cqe_mpwrq_linear :
+			mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
+				mlx5e_skb_from_cqe_mpwrq_linear :
+				mlx5e_skb_from_cqe_mpwrq_nonlinear;
+
+		rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+		rq->mpwqe.num_strides =
+			BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
 
 		err = mlx5e_create_rq_umr_mkey(mdev, rq);
 		if (err)
@@ -566,6 +491,9 @@
 
 		wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
 
+		if (xsk)
+			num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
+
 		rq->wqe.info = rqp->frags_info;
 		rq->wqe.frags =
 			kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
@@ -576,9 +504,10 @@
 			goto err_free;
 		}
 
-		err = mlx5e_init_di_list(rq, params, wq_sz, c->cpu);
+		err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
 		if (err)
 			goto err_free;
+
 		rq->post_wqes = mlx5e_post_rx_wqes;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
@@ -594,33 +523,49 @@
 			goto err_free;
 		}
 
-		rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(mdev, params) ?
-			mlx5e_skb_from_cqe_linear :
-			mlx5e_skb_from_cqe_nonlinear;
+		rq->wqe.skb_from_cqe = xsk ?
+			mlx5e_xsk_skb_from_cqe_linear :
+			mlx5e_rx_is_linear_skb(params, NULL) ?
+				mlx5e_skb_from_cqe_linear :
+				mlx5e_skb_from_cqe_nonlinear;
 		rq->mkey_be = c->mkey_be;
 	}
 
-	/* Create a page_pool and register it with rxq */
-	pp_params.order     = 0;
-	pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
-	pp_params.pool_size = pool_size;
-	pp_params.nid       = cpu_to_node(c->cpu);
-	pp_params.dev       = c->pdev;
-	pp_params.dma_dir   = rq->buff.map_dir;
+	if (xsk) {
+		err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
+		if (unlikely(err)) {
+			mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
+				      num_xsk_frames);
+			goto err_free;
+		}
 
-	/* page_pool can be used even when there is no rq->xdp_prog,
-	 * given page_pool does not handle DMA mapping there is no
-	 * required state to clear. And page_pool gracefully handle
-	 * elevated refcnt.
-	 */
-	rq->page_pool = page_pool_create(&pp_params);
-	if (IS_ERR(rq->page_pool)) {
-		err = PTR_ERR(rq->page_pool);
-		rq->page_pool = NULL;
-		goto err_free;
+		rq->zca.free = mlx5e_xsk_zca_free;
+		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+						 MEM_TYPE_ZERO_COPY,
+						 &rq->zca);
+	} else {
+		/* Create a page_pool and register it with rxq */
+		pp_params.order     = 0;
+		pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
+		pp_params.pool_size = pool_size;
+		pp_params.nid       = cpu_to_node(c->cpu);
+		pp_params.dev       = c->pdev;
+		pp_params.dma_dir   = rq->buff.map_dir;
+
+		/* page_pool can be used even when there is no rq->xdp_prog,
+		 * given page_pool does not handle DMA mapping, there is
+		 * no required state to clear. And page_pool gracefully
+		 * handles elevated refcnts.
+		 */
+		rq->page_pool = page_pool_create(&pp_params);
+		if (IS_ERR(rq->page_pool)) {
+			err = PTR_ERR(rq->page_pool);
+			rq->page_pool = NULL;
+			goto err_free;
+		}
+		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+						 MEM_TYPE_PAGE_POOL, rq->page_pool);
 	}
-	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
-					 MEM_TYPE_PAGE_POOL, rq->page_pool);
 	if (err)
 		goto err_free;
 
@@ -660,11 +605,11 @@
 
 	switch (params->rx_cq_moderation.cq_period_mode) {
 	case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
-		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
 		break;
 	case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
 	default:
-		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	}
 
 	rq->page_cache.head = 0;
@@ -687,8 +632,7 @@
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
-	if (rq->page_pool)
-		page_pool_destroy(rq->page_pool);
+	page_pool_destroy(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
@@ -701,10 +645,6 @@
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 
-	xdp_rxq_info_unreg(&rq->xdp_rxq);
-	if (rq->page_pool)
-		page_pool_destroy(rq->page_pool);
-
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
@@ -719,8 +659,15 @@
 	     i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
 		struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
 
-		mlx5e_page_release(rq, dma_info, false);
+		/* With AF_XDP, page_cache is not used, so this loop is not
+		 * entered, and it's safe to call mlx5e_page_release_dynamic
+		 * directly.
+		 */
+		mlx5e_page_release_dynamic(rq, dma_info, false);
 	}
+
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
+	page_pool_destroy(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
@@ -762,8 +709,7 @@
 	return err;
 }
 
-static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
-				 int next_state)
+int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
 {
 	struct mlx5_core_dev *mdev = rq->mdev;
 
@@ -854,7 +800,7 @@
 	mlx5_core_destroy_rq(rq->mdev, rq->rqn);
 }
 
-static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
 {
 	unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
 	struct mlx5e_channel *c = rq->channel;
@@ -871,20 +817,25 @@
 	netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
 		    c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
 
+	mlx5e_reporter_rx_timeout(rq);
 	return -ETIMEDOUT;
 }
 
-static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 {
 	__be16 wqe_ix_be;
 	u16 wqe_ix;
 
 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+		u16 head = wq->head;
+		int i;
 
-		/* UMR WQE (if in progress) is always at wq->head */
-		if (rq->mpwqe.umr_in_progress)
-			rq->dealloc_wqe(rq, wq->head);
+		/* Outstanding UMR WQEs (in progress) start at wq->head */
+		for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
+			rq->dealloc_wqe(rq, head);
+			head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+		}
 
 		while (!mlx5_wq_ll_is_empty(wq)) {
 			struct mlx5e_rx_wqe_ll *wqe;
@@ -908,14 +859,13 @@
 
 }
 
-static int mlx5e_open_rq(struct mlx5e_channel *c,
-			 struct mlx5e_params *params,
-			 struct mlx5e_rq_param *param,
-			 struct mlx5e_rq *rq)
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+		  struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+		  struct xdp_umem *umem, struct mlx5e_rq *rq)
 {
 	int err;
 
-	err = mlx5e_alloc_rq(c, params, param, rq);
+	err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
 	if (err)
 		return err;
 
@@ -927,9 +877,19 @@
 	if (err)
 		goto err_destroy_rq;
 
+	if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full))
+		__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state);
+
 	if (params->rx_dim_enabled)
 		__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
+	/* We disable csum_complete when XDP is enabled since
+	 * XDP programs might manipulate packets which will render
+	 * skb->checksum incorrect.
+	 */
+	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
+		__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+
 	return 0;
 
 err_destroy_rq:
@@ -940,29 +900,23 @@
 	return err;
 }
 
-static void mlx5e_activate_rq(struct mlx5e_rq *rq)
+void mlx5e_activate_rq(struct mlx5e_rq *rq)
 {
-	struct mlx5e_icosq *sq = &rq->channel->icosq;
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	struct mlx5e_tx_wqe *nopwqe;
-
-	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-
 	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-	sq->db.ico_wqe[pi].opcode     = MLX5_OPCODE_NOP;
-	nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+	mlx5e_trigger_irq(&rq->channel->icosq);
 }
 
-static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
 {
 	clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
 }
 
-static void mlx5e_close_rq(struct mlx5e_rq *rq)
+void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
 	cancel_work_sync(&rq->dim.work);
+	cancel_work_sync(&rq->channel->icosq.recover_work);
+	cancel_work_sync(&rq->recover_work);
 	mlx5e_destroy_rq(rq);
 	mlx5e_free_rx_descs(rq);
 	mlx5e_free_rq(rq);
@@ -970,18 +924,42 @@
 
 static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
 {
-	kvfree(sq->db.xdpi);
+	kvfree(sq->db.xdpi_fifo.xi);
+	kvfree(sq->db.wqe_info);
+}
+
+static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
+{
+	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+	int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
+	int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+	xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
+				      GFP_KERNEL, numa);
+	if (!xdpi_fifo->xi)
+		return -ENOMEM;
+
+	xdpi_fifo->pc   = &sq->xdpi_fifo_pc;
+	xdpi_fifo->cc   = &sq->xdpi_fifo_cc;
+	xdpi_fifo->mask = dsegs_per_wq - 1;
+
+	return 0;
 }
 
 static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+	int err;
 
-	sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
-				    GFP_KERNEL, numa);
-	if (!sq->db.xdpi) {
-		mlx5e_free_xdpsq_db(sq);
+	sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
+					GFP_KERNEL, numa);
+	if (!sq->db.wqe_info)
 		return -ENOMEM;
+
+	err = mlx5e_alloc_xdpsq_fifo(sq, numa);
+	if (err) {
+		mlx5e_free_xdpsq_db(sq);
+		return err;
 	}
 
 	return 0;
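
The XDP SQ now tracks one mlx5e_xdp_info per data segment in a dedicated
FIFO, sized for the worst case of MLX5_SEND_WQEBB_NUM_DS descriptors per
WQEBB (assumed to be 64 / 16 = 4, per the usual send-WQE constants):

    /* Worked example with assumed constants: wq_sz = 1024 WQEBBs
     * -> dsegs_per_wq = 1024 * 4 = 4096 entries, mask = 4095, so
     * the free-running pc/cc counters index the ring as
     * xi[(*fifo->pc)++ & fifo->mask] without explicit wrapping. */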
@@ -989,6 +967,7 @@
 
 static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
+			     struct xdp_umem *umem,
 			     struct mlx5e_sq_param *param,
 			     struct mlx5e_xdpsq *sq,
 			     bool is_redirect)
@@ -1004,9 +983,13 @@
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	sq->stats     = is_redirect ?
-		&c->priv->channel_stats[c->ix].xdpsq :
-		&c->priv->channel_stats[c->ix].rq_xdpsq;
+	sq->umem      = umem;
+
+	sq->stats = sq->umem ?
+		&c->priv->channel_stats[c->ix].xsksq :
+		is_redirect ?
+			&c->priv->channel_stats[c->ix].xdpsq :
+			&c->priv->channel_stats[c->ix].rq_xdpsq;
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1039,7 +1022,7 @@
 
 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 {
-	u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
 	sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
 						  sizeof(*sq->db.ico_wqe)),
@@ -1050,6 +1033,14 @@
 	return 0;
 }
 
+static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+	struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+					      recover_work);
+
+	mlx5e_reporter_icosq_cqe_err(sq);
+}
+
 static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 			     struct mlx5e_sq_param *param,
 			     struct mlx5e_icosq *sq)
@@ -1072,6 +1063,8 @@
 	if (err)
 		goto err_sq_wq_destroy;
 
+	INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
+
 	return 0;
 
 err_sq_wq_destroy:
@@ -1113,7 +1106,7 @@
 	return 0;
 }
 
-static void mlx5e_sq_recover(struct work_struct *work);
+static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
 			     struct mlx5e_params *params,
@@ -1131,15 +1124,26 @@
 	sq->clock     = &mdev->clock;
 	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
+	sq->ch_ix     = c->ix;
 	sq->txq_ix    = txq_ix;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
+	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
-	INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
+	sq->stop_room = MLX5E_SQ_STOP_ROOM;
+	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
-	if (mlx5_accel_is_tls_device(c->priv->mdev))
+#ifdef CONFIG_MLX5_EN_TLS
+	if (mlx5_accel_is_tls_device(c->priv->mdev)) {
 		set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+		sq->stop_room += MLX5E_SQ_TLS_ROOM +
+			mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
+						    TLS_MAX_PAYLOAD_SIZE);
+	}
+#endif
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
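
sq->stop_room replaces a fixed stop threshold: the TXQ is stopped while
fewer than stop_room WQEBBs remain, so the worst-case WQE, plus the kTLS
resync DUMP WQEs accounted above when TLS offload is active, always fits.
A simplified sketch of the check it feeds (the real test lives in the TX
path, not in this hunk):

    if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
    					 sq->stop_room)))
    	netif_tx_stop_queue(sq->txq);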
@@ -1223,15 +1227,8 @@
 	return err;
 }
 
-struct mlx5e_modify_sq_param {
-	int curr_state;
-	int next_state;
-	bool rl_update;
-	int rl_index;
-};
-
-static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
-			   struct mlx5e_modify_sq_param *p)
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+		    struct mlx5e_modify_sq_param *p)
 {
 	void *in;
 	void *sqc;
@@ -1323,32 +1320,20 @@
 	return 0;
 
 err_free_txqsq:
-	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	mlx5e_free_txqsq(sq);
 
 	return err;
 }
 
-static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
-{
-	WARN_ONCE(sq->cc != sq->pc,
-		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
-		  sq->sqn, sq->cc, sq->pc);
-	sq->cc = 0;
-	sq->dma_fifo_cc = 0;
-	sq->pc = 0;
-}
-
-static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
 {
 	sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
-	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
 	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	netdev_tx_reset_queue(sq->txq);
 	netif_tx_start_queue(sq->txq);
 }
 
-static inline void netif_tx_disable_queue(struct netdev_queue *txq)
+void mlx5e_tx_disable_queue(struct netdev_queue *txq)
 {
 	__netif_tx_lock_bh(txq);
 	netif_tx_stop_queue(txq);
@@ -1364,14 +1349,18 @@
 	/* prevent netif_tx_wake_queue */
 	napi_synchronize(&c->napi);
 
-	netif_tx_disable_queue(sq->txq);
+	mlx5e_tx_disable_queue(sq->txq);
 
 	/* last doorbell out, godspeed .. */
 	if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
 		u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+		struct mlx5e_tx_wqe_info *wi;
 		struct mlx5e_tx_wqe *nop;
 
-		sq->db.wqe_info[pi].skb = NULL;
+		wi = &sq->db.wqe_info[pi];
+
+		memset(wi, 0, sizeof(*wi));
+		wi->num_wqebbs = 1;
 		nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
 		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
 	}
@@ -1383,6 +1372,8 @@
 	struct mlx5_core_dev *mdev = c->mdev;
 	struct mlx5_rate_limit rl = {0};
 
+	cancel_work_sync(&sq->dim.work);
+	cancel_work_sync(&sq->recover_work);
 	mlx5e_destroy_sq(mdev, sq->sqn);
 	if (sq->rate_limit) {
 		rl.rate = sq->rate_limit;
@@ -1392,111 +1383,16 @@
 	mlx5e_free_txqsq(sq);
 }
 
-static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
 {
-	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+	struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
+					      recover_work);
 
-	while (time_before(jiffies, exp_time)) {
-		if (sq->cc == sq->pc)
-			return 0;
-
-		msleep(20);
-	}
-
-	netdev_err(sq->channel->netdev,
-		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
-		   sq->sqn, sq->cc, sq->pc);
-
-	return -ETIMEDOUT;
+	mlx5e_reporter_tx_err_cqe(sq);
 }
 
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
-	struct mlx5_core_dev *mdev = sq->channel->mdev;
-	struct net_device *dev = sq->channel->netdev;
-	struct mlx5e_modify_sq_param msp = {0};
-	int err;
-
-	msp.curr_state = curr_state;
-	msp.next_state = MLX5_SQC_STATE_RST;
-
-	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-	if (err) {
-		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
-		return err;
-	}
-
-	memset(&msp, 0, sizeof(msp));
-	msp.curr_state = MLX5_SQC_STATE_RST;
-	msp.next_state = MLX5_SQC_STATE_RDY;
-
-	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-	if (err) {
-		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
-		return err;
-	}
-
-	return 0;
-}
-
-static void mlx5e_sq_recover(struct work_struct *work)
-{
-	struct mlx5e_txqsq_recover *recover =
-		container_of(work, struct mlx5e_txqsq_recover,
-			     recover_work);
-	struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
-					      recover);
-	struct mlx5_core_dev *mdev = sq->channel->mdev;
-	struct net_device *dev = sq->channel->netdev;
-	u8 state;
-	int err;
-
-	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
-	if (err) {
-		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
-			   sq->sqn, err);
-		return;
-	}
-
-	if (state != MLX5_RQC_STATE_ERR) {
-		netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
-		return;
-	}
-
-	netif_tx_disable_queue(sq->txq);
-
-	if (mlx5e_wait_for_sq_flush(sq))
-		return;
-
-	/* If the interval between two consecutive recovers per SQ is too
-	 * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
-	 * If we reached this state, there is probably a bug that needs to be
-	 * fixed. let's keep the queue close and let tx timeout cleanup.
-	 */
-	if (jiffies_to_msecs(jiffies - recover->last_recover) <
-	    MLX5E_SQ_RECOVER_MIN_INTERVAL) {
-		netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
-			   sq->sqn);
-		return;
-	}
-
-	/* At this point, no new packets will arrive from the stack as TXQ is
-	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
-	 * pending WQEs.  SQ can safely reset the SQ.
-	 */
-	if (mlx5e_sq_to_ready(sq, state))
-		return;
-
-	mlx5e_reset_txqsq_cc_pc(sq);
-	sq->stats->recover++;
-	recover->last_recover = jiffies;
-	mlx5e_activate_txqsq(sq);
-}
-
-static int mlx5e_open_icosq(struct mlx5e_channel *c,
-			    struct mlx5e_params *params,
-			    struct mlx5e_sq_param *param,
-			    struct mlx5e_icosq *sq)
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+		     struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
 {
 	struct mlx5e_create_sq_param csp = {};
 	int err;
@@ -1508,7 +1404,6 @@
 	csp.cqn             = sq->cq.mcq.cqn;
 	csp.wq_ctrl         = &sq->wq_ctrl;
 	csp.min_inline_mode = params->tx_min_inline_mode;
-	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
 	if (err)
 		goto err_free_icosq;
@@ -1516,68 +1411,82 @@
 	return 0;
 
 err_free_icosq:
-	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	mlx5e_free_icosq(sq);
 
 	return err;
 }
 
-static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
+{
+	set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+}
+
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+{
+	struct mlx5e_channel *c = icosq->channel;
+
+	clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+	napi_synchronize(&c->napi);
+}
+
+void mlx5e_close_icosq(struct mlx5e_icosq *sq)
 {
 	struct mlx5e_channel *c = sq->channel;
 
-	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-	napi_synchronize(&c->napi);
-
 	mlx5e_destroy_sq(c->mdev, sq->sqn);
 	mlx5e_free_icosq(sq);
 }
 
-static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
-			    struct mlx5e_params *params,
-			    struct mlx5e_sq_param *param,
-			    struct mlx5e_xdpsq *sq,
-			    bool is_redirect)
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+		     struct mlx5e_sq_param *param, struct xdp_umem *umem,
+		     struct mlx5e_xdpsq *sq, bool is_redirect)
 {
-	unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
 	struct mlx5e_create_sq_param csp = {};
-	unsigned int inline_hdr_sz = 0;
 	int err;
-	int i;
 
-	err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
+	err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
 	if (err)
 		return err;
 
 	csp.tis_lst_sz      = 1;
-	csp.tisn            = c->priv->tisn[0]; /* tc = 0 */
+	csp.tisn            = c->priv->tisn[c->lag_port][0]; /* tc = 0 */
 	csp.cqn             = sq->cq.mcq.cqn;
 	csp.wq_ctrl         = &sq->wq_ctrl;
 	csp.min_inline_mode = sq->min_inline_mode;
-	if (is_redirect)
-		set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
 	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
 	if (err)
 		goto err_free_xdpsq;
 
-	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
-		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
-		ds_cnt++;
-	}
+	mlx5e_set_xmit_fp(sq, param->is_mpw);
 
-	/* Pre initialize fixed WQE fields */
-	for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
-		struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
-		struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-		struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
-		struct mlx5_wqe_data_seg *dseg;
+	if (!param->is_mpw) {
+		unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+		unsigned int inline_hdr_sz = 0;
+		int i;
 
-		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
-		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+		if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+			inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+			ds_cnt++;
+		}
 
-		dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
-		dseg->lkey = sq->mkey_be;
+		/* Pre initialize fixed WQE fields */
+		for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+			struct mlx5e_xdp_wqe_info *wi  = &sq->db.wqe_info[i];
+			struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+			struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+			struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+			struct mlx5_wqe_data_seg *dseg;
+
+			cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+			eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+
+			dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+			dseg->lkey = sq->mkey_be;
+
+			wi->num_wqebbs = 1;
+			wi->num_pkts   = 1;
+		}
 	}
 
 	return 0;
@@ -1589,7 +1498,7 @@
 	return err;
 }
 
-static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
 {
 	struct mlx5e_channel *c = sq->channel;
 
@@ -1667,6 +1576,7 @@
 
 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 {
+	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 	struct mlx5_core_dev *mdev = cq->mdev;
 	struct mlx5_core_cq *mcq = &cq->mcq;
 
@@ -1701,7 +1611,7 @@
 					    MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
 
-	err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
 
 	kvfree(in);
 
@@ -1718,10 +1628,8 @@
 	mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
 }
 
-static int mlx5e_open_cq(struct mlx5e_channel *c,
-			 struct net_dim_cq_moder moder,
-			 struct mlx5e_cq_param *param,
-			 struct mlx5e_cq *cq)
+int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
+		  struct mlx5e_cq_param *param, struct mlx5e_cq *cq)
 {
 	struct mlx5_core_dev *mdev = c->mdev;
 	int err;
@@ -1744,17 +1652,12 @@
 	return err;
 }
 
-static void mlx5e_close_cq(struct mlx5e_cq *cq)
+void mlx5e_close_cq(struct mlx5e_cq *cq)
 {
 	mlx5e_destroy_cq(cq);
 	mlx5e_free_cq(cq);
 }
 
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
-	return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -1791,12 +1694,12 @@
 			  struct mlx5e_channel_param *cparam)
 {
 	struct mlx5e_priv *priv = c->priv;
-	int err, tc, max_nch = priv->profile->max_nch(priv->mdev);
+	int err, tc;
 
 	for (tc = 0; tc < params->num_tc; tc++) {
-		int txq_ix = c->ix + tc * max_nch;
+		int txq_ix = c->ix + tc * priv->max_nch;
 
-		err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix,
+		err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
 				       params, &cparam->sq, &c->sq[tc], tc);
 		if (err)
 			goto err_close_sqs;
@@ -1900,14 +1803,152 @@
 	return err;
 }
 
+static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c,
+				   struct mlx5e_params *params)
+{
+	int num_comp_vectors = mlx5_comp_vectors_count(c->mdev);
+	int irq;
+
+	if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
+	for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) {
+		int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq));
+
+		cpumask_set_cpu(cpu, c->xps_cpumask);
+	}
+
+	return 0;
+}
+
+static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
+{
+	free_cpumask_var(c->xps_cpumask);
+}
+
+static int mlx5e_open_queues(struct mlx5e_channel *c,
+			     struct mlx5e_params *params,
+			     struct mlx5e_channel_param *cparam)
+{
+	struct dim_cq_moder icocq_moder = {0, 0};
+	int err;
+
+	err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
+	if (err)
+		return err;
+
+	err = mlx5e_open_tx_cqs(c, params, cparam);
+	if (err)
+		goto err_close_icosq_cq;
+
+	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
+	if (err)
+		goto err_close_tx_cqs;
+
+	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
+	if (err)
+		goto err_close_xdp_tx_cqs;
+
+	/* XDP SQ CQ params are same as normal TXQ sq CQ params */
+	err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
+				     &cparam->tx_cq, &c->rq_xdpsq.cq) : 0;
+	if (err)
+		goto err_close_rx_cq;
+
+	napi_enable(&c->napi);
+
+	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+	if (err)
+		goto err_disable_napi;
+
+	err = mlx5e_open_sqs(c, params, cparam);
+	if (err)
+		goto err_close_icosq;
+
+	if (c->xdp) {
+		err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
+				       &c->rq_xdpsq, false);
+		if (err)
+			goto err_close_sqs;
+	}
+
+	err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
+	if (err)
+		goto err_close_xdp_sq;
+
+	err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
+	if (err)
+		goto err_close_rq;
+
+	return 0;
+
+err_close_rq:
+	mlx5e_close_rq(&c->rq);
+
+err_close_xdp_sq:
+	if (c->xdp)
+		mlx5e_close_xdpsq(&c->rq_xdpsq);
+
+err_close_sqs:
+	mlx5e_close_sqs(c);
+
+err_close_icosq:
+	mlx5e_close_icosq(&c->icosq);
+
+err_disable_napi:
+	napi_disable(&c->napi);
+
+	if (c->xdp)
+		mlx5e_close_cq(&c->rq_xdpsq.cq);
+
+err_close_rx_cq:
+	mlx5e_close_cq(&c->rq.cq);
+
+err_close_xdp_tx_cqs:
+	mlx5e_close_cq(&c->xdpsq.cq);
+
+err_close_tx_cqs:
+	mlx5e_close_tx_cqs(c);
+
+err_close_icosq_cq:
+	mlx5e_close_cq(&c->icosq.cq);
+
+	return err;
+}
+
+static void mlx5e_close_queues(struct mlx5e_channel *c)
+{
+	mlx5e_close_xdpsq(&c->xdpsq);
+	mlx5e_close_rq(&c->rq);
+	if (c->xdp)
+		mlx5e_close_xdpsq(&c->rq_xdpsq);
+	mlx5e_close_sqs(c);
+	mlx5e_close_icosq(&c->icosq);
+	napi_disable(&c->napi);
+	if (c->xdp)
+		mlx5e_close_cq(&c->rq_xdpsq.cq);
+	mlx5e_close_cq(&c->rq.cq);
+	mlx5e_close_cq(&c->xdpsq.cq);
+	mlx5e_close_tx_cqs(c);
+	mlx5e_close_cq(&c->icosq.cq);
+}
+
+static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
+{
+	u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id);
+
+	return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
+}
+
 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 			      struct mlx5e_params *params,
 			      struct mlx5e_channel_param *cparam,
+			      struct xdp_umem *umem,
 			      struct mlx5e_channel **cp)
 {
-	struct net_dim_cq_moder icocq_moder = {0, 0};
+	int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
 	struct net_device *netdev = priv->netdev;
-	int cpu = mlx5e_get_cpu(priv, ix);
+	struct mlx5e_xsk_param xsk;
 	struct mlx5e_channel *c;
 	unsigned int irq;
 	int err;
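
mlx5e_enumerate_lag_port() spreads channels across the physical ports of a
LAG device, biasing VFs by their vhca_id so that different functions do
not all start on port 0. A worked example, assuming two LAG ports:

    /* PF (bias 0):         ch 0,1,2,3 -> port 0,1,0,1
     * VF with vhca_id = 5: ch 0,1,2,3 -> port 1,0,1,0 */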
@@ -1926,97 +1967,44 @@
 	c->tstamp   = &priv->tstamp;
 	c->ix       = ix;
 	c->cpu      = cpu;
-	c->pdev     = &priv->mdev->pdev->dev;
+	c->pdev     = priv->mdev->device;
 	c->netdev   = priv->netdev;
 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
 	c->num_tc   = params->num_tc;
 	c->xdp      = !!params->xdp_prog;
 	c->stats    = &priv->channel_stats[ix].ch;
-
 	c->irq_desc = irq_to_desc(irq);
+	c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
+
+	err = mlx5e_alloc_xps_cpumask(c, params);
+	if (err)
+		goto err_free_channel;
 
 	netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
 
-	err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
-	if (err)
+	err = mlx5e_open_queues(c, params, cparam);
+	if (unlikely(err))
 		goto err_napi_del;
 
-	err = mlx5e_open_tx_cqs(c, params, cparam);
-	if (err)
-		goto err_close_icosq_cq;
-
-	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
-	if (err)
-		goto err_close_tx_cqs;
-
-	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
-	if (err)
-		goto err_close_xdp_tx_cqs;
-
-	/* XDP SQ CQ params are same as normal TXQ sq CQ params */
-	err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
-				     &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
-	if (err)
-		goto err_close_rx_cq;
-
-	napi_enable(&c->napi);
-
-	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
-	if (err)
-		goto err_disable_napi;
-
-	err = mlx5e_open_sqs(c, params, cparam);
-	if (err)
-		goto err_close_icosq;
-
-	err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0;
-	if (err)
-		goto err_close_sqs;
-
-	err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq);
-	if (err)
-		goto err_close_xdp_sq;
-
-	err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true);
-	if (err)
-		goto err_close_rq;
+	if (umem) {
+		mlx5e_build_xsk_param(umem, &xsk);
+		err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+		if (unlikely(err))
+			goto err_close_queues;
+	}
 
 	*cp = c;
 
 	return 0;
 
-err_close_rq:
-	mlx5e_close_rq(&c->rq);
-
-err_close_xdp_sq:
-	if (c->xdp)
-		mlx5e_close_xdpsq(&c->rq.xdpsq);
-
-err_close_sqs:
-	mlx5e_close_sqs(c);
-
-err_close_icosq:
-	mlx5e_close_icosq(&c->icosq);
-
-err_disable_napi:
-	napi_disable(&c->napi);
-	if (c->xdp)
-		mlx5e_close_cq(&c->rq.xdpsq.cq);
-
-err_close_rx_cq:
-	mlx5e_close_cq(&c->rq.cq);
-
-err_close_xdp_tx_cqs:
-	mlx5e_close_cq(&c->xdpsq.cq);
-
-err_close_tx_cqs:
-	mlx5e_close_tx_cqs(c);
-
-err_close_icosq_cq:
-	mlx5e_close_cq(&c->icosq.cq);
+err_close_queues:
+	mlx5e_close_queues(c);
 
 err_napi_del:
 	netif_napi_del(&c->napi);
+	mlx5e_free_xps_cpumask(c);
+
+err_free_channel:
 	kvfree(c);
 
 	return err;
@@ -2028,35 +2016,34 @@
 
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
+	mlx5e_activate_icosq(&c->icosq);
 	mlx5e_activate_rq(&c->rq);
-	netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
+	netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
+
+	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+		mlx5e_activate_xsk(c);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
 {
 	int tc;
 
+	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+		mlx5e_deactivate_xsk(c);
+
 	mlx5e_deactivate_rq(&c->rq);
+	mlx5e_deactivate_icosq(&c->icosq);
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_deactivate_txqsq(&c->sq[tc]);
 }
 
 static void mlx5e_close_channel(struct mlx5e_channel *c)
 {
-	mlx5e_close_xdpsq(&c->xdpsq);
-	mlx5e_close_rq(&c->rq);
-	if (c->xdp)
-		mlx5e_close_xdpsq(&c->rq.xdpsq);
-	mlx5e_close_sqs(c);
-	mlx5e_close_icosq(&c->icosq);
-	napi_disable(&c->napi);
-	if (c->xdp)
-		mlx5e_close_cq(&c->rq.xdpsq.cq);
-	mlx5e_close_cq(&c->rq.cq);
-	mlx5e_close_cq(&c->xdpsq.cq);
-	mlx5e_close_tx_cqs(c);
-	mlx5e_close_cq(&c->icosq.cq);
+	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+		mlx5e_close_xsk(c);
+	mlx5e_close_queues(c);
 	netif_napi_del(&c->napi);
+	mlx5e_free_xps_cpumask(c);
 
 	kvfree(c);
 }
@@ -2065,6 +2052,7 @@
 
 static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
 				      struct mlx5e_params *params,
+				      struct mlx5e_xsk_param *xsk,
 				      struct mlx5e_rq_frags_info *info)
 {
 	u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
@@ -2077,10 +2065,10 @@
 		byte_count += MLX5E_METADATA_ETHER_LEN;
 #endif
 
-	if (mlx5e_rx_is_linear_skb(mdev, params)) {
+	if (mlx5e_rx_is_linear_skb(params, xsk)) {
 		int frag_stride;
 
-		frag_stride = mlx5e_rx_get_linear_frag_sz(params);
+		frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
 		frag_stride = roundup_pow_of_two(frag_stride);
 
 		info->arr[0].frag_size = byte_count;
@@ -2131,9 +2119,17 @@
 	return order_base_2(sz);
 }
 
-static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
-				 struct mlx5e_params *params,
-				 struct mlx5e_rq_param *param)
+static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
+{
+	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+	return MLX5_GET(wq, wq, log_wq_sz);
+}
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+			  struct mlx5e_params *params,
+			  struct mlx5e_xsk_param *xsk,
+			  struct mlx5e_rq_param *param)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqc = param->rqc;
@@ -2143,16 +2139,16 @@
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		MLX5_SET(wq, wq, log_wqe_num_of_strides,
-			 mlx5e_mpwqe_get_log_num_strides(mdev, params) -
+			 mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
 			 MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
 		MLX5_SET(wq, wq, log_wqe_stride_size,
-			 mlx5e_mpwqe_get_log_stride_size(mdev, params) -
+			 mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
 			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
-		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
+		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
-		mlx5e_build_rq_frags_info(mdev, params, &param->frags_info);
+		mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
 		ndsegs = param->frags_info.num_frags;
 	}
 
@@ -2165,7 +2161,7 @@
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
 
-	param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+	param->wq.buf_numa_node = dev_to_node(mdev->device);
 }
 
 static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
@@ -2180,11 +2176,11 @@
 		 mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
 	MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
 
-	param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+	param->wq.buf_numa_node = dev_to_node(mdev->device);
 }
 
-static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
-					struct mlx5e_sq_param *param)
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+				 struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2192,7 +2188,7 @@
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
 	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.pdn);
 
-	param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
+	param->wq.buf_numa_node = dev_to_node(priv->mdev->device);
 }
 
 static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
@@ -2201,10 +2197,13 @@
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+	bool allow_swp;
 
+	allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
+		    !!MLX5_IPSEC_DEV(priv->mdev);
 	mlx5e_build_sq_param_common(priv, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
-	MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
+	MLX5_SET(sqc, sqc, allow_swp, allow_swp);
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -2213,11 +2212,14 @@
 	void *cqc = param->cqc;
 
 	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+	if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+		MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
 }
 
-static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
-				    struct mlx5e_params *params,
-				    struct mlx5e_cq_param *param)
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_xsk_param *xsk,
+			     struct mlx5e_cq_param *param)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	void *cqc = param->cqc;
@@ -2225,8 +2227,8 @@
 
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
-			mlx5e_mpwqe_get_log_num_strides(mdev, params);
+		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+			mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		log_cq_size = params->log_rq_mtu_frames;
@@ -2242,9 +2244,9 @@
 	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
-static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
-				    struct mlx5e_params *params,
-				    struct mlx5e_cq_param *param)
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 
@@ -2254,9 +2256,9 @@
 	param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
 }
 
-static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
-				     u8 log_wq_size,
-				     struct mlx5e_cq_param *param)
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+			      u8 log_wq_size,
+			      struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 
@@ -2264,12 +2266,12 @@
 
 	mlx5e_build_common_cq_param(priv, param);
 
-	param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 }
 
-static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
-				    u8 log_wq_size,
-				    struct mlx5e_sq_param *param)
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+			     u8 log_wq_size,
+			     struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2280,28 +2282,44 @@
 	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
 }
 
-static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
-				    struct mlx5e_params *params,
-				    struct mlx5e_sq_param *param)
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	mlx5e_build_sq_param_common(priv, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+}
+
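+/* With Striding RQ, the ICOSQ must have room to post one UMR WQE per RQ
+ * entry, so its log size is the log of the UMR WQE size (in WQEBBs, rounded
+ * up to a power of two) plus the log of the RQ size.
+ */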
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
+				      struct mlx5e_rq_param *rqp)
+{
+	switch (params->rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return order_base_2(MLX5E_UMR_WQEBBS) +
+			mlx5e_get_rq_log_wq_sz(rqp->rqc);
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+	}
 }
 
 static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
 				      struct mlx5e_params *params,
 				      struct mlx5e_channel_param *cparam)
 {
-	u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+	u8 icosq_log_wq_sz;
 
-	mlx5e_build_rq_param(priv, params, &cparam->rq);
+	mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
+
+	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
+
 	mlx5e_build_sq_param(priv, params, &cparam->sq);
 	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
 	mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
-	mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq);
+	mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq);
 	mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
 	mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
 }
@@ -2322,11 +2340,17 @@
 
 	mlx5e_build_channel_param(priv, &chs->params, cparam);
 	for (i = 0; i < chs->num; i++) {
-		err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]);
+		struct xdp_umem *umem = NULL;
+
+		if (chs->params.xdp_prog)
+			umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
+
+		err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
 		if (err)
 			goto err_close_channels;
 	}
 
+	mlx5e_health_channels_update(priv);
 	kvfree(cparam);
 	return 0;
 
@@ -2349,14 +2373,22 @@
 		mlx5e_activate_channel(chs->c[i]);
 }
 
+#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
+
 static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
 {
 	int err = 0;
 	int i;
 
-	for (i = 0; i < chs->num; i++)
-		err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq,
-						  err ? 0 : 20000);
+	for (i = 0; i < chs->num; i++) {
+		int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
+
+		err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
+
+		/* Don't wait on the XSK RQ, because the newer xdpsock sample
+		 * doesn't provide any Fill Ring entries at the setup stage.
+		 */
+	}
 
 	return err ? -ETIMEDOUT : 0;
 }
@@ -2428,35 +2460,33 @@
 	return err;
 }
 
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	struct mlx5e_rqt *rqt;
 	int err;
 	int ix;
 
-	for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
-		rqt = &priv->direct_tir[ix].rqt;
-		err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
-		if (err)
+	for (ix = 0; ix < priv->max_nch; ix++) {
+		err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
+		if (unlikely(err))
 			goto err_destroy_rqts;
 	}
 
 	return 0;
 
 err_destroy_rqts:
-	mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
+	mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
 	for (ix--; ix >= 0; ix--)
-		mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt);
+		mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
 
 	return err;
 }
 
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
 	int i;
 
-	for (i = 0; i < priv->profile->max_nch(priv->mdev); i++)
-		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+	for (i = 0; i < priv->max_nch; i++)
+		mlx5e_destroy_rqt(priv, &tirs[i].rqt);
 }
 
 static int mlx5e_rx_hash_fn(int hfunc)
@@ -2491,7 +2521,7 @@
 			if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
 				ix = mlx5e_bits_invert(i, ilog2(sz));
 
-			ix = priv->channels.params.indirection_rqt[ix];
+			ix = priv->rss_params.indirection_rqt[ix];
 			rqn = rrp.rss.channels->c[ix]->rq.rqn;
 		} else {
 			rqn = rrp.rqn;
@@ -2549,7 +2579,7 @@
 		mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
 	}
 
-	for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		struct mlx5e_redirect_rqt_param direct_rrp = {
 			.is_rss = false,
 			{
@@ -2574,7 +2604,7 @@
 		{
 			.rss = {
 				.channels  = chs,
-				.hfunc     = chs->params.rss_hfunc,
+				.hfunc     = priv->rss_params.hfunc,
 			}
 		},
 	};
@@ -2594,6 +2624,54 @@
 	mlx5e_redirect_rqts(priv, drop_rrp);
 }
 
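+/* Default RSS hash layout per traffic type: TCP and UDP hash on the IP
+ * 4-tuple, IPsec AH/ESP on the IP addresses plus the SPI, and plain
+ * IPv4/IPv6 on the addresses only.
+ */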
+static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
+	[MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+				.l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+				.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+	},
+	[MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+				.l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+				.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+	},
+	[MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+				.l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+				.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+	},
+	[MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+				.l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+				.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+	},
+	[MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+				     .l4_prot_type = 0,
+				     .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+	},
+	[MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+				     .l4_prot_type = 0,
+				     .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+	},
+	[MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+				      .l4_prot_type = 0,
+				      .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+	},
+	[MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+				      .l4_prot_type = 0,
+				      .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+	},
+	[MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+			    .l4_prot_type = 0,
+			    .rx_hash_fields = MLX5_HASH_IP,
+	},
+	[MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+			    .l4_prot_type = 0,
+			    .rx_hash_fields = MLX5_HASH_IP,
+	},
+};
+
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
+{
+	return tirc_default_config[tt];
+}
+
 static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
 {
 	if (!params->lro_en)
@@ -2605,120 +2683,72 @@
 		 MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
 		 MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
 	MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
-		 (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+		 (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
 	MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
 }
 
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
-				    enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+				    const struct mlx5e_tirc_config *ttconfig,
 				    void *tirc, bool inner)
 {
 	void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
 			     MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
 
-#define MLX5_HASH_IP            (MLX5_HASH_FIELD_SEL_SRC_IP   |\
-				 MLX5_HASH_FIELD_SEL_DST_IP)
-
-#define MLX5_HASH_IP_L4PORTS    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
-				 MLX5_HASH_FIELD_SEL_DST_IP   |\
-				 MLX5_HASH_FIELD_SEL_L4_SPORT |\
-				 MLX5_HASH_FIELD_SEL_L4_DPORT)
-
-#define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
-				 MLX5_HASH_FIELD_SEL_DST_IP   |\
-				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-
-	MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
-	if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
+	MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
+	if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
 		void *rss_key = MLX5_ADDR_OF(tirc, tirc,
 					     rx_hash_toeplitz_key);
 		size_t len = MLX5_FLD_SZ_BYTES(tirc,
 					       rx_hash_toeplitz_key);
 
 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
-		memcpy(rss_key, params->toeplitz_hash_key, len);
+		memcpy(rss_key, rss_params->toeplitz_hash_key, len);
+	}
+	MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+		 ttconfig->l3_prot_type);
+	MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+		 ttconfig->l4_prot_type);
+	MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+		 ttconfig->rx_hash_fields);
+}
+
+static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
+					enum mlx5e_traffic_types tt,
+					u32 rx_hash_fields)
+{
+	*ttconfig                = tirc_default_config[tt];
+	ttconfig->rx_hash_fields = rx_hash_fields;
+}
+
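+/* Reapply the current RSS hash configuration to all indirect TIRs, and to
+ * the inner TIRs as well when tunnel inner steering is supported.
+ */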
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+	void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+	struct mlx5e_rss_params *rss = &priv->rss_params;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+	struct mlx5e_tirc_config ttconfig;
+	int tt;
+
+	MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		memset(tirc, 0, ctxlen);
+		mlx5e_update_rx_hash_fields(&ttconfig, tt,
+					    rss->rx_hash_fields[tt]);
+		mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
+		mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
 	}
 
-	switch (tt) {
-	case MLX5E_TT_IPV4_TCP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_TCP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
+	if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+		return;
 
-	case MLX5E_TT_IPV6_TCP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_TCP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV4_UDP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_UDP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV6_UDP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_UDP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV4_IPSEC_AH:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV6_IPSEC_AH:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV4_IPSEC_ESP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV6_IPSEC_ESP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV4:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP);
-		break;
-
-	case MLX5E_TT_IPV6:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP);
-		break;
-	default:
-		WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		memset(tirc, 0, ctxlen);
+		mlx5e_update_rx_hash_fields(&ttconfig, tt,
+					    rss->rx_hash_fields[tt]);
+		mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
+		mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in,
+				     inlen);
 	}
 }
 
@@ -2750,7 +2780,7 @@
 			goto free_in;
 	}
 
-	for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
 					   in, inlen);
 		if (err)
@@ -2763,21 +2793,6 @@
 	return err;
 }
 
-static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
-					    enum mlx5e_traffic_types tt,
-					    u32 *tirc)
-{
-	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
-
-	mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
-
-	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-	MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
-	MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
-
-	mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
-}
-
 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
 			 struct mlx5e_params *params, u16 mtu)
 {
@@ -2806,7 +2821,7 @@
 	*mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
 }
 
-static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
 {
 	struct mlx5e_params *params = &priv->channels.params;
 	struct net_device *netdev = priv->netdev;
@@ -2827,6 +2842,21 @@
 	return 0;
 }
 
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
+{
+	struct mlx5e_params *params = &priv->channels.params;
+	struct net_device *netdev   = priv->netdev;
+	struct mlx5_core_dev *mdev  = priv->mdev;
+	u16 max_mtu;
+
+	/* MTU range: 68 - hw-specific max */
+	netdev->min_mtu = ETH_MIN_MTU;
+
+	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+	netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
+				ETH_MAX_MTU);
+}
+
 static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -2850,12 +2880,11 @@
 
 static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv)
 {
-	int max_nch = priv->profile->max_nch(priv->mdev);
 	int i, tc;
 
-	for (i = 0; i < max_nch; i++)
+	for (i = 0; i < priv->max_nch; i++)
 		for (tc = 0; tc < priv->profile->max_tc; tc++)
-			priv->channel_tc2txq[i][tc] = i + tc * max_nch;
+			priv->channel_tc2txq[i][tc] = i + tc * priv->max_nch;
 }
 
 static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
@@ -2876,28 +2905,34 @@
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 {
 	int num_txqs = priv->channels.num * priv->channels.params.num_tc;
+	int num_rxqs = priv->channels.num * priv->profile->rq_groups;
 	struct net_device *netdev = priv->netdev;
 
 	mlx5e_netdev_set_tcs(netdev);
 	netif_set_real_num_tx_queues(netdev, num_txqs);
-	netif_set_real_num_rx_queues(netdev, priv->channels.num);
+	netif_set_real_num_rx_queues(netdev, num_rxqs);
 
 	mlx5e_build_tx2sq_maps(priv);
 	mlx5e_activate_channels(&priv->channels);
+	mlx5e_xdp_tx_enable(priv);
 	netif_tx_start_all_queues(priv->netdev);
 
-	if (MLX5_ESWITCH_MANAGER(priv->mdev))
+	if (mlx5e_is_vport_rep(priv))
 		mlx5e_add_sqs_fwd_rules(priv);
 
 	mlx5e_wait_channels_min_rx_wqes(&priv->channels);
 	mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
+
+	mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
 }
 
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 {
+	mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
+
 	mlx5e_redirect_rqts_to_drop(priv);
 
-	if (MLX5_ESWITCH_MANAGER(priv->mdev))
+	if (mlx5e_is_vport_rep(priv))
 		mlx5e_remove_sqs_fwd_rules(priv);
 
 	/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@ -2905,16 +2940,18 @@
 	 */
 	netif_tx_stop_all_queues(priv->netdev);
 	netif_tx_disable(priv->netdev);
+	mlx5e_xdp_tx_disable(priv);
 	mlx5e_deactivate_channels(&priv->channels);
 }
 
-void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
-				struct mlx5e_channels *new_chs,
-				mlx5e_fp_hw_modify hw_modify)
+static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+				       struct mlx5e_channels *new_chs,
+				       mlx5e_fp_hw_modify hw_modify)
 {
 	struct net_device *netdev = priv->netdev;
 	int new_num_txqs;
 	int carrier_ok;
+
 	new_num_txqs = new_chs->num * new_chs->params.num_tc;
 
 	carrier_ok = netif_carrier_ok(netdev);
@@ -2932,7 +2969,7 @@
 	if (hw_modify)
 		hw_modify(priv);
 
-	mlx5e_refresh_tirs(priv, false);
+	priv->profile->update_rx(priv);
 	mlx5e_activate_priv_channels(priv);
 
 	/* return carrier back if needed */
@@ -2940,6 +2977,28 @@
 		netif_carrier_on(netdev);
 }
 
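+/* Open the new set of channels before tearing down the old one, so that a
+ * failure to allocate resources leaves the current channels (and traffic)
+ * untouched.
+ */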
+int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
+			       struct mlx5e_channels *new_chs,
+			       mlx5e_fp_hw_modify hw_modify)
+{
+	int err;
+
+	err = mlx5e_open_channels(priv, new_chs);
+	if (err)
+		return err;
+
+	mlx5e_switch_priv_channels(priv, new_chs, hw_modify);
+	return 0;
+}
+
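+/* Recreate the channels with their current parameters, e.g. to recover from
+ * an error condition such as a TX timeout.
+ */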
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
+{
+	struct mlx5e_channels new_channels = {};
+
+	new_channels.params = priv->channels.params;
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+}
+
 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 {
 	priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
@@ -2949,25 +3008,28 @@
 int mlx5e_open_locked(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	bool is_xdp = priv->channels.params.xdp_prog;
 	int err;
 
 	set_bit(MLX5E_STATE_OPENED, &priv->state);
+	if (is_xdp)
+		mlx5e_xdp_set_open(priv);
 
 	err = mlx5e_open_channels(priv, &priv->channels);
 	if (err)
 		goto err_clear_state_opened_flag;
 
-	mlx5e_refresh_tirs(priv, false);
+	priv->profile->update_rx(priv);
 	mlx5e_activate_priv_channels(priv);
 	if (priv->profile->update_carrier)
 		priv->profile->update_carrier(priv);
 
-	if (priv->profile->update_stats)
-		queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
-
+	mlx5e_queue_update_stats(priv);
 	return 0;
 
 err_clear_state_opened_flag:
+	if (is_xdp)
+		mlx5e_xdp_set_closed(priv);
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 	return err;
 }
@@ -2999,6 +3061,8 @@
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return 0;
 
+	if (priv->channels.params.xdp_prog)
+		mlx5e_xdp_set_closed(priv);
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
 	netif_carrier_off(priv->netdev);
@@ -3051,14 +3115,14 @@
 			       struct mlx5e_cq *cq,
 			       struct mlx5e_cq_param *param)
 {
-	param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
-	param->wq.db_numa_node  = dev_to_node(&mdev->pdev->dev);
+	param->wq.buf_numa_node = dev_to_node(mdev->device);
+	param->wq.db_numa_node  = dev_to_node(mdev->device);
 
 	return mlx5e_alloc_cq_common(mdev, param, cq);
 }
 
-static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
-			      struct mlx5e_rq *drop_rq)
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+		       struct mlx5e_rq *drop_rq)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_cq_param cq_param = {};
@@ -3102,7 +3166,7 @@
 	return err;
 }
 
-static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
 {
 	mlx5e_destroy_rq(drop_rq);
 	mlx5e_free_rq(drop_rq);
@@ -3110,20 +3174,19 @@
 	mlx5e_free_cq(&drop_rq->cq);
 }
 
-int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
-		     u32 underlay_qpn, u32 *tisn)
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
 {
-	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-	MLX5_SET(tisc, tisc, prio, tc << 1);
-	MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
 	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
 
+	if (MLX5_GET(tisc, tisc, tls_en))
+		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
+
 	if (mlx5_lag_is_lacp_owner(mdev))
 		MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
 
-	return mlx5_core_create_tis(mdev, in, sizeof(in), tisn);
+	return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn);
 }
 
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
@@ -3131,59 +3194,97 @@
 	mlx5_core_destroy_tis(mdev, tisn);
 }
 
+void mlx5e_destroy_tises(struct mlx5e_priv *priv)
+{
+	int tc, i;
+
+	for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++)
+		for (tc = 0; tc < priv->profile->max_tc; tc++)
+			mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
+}
+
+static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1;
+}
+
 int mlx5e_create_tises(struct mlx5e_priv *priv)
 {
+	int tc, i;
 	int err;
-	int tc;
 
-	for (tc = 0; tc < priv->profile->max_tc; tc++) {
-		err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]);
-		if (err)
-			goto err_close_tises;
+	for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) {
+		for (tc = 0; tc < priv->profile->max_tc; tc++) {
+			u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+			void *tisc;
+
+			tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+			MLX5_SET(tisc, tisc, prio, tc << 1);
+
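+			/* lag_tx_port_affinity is 1-based; 0 (the default)
+			 * leaves the TIS unpinned from any port.
+			 */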
+			if (mlx5e_lag_should_assign_affinity(priv->mdev))
+				MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1);
+
+			err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]);
+			if (err)
+				goto err_close_tises;
+		}
 	}
 
 	return 0;
 
 err_close_tises:
-	for (tc--; tc >= 0; tc--)
-		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+	for (; i >= 0; i--) {
+		for (tc--; tc >= 0; tc--)
+			mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
+		tc = priv->profile->max_tc;
+	}
 
 	return err;
 }
 
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 {
-	int tc;
+	mlx5e_destroy_tises(priv);
+}
 
-	for (tc = 0; tc < priv->profile->max_tc; tc++)
-		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
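+/* Fields common to all indirect-dispatch TIR contexts: the transport
+ * domain, the RQT to spread traffic into, tunneled offload, and LRO.
+ */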
+static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
+					     u32 rqtn, u32 *tirc)
+{
+	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+	MLX5_SET(tirc, tirc, indirect_table, rqtn);
+	MLX5_SET(tirc, tirc, tunneled_offload_en,
+		 priv->channels.params.tunneled_offload_en);
+
+	mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
 }
 
 static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
 				      enum mlx5e_traffic_types tt,
 				      u32 *tirc)
 {
-	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
-
-	mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
-
-	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-	MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
-	mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+	mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
+	mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+				       &tirc_default_config[tt], tirc, false);
 }
 
 static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
 {
-	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
-
-	mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
-
-	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-	MLX5_SET(tirc, tirc, indirect_table, rqtn);
+	mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc);
 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
 }
 
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
+static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
+					    enum mlx5e_traffic_types tt,
+					    u32 *tirc)
+{
+	mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
+	mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+				       &tirc_default_config[tt], tirc, true);
+}
+
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
 {
 	struct mlx5e_tir *tir;
 	void *tirc;
@@ -3210,7 +3311,7 @@
 		}
 	}
 
-	if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+	if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
 		goto out;
 
 	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
@@ -3242,13 +3343,12 @@
 	return err;
 }
 
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	int nch = priv->profile->max_nch(priv->mdev);
 	struct mlx5e_tir *tir;
 	void *tirc;
 	int inlen;
-	int err;
+	int err = 0;
 	u32 *in;
 	int ix;
 
@@ -3257,51 +3357,49 @@
 	if (!in)
 		return -ENOMEM;
 
-	for (ix = 0; ix < nch; ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		memset(in, 0, inlen);
-		tir = &priv->direct_tir[ix];
+		tir = &tirs[ix];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-		mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc);
+		mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
 		err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
-		if (err)
+		if (unlikely(err))
 			goto err_destroy_ch_tirs;
 	}
 
-	kvfree(in);
-
-	return 0;
+	goto out;
 
 err_destroy_ch_tirs:
-	mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err);
+	mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
 	for (ix--; ix >= 0; ix--)
-		mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]);
+		mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
 
+out:
 	kvfree(in);
 
 	return err;
 }
 
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
 {
 	int i;
 
 	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
 		mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
 
-	if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+	if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
 		return;
 
 	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
 		mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
 }
 
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	int nch = priv->profile->max_nch(priv->mdev);
 	int i;
 
-	for (i = 0; i < nch; i++)
-		mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
+	for (i = 0; i < priv->max_nch; i++)
+		mlx5e_destroy_tir(priv->mdev, &tirs[i]);
 }
 
 static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
@@ -3332,10 +3430,9 @@
 	return 0;
 }
 
-static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
+static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 				 struct tc_mqprio_qopt *mqprio)
 {
-	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_channels new_channels = {};
 	u8 tc = mqprio->num_tc;
 	int err = 0;
@@ -3355,13 +3452,12 @@
 		goto out;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 	if (err)
 		goto out;
 
 	priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
 				    new_channels.params.num_tc);
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 out:
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -3369,16 +3465,19 @@
 
 #ifdef CONFIG_MLX5_ESWITCH
 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
-				     struct tc_cls_flower_offload *cls_flower,
-				     int flags)
+				     struct flow_cls_offload *cls_flower,
+				     unsigned long flags)
 {
 	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
-		return mlx5e_configure_flower(priv, cls_flower, flags);
-	case TC_CLSFLOWER_DESTROY:
-		return mlx5e_delete_flower(priv, cls_flower, flags);
-	case TC_CLSFLOWER_STATS:
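+	/* The TX software parser (swp) is needed for packets the HW parser
+	 * can't handle on its own, i.e. Geneve offloads and IPsec.
+	 */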
-		return mlx5e_stats_flower(priv, cls_flower, flags);
+	case FLOW_CLS_REPLACE:
+		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+					      flags);
+	case FLOW_CLS_DESTROY:
+		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+					   flags);
+	case FLOW_CLS_STATS:
+		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+					  flags);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -3387,66 +3486,78 @@
 static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 				   void *cb_priv)
 {
+	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
 	struct mlx5e_priv *priv = cb_priv;
 
-	if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
-		return -EOPNOTSUPP;
-
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
-		return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlx5e_setup_tc_block(struct net_device *dev,
-				struct tc_block_offload *f)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
-					     priv, priv, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
-					priv);
-		return 0;
+		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 #endif
 
+static LIST_HEAD(mlx5e_block_cb_list);
+
 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
 	switch (type) {
 #ifdef CONFIG_MLX5_ESWITCH
-	case TC_SETUP_BLOCK:
-		return mlx5e_setup_tc_block(dev, type_data);
+	case TC_SETUP_BLOCK: {
+		struct flow_block_offload *f = type_data;
+
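+		/* The driver's block callbacks take their own locks and are
+		 * safe to run without RTNL.
+		 */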
+		f->unlocked_driver_cb = true;
+		return flow_block_cb_setup_simple(type_data,
+						  &mlx5e_block_cb_list,
+						  mlx5e_setup_tc_block_cb,
+						  priv, priv, true);
+	}
 #endif
 	case TC_SETUP_QDISC_MQPRIO:
-		return mlx5e_setup_tc_mqprio(dev, type_data);
+		return mlx5e_setup_tc_mqprio(priv, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
-static void
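+/* Fold the per-channel SW counters (regular and XSK RX queues, plus the TX
+ * queues of every TC) into a single rtnl_link_stats64.
+ */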
+void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
+{
+	int i;
+
+	for (i = 0; i < priv->max_nch; i++) {
+		struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+		struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
+		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+		int j;
+
+		s->rx_packets   += rq_stats->packets + xskrq_stats->packets;
+		s->rx_bytes     += rq_stats->bytes + xskrq_stats->bytes;
+
+		for (j = 0; j < priv->max_opened_tc; j++) {
+			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+			s->tx_packets    += sq_stats->packets;
+			s->tx_bytes      += sq_stats->bytes;
+			s->tx_dropped    += sq_stats->dropped;
+		}
+	}
+}
+
+void
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5e_sw_stats *sstats = &priv->stats.sw;
 	struct mlx5e_vport_stats *vstats = &priv->stats.vport;
 	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
-	/* update HW stats in background for next time */
-	queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+	if (!mlx5e_monitor_counter_supported(priv)) {
+		/* update HW stats in background for next time */
+		mlx5e_queue_update_stats(priv);
+	}
 
 	if (mlx5e_is_uplink_rep(priv)) {
 		stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
@@ -3454,12 +3565,7 @@
 		stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
 		stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
 	} else {
-		mlx5e_grp_sw_update_stats(priv);
-		stats->rx_packets = sstats->rx_packets;
-		stats->rx_bytes   = sstats->rx_bytes;
-		stats->tx_packets = sstats->tx_packets;
-		stats->tx_bytes   = sstats->tx_bytes;
-		stats->tx_dropped = sstats->tx_queue_dropped;
+		mlx5e_fold_sw_stats64(priv, stats);
 	}
 
 	stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
@@ -3528,6 +3634,13 @@
 
 	mutex_lock(&priv->state_lock);
 
+	if (enable && priv->xsk.refcnt) {
+		netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n",
+			    priv->xsk.refcnt);
+		err = -EINVAL;
+		goto out;
+	}
+
 	old_params = &priv->channels.params;
 	if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
 		netdev_warn(netdev, "can't set LRO with legacy RQ\n");
@@ -3541,8 +3654,8 @@
 	new_channels.params.lro_en = enable;
 
 	if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
-		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
-		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
+		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) ==
+		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
 			reset = false;
 	}
 
@@ -3552,11 +3665,7 @@
 		goto out;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err)
-		goto out;
-
-	mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
 out:
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -3579,7 +3688,7 @@
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	if (!enable && mlx5e_tc_num_filters(priv)) {
+	if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
 		netdev_err(netdev,
 			   "Active offloaded tc filters, can't turn hw_tc_offload off\n");
 		return -EINVAL;
@@ -3674,8 +3783,7 @@
 	return 0;
 }
 
-static int mlx5e_set_features(struct net_device *netdev,
-			      netdev_features_t features)
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	netdev_features_t oper_features = netdev->features;
 	int err = 0;
@@ -3721,9 +3829,16 @@
 			netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
 	}
 	if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
-		features &= ~NETIF_F_LRO;
-		if (params->lro_en)
+		if (features & NETIF_F_LRO) {
 			netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
+			features &= ~NETIF_F_LRO;
+		}
+	}
+
+	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+		features &= ~NETIF_F_RXHASH;
+		if (netdev->features & NETIF_F_RXHASH)
+			netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
 	}
 
 	mutex_unlock(&priv->state_lock);
@@ -3731,6 +3846,43 @@
 	return features;
 }
 
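+/* An MTU change has to be validated against every active XSK, since each
+ * umem may use its own chunk size and headroom.
+ */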
+static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
+				   struct mlx5e_channels *chs,
+				   struct mlx5e_params *new_params,
+				   struct mlx5_core_dev *mdev)
+{
+	u16 ix;
+
+	for (ix = 0; ix < chs->params.num_channels; ix++) {
+		struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
+		struct mlx5e_xsk_param xsk;
+
+		if (!umem)
+			continue;
+
+		mlx5e_build_xsk_param(umem, &xsk);
+
+		if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
+			u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
+			int max_mtu_frame, max_mtu_page, max_mtu;
+
+			/* Two criteria must be met:
+			 * 1. HW MTU + all headrooms <= XSK frame size.
+			 * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
+			 */
+			max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
+			max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
+			max_mtu = min(max_mtu_frame, max_mtu_page);
+
+			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n",
+				   new_params->sw_mtu, ix, max_mtu);
+			return false;
+		}
+	}
+
+	return true;
+}
+
 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 		     change_hw_mtu_cb set_mtu_cb)
 {
@@ -3751,18 +3903,31 @@
 	new_channels.params.sw_mtu = new_mtu;
 
 	if (params->xdp_prog &&
-	    !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+	    !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
 		netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
-			   new_mtu, MLX5E_XDP_MAX_MTU);
+			   new_mtu, mlx5e_xdp_max_mtu(params, NULL));
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (priv->xsk.refcnt &&
+	    !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
+				    &new_channels.params, priv->mdev)) {
 		err = -EINVAL;
 		goto out;
 	}
 
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-		bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params);
-		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
-		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
+		bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
+							      &new_channels.params,
+							      NULL);
+		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
+		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
 
+		/* If XSK is active, XSK RQs are linear. */
+		is_linear |= priv->xsk.refcnt;
+
+		/* Always reset in linear mode - hw_mtu is used in the data path. */
 		reset = reset && (is_linear || (ppw_old != ppw_new));
 	}
 
@@ -3774,11 +3939,10 @@
 		goto out;
 	}
 
-	err = mlx5e_open_channels(priv, &new_channels);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb);
 	if (err)
 		goto out;
 
-	mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb);
 	netdev->mtu = new_channels.params.sw_mtu;
 
 out:
@@ -3835,7 +3999,8 @@
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
 	case HWTSTAMP_FILTER_NTP_ALL:
 		/* Disable CQE compression */
-		netdev_warn(priv->netdev, "Disabling cqe compression");
+		if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+			netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
 		err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
 		if (err) {
 			netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
@@ -3852,6 +4017,9 @@
 	memcpy(&priv->tstamp, &config, sizeof(config));
 	mutex_unlock(&priv->state_lock);
 
+	/* Changing the timestamping config may have toggled CQE compression,
+	 * which some features (e.g. RXHASH) depend on, so re-evaluate the
+	 * netdev features.
+	 */
+	netdev_update_features(priv->netdev);
+
 	return copy_to_user(ifr->ifr_data, &config,
 			    sizeof(config)) ? -EFAULT : 0;
 }
@@ -3881,7 +4049,7 @@
 }
 
 #ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -3918,8 +4086,8 @@
 	return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
 }
 
-static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
-			     int max_tx_rate)
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+		      int max_tx_rate)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -3960,8 +4128,8 @@
 					    mlx5_ifla_link2vport(link_state));
 }
 
-static int mlx5e_get_vf_config(struct net_device *dev,
-			       int vf, struct ifla_vf_info *ivi)
+int mlx5e_get_vf_config(struct net_device *dev,
+			int vf, struct ifla_vf_info *ivi)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -3974,8 +4142,8 @@
 	return 0;
 }
 
-static int mlx5e_get_vf_stats(struct net_device *dev,
-			      int vf, struct ifla_vf_stats *vf_stats)
+int mlx5e_get_vf_stats(struct net_device *dev,
+		       int vf, struct ifla_vf_stats *vf_stats)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -4036,8 +4204,7 @@
 	queue_work(priv->wq, &vxlan_work->work);
 }
 
-static void mlx5e_add_vxlan_port(struct net_device *netdev,
-				 struct udp_tunnel_info *ti)
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -4050,8 +4217,7 @@
 	mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
 }
 
-static void mlx5e_del_vxlan_port(struct net_device *netdev,
-				 struct udp_tunnel_info *ti)
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -4087,6 +4253,11 @@
 	switch (proto) {
 	case IPPROTO_GRE:
 		return features;
+	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
+		if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
+			return features;
+		break;
 	case IPPROTO_UDP:
 		udph = udp_hdr(skb);
 		port = be16_to_cpu(udph->dest);
@@ -4094,6 +4265,12 @@
 		/* Verify if UDP port is being offloaded by HW */
 		if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
 			return features;
+
+#if IS_ENABLED(CONFIG_GENEVE)
+		/* Support Geneve offload for default UDP port */
+		if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
+			return features;
+#endif
 	}
 
 out:
@@ -4101,9 +4278,9 @@
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
-static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
-					      struct net_device *netdev,
-					      netdev_features_t features)
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+				       struct net_device *netdev,
+				       netdev_features_t features)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -4123,31 +4300,13 @@
 	return features;
 }
 
-static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
-					struct mlx5e_txqsq *sq)
-{
-	struct mlx5_eq *eq = sq->cq.mcq.eq;
-	u32 eqe_count;
-
-	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-		   eq->eqn, eq->cons_index, eq->irqn);
-
-	eqe_count = mlx5_eq_poll_irq_disabled(eq);
-	if (!eqe_count)
-		return false;
-
-	netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
-	sq->channel->stats->eq_rearm++;
-	return true;
-}
-
 static void mlx5e_tx_timeout_work(struct work_struct *work)
 {
 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 					       tx_timeout_work);
-	struct net_device *dev = priv->netdev;
-	bool reopen_channels = false;
-	int i, err;
+	bool report_failed = false;
+	int err;
+	int i;
 
 	rtnl_lock();
 	mutex_lock(&priv->state_lock);
@@ -4156,34 +4315,24 @@
 		goto unlock;
 
 	for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
-		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
+		struct netdev_queue *dev_queue =
+			netdev_get_tx_queue(priv->netdev, i);
 		struct mlx5e_txqsq *sq = priv->txq2sq[i];
 
 		if (!netif_xmit_stopped(dev_queue))
 			continue;
 
-		netdev_err(dev,
-			   "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
-			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
-			   jiffies_to_usecs(jiffies - dev_queue->trans_start));
-
-		/* If we recover a lost interrupt, most likely TX timeout will
-		 * be resolved, skip reopening channels
-		 */
-		if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
-			clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-			reopen_channels = true;
-		}
+		if (mlx5e_reporter_tx_timeout(sq))
+			report_failed = true;
 	}
 
-	if (!reopen_channels)
+	if (!report_failed)
 		goto unlock;
 
-	mlx5e_close_locked(dev);
-	err = mlx5e_open_locked(dev);
+	err = mlx5e_safe_reopen_channels(priv);
 	if (err)
 		netdev_err(priv->netdev,
-			   "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+			   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
 			   err);
 
 unlock:
@@ -4217,15 +4366,29 @@
 	new_channels.params = priv->channels.params;
 	new_channels.params.xdp_prog = prog;
 
-	if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+	/* No XSK params: AF_XDP can't be enabled yet at the point of setting
+	 * the XDP program.
+	 */
+	if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
 		netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
-			    new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+			    new_channels.params.sw_mtu,
+			    mlx5e_xdp_max_mtu(&new_channels.params, NULL));
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
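+/* hw_modify callback for the XDP channel switch: sync the XDP open/closed
+ * bookkeeping with the program installed in the new channels.
+ */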
+static int mlx5e_xdp_update_state(struct mlx5e_priv *priv)
+{
+	if (priv->channels.params.xdp_prog)
+		mlx5e_xdp_set_open(priv);
+	else
+		mlx5e_xdp_set_closed(priv);
+
+	return 0;
+}
+
 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4246,8 +4409,6 @@
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);
 
-	if (was_opened && reset)
-		mlx5e_close_locked(netdev);
 	if (was_opened && !reset) {
 		/* num_channels is invariant here, so we can take the
 		 * batched reference right upfront.
@@ -4259,20 +4420,31 @@
 		}
 	}
 
-	/* exchange programs, extra prog reference we got from caller
-	 * as long as we don't fail from this point onwards.
-	 */
-	old_prog = xchg(&priv->channels.params.xdp_prog, prog);
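+	/* If a reset is needed while the device is up, go through the safe
+	 * channel switch so that a failure keeps the old program and channels.
+	 */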
+	if (was_opened && reset) {
+		struct mlx5e_channels new_channels = {};
+
+		new_channels.params = priv->channels.params;
+		new_channels.params.xdp_prog = prog;
+		mlx5e_set_rq_type(priv->mdev, &new_channels.params);
+		old_prog = priv->channels.params.xdp_prog;
+
+		err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state);
+		if (err)
+			goto unlock;
+	} else {
+		/* Exchange the programs; the extra prog reference we got from
+		 * the caller is kept as long as we don't fail from this point
+		 * onwards.
+		 */
+		old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+	}
+
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
-	if (reset) /* change RQ type according to priv->xdp_prog */
+	if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
 		mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
 
-	if (was_opened && reset)
-		mlx5e_open_locked(netdev);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+	if (!was_opened || reset)
 		goto unlock;
 
 	/* exchanging programs w/o reset, we update ref counts on behalf
@@ -4280,19 +4452,29 @@
 	 */
 	for (i = 0; i < priv->channels.num; i++) {
 		struct mlx5e_channel *c = priv->channels.c[i];
+		bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
 
 		clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+		if (xsk_open)
+			clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
 		napi_synchronize(&c->napi);
 		/* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
 
 		old_prog = xchg(&c->rq.xdp_prog, prog);
-
-		set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
-		/* napi_schedule in case we have missed anything */
-		napi_schedule(&c->napi);
-
 		if (old_prog)
 			bpf_prog_put(old_prog);
+
+		if (xsk_open) {
+			old_prog = xchg(&c->xskrq.xdp_prog, prog);
+			if (old_prog)
+				bpf_prog_put(old_prog);
+		}
+
+		set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+		if (xsk_open)
+			set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+		/* napi_schedule in case we have missed anything */
+		napi_schedule(&c->napi);
 	}
 
 unlock:
@@ -4323,11 +4505,69 @@
 	case XDP_QUERY_PROG:
 		xdp->prog_id = mlx5e_xdp_query(dev);
 		return 0;
+	case XDP_SETUP_XSK_UMEM:
+		return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
+					    xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}
 }
 
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				struct net_device *dev, u32 filter_mask,
+				int nlflags)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 mode, setting;
+	int err;
+
+	err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
+	if (err)
+		return err;
+	mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
+				       mode,
+				       0, 0, nlflags, filter_mask, NULL);
+}
+
+static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+				u16 flags, struct netlink_ext_ack *extack)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct nlattr *attr, *br_spec;
+	u16 mode = BRIDGE_MODE_UNDEF;
+	u8 setting;
+	int rem;
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (!br_spec)
+		return -EINVAL;
+
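+	/* Scan the IFLA_AF_SPEC attributes for a bridge mode (VEB or VEPA). */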
+	nla_for_each_nested(attr, br_spec, rem) {
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+
+		if (nla_len(attr) < sizeof(mode))
+			return -EINVAL;
+
+		mode = nla_get_u16(attr);
+		if (mode > BRIDGE_MODE_VEPA)
+			return -EINVAL;
+
+		break;
+	}
+
+	if (mode == BRIDGE_MODE_UNDEF)
+		return -EINVAL;
+
+	setting = (mode == BRIDGE_MODE_VEPA) ?  1 : 0;
+	return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
+}
+#endif
+
 const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
@@ -4350,10 +4590,14 @@
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
 	.ndo_bpf		 = mlx5e_xdp,
 	.ndo_xdp_xmit            = mlx5e_xdp_xmit,
+	.ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
 #ifdef CONFIG_MLX5_EN_ARFS
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
 #ifdef CONFIG_MLX5_ESWITCH
+	.ndo_bridge_setlink      = mlx5e_bridge_setlink,
+	.ndo_bridge_getlink      = mlx5e_bridge_getlink,
+
 	/* SRIOV E-Switch NDOs */
 	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
 	.ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
@@ -4363,8 +4607,6 @@
 	.ndo_get_vf_config       = mlx5e_get_vf_config,
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
-	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
-	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
 #endif
 };
 
@@ -4418,9 +4660,9 @@
 		link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
 }
 
-static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
 {
-	struct net_dim_cq_moder moder;
+	struct dim_cq_moder moder;
 
 	moder.cq_period_mode = cq_period_mode;
 	moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
@@ -4431,9 +4673,9 @@
 	return moder;
 }
 
-static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
 {
-	struct net_dim_cq_moder moder;
+	struct dim_cq_moder moder;
 
 	moder.cq_period_mode = cq_period_mode;
 	moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
@@ -4447,8 +4689,8 @@
 static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
 {
 	return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
-		NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
-		NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+		DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+		DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 }
 
 void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
@@ -4493,7 +4735,43 @@
 	return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
 }
 
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+			   struct mlx5e_params *params)
+{
+	/* Prefer Striding RQ, unless any of the following holds:
+	 * - Striding RQ configuration is not possible/supported.
+	 * - Slow PCI heuristic.
+	 * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+	 *
+	 * No XSK params: checking the availability of striding RQ in general.
+	 */
+	if (!slow_pci_heuristic(mdev) &&
+	    mlx5e_striding_rq_possible(mdev, params) &&
+	    (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+	     !mlx5e_rx_is_linear_skb(params, NULL)))
+		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+	mlx5e_set_rq_type(mdev, params);
+	mlx5e_init_rq_type_params(mdev, params);
+}
+
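+/* Default RSS state: Toeplitz hashing with a random key, an evenly spread
+ * indirection table, and the per-traffic-type hash fields from
+ * tirc_default_config.
+ */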
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+			    u16 num_channels)
+{
+	enum mlx5e_traffic_types tt;
+
+	rss_params->hfunc = ETH_RSS_HASH_TOP;
+	netdev_rss_key_fill(rss_params->toeplitz_hash_key,
+			    sizeof(rss_params->toeplitz_hash_key));
+	mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
+				      MLX5E_INDIR_RQT_SIZE, num_channels);
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+		rss_params->rx_hash_fields[tt] =
+			tirc_default_config[tt].rx_hash_fields;
+}
+
 void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+			    struct mlx5e_xsk *xsk,
+			    struct mlx5e_rss_params *rss_params,
 			    struct mlx5e_params *params,
 			    u16 max_channels, u16 mtu)
 {
@@ -4509,6 +4787,10 @@
 		MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
 		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
+	/* XDP SQ */
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
+			MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+
 	/* set CQE compression */
 	params->rx_cqe_compress_def = false;
 	if (MLX5_CAP_GEN(mdev, cqe_compression) &&
@@ -4516,27 +4798,19 @@
 		params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
 
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
 
 	/* RQ */
-	/* Prefer Striding RQ, unless any of the following holds:
-	 * - Striding RQ configuration is not possible/supported.
-	 * - Slow PCI heuristic.
-	 * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
-	 */
-	if (!slow_pci_heuristic(mdev) &&
-	    mlx5e_striding_rq_possible(mdev, params) &&
-	    (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
-	     !mlx5e_rx_is_linear_skb(mdev, params)))
-		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
-	mlx5e_set_rq_type(mdev, params);
-	mlx5e_init_rq_type_params(mdev, params);
+	mlx5e_build_rq_params(mdev, params);
 
 	/* HW LRO */
 
 	/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
-	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+		/* No XSK params: checking the availability of striding RQ in general. */
+		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
 			params->lro_en = !slow_pci_heuristic(mdev);
+	}
 	params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
 	/* CQ moderation params */
@@ -4549,47 +4823,22 @@
 	mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 
 	/* TX inline */
-	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
+	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 
 	/* RSS */
-	params->rss_hfunc = ETH_RSS_HASH_XOR;
-	netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
-	mlx5e_build_default_indir_rqt(params->indirection_rqt,
-				      MLX5E_INDIR_RQT_SIZE, max_channels);
-}
+	mlx5e_build_rss_params(rss_params, params->num_channels);
+	params->tunneled_offload_en =
+		mlx5e_tunnel_inner_ft_supported(mdev);
 
-static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
-					struct net_device *netdev,
-					const struct mlx5e_profile *profile,
-					void *ppriv)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	priv->mdev        = mdev;
-	priv->netdev      = netdev;
-	priv->profile     = profile;
-	priv->ppriv       = ppriv;
-	priv->msglevel    = MLX5E_MSG_LEVEL;
-	priv->max_opened_tc = 1;
-
-	mlx5e_build_nic_params(mdev, &priv->channels.params,
-			       profile->max_nch(mdev), netdev->mtu);
-
-	mutex_init(&priv->state_lock);
-
-	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
-	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
-	INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
-	INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
-
-	mlx5e_timestamp_init(priv);
+	/* AF_XDP */
+	params->xsk = xsk;
 }
 
 static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+	mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
 	if (is_zero_ether_addr(netdev->dev_addr) &&
 	    !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
 		eth_hw_addr_random(netdev);
@@ -4597,12 +4846,6 @@
 	}
 }
 
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-static const struct switchdev_ops mlx5e_switchdev_ops = {
-	.switchdev_port_attr_get	= mlx5e_attr_get,
-};
-#endif
-
 static void mlx5e_build_nic_netdev(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4610,7 +4853,7 @@
 	bool fcs_supported;
 	bool fcs_enabled;
 
-	SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
+	SET_NETDEV_DEV(netdev, mdev->device);
 
 	netdev->netdev_ops = &mlx5e_netdev_ops;
 
@@ -4624,14 +4867,18 @@
 	netdev->ethtool_ops	  = &mlx5e_ethtool_ops;
 
 	netdev->vlan_features    |= NETIF_F_SG;
-	netdev->vlan_features    |= NETIF_F_IP_CSUM;
-	netdev->vlan_features    |= NETIF_F_IPV6_CSUM;
+	netdev->vlan_features    |= NETIF_F_HW_CSUM;
 	netdev->vlan_features    |= NETIF_F_GRO;
 	netdev->vlan_features    |= NETIF_F_TSO;
 	netdev->vlan_features    |= NETIF_F_TSO6;
 	netdev->vlan_features    |= NETIF_F_RXCSUM;
 	netdev->vlan_features    |= NETIF_F_RXHASH;
 
+	netdev->mpls_features    |= NETIF_F_SG;
+	netdev->mpls_features    |= NETIF_F_HW_CSUM;
+	netdev->mpls_features    |= NETIF_F_TSO;
+	netdev->mpls_features    |= NETIF_F_TSO6;
+
 	netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
 	netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
 
@@ -4645,15 +4892,15 @@
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-	if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
-		netdev->hw_enc_features |= NETIF_F_IP_CSUM;
-		netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
+	if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
+	    mlx5e_any_tunnel_proto_supported(mdev)) {
+		netdev->hw_enc_features |= NETIF_F_HW_CSUM;
 		netdev->hw_enc_features |= NETIF_F_TSO;
 		netdev->hw_enc_features |= NETIF_F_TSO6;
 		netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
 	}
 
-	if (mlx5_vxlan_allowed(mdev->vxlan)) {
+	if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
 		netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
 					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
 		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
@@ -4661,7 +4908,7 @@
 		netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
 	}
 
-	if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+	if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) {
 		netdev->hw_features     |= NETIF_F_GSO_GRE |
 					   NETIF_F_GSO_GRE_CSUM;
 		netdev->hw_enc_features |= NETIF_F_GSO_GRE |
@@ -4670,6 +4917,15 @@
 						NETIF_F_GSO_GRE_CSUM;
 	}
 
+	if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_IPIP)) {
+		netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
+				       NETIF_F_GSO_IPXIP6;
+		netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
+					   NETIF_F_GSO_IPXIP6;
+		netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 |
+						NETIF_F_GSO_IPXIP6;
+	}
+
 	netdev->hw_features	                 |= NETIF_F_GSO_PARTIAL;
 	netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
 	netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
@@ -4693,6 +4949,10 @@
 	if (!priv->channels.params.scatter_fcs_en)
 		netdev->features  &= ~NETIF_F_RXFCS;
 
+	/* prefer CQE compression over rxhash */
+	if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+		netdev->features &= ~NETIF_F_RXHASH;
+
 #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
 	if (FT_CAP(flow_modify_en) &&
 	    FT_CAP(modify_root) &&
@@ -4712,17 +4972,11 @@
 	netdev->priv_flags       |= IFF_UNICAST_FLT;
 
 	mlx5e_set_netdev_dev_addr(netdev);
-
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-	if (MLX5_ESWITCH_MANAGER(mdev))
-		netdev->switchdev_ops = &mlx5e_switchdev_ops;
-#endif
-
 	mlx5e_ipsec_build_netdev(priv);
 	mlx5e_tls_build_netdev(priv);
 }
 
-static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
+void mlx5e_create_q_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
@@ -4740,7 +4994,7 @@
 	}
 }
 
-static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
 {
 	if (priv->q_counter)
 		mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
@@ -4749,15 +5003,24 @@
 		mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
 }
 
-static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
-			   struct net_device *netdev,
-			   const struct mlx5e_profile *profile,
-			   void *ppriv)
+static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
+			  struct net_device *netdev,
+			  const struct mlx5e_profile *profile,
+			  void *ppriv)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_rss_params *rss = &priv->rss_params;
 	int err;
 
-	mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+	err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
+	if (err)
+		return err;
+
+	mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params,
+			       priv->max_nch, netdev->mtu);
+
+	mlx5e_timestamp_init(priv);
+
 	err = mlx5e_ipsec_init(priv);
 	if (err)
 		mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
@@ -4766,12 +5029,17 @@
 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 	mlx5e_build_nic_netdev(netdev);
 	mlx5e_build_tc2txq_maps(priv);
+	mlx5e_health_create_reporters(priv);
+
+	return 0;
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+	mlx5e_health_destroy_reporters(priv);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
+	mlx5e_netdev_cleanup(priv->netdev, priv);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -4779,26 +5047,42 @@
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
+	mlx5e_create_q_counters(priv);
+
+	err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+	if (err) {
+		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+		goto err_destroy_q_counters;
+	}
+
 	err = mlx5e_create_indirect_rqt(priv);
 	if (err)
-		return err;
+		goto err_close_drop_rq;
 
-	err = mlx5e_create_direct_rqts(priv);
+	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
 	if (err)
 		goto err_destroy_indirect_rqts;
 
-	err = mlx5e_create_indirect_tirs(priv);
+	err = mlx5e_create_indirect_tirs(priv, true);
 	if (err)
 		goto err_destroy_direct_rqts;
 
-	err = mlx5e_create_direct_tirs(priv);
+	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
 	if (err)
 		goto err_destroy_indirect_tirs;
 
+	err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
+	if (unlikely(err))
+		goto err_destroy_direct_tirs;
+
+	err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
+	if (unlikely(err))
+		goto err_destroy_xsk_rqts;
+
 	err = mlx5e_create_flow_steering(priv);
 	if (err) {
 		mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
-		goto err_destroy_direct_tirs;
+		goto err_destroy_xsk_tirs;
 	}
 
 	err = mlx5e_tc_nic_init(priv);
@@ -4809,14 +5093,22 @@
 
 err_destroy_flow_steering:
 	mlx5e_destroy_flow_steering(priv);
+err_destroy_xsk_tirs:
+	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+err_destroy_xsk_rqts:
+	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
 err_destroy_direct_tirs:
-	mlx5e_destroy_direct_tirs(priv);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
 err_destroy_indirect_tirs:
-	mlx5e_destroy_indirect_tirs(priv);
+	mlx5e_destroy_indirect_tirs(priv, true);
 err_destroy_direct_rqts:
-	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 err_destroy_indirect_rqts:
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+	mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+	mlx5e_destroy_q_counters(priv);
 	return err;
 }
 
@@ -4824,10 +5116,14 @@
 {
 	mlx5e_tc_nic_cleanup(priv);
 	mlx5e_destroy_flow_steering(priv);
-	mlx5e_destroy_direct_tirs(priv);
-	mlx5e_destroy_indirect_tirs(priv);
-	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_indirect_tirs(priv, true);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+	mlx5e_close_drop_rq(&priv->drop_rq);
+	mlx5e_destroy_q_counters(priv);
 }
 
 static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
@@ -4850,7 +5146,6 @@
 {
 	struct net_device *netdev = priv->netdev;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u16 max_mtu;
 
 	mlx5e_init_l2_addr(priv);
 
@@ -4858,19 +5153,16 @@
 	if (!netif_running(netdev))
 		mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
 
-	/* MTU range: 68 - hw-specific max */
-	netdev->min_mtu = ETH_MIN_MTU;
-	mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
-	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+	mlx5e_set_netdev_mtu_boundaries(priv);
 	mlx5e_set_dev_port_mtu(priv);
 
 	mlx5_lag_add(mdev, netdev);
 
 	mlx5e_enable_async_events(priv);
+	if (mlx5e_monitor_counter_supported(priv))
+		mlx5e_monitor_counter_init(priv);
 
-	if (MLX5_ESWITCH_MANAGER(priv->mdev))
-		mlx5e_register_vport_reps(priv);
-
+	mlx5e_hv_vhca_stats_create(priv);
 	if (netdev->reg_state != NETREG_REGISTERED)
 		return;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -4903,13 +5195,19 @@
 
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 
-	if (MLX5_ESWITCH_MANAGER(priv->mdev))
-		mlx5e_unregister_vport_reps(priv);
+	mlx5e_hv_vhca_stats_destroy(priv);
+	if (mlx5e_monitor_counter_supported(priv))
+		mlx5e_monitor_counter_cleanup(priv);
 
 	mlx5e_disable_async_events(priv);
 	mlx5_lag_remove(mdev);
 }
 
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
+{
+	return mlx5e_refresh_tirs(priv, false);
+}
+
 static const struct mlx5e_profile mlx5e_nic_profile = {
 	.init		   = mlx5e_nic_init,
 	.cleanup	   = mlx5e_nic_cleanup,
@@ -4919,51 +5217,83 @@
 	.cleanup_tx	   = mlx5e_cleanup_nic_tx,
 	.enable		   = mlx5e_nic_enable,
 	.disable	   = mlx5e_nic_disable,
+	.update_rx	   = mlx5e_update_nic_rx,
 	.update_stats	   = mlx5e_update_ndo_stats,
-	.max_nch	   = mlx5e_get_max_num_channels,
 	.update_carrier	   = mlx5e_update_carrier,
 	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
 	.max_tc		   = MLX5E_MAX_NUM_TC,
+	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(XSK),
 };
 
 /* mlx5e generic netdev management API (move to en_common.c) */
 
+/* mlx5e_netdev_init/cleanup must be called from profile->init/cleanup callbacks */
+int mlx5e_netdev_init(struct net_device *netdev,
+		      struct mlx5e_priv *priv,
+		      struct mlx5_core_dev *mdev,
+		      const struct mlx5e_profile *profile,
+		      void *ppriv)
+{
+	/* priv init */
+	priv->mdev        = mdev;
+	priv->netdev      = netdev;
+	priv->profile     = profile;
+	priv->ppriv       = ppriv;
+	priv->msglevel    = MLX5E_MSG_LEVEL;
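+	/* num_rx_queues was sized as nch * rq_groups in mlx5e_create_netdev(),
+	 * so dividing by rq_groups recovers the per-group channel count.
+	 */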
+	priv->max_nch     = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
+	priv->max_opened_tc = 1;
+
+	mutex_init(&priv->state_lock);
+	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+	INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
+	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+	priv->wq = create_singlethread_workqueue("mlx5e");
+	if (!priv->wq)
+		return -ENOMEM;
+
+	/* netdev init */
+	netif_carrier_off(netdev);
+
+#ifdef CONFIG_MLX5_EN_ARFS
+	netdev->rx_cpu_rmap =  mlx5_eq_table_get_rmap(mdev);
+#endif
+
+	return 0;
+}
+
+void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
+{
+	destroy_workqueue(priv->wq);
+}
+
 struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 				       const struct mlx5e_profile *profile,
+				       int nch,
 				       void *ppriv)
 {
-	int nch = profile->max_nch(mdev);
 	struct net_device *netdev;
-	struct mlx5e_priv *priv;
+	int err;
 
 	netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
 				    nch * profile->max_tc,
-				    nch);
+				    nch * profile->rq_groups);
 	if (!netdev) {
 		mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
 		return NULL;
 	}
 
-#ifdef CONFIG_MLX5_EN_ARFS
-	netdev->rx_cpu_rmap = mdev->rmap;
-#endif
-
-	profile->init(mdev, netdev, profile, ppriv);
-
-	netif_carrier_off(netdev);
-
-	priv = netdev_priv(netdev);
-
-	priv->wq = create_singlethread_workqueue("mlx5e");
-	if (!priv->wq)
-		goto err_cleanup_nic;
+	err = profile->init(mdev, netdev, profile, ppriv);
+	if (err) {
+		mlx5_core_err(mdev, "failed to init mlx5e profile %d\n", err);
+		goto err_free_netdev;
+	}
 
 	return netdev;
 
-err_cleanup_nic:
-	if (profile->cleanup)
-		profile->cleanup(priv);
+err_free_netdev:
 	free_netdev(netdev);
 
 	return NULL;
@@ -4971,7 +5301,6 @@
 
 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
 	const struct mlx5e_profile *profile;
 	int max_nch;
 	int err;
@@ -4984,7 +5313,7 @@
 	if (priv->channels.params.num_channels > max_nch) {
 		mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
 		priv->channels.params.num_channels = max_nch;
-		mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+		mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
 					      MLX5E_INDIR_RQT_SIZE, max_nch);
 	}
 
@@ -4992,28 +5321,16 @@
 	if (err)
 		goto out;
 
-	mlx5e_create_q_counters(priv);
-
-	err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
-	if (err) {
-		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
-		goto err_destroy_q_counters;
-	}
-
 	err = profile->init_rx(priv);
 	if (err)
-		goto err_close_drop_rq;
+		goto err_cleanup_tx;
 
 	if (profile->enable)
 		profile->enable(priv);
 
 	return 0;
 
-err_close_drop_rq:
-	mlx5e_close_drop_rq(&priv->drop_rq);
-
-err_destroy_q_counters:
-	mlx5e_destroy_q_counters(priv);
+err_cleanup_tx:
 	profile->cleanup_tx(priv);
 
 out:
@@ -5031,10 +5348,8 @@
 	flush_workqueue(priv->wq);
 
 	profile->cleanup_rx(priv);
-	mlx5e_close_drop_rq(&priv->drop_rq);
-	mlx5e_destroy_q_counters(priv);
 	profile->cleanup_tx(priv);
-	cancel_delayed_work_sync(&priv->update_stats_work);
+	cancel_work_sync(&priv->update_stats_work);
 }
 
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
@@ -5042,7 +5357,6 @@
 	const struct mlx5e_profile *profile = priv->profile;
 	struct net_device *netdev = priv->netdev;
 
-	destroy_workqueue(priv->wq);
 	if (profile->cleanup)
 		profile->cleanup(priv);
 	free_netdev(netdev);
@@ -5078,6 +5392,11 @@
 	struct mlx5e_priv *priv = vpriv;
 	struct net_device *netdev = priv->netdev;
 
+#ifdef CONFIG_MLX5_ESWITCH
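+	/* In switchdev mode mlx5e_add() registered vport reps and returned
+	 * mdev itself as the interface context; there is no netdev to detach.
+	 */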
+	if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev)
+		return;
+#endif
+
 	if (!netif_device_present(netdev))
 		return;
 
@@ -5088,28 +5407,27 @@
 static void *mlx5e_add(struct mlx5_core_dev *mdev)
 {
 	struct net_device *netdev;
-	void *rpriv = NULL;
 	void *priv;
 	int err;
+	int nch;
 
 	err = mlx5e_check_required_hca_cap(mdev);
 	if (err)
 		return NULL;
 
 #ifdef CONFIG_MLX5_ESWITCH
-	if (MLX5_ESWITCH_MANAGER(mdev)) {
-		rpriv = mlx5e_alloc_nic_rep_priv(mdev);
-		if (!rpriv) {
-			mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
-			return NULL;
-		}
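+	/* In switchdev offloads mode no netdev is created here; register the
+	 * vport representors and return mdev itself as the interface context.
+	 */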
+	if (MLX5_ESWITCH_MANAGER(mdev) &&
+	    mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
+		mlx5e_rep_register_vport_reps(mdev);
+		return mdev;
 	}
 #endif
 
-	netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
+	nch = mlx5e_get_max_num_channels(mdev);
+	netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL);
 	if (!netdev) {
 		mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
-		goto err_free_rpriv;
+		return NULL;
 	}
 
 	priv = netdev_priv(netdev);
@@ -5135,30 +5453,26 @@
 	mlx5e_detach(mdev, priv);
 err_destroy_netdev:
 	mlx5e_destroy_netdev(priv);
-err_free_rpriv:
-	kfree(rpriv);
 	return NULL;
 }
 
 static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
 {
-	struct mlx5e_priv *priv = vpriv;
-	void *ppriv = priv->ppriv;
+	struct mlx5e_priv *priv;
 
+#ifdef CONFIG_MLX5_ESWITCH
+	if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) {
+		mlx5e_rep_unregister_vport_reps(mdev);
+		return;
+	}
+#endif
+	priv = vpriv;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_delete_app(priv);
 #endif
 	unregister_netdev(priv->netdev);
 	mlx5e_detach(mdev, vpriv);
 	mlx5e_destroy_netdev(priv);
-	kfree(ppriv);
-}
-
-static void *mlx5e_get_netdev(void *vpriv)
-{
-	struct mlx5e_priv *priv = vpriv;
-
-	return priv->netdev;
 }
 
 static struct mlx5_interface mlx5e_interface = {
@@ -5166,9 +5480,7 @@
 	.remove    = mlx5e_remove,
 	.attach    = mlx5e_attach,
 	.detach    = mlx5e_detach,
-	.event     = mlx5e_async_event,
 	.protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
-	.get_dev   = mlx5e_get_netdev,
 };
 
 void mlx5e_init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c9cc974..cd9bb7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -37,26 +37,58 @@
 #include <net/act_api.h>
 #include <net/netevent.h>
 #include <net/arp.h>
+#include <net/devlink.h>
+#include <net/ipv6_stubs.h>
 
 #include "eswitch.h"
 #include "en.h"
 #include "en_rep.h"
 #include "en_tc.h"
+#include "en/tc_tun.h"
 #include "fs_core.h"
+#include "lib/port_tun.h"
+#define CREATE_TRACE_POINTS
+#include "diag/en_rep_tracepoint.h"
 
-#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
-	max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
-#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \
-	max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
+	max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
 
 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
 
+struct mlx5e_rep_indr_block_priv {
+	struct net_device *netdev;
+	struct mlx5e_rep_priv *rpriv;
+
+	struct list_head list;
+};
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+					    struct net_device *netdev);
+
 static void mlx5e_rep_get_drvinfo(struct net_device *dev,
 				  struct ethtool_drvinfo *drvinfo)
 {
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
 	strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
 		sizeof(drvinfo->driver));
 	strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%d.%d.%04d (%.16s)",
+		 fw_rev_maj(mdev), fw_rev_min(mdev),
+		 fw_rev_sub(mdev), mdev->board_id);
+}
+
+static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev,
+					 struct ethtool_drvinfo *drvinfo)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	mlx5e_rep_get_drvinfo(dev, drvinfo);
+	strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev),
+		sizeof(drvinfo->bus_info));
 }
 
 static const struct counter_desc sw_rep_stats_desc[] = {
@@ -123,29 +155,34 @@
 	vport_stats->tx_bytes   = vf_stats.rx_bytes;
 }
 
+static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+	struct rtnl_link_stats64 *vport_stats;
+
+	mlx5e_grp_802_3_update_stats(priv);
+
+	vport_stats = &priv->stats.vf_vport;
+
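+	/* Expose the 802.3 port counters as the uplink rep's vport stats. */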
+	vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+	vport_stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
+	vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+	vport_stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+}
+
 static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats *s = &priv->stats.sw;
-	struct mlx5e_rq_stats *rq_stats;
-	struct mlx5e_sq_stats *sq_stats;
-	int i, j;
+	struct rtnl_link_stats64 stats64 = {};
 
 	memset(s, 0, sizeof(*s));
-	for (i = 0; i < priv->channels.num; i++) {
-		struct mlx5e_channel *c = priv->channels.c[i];
+	mlx5e_fold_sw_stats64(priv, &stats64);
 
-		rq_stats = c->rq.stats;
-
-		s->rx_packets	+= rq_stats->packets;
-		s->rx_bytes	+= rq_stats->bytes;
-
-		for (j = 0; j < priv->channels.params.num_tc; j++) {
-			sq_stats = c->sq[j].stats;
-
-			s->tx_packets		+= sq_stats->packets;
-			s->tx_bytes		+= sq_stats->bytes;
-		}
-	}
+	s->rx_packets = stats64.rx_packets;
+	s->rx_bytes   = stats64.rx_bytes;
+	s->tx_packets = stats64.tx_packets;
+	s->tx_bytes   = stats64.tx_bytes;
+	s->tx_queue_dropped = stats64.tx_dropped;
 }
 
 static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
@@ -158,9 +195,8 @@
 		return;
 
 	mutex_lock(&priv->state_lock);
-	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-		mlx5e_rep_update_sw_counters(priv);
-	mlx5e_rep_update_hw_counters(priv);
+	mlx5e_rep_update_sw_counters(priv);
+	priv->profile->update_stats(priv);
 	mutex_unlock(&priv->state_lock);
 
 	for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
@@ -182,34 +218,191 @@
 	}
 }
 
+static void mlx5e_rep_get_ringparam(struct net_device *dev,
+				struct ethtool_ringparam *param)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	mlx5e_ethtool_get_ringparam(priv, param);
+}
+
+static int mlx5e_rep_set_ringparam(struct net_device *dev,
+			       struct ethtool_ringparam *param)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+static int mlx5e_replace_rep_vport_rx_rule(struct mlx5e_priv *priv,
+					   struct mlx5_flow_destination *dest)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_eswitch_rep *rep = rpriv->rep;
+	struct mlx5_flow_handle *flow_rule;
+
+	flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+						      rep->vport,
+						      dest);
+	if (IS_ERR(flow_rule))
+		return PTR_ERR(flow_rule);
+
+	mlx5_del_flow_rules(rpriv->vport_rx_rule);
+	rpriv->vport_rx_rule = flow_rule;
+	return 0;
+}
+
+static void mlx5e_rep_get_channels(struct net_device *dev,
+				   struct ethtool_channels *ch)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	mlx5e_ethtool_get_channels(priv, ch);
+}
+
+static int mlx5e_rep_set_channels(struct net_device *dev,
+				  struct ethtool_channels *ch)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	u16 curr_channels_amount = priv->channels.params.num_channels;
+	u32 new_channels_amount = ch->combined_count;
+	struct mlx5_flow_destination new_dest;
+	int err = 0;
+
+	err = mlx5e_ethtool_set_channels(priv, ch);
+	if (err)
+		return err;
+
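+	/* The vport rx rule destination depends on the channel count: a single
+	 * channel steers to the direct TIR, multiple channels go through the
+	 * TTC flow table (RSS).
+	 */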
+	if (curr_channels_amount == 1 && new_channels_amount > 1) {
+		new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		new_dest.ft = priv->fs.ttc.ft.t;
+	} else if (new_channels_amount == 1 && curr_channels_amount > 1) {
+		new_dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+		new_dest.tir_num = priv->direct_tir[0].tirn;
+	} else {
+		return 0;
+	}
+
+	err = mlx5e_replace_rep_vport_rx_rule(priv, &new_dest);
+	if (err) {
+		netdev_warn(priv->netdev, "Failed to update vport rx rule when going from (%d) channels to (%d) channels\n",
+			    curr_channels_amount, new_channels_amount);
+		return err;
+	}
+
+	return 0;
+}
+
+static int mlx5e_rep_get_coalesce(struct net_device *netdev,
+				  struct ethtool_coalesce *coal)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+static int mlx5e_rep_set_coalesce(struct net_device *netdev,
+				  struct ethtool_coalesce *coal)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
+static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_get_rxfh_key_size(priv);
+}
+
+static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_get_rxfh_indir_size(priv);
+}
+
+static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
+					    struct ethtool_pauseparam *pauseparam)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
+					   struct ethtool_pauseparam *pauseparam)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
+					       struct ethtool_link_ksettings *link_ksettings)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
+static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
+					       const struct ethtool_link_ksettings *link_ksettings)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
 static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 	.get_drvinfo	   = mlx5e_rep_get_drvinfo,
 	.get_link	   = ethtool_op_get_link,
 	.get_strings       = mlx5e_rep_get_strings,
 	.get_sset_count    = mlx5e_rep_get_sset_count,
 	.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+	.get_ringparam     = mlx5e_rep_get_ringparam,
+	.set_ringparam     = mlx5e_rep_set_ringparam,
+	.get_channels      = mlx5e_rep_get_channels,
+	.set_channels      = mlx5e_rep_set_channels,
+	.get_coalesce      = mlx5e_rep_get_coalesce,
+	.set_coalesce      = mlx5e_rep_set_coalesce,
+	.get_rxfh_key_size   = mlx5e_rep_get_rxfh_key_size,
+	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
 };
 
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
+	.get_drvinfo	   = mlx5e_uplink_rep_get_drvinfo,
+	.get_link	   = ethtool_op_get_link,
+	.get_strings       = mlx5e_rep_get_strings,
+	.get_sset_count    = mlx5e_rep_get_sset_count,
+	.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+	.get_ringparam     = mlx5e_rep_get_ringparam,
+	.set_ringparam     = mlx5e_rep_set_ringparam,
+	.get_channels      = mlx5e_rep_get_channels,
+	.set_channels      = mlx5e_rep_set_channels,
+	.get_coalesce      = mlx5e_rep_get_coalesce,
+	.set_coalesce      = mlx5e_rep_set_coalesce,
+	.get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
+	.set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
+	.get_rxfh_key_size   = mlx5e_rep_get_rxfh_key_size,
+	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+	.get_pauseparam    = mlx5e_uplink_rep_get_pauseparam,
+	.set_pauseparam    = mlx5e_uplink_rep_set_pauseparam,
+};
+
+static void mlx5e_rep_get_port_parent_id(struct net_device *dev,
+					 struct netdev_phys_item_id *ppid)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_eswitch_rep *rep = rpriv->rep;
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_priv *priv;
+	u64 parent_id;
 
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	priv = netdev_priv(dev);
 
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = ETH_ALEN;
-		ether_addr_copy(attr->u.ppid.id, rep->hw_id);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
+	parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
+	ppid->id_len = sizeof(parent_id);
+	memcpy(ppid->id, &parent_id, sizeof(parent_id));
 }
 
 static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
@@ -218,7 +411,7 @@
 	struct mlx5e_rep_sq *rep_sq, *tmp;
 	struct mlx5e_rep_priv *rpriv;
 
-	if (esw->mode != SRIOV_OFFLOADS)
+	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return;
 
 	rpriv = mlx5e_rep_to_rep_priv(rep);
@@ -239,7 +432,7 @@
 	int err;
 	int i;
 
-	if (esw->mode != SRIOV_OFFLOADS)
+	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return 0;
 
 	rpriv = mlx5e_rep_to_rep_priv(rep);
@@ -307,16 +500,18 @@
 	mlx5e_sqs2vport_stop(esw, rep);
 }
 
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
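+	/* Without IPv6, report the maximum interval so IPv4 wins the min. */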
+	return ~0UL;
+}
+
 static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
 {
-#if IS_ENABLED(CONFIG_IPV6)
-	unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms,
-						DELAY_PROBE_TIME);
-#else
-	unsigned long ipv6_interval = ~0UL;
-#endif
-	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
-						DELAY_PROBE_TIME);
+	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
 	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -334,54 +529,111 @@
 				 neigh_update->min_interval);
 }
 
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+	return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+	if (refcount_dec_and_test(&nhe->refcnt)) {
+		mlx5e_rep_neigh_entry_remove(nhe);
+		kfree_rcu(nhe, rcu);
+	}
+}
+
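+/* Advance an RCU-protected walk of the rep's neigh list: take a reference on
+ * the next entry that is still live and release the one we arrived from.
+ */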
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+		   struct mlx5e_neigh_hash_entry *nhe)
+{
+	struct mlx5e_neigh_hash_entry *next = NULL;
+
+	rcu_read_lock();
+
+	for (next = nhe ?
+		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+					   &nhe->neigh_list,
+					   struct mlx5e_neigh_hash_entry,
+					   neigh_list) :
+		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+					    struct mlx5e_neigh_hash_entry,
+					    neigh_list);
+	     next;
+	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+					  &next->neigh_list,
+					  struct mlx5e_neigh_hash_entry,
+					  neigh_list))
+		if (mlx5e_rep_neigh_entry_hold(next))
+			break;
+
+	rcu_read_unlock();
+
+	if (nhe)
+		mlx5e_rep_neigh_entry_release(nhe);
+
+	return next;
+}
+
 static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
 {
 	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
 						    neigh_update.neigh_stats_work.work);
 	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_neigh_hash_entry *nhe;
+	struct mlx5e_neigh_hash_entry *nhe = NULL;
 
 	rtnl_lock();
 	if (!list_empty(&rpriv->neigh_update.neigh_list))
 		mlx5e_rep_queue_neigh_stats_work(priv);
 
-	list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list)
+	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
 		mlx5e_tc_update_neigh_used_value(nhe);
 
 	rtnl_unlock();
 }
 
-static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
-{
-	refcount_inc(&nhe->refcnt);
-}
-
-static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
-{
-	if (refcount_dec_and_test(&nhe->refcnt))
-		kfree(nhe);
-}
-
 static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
 				   struct mlx5e_encap_entry *e,
 				   bool neigh_connected,
 				   unsigned char ha[ETH_ALEN])
 {
 	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	bool encap_connected;
+	LIST_HEAD(flow_list);
 
 	ASSERT_RTNL();
 
-	if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
-	    !ether_addr_equal(e->h_dest, ha))
-		mlx5e_tc_encap_flows_del(priv, e);
+	/* wait for encap to be fully initialized */
+	wait_for_completion(&e->res_ready);
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+	if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+				    ether_addr_equal(e->h_dest, ha)))
+		goto unlock;
+
+	mlx5e_take_all_encap_flows(e, &flow_list);
+
+	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+		mlx5e_tc_encap_flows_del(priv, e, &flow_list);
 
 	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
 		ether_addr_copy(e->h_dest, ha);
 		ether_addr_copy(eth->h_dest, ha);
+		/* Update the encap source mac, in case the flows are
+		 * deleted and re-added after the encap source mac changes.
+		 */
+		ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
 
-		mlx5e_tc_encap_flows_add(priv, e);
+		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
 	}
+unlock:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	mlx5e_put_encap_flow_list(priv, &flow_list);
 }
 
 static void mlx5e_rep_neigh_update(struct work_struct *work)
@@ -393,7 +645,6 @@
 	unsigned char ha[ETH_ALEN];
 	struct mlx5e_priv *priv;
 	bool neigh_connected;
-	bool encap_connected;
 	u8 nud_state, dead;
 
 	rtnl_lock();
@@ -411,19 +662,242 @@
 
 	neigh_connected = (nud_state & NUD_VALID) && !dead;
 
-	list_for_each_entry(e, &nhe->encap_list, encap_list) {
-		encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
-		priv = netdev_priv(e->out_dev);
+	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
 
-		if (encap_connected != neigh_connected ||
-		    !ether_addr_equal(e->h_dest, ha))
-			mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+	list_for_each_entry(e, &nhe->encap_list, encap_list) {
+		if (!mlx5e_encap_take(e))
+			continue;
+
+		priv = netdev_priv(e->out_dev);
+		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+		mlx5e_encap_put(priv, e);
 	}
 	mlx5e_rep_neigh_entry_release(nhe);
 	rtnl_unlock();
 	neigh_release(n);
 }
 
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+				 struct net_device *netdev)
+{
+	struct mlx5e_rep_indr_block_priv *cb_priv;
+
+	/* All callback list access should be protected by RTNL. */
+	ASSERT_RTNL();
+
+	list_for_each_entry(cb_priv,
+			    &rpriv->uplink_priv.tc_indr_block_priv_list,
+			    list)
+		if (cb_priv->netdev == netdev)
+			return cb_priv;
+
+	return NULL;
+}
+
+static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
+	struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+
+	list_for_each_entry_safe(cb_priv, temp, head, list) {
+		mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
+		kfree(cb_priv);
+	}
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+		       struct flow_cls_offload *flower,
+		       struct mlx5e_rep_indr_block_priv *indr_priv)
+{
+	unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+	int err = 0;
+
+	switch (flower->command) {
+	case FLOW_CLS_REPLACE:
+		err = mlx5e_configure_flower(netdev, priv, flower, flags);
+		break;
+	case FLOW_CLS_DESTROY:
+		err = mlx5e_delete_flower(netdev, priv, flower, flags);
+		break;
+	case FLOW_CLS_STATS:
+		err = mlx5e_stats_flower(netdev, priv, flower, flags);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
+					 void *type_data, void *indr_priv)
+{
+	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return mlx5e_rep_indr_offload(priv->netdev, type_data, priv);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv)
+{
+	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+	list_del(&indr_priv->list);
+	kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static int
+mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
+			      struct mlx5e_rep_priv *rpriv,
+			      struct flow_block_offload *f)
+{
+	struct mlx5e_rep_indr_block_priv *indr_priv;
+	struct flow_block_cb *block_cb;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	f->unlocked_driver_cb = true;
+	f->driver_block_list = &mlx5e_block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+		if (indr_priv)
+			return -EEXIST;
+
+		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+		if (!indr_priv)
+			return -ENOMEM;
+
+		indr_priv->netdev = netdev;
+		indr_priv->rpriv = rpriv;
+		list_add(&indr_priv->list,
+			 &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+		block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb,
+					       indr_priv, indr_priv,
+					       mlx5e_rep_indr_tc_block_unbind);
+		if (IS_ERR(block_cb)) {
+			list_del(&indr_priv->list);
+			kfree(indr_priv);
+			return PTR_ERR(block_cb);
+		}
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+		if (!indr_priv)
+			return -ENOENT;
+
+		block_cb = flow_block_cb_lookup(f->block,
+						mlx5e_rep_indr_setup_block_cb,
+						indr_priv);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static
+int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
+			       enum tc_setup_type type, void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv,
+						      type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
+					 struct net_device *netdev)
+{
+	int err;
+
+	err = __flow_indr_block_cb_register(netdev, rpriv,
+					    mlx5e_rep_indr_setup_tc_cb,
+					    rpriv);
+	if (err) {
+		struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+		mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
+			      netdev_name(netdev), err);
+	}
+	return err;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+					    struct net_device *netdev)
+{
+	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+					rpriv);
+}
+
+static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
+					 unsigned long event, void *ptr)
+{
+	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+						     uplink_priv.netdevice_nb);
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
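+	/* Only netdevs whose tunnels we can offload, or VLANs stacked on our
+	 * own uplink, are interesting here.
+	 */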
+	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
+		return NOTIFY_OK;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		mlx5e_rep_indr_register_block(rpriv, netdev);
+		break;
+	case NETDEV_UNREGISTER:
+		mlx5e_rep_indr_unregister_block(rpriv, netdev);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static void
+mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
+				  struct mlx5e_neigh_hash_entry *nhe,
+				  struct neighbour *n)
+{
+	/* Take a reference to ensure the neighbour and mlx5 encap
+	 * entry won't be destroyed until we drop the reference in
+	 * the delayed work.
+	 */
+	neigh_hold(n);
+
+	/* This assignment is valid as long as the neigh reference
+	 * is taken
+	 */
+	nhe->n = n;
+
+	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+		mlx5e_rep_neigh_entry_release(nhe);
+		neigh_release(n);
+	}
+}
+
 static struct mlx5e_neigh_hash_entry *
 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
 			     struct mlx5e_neigh *m_neigh);
@@ -446,7 +920,7 @@
 	case NETEVENT_NEIGH_UPDATE:
 		n = ptr;
 #if IS_ENABLED(CONFIG_IPV6)
-		if (n->tbl != &nd_tbl && n->tbl != &arp_tbl)
+		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
 #else
 		if (n->tbl != &arp_tbl)
 #endif
@@ -456,34 +930,13 @@
 		m_neigh.family = n->ops->family;
 		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
 
-		/* We are in atomic context and can't take RTNL mutex, so use
-		 * spin_lock_bh to lookup the neigh table. bh is used since
-		 * netevent can be called from a softirq context.
-		 */
-		spin_lock_bh(&neigh_update->encap_lock);
+		rcu_read_lock();
 		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
-		if (!nhe) {
-			spin_unlock_bh(&neigh_update->encap_lock);
+		rcu_read_unlock();
+		if (!nhe)
 			return NOTIFY_DONE;
-		}
 
-		/* This assignment is valid as long as the the neigh reference
-		 * is taken
-		 */
-		nhe->n = n;
-
-		/* Take a reference to ensure the neighbour and mlx5 encap
-		 * entry won't be destructed until we drop the reference in
-		 * delayed work.
-		 */
-		neigh_hold(n);
-		mlx5e_rep_neigh_entry_hold(nhe);
-
-		if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
-			mlx5e_rep_neigh_entry_release(nhe);
-			neigh_release(n);
-		}
-		spin_unlock_bh(&neigh_update->encap_lock);
+		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
 		break;
 
 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
@@ -494,25 +947,21 @@
 		 * done per device delay prob time parameter.
 		 */
 #if IS_ENABLED(CONFIG_IPV6)
-		if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl))
+		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
 #else
 		if (!p->dev || p->tbl != &arp_tbl)
 #endif
 			return NOTIFY_DONE;
 
-		/* We are in atomic context and can't take RTNL mutex,
-		 * so use spin_lock_bh to walk the neigh list and look for
-		 * the relevant device. bh is used since netevent can be
-		 * called from a softirq context.
-		 */
-		spin_lock_bh(&neigh_update->encap_lock);
-		list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+					neigh_list) {
 			if (p->dev == nhe->m_neigh.dev) {
 				found = true;
 				break;
 			}
 		}
-		spin_unlock_bh(&neigh_update->encap_lock);
+		rcu_read_unlock();
 		if (!found)
 			return NOTIFY_DONE;
 
@@ -543,7 +992,7 @@
 		return err;
 
 	INIT_LIST_HEAD(&neigh_update->neigh_list);
-	spin_lock_init(&neigh_update->encap_lock);
+	mutex_init(&neigh_update->encap_lock);
 	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
 			  mlx5e_rep_neigh_stats_work);
 	mlx5e_rep_neigh_update_init_interval(rpriv);
@@ -570,6 +1019,7 @@
 
 	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
 
+	mutex_destroy(&neigh_update->encap_lock);
 	rhashtable_destroy(&neigh_update->neigh_ht);
 }
 
@@ -585,28 +1035,27 @@
 	if (err)
 		return err;
 
-	list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
 
 	return err;
 }
 
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv,
-					 struct mlx5e_neigh_hash_entry *nhe)
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
 {
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
 
-	spin_lock_bh(&rpriv->neigh_update.encap_lock);
+	mutex_lock(&rpriv->neigh_update.encap_lock);
 
-	list_del(&nhe->neigh_list);
+	list_del_rcu(&nhe->neigh_list);
 
 	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
 			       &nhe->rhash_node,
 			       mlx5e_neigh_ht_params);
-	spin_unlock_bh(&rpriv->neigh_update.encap_lock);
+	mutex_unlock(&rpriv->neigh_update.encap_lock);
 }
 
-/* This function must only be called under RTNL lock or under the
- * representor's encap_lock in case RTNL mutex can't be held.
+/* This function must only be called under the representor's encap_lock or
+ * within an RCU read-side critical section. The returned entry, if any, has
+ * its reference count incremented.
  */
 static struct mlx5e_neigh_hash_entry *
 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
@@ -614,9 +1063,11 @@
 {
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+	struct mlx5e_neigh_hash_entry *nhe;
 
-	return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
-				      mlx5e_neigh_ht_params);
+	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+				     mlx5e_neigh_ht_params);
+	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
 }
 
 static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
@@ -629,8 +1080,10 @@
 	if (!*nhe)
 		return -ENOMEM;
 
+	(*nhe)->priv = priv;
 	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
 	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+	spin_lock_init(&(*nhe)->encap_list_lock);
 	INIT_LIST_HEAD(&(*nhe)->encap_list);
 	refcount_set(&(*nhe)->refcnt, 1);
 
@@ -644,45 +1097,58 @@
 	return err;
 }
 
-static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
-					  struct mlx5e_neigh_hash_entry *nhe)
-{
-	/* The neigh hash entry must be removed from the hash table regardless
-	 * of the reference count value, so it won't be found by the next
-	 * neigh notification call. The neigh hash entry reference count is
-	 * incremented only during creation and neigh notification calls and
-	 * protects from freeing the nhe struct.
-	 */
-	mlx5e_rep_neigh_entry_remove(priv, nhe);
-	mlx5e_rep_neigh_entry_release(nhe);
-}
-
 int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
 				 struct mlx5e_encap_entry *e)
 {
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
 	struct mlx5e_neigh_hash_entry *nhe;
 	int err;
 
+	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+	if (err)
+		return err;
+
+	mutex_lock(&rpriv->neigh_update.encap_lock);
 	nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
 	if (!nhe) {
 		err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
-		if (err)
+		if (err) {
+			mutex_unlock(&rpriv->neigh_update.encap_lock);
+			mlx5_tun_entropy_refcount_dec(tun_entropy,
+						      e->reformat_type);
 			return err;
+		}
 	}
-	list_add(&e->encap_list, &nhe->encap_list);
+
+	e->nhe = nhe;
+	spin_lock(&nhe->encap_list_lock);
+	list_add_rcu(&e->encap_list, &nhe->encap_list);
+	spin_unlock(&nhe->encap_list_lock);
+
+	mutex_unlock(&rpriv->neigh_update.encap_lock);
+
 	return 0;
 }
 
 void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
 				  struct mlx5e_encap_entry *e)
 {
-	struct mlx5e_neigh_hash_entry *nhe;
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
 
-	list_del(&e->encap_list);
-	nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+	if (!e->nhe)
+		return;
 
-	if (list_empty(&nhe->encap_list))
-		mlx5e_rep_neigh_entry_destroy(priv, nhe);
+	spin_lock(&e->nhe->encap_list_lock);
+	list_del_rcu(&e->encap_list);
+	spin_unlock(&e->nhe->encap_list_lock);
+
+	mlx5e_rep_neigh_entry_release(e->nhe);
+	e->nhe = NULL;
+	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
 }
 
 static int mlx5e_rep_open(struct net_device *dev)
@@ -699,7 +1165,8 @@
 
 	if (!mlx5_modify_vport_admin_state(priv->mdev,
 					   MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-					   rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
+					   rep->vport, 1,
+					   MLX5_VPORT_ADMIN_STATE_UP))
 		netif_carrier_on(dev);
 
 unlock:
@@ -717,54 +1184,44 @@
 	mutex_lock(&priv->state_lock);
 	mlx5_modify_vport_admin_state(priv->mdev,
 				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-				      rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
+				      rep->vport, 1,
+				      MLX5_VPORT_ADMIN_STATE_DOWN);
 	ret = mlx5e_close_locked(dev);
 	mutex_unlock(&priv->state_lock);
 	return ret;
 }
 
-static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
-					char *buf, size_t len)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_eswitch_rep *rep = rpriv->rep;
-	int ret;
-
-	ret = snprintf(buf, len, "%d", rep->vport - 1);
-	if (ret >= len)
-		return -EOPNOTSUPP;
-
-	return 0;
-}
-
 static int
 mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
-			      struct tc_cls_flower_offload *cls_flower, int flags)
+			      struct flow_cls_offload *cls_flower, int flags)
 {
 	switch (cls_flower->command) {
-	case TC_CLSFLOWER_REPLACE:
-		return mlx5e_configure_flower(priv, cls_flower, flags);
-	case TC_CLSFLOWER_DESTROY:
-		return mlx5e_delete_flower(priv, cls_flower, flags);
-	case TC_CLSFLOWER_STATS:
-		return mlx5e_stats_flower(priv, cls_flower, flags);
+	case FLOW_CLS_REPLACE:
+		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+					      flags);
+	case FLOW_CLS_DESTROY:
+		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+					   flags);
+	case FLOW_CLS_STATS:
+		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+					  flags);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
-static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
-				       void *cb_priv)
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+				    struct tc_cls_matchall_offload *ma)
 {
-	struct mlx5e_priv *priv = cb_priv;
-
-	if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
-		return -EOPNOTSUPP;
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
+	switch (ma->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return mlx5e_tc_configure_matchall(priv, ma);
+	case TC_CLSMATCHALL_DESTROY:
+		return mlx5e_tc_delete_matchall(priv, ma);
+	case TC_CLSMATCHALL_STATS:
+		mlx5e_tc_stats_matchall(priv, ma);
+		return 0;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -773,45 +1230,34 @@
 static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
 				 void *cb_priv)
 {
+	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
 	struct mlx5e_priv *priv = cb_priv;
 
-	if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
-		return -EOPNOTSUPP;
-
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
-		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+	case TC_SETUP_CLSMATCHALL:
+		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
-static int mlx5e_rep_setup_tc_block(struct net_device *dev,
-				    struct tc_block_offload *f)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
-					     priv, priv, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(mlx5e_rep_block_cb_list);
 
 static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct flow_block_offload *f = type_data;
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return mlx5e_rep_setup_tc_block(dev, type_data);
+		f->unlocked_driver_cb = true;
+		return flow_block_cb_setup_simple(type_data,
+						  &mlx5e_rep_block_cb_list,
+						  mlx5e_rep_setup_tc_cb,
+						  priv, priv, true);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -819,43 +1265,23 @@
 
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep;
 
 	if (!MLX5_ESWITCH_MANAGER(priv->mdev))
 		return false;
 
-	rep = rpriv->rep;
-	if (esw->mode == SRIOV_OFFLOADS &&
-	    rep && rep->vport == FDB_UPLINK_VPORT)
-		return true;
-
-	return false;
-}
-
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
-{
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_eswitch_rep *rep;
-
-	if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+	if (!rpriv) /* non-vport-rep mlx5e instances don't use this field */
 		return false;
 
 	rep = rpriv->rep;
-	if (rep && rep->vport != FDB_UPLINK_VPORT)
-		return true;
-
-	return false;
+	return (rep->vport == MLX5_VPORT_UPLINK);
 }
 
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
-		if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
 			return true;
 	}
 
@@ -867,22 +1293,13 @@
 		     struct rtnl_link_stats64 *stats)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5e_sw_stats *sstats = &priv->stats.sw;
 
-	mlx5e_rep_update_sw_counters(priv);
-
-	stats->rx_packets = sstats->rx_packets;
-	stats->rx_bytes   = sstats->rx_bytes;
-	stats->tx_packets = sstats->tx_packets;
-	stats->tx_bytes   = sstats->tx_bytes;
-
-	stats->tx_dropped = sstats->tx_queue_dropped;
-
+	mlx5e_fold_sw_stats64(priv, stats);
 	return 0;
 }
 
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
-			    void *sp)
+static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+				       void *sp)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -898,135 +1315,295 @@
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
 	/* update HW stats in background for next time */
-	queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
-
+	mlx5e_queue_update_stats(priv);
 	memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
 }
 
-static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
-	.switchdev_port_attr_get	= mlx5e_attr_get,
-};
-
-static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu)
+static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	return mlx5e_change_mtu(netdev, new_mtu, NULL);
 }
 
+static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+}
+
+static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
+{
+	struct sockaddr *saddr = addr;
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+	return 0;
+}
+
+static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+					__be16 vlan_proto)
+{
+	netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
+
+	if (vlan != 0)
+		return -EOPNOTSUPP;
+
+	/* allow setting 0-vid for compatibility with libvirt */
+	return 0;
+}
+
+static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	return &rpriv->dl_port;
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_open                = mlx5e_rep_open,
 	.ndo_stop                = mlx5e_rep_close,
 	.ndo_start_xmit          = mlx5e_xmit,
-	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
 	.ndo_setup_tc            = mlx5e_rep_setup_tc,
+	.ndo_get_devlink_port = mlx5e_get_devlink_port,
 	.ndo_get_stats64         = mlx5e_rep_get_stats,
-	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
-	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
-	.ndo_change_mtu          = mlx5e_change_rep_mtu,
+	.ndo_has_offload_stats	 = mlx5e_rep_has_offload_stats,
+	.ndo_get_offload_stats	 = mlx5e_rep_get_offload_stats,
+	.ndo_change_mtu          = mlx5e_rep_change_mtu,
 };
 
-static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params, u16 mtu)
+static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
+	.ndo_open                = mlx5e_open,
+	.ndo_stop                = mlx5e_close,
+	.ndo_start_xmit          = mlx5e_xmit,
+	.ndo_set_mac_address     = mlx5e_uplink_rep_set_mac,
+	.ndo_setup_tc            = mlx5e_rep_setup_tc,
+	.ndo_get_devlink_port = mlx5e_get_devlink_port,
+	.ndo_get_stats64         = mlx5e_get_stats,
+	.ndo_has_offload_stats	 = mlx5e_rep_has_offload_stats,
+	.ndo_get_offload_stats	 = mlx5e_rep_get_offload_stats,
+	.ndo_change_mtu          = mlx5e_uplink_rep_change_mtu,
+	.ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
+	.ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
+	.ndo_features_check      = mlx5e_features_check,
+	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
+	.ndo_set_vf_rate         = mlx5e_set_vf_rate,
+	.ndo_get_vf_config       = mlx5e_get_vf_config,
+	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
+	.ndo_set_vf_vlan         = mlx5e_uplink_rep_set_vf_vlan,
+	.ndo_set_features        = mlx5e_set_features,
+};
+
+bool mlx5e_eswitch_rep(struct net_device *netdev)
 {
+	if (netdev->netdev_ops == &mlx5e_netdev_ops_rep ||
+	    netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
+		return true;
+
+	return false;
+}
+
+static void mlx5e_build_rep_params(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_eswitch_rep *rep = rpriv->rep;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_params *params;
+
 	u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
 					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
+	params = &priv->channels.params;
 	params->hard_mtu    = MLX5E_ETH_HARD_MTU;
-	params->sw_mtu      = mtu;
-	params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
-	params->rq_wq_type  = MLX5_WQ_TYPE_CYCLIC;
-	params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
+	params->sw_mtu      = netdev->mtu;
 
+	/* SQ */
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+	else
+		params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
+
+	/* RQ */
+	mlx5e_build_rq_params(mdev, params);
+
+	/* CQ moderation params */
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
 
 	params->num_tc                = 1;
-	params->lro_wqe_sz            = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+	params->tunneled_offload_en = false;
 
 	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+
+	/* RSS */
+	mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_eswitch_rep *rep = rpriv->rep;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u16 max_mtu;
 
-	netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+	if (rep->vport == MLX5_VPORT_UPLINK) {
+		SET_NETDEV_DEV(netdev, mdev->device);
+		netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
+		/* we want a persistent MAC for the uplink rep */
+		mlx5_query_mac_address(mdev, netdev->dev_addr);
+		netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+		if (MLX5_CAP_GEN(mdev, qos))
+			netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+	} else {
+		netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+		eth_hw_addr_random(netdev);
+		netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
+	}
 
 	netdev->watchdog_timeo    = 15 * HZ;
 
-	netdev->ethtool_ops	  = &mlx5e_rep_ethtool_ops;
+	netdev->features       |= NETIF_F_NETNS_LOCAL;
 
-	netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
+	netdev->hw_features    |= NETIF_F_HW_TC;
+	netdev->hw_features    |= NETIF_F_SG;
+	netdev->hw_features    |= NETIF_F_IP_CSUM;
+	netdev->hw_features    |= NETIF_F_IPV6_CSUM;
+	netdev->hw_features    |= NETIF_F_GRO;
+	netdev->hw_features    |= NETIF_F_TSO;
+	netdev->hw_features    |= NETIF_F_TSO6;
+	netdev->hw_features    |= NETIF_F_RXCSUM;
 
-	netdev->features	 |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
-	netdev->hw_features      |= NETIF_F_HW_TC;
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+	else
+		netdev->features |= NETIF_F_VLAN_CHALLENGED;
 
-	eth_hw_addr_random(netdev);
-
-	netdev->min_mtu = ETH_MIN_MTU;
-	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
-	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+	netdev->features |= netdev->hw_features;
 }
 
-static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
-			   struct net_device *netdev,
-			   const struct mlx5e_profile *profile,
-			   void *ppriv)
+static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
+			  struct net_device *netdev,
+			  const struct mlx5e_profile *profile,
+			  void *ppriv)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	int err;
 
-	priv->mdev                         = mdev;
-	priv->netdev                       = netdev;
-	priv->profile                      = profile;
-	priv->ppriv                        = ppriv;
+	err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
+	if (err)
+		return err;
 
-	mutex_init(&priv->state_lock);
+	priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
 
-	INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
-
-	priv->channels.params.num_channels = profile->max_nch(mdev);
-
-	mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
+	mlx5e_build_rep_params(netdev);
 	mlx5e_build_rep_netdev(netdev);
 
 	mlx5e_timestamp_init(priv);
+
+	return 0;
 }
 
-static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
+{
+	mlx5e_netdev_cleanup(priv->netdev, priv);
+}
+
+static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
+{
+	struct ttc_params ttc_params = {};
+	int tt, err;
+
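+	/* The TTC (traffic type classifier) table steers packets to the
+	 * indirect TIRs by L3/L4 type; unclassified traffic is sent to
+	 * the first direct TIR.
+	 */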
+	priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+					      MLX5_FLOW_NAMESPACE_KERNEL);
+
+	/* The inner_ttc in the ttc params is intentionally not set */
+	ttc_params.any_tt_tirn = priv->direct_tir[0].tirn;
+	mlx5e_set_ttc_ft_params(&ttc_params);
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+		ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+
+	err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+	if (err) {
+		netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+		return err;
+	}
+	return 0;
+}
+
+static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
 	struct mlx5_flow_handle *flow_rule;
+	struct mlx5_flow_destination dest;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	dest.tir_num = priv->direct_tir[0].tirn;
+	flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+						      rep->vport,
+						      &dest);
+	if (IS_ERR(flow_rule))
+		return PTR_ERR(flow_rule);
+	rpriv->vport_rx_rule = flow_rule;
+	return 0;
+}
+
+static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
 	mlx5e_init_l2_addr(priv);
 
-	err = mlx5e_create_direct_rqts(priv);
-	if (err)
+	err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+	if (err) {
+		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
 		return err;
+	}
 
-	err = mlx5e_create_direct_tirs(priv);
+	err = mlx5e_create_indirect_rqt(priv);
+	if (err)
+		goto err_close_drop_rq;
+
+	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
+	if (err)
+		goto err_destroy_indirect_rqts;
+
+	err = mlx5e_create_indirect_tirs(priv, false);
 	if (err)
 		goto err_destroy_direct_rqts;
 
-	flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
-						      rep->vport,
-						      priv->direct_tir[0].tirn);
-	if (IS_ERR(flow_rule)) {
-		err = PTR_ERR(flow_rule);
+	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
+	if (err)
+		goto err_destroy_indirect_tirs;
+
+	err = mlx5e_create_rep_ttc_table(priv);
+	if (err)
 		goto err_destroy_direct_tirs;
-	}
-	rpriv->vport_rx_rule = flow_rule;
+
+	err = mlx5e_create_rep_vport_rx_rule(priv);
+	if (err)
+		goto err_destroy_ttc_table;
 
 	return 0;
 
+err_destroy_ttc_table:
+	mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
 err_destroy_direct_tirs:
-	mlx5e_destroy_direct_tirs(priv);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+err_destroy_indirect_tirs:
+	mlx5e_destroy_indirect_tirs(priv, false);
 err_destroy_direct_rqts:
-	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+err_destroy_indirect_rqts:
+	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+	mlx5e_close_drop_rq(&priv->drop_rq);
 	return err;
 }
 
@@ -1035,12 +1612,18 @@
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 
 	mlx5_del_flow_rules(rpriv->vport_rx_rule);
-	mlx5e_destroy_direct_tirs(priv);
-	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_indirect_tirs(priv, false);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+	mlx5e_close_drop_rq(&priv->drop_rq);
 }
 
 static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
 {
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_rep_uplink_priv *uplink_priv;
 	int err;
 
 	err = mlx5e_create_tises(priv);
@@ -1048,92 +1631,245 @@
 		mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
 		return err;
 	}
+
+	if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
+		uplink_priv = &rpriv->uplink_priv;
+
+		mutex_init(&uplink_priv->unready_flows_lock);
+		INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+		/* init shared tc flow table */
+		err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+		if (err)
+			goto destroy_tises;
+
+		mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
+
+		/* init indirect block notifications */
+		INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+		uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
+		err = register_netdevice_notifier(&uplink_priv->netdevice_nb);
+		if (err) {
+			mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
+			goto tc_esw_cleanup;
+		}
+	}
+
+	return 0;
+
+tc_esw_cleanup:
+	mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+destroy_tises:
+	mlx5e_destroy_tises(priv);
+	return err;
+}
+
+static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	mlx5e_destroy_tises(priv);
+
+	if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
+		/* clean indirect TC block notifications */
+		unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
+		mlx5e_rep_indr_clean_block_privs(rpriv);
+
+		/* delete shared tc flow table */
+		mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+		mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+	}
+}
+
+static void mlx5e_rep_enable(struct mlx5e_priv *priv)
+{
+	mlx5e_set_netdev_mtu_boundaries(priv);
+}
+
+static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
+{
 	return 0;
 }
 
-static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
+static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
 {
-#define	MLX5E_PORT_REPRESENTOR_NCH 1
-	return MLX5E_PORT_REPRESENTOR_NCH;
+	struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+
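+	/* PORT_CHANGE (link up/down) refreshes the carrier state;
+	 * PORT_AFFINITY re-offloads the uplink's TC flows.
+	 */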
+	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+		struct mlx5_eqe *eqe = data;
+
+		switch (eqe->sub_type) {
+		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+			queue_work(priv->wq, &priv->update_carrier_work);
+			break;
+		default:
+			return NOTIFY_DONE;
+		}
+
+		return NOTIFY_OK;
+	}
+
+	if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
+		struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+		queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
+{
+	struct net_device *netdev = priv->netdev;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	u16 max_mtu;
+
+	netdev->min_mtu = ETH_MIN_MTU;
+	mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+	mlx5e_set_dev_port_mtu(priv);
+
+	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+		  mlx5e_tc_reoffload_flows_work);
+
+	mlx5_lag_add(mdev, netdev);
+	priv->events_nb.notifier_call = uplink_rep_async_event;
+	mlx5_notifier_register(mdev, &priv->events_nb);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	mlx5e_dcbnl_initialize(priv);
+	mlx5e_dcbnl_init_app(priv);
+#endif
+}
+
+static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	mlx5e_dcbnl_delete_app(priv);
+#endif
+	mlx5_notifier_unregister(mdev, &priv->events_nb);
+	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+	mlx5_lag_remove(mdev);
 }
 
 static const struct mlx5e_profile mlx5e_rep_profile = {
 	.init			= mlx5e_init_rep,
+	.cleanup		= mlx5e_cleanup_rep,
 	.init_rx		= mlx5e_init_rep_rx,
 	.cleanup_rx		= mlx5e_cleanup_rep_rx,
 	.init_tx		= mlx5e_init_rep_tx,
-	.cleanup_tx		= mlx5e_cleanup_nic_tx,
+	.cleanup_tx		= mlx5e_cleanup_rep_tx,
+	.enable		        = mlx5e_rep_enable,
+	.update_rx		= mlx5e_update_rep_rx,
 	.update_stats           = mlx5e_rep_update_hw_counters,
-	.max_nch		= mlx5e_get_rep_max_num_channels,
-	.update_carrier		= NULL,
 	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
-	.rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */,
+	.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
 	.max_tc			= 1,
+	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
-/* e-Switch vport representors */
+static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
+	.init			= mlx5e_init_rep,
+	.cleanup		= mlx5e_cleanup_rep,
+	.init_rx		= mlx5e_init_rep_rx,
+	.cleanup_rx		= mlx5e_cleanup_rep_rx,
+	.init_tx		= mlx5e_init_rep_tx,
+	.cleanup_tx		= mlx5e_cleanup_rep_tx,
+	.enable		        = mlx5e_uplink_rep_enable,
+	.disable	        = mlx5e_uplink_rep_disable,
+	.update_rx		= mlx5e_update_rep_rx,
+	.update_stats           = mlx5e_uplink_rep_update_hw_counters,
+	.update_carrier	        = mlx5e_update_carrier,
+	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
+	.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+	.max_tc			= MLX5E_MAX_NUM_TC,
+	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
+};
 
-static int
-mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+static bool
+is_devlink_port_supported(const struct mlx5_core_dev *dev,
+			  const struct mlx5e_rep_priv *rpriv)
 {
-	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
-	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+	return rpriv->rep->vport == MLX5_VPORT_UPLINK ||
+	       rpriv->rep->vport == MLX5_VPORT_PF ||
+	       mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport);
+}
 
-	int err;
+static unsigned int
+vport_to_devlink_port_index(const struct mlx5_core_dev *dev, u16 vport_num)
+{
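+	/* Combine the function's vhca_id (upper 16 bits) with the vport
+	 * number so the port index stays unique across functions.
+	 */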
+	return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num;
+}
 
-	if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		err = mlx5e_add_sqs_fwd_rules(priv);
-		if (err)
-			return err;
+static int register_devlink_port(struct mlx5_core_dev *dev,
+				 struct mlx5e_rep_priv *rpriv)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	struct mlx5_eswitch_rep *rep = rpriv->rep;
+	struct netdev_phys_item_id ppid = {};
+	unsigned int dl_port_index = 0;
+
+	if (!is_devlink_port_supported(dev, rpriv))
+		return 0;
+
+	mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
+
+	if (rep->vport == MLX5_VPORT_UPLINK) {
+		devlink_port_attrs_set(&rpriv->dl_port,
+				       DEVLINK_PORT_FLAVOUR_PHYSICAL,
+				       PCI_FUNC(dev->pdev->devfn), false, 0,
+				       &ppid.id[0], ppid.id_len);
+		dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
+	} else if (rep->vport == MLX5_VPORT_PF) {
+		devlink_port_attrs_pci_pf_set(&rpriv->dl_port,
+					      &ppid.id[0], ppid.id_len,
+					      dev->pdev->devfn);
+		dl_port_index = rep->vport;
+	} else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch,
+					    rpriv->rep->vport)) {
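+		/* VF vports start at 1, while VF numbers start at 0 */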
+		devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
+					      &ppid.id[0], ppid.id_len,
+					      dev->pdev->devfn,
+					      rep->vport - 1);
+		dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
 	}
 
-	err = mlx5e_rep_neigh_init(rpriv);
-	if (err)
-		goto err_remove_sqs;
-
-	/* init shared tc flow table */
-	err = mlx5e_tc_esw_init(&rpriv->tc_ht);
-	if (err)
-		goto  err_neigh_cleanup;
-
-	return 0;
-
-err_neigh_cleanup:
-	mlx5e_rep_neigh_cleanup(rpriv);
-err_remove_sqs:
-	mlx5e_remove_sqs_fwd_rules(priv);
-	return err;
+	return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
 }
 
-static void
-mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+static void unregister_devlink_port(struct mlx5_core_dev *dev,
+				    struct mlx5e_rep_priv *rpriv)
 {
-	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
-	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
-	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-		mlx5e_remove_sqs_fwd_rules(priv);
-
-	/* clean uplink offloaded TC rules, delete shared tc flow table */
-	mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
-
-	mlx5e_rep_neigh_cleanup(rpriv);
+	if (is_devlink_port_supported(dev, rpriv))
+		devlink_port_unregister(&rpriv->dl_port);
 }
 
+/* e-Switch vport representors */
 static int
 mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5e_rep_priv *uplink_rpriv;
+	const struct mlx5e_profile *profile;
 	struct mlx5e_rep_priv *rpriv;
 	struct net_device *netdev;
-	struct mlx5e_priv *upriv;
-	int err;
+	int nch, err;
 
 	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
 	if (!rpriv)
 		return -ENOMEM;
 
-	netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv);
+	/* rpriv->rep must be set up front: profile->init() looks it up */
+	rpriv->rep = rep;
+
+	nch = mlx5e_get_max_num_channels(dev);
+	profile = (rep->vport == MLX5_VPORT_UPLINK) ?
+		  &mlx5e_uplink_rep_profile : &mlx5e_rep_profile;
+	netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
 	if (!netdev) {
 		pr_warn("Failed to create representor netdev for vport %d\n",
 			rep->vport);
@@ -1142,15 +1878,20 @@
 	}
 
 	rpriv->netdev = netdev;
-	rpriv->rep = rep;
-	rep->rep_if[REP_ETH].priv = rpriv;
+	rep->rep_data[REP_ETH].priv = rpriv;
 	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 
+	if (rep->vport == MLX5_VPORT_UPLINK) {
+		err = mlx5e_create_mdev_resources(dev);
+		if (err)
+			goto err_destroy_netdev;
+	}
+
 	err = mlx5e_attach_netdev(netdev_priv(netdev));
 	if (err) {
 		pr_warn("Failed to attach representor netdev for vport %d\n",
 			rep->vport);
-		goto err_destroy_netdev;
+		goto err_destroy_mdev_resources;
 	}
 
 	err = mlx5e_rep_neigh_init(rpriv);
@@ -1160,25 +1901,26 @@
 		goto err_detach_netdev;
 	}
 
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
-	upriv = netdev_priv(uplink_rpriv->netdev);
-	err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
-					 upriv);
-	if (err)
+	err = register_devlink_port(dev, rpriv);
+	if (err) {
+		esw_warn(dev, "Failed to register devlink port %d\n",
+			 rep->vport);
 		goto err_neigh_cleanup;
+	}
 
 	err = register_netdev(netdev);
 	if (err) {
 		pr_warn("Failed to register representor netdev for vport %d\n",
 			rep->vport);
-		goto err_egdev_cleanup;
+		goto err_devlink_cleanup;
 	}
 
+	if (is_devlink_port_supported(dev, rpriv))
+		devlink_port_type_eth_set(&rpriv->dl_port, netdev);
 	return 0;
 
-err_egdev_cleanup:
-	tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
-				     upriv);
+err_devlink_cleanup:
+	unregister_devlink_port(dev, rpriv);
 
 err_neigh_cleanup:
 	mlx5e_rep_neigh_cleanup(rpriv);
@@ -1186,6 +1928,10 @@
 err_detach_netdev:
 	mlx5e_detach_netdev(netdev_priv(netdev));
 
+err_destroy_mdev_resources:
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		mlx5e_destroy_mdev_resources(dev);
+
 err_destroy_netdev:
 	mlx5e_destroy_netdev(netdev_priv(netdev));
 	kfree(rpriv);
@@ -1198,18 +1944,17 @@
 	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
 	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5_core_dev *dev = priv->mdev;
 	void *ppriv = priv->ppriv;
-	struct mlx5e_priv *upriv;
 
+	if (is_devlink_port_supported(dev, rpriv))
+		devlink_port_type_clear(&rpriv->dl_port);
 	unregister_netdev(netdev);
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
-						    REP_ETH);
-	upriv = netdev_priv(uplink_rpriv->netdev);
-	tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
-				     upriv);
+	unregister_devlink_port(dev, rpriv);
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_detach_netdev(priv);
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		mlx5e_destroy_mdev_resources(priv->mdev);
 	mlx5e_destroy_netdev(priv);
 	kfree(ppriv); /* mlx5e_rep_priv */
 }
@@ -1223,72 +1968,22 @@
 	return rpriv->netdev;
 }
 
-static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_eswitch *esw   = mdev->priv.eswitch;
-	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
-	int vport;
+static const struct mlx5_eswitch_rep_ops rep_ops = {
+	.load = mlx5e_vport_rep_load,
+	.unload = mlx5e_vport_rep_unload,
+	.get_proto_dev = mlx5e_vport_rep_get_proto_dev
+};
 
-	for (vport = 1; vport < total_vfs; vport++) {
-		struct mlx5_eswitch_rep_if rep_if = {};
-
-		rep_if.load = mlx5e_vport_rep_load;
-		rep_if.unload = mlx5e_vport_rep_unload;
-		rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
-		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
-	}
-}
-
-static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
-	int vport;
-
-	for (vport = 1; vport < total_vfs; vport++)
-		mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
-}
-
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_eswitch *esw   = mdev->priv.eswitch;
-	struct mlx5_eswitch_rep_if rep_if;
-	struct mlx5e_rep_priv *rpriv;
-
-	rpriv = priv->ppriv;
-	rpriv->netdev = priv->netdev;
-
-	rep_if.load = mlx5e_nic_rep_load;
-	rep_if.unload = mlx5e_nic_rep_unload;
-	rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
-	rep_if.priv = rpriv;
-	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
-	mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
-
-	mlx5e_rep_register_vf_vports(priv); /* VFs vports */
-}
-
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_eswitch *esw   = mdev->priv.eswitch;
-
-	mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
-	mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
-}
-
-void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	struct mlx5e_rep_priv *rpriv;
 
-	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
-	if (!rpriv)
-		return NULL;
+	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH);
+}
 
-	rpriv->rep = &esw->offloads.vport_reps[0];
-	return rpriv;
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	mlx5_eswitch_unregister_vport_reps(esw, REP_ETH);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 844d32d..31f83c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -35,8 +35,10 @@
 
 #include <net/ip_tunnels.h>
 #include <linux/rhashtable.h>
+#include <linux/mutex.h>
 #include "eswitch.h"
 #include "en.h"
+#include "lib/port_tun.h"
 
 #ifdef CONFIG_MLX5_ESWITCH
 struct mlx5e_neigh_update_table {
@@ -47,25 +49,54 @@
 	 */
 	struct list_head	neigh_list;
 	/* protect lookup/remove operations */
-	spinlock_t              encap_lock;
+	struct mutex		encap_lock;
 	struct notifier_block   netevent_nb;
 	struct delayed_work     neigh_stats_work;
 	unsigned long           min_interval; /* jiffies */
 };
 
+struct mlx5_rep_uplink_priv {
+	/* Filters DB - instantiated by the uplink representor and shared by
+	 * the uplink's VFs
+	 */
+	struct rhashtable  tc_ht;
+
+	/* indirect block callbacks are invoked on bind/unbind events
+	 * on registered higher level devices (e.g. tunnel devices)
+	 *
+	 * tc_indr_block_priv_list is used to look up an indirect
+	 * callback's private data
+	 *
+	 * netdevice_nb is the netdev events notifier - used to register
+	 * tunnel devices for block events
+	 */
+	struct list_head	    tc_indr_block_priv_list;
+	struct notifier_block	    netdevice_nb;
+
+	struct mlx5_tun_entropy tun_entropy;
+
+	/* protects unready_flows */
+	struct mutex                unready_flows_lock;
+	struct list_head            unready_flows;
+	struct work_struct          reoffload_flows_work;
+};
+
 struct mlx5e_rep_priv {
 	struct mlx5_eswitch_rep *rep;
 	struct mlx5e_neigh_update_table neigh_update;
 	struct net_device      *netdev;
 	struct mlx5_flow_handle *vport_rx_rule;
 	struct list_head       vport_sqs_list;
-	struct rhashtable      tc_ht; /* valid for uplink rep */
+	struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
+	struct rtnl_link_stats64 prev_vf_vport_stats;
+	struct devlink_port dl_port;
 };
 
 static inline
 struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
 {
-	return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv;
+	return rep->rep_data[REP_ETH].priv;
 }
 
 struct mlx5e_neigh {
@@ -80,6 +111,7 @@
 struct mlx5e_neigh_hash_entry {
 	struct rhash_head rhash_node;
 	struct mlx5e_neigh m_neigh;
+	struct mlx5e_priv *priv;
 
 	/* Save the neigh hash entry in a list on the representor in
 	 * addition to the hash table. In order to iterate easily over the
@@ -87,6 +119,8 @@
 	 */
 	struct list_head neigh_list;
 
+	/* protects encap list */
+	spinlock_t encap_list_lock;
 	/* encap list sharing the same neigh */
 	struct list_head encap_list;
 
@@ -107,6 +141,8 @@
 	 * 'used' value and avoid neigh deleting by the kernel.
 	 */
 	unsigned long reported_lastuse;
+
+	struct rcu_head rcu;
 };
 
 enum {
@@ -115,6 +151,8 @@
 };
 
 struct mlx5e_encap_entry {
+	/* attached neigh hash entry */
+	struct mlx5e_neigh_hash_entry *nhe;
 	/* neigh hash entry list of encaps sharing the same neigh */
 	struct list_head encap_list;
 	struct mlx5e_neigh m_neigh;
@@ -123,15 +161,21 @@
 	 */
 	struct hlist_node encap_hlist;
 	struct list_head flows;
-	u32 encap_id;
-	struct ip_tunnel_info tun_info;
+	struct mlx5_pkt_reformat *pkt_reformat;
+	const struct ip_tunnel_info *tun_info;
 	unsigned char h_dest[ETH_ALEN];	/* destination eth addr	*/
 
 	struct net_device *out_dev;
-	int tunnel_type;
+	struct net_device *route_dev;
+	struct mlx5e_tc_tunnel *tunnel;
+	int reformat_type;
 	u8 flags;
 	char *encap_header;
 	int encap_size;
+	refcount_t refcnt;
+	struct completion res_ready;
+	int compl_result;
+	struct rcu_head rcu;
 };
 
 struct mlx5e_rep_sq {
@@ -139,17 +183,12 @@
 	struct list_head	 list;
 };
 
-void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp);
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
-
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
 void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
@@ -158,12 +197,17 @@
 				  struct mlx5e_encap_entry *e);
 
 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+bool mlx5e_eswitch_rep(struct net_device *netdev);
+
 #else /* CONFIG_MLX5_ESWITCH */
-static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
-static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
 static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
 static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
 #endif
 
+static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
+{
+	return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv);
+}
 #endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index d543a5c..82cffb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -34,9 +34,10 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
-#include <net/busy_poll.h>
+#include <linux/indirect_call_wrapper.h>
 #include <net/ip6_checksum.h>
 #include <net/page_pool.h>
+#include <net/inet_ecn.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -46,46 +47,53 @@
 #include "en_accel/tls_rxtx.h"
 #include "lib/clock.h"
 #include "en/xdp.h"
+#include "en/xsk/rx.h"
+#include "en/health.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
 {
 	return config->rx_filter == HWTSTAMP_FILTER_ALL;
 }
 
-static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
-				       void *data)
+static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
+				       u32 cqcc, void *data)
 {
-	u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc);
+	u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
 
-	memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+	memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
 }
 
 static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
-					 struct mlx5e_cq *cq, u32 cqcc)
+					 struct mlx5_cqwq *wq,
+					 u32 cqcc)
 {
-	mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
-	cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
-	cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+	struct mlx5e_cq_decomp *cqd = &rq->cqd;
+	struct mlx5_cqe64 *title = &cqd->title;
+
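+	/* The title CQE opens a compression session: its byte_cnt holds
+	 * the number of CQEs in the session rather than a packet length.
+	 */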
+	mlx5e_read_cqe_slot(wq, cqcc, title);
+	cqd->left        = be32_to_cpu(title->byte_cnt);
+	cqd->wqe_counter = be16_to_cpu(title->wqe_counter);
 	rq->stats->cqe_compress_blks++;
 }
 
-static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq,
+					    struct mlx5e_cq_decomp *cqd,
+					    u32 cqcc)
 {
-	mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
-	cq->mini_arr_idx = 0;
+	mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr);
+	cqd->mini_arr_idx = 0;
 }
 
-static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n)
 {
-	struct mlx5_cqwq *wq = &cq->wq;
-
+	u32 cqcc   = wq->cc;
 	u8  op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
 	u32 ci     = mlx5_cqwq_ctr2ix(wq, cqcc);
 	u32 wq_sz  = mlx5_cqwq_get_size(wq);
 	u32 ci_top = min_t(u32, wq_sz, ci + n);
 
 	for (; ci < ci_top; ci++, n--) {
-		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
 
 		cqe->op_own = op_own;
 	}
@@ -93,7 +101,7 @@
 	if (unlikely(ci == wq_sz)) {
 		op_own = !op_own;
 		for (ci = 0; ci < n; ci++) {
-			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
 
 			cqe->op_own = op_own;
 		}
@@ -101,68 +109,79 @@
 }
 
 static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
-					struct mlx5e_cq *cq, u32 cqcc)
+					struct mlx5_cqwq *wq,
+					u32 cqcc)
 {
-	cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
-	cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
-	cq->title.op_own      &= 0xf0;
-	cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
-	cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);
+	struct mlx5e_cq_decomp *cqd = &rq->cqd;
+	struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx];
+	struct mlx5_cqe64 *title = &cqd->title;
+
+	title->byte_cnt     = mini_cqe->byte_cnt;
+	title->check_sum    = mini_cqe->checksum;
+	title->op_own      &= 0xf0;
+	title->op_own      |= 0x01 & (cqcc >> wq->fbc.log_sz);
+	title->wqe_counter  = cpu_to_be16(cqd->wqe_counter);
 
 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-		cq->decmprs_wqe_counter +=
-			mpwrq_get_cqe_consumed_strides(&cq->title);
+		cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
 	else
-		cq->decmprs_wqe_counter =
-			mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1);
+		cqd->wqe_counter =
+			mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1);
 }
 
 static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
-						struct mlx5e_cq *cq, u32 cqcc)
+						struct mlx5_cqwq *wq,
+						u32 cqcc)
 {
-	mlx5e_decompress_cqe(rq, cq, cqcc);
-	cq->title.rss_hash_type   = 0;
-	cq->title.rss_hash_result = 0;
+	struct mlx5e_cq_decomp *cqd = &rq->cqd;
+
+	mlx5e_decompress_cqe(rq, wq, cqcc);
+	cqd->title.rss_hash_type   = 0;
+	cqd->title.rss_hash_result = 0;
 }
 
 static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
-					     struct mlx5e_cq *cq,
+					     struct mlx5_cqwq *wq,
 					     int update_owner_only,
 					     int budget_rem)
 {
-	u32 cqcc = cq->wq.cc + update_owner_only;
+	struct mlx5e_cq_decomp *cqd = &rq->cqd;
+	u32 cqcc = wq->cc + update_owner_only;
 	u32 cqe_count;
 	u32 i;
 
-	cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+	cqe_count = min_t(u32, cqd->left, budget_rem);
 
 	for (i = update_owner_only; i < cqe_count;
-	     i++, cq->mini_arr_idx++, cqcc++) {
-		if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
-			mlx5e_read_mini_arr_slot(cq, cqcc);
+	     i++, cqd->mini_arr_idx++, cqcc++) {
+		if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+			mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
 
-		mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
-		rq->handle_rx_cqe(rq, &cq->title);
+		mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
+		rq->handle_rx_cqe(rq, &cqd->title);
 	}
-	mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
-	cq->wq.cc = cqcc;
-	cq->decmprs_left -= cqe_count;
+	mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
+	wq->cc = cqcc;
+	cqd->left -= cqe_count;
 	rq->stats->cqe_compress_pkts += cqe_count;
 
 	return cqe_count;
 }
 
 static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
-					      struct mlx5e_cq *cq,
+					      struct mlx5_cqwq *wq,
 					      int budget_rem)
 {
-	mlx5e_read_title_slot(rq, cq, cq->wq.cc);
-	mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
-	mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
-	rq->handle_rx_cqe(rq, &cq->title);
-	cq->mini_arr_idx++;
+	struct mlx5e_cq_decomp *cqd = &rq->cqd;
+	u32 cc = wq->cc;
 
-	return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+	mlx5e_read_title_slot(rq, wq, cc);
+	mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
+	mlx5e_decompress_cqe(rq, wq, cc);
+	rq->handle_rx_cqe(rq, &cqd->title);
+	cqd->mini_arr_idx++;
+
+	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
 }
 
 static inline bool mlx5e_page_is_reserved(struct page *page)
@@ -218,8 +237,8 @@
 	return true;
 }
 
-static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
-					  struct mlx5e_dma_info *dma_info)
+static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
+					struct mlx5e_dma_info *dma_info)
 {
 	if (mlx5e_rx_cache_get(rq, dma_info))
 		return 0;
@@ -231,7 +250,7 @@
 	dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
 				      PAGE_SIZE, rq->buff.map_dir);
 	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
-		put_page(dma_info->page);
+		page_pool_recycle_direct(rq->page_pool, dma_info->page);
 		dma_info->page = NULL;
 		return -ENOMEM;
 	}
@@ -239,13 +258,23 @@
 	return 0;
 }
 
+static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
+				   struct mlx5e_dma_info *dma_info)
+{
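+	/* AF_XDP RQs take pages from the XSK UMEM; regular RQs take them
+	 * from the page pool (fronted by the local RX page cache).
+	 */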
+	if (rq->umem)
+		return mlx5e_xsk_page_alloc_umem(rq, dma_info);
+	else
+		return mlx5e_page_alloc_pool(rq, dma_info);
+}
+
 void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
 {
 	dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
 }
 
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
-			bool recycle)
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+				struct mlx5e_dma_info *dma_info,
+				bool recycle)
 {
 	if (likely(recycle)) {
 		if (mlx5e_rx_cache_put(rq, dma_info))
@@ -255,10 +284,25 @@
 		page_pool_recycle_direct(rq->page_pool, dma_info->page);
 	} else {
 		mlx5e_page_dma_unmap(rq, dma_info);
+		page_pool_release_page(rq->page_pool, dma_info->page);
 		put_page(dma_info->page);
 	}
 }
 
+static inline void mlx5e_page_release(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info,
+				      bool recycle)
+{
+	if (rq->umem)
+		/* The `recycle` parameter is ignored, and the page is always
+		 * put into the Reuse Ring, because there is no way to return
+		 * the page to userspace when the interface goes down.
+		 */
+		mlx5e_xsk_page_release(rq, dma_info);
+	else
+		mlx5e_page_release_dynamic(rq, dma_info, recycle);
+}
+
 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
 				    struct mlx5e_wqe_frag_info *frag)
 {
@@ -270,7 +314,7 @@
 		 * offset) should just use the new one without replenishing again
 		 * by themselves.
 		 */
-		err = mlx5e_page_alloc_mapped(rq, frag->di);
+		err = mlx5e_page_alloc(rq, frag->di);
 
 	return err;
 }
@@ -336,6 +380,13 @@
 	int err;
 	int i;
 
+	if (rq->umem) {
+		int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
+
+		if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired)))
+			return -ENOMEM;
+	}
+
 	for (i = 0; i < wqe_bulk; i++) {
 		struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
 
@@ -369,7 +420,7 @@
 static inline void
 mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
 		      struct mlx5e_dma_info *dma_info,
-		      int offset_from, int offset_to, u32 headlen)
+		      int offset_from, u32 headlen)
 {
 	const void *from = page_address(dma_info->page) + offset_from;
 	/* Aligning len to sizeof(long) optimizes memcpy performance */
@@ -377,47 +428,37 @@
 
 	dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
 				DMA_FROM_DEVICE);
-	skb_copy_to_linear_data_offset(skb, offset_to, from, len);
-}
-
-static inline void
-mlx5e_copy_skb_header_mpwqe(struct device *pdev,
-			    struct sk_buff *skb,
-			    struct mlx5e_dma_info *dma_info,
-			    u32 offset, u32 headlen)
-{
-	u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
-
-	mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg);
-
-	if (unlikely(offset + headlen > PAGE_SIZE)) {
-		dma_info++;
-		mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg,
-				      headlen - headlen_pg);
-	}
+	skb_copy_to_linear_data(skb, from, len);
 }
 
 static void
 mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
 {
-	const bool no_xdp_xmit =
-		bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+	bool no_xdp_xmit;
 	struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
 	int i;
 
+	/* All pages went to XDP xmit and are released on its completion -
+	 * a common case for AF_XDP.
+	 */
+	if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE))
+		return;
+
+	no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap,
+				   MLX5_MPWRQ_PAGES_PER_WQE);
+
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
 		if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
 			mlx5e_page_release(rq, &dma_info[i], recycle);
 }
 
-static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
 {
 	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
-	struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
-	rq->mpwqe.umr_in_progress = false;
+	do {
+		u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head);
 
-	mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
+		mlx5_wq_ll_push(wq, next_wqe_index);
+	} while (--n);
 
 	/* ensure wqes are visible to device before updating doorbell record */
 	dma_wmb();
@@ -425,11 +466,6 @@
 	mlx5_wq_ll_update_db_record(wq);
 }
 
-static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
-{
-	return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
-}
-
 static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
 					      struct mlx5_wq_cyc *wq,
 					      u16 pi, u16 nnops)
@@ -457,6 +493,12 @@
 	int err;
 	int i;
 
+	if (rq->umem &&
+	    unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) {
+		err = -ENOMEM;
+		goto err;
+	}
+
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
 	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
@@ -465,12 +507,10 @@
 	}
 
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2))
-		memcpy(umr_wqe, &rq->mpwqe.umr_wqe,
-		       offsetof(struct mlx5e_umr_wqe, inline_mtts));
+	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
 
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
-		err = mlx5e_page_alloc_mapped(rq, dma_info);
+		err = mlx5e_page_alloc(rq, dma_info);
 		if (unlikely(err))
 			goto err_unmap;
 		umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
@@ -479,16 +519,16 @@
 	bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 	wi->consumed_strides = 0;
 
-	rq->mpwqe.umr_in_progress = true;
-
 	umr_wqe->ctrl.opmod_idx_opcode =
 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
 			    MLX5_OPCODE_UMR);
 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
 
 	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+	sq->db.ico_wqe[pi].umr.rq = rq;
 	sq->pc += MLX5E_UMR_WQEBBS;
-	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
+
+	sq->doorbell_cseg = &umr_wqe->ctrl;
 
 	return 0;
 
@@ -497,6 +537,8 @@
 		dma_info--;
 		mlx5e_page_release(rq, dma_info, true);
 	}
+
+err:
 	rq->stats->buff_alloc_err++;
 
 	return err;
@@ -543,37 +585,12 @@
 	return !!err;
 }
 
-static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
-					     struct mlx5e_icosq *sq,
-					     struct mlx5e_rq *rq,
-					     struct mlx5_cqe64 *cqe)
-{
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	u16 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
-	struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
-
-	mlx5_cqwq_pop(&cq->wq);
-
-	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
-		netdev_WARN_ONCE(cq->channel->netdev,
-				 "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
-		return;
-	}
-
-	if (likely(icowi->opcode == MLX5_OPCODE_UMR)) {
-		mlx5e_post_rx_mpwqe(rq);
-		return;
-	}
-
-	if (unlikely(icowi->opcode != MLX5_OPCODE_NOP))
-		netdev_WARN_ONCE(cq->channel->netdev,
-				 "Bad OPCODE in ICOSQ WQE info: 0x%x\n", icowi->opcode);
-}
-
-static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 {
 	struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
 	struct mlx5_cqe64 *cqe;
+	u16 sqcc;
+	int i;
 
 	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
 		return;
@@ -582,28 +599,110 @@
 	if (likely(!cqe))
 		return;
 
-	/* by design, there's only a single cqe */
-	mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe);
+	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+	 * otherwise a cq overrun may occur
+	 */
+	sqcc = sq->cc;
+
+	i = 0;
+	do {
+		u16 wqe_counter;
+		bool last_wqe;
+
+		mlx5_cqwq_pop(&cq->wq);
+
+		wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+			netdev_WARN_ONCE(cq->channel->netdev,
+					 "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
+			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+				queue_work(cq->channel->priv->wq, &sq->recover_work);
+			break;
+		}
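+		/* One CQE may complete several WQEs; walk the SQ forward
+		 * until the reported wqe_counter is reached.
+		 */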
+		do {
+			struct mlx5e_sq_wqe_info *wi;
+			u16 ci;
+
+			last_wqe = (sqcc == wqe_counter);
+
+			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+			wi = &sq->db.ico_wqe[ci];
+
+			if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
+				sqcc += MLX5E_UMR_WQEBBS;
+				wi->umr.rq->mpwqe.umr_completed++;
+			} else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
+				sqcc++;
+			} else {
+				netdev_WARN_ONCE(cq->channel->netdev,
+						 "Bad OPCODE in ICOSQ WQE info: 0x%x\n",
+						 wi->opcode);
+			}
+
+		} while (!last_wqe);
+
+	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+	sq->cc = sqcc;
 
 	mlx5_cqwq_update_db_record(&cq->wq);
 }
 
 bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 {
+	struct mlx5e_icosq *sq = &rq->channel->icosq;
 	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+	u8  umr_completed = rq->mpwqe.umr_completed;
+	int alloc_err = 0;
+	u8  missing, i;
+	u16 head;
 
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return false;
 
-	mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq);
+	if (umr_completed) {
+		mlx5e_post_rx_mpwqe(rq, umr_completed);
+		rq->mpwqe.umr_in_progress -= umr_completed;
+		rq->mpwqe.umr_completed = 0;
+	}
 
-	if (mlx5_wq_ll_is_full(wq))
+	missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
+
+	if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
+		rq->stats->congst_umr++;
+
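+	/* Don't post until at least UMR_WQE_BULK WQEs are missing, so UMR
+	 * posts are batched and ring a single doorbell.
+	 */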
+#define UMR_WQE_BULK (2)
+	if (likely(missing < UMR_WQE_BULK))
 		return false;
 
-	if (!rq->mpwqe.umr_in_progress)
-		mlx5e_alloc_rx_mpwqe(rq, wq->head);
-	else
-		rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2;
+	head = rq->mpwqe.actual_wq_head;
+	i = missing;
+	do {
+		alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
+
+		if (unlikely(alloc_err))
+			break;
+		head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
+	} while (--i);
+
+	rq->mpwqe.umr_last_bulk    = missing - i;
+	if (sq->doorbell_cseg) {
+		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+		sq->doorbell_cseg = NULL;
+	}
+
+	rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
+	rq->mpwqe.actual_wq_head   = head;
+
+	/* If XSK Fill Ring doesn't have enough frames, report the error, so
+	 * that one of the actions can be performed:
+	 * 1. If need_wakeup is used, signal that the application has to kick
+	 * the driver when it refills the Fill Ring.
+	 * 2. Otherwise, busy poll by rescheduling the NAPI poll.
+	 */
+	if (unlikely(alloc_err == -ENOMEM && rq->umem))
+		return true;
 
 	return false;
 }
@@ -688,23 +787,119 @@
 	skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
 }
 
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+					__be16 *proto)
 {
-	__be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
+	*proto = ((struct ethhdr *)skb->data)->h_proto;
+	*proto = __vlan_get_protocol(skb, *proto, network_depth);
 
-	ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
-	return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
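+	/* Callers dereference the IP header, so make sure it is present
+	 * in the linear data area before reporting success.
+	 */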
+	if (*proto == htons(ETH_P_IP))
+		return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+	if (*proto == htons(ETH_P_IPV6))
+		return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+	return false;
 }
 
-static u32 mlx5e_get_fcs(const struct sk_buff *skb)
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
 {
-	const void *fcs_bytes;
-	u32 _fcs_bytes;
+	int network_depth = 0;
+	__be16 proto;
+	void *ip;
+	int rc;
 
-	fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
-				       ETH_FCS_LEN, &_fcs_bytes);
+	if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+		return;
 
-	return __get_unaligned_cpu32(fcs_bytes);
+	ip = skb->data + network_depth;
+	rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+					 IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
+
+	rq->stats->ecn_mark += !!rc;
+}
+
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+	void *ip_p = skb->data + network_depth;
+
+	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+					    ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
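+/* Tail pads of up to MAX_PADDING bytes are checksummed inline from a
+ * local copy; longer pads fall back to the generic skb_checksum() walk.
+ */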
+#define MAX_PADDING 8
+
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+		       struct mlx5e_rq_stats *stats)
+{
+	stats->csum_complete_tail_slow++;
+	skb->csum = csum_block_add(skb->csum,
+				   skb_checksum(skb, offset, len, 0),
+				   offset);
+}
+
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+		  struct mlx5e_rq_stats *stats)
+{
+	u8 tail_padding[MAX_PADDING];
+	int len = skb->len - offset;
+	void *tail;
+
+	if (unlikely(len > MAX_PADDING)) {
+		tail_padding_csum_slow(skb, offset, len, stats);
+		return;
+	}
+
+	tail = skb_header_pointer(skb, offset, len, tail_padding);
+	if (unlikely(!tail)) {
+		tail_padding_csum_slow(skb, offset, len, stats);
+		return;
+	}
+
+	stats->csum_complete_tail++;
+	skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+static void
+mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto,
+		     struct mlx5e_rq_stats *stats)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr   *ip4;
+	int pkt_len;
+
+	/* Fixup vlan headers, if any */
+	if (network_depth > ETH_HLEN)
+		/* CQE csum is calculated from the IP header and does
+		 * not cover VLAN headers (if present). This will add
+		 * the checksum manually.
+		 */
+		skb->csum = csum_partial(skb->data + ETH_HLEN,
+					 network_depth - ETH_HLEN,
+					 skb->csum);
+
+	/* Fixup tail padding, if any */
+	switch (proto) {
+	case htons(ETH_P_IP):
+		ip4 = (struct iphdr *)(skb->data + network_depth);
+		pkt_len = network_depth + ntohs(ip4->tot_len);
+		break;
+	case htons(ETH_P_IPV6):
+		ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+		pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+		break;
+	default:
+		return;
+	}
+
+	if (likely(pkt_len >= skb->len))
+		return;
+
+	tail_padding_csum(skb, pkt_len, stats);
 }
 
 static inline void mlx5e_handle_csum(struct net_device *netdev,
@@ -715,6 +910,7 @@
 {
 	struct mlx5e_rq_stats *stats = rq->stats;
 	int network_depth = 0;
+	__be16 proto;
 
 	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
 		goto csum_none;
@@ -725,25 +921,38 @@
 		return;
 	}
 
-	if (likely(is_last_ethertype_ip(skb, &network_depth))) {
+	/* True when explicitly set via priv flag, or XDP prog is loaded */
+	if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+		goto csum_unnecessary;
+
+	/* CQE csum doesn't cover padding octets in short Ethernet
+	 * frames, and the pad is appended before the FCS field is
+	 * calculated and appended.
+	 *
+	 * Detecting such padded frames requires verifying and parsing
+	 * the IP headers, so we simply force all short frames to be
+	 * CHECKSUM_UNNECESSARY even if they are not padded.
+	 */
+	if (short_frame(skb->len))
+		goto csum_unnecessary;
+
+	if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+		if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+			goto csum_unnecessary;
+
+		stats->csum_complete++;
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
-		if (network_depth > ETH_HLEN)
-			/* CQE csum is calculated from the IP header and does
-			 * not cover VLAN headers (if present). This will add
-			 * the checksum manually.
-			 */
-			skb->csum = csum_partial(skb->data + ETH_HLEN,
-						 network_depth - ETH_HLEN,
-						 skb->csum);
-		if (unlikely(netdev->features & NETIF_F_RXFCS))
-			skb->csum = csum_block_add(skb->csum,
-						   (__force __wsum)mlx5e_get_fcs(skb),
-						   skb->len - ETH_FCS_LEN);
-		stats->csum_complete++;
+
+		if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
+			return; /* CQE csum covers all received bytes */
+
+		/* csum might need some fixups ... */
+		mlx5e_skb_csum_fixup(skb, network_depth, proto, stats);
 		return;
 	}
 
+csum_unnecessary:
 	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
 		   (cqe->hds_ip_ext & CQE_L4_OK))) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -761,6 +970,8 @@
 	stats->csum_none++;
 }
 
+#define MLX5E_CE_BIT_MASK 0x80
+
 static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 				      u32 cqe_bcnt,
 				      struct mlx5e_rq *rq,
@@ -805,6 +1016,10 @@
 	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 
 	mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+	/* checking CE bit in cqe - MSB in ml_path field */
+	if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+		mlx5e_enable_ecn(rq, skb);
+
 	skb->protocol = eth_type_trans(skb, netdev);
 }
 
@@ -858,13 +1073,8 @@
 	prefetchw(va); /* xdp_frame data area */
 	prefetch(data);
 
-	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
-		rq->stats->wqe_err++;
-		return NULL;
-	}
-
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt);
+	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false);
 	rcu_read_unlock();
 	if (consumed)
 		return NULL; /* page/packet was consumed by XDP */
@@ -890,11 +1100,6 @@
 	u16 byte_cnt     = cqe_bcnt - headlen;
 	struct sk_buff *skb;
 
-	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
-		rq->stats->wqe_err++;
-		return NULL;
-	}
-
 	/* XDP is not supported in this configuration, as incoming packets
 	 * might spread among multiple pages.
 	 */
@@ -920,8 +1125,7 @@
 	}
 
 	/* copy header */
-	mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset,
-			      0, headlen);
+	mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
 	skb->len  += headlen;
@@ -929,6 +1133,15 @@
 	return skb;
 }
 
+static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+	struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
+
+	if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
+	    !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state))
+		queue_work(rq->channel->priv->wq, &rq->recover_work);
+}
+
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -941,7 +1154,16 @@
 	wi       = get_frag(rq, ci);
 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
-	skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		trigger_report(rq, cqe);
+		rq->stats->wqe_err++;
+		goto free_wqe;
+	}
+
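+	/* INDIRECT_CALL_2() tries the two known handlers as direct calls
+	 * first, avoiding a retpoline-penalized indirect call.
+	 */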
+	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+			      mlx5e_skb_from_cqe_linear,
+			      mlx5e_skb_from_cqe_nonlinear,
+			      rq, cqe, wi, cqe_bcnt);
 	if (!skb) {
 		/* probably for XDP */
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
@@ -979,6 +1201,11 @@
 	wi       = get_frag(rq, ci);
 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
+	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		rq->stats->wqe_err++;
+		goto free_wqe;
+	}
+
 	skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (!skb) {
 		/* probably for XDP */
@@ -1043,8 +1270,7 @@
 		di++;
 	}
 	/* copy header */
-	mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di,
-				    head_offset, headlen);
+	mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
 	skb->len  += headlen;
@@ -1080,7 +1306,7 @@
 	prefetch(data);
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false);
 	rcu_read_unlock();
 	if (consumed) {
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
@@ -1114,7 +1340,8 @@
 
 	wi->consumed_strides += cstrides;
 
-	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		trigger_report(rq, cqe);
 		rq->stats->wqe_err++;
 		goto mpwrq_cqe_out;
 	}
@@ -1129,8 +1356,10 @@
 
 	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
 
-	skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
-					   page_idx);
+	skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
+			      mlx5e_skb_from_cqe_mpwrq_linear,
+			      mlx5e_skb_from_cqe_mpwrq_nonlinear,
+			      rq, wi, cqe_bcnt, head_offset, page_idx);
 	if (!skb)
 		goto mpwrq_cqe_out;
 
@@ -1150,46 +1379,45 @@
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-	struct mlx5e_xdpsq *xdpsq;
+	struct mlx5_cqwq *cqwq = &cq->wq;
 	struct mlx5_cqe64 *cqe;
 	int work_done = 0;
 
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return 0;
 
-	if (cq->decmprs_left)
-		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+	if (rq->cqd.left) {
+		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
+		if (rq->cqd.left || work_done >= budget)
+			goto out;
+	}
 
-	cqe = mlx5_cqwq_get_cqe(&cq->wq);
-	if (!cqe)
+	cqe = mlx5_cqwq_get_cqe(cqwq);
+	if (!cqe) {
+		if (unlikely(work_done))
+			goto out;
 		return 0;
-
-	xdpsq = &rq->xdpsq;
+	}
 
 	do {
 		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
 			work_done +=
-				mlx5e_decompress_cqes_start(rq, cq,
+				mlx5e_decompress_cqes_start(rq, cqwq,
 							    budget - work_done);
 			continue;
 		}
 
-		mlx5_cqwq_pop(&cq->wq);
+		mlx5_cqwq_pop(cqwq);
 
-		rq->handle_rx_cqe(rq, cqe);
-	} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+		INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+				mlx5e_handle_rx_cqe, rq, cqe);
+	} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
 
-	if (xdpsq->doorbell) {
-		mlx5e_xmit_xdp_doorbell(xdpsq);
-		xdpsq->doorbell = false;
-	}
+out:
+	if (rq->xdp_prog)
+		mlx5e_xdp_rx_poll_complete(rq);
 
-	if (xdpsq->redirect_flush) {
-		xdp_do_flush_map();
-		xdpsq->redirect_flush = false;
-	}
-
-	mlx5_cqwq_update_db_record(&cq->wq);
+	mlx5_cqwq_update_db_record(cqwq);
 
 	/* ensure cq space is freed before enabling more cqes */
 	wmb();
@@ -1207,8 +1435,8 @@
 					 u32 cqe_bcnt,
 					 struct sk_buff *skb)
 {
-	struct mlx5e_rq_stats *stats = rq->stats;
 	struct hwtstamp_config *tstamp;
+	struct mlx5e_rq_stats *stats;
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
 	char *pseudo_header;
@@ -1231,6 +1459,7 @@
 
 	priv = mlx5i_epriv(netdev);
 	tstamp = &priv->tstamp;
+	stats = &priv->channel_stats[rq->ix].rq;
 
 	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
 	dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
@@ -1249,8 +1478,14 @@
 
 	skb->protocol = *((__be16 *)(skb->data));
 
-	skb->ip_summed = CHECKSUM_COMPLETE;
-	skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+	if (netdev->features & NETIF_F_RXCSUM) {
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+		stats->csum_complete++;
+	} else {
+		skb->ip_summed = CHECKSUM_NONE;
+		stats->csum_none++;
+	}
 
 	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
 		skb_hwtstamps(skb)->hwtstamp =
@@ -1269,7 +1504,6 @@
 
 	skb->dev = netdev;
 
-	stats->csum_complete++;
 	stats->packets++;
 	stats->bytes += cqe_bcnt;
 }
@@ -1286,7 +1520,15 @@
 	wi       = get_frag(rq, ci);
 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
-	skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		rq->stats->wqe_err++;
+		goto wq_free_wqe;
+	}
+
+	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+			      mlx5e_skb_from_cqe_linear,
+			      mlx5e_skb_from_cqe_nonlinear,
+			      rq, cqe, wi, cqe_bcnt);
 	if (!skb)
 		goto wq_free_wqe;
 
@@ -1318,23 +1560,27 @@
 	wi       = get_frag(rq, ci);
 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
-	skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
-	if (unlikely(!skb)) {
-		/* a DROP, save the page-reuse checks */
-		mlx5e_free_rx_wqe(rq, wi, true);
-		goto wq_cyc_pop;
+	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		rq->stats->wqe_err++;
+		goto wq_free_wqe;
 	}
+
+	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+			      mlx5e_skb_from_cqe_linear,
+			      mlx5e_skb_from_cqe_nonlinear,
+			      rq, cqe, wi, cqe_bcnt);
+	if (unlikely(!skb)) /* a DROP, save the page-reuse checks */
+		goto wq_free_wqe;
+
 	skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
-	if (unlikely(!skb)) {
-		mlx5e_free_rx_wqe(rq, wi, true);
-		goto wq_cyc_pop;
-	}
+	if (unlikely(!skb))
+		goto wq_free_wqe;
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	napi_gro_receive(rq->cq.napi, skb);
 
+wq_free_wqe:
 	mlx5e_free_rx_wqe(rq, wi, true);
-wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 4382ef8..bbff8d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -35,6 +35,7 @@
 #include <linux/udp.h>
 #include <net/udp.h>
 #include "en.h"
+#include "en/port.h"
 
 enum {
 	MLX5E_ST_LINK_STATE,
@@ -64,7 +65,7 @@
 {
 	struct mlx5_core_health *health = &priv->mdev->priv.health;
 
-	return health->sick ? 1 : 0;
+	return health->fatal_error ? 1 : 0;
 }
 
 static int mlx5e_test_link_state(struct mlx5e_priv *priv)
@@ -80,22 +81,12 @@
 
 static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
 {
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	u32 eth_proto_oper;
-	int i;
+	u32 speed;
 
 	if (!netif_carrier_ok(priv->netdev))
 		return 1;
 
-	if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1))
-		return 1;
-
-	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) {
-		if (eth_proto_oper & MLX5E_PROT_MASK(i))
-			return 0;
-	}
-	return 1;
+	return mlx5e_port_linkspeed(priv->mdev, &speed);
 }
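
The rewritten selftest leans on the shared port helper instead of parsing the
PTYS register locally. Assuming the en/port.h API used elsewhere in this
series, mlx5e_port_linkspeed() returns 0 and fills in the speed only when the
operational protocol maps to a known link mode, so returning its error code
directly keeps the old nonzero-means-fail convention. A hedged usage sketch:

	u32 speed;

	/* 0 on success; speed is reported in Mbps (e.g. 100000 for 100GbE) */
	if (!mlx5e_port_linkspeed(priv->mdev, &speed))
		netdev_info(priv->netdev, "operational speed: %u Mbps\n", speed);
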
 
 struct mlx5ehdr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index d57d51c..7e6ebd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include "lib/mlx5.h"
 #include "en.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/tls.h"
@@ -47,20 +48,34 @@
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
 
 #ifdef CONFIG_MLX5_EN_TLS
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) },
 #endif
 
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
@@ -73,9 +88,11 @@
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_udp_seg_rem) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
@@ -86,18 +103,45 @@
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) },
 };
 
 #define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
@@ -125,18 +169,20 @@
 	return idx;
 }
 
-void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
+static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 {
-	struct mlx5e_sw_stats temp, *s = &temp;
+	struct mlx5e_sw_stats *s = &priv->stats.sw;
 	int i;
 
 	memset(s, 0, sizeof(*s));
 
-	for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) {
+	for (i = 0; i < priv->max_nch; i++) {
 		struct mlx5e_channel_stats *channel_stats =
 			&priv->channel_stats[i];
 		struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
 		struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
+		struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq;
+		struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
 		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
 		struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
 		int j;
@@ -145,14 +191,20 @@
 		s->rx_bytes	+= rq_stats->bytes;
 		s->rx_lro_packets += rq_stats->lro_packets;
 		s->rx_lro_bytes	+= rq_stats->lro_bytes;
+		s->rx_ecn_mark	+= rq_stats->ecn_mark;
 		s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
 		s->rx_csum_none	+= rq_stats->csum_none;
 		s->rx_csum_complete += rq_stats->csum_complete;
+		s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+		s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
 		s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
 		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
 		s->rx_xdp_drop     += rq_stats->xdp_drop;
 		s->rx_xdp_redirect += rq_stats->xdp_redirect;
 		s->rx_xdp_tx_xmit  += xdpsq_stats->xmit;
+		s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
+		s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
+		s->rx_xdp_tx_nops  += xdpsq_stats->nops;
 		s->rx_xdp_tx_full  += xdpsq_stats->full;
 		s->rx_xdp_tx_err   += xdpsq_stats->err;
 		s->rx_xdp_tx_cqe   += xdpsq_stats->cqes;
@@ -163,23 +215,54 @@
 		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
 		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
 		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
-		s->rx_page_reuse  += rq_stats->page_reuse;
 		s->rx_cache_reuse += rq_stats->cache_reuse;
 		s->rx_cache_full  += rq_stats->cache_full;
 		s->rx_cache_empty += rq_stats->cache_empty;
 		s->rx_cache_busy  += rq_stats->cache_busy;
 		s->rx_cache_waive += rq_stats->cache_waive;
 		s->rx_congst_umr  += rq_stats->congst_umr;
+		s->rx_arfs_err    += rq_stats->arfs_err;
+		s->rx_recover     += rq_stats->recover;
 		s->ch_events      += ch_stats->events;
 		s->ch_poll        += ch_stats->poll;
 		s->ch_arm         += ch_stats->arm;
 		s->ch_aff_change  += ch_stats->aff_change;
+		s->ch_force_irq   += ch_stats->force_irq;
 		s->ch_eq_rearm    += ch_stats->eq_rearm;
 		/* xdp redirect */
 		s->tx_xdp_xmit    += xdpsq_red_stats->xmit;
+		s->tx_xdp_mpwqe   += xdpsq_red_stats->mpwqe;
+		s->tx_xdp_inlnw   += xdpsq_red_stats->inlnw;
+		s->tx_xdp_nops	  += xdpsq_red_stats->nops;
 		s->tx_xdp_full    += xdpsq_red_stats->full;
 		s->tx_xdp_err     += xdpsq_red_stats->err;
 		s->tx_xdp_cqes    += xdpsq_red_stats->cqes;
+		/* AF_XDP zero-copy */
+		s->rx_xsk_packets                += xskrq_stats->packets;
+		s->rx_xsk_bytes                  += xskrq_stats->bytes;
+		s->rx_xsk_csum_complete          += xskrq_stats->csum_complete;
+		s->rx_xsk_csum_unnecessary       += xskrq_stats->csum_unnecessary;
+		s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner;
+		s->rx_xsk_csum_none              += xskrq_stats->csum_none;
+		s->rx_xsk_ecn_mark               += xskrq_stats->ecn_mark;
+		s->rx_xsk_removed_vlan_packets   += xskrq_stats->removed_vlan_packets;
+		s->rx_xsk_xdp_drop               += xskrq_stats->xdp_drop;
+		s->rx_xsk_xdp_redirect           += xskrq_stats->xdp_redirect;
+		s->rx_xsk_wqe_err                += xskrq_stats->wqe_err;
+		s->rx_xsk_mpwqe_filler_cqes      += xskrq_stats->mpwqe_filler_cqes;
+		s->rx_xsk_mpwqe_filler_strides   += xskrq_stats->mpwqe_filler_strides;
+		s->rx_xsk_oversize_pkts_sw_drop  += xskrq_stats->oversize_pkts_sw_drop;
+		s->rx_xsk_buff_alloc_err         += xskrq_stats->buff_alloc_err;
+		s->rx_xsk_cqe_compress_blks      += xskrq_stats->cqe_compress_blks;
+		s->rx_xsk_cqe_compress_pkts      += xskrq_stats->cqe_compress_pkts;
+		s->rx_xsk_congst_umr             += xskrq_stats->congst_umr;
+		s->rx_xsk_arfs_err               += xskrq_stats->arfs_err;
+		s->tx_xsk_xmit                   += xsksq_stats->xmit;
+		s->tx_xsk_mpwqe                  += xsksq_stats->mpwqe;
+		s->tx_xsk_inlnw                  += xsksq_stats->inlnw;
+		s->tx_xsk_full                   += xsksq_stats->full;
+		s->tx_xsk_err                    += xsksq_stats->err;
+		s->tx_xsk_cqes                   += xsksq_stats->cqes;
 
 		for (j = 0; j < priv->max_opened_tc; j++) {
 			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -194,7 +277,6 @@
 			s->tx_nop               += sq_stats->nop;
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
-			s->tx_udp_seg_rem	+= sq_stats->udp_seg_rem;
 			s->tx_queue_dropped	+= sq_stats->dropped;
 			s->tx_cqe_err		+= sq_stats->cqe_err;
 			s->tx_recover		+= sq_stats->recover;
@@ -203,14 +285,20 @@
 			s->tx_csum_none		+= sq_stats->csum_none;
 			s->tx_csum_partial	+= sq_stats->csum_partial;
 #ifdef CONFIG_MLX5_EN_TLS
-			s->tx_tls_ooo		+= sq_stats->tls_ooo;
-			s->tx_tls_resync_bytes	+= sq_stats->tls_resync_bytes;
+			s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
+			s->tx_tls_encrypted_bytes   += sq_stats->tls_encrypted_bytes;
+			s->tx_tls_ctx               += sq_stats->tls_ctx;
+			s->tx_tls_ooo               += sq_stats->tls_ooo;
+			s->tx_tls_dump_bytes        += sq_stats->tls_dump_bytes;
+			s->tx_tls_dump_packets      += sq_stats->tls_dump_packets;
+			s->tx_tls_resync_bytes      += sq_stats->tls_resync_bytes;
+			s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data;
+			s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
+			s->tx_tls_drop_bypass_req   += sq_stats->tls_drop_bypass_req;
 #endif
 			s->tx_cqes		+= sq_stats->cqes;
 		}
 	}
-
-	memcpy(&priv->stats.sw, s, sizeof(*s));
 }
 
 static const struct counter_desc q_stats_desc[] = {
@@ -283,17 +371,27 @@
 }
 
 #define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
-static const struct counter_desc vnic_env_stats_desc[] = {
+static const struct counter_desc vnic_env_stats_steer_desc[] = {
 	{ "rx_steer_missed_packets",
 		VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) },
 };
 
-#define NUM_VNIC_ENV_COUNTERS		ARRAY_SIZE(vnic_env_stats_desc)
+static const struct counter_desc vnic_env_stats_dev_oob_desc[] = {
+	{ "dev_internal_queue_oob",
+		VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) },
+};
+
+#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \
+	(MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \
+	 ARRAY_SIZE(vnic_env_stats_steer_desc) : 0)
+#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \
+	(MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \
+	 ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0)
 
 static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv)
 {
-	return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ?
-		NUM_VNIC_ENV_COUNTERS : 0;
+	return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) +
+		NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev);
 }
 
 static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data,
@@ -301,12 +399,13 @@
 {
 	int i;
 
-	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
-		return idx;
-
-	for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+	for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       vnic_env_stats_desc[i].format);
+		       vnic_env_stats_steer_desc[i].format);
+
+	for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       vnic_env_stats_dev_oob_desc[i].format);
 	return idx;
 }
 
@@ -315,12 +414,13 @@
 {
 	int i;
 
-	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
-		return idx;
-
-	for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+	for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++)
 		data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out,
-						  vnic_env_stats_desc, i);
+						  vnic_env_stats_steer_desc, i);
+
+	for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++)
+		data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out,
+						  vnic_env_stats_dev_oob_desc, i);
 	return idx;
 }
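
Note the asymmetry above: the steering counter is read with
MLX5E_READ_CTR64_BE while the new out-of-buffer counter uses
MLX5E_READ_CTR32_BE, since internal_rq_out_of_buffer is a 32-bit field in the
query_vnic_env output. The READ_CTR helpers are plain offset-based loads;
roughly, paraphrasing en_stats.h:

	#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \
		(*(u64 *)((char *)ptr + dsc[i].offset))
	#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \
		be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset))
	#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
		be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))

Using the 64-bit reader on a 32-bit field would pull in the neighboring
field's bytes, so each descriptor table must agree with the reader's width.
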
 
@@ -478,7 +578,10 @@
 	return idx;
 }
 
-static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
+#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \
+	(MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1)
+
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
 {
 	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -486,6 +589,9 @@
 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 	void *out;
 
+	if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+		return;
+
 	MLX5_SET(ppcnt_reg, in, local_port, 1);
 	out = pstats->IEEE_802_3_counters;
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
@@ -598,6 +704,9 @@
 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 	void *out;
 
+	if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+		return;
+
 	MLX5_SET(ppcnt_reg, in, local_port, 1);
 	out = pstats->RFC_2819_counters;
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
@@ -612,46 +721,82 @@
 	{ "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
 };
 
-#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc)
+static const struct counter_desc
+pport_phy_statistical_err_lanes_stats_desc[] = {
+	{ "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) },
+	{ "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) },
+	{ "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) },
+	{ "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) },
+};
+
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \
+	ARRAY_SIZE(pport_phy_statistical_stats_desc)
+#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \
+	ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc)
 
 static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int num_stats;
+
 	/* "1" for link_down_events special counter */
-	return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
-		NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1;
+	num_stats = 1;
+
+	num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ?
+		     NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0;
+
+	num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ?
+		     NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0;
+
+	return num_stats;
 }
 
 static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
 				      int idx)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int i;
 
 	strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
 
-	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+	if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
 		return idx;
 
 	for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
 		       pport_phy_statistical_stats_desc[i].format);
+
+	if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+		for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       pport_phy_statistical_err_lanes_stats_desc[i].format);
+
 	return idx;
 }
 
 static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int i;
 
 	/* link_down_events_phy has special handling since it is not stored in __be64 format */
 	data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
 			       counter_set.phys_layer_cntrs.link_down_events);
 
-	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+	if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
 		return idx;
 
 	for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
 		data[idx++] =
 			MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
 					    pport_phy_statistical_stats_desc, i);
+
+	if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+		for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+						    pport_phy_statistical_err_lanes_stats_desc,
+						    i);
 	return idx;
 }
 
@@ -838,6 +983,147 @@
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
 }
 
+#define PPORT_PER_TC_PRIO_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_per_tc_prio_grp_data_layout.c##_high)
+
+static const struct counter_desc pport_per_tc_prio_stats_desc[] = {
+	{ "rx_prio%d_buf_discard", PPORT_PER_TC_PRIO_OFF(no_buffer_discard_uc) },
+};
+
+#define NUM_PPORT_PER_TC_PRIO_COUNTERS	ARRAY_SIZE(pport_per_tc_prio_stats_desc)
+
+#define PPORT_PER_TC_CONGEST_PRIO_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_per_tc_congest_prio_grp_data_layout.c##_high)
+
+static const struct counter_desc pport_per_tc_congest_prio_stats_desc[] = {
+	{ "rx_prio%d_cong_discard", PPORT_PER_TC_CONGEST_PRIO_OFF(wred_discard) },
+	{ "rx_prio%d_marked", PPORT_PER_TC_CONGEST_PRIO_OFF(ecn_marked_tc) },
+};
+
+#define NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS \
+	ARRAY_SIZE(pport_per_tc_congest_prio_stats_desc)
+
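
The c##_high token pasting above relies on how PPCNT counters are declared in
mlx5_ifc: each 64-bit counter is split into _high and _low 32-bit halves, with
_high first, so the byte offset of the _high half is also the offset of the
full big-endian 64-bit value. A hedged sketch of the layout and the matching
read (field names taken from this hunk, struct simplified for illustration):

	/* mlx5_ifc-style layout: the _high half leads */
	struct eth_per_tc_congest_prio_grp_data_layout_bits {
		u8 wred_discard_high[0x20];	/* upper 32 bits */
		u8 wred_discard_low[0x20];	/* lower 32 bits */
		/* ... */
	};

	/* MLX5E_READ_CTR64_BE() then consumes the descriptor offset as a
	 * single be64 load spanning both halves.
	 */
	u64 wred = MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio],
				       pport_per_tc_congest_prio_stats_desc, 0);
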
+static int mlx5e_grp_per_tc_prio_get_num_stats(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+		return 0;
+
+	return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static int mlx5e_grp_per_port_buffer_congest_fill_strings(struct mlx5e_priv *priv,
+							  u8 *data, int idx)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int i, prio;
+
+	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+		return idx;
+
+	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+		for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++)
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_tc_prio_stats_desc[i].format, prio);
+		for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++)
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_tc_congest_prio_stats_desc[i].format, prio);
+	}
+
+	return idx;
+}
+
+static int mlx5e_grp_per_port_buffer_congest_fill_stats(struct mlx5e_priv *priv,
+							u64 *data, int idx)
+{
+	struct mlx5e_pport_stats *pport = &priv->stats.pport;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int i, prio;
+
+	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+		return idx;
+
+	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+		for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio],
+						    pport_per_tc_prio_stats_desc, i);
+		for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio],
+						    pport_per_tc_congest_prio_stats_desc, i);
+	}
+
+	return idx;
+}
+
+static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv)
+{
+	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+	void *out;
+	int prio;
+
+	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+		return;
+
+	MLX5_SET(ppcnt_reg, in, pnat, 2);
+	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP);
+	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+		out = pstats->per_tc_prio_counters[prio];
+		MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+	}
+}
+
+static int mlx5e_grp_per_tc_congest_prio_get_num_stats(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+		return 0;
+
+	return NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv)
+{
+	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+	void *out;
+	int prio;
+
+	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
+		return;
+
+	MLX5_SET(ppcnt_reg, in, pnat, 2);
+	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP);
+	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+		out = pstats->per_tc_congest_prio_counters[prio];
+		MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+	}
+}
+
+static int mlx5e_grp_per_port_buffer_congest_get_num_stats(struct mlx5e_priv *priv)
+{
+	return mlx5e_grp_per_tc_prio_get_num_stats(priv) +
+		mlx5e_grp_per_tc_congest_prio_get_num_stats(priv);
+}
+
+static void mlx5e_grp_per_port_buffer_congest_update_stats(struct mlx5e_priv *priv)
+{
+	mlx5e_grp_per_tc_prio_update_stats(priv);
+	mlx5e_grp_per_tc_congest_prio_update_stats(priv);
+}
+
 #define PPORT_PER_PRIO_OFF(c) \
 	MLX5_BYTE_OFF(ppcnt_reg, \
 		      counter_set.eth_per_prio_grp_data_layout.c##_high)
@@ -896,7 +1182,7 @@
 };
 
 static const struct counter_desc pport_pfc_stall_stats_desc[] = {
-	{ "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+	{ "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
 	{ "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
 };
 
@@ -1037,6 +1323,9 @@
 	int prio;
 	void *out;
 
+	if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+		return;
+
 	MLX5_SET(ppcnt_reg, in, local_port, 1);
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
@@ -1048,13 +1337,13 @@
 }
 
 static const struct counter_desc mlx5e_pme_status_desc[] = {
-	{ "module_unplug", 8 },
+	{ "module_unplug",       sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
 };
 
 static const struct counter_desc mlx5e_pme_error_desc[] = {
-	{ "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
-	{ "module_high_temp", 48 },       /* high temperature */
-	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
+	{ "module_bus_stuck",    sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
+	{ "module_high_temp",    sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
+	{ "module_bad_shorted",  sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
 };
 
 #define NUM_PME_STATUS_STATS		ARRAY_SIZE(mlx5e_pme_status_desc)
@@ -1082,15 +1371,17 @@
 static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
 				    int idx)
 {
-	struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+	struct mlx5_pme_stats pme_stats;
 	int i;
 
+	mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
 	for (i = 0; i < NUM_PME_STATUS_STATS; i++)
-		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+		data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
 						   mlx5e_pme_status_desc, i);
 
 	for (i = 0; i < NUM_PME_ERR_STATS; i++)
-		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+		data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
 						   mlx5e_pme_error_desc, i);
 
 	return idx;
@@ -1139,6 +1430,8 @@
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
@@ -1146,6 +1439,7 @@
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
@@ -1154,13 +1448,14 @@
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
 };
 
 static const struct counter_desc sq_stats_desc[] = {
@@ -1174,6 +1469,18 @@
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+#ifdef CONFIG_MLX5_EN_TLS
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+#endif
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
@@ -1186,6 +1493,9 @@
 
 static const struct counter_desc rq_xdpsq_stats_desc[] = {
 	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) },
 	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
 	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
 	{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
@@ -1193,16 +1503,51 @@
 
 static const struct counter_desc xdpsq_stats_desc[] = {
 	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) },
 	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
 	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
 	{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
 };
 
+static const struct counter_desc xskrq_stats_desc[] = {
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+	{ MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+};
+
+static const struct counter_desc xsksq_stats_desc[] = {
+	{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+	{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+	{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+	{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+	{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+	{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
 static const struct counter_desc ch_stats_desc[] = {
 	{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
 	{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
 	{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
 	{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
+	{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) },
 	{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
 };
 
@@ -1210,23 +1555,28 @@
 #define NUM_SQ_STATS			ARRAY_SIZE(sq_stats_desc)
 #define NUM_XDPSQ_STATS			ARRAY_SIZE(xdpsq_stats_desc)
 #define NUM_RQ_XDPSQ_STATS		ARRAY_SIZE(rq_xdpsq_stats_desc)
+#define NUM_XSKRQ_STATS			ARRAY_SIZE(xskrq_stats_desc)
+#define NUM_XSKSQ_STATS			ARRAY_SIZE(xsksq_stats_desc)
 #define NUM_CH_STATS			ARRAY_SIZE(ch_stats_desc)
 
 static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
 {
-	int max_nch = priv->profile->max_nch(priv->mdev);
+	int max_nch = priv->max_nch;
 
 	return (NUM_RQ_STATS * max_nch) +
 	       (NUM_CH_STATS * max_nch) +
 	       (NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
 	       (NUM_RQ_XDPSQ_STATS * max_nch) +
-	       (NUM_XDPSQ_STATS * max_nch);
+	       (NUM_XDPSQ_STATS * max_nch) +
+	       (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) +
+	       (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used);
 }
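
priv->xsk.ever_used works as a 0/1 multiplier rather than an if: once any
channel has ever had an AF_XDP socket, the XSK counters stay exposed, so the
ethtool string set cannot shrink between get_sset_count and the later
strings/stats callbacks. The same multiplier bounds the per-channel loops
below, e.g.:

	/* emits nothing until XSK has been used at least once */
	for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
		sprintf(data + (idx++) * ETH_GSTRING_LEN,
			xskrq_stats_desc[j].format, i);
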
 
 static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
 					   int idx)
 {
-	int max_nch = priv->profile->max_nch(priv->mdev);
+	bool is_xsk = priv->xsk.ever_used;
+	int max_nch = priv->max_nch;
 	int i, j, tc;
 
 	for (i = 0; i < max_nch; i++)
@@ -1238,6 +1588,9 @@
 		for (j = 0; j < NUM_RQ_STATS; j++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
 				rq_stats_desc[j].format, i);
+		for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				xskrq_stats_desc[j].format, i);
 		for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
 				rq_xdpsq_stats_desc[j].format, i);
@@ -1250,10 +1603,14 @@
 					sq_stats_desc[j].format,
 					priv->channel_tc2txq[i][tc]);
 
-	for (i = 0; i < max_nch; i++)
+	for (i = 0; i < max_nch; i++) {
+		for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				xsksq_stats_desc[j].format, i);
 		for (j = 0; j < NUM_XDPSQ_STATS; j++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
 				xdpsq_stats_desc[j].format, i);
+	}
 
 	return idx;
 }
@@ -1261,7 +1618,8 @@
 static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
 					 int idx)
 {
-	int max_nch = priv->profile->max_nch(priv->mdev);
+	bool is_xsk = priv->xsk.ever_used;
+	int max_nch = priv->max_nch;
 	int i, j, tc;
 
 	for (i = 0; i < max_nch; i++)
@@ -1275,6 +1633,10 @@
 			data[idx++] =
 				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
 						     rq_stats_desc, j);
+		for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+			data[idx++] =
+				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq,
+						     xskrq_stats_desc, j);
 		for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
 			data[idx++] =
 				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
@@ -1288,11 +1650,16 @@
 					MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
 							     sq_stats_desc, j);
 
-	for (i = 0; i < max_nch; i++)
+	for (i = 0; i < max_nch; i++) {
+		for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+			data[idx++] =
+				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq,
+						     xsksq_stats_desc, j);
 		for (j = 0; j < NUM_XDPSQ_STATS; j++)
 			data[idx++] =
 				MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
 						     xdpsq_stats_desc, j);
+	}
 
 	return idx;
 }
@@ -1388,7 +1755,13 @@
 		.get_num_stats = mlx5e_grp_channels_get_num_stats,
 		.fill_strings = mlx5e_grp_channels_fill_strings,
 		.fill_stats = mlx5e_grp_channels_fill_stats,
-	}
+	},
+	{
+		.get_num_stats = mlx5e_grp_per_port_buffer_congest_get_num_stats,
+		.fill_strings = mlx5e_grp_per_port_buffer_congest_fill_strings,
+		.fill_stats = mlx5e_grp_per_port_buffer_congest_fill_stats,
+		.update_stats = mlx5e_grp_per_port_buffer_congest_update_stats,
+	},
 };
 
 const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index c1064af..869f350 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -46,6 +46,8 @@
 #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
 
 struct counter_desc {
@@ -66,14 +68,20 @@
 	u64 tx_nop;
 	u64 rx_lro_packets;
 	u64 rx_lro_bytes;
+	u64 rx_ecn_mark;
 	u64 rx_removed_vlan_packets;
 	u64 rx_csum_unnecessary;
 	u64 rx_csum_none;
 	u64 rx_csum_complete;
+	u64 rx_csum_complete_tail;
+	u64 rx_csum_complete_tail_slow;
 	u64 rx_csum_unnecessary_inner;
 	u64 rx_xdp_drop;
 	u64 rx_xdp_redirect;
 	u64 rx_xdp_tx_xmit;
+	u64 rx_xdp_tx_mpwqe;
+	u64 rx_xdp_tx_inlnw;
+	u64 rx_xdp_tx_nops;
 	u64 rx_xdp_tx_full;
 	u64 rx_xdp_tx_err;
 	u64 rx_xdp_tx_cqe;
@@ -86,9 +94,11 @@
 	u64 tx_recover;
 	u64 tx_cqes;
 	u64 tx_queue_wake;
-	u64 tx_udp_seg_rem;
 	u64 tx_cqe_err;
 	u64 tx_xdp_xmit;
+	u64 tx_xdp_mpwqe;
+	u64 tx_xdp_inlnw;
+	u64 tx_xdp_nops;
 	u64 tx_xdp_full;
 	u64 tx_xdp_err;
 	u64 tx_xdp_cqes;
@@ -99,23 +109,59 @@
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
-	u64 rx_page_reuse;
 	u64 rx_cache_reuse;
 	u64 rx_cache_full;
 	u64 rx_cache_empty;
 	u64 rx_cache_busy;
 	u64 rx_cache_waive;
 	u64 rx_congst_umr;
+	u64 rx_arfs_err;
+	u64 rx_recover;
 	u64 ch_events;
 	u64 ch_poll;
 	u64 ch_arm;
 	u64 ch_aff_change;
+	u64 ch_force_irq;
 	u64 ch_eq_rearm;
 
 #ifdef CONFIG_MLX5_EN_TLS
+	u64 tx_tls_encrypted_packets;
+	u64 tx_tls_encrypted_bytes;
+	u64 tx_tls_ctx;
 	u64 tx_tls_ooo;
+	u64 tx_tls_dump_packets;
+	u64 tx_tls_dump_bytes;
 	u64 tx_tls_resync_bytes;
+	u64 tx_tls_skip_no_sync_data;
+	u64 tx_tls_drop_no_sync_data;
+	u64 tx_tls_drop_bypass_req;
 #endif
+
+	u64 rx_xsk_packets;
+	u64 rx_xsk_bytes;
+	u64 rx_xsk_csum_complete;
+	u64 rx_xsk_csum_unnecessary;
+	u64 rx_xsk_csum_unnecessary_inner;
+	u64 rx_xsk_csum_none;
+	u64 rx_xsk_ecn_mark;
+	u64 rx_xsk_removed_vlan_packets;
+	u64 rx_xsk_xdp_drop;
+	u64 rx_xsk_xdp_redirect;
+	u64 rx_xsk_wqe_err;
+	u64 rx_xsk_mpwqe_filler_cqes;
+	u64 rx_xsk_mpwqe_filler_strides;
+	u64 rx_xsk_oversize_pkts_sw_drop;
+	u64 rx_xsk_buff_alloc_err;
+	u64 rx_xsk_cqe_compress_blks;
+	u64 rx_xsk_cqe_compress_pkts;
+	u64 rx_xsk_congst_umr;
+	u64 rx_xsk_arfs_err;
+	u64 tx_xsk_xmit;
+	u64 tx_xsk_mpwqe;
+	u64 tx_xsk_inlnw;
+	u64 tx_xsk_full;
+	u64 tx_xsk_err;
+	u64 tx_xsk_cqes;
 };
 
 struct mlx5e_qcounter_stats {
@@ -162,6 +208,8 @@
 	__be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
 	__be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
 	__be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+	__be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
+	__be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
 };
 
 #define PCIE_PERF_GET(pcie_stats, c) \
@@ -180,11 +228,14 @@
 	u64 packets;
 	u64 bytes;
 	u64 csum_complete;
+	u64 csum_complete_tail;
+	u64 csum_complete_tail_slow;
 	u64 csum_unnecessary;
 	u64 csum_unnecessary_inner;
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
+	u64 ecn_mark;
 	u64 removed_vlan_packets;
 	u64 xdp_drop;
 	u64 xdp_redirect;
@@ -195,13 +246,14 @@
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
-	u64 page_reuse;
 	u64 cache_reuse;
 	u64 cache_full;
 	u64 cache_empty;
 	u64 cache_busy;
 	u64 cache_waive;
 	u64 congst_umr;
+	u64 arfs_err;
+	u64 recover;
 };
 
 struct mlx5e_sq_stats {
@@ -217,10 +269,17 @@
 	u64 csum_partial_inner;
 	u64 added_vlan_packets;
 	u64 nop;
-	u64 udp_seg_rem;
 #ifdef CONFIG_MLX5_EN_TLS
+	u64 tls_encrypted_packets;
+	u64 tls_encrypted_bytes;
+	u64 tls_ctx;
 	u64 tls_ooo;
+	u64 tls_dump_packets;
+	u64 tls_dump_bytes;
 	u64 tls_resync_bytes;
+	u64 tls_skip_no_sync_data;
+	u64 tls_drop_no_sync_data;
+	u64 tls_drop_bypass_req;
 #endif
 	/* less likely accessed in data path */
 	u64 csum_none;
@@ -235,6 +294,9 @@
 
 struct mlx5e_xdpsq_stats {
 	u64 xmit;
+	u64 mpwqe;
+	u64 inlnw;
+	u64 nops;
 	u64 full;
 	u64 err;
 	/* dirtied @completion */
@@ -246,6 +308,7 @@
 	u64 poll;
 	u64 arm;
 	u64 aff_change;
+	u64 force_irq;
 	u64 eq_rearm;
 };
 
@@ -275,6 +338,6 @@
 extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
 extern const int mlx5e_num_stats_grps;
 
-void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv);
 
 #endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3092c59..f90a9f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -38,54 +38,100 @@
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
-#include <net/switchdev.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_csum.h>
-#include <net/vxlan.h>
 #include <net/arp.h>
+#include <net/ipv6_stubs.h>
 #include "en.h"
 #include "en_rep.h"
 #include "en_tc.h"
 #include "eswitch.h"
-#include "lib/vxlan.h"
 #include "fs_core.h"
 #include "en/port.h"
+#include "en/tc_tun.h"
+#include "lib/devcom.h"
+#include "lib/geneve.h"
+#include "diag/en_tc_tracepoint.h"
 
 struct mlx5_nic_flow_attr {
 	u32 action;
 	u32 flow_tag;
-	u32 mod_hdr_id;
+	struct mlx5_modify_hdr *modify_hdr;
 	u32 hairpin_tirn;
 	u8 match_level;
 	struct mlx5_flow_table	*hairpin_ft;
+	struct mlx5_fc		*counter;
 };
 
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
 
 enum {
-	MLX5E_TC_FLOW_INGRESS	= MLX5E_TC_INGRESS,
-	MLX5E_TC_FLOW_EGRESS	= MLX5E_TC_EGRESS,
-	MLX5E_TC_FLOW_ESWITCH	= BIT(MLX5E_TC_FLOW_BASE),
-	MLX5E_TC_FLOW_NIC	= BIT(MLX5E_TC_FLOW_BASE + 1),
-	MLX5E_TC_FLOW_OFFLOADED	= BIT(MLX5E_TC_FLOW_BASE + 2),
-	MLX5E_TC_FLOW_HAIRPIN	= BIT(MLX5E_TC_FLOW_BASE + 3),
-	MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
+	MLX5E_TC_FLOW_FLAG_INGRESS	= MLX5E_TC_FLAG_INGRESS_BIT,
+	MLX5E_TC_FLOW_FLAG_EGRESS	= MLX5E_TC_FLAG_EGRESS_BIT,
+	MLX5E_TC_FLOW_FLAG_ESWITCH	= MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+	MLX5E_TC_FLOW_FLAG_NIC		= MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+	MLX5E_TC_FLOW_FLAG_OFFLOADED	= MLX5E_TC_FLOW_BASE,
+	MLX5E_TC_FLOW_FLAG_HAIRPIN	= MLX5E_TC_FLOW_BASE + 1,
+	MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS	= MLX5E_TC_FLOW_BASE + 2,
+	MLX5E_TC_FLOW_FLAG_SLOW		= MLX5E_TC_FLOW_BASE + 3,
+	MLX5E_TC_FLOW_FLAG_DUP		= MLX5E_TC_FLOW_BASE + 4,
+	MLX5E_TC_FLOW_FLAG_NOT_READY	= MLX5E_TC_FLOW_BASE + 5,
+	MLX5E_TC_FLOW_FLAG_DELETED	= MLX5E_TC_FLOW_BASE + 6,
 };
 
 #define MLX5E_TC_MAX_SPLITS 1
 
+/* Helper struct for accessing a struct containing a list_head array.
+ * Containing struct
+ *   |- Helper array
+ *      [0] Helper item 0
+ *          |- list_head item 0
+ *          |- index (0)
+ *      [1] Helper item 1
+ *          |- list_head item 1
+ *          |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ *    helper item =
+ *        container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the containing struct from the helper item and its index in the array:
+ *    containing struct =
+ *        container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+	struct mlx5e_encap_entry *e; /* attached encap instance */
+	struct list_head list;
+	int index;
+};
+
 struct mlx5e_tc_flow {
 	struct rhash_head	node;
 	struct mlx5e_priv	*priv;
 	u64			cookie;
-	u8			flags;
+	unsigned long		flags;
 	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
-	struct list_head	encap;   /* flows sharing the same encap ID */
+	/* Flow can be associated with multiple encap IDs.
+	 * The number of encaps is bounded by the number of supported
+	 * destinations.
+	 */
+	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+	struct mlx5e_tc_flow    *peer_flow;
+	struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
 	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
+	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
 	struct list_head	hairpin; /* flows sharing the same hairpin */
+	struct list_head	peer;    /* flows with peer flow */
+	struct list_head	unready; /* flows not ready to be offloaded (e.g. due to missing route) */
+	int			tmp_efi_index;
+	struct list_head	tmp_list; /* temporary flow list used by neigh update */
+	refcount_t		refcnt;
+	struct rcu_head		rcu_head;
+	struct completion	init_done;
 	union {
 		struct mlx5_esw_flow_attr esw_attr[0];
 		struct mlx5_nic_flow_attr nic_attr[0];
@@ -93,16 +139,13 @@
 };
 
 struct mlx5e_tc_flow_parse_attr {
-	struct ip_tunnel_info tun_info;
+	const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+	struct net_device *filter_dev;
 	struct mlx5_flow_spec spec;
 	int num_mod_hdr_actions;
+	int max_mod_hdr_actions;
 	void *mod_hdr_actions;
-	int mirred_ifindex;
-};
-
-enum {
-	MLX5_HEADER_TYPE_VXLAN = 0x0,
-	MLX5_HEADER_TYPE_NVGRE = 0x1,
+	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
 };
 
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
@@ -126,12 +169,20 @@
 	/* a node of a hash table which keeps all the hairpin entries */
 	struct hlist_node hairpin_hlist;
 
+	/* protects flows list */
+	spinlock_t flows_lock;
 	/* flows sharing the same hairpin */
 	struct list_head flows;
+	/* hpe's that were not fully initialized when the dead peer update
+	 * event function traversed them.
+	 */
+	struct list_head dead_peer_wait_list;
 
 	u16 peer_vhca_id;
 	u8 prio;
 	struct mlx5e_hairpin *hp;
+	refcount_t refcnt;
+	struct completion res_ready;
 };
 
 struct mod_hdr_key {
@@ -143,16 +194,93 @@
 	/* a node of a hash table which keeps all the mod_hdr entries */
 	struct hlist_node mod_hdr_hlist;
 
+	/* protects flows list */
+	spinlock_t flows_lock;
 	/* flows sharing the same mod_hdr entry */
 	struct list_head flows;
 
 	struct mod_hdr_key key;
 
-	u32 mod_hdr_id;
+	struct mlx5_modify_hdr *modify_hdr;
+
+	refcount_t refcnt;
+	struct completion res_ready;
+	int compl_result;
 };
 
 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
 
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow *flow);
+
+static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+{
+	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
+		return ERR_PTR(-EINVAL);
+	return flow;
+}
+
+static void mlx5e_flow_put(struct mlx5e_priv *priv,
+			   struct mlx5e_tc_flow *flow)
+{
+	if (refcount_dec_and_test(&flow->refcnt)) {
+		mlx5e_tc_del_flow(priv, flow);
+		kfree_rcu(flow, rcu_head);
+	}
+}
+
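
mlx5e_flow_get()/mlx5e_flow_put() pair a refcount with kfree_rcu() so that
lockless readers can race with teardown: a lookup that wins the race bumps the
count, one that loses gets an error, and the memory itself outlives any
in-flight RCU reader. A hypothetical lookup (helper names assumed, not taken
from this patch) would look like:

	static struct mlx5e_tc_flow *
	flow_lookup_example(struct rhashtable *ht, unsigned long *cookie,
			    struct rhashtable_params params)
	{
		struct mlx5e_tc_flow *flow;

		rcu_read_lock();
		flow = rhashtable_lookup(ht, cookie, params);
		/* NULL or a zero refcount both yield ERR_PTR(-EINVAL) */
		flow = mlx5e_flow_get(flow);
		rcu_read_unlock();

		return flow;
	}
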
+static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+	/* Complete all memory stores before setting bit. */
+	smp_mb__before_atomic();
+	set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+				     unsigned long flag)
+{
+	/* test_and_set_bit() provides all necessary barriers */
+	return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag)			\
+	__flow_flag_test_and_set(flow,				\
+				 MLX5E_TC_FLOW_FLAG_##flag)
+
+static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+	/* Complete all memory stores before clearing bit. */
+	smp_mb__before_atomic();
+	clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+						      MLX5E_TC_FLOW_FLAG_##flag)
+
+static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+	bool ret = test_bit(flag, &flow->flags);
+
+	/* Read fields of flow structure only after checking flags. */
+	smp_mb__after_atomic();
+	return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+						    MLX5E_TC_FLOW_FLAG_##flag)
+
+static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
+{
+	return flow_flag_test(flow, ESWITCH);
+}
+
+static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+{
+	return flow_flag_test(flow, OFFLOADED);
+}
+
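
These flag helpers make the flags word double as a publication mechanism: the
writer's smp_mb__before_atomic() orders its initialization of the flow ahead
of the set_bit(), and the reader's smp_mb__after_atomic() orders its
subsequent loads after the test_bit(). A minimal sketch of the intended
pairing (OFFLOADED used as an example):

	/* writer: payload first, then publish */
	flow->rule[0] = rule;
	flow_flag_set(flow, OFFLOADED);

	/* reader: observe the flag, then the payload is safe to read */
	if (flow_flag_test(flow, OFFLOADED))
		mlx5_del_flow_rules(flow->rule[0]);
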
 static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
 {
 	return jhash(key->actions,
@@ -168,15 +296,62 @@
 	return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
 }
 
+static struct mod_hdr_tbl *
+get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
+		&priv->fs.tc.mod_hdr;
+}
+
+static struct mlx5e_mod_hdr_entry *
+mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
+{
+	struct mlx5e_mod_hdr_entry *mh, *found = NULL;
+
+	hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
+		if (!cmp_mod_hdr_info(&mh->key, key)) {
+			refcount_inc(&mh->refcnt);
+			found = mh;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
+			      struct mlx5e_mod_hdr_entry *mh,
+			      int namespace)
+{
+	struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
+
+	if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
+		return;
+	hash_del(&mh->mod_hdr_hlist);
+	mutex_unlock(&tbl->lock);
+
+	WARN_ON(!list_empty(&mh->flows));
+	if (mh->compl_result > 0)
+		mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);
+
+	kfree(mh);
+}
+
+static int get_flow_name_space(struct mlx5e_tc_flow *flow)
+{
+	return mlx5e_is_eswitch_flow(flow) ?
+		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+}
 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 				struct mlx5e_tc_flow *flow,
 				struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	int num_actions, actions_size, namespace, err;
 	struct mlx5e_mod_hdr_entry *mh;
+	struct mod_hdr_tbl *tbl;
 	struct mod_hdr_key key;
-	bool found = false;
 	u32 hash_key;
 
 	num_actions  = parse_attr->num_mod_hdr_actions;
@@ -187,80 +362,82 @@
 
 	hash_key = hash_mod_hdr_info(&key);
 
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
-		namespace = MLX5_FLOW_NAMESPACE_FDB;
-		hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
-				       mod_hdr_hlist, hash_key) {
-			if (!cmp_mod_hdr_info(&mh->key, &key)) {
-				found = true;
-				break;
-			}
+	namespace = get_flow_name_space(flow);
+	tbl = get_mod_hdr_table(priv, namespace);
+
+	mutex_lock(&tbl->lock);
+	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
+	if (mh) {
+		mutex_unlock(&tbl->lock);
+		wait_for_completion(&mh->res_ready);
+
+		if (mh->compl_result < 0) {
+			err = -EREMOTEIO;
+			goto attach_header_err;
 		}
-	} else {
-		namespace = MLX5_FLOW_NAMESPACE_KERNEL;
-		hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
-				       mod_hdr_hlist, hash_key) {
-			if (!cmp_mod_hdr_info(&mh->key, &key)) {
-				found = true;
-				break;
-			}
-		}
+		goto attach_flow;
 	}
 
-	if (found)
-		goto attach_flow;
-
 	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
-	if (!mh)
+	if (!mh) {
+		mutex_unlock(&tbl->lock);
 		return -ENOMEM;
+	}
 
 	mh->key.actions = (void *)mh + sizeof(*mh);
 	memcpy(mh->key.actions, key.actions, actions_size);
 	mh->key.num_actions = num_actions;
+	spin_lock_init(&mh->flows_lock);
 	INIT_LIST_HEAD(&mh->flows);
+	refcount_set(&mh->refcnt, 1);
+	init_completion(&mh->res_ready);
 
-	err = mlx5_modify_header_alloc(priv->mdev, namespace,
-				       mh->key.num_actions,
-				       mh->key.actions,
-				       &mh->mod_hdr_id);
-	if (err)
-		goto out_err;
+	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
+	mutex_unlock(&tbl->lock);
 
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
-		hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
-	else
-		hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+	mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
+						  mh->key.num_actions,
+						  mh->key.actions);
+	if (IS_ERR(mh->modify_hdr)) {
+		err = PTR_ERR(mh->modify_hdr);
+		mh->compl_result = err;
+		goto alloc_header_err;
+	}
+	mh->compl_result = 1;
+	complete_all(&mh->res_ready);
 
 attach_flow:
+	flow->mh = mh;
+	spin_lock(&mh->flows_lock);
 	list_add(&flow->mod_hdr, &mh->flows);
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
-		flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
+	spin_unlock(&mh->flows_lock);
+	if (mlx5e_is_eswitch_flow(flow))
+		flow->esw_attr->modify_hdr = mh->modify_hdr;
 	else
-		flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
+		flow->nic_attr->modify_hdr = mh->modify_hdr;
 
 	return 0;
 
-out_err:
-	kfree(mh);
+alloc_header_err:
+	complete_all(&mh->res_ready);
+attach_header_err:
+	mlx5e_mod_hdr_put(priv, mh, namespace);
 	return err;
 }
 
 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 				 struct mlx5e_tc_flow *flow)
 {
-	struct list_head *next = flow->mod_hdr.next;
+	/* flow wasn't fully initialized */
+	if (!flow->mh)
+		return;
 
+	spin_lock(&flow->mh->flows_lock);
 	list_del(&flow->mod_hdr);
+	spin_unlock(&flow->mh->flows_lock);
 
-	if (list_empty(next)) {
-		struct mlx5e_mod_hdr_entry *mh;
-
-		mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
-
-		mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
-		hash_del(&mh->mod_hdr_hlist);
-		kfree(mh);
-	}
+	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
+	flow->mh = NULL;
 }
 
 static
@@ -319,7 +496,7 @@
 
 	for (i = 0; i < sz; i++) {
 		ix = i;
-		if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+		if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
 			ix = mlx5e_bits_invert(i, ilog2(sz));
 		ix = indirection_rqt[ix];
 		rqn = hp->pair->rqn[ix];
@@ -363,13 +540,15 @@
 	void *tirc;
 
 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+
 		memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 
 		MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
 		MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 		MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
-		mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+		mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
 
 		err = mlx5_core_create_tir(hp->func_mdev, in,
 					   MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
@@ -522,17 +701,40 @@
 
 	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
 			       hairpin_hlist, hash_key) {
-		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
+		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
+			refcount_inc(&hpe->refcnt);
 			return hpe;
+		}
 	}
 
 	return NULL;
 }
 
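+/* Drop a reference to a hairpin entry; the last put removes it from the
+ * hash table under hairpin_tbl_lock, destroys the hairpin pair and frees
+ * the entry.
+ */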
+static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
+			      struct mlx5e_hairpin_entry *hpe)
+{
+	/* no more hairpin flows for us, release the hairpin pair */
+	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
+		return;
+	hash_del(&hpe->hairpin_hlist);
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+
+	if (!IS_ERR_OR_NULL(hpe->hp)) {
+		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+			   dev_name(hpe->hp->pair->peer_mdev->device));
+
+		mlx5e_hairpin_destroy(hpe->hp);
+	}
+
+	WARN_ON(!list_empty(&hpe->flows));
+	kfree(hpe);
+}
+
 #define UNKNOWN_MATCH_PRIO 8
 
 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
-				  struct mlx5_flow_spec *spec, u8 *match_prio)
+				  struct mlx5_flow_spec *spec, u8 *match_prio,
+				  struct netlink_ext_ack *extack)
 {
 	void *headers_c, *headers_v;
 	u8 prio_val, prio_mask = 0;
@@ -540,8 +742,8 @@
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
-		netdev_warn(priv->netdev,
-			    "only PCP trust state supported for hairpin\n");
+		NL_SET_ERR_MSG_MOD(extack,
+				   "only PCP trust state supported for hairpin");
 		return -EOPNOTSUPP;
 	}
 #endif
@@ -557,8 +759,8 @@
 	if (!vlan_present || !prio_mask) {
 		prio_val = UNKNOWN_MATCH_PRIO;
 	} else if (prio_mask != 0x7) {
-		netdev_warn(priv->netdev,
-			    "masked priority match not supported for hairpin\n");
+		NL_SET_ERR_MSG_MOD(extack,
+				   "masked priority match not supported for hairpin");
 		return -EOPNOTSUPP;
 	}
 
@@ -568,9 +770,10 @@
 
 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 				  struct mlx5e_tc_flow *flow,
-				  struct mlx5e_tc_flow_parse_attr *parse_attr)
+				  struct mlx5e_tc_flow_parse_attr *parse_attr,
+				  struct netlink_ext_ack *extack)
 {
-	int peer_ifindex = parse_attr->mirred_ifindex;
+	int peer_ifindex = parse_attr->mirred_ifindex[0];
 	struct mlx5_hairpin_params params;
 	struct mlx5_core_dev *peer_mdev;
 	struct mlx5e_hairpin_entry *hpe;
@@ -583,25 +786,46 @@
 
 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
 	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
-		netdev_warn(priv->netdev, "hairpin is not supported\n");
+		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
 		return -EOPNOTSUPP;
 	}
 
 	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
-	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio);
+	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
+				     extack);
 	if (err)
 		return err;
+
+	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
 	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
-	if (hpe)
+	if (hpe) {
+		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+		wait_for_completion(&hpe->res_ready);
+
+		if (IS_ERR(hpe->hp)) {
+			err = -EREMOTEIO;
+			goto out_err;
+		}
 		goto attach_flow;
+	}
 
 	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
-	if (!hpe)
+	if (!hpe) {
+		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 		return -ENOMEM;
+	}
 
+	spin_lock_init(&hpe->flows_lock);
 	INIT_LIST_HEAD(&hpe->flows);
+	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
 	hpe->peer_vhca_id = peer_id;
 	hpe->prio = match_prio;
+	refcount_set(&hpe->refcnt, 1);
+	init_completion(&hpe->res_ready);
+
+	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
+		 hash_hairpin_info(peer_id, match_prio));
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 
 	params.log_data_size = 15;
 	params.log_data_size = min_t(u8, params.log_data_size,
@@ -623,83 +847,79 @@
 	params.num_channels = link_speed64;
 
 	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
+	hpe->hp = hp;
+	complete_all(&hpe->res_ready);
 	if (IS_ERR(hp)) {
 		err = PTR_ERR(hp);
-		goto create_hairpin_err;
+		goto out_err;
 	}
 
 	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
-		   hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name,
+		   hp->tirn, hp->pair->rqn[0],
+		   dev_name(hp->pair->peer_mdev->device),
 		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
 
-	hpe->hp = hp;
-	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
-		 hash_hairpin_info(peer_id, match_prio));
-
 attach_flow:
 	if (hpe->hp->num_channels > 1) {
-		flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
+		flow_flag_set(flow, HAIRPIN_RSS);
 		flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
 	} else {
 		flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
 	}
+
+	flow->hpe = hpe;
+	spin_lock(&hpe->flows_lock);
 	list_add(&flow->hairpin, &hpe->flows);
+	spin_unlock(&hpe->flows_lock);
 
 	return 0;
 
-create_hairpin_err:
-	kfree(hpe);
+out_err:
+	mlx5e_hairpin_put(priv, hpe);
 	return err;
 }
 
 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
 				   struct mlx5e_tc_flow *flow)
 {
-	struct list_head *next = flow->hairpin.next;
+	/* flow wasn't fully initialized */
+	if (!flow->hpe)
+		return;
 
+	spin_lock(&flow->hpe->flows_lock);
 	list_del(&flow->hairpin);
+	spin_unlock(&flow->hpe->flows_lock);
 
-	/* no more hairpin flows for us, release the hairpin pair */
-	if (list_empty(next)) {
-		struct mlx5e_hairpin_entry *hpe;
-
-		hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
-
-		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
-			   hpe->hp->pair->peer_mdev->priv.name);
-
-		mlx5e_hairpin_destroy(hpe->hp);
-		hash_del(&hpe->hairpin_hlist);
-		kfree(hpe);
-	}
+	mlx5e_hairpin_put(priv, flow->hpe);
+	flow->hpe = NULL;
 }
 
-static struct mlx5_flow_handle *
+static int
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 		      struct mlx5e_tc_flow_parse_attr *parse_attr,
-		      struct mlx5e_tc_flow *flow)
+		      struct mlx5e_tc_flow *flow,
+		      struct netlink_ext_ack *extack)
 {
+	struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
 	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_flow_destination dest[2] = {};
 	struct mlx5_flow_act flow_act = {
 		.action = attr->action,
-		.has_flow_tag = true,
-		.flow_tag = attr->flow_tag,
-		.encap_id = 0,
+		.flags    = FLOW_ACT_NO_APPEND,
 	};
 	struct mlx5_fc *counter = NULL;
-	struct mlx5_flow_handle *rule;
-	bool table_created = false;
 	int err, dest_ix = 0;
 
-	if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
-		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
-		if (err) {
-			rule = ERR_PTR(err);
-			goto err_add_hairpin_flow;
-		}
-		if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
+	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+	flow_context->flow_tag = attr->flow_tag;
+
+	if (flow_flag_test(flow, HAIRPIN)) {
+		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
+		if (err)
+			return err;
+
+		if (flow_flag_test(flow, HAIRPIN_RSS)) {
 			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 			dest[dest_ix].ft = attr->hairpin_ft;
 		} else {
@@ -715,25 +935,24 @@
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(dev, true);
-		if (IS_ERR(counter)) {
-			rule = ERR_CAST(counter);
-			goto err_fc_create;
-		}
+		if (IS_ERR(counter))
+			return PTR_ERR(counter);
+
 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest[dest_ix].counter = counter;
+		dest[dest_ix].counter_id = mlx5_fc_id(counter);
 		dest_ix++;
+		attr->counter = counter;
 	}
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
 		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
-		flow_act.modify_id = attr->mod_hdr_id;
+		flow_act.modify_hdr = attr->modify_hdr;
 		kfree(parse_attr->mod_hdr_actions);
-		if (err) {
-			rule = ERR_PTR(err);
-			goto err_create_mod_hdr_id;
-		}
+		if (err)
+			return err;
 	}
 
+	mutex_lock(&priv->fs.tc.t_lock);
 	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
 		int tc_grp_size, tc_tbl_size;
 		u32 max_flow_counter;
@@ -753,41 +972,23 @@
 							    MLX5E_TC_TABLE_NUM_GROUPS,
 							    MLX5E_TC_FT_LEVEL, 0);
 		if (IS_ERR(priv->fs.tc.t)) {
+			mutex_unlock(&priv->fs.tc.t_lock);
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Failed to create tc offload table");
 			netdev_err(priv->netdev,
 				   "Failed to create tc offload table\n");
-			rule = ERR_CAST(priv->fs.tc.t);
-			goto err_create_ft;
+			return PTR_ERR(priv->fs.tc.t);
 		}
-
-		table_created = true;
 	}
 
 	if (attr->match_level != MLX5_MATCH_NONE)
-		parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 
-	rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
-				   &flow_act, dest, dest_ix);
+	flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
+					    &flow_act, dest, dest_ix);
+	mutex_unlock(&priv->fs.tc.t_lock);
 
-	if (IS_ERR(rule))
-		goto err_add_rule;
-
-	return rule;
-
-err_add_rule:
-	if (table_created) {
-		mlx5_destroy_flow_table(priv->fs.tc.t);
-		priv->fs.tc.t = NULL;
-	}
-err_create_ft:
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-		mlx5e_detach_mod_hdr(priv, flow);
-err_create_mod_hdr_id:
-	mlx5_fc_destroy(dev, counter);
-err_fc_create:
-	if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
-		mlx5e_hairpin_flow_del(priv, flow);
-err_add_hairpin_flow:
-	return rule;
+	return PTR_ERR_OR_ZERO(flow->rule[0]);
 }
 
 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
@@ -796,104 +997,258 @@
 	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
 	struct mlx5_fc *counter = NULL;
 
-	counter = mlx5_flow_rule_counter(flow->rule[0]);
-	mlx5_del_flow_rules(flow->rule[0]);
+	counter = attr->counter;
+	if (!IS_ERR_OR_NULL(flow->rule[0]))
+		mlx5_del_flow_rules(flow->rule[0]);
 	mlx5_fc_destroy(priv->mdev, counter);
 
-	if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
+	mutex_lock(&priv->fs.tc.t_lock);
+	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
 	}
+	mutex_unlock(&priv->fs.tc.t_lock);
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		mlx5e_detach_mod_hdr(priv, flow);
 
-	if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+	if (flow_flag_test(flow, HAIRPIN))
 		mlx5e_hairpin_flow_del(priv, flow);
 }
 
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow);
+			       struct mlx5e_tc_flow *flow, int out_index);
 
 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
-			      struct ip_tunnel_info *tun_info,
+			      struct mlx5e_tc_flow *flow,
 			      struct net_device *mirred_dev,
+			      int out_index,
+			      struct netlink_ext_ack *extack,
 			      struct net_device **encap_dev,
-			      struct mlx5e_tc_flow *flow);
+			      bool *encap_valid);
 
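+/* Install the offloaded FDB rule; when the actions are split across tables
+ * (split_count), also add the extra forward rule as flow->rule[1].
+ */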
 static struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+			   struct mlx5e_tc_flow *flow,
+			   struct mlx5_flow_spec *spec,
+			   struct mlx5_esw_flow_attr *attr)
+{
+	struct mlx5_flow_handle *rule;
+
+	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+	if (IS_ERR(rule))
+		return rule;
+
+	if (attr->split_count) {
+		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
+		if (IS_ERR(flow->rule[1])) {
+			mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+			return flow->rule[1];
+		}
+	}
+
+	return rule;
+}
+
+static void
+mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+			     struct mlx5e_tc_flow *flow,
+			     struct mlx5_esw_flow_attr *attr)
+{
+	flow_flag_clear(flow, OFFLOADED);
+
+	if (attr->split_count)
+		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+
+	mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+}
+
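+/* Offload the flow with a plain FWD_DEST action into the slow path chain so
+ * packets are handled in software while e.g. an encap neighbour is still
+ * unresolved.
+ */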
+static struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+			      struct mlx5e_tc_flow *flow,
+			      struct mlx5_flow_spec *spec,
+			      struct mlx5_esw_flow_attr *slow_attr)
+{
+	struct mlx5_flow_handle *rule;
+
+	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	slow_attr->split_count = 0;
+	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
+
+	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
+	if (!IS_ERR(rule))
+		flow_flag_set(flow, SLOW);
+
+	return rule;
+}
+
+static void
+mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+				  struct mlx5e_tc_flow *flow,
+				  struct mlx5_esw_flow_attr *slow_attr)
+{
+	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	slow_attr->split_count = 0;
+	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
+	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
+	flow_flag_clear(flow, SLOW);
+}
+
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
+ * function.
+ */
+static void unready_flow_add(struct mlx5e_tc_flow *flow,
+			     struct list_head *unready_flows)
+{
+	flow_flag_set(flow, NOT_READY);
+	list_add_tail(&flow->unready, unready_flows);
+}
+
+/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
+ * function.
+ */
+static void unready_flow_del(struct mlx5e_tc_flow *flow)
+{
+	list_del(&flow->unready);
+	flow_flag_clear(flow, NOT_READY);
+}
+
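+/* Locked wrapper: look up the uplink representor's private data and queue
+ * the flow on its unready_flows list.
+ */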
+static void add_unready_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+
+	esw = flow->priv->mdev->priv.eswitch;
+	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &rpriv->uplink_priv;
+
+	mutex_lock(&uplink_priv->unready_flows_lock);
+	unready_flow_add(flow, &uplink_priv->unready_flows);
+	mutex_unlock(&uplink_priv->unready_flows_lock);
+}
+
+static void remove_unready_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+
+	esw = flow->priv->mdev->priv.eswitch;
+	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &rpriv->uplink_priv;
+
+	mutex_lock(&uplink_priv->unready_flows_lock);
+	unready_flow_del(flow);
+	mutex_unlock(&uplink_priv->unready_flows_lock);
+}
+
+static int
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
-		      struct mlx5e_tc_flow_parse_attr *parse_attr,
-		      struct mlx5e_tc_flow *flow)
+		      struct mlx5e_tc_flow *flow,
+		      struct netlink_ext_ack *extack)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	u32 max_chain = mlx5_eswitch_get_chain_range(esw);
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+	u16 max_prio = mlx5_eswitch_get_prio_range(esw);
 	struct net_device *out_dev, *encap_dev = NULL;
-	struct mlx5_flow_handle *rule = NULL;
+	struct mlx5_fc *counter = NULL;
 	struct mlx5e_rep_priv *rpriv;
 	struct mlx5e_priv *out_priv;
-	int err;
+	bool encap_valid = true;
+	int err = 0;
+	int out_index;
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+	if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
+		NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
+		return -EOPNOTSUPP;
+	}
+
+	if (attr->chain > max_chain) {
+		NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
+		return -EOPNOTSUPP;
+	}
+
+	if (attr->prio > max_prio) {
+		NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
+		return -EOPNOTSUPP;
+	}
+
+	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+		int mirred_ifindex;
+
+		if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+			continue;
+
+		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
 		out_dev = __dev_get_by_index(dev_net(priv->netdev),
-					     attr->parse_attr->mirred_ifindex);
-		err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
-					 out_dev, &encap_dev, flow);
-		if (err) {
-			rule = ERR_PTR(err);
-			if (err != -EAGAIN)
-				goto err_attach_encap;
-		}
+					     mirred_ifindex);
+		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
+					 extack, &encap_dev, &encap_valid);
+		if (err)
+			return err;
+
 		out_priv = netdev_priv(encap_dev);
 		rpriv = out_priv->ppriv;
-		attr->out_rep[attr->out_count] = rpriv->rep;
-		attr->out_mdev[attr->out_count++] = out_priv->mdev;
+		attr->dests[out_index].rep = rpriv->rep;
+		attr->dests[out_index].mdev = out_priv->mdev;
 	}
 
 	err = mlx5_eswitch_add_vlan_action(esw, attr);
-	if (err) {
-		rule = ERR_PTR(err);
-		goto err_add_vlan;
-	}
+	if (err)
+		return err;
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
 		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
 		kfree(parse_attr->mod_hdr_actions);
-		if (err) {
-			rule = ERR_PTR(err);
-			goto err_mod_hdr;
-		}
+		if (err)
+			return err;
 	}
 
-	/* we get here if (1) there's no error (rule being null) or when
-	 * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		counter = mlx5_fc_create(attr->counter_dev, true);
+		if (IS_ERR(counter))
+			return PTR_ERR(counter);
+
+		attr->counter = counter;
+	}
+
+	/* we get here if one of the following takes place:
+	 * (1) there's no error
+	 * (2) there's an encap action and we don't have a valid neigh yet
 	 */
-	if (rule != ERR_PTR(-EAGAIN)) {
-		rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
-		if (IS_ERR(rule))
-			goto err_add_rule;
+	if (!encap_valid) {
+		/* continue with goto slow path rule instead */
+		struct mlx5_esw_flow_attr slow_attr;
 
-		if (attr->mirror_count) {
-			flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
-			if (IS_ERR(flow->rule[1]))
-				goto err_fwd_rule;
-		}
+		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
+	} else {
+		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
 	}
-	return rule;
 
-err_fwd_rule:
-	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
-	rule = flow->rule[1];
-err_add_rule:
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-		mlx5e_detach_mod_hdr(priv, flow);
-err_mod_hdr:
-	mlx5_eswitch_del_vlan_action(esw, attr);
-err_add_vlan:
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-		mlx5e_detach_encap(priv, flow);
-err_attach_encap:
-	return rule;
+	if (IS_ERR(flow->rule[0]))
+		return PTR_ERR(flow->rule[0]);
+	else
+		flow_flag_set(flow, OFFLOADED);
+
+	return 0;
+}
+
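+/* The flow matched on geneve TLV options iff the option data dword in the
+ * misc_parameters_3 match value is non-zero.
+ */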
+static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
+	void *headers_v = MLX5_ADDR_OF(fte_match_param,
+				       spec->match_value,
+				       misc_parameters_3);
+	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
+					     headers_v,
+					     geneve_tlv_option_0_data);
+
+	return !!geneve_tlv_opt_0_data;
 }
 
 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
@@ -901,130 +1256,285 @@
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	struct mlx5_esw_flow_attr slow_attr;
+	int out_index;
 
-	if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
-		flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
-		if (attr->mirror_count)
-			mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
-		mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+	if (flow_flag_test(flow, NOT_READY)) {
+		remove_unready_flow(flow);
+		kvfree(attr->parse_attr);
+		return;
 	}
 
+	if (mlx5e_is_offloaded_flow(flow)) {
+		if (flow_flag_test(flow, SLOW))
+			mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+		else
+			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+	}
+
+	if (mlx5_flow_has_geneve_opt(flow))
+		mlx5_geneve_tlv_option_del(priv->mdev->geneve);
+
 	mlx5_eswitch_del_vlan_action(esw, attr);
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
-		mlx5e_detach_encap(priv, flow);
-		kvfree(attr->parse_attr);
-	}
+	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
+			mlx5e_detach_encap(priv, flow, out_index);
+			kfree(attr->parse_attr->tun_info[out_index]);
+		}
+	kvfree(attr->parse_attr);
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		mlx5e_detach_mod_hdr(priv, flow);
+
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+		mlx5_fc_destroy(attr->counter_dev, attr->counter);
 }
 
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
-			      struct mlx5e_encap_entry *e)
+			      struct mlx5e_encap_entry *e,
+			      struct list_head *flow_list)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5_esw_flow_attr *esw_attr;
+	struct mlx5_esw_flow_attr slow_attr, *esw_attr;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
 	struct mlx5e_tc_flow *flow;
 	int err;
 
-	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       e->encap_size, e->encap_header,
-			       &e->encap_id);
-	if (err) {
-		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
-			       err);
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     e->encap_size, e->encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
+			       PTR_ERR(e->pkt_reformat));
 		return;
 	}
 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
 	mlx5e_rep_queue_neigh_stats_work(priv);
 
-	list_for_each_entry(flow, &e->flows, encap) {
+	list_for_each_entry(flow, flow_list, tmp_list) {
+		bool all_flow_encaps_valid = true;
+		int i;
+
+		if (!mlx5e_is_offloaded_flow(flow))
+			continue;
 		esw_attr = flow->esw_attr;
-		esw_attr->encap_id = e->encap_id;
-		flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
-		if (IS_ERR(flow->rule[0])) {
-			err = PTR_ERR(flow->rule[0]);
+		spec = &esw_attr->parse_attr->spec;
+
+		esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
+		esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+		/* Flow can be associated with multiple encap entries.
+		 * Before offloading the flow verify that all of them have
+		 * a valid neighbour.
+		 */
+		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+				continue;
+			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+				all_flow_encaps_valid = false;
+				break;
+			}
+		}
+		/* Do not offload flows with unresolved neighbors */
+		if (!all_flow_encaps_valid)
+			continue;
+		/* update from slow path rule to encap rule */
+		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
+		if (IS_ERR(rule)) {
+			err = PTR_ERR(rule);
 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
 				       err);
 			continue;
 		}
 
-		if (esw_attr->mirror_count) {
-			flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
-			if (IS_ERR(flow->rule[1])) {
-				mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
-				err = PTR_ERR(flow->rule[1]);
-				mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
-					       err);
-				continue;
-			}
-		}
-
-		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+		mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+		flow->rule[0] = rule;
+		/* was unset when slow path rule removed */
+		flow_flag_set(flow, OFFLOADED);
 	}
 }
 
 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
-			      struct mlx5e_encap_entry *e)
+			      struct mlx5e_encap_entry *e,
+			      struct list_head *flow_list)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr slow_attr;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	struct mlx5e_tc_flow *flow;
+	int err;
+
+	list_for_each_entry(flow, flow_list, tmp_list) {
+		if (!mlx5e_is_offloaded_flow(flow))
+			continue;
+		spec = &flow->esw_attr->parse_attr->spec;
+
+		/* update from encap rule to slow path rule */
+		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
+		/* mark the flow's encap dest as non-valid */
+		flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+
+		if (IS_ERR(rule)) {
+			err = PTR_ERR(rule);
+			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+				       err);
+			continue;
+		}
+
+		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
+		flow->rule[0] = rule;
+		/* was unset when fast path rule removed */
+		flow_flag_set(flow, OFFLOADED);
+	}
+
+	/* we know that the encap is valid */
+	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+}
+
+static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+{
+	if (mlx5e_is_eswitch_flow(flow))
+		return flow->esw_attr->counter;
+	else
+		return flow->nic_attr->counter;
+}
+
+/* Takes reference to all flows attached to encap and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+	struct encap_flow_item *efi;
 	struct mlx5e_tc_flow *flow;
 
-	list_for_each_entry(flow, &e->flows, encap) {
-		if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
-			struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	list_for_each_entry(efi, &e->flows, list) {
+		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+		if (IS_ERR(mlx5e_flow_get(flow)))
+			continue;
+		wait_for_completion(&flow->init_done);
 
-			flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
-			if (attr->mirror_count)
-				mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
-			mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
-		}
+		flow->tmp_efi_index = efi->index;
+		list_add(&flow->tmp_list, flow_list);
+	}
+}
+
+/* Iterate over tmp_list of flows attached to flow_list head. */
+void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+{
+	struct mlx5e_tc_flow *flow, *tmp;
+
+	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
+		mlx5e_flow_put(priv, flow);
+}
+
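+/* Walk nhe->encap_list under RCU starting after @e and return the next encap
+ * entry whose reference could be taken and whose initialization completed in
+ * the VALID state; the reference on @e is released before returning.
+ */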
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+			   struct mlx5e_encap_entry *e)
+{
+	struct mlx5e_encap_entry *next = NULL;
+
+retry:
+	rcu_read_lock();
+
+	/* find encap with non-zero reference counter value */
+	for (next = e ?
+		     list_next_or_null_rcu(&nhe->encap_list,
+					   &e->encap_list,
+					   struct mlx5e_encap_entry,
+					   encap_list) :
+		     list_first_or_null_rcu(&nhe->encap_list,
+					    struct mlx5e_encap_entry,
+					    encap_list);
+	     next;
+	     next = list_next_or_null_rcu(&nhe->encap_list,
+					  &next->encap_list,
+					  struct mlx5e_encap_entry,
+					  encap_list))
+		if (mlx5e_encap_take(next))
+			break;
+
+	rcu_read_unlock();
+
+	/* release starting encap */
+	if (e)
+		mlx5e_encap_put(netdev_priv(e->out_dev), e);
+	if (!next)
+		return next;
+
+	/* wait for encap to be fully initialized */
+	wait_for_completion(&next->res_ready);
+	/* continue searching if encap entry is not in valid state after completion */
+	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+		e = next;
+		goto retry;
 	}
 
-	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
-		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
-		mlx5_encap_dealloc(priv->mdev, e->encap_id);
-	}
+	return next;
 }
 
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
-	u64 bytes, packets, lastuse = 0;
+	struct mlx5e_encap_entry *e = NULL;
 	struct mlx5e_tc_flow *flow;
-	struct mlx5e_encap_entry *e;
 	struct mlx5_fc *counter;
 	struct neigh_table *tbl;
 	bool neigh_used = false;
 	struct neighbour *n;
+	u64 lastuse;
 
 	if (m_neigh->family == AF_INET)
 		tbl = &arp_tbl;
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (m_neigh->family == AF_INET6)
-		tbl = &nd_tbl;
+		tbl = ipv6_stub->nd_tbl;
 #endif
 	else
 		return;
 
-	list_for_each_entry(e, &nhe->encap_list, encap_list) {
-		if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
-			continue;
-		list_for_each_entry(flow, &e->flows, encap) {
-			if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
-				counter = mlx5_flow_rule_counter(flow->rule[0]);
-				mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+	/* mlx5e_get_next_valid_encap() releases previous encap before returning
+	 * next one.
+	 */
+	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+		struct encap_flow_item *efi, *tmp;
+		struct mlx5_eswitch *esw;
+		LIST_HEAD(flow_list);
+
+		esw = priv->mdev->priv.eswitch;
+		mutex_lock(&esw->offloads.encap_tbl_lock);
+		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+			flow = container_of(efi, struct mlx5e_tc_flow,
+					    encaps[efi->index]);
+			if (IS_ERR(mlx5e_flow_get(flow)))
+				continue;
+			list_add(&flow->tmp_list, &flow_list);
+
+			if (mlx5e_is_offloaded_flow(flow)) {
+				counter = mlx5e_tc_get_counter(flow);
+				lastuse = mlx5_fc_query_lastuse(counter);
 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 					neigh_used = true;
 					break;
 				}
 			}
 		}
-		if (neigh_used)
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+		mlx5e_put_encap_flow_list(priv, &flow_list);
+		if (neigh_used) {
+			/* release current encap before breaking the loop */
+			mlx5e_encap_put(priv, e);
 			break;
+		}
 	}
 
+	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
 	if (neigh_used) {
 		nhe->reported_lastuse = jiffies;
 
@@ -1040,191 +1550,210 @@
 	}
 }
 
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-			       struct mlx5e_tc_flow *flow)
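+/* Final teardown of an encap entry: detach it from the representor if its
+ * initialization succeeded, release the packet reformat if it became valid,
+ * then free the entry after an RCU grace period.
+ */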
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 {
-	struct list_head *next = flow->encap.next;
+	WARN_ON(!list_empty(&e->flows));
 
-	list_del(&flow->encap);
-	if (list_empty(next)) {
-		struct mlx5e_encap_entry *e;
-
-		e = list_entry(next, struct mlx5e_encap_entry, flows);
+	if (e->compl_result > 0) {
 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 
 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-			mlx5_encap_dealloc(priv->mdev, e->encap_id);
-
-		hash_del_rcu(&e->encap_hlist);
-		kfree(e->encap_header);
-		kfree(e);
+			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
 	}
+
+	kfree(e->tun_info);
+	kfree(e->encap_header);
+	kfree_rcu(e, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+		return;
+	hash_del_rcu(&e->encap_hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+			       struct mlx5e_tc_flow *flow, int out_index)
+{
+	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	/* flow wasn't fully initialized */
+	if (!e)
+		return;
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	list_del(&flow->encaps[out_index].list);
+	flow->encaps[out_index].e = NULL;
+	if (!refcount_dec_and_test(&e->refcnt)) {
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		return;
+	}
+	hash_del_rcu(&e->encap_hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_encap_dealloc(priv, e);
+}
+
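+/* If the eswitch flow was duplicated to the peer eswitch (DUP), unlink it
+ * from the peer list and delete the duplicate.
+ */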
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+	if (!flow_flag_test(flow, ESWITCH) ||
+	    !flow_flag_test(flow, DUP))
+		return;
+
+	mutex_lock(&esw->offloads.peer_mutex);
+	list_del(&flow->peer);
+	mutex_unlock(&esw->offloads.peer_mutex);
+
+	flow_flag_clear(flow, DUP);
+
+	mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+	kvfree(flow->peer_flow);
+	flow->peer_flow = NULL;
+}
+
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_core_dev *dev = flow->priv->mdev;
+	struct mlx5_devcom *devcom = dev->priv.devcom;
+	struct mlx5_eswitch *peer_esw;
+
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		return;
+
+	__mlx5e_tc_del_fdb_peer_flow(flow);
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+	if (mlx5e_is_eswitch_flow(flow)) {
+		mlx5e_tc_del_fdb_peer_flow(flow);
 		mlx5e_tc_del_fdb_flow(priv, flow);
-	else
+	} else {
 		mlx5e_tc_del_nic_flow(priv, flow);
-}
-
-static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
-			     struct tc_cls_flower_offload *f)
-{
-	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-				       outer_headers);
-	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				       outer_headers);
-	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-				    misc_parameters);
-	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    misc_parameters);
-
-	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
-	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->key);
-		struct flow_dissector_key_keyid *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->mask);
-		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
-			 be32_to_cpu(mask->keyid));
-		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
-			 be32_to_cpu(key->keyid));
 	}
 }
 
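+/* Delegate tunnel-specific keys to mlx5e_tc_tun_parse(), then match the
+ * outer IP addresses and tos/ttl from the enc_* dissector keys.
+ */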
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
 			     struct mlx5_flow_spec *spec,
-			     struct tc_cls_flower_offload *f)
+			     struct flow_cls_offload *f,
+			     struct net_device *filter_dev, u8 *match_level)
 {
+	struct netlink_ext_ack *extack = f->common.extack;
 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				       outer_headers);
 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				       outer_headers);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	int err;
 
-	struct flow_dissector_key_control *enc_control =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-					  f->key);
+	err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+				 headers_c, headers_v, match_level);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "failed to parse tunnel attributes");
+		return err;
+	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_PORTS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_match_control match;
+		u16 addr_type;
 
-		/* Full udp dst port must be given */
-		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
-			goto vxlan_match_offload_err;
+		flow_rule_match_enc_control(rule, &match);
+		addr_type = match.key->addr_type;
 
-		if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
-		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
-			parse_vxlan_attr(spec, f);
-		else {
-			netdev_warn(priv->netdev,
-				    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
+		/* addr_type for tunnels reuses the same key IDs as non-tunnel matches */
+		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+			struct flow_match_ipv4_addrs match;
+
+			flow_rule_match_enc_ipv4_addrs(rule, &match);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.mask->src));
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.key->src));
+
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.mask->dst));
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+				 ntohl(match.key->dst));
+
+			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+					 ethertype);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+				 ETH_P_IP);
+		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+			struct flow_match_ipv6_addrs match;
+
+			flow_rule_match_enc_ipv6_addrs(rule, &match);
+			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+			       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+								   ipv6));
+			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+			       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
+								  ipv6));
+
+			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+			       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+								   ipv6));
+			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
+								  ipv6));
+
+			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+					 ethertype);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+				 ETH_P_IPV6);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_enc_ip(rule, &match);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+			 match.mask->tos & 0x3);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+			 match.key->tos & 0x3);
+
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+			 match.mask->tos >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+			 match.key->tos  >> 2);
+
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+			 match.mask->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+			 match.key->ttl);
+
+		if (match.mask->ttl &&
+		    !MLX5_CAP_ESW_FLOWTABLE_FDB
+			(priv->mdev,
+			 ft_field_support.outer_ipv4_ttl)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Matching on TTL is not supported");
 			return -EOPNOTSUPP;
 		}
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-			 udp_dport, ntohs(mask->dst));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-			 udp_dport, ntohs(key->dst));
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-			 udp_sport, ntohs(mask->src));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-			 udp_sport, ntohs(key->src));
-	} else { /* udp dst port must be given */
-vxlan_match_offload_err:
-		netdev_warn(priv->netdev,
-			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-						  f->mask);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
-			 ntohl(mask->src));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
-			 ntohl(key->src));
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-			 ntohl(mask->dst));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-			 ntohl(key->dst));
-
-		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
-	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
-						  f->mask);
-
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-
-		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
-	}
-
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
-		struct flow_dissector_key_ip *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IP,
-						  f->key);
-		struct flow_dissector_key_ip *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IP,
-						  f->mask);
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos  >> 2);
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
 	}
 
 	/* Enforce DMAC when offloading incoming tunneled flows.
@@ -1242,11 +1771,33 @@
 	return 0;
 }
 
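+/* Decap flows match L2-L4 fields against the inner headers; all other flows
+ * match against the outer headers.
+ */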
+static void *get_match_headers_criteria(u32 flags,
+					struct mlx5_flow_spec *spec)
+{
+	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			     inner_headers) :
+		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			     outer_headers);
+}
+
+static void *get_match_headers_value(u32 flags,
+				     struct mlx5_flow_spec *spec)
+{
+	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+		MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			     inner_headers) :
+		MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			     outer_headers);
+}
+
 static int __parse_cls_flower(struct mlx5e_priv *priv,
 			      struct mlx5_flow_spec *spec,
-			      struct tc_cls_flower_offload *f,
-			      u8 *match_level)
+			      struct flow_cls_offload *f,
+			      struct net_device *filter_dev,
+			      u8 *inner_match_level, u8 *outer_match_level)
 {
+	struct netlink_ext_ack *extack = f->common.extack;
 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				       outer_headers);
 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -1255,13 +1806,17 @@
 				    misc_parameters);
 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    misc_parameters);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
+	u8 *match_level;
 
-	*match_level = MLX5_MATCH_NONE;
+	match_level = outer_match_level;
 
-	if (f->dissector->used_keys &
-	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	if (dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
+	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
@@ -1276,69 +1831,62 @@
 	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_TCP) |
 	      BIT(FLOW_DISSECTOR_KEY_IP)  |
-	      BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
+	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
 		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
-			    f->dissector->used_keys);
+			    dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if ((dissector_uses_key(f->dissector,
-				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
-	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
-	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
-	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-						  f->key);
-		switch (key->addr_type) {
-		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
-		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
-			if (parse_tunnel_attr(priv, spec, f))
-				return -EOPNOTSUPP;
-			break;
-		default:
+	if (mlx5e_get_tc_tun(filter_dev)) {
+		if (parse_tunnel_attr(priv, spec, f, filter_dev,
+				      outer_match_level))
 			return -EOPNOTSUPP;
-		}
 
-		/* In decap flow, header pointers should point to the inner
-		 * headers, outer header were already set by parse_tunnel_attr
+		/* At this point, header pointers should point to the inner
+		 * headers; outer headers were already set by parse_tunnel_attr
 		 */
-		headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-					 inner_headers);
-		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-					 inner_headers);
+		match_level = inner_match_level;
+		headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
+						       spec);
+		headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
+						    spec);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-			 ntohs(mask->n_proto));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-			 ntohs(key->n_proto));
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		if (mask->n_proto)
+		flow_rule_match_basic(rule, &match);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+			 ntohs(match.mask->n_proto));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+			 ntohs(match.key->n_proto));
+
+		if (match.mask->n_proto)
 			*match_level = MLX5_MATCH_L2;
 	}
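+	/* When the classifier is attached to a vlan device, synthesize an
+	 * exact vlan id/tpid match from the device itself; vlan priority is
+	 * left unmatched.
+	 */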
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
+	    is_vlan_dev(filter_dev)) {
+		struct flow_dissector_key_vlan filter_dev_mask;
+		struct flow_dissector_key_vlan filter_dev_key;
+		struct flow_match_vlan match;
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
-		if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
-			if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+		if (is_vlan_dev(filter_dev)) {
+			match.key = &filter_dev_key;
+			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
+			match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
+			match.key->vlan_priority = 0;
+			match.mask = &filter_dev_mask;
+			memset(match.mask, 0xff, sizeof(*match.mask));
+			match.mask->vlan_priority = 0;
+		} else {
+			flow_rule_match_vlan(rule, &match);
+		}
+		if (match.mask->vlan_id ||
+		    match.mask->vlan_priority ||
+		    match.mask->vlan_tpid) {
+			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 					 svlan_tag, 1);
 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
@@ -1350,31 +1898,35 @@
 					 cvlan_tag, 1);
 			}
 
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
+				 match.mask->vlan_id);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
+				 match.key->vlan_id);
 
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
+				 match.mask->vlan_priority);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
+				 match.key->vlan_priority);
 
 			*match_level = MLX5_MATCH_L2;
 		}
 	} else if (*match_level != MLX5_MATCH_NONE) {
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
+		/* cvlan_tag enabled in match criteria and
+		 * disabled in match value means both S & C tags
+		 * don't exist (untagged on both)
+		 */
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
 		*match_level = MLX5_MATCH_L2;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CVLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CVLAN,
-						  f->mask);
-		if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
-			if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_cvlan(rule, &match);
+		if (match.mask->vlan_id ||
+		    match.mask->vlan_priority ||
+		    match.mask->vlan_tpid) {
+			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
 				MLX5_SET(fte_match_set_misc, misc_c,
 					 outer_second_svlan_tag, 1);
 				MLX5_SET(fte_match_set_misc, misc_v,
@@ -1387,69 +1939,58 @@
 			}
 
 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
-				 mask->vlan_id);
+				 match.mask->vlan_id);
 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
-				 key->vlan_id);
+				 match.key->vlan_id);
 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
-				 mask->vlan_priority);
+				 match.mask->vlan_priority);
 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
-				 key->vlan_priority);
+				 match.key->vlan_priority);
 
 			*match_level = MLX5_MATCH_L2;
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
+		flow_rule_match_eth_addrs(rule, &match);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     dmac_47_16),
-				mask->dst);
+				match.mask->dst);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     dmac_47_16),
-				key->dst);
+				match.key->dst);
 
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     smac_47_16),
-				mask->src);
+				match.mask->src);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     smac_47_16),
-				key->src);
+				match.key->src);
 
-		if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+		if (!is_zero_ether_addr(match.mask->src) ||
+		    !is_zero_ether_addr(match.mask->dst))
 			*match_level = MLX5_MATCH_L2;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		struct flow_dissector_key_control *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->mask);
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 
 		/* the HW doesn't support frag first/later */
-		if (mask->flags & FLOW_DIS_FIRST_FRAG)
+		if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
 			return -EOPNOTSUPP;
 
-		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
-				 key->flags & FLOW_DIS_IS_FRAGMENT);
+				 match.key->flags & FLOW_DIS_IS_FRAGMENT);
 
 			/* the HW doesn't need L3 inline to match on frag=no */
-			if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
+			if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
 				*match_level = MLX5_MATCH_L2;
 	/* ***  L2 attributes parsing up to here *** */
 			else
@@ -1457,171 +1998,148 @@
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		ip_proto = key->ip_proto;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		ip_proto = match.key->ip_proto;
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
-			 mask->ip_proto);
+			 match.mask->ip_proto);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
-			 key->ip_proto);
+			 match.key->ip_proto);
 
-		if (mask->ip_proto)
+		if (match.mask->ip_proto)
 			*match_level = MLX5_MATCH_L3;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->mask);
+		struct flow_match_ipv4_addrs match;
 
+		flow_rule_match_ipv4_addrs(rule, &match);
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &mask->src, sizeof(mask->src));
+		       &match.mask->src, sizeof(match.mask->src));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &key->src, sizeof(key->src));
+		       &match.key->src, sizeof(match.key->src));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &mask->dst, sizeof(mask->dst));
+		       &match.mask->dst, sizeof(match.mask->dst));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &key->dst, sizeof(key->dst));
+		       &match.key->dst, sizeof(match.key->dst));
 
-		if (mask->src || mask->dst)
+		if (match.mask->src || match.mask->dst)
 			*match_level = MLX5_MATCH_L3;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->mask);
+		struct flow_match_ipv6_addrs match;
 
+		flow_rule_match_ipv6_addrs(rule, &match);
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &mask->src, sizeof(mask->src));
+		       &match.mask->src, sizeof(match.mask->src));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &key->src, sizeof(key->src));
+		       &match.key->src, sizeof(match.key->src));
 
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &mask->dst, sizeof(mask->dst));
+		       &match.mask->dst, sizeof(match.mask->dst));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &key->dst, sizeof(key->dst));
+		       &match.key->dst, sizeof(match.key->dst));
 
-		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
-		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
+		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
 			*match_level = MLX5_MATCH_L3;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IP,
-						  f->key);
-		struct flow_dissector_key_ip *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IP,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+		flow_rule_match_ip(rule, &match);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+			 match.mask->tos & 0x3);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+			 match.key->tos & 0x3);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos  >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+			 match.mask->tos >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+			 match.key->tos >> 2);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+			 match.mask->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+			 match.key->ttl);
 
-		if (mask->ttl &&
+		if (match.mask->ttl &&
 		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
-						ft_field_support.outer_ipv4_ttl))
+						ft_field_support.outer_ipv4_ttl)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Matching on TTL is not supported");
 			return -EOPNOTSUPP;
+		}
 
-		if (mask->tos || mask->ttl)
+		if (match.mask->tos || match.mask->ttl)
 			*match_level = MLX5_MATCH_L3;
 	}
 
 	/* ***  L3 attributes parsing up to here *** */
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
 		switch (ip_proto) {
 		case IPPROTO_TCP:
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 tcp_sport, ntohs(mask->src));
+				 tcp_sport, ntohs(match.mask->src));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 tcp_sport, ntohs(key->src));
+				 tcp_sport, ntohs(match.key->src));
 
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 tcp_dport, ntohs(mask->dst));
+				 tcp_dport, ntohs(match.mask->dst));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 tcp_dport, ntohs(key->dst));
+				 tcp_dport, ntohs(match.key->dst));
 			break;
 
 		case IPPROTO_UDP:
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 udp_sport, ntohs(mask->src));
+				 udp_sport, ntohs(match.mask->src));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 udp_sport, ntohs(key->src));
+				 udp_sport, ntohs(match.key->src));
 
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 udp_dport, ntohs(mask->dst));
+				 udp_dport, ntohs(match.mask->dst));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 udp_dport, ntohs(key->dst));
+				 udp_dport, ntohs(match.key->dst));
 			break;
 		default:
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only UDP and TCP transports are supported for L4 matching");
 			netdev_err(priv->netdev,
 				   "Only UDP and TCP transport are supported\n");
 			return -EINVAL;
 		}
 
-		if (mask->src || mask->dst)
+		if (match.mask->src || match.mask->dst)
 			*match_level = MLX5_MATCH_L4;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
-		struct flow_dissector_key_tcp *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_TCP,
-						  f->key);
-		struct flow_dissector_key_tcp *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_TCP,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_match_tcp match;
 
+		flow_rule_match_tcp(rule, &match);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
-			 ntohs(mask->flags));
+			 ntohs(match.mask->flags));
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
-			 ntohs(key->flags));
+			 ntohs(match.key->flags));
 
-		if (mask->flags)
+		if (match.mask->flags)
 			*match_level = MLX5_MATCH_L4;
 	}
 
@@ -1631,66 +2149,83 @@
 static int parse_cls_flower(struct mlx5e_priv *priv,
 			    struct mlx5e_tc_flow *flow,
 			    struct mlx5_flow_spec *spec,
-			    struct tc_cls_flower_offload *f)
+			    struct flow_cls_offload *f,
+			    struct net_device *filter_dev)
 {
+	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
+	struct netlink_ext_ack *extack = f->common.extack;
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep;
-	u8 match_level;
+	bool is_eswitch_flow;
 	int err;
 
-	err = __parse_cls_flower(priv, spec, f, &match_level);
+	inner_match_level = MLX5_MATCH_NONE;
+	outer_match_level = MLX5_MATCH_NONE;
 
-	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
+	err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
+				 &outer_match_level);
+	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
+				 outer_match_level : inner_match_level;
+
+	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
+	if (!err && is_eswitch_flow) {
 		rep = rpriv->rep;
-		if (rep->vport != FDB_UPLINK_VPORT &&
+		if (rep->vport != MLX5_VPORT_UPLINK &&
 		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
-		    esw->offloads.inline_mode < match_level)) {
+		    esw->offloads.inline_mode < non_tunnel_match_level)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Flow is not offloaded due to min inline setting");
 			netdev_warn(priv->netdev,
 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
-				    match_level, esw->offloads.inline_mode);
+				    non_tunnel_match_level, esw->offloads.inline_mode);
 			return -EOPNOTSUPP;
 		}
 	}
 
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
-		flow->esw_attr->match_level = match_level;
-	else
-		flow->nic_attr->match_level = match_level;
+	if (is_eswitch_flow) {
+		flow->esw_attr->inner_match_level = inner_match_level;
+		flow->esw_attr->outer_match_level = outer_match_level;
+	} else {
+		flow->nic_attr->match_level = non_tunnel_match_level;
+	}
 
 	return err;
 }
 
 struct pedit_headers {
 	struct ethhdr  eth;
+	struct vlan_hdr vlan;
 	struct iphdr   ip4;
 	struct ipv6hdr ip6;
 	struct tcphdr  tcp;
 	struct udphdr  udp;
 };
 
+struct pedit_headers_action {
+	struct pedit_headers	vals;
+	struct pedit_headers	masks;
+	u32			pedits;
+};
+
 static int pedit_header_offsets[] = {
-	[TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
-	[TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
-	[TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
-	[TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
-	[TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+	[FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+	[FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+	[FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+	[FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+	[FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
 };
 
 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
 
 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
-			 struct pedit_headers *masks,
-			 struct pedit_headers *vals)
+			 struct pedit_headers_action *hdrs)
 {
 	u32 *curr_pmask, *curr_pval;
 
-	if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
-		goto out_err;
-
-	curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
-	curr_pval  = (u32 *)(pedit_header(vals, hdr_type) + offset);
+	curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+	curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
 
 	if (*curr_pmask & mask)  /* disallow acting twice on the same location */
 		goto out_err;
@@ -1708,49 +2243,99 @@
 	u8  field;
 	u8  size;
 	u32 offset;
+	u32 match_offset;
 };
 
-#define OFFLOAD(fw_field, size, field, off) \
-		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
+#define OFFLOAD(fw_field, size, field, off, match_field) \
+		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
+		 offsetof(struct pedit_headers, field) + (off), \
+		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
+
+/* The masked values are equal, and every bit being rewritten is also
+ * covered by the match mask, i.e. the rewrite is a no-op.
+ */
+#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
+	type matchmaskx = *(type *)(matchmaskp); \
+	type matchvalx = *(type *)(matchvalp); \
+	type maskx = *(type *)(maskp); \
+	type valx = *(type *)(valp); \
+	\
+	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
+								 matchmaskx)); \
+})
+
+static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
+			 void *matchmaskp, int size)
+{
+	bool same = false;
+
+	switch (size) {
+	case sizeof(u8):
+		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
+		break;
+	case sizeof(u16):
+		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
+		break;
+	case sizeof(u32):
+		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
+		break;
+	}
+
+	return same;
+}
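
A minimal standalone sketch (not driver code) of the skip test that
SAME_VAL_MASK and cmp_val_mask implement: a pedit SET is redundant when
every bit it rewrites is also covered by the flow's match mask and the new
value equals the matched value on those bits. Names and values below are
illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool same_val_mask_u32(uint32_t val, uint32_t mask,
			      uint32_t matchval, uint32_t matchmask)
{
	/* mask & (mask ^ matchmask) == mask & ~matchmask: rewritten bits
	 * that the flow does not also match on */
	return (val & mask) == (matchval & matchmask) &&
	       !(mask & (mask ^ matchmask));
}

int main(void)
{
	/* match dport == 80 exactly, then set dport = 80: skip (1) */
	printf("%d\n", same_val_mask_u32(80, 0xffff, 80, 0xffff));
	/* set dport = 443 while matching on 80: must rewrite (0) */
	printf("%d\n", same_val_mask_u32(443, 0xffff, 80, 0xffff));
	/* rewrite bits the flow does not match on: must rewrite (0) */
	printf("%d\n", same_val_mask_u32(80, 0xffff, 80, 0x00ff));
	return 0;
}
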
 
 static struct mlx5_fields fields[] = {
-	OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
-	OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0),
-	OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
-	OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0),
-	OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0),
+	OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
+	OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0, dmac_15_0),
+	OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
+	OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0, smac_15_0),
+	OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0, ethertype),
+	OFFLOAD(FIRST_VID,  2, vlan.h_vlan_TCI, 0, first_vid),
 
-	OFFLOAD(IP_TTL, 1, ip4.ttl,   0),
-	OFFLOAD(SIPV4,  4, ip4.saddr, 0),
-	OFFLOAD(DIPV4,  4, ip4.daddr, 0),
+	OFFLOAD(IP_TTL, 1, ip4.ttl,   0, ttl_hoplimit),
+	OFFLOAD(SIPV4,  4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+	OFFLOAD(DIPV4,  4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 
-	OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
-	OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0),
-	OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0),
-	OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0),
-	OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
-	OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0),
-	OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0),
-	OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0),
-	OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
+	OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
+		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
+	OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0,
+		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
+	OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0,
+		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
+	OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0,
+		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
+	OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
+		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
+	OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0,
+		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
+	OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0,
+		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
+	OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0,
+		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
+	OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),
 
-	OFFLOAD(TCP_SPORT, 2, tcp.source,  0),
-	OFFLOAD(TCP_DPORT, 2, tcp.dest,    0),
-	OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
+	OFFLOAD(TCP_SPORT, 2, tcp.source,  0, tcp_sport),
+	OFFLOAD(TCP_DPORT, 2, tcp.dest,    0, tcp_dport),
+	OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),
 
-	OFFLOAD(UDP_SPORT, 2, udp.source, 0),
-	OFFLOAD(UDP_DPORT, 2, udp.dest,   0),
+	OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
+	OFFLOAD(UDP_DPORT, 2, udp.dest,   0, udp_dport),
 };
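
One non-obvious row in the table above is TCP_FLAGS: struct pedit_headers
has no named field for the flags byte, so the entry addresses it as
tcp.ack_seq plus an in-struct offset of 5, i.e. byte 13 of the TCP header,
which is where the flags live. A tiny check, using the userspace tcphdr as
a stand-in for the kernel's:

#include <stddef.h>
#include <stdio.h>
#include <netinet/tcp.h>

int main(void)
{
	/* offsetof(ack_seq) == 8; 8 + 5 == 13, the TCP flags byte */
	printf("%zu\n", offsetof(struct tcphdr, ack_seq) + 5);
	return 0;
}
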
 
-/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at
- * max from the SW pedit action. On success, it says how many HW actions were
- * actually parsed.
+/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
+ * max from the SW pedit action. On success, attr->num_mod_hdr_actions
+ * says how many HW actions were actually parsed.
  */
-static int offload_pedit_fields(struct pedit_headers *masks,
-				struct pedit_headers *vals,
-				struct mlx5e_tc_flow_parse_attr *parse_attr)
+static int offload_pedit_fields(struct pedit_headers_action *hdrs,
+				struct mlx5e_tc_flow_parse_attr *parse_attr,
+				u32 *action_flags,
+				struct netlink_ext_ack *extack)
 {
 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+	void *headers_c = get_match_headers_criteria(*action_flags,
+						     &parse_attr->spec);
+	void *headers_v = get_match_headers_value(*action_flags,
+						  &parse_attr->spec);
 	int i, action_size, nactions, max_actions, first, last, next_z;
 	void *s_masks_p, *a_masks_p, *vals_p;
 	struct mlx5_fields *f;
@@ -1761,17 +2346,21 @@
 	__be16 mask_be16;
 	void *action;
 
-	set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
-	add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
-	set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
-	add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
+	set_masks = &hdrs[0].masks;
+	add_masks = &hdrs[1].masks;
+	set_vals = &hdrs[0].vals;
+	add_vals = &hdrs[1].vals;
 
 	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
-	action = parse_attr->mod_hdr_actions;
-	max_actions = parse_attr->num_mod_hdr_actions;
-	nactions = 0;
+	action = parse_attr->mod_hdr_actions +
+		 parse_attr->num_mod_hdr_actions * action_size;
+
+	max_actions = parse_attr->max_mod_hdr_actions;
+	nactions = parse_attr->num_mod_hdr_actions;
 
 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
+		bool skip;
+
 		f = &fields[i];
 		/* avoid seeing bits set from previous iterations */
 		s_mask = 0;
@@ -1787,28 +2376,47 @@
 			continue;
 
 		if (s_mask && a_mask) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "can't set and add to the same HW field");
 			printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
 			return -EOPNOTSUPP;
 		}
 
 		if (nactions == max_actions) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "too many pedit actions, can't offload");
 			printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
 			return -EOPNOTSUPP;
 		}
 
+		skip = false;
 		if (s_mask) {
+			void *match_mask = headers_c + f->match_offset;
+			void *match_val = headers_v + f->match_offset;
+
 			cmd  = MLX5_ACTION_TYPE_SET;
 			mask = s_mask;
 			vals_p = (void *)set_vals + f->offset;
+			/* don't rewrite if we have a match on the same value */
+			if (cmp_val_mask(vals_p, s_masks_p, match_val,
+					 match_mask, f->size))
+				skip = true;
 			/* clear to denote we consumed this field */
 			memset(s_masks_p, 0, f->size);
 		} else {
+			u32 zero = 0;
+
 			cmd  = MLX5_ACTION_TYPE_ADD;
 			mask = a_mask;
 			vals_p = (void *)add_vals + f->offset;
+			/* adding 0 is a no-op */
+			if (!memcmp(vals_p, &zero, f->size))
+				skip = true;
 			/* clear to denote we consumed this field */
 			memset(a_masks_p, 0, f->size);
 		}
+		if (skip)
+			continue;
 
 		field_bsize = f->size * BITS_PER_BYTE;
 
@@ -1824,6 +2432,8 @@
 		next_z = find_next_zero_bit(&mask, field_bsize, first);
 		last  = find_last_bit(&mask, field_bsize);
 		if (first < next_z && next_z < last) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "rewrite of few sub-fields isn't supported");
 			printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
 			       mask);
 			return -EOPNOTSUPP;
@@ -1853,20 +2463,27 @@
 	return 0;
 }
 
+static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
+						  int namespace)
+{
+	if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+		return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+	else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+		return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
 static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
-				 const struct tc_action *a, int namespace,
+				 struct pedit_headers_action *hdrs,
+				 int namespace,
 				 struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
 	int nkeys, action_size, max_actions;
 
-	nkeys = tcf_pedit_nkeys(a);
+	nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
+		hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
 	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
 
-	if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
-		max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
-	else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
-		max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
-
+	max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
 	/* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
 	max_actions = min(max_actions, nkeys * 16);
 
@@ -1874,61 +2491,77 @@
 	if (!parse_attr->mod_hdr_actions)
 		return -ENOMEM;
 
-	parse_attr->num_mod_hdr_actions = max_actions;
+	parse_attr->max_mod_hdr_actions = max_actions;
 	return 0;
 }
 
 static const struct pedit_headers zero_masks = {};
 
 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
-				 const struct tc_action *a, int namespace,
-				 struct mlx5e_tc_flow_parse_attr *parse_attr)
+				 const struct flow_action_entry *act, int namespace,
+				 struct mlx5e_tc_flow_parse_attr *parse_attr,
+				 struct pedit_headers_action *hdrs,
+				 struct netlink_ext_ack *extack)
 {
-	struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
-	int nkeys, i, err = -EOPNOTSUPP;
+	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+	int err = -EOPNOTSUPP;
 	u32 mask, val, offset;
-	u8 cmd, htype;
+	u8 htype;
 
-	nkeys = tcf_pedit_nkeys(a);
+	htype = act->mangle.htype;
+	err = -EOPNOTSUPP; /* can't be all optimistic */
 
-	memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
-	memset(vals,  0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+	if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+		NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+		goto out_err;
+	}
 
-	for (i = 0; i < nkeys; i++) {
-		htype = tcf_pedit_htype(a, i);
-		cmd = tcf_pedit_cmd(a, i);
-		err = -EOPNOTSUPP; /* can't be all optimistic */
+	if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "The pedit offload action is not supported");
+		goto out_err;
+	}
 
-		if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
-			netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
-			goto out_err;
-		}
+	mask = act->mangle.mask;
+	val = act->mangle.val;
+	offset = act->mangle.offset;
 
-		if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
-			netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
-			goto out_err;
-		}
+	err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
+	if (err)
+		goto out_err;
 
-		mask = tcf_pedit_mask(a, i);
-		val = tcf_pedit_val(a, i);
-		offset = tcf_pedit_offset(a, i);
+	hdrs[cmd].pedits++;
 
-		err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
+	return 0;
+out_err:
+	return err;
+}
+
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
+				 struct mlx5e_tc_flow_parse_attr *parse_attr,
+				 struct pedit_headers_action *hdrs,
+				 u32 *action_flags,
+				 struct netlink_ext_ack *extack)
+{
+	struct pedit_headers *cmd_masks;
+	int err;
+	u8 cmd;
+
+	if (!parse_attr->mod_hdr_actions) {
+		err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
 		if (err)
 			goto out_err;
 	}
 
-	err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
-	if (err)
-		goto out_err;
-
-	err = offload_pedit_fields(masks, vals, parse_attr);
+	err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
 	if (err < 0)
 		goto out_dealloc_parsed_actions;
 
 	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
-		cmd_masks = &masks[cmd];
+		cmd_masks = &hdrs[cmd].masks;
 		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "attempt to offload an unsupported field");
 			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
 			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
 				       16, 1, cmd_masks, sizeof(zero_masks), true);
@@ -1945,19 +2578,26 @@
 	return err;
 }
 
-static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+static bool csum_offload_supported(struct mlx5e_priv *priv,
+				   u32 action,
+				   u32 update_flags,
+				   struct netlink_ext_ack *extack)
 {
 	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
 			 TCA_CSUM_UPDATE_FLAG_UDP;
 
 	/*  The HW recalcs checksums only if re-writing headers */
 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "TC csum action is only offloaded with pedit");
 		netdev_warn(priv->netdev,
 			    "TC csum action is only offloaded with pedit\n");
 		return false;
 	}
 
 	if (update_flags & ~prot_flags) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "can't offload TC csum action for some header/s");
 		netdev_warn(priv->netdev,
 			    "can't offload TC csum action for some header/s - flags %#x\n",
 			    update_flags);
@@ -1967,18 +2607,65 @@
 	return true;
 }
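
The gate above reduces to two tests: the rule must already rewrite headers
(MOD_HDR), and the requested csum updates must be limited to IPv4/TCP/UDP.
A condensed sketch with illustrative flag values (not the kernel's
TCA_CSUM_* constants):

#include <stdbool.h>
#include <stdint.h>

#define ACT_MOD_HDR	(1u << 0)
#define CSUM_IPV4HDR	(1u << 0)
#define CSUM_TCP	(1u << 1)
#define CSUM_UDP	(1u << 2)

static bool csum_offload_ok(uint32_t action, uint32_t update_flags)
{
	uint32_t prot = CSUM_IPV4HDR | CSUM_TCP | CSUM_UDP;

	/* the HW recalculates checksums only while rewriting headers */
	return (action & ACT_MOD_HDR) && !(update_flags & ~prot);
}
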
 
-static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
-					  struct tcf_exts *exts)
+struct ip_ttl_word {
+	__u8	ttl;
+	__u8	protocol;
+	__sum16	check;
+};
+
+struct ipv6_hoplimit_word {
+	__be16	payload_len;
+	__u8	nexthdr;
+	__u8	hop_limit;
+};
+
+static bool is_action_keys_supported(const struct flow_action_entry *act)
 {
-	const struct tc_action *a;
+	u32 mask, offset;
+	u8 htype;
+
+	htype = act->mangle.htype;
+	offset = act->mangle.offset;
+	mask = ~act->mangle.mask;
+	/* For the IPv4 & IPv6 headers, inspect the 4-byte word that
+	 * contains ttl/hop_limit to determine whether any field other
+	 * than ttl & hop_limit is being modified.
+	 */
+	if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
+		struct ip_ttl_word *ttl_word =
+			(struct ip_ttl_word *)&mask;
+
+		if (offset != offsetof(struct iphdr, ttl) ||
+		    ttl_word->protocol ||
+		    ttl_word->check) {
+			return true;
+		}
+	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+		struct ipv6_hoplimit_word *hoplimit_word =
+			(struct ipv6_hoplimit_word *)&mask;
+
+		if (offset != offsetof(struct ipv6hdr, payload_len) ||
+		    hoplimit_word->payload_len ||
+		    hoplimit_word->nexthdr) {
+			return true;
+		}
+	}
+	return false;
+}
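
A standalone sketch (not driver code) of the overlay trick above: the
32-bit rewrite mask is viewed through ip_ttl_word so its byte-wise fields
reveal whether anything in that 4-byte word besides ttl (protocol,
checksum) is being modified. The helper name and byte patterns are
illustrative; memcpy stands in for the kernel's pointer cast.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ip_ttl_word {
	uint8_t  ttl;
	uint8_t  protocol;
	uint16_t check;
};

/* offset is the pedit byte offset; 8 == offsetof(struct iphdr, ttl) */
static int rewrites_more_than_ttl(uint32_t offset, uint32_t pedit_mask)
{
	uint32_t mask = ~pedit_mask;	/* bits actually written */
	struct ip_ttl_word w;

	memcpy(&w, &mask, sizeof(w));	/* overlay in memory order */
	return offset != 8 || w.protocol || w.check;
}

int main(void)
{
	uint32_t m_ttl, m_ttl_proto;
	uint8_t only_ttl[4]  = { 0xff, 0x00, 0x00, 0x00 };
	uint8_t ttl_proto[4] = { 0xff, 0xff, 0x00, 0x00 };

	memcpy(&m_ttl, only_ttl, 4);
	memcpy(&m_ttl_proto, ttl_proto, 4);
	printf("%d\n", rewrites_more_than_ttl(8, ~m_ttl));	  /* 0 */
	printf("%d\n", rewrites_more_than_ttl(8, ~m_ttl_proto)); /* 1 */
	return 0;
}
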
+
+static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
+					  struct flow_action *flow_action,
+					  u32 actions,
+					  struct netlink_ext_ack *extack)
+{
+	const struct flow_action_entry *act;
 	bool modify_ip_header;
-	LIST_HEAD(actions);
-	u8 htype, ip_proto;
 	void *headers_v;
 	u16 ethertype;
-	int nkeys, i;
+	u8 ip_proto;
+	int i;
 
-	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+	headers_v = get_match_headers_value(actions, spec);
 	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
 
 	/* for non-IP we only re-write MACs, so we're okay */
@@ -1986,26 +2673,22 @@
 		goto out_ok;
 
 	modify_ip_header = false;
-	tcf_exts_for_each_action(i, a, exts) {
-		int k;
-
-		if (!is_tcf_pedit(a))
+	flow_action_for_each(i, act, flow_action) {
+		if (act->id != FLOW_ACTION_MANGLE &&
+		    act->id != FLOW_ACTION_ADD)
 			continue;
 
-		nkeys = tcf_pedit_nkeys(a);
-		for (k = 0; k < nkeys; k++) {
-			htype = tcf_pedit_htype(a, k);
-			if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
-			    htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
-				modify_ip_header = true;
-				break;
-			}
+		if (is_action_keys_supported(act)) {
+			modify_ip_header = true;
+			break;
 		}
 	}
 
 	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
 	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
 	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "can't offload re-write of non-TCP/UDP");
 		pr_info("can't offload re-write of ip proto %d\n", ip_proto);
 		return false;
 	}
@@ -2015,23 +2698,28 @@
 }
 
 static bool actions_match_supported(struct mlx5e_priv *priv,
-				    struct tcf_exts *exts,
+				    struct flow_action *flow_action,
 				    struct mlx5e_tc_flow_parse_attr *parse_attr,
-				    struct mlx5e_tc_flow *flow)
+				    struct mlx5e_tc_flow *flow,
+				    struct netlink_ext_ack *extack)
 {
 	u32 actions;
 
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+	if (mlx5e_is_eswitch_flow(flow))
 		actions = flow->esw_attr->action;
 	else
 		actions = flow->nic_attr->action;
 
-	if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
-	    !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
+	if (flow_flag_test(flow, EGRESS) &&
+	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
+	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
 		return false;
 
 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-		return modify_header_match_supported(&parse_attr->spec, exts);
+		return modify_header_match_supported(&parse_attr->spec,
+						     flow_action, actions,
+						     extack);
 
 	return true;
 }
@@ -2044,146 +2732,208 @@
 	fmdev = priv->mdev;
 	pmdev = peer_priv->mdev;
 
-	mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
-	mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
+	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
+	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
 
 	return (fsystem_guid == psystem_guid);
 }
 
-static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
+				   const struct flow_action_entry *act,
+				   struct mlx5e_tc_flow_parse_attr *parse_attr,
+				   struct pedit_headers_action *hdrs,
+				   u32 *action, struct netlink_ext_ack *extack)
+{
+	u16 mask16 = VLAN_VID_MASK;
+	u16 val16 = act->vlan.vid & VLAN_VID_MASK;
+	const struct flow_action_entry pedit_act = {
+		.id = FLOW_ACTION_MANGLE,
+		.mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+		.mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
+		.mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
+		.mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
+	};
+	u8 match_prio_mask, match_prio_val;
+	void *headers_c, *headers_v;
+	int err;
+
+	headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
+	headers_v = get_match_headers_value(*action, &parse_attr->spec);
+
+	if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+	      MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "VLAN rewrite action must have VLAN protocol match");
+		return -EOPNOTSUPP;
+	}
+
+	match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+	match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+	if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Changing VLAN prio is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
+				    hdrs, NULL);
+	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+	return err;
+}
+
+static int
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
+				 struct mlx5e_tc_flow_parse_attr *parse_attr,
+				 struct pedit_headers_action *hdrs,
+				 u32 *action, struct netlink_ext_ack *extack)
+{
+	const struct flow_action_entry prio_tag_act = {
+		.vlan.vid = 0,
+		.vlan.prio =
+			MLX5_GET(fte_match_set_lyr_2_4,
+				 get_match_headers_value(*action,
+							 &parse_attr->spec),
+				 first_prio) &
+			MLX5_GET(fte_match_set_lyr_2_4,
+				 get_match_headers_criteria(*action,
+							    &parse_attr->spec),
+				 first_prio),
+	};
+
+	return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+				       &prio_tag_act, parse_attr, hdrs, action,
+				       extack);
+}
+
+static int parse_tc_nic_actions(struct mlx5e_priv *priv,
+				struct flow_action *flow_action,
 				struct mlx5e_tc_flow_parse_attr *parse_attr,
-				struct mlx5e_tc_flow *flow)
+				struct mlx5e_tc_flow *flow,
+				struct netlink_ext_ack *extack)
 {
 	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
-	const struct tc_action *a;
-	LIST_HEAD(actions);
+	struct pedit_headers_action hdrs[2] = {};
+	const struct flow_action_entry *act;
 	u32 action = 0;
 	int err, i;
 
-	if (!tcf_exts_has_actions(exts))
+	if (!flow_action_has_entries(flow_action))
 		return -EINVAL;
 
 	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 
-	tcf_exts_for_each_action(i, a, exts) {
-		if (is_tcf_gact_shot(a)) {
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
 			action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
 			if (MLX5_CAP_FLOWTABLE(priv->mdev,
 					       flow_table_properties_nic_receive.flow_counter))
 				action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
-			continue;
-		}
-
-		if (is_tcf_pedit(a)) {
-			err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
-						    parse_attr);
+			break;
+		case FLOW_ACTION_MANGLE:
+		case FLOW_ACTION_ADD:
+			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
+						    parse_attr, hdrs, extack);
 			if (err)
 				return err;
 
 			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
 				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-			continue;
-		}
+			break;
+		case FLOW_ACTION_VLAN_MANGLE:
+			err = add_vlan_rewrite_action(priv,
+						      MLX5_FLOW_NAMESPACE_KERNEL,
+						      act, parse_attr, hdrs,
+						      &action, extack);
+			if (err)
+				return err;
 
-		if (is_tcf_csum(a)) {
+			break;
+		case FLOW_ACTION_CSUM:
 			if (csum_offload_supported(priv, action,
-						   tcf_csum_update_flags(a)))
-				continue;
+						   act->csum_flags,
+						   extack))
+				break;
 
 			return -EOPNOTSUPP;
-		}
-
-		if (is_tcf_mirred_egress_redirect(a)) {
-			struct net_device *peer_dev = tcf_mirred_dev(a);
+		case FLOW_ACTION_REDIRECT: {
+			struct net_device *peer_dev = act->dev;
 
 			if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
 			    same_hw_devs(priv, netdev_priv(peer_dev))) {
-				parse_attr->mirred_ifindex = peer_dev->ifindex;
-				flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
+				parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
+				flow_flag_set(flow, HAIRPIN);
 				action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
 			} else {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "device is not on same HW, can't offload");
 				netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
 					    peer_dev->name);
 				return -EINVAL;
 			}
-			continue;
-		}
-
-		if (is_tcf_skbedit_mark(a)) {
-			u32 mark = tcf_skbedit_mark(a);
+			}
+			break;
+		case FLOW_ACTION_MARK: {
+			u32 mark = act->mark;
 
 			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
-				netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
-					    mark);
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Bad flow mark - only 16 bit is supported");
 				return -EINVAL;
 			}
 
 			attr->flow_tag = mark;
 			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-			continue;
+			}
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
+			return -EOPNOTSUPP;
 		}
+	}
 
-		return -EINVAL;
+	if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
+	    hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
+		err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
+					    parse_attr, hdrs, &action, extack);
+		if (err)
+			return err;
+		/* in case all pedit actions are skipped, remove the MOD_HDR
+		 * flag.
+		 */
+		if (parse_attr->num_mod_hdr_actions == 0) {
+			action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+			kfree(parse_attr->mod_hdr_actions);
+		}
 	}
 
 	attr->action = action;
-	if (!actions_match_supported(priv, exts, parse_attr, flow))
+	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
 		return -EOPNOTSUPP;
 
 	return 0;
 }
 
-static inline int cmp_encap_info(struct ip_tunnel_key *a,
-				 struct ip_tunnel_key *b)
+struct encap_key {
+	const struct ip_tunnel_key *ip_tun_key;
+	struct mlx5e_tc_tunnel *tc_tunnel;
+};
+
+static inline int cmp_encap_info(struct encap_key *a,
+				 struct encap_key *b)
 {
-	return memcmp(a, b, sizeof(*a));
+	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
+	       a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
 }
 
-static inline int hash_encap_info(struct ip_tunnel_key *key)
+static inline int hash_encap_info(struct encap_key *key)
 {
-	return jhash(key, sizeof(*key), 0);
+	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+		     key->tc_tunnel->tunnel_type);
 }
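
The pair above defines encap deduplication: two flows share one
mlx5e_encap_entry only if their ip_tunnel_key bytes compare equal and the
tunnel type matches, with the tunnel type also seeding the hash. A hedged
sketch of the comparison using a simplified key (illustrative types, not
the kernel's):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct tun_key { uint32_t dst_ip; uint16_t dst_port; uint64_t vni; };

struct encap_key_s {
	struct tun_key key;
	int tunnel_type;	/* e.g. VXLAN vs. GENEVE */
};

static bool same_encap(const struct encap_key_s *a,
		       const struct encap_key_s *b)
{
	/* mirrors cmp_encap_info(): byte-equal key, same tunnel type */
	return !memcmp(&a->key, &b->key, sizeof(a->key)) &&
	       a->tunnel_type == b->tunnel_type;
}

int main(void)
{
	struct encap_key_s a = { { 0x0a000001, 4789, 42 }, 1 };
	struct encap_key_s b;

	memcpy(&b, &a, sizeof(b));	/* identical bytes, padding included */
	return !same_encap(&a, &b);	/* exits 0: entries would be shared */
}
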
 
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
-				   struct net_device *mirred_dev,
-				   struct net_device **out_dev,
-				   struct flowi4 *fl4,
-				   struct neighbour **out_n,
-				   u8 *out_ttl)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5e_rep_priv *uplink_rpriv;
-	struct rtable *rt;
-	struct neighbour *n = NULL;
-
-#if IS_ENABLED(CONFIG_INET)
-	int ret;
-
-	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
-	ret = PTR_ERR_OR_ZERO(rt);
-	if (ret)
-		return ret;
-#else
-	return -EOPNOTSUPP;
-#endif
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-	/* if the egress device isn't on the same HW e-switch, we use the uplink */
-	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
-		*out_dev = uplink_rpriv->netdev;
-	else
-		*out_dev = rt->dst.dev;
-
-	if (!(*out_ttl))
-		*out_ttl = ip4_dst_hoplimit(&rt->dst);
-	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
-	ip_rt_put(rt);
-	if (!n)
-		return -ENOMEM;
-
-	*out_n = n;
-	return 0;
-}
 
 static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
 				  struct net_device *peer_netdev)
@@ -2193,424 +2943,160 @@
 	peer_priv = netdev_priv(peer_netdev);
 
 	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
-		(priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
-		same_hw_devs(priv, peer_priv) &&
-		MLX5_VPORT_MANAGER(peer_priv->mdev) &&
-		(peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
+		mlx5e_eswitch_rep(priv->netdev) &&
+		mlx5e_eswitch_rep(peer_netdev) &&
+		same_hw_devs(priv, peer_priv));
 }
 
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
-				   struct net_device *mirred_dev,
-				   struct net_device **out_dev,
-				   struct flowi6 *fl6,
-				   struct neighbour **out_n,
-				   u8 *out_ttl)
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
 {
-	struct neighbour *n = NULL;
-	struct dst_entry *dst;
-
-#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
-	struct mlx5e_rep_priv *uplink_rpriv;
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	int ret;
-
-	ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
-					 fl6);
-	if (ret < 0)
-		return ret;
-
-	if (!(*out_ttl))
-		*out_ttl = ip6_dst_hoplimit(dst);
-
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-	/* if the egress device isn't on the same HW e-switch, we use the uplink */
-	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
-		*out_dev = uplink_rpriv->netdev;
-	else
-		*out_dev = dst->dev;
-#else
-	return -EOPNOTSUPP;
-#endif
-
-	n = dst_neigh_lookup(dst, &fl6->daddr);
-	dst_release(dst);
-	if (!n)
-		return -ENOMEM;
-
-	*out_n = n;
-	return 0;
+	return refcount_inc_not_zero(&e->refcnt);
 }
 
-static void gen_vxlan_header_ipv4(struct net_device *out_dev,
-				  char buf[], int encap_size,
-				  unsigned char h_dest[ETH_ALEN],
-				  u8 tos, u8 ttl,
-				  __be32 daddr,
-				  __be32 saddr,
-				  __be16 udp_dst_port,
-				  __be32 vx_vni)
-{
-	struct ethhdr *eth = (struct ethhdr *)buf;
-	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
-	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
-	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
-	memset(buf, 0, encap_size);
-
-	ether_addr_copy(eth->h_dest, h_dest);
-	ether_addr_copy(eth->h_source, out_dev->dev_addr);
-	eth->h_proto = htons(ETH_P_IP);
-
-	ip->daddr = daddr;
-	ip->saddr = saddr;
-
-	ip->tos = tos;
-	ip->ttl = ttl;
-	ip->protocol = IPPROTO_UDP;
-	ip->version = 0x4;
-	ip->ihl = 0x5;
-
-	udp->dest = udp_dst_port;
-	vxh->vx_flags = VXLAN_HF_VNI;
-	vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static void gen_vxlan_header_ipv6(struct net_device *out_dev,
-				  char buf[], int encap_size,
-				  unsigned char h_dest[ETH_ALEN],
-				  u8 tos, u8 ttl,
-				  struct in6_addr *daddr,
-				  struct in6_addr *saddr,
-				  __be16 udp_dst_port,
-				  __be32 vx_vni)
-{
-	struct ethhdr *eth = (struct ethhdr *)buf;
-	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
-	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
-	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
-	memset(buf, 0, encap_size);
-
-	ether_addr_copy(eth->h_dest, h_dest);
-	ether_addr_copy(eth->h_source, out_dev->dev_addr);
-	eth->h_proto = htons(ETH_P_IPV6);
-
-	ip6_flow_hdr(ip6h, tos, 0);
-	/* the HW fills up ipv6 payload len */
-	ip6h->nexthdr     = IPPROTO_UDP;
-	ip6h->hop_limit   = ttl;
-	ip6h->daddr	  = *daddr;
-	ip6h->saddr	  = *saddr;
-
-	udp->dest = udp_dst_port;
-	vxh->vx_flags = VXLAN_HF_VNI;
-	vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
-					  struct net_device *mirred_dev,
-					  struct mlx5e_encap_entry *e)
-{
-	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
-	int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
-	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	struct net_device *out_dev;
-	struct neighbour *n = NULL;
-	struct flowi4 fl4 = {};
-	u8 nud_state, tos, ttl;
-	char *encap_header;
-	int err;
-
-	if (max_encap_size < ipv4_encap_size) {
-		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
-			       ipv4_encap_size, max_encap_size);
-		return -EOPNOTSUPP;
-	}
-
-	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
-	if (!encap_header)
-		return -ENOMEM;
-
-	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
-		fl4.flowi4_proto = IPPROTO_UDP;
-		fl4.fl4_dport = tun_key->tp_dst;
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		goto free_encap;
-	}
-
-	tos = tun_key->tos;
-	ttl = tun_key->ttl;
-
-	fl4.flowi4_tos = tun_key->tos;
-	fl4.daddr = tun_key->u.ipv4.dst;
-	fl4.saddr = tun_key->u.ipv4.src;
-
-	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
-				      &fl4, &n, &ttl);
-	if (err)
-		goto free_encap;
-
-	/* used by mlx5e_detach_encap to lookup a neigh hash table
-	 * entry in the neigh hash table when a user deletes a rule
-	 */
-	e->m_neigh.dev = n->dev;
-	e->m_neigh.family = n->ops->family;
-	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-	e->out_dev = out_dev;
-
-	/* It's importent to add the neigh to the hash table before checking
-	 * the neigh validity state. So if we'll get a notification, in case the
-	 * neigh changes it's validity state, we would find the relevant neigh
-	 * in the hash.
-	 */
-	err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
-	if (err)
-		goto free_encap;
-
-	read_lock_bh(&n->lock);
-	nud_state = n->nud_state;
-	ether_addr_copy(e->h_dest, n->ha);
-	read_unlock_bh(&n->lock);
-
-	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
-		gen_vxlan_header_ipv4(out_dev, encap_header,
-				      ipv4_encap_size, e->h_dest, tos, ttl,
-				      fl4.daddr,
-				      fl4.saddr, tun_key->tp_dst,
-				      tunnel_id_to_key32(tun_key->tun_id));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		goto destroy_neigh_entry;
-	}
-	e->encap_size = ipv4_encap_size;
-	e->encap_header = encap_header;
-
-	if (!(nud_state & NUD_VALID)) {
-		neigh_event_send(n, NULL);
-		err = -EAGAIN;
-		goto out;
-	}
-
-	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       ipv4_encap_size, encap_header, &e->encap_id);
-	if (err)
-		goto destroy_neigh_entry;
-
-	e->flags |= MLX5_ENCAP_ENTRY_VALID;
-	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
-	neigh_release(n);
-	return err;
-
-destroy_neigh_entry:
-	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
-	kfree(encap_header);
-out:
-	if (n)
-		neigh_release(n);
-	return err;
-}
-
-static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
-					  struct net_device *mirred_dev,
-					  struct mlx5e_encap_entry *e)
-{
-	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
-	int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
-	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	struct net_device *out_dev;
-	struct neighbour *n = NULL;
-	struct flowi6 fl6 = {};
-	u8 nud_state, tos, ttl;
-	char *encap_header;
-	int err;
-
-	if (max_encap_size < ipv6_encap_size) {
-		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
-			       ipv6_encap_size, max_encap_size);
-		return -EOPNOTSUPP;
-	}
-
-	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
-	if (!encap_header)
-		return -ENOMEM;
-
-	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
-		fl6.flowi6_proto = IPPROTO_UDP;
-		fl6.fl6_dport = tun_key->tp_dst;
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		goto free_encap;
-	}
-
-	tos = tun_key->tos;
-	ttl = tun_key->ttl;
-
-	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
-	fl6.daddr = tun_key->u.ipv6.dst;
-	fl6.saddr = tun_key->u.ipv6.src;
-
-	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
-				      &fl6, &n, &ttl);
-	if (err)
-		goto free_encap;
-
-	/* used by mlx5e_detach_encap to lookup a neigh hash table
-	 * entry in the neigh hash table when a user deletes a rule
-	 */
-	e->m_neigh.dev = n->dev;
-	e->m_neigh.family = n->ops->family;
-	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-	e->out_dev = out_dev;
-
-	/* It's importent to add the neigh to the hash table before checking
-	 * the neigh validity state. So if we'll get a notification, in case the
-	 * neigh changes it's validity state, we would find the relevant neigh
-	 * in the hash.
-	 */
-	err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
-	if (err)
-		goto free_encap;
-
-	read_lock_bh(&n->lock);
-	nud_state = n->nud_state;
-	ether_addr_copy(e->h_dest, n->ha);
-	read_unlock_bh(&n->lock);
-
-	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
-		gen_vxlan_header_ipv6(out_dev, encap_header,
-				      ipv6_encap_size, e->h_dest, tos, ttl,
-				      &fl6.daddr,
-				      &fl6.saddr, tun_key->tp_dst,
-				      tunnel_id_to_key32(tun_key->tun_id));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		goto destroy_neigh_entry;
-	}
-
-	e->encap_size = ipv6_encap_size;
-	e->encap_header = encap_header;
-
-	if (!(nud_state & NUD_VALID)) {
-		neigh_event_send(n, NULL);
-		err = -EAGAIN;
-		goto out;
-	}
-
-	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       ipv6_encap_size, encap_header, &e->encap_id);
-	if (err)
-		goto destroy_neigh_entry;
-
-	e->flags |= MLX5_ENCAP_ENTRY_VALID;
-	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
-	neigh_release(n);
-	return err;
-
-destroy_neigh_entry:
-	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
-	kfree(encap_header);
-out:
-	if (n)
-		neigh_release(n);
-	return err;
-}
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
-			      struct ip_tunnel_info *tun_info,
-			      struct net_device *mirred_dev,
-			      struct net_device **encap_dev,
-			      struct mlx5e_tc_flow *flow)
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+		uintptr_t hash_key)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	unsigned short family = ip_tunnel_info_af(tun_info);
-	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
-	struct ip_tunnel_key *key = &tun_info->key;
 	struct mlx5e_encap_entry *e;
-	int tunnel_type, err = 0;
-	uintptr_t hash_key;
-	bool found = false;
-
-	/* udp dst port must be set */
-	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
-		goto vxlan_encap_offload_err;
-
-	/* setting udp src port isn't supported */
-	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
-vxlan_encap_offload_err:
-		netdev_warn(priv->netdev,
-			    "must set udp dst port and not set udp src port\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
-	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
-		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
-	} else {
-		netdev_warn(priv->netdev,
-			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
-		return -EOPNOTSUPP;
-	}
-
-	hash_key = hash_encap_info(key);
+	struct encap_key e_key;
 
 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 				   encap_hlist, hash_key) {
-		if (!cmp_encap_info(&e->tun_info.key, key)) {
-			found = true;
-			break;
-		}
+		e_key.ip_tun_key = &e->tun_info->key;
+		e_key.tc_tunnel = e->tunnel;
+		if (!cmp_encap_info(&e_key, key) &&
+		    mlx5e_encap_take(e))
+			return e;
 	}
 
+	return NULL;
+}
+
+static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+	return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
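
dup_tun_info() above deep-copies the tunnel metadata together with the
variable-length tunnel options that live directly after it in memory. The
same sized-tail copy idiom in userspace, with illustrative types:

#include <stdlib.h>
#include <string.h>

struct blob {
	size_t options_len;
	unsigned char options[];	/* flexible array member */
};

static struct blob *dup_blob(const struct blob *src)
{
	size_t sz = sizeof(*src) + src->options_len;
	struct blob *dst = malloc(sz);

	return dst ? memcpy(dst, src, sz) : NULL;
}
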
+
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow *flow,
+			      struct net_device *mirred_dev,
+			      int out_index,
+			      struct netlink_ext_ack *extack,
+			      struct net_device **encap_dev,
+			      bool *encap_valid)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	const struct ip_tunnel_info *tun_info;
+	struct encap_key key;
+	struct mlx5e_encap_entry *e;
+	unsigned short family;
+	uintptr_t hash_key;
+	int err = 0;
+
+	parse_attr = attr->parse_attr;
+	tun_info = parse_attr->tun_info[out_index];
+	family = ip_tunnel_info_af(tun_info);
+	key.ip_tun_key = &tun_info->key;
+	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+	if (!key.tc_tunnel) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+		return -EOPNOTSUPP;
+	}
+
+	hash_key = hash_encap_info(&key);
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	e = mlx5e_encap_get(priv, &key, hash_key);
+
 	/* must verify if encap is valid or not */
-	if (found)
+	if (e) {
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		wait_for_completion(&e->res_ready);
+
+		/* Protect against concurrent neigh update. */
+		mutex_lock(&esw->offloads.encap_tbl_lock);
+		if (e->compl_result < 0) {
+			err = -EREMOTEIO;
+			goto out_err;
+		}
 		goto attach_flow;
+	}
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
-	if (!e)
-		return -ENOMEM;
+	if (!e) {
+		err = -ENOMEM;
+		goto out_err;
+	}
 
-	e->tun_info = *tun_info;
-	e->tunnel_type = tunnel_type;
+	refcount_set(&e->refcnt, 1);
+	init_completion(&e->res_ready);
+
+	tun_info = dup_tun_info(tun_info);
+	if (!tun_info) {
+		err = -ENOMEM;
+		goto out_err_init;
+	}
+	e->tun_info = tun_info;
+	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+	if (err)
+		goto out_err_init;
+
 	INIT_LIST_HEAD(&e->flows);
+	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
 
 	if (family == AF_INET)
-		err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
+		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
 	else if (family == AF_INET6)
-		err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
+		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
 
-	if (err && err != -EAGAIN)
+	/* Protect against concurrent neigh update. */
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	complete_all(&e->res_ready);
+	if (err) {
+		e->compl_result = err;
 		goto out_err;
-
-	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+	}
+	e->compl_result = 1;
 
 attach_flow:
-	list_add(&flow->encap, &e->flows);
+	flow->encaps[out_index].e = e;
+	list_add(&flow->encaps[out_index].list, &e->flows);
+	flow->encaps[out_index].index = out_index;
 	*encap_dev = e->out_dev;
-	if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-		attr->encap_id = e->encap_id;
-	else
-		err = -EAGAIN;
+	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+		attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+		attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+		*encap_valid = true;
+	} else {
+		*encap_valid = false;
+	}
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
 
 	return err;
 
 out_err:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	if (e)
+		mlx5e_encap_put(priv, e);
+	return err;
+
+out_err_init:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	kfree(tun_info);
 	kfree(e);
 	return err;
 }
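
The locking scheme above is a lookup-or-create-once pattern: the first
caller publishes a stub entry under encap_tbl_lock, resolves the header
outside the lock, then completes res_ready; later callers take a
reference, wait on the completion, and re-check compl_result under the
lock. A hedged userspace sketch of the same shape, with a pthread
condition variable standing in for the kernel completion (all names
illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct encap_stub {
	pthread_mutex_t lock;	/* stands in for encap_tbl_lock */
	pthread_cond_t ready;	/* stands in for res_ready */
	bool done;
	int compl_result;	/* < 0 means resolution failed */
};

static int attach(struct encap_stub *e, bool creator)
{
	int err = 0;

	if (!creator) {		/* later flows: wait, then re-check */
		pthread_mutex_lock(&e->lock);
		while (!e->done)
			pthread_cond_wait(&e->ready, &e->lock);
		err = e->compl_result < 0 ? e->compl_result : 0;
		pthread_mutex_unlock(&e->lock);
		return err;
	}

	/* first flow: resolve outside the lock (route/neigh lookup) */

	pthread_mutex_lock(&e->lock);
	e->compl_result = err ? err : 1;
	e->done = true;
	pthread_cond_broadcast(&e->ready);	/* complete_all() */
	pthread_mutex_unlock(&e->lock);
	return err;
}

int main(void)
{
	struct encap_stub e = { PTHREAD_MUTEX_INITIALIZER,
				PTHREAD_COND_INITIALIZER, false, 0 };

	attach(&e, true);			/* creator resolves */
	printf("%d\n", attach(&e, false));	/* reuser sees result: 0 */
	return 0;
}
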
 
 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
-				const struct tc_action *a,
+				const struct flow_action_entry *act,
 				struct mlx5_esw_flow_attr *attr,
 				u32 *action)
 {
@@ -2619,7 +3105,8 @@
 	if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
 		return -EOPNOTSUPP;
 
-	if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+	switch (act->id) {
+	case FLOW_ACTION_VLAN_POP:
 		if (vlan_idx) {
 			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
 								 MLX5_FS_VLAN_DEPTH))
@@ -2629,10 +3116,11 @@
 		} else {
 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
 		}
-	} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
-		attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
-		attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
-		attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
+		break;
+	case FLOW_ACTION_VLAN_PUSH:
+		attr->vlan_vid[vlan_idx] = act->vlan.vid;
+		attr->vlan_prio[vlan_idx] = act->vlan.prio;
+		attr->vlan_proto[vlan_idx] = act->vlan.proto;
 		if (!attr->vlan_proto[vlan_idx])
 			attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
 
@@ -2644,14 +3132,15 @@
 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
 		} else {
 			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
-			    (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
-			     tcf_vlan_push_prio(a)))
+			    (act->vlan.proto != htons(ETH_P_8021Q) ||
+			     act->vlan.prio))
 				return -EOPNOTSUPP;
 
 			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
 		}
-	} else { /* action is TCA_VLAN_ACT_MODIFY */
-		return -EOPNOTSUPP;
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	attr->total_vlan = vlan_idx + 1;
@@ -2659,121 +3148,304 @@
 	return 0;
 }
 
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-				struct mlx5e_tc_flow_parse_attr *parse_attr,
-				struct mlx5e_tc_flow *flow)
+static int add_vlan_push_action(struct mlx5e_priv *priv,
+				struct mlx5_esw_flow_attr *attr,
+				struct net_device **out_dev,
+				u32 *action)
 {
+	struct net_device *vlan_dev = *out_dev;
+	struct flow_action_entry vlan_act = {
+		.id = FLOW_ACTION_VLAN_PUSH,
+		.vlan.vid = vlan_dev_vlan_id(vlan_dev),
+		.vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+		.vlan.prio = 0,
+	};
+	int err;
+
+	err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+	if (err)
+		return err;
+
+	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
+					dev_get_iflink(vlan_dev));
+	if (is_vlan_dev(*out_dev))
+		err = add_vlan_push_action(priv, attr, out_dev, action);
+
+	return err;
+}
+
+static int add_vlan_pop_action(struct mlx5e_priv *priv,
+			       struct mlx5_esw_flow_attr *attr,
+			       u32 *action)
+{
+	int nest_level = attr->parse_attr->filter_dev->lower_level;
+	struct flow_action_entry vlan_act = {
+		.id = FLOW_ACTION_VLAN_POP,
+	};
+	int err = 0;
+
+	while (nest_level--) {
+		err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+				    struct net_device *out_dev)
+{
+	if (is_merged_eswitch_dev(priv, out_dev))
+		return true;
+
+	return mlx5e_eswitch_rep(out_dev) &&
+	       same_hw_devs(priv, netdev_priv(out_dev));
+}
+
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
+				struct flow_action *flow_action,
+				struct mlx5e_tc_flow *flow,
+				struct netlink_ext_ack *extack)
+{
+	struct pedit_headers_action hdrs[2] = {};
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct ip_tunnel_info *info = NULL;
-	const struct tc_action *a;
-	LIST_HEAD(actions);
+	const struct ip_tunnel_info *info = NULL;
+	const struct flow_action_entry *act;
 	bool encap = false;
 	u32 action = 0;
 	int err, i;
 
-	if (!tcf_exts_has_actions(exts))
+	if (!flow_action_has_entries(flow_action))
 		return -EINVAL;
 
-	attr->in_rep = rpriv->rep;
-	attr->in_mdev = priv->mdev;
-
-	tcf_exts_for_each_action(i, a, exts) {
-		if (is_tcf_gact_shot(a)) {
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
 			action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
-			continue;
-		}
-
-		if (is_tcf_pedit(a)) {
-			err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
-						    parse_attr);
+			break;
+		case FLOW_ACTION_MANGLE:
+		case FLOW_ACTION_ADD:
+			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
+						    parse_attr, hdrs, extack);
 			if (err)
 				return err;
 
 			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-			attr->mirror_count = attr->out_count;
-			continue;
-		}
-
-		if (is_tcf_csum(a)) {
+			attr->split_count = attr->out_count;
+			break;
+		case FLOW_ACTION_CSUM:
 			if (csum_offload_supported(priv, action,
-						   tcf_csum_update_flags(a)))
-				continue;
+						   act->csum_flags, extack))
+				break;
 
 			return -EOPNOTSUPP;
-		}
-
-		if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) {
+		case FLOW_ACTION_REDIRECT:
+		case FLOW_ACTION_MIRRED: {
 			struct mlx5e_priv *out_priv;
 			struct net_device *out_dev;
 
-			out_dev = tcf_mirred_dev(a);
+			out_dev = act->dev;
+			if (!out_dev) {
+				/* out_dev is NULL when filters with a
+				 * non-existent mirred device are replayed to
+				 * the driver.
+				 */
+				return -EINVAL;
+			}
 
 			if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "can't support more output ports, can't offload forwarding");
 				pr_err("can't support more than %d output ports, can't offload forwarding\n",
 				       attr->out_count);
 				return -EOPNOTSUPP;
 			}
 
-			if (switchdev_port_same_parent_id(priv->netdev,
-							  out_dev) ||
-			    is_merged_eswitch_dev(priv, out_dev)) {
-				action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
-					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
+			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
+			if (encap) {
+				parse_attr->mirred_ifindex[attr->out_count] =
+					out_dev->ifindex;
+				parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
+				if (!parse_attr->tun_info[attr->out_count])
+					return -ENOMEM;
+				encap = false;
+				attr->dests[attr->out_count].flags |=
+					MLX5_ESW_DEST_ENCAP;
+				attr->out_count++;
+				/* attr->dests[].rep is resolved when we
+				 * handle encap
+				 */
+			} else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
+				struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+				struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+				struct net_device *uplink_upper;
+
+				rcu_read_lock();
+				uplink_upper =
+					netdev_master_upper_dev_get_rcu(uplink_dev);
+				if (uplink_upper &&
+				    netif_is_lag_master(uplink_upper) &&
+				    uplink_upper == out_dev)
+					out_dev = uplink_dev;
+				rcu_read_unlock();
+
+				if (is_vlan_dev(out_dev)) {
+					err = add_vlan_push_action(priv, attr,
+								   &out_dev,
+								   &action);
+					if (err)
+						return err;
+				}
+
+				if (is_vlan_dev(parse_attr->filter_dev)) {
+					err = add_vlan_pop_action(priv, attr,
+								  &action);
+					if (err)
+						return err;
+				}
+
+				if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+					NL_SET_ERR_MSG_MOD(extack,
+							   "devices are not on same switch HW, can't offload forwarding");
+					pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+					       priv->netdev->name, out_dev->name);
+					return -EOPNOTSUPP;
+				}
+
 				out_priv = netdev_priv(out_dev);
 				rpriv = out_priv->ppriv;
-				attr->out_rep[attr->out_count] = rpriv->rep;
-				attr->out_mdev[attr->out_count++] = out_priv->mdev;
-			} else if (encap) {
-				parse_attr->mirred_ifindex = out_dev->ifindex;
-				parse_attr->tun_info = *info;
-				attr->parse_attr = parse_attr;
-				action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
-					  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
-					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
-				/* attr->out_rep is resolved when we handle encap */
+				attr->dests[attr->out_count].rep = rpriv->rep;
+				attr->dests[attr->out_count].mdev = out_priv->mdev;
+				attr->out_count++;
+			} else if (parse_attr->filter_dev != priv->netdev) {
+				/* All mlx5 devices are called to configure
+				 * high level device filters. Therefore, the
+				 * *attempt* to install a filter on an invalid
+				 * eswitch should not trigger an explicit error.
+				 */
+				return -EINVAL;
 			} else {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "devices are not on same switch HW, can't offload forwarding");
 				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
 				       priv->netdev->name, out_dev->name);
 				return -EINVAL;
 			}
-			continue;
-		}
-
-		if (is_tcf_tunnel_set(a)) {
-			info = tcf_tunnel_info(a);
+			}
+			break;
+		case FLOW_ACTION_TUNNEL_ENCAP:
+			info = act->tunnel;
 			if (info)
 				encap = true;
 			else
 				return -EOPNOTSUPP;
-			attr->mirror_count = attr->out_count;
-			continue;
-		}
 
-		if (is_tcf_vlan(a)) {
-			err = parse_tc_vlan_action(priv, a, attr, &action);
-
+			break;
+		case FLOW_ACTION_VLAN_PUSH:
+		case FLOW_ACTION_VLAN_POP:
+			if (act->id == FLOW_ACTION_VLAN_PUSH &&
+			    (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
+				/* Replace vlan pop+push with vlan modify */
+				action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+				err = add_vlan_rewrite_action(priv,
+							      MLX5_FLOW_NAMESPACE_FDB,
+							      act, parse_attr, hdrs,
+							      &action, extack);
+			} else {
+				err = parse_tc_vlan_action(priv, act, attr, &action);
+			}
 			if (err)
 				return err;
 
-			attr->mirror_count = attr->out_count;
-			continue;
-		}
+			attr->split_count = attr->out_count;
+			break;
+		case FLOW_ACTION_VLAN_MANGLE:
+			err = add_vlan_rewrite_action(priv,
+						      MLX5_FLOW_NAMESPACE_FDB,
+						      act, parse_attr, hdrs,
+						      &action, extack);
+			if (err)
+				return err;
 
-		if (is_tcf_tunnel_release(a)) {
+			attr->split_count = attr->out_count;
+			break;
+		case FLOW_ACTION_TUNNEL_DECAP:
 			action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
-			continue;
-		}
+			break;
+		case FLOW_ACTION_GOTO: {
+			u32 dest_chain = act->chain_index;
+			u32 max_chain = mlx5_eswitch_get_chain_range(esw);
 
-		return -EINVAL;
+			if (dest_chain <= attr->chain) {
+				NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
+				return -EOPNOTSUPP;
+			}
+			if (dest_chain > max_chain) {
+				NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
+				return -EOPNOTSUPP;
+			}
+			action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+			attr->dest_chain = dest_chain;
+			break;
+			}
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+	    action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+		/* For prio tag mode, replace the vlan pop with a vlan prio
+		 * tag rewrite.
+		 */
+		action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+		err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
+						       &action, extack);
+		if (err)
+			return err;
+	}
+
+	if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
+	    hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
+		err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+					    parse_attr, hdrs, &action, extack);
+		if (err)
+			return err;
+		/* In case all pedit actions are skipped, remove the MOD_HDR
+		 * flag. We might have set split_count either by pedit or
+		 * pop/push; if there is no pop/push either, reset it too.
+		 */
+		if (parse_attr->num_mod_hdr_actions == 0) {
+			action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+			kfree(parse_attr->mod_hdr_actions);
+			if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+			      (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
+				attr->split_count = 0;
+		}
 	}
 
 	attr->action = action;
-	if (!actions_match_supported(priv, exts, parse_attr, flow))
+	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
 		return -EOPNOTSUPP;
 
-	if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+	if (attr->dest_chain) {
+		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+			NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
+			return -EOPNOTSUPP;
+		}
+		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	}
+
+	if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "current firmware doesn't support split rule for port mirroring");
 		netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
 		return -EOPNOTSUPP;
 	}
@@ -2781,14 +3453,19 @@
 	return 0;
 }
 
-static void get_flags(int flags, u8 *flow_flags)
+static void get_flags(int flags, unsigned long *flow_flags)
 {
-	u8 __flow_flags = 0;
+	unsigned long __flow_flags = 0;
 
-	if (flags & MLX5E_TC_INGRESS)
-		__flow_flags |= MLX5E_TC_FLOW_INGRESS;
-	if (flags & MLX5E_TC_EGRESS)
-		__flow_flags |= MLX5E_TC_FLOW_EGRESS;
+	if (flags & MLX5_TC_FLAG(INGRESS))
+		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
+	if (flags & MLX5_TC_FLAG(EGRESS))
+		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
+
+	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
+		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
+	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
+		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
 
 	*flow_flags = __flow_flags;
 }
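
get_flags() folds the externally visible MLX5_TC_FLAG() query bits into the internal per-flow flag word. A standalone sketch of the translation, with invented flag names:

    #include <stdio.h>

    #define BIT(n)		(1UL << (n))

    /* external query flags (made-up values) */
    #define TC_INGRESS	BIT(0)
    #define TC_EGRESS	BIT(1)

    /* internal per-flow flag bits (made-up values) */
    enum { FLOW_FLAG_INGRESS_BIT, FLOW_FLAG_EGRESS_BIT };

    static unsigned long translate_flags(int flags)
    {
    	unsigned long flow_flags = 0;

    	if (flags & TC_INGRESS)
    		flow_flags |= BIT(FLOW_FLAG_INGRESS_BIT);
    	if (flags & TC_EGRESS)
    		flow_flags |= BIT(FLOW_FLAG_EGRESS_BIT);

    	return flow_flags;
    }

    int main(void)
    {
    	printf("%#lx\n", translate_flags(TC_INGRESS | TC_EGRESS));	/* 0x3 */
    	return 0;
    }
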
@@ -2800,43 +3477,49 @@
 	.automatic_shrinking = true,
 };
 
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
+				    unsigned long flags)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5e_rep_priv *uplink_rpriv;
 
-	if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
 		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-		return &uplink_rpriv->tc_ht;
-	} else
+		return &uplink_rpriv->uplink_priv.tc_ht;
+	} else /* NIC offload */
 		return &priv->fs.tc.ht;
 }
 
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
-			   struct tc_cls_flower_offload *f, int flags)
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
+		flow_flag_test(flow, INGRESS);
+	bool act_is_encap = !!(attr->action &
+			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
+	bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
+						MLX5_DEVCOM_ESW_OFFLOADS);
+
+	if (!esw_paired)
+		return false;
+
+	if ((mlx5_lag_is_sriov(attr->in_mdev) ||
+	     mlx5_lag_is_multipath(attr->in_mdev)) &&
+	    (is_rep_ingress || act_is_encap))
+		return true;
+
+	return false;
+}
+
+static int
+mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
+		 struct flow_cls_offload *f, unsigned long flow_flags,
+		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
+		 struct mlx5e_tc_flow **__flow)
+{
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
-	struct rhashtable *tc_ht = get_tc_ht(priv);
 	struct mlx5e_tc_flow *flow;
-	int attr_size, err = 0;
-	u8 flow_flags = 0;
-
-	get_flags(flags, &flow_flags);
-
-	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
-	if (flow) {
-		netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
-		return 0;
-	}
-
-	if (esw && esw->mode == SRIOV_OFFLOADS) {
-		flow_flags |= MLX5E_TC_FLOW_ESWITCH;
-		attr_size  = sizeof(struct mlx5_esw_flow_attr);
-	} else {
-		flow_flags |= MLX5E_TC_FLOW_NIC;
-		attr_size  = sizeof(struct mlx5_nic_flow_attr);
-	}
+	int out_index, err;
 
 	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
 	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
@@ -2848,113 +3531,513 @@
 	flow->cookie = f->cookie;
 	flow->flags = flow_flags;
 	flow->priv = priv;
+	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+		INIT_LIST_HEAD(&flow->encaps[out_index].list);
+	INIT_LIST_HEAD(&flow->mod_hdr);
+	INIT_LIST_HEAD(&flow->hairpin);
+	refcount_set(&flow->refcnt, 1);
+	init_completion(&flow->init_done);
 
-	err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
-	if (err < 0)
-		goto err_free;
+	*__flow = flow;
+	*__parse_attr = parse_attr;
 
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
-		err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
-		if (err < 0)
-			goto err_free;
-		flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
-	} else {
-		err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
-		if (err < 0)
-			goto err_free;
-		flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
-	}
-
-	if (IS_ERR(flow->rule[0])) {
-		err = PTR_ERR(flow->rule[0]);
-		if (err != -EAGAIN)
-			goto err_free;
-	}
-
-	if (err != -EAGAIN)
-		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
-
-	if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
-	    !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
-		kvfree(parse_attr);
-
-	err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
-	if (err) {
-		mlx5e_tc_del_flow(priv, flow);
-		kfree(flow);
-	}
-
-	return err;
+	return 0;
 
 err_free:
-	kvfree(parse_attr);
 	kfree(flow);
+	kvfree(parse_attr);
 	return err;
 }
 
-#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
-#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
+static void
+mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
+			 struct mlx5e_priv *priv,
+			 struct mlx5e_tc_flow_parse_attr *parse_attr,
+			 struct flow_cls_offload *f,
+			 struct mlx5_eswitch_rep *in_rep,
+			 struct mlx5_core_dev *in_mdev)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	esw_attr->parse_attr = parse_attr;
+	esw_attr->chain = f->common.chain_index;
+	esw_attr->prio = f->common.prio;
+
+	esw_attr->in_rep = in_rep;
+	esw_attr->in_mdev = in_mdev;
+
+	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
+	    MLX5_COUNTER_SOURCE_ESWITCH)
+		esw_attr->counter_dev = in_mdev;
+	else
+		esw_attr->counter_dev = priv->mdev;
+}
+
+static struct mlx5e_tc_flow *
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+		     struct flow_cls_offload *f,
+		     unsigned long flow_flags,
+		     struct net_device *filter_dev,
+		     struct mlx5_eswitch_rep *in_rep,
+		     struct mlx5_core_dev *in_mdev)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5e_tc_flow *flow;
+	int attr_size, err;
+
+	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
+	attr_size  = sizeof(struct mlx5_esw_flow_attr);
+	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+			       &parse_attr, &flow);
+	if (err)
+		goto out;
+
+	parse_attr->filter_dev = filter_dev;
+	mlx5e_flow_esw_attr_init(flow->esw_attr,
+				 priv, parse_attr,
+				 f, in_rep, in_mdev);
+
+	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+			       f, filter_dev);
+	if (err)
+		goto err_free;
+
+	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
+	if (err)
+		goto err_free;
+
+	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
+	complete_all(&flow->init_done);
+	if (err) {
+		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
+			goto err_free;
+
+		add_unready_flow(flow);
+	}
+
+	return flow;
+
+err_free:
+	mlx5e_flow_put(priv, flow);
+out:
+	return ERR_PTR(err);
+}
+
+static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
+				      struct mlx5e_tc_flow *flow,
+				      unsigned long flow_flags)
+{
+	struct mlx5e_priv *priv = flow->priv, *peer_priv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5e_rep_priv *peer_urpriv;
+	struct mlx5e_tc_flow *peer_flow;
+	struct mlx5_core_dev *in_mdev;
+	int err = 0;
+
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		return -ENODEV;
+
+	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+	peer_priv = netdev_priv(peer_urpriv->netdev);
+
+	/* in_mdev is assigned to the mdev from which the packet
+	 * originated. Packets redirected to the uplink use the same
+	 * mdev as the original flow; packets redirected from the
+	 * uplink use the peer mdev.
+	 */
+	if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
+		in_mdev = peer_priv->mdev;
+	else
+		in_mdev = priv->mdev;
+
+	parse_attr = flow->esw_attr->parse_attr;
+	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
+					 parse_attr->filter_dev,
+					 flow->esw_attr->in_rep, in_mdev);
+	if (IS_ERR(peer_flow)) {
+		err = PTR_ERR(peer_flow);
+		goto out;
+	}
+
+	flow->peer_flow = peer_flow;
+	flow_flag_set(flow, DUP);
+	mutex_lock(&esw->offloads.peer_mutex);
+	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+	mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+		   struct flow_cls_offload *f,
+		   unsigned long flow_flags,
+		   struct net_device *filter_dev,
+		   struct mlx5e_tc_flow **__flow)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+	struct mlx5_core_dev *in_mdev = priv->mdev;
+	struct mlx5e_tc_flow *flow;
+	int err;
+
+	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+				    in_mdev);
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
+
+	if (is_peer_flow_needed(flow)) {
+		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
+		if (err) {
+			mlx5e_tc_del_fdb_flow(priv, flow);
+			goto out;
+		}
+	}
+
+	*__flow = flow;
+
+	return 0;
+
+out:
+	return err;
+}
+
+static int
+mlx5e_add_nic_flow(struct mlx5e_priv *priv,
+		   struct flow_cls_offload *f,
+		   unsigned long flow_flags,
+		   struct net_device *filter_dev,
+		   struct mlx5e_tc_flow **__flow)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5e_tc_flow *flow;
+	int attr_size, err;
+
+	/* multi-chain not supported for NIC rules */
+	if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+		return -EOPNOTSUPP;
+
+	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
+	attr_size  = sizeof(struct mlx5_nic_flow_attr);
+	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+			       &parse_attr, &flow);
+	if (err)
+		goto out;
+
+	parse_attr->filter_dev = filter_dev;
+	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+			       f, filter_dev);
+	if (err)
+		goto err_free;
+
+	err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+	if (err)
+		goto err_free;
+
+	err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+	if (err)
+		goto err_free;
+
+	flow_flag_set(flow, OFFLOADED);
+	kvfree(parse_attr);
+	*__flow = flow;
+
+	return 0;
+
+err_free:
+	mlx5e_flow_put(priv, flow);
+	kvfree(parse_attr);
+out:
+	return err;
+}
+
+static int
+mlx5e_tc_add_flow(struct mlx5e_priv *priv,
+		  struct flow_cls_offload *f,
+		  unsigned long flags,
+		  struct net_device *filter_dev,
+		  struct mlx5e_tc_flow **flow)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	unsigned long flow_flags;
+	int err;
+
+	get_flags(flags, &flow_flags);
+
+	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
+		return -EOPNOTSUPP;
+
+	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
+					 filter_dev, flow);
+	else
+		err = mlx5e_add_nic_flow(priv, f, flow_flags,
+					 filter_dev, flow);
+
+	return err;
+}
+
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
+			   struct flow_cls_offload *f, unsigned long flags)
+{
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+	struct mlx5e_tc_flow *flow;
+	int err = 0;
+
+	rcu_read_lock();
+	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
+	rcu_read_unlock();
+	if (flow) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "flow cookie already exists, ignoring");
+		netdev_warn_once(priv->netdev,
+				 "flow cookie %lx already exists, ignoring\n",
+				 f->cookie);
+		err = -EEXIST;
+		goto out;
+	}
+
+	trace_mlx5e_configure_flower(f);
+	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
+	if (err)
+		goto out;
+
+	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
+	if (err)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	mlx5e_flow_put(priv, flow);
+out:
+	return err;
+}
 
 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
 {
-	if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
-		return true;
+	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
+	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
 
-	return false;
+	return flow_flag_test(flow, INGRESS) == dir_ingress &&
+		flow_flag_test(flow, EGRESS) == dir_egress;
 }
 
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
-			struct tc_cls_flower_offload *f, int flags)
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
+			struct flow_cls_offload *f, unsigned long flags)
 {
-	struct rhashtable *tc_ht = get_tc_ht(priv);
+	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
 	struct mlx5e_tc_flow *flow;
+	int err;
 
+	rcu_read_lock();
 	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
-	if (!flow || !same_flow_direction(flow, flags))
-		return -EINVAL;
+	if (!flow || !same_flow_direction(flow, flags)) {
+		err = -EINVAL;
+		goto errout;
+	}
 
+	/* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_DELETED
+	 * flag set.
+	 */
+	if (flow_flag_test_and_set(flow, DELETED)) {
+		err = -EINVAL;
+		goto errout;
+	}
 	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
+	rcu_read_unlock();
 
-	mlx5e_tc_del_flow(priv, flow);
-
-	kfree(flow);
+	trace_mlx5e_delete_flower(f);
+	mlx5e_flow_put(priv, flow);
 
 	return 0;
+
+errout:
+	rcu_read_unlock();
+	return err;
 }
 
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
-		       struct tc_cls_flower_offload *f, int flags)
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
+		       struct flow_cls_offload *f, unsigned long flags)
 {
-	struct rhashtable *tc_ht = get_tc_ht(priv);
+	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+	struct mlx5_eswitch *peer_esw;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_fc *counter;
-	u64 bytes;
-	u64 packets;
-	u64 lastuse;
+	u64 lastuse = 0;
+	u64 packets = 0;
+	u64 bytes = 0;
+	int err = 0;
 
-	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
-	if (!flow || !same_flow_direction(flow, flags))
+	rcu_read_lock();
+	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
+						tc_ht_params));
+	rcu_read_unlock();
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
+
+	if (!same_flow_direction(flow, flags)) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	if (mlx5e_is_offloaded_flow(flow)) {
+		counter = mlx5e_tc_get_counter(flow);
+		if (!counter)
+			goto errout;
+
+		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+	}
+
+	/* Under multipath it's possible for one rule to be currently
+	 * un-offloaded while the other rule is offloaded.
+	 */
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		goto out;
+
+	if (flow_flag_test(flow, DUP) &&
+	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
+		u64 bytes2;
+		u64 packets2;
+		u64 lastuse2;
+
+		counter = mlx5e_tc_get_counter(flow->peer_flow);
+		if (!counter)
+			goto no_peer_counter;
+		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+		bytes += bytes2;
+		packets += packets2;
+		lastuse = max_t(u64, lastuse, lastuse2);
+	}
+
+no_peer_counter:
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+out:
+	flow_stats_update(&f->stats, bytes, packets, lastuse);
+	trace_mlx5e_stats_flower(f);
+errout:
+	mlx5e_flow_put(priv, flow);
+	return err;
+}
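
When a flow is duplicated to a peer eswitch, the stats query above sums both hardware counters and keeps the most recent lastuse timestamp. A self-contained sketch of that merge, assuming the cached counter values have already been fetched:

    #include <stdint.h>
    #include <stdio.h>

    struct fc_cache {
    	uint64_t bytes;
    	uint64_t packets;
    	uint64_t lastuse;
    };

    static void merge_peer_counter(struct fc_cache *agg, const struct fc_cache *peer)
    {
    	agg->bytes   += peer->bytes;
    	agg->packets += peer->packets;
    	if (peer->lastuse > agg->lastuse)	/* max_t(u64, ...) */
    		agg->lastuse = peer->lastuse;
    }

    int main(void)
    {
    	struct fc_cache local = { 1000, 10, 5000 };
    	struct fc_cache peer  = {  500,  5, 6000 };

    	merge_peer_counter(&local, &peer);
    	printf("bytes=%llu packets=%llu lastuse=%llu\n",
    	       (unsigned long long)local.bytes,
    	       (unsigned long long)local.packets,
    	       (unsigned long long)local.lastuse);
    	return 0;
    }
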
+
+static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
+			       struct netlink_ext_ack *extack)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_eswitch *esw;
+	u16 vport_num;
+	u32 rate_mbps;
+	int err;
+
+	esw = priv->mdev->priv.eswitch;
+	/* rate is given in bytes/sec.
+	 * First convert to bits/sec and then round to the nearest Mbit/sec,
+	 * where Mbit means one million bits.
+	 * Moreover, if the rate is non-zero, configure a minimum of
+	 * 1 Mbit/sec.
+	 */
+	rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
+	vport_num = rpriv->rep->vport;
+
+	err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
+
+	return err;
+}
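
The conversion in apply_police_params() adds half a megabit before the integer division so the result rounds to the nearest Mbit/sec rather than truncating, and clamps any non-zero rate to at least 1. The arithmetic in isolation, as a compilable sketch:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t rate_bytes_ps_to_mbps(uint64_t rate)
    {
    	if (!rate)
    		return 0;

    	/* x8: bytes -> bits; +500000: round to nearest; floor of 1 */
    	uint64_t mbps = (rate * 8 + 500000) / 1000000;

    	return mbps ? (uint32_t)mbps : 1;
    }

    int main(void)
    {
    	printf("%u\n", rate_bytes_ps_to_mbps(125000));	/* exactly 1 Mbit/s */
    	printf("%u\n", rate_bytes_ps_to_mbps(10000));	/* rounds to 0, clamped to 1 */
    	printf("%u\n", rate_bytes_ps_to_mbps(1312500));	/* 10.5 Mbit/s -> 11 */
    	return 0;
    }
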
+
+static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
+					struct flow_action *flow_action,
+					struct netlink_ext_ack *extack)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	const struct flow_action_entry *act;
+	int err;
+	int i;
+
+	if (!flow_action_has_entries(flow_action)) {
+		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
 		return -EINVAL;
+	}
 
-	if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
-		return 0;
+	if (!flow_offload_has_one_action(flow_action)) {
+		NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
+		return -EOPNOTSUPP;
+	}
 
-	counter = mlx5_flow_rule_counter(flow->rule[0]);
-	if (!counter)
-		return 0;
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_POLICE:
+			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
+			if (err)
+				return err;
 
-	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
-
-	tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
+			return -EOPNOTSUPP;
+		}
+	}
 
 	return 0;
 }
 
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+				struct tc_cls_matchall_offload *ma)
+{
+	struct netlink_ext_ack *extack = ma->common.extack;
+
+	if (ma->common.prio != 1) {
+		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
+		return -EINVAL;
+	}
+
+	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
+}
+
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+			     struct tc_cls_matchall_offload *ma)
+{
+	struct netlink_ext_ack *extack = ma->common.extack;
+
+	return apply_police_params(priv, 0, extack);
+}
+
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+			     struct tc_cls_matchall_offload *ma)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct rtnl_link_stats64 cur_stats;
+	u64 dbytes;
+	u64 dpkts;
+
+	cur_stats = priv->stats.vf_vport;
+	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
+	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
+	rpriv->prev_vf_vport_stats = cur_stats;
+	flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
+}
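
matchall stats are reported as deltas: the handler subtracts the previously stored vport snapshot, then advances it, so each query returns only the traffic seen since the last one. The pattern in isolation:

    #include <stdint.h>
    #include <stdio.h>

    struct vport_stats {
    	uint64_t rx_packets;
    	uint64_t rx_bytes;
    };

    static void report_delta(struct vport_stats *prev,
    			 const struct vport_stats *cur,
    			 uint64_t *dpkts, uint64_t *dbytes)
    {
    	*dpkts  = cur->rx_packets - prev->rx_packets;
    	*dbytes = cur->rx_bytes   - prev->rx_bytes;
    	*prev   = *cur;		/* next query starts from here */
    }

    int main(void)
    {
    	struct vport_stats prev = { 0, 0 };
    	struct vport_stats cur  = { 7, 4200 };
    	uint64_t dpkts, dbytes;

    	report_delta(&prev, &cur, &dpkts, &dbytes);
    	printf("delta: %llu pkts, %llu bytes\n",
    	       (unsigned long long)dpkts, (unsigned long long)dbytes);
    	return 0;
    }
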
+
 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
 					      struct mlx5e_priv *peer_priv)
 {
 	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
-	struct mlx5e_hairpin_entry *hpe;
+	struct mlx5e_hairpin_entry *hpe, *tmp;
+	LIST_HEAD(init_wait_list);
 	u16 peer_vhca_id;
 	int bkt;
 
@@ -2963,9 +4046,18 @@
 
 	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
 
-	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
-		if (hpe->peer_vhca_id == peer_vhca_id)
+	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
+	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
+		if (refcount_inc_not_zero(&hpe->refcnt))
+			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+
+	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
+		wait_for_completion(&hpe->res_ready);
+		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
 			hpe->hp->pair->peer_gone = true;
+
+		mlx5e_hairpin_put(priv, hpe);
 	}
 }
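
mlx5e_tc_hairpin_update_dead_peer() now takes references under hairpin_tbl_lock, moves the entries onto a private list, and only then blocks on each entry's completion with the lock dropped. A sketch of that two-phase pattern with pthreads, using hypothetical types and callbacks:

    #include <pthread.h>
    #include <stddef.h>

    struct entry {
    	int refcnt;			/* stand-in for refcount_t */
    	struct entry *next;		/* table linkage */
    	struct entry *wait_next;	/* private wait-list linkage */
    };

    static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;

    static void walk_and_wait(struct entry *tbl,
    			  void (*wait_on)(struct entry *),
    			  void (*put)(struct entry *))
    {
    	struct entry *e, *wait_list = NULL;

    	/* Phase 1: under the lock, reference each entry and collect
    	 * it on a private list; never block here.
    	 */
    	pthread_mutex_lock(&tbl_lock);
    	for (e = tbl; e; e = e->next) {
    		e->refcnt++;
    		e->wait_next = wait_list;
    		wait_list = e;
    	}
    	pthread_mutex_unlock(&tbl_lock);

    	/* Phase 2: block on each collected entry, lock dropped */
    	for (e = wait_list; e; ) {
    		struct entry *next = e->wait_next;

    		wait_on(e);
    		put(e);		/* may free the entry */
    		e = next;
    	}
    }
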
 
@@ -3001,7 +4093,10 @@
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
 	int err;
 
-	hash_init(tc->mod_hdr_tbl);
+	mutex_init(&tc->t_lock);
+	mutex_init(&tc->mod_hdr.lock);
+	hash_init(tc->mod_hdr.hlist);
+	mutex_init(&tc->hairpin_tbl_lock);
 	hash_init(tc->hairpin_tbl);
 
 	err = rhashtable_init(&tc->ht, &tc_ht_params);
@@ -3033,12 +4128,16 @@
 	if (tc->netdevice_nb.notifier_call)
 		unregister_netdevice_notifier(&tc->netdevice_nb);
 
-	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+	mutex_destroy(&tc->mod_hdr.lock);
+	mutex_destroy(&tc->hairpin_tbl_lock);
+
+	rhashtable_destroy(&tc->ht);
 
 	if (!IS_ERR_OR_NULL(tc->t)) {
 		mlx5_destroy_flow_table(tc->t);
 		tc->t = NULL;
 	}
+	mutex_destroy(&tc->t_lock);
 }
 
 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
@@ -3051,9 +4150,32 @@
 	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
 }
 
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
 {
-	struct rhashtable *tc_ht = get_tc_ht(priv);
+	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
 
 	return atomic_read(&tc_ht->nelems);
 }
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+	struct mlx5e_tc_flow *flow, *tmp;
+
+	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+		__mlx5e_tc_del_fdb_peer_flow(flow);
+}
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
+{
+	struct mlx5_rep_uplink_priv *rpriv =
+		container_of(work, struct mlx5_rep_uplink_priv,
+			     reoffload_flows_work);
+	struct mlx5e_tc_flow *flow, *tmp;
+
+	mutex_lock(&rpriv->unready_flows_lock);
+	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
+		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
+			unready_flow_del(flow);
+	}
+	mutex_unlock(&rpriv->unready_flows_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 49436bf..924c6ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -40,40 +40,67 @@
 #ifdef CONFIG_MLX5_ESWITCH
 
 enum {
-	MLX5E_TC_INGRESS = BIT(0),
-	MLX5E_TC_EGRESS  = BIT(1),
-	MLX5E_TC_LAST_EXPORTED_BIT = 1,
+	MLX5E_TC_FLAG_INGRESS_BIT,
+	MLX5E_TC_FLAG_EGRESS_BIT,
+	MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+	MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+	MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
 };
 
+#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
+
 int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
 
 int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
 
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
-			   struct tc_cls_flower_offload *f, int flags);
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
-			struct tc_cls_flower_offload *f, int flags);
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
+			   struct flow_cls_offload *f, unsigned long flags);
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
+			struct flow_cls_offload *f, unsigned long flags);
 
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
-		       struct tc_cls_flower_offload *f, int flags);
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
+		       struct flow_cls_offload *f, unsigned long flags);
+
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+				struct tc_cls_matchall_offload *f);
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+			     struct tc_cls_matchall_offload *f);
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+			     struct tc_cls_matchall_offload *ma);
 
 struct mlx5e_encap_entry;
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
-			      struct mlx5e_encap_entry *e);
+			      struct mlx5e_encap_entry *e,
+			      struct list_head *flow_list);
 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
-			      struct mlx5e_encap_entry *e);
+			      struct mlx5e_encap_entry *e,
+			      struct list_head *flow_list);
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e);
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e);
+
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list);
+void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
 
 struct mlx5e_neigh_hash_entry;
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
 
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+				    struct net_device *out_dev);
 
 #else /* CONFIG_MLX5_ESWITCH */
 static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
-static inline int  mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+static inline int  mlx5e_tc_num_filters(struct mlx5e_priv *priv,
+					unsigned long flags)
+{
+	return 0;
+}
 #endif
 
 #endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 6dacaeb..67dc4f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -32,57 +32,15 @@
 
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
+#include <net/geneve.h>
 #include <net/dsfield.h>
 #include "en.h"
+#include "en/txrx.h"
 #include "ipoib/ipoib.h"
 #include "en_accel/en_accel.h"
+#include "en_accel/ktls.h"
 #include "lib/clock.h"
 
-#define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
-			    MLX5E_SQ_NOPS_ROOM)
-#else
-/* TLS offload requires MLX5E_SQ_STOP_ROOM to have
- * enough room for a resync SKB, a normal SKB and a NOP
- */
-#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
-			    MLX5E_SQ_NOPS_ROOM)
-#endif
-
-static inline void mlx5e_tx_dma_unmap(struct device *pdev,
-				      struct mlx5e_sq_dma *dma)
-{
-	switch (dma->type) {
-	case MLX5E_DMA_MAP_SINGLE:
-		dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
-		break;
-	case MLX5E_DMA_MAP_PAGE:
-		dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
-		break;
-	default:
-		WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
-	}
-}
-
-static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
-{
-	return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
-}
-
-static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
-				  dma_addr_t addr,
-				  u32 size,
-				  enum mlx5e_dma_map_type map_type)
-{
-	struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
-
-	dma->addr = addr;
-	dma->size = size;
-	dma->type = map_type;
-}
-
 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
 {
 	int i;
@@ -110,16 +68,15 @@
 #endif
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       struct net_device *sb_dev,
-		       select_queue_fallback_t fallback)
+		       struct net_device *sb_dev)
 {
+	int txq_ix = netdev_pick_tx(dev, skb, NULL);
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	int channel_ix = fallback(dev, skb, NULL);
 	u16 num_channels;
 	int up = 0;
 
 	if (!netdev_get_num_tc(dev))
-		return channel_ix;
+		return txq_ix;
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
@@ -127,16 +84,16 @@
 	else
 #endif
 		if (skb_vlan_tag_present(skb))
-			up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+			up = skb_vlan_tag_get_prio(skb);
 
-	/* channel_ix can be larger than num_channels since
+	/* txq_ix can be larger than num_channels since
 	 * dev->num_real_tx_queues = num_channels * num_tc
 	 */
 	num_channels = priv->channels.params.num_channels;
-	if (channel_ix >= num_channels)
-		channel_ix = reciprocal_scale(channel_ix, num_channels);
+	if (txq_ix >= num_channels)
+		txq_ix = priv->txq2sq[txq_ix]->ch_ix;
 
-	return priv->channel_tc2txq[channel_ix][up];
+	return priv->channel_tc2txq[txq_ix][up];
 }
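
The queue selection above first lets the stack pick a txq, folds it back into a channel index when it exceeds num_channels, and then indexes a (channel, priority) table. A sketch of that final mapping, assuming a layout of num_channels consecutive queues per traffic class (the layout here is an assumption for illustration):

    #include <stdio.h>

    /* hypothetical layout: txq = tc * num_channels + channel */
    static int channel_tc2txq(int channel, int tc, int num_channels)
    {
    	return tc * num_channels + channel;
    }

    int main(void)
    {
    	int num_channels = 8;

    	/* channel 3, priority 2 -> txq 19 under this layout */
    	printf("txq=%d\n", channel_tc2txq(3, 2, num_channels));
    	return 0;
    }
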
 
 static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
@@ -148,12 +105,8 @@
 
 static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
 {
-	struct flow_keys keys;
-
 	if (skb_transport_header_was_set(skb))
 		return skb_transport_offset(skb);
-	else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
-		return keys.control.thoff;
 	else
 		return mlx5e_skb_l2_header_offset(skb);
 }
@@ -167,20 +120,13 @@
 	case MLX5_INLINE_MODE_NONE:
 		return 0;
 	case MLX5_INLINE_MODE_TCP_UDP:
-		hlen = eth_get_headlen(skb->data, skb_headlen(skb));
+		hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
 		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
 			hlen += VLAN_HLEN;
 		break;
 	case MLX5_INLINE_MODE_IP:
-		/* When transport header is set to zero, it means no transport
-		 * header. When transport header is set to 0xff's, it means
-		 * transport header wasn't set.
-		 */
-		if (skb_transport_offset(skb)) {
-			hlen = mlx5e_skb_l3_header_offset(skb);
-			break;
-		}
-		/* fall through */
+		hlen = mlx5e_skb_l3_header_offset(skb);
+		break;
 	case MLX5_INLINE_MODE_L2:
 	default:
 		hlen = mlx5e_skb_l2_header_offset(skb);
@@ -264,7 +210,7 @@
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		int fsz = skb_frag_size(frag);
 
 		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
@@ -288,29 +234,14 @@
 	return -ENOMEM;
 }
 
-static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
-					   struct mlx5_wq_cyc *wq,
-					   u16 pi, u16 nnops)
-{
-	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
-	edge_wi = wi + nnops;
-
-	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
-	for (; wi < edge_wi; wi++) {
-		wi->skb        = NULL;
-		wi->num_wqebbs = 1;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-	sq->stats->nop += nnops;
-}
-
 static inline void
 mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		     u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
-		     struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg)
+		     struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
+		     bool xmit_more)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
+	bool send_doorbell;
 
 	wi->num_bytes = num_bytes;
 	wi->num_dma = num_dma;
@@ -320,25 +251,23 @@
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
-	netdev_tx_sent_queue(sq->txq, num_bytes);
-
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	sq->pc += wi->num_wqebbs;
-	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) {
+	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
 		netif_tx_stop_queue(sq->txq);
 		sq->stats->stopped++;
 	}
 
-	if (!skb->xmit_more || netif_xmit_stopped(sq->txq))
+	send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
+					       xmit_more);
+	if (send_doorbell)
 		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
 }
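
mlx5e_txwqe_complete() now takes xmit_more from the caller and rings the doorbell only when the stack has no further packets queued (or the byte-queue accounting demands an immediate flush), batching several WQEs behind one MMIO write. The decision in isolation, with hypothetical helpers:

    #include <stdbool.h>
    #include <stdio.h>

    static void notify_hw(void)
    {
    	puts("doorbell (MMIO write)");
    }

    /* Ring once per burst: only when no more packets will follow,
     * or BQL wants the queue flushed now.
     */
    static void complete_wqe(bool xmit_more, bool bql_wants_flush)
    {
    	bool send_doorbell = !xmit_more || bql_wants_flush;

    	if (send_doorbell)
    		notify_hw();
    }

    int main(void)
    {
    	complete_wqe(true,  false);	/* burst continues: no doorbell */
    	complete_wqe(true,  false);	/* still batching */
    	complete_wqe(false, false);	/* last packet: one doorbell */
    	return 0;
    }
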
 
-#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
-
 netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5e_tx_wqe *wqe, u16 pi)
+			  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5_wqe_ctrl_seg *cseg;
@@ -363,15 +292,17 @@
 		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
 		stats->packets += skb_shinfo(skb)->gso_segs;
 	} else {
+		u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb);
+
 		opcode    = MLX5_OPCODE_SEND;
 		mss       = 0;
-		ihs       = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+		ihs       = mlx5e_calc_min_inline(mode, skb);
 		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 		stats->packets++;
 	}
 
 	stats->bytes     += num_bytes;
-	stats->xmit_more += skb->xmit_more;
+	stats->xmit_more += xmit_more;
 
 	headlen = skb->len - ihs - skb->data_len;
 	ds_cnt += !!headlen;
@@ -387,8 +318,20 @@
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
 	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+#ifdef CONFIG_MLX5_EN_IPSEC
+		struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+		struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
+#endif
 		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+		wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+#ifdef CONFIG_MLX5_EN_IPSEC
+		wqe->eth = cur_eth;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+		wqe->ctrl = cur_ctrl;
+#endif
 	}
 
 	/* fill wqe */
@@ -397,6 +340,10 @@
 	eseg = &wqe->eth;
 	dseg =  wqe->data;
 
+#if IS_ENABLED(CONFIG_GENEVE)
+	if (skb->encapsulation)
+		mlx5e_tx_tunnel_accel(skb, eseg);
+#endif
 	mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
 
 	eseg->mss = mss;
@@ -424,7 +371,7 @@
 		goto err_drop;
 
 	mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
-			     num_dma, wi, cseg);
+			     num_dma, wi, cseg, xmit_more);
 
 	return NETDEV_TX_OK;
 
@@ -443,25 +390,29 @@
 	u16 pi;
 
 	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
-	mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+	wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
 
 	/* might send skbs and update wqe and pi */
 	skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
 	if (unlikely(!skb))
 		return NETDEV_TX_OK;
 
-	return mlx5e_sq_xmit(sq, skb, wqe, pi);
+	return mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
 }
 
 static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
 				 struct mlx5_err_cqe *err_cqe)
 {
-	u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
+	struct mlx5_cqwq *wq = &sq->cq.wq;
+	u32 ci;
+
+	ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
 
 	netdev_err(sq->channel->netdev,
-		   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
-		   sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
-		   err_cqe->vendor_err_synd);
+		   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+		   sq->cq.mcq.cqn, ci, sq->sqn,
+		   get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+		   err_cqe->syndrome, err_cqe->vendor_err_synd);
 	mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
 }
 
@@ -507,13 +458,13 @@
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-		if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
 			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
 					      &sq->state)) {
 				mlx5e_dump_error_cqe(sq,
 						     (struct mlx5_err_cqe *)cqe);
 				queue_work(cq->channel->priv->wq,
-					   &sq->recover.recover_work);
+					   &sq->recover_work);
 			}
 			stats->cqe_err++;
 		}
@@ -530,8 +481,9 @@
 			wi = &sq->db.wqe_info[ci];
 			skb = wi->skb;
 
-			if (unlikely(!skb)) { /* nop */
-				sqcc++;
+			if (unlikely(!skb)) {
+				mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+				sqcc += wi->num_wqebbs;
 				continue;
 			}
 
@@ -573,8 +525,7 @@
 	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
 	if (netif_tx_queue_stopped(sq->txq) &&
-	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
-				   MLX5E_SQ_STOP_ROOM) &&
+	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
 	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
 		netif_tx_wake_queue(sq->txq);
 		stats->wake++;
@@ -587,29 +538,38 @@
 {
 	struct mlx5e_tx_wqe_info *wi;
 	struct sk_buff *skb;
+	u32 dma_fifo_cc;
+	u16 sqcc;
 	u16 ci;
 	int i;
 
-	while (sq->cc != sq->pc) {
-		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+	sqcc = sq->cc;
+	dma_fifo_cc = sq->dma_fifo_cc;
+
+	while (sqcc != sq->pc) {
+		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
 		wi = &sq->db.wqe_info[ci];
 		skb = wi->skb;
 
-		if (!skb) { /* nop */
-			sq->cc++;
+		if (!skb) {
+			mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+			sqcc += wi->num_wqebbs;
 			continue;
 		}
 
 		for (i = 0; i < wi->num_dma; i++) {
 			struct mlx5e_sq_dma *dma =
-				mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+				mlx5e_dma_get(sq, dma_fifo_cc++);
 
 			mlx5e_tx_dma_unmap(sq->pdev, dma);
 		}
 
 		dev_kfree_skb_any(skb);
-		sq->cc += wi->num_wqebbs;
+		sqcc += wi->num_wqebbs;
 	}
+
+	sq->dma_fifo_cc = dma_fifo_cc;
+	sq->cc = sqcc;
 }
 
 #ifdef CONFIG_MLX5_CORE_IPOIB
@@ -623,7 +583,8 @@
 }
 
 netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5_av *av, u32 dqpn, u32 dqkey)
+			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
+			  bool xmit_more)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5i_tx_wqe *wqe;
@@ -651,15 +612,17 @@
 		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
 		stats->packets += skb_shinfo(skb)->gso_segs;
 	} else {
+		u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb);
+
 		opcode    = MLX5_OPCODE_SEND;
 		mss       = 0;
-		ihs       = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+		ihs       = mlx5e_calc_min_inline(mode, skb);
 		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 		stats->packets++;
 	}
 
 	stats->bytes     += num_bytes;
-	stats->xmit_more += skb->xmit_more;
+	stats->xmit_more += xmit_more;
 
 	headlen = skb->len - ihs - skb->data_len;
 	ds_cnt += !!headlen;
@@ -704,7 +667,7 @@
 		goto err_drop;
 
 	mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
-			     num_dma, wi, cseg);
+			     num_dma, wi, cseg, xmit_more);
 
 	return NETDEV_TX_OK;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 85d5173..257a7c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -33,6 +33,8 @@
 #include <linux/irq.h>
 #include "en.h"
 #include "en/xdp.h"
+#include "en/xsk/rx.h"
+#include "en/xsk/tx.h"
 
 static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
 {
@@ -48,34 +50,72 @@
 static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
 {
 	struct mlx5e_sq_stats *stats = sq->stats;
-	struct net_dim_sample dim_sample;
+	struct dim_sample dim_sample = {};
 
 	if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
 		return;
 
-	net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
-		       &dim_sample);
+	dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
 	net_dim(&sq->dim, dim_sample);
 }
 
 static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
 {
 	struct mlx5e_rq_stats *stats = rq->stats;
-	struct net_dim_sample dim_sample;
+	struct dim_sample dim_sample = {};
 
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
 		return;
 
-	net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
-		       &dim_sample);
+	dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
 	net_dim(&rq->dim, dim_sample);
 }
 
+void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5e_tx_wqe *nopwqe;
+	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+	nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+}
+
+static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
+{
+	bool busy_xsk = false, xsk_rx_alloc_err;
+
+	/* Handle the race between the application querying need_wakeup and the
+	 * driver setting it:
+	 * 1. Update need_wakeup both before and after the TX. If it goes to
+	 * "yes", it can only happen with the first update.
+	 * 2. If the application queried need_wakeup before we set it, the
+	 * packets will be transmitted anyway, even without a wakeup.
+	 * 3. Give a chance to clear need_wakeup after new packets were queued
+	 * for TX.
+	 */
+	mlx5e_xsk_update_tx_wakeup(xsksq);
+	busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
+	mlx5e_xsk_update_tx_wakeup(xsksq);
+
+	xsk_rx_alloc_err = xskrq->post_wqes(xskrq);
+	busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err);
+
+	return busy_xsk;
+}
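
The need_wakeup handling above updates the flag on both sides of the TX burst, so a concurrent application either observes the flag set or finds its packets already sent. A compact C11 sketch of that publish/work/re-publish ordering, with invented names and a trivial stand-in for the TX step:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool need_wakeup;

    /* stand-in: transmit queued packets; returns true if the budget
     * ran out and more work is pending
     */
    static bool xsk_tx(int budget)
    {
    	(void)budget;
    	return false;
    }

    static bool napi_xsk_post(int budget)
    {
    	bool busy;

    	/* Update need_wakeup both before and after the TX: if it
    	 * goes to "yes" it can only happen with the first store,
    	 * and the second gives it a chance to clear once new
    	 * packets were queued for TX.
    	 */
    	atomic_store(&need_wakeup, true);
    	busy = xsk_tx(budget);
    	atomic_store(&need_wakeup, !busy);

    	return busy;
    }
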
+
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
 					       napi);
 	struct mlx5e_ch_stats *ch_stats = c->stats;
+	struct mlx5e_xdpsq *xsksq = &c->xsksq;
+	struct mlx5e_rq *xskrq = &c->xskrq;
+	struct mlx5e_rq *rq = &c->rq;
+	bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+	bool aff_change = false;
+	bool busy_xsk = false;
 	bool busy = false;
 	int work_done = 0;
 	int i;
@@ -88,19 +128,34 @@
 	busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
 
 	if (c->xdp)
-		busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+		busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
 
 	if (likely(budget)) { /* budget=0 means: don't poll rx rings */
-		work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+		if (xsk_open)
+			work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+
+		if (likely(budget - work_done))
+			work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+
 		busy |= work_done == budget;
 	}
 
-	busy |= c->rq.post_wqes(&c->rq);
+	mlx5e_poll_ico_cq(&c->icosq.cq);
+
+	busy |= rq->post_wqes(rq);
+	if (xsk_open) {
+		mlx5e_poll_ico_cq(&c->xskicosq.cq);
+		busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
+		busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
+	}
+
+	busy |= busy_xsk;
 
 	if (busy) {
 		if (likely(mlx5e_channel_no_affinity_change(c)))
 			return budget;
 		ch_stats->aff_change++;
+		aff_change = true;
 		if (budget && work_done == budget)
 			work_done--;
 	}
@@ -115,16 +170,28 @@
 		mlx5e_cq_arm(&c->sq[i].cq);
 	}
 
-	mlx5e_handle_rx_dim(&c->rq);
+	mlx5e_handle_rx_dim(rq);
 
-	mlx5e_cq_arm(&c->rq.cq);
+	mlx5e_cq_arm(&rq->cq);
 	mlx5e_cq_arm(&c->icosq.cq);
 	mlx5e_cq_arm(&c->xdpsq.cq);
 
+	if (xsk_open) {
+		mlx5e_handle_rx_dim(xskrq);
+		mlx5e_cq_arm(&c->xskicosq.cq);
+		mlx5e_cq_arm(&xsksq->cq);
+		mlx5e_cq_arm(&xskrq->cq);
+	}
+
+	if (unlikely(aff_change && busy_xsk)) {
+		mlx5e_trigger_irq(&c->icosq);
+		ch_stats->force_irq++;
+	}
+
 	return work_done;
 }
 
-void mlx5e_completion_event(struct mlx5_core_cq *mcq)
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
 {
 	struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1e1a16..580c71c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -31,20 +31,23 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eq.h>
 #include <linux/mlx5/cmd.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "eswitch.h"
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 
 enum {
-	MLX5_EQE_SIZE		= sizeof(struct mlx5_eqe),
 	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
 };
 
@@ -55,14 +58,33 @@
 };
 
 enum {
-	MLX5_NUM_SPARE_EQE	= 0x80,
-	MLX5_NUM_ASYNC_EQE	= 0x1000,
-	MLX5_NUM_CMD_EQE	= 32,
-	MLX5_NUM_PF_DRAIN	= 64,
+	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
 };
 
+/* The budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we
+ * update the ci before we have polled all the entries in the EQ.
+ * MLX5_NUM_SPARE_EQE is used when sizing the EQ, so the budget must also be
+ * smaller than the EQ size.
+ */
 enum {
-	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
+	MLX5_EQ_POLLING_BUDGET	= 128,
+};
+
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
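
The comment above pins down the invariant: the consumer index is handed back to hardware at least every MLX5_EQ_POLLING_BUDGET entries, and the spare entries absorb events that arrive in between. A standalone sketch of owner-bit polling with periodic CI updates, mirroring the scheme next_eqe_sw()/eq_update_ci() implement further down (sizes are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NENT	1024u	/* EQ size, power of two */
    #define BUDGET	128u	/* must stay below the spare-entry count */

    struct eqe { uint8_t owner; };

    /* An entry is valid when its owner bit matches the parity of
     * the consumer-index wrap count.
     */
    static bool eqe_valid(const struct eqe *e, uint32_t ci)
    {
    	return !((e->owner & 1) ^ !!(ci & NENT));
    }

    static void update_ci(uint32_t ci)
    {
    	printf("ci -> %u\n", ci);	/* stand-in for the doorbell write */
    }

    static void poll_eq(struct eqe *ring, uint32_t *cons_index)
    {
    	uint32_t polled = 0;

    	while (eqe_valid(&ring[*cons_index & (NENT - 1)], *cons_index)) {
    		/* ... dispatch the event ... */
    		++*cons_index;
    		if (++polled == BUDGET) {
    			update_ci(*cons_index);	/* hand slots back early */
    			polled = 0;
    		}
    	}
    	update_ci(*cons_index);		/* final update, then re-arm */
    }
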
+struct mlx5_eq_table {
+	struct list_head        comp_eqs_list;
+	struct mlx5_eq_async    pages_eq;
+	struct mlx5_eq_async    cmd_eq;
+	struct mlx5_eq_async    async_eq;
+
+	struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+	/* Since CQ DB is stored in async_eq */
+	struct mlx5_nb          cq_err_nb;
+
+	struct mutex            lock; /* sync async eqs creations */
+	int			num_comp_eqs;
+	struct mlx5_irq_table	*irq_table;
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
@@ -78,17 +100,6 @@
 			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)	    | \
 			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
 
-struct map_eq_in {
-	u64	mask;
-	u32	reserved;
-	u32	unmap_eqn;
-};
-
-struct cre_des_eq {
-	u8	reserved[15];
-	u8	eqn;
-};
-
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
 	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,519 +110,66 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
-	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
-	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
-	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
-	switch (type) {
-	case MLX5_EVENT_TYPE_COMP:
-		return "MLX5_EVENT_TYPE_COMP";
-	case MLX5_EVENT_TYPE_PATH_MIG:
-		return "MLX5_EVENT_TYPE_PATH_MIG";
-	case MLX5_EVENT_TYPE_COMM_EST:
-		return "MLX5_EVENT_TYPE_COMM_EST";
-	case MLX5_EVENT_TYPE_SQ_DRAINED:
-		return "MLX5_EVENT_TYPE_SQ_DRAINED";
-	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
-	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
-	case MLX5_EVENT_TYPE_CQ_ERROR:
-		return "MLX5_EVENT_TYPE_CQ_ERROR";
-	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
-	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
-	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
-	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
-	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
-	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
-		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
-	case MLX5_EVENT_TYPE_PORT_CHANGE:
-		return "MLX5_EVENT_TYPE_PORT_CHANGE";
-	case MLX5_EVENT_TYPE_GPIO_EVENT:
-		return "MLX5_EVENT_TYPE_GPIO_EVENT";
-	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
-	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
-	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
-		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
-	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
-		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
-	case MLX5_EVENT_TYPE_STALL_EVENT:
-		return "MLX5_EVENT_TYPE_STALL_EVENT";
-	case MLX5_EVENT_TYPE_CMD:
-		return "MLX5_EVENT_TYPE_CMD";
-	case MLX5_EVENT_TYPE_PAGE_REQUEST:
-		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
-	case MLX5_EVENT_TYPE_PAGE_FAULT:
-		return "MLX5_EVENT_TYPE_PAGE_FAULT";
-	case MLX5_EVENT_TYPE_PPS_EVENT:
-		return "MLX5_EVENT_TYPE_PPS_EVENT";
-	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
-	case MLX5_EVENT_TYPE_FPGA_ERROR:
-		return "MLX5_EVENT_TYPE_FPGA_ERROR";
-	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
-	case MLX5_EVENT_TYPE_GENERAL_EVENT:
-		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
-	case MLX5_EVENT_TYPE_DEVICE_TRACER:
-		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
-	default:
-		return "Unrecognized event";
-	}
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
-	switch (subtype) {
-	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-		return MLX5_DEV_EVENT_PORT_DOWN;
-	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-		return MLX5_DEV_EVENT_PORT_UP;
-	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-		return MLX5_DEV_EVENT_PORT_INITIALIZED;
-	case MLX5_PORT_CHANGE_SUBTYPE_LID:
-		return MLX5_DEV_EVENT_LID_CHANGE;
-	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-		return MLX5_DEV_EVENT_PKEY_CHANGE;
-	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-		return MLX5_DEV_EVENT_GUID_CHANGE;
-	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-		return MLX5_DEV_EVENT_CLIENT_REREG;
-	}
-	return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
-{
-	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
-	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
-	__raw_writel((__force u32)cpu_to_be32(val), addr);
-	/* We still want ordering, just not swabbing, so add a barrier */
-	mb();
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
-	struct mlx5_pagefault *pfault = container_of(work,
-						     struct mlx5_pagefault,
-						     work);
-	struct mlx5_eq *eq = pfault->eq;
-
-	mlx5_core_page_fault(eq->dev, pfault);
-	mempool_free(pfault, eq->pf_ctx.pool);
-}
-
-static void eq_pf_process(struct mlx5_eq *eq)
-{
-	struct mlx5_core_dev *dev = eq->dev;
-	struct mlx5_eqe_page_fault *pf_eqe;
-	struct mlx5_pagefault *pfault;
-	struct mlx5_eqe *eqe;
-	int set_ci = 0;
-
-	while ((eqe = next_eqe_sw(eq))) {
-		pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
-		if (!pfault) {
-			schedule_work(&eq->pf_ctx.work);
-			break;
-		}
-
-		dma_rmb();
-		pf_eqe = &eqe->data.page_fault;
-		pfault->event_subtype = eqe->sub_type;
-		pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
-		mlx5_core_dbg(dev,
-			      "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
-			      eqe->sub_type, pfault->bytes_committed);
-
-		switch (eqe->sub_type) {
-		case MLX5_PFAULT_SUBTYPE_RDMA:
-			/* RDMA based event */
-			pfault->type =
-				be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
-			pfault->token =
-				be32_to_cpu(pf_eqe->rdma.pftype_token) &
-				MLX5_24BIT_MASK;
-			pfault->rdma.r_key =
-				be32_to_cpu(pf_eqe->rdma.r_key);
-			pfault->rdma.packet_size =
-				be16_to_cpu(pf_eqe->rdma.packet_length);
-			pfault->rdma.rdma_op_len =
-				be32_to_cpu(pf_eqe->rdma.rdma_op_len);
-			pfault->rdma.rdma_va =
-				be64_to_cpu(pf_eqe->rdma.rdma_va);
-			mlx5_core_dbg(dev,
-				      "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
-				      pfault->type, pfault->token,
-				      pfault->rdma.r_key);
-			mlx5_core_dbg(dev,
-				      "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
-				      pfault->rdma.rdma_op_len,
-				      pfault->rdma.rdma_va);
-			break;
-
-		case MLX5_PFAULT_SUBTYPE_WQE:
-			/* WQE based event */
-			pfault->type =
-				(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
-			pfault->token =
-				be32_to_cpu(pf_eqe->wqe.token);
-			pfault->wqe.wq_num =
-				be32_to_cpu(pf_eqe->wqe.pftype_wq) &
-				MLX5_24BIT_MASK;
-			pfault->wqe.wqe_index =
-				be16_to_cpu(pf_eqe->wqe.wqe_index);
-			pfault->wqe.packet_size =
-				be16_to_cpu(pf_eqe->wqe.packet_length);
-			mlx5_core_dbg(dev,
-				      "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
-				      pfault->type, pfault->token,
-				      pfault->wqe.wq_num,
-				      pfault->wqe.wqe_index);
-			break;
-
-		default:
-			mlx5_core_warn(dev,
-				       "Unsupported page fault event sub-type: 0x%02hhx\n",
-				       eqe->sub_type);
-			/* Unsupported page faults should still be
-			 * resolved by the page fault handler
-			 */
-		}
-
-		pfault->eq = eq;
-		INIT_WORK(&pfault->work, eqe_pf_action);
-		queue_work(eq->pf_ctx.wq, &pfault->work);
-
-		++eq->cons_index;
-		++set_ci;
-
-		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-			eq_update_ci(eq, 0);
-			set_ci = 0;
-		}
-	}
-
-	eq_update_ci(eq, 1);
-}
-
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
-	struct mlx5_eq *eq = eq_ptr;
-	unsigned long flags;
-
-	if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
-		eq_pf_process(eq);
-		spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
-	} else {
-		schedule_work(&eq->pf_ctx.work);
-	}
-
-	return IRQ_HANDLED;
-}
-
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Cheap workaround.
- */
-static void mempool_refill(mempool_t *pool)
-{
-	while (pool->curr_nr < pool->min_nr)
-		mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
-	struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
-	mempool_refill(eq->pf_ctx.pool);
-
-	spin_lock_irq(&eq->pf_ctx.lock);
-	eq_pf_process(eq);
-	spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
-	spin_lock_init(&pf_ctx->lock);
-	INIT_WORK(&pf_ctx->work, eq_pf_action);
-
-	pf_ctx->wq = alloc_ordered_workqueue(name,
-					     WQ_MEM_RECLAIM);
-	if (!pf_ctx->wq)
-		return -ENOMEM;
-
-	pf_ctx->pool = mempool_create_kmalloc_pool
-		(MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
-	if (!pf_ctx->pool)
-		goto err_wq;
-
-	return 0;
-err_wq:
-	destroy_workqueue(pf_ctx->wq);
-	return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
-				u32 wq_num, u8 type, int error)
-{
-	u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = {0};
-
-	MLX5_SET(page_fault_resume_in, in, opcode,
-		 MLX5_CMD_OP_PAGE_FAULT_RESUME);
-	MLX5_SET(page_fault_resume_in, in, error, !!error);
-	MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
-	MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
-	MLX5_SET(page_fault_resume_in, in, token, token);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
-				  struct mlx5_eqe *eqe)
-{
-	switch (eqe->sub_type) {
-	case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
-		if (dev->event)
-			dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
-		break;
-	default:
-		mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
-			      eqe->sub_type);
-	}
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
-				    struct mlx5_eqe *eqe)
-{
-	u64 value_lsb;
-	u64 value_msb;
-
-	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
-	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
-	mlx5_core_warn(dev,
-		       "High temperature on sensors with bit set %llx %llx",
-		       value_msb, value_lsb);
-}
-
 /* caller must eventually call mlx5_cq_put on the returned cq */
 static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 {
 	struct mlx5_cq_table *table = &eq->cq_table;
 	struct mlx5_core_cq *cq = NULL;
 
-	spin_lock(&table->lock);
+	rcu_read_lock();
 	cq = radix_tree_lookup(&table->tree, cqn);
 	if (likely(cq))
 		mlx5_cq_hold(cq);
-	spin_unlock(&table->lock);
+	rcu_read_unlock();
 
 	return cq;
 }
 
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+			    __always_unused unsigned long action,
+			    __always_unused void *data)
 {
-	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-	if (unlikely(!cq)) {
-		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	++cq->arm_sn;
-
-	cq->comp(cq);
-
-	mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
-{
-	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-	if (unlikely(!cq)) {
-		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	cq->event(cq, event_type);
-
-	mlx5_cq_put(cq);
-}
-
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
-{
-	struct mlx5_eq *eq = eq_ptr;
-	struct mlx5_core_dev *dev = eq->dev;
+	struct mlx5_eq_comp *eq_comp =
+		container_of(nb, struct mlx5_eq_comp, irq_nb);
+	struct mlx5_eq *eq = &eq_comp->core;
 	struct mlx5_eqe *eqe;
-	int set_ci = 0;
+	int num_eqes = 0;
 	u32 cqn = -1;
-	u32 rsn;
-	u8 port;
 
-	while ((eqe = next_eqe_sw(eq))) {
-		/*
-		 * Make sure we read EQ entry contents after we've
+	eqe = next_eqe_sw(eq);
+	if (!eqe)
+		goto out;
+
+	do {
+		struct mlx5_core_cq *cq;
+
+		/* Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
 		 */
 		dma_rmb();
+		/* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+		cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
 
-		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
-			      eq->eqn, eqe_type_str(eqe->type));
-		switch (eqe->type) {
-		case MLX5_EVENT_TYPE_COMP:
-			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-			mlx5_eq_cq_completion(eq, cqn);
-			break;
-		case MLX5_EVENT_TYPE_DCT_DRAINED:
-			rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
-			rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
-			mlx5_rsc_event(dev, rsn, eqe->type);
-			break;
-		case MLX5_EVENT_TYPE_PATH_MIG:
-		case MLX5_EVENT_TYPE_COMM_EST:
-		case MLX5_EVENT_TYPE_SQ_DRAINED:
-		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-			rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
-			mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
-				      eqe_type_str(eqe->type), eqe->type, rsn);
-			mlx5_rsc_event(dev, rsn, eqe->type);
-			break;
-
-		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-			mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
-				      eqe_type_str(eqe->type), eqe->type, rsn);
-			mlx5_srq_event(dev, rsn, eqe->type);
-			break;
-
-		case MLX5_EVENT_TYPE_CMD:
-			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
-			break;
-
-		case MLX5_EVENT_TYPE_PORT_CHANGE:
-			port = (eqe->data.port.port >> 4) & 0xf;
-			switch (eqe->sub_type) {
-			case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-			case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-			case MLX5_PORT_CHANGE_SUBTYPE_LID:
-			case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-			case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-			case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-			case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-				if (dev->event)
-					dev->event(dev, port_subtype_event(eqe->sub_type),
-						   (unsigned long)port);
-				break;
-			default:
-				mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
-					       port, eqe->sub_type);
-			}
-			break;
-		case MLX5_EVENT_TYPE_CQ_ERROR:
-			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
-				       cqn, eqe->data.cq_err.syndrome);
-			mlx5_eq_cq_event(eq, cqn, eqe->type);
-			break;
-
-		case MLX5_EVENT_TYPE_PAGE_REQUEST:
-			{
-				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
-
-				mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
-					      func_id, npages);
-				mlx5_core_req_pages_handler(dev, func_id, npages);
-			}
-			break;
-
-		case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-			mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-			mlx5_port_module_event(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_PPS_EVENT:
-			mlx5_pps_event(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_FPGA_ERROR:
-		case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-			mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
-			break;
-
-		case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-			mlx5_temp_warning_event(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_GENERAL_EVENT:
-			general_event_handler(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_DEVICE_TRACER:
-			mlx5_fw_tracer_event(dev, eqe);
-			break;
-
-		default:
-			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
-				       eqe->type, eq->eqn);
-			break;
+		cq = mlx5_eq_cq_get(eq, cqn);
+		if (likely(cq)) {
+			++cq->arm_sn;
+			cq->comp(cq, eqe);
+			mlx5_cq_put(cq);
+		} else {
+			mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
 		}
 
 		++eq->cons_index;
-		++set_ci;
 
-		/* The HCA will think the queue has overflowed if we
-		 * don't tell it we've been processing events.  We
-		 * create our EQs with MLX5_NUM_SPARE_EQE extra
-		 * entries, so we must update our consumer index at
-		 * least that often.
-		 */
-		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-			eq_update_ci(eq, 0);
-			set_ci = 0;
-		}
-	}
+	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
 
+out:
 	eq_update_ci(eq, 1);
 
 	if (cqn != -1)
-		tasklet_schedule(&eq->tasklet_ctx.task);
+		tasklet_schedule(&eq_comp->tasklet_ctx.task);
 
-	return IRQ_HANDLED;
+	return 0;
 }
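The handler above leans on next_eqe_sw() for the software-ownership test. A minimal sketch of that test, assuming it applies the same ownership check as mlx5_eq_get_eqe() further below with cc == 0 (the helper name here is illustrative):

static struct mlx5_eqe *next_eqe_sw_sketch(struct mlx5_eq *eq)
{
	/* An EQE is software-owned when its owner bit matches the
	 * parity of the current pass over the power-of-two queue.
	 */
	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));

	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
}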
 
 /* Some architectures don't latch interrupts when they are disabled, so using
@@ -619,19 +177,57 @@
  * avoid losing them.  It is not recommended to use it, unless this is the last
  * resort.
  */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 {
 	u32 count_eqe;
 
-	disable_irq(eq->irqn);
-	count_eqe = eq->cons_index;
-	mlx5_eq_int(eq->irqn, eq);
-	count_eqe = eq->cons_index - count_eqe;
-	enable_irq(eq->irqn);
+	disable_irq(eq->core.irqn);
+	count_eqe = eq->core.cons_index;
+	mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
+	count_eqe = eq->core.cons_index - count_eqe;
+	enable_irq(eq->core.irqn);
 
 	return count_eqe;
 }
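A hedged usage sketch of this last-resort path; the retry bound and sleep interval are illustrative, not taken from a real caller:

static void drain_eq_sketch(struct mlx5_eq_comp *eq)
{
	int retries = 10;

	/* Poll the EQ directly instead of relying on interrupt delivery */
	while (mlx5_eq_poll_irq_disabled(eq) && --retries)
		msleep(20);
}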
 
+static int mlx5_eq_async_int(struct notifier_block *nb,
+			     unsigned long action, void *data)
+{
+	struct mlx5_eq_async *eq_async =
+		container_of(nb, struct mlx5_eq_async, irq_nb);
+	struct mlx5_eq *eq = &eq_async->core;
+	struct mlx5_eq_table *eqt;
+	struct mlx5_core_dev *dev;
+	struct mlx5_eqe *eqe;
+	int num_eqes = 0;
+
+	dev = eq->dev;
+	eqt = dev->priv.eq_table;
+
+	eqe = next_eqe_sw(eq);
+	if (!eqe)
+		goto out;
+
+	do {
+		/*
+		 * Make sure we read EQ entry contents after we've
+		 * checked the ownership bit.
+		 */
+		dma_rmb();
+
+		atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+		atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
+
+		++eq->cons_index;
+
+	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
+	eq_update_ci(eq, 1);
+
+	return 0;
+}
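mlx5_eq_comp_int() and mlx5_eq_async_int() share one shape, condensed here as a sketch: bounded polling, one cons_index bump per EQE, and a single publish-and-arm doorbell at the end.

static void eq_poll_shape_sketch(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;
	int num_eqes = 0;

	eqe = next_eqe_sw(eq);
	if (!eqe)
		goto out;
	do {
		dma_rmb();	/* read EQE fields only after the owner check */
		/* ... dispatch this eqe ... */
		++eq->cons_index;
	} while (++num_eqes < MLX5_EQ_POLLING_BUDGET &&
		 (eqe = next_eqe_sw(eq)));
out:
	eq_update_ci(eq, 1);	/* publish cons_index and re-arm the EQ */
}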
+
 static void init_eq_buf(struct mlx5_eq *eq)
 {
 	struct mlx5_eqe *eqe;
@@ -643,39 +239,32 @@
 	}
 }
 
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, const char *name,
-		       enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+	      struct mlx5_eq_param *param)
 {
 	struct mlx5_cq_table *cq_table = &eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	struct mlx5_priv *priv = &dev->priv;
-	irq_handler_t handler;
+	u8 vecidx = param->irq_index;
 	__be64 *pas;
 	void *eqc;
 	int inlen;
 	u32 *in;
 	int err;
+	int i;
 
 	/* Init CQ table */
 	memset(cq_table, 0, sizeof(*cq_table));
 	spin_lock_init(&cq_table->lock);
 	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
 
-	eq->type = type;
-	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
 	eq->cons_index = 0;
 	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
 	if (err)
 		return err;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (type == MLX5_EQ_TYPE_PF)
-		handler = mlx5_eq_pf_int;
-	else
-#endif
-		handler = mlx5_eq_int;
-
 	init_eq_buf(eq);
 
 	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +280,12 @@
 	mlx5_fill_page_array(&eq->buf, pas);
 
 	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
-	MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+	if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
+		MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+
+	for (i = 0; i < 4; i++)
+		MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
+				 param->mask[i]);
 
 	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
 	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,47 +298,19 @@
 	if (err)
 		goto err_in;
 
-	snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
-		 name, pci_name(dev->pdev));
-
+	eq->vecidx = vecidx;
 	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
 	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
 	eq->dev = dev;
 	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
-	err = request_irq(eq->irqn, handler, 0,
-			  priv->irq_info[vecidx].name, eq);
-	if (err)
-		goto err_eq;
 
 	err = mlx5_debug_eq_add(dev, eq);
 	if (err)
-		goto err_irq;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (type == MLX5_EQ_TYPE_PF) {
-		err = init_pf_ctx(&eq->pf_ctx, name);
-		if (err)
-			goto err_irq;
-	} else
-#endif
-	{
-		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
-		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
-		spin_lock_init(&eq->tasklet_ctx.lock);
-		tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
-			     (unsigned long)&eq->tasklet_ctx);
-	}
-
-	/* EQs are created in ARMED state
-	 */
-	eq_update_ci(eq, 1);
+		goto err_eq;
 
 	kvfree(in);
 	return 0;
 
-err_irq:
-	free_irq(eq->irqn, eq);
-
 err_eq:
 	mlx5_cmd_destroy_eq(dev, eq->eqn);
 
@@ -756,27 +322,59 @@
 	return err;
 }
 
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev : Device which owns the eq
+ * @eq  : EQ to enable
+ * @nb  : Notifier call block
+ *
+ * Must be called after EQ is created in device.
+ *
+ * @return: 0 if no error
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		   struct notifier_block *nb)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int err;
+
+	err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+	if (!err)
+		eq_update_ci(eq, 1);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
+
+/**
+ * mlx5_eq_disable - Disable EQ for receiving EQEs
+ * @dev : Device which owns the eq
+ * @eq  : EQ to disable
+ * @nb  : Notifier call block
+ *
+ * Must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		     struct notifier_block *nb)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+	mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
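Taken together with create_map_eq()/destroy_unmap_eq(), the expected ordering is create, enable, disable, destroy. A minimal sketch, assuming the caller has prepared param and nb (error unwinding abbreviated):

static int eq_lifecycle_sketch(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
			       struct mlx5_eq_param *param,
			       struct notifier_block *nb)
{
	int err = create_map_eq(dev, eq, param);

	if (err)
		return err;
	err = mlx5_eq_enable(dev, eq, nb);	/* attaches IRQ nb, arms EQ */
	if (err) {
		destroy_unmap_eq(dev, eq);
		return err;
	}
	/* ... EQ is live; no EQEs were delivered before the enable ... */
	mlx5_eq_disable(dev, eq, nb);
	return destroy_unmap_eq(dev, eq);
}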
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
 	int err;
 
 	mlx5_debug_eq_remove(dev, eq);
-	free_irq(eq->irqn, eq);
+
 	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
 	if (err)
 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
 			       eq->eqn);
 	synchronize_irq(eq->irqn);
 
-	if (eq->type == MLX5_EQ_TYPE_COMP) {
-		tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	} else if (eq->type == MLX5_EQ_TYPE_PF) {
-		cancel_work_sync(&eq->pf_ctx.work);
-		destroy_workqueue(eq->pf_ctx.wq);
-		mempool_destroy(eq->pf_ctx.pool);
-#endif
-	}
 	mlx5_buf_free(dev, &eq->buf);
 
 	return err;
@@ -787,62 +385,149 @@
 	struct mlx5_cq_table *table = &eq->cq_table;
 	int err;
 
-	spin_lock_irq(&table->lock);
+	spin_lock(&table->lock);
 	err = radix_tree_insert(&table->tree, cq->cqn, cq);
-	spin_unlock_irq(&table->lock);
+	spin_unlock(&table->lock);
 
 	return err;
 }
 
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
 {
 	struct mlx5_cq_table *table = &eq->cq_table;
 	struct mlx5_core_cq *tmp;
 
-	spin_lock_irq(&table->lock);
+	spin_lock(&table->lock);
 	tmp = radix_tree_delete(&table->tree, cq->cqn);
-	spin_unlock_irq(&table->lock);
+	spin_unlock(&table->lock);
 
 	if (!tmp) {
-		mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
-		return -ENOENT;
+		mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
+			      cq->cqn, eq->eqn);
+		return;
 	}
 
-	if (tmp != cq) {
-		mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
-		return -EINVAL;
-	}
+	if (tmp != cq)
+		mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
+			      cq->cqn, eq->eqn);
+}
 
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *eq_table;
+	int i;
+
+	eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+	if (!eq_table)
+		return -ENOMEM;
+
+	dev->priv.eq_table = eq_table;
+
+	mlx5_eq_debugfs_init(dev);
+
+	mutex_init(&eq_table->lock);
+	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+		ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+
+	eq_table->irq_table = dev->priv.irq_table;
 	return 0;
 }
 
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
 {
+	mlx5_eq_debugfs_cleanup(dev);
+	kvfree(dev->priv.eq_table);
+}
+
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev,
+			   struct mlx5_eq *eq, struct mlx5_eq_param *param)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;
 
-	spin_lock_init(&dev->priv.eq_table.lock);
+	mutex_lock(&eq_table->lock);
+	/* Async EQs must share irq index 0 */
+	if (param->irq_index != 0) {
+		err = -EINVAL;
+		goto unlock;
+	}
 
-	err = mlx5_eq_debugfs_init(dev);
-
+	err = create_map_eq(dev, eq, param);
+unlock:
+	mutex_unlock(&eq_table->lock);
 	return err;
 }
 
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
-	mlx5_eq_debugfs_cleanup(dev);
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int err;
+
+	mutex_lock(&eq_table->lock);
+	err = destroy_unmap_eq(dev, eq);
+	mutex_unlock(&eq_table->lock);
+	return err;
 }
 
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+static int cq_err_event_notifier(struct notifier_block *nb,
+				 unsigned long type, void *data)
 {
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	struct mlx5_eq_table *eqt;
+	struct mlx5_core_cq *cq;
+	struct mlx5_eqe *eqe;
+	struct mlx5_eq *eq;
+	u32 cqn;
+
+	/* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+	eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+	eq  = &eqt->async_eq.core;
+	eqe = data;
+
+	cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+	mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+		       cqn, eqe->data.cq_err.syndrome);
+
+	cq = mlx5_eq_cq_get(eq, cqn);
+	if (unlikely(!cq)) {
+		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+		return NOTIFY_OK;
+	}
+
+	if (cq->event)
+		cq->event(cq, type);
+
+	mlx5_cq_put(cq);
+
+	return NOTIFY_OK;
+}
+
+static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
+{
+	__be64 *user_unaffiliated_events;
+	__be64 *user_affiliated_events;
+	int i;
+
+	user_affiliated_events =
+		MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
+	user_unaffiliated_events =
+		MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
+
+	for (i = 0; i < 4; i++)
+		mask[i] |= be64_to_cpu(user_affiliated_events[i] |
+				       user_unaffiliated_events[i]);
+}
+
+static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
+{
 	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
-	int err;
 
 	if (MLX5_VPORT_MANAGER(dev))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
-	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
-	    MLX5_CAP_GEN(dev, general_notification_event))
+	if (MLX5_CAP_GEN(dev, general_notification_event))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
 
 	if (MLX5_CAP_GEN(dev, port_module_event))
@@ -865,127 +550,402 @@
 	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
 
-	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
-				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
-				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+	if (mlx5_eswitch_is_funcs_handler(dev))
+		async_event_mask |=
+			(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
+
+	mask[0] = async_event_mask;
+
+	if (MLX5_CAP_GEN(dev, event_cap))
+		gather_user_async_events(dev, mask);
+}
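The mask is four u64 words covering 256 event types. Assuming mask[0] carries types 0..63, as the shifts above imply, a generic setter would look like this sketch (the helper is hypothetical):

static inline void eq_mask_set_sketch(u64 mask[4], u8 event_type)
{
	mask[event_type / 64] |= 1ull << (event_type % 64);
}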
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_param param = {};
+	int err;
+
+	MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+	mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+	table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
+		.nent = MLX5_NUM_CMD_EQE,
+	};
+
+	param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD;
+	err = create_async_eq(dev, &table->cmd_eq.core, &param);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
-		return err;
+		goto err0;
 	}
-
-	mlx5_cmd_use_events(dev);
-
-	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-				 MLX5_NUM_ASYNC_EQE, async_event_mask,
-				 "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+	err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
 	if (err) {
-		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+		mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
 		goto err1;
 	}
+	mlx5_cmd_use_events(dev);
 
-	err = mlx5_create_map_eq(dev, &table->pages_eq,
-				 MLX5_EQ_VEC_PAGES,
-				 /* TODO: sriov max_vf + */ 1,
-				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
-				 MLX5_EQ_TYPE_ASYNC);
+	table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
+		.nent = MLX5_NUM_ASYNC_EQE,
+	};
+
+	gather_async_events_mask(dev, param.mask);
+	err = create_async_eq(dev, &table->async_eq.core, &param);
 	if (err) {
-		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
 		goto err2;
 	}
+	err = mlx5_eq_enable(dev, &table->async_eq.core,
+			     &table->async_eq.irq_nb);
+	if (err) {
+		mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+		goto err3;
+	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (MLX5_CAP_GEN(dev, pg)) {
-		err = mlx5_create_map_eq(dev, &table->pfault_eq,
-					 MLX5_EQ_VEC_PFAULT,
-					 MLX5_NUM_ASYNC_EQE,
-					 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
-					 "mlx5_page_fault_eq",
-					 MLX5_EQ_TYPE_PF);
-		if (err) {
-			mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
-				       err);
-			goto err3;
-		}
+	table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
+		.nent = /* TODO: sriov max_vf + */ 1,
+	};
+
+	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST;
+	err = create_async_eq(dev, &table->pages_eq.core, &param);
+	if (err) {
+		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+		goto err4;
+	}
+	err = mlx5_eq_enable(dev, &table->pages_eq.core,
+			     &table->pages_eq.irq_nb);
+	if (err) {
+		mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+		goto err5;
 	}
 
 	return err;
+
+err5:
+	destroy_async_eq(dev, &table->pages_eq.core);
+err4:
+	mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
 err3:
-	mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
-	return err;
-#endif
-
+	destroy_async_eq(dev, &table->async_eq.core);
 err2:
-	mlx5_destroy_unmap_eq(dev, &table->async_eq);
-
-err1:
 	mlx5_cmd_use_polling(dev);
-	mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+	mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+err1:
+	destroy_async_eq(dev, &table->cmd_eq.core);
+err0:
+	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 	return err;
 }
 
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
 {
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	struct mlx5_eq_table *table = dev->priv.eq_table;
 	int err;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (MLX5_CAP_GEN(dev, pg)) {
-		err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
-		if (err)
-			mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
-				      err);
-	}
-#endif
-
-	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+	mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+	err = destroy_async_eq(dev, &table->pages_eq.core);
 	if (err)
 		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
 			      err);
 
-	err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+	mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+	err = destroy_async_eq(dev, &table->async_eq.core);
 	if (err)
 		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
 			      err);
+
 	mlx5_cmd_use_polling(dev);
 
-	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+	mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+	err = destroy_async_eq(dev, &table->cmd_eq.core);
 	if (err)
 		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
 			      err);
+
+	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 }
 
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-		       u32 *out, int outlen)
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
 {
-	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+	return &dev->priv.eq_table->async_eq.core;
+}
 
-	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
-	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+	synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+	synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed for RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
+		       struct mlx5_eq_param *param)
+{
+	struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+	int err;
+
+	if (!eq)
+		return ERR_PTR(-ENOMEM);
+
+	err = create_async_eq(dev, eq, param);
+	if (err) {
+		kvfree(eq);
+		eq = ERR_PTR(err);
+	}
+
+	return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	int err;
+
+	if (IS_ERR(eq))
+		return -EINVAL;
+
+	err = destroy_async_eq(dev, eq);
+	if (err)
+		goto out;
+
+	kvfree(eq);
+out:
+	return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
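A usage sketch of the generic pair; the RDMA ODP page-fault EQ mentioned above is the intended consumer, but the mask and queue size here are illustrative assumptions:

static struct mlx5_eq *odp_eq_sketch(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_param param = {
		.irq_index = 0,		/* async EQs share IRQ index 0 */
		.nent = MLX5_NUM_ASYNC_EQE,
	};

	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
	return mlx5_eq_create_generic(dev, &param);	/* ERR_PTR on failure */
}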
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+	u32 ci = eq->cons_index + cc;
+	struct mlx5_eqe *eqe;
+
+	eqe = get_eqe(eq, ci & (eq->nent - 1));
+	eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+	/* Make sure we read EQ entry contents after we've
+	 * checked the ownership bit.
+	 */
+	if (eqe)
+		dma_rmb();
+
+	return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+	u32 val;
+
+	eq->cons_index += cc;
+	val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+	__raw_writel((__force u32)cpu_to_be32(val), addr);
+	/* We still want ordering, just not swabbing, so add a barrier */
+	wmb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
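These two exports pair up for polled consumption; a sketch of the loop a consumer would run (the dispatch step is elided):

static void eq_poll_generic_sketch(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;
	u32 cc = 0;

	while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
		/* ... handle eqe ... */
		cc++;
	}
	mlx5_eq_update_ci(eq, cc, true);	/* cons_index += cc, re-arm */
}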
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq, *n;
+
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		list_del(&eq->list);
+		mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
+		if (destroy_unmap_eq(dev, &eq->core))
+			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+				       eq->core.eqn);
+		tasklet_disable(&eq->tasklet_ctx.task);
+		kfree(eq);
+	}
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq;
+	int ncomp_eqs;
+	int nent;
+	int err;
+	int i;
+
+	INIT_LIST_HEAD(&table->comp_eqs_list);
+	ncomp_eqs = table->num_comp_eqs;
+	nent = MLX5_COMP_EQ_SIZE;
+	for (i = 0; i < ncomp_eqs; i++) {
+		int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
+		struct mlx5_eq_param param = {};
+
+		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+		if (!eq) {
+			err = -ENOMEM;
+			goto clean;
+		}
+
+		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+		spin_lock_init(&eq->tasklet_ctx.lock);
+		tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+			     (unsigned long)&eq->tasklet_ctx);
+
+		eq->irq_nb.notifier_call = mlx5_eq_comp_int;
+		param = (struct mlx5_eq_param) {
+			.irq_index = vecidx,
+			.nent = nent,
+		};
+		err = create_map_eq(dev, &eq->core, &param);
+		if (err) {
+			kfree(eq);
+			goto clean;
+		}
+		err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+		if (err) {
+			destroy_unmap_eq(dev, &eq->core);
+			kfree(eq);
+			goto clean;
+		}
+
+		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+		/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+		list_add_tail(&eq->list, &table->comp_eqs_list);
+	}
+
+	return 0;
+
+clean:
+	destroy_comp_eqs(dev);
+	return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+		    unsigned int *irqn)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq, *n;
+	int err = -ENOENT;
+	int i = 0;
+
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		if (i++ == vector) {
+			*eqn = eq->core.eqn;
+			*irqn = eq->core.irqn;
+			err = 0;
+			break;
+		}
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
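A caller sketch, as a ULP might use before attaching a CQ to a given completion vector (the wrapper is hypothetical):

static int vector_lookup_sketch(struct mlx5_core_dev *dev, int vector)
{
	unsigned int irqn;
	int eqn;
	int err;

	err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
	return err ? err : eqn;
}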
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+	return dev->priv.eq_table->num_comp_eqs;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+	int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+	return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+					  vecidx);
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+	return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
+}
+#endif
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq;
+
+	list_for_each_entry(eq, &table->comp_eqs_list, list) {
+		if (eq->core.eqn == eqn)
+			return eq;
+	}
+
+	return ERR_PTR(-ENOENT);
 }
 
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	struct mlx5_eq *eq;
+	struct mlx5_eq_table *table = dev->priv.eq_table;
 
-#ifdef CONFIG_RFS_ACCEL
-	if (dev->rmap) {
-		free_irq_cpu_rmap(dev->rmap);
-		dev->rmap = NULL;
-	}
-#endif
-	list_for_each_entry(eq, &table->comp_eqs_list, list)
-		free_irq(eq->irqn, eq);
-
-	free_irq(table->pages_eq.irqn, &table->pages_eq);
-	free_irq(table->async_eq.irqn, &table->async_eq);
-	free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (MLX5_CAP_GEN(dev, pg))
-		free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
-	pci_free_irq_vectors(dev->pdev);
+	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+	mlx5_irq_table_destroy(dev);
+	mutex_unlock(&table->lock);
 }
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int err;
+
+	eq_table->num_comp_eqs =
+		mlx5_irq_get_num_comp(eq_table->irq_table);
+
+	err = create_async_eqs(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to create async EQs\n");
+		goto err_async_eqs;
+	}
+
+	err = create_comp_eqs(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to create completion EQs\n");
+		goto err_comp_eqs;
+	}
+
+	return 0;
+err_comp_eqs:
+	destroy_async_eqs(dev);
+err_async_eqs:
+	return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+	destroy_comp_eqs(dev);
+	destroy_async_eqs(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+	struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+	return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+EXPORT_SYMBOL(mlx5_eq_notifier_register);
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+	struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+	return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
+}
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
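A registration sketch mirroring the CQ_ERROR notifier earlier in this file; the handler and variable names are hypothetical:

static struct mlx5_nb my_nb;

static int port_change_sketch(struct notifier_block *nb,
			      unsigned long type, void *data)
{
	struct mlx5_eqe *eqe = data;	/* type is the EQE event type */

	if (!eqe)
		return NOTIFY_DONE;
	/* ... inspect eqe->sub_type for the PORT_CHANGE reason ... */
	return NOTIFY_OK;
}

/* at init:	MLX5_NB_INIT(&my_nb, port_change_sketch, PORT_CHANGE);
 *		mlx5_eq_notifier_register(dev, &my_nb);
 * at teardown:	mlx5_eq_notifier_unregister(dev, &my_nb);
 */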
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ea7dedc..60fddf8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,10 +36,10 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "eswitch.h"
 #include "fs_core.h"
-
-#define UPLINK_VPORT 0xFFFF
+#include "ecpf.h"
 
 enum {
 	MLX5_ACTION_NONE = 0,
@@ -51,23 +51,34 @@
 struct vport_addr {
 	struct l2addr_node     node;
 	u8                     action;
-	u32                    vport;
+	u16                    vport;
 	struct mlx5_flow_handle *flow_rule;
 	bool mpfs; /* UC MAC was added to MPFs */
 	/* A flag indicating that mac was added due to mc promiscuous vport */
 	bool mc_promisc;
 };
 
-enum {
-	UC_ADDR_CHANGE = BIT(0),
-	MC_ADDR_CHANGE = BIT(1),
-	PROMISC_CHANGE = BIT(3),
-};
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw);
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw);
 
-/* Vport context events */
-#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
-			    MC_ADDR_CHANGE | \
-			    PROMISC_CHANGE)
+struct mlx5_vport *__must_check
+mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	u16 idx;
+
+	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+		return ERR_PTR(-EPERM);
+
+	idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
+	if (idx > esw->total_vports - 1) {
+		esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n",
+			  vport_num, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &esw->vports[idx];
+}
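A minimal caller sketch: the __must_check ERR_PTR result has to be tested before the vport is dereferenced (the wrapper is hypothetical):

static int vport_lookup_sketch(struct mlx5_eswitch *esw, u16 vport_num)
{
	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);

	if (IS_ERR(vport))
		return PTR_ERR(vport);
	return vport->vport;
}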
 
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
 					u32 events_mask)
@@ -80,20 +91,19 @@
 		 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 	MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
 	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
 	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
 				     in, nic_vport_context);
 
 	MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1);
 
-	if (events_mask & UC_ADDR_CHANGE)
+	if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE)
 		MLX5_SET(nic_vport_context, nic_vport_ctx,
 			 event_on_uc_address_change, 1);
-	if (events_mask & MC_ADDR_CHANGE)
+	if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE)
 		MLX5_SET(nic_vport_context, nic_vport_ctx,
 			 event_on_mc_address_change, 1);
-	if (events_mask & PROMISC_CHANGE)
+	if (events_mask & MLX5_VPORT_PROMISC_CHANGE)
 		MLX5_SET(nic_vport_context, nic_vport_ctx,
 			 event_on_promisc_change, 1);
 
@@ -109,12 +119,35 @@
 	MLX5_SET(modify_esw_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
 	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+	MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
 	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
-static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+					  void *in, int inlen)
+{
+	return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen);
+}
+
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+				       void *out, int outlen)
+{
+	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+
+	MLX5_SET(query_esw_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+	MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+					 void *out, int outlen)
+{
+	return query_esw_vport_context_cmd(esw->dev, vport, out, outlen);
+}
+
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
 				  u16 vlan, u8 qos, u8 set_flags)
 {
 	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
@@ -151,7 +184,7 @@
 
 /* E-Switch FDB */
 static struct mlx5_flow_handle *
-__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule,
 			 u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
 {
 	int match_header = (is_zero_ether_addr(mac_c) ? 0 :
@@ -187,7 +220,7 @@
 					misc_parameters);
 		mc_misc  = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 					misc_parameters);
-		MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
+		MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK);
 		MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
 	}
 
@@ -214,7 +247,7 @@
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport)
 {
 	u8 mac_c[ETH_ALEN];
 
@@ -223,7 +256,7 @@
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport)
 {
 	u8 mac_c[ETH_ALEN];
 	u8 mac_v[ETH_ALEN];
@@ -236,7 +269,7 @@
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
 {
 	u8 mac_c[ETH_ALEN];
 	u8 mac_v[ETH_ALEN];
@@ -246,6 +279,37 @@
 	return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
 }
 
+enum {
+	LEGACY_VEPA_PRIO = 0,
+	LEGACY_FDB_PRIO,
+};
+
+static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *fdb;
+	int err;
+
+	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* num FTE 2, num FG 2 */
+	fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO,
+						  2, 2, 0, 0);
+	if (IS_ERR(fdb)) {
+		err = PTR_ERR(fdb);
+		esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
+		return err;
+	}
+	esw->fdb_table.legacy.vepa_fdb = fdb;
+
+	return 0;
+}
+
 static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -263,7 +327,7 @@
 	esw_debug(dev, "Create FDB log_max_size(%d)\n",
 		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
-	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get FDB flow namespace\n");
 		return -EOPNOTSUPP;
@@ -274,8 +338,8 @@
 		return -ENOMEM;
 
 	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-
 	ft_attr.max_fte = table_size;
+	ft_attr.prio = LEGACY_FDB_PRIO;
 	fdb = mlx5_create_flow_table(root_ns, &ft_attr);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
@@ -334,41 +398,96 @@
 	esw->fdb_table.legacy.promisc_grp = g;
 
 out:
-	if (err) {
-		if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
-			mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
-			esw->fdb_table.legacy.allmulti_grp = NULL;
-		}
-		if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
-			mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
-			esw->fdb_table.legacy.addr_grp = NULL;
-		}
-		if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) {
-			mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
-			esw->fdb_table.legacy.fdb = NULL;
-		}
-	}
+	if (err)
+		esw_destroy_legacy_fdb_table(esw);
 
 	kvfree(flow_group_in);
 	return err;
 }
 
+static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+	esw_debug(esw->dev, "Destroy VEPA Table\n");
+	if (!esw->fdb_table.legacy.vepa_fdb)
+		return;
+
+	mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
+	esw->fdb_table.legacy.vepa_fdb = NULL;
+}
+
 static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
 {
+	esw_debug(esw->dev, "Destroy FDB Table\n");
 	if (!esw->fdb_table.legacy.fdb)
 		return;
 
-	esw_debug(esw->dev, "Destroy FDB Table\n");
-	mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
-	mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
-	mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+	if (esw->fdb_table.legacy.promisc_grp)
+		mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+	if (esw->fdb_table.legacy.allmulti_grp)
+		mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+	if (esw->fdb_table.legacy.addr_grp)
+		mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
 	mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+
 	esw->fdb_table.legacy.fdb = NULL;
 	esw->fdb_table.legacy.addr_grp = NULL;
 	esw->fdb_table.legacy.allmulti_grp = NULL;
 	esw->fdb_table.legacy.promisc_grp = NULL;
 }
 
+static int esw_create_legacy_table(struct mlx5_eswitch *esw)
+{
+	int err;
+
+	memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+
+	err = esw_create_legacy_vepa_table(esw);
+	if (err)
+		return err;
+
+	err = esw_create_legacy_fdb_table(esw);
+	if (err)
+		esw_destroy_legacy_vepa_table(esw);
+
+	return err;
+}
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+					MLX5_VPORT_MC_ADDR_CHANGE | \
+					MLX5_VPORT_PROMISC_CHANGE)
+
+static int esw_legacy_enable(struct mlx5_eswitch *esw)
+{
+	int ret;
+
+	ret = esw_create_legacy_table(esw);
+	if (ret)
+		return ret;
+
+	mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
+	return 0;
+}
+
+static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
+{
+	esw_cleanup_vepa_rules(esw);
+	esw_destroy_legacy_fdb_table(esw);
+	esw_destroy_legacy_vepa_table(esw);
+}
+
+static void esw_legacy_disable(struct mlx5_eswitch *esw)
+{
+	struct esw_mc_addr *mc_promisc;
+
+	mlx5_eswitch_disable_pf_vf_vports(esw);
+
+	mc_promisc = &esw->mc_promisc;
+	if (mc_promisc->uplink_rule)
+		mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+	esw_destroy_legacy_table(esw);
+}
+
 /* E-Switch vport UC/MC lists management */
 typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
 				 struct vport_addr *vaddr);
@@ -376,19 +495,19 @@
 static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 {
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 	int err;
 
-	/* Skip mlx5_mpfs_add_mac for PFs,
-	 * it is already done by the PF netdev in mlx5e_execute_l2_action
+	/* Skip mlx5_mpfs_add_mac for eswitch managers,
+	 * it is already done by its netdev in mlx5e_execute_l2_action
 	 */
-	if (!vport)
+	if (esw->manager_vport == vport)
 		goto fdb_add;
 
 	err = mlx5_mpfs_add_mac(esw->dev, mac);
 	if (err) {
 		esw_warn(esw->dev,
-			 "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+			 "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n",
 			 mac, vport, err);
 		return err;
 	}
@@ -396,7 +515,7 @@
 
 fdb_add:
 	/* SRIOV is enabled: Forward UC MAC to vport */
-	if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
+	if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY)
 		vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
 
 	esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
@@ -408,13 +527,13 @@
 static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 {
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 	int err = 0;
 
-	/* Skip mlx5_mpfs_del_mac for PFs,
-	 * it is already done by the PF netdev in mlx5e_execute_l2_action
+	/* Skip mlx5_mpfs_del_mac for eswitch managers,
+	 * it is already done by its netdev in mlx5e_execute_l2_action
 	 */
-	if (!vport || !vaddr->mpfs)
+	if (!vaddr->mpfs || esw->manager_vport == vport)
 		goto fdb_del;
 
 	err = mlx5_mpfs_del_mac(esw->dev, mac);
@@ -437,17 +556,18 @@
 				   struct esw_mc_addr *esw_mc)
 {
 	u8 *mac = vaddr->node.addr;
-	u32 vport_idx = 0;
+	struct mlx5_vport *vport;
+	u16 i, vport_num;
 
-	for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) {
-		struct mlx5_vport *vport = &esw->vports[vport_idx];
+	mlx5_esw_for_all_vports(esw, i, vport) {
 		struct hlist_head *vport_hash = vport->mc_list;
 		struct vport_addr *iter_vaddr =
 					l2addr_hash_find(vport_hash,
 							 mac,
 							 struct vport_addr);
+		vport_num = vport->vport;
 		if (IS_ERR_OR_NULL(vport->allmulti_rule) ||
-		    vaddr->vport == vport_idx)
+		    vaddr->vport == vport_num)
 			continue;
 		switch (vaddr->action) {
 		case MLX5_ACTION_ADD:
@@ -459,14 +579,14 @@
 			if (!iter_vaddr) {
 				esw_warn(esw->dev,
 					 "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n",
-					 mac, vport_idx);
+					 mac, vport_num);
 				continue;
 			}
-			iter_vaddr->vport = vport_idx;
+			iter_vaddr->vport = vport_num;
 			iter_vaddr->flow_rule =
 					esw_fdb_set_vport_rule(esw,
 							       mac,
-							       vport_idx);
+							       vport_num);
 			iter_vaddr->mc_promisc = true;
 			break;
 		case MLX5_ACTION_DEL:
@@ -484,7 +604,7 @@
 	struct hlist_head *hash = esw->mc_table;
 	struct esw_mc_addr *esw_mc;
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 
 	if (!esw->fdb_table.legacy.fdb)
 		return 0;
@@ -498,7 +618,7 @@
 		return -ENOMEM;
 
 	esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
-		esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+		esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK);
 
 	/* Add this multicast mac to all the mc promiscuous vports */
 	update_allmulti_vports(esw, vaddr, esw_mc);
@@ -524,7 +644,7 @@
 	struct hlist_head *hash = esw->mc_table;
 	struct esw_mc_addr *esw_mc;
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 
 	if (!esw->fdb_table.legacy.fdb)
 		return 0;
@@ -563,9 +683,8 @@
 
 /* Apply vport UC/MC list to HW l2 table and FDB table */
 static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
-				      u32 vport_num, int list_type)
+				      struct mlx5_vport *vport, int list_type)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 	bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
 	vport_addr_action vport_addr_add;
 	vport_addr_action vport_addr_del;
@@ -598,9 +717,8 @@
 
 /* Sync vport UC/MC list from vport context */
 static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
-				       u32 vport_num, int list_type)
+				       struct mlx5_vport *vport, int list_type)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 	bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
 	u8 (*mac_list)[ETH_ALEN];
 	struct l2addr_node *node;
@@ -629,12 +747,12 @@
 	if (!vport->enabled)
 		goto out;
 
-	err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type,
+	err = mlx5_query_nic_vport_mac_list(esw->dev, vport->vport, list_type,
 					    mac_list, &size);
 	if (err)
 		goto out;
 	esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
-		  vport_num, is_uc ? "UC" : "MC", size);
+		  vport->vport, is_uc ? "UC" : "MC", size);
 
 	for (i = 0; i < size; i++) {
 		if (is_uc && !is_valid_ether_addr(mac_list[i]))
@@ -672,10 +790,10 @@
 		if (!addr) {
 			esw_warn(esw->dev,
 				 "Failed to add MAC(%pM) to vport[%d] DB\n",
-				 mac_list[i], vport_num);
+				 mac_list[i], vport->vport);
 			continue;
 		}
-		addr->vport = vport_num;
+		addr->vport = vport->vport;
 		addr->action = MLX5_ACTION_ADD;
 	}
 out:
@@ -685,9 +803,9 @@
 /* Sync vport UC/MC list from vport context
  * Must be called after esw_update_vport_addr_list
  */
-static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw,
+					struct mlx5_vport *vport)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 	struct l2addr_node *node;
 	struct vport_addr *addr;
 	struct hlist_head *hash;
@@ -710,32 +828,32 @@
 		if (!addr) {
 			esw_warn(esw->dev,
 				 "Failed to add allmulti MAC(%pM) to vport[%d] DB\n",
-				 mac, vport_num);
+				 mac, vport->vport);
 			continue;
 		}
-		addr->vport = vport_num;
+		addr->vport = vport->vport;
 		addr->action = MLX5_ACTION_ADD;
 		addr->mc_promisc = true;
 	}
 }
 
 /* Apply vport rx mode to HW FDB table */
-static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw,
+				    struct mlx5_vport *vport,
 				    bool promisc, bool mc_promisc)
 {
 	struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 
 	if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc)
 		goto promisc;
 
 	if (mc_promisc) {
 		vport->allmulti_rule =
-				esw_fdb_set_vport_allmulti_rule(esw, vport_num);
+			esw_fdb_set_vport_allmulti_rule(esw, vport->vport);
 		if (!allmulti_addr->uplink_rule)
 			allmulti_addr->uplink_rule =
 				esw_fdb_set_vport_allmulti_rule(esw,
-								UPLINK_VPORT);
+								MLX5_VPORT_UPLINK);
 		allmulti_addr->refcnt++;
 	} else if (vport->allmulti_rule) {
 		mlx5_del_flow_rules(vport->allmulti_rule);
@@ -754,8 +872,8 @@
 		return;
 
 	if (promisc) {
-		vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw,
-								     vport_num);
+		vport->promisc_rule =
+			esw_fdb_set_vport_promisc_rule(esw, vport->vport);
 	} else if (vport->promisc_rule) {
 		mlx5_del_flow_rules(vport->promisc_rule);
 		vport->promisc_rule = NULL;
@@ -763,23 +881,23 @@
 }
 
 /* Sync vport rx mode from vport context */
-static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw,
+				     struct mlx5_vport *vport)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 	int promisc_all = 0;
 	int promisc_uc = 0;
 	int promisc_mc = 0;
 	int err;
 
 	err = mlx5_query_nic_vport_promisc(esw->dev,
-					   vport_num,
+					   vport->vport,
 					   &promisc_uc,
 					   &promisc_mc,
 					   &promisc_all);
 	if (err)
 		return;
 	esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
-		  vport_num, promisc_all, promisc_mc);
+		  vport->vport, promisc_all, promisc_mc);
 
 	if (!vport->info.trusted || !vport->enabled) {
 		promisc_uc = 0;
@@ -787,7 +905,7 @@
 		promisc_all = 0;
 	}
 
-	esw_apply_vport_rx_mode(esw, vport_num, promisc_all,
+	esw_apply_vport_rx_mode(esw, vport, promisc_all,
 				(promisc_all || promisc_mc));
 }
 
@@ -797,32 +915,26 @@
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	u8 mac[ETH_ALEN];
 
-	mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+	mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac);
 	esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
 		  vport->vport, mac);
 
-	if (vport->enabled_events & UC_ADDR_CHANGE) {
-		esw_update_vport_addr_list(esw, vport->vport,
-					   MLX5_NVPRT_LIST_TYPE_UC);
-		esw_apply_vport_addr_list(esw, vport->vport,
-					  MLX5_NVPRT_LIST_TYPE_UC);
+	if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) {
+		esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC);
+		esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC);
 	}
 
-	if (vport->enabled_events & MC_ADDR_CHANGE) {
-		esw_update_vport_addr_list(esw, vport->vport,
-					   MLX5_NVPRT_LIST_TYPE_MC);
-	}
+	if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE)
+		esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC);
 
-	if (vport->enabled_events & PROMISC_CHANGE) {
-		esw_update_vport_rx_mode(esw, vport->vport);
+	if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) {
+		esw_update_vport_rx_mode(esw, vport);
 		if (!IS_ERR_OR_NULL(vport->allmulti_rule))
-			esw_update_vport_mc_promisc(esw, vport->vport);
+			esw_update_vport_mc_promisc(esw, vport);
 	}
 
-	if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) {
-		esw_apply_vport_addr_list(esw, vport->vport,
-					  MLX5_NVPRT_LIST_TYPE_MC);
-	}
+	if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE))
+		esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC);
 
 	esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
 	if (vport->enabled)
@@ -841,8 +953,8 @@
 	mutex_unlock(&esw->state_lock);
 }
 
-static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
-				       struct mlx5_vport *vport)
+int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_group *vlan_grp = NULL;
@@ -869,7 +981,7 @@
 		  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
 
 	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-						    vport->vport);
+			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
@@ -925,8 +1037,8 @@
 	return err;
 }
 
-static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
-					   struct mlx5_vport *vport)
+void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
+				    struct mlx5_vport *vport)
 {
 	if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
 		mlx5_del_flow_rules(vport->egress.allowed_vlan);
@@ -938,8 +1050,8 @@
 	vport->egress.drop_rule = NULL;
 }
 
-static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
-					 struct mlx5_vport *vport)
+void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport)
 {
 	if (IS_ERR_OR_NULL(vport->egress.acl))
 		return;
@@ -955,8 +1067,8 @@
 	vport->egress.acl = NULL;
 }
 
-static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
-					struct mlx5_vport *vport)
+int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
+				 struct mlx5_vport *vport)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_core_dev *dev = esw->dev;
@@ -987,7 +1099,7 @@
 		  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
 
 	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-						    vport->vport);
+			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
@@ -1087,8 +1199,8 @@
 	return err;
 }
 
-static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
-					    struct mlx5_vport *vport)
+void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
+				     struct mlx5_vport *vport)
 {
 	if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
 		mlx5_del_flow_rules(vport->ingress.drop_rule);
@@ -1098,10 +1210,12 @@
 
 	vport->ingress.drop_rule = NULL;
 	vport->ingress.allow_rule = NULL;
+
+	esw_vport_del_ingress_acl_modify_metadata(esw, vport);
 }
 
-static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
-					  struct mlx5_vport *vport)
+void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
+				   struct mlx5_vport *vport)
 {
 	if (IS_ERR_OR_NULL(vport->ingress.acl))
 		return;
@@ -1133,13 +1247,6 @@
 	int err = 0;
 	u8 *smac_v;
 
-	if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
-		mlx5_core_warn(esw->dev,
-			       "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
-			       vport->vport);
-		return -EPERM;
-	}
-
 	esw_vport_cleanup_ingress_rules(esw, vport);
 
 	if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
@@ -1198,7 +1305,7 @@
 	if (counter) {
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		drop_ctr_dst.counter = counter;
+		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
 		dst = &drop_ctr_dst;
 		dest_num++;
 	}
@@ -1285,7 +1392,7 @@
 	if (counter) {
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		drop_ctr_dst.counter = counter;
+		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
 		dst = &drop_ctr_dst;
 		dest_num++;
 	}
@@ -1304,18 +1411,49 @@
 	return err;
 }
 
+static bool element_type_supported(struct mlx5_eswitch *esw, int type)
+{
+	const struct mlx5_core_dev *dev = esw->dev;
+
+	switch (type) {
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_TASR;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+	}
+	return false;
+}
+
 /* Vport QoS management */
-static int esw_create_tsar(struct mlx5_eswitch *esw)
+static void esw_create_tsar(struct mlx5_eswitch *esw)
 {
 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
 	struct mlx5_core_dev *dev = esw->dev;
+	__be32 *attr;
 	int err;
 
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
-		return 0;
+		return;
+
+	if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+		return;
 
 	if (esw->qos.enabled)
-		return -EEXIST;
+		return;
+
+	MLX5_SET(scheduling_context, tsar_ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
 
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
@@ -1323,11 +1461,10 @@
 						 &esw->qos.root_tsar_id);
 	if (err) {
 		esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
-		return err;
+		return;
 	}
 
 	esw->qos.enabled = true;
-	return 0;
 }
 
 static void esw_destroy_tsar(struct mlx5_eswitch *esw)
@@ -1346,11 +1483,11 @@
 	esw->qos.enabled = false;
 }
 
-static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
+static int esw_vport_enable_qos(struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport,
 				u32 initial_max_rate, u32 initial_bw_share)
 {
 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 	struct mlx5_core_dev *dev = esw->dev;
 	void *vport_elem;
 	int err = 0;
@@ -1366,7 +1503,7 @@
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
 	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
 				  element_attributes);
-	MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
+	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
 	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
 		 esw->qos.root_tsar_id);
 	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
@@ -1379,7 +1516,7 @@
 						 &vport->qos.esw_tsar_ix);
 	if (err) {
 		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
-			 vport_num, err);
+			 vport->vport, err);
 		return err;
 	}
 
@@ -1387,10 +1524,10 @@
 	return 0;
 }
 
-static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
+static void esw_vport_disable_qos(struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
-	int err = 0;
+	int err;
 
 	if (!vport->qos.enabled)
 		return;
@@ -1400,16 +1537,16 @@
 						  vport->qos.esw_tsar_ix);
 	if (err)
 		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
-			 vport_num, err);
+			 vport->vport, err);
 
 	vport->qos.enabled = false;
 }
 
-static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
+static int esw_vport_qos_config(struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport,
 				u32 max_rate, u32 bw_share)
 {
 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-	struct mlx5_vport *vport = &esw->vports[vport_num];
 	struct mlx5_core_dev *dev = esw->dev;
 	void *vport_elem;
 	u32 bitmask = 0;
@@ -1425,7 +1562,7 @@
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
 	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
 				  element_attributes);
-	MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
+	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
 	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
 		 esw->qos.root_tsar_id);
 	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
@@ -1441,13 +1578,29 @@
 						 bitmask);
 	if (err) {
 		esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
-			 vport_num, err);
+			 vport->vport, err);
 		return err;
 	}
 
 	return 0;
 }
 
+int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
+			       u32 rate_mbps)
+{
+	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+	struct mlx5_vport *vport;
+
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+
+	return mlx5_modify_scheduling_element_cmd(esw->dev,
+						  SCHEDULING_HIERARCHY_E_SWITCH,
+						  ctx,
+						  vport->qos.esw_tsar_ix,
+						  MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
+}
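
As a usage sketch (hedged; the caller name and the 1000 Mbps figure are hypothetical), mlx5_esw_modify_vport_rate() rewrites only the max_average_bw field of the vport's scheduling element, so the vport must already be attached to the TSAR (i.e. vport->qos.esw_tsar_ix must be valid, as set up by esw_vport_enable_qos() above):

	static int example_police_vport(struct mlx5_eswitch *esw, u16 vport_num)
	{
		/* Cap the vport at 1000 Mbps; units follow the rate_mbps parameter. */
		return mlx5_esw_modify_vport_rate(esw, vport_num, 1000);
	}
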
+
 static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
 {
 	((u8 *)node_guid)[7] = mac[0];
@@ -1463,22 +1616,32 @@
 static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 				 struct mlx5_vport *vport)
 {
-	int vport_num = vport->vport;
+	u16 vport_num = vport->vport;
+	int flags;
 
-	if (!vport_num)
+	if (esw->manager_vport == vport_num)
 		return;
 
 	mlx5_modify_vport_admin_state(esw->dev,
 				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-				      vport_num,
+				      vport_num, 1,
 				      vport->info.link_state);
-	mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
-	mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+
+	/* Host PF has its own mac/guid. */
+	if (vport_num) {
+		mlx5_modify_nic_vport_mac_address(esw->dev, vport_num,
+						  vport->info.mac);
+		mlx5_modify_nic_vport_node_guid(esw->dev, vport_num,
+						vport->info.node_guid);
+	}
+
+	flags = (vport->info.vlan || vport->info.qos) ?
+		SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
 	modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
-			       (vport->info.vlan || vport->info.qos));
+			       flags);
 
 	/* Only legacy mode needs ACLs */
-	if (esw->mode == SRIOV_LEGACY) {
+	if (esw->mode == MLX5_ESWITCH_LEGACY) {
 		esw_vport_ingress_config(esw, vport);
 		esw_vport_egress_config(esw, vport);
 	}
@@ -1519,10 +1682,10 @@
 		mlx5_fc_destroy(dev, vport->egress.drop_counter);
 }
 
-static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
-			     int enable_events)
+static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+			     enum mlx5_eswitch_vport_event enabled_events)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
+	u16 vport_num = vport->vport;
 
 	mutex_lock(&esw->state_lock);
 	WARN_ON(vport->enabled);
@@ -1530,23 +1693,26 @@
 	esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
 
 	/* Create steering drop counters for ingress and egress ACLs */
-	if (vport_num && esw->mode == SRIOV_LEGACY)
+	if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY)
 		esw_vport_create_drop_counters(vport);
 
 	/* Restore old vport configuration */
 	esw_apply_vport_conf(esw, vport);
 
 	/* Attach vport to the eswitch rate limiter */
-	if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate,
+	if (esw_vport_enable_qos(esw, vport, vport->info.max_rate,
 				 vport->qos.bw_share))
 		esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num);
 
 	/* Sync with current vport context */
-	vport->enabled_events = enable_events;
+	vport->enabled_events = enabled_events;
 	vport->enabled = true;
 
-	/* only PF is trusted by default */
-	if (!vport_num)
+	/* The esw manager is trusted by default. The host PF (vport 0) is
+	 * trusted as well on a smartNIC, as it is a vport group manager.
+	 */
+	if (esw->manager_vport == vport_num ||
+	    (!vport_num && mlx5_core_is_ecpf(esw->dev)))
 		vport->info.trusted = true;
 
 	esw_vport_change_handle_locked(vport);
@@ -1556,9 +1722,10 @@
 	mutex_unlock(&esw->state_lock);
 }
 
-static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+static void esw_disable_vport(struct mlx5_eswitch *esw,
+			      struct mlx5_vport *vport)
 {
-	struct mlx5_vport *vport = &esw->vports[vport_num];
+	u16 vport_num = vport->vport;
 
 	if (!vport->enabled)
 		return;
@@ -1567,7 +1734,6 @@
 	/* Mark this vport as disabled to discard new events */
 	vport->enabled = false;
 
-	synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
 	/* Wait for current already scheduled events to complete */
 	flush_workqueue(esw->work_queue);
 	/* Disable events from this vport */
@@ -1579,11 +1745,12 @@
 	 */
 	esw_vport_change_handle_locked(vport);
 	vport->enabled_events = 0;
-	esw_vport_disable_qos(esw, vport_num);
-	if (vport_num && esw->mode == SRIOV_LEGACY) {
+	esw_vport_disable_qos(esw, vport);
+	if (esw->manager_vport != vport_num &&
+	    esw->mode == MLX5_ESWITCH_LEGACY) {
 		mlx5_modify_vport_admin_state(esw->dev,
 					      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-					      vport_num,
+					      vport_num, 1,
 					      MLX5_VPORT_ADMIN_STATE_DOWN);
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
@@ -1593,112 +1760,212 @@
 	mutex_unlock(&esw->state_lock);
 }
 
+static int eswitch_vport_event(struct notifier_block *nb,
+			       unsigned long type, void *data)
+{
+	struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+	struct mlx5_eqe *eqe = data;
+	struct mlx5_vport *vport;
+	u16 vport_num;
+
+	vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return NOTIFY_OK;
+
+	if (vport->enabled)
+		queue_work(esw->work_queue, &vport->vport_change_handler);
+
+	return NOTIFY_OK;
+}
+
+/**
+ * mlx5_esw_query_functions - Returns raw output about functions state
+ * @dev:	Pointer to device to query
+ *
+ * mlx5_esw_query_functions() allocates the raw QUERY_ESW_FUNCTIONS output
+ * buffer and returns it on success; otherwise it returns an ERR_PTR.
+ * When a valid pointer is returned, the caller must free it with kvfree().
+ */
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out);
+	u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
+	u32 *out;
+	int err;
+
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(query_esw_functions_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	if (!err)
+		return out;
+
+	kvfree(out);
+	return ERR_PTR(err);
+}
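
A hedged caller sketch (the helper name is hypothetical) showing the ownership contract from the kernel-doc above: the output buffer must be released with kvfree(), and host_num_of_vfs is extracted the same way mlx5_eswitch_update_num_of_vfs() does further down in this file:

	static u16 example_read_host_num_vfs(struct mlx5_core_dev *dev)
	{
		const u32 *out;
		u16 num_vfs;

		out = mlx5_esw_query_functions(dev);
		if (IS_ERR(out))
			return 0;	/* treat a query failure as "no VFs" */

		num_vfs = MLX5_GET(query_esw_functions_out, out,
				   host_params_context.host_num_of_vfs);
		kvfree(out);
		return num_vfs;
	}
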
+
+static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw)
+{
+	MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+	mlx5_eq_notifier_register(esw->dev, &esw->nb);
+
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) {
+		MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler,
+			     ESW_FUNCTIONS_CHANGED);
+		mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+	}
+}
+
+static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
+{
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev))
+		mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
+
+	mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
+	flush_workqueue(esw->work_queue);
+}
+
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
 
+/* mlx5_eswitch_enable_pf_vf_vports() enables the PF, ECPF and VF vports,
+ * whichever of them are present on the eswitch.
+ */
+void
+mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+				 enum mlx5_eswitch_vport_event enabled_events)
+{
+	struct mlx5_vport *vport;
+	int i;
 
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
+	/* Enable PF vport */
+	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+	esw_enable_vport(esw, vport, enabled_events);
+
+	/* Enable ECPF vports */
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+		esw_enable_vport(esw, vport, enabled_events);
+	}
+
+	/* Enable VF vports */
+	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+		esw_enable_vport(esw, vport, enabled_events);
+}
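
For illustration (a hypothetical caller; the actual event masks passed by the legacy and offloads enable paths are not shown in this hunk), enabled_events is a bitmask of the mlx5_eswitch_vport_event values introduced in eswitch.h below:

	static void example_enable_all_events(struct mlx5_eswitch *esw)
	{
		/* Propagate UC/MC address and promisc changes to all vports. */
		mlx5_eswitch_enable_pf_vf_vports(esw,
						 MLX5_VPORT_UC_ADDR_CHANGE |
						 MLX5_VPORT_MC_ADDR_CHANGE |
						 MLX5_VPORT_PROMISC_CHANGE);
	}
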
+
+/* mlx5_eswitch_disable_pf_vf_vports() disables the PF, ECPF and VF vports,
+ * whichever of them were previously enabled on the eswitch.
+ */
+void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	int i;
+
+	mlx5_esw_for_all_vports_reverse(esw, i, vport)
+		esw_disable_vport(esw, vport);
+}
+
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode)
 {
 	int err;
-	int i, enabled_events;
 
 	if (!ESW_ALLOWED(esw) ||
 	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
-		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+		esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
 		return -EOPNOTSUPP;
 	}
 
 	if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
-		esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");
+		esw_warn(esw->dev, "ingress ACL is not supported by FW\n");
 
 	if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
-		esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
+		esw_warn(esw->dev, "egress ACL is not supported by FW\n");
 
-	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+	esw_create_tsar(esw);
+
 	esw->mode = mode;
 
-	if (mode == SRIOV_LEGACY) {
-		err = esw_create_legacy_fdb_table(esw);
-	} else {
-		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+	mlx5_lag_update(esw->dev);
 
-		err = esw_offloads_init(esw, nvfs + 1);
+	if (mode == MLX5_ESWITCH_LEGACY) {
+		err = esw_legacy_enable(esw);
+	} else {
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+		err = esw_offloads_enable(esw);
 	}
 
 	if (err)
 		goto abort;
 
-	err = esw_create_tsar(esw);
-	if (err)
-		esw_warn(esw->dev, "Failed to create eswitch TSAR");
+	mlx5_eswitch_event_handlers_register(esw);
 
-	/* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
-	 * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
-	 * 2. FDB/Eswitch is programmed by user space tools
-	 */
-	enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
-	for (i = 0; i <= nvfs; i++)
-		esw_enable_vport(esw, i, enabled_events);
+	esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
+		 mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+		 esw->esw_funcs.num_vfs, esw->enabled_vports);
 
-	esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
-		 esw->enabled_vports);
 	return 0;
 
 abort:
-	esw->mode = SRIOV_NONE;
+	esw->mode = MLX5_ESWITCH_NONE;
 
-	if (mode == SRIOV_OFFLOADS)
+	if (mode == MLX5_ESWITCH_OFFLOADS) {
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+	}
 
 	return err;
 }
 
-void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
 {
-	struct esw_mc_addr *mc_promisc;
 	int old_mode;
-	int nvports;
-	int i;
 
-	if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
+	if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE)
 		return;
 
-	esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
-		 esw->enabled_vports, esw->mode);
+	esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+		 esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+		 esw->esw_funcs.num_vfs, esw->enabled_vports);
 
-	mc_promisc = &esw->mc_promisc;
-	nvports = esw->enabled_vports;
+	mlx5_eswitch_event_handlers_unregister(esw);
 
-	for (i = 0; i < esw->total_vports; i++)
-		esw_disable_vport(esw, i);
-
-	if (mc_promisc && mc_promisc->uplink_rule)
-		mlx5_del_flow_rules(mc_promisc->uplink_rule);
+	if (esw->mode == MLX5_ESWITCH_LEGACY)
+		esw_legacy_disable(esw);
+	else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+		esw_offloads_disable(esw);
 
 	esw_destroy_tsar(esw);
 
-	if (esw->mode == SRIOV_LEGACY)
-		esw_destroy_legacy_fdb_table(esw);
-	else if (esw->mode == SRIOV_OFFLOADS)
-		esw_offloads_cleanup(esw, nvports);
-
 	old_mode = esw->mode;
-	esw->mode = SRIOV_NONE;
+	esw->mode = MLX5_ESWITCH_NONE;
 
-	if (old_mode == SRIOV_OFFLOADS)
+	mlx5_lag_update(esw->dev);
+
+	if (old_mode == MLX5_ESWITCH_OFFLOADS) {
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+	}
 }
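
A minimal mode-transition sketch (hypothetical; it ignores the locking and devlink plumbing a real caller would need): switching between legacy and offloads pairs a full disable with a re-enable in the new mode:

	static int example_switch_to_offloads(struct mlx5_eswitch *esw)
	{
		mlx5_eswitch_disable(esw);
		return mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
	}
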
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
-	int total_vports = MLX5_TOTAL_VPORTS(dev);
 	struct mlx5_eswitch *esw;
-	int vport_num;
-	int err;
+	struct mlx5_vport *vport;
+	int total_vports;
+	int err, i;
 
-	if (!MLX5_ESWITCH_MANAGER(dev))
+	if (!MLX5_VPORT_MANAGER(dev))
 		return 0;
 
+	total_vports = mlx5_eswitch_get_total_vports(dev);
+
 	esw_info(dev,
 		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
 		 total_vports,
@@ -1710,6 +1977,8 @@
 		return -ENOMEM;
 
 	esw->dev = dev;
+	esw->manager_vport = mlx5_eswitch_manager_vport(dev);
+	esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev);
 
 	esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
 	if (!esw->work_queue) {
@@ -1724,33 +1993,30 @@
 		goto abort;
 	}
 
+	esw->total_vports = total_vports;
+
 	err = esw_offloads_init_reps(esw);
 	if (err)
 		goto abort;
 
+	mutex_init(&esw->offloads.encap_tbl_lock);
 	hash_init(esw->offloads.encap_tbl);
-	hash_init(esw->offloads.mod_hdr_tbl);
+	mutex_init(&esw->offloads.mod_hdr.lock);
+	hash_init(esw->offloads.mod_hdr.hlist);
+	atomic64_set(&esw->offloads.num_flows, 0);
 	mutex_init(&esw->state_lock);
 
-	for (vport_num = 0; vport_num < total_vports; vport_num++) {
-		struct mlx5_vport *vport = &esw->vports[vport_num];
-
-		vport->vport = vport_num;
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
 		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
 		vport->dev = dev;
 		INIT_WORK(&vport->vport_change_handler,
 			  esw_vport_change_handler);
 	}
 
-	esw->total_vports = total_vports;
 	esw->enabled_vports = 0;
-	esw->mode = SRIOV_NONE;
+	esw->mode = MLX5_ESWITCH_NONE;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
-	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
-	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
-		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
-	else
-		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	return 0;
@@ -1765,7 +2031,7 @@
 
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 {
-	if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))
+	if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
 		return;
 
 	esw_info(esw->dev, "cleanup\n");
@@ -1773,52 +2039,31 @@
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	esw_offloads_cleanup_reps(esw);
+	mutex_destroy(&esw->offloads.mod_hdr.lock);
+	mutex_destroy(&esw->offloads.encap_tbl_lock);
 	kfree(esw->vports);
 	kfree(esw);
 }
 
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
-	struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
-	u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
-	struct mlx5_vport *vport;
-
-	if (!esw) {
-		pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
-			vport_num);
-		return;
-	}
-
-	vport = &esw->vports[vport_num];
-	if (vport->enabled)
-		queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
 /* Vport Administration */
-#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
-
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
-			       int vport, u8 mac[ETH_ALEN])
+			       u16 vport, u8 mac[ETH_ALEN])
 {
-	struct mlx5_vport *evport;
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 	u64 node_guid;
 	int err = 0;
 
-	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
-		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
+	if (is_multicast_ether_addr(mac))
 		return -EINVAL;
 
 	mutex_lock(&esw->state_lock);
-	evport = &esw->vports[vport];
 
-	if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
+	if (evport->info.spoofchk && !is_valid_ether_addr(mac))
 		mlx5_core_warn(esw->dev,
-			       "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
+			       "Set invalid MAC while spoofchk is on, vport(%d)\n",
 			       vport);
-		err = -EPERM;
-		goto unlock;
-	}
 
 	err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
 	if (err) {
@@ -1837,7 +2082,7 @@
 
 	ether_addr_copy(evport->info.mac, mac);
 	evport->info.node_guid = node_guid;
-	if (evport->enabled && esw->mode == SRIOV_LEGACY)
+	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
 		err = esw_vport_ingress_config(esw, evport);
 
 unlock:
@@ -1846,22 +2091,21 @@
 }
 
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
-				 int vport, int link_state)
+				 u16 vport, int link_state)
 {
-	struct mlx5_vport *evport;
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
-		return -EINVAL;
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
 
 	mutex_lock(&esw->state_lock);
-	evport = &esw->vports[vport];
 
 	err = mlx5_modify_vport_admin_state(esw->dev,
 					    MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-					    vport, link_state);
+					    vport, 1, link_state);
 	if (err) {
 		mlx5_core_warn(esw->dev,
 			       "Failed to set vport %d link state, err = %d",
@@ -1873,20 +2117,16 @@
 
 unlock:
 	mutex_unlock(&esw->state_lock);
-	return 0;
+	return err;
 }
 
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
-				  int vport, struct ifla_vf_info *ivi)
+				  u16 vport, struct ifla_vf_info *ivi)
 {
-	struct mlx5_vport *evport;
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 
-	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
-		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
-		return -EINVAL;
-
-	evport = &esw->vports[vport];
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
 
 	memset(ivi, 0, sizeof(*ivi));
 	ivi->vf = vport - 1;
@@ -1906,65 +2146,70 @@
 }
 
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-				  int vport, u16 vlan, u8 qos, u8 set_flags)
+				  u16 vport, u16 vlan, u8 qos, u8 set_flags)
 {
-	struct mlx5_vport *evport;
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
+	if (vlan > 4095 || qos > 7)
 		return -EINVAL;
 
-	mutex_lock(&esw->state_lock);
-	evport = &esw->vports[vport];
-
 	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
 	if (err)
-		goto unlock;
+		return err;
 
 	evport->info.vlan = vlan;
 	evport->info.qos = qos;
-	if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
 		err = esw_vport_ingress_config(esw, evport);
 		if (err)
-			goto unlock;
+			return err;
 		err = esw_vport_egress_config(esw, evport);
 	}
 
-unlock:
-	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
 int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-				int vport, u16 vlan, u8 qos)
+				u16 vport, u16 vlan, u8 qos)
 {
 	u8 set_flags = 0;
+	int err;
 
 	if (vlan || qos)
 		set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
 
-	return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+	mutex_lock(&esw->state_lock);
+	err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+	mutex_unlock(&esw->state_lock);
+
+	return err;
 }
 
 int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
-				    int vport, bool spoofchk)
+				    u16 vport, bool spoofchk)
 {
-	struct mlx5_vport *evport;
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 	bool pschk;
 	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
-		return -EINVAL;
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
 
 	mutex_lock(&esw->state_lock);
-	evport = &esw->vports[vport];
 	pschk = evport->info.spoofchk;
 	evport->info.spoofchk = spoofchk;
-	if (evport->enabled && esw->mode == SRIOV_LEGACY)
+	if (pschk && !is_valid_ether_addr(evport->info.mac))
+		mlx5_core_warn(esw->dev,
+			       "Spoofchk in set while MAC is invalid, vport(%d)\n",
+			       evport->vport);
+	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
 		err = esw_vport_ingress_config(esw, evport);
 	if (err)
 		evport->info.spoofchk = pschk;
@@ -1973,18 +2218,139 @@
 	return err;
 }
 
-int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
-				 int vport, bool setting)
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
 {
-	struct mlx5_vport *evport;
+	if (esw->fdb_table.legacy.vepa_uplink_rule)
+		mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
+
+	if (esw->fdb_table.legacy.vepa_star_rule)
+		mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
+
+	esw->fdb_table.legacy.vepa_uplink_rule = NULL;
+	esw->fdb_table.legacy.vepa_star_rule = NULL;
+}
+
+static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
+					 u8 setting)
+{
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *flow_rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+	void *misc;
+
+	if (!setting) {
+		esw_cleanup_vepa_rules(esw);
+		return 0;
+	}
+
+	if (esw->fdb_table.legacy.vepa_uplink_rule)
+		return 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	/* The uplink rule forwards uplink traffic to the FDB */
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = esw->fdb_table.legacy.fdb;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+					&flow_act, &dest, 1);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		goto out;
+	} else {
+		esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
+	}
+
+	/* Star rule to forward all traffic to uplink vport */
+	memset(spec, 0, sizeof(*spec));
+	memset(&dest, 0, sizeof(dest));
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest.vport.num = MLX5_VPORT_UPLINK;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+					&flow_act, &dest, 1);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		goto out;
+	} else {
+		esw->fdb_table.legacy.vepa_star_rule = flow_rule;
+	}
+
+out:
+	kvfree(spec);
+	if (err)
+		esw_cleanup_vepa_rules(esw);
+	return err;
+}
+
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+{
+	int err = 0;
+
+	if (!esw)
+		return -EOPNOTSUPP;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
-		return -EINVAL;
 
 	mutex_lock(&esw->state_lock);
-	evport = &esw->vports[vport];
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = _mlx5_eswitch_set_vepa_locked(esw, setting);
+
+out:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+{
+	int err = 0;
+
+	if (!esw)
+		return -EOPNOTSUPP;
+
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+
+	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+
+out:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
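
A hedged round-trip sketch (hypothetical helper): both entry points take state_lock internally and return -EOPNOTSUPP unless the eswitch is in legacy mode, and the "get" side simply reports whether the VEPA uplink rule is installed:

	static int example_toggle_vepa(struct mlx5_eswitch *esw)
	{
		u8 setting = 0;
		int err;

		err = mlx5_eswitch_set_vepa(esw, 1);	/* install VEPA rules */
		if (err)
			return err;

		err = mlx5_eswitch_get_vepa(esw, &setting);
		if (err)
			return err;

		return setting == 1 ? 0 : -EINVAL;
	}
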
+
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+				 u16 vport, bool setting)
+{
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
+
+	mutex_lock(&esw->state_lock);
 	evport->info.trusted = setting;
 	if (evport->enabled)
 		esw_vport_change_handle_locked(evport);
@@ -2000,8 +2366,7 @@
 	u32 max_guarantee = 0;
 	int i;
 
-	for (i = 0; i < esw->total_vports; i++) {
-		evport = &esw->vports[i];
+	mlx5_esw_for_all_vports(esw, i, evport) {
 		if (!evport->enabled || evport->info.min_rate < max_guarantee)
 			continue;
 		max_guarantee = evport->info.min_rate;
@@ -2020,8 +2385,7 @@
 	int err;
 	int i;
 
-	for (i = 0; i < esw->total_vports; i++) {
-		evport = &esw->vports[i];
+	mlx5_esw_for_all_vports(esw, i, evport) {
 		if (!evport->enabled)
 			continue;
 		vport_min_rate = evport->info.min_rate;
@@ -2036,7 +2400,7 @@
 		if (bw_share == evport->qos.bw_share)
 			continue;
 
-		err = esw_vport_qos_config(esw, i, vport_max_rate,
+		err = esw_vport_qos_config(esw, evport, vport_max_rate,
 					   bw_share);
 		if (!err)
 			evport->qos.bw_share = bw_share;
@@ -2047,27 +2411,31 @@
 	return 0;
 }
 
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 				u32 max_rate, u32 min_rate)
 {
-	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-	bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
-					fw_max_bw_share >= MLX5_MIN_BW_SHARE;
-	bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
-	struct mlx5_vport *evport;
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	u32 fw_max_bw_share;
 	u32 previous_min_rate;
 	u32 divider;
+	bool min_rate_supported;
+	bool max_rate_supported;
 	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
-		return -EINVAL;
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
+
+	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
 	if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&esw->state_lock);
-	evport = &esw->vports[vport];
 
 	if (min_rate == evport->info.min_rate)
 		goto set_max_rate;
@@ -2085,7 +2453,7 @@
 	if (max_rate == evport->info.max_rate)
 		goto unlock;
 
-	err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share);
+	err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share);
 	if (!err)
 		evport->info.max_rate = max_rate;
 
@@ -2095,16 +2463,15 @@
 }
 
 static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
-					       int vport_idx,
+					       struct mlx5_vport *vport,
 					       struct mlx5_vport_drop_stats *stats)
 {
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
-	struct mlx5_vport *vport = &esw->vports[vport_idx];
 	u64 rx_discard_vport_down, tx_discard_vport_down;
 	u64 bytes = 0;
 	int err = 0;
 
-	if (!vport->enabled || esw->mode != SRIOV_LEGACY)
+	if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY)
 		return 0;
 
 	if (vport->egress.drop_counter)
@@ -2119,7 +2486,7 @@
 	    !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
 		return 0;
 
-	err = mlx5_query_vport_down_stats(dev, vport_idx,
+	err = mlx5_query_vport_down_stats(dev, vport->vport, 1,
 					  &rx_discard_vport_down,
 					  &tx_discard_vport_down);
 	if (err)
@@ -2134,19 +2501,18 @@
 }
 
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
-				 int vport,
+				 u16 vport_num,
 				 struct ifla_vf_stats *vf_stats)
 {
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
 	struct mlx5_vport_drop_stats stats = {0};
 	int err = 0;
 	u32 *out;
 
-	if (!ESW_ALLOWED(esw))
-		return -EPERM;
-	if (!LEGAL_VPORT(esw, vport))
-		return -EINVAL;
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
 
 	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
@@ -2155,11 +2521,9 @@
 	MLX5_SET(query_vport_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
-	MLX5_SET(query_vport_counter_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+	MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport);
+	MLX5_SET(query_vport_counter_in, in, other_vport, 1);
 
-	memset(out, 0, outlen);
 	err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
 	if (err)
 		goto free_out;
@@ -2216,6 +2580,55 @@
 
 u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
 {
-	return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
+	return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE;
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+	struct mlx5_eswitch *esw;
+
+	esw = dev->priv.eswitch;
+	return ESW_ALLOWED(esw) ? esw->offloads.encap :
+		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
+{
+	if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+	     dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
+	    (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+	     dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
+		return true;
+
+	return false;
+}
+
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+			       struct mlx5_core_dev *dev1)
+{
+	return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+		dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+}
+
+void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs)
+{
+	const u32 *out;
+
+	WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
+
+	if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		esw->esw_funcs.num_vfs = num_vfs;
+		return;
+	}
+
+	out = mlx5_esw_query_functions(esw->dev);
+	if (IS_ERR(out))
+		return;
+
+	esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out,
+					  host_params_context.host_num_of_vfs);
+	kvfree(out);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c17bfca..6bd6f58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -35,9 +35,11 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_link.h>
+#include <linux/atomic.h>
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "lib/mpfs.h"
 
@@ -49,8 +51,6 @@
 #define MLX5_MAX_MC_PER_VPORT(dev) \
 	(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
 
-#define FDB_UPLINK_VPORT 0xffff
-
 #define MLX5_MIN_BW_SHARE 1
 
 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
@@ -59,12 +59,18 @@
 #define mlx5_esw_has_fwd_fdb(dev) \
 	MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
 
+#define FDB_MAX_CHAIN 3
+#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
+#define FDB_MAX_PRIO 16
+
 struct vport_ingress {
 	struct mlx5_flow_table *acl;
 	struct mlx5_flow_group *allow_untagged_spoofchk_grp;
 	struct mlx5_flow_group *allow_spoofchk_only_grp;
 	struct mlx5_flow_group *allow_untagged_only_grp;
 	struct mlx5_flow_group *drop_grp;
+	struct mlx5_modify_hdr   *modify_metadata;
+	struct mlx5_flow_handle  *modify_metadata_rule;
 	struct mlx5_flow_handle  *allow_rule;
 	struct mlx5_flow_handle  *drop_rule;
 	struct mlx5_fc           *drop_counter;
@@ -96,6 +102,13 @@
 	bool                    trusted;
 };
 
+/* Vport context events */
+enum mlx5_eswitch_vport_event {
+	MLX5_VPORT_UC_ADDR_CHANGE = BIT(0),
+	MLX5_VPORT_MC_ADDR_CHANGE = BIT(1),
+	MLX5_VPORT_PROMISC_CHANGE = BIT(3),
+};
+
 struct mlx5_vport {
 	struct mlx5_core_dev    *dev;
 	int                     vport;
@@ -117,9 +130,16 @@
 	} qos;
 
 	bool                    enabled;
-	u16                     enabled_events;
+	enum mlx5_eswitch_vport_event enabled_events;
 };
 
+enum offloads_fdb_flags {
+	ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+};
+
+extern const unsigned int ESW_POOLS[4];
+
+#define PRIO_LEVELS 2
 struct mlx5_eswitch_fdb {
 	union {
 		struct legacy_fdb {
@@ -127,30 +147,50 @@
 			struct mlx5_flow_group *addr_grp;
 			struct mlx5_flow_group *allmulti_grp;
 			struct mlx5_flow_group *promisc_grp;
+			struct mlx5_flow_table *vepa_fdb;
+			struct mlx5_flow_handle *vepa_uplink_rule;
+			struct mlx5_flow_handle *vepa_star_rule;
 		} legacy;
 
 		struct offloads_fdb {
-			struct mlx5_flow_table *fast_fdb;
-			struct mlx5_flow_table *fwd_fdb;
+			struct mlx5_flow_namespace *ns;
 			struct mlx5_flow_table *slow_fdb;
 			struct mlx5_flow_group *send_to_vport_grp;
+			struct mlx5_flow_group *peer_miss_grp;
+			struct mlx5_flow_handle **peer_miss_rules;
 			struct mlx5_flow_group *miss_grp;
 			struct mlx5_flow_handle *miss_rule_uni;
 			struct mlx5_flow_handle *miss_rule_multi;
 			int vlan_push_pop_refcount;
+
+			struct {
+				struct mlx5_flow_table *fdb;
+				u32 num_rules;
+			} fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS];
+			/* Protects fdb_prio table */
+			struct mutex fdb_prio_lock;
+
+			int fdb_left[ARRAY_SIZE(ESW_POOLS)];
 		} offloads;
 	};
+	u32 flags;
 };
 
 struct mlx5_esw_offload {
 	struct mlx5_flow_table *ft_offloads;
 	struct mlx5_flow_group *vport_rx_group;
 	struct mlx5_eswitch_rep *vport_reps;
+	struct list_head peer_flows;
+	struct mutex peer_mutex;
+	struct mutex encap_tbl_lock; /* protects encap_tbl */
 	DECLARE_HASHTABLE(encap_tbl, 8);
-	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+	struct mod_hdr_tbl mod_hdr;
+	DECLARE_HASHTABLE(termtbl_tbl, 8);
+	struct mutex termtbl_mutex; /* protects termtbl hash */
+	const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
 	u8 inline_mode;
-	u64 num_flows;
-	u8 encap;
+	atomic64_t num_flows;
+	enum devlink_eswitch_encap_mode encap;
 };
 
 /* E-Switch MC FDB table hash node */
@@ -160,19 +200,37 @@
 	u32                    refcnt;
 };
 
+struct mlx5_host_work {
+	struct work_struct	work;
+	struct mlx5_eswitch	*esw;
+};
+
+struct mlx5_esw_functions {
+	struct mlx5_nb		nb;
+	u16			num_vfs;
+};
+
+enum {
+	MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+};
+
 struct mlx5_eswitch {
 	struct mlx5_core_dev    *dev;
+	struct mlx5_nb          nb;
+	/* legacy data structures */
 	struct mlx5_eswitch_fdb fdb_table;
 	struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
+	struct esw_mc_addr mc_promisc;
+	/* end of legacy */
 	struct workqueue_struct *work_queue;
 	struct mlx5_vport       *vports;
+	u32 flags;
 	int                     total_vports;
 	int                     enabled_vports;
 	/* Synchronize between vport change events
 	 * and async SRIOV admin state changes
 	 */
 	struct mutex            state_lock;
-	struct esw_mc_addr	mc_promisc;
 
 	struct {
 		bool            enabled;
@@ -181,40 +239,85 @@
 
 	struct mlx5_esw_offload offloads;
 	int                     mode;
+	int                     nvports;
+	u16                     manager_vport;
+	u16                     first_host_vport;
+	struct mlx5_esw_functions esw_funcs;
 };
 
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_disable(struct mlx5_eswitch *esw);
+int esw_offloads_enable(struct mlx5_eswitch *esw);
 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
+void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
+				     struct mlx5_vport *vport);
+int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
+				 struct mlx5_vport *vport);
+void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
+				    struct mlx5_vport *vport);
+int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport);
+void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport);
+void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
+				   struct mlx5_vport *vport);
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+					       struct mlx5_vport *vport);
+int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
+			       u32 rate_mbps);
 
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
-void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
-			       int vport, u8 mac[ETH_ALEN]);
+			       u16 vport, u8 mac[ETH_ALEN]);
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
-				 int vport, int link_state);
+				 u16 vport, int link_state);
 int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-				int vport, u16 vlan, u8 qos);
+				u16 vport, u16 vlan, u8 qos);
 int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
-				    int vport, bool spoofchk);
+				    u16 vport, bool spoofchk);
 int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
-				 int vport_num, bool setting);
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+				 u16 vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 				u32 max_rate, u32 min_rate);
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
-				  int vport, struct ifla_vf_info *ivi);
+				  u16 vport, struct ifla_vf_info *ivi);
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
-				 int vport,
+				 u16 vport,
 				 struct ifla_vf_stats *vf_stats);
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+					  void *in, int inlen);
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+					 void *out, int outlen);
+
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
+struct mlx5_termtbl_handle;
+
+bool
+mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+			      struct mlx5_flow_act *flow_act,
+			      struct mlx5_flow_spec *spec);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
+			      struct mlx5_flow_table *ft,
+			      struct mlx5_flow_spec *spec,
+			      struct mlx5_esw_flow_attr *attr,
+			      struct mlx5_flow_act *flow_act,
+			      struct mlx5_flow_destination *dest,
+			      int num_dest);
+
+void
+mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
+			 struct mlx5_termtbl_handle *tt);
 
 struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
@@ -228,9 +331,23 @@
 mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_handle *rule,
 				struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+			  struct mlx5_flow_handle *rule,
+			  struct mlx5_esw_flow_attr *attr);
+
+bool
+mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw);
+
+u16
+mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw);
+
+u32
+mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw);
 
 struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
+				  struct mlx5_flow_destination *dest);
 
 enum {
 	SET_VLAN_STRIP	= BIT(0),
@@ -247,13 +364,17 @@
 /* current maximum for flow based vport multicasting */
 #define MLX5_MAX_FLOW_FWD_VPORTS 2
 
+enum {
+	MLX5_ESW_DEST_ENCAP         = BIT(0),
+	MLX5_ESW_DEST_ENCAP_VALID   = BIT(1),
+};
+
 struct mlx5_esw_flow_attr {
 	struct mlx5_eswitch_rep *in_rep;
-	struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS];
-	struct mlx5_core_dev	*out_mdev[MLX5_MAX_FLOW_FWD_VPORTS];
 	struct mlx5_core_dev	*in_mdev;
+	struct mlx5_core_dev    *counter_dev;
 
-	int mirror_count;
+	int split_count;
 	int out_count;
 
 	int	action;
@@ -262,19 +383,35 @@
 	u8	vlan_prio[MLX5_FS_VLAN_DEPTH];
 	u8	total_vlan;
 	bool	vlan_handled;
-	u32	encap_id;
-	u32	mod_hdr_id;
-	u8	match_level;
+	struct {
+		u32 flags;
+		struct mlx5_eswitch_rep *rep;
+		struct mlx5_pkt_reformat *pkt_reformat;
+		struct mlx5_core_dev *mdev;
+		struct mlx5_termtbl_handle *termtbl;
+	} dests[MLX5_MAX_FLOW_FWD_VPORTS];
+	struct  mlx5_modify_hdr *modify_hdr;
+	u8	inner_match_level;
+	u8	outer_match_level;
+	struct mlx5_fc *counter;
+	u32	chain;
+	u16	prio;
+	u32	dest_chain;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 };
 
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+				  struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
-int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+					 struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode encap,
+					struct netlink_ext_ack *extack);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode *encap);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@@ -282,7 +419,7 @@
 int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 				 struct mlx5_esw_flow_attr *attr);
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-				  int vport, u16 vlan, u8 qos, u8 set_flags);
+				  u16 vport, u16 vlan, u8 qos, u8 set_flags);
 
 static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
 						       u8 vlan_depth)
@@ -297,23 +434,189 @@
 		MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
 }
 
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
+			 struct mlx5_core_dev *dev1);
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+			       struct mlx5_core_dev *dev1);
+
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
-#define esw_info(dev, format, ...)				\
-	pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+#define esw_info(__dev, format, ...)			\
+	dev_info((__dev)->device, "E-Switch: " format, ##__VA_ARGS__)
 
-#define esw_warn(dev, format, ...)				\
-	pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+#define esw_warn(__dev, format, ...)			\
+	dev_warn((__dev)->device, "E-Switch: " format, ##__VA_ARGS__)
 
 #define esw_debug(dev, format, ...)				\
 	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+/* The returned number is valid only when the dev is an eswitch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+	return mlx5_core_is_ecpf_esw_manager(dev) ?
+		MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
+static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
+{
+	return mlx5_core_is_ecpf_esw_manager(dev) ?
+		MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
+}
+
+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
+{
+	/* Ideally the device would advertise the functions-changed
+	 * capability on whichever function processes the event, i.e. the
+	 * eswitch manager, regardless of it being an ECPF or a PF.
+	 * However, some ECPF-based devices do not have this capability
+	 * set, hence the OR with the ECPF check to cover such devices.
+	 */
+	return MLX5_CAP_ESW(dev, esw_functions_changed) ||
+	       mlx5_core_is_ecpf_esw_manager(dev);
+}
+
+static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
+{
+	/* The uplink always resides at the last element of the array. */
+	return esw->total_vports - 1;
+}
+
+static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
+{
+	return esw->total_vports - 2;
+}
+
+static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
+						  u16 vport_num)
+{
+	if (vport_num == MLX5_VPORT_ECPF) {
+		if (!mlx5_ecpf_vport_exists(esw->dev))
+			esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
+		return mlx5_eswitch_ecpf_idx(esw);
+	}
+
+	if (vport_num == MLX5_VPORT_UPLINK)
+		return mlx5_eswitch_uplink_idx(esw);
+
+	return vport_num;
+}
+
+static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
+						  int index)
+{
+	if (index == mlx5_eswitch_ecpf_idx(esw) &&
+	    mlx5_ecpf_vport_exists(esw->dev))
+		return MLX5_VPORT_ECPF;
+
+	if (index == mlx5_eswitch_uplink_idx(esw))
+		return MLX5_VPORT_UPLINK;
+
+	return index;
+}
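
A small consistency sketch (hypothetical): the two inline helpers above are inverses, mapping the sparse vport numbers (PF, VFs, ECPF, UPLINK) onto the dense esw->vports[] index space and back:

	static inline bool example_mapping_roundtrips(struct mlx5_eswitch *esw,
						      u16 vport_num)
	{
		int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);

		return mlx5_eswitch_index_to_vport_num(esw, idx) == vport_num;
	}
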
+
+/* TODO: This mlx5e_tc function shouldn't be called by eswitch */
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
+/* The vport getter/iterator are only valid after esw->total_vports
+ * and vport->vport are initialized in mlx5_eswitch_init.
+ */
+#define mlx5_esw_for_all_vports(esw, i, vport)		\
+	for ((i) = MLX5_VPORT_PF;			\
+	     (vport) = &(esw)->vports[i],		\
+	     (i) < (esw)->total_vports; (i)++)
+
+#define mlx5_esw_for_all_vports_reverse(esw, i, vport)	\
+	for ((i) = (esw)->total_vports - 1;		\
+	     (vport) = &(esw)->vports[i],		\
+	     (i) >= MLX5_VPORT_PF; (i)--)
+
+#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)	\
+	for ((i) = MLX5_VPORT_FIRST_VF;			\
+	     (vport) = &(esw)->vports[(i)],		\
+	     (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs)	\
+	for ((i) = (nvfs);					\
+	     (vport) = &(esw)->vports[(i)],			\
+	     (i) >= MLX5_VPORT_FIRST_VF; (i)--)
+
+/* The rep getter/iterator are only valid after esw->total_vports
+ * and vport->vport are initialized in mlx5_eswitch_init.
+ */
+#define mlx5_esw_for_all_reps(esw, i, rep)			\
+	for ((i) = MLX5_VPORT_PF;				\
+	     (rep) = &(esw)->offloads.vport_reps[i],		\
+	     (i) < (esw)->total_vports; (i)++)
+
+#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs)		\
+	for ((i) = MLX5_VPORT_FIRST_VF;				\
+	     (rep) = &(esw)->offloads.vport_reps[i],		\
+	     (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs)	\
+	for ((i) = (nvfs);					\
+	     (rep) = &(esw)->offloads.vport_reps[i],		\
+	     (i) >= MLX5_VPORT_FIRST_VF; (i)--)
+
+#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs)	\
+	for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++)
+
+#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs)	\
+	for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
+
+/* Includes host PF (vport 0) if it's not esw manager. */
+#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs)	\
+	for ((i) = (esw)->first_host_vport;			\
+	     (rep) = &(esw)->offloads.vport_reps[i],		\
+	     (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs)	\
+	for ((i) = (nvfs);						\
+	     (rep) = &(esw)->offloads.vport_reps[i],			\
+	     (i) >= (esw)->first_host_vport; (i)--)
+
+#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs)	\
+	for ((vport) = (esw)->first_host_vport;			\
+	     (vport) <= (nvfs); (vport)++)
+
+#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs)	\
+	for ((vport) = (nvfs);						\
+	     (vport) >= (esw)->first_host_vport; (vport)--)
+
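
Iterator usage sketch (hypothetical helper): the macros assign the cursor via the comma operator before testing the loop condition, so (vport) is always valid inside the body. For example, counting the enabled VF vports:

	static int example_count_enabled_vfs(struct mlx5_eswitch *esw)
	{
		struct mlx5_vport *vport;
		int count = 0;
		int i;

		mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
			if (vport->enabled)
				count++;
		return count;
	}
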
+struct mlx5_vport *__must_check
+mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
+
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+
+void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
+
+void
+mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+				 enum mlx5_eswitch_vport_event enabled_events);
+void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
-static inline int  mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
-static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+static inline int  mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
+
+#define FDB_MAX_CHAIN 1
+#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
+#define FDB_MAX_PRIO 1
+
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3028e8d..9004a07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -37,11 +37,103 @@
 #include <linux/mlx5/fs.h>
 #include "mlx5_core.h"
 #include "eswitch.h"
+#include "rdma.h"
+#include "en.h"
+#include "fs_core.h"
+#include "lib/devcom.h"
+#include "lib/eq.h"
 
-enum {
-	FDB_FAST_PATH = 0,
-	FDB_SLOW_PATH
-};
+/* There are two match-all miss flows, one for unicast dst mac and
+ * one for multicast.
+ */
+#define MLX5_ESW_MISS_FLOWS (2)
+
+#define fdb_prio_table(esw, chain, prio, level) \
+	(esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
+
+#define UPLINK_REP_INDEX 0
+
+static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
+						     u16 vport_num)
+{
+	int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
+	WARN_ON(idx > esw->total_vports - 1);
+	return &esw->offloads.vport_reps[idx];
+}
+
+static struct mlx5_flow_table *
+esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
+static void
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
+
+bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw)
+{
+	return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED));
+}
+
+u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw)
+{
+	if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
+		return FDB_MAX_CHAIN;
+
+	return 0;
+}
+
+u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
+{
+	if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
+		return FDB_MAX_PRIO;
+
+	return 1;
+}
+
+static void
+mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+				  struct mlx5_flow_spec *spec,
+				  struct mlx5_esw_flow_attr *attr)
+{
+	void *misc2;
+	void *misc;
+
+	/* Use metadata matching because the vport is not represented by a
+	 * single VHCA in dual-port RoCE mode, and matching on the source
+	 * vport may fail.
+	 */
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+			 mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
+								   attr->in_rep->vport));
+
+		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+		MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+
+		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+		if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+	} else {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+		MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+			MLX5_SET(fte_match_set_misc, misc,
+				 source_eswitch_owner_vhca_id,
+				 MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+			MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+					 source_eswitch_owner_vhca_id);
+
+		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+	}
+
+	if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+	    attr->in_rep->vport == MLX5_VPORT_UPLINK)
+		spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
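
A minimal, self-contained sketch of the metadata approach (the low-16-bit
layout below is an illustrative assumption, not the driver's real encoding,
which lives in mlx5_eswitch_get_vport_metadata_for_match()): the source
vport is stamped into a scratch register at ingress, and rules then match
on that register instead of on the wire-level source port.

	#include <stdint.h>
	#include <stdio.h>

	/* assumed layout: vport number in the low 16 bits of reg_c_0 */
	#define METADATA_VPORT_MASK 0xffffu

	static uint32_t metadata_for_vport(uint16_t vport)
	{
		return (uint32_t)vport & METADATA_VPORT_MASK;
	}

	static int metadata_matches(uint32_t reg_c_0, uint16_t vport)
	{
		/* mask first, analogous to matching with MLX5_SET_TO_ONES */
		return (reg_c_0 & METADATA_VPORT_MASK) == metadata_for_vport(vport);
	}

	int main(void)
	{
		/* upper bits carry unrelated state and must not affect the match */
		uint32_t reg = metadata_for_vport(3) | 0xabcd0000u;

		printf("match=%d\n", metadata_matches(reg, 3));	/* match=1 */
		return 0;
	}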
 
 struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
@@ -49,21 +141,15 @@
 				struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
-	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_table *ft = NULL;
-	struct mlx5_fc *counter = NULL;
+	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+	bool split = !!(attr->split_count);
 	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_table *fdb;
 	int j, i = 0;
-	void *misc;
 
-	if (esw->mode != SRIOV_OFFLOADS)
+	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	if (attr->mirror_count)
-		ft = esw->fdb_table.offloads.fwd_fdb;
-	else
-		ft = esw->fdb_table.offloads.fast_fdb;
-
 	flow_act.action = attr->action;
 	/* if per-flow vlan pop/push is emulated, don't program it into the firmware */
 	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
@@ -81,66 +167,78 @@
 	}
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-		for (j = attr->mirror_count; j < attr->out_count; j++) {
-			dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-			dest[i].vport.num = attr->out_rep[j]->vport;
-			dest[i].vport.vhca_id =
-				MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
-			dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+		if (attr->dest_chain) {
+			struct mlx5_flow_table *ft;
+
+			ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0);
+			if (IS_ERR(ft)) {
+				rule = ERR_CAST(ft);
+				goto err_create_goto_table;
+			}
+
+			dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+			dest[i].ft = ft;
 			i++;
+		} else {
+			for (j = attr->split_count; j < attr->out_count; j++) {
+				dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+				dest[i].vport.num = attr->dests[j].rep->vport;
+				dest[i].vport.vhca_id =
+					MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
+				if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+					dest[i].vport.flags |=
+						MLX5_FLOW_DEST_VPORT_VHCA_ID;
+				if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
+					flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+					flow_act.pkt_reformat = attr->dests[j].pkt_reformat;
+					dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+					dest[i].vport.pkt_reformat =
+						attr->dests[j].pkt_reformat;
+				}
+				i++;
+			}
 		}
 	}
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-		counter = mlx5_fc_create(esw->dev, true);
-		if (IS_ERR(counter)) {
-			rule = ERR_CAST(counter);
-			goto err_counter_alloc;
-		}
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest[i].counter = counter;
+		dest[i].counter_id = mlx5_fc_id(attr->counter);
 		i++;
 	}
 
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-	MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+	mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-		MLX5_SET(fte_match_set_misc, misc,
-			 source_eswitch_owner_vhca_id,
-			 MLX5_CAP_GEN(attr->in_mdev, vhca_id));
-
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
-				 source_eswitch_owner_vhca_id);
-
-	if (attr->match_level == MLX5_MATCH_NONE)
-		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
-	else
-		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
-					      MLX5_MATCH_MISC_PARAMETERS;
-
-	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+	if (attr->outer_match_level != MLX5_MATCH_NONE)
+		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+	if (attr->inner_match_level != MLX5_MATCH_NONE)
 		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-		flow_act.modify_id = attr->mod_hdr_id;
+		flow_act.modify_hdr = attr->modify_hdr;
 
-	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-		flow_act.encap_id = attr->encap_id;
+	fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
+	if (IS_ERR(fdb)) {
+		rule = ERR_CAST(fdb);
+		goto err_esw_get;
+	}
 
-	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
+	if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec))
+		rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
+						     &flow_act, dest, i);
+	else
+		rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
 	if (IS_ERR(rule))
 		goto err_add_rule;
 	else
-		esw->offloads.num_flows++;
+		atomic64_inc(&esw->offloads.num_flows);
 
 	return rule;
 
 err_add_rule:
-	mlx5_fc_destroy(esw->dev, counter);
-err_counter_alloc:
+	esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
+err_esw_get:
+	if (attr->dest_chain)
+		esw_put_prio_table(esw, attr->dest_chain, 1, 0);
+err_create_goto_table:
 	return rule;
 }
 
@@ -150,49 +248,89 @@
 			  struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
-	struct mlx5_flow_act flow_act = {0};
+	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+	struct mlx5_flow_table *fast_fdb;
+	struct mlx5_flow_table *fwd_fdb;
 	struct mlx5_flow_handle *rule;
-	void *misc;
 	int i;
 
+	fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
+	if (IS_ERR(fast_fdb)) {
+		rule = ERR_CAST(fast_fdb);
+		goto err_get_fast;
+	}
+
+	fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1);
+	if (IS_ERR(fwd_fdb)) {
+		rule = ERR_CAST(fwd_fdb);
+		goto err_get_fwd;
+	}
+
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	for (i = 0; i < attr->mirror_count; i++) {
+	for (i = 0; i < attr->split_count; i++) {
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-		dest[i].vport.num = attr->out_rep[i]->vport;
+		dest[i].vport.num = attr->dests[i].rep->vport;
 		dest[i].vport.vhca_id =
-			MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
-		dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+			MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
+		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+			dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+		if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
+			dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+			dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat;
+		}
 	}
 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest[i].ft = esw->fdb_table.offloads.fwd_fdb,
+	dest[i].ft = fwd_fdb;
 	i++;
 
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-	MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+	mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-		MLX5_SET(fte_match_set_misc, misc,
-			 source_eswitch_owner_vhca_id,
-			 MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+	if (attr->outer_match_level != MLX5_MATCH_NONE)
+		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
-				 source_eswitch_owner_vhca_id);
+	rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
 
-	if (attr->match_level == MLX5_MATCH_NONE)
-		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
-	else
-		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
-					      MLX5_MATCH_MISC_PARAMETERS;
+	if (IS_ERR(rule))
+		goto add_err;
 
-	rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i);
-
-	if (!IS_ERR(rule))
-		esw->offloads.num_flows++;
+	atomic64_inc(&esw->offloads.num_flows);
 
 	return rule;
+add_err:
+	esw_put_prio_table(esw, attr->chain, attr->prio, 1);
+err_get_fwd:
+	esw_put_prio_table(esw, attr->chain, attr->prio, 0);
+err_get_fast:
+	return rule;
+}
+
+static void
+__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
+			struct mlx5_flow_handle *rule,
+			struct mlx5_esw_flow_attr *attr,
+			bool fwd_rule)
+{
+	bool split = (attr->split_count > 0);
+	int i;
+
+	mlx5_del_flow_rules(rule);
+
+	/* unref the term table */
+	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+		if (attr->dests[i].termtbl)
+			mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
+	}
+
+	atomic64_dec(&esw->offloads.num_flows);
+
+	if (fwd_rule)  {
+		esw_put_prio_table(esw, attr->chain, attr->prio, 1);
+		esw_put_prio_table(esw, attr->chain, attr->prio, 0);
+	} else {
+		esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
+		if (attr->dest_chain)
+			esw_put_prio_table(esw, attr->dest_chain, 1, 0);
+	}
 }
 
 void
@@ -200,23 +338,25 @@
 				struct mlx5_flow_handle *rule,
 				struct mlx5_esw_flow_attr *attr)
 {
-	struct mlx5_fc *counter = NULL;
+	__mlx5_eswitch_del_rule(esw, rule, attr, false);
+}
 
-	counter = mlx5_flow_rule_counter(rule);
-	mlx5_del_flow_rules(rule);
-	mlx5_fc_destroy(esw->dev, counter);
-	esw->offloads.num_flows--;
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+			  struct mlx5_flow_handle *rule,
+			  struct mlx5_esw_flow_attr *attr)
+{
+	__mlx5_eswitch_del_rule(esw, rule, attr, true);
 }
 
 static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 {
 	struct mlx5_eswitch_rep *rep;
-	int vf_vport, err = 0;
+	int i, err = 0;
 
 	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
-	for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
-		rep = &esw->offloads.vport_reps[vf_vport];
-		if (!rep->rep_if[REP_ETH].valid)
+	mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
+		if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
 			continue;
 
 		err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@@ -234,7 +374,7 @@
 	struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
 
 	in_rep  = attr->in_rep;
-	out_rep = attr->out_rep[0];
+	out_rep = attr->dests[0].rep;
 
 	if (push)
 		vport = in_rep;
@@ -255,17 +395,17 @@
 		goto out_notsupp;
 
 	in_rep  = attr->in_rep;
-	out_rep = attr->out_rep[0];
+	out_rep = attr->dests[0].rep;
 
-	if (push && in_rep->vport == FDB_UPLINK_VPORT)
+	if (push && in_rep->vport == MLX5_VPORT_UPLINK)
 		goto out_notsupp;
 
-	if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+	if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
 		goto out_notsupp;
 
 	/* vport has vlan push configured, can't offload VF --> wire rules without it */
 	if (!push && !pop && fwd)
-		if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+		if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
 			goto out_notsupp;
 
 	/* protects against (1) setting rules with different vlans to push and
@@ -294,11 +434,14 @@
 
 	push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
 	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
-	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+	fwd  = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+		   !attr->dest_chain);
+
+	mutex_lock(&esw->state_lock);
 
 	err = esw_add_vlan_action_check(attr, push, pop, fwd);
 	if (err)
-		return err;
+		goto unlock;
 
 	attr->vlan_handled = false;
 
@@ -306,16 +449,16 @@
 
 	if (!push && !pop && fwd) {
 		/* tracks VF --> wire rules without vlan push action */
-		if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) {
+		if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
 			vport->vlan_refcount++;
 			attr->vlan_handled = true;
 		}
 
-		return 0;
+		goto unlock;
 	}
 
 	if (!push && !pop)
-		return 0;
+		goto unlock;
 
 	if (!(offloads->vlan_push_pop_refcount)) {
 		/* it's the 1st vlan rule, apply global vlan pop policy */
@@ -340,6 +483,8 @@
 out:
 	if (!err)
 		attr->vlan_handled = true;
+unlock:
+	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
@@ -362,14 +507,16 @@
 	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
 
+	mutex_lock(&esw->state_lock);
+
 	vport = esw_vlan_action_get_vport(attr, push, pop);
 
 	if (!push && !pop && fwd) {
 		/* tracks VF --> wire rules without vlan push action */
-		if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT)
+		if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
 			vport->vlan_refcount--;
 
-		return 0;
+		goto out;
 	}
 
 	if (push) {
@@ -387,17 +534,19 @@
 skip_unset_push:
 	offloads->vlan_push_pop_refcount--;
 	if (offloads->vlan_push_pop_refcount)
-		return 0;
+		goto out;
 
 	/* no more vlan rules, stop global vlan pop policy */
 	err = esw_set_global_vlan_pop(esw, 0);
 
 out:
+	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
 struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
+				    u32 sqn)
 {
 	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest = {};
@@ -413,7 +562,8 @@
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
-	MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+	/* source vport is the esw manager */
+	MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -439,6 +589,204 @@
 	mlx5_del_flow_rules(rule);
 }
 
+static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
+{
+	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+	u8 fdb_to_vport_reg_c_id;
+	int err;
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+		return 0;
+
+	err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+						   out, sizeof(out));
+	if (err)
+		return err;
+
+	fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+					 esw_vport_context.fdb_to_vport_reg_c_id);
+
+	if (enable)
+		fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+	else
+		fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+
+	MLX5_SET(modify_esw_vport_context_in, in,
+		 esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+	MLX5_SET(modify_esw_vport_context_in, in,
+		 field_select.fdb_to_vport_reg_c_id, 1);
+
+	return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+						     in, sizeof(in));
+}
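
esw_set_passing_vport_metadata() is a query/modify read-modify-write: read
the current fdb_to_vport_reg_c_id byte, flip only the REG_C_0 bit, and
write it back with field_select marking the one field touched. A reduced
sketch with stand-in types (not the mlx5 command interface):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define REG_C_0 0x1u

	struct vport_ctx { uint8_t fdb_to_vport_reg_c_id; };

	static void set_passing_metadata(struct vport_ctx *ctx, bool enable)
	{
		uint8_t val = ctx->fdb_to_vport_reg_c_id;	/* "query" */

		if (enable)
			val |= REG_C_0;		/* touch only the bit we own */
		else
			val &= ~REG_C_0;

		ctx->fdb_to_vport_reg_c_id = val;	/* "modify" */
	}

	int main(void)
	{
		struct vport_ctx ctx = { .fdb_to_vport_reg_c_id = 0x80 };

		set_passing_metadata(&ctx, true);
		/* prints 0x81: the unrelated high bit is preserved */
		printf("0x%02x\n", ctx.fdb_to_vport_reg_c_id);
		return 0;
	}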
+
+static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+				  struct mlx5_core_dev *peer_dev,
+				  struct mlx5_flow_spec *spec,
+				  struct mlx5_flow_destination *dest)
+{
+	void *misc;
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    misc_parameters_2);
+		MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+	} else {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters);
+
+		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+			 MLX5_CAP_GEN(peer_dev, vhca_id));
+
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    misc_parameters);
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+				 source_eswitch_owner_vhca_id);
+	}
+
+	dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest->vport.num = peer_dev->priv.eswitch->manager_vport;
+	dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
+	dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+}
+
+static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+					       struct mlx5_eswitch *peer_esw,
+					       struct mlx5_flow_spec *spec,
+					       u16 vport)
+{
+	void *misc;
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters_2);
+		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+			 mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+								   vport));
+	} else {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters);
+		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+	}
+}
+
+static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+				       struct mlx5_core_dev *peer_dev)
+{
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {0};
+	struct mlx5_flow_handle **flows;
+	struct mlx5_flow_handle *flow;
+	struct mlx5_flow_spec *spec;
+	/* the total number of vports is the same for both e-switches */
+	int nvports = esw->total_vports;
+	void *misc;
+	int err, i;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	peer_miss_rules_setup(esw, peer_dev, spec, &dest);
+
+	flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
+	if (!flows) {
+		err = -ENOMEM;
+		goto alloc_flows_err;
+	}
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    misc_parameters);
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+						   spec, MLX5_VPORT_PF);
+
+		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+					   spec, &flow_act, &dest, 1);
+		if (IS_ERR(flow)) {
+			err = PTR_ERR(flow);
+			goto add_pf_flow_err;
+		}
+		flows[MLX5_VPORT_PF] = flow;
+	}
+
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
+		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+					   spec, &flow_act, &dest, 1);
+		if (IS_ERR(flow)) {
+			err = PTR_ERR(flow);
+			goto add_ecpf_flow_err;
+		}
+		flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+	}
+
+	mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
+		esw_set_peer_miss_rule_source_port(esw,
+						   peer_dev->priv.eswitch,
+						   spec, i);
+
+		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+					   spec, &flow_act, &dest, 1);
+		if (IS_ERR(flow)) {
+			err = PTR_ERR(flow);
+			goto add_vf_flow_err;
+		}
+		flows[i] = flow;
+	}
+
+	esw->fdb_table.offloads.peer_miss_rules = flows;
+
+	kvfree(spec);
+	return 0;
+
+add_vf_flow_err:
+	nvports = --i;
+	mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
+		mlx5_del_flow_rules(flows[i]);
+
+	if (mlx5_ecpf_vport_exists(esw->dev))
+		mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+add_ecpf_flow_err:
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+		mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+add_pf_flow_err:
+	esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
+	kvfree(flows);
+alloc_flows_err:
+	kvfree(spec);
+	return err;
+}
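
The error path above is the usual kernel unwind idiom: each acquisition
gets a label, and a failure jumps to the label that releases everything
acquired before it, in reverse order. Reduced to its skeleton (names
invented for illustration):

	#include <stdio.h>
	#include <stdlib.h>

	static void *a, *b;

	static int setup(void)
	{
		int err;

		a = malloc(16);
		if (!a)
			return -1;

		b = malloc(16);
		if (!b) {
			err = -1;
			goto err_b;
		}

		return 0;

	err_b:
		free(a);	/* undo in reverse order of acquisition */
		a = NULL;
		return err;
	}

	int main(void)
	{
		if (!setup()) {
			free(b);	/* teardown mirrors the unwind, extended */
			free(a);
		}
		return 0;
	}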
+
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+{
+	struct mlx5_flow_handle **flows;
+	int i;
+
+	flows = esw->fdb_table.offloads.peer_miss_rules;
+
+	mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
+					       mlx5_core_max_vfs(esw->dev))
+		mlx5_del_flow_rules(flows[i]);
+
+	if (mlx5_ecpf_vport_exists(esw->dev))
+		mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+		mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+
+	kvfree(flows);
+}
+
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_act flow_act = {0};
@@ -465,7 +813,7 @@
 	dmac_c[0] = 0x01;
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-	dest.vport.num = 0;
+	dest.vport.num = esw->manager_vport;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
 	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
@@ -501,90 +849,211 @@
 
 #define ESW_OFFLOADS_NUM_GROUPS  4
 
-static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+/* Firmware currently supports 4 pools of 4 sizes (ESW_POOLS) and a
+ * virtual memory region of 16M (ESW_SIZE); this region is duplicated
+ * for each flow table pool. We can allocate up to 16M from each pool,
+ * and we keep track of how much we have used via put_sz_to_pool() and
+ * get_sz_from_pool(). Firmware doesn't report any of this for now.
+ * ESW_POOLS is expected to be sorted from large to small.
+ */
+#define ESW_SIZE (16 * 1024 * 1024)
+const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
+				    64 * 1024, 4 * 1024 };
+
+static int
+get_sz_from_pool(struct mlx5_eswitch *esw)
 {
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_namespace *root_ns;
-	struct mlx5_flow_table *fdb = NULL;
-	int esw_size, err = 0;
-	u32 flags = 0;
-	u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
-				MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+	int sz = 0, i;
 
-	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
-	if (!root_ns) {
-		esw_warn(dev, "Failed to get FDB flow namespace\n");
-		err = -EOPNOTSUPP;
-		goto out_namespace;
+	for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
+		if (esw->fdb_table.offloads.fdb_left[i]) {
+			--esw->fdb_table.offloads.fdb_left[i];
+			sz = ESW_POOLS[i];
+			break;
+		}
 	}
 
-	esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
-		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
-		  max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
-
-	esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
-			 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-
-	if (mlx5_esw_has_fwd_fdb(dev))
-		esw_size >>= 1;
-
-	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
-		flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
-
-	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
-						  esw_size,
-						  ESW_OFFLOADS_NUM_GROUPS, 0,
-						  flags);
-	if (IS_ERR(fdb)) {
-		err = PTR_ERR(fdb);
-		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
-		goto out_namespace;
-	}
-	esw->fdb_table.offloads.fast_fdb = fdb;
-
-	if (!mlx5_esw_has_fwd_fdb(dev))
-		goto out_namespace;
-
-	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
-						  esw_size,
-						  ESW_OFFLOADS_NUM_GROUPS, 1,
-						  flags);
-	if (IS_ERR(fdb)) {
-		err = PTR_ERR(fdb);
-		esw_warn(dev, "Failed to create fwd table err %d\n", err);
-		goto out_ft;
-	}
-	esw->fdb_table.offloads.fwd_fdb = fdb;
-
-	return err;
-
-out_ft:
-	mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
-out_namespace:
-	return err;
+	return sz;
 }
 
-static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+static void
+put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
 {
-	if (mlx5_esw_has_fwd_fdb(esw->dev))
-		mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb);
-	mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
+		if (sz >= ESW_POOLS[i]) {
+			++esw->fdb_table.offloads.fdb_left[i];
+			break;
+		}
+	}
+}
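
A self-contained sketch of this bookkeeping: each pool gets a budget of
ESW_SIZE/size tables, get_sz_from_pool() hands out the largest size with
budget left, and put_sz_to_pool() credits the largest pool whose size the
freed table still covers (the array is sorted large to small):

	#include <stdio.h>

	#define ESW_SIZE (16 * 1024 * 1024)
	static const unsigned int pools[4] = { 4u << 20, 1u << 20, 64u << 10, 4u << 10 };
	static unsigned int left[4];

	static void pools_init(unsigned int fdb_max)
	{
		for (int i = 0; i < 4; i++)
			left[i] = pools[i] <= fdb_max ? ESW_SIZE / pools[i] : 0;
	}

	static unsigned int pool_get(void)
	{
		for (int i = 0; i < 4; i++)
			if (left[i]) {
				left[i]--;
				return pools[i];
			}
		return 0;	/* maps to -ENOSPC in the driver */
	}

	static void pool_put(unsigned int sz)
	{
		for (int i = 0; i < 4; i++)
			if (sz >= pools[i]) {
				left[i]++;
				break;
			}
	}

	int main(void)
	{
		pools_init(2u << 20);	/* device caps tables at 2M entries */
		unsigned int sz = pool_get();

		printf("got %u\n", sz);	/* 1048576: the 4M pool was disabled */
		pool_put(sz);
		return 0;
	}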
+
+static struct mlx5_flow_table *
+create_next_size_table(struct mlx5_eswitch *esw,
+		       struct mlx5_flow_namespace *ns,
+		       u16 table_prio,
+		       int level,
+		       u32 flags)
+{
+	struct mlx5_flow_table *fdb;
+	int sz;
+
+	sz = get_sz_from_pool(esw);
+	if (!sz)
+		return ERR_PTR(-ENOSPC);
+
+	fdb = mlx5_create_auto_grouped_flow_table(ns,
+						  table_prio,
+						  sz,
+						  ESW_OFFLOADS_NUM_GROUPS,
+						  level,
+						  flags);
+	if (IS_ERR(fdb)) {
+		esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n",
+			 (int)PTR_ERR(fdb), table_prio, level, sz);
+		put_sz_to_pool(esw, sz);
+	}
+
+	return fdb;
+}
+
+static struct mlx5_flow_table *
+esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_table *fdb = NULL;
+	struct mlx5_flow_namespace *ns;
+	int table_prio, l = 0;
+	u32 flags = 0;
+
+	if (chain == FDB_SLOW_PATH_CHAIN)
+		return esw->fdb_table.offloads.slow_fdb;
+
+	mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
+
+	fdb = fdb_prio_table(esw, chain, prio, level).fdb;
+	if (fdb) {
+		/* take ref on earlier levels as well */
+		while (level >= 0)
+			fdb_prio_table(esw, chain, prio, level--).num_rules++;
+		mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+		return fdb;
+	}
+
+	ns = mlx5_get_fdb_sub_ns(dev, chain);
+	if (!ns) {
+		esw_warn(dev, "Failed to get FDB sub namespace\n");
+		mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+			  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+	table_prio = (chain * FDB_MAX_PRIO) + prio - 1;
+
+	/* create earlier levels for correct fs_core lookup when
+	 * connecting tables
+	 */
+	for (l = 0; l <= level; l++) {
+		if (fdb_prio_table(esw, chain, prio, l).fdb) {
+			fdb_prio_table(esw, chain, prio, l).num_rules++;
+			continue;
+		}
+
+		fdb = create_next_size_table(esw, ns, table_prio, l, flags);
+		if (IS_ERR(fdb)) {
+			l--;
+			goto err_create_fdb;
+		}
+
+		fdb_prio_table(esw, chain, prio, l).fdb = fdb;
+		fdb_prio_table(esw, chain, prio, l).num_rules = 1;
+	}
+
+	mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+	return fdb;
+
+err_create_fdb:
+	mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+	if (l >= 0)
+		esw_put_prio_table(esw, chain, prio, l);
+
+	return fdb;
+}
+
+static void
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
+{
+	int l;
+
+	if (chain == FDB_SLOW_PATH_CHAIN)
+		return;
+
+	mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
+
+	for (l = level; l >= 0; l--) {
+		if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0)
+			continue;
+
+		put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte);
+		mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb);
+		fdb_prio_table(esw, chain, prio, l).fdb = NULL;
+	}
+
+	mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+}
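
esw_get_prio_table()/esw_put_prio_table() reference-count every
(chain, prio, level) slot, and taking level N also pins levels 0..N-1 so
fs_core can connect the tables. A minimal sketch of that counting for a
single chain/prio, without locking:

	#include <stdio.h>

	#define LEVELS 2
	static int num_rules[LEVELS];

	static void prio_get(int level)
	{
		for (int l = level; l >= 0; l--)
			num_rules[l]++;	/* pin this level and all below it */
	}

	static void prio_put(int level)
	{
		for (int l = level; l >= 0; l--)
			if (--num_rules[l] == 0)
				printf("destroy table at level %d\n", l);
	}

	int main(void)
	{
		prio_get(1);	/* split rule: pins levels 0 and 1 */
		prio_get(0);	/* plain rule: pins level 0 only */
		prio_put(1);	/* destroys level 1; level 0 still pinned */
		prio_put(0);	/* destroys level 0 */
		return 0;
	}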
+
+static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
+{
+	/* If lazy creation isn't supported, deref the fast path tables */
+	if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) {
+		esw_put_prio_table(esw, 0, 1, 1);
+		esw_put_prio_table(esw, 0, 1, 0);
+	}
 }
 
 #define MAX_PF_SQ 256
 #define MAX_SQ_NVPORTS 32
 
+static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+					   u32 *flow_group_in)
+{
+	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+					    flow_group_in,
+					    match_criteria);
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		MLX5_SET(create_flow_group_in, flow_group_in,
+			 match_criteria_enable,
+			 MLX5_MATCH_MISC_PARAMETERS_2);
+
+		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+				 misc_parameters_2.metadata_reg_c_0);
+	} else {
+		MLX5_SET(create_flow_group_in, flow_group_in,
+			 match_criteria_enable,
+			 MLX5_MATCH_MISC_PARAMETERS);
+
+		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+				 misc_parameters.source_port);
+	}
+}
+
 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_core_dev *dev = esw->dev;
+	u32 *flow_group_in, max_flow_counter;
 	struct mlx5_flow_namespace *root_ns;
 	struct mlx5_flow_table *fdb = NULL;
-	int table_size, ix, err = 0;
+	int table_size, ix, err = 0, i;
 	struct mlx5_flow_group *g;
+	u32 flags = 0, fdb_max;
 	void *match_criteria;
-	u32 *flow_group_in;
 	u8 *dmac;
 
 	esw_debug(esw->dev, "Create offloads FDB Tables\n");
@@ -598,13 +1067,38 @@
 		err = -EOPNOTSUPP;
 		goto ns_err;
 	}
+	esw->fdb_table.offloads.ns = root_ns;
+	err = mlx5_flow_namespace_set_mode(root_ns,
+					   esw->dev->priv.steering->mode);
+	if (err) {
+		esw_warn(dev, "Failed to set FDB namespace steering mode\n");
+		goto ns_err;
+	}
 
-	err = esw_create_offloads_fast_fdb_table(esw);
-	if (err)
-		goto fast_fdb_err;
+	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+	fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
 
-	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+	esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n",
+		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
+		  max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
+		  fdb_max);
 
+	for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++)
+		esw->fdb_table.offloads.fdb_left[i] =
+			ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
+
+	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+		MLX5_ESW_MISS_FLOWS + esw->total_vports;
+
+	/* create the slow path FDB with encap enabled, so that further table
+	 * instances can be created at run time while VFs are probed, if the
+	 * FW allows it.
+	 */
+	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+			  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+	ft_attr.flags = flags;
 	ft_attr.max_fte = table_size;
 	ft_attr.prio = FDB_SLOW_PATH;
 
@@ -616,8 +1110,19 @@
 	}
 	esw->fdb_table.offloads.slow_fdb = fdb;
 
+	/* If lazy creation isn't supported, open the fast path tables now */
+	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
+	    esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+		esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
+		esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n");
+		esw_get_prio_table(esw, 0, 1, 0);
+		esw_get_prio_table(esw, 0, 1, 1);
+	} else {
+		esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n");
+		esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
+	}
+
 	/* create send-to-vport group */
-	memset(flow_group_in, 0, inlen);
 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
 		 MLX5_MATCH_MISC_PARAMETERS);
 
@@ -638,6 +1143,36 @@
 	}
 	esw->fdb_table.offloads.send_to_vport_grp = g;
 
+	/* create peer esw miss group */
+	memset(flow_group_in, 0, inlen);
+
+	esw_set_flow_group_source_port(esw, flow_group_in);
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+					      flow_group_in,
+					      match_criteria);
+
+		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+				 misc_parameters.source_eswitch_owner_vhca_id);
+
+		MLX5_SET(create_flow_group_in, flow_group_in,
+			 source_eswitch_owner_vhca_id_valid, 1);
+	}
+
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 ix + esw->total_vports - 1);
+	ix += esw->total_vports;
+
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
+		goto peer_miss_err;
+	}
+	esw->fdb_table.offloads.peer_miss_grp = g;
+
 	/* create miss group */
 	memset(flow_group_in, 0, inlen);
 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -649,7 +1184,8 @@
 	dmac[0] = 0x01;
 
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 ix + MLX5_ESW_MISS_FLOWS);
 
 	g = mlx5_create_flow_group(fdb, flow_group_in);
 	if (IS_ERR(g)) {
@@ -663,18 +1199,22 @@
 	if (err)
 		goto miss_rule_err;
 
+	esw->nvports = nvports;
 	kvfree(flow_group_in);
 	return 0;
 
 miss_rule_err:
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 miss_err:
+	mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+peer_miss_err:
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 send_vport_err:
+	esw_destroy_offloads_fast_fdb_tables(esw);
 	mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
 slow_fdb_err:
-	esw_destroy_offloads_fast_fdb_table(esw);
-fast_fdb_err:
+	/* Holds true only as long as DMFS is the default */
+	mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS);
 ns_err:
 	kvfree(flow_group_in);
 	return err;
@@ -682,20 +1222,24 @@
 
 static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 {
-	if (!esw->fdb_table.offloads.fast_fdb)
+	if (!esw->fdb_table.offloads.slow_fdb)
 		return;
 
 	esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
 	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
 	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+	mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
 	mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
-	esw_destroy_offloads_fast_fdb_table(esw);
+	esw_destroy_offloads_fast_fdb_tables(esw);
+	/* Holds true only as long as DMFS is the default */
+	mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
+				     MLX5_FLOW_STEERING_MODE_DMFS);
 }
 
-static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_core_dev *dev = esw->dev;
@@ -709,7 +1253,7 @@
 		return -EOPNOTSUPP;
 	}
 
-	ft_attr.max_fte = dev->priv.sriov.num_vfs + 2;
+	ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
 
 	ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
 	if (IS_ERR(ft_offloads)) {
@@ -729,28 +1273,20 @@
 	mlx5_destroy_flow_table(offloads->ft_offloads);
 }
 
-static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_group *g;
-	struct mlx5_priv *priv = &esw->dev->priv;
 	u32 *flow_group_in;
-	void *match_criteria, *misc;
 	int err = 0;
-	int nvports = priv->sriov.num_vfs + 2;
 
+	nvports = nvports + MLX5_ESW_MISS_FLOWS;
 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
 
 	/* create vport rx group */
-	memset(flow_group_in, 0, inlen);
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-		 MLX5_MATCH_MISC_PARAMETERS);
-
-	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-	misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
-	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+	esw_set_flow_group_source_port(esw, flow_group_in);
 
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
@@ -775,10 +1311,10 @@
 }
 
 struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
+				  struct mlx5_flow_destination *dest)
 {
 	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
@@ -789,19 +1325,28 @@
 		goto out;
 	}
 
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-	MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+			 mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
 
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+		MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
 
-	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
-	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-	dest.tir_num = tirn;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+	} else {
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+	}
 
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
-					&flow_act, &dest, 1);
+					&flow_act, dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
 		goto out;
@@ -812,29 +1357,36 @@
 	return flow_rule;
 }
 
-static int esw_offloads_start(struct mlx5_eswitch *esw)
+static int esw_offloads_start(struct mlx5_eswitch *esw,
+			      struct netlink_ext_ack *extack)
 {
-	int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+	int err, err1;
 
-	if (esw->mode != SRIOV_LEGACY) {
-		esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
+	if (esw->mode != MLX5_ESWITCH_LEGACY &&
+	    !mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Can't set offloads mode, SRIOV legacy not enabled");
 		return -EINVAL;
 	}
 
-	mlx5_eswitch_disable_sriov(esw);
-	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+	mlx5_eswitch_disable(esw);
+	mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
+	err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
 	if (err) {
-		esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
-		err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
-		if (err1)
-			esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1);
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed setting eswitch to offloads");
+		err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
+		if (err1) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Failed setting eswitch back to legacy");
+		}
 	}
 	if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
 		if (mlx5_eswitch_inline_mode_get(esw,
-						 num_vfs,
 						 &esw->offloads.inline_mode)) {
 			esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
-			esw_warn(esw->dev, "Inline mode is different between vports\n");
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Inline mode is different between vports");
 		}
 	}
 	return err;
@@ -847,88 +1399,189 @@
 
 int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 {
-	int total_vfs = MLX5_TOTAL_VPORTS(esw->dev);
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_esw_offload *offloads;
+	int total_vports = esw->total_vports;
 	struct mlx5_eswitch_rep *rep;
-	u8 hw_id[ETH_ALEN];
-	int vport;
+	int vport_index;
+	u8 rep_type;
 
-	esw->offloads.vport_reps = kcalloc(total_vfs,
+	esw->offloads.vport_reps = kcalloc(total_vports,
 					   sizeof(struct mlx5_eswitch_rep),
 					   GFP_KERNEL);
 	if (!esw->offloads.vport_reps)
 		return -ENOMEM;
 
-	offloads = &esw->offloads;
-	mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+	mlx5_esw_for_all_reps(esw, vport_index, rep) {
+		rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
+		rep->vport_index = vport_index;
 
-	for (vport = 0; vport < total_vfs; vport++) {
-		rep = &offloads->vport_reps[vport];
-
-		rep->vport = vport;
-		ether_addr_copy(rep->hw_id, hw_id);
+		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+			atomic_set(&rep->rep_data[rep_type].state,
+				   REP_UNREGISTERED);
 	}
 
-	offloads->vport_reps[0].vport = FDB_UPLINK_VPORT;
-
 	return 0;
 }
 
-static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
-					  u8 rep_type)
+static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
+				      struct mlx5_eswitch_rep *rep, u8 rep_type)
 {
-	struct mlx5_eswitch_rep *rep;
-	int vport;
-
-	for (vport = nvports - 1; vport >= 0; vport--) {
-		rep = &esw->offloads.vport_reps[vport];
-		if (!rep->rep_if[rep_type].valid)
-			continue;
-
-		rep->rep_if[rep_type].unload(rep);
-	}
+	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
+			   REP_LOADED, REP_REGISTERED) == REP_LOADED)
+		esw->offloads.rep_ops[rep_type]->unload(rep);
 }
 
-static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+	struct mlx5_eswitch_rep *rep;
+
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+	}
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+	}
+
+	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+	__esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
+				   u8 rep_type)
+{
+	struct mlx5_eswitch_rep *rep;
+	int i;
+
+	mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports)
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
 {
 	u8 rep_type = NUM_REP_TYPES;
 
 	while (rep_type-- > 0)
-		esw_offloads_unload_reps_type(esw, nvports, rep_type);
+		__unload_reps_vf_vport(esw, nvports, rep_type);
 }
 
-static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
-				       u8 rep_type)
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+	__unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
+
+	/* Special vports must be the last to unload. */
+	__unload_reps_special_vport(esw, rep_type);
+}
+
+static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw)
+{
+	u8 rep_type = NUM_REP_TYPES;
+
+	while (rep_type-- > 0)
+		__unload_reps_all_vport(esw, rep_type);
+}
+
+static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
+				   struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+	int err = 0;
+
+	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
+			   REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
+		err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
+		if (err)
+			atomic_set(&rep->rep_data[rep_type].state,
+				   REP_REGISTERED);
+	}
+
+	return err;
+}
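
The load/unload paths serialize through a single compare-and-swap on the
per-rep state: only the caller that wins the REGISTERED -> LOADED
transition runs the load callback, and a failed load is rolled back to
REGISTERED. A standalone sketch of the same state machine with C11
atomics:

	#include <stdatomic.h>
	#include <stdio.h>

	enum { REP_UNREGISTERED, REP_REGISTERED, REP_LOADED };

	static atomic_int state = REP_REGISTERED;

	static int rep_load(void)
	{
		int expected = REP_REGISTERED;

		/* only one caller wins the transition and performs the load */
		if (atomic_compare_exchange_strong(&state, &expected, REP_LOADED))
			return 0;
		return -1;	/* already loaded or never registered: no-op */
	}

	static void rep_unload(void)
	{
		int expected = REP_LOADED;

		/* harmless if the rep was never loaded */
		atomic_compare_exchange_strong(&state, &expected, REP_REGISTERED);
	}

	int main(void)
	{
		printf("first load: %d\n", rep_load());		/* 0 */
		printf("second load: %d\n", rep_load());	/* -1, no-op */
		rep_unload();
		return 0;
	}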
+
+static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
-	int vport;
 	int err;
 
-	for (vport = 0; vport < nvports; vport++) {
-		rep = &esw->offloads.vport_reps[vport];
-		if (!rep->rep_if[rep_type].valid)
-			continue;
+	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+	err = __esw_offloads_load_rep(esw, rep, rep_type);
+	if (err)
+		return err;
 
-		err = rep->rep_if[rep_type].load(esw->dev, rep);
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+		err = __esw_offloads_load_rep(esw, rep, rep_type);
 		if (err)
-			goto err_reps;
+			goto err_pf;
+	}
+
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+		err = __esw_offloads_load_rep(esw, rep, rep_type);
+		if (err)
+			goto err_ecpf;
 	}
 
 	return 0;
 
-err_reps:
-	esw_offloads_unload_reps_type(esw, vport, rep_type);
+err_ecpf:
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+	}
+
+err_pf:
+	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+	__esw_offloads_unload_rep(esw, rep, rep_type);
 	return err;
 }
 
-static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
+				u8 rep_type)
+{
+	struct mlx5_eswitch_rep *rep;
+	int err, i;
+
+	mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) {
+		err = __esw_offloads_load_rep(esw, rep, rep_type);
+		if (err)
+			goto err_vf;
+	}
+
+	return 0;
+
+err_vf:
+	__unload_reps_vf_vport(esw, --i, rep_type);
+	return err;
+}
+
+static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+	int err;
+
+	/* Special vports must be loaded first; the uplink rep creates the mdev resource. */
+	err = __load_reps_special_vport(esw, rep_type);
+	if (err)
+		return err;
+
+	err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
+	if (err)
+		goto err_vfs;
+
+	return 0;
+
+err_vfs:
+	__unload_reps_special_vport(esw, rep_type);
+	return err;
+}
+
+static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
 {
 	u8 rep_type = 0;
 	int err;
 
 	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
-		err = esw_offloads_load_reps_type(esw, nvports, rep_type);
+		err = __load_reps_vf_vport(esw, nvports, rep_type);
 		if (err)
 			goto err_reps;
 	}
@@ -937,34 +1590,478 @@
 
 err_reps:
 	while (rep_type-- > 0)
-		esw_offloads_unload_reps_type(esw, nvports, rep_type);
+		__unload_reps_vf_vport(esw, nvports, rep_type);
 	return err;
 }
 
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw)
+{
+	u8 rep_type = 0;
+	int err;
+
+	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+		err = __load_reps_all_vport(esw, rep_type);
+		if (err)
+			goto err_reps;
+	}
+
+	return err;
+
+err_reps:
+	while (rep_type-- > 0)
+		__unload_reps_all_vport(esw, rep_type);
+	return err;
+}
+
+#define ESW_OFFLOADS_DEVCOM_PAIR	(0)
+#define ESW_OFFLOADS_DEVCOM_UNPAIR	(1)
+
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+				  struct mlx5_eswitch *peer_esw)
 {
 	int err;
 
-	err = esw_create_offloads_fdb_tables(esw, nvports);
+	err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
 	if (err)
 		return err;
 
-	err = esw_create_offloads_table(esw);
-	if (err)
-		goto create_ft_err;
+	return 0;
+}
 
-	err = esw_create_vport_rx_group(esw);
-	if (err)
-		goto create_fg_err;
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+{
+	mlx5e_tc_clean_fdb_peer_flows(esw);
+	esw_del_fdb_peer_miss_rules(esw);
+}
 
-	err = esw_offloads_load_reps(esw, nvports);
-	if (err)
-		goto err_reps;
+static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
+					 struct mlx5_eswitch *peer_esw,
+					 bool pair)
+{
+	struct mlx5_flow_root_namespace *peer_ns;
+	struct mlx5_flow_root_namespace *ns;
+	int err;
+
+	peer_ns = peer_esw->dev->priv.steering->fdb_root_ns;
+	ns = esw->dev->priv.steering->fdb_root_ns;
+
+	if (pair) {
+		err = mlx5_flow_namespace_set_peer(ns, peer_ns);
+		if (err)
+			return err;
+
+		err = mlx5_flow_namespace_set_peer(peer_ns, ns);
+		if (err) {
+			mlx5_flow_namespace_set_peer(ns, NULL);
+			return err;
+		}
+	} else {
+		mlx5_flow_namespace_set_peer(ns, NULL);
+		mlx5_flow_namespace_set_peer(peer_ns, NULL);
+	}
+
+	return 0;
+}
+
+static int mlx5_esw_offloads_devcom_event(int event,
+					  void *my_data,
+					  void *event_data)
+{
+	struct mlx5_eswitch *esw = my_data;
+	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+	struct mlx5_eswitch *peer_esw = event_data;
+	int err;
+
+	switch (event) {
+	case ESW_OFFLOADS_DEVCOM_PAIR:
+		if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+		    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+			break;
+
+		err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
+		if (err)
+			goto err_out;
+		err = mlx5_esw_offloads_pair(esw, peer_esw);
+		if (err)
+			goto err_peer;
+
+		err = mlx5_esw_offloads_pair(peer_esw, esw);
+		if (err)
+			goto err_pair;
+
+		mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+		break;
+
+	case ESW_OFFLOADS_DEVCOM_UNPAIR:
+		if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+			break;
+
+		mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+		mlx5_esw_offloads_unpair(peer_esw);
+		mlx5_esw_offloads_unpair(esw);
+		mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
+		break;
+	}
 
 	return 0;
 
-err_reps:
-	esw_destroy_vport_rx_group(esw);
+err_pair:
+	mlx5_esw_offloads_unpair(esw);
+err_peer:
+	mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
+err_out:
+	mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
+		      event, err);
+	return err;
+}
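
On a PAIR event the handler wires both directions and unwinds in reverse
when a later step fails; UNPAIR tears the same steps down in the opposite
order. A reduced sketch of that symmetry (names invented):

	#include <stdio.h>

	static int pair_one_way(const char *from, const char *to, int fail)
	{
		if (fail)
			return -1;
		printf("%s -> %s paired\n", from, to);
		return 0;
	}

	static void unpair_one_way(const char *from, const char *to)
	{
		printf("%s -> %s unpaired\n", from, to);
	}

	static int pair_both(int fail_second_leg)
	{
		if (pair_one_way("esw0", "esw1", 0))
			return -1;
		if (pair_one_way("esw1", "esw0", fail_second_leg)) {
			unpair_one_way("esw0", "esw1");	/* roll back the first leg */
			return -1;
		}
		return 0;
	}

	int main(void)
	{
		/* the second leg fails, so the first leg is rolled back */
		pair_both(1);
		return 0;
	}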
+
+static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+{
+	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+	INIT_LIST_HEAD(&esw->offloads.peer_flows);
+	mutex_init(&esw->offloads.peer_mutex);
+
+	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+		return;
+
+	mlx5_devcom_register_component(devcom,
+				       MLX5_DEVCOM_ESW_OFFLOADS,
+				       mlx5_esw_offloads_devcom_event,
+				       esw);
+
+	mlx5_devcom_send_event(devcom,
+			       MLX5_DEVCOM_ESW_OFFLOADS,
+			       ESW_OFFLOADS_DEVCOM_PAIR, esw);
+}
+
+static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+{
+	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+		return;
+
+	mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+			       ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
+
+	mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
+static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
+					     struct mlx5_vport *vport)
+{
+	struct mlx5_flow_act flow_act = {0};
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	/* For prio tag mode, there is only one FTE:
+	 * 1) untagged packets - push the prio tag VLAN and modify metadata
+	 *    if required, then allow.
+	 * Unmatched traffic is allowed by default.
+	 */
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		err = -ENOMEM;
+		goto out_no_mem;
+	}
+
+	/* Untagged packets - push prio tag VLAN, allow */
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+			  MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	flow_act.vlan[0].ethtype = ETH_P_8021Q;
+	flow_act.vlan[0].vid = 0;
+	flow_act.vlan[0].prio = 0;
+
+	if (vport->ingress.modify_metadata_rule) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+		flow_act.modify_hdr = vport->ingress.modify_metadata;
+	}
+
+	vport->ingress.allow_rule =
+		mlx5_add_flow_rules(vport->ingress.acl, spec,
+				    &flow_act, NULL, 0);
+	if (IS_ERR(vport->ingress.allow_rule)) {
+		err = PTR_ERR(vport->ingress.allow_rule);
+		esw_warn(esw->dev,
+			 "vport[%d] configure ingress untagged allow rule, err(%d)\n",
+			 vport->vport, err);
+		vport->ingress.allow_rule = NULL;
+		goto out;
+	}
+
+out:
+	kvfree(spec);
+out_no_mem:
+	if (err)
+		esw_vport_cleanup_ingress_rules(esw, vport);
+	return err;
+}
+
+static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+						     struct mlx5_vport *vport)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+	static const struct mlx5_flow_spec spec = {};
+	struct mlx5_flow_act flow_act = {};
+	int err = 0;
+
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+	MLX5_SET(set_action_in, action, data,
+		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+
+	vport->ingress.modify_metadata =
+		mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+					 1, action);
+	if (IS_ERR(vport->ingress.modify_metadata)) {
+		err = PTR_ERR(vport->ingress.modify_metadata);
+		esw_warn(esw->dev,
+			 "failed to alloc modify header for vport %d ingress acl (%d)\n",
+			 vport->vport, err);
+		return err;
+	}
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	flow_act.modify_hdr = vport->ingress.modify_metadata;
+	vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+								  &spec, &flow_act, NULL, 0);
+	if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+		err = PTR_ERR(vport->ingress.modify_metadata_rule);
+		esw_warn(esw->dev,
+			 "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+			 vport->vport, err);
+		vport->ingress.modify_metadata_rule = NULL;
+		goto out;
+	}
+
+out:
+	if (err)
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
+	return err;
+}
+
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+					       struct mlx5_vport *vport)
+{
+	if (vport->ingress.modify_metadata_rule) {
+		mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
+
+		vport->ingress.modify_metadata_rule = NULL;
+	}
+}
+
+static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
+					    struct mlx5_vport *vport)
+{
+	struct mlx5_flow_act flow_act = {0};
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
+	/* For prio tag mode, there is only one FTE:
+	 * 1) prio tag packets - pop the prio tag VLAN, then allow.
+	 * Unmatched traffic is allowed by default.
+	 */
+
+	esw_vport_cleanup_egress_rules(esw, vport);
+
+	err = esw_vport_enable_egress_acl(esw, vport);
+	if (err) {
+		mlx5_core_warn(esw->dev,
+			       "failed to enable egress acl (%d) on vport[%d]\n",
+			       err, vport->vport);
+		return err;
+	}
+
+	esw_debug(esw->dev,
+		  "vport[%d] configure prio tag egress rules\n", vport->vport);
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		err = -ENOMEM;
+		goto out_no_mem;
+	}
+
+	/* prio tag vlan rule - pop it so VF receives untagged packets */
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 0);
+
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+			  MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	vport->egress.allowed_vlan =
+		mlx5_add_flow_rules(vport->egress.acl, spec,
+				    &flow_act, NULL, 0);
+	if (IS_ERR(vport->egress.allowed_vlan)) {
+		err = PTR_ERR(vport->egress.allowed_vlan);
+		esw_warn(esw->dev,
+			 "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n",
+			 vport->vport, err);
+		vport->egress.allowed_vlan = NULL;
+		goto out;
+	}
+
+out:
+	kvfree(spec);
+out_no_mem:
+	if (err)
+		esw_vport_cleanup_egress_rules(esw, vport);
+	return err;
+}
+
+static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+					   struct mlx5_vport *vport)
+{
+	int err;
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+	    !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
+	esw_vport_cleanup_ingress_rules(esw, vport);
+
+	err = esw_vport_enable_ingress_acl(esw, vport);
+	if (err) {
+		esw_warn(esw->dev,
+			 "failed to enable ingress acl (%d) on vport[%d]\n",
+			 err, vport->vport);
+		return err;
+	}
+
+	esw_debug(esw->dev,
+		  "vport[%d] configure ingress rules\n", vport->vport);
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+		if (err)
+			goto out;
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+	    mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+		err = esw_vport_ingress_prio_tag_config(esw, vport);
+		if (err)
+			goto out;
+	}
+
+out:
+	if (err)
+		esw_vport_disable_ingress_acl(esw, vport);
+	return err;
+}
+
+static bool
+esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+{
+	if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+		return false;
+
+	if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+	      MLX5_FDB_TO_VPORT_REG_C_0))
+		return false;
+
+	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+		return false;
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+	    mlx5_ecpf_vport_exists(esw->dev))
+		return false;
+
+	return true;
+}
+
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	int i, j;
+	int err;
+
+	if (esw_check_vport_match_metadata_supported(esw))
+		esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		err = esw_vport_ingress_common_config(esw, vport);
+		if (err)
+			goto err_ingress;
+
+		if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+			err = esw_vport_egress_prio_tag_config(esw, vport);
+			if (err)
+				goto err_egress;
+		}
+	}
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+		esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
+	return 0;
+
+err_egress:
+	esw_vport_disable_ingress_acl(esw, vport);
+err_ingress:
+	for (j = MLX5_VPORT_PF; j < i; j++) {
+		vport = &esw->vports[j];
+		esw_vport_disable_egress_acl(esw, vport);
+		esw_vport_disable_ingress_acl(esw, vport);
+	}
+
+	return err;
+}
+
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	int i;
+
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		esw_vport_disable_egress_acl(esw, vport);
+		esw_vport_disable_ingress_acl(esw, vport);
+	}
+
+	esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
+}
+
+static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
+{
+	int num_vfs = esw->esw_funcs.num_vfs;
+	int total_vports;
+	int err;
+
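+	/* When the ECPF manages the e-switch it steers all vports;
+	 * otherwise only the host VFs plus the special (PF/ECPF/uplink)
+	 * vports need steering entries.
+	 */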
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+		total_vports = esw->total_vports;
+	else
+		total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
+
+	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
+	mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
+
+	err = esw_create_offloads_acl_tables(esw);
+	if (err)
+		return err;
+
+	err = esw_create_offloads_fdb_tables(esw, total_vports);
+	if (err)
+		goto create_fdb_err;
+
+	err = esw_create_offloads_table(esw, total_vports);
+	if (err)
+		goto create_ft_err;
+
+	err = esw_create_vport_rx_group(esw, total_vports);
+	if (err)
+		goto create_fg_err;
+
+	return 0;
 
 create_fg_err:
 	esw_destroy_offloads_table(esw);
@@ -972,44 +2069,165 @@
 create_ft_err:
 	esw_destroy_offloads_fdb_tables(esw);
 
-	return err;
-}
-
-static int esw_offloads_stop(struct mlx5_eswitch *esw)
-{
-	int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
-
-	mlx5_eswitch_disable_sriov(esw);
-	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
-	if (err) {
-		esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
-		err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
-		if (err1)
-			esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
-	}
-
-	/* enable back PF RoCE */
-	mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+create_fdb_err:
+	esw_destroy_offloads_acl_tables(esw);
 
 	return err;
 }
 
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
 {
-	esw_offloads_unload_reps(esw, nvports);
 	esw_destroy_vport_rx_group(esw);
 	esw_destroy_offloads_table(esw);
 	esw_destroy_offloads_fdb_tables(esw);
+	esw_destroy_offloads_acl_tables(esw);
+}
+
+static void
+esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
+{
+	bool host_pf_disabled;
+	u16 new_num_vfs;
+
+	new_num_vfs = MLX5_GET(query_esw_functions_out, out,
+			       host_params_context.host_num_of_vfs);
+	host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
+				    host_params_context.host_pf_disabled);
+
+	if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
+		return;
+
+	/* Number of VFs can only change from "0 to x" or "x to 0". */
+	if (esw->esw_funcs.num_vfs > 0) {
+		esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs);
+	} else {
+		int err;
+
+		err = esw_offloads_load_vf_reps(esw, new_num_vfs);
+		if (err)
+			return;
+	}
+	esw->esw_funcs.num_vfs = new_num_vfs;
+}
+
+static void esw_functions_changed_event_handler(struct work_struct *work)
+{
+	struct mlx5_host_work *host_work;
+	struct mlx5_eswitch *esw;
+	const u32 *out;
+
+	host_work = container_of(work, struct mlx5_host_work, work);
+	esw = host_work->esw;
+
+	out = mlx5_esw_query_functions(esw->dev);
+	if (IS_ERR(out))
+		goto out;
+
+	esw_vfs_changed_event_handler(esw, out);
+	kvfree(out);
+out:
+	kfree(host_work);
+}
+
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
+{
+	struct mlx5_esw_functions *esw_funcs;
+	struct mlx5_host_work *host_work;
+	struct mlx5_eswitch *esw;
+
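+	/* Called from the EQ notifier chain, which may be atomic context,
+	 * hence GFP_ATOMIC; the real handling is deferred to esw->work_queue.
+	 */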
+	host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
+	if (!host_work)
+		return NOTIFY_DONE;
+
+	esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
+	esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);
+
+	host_work->esw = esw;
+
+	INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
+	queue_work(esw->work_queue, &host_work->work);
+
+	return NOTIFY_OK;
+}
+
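+/* Switch the e-switch into offloads mode: enable RoCE, create the steering
+ * tables, arrange for source-vport metadata to pass between e-switches,
+ * enable the PF/VF vports and finally load the representors.
+ */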
+int esw_offloads_enable(struct mlx5_eswitch *esw)
+{
+	int err;
+
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
+	    MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+	else
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+
+	mlx5_rdma_enable_roce(esw->dev);
+	err = esw_offloads_steering_init(esw);
+	if (err)
+		goto err_steering_init;
+
+	err = esw_set_passing_vport_metadata(esw, true);
+	if (err)
+		goto err_vport_metadata;
+
+	mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
+
+	err = esw_offloads_load_all_reps(esw);
+	if (err)
+		goto err_reps;
+
+	esw_offloads_devcom_init(esw);
+	mutex_init(&esw->offloads.termtbl_mutex);
+
+	return 0;
+
+err_reps:
+	mlx5_eswitch_disable_pf_vf_vports(esw);
+	esw_set_passing_vport_metadata(esw, false);
+err_vport_metadata:
+	esw_offloads_steering_cleanup(esw);
+err_steering_init:
+	mlx5_rdma_disable_roce(esw->dev);
+	return err;
+}
+
+static int esw_offloads_stop(struct mlx5_eswitch *esw,
+			     struct netlink_ext_ack *extack)
+{
+	int err, err1;
+
+	mlx5_eswitch_disable(esw);
+	err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
+		err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
+		if (err1) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Failed setting eswitch back to offloads");
+		}
+	}
+
+	return err;
+}
+
+void esw_offloads_disable(struct mlx5_eswitch *esw)
+{
+	esw_offloads_devcom_cleanup(esw);
+	esw_offloads_unload_all_reps(esw);
+	mlx5_eswitch_disable_pf_vf_vports(esw);
+	esw_set_passing_vport_metadata(esw, false);
+	esw_offloads_steering_cleanup(esw);
+	mlx5_rdma_disable_roce(esw->dev);
+	esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 
 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
 {
 	switch (mode) {
 	case DEVLINK_ESWITCH_MODE_LEGACY:
-		*mlx5_mode = SRIOV_LEGACY;
+		*mlx5_mode = MLX5_ESWITCH_LEGACY;
 		break;
 	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
-		*mlx5_mode = SRIOV_OFFLOADS;
+		*mlx5_mode = MLX5_ESWITCH_OFFLOADS;
 		break;
 	default:
 		return -EINVAL;
@@ -1021,10 +2239,10 @@
 static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
 {
 	switch (mlx5_mode) {
-	case SRIOV_LEGACY:
+	case MLX5_ESWITCH_LEGACY:
 		*mode = DEVLINK_ESWITCH_MODE_LEGACY;
 		break;
-	case SRIOV_OFFLOADS:
+	case MLX5_ESWITCH_OFFLOADS:
 		*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
 		break;
 	default:
@@ -1088,13 +2306,15 @@
 	if(!MLX5_ESWITCH_MANAGER(dev))
 		return -EPERM;
 
-	if (dev->priv.eswitch->mode == SRIOV_NONE)
+	if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+	    !mlx5_core_is_ecpf_esw_manager(dev))
 		return -EOPNOTSUPP;
 
 	return 0;
 }
 
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+				  struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	u16 cur_mlx5_mode, mlx5_mode = 0;
@@ -1113,9 +2333,9 @@
 		return 0;
 
 	if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
-		return esw_offloads_start(dev->priv.eswitch);
+		return esw_offloads_start(dev->priv.eswitch, extack);
 	else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
-		return esw_offloads_stop(dev->priv.eswitch);
+		return esw_offloads_stop(dev->priv.eswitch, extack);
 	else
 		return -EINVAL;
 }
@@ -1132,11 +2352,12 @@
 	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
-int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+					 struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
-	int err, vport;
+	int err, vport, num_vport;
 	u8 mlx5_mode;
 
 	err = mlx5_devlink_eswitch_check(devlink);
@@ -1149,14 +2370,15 @@
 			return 0;
 		/* fall through */
 	case MLX5_CAP_INLINE_MODE_L2:
-		esw_warn(dev, "Inline mode can't be set\n");
+		NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
 		return -EOPNOTSUPP;
 	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
 		break;
 	}
 
-	if (esw->offloads.num_flows > 0) {
-		esw_warn(dev, "Can't set inline mode when flows are configured\n");
+	if (atomic64_read(&esw->offloads.num_flows) > 0) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Can't set inline mode when flows are configured");
 		return -EOPNOTSUPP;
 	}
 
@@ -1164,11 +2386,11 @@
 	if (err)
 		goto out;
 
-	for (vport = 1; vport < esw->enabled_vports; vport++) {
+	mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
 		err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
 		if (err) {
-			esw_warn(dev, "Failed to set min inline on vport %d\n",
-				 vport);
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Failed to set min inline on vport");
 			goto revert_inline_mode;
 		}
 	}
@@ -1177,7 +2399,8 @@
 	return 0;
 
 revert_inline_mode:
-	while (--vport > 0)
+	num_vport = --vport;
+	mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
 		mlx5_modify_nic_vport_min_inline(dev,
 						 vport,
 						 esw->offloads.inline_mode);
@@ -1198,7 +2421,7 @@
 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
 
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
 {
 	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 	struct mlx5_core_dev *dev = esw->dev;
@@ -1207,7 +2430,7 @@
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
 
-	if (esw->mode == SRIOV_NONE)
+	if (esw->mode == MLX5_ESWITCH_NONE)
 		return -EOPNOTSUPP;
 
 	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
@@ -1222,9 +2445,10 @@
 	}
 
 query_vports:
-	for (vport = 1; vport <= nvfs; vport++) {
+	mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
+	mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
 		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
-		if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+		if (prev_mlx5_mode != mlx5_mode)
 			return -EINVAL;
 		prev_mlx5_mode = mlx5_mode;
 	}
@@ -1234,7 +2458,9 @@
 	return 0;
 }
 
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode encap,
+					struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -1245,14 +2471,14 @@
 		return err;
 
 	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
-	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
 	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
 		return -EOPNOTSUPP;
 
 	if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
 		return -EOPNOTSUPP;
 
-	if (esw->mode == SRIOV_LEGACY) {
+	if (esw->mode == MLX5_ESWITCH_LEGACY) {
 		esw->offloads.encap = encap;
 		return 0;
 	}
@@ -1260,24 +2486,30 @@
 	if (esw->offloads.encap == encap)
 		return 0;
 
-	if (esw->offloads.num_flows > 0) {
-		esw_warn(dev, "Can't set encapsulation when flows are configured\n");
+	if (atomic64_read(&esw->offloads.num_flows) > 0) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Can't set encapsulation when flows are configured");
 		return -EOPNOTSUPP;
 	}
 
-	esw_destroy_offloads_fast_fdb_table(esw);
+	esw_destroy_offloads_fdb_tables(esw);
 
 	esw->offloads.encap = encap;
-	err = esw_create_offloads_fast_fdb_table(esw);
+
+	err = esw_create_offloads_fdb_tables(esw, esw->nvports);
+
 	if (err) {
-		esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err);
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed re-creating fast FDB table");
 		esw->offloads.encap = !encap;
-		(void)esw_create_offloads_fast_fdb_table(esw);
+		(void)esw_create_offloads_fdb_tables(esw, esw->nvports);
 	}
+
 	return err;
 }
 
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode *encap)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -1291,78 +2523,86 @@
 	return 0;
 }
 
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-				     int vport_index,
-				     struct mlx5_eswitch_rep_if *__rep_if,
-				     u8 rep_type)
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+				      const struct mlx5_eswitch_rep_ops *ops,
+				      u8 rep_type)
 {
-	struct mlx5_esw_offload *offloads = &esw->offloads;
-	struct mlx5_eswitch_rep_if *rep_if;
-
-	rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
-
-	rep_if->load   = __rep_if->load;
-	rep_if->unload = __rep_if->unload;
-	rep_if->get_proto_dev = __rep_if->get_proto_dev;
-	rep_if->priv = __rep_if->priv;
-
-	rep_if->valid = true;
-}
-EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
-
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport_index, u8 rep_type)
-{
-	struct mlx5_esw_offload *offloads = &esw->offloads;
+	struct mlx5_eswitch_rep_data *rep_data;
 	struct mlx5_eswitch_rep *rep;
+	int i;
 
-	rep = &offloads->vport_reps[vport_index];
-
-	if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
-		rep->rep_if[rep_type].unload(rep);
-
-	rep->rep_if[rep_type].valid = false;
+	esw->offloads.rep_ops[rep_type] = ops;
+	mlx5_esw_for_all_reps(esw, i, rep) {
+		rep_data = &rep->rep_data[rep_type];
+		atomic_set(&rep_data->state, REP_REGISTERED);
+	}
 }
-EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
+
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
+{
+	struct mlx5_eswitch_rep *rep;
+	int i;
+
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+		__unload_reps_all_vport(esw, rep_type);
+
+	mlx5_esw_for_all_reps(esw, i, rep)
+		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
+}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
-#define UPLINK_REP_INDEX 0
-	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
-	rep = &offloads->vport_reps[UPLINK_REP_INDEX];
-	return rep->rep_if[rep_type].priv;
+	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+	return rep->rep_data[rep_type].priv;
 }
 
 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
-				 int vport,
+				 u16 vport,
 				 u8 rep_type)
 {
-	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
-	if (vport == FDB_UPLINK_VPORT)
-		vport = UPLINK_REP_INDEX;
+	rep = mlx5_eswitch_get_rep(esw, vport);
 
-	rep = &offloads->vport_reps[vport];
-
-	if (rep->rep_if[rep_type].valid &&
-	    rep->rep_if[rep_type].get_proto_dev)
-		return rep->rep_if[rep_type].get_proto_dev(rep);
+	if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+	    esw->offloads.rep_ops[rep_type]->get_proto_dev)
+		return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
 	return NULL;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
 
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
 {
-	return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+	return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
 }
 EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
 
 struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
-						int vport)
+						u16 vport)
 {
-	return &esw->offloads.vport_reps[vport];
+	return mlx5_eswitch_get_rep(esw, vport);
 }
 EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+	return vport_num >= MLX5_VPORT_FIRST_VF &&
+	       vport_num <= esw->dev->priv.sriov.max_vfs;
+}
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+	return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
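+/* Metadata for source-vport matching: the 16-bit vhca_id in the upper half
+ * of the value and the vport number in the lower half, so the value stays
+ * unique even across e-switches of a multi-host setup.
+ */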
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+					      u16 vport_num)
+{
+	return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
new file mode 100644
index 0000000..366bda1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <linux/mlx5/fs.h>
+#include "eswitch.h"
+
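+/* Termination tables are cached in esw->offloads.termtbl_tbl, keyed by a
+ * hash of the flow actions and the vport destination and refcounted, so
+ * rules with an identical action/destination pair share a single table.
+ */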
+struct mlx5_termtbl_handle {
+	struct hlist_node termtbl_hlist;
+
+	struct mlx5_flow_table *termtbl;
+	struct mlx5_flow_act flow_act;
+	struct mlx5_flow_destination dest;
+
+	struct mlx5_flow_handle *rule;
+	int ref_count;
+};
+
+static u32
+mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act,
+			  struct mlx5_flow_destination *dest)
+{
+	u32 hash;
+
+	hash = jhash_1word(flow_act->action, 0);
+	hash = jhash((const void *)&flow_act->vlan,
+		     sizeof(flow_act->vlan), hash);
+	hash = jhash((const void *)&dest->vport.num,
+		     sizeof(dest->vport.num), hash);
+	hash = jhash((const void *)&dest->vport.vhca_id,
+		     sizeof(dest->vport.vhca_id), hash);
+	return hash;
+}
+
+static int
+mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1,
+			 struct mlx5_flow_destination *dest1,
+			 struct mlx5_flow_act *flow_act2,
+			 struct mlx5_flow_destination *dest2)
+{
+	return flow_act1->action != flow_act2->action ||
+	       dest1->vport.num != dest2->vport.num ||
+	       dest1->vport.vhca_id != dest2->vport.vhca_id ||
+	       memcmp(&flow_act1->vlan, &flow_act2->vlan,
+		      sizeof(flow_act1->vlan));
+}
+
+static int
+mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
+			    struct mlx5_termtbl_handle *tt,
+			    struct mlx5_flow_act *flow_act)
+{
+	static const struct mlx5_flow_spec spec = {};
+	struct mlx5_flow_namespace *root_ns;
+	int prio, flags;
+	int err;
+
+	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* As this is the terminating action, the termination table is at the
+	 * same prio as the slow path.
+	 */
+	prio = FDB_SLOW_PATH;
+	flags = MLX5_FLOW_TABLE_TERMINATION;
+	tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1,
+							  0, flags);
+	if (IS_ERR(tt->termtbl)) {
+		esw_warn(dev, "Failed to create termination table\n");
+		return -EOPNOTSUPP;
+	}
+
+	tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act,
+				       &tt->dest, 1);
+
+	if (IS_ERR(tt->rule)) {
+		esw_warn(dev, "Failed to create termination table rule\n");
+		goto add_flow_err;
+	}
+	return 0;
+
+add_flow_err:
+	err = mlx5_destroy_flow_table(tt->termtbl);
+	if (err)
+		esw_warn(dev, "Failed to destroy termination table\n");
+
+	return -EOPNOTSUPP;
+}
+
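+/* Look up an existing termination table matching (flow_act, dest) under
+ * termtbl_mutex, or allocate and create a new one; either way the returned
+ * handle holds an extra reference, dropped via mlx5_eswitch_termtbl_put().
+ */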
+static struct mlx5_termtbl_handle *
+mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
+				struct mlx5_flow_act *flow_act,
+				struct mlx5_flow_destination *dest)
+{
+	struct mlx5_termtbl_handle *tt;
+	bool found = false;
+	u32 hash_key;
+	int err;
+
+	mutex_lock(&esw->offloads.termtbl_mutex);
+
+	hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest);
+	hash_for_each_possible(esw->offloads.termtbl_tbl, tt,
+			       termtbl_hlist, hash_key) {
+		if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest,
+					      flow_act, dest)) {
+			found = true;
+			break;
+		}
+	}
+	if (found)
+		goto tt_add_ref;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt) {
+		err = -ENOMEM;
+		goto tt_create_err;
+	}
+
+	tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	tt->dest.vport.num = dest->vport.num;
+	tt->dest.vport.vhca_id = dest->vport.vhca_id;
+	memcpy(&tt->flow_act, flow_act, sizeof(*flow_act));
+
+	err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act);
+	if (err) {
+		esw_warn(esw->dev, "Failed to create termination table\n");
+		goto tt_create_err;
+	}
+	hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key);
+tt_add_ref:
+	tt->ref_count++;
+	mutex_unlock(&esw->offloads.termtbl_mutex);
+	return tt;
+tt_create_err:
+	kfree(tt);
+	mutex_unlock(&esw->offloads.termtbl_mutex);
+	return ERR_PTR(err);
+}
+
+void
+mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
+			 struct mlx5_termtbl_handle *tt)
+{
+	mutex_lock(&esw->offloads.termtbl_mutex);
+	if (--tt->ref_count == 0)
+		hash_del(&tt->termtbl_hlist);
+	mutex_unlock(&esw->offloads.termtbl_mutex);
+
+	if (!tt->ref_count) {
+		mlx5_del_flow_rules(tt->rule);
+		mlx5_destroy_flow_table(tt->termtbl);
+		kfree(tt);
+	}
+}
+
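+/* Transfer the VLAN push actions (and their vlan attributes) from the
+ * original rule to the termination table rule, so the push is applied at
+ * the terminating hop rather than in the FDB rule itself.
+ */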
+static void
+mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
+				  struct mlx5_flow_act *dst)
+{
+	if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
+		return;
+
+	src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+	dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+	memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
+	memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
+
+	if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
+		return;
+
+	src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+	dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+	memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
+	memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+}
+
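+/* Prefer the firmware-provided flow_source hint when supported; otherwise
+ * fall back to checking whether the spec matches source_port against the
+ * uplink vport number.
+ */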
+static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
+						const struct mlx5_flow_spec *spec)
+{
+	u32 port_mask, port_value;
+
+	if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+		return spec->flow_context.flow_source ==
+					MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+	port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
+			     misc_parameters.source_port);
+	port_value = MLX5_GET(fte_match_param, spec->match_value,
+			      misc_parameters.source_port);
+	return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK;
+}
+
+bool
+mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+			      struct mlx5_flow_act *flow_act,
+			      struct mlx5_flow_spec *spec)
+{
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
+		return false;
+
+	/* push vlan on RX */
+	return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
+		mlx5_eswitch_offload_is_uplink_port(esw, spec);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
+			      struct mlx5_flow_table *fdb,
+			      struct mlx5_flow_spec *spec,
+			      struct mlx5_esw_flow_attr *attr,
+			      struct mlx5_flow_act *flow_act,
+			      struct mlx5_flow_destination *dest,
+			      int num_dest)
+{
+	struct mlx5_flow_act term_tbl_act = {};
+	struct mlx5_flow_handle *rule = NULL;
+	bool term_table_created = false;
+	int num_vport_dests = 0;
+	int i, curr_dest;
+
+	mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act);
+	term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+	for (i = 0; i < num_dest; i++) {
+		struct mlx5_termtbl_handle *tt;
+
+		/* only vport destinations can be terminated */
+		if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
+			continue;
+
+		/* get the terminating table for the action list */
+		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
+						     &dest[i]);
+		if (IS_ERR(tt)) {
+			esw_warn(esw->dev, "Failed to create termination table\n");
+			goto revert_changes;
+		}
+		attr->dests[num_vport_dests].termtbl = tt;
+		num_vport_dests++;
+
+		/* link the destination with the termination table */
+		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		dest[i].ft = tt->termtbl;
+		term_table_created = true;
+	}
+
+	/* at least one destination should reference a termination table */
+	if (!term_table_created)
+		goto revert_changes;
+
+	/* create the FTE */
+	rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
+	if (IS_ERR(rule))
+		goto revert_changes;
+
+	goto out;
+
+revert_changes:
+	/* revert the changes that were made to the original flow_act
+	 * and fall-back to the original rule actions
+	 */
+	mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act);
+
+	for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) {
+		struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl;
+
+		/* search for the destination associated with the
+		 * current term table
+		 */
+		for (i = 0; i < num_dest; i++) {
+			if (dest[i].ft != tt->termtbl)
+				continue;
+
+			memset(&dest[i], 0, sizeof(dest[i]));
+			dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+			dest[i].vport.num = tt->dest.vport.num;
+			dest[i].vport.vhca_id = tt->dest.vport.vhca_id;
+			mlx5_eswitch_termtbl_put(esw, tt);
+			break;
+		}
+	}
+	rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
+out:
+	return rule;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 0000000..8bcf342
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+	struct mlx5_nb  nb;
+	void           *ctx;
+};
+
+/* General events handlers for the low level mlx5_core driver
+ *
+ * Other Major feature specific events such as
+ * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
+ * separate notifiers callbacks, specifically by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *nb, unsigned long type, void *data);
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data);
+static int port_module(struct notifier_block *nb, unsigned long type, void *data);
+static int pcie_core(struct notifier_block *nb, unsigned long type, void *data);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data);
+
+static struct mlx5_nb events_nbs_ref[] = {
+	/* Events to be processed by mlx5_core */
+	{.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+	{.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+	{.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+	{.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+
+	/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+	/* QP/WQ resource events to forward */
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+	/* SRQ events */
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
+
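+/* A mlx5 interface driver (mlx5e/mlx5_ib) subscribes to the forwarded
+ * events above with mlx5_notifier_register(); a minimal sketch of such a
+ * callback (hypothetical name, for illustration only):
+ *
+ *	static int my_async_event(struct notifier_block *nb,
+ *				  unsigned long event, void *data)
+ *	{
+ *		struct mlx5_eqe *eqe = data;
+ *
+ *		if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ *			return NOTIFY_DONE;
+ *		... handle eqe->data.port ...
+ *		return NOTIFY_OK;
+ *	}
+ */
+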
+struct mlx5_events {
+	struct mlx5_core_dev *dev;
+	struct workqueue_struct *wq;
+	struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
+	/* driver notifier chain */
+	struct atomic_notifier_head nh;
+	/* port module events stats */
+	struct mlx5_pme_stats pme_stats;
+	/* pcie_core */
+	struct work_struct pcie_core_work;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+	switch (type) {
+	case MLX5_EVENT_TYPE_COMP:
+		return "MLX5_EVENT_TYPE_COMP";
+	case MLX5_EVENT_TYPE_PATH_MIG:
+		return "MLX5_EVENT_TYPE_PATH_MIG";
+	case MLX5_EVENT_TYPE_COMM_EST:
+		return "MLX5_EVENT_TYPE_COMM_EST";
+	case MLX5_EVENT_TYPE_SQ_DRAINED:
+		return "MLX5_EVENT_TYPE_SQ_DRAINED";
+	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+	case MLX5_EVENT_TYPE_CQ_ERROR:
+		return "MLX5_EVENT_TYPE_CQ_ERROR";
+	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+	case MLX5_EVENT_TYPE_PORT_CHANGE:
+		return "MLX5_EVENT_TYPE_PORT_CHANGE";
+	case MLX5_EVENT_TYPE_GPIO_EVENT:
+		return "MLX5_EVENT_TYPE_GPIO_EVENT";
+	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+	case MLX5_EVENT_TYPE_STALL_EVENT:
+		return "MLX5_EVENT_TYPE_STALL_EVENT";
+	case MLX5_EVENT_TYPE_CMD:
+		return "MLX5_EVENT_TYPE_CMD";
+	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
+		return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
+	case MLX5_EVENT_TYPE_PAGE_REQUEST:
+		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+	case MLX5_EVENT_TYPE_PAGE_FAULT:
+		return "MLX5_EVENT_TYPE_PAGE_FAULT";
+	case MLX5_EVENT_TYPE_PPS_EVENT:
+		return "MLX5_EVENT_TYPE_PPS_EVENT";
+	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+	case MLX5_EVENT_TYPE_FPGA_ERROR:
+		return "MLX5_EVENT_TYPE_FPGA_ERROR";
+	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+	case MLX5_EVENT_TYPE_GENERAL_EVENT:
+		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
+		return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
+	case MLX5_EVENT_TYPE_DEVICE_TRACER:
+		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+	default:
+		return "Unrecognized event";
+	}
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+			unsigned long type, void *data)
+{
+	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+	struct mlx5_events   *events   = event_nb->ctx;
+	struct mlx5_eqe      *eqe      = data;
+
+	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+		      eqe_type_str(eqe->type), eqe->sub_type);
+	return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+	struct mlx5_events   *events   = event_nb->ctx;
+	struct mlx5_eqe      *eqe      = data;
+	u64 value_lsb;
+	u64 value_msb;
+
+	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+	mlx5_core_warn(events->dev,
+		       "High temperature on sensors with bit set %llx %llx",
+		       value_msb, value_lsb);
+
+	return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
+{
+	switch (status) {
+	case MLX5_MODULE_STATUS_PLUGGED:
+		return "Cable plugged";
+	case MLX5_MODULE_STATUS_UNPLUGGED:
+		return "Cable unplugged";
+	case MLX5_MODULE_STATUS_ERROR:
+		return "Cable error";
+	case MLX5_MODULE_STATUS_DISABLED:
+		return "Cable disabled";
+	default:
+		return "Unknown status";
+	}
+}
+
+static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
+{
+	switch (error) {
+	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+		return "Power budget exceeded";
+	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
+		return "Long Range for non MLNX cable";
+	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
+		return "Bus stuck (I2C or data shorted)";
+	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
+		return "No EEPROM/retry timeout";
+	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
+		return "Enforce part number list";
+	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
+		return "Unknown identifier";
+	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
+		return "High Temperature";
+	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
+		return "Bad or shorted cable/module";
+	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
+		return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
+	default:
+		return "Unknown error";
+	}
+}
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+	struct mlx5_events   *events   = event_nb->ctx;
+	struct mlx5_eqe      *eqe      = data;
+
+	enum port_module_event_status_type module_status;
+	enum port_module_event_error_type error_type;
+	struct mlx5_eqe_port_module *module_event_eqe;
+	const char *status_str;
+	u8 module_num;
+
+	module_event_eqe = &eqe->data.port_module;
+	module_status = module_event_eqe->module_status &
+			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+	error_type = module_event_eqe->error_type &
+		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+	if (module_status < MLX5_MODULE_STATUS_NUM)
+		events->pme_stats.status_counters[module_status]++;
+
+	if (module_status == MLX5_MODULE_STATUS_ERROR &&
+	    error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+		events->pme_stats.error_counters[error_type]++;
+
+	if (!printk_ratelimit())
+		return NOTIFY_OK;
+
+	module_num = module_event_eqe->module;
+	status_str = mlx5_pme_status_to_string(module_status);
+	if (module_status == MLX5_MODULE_STATUS_ERROR) {
+		const char *error_str = mlx5_pme_error_to_string(error_type);
+
+		mlx5_core_err(events->dev,
+			      "Port module event[error]: module %u, %s, %s\n",
+			      module_num, status_str, error_str);
+	} else {
+		mlx5_core_info(events->dev,
+			       "Port module event: module %u, %s\n",
+			       module_num, status_str);
+	}
+
+	return NOTIFY_OK;
+}
+
+enum {
+	MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
+	MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
+	MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
+};
+
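+/* Deferred handler for PCI power change events: reads the MPEIN (PCIe
+ * info) register and reports whether the slot advertises enough power
+ * for the device.
+ */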
+static void mlx5_pcie_event(struct work_struct *work)
+{
+	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+	struct mlx5_events *events;
+	struct mlx5_core_dev *dev;
+	u8 power_status;
+	u16 pci_power;
+
+	events = container_of(work, struct mlx5_events, pcie_core_work);
+	dev  = events->dev;
+
+	if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
+		return;
+
+	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+			     MLX5_REG_MPEIN, 0, 0);
+	power_status = MLX5_GET(mpein_reg, out, pwr_status);
+	pci_power = MLX5_GET(mpein_reg, out, pci_power);
+
+	switch (power_status) {
+	case MLX5_PCI_POWER_COULD_NOT_BE_READ:
+		mlx5_core_info_rl(dev,
+				  "PCIe slot power capability was not advertised.\n");
+		break;
+	case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
+		mlx5_core_warn_rl(dev,
+				  "Detected insufficient power on the PCIe slot (%uW).\n",
+				  pci_power);
+		break;
+	case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
+		mlx5_core_info_rl(dev,
+				  "PCIe slot advertised sufficient power (%uW).\n",
+				  pci_power);
+		break;
+	}
+}
+
+static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
+{
+	struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
+							struct mlx5_event_nb,
+							nb);
+	struct mlx5_events      *events   = event_nb->ctx;
+	struct mlx5_eqe         *eqe      = data;
+
+	switch (eqe->sub_type) {
+	case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
+		queue_work(events->wq, &events->pcie_core_work);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+	*stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+	struct mlx5_events   *events   = event_nb->ctx;
+	struct mlx5_eqe      *eqe      = data;
+
+	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+		      eqe_type_str(eqe->type), eqe->sub_type);
+	atomic_notifier_call_chain(&events->nh, event, data);
+	return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+	if (!events)
+		return -ENOMEM;
+
+	ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+	events->dev = dev;
+	dev->priv.events = events;
+	events->wq = create_singlethread_workqueue("mlx5_events");
+	if (!events->wq) {
+		kfree(events);	/* don't leak the events struct on failure */
+		return -ENOMEM;
+	}
+	INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
+
+	return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+	destroy_workqueue(dev->priv.events->wq);
+	kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+	struct mlx5_events *events = dev->priv.events;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+		events->notifiers[i].nb  = events_nbs_ref[i];
+		events->notifiers[i].ctx = events;
+		mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+	}
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+	struct mlx5_events *events = dev->priv.events;
+	int i;
+
+	for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0; i--)
+		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+	flush_workqueue(events->wq);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+	struct mlx5_events *events = dev->priv.events;
+
+	return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+	struct mlx5_events *events = dev->priv.events;
+
+	return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+	return atomic_notifier_call_chain(&events->nh, event, data);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 8ca1d19..6102113 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -135,7 +135,7 @@
 	*conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
 	/* Make sure that doorbell record is visible before ringing */
 	wmb();
-	mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+	mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET);
 }
 
 static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
@@ -334,7 +334,7 @@
 {
 	u8 opcode, status = 0;
 
-	opcode = cqe->op_own >> 4;
+	opcode = get_cqe_opcode(cqe);
 
 	switch (opcode) {
 	case MLX5_CQE_REQ_ERR:
@@ -414,7 +414,8 @@
 	mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
 }
 
-static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq,
+				       struct mlx5_eqe *eqe)
 {
 	struct mlx5_fpga_conn *conn;
 
@@ -429,6 +430,7 @@
 	struct mlx5_fpga_device *fdev = conn->fdev;
 	struct mlx5_core_dev *mdev = fdev->mdev;
 	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 	struct mlx5_wq_param wqp;
 	struct mlx5_cqe64 *cqe;
 	int inlen, err, eqn;
@@ -462,8 +464,10 @@
 	}
 
 	err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
-	if (err)
+	if (err) {
+		kvfree(in);
 		goto err_cqwq;
+	}
 
 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
@@ -476,7 +480,7 @@
 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
 	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
 
-	err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+	err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out));
 	kvfree(in);
 
 	if (err)
@@ -867,7 +871,7 @@
 	conn->cb_arg = attr->cb_arg;
 
 	remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
-	err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+	err = mlx5_query_mac_address(fdev->mdev, remote_mac);
 	if (err) {
 		mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
 		ret = ERR_PTR(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 436a813..d046d1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -36,6 +36,7 @@
 
 #include "mlx5_core.h"
 #include "lib/mlx5.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "fpga/conn.h"
 
@@ -145,6 +146,22 @@
 	return 0;
 }
 
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, unsigned long event, void *eqe);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+	return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+	return mlx5_fpga_event(fdev, event, eqe);
+}
+
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@
 	if (err)
 		goto out;
 
+	MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+	MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
 	err = mlx5_fpga_conn_device_init(fdev);
 	if (err)
 		goto err_rsvd_gid;
@@ -201,6 +223,8 @@
 	mlx5_fpga_conn_device_cleanup(fdev);
 
 err_rsvd_gid:
+	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
 	mlx5_core_unreserve_gids(mdev, max_num_qps);
 out:
 	spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@
 	}
 
 	mlx5_fpga_conn_device_cleanup(fdev);
+	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
 	mlx5_core_unreserve_gids(mdev, max_num_qps);
 }
@@ -283,13 +310,13 @@
 	return "Unknown";
 }
 
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+			   unsigned long event, void *eqe)
 {
-	struct mlx5_fpga_device *fdev = mdev->fpga;
+	void *data = ((struct mlx5_eqe *)eqe)->data.raw;
 	const char *event_name;
 	bool teardown = false;
 	unsigned long flags;
-	u32 fpga_qpn;
 	u8 syndrome;
 
 	switch (event) {
@@ -300,12 +327,9 @@
 	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
 		syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
 		event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
-		fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
 		break;
 	default:
-		mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
-					   event);
-		return;
+		return NOTIFY_DONE;
 	}
 
 	spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +350,6 @@
 	 */
 	if (teardown)
 		mlx5_trigger_health_work(fdev->mdev);
+
+	return NOTIFY_OK;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 3e2355c..52c9dee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -35,11 +35,17 @@
 
 #ifdef CONFIG_MLX5_FPGA
 
+#include <linux/mlx5/eq.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fpga/cmd.h"
 
 /* Represents an Innova device */
 struct mlx5_fpga_device {
 	struct mlx5_core_dev *mdev;
+	struct mlx5_nb fpga_err_nb;
+	struct mlx5_nb fpga_qp_err_nb;
 	spinlock_t state_lock; /* Protects state transitions */
 	enum mlx5_fpga_status state;
 	enum mlx5_fpga_image last_admin_image;
@@ -57,32 +63,31 @@
 };
 
 #define mlx5_fpga_dbg(__adev, format, ...) \
-	dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
-		 __func__, __LINE__, current->pid, ##__VA_ARGS__)
+	mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+		       __func__, __LINE__, current->pid, ##__VA_ARGS__)
 
 #define mlx5_fpga_err(__adev, format, ...) \
-	dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
-		__func__, __LINE__, current->pid, ##__VA_ARGS__)
+	mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+		      __func__, __LINE__, current->pid, ##__VA_ARGS__)
 
 #define mlx5_fpga_warn(__adev, format, ...) \
-	dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
-		__func__, __LINE__, current->pid, ##__VA_ARGS__)
+	mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+		       __func__, __LINE__, current->pid, ##__VA_ARGS__)
 
 #define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
-	dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \
-		format, __func__, __LINE__, ##__VA_ARGS__)
+	mlx5_core_err_rl((__adev)->mdev, "FPGA: %s:%d: " \
+			 format, __func__, __LINE__, ##__VA_ARGS__)
 
 #define mlx5_fpga_notice(__adev, format, ...) \
-	dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+	mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
 
 #define mlx5_fpga_info(__adev, format, ...) \
-	dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+	mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
 
 int mlx5_fpga_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
 
 #else
 
@@ -104,11 +109,6 @@
 {
 }
 
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
-				   void *data)
-{
-}
-
 #endif
 
 #endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index b5a8769..c76da30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -636,7 +636,8 @@
 					   u8 match_criteria_enable,
 					   const u32 *match_c,
 					   const u32 *match_v,
-					   struct mlx5_flow_act *flow_act)
+					   struct mlx5_flow_act *flow_act,
+					   struct mlx5_flow_context *flow_context)
 {
 	const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   outer_headers);
@@ -655,7 +656,7 @@
 	    (match_criteria_enable &
 	     ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
 	    (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
-	     flow_act->has_flow_tag)
+	     (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
 		return false;
 
 	return true;
@@ -767,7 +768,8 @@
 					    fg->mask.match_criteria_enable,
 					    fg->mask.match_criteria,
 					    fte->val,
-					    &fte->action))
+					    &fte->action,
+					    &fte->flow_context))
 		return ERR_PTR(-EINVAL);
 	else if (!mlx5_is_fpga_ipsec_rule(mdev,
 					  fg->mask.match_criteria_enable,
@@ -989,32 +991,33 @@
 	return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
 }
 
-static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns,
 					   struct mlx5_flow_table *ft,
 					   u32 *in,
-					   unsigned int *group_id,
+					   struct mlx5_flow_group *fg,
 					   bool is_egress)
 {
-	int (*create_flow_group)(struct mlx5_core_dev *dev,
+	int (*create_flow_group)(struct mlx5_flow_root_namespace *ns,
 				 struct mlx5_flow_table *ft, u32 *in,
-				 unsigned int *group_id) =
+				 struct mlx5_flow_group *fg) =
 		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
 	char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
 					   match_criteria.misc_parameters);
+	struct mlx5_core_dev *dev = ns->dev;
 	u32 saved_outer_esp_spi_mask;
 	u8 match_criteria_enable;
 	int ret;
 
 	if (MLX5_CAP_FLOWTABLE(dev,
 			       flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
-		return create_flow_group(dev, ft, in, group_id);
+		return create_flow_group(ns, ft, in, fg);
 
 	match_criteria_enable =
 		MLX5_GET(create_flow_group_in, in, match_criteria_enable);
 	saved_outer_esp_spi_mask =
 		MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
 	if (!match_criteria_enable || !saved_outer_esp_spi_mask)
-		return create_flow_group(dev, ft, in, group_id);
+		return create_flow_group(ns, ft, in, fg);
 
 	MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
 
@@ -1023,7 +1026,7 @@
 		MLX5_SET(create_flow_group_in, in, match_criteria_enable,
 			 match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
 
-	ret = create_flow_group(dev, ft, in, group_id);
+	ret = create_flow_group(ns, ft, in, fg);
 
 	MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
 	MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
@@ -1031,17 +1034,18 @@
 	return ret;
 }
 
-static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns,
 				    struct mlx5_flow_table *ft,
 				    struct mlx5_flow_group *fg,
 				    struct fs_fte *fte,
 				    bool is_egress)
 {
-	int (*create_fte)(struct mlx5_core_dev *dev,
+	int (*create_fte)(struct mlx5_flow_root_namespace *ns,
 			  struct mlx5_flow_table *ft,
 			  struct mlx5_flow_group *fg,
 			  struct fs_fte *fte) =
 		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+	struct mlx5_core_dev *dev = ns->dev;
 	struct mlx5_fpga_device *fdev = dev->fpga;
 	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
 	struct mlx5_fpga_ipsec_rule *rule;
@@ -1053,7 +1057,7 @@
 	    !(fte->action.action &
 	      (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
 	       MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
-		return create_fte(dev, ft, fg, fte);
+		return create_fte(ns, ft, fg, fte);
 
 	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
 	if (!rule)
@@ -1070,7 +1074,7 @@
 	WARN_ON(rule_insert(fipsec, rule));
 
 	modify_spec_mailbox(dev, fte, &mbox_mod);
-	ret = create_fte(dev, ft, fg, fte);
+	ret = create_fte(ns, ft, fg, fte);
 	restore_spec_mailbox(fte, &mbox_mod);
 	if (ret) {
 		_rule_delete(fipsec, rule);
@@ -1081,19 +1085,20 @@
 	return ret;
 }
 
-static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns,
 				    struct mlx5_flow_table *ft,
-				    unsigned int group_id,
+				    struct mlx5_flow_group *fg,
 				    int modify_mask,
 				    struct fs_fte *fte,
 				    bool is_egress)
 {
-	int (*update_fte)(struct mlx5_core_dev *dev,
+	int (*update_fte)(struct mlx5_flow_root_namespace *ns,
 			  struct mlx5_flow_table *ft,
-			  unsigned int group_id,
+			  struct mlx5_flow_group *fg,
 			  int modify_mask,
 			  struct fs_fte *fte) =
 		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
+	struct mlx5_core_dev *dev = ns->dev;
 	bool is_esp = fte->action.esp_id;
 	struct mailbox_mod mbox_mod;
 	int ret;
@@ -1102,24 +1107,25 @@
 	    !(fte->action.action &
 	      (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
 	       MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
-		return update_fte(dev, ft, group_id, modify_mask, fte);
+		return update_fte(ns, ft, fg, modify_mask, fte);
 
 	modify_spec_mailbox(dev, fte, &mbox_mod);
-	ret = update_fte(dev, ft, group_id, modify_mask, fte);
+	ret = update_fte(ns, ft, fg, modify_mask, fte);
 	restore_spec_mailbox(fte, &mbox_mod);
 
 	return ret;
 }
 
-static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns,
 				    struct mlx5_flow_table *ft,
 				    struct fs_fte *fte,
 				    bool is_egress)
 {
-	int (*delete_fte)(struct mlx5_core_dev *dev,
+	int (*delete_fte)(struct mlx5_flow_root_namespace *ns,
 			  struct mlx5_flow_table *ft,
 			  struct fs_fte *fte) =
 		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+	struct mlx5_core_dev *dev = ns->dev;
 	struct mlx5_fpga_device *fdev = dev->fpga;
 	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
 	struct mlx5_fpga_ipsec_rule *rule;
@@ -1131,7 +1137,7 @@
 	    !(fte->action.action &
 	      (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
 	       MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
-		return delete_fte(dev, ft, fte);
+		return delete_fte(ns, ft, fte);
 
 	rule = rule_search(fipsec, fte);
 	if (!rule)
@@ -1141,84 +1147,84 @@
 	rule_delete(fipsec, rule);
 
 	modify_spec_mailbox(dev, fte, &mbox_mod);
-	ret = delete_fte(dev, ft, fte);
+	ret = delete_fte(ns, ft, fte);
 	restore_spec_mailbox(fte, &mbox_mod);
 
 	return ret;
 }
 
 static int
-mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns,
 					    struct mlx5_flow_table *ft,
 					    u32 *in,
-					    unsigned int *group_id)
+					    struct mlx5_flow_group *fg)
 {
-	return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+	return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true);
 }
 
 static int
-mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns,
 				     struct mlx5_flow_table *ft,
 				     struct mlx5_flow_group *fg,
 				     struct fs_fte *fte)
 {
-	return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+	return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true);
 }
 
 static int
-mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns,
 				     struct mlx5_flow_table *ft,
-				     unsigned int group_id,
+				     struct mlx5_flow_group *fg,
 				     int modify_mask,
 				     struct fs_fte *fte)
 {
-	return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+	return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
 					true);
 }
 
 static int
-mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns,
 				     struct mlx5_flow_table *ft,
 				     struct fs_fte *fte)
 {
-	return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+	return fpga_ipsec_fs_delete_fte(ns, ft, fte, true);
 }
 
 static int
-mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns,
 					     struct mlx5_flow_table *ft,
 					     u32 *in,
-					     unsigned int *group_id)
+					     struct mlx5_flow_group *fg)
 {
-	return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+	return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false);
 }
 
 static int
-mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
 				      struct mlx5_flow_group *fg,
 				      struct fs_fte *fte)
 {
-	return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+	return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false);
 }
 
 static int
-mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
-				      unsigned int group_id,
+				      struct mlx5_flow_group *fg,
 				      int modify_mask,
 				      struct fs_fte *fte)
 {
-	return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+	return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
 					false);
 }
 
 static int
-mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
 				      struct fs_fte *fte)
 {
-	return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+	return fpga_ipsec_fs_delete_fte(ns, ft, fte, false);
 }
 
 static struct mlx5_flow_cmds fpga_ipsec_ingress;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 2b5e63b..382985e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -37,8 +37,6 @@
 #include "accel/ipsec.h"
 #include "fs_cmd.h"
 
-#ifdef CONFIG_MLX5_FPGA
-
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
 unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
@@ -66,77 +64,4 @@
 const struct mlx5_flow_cmds *
 mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
 
-#else
-
-static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
-	return 0;
-}
-
-static inline unsigned int
-mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
-	return 0;
-}
-
-static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
-						u64 *counters)
-{
-	return 0;
-}
-
-static inline void *
-mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
-			      struct mlx5_accel_esp_xfrm *accel_xfrm,
-			      const __be32 saddr[4],
-			      const __be32 daddr[4],
-			      const __be32 spi, bool is_ipv6)
-{
-	return NULL;
-}
-
-static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
-{
-}
-
-static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
-{
-	return 0;
-}
-
-static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
-}
-
-static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
-{
-}
-
-static inline struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
-			  const struct mlx5_accel_esp_xfrm_attrs *attrs,
-			  u32 flags)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
-}
-
-static inline int
-mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
-			  const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
-	return mlx5_fs_cmd_get_default(type);
-}
-
-#endif /* CONFIG_MLX5_FPGA */
-
 #endif	/* __MLX5_FPGA_SADB_H__ */
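The !CONFIG_MLX5_FPGA inline stubs are dropped from this header, leaving only the unconditional declarations. Users such as fs_core.c include this header directly, so this only builds cleanly if the FPGA IPsec symbols (or equivalent fallbacks) remain available to all of its users after the update; the compile-time no-op layer presumably lives elsewhere now, for example behind the accel/ipsec wrappers included above, rather than in this header.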
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index 5cf5f2a..22a2ef1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -148,14 +148,16 @@
 	return ret;
 }
 
-static void mlx5_fpga_tls_release_swid(struct idr *idr,
-				       spinlock_t *idr_spinlock, u32 swid)
+static void *mlx5_fpga_tls_release_swid(struct idr *idr,
+					spinlock_t *idr_spinlock, u32 swid)
 {
 	unsigned long flags;
+	void *ptr;
 
 	spin_lock_irqsave(idr_spinlock, flags);
-	idr_remove(idr, swid);
+	ptr = idr_remove(idr, swid);
 	spin_unlock_irqrestore(idr_spinlock, flags);
+	return ptr;
 }
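mlx5_fpga_tls_release_swid() now returns what idr_remove() yielded: the pointer previously stored at that index, or NULL if the slot was empty. One locked operation therefore both unpublishes the swid and tells the caller whether a flow was actually installed. Sketch of the caller-side pattern this enables (the swid-release hunk further down uses exactly this shape):

	void *flow;

	flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
					  &tls->tx_idr_spinlock, swid);
	if (!flow)
		return;		/* nothing was mapped at this swid */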
 
 static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
@@ -165,20 +167,12 @@
 	kfree(buf);
 }
 
-struct mlx5_teardown_stream_context {
-	struct mlx5_fpga_tls_command_context cmd;
-	u32 swid;
-};
-
 static void
 mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
 				  struct mlx5_fpga_device *fdev,
 				  struct mlx5_fpga_tls_command_context *cmd,
 				  struct mlx5_fpga_dma_buf *resp)
 {
-	struct mlx5_teardown_stream_context *ctx =
-		    container_of(cmd, struct mlx5_teardown_stream_context, cmd);
-
 	if (resp) {
 		u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
 
@@ -186,14 +180,6 @@
 			mlx5_fpga_err(fdev,
 				      "Teardown stream failed with syndrome = %d",
 				      syndrome);
-		else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
-			mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
-						   &fdev->tls->tx_idr_spinlock,
-						   ctx->swid);
-		else
-			mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
-						   &fdev->tls->rx_idr_spinlock,
-						   ctx->swid);
 	}
 	mlx5_fpga_tls_put_command_ctx(cmd);
 }
@@ -225,8 +211,14 @@
 
 	rcu_read_lock();
 	flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
-	rcu_read_unlock();
+	if (unlikely(!flow)) {
+		rcu_read_unlock();
+		WARN_ONCE(1, "Received NULL pointer for handle\n");
+		kfree(buf);
+		return -EINVAL;
+	}
 	mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+	rcu_read_unlock();
 
 	MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
 	MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
@@ -238,6 +230,8 @@
 	buf->complete = mlx_tls_kfree_complete;
 
 	ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+	if (ret < 0)
+		kfree(buf);
 
 	return ret;
 }
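Two fixes land in this resync path: the idr_find() result is checked and consumed by mlx5_fpga_tls_flow_to_cmd() before rcu_read_unlock() (previously the flow pointer was dereferenced after the read-side critical section ended, racing with teardown), and the command buffer is freed when mlx5_fpga_sbu_conn_sendmsg() fails instead of leaking. The RCU rule being enforced, in sketch form:

	rcu_read_lock();
	obj = idr_find(&idr, id);
	if (obj)
		use(obj);	/* dereference only inside the read section */
	rcu_read_unlock();	/* afterwards, a writer may free obj */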
@@ -245,7 +239,7 @@
 static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
 					    void *flow, u32 swid, gfp_t flags)
 {
-	struct mlx5_teardown_stream_context *ctx;
+	struct mlx5_fpga_tls_command_context *ctx;
 	struct mlx5_fpga_dma_buf *buf;
 	void *cmd;
 
@@ -253,7 +247,7 @@
 	if (!ctx)
 		return;
 
-	buf = &ctx->cmd.buf;
+	buf = &ctx->buf;
 	cmd = (ctx + 1);
 	MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
 	MLX5_SET(tls_cmd, cmd, swid, swid);
@@ -264,8 +258,7 @@
 	buf->sg[0].data = cmd;
 	buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
 
-	ctx->swid = swid;
-	mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+	mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
 			       mlx5_fpga_tls_teardown_completion);
 }
 
@@ -275,13 +268,14 @@
 	struct mlx5_fpga_tls *tls = mdev->fpga->tls;
 	void *flow;
 
-	rcu_read_lock();
 	if (direction_sx)
-		flow = idr_find(&tls->tx_idr, swid);
+		flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
+						  &tls->tx_idr_spinlock,
+						  swid);
 	else
-		flow = idr_find(&tls->rx_idr, swid);
-
-	rcu_read_unlock();
+		flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
+						  &tls->rx_idr_spinlock,
+						  swid);
 
 	if (!flow) {
 		mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
@@ -289,6 +283,7 @@
 		return;
 	}
 
+	synchronize_rcu(); /* before kfree(flow) */
 	mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
 }
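With the swid removed from the IDR before the teardown command is even built, the struct mlx5_teardown_stream_context wrapper deleted above becomes unnecessary: the completion handler has nothing left to unmap, so a plain command context suffices. The synchronize_rcu() implements the standard unpublish-then-wait sequence for objects looked up via idr_find() under rcu_read_lock():

	ptr = idr_remove(idr, id);	/* no new reader can find ptr */
	synchronize_rcu();		/* wait out readers already inside rcu_read_lock() */
	kfree(ptr);			/* now safe */

Here the actual free happens later in the teardown path, as the /* before kfree(flow) */ comment indicates.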
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 8e01f81..3c816e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -39,7 +39,7 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
-static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns,
 					struct mlx5_flow_table *ft,
 					u32 underlay_qpn,
 					bool disconnect)
@@ -47,47 +47,43 @@
 	return 0;
 }
 
-static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
-					   u16 vport,
-					   enum fs_flow_table_op_mod op_mod,
-					   enum fs_flow_table_type type,
-					   unsigned int level,
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns,
+					   struct mlx5_flow_table *ft,
 					   unsigned int log_size,
-					   struct mlx5_flow_table *next_ft,
-					   unsigned int *table_id, u32 flags)
+					   struct mlx5_flow_table *next_ft)
 {
 	return 0;
 }
 
-static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
 					    struct mlx5_flow_table *ft)
 {
 	return 0;
 }
 
-static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_flow_root_namespace *ns,
 					   struct mlx5_flow_table *ft,
 					   struct mlx5_flow_table *next_ft)
 {
 	return 0;
 }
 
-static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_flow_root_namespace *ns,
 					   struct mlx5_flow_table *ft,
 					   u32 *in,
-					   unsigned int *group_id)
+					   struct mlx5_flow_group *fg)
 {
 	return 0;
 }
 
-static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
 					    struct mlx5_flow_table *ft,
-					    unsigned int group_id)
+					    struct mlx5_flow_group *fg)
 {
 	return 0;
 }
 
-static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_create_fte(struct mlx5_flow_root_namespace *ns,
 				    struct mlx5_flow_table *ft,
 				    struct mlx5_flow_group *group,
 				    struct fs_fte *fte)
@@ -95,28 +91,73 @@
 	return 0;
 }
 
-static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_update_fte(struct mlx5_flow_root_namespace *ns,
 				    struct mlx5_flow_table *ft,
-				    unsigned int group_id,
+				    struct mlx5_flow_group *group,
 				    int modify_mask,
 				    struct fs_fte *fte)
 {
 	return -EOPNOTSUPP;
 }
 
-static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns,
 				    struct mlx5_flow_table *ft,
 				    struct fs_fte *fte)
 {
 	return 0;
 }
 
-static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+					       int reformat_type,
+					       size_t size,
+					       void *reformat_data,
+					       enum mlx5_flow_namespace_type namespace,
+					       struct mlx5_pkt_reformat *pkt_reformat)
+{
+	return 0;
+}
+
+static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+						  struct mlx5_pkt_reformat *pkt_reformat)
+{
+}
+
+static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+					     u8 namespace, u8 num_actions,
+					     void *modify_actions,
+					     struct mlx5_modify_hdr *modify_hdr)
+{
+	return 0;
+}
+
+static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+						struct mlx5_modify_hdr *modify_hdr)
+{
+}
+
+static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
+				  struct mlx5_flow_root_namespace *peer_ns)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+	return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 				   struct mlx5_flow_table *ft, u32 underlay_qpn,
 				   bool disconnect)
 {
 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
+	struct mlx5_core_dev *dev = ns->dev;
 
 	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
 	    underlay_qpn == 0)
@@ -143,43 +184,49 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
-				      u16 vport,
-				      enum fs_flow_table_op_mod op_mod,
-				      enum fs_flow_table_type type,
-				      unsigned int level,
+static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
+				      struct mlx5_flow_table *ft,
 				      unsigned int log_size,
-				      struct mlx5_flow_table *next_ft,
-				      unsigned int *table_id, u32 flags)
+				      struct mlx5_flow_table *next_ft)
 {
-	int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
+	int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
+	int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+	int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
+	struct mlx5_core_dev *dev = ns->dev;
 	int err;
 
 	MLX5_SET(create_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
 
-	MLX5_SET(create_flow_table_in, in, table_type, type);
-	MLX5_SET(create_flow_table_in, in, flow_table_context.level, level);
+	MLX5_SET(create_flow_table_in, in, table_type, ft->type);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level);
 	MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size);
-	if (vport) {
-		MLX5_SET(create_flow_table_in, in, vport_number, vport);
+	if (ft->vport) {
+		MLX5_SET(create_flow_table_in, in, vport_number, ft->vport);
 		MLX5_SET(create_flow_table_in, in, other_vport, 1);
 	}
 
 	MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
-		 en_encap_decap);
-	MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
-		 en_encap_decap);
+		 en_decap);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
+		 en_encap);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table,
+		 term);
 
-	switch (op_mod) {
+	switch (ft->op_mod) {
 	case FS_FT_OP_MOD_NORMAL:
 		if (next_ft) {
 			MLX5_SET(create_flow_table_in, in,
-				 flow_table_context.table_miss_action, 1);
+				 flow_table_context.table_miss_action,
+				 MLX5_FLOW_TABLE_MISS_ACTION_FWD);
 			MLX5_SET(create_flow_table_in, in,
 				 flow_table_context.table_miss_id, next_ft->id);
+		} else {
+			MLX5_SET(create_flow_table_in, in,
+				 flow_table_context.table_miss_action,
+				 ft->def_miss_action);
 		}
 		break;
 
@@ -194,16 +241,17 @@
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
-		*table_id = MLX5_GET(create_flow_table_out, out,
-				     table_id);
+		ft->id = MLX5_GET(create_flow_table_out, out,
+				  table_id);
 	return err;
 }
 
-static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
 				       struct mlx5_flow_table *ft)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
+	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(destroy_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
@@ -217,12 +265,13 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
 				      struct mlx5_flow_table *next_ft)
 {
 	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
+	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(modify_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_FLOW_TABLE);
@@ -249,26 +298,29 @@
 			 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
 		if (next_ft) {
 			MLX5_SET(modify_flow_table_in, in,
-				 flow_table_context.table_miss_action, 1);
+				 flow_table_context.table_miss_action,
+				 MLX5_FLOW_TABLE_MISS_ACTION_FWD);
 			MLX5_SET(modify_flow_table_in, in,
 				 flow_table_context.table_miss_id,
 				 next_ft->id);
 		} else {
 			MLX5_SET(modify_flow_table_in, in,
-				 flow_table_context.table_miss_action, 0);
+				 flow_table_context.table_miss_action,
+				 ft->def_miss_action);
 		}
 	}
 
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
 				      u32 *in,
-				      unsigned int *group_id)
+				      struct mlx5_flow_group *fg)
 {
 	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_core_dev *dev = ns->dev;
 	int err;
 
 	MLX5_SET(create_flow_group_in, in, opcode,
@@ -282,23 +334,24 @@
 
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
-		*group_id = MLX5_GET(create_flow_group_out, out,
-				     group_id);
+		fg->id = MLX5_GET(create_flow_group_out, out,
+				  group_id);
 	return err;
 }
 
-static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
 				       struct mlx5_flow_table *ft,
-				       unsigned int group_id)
+				       struct mlx5_flow_group *fg)
 {
 	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
+	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(destroy_flow_group_in, in, opcode,
 		 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
 	MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
 	MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
-	MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+	MLX5_SET(destroy_flow_group_in, in, group_id, fg->id);
 	if (ft->vport) {
 		MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
 		MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
@@ -307,22 +360,68 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
+static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
+				  struct fs_fte *fte, bool *extended_dest)
+{
+	int fw_log_max_fdb_encap_uplink =
+		MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+	int num_fwd_destinations = 0;
+	struct mlx5_flow_rule *dst;
+	int num_encap = 0;
+
+	*extended_dest = false;
+	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+		return 0;
+
+	list_for_each_entry(dst, &fte->node.children, node.list) {
+		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+			continue;
+		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+		    dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+			num_encap++;
+		num_fwd_destinations++;
+	}
+	if (num_fwd_destinations > 1 && num_encap > 0)
+		*extended_dest = true;
+
+	if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+		mlx5_core_warn(dev, "FW does not support extended destination");
+		return -EOPNOTSUPP;
+	}
+	if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+		mlx5_core_warn(dev, "FW does not support more than %d encaps",
+			       1 << fw_log_max_fdb_encap_uplink);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
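mlx5_set_extended_dest() decides whether the FTE needs the wider extended_dest_format: only when it forwards to more than one destination and at least one of them carries a per-vport packet-reformat ID. The capability is logarithmic, so for example log_max_fdb_encap_uplink = 2 permits at most 1 << 2 = 4 encap destinations. The separate !fw_log_max_fdb_encap_uplink test matters because a value of 0 still yields 1 << 0 = 1, so the count comparison alone would accept a single encap destination on firmware that lacks the feature entirely.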
 static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 			    int opmod, int modify_mask,
 			    struct mlx5_flow_table *ft,
 			    unsigned group_id,
 			    struct fs_fte *fte)
 {
-	unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
-		fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
 	u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+	bool extended_dest = false;
 	struct mlx5_flow_rule *dst;
 	void *in_flow_context, *vlan;
 	void *in_match_value;
+	unsigned int inlen;
+	int dst_cnt_size;
 	void *in_dests;
 	u32 *in;
 	int err;
 
+	if (mlx5_set_extended_dest(dev, fte, &extended_dest))
+		return -EOPNOTSUPP;
+
+	if (!extended_dest)
+		dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+	else
+		dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+	inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -341,11 +440,29 @@
 	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 
-	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
-	MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
-	MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
-	MLX5_SET(flow_context, in_flow_context, modify_header_id,
-		 fte->action.modify_id);
+	MLX5_SET(flow_context, in_flow_context, flow_tag,
+		 fte->flow_context.flow_tag);
+	MLX5_SET(flow_context, in_flow_context, flow_source,
+		 fte->flow_context.flow_source);
+
+	MLX5_SET(flow_context, in_flow_context, extended_destination,
+		 extended_dest);
+	if (extended_dest) {
+		u32 action;
+
+		action = fte->action.action &
+			~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+		MLX5_SET(flow_context, in_flow_context, action, action);
+	} else {
+		MLX5_SET(flow_context, in_flow_context, action,
+			 fte->action.action);
+		if (fte->action.pkt_reformat)
+			MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+				 fte->action.pkt_reformat->id);
+	}
+	if (fte->action.modify_hdr)
+		MLX5_SET(flow_context, in_flow_context, modify_header_id,
+			 fte->action.modify_hdr->id);
 
 	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
 
@@ -385,10 +502,21 @@
 				id = dst->dest_attr.vport.num;
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id_valid,
-					 dst->dest_attr.vport.vhca_id_valid);
+					 !!(dst->dest_attr.vport.flags &
+					    MLX5_FLOW_DEST_VPORT_VHCA_ID));
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id,
 					 dst->dest_attr.vport.vhca_id);
+				if (extended_dest &&
+				    dst->dest_attr.vport.pkt_reformat) {
+					MLX5_SET(dest_format_struct, in_dests,
+						 packet_reformat,
+						 !!(dst->dest_attr.vport.flags &
+						    MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+					MLX5_SET(extended_dest_format, in_dests,
+						 packet_reformat_id,
+						 dst->dest_attr.vport.pkt_reformat->id);
+				}
 				break;
 			default:
 				id = dst->dest_attr.tir_num;
@@ -397,7 +525,7 @@
 			MLX5_SET(dest_format_struct, in_dests, destination_type,
 				 type);
 			MLX5_SET(dest_format_struct, in_dests, destination_id, id);
-			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+			in_dests += dst_cnt_size;
 			list_size++;
 		}
 
@@ -417,8 +545,8 @@
 				continue;
 
 			MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
-				 dst->dest_attr.counter->id);
-			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+				 dst->dest_attr.counter_id);
+			in_dests += dst_cnt_size;
 			list_size++;
 		}
 		if (list_size > max_list_size) {
@@ -436,23 +564,25 @@
 	return err;
 }
 
-static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+static int mlx5_cmd_create_fte(struct mlx5_flow_root_namespace *ns,
 			       struct mlx5_flow_table *ft,
 			       struct mlx5_flow_group *group,
 			       struct fs_fte *fte)
 {
+	struct mlx5_core_dev *dev = ns->dev;
 	unsigned int group_id = group->id;
 
 	return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
 }
 
-static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+static int mlx5_cmd_update_fte(struct mlx5_flow_root_namespace *ns,
 			       struct mlx5_flow_table *ft,
-			       unsigned int group_id,
+			       struct mlx5_flow_group *fg,
 			       int modify_mask,
 			       struct fs_fte *fte)
 {
 	int opmod;
+	struct mlx5_core_dev *dev = ns->dev;
 	int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
 						flow_table_properties_nic_receive.
 						flow_modify_en);
@@ -460,15 +590,16 @@
 		return -EOPNOTSUPP;
 	opmod = 1;
 
-	return	mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
+	return	mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, fg->id, fte);
 }
 
-static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
 			       struct mlx5_flow_table *ft,
 			       struct fs_fte *fte)
 {
 	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
+	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
 	MLX5_SET(delete_fte_in, in, table_type, ft->type);
@@ -482,7 +613,9 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+			   u32 *id)
 {
 	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
@@ -490,6 +623,7 @@
 
 	MLX5_SET(alloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+	MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
@@ -497,6 +631,11 @@
 	return err;
 }
 
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+	return mlx5_cmd_fc_bulk_alloc(dev, 0, id);
+}
+
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
@@ -531,132 +670,107 @@
 	return 0;
 }
 
-struct mlx5_cmd_fc_bulk {
-	u32 id;
-	int num;
-	int outlen;
-	u32 out[0];
-};
-
-struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num)
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len)
 {
-	struct mlx5_cmd_fc_bulk *b;
-	int outlen =
-		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
-		MLX5_ST_SZ_BYTES(traffic_counter) * num;
-
-	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
-	if (!b)
-		return NULL;
-
-	b->id = id;
-	b->num = num;
-	b->outlen = outlen;
-
-	return b;
+	return MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len;
 }
 
-void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+			   u32 *out)
 {
-	kfree(b);
-}
-
-int
-mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
-{
+	int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
 
 	MLX5_SET(query_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
 	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
-	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
-	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
-	return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
+	MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
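The bulk-query interface is inverted: the opaque struct mlx5_cmd_fc_bulk that owned the output buffer (and its alloc/free/get accessors) is gone, and the caller now sizes and owns the buffer. A plausible caller, sketched under the assumption of the usual kvzalloc/kvfree helpers:

	int len = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
	int err;
	u32 *out;

	out = kvzalloc(len, GFP_KERNEL);
	if (!out)
		return -ENOMEM;
	err = mlx5_cmd_fc_bulk_query(dev, base_id, bulk_len, out);
	if (!err)
		/* walk query_flow_counter_out's flow_statistics[] directly */;
	kvfree(out);

This lets the counter-polling code keep one long-lived buffer across query batches instead of allocating per bulk.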
 
-void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
-			  struct mlx5_cmd_fc_bulk *b, u32 id,
-			  u64 *packets, u64 *bytes)
+static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+					  int reformat_type,
+					  size_t size,
+					  void *reformat_data,
+					  enum mlx5_flow_namespace_type namespace,
+					  struct mlx5_pkt_reformat *pkt_reformat)
 {
-	int index = id - b->id;
-	void *stats;
-
-	if (index < 0 || index >= b->num) {
-		mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
-			       id, b->id, b->id + b->num - 1);
-		return;
-	}
-
-	stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
-			     flow_statistics[index]);
-	*packets = MLX5_GET64(traffic_counter, stats, packets);
-	*bytes = MLX5_GET64(traffic_counter, stats, octets);
-}
-
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
-		     int header_type,
-		     size_t size,
-		     void *encap_header,
-		     u32 *encap_id)
-{
-	int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
-	u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
-	void *encap_header_in;
-	void *header;
+	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+	struct mlx5_core_dev *dev = ns->dev;
+	void *packet_reformat_context_in;
+	int max_encap_size;
+	void *reformat;
 	int inlen;
 	int err;
 	u32 *in;
 
+	if (namespace == MLX5_FLOW_NAMESPACE_FDB)
+		max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
+	else
+		max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
+
 	if (size > max_encap_size) {
 		mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
 			       size, max_encap_size);
 		return -EINVAL;
 	}
 
-	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size,
+	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size,
 		     GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
-	encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
-	header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
-	inlen = header - (void *)in  + size;
+	packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in,
+						  in, packet_reformat_context);
+	reformat = MLX5_ADDR_OF(packet_reformat_context_in,
+				packet_reformat_context_in,
+				reformat_data);
+	inlen = reformat - (void *)in  + size;
 
 	memset(in, 0, inlen);
-	MLX5_SET(alloc_encap_header_in, in, opcode,
-		 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
-	MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
-	MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
-	memcpy(header, encap_header, size);
+	MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+		 reformat_data_size, size);
+	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+		 reformat_type, reformat_type);
+	memcpy(reformat, reformat_data, size);
 
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
-	*encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+	pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
+				    out, packet_reformat_id);
 	kfree(in);
 	return err;
 }
 
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
+static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+					     struct mlx5_pkt_reformat *pkt_reformat)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
+	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
+	struct mlx5_core_dev *dev = ns->dev;
 
 	memset(in, 0, sizeof(in));
-	MLX5_SET(dealloc_encap_header_in, in, opcode,
-		 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
-	MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+		 pkt_reformat->id);
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
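This is the encap to packet-reformat rename tracking the firmware interface: ALLOC/DEALLOC_ENCAP_HEADER becomes ALLOC/DEALLOC_PACKET_REFORMAT_CONTEXT, and the raw encap_id is replaced by a struct mlx5_pkt_reformat whose ->id the command layer fills in. The alloc path also selects the size limit per namespace now: FDB reformat data is bounded by the eswitch max_encap_header_size capability, everything else by the NIC flow-table one. Sketched call through the new ops table (hypothetical caller; the real one lives in fs_core):

	err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
						reformat_data, ns_type,
						pkt_reformat);
	if (!err)
		/* pkt_reformat->id is now valid for use in FTEs */;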
 
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
-			     u8 namespace, u8 num_actions,
-			     void *modify_actions, u32 *modify_header_id)
+static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+					u8 namespace, u8 num_actions,
+					void *modify_actions,
+					struct mlx5_modify_hdr *modify_hdr)
 {
 	u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
 	int max_actions, actions_size, inlen, err;
+	struct mlx5_core_dev *dev = ns->dev;
 	void *actions_in;
 	u8 table_type;
 	u32 *in;
@@ -667,9 +781,18 @@
 		table_type = FS_FT_FDB;
 		break;
 	case MLX5_FLOW_NAMESPACE_KERNEL:
+	case MLX5_FLOW_NAMESPACE_BYPASS:
 		max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
 		table_type = FS_FT_NIC_RX;
 		break;
+	case MLX5_FLOW_NAMESPACE_EGRESS:
+		max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
+		table_type = FS_FT_NIC_TX;
+		break;
+	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+		max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
+		table_type = FS_FT_ESW_INGRESS_ACL;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -698,21 +821,23 @@
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
-	*modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+	modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
 	kfree(in);
 	return err;
 }
 
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
+static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+					   struct mlx5_modify_hdr *modify_hdr)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
 	u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+	struct mlx5_core_dev *dev = ns->dev;
 
 	memset(in, 0, sizeof(in));
 	MLX5_SET(dealloc_modify_header_context_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
-		 modify_header_id);
+		 modify_hdr->id);
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
@@ -727,6 +852,13 @@
 	.update_fte = mlx5_cmd_update_fte,
 	.delete_fte = mlx5_cmd_delete_fte,
 	.update_root_ft = mlx5_cmd_update_root_ft,
+	.packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc,
+	.packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
+	.modify_header_alloc = mlx5_cmd_modify_header_alloc,
+	.modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
+	.set_peer = mlx5_cmd_stub_set_peer,
+	.create_ns = mlx5_cmd_stub_create_ns,
+	.destroy_ns = mlx5_cmd_stub_destroy_ns,
 };
 
 static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -739,9 +871,16 @@
 	.update_fte = mlx5_cmd_stub_update_fte,
 	.delete_fte = mlx5_cmd_stub_delete_fte,
 	.update_root_ft = mlx5_cmd_stub_update_root_ft,
+	.packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc,
+	.packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
+	.modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
+	.modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
+	.set_peer = mlx5_cmd_stub_set_peer,
+	.create_ns = mlx5_cmd_stub_create_ns,
+	.destroy_ns = mlx5_cmd_stub_destroy_ns,
 };
 
-static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
 {
 	return &mlx5_flow_cmds;
 }
@@ -760,8 +899,9 @@
 	case FS_FT_FDB:
 	case FS_FT_SNIFFER_RX:
 	case FS_FT_SNIFFER_TX:
-		return mlx5_fs_cmd_get_fw_cmds();
 	case FS_FT_NIC_TX:
+	case FS_FT_RDMA_RX:
+		return mlx5_fs_cmd_get_fw_cmds();
 	default:
 		return mlx5_fs_cmd_get_stub_cmds();
 	}
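Three things are wired together here. The FW command table picks up the new packet-reformat and modify-header callbacks. set_peer, create_ns, and destroy_ns stay pointed at the stubs even in the FW table, since firmware-managed namespaces need no per-namespace setup; only a backend that does, such as a software-steering implementation, would override them, which is presumably also why mlx5_fs_cmd_get_fw_cmds() loses its static qualifier and gets declared in fs_cmd.h below. Finally, the type switch now routes FS_FT_NIC_TX and the new FS_FT_RDMA_RX to the real FW commands instead of the stubs that NIC TX previously fell through to.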
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 6228ba7..d62de64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -36,66 +36,84 @@
 #include "fs_core.h"
 
 struct mlx5_flow_cmds {
-	int (*create_flow_table)(struct mlx5_core_dev *dev,
-				 u16 vport,
-				 enum fs_flow_table_op_mod op_mod,
-				 enum fs_flow_table_type type,
-				 unsigned int level, unsigned int log_size,
-				 struct mlx5_flow_table *next_ft,
-				 unsigned int *table_id, u32 flags);
-	int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+	int (*create_flow_table)(struct mlx5_flow_root_namespace *ns,
+				 struct mlx5_flow_table *ft,
+				 unsigned int log_size,
+				 struct mlx5_flow_table *next_ft);
+	int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns,
 				  struct mlx5_flow_table *ft);
 
-	int (*modify_flow_table)(struct mlx5_core_dev *dev,
+	int (*modify_flow_table)(struct mlx5_flow_root_namespace *ns,
 				 struct mlx5_flow_table *ft,
 				 struct mlx5_flow_table *next_ft);
 
-	int (*create_flow_group)(struct mlx5_core_dev *dev,
+	int (*create_flow_group)(struct mlx5_flow_root_namespace *ns,
 				 struct mlx5_flow_table *ft,
 				 u32 *in,
-				 unsigned int *group_id);
+				 struct mlx5_flow_group *fg);
 
-	int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+	int (*destroy_flow_group)(struct mlx5_flow_root_namespace *ns,
 				  struct mlx5_flow_table *ft,
-				  unsigned int group_id);
+				  struct mlx5_flow_group *fg);
 
-	int (*create_fte)(struct mlx5_core_dev *dev,
+	int (*create_fte)(struct mlx5_flow_root_namespace *ns,
 			  struct mlx5_flow_table *ft,
 			  struct mlx5_flow_group *fg,
 			  struct fs_fte *fte);
 
-	int (*update_fte)(struct mlx5_core_dev *dev,
+	int (*update_fte)(struct mlx5_flow_root_namespace *ns,
 			  struct mlx5_flow_table *ft,
-			  unsigned int group_id,
+			  struct mlx5_flow_group *fg,
 			  int modify_mask,
 			  struct fs_fte *fte);
 
-	int (*delete_fte)(struct mlx5_core_dev *dev,
+	int (*delete_fte)(struct mlx5_flow_root_namespace *ns,
 			  struct mlx5_flow_table *ft,
 			  struct fs_fte *fte);
 
-	int (*update_root_ft)(struct mlx5_core_dev *dev,
+	int (*update_root_ft)(struct mlx5_flow_root_namespace *ns,
 			      struct mlx5_flow_table *ft,
 			      u32 underlay_qpn,
 			      bool disconnect);
+
+	int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns,
+				     int reformat_type,
+				     size_t size,
+				     void *reformat_data,
+				     enum mlx5_flow_namespace_type namespace,
+				     struct mlx5_pkt_reformat *pkt_reformat);
+
+	void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns,
+					struct mlx5_pkt_reformat *pkt_reformat);
+
+	int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns,
+				   u8 namespace, u8 num_actions,
+				   void *modify_actions,
+				   struct mlx5_modify_hdr *modify_hdr);
+
+	void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns,
+				      struct mlx5_modify_hdr *modify_hdr);
+
+	int (*set_peer)(struct mlx5_flow_root_namespace *ns,
+			struct mlx5_flow_root_namespace *peer_ns);
+
+	int (*create_ns)(struct mlx5_flow_root_namespace *ns);
+	int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
 };
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+			   u32 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 		      u64 *packets, u64 *bytes);
 
-struct mlx5_cmd_fc_bulk;
-
-struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num);
-void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
-int
-mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
-void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
-			  struct mlx5_cmd_fc_bulk *b, u32 id,
-			  u64 *packets, u64 *bytes);
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len);
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+			   u32 *out);
 
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 37d114c..791e14a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
@@ -40,6 +41,7 @@
 #include "diag/fs_tracepoint.h"
 #include "accel/ipsec.h"
 #include "fpga/ipsec.h"
+#include "eswitch.h"
 
 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
 					 sizeof(struct init_tree_node))
@@ -58,7 +60,8 @@
 	ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
 		 __VA_ARGS__)\
 
-#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
+#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE,	\
+	.def_miss_action = def_miss_act,\
 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
 }
@@ -76,6 +79,14 @@
 					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
 					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
 
+#define FS_CHAINING_CAPS_EGRESS                                                \
+	FS_REQUIRED_CAPS(                                                      \
+		FS_CAP(flow_table_properties_nic_transmit.flow_modify_en),     \
+		FS_CAP(flow_table_properties_nic_transmit.modify_root),        \
+		FS_CAP(flow_table_properties_nic_transmit                      \
+			       .identified_miss_table_mode),                   \
+		FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
+
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
@@ -121,33 +132,73 @@
 	int num_leaf_prios;
 	int prio;
 	int num_levels;
+	enum mlx5_flow_table_miss_action def_miss_action;
 } root_fs = {
 	.type = FS_TYPE_NAMESPACE,
 	.ar_size = 7,
+	  .children = (struct init_tree_node[]){
+		  ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+						    BY_PASS_PRIO_NUM_LEVELS))),
+		  ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
+						    LAG_PRIO_NUM_LEVELS))),
+		  ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS,
+						    OFFLOADS_MAX_FT))),
+		  ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
+						    ETHTOOL_PRIO_NUM_LEVELS))),
+		  ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
+						    KERNEL_NIC_TC_NUM_LEVELS),
+				  ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
+						    KERNEL_NIC_PRIO_NUM_LEVELS))),
+		  ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS,
+						    LEFTOVERS_NUM_LEVELS))),
+		  ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
+			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				  ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS,
+						    ANCHOR_NUM_LEVELS))),
+	}
+};
+
+static struct init_tree_node egress_root_fs = {
+	.type = FS_TYPE_NAMESPACE,
+	.ar_size = 1,
 	.children = (struct init_tree_node[]) {
-		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
-			 FS_CHAINING_CAPS,
-			 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+			 FS_CHAINING_CAPS_EGRESS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
 						  BY_PASS_PRIO_NUM_LEVELS))),
-		ADD_PRIO(0, LAG_MIN_LEVEL, 0,
+	}
+};
+
+#define RDMA_RX_BYPASS_PRIO 0
+#define RDMA_RX_KERNEL_PRIO 1
+static struct init_tree_node rdma_rx_root_fs = {
+	.type = FS_TYPE_NAMESPACE,
+	.ar_size = 2,
+	.children = (struct init_tree_node[]) {
+		[RDMA_RX_BYPASS_PRIO] =
+		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
 			 FS_CHAINING_CAPS,
-			 ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
-						  LAG_PRIO_NUM_LEVELS))),
-		ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
-			 ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
-		ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
+						  BY_PASS_PRIO_NUM_LEVELS))),
+		[RDMA_RX_KERNEL_PRIO] =
+		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
 			 FS_CHAINING_CAPS,
-			 ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
-						  ETHTOOL_PRIO_NUM_LEVELS))),
-		ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
-			 ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS),
-				ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
-						  KERNEL_NIC_PRIO_NUM_LEVELS))),
-		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
-			 FS_CHAINING_CAPS,
-			 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))),
-		ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
-			 ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))),
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
+				ADD_MULTIPLE_PRIO(1, 1))),
 	}
 };
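Two new static trees join root_fs: egress_root_fs, a single bypass prio gated on the transmit-side FS_CHAINING_CAPS_EGRESS capabilities defined above, and rdma_rx_root_fs, whose kernel prio is the interesting one. Its namespace carries MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN as default miss action, so traffic that misses there appears to be handed to the other steering domain rather than taking the default miss behavior. Note the ADD_NS() change above: its first argument now seeds init_tree_node.def_miss_action, which the table-creation hunk below copies into ft->def_miss_action and the FW command programs as flow_table_context.table_miss_action whenever a table has no next_ft to chain to.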
 
@@ -242,10 +293,11 @@
 	}
 }
 
-static void down_write_ref_node(struct fs_node *node)
+static void down_write_ref_node(struct fs_node *node, bool locked)
 {
 	if (node) {
-		down_write(&node->lock);
+		if (!locked)
+			down_write(&node->lock);
 		refcount_inc(&node->refcount);
 	}
 }
@@ -256,13 +308,14 @@
 	up_read(&node->lock);
 }
 
-static void up_write_ref_node(struct fs_node *node)
+static void up_write_ref_node(struct fs_node *node, bool locked)
 {
 	refcount_dec(&node->refcount);
-	up_write(&node->lock);
+	if (!locked)
+		up_write(&node->lock);
 }
 
-static void tree_put_node(struct fs_node *node)
+static void tree_put_node(struct fs_node *node, bool locked)
 {
 	struct fs_node *parent_node = node->parent;
 
@@ -273,27 +326,27 @@
 			/* Only root namespace doesn't have parent and we just
 			 * need to free its node.
 			 */
-			down_write_ref_node(parent_node);
+			down_write_ref_node(parent_node, locked);
 			list_del_init(&node->list);
 			if (node->del_sw_func)
 				node->del_sw_func(node);
-			up_write_ref_node(parent_node);
+			up_write_ref_node(parent_node, locked);
 		} else {
 			kfree(node);
 		}
 		node = NULL;
 	}
 	if (!node && parent_node)
-		tree_put_node(parent_node);
+		tree_put_node(parent_node, locked);
 }
 
-static int tree_remove_node(struct fs_node *node)
+static int tree_remove_node(struct fs_node *node, bool locked)
 {
 	if (refcount_read(&node->refcount) > 1) {
 		refcount_dec(&node->refcount);
 		return -EEXIST;
 	}
-	tree_put_node(node);
+	tree_put_node(node, locked);
 	return 0;
 }
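The ref-node helpers all gain a locked flag so a caller that already holds node->lock can manipulate references without recursive locking; every existing call site passes false and behaves exactly as before. Sketch of the two modes (hypothetical caller):

	down_write_ref_node(&fte->node, false);	/* takes node->lock + ref */
	/* ... mutate under the lock ... */
	up_write_ref_node(&fte->node, false);	/* drops ref + node->lock */

	/* on a path already holding fte->node.lock: */
	down_write_ref_node(&fte->node, true);	/* ref only, no locking */
	up_write_ref_node(&fte->node, true);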
 
@@ -377,9 +430,10 @@
 	fs_get_obj(ft, node);
 	dev = get_dev(&ft->node);
 	root = find_root(&ft->node);
+	trace_mlx5_fs_del_ft(ft);
 
 	if (node->active) {
-		err = root->cmds->destroy_flow_table(dev, ft);
+		err = root->cmds->destroy_flow_table(root, ft);
 		if (err)
 			mlx5_core_warn(dev, "flow steering can't destroy ft\n");
 	}
@@ -398,22 +452,34 @@
 	kfree(ft);
 }
 
-static void del_sw_hw_rule(struct fs_node *node)
+static void modify_fte(struct fs_fte *fte)
 {
 	struct mlx5_flow_root_namespace *root;
-	struct mlx5_flow_rule *rule;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
-	struct fs_fte *fte;
-	int modify_mask;
-	struct mlx5_core_dev *dev = get_dev(node);
+	struct mlx5_core_dev *dev;
 	int err;
-	bool update_fte = false;
+
+	fs_get_obj(fg, fte->node.parent);
+	fs_get_obj(ft, fg->node.parent);
+	dev = get_dev(&fte->node);
+
+	root = find_root(&ft->node);
+	err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte);
+	if (err)
+		mlx5_core_warn(dev,
+			       "%s can't del rule fg id=%d fte_index=%d\n",
+			       __func__, fg->id, fte->index);
+	fte->modify_mask = 0;
+}
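del_sw_hw_rule() (immediately below) stops issuing update_fte itself; it only accumulates bits into the new fte->modify_mask, and modify_fte() later flushes the whole mask in one command and clears it. Deleting several rules under one FTE thus collapses into a single firmware update instead of one per rule. In outline:

	del_sw_hw_rule():	fte->modify_mask |= BIT(...);	/* record only */
	/* ...possibly more rule deletions on the same FTE... */
	modify_fte():		root->cmds->update_fte(root, ft, fg,
						       fte->modify_mask, fte);
				fte->modify_mask = 0;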
+
+static void del_sw_hw_rule(struct fs_node *node)
+{
+	struct mlx5_flow_rule *rule;
+	struct fs_fte *fte;
 
 	fs_get_obj(rule, node);
 	fs_get_obj(fte, rule->node.parent);
-	fs_get_obj(fg, fte->node.parent);
-	fs_get_obj(ft, fg->node.parent);
 	trace_mlx5_fs_del_rule(rule);
 	if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		mutex_lock(&rule->dest_attr.ft->lock);
@@ -423,27 +489,19 @@
 
 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
 	    --fte->dests_size) {
-		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
-			      BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+		fte->modify_mask |=
+			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
 		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
-		update_fte = true;
 		goto out;
 	}
 
 	if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 	    --fte->dests_size) {
-		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
-		update_fte = true;
+		fte->modify_mask |=
+			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
 	}
 out:
-	root = find_root(&ft->node);
-	if (update_fte && fte->dests_size) {
-		err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
-		if (err)
-			mlx5_core_warn(dev,
-				       "%s can't del rule fg id=%d fte_index=%d\n",
-				       __func__, fg->id, fte->index);
-	}
 	kfree(rule);
 }
 
@@ -464,11 +522,12 @@
 	dev = get_dev(&ft->node);
 	root = find_root(&ft->node);
 	if (node->active) {
-		err = root->cmds->delete_fte(dev, ft, fte);
+		err = root->cmds->delete_fte(root, ft, fte);
 		if (err)
 			mlx5_core_warn(dev,
 				       "flow steering can't delete fte in index %d of flow group id %d\n",
 				       fte->index, fg->id);
+		node->active = 0;
 	}
 }
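Clearing node->active after the firmware delete makes del_hw_fte() safe to reach twice: together with the alloc_fte() change below, where tree_init_node() no longer registers del_hw_fte as the node's hardware-delete callback, the HW delete is presumably driven explicitly on a deletion path outside this section, and a second invocation on the same FTE degrades to a no-op instead of issuing a duplicate delete command.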
 
@@ -503,7 +562,7 @@
 	trace_mlx5_fs_del_fg(fg);
 
 	root = find_root(&ft->node);
-	if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
+	if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg))
 		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
 			       fg->id, ft->id);
 }
@@ -520,7 +579,7 @@
 
 	rhashtable_destroy(&fg->ftes_hash);
 	ida_destroy(&fg->fte_allocator);
-	if (ft->autogroup.active)
+	if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
 		ft->autogroup.num_groups--;
 	err = rhltable_remove(&ft->fgs_hash,
 			      &fg->hash,
@@ -555,7 +614,7 @@
 }
 
 static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
-				u32 *match_value,
+				const struct mlx5_flow_spec *spec,
 				struct mlx5_flow_act *flow_act)
 {
 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
@@ -565,11 +624,12 @@
 	if (!fte)
 		return ERR_PTR(-ENOMEM);
 
-	memcpy(fte->val, match_value, sizeof(fte->val));
+	memcpy(fte->val, &spec->match_value, sizeof(fte->val));
 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
 	fte->action = *flow_act;
+	fte->flow_context = spec->flow_context;
 
-	tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+	tree_init_node(&fte->node, NULL, del_sw_fte);
 
 	return fte;
 }
@@ -583,7 +643,7 @@
 
 static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
 						u8 match_criteria_enable,
-						void *match_criteria,
+						const void *match_criteria,
 						int start_index,
 						int end_index)
 {
@@ -598,7 +658,8 @@
 	if (ret) {
 		kmem_cache_free(steering->fgs_cache, fg);
 		return ERR_PTR(ret);
-}
+	}
+
 	ida_init(&fg->fte_allocator);
 	fg->mask.match_criteria_enable = match_criteria_enable;
 	memcpy(&fg->mask.match_criteria, match_criteria,
@@ -612,7 +673,7 @@
 
 static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
 						       u8 match_criteria_enable,
-						       void *match_criteria,
+						       const void *match_criteria,
 						       int start_index,
 						       int end_index,
 						       struct list_head *prev)
@@ -694,7 +755,7 @@
 	struct fs_node *iter = list_entry(start, struct fs_node, list);
 	struct mlx5_flow_table *ft = NULL;
 
-	if (!root)
+	if (!root || root->type == FS_TYPE_PRIO_CHAINS)
 		return NULL;
 
 	list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -753,7 +814,7 @@
 
 	fs_for_each_ft(iter, prio) {
 		i++;
-		err = root->cmds->modify_flow_table(dev, iter, ft);
+		err = root->cmds->modify_flow_table(root, iter, ft);
 		if (err) {
 			mlx5_core_warn(dev, "Failed to modify flow table %d\n",
 				       iter->id);
@@ -789,7 +850,7 @@
 	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
 	struct mlx5_ft_underlay_qp *uqp;
 	int min_level = INT_MAX;
-	int err;
+	int err = 0;
 	u32 qpn;
 
 	if (root->root_ft)
@@ -801,11 +862,11 @@
 	if (list_empty(&root->underlay_qpns)) {
 		/* Don't set any QPN (zero) in case QPN list is empty */
 		qpn = 0;
-		err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
+		err = root->cmds->update_root_ft(root, ft, qpn, false);
 	} else {
 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
 			qpn = uqp->qpn;
-			err = root->cmds->update_root_ft(root->dev, ft,
+			err = root->cmds->update_root_ft(root, ft,
 							 qpn, false);
 			if (err)
 				break;
@@ -835,15 +896,15 @@
 	fs_get_obj(fte, rule->node.parent);
 	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return -EINVAL;
-	down_write_ref_node(&fte->node);
+	down_write_ref_node(&fte->node, false);
 	fs_get_obj(fg, fte->node.parent);
 	fs_get_obj(ft, fg->node.parent);
 
 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
 	root = find_root(&ft->node);
-	err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+	err = root->cmds->update_fte(root, ft, fg,
 				     modify_mask, fte);
-	up_write_ref_node(&fte->node);
+	up_write_ref_node(&fte->node, false);
 
 	return err;
 }
@@ -983,9 +1044,8 @@
 	tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
-	err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
-					    ft->type, ft->level, log_table_sz,
-					    next_ft, &ft->id, ft->flags);
+	ft->def_miss_action = ns->def_miss_action;
+	err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
 	if (err)
 		goto free_ft;
 
@@ -993,15 +1053,16 @@
 	if (err)
 		goto destroy_ft;
 	ft->node.active = true;
-	down_write_ref_node(&fs_prio->node);
+	down_write_ref_node(&fs_prio->node, false);
 	tree_add_node(&ft->node, &fs_prio->node);
 	list_add_flow_table(ft, fs_prio);
 	fs_prio->num_ft++;
-	up_write_ref_node(&fs_prio->node);
+	up_write_ref_node(&fs_prio->node, false);
 	mutex_unlock(&root->chain_lock);
+	trace_mlx5_fs_add_ft(ft);
 	return ft;
 destroy_ft:
-	root->cmds->destroy_flow_table(root->dev, ft);
+	root->cmds->destroy_flow_table(root, ft);
 free_ft:
 	kfree(ft);
 unlock_root:
@@ -1065,6 +1126,8 @@
 
 	ft->autogroup.active = true;
 	ft->autogroup.required_groups = max_num_groups;
+	/* We save place for flow groups in addition to max types */
+	ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
 
 	return ft;
 }
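The per-autogroup size is now computed once at table creation and cached in ft->autogroup.group_size rather than rederived from required_groups at each allocation (compare the alloc_auto_flow_group() hunk below). Worked example: max_fte = 1024 with max_num_groups = 3 reserves 1024 / (3 + 1) = 256 FTEs per group, deliberately leaving one group's worth of spare capacity. Caching the value also tightens the accounting: del_sw_flow_group() and alloc_auto_flow_group() now count only groups whose size equals group_size, so smaller remainder groups no longer perturb the required_groups bookkeeping.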
@@ -1083,24 +1146,23 @@
 				   start_flow_index);
 	int end_index = MLX5_GET(create_flow_group_in, fg_in,
 				 end_flow_index);
-	struct mlx5_core_dev *dev = get_dev(&ft->node);
 	struct mlx5_flow_group *fg;
 	int err;
 
 	if (ft->autogroup.active)
 		return ERR_PTR(-EPERM);
 
-	down_write_ref_node(&ft->node);
+	down_write_ref_node(&ft->node, false);
 	fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
 				     start_index, end_index,
 				     ft->node.children.prev);
-	up_write_ref_node(&ft->node);
+	up_write_ref_node(&ft->node, false);
 	if (IS_ERR(fg))
 		return fg;
 
-	err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
+	err = root->cmds->create_flow_group(root, ft, fg_in, fg);
 	if (err) {
-		tree_put_node(&fg->node);
+		tree_put_node(&fg->node, false);
 		return ERR_PTR(err);
 	}
 	trace_mlx5_fs_add_fg(fg);
@@ -1238,11 +1300,9 @@
 	fs_get_obj(ft, fg->node.parent);
 	root = find_root(&fg->node);
 	if (!(fte->status & FS_FTE_STATUS_EXISTING))
-		err = root->cmds->create_fte(get_dev(&ft->node),
-					     ft, fg, fte);
+		err = root->cmds->create_fte(root, ft, fg, fte);
 	else
-		err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
-						     modify_mask, fte);
+		err = root->cmds->update_fte(root, ft, fg, modify_mask, fte);
 	if (err)
 		goto free_handle;
 
@@ -1259,7 +1319,7 @@
 }
 
 static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table  *ft,
-						     struct mlx5_flow_spec *spec)
+						     const struct mlx5_flow_spec *spec)
 {
 	struct list_head *prev = &ft->node.children;
 	struct mlx5_flow_group *fg;
@@ -1270,8 +1330,7 @@
 		return ERR_PTR(-ENOENT);
 
 	if (ft->autogroup.num_groups < ft->autogroup.required_groups)
-		/* We save place for flow groups in addition to max types */
-		group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
+		group_size = ft->autogroup.group_size;
 
 	/* ft->max_fte == ft->autogroup.max_types */
 	if (group_size == 0)
@@ -1298,7 +1357,8 @@
 	if (IS_ERR(fg))
 		goto out;
 
-	ft->autogroup.num_groups++;
+	if (group_size == ft->autogroup.group_size)
+		ft->autogroup.num_groups++;
 
 out:
 	return fg;
@@ -1308,7 +1368,6 @@
 				  struct mlx5_flow_group *fg)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
-	struct mlx5_core_dev *dev = get_dev(&ft->node);
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	void *match_criteria_addr;
 	u8 src_esw_owner_mask_on;
@@ -1338,7 +1397,7 @@
 	memcpy(match_criteria_addr, fg->mask.match_criteria,
 	       sizeof(fg->mask.match_criteria));
 
-	err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
+	err = root->cmds->create_flow_group(root, ft, in, fg);
 	if (!err) {
 		fg->node.active = true;
 		trace_mlx5_fs_add_fg(fg);
@@ -1353,7 +1412,13 @@
 {
 	if (d1->type == d2->type) {
 		if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
-		     d1->vport.num == d2->vport.num) ||
+		     d1->vport.num == d2->vport.num &&
+		     d1->vport.flags == d2->vport.flags &&
+		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
+		      (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
+		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
+		      (d1->vport.pkt_reformat->id ==
+		       d2->vport.pkt_reformat->id) : true)) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
 		     d1->ft == d2->ft) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1388,7 +1453,7 @@
 		return false;
 
 	if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
-			     MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+			     MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
 			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
@@ -1400,7 +1465,9 @@
 	return false;
 }
 
-static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+static int check_conflicting_ftes(struct fs_fte *fte,
+				  const struct mlx5_flow_context *flow_context,
+				  const struct mlx5_flow_act *flow_act)
 {
 	if (check_conflicting_actions(flow_act->action, fte->action.action)) {
 		mlx5_core_warn(get_dev(&fte->node),
@@ -1408,12 +1475,12 @@
 		return -EEXIST;
 	}
 
-	if (flow_act->has_flow_tag &&
-	    fte->action.flow_tag != flow_act->flow_tag) {
+	if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+	    fte->flow_context.flow_tag != flow_context->flow_tag) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "FTE flow tag %u already exists with different flow tag %u\n",
-			       fte->action.flow_tag,
-			       flow_act->flow_tag);
+			       fte->flow_context.flow_tag,
+			       flow_context->flow_tag);
 		return -EEXIST;
 	}
 
@@ -1421,7 +1488,7 @@
 }
 
 static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
-					    u32 *match_value,
+					    const struct mlx5_flow_spec *spec,
 					    struct mlx5_flow_act *flow_act,
 					    struct mlx5_flow_destination *dest,
 					    int dest_num,
@@ -1432,7 +1499,7 @@
 	int i;
 	int ret;
 
-	ret = check_conflicting_ftes(fte, flow_act);
+	ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -1455,29 +1522,8 @@
 	return handle;
 }
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
+static bool counter_is_valid(u32 action)
 {
-	struct mlx5_flow_rule *dst;
-	struct fs_fte *fte;
-
-	fs_get_obj(fte, handle->rule[0]->node.parent);
-
-	fs_for_each_dst(dst, fte) {
-		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
-			return dst->dest_attr.counter;
-	}
-
-	return NULL;
-}
-
-static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
-{
-	if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
-		return !counter;
-
-	if (!counter)
-		return false;
-
 	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
 }
@@ -1487,7 +1533,7 @@
 			  struct mlx5_flow_table *ft)
 {
 	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
-		return counter_is_valid(dest->counter, action);
+		return counter_is_valid(action);
 
 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return true;
@@ -1515,10 +1561,10 @@
 		struct match_list *iter, *match_tmp;
 
 		list_del(&head->first.list);
-		tree_put_node(&head->first.g->node);
+		tree_put_node(&head->first.g->node, false);
 		list_for_each_entry_safe(iter, match_tmp, &head->list,
 					 list) {
-			tree_put_node(&iter->g->node);
+			tree_put_node(&iter->g->node, false);
 			list_del(&iter->list);
 			kfree(iter);
 		}
@@ -1527,7 +1573,7 @@
 
 static int build_match_list(struct match_list_head *match_head,
 			    struct mlx5_flow_table *ft,
-			    struct mlx5_flow_spec *spec)
+			    const struct mlx5_flow_spec *spec)
 {
 	struct rhlist_head *tmp, *list;
 	struct mlx5_flow_group *g;
@@ -1580,7 +1626,7 @@
 
 static struct fs_fte *
 lookup_fte_locked(struct mlx5_flow_group *g,
-		  u32 *match_value,
+		  const u32 *match_value,
 		  bool take_write)
 {
 	struct fs_fte *fte_tmp;
@@ -1595,11 +1641,16 @@
 		fte_tmp = NULL;
 		goto out;
 	}
+	if (!fte_tmp->node.active) {
+		tree_put_node(&fte_tmp->node, false);
+		fte_tmp = NULL;
+		goto out;
+	}
 
 	nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
 out:
 	if (take_write)
-		up_write_ref_node(&g->node);
+		up_write_ref_node(&g->node, false);
 	else
 		up_read_ref_node(&g->node);
 	return fte_tmp;
@@ -1608,7 +1659,7 @@
 static struct mlx5_flow_handle *
 try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       struct list_head *match_head,
-		       struct mlx5_flow_spec *spec,
+		       const struct mlx5_flow_spec *spec,
 		       struct mlx5_flow_act *flow_act,
 		       struct mlx5_flow_destination *dest,
 		       int dest_num,
@@ -1623,12 +1674,14 @@
 	u64  version;
 	int err;
 
-	fte = alloc_fte(ft, spec->match_value, flow_act);
+	fte = alloc_fte(ft, spec, flow_act);
 	if (IS_ERR(fte))
 		return  ERR_PTR(-ENOMEM);
 
 search_again_locked:
 	version = matched_fgs_get_version(match_head);
+	if (flow_act->flags & FLOW_ACT_NO_APPEND)
+		goto skip_search;
 	/* Try to find a fg that already contains a matching fte */
 	list_for_each_entry(iter, match_head, list) {
 		struct fs_fte *fte_tmp;
@@ -1637,14 +1690,18 @@
 		fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
 		if (!fte_tmp)
 			continue;
-		rule = add_rule_fg(g, spec->match_value,
-				   flow_act, dest, dest_num, fte_tmp);
-		up_write_ref_node(&fte_tmp->node);
-		tree_put_node(&fte_tmp->node);
+		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
+		up_write_ref_node(&fte_tmp->node, false);
+		tree_put_node(&fte_tmp->node, false);
 		kmem_cache_free(steering->ftes_cache, fte);
 		return rule;
 	}
 
+skip_search:
+	/* No group with matching fte found, or we skipped the search.
+	 * Try to add a new fte to any matching fg.
+	 */
+
 	/* Check the ft version, for the case that a new flow group
 	 * was added while the fgs weren't locked
 	 */
@@ -1671,7 +1728,7 @@
 
 		err = insert_fte(g, fte);
 		if (err) {
-			up_write_ref_node(&g->node);
+			up_write_ref_node(&g->node, false);
 			if (err == -ENOSPC)
 				continue;
 			kmem_cache_free(steering->ftes_cache, fte);
@@ -1679,11 +1736,10 @@
 		}
 
 		nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
-		up_write_ref_node(&g->node);
-		rule = add_rule_fg(g, spec->match_value,
-				   flow_act, dest, dest_num, fte);
-		up_write_ref_node(&fte->node);
-		tree_put_node(&fte->node);
+		up_write_ref_node(&g->node, false);
+		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
+		up_write_ref_node(&fte->node, false);
+		tree_put_node(&fte->node, false);
 		return rule;
 	}
 	rule = ERR_PTR(-ENOENT);
@@ -1694,7 +1750,7 @@
 
 static struct mlx5_flow_handle *
 _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
-		     struct mlx5_flow_spec *spec,
+		     const struct mlx5_flow_spec *spec,
 		     struct mlx5_flow_act *flow_act,
 		     struct mlx5_flow_destination *dest,
 		     int dest_num)
@@ -1725,7 +1781,7 @@
 	err = build_match_list(&match_head, ft, spec);
 	if (err) {
 		if (take_write)
-			up_write_ref_node(&ft->node);
+			up_write_ref_node(&ft->node, false);
 		else
 			up_read_ref_node(&ft->node);
 		return ERR_PTR(err);
@@ -1740,7 +1796,7 @@
 	if (!IS_ERR(rule) ||
 	    (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
 		if (take_write)
-			up_write_ref_node(&ft->node);
+			up_write_ref_node(&ft->node, false);
 		return rule;
 	}
 
@@ -1756,18 +1812,18 @@
 	g = alloc_auto_flow_group(ft, spec);
 	if (IS_ERR(g)) {
 		rule = ERR_CAST(g);
-		up_write_ref_node(&ft->node);
+		up_write_ref_node(&ft->node, false);
 		return rule;
 	}
 
 	nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
-	up_write_ref_node(&ft->node);
+	up_write_ref_node(&ft->node, false);
 
 	err = create_auto_flow_group(ft, g);
 	if (err)
 		goto err_release_fg;
 
-	fte = alloc_fte(ft, spec->match_value, flow_act);
+	fte = alloc_fte(ft, spec, flow_act);
 	if (IS_ERR(fte)) {
 		err = PTR_ERR(fte);
 		goto err_release_fg;
@@ -1780,17 +1836,16 @@
 	}
 
 	nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
-	up_write_ref_node(&g->node);
-	rule = add_rule_fg(g, spec->match_value, flow_act, dest,
-			   dest_num, fte);
-	up_write_ref_node(&fte->node);
-	tree_put_node(&fte->node);
-	tree_put_node(&g->node);
+	up_write_ref_node(&g->node, false);
+	rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
+	up_write_ref_node(&fte->node, false);
+	tree_put_node(&fte->node, false);
+	tree_put_node(&g->node, false);
 	return rule;
 
 err_release_fg:
-	up_write_ref_node(&g->node);
-	tree_put_node(&g->node);
+	up_write_ref_node(&g->node, false);
+	tree_put_node(&g->node, false);
 	return ERR_PTR(err);
 }
 
@@ -1802,7 +1857,7 @@
 
 struct mlx5_flow_handle *
 mlx5_add_flow_rules(struct mlx5_flow_table *ft,
-		    struct mlx5_flow_spec *spec,
+		    const struct mlx5_flow_spec *spec,
 		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
 		    int num_dest)
@@ -1853,10 +1908,33 @@
 
 void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
 {
+	struct fs_fte *fte;
 	int i;
 
+	/* To consolidate the HW changes, we lock the FTE against other
+	 * changes and take a reference, so that the FTE's "del" callbacks
+	 * are not invoked by the tree code; they are handled here instead.
+	 * The rules are removed while the FTE is locked.
+	 * After removing all of the handle's rules: if rules remain, the
+	 * FTE only needs to be modified in FW, and the lock and reference
+	 * taken above are released. Otherwise the FTE should be deleted:
+	 * first delete it in FW, then unlock the FTE and proceed to
+	 * tree_put_node(), which performs the last reference drop as well
+	 * as the required handling of the FTE's parent.
+	 */
+	fs_get_obj(fte, handle->rule[0]->node.parent);
+	down_write_ref_node(&fte->node, false);
 	for (i = handle->num_rules - 1; i >= 0; i--)
-		tree_remove_node(&handle->rule[i]->node);
+		tree_remove_node(&handle->rule[i]->node, true);
+	if (fte->modify_mask && fte->dests_size) {
+		modify_fte(fte);
+		up_write_ref_node(&fte->node, false);
+	} else {
+		del_hw_fte(&fte->node);
+		up_write(&fte->node.lock);
+		tree_put_node(&fte->node, false);
+	}
 	kfree(handle);
 }
 EXPORT_SYMBOL(mlx5_del_flow_rules);
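
For orientation, a minimal caller sketch of the add/del pair (the helper and its arguments are hypothetical; only the two exported entry points are from this file):

	static int example_fwd_rule(struct mlx5_flow_table *ft,
				    struct mlx5_flow_spec *spec,
				    struct mlx5_flow_destination *dest)
	{
		struct mlx5_flow_act flow_act = {
			.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
		};
		struct mlx5_flow_handle *handle;

		handle = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
		if (IS_ERR(handle))
			return PTR_ERR(handle);

		/* ... rule is live ... */

		mlx5_del_flow_rules(handle);	/* one FW modify or FTE delete */
		return 0;
	}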
@@ -1893,12 +1971,12 @@
 	if (list_empty(&root->underlay_qpns)) {
 		/* Don't set any QPN (zero) in case QPN list is empty */
 		qpn = 0;
-		err = root->cmds->update_root_ft(root->dev, new_root_ft,
+		err = root->cmds->update_root_ft(root, new_root_ft,
 						 qpn, false);
 	} else {
 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
 			qpn = uqp->qpn;
-			err = root->cmds->update_root_ft(root->dev,
+			err = root->cmds->update_root_ft(root,
 							 new_root_ft, qpn,
 							 false);
 			if (err)
@@ -1959,7 +2037,7 @@
 		mutex_unlock(&root->chain_lock);
 		return err;
 	}
-	if (tree_remove_node(&ft->node))
+	if (tree_remove_node(&ft->node, false))
 		mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
 			       ft->id);
 	mutex_unlock(&root->chain_lock);
@@ -1970,17 +2048,29 @@
 
 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
 {
-	if (tree_remove_node(&fg->node))
+	if (tree_remove_node(&fg->node, false))
 		mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
 			       fg->id);
 }
 
+struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
+						int n)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+
+	if (!steering || !steering->fdb_sub_ns)
+		return NULL;
+
+	return steering->fdb_sub_ns[n];
+}
+EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+
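
A sketch of the intended consumer (illustrative; the auto-grouped helper shown is the v5.4-era API, and the parameter values are assumptions):

	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;

	ns = mlx5_get_fdb_sub_ns(dev, chain);	/* chain in [0, FDB_MAX_CHAIN] */
	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

	ft = mlx5_create_auto_grouped_flow_table(ns, prio, max_fte,
						 num_groups, level, flags);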
 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 						    enum mlx5_flow_namespace_type type)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
 	struct mlx5_flow_root_namespace *root_ns;
-	int prio;
+	int prio = 0;
 	struct fs_prio *fs_prio;
 	struct mlx5_flow_namespace *ns;
 
@@ -1988,40 +2078,35 @@
 		return NULL;
 
 	switch (type) {
-	case MLX5_FLOW_NAMESPACE_BYPASS:
-	case MLX5_FLOW_NAMESPACE_LAG:
-	case MLX5_FLOW_NAMESPACE_OFFLOADS:
-	case MLX5_FLOW_NAMESPACE_ETHTOOL:
-	case MLX5_FLOW_NAMESPACE_KERNEL:
-	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
-	case MLX5_FLOW_NAMESPACE_ANCHOR:
-		prio = type;
-		break;
 	case MLX5_FLOW_NAMESPACE_FDB:
 		if (steering->fdb_root_ns)
 			return &steering->fdb_root_ns->ns;
-		else
-			return NULL;
+		return NULL;
 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
 		if (steering->sniffer_rx_root_ns)
 			return &steering->sniffer_rx_root_ns->ns;
-		else
-			return NULL;
+		return NULL;
 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
 		if (steering->sniffer_tx_root_ns)
 			return &steering->sniffer_tx_root_ns->ns;
-		else
-			return NULL;
-	case MLX5_FLOW_NAMESPACE_EGRESS:
-		if (steering->egress_root_ns)
-			return &steering->egress_root_ns->ns;
-		else
-			return NULL;
-	default:
 		return NULL;
+	default:
+		break;
 	}
 
-	root_ns = steering->root_ns;
+	if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+		root_ns = steering->egress_root_ns;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+		root_ns = steering->rdma_rx_root_ns;
+		prio = RDMA_RX_BYPASS_PRIO;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
+		root_ns = steering->rdma_rx_root_ns;
+		prio = RDMA_RX_KERNEL_PRIO;
+	} else { /* Must be NIC RX */
+		root_ns = steering->root_ns;
+		prio = type;
+	}
+
 	if (!root_ns)
 		return NULL;
 
@@ -2043,7 +2128,7 @@
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
 
-	if (!steering || vport >= MLX5_TOTAL_VPORTS(dev))
+	if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
 		return NULL;
 
 	switch (type) {
@@ -2064,8 +2149,10 @@
 	}
 }
 
-static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
-				      unsigned int prio, int num_levels)
+static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
+				       unsigned int prio,
+				       int num_levels,
+				       enum fs_node_type type)
 {
 	struct fs_prio *fs_prio;
 
@@ -2073,7 +2160,7 @@
 	if (!fs_prio)
 		return ERR_PTR(-ENOMEM);
 
-	fs_prio->node.type = FS_TYPE_PRIO;
+	fs_prio->node.type = type;
 	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
 	tree_add_node(&fs_prio->node, &ns->node);
 	fs_prio->num_levels = num_levels;
@@ -2083,6 +2170,19 @@
 	return fs_prio;
 }
 
+static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
+					      unsigned int prio,
+					      int num_levels)
+{
+	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+				      unsigned int prio, int num_levels)
+{
+	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
+}
+
 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
 						     *ns)
 {
@@ -2091,7 +2191,8 @@
 	return ns;
 }
 
-static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
+static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
+						       int def_miss_act)
 {
 	struct mlx5_flow_namespace	*ns;
 
@@ -2100,6 +2201,7 @@
 		return ERR_PTR(-ENOMEM);
 
 	fs_init_namespace(ns);
+	ns->def_miss_action = def_miss_act;
 	tree_init_node(&ns->node, NULL, del_sw_ns);
 	tree_add_node(&ns->node, &prio->node);
 	list_add_tail(&ns->node.list, &prio->node.children);
@@ -2166,7 +2268,7 @@
 		base = &fs_prio->node;
 	} else if (init_node->type == FS_TYPE_NAMESPACE) {
 		fs_get_obj(fs_prio, fs_parent_node);
-		fs_ns = fs_create_namespace(fs_prio);
+		fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action);
 		if (IS_ERR(fs_ns))
 			return PTR_ERR(fs_ns);
 		base = &fs_ns->node;
@@ -2220,7 +2322,7 @@
 		cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
 
 	/* Create the root namespace */
-	root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
+	root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
 	if (!root_ns)
 		return NULL;
 
@@ -2338,8 +2440,8 @@
 		tree_get_node(node);
 		list_for_each_entry_safe(iter, temp, &node->children, list)
 			clean_tree(iter);
-		tree_put_node(node);
-		tree_remove_node(node);
+		tree_put_node(node, false);
+		tree_remove_node(node, false);
 	}
 }
 
@@ -2359,10 +2461,11 @@
 	if (!steering->esw_egress_root_ns)
 		return;
 
-	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+	for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
 		cleanup_root_ns(steering->esw_egress_root_ns[i]);
 
 	kfree(steering->esw_egress_root_ns);
+	steering->esw_egress_root_ns = NULL;
 }
 
 static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
@@ -2373,10 +2476,11 @@
 	if (!steering->esw_ingress_root_ns)
 		return;
 
-	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+	for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
 		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
 
 	kfree(steering->esw_ingress_root_ns);
+	steering->esw_ingress_root_ns = NULL;
 }
 
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
@@ -2387,8 +2491,12 @@
 	cleanup_egress_acls_root_ns(dev);
 	cleanup_ingress_acls_root_ns(dev);
 	cleanup_root_ns(steering->fdb_root_ns);
+	steering->fdb_root_ns = NULL;
+	kfree(steering->fdb_sub_ns);
+	steering->fdb_sub_ns = NULL;
 	cleanup_root_ns(steering->sniffer_rx_root_ns);
 	cleanup_root_ns(steering->sniffer_tx_root_ns);
+	cleanup_root_ns(steering->rdma_rx_root_ns);
 	cleanup_root_ns(steering->egress_root_ns);
 	mlx5_cleanup_fc_stats(dev);
 	kmem_cache_destroy(steering->ftes_cache);
@@ -2406,11 +2514,7 @@
 
 	/* Create single prio */
 	prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
-	if (IS_ERR(prio)) {
-		cleanup_root_ns(steering->sniffer_tx_root_ns);
-		return PTR_ERR(prio);
-	}
-	return 0;
+	return PTR_ERR_OR_ZERO(prio);
 }
 
 static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
@@ -2423,36 +2527,99 @@
 
 	/* Create single prio */
 	prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
-	if (IS_ERR(prio)) {
-		cleanup_root_ns(steering->sniffer_rx_root_ns);
-		return PTR_ERR(prio);
-	}
-	return 0;
+	return PTR_ERR_OR_ZERO(prio);
 }
 
+static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+	int err;
+
+	steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX);
+	if (!steering->rdma_rx_root_ns)
+		return -ENOMEM;
+
+	err = init_root_tree(steering, &rdma_rx_root_fs,
+			     &steering->rdma_rx_root_ns->ns.node);
+	if (err)
+		goto out_err;
+
+	set_prio_attrs(steering->rdma_rx_root_ns);
+
+	return 0;
+
+out_err:
+	cleanup_root_ns(steering->rdma_rx_root_ns);
+	steering->rdma_rx_root_ns = NULL;
+	return err;
+}
+
 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 {
-	struct fs_prio *prio;
+	struct mlx5_flow_namespace *ns;
+	struct fs_prio *maj_prio;
+	struct fs_prio *min_prio;
+	int levels;
+	int chain;
+	int prio;
+	int err;
 
 	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
 	if (!steering->fdb_root_ns)
 		return -ENOMEM;
 
-	prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
-	if (IS_ERR(prio))
-		goto out_err;
+	steering->fdb_sub_ns = kcalloc(FDB_MAX_CHAIN + 1,
+				       sizeof(*steering->fdb_sub_ns),
+				       GFP_KERNEL);
+	if (!steering->fdb_sub_ns)
+		return -ENOMEM;
 
-	prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
-	if (IS_ERR(prio))
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
+				  1);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
 		goto out_err;
+	}
+
+	levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
+	maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
+					  FDB_FAST_PATH,
+					  levels);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
+		goto out_err;
+	}
+
+	for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
+		ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
+		if (IS_ERR(ns)) {
+			err = PTR_ERR(ns);
+			goto out_err;
+		}
+
+		for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
+			min_prio = fs_create_prio(ns, prio, 2);
+			if (IS_ERR(min_prio)) {
+				err = PTR_ERR(min_prio);
+				goto out_err;
+			}
+		}
+
+		steering->fdb_sub_ns[chain] = ns;
+	}
+
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
+		goto out_err;
+	}
 
 	set_prio_attrs(steering->fdb_root_ns);
 	return 0;
 
 out_err:
 	cleanup_root_ns(steering->fdb_root_ns);
+	kfree(steering->fdb_sub_ns);
+	steering->fdb_sub_ns = NULL;
 	steering->fdb_root_ns = NULL;
-	return PTR_ERR(prio);
+	return err;
 }
 
 static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
@@ -2484,16 +2651,18 @@
 static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int total_vports = mlx5_eswitch_get_total_vports(dev);
 	int err;
 	int i;
 
-	steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
-					       sizeof(*steering->esw_egress_root_ns),
-					       GFP_KERNEL);
+	steering->esw_egress_root_ns =
+			kcalloc(total_vports,
+				sizeof(*steering->esw_egress_root_ns),
+				GFP_KERNEL);
 	if (!steering->esw_egress_root_ns)
 		return -ENOMEM;
 
-	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+	for (i = 0; i < total_vports; i++) {
 		err = init_egress_acl_root_ns(steering, i);
 		if (err)
 			goto cleanup_root_ns;
@@ -2505,22 +2674,25 @@
 	for (i--; i >= 0; i--)
 		cleanup_root_ns(steering->esw_egress_root_ns[i]);
 	kfree(steering->esw_egress_root_ns);
+	steering->esw_egress_root_ns = NULL;
 	return err;
 }
 
 static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int total_vports = mlx5_eswitch_get_total_vports(dev);
 	int err;
 	int i;
 
-	steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
-						sizeof(*steering->esw_ingress_root_ns),
-						GFP_KERNEL);
+	steering->esw_ingress_root_ns =
+			kcalloc(total_vports,
+				sizeof(*steering->esw_ingress_root_ns),
+				GFP_KERNEL);
 	if (!steering->esw_ingress_root_ns)
 		return -ENOMEM;
 
-	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+	for (i = 0; i < total_vports; i++) {
 		err = init_ingress_acl_root_ns(steering, i);
 		if (err)
 			goto cleanup_root_ns;
@@ -2532,21 +2704,29 @@
 	for (i--; i >= 0; i--)
 		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
 	kfree(steering->esw_ingress_root_ns);
+	steering->esw_ingress_root_ns = NULL;
 	return err;
 }
 
 static int init_egress_root_ns(struct mlx5_flow_steering *steering)
 {
-	struct fs_prio *prio;
+	int err;
 
 	steering->egress_root_ns = create_root_ns(steering,
 						  FS_FT_NIC_TX);
 	if (!steering->egress_root_ns)
 		return -ENOMEM;
 
-	/* create 1 prio*/
-	prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
-	return PTR_ERR_OR_ZERO(prio);
+	err = init_root_tree(steering, &egress_root_fs,
+			     &steering->egress_root_ns->ns.node);
+	if (err)
+		goto cleanup;
+	set_prio_attrs(steering->egress_root_ns);
+	return 0;
+cleanup:
+	cleanup_root_ns(steering->egress_root_ns);
+	steering->egress_root_ns = NULL;
+	return err;
 }
 
 int mlx5_init_fs(struct mlx5_core_dev *dev)
@@ -2614,7 +2794,14 @@
 			goto err;
 	}
 
-	if (MLX5_IPSEC_DEV(dev)) {
+	if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
+	    MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
+		err = init_rdma_rx_root_ns(steering);
+		if (err)
+			goto err;
+	}
+
+	if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
 		err = init_egress_root_ns(steering);
 		if (err)
 			goto err;
@@ -2643,7 +2830,7 @@
 		goto update_ft_fail;
 	}
 
-	err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+	err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
 					 false);
 	if (err) {
 		mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
@@ -2687,7 +2874,7 @@
 		goto out;
 	}
 
-	err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+	err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
 					 true);
 	if (err)
 		mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
@@ -2704,3 +2891,160 @@
 	return err;
 }
 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
+
+static struct mlx5_flow_root_namespace
+*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
+{
+	struct mlx5_flow_namespace *ns;
+
+	if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
+	    ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
+		ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
+	else
+		ns = mlx5_get_flow_namespace(dev, ns_type);
+	if (!ns)
+		return NULL;
+
+	return find_root(&ns->node);
+}
+
+struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+						 u8 ns_type, u8 num_actions,
+						 void *modify_actions)
+{
+	struct mlx5_flow_root_namespace *root;
+	struct mlx5_modify_hdr *modify_hdr;
+	int err;
+
+	root = get_root_namespace(dev, ns_type);
+	if (!root)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
+	if (!modify_hdr)
+		return ERR_PTR(-ENOMEM);
+
+	modify_hdr->ns_type = ns_type;
+	err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
+					      modify_actions, modify_hdr);
+	if (err) {
+		kfree(modify_hdr);
+		return ERR_PTR(err);
+	}
+
+	return modify_hdr;
+}
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+				struct mlx5_modify_hdr *modify_hdr)
+{
+	struct mlx5_flow_root_namespace *root;
+
+	root = get_root_namespace(dev, modify_hdr->ns_type);
+	if (WARN_ON(!root))
+		return;
+	root->cmds->modify_header_dealloc(root, modify_hdr);
+	kfree(modify_hdr);
+}
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
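
A hedged usage sketch of the alloc/dealloc pair; the set_action_in encoding below mirrors what in-tree callers do, and the TTL rewrite is purely illustrative:

	u8 act[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
	struct mlx5_modify_hdr *mh;

	MLX5_SET(set_action_in, act, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, act, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
	MLX5_SET(set_action_in, act, length, 8);	/* bits */
	MLX5_SET(set_action_in, act, data, 64);

	mh = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL, 1, act);
	if (IS_ERR(mh))
		return PTR_ERR(mh);

	/* reference mh from flow_act.modify_hdr; on teardown: */
	mlx5_modify_header_dealloc(dev, mh);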
+
+struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+						     int reformat_type,
+						     size_t size,
+						     void *reformat_data,
+						     enum mlx5_flow_namespace_type ns_type)
+{
+	struct mlx5_pkt_reformat *pkt_reformat;
+	struct mlx5_flow_root_namespace *root;
+	int err;
+
+	root = get_root_namespace(dev, ns_type);
+	if (!root)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
+	if (!pkt_reformat)
+		return ERR_PTR(-ENOMEM);
+
+	pkt_reformat->ns_type = ns_type;
+	pkt_reformat->reformat_type = reformat_type;
+	err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
+						reformat_data, ns_type,
+						pkt_reformat);
+	if (err) {
+		kfree(pkt_reformat);
+		return ERR_PTR(err);
+	}
+
+	return pkt_reformat;
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
+
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+				  struct mlx5_pkt_reformat *pkt_reformat)
+{
+	struct mlx5_flow_root_namespace *root;
+
+	root = get_root_namespace(dev, pkt_reformat->ns_type);
+	if (WARN_ON(!root))
+		return;
+	root->cmds->packet_reformat_dealloc(root, pkt_reformat);
+	kfree(pkt_reformat);
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
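
The matching usage sketch for packet reformat contexts (encap_size and encap_header are caller-built and assumed here):

	struct mlx5_pkt_reformat *pr;

	pr = mlx5_packet_reformat_alloc(dev, MLX5_REFORMAT_TYPE_L2_TO_VXLAN,
					encap_size, encap_header,
					MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(pr))
		return PTR_ERR(pr);

	/* reference pr from flow_act.pkt_reformat; on teardown: */
	mlx5_packet_reformat_dealloc(dev, pr);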
+
+int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
+				 struct mlx5_flow_root_namespace *peer_ns)
+{
+	if (peer_ns && ns->mode != peer_ns->mode) {
+		mlx5_core_err(ns->dev,
+			      "Can't peer namespaces of different steering modes\n");
+		return -EINVAL;
+	}
+
+	return ns->cmds->set_peer(ns, peer_ns);
+}
+
+/* This function should be called only at init stage of the namespace.
+ * It is not safe to call this function while steering operations
+ * are executed in the namespace.
+ */
+int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
+				 enum mlx5_flow_steering_mode mode)
+{
+	struct mlx5_flow_root_namespace *root;
+	const struct mlx5_flow_cmds *cmds;
+	int err;
+
+	root = find_root(&ns->node);
+	/* Can't set cmds on a non-root namespace */
+	if (&root->ns != ns)
+		return -EINVAL;
+
+	if (root->table_type != FS_FT_FDB)
+		return -EOPNOTSUPP;
+
+	if (root->mode == mode)
+		return 0;
+
+	if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
+		cmds = mlx5_fs_cmd_get_dr_cmds();
+	else
+		cmds = mlx5_fs_cmd_get_fw_cmds();
+	if (!cmds)
+		return -EOPNOTSUPP;
+
+	err = cmds->create_ns(root);
+	if (err) {
+		mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n",
+			      err);
+		return err;
+	}
+
+	root->cmds->destroy_ns(root);
+	root->cmds = cmds;
+	root->mode = mode;
+
+	return 0;
+}
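
A sketch of how a consumer such as the eswitch might flip the FDB root to software-managed steering, assuming (as the comment above requires) that no steering operations are in flight:

	struct mlx5_flow_namespace *ns;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!ns)
		return -EOPNOTSUPP;

	return mlx5_flow_namespace_set_mode(ns, MLX5_FLOW_STEERING_MODE_SMFS);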
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 32070e5..c2621b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -36,10 +36,41 @@
 #include <linux/refcount.h>
 #include <linux/mlx5/fs.h>
 #include <linux/rhashtable.h>
+#include <linux/llist.h>
+#include <steering/fs_dr.h>
+
+struct mlx5_modify_hdr {
+	enum mlx5_flow_namespace_type ns_type;
+	union {
+		struct mlx5_fs_dr_action action;
+		u32 id;
+	};
+};
+
+struct mlx5_pkt_reformat {
+	enum mlx5_flow_namespace_type ns_type;
+	int reformat_type; /* from mlx5_ifc */
+	union {
+		struct mlx5_fs_dr_action action;
+		u32 id;
+	};
+};
+
+/* FS_TYPE_PRIO_CHAINS is a PRIO that contains only namespaces, which
+ * are parallel to one another when walking them to connect a new flow
+ * table: the last flow table in a TYPE_PRIO prio of one parallel
+ * namespace does not automatically connect to the first flow table of
+ * the next namespace, but instead skips past the entire containing
+ * TYPE_PRIO_CHAINS prio.
+ *
+ * This is used to implement tc chains; each chain of prios is a
+ * different namespace inside the containing TYPE_PRIO_CHAINS prio.
+ */
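
A rough picture of the FDB layout this produces (simplified sketch; see init_fdb_root_ns in fs_core.c):

	FDB root namespace
	  prio FDB_BYPASS_PATH	(FS_TYPE_PRIO)
	  prio FDB_FAST_PATH	(FS_TYPE_PRIO_CHAINS)
	    ns chain 0:  prio 0 .. prio N	<- tables chain only inside this ns
	    ns chain 1:  prio 0 .. prio N
	    ...
	  prio FDB_SLOW_PATH	(FS_TYPE_PRIO)

	A miss past the last table of a chain skips to the first table after
	the FAST_PATH prio (the slow path), never into a sibling chain.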
 
 enum fs_node_type {
 	FS_TYPE_NAMESPACE,
 	FS_TYPE_PRIO,
+	FS_TYPE_PRIO_CHAINS,
 	FS_TYPE_FLOW_TABLE,
 	FS_TYPE_FLOW_GROUP,
 	FS_TYPE_FLOW_ENTRY,
@@ -54,7 +85,8 @@
 	FS_FT_FDB             = 0X4,
 	FS_FT_SNIFFER_RX	= 0X5,
 	FS_FT_SNIFFER_TX	= 0X6,
-	FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX,
+	FS_FT_RDMA_RX		= 0X7,
+	FS_FT_MAX_TYPE = FS_FT_RDMA_RX,
 };
 
 enum fs_flow_table_op_mod {
@@ -66,16 +98,24 @@
 	FS_FTE_STATUS_EXISTING = 1UL << 0,
 };
 
+enum mlx5_flow_steering_mode {
+	MLX5_FLOW_STEERING_MODE_DMFS,
+	MLX5_FLOW_STEERING_MODE_SMFS
+};
+
 struct mlx5_flow_steering {
 	struct mlx5_core_dev *dev;
-	struct kmem_cache               *fgs_cache;
+	enum   mlx5_flow_steering_mode	mode;
+	struct kmem_cache		*fgs_cache;
 	struct kmem_cache               *ftes_cache;
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_root_namespace *fdb_root_ns;
+	struct mlx5_flow_namespace	**fdb_sub_ns;
 	struct mlx5_flow_root_namespace **esw_egress_root_ns;
 	struct mlx5_flow_root_namespace **esw_ingress_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_tx_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_rx_root_ns;
+	struct mlx5_flow_root_namespace	*rdma_rx_root_ns;
 	struct mlx5_flow_root_namespace	*egress_root_ns;
 };
 
@@ -112,6 +152,7 @@
 /* Type of children is mlx5_flow_group */
 struct mlx5_flow_table {
 	struct fs_node			node;
+	struct mlx5_fs_dr_table		fs_dr_table;
 	u32				id;
 	u16				vport;
 	unsigned int			max_fte;
@@ -121,6 +162,7 @@
 	struct {
 		bool			active;
 		unsigned int		required_groups;
+		unsigned int		group_size;
 		unsigned int		num_groups;
 	} autogroup;
 	/* Protect fwd_rules */
@@ -129,29 +171,7 @@
 	struct list_head		fwd_rules;
 	u32				flags;
 	struct rhltable			fgs_hash;
-};
-
-struct mlx5_fc_cache {
-	u64 packets;
-	u64 bytes;
-	u64 lastuse;
-};
-
-struct mlx5_fc {
-	struct rb_node node;
-	struct list_head list;
-
-	/* last{packets,bytes} members are used when calculating the delta since
-	 * last reading
-	 */
-	u64 lastpackets;
-	u64 lastbytes;
-
-	u32 id;
-	bool deleted;
-	bool aging;
-
-	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+	enum mlx5_flow_table_miss_action def_miss_action;
 };
 
 struct mlx5_ft_underlay_qp {
@@ -159,7 +179,7 @@
 	u32 qpn;
 };
 
-#define MLX5_FTE_MATCH_PARAM_RESERVED	reserved_at_800
+#define MLX5_FTE_MATCH_PARAM_RESERVED	reserved_at_a00
 /* Calculate the fte_match_param length, without the reserved length.
  * Make sure the reserved field is the last.
  */
@@ -174,13 +194,16 @@
 /* Type of children is mlx5_flow_rule */
 struct fs_fte {
 	struct fs_node			node;
+	struct mlx5_fs_dr_rule		fs_dr_rule;
 	u32				val[MLX5_ST_SZ_DW_MATCH_PARAM];
 	u32				dests_size;
 	u32				index;
+	struct mlx5_flow_context	flow_context;
 	struct mlx5_flow_act		action;
 	enum fs_fte_status		status;
 	struct mlx5_fc			*counter;
 	struct rhash_head		hash;
+	int				modify_mask;
 };
 
 /* Type of children is mlx5_flow_table/namespace */
@@ -196,6 +219,7 @@
 struct mlx5_flow_namespace {
 	/* parent == NULL => root ns */
 	struct	fs_node			node;
+	enum mlx5_flow_table_miss_action def_miss_action;
 };
 
 struct mlx5_flow_group_mask {
@@ -206,6 +230,7 @@
 /* Type of children is fs_fte */
 struct mlx5_flow_group {
 	struct fs_node			node;
+	struct mlx5_fs_dr_matcher	fs_dr_matcher;
 	struct mlx5_flow_group_mask	mask;
 	u32				start_index;
 	u32				max_ftes;
@@ -217,6 +242,8 @@
 
 struct mlx5_flow_root_namespace {
 	struct mlx5_flow_namespace	ns;
+	enum   mlx5_flow_steering_mode	mode;
+	struct mlx5_fs_dr_domain	fs_dr_domain;
 	enum   fs_flow_table_type	table_type;
 	struct mlx5_core_dev		*dev;
 	struct mlx5_flow_table		*root_ft;
@@ -234,6 +261,14 @@
 void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
 				      unsigned long interval);
 
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
+
+int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
+				 struct mlx5_flow_root_namespace *peer_ns);
+
+int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
+				 enum mlx5_flow_steering_mode mode);
+
 int mlx5_init_fs(struct mlx5_core_dev *dev);
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
 
@@ -279,7 +314,8 @@
 	(type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) :		\
 	(type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) :		\
 	(type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) :		\
-	(BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\
+	(type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) :		\
+	(BUILD_BUG_ON_ZERO(FS_FT_RDMA_RX != FS_FT_MAX_TYPE))\
 	)
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 58af6be..ab69eff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -40,6 +40,37 @@
 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
 /* Max number of counters to query in bulk read is 32K */
 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
+#define MLX5_FC_POOL_USED_BUFF_RATIO 10
+
+struct mlx5_fc_cache {
+	u64 packets;
+	u64 bytes;
+	u64 lastuse;
+};
+
+struct mlx5_fc {
+	struct list_head list;
+	struct llist_node addlist;
+	struct llist_node dellist;
+
+	/* last{packets,bytes} members are used when calculating the delta since
+	 * last reading
+	 */
+	u64 lastpackets;
+	u64 lastbytes;
+
+	struct mlx5_fc_bulk *bulk;
+	u32 id;
+	bool aging;
+
+	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev);
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool);
+static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool);
+static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc);
 
 /* locking scheme:
  *
@@ -52,11 +83,13 @@
  * access to counter list:
  * - create (user context)
  *   - mlx5_fc_create() only adds to an addlist to be used by
- *     mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ *     mlx5_fc_stats_work(). addlist is a lockless singly linked list
+ *     that doesn't require any additional synchronization when adding a
+ *     single node.
  *   - spawn thread to do the actual destroy
  *
  * - destroy (user context)
- *   - mark a counter as deleted
+ *   - add a counter to lockless dellist
  *   - spawn thread to do the actual del
  *
  * - dump (user context)
@@ -71,89 +104,125 @@
  *   elapsed, the thread will actually query the hardware.
  */
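
The scheme above is the standard llist producer/consumer pattern; a minimal sketch (fragments, not a compilable unit, using the fields added below):

	/* producers, any context: push and never block */
	llist_add(&counter->addlist, &fc_stats->addlist);

	/* single worker: detach the whole list atomically, then walk it */
	struct llist_node *list = llist_del_all(&fc_stats->addlist);
	struct mlx5_fc *iter, *tmp;

	llist_for_each_entry_safe(iter, tmp, list, addlist)
		mlx5_fc_stats_insert(dev, iter);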
 
-static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
+						      u32 id)
 {
-	struct rb_node **new = &root->rb_node;
-	struct rb_node *parent = NULL;
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	unsigned long next_id = (unsigned long)id + 1;
+	struct mlx5_fc *counter;
+	unsigned long tmp;
 
-	while (*new) {
-		struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
-		int result = counter->id - this->id;
-
-		parent = *new;
-		if (result < 0)
-			new = &((*new)->rb_left);
-		else
-			new = &((*new)->rb_right);
+	rcu_read_lock();
+	/* skip counters that are in idr, but not yet in counters list */
+	idr_for_each_entry_continue_ul(&fc_stats->counters_idr,
+				       counter, tmp, next_id) {
+		if (!list_empty(&counter->list))
+			break;
 	}
+	rcu_read_unlock();
 
-	/* Add new node and rebalance tree. */
-	rb_link_node(&counter->node, parent, new);
-	rb_insert_color(&counter->node, root);
+	return counter ? &counter->list : &fc_stats->counters;
 }
 
-/* The function returns the last node that was queried so the caller
- * function can continue calling it till all counters are queried.
- */
-static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
-					   struct mlx5_fc *first,
-					   u32 last_id)
+static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
+				 struct mlx5_fc *counter)
 {
-	struct mlx5_cmd_fc_bulk *b;
-	struct rb_node *node = NULL;
-	u32 afirst_id;
-	int num;
+	struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);
+
+	list_add_tail(&counter->list, next);
+}
+
+static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
+				 struct mlx5_fc *counter)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+	list_del(&counter->list);
+
+	spin_lock(&fc_stats->counters_idr_lock);
+	WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+	spin_unlock(&fc_stats->counters_idr_lock);
+}
+
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
+{
+	return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+			  (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
+
+static void update_counter_cache(int index, u32 *bulk_raw_data,
+				 struct mlx5_fc_cache *cache)
+{
+	void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
+			     flow_statistics[index]);
+	u64 packets = MLX5_GET64(traffic_counter, stats, packets);
+	u64 bytes = MLX5_GET64(traffic_counter, stats, octets);
+
+	if (cache->packets == packets)
+		return;
+
+	cache->packets = packets;
+	cache->bytes = bytes;
+	cache->lastuse = jiffies;
+}
+
+static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
+					      struct mlx5_fc *first,
+					      u32 last_id)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	bool query_more_counters = (first->id <= last_id);
+	int max_bulk_len = get_max_bulk_query_len(dev);
+	u32 *data = fc_stats->bulk_query_out;
+	struct mlx5_fc *counter = first;
+	u32 bulk_base_id;
+	int bulk_len;
 	int err;
 
-	int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
-			     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+	while (query_more_counters) {
+		/* first id must be aligned to 4 when using bulk query */
+		bulk_base_id = counter->id & ~0x3;
 
-	/* first id must be aligned to 4 when using bulk query */
-	afirst_id = first->id & ~0x3;
+		/* number of counters to query, including the last counter */
+		bulk_len = min_t(int, max_bulk_len,
+				 ALIGN(last_id - bulk_base_id + 1, 4));
 
-	/* number of counters to query inc. the last counter */
-	num = ALIGN(last_id - afirst_id + 1, 4);
-	if (num > max_bulk) {
-		num = max_bulk;
-		last_id = afirst_id + num - 1;
+		err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
+					     data);
+		if (err) {
+			mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+			return;
+		}
+		query_more_counters = false;
+
+		list_for_each_entry_from(counter, &fc_stats->counters, list) {
+			int counter_index = counter->id - bulk_base_id;
+			struct mlx5_fc_cache *cache = &counter->cache;
+
+			if (counter->id >= bulk_base_id + bulk_len) {
+				query_more_counters = true;
+				break;
+			}
+
+			update_counter_cache(counter_index, data, cache);
+		}
 	}
+}
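
A worked pass through the loop with assumed ids: for first->id = 6, last_id = 35 and max_bulk_len = 32, the first iteration uses bulk_base_id = 6 & ~0x3 = 4 and bulk_len = min(32, ALIGN(35 - 4 + 1, 4)) = 32, so a single query covers ids 4..35; the list walk then finds no counter past that range, query_more_counters stays false, and the loop ends.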
 
-	b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
-	if (!b) {
-		mlx5_core_err(dev, "Error allocating resources for bulk query\n");
-		return NULL;
-	}
+static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+	mlx5_cmd_fc_free(dev, counter->id);
+	kfree(counter);
+}
 
-	err = mlx5_cmd_fc_bulk_query(dev, b);
-	if (err) {
-		mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
-		goto out;
-	}
+static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 
-	for (node = &first->node; node; node = rb_next(node)) {
-		struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
-		struct mlx5_fc_cache *c = &counter->cache;
-		u64 packets;
-		u64 bytes;
-
-		if (counter->id > last_id)
-			break;
-
-		mlx5_cmd_fc_bulk_get(dev, b,
-				     counter->id, &packets, &bytes);
-
-		if (c->packets == packets)
-			continue;
-
-		c->packets = packets;
-		c->bytes = bytes;
-		c->lastuse = jiffies;
-	}
-
-out:
-	mlx5_cmd_fc_bulk_free(b);
-
-	return node;
+	if (counter->bulk)
+		mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter);
+	else
+		mlx5_fc_free(dev, counter);
 }
 
 static void mlx5_fc_stats_work(struct work_struct *work)
@@ -161,59 +230,42 @@
 	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
 						 priv.fc_stats.work.work);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	/* Take dellist first to ensure that counters cannot be deleted before
+	 * they are inserted.
+	 */
+	struct llist_node *dellist = llist_del_all(&fc_stats->dellist);
+	struct llist_node *addlist = llist_del_all(&fc_stats->addlist);
+	struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
 	unsigned long now = jiffies;
-	struct mlx5_fc *counter = NULL;
-	struct mlx5_fc *last = NULL;
-	struct rb_node *node;
-	LIST_HEAD(tmplist);
 
-	spin_lock(&fc_stats->addlist_lock);
-
-	list_splice_tail_init(&fc_stats->addlist, &tmplist);
-
-	if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
+	if (addlist || !list_empty(&fc_stats->counters))
 		queue_delayed_work(fc_stats->wq, &fc_stats->work,
 				   fc_stats->sampling_interval);
 
-	spin_unlock(&fc_stats->addlist_lock);
+	llist_for_each_entry(counter, addlist, addlist)
+		mlx5_fc_stats_insert(dev, counter);
 
-	list_for_each_entry(counter, &tmplist, list)
-		mlx5_fc_stats_insert(&fc_stats->counters, counter);
+	llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
+		mlx5_fc_stats_remove(dev, counter);
 
-	node = rb_first(&fc_stats->counters);
-	while (node) {
-		counter = rb_entry(node, struct mlx5_fc, node);
-
-		node = rb_next(node);
-
-		if (counter->deleted) {
-			rb_erase(&counter->node, &fc_stats->counters);
-
-			mlx5_cmd_fc_free(dev, counter->id);
-
-			kfree(counter);
-			continue;
-		}
-
-		last = counter;
+		mlx5_fc_release(dev, counter);
 	}
 
-	if (time_before(now, fc_stats->next_query) || !last)
+	if (time_before(now, fc_stats->next_query) ||
+	    list_empty(&fc_stats->counters))
 		return;
+	last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);
 
-	node = rb_first(&fc_stats->counters);
-	while (node) {
-		counter = rb_entry(node, struct mlx5_fc, node);
-
-		node = mlx5_fc_stats_query(dev, counter, last->id);
-	}
+	counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
+				   list);
+	if (counter)
+		mlx5_fc_stats_query_counter_range(dev, counter, last->id);
 
 	fc_stats->next_query = now + fc_stats->sampling_interval;
 }
 
-struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev)
 {
-	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	struct mlx5_fc *counter;
 	int err;
 
@@ -222,29 +274,77 @@
 		return ERR_PTR(-ENOMEM);
 
 	err = mlx5_cmd_fc_alloc(dev, &counter->id);
-	if (err)
-		goto err_out;
+	if (err) {
+		kfree(counter);
+		return ERR_PTR(err);
+	}
+
+	return counter;
+}
+
+static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct mlx5_fc *counter;
+
+	if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) {
+		counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool);
+		if (!IS_ERR(counter))
+			return counter;
+	}
+
+	return mlx5_fc_single_alloc(dev);
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	int err;
+
+	if (IS_ERR(counter))
+		return counter;
+
+	INIT_LIST_HEAD(&counter->list);
+	counter->aging = aging;
 
 	if (aging) {
-		counter->cache.lastuse = jiffies;
-		counter->aging = true;
+		u32 id = counter->id;
 
-		spin_lock(&fc_stats->addlist_lock);
-		list_add(&counter->list, &fc_stats->addlist);
-		spin_unlock(&fc_stats->addlist_lock);
+		counter->cache.lastuse = jiffies;
+		counter->lastbytes = counter->cache.bytes;
+		counter->lastpackets = counter->cache.packets;
+
+		idr_preload(GFP_KERNEL);
+		spin_lock(&fc_stats->counters_idr_lock);
+
+		err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id,
+				    GFP_NOWAIT);
+
+		spin_unlock(&fc_stats->counters_idr_lock);
+		idr_preload_end();
+		if (err)
+			goto err_out_alloc;
+
+		llist_add(&counter->addlist, &fc_stats->addlist);
 
 		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 	}
 
 	return counter;
 
-err_out:
-	kfree(counter);
-
+err_out_alloc:
+	mlx5_fc_release(dev, counter);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(mlx5_fc_create);
 
+u32 mlx5_fc_id(struct mlx5_fc *counter)
+{
+	return counter->id;
+}
+EXPORT_SYMBOL(mlx5_fc_id);
+
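
A sketch of the intended use, wiring a counter into a rule destination (the counter_id destination field is assumed from this API generation):

	struct mlx5_flow_destination dest = {};
	struct mlx5_fc *counter;

	counter = mlx5_fc_create(dev, true);	/* aging counter */
	if (IS_ERR(counter))
		return PTR_ERR(counter);

	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest.counter_id = mlx5_fc_id(counter);
	/* pass &dest to mlx5_add_flow_rules() with
	 * MLX5_FLOW_CONTEXT_ACTION_COUNT set in flow_act.action
	 */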
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
@@ -253,65 +353,69 @@
 		return;
 
 	if (counter->aging) {
-		counter->deleted = true;
+		llist_add(&counter->dellist, &fc_stats->dellist);
 		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 		return;
 	}
 
-	mlx5_cmd_fc_free(dev, counter->id);
-	kfree(counter);
+	mlx5_fc_release(dev, counter);
 }
 EXPORT_SYMBOL(mlx5_fc_destroy);
 
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	int max_bulk_len;
+	int max_out_len;
 
-	fc_stats->counters = RB_ROOT;
-	INIT_LIST_HEAD(&fc_stats->addlist);
-	spin_lock_init(&fc_stats->addlist_lock);
+	spin_lock_init(&fc_stats->counters_idr_lock);
+	idr_init(&fc_stats->counters_idr);
+	INIT_LIST_HEAD(&fc_stats->counters);
+	init_llist_head(&fc_stats->addlist);
+	init_llist_head(&fc_stats->dellist);
+
+	max_bulk_len = get_max_bulk_query_len(dev);
+	max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+	fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+	if (!fc_stats->bulk_query_out)
+		return -ENOMEM;
 
 	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
 	if (!fc_stats->wq)
-		return -ENOMEM;
+		goto err_wq_create;
 
 	fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
 	INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
 
+	mlx5_fc_pool_init(&fc_stats->fc_pool, dev);
 	return 0;
+
+err_wq_create:
+	kfree(fc_stats->bulk_query_out);
+	return -ENOMEM;
 }
 
 void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct llist_node *tmplist;
 	struct mlx5_fc *counter;
 	struct mlx5_fc *tmp;
-	struct rb_node *node;
 
 	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
-	list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
-		list_del(&counter->list);
+	tmplist = llist_del_all(&fc_stats->addlist);
+	llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
+		mlx5_fc_release(dev, counter);
 
-		mlx5_cmd_fc_free(dev, counter->id);
+	list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
+		mlx5_fc_release(dev, counter);
 
-		kfree(counter);
-	}
-
-	node = rb_first(&fc_stats->counters);
-	while (node) {
-		counter = rb_entry(node, struct mlx5_fc, node);
-
-		node = rb_next(node);
-
-		rb_erase(&counter->node, &fc_stats->counters);
-
-		mlx5_cmd_fc_free(dev, counter->id);
-
-		kfree(counter);
-	}
+	mlx5_fc_pool_cleanup(&fc_stats->fc_pool);
+	idr_destroy(&fc_stats->counters_idr);
+	kfree(fc_stats->bulk_query_out);
 }
 
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
@@ -321,6 +425,11 @@
 }
 EXPORT_SYMBOL(mlx5_fc_query);
 
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+	return counter->cache.lastuse;
+}
+
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse)
 {
@@ -353,3 +462,243 @@
 	fc_stats->sampling_interval = min_t(unsigned long, interval,
 					    fc_stats->sampling_interval);
 }
+
+/* Flow counter bulks */
+
+struct mlx5_fc_bulk {
+	struct list_head pool_list;
+	u32 base_id;
+	int bulk_len;
+	unsigned long *bitmask;
+	struct mlx5_fc fcs[];
+};
+
+static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
+			 u32 id)
+{
+	counter->bulk = bulk;
+	counter->id = id;
+}
+
+static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
+{
+	return bitmap_weight(bulk->bitmask, bulk->bulk_len);
+}
+
+static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+{
+	enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
+	struct mlx5_fc_bulk *bulk;
+	int err = -ENOMEM;
+	int bulk_len;
+	u32 base_id;
+	int i;
+
+	alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
+	bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;
+
+	bulk = kzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL);
+	if (!bulk)
+		goto err_alloc_bulk;
+
+	bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
+				GFP_KERNEL);
+	if (!bulk->bitmask)
+		goto err_alloc_bitmask;
+
+	err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id);
+	if (err)
+		goto err_mlx5_cmd_bulk_alloc;
+
+	bulk->base_id = base_id;
+	bulk->bulk_len = bulk_len;
+	for (i = 0; i < bulk_len; i++) {
+		mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i);
+		set_bit(i, bulk->bitmask);
+	}
+
+	return bulk;
+
+err_mlx5_cmd_bulk_alloc:
+	kfree(bulk->bitmask);
+err_alloc_bitmask:
+	kfree(bulk);
+err_alloc_bulk:
+	return ERR_PTR(err);
+}
+
+static int
+mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
+{
+	if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
+		mlx5_core_err(dev, "Freeing bulk before all counters were released\n");
+		return -EBUSY;
+	}
+
+	mlx5_cmd_fc_free(dev, bulk->base_id);
+	kfree(bulk->bitmask);
+	kfree(bulk);
+
+	return 0;
+}
+
+static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
+{
+	int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);
+
+	if (free_fc_index >= bulk->bulk_len)
+		return ERR_PTR(-ENOSPC);
+
+	clear_bit(free_fc_index, bulk->bitmask);
+	return &bulk->fcs[free_fc_index];
+}
+
+static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
+{
+	int fc_index = fc->id - bulk->base_id;
+
+	if (test_bit(fc_index, bulk->bitmask))
+		return -EINVAL;
+
+	set_bit(fc_index, bulk->bitmask);
+	return 0;
+}
+
+/* Flow counters pool API */
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev)
+{
+	fc_pool->dev = dev;
+	mutex_init(&fc_pool->pool_lock);
+	INIT_LIST_HEAD(&fc_pool->fully_used);
+	INIT_LIST_HEAD(&fc_pool->partially_used);
+	INIT_LIST_HEAD(&fc_pool->unused);
+	fc_pool->available_fcs = 0;
+	fc_pool->used_fcs = 0;
+	fc_pool->threshold = 0;
+}
+
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+	struct mlx5_fc_bulk *bulk;
+	struct mlx5_fc_bulk *tmp;
+
+	list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list)
+		mlx5_fc_bulk_destroy(dev, bulk);
+	list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list)
+		mlx5_fc_bulk_destroy(dev, bulk);
+	list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list)
+		mlx5_fc_bulk_destroy(dev, bulk);
+}
+
+static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool)
+{
+	fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD,
+				   fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO);
+}
+
+static struct mlx5_fc_bulk *
+mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+	struct mlx5_fc_bulk *new_bulk;
+
+	new_bulk = mlx5_fc_bulk_create(dev);
+	if (!IS_ERR(new_bulk))
+		fc_pool->available_fcs += new_bulk->bulk_len;
+	mlx5_fc_pool_update_threshold(fc_pool);
+	return new_bulk;
+}
+
+static void
+mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+
+	fc_pool->available_fcs -= bulk->bulk_len;
+	mlx5_fc_bulk_destroy(dev, bulk);
+	mlx5_fc_pool_update_threshold(fc_pool);
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_from_list(struct list_head *src_list,
+			       struct list_head *next_list,
+			       bool move_non_full_bulk)
+{
+	struct mlx5_fc_bulk *bulk;
+	struct mlx5_fc *fc;
+
+	if (list_empty(src_list))
+		return ERR_PTR(-ENODATA);
+
+	bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list);
+	fc = mlx5_fc_bulk_acquire_fc(bulk);
+	if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0)
+		list_move(&bulk->pool_list, next_list);
+	return fc;
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool)
+{
+	struct mlx5_fc_bulk *new_bulk;
+	struct mlx5_fc *fc;
+
+	mutex_lock(&fc_pool->pool_lock);
+
+	fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used,
+					    &fc_pool->fully_used, false);
+	if (IS_ERR(fc))
+		fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused,
+						    &fc_pool->partially_used,
+						    true);
+	if (IS_ERR(fc)) {
+		new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool);
+		if (IS_ERR(new_bulk)) {
+			fc = ERR_CAST(new_bulk);
+			goto out;
+		}
+		fc = mlx5_fc_bulk_acquire_fc(new_bulk);
+		list_add(&new_bulk->pool_list, &fc_pool->partially_used);
+	}
+	fc_pool->available_fcs--;
+	fc_pool->used_fcs++;
+
+out:
+	mutex_unlock(&fc_pool->pool_lock);
+	return fc;
+}
+
+static void
+mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+	struct mlx5_fc_bulk *bulk = fc->bulk;
+	int bulk_free_fcs_amount;
+
+	mutex_lock(&fc_pool->pool_lock);
+
+	if (mlx5_fc_bulk_release_fc(bulk, fc)) {
+		mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n");
+		goto unlock;
+	}
+
+	fc_pool->available_fcs++;
+	fc_pool->used_fcs--;
+
+	bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk);
+	if (bulk_free_fcs_amount == 1)
+		list_move_tail(&bulk->pool_list, &fc_pool->partially_used);
+	if (bulk_free_fcs_amount == bulk->bulk_len) {
+		list_del(&bulk->pool_list);
+		if (fc_pool->available_fcs > fc_pool->threshold)
+			mlx5_fc_pool_free_bulk(fc_pool, bulk);
+		else
+			list_add(&bulk->pool_list, &fc_pool->unused);
+	}
+
+unlock:
+	mutex_unlock(&fc_pool->pool_lock);
+}
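
A minimal usage sketch of the pool above (hypothetical caller; fc_pool is assumed valid). Acquire prefers partially used bulks, so bulks drain toward empty and can be reclaimed on release:

	struct mlx5_fc *fc;

	fc = mlx5_fc_pool_acquire_counter(fc_pool);
	if (IS_ERR(fc))
		return PTR_ERR(fc);
	/* ... attach the counter to a flow rule, read its stats ... */
	mlx5_fc_pool_release_counter(fc_pool, fc);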
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 41ad24f..a19790d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -37,6 +37,37 @@
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
 
+enum {
+	MCQS_IDENTIFIER_BOOT_IMG	= 0x1,
+	MCQS_IDENTIFIER_OEM_NVCONFIG	= 0x4,
+	MCQS_IDENTIFIER_MLNX_NVCONFIG	= 0x5,
+	MCQS_IDENTIFIER_CS_TOKEN	= 0x6,
+	MCQS_IDENTIFIER_DBG_TOKEN	= 0x7,
+	MCQS_IDENTIFIER_GEARBOX		= 0xA,
+};
+
+enum {
+	MCQS_UPDATE_STATE_IDLE,
+	MCQS_UPDATE_STATE_IN_PROGRESS,
+	MCQS_UPDATE_STATE_APPLIED,
+	MCQS_UPDATE_STATE_ACTIVE,
+	MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET,
+	MCQS_UPDATE_STATE_FAILED,
+	MCQS_UPDATE_STATE_CANCELED,
+	MCQS_UPDATE_STATE_BUSY,
+};
+
+enum {
+	MCQI_INFO_TYPE_CAPABILITIES	  = 0x0,
+	MCQI_INFO_TYPE_VERSION		  = 0x1,
+	MCQI_INFO_TYPE_ACTIVATION_METHOD  = 0x5,
+};
+
+enum {
+	MCQI_FW_RUNNING_VERSION = 0,
+	MCQI_FW_STORED_VERSION  = 1,
+};
+
 static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
 				  int outlen)
 {
@@ -202,6 +233,18 @@
 			return err;
 	}
 
+	if (MLX5_CAP_GEN(dev, event_cap)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT);
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, tls)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -250,7 +293,7 @@
 	if (ret)
 		return ret;
 
-	force_state = MLX5_GET(teardown_hca_out, out, force_state);
+	force_state = MLX5_GET(teardown_hca_out, out, state);
 	if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
 		mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
 		return -EIO;
@@ -259,6 +302,54 @@
 	return 0;
 }
 
+#define MLX5_FAST_TEARDOWN_WAIT_MS   3000
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
+{
+	unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
+	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+	int state;
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev, fast_teardown)) {
+		mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+	MLX5_SET(teardown_hca_in, in, profile,
+		 MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	state = MLX5_GET(teardown_hca_out, out, state);
+	if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+		mlx5_core_warn(dev, "teardown with fast mode failed\n");
+		return -EIO;
+	}
+
+	mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED);
+
+	/* Poll until the device state turns to disabled */
+	end = jiffies + msecs_to_jiffies(delay_ms);
+	do {
+		if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+			break;
+
+		cond_resched();
+	} while (!time_after(jiffies, end));
+
+	if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+		dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+			mlx5_get_nic_state(dev), delay_ms);
+		return -EIO;
+	}
+
+	return 0;
+}
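
A hedged sketch of how a caller might chain the teardown flavors; the actual call site lives elsewhere in the driver and may differ:

	err = mlx5_cmd_fast_teardown_hca(dev);
	if (err) {
		/* -EOPNOTSUPP or -EIO: fall back to force teardown,
		 * and finally to the normal teardown path.
		 */
		err = mlx5_cmd_force_teardown_hca(dev);
		if (err)
			mlx5_core_dbg(dev, "using normal teardown\n");
	}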
+
 enum mlxsw_reg_mcc_instruction {
 	MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
 	MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
@@ -344,33 +435,49 @@
 }
 
 static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev,
-			       u16 component_index,
-			       u32 *max_component_size,
-			       u8 *log_mcda_word_size,
-			       u16 *mcda_max_write_size)
+			       u16 component_index, bool read_pending,
+			       u8 info_type, u16 data_size, void *mcqi_data)
 {
-	u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)];
-	int offset = MLX5_ST_SZ_DW(mcqi_reg);
-	u32 in[MLX5_ST_SZ_DW(mcqi_reg)];
+	u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {};
+	u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {};
+	void *data;
 	int err;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(mcqi_reg, in, component_index, component_index);
-	MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap));
+	MLX5_SET(mcqi_reg, in, read_pending_component, read_pending);
+	MLX5_SET(mcqi_reg, in, info_type, info_type);
+	MLX5_SET(mcqi_reg, in, data_size, data_size);
 
 	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
-				   sizeof(out), MLX5_REG_MCQI, 0, 0);
+				   MLX5_ST_SZ_BYTES(mcqi_reg) + data_size,
+				   MLX5_REG_MCQI, 0, 0);
 	if (err)
-		goto out;
+		return err;
 
-	*max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size);
-	*log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size);
-	*mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size);
+	data = MLX5_ADDR_OF(mcqi_reg, out, data);
+	memcpy(mcqi_data, data, data_size);
 
-out:
-	return err;
+	return 0;
+}
+
+static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index,
+				    u32 *max_component_size, u8 *log_mcda_word_size,
+				    u16 *mcda_max_write_size)
+{
+	u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {};
+	int err;
+
+	err = mlx5_reg_mcqi_query(dev, component_index, 0,
+				  MCQI_INFO_TYPE_CAPABILITIES,
+				  MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg);
+	if (err)
+		return err;
+
+	*max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size);
+	*log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size);
+	*mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size);
+
+	return 0;
 }
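
The reworked mlx5_reg_mcqi_query() above is deliberately generic over info_type and data_size; the version query added further down in this patch reuses it with MCQI_INFO_TYPE_VERSION.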
 
 struct mlx5_mlxfw_dev {
@@ -386,8 +493,13 @@
 		container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
 	struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
 
-	return mlx5_reg_mcqi_query(dev, component_index, p_max_size,
-				   p_align_bits, p_max_write_size);
+	if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) {
+		mlx5_core_warn(dev, "caps query isn't supported by running FW\n");
+		return -EOPNOTSUPP;
+	}
+
+	return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size,
+					p_align_bits, p_max_write_size);
 }
 
 static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
@@ -504,7 +616,8 @@
 };
 
 int mlx5_firmware_flash(struct mlx5_core_dev *dev,
-			const struct firmware *firmware)
+			const struct firmware *firmware,
+			struct netlink_ext_ack *extack)
 {
 	struct mlx5_mlxfw_dev mlx5_mlxfw_dev = {
 		.mlxfw_dev = {
@@ -523,5 +636,133 @@
 		return -EOPNOTSUPP;
 	}
 
-	return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware);
+	return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev,
+				    firmware, extack);
+}
+
+static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev,
+				       u16 component_index, bool read_pending,
+				       u32 *mcqi_version_out)
+{
+	return mlx5_reg_mcqi_query(dev, component_index, read_pending,
+				   MCQI_INFO_TYPE_VERSION,
+				   MLX5_ST_SZ_BYTES(mcqi_version),
+				   mcqi_version_out);
+}
+
+static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out,
+			       u16 component_index)
+{
+	u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg);
+	u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+	int err;
+
+	memset(out, 0, out_sz);
+
+	MLX5_SET(mcqs_reg, in, component_index, component_index);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+				   out_sz, MLX5_REG_MCQS, 0, 0);
+	return err;
+}
+
+/* Scan component indices sequentially to find the boot image index */
+static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+	u16 identifier, component_idx = 0;
+	bool quit;
+	int err;
+
+	do {
+		err = mlx5_reg_mcqs_query(dev, out, component_idx);
+		if (err)
+			return err;
+
+		identifier = MLX5_GET(mcqs_reg, out, identifier);
+		quit = !!MLX5_GET(mcqs_reg, out, last_index_flag);
+		quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG;
+	} while (!quit && ++component_idx);
+
+	if (identifier != MCQS_IDENTIFIER_BOOT_IMG) {
+		mlx5_core_warn(dev, "mcqs: can't find boot_img component idx, last scanned idx %d\n",
+			       component_idx);
+		return -EOPNOTSUPP;
+	}
+
+	return component_idx;
+}
+
+static int
+mlx5_fw_image_pending(struct mlx5_core_dev *dev,
+		      int component_index,
+		      bool *pending_version_exists)
+{
+	u32 out[MLX5_ST_SZ_DW(mcqs_reg)];
+	u8 component_update_state;
+	int err;
+
+	err = mlx5_reg_mcqs_query(dev, out, component_index);
+	if (err)
+		return err;
+
+	component_update_state = MLX5_GET(mcqs_reg, out, component_update_state);
+
+	if (component_update_state == MCQS_UPDATE_STATE_IDLE) {
+		*pending_version_exists = false;
+	} else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) {
+		*pending_version_exists = true;
+	} else {
+		mlx5_core_warn(dev,
+			       "mcqs: can't read pending fw version while fw state is %d\n",
+			       component_update_state);
+		return -ENODATA;
+	}
+	return 0;
+}
+
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+			  u32 *running_ver, u32 *pending_ver)
+{
+	u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {};
+	bool pending_version_exists;
+	int component_index;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+	    !MLX5_CAP_MCAM_REG(dev, mcqs)) {
+		mlx5_core_warn(dev, "fw query isn't supported by the FW\n");
+		return -EOPNOTSUPP;
+	}
+
+	component_index = mlx5_get_boot_img_component_index(dev);
+	if (component_index < 0)
+		return component_index;
+
+	err = mlx5_reg_mcqi_version_query(dev, component_index,
+					  MCQI_FW_RUNNING_VERSION,
+					  reg_mcqi_version);
+	if (err)
+		return err;
+
+	*running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+	err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists);
+	if (err)
+		return err;
+
+	if (!pending_version_exists) {
+		*pending_ver = 0;
+		return 0;
+	}
+
+	err = mlx5_reg_mcqi_version_query(dev, component_index,
+					  MCQI_FW_STORED_VERSION,
+					  reg_mcqi_version);
+	if (err)
+		return err;
+
+	*pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+	return 0;
 }
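
A small consumer sketch (hypothetical caller; the in-tree user is the devlink info path). A pending_ver of zero means no image is waiting for activation:

	u32 running, pending;
	int err;

	err = mlx5_fw_version_query(dev, &running, &pending);
	if (!err) {
		mlx5_core_info(dev, "running fw 0x%08x\n", running);
		if (pending)
			mlx5_core_info(dev, "stored fw 0x%08x pending activation\n",
				       pending);
	}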
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9f39aec..c07f315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -38,6 +38,10 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+#include "lib/pci_vsc.h"
+#include "diag/fw_tracer.h"
 
 enum {
 	MLX5_HEALTH_POLL_INTERVAL	= 2 * HZ,
@@ -59,57 +63,132 @@
 };
 
 enum {
-	MLX5_NIC_IFC_FULL		= 0,
-	MLX5_NIC_IFC_DISABLED		= 1,
-	MLX5_NIC_IFC_NO_DRAM_NIC	= 2,
-	MLX5_NIC_IFC_INVALID		= 3
-};
-
-enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
-	MLX5_DROP_NEW_RECOVERY_WORK,
 };
 
-static u8 get_nic_state(struct mlx5_core_dev *dev)
+enum  {
+	MLX5_SENSOR_NO_ERR		= 0,
+	MLX5_SENSOR_PCI_COMM_ERR	= 1,
+	MLX5_SENSOR_PCI_ERR		= 2,
+	MLX5_SENSOR_NIC_DISABLED	= 3,
+	MLX5_SENSOR_NIC_SW_RESET	= 4,
+	MLX5_SENSOR_FW_SYND_RFR		= 5,
+};
+
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
 {
-	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
 }
 
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
 {
-	unsigned long flags;
-	u64 vector;
+	u32 cur_cmdq_addr_l_sz;
 
-	/* wait for pending handlers to complete */
-	synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
-	spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
-	vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
-	if (!vector)
-		goto no_trig;
-
-	vector |= MLX5_TRIGGERED_CMD_COMP;
-	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
-	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-	mlx5_cmd_comp_handler(dev, vector, true);
-	return;
-
-no_trig:
-	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+	cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
+	iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
+		    state << MLX5_NIC_IFC_OFFSET,
+		    &dev->iseg->cmdq_addr_l_sz);
 }
 
-static int in_fatal(struct mlx5_core_dev *dev)
+static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 	struct health_buffer __iomem *h = health->health;
 
-	if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
-		return 1;
+	/* Offline PCI reads return 0xffffffff */
+	return (ioread32be(&h->fw_ver) == 0xffffffff);
+}
 
-	if (ioread32be(&h->fw_ver) == 0xffffffff)
-		return 1;
+static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+	u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
+	u8 synd = ioread8(&h->synd);
 
-	return 0;
+	if (rfr && synd)
+		mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
+	return rfr && synd;
+}
+
+static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
+{
+	if (sensor_pci_not_working(dev))
+		return MLX5_SENSOR_PCI_COMM_ERR;
+	if (pci_channel_offline(dev->pdev))
+		return MLX5_SENSOR_PCI_ERR;
+	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+		return MLX5_SENSOR_NIC_DISABLED;
+	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
+		return MLX5_SENSOR_NIC_SW_RESET;
+	if (sensor_fw_synd_rfr(dev))
+		return MLX5_SENSOR_FW_SYND_RFR;
+
+	return MLX5_SENSOR_NO_ERR;
+}
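
The sensors above are checked from most to least severe: once PCI reads return all-ones, the NIC-interface and health-buffer reads below it are meaningless, so the first failing sensor is reported and treated as the dominant error.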
+
+static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
+{
+	enum mlx5_vsc_state state;
+	int ret;
+
+	if (!mlx5_core_is_pf(dev))
+		return -EBUSY;
+
+	/* Try to lock GW access. This stage doesn't return
+	 * EBUSY, because a locked GW does not mean that another PF
+	 * has already started the reset.
+	 */
+	ret = mlx5_vsc_gw_lock(dev);
+	if (ret == -EBUSY)
+		return -EINVAL;
+	if (ret)
+		return ret;
+
+	state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
+	/* At this stage, if the return status == EBUSY, then we know
+	 * for sure that another PF started the reset, so don't allow
+	 * another reset.
+	 */
+	ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
+	if (ret)
+		mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+
+	/* Unlock GW access */
+	mlx5_vsc_gw_unlock(dev);
+
+	return ret;
+}
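
Note the two-level protocol here: the VSC gateway lock only serializes access to the semaphore space, while the semaphore itself records that a PF owns the SW reset. That is why only -EBUSY from mlx5_vsc_sem_set_space() indicates that another PF has already started the reset.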
+
+static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
+{
+	bool supported = (ioread32be(&dev->iseg->initializing) >>
+			  MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
+	u32 fatal_error;
+
+	if (!supported)
+		return false;
+
+	/* The reset only needs to be issued by one PF. The health buffer is
+	 * shared between all functions, and will be cleared during a reset.
+	 * Check again to avoid a redundant second reset. If the fatal error
+	 * was PCI related, a reset won't help.
+	 */
+	fatal_error = check_fatal_sensors(dev);
+	if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
+	    fatal_error == MLX5_SENSOR_NIC_DISABLED ||
+	    fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
+		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
+		return false;
+	}
+
+	mlx5_core_warn(dev, "Issuing FW Reset\n");
+	/* Write the NIC interface field to initiate the reset; the command
+	 * interface address also resides here, so don't overwrite it.
+	 */
+	mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
+
+	return true;
 }
 
 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
@@ -117,14 +196,65 @@
 	mutex_lock(&dev->intf_state_mutex);
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		goto unlock;
-
-	mlx5_core_err(dev, "start\n");
-	if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
+	if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
 		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-		trigger_cmd_completions(dev);
+		goto unlock;
 	}
 
-	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+	if (check_fatal_sensors(dev) || force) {
+		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+		mlx5_cmd_flush(dev);
+	}
+
+	mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+unlock:
+	mutex_unlock(&dev->intf_state_mutex);
+}
+
+#define MLX5_CRDUMP_WAIT_MS	60000
+#define MLX5_FW_RESET_WAIT_MS	1000
+void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
+{
+	unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
+	int lock = -EBUSY;
+
+	mutex_lock(&dev->intf_state_mutex);
+	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		goto unlock;
+
+	mlx5_core_err(dev, "start\n");
+
+	if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
+		/* Get cr-dump and reset FW semaphore */
+		lock = lock_sem_sw_reset(dev, true);
+
+		if (lock == -EBUSY) {
+			delay_ms = MLX5_CRDUMP_WAIT_MS;
+			goto recover_from_sw_reset;
+		}
+		/* Execute SW reset */
+		reset_fw_if_needed(dev);
+	}
+
+recover_from_sw_reset:
+	/* Recover from SW reset */
+	end = jiffies + msecs_to_jiffies(delay_ms);
+	do {
+		if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+			break;
+
+		cond_resched();
+	} while (!time_after(jiffies, end));
+
+	if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+		dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+			mlx5_get_nic_state(dev), delay_ms);
+	}
+
+	/* Release the FW semaphore if we are the lock owner */
+	if (!lock)
+		lock_sem_sw_reset(dev, false);
+
 	mlx5_core_err(dev, "end\n");
 
 unlock:
@@ -133,7 +263,7 @@
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 {
-	u8 nic_interface = get_nic_state(dev);
+	u8 nic_interface = mlx5_get_nic_state(dev);
 
 	switch (nic_interface) {
 	case MLX5_NIC_IFC_FULL:
@@ -147,6 +277,20 @@
 	case MLX5_NIC_IFC_NO_DRAM_NIC:
 		mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
 		break;
+
+	case MLX5_NIC_IFC_SW_RESET:
+		/* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
+		 * 1. PCI has been disabled (e.g. PCI-AER, PF driver unloaded
+		 *    and this is a VF); this is not recoverable by SW reset.
+		 *    Logging of this is handled elsewhere.
+		 * 2. FW reset has been issued by another function, driver can
+		 *    be reloaded to recover after the mode switches to
+		 *    MLX5_NIC_IFC_DISABLED.
+		 */
+		if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
+			mlx5_core_warn(dev, "NIC SW reset in progress\n");
+		break;
+
 	default:
 		mlx5_core_warn(dev, "Expected to see disabled NIC but it has invalid value %d\n",
 			       nic_interface);
@@ -155,52 +299,32 @@
 	mlx5_disable_device(dev);
 }
 
-static void health_recover(struct work_struct *work)
-{
-	struct mlx5_core_health *health;
-	struct delayed_work *dwork;
-	struct mlx5_core_dev *dev;
-	struct mlx5_priv *priv;
-	u8 nic_state;
-
-	dwork = container_of(work, struct delayed_work, work);
-	health = container_of(dwork, struct mlx5_core_health, recover_work);
-	priv = container_of(health, struct mlx5_priv, health);
-	dev = container_of(priv, struct mlx5_core_dev, priv);
-
-	nic_state = get_nic_state(dev);
-	if (nic_state == MLX5_NIC_IFC_INVALID) {
-		dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
-		return;
-	}
-
-	dev_err(&dev->pdev->dev, "starting health recovery flow\n");
-	mlx5_recover_device(dev);
-}
-
 /* How much time to wait until health resetting the driver (in msecs) */
-#define MLX5_RECOVERY_DELAY_MSECS 60000
-static void health_care(struct work_struct *work)
+#define MLX5_RECOVERY_WAIT_MSECS 60000
+static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
 {
-	unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS);
-	struct mlx5_core_health *health;
-	struct mlx5_core_dev *dev;
-	struct mlx5_priv *priv;
-	unsigned long flags;
+	unsigned long end;
 
-	health = container_of(work, struct mlx5_core_health, work);
-	priv = container_of(health, struct mlx5_priv, health);
-	dev = container_of(priv, struct mlx5_core_dev, priv);
 	mlx5_core_warn(dev, "handling bad device here\n");
 	mlx5_handle_bad_state(dev);
+	end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
+	while (sensor_pci_not_working(dev)) {
+		if (time_after(jiffies, end)) {
+			mlx5_core_err(dev,
+				      "health recovery flow aborted, PCI reads still not working\n");
+			return -EIO;
+		}
+		msleep(100);
+	}
 
-	spin_lock_irqsave(&health->wq_lock, flags);
-	if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
-		schedule_delayed_work(&health->recover_work, recover_delay);
-	else
-		dev_err(&dev->pdev->dev,
-			"new health works are not permitted at this stage\n");
-	spin_unlock_irqrestore(&health->wq_lock, flags);
+	mlx5_core_err(dev, "starting health recovery flow\n");
+	mlx5_recover_device(dev);
+	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) ||
+	    check_fatal_sensors(dev)) {
+		mlx5_core_err(dev, "health recovery failed\n");
+		return -EIO;
+	}
+	return 0;
 }
 
 static const char *hsynd_str(u8 synd)
@@ -246,18 +370,299 @@
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
-		dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
+		mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
+			      ioread32be(h->assert_var + i));
 
-	dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
-	dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+	mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
+		      ioread32be(&h->assert_exit_ptr));
+	mlx5_core_err(dev, "assert_callra 0x%08x\n",
+		      ioread32be(&h->assert_callra));
 	sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
-	dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str);
-	dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
-	dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index));
-	dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
-	dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+	mlx5_core_err(dev, "fw_ver %s\n", fw_str);
+	mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+	mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+	mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
+		      hsynd_str(ioread8(&h->synd)));
+	mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
 	fw = ioread32be(&h->fw_ver);
-	dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw);
+	mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
+}
+
+static int
+mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+			  struct devlink_fmsg *fmsg)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+	u8 synd;
+	int err;
+
+	synd = ioread8(&h->synd);
+	err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
+	if (err || !synd)
+		return err;
+	return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
+}
+
+struct mlx5_fw_reporter_ctx {
+	u8 err_synd;
+	int miss_counter;
+};
+
+static int
+mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
+			       struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
+{
+	int err;
+
+	err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
+				       fw_reporter_ctx->err_synd);
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
+					fw_reporter_ctx->miss_counter);
+	if (err)
+		return err;
+	return 0;
+}
+
+static int
+mlx5_fw_reporter_health_buffer_data_put(struct mlx5_core_dev *dev,
+				       struct devlink_fmsg *fmsg)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+	int err;
+	int i;
+
+	if (!ioread8(&h->synd))
+		return 0;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
+	if (err)
+		return err;
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
+		err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
+		if (err)
+			return err;
+	}
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
+					ioread32be(&h->assert_exit_ptr));
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
+					ioread32be(&h->assert_callra));
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
+	if (err)
+		return err;
+	err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
+				       ioread8(&h->irisc_index));
+	if (err)
+		return err;
+	err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
+					ioread16be(&h->ext_synd));
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
+					ioread32be(&h->fw_ver));
+	if (err)
+		return err;
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+	return devlink_fmsg_pair_nest_end(fmsg);
+}
+
+static int
+mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
+		      struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	int err;
+
+	err = mlx5_fw_tracer_trigger_core_dump_general(dev);
+	if (err)
+		return err;
+
+	if (priv_ctx) {
+		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+		if (err)
+			return err;
+	}
+
+	err = mlx5_fw_reporter_health_buffer_data_put(dev, fmsg);
+	if (err)
+		return err;
+	return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
+}
+
+static void mlx5_fw_reporter_err_work(struct work_struct *work)
+{
+	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+	struct mlx5_core_health *health;
+
+	health = container_of(work, struct mlx5_core_health, report_work);
+
+	if (IS_ERR_OR_NULL(health->fw_reporter))
+		return;
+
+	fw_reporter_ctx.err_synd = health->synd;
+	fw_reporter_ctx.miss_counter = health->miss_counter;
+	if (fw_reporter_ctx.err_synd) {
+		devlink_health_report(health->fw_reporter,
+				      "FW syndrome reported", &fw_reporter_ctx);
+		return;
+	}
+	if (fw_reporter_ctx.miss_counter)
+		devlink_health_report(health->fw_reporter,
+				      "FW miss counter reported",
+				      &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+		.name = "fw",
+		.diagnose = mlx5_fw_reporter_diagnose,
+		.dump = mlx5_fw_reporter_dump,
+};
+
+static int
+mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
+			       void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+
+	return mlx5_health_try_recover(dev);
+}
+
+#define MLX5_CR_DUMP_CHUNK_SIZE 256
+static int
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+			    struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	u32 crdump_size = dev->priv.health.crdump_size;
+	u32 *cr_data;
+	u32 data_size;
+	u32 offset;
+	int err;
+
+	if (!mlx5_core_is_pf(dev))
+		return -EPERM;
+
+	cr_data = kvmalloc(crdump_size, GFP_KERNEL);
+	if (!cr_data)
+		return -ENOMEM;
+	err = mlx5_crdump_collect(dev, cr_data);
+	if (err)
+		goto free_data;
+
+	if (priv_ctx) {
+		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+		if (err)
+			goto free_data;
+	}
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data");
+	if (err)
+		goto free_data;
+	for (offset = 0; offset < crdump_size; offset += data_size) {
+		if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE)
+			data_size = crdump_size - offset;
+		else
+			data_size = MLX5_CR_DUMP_CHUNK_SIZE;
+		err = devlink_fmsg_binary_put(fmsg, (char *)cr_data + offset,
+					      data_size);
+		if (err)
+			goto free_data;
+	}
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+
+free_data:
+	kvfree(cr_data);
+	return err;
+}
+
+static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+{
+	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+	struct mlx5_core_health *health;
+	struct mlx5_core_dev *dev;
+	struct mlx5_priv *priv;
+
+	health = container_of(work, struct mlx5_core_health, fatal_report_work);
+	priv = container_of(health, struct mlx5_priv, health);
+	dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	mlx5_enter_error_state(dev, false);
+	if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+		if (mlx5_health_try_recover(dev))
+			mlx5_core_err(dev, "health recovery failed\n");
+		return;
+	}
+	fw_reporter_ctx.err_synd = health->synd;
+	fw_reporter_ctx.miss_counter = health->miss_counter;
+	devlink_health_report(health->fw_fatal_reporter,
+			      "FW fatal error reported", &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
+		.name = "fw_fatal",
+		.recover = mlx5_fw_fatal_reporter_recover,
+		.dump = mlx5_fw_fatal_reporter_dump,
+};
+
+#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
+static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	health->fw_reporter =
+		devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
+					       0, false, dev);
+	if (IS_ERR(health->fw_reporter))
+		mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
+			       PTR_ERR(health->fw_reporter));
+
+	health->fw_fatal_reporter =
+		devlink_health_reporter_create(devlink,
+					       &mlx5_fw_fatal_reporter_ops,
+					       MLX5_REPORTER_FW_GRACEFUL_PERIOD,
+					       true, dev);
+	if (IS_ERR(health->fw_fatal_reporter))
+		mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
+			       PTR_ERR(health->fw_fatal_reporter));
+}
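
MLX5_REPORTER_FW_GRACEFUL_PERIOD is 1,200,000 ms (20 minutes). With auto-recovery enabled on the fatal reporter, devlink runs mlx5_fw_fatal_reporter_recover() at most once per grace period; a second fatal error within that window leaves the reporter in an error state for the administrator to inspect. (This describes generic devlink health throttling, not logic added in this patch.)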
+
+static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	if (!IS_ERR_OR_NULL(health->fw_reporter))
+		devlink_health_reporter_destroy(health->fw_reporter);
+
+	if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
+		devlink_health_reporter_destroy(health->fw_fatal_reporter);
 }
 
 static unsigned long get_next_poll_jiffies(void)
@@ -278,10 +683,9 @@
 
 	spin_lock_irqsave(&health->wq_lock, flags);
 	if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
-		queue_work(health->wq, &health->work);
+		queue_work(health->wq, &health->fatal_report_work);
 	else
-		dev_err(&dev->pdev->dev,
-			"new health works are not permitted at this stage\n");
+		mlx5_core_err(dev, "new health works are not permitted at this stage\n");
 	spin_unlock_irqrestore(&health->wq_lock, flags);
 }
 
@@ -289,11 +693,24 @@
 {
 	struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
 	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+	u32 fatal_error;
+	u8 prev_synd;
 	u32 count;
 
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		goto out;
 
+	fatal_error = check_fatal_sensors(dev);
+
+	if (fatal_error && !health->fatal_error) {
+		mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
+		dev->priv.health.fatal_error = fatal_error;
+		print_health_info(dev);
+		mlx5_trigger_health_work(dev);
+		goto out;
+	}
+
 	count = ioread32be(health->health_counter);
 	if (count == health->prev)
 		++health->miss_counter;
@@ -302,15 +719,15 @@
 
 	health->prev = count;
 	if (health->miss_counter == MAX_MISSES) {
-		dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
+		mlx5_core_err(dev, "device's health compromised - reached miss count\n");
 		print_health_info(dev);
+		queue_work(health->wq, &health->report_work);
 	}
 
-	if (in_fatal(dev) && !health->sick) {
-		health->sick = true;
-		print_health_info(dev);
-		mlx5_trigger_health_work(dev);
-	}
+	prev_synd = health->synd;
+	health->synd = ioread8(&h->synd);
+	if (health->synd && health->synd != prev_synd)
+		queue_work(health->wq, &health->report_work);
 
 out:
 	mod_timer(&health->timer, get_next_poll_jiffies());
@@ -321,9 +738,8 @@
 	struct mlx5_core_health *health = &dev->priv.health;
 
 	timer_setup(&health->timer, poll_health, 0);
-	health->sick = 0;
+	health->fatal_error = MLX5_SENSOR_NO_ERR;
 	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
-	clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
@@ -339,7 +755,6 @@
 	if (disable_health) {
 		spin_lock_irqsave(&health->wq_lock, flags);
 		set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
-		set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 		spin_unlock_irqrestore(&health->wq_lock, flags);
 	}
 
@@ -353,21 +768,16 @@
 
 	spin_lock_irqsave(&health->wq_lock, flags);
 	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
-	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	spin_unlock_irqrestore(&health->wq_lock, flags);
-	cancel_delayed_work_sync(&health->recover_work);
-	cancel_work_sync(&health->work);
+	cancel_work_sync(&health->report_work);
+	cancel_work_sync(&health->fatal_report_work);
 }
 
-void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+void mlx5_health_flush(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
-	unsigned long flags;
 
-	spin_lock_irqsave(&health->wq_lock, flags);
-	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
-	spin_unlock_irqrestore(&health->wq_lock, flags);
-	cancel_delayed_work_sync(&dev->priv.health.recover_work);
+	flush_workqueue(health->wq);
 }
 
 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
@@ -375,6 +785,7 @@
 	struct mlx5_core_health *health = &dev->priv.health;
 
 	destroy_workqueue(health->wq);
+	mlx5_fw_reporters_destroy(dev);
 }
 
 int mlx5_health_init(struct mlx5_core_dev *dev)
@@ -382,20 +793,26 @@
 	struct mlx5_core_health *health;
 	char *name;
 
+	mlx5_fw_reporters_create(dev);
+
 	health = &dev->priv.health;
 	name = kmalloc(64, GFP_KERNEL);
 	if (!name)
-		return -ENOMEM;
+		goto out_err;
 
 	strcpy(name, "mlx5_health");
-	strcat(name, dev_name(&dev->pdev->dev));
+	strcat(name, dev_name(dev->device));
 	health->wq = create_singlethread_workqueue(name);
 	kfree(name);
 	if (!health->wq)
-		return -ENOMEM;
+		goto out_err;
 	spin_lock_init(&health->wq_lock);
-	INIT_WORK(&health->work, health_care);
-	INIT_DELAYED_WORK(&health->recover_work, health_recover);
+	INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
+	INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
 
 	return 0;
+
+out_err:
+	mlx5_fw_reporters_destroy(dev);
+	return -ENOMEM;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 5b7fe82..3ed8ab2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -45,6 +45,7 @@
 static const struct net_device_ops mlx5i_netdev_ops = {
 	.ndo_open                = mlx5i_open,
 	.ndo_stop                = mlx5i_close,
+	.ndo_get_stats64         = mlx5i_get_stats,
 	.ndo_init                = mlx5i_dev_init,
 	.ndo_uninit              = mlx5i_dev_cleanup,
 	.ndo_change_mtu          = mlx5i_change_mtu,
@@ -67,29 +68,27 @@
 
 	params->lro_en = false;
 	params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+	params->tunneled_offload_en = false;
 }
 
 /* Called directly after IPoIB netdevice was created to initialize SW structs */
-void mlx5i_init(struct mlx5_core_dev *mdev,
-		struct net_device *netdev,
-		const struct mlx5e_profile *profile,
-		void *ppriv)
+int mlx5i_init(struct mlx5_core_dev *mdev,
+	       struct net_device *netdev,
+	       const struct mlx5e_profile *profile,
+	       void *ppriv)
 {
 	struct mlx5e_priv *priv  = mlx5i_epriv(netdev);
-	u16 max_mtu;
+	int err;
 
-	/* priv init */
-	priv->mdev        = mdev;
-	priv->netdev      = netdev;
-	priv->profile     = profile;
-	priv->ppriv       = ppriv;
-	mutex_init(&priv->state_lock);
+	err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
+	if (err)
+		return err;
 
-	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
-	netdev->mtu = max_mtu;
+	mlx5e_set_netdev_mtu_boundaries(priv);
+	netdev->mtu = netdev->max_mtu;
 
-	mlx5e_build_nic_params(mdev, &priv->channels.params,
-			       profile->max_nch(mdev), netdev->mtu);
+	mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params,
+			       priv->max_nch, netdev->mtu);
 	mlx5i_build_nic_params(mdev, &priv->channels.params);
 
 	mlx5e_timestamp_init(priv);
@@ -106,12 +105,55 @@
 
 	netdev->netdev_ops = &mlx5i_netdev_ops;
 	netdev->ethtool_ops = &mlx5i_ethtool_ops;
+
+	return 0;
 }
 
 /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
-static void mlx5i_cleanup(struct mlx5e_priv *priv)
+void mlx5i_cleanup(struct mlx5e_priv *priv)
 {
-	/* Do nothing .. */
+	mlx5e_netdev_cleanup(priv->netdev, priv);
+}
+
+static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
+{
+	struct mlx5e_sw_stats s = { 0 };
+	int i, j;
+
+	for (i = 0; i < priv->max_nch; i++) {
+		struct mlx5e_channel_stats *channel_stats;
+		struct mlx5e_rq_stats *rq_stats;
+
+		channel_stats = &priv->channel_stats[i];
+		rq_stats = &channel_stats->rq;
+
+		s.rx_packets += rq_stats->packets;
+		s.rx_bytes   += rq_stats->bytes;
+
+		for (j = 0; j < priv->max_opened_tc; j++) {
+			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+			s.tx_packets           += sq_stats->packets;
+			s.tx_bytes             += sq_stats->bytes;
+			s.tx_queue_dropped     += sq_stats->dropped;
+		}
+	}
+
+	memcpy(&priv->stats.sw, &s, sizeof(s));
+}
+
+void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct mlx5e_priv     *priv   = mlx5i_epriv(dev);
+	struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+	mlx5i_grp_sw_update_stats(priv);
+
+	stats->rx_packets = sstats->rx_packets;
+	stats->rx_bytes   = sstats->rx_bytes;
+	stats->tx_packets = sstats->tx_packets;
+	stats->tx_bytes   = sstats->tx_bytes;
+	stats->tx_dropped = sstats->tx_queue_dropped;
 }
 
 int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
@@ -214,6 +256,18 @@
 	mlx5_core_destroy_qp(mdev, qp);
 }
 
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
+{
+	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+	void *tisc;
+
+	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+	MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
+
+	return mlx5e_create_tis(mdev, in, tisn);
+}
+
 static int mlx5i_init_tx(struct mlx5e_priv *priv)
 {
 	struct mlx5i_priv *ipriv = priv->ppriv;
@@ -225,7 +279,7 @@
 		return err;
 	}
 
-	err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+	err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0][0]);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
 		goto err_destroy_underlay_qp;
@@ -242,7 +296,7 @@
 {
 	struct mlx5i_priv *ipriv = priv->ppriv;
 
-	mlx5e_destroy_tis(priv->mdev, priv->tisn[0]);
+	mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]);
 	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
 }
 
@@ -306,21 +360,30 @@
 
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
+	mlx5e_create_q_counters(priv);
+
+	err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+	if (err) {
+		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+		goto err_destroy_q_counters;
+	}
+
 	err = mlx5e_create_indirect_rqt(priv);
 	if (err)
-		return err;
+		goto err_close_drop_rq;
 
-	err = mlx5e_create_direct_rqts(priv);
+	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
 	if (err)
 		goto err_destroy_indirect_rqts;
 
-	err = mlx5e_create_indirect_tirs(priv);
+	err = mlx5e_create_indirect_tirs(priv, true);
 	if (err)
 		goto err_destroy_direct_rqts;
 
-	err = mlx5e_create_direct_tirs(priv);
+	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
 	if (err)
 		goto err_destroy_indirect_tirs;
 
@@ -331,23 +394,29 @@
 	return 0;
 
 err_destroy_direct_tirs:
-	mlx5e_destroy_direct_tirs(priv);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
 err_destroy_indirect_tirs:
-	mlx5e_destroy_indirect_tirs(priv);
+	mlx5e_destroy_indirect_tirs(priv, true);
 err_destroy_direct_rqts:
-	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 err_destroy_indirect_rqts:
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+	mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+	mlx5e_destroy_q_counters(priv);
 	return err;
 }
 
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
 	mlx5i_destroy_flow_steering(priv);
-	mlx5e_destroy_direct_tirs(priv);
-	mlx5e_destroy_indirect_tirs(priv);
-	mlx5e_destroy_direct_rqts(priv);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_indirect_tirs(priv, true);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+	mlx5e_close_drop_rq(&priv->drop_rq);
+	mlx5e_destroy_q_counters(priv);
 }
 
 static const struct mlx5e_profile mlx5i_nic_profile = {
@@ -359,12 +428,13 @@
 	.cleanup_rx	   = mlx5i_cleanup_rx,
 	.enable		   = NULL, /* mlx5i_enable */
 	.disable	   = NULL, /* mlx5i_disable */
+	.update_rx	   = mlx5e_update_nic_rx,
 	.update_stats	   = NULL, /* mlx5i_update_stats */
-	.max_nch	   = mlx5e_get_max_num_channels,
 	.update_carrier    = NULL, /* no HW update in IB link */
 	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
 	.max_tc		   = MLX5I_MAX_NUM_TC,
+	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
 /* mlx5i netdev NDos */
@@ -388,11 +458,11 @@
 
 	new_channels.params = *params;
 	new_channels.params.sw_mtu = new_mtu;
-	err = mlx5e_open_channels(priv, &new_channels);
+
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
 	if (err)
 		goto out;
 
-	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 	netdev->mtu = new_channels.params.sw_mtu;
 
 out:
@@ -468,7 +538,7 @@
 	if (err)
 		goto err_remove_fs_underlay_qp;
 
-	mlx5e_refresh_tirs(epriv, false);
+	epriv->profile->update_rx(epriv);
 	mlx5e_activate_priv_channels(epriv);
 
 	mutex_unlock(&epriv->state_lock);
@@ -561,7 +631,7 @@
 	struct mlx5_ib_ah *mah   = to_mah(address);
 	struct mlx5i_priv *ipriv = epriv->ppriv;
 
-	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
+	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more());
 }
 
 static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
@@ -592,7 +662,6 @@
 
 	mlx5e_detach_netdev(priv);
 	profile->cleanup(priv);
-	destroy_workqueue(priv->wq);
 
 	if (!ipriv->sub_interface) {
 		mlx5i_pkey_qpn_ht_cleanup(netdev);
@@ -600,58 +669,37 @@
 	}
 }
 
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-					  struct ib_device *ibdev,
-					  const char *name,
-					  void (*setup)(struct net_device *))
+static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
 {
-	const struct mlx5e_profile *profile;
-	struct net_device *netdev;
+	return mdev->mlx5e_res.pdn != 0;
+}
+
+static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
+{
+	if (mlx5_is_sub_interface(mdev))
+		return mlx5i_pkey_get_profile();
+	return &mlx5i_nic_profile;
+}
+
+static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
+			      struct net_device *netdev, void *param)
+{
+	struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param;
+	const struct mlx5e_profile *prof = mlx5_get_profile(mdev);
 	struct mlx5i_priv *ipriv;
 	struct mlx5e_priv *epriv;
 	struct rdma_netdev *rn;
-	bool sub_interface;
-	int nch;
 	int err;
 
-	if (mlx5i_check_required_hca_cap(mdev)) {
-		mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
-		return ERR_PTR(-EOPNOTSUPP);
-	}
-
-	/* TODO: Need to find a better way to check if child device*/
-	sub_interface = (mdev->mlx5e_res.pdn != 0);
-
-	if (sub_interface)
-		profile = mlx5i_pkey_get_profile();
-	else
-		profile = &mlx5i_nic_profile;
-
-	nch = profile->max_nch(mdev);
-
-	netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
-				  name, NET_NAME_UNKNOWN,
-				  setup,
-				  nch * MLX5E_MAX_NUM_TC,
-				  nch);
-	if (!netdev) {
-		mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
-		return NULL;
-	}
-
 	ipriv = netdev_priv(netdev);
 	epriv = mlx5i_epriv(netdev);
 
-	epriv->wq = create_singlethread_workqueue("mlx5i");
-	if (!epriv->wq)
-		goto err_free_netdev;
-
-	ipriv->sub_interface = sub_interface;
+	ipriv->sub_interface = mlx5_is_sub_interface(mdev);
 	if (!ipriv->sub_interface) {
 		err = mlx5i_pkey_qpn_ht_init(netdev);
 		if (err) {
 			mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
-			goto destroy_wq;
+			return err;
 		}
 
 		/* This should only be called once per mdev */
@@ -660,9 +708,11 @@
 			goto destroy_ht;
 	}
 
-	profile->init(mdev, netdev, profile, ipriv);
+	prof->init(mdev, netdev, prof, ipriv);
 
-	mlx5e_attach_netdev(epriv);
+	err = mlx5e_attach_netdev(epriv);
+	if (err)
+		goto detach;
 	netif_carrier_off(netdev);
 
 	/* set rdma_netdev func pointers */
@@ -676,15 +726,40 @@
 	netdev->priv_destructor = mlx5_rdma_netdev_free;
 	netdev->needs_free_netdev = 1;
 
-	return netdev;
+	return 0;
 
+detach:
+	prof->cleanup(epriv);
+	if (ipriv->sub_interface)
+		return err;
+	mlx5e_destroy_mdev_resources(mdev);
 destroy_ht:
 	mlx5i_pkey_qpn_ht_cleanup(netdev);
-destroy_wq:
-	destroy_workqueue(epriv->wq);
-err_free_netdev:
-	free_netdev(netdev);
-
-	return NULL;
+	return err;
 }
-EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
+
+int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
+			    struct ib_device *device,
+			    struct rdma_netdev_alloc_params *params)
+{
+	int nch;
+	int rc;
+
+	rc = mlx5i_check_required_hca_cap(mdev);
+	if (rc)
+		return rc;
+
+	nch = mlx5e_get_max_num_channels(mdev);
+
+	*params = (struct rdma_netdev_alloc_params){
+		.sizeof_priv = sizeof(struct mlx5i_priv) +
+			       sizeof(struct mlx5e_priv),
+		.txqs = nch * MLX5E_MAX_NUM_TC,
+		.rxqs = nch,
+		.param = mdev,
+		.initialize_rdma_netdev = mlx5_rdma_setup_rn,
+	};
+
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_rdma_rn_get_params);
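
A rough sketch of the consuming side, to show the contract: the RDMA core allocates the netdev with the sizes returned here, then calls back through initialize_rdma_netdev. Illustrative only; the real logic lives in the InfiniBand core, and name, setup and port_num are assumed from that context:

	struct rdma_netdev_alloc_params params;
	struct net_device *ndev;
	int err;

	err = mlx5_rdma_rn_get_params(mdev, ibdev, &params);
	if (err)
		return ERR_PTR(err);

	ndev = alloc_netdev_mqs(params.sizeof_priv, name, NET_NAME_UNKNOWN,
				setup, params.txqs, params.rxqs);
	if (!ndev)
		return ERR_PTR(-ENOMEM);

	err = params.initialize_rdma_netdev(ibdev, port_num, ndev, params.param);
	if (err) {
		free_netdev(ndev);
		return ERR_PTR(err);
	}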
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 0982c57..c87962c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -59,6 +59,8 @@
 	char  *mlx5e_priv[0];
 };
 
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn);
+
 /* Underlay QP create/destroy functions */
 int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
 void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
@@ -84,10 +86,11 @@
 int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
 
 /* Parent profile functions */
-void mlx5i_init(struct mlx5_core_dev *mdev,
-		struct net_device *netdev,
-		const struct mlx5e_profile *profile,
-		void *ppriv);
+int mlx5i_init(struct mlx5_core_dev *mdev,
+	       struct net_device *netdev,
+	       const struct mlx5e_profile *profile,
+	       void *ppriv);
+void mlx5i_cleanup(struct mlx5e_priv *priv);
 
 /* Get child interface nic profile */
 const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
@@ -118,8 +121,10 @@
 }
 
 netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5_av *av, u32 dqpn, u32 dqkey);
+			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
+			  bool xmit_more);
 void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
 #endif /* CONFIG_MLX5_CORE_IPOIB */
 #endif /* __MLX5E_IPOB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 54a188f..96e6418 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -146,6 +146,7 @@
 	.ndo_open                = mlx5i_pkey_open,
 	.ndo_stop                = mlx5i_pkey_close,
 	.ndo_init                = mlx5i_pkey_dev_init,
+	.ndo_get_stats64         = mlx5i_get_stats,
 	.ndo_uninit              = mlx5i_pkey_dev_cleanup,
 	.ndo_change_mtu          = mlx5i_pkey_change_mtu,
 	.ndo_do_ioctl            = mlx5i_pkey_ioctl,
@@ -209,7 +210,7 @@
 		goto err_uninit_underlay_qp;
 	}
 
-	err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+	err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0][0]);
 	if (err) {
 		mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
 		goto err_remove_rx_underlay_qp;
@@ -220,14 +221,14 @@
 		mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
 		goto err_clear_state_opened_flag;
 	}
-	mlx5e_refresh_tirs(epriv, false);
+	epriv->profile->update_rx(epriv);
 	mlx5e_activate_priv_channels(epriv);
 	mutex_unlock(&epriv->state_lock);
 
 	return 0;
 
 err_clear_state_opened_flag:
-	mlx5e_destroy_tis(mdev, epriv->tisn[0]);
+	mlx5e_destroy_tis(mdev, epriv->tisn[0][0]);
 err_remove_rx_underlay_qp:
 	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
 err_uninit_underlay_qp:
@@ -256,7 +257,7 @@
 	mlx5i_uninit_underlay_qp(priv);
 	mlx5e_deactivate_priv_channels(priv);
 	mlx5e_close_channels(&priv->channels);
-	mlx5e_destroy_tis(mdev, priv->tisn[0]);
+	mlx5e_destroy_tis(mdev, priv->tisn[0][0]);
 unlock:
 	mutex_unlock(&priv->state_lock);
 	return 0;
@@ -274,14 +275,17 @@
 }
 
 /* Called directly after IPoIB netdevice was created to initialize SW structs */
-static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
-			     struct net_device *netdev,
-			     const struct mlx5e_profile *profile,
-			     void *ppriv)
+static int mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+			   struct net_device *netdev,
+			   const struct mlx5e_profile *profile,
+			   void *ppriv)
 {
 	struct mlx5e_priv *priv  = mlx5i_epriv(netdev);
+	int err;
 
-	mlx5i_init(mdev, netdev, profile, ppriv);
+	err = mlx5i_init(mdev, netdev, profile, ppriv);
+	if (err)
+		return err;
 
 	/* Override parent ndo */
 	netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
@@ -291,12 +295,14 @@
 
 	/* Use dummy rqs */
 	priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+
+	return 0;
 }
 
 /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
 static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
 {
-	/* Do nothing .. */
+	mlx5i_cleanup(priv);
 }
 
 static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
@@ -344,11 +350,12 @@
 	.cleanup_rx	   = mlx5i_pkey_cleanup_rx,
 	.enable		   = NULL,
 	.disable	   = NULL,
+	.update_rx	   = mlx5e_update_nic_rx,
 	.update_stats	   = NULL,
-	.max_nch	   = mlx5e_get_max_num_channels,
 	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
 	.max_tc		   = MLX5I_MAX_NUM_TC,
+	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
 const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 582b2f1..c5ef2ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -34,39 +34,9 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
-
-enum {
-	MLX5_LAG_FLAG_BONDED = 1 << 0,
-};
-
-struct lag_func {
-	struct mlx5_core_dev *dev;
-	struct net_device    *netdev;
-};
-
-/* Used for collection of netdev event info. */
-struct lag_tracker {
-	enum   netdev_lag_tx_type           tx_type;
-	struct netdev_lag_lower_state_info  netdev_state[MLX5_MAX_PORTS];
-	bool is_bonded;
-};
-
-/* LAG data of a ConnectX card.
- * It serves both its phys functions.
- */
-struct mlx5_lag {
-	u8                        flags;
-	u8                        v2p_map[MLX5_MAX_PORTS];
-	struct lag_func           pf[MLX5_MAX_PORTS];
-	struct lag_tracker        tracker;
-	struct delayed_work       bond_work;
-	struct notifier_block     nb;
-
-	/* Admin state. Allow lag only if allowed is true
-	 * even if network conditions for lag were met
-	 */
-	bool                      allowed;
-};
+#include "eswitch.h"
+#include "lag.h"
+#include "lag_mp.h"
 
 /* General purpose, use for short periods of time.
  * Beware of lock dependencies (preferably, no locks should be acquired
@@ -148,13 +118,8 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 
-static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
-{
-	return dev->priv.lag;
-}
-
-static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
-				       struct net_device *ndev)
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+				struct net_device *ndev)
 {
 	int i;
 
@@ -165,9 +130,14 @@
 	return -1;
 }
 
-static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
 {
-	return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
 }
 
 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
@@ -186,36 +156,131 @@
 		*port2 = 1;
 }
 
-static void mlx5_activate_lag(struct mlx5_lag *ldev,
-			      struct lag_tracker *tracker)
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+		     struct lag_tracker *tracker)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	u8 v2p_port1, v2p_port2;
+	int err;
+
+	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
+				       &v2p_port2);
+
+	if (v2p_port1 != ldev->v2p_map[0] ||
+	    v2p_port2 != ldev->v2p_map[1]) {
+		ldev->v2p_map[0] = v2p_port1;
+		ldev->v2p_map[1] = v2p_port2;
+
+		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d\n",
+			       ldev->v2p_map[0], ldev->v2p_map[1]);
+
+		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+		if (err)
+			mlx5_core_err(dev0,
+				      "Failed to modify LAG (%d)\n",
+				      err);
+	}
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+			   struct lag_tracker *tracker)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	int err;
 
-	ldev->flags |= MLX5_LAG_FLAG_BONDED;
-
 	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
 				       &ldev->v2p_map[1]);
 
+	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d\n",
+		       ldev->v2p_map[0], ldev->v2p_map[1]);
+
 	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
 	if (err)
 		mlx5_core_err(dev0,
 			      "Failed to create LAG (%d)\n",
 			      err);
+	return err;
 }
 
-static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+		      struct lag_tracker *tracker,
+		      u8 flags)
 {
+	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	int err;
 
-	ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+	err = mlx5_create_lag(ldev, tracker);
+	if (err) {
+		if (roce_lag) {
+			mlx5_core_err(dev0,
+				      "Failed to activate RoCE LAG\n");
+		} else {
+			mlx5_core_err(dev0,
+				      "Failed to activate VF LAG\n"
+				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+		}
+		return err;
+	}
+
+	ldev->flags |= flags;
+	return 0;
+}
+
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	bool roce_lag = __mlx5_lag_is_roce(ldev);
+	int err;
+
+	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
 	err = mlx5_cmd_destroy_lag(dev0);
-	if (err)
-		mlx5_core_err(dev0,
-			      "Failed to destroy LAG (%d)\n",
-			      err);
+	if (err) {
+		if (roce_lag) {
+			mlx5_core_err(dev0,
+				      "Failed to deactivate RoCE LAG; driver restart required\n");
+		} else {
+			mlx5_core_err(dev0,
+				      "Failed to deactivate VF LAG; driver restart required\n"
+				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+		}
+	}
+
+	return err;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+		return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+	return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+#else
+	return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
+		!mlx5_sriov_is_enabled(ldev->pf[1].dev));
+#endif
+}
+
+static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
+{
+	int i;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].dev)
+			mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+						 MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
+{
+	int i;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].dev)
+			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+						    MLX5_INTERFACE_PROTOCOL_IB);
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -223,9 +288,8 @@
 	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
 	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
 	struct lag_tracker tracker;
-	u8 v2p_port1, v2p_port2;
-	int i, err;
-	bool do_bond;
+	bool do_bond, roce_lag;
+	int err;
 
 	if (!dev0 || !dev1)
 		return;
@@ -234,48 +298,56 @@
 	tracker = ldev->tracker;
 	mutex_unlock(&lag_mutex);
 
-	do_bond = tracker.is_bonded && ldev->allowed;
+	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
 
-	if (do_bond && !mlx5_lag_is_bonded(ldev)) {
-		for (i = 0; i < MLX5_MAX_PORTS; i++)
-			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
-						    MLX5_INTERFACE_PROTOCOL_IB);
+	if (do_bond && !__mlx5_lag_is_active(ldev)) {
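+		/* RoCE LAG is used only when SR-IOV is disabled on both
+		 * PFs and (with eswitch support) neither eswitch is
+		 * engaged; otherwise the bond becomes a VF (SR-IOV) LAG.
+		 */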
+		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+			   !mlx5_sriov_is_enabled(dev1);
 
-		mlx5_activate_lag(ldev, &tracker);
+#ifdef CONFIG_MLX5_ESWITCH
+		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+#endif
 
-		mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
-		mlx5_nic_vport_enable_roce(dev1);
-	} else if (do_bond && mlx5_lag_is_bonded(ldev)) {
-		mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
-					       &v2p_port2);
+		if (roce_lag)
+			mlx5_lag_remove_ib_devices(ldev);
 
-		if ((v2p_port1 != ldev->v2p_map[0]) ||
-		    (v2p_port2 != ldev->v2p_map[1])) {
-			ldev->v2p_map[0] = v2p_port1;
-			ldev->v2p_map[1] = v2p_port2;
+		err = mlx5_activate_lag(ldev, &tracker,
+					roce_lag ? MLX5_LAG_FLAG_ROCE :
+					MLX5_LAG_FLAG_SRIOV);
+		if (err) {
+			if (roce_lag)
+				mlx5_lag_add_ib_devices(ldev);
 
-			err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
-			if (err)
-				mlx5_core_err(dev0,
-					      "Failed to modify LAG (%d)\n",
-					      err);
+			return;
 		}
-	} else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
-		mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
-		mlx5_nic_vport_disable_roce(dev1);
 
-		mlx5_deactivate_lag(ldev);
+		if (roce_lag) {
+			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+			mlx5_nic_vport_enable_roce(dev1);
+		}
+	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
+		mlx5_modify_lag(ldev, &tracker);
+	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+		roce_lag = __mlx5_lag_is_roce(ldev);
 
-		for (i = 0; i < MLX5_MAX_PORTS; i++)
-			if (ldev->pf[i].dev)
-				mlx5_add_dev_by_protocol(ldev->pf[i].dev,
-							 MLX5_INTERFACE_PROTOCOL_IB);
+		if (roce_lag) {
+			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+			mlx5_nic_vport_disable_roce(dev1);
+		}
+
+		err = mlx5_deactivate_lag(ldev);
+		if (err)
+			return;
+
+		if (roce_lag)
+			mlx5_lag_add_ib_devices(ldev);
 	}
 }
 
 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
 {
-	schedule_delayed_work(&ldev->bond_work, delay);
+	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
 }
 
 static void mlx5_do_bond_work(struct work_struct *work)
@@ -419,15 +491,6 @@
 	return NOTIFY_DONE;
 }
 
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
-	if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
-	    (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
-		return false;
-	else
-		return true;
-}
-
 static struct mlx5_lag *mlx5_lag_dev_alloc(void)
 {
 	struct mlx5_lag *ldev;
@@ -436,14 +499,20 @@
 	if (!ldev)
 		return NULL;
 
+	ldev->wq = create_singlethread_workqueue("mlx5_lag");
+	if (!ldev->wq) {
+		kfree(ldev);
+		return NULL;
+	}
+
 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
-	ldev->allowed = mlx5_lag_check_prereq(ldev);
 
 	return ldev;
 }
 
 static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
 {
+	destroy_workqueue(ldev->wq);
 	kfree(ldev);
 }
 
@@ -462,7 +531,6 @@
 	ldev->tracker.netdev_state[fn].link_up = 0;
 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
 
-	ldev->allowed = mlx5_lag_check_prereq(ldev);
 	dev->priv.lag = ldev;
 
 	mutex_unlock(&lag_mutex);
@@ -484,7 +552,6 @@
 	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
 
 	dev->priv.lag = NULL;
-	ldev->allowed = mlx5_lag_check_prereq(ldev);
 	mutex_unlock(&lag_mutex);
 }
 
@@ -493,6 +560,7 @@
 {
 	struct mlx5_lag *ldev = NULL;
 	struct mlx5_core_dev *tmp_dev;
+	int err;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
 	    !MLX5_CAP_GEN(dev, lag_master) ||
@@ -520,6 +588,11 @@
 			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
 		}
 	}
+
+	err = mlx5_lag_mp_init(ldev);
+	if (err)
+		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+			      err);
 }
 
 /* Must be called with intf_mutex held */
@@ -532,7 +605,7 @@
 	if (!ldev)
 		return;
 
-	if (mlx5_lag_is_bonded(ldev))
+	if (__mlx5_lag_is_active(ldev))
 		mlx5_deactivate_lag(ldev);
 
 	mlx5_lag_dev_remove_pf(ldev, dev);
@@ -544,11 +617,26 @@
 	if (i == MLX5_MAX_PORTS) {
 		if (ldev->nb.notifier_call)
 			unregister_netdevice_notifier(&ldev->nb);
+		mlx5_lag_mp_cleanup(ldev);
 		cancel_delayed_work_sync(&ldev->bond_work);
 		mlx5_lag_dev_free(ldev);
 	}
 }
 
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && __mlx5_lag_is_roce(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_roce);
+
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
@@ -556,49 +644,40 @@
 
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
-	res  = ldev && mlx5_lag_is_bonded(ldev);
+	res  = ldev && __mlx5_lag_is_active(ldev);
 	mutex_unlock(&lag_mutex);
 
 	return res;
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
-static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
-	int ret = 0;
-	bool lag_active;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && __mlx5_lag_is_sriov(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
+
+void mlx5_lag_update(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
 
 	mlx5_dev_list_lock();
-
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev) {
-		ret = -ENODEV;
+	if (!ldev)
 		goto unlock;
-	}
-	lag_active = mlx5_lag_is_bonded(ldev);
-	if (!mlx5_lag_check_prereq(ldev) && allow) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-	if (ldev->allowed == allow)
-		goto unlock;
-	ldev->allowed = allow;
-	if ((lag_active && !allow) || allow)
-		mlx5_do_bond(ldev);
+
+	mlx5_do_bond(ldev);
+
 unlock:
 	mlx5_dev_list_unlock();
-	return ret;
-}
-
-int mlx5_lag_forbid(struct mlx5_core_dev *dev)
-{
-	return mlx5_lag_set_state(dev, false);
-}
-
-int mlx5_lag_allow(struct mlx5_core_dev *dev)
-{
-	return mlx5_lag_set_state(dev, true);
 }
 
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -609,7 +688,7 @@
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
 
-	if (!(ldev && mlx5_lag_is_bonded(ldev)))
+	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
 
 	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
@@ -638,7 +717,7 @@
 		return true;
 
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+	if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
 		return true;
 
 	/* If bonded, we do not add an IB device for PF1. */
@@ -665,7 +744,7 @@
 
 	mutex_lock(&lag_mutex);
 	ldev = mlx5_lag_dev_get(dev);
-	if (ldev && mlx5_lag_is_bonded(ldev)) {
+	if (ldev && __mlx5_lag_is_roce(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[0] = ldev->pf[0].dev;
 		mdev[1] = ldev->pf[1].dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
new file mode 100644
index 0000000..1dea0b1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_H__
+#define __MLX5_LAG_H__
+
+#include "mlx5_core.h"
+#include "lag_mp.h"
+
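+/* A LAG device operates in exactly one of the modes below at a time;
+ * together the mode flags form MLX5_LAG_MODE_FLAGS.
+ */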
+enum {
+	MLX5_LAG_FLAG_ROCE   = 1 << 0,
+	MLX5_LAG_FLAG_SRIOV  = 1 << 1,
+	MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
+};
+
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
+			     MLX5_LAG_FLAG_MULTIPATH)
+
+struct lag_func {
+	struct mlx5_core_dev *dev;
+	struct net_device    *netdev;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+	enum   netdev_lag_tx_type           tx_type;
+	struct netdev_lag_lower_state_info  netdev_state[MLX5_MAX_PORTS];
+	unsigned int is_bonded:1;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both of its physical functions (PFs).
+ */
+struct mlx5_lag {
+	u8                        flags;
+	u8                        v2p_map[MLX5_MAX_PORTS];
+	struct lag_func           pf[MLX5_MAX_PORTS];
+	struct lag_tracker        tracker;
+	struct workqueue_struct   *wq;
+	struct delayed_work       bond_work;
+	struct notifier_block     nb;
+	struct lag_mp             lag_mp;
+};
+
+static inline struct mlx5_lag *
+mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+{
+	return dev->priv.lag;
+}
+
+static inline bool
+__mlx5_lag_is_active(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+		     struct lag_tracker *tracker);
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+		      struct lag_tracker *tracker,
+		      u8 flags);
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+				struct net_device *ndev);
+
+#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
new file mode 100644
index 0000000..5d20d61
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <net/nexthop.h>
+#include "lag.h"
+#include "lag_mp.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
+{
+	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+		return false;
+
+	return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+}
+
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+}
+
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && __mlx5_lag_is_multipath(ldev);
+
+	return res;
+}
+
+/**
+ * mlx5_lag_set_port_affinity - set the LAG TX port affinity
+ *
+ * @ldev: lag device
+ * @port: affinity to apply:
+ *     0 - normal affinity (both ports).
+ *     1 - affinity to port 1.
+ *     2 - affinity to port 2.
+ */
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
+{
+	struct lag_tracker tracker;
+
+	if (!__mlx5_lag_is_multipath(ldev))
+		return;
+
+	switch (port) {
+	case 0:
+		tracker.netdev_state[0].tx_enabled = true;
+		tracker.netdev_state[1].tx_enabled = true;
+		tracker.netdev_state[0].link_up = true;
+		tracker.netdev_state[1].link_up = true;
+		break;
+	case 1:
+		tracker.netdev_state[0].tx_enabled = true;
+		tracker.netdev_state[0].link_up = true;
+		tracker.netdev_state[1].tx_enabled = false;
+		tracker.netdev_state[1].link_up = false;
+		break;
+	case 2:
+		tracker.netdev_state[0].tx_enabled = false;
+		tracker.netdev_state[0].link_up = false;
+		tracker.netdev_state[1].tx_enabled = true;
+		tracker.netdev_state[1].link_up = true;
+		break;
+	default:
+		mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
+			       port);
+		return;
+	}
+
+	if (tracker.netdev_state[0].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
+					 MLX5_DEV_EVENT_PORT_AFFINITY,
+					 (void *)0);
+
+	if (tracker.netdev_state[1].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
+					 MLX5_DEV_EVENT_PORT_AFFINITY,
+					 (void *)0);
+
+	mlx5_modify_lag(ldev, &tracker);
+}
+
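+/* Flush callback passed to register_fib_notifier(); it appears to be
+ * invoked by the FIB notifier core when pending notifications must be
+ * drained, so flushing the LAG workqueue here ensures no queued FIB
+ * work is still in flight.
+ */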
+static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+{
+	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+
+	flush_workqueue(ldev->wq);
+}
+
+struct mlx5_fib_event_work {
+	struct work_struct work;
+	struct mlx5_lag *ldev;
+	unsigned long event;
+	union {
+		struct fib_entry_notifier_info fen_info;
+		struct fib_nh_notifier_info fnh_info;
+	};
+};
+
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+				     unsigned long event,
+				     struct fib_info *fi)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+	struct fib_nh *fib_nh0, *fib_nh1;
+	unsigned int nhs;
+
+	/* Handle delete event */
+	if (event == FIB_EVENT_ENTRY_DEL) {
+		/* stop tracking this fib entry */
+		if (mp->mfi == fi)
+			mp->mfi = NULL;
+		return;
+	}
+
+	/* Handle add/replace event */
+	nhs = fib_info_num_path(fi);
+	if (nhs == 1) {
+		if (__mlx5_lag_is_active(ldev)) {
+			struct fib_nh *nh = fib_info_nh(fi, 0);
+			struct net_device *nh_dev = nh->fib_nh_dev;
+			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+
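+			/* ++i turns the 0-based netdev index into the
+			 * 1-based port affinity value; -1 (netdev not
+			 * found) becomes 0, i.e. normal affinity.
+			 */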
+			mlx5_lag_set_port_affinity(ldev, ++i);
+		}
+		return;
+	}
+
+	if (nhs != 2)
+		return;
+
+	/* Verify the next hops are ports of the same HCA */
+	fib_nh0 = fib_info_nh(fi, 0);
+	fib_nh1 = fib_info_nh(fi, 1);
+	if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
+	      fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
+	    !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
+	      fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
+		mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
+		return;
+	}
+
+	/* First time we see multipath route */
+	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+		struct lag_tracker tracker;
+
+		tracker = ldev->tracker;
+		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+	}
+
+	mlx5_lag_set_port_affinity(ldev, 0);
+	mp->mfi = fi;
+}
+
+static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+				       unsigned long event,
+				       struct fib_nh *fib_nh,
+				       struct fib_info *fi)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	/* Check that the nexthop event is related to the tracked route */
+	if (!mp->mfi || mp->mfi != fi)
+		return;
+
+	/* nh added/removed */
+	if (event == FIB_EVENT_NH_DEL) {
+		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
+
+		if (i >= 0) {
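+			/* (i + 1) % 2 flips the 0-based port index to
+			 * the peer port; + 1 makes it 1-based for
+			 * mlx5_lag_set_port_affinity().
+			 */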
+			i = (i + 1) % 2 + 1; /* peer port */
+			mlx5_lag_set_port_affinity(ldev, i);
+		}
+	} else if (event == FIB_EVENT_NH_ADD &&
+		   fib_info_num_path(fi) == 2) {
+		mlx5_lag_set_port_affinity(ldev, 0);
+	}
+}
+
+static void mlx5_lag_fib_update(struct work_struct *work)
+{
+	struct mlx5_fib_event_work *fib_work =
+		container_of(work, struct mlx5_fib_event_work, work);
+	struct mlx5_lag *ldev = fib_work->ldev;
+	struct fib_nh *fib_nh;
+
+	/* Protect internal structures from changes */
+	rtnl_lock();
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_APPEND: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		mlx5_lag_fib_route_event(ldev, fib_work->event,
+					 fib_work->fen_info.fi);
+		fib_info_put(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_NH_ADD: /* fall through */
+	case FIB_EVENT_NH_DEL:
+		fib_nh = fib_work->fnh_info.fib_nh;
+		mlx5_lag_fib_nexthop_event(ldev,
+					   fib_work->event,
+					   fib_work->fnh_info.fib_nh,
+					   fib_nh->nh_parent);
+		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	}
+
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
+static struct mlx5_fib_event_work *
+mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
+{
+	struct mlx5_fib_event_work *fib_work;
+
+	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+	if (WARN_ON(!fib_work))
+		return NULL;
+
+	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
+	fib_work->ldev = ldev;
+	fib_work->event = event;
+
+	return fib_work;
+}
+
+static int mlx5_lag_fib_event(struct notifier_block *nb,
+			      unsigned long event,
+			      void *ptr)
+{
+	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+	struct fib_notifier_info *info = ptr;
+	struct mlx5_fib_event_work *fib_work;
+	struct fib_entry_notifier_info *fen_info;
+	struct fib_nh_notifier_info *fnh_info;
+	struct net_device *fib_dev;
+	struct fib_info *fi;
+
+	if (!net_eq(info->net, &init_net))
+		return NOTIFY_DONE;
+
+	if (info->family != AF_INET)
+		return NOTIFY_DONE;
+
+	if (!mlx5_lag_multipath_check_prereq(ldev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_APPEND: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		fen_info = container_of(info, struct fib_entry_notifier_info,
+					info);
+		fi = fen_info->fi;
+		if (fi->nh) {
+			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+			return notifier_from_errno(-EINVAL);
+		}
+		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+		if (fib_dev != ldev->pf[0].netdev &&
+		    fib_dev != ldev->pf[1].netdev) {
+			return NOTIFY_DONE;
+		}
+		fib_work = mlx5_lag_init_fib_work(ldev, event);
+		if (!fib_work)
+			return NOTIFY_DONE;
+		fib_work->fen_info = *fen_info;
+		/* Take reference on fib_info to prevent it from being
+		 * freed while work is queued. Release it afterwards.
+		 */
+		fib_info_hold(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_NH_ADD: /* fall through */
+	case FIB_EVENT_NH_DEL:
+		fnh_info = container_of(info, struct fib_nh_notifier_info,
+					info);
+		fib_work = mlx5_lag_init_fib_work(ldev, event);
+		if (!fib_work)
+			return NOTIFY_DONE;
+		fib_work->fnh_info = *fnh_info;
+		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	queue_work(ldev->wq, &fib_work->work);
+
+	return NOTIFY_DONE;
+}
+
+int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+	int err;
+
+	if (mp->fib_nb.notifier_call)
+		return 0;
+
+	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
+	err = register_fib_notifier(&mp->fib_nb,
+				    mlx5_lag_fib_event_flush);
+	if (err)
+		mp->fib_nb.notifier_call = NULL;
+
+	return err;
+}
+
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	if (!mp->fib_nb.notifier_call)
+		return;
+
+	unregister_fib_notifier(&mp->fib_nb);
+	mp->fib_nb.notifier_call = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
new file mode 100644
index 0000000..6d14b11
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_MP_H__
+#define __MLX5_LAG_MP_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+struct lag_mp {
+	struct notifier_block     fib_nb;
+	struct fib_info           *mfi; /* used in tracking fib events */
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_lag_mp_init(struct mlx5_lag *ldev);
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
+static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_LAG_MP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
deleted file mode 100644
index d8e1711..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 3f767cd..43f9760 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -33,6 +33,7 @@
 #include <linux/clocksource.h>
 #include <linux/highmem.h>
 #include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
 #include "en.h"
 #include "clock.h"
 
@@ -71,7 +72,7 @@
 	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
 						  clock);
 
-	return mlx5_read_internal_timer(mdev) & cc->mask;
+	return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
 }
 
 static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
@@ -111,10 +112,10 @@
 	for (i = 0; i < clock->ptp_info.n_pins; i++) {
 		u64 tstart;
 
-		write_lock_irqsave(&clock->lock, flags);
+		write_seqlock_irqsave(&clock->lock, flags);
 		tstart = clock->pps_info.start[i];
 		clock->pps_info.start[i] = 0;
-		write_unlock_irqrestore(&clock->lock, flags);
+		write_sequnlock_irqrestore(&clock->lock, flags);
 		if (!tstart)
 			continue;
 
@@ -132,10 +133,10 @@
 						overflow_work);
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_read(&clock->tc);
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 	schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
 }
 
@@ -147,24 +148,28 @@
 	u64 ns = timespec64_to_ns(ts);
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_init(&clock->tc, &clock->cycles, ns);
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	return 0;
 }
 
-static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			     struct ptp_system_timestamp *sts)
 {
 	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
 						ptp_info);
-	u64 ns;
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+						  clock);
 	unsigned long flags;
+	u64 cycles, ns;
 
-	write_lock_irqsave(&clock->lock, flags);
-	ns = timecounter_read(&clock->tc);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
+	cycles = mlx5_read_internal_timer(mdev, sts);
+	ns = timecounter_cyc2time(&clock->tc, cycles);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -177,10 +182,10 @@
 						ptp_info);
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_adjtime(&clock->tc, delta);
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	return 0;
 }
@@ -203,12 +208,12 @@
 	adj *= delta;
 	diff = div_u64(adj, 1000000000ULL);
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_read(&clock->tc);
 	clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
 				       clock->nominal_c_mult + diff;
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	return 0;
 }
@@ -231,6 +236,19 @@
 	if (!MLX5_PPS_CAP(mdev))
 		return -EOPNOTSUPP;
 
+	/* Reject requests with unsupported flags */
+	if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+				PTP_RISING_EDGE |
+				PTP_FALLING_EDGE |
+				PTP_STRICT_FLAGS))
+		return -EOPNOTSUPP;
+
+	/* Reject requests to enable time stamping on both edges. */
+	if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+	    (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+	    (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+		return -EOPNOTSUPP;
+
 	if (rq->extts.index >= clock->ptp_info.n_pins)
 		return -EINVAL;
 
@@ -285,6 +303,10 @@
 	if (!MLX5_PPS_CAP(mdev))
 		return -EOPNOTSUPP;
 
+	/* Reject requests with unsupported flags */
+	if (rq->perout.flags)
+		return -EOPNOTSUPP;
+
 	if (rq->perout.index >= clock->ptp_info.n_pins)
 		return -EINVAL;
 
@@ -306,13 +328,13 @@
 		ts.tv_sec = rq->perout.start.sec;
 		ts.tv_nsec = rq->perout.start.nsec;
 		ns = timespec64_to_ns(&ts);
-		cycles_now = mlx5_read_internal_timer(mdev);
-		write_lock_irqsave(&clock->lock, flags);
+		cycles_now = mlx5_read_internal_timer(mdev, NULL);
+		write_seqlock_irqsave(&clock->lock, flags);
 		nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
 		nsec_delta = ns - nsec_now;
 		cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
 					 clock->cycles.mult);
-		write_unlock_irqrestore(&clock->lock, flags);
+		write_sequnlock_irqrestore(&clock->lock, flags);
 		time_stamp = cycles_now + cycles_delta;
 		field_select = MLX5_MTPPS_FS_PIN_MODE |
 			       MLX5_MTPPS_FS_PATTERN |
@@ -383,7 +405,7 @@
 	.pps		= 0,
 	.adjfreq	= mlx5_ptp_adjfreq,
 	.adjtime	= mlx5_ptp_adjtime,
-	.gettime64	= mlx5_ptp_gettime,
+	.gettimex64	= mlx5_ptp_gettimex,
 	.settime64	= mlx5_ptp_settime,
 	.enable		= NULL,
 	.verify		= NULL,
@@ -439,16 +461,17 @@
 	clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
 }
 
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
-		    struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+			  unsigned long type, void *data)
 {
-	struct mlx5_clock *clock = &mdev->clock;
+	struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+	struct mlx5_core_dev *mdev = clock->mdev;
 	struct ptp_clock_event ptp_event;
-	struct timespec64 ts;
-	u64 nsec_now, nsec_delta;
 	u64 cycles_now, cycles_delta;
+	u64 nsec_now, nsec_delta, ns;
+	struct mlx5_eqe *eqe = data;
 	int pin = eqe->data.pps.pin;
-	s64 ns;
+	struct timespec64 ts;
 	unsigned long flags;
 
 	switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,26 +486,30 @@
 		} else {
 			ptp_event.type = PTP_CLOCK_EXTTS;
 		}
+		/* TODO: clock->ptp can be NULL if ptp_clock_register fails */
 		ptp_clock_event(clock->ptp, &ptp_event);
 		break;
 	case PTP_PF_PEROUT:
-		mlx5_ptp_gettime(&clock->ptp_info, &ts);
-		cycles_now = mlx5_read_internal_timer(mdev);
+		mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+		cycles_now = mlx5_read_internal_timer(mdev, NULL);
 		ts.tv_sec += 1;
 		ts.tv_nsec = 0;
 		ns = timespec64_to_ns(&ts);
-		write_lock_irqsave(&clock->lock, flags);
+		write_seqlock_irqsave(&clock->lock, flags);
 		nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
 		nsec_delta = ns - nsec_now;
 		cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
 					 clock->cycles.mult);
 		clock->pps_info.start[pin] = cycles_now + cycles_delta;
 		schedule_work(&clock->pps_info.out_work);
-		write_unlock_irqrestore(&clock->lock, flags);
+		write_sequnlock_irqrestore(&clock->lock, flags);
 		break;
 	default:
-		mlx5_core_err(mdev, " Unhandled event\n");
+		mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+			      clock->ptp_info.pin_config[pin].func);
 	}
+
+	return NOTIFY_OK;
 }
 
 void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -498,7 +525,7 @@
 		mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
 		return;
 	}
-	rwlock_init(&clock->lock);
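+	/* A seqlock lets hot-path timestamp readers run lock-free and
+	 * simply retry if they race with the rare writers that adjust
+	 * the timecounter.
+	 */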
+	seqlock_init(&clock->lock);
 	clock->cycles.read = read_internal_timer;
 	clock->cycles.shift = MLX5_CYCLES_SHIFT;
 	clock->cycles.mult = clocksource_khz2mult(dev_freq,
@@ -511,37 +538,30 @@
 			 ktime_to_ns(ktime_get_real()));
 
 	/* Calculate period in seconds to call the overflow watchdog - to make
-	 * sure counter is checked at least once every wrap around.
+	 * sure counter is checked at least twice every wrap around.
 	 * The period is calculated as the minimum between max HW cycles count
 	 * (The clock source mask) and max amount of cycles that can be
 	 * multiplied by clock multiplier where the result doesn't exceed
 	 * 64bits.
 	 */
 	overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
-	overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1);
+	overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
 
 	ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
 				 frac, &frac);
 	do_div(ns, NSEC_PER_SEC / HZ);
 	clock->overflow_period = ns;
 
-	mdev->clock_info_page = alloc_page(GFP_KERNEL);
-	if (mdev->clock_info_page) {
-		mdev->clock_info = kmap(mdev->clock_info_page);
-		if (!mdev->clock_info) {
-			__free_page(mdev->clock_info_page);
-			mlx5_core_warn(mdev, "failed to map clock page\n");
-		} else {
-			mdev->clock_info->sign   = 0;
-			mdev->clock_info->nsec   = clock->tc.nsec;
-			mdev->clock_info->cycles = clock->tc.cycle_last;
-			mdev->clock_info->mask   = clock->cycles.mask;
-			mdev->clock_info->mult   = clock->nominal_c_mult;
-			mdev->clock_info->shift  = clock->cycles.shift;
-			mdev->clock_info->frac   = clock->tc.frac;
-			mdev->clock_info->overflow_period =
-						clock->overflow_period;
-		}
+	mdev->clock_info =
+		(struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL);
+	if (mdev->clock_info) {
+		mdev->clock_info->nsec = clock->tc.nsec;
+		mdev->clock_info->cycles = clock->tc.cycle_last;
+		mdev->clock_info->mask = clock->cycles.mask;
+		mdev->clock_info->mult = clock->nominal_c_mult;
+		mdev->clock_info->shift = clock->cycles.shift;
+		mdev->clock_info->frac = clock->tc.frac;
+		mdev->clock_info->overflow_period = clock->overflow_period;
 	}
 
 	INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
@@ -567,6 +587,9 @@
 			       PTR_ERR(clock->ptp));
 		clock->ptp = NULL;
 	}
+
+	MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+	mlx5_eq_notifier_register(mdev, &clock->pps_nb);
 }
 
 void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +599,7 @@
 	if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
 		return;
 
+	mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
 	if (clock->ptp) {
 		ptp_clock_unregister(clock->ptp);
 		clock->ptp = NULL;
@@ -585,8 +609,7 @@
 	cancel_delayed_work_sync(&clock->overflow_work);
 
 	if (mdev->clock_info) {
-		kunmap(mdev->clock_info_page);
-		__free_page(mdev->clock_info_page);
+		free_page((unsigned long)mdev->clock_info);
 		mdev->clock_info = NULL;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 02e2e45..3160092 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -36,7 +36,6 @@
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 void mlx5_init_clock(struct mlx5_core_dev *mdev);
 void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 
 static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 {
@@ -46,11 +45,13 @@
 static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
 						u64 timestamp)
 {
+	unsigned int seq;
 	u64 nsec;
 
-	read_lock(&clock->lock);
-	nsec = timecounter_cyc2time(&clock->tc, timestamp);
-	read_unlock(&clock->lock);
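+	/* Retry until a consistent snapshot of the timecounter is read. */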
+	do {
+		seq = read_seqbegin(&clock->lock);
+		nsec = timecounter_cyc2time(&clock->tc, timestamp);
+	} while (read_seqretry(&clock->lock, seq));
 
 	return ns_to_ktime(nsec);
 }
@@ -58,8 +59,6 @@
 #else
 static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
 static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
 static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 {
 	return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
new file mode 100644
index 0000000..3fc575d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+			       void *key, u32 sz_bytes,
+			       u32 *p_key_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	u32 sz_bits = sz_bytes * BITS_PER_BYTE;
+	u8  general_obj_key_size;
+	u64 general_obj_types;
+	void *obj, *key_p;
+	int err;
+
+	obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object);
+	key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key);
+
+	general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+	if (!(general_obj_types &
+	      MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY))
+		return -EINVAL;
+
+	switch (sz_bits) {
+	case 128:
+		general_obj_key_size =
+			MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128;
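+		/* The key field is 256 bits wide; a 128-bit key appears
+		 * to be expected in its second half, hence the offset
+		 * below.
+		 */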
+		key_p += sz_bytes;
+		break;
+	case 256:
+		general_obj_key_size =
+			MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memcpy(key_p, key, sz_bytes);
+
+	MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
+	MLX5_SET(encryption_key_obj, obj, key_type,
+		 MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK);
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	/* avoid leaking key on the stack */
+	memzero_explicit(in, sizeof(in));
+
+	return err;
+}
+
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
new file mode 100644
index 0000000..bced2ef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+
+static LIST_HEAD(devcom_list);
+
+#define devcom_for_each_component(priv, comp, iter) \
+	for (iter = 0; \
+	     comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \
+	     iter++)
+
+struct mlx5_devcom_component {
+	struct {
+		void *data;
+	} device[MLX5_MAX_PORTS];
+
+	mlx5_devcom_event_handler_t handler;
+	struct rw_semaphore sem;
+	bool paired;
+};
+
+struct mlx5_devcom_list {
+	struct list_head list;
+
+	struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
+	struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+};
+
+struct mlx5_devcom {
+	struct mlx5_devcom_list *priv;
+	int idx;
+};
+
+static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void)
+{
+	struct mlx5_devcom_component *comp;
+	struct mlx5_devcom_list *priv;
+	int i;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	devcom_for_each_component(priv, comp, i)
+		init_rwsem(&comp->sem);
+
+	return priv;
+}
+
+static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv,
+					     u8 idx)
+{
+	struct mlx5_devcom *devcom;
+
+	devcom = kzalloc(sizeof(*devcom), GFP_KERNEL);
+	if (!devcom)
+		return NULL;
+
+	devcom->priv = priv;
+	devcom->idx = idx;
+	return devcom;
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+{
+	struct mlx5_devcom_list *priv = NULL, *iter;
+	struct mlx5_devcom *devcom = NULL;
+	bool new_priv = false;
+	u64 sguid0, sguid1;
+	int idx, i;
+
+	if (!mlx5_core_is_pf(dev))
+		return NULL;
+
+	sguid0 = mlx5_query_nic_system_image_guid(dev);
+	list_for_each_entry(iter, &devcom_list, list) {
+		struct mlx5_core_dev *tmp_dev = NULL;
+
+		idx = -1;
+		for (i = 0; i < MLX5_MAX_PORTS; i++) {
+			if (iter->devs[i])
+				tmp_dev = iter->devs[i];
+			else
+				idx = i;
+		}
+
+		if (idx == -1)
+			continue;
+
+		sguid1 = mlx5_query_nic_system_image_guid(tmp_dev);
+		if (sguid0 != sguid1)
+			continue;
+
+		priv = iter;
+		break;
+	}
+
+	if (!priv) {
+		priv = mlx5_devcom_list_alloc();
+		if (!priv)
+			return ERR_PTR(-ENOMEM);
+
+		idx = 0;
+		new_priv = true;
+	}
+
+	priv->devs[idx] = dev;
+	devcom = mlx5_devcom_alloc(priv, idx);
+	if (!devcom) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (new_priv)
+		list_add(&priv->list, &devcom_list);
+
+	return devcom;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
+{
+	struct mlx5_devcom_list *priv;
+	int i;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return;
+
+	priv = devcom->priv;
+	priv->devs[devcom->idx] = NULL;
+
+	kfree(devcom);
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (priv->devs[i])
+			break;
+
+	if (i != MLX5_MAX_PORTS)
+		return;
+
+	list_del(&priv->list);
+	kfree(priv);
+}
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+				    enum mlx5_devcom_components id,
+				    mlx5_devcom_event_handler_t handler,
+				    void *data)
+{
+	struct mlx5_devcom_component *comp;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return;
+
+	WARN_ON(!data);
+
+	comp = &devcom->priv->components[id];
+	down_write(&comp->sem);
+	comp->handler = handler;
+	comp->device[devcom->idx].data = data;
+	up_write(&comp->sem);
+}
+
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+				      enum mlx5_devcom_components id)
+{
+	struct mlx5_devcom_component *comp;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return;
+
+	comp = &devcom->priv->components[id];
+	down_write(&comp->sem);
+	comp->device[devcom->idx].data = NULL;
+	up_write(&comp->sem);
+}
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+			   enum mlx5_devcom_components id,
+			   int event,
+			   void *event_data)
+{
+	struct mlx5_devcom_component *comp;
+	int err = -ENODEV, i;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return err;
+
+	comp = &devcom->priv->components[id];
+	down_write(&comp->sem);
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (i != devcom->idx && comp->device[i].data) {
+			err = comp->handler(event, comp->device[i].data,
+					    event_data);
+			break;
+		}
+
+	up_write(&comp->sem);
+	return err;
+}
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+			    enum mlx5_devcom_components id,
+			    bool paired)
+{
+	struct mlx5_devcom_component *comp;
+
+	comp = &devcom->priv->components[id];
+	WARN_ON(!rwsem_is_locked(&comp->sem));
+
+	comp->paired = paired;
+}
+
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+			   enum mlx5_devcom_components id)
+{
+	if (IS_ERR_OR_NULL(devcom))
+		return false;
+
+	return devcom->priv->components[id].paired;
+}
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+				enum mlx5_devcom_components id)
+{
+	struct mlx5_devcom_component *comp;
+	int i;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return NULL;
+
+	comp = &devcom->priv->components[id];
+	down_read(&comp->sem);
+	if (!comp->paired) {
+		up_read(&comp->sem);
+		return NULL;
+	}
+
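+	/* With MLX5_MAX_PORTS == 2 this loop simply picks the peer's slot. */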
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (i != devcom->idx)
+			break;
+
+	return comp->device[i].data;
+}
+
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+				   enum mlx5_devcom_components id)
+{
+	struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+
+	up_read(&comp->sem);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
new file mode 100644
index 0000000..939d5bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_DEVCOM_H__
+#define __LIB_MLX5_DEVCOM_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_devcom_components {
+	MLX5_DEVCOM_ESW_OFFLOADS,
+
+	MLX5_DEVCOM_NUM_COMPONENTS,
+};
+
+typedef int (*mlx5_devcom_event_handler_t)(int event,
+					   void *my_data,
+					   void *event_data);
+
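+/* Typical usage (a sketch, assuming a two-PF device pair):
+ *
+ *	devcom = mlx5_devcom_register_device(dev);
+ *	mlx5_devcom_register_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+ *				       my_handler, my_data);
+ *	...
+ *	mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ *	mlx5_devcom_unregister_device(devcom);
+ *
+ * my_handler and my_data are hypothetical caller-provided names.
+ */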
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev);
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom);
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+				    enum mlx5_devcom_components id,
+				    mlx5_devcom_event_handler_t handler,
+				    void *data);
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+				      enum mlx5_devcom_components id);
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+			   enum mlx5_devcom_components id,
+			   int event,
+			   void *event_data);
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+			    enum mlx5_devcom_components id,
+			    bool paired);
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+			   enum mlx5_devcom_components id);
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+				enum mlx5_devcom_components id);
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+				   enum mlx5_devcom_components id);
+
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
new file mode 100644
index 0000000..e065c2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+struct mlx5_dm {
+	/* protect access to icm bitmask */
+	spinlock_t lock;
+	unsigned long *steering_sw_icm_alloc_blocks;
+	unsigned long *header_modify_sw_icm_alloc_blocks;
+};
+
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
+{
+	u64 header_modify_icm_blocks = 0;
+	u64 steering_icm_blocks = 0;
+	struct mlx5_dm *dm;
+
+	if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
+		return 0;
+
+	dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+	if (!dm)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&dm->lock);
+
+	if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) {
+		steering_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->steering_sw_icm_alloc_blocks =
+			kcalloc(BITS_TO_LONGS(steering_icm_blocks),
+				sizeof(unsigned long), GFP_KERNEL);
+		if (!dm->steering_sw_icm_alloc_blocks)
+			goto err_steering;
+	}
+
+	if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) {
+		header_modify_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->header_modify_sw_icm_alloc_blocks =
+			kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
+				sizeof(unsigned long), GFP_KERNEL);
+		if (!dm->header_modify_sw_icm_alloc_blocks)
+			goto err_modify_hdr;
+	}
+
+	return dm;
+
+err_modify_hdr:
+	kfree(dm->steering_sw_icm_alloc_blocks);
+
+err_steering:
+	kfree(dm);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_dm *dm = dev->dm;
+
+	if (!dev->dm)
+		return;
+
+	if (dm->steering_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+					  MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		kfree(dm->steering_sw_icm_alloc_blocks);
+	}
+
+	if (dm->header_modify_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev,
+							   log_header_modify_sw_icm_size) -
+				      MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		kfree(dm->header_modify_sw_icm_alloc_blocks);
+	}
+
+	kfree(dm);
+}
+
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			 u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id)
+{
+	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+	struct mlx5_dm *dm = dev->dm;
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u32 log_icm_size;
+	u32 max_blocks;
+	u64 block_idx;
+	void *sw_icm;
+	int ret;
+
+	if (!dev->dm)
+		return -EOPNOTSUPP;
+
+	if (!length || (length & (length - 1)) ||
+	    length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1))
+		return -EINVAL;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	switch (type) {
+	case MLX5_SW_ICM_TYPE_STEERING:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev,
+						log_header_modify_sw_icm_size);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!block_map)
+		return -EOPNOTSUPP;
+
+	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
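+	/* Reserve a free run of num_blocks blocks in the ICM bitmap
+	 * under the lock; the failure path below releases the
+	 * reservation if the firmware command does not go through.
+	 */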
+	spin_lock(&dm->lock);
+	block_idx = bitmap_find_next_zero_area(block_map,
+					       max_blocks,
+					       0,
+					       num_blocks, 0);
+
+	if (block_idx < max_blocks)
+		bitmap_set(block_map,
+			   block_idx, num_blocks);
+
+	spin_unlock(&dm->lock);
+
+	if (block_idx >= max_blocks)
+		return -ENOMEM;
+
+	sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+	icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+		   icm_start_addr);
+	MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret) {
+		spin_lock(&dm->lock);
+		bitmap_clear(block_map,
+			     block_idx, num_blocks);
+		spin_unlock(&dm->lock);
+
+		return ret;
+	}
+
+	*addr = icm_start_addr;
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc);
+
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			   u64 length, u16 uid, phys_addr_t addr, u32 obj_id)
+{
+	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	struct mlx5_dm *dm = dev->dm;
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u64 start_idx;
+	int err;
+
+	if (!dev->dm)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case MLX5_SW_ICM_TYPE_STEERING:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	spin_lock(&dm->lock);
+	bitmap_clear(block_map,
+		     start_idx, num_blocks);
+	spin_unlock(&dm->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 0000000..4be4d2d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_EQE_SIZE       (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+	struct list_head      list;
+	struct list_head      process_list;
+	struct tasklet_struct task;
+	spinlock_t            lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+	spinlock_t              lock;	/* protect radix tree */
+	struct radix_tree_root  tree;
+};
+
+struct mlx5_eq {
+	struct mlx5_core_dev    *dev;
+	struct mlx5_cq_table    cq_table;
+	__be32 __iomem	        *doorbell;
+	u32                     cons_index;
+	struct mlx5_frag_buf    buf;
+	int                     size;
+	unsigned int            vecidx;
+	unsigned int            irqn;
+	u8                      eqn;
+	int                     nent;
+	struct mlx5_rsc_debug   *dbg;
+};
+
+struct mlx5_eq_async {
+	struct mlx5_eq          core;
+	struct notifier_block   irq_nb;
+};
+
+struct mlx5_eq_comp {
+	struct mlx5_eq          core;
+	struct notifier_block   irq_nb;
+	struct mlx5_eq_tasklet  tasklet_ctx;
+	struct list_head        list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
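+/* An EQE belongs to software when its ownership bit matches the parity
+ * of the consumer index's pass over the (power-of-two sized) queue.
+ */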
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+	__raw_writel((__force u32)cpu_to_be32(val), addr);
+	/* We still want ordering, just not swabbing, so add a barrier */
+	mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
new file mode 100644
index 0000000..23361a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/kernel.h>
+#include "mlx5_core.h"
+#include "geneve.h"
+
+struct mlx5_geneve {
+	struct mlx5_core_dev *mdev;
+	__be16 opt_class;
+	u8 opt_type;
+	u32 obj_id;
+	struct mutex sync_lock; /* protect GENEVE obj operations */
+	u32 refcount;
+};
+
+static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev,
+					 __be16 class,
+					 u8 type,
+					 u8 len)
+{
+	u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u64 general_obj_types;
+	void *hdr, *opt;
+	u16 obj_id;
+	int err;
+
+	general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+	if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT))
+		return -EINVAL;
+
+	hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr);
+	opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt);
+
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+
+	MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class));
+	MLX5_SET(geneve_tlv_option, opt, option_type, type);
+	MLX5_SET(geneve_tlv_option, opt, option_data_length, len);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return obj_id;
+}
+
+static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id)
+{
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
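+/* Only a single Geneve TLV option object is supported per device;
+ * concurrent users must request an identical class/type/len tuple and
+ * share the object via geneve->refcount.
+ */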
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt)
+{
+	int res = 0;
+
+	if (IS_ERR_OR_NULL(geneve))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&geneve->sync_lock);
+
+	if (geneve->refcount) {
+		if (geneve->opt_class == opt->opt_class &&
+		    geneve->opt_type == opt->type) {
+			/* We already have TLV options obj allocated */
+			geneve->refcount++;
+		} else {
+			/* TLV options obj allocated, but its params
+			 * do not match the new request.
+			 * We support only one such object.
+			 */
+			mlx5_core_warn(geneve->mdev,
+				       "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n",
+				       be16_to_cpu(opt->opt_class),
+				       opt->type,
+				       opt->length);
+			res = -EOPNOTSUPP;
+			goto unlock;
+		}
+	} else {
+		/* We don't have any TLV options obj allocated */
+
+		res = mlx5_geneve_tlv_option_create(geneve->mdev,
+						    opt->opt_class,
+						    opt->type,
+						    opt->length);
+		if (res < 0) {
+			mlx5_core_warn(geneve->mdev,
+				       "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n",
+				       be16_to_cpu(opt->opt_class),
+				       opt->type, opt->length, res);
+			goto unlock;
+		}
+		geneve->opt_class = opt->opt_class;
+		geneve->opt_type = opt->type;
+		geneve->obj_id = res;
+		geneve->refcount++;
+	}
+
+unlock:
+	mutex_unlock(&geneve->sync_lock);
+	return res;
+}
+
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve)
+{
+	if (IS_ERR_OR_NULL(geneve))
+		return;
+
+	mutex_lock(&geneve->sync_lock);
+	if (--geneve->refcount == 0) {
+		/* We've just removed the last user of Geneve option.
+		 * Now delete the object in FW.
+		 */
+		mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+		geneve->opt_class = 0;
+		geneve->opt_type = 0;
+		geneve->obj_id = 0;
+	}
+	mutex_unlock(&geneve->sync_lock);
+}
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_geneve *geneve =
+		kzalloc(sizeof(*geneve), GFP_KERNEL);
+
+	if (!geneve)
+		return ERR_PTR(-ENOMEM);
+	geneve->mdev = mdev;
+	mutex_init(&geneve->sync_lock);
+
+	return geneve;
+}
+
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve)
+{
+	if (IS_ERR_OR_NULL(geneve))
+		return;
+
+	/* Lockless since we are unloading */
+	if (geneve->refcount)
+		mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+	kfree(geneve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
new file mode 100644
index 0000000..adee0cb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_GENEVE_H__
+#define __MLX5_GENEVE_H__
+
+#include <net/geneve.h>
+#include <linux/mlx5/driver.h>
+
+struct mlx5_geneve;
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev);
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve);
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt);
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline struct mlx5_geneve *
+mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; }
+static inline void
+mlx5_geneve_destroy(struct mlx5_geneve *geneve) {}
+static inline int
+mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; }
+static inline void
+mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_GENEVE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..583dc7e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+				 int offset, bool read)
+{
+	int rc;
+	int bytes_returned;
+	int block_id;
+
+	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+	rc = read ?
+	     hyperv_read_cfg_blk(dev->pdev, buf,
+				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+				 &bytes_returned) :
+	     hyperv_write_cfg_blk(dev->pdev, buf,
+				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+	/* Make sure len bytes were read successfully */
+	if (read && !rc && len != bytes_returned)
+		rc = -EIO;
+
+	if (rc) {
+		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+			      read ? "read" : "write", rc, len,
+			      offset);
+		return rc;
+	}
+
+	return 0;
+}
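+
+/*
+ * Sketch of the block arithmetic above, assuming HV_CONFIG_BLOCK_SIZE_MAX
+ * is 128 bytes (per the hyperv PCI interface): a transfer must cover
+ * exactly one block, so offset 256 with len 128 maps to block_id 2, while
+ * offset 100 or len 64 is rejected with -EINVAL.
+ */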
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	return hyperv_reg_block_invalidate(dev->pdev, context,
+					   block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..f9a4557
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 0000000..4047629
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+	struct mlx5_core_dev       *dev;
+	struct workqueue_struct    *work_queue;
+	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
+	struct mutex                agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+	struct work_struct     invalidate_work;
+	struct mlx5_hv_vhca   *hv_vhca;
+	u64                    block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+	u16     sequence;
+	u16     offset;
+	u8      reserved[4];
+	u64     data[15];
+};
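+
+/*
+ * Layout note: the header (sequence + offset + reserved) is 8 bytes and
+ * data[] is 15 * 8 = 120 bytes, so a data block is exactly 128 bytes,
+ * i.e. one HV config block as transferred by mlx5_hv_write_config().
+ */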
+
+struct mlx5_hv_vhca_agent {
+	enum mlx5_hv_vhca_agent_type	 type;
+	struct mlx5_hv_vhca		*hv_vhca;
+	void				*priv;
+	u16                              seq;
+	void (*control)(struct mlx5_hv_vhca_agent *agent,
+			struct mlx5_hv_vhca_control_block *block);
+	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+			   u64 block_mask);
+	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_hv_vhca *hv_vhca;
+
+	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+	if (!hv_vhca)
+		return ERR_PTR(-ENOMEM);
+
+	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+	if (!hv_vhca->work_queue) {
+		kfree(hv_vhca);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	hv_vhca->dev = dev;
+	mutex_init(&hv_vhca->agents_lock);
+
+	return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	destroy_workqueue(hv_vhca->work_queue);
+	kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+	struct mlx5_hv_vhca_work *hwork;
+	struct mlx5_hv_vhca *hv_vhca;
+	int i;
+
+	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+	hv_vhca = hwork->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->invalidate)
+			continue;
+
+		if (!(BIT(agent->type) & hwork->block_mask))
+			continue;
+
+		agent->invalidate(agent, hwork->block_mask);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+	struct mlx5_hv_vhca_work *work;
+
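+	/* May be called from the config-block invalidate callback, possibly
+	 * in atomic context, hence GFP_ATOMIC here; the agent callbacks take
+	 * a mutex, so they are deferred to the workqueue.
+	 */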
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+	work->hv_vhca    = hv_vhca;
+	work->block_mask = block_mask;
+
+	queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
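+/*
+ * Map an agent type to its bit in the control block: the control agent
+ * (type 0) owns no bit, STATS (type 1) maps to BIT(0), a hypothetical
+ * type 2 would map to BIT(1), and so on. This encoding is what appears
+ * in the control block's capabilities and control words.
+ */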
+#define AGENT_MASK(type) ((type) ? BIT((type) - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+					struct mlx5_hv_vhca_control_block *block)
+{
+	int i;
+
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->control)
+			continue;
+
+		if (!(AGENT_MASK(agent->type) & block->control))
+			continue;
+
+		agent->control(agent, block);
+	}
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+				      u32 *capabilities)
+{
+	int i;
+
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (agent)
+			*capabilities |= AGENT_MASK(agent->type);
+	}
+}
+
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+				      u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+	struct mlx5_core_dev *dev = hv_vhca->dev;
+	struct mlx5_hv_vhca_control_block *block;
+	u32 capabilities = 0;
+	int err;
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (!block)
+		return;
+
+	err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+	if (err)
+		goto free_block;
+
+	mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+	/* In case no capabilities, send empty block in return */
+	if (!capabilities) {
+		memset(block, 0, sizeof(*block));
+		goto write;
+	}
+
+	if (block->capabilities != capabilities)
+		block->capabilities = capabilities;
+
+	if (block->control & ~capabilities)
+		goto free_block;
+
+	mlx5_hv_vhca_agents_control(hv_vhca, block);
+	block->command_ack = block->command;
+
+write:
+	mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+	kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+	return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+					 NULL,
+					 mlx5_hv_vhca_control_agent_invalidate,
+					 NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	mlx5_hv_vhca_agent_destroy(agent);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	struct mlx5_hv_vhca_agent *agent;
+	int err;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return PTR_ERR_OR_ZERO(hv_vhca);
+
+	err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+					  mlx5_hv_vhca_invalidate);
+	if (err)
+		return err;
+
+	agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+	if (IS_ERR(agent)) {
+		mlx5_hv_unregister_invalidate(hv_vhca->dev);
+		return PTR_ERR(agent);
+	}
+
+	hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+	return 0;
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+	struct mlx5_hv_vhca_agent *agent;
+	int i;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+	if (agent)
+		mlx5_hv_vhca_control_agent_destroy(agent);
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+		WARN_ON(hv_vhca->agents[i]);
+
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+	mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *priv)
+{
+	struct mlx5_hv_vhca_agent *agent;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return ERR_PTR(-ENOMEM);
+
+	if (type >= MLX5_HV_VHCA_AGENT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&hv_vhca->agents_lock);
+	if (hv_vhca->agents[type]) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return ERR_PTR(-EINVAL);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+	if (!agent)
+		return ERR_PTR(-ENOMEM);
+
+	agent->type      = type;
+	agent->hv_vhca   = hv_vhca;
+	agent->priv      = priv;
+	agent->control   = control;
+	agent->invalidate = invalidate;
+	agent->cleanup   = cleanup;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	hv_vhca->agents[type] = agent;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	mlx5_hv_vhca_agents_update(hv_vhca);
+
+	return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+
+	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return;
+	}
+
+	hv_vhca->agents[agent->type] = NULL;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	if (agent->cleanup)
+		agent->cleanup(agent);
+
+	kfree(agent);
+
+	mlx5_hv_vhca_agents_update(hv_vhca);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+					   struct mlx5_hv_vhca_data_block *data_block,
+					   void *src, int len, int *offset)
+{
+	int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+	data_block->sequence = agent->seq;
+	data_block->offset   = (*offset)++;
+	memcpy(data_block->data, src, bytes);
+
+	return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+	agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len)
+{
+	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+	int block_offset = 0;
+	int total = 0;
+	int err;
+
+	while (len) {
+		struct mlx5_hv_vhca_data_block data_block = {0};
+		int bytes;
+
+		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+							buf + total,
+							len, &block_offset);
+		if (!bytes)
+			return -ENOMEM;
+
+		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+					   sizeof(data_block), offset);
+		if (err)
+			return err;
+
+		total += bytes;
+		len   -= bytes;
+	}
+
+	mlx5_hv_vhca_agent_seq_update(agent);
+
+	return 0;
+}
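+
+/*
+ * Example (assuming 128-byte config blocks): a 300-byte buffer goes out
+ * as three data blocks carrying 120, 120 and 60 payload bytes. All three
+ * are written to the same per-agent config block slot; the reader
+ * reassembles them from the embedded offsets (0, 1, 2) and the shared
+ * sequence number, which is bumped only once the whole buffer was sent.
+ */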
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+	return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 0000000..4bad6a5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+	MLX5_HV_VHCA_AGENT_CONTROL = 0,
+	MLX5_HV_VHCA_AGENT_STATS   = 1,
+	MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+	u32     capabilities;
+	u32     control;
+	u16     command;
+	u16     command_ack;
+	u16     version;
+	u16     rings;
+	u32     reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+					   u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+
+static inline int
+mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			 void *buf, int len)
+{
+	return 0;
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 7550b1c..b99d469 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -33,11 +33,55 @@
 #ifndef __LIB_MLX5_H__
 #define __LIB_MLX5_H__
 
+#include "mlx5_core.h"
+
 void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
 void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
 int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
 void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
 int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
 void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_enable(struct mlx5_core_dev *dev);
+void mlx5_crdump_disable(struct mlx5_core_dev *dev);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
+
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK    0xF
+
+enum port_module_event_status_type {
+	MLX5_MODULE_STATUS_PLUGGED   = 0x1,
+	MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+	MLX5_MODULE_STATUS_ERROR     = 0x3,
+	MLX5_MODULE_STATUS_DISABLED  = 0x4,
+	MLX5_MODULE_STATUS_NUM,
+};
+
+enum port_module_event_error_type {
+	MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED    = 0x0,
+	MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX  = 0x1,
+	MLX5_MODULE_EVENT_ERROR_BUS_STUCK                = 0x2,
+	MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT  = 0x3,
+	MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+	MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER       = 0x5,
+	MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE         = 0x6,
+	MLX5_MODULE_EVENT_ERROR_BAD_CABLE                = 0x7,
+	MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+	MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+	u64 status_counters[MLX5_MODULE_STATUS_NUM];
+	u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
+/* Crypto */
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+			       void *key, u32 sz_bytes, u32 *p_key_id);
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index 9835955..3118e8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -67,6 +67,7 @@
 struct l2table_node {
 	struct l2addr_node node;
 	u32                index; /* index in HW l2 table */
+	int                ref_count; /* users sharing this MAC entry */
 };
 
 struct mlx5_mpfs {
@@ -108,8 +109,7 @@
 
 	mutex_init(&mpfs->lock);
 	mpfs->size   = l2table_size;
-	mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size),
-			       sizeof(uintptr_t), GFP_KERNEL);
+	mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL);
 	if (!mpfs->bitmap) {
 		kfree(mpfs);
 		return -ENOMEM;
@@ -127,7 +127,7 @@
 		return;
 
 	WARN_ON(!hlist_empty(mpfs->hash));
-	kfree(mpfs->bitmap);
+	bitmap_free(mpfs->bitmap);
 	kfree(mpfs);
 }
 
@@ -135,8 +135,8 @@
 {
 	struct mlx5_mpfs *mpfs = dev->priv.mpfs;
 	struct l2table_node *l2addr;
+	int err = 0;
 	u32 index;
-	int err;
 
 	if (!MLX5_ESWITCH_MANAGER(dev))
 		return 0;
@@ -145,30 +145,35 @@
 
 	l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
 	if (l2addr) {
-		err = -EEXIST;
-		goto abort;
+		l2addr->ref_count++;
+		goto out;
 	}
 
 	err = alloc_l2table_index(mpfs, &index);
 	if (err)
-		goto abort;
+		goto out;
 
 	l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
 	if (!l2addr) {
-		free_l2table_index(mpfs, index);
 		err = -ENOMEM;
-		goto abort;
+		goto hash_add_err;
 	}
 
+	err = set_l2table_entry_cmd(dev, index, mac);
+	if (err)
+		goto set_table_entry_err;
+
 	l2addr->index = index;
-	err = set_l2table_entry_cmd(dev, index, mac);
-	if (err) {
-		l2addr_hash_del(l2addr);
-		free_l2table_index(mpfs, index);
-	}
+	l2addr->ref_count = 1;
 
 	mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
-abort:
+	goto out;
+
+set_table_entry_err:
+	l2addr_hash_del(l2addr);
+hash_add_err:
+	free_l2table_index(mpfs, index);
+out:
 	mutex_unlock(&mpfs->lock);
 	return err;
 }
@@ -191,6 +196,9 @@
 		goto unlock;
 	}
 
+	if (--l2addr->ref_count > 0)
+		goto unlock;
+
 	index = l2addr->index;
 	del_l2table_entry_cmd(dev, index);
 	l2addr_hash_del(l2addr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
new file mode 100644
index 0000000..6b774e0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/pci.h>
+#include "mlx5_core.h"
+#include "pci_vsc.h"
+
+#define MLX5_EXTRACT_C(source, offset, size)	\
+	((((u32)(source)) >> (offset)) & MLX5_ONES32(size))
+#define MLX5_EXTRACT(src, start, len)		\
+	(((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len))
+#define MLX5_ONES32(size)			\
+	((size) ? (0xffffffff >> (32 - (size))) : 0)
+#define MLX5_MASK32(offset, size)		\
+	(MLX5_ONES32(size) << (offset))
+#define MLX5_MERGE_C(rsrc1, rsrc2, start, len)  \
+	((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \
+	((rsrc1) & (~MLX5_MASK32((start), (len)))))
+#define MLX5_MERGE(rsrc1, rsrc2, start, len)	\
+	(((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len))
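+/*
+ * Worked example: MLX5_MASK32(4, 8) is 0x00000ff0, so
+ * MLX5_MERGE(0xaaaaaaaa, 0x5b, 4, 8) splices 0x5b into bits 4..11 and
+ * yields 0xaaaaa5ba, and MLX5_EXTRACT(0xaaaaa5ba, 4, 8) reads 0x5b back.
+ */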
+#define vsc_read(dev, offset, val) \
+	pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define vsc_write(dev, offset, val) \
+	pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define VSC_MAX_RETRIES 2048
+
+enum {
+	VSC_CTRL_OFFSET = 0x4,
+	VSC_COUNTER_OFFSET = 0x8,
+	VSC_SEMAPHORE_OFFSET = 0xc,
+	VSC_ADDR_OFFSET = 0x10,
+	VSC_DATA_OFFSET = 0x14,
+
+	VSC_FLAG_BIT_OFFS = 31,
+	VSC_FLAG_BIT_LEN = 1,
+
+	VSC_SYND_BIT_OFFS = 30,
+	VSC_SYND_BIT_LEN = 1,
+
+	VSC_ADDR_BIT_OFFS = 0,
+	VSC_ADDR_BIT_LEN = 30,
+
+	VSC_SPACE_BIT_OFFS = 0,
+	VSC_SPACE_BIT_LEN = 16,
+
+	VSC_SIZE_VLD_BIT_OFFS = 28,
+	VSC_SIZE_VLD_BIT_LEN = 1,
+
+	VSC_STATUS_BIT_OFFS = 29,
+	VSC_STATUS_BIT_LEN = 3,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_pf(dev))
+		return;
+
+	dev->vsc_addr = pci_find_capability(dev->pdev,
+					    PCI_CAP_ID_VNDR);
+	if (!dev->vsc_addr)
+		mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n");
+}
+
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
+{
+	u32 counter = 0;
+	int retries = 0;
+	u32 lock_val;
+	int ret;
+
+	pci_cfg_access_lock(dev->pdev);
+	do {
+		if (retries > VSC_MAX_RETRIES) {
+			ret = -EBUSY;
+			goto pci_unlock;
+		}
+
+		/* Check if semaphore is already locked */
+		ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+		if (ret)
+			goto pci_unlock;
+
+		if (lock_val) {
+			retries++;
+			usleep_range(1000, 2000);
+			continue;
+		}
+
+		/* Read the counter, write it to the semaphore, and read the
+		 * semaphore back; if the value read back matches the counter,
+		 * the semaphore was acquired successfully.
+		 */
+		ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter);
+		if (ret)
+			goto pci_unlock;
+
+		ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter);
+		if (ret)
+			goto pci_unlock;
+
+		ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+		if (ret)
+			goto pci_unlock;
+
+		retries++;
+	} while (counter != lock_val);
+
+	return 0;
+
+pci_unlock:
+	pci_cfg_access_unlock(dev->pdev);
+	return ret;
+}
+
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev)
+{
+	int ret;
+
+	ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK);
+	pci_cfg_access_unlock(dev->pdev);
+	return ret;
+}
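+
+/*
+ * Typical gateway sequence (sketch; the crdump collector declared in
+ * lib/mlx5.h is one such user):
+ *
+ *	err = mlx5_vsc_gw_lock(dev);
+ *	if (!err) {
+ *		mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, &size);
+ *		mlx5_vsc_gw_read_block_fast(dev, data, length);
+ *		mlx5_vsc_gw_unlock(dev);
+ *	}
+ */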
+
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+			  u32 *ret_space_size)
+{
+	int ret;
+	u32 val = 0;
+
+	if (!mlx5_vsc_accessible(dev))
+		return -EINVAL;
+
+	if (ret_space_size)
+		*ret_space_size = 0;
+
+	/* Read the current control value, so other fields are preserved */
+	ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+	if (ret)
+		goto out;
+
+	/* Request the new space */
+	val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN);
+	ret = vsc_write(dev, VSC_CTRL_OFFSET, val);
+	if (ret)
+		goto out;
+
+	/* Read back the control word; a zero status means the requested
+	 * space is not supported
+	 */
+	ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+	if (ret)
+		goto out;
+
+	if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0)
+		return -EINVAL;
+
+	/* Get space max address if indicated by size valid bit */
+	if (ret_space_size &&
+	    MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) {
+		ret = vsc_read(dev, VSC_ADDR_OFFSET, &val);
+		if (ret) {
+			mlx5_core_warn(dev, "Failed to get max space size\n");
+			goto out;
+		}
+		*ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS,
+					       VSC_ADDR_BIT_LEN);
+	}
+	return 0;
+
+out:
+	return ret;
+}
+
+static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val)
+{
+	int retries = 0;
+	u32 flag;
+	int ret;
+
+	do {
+		if (retries > VSC_MAX_RETRIES)
+			return -EBUSY;
+
+		ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag);
+		if (ret)
+			return ret;
+		flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN);
+		retries++;
+
+		if ((retries & 0xf) == 0)
+			usleep_range(1000, 2000);
+
+	} while (flag != expected_val);
+
+	return 0;
+}
+
+static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address,
+			     u32 data)
+{
+	int ret;
+
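+	/* The top two bits of the address dword are the flag and syndrome
+	 * bits owned by the gateway; a caller-supplied address that sets
+	 * them is malformed.
+	 */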
+	if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+			 VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+		return -EINVAL;
+
+	/* Set flag to 0x1 */
+	address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1);
+	ret = vsc_write(dev, VSC_DATA_OFFSET, data);
+	if (ret)
+		goto out;
+
+	ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+	if (ret)
+		goto out;
+
+	/* Wait for the flag to be cleared */
+	ret = mlx5_vsc_wait_on_flag(dev, 0);
+
+out:
+	return ret;
+}
+
+static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address,
+			    u32 *data)
+{
+	int ret;
+
+	if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+			 VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+		return -EINVAL;
+
+	ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+	if (ret)
+		goto out;
+
+	ret = mlx5_vsc_wait_on_flag(dev, 1);
+	if (ret)
+		goto out;
+
+	ret = vsc_read(dev, VSC_DATA_OFFSET, data);
+out:
+	return ret;
+}
+
+static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev,
+				 unsigned int read_addr,
+				 unsigned int *next_read_addr,
+				 u32 *data)
+{
+	int ret;
+
+	ret = mlx5_vsc_gw_read(dev, read_addr, data);
+	if (ret)
+		goto out;
+
+	ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr);
+	if (ret)
+		goto out;
+
+	*next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS,
+				       VSC_ADDR_BIT_LEN);
+
+	if (*next_read_addr <= read_addr)
+		ret = -EINVAL;
+out:
+	return ret;
+}
+
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+				int length)
+{
+	unsigned int next_read_addr = 0;
+	unsigned int read_addr = 0;
+
+	while (read_addr < length) {
+		if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr,
+					  &data[(read_addr >> 2)]))
+			return read_addr;
+
+		read_addr = next_read_addr;
+	}
+	return length;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+			   enum mlx5_vsc_state state)
+{
+	u32 data, id = 0;
+	int ret;
+
+	ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL);
+	if (ret) {
+		mlx5_core_warn(dev, "Failed to set gw space %d\n", ret);
+		return ret;
+	}
+
+	if (state == MLX5_VSC_LOCK) {
+		/* Get a unique ID based on the counter */
+		ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id);
+		if (ret)
+			return ret;
+	}
+
+	/* Try to modify lock */
+	ret = mlx5_vsc_gw_write(dev, space, id);
+	if (ret)
+		return ret;
+
+	/* Verify lock was modified */
+	ret = mlx5_vsc_gw_read(dev, space, &data);
+	if (ret)
+		return -EINVAL;
+
+	if (data != id)
+		return -EBUSY;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
new file mode 100644
index 0000000..64272a6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_PCI_VSC_H__
+#define __MLX5_PCI_VSC_H__
+
+enum mlx5_vsc_state {
+	MLX5_VSC_UNLOCK,
+	MLX5_VSC_LOCK,
+};
+
+enum {
+	MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+			  u32 *ret_space_size);
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+				int length);
+
+static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev)
+{
+	return !!dev->vsc_addr;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+			   enum mlx5_vsc_state state);
+
+#endif /* __MLX5_PCI_VSC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
new file mode 100644
index 0000000..48b5c84
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+#include "lib/port_tun.h"
+
+struct mlx5_port_tun_entropy_flags {
+	bool force_supported, force_enabled;
+	bool calc_supported, calc_enabled;
+	bool gre_calc_supported, gre_calc_enabled;
+};
+
+static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev,
+					struct mlx5_port_tun_entropy_flags *entropy_flags)
+{
+	u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+
+	/* Default values for FW that does not support MLX5_REG_PCMR */
+	entropy_flags->force_supported = false;
+	entropy_flags->calc_supported = false;
+	entropy_flags->gre_calc_supported = false;
+	entropy_flags->force_enabled = false;
+	entropy_flags->calc_enabled = true;
+	entropy_flags->gre_calc_enabled = true;
+
+	if (!MLX5_CAP_GEN(mdev, ports_check))
+		return;
+
+	if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+		return;
+
+	entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap));
+	entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap));
+	entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap));
+	entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force));
+	entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc));
+	entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc));
+}
+
+static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable,
+					  u8 force)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, entropy_force, force);
+	MLX5_SET(pcmr_reg, in, entropy_calc, enable);
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev,
+					      u8 enable, u8 force)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, entropy_force, force);
+	MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable);
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+				struct mlx5_core_dev *mdev)
+{
+	struct mlx5_port_tun_entropy_flags entropy_flags;
+
+	tun_entropy->mdev = mdev;
+	mutex_init(&tun_entropy->lock);
+	mlx5_query_port_tun_entropy(mdev, &entropy_flags);
+	tun_entropy->num_enabling_entries = 0;
+	tun_entropy->num_disabling_entries = 0;
+	tun_entropy->enabled = entropy_flags.calc_supported ?
+			       entropy_flags.calc_enabled : true;
+}
+
+static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
+			    int reformat_type, bool enable)
+{
+	struct mlx5_port_tun_entropy_flags entropy_flags;
+	int err;
+
+	mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags);
+	/* Tunnel entropy calculation may be controlled either on port basis
+	 * for all tunneling protocols or specifically for GRE protocol.
+	 * Prioritize GRE protocol control (if capable) over global port
+	 * configuration.
+	 */
+	if (entropy_flags.gre_calc_supported &&
+	    reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		if (!entropy_flags.force_supported)
+			return 0;
+		err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev,
+							 enable, !enable);
+		if (err)
+			return err;
+	} else if (entropy_flags.calc_supported) {
+		/* Other applications may change the global FW entropy
+		 * calculation setting. With force enabled, the current value
+		 * must be the logical inverse of the one we are about to set.
+		 */
+		if (entropy_flags.force_enabled &&
+		    enable == entropy_flags.calc_enabled) {
+			mlx5_core_warn(tun_entropy->mdev,
+				       "Unexpected entropy calc setting - expected %d",
+				       !entropy_flags.calc_enabled);
+			return -EOPNOTSUPP;
+		}
+		/* GRE requires entropy calculation to be disabled. If there
+		 * are enabling entries (e.g. VXLAN) we cannot turn it off
+		 * for them, so fail.
+		 */
+		if (tun_entropy->num_enabling_entries)
+			return -EOPNOTSUPP;
+		err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable,
+						     entropy_flags.force_supported);
+		if (err)
+			return err;
+		tun_entropy->enabled = enable;
+		/* if we turn on the entropy we don't need to force it anymore */
+		if (entropy_flags.force_supported && enable) {
+			err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* This function manages the refcount for enabling/disabling tunnel types.
+ * The return value indicates whether the increment succeeded, depending on
+ * the entropy capabilities and configuration.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+				  int reformat_type)
+{
+	/* The default is an error, for unknown (non-VXLAN/GRE) tunnel types */
+	int err = -EOPNOTSUPP;
+
+	mutex_lock(&tun_entropy->lock);
+	if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
+	    tun_entropy->enabled) {
+		/* If entropy calculation is enabled for all tunneling types,
+		 * it is fine for VXLAN, so approve; otherwise keep the
+		 * default error.
+		 */
+		tun_entropy->num_enabling_entries++;
+		err = 0;
+	} else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		/* Turn off entropy only for the first GRE rule; for
+		 * subsequent rules entropy was already disabled
+		 * successfully.
+		 */
+		if (tun_entropy->num_disabling_entries == 0)
+			err = mlx5_set_entropy(tun_entropy, reformat_type, 0);
+		else
+			err = 0;
+		if (!err)
+			tun_entropy->num_disabling_entries++;
+	}
+	mutex_unlock(&tun_entropy->lock);
+
+	return err;
+}
+
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+				   int reformat_type)
+{
+	mutex_lock(&tun_entropy->lock);
+	if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN)
+		tun_entropy->num_enabling_entries--;
+	else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE &&
+		 --tun_entropy->num_disabling_entries == 0)
+		mlx5_set_entropy(tun_entropy, reformat_type, 1);
+	mutex_unlock(&tun_entropy->lock);
+}
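+
+/*
+ * Usage sketch (the caller shown is illustrative): an encap offload path
+ * takes a reference before installing a tunnel rule and drops it on
+ * teardown:
+ *
+ *	err = mlx5_tun_entropy_refcount_inc(tun_entropy,
+ *					    MLX5_REFORMAT_TYPE_L2_TO_NVGRE);
+ *	if (err)
+ *		return err;
+ *	...offload the GRE encap rule...
+ *	mlx5_tun_entropy_refcount_dec(tun_entropy,
+ *				      MLX5_REFORMAT_TYPE_L2_TO_NVGRE);
+ */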
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
new file mode 100644
index 0000000..54c42a8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_PORT_TUN_H__
+#define __MLX5_PORT_TUN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_tun_entropy {
+	struct mlx5_core_dev *mdev;
+	u32 num_enabling_entries;
+	u32 num_disabling_entries;
+	u8  enabled;
+	struct mutex lock;	/* lock the entropy fields */
+};
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+				struct mlx5_core_dev *mdev);
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+				  int reformat_type);
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+				   int reformat_type);
+
+#endif /* __MLX5_PORT_TUN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index 9a8fd76..148b55c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -32,7 +32,9 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/refcount.h>
 #include <linux/mlx5/driver.h>
+#include <net/vxlan.h>
 #include "mlx5_core.h"
 #include "vxlan.h"
 
@@ -47,7 +49,7 @@
 
 struct mlx5_vxlan_port {
 	struct hlist_node hlist;
-	atomic_t refcount;
+	refcount_t refcount;
 	u16 udp_port;
 };
 
@@ -112,7 +114,7 @@
 
 	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
 	if (vxlanp) {
-		atomic_inc(&vxlanp->refcount);
+		refcount_inc(&vxlanp->refcount);
 		return 0;
 	}
 
@@ -136,7 +138,7 @@
 	}
 
 	vxlanp->udp_port = port;
-	atomic_set(&vxlanp->refcount, 1);
+	refcount_set(&vxlanp->refcount, 1);
 
 	spin_lock_bh(&vxlan->lock);
 	hash_add(vxlan->htable, &vxlanp->hlist, port);
@@ -169,7 +171,7 @@
 		goto out_unlock;
 	}
 
-	if (atomic_dec_and_test(&vxlanp->refcount)) {
+	if (refcount_dec_and_test(&vxlanp->refcount)) {
 		hash_del(&vxlanp->hlist);
 		remove = true;
 	}
@@ -204,8 +206,8 @@
 	spin_lock_init(&vxlan->lock);
 	hash_init(vxlan->htable);
 
-	/* Hardware adds 4789 by default */
-	mlx5_vxlan_add_port(vxlan, 4789);
+	/* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */
+	mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT);
 
 	return vxlan;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
deleted file mode 100644
index 3a3b000..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include "mlx5_core.h"
-
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-		      u16 opmod, u8 port)
-{
-	int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
-	int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
-	int err = -ENOMEM;
-	void *data;
-	void *resp;
-	u32 *out;
-	u32 *in;
-
-	in = kzalloc(inlen, GFP_KERNEL);
-	out = kzalloc(outlen, GFP_KERNEL);
-	if (!in || !out)
-		goto out;
-
-	MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
-	MLX5_SET(mad_ifc_in, in, op_mod, opmod);
-	MLX5_SET(mad_ifc_in, in, port, port);
-
-	data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
-	memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
-
-	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
-	if (err)
-		goto out;
-
-	resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
-	memcpy(outb, resp,
-	       MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
-
-out:
-	kfree(out);
-	kfree(in);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b5e9f66..50ab88d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -43,7 +43,6 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
 #include <linux/mlx5/mlx5_ifc.h>
@@ -53,9 +52,11 @@
 #endif
 #include <net/devlink.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fs_core.h"
 #include "lib/mpfs.h"
 #include "eswitch.h"
+#include "devlink.h"
 #include "lib/mlx5.h"
 #include "fpga/core.h"
 #include "fpga/ipsec.h"
@@ -63,7 +64,12 @@
 #include "accel/tls.h"
 #include "lib/clock.h"
 #include "lib/vxlan.h"
+#include "lib/geneve.h"
+#include "lib/devcom.h"
+#include "lib/pci_vsc.h"
 #include "diag/fw_tracer.h"
+#include "ecpf.h"
+#include "lib/hv_vhca.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -162,43 +168,33 @@
 			.size	= 8,
 			.limit	= 4
 		},
-		.mr_cache[16]	= {
-			.size	= 8,
-			.limit	= 4
-		},
-		.mr_cache[17]	= {
-			.size	= 8,
-			.limit	= 4
-		},
-		.mr_cache[18]	= {
-			.size	= 8,
-			.limit	= 4
-		},
-		.mr_cache[19]	= {
-			.size	= 4,
-			.limit	= 2
-		},
-		.mr_cache[20]	= {
-			.size	= 4,
-			.limit	= 2
-		},
 	},
 };
 
 #define FW_INIT_TIMEOUT_MILI		2000
 #define FW_INIT_WAIT_MS			2
-#define FW_PRE_INIT_TIMEOUT_MILI	10000
+#define FW_PRE_INIT_TIMEOUT_MILI	120000
+#define FW_INIT_WARN_MESSAGE_INTERVAL	20000
 
-static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+			u32 warn_time_mili)
 {
+	unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
 	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
 	int err = 0;
 
+	BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
+
 	while (fw_initializing(dev)) {
 		if (time_after(jiffies, end)) {
 			err = -EBUSY;
 			break;
 		}
+		if (warn_time_mili && time_after(jiffies, warn)) {
+			mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
+				       jiffies_to_msecs(end - warn) / 1000);
+			warn = jiffies + msecs_to_jiffies(warn_time_mili);
+		}
 		msleep(FW_INIT_WAIT_MS);
 	}
 
@@ -319,51 +315,6 @@
 	pci_release_regions(pdev);
 }
 
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_eq_table *table = &priv->eq_table;
-	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
-		      MLX5_CAP_GEN(dev, max_num_eqs) :
-		      1 << MLX5_CAP_GEN(dev, log_max_eq);
-	int nvec;
-	int err;
-
-	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-	       MLX5_EQ_VEC_COMP_BASE;
-	nvec = min_t(int, nvec, num_eqs);
-	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
-		return -ENOMEM;
-
-	priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
-	if (!priv->irq_info)
-		return -ENOMEM;
-
-	nvec = pci_alloc_irq_vectors(dev->pdev,
-			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-			PCI_IRQ_MSIX);
-	if (nvec < 0) {
-		err = nvec;
-		goto err_free_irq_info;
-	}
-
-	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
-	return 0;
-
-err_free_irq_info:
-	kfree(priv->irq_info);
-	return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
-	struct mlx5_priv *priv = &dev->priv;
-
-	pci_free_irq_vectors(dev->pdev);
-	kfree(priv->irq_info);
-}
-
 struct mlx5_reg_host_endianness {
 	u8	he;
 	u8      rsvd[15];
@@ -503,6 +454,64 @@
 	return err;
 }
 
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+{
+	void *set_hca_cap;
+	void *set_ctx;
+	int set_sz;
+	bool do_set = false;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
+	    !MLX5_CAP_GEN(dev, pg))
+		return 0;
+
+	err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+	if (err)
+		return err;
+
+	set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	set_ctx = kzalloc(set_sz, GFP_KERNEL);
+	if (!set_ctx)
+		return -ENOMEM;
+
+	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+	       MLX5_ST_SZ_BYTES(odp_cap));
+
+#define ODP_CAP_SET_MAX(dev, field)                                            \
+	do {                                                                   \
+		u32 _res = MLX5_CAP_ODP_MAX(dev, field);                       \
+		if (_res) {                                                    \
+			do_set = true;                                         \
+			MLX5_SET(odp_cap, set_hca_cap, field, _res);           \
+		}                                                              \
+	} while (0)
+
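+	/* Promote each ODP capability to its reported maximum; do_set is
+	 * raised only when at least one max field is non-zero, so the
+	 * set_caps() call below is skipped when there is nothing to bump.
+	 */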
+	ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
+	ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
+	ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
+	ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
+	ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
+
+	if (do_set)
+		err = set_caps(dev, set_ctx, set_sz,
+			       MLX5_SET_HCA_CAP_OP_MOD_ODP);
+
+	kfree(set_ctx);
+
+	return err;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
 	void *set_ctx = NULL;
@@ -576,6 +585,32 @@
 	return err;
 }
 
+static int set_hca_cap(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = handle_hca_cap(dev);
+	if (err) {
+		mlx5_core_err(dev, "handle_hca_cap failed\n");
+		goto out;
+	}
+
+	err = handle_hca_cap_atomic(dev);
+	if (err) {
+		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
+		goto out;
+	}
+
+	err = handle_hca_cap_odp(dev);
+	if (err) {
+		mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
+		goto out;
+	}
+
+out:
+	return err;
+}
+
 static int set_hca_ctrl(struct mlx5_core_dev *dev)
 {
 	struct mlx5_reg_host_endianness he_in;
@@ -611,6 +646,8 @@
 
 	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
 	MLX5_SET(enable_hca_in, in, function_id, func_id);
+	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+		 dev->caps.embedded_cpu);
 	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
@@ -621,193 +658,31 @@
 
 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
 	MLX5_SET(disable_hca_in, in, function_id, func_id);
+	MLX5_SET(disable_hca_in, in, embedded_cpu_function,
+		 dev->caps.embedded_cpu);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
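+/* The 64-bit free-running timer is read high/low/high: if the high word
+ * changed in between, the low word wrapped and must be re-read. The PTP
+ * pre/post hooks snapshot system time around each low-word read so that
+ * callers can correlate the device clock with the system clock.
+ */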
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+			     struct ptp_system_timestamp *sts)
 {
 	u32 timer_h, timer_h1, timer_l;
 
 	timer_h = ioread32be(&dev->iseg->internal_timer_h);
+	ptp_read_system_prets(sts);
 	timer_l = ioread32be(&dev->iseg->internal_timer_l);
+	ptp_read_system_postts(sts);
 	timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
-	if (timer_h != timer_h1) /* wrap around */
+	if (timer_h != timer_h1) {
+		/* wrap around */
+		ptp_read_system_prets(sts);
 		timer_l = ioread32be(&dev->iseg->internal_timer_l);
+		ptp_read_system_postts(sts);
+	}
 
 	return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-	struct mlx5_priv *priv  = &mdev->priv;
-	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
-	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
-		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
-		return -ENOMEM;
-	}
-
-	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
-			priv->irq_info[i].mask);
-
-	if (IS_ENABLED(CONFIG_SMP) &&
-	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
-		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
-	return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-	struct mlx5_priv *priv  = &mdev->priv;
-	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
-	irq_set_affinity_hint(irq, NULL);
-	free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
-	int err;
-	int i;
-
-	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
-		err = mlx5_irq_set_affinity_hint(mdev, i);
-		if (err)
-			goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	for (i--; i >= 0; i--)
-		mlx5_irq_clear_affinity_hint(mdev, i);
-
-	return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
-	int i;
-
-	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
-		mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
-		    unsigned int *irqn)
-{
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	struct mlx5_eq *eq, *n;
-	int err = -ENOENT;
-
-	spin_lock(&table->lock);
-	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
-		if (eq->index == vector) {
-			*eqn = eq->eqn;
-			*irqn = eq->irqn;
-			err = 0;
-			break;
-		}
-	}
-	spin_unlock(&table->lock);
-
-	return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	struct mlx5_eq *eq;
-
-	spin_lock(&table->lock);
-	list_for_each_entry(eq, &table->comp_eqs_list, list)
-		if (eq->eqn == eqn) {
-			spin_unlock(&table->lock);
-			return eq;
-		}
-
-	spin_unlock(&table->lock);
-
-	return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
-	if (dev->rmap) {
-		free_irq_cpu_rmap(dev->rmap);
-		dev->rmap = NULL;
-	}
-#endif
-	spin_lock(&table->lock);
-	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
-		list_del(&eq->list);
-		spin_unlock(&table->lock);
-		if (mlx5_destroy_unmap_eq(dev, eq))
-			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
-				       eq->eqn);
-		kfree(eq);
-		spin_lock(&table->lock);
-	}
-	spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	char name[MLX5_MAX_IRQ_NAME];
-	struct mlx5_eq *eq;
-	int ncomp_vec;
-	int nent;
-	int err;
-	int i;
-
-	INIT_LIST_HEAD(&table->comp_eqs_list);
-	ncomp_vec = table->num_comp_vectors;
-	nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
-	dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
-	if (!dev->rmap)
-		return -ENOMEM;
-#endif
-	for (i = 0; i < ncomp_vec; i++) {
-		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
-		if (!eq) {
-			err = -ENOMEM;
-			goto clean;
-		}
-
-#ifdef CONFIG_RFS_ACCEL
-		irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
-				 MLX5_EQ_VEC_COMP_BASE + i));
-#endif
-		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
-		err = mlx5_create_map_eq(dev, eq,
-					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-					 name, MLX5_EQ_TYPE_COMP);
-		if (err) {
-			kfree(eq);
-			goto clean;
-		}
-		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
-		eq->index = i;
-		spin_lock(&table->lock);
-		list_add_tail(&eq->list, &table->comp_eqs_list);
-		spin_unlock(&table->lock);
-	}
-
-	return 0;
-
-clean:
-	free_comp_eqs(dev);
-	return err;
-}
-
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
 	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
@@ -860,38 +735,27 @@
 	return -EOPNOTSUPP;
 }
 
-static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
+			 const struct pci_device_id *id)
 {
-	struct pci_dev *pdev = dev->pdev;
+	struct mlx5_priv *priv = &dev->priv;
 	int err = 0;
 
+	mutex_init(&dev->pci_status_mutex);
 	pci_set_drvdata(dev->pdev, dev);
-	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
-	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
 
-	mutex_init(&priv->pgdir_mutex);
-	INIT_LIST_HEAD(&priv->pgdir_list);
-	spin_lock_init(&priv->mkey_lock);
-
-	mutex_init(&priv->alloc_mutex);
-
+	dev->bar_addr = pci_resource_start(pdev, 0);
 	priv->numa_node = dev_to_node(&dev->pdev->dev);
 
-	priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
-	if (!priv->dbg_root) {
-		dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n");
-		return -ENOMEM;
-	}
-
 	err = mlx5_pci_enable_device(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
-		goto err_dbg;
+		mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
+		return err;
 	}
 
 	err = request_bar(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "error requesting BARs, aborting\n");
+		mlx5_core_err(dev, "error requesting BARs, aborting\n");
 		goto err_disable;
 	}
 
@@ -899,18 +763,25 @@
 
 	err = set_dma_caps(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+		mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
 		goto err_clr_master;
 	}
 
-	dev->iseg_base = pci_resource_start(dev->pdev, 0);
+	if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+		mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
+
+	dev->iseg_base = dev->bar_addr;
 	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
 	if (!dev->iseg) {
 		err = -ENOMEM;
-		dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
 		goto err_clr_master;
 	}
 
+	mlx5_pci_vsc_init(dev);
+
 	return 0;
 
 err_clr_master:
@@ -918,47 +789,53 @@
 	release_bar(dev->pdev);
 err_disable:
 	mlx5_pci_disable_device(dev);
-
-err_dbg:
-	debugfs_remove(priv->dbg_root);
 	return err;
 }
 
-static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static void mlx5_pci_close(struct mlx5_core_dev *dev)
 {
 	iounmap(dev->iseg);
 	pci_clear_master(dev->pdev);
 	release_bar(dev->pdev);
 	mlx5_pci_disable_device(dev);
-	debugfs_remove_recursive(priv->dbg_root);
 }
 
-static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_init_once(struct mlx5_core_dev *dev)
 {
-	struct pci_dev *pdev = dev->pdev;
 	int err;
 
+	dev->priv.devcom = mlx5_devcom_register_device(dev);
+	if (IS_ERR(dev->priv.devcom))
+		mlx5_core_err(dev, "failed to register with devcom (0x%p)\n",
+			      dev->priv.devcom);
+
 	err = mlx5_query_board_id(dev);
 	if (err) {
-		dev_err(&pdev->dev, "query board id failed\n");
-		goto out;
+		mlx5_core_err(dev, "query board id failed\n");
+		goto err_devcom;
 	}
 
-	err = mlx5_eq_init(dev);
+	err = mlx5_irq_table_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "failed to initialize eq\n");
-		goto out;
+		mlx5_core_err(dev, "failed to initialize irq table\n");
+		goto err_devcom;
 	}
 
-	err = mlx5_cq_debugfs_init(dev);
+	err = mlx5_eq_table_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
+		mlx5_core_err(dev, "failed to initialize eq\n");
+		goto err_irq_cleanup;
+	}
+
+	err = mlx5_events_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "failed to initialize events\n");
 		goto err_eq_cleanup;
 	}
 
-	mlx5_init_qp_table(dev);
+	mlx5_cq_debugfs_init(dev);
 
-	mlx5_init_srq_table(dev);
+	mlx5_init_qp_table(dev);
 
 	mlx5_init_mkey_table(dev);
 
@@ -967,180 +844,168 @@
 	mlx5_init_clock(dev);
 
 	dev->vxlan = mlx5_vxlan_create(dev);
+	dev->geneve = mlx5_geneve_create(dev);
 
 	err = mlx5_init_rl_table(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to init rate limiting\n");
+		mlx5_core_err(dev, "Failed to init rate limiting\n");
 		goto err_tables_cleanup;
 	}
 
 	err = mlx5_mpfs_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
+		mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
 		goto err_rl_cleanup;
 	}
 
-	err = mlx5_eswitch_init(dev);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
-		goto err_mpfs_cleanup;
-	}
-
 	err = mlx5_sriov_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
-		goto err_eswitch_cleanup;
+		mlx5_core_err(dev, "Failed to init sriov %d\n", err);
+		goto err_mpfs_cleanup;
+	}
+
+	err = mlx5_eswitch_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+		goto err_sriov_cleanup;
 	}
 
 	err = mlx5_fpga_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
-		goto err_sriov_cleanup;
+		mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
+		goto err_eswitch_cleanup;
 	}
 
+	dev->dm = mlx5_dm_create(dev);
+	if (IS_ERR(dev->dm))
+		mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
+
 	dev->tracer = mlx5_fw_tracer_create(dev);
+	dev->hv_vhca = mlx5_hv_vhca_create(dev);
 
 	return 0;
 
-err_sriov_cleanup:
-	mlx5_sriov_cleanup(dev);
 err_eswitch_cleanup:
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+	mlx5_sriov_cleanup(dev);
 err_mpfs_cleanup:
 	mlx5_mpfs_cleanup(dev);
 err_rl_cleanup:
 	mlx5_cleanup_rl_table(dev);
 err_tables_cleanup:
+	mlx5_geneve_destroy(dev->geneve);
 	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_mkey_table(dev);
-	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
 	mlx5_cq_debugfs_cleanup(dev);
-
+	mlx5_events_cleanup(dev);
 err_eq_cleanup:
-	mlx5_eq_cleanup(dev);
+	mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+	mlx5_irq_table_cleanup(dev);
+err_devcom:
+	mlx5_devcom_unregister_device(dev->priv.devcom);
 
-out:
 	return err;
 }
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+	mlx5_hv_vhca_destroy(dev->hv_vhca);
 	mlx5_fw_tracer_destroy(dev->tracer);
+	mlx5_dm_cleanup(dev);
 	mlx5_fpga_cleanup(dev);
-	mlx5_sriov_cleanup(dev);
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
+	mlx5_sriov_cleanup(dev);
 	mlx5_mpfs_cleanup(dev);
 	mlx5_cleanup_rl_table(dev);
+	mlx5_geneve_destroy(dev->geneve);
 	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_clock(dev);
 	mlx5_cleanup_reserved_gids(dev);
 	mlx5_cleanup_mkey_table(dev);
-	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
 	mlx5_cq_debugfs_cleanup(dev);
-	mlx5_eq_cleanup(dev);
+	mlx5_events_cleanup(dev);
+	mlx5_eq_table_cleanup(dev);
+	mlx5_irq_table_cleanup(dev);
+	mlx5_devcom_unregister_device(dev->priv.devcom);
 }
 
-static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
-			 bool boot)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
 {
-	struct pci_dev *pdev = dev->pdev;
 	int err;
 
-	mutex_lock(&dev->intf_state_mutex);
-	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
-		dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
-			 __func__);
-		goto out;
-	}
-
-	dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
-		 fw_rev_min(dev), fw_rev_sub(dev));
+	mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+		       fw_rev_min(dev), fw_rev_sub(dev));
 
 	/* Only PFs hold the relevant PCIe information for this query */
 	if (mlx5_core_is_pf(dev))
 		pcie_print_link_status(dev->pdev);
 
-	/* on load removing any previous indication of internal error, device is
-	 * up
-	 */
-	dev->state = MLX5_DEVICE_STATE_UP;
-
 	/* wait for firmware to accept initialization segments configurations
 	 */
-	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
 	if (err) {
-		dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
-			FW_PRE_INIT_TIMEOUT_MILI);
-		goto out_err;
+		mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+			      FW_PRE_INIT_TIMEOUT_MILI);
+		return err;
 	}
 
 	err = mlx5_cmd_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
-		goto out_err;
+		mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
+		return err;
 	}
 
-	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
 	if (err) {
-		dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
-			FW_INIT_TIMEOUT_MILI);
+		mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
+			      FW_INIT_TIMEOUT_MILI);
 		goto err_cmd_cleanup;
 	}
 
 	err = mlx5_core_enable_hca(dev, 0);
 	if (err) {
-		dev_err(&pdev->dev, "enable hca failed\n");
+		mlx5_core_err(dev, "enable hca failed\n");
 		goto err_cmd_cleanup;
 	}
 
 	err = mlx5_core_set_issi(dev);
 	if (err) {
-		dev_err(&pdev->dev, "failed to set issi\n");
+		mlx5_core_err(dev, "failed to set issi\n");
 		goto err_disable_hca;
 	}
 
 	err = mlx5_satisfy_startup_pages(dev, 1);
 	if (err) {
-		dev_err(&pdev->dev, "failed to allocate boot pages\n");
+		mlx5_core_err(dev, "failed to allocate boot pages\n");
 		goto err_disable_hca;
 	}
 
 	err = set_hca_ctrl(dev);
 	if (err) {
-		dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+		mlx5_core_err(dev, "set_hca_ctrl failed\n");
 		goto reclaim_boot_pages;
 	}
 
-	err = handle_hca_cap(dev);
+	err = set_hca_cap(dev);
 	if (err) {
-		dev_err(&pdev->dev, "handle_hca_cap failed\n");
-		goto reclaim_boot_pages;
-	}
-
-	err = handle_hca_cap_atomic(dev);
-	if (err) {
-		dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+		mlx5_core_err(dev, "set_hca_cap failed\n");
 		goto reclaim_boot_pages;
 	}
 
 	err = mlx5_satisfy_startup_pages(dev, 0);
 	if (err) {
-		dev_err(&pdev->dev, "failed to allocate init pages\n");
-		goto reclaim_boot_pages;
-	}
-
-	err = mlx5_pagealloc_start(dev);
-	if (err) {
-		dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
+		mlx5_core_err(dev, "failed to allocate init pages\n");
 		goto reclaim_boot_pages;
 	}
 
 	err = mlx5_cmd_init_hca(dev, sw_owner_id);
 	if (err) {
-		dev_err(&pdev->dev, "init hca failed\n");
-		goto err_pagealloc_stop;
+		mlx5_core_err(dev, "init hca failed\n");
+		goto reclaim_boot_pages;
 	}
 
 	mlx5_set_driver_version(dev);
@@ -1149,97 +1014,195 @@
 
 	err = mlx5_query_hca_caps(dev);
 	if (err) {
-		dev_err(&pdev->dev, "query hca failed\n");
-		goto err_stop_poll;
+		mlx5_core_err(dev, "query hca failed\n");
+		goto stop_health;
 	}
 
-	if (boot) {
-		err = mlx5_init_once(dev, priv);
-		if (err) {
-			dev_err(&pdev->dev, "sw objs init failed\n");
-			goto err_stop_poll;
-		}
-	}
+	return 0;
 
-	err = mlx5_alloc_irq_vectors(dev);
+stop_health:
+	mlx5_stop_health_poll(dev, boot);
+reclaim_boot_pages:
+	mlx5_reclaim_startup_pages(dev);
+err_disable_hca:
+	mlx5_core_disable_hca(dev, 0);
+err_cmd_cleanup:
+	mlx5_cmd_cleanup(dev);
+
+	return err;
+}
+
+static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
+{
+	int err;
+
+	mlx5_stop_health_poll(dev, boot);
+	err = mlx5_cmd_teardown_hca(dev);
 	if (err) {
-		dev_err(&pdev->dev, "alloc irq vectors failed\n");
-		goto err_cleanup_once;
+		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
+		return err;
 	}
+	mlx5_reclaim_startup_pages(dev);
+	mlx5_core_disable_hca(dev, 0);
+	mlx5_cmd_cleanup(dev);
+
+	return 0;
+}
+
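+/* mlx5_load()/mlx5_unload() bring the driver state up and down on top of
+ * an already initialized function: UAR page, page allocator, IRQ and EQ
+ * tables, FW tracer, FPGA/crypto engines, flow steering and SR-IOV.
+ */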
+static int mlx5_load(struct mlx5_core_dev *dev)
+{
+	int err;
 
 	dev->priv.uar = mlx5_get_uars_page(dev);
 	if (IS_ERR(dev->priv.uar)) {
-		dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
 		err = PTR_ERR(dev->priv.uar);
-		goto err_disable_msix;
+		return err;
 	}
 
-	err = mlx5_start_eqs(dev);
+	mlx5_events_start(dev);
+	mlx5_pagealloc_start(dev);
+
+	err = mlx5_irq_table_create(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-		goto err_put_uars;
+		mlx5_core_err(dev, "Failed to alloc IRQs\n");
+		goto err_irq_table;
+	}
+
+	err = mlx5_eq_table_create(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to create EQs\n");
+		goto err_eq_table;
 	}
 
 	err = mlx5_fw_tracer_init(dev->tracer);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to init FW tracer\n");
+		mlx5_core_err(dev, "Failed to init FW tracer\n");
 		goto err_fw_tracer;
 	}
 
-	err = alloc_comp_eqs(dev);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
-		goto err_comp_eqs;
-	}
-
-	err = mlx5_irq_set_affinity_hints(dev);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
-		goto err_affinity_hints;
-	}
+	mlx5_hv_vhca_init(dev->hv_vhca);
 
 	err = mlx5_fpga_device_start(dev);
 	if (err) {
-		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+		mlx5_core_err(dev, "fpga device start failed %d\n", err);
 		goto err_fpga_start;
 	}
 
 	err = mlx5_accel_ipsec_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+		mlx5_core_err(dev, "IPSec device start failed %d\n", err);
 		goto err_ipsec_start;
 	}
 
 	err = mlx5_accel_tls_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+		mlx5_core_err(dev, "TLS device start failed %d\n", err);
 		goto err_tls_start;
 	}
 
 	err = mlx5_init_fs(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to init flow steering\n");
+		mlx5_core_err(dev, "Failed to init flow steering\n");
 		goto err_fs;
 	}
 
 	err = mlx5_core_set_hca_defaults(dev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to set hca defaults\n");
-		goto err_fs;
+		mlx5_core_err(dev, "Failed to set hca defaults\n");
+		goto err_sriov;
 	}
 
 	err = mlx5_sriov_attach(dev);
 	if (err) {
-		dev_err(&pdev->dev, "sriov init failed %d\n", err);
+		mlx5_core_err(dev, "sriov init failed %d\n", err);
 		goto err_sriov;
 	}
 
+	err = mlx5_ec_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to init embedded CPU\n");
+		goto err_ec;
+	}
+
+	return 0;
+
+err_ec:
+	mlx5_sriov_detach(dev);
+err_sriov:
+	mlx5_cleanup_fs(dev);
+err_fs:
+	mlx5_accel_tls_cleanup(dev);
+err_tls_start:
+	mlx5_accel_ipsec_cleanup(dev);
+err_ipsec_start:
+	mlx5_fpga_device_stop(dev);
+err_fpga_start:
+	mlx5_hv_vhca_cleanup(dev->hv_vhca);
+	mlx5_fw_tracer_cleanup(dev->tracer);
+err_fw_tracer:
+	mlx5_eq_table_destroy(dev);
+err_eq_table:
+	mlx5_irq_table_destroy(dev);
+err_irq_table:
+	mlx5_pagealloc_stop(dev);
+	mlx5_events_stop(dev);
+	mlx5_put_uars_page(dev, dev->priv.uar);
+	return err;
+}
+
+static void mlx5_unload(struct mlx5_core_dev *dev)
+{
+	mlx5_ec_cleanup(dev);
+	mlx5_sriov_detach(dev);
+	mlx5_cleanup_fs(dev);
+	mlx5_accel_ipsec_cleanup(dev);
+	mlx5_accel_tls_cleanup(dev);
+	mlx5_fpga_device_stop(dev);
+	mlx5_hv_vhca_cleanup(dev->hv_vhca);
+	mlx5_fw_tracer_cleanup(dev->tracer);
+	mlx5_eq_table_destroy(dev);
+	mlx5_irq_table_destroy(dev);
+	mlx5_pagealloc_stop(dev);
+	mlx5_events_stop(dev);
+	mlx5_put_uars_page(dev, dev->priv.uar);
+}
+
+static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
+{
+	int err = 0;
+
+	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+	mutex_lock(&dev->intf_state_mutex);
+	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_warn(dev, "interface is up, NOP\n");
+		goto out;
+	}
+	/* remove any previous indication of internal error */
+	dev->state = MLX5_DEVICE_STATE_UP;
+
+	err = mlx5_function_setup(dev, boot);
+	if (err)
+		goto out;
+
+	if (boot) {
+		err = mlx5_init_once(dev);
+		if (err) {
+			mlx5_core_err(dev, "sw objs init failed\n");
+			goto function_teardown;
+		}
+	}
+
+	err = mlx5_load(dev);
+	if (err)
+		goto err_load;
+
 	if (mlx5_device_registered(dev)) {
 		mlx5_attach_device(dev);
 	} else {
 		err = mlx5_register_device(dev);
 		if (err) {
-			dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
+			mlx5_core_err(dev, "register device failed %d\n", err);
 			goto err_reg_dev;
 		}
 	}
@@ -1248,83 +1211,34 @@
 out:
 	mutex_unlock(&dev->intf_state_mutex);
 
-	return 0;
+	return err;
 
 err_reg_dev:
-	mlx5_sriov_detach(dev);
-
-err_sriov:
-	mlx5_cleanup_fs(dev);
-
-err_fs:
-	mlx5_accel_tls_cleanup(dev);
-
-err_tls_start:
-	mlx5_accel_ipsec_cleanup(dev);
-
-err_ipsec_start:
-	mlx5_fpga_device_stop(dev);
-
-err_fpga_start:
-	mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
-	free_comp_eqs(dev);
-
-err_comp_eqs:
-	mlx5_fw_tracer_cleanup(dev->tracer);
-
-err_fw_tracer:
-	mlx5_stop_eqs(dev);
-
-err_put_uars:
-	mlx5_put_uars_page(dev, priv->uar);
-
-err_disable_msix:
-	mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+	mlx5_unload(dev);
+err_load:
 	if (boot)
 		mlx5_cleanup_once(dev);
-
-err_stop_poll:
-	mlx5_stop_health_poll(dev, boot);
-	if (mlx5_cmd_teardown_hca(dev)) {
-		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
-		goto out_err;
-	}
-
-err_pagealloc_stop:
-	mlx5_pagealloc_stop(dev);
-
-reclaim_boot_pages:
-	mlx5_reclaim_startup_pages(dev);
-
-err_disable_hca:
-	mlx5_core_disable_hca(dev, 0);
-
-err_cmd_cleanup:
-	mlx5_cmd_cleanup(dev);
-
-out_err:
+function_teardown:
+	mlx5_function_teardown(dev, boot);
 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 	mutex_unlock(&dev->intf_state_mutex);
 
 	return err;
 }
 
-static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
-			   bool cleanup)
+static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 {
 	int err = 0;
 
-	if (cleanup)
-		mlx5_drain_health_recovery(dev);
+	if (cleanup) {
+		mlx5_unregister_device(dev);
+		mlx5_drain_health_wq(dev);
+	}
 
 	mutex_lock(&dev->intf_state_mutex);
 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
-		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
-			 __func__);
+		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+			       __func__);
 		if (cleanup)
 			mlx5_cleanup_once(dev);
 		goto out;
@@ -1335,140 +1249,130 @@
 	if (mlx5_device_registered(dev))
 		mlx5_detach_device(dev);
 
-	mlx5_sriov_detach(dev);
-	mlx5_cleanup_fs(dev);
-	mlx5_accel_ipsec_cleanup(dev);
-	mlx5_accel_tls_cleanup(dev);
-	mlx5_fpga_device_stop(dev);
-	mlx5_irq_clear_affinity_hints(dev);
-	free_comp_eqs(dev);
-	mlx5_fw_tracer_cleanup(dev->tracer);
-	mlx5_stop_eqs(dev);
-	mlx5_put_uars_page(dev, priv->uar);
-	mlx5_free_irq_vectors(dev);
+	mlx5_unload(dev);
+
 	if (cleanup)
 		mlx5_cleanup_once(dev);
-	mlx5_stop_health_poll(dev, cleanup);
-	err = mlx5_cmd_teardown_hca(dev);
-	if (err) {
-		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
-		goto out;
-	}
-	mlx5_pagealloc_stop(dev);
-	mlx5_reclaim_startup_pages(dev);
-	mlx5_core_disable_hca(dev, 0);
-	mlx5_cmd_cleanup(dev);
 
+	mlx5_function_teardown(dev, cleanup);
 out:
 	mutex_unlock(&dev->intf_state_mutex);
 	return err;
 }
 
-struct mlx5_core_event_handler {
-	void (*event)(struct mlx5_core_dev *dev,
-		      enum mlx5_dev_event event,
-		      void *data);
-};
-
-static const struct devlink_ops mlx5_devlink_ops = {
-#ifdef CONFIG_MLX5_ESWITCH
-	.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
-	.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
-	.eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
-	.eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
-	.eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
-	.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
-#endif
-};
-
-#define MLX5_IB_MOD "mlx5_ib"
-static int init_one(struct pci_dev *pdev,
-		    const struct pci_device_id *id)
+static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 {
-	struct mlx5_core_dev *dev;
-	struct devlink *devlink;
-	struct mlx5_priv *priv;
+	struct mlx5_priv *priv = &dev->priv;
 	int err;
 
-	devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
-	if (!devlink) {
-		dev_err(&pdev->dev, "kzalloc failed\n");
-		return -ENOMEM;
-	}
-
-	dev = devlink_priv(devlink);
-	priv = &dev->priv;
-	priv->pci_dev_data = id->driver_data;
-
-	pci_set_drvdata(pdev, dev);
-
-	dev->pdev = pdev;
-	dev->event = mlx5_core_event;
-	dev->profile = &profile[prof_sel];
+	dev->profile = &profile[profile_idx];
 
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);
-	mutex_init(&dev->pci_status_mutex);
 	mutex_init(&dev->intf_state_mutex);
 
-	INIT_LIST_HEAD(&priv->waiting_events_list);
-	priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	err = init_srcu_struct(&priv->pfault_srcu);
-	if (err) {
-		dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
-			err);
-		goto clean_dev;
-	}
-#endif
 	mutex_init(&priv->bfregs.reg_head.lock);
 	mutex_init(&priv->bfregs.wc_head.lock);
 	INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
 	INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
 
-	err = mlx5_pci_init(dev, priv);
-	if (err) {
-		dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
-		goto clean_srcu;
+	mutex_init(&priv->alloc_mutex);
+	mutex_init(&priv->pgdir_mutex);
+	INIT_LIST_HEAD(&priv->pgdir_list);
+	spin_lock_init(&priv->mkey_lock);
+
+	priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
+					    mlx5_debugfs_root);
+	if (!priv->dbg_root) {
+		dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n");
+		return -ENOMEM;
 	}
 
 	err = mlx5_health_init(dev);
-	if (err) {
-		dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
-		goto close_pci;
+	if (err)
+		goto err_health_init;
+
+	err = mlx5_pagealloc_init(dev);
+	if (err)
+		goto err_pagealloc_init;
+
+	return 0;
+
+err_pagealloc_init:
+	mlx5_health_cleanup(dev);
+err_health_init:
+	debugfs_remove(dev->priv.dbg_root);
+
+	return err;
+}
+
+static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+{
+	mlx5_pagealloc_cleanup(dev);
+	mlx5_health_cleanup(dev);
+	debugfs_remove_recursive(dev->priv.dbg_root);
+}
+
+#define MLX5_IB_MOD "mlx5_ib"
+static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct mlx5_core_dev *dev;
+	struct devlink *devlink;
+	int err;
+
+	devlink = mlx5_devlink_alloc();
+	if (!devlink) {
+		dev_err(&pdev->dev, "devlink alloc failed\n");
+		return -ENOMEM;
 	}
 
-	mlx5_pagealloc_init(dev);
+	dev = devlink_priv(devlink);
+	dev->device = &pdev->dev;
+	dev->pdev = pdev;
 
-	err = mlx5_load_one(dev, priv, true);
+	dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
+			 MLX5_COREDEV_VF : MLX5_COREDEV_PF;
+
+	err = mlx5_mdev_init(dev, prof_sel);
+	if (err)
+		goto mdev_init_err;
+
+	err = mlx5_pci_init(dev, pdev, id);
 	if (err) {
-		dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
-		goto clean_health;
+		mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
+			      err);
+		goto pci_init_err;
+	}
+
+	err = mlx5_load_one(dev, true);
+	if (err) {
+		mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
+			      err);
+		goto err_load_one;
 	}
 
 	request_module_nowait(MLX5_IB_MOD);
 
-	err = devlink_register(devlink, &pdev->dev);
+	err = mlx5_devlink_register(devlink, &pdev->dev);
 	if (err)
 		goto clean_load;
 
+	err = mlx5_crdump_enable(dev);
+	if (err)
+		dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
 	pci_save_state(pdev);
 	return 0;
 
 clean_load:
-	mlx5_unload_one(dev, priv, true);
-clean_health:
-	mlx5_pagealloc_cleanup(dev);
-	mlx5_health_cleanup(dev);
-close_pci:
-	mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	cleanup_srcu_struct(&priv->pfault_srcu);
-clean_dev:
-#endif
-	devlink_free(devlink);
+	mlx5_unload_one(dev, true);
+
+err_load_one:
+	mlx5_pci_close(dev);
+pci_init_err:
+	mlx5_mdev_uninit(dev);
+mdev_init_err:
+	mlx5_devlink_free(devlink);
 
 	return err;
 }
@@ -1477,41 +1381,33 @@
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_priv *priv = &dev->priv;
 
-	devlink_unregister(devlink);
-	mlx5_unregister_device(dev);
+	mlx5_crdump_disable(dev);
+	mlx5_devlink_unregister(devlink);
 
-	if (mlx5_unload_one(dev, priv, true)) {
-		dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
-		mlx5_health_cleanup(dev);
+	if (mlx5_unload_one(dev, true)) {
+		mlx5_core_err(dev, "mlx5_unload_one failed\n");
+		mlx5_health_flush(dev);
 		return;
 	}
 
-	mlx5_pagealloc_cleanup(dev);
-	mlx5_health_cleanup(dev);
-	mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
-	devlink_free(devlink);
+	mlx5_pci_close(dev);
+	mlx5_mdev_uninit(dev);
+	mlx5_devlink_free(devlink);
 }
 
 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 					      pci_channel_state_t state)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-	struct mlx5_priv *priv = &dev->priv;
 
-	dev_info(&pdev->dev, "%s was called\n", __func__);
+	mlx5_core_info(dev, "%s was called\n", __func__);
 
 	mlx5_enter_error_state(dev, false);
-	mlx5_unload_one(dev, priv, false);
-	/* In case of kernel call drain the health wq */
-	if (state) {
-		mlx5_drain_health_wq(dev);
-		mlx5_pci_disable_device(dev);
-	}
+	mlx5_error_sw_reset(dev);
+	mlx5_unload_one(dev, false);
+	mlx5_drain_health_wq(dev);
+	mlx5_pci_disable_device(dev);
 
 	return state == pci_channel_io_perm_failure ?
 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -1533,7 +1429,9 @@
 		count = ioread32be(health->health_counter);
 		if (count && count != 0xffffffff) {
 			if (last_count && last_count != count) {
-				dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+				mlx5_core_info(dev,
+					       "wait vital counter value 0x%x after %d iterations\n",
+					       count, i);
 				return 0;
 			}
 			last_count = count;
@@ -1549,12 +1447,12 @@
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 	int err;
 
-	dev_info(&pdev->dev, "%s was called\n", __func__);
+	mlx5_core_info(dev, "%s was called\n", __func__);
 
 	err = mlx5_pci_enable_device(dev);
 	if (err) {
-		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
-			, __func__, err);
+		mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
+			      __func__, err);
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
 
@@ -1563,7 +1461,7 @@
 	pci_save_state(pdev);
 
 	if (wait_vital(pdev)) {
-		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+		mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
 
@@ -1573,17 +1471,16 @@
 static void mlx5_pci_resume(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-	struct mlx5_priv *priv = &dev->priv;
 	int err;
 
-	dev_info(&pdev->dev, "%s was called\n", __func__);
+	mlx5_core_info(dev, "%s was called\n", __func__);
 
-	err = mlx5_load_one(dev, priv, false);
+	err = mlx5_load_one(dev, false);
 	if (err)
-		dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
-			, __func__, err);
+		mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
+			      __func__, err);
 	else
-		dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+		mlx5_core_info(dev, "%s: device recovered\n", __func__);
 }
 
 static const struct pci_error_handlers mlx5_err_handler = {
@@ -1594,12 +1491,17 @@
 
 static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
 {
-	int ret;
+	bool fast_teardown = false, force_teardown = false;
+	int ret = 1;
 
-	if (!MLX5_CAP_GEN(dev, force_teardown)) {
-		mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
+	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
+
+	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
+	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
+
+	if (!fast_teardown && !force_teardown)
 		return -EOPNOTSUPP;
-	}
 
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
 		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
@@ -1612,13 +1514,19 @@
 	mlx5_drain_health_wq(dev);
 	mlx5_stop_health_poll(dev, false);
 
-	ret = mlx5_cmd_force_teardown_hca(dev);
-	if (ret) {
-		mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
-		mlx5_start_health_poll(dev);
-		return ret;
-	}
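+	/* Prefer the lighter fast teardown; fall back to force teardown
+	 * if the firmware does not support it or it fails.
+	 */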
+	ret = mlx5_cmd_fast_teardown_hca(dev);
+	if (!ret)
+		goto succeed;
 
+	ret = mlx5_cmd_force_teardown_hca(dev);
+	if (!ret)
+		goto succeed;
+
+	mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+	mlx5_start_health_poll(dev);
+	return ret;
+
+succeed:
 	mlx5_enter_error_state(dev, true);
 
 	/* Some platforms requiring freeing the IRQ's in the shutdown
@@ -1626,7 +1534,6 @@
 	 * kexec. There is no need to cleanup the mlx5_core software
 	 * contexts.
 	 */
-	mlx5_irq_clear_affinity_hints(dev);
 	mlx5_core_eq_free_irqs(dev);
 
 	return 0;
@@ -1635,13 +1542,12 @@
 static void shutdown(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-	struct mlx5_priv *priv = &dev->priv;
 	int err;
 
-	dev_info(&pdev->dev, "Shutdown was called\n");
+	mlx5_core_info(dev, "Shutdown was called\n");
 	err = mlx5_try_fast_unload(dev);
 	if (err)
-		mlx5_unload_one(dev, priv, false);
+		mlx5_unload_one(dev, false);
 	mlx5_pci_disable_device(dev);
 }
 
@@ -1658,8 +1564,12 @@
 	{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 Ex VF */
 	{ PCI_VDEVICE(MELLANOX, 0x101b) },			/* ConnectX-6 */
 	{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF},	/* ConnectX-6 VF */
+	{ PCI_VDEVICE(MELLANOX, 0x101d) },			/* ConnectX-6 Dx */
+	{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF},	/* ConnectX Family mlx5Gen Virtual Function */
+	{ PCI_VDEVICE(MELLANOX, 0x101f) },			/* ConnectX-6 LX */
 	{ PCI_VDEVICE(MELLANOX, 0xa2d2) },			/* BlueField integrated ConnectX-5 network controller */
 	{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},	/* BlueField integrated ConnectX-5 network controller VF */
+	{ PCI_VDEVICE(MELLANOX, 0xa2d6) },			/* BlueField-2 integrated ConnectX-6 Dx network controller */
 	{ 0, }
 };
 
@@ -1667,7 +1577,8 @@
 
 void mlx5_disable_device(struct mlx5_core_dev *dev)
 {
-	mlx5_pci_err_detected(dev->pdev, 0);
+	mlx5_error_sw_reset(dev);
+	mlx5_unload_one(dev, false);
 }
 
 void mlx5_recover_device(struct mlx5_core_dev *dev)
@@ -1705,7 +1616,7 @@
 	get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
 
 	mlx5_core_verify_params();
-	mlx5_fpga_ipsec_build_fs_cmds();
+	mlx5_accel_ipsec_build_fs_cmds();
 	mlx5_register_debugfs();
 
 	err = pci_register_driver(&mlx5_core_driver);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b4134fa..b100489 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,7 +38,10 @@
 #include <linux/sched.h>
 #include <linux/if_link.h>
 #include <linux/firmware.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
 
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "5.0-0"
@@ -46,39 +49,57 @@
 extern uint mlx5_core_debug_mask;
 
 #define mlx5_core_dbg(__dev, format, ...)				\
-	dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,		\
+	dev_dbg((__dev)->device, "%s:%d:(pid %d): " format,		\
 		 __func__, __LINE__, current->pid,			\
 		 ##__VA_ARGS__)
 
-#define mlx5_core_dbg_once(__dev, format, ...)				\
-	dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,	\
-		     __func__, __LINE__, current->pid,			\
+#define mlx5_core_dbg_once(__dev, format, ...)		\
+	dev_dbg_once((__dev)->device,		\
+		     "%s:%d:(pid %d): " format,		\
+		     __func__, __LINE__, current->pid,	\
 		     ##__VA_ARGS__)
 
-#define mlx5_core_dbg_mask(__dev, mask, format, ...)			\
-do {									\
-	if ((mask) & mlx5_core_debug_mask)				\
-		mlx5_core_dbg(__dev, format, ##__VA_ARGS__);		\
+#define mlx5_core_dbg_mask(__dev, mask, format, ...)		\
+do {								\
+	if ((mask) & mlx5_core_debug_mask)			\
+		mlx5_core_dbg(__dev, format, ##__VA_ARGS__);	\
 } while (0)
 
-#define mlx5_core_err(__dev, format, ...)				\
-	dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,	\
-		__func__, __LINE__, current->pid,	\
+#define mlx5_core_err(__dev, format, ...)			\
+	dev_err((__dev)->device, "%s:%d:(pid %d): " format,	\
+		__func__, __LINE__, current->pid,		\
 	       ##__VA_ARGS__)
 
-#define mlx5_core_err_rl(__dev, format, ...)				\
-	dev_err_ratelimited(&(__dev)->pdev->dev,			\
-			   "%s:%d:(pid %d): " format,			\
-			   __func__, __LINE__, current->pid,		\
-			   ##__VA_ARGS__)
+#define mlx5_core_err_rl(__dev, format, ...)			\
+	dev_err_ratelimited((__dev)->device,			\
+			    "%s:%d:(pid %d): " format,		\
+			    __func__, __LINE__, current->pid,	\
+			    ##__VA_ARGS__)
 
-#define mlx5_core_warn(__dev, format, ...)				\
-	dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,	\
-		 __func__, __LINE__, current->pid,			\
-		##__VA_ARGS__)
+#define mlx5_core_warn(__dev, format, ...)			\
+	dev_warn((__dev)->device, "%s:%d:(pid %d): " format,	\
+		 __func__, __LINE__, current->pid,		\
+		 ##__VA_ARGS__)
 
-#define mlx5_core_info(__dev, format, ...)				\
-	dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
+#define mlx5_core_warn_once(__dev, format, ...)				\
+	dev_warn_once((__dev)->device, "%s:%d:(pid %d): " format,	\
+		      __func__, __LINE__, current->pid,			\
+		      ##__VA_ARGS__)
+
+#define mlx5_core_warn_rl(__dev, format, ...)			\
+	dev_warn_ratelimited((__dev)->device,			\
+			     "%s:%d:(pid %d): " format,		\
+			     __func__, __LINE__, current->pid,	\
+			     ##__VA_ARGS__)
+
+#define mlx5_core_info(__dev, format, ...)		\
+	dev_info((__dev)->device, format, ##__VA_ARGS__)
+
+#define mlx5_core_info_rl(__dev, format, ...)			\
+	dev_info_ratelimited((__dev)->device,			\
+			     "%s:%d:(pid %d): " format,		\
+			     __func__, __LINE__, current->pid,	\
+			     ##__VA_ARGS__)
 
 enum {
 	MLX5_CMD_DATA, /* print command payload only */
@@ -90,17 +111,19 @@
 	MLX5_DRIVER_SYND = 0xbadd00de,
 };
 
+enum mlx5_semaphore_space_address {
+	MLX5_SEMAPHORE_SPACE_DOMAIN     = 0xA,
+	MLX5_SEMAPHORE_SW_RESET         = 0x20,
+};
+
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-		     unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
-			  struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
+void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 void mlx5_recover_device(struct mlx5_core_dev *dev);
 int mlx5_sriov_init(struct mlx5_core_dev *dev);
@@ -108,7 +131,6 @@
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
 void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
-bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
@@ -118,32 +140,13 @@
 				       u32 modify_bitmask);
 int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 					u32 element_id);
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+			     struct ptp_system_timestamp *sts);
 
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, const char *name,
-		       enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-		       u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
+void mlx5_cmd_flush(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
@@ -156,6 +159,24 @@
 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove(struct mlx5_core_dev *dev);
 
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
 void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
 void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
 void mlx5_attach_device(struct mlx5_core_dev *dev);
@@ -169,17 +190,6 @@
 void mlx5_dev_list_lock(void);
 void mlx5_dev_list_unlock(void);
 int mlx5_dev_list_trylock(void);
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
-		     int header_type,
-		     size_t size,
-		     void *encap_header,
-		     u32 *encap_id);
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
-
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
-			     u8 namespace, u8 num_actions,
-			     void *modify_actions, u32 *modify_header_id);
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
 
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
@@ -188,16 +198,27 @@
 int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
 int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
 
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev);
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev);
+
 #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) &&		\
 			    MLX5_CAP_GEN((mdev), pps_modify) &&		\
 			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) &&	\
 			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
 
-int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw);
+int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
+			struct netlink_ext_ack *extack);
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+			  u32 *running_ver, u32 *stored_ver);
 
 void mlx5e_init(void);
 void mlx5e_cleanup(void);
 
+static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
+{
+	return pci_num_vf(dev->pdev) > 0;
+}
+
 static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
 {
 	/* LACP owner conditions:
@@ -210,8 +231,16 @@
 		    MLX5_CAP_GEN(dev, lag_master);
 }
 
-int mlx5_lag_allow(struct mlx5_core_dev *dev);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev);
-
 void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+void mlx5_lag_update(struct mlx5_core_dev *dev);
+
+enum {
+	MLX5_NIC_IFC_FULL		= 0,
+	MLX5_NIC_IFC_DISABLED		= 1,
+	MLX5_NIC_IFC_NO_DRAM_NIC	= 2,
+	MLX5_NIC_IFC_SW_RESET		= 7
+};
+
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
 #endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 0670165..c501bf2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -38,25 +38,23 @@
 
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
 {
-	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-
-	memset(table, 0, sizeof(*table));
-	rwlock_init(&table->lock);
-	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+	xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ);
 }
 
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
 {
+	WARN_ON(!xa_empty(&dev->priv.mkey_table));
 }
 
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 			     struct mlx5_core_mkey *mkey,
-			     u32 *in, int inlen,
-			     u32 *out, int outlen,
-			     mlx5_cmd_cbk_t callback, void *context)
+			     struct mlx5_async_ctx *async_ctx, u32 *in,
+			     int inlen, u32 *out, int outlen,
+			     mlx5_async_cbk_t callback,
+			     struct mlx5_async_work *context)
 {
-	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+	struct xarray *mkeys = &dev->priv.mkey_table;
 	u32 mkey_index;
 	void *mkc;
 	int err;
@@ -71,7 +69,7 @@
 	MLX5_SET(mkc, mkc, mkey_7_0, key);
 
 	if (callback)
-		return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+		return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
 					callback, context);
 
 	err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
@@ -87,12 +85,10 @@
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
 		      mkey_index, key, mkey->key);
 
-	/* connect to mkey tree */
-	write_lock_irq(&table->lock);
-	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
-	write_unlock_irq(&table->lock);
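+	/* connect to the mkey xarray; xa_store_irq() takes the xa lock
+	 * with IRQs disabled, matching the old write_lock_irq() scheme
+	 */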
+	err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey,
+				  GFP_KERNEL));
 	if (err) {
-		mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+		mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n",
 			       mlx5_base_mkey(mkey->key), err);
 		mlx5_core_destroy_mkey(dev, mkey);
 	}
@@ -105,7 +101,7 @@
 			  struct mlx5_core_mkey *mkey,
 			  u32 *in, int inlen)
 {
-	return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+	return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
 					NULL, 0, NULL, NULL);
 }
 EXPORT_SYMBOL(mlx5_core_create_mkey);
@@ -113,20 +109,14 @@
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey)
 {
-	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
 	u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
-	struct mlx5_core_mkey *deleted_mkey;
+	struct xarray *mkeys = &dev->priv.mkey_table;
 	unsigned long flags;
 
-	write_lock_irqsave(&table->lock, flags);
-	deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
-	write_unlock_irqrestore(&table->lock, flags);
-	if (!deleted_mkey) {
-		mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
-			      mlx5_base_mkey(mkey->key));
-		return -ENOENT;
-	}
+	xa_lock_irqsave(mkeys, flags);
+	__xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+	xa_unlock_irqrestore(mkeys, flags);
 
 	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
 	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index e36d3e3..91bd258 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -37,6 +37,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
 	MLX5_PAGES_CANT_GIVE	= 0,
@@ -47,6 +48,7 @@
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;
 	u16	func_id;
+	u8	ec_function;
 	s32	npages;
 	struct work_struct work;
 };
@@ -142,6 +144,7 @@
 	MLX5_SET(query_pages_in, in, op_mod, boot ?
 		 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
 		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+	MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
@@ -197,7 +200,7 @@
 		rb_erase(&fwp->rb_node, &dev->priv.page_root);
 		if (fwp->free_count != 1)
 			list_del(&fwp->list);
-		dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+		dma_unmap_page(dev->device, addr & MLX5_U64_4K_PAGE_MASK,
 			       PAGE_SIZE, DMA_BIDIRECTIONAL);
 		__free_page(fwp->page);
 		kfree(fwp);
@@ -208,11 +211,12 @@
 
 static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
 {
+	struct device *device = dev->device;
+	int nid = dev_to_node(device);
 	struct page *page;
 	u64 zero_addr = 1;
 	u64 addr;
 	int err;
-	int nid = dev_to_node(&dev->pdev->dev);
 
 	page = alloc_pages_node(nid, GFP_HIGHUSER, 0);
 	if (!page) {
@@ -220,9 +224,8 @@
 		return -ENOMEM;
 	}
 map:
-	addr = dma_map_page(&dev->pdev->dev, page, 0,
-			    PAGE_SIZE, DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(&dev->pdev->dev, addr)) {
+	addr = dma_map_page(device, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(device, addr)) {
 		mlx5_core_warn(dev, "failed dma mapping page\n");
 		err = -ENOMEM;
 		goto err_mapping;
@@ -237,8 +240,7 @@
 	err = insert_page(dev, addr, page, func_id);
 	if (err) {
 		mlx5_core_err(dev, "failed to track allocated page\n");
-		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
-			       DMA_BIDIRECTIONAL);
+		dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 	}
 
 err_mapping:
@@ -246,13 +248,14 @@
 		__free_page(page);
 
 	if (zero_addr == 0)
-		dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE,
+		dma_unmap_page(device, zero_addr, PAGE_SIZE,
 			       DMA_BIDIRECTIONAL);
 
 	return err;
 }
 
-static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
+			     bool ec_function)
 {
 	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
@@ -261,6 +264,7 @@
 	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
+	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
@@ -269,7 +273,7 @@
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
-		      int notify_fail)
+		      int notify_fail, bool ec_function)
 {
 	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
 	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
@@ -304,6 +308,7 @@
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
 	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err) {
@@ -315,8 +320,11 @@
 	dev->priv.fw_pages += npages;
 	if (func_id)
 		dev->priv.vfs_pages += npages;
+	else if (mlx5_core_is_ecpf(dev) && !ec_function)
+		dev->priv.peer_pf_pages += npages;
 
-	mlx5_core_dbg(dev, "err %d\n", err);
+	mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
+		      npages, ec_function, func_id, err);
 
 	kvfree(in);
 	return 0;
@@ -327,7 +335,7 @@
 out_free:
 	kvfree(in);
 	if (notify_fail)
-		page_notify_fail(dev, func_id);
+		page_notify_fail(dev, func_id, ec_function);
 	return err;
 }
 
@@ -363,7 +371,7 @@
 }
 
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
-			 int *nclaimed)
+			 int *nclaimed, bool ec_function)
 {
 	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
 	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
@@ -384,6 +392,7 @@
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
 	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
 	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
@@ -409,6 +418,8 @@
 	dev->priv.fw_pages -= num_claimed;
 	if (func_id)
 		dev->priv.vfs_pages -= num_claimed;
+	else if (mlx5_core_is_ecpf(dev) && !ec_function)
+		dev->priv.peer_pf_pages -= num_claimed;
 
 out_free:
 	kvfree(out);
@@ -422,9 +433,10 @@
 	int err = 0;
 
 	if (req->npages < 0)
-		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
+				    req->ec_function);
 	else if (req->npages > 0)
-		err = give_pages(dev, req->func_id, req->npages, 1);
+		err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
 
 	if (err)
 		mlx5_core_warn(dev, "%s fail %d\n",
@@ -433,22 +445,43 @@
 	kfree(req);
 }
 
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s32 npages)
+enum {
+	EC_FUNCTION_MASK = 0x8000,
+};
+
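+/* EQ notifier callback: a PAGE_REQUEST event carries the function id and
+ * a signed page count (positive: give pages, negative: reclaim). Queue a
+ * work item so the firmware commands run in process context.
+ */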
+static int req_pages_handler(struct notifier_block *nb,
+			     unsigned long type, void *data)
 {
 	struct mlx5_pages_req *req;
+	struct mlx5_core_dev *dev;
+	struct mlx5_priv *priv;
+	struct mlx5_eqe *eqe;
+	bool ec_function;
+	u16 func_id;
+	s32 npages;
 
+	priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+	dev  = container_of(priv, struct mlx5_core_dev, priv);
+	eqe  = data;
+
+	func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+	npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
+	ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
+	mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+		      func_id, npages);
 	req = kzalloc(sizeof(*req), GFP_ATOMIC);
 	if (!req) {
 		mlx5_core_warn(dev, "failed to allocate pages request\n");
-		return;
+		return NOTIFY_DONE;
 	}
 
 	req->dev = dev;
 	req->func_id = func_id;
 	req->npages = npages;
+	req->ec_function = ec_function;
 	INIT_WORK(&req->work, pages_work_handler);
 	queue_work(dev->priv.pg_wq, &req->work);
+	return NOTIFY_OK;
 }
 
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -464,7 +497,7 @@
 	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
 		      npages, boot ? "boot" : "init", func_id);
 
-	return give_pages(dev, func_id, npages, 0);
+	return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev));
 }
 
 enum {
@@ -498,7 +531,7 @@
 			fwp = rb_entry(p, struct fw_page, rb_node);
 			err = reclaim_pages(dev, fwp->func_id,
 					    optimal_reclaimed_pages(),
-					    &nclaimed);
+					    &nclaimed, mlx5_core_is_ecpf(dev));
 
 			if (err) {
 				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
@@ -520,23 +553,17 @@
 	WARN(dev->priv.vfs_pages,
 	     "VFs FW pages counter is %d after reclaiming all pages\n",
 	     dev->priv.vfs_pages);
+	WARN(dev->priv.peer_pf_pages,
+	     "Peer PF FW pages counter is %d after reclaiming all pages\n",
+	     dev->priv.peer_pf_pages);
 
 	return 0;
 }
 
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
 	dev->priv.page_root = RB_ROOT;
 	INIT_LIST_HEAD(&dev->priv.free_list);
-}
-
-void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
-{
-	/* nothing */
-}
-
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
-{
 	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
 	if (!dev->priv.pg_wq)
 		return -ENOMEM;
@@ -544,15 +571,27 @@
 	return 0;
 }
 
-void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
 {
 	destroy_workqueue(dev->priv.pg_wq);
 }
 
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+{
+	MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+	mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
+}
+
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+{
+	mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+	flush_workqueue(dev->priv.pg_wq);
+}
+
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
 {
 	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
-	int prev_vfs_pages = dev->priv.vfs_pages;
+	int prev_pages = *pages;
 
 	/* In case of internal error we will free the pages manually later */
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -560,20 +599,19 @@
 		return 0;
 	}
 
-	mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
-		      dev->priv.name);
-	while (dev->priv.vfs_pages) {
+	mlx5_core_dbg(dev, "Waiting for %d pages\n", prev_pages);
+	while (*pages) {
 		if (time_after(jiffies, end)) {
-			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages);
 			return -ETIMEDOUT;
 		}
-		if (dev->priv.vfs_pages < prev_vfs_pages) {
+		if (*pages < prev_pages) {
 			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
-			prev_vfs_pages = dev->priv.vfs_pages;
+			prev_pages = *pages;
 		}
 		msleep(50);
 	}
 
-	mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name);
+	mlx5_core_dbg(dev, "All pages received\n");
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
new file mode 100644
index 0000000..373981a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_MAX_IRQ_NAME (32)
+
+struct mlx5_irq {
+	struct atomic_notifier_head nh;
+	cpumask_var_t mask;
+	char name[MLX5_MAX_IRQ_NAME];
+};
+
+struct mlx5_irq_table {
+	struct mlx5_irq *irq;
+	int nvec;
+#ifdef CONFIG_RFS_ACCEL
+	struct cpu_rmap *rmap;
+#endif
+};
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *irq_table;
+
+	irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+	if (!irq_table)
+		return -ENOMEM;
+
+	dev->priv.irq_table = irq_table;
+	return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+	kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+{
+	return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
+}
+
+static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+{
+	struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+	return &irq_table->irq[vecidx];
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb)
+{
+	struct mlx5_irq *irq;
+
+	irq = &irq_table->irq[vecidx];
+	return atomic_notifier_chain_register(&irq->nh, nb);
+}
+
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb)
+{
+	struct mlx5_irq *irq;
+
+	irq = &irq_table->irq[vecidx];
+	return atomic_notifier_chain_unregister(&irq->nh, nb);
+}
+
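+/* Hard IRQ handler shared by all vectors: fan the interrupt out to the
+ * atomic notifier chain, where the EQs attached via mlx5_irq_attach_nb()
+ * are polled.
+ */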
+static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+{
+	atomic_notifier_call_chain(nh, 0, NULL);
+	return IRQ_HANDLED;
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+	if (vecidx == 0) {
+		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+		return;
+	}
+
+	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
+}
+
+static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+{
+	char name[MLX5_MAX_IRQ_NAME];
+	int err;
+	int i;
+
+	for (i = 0; i < nvec; i++) {
+		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+		int irqn = pci_irq_vector(dev->pdev, i);
+
+		irq_set_name(name, i);
+		ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+		snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+			 "%s@pci:%s", name, pci_name(dev->pdev));
+		err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+				  &irq->nh);
+		if (err) {
+			mlx5_core_err(dev, "Failed to request irq\n");
+			goto err_request_irq;
+		}
+	}
+	return 0;
+
+err_request_irq:
+	while (i--) {
+		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+		int irqn = pci_irq_vector(dev->pdev, i);
+
+		free_irq(irqn, &irq->nh);
+	}
+	return err;
+}
+
+static void irq_clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+	free_irq_cpu_rmap(irq_table->rmap);
+#endif
+}
+
+static int irq_set_rmap(struct mlx5_core_dev *mdev)
+{
+	int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+	struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
+	int num_affinity_vec;
+	int vecidx;
+
+	num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
+	irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
+	if (!irq_table->rmap) {
+		err = -ENOMEM;
+		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+		goto err_out;
+	}
+
+	vecidx = MLX5_IRQ_VEC_COMP_BASE;
+	for (; vecidx < irq_table->nvec; vecidx++) {
+		err = irq_cpu_rmap_add(irq_table->rmap,
+				       pci_irq_vector(mdev->pdev, vecidx));
+		if (err) {
+			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+				      err);
+			goto err_irq_cpu_rmap_add;
+		}
+	}
+	return 0;
+
+err_irq_cpu_rmap_add:
+	irq_clear_rmap(mdev);
+err_out:
+#endif
+	return err;
+}
+
+/* Completion IRQ vectors */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+	struct mlx5_irq *irq;
+	int irqn;
+
+	irq = mlx5_irq_get(mdev, vecidx);
+	irqn = pci_irq_vector(mdev->pdev, vecidx);
+	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+		return -ENOMEM;
+	}
+
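+	/* Pin each completion vector to one CPU, spreading across the
+	 * device's NUMA node first.
+	 */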
+	cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
+			irq->mask);
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    irq_set_affinity_hint(irqn, irq->mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
+			       irqn);
+
+	return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+	struct mlx5_irq *irq;
+	int irqn;
+
+	irq = mlx5_irq_get(mdev, vecidx);
+	irqn = pci_irq_vector(mdev->pdev, vecidx);
+	irq_set_affinity_hint(irqn, NULL);
+	free_cpumask_var(irq->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+	int err;
+	int i;
+
+	for (i = 0; i < nvec; i++) {
+		err = set_comp_irq_affinity_hint(mdev, i);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	for (i--; i >= 0; i--)
+		clear_comp_irq_affinity_hint(mdev, i);
+
+	return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+	int i;
+
+	for (i = 0; i < nvec; i++)
+		clear_comp_irq_affinity_hint(mdev, i);
+}
+
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+{
+	return irq_table->irq[vecidx].mask;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+{
+	return irq_table->rmap;
+}
+#endif
+
+static void unrequest_irqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *table = dev->priv.irq_table;
+	int i;
+
+	for (i = 0; i < table->nvec; i++)
+		free_irq(pci_irq_vector(dev->pdev, i),
+			 &mlx5_irq_get(dev, i)->nh);
+}
+
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_irq_table *table = priv->irq_table;
+	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+		      MLX5_CAP_GEN(dev, max_num_eqs) :
+		      1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int nvec;
+	int err;
+
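+	/* One completion vector per online CPU and port, plus the
+	 * MLX5_IRQ_VEC_COMP_BASE control vectors, capped by the number
+	 * of EQs the device can expose.
+	 */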
+	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+	       MLX5_IRQ_VEC_COMP_BASE;
+	nvec = min_t(int, nvec, num_eqs);
+	if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+		return -ENOMEM;
+
+	table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
+	if (!table->irq)
+		return -ENOMEM;
+
+	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+				     nvec, PCI_IRQ_MSIX);
+	if (nvec < 0) {
+		err = nvec;
+		goto err_free_irq;
+	}
+
+	table->nvec = nvec;
+
+	err = irq_set_rmap(dev);
+	if (err)
+		goto err_set_rmap;
+
+	err = request_irqs(dev, nvec);
+	if (err)
+		goto err_request_irqs;
+
+	err = set_comp_irq_affinity_hints(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+		goto err_set_affinity;
+	}
+
+	return 0;
+
+err_set_affinity:
+	unrequest_irqs(dev);
+err_request_irqs:
+	irq_clear_rmap(dev);
+err_set_rmap:
+	pci_free_irq_vectors(dev->pdev);
+err_free_irq:
+	kfree(table->irq);
+	return err;
+}
+
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *table = dev->priv.irq_table;
+	int i;
+
+	/* free_irq() requires that the affinity hint and the rmap be
+	 * cleared before it is called. This is why there is asymmetry with
+	 * set_rmap, which must be called after alloc_irq but before
+	 * request_irq.
+	 */
+	irq_clear_rmap(dev);
+	clear_comp_irqs_affinity_hints(dev);
+	for (i = 0; i < table->nvec; i++)
+		free_irq(pci_irq_vector(dev->pdev, i),
+			 &mlx5_irq_get(dev, i)->nh);
+	pci_free_irq_vectors(dev->pdev);
+	kfree(table->irq);
+}
+
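
mlx5_irq_table_create() leans on the min/max contract of pci_alloc_irq_vectors(): the call succeeds with any vector count in [min_vecs, max_vecs] and returns how many were actually granted, or a negative errno if even min_vecs cannot be met. The general shape (assumed variable names) is:

	int nvec = pci_alloc_irq_vectors(pdev, min_vecs, max_vecs,
					 PCI_IRQ_MSIX);
	if (nvec < 0)
		return nvec;	/* fewer than min_vecs available */
	/* request_irq() each of the nvec vectors, then on teardown
	 * free_irq() them all before: */
	pci_free_irq_vectors(pdev);
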
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 31a9cbd..cc262b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -30,10 +30,7 @@
  * SOFTWARE.
  */
 
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
 #include <linux/mlx5/port.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
@@ -157,44 +154,6 @@
 				    sizeof(out), MLX5_REG_MLCR, 0, 1);
 }
 
-int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
-			      u32 *proto_cap, int proto_mask)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	int err;
-
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
-	if (err)
-		return err;
-
-	if (proto_mask == MLX5_PTYS_EN)
-		*proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
-	else
-		*proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap);
-
-int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-				u32 *proto_admin, int proto_mask)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	int err;
-
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
-	if (err)
-		return err;
-
-	if (proto_mask == MLX5_PTYS_EN)
-		*proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
-	else
-		*proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin);
-
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 				    u8 *link_width_oper, u8 local_port)
 {
@@ -211,23 +170,6 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
 
-int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
-				   u32 *proto_oper, u8 local_port)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	int err;
-
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
-				   local_port);
-	if (err)
-		return err;
-
-	*proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
-
 int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
 				  u8 *proto_oper, u8 local_port)
 {
@@ -245,35 +187,6 @@
 }
 EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
 
-int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
-		       u32 proto_admin, int proto_mask)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	u32 in[MLX5_ST_SZ_DW(ptys_reg)];
-	u8 an_disable_admin;
-	u8 an_disable_cap;
-	u8 an_status;
-
-	mlx5_query_port_autoneg(dev, proto_mask, &an_status,
-				&an_disable_cap, &an_disable_admin);
-	if (!an_disable_cap && an_disable)
-		return -EPERM;
-
-	memset(in, 0, sizeof(in));
-
-	MLX5_SET(ptys_reg, in, local_port, 1);
-	MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
-	MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
-	if (proto_mask == MLX5_PTYS_EN)
-		MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
-	else
-		MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin);
-
-	return mlx5_core_access_reg(dev, in, sizeof(in), out,
-				    sizeof(out), MLX5_REG_PTYS, 0, 1);
-}
-EXPORT_SYMBOL_GPL(mlx5_set_port_ptys);
-
 /* This function should be used after setting a port register only */
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
 {
@@ -380,15 +293,36 @@
 	return 0;
 }
 
+static int mlx5_eeprom_page(int offset)
+{
+	if (offset < MLX5_EEPROM_PAGE_LENGTH)
+		/* Addresses between 0-255 - page 00 */
+		return 0;
+
+	/* Addresses between 256 and 639 belong to pages 01, 02 and 03.
+	 * For example, offset = 400 belongs to page 02:
+	 * 1 + ((400 - 256) / 128) = 2
+	 */
+	return 1 + ((offset - MLX5_EEPROM_PAGE_LENGTH) /
+		    MLX5_EEPROM_HIGH_PAGE_LENGTH);
+}
+
+static int mlx5_eeprom_high_page_offset(int page_num)
+{
+	if (!page_num) /* Page 0 always starts from the low page */
+		return 0;
+
+	/* High page */
+	return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH;
+}
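
Together these helpers translate a flat module-EEPROM offset into the (page, device_address) pair the MCIA register expects: page 0 covers bytes 0..255, and each high page adds 128 bytes. Walking the arithmetic for the offset 400 used in the comment above:

	/* page   = 1 + (400 - 256) / 128 = 2
	 * offset = 400 - mlx5_eeprom_high_page_offset(2)
	 *        = 400 - 2 * 128         = 144   (a high-page address,
	 *                                         i.e. within 128..255)
	 */
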
+
 int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 			     u16 offset, u16 size, u8 *data)
 {
+	int module_num, page_num, status, err;
 	u32 out[MLX5_ST_SZ_DW(mcia_reg)];
 	u32 in[MLX5_ST_SZ_DW(mcia_reg)];
-	int module_num;
 	u16 i2c_addr;
-	int status;
-	int err;
 	void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
 
 	err = mlx5_query_module_num(dev, &module_num);
@@ -398,21 +332,24 @@
 	memset(in, 0, sizeof(in));
 	size = min_t(int, size, MLX5_EEPROM_MAX_BYTES);
 
-	if (offset < MLX5_EEPROM_PAGE_LENGTH &&
-	    offset + size > MLX5_EEPROM_PAGE_LENGTH)
+	/* Get the page number related to the given offset */
+	page_num = mlx5_eeprom_page(offset);
+
+	/* Set the right offset according to the page number.
+	 * For page_num > 0, the relative offset is always >= 128 (high page).
+	 */
+	offset -= mlx5_eeprom_high_page_offset(page_num);
+
+	if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
 		/* Cross pages read, read until offset 256 in low page */
 		size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
 
 	i2c_addr = MLX5_I2C_ADDR_LOW;
-	if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
-		i2c_addr = MLX5_I2C_ADDR_HIGH;
-		offset -= MLX5_EEPROM_PAGE_LENGTH;
-	}
 
 	MLX5_SET(mcia_reg, in, l, 0);
 	MLX5_SET(mcia_reg, in, module, module_num);
 	MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr);
-	MLX5_SET(mcia_reg, in, page_number, 0);
+	MLX5_SET(mcia_reg, in, page_number, page_num);
 	MLX5_SET(mcia_reg, in, device_address, offset);
 	MLX5_SET(mcia_reg, in, size, size);
 
@@ -606,25 +543,6 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
 
-void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
-			     u8 *an_status,
-			     u8 *an_disable_cap, u8 *an_disable_admin)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-
-	*an_status = 0;
-	*an_disable_cap = 0;
-	*an_disable_admin = 0;
-
-	if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1))
-		return;
-
-	*an_status = MLX5_GET(ptys_reg, out, an_status);
-	*an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
-	*an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg);
-
 int mlx5_max_tc(struct mlx5_core_dev *mdev)
 {
 	u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
@@ -870,8 +788,7 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
 
-static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
-				  int outlen)
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen)
 {
 	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
@@ -880,7 +797,7 @@
 				    outlen, MLX5_REG_PCMR, 0, 0);
 }
 
-static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
 {
 	u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
 
@@ -891,7 +808,11 @@
 int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
 {
 	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
 
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
 	MLX5_SET(pcmr_reg, in, local_port, 1);
 	MLX5_SET(pcmr_reg, in, fcs_chk, enable);
 	return mlx5_set_ports_check(mdev, in, sizeof(in));
@@ -915,63 +836,6 @@
 	*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
 
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
-	"Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
-	"Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
-	"Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
-	"Power budget exceeded",
-	"Long Range for non MLNX cable",
-	"Bus stuck(I2C or data shorted)",
-	"No EEPROM/retry timeout",
-	"Enforce part number list",
-	"Unknown identifier",
-	"High Temperature",
-	"Bad or shorted cable/module",
-	"Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
-	enum port_module_event_status_type module_status;
-	enum port_module_event_error_type error_type;
-	struct mlx5_eqe_port_module *module_event_eqe;
-	struct mlx5_priv *priv = &dev->priv;
-	u8 module_num;
-
-	module_event_eqe = &eqe->data.port_module;
-	module_num = module_event_eqe->module;
-	module_status = module_event_eqe->module_status &
-			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
-	error_type = module_event_eqe->error_type &
-		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
-	if (module_status < MLX5_MODULE_STATUS_ERROR) {
-		priv->pme_stats.status_counters[module_status - 1]++;
-	} else if (module_status == MLX5_MODULE_STATUS_ERROR) {
-		if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
-			/* Unknown error type */
-			error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
-		priv->pme_stats.error_counters[error_type]++;
-	}
-
-	if (!printk_ratelimit())
-		return;
-
-	if (module_status < MLX5_MODULE_STATUS_ERROR)
-		mlx5_core_info(dev,
-			       "Port module event: module %u, %s\n",
-			       module_num, mlx5_pme_status[module_status - 1]);
-
-	else if (module_status == MLX5_MODULE_STATUS_ERROR)
-		mlx5_core_info(dev,
-			       "Port module event[error]: module %u, %s, %s\n",
-			       module_num, mlx5_pme_status[module_status - 1],
-			       mlx5_pme_error[error_type]);
-}
-
 int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
 {
 	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 4ca07bf..c3aea4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -38,32 +38,31 @@
 #include <linux/mlx5/transobj.h>
 
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
-						 u32 rsn)
+static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
+			       struct mlx5_core_dct *dct);
+
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
 {
-	struct mlx5_qp_table *table = &dev->priv.qp_table;
 	struct mlx5_core_rsc_common *common;
+	unsigned long flags;
 
-	spin_lock(&table->lock);
+	spin_lock_irqsave(&table->lock, flags);
 
 	common = radix_tree_lookup(&table->tree, rsn);
 	if (common)
-		atomic_inc(&common->refcount);
+		refcount_inc(&common->refcount);
 
-	spin_unlock(&table->lock);
+	spin_unlock_irqrestore(&table->lock, flags);
 
-	if (!common) {
-		mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
-			       rsn);
-		return NULL;
-	}
 	return common;
 }
 
 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
 {
-	if (atomic_dec_and_test(&common->refcount))
+	if (refcount_dec_and_test(&common->refcount))
 		complete(&common->free);
 }
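
The atomic_t to refcount_t conversion in this hunk is a hardening change, not a behavioral one: refcount_t saturates rather than wrapping on overflow and warns on increment-from-zero, so reference-count bugs surface as diagnostics instead of use-after-free. The lifecycle stays the same (generic sketch, not driver-specific names):

	refcount_set(&obj->refcount, 1);	/* creator's reference */
	refcount_inc(&obj->refcount);		/* each lookup takes one */
	if (refcount_dec_and_test(&obj->refcount))
		complete(&obj->free);		/* last put wakes the destroyer */
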
 
@@ -120,19 +119,57 @@
 	}
 }
 
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+			      unsigned long type, void *data)
 {
-	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+	struct mlx5_core_rsc_common *common;
+	struct mlx5_qp_table *table;
+	struct mlx5_core_dev *dev;
 	struct mlx5_core_dct *dct;
+	u8 event_type = (u8)type;
 	struct mlx5_core_qp *qp;
+	struct mlx5_priv *priv;
+	struct mlx5_eqe *eqe;
+	u32 rsn;
 
-	if (!common)
-		return;
+	switch (event_type) {
+	case MLX5_EVENT_TYPE_DCT_DRAINED:
+		eqe = data;
+		rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+		rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+		break;
+	case MLX5_EVENT_TYPE_PATH_MIG:
+	case MLX5_EVENT_TYPE_COMM_EST:
+	case MLX5_EVENT_TYPE_SQ_DRAINED:
+	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+		eqe = data;
+		rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+		rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	table = container_of(nb, struct mlx5_qp_table, nb);
+	priv  = container_of(table, struct mlx5_priv, qp_table);
+	dev   = container_of(priv, struct mlx5_core_dev, priv);
+
+	mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
+
+	common = mlx5_get_rsc(table, rsn);
+	if (!common) {
+		mlx5_core_dbg(dev, "Async event for unknown resource 0x%x\n", rsn);
+		return NOTIFY_OK;
+	}
 
 	if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
 		mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
 			       event_type, rsn);
-		return;
+		goto out;
 	}
 
 	switch (common->res) {
@@ -150,8 +187,10 @@
 	default:
 		mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
 	}
-
+out:
 	mlx5_core_put_rsc(common);
+
+	return NOTIFY_OK;
 }
 
 static int create_resource_common(struct mlx5_core_dev *dev,
@@ -170,7 +209,7 @@
 	if (err)
 		return err;
 
-	atomic_set(&qp->common.refcount, 1);
+	refcount_set(&qp->common.refcount, 1);
 	init_completion(&qp->common.free);
 	qp->pid = current->pid;
 
@@ -191,36 +230,63 @@
 	wait_for_completion(&qp->common.free);
 }
 
+static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
+				  struct mlx5_core_dct *dct, bool need_cleanup)
+{
+	u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_dct_in)]   = {0};
+	struct mlx5_core_qp *qp = &dct->mqp;
+	int err;
+
+	err = mlx5_core_drain_dct(dev, dct);
+	if (err) {
+		if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+			goto destroy;
+		} else {
+			mlx5_core_warn(
+				dev, "failed to drain DCT 0x%x with error 0x%x\n",
+				qp->qpn, err);
+			return err;
+		}
+	}
+	wait_for_completion(&dct->drained);
+destroy:
+	if (need_cleanup)
+		destroy_resource_common(dev, &dct->mqp);
+	MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+	MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+	MLX5_SET(destroy_dct_in, in, uid, qp->uid);
+	err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+			    (void *)&out, sizeof(out));
+	return err;
+}
+
 int mlx5_core_create_dct(struct mlx5_core_dev *dev,
 			 struct mlx5_core_dct *dct,
-			 u32 *in, int inlen)
+			 u32 *in, int inlen,
+			 u32 *out, int outlen)
 {
-	u32 out[MLX5_ST_SZ_DW(create_dct_out)]   = {0};
-	u32 din[MLX5_ST_SZ_DW(destroy_dct_in)]   = {0};
-	u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
 	struct mlx5_core_qp *qp = &dct->mqp;
 	int err;
 
 	init_completion(&dct->drained);
 	MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
 
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
 	if (err) {
 		mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
 		return err;
 	}
 
 	qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+	qp->uid = MLX5_GET(create_dct_in, in, uid);
 	err = create_resource_common(dev, qp, MLX5_RES_DCT);
 	if (err)
 		goto err_cmd;
 
 	return 0;
 err_cmd:
-	MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
-	MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
-	mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
-		      (void *)&out, sizeof(dout));
+	_mlx5_core_destroy_dct(dev, dct, false);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
@@ -240,6 +306,7 @@
 	if (err)
 		return err;
 
+	qp->uid = MLX5_GET(create_qp_in, in, uid);
 	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
 	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
@@ -261,6 +328,7 @@
 	memset(dout, 0, sizeof(dout));
 	MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
 	MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, din, uid, qp->uid);
 	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
@@ -275,6 +343,7 @@
 
 	MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
 	MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+	MLX5_SET(drain_dct_in, in, uid, qp->uid);
 	return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
 			     (void *)&out, sizeof(out));
 }
@@ -282,28 +351,7 @@
 int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
 			  struct mlx5_core_dct *dct)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_dct_in)]   = {0};
-	struct mlx5_core_qp *qp = &dct->mqp;
-	int err;
-
-	err = mlx5_core_drain_dct(dev, dct);
-	if (err) {
-		if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-			goto destroy;
-		} else {
-			mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err);
-			return err;
-		}
-	}
-	wait_for_completion(&dct->drained);
-destroy:
-	destroy_resource_common(dev, &dct->mqp);
-	MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
-	MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
-	err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
-			    (void *)&out, sizeof(out));
-	return err;
+	return _mlx5_core_destroy_dct(dev, dct, true);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
 
@@ -320,6 +368,7 @@
 
 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
 	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, in, uid, qp->uid);
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
@@ -373,7 +422,7 @@
 
 static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
 				u32 opt_param_mask, void *qpc,
-				struct mbox_info *mbox)
+				struct mbox_info *mbox, u16 uid)
 {
 	mbox->out = NULL;
 	mbox->in = NULL;
@@ -381,26 +430,32 @@
 #define MBOX_ALLOC(mbox, typ)  \
 	mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
 
-#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
-	MLX5_SET(typ##_in, in, opcode, _opcode); \
-	MLX5_SET(typ##_in, in, qpn, _qpn)
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid)                            \
+	do {                                                                   \
+		MLX5_SET(typ##_in, in, opcode, _opcode);                       \
+		MLX5_SET(typ##_in, in, qpn, _qpn);                             \
+		MLX5_SET(typ##_in, in, uid, _uid);                             \
+	} while (0)
 
-#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
-	MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
-	MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
-	memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid)          \
+	do {                                                                   \
+		MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid);                   \
+		MLX5_SET(typ##_in, in, opt_param_mask, _opt_p);                \
+		memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc,                  \
+		       MLX5_ST_SZ_BYTES(qpc));                                 \
+	} while (0)
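
Wrapping the multi-statement MOD_QP_IN_SET* macros in do { ... } while (0) is the standard guard against the unbraced-if pitfall: a bare statement list expands into code where only the first statement is conditional. A minimal illustration with hypothetical macros:

	#define M_BAD()		stmt_a(); stmt_b()
	#define M_GOOD()	do { stmt_a(); stmt_b(); } while (0)

	if (cond)
		M_BAD();	/* stmt_b() runs unconditionally */
	if (cond)
		M_GOOD();	/* both guarded; the trailing ';' parses cleanly */
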
 
 	switch (opcode) {
 	/* 2RST & 2ERR */
 	case MLX5_CMD_OP_2RST_QP:
 		if (MBOX_ALLOC(mbox, qp_2rst))
 			return -ENOMEM;
-		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
 		break;
 	case MLX5_CMD_OP_2ERR_QP:
 		if (MBOX_ALLOC(mbox, qp_2err))
 			return -ENOMEM;
-		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
 		break;
 
 	/* MODIFY with QPC */
@@ -408,37 +463,37 @@
 		if (MBOX_ALLOC(mbox, rst2init_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_INIT2RTR_QP:
 		if (MBOX_ALLOC(mbox, init2rtr_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_RTR2RTS_QP:
 		if (MBOX_ALLOC(mbox, rtr2rts_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_RTS2RTS_QP:
 		if (MBOX_ALLOC(mbox, rts2rts_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_SQERR2RTS_QP:
 		if (MBOX_ALLOC(mbox, sqerr2rts_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_INIT2INIT_QP:
 		if (MBOX_ALLOC(mbox, init2init_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	default:
 		mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
@@ -456,7 +511,7 @@
 	int err;
 
 	err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
-				   opt_param_mask, qpc, &mbox);
+				   opt_param_mask, qpc, &mbox, qp->uid);
 	if (err)
 		return err;
 
@@ -474,10 +529,16 @@
 	spin_lock_init(&table->lock);
 	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
 	mlx5_qp_debugfs_init(dev);
+
+	table->nb.notifier_call = rsc_event_notifier;
+	mlx5_notifier_register(dev, &table->nb);
 }
 
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
 {
+	struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+	mlx5_notifier_unregister(dev, &table->nb);
 	mlx5_qp_debugfs_cleanup(dev);
 }
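
Replacing the direct mlx5_rsc_event() call with a registered notifier decouples the EQ dispatch code from the QP table: events are published as raw EQEs, and each consumer subscribes with a notifier_block, recovering its private state via container_of() as rsc_event_notifier() does above. The consumer skeleton, with hypothetical names (my_table, my_event_cb, wanted, handle):

	static int my_event_cb(struct notifier_block *nb, unsigned long type,
			       void *data)
	{
		struct my_table *t = container_of(nb, struct my_table, nb);
		struct mlx5_eqe *eqe = data;

		if (!wanted(type))
			return NOTIFY_DONE;	/* not ours; keep delivering */
		handle(t, eqe);
		return NOTIFY_OK;
	}

	/* registration, mirrored by mlx5_notifier_unregister() on cleanup */
	t->nb.notifier_call = my_event_cb;
	mlx5_notifier_register(dev, &t->nb);
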
 
@@ -531,6 +592,17 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
 
+static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {};
+
+	MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+	MLX5_SET(destroy_rq_in, in, rqn, rqn);
+	MLX5_SET(destroy_rq_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 				struct mlx5_core_qp *rq)
 {
@@ -541,6 +613,7 @@
 	if (err)
 		return err;
 
+	rq->uid = MLX5_GET(create_rq_in, in, uid);
 	rq->qpn = rqn;
 	err = create_resource_common(dev, rq, MLX5_RES_RQ);
 	if (err)
@@ -549,7 +622,7 @@
 	return 0;
 
 err_destroy_rq:
-	mlx5_core_destroy_rq(dev, rq->qpn);
+	destroy_rq_tracked(dev, rq->qpn, rq->uid);
 
 	return err;
 }
@@ -559,10 +632,21 @@
 				  struct mlx5_core_qp *rq)
 {
 	destroy_resource_common(dev, rq);
-	mlx5_core_destroy_rq(dev, rq->qpn);
+	destroy_rq_tracked(dev, rq->qpn, rq->uid);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
 
+static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {};
+
+	MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+	MLX5_SET(destroy_sq_in, in, sqn, sqn);
+	MLX5_SET(destroy_sq_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 				struct mlx5_core_qp *sq)
 {
@@ -573,6 +657,7 @@
 	if (err)
 		return err;
 
+	sq->uid = MLX5_GET(create_sq_in, in, uid);
 	sq->qpn = sqn;
 	err = create_resource_common(dev, sq, MLX5_RES_SQ);
 	if (err)
@@ -581,7 +666,7 @@
 	return 0;
 
 err_destroy_sq:
-	mlx5_core_destroy_sq(dev, sq->qpn);
+	destroy_sq_tracked(dev, sq->qpn, sq->uid);
 
 	return err;
 }
@@ -591,7 +676,7 @@
 				  struct mlx5_core_qp *sq)
 {
 	destroy_resource_common(dev, sq);
-	mlx5_core_destroy_sq(dev, sq->qpn);
+	destroy_sq_tracked(dev, sq->qpn, sq->uid);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
 
@@ -633,3 +718,20 @@
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+						int res_num,
+						enum mlx5_res_type res_type)
+{
+	u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+	struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+	return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+	mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
new file mode 100644
index 0000000..0fc7de4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include <rdma/ib_verbs.h>
+#include <net/addrconf.h>
+
+#include "lib/mlx5.h"
+#include "eswitch.h"
+#include "fs_core.h"
+#include "rdma.h"
+
+static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_roce *roce = &dev->priv.roce;
+
+	mlx5_del_flow_rules(roce->allow_rule);
+	mlx5_destroy_flow_group(roce->fg);
+	mlx5_destroy_flow_table(roce->ft);
+}
+
+static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_core_roce *roce = &dev->priv.roce;
+	struct mlx5_flow_handle *flow_rule = NULL;
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_namespace *ns = NULL;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_spec *spec;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_group *fg;
+	void *match_criteria;
+	u32 *flow_group_in;
+	void *misc;
+	int err;
+
+	if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
+	      MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)))
+		return -EOPNOTSUPP;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		kvfree(flow_group_in);
+		return -ENOMEM;
+	}
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL);
+	if (!ns) {
+		mlx5_core_err(dev, "Failed to get RDMA RX namespace");
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	ft_attr.max_fte = 1;
+	ft = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(ft)) {
+		mlx5_core_err(dev, "Failed to create RDMA RX flow table");
+		err = PTR_ERR(ft);
+		goto free;
+	}
+
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_MISC_PARAMETERS);
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+				      match_criteria);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+			 misc_parameters.source_port);
+
+	fg = mlx5_create_flow_group(ft, flow_group_in);
+	if (IS_ERR(fg)) {
+		err = PTR_ERR(fg);
+		mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err);
+		goto destroy_flow_table;
+	}
+
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			    misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_port,
+		 dev->priv.eswitch->manager_vport);
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			    misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n",
+			      err);
+		goto destroy_flow_group;
+	}
+
+	kvfree(spec);
+	kvfree(flow_group_in);
+	roce->ft = ft;
+	roce->fg = fg;
+	roce->allow_rule = flow_rule;
+
+	return 0;
+
+destroy_flow_group:
+	mlx5_destroy_flow_group(fg);
+destroy_flow_table:
+	mlx5_destroy_flow_table(ft);
+free:
+	kvfree(spec);
+	kvfree(flow_group_in);
+	return err;
+}
+
+static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
+{
+	mlx5_core_roce_gid_set(dev, 0, 0, 0,
+			       NULL, NULL, false, 0, 0);
+}
+
+static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)
+{
+	u8 hw_id[ETH_ALEN];
+
+	mlx5_query_mac_address(dev, hw_id);
+	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+	addrconf_addr_eui48(&gid->raw[8], hw_id);
+}
+
+static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
+{
+	union ib_gid gid;
+	u8 mac[ETH_ALEN];
+
+	mlx5_rdma_make_default_gid(dev, &gid);
+	return mlx5_core_roce_gid_set(dev, 0,
+				      MLX5_ROCE_VERSION_1,
+				      0, gid.raw, mac,
+				      false, 0, 1);
+}
+
+void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_roce *roce = &dev->priv.roce;
+
+	if (!roce->ft)
+		return;
+
+	mlx5_rdma_disable_roce_steering(dev);
+	mlx5_rdma_del_roce_addr(dev);
+	mlx5_nic_vport_disable_roce(dev);
+}
+
+void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = mlx5_nic_vport_enable_roce(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
+		return;
+	}
+
+	err = mlx5_rdma_add_roce_addr(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err);
+		goto disable_roce;
+	}
+
+	err = mlx5_rdma_enable_roce_steering(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err);
+		goto del_roce_addr;
+	}
+
+	return;
+
+del_roce_addr:
+	mlx5_rdma_del_roce_addr(dev);
+disable_roce:
+	mlx5_nic_vport_disable_roce(dev);
+	return;
+}
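
mlx5_rdma_enable_roce() and its steering helper both use the kernel's standard unwind idiom: acquire resources in order, jump to labels that release them in exact reverse order on failure, and keep the disable path a mirror image of a successful enable. The skeleton, with generic step names:

	err = step_a(dev);
	if (err)
		return;
	err = step_b(dev);
	if (err)
		goto err_a;
	err = step_c(dev);
	if (err)
		goto err_b;
	return;
err_b:
	undo_step_b(dev);
err_a:
	undo_step_a(dev);
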
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
new file mode 100644
index 0000000..750cff2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_RDMA_H__
+#define __MLX5_RDMA_H__
+
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
+void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {}
+static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_RDMA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index bc86dff..01c3804 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -188,8 +188,7 @@
 		/* new rate limit */
 		err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
 		if (err) {
-			mlx5_core_err(dev, "Failed configuring rate limit(err %d): \
-				      rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+			mlx5_core_err(dev, "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
 				      err, rl->rate, rl->max_burst_sz,
 				      rl->typical_pkt_sz);
 			goto out;
@@ -218,8 +217,7 @@
 	mutex_lock(&table->rl_lock);
 	entry = find_rl_entry(table, rl);
 	if (!entry || !entry->refcount) {
-		mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \
-			       are not configured\n",
+		mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n",
 			       rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
 		goto out;
 	}
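
The two rl.c hunks above fix a subtle bug: a backslash line continuation inside a string literal keeps the next line's leading whitespace in the string, so the old log messages contained runs of tabs. Adjacent string literals are the idiomatic way to split long messages, though networking style now prefers one long line for grepability. A small illustration:

	const char *bad  = "rate limit \
			not configured";	/* embeds the leading tabs */
	const char *good = "rate limit "
			   "not configured";	/* literals concatenate cleanly */
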
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a067496..61fcfd8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -36,13 +36,6 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
-bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
-{
-	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-
-	return !!sriov->num_vfs;
-}
-
 static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -81,17 +74,11 @@
 	int err;
 	int vf;
 
-	if (sriov->enabled_vfs) {
-		mlx5_core_warn(dev,
-			       "failed to enable SRIOV on device, already enabled with %d vfs\n",
-			       sriov->enabled_vfs);
-		return -EBUSY;
-	}
-
 	if (!MLX5_ESWITCH_MANAGER(dev))
 		goto enable_vfs_hca;
 
-	err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
+	mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs);
+	err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY);
 	if (err) {
 		mlx5_core_warn(dev,
 			       "failed to enable eswitch SRIOV (%d)\n", err);
@@ -106,7 +93,6 @@
 			continue;
 		}
 		sriov->vfs_ctx[vf].enabled = 1;
-		sriov->enabled_vfs++;
 		if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
 			err = sriov_restore_guids(dev, vf);
 			if (err) {
@@ -125,13 +111,11 @@
 static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int num_vfs = pci_num_vf(dev->pdev);
 	int err;
 	int vf;
 
-	if (!sriov->enabled_vfs)
-		goto out;
-
-	for (vf = 0; vf < sriov->num_vfs; vf++) {
+	for (vf = num_vfs - 1; vf >= 0; vf--) {
 		if (!sriov->vfs_ctx[vf].enabled)
 			continue;
 		err = mlx5_core_disable_hca(dev, vf + 1);
@@ -140,44 +124,19 @@
 			continue;
 		}
 		sriov->vfs_ctx[vf].enabled = 0;
-		sriov->enabled_vfs--;
 	}
 
-out:
 	if (MLX5_ESWITCH_MANAGER(dev))
-		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+		mlx5_eswitch_disable(dev->priv.eswitch);
 
-	if (mlx5_wait_for_vf_pages(dev))
+	if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
 		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
 }
 
-static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
-{
-	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-	int err = 0;
-
-	if (pci_num_vf(pdev)) {
-		mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
-		return -EBUSY;
-	}
-
-	err = pci_enable_sriov(pdev, num_vfs);
-	if (err)
-		mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
-
-	return err;
-}
-
-static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
-{
-	pci_disable_sriov(pdev);
-}
-
 static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-	int err = 0;
+	int err;
 
 	err = mlx5_device_enable_sriov(dev, num_vfs);
 	if (err) {
@@ -185,64 +144,47 @@
 		return err;
 	}
 
-	err = mlx5_pci_enable_sriov(pdev, num_vfs);
+	err = pci_enable_sriov(pdev, num_vfs);
 	if (err) {
-		mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
+		mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
 		mlx5_device_disable_sriov(dev);
-		return err;
 	}
-
-	sriov->num_vfs = num_vfs;
-
-	return 0;
+	return err;
 }
 
 static void mlx5_sriov_disable(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 
-	mlx5_pci_disable_sriov(pdev);
+	pci_disable_sriov(pdev);
 	mlx5_device_disable_sriov(dev);
-	sriov->num_vfs = 0;
 }
 
 int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 	int err = 0;
 
 	mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
-	if (!mlx5_core_is_pf(dev))
-		return -EPERM;
 
-	if (num_vfs) {
-		int ret;
-
-		ret = mlx5_lag_forbid(dev);
-		if (ret && (ret != -ENODEV))
-			return ret;
-	}
-
-	if (num_vfs) {
+	if (num_vfs)
 		err = mlx5_sriov_enable(pdev, num_vfs);
-	} else {
+	else
 		mlx5_sriov_disable(pdev);
-		mlx5_lag_allow(dev);
-	}
 
+	if (!err)
+		sriov->num_vfs = num_vfs;
 	return err ? err : num_vfs;
 }
 
 int mlx5_sriov_attach(struct mlx5_core_dev *dev)
 {
-	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-
-	if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+	if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
 		return 0;
 
 	/* If sriov VFs exist in PCI level, enable them in device level */
-	return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+	return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev));
 }
 
 void mlx5_sriov_detach(struct mlx5_core_dev *dev)
@@ -253,6 +195,30 @@
 	mlx5_device_disable_sriov(dev);
 }
 
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+	u16 host_total_vfs;
+	const u32 *out;
+
+	if (mlx5_core_is_ecpf_esw_manager(dev)) {
+		out = mlx5_esw_query_functions(dev);
+
+		/* Old FW doesn't support getting total_vfs from esw func
+		 * but supports getting it from pci_sriov.
+		 */
+		if (IS_ERR(out))
+			goto done;
+		host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+					  host_params_context.host_total_vfs);
+		kvfree(out);
+		if (host_total_vfs)
+			return host_total_vfs;
+	}
+
+done:
+	return pci_sriov_get_totalvfs(dev->pdev);
+}
+
 int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -263,6 +229,7 @@
 		return 0;
 
 	total_vfs = pci_sriov_get_totalvfs(pdev);
+	sriov->max_vfs = mlx5_get_max_vfs(dev);
 	sriov->num_vfs = pci_num_vf(pdev);
 	sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
 	if (!sriov->vfs_ctx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
deleted file mode 100644
index 23cc337..0000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ /dev/null
@@ -1,692 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
-	struct mlx5_srq_table *table = &dev->priv.srq_table;
-	struct mlx5_core_srq *srq;
-
-	spin_lock(&table->lock);
-
-	srq = radix_tree_lookup(&table->tree, srqn);
-	if (srq)
-		atomic_inc(&srq->refcount);
-
-	spin_unlock(&table->lock);
-
-	if (!srq) {
-		mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
-		return;
-	}
-
-	srq->event(srq, event_type);
-
-	if (atomic_dec_and_test(&srq->refcount))
-		complete(&srq->free);
-}
-
-static int get_pas_size(struct mlx5_srq_attr *in)
-{
-	u32 log_page_size = in->log_page_size + 12;
-	u32 log_srq_size  = in->log_size;
-	u32 log_rq_stride = in->wqe_shift;
-	u32 page_offset   = in->page_offset;
-	u32 po_quanta	  = 1 << (log_page_size - 6);
-	u32 rq_sz	  = 1 << (log_srq_size + 4 + log_rq_stride);
-	u32 page_size	  = 1 << log_page_size;
-	u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
-	u32 rq_num_pas	  = (rq_sz_po + page_size - 1) / page_size;
-
-	return rq_num_pas * sizeof(u64);
-}
-
-static void set_wq(void *wq, struct mlx5_srq_attr *in)
-{
-	MLX5_SET(wq,   wq, wq_signature,  !!(in->flags
-		 & MLX5_SRQ_FLAG_WQ_SIG));
-	MLX5_SET(wq,   wq, log_wq_pg_sz,  in->log_page_size);
-	MLX5_SET(wq,   wq, log_wq_stride, in->wqe_shift + 4);
-	MLX5_SET(wq,   wq, log_wq_sz,     in->log_size);
-	MLX5_SET(wq,   wq, page_offset,   in->page_offset);
-	MLX5_SET(wq,   wq, lwm,		  in->lwm);
-	MLX5_SET(wq,   wq, pd,		  in->pd);
-	MLX5_SET64(wq, wq, dbr_addr,	  in->db_record);
-}
-
-static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
-	MLX5_SET(srqc,   srqc, wq_signature,  !!(in->flags
-		 & MLX5_SRQ_FLAG_WQ_SIG));
-	MLX5_SET(srqc,   srqc, log_page_size, in->log_page_size);
-	MLX5_SET(srqc,   srqc, log_rq_stride, in->wqe_shift);
-	MLX5_SET(srqc,   srqc, log_srq_size,  in->log_size);
-	MLX5_SET(srqc,   srqc, page_offset,   in->page_offset);
-	MLX5_SET(srqc,	 srqc, lwm,	      in->lwm);
-	MLX5_SET(srqc,	 srqc, pd,	      in->pd);
-	MLX5_SET64(srqc, srqc, dbr_addr,      in->db_record);
-	MLX5_SET(srqc,	 srqc, xrcd,	      in->xrcd);
-	MLX5_SET(srqc,	 srqc, cqn,	      in->cqn);
-}
-
-static void get_wq(void *wq, struct mlx5_srq_attr *in)
-{
-	if (MLX5_GET(wq, wq, wq_signature))
-		in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
-	in->log_page_size = MLX5_GET(wq,   wq, log_wq_pg_sz);
-	in->wqe_shift	  = MLX5_GET(wq,   wq, log_wq_stride) - 4;
-	in->log_size	  = MLX5_GET(wq,   wq, log_wq_sz);
-	in->page_offset   = MLX5_GET(wq,   wq, page_offset);
-	in->lwm		  = MLX5_GET(wq,   wq, lwm);
-	in->pd		  = MLX5_GET(wq,   wq, pd);
-	in->db_record	  = MLX5_GET64(wq, wq, dbr_addr);
-}
-
-static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
-	if (MLX5_GET(srqc, srqc, wq_signature))
-		in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
-	in->log_page_size = MLX5_GET(srqc,   srqc, log_page_size);
-	in->wqe_shift	  = MLX5_GET(srqc,   srqc, log_rq_stride);
-	in->log_size	  = MLX5_GET(srqc,   srqc, log_srq_size);
-	in->page_offset   = MLX5_GET(srqc,   srqc, page_offset);
-	in->lwm		  = MLX5_GET(srqc,   srqc, lwm);
-	in->pd		  = MLX5_GET(srqc,   srqc, pd);
-	in->db_record	  = MLX5_GET64(srqc, srqc, dbr_addr);
-}
-
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
-{
-	struct mlx5_srq_table *table = &dev->priv.srq_table;
-	struct mlx5_core_srq *srq;
-
-	spin_lock(&table->lock);
-
-	srq = radix_tree_lookup(&table->tree, srqn);
-	if (srq)
-		atomic_inc(&srq->refcount);
-
-	spin_unlock(&table->lock);
-
-	return srq;
-}
-EXPORT_SYMBOL(mlx5_core_get_srq);
-
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			  struct mlx5_srq_attr *in)
-{
-	u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
-	void *create_in;
-	void *srqc;
-	void *pas;
-	int pas_size;
-	int inlen;
-	int err;
-
-	pas_size  = get_pas_size(in);
-	inlen	  = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
-	create_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!create_in)
-		return -ENOMEM;
-
-	srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
-	pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
-
-	set_srqc(srqc, in);
-	memcpy(pas, in->pas, pas_size);
-
-	MLX5_SET(create_srq_in, create_in, opcode,
-		 MLX5_CMD_OP_CREATE_SRQ);
-
-	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
-			    sizeof(create_out));
-	kvfree(create_in);
-	if (!err)
-		srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
-
-	return err;
-}
-
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
-			   struct mlx5_core_srq *srq)
-{
-	u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
-	u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
-
-	MLX5_SET(destroy_srq_in, srq_in, opcode,
-		 MLX5_CMD_OP_DESTROY_SRQ);
-	MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
-
-	return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-			     srq_out, sizeof(srq_out));
-}
-
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-		       u16 lwm, int is_srq)
-{
-	u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-	u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-
-	MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
-	MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
-	MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
-	MLX5_SET(arm_rq_in, srq_in, lwm,      lwm);
-
-	return  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-			      srq_out, sizeof(srq_out));
-}
-
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			 struct mlx5_srq_attr *out)
-{
-	u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
-	u32 *srq_out;
-	void *srqc;
-	int err;
-
-	srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
-	if (!srq_out)
-		return -ENOMEM;
-
-	MLX5_SET(query_srq_in, srq_in, opcode,
-		 MLX5_CMD_OP_QUERY_SRQ);
-	MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
-	err =  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-			     srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
-	if (err)
-		goto out;
-
-	srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
-	get_srqc(srqc, out);
-	if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
-		out->flags |= MLX5_SRQ_FLAG_ERR;
-out:
-	kvfree(srq_out);
-	return err;
-}
-
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
-			      struct mlx5_core_srq *srq,
-			      struct mlx5_srq_attr *in)
-{
-	u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
-	void *create_in;
-	void *xrc_srqc;
-	void *pas;
-	int pas_size;
-	int inlen;
-	int err;
-
-	pas_size  = get_pas_size(in);
-	inlen	  = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
-	create_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!create_in)
-		return -ENOMEM;
-
-	xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
-				xrc_srq_context_entry);
-	pas	 = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
-
-	set_srqc(xrc_srqc, in);
-	MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
-	memcpy(pas, in->pas, pas_size);
-	MLX5_SET(create_xrc_srq_in, create_in, opcode,
-		 MLX5_CMD_OP_CREATE_XRC_SRQ);
-
-	memset(create_out, 0, sizeof(create_out));
-	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
-			    sizeof(create_out));
-	if (err)
-		goto out;
-
-	srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
-out:
-	kvfree(create_in);
-	return err;
-}
-
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
-			       struct mlx5_core_srq *srq)
-{
-	u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
-	u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
-	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
-		 MLX5_CMD_OP_DESTROY_XRC_SRQ);
-	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-
-	return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
-			     xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
-			   struct mlx5_core_srq *srq, u16 lwm)
-{
-	u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
-	u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
-	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
-	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod,   MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
-
-	return  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
-			      xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
-			     struct mlx5_core_srq *srq,
-			     struct mlx5_srq_attr *out)
-{
-	u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
-	u32 *xrcsrq_out;
-	void *xrc_srqc;
-	int err;
-
-	xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
-	if (!xrcsrq_out)
-		return -ENOMEM;
-	memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-
-	MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
-		 MLX5_CMD_OP_QUERY_XRC_SRQ);
-	MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-
-	err =  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
-			     MLX5_ST_SZ_BYTES(query_xrc_srq_out));
-	if (err)
-		goto out;
-
-	xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
-				xrc_srq_context_entry);
-	get_srqc(xrc_srqc, out);
-	if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
-		out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
-	kvfree(xrcsrq_out);
-	return err;
-}
-
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			  struct mlx5_srq_attr *in)
-{
-	void *create_in;
-	void *rmpc;
-	void *wq;
-	int pas_size;
-	int inlen;
-	int err;
-
-	pas_size = get_pas_size(in);
-	inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
-	create_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!create_in)
-		return -ENOMEM;
-
-	rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
-	wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
-	MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-	set_wq(wq, in);
-	memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
-
-	err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
-
-	kvfree(create_in);
-	return err;
-}
-
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
-			   struct mlx5_core_srq *srq)
-{
-	return mlx5_core_destroy_rmp(dev, srq->srqn);
-}
-
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
-		       struct mlx5_core_srq *srq,
-		       u16 lwm)
-{
-	void *in;
-	void *rmpc;
-	void *wq;
-	void *bitmask;
-	int err;
-
-	in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
-
-	rmpc =	  MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
-	bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
-	wq   =	  MLX5_ADDR_OF(rmpc,	        rmpc, wq);
-
-	MLX5_SET(modify_rmp_in, in,	 rmp_state, MLX5_RMPC_STATE_RDY);
-	MLX5_SET(modify_rmp_in, in,	 rmpn,      srq->srqn);
-	MLX5_SET(wq,		wq,	 lwm,	    lwm);
-	MLX5_SET(rmp_bitmask,	bitmask, lwm,	    1);
-	MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
-	err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
-	kvfree(in);
-	return err;
-}
-
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			 struct mlx5_srq_attr *out)
-{
-	u32 *rmp_out;
-	void *rmpc;
-	int err;
-
-	rmp_out =  kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
-	if (!rmp_out)
-		return -ENOMEM;
-
-	err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
-	if (err)
-		goto out;
-
-	rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
-	get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
-	if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
-		out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
-	kvfree(rmp_out);
-	return err;
-}
-
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			  struct mlx5_srq_attr *in)
-{
-	u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
-	void *create_in;
-	void *xrqc;
-	void *wq;
-	int pas_size;
-	int inlen;
-	int err;
-
-	pas_size = get_pas_size(in);
-	inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
-	create_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!create_in)
-		return -ENOMEM;
-
-	xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
-	wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
-
-	set_wq(wq, in);
-	memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
-
-	if (in->type == IB_SRQT_TM) {
-		MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
-		if (in->flags & MLX5_SRQ_FLAG_RNDV)
-			MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
-		MLX5_SET(xrqc, xrqc,
-			 tag_matching_topology_context.log_matching_list_sz,
-			 in->tm_log_list_size);
-	}
-	MLX5_SET(xrqc, xrqc, user_index, in->user_index);
-	MLX5_SET(xrqc, xrqc, cqn, in->cqn);
-	MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
-	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
-			    sizeof(create_out));
-	kvfree(create_in);
-	if (!err)
-		srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
-
-	return err;
-}
-
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
-
-	MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
-	MLX5_SET(destroy_xrq_in, in, xrqn,   srq->srqn);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
-		       struct mlx5_core_srq *srq,
-		       u16 lwm)
-{
-	u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-
-	MLX5_SET(arm_rq_in, in, opcode,     MLX5_CMD_OP_ARM_RQ);
-	MLX5_SET(arm_rq_in, in, op_mod,     MLX5_ARM_RQ_IN_OP_MOD_XRQ);
-	MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
-	MLX5_SET(arm_rq_in, in, lwm,	    lwm);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			 struct mlx5_srq_attr *out)
-{
-	u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
-	u32 *xrq_out;
-	int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
-	void *xrqc;
-	int err;
-
-	xrq_out = kvzalloc(outlen, GFP_KERNEL);
-	if (!xrq_out)
-		return -ENOMEM;
-
-	MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
-	MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
-
-	err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
-	if (err)
-		goto out;
-
-	xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
-	get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
-	if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
-		out->flags |= MLX5_SRQ_FLAG_ERR;
-	out->tm_next_tag =
-		MLX5_GET(xrqc, xrqc,
-			 tag_matching_topology_context.append_next_index);
-	out->tm_hw_phase_cnt =
-		MLX5_GET(xrqc, xrqc,
-			 tag_matching_topology_context.hw_phase_cnt);
-	out->tm_sw_phase_cnt =
-		MLX5_GET(xrqc, xrqc,
-			 tag_matching_topology_context.sw_phase_cnt);
-
-out:
-	kvfree(xrq_out);
-	return err;
-}
-
-static int create_srq_split(struct mlx5_core_dev *dev,
-			    struct mlx5_core_srq *srq,
-			    struct mlx5_srq_attr *in)
-{
-	if (!dev->issi)
-		return create_srq_cmd(dev, srq, in);
-	switch (srq->common.res) {
-	case MLX5_RES_XSRQ:
-		return create_xrc_srq_cmd(dev, srq, in);
-	case MLX5_RES_XRQ:
-		return create_xrq_cmd(dev, srq, in);
-	default:
-		return create_rmp_cmd(dev, srq, in);
-	}
-}
-
-static int destroy_srq_split(struct mlx5_core_dev *dev,
-			     struct mlx5_core_srq *srq)
-{
-	if (!dev->issi)
-		return destroy_srq_cmd(dev, srq);
-	switch (srq->common.res) {
-	case MLX5_RES_XSRQ:
-		return destroy_xrc_srq_cmd(dev, srq);
-	case MLX5_RES_XRQ:
-		return destroy_xrq_cmd(dev, srq);
-	default:
-		return destroy_rmp_cmd(dev, srq);
-	}
-}
-
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			 struct mlx5_srq_attr *in)
-{
-	int err;
-	struct mlx5_srq_table *table = &dev->priv.srq_table;
-
-	switch (in->type) {
-	case IB_SRQT_XRC:
-		srq->common.res = MLX5_RES_XSRQ;
-		break;
-	case IB_SRQT_TM:
-		srq->common.res = MLX5_RES_XRQ;
-		break;
-	default:
-		srq->common.res = MLX5_RES_SRQ;
-	}
-
-	err = create_srq_split(dev, srq, in);
-	if (err)
-		return err;
-
-	atomic_set(&srq->refcount, 1);
-	init_completion(&srq->free);
-
-	spin_lock_irq(&table->lock);
-	err = radix_tree_insert(&table->tree, srq->srqn, srq);
-	spin_unlock_irq(&table->lock);
-	if (err) {
-		mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
-		goto err_destroy_srq_split;
-	}
-
-	return 0;
-
-err_destroy_srq_split:
-	destroy_srq_split(dev, srq);
-
-	return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_srq);
-
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
-	struct mlx5_srq_table *table = &dev->priv.srq_table;
-	struct mlx5_core_srq *tmp;
-	int err;
-
-	spin_lock_irq(&table->lock);
-	tmp = radix_tree_delete(&table->tree, srq->srqn);
-	spin_unlock_irq(&table->lock);
-	if (!tmp) {
-		mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
-		return -EINVAL;
-	}
-	if (tmp != srq) {
-		mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
-		return -EINVAL;
-	}
-
-	err = destroy_srq_split(dev, srq);
-	if (err)
-		return err;
-
-	if (atomic_dec_and_test(&srq->refcount))
-		complete(&srq->free);
-	wait_for_completion(&srq->free);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
-
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-			struct mlx5_srq_attr *out)
-{
-	if (!dev->issi)
-		return query_srq_cmd(dev, srq, out);
-	switch (srq->common.res) {
-	case MLX5_RES_XSRQ:
-		return query_xrc_srq_cmd(dev, srq, out);
-	case MLX5_RES_XRQ:
-		return query_xrq_cmd(dev, srq, out);
-	default:
-		return query_rmp_cmd(dev, srq, out);
-	}
-}
-EXPORT_SYMBOL(mlx5_core_query_srq);
-
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-		      u16 lwm, int is_srq)
-{
-	if (!dev->issi)
-		return arm_srq_cmd(dev, srq, lwm, is_srq);
-	switch (srq->common.res) {
-	case MLX5_RES_XSRQ:
-		return arm_xrc_srq_cmd(dev, srq, lwm);
-	case MLX5_RES_XRQ:
-		return arm_xrq_cmd(dev, srq, lwm);
-	default:
-		return arm_rmp_cmd(dev, srq, lwm);
-	}
-}
-EXPORT_SYMBOL(mlx5_core_arm_srq);
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
-{
-	struct mlx5_srq_table *table = &dev->priv.srq_table;
-
-	memset(table, 0, sizeof(*table));
-	spin_lock_init(&table->lock);
-	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-}
-
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
-{
-	/* nothing */
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile
new file mode 100644
index 0000000..c78512e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
new file mode 100644
index 0000000..004c56c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -0,0 +1,1589 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+enum dr_action_domain {
+	DR_ACTION_DOMAIN_NIC_INGRESS,
+	DR_ACTION_DOMAIN_NIC_EGRESS,
+	DR_ACTION_DOMAIN_FDB_INGRESS,
+	DR_ACTION_DOMAIN_FDB_EGRESS,
+	DR_ACTION_DOMAIN_MAX,
+};
+
+enum dr_action_valid_state {
+	DR_ACTION_STATE_ERR,
+	DR_ACTION_STATE_NO_ACTION,
+	DR_ACTION_STATE_REFORMAT,
+	DR_ACTION_STATE_MODIFY_HDR,
+	DR_ACTION_STATE_MODIFY_VLAN,
+	DR_ACTION_STATE_NON_TERM,
+	DR_ACTION_STATE_TERM,
+	DR_ACTION_STATE_MAX,
+};
+
+static const enum dr_action_valid_state
+next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = {
+	[DR_ACTION_DOMAIN_NIC_INGRESS] = {
+		[DR_ACTION_STATE_NO_ACTION] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+		},
+		[DR_ACTION_STATE_REFORMAT] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+		},
+		[DR_ACTION_STATE_MODIFY_HDR] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
+		},
+		[DR_ACTION_STATE_MODIFY_VLAN] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+		},
+		[DR_ACTION_STATE_NON_TERM] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+		},
+		[DR_ACTION_STATE_TERM] = {
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_TERM,
+		},
+	},
+	[DR_ACTION_DOMAIN_NIC_EGRESS] = {
+		[DR_ACTION_STATE_NO_ACTION] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+		},
+		[DR_ACTION_STATE_REFORMAT] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+		},
+		[DR_ACTION_STATE_MODIFY_HDR] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+		},
+		[DR_ACTION_STATE_MODIFY_VLAN] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+		},
+		[DR_ACTION_STATE_NON_TERM] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+		},
+		[DR_ACTION_STATE_TERM] = {
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_TERM,
+		},
+	},
+	[DR_ACTION_DOMAIN_FDB_INGRESS] = {
+		[DR_ACTION_STATE_NO_ACTION] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_REFORMAT] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_MODIFY_HDR] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_MODIFY_VLAN] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+		},
+		[DR_ACTION_STATE_NON_TERM] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_TERM] = {
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_TERM,
+		},
+	},
+	[DR_ACTION_DOMAIN_FDB_EGRESS] = {
+		[DR_ACTION_STATE_NO_ACTION] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_REFORMAT] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_MODIFY_HDR] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_MODIFY_VLAN] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_NON_TERM] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
+			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+		},
+		[DR_ACTION_STATE_TERM] = {
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_TERM,
+		},
+	},
+};
+
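+/* A worked example of the table above (the action values are
+ * illustrative, not taken from a real rule): in the NIC RX domain the
+ * sequence
+ *
+ *	CTR -> TNL_L2_TO_L2 -> MODIFY_HDR -> FT
+ *
+ * walks NO_ACTION -> NON_TERM -> REFORMAT -> MODIFY_HDR -> TERM and is
+ * accepted, while appending anything but CTR after FT is rejected:
+ * every other entry of DR_ACTION_STATE_TERM is zero-initialized, i.e.
+ * DR_ACTION_STATE_ERR.
+ */
+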
+struct dr_action_modify_field_conv {
+	u16 hw_field;
+	u8 start;
+	u8 end;
+	u8 l3_type;
+	u8 l4_type;
+};
+
+static const struct dr_action_modify_field_conv dr_action_conv_arr[] = {
+	[MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 16, .end = 47,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 0, .end = 15,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 32, .end = 47,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 16, .end = 47,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 0, .end = 15,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 0, .end = 5,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 48, .end = 56,
+		.l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15,
+		.l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31,
+		.l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15,
+		.l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31,
+		.l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 32, .end = 63,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 0, .end = 31,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 32, .end = 63,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 0, .end = 31,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 32, .end = 63,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 0, .end = 31,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63,
+		.l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 0, .end = 31,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 32, .end = 63,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 32, .end = 63,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 0, .end = 31,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 32, .end = 63,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 0, .end = 31,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 32, .end = 63,
+	},
+	[MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 0, .end = 31,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 32, .end = 63,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 0, .end = 31,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+		.hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 0, .end = 15,
+	},
+};
+
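+/* Reading one entry of the conversion table above as an example:
+ * MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT maps to bits 16..31 of the HW
+ * field MLX5DR_ACTION_MDFY_HW_FLD_L4_0 and forces l4_type to TCP, so
+ * it cannot be combined with a UDP port modification in the same
+ * action list (see dr_actions_convert_modify_header() below).
+ */
+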
+#define MAX_VLANS 2
+struct dr_action_vlan_info {
+	int	count;
+	u32	headers[MAX_VLANS];
+};
+
+struct dr_action_apply_attr {
+	u32	modify_index;
+	u16	modify_actions;
+	u32	decap_index;
+	u16	decap_actions;
+	u8	decap_with_vlan:1;
+	u64	final_icm_addr;
+	u32	flow_tag;
+	u32	ctr_id;
+	u16	gvmi;
+	u16	hit_gvmi;
+	u32	reformat_id;
+	u32	reformat_size;
+	struct	dr_action_vlan_info vlans;
+};
+
+static int
+dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type,
+				  enum mlx5dr_action_type *action_type)
+{
+	switch (reformat_type) {
+	case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2:
+		*action_type = DR_ACTION_TYP_TNL_L2_TO_L2;
+		break;
+	case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2:
+		*action_type = DR_ACTION_TYP_L2_TO_TNL_L2;
+		break;
+	case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2:
+		*action_type = DR_ACTION_TYP_TNL_L3_TO_L2;
+		break;
+	case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3:
+		*action_type = DR_ACTION_TYP_L2_TO_TNL_L3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void dr_actions_init_next_ste(u8 **last_ste,
+				     u32 *added_stes,
+				     enum mlx5dr_ste_entry_type entry_type,
+				     u16 gvmi)
+{
+	(*added_stes)++;
+	*last_ste += DR_STE_SIZE;
+	mlx5dr_ste_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, entry_type, gvmi);
+}
+
+static void dr_actions_apply_tx(struct mlx5dr_domain *dmn,
+				u8 *action_type_set,
+				u8 *last_ste,
+				struct dr_action_apply_attr *attr,
+				u32 *added_stes)
+{
+	bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] ||
+		action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3];
+
+	/* We want to make sure the modify header comes before L2
+	 * encapsulation. The reason is that we support modify
+	 * headers for outer headers only.
+	 */
+	if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+		mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+		mlx5dr_ste_set_rewrite_actions(last_ste,
+					       attr->modify_actions,
+					       attr->modify_index);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+		int i;
+
+		for (i = 0; i < attr->vlans.count; i++) {
+			if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
+				dr_actions_init_next_ste(&last_ste,
+							 added_stes,
+							 MLX5DR_STE_TYPE_TX,
+							 attr->gvmi);
+
+			mlx5dr_ste_set_tx_push_vlan(last_ste,
+						    attr->vlans.headers[i],
+						    encap);
+		}
+	}
+
+	if (encap) {
+		/* Modify header and encapsulation require different STEs,
+		 * since the modify header STE format doesn't support the
+		 * encapsulation tunneling_action.
+		 */
+		if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] ||
+		    action_type_set[DR_ACTION_TYP_PUSH_VLAN])
+			dr_actions_init_next_ste(&last_ste,
+						 added_stes,
+						 MLX5DR_STE_TYPE_TX,
+						 attr->gvmi);
+
+		mlx5dr_ste_set_tx_encap(last_ste,
+					attr->reformat_id,
+					attr->reformat_size,
+					action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
+		/* Whenever prio_tag_required is enabled, we can be sure
+		 * that the previous table (ACL) already pushed a vlan onto
+		 * our packet, and due to a HW limitation we need to set
+		 * this bit; otherwise push vlan + reformat will not work.
+		 */
+		if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required))
+			mlx5dr_ste_set_go_back_bit(last_ste);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_CTR])
+		mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
+}
+
+static void dr_actions_apply_rx(u8 *action_type_set,
+				u8 *last_ste,
+				struct dr_action_apply_attr *attr,
+				u32 *added_stes)
+{
+	if (action_type_set[DR_ACTION_TYP_CTR])
+		mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
+
+	if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+		mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+		mlx5dr_ste_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
+		mlx5dr_ste_set_rewrite_actions(last_ste,
+					       attr->decap_actions,
+					       attr->decap_index);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2])
+		mlx5dr_ste_set_rx_decap(last_ste);
+
+	if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+		int i;
+
+		for (i = 0; i < attr->vlans.count; i++) {
+			if (i ||
+			    action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] ||
+			    action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
+				dr_actions_init_next_ste(&last_ste,
+							 added_stes,
+							 MLX5DR_STE_TYPE_RX,
+							 attr->gvmi);
+
+			mlx5dr_ste_set_rx_pop_vlan(last_ste);
+		}
+	}
+
+	if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+		if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+			dr_actions_init_next_ste(&last_ste,
+						 added_stes,
+						 MLX5DR_STE_TYPE_MODIFY_PKT,
+						 attr->gvmi);
+		else
+			mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+
+		mlx5dr_ste_set_rewrite_actions(last_ste,
+					       attr->modify_actions,
+					       attr->modify_index);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_TAG]) {
+		if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+			dr_actions_init_next_ste(&last_ste,
+						 added_stes,
+						 MLX5DR_STE_TYPE_RX,
+						 attr->gvmi);
+
+		mlx5dr_ste_rx_set_flow_tag(last_ste, attr->flow_tag);
+	}
+}
+
+/* Apply the actions on the rule STE array starting from the last_ste.
+ * Actions might require more than one STE; in that case new_num_stes
+ * is updated with the new size of the STE array, i.e. the rule
+ * including its action STEs.
+ */
+static void dr_actions_apply(struct mlx5dr_domain *dmn,
+			     enum mlx5dr_ste_entry_type ste_type,
+			     u8 *action_type_set,
+			     u8 *last_ste,
+			     struct dr_action_apply_attr *attr,
+			     u32 *new_num_stes)
+{
+	u32 added_stes = 0;
+
+	if (ste_type == MLX5DR_STE_TYPE_RX)
+		dr_actions_apply_rx(action_type_set, last_ste, attr, &added_stes);
+	else
+		dr_actions_apply_tx(dmn, action_type_set, last_ste, attr, &added_stes);
+
+	last_ste += added_stes * DR_STE_SIZE;
+	*new_num_stes += added_stes;
+
+	mlx5dr_ste_set_hit_gvmi(last_ste, attr->hit_gvmi);
+	mlx5dr_ste_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+static enum dr_action_domain
+dr_action_get_action_domain(enum mlx5dr_domain_type domain,
+			    enum mlx5dr_ste_entry_type ste_type)
+{
+	switch (domain) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		return DR_ACTION_DOMAIN_NIC_INGRESS;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		return DR_ACTION_DOMAIN_NIC_EGRESS;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		if (ste_type == MLX5DR_STE_TYPE_RX)
+			return DR_ACTION_DOMAIN_FDB_INGRESS;
+		return DR_ACTION_DOMAIN_FDB_EGRESS;
+	default:
+		WARN_ON(true);
+		return DR_ACTION_DOMAIN_MAX;
+	}
+}
+
+static
+int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain,
+					  u32 action_type,
+					  u32 *state)
+{
+	u32 cur_state = *state;
+
+	/* Check action state machine is valid */
+	*state = next_action_state[action_domain][cur_state][action_type];
+
+	if (*state == DR_ACTION_STATE_ERR)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
+				      struct mlx5dr_action *dest_action,
+				      u64 *final_icm_addr)
+{
+	int ret;
+
+	switch (dest_action->action_type) {
+	case DR_ACTION_TYP_FT:
+		/* Allow a destination flow table only if the table is a
+		 * terminating table, since there is an *assumption* that in
+		 * such a case the FW will recalculate the CS.
+		 */
+		if (dest_action->dest_tbl.is_fw_tbl) {
+			*final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr;
+		} else {
+			mlx5dr_dbg(dmn,
+				   "Destination FT should be terminating when modify TTL is used\n");
+			return -EINVAL;
+		}
+		break;
+
+	case DR_ACTION_TYP_VPORT:
+		/* If destination is vport we will get the FW flow table
+		 * that recalculates the CS and forwards to the vport.
+		 */
+		ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn,
+								dest_action->vport.caps->num,
+								final_icm_addr);
+		if (ret) {
+			mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
+			return ret;
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+#define WITH_VLAN_NUM_HW_ACTIONS 6
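+/* dr_actions_l2_rewrite() below emits 5 HW SET actions for a plain L2
+ * header and 6 when the header carries a vlan, so an L3 decap whose
+ * rewrite uses WITH_VLAN_NUM_HW_ACTIONS actions is known to have had a
+ * vlan in the inner L2 header.
+ */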
+
+int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
+				 struct mlx5dr_matcher_rx_tx *nic_matcher,
+				 struct mlx5dr_action *actions[],
+				 u32 num_actions,
+				 u8 *ste_arr,
+				 u32 *new_hw_ste_arr_sz)
+{
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+	bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	u8 action_type_set[DR_ACTION_TYP_MAX] = {};
+	struct mlx5dr_action *dest_action = NULL;
+	u32 state = DR_ACTION_STATE_NO_ACTION;
+	struct dr_action_apply_attr attr = {};
+	enum dr_action_domain action_domain;
+	bool recalc_cs_required = false;
+	u8 *last_ste;
+	int i, ret;
+
+	attr.gvmi = dmn->info.caps.gvmi;
+	attr.hit_gvmi = dmn->info.caps.gvmi;
+	attr.final_icm_addr = nic_dmn->default_icm_addr;
+	action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type);
+
+	for (i = 0; i < num_actions; i++) {
+		struct mlx5dr_action *action;
+		int max_actions_type = 1;
+		u32 action_type;
+
+		action = actions[i];
+		action_type = action->action_type;
+
+		switch (action_type) {
+		case DR_ACTION_TYP_DROP:
+			attr.final_icm_addr = nic_dmn->drop_icm_addr;
+			break;
+		case DR_ACTION_TYP_FT:
+			dest_action = action;
+			if (!action->dest_tbl.is_fw_tbl) {
+				if (action->dest_tbl.tbl->dmn != dmn) {
+					mlx5dr_dbg(dmn,
+						   "Destination table belongs to a different domain\n");
+					goto out_invalid_arg;
+				}
+				if (action->dest_tbl.tbl->level <= matcher->tbl->level) {
+					mlx5dr_dbg(dmn,
+						   "Destination table level should be higher than source table\n");
+					goto out_invalid_arg;
+				}
+				attr.final_icm_addr = rx_rule ?
+					action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
+					action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr;
+			} else {
+				struct mlx5dr_cmd_query_flow_table_details output;
+				int ret;
+
+				/* get the relevant addresses */
+				if (!action->dest_tbl.fw_tbl.rx_icm_addr) {
+					ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev,
+									  action->dest_tbl.fw_tbl.ft->type,
+									  action->dest_tbl.fw_tbl.ft->id,
+									  &output);
+					if (!ret) {
+						action->dest_tbl.fw_tbl.tx_icm_addr =
+							output.sw_owner_icm_root_1;
+						action->dest_tbl.fw_tbl.rx_icm_addr =
+							output.sw_owner_icm_root_0;
+					} else {
+						mlx5dr_dbg(dmn,
+							   "Failed mlx5_cmd_query_flow_table ret: %d\n",
+							   ret);
+						return ret;
+					}
+				}
+				attr.final_icm_addr = rx_rule ?
+					action->dest_tbl.fw_tbl.rx_icm_addr :
+					action->dest_tbl.fw_tbl.tx_icm_addr;
+			}
+			break;
+		case DR_ACTION_TYP_QP:
+			mlx5dr_info(dmn, "Domain doesn't support QP\n");
+			goto out_invalid_arg;
+		case DR_ACTION_TYP_CTR:
+			attr.ctr_id = action->ctr.ctr_id +
+				action->ctr.offeset;
+			break;
+		case DR_ACTION_TYP_TAG:
+			attr.flow_tag = action->flow_tag;
+			break;
+		case DR_ACTION_TYP_TNL_L2_TO_L2:
+			break;
+		case DR_ACTION_TYP_TNL_L3_TO_L2:
+			attr.decap_index = action->rewrite.index;
+			attr.decap_actions = action->rewrite.num_of_actions;
+			attr.decap_with_vlan =
+				attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+			break;
+		case DR_ACTION_TYP_MODIFY_HDR:
+			attr.modify_index = action->rewrite.index;
+			attr.modify_actions = action->rewrite.num_of_actions;
+			recalc_cs_required = action->rewrite.modify_ttl;
+			break;
+		case DR_ACTION_TYP_L2_TO_TNL_L2:
+		case DR_ACTION_TYP_L2_TO_TNL_L3:
+			attr.reformat_size = action->reformat.reformat_size;
+			attr.reformat_id = action->reformat.reformat_id;
+			break;
+		case DR_ACTION_TYP_VPORT:
+			attr.hit_gvmi = action->vport.caps->vhca_gvmi;
+			dest_action = action;
+			if (rx_rule) {
+				/* Loopback on WIRE vport is not supported */
+				if (action->vport.caps->num == WIRE_PORT)
+					goto out_invalid_arg;
+
+				attr.final_icm_addr = action->vport.caps->icm_address_rx;
+			} else {
+				attr.final_icm_addr = action->vport.caps->icm_address_tx;
+			}
+			break;
+		case DR_ACTION_TYP_POP_VLAN:
+			max_actions_type = MAX_VLANS;
+			attr.vlans.count++;
+			break;
+		case DR_ACTION_TYP_PUSH_VLAN:
+			max_actions_type = MAX_VLANS;
+			if (attr.vlans.count == MAX_VLANS)
+				return -EINVAL;
+
+			attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr;
+			break;
+		default:
+			goto out_invalid_arg;
+		}
+
+		/* Check action duplication */
+		if (++action_type_set[action_type] > max_actions_type) {
+			mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n",
+				   action_type, max_actions_type);
+			goto out_invalid_arg;
+		}
+
+		/* Check action state machine is valid */
+		if (dr_action_validate_and_get_next_state(action_domain,
+							  action_type,
+							  &state)) {
+			mlx5dr_dbg(dmn, "Invalid action sequence provided\n");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	*new_hw_ste_arr_sz = nic_matcher->num_of_builders;
+	last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
+
+	/* Due to a HW bug, modifying TTL on RX flows will cause an
+	 * incorrect checksum calculation. In this case we will use a FW
+	 * table to recalculate the checksum.
+	 */
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
+	    rx_rule && recalc_cs_required && dest_action) {
+		ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
+		if (ret) {
+			mlx5dr_dbg(dmn,
+				   "Failed to handle checksum recalculation err %d\n",
+				   ret);
+			return ret;
+		}
+	}
+
+	dr_actions_apply(dmn,
+			 nic_dmn->ste_type,
+			 action_type_set,
+			 last_ste,
+			 &attr,
+			 new_hw_ste_arr_sz);
+
+	return 0;
+
+out_invalid_arg:
+	return -EINVAL;
+}
+
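+/* An illustrative (hypothetical) caller flow, assuming the actions
+ * were created with the mlx5dr_action_create_*() helpers below:
+ *
+ *	struct mlx5dr_action *actions[2];
+ *
+ *	actions[0] = mlx5dr_action_create_flow_counter(ctr_id);
+ *	actions[1] = mlx5dr_action_create_dest_table(tbl);
+ *	ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions,
+ *					   2, ste_arr, &new_hw_ste_arr_sz);
+ */
+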
+#define CVLAN_ETHERTYPE 0x8100
+#define SVLAN_ETHERTYPE 0x88a8
+#define HDR_LEN_L2_ONLY 14
+#define HDR_LEN_L2_VLAN 18
+#define REWRITE_HW_ACTION_NUM 6
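+/* HDR_LEN_L2_ONLY is dmac (6) + smac (6) + ethertype (2) = 14 bytes;
+ * HDR_LEN_L2_VLAN adds one 4-byte vlan tag for 18 bytes.
+ */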
+
+static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn,
+				 struct mlx5dr_action *action,
+				 void *data, size_t data_sz)
+{
+	struct mlx5_ifc_l2_hdr_bits *l2_hdr = data;
+	u64 ops[REWRITE_HW_ACTION_NUM] = {};
+	u32 hdr_fld_4b;
+	u16 hdr_fld_2b;
+	u16 vlan_type;
+	bool vlan;
+	int i = 0;
+	int ret;
+
+	vlan = (data_sz != HDR_LEN_L2_ONLY);
+
+	/* dmac_47_16 */
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_length, 0);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_left_shifter, 16);
+	hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 inline_data, hdr_fld_4b);
+	i++;
+
+	/* smac_47_16 */
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_length, 0);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_left_shifter, 16);
+	hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 |
+		      MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 inline_data, hdr_fld_4b);
+	i++;
+
+	/* dmac_15_0 */
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_length, 16);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_left_shifter, 0);
+	hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 inline_data, hdr_fld_2b);
+	i++;
+
+	/* ethertype + (optional) vlan */
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_left_shifter, 32);
+	if (!vlan) {
+		hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+		MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_2b);
+		MLX5_SET(dr_action_hw_set, ops + i, destination_length, 16);
+	} else {
+		hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+		vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN;
+		hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan);
+		hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b;
+		MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_4b);
+		MLX5_SET(dr_action_hw_set, ops + i, destination_length, 18);
+	}
+	i++;
+
+	/* smac_15_0 */
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_length, 16);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 destination_left_shifter, 0);
+	hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0);
+	MLX5_SET(dr_action_hw_set, ops + i,
+		 inline_data, hdr_fld_2b);
+	i++;
+
+	if (vlan) {
+		MLX5_SET(dr_action_hw_set, ops + i,
+			 opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+		hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type);
+		MLX5_SET(dr_action_hw_set, ops + i,
+			 inline_data, hdr_fld_2b);
+		MLX5_SET(dr_action_hw_set, ops + i,
+			 destination_length, 16);
+		MLX5_SET(dr_action_hw_set, ops + i,
+			 destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2);
+		MLX5_SET(dr_action_hw_set, ops + i,
+			 destination_left_shifter, 0);
+		i++;
+	}
+
+	action->rewrite.data = (void *)ops;
+	action->rewrite.num_of_actions = i;
+	action->rewrite.chunk->byte_size = i * sizeof(*ops);
+
+	ret = mlx5dr_send_postsend_action(dmn, action);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Writing encapsulation action to ICM failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct mlx5dr_action *
+dr_action_create_generic(enum mlx5dr_action_type action_type)
+{
+	struct mlx5dr_action *action;
+
+	action = kzalloc(sizeof(*action), GFP_KERNEL);
+	if (!action)
+		return NULL;
+
+	action->action_type = action_type;
+	refcount_set(&action->refcount, 1);
+
+	return action;
+}
+
+struct mlx5dr_action *mlx5dr_action_create_drop(void)
+{
+	return dr_action_create_generic(DR_ACTION_TYP_DROP);
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl)
+{
+	struct mlx5dr_action *action;
+
+	refcount_inc(&tbl->refcount);
+
+	action = dr_action_create_generic(DR_ACTION_TYP_FT);
+	if (!action)
+		goto dec_ref;
+
+	action->dest_tbl.tbl = tbl;
+
+	return action;
+
+dec_ref:
+	refcount_dec(&tbl->refcount);
+	return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
+					struct mlx5_core_dev *mdev)
+{
+	struct mlx5dr_action *action;
+
+	action = dr_action_create_generic(DR_ACTION_TYP_FT);
+	if (!action)
+		return NULL;
+
+	action->dest_tbl.is_fw_tbl = 1;
+	action->dest_tbl.fw_tbl.ft = ft;
+	action->dest_tbl.fw_tbl.mdev = mdev;
+
+	return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id)
+{
+	struct mlx5dr_action *action;
+
+	action = dr_action_create_generic(DR_ACTION_TYP_CTR);
+	if (!action)
+		return NULL;
+
+	action->ctr.ctr_id = counter_id;
+
+	return action;
+}
+
+struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
+{
+	struct mlx5dr_action *action;
+
+	action = dr_action_create_generic(DR_ACTION_TYP_TAG);
+	if (!action)
+		return NULL;
+
+	action->flow_tag = tag_value & 0xffffff;
+
+	return action;
+}
+
+static int
+dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
+				 struct mlx5dr_domain *dmn,
+				 size_t data_sz,
+				 void *data)
+{
+	if ((!data && data_sz) || (data && !data_sz) || reformat_type >
+		DR_ACTION_TYP_L2_TO_TNL_L3) {
+		mlx5dr_dbg(dmn, "Invalid reformat parameter!\n");
+		goto out_err;
+	}
+
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+		return 0;
+
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+		if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 &&
+		    reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) {
+			mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n");
+			goto out_err;
+		}
+	} else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+		if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 &&
+		    reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) {
+			mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n");
+			goto out_err;
+		}
+	}
+
+	return 0;
+
+out_err:
+	return -EINVAL;
+}
+
+#define ACTION_CACHE_LINE_SIZE 64
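+/* rewrite.index is the offset of the chunk within the modify header
+ * ICM area, in cache lines. E.g. (illustrative addresses only) a chunk
+ * at icm_addr 0x1000 with hdr_modify_icm_addr 0x0 gives index 64.
+ */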
+
+static int
+dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
+				 size_t data_sz, void *data,
+				 struct mlx5dr_action *action)
+{
+	u32 reformat_id;
+	int ret;
+
+	switch (action->action_type) {
+	case DR_ACTION_TYP_L2_TO_TNL_L2:
+	case DR_ACTION_TYP_L2_TO_TNL_L3:
+	{
+		enum mlx5_reformat_ctx_type rt;
+
+		if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2)
+			rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+		else
+			rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+
+		ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data,
+						     &reformat_id);
+		if (ret)
+			return ret;
+
+		action->reformat.reformat_id = reformat_id;
+		action->reformat.reformat_size = data_sz;
+		return 0;
+	}
+	case DR_ACTION_TYP_TNL_L2_TO_L2:
+	{
+		return 0;
+	}
+	case DR_ACTION_TYP_TNL_L3_TO_L2:
+	{
+		/* Only an Ethernet frame is supported: with a vlan
+		 * header (18 bytes) or without one (14 bytes)
+		 */
+		if (data_sz != HDR_LEN_L2_ONLY && data_sz != HDR_LEN_L2_VLAN)
+			return -EINVAL;
+
+		action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+							       DR_CHUNK_SIZE_8);
+		if (!action->rewrite.chunk)
+			return -ENOMEM;
+
+		action->rewrite.index = (action->rewrite.chunk->icm_addr -
+					 dmn->info.caps.hdr_modify_icm_addr) /
+					 ACTION_CACHE_LINE_SIZE;
+
+		ret = dr_actions_l2_rewrite(dmn, action, data, data_sz);
+		if (ret) {
+			mlx5dr_icm_free_chunk(action->rewrite.chunk);
+			return ret;
+		}
+		return 0;
+	}
+	default:
+		mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
+		return -EINVAL;
+	}
+}
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void)
+{
+	return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN);
+}
+
+struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
+						     __be32 vlan_hdr)
+{
+	u32 vlan_hdr_h = ntohl(vlan_hdr);
+	u16 ethertype = vlan_hdr_h >> 16;
+	struct mlx5dr_action *action;
+
+	if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) {
+		mlx5dr_dbg(dmn, "Invalid vlan ethertype\n");
+		return NULL;
+	}
+
+	action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN);
+	if (!action)
+		return NULL;
+
+	action->push_vlan.vlan_hdr = vlan_hdr_h;
+	return action;
+}
+
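+/* A minimal usage sketch for the helper above (tci is a hypothetical
+ * PCP/DEI/VID value): the caller passes the 4-byte vlan header in
+ * network byte order, ethertype in the upper 16 bits and the TCI in
+ * the lower 16 bits:
+ *
+ *	__be32 vlan_hdr = cpu_to_be32((CVLAN_ETHERTYPE << 16) | tci);
+ *	action = mlx5dr_action_create_push_vlan(dmn, vlan_hdr);
+ */
+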
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+				     enum mlx5dr_action_reformat_type reformat_type,
+				     size_t data_sz,
+				     void *data)
+{
+	enum mlx5dr_action_type action_type;
+	struct mlx5dr_action *action;
+	int ret;
+
+	refcount_inc(&dmn->refcount);
+
+	/* General checks */
+	ret = dr_action_reformat_to_action_type(reformat_type, &action_type);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Invalid reformat_type provided\n");
+		goto dec_ref;
+	}
+
+	ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data);
+	if (ret)
+		goto dec_ref;
+
+	action = dr_action_create_generic(action_type);
+	if (!action)
+		goto dec_ref;
+
+	action->reformat.dmn = dmn;
+
+	ret = dr_action_create_reformat_action(dmn,
+					       data_sz,
+					       data,
+					       action);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret);
+		goto free_action;
+	}
+
+	return action;
+
+free_action:
+	kfree(action);
+dec_ref:
+	refcount_dec(&dmn->refcount);
+	return NULL;
+}
+
+static const struct dr_action_modify_field_conv *
+dr_action_modify_get_hw_info(u16 sw_field)
+{
+	const struct dr_action_modify_field_conv *hw_action_info;
+
+	if (sw_field >= ARRAY_SIZE(dr_action_conv_arr))
+		goto not_found;
+
+	hw_action_info = &dr_action_conv_arr[sw_field];
+	if (!hw_action_info->end && !hw_action_info->start)
+		goto not_found;
+
+	return hw_action_info;
+
+not_found:
+	return NULL;
+}
+
+static int
+dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn,
+			  __be64 *sw_action,
+			  __be64 *hw_action,
+			  const struct dr_action_modify_field_conv **ret_hw_info)
+{
+	const struct dr_action_modify_field_conv *hw_action_info;
+	u8 offset, length, max_length, action;
+	u16 sw_field;
+	u8 hw_opcode;
+	u32 data;
+
+	/* Get SW modify action data */
+	action = MLX5_GET(set_action_in, sw_action, action_type);
+	length = MLX5_GET(set_action_in, sw_action, length);
+	offset = MLX5_GET(set_action_in, sw_action, offset);
+	sw_field = MLX5_GET(set_action_in, sw_action, field);
+	data = MLX5_GET(set_action_in, sw_action, data);
+
+	/* Convert SW data to HW modify action format */
+	hw_action_info = dr_action_modify_get_hw_info(sw_field);
+	if (!hw_action_info) {
+		mlx5dr_dbg(dmn, "Modify action invalid field given\n");
+		return -EINVAL;
+	}
+
+	max_length = hw_action_info->end - hw_action_info->start + 1;
+
+	switch (action) {
+	case MLX5_ACTION_TYPE_SET:
+		hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_SET;
+		/* The PRM defines that a length of zero means a length of 32 bits */
+		if (!length)
+			length = 32;
+
+		if (length + offset > max_length) {
+			mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n");
+			return -EINVAL;
+		}
+		break;
+
+	case MLX5_ACTION_TYPE_ADD:
+		hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_ADD;
+		offset = 0;
+		length = max_length;
+		break;
+
+	default:
+		mlx5dr_info(dmn, "Unsupported action_type for modify action\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(dr_action_hw_set, hw_action, opcode, hw_opcode);
+
+	MLX5_SET(dr_action_hw_set, hw_action, destination_field_code,
+		 hw_action_info->hw_field);
+
+	MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter,
+		 hw_action_info->start + offset);
+
+	MLX5_SET(dr_action_hw_set, hw_action, destination_length,
+		 length == 32 ? 0 : length);
+
+	MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+
+	*ret_hw_info = hw_action_info;
+
+	return 0;
+}
+
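+/* A worked conversion example (the field values are illustrative): a
+ * SW MLX5_ACTION_TYPE_SET on MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT with
+ * length 16 and offset 0 becomes a HW SET on field
+ * MLX5DR_ACTION_MDFY_HW_FLD_L4_0 with destination_left_shifter
+ * 16 + 0 = 16 (the field starts at bit 16) and destination_length 16.
+ */
+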
+static int
+dr_action_modify_check_field_limitation(struct mlx5dr_domain *dmn,
+					const __be64 *sw_action)
+{
+	u16 sw_field;
+	u8 action;
+
+	sw_field = MLX5_GET(set_action_in, sw_action, field);
+	action = MLX5_GET(set_action_in, sw_action, action_type);
+
+	/* Check if SW field is supported in current domain (RX/TX) */
+	if (action == MLX5_ACTION_TYPE_SET) {
+		if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
+			if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
+				mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
+					   sw_field);
+				return -EINVAL;
+			}
+		}
+
+		if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
+			if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
+				mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
+					   sw_field);
+				return -EINVAL;
+			}
+		}
+	} else if (action == MLX5_ACTION_TYPE_ADD) {
+		if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL &&
+		    sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT &&
+		    sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM &&
+		    sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) {
+			mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", sw_field);
+			return -EINVAL;
+		}
+	} else {
+		mlx5dr_info(dmn, "Unsupported action %d modify action\n", action);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static bool
+dr_action_modify_check_is_ttl_modify(const __be64 *sw_action)
+{
+	u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
+
+	return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
+}
+
+static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn,
+					    u32 max_hw_actions,
+					    u32 num_sw_actions,
+					    __be64 sw_actions[],
+					    __be64 hw_actions[],
+					    u32 *num_hw_actions,
+					    bool *modify_ttl)
+{
+	const struct dr_action_modify_field_conv *hw_action_info;
+	u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED;
+	u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE;
+	u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE;
+	int ret, i, hw_idx = 0;
+	__be64 *sw_action;
+	__be64 hw_action;
+
+	*modify_ttl = false;
+
+	for (i = 0; i < num_sw_actions; i++) {
+		sw_action = &sw_actions[i];
+
+		ret = dr_action_modify_check_field_limitation(dmn, sw_action);
+		if (ret)
+			return ret;
+
+		if (!(*modify_ttl))
+			*modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action);
+
+		/* Convert SW action to HW action */
+		ret = dr_action_modify_sw_to_hw(dmn,
+						sw_action,
+						&hw_action,
+						&hw_action_info);
+		if (ret)
+			return ret;
+
+		/* Due to a HW limitation we cannot modify two different L3 types */
+		if (l3_type && hw_action_info->l3_type &&
+		    hw_action_info->l3_type != l3_type) {
+			mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n");
+			return -EINVAL;
+		}
+		if (hw_action_info->l3_type)
+			l3_type = hw_action_info->l3_type;
+
+		/* Due to a HW limitation we cannot modify two different L4 types */
+		if (l4_type && hw_action_info->l4_type &&
+		    hw_action_info->l4_type != l4_type) {
+			mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n");
+			return -EINVAL;
+		}
+		if (hw_action_info->l4_type)
+			l4_type = hw_action_info->l4_type;
+
+		/* HW reads and executes two actions at once; this means we
+		 * need to create a gap if two consecutive actions access
+		 * the same field.
+		 */
+		if ((hw_idx % 2) && hw_field == hw_action_info->hw_field) {
+			/* Check that after gap insertion the total number of
+			 * HW modify actions doesn't exceed the limit
+			 */
+			hw_idx++;
+			if ((num_sw_actions + hw_idx - i) >= max_hw_actions) {
+				mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n");
+				return -EINVAL;
+			}
+		}
+		hw_field = hw_action_info->hw_field;
+
+		hw_actions[hw_idx] = hw_action;
+		hw_idx++;
+	}
+
+	*num_hw_actions = hw_idx;
+
+	return 0;
+}
+
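+/* Gap insertion example (a hypothetical action list): two consecutive
+ * sets of dmac_47_16 and dmac_15_0 both target HW field
+ * MLX5DR_ACTION_MDFY_HW_FLD_L2_0. The first lands at hw_idx 0; the
+ * second would land at hw_idx 1, in the same executed pair, so a gap
+ * is inserted and it is written at hw_idx 2 instead.
+ */
+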
+static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
+					  size_t actions_sz,
+					  __be64 actions[],
+					  struct mlx5dr_action *action)
+{
+	struct mlx5dr_icm_chunk *chunk;
+	u32 max_hw_actions;
+	u32 num_hw_actions;
+	u32 num_sw_actions;
+	__be64 *hw_actions;
+	bool modify_ttl;
+	int ret;
+
+	num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE;
+	max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16);
+
+	if (num_sw_actions > max_hw_actions) {
+		mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n",
+			   num_sw_actions, max_hw_actions);
+		return -EINVAL;
+	}
+
+	chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16);
+	if (!chunk)
+		return -ENOMEM;
+
+	hw_actions = kcalloc(max_hw_actions, DR_MODIFY_ACTION_SIZE, GFP_KERNEL);
+	if (!hw_actions) {
+		ret = -ENOMEM;
+		goto free_chunk;
+	}
+
+	ret = dr_actions_convert_modify_header(dmn,
+					       max_hw_actions,
+					       num_sw_actions,
+					       actions,
+					       hw_actions,
+					       &num_hw_actions,
+					       &modify_ttl);
+	if (ret)
+		goto free_hw_actions;
+
+	action->rewrite.chunk = chunk;
+	action->rewrite.modify_ttl = modify_ttl;
+	action->rewrite.data = (u8 *)hw_actions;
+	action->rewrite.num_of_actions = num_hw_actions;
+	action->rewrite.index = (chunk->icm_addr -
+				 dmn->info.caps.hdr_modify_icm_addr) /
+				 ACTION_CACHE_LINE_SIZE;
+
+	ret = mlx5dr_send_postsend_action(dmn, action);
+	if (ret)
+		goto free_hw_actions;
+
+	return 0;
+
+free_hw_actions:
+	kfree(hw_actions);
+free_chunk:
+	mlx5dr_icm_free_chunk(chunk);
+	return ret;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn,
+				   u32 flags,
+				   size_t actions_sz,
+				   __be64 actions[])
+{
+	struct mlx5dr_action *action;
+	int ret = 0;
+
+	refcount_inc(&dmn->refcount);
+
+	if (actions_sz % DR_MODIFY_ACTION_SIZE) {
+		mlx5dr_dbg(dmn, "Invalid modify actions size provided\n");
+		goto dec_ref;
+	}
+
+	action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR);
+	if (!action)
+		goto dec_ref;
+
+	action->rewrite.dmn = dmn;
+
+	ret = dr_action_create_modify_action(dmn,
+					     actions_sz,
+					     actions,
+					     action);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret);
+		goto free_action;
+	}
+
+	return action;
+
+free_action:
+	kfree(action);
+dec_ref:
+	refcount_dec(&dmn->refcount);
+	return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
+				u32 vport, u8 vhca_id_valid,
+				u16 vhca_id)
+{
+	struct mlx5dr_cmd_vport_cap *vport_cap;
+	struct mlx5dr_domain *vport_dmn;
+	struct mlx5dr_action *action;
+	u8 peer_vport;
+
+	peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi);
+	vport_dmn = peer_vport ? dmn->peer_dmn : dmn;
+	if (!vport_dmn) {
+		mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
+		return NULL;
+	}
+
+	if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+		mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n");
+		return NULL;
+	}
+
+	vport_cap = mlx5dr_get_vport_cap(&vport_dmn->info.caps, vport);
+	if (!vport_cap) {
+		mlx5dr_dbg(dmn, "Failed to get vport %d caps\n", vport);
+		return NULL;
+	}
+
+	action = dr_action_create_generic(DR_ACTION_TYP_VPORT);
+	if (!action)
+		return NULL;
+
+	action->vport.dmn = vport_dmn;
+	action->vport.caps = vport_cap;
+
+	return action;
+}
+
+int mlx5dr_action_destroy(struct mlx5dr_action *action)
+{
+	if (refcount_read(&action->refcount) > 1)
+		return -EBUSY;
+
+	switch (action->action_type) {
+	case DR_ACTION_TYP_FT:
+		if (!action->dest_tbl.is_fw_tbl)
+			refcount_dec(&action->dest_tbl.tbl->refcount);
+		break;
+	case DR_ACTION_TYP_TNL_L2_TO_L2:
+		refcount_dec(&action->reformat.dmn->refcount);
+		break;
+	case DR_ACTION_TYP_TNL_L3_TO_L2:
+		mlx5dr_icm_free_chunk(action->rewrite.chunk);
+		refcount_dec(&action->reformat.dmn->refcount);
+		break;
+	case DR_ACTION_TYP_L2_TO_TNL_L2:
+	case DR_ACTION_TYP_L2_TO_TNL_L3:
+		mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev,
+						action->reformat.reformat_id);
+		refcount_dec(&action->reformat.dmn->refcount);
+		break;
+	case DR_ACTION_TYP_MODIFY_HDR:
+		mlx5dr_icm_free_chunk(action->rewrite.chunk);
+		kfree(action->rewrite.data);
+		refcount_dec(&action->rewrite.dmn->refcount);
+		break;
+	default:
+		break;
+	}
+
+	kfree(action);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
new file mode 100644
index 0000000..41662c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
+				       bool other_vport,
+				       u16 vport_number,
+				       u64 *icm_address_rx,
+				       u64 *icm_address_tx)
+{
+	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+	int err;
+
+	MLX5_SET(query_esw_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+	MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport);
+	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*icm_address_rx =
+		MLX5_GET64(query_esw_vport_context_out, out,
+			   esw_vport_context.sw_steering_vport_icm_address_rx);
+	*icm_address_tx =
+		MLX5_GET64(query_esw_vport_context_out, out,
+			   esw_vport_context.sw_steering_vport_icm_address_tx);
+	return 0;
+}
+
+int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport,
+			  u16 vport_number, u16 *gvmi)
+{
+	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+	int out_size;
+	void *out;
+	int err;
+
+	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	out = kzalloc(out_size, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, other_function, other_vport);
+	MLX5_SET(query_hca_cap_in, in, function_id, vport_number);
+	MLX5_SET(query_hca_cap_in, in, op_mod,
+		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
+		 HCA_CAP_OPMOD_GET_CUR);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size);
+	if (err) {
+		kfree(out);
+		return err;
+	}
+
+	*gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id);
+
+	kfree(out);
+	return 0;
+}
+
+int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
+			      struct mlx5dr_esw_caps *caps)
+{
+	caps->drop_icm_address_rx =
+		MLX5_CAP64_ESW_FLOWTABLE(mdev,
+					 sw_steering_fdb_action_drop_icm_address_rx);
+	caps->drop_icm_address_tx =
+		MLX5_CAP64_ESW_FLOWTABLE(mdev,
+					 sw_steering_fdb_action_drop_icm_address_tx);
+	caps->uplink_icm_address_rx =
+		MLX5_CAP64_ESW_FLOWTABLE(mdev,
+					 sw_steering_uplink_icm_address_rx);
+	caps->uplink_icm_address_tx =
+		MLX5_CAP64_ESW_FLOWTABLE(mdev,
+					 sw_steering_uplink_icm_address_tx);
+	caps->sw_owner =
+		MLX5_CAP_ESW_FLOWTABLE_FDB(mdev,
+					   sw_owner);
+
+	return 0;
+}
+
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+			    struct mlx5dr_cmd_caps *caps)
+{
+	caps->prio_tag_required	= MLX5_CAP_GEN(mdev, prio_tag_required);
+	caps->eswitch_manager	= MLX5_CAP_GEN(mdev, eswitch_manager);
+	caps->gvmi		= MLX5_CAP_GEN(mdev, vhca_id);
+	caps->flex_protocols	= MLX5_CAP_GEN(mdev, flex_parser_protocols);
+
+	if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) {
+		caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
+		caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
+	}
+
+	if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) {
+		caps->flex_parser_id_icmpv6_dw0 =
+			MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0);
+		caps->flex_parser_id_icmpv6_dw1 =
+			MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1);
+	}
+
+	caps->nic_rx_drop_address =
+		MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
+	caps->nic_tx_drop_address =
+		MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address);
+	caps->nic_tx_allow_address =
+		MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address);
+
+	caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner);
+	caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level);
+
+	caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner);
+
+	caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size);
+	caps->hdr_modify_icm_addr =
+		MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address);
+
+	caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port);
+
+	return 0;
+}
+
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+				enum fs_flow_table_type type,
+				u32 table_id,
+				struct mlx5dr_cmd_query_flow_table_details *output)
+{
+	u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {};
+	int err;
+
+	MLX5_SET(query_flow_table_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_FLOW_TABLE);
+
+	MLX5_SET(query_flow_table_in, in, table_type, type);
+	MLX5_SET(query_flow_table_in, in, table_id, table_id);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	output->status = MLX5_GET(query_flow_table_out, out, status);
+	output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level);
+
+	output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out,
+						 flow_table_context.sw_owner_icm_root_1);
+	output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out,
+						 flow_table_context.sw_owner_icm_root_0);
+
+	return 0;
+}
+
+int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev)
+{
+	u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {};
+
+	MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING);
+
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
+					u32 table_type,
+					u32 table_id,
+					u32 group_id,
+					u32 modify_header_id,
+					u32 vport_id)
+{
+	u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {};
+	void *in_flow_context;
+	unsigned int inlen;
+	void *in_dests;
+	u32 *in;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+		1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+	MLX5_SET(set_fte_in, in, table_type, table_type);
+	MLX5_SET(set_fte_in, in, table_id, table_id);
+
+	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+	MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id);
+	MLX5_SET(flow_context, in_flow_context, destination_list_size, 1);
+	MLX5_SET(flow_context, in_flow_context, action,
+		 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+		 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR);
+
+	in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+	MLX5_SET(dest_format_struct, in_dests, destination_type,
+		 MLX5_FLOW_DESTINATION_TYPE_VPORT);
+	MLX5_SET(dest_format_struct, in_dests, destination_id, vport_id);
+
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	kvfree(in);
+
+	return err;
+}
+
+int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
+				    u32 table_type,
+				    u32 table_id)
+{
+	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
+
+	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+	MLX5_SET(delete_fte_in, in, table_type, table_type);
+	MLX5_SET(delete_fte_in, in, table_id, table_id);
+
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
+				   u32 table_type,
+				   u8 num_of_actions,
+				   u64 *actions,
+				   u32 *modify_header_id)
+{
+	u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {};
+	void *p_actions;
+	u32 inlen;
+	u32 *in;
+	int err;
+
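+	/* The command input is the fixed header followed by an inline array
+	 * of num_of_actions 8-byte modify actions, copied in below.
+	 */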
+	inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) +
+		 num_of_actions * sizeof(u64);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(alloc_modify_header_context_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+	MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+	MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions);
+	p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+	memcpy(p_actions, actions, num_of_actions * sizeof(u64));
+
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	if (err)
+		goto out;
+
+	*modify_header_id = MLX5_GET(alloc_modify_header_context_out, out,
+				     modify_header_id);
+out:
+	kvfree(in);
+	return err;
+}
+
+int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
+				     u32 modify_header_id)
+{
+	u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
+
+	MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+	MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+		 modify_header_id);
+
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
+				       u32 table_type,
+				       u32 table_id,
+				       u32 *group_id)
+{
+	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {};
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	u32 *in;
+	int err;
+
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP);
+	MLX5_SET(create_flow_group_in, in, table_type, table_type);
+	MLX5_SET(create_flow_group_in, in, table_id, table_id);
+
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	if (err)
+		goto out;
+
+	*group_id = MLX5_GET(create_flow_group_out, out, group_id);
+
+out:
+	kfree(in);
+	return err;
+}
+
+int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+				  u32 table_type,
+				  u32 table_id,
+				  u32 group_id)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {};
+
+	MLX5_SET(destroy_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+	MLX5_SET(destroy_flow_group_in, in, table_type, table_type);
+	MLX5_SET(destroy_flow_group_in, in, table_id, table_id);
+	MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+				 u32 table_type,
+				 u64 icm_addr_rx,
+				 u64 icm_addr_tx,
+				 u8 level,
+				 bool sw_owner,
+				 bool term_tbl,
+				 u64 *fdb_rx_icm_addr,
+				 u32 *table_id)
+{
+	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
+	void *ft_mdev;
+	int err;
+
+	MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
+	MLX5_SET(create_flow_table_in, in, table_type, table_type);
+
+	ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
+	MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl);
+	MLX5_SET(flow_table_context, ft_mdev, sw_owner, sw_owner);
+	MLX5_SET(flow_table_context, ft_mdev, level, level);
+
+	if (sw_owner) {
+		/* icm_addr_0 is used for FDB RX / NIC RX / NIC TX,
+		 * icm_addr_1 is used for FDB TX.
+		 */
+		if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) {
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_0, icm_addr_rx);
+		} else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) {
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_0, icm_addr_tx);
+		} else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) {
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_0, icm_addr_rx);
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_1, icm_addr_tx);
+		}
+	}
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*table_id = MLX5_GET(create_flow_table_out, out, table_id);
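+	/* For FW-owned FDB tables the RX ICM base address comes back split
+	 * across three fields; stitch it into a single 64-bit address.
+	 */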
+	if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB)
+		*fdb_rx_icm_addr =
+		(u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) |
+		(u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 |
+		(u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40;
+
+	return 0;
+}
+
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+				  u32 table_id,
+				  u32 table_type)
+{
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
+
+	MLX5_SET(destroy_flow_table_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+	MLX5_SET(destroy_flow_table_in, in, table_type, table_type);
+	MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
+
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+				   enum mlx5_reformat_ctx_type rt,
+				   size_t reformat_size,
+				   void *reformat_data,
+				   u32 *reformat_id)
+{
+	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
+	size_t inlen, cmd_data_sz, cmd_total_sz;
+	void *prctx;
+	void *pdata;
+	void *in;
+	int err;
+
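+	/* Input size is the base command struct plus the caller's reformat
+	 * data, minus the placeholder reformat_data field that is already
+	 * counted in the struct size, rounded up to a 4-byte multiple.
+	 */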
+	cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in);
+	cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in,
+					packet_reformat_context.reformat_data);
+	inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+
+	prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context);
+	pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data);
+
+	MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt);
+	MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size);
+	memcpy(pdata, reformat_data, reformat_size);
+
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	if (err)
+		goto err_free_in;
+
+	*reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+
+err_free_in:
+	kvfree(in);
+	return err;
+}
+
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+				     u32 reformat_id)
+{
+	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
+
+	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+		 reformat_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+			 u16 index, struct mlx5dr_cmd_gid_attr *attr)
+{
+	u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {};
+	int err;
+
+	MLX5_SET(query_roce_address_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ROCE_ADDRESS);
+
+	MLX5_SET(query_roce_address_in, in, roce_address_index, index);
+	MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	memcpy(&attr->gid,
+	       MLX5_ADDR_OF(query_roce_address_out,
+			    out, roce_address.source_l3_address),
+	       sizeof(attr->gid));
+	memcpy(attr->mac,
+	       MLX5_ADDR_OF(query_roce_address_out, out,
+			    roce_address.source_mac_47_32),
+	       sizeof(attr->mac));
+
+	if (MLX5_GET(query_roce_address_out, out,
+		     roce_address.roce_version) == MLX5_ROCE_VERSION_2)
+		attr->roce_ver = MLX5_ROCE_VERSION_2;
+	else
+		attr->roce_ver = MLX5_ROCE_VERSION_1;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
new file mode 100644
index 0000000..9e2eccb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+/* Copyright (c) 2011-2015 Stephan Brumme. All rights reserved.
+ * Slicing-by-16 contributed by Bulat Ziganshin
+ *
+ * This software is provided 'as-is', without any express or implied warranty.
+ * In no event will the author be held liable for any damages arising from the
+ * use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software.
+ * 2. If you use this software in a product, an acknowledgment in the product
+ *    documentation would be appreciated but is not required.
+ * 3. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ *
+ * Taken from http://create.stephan-brumme.com/crc32/ and adapted.
+ */
+
+#include "dr_types.h"
+
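+/* Bit-reflected form of the standard CRC-32 (IEEE 802.3) polynomial 0x04C11DB7 */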
+#define DR_STE_CRC_POLY 0xEDB88320L
+
+static u32 dr_ste_crc_tab32[8][256];
+
+static void dr_crc32_calc_lookup_entry(u32 (*tbl)[256], u8 i, u8 j)
+{
+	tbl[i][j] = (tbl[i - 1][j] >> 8) ^ tbl[0][tbl[i - 1][j] & 0xff];
+}
+
+void mlx5dr_crc32_init_table(void)
+{
+	u32 crc, i, j;
+
+	for (i = 0; i < 256; i++) {
+		crc = i;
+		for (j = 0; j < 8; j++) {
+			if (crc & 0x00000001L)
+				crc = (crc >> 1) ^ DR_STE_CRC_POLY;
+			else
+				crc = crc >> 1;
+		}
+		dr_ste_crc_tab32[0][i] = crc;
+	}
+
+	/* Init CRC lookup tables according to crc_slice_8 algorithm */
+	for (i = 0; i < 256; i++) {
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 1, i);
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 2, i);
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 3, i);
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 4, i);
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 5, i);
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 6, i);
+		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 7, i);
+	}
+}
+
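+/* Each dr_ste_crc_tab32[i][b] holds the CRC-32 state of byte b followed by
+ * i zero bytes; this is what allows the loop below to fold eight input
+ * bytes with eight independent table lookups.
+ */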
+/* Compute CRC32 (Slicing-by-8 algorithm) */
+u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length)
+{
+	const u32 *curr = (const u32 *)input_data;
+	const u8 *curr_char;
+	u32 crc = 0, one, two;
+
+	if (!input_data)
+		return 0;
+
+	/* Process eight bytes at once (Slicing-by-8) */
+	while (length >= 8) {
+		one = *curr++ ^ crc;
+		two = *curr++;
+
+		crc = dr_ste_crc_tab32[0][(two >> 24) & 0xff]
+			^ dr_ste_crc_tab32[1][(two >> 16) & 0xff]
+			^ dr_ste_crc_tab32[2][(two >> 8) & 0xff]
+			^ dr_ste_crc_tab32[3][two & 0xff]
+			^ dr_ste_crc_tab32[4][(one >> 24) & 0xff]
+			^ dr_ste_crc_tab32[5][(one >> 16) & 0xff]
+			^ dr_ste_crc_tab32[6][(one >> 8) & 0xff]
+			^ dr_ste_crc_tab32[7][one & 0xff];
+
+		length -= 8;
+	}
+
+	curr_char = (const u8 *)curr;
+	/* Remaining 1 to 7 bytes (standard algorithm) */
+	while (length-- != 0)
+		crc = (crc >> 8) ^ dr_ste_crc_tab32[0][(crc & 0xff)
+			^ *curr_char++];
+
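+	/* The CRC is computed above in reflected (LSB-first) form; return it
+	 * byte-swapped, presumably so it can be used directly as a
+	 * big-endian hash index.
+	 */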
+	return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
+		((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
new file mode 100644
index 0000000..5b24732
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/mlx5/eswitch.h>
+#include "dr_types.h"
+
+static int dr_domain_init_cache(struct mlx5dr_domain *dmn)
+{
+	/* Per-vport cached FW flow table for checksum recalculation; this
+	 * recalculation is needed due to a HW bug.
+	 */
+	dmn->cache.recalc_cs_ft = kcalloc(dmn->info.caps.num_vports,
+					  sizeof(dmn->cache.recalc_cs_ft[0]),
+					  GFP_KERNEL);
+	if (!dmn->cache.recalc_cs_ft)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void dr_domain_uninit_cache(struct mlx5dr_domain *dmn)
+{
+	int i;
+
+	for (i = 0; i < dmn->info.caps.num_vports; i++) {
+		if (!dmn->cache.recalc_cs_ft[i])
+			continue;
+
+		mlx5dr_fw_destroy_recalc_cs_ft(dmn, dmn->cache.recalc_cs_ft[i]);
+	}
+
+	kfree(dmn->cache.recalc_cs_ft);
+}
+
+int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+					      u32 vport_num,
+					      u64 *rx_icm_addr)
+{
+	struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+
+	recalc_cs_ft = dmn->cache.recalc_cs_ft[vport_num];
+	if (!recalc_cs_ft) {
+		/* Table not in cache, need to allocate a new one */
+		recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num);
+		if (!recalc_cs_ft)
+			return -EINVAL;
+
+		dmn->cache.recalc_cs_ft[vport_num] = recalc_cs_ft;
+	}
+
+	*rx_icm_addr = recalc_cs_ft->rx_icm_addr;
+
+	return 0;
+}
+
+static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
+{
+	int ret;
+
+	ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Couldn't allocate PD\n");
+		return ret;
+	}
+
+	dmn->uar = mlx5_get_uars_page(dmn->mdev);
+	if (!dmn->uar) {
+		mlx5dr_err(dmn, "Couldn't allocate UAR\n");
+		ret = -ENOMEM;
+		goto clean_pd;
+	}
+
+	dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE);
+	if (!dmn->ste_icm_pool) {
+		mlx5dr_err(dmn, "Couldn't get icm memory\n");
+		ret = -ENOMEM;
+		goto clean_uar;
+	}
+
+	dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION);
+	if (!dmn->action_icm_pool) {
+		mlx5dr_err(dmn, "Couldn't get action icm memory\n");
+		ret = -ENOMEM;
+		goto free_ste_icm_pool;
+	}
+
+	ret = mlx5dr_send_ring_alloc(dmn);
+	if (ret) {
+		mlx5dr_err(dmn, "Couldn't create send-ring\n");
+		goto free_action_icm_pool;
+	}
+
+	return 0;
+
+free_action_icm_pool:
+	mlx5dr_icm_pool_destroy(dmn->action_icm_pool);
+free_ste_icm_pool:
+	mlx5dr_icm_pool_destroy(dmn->ste_icm_pool);
+clean_uar:
+	mlx5_put_uars_page(dmn->mdev, dmn->uar);
+clean_pd:
+	mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
+
+	return ret;
+}
+
+static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn)
+{
+	mlx5dr_send_ring_free(dmn, dmn->send_ring);
+	mlx5dr_icm_pool_destroy(dmn->action_icm_pool);
+	mlx5dr_icm_pool_destroy(dmn->ste_icm_pool);
+	mlx5_put_uars_page(dmn->mdev, dmn->uar);
+	mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
+}
+
+static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
+				 bool other_vport,
+				 u16 vport_number)
+{
+	struct mlx5dr_cmd_vport_cap *vport_caps;
+	int ret;
+
+	vport_caps = &dmn->info.caps.vports_caps[vport_number];
+
+	ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev,
+						 other_vport,
+						 vport_number,
+						 &vport_caps->icm_address_rx,
+						 &vport_caps->icm_address_tx);
+	if (ret)
+		return ret;
+
+	ret = mlx5dr_cmd_query_gvmi(dmn->mdev,
+				    other_vport,
+				    vport_number,
+				    &vport_caps->vport_gvmi);
+	if (ret)
+		return ret;
+
+	vport_caps->num = vport_number;
+	vport_caps->vhca_gvmi = dmn->info.caps.gvmi;
+
+	return 0;
+}
+
+static int dr_domain_query_vports(struct mlx5dr_domain *dmn)
+{
+	struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps;
+	struct mlx5dr_cmd_vport_cap *wire_vport;
+	int vport;
+	int ret;
+
+	/* Query vports (except wire vport) */
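+	/* vport 0 is the e-switch manager itself, queried as the local vport
+	 * (other_vport == false); all the rest are queried as other vports,
+	 * hence the !!vport below.
+	 */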
+	for (vport = 0; vport < dmn->info.caps.num_esw_ports - 1; vport++) {
+		ret = dr_domain_query_vport(dmn, !!vport, vport);
+		if (ret)
+			return ret;
+	}
+
+	/* Last vport is the wire port */
+	wire_vport = &dmn->info.caps.vports_caps[vport];
+	wire_vport->num = WIRE_PORT;
+	wire_vport->icm_address_rx = esw_caps->uplink_icm_address_rx;
+	wire_vport->icm_address_tx = esw_caps->uplink_icm_address_tx;
+	wire_vport->vport_gvmi = 0;
+	wire_vport->vhca_gvmi = dmn->info.caps.gvmi;
+
+	return 0;
+}
+
+static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
+				    struct mlx5dr_domain *dmn)
+{
+	int ret;
+
+	if (!dmn->info.caps.eswitch_manager)
+		return -EOPNOTSUPP;
+
+	ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps);
+	if (ret)
+		return ret;
+
+	dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner;
+	dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx;
+	dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx;
+
+	dmn->info.caps.vports_caps = kcalloc(dmn->info.caps.num_esw_ports,
+					     sizeof(dmn->info.caps.vports_caps[0]),
+					     GFP_KERNEL);
+	if (!dmn->info.caps.vports_caps)
+		return -ENOMEM;
+
+	ret = dr_domain_query_vports(dmn);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Failed to query vports caps\n");
+		goto free_vports_caps;
+	}
+
+	dmn->info.caps.num_vports = dmn->info.caps.num_esw_ports - 1;
+
+	return 0;
+
+free_vports_caps:
+	kfree(dmn->info.caps.vports_caps);
+	dmn->info.caps.vports_caps = NULL;
+	return ret;
+}
+
+static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
+			       struct mlx5dr_domain *dmn)
+{
+	struct mlx5dr_cmd_vport_cap *vport_cap;
+	int ret;
+
+	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
+		mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n");
+		return -EOPNOTSUPP;
+	}
+
+	dmn->info.caps.num_esw_ports = mlx5_eswitch_get_total_vports(mdev);
+
+	ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps);
+	if (ret)
+		return ret;
+
+	ret = dr_domain_query_fdb_caps(mdev, dmn);
+	if (ret)
+		return ret;
+
+	switch (dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		if (!dmn->info.caps.rx_sw_owner)
+			return -EOPNOTSUPP;
+
+		dmn->info.supp_sw_steering = true;
+		dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+		dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address;
+		dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address;
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		if (!dmn->info.caps.tx_sw_owner)
+			return -EOPNOTSUPP;
+
+		dmn->info.supp_sw_steering = true;
+		dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+		dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address;
+		dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address;
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		if (!dmn->info.caps.eswitch_manager)
+			return -EOPNOTSUPP;
+
+		if (!dmn->info.caps.fdb_sw_owner)
+			return -EOPNOTSUPP;
+
+		dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+		dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+		vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0);
+		if (!vport_cap) {
+			mlx5dr_dbg(dmn, "Failed to get esw manager vport\n");
+			return -ENOENT;
+		}
+
+		dmn->info.supp_sw_steering = true;
+		dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx;
+		dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx;
+		dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address;
+		dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address;
+		break;
+	default:
+		mlx5dr_dbg(dmn, "Invalid domain\n");
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn)
+{
+	kfree(dmn->info.caps.vports_caps);
+}
+
+struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
+{
+	struct mlx5dr_domain *dmn;
+	int ret;
+
+	if (type > MLX5DR_DOMAIN_TYPE_FDB)
+		return NULL;
+
+	dmn = kzalloc(sizeof(*dmn), GFP_KERNEL);
+	if (!dmn)
+		return NULL;
+
+	dmn->mdev = mdev;
+	dmn->type = type;
+	refcount_set(&dmn->refcount, 1);
+	mutex_init(&dmn->mutex);
+
+	if (dr_domain_caps_init(mdev, dmn)) {
+		mlx5dr_dbg(dmn, "Failed init domain, no caps\n");
+		goto free_domain;
+	}
+
+	dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
+	dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K,
+					    dmn->info.caps.log_icm_size);
+
+	if (!dmn->info.supp_sw_steering) {
+		mlx5dr_err(dmn, "SW steering is not supported\n");
+		goto uninit_caps;
+	}
+
+	/* Allocate resources */
+	ret = dr_domain_init_resources(dmn);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed init domain resources\n");
+		goto uninit_caps;
+	}
+
+	ret = dr_domain_init_cache(dmn);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed initialize domain cache\n");
+		goto uninit_resourses;
+	}
+
+	/* Init CRC table for htbl CRC calculation */
+	mlx5dr_crc32_init_table();
+
+	return dmn;
+
+uninit_resources:
+	dr_domain_uninit_resources(dmn);
+uninit_caps:
+	dr_domain_caps_uninit(dmn);
+free_domain:
+	kfree(dmn);
+	return NULL;
+}
+
+/* Ensure that the device steering tables are synchronized with updates made
+ * by SW insertion.
+ */
+int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
+{
+	int ret = 0;
+
+	if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
+		mutex_lock(&dmn->mutex);
+		ret = mlx5dr_send_ring_force_drain(dmn);
+		mutex_unlock(&dmn->mutex);
+		if (ret)
+			return ret;
+	}
+
+	if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
+		ret = mlx5dr_cmd_sync_steering(dmn->mdev);
+
+	return ret;
+}
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
+{
+	if (refcount_read(&dmn->refcount) > 1)
+		return -EBUSY;
+
+	/* make sure resources are not used by the hardware */
+	mlx5dr_cmd_sync_steering(dmn->mdev);
+	dr_domain_uninit_cache(dmn);
+	dr_domain_uninit_resources(dmn);
+	dr_domain_caps_uninit(dmn);
+	mutex_destroy(&dmn->mutex);
+	kfree(dmn);
+	return 0;
+}
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+			    struct mlx5dr_domain *peer_dmn)
+{
+	mutex_lock(&dmn->mutex);
+
+	if (dmn->peer_dmn)
+		refcount_dec(&dmn->peer_dmn->refcount);
+
+	dmn->peer_dmn = peer_dmn;
+
+	if (dmn->peer_dmn)
+		refcount_inc(&dmn->peer_dmn->refcount);
+
+	mutex_unlock(&dmn->mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
new file mode 100644
index 0000000..60ef6e6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/types.h>
+#include "dr_types.h"
+
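+/* Checksum-recalculation workaround: create a FW-owned termination table
+ * whose single rule applies an add-zero-to-TTL modify header, which triggers
+ * checksum recalculation, and then forwards to the given vport. SW-steering
+ * rules that need the recalculation forward packets through this table.
+ */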
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num)
+{
+	struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+	u32 table_id, group_id, modify_hdr_id;
+	u64 rx_icm_addr, modify_ttl_action;
+	int ret;
+
+	recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL);
+	if (!recalc_cs_ft)
+		return NULL;
+
+	ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB,
+					   0, 0, dmn->info.caps.max_ft_level - 1,
+					   false, true, &rx_icm_addr, &table_id);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret);
+		goto free_ttl_tbl;
+	}
+
+	ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev,
+						 MLX5_FLOW_TABLE_TYPE_FDB,
+						 table_id, &group_id);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret);
+		goto destroy_flow_table;
+	}
+
+	/* Modify TTL action by adding zero to trigger CS recalculation */
+	modify_ttl_action = 0;
+	MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD);
+	MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
+
+	ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1,
+					     &modify_ttl_action,
+					     &modify_hdr_id);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret);
+		goto destroy_flow_group;
+	}
+
+	ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev,
+						  MLX5_FLOW_TABLE_TYPE_FDB,
+						  table_id, group_id, modify_hdr_id,
+						  vport_num);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret);
+		goto dealloc_modify_header;
+	}
+
+	recalc_cs_ft->modify_hdr_id = modify_hdr_id;
+	recalc_cs_ft->rx_icm_addr = rx_icm_addr;
+	recalc_cs_ft->table_id = table_id;
+	recalc_cs_ft->group_id = group_id;
+
+	return recalc_cs_ft;
+
+dealloc_modify_header:
+	mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id);
+destroy_flow_group:
+	mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+				      MLX5_FLOW_TABLE_TYPE_FDB,
+				      table_id, group_id);
+destroy_flow_table:
+	mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB);
+free_ttl_tbl:
+	kfree(recalc_cs_ft);
+	return NULL;
+}
+
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+				    struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft)
+{
+	mlx5dr_cmd_del_flow_table_entry(dmn->mdev,
+					MLX5_FLOW_TABLE_TYPE_FDB,
+					recalc_cs_ft->table_id);
+	mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id);
+	mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+				      MLX5_FLOW_TABLE_TYPE_FDB,
+				      recalc_cs_ft->table_id,
+				      recalc_cs_ft->group_id);
+	mlx5dr_cmd_destroy_flow_table(dmn->mdev,
+				      recalc_cs_ft->table_id,
+				      MLX5_FLOW_TABLE_TYPE_FDB);
+
+	kfree(recalc_cs_ft);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
new file mode 100644
index 0000000..d7c7467
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
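+/* Once the pool-wide hot memory (chunks freed by SW but possibly still
+ * referenced by HW) crosses this threshold, a steering sync is issued and
+ * the hot chunks are recycled back to the free lists.
+ */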
+#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024)
+
+struct mlx5dr_icm_pool;
+
+struct mlx5dr_icm_bucket {
+	struct mlx5dr_icm_pool *pool;
+
+	/* Chunks that aren't visible to HW, neither directly nor via cache */
+	struct list_head free_list;
+	unsigned int free_list_count;
+
+	/* Used chunks, HW may be accessing this memory */
+	struct list_head used_list;
+	unsigned int used_list_count;
+
+	/* HW may still be accessing this memory, but at some future,
+	 * undetermined time it will cease to do so. Before sync_ste is
+	 * called, this list is moved to sync_list.
+	 */
+	struct list_head hot_list;
+	unsigned int hot_list_count;
+
+	/* Pending sync list, entries from the hot list are moved to this list.
+	 * sync_ste is executed and then sync_list is concatenated to the free list
+	 */
+	struct list_head sync_list;
+	unsigned int sync_list_count;
+
+	u32 total_chunks;
+	u32 num_of_entries;
+	u32 entry_size;
+	/* protect the ICM bucket */
+	struct mutex mutex;
+};
+
+struct mlx5dr_icm_pool {
+	struct mlx5dr_icm_bucket *buckets;
+	enum mlx5dr_icm_type icm_type;
+	enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+	enum mlx5dr_icm_chunk_size num_of_buckets;
+	struct list_head icm_mr_list;
+	/* protect the ICM MR list */
+	struct mutex mr_mutex;
+	struct mlx5dr_domain *dmn;
+};
+
+struct mlx5dr_icm_dm {
+	u32 obj_id;
+	enum mlx5_sw_icm_type type;
+	phys_addr_t addr;
+	size_t length;
+};
+
+struct mlx5dr_icm_mr {
+	struct mlx5dr_icm_pool *pool;
+	struct mlx5_core_mkey mkey;
+	struct mlx5dr_icm_dm dm;
+	size_t used_length;
+	size_t length;
+	u64 icm_start_addr;
+	struct list_head mr_list;
+};
+
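+/* Create an mkey over the device-memory (SW ICM) region; the resulting rkey
+ * is stored per chunk and is what allows steering entries in ICM to be
+ * written through RDMA operations on the send ring.
+ */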
+static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
+				 u32 pd, u64 length, u64 start_addr, int mode,
+				 struct mlx5_core_mkey *mkey)
+{
+	u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
+	void *mkc;
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+	MLX5_SET(mkc, mkc, access_mode_1_0, mode);
+	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
+	if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) {
+		MLX5_SET(mkc, mkc, rw, 1);
+		MLX5_SET(mkc, mkc, rr, 1);
+	}
+
+	MLX5_SET64(mkc, mkc, len, length);
+	MLX5_SET(mkc, mkc, pd, pd);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET64(mkc, mkc, start_addr, start_addr);
+
+	return mlx5_core_create_mkey(mdev, mkey, in, inlen);
+}
+
+static struct mlx5dr_icm_mr *
+dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
+		      enum mlx5_sw_icm_type type,
+		      size_t align_base)
+{
+	struct mlx5_core_dev *mdev = pool->dmn->mdev;
+	struct mlx5dr_icm_mr *icm_mr;
+	size_t align_diff;
+	int err;
+
+	icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
+	if (!icm_mr)
+		return NULL;
+
+	icm_mr->pool = pool;
+	INIT_LIST_HEAD(&icm_mr->mr_list);
+
+	icm_mr->dm.type = type;
+
+	/* 2^log_biggest_table * entry-size * double-for-alignment */
+	icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+							       pool->icm_type) * 2;
+
+	err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
+				   &icm_mr->dm.addr, &icm_mr->dm.obj_id);
+	if (err) {
+		mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
+		goto free_icm_mr;
+	}
+
+	/* Register device memory */
+	err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn,
+				    icm_mr->dm.length,
+				    icm_mr->dm.addr,
+				    MLX5_MKC_ACCESS_MODE_SW_ICM,
+				    &icm_mr->mkey);
+	if (err) {
+		mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err);
+		goto free_dm;
+	}
+
+	icm_mr->icm_start_addr = icm_mr->dm.addr;
+
+	/* align_base is always a power of 2 */
+	align_diff = icm_mr->icm_start_addr & (align_base - 1);
+	if (align_diff)
+		icm_mr->used_length = align_base - align_diff;
+
+	list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list);
+
+	return icm_mr;
+
+free_dm:
+	mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
+			       icm_mr->dm.addr, icm_mr->dm.obj_id);
+free_icm_mr:
+	kvfree(icm_mr);
+	return NULL;
+}
+
+static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
+{
+	struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev;
+	struct mlx5dr_icm_dm *dm = &icm_mr->dm;
+
+	list_del(&icm_mr->mr_list);
+	mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+	mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
+			       dm->addr, dm->obj_id);
+	kvfree(icm_mr);
+}
+
+static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
+{
+	struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+
+	chunk->ste_arr = kvzalloc(bucket->num_of_entries *
+				  sizeof(chunk->ste_arr[0]), GFP_KERNEL);
+	if (!chunk->ste_arr)
+		return -ENOMEM;
+
+	chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries *
+				     DR_STE_SIZE_REDUCED, GFP_KERNEL);
+	if (!chunk->hw_ste_arr)
+		goto out_free_ste_arr;
+
+	chunk->miss_list = kvmalloc(bucket->num_of_entries *
+				    sizeof(chunk->miss_list[0]), GFP_KERNEL);
+	if (!chunk->miss_list)
+		goto out_free_hw_ste_arr;
+
+	return 0;
+
+out_free_hw_ste_arr:
+	kvfree(chunk->hw_ste_arr);
+out_free_ste_arr:
+	kvfree(chunk->ste_arr);
+	return -ENOMEM;
+}
+
+static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket)
+{
+	size_t mr_free_size, mr_req_size, mr_row_size;
+	struct mlx5dr_icm_pool *pool = bucket->pool;
+	struct mlx5dr_icm_mr *icm_mr = NULL;
+	struct mlx5dr_icm_chunk *chunk;
+	enum mlx5_sw_icm_type dm_type;
+	size_t align_base;
+	int i, err = 0;
+
+	mr_req_size = bucket->num_of_entries * bucket->entry_size;
+	mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+							 pool->icm_type);
+
+	if (pool->icm_type == DR_ICM_TYPE_STE) {
+		dm_type = MLX5_SW_ICM_TYPE_STEERING;
+		/* Align base is the biggest chunk size / row size */
+		align_base = mr_row_size;
+	} else {
+		dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+		/* Align base is 64B */
+		align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE;
+	}
+
+	mutex_lock(&pool->mr_mutex);
+	if (!list_empty(&pool->icm_mr_list)) {
+		icm_mr = list_last_entry(&pool->icm_mr_list,
+					 struct mlx5dr_icm_mr, mr_list);
+
+		if (icm_mr)
+			mr_free_size = icm_mr->dm.length - icm_mr->used_length;
+	}
+
+	if (!icm_mr || mr_free_size < mr_row_size) {
+		icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base);
+		if (!icm_mr) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+	}
+
+	/* Create memory aligned chunks */
+	for (i = 0; i < mr_row_size / mr_req_size; i++) {
+		chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
+		if (!chunk) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+
+		chunk->bucket = bucket;
+		chunk->rkey = icm_mr->mkey.key;
+		/* mr start addr is zero based */
+		chunk->mr_addr = icm_mr->used_length;
+		chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length;
+		icm_mr->used_length += mr_req_size;
+		chunk->num_of_entries = bucket->num_of_entries;
+		chunk->byte_size = chunk->num_of_entries * bucket->entry_size;
+
+		if (pool->icm_type == DR_ICM_TYPE_STE) {
+			err = dr_icm_chunk_ste_init(chunk);
+			if (err)
+				goto out_free_chunk;
+		}
+
+		INIT_LIST_HEAD(&chunk->chunk_list);
+		list_add(&chunk->chunk_list, &bucket->free_list);
+		bucket->free_list_count++;
+		bucket->total_chunks++;
+	}
+	mutex_unlock(&pool->mr_mutex);
+	return 0;
+
+out_free_chunk:
+	kvfree(chunk);
+out_err:
+	mutex_unlock(&pool->mr_mutex);
+	return err;
+}
+
+static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
+{
+	kvfree(chunk->miss_list);
+	kvfree(chunk->hw_ste_arr);
+	kvfree(chunk->ste_arr);
+}
+
+static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk)
+{
+	struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+
+	list_del(&chunk->chunk_list);
+	bucket->total_chunks--;
+
+	if (bucket->pool->icm_type == DR_ICM_TYPE_STE)
+		dr_icm_chunk_ste_cleanup(chunk);
+
+	kvfree(chunk);
+}
+
+static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool,
+			       struct mlx5dr_icm_bucket *bucket,
+			       enum mlx5dr_icm_chunk_size chunk_size)
+{
+	if (pool->icm_type == DR_ICM_TYPE_STE)
+		bucket->entry_size = DR_STE_SIZE;
+	else
+		bucket->entry_size = DR_MODIFY_ACTION_SIZE;
+
+	bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
+	bucket->pool = pool;
+	mutex_init(&bucket->mutex);
+	INIT_LIST_HEAD(&bucket->free_list);
+	INIT_LIST_HEAD(&bucket->used_list);
+	INIT_LIST_HEAD(&bucket->hot_list);
+	INIT_LIST_HEAD(&bucket->sync_list);
+}
+
+static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket)
+{
+	struct mlx5dr_icm_chunk *chunk, *next;
+
+	mutex_destroy(&bucket->mutex);
+	list_splice_tail_init(&bucket->sync_list, &bucket->free_list);
+	list_splice_tail_init(&bucket->hot_list, &bucket->free_list);
+
+	list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list)
+		dr_icm_chunk_destroy(chunk);
+
+	WARN_ON(bucket->total_chunks != 0);
+
+	/* Cleanup of unreturned chunks */
+	list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list)
+		dr_icm_chunk_destroy(chunk);
+}
+
+static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool)
+{
+	u64 hot_size = 0;
+	int chunk_order;
+
+	for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++)
+		hot_size += pool->buckets[chunk_order].hot_list_count *
+			    mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type);
+
+	return hot_size;
+}
+
+static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool,
+				     struct mlx5dr_icm_bucket *bucket)
+{
+	u64 bytes_for_sync;
+
+	bytes_for_sync = dr_icm_hot_mem_size(pool);
+	if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count)
+		return false;
+
+	return true;
+}
+
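+/* Chill protocol for recycling hot chunks: start moves hot -> sync; after a
+ * successful device sync, end moves sync -> free; if the sync fails, abort
+ * moves sync back to hot.
+ */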
+static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket)
+{
+	list_splice_tail_init(&bucket->hot_list, &bucket->sync_list);
+	bucket->sync_list_count += bucket->hot_list_count;
+	bucket->hot_list_count = 0;
+}
+
+static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket)
+{
+	list_splice_tail_init(&bucket->sync_list, &bucket->free_list);
+	bucket->free_list_count += bucket->sync_list_count;
+	bucket->sync_list_count = 0;
+}
+
+static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket)
+{
+	list_splice_tail_init(&bucket->sync_list, &bucket->hot_list);
+	bucket->hot_list_count += bucket->sync_list_count;
+	bucket->sync_list_count = 0;
+}
+
+static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool,
+				       struct mlx5dr_icm_bucket *cb,
+				       bool buckets[DR_CHUNK_SIZE_MAX])
+{
+	struct mlx5dr_icm_bucket *bucket;
+	int i;
+
+	for (i = 0; i < pool->num_of_buckets; i++) {
+		bucket = &pool->buckets[i];
+		if (bucket == cb) {
+			dr_icm_chill_bucket_start(bucket);
+			continue;
+		}
+
+		/* The mutex is released at the end of this process, after
+		 * sync_ste has been executed in dr_icm_chill_buckets_end().
+		 */
+		if (mutex_trylock(&bucket->mutex)) {
+			dr_icm_chill_bucket_start(bucket);
+			buckets[i] = true;
+		}
+	}
+}
+
+static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool,
+				     struct mlx5dr_icm_bucket *cb,
+				     bool buckets[DR_CHUNK_SIZE_MAX])
+{
+	struct mlx5dr_icm_bucket *bucket;
+	int i;
+
+	for (i = 0; i < pool->num_of_buckets; i++) {
+		bucket = &pool->buckets[i];
+		if (bucket == cb) {
+			dr_icm_chill_bucket_end(bucket);
+			continue;
+		}
+
+		if (!buckets[i])
+			continue;
+
+		dr_icm_chill_bucket_end(bucket);
+		mutex_unlock(&bucket->mutex);
+	}
+}
+
+static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool,
+				       struct mlx5dr_icm_bucket *cb,
+				       bool buckets[DR_CHUNK_SIZE_MAX])
+{
+	struct mlx5dr_icm_bucket *bucket;
+	int i;
+
+	for (i = 0; i < pool->num_of_buckets; i++) {
+		bucket = &pool->buckets[i];
+		if (bucket == cb) {
+			dr_icm_chill_bucket_abort(bucket);
+			continue;
+		}
+
+		if (!buckets[i])
+			continue;
+
+		dr_icm_chill_bucket_abort(bucket);
+		mutex_unlock(&bucket->mutex);
+	}
+}
+
+/* Allocate an ICM chunk. Besides the piece of ICM memory itself, each chunk
+ * also holds the SW shadow memory used to manage the HW STEs, kept in
+ * regular memory as an optimization.
+ */
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+		       enum mlx5dr_icm_chunk_size chunk_size)
+{
+	struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */
+	bool buckets[DR_CHUNK_SIZE_MAX] = {};
+	struct mlx5dr_icm_bucket *bucket;
+	int err;
+
+	if (chunk_size > pool->max_log_chunk_sz)
+		return NULL;
+
+	bucket = &pool->buckets[chunk_size];
+
+	mutex_lock(&bucket->mutex);
+
+	/* Take chunk from pool if available, otherwise allocate new chunks */
+	if (list_empty(&bucket->free_list)) {
+		if (dr_icm_reuse_hot_entries(pool, bucket)) {
+			dr_icm_chill_buckets_start(pool, bucket, buckets);
+			err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
+			if (err) {
+				dr_icm_chill_buckets_abort(pool, bucket, buckets);
+				mlx5dr_dbg(pool->dmn, "Sync_steering failed\n");
+				chunk = NULL;
+				goto out;
+			}
+			dr_icm_chill_buckets_end(pool, bucket, buckets);
+		} else {
+			dr_icm_chunks_create(bucket);
+		}
+	}
+
+	if (!list_empty(&bucket->free_list)) {
+		chunk = list_last_entry(&bucket->free_list,
+					struct mlx5dr_icm_chunk,
+					chunk_list);
+		if (chunk) {
+			list_del_init(&chunk->chunk_list);
+			list_add_tail(&chunk->chunk_list, &bucket->used_list);
+			bucket->free_list_count--;
+			bucket->used_list_count++;
+		}
+	}
+out:
+	mutex_unlock(&bucket->mutex);
+	return chunk;
+}
+
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
+{
+	struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+
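+	/* Zero the SW shadow of the STEs so the chunk is handed out clean
+	 * the next time it leaves the free list.
+	 */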
+	if (bucket->pool->icm_type == DR_ICM_TYPE_STE) {
+		memset(chunk->ste_arr, 0,
+		       bucket->num_of_entries * sizeof(chunk->ste_arr[0]));
+		memset(chunk->hw_ste_arr, 0,
+		       bucket->num_of_entries * DR_STE_SIZE_REDUCED);
+	}
+
+	mutex_lock(&bucket->mutex);
+	list_del_init(&chunk->chunk_list);
+	list_add_tail(&chunk->chunk_list, &bucket->hot_list);
+	bucket->hot_list_count++;
+	bucket->used_list_count--;
+	mutex_unlock(&bucket->mutex);
+}
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+					       enum mlx5dr_icm_type icm_type)
+{
+	enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+	struct mlx5dr_icm_pool *pool;
+	int i;
+
+	if (icm_type == DR_ICM_TYPE_STE)
+		max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
+	else
+		max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
+
+	pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return NULL;
+
+	pool->buckets = kcalloc(max_log_chunk_sz + 1,
+				sizeof(pool->buckets[0]),
+				GFP_KERNEL);
+	if (!pool->buckets)
+		goto free_pool;
+
+	pool->dmn = dmn;
+	pool->icm_type = icm_type;
+	pool->max_log_chunk_sz = max_log_chunk_sz;
+	pool->num_of_buckets = max_log_chunk_sz + 1;
+	INIT_LIST_HEAD(&pool->icm_mr_list);
+
+	for (i = 0; i < pool->num_of_buckets; i++)
+		dr_icm_bucket_init(pool, &pool->buckets[i], i);
+
+	mutex_init(&pool->mr_mutex);
+
+	return pool;
+
+free_pool:
+	kvfree(pool);
+	return NULL;
+}
+
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
+{
+	struct mlx5dr_icm_mr *icm_mr, *next;
+	int i;
+
+	mutex_destroy(&pool->mr_mutex);
+
+	list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list)
+		dr_icm_pool_mr_destroy(icm_mr);
+
+	for (i = 0; i < pool->num_of_buckets; i++)
+		dr_icm_bucket_cleanup(&pool->buckets[i]);
+
+	kfree(pool->buckets);
+	kvfree(pool);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
new file mode 100644
index 0000000..67dea76
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->smac_47_16 || spec->smac_15_0);
+}
+
+static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->dmac_47_16 || spec->dmac_15_0);
+}
+
+static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->src_ip_127_96 || spec->src_ip_95_64 ||
+		spec->src_ip_63_32 || spec->src_ip_31_0);
+}
+
+static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->dst_ip_127_96 || spec->dst_ip_95_64 ||
+		spec->dst_ip_63_32 || spec->dst_ip_31_0);
+}
+
+static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
+		spec->ip_ecn || spec->ip_dscp);
+}
+
+static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->tcp_sport || spec->tcp_dport ||
+		spec->udp_sport || spec->udp_dport);
+}
+
+static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec)
+{
+	return (spec->dst_ip_31_0 || spec->src_ip_31_0);
+}
+
+static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec)
+{
+	return (dr_mask_is_l3_base_set(spec) ||
+		dr_mask_is_tcp_udp_base_set(spec) ||
+		dr_mask_is_ipv4_set(spec));
+}
+
+static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc)
+{
+	return misc->vxlan_vni;
+}
+
+static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec)
+{
+	return spec->ttl_hoplimit;
+}
+
+#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) ((_spec).first_vid || \
+	(_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \
+	(_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \
+	(_spec).ethertype || (_spec).ip_version || \
+	(_misc)._inner_outer##_second_vid || \
+	(_misc)._inner_outer##_second_cfi || \
+	(_misc)._inner_outer##_second_prio || \
+	(_misc)._inner_outer##_second_cvlan_tag || \
+	(_misc)._inner_outer##_second_svlan_tag)
+
+#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \
+	dr_mask_is_l3_base_set(&(_spec)) || \
+	dr_mask_is_tcp_udp_base_set(&(_spec)) || \
+	dr_mask_is_ttl_set(&(_spec)) || \
+	(_misc)._inner_outer##_ipv6_flow_label)
+
+#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \
+	(_misc3)._inner_outer##_tcp_seq_num || \
+	(_misc3)._inner_outer##_tcp_ack_num)
+
+#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, _inner_outer) ( \
+	(_misc2)._inner_outer##_first_mpls_label || \
+	(_misc2)._inner_outer##_first_mpls_exp || \
+	(_misc2)._inner_outer##_first_mpls_s_bos || \
+	(_misc2)._inner_outer##_first_mpls_ttl)
+
+static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc)
+{
+	return (misc->gre_key_h || misc->gre_key_l ||
+		misc->gre_protocol || misc->gre_c_present ||
+		misc->gre_k_present || misc->gre_s_present);
+}
+
+#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \
+	(_misc2).outer_first_mpls_over_##gre_udp##_label || \
+	(_misc2).outer_first_mpls_over_##gre_udp##_exp || \
+	(_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \
+	(_misc2).outer_first_mpls_over_##gre_udp##_ttl)
+
+#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \
+	DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \
+	DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp))
+
+static bool dr_mask_is_flex_parser_tnl_set(struct mlx5dr_match_misc3 *misc3)
+{
+	return (misc3->outer_vxlan_gpe_vni ||
+		misc3->outer_vxlan_gpe_next_protocol ||
+		misc3->outer_vxlan_gpe_flags);
+}
+
+static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3)
+{
+	return (misc3->icmpv6_type || misc3->icmpv6_code ||
+		misc3->icmpv6_header_data);
+}
+
+static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2)
+{
+	return misc2->metadata_reg_a;
+}
+
+static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2)
+{
+	return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 ||
+		misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3);
+}
+
+static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2)
+{
+	return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 ||
+		misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7);
+}
+
+static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc)
+{
+	return (misc->source_sqn || misc->source_port);
+}
+
+static bool
+dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_domain *dmn)
+{
+	return dmn->info.caps.flex_protocols &
+	       MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED;
+}
+
+int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
+				   struct mlx5dr_matcher_rx_tx *nic_matcher,
+				   bool ipv6)
+{
+	if (ipv6) {
+		nic_matcher->ste_builder = nic_matcher->ste_builder6;
+		nic_matcher->num_of_builders = nic_matcher->num_of_builders6;
+	} else {
+		nic_matcher->ste_builder = nic_matcher->ste_builder4;
+		nic_matcher->num_of_builders = nic_matcher->num_of_builders4;
+	}
+
+	if (!nic_matcher->num_of_builders) {
+		mlx5dr_dbg(matcher->tbl->dmn,
+			   "Rule not supported on this matcher due to IP related fields\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
+				       struct mlx5dr_matcher_rx_tx *nic_matcher,
+				       bool ipv6)
+{
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_match_param mask = {};
+	struct mlx5dr_match_misc3 *misc3;
+	struct mlx5dr_ste_build *sb;
+	u8 *num_of_builders;
+	bool inner, rx;
+	int idx = 0;
+	int ret, i;
+
+	if (ipv6) {
+		sb = nic_matcher->ste_builder6;
+		num_of_builders = &nic_matcher->num_of_builders6;
+	} else {
+		sb = nic_matcher->ste_builder4;
+		num_of_builders = &nic_matcher->num_of_builders4;
+	}
+
+	rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+
+	/* Create a temporary mask to track and clear used mask fields */
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER)
+		mask.outer = matcher->mask.outer;
+
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC)
+		mask.misc = matcher->mask.misc;
+
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER)
+		mask.inner = matcher->mask.inner;
+
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2)
+		mask.misc2 = matcher->mask.misc2;
+
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3)
+		mask.misc3 = matcher->mask.misc3;
+
+	ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
+					 &matcher->mask, NULL);
+	if (ret)
+		return ret;
+
+	/* Outer */
+	if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
+				       DR_MATCHER_CRITERIA_MISC |
+				       DR_MATCHER_CRITERIA_MISC2 |
+				       DR_MATCHER_CRITERIA_MISC3)) {
+		inner = false;
+
+		if (dr_mask_is_wqe_metadata_set(&mask.misc2))
+			mlx5dr_ste_build_general_purpose(&sb[idx++], &mask, inner, rx);
+
+		if (dr_mask_is_reg_c_0_3_set(&mask.misc2))
+			mlx5dr_ste_build_register_0(&sb[idx++], &mask, inner, rx);
+
+		if (dr_mask_is_reg_c_4_7_set(&mask.misc2))
+			mlx5dr_ste_build_register_1(&sb[idx++], &mask, inner, rx);
+
+		if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) &&
+		    (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+		     dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
+			ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
+							    dmn, inner, rx);
+			if (ret)
+				return ret;
+		}
+
+		if (dr_mask_is_smac_set(&mask.outer) &&
+		    dr_mask_is_dmac_set(&mask.outer)) {
+			ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask,
+							      inner, rx);
+			if (ret)
+				return ret;
+		}
+
+		if (dr_mask_is_smac_set(&mask.outer))
+			mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx);
+
+		if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer))
+			mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+
+		if (ipv6) {
+			if (dr_mask_is_dst_addr_set(&mask.outer))
+				mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
+								 inner, rx);
+
+			if (dr_mask_is_src_addr_set(&mask.outer))
+				mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask,
+								 inner, rx);
+
+			if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer))
+				mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask,
+							    inner, rx);
+		} else {
+			if (dr_mask_is_ipv4_5_tuple_set(&mask.outer))
+				mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask,
+								     inner, rx);
+
+			if (dr_mask_is_ttl_set(&mask.outer))
+				mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask,
+								  inner, rx);
+		}
+
+		if (dr_mask_is_flex_parser_tnl_set(&mask.misc3) &&
+		    dr_matcher_supp_flex_parser_vxlan_gpe(dmn))
+			mlx5dr_ste_build_flex_parser_tnl(&sb[idx++], &mask,
+							 inner, rx);
+
+		if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
+			mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx);
+
+		if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer))
+			mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx);
+
+		if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2))
+			mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask,
+						       inner, rx);
+
+		misc3 = &mask.misc3;
+		if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) &&
+		     mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) ||
+		    (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) &&
+		     mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) {
+			ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++],
+							     &mask, &dmn->info.caps,
+							     inner, rx);
+			if (ret)
+				return ret;
+		}
+		if (dr_mask_is_gre_set(&mask.misc))
+			mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx);
+	}
+
+	/* Inner */
+	if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER |
+				       DR_MATCHER_CRITERIA_MISC |
+				       DR_MATCHER_CRITERIA_MISC2 |
+				       DR_MATCHER_CRITERIA_MISC3)) {
+		inner = true;
+
+		if (dr_mask_is_eth_l2_tnl_set(&mask.misc))
+			mlx5dr_ste_build_eth_l2_tnl(&sb[idx++], &mask, inner, rx);
+
+		if (dr_mask_is_smac_set(&mask.inner) &&
+		    dr_mask_is_dmac_set(&mask.inner)) {
+			ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++],
+							      &mask, inner, rx);
+			if (ret)
+				return ret;
+		}
+
+		if (dr_mask_is_smac_set(&mask.inner))
+			mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx);
+
+		if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner))
+			mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+
+		if (ipv6) {
+			if (dr_mask_is_dst_addr_set(&mask.inner))
+				mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
+								 inner, rx);
+
+			if (dr_mask_is_src_addr_set(&mask.inner))
+				mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask,
+								 inner, rx);
+
+			if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner))
+				mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask,
+							    inner, rx);
+		} else {
+			if (dr_mask_is_ipv4_5_tuple_set(&mask.inner))
+				mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask,
+								     inner, rx);
+
+			if (dr_mask_is_ttl_set(&mask.inner))
+				mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask,
+								  inner, rx);
+		}
+
+		if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner))
+			mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx);
+
+		if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner))
+			mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx);
+
+		if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2))
+			mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx);
+	}
+	/* Empty matcher, takes all */
+	if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
+		mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
+
+	if (idx == 0) {
+		mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n");
+		return -EINVAL;
+	}
+
+	/* Check that all mask fields were consumed */
+	for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) {
+		if (((u8 *)&mask)[i] != 0) {
+			mlx5dr_info(dmn, "Mask contains unsupported parameters\n");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	*num_of_builders = idx;
+
+	return 0;
+}
+
+static int dr_matcher_connect(struct mlx5dr_domain *dmn,
+			      struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
+			      struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+			      struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+{
+	struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl;
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+	struct mlx5dr_htbl_connect_info info;
+	struct mlx5dr_ste_htbl *prev_htbl;
+	int ret;
+
+	/* Connect end anchor hash table to next_htbl or to the default address */
+	if (next_nic_matcher) {
+		info.type = CONNECT_HIT;
+		info.hit_next_htbl = next_nic_matcher->s_htbl;
+	} else {
+		info.type = CONNECT_MISS;
+		info.miss_icm_addr = nic_tbl->default_icm_addr;
+	}
+	ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+						curr_nic_matcher->e_anchor,
+						&info, info.type == CONNECT_HIT);
+	if (ret)
+		return ret;
+
+	/* Connect start hash table to end anchor */
+	info.type = CONNECT_MISS;
+	info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr;
+	ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+						curr_nic_matcher->s_htbl,
+						&info, false);
+	if (ret)
+		return ret;
+
+	/* Connect previous hash table to matcher start hash table */
+	if (prev_nic_matcher)
+		prev_htbl = prev_nic_matcher->e_anchor;
+	else
+		prev_htbl = nic_tbl->s_anchor;
+
+	info.type = CONNECT_HIT;
+	info.hit_next_htbl = curr_nic_matcher->s_htbl;
+	ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl,
+						&info, true);
+	if (ret)
+		return ret;
+
+	/* Update the pointing ste and next hash table */
+	curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr;
+	prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
+
+	if (next_nic_matcher) {
+		next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr;
+		curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+	}
+
+	return 0;
+}
+
+static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+{
+	struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher;
+	struct mlx5dr_table *tbl = matcher->tbl;
+	struct mlx5dr_domain *dmn = tbl->dmn;
+	bool first = true;
+	int ret;
+
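+	/* Matchers are kept sorted by priority; the first existing matcher
+	 * with a priority >= ours becomes our next matcher, and the new
+	 * matcher is linked between it and its predecessor.
+	 */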
+	next_matcher = NULL;
+	if (!list_empty(&tbl->matcher_list))
+		list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) {
+			if (tmp_matcher->prio >= matcher->prio) {
+				next_matcher = tmp_matcher;
+				break;
+			}
+			first = false;
+		}
+
+	prev_matcher = NULL;
+	if (next_matcher && !first)
+		prev_matcher = list_prev_entry(next_matcher, matcher_list);
+	else if (!first)
+		prev_matcher = list_last_entry(&tbl->matcher_list,
+					       struct mlx5dr_matcher,
+					       matcher_list);
+
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+	    dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+		ret = dr_matcher_connect(dmn, &matcher->rx,
+					 next_matcher ? &next_matcher->rx : NULL,
+					 prev_matcher ? &prev_matcher->rx : NULL);
+		if (ret)
+			return ret;
+	}
+
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+	    dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+		ret = dr_matcher_connect(dmn, &matcher->tx,
+					 next_matcher ? &next_matcher->tx : NULL,
+					 prev_matcher ?	&prev_matcher->tx : NULL);
+		if (ret)
+			return ret;
+	}
+
+	if (prev_matcher)
+		list_add(&matcher->matcher_list, &prev_matcher->matcher_list);
+	else if (next_matcher)
+		list_add_tail(&matcher->matcher_list,
+			      &next_matcher->matcher_list);
+	else
+		list_add(&matcher->matcher_list, &tbl->matcher_list);
+
+	return 0;
+}
+
+static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+	mlx5dr_htbl_put(nic_matcher->s_htbl);
+	mlx5dr_htbl_put(nic_matcher->e_anchor);
+}
+
+static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher)
+{
+	dr_matcher_uninit_nic(&matcher->rx);
+	dr_matcher_uninit_nic(&matcher->tx);
+}
+
+static void dr_matcher_uninit(struct mlx5dr_matcher *matcher)
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+
+	switch (dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		dr_matcher_uninit_nic(&matcher->rx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		dr_matcher_uninit_nic(&matcher->tx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		dr_matcher_uninit_fdb(matcher);
+		break;
+	default:
+		WARN_ON(true);
+		break;
+	}
+}
+
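+/* Each NIC matcher keeps two STE builder arrays, one per IP version;
+ * init succeeds if at least one of them can consume the given mask.
+ */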
+static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
+			       struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	int ret, ret_v4, ret_v6;
+
+	ret_v4 = dr_matcher_set_ste_builders(matcher, nic_matcher, false);
+	ret_v6 = dr_matcher_set_ste_builders(matcher, nic_matcher, true);
+
+	if (ret_v4 && ret_v6) {
+		mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
+		return -EINVAL;
+	}
+
+	if (!ret_v4)
+		nic_matcher->ste_builder = nic_matcher->ste_builder4;
+	else
+		nic_matcher->ste_builder = nic_matcher->ste_builder6;
+
+	nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+						      DR_CHUNK_SIZE_1,
+						      MLX5DR_STE_LU_TYPE_DONT_CARE,
+						      0);
+	if (!nic_matcher->e_anchor)
+		return -ENOMEM;
+
+	nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+						    DR_CHUNK_SIZE_1,
+						    nic_matcher->ste_builder[0].lu_type,
+						    nic_matcher->ste_builder[0].byte_mask);
+	if (!nic_matcher->s_htbl) {
+		ret = -ENOMEM;
+		goto free_e_htbl;
+	}
+
+	/* Take a ref so the tables are kept alive even while empty */
+	mlx5dr_htbl_get(nic_matcher->s_htbl);
+	mlx5dr_htbl_get(nic_matcher->e_anchor);
+
+	return 0;
+
+free_e_htbl:
+	mlx5dr_ste_htbl_free(nic_matcher->e_anchor);
+	return ret;
+}
+
+static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher)
+{
+	int ret;
+
+	ret = dr_matcher_init_nic(matcher, &matcher->rx);
+	if (ret)
+		return ret;
+
+	ret = dr_matcher_init_nic(matcher, &matcher->tx);
+	if (ret)
+		goto uninit_nic_rx;
+
+	return 0;
+
+uninit_nic_rx:
+	dr_matcher_uninit_nic(&matcher->rx);
+	return ret;
+}
+
+static int dr_matcher_init(struct mlx5dr_matcher *matcher,
+			   struct mlx5dr_match_parameters *mask)
+{
+	struct mlx5dr_table *tbl = matcher->tbl;
+	struct mlx5dr_domain *dmn = tbl->dmn;
+	int ret;
+
+	if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) {
+		mlx5dr_info(dmn, "Invalid match criteria attribute\n");
+		return -EINVAL;
+	}
+
+	if (mask) {
+		if (mask->match_sz > sizeof(struct mlx5dr_match_param)) {
+			mlx5dr_info(dmn, "Invalid match size attribute\n");
+			return -EINVAL;
+		}
+		mlx5dr_ste_copy_param(matcher->match_criteria,
+				      &matcher->mask, mask);
+	}
+
+	switch (dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		matcher->rx.nic_tbl = &tbl->rx;
+		ret = dr_matcher_init_nic(matcher, &matcher->rx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		matcher->tx.nic_tbl = &tbl->tx;
+		ret = dr_matcher_init_nic(matcher, &matcher->tx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		matcher->rx.nic_tbl = &tbl->rx;
+		matcher->tx.nic_tbl = &tbl->tx;
+		ret = dr_matcher_init_fdb(matcher);
+		break;
+	default:
+		WARN_ON(true);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *tbl,
+		      u16 priority,
+		      u8 match_criteria_enable,
+		      struct mlx5dr_match_parameters *mask)
+{
+	struct mlx5dr_matcher *matcher;
+	int ret;
+
+	refcount_inc(&tbl->refcount);
+
+	matcher = kzalloc(sizeof(*matcher), GFP_KERNEL);
+	if (!matcher)
+		goto dec_ref;
+
+	matcher->tbl = tbl;
+	matcher->prio = priority;
+	matcher->match_criteria = match_criteria_enable;
+	refcount_set(&matcher->refcount, 1);
+	INIT_LIST_HEAD(&matcher->matcher_list);
+
+	mutex_lock(&tbl->dmn->mutex);
+
+	ret = dr_matcher_init(matcher, mask);
+	if (ret)
+		goto free_matcher;
+
+	ret = dr_matcher_add_to_tbl(matcher);
+	if (ret)
+		goto matcher_uninit;
+
+	mutex_unlock(&tbl->dmn->mutex);
+
+	return matcher;
+
+matcher_uninit:
+	dr_matcher_uninit(matcher);
+free_matcher:
+	mutex_unlock(&tbl->dmn->mutex);
+	kfree(matcher);
+dec_ref:
+	refcount_dec(&tbl->refcount);
+	return NULL;
+}
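+
+/* A minimal usage sketch (error handling elided; assumes a previously
+ * created table @tbl and a device-format mask buffer @mask_buf):
+ *
+ *	struct mlx5dr_match_parameters mask = {
+ *		.match_sz = sizeof(struct mlx5dr_match_param),
+ *		.match_buf = mask_buf,
+ *	};
+ *	struct mlx5dr_matcher *matcher;
+ *
+ *	matcher = mlx5dr_matcher_create(tbl, 10, DR_MATCHER_CRITERIA_OUTER,
+ *					&mask);
+ */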
+
+static int dr_matcher_disconnect(struct mlx5dr_domain *dmn,
+				 struct mlx5dr_table_rx_tx *nic_tbl,
+				 struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+				 struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+{
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+	struct mlx5dr_htbl_connect_info info;
+	struct mlx5dr_ste_htbl *prev_anchor;
+
+	if (prev_nic_matcher)
+		prev_anchor = prev_nic_matcher->e_anchor;
+	else
+		prev_anchor = nic_tbl->s_anchor;
+
+	/* Connect previous anchor hash table to next matcher or to the default address */
+	if (next_nic_matcher) {
+		info.type = CONNECT_HIT;
+		info.hit_next_htbl = next_nic_matcher->s_htbl;
+		next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr;
+		prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+	} else {
+		info.type = CONNECT_MISS;
+		info.miss_icm_addr = nic_tbl->default_icm_addr;
+		prev_anchor->ste_arr[0].next_htbl = NULL;
+	}
+
+	return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor,
+						 &info, true);
+}
+
+static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher)
+{
+	struct mlx5dr_matcher *prev_matcher, *next_matcher;
+	struct mlx5dr_table *tbl = matcher->tbl;
+	struct mlx5dr_domain *dmn = tbl->dmn;
+	int ret = 0;
+
+	if (list_is_last(&matcher->matcher_list, &tbl->matcher_list))
+		next_matcher = NULL;
+	else
+		next_matcher = list_next_entry(matcher, matcher_list);
+
+	if (matcher->matcher_list.prev == &tbl->matcher_list)
+		prev_matcher = NULL;
+	else
+		prev_matcher = list_prev_entry(matcher, matcher_list);
+
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+	    dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+		ret = dr_matcher_disconnect(dmn, &tbl->rx,
+					    next_matcher ? &next_matcher->rx : NULL,
+					    prev_matcher ? &prev_matcher->rx : NULL);
+		if (ret)
+			return ret;
+	}
+
+	if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+	    dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+		ret = dr_matcher_disconnect(dmn, &tbl->tx,
+					    next_matcher ? &next_matcher->tx : NULL,
+					    prev_matcher ? &prev_matcher->tx : NULL);
+		if (ret)
+			return ret;
+	}
+
+	list_del(&matcher->matcher_list);
+
+	return 0;
+}
+
+int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+	struct mlx5dr_table *tbl = matcher->tbl;
+
+	if (refcount_read(&matcher->refcount) > 1)
+		return -EBUSY;
+
+	mutex_lock(&tbl->dmn->mutex);
+
+	dr_matcher_remove_from_tbl(matcher);
+	dr_matcher_uninit(matcher);
+	refcount_dec(&matcher->tbl->refcount);
+
+	mutex_unlock(&tbl->dmn->mutex);
+	kfree(matcher);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
new file mode 100644
index 0000000..bd1699e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -0,0 +1,1258 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES)
+
+struct mlx5dr_rule_action_member {
+	struct mlx5dr_action *action;
+	struct list_head list;
+};
+
+static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste,
+				       struct list_head *miss_list,
+				       struct list_head *send_list)
+{
+	struct mlx5dr_ste_send_info *ste_info_last;
+	struct mlx5dr_ste *last_ste;
+
+	/* The new entry will be inserted after the last */
+	last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node);
+	WARN_ON(!last_ste);
+
+	ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL);
+	if (!ste_info_last)
+		return -ENOMEM;
+
+	mlx5dr_ste_set_miss_addr(last_ste->hw_ste,
+				 mlx5dr_ste_get_icm_addr(new_last_ste));
+	list_add_tail(&new_last_ste->miss_list_node, miss_list);
+
+	mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED,
+						  0, last_ste->hw_ste,
+						  ste_info_last, send_list, true);
+
+	return 0;
+}
+
+static struct mlx5dr_ste *
+dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
+			      struct mlx5dr_matcher_rx_tx *nic_matcher,
+			      u8 *hw_ste)
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_ste_htbl *new_htbl;
+	struct mlx5dr_ste *ste;
+
+	/* Create new table for miss entry */
+	new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+					 DR_CHUNK_SIZE_1,
+					 MLX5DR_STE_LU_TYPE_DONT_CARE,
+					 0);
+	if (!new_htbl) {
+		mlx5dr_dbg(dmn, "Failed allocating collision table\n");
+		return NULL;
+	}
+
+	/* One and only entry, never grows */
+	ste = new_htbl->ste_arr;
+	mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+	mlx5dr_htbl_get(new_htbl);
+
+	return ste;
+}
+
+static struct mlx5dr_ste *
+dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
+			       struct mlx5dr_matcher_rx_tx *nic_matcher,
+			       u8 *hw_ste,
+			       struct mlx5dr_ste *orig_ste)
+{
+	struct mlx5dr_ste *ste;
+
+	ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste);
+	if (!ste) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n");
+		return NULL;
+	}
+
+	ste->ste_chain_location = orig_ste->ste_chain_location;
+
+	/* In collision entry, all members share the same miss_list_head */
+	ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
+
+	/* Next table */
+	if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste,
+					DR_CHUNK_SIZE_1)) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n");
+		goto free_tbl;
+	}
+
+	return ste;
+
+free_tbl:
+	mlx5dr_ste_free(ste, matcher, nic_matcher);
+	return NULL;
+}
+
+static int
+dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info,
+				      struct mlx5dr_domain *dmn)
+{
+	int ret;
+
+	list_del(&ste_info->send_list);
+	ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data,
+				       ste_info->size, ste_info->offset);
+	if (ret)
+		goto out;
+	/* Copy the data to the STE, only the reduced size; the last 16B
+	 * (the mask) are already written to the HW.
+	 */
+	memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED);
+
+out:
+	kfree(ste_info);
+	return ret;
+}
+
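+/* Flush the pending ste_info writes to HW. Rule insertion passes
+ * is_reverse so that each STE is written before the STE pointing to
+ * it; rehash flows use the regular order (see dr_rule_rehash_htbl()).
+ */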
+static int dr_rule_send_update_list(struct list_head *send_ste_list,
+				    struct mlx5dr_domain *dmn,
+				    bool is_reverse)
+{
+	struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info;
+	int ret;
+
+	if (is_reverse) {
+		list_for_each_entry_safe_reverse(ste_info, tmp_ste_info,
+						 send_ste_list, send_list) {
+			ret = dr_rule_handle_one_ste_in_update_list(ste_info,
+								    dmn);
+			if (ret)
+				return ret;
+		}
+	} else {
+		list_for_each_entry_safe(ste_info, tmp_ste_info,
+					 send_ste_list, send_list) {
+			ret = dr_rule_handle_one_ste_in_update_list(ste_info,
+								    dmn);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static struct mlx5dr_ste *
+dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste)
+{
+	struct mlx5dr_ste *ste;
+
+	if (list_empty(miss_list))
+		return NULL;
+
+	/* Check if hw_ste is present in the list */
+	list_for_each_entry(ste, miss_list, miss_list_node) {
+		if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste))
+			return ste;
+	}
+
+	return NULL;
+}
+
+static struct mlx5dr_ste *
+dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
+				struct mlx5dr_matcher_rx_tx *nic_matcher,
+				struct list_head *update_list,
+				struct mlx5dr_ste *col_ste,
+				u8 *hw_ste)
+{
+	struct mlx5dr_ste *new_ste;
+	int ret;
+
+	new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste);
+	if (!new_ste)
+		return NULL;
+
+	/* In collision entry, all members share the same miss_list_head */
+	new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste);
+
+	/* Update the previous entry in the miss list */
+	ret = dr_rule_append_to_miss_list(new_ste,
+					  mlx5dr_ste_get_miss_list(col_ste),
+					  update_list);
+	if (ret) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Failed update dup entry\n");
+		goto err_exit;
+	}
+
+	return new_ste;
+
+err_exit:
+	mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+	return NULL;
+}
+
+static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
+					 struct mlx5dr_matcher_rx_tx *nic_matcher,
+					 struct mlx5dr_ste *cur_ste,
+					 struct mlx5dr_ste *new_ste)
+{
+	new_ste->next_htbl = cur_ste->next_htbl;
+	new_ste->ste_chain_location = cur_ste->ste_chain_location;
+
+	if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location))
+		new_ste->next_htbl->pointing_ste = new_ste;
+
+	/* We need to copy the refcount since this ste
+	 * may have been traversed several times
+	 */
+	refcount_set(&new_ste->refcount, refcount_read(&cur_ste->refcount));
+
+	/* Link the old STE's rule_mem list to the new STE */
+	mlx5dr_rule_update_rule_member(cur_ste, new_ste);
+	INIT_LIST_HEAD(&new_ste->rule_list);
+	list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list);
+}
+
+static struct mlx5dr_ste *
+dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
+			struct mlx5dr_matcher_rx_tx *nic_matcher,
+			struct mlx5dr_ste *cur_ste,
+			struct mlx5dr_ste_htbl *new_htbl,
+			struct list_head *update_list)
+{
+	struct mlx5dr_ste_send_info *ste_info;
+	bool use_update_list = false;
+	u8 hw_ste[DR_STE_SIZE] = {};
+	struct mlx5dr_ste *new_ste;
+	int new_idx;
+	u8 sb_idx;
+
+	/* Copy STE mask from the matcher */
+	sb_idx = cur_ste->ste_chain_location - 1;
+	mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask);
+
+	/* Copy STE control and tag */
+	memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED);
+	mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+
+	new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
+	new_ste = &new_htbl->ste_arr[new_idx];
+
+	if (mlx5dr_ste_not_used_ste(new_ste)) {
+		mlx5dr_htbl_get(new_htbl);
+		list_add_tail(&new_ste->miss_list_node,
+			      mlx5dr_ste_get_miss_list(new_ste));
+	} else {
+		new_ste = dr_rule_rehash_handle_collision(matcher,
+							  nic_matcher,
+							  update_list,
+							  new_ste,
+							  hw_ste);
+		if (!new_ste) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Failed adding collision entry, index: %d\n",
+				   new_idx);
+			return NULL;
+		}
+		new_htbl->ctrl.num_of_collisions++;
+		use_update_list = true;
+	}
+
+	memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED);
+
+	new_htbl->ctrl.num_of_valid_entries++;
+
+	if (use_update_list) {
+		ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+		if (!ste_info)
+			goto err_exit;
+
+		mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0,
+							  hw_ste, ste_info,
+							  update_list, true);
+	}
+
+	dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste);
+
+	return new_ste;
+
+err_exit:
+	mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+	return NULL;
+}
+
+static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher,
+					 struct mlx5dr_matcher_rx_tx *nic_matcher,
+					 struct list_head *cur_miss_list,
+					 struct mlx5dr_ste_htbl *new_htbl,
+					 struct list_head *update_list)
+{
+	struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste;
+
+	if (list_empty(cur_miss_list))
+		return 0;
+
+	list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) {
+		new_ste = dr_rule_rehash_copy_ste(matcher,
+						  nic_matcher,
+						  cur_ste,
+						  new_htbl,
+						  update_list);
+		if (!new_ste)
+			goto err_insert;
+
+		list_del(&cur_ste->miss_list_node);
+		mlx5dr_htbl_put(cur_ste->htbl);
+	}
+	return 0;
+
+err_insert:
+	mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n");
+	WARN_ON(true);
+	return -EINVAL;
+}
+
+static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
+				    struct mlx5dr_matcher_rx_tx *nic_matcher,
+				    struct mlx5dr_ste_htbl *cur_htbl,
+				    struct mlx5dr_ste_htbl *new_htbl,
+				    struct list_head *update_list)
+{
+	struct mlx5dr_ste *cur_ste;
+	int cur_entries;
+	int err = 0;
+	int i;
+
+	cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size);
+
+	if (cur_entries < 1) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < cur_entries; i++) {
+		cur_ste = &cur_htbl->ste_arr[i];
+		if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */
+			continue;
+
+		err = dr_rule_rehash_copy_miss_list(matcher,
+						    nic_matcher,
+						    mlx5dr_ste_get_miss_list(cur_ste),
+						    new_htbl,
+						    update_list);
+		if (err)
+			goto clean_copy;
+	}
+
+clean_copy:
+	return err;
+}
+
+static struct mlx5dr_ste_htbl *
+dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
+		    struct mlx5dr_rule_rx_tx *nic_rule,
+		    struct mlx5dr_ste_htbl *cur_htbl,
+		    u8 ste_location,
+		    struct list_head *update_list,
+		    enum mlx5dr_icm_chunk_size new_size)
+{
+	struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info;
+	struct mlx5dr_matcher *matcher = rule->matcher;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_matcher_rx_tx *nic_matcher;
+	struct mlx5dr_ste_send_info *ste_info;
+	struct mlx5dr_htbl_connect_info info;
+	struct mlx5dr_domain_rx_tx *nic_dmn;
+	u8 formatted_ste[DR_STE_SIZE] = {};
+	LIST_HEAD(rehash_table_send_list);
+	struct mlx5dr_ste *ste_to_update;
+	struct mlx5dr_ste_htbl *new_htbl;
+	int err;
+
+	nic_matcher = nic_rule->nic_matcher;
+	nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+	ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+	if (!ste_info)
+		return NULL;
+
+	new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+					 new_size,
+					 cur_htbl->lu_type,
+					 cur_htbl->byte_mask);
+	if (!new_htbl) {
+		mlx5dr_err(dmn, "Failed to allocate new hash table\n");
+		goto free_ste_info;
+	}
+
+	/* Write new table to HW */
+	info.type = CONNECT_MISS;
+	info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+	mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi,
+				     nic_dmn,
+				     new_htbl,
+				     formatted_ste,
+				     &info);
+
+	new_htbl->pointing_ste = cur_htbl->pointing_ste;
+	new_htbl->pointing_ste->next_htbl = new_htbl;
+	err = dr_rule_rehash_copy_htbl(matcher,
+				       nic_matcher,
+				       cur_htbl,
+				       new_htbl,
+				       &rehash_table_send_list);
+	if (err)
+		goto free_new_htbl;
+
+	if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste,
+				      nic_matcher->ste_builder[ste_location - 1].bit_mask)) {
+		mlx5dr_err(dmn, "Failed writing table to HW\n");
+		goto free_new_htbl;
+	}
+
+	/* Writing to the HW is done in the regular order of
+	 * rehash_table_send_list, so that the original data is written
+	 * before the miss addresses of any collision entries.
+	 */
+	if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) {
+		mlx5dr_err(dmn, "Failed updating table to HW\n");
+		goto free_ste_list;
+	}
+
+	/* Connect previous hash table to current */
+	if (ste_location == 1) {
+		/* The previous table is an anchor; an anchor's size is always one STE */
+		struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl;
+
+		/* On matcher s_anchor we keep an extra refcount */
+		mlx5dr_htbl_get(new_htbl);
+		mlx5dr_htbl_put(cur_htbl);
+
+		nic_matcher->s_htbl = new_htbl;
+
+		/* It is safe to call mlx5dr_ste_set_hit_addr() on the
+		 * hw_ste here (48B long), since it only touches the
+		 * first 32B.
+		 */
+		mlx5dr_ste_set_hit_addr(prev_htbl->ste_arr[0].hw_ste,
+					new_htbl->chunk->icm_addr,
+					new_htbl->chunk->num_of_entries);
+
+		ste_to_update = &prev_htbl->ste_arr[0];
+	} else {
+		mlx5dr_ste_set_hit_addr_by_next_htbl(cur_htbl->pointing_ste->hw_ste,
+						     new_htbl);
+		ste_to_update = cur_htbl->pointing_ste;
+	}
+
+	mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED,
+						  0, ste_to_update->hw_ste, ste_info,
+						  update_list, false);
+
+	return new_htbl;
+
+free_ste_list:
+	/* Clean all ste_info's from the new table */
+	list_for_each_entry_safe(del_ste_info, tmp_ste_info,
+				 &rehash_table_send_list, send_list) {
+		list_del(&del_ste_info->send_list);
+		kfree(del_ste_info);
+	}
+
+free_new_htbl:
+	mlx5dr_ste_htbl_free(new_htbl);
+free_ste_info:
+	kfree(ste_info);
+	mlx5dr_info(dmn, "Failed creating rehash table\n");
+	return NULL;
+}
+
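+/* Resize cur_htbl upward to a larger chunk, capped by the domain's max
+ * SW ICM size; when the table is already at the cap the rehash is
+ * skipped and the caller falls back to a collision entry.
+ */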
+static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule,
+					      struct mlx5dr_rule_rx_tx *nic_rule,
+					      struct mlx5dr_ste_htbl *cur_htbl,
+					      u8 ste_location,
+					      struct list_head *update_list)
+{
+	struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+	enum mlx5dr_icm_chunk_size new_size;
+
+	new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size);
+	new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz);
+
+	if (new_size == cur_htbl->chunk_size)
+		return NULL; /* Skip rehash, we are already at the max size */
+
+	return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location,
+				   update_list, new_size);
+}
+
+static struct mlx5dr_ste *
+dr_rule_handle_collision(struct mlx5dr_matcher *matcher,
+			 struct mlx5dr_matcher_rx_tx *nic_matcher,
+			 struct mlx5dr_ste *ste,
+			 u8 *hw_ste,
+			 struct list_head *miss_list,
+			 struct list_head *send_list)
+{
+	struct mlx5dr_ste_send_info *ste_info;
+	struct mlx5dr_ste *new_ste;
+
+	ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+	if (!ste_info)
+		return NULL;
+
+	new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste);
+	if (!new_ste)
+		goto free_send_info;
+
+	if (dr_rule_append_to_miss_list(new_ste, miss_list, send_list)) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Failed to update prev miss_list\n");
+		goto err_exit;
+	}
+
+	mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste,
+						  ste_info, send_list, false);
+
+	ste->htbl->ctrl.num_of_collisions++;
+	ste->htbl->ctrl.num_of_valid_entries++;
+
+	return new_ste;
+
+err_exit:
+	mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+free_send_info:
+	kfree(ste_info);
+	return NULL;
+}
+
+static void dr_rule_remove_action_members(struct mlx5dr_rule *rule)
+{
+	struct mlx5dr_rule_action_member *action_mem;
+	struct mlx5dr_rule_action_member *tmp;
+
+	list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) {
+		list_del(&action_mem->list);
+		refcount_dec(&action_mem->action->refcount);
+		kvfree(action_mem);
+	}
+}
+
+static int dr_rule_add_action_members(struct mlx5dr_rule *rule,
+				      size_t num_actions,
+				      struct mlx5dr_action *actions[])
+{
+	struct mlx5dr_rule_action_member *action_mem;
+	int i;
+
+	for (i = 0; i < num_actions; i++) {
+		action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL);
+		if (!action_mem)
+			goto free_action_members;
+
+		action_mem->action = actions[i];
+		INIT_LIST_HEAD(&action_mem->list);
+		list_add_tail(&action_mem->list, &rule->rule_actions_list);
+		refcount_inc(&action_mem->action->refcount);
+	}
+
+	return 0;
+
+free_action_members:
+	dr_rule_remove_action_members(rule);
+	return -ENOMEM;
+}
+
+/* When the pointer of an STE is no longer valid (for example after the
+ * STE was moved to be first in the miss_list and placed in the origin
+ * table), all rule members attached to this STE must update their ste
+ * member to the new pointer.
+ */
+void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste,
+				    struct mlx5dr_ste *new_ste)
+{
+	struct mlx5dr_rule_member *rule_mem;
+
+	if (!list_empty(&ste->rule_list))
+		list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list)
+			rule_mem->ste = new_ste;
+}
+
+static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
+				       struct mlx5dr_rule_rx_tx *nic_rule)
+{
+	struct mlx5dr_rule_member *rule_mem;
+	struct mlx5dr_rule_member *tmp_mem;
+
+	if (list_empty(&nic_rule->rule_members_list))
+		return;
+	list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) {
+		list_del(&rule_mem->list);
+		list_del(&rule_mem->use_ste_list);
+		mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher);
+		kvfree(rule_mem);
+	}
+}
+
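+/* Kernighan's population count: byte_mask &= (byte_mask - 1) clears the
+ * lowest set bit per iteration, e.g. 0xb -> 0xa -> 0x8 -> 0, so three
+ * iterations for three set bits.
+ */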
+static u16 dr_get_bits_per_mask(u16 byte_mask)
+{
+	u16 bits = 0;
+
+	while (byte_mask) {
+		byte_mask = byte_mask & (byte_mask - 1);
+		bits++;
+	}
+
+	return bits;
+}
+
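+/* Grow heuristic: resize only when the table is below the domain cap,
+ * is allowed to grow, the byte mask carries enough bits to spread the
+ * entries over a larger table, and both the collisions and the
+ * non-colliding valid entries crossed the increase threshold.
+ */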
+static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
+				      struct mlx5dr_domain *dmn,
+				      struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+	struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+
+	if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size)
+		return false;
+
+	if (!ctrl->may_grow)
+		return false;
+
+	if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size)
+		return false;
+
+	if (ctrl->num_of_collisions >= ctrl->increase_threshold &&
+	    (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold)
+		return true;
+
+	return false;
+}
+
+static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule,
+			      struct mlx5dr_ste *ste)
+{
+	struct mlx5dr_rule_member *rule_mem;
+
+	rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL);
+	if (!rule_mem)
+		return -ENOMEM;
+
+	rule_mem->ste = ste;
+	list_add_tail(&rule_mem->list, &nic_rule->rule_members_list);
+
+	list_add_tail(&rule_mem->use_ste_list, &ste->rule_list);
+
+	return 0;
+}
+
+static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
+				      struct mlx5dr_rule_rx_tx *nic_rule,
+				      struct list_head *send_ste_list,
+				      struct mlx5dr_ste *last_ste,
+				      u8 *hw_ste_arr,
+				      u32 new_hw_ste_arr_sz)
+{
+	struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher;
+	struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES];
+	u8 num_of_builders = nic_matcher->num_of_builders;
+	struct mlx5dr_matcher *matcher = rule->matcher;
+	u8 *curr_hw_ste, *prev_hw_ste;
+	struct mlx5dr_ste *action_ste;
+	int i, k, ret;
+
+	/* Two cases:
+	 * 1. num_of_builders equals new_hw_ste_arr_sz: the actions fit
+	 *    inside the match STEs.
+	 * 2. num_of_builders is less than new_hw_ste_arr_sz: new STEs
+	 *    were added to support the actions.
+	 */
+	if (num_of_builders == new_hw_ste_arr_sz)
+		return 0;
+
+	for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) {
+		curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE;
+		prev_hw_ste = (i == 0) ? curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE);
+		action_ste = dr_rule_create_collision_htbl(matcher,
+							   nic_matcher,
+							   curr_hw_ste);
+		if (!action_ste)
+			return -ENOMEM;
+
+		mlx5dr_ste_get(action_ste);
+
+		/* When freeing an STE we walk its miss list, so add this STE to the list */
+		list_add_tail(&action_ste->miss_list_node,
+			      mlx5dr_ste_get_miss_list(action_ste));
+
+		ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]),
+					  GFP_KERNEL);
+		if (!ste_info_arr[k])
+			goto err_exit;
+
+		/* Point current ste to the new action */
+		mlx5dr_ste_set_hit_addr_by_next_htbl(prev_hw_ste, action_ste->htbl);
+		ret = dr_rule_add_member(nic_rule, action_ste);
+		if (ret) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Failed adding rule member\n");
+			goto free_ste_info;
+		}
+		mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0,
+							  curr_hw_ste,
+							  ste_info_arr[k],
+							  send_ste_list, false);
+	}
+
+	return 0;
+
+free_ste_info:
+	kfree(ste_info_arr[k]);
+err_exit:
+	mlx5dr_ste_put(action_ste, matcher, nic_matcher);
+	return -ENOMEM;
+}
+
+static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
+				      struct mlx5dr_matcher_rx_tx *nic_matcher,
+				      struct mlx5dr_ste_htbl *cur_htbl,
+				      struct mlx5dr_ste *ste,
+				      u8 ste_location,
+				      u8 *hw_ste,
+				      struct list_head *miss_list,
+				      struct list_head *send_list)
+{
+	struct mlx5dr_ste_send_info *ste_info;
+
+	/* Take a ref on the table, only on the first time this STE is used */
+	mlx5dr_htbl_get(cur_htbl);
+
+	/* new entry -> new branch */
+	list_add_tail(&ste->miss_list_node, miss_list);
+
+	mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+
+	ste->ste_chain_location = ste_location;
+
+	ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+	if (!ste_info)
+		goto clean_ste_setting;
+
+	if (mlx5dr_ste_create_next_htbl(matcher,
+					nic_matcher,
+					ste,
+					hw_ste,
+					DR_CHUNK_SIZE_1)) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n");
+		goto clean_ste_info;
+	}
+
+	cur_htbl->ctrl.num_of_valid_entries++;
+
+	mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste,
+						  ste_info, send_list, false);
+
+	return 0;
+
+clean_ste_info:
+	kfree(ste_info);
+clean_ste_setting:
+	list_del_init(&ste->miss_list_node);
+	mlx5dr_htbl_put(cur_htbl);
+
+	return -ENOMEM;
+}
+
+static struct mlx5dr_ste *
+dr_rule_handle_ste_branch(struct mlx5dr_rule *rule,
+			  struct mlx5dr_rule_rx_tx *nic_rule,
+			  struct list_head *send_ste_list,
+			  struct mlx5dr_ste_htbl *cur_htbl,
+			  u8 *hw_ste,
+			  u8 ste_location,
+			  struct mlx5dr_ste_htbl **put_htbl)
+{
+	struct mlx5dr_matcher *matcher = rule->matcher;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_matcher_rx_tx *nic_matcher;
+	struct mlx5dr_domain_rx_tx *nic_dmn;
+	struct mlx5dr_ste_htbl *new_htbl;
+	struct mlx5dr_ste *matched_ste;
+	struct list_head *miss_list;
+	bool skip_rehash = false;
+	struct mlx5dr_ste *ste;
+	int index;
+
+	nic_matcher = nic_rule->nic_matcher;
+	nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
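+	/* Each lookup has three possible outcomes: the hashed index is
+	 * free (take it), an STE with the same tag already sits in the
+	 * miss list (reuse it, or flag a duplicate), or we collide, in
+	 * which case we attempt a single rehash before chaining a
+	 * collision entry.
+	 */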
+again:
+	index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl);
+	miss_list = &cur_htbl->chunk->miss_list[index];
+	ste = &cur_htbl->ste_arr[index];
+
+	if (mlx5dr_ste_not_used_ste(ste)) {
+		if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl,
+					       ste, ste_location,
+					       hw_ste, miss_list,
+					       send_ste_list))
+			return NULL;
+	} else {
+		/* Hash table index in use, check if this ste is in the miss list */
+		matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste);
+		if (matched_ste) {
+			/* If this is the last STE in the chain and it has
+			 * the same tag, all the previous STEs matched as
+			 * well, so this rule is a duplicate.
+			 */
+			if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location))
+				return matched_ste;
+
+			mlx5dr_dbg(dmn, "Duplicate rule inserted\n");
+		}
+
+		if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) {
+			/* Hash table index in use, try to resize the hash table */
+			skip_rehash = true;
+
+			/* Hold the table till we update.
+			 * Release in dr_rule_create_rule()
+			 */
+			*put_htbl = cur_htbl;
+			mlx5dr_htbl_get(cur_htbl);
+
+			new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl,
+						  ste_location, send_ste_list);
+			if (!new_htbl) {
+				mlx5dr_htbl_put(cur_htbl);
+				mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n",
+					    cur_htbl->chunk_size);
+			} else {
+				cur_htbl = new_htbl;
+			}
+			goto again;
+		} else {
+			/* Hash table index in use, add another collision (miss) */
+			ste = dr_rule_handle_collision(matcher,
+						       nic_matcher,
+						       ste,
+						       hw_ste,
+						       miss_list,
+						       send_ste_list);
+			if (!ste) {
+				mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n",
+					   index);
+				return NULL;
+			}
+		}
+	}
+	return ste;
+}
+
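+/* Byte-wise check that the value sets no bits outside the mask. For
+ * example, mask byte 0x0f with value byte 0x1a fails, since
+ * 0x1a & ~0x0f leaves bit 4 set.
+ */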
+static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value,
+				      u32 s_idx, u32 e_idx)
+{
+	u32 i;
+
+	for (i = s_idx; i < e_idx; i++) {
+		if (value[i] & ~mask[i]) {
+			pr_info("Rule parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
+	return true;
+}
+
+static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
+			   struct mlx5dr_match_parameters *value,
+			   struct mlx5dr_match_param *param)
+{
+	u8 match_criteria = matcher->match_criteria;
+	size_t value_size = value->match_sz;
+	u8 *mask_p = (u8 *)&matcher->mask;
+	u8 *param_p = (u8 *)param;
+	u32 s_idx, e_idx;
+
+	if (!value_size ||
+	    (value_size > sizeof(struct mlx5dr_match_param) ||
+	     (value_size % sizeof(u32)))) {
+		mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n");
+		return false;
+	}
+
+	mlx5dr_ste_copy_param(matcher->match_criteria, param, value);
+
+	if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
+		s_idx = offsetof(struct mlx5dr_match_param, outer);
+		e_idx = min(s_idx + sizeof(param->outer), value_size);
+
+		if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+		s_idx = offsetof(struct mlx5dr_match_param, misc);
+		e_idx = min(s_idx + sizeof(param->misc), value_size);
+
+		if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
+
+	if (match_criteria & DR_MATCHER_CRITERIA_INNER) {
+		s_idx = offsetof(struct mlx5dr_match_param, inner);
+		e_idx = min(s_idx + sizeof(param->inner), value_size);
+
+		if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC2) {
+		s_idx = offsetof(struct mlx5dr_match_param, misc2);
+		e_idx = min(s_idx + sizeof(param->misc2), value_size);
+
+		if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC3) {
+		s_idx = offsetof(struct mlx5dr_match_param, misc3);
+		e_idx = min(s_idx + sizeof(param->misc3), value_size);
+
+		if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+			mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
+	return true;
+}
+
+static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
+				    struct mlx5dr_rule_rx_tx *nic_rule)
+{
+	dr_rule_clean_rule_members(rule, nic_rule);
+	return 0;
+}
+
+static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule)
+{
+	dr_rule_destroy_rule_nic(rule, &rule->rx);
+	dr_rule_destroy_rule_nic(rule, &rule->tx);
+	return 0;
+}
+
+static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
+{
+	struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+	switch (dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		dr_rule_destroy_rule_nic(rule, &rule->rx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		dr_rule_destroy_rule_nic(rule, &rule->tx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		dr_rule_destroy_rule_fdb(rule);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dr_rule_remove_action_members(rule);
+	kfree(rule);
+	return 0;
+}
+
+static bool dr_rule_is_ipv6(struct mlx5dr_match_param *param)
+{
+	return (param->outer.ip_version == 6 ||
+		param->inner.ip_version == 6 ||
+		param->outer.ethertype == ETH_P_IPV6 ||
+		param->inner.ethertype == ETH_P_IPV6);
+}
+
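+/* An FDB rule is inserted on both the RX and TX sides. When the rule
+ * matches on the source port we can skip the side it can never hit:
+ * the RX side only sees packets coming from the wire, the TX side only
+ * packets sent from other vports towards the wire.
+ */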
+static bool dr_rule_skip(enum mlx5dr_domain_type domain,
+			 enum mlx5dr_ste_entry_type ste_type,
+			 struct mlx5dr_match_param *mask,
+			 struct mlx5dr_match_param *value)
+{
+	if (domain != MLX5DR_DOMAIN_TYPE_FDB)
+		return false;
+
+	if (mask->misc.source_port) {
+		if (ste_type == MLX5DR_STE_TYPE_RX)
+			if (value->misc.source_port != WIRE_PORT)
+				return true;
+
+		if (ste_type == MLX5DR_STE_TYPE_TX)
+			if (value->misc.source_port == WIRE_PORT)
+				return true;
+	}
+
+	/* Metadata C can be used to describe the source vport */
+	if (mask->misc2.metadata_reg_c_0) {
+		if (ste_type == MLX5DR_STE_TYPE_RX)
+			if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT)
+				return true;
+
+		if (ste_type == MLX5DR_STE_TYPE_TX)
+			if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT)
+				return true;
+	}
+	return false;
+}
+
+static int
+dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
+			struct mlx5dr_rule_rx_tx *nic_rule,
+			struct mlx5dr_match_param *param,
+			size_t num_actions,
+			struct mlx5dr_action *actions[])
+{
+	struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info;
+	struct mlx5dr_matcher *matcher = rule->matcher;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_matcher_rx_tx *nic_matcher;
+	struct mlx5dr_domain_rx_tx *nic_dmn;
+	struct mlx5dr_ste_htbl *htbl = NULL;
+	struct mlx5dr_ste_htbl *cur_htbl;
+	struct mlx5dr_ste *ste = NULL;
+	LIST_HEAD(send_ste_list);
+	u8 *hw_ste_arr = NULL;
+	u32 new_hw_ste_arr_sz;
+	int ret, i;
+
+	nic_matcher = nic_rule->nic_matcher;
+	nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+	INIT_LIST_HEAD(&nic_rule->rule_members_list);
+
+	if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param))
+		return 0;
+
+	ret = mlx5dr_matcher_select_builders(matcher,
+					     nic_matcher,
+					     dr_rule_is_ipv6(param));
+	if (ret)
+		goto out_err;
+
+	hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
+	if (!hw_ste_arr) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	/* Set the tag values inside the ste array */
+	ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
+	if (ret)
+		goto free_hw_ste;
+
+	/* Set the actions values/addresses inside the ste array */
+	ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions,
+					   num_actions, hw_ste_arr,
+					   &new_hw_ste_arr_sz);
+	if (ret)
+		goto free_hw_ste;
+
+	cur_htbl = nic_matcher->s_htbl;
+
+	/* Go over the array of STEs and build a dr_ste for each. The loop
+	 * runs only over the builders, whose count is less than or equal
+	 * to the number of STEs, since actions may live in extra STEs.
+	 */
+	for (i = 0; i < nic_matcher->num_of_builders; i++) {
+		/* Calculate CRC and keep new ste entry */
+		u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE);
+
+		ste = dr_rule_handle_ste_branch(rule,
+						nic_rule,
+						&send_ste_list,
+						cur_htbl,
+						cur_hw_ste_ent,
+						i + 1,
+						&htbl);
+		if (!ste) {
+			mlx5dr_err(dmn, "Failed creating next branch\n");
+			ret = -ENOENT;
+			goto free_rule;
+		}
+
+		cur_htbl = ste->next_htbl;
+
+		/* Keep all STEs in the rule struct */
+		ret = dr_rule_add_member(nic_rule, ste);
+		if (ret) {
+			mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i);
+			goto free_ste;
+		}
+
+		mlx5dr_ste_get(ste);
+	}
+
+	/* Connect actions */
+	ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list,
+					 ste, hw_ste_arr, new_hw_ste_arr_sz);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Failed apply actions\n");
+		goto free_rule;
+	}
+	ret = dr_rule_send_update_list(&send_ste_list, dmn, true);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed sending ste!\n");
+		goto free_rule;
+	}
+
+	if (htbl)
+		mlx5dr_htbl_put(htbl);
+
+	kfree(hw_ste_arr);
+
+	return 0;
+
+free_ste:
+	mlx5dr_ste_put(ste, matcher, nic_matcher);
+free_rule:
+	dr_rule_clean_rule_members(rule, nic_rule);
+	/* Clean all ste_info's */
+	list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) {
+		list_del(&ste_info->send_list);
+		kfree(ste_info);
+	}
+free_hw_ste:
+	kfree(hw_ste_arr);
+out_err:
+	return ret;
+}
+
+static int
+dr_rule_create_rule_fdb(struct mlx5dr_rule *rule,
+			struct mlx5dr_match_param *param,
+			size_t num_actions,
+			struct mlx5dr_action *actions[])
+{
+	struct mlx5dr_match_param copy_param = {};
+	int ret;
+
+	/* Copy match_param since it will be consumed during the first
+	 * nic_rule insertion.
+	 */
+	memcpy(&copy_param, param, sizeof(struct mlx5dr_match_param));
+
+	ret = dr_rule_create_rule_nic(rule, &rule->rx, param,
+				      num_actions, actions);
+	if (ret)
+		return ret;
+
+	ret = dr_rule_create_rule_nic(rule, &rule->tx, &copy_param,
+				      num_actions, actions);
+	if (ret)
+		goto destroy_rule_nic_rx;
+
+	return 0;
+
+destroy_rule_nic_rx:
+	dr_rule_destroy_rule_nic(rule, &rule->rx);
+	return ret;
+}
+
+static struct mlx5dr_rule *
+dr_rule_create_rule(struct mlx5dr_matcher *matcher,
+		    struct mlx5dr_match_parameters *value,
+		    size_t num_actions,
+		    struct mlx5dr_action *actions[])
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_match_param param = {};
+	struct mlx5dr_rule *rule;
+	int ret;
+
+	if (!dr_rule_verify(matcher, value, &param))
+		return NULL;
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return NULL;
+
+	rule->matcher = matcher;
+	INIT_LIST_HEAD(&rule->rule_actions_list);
+
+	ret = dr_rule_add_action_members(rule, num_actions, actions);
+	if (ret)
+		goto free_rule;
+
+	switch (dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		rule->rx.nic_matcher = &matcher->rx;
+		ret = dr_rule_create_rule_nic(rule, &rule->rx, &param,
+					      num_actions, actions);
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		rule->tx.nic_matcher = &matcher->tx;
+		ret = dr_rule_create_rule_nic(rule, &rule->tx, &param,
+					      num_actions, actions);
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		rule->rx.nic_matcher = &matcher->rx;
+		rule->tx.nic_matcher = &matcher->tx;
+		ret = dr_rule_create_rule_fdb(rule, &param,
+					      num_actions, actions);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		goto remove_action_members;
+
+	return rule;
+
+remove_action_members:
+	dr_rule_remove_action_members(rule);
+free_rule:
+	kfree(rule);
+	mlx5dr_info(dmn, "Failed creating rule\n");
+	return NULL;
+}
+
+struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+				       struct mlx5dr_match_parameters *value,
+				       size_t num_actions,
+				       struct mlx5dr_action *actions[])
+{
+	struct mlx5dr_rule *rule;
+
+	mutex_lock(&matcher->tbl->dmn->mutex);
+	refcount_inc(&matcher->refcount);
+
+	rule = dr_rule_create_rule(matcher, value, num_actions, actions);
+	if (!rule)
+		refcount_dec(&matcher->refcount);
+
+	mutex_unlock(&matcher->tbl->dmn->mutex);
+
+	return rule;
+}
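+
+/* A minimal usage sketch (error handling elided; @fwd_action stands in
+ * for any previously created mlx5dr_action):
+ *
+ *	struct mlx5dr_action *actions[] = { fwd_action };
+ *	struct mlx5dr_rule *rule;
+ *
+ *	rule = mlx5dr_rule_create(matcher, &value, ARRAY_SIZE(actions),
+ *				  actions);
+ *	...
+ *	mlx5dr_rule_destroy(rule);
+ */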
+
+int mlx5dr_rule_destroy(struct mlx5dr_rule *rule)
+{
+	struct mlx5dr_matcher *matcher = rule->matcher;
+	struct mlx5dr_table *tbl = rule->matcher->tbl;
+	int ret;
+
+	mutex_lock(&tbl->dmn->mutex);
+
+	ret = dr_rule_destroy_rule(rule);
+
+	mutex_unlock(&tbl->dmn->mutex);
+
+	if (!ret)
+		refcount_dec(&matcher->refcount);
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
new file mode 100644
index 0000000..51803ee
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -0,0 +1,977 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define QUEUE_SIZE 128
+#define SIGNAL_PER_DIV_QUEUE 16
+#define TH_NUMS_TO_DRAIN 2
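+
+/* A completion is requested once per signalling interval; once
+ * TH_NUMS_TO_DRAIN signalled batches are outstanding the CQ is drained
+ * until it is empty (see dr_handle_pending_wc()).
+ */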
+
+enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+
+struct dr_data_seg {
+	u64 addr;
+	u32 length;
+	u32 lkey;
+	unsigned int send_flags;
+};
+
+struct postsend_info {
+	struct dr_data_seg write;
+	struct dr_data_seg read;
+	u64 remote_addr;
+	u32 rkey;
+};
+
+struct dr_qp_rtr_attr {
+	struct mlx5dr_cmd_gid_attr dgid_attr;
+	enum ib_mtu mtu;
+	u32 qp_num;
+	u16 port_num;
+	u8 min_rnr_timer;
+	u8 sgid_index;
+	u16 udp_src_port;
+};
+
+struct dr_qp_rts_attr {
+	u8 timeout;
+	u8 retry_cnt;
+	u8 rnr_retry;
+};
+
+struct dr_qp_init_attr {
+	u32 cqn;
+	u32 pdn;
+	u32 max_send_wr;
+	struct mlx5_uars_page *uar;
+};
+
+static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
+{
+	unsigned int idx;
+	u8 opcode;
+
+	opcode = get_cqe_opcode(cqe64);
+	if (opcode == MLX5_CQE_REQ_ERR) {
+		idx = be16_to_cpu(cqe64->wqe_counter) &
+			(dr_cq->qp->sq.wqe_cnt - 1);
+		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
+	} else if (opcode == MLX5_CQE_RESP_ERR) {
+		++dr_cq->qp->sq.cc;
+	} else {
+		idx = be16_to_cpu(cqe64->wqe_counter) &
+			(dr_cq->qp->sq.wqe_cnt - 1);
+		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
+
+		return CQ_OK;
+	}
+
+	return CQ_POLL_ERR;
+}
+
+static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
+{
+	struct mlx5_cqe64 *cqe64;
+	int err;
+
+	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
+	if (!cqe64)
+		return CQ_EMPTY;
+
+	mlx5_cqwq_pop(&dr_cq->wq);
+	err = dr_parse_cqe(dr_cq, cqe64);
+	mlx5_cqwq_update_db_record(&dr_cq->wq);
+
+	return err;
+}
+
+static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
+{
+	int npolled;
+	int err = 0;
+
+	for (npolled = 0; npolled < ne; ++npolled) {
+		err = dr_cq_poll_one(dr_cq);
+		if (err != CQ_OK)
+			break;
+	}
+
+	return err == CQ_POLL_ERR ? err : npolled;
+}
+
+static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
+{
+	pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
+					 struct dr_qp_init_attr *attr)
+{
+	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
+	struct mlx5_wq_param wqp;
+	struct mlx5dr_qp *dr_qp;
+	int inlen;
+	void *qpc;
+	void *in;
+	int err;
+
+	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
+	if (!dr_qp)
+		return NULL;
+
+	wqp.buf_numa_node = mdev->priv.numa_node;
+	wqp.db_numa_node = mdev->priv.numa_node;
+
+	dr_qp->rq.pc = 0;
+	dr_qp->rq.cc = 0;
+	dr_qp->rq.wqe_cnt = 4;
+	dr_qp->sq.pc = 0;
+	dr_qp->sq.cc = 0;
+	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
+
+	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
+	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
+				&dr_qp->wq_ctrl);
+	if (err) {
+		mlx5_core_info(mdev, "Can't create QP WQ\n");
+		goto err_wq;
+	}
+
+	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
+				     sizeof(dr_qp->sq.wqe_head[0]),
+				     GFP_KERNEL);
+
+	if (!dr_qp->sq.wqe_head) {
+		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
+		goto err_wqe_head;
+	}
+
+	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+		dr_qp->wq_ctrl.buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_in;
+	}
+
+	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, pd, attr->pdn);
+	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
+	MLX5_SET(qpc, qpc, log_page_size,
+		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(qpc, qpc, fre, 1);
+	MLX5_SET(qpc, qpc, rlky, 1);
+	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
+	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
+	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
+	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
+	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
+				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
+							 in, pas));
+
+	err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
+	kfree(in);
+
+	if (err) {
+		mlx5_core_warn(mdev, " Can't create QP\n");
+		goto err_in;
+	}
+	dr_qp->mqp.event = dr_qp_event;
+	dr_qp->uar = attr->uar;
+
+	return dr_qp;
+
+err_in:
+	kfree(dr_qp->sq.wqe_head);
+err_wqe_head:
+	mlx5_wq_destroy(&dr_qp->wq_ctrl);
+err_wq:
+	kfree(dr_qp);
+	return NULL;
+}
+
+static void dr_destroy_qp(struct mlx5_core_dev *mdev,
+			  struct mlx5dr_qp *dr_qp)
+{
+	mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
+	kfree(dr_qp->sq.wqe_head);
+	mlx5_wq_destroy(&dr_qp->wq_ctrl);
+	kfree(dr_qp);
+}
+
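+/* Ring the doorbell: publish the new SQ producer counter in the
+ * doorbell record, then write the control segment to the UAR BlueFlame
+ * register so the device starts fetching the WQE.
+ */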
+static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
+{
+	dma_wmb();
+	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
+
+	/* After the wmb() the HW is aware of the new work */
+	wmb();
+
+	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
+}
+
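+/* Build one WQE in the cyclic SQ: a control segment followed by a
+ * remote-address segment and a single data segment. The WQE size in
+ * qpn_ds is counted in 16-byte units, hence the sizeof()/16 math.
+ */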
+static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
+			     u32 rkey, struct dr_data_seg *data_seg,
+			     u32 opcode, int nreq)
+{
+	struct mlx5_wqe_raddr_seg *wq_raddr;
+	struct mlx5_wqe_ctrl_seg *wq_ctrl;
+	struct mlx5_wqe_data_seg *wq_dseg;
+	unsigned int size;
+	unsigned int idx;
+
+	size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
+		sizeof(*wq_raddr) / 16;
+
+	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+
+	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+	wq_ctrl->imm = 0;
+	wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
+		MLX5_WQE_CTRL_CQ_UPDATE : 0;
+	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
+						opcode);
+	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
+	wq_raddr = (void *)(wq_ctrl + 1);
+	wq_raddr->raddr = cpu_to_be64(remote_addr);
+	wq_raddr->rkey = cpu_to_be32(rkey);
+	wq_raddr->reserved = 0;
+
+	wq_dseg = (void *)(wq_raddr + 1);
+	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+	wq_dseg->addr = cpu_to_be64(data_seg->addr);
+
+	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
+
+	if (nreq)
+		dr_cmd_notify_hw(dr_qp, wq_ctrl);
+}
+
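+/* Each post pairs an RDMA_WRITE with an RDMA_READ into the same area;
+ * the read completing after the write indicates the data has landed,
+ * which is why dr_fill_data_segs() counts two pending WQEs per post.
+ */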
+static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
+{
+	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
+	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+			 &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
+}
+
+/**
+ * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
+ * with the send_list.
+ *
+ *     @ste:       The STE the data is attached to
+ *     @size:      Size of the data to write
+ *     @offset:    Offset of the data from the start of the hw_ste entry
+ *     @data:      The data
+ *     @ste_info:  STE info to be sent with the send_list
+ *     @send_list: The list to append to
+ *     @copy_data: If true, the data is copied and kept, since it is not
+ *                 backed up anywhere (as in re-hash).
+ *                 If false, the data may still be updated after it was
+ *                 added to the list.
+ */
+void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
+					       u16 offset, u8 *data,
+					       struct mlx5dr_ste_send_info *ste_info,
+					       struct list_head *send_list,
+					       bool copy_data)
+{
+	ste_info->size = size;
+	ste_info->ste = ste;
+	ste_info->offset = offset;
+
+	if (copy_data) {
+		memcpy(ste_info->data_cont, data, size);
+		ste_info->data = ste_info->data_cont;
+	} else {
+		ste_info->data = data;
+	}
+
+	list_add_tail(&ste_info->send_list, send_list);
+}
+
+/* The function tries to consume one wc each time, unless the queue is
+ * full, which means the HW is a full queue length behind the SW; in
+ * that case the function drains the CQ until it is empty.
+ */
+static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
+				struct mlx5dr_send_ring *send_ring)
+{
+	bool is_drain = false;
+	int ne;
+
+	if (send_ring->pending_wqe < send_ring->signal_th)
+		return 0;
+
+	/* Queue is full, start draining it */
+	if (send_ring->pending_wqe >=
+	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
+		is_drain = true;
+
+	do {
+		ne = dr_poll_cq(send_ring->cq, 1);
+		if (ne < 0)
+			return ne;
+		else if (ne == 1)
+			send_ring->pending_wqe -= send_ring->signal_th;
+	} while (is_drain && send_ring->pending_wqe);
+
+	return 0;
+}
+
+static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
+			      struct postsend_info *send_info)
+{
+	send_ring->pending_wqe++;
+
+	if (send_ring->pending_wqe % send_ring->signal_th == 0)
+		send_info->write.send_flags |= IB_SEND_SIGNALED;
+
+	send_ring->pending_wqe++;
+	send_info->read.length = send_info->write.length;
+	/* Read into the same write area */
+	send_info->read.addr = (uintptr_t)send_info->write.addr;
+	send_info->read.lkey = send_ring->mr->mkey.key;
+
+	if (send_ring->pending_wqe % send_ring->signal_th == 0)
+		send_info->read.send_flags = IB_SEND_SIGNALED;
+	else
+		send_info->read.send_flags = 0;
+}
+
+static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
+				struct postsend_info *send_info)
+{
+	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
+	u32 buff_offset;
+	int ret;
+
+	ret = dr_handle_pending_wc(dmn, send_ring);
+	if (ret)
+		return ret;
+
+	if (send_info->write.length > dmn->info.max_inline_size) {
+		buff_offset = (send_ring->tx_head &
+			       (dmn->send_ring->signal_th - 1)) *
+			send_ring->max_post_send_size;
+		/* Copy to ring mr */
+		memcpy(send_ring->buf + buff_offset,
+		       (void *)(uintptr_t)send_info->write.addr,
+		       send_info->write.length);
+		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
+		send_info->write.lkey = send_ring->mr->mkey.key;
+	}
+
+	send_ring->tx_head++;
+	dr_fill_data_segs(send_ring, send_info);
+	dr_post_send(send_ring->qp, send_info);
+
+	return 0;
+}
+
+static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
+				   struct mlx5dr_ste_htbl *htbl,
+				   u8 **data,
+				   u32 *byte_size,
+				   int *iterations,
+				   int *num_stes)
+{
+	int alloc_size;
+
+	if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
+		*iterations = htbl->chunk->byte_size /
+			dmn->send_ring->max_post_send_size;
+		*byte_size = dmn->send_ring->max_post_send_size;
+		alloc_size = *byte_size;
+		*num_stes = *byte_size / DR_STE_SIZE;
+	} else {
+		*iterations = 1;
+		*num_stes = htbl->chunk->num_of_entries;
+		alloc_size = *num_stes * DR_STE_SIZE;
+	}
+
+	*data = kzalloc(alloc_size, GFP_KERNEL);
+	if (!*data)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * mlx5dr_send_postsend_ste: Write size bytes at offset into the HW ICM.
+ *
+ *     @dmn:    Domain
+ *     @ste:    The STE struct that holds the data (at
+ *              least part of it)
+ *     @data:   The real data to send
+ *     @size:   Number of bytes to write
+ *     @offset: Offset from the start of the ICM-mapped data;
+ *              use it to write only part of the buffer.
+ *
+ * Return: 0 on success.
+ */
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
+			     u8 *data, u16 size, u16 offset)
+{
+	struct postsend_info send_info = {};
+
+	send_info.write.addr = (uintptr_t)data;
+	send_info.write.length = size;
+	send_info.write.lkey = 0;
+	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
+	send_info.rkey = ste->htbl->chunk->rkey;
+
+	return dr_postsend_icm_data(dmn, &send_info);
+}
+
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+			      struct mlx5dr_ste_htbl *htbl,
+			      u8 *formatted_ste, u8 *mask)
+{
+	u32 byte_size = htbl->chunk->byte_size;
+	int num_stes_per_iter;
+	int iterations;
+	u8 *data;
+	int ret;
+	int i;
+	int j;
+
+	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
+				      &iterations, &num_stes_per_iter);
+	if (ret)
+		return ret;
+
+	/* Send the data 'iterations' times */
+	for (i = 0; i < iterations; i++) {
+		u32 ste_index = i * (byte_size / DR_STE_SIZE);
+		struct postsend_info send_info = {};
+
+		/* Copy all STEs into the data buffer; the bit_mask still
+		 * needs to be appended to each reduced STE.
+		 */
+		for (j = 0; j < num_stes_per_iter; j++) {
+			u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
+			u32 ste_off = j * DR_STE_SIZE;
+
+			if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
+				memcpy(data + ste_off,
+				       formatted_ste, DR_STE_SIZE);
+			} else {
+				/* Copy data */
+				memcpy(data + ste_off,
+				       htbl->ste_arr[ste_index + j].hw_ste,
+				       DR_STE_SIZE_REDUCED);
+				/* Copy bit_mask */
+				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
+				       mask, DR_STE_SIZE_MASK);
+			}
+		}
+
+		send_info.write.addr = (uintptr_t)data;
+		send_info.write.length = byte_size;
+		send_info.write.lkey = 0;
+		send_info.remote_addr =
+			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
+		send_info.rkey = htbl->chunk->rkey;
+
+		ret = dr_postsend_icm_data(dmn, &send_info);
+		if (ret)
+			goto out_free;
+	}
+
+out_free:
+	kfree(data);
+	return ret;
+}
+
+/* Initialize the htbl with default STEs */
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+					struct mlx5dr_ste_htbl *htbl,
+					u8 *ste_init_data,
+					bool update_hw_ste)
+{
+	u32 byte_size = htbl->chunk->byte_size;
+	int iterations;
+	int num_stes;
+	u8 *data;
+	int ret;
+	int i;
+
+	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
+				      &iterations, &num_stes);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < num_stes; i++) {
+		u8 *copy_dst;
+
+		/* Copy the same ste on the data buffer */
+		copy_dst = data + i * DR_STE_SIZE;
+		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
+
+		if (update_hw_ste) {
+			/* Copy the reduced ste to hash table ste_arr */
+			copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
+		}
+	}
+
+	/* Send the data 'iterations' times */
+	for (i = 0; i < iterations; i++) {
+		u32 ste_index = i * (byte_size / DR_STE_SIZE);
+		struct postsend_info send_info = {};
+
+		send_info.write.addr = (uintptr_t)data;
+		send_info.write.length = byte_size;
+		send_info.write.lkey = 0;
+		send_info.remote_addr =
+			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
+		send_info.rkey = htbl->chunk->rkey;
+
+		ret = dr_postsend_icm_data(dmn, &send_info);
+		if (ret)
+			goto out_free;
+	}
+
+out_free:
+	kfree(data);
+	return ret;
+}
+
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+				struct mlx5dr_action *action)
+{
+	struct postsend_info send_info = {};
+	int ret;
+
+	send_info.write.addr = (uintptr_t)action->rewrite.data;
+	send_info.write.length = action->rewrite.chunk->byte_size;
+	send_info.write.lkey = 0;
+	send_info.remote_addr = action->rewrite.chunk->mr_addr;
+	send_info.rkey = action->rewrite.chunk->rkey;
+
+	mutex_lock(&dmn->mutex);
+	ret = dr_postsend_icm_data(dmn, &send_info);
+	mutex_unlock(&dmn->mutex);
+
+	return ret;
+}
+
+static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
+				 struct mlx5dr_qp *dr_qp,
+				 int port)
+{
+	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+	void *qpc;
+
+	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
+
+	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
+	MLX5_SET(qpc, qpc, rre, 1);
+	MLX5_SET(qpc, qpc, rwe, 1);
+
+	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
+				   &dr_qp->mqp);
+}
+
+static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
+				    struct mlx5dr_qp *dr_qp,
+				    struct dr_qp_rts_attr *attr)
+{
+	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+	void *qpc;
+
+	qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
+
+	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+	MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
+	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
+	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
+
+	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
+				   &dr_qp->mqp);
+}
+
+static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
+				     struct mlx5dr_qp *dr_qp,
+				     struct dr_qp_rtr_attr *attr)
+{
+	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+	void *qpc;
+
+	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
+
+	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+	MLX5_SET(qpc, qpc, mtu, attr->mtu);
+	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
+	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
+	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
+	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
+	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+		 attr->sgid_index);
+
+	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
+		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+			 attr->udp_src_port);
+
+	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
+	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
+
+	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+				   &dr_qp->mqp);
+}
+
+static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
+{
+	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
+	struct dr_qp_rts_attr rts_attr = {};
+	struct dr_qp_rtr_attr rtr_attr = {};
+	enum ib_mtu mtu = IB_MTU_1024;
+	u16 gid_index = 0;
+	int port = 1;
+	int ret;
+
+	/* Init */
+	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
+	if (ret)
+		return ret;
+
+	/* RTR */
+	ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
+	if (ret)
+		return ret;
+
+	rtr_attr.mtu		= mtu;
+	rtr_attr.qp_num		= dr_qp->mqp.qpn;
+	rtr_attr.min_rnr_timer	= 12;
+	rtr_attr.port_num	= port;
+	rtr_attr.sgid_index	= gid_index;
+	rtr_attr.udp_src_port	= dmn->info.caps.roce_min_src_udp;
+
+	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
+	if (ret)
+		return ret;
+
+	/* RTS */
+	rts_attr.timeout	= 14;
+	rts_attr.retry_cnt	= 7;
+	rts_attr.rnr_retry	= 7;
+
+	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
+	if (ret)
+		return ret;
+
+	return 0;
+}
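
dr_prepare_qp_to_rts() walks the standard RC QP state ladder, RESET -> INIT -> RTR -> RTS, where each step is a separate modify-QP command that must succeed before the next is attempted. A generic sketch of that pattern follows; the step functions are hypothetical stand-ins for the three dr_*modify_qp* helpers above:

```c
/* Generic sketch of the RESET->INIT->RTR->RTS ladder used above.
 * The step functions are hypothetical stand-ins; each transition
 * must succeed, in order, before the QP can transmit.
 */
#include <stdio.h>

typedef int (*qp_step_fn)(void *qp);

static int qp_to_rts(void *qp, const qp_step_fn *steps, int n)
{
	int i, err;

	for (i = 0; i < n; i++) {
		err = steps[i](qp);
		if (err) {
			fprintf(stderr, "QP transition %d failed: %d\n", i, err);
			return err; /* QP left mid-ladder; caller tears it down */
		}
	}
	return 0;
}

static int step_init(void *qp) { (void)qp; return 0; }
static int step_rtr(void *qp)  { (void)qp; return 0; }
static int step_rts(void *qp)  { (void)qp; return 0; }

int main(void)
{
	const qp_step_fn ladder[] = { step_init, step_rtr, step_rts };

	return qp_to_rts(NULL, ladder, 3);
}
```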
+
+static void dr_cq_event(struct mlx5_core_cq *mcq,
+			enum mlx5_event event)
+{
+	pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
+				      struct mlx5_uars_page *uar,
+				      size_t ncqe)
+{
+	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
+	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+	struct mlx5_wq_param wqp;
+	struct mlx5_cqe64 *cqe;
+	struct mlx5dr_cq *cq;
+	int inlen, err, eqn;
+	unsigned int irqn;
+	void *cqc, *in;
+	__be64 *pas;
+	int vector;
+	u32 i;
+
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq)
+		return NULL;
+
+	ncqe = roundup_pow_of_two(ncqe);
+	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
+
+	wqp.buf_numa_node = mdev->priv.numa_node;
+	wqp.db_numa_node = mdev->priv.numa_node;
+
+	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
+			       &cq->wq_ctrl);
+	if (err)
+		goto out;
+
+	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+	}
+
+	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		sizeof(u64) * cq->wq_ctrl.buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		goto err_cqwq;
+
+	vector = smp_processor_id() % mlx5_comp_vectors_count(mdev);
+	err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
+	if (err) {
+		kvfree(in);
+		goto err_cqwq;
+	}
+
+	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
+	MLX5_SET(cqc, cqc, c_eqn, eqn);
+	MLX5_SET(cqc, cqc, uar_page, uar->index);
+	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+		 MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
+
+	cq->mcq.event = dr_cq_event;
+
+	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
+	kvfree(in);
+
+	if (err)
+		goto err_cqwq;
+
+	cq->mcq.cqe_sz = 64;
+	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
+	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
+	*cq->mcq.set_ci_db = 0;
+	*cq->mcq.arm_db = 0;
+	cq->mcq.vector = 0;
+	cq->mcq.irqn = irqn;
+	cq->mcq.uar = uar;
+
+	return cq;
+
+err_cqwq:
+	mlx5_wq_destroy(&cq->wq_ctrl);
+out:
+	kfree(cq);
+	return NULL;
+}
+
+static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
+{
+	mlx5_core_destroy_cq(mdev, &cq->mcq);
+	mlx5_wq_destroy(&cq->wq_ctrl);
+	kfree(cq);
+}
+
+static int
+dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
+{
+	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
+	void *mkc;
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+	MLX5_SET(mkc, mkc, a, 1);
+	MLX5_SET(mkc, mkc, rw, 1);
+	MLX5_SET(mkc, mkc, rr, 1);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
+
+	MLX5_SET(mkc, mkc, pd, pdn);
+	MLX5_SET(mkc, mkc, length64, 1);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
+}
+
+static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
+				   u32 pdn, void *buf, size_t size)
+{
+	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	struct device *dma_device;
+	dma_addr_t dma_addr;
+	int err;
+
+	if (!mr)
+		return NULL;
+
+	dma_device = &mdev->pdev->dev;
+	dma_addr = dma_map_single(dma_device, buf, size,
+				  DMA_BIDIRECTIONAL);
+	err = dma_mapping_error(dma_device, dma_addr);
+	if (err) {
+		mlx5_core_warn(mdev, "Can't dma buf\n");
+		kfree(mr);
+		return NULL;
+	}
+
+	err = dr_create_mkey(mdev, pdn, &mr->mkey);
+	if (err) {
+		mlx5_core_warn(mdev, "Can't create mkey\n");
+		dma_unmap_single(dma_device, dma_addr, size,
+				 DMA_BIDIRECTIONAL);
+		kfree(mr);
+		return NULL;
+	}
+
+	mr->dma_addr = dma_addr;
+	mr->size = size;
+	mr->addr = buf;
+
+	return mr;
+}
+
+static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
+{
+	mlx5_core_destroy_mkey(mdev, &mr->mkey);
+	dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
+			 DMA_BIDIRECTIONAL);
+	kfree(mr);
+}
+
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
+{
+	struct dr_qp_init_attr init_attr = {};
+	int cq_size;
+	int size;
+	int ret;
+
+	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
+	if (!dmn->send_ring)
+		return -ENOMEM;
+
+	cq_size = QUEUE_SIZE + 1;
+	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
+	if (!dmn->send_ring->cq) {
+		ret = -ENOMEM;
+		goto free_send_ring;
+	}
+
+	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
+	init_attr.pdn = dmn->pdn;
+	init_attr.uar = dmn->uar;
+	init_attr.max_send_wr = QUEUE_SIZE;
+
+	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
+	if (!dmn->send_ring->qp)  {
+		ret = -ENOMEM;
+		goto clean_cq;
+	}
+
+	dmn->send_ring->cq->qp = dmn->send_ring->qp;
+
+	dmn->info.max_send_wr = QUEUE_SIZE;
+	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
+					DR_STE_SIZE);
+
+	dmn->send_ring->signal_th = dmn->info.max_send_wr /
+		SIGNAL_PER_DIV_QUEUE;
+
+	/* Prepare qp to be used */
+	ret = dr_prepare_qp_to_rts(dmn);
+	if (ret)
+		goto clean_qp;
+
+	dmn->send_ring->max_post_send_size =
+		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
+						   DR_ICM_TYPE_STE);
+
+	/* Allocating the max size as a buffer for writing */
+	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
+	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
+	if (!dmn->send_ring->buf) {
+		ret = -ENOMEM;
+		goto clean_qp;
+	}
+
+	dmn->send_ring->buf_size = size;
+
+	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
+				       dmn->pdn, dmn->send_ring->buf, size);
+	if (!dmn->send_ring->mr) {
+		ret = -ENOMEM;
+		goto free_mem;
+	}
+
+	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
+					    dmn->pdn, dmn->send_ring->sync_buff,
+					    MIN_READ_SYNC);
+	if (!dmn->send_ring->sync_mr) {
+		ret = -ENOMEM;
+		goto clean_mr;
+	}
+
+	return 0;
+
+clean_mr:
+	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
+free_mem:
+	kfree(dmn->send_ring->buf);
+clean_qp:
+	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
+clean_cq:
+	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
+free_send_ring:
+	kfree(dmn->send_ring);
+
+	return ret;
+}
+
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+			   struct mlx5dr_send_ring *send_ring)
+{
+	dr_destroy_qp(dmn->mdev, send_ring->qp);
+	dr_destroy_cq(dmn->mdev, send_ring->cq);
+	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
+	dr_dereg_mr(dmn->mdev, send_ring->mr);
+	kfree(send_ring->buf);
+	kfree(send_ring);
+}
+
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
+{
+	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
+	struct postsend_info send_info = {};
+	u8 data[DR_STE_SIZE];
+	int num_of_sends_req;
+	int ret;
+	int i;
+
+	/* Sending this many requests guarantees the queue will be drained */
+	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
+
+	/* Send fake requests forcing the last to be signaled */
+	send_info.write.addr = (uintptr_t)data;
+	send_info.write.length = DR_STE_SIZE;
+	send_info.write.lkey = 0;
+	/* Using the sync_mr in order to write/read */
+	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
+	send_info.rkey = send_ring->sync_mr->mkey.key;
+
+	for (i = 0; i < num_of_sends_req; i++) {
+		ret = dr_postsend_icm_data(dmn, &send_info);
+		if (ret)
+			return ret;
+	}
+
+	ret = dr_handle_pending_wc(dmn, send_ring);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
new file mode 100644
index 0000000..3cbf74b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -0,0 +1,2312 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/types.h>
+#include "dr_types.h"
+
+#define DR_STE_CRC_POLY 0xEDB88320L
+#define STE_IPV4 0x1
+#define STE_IPV6 0x2
+#define STE_TCP 0x1
+#define STE_UDP 0x2
+#define STE_SPI 0x3
+#define IP_VERSION_IPV4 0x4
+#define IP_VERSION_IPV6 0x6
+#define STE_SVLAN 0x1
+#define STE_CVLAN 0x2
+
+#define DR_STE_ENABLE_FLOW_TAG BIT(31)
+
+/* Set to STE a specific value using DR_STE_SET */
+#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \
+	if ((spec)->s_fname) { \
+		MLX5_SET(ste_##lookup_type, tag, t_fname, value); \
+		(spec)->s_fname = 0; \
+	} \
+} while (0)
+
+/* Set to STE spec->s_fname to tag->t_fname */
+#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \
+	DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname)
+
+/* Set to STE -1 to bit_mask->bm_fname and set spec->s_fname as used */
+#define DR_STE_SET_MASK(lookup_type, bit_mask, bm_fname, spec, s_fname) \
+	DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, -1)
+
+/* Set to STE spec->s_fname to bit_mask->bm_fname and set spec->s_fname as used */
+#define DR_STE_SET_MASK_V(lookup_type, bit_mask, bm_fname, spec, s_fname) \
+	DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, (spec)->s_fname)
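
These macros implement a consume-on-set pattern: a field is copied from the match spec into the STE tag (or bit mask) only when it is non-zero in the spec, and the spec field is then zeroed to mark it as used, so any field still set after all builders run can be detected as unconsumed. A minimal sketch of the same pattern without the MLX5_SET machinery; the struct layouts and field names are illustrative, not the driver's real formats:

```c
/* Sketch of the consume-on-set pattern behind DR_STE_SET_VAL/TAG/MASK.
 * The spec/tag structs and field names are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

struct spec { uint16_t first_vid; uint16_t tcp_dport; };
struct tag  { uint16_t first_vlan_id; uint16_t destination_port; };

#define SET_TAG(tag, t_fname, spec, s_fname) do {	\
	if ((spec)->s_fname) {				\
		(tag)->t_fname = (spec)->s_fname;	\
		(spec)->s_fname = 0; /* consumed */	\
	}						\
} while (0)

int main(void)
{
	struct spec s = { .first_vid = 100, .tcp_dport = 443 };
	struct tag t = { 0 };

	SET_TAG(&t, first_vlan_id, &s, first_vid);
	/* tcp_dport was never consumed by a builder; a later pass can
	 * flag it as an unsupported match field.
	 */
	printf("tag vid=%u, leftover dport=%u\n",
	       (unsigned)t.first_vlan_id, (unsigned)s.tcp_dport);
	return 0;
}
```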
+
+#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \
+	MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \
+	MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \
+} while (0)
+
+#define DR_STE_SET_MPLS_MASK(lookup_type, mask, in_out, bit_mask) do { \
+	DR_STE_SET_MASK_V(lookup_type, mask, mpls0_label, mask, \
+			  in_out##_first_mpls_label);\
+	DR_STE_SET_MASK_V(lookup_type, mask, mpls0_s_bos, mask, \
+			  in_out##_first_mpls_s_bos); \
+	DR_STE_SET_MASK_V(lookup_type, mask, mpls0_exp, mask, \
+			  in_out##_first_mpls_exp); \
+	DR_STE_SET_MASK_V(lookup_type, mask, mpls0_ttl, mask, \
+			  in_out##_first_mpls_ttl); \
+} while (0)
+
+#define DR_STE_SET_MPLS_TAG(lookup_type, mask, in_out, tag) do { \
+	DR_STE_SET_TAG(lookup_type, tag, mpls0_label, mask, \
+		       in_out##_first_mpls_label);\
+	DR_STE_SET_TAG(lookup_type, tag, mpls0_s_bos, mask, \
+		       in_out##_first_mpls_s_bos); \
+	DR_STE_SET_TAG(lookup_type, tag, mpls0_exp, mask, \
+		       in_out##_first_mpls_exp); \
+	DR_STE_SET_TAG(lookup_type, tag, mpls0_ttl, mask, \
+		       in_out##_first_mpls_ttl); \
+} while (0)
+
+#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+	(_misc)->outer_first_mpls_over_gre_label || \
+	(_misc)->outer_first_mpls_over_gre_exp || \
+	(_misc)->outer_first_mpls_over_gre_s_bos || \
+	(_misc)->outer_first_mpls_over_gre_ttl)
+#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+	(_misc)->outer_first_mpls_over_udp_label || \
+	(_misc)->outer_first_mpls_over_udp_exp || \
+	(_misc)->outer_first_mpls_over_udp_s_bos || \
+	(_misc)->outer_first_mpls_over_udp_ttl)
+
+#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \
+	((inner) ? MLX5DR_STE_LU_TYPE_##lookup_type##_I : \
+		   (rx) ? MLX5DR_STE_LU_TYPE_##lookup_type##_D : \
+			  MLX5DR_STE_LU_TYPE_##lookup_type##_O)
+
+enum dr_ste_tunl_action {
+	DR_STE_TUNL_ACTION_NONE		= 0,
+	DR_STE_TUNL_ACTION_ENABLE	= 1,
+	DR_STE_TUNL_ACTION_DECAP	= 2,
+	DR_STE_TUNL_ACTION_L3_DECAP	= 3,
+	DR_STE_TUNL_ACTION_POP_VLAN	= 4,
+};
+
+enum dr_ste_action_type {
+	DR_STE_ACTION_TYPE_PUSH_VLAN	= 1,
+	DR_STE_ACTION_TYPE_ENCAP_L3	= 3,
+	DR_STE_ACTION_TYPE_ENCAP	= 4,
+};
+
+struct dr_hw_ste_format {
+	u8 ctrl[DR_STE_SIZE_CTRL];
+	u8 tag[DR_STE_SIZE_TAG];
+	u8 mask[DR_STE_SIZE_MASK];
+};
+
+u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	u8 masked[DR_STE_SIZE_TAG] = {};
+	u32 crc32, index;
+	u16 bit;
+	int i;
+
+	/* Don't calculate CRC if the result is predictable */
+	if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0)
+		return 0;
+
+	/* Mask the tag using the byte mask, one bit per byte */
+	bit = 1 << (DR_STE_SIZE_TAG - 1);
+	for (i = 0; i < DR_STE_SIZE_TAG; i++) {
+		if (htbl->byte_mask & bit)
+			masked[i] = hw_ste->tag[i];
+
+		bit = bit >> 1;
+	}
+
+	crc32 = mlx5dr_crc32_slice8_calc(masked, DR_STE_SIZE_TAG);
+	index = crc32 & (htbl->chunk->num_of_entries - 1);
+
+	return index;
+}
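
A worked sketch of the indexing above: mask the tag byte-wise (one byte_mask bit per tag byte, MSB first), CRC the result, and fold it into the table with `crc & (num_entries - 1)`, which assumes a power-of-two entry count. Here crc32_stub() is a plain bit-reflected CRC-32 using the same DR_STE_CRC_POLY polynomial; it may not match the driver's slice-by-8 implementation bit-for-bit, and DR_STE_SIZE_TAG is assumed to be 16:

```c
#include <stdio.h>
#include <stdint.h>

#define DR_STE_SIZE_TAG 16 /* assumed tag size */

static uint32_t crc32_stub(const uint8_t *buf, int len)
{
	uint32_t crc = ~0u;
	int i, b;

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		for (b = 0; b < 8; b++)
			crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1));
	}
	return ~crc;
}

static uint32_t hash_index(const uint8_t *tag, uint16_t byte_mask,
			   uint32_t num_entries /* power of two */)
{
	uint8_t masked[DR_STE_SIZE_TAG] = { 0 };
	uint16_t bit = 1 << (DR_STE_SIZE_TAG - 1);
	int i;

	/* Keep only the tag bytes the matcher actually compares */
	for (i = 0; i < DR_STE_SIZE_TAG; i++, bit >>= 1)
		if (byte_mask & bit)
			masked[i] = tag[i];

	return crc32_stub(masked, DR_STE_SIZE_TAG) & (num_entries - 1);
}

int main(void)
{
	uint8_t tag[DR_STE_SIZE_TAG] = { 0xde, 0xad, 0xbe, 0xef };

	printf("index = %u\n", hash_index(tag, 0xf000, 1024));
	return 0;
}
```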
+
+static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
+{
+	u16 byte_mask = 0;
+	int i;
+
+	for (i = 0; i < DR_STE_SIZE_MASK; i++) {
+		byte_mask = byte_mask << 1;
+		if (bit_mask[i] == 0xff)
+			byte_mask |= 1;
+	}
+	return byte_mask;
+}
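
A quick worked example of the conversion above: each mask byte contributes one bit to the 16-bit byte mask, MSB first, and the bit is set only when the byte is fully masked (0xff); partially masked bytes contribute nothing, which is why they are excluded from the hash. DR_STE_SIZE_MASK is assumed to be 16:

```c
/* Worked example of dr_ste_conv_bit_to_byte_mask(): bytes 0 and 1
 * fully masked (0xff) map to the two most significant bits of the
 * resulting byte mask; the partial 0x0f byte is dropped.
 */
#include <stdio.h>
#include <stdint.h>

#define DR_STE_SIZE_MASK 16 /* assumed mask size */

int main(void)
{
	uint8_t bit_mask[DR_STE_SIZE_MASK] = { 0xff, 0xff, 0x0f /* partial */ };
	uint16_t byte_mask = 0;
	int i;

	for (i = 0; i < DR_STE_SIZE_MASK; i++) {
		byte_mask <<= 1;
		if (bit_mask[i] == 0xff)
			byte_mask |= 1;
	}
	printf("byte_mask = 0x%04x\n", byte_mask); /* prints 0xc000 */
	return 0;
}
```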
+
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+
+	memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK);
+}
+
+void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
+{
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
+		 DR_STE_ENABLE_FLOW_TAG | flow_tag);
+}
+
+void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
+{
+	/* This can be used for both rx_steering_mult and sx_transmit */
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id);
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16);
+}
+
+void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p)
+{
+	MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1);
+}
+
+void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
+				 bool go_back)
+{
+	MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+		 DR_STE_ACTION_TYPE_PUSH_VLAN);
+	MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
+	/* Due to a HW limitation we need to set this bit, otherwise reformat +
+	 * push vlan will not work.
+	 */
+	if (go_back)
+		mlx5dr_ste_set_go_back_bit(hw_ste_p);
+}
+
+void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, int size, bool encap_l3)
+{
+	MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+		 encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP);
+	/* The hardware expects the size here in 2-byte words */
+	MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2);
+	MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id);
+}
+
+void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p)
+{
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+		 DR_STE_TUNL_ACTION_DECAP);
+}
+
+void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p)
+{
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+		 DR_STE_TUNL_ACTION_POP_VLAN);
+}
+
+void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan)
+{
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+		 DR_STE_TUNL_ACTION_L3_DECAP);
+	MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0);
+}
+
+void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type)
+{
+	MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
+}
+
+u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p)
+{
+	return MLX5_GET(ste_general, hw_ste_p, entry_type);
+}
+
+void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
+				    u32 re_write_index)
+{
+	MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions,
+		 num_of_actions);
+	MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer,
+		 re_write_index);
+}
+
+void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
+{
+	MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi);
+}
+
+void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type,
+		     u16 gvmi)
+{
+	MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
+	MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type);
+	MLX5_SET(ste_general, hw_ste_p, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE);
+
+	/* Set GVMI once, this is the same for RX/TX.
+	 * Bits 63_48 of the next table base / miss address encode the next GVMI.
+	 */
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi);
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi);
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
+}
+
+static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste)
+{
+	memset(&hw_ste->tag, 0, sizeof(hw_ste->tag));
+	memset(&hw_ste->mask, 0, sizeof(hw_ste->mask));
+}
+
+static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste)
+{
+	hw_ste->tag[0] = 0xdc;
+	hw_ste->mask[0] = 0;
+}
+
+u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste)
+{
+	u64 index =
+		(MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_31_6) |
+		 MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_39_32) << 26);
+
+	return index << 6;
+}
+
+void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size)
+{
+	u64 index = (icm_addr >> 5) | ht_size;
+
+	MLX5_SET(ste_general, hw_ste, next_table_base_39_32_size, index >> 27);
+	MLX5_SET(ste_general, hw_ste, next_table_base_31_5_size, index);
+}
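
The two encodings above pack a 64-byte-aligned miss address (and, for hits, a 32-byte-aligned table base plus a size field) into split STE fields. A round-trip sketch of the miss-address split, with the field widths inferred from the field names (39_32 and 31_6):

```c
/* Round-trip sketch of the miss-address encoding: a 64-byte-aligned
 * ICM address is shifted down 6 bits, then split into a low 26-bit
 * field (address bits 31..6) and a high 8-bit field (bits 39..32).
 * Field widths are inferred from the names, not from a HW spec.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t miss_addr = 0x12345678C0ULL & ~0x3fULL; /* 64B aligned */
	uint64_t index = miss_addr >> 6;

	uint32_t addr_31_6 = index & ((1u << 26) - 1);
	uint8_t addr_39_32 = (index >> 26) & 0xff;

	uint64_t decoded = ((uint64_t)addr_31_6 |
			    (uint64_t)addr_39_32 << 26) << 6;

	printf("encoded:  hi=0x%02x lo=0x%07x\n",
	       (unsigned)addr_39_32, addr_31_6);
	printf("round trip %s\n", decoded == miss_addr ? "ok" : "MISMATCH");
	return 0;
}
```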
+
+u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste)
+{
+	u32 index = ste - ste->htbl->ste_arr;
+
+	return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index;
+}
+
+u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste)
+{
+	u32 index = ste - ste->htbl->ste_arr;
+
+	return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index;
+}
+
+struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste)
+{
+	u32 index = ste - ste->htbl->ste_arr;
+
+	return &ste->htbl->miss_list[index];
+}
+
+static void dr_ste_always_hit_htbl(struct mlx5dr_ste *ste,
+				   struct mlx5dr_ste_htbl *next_htbl)
+{
+	struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+	u8 *hw_ste = ste->hw_ste;
+
+	MLX5_SET(ste_general, hw_ste, byte_mask, next_htbl->byte_mask);
+	MLX5_SET(ste_general, hw_ste, next_lu_type, next_htbl->lu_type);
+	mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+
+	dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste);
+}
+
+bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
+				u8 ste_location)
+{
+	return ste_location == nic_matcher->num_of_builders;
+}
+
+/* Replace the relevant fields, except for:
+ * htbl - keep the original htbl
+ * miss_list + list - the src was already taken from the list
+ * icm_addr/mr_addr - depend on the hosting table
+ *
+ * Before:
+ * | a | -> | b | -> | c | ->
+ *
+ * After:
+ * | a | -> | c | ->
+ * with the data that was in b copied into a.
+ */
+static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
+{
+	memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED);
+	dst->next_htbl = src->next_htbl;
+	if (dst->next_htbl)
+		dst->next_htbl->pointing_ste = dst;
+
+	refcount_set(&dst->refcount, refcount_read(&src->refcount));
+
+	INIT_LIST_HEAD(&dst->rule_list);
+	list_splice_tail_init(&src->rule_list, &dst->rule_list);
+}
+
+/* Free ste which is the head and the only one in miss_list */
+static void
+dr_ste_remove_head_ste(struct mlx5dr_ste *ste,
+		       struct mlx5dr_matcher_rx_tx *nic_matcher,
+		       struct mlx5dr_ste_send_info *ste_info_head,
+		       struct list_head *send_ste_list,
+		       struct mlx5dr_ste_htbl *stats_tbl)
+{
+	u8 tmp_data_ste[DR_STE_SIZE] = {};
+	struct mlx5dr_ste tmp_ste = {};
+	u64 miss_addr;
+
+	tmp_ste.hw_ste = tmp_data_ste;
+
+	/* Use a temporary ste because dr_ste_always_miss_addr
+	 * touches the bit_mask area, which doesn't exist in ste->hw_ste.
+	 */
+	memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
+	miss_addr = nic_matcher->e_anchor->chunk->icm_addr;
+	mlx5dr_ste_always_miss_addr(&tmp_ste, miss_addr);
+	memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED);
+
+	list_del_init(&ste->miss_list_node);
+
+	/* Write full STE size in order to have "always_miss" */
+	mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE,
+						  0, tmp_data_ste,
+						  ste_info_head,
+						  send_ste_list,
+						  true /* Copy data */);
+
+	stats_tbl->ctrl.num_of_valid_entries--;
+}
+
+/* Free ste which is the head but NOT the only one in miss_list:
+ * |_ste_| --> |_next_ste_| -->|__| -->|__| --> NULL
+ */
+static void
+dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste,
+			struct mlx5dr_ste_send_info *ste_info_head,
+			struct list_head *send_ste_list,
+			struct mlx5dr_ste_htbl *stats_tbl)
+
+{
+	struct mlx5dr_ste_htbl *next_miss_htbl;
+
+	next_miss_htbl = next_ste->htbl;
+
+	/* Remove from the miss_list the next_ste before copy */
+	list_del_init(&next_ste->miss_list_node);
+
+	/* All rule members that use next_ste must be updated to point at ste */
+	mlx5dr_rule_update_rule_member(next_ste, ste);
+
+	/* Move data from next into ste */
+	dr_ste_replace(ste, next_ste);
+
+	/* Release the htbl that contained the next_ste.
+	 * The original htbl keeps the same number of entries.
+	 */
+	mlx5dr_htbl_put(next_miss_htbl);
+
+	mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED,
+						  0, ste->hw_ste,
+						  ste_info_head,
+						  send_ste_list,
+						  true /* Copy data */);
+
+	stats_tbl->ctrl.num_of_collisions--;
+	stats_tbl->ctrl.num_of_valid_entries--;
+}
+
+/* Free ste that is located in the middle of the miss list:
+ * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_|
+ */
+static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste,
+				     struct mlx5dr_ste_send_info *ste_info,
+				     struct list_head *send_ste_list,
+				     struct mlx5dr_ste_htbl *stats_tbl)
+{
+	struct mlx5dr_ste *prev_ste;
+	u64 miss_addr;
+
+	prev_ste = list_prev_entry(ste, miss_list_node);
+	if (WARN_ON(!prev_ste))
+		return;
+
+	miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste);
+	mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr);
+
+	mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0,
+						  prev_ste->hw_ste, ste_info,
+						  send_ste_list, true /* Copy data*/);
+
+	list_del_init(&ste->miss_list_node);
+
+	stats_tbl->ctrl.num_of_valid_entries--;
+	stats_tbl->ctrl.num_of_collisions--;
+}
+
+void mlx5dr_ste_free(struct mlx5dr_ste *ste,
+		     struct mlx5dr_matcher *matcher,
+		     struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+	struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_ste_send_info ste_info_head;
+	struct mlx5dr_ste *next_ste, *first_ste;
+	bool put_on_origin_table = true;
+	struct mlx5dr_ste_htbl *stats_tbl;
+	LIST_HEAD(send_ste_list);
+
+	first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste),
+				     struct mlx5dr_ste, miss_list_node);
+	stats_tbl = first_ste->htbl;
+
+	/* Two options:
+	 * 1. ste is head:
+	 *	a. head ste is the only ste in the miss list
+	 *	b. head ste is not the only ste in the miss list
+	 * 2. ste is not head
+	 */
+	if (first_ste == ste) { /* Ste is the head */
+		struct mlx5dr_ste *last_ste;
+
+		last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste),
+					   struct mlx5dr_ste, miss_list_node);
+		if (last_ste == first_ste)
+			next_ste = NULL;
+		else
+			next_ste = list_next_entry(ste, miss_list_node);
+
+		if (!next_ste) {
+			/* One and only entry in the list */
+			dr_ste_remove_head_ste(ste, nic_matcher,
+					       &ste_info_head,
+					       &send_ste_list,
+					       stats_tbl);
+		} else {
+			/* First but not only entry in the list */
+			dr_ste_replace_head_ste(ste, next_ste, &ste_info_head,
+						&send_ste_list, stats_tbl);
+			put_on_origin_table = false;
+		}
+	} else { /* Ste in the middle of the list */
+		dr_ste_remove_middle_ste(ste, &ste_info_head, &send_ste_list, stats_tbl);
+	}
+
+	/* Update HW */
+	list_for_each_entry_safe(cur_ste_info, tmp_ste_info,
+				 &send_ste_list, send_list) {
+		list_del(&cur_ste_info->send_list);
+		mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste,
+					 cur_ste_info->data, cur_ste_info->size,
+					 cur_ste_info->offset);
+	}
+
+	if (put_on_origin_table)
+		mlx5dr_htbl_put(ste->htbl);
+}
+
+bool mlx5dr_ste_equal_tag(void *src, void *dst)
+{
+	struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src;
+	struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst;
+
+	return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG);
+}
+
+void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
+					  struct mlx5dr_ste_htbl *next_htbl)
+{
+	struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+
+	mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+}
+
+void mlx5dr_ste_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+{
+	u64 index = miss_addr >> 6;
+
+	/* The miss address for TX and RX STEs is located at the same offsets */
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26);
+	MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index);
+}
+
+void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr)
+{
+	u8 *hw_ste = ste->hw_ste;
+
+	MLX5_SET(ste_rx_steering_mult, hw_ste, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE);
+	mlx5dr_ste_set_miss_addr(hw_ste, miss_addr);
+	dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste);
+}
+
+/* The assumption here is that ste->hw_ste is not updated for an unused
+ * STE, so it is all zeros and checking next_lu_type is sufficient.
+ */
+bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste;
+
+	if (MLX5_GET(ste_general, hw_ste, next_lu_type) ==
+	    MLX5DR_STE_LU_TYPE_NOP)
+		return true;
+
+	return false;
+}
+
+bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste)
+{
+	return !refcount_read(&ste->refcount);
+}
+
+/* Init one STE as a pattern for the STE data array */
+void mlx5dr_ste_set_formatted_ste(u16 gvmi,
+				  struct mlx5dr_domain_rx_tx *nic_dmn,
+				  struct mlx5dr_ste_htbl *htbl,
+				  u8 *formatted_ste,
+				  struct mlx5dr_htbl_connect_info *connect_info)
+{
+	struct mlx5dr_ste ste = {};
+
+	mlx5dr_ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi);
+	ste.hw_ste = formatted_ste;
+
+	if (connect_info->type == CONNECT_HIT)
+		dr_ste_always_hit_htbl(&ste, connect_info->hit_next_htbl);
+	else
+		mlx5dr_ste_always_miss_addr(&ste, connect_info->miss_icm_addr);
+}
+
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+				      struct mlx5dr_domain_rx_tx *nic_dmn,
+				      struct mlx5dr_ste_htbl *htbl,
+				      struct mlx5dr_htbl_connect_info *connect_info,
+				      bool update_hw_ste)
+{
+	u8 formatted_ste[DR_STE_SIZE] = {};
+
+	mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi,
+				     nic_dmn,
+				     htbl,
+				     formatted_ste,
+				     connect_info);
+
+	return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste);
+}
+
+int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
+				struct mlx5dr_matcher_rx_tx *nic_matcher,
+				struct mlx5dr_ste *ste,
+				u8 *cur_hw_ste,
+				enum mlx5dr_icm_chunk_size log_table_size)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)cur_hw_ste;
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_htbl_connect_info info;
+	struct mlx5dr_ste_htbl *next_htbl;
+
+	if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) {
+		u8 next_lu_type;
+		u16 byte_mask;
+
+		next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type);
+		byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask);
+
+		next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+						  log_table_size,
+						  next_lu_type,
+						  byte_mask);
+		if (!next_htbl) {
+			mlx5dr_dbg(dmn, "Failed allocating table\n");
+			return -ENOMEM;
+		}
+
+		/* Write new table to HW */
+		info.type = CONNECT_MISS;
+		info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+		if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl,
+						      &info, false)) {
+			mlx5dr_info(dmn, "Failed writing table to HW\n");
+			goto free_table;
+		}
+
+		mlx5dr_ste_set_hit_addr_by_next_htbl(cur_hw_ste, next_htbl);
+		ste->next_htbl = next_htbl;
+		next_htbl->pointing_ste = ste;
+	}
+
+	return 0;
+
+free_table:
+	mlx5dr_ste_htbl_free(next_htbl);
+	return -ENOENT;
+}
+
+static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl)
+{
+	struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+	int num_of_entries;
+
+	htbl->ctrl.may_grow = true;
+
+	if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
+		htbl->ctrl.may_grow = false;
+
+	/* Threshold is 50%; the +1 gives a table of size 1 a threshold of 1 */
+	num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
+	ctrl->increase_threshold = (num_of_entries + 1) / 2;
+}
+
+struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
+					      enum mlx5dr_icm_chunk_size chunk_size,
+					      u8 lu_type, u16 byte_mask)
+{
+	struct mlx5dr_icm_chunk *chunk;
+	struct mlx5dr_ste_htbl *htbl;
+	int i;
+
+	htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
+	if (!htbl)
+		return NULL;
+
+	chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size);
+	if (!chunk)
+		goto out_free_htbl;
+
+	htbl->chunk = chunk;
+	htbl->lu_type = lu_type;
+	htbl->byte_mask = byte_mask;
+	htbl->ste_arr = chunk->ste_arr;
+	htbl->hw_ste_arr = chunk->hw_ste_arr;
+	htbl->miss_list = chunk->miss_list;
+	refcount_set(&htbl->refcount, 0);
+
+	for (i = 0; i < chunk->num_of_entries; i++) {
+		struct mlx5dr_ste *ste = &htbl->ste_arr[i];
+
+		ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+		ste->htbl = htbl;
+		refcount_set(&ste->refcount, 0);
+		INIT_LIST_HEAD(&ste->miss_list_node);
+		INIT_LIST_HEAD(&htbl->miss_list[i]);
+		INIT_LIST_HEAD(&ste->rule_list);
+	}
+
+	htbl->chunk_size = chunk_size;
+	dr_ste_set_ctrl(htbl);
+	return htbl;
+
+out_free_htbl:
+	kfree(htbl);
+	return NULL;
+}
+
+int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl)
+{
+	if (refcount_read(&htbl->refcount))
+		return -EBUSY;
+
+	mlx5dr_icm_free_chunk(htbl->chunk);
+	kfree(htbl);
+	return 0;
+}
+
+int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+			       u8 match_criteria,
+			       struct mlx5dr_match_param *mask,
+			       struct mlx5dr_match_param *value)
+{
+	if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
+		if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
+			mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
+			     struct mlx5dr_matcher_rx_tx *nic_matcher,
+			     struct mlx5dr_match_param *value,
+			     u8 *ste_arr)
+{
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_ste_build *sb;
+	int ret, i;
+
+	ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
+					 &matcher->mask, value);
+	if (ret)
+		return ret;
+
+	sb = nic_matcher->ste_builder;
+	for (i = 0; i < nic_matcher->num_of_builders; i++) {
+		mlx5dr_ste_init(ste_arr,
+				sb->lu_type,
+				nic_dmn->ste_type,
+				dmn->info.caps.gvmi);
+
+		mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
+
+		ret = sb->ste_build_tag_func(value, sb, ste_arr);
+		if (ret)
+			return ret;
+
+		/* Connect the STEs */
+		if (i < (nic_matcher->num_of_builders - 1)) {
+			/* Need the next builder for these fields,
+			 * not relevant for the last ste in the chain.
+			 */
+			sb++;
+			MLX5_SET(ste_general, ste_arr, next_lu_type, sb->lu_type);
+			MLX5_SET(ste_general, ste_arr, byte_mask, sb->byte_mask);
+		}
+		ste_arr += DR_STE_SIZE;
+	}
+	return 0;
+}
+
+static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value,
+						bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+	DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+	if (mask->smac_47_16 || mask->smac_15_0) {
+		MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32,
+			 mask->smac_47_16 >> 16);
+		MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0,
+			 mask->smac_47_16 << 16 | mask->smac_15_0);
+		mask->smac_47_16 = 0;
+		mask->smac_15_0 = 0;
+	}
+
+	DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid);
+	DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi);
+	DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio);
+	DR_STE_SET_MASK(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version);
+
+	if (mask->cvlan_tag) {
+		MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+		mask->cvlan_tag = 0;
+	} else if (mask->svlan_tag) {
+		MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+		mask->svlan_tag = 0;
+	}
+
+	if (mask->cvlan_tag || mask->svlan_tag) {
+		pr_info("Invalid c/svlan mask configuration\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
+{
+	spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present);
+	spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present);
+	spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present);
+	spec->source_vhca_port = MLX5_GET(fte_match_set_misc, mask, source_vhca_port);
+	spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn);
+
+	spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port);
+	spec->source_eswitch_owner_vhca_id = MLX5_GET(fte_match_set_misc, mask,
+						      source_eswitch_owner_vhca_id);
+
+	spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio);
+	spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi);
+	spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid);
+	spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio);
+	spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi);
+	spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid);
+
+	spec->outer_second_cvlan_tag =
+		MLX5_GET(fte_match_set_misc, mask, outer_second_cvlan_tag);
+	spec->inner_second_cvlan_tag =
+		MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag);
+	spec->outer_second_svlan_tag =
+		MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag);
+	spec->inner_second_svlan_tag =
+		MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag);
+
+	spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol);
+
+	spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi);
+	spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo);
+
+	spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni);
+
+	spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, geneve_vni);
+	spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam);
+
+	spec->outer_ipv6_flow_label =
+		MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label);
+
+	spec->inner_ipv6_flow_label =
+		MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label);
+
+	spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len);
+	spec->geneve_protocol_type =
+		MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type);
+
+	spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp);
+}
+
+static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec)
+{
+	u32 raw_ip[4];
+
+	spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16);
+
+	spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0);
+	spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype);
+
+	spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16);
+
+	spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0);
+	spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio);
+	spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi);
+	spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid);
+
+	spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol);
+	spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp);
+	spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn);
+	spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag);
+	spec->svlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag);
+	spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag);
+	spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version);
+	spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags);
+	spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport);
+	spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport);
+
+	spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit);
+
+	spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport);
+	spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport);
+
+	memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+				    sizeof(raw_ip));
+
+	spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]);
+	spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]);
+	spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]);
+	spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]);
+
+	memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+				    sizeof(raw_ip));
+
+	spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]);
+	spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]);
+	spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]);
+	spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]);
+}
+
+static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec)
+{
+	spec->outer_first_mpls_label =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label);
+	spec->outer_first_mpls_exp =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp);
+	spec->outer_first_mpls_s_bos =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos);
+	spec->outer_first_mpls_ttl =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl);
+	spec->inner_first_mpls_label =
+		MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label);
+	spec->inner_first_mpls_exp =
+		MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp);
+	spec->inner_first_mpls_s_bos =
+		MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos);
+	spec->inner_first_mpls_ttl =
+		MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl);
+	spec->outer_first_mpls_over_gre_label =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label);
+	spec->outer_first_mpls_over_gre_exp =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp);
+	spec->outer_first_mpls_over_gre_s_bos =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos);
+	spec->outer_first_mpls_over_gre_ttl =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl);
+	spec->outer_first_mpls_over_udp_label =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label);
+	spec->outer_first_mpls_over_udp_exp =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp);
+	spec->outer_first_mpls_over_udp_s_bos =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos);
+	spec->outer_first_mpls_over_udp_ttl =
+		MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl);
+	spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7);
+	spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6);
+	spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5);
+	spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4);
+	spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_3);
+	spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2);
+	spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1);
+	spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0);
+	spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a);
+	spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b);
+}
+
+static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
+{
+	spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num);
+	spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num);
+	spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num);
+	spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num);
+	spec->outer_vxlan_gpe_vni =
+		MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni);
+	spec->outer_vxlan_gpe_next_protocol =
+		MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol);
+	spec->outer_vxlan_gpe_flags =
+		MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags);
+	spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data);
+	spec->icmpv6_header_data =
+		MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data);
+	spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type);
+	spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code);
+	spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type);
+	spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code);
+}
+
+void mlx5dr_ste_copy_param(u8 match_criteria,
+			   struct mlx5dr_match_param *set_param,
+			   struct mlx5dr_match_parameters *mask)
+{
+	u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {};
+	u8 *data = (u8 *)mask->match_buf;
+	size_t param_location;
+	void *buff;
+
+	if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
+		if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) {
+			memcpy(tail_param, data, mask->match_sz);
+			buff = tail_param;
+		} else {
+			buff = mask->match_buf;
+		}
+		dr_ste_copy_mask_spec(buff, &set_param->outer);
+	}
+	param_location = sizeof(struct mlx5dr_match_spec);
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+		if (mask->match_sz < param_location +
+		    sizeof(struct mlx5dr_match_misc)) {
+			memcpy(tail_param, data + param_location,
+			       mask->match_sz - param_location);
+			buff = tail_param;
+		} else {
+			buff = data + param_location;
+		}
+		dr_ste_copy_mask_misc(buff, &set_param->misc);
+	}
+	param_location += sizeof(struct mlx5dr_match_misc);
+
+	if (match_criteria & DR_MATCHER_CRITERIA_INNER) {
+		if (mask->match_sz < param_location +
+		    sizeof(struct mlx5dr_match_spec)) {
+			memcpy(tail_param, data + param_location,
+			       mask->match_sz - param_location);
+			buff = tail_param;
+		} else {
+			buff = data + param_location;
+		}
+		dr_ste_copy_mask_spec(buff, &set_param->inner);
+	}
+	param_location += sizeof(struct mlx5dr_match_spec);
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC2) {
+		if (mask->match_sz < param_location +
+		    sizeof(struct mlx5dr_match_misc2)) {
+			memcpy(tail_param, data + param_location,
+			       mask->match_sz - param_location);
+			buff = tail_param;
+		} else {
+			buff = data + param_location;
+		}
+		dr_ste_copy_mask_misc2(buff, &set_param->misc2);
+	}
+
+	param_location += sizeof(struct mlx5dr_match_misc2);
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC3) {
+		if (mask->match_sz < param_location +
+		    sizeof(struct mlx5dr_match_misc3)) {
+			memcpy(tail_param, data + param_location,
+			       mask->match_sz - param_location);
+			buff = tail_param;
+		} else {
+			buff = data + param_location;
+		}
+		dr_ste_copy_mask_misc3(buff, &set_param->misc3);
+	}
+}
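
mlx5dr_ste_copy_param() guards against callers whose match buffer is shorter than the full parameter layout: any section that extends past match_sz is first copied into a zeroed scratch buffer, so the per-section parsers always read a full-size structure without touching memory past the caller's buffer. A minimal sketch of that guard; the section size and types are illustrative:

```c
/* Sketch of the short-buffer guard in mlx5dr_ste_copy_param(): if the
 * caller's buffer ends inside a section, parse from a zero-padded
 * scratch copy instead of reading past the end. Sizes are assumed.
 */
#include <string.h>
#include <stddef.h>

#define SECTION_SZ 64 /* assumed per-section size */

static const void *section_at(const void *buf, size_t buf_sz,
			      size_t offset, unsigned char *scratch)
{
	if (buf_sz < offset + SECTION_SZ) {
		/* Partial section: copy what exists over zeroes */
		memset(scratch, 0, SECTION_SZ);
		if (buf_sz > offset)
			memcpy(scratch, (const char *)buf + offset,
			       buf_sz - offset);
		return scratch;
	}
	return (const char *)buf + offset;
}

int main(void)
{
	unsigned char buf[100] = { 1, 2, 3 }, scratch[SECTION_SZ];

	/* The second section (offset 64) is only 36 bytes long here,
	 * so it is parsed from the zero-padded scratch copy.
	 */
	section_at(buf, sizeof(buf), SECTION_SZ, scratch);
	return 0;
}
```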
+
+static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   u8 *hw_ste_p)
+{
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16);
+	DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+	if (spec->smac_47_16 || spec->smac_15_0) {
+		MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32,
+			 spec->smac_47_16 >> 16);
+		MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0,
+			 spec->smac_47_16 << 16 | spec->smac_15_0);
+		spec->smac_47_16 = 0;
+		spec->smac_15_0 = 0;
+	}
+
+	if (spec->ip_version) {
+		if (spec->ip_version == IP_VERSION_IPV4) {
+			MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4);
+			spec->ip_version = 0;
+		} else if (spec->ip_version == IP_VERSION_IPV6) {
+			MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6);
+			spec->ip_version = 0;
+		} else {
+			pr_info("Unsupported ip_version value\n");
+			return -EINVAL;
+		}
+	}
+
+	DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid);
+	DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi);
+	DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio);
+
+	if (spec->cvlan_tag) {
+		MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN);
+		spec->cvlan_tag = 0;
+	} else if (spec->svlan_tag) {
+		MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN);
+		spec->svlan_tag = 0;
+	}
+	return 0;
+}
+
+int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx)
+{
+	int ret;
+
+	ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask);
+	if (ret)
+		return ret;
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag;
+
+	return 0;
+}
+
+static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value,
+						  bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_127_96, mask, dst_ip_127_96);
+	DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_95_64, mask, dst_ip_95_64);
+	DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_63_32, mask, dst_ip_63_32);
+	DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_31_0, mask, dst_ip_31_0);
+}
+
+static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
+					    struct mlx5dr_ste_build *sb,
+					    u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
+	DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
+	DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
+	DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx)
+{
+	dr_ste_build_eth_l3_ipv6_dst_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_dst_tag;
+}
+
+static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *value,
+						  bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_127_96, mask, src_ip_127_96);
+	DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_95_64, mask, src_ip_95_64);
+	DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_63_32, mask, src_ip_63_32);
+	DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_31_0, mask, src_ip_31_0);
+}
+
+static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
+					    struct mlx5dr_ste_build *sb,
+					    u8 *hw_ste_p)
+{
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
+	DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
+	DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
+	DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx)
+{
+	dr_ste_build_eth_l3_ipv6_src_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_src_tag;
+}
+
+static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param *value,
+						      bool inner,
+						      u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  destination_address, mask, dst_ip_31_0);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  source_address, mask, src_ip_31_0);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  destination_port, mask, tcp_dport);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  destination_port, mask, udp_dport);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  source_port, mask, tcp_sport);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  source_port, mask, udp_sport);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  protocol, mask, ip_protocol);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  fragmented, mask, frag);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  dscp, mask, ip_dscp);
+	DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+			  ecn, mask, ip_ecn);
+
+	if (mask->tcp_flags) {
+		DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, bit_mask, mask);
+		mask->tcp_flags = 0;
+	}
+}
+
+static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
+						struct mlx5dr_ste_build *sb,
+						u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp);
+	DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn);
+
+	if (spec->tcp_flags) {
+		DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec);
+		spec->tcp_flags = 0;
+	}
+
+	return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
+					  struct mlx5dr_match_param *mask,
+					  bool inner, bool rx)
+{
+	dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_5_tuple_tag;
+}
+
+static void
+dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
+					bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+	struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid);
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_cfi, mask, first_cfi);
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_priority, mask, first_prio);
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, ip_fragmented, mask, frag);
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype);
+	DR_STE_SET_MASK(eth_l2_src, bit_mask, l3_type, mask, ip_version);
+
+	if (mask->svlan_tag || mask->cvlan_tag) {
+		MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1);
+		mask->cvlan_tag = 0;
+		mask->svlan_tag = 0;
+	}
+
+	if (inner) {
+		if (misc_mask->inner_second_cvlan_tag ||
+		    misc_mask->inner_second_svlan_tag) {
+			MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+			misc_mask->inner_second_cvlan_tag = 0;
+			misc_mask->inner_second_svlan_tag = 0;
+		}
+
+		DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+				  second_vlan_id, misc_mask, inner_second_vid);
+		DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+				  second_cfi, misc_mask, inner_second_cfi);
+		DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+				  second_priority, misc_mask, inner_second_prio);
+	} else {
+		if (misc_mask->outer_second_cvlan_tag ||
+		    misc_mask->outer_second_svlan_tag) {
+			MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+			misc_mask->outer_second_cvlan_tag = 0;
+			misc_mask->outer_second_svlan_tag = 0;
+		}
+
+		DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+				  second_vlan_id, misc_mask, outer_second_vid);
+		DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+				  second_cfi, misc_mask, outer_second_cfi);
+		DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+				  second_priority, misc_mask, outer_second_prio);
+	}
+}
+
+static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
+					      bool inner, u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
+	struct mlx5dr_match_misc *misc_spec = &value->misc;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid);
+	DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi);
+	DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio);
+	DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag);
+	DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype);
+
+	if (spec->ip_version) {
+		if (spec->ip_version == IP_VERSION_IPV4) {
+			MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4);
+			spec->ip_version = 0;
+		} else if (spec->ip_version == IP_VERSION_IPV6) {
+			MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6);
+			spec->ip_version = 0;
+		} else {
+			pr_info("Unsupported ip_version value\n");
+			return -EINVAL;
+		}
+	}
+
+	if (spec->cvlan_tag) {
+		MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN);
+		spec->cvlan_tag = 0;
+	} else if (spec->svlan_tag) {
+		MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN);
+		spec->svlan_tag = 0;
+	}
+
+	if (inner) {
+		if (misc_spec->inner_second_cvlan_tag) {
+			MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+			misc_spec->inner_second_cvlan_tag = 0;
+		} else if (misc_spec->inner_second_svlan_tag) {
+			MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+			misc_spec->inner_second_svlan_tag = 0;
+		}
+
+		DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid);
+		DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi);
+		DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio);
+	} else {
+		if (misc_spec->outer_second_cvlan_tag) {
+			MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+			misc_spec->outer_second_cvlan_tag = 0;
+		} else if (misc_spec->outer_second_svlan_tag) {
+			MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+			misc_spec->outer_second_svlan_tag = 0;
+		}
+		DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid);
+		DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi);
+		DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio);
+	}
+
+	return 0;
+}
+
+static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
+					     bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16);
+	DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0);
+
+	dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
+				       struct mlx5dr_ste_build *sb,
+				       u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16);
+	DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0);
+
+	return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+}
+
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx)
+{
+	dr_ste_build_eth_l2_src_bit_mask(mask, inner, sb->bit_mask);
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_tag;
+}
+
+static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
+					     bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+	DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+	dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
+				       struct mlx5dr_ste_build *sb,
+				       u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16);
+	DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+	return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+}
+
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx)
+{
+	dr_ste_build_eth_l2_dst_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l2_dst_tag;
+}
+
+static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
+					     bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+	struct mlx5dr_match_misc *misc = &value->misc;
+
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16);
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0);
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid);
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi);
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_priority, mask, first_prio);
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag);
+	DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype);
+	DR_STE_SET_MASK(eth_l2_tnl, bit_mask, l3_type, mask, ip_version);
+
+	if (misc->vxlan_vni) {
+		MLX5_SET(ste_eth_l2_tnl, bit_mask,
+			 l2_tunneling_network_id, (misc->vxlan_vni << 8));
+		misc->vxlan_vni = 0;
+	}
+
+	if (mask->svlan_tag || mask->cvlan_tag) {
+		MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1);
+		mask->cvlan_tag = 0;
+		mask->svlan_tag = 0;
+	}
+}
+
+static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
+				       struct mlx5dr_ste_build *sb,
+				       u8 *hw_ste_p)
+{
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc *misc = &value->misc;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16);
+	DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0);
+	DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid);
+	DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi);
+	DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag);
+	DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio);
+	DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype);
+
+	if (misc->vxlan_vni) {
+		MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id,
+			 (misc->vxlan_vni << 8));
+		misc->vxlan_vni = 0;
+	}
+
+	if (spec->cvlan_tag) {
+		MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN);
+		spec->cvlan_tag = 0;
+	} else if (spec->svlan_tag) {
+		MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN);
+		spec->svlan_tag = 0;
+	}
+
+	if (spec->ip_version) {
+		if (spec->ip_version == IP_VERSION_IPV4) {
+			MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4);
+			spec->ip_version = 0;
+		} else if (spec->ip_version == IP_VERSION_IPV6) {
+			MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6);
+			spec->ip_version = 0;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask, bool inner, bool rx)
+{
+	dr_ste_build_eth_l2_tnl_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l2_tnl_tag;
+}
+
+static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *value,
+						   bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l3_ipv4_misc, bit_mask, time_to_live, mask, ttl_hoplimit);
+}
+
+static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
+					     struct mlx5dr_ste_build *sb,
+					     u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb,
+				       struct mlx5dr_match_param *mask,
+				       bool inner, bool rx)
+{
+	dr_ste_build_eth_l3_ipv4_misc_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_misc_tag;
+}
+
+static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value,
+					     bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, tcp_dport);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, tcp_sport);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, udp_dport);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, udp_sport);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, protocol, mask, ip_protocol);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, fragmented, mask, frag);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, dscp, mask, ip_dscp);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, ecn, mask, ip_ecn);
+	DR_STE_SET_MASK_V(eth_l4, bit_mask, ipv6_hop_limit, mask, ttl_hoplimit);
+
+	if (mask->tcp_flags) {
+		DR_STE_SET_TCP_FLAGS(eth_l4, bit_mask, mask);
+		mask->tcp_flags = 0;
+	}
+}
+
+static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
+				       struct mlx5dr_ste_build *sb,
+				       u8 *hw_ste_p)
+{
+	struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
+	DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
+	DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport);
+	DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport);
+	DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol);
+	DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag);
+	DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp);
+	DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
+	DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+
+	if (spec->tcp_flags) {
+		DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
+		spec->tcp_flags = 0;
+	}
+
+	return 0;
+}
+
+void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx)
+{
+	dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_ipv6_l3_l4_tag;
+}
+
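+/* A DONT_CARE lookup with a zero byte mask matches every packet, so this
+ * tag builder intentionally writes nothing.
+ */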
+static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value,
+					     struct mlx5dr_ste_build *sb,
+					     u8 *hw_ste_p)
+{
+	return 0;
+}
+
+void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx)
+{
+	sb->rx = rx;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE;
+	sb->byte_mask = 0;
+	sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag;
+}
+
+static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value,
+				       bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
+
+	if (inner)
+		DR_STE_SET_MPLS_MASK(mpls, misc2_mask, inner, bit_mask);
+	else
+		DR_STE_SET_MPLS_MASK(mpls, misc2_mask, outer, bit_mask);
+}
+
+static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value,
+				 struct mlx5dr_ste_build *sb,
+				 u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
+	u8 *tag = hw_ste->tag;
+
+	if (sb->inner)
+		DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag);
+	else
+		DR_STE_SET_MPLS_TAG(mpls, misc2_mask, outer, tag);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb,
+			   struct mlx5dr_match_param *mask,
+			   bool inner, bool rx)
+{
+	dr_ste_build_mpls_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_mpls_tag;
+}
+
+static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value,
+				      bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+	DR_STE_SET_MASK_V(gre, bit_mask, gre_protocol, misc_mask, gre_protocol);
+	DR_STE_SET_MASK_V(gre, bit_mask, gre_k_present, misc_mask, gre_k_present);
+	DR_STE_SET_MASK_V(gre, bit_mask, gre_key_h, misc_mask, gre_key_h);
+	DR_STE_SET_MASK_V(gre, bit_mask, gre_key_l, misc_mask, gre_key_l);
+
+	DR_STE_SET_MASK_V(gre, bit_mask, gre_c_present, misc_mask, gre_c_present);
+	DR_STE_SET_MASK_V(gre, bit_mask, gre_s_present, misc_mask, gre_s_present);
+}
+
+static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value,
+				struct mlx5dr_ste_build *sb,
+				u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc *misc = &value->misc;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol);
+
+	DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present);
+	DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h);
+	DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l);
+
+	DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present);
+
+	DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb,
+			  struct mlx5dr_match_param *mask, bool inner, bool rx)
+{
+	dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_GRE;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_gre_tag;
+}
+
+static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value,
+						bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+	if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label,
+				  misc_2_mask, outer_first_mpls_over_gre_label);
+
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp,
+				  misc_2_mask, outer_first_mpls_over_gre_exp);
+
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos,
+				  misc_2_mask, outer_first_mpls_over_gre_s_bos);
+
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl,
+				  misc_2_mask, outer_first_mpls_over_gre_ttl);
+	} else {
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label,
+				  misc_2_mask, outer_first_mpls_over_udp_label);
+
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp,
+				  misc_2_mask, outer_first_mpls_over_udp_exp);
+
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos,
+				  misc_2_mask, outer_first_mpls_over_udp_s_bos);
+
+		DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl,
+				  misc_2_mask, outer_first_mpls_over_udp_ttl);
+	}
+}
+
+static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value,
+					  struct mlx5dr_ste_build *sb,
+					  u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+	u8 *tag = hw_ste->tag;
+
+	if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
+			       misc_2_mask, outer_first_mpls_over_gre_label);
+
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
+			       misc_2_mask, outer_first_mpls_over_gre_exp);
+
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
+			       misc_2_mask, outer_first_mpls_over_gre_s_bos);
+
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
+			       misc_2_mask, outer_first_mpls_over_gre_ttl);
+	} else {
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
+			       misc_2_mask, outer_first_mpls_over_udp_label);
+
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
+			       misc_2_mask, outer_first_mpls_over_udp_exp);
+
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
+			       misc_2_mask, outer_first_mpls_over_udp_s_bos);
+
+		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
+			       misc_2_mask, outer_first_mpls_over_udp_ttl);
+	}
+	return 0;
+}
+
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx)
+{
+	dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_0;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_flex_parser_0_tag;
+}
+
+#define ICMP_TYPE_OFFSET_FIRST_DW		24
+#define ICMP_CODE_OFFSET_FIRST_DW		16
+#define ICMP_HEADER_DATA_OFFSET_SECOND_DW	0
+
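+/* ICMP type and code share the first flex parser dword (offsets 24 and
+ * 16); the ICMP header data occupies the second dword. Only flex parser
+ * ids 4 (dw0) and 5 (dw1) are supported below, anything else is -EINVAL.
+ */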
+static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask,
+					       struct mlx5dr_cmd_caps *caps,
+					       u8 *bit_mask)
+{
+	struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3;
+	bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask);
+	u32 icmp_header_data_mask;
+	u32 icmp_type_mask;
+	u32 icmp_code_mask;
+	int dw0_location;
+	int dw1_location;
+
+	if (is_ipv4_mask) {
+		icmp_header_data_mask	= misc_3_mask->icmpv4_header_data;
+		icmp_type_mask		= misc_3_mask->icmpv4_type;
+		icmp_code_mask		= misc_3_mask->icmpv4_code;
+		dw0_location		= caps->flex_parser_id_icmp_dw0;
+		dw1_location		= caps->flex_parser_id_icmp_dw1;
+	} else {
+		icmp_header_data_mask	= misc_3_mask->icmpv6_header_data;
+		icmp_type_mask		= misc_3_mask->icmpv6_type;
+		icmp_code_mask		= misc_3_mask->icmpv6_code;
+		dw0_location		= caps->flex_parser_id_icmpv6_dw0;
+		dw1_location		= caps->flex_parser_id_icmpv6_dw1;
+	}
+
+	switch (dw0_location) {
+	case 4:
+		if (icmp_type_mask) {
+			MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4,
+				 (icmp_type_mask << ICMP_TYPE_OFFSET_FIRST_DW));
+			if (is_ipv4_mask)
+				misc_3_mask->icmpv4_type = 0;
+			else
+				misc_3_mask->icmpv6_type = 0;
+		}
+		if (icmp_code_mask) {
+			u32 cur_val = MLX5_GET(ste_flex_parser_1, bit_mask,
+					       flex_parser_4);
+			MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4,
+				 cur_val | (icmp_code_mask << ICMP_CODE_OFFSET_FIRST_DW));
+			if (is_ipv4_mask)
+				misc_3_mask->icmpv4_code = 0;
+			else
+				misc_3_mask->icmpv6_code = 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dw1_location) {
+	case 5:
+		if (icmp_header_data_mask) {
+			MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_5,
+				 (icmp_header_data_mask << ICMP_HEADER_DATA_OFFSET_SECOND_DW));
+			if (is_ipv4_mask)
+				misc_3_mask->icmpv4_header_data = 0;
+			else
+				misc_3_mask->icmpv6_header_data = 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value,
+					  struct mlx5dr_ste_build *sb,
+					  u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc3 *misc_3 = &value->misc3;
+	u8 *tag = hw_ste->tag;
+	u32 icmp_header_data;
+	int dw0_location;
+	int dw1_location;
+	u32 icmp_type;
+	u32 icmp_code;
+	bool is_ipv4;
+
+	is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3);
+	if (is_ipv4) {
+		icmp_header_data	= misc_3->icmpv4_header_data;
+		icmp_type		= misc_3->icmpv4_type;
+		icmp_code		= misc_3->icmpv4_code;
+		dw0_location		= sb->caps->flex_parser_id_icmp_dw0;
+		dw1_location		= sb->caps->flex_parser_id_icmp_dw1;
+	} else {
+		icmp_header_data	= misc_3->icmpv6_header_data;
+		icmp_type		= misc_3->icmpv6_type;
+		icmp_code		= misc_3->icmpv6_code;
+		dw0_location		= sb->caps->flex_parser_id_icmpv6_dw0;
+		dw1_location		= sb->caps->flex_parser_id_icmpv6_dw1;
+	}
+
+	switch (dw0_location) {
+	case 4:
+		if (icmp_type) {
+			MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
+				 (icmp_type << ICMP_TYPE_OFFSET_FIRST_DW));
+			if (is_ipv4)
+				misc_3->icmpv4_type = 0;
+			else
+				misc_3->icmpv6_type = 0;
+		}
+
+		if (icmp_code) {
+			u32 cur_val = MLX5_GET(ste_flex_parser_1, tag,
+					       flex_parser_4);
+			MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
+				 cur_val | (icmp_code << ICMP_CODE_OFFSET_FIRST_DW));
+			if (is_ipv4)
+				misc_3->icmpv4_code = 0;
+			else
+				misc_3->icmpv6_code = 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dw1_location) {
+	case 5:
+		if (icmp_header_data) {
+			MLX5_SET(ste_flex_parser_1, tag, flex_parser_5,
+				 (icmp_header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW));
+			if (is_ipv4)
+				misc_3->icmpv4_header_data = 0;
+			else
+				misc_3->icmpv6_header_data = 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb,
+				   struct mlx5dr_match_param *mask,
+				   struct mlx5dr_cmd_caps *caps,
+				   bool inner, bool rx)
+{
+	int ret;
+
+	ret = dr_ste_build_flex_parser_1_bit_mask(mask, caps, sb->bit_mask);
+	if (ret)
+		return ret;
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->caps = caps;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_1;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_flex_parser_1_tag;
+
+	return 0;
+}
+
+static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *value,
+						  bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+	DR_STE_SET_MASK_V(general_purpose, bit_mask,
+			  general_purpose_lookup_field, misc_2_mask,
+			  metadata_reg_a);
+}
+
+static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value,
+					    struct mlx5dr_ste_build *sb,
+					    u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
+		       misc_2_mask, metadata_reg_a);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx)
+{
+	dr_ste_build_general_purpose_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_general_purpose_tag;
+}
+
+static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value,
+					      bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3;
+
+	if (inner) {
+		DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask,
+				  inner_tcp_seq_num);
+		DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask,
+				  inner_tcp_ack_num);
+	} else {
+		DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask,
+				  outer_tcp_seq_num);
+		DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask,
+				  outer_tcp_ack_num);
+	}
+}
+
+static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
+					struct mlx5dr_ste_build *sb,
+					u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+	u8 *tag = hw_ste->tag;
+
+	if (sb->inner) {
+		DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num);
+		DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num);
+	} else {
+		DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num);
+		DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num);
+	}
+
+	return 0;
+}
+
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb,
+				  struct mlx5dr_match_param *mask,
+				  bool inner, bool rx)
+{
+	dr_ste_build_eth_l4_misc_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, rx, inner);
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_eth_l4_misc_tag;
+}
+
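+/* VXLAN-GPE: flags and next_protocol are packed into tunneling header
+ * bits 63:32 (flags in bits 31:24 of that dword, next_protocol in 7:0),
+ * and the VNI is shifted into bits 31:8 of the low dword.
+ */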
+static void dr_ste_build_flex_parser_tnl_bit_mask(struct mlx5dr_match_param *value,
+						  bool inner, u8 *bit_mask)
+{
+	struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3;
+
+	if (misc_3_mask->outer_vxlan_gpe_flags ||
+	    misc_3_mask->outer_vxlan_gpe_next_protocol) {
+		MLX5_SET(ste_flex_parser_tnl, bit_mask,
+			 flex_parser_tunneling_header_63_32,
+			 (misc_3_mask->outer_vxlan_gpe_flags << 24) |
+			 (misc_3_mask->outer_vxlan_gpe_next_protocol));
+		misc_3_mask->outer_vxlan_gpe_flags = 0;
+		misc_3_mask->outer_vxlan_gpe_next_protocol = 0;
+	}
+
+	if (misc_3_mask->outer_vxlan_gpe_vni) {
+		MLX5_SET(ste_flex_parser_tnl, bit_mask,
+			 flex_parser_tunneling_header_31_0,
+			 misc_3_mask->outer_vxlan_gpe_vni << 8);
+		misc_3_mask->outer_vxlan_gpe_vni = 0;
+	}
+}
+
+static int dr_ste_build_flex_parser_tnl_tag(struct mlx5dr_match_param *value,
+					    struct mlx5dr_ste_build *sb,
+					    u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+	u8 *tag = hw_ste->tag;
+
+	if (misc3->outer_vxlan_gpe_flags ||
+	    misc3->outer_vxlan_gpe_next_protocol) {
+		MLX5_SET(ste_flex_parser_tnl, tag,
+			 flex_parser_tunneling_header_63_32,
+			 (misc3->outer_vxlan_gpe_flags << 24) |
+			 (misc3->outer_vxlan_gpe_next_protocol));
+		misc3->outer_vxlan_gpe_flags = 0;
+		misc3->outer_vxlan_gpe_next_protocol = 0;
+	}
+
+	if (misc3->outer_vxlan_gpe_vni) {
+		MLX5_SET(ste_flex_parser_tnl, tag,
+			 flex_parser_tunneling_header_31_0,
+			 misc3->outer_vxlan_gpe_vni << 8);
+		misc3->outer_vxlan_gpe_vni = 0;
+	}
+
+	return 0;
+}
+
+void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx)
+{
+	dr_ste_build_flex_parser_tnl_bit_mask(mask, inner, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_tag;
+}
+
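+/* Metadata registers C0-C3 map to the register_0 lookup; C4-C7 map to
+ * register_1 (see the next builder).
+ */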
+static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value,
+					     u8 *bit_mask)
+{
+	struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+	DR_STE_SET_MASK_V(register_0, bit_mask, register_0_h,
+			  misc_2_mask, metadata_reg_c_0);
+	DR_STE_SET_MASK_V(register_0, bit_mask, register_0_l,
+			  misc_2_mask, metadata_reg_c_1);
+	DR_STE_SET_MASK_V(register_0, bit_mask, register_1_h,
+			  misc_2_mask, metadata_reg_c_2);
+	DR_STE_SET_MASK_V(register_0, bit_mask, register_1_l,
+			  misc_2_mask, metadata_reg_c_3);
+}
+
+static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value,
+				       struct mlx5dr_ste_build *sb,
+				       u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
+	DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
+	DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
+	DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx)
+{
+	dr_ste_build_register_0_bit_mask(mask, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_register_0_tag;
+}
+
+static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value,
+					     u8 *bit_mask)
+{
+	struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+	DR_STE_SET_MASK_V(register_1, bit_mask, register_2_h,
+			  misc_2_mask, metadata_reg_c_4);
+	DR_STE_SET_MASK_V(register_1, bit_mask, register_2_l,
+			  misc_2_mask, metadata_reg_c_5);
+	DR_STE_SET_MASK_V(register_1, bit_mask, register_3_h,
+			  misc_2_mask, metadata_reg_c_6);
+	DR_STE_SET_MASK_V(register_1, bit_mask, register_3_l,
+			  misc_2_mask, metadata_reg_c_7);
+}
+
+static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value,
+				       struct mlx5dr_ste_build *sb,
+				       u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
+	DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
+	DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
+	DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+
+	return 0;
+}
+
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx)
+{
+	dr_ste_build_register_1_bit_mask(mask, sb->bit_mask);
+
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_register_1_tag;
+}
+
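+/* source_port and source_eswitch_owner_vhca_id only support an
+ * all-or-nothing mask (0 or 0xffff); the tag stage translates the source
+ * port into a port GVMI using the local or peer domain capabilities.
+ */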
+static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+					      u8 *bit_mask)
+{
+	struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+	/* Partial misc source_port is not supported */
+	if (misc_mask->source_port && misc_mask->source_port != 0xffff)
+		return -EINVAL;
+
+	/* Partial misc source_eswitch_owner_vhca_id is not supported */
+	if (misc_mask->source_eswitch_owner_vhca_id &&
+	    misc_mask->source_eswitch_owner_vhca_id != 0xffff)
+		return -EINVAL;
+
+	DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
+	DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
+	misc_mask->source_eswitch_owner_vhca_id = 0;
+
+	return 0;
+}
+
+static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
+					 struct mlx5dr_ste_build *sb,
+					 u8 *hw_ste_p)
+{
+	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+	struct mlx5dr_match_misc *misc = &value->misc;
+	struct mlx5dr_cmd_vport_cap *vport_cap;
+	struct mlx5dr_domain *dmn = sb->dmn;
+	struct mlx5dr_cmd_caps *caps;
+	u8 *tag = hw_ste->tag;
+
+	DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
+
+	if (sb->vhca_id_valid) {
+		/* Find port GVMI based on the eswitch_owner_vhca_id */
+		if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+			caps = &dmn->info.caps;
+		else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
+					   dmn->peer_dmn->info.caps.gvmi))
+			caps = &dmn->peer_dmn->info.caps;
+		else
+			return -EINVAL;
+	} else {
+		caps = &dmn->info.caps;
+	}
+
+	vport_cap = mlx5dr_get_vport_cap(caps, misc->source_port);
+	if (!vport_cap)
+		return -EINVAL;
+
+	if (vport_cap->vport_gvmi)
+		MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
+
+	misc->source_eswitch_owner_vhca_id = 0;
+	misc->source_port = 0;
+
+	return 0;
+}
+
+int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
+				  struct mlx5dr_match_param *mask,
+				  struct mlx5dr_domain *dmn,
+				  bool inner, bool rx)
+{
+	int ret;
+
+	/* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
+	sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
+
+	ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
+	if (ret)
+		return ret;
+
+	sb->rx = rx;
+	sb->dmn = dmn;
+	sb->inner = inner;
+	sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP;
+	sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
new file mode 100644
index 0000000..e178d8d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+				 struct mlx5dr_action *action)
+{
+	struct mlx5dr_matcher *last_matcher = NULL;
+	struct mlx5dr_htbl_connect_info info;
+	struct mlx5dr_ste_htbl *last_htbl;
+	int ret;
+
+	if (action && action->action_type != DR_ACTION_TYP_FT)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&tbl->dmn->mutex);
+
+	if (!list_empty(&tbl->matcher_list))
+		last_matcher = list_last_entry(&tbl->matcher_list,
+					       struct mlx5dr_matcher,
+					       matcher_list);
+
+	if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX ||
+	    tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+		if (last_matcher)
+			last_htbl = last_matcher->rx.e_anchor;
+		else
+			last_htbl = tbl->rx.s_anchor;
+
+		tbl->rx.default_icm_addr = action ?
+			action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
+			tbl->rx.nic_dmn->default_icm_addr;
+
+		info.type = CONNECT_MISS;
+		info.miss_icm_addr = tbl->rx.default_icm_addr;
+
+		ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
+							tbl->rx.nic_dmn,
+							last_htbl,
+							&info, true);
+		if (ret) {
+			mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret);
+			goto out;
+		}
+	}
+
+	if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX ||
+	    tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+		if (last_matcher)
+			last_htbl = last_matcher->tx.e_anchor;
+		else
+			last_htbl = tbl->tx.s_anchor;
+
+		tbl->tx.default_icm_addr = action ?
+			action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr :
+			tbl->tx.nic_dmn->default_icm_addr;
+
+		info.type = CONNECT_MISS;
+		info.miss_icm_addr = tbl->tx.default_icm_addr;
+
+		ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
+							tbl->tx.nic_dmn,
+							last_htbl, &info, true);
+		if (ret) {
+			mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret);
+			goto out;
+		}
+	}
+
+	/* Release old action */
+	if (tbl->miss_action)
+		refcount_dec(&tbl->miss_action->refcount);
+
+	/* Set new miss action */
+	tbl->miss_action = action;
+	if (tbl->miss_action)
+		refcount_inc(&action->refcount);
+
+out:
+	mutex_unlock(&tbl->dmn->mutex);
+	return ret;
+}
+
+static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl)
+{
+	mlx5dr_htbl_put(nic_tbl->s_anchor);
+}
+
+static void dr_table_uninit_fdb(struct mlx5dr_table *tbl)
+{
+	dr_table_uninit_nic(&tbl->rx);
+	dr_table_uninit_nic(&tbl->tx);
+}
+
+static void dr_table_uninit(struct mlx5dr_table *tbl)
+{
+	mutex_lock(&tbl->dmn->mutex);
+
+	switch (tbl->dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		dr_table_uninit_nic(&tbl->rx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		dr_table_uninit_nic(&tbl->tx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		dr_table_uninit_fdb(tbl);
+		break;
+	default:
+		WARN_ON(true);
+		break;
+	}
+
+	mutex_unlock(&tbl->dmn->mutex);
+}
+
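+/* Each NIC table starts from a single-entry "source anchor" hash table
+ * whose miss action points at the domain default; matchers are later
+ * connected behind this anchor.
+ */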
+static int dr_table_init_nic(struct mlx5dr_domain *dmn,
+			     struct mlx5dr_table_rx_tx *nic_tbl)
+{
+	struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+	struct mlx5dr_htbl_connect_info info;
+	int ret;
+
+	nic_tbl->default_icm_addr = nic_dmn->default_icm_addr;
+
+	nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+						  DR_CHUNK_SIZE_1,
+						  MLX5DR_STE_LU_TYPE_DONT_CARE,
+						  0);
+	if (!nic_tbl->s_anchor)
+		return -ENOMEM;
+
+	info.type = CONNECT_MISS;
+	info.miss_icm_addr = nic_dmn->default_icm_addr;
+	ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+						nic_tbl->s_anchor,
+						&info, true);
+	if (ret)
+		goto free_s_anchor;
+
+	mlx5dr_htbl_get(nic_tbl->s_anchor);
+
+	return 0;
+
+free_s_anchor:
+	mlx5dr_ste_htbl_free(nic_tbl->s_anchor);
+	return ret;
+}
+
+static int dr_table_init_fdb(struct mlx5dr_table *tbl)
+{
+	int ret;
+
+	ret = dr_table_init_nic(tbl->dmn, &tbl->rx);
+	if (ret)
+		return ret;
+
+	ret = dr_table_init_nic(tbl->dmn, &tbl->tx);
+	if (ret)
+		goto destroy_rx;
+
+	return 0;
+
+destroy_rx:
+	dr_table_uninit_nic(&tbl->rx);
+	return ret;
+}
+
+static int dr_table_init(struct mlx5dr_table *tbl)
+{
+	int ret = 0;
+
+	INIT_LIST_HEAD(&tbl->matcher_list);
+
+	mutex_lock(&tbl->dmn->mutex);
+
+	switch (tbl->dmn->type) {
+	case MLX5DR_DOMAIN_TYPE_NIC_RX:
+		tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX;
+		tbl->rx.nic_dmn = &tbl->dmn->info.rx;
+		ret = dr_table_init_nic(tbl->dmn, &tbl->rx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_NIC_TX:
+		tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX;
+		tbl->tx.nic_dmn = &tbl->dmn->info.tx;
+		ret = dr_table_init_nic(tbl->dmn, &tbl->tx);
+		break;
+	case MLX5DR_DOMAIN_TYPE_FDB:
+		tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB;
+		tbl->rx.nic_dmn = &tbl->dmn->info.rx;
+		tbl->tx.nic_dmn = &tbl->dmn->info.tx;
+		ret = dr_table_init_fdb(tbl);
+		break;
+	default:
+		WARN_ON(true);
+		break;
+	}
+
+	mutex_unlock(&tbl->dmn->mutex);
+
+	return ret;
+}
+
+static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
+{
+	return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev,
+					     tbl->table_id,
+					     tbl->table_type);
+}
+
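+/* The FW flow table is created pointing at the SW-owned RX/TX anchor ICM
+ * addresses, at level max_ft_level - 1.
+ */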
+static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
+{
+	u64 icm_addr_rx = 0;
+	u64 icm_addr_tx = 0;
+	int ret;
+
+	if (tbl->rx.s_anchor)
+		icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr;
+
+	if (tbl->tx.s_anchor)
+		icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr;
+
+	ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev,
+					   tbl->table_type,
+					   icm_addr_rx,
+					   icm_addr_tx,
+					   tbl->dmn->info.caps.max_ft_level - 1,
+					   true, false, NULL,
+					   &tbl->table_id);
+
+	return ret;
+}
+
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level)
+{
+	struct mlx5dr_table *tbl;
+	int ret;
+
+	refcount_inc(&dmn->refcount);
+
+	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+	if (!tbl)
+		goto dec_ref;
+
+	tbl->dmn = dmn;
+	tbl->level = level;
+	refcount_set(&tbl->refcount, 1);
+
+	ret = dr_table_init(tbl);
+	if (ret)
+		goto free_tbl;
+
+	ret = dr_table_create_sw_owned_tbl(tbl);
+	if (ret)
+		goto uninit_tbl;
+
+	return tbl;
+
+uninit_tbl:
+	dr_table_uninit(tbl);
+free_tbl:
+	kfree(tbl);
+dec_ref:
+	refcount_dec(&dmn->refcount);
+	return NULL;
+}
+
+int mlx5dr_table_destroy(struct mlx5dr_table *tbl)
+{
+	int ret;
+
+	if (refcount_read(&tbl->refcount) > 1)
+		return -EBUSY;
+
+	ret = dr_table_destroy_sw_owned_tbl(tbl);
+	if (ret)
+		return ret;
+
+	dr_table_uninit(tbl);
+
+	if (tbl->miss_action)
+		refcount_dec(&tbl->miss_action->refcount);
+
+	refcount_dec(&tbl->dmn->refcount);
+	kfree(tbl);
+
+	return ret;
+}
+
+u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl)
+{
+	return tbl->table_id;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
new file mode 100644
index 0000000..1cb3769
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -0,0 +1,1061 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef	_DR_TYPES_
+#define	_DR_TYPES_
+
+#include <linux/mlx5/driver.h>
+#include <linux/refcount.h>
+#include "fs_core.h"
+#include "wq.h"
+#include "lib/mlx5.h"
+#include "mlx5_ifc_dr.h"
+#include "mlx5dr.h"
+
+#define DR_RULE_MAX_STES 17
+#define DR_ACTION_MAX_STES 5
+#define WIRE_PORT 0xFFFF
+#define DR_STE_SVLAN 0x1
+#define DR_STE_CVLAN 0x2
+
+#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
+#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
+#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
+
+enum mlx5dr_icm_chunk_size {
+	DR_CHUNK_SIZE_1,
+	DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */
+	DR_CHUNK_SIZE_2,
+	DR_CHUNK_SIZE_4,
+	DR_CHUNK_SIZE_8,
+	DR_CHUNK_SIZE_16,
+	DR_CHUNK_SIZE_32,
+	DR_CHUNK_SIZE_64,
+	DR_CHUNK_SIZE_128,
+	DR_CHUNK_SIZE_256,
+	DR_CHUNK_SIZE_512,
+	DR_CHUNK_SIZE_1K,
+	DR_CHUNK_SIZE_2K,
+	DR_CHUNK_SIZE_4K,
+	DR_CHUNK_SIZE_8K,
+	DR_CHUNK_SIZE_16K,
+	DR_CHUNK_SIZE_32K,
+	DR_CHUNK_SIZE_64K,
+	DR_CHUNK_SIZE_128K,
+	DR_CHUNK_SIZE_256K,
+	DR_CHUNK_SIZE_512K,
+	DR_CHUNK_SIZE_1024K,
+	DR_CHUNK_SIZE_2048K,
+	DR_CHUNK_SIZE_MAX,
+};
+
+enum mlx5dr_icm_type {
+	DR_ICM_TYPE_STE,
+	DR_ICM_TYPE_MODIFY_ACTION,
+};
+
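+/* Chunk sizes double with each enum step, so advancing two steps requests
+ * a chunk four times larger, clamped at DR_CHUNK_SIZE_MAX.
+ */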
+static inline enum mlx5dr_icm_chunk_size
+mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk)
+{
+	chunk += 2;
+	if (chunk < DR_CHUNK_SIZE_MAX)
+		return chunk;
+
+	return DR_CHUNK_SIZE_MAX;
+}
+
+enum {
+	DR_STE_SIZE = 64,
+	DR_STE_SIZE_CTRL = 32,
+	DR_STE_SIZE_TAG = 16,
+	DR_STE_SIZE_MASK = 16,
+};
+
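+/* A "reduced" STE is stored without its 16-byte bit mask (48 of 64 bytes). */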
+enum {
+	DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
+};
+
+enum {
+	DR_MODIFY_ACTION_SIZE = 8,
+};
+
+enum mlx5dr_matcher_criteria {
+	DR_MATCHER_CRITERIA_EMPTY = 0,
+	DR_MATCHER_CRITERIA_OUTER = 1 << 0,
+	DR_MATCHER_CRITERIA_MISC = 1 << 1,
+	DR_MATCHER_CRITERIA_INNER = 1 << 2,
+	DR_MATCHER_CRITERIA_MISC2 = 1 << 3,
+	DR_MATCHER_CRITERIA_MISC3 = 1 << 4,
+	DR_MATCHER_CRITERIA_MAX = 1 << 5,
+};
+
+enum mlx5dr_action_type {
+	DR_ACTION_TYP_TNL_L2_TO_L2,
+	DR_ACTION_TYP_L2_TO_TNL_L2,
+	DR_ACTION_TYP_TNL_L3_TO_L2,
+	DR_ACTION_TYP_L2_TO_TNL_L3,
+	DR_ACTION_TYP_DROP,
+	DR_ACTION_TYP_QP,
+	DR_ACTION_TYP_FT,
+	DR_ACTION_TYP_CTR,
+	DR_ACTION_TYP_TAG,
+	DR_ACTION_TYP_MODIFY_HDR,
+	DR_ACTION_TYP_VPORT,
+	DR_ACTION_TYP_POP_VLAN,
+	DR_ACTION_TYP_PUSH_VLAN,
+	DR_ACTION_TYP_MAX,
+};
+
+struct mlx5dr_icm_pool;
+struct mlx5dr_icm_chunk;
+struct mlx5dr_icm_bucket;
+struct mlx5dr_ste_htbl;
+struct mlx5dr_match_param;
+struct mlx5dr_cmd_caps;
+struct mlx5dr_matcher_rx_tx;
+
+struct mlx5dr_ste {
+	u8 *hw_ste;
+	/* refcount: indicates the number of rules that are using this STE */
+	refcount_t refcount;
+
+	/* attached to the miss_list head at each htbl entry */
+	struct list_head miss_list_node;
+
+	/* each rule member that uses this STE is attached here */
+	struct list_head rule_list;
+
+	/* this STE is a member of htbl */
+	struct mlx5dr_ste_htbl *htbl;
+
+	struct mlx5dr_ste_htbl *next_htbl;
+
+	/* this STE is part of a rule; this is its location in the rule's STE chain */
+	u8 ste_chain_location;
+};
+
+struct mlx5dr_ste_htbl_ctrl {
+	/* total number of valid entries belonging to this hash table,
+	 * including both the non-collision and collision entries
+	 */
+	unsigned int num_of_valid_entries;
+
+	/* total number of collision entries attached to this table */
+	unsigned int num_of_collisions;
+	unsigned int increase_threshold;
+	u8 may_grow:1;
+};
+
+struct mlx5dr_ste_htbl {
+	u8 lu_type;
+	u16 byte_mask;
+	refcount_t refcount;
+	struct mlx5dr_icm_chunk *chunk;
+	struct mlx5dr_ste *ste_arr;
+	u8 *hw_ste_arr;
+
+	struct list_head *miss_list;
+
+	enum mlx5dr_icm_chunk_size chunk_size;
+	struct mlx5dr_ste *pointing_ste;
+
+	struct mlx5dr_ste_htbl_ctrl ctrl;
+};
+
+struct mlx5dr_ste_send_info {
+	struct mlx5dr_ste *ste;
+	struct list_head send_list;
+	u16 size;
+	u16 offset;
+	u8 data_cont[DR_STE_SIZE];
+	u8 *data;
+};
+
+void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
+					       u16 offset, u8 *data,
+					       struct mlx5dr_ste_send_info *ste_info,
+					       struct list_head *send_list,
+					       bool copy_data);
+
+struct mlx5dr_ste_build {
+	u8 inner:1;
+	u8 rx:1;
+	u8 vhca_id_valid:1;
+	struct mlx5dr_domain *dmn;
+	struct mlx5dr_cmd_caps *caps;
+	u8 lu_type;
+	u16 byte_mask;
+	u8 bit_mask[DR_STE_SIZE_MASK];
+	int (*ste_build_tag_func)(struct mlx5dr_match_param *spec,
+				  struct mlx5dr_ste_build *sb,
+				  u8 *hw_ste_p);
+};
+
+struct mlx5dr_ste_htbl *
+mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
+		      enum mlx5dr_icm_chunk_size chunk_size,
+		      u8 lu_type, u16 byte_mask);
+
+int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl);
+
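+/* Hash table reference counting: the final put frees the table. */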
+static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl)
+{
+	if (refcount_dec_and_test(&htbl->refcount))
+		mlx5dr_ste_htbl_free(htbl);
+}
+
+static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl)
+{
+	refcount_inc(&htbl->refcount);
+}
+
+/* STE utils */
+u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl);
+void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, u16 gvmi);
+void mlx5dr_ste_always_hit_htbl(struct mlx5dr_ste *ste,
+				struct mlx5dr_ste_htbl *next_htbl);
+void mlx5dr_ste_set_miss_addr(u8 *hw_ste, u64 miss_addr);
+u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste);
+void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi);
+void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size);
+void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr);
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask);
+bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste);
+bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
+				u8 ste_location);
+void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag);
+void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id);
+void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id,
+			     int size, bool encap_l3);
+void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p);
+void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan);
+void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p);
+void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_tpid_pcp_dei_vid,
+				 bool go_back);
+void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type);
+u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p);
+void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
+				    u32 re_write_index);
+void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p);
+u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste);
+u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste);
+struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste);
+
+void mlx5dr_ste_free(struct mlx5dr_ste *ste,
+		     struct mlx5dr_matcher *matcher,
+		     struct mlx5dr_matcher_rx_tx *nic_matcher);
+static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste,
+				  struct mlx5dr_matcher *matcher,
+				  struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+	if (refcount_dec_and_test(&ste->refcount))
+		mlx5dr_ste_free(ste, matcher, nic_matcher);
+}
+
+/* initialized to 0; increased only when the STE appears in a new rule */
+static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste)
+{
+	refcount_inc(&ste->refcount);
+}
+
+void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
+					  struct mlx5dr_ste_htbl *next_htbl);
+bool mlx5dr_ste_equal_tag(void *src, void *dst);
+int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
+				struct mlx5dr_matcher_rx_tx *nic_matcher,
+				struct mlx5dr_ste *ste,
+				u8 *cur_hw_ste,
+				enum mlx5dr_icm_chunk_size log_table_size);
+
+/* STE build functions */
+int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+			       u8 match_criteria,
+			       struct mlx5dr_match_param *mask,
+			       struct mlx5dr_match_param *value);
+int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
+			     struct mlx5dr_matcher_rx_tx *nic_matcher,
+			     struct mlx5dr_match_param *value,
+			     u8 *ste_arr);
+int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
+					  struct mlx5dr_match_param *mask,
+					  bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb,
+				       struct mlx5dr_match_param *mask,
+				       bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx);
+void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx);
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb,
+				  struct mlx5dr_match_param *mask,
+				  bool inner, bool rx);
+void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb,
+			  struct mlx5dr_match_param *mask,
+			  bool inner, bool rx);
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb,
+			   struct mlx5dr_match_param *mask,
+			   bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx);
+int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb,
+				   struct mlx5dr_match_param *mask,
+				   struct mlx5dr_cmd_caps *caps,
+				   bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx);
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask,
+				      bool inner, bool rx);
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx);
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
+				 struct mlx5dr_match_param *mask,
+				 bool inner, bool rx);
+int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
+				  struct mlx5dr_match_param *mask,
+				  struct mlx5dr_domain *dmn,
+				  bool inner, bool rx);
+void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
+
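+/* Typical builder usage (an illustrative sketch, not a caller from this
+ * file; sb/idx/mask/inner/rx are hypothetical locals): a matcher selects
+ * the builders that cover its match mask, e.g.
+ *
+ *	mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+ *	mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, inner, rx);
+ *
+ * Each builder consumes its fields from the mask, and the stored
+ * ste_build_tag_func is invoked later with the actual rule values.
+ */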
+/* Actions utils */
+int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
+				 struct mlx5dr_matcher_rx_tx *nic_matcher,
+				 struct mlx5dr_action *actions[],
+				 u32 num_actions,
+				 u8 *ste_arr,
+				 u32 *new_hw_ste_arr_sz);
+
+struct mlx5dr_match_spec {
+	u32 smac_47_16;		/* Source MAC address of incoming packet */
+	/* Incoming packet Ethertype - this is the Ethertype
+	 * following the last VLAN tag of the packet
+	 */
+	u32 ethertype:16;
+	u32 smac_15_0:16;	/* Source MAC address of incoming packet */
+	u32 dmac_47_16;		/* Destination MAC address of incoming packet */
+	/* VLAN ID of first VLAN tag in the incoming packet.
+	 * Valid only when cvlan_tag==1 or svlan_tag==1
+	 */
+	u32 first_vid:12;
+	/* CFI bit of first VLAN tag in the incoming packet.
+	 * Valid only when cvlan_tag==1 or svlan_tag==1
+	 */
+	u32 first_cfi:1;
+	/* Priority of first VLAN tag in the incoming packet.
+	 * Valid only when cvlan_tag==1 or svlan_tag==1
+	 */
+	u32 first_prio:3;
+	u32 dmac_15_0:16;	/* Destination MAC address of incoming packet */
+	/* TCP flags. Bit 0: FIN, Bit 1: SYN, Bit 2: RST, Bit 3: PSH,
+	 * Bit 4: ACK, Bit 5: URG, Bit 6: ECE, Bit 7: CWR, Bit 8: NS
+	 */
+	u32 tcp_flags:9;
+	u32 ip_version:4;	/* IP version */
+	u32 frag:1;		/* Packet is an IP fragment */
+	/* The first vlan in the packet is s-vlan (0x88a8).
+	 * cvlan_tag and svlan_tag cannot be set together
+	 */
+	u32 svlan_tag:1;
+	/* The first vlan in the packet is c-vlan (0x8100).
+	 * cvlan_tag and svlan_tag cannot be set together
+	 */
+	u32 cvlan_tag:1;
+	/* Explicit Congestion Notification derived from
+	 * Traffic Class/TOS field of IPv6/v4
+	 */
+	u32 ip_ecn:2;
+	/* Differentiated Services Code Point derived from
+	 * Traffic Class/TOS field of IPv6/v4
+	 */
+	u32 ip_dscp:6;
+	u32 ip_protocol:8;	/* IP protocol */
+	/* TCP destination port.
+	 * tcp and udp sport/dport are mutually exclusive
+	 */
+	u32 tcp_dport:16;
+	/* TCP source port. tcp and udp sport/dport are mutually exclusive */
+	u32 tcp_sport:16;
+	u32 ttl_hoplimit:8;
+	u32 reserved:24;
+	/* UDP destination port. tcp and udp sport/dport are mutually exclusive */
+	u32 udp_dport:16;
+	/* UDP source port. tcp and udp sport/dport are mutually exclusive */
+	u32 udp_sport:16;
+	/* IPv6 source address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 src_ip_127_96;
+	/* IPv6 source address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 src_ip_95_64;
+	/* IPv6 source address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 src_ip_63_32;
+	/* IPv6 source address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 src_ip_31_0;
+	/* IPv6 destination address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 dst_ip_127_96;
+	/* IPv6 destination address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 dst_ip_95_64;
+	/* IPv6 destination address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 dst_ip_63_32;
+	/* IPv6 destination address of incoming packets
+	 * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+	 * This field should be qualified by an appropriate ethertype
+	 */
+	u32 dst_ip_31_0;
+};
+
+struct mlx5dr_match_misc {
+	u32 source_sqn:24;		/* Source SQN */
+	u32 source_vhca_port:4;
+	/* used with GRE, sequence number exists when gre_s_present == 1 */
+	u32 gre_s_present:1;
+	/* used with GRE, key exists when gre_k_present == 1 */
+	u32 gre_k_present:1;
+	u32 reserved_auto1:1;
+	/* used with GRE, checksum exists when gre_c_present == 1 */
+	u32 gre_c_present:1;
+	/* Source port. 0xffff indicates the wire port */
+	u32 source_port:16;
+	u32 source_eswitch_owner_vhca_id:16;
+	/* VLAN ID of the second VLAN tag in the inner header of the incoming
+	 * packet. Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
+	 */
+	u32 inner_second_vid:12;
+	/* CFI bit of the second VLAN tag in the inner header of the incoming
+	 * packet. Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
+	 */
+	u32 inner_second_cfi:1;
+	/* Priority of second VLAN tag in the inner header of the incoming packet.
+	 * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+	 */
+	u32 inner_second_prio:3;
+	/* VLAN ID of first VLAN tag the outer header of the incoming packet.
+	 * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
+	 */
+	u32 outer_second_vid:12;
+	/* CFI bit of first VLAN tag in the outer header of the incoming packet.
+	 * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
+	 */
+	u32 outer_second_cfi:1;
+	/* Priority of second VLAN tag in the outer header of the incoming packet.
+	 * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
+	 */
+	u32 outer_second_prio:3;
+	u32 gre_protocol:16;		/* GRE Protocol (outer) */
+	u32 reserved_auto3:12;
+	/* The second vlan in the inner header of the packet is s-vlan (0x88a8).
+	 * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+	 */
+	u32 inner_second_svlan_tag:1;
+	/* The second vlan in the outer header of the packet is s-vlan (0x88a8).
+	 * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
+	 */
+	u32 outer_second_svlan_tag:1;
+	/* The second vlan in the inner header of the packet is c-vlan (0x8100).
+	 * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+	 */
+	u32 inner_second_cvlan_tag:1;
+	/* The second vlan in the outer header of the packet is c-vlan (0x8100).
+	 * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
+	 */
+	u32 outer_second_cvlan_tag:1;
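+	/* Example (illustrative): a 32-bit GRE key of 0x11223344 is carried as
+	 * gre_key_h = 0x112233 and gre_key_l = 0x44
+	 */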
+	u32 gre_key_l:8;		/* GRE Key [7:0] (outer) */
+	u32 gre_key_h:24;		/* GRE Key[31:8] (outer) */
+	u32 reserved_auto4:8;
+	u32 vxlan_vni:24;		/* VXLAN VNI (outer) */
+	u32 geneve_oam:1;		/* GENEVE OAM field (outer) */
+	u32 reserved_auto5:7;
+	u32 geneve_vni:24;		/* GENEVE VNI field (outer) */
+	u32 outer_ipv6_flow_label:20;	/* Flow label of incoming IPv6 packet (outer) */
+	u32 reserved_auto6:12;
+	u32 inner_ipv6_flow_label:20;	/* Flow label of incoming IPv6 packet (inner) */
+	u32 reserved_auto7:12;
+	u32 geneve_protocol_type:16;	/* GENEVE protocol type (outer) */
+	u32 geneve_opt_len:6;		/* GENEVE OptLen (outer) */
+	u32 reserved_auto8:10;
+	u32 bth_dst_qp:24;		/* Destination QP in BTH header */
+	u32 reserved_auto9:8;
+	u8 reserved_auto10[20];
+};
+
+struct mlx5dr_match_misc2 {
+	u32 outer_first_mpls_ttl:8;		/* First MPLS TTL (outer) */
+	u32 outer_first_mpls_s_bos:1;		/* First MPLS S_BOS (outer) */
+	u32 outer_first_mpls_exp:3;		/* First MPLS EXP (outer) */
+	u32 outer_first_mpls_label:20;		/* First MPLS LABEL (outer) */
+	u32 inner_first_mpls_ttl:8;		/* First MPLS TTL (inner) */
+	u32 inner_first_mpls_s_bos:1;		/* First MPLS S_BOS (inner) */
+	u32 inner_first_mpls_exp:3;		/* First MPLS EXP (inner) */
+	u32 inner_first_mpls_label:20;		/* First MPLS LABEL (inner) */
+	u32 outer_first_mpls_over_gre_ttl:8;	/* First MPLS-over-GRE TTL (outer) */
+	u32 outer_first_mpls_over_gre_s_bos:1;	/* First MPLS-over-GRE S_BOS (outer) */
+	u32 outer_first_mpls_over_gre_exp:3;	/* First MPLS-over-GRE EXP (outer) */
+	u32 outer_first_mpls_over_gre_label:20;	/* First MPLS-over-GRE LABEL (outer) */
+	u32 outer_first_mpls_over_udp_ttl:8;	/* First MPLS-over-UDP TTL (outer) */
+	u32 outer_first_mpls_over_udp_s_bos:1;	/* First MPLS-over-UDP S_BOS (outer) */
+	u32 outer_first_mpls_over_udp_exp:3;	/* First MPLS-over-UDP EXP (outer) */
+	u32 outer_first_mpls_over_udp_label:20;	/* First MPLS-over-UDP LABEL (outer) */
+	u32 metadata_reg_c_7;			/* metadata_reg_c_7 */
+	u32 metadata_reg_c_6;			/* metadata_reg_c_6 */
+	u32 metadata_reg_c_5;			/* metadata_reg_c_5 */
+	u32 metadata_reg_c_4;			/* metadata_reg_c_4 */
+	u32 metadata_reg_c_3;			/* metadata_reg_c_3 */
+	u32 metadata_reg_c_2;			/* metadata_reg_c_2 */
+	u32 metadata_reg_c_1;			/* metadata_reg_c_1 */
+	u32 metadata_reg_c_0;			/* metadata_reg_c_0 */
+	u32 metadata_reg_a;			/* metadata_reg_a */
+	u32 metadata_reg_b;			/* metadata_reg_b */
+	u8 reserved_auto2[8];
+};
+
+struct mlx5dr_match_misc3 {
+	u32 inner_tcp_seq_num;
+	u32 outer_tcp_seq_num;
+	u32 inner_tcp_ack_num;
+	u32 outer_tcp_ack_num;
+	u32 outer_vxlan_gpe_vni:24;
+	u32 reserved_auto1:8;
+	u32 reserved_auto2:16;
+	u32 outer_vxlan_gpe_flags:8;
+	u32 outer_vxlan_gpe_next_protocol:8;
+	u32 icmpv4_header_data;
+	u32 icmpv6_header_data;
+	u32 icmpv6_code:8;
+	u32 icmpv6_type:8;
+	u32 icmpv4_code:8;
+	u32 icmpv4_type:8;
+	u8 reserved_auto3[0x1c];
+};
+
+struct mlx5dr_match_param {
+	struct mlx5dr_match_spec outer;
+	struct mlx5dr_match_misc misc;
+	struct mlx5dr_match_spec inner;
+	struct mlx5dr_match_misc2 misc2;
+	struct mlx5dr_match_misc3 misc3;
+};
+
+#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
+						   (_misc3)->icmpv4_code || \
+						   (_misc3)->icmpv4_header_data)
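+
+/* Usage sketch (illustrative): given "struct mlx5dr_match_param mask", the
+ * check DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(&mask.misc3) is true whenever any
+ * ICMPv4 type/code/header-data field is masked, i.e. the ICMPv4 flex parser
+ * is required for the matcher.
+ */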
+
+struct mlx5dr_esw_caps {
+	u64 drop_icm_address_rx;
+	u64 drop_icm_address_tx;
+	u64 uplink_icm_address_rx;
+	u64 uplink_icm_address_tx;
+	bool sw_owner;
+};
+
+struct mlx5dr_cmd_vport_cap {
+	u16 vport_gvmi;
+	u16 vhca_gvmi;
+	u64 icm_address_rx;
+	u64 icm_address_tx;
+	u32 num;
+};
+
+struct mlx5dr_cmd_caps {
+	u16 gvmi;
+	u64 nic_rx_drop_address;
+	u64 nic_tx_drop_address;
+	u64 nic_tx_allow_address;
+	u64 esw_rx_drop_address;
+	u64 esw_tx_drop_address;
+	u32 log_icm_size;
+	u64 hdr_modify_icm_addr;
+	u32 flex_protocols;
+	u8 flex_parser_id_icmp_dw0;
+	u8 flex_parser_id_icmp_dw1;
+	u8 flex_parser_id_icmpv6_dw0;
+	u8 flex_parser_id_icmpv6_dw1;
+	u8 max_ft_level;
+	u16 roce_min_src_udp;
+	u8 num_esw_ports;
+	bool eswitch_manager;
+	bool rx_sw_owner;
+	bool tx_sw_owner;
+	bool fdb_sw_owner;
+	u32 num_vports;
+	struct mlx5dr_esw_caps esw_caps;
+	struct mlx5dr_cmd_vport_cap *vports_caps;
+	bool prio_tag_required;
+};
+
+struct mlx5dr_domain_rx_tx {
+	u64 drop_icm_addr;
+	u64 default_icm_addr;
+	enum mlx5dr_ste_entry_type ste_type;
+};
+
+struct mlx5dr_domain_info {
+	bool supp_sw_steering;
+	u32 max_inline_size;
+	u32 max_send_wr;
+	u32 max_log_sw_icm_sz;
+	u32 max_log_action_icm_sz;
+	struct mlx5dr_domain_rx_tx rx;
+	struct mlx5dr_domain_rx_tx tx;
+	struct mlx5dr_cmd_caps caps;
+};
+
+struct mlx5dr_domain_cache {
+	struct mlx5dr_fw_recalc_cs_ft **recalc_cs_ft;
+};
+
+struct mlx5dr_domain {
+	struct mlx5dr_domain *peer_dmn;
+	struct mlx5_core_dev *mdev;
+	u32 pdn;
+	struct mlx5_uars_page *uar;
+	enum mlx5dr_domain_type type;
+	refcount_t refcount;
+	struct mutex mutex; /* protect domain */
+	struct mlx5dr_icm_pool *ste_icm_pool;
+	struct mlx5dr_icm_pool *action_icm_pool;
+	struct mlx5dr_send_ring *send_ring;
+	struct mlx5dr_domain_info info;
+	struct mlx5dr_domain_cache cache;
+};
+
+struct mlx5dr_table_rx_tx {
+	struct mlx5dr_ste_htbl *s_anchor;
+	struct mlx5dr_domain_rx_tx *nic_dmn;
+	u64 default_icm_addr;
+};
+
+struct mlx5dr_table {
+	struct mlx5dr_domain *dmn;
+	struct mlx5dr_table_rx_tx rx;
+	struct mlx5dr_table_rx_tx tx;
+	u32 level;
+	u32 table_type;
+	u32 table_id;
+	struct list_head matcher_list;
+	struct mlx5dr_action *miss_action;
+	refcount_t refcount;
+};
+
+struct mlx5dr_matcher_rx_tx {
+	struct mlx5dr_ste_htbl *s_htbl;
+	struct mlx5dr_ste_htbl *e_anchor;
+	struct mlx5dr_ste_build *ste_builder;
+	struct mlx5dr_ste_build ste_builder4[DR_RULE_MAX_STES];
+	struct mlx5dr_ste_build ste_builder6[DR_RULE_MAX_STES];
+	u8 num_of_builders;
+	u8 num_of_builders4;
+	u8 num_of_builders6;
+	u64 default_icm_addr;
+	struct mlx5dr_table_rx_tx *nic_tbl;
+};
+
+struct mlx5dr_matcher {
+	struct mlx5dr_table *tbl;
+	struct mlx5dr_matcher_rx_tx rx;
+	struct mlx5dr_matcher_rx_tx tx;
+	struct list_head matcher_list;
+	u16 prio;
+	struct mlx5dr_match_param mask;
+	u8 match_criteria;
+	refcount_t refcount;
+	struct mlx5dv_flow_matcher *dv_matcher;
+};
+
+struct mlx5dr_rule_member {
+	struct mlx5dr_ste *ste;
+	/* attached to mlx5dr_rule via this */
+	struct list_head list;
+	/* attached to mlx5dr_ste via this */
+	struct list_head use_ste_list;
+};
+
+struct mlx5dr_action {
+	enum mlx5dr_action_type action_type;
+	refcount_t refcount;
+	union {
+		struct {
+			struct mlx5dr_domain *dmn;
+			struct mlx5dr_icm_chunk *chunk;
+			u8 *data;
+			u32 data_size;
+			u16 num_of_actions;
+			u32 index;
+			u8 allow_rx:1;
+			u8 allow_tx:1;
+			u8 modify_ttl:1;
+		} rewrite;
+		struct {
+			struct mlx5dr_domain *dmn;
+			u32 reformat_id;
+			u32 reformat_size;
+		} reformat;
+		struct {
+			u8 is_fw_tbl:1;
+			union {
+				struct mlx5dr_table *tbl;
+				struct {
+					struct mlx5_flow_table *ft;
+					u64 rx_icm_addr;
+					u64 tx_icm_addr;
+					struct mlx5_core_dev *mdev;
+				} fw_tbl;
+			};
+		} dest_tbl;
+		struct {
+			u32 ctr_id;
+			u32 offeset;
+		} ctr;
+		struct {
+			struct mlx5dr_domain *dmn;
+			struct mlx5dr_cmd_vport_cap *caps;
+		} vport;
+		struct {
+			u32 vlan_hdr; /* tpid_pcp_dei_vid */
+		} push_vlan;
+		u32 flow_tag;
+	};
+};
+
+enum mlx5dr_connect_type {
+	CONNECT_HIT	= 1,
+	CONNECT_MISS	= 2,
+};
+
+struct mlx5dr_htbl_connect_info {
+	enum mlx5dr_connect_type type;
+	union {
+		struct mlx5dr_ste_htbl *hit_next_htbl;
+		u64 miss_icm_addr;
+	};
+};
+
+struct mlx5dr_rule_rx_tx {
+	struct list_head rule_members_list;
+	struct mlx5dr_matcher_rx_tx *nic_matcher;
+};
+
+struct mlx5dr_rule {
+	struct mlx5dr_matcher *matcher;
+	struct mlx5dr_rule_rx_tx rx;
+	struct mlx5dr_rule_rx_tx tx;
+	struct list_head rule_actions_list;
+};
+
+void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste,
+				    struct mlx5dr_ste *ste);
+
+struct mlx5dr_icm_chunk {
+	struct mlx5dr_icm_bucket *bucket;
+	struct list_head chunk_list;
+	u32 rkey;
+	u32 num_of_entries;
+	u32 byte_size;
+	u64 icm_addr;
+	u64 mr_addr;
+
+	/* Memory optimisation */
+	struct mlx5dr_ste *ste_arr;
+	u8 *hw_ste_arr;
+	struct list_head *miss_list;
+};
+
+static inline int
+mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED;
+}
+
+static inline int
+mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED;
+}
+
+int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
+				   struct mlx5dr_matcher_rx_tx *nic_matcher,
+				   bool ipv6);
+
+static inline u32
+mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size)
+{
+	return 1 << chunk_size;
+}
+
+static inline int
+mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size,
+				   enum mlx5dr_icm_type icm_type)
+{
+	int num_of_entries;
+	int entry_size;
+
+	if (icm_type == DR_ICM_TYPE_STE)
+		entry_size = DR_STE_SIZE;
+	else
+		entry_size = DR_MODIFY_ACTION_SIZE;
+
+	num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
+
+	return entry_size * num_of_entries;
+}
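+
+/* Worked example (illustrative): for DR_ICM_TYPE_STE and chunk_size == 4 the
+ * pool chunk spans (1 << 4) * DR_STE_SIZE bytes, i.e. 16 STEs.
+ */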
+
+static inline struct mlx5dr_cmd_vport_cap *
+mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport)
+{
+	if (!caps->vports_caps ||
+	    (vport >= caps->num_vports && vport != WIRE_PORT))
+		return NULL;
+
+	if (vport == WIRE_PORT)
+		vport = caps->num_vports;
+
+	return &caps->vports_caps[vport];
+}
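+
+/* Note (illustrative): the uplink is addressed with the WIRE_PORT alias; its
+ * capabilities live one slot past the regular vports, so
+ * mlx5dr_get_vport_cap(caps, WIRE_PORT) returns &caps->vports_caps[caps->num_vports].
+ */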
+
+struct mlx5dr_cmd_query_flow_table_details {
+	u8 status;
+	u8 level;
+	u64 sw_owner_icm_root_1;
+	u64 sw_owner_icm_root_0;
+};
+
+/* internal API functions */
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+			    struct mlx5dr_cmd_caps *caps);
+int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
+				       bool other_vport, u16 vport_number,
+				       u64 *icm_address_rx,
+				       u64 *icm_address_tx);
+int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev,
+			  bool other_vport, u16 vport_number, u16 *gvmi);
+int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
+			      struct mlx5dr_esw_caps *caps);
+int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev);
+int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
+					u32 table_type,
+					u32 table_id,
+					u32 group_id,
+					u32 modify_header_id,
+					u32 vport_id);
+int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
+				    u32 table_type,
+				    u32 table_id);
+int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
+				   u32 table_type,
+				   u8 num_of_actions,
+				   u64 *actions,
+				   u32 *modify_header_id);
+int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
+				     u32 modify_header_id);
+int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
+				       u32 table_type,
+				       u32 table_id,
+				       u32 *group_id);
+int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+				  u32 table_type,
+				  u32 table_id,
+				  u32 group_id);
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+				 u32 table_type,
+				 u64 icm_addr_rx,
+				 u64 icm_addr_tx,
+				 u8 level,
+				 bool sw_owner,
+				 bool term_tbl,
+				 u64 *fdb_rx_icm_addr,
+				 u32 *table_id);
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+				  u32 table_id,
+				  u32 table_type);
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+				enum fs_flow_table_type type,
+				u32 table_id,
+				struct mlx5dr_cmd_query_flow_table_details *output);
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+				   enum mlx5_reformat_ctx_type rt,
+				   size_t reformat_size,
+				   void *reformat_data,
+				   u32 *reformat_id);
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+				     u32 reformat_id);
+
+struct mlx5dr_cmd_gid_attr {
+	u8 gid[16];
+	u8 mac[6];
+	u32 roce_ver;
+};
+
+struct mlx5dr_cmd_qp_create_attr {
+	u32 page_id;
+	u32 pdn;
+	u32 cqn;
+	u32 pm_state;
+	u32 service_type;
+	u32 buff_umem_id;
+	u32 db_umem_id;
+	u32 sq_wqe_cnt;
+	u32 rq_wqe_cnt;
+	u32 rq_wqe_shift;
+};
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+			 u16 index, struct mlx5dr_cmd_gid_attr *attr);
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+					       enum mlx5dr_icm_type icm_type);
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
+
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+		       enum mlx5dr_icm_chunk_size chunk_size);
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk);
+bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste);
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+				      struct mlx5dr_domain_rx_tx *nic_dmn,
+				      struct mlx5dr_ste_htbl *htbl,
+				      struct mlx5dr_htbl_connect_info *connect_info,
+				      bool update_hw_ste);
+void mlx5dr_ste_set_formatted_ste(u16 gvmi,
+				  struct mlx5dr_domain_rx_tx *nic_dmn,
+				  struct mlx5dr_ste_htbl *htbl,
+				  u8 *formatted_ste,
+				  struct mlx5dr_htbl_connect_info *connect_info);
+void mlx5dr_ste_copy_param(u8 match_criteria,
+			   struct mlx5dr_match_param *set_param,
+			   struct mlx5dr_match_parameters *mask);
+
+void mlx5dr_crc32_init_table(void);
+u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length);
+
+struct mlx5dr_qp {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_wq_qp wq;
+	struct mlx5_uars_page *uar;
+	struct mlx5_wq_ctrl wq_ctrl;
+	struct mlx5_core_qp mqp;
+	struct {
+		unsigned int pc;
+		unsigned int cc;
+		unsigned int size;
+		unsigned int *wqe_head;
+		unsigned int wqe_cnt;
+	} sq;
+	struct {
+		unsigned int pc;
+		unsigned int cc;
+		unsigned int size;
+		unsigned int wqe_cnt;
+	} rq;
+	int max_inline_data;
+};
+
+struct mlx5dr_cq {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_cqwq wq;
+	struct mlx5_wq_ctrl wq_ctrl;
+	struct mlx5_core_cq mcq;
+	struct mlx5dr_qp *qp;
+};
+
+struct mlx5dr_mr {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_core_mkey mkey;
+	dma_addr_t dma_addr;
+	void *addr;
+	size_t size;
+};
+
+#define MAX_SEND_CQE		64
+#define MIN_READ_SYNC		64
+
+struct mlx5dr_send_ring {
+	struct mlx5dr_cq *cq;
+	struct mlx5dr_qp *qp;
+	struct mlx5dr_mr *mr;
+	/* How many WQEs are waiting for completion */
+	u32 pending_wqe;
+	/* Request a completion signal once this threshold of pending WQEs is reached */
+	u16 signal_th;
+	/* Each post_send is limited to max_post_send_size bytes */
+	u32 max_post_send_size;
+	/* manage the send queue */
+	u32 tx_head;
+	void *buf;
+	u32 buf_size;
+	struct ib_wc wc[MAX_SEND_CQE];
+	u8 sync_buff[MIN_READ_SYNC];
+	struct mlx5dr_mr *sync_mr;
+};
+
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+			   struct mlx5dr_send_ring *send_ring);
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
+			     struct mlx5dr_ste *ste,
+			     u8 *data,
+			     u16 size,
+			     u16 offset);
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+			      struct mlx5dr_ste_htbl *htbl,
+			      u8 *formatted_ste, u8 *mask);
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+					struct mlx5dr_ste_htbl *htbl,
+					u8 *ste_init_data,
+					bool update_hw_ste);
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+				struct mlx5dr_action *action);
+
+struct mlx5dr_fw_recalc_cs_ft {
+	u64 rx_icm_addr;
+	u32 table_id;
+	u32 group_id;
+	u32 modify_hdr_id;
+};
+
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num);
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+				    struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft);
+int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+					      u32 vport_num,
+					      u64 *rx_icm_addr);
+#endif  /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
new file mode 100644
index 0000000..3d587d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5dr.h"
+#include "fs_dr.h"
+
+static bool mlx5_dr_is_fw_table(u32 flags)
+{
+	if (flags & MLX5_FLOW_TABLE_TERMINATION)
+		return true;
+
+	return false;
+}
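+
+/* Note: termination tables remain FW-owned; the mlx5_cmd_dr_* callbacks below
+ * fall back to mlx5_fs_cmd_get_fw_cmds() whenever this check is true.
+ */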
+
+static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns,
+				      struct mlx5_flow_table *ft,
+				      u32 underlay_qpn,
+				      bool disconnect)
+{
+	return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn,
+							 disconnect);
+}
+
+static int set_miss_action(struct mlx5_flow_root_namespace *ns,
+			   struct mlx5_flow_table *ft,
+			   struct mlx5_flow_table *next_ft)
+{
+	struct mlx5dr_action *old_miss_action;
+	struct mlx5dr_action *action = NULL;
+	struct mlx5dr_table *next_tbl;
+	int err;
+
+	next_tbl = next_ft ? next_ft->fs_dr_table.dr_table : NULL;
+	if (next_tbl) {
+		action = mlx5dr_action_create_dest_table(next_tbl);
+		if (!action)
+			return -EINVAL;
+	}
+	old_miss_action = ft->fs_dr_table.miss_action;
+	err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action);
+	if (err && action) {
+		err = mlx5dr_action_destroy(action);
+		if (err) {
+			action = NULL;
+			mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+				      err);
+		}
+	}
+	ft->fs_dr_table.miss_action = action;
+	if (old_miss_action) {
+		err = mlx5dr_action_destroy(old_miss_action);
+		if (err)
+			mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+				      err);
+	}
+
+	return err;
+}
+
+static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
+					 struct mlx5_flow_table *ft,
+					 unsigned int log_size,
+					 struct mlx5_flow_table *next_ft)
+{
+	struct mlx5dr_table *tbl;
+	int err;
+
+	if (mlx5_dr_is_fw_table(ft->flags))
+		return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
+								    log_size,
+								    next_ft);
+
+	tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain,
+				  ft->level);
+	if (!tbl) {
+		mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
+		return -EINVAL;
+	}
+
+	ft->fs_dr_table.dr_table = tbl;
+	ft->id = mlx5dr_table_get_id(tbl);
+
+	if (next_ft) {
+		err = set_miss_action(ns, ft, next_ft);
+		if (err) {
+			mlx5dr_table_destroy(tbl);
+			ft->fs_dr_table.dr_table = NULL;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
+					  struct mlx5_flow_table *ft)
+{
+	struct mlx5dr_action *action = ft->fs_dr_table.miss_action;
+	int err;
+
+	if (mlx5_dr_is_fw_table(ft->flags))
+		return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft);
+
+	err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table);
+	if (err) {
+		mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n",
+			      err);
+		return err;
+	}
+	if (action) {
+		err = mlx5dr_action_destroy(action);
+		if (err) {
+			mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+				      err);
+			return err;
+		}
+	}
+
+	return err;
+}
+
+static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
+					 struct mlx5_flow_table *ft,
+					 struct mlx5_flow_table *next_ft)
+{
+	return set_miss_action(ns, ft, next_ft);
+}
+
+static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns,
+					 struct mlx5_flow_table *ft,
+					 u32 *in,
+					 struct mlx5_flow_group *fg)
+{
+	struct mlx5dr_matcher *matcher;
+	u16 priority = MLX5_GET(create_flow_group_in, in,
+				start_flow_index);
+	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+					    in,
+					    match_criteria_enable);
+	struct mlx5dr_match_parameters mask;
+
+	if (mlx5_dr_is_fw_table(ft->flags))
+		return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in,
+								    fg);
+
+	mask.match_buf = MLX5_ADDR_OF(create_flow_group_in,
+				      in, match_criteria);
+	mask.match_sz = sizeof(fg->mask.match_criteria);
+
+	matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table,
+					priority,
+					match_criteria_enable,
+					&mask);
+	if (!matcher) {
+		mlx5_core_err(ns->dev, "Failed creating matcher\n");
+		return -EINVAL;
+	}
+
+	fg->fs_dr_matcher.dr_matcher = matcher;
+	return 0;
+}
+
+static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
+					  struct mlx5_flow_table *ft,
+					  struct mlx5_flow_group *fg)
+{
+	if (mlx5_dr_is_fw_table(ft->flags))
+		return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg);
+
+	return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher);
+}
+
+static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain,
+						 struct mlx5_flow_rule *dst)
+{
+	struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+	return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num,
+					       dest_attr->vport.flags &
+					       MLX5_FLOW_DEST_VPORT_VHCA_ID,
+					       dest_attr->vport.vhca_id);
+}
+
+static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev,
+					      struct mlx5_flow_rule *dst)
+{
+	struct mlx5_flow_table *dest_ft = dst->dest_attr.ft;
+
+	if (mlx5_dr_is_fw_table(dest_ft->flags))
+		return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev);
+	return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table);
+}
+
+static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain,
+						     struct mlx5_fs_vlan *vlan)
+{
+	u16 n_ethtype = vlan->ethtype;
+	u8  prio = vlan->prio;
+	u16 vid = vlan->vid;
+	u32 vlan_hdr;
+
+	vlan_hdr = (u32)n_ethtype << 16 | (u32)prio << 12 | (u32)vid;
+	return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr));
+}
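+
+/* Worked example (illustrative values): ethtype 0x8100, prio 3, vid 100 give
+ * vlan_hdr = 0x8100 << 16 | 3 << 12 | 100 = 0x81003064, passed to the device
+ * in network byte order via htonl().
+ */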
+
+#define MLX5_FLOW_CONTEXT_ACTION_MAX  20
+static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
+				  struct mlx5_flow_table *ft,
+				  struct mlx5_flow_group *group,
+				  struct fs_fte *fte)
+{
+	struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain;
+	struct mlx5dr_action *term_action = NULL;
+	struct mlx5dr_match_parameters params;
+	struct mlx5_core_dev *dev = ns->dev;
+	struct mlx5dr_action **fs_dr_actions;
+	struct mlx5dr_action *tmp_action;
+	struct mlx5dr_action **actions;
+	bool delay_encap_set = false;
+	struct mlx5dr_rule *rule;
+	struct mlx5_flow_rule *dst;
+	int fs_dr_num_actions = 0;
+	int num_actions = 0;
+	size_t match_sz;
+	int err = 0;
+	int i;
+
+	if (mlx5_dr_is_fw_table(ft->flags))
+		return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte);
+
+	actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions),
+			  GFP_KERNEL);
+	if (!actions)
+		return -ENOMEM;
+
+	fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+				sizeof(*fs_dr_actions), GFP_KERNEL);
+	if (!fs_dr_actions) {
+		kfree(actions);
+		return -ENOMEM;
+	}
+
+	match_sz = sizeof(fte->val);
+
+	/* The order of the actions must be kept; only the following
+	 * order is supported by SW steering:
+	 * TX: push vlan -> modify header -> encap
+	 * RX: decap -> pop vlan -> modify header
+	 */
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+		tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]);
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		actions[num_actions++] = tmp_action;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+		tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]);
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		actions[num_actions++] = tmp_action;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+		enum mlx5dr_action_reformat_type decap_type =
+			DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2;
+
+		tmp_action = mlx5dr_action_create_packet_reformat(domain,
+								  decap_type, 0,
+								  NULL);
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		actions[num_actions++] = tmp_action;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+		bool is_decap = fte->action.pkt_reformat->reformat_type ==
+			MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+
+		if (is_decap)
+			actions[num_actions++] =
+				fte->action.pkt_reformat->action.dr_action;
+		else
+			delay_encap_set = true;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+		tmp_action = mlx5dr_action_create_pop_vlan();
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		actions[num_actions++] = tmp_action;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) {
+		tmp_action = mlx5dr_action_create_pop_vlan();
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		actions[num_actions++] = tmp_action;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+		actions[num_actions++] =
+			fte->action.modify_hdr->action.dr_action;
+
+	if (delay_encap_set)
+		actions[num_actions++] =
+			fte->action.pkt_reformat->action.dr_action;
+
+	/* The order of the actions below is not important */
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+		tmp_action = mlx5dr_action_create_drop();
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		term_action = tmp_action;
+	}
+
+	if (fte->flow_context.flow_tag) {
+		tmp_action = mlx5dr_action_create_tag(fte->flow_context.flow_tag);
+		if (!tmp_action) {
+			err = -ENOMEM;
+			goto free_actions;
+		}
+		fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+		actions[num_actions++] = tmp_action;
+	}
+
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+		list_for_each_entry(dst, &fte->node.children, node.list) {
+			enum mlx5_flow_destination_type type = dst->dest_attr.type;
+			u32 id;
+
+			if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+				err = -ENOSPC;
+				goto free_actions;
+			}
+
+			switch (type) {
+			case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
+				id = dst->dest_attr.counter_id;
+
+				tmp_action =
+					mlx5dr_action_create_flow_counter(id);
+				if (!tmp_action) {
+					err = -ENOMEM;
+					goto free_actions;
+				}
+				fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+				actions[num_actions++] = tmp_action;
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+				tmp_action = create_ft_action(dev, dst);
+				if (!tmp_action) {
+					err = -ENOMEM;
+					goto free_actions;
+				}
+				fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+				term_action = tmp_action;
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+				tmp_action = create_vport_action(domain, dst);
+				if (!tmp_action) {
+					err = -ENOMEM;
+					goto free_actions;
+				}
+				fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+				term_action = tmp_action;
+				break;
+			default:
+				err = -EOPNOTSUPP;
+				goto free_actions;
+			}
+		}
+	}
+
+	params.match_sz = match_sz;
+	params.match_buf = (u64 *)fte->val;
+
+	if (term_action)
+		actions[num_actions++] = term_action;
+
+	rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher,
+				  &params,
+				  num_actions,
+				  actions);
+	if (!rule) {
+		err = -EINVAL;
+		goto free_actions;
+	}
+
+	kfree(actions);
+	fte->fs_dr_rule.dr_rule = rule;
+	fte->fs_dr_rule.num_actions = fs_dr_num_actions;
+	fte->fs_dr_rule.dr_actions = fs_dr_actions;
+
+	return 0;
+
+free_actions:
+	for (i = 0; i < fs_dr_num_actions; i++)
+		if (!IS_ERR_OR_NULL(fs_dr_actions[i]))
+			mlx5dr_action_destroy(fs_dr_actions[i]);
+
+	mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err);
+	kfree(actions);
+	kfree(fs_dr_actions);
+	return err;
+}
+
+static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+					     int reformat_type,
+					     size_t size,
+					     void *reformat_data,
+					     enum mlx5_flow_namespace_type namespace,
+					     struct mlx5_pkt_reformat *pkt_reformat)
+{
+	struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain;
+	struct mlx5dr_action *action;
+	int dr_reformat;
+
+	switch (reformat_type) {
+	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+	case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+	case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+		dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2;
+		break;
+	case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+		dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2;
+		break;
+	case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+		dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3;
+		break;
+	default:
+		mlx5_core_err(ns->dev, "Packet-reformat not supported (%d)\n",
+			      reformat_type);
+		return -EOPNOTSUPP;
+	}
+
+	action = mlx5dr_action_create_packet_reformat(dr_domain,
+						      dr_reformat,
+						      size,
+						      reformat_data);
+	if (!action) {
+		mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n");
+		return -EINVAL;
+	}
+
+	pkt_reformat->action.dr_action = action;
+
+	return 0;
+}
+
+static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+						struct mlx5_pkt_reformat *pkt_reformat)
+{
+	mlx5dr_action_destroy(pkt_reformat->action.dr_action);
+}
+
+static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+					   u8 namespace, u8 num_actions,
+					   void *modify_actions,
+					   struct mlx5_modify_hdr *modify_hdr)
+{
+	struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain;
+	struct mlx5dr_action *action;
+	size_t actions_sz;
+
+	actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) *
+		num_actions;
+	action = mlx5dr_action_create_modify_header(dr_domain, 0,
+						    actions_sz,
+						    modify_actions);
+	if (!action) {
+		mlx5_core_err(ns->dev, "Failed allocating modify-header action\n");
+		return -EINVAL;
+	}
+
+	modify_hdr->action.dr_action = action;
+
+	return 0;
+}
+
+static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+					      struct mlx5_modify_hdr *modify_hdr)
+{
+	mlx5dr_action_destroy(modify_hdr->action.dr_action);
+}
+
+static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
+				  struct mlx5_flow_table *ft,
+				  struct mlx5_flow_group *group,
+				  int modify_mask,
+				  struct fs_fte *fte)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
+				  struct mlx5_flow_table *ft,
+				  struct fs_fte *fte)
+{
+	struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule;
+	int err;
+	int i;
+
+	if (mlx5_dr_is_fw_table(ft->flags))
+		return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte);
+
+	err = mlx5dr_rule_destroy(rule->dr_rule);
+	if (err)
+		return err;
+
+	for (i = 0; i < rule->num_actions; i++)
+		if (!IS_ERR_OR_NULL(rule->dr_actions[i]))
+			mlx5dr_action_destroy(rule->dr_actions[i]);
+
+	kfree(rule->dr_actions);
+	return 0;
+}
+
+static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
+				struct mlx5_flow_root_namespace *peer_ns)
+{
+	struct mlx5dr_domain *peer_domain = NULL;
+
+	if (peer_ns)
+		peer_domain = peer_ns->fs_dr_domain.dr_domain;
+	mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
+			       peer_domain);
+	return 0;
+}
+
+static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+	ns->fs_dr_domain.dr_domain =
+		mlx5dr_domain_create(ns->dev,
+				     MLX5DR_DOMAIN_TYPE_FDB);
+	if (!ns->fs_dr_domain.dr_domain) {
+		mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n");
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+	return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
+}
+
+bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
+{
+	return mlx5dr_is_supported(dev);
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
+	.create_flow_table = mlx5_cmd_dr_create_flow_table,
+	.destroy_flow_table = mlx5_cmd_dr_destroy_flow_table,
+	.modify_flow_table = mlx5_cmd_dr_modify_flow_table,
+	.create_flow_group = mlx5_cmd_dr_create_flow_group,
+	.destroy_flow_group = mlx5_cmd_dr_destroy_flow_group,
+	.create_fte = mlx5_cmd_dr_create_fte,
+	.update_fte = mlx5_cmd_dr_update_fte,
+	.delete_fte = mlx5_cmd_dr_delete_fte,
+	.update_root_ft = mlx5_cmd_dr_update_root_ft,
+	.packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc,
+	.packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc,
+	.modify_header_alloc = mlx5_cmd_dr_modify_header_alloc,
+	.modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc,
+	.set_peer = mlx5_cmd_dr_set_peer,
+	.create_ns = mlx5_cmd_dr_create_ns,
+	.destroy_ns = mlx5_cmd_dr_destroy_ns,
+};
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
+{
+	return &mlx5_flow_cmds_dr;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
new file mode 100644
index 0000000..1fb185d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2019 Mellanox Technologies
+ */
+
+#ifndef _MLX5_FS_DR_
+#define _MLX5_FS_DR_
+
+#include "mlx5dr.h"
+
+struct mlx5_flow_root_namespace;
+struct fs_fte;
+
+struct mlx5_fs_dr_action {
+	struct mlx5dr_action *dr_action;
+};
+
+struct mlx5_fs_dr_ns {
+	struct mlx5_dr_ns *dr_ns;
+};
+
+struct mlx5_fs_dr_rule {
+	struct mlx5dr_rule    *dr_rule;
+	/* Only actions created by fs_dr */
+	struct mlx5dr_action  **dr_actions;
+	int                      num_actions;
+};
+
+struct mlx5_fs_dr_domain {
+	struct mlx5dr_domain	*dr_domain;
+};
+
+struct mlx5_fs_dr_matcher {
+	struct mlx5dr_matcher *dr_matcher;
+};
+
+struct mlx5_fs_dr_table {
+	struct mlx5dr_table  *dr_table;
+	struct mlx5dr_action *miss_action;
+};
+
+#ifdef CONFIG_MLX5_SW_STEERING
+
+bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void);
+
+#else
+
+static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
+{
+	return NULL;
+}
+
+static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
+{
+	return false;
+}
+
+#endif /* CONFIG_MLX5_SW_STEERING */
+#endif /* _MLX5_FS_DR_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
new file mode 100644
index 0000000..596c927
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef MLX5_IFC_DR_H
+#define MLX5_IFC_DR_H
+
+enum {
+	MLX5DR_ACTION_MDFY_HW_FLD_L2_0		= 0,
+	MLX5DR_ACTION_MDFY_HW_FLD_L2_1		= 1,
+	MLX5DR_ACTION_MDFY_HW_FLD_L2_2		= 2,
+	MLX5DR_ACTION_MDFY_HW_FLD_L3_0		= 3,
+	MLX5DR_ACTION_MDFY_HW_FLD_L3_1		= 4,
+	MLX5DR_ACTION_MDFY_HW_FLD_L3_2		= 5,
+	MLX5DR_ACTION_MDFY_HW_FLD_L3_3		= 6,
+	MLX5DR_ACTION_MDFY_HW_FLD_L3_4		= 7,
+	MLX5DR_ACTION_MDFY_HW_FLD_L4_0		= 8,
+	MLX5DR_ACTION_MDFY_HW_FLD_L4_1		= 9,
+	MLX5DR_ACTION_MDFY_HW_FLD_MPLS		= 10,
+	MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_0	= 11,
+	MLX5DR_ACTION_MDFY_HW_FLD_REG_0		= 12,
+	MLX5DR_ACTION_MDFY_HW_FLD_REG_1		= 13,
+	MLX5DR_ACTION_MDFY_HW_FLD_REG_2		= 14,
+	MLX5DR_ACTION_MDFY_HW_FLD_REG_3		= 15,
+	MLX5DR_ACTION_MDFY_HW_FLD_L4_2		= 16,
+	MLX5DR_ACTION_MDFY_HW_FLD_FLEX_0	= 17,
+	MLX5DR_ACTION_MDFY_HW_FLD_FLEX_1	= 18,
+	MLX5DR_ACTION_MDFY_HW_FLD_FLEX_2	= 19,
+	MLX5DR_ACTION_MDFY_HW_FLD_FLEX_3	= 20,
+	MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_1	= 21,
+	MLX5DR_ACTION_MDFY_HW_FLD_METADATA	= 22,
+	MLX5DR_ACTION_MDFY_HW_FLD_RESERVED	= 23,
+};
+
+enum {
+	MLX5DR_ACTION_MDFY_HW_OP_SET		= 0x2,
+	MLX5DR_ACTION_MDFY_HW_OP_ADD		= 0x3,
+};
+
+enum {
+	MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE	= 0x0,
+	MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4	= 0x1,
+	MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6	= 0x2,
+};
+
+enum {
+	MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE	= 0x0,
+	MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP	= 0x1,
+	MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP	= 0x2,
+};
+
+enum {
+	MLX5DR_STE_LU_TYPE_NOP				= 0x00,
+	MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP		= 0x05,
+	MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I		= 0x0a,
+	MLX5DR_STE_LU_TYPE_ETHL2_DST_O			= 0x06,
+	MLX5DR_STE_LU_TYPE_ETHL2_DST_I			= 0x07,
+	MLX5DR_STE_LU_TYPE_ETHL2_DST_D			= 0x1b,
+	MLX5DR_STE_LU_TYPE_ETHL2_SRC_O			= 0x08,
+	MLX5DR_STE_LU_TYPE_ETHL2_SRC_I			= 0x09,
+	MLX5DR_STE_LU_TYPE_ETHL2_SRC_D			= 0x1c,
+	MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_O		= 0x36,
+	MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_I		= 0x37,
+	MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_D		= 0x38,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_O		= 0x0d,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_I		= 0x0e,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_D		= 0x1e,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_O		= 0x0f,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_I		= 0x10,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_D		= 0x1f,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_O		= 0x11,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_I		= 0x12,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_D		= 0x20,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_O		= 0x29,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_I		= 0x2a,
+	MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_D		= 0x2b,
+	MLX5DR_STE_LU_TYPE_ETHL4_O			= 0x13,
+	MLX5DR_STE_LU_TYPE_ETHL4_I			= 0x14,
+	MLX5DR_STE_LU_TYPE_ETHL4_D			= 0x21,
+	MLX5DR_STE_LU_TYPE_ETHL4_MISC_O			= 0x2c,
+	MLX5DR_STE_LU_TYPE_ETHL4_MISC_I			= 0x2d,
+	MLX5DR_STE_LU_TYPE_ETHL4_MISC_D			= 0x2e,
+	MLX5DR_STE_LU_TYPE_MPLS_FIRST_O			= 0x15,
+	MLX5DR_STE_LU_TYPE_MPLS_FIRST_I			= 0x24,
+	MLX5DR_STE_LU_TYPE_MPLS_FIRST_D			= 0x25,
+	MLX5DR_STE_LU_TYPE_GRE				= 0x16,
+	MLX5DR_STE_LU_TYPE_FLEX_PARSER_0		= 0x22,
+	MLX5DR_STE_LU_TYPE_FLEX_PARSER_1		= 0x23,
+	MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER	= 0x19,
+	MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE		= 0x18,
+	MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0		= 0x2f,
+	MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1		= 0x30,
+	MLX5DR_STE_LU_TYPE_DONT_CARE			= 0x0f,
+};
+
+enum mlx5dr_ste_entry_type {
+	MLX5DR_STE_TYPE_TX		= 1,
+	MLX5DR_STE_TYPE_RX		= 2,
+	MLX5DR_STE_TYPE_MODIFY_PKT	= 6,
+};
+
+struct mlx5_ifc_ste_general_bits {
+	u8         entry_type[0x4];
+	u8         reserved_at_4[0x4];
+	u8         entry_sub_type[0x8];
+	u8         byte_mask[0x10];
+
+	u8         next_table_base_63_48[0x10];
+	u8         next_lu_type[0x8];
+	u8         next_table_base_39_32_size[0x8];
+
+	u8         next_table_base_31_5_size[0x1b];
+	u8         linear_hash_enable[0x1];
+	u8         reserved_at_5c[0x2];
+	u8         next_table_rank[0x2];
+
+	u8         reserved_at_60[0xa0];
+	u8         tag_value[0x60];
+	u8         bit_mask[0x60];
+};
+
+struct mlx5_ifc_ste_sx_transmit_bits {
+	u8         entry_type[0x4];
+	u8         reserved_at_4[0x4];
+	u8         entry_sub_type[0x8];
+	u8         byte_mask[0x10];
+
+	u8         next_table_base_63_48[0x10];
+	u8         next_lu_type[0x8];
+	u8         next_table_base_39_32_size[0x8];
+
+	u8         next_table_base_31_5_size[0x1b];
+	u8         linear_hash_enable[0x1];
+	u8         reserved_at_5c[0x2];
+	u8         next_table_rank[0x2];
+
+	u8         sx_wire[0x1];
+	u8         sx_func_lb[0x1];
+	u8         sx_sniffer[0x1];
+	u8         sx_wire_enable[0x1];
+	u8         sx_func_lb_enable[0x1];
+	u8         sx_sniffer_enable[0x1];
+	u8         action_type[0x3];
+	u8         reserved_at_69[0x1];
+	u8         action_description[0x6];
+	u8         gvmi[0x10];
+
+	u8         encap_pointer_vlan_data[0x20];
+
+	u8         loopback_syndome_en[0x8];
+	u8         loopback_syndome[0x8];
+	u8         counter_trigger[0x10];
+
+	u8         miss_address_63_48[0x10];
+	u8         counter_trigger_23_16[0x8];
+	u8         miss_address_39_32[0x8];
+
+	u8         miss_address_31_6[0x1a];
+	u8         learning_point[0x1];
+	u8         go_back[0x1];
+	u8         match_polarity[0x1];
+	u8         mask_mode[0x1];
+	u8         miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_rx_steering_mult_bits {
+	u8         entry_type[0x4];
+	u8         reserved_at_4[0x4];
+	u8         entry_sub_type[0x8];
+	u8         byte_mask[0x10];
+
+	u8         next_table_base_63_48[0x10];
+	u8         next_lu_type[0x8];
+	u8         next_table_base_39_32_size[0x8];
+
+	u8         next_table_base_31_5_size[0x1b];
+	u8         linear_hash_enable[0x1];
+	u8         reserved_at_5c[0x2];
+	u8         next_table_rank[0x2];
+
+	u8         member_count[0x10];
+	u8         gvmi[0x10];
+
+	u8         qp_list_pointer[0x20];
+
+	u8         reserved_at_a0[0x1];
+	u8         tunneling_action[0x3];
+	u8         action_description[0x4];
+	u8         reserved_at_a8[0x8];
+	u8         counter_trigger_15_0[0x10];
+
+	u8         miss_address_63_48[0x10];
+	u8         counter_trigger_23_16[0x08];
+	u8         miss_address_39_32[0x8];
+
+	u8         miss_address_31_6[0x1a];
+	u8         learning_point[0x1];
+	u8         fail_on_error[0x1];
+	u8         match_polarity[0x1];
+	u8         mask_mode[0x1];
+	u8         miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_modify_packet_bits {
+	u8         entry_type[0x4];
+	u8         reserved_at_4[0x4];
+	u8         entry_sub_type[0x8];
+	u8         byte_mask[0x10];
+
+	u8         next_table_base_63_48[0x10];
+	u8         next_lu_type[0x8];
+	u8         next_table_base_39_32_size[0x8];
+
+	u8         next_table_base_31_5_size[0x1b];
+	u8         linear_hash_enable[0x1];
+	u8         reserved_at_5c[0x2];
+	u8         next_table_rank[0x2];
+
+	u8         number_of_re_write_actions[0x10];
+	u8         gvmi[0x10];
+
+	u8         header_re_write_actions_pointer[0x20];
+
+	u8         reserved_at_a0[0x1];
+	u8         tunneling_action[0x3];
+	u8         action_description[0x4];
+	u8         reserved_at_a8[0x8];
+	u8         counter_trigger_15_0[0x10];
+
+	u8         miss_address_63_48[0x10];
+	u8         counter_trigger_23_16[0x08];
+	u8         miss_address_39_32[0x8];
+
+	u8         miss_address_31_6[0x1a];
+	u8         learning_point[0x1];
+	u8         fail_on_error[0x1];
+	u8         match_polarity[0x1];
+	u8         mask_mode[0x1];
+	u8         miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_bits {
+	u8         smac_47_16[0x20];
+
+	u8         smac_15_0[0x10];
+	u8         l3_ethertype[0x10];
+
+	u8         qp_type[0x2];
+	u8         ethertype_filter[0x1];
+	u8         reserved_at_43[0x1];
+	u8         sx_sniffer[0x1];
+	u8         force_lb[0x1];
+	u8         functional_lb[0x1];
+	u8         port[0x1];
+	u8         reserved_at_48[0x4];
+	u8         first_priority[0x3];
+	u8         first_cfi[0x1];
+	u8         first_vlan_qualifier[0x2];
+	u8         reserved_at_52[0x2];
+	u8         first_vlan_id[0xc];
+
+	u8         ip_fragmented[0x1];
+	u8         tcp_syn[0x1];
+	u8         encp_type[0x2];
+	u8         l3_type[0x2];
+	u8         l4_type[0x2];
+	u8         reserved_at_68[0x4];
+	u8         second_priority[0x3];
+	u8         second_cfi[0x1];
+	u8         second_vlan_qualifier[0x2];
+	u8         reserved_at_72[0x2];
+	u8         second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_dst_bits {
+	u8         dmac_47_16[0x20];
+
+	u8         dmac_15_0[0x10];
+	u8         l3_ethertype[0x10];
+
+	u8         qp_type[0x2];
+	u8         ethertype_filter[0x1];
+	u8         reserved_at_43[0x1];
+	u8         sx_sniffer[0x1];
+	u8         force_lb[0x1];
+	u8         functional_lb[0x1];
+	u8         port[0x1];
+	u8         reserved_at_48[0x4];
+	u8         first_priority[0x3];
+	u8         first_cfi[0x1];
+	u8         first_vlan_qualifier[0x2];
+	u8         reserved_at_52[0x2];
+	u8         first_vlan_id[0xc];
+
+	u8         ip_fragmented[0x1];
+	u8         tcp_syn[0x1];
+	u8         encp_type[0x2];
+	u8         l3_type[0x2];
+	u8         l4_type[0x2];
+	u8         reserved_at_68[0x4];
+	u8         second_priority[0x3];
+	u8         second_cfi[0x1];
+	u8         second_vlan_qualifier[0x2];
+	u8         reserved_at_72[0x2];
+	u8         second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_dst_bits {
+	u8         dmac_47_16[0x20];
+
+	u8         dmac_15_0[0x10];
+	u8         smac_47_32[0x10];
+
+	u8         smac_31_0[0x20];
+
+	u8         sx_sniffer[0x1];
+	u8         force_lb[0x1];
+	u8         functional_lb[0x1];
+	u8         port[0x1];
+	u8         l3_type[0x2];
+	u8         reserved_at_66[0x6];
+	u8         first_priority[0x3];
+	u8         first_cfi[0x1];
+	u8         first_vlan_qualifier[0x2];
+	u8         reserved_at_72[0x2];
+	u8         first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits {
+	u8         destination_address[0x20];
+
+	u8         source_address[0x20];
+
+	u8         source_port[0x10];
+	u8         destination_port[0x10];
+
+	u8         fragmented[0x1];
+	u8         first_fragment[0x1];
+	u8         reserved_at_62[0x2];
+	u8         reserved_at_64[0x1];
+	u8         ecn[0x2];
+	u8         tcp_ns[0x1];
+	u8         tcp_cwr[0x1];
+	u8         tcp_ece[0x1];
+	u8         tcp_urg[0x1];
+	u8         tcp_ack[0x1];
+	u8         tcp_psh[0x1];
+	u8         tcp_rst[0x1];
+	u8         tcp_syn[0x1];
+	u8         tcp_fin[0x1];
+	u8         dscp[0x6];
+	u8         reserved_at_76[0x2];
+	u8         protocol[0x8];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits {
+	u8         dst_ip_127_96[0x20];
+
+	u8         dst_ip_95_64[0x20];
+
+	u8         dst_ip_63_32[0x20];
+
+	u8         dst_ip_31_0[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l2_tnl_bits {
+	u8         dmac_47_16[0x20];
+
+	u8         dmac_15_0[0x10];
+	u8         l3_ethertype[0x10];
+
+	u8         l2_tunneling_network_id[0x20];
+
+	u8         ip_fragmented[0x1];
+	u8         tcp_syn[0x1];
+	u8         encp_type[0x2];
+	u8         l3_type[0x2];
+	u8         l4_type[0x2];
+	u8         first_priority[0x3];
+	u8         first_cfi[0x1];
+	u8         reserved_at_6c[0x3];
+	u8         gre_key_flag[0x1];
+	u8         first_vlan_qualifier[0x2];
+	u8         reserved_at_72[0x2];
+	u8         first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv6_src_bits {
+	u8         src_ip_127_96[0x20];
+
+	u8         src_ip_95_64[0x20];
+
+	u8         src_ip_63_32[0x20];
+
+	u8         src_ip_31_0[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits {
+	u8         version[0x4];
+	u8         ihl[0x4];
+	u8         reserved_at_8[0x8];
+	u8         total_length[0x10];
+
+	u8         identification[0x10];
+	u8         flags[0x3];
+	u8         fragment_offset[0xd];
+
+	u8         time_to_live[0x8];
+	u8         reserved_at_48[0x8];
+	u8         checksum[0x10];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l4_bits {
+	u8         fragmented[0x1];
+	u8         first_fragment[0x1];
+	u8         reserved_at_2[0x6];
+	u8         protocol[0x8];
+	u8         dst_port[0x10];
+
+	u8         ipv6_version[0x4];
+	u8         reserved_at_24[0x1];
+	u8         ecn[0x2];
+	u8         tcp_ns[0x1];
+	u8         tcp_cwr[0x1];
+	u8         tcp_ece[0x1];
+	u8         tcp_urg[0x1];
+	u8         tcp_ack[0x1];
+	u8         tcp_psh[0x1];
+	u8         tcp_rst[0x1];
+	u8         tcp_syn[0x1];
+	u8         tcp_fin[0x1];
+	u8         src_port[0x10];
+
+	u8         ipv6_payload_length[0x10];
+	u8         ipv6_hop_limit[0x8];
+	u8         dscp[0x6];
+	u8         reserved_at_5e[0x2];
+
+	u8         tcp_data_offset[0x4];
+	u8         reserved_at_64[0x8];
+	u8         flow_label[0x14];
+};
+
+struct mlx5_ifc_ste_eth_l4_misc_bits {
+	u8         checksum[0x10];
+	u8         length[0x10];
+
+	u8         seq_num[0x20];
+
+	u8         ack_num[0x20];
+
+	u8         urgent_pointer[0x10];
+	u8         window_size[0x10];
+};
+
+struct mlx5_ifc_ste_mpls_bits {
+	u8         mpls0_label[0x14];
+	u8         mpls0_exp[0x3];
+	u8         mpls0_s_bos[0x1];
+	u8         mpls0_ttl[0x8];
+
+	u8         mpls1_label[0x20];
+
+	u8         mpls2_label[0x20];
+
+	u8         reserved_at_60[0x16];
+	u8         mpls4_s_bit[0x1];
+	u8         mpls4_qualifier[0x1];
+	u8         mpls3_s_bit[0x1];
+	u8         mpls3_qualifier[0x1];
+	u8         mpls2_s_bit[0x1];
+	u8         mpls2_qualifier[0x1];
+	u8         mpls1_s_bit[0x1];
+	u8         mpls1_qualifier[0x1];
+	u8         mpls0_s_bit[0x1];
+	u8         mpls0_qualifier[0x1];
+};
+
+struct mlx5_ifc_ste_register_0_bits {
+	u8         register_0_h[0x20];
+
+	u8         register_0_l[0x20];
+
+	u8         register_1_h[0x20];
+
+	u8         register_1_l[0x20];
+};
+
+struct mlx5_ifc_ste_register_1_bits {
+	u8         register_2_h[0x20];
+
+	u8         register_2_l[0x20];
+
+	u8         register_3_h[0x20];
+
+	u8         register_3_l[0x20];
+};
+
+struct mlx5_ifc_ste_gre_bits {
+	u8         gre_c_present[0x1];
+	u8         reserved_at_1[0x1];
+	u8         gre_k_present[0x1];
+	u8         gre_s_present[0x1];
+	u8         strict_src_route[0x1];
+	u8         recur[0x3];
+	u8         flags[0x5];
+	u8         version[0x3];
+	u8         gre_protocol[0x10];
+
+	u8         checksum[0x10];
+	u8         offset[0x10];
+
+	u8         gre_key_h[0x18];
+	u8         gre_key_l[0x8];
+
+	u8         seq_num[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_0_bits {
+	u8         parser_3_label[0x14];
+	u8         parser_3_exp[0x3];
+	u8         parser_3_s_bos[0x1];
+	u8         parser_3_ttl[0x8];
+
+	u8         flex_parser_2[0x20];
+
+	u8         flex_parser_1[0x20];
+
+	u8         flex_parser_0[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_1_bits {
+	u8         flex_parser_7[0x20];
+
+	u8         flex_parser_6[0x20];
+
+	u8         flex_parser_5[0x20];
+
+	u8         flex_parser_4[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_tnl_bits {
+	u8         flex_parser_tunneling_header_63_32[0x20];
+
+	u8         flex_parser_tunneling_header_31_0[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_general_purpose_bits {
+	u8         general_purpose_lookup_field[0x20];
+
+	u8         reserved_at_20[0x20];
+
+	u8         reserved_at_40[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_src_gvmi_qp_bits {
+	u8         loopback_syndrome[0x8];
+	u8         reserved_at_8[0x8];
+	u8         source_gvmi[0x10];
+
+	u8         reserved_at_20[0x5];
+	u8         force_lb[0x1];
+	u8         functional_lb[0x1];
+	u8         source_is_requestor[0x1];
+	u8         source_qp[0x18];
+
+	u8         reserved_at_40[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_l2_hdr_bits {
+	u8         dmac_47_16[0x20];
+
+	u8         dmac_15_0[0x10];
+	u8         smac_47_32[0x10];
+
+	u8         smac_31_0[0x20];
+
+	u8         ethertype[0x10];
+	u8         vlan_type[0x10];
+
+	u8         vlan[0x10];
+	u8         reserved_at_90[0x10];
+};
+
+/* Both HW set and HW add share the same HW format with different opcodes */
+struct mlx5_ifc_dr_action_hw_set_bits {
+	u8         opcode[0x8];
+	u8         destination_field_code[0x8];
+	u8         reserved_at_10[0x2];
+	u8         destination_left_shifter[0x6];
+	u8         reserved_at_18[0x3];
+	u8         destination_length[0x5];
+
+	u8         inline_data[0x20];
+};
+
+#endif /* MLX5_IFC_DR_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
new file mode 100644
index 0000000..adda9cb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef _MLX5DR_H_
+#define _MLX5DR_H_
+
+struct mlx5dr_domain;
+struct mlx5dr_table;
+struct mlx5dr_matcher;
+struct mlx5dr_rule;
+struct mlx5dr_action;
+
+enum mlx5dr_domain_type {
+	MLX5DR_DOMAIN_TYPE_NIC_RX,
+	MLX5DR_DOMAIN_TYPE_NIC_TX,
+	MLX5DR_DOMAIN_TYPE_FDB,
+};
+
+enum mlx5dr_domain_sync_flags {
+	MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0,
+	MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1,
+};
+
+enum mlx5dr_action_reformat_type {
+	DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2,
+	DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2,
+	DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
+	DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
+};
+
+struct mlx5dr_match_parameters {
+	size_t match_sz;
+	u64 *match_buf; /* Device spec format */
+};
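+
+/* A minimal sketch of preparing match parameters (illustrative; assumes the
+ * standard fte_match_param layout from mlx5_ifc.h):
+ *
+ *	u32 buf[MLX5_ST_SZ_DW(fte_match_param)] = {};
+ *	void *hdrs = MLX5_ADDR_OF(fte_match_param, buf, outer_headers);
+ *	struct mlx5dr_match_parameters params;
+ *
+ *	MLX5_SET(fte_match_set_lyr_2_4, hdrs, ip_protocol, IPPROTO_TCP);
+ *	params.match_sz = sizeof(buf);
+ *	params.match_buf = (u64 *)buf;
+ */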
+
+#ifdef CONFIG_MLX5_SW_STEERING
+
+struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
+
+int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+			    struct mlx5dr_domain *peer_dmn);
+
+struct mlx5dr_table *
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level);
+
+int mlx5dr_table_destroy(struct mlx5dr_table *table);
+
+u32 mlx5dr_table_get_id(struct mlx5dr_table *table);
+
+struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *table,
+		      u16 priority,
+		      u8 match_criteria_enable,
+		      struct mlx5dr_match_parameters *mask);
+
+int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_rule *
+mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+		   struct mlx5dr_match_parameters *value,
+		   size_t num_actions,
+		   struct mlx5dr_action *actions[]);
+
+int mlx5dr_rule_destroy(struct mlx5dr_rule *rule);
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+				 struct mlx5dr_action *action);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
+					struct mlx5_core_dev *mdev);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
+				u32 vport, u8 vhca_id_valid,
+				u16 vhca_id);
+
+struct mlx5dr_action *mlx5dr_action_create_drop(void);
+
+struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value);
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id);
+
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+				     enum mlx5dr_action_reformat_type reformat_type,
+				     size_t data_sz,
+				     void *data);
+
+struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain,
+				   u32 flags,
+				   size_t actions_sz,
+				   __be64 actions[]);
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void);
+
+struct mlx5dr_action *
+mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr);
+
+int mlx5dr_action_destroy(struct mlx5dr_action *action);
+
+static inline bool
+mlx5dr_is_supported(struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner);
+}
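+
+/* Typical gating sketch (illustrative):
+ *
+ *	if (mlx5dr_is_supported(mdev))
+ *		dmn = mlx5dr_domain_create(mdev, MLX5DR_DOMAIN_TYPE_FDB);
+ */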
+
+#else /* CONFIG_MLX5_SW_STEERING */
+
+static inline struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; }
+
+static inline int
+mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; }
+
+static inline int
+mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; }
+
+static inline void
+mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+		       struct mlx5dr_domain *peer_dmn) { }
+
+static inline struct mlx5dr_table *
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; }
+
+static inline int
+mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; }
+
+static inline u32
+mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; }
+
+static inline struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *table,
+		      u16 priority,
+		      u8 match_criteria_enable,
+		      struct mlx5dr_match_parameters *mask) { return NULL; }
+
+static inline int
+mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; }
+
+static inline struct mlx5dr_rule *
+mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+		   struct mlx5dr_match_parameters *value,
+		   size_t num_actions,
+		   struct mlx5dr_action *actions[]) { return NULL; }
+
+static inline int
+mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; }
+
+static inline int
+mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+			     struct mlx5dr_action *action) { return 0; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
+					struct mlx5_core_dev *mdev) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
+				u32 vport, u8 vhca_id_valid,
+				u16 vhca_id) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_drop(void) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_tag(u32 tag_value) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+				     enum mlx5dr_action_reformat_type reformat_type,
+				     size_t data_sz,
+				     void *data) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain,
+				   u32 flags,
+				   size_t actions_sz,
+				   __be64 actions[]) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_pop_vlan(void) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain,
+			       __be32 vlan_hdr) { return NULL; }
+
+static inline int
+mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; }
+
+static inline bool
+mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; }
+
+#endif /* CONFIG_MLX5_SW_STEERING */
+
+#endif /* _MLX5DR_H_ */
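
Taken together, the declarations above form the whole software-steering surface: every object has a create/destroy pair, and the !CONFIG_MLX5_SW_STEERING stubs keep callers compiling when the feature is off. A minimal caller sketch under stated assumptions: the FDB domain-type constant comes from the enum earlier in this header, the match_criteria_enable value of 1 (outer headers) follows the usual ifc bitmask layout, and mask/value are assumed to be pre-filled in device format — none of this wiring is part of the hunk itself.

	/* Sketch only: install one drop rule via the API declared above. */
	static int example_install_drop_rule(struct mlx5_core_dev *mdev,
					     struct mlx5dr_match_parameters *mask,
					     struct mlx5dr_match_parameters *value)
	{
		struct mlx5dr_domain *dmn;
		struct mlx5dr_table *tbl;
		struct mlx5dr_matcher *m;
		struct mlx5dr_action *drop;
		struct mlx5dr_rule *rule;
		int err = -EINVAL;

		if (!mlx5dr_is_supported(mdev))
			return -EOPNOTSUPP;

		dmn = mlx5dr_domain_create(mdev, MLX5DR_DOMAIN_TYPE_FDB);
		if (!dmn)
			return -EINVAL;
		tbl = mlx5dr_table_create(dmn, 0);
		if (!tbl)
			goto err_tbl;
		m = mlx5dr_matcher_create(tbl, 0, 1 /* outer headers */, mask);
		if (!m)
			goto err_matcher;
		drop = mlx5dr_action_create_drop();
		if (!drop)
			goto err_action;
		rule = mlx5dr_rule_create(m, value, 1, &drop);
		if (!rule)
			goto err_rule;
		return 0;

	err_rule:
		mlx5dr_action_destroy(drop);
	err_action:
		mlx5dr_matcher_destroy(m);
	err_matcher:
		mlx5dr_table_destroy(tbl);
	err_tbl:
		mlx5dr_domain_destroy(dmn);
		return err;
	}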
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index a1ee9a8..b106850 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -182,16 +182,24 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
 
+int mlx5_core_create_tir_out(struct mlx5_core_dev *dev,
+			     u32 *in, int inlen,
+			     u32 *out, int outlen)
+{
+	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+
+	return mlx5_cmd_exec(dev, in, inlen, out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_create_tir_out);
+
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *tirn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
 	int err;
 
-	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	err = mlx5_core_create_tir_out(dev, in, inlen,
+				       out, sizeof(out));
 	if (!err)
 		*tirn = MLX5_GET(create_tir_out, out, tirn);
 
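The refactor above splits TIR creation in two: mlx5_core_create_tir_out() executes the command with a caller-supplied output buffer, while mlx5_core_create_tir() keeps the old contract of returning only the TIR number. A hedged sketch of a caller that wants the full CREATE_TIR output — it deliberately mirrors the wrapper above; "in"/"inlen" are assumed to be a prepared create_tir_in mailbox:

	static int example_create_tir(struct mlx5_core_dev *dev,
				      u32 *in, int inlen, u32 *tirn)
	{
		u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
		int err;

		err = mlx5_core_create_tir_out(dev, in, inlen, out, sizeof(out));
		if (err)
			return err;
		/* tirn plus any other create_tir_out fields the caller needs */
		*tirn = MLX5_GET(create_tir_out, out, tirn);
		return 0;
	}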
@@ -258,115 +266,6 @@
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-			 u32 *rmpn)
-{
-	u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
-	int err;
-
-	MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-	if (!err)
-		*rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
-	return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
-	u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
-	MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
-	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
-	MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
-	MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out,
-					  sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
-	u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
-	int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
-	MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
-	MLX5_SET(query_rmp_in, in, rmpn,   rmpn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
-	void *in;
-	void *rmpc;
-	void *wq;
-	void *bitmask;
-	int  err;
-
-	in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
-
-	rmpc    = MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
-	bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
-	wq      = MLX5_ADDR_OF(rmpc,	        rmpc, wq);
-
-	MLX5_SET(modify_rmp_in, in,	 rmp_state, MLX5_RMPC_STATE_RDY);
-	MLX5_SET(modify_rmp_in, in,	 rmpn,      rmpn);
-	MLX5_SET(wq,		wq,	 lwm,	    lwm);
-	MLX5_SET(rmp_bitmask,	bitmask, lwm,	    1);
-	MLX5_SET(rmpc,		rmpc,	 state,	    MLX5_RMPC_STATE_RDY);
-
-	err =  mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
-	kvfree(in);
-
-	return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-			  u32 *xsrqn)
-{
-	u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
-	int err;
-
-	MLX5_SET(create_xrc_srq_in, in, opcode,     MLX5_CMD_OP_CREATE_XRC_SRQ);
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-	if (!err)
-		*xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
-	return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
-	MLX5_SET(destroy_xrc_srq_in, in, opcode,   MLX5_CMD_OP_DESTROY_XRC_SRQ);
-	MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
-	u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
-	MLX5_SET(arm_xrc_srq_in, in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
-	MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
-	MLX5_SET(arm_xrc_srq_in, in, lwm,      lwm);
-	MLX5_SET(arm_xrc_srq_in, in, op_mod,
-		 MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *rqtn)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 8b97066..0d00622 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -79,7 +79,7 @@
 	else
 		system_page_index = index;
 
-	return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+	return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index;
 }
 
 static void up_rel_func(struct kref *kref)
@@ -90,8 +90,8 @@
 	iounmap(up->map);
 	if (mlx5_cmd_free_uar(up->mdev, up->index))
 		mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
-	kfree(up->reg_bitmap);
-	kfree(up->fp_bitmap);
+	bitmap_free(up->reg_bitmap);
+	bitmap_free(up->fp_bitmap);
 	kfree(up);
 }
 
@@ -110,11 +110,11 @@
 		return ERR_PTR(err);
 
 	up->mdev = mdev;
-	up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+	up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
 	if (!up->reg_bitmap)
 		goto error1;
 
-	up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+	up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
 	if (!up->fp_bitmap)
 		goto error1;
 
@@ -157,8 +157,8 @@
 	if (mlx5_cmd_free_uar(mdev, up->index))
 		mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
 error1:
-	kfree(up->fp_bitmap);
-	kfree(up->reg_bitmap);
+	bitmap_free(up->fp_bitmap);
+	bitmap_free(up->reg_bitmap);
 	kfree(up);
 	return ERR_PTR(err);
 }
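The uar.c hunks above swap the open-coded kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), ...) for bitmap_zalloc()/bitmap_free(), which take a bit count directly. The allocate and free helpers must be paired. The idiom in isolation (nbits is illustrative):

	static int example_bitmap_use(unsigned int nbits)
	{
		/* Sized in bits; no BITS_TO_LONGS() bookkeeping needed. */
		unsigned long *map = bitmap_zalloc(nbits, GFP_KERNEL);

		if (!map)
			return -ENOMEM;
		set_bit(3, map);	/* ordinary bitmap ops apply */
		bitmap_free(map);	/* must pair with bitmap_zalloc() */
		return 0;
	}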
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b02af31..30f7848 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -34,6 +34,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/vport.h>
+#include <linux/mlx5/eswitch.h>
 #include "mlx5_core.h"
 
 /* Mutex to hold while enabling or disabling RoCE */
@@ -64,7 +65,7 @@
 }
 
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-				  u16 vport, u8 state)
+				  u16 vport, u8 other_vport, u8 state)
 {
 	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
@@ -73,8 +74,7 @@
 		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
 	MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
 	MLX5_SET(modify_vport_state_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+	MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
 	MLX5_SET(modify_vport_state_in, in, admin_state, state);
 
 	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
@@ -122,12 +122,13 @@
 			   u8 *min_inline_mode)
 {
 	switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode))
+			break;
+		/* fall through */
 	case MLX5_CAP_INLINE_MODE_L2:
 		*min_inline_mode = MLX5_INLINE_MODE_L2;
 		break;
-	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
-		mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
-		break;
 	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
 		*min_inline_mode = MLX5_INLINE_MODE_NONE;
 		break;
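The reordered switch above also changes failure behaviour: when the per-vport query fails, execution now falls through to the L2 case, so *min_inline_mode degrades to the conservative MLX5_INLINE_MODE_L2 instead of being left untouched. The shape of the pattern, with hypothetical names standing in for the mlx5 ones:

	/* Hypothetical skeleton of the guarded fall-through used above. */
	switch (cap) {
	case CAP_FROM_VPORT_CONTEXT:
		if (!query_vport_mode(dev, &mode))
			break;		/* query succeeded, mode is set */
		/* fall through */
	case CAP_L2:
		mode = MODE_L2;		/* conservative default */
		break;
	}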
@@ -156,11 +157,12 @@
 }
 
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
-				     u16 vport, u8 *addr)
+				     u16 vport, bool other, u8 *addr)
 {
-	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
 	u8 *out_addr;
+	u32 *out;
 	int err;
 
 	out = kvzalloc(outlen, GFP_KERNEL);
@@ -170,7 +172,12 @@
 	out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
 				nic_vport_context.permanent_address);
 
-	err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+	MLX5_SET(query_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+	MLX5_SET(query_nic_vport_context_in, in, other_vport, other);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 	if (!err)
 		ether_addr_copy(addr, &out_addr[2]);
 
@@ -179,6 +186,12 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
 
+int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
+{
+	return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_mac_address);
+
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				      u16 vport, u8 *addr)
 {
@@ -195,9 +208,7 @@
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 field_select.permanent_address, 1);
 	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
-
-	if (vport)
-		MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
 
 	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
 				     in, nic_vport_context);
@@ -255,7 +266,7 @@
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
 
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
-				  u32 vport,
+				  u16 vport,
 				  enum mlx5_list_type list_type,
 				  u8 addr_list[][ETH_ALEN],
 				  int *list_size)
@@ -292,9 +303,7 @@
 		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
 	MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
 	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
-
-	if (vport)
-		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+	MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
 	if (err)
@@ -372,67 +381,6 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
 
-int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
-			       u32 vport,
-			       u16 vlans[],
-			       int *size)
-{
-	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
-	void *nic_vport_ctx;
-	int req_list_size;
-	int max_list_size;
-	int out_sz;
-	void *out;
-	int err;
-	int i;
-
-	req_list_size = *size;
-	max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
-	if (req_list_size > max_list_size) {
-		mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n",
-			       req_list_size, max_list_size);
-		req_list_size = max_list_size;
-	}
-
-	out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
-			req_list_size * MLX5_ST_SZ_BYTES(vlan_layout);
-
-	memset(in, 0, sizeof(in));
-	out = kzalloc(out_sz, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	MLX5_SET(query_nic_vport_context_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
-	MLX5_SET(query_nic_vport_context_in, in, allowed_list_type,
-		 MLX5_NVPRT_LIST_TYPE_VLAN);
-	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
-
-	if (vport)
-		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
-
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
-	if (err)
-		goto out;
-
-	nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
-				     nic_vport_context);
-	req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
-				 allowed_list_size);
-
-	*size = req_list_size;
-	for (i = 0; i < req_list_size; i++) {
-		void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
-					       nic_vport_ctx,
-					       current_uc_mac_address[i]);
-		vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan);
-	}
-out:
-	kfree(out);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans);
-
 int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 				u16 vlans[],
 				int list_size)
@@ -526,7 +474,7 @@
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
 
 int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
-				    u32 vport, u64 node_guid)
+				    u16 vport, u64 node_guid)
 {
 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
 	void *nic_vport_context;
@@ -545,7 +493,7 @@
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 field_select.node_guid, 1);
 	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
-	MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+	MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
 
 	nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
 					 in, nic_vport_context);
@@ -827,7 +775,7 @@
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
 
 int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
-				 u32 vport,
+				 u16 vport,
 				 int *promisc_uc,
 				 int *promisc_mc,
 				 int *promisc_all)
@@ -1057,7 +1005,7 @@
 EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
 
 int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
-				u64 *rx_discard_vport_down,
+				u8 other_vport, u64 *rx_discard_vport_down,
 				u64 *tx_discard_vport_down)
 {
 	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
@@ -1068,8 +1016,7 @@
 		 MLX5_CMD_OP_QUERY_VNIC_ENV);
 	MLX5_SET(query_vnic_env_in, in, op_mod, 0);
 	MLX5_SET(query_vnic_env_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+	MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
 
 	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 	if (err)
@@ -1201,3 +1148,36 @@
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
+
+u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
+{
+	int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+	u64 tmp = 0;
+
+	if (mdev->sys_image_guid)
+		return mdev->sys_image_guid;
+
+	if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
+		mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+	else
+		mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
+
+	mdev->sys_image_guid = tmp;
+
+	return tmp;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
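mlx5_query_nic_system_image_guid() memoizes its result in mdev->sys_image_guid, so the firmware query runs at most once per device and later calls are cheap. A hedged usage sketch — comparing GUIDs to decide whether two functions (dev0/dev1, hypothetical pointers) sit on the same physical adapter is a typical use, though not shown in this patch:

	bool same_board = mlx5_query_nic_system_image_guid(dev0) ==
			  mlx5_query_nic_system_image_guid(dev1);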
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev:	Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports() returns the total number of vports
+ * for the eswitch.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+	return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev);
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_total_vports);
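A worked reading of the helper, as an assumption about the macro rather than something this hunk shows: MLX5_SPECIAL_VPORTS() covers the non-VF vports (the PF, plus the ECPF on parts that have one) and mlx5_core_max_vfs() adds the VF range, so a PF with 8 VFs and no ECPF would yield:

	u16 total = mlx5_eswitch_get_total_vports(dev);	/* 1 + 8 = 9 here */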
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index ddca327..dd2315c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -44,59 +44,47 @@
 	return wq->fbc.sz_m1 + 1;
 }
 
+u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
+{
+	return wq->fbc.log_stride;
+}
+
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
 {
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
+static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
 {
-	return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
-{
-	return mlx5_wq_cyc_get_byte_size(&wq->rq) +
-	       mlx5_wq_cyc_get_byte_size(&wq->sq);
-}
-
-static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
-{
-	return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
-{
-	return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
+	return ((u32)1 << log_sz) << log_stride;
 }
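wq_get_byte_sz() replaces the four per-queue *_get_byte_size() helpers with one expression: the queue holds 2^log_sz strides of 2^log_stride bytes each, i.e. 1 << (log_sz + log_stride) bytes in total. For example:

	/* 1024 WQEs (log_sz = 10) of 64B strides (log_stride = 6): */
	u32 bytes = wq_get_byte_sz(10, 6);	/* (1 << 10) << 6 = 65536 */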
 
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+	u8 log_wq_sz     = MLX5_GET(wq, wqc, log_wq_sz);
 	struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
 	int err;
 
-	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
-		      MLX5_GET(wq, wqc, log_wq_sz),
-		      fbc);
-	wq->sz    = wq->fbc.sz_m1 + 1;
-
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+	wq->db  = wq_ctrl->db.db;
+
+	err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
 				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	fbc->frag_buf = wq_ctrl->buf;
-	wq->db  = wq_ctrl->db.db;
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
+	wq->sz = mlx5_wq_cyc_get_size(wq);
 
 	wq_ctrl->mdev = mdev;
 
@@ -108,46 +96,19 @@
 	return err;
 }
 
-static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
-				 struct mlx5_wq_qp *qp)
-{
-	struct mlx5_frag_buf_ctrl *sq_fbc;
-	struct mlx5_frag_buf *rqb, *sqb;
-
-	rqb  = &qp->rq.fbc.frag_buf;
-	*rqb = *buf;
-	rqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
-	rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
-
-	sq_fbc = &qp->sq.fbc;
-	sqb    = &sq_fbc->frag_buf;
-	*sqb   = *buf;
-	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->sq);
-	sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
-	sqb->frags += rqb->npages; /* first part is for the rq */
-	if (sq_fbc->strides_offset)
-		sqb->frags--;
-}
-
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
-	u16 sq_strides_offset;
-	u32 rq_pg_remainder;
+	u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+	u8 log_rq_sz     = MLX5_GET(qpc, qpc, log_rq_size);
+	u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB);
+	u8 log_sq_sz     = MLX5_GET(qpc, qpc, log_sq_size);
+
+	u32 rq_byte_size;
 	int err;
 
-	mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
-		      MLX5_GET(qpc, qpc, log_rq_size),
-		      &wq->rq.fbc);
 
-	rq_pg_remainder   = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
-	sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
-
-	mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
-			     MLX5_GET(qpc, qpc, log_sq_size),
-			     sq_strides_offset,
-			     &wq->sq.fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -155,14 +116,32 @@
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+	err = mlx5_frag_buf_alloc_node(mdev,
+				       wq_get_byte_sz(log_rq_sz, log_rq_stride) +
+				       wq_get_byte_sz(log_sq_sz, log_sq_stride),
 				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc);
+
+	rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride);
+
+	if (rq_byte_size < PAGE_SIZE) {
+		/* SQ starts within the same page as the RQ */
+		u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB;
+
+		mlx5_init_fbc_offset(wq_ctrl->buf.frags,
+				     log_sq_stride, log_sq_sz, sq_strides_offset,
+				     &wq->sq.fbc);
+	} else {
+		u16 rq_npages = rq_byte_size >> PAGE_SHIFT;
+
+		mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages,
+			      log_sq_stride, log_sq_sz, &wq->sq.fbc);
+	}
 
 	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
 	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
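The branch above decides where the SQ fragments begin relative to the RQ. Worked numbers, assuming PAGE_SIZE = 4096 (PAGE_SHIFT = 12) and MLX5_SEND_WQE_BB = 64:

	/*   rq_byte_size = 1024 -> SQ shares the RQ page,
	 *                          sq_strides_offset = 1024 / 64 = 16
	 *   rq_byte_size = 8192 -> rq_npages = 8192 >> 12 = 2,
	 *                          SQ starts at fragment index 2
	 */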
@@ -181,17 +160,20 @@
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_wq_ctrl *wq_ctrl)
 {
+	/* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */
+	u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7;
+	u8 log_wq_sz     = MLX5_GET(cqc, cqc, log_cq_size);
 	int err;
 
-	mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
-
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+	wq->db  = wq_ctrl->db.db;
+
+	err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
 				       &wq_ctrl->buf,
 				       param->buf_numa_node);
 	if (err) {
@@ -200,8 +182,7 @@
 		goto err_db_free;
 	}
 
-	wq->fbc.frag_buf = wq_ctrl->buf;
-	wq->db  = wq_ctrl->db.db;
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc);
 
 	wq_ctrl->mdev = mdev;
 
@@ -217,30 +198,29 @@
 		      void *wqc, struct mlx5_wq_ll *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+	u8 log_wq_sz     = MLX5_GET(wq, wqc, log_wq_sz);
 	struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
 	struct mlx5_wqe_srq_next_seg *next_seg;
 	int err;
 	int i;
 
-	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
-		      MLX5_GET(wq, wqc, log_wq_sz),
-		      fbc);
-
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+	wq->db  = wq_ctrl->db.db;
+
+	err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
 				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	wq->fbc.frag_buf = wq_ctrl->buf;
-	wq->db  = wq_ctrl->db.db;
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
 
 	for (i = 0; i < fbc->sz_m1; i++) {
 		next_seg = mlx5_wq_ll_get_wqe(wq, i);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index b1293d1..55791f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -89,6 +89,7 @@
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
+u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *wqc, struct mlx5_wq_ll *wq,
@@ -177,9 +178,14 @@
 	return mlx5_cqwq_ctr2ix(wq, wq->cc);
 }
 
-static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+	struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+
+	/* For 128B CQEs the data is in the last 64B */
+	cqe += wq->fbc.log_stride == 7;
+
+	return cqe;
 }
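The cqe += (log_stride == 7) line is typed pointer arithmetic: the comparison yields 0 or 1, and advancing a struct mlx5_cqe64 pointer by one moves sizeof(struct mlx5_cqe64) = 64 bytes, landing on the second half of a 128B CQE, which is where the device writes the payload.

	/* Equivalent, spelled-out form of the adjustment above: */
	if (wq->fbc.log_stride == 7)	/* 128B CQEs */
		cqe += 1;		/* advance exactly 64 bytes */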
 
 static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)
@@ -238,6 +244,13 @@
 	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
+static inline u16 mlx5_wq_ll_get_wqe_next_ix(struct mlx5_wq_ll *wq, u16 ix)
+{
+	struct mlx5_wqe_srq_next_seg *wqe = mlx5_wq_ll_get_wqe(wq, ix);
+
+	return be16_to_cpu(wqe->next_wqe_index);
+}
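mlx5_wq_ll_get_wqe_next_ix() reads next_wqe_index out of the SRQ next-segment header, so linked-list WQ users can hop between entries without open-coding the cast and byte swap. An illustrative, bounded traversal (the bound by queue size is the caller's responsibility, assumed here to avoid cycling forever):

	static u16 example_walk(struct mlx5_wq_ll *wq)
	{
		u16 ix = 0;
		u32 i;

		for (i = 0; i < mlx5_wq_ll_get_size(wq); i++)
			ix = mlx5_wq_ll_get_wqe_next_ix(wq, ix);
		return ix;
	}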
+
 static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next)
 {
 	wq->head = head_next;
diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig
index 186ebe7..0367f83 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Mellanox firmware flash library configuration
 #
diff --git a/drivers/net/ethernet/mellanox/mlxfw/Makefile b/drivers/net/ethernet/mellanox/mlxfw/Makefile
index 7448b30..36007cd 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxfw/Makefile
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MLXFW)	+= mlxfw.o
 mlxfw-objs		:= mlxfw_fsm.o mlxfw_mfa2_tlv_multi.o mlxfw_mfa2.o
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
index 7a712b6..c50e74a 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
@@ -1,41 +1,11 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #ifndef _MLXFW_H
 #define _MLXFW_H
 
 #include <linux/firmware.h>
+#include <linux/netlink.h>
 
 enum mlxfw_fsm_state {
 	MLXFW_FSM_STATE_IDLE,
@@ -88,6 +58,10 @@
 	void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
 
 	void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
+
+	void (*status_notify)(struct mlxfw_dev *mlxfw_dev,
+			      const char *msg, const char *comp_name,
+			      u32 done_bytes, u32 total_bytes);
 };
 
 struct mlxfw_dev {
@@ -98,11 +72,13 @@
 
 #if IS_REACHABLE(CONFIG_MLXFW)
 int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
-			 const struct firmware *firmware);
+			 const struct firmware *firmware,
+			 struct netlink_ext_ack *extack);
 #else
 static inline
 int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
-			 const struct firmware *firmware)
+			 const struct firmware *firmware,
+			 struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
index 2cf8912..29e95d0 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
@@ -1,36 +1,5 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #define pr_fmt(fmt) "mlxfw: " fmt
 
@@ -70,8 +39,19 @@
 		"unknown error"
 };
 
+static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev,
+				const char *msg, const char *comp_name,
+				u32 done_bytes, u32 total_bytes)
+{
+	if (!mlxfw_dev->ops->status_notify)
+		return;
+	mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name,
+				      done_bytes, total_bytes);
+}
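mlxfw_status_notify() makes status_notify an optional op: a driver that leaves the callback NULL still compiles and runs, it just reports no progress. A hedged sketch of the driver side — struct my_dev is hypothetical, and forwarding to devlink_flash_update_status_notify() is an assumption about how consumers of this library typically use the hook, not something this patch shows:

	static void my_status_notify(struct mlxfw_dev *mlxfw_dev,
				     const char *msg, const char *comp_name,
				     u32 done_bytes, u32 total_bytes)
	{
		struct my_dev *mydev =
			container_of(mlxfw_dev, struct my_dev, mlxfw_dev);

		devlink_flash_update_status_notify(mydev->devlink, msg,
						   comp_name, done_bytes,
						   total_bytes);
	}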
+
 static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
-				enum mlxfw_fsm_state fsm_state)
+				enum mlxfw_fsm_state fsm_state,
+				struct netlink_ext_ack *extack)
 {
 	enum mlxfw_fsm_state_err fsm_state_err;
 	enum mlxfw_fsm_state curr_fsm_state;
@@ -86,13 +66,17 @@
 		return err;
 
 	if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) {
+		fsm_state_err = min_t(enum mlxfw_fsm_state_err,
+				      fsm_state_err, MLXFW_FSM_STATE_ERR_MAX);
 		pr_err("Firmware flash failed: %s\n",
 		       mlxfw_fsm_state_err_str[fsm_state_err]);
+		NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed");
 		return -EINVAL;
 	}
 	if (curr_fsm_state != fsm_state) {
 		if (--times == 0) {
 			pr_err("Timeout reached on FSM state change");
+			NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change");
 			return -ETIMEDOUT;
 		}
 		msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS);
@@ -107,16 +91,20 @@
 
 static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
 				 u32 fwhandle,
-				 struct mlxfw_mfa2_component *comp)
+				 struct mlxfw_mfa2_component *comp,
+				 struct netlink_ext_ack *extack)
 {
 	u16 comp_max_write_size;
 	u8 comp_align_bits;
 	u32 comp_max_size;
+	char comp_name[8];
 	u16 block_size;
 	u8 *block_ptr;
 	u32 offset;
 	int err;
 
+	sprintf(comp_name, "%u", comp->index);
+
 	err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index,
 					      &comp_max_size, &comp_align_bits,
 					      &comp_max_write_size);
@@ -127,6 +115,7 @@
 	if (comp->data_size > comp_max_size) {
 		pr_err("Component %d is of size %d which is bigger than limit %d\n",
 		       comp->index, comp->data_size, comp_max_size);
+		NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit");
 		return -EINVAL;
 	}
 
@@ -134,6 +123,7 @@
 					       comp_align_bits);
 
 	pr_debug("Component update\n");
+	mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0);
 	err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle,
 						   comp->index,
 						   comp->data_size);
@@ -141,11 +131,13 @@
 		return err;
 
 	err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
-				   MLXFW_FSM_STATE_DOWNLOAD);
+				   MLXFW_FSM_STATE_DOWNLOAD, extack);
 	if (err)
 		goto err_out;
 
 	pr_debug("Component download\n");
+	mlxfw_status_notify(mlxfw_dev, "Downloading component",
+			    comp_name, 0, comp->data_size);
 	for (offset = 0;
 	     offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits);
 	     offset += comp_max_write_size) {
@@ -157,15 +149,20 @@
 							 offset);
 		if (err)
 			goto err_out;
+		mlxfw_status_notify(mlxfw_dev, "Downloading component",
+				    comp_name, offset + block_size,
+				    comp->data_size);
 	}
 
 	pr_debug("Component verify\n");
+	mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0);
 	err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle,
 						   comp->index);
 	if (err)
 		goto err_out;
 
-	err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED);
+	err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
+				   MLXFW_FSM_STATE_LOCKED, extack);
 	if (err)
 		goto err_out;
 	return 0;
@@ -176,7 +173,8 @@
 }
 
 static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
-				  struct mlxfw_mfa2_file *mfa2_file)
+				  struct mlxfw_mfa2_file *mfa2_file,
+				  struct netlink_ext_ack *extack)
 {
 	u32 component_count;
 	int err;
@@ -187,6 +185,7 @@
 					      &component_count);
 	if (err) {
 		pr_err("Could not find device PSID in MFA2 file\n");
+		NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file");
 		return err;
 	}
 
@@ -199,7 +198,7 @@
 			return PTR_ERR(comp);
 
 		pr_info("Flashing component type %d\n", comp->index);
-		err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp);
+		err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack);
 		mlxfw_mfa2_file_component_put(comp);
 		if (err)
 			return err;
@@ -208,7 +207,8 @@
 }
 
 int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
-			 const struct firmware *firmware)
+			 const struct firmware *firmware,
+			 struct netlink_ext_ack *extack)
 {
 	struct mlxfw_mfa2_file *mfa2_file;
 	u32 fwhandle;
@@ -216,6 +216,7 @@
 
 	if (!mlxfw_mfa2_check(firmware)) {
 		pr_err("Firmware file is not MFA2\n");
+		NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2");
 		return -EINVAL;
 	}
 
@@ -224,29 +225,35 @@
 		return PTR_ERR(mfa2_file);
 
 	pr_info("Initialize firmware flash process\n");
+	mlxfw_status_notify(mlxfw_dev, "Initializing firmware flash process",
+			    NULL, 0, 0);
 	err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle);
 	if (err) {
 		pr_err("Could not lock the firmware FSM\n");
+		NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM");
 		goto err_fsm_lock;
 	}
 
 	err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
-				   MLXFW_FSM_STATE_LOCKED);
+				   MLXFW_FSM_STATE_LOCKED, extack);
 	if (err)
 		goto err_state_wait_idle_to_locked;
 
-	err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file);
+	err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack);
 	if (err)
 		goto err_flash_components;
 
 	pr_debug("Activate image\n");
+	mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0);
 	err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle);
 	if (err) {
 		pr_err("Could not activate the downloaded image\n");
+		NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image");
 		goto err_fsm_activate;
 	}
 
-	err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED);
+	err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
+				   MLXFW_FSM_STATE_LOCKED, extack);
 	if (err)
 		goto err_state_wait_activate_to_locked;
 
@@ -254,6 +261,7 @@
 	mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle);
 
 	pr_info("Firmware flash done.\n");
+	mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0);
 	mlxfw_mfa2_file_fini(mfa2_file);
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
index 993cb5b..544344a 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
@@ -1,36 +1,5 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #define pr_fmt(fmt) "mlxfw_mfa2: " fmt
 
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
index 20472aa..5bba6ad 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
@@ -1,36 +1,5 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #ifndef _MLXFW_MFA2_H
 #define _MLXFW_MFA2_H
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
index f667942..874c0a2 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
@@ -1,36 +1,5 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #ifndef _MLXFW_MFA2_FILE_H
 #define _MLXFW_MFA2_FILE_H
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
index dd66737..b001e52 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
@@ -1,36 +1,6 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
+
 #ifndef _MLXFW_MFA2_FORMAT_H
 #define _MLXFW_MFA2_FORMAT_H
 
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
index cc013e7..33c9711 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
@@ -1,36 +1,5 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #ifndef _MLXFW_MFA2_TLV_H
 #define _MLXFW_MFA2_TLV_H
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
index 0094b92..017d68f 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
@@ -1,36 +1,5 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
 
 #define pr_fmt(fmt) "MFA2: " fmt
 
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
index 2c66789..633284e 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
@@ -1,36 +1,6 @@
-/*
- * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */
+
 #ifndef _MLXFW_MFA2_TLV_MULTI_H
 #define _MLXFW_MFA2_TLV_MULTI_H
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 8a291eb..f458fd1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Mellanox switch drivers configuration
 #
 
 config MLXSW_CORE
 	tristate "Mellanox Technologies Switch ASICs support"
-	depends on MAY_USE_DEVLINK
+	select NET_DEVLINK
 	---help---
 	  This driver supports Mellanox Technologies Switch ASICs family.
 
@@ -22,7 +23,6 @@
 config MLXSW_CORE_THERMAL
 	bool "Thermal zone support for Mellanox Technologies Switch ASICs"
 	depends on MLXSW_CORE && THERMAL
-	depends on !(MLXSW_CORE=y && THERMAL=m)
 	default y
 	---help---
 	 Say Y here if you want to automatically control fans speed according
@@ -71,20 +71,24 @@
 	  module will be called mlxsw_switchx2.
 
 config MLXSW_SPECTRUM
-	tristate "Mellanox Technologies Spectrum support"
+	tristate "Mellanox Technologies Spectrum family support"
 	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
 	depends on PSAMPLE || PSAMPLE=n
 	depends on BRIDGE || BRIDGE=n
 	depends on IPV6 || IPV6=n
 	depends on NET_IPGRE || NET_IPGRE=n
 	depends on IPV6_GRE || IPV6_GRE=n
+	depends on VXLAN || VXLAN=n
 	select GENERIC_ALLOCATOR
 	select PARMAN
+	select OBJAGG
 	select MLXFW
+	imply PTP_1588_CLOCK
+	select NET_PTP_CLASSIFY if PTP_1588_CLOCK
 	default m
 	---help---
-	  This driver supports Mellanox Technologies Spectrum Ethernet
-	  Switch ASICs.
+	  This driver supports Mellanox Technologies
+	  Spectrum/Spectrum-2/Spectrum-3 Ethernet Switch ASICs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called mlxsw_spectrum.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 68fa44a..0e86a58 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_MLXSW_CORE)	+= mlxsw_core.o
 mlxsw_core-objs			:= core.o core_acl_flex_keys.o \
-				   core_acl_flex_actions.o
+				   core_acl_flex_actions.o core_env.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o
 obj-$(CONFIG_MLXSW_PCI)		+= mlxsw_pci.o
@@ -20,15 +20,17 @@
 				   spectrum_acl_tcam.o spectrum_acl_ctcam.o \
 				   spectrum_acl_atcam.o spectrum_acl_erp.o \
 				   spectrum1_acl_tcam.o spectrum2_acl_tcam.o \
-				   spectrum_acl.o \
+				   spectrum_acl_bloom_filter.o spectrum_acl.o \
 				   spectrum_flower.o spectrum_cnt.o \
 				   spectrum_fid.o spectrum_ipip.o \
 				   spectrum_acl_flex_actions.o \
 				   spectrum_acl_flex_keys.o \
 				   spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
 				   spectrum_mr_tcam.o spectrum_mr.o \
-				   spectrum_qdisc.o spectrum_span.o
+				   spectrum_qdisc.o spectrum_span.o \
+				   spectrum_nve.o spectrum_nve_vxlan.o \
+				   spectrum_dpipe.o spectrum_trap.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
-mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
+mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK)		+= spectrum_ptp.o
 obj-$(CONFIG_MLXSW_MINIMAL)	+= mlxsw_minimal.o
 mlxsw_minimal-objs		:= minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 0772e43..5ffdfb5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -317,6 +317,18 @@
  */
 MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2);
 
+/* cmd_mbox_query_fw_free_running_clock_offset
+ * The offset of the free running clock page
+ */
+MLXSW_ITEM64(cmd_mbox, query_fw, free_running_clock_offset, 0x50, 0, 64);
+
+/* cmd_mbox_query_fw_fr_rn_clk_bar
+ * PCI base address register (BAR) of the free running clock page
+ * 0: BAR 0
+ * 1: 64 bit BAR
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fr_rn_clk_bar, 0x58, 30, 2);
+
 /* QUERY_BOARDINFO - Query Board Information
  * -----------------------------------------
  * OpMod == 0 (N/A), INMmod == 0 (N/A)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 30f751e..0a0884d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -80,7 +80,7 @@
 	struct mlxsw_thermal *thermal;
 	struct mlxsw_core_port *ports;
 	unsigned int max_ports;
-	bool reload_fail;
+	bool fw_flash_in_progress;
 	unsigned long driver_priv[0];
 	/* driver_priv has to be always the last item */
 };
@@ -121,6 +121,12 @@
 }
 EXPORT_SYMBOL(mlxsw_core_driver_priv);
 
+bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core)
+{
+	return mlxsw_core->driver->res_query_enabled;
+}
+EXPORT_SYMBOL(mlxsw_core_res_query_enabled);
+
 struct mlxsw_rx_listener_item {
 	struct list_head list;
 	struct mlxsw_rx_listener rxl;
@@ -428,12 +434,16 @@
 	struct rcu_head rcu;
 };
 
-#define MLXSW_EMAD_TIMEOUT_MS 200
+#define MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS	3000
+#define MLXSW_EMAD_TIMEOUT_MS			200
 
 static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans)
 {
 	unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS);
 
+	if (trans->core->fw_flash_in_progress)
+		timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS);
+
 	queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout);
 }
 
@@ -563,7 +573,7 @@
 	if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
 		return 0;
 
-	emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0);
+	emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0);
 	if (!emad_wq)
 		return -ENOMEM;
 	mlxsw_core->emad_wq = emad_wq;
@@ -776,7 +786,8 @@
 static int
 mlxsw_devlink_sb_pool_set(struct devlink *devlink,
 			  unsigned int sb_index, u16 pool_index, u32 size,
-			  enum devlink_sb_threshold_type threshold_type)
+			  enum devlink_sb_threshold_type threshold_type,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
@@ -784,7 +795,8 @@
 	if (!mlxsw_driver->sb_pool_set)
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index,
-					 pool_index, size, threshold_type);
+					 pool_index, size, threshold_type,
+					 extack);
 }
 
 static void *__dl_port(struct devlink_port *devlink_port)
@@ -824,7 +836,8 @@
 
 static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port,
 					  unsigned int sb_index, u16 pool_index,
-					  u32 threshold)
+					  u32 threshold,
+					  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
@@ -834,7 +847,7 @@
 	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index,
-					      pool_index, threshold);
+					      pool_index, threshold, extack);
 }
 
 static int
@@ -859,7 +872,8 @@
 mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
 				  unsigned int sb_index, u16 tc_index,
 				  enum devlink_sb_pool_type pool_type,
-				  u16 pool_index, u32 threshold)
+				  u16 pool_index, u32 threshold,
+				  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
@@ -870,7 +884,7 @@
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index,
 						 tc_index, pool_type,
-						 pool_index, threshold);
+						 pool_index, threshold, extack);
 }
 
 static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink,
@@ -929,27 +943,136 @@
 						     pool_type, p_cur, p_max);
 }
 
-static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
-						struct netlink_ext_ack *extack)
+static int
+mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+		       struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	char fw_info_psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE];
+	u32 hw_rev, fw_major, fw_minor, fw_sub_minor;
+	char mgir_pl[MLXSW_REG_MGIR_LEN];
+	char buf[32];
 	int err;
 
+	err = devlink_info_driver_name_put(req,
+					   mlxsw_core->bus_info->device_kind);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgir_pack(mgir_pl);
+	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl);
+	if (err)
+		return err;
+	mlxsw_reg_mgir_unpack(mgir_pl, &hw_rev, fw_info_psid, &fw_major,
+			      &fw_minor, &fw_sub_minor);
+
+	sprintf(buf, "%X", hw_rev);
+	err = devlink_info_version_fixed_put(req, "hw.revision", buf);
+	if (err)
+		return err;
+
+	err = devlink_info_version_fixed_put(req, "fw.psid", fw_info_psid);
+	if (err)
+		return err;
+
+	sprintf(buf, "%d.%d.%d", fw_major, fw_minor, fw_sub_minor);
+	err = devlink_info_version_running_put(req, "fw.version", buf);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int
+mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
+					  struct netlink_ext_ack *extack)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+
 	if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET))
 		return -EOPNOTSUPP;
 
 	mlxsw_core_bus_device_unregister(mlxsw_core, true);
-	err = mlxsw_core_bus_device_register(mlxsw_core->bus_info,
-					     mlxsw_core->bus,
-					     mlxsw_core->bus_priv, true,
-					     devlink);
-	mlxsw_core->reload_fail = !!err;
+	return 0;
+}
 
-	return err;
+static int
+mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink,
+					struct netlink_ext_ack *extack)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+
+	return mlxsw_core_bus_device_register(mlxsw_core->bus_info,
+					      mlxsw_core->bus,
+					      mlxsw_core->bus_priv, true,
+					      devlink);
+}
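
These two callbacks replace the single monolithic reload op; devlink core now drives the teardown and bring-up halves separately. Roughly paraphrased, the caller side looks like this (a sketch for orientation, not the exact devlink core source):

	static int example_devlink_reload(struct devlink *devlink,
					  struct netlink_ext_ack *extack)
	{
		int err;

		/* Tear the instance down first... */
		err = devlink->ops->reload_down(devlink, extack);
		if (err)
			return err;

		/* ...then bring it back up on the same devlink instance. */
		return devlink->ops->reload_up(devlink, extack);
	}
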
+
+static int mlxsw_devlink_flash_update(struct devlink *devlink,
+				      const char *file_name,
+				      const char *component,
+				      struct netlink_ext_ack *extack)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+	if (!mlxsw_driver->flash_update)
+		return -EOPNOTSUPP;
+	return mlxsw_driver->flash_update(mlxsw_core, file_name,
+					  component, extack);
+}
+
+static int mlxsw_devlink_trap_init(struct devlink *devlink,
+				   const struct devlink_trap *trap,
+				   void *trap_ctx)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+	if (!mlxsw_driver->trap_init)
+		return -EOPNOTSUPP;
+	return mlxsw_driver->trap_init(mlxsw_core, trap, trap_ctx);
+}
+
+static void mlxsw_devlink_trap_fini(struct devlink *devlink,
+				    const struct devlink_trap *trap,
+				    void *trap_ctx)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+	if (!mlxsw_driver->trap_fini)
+		return;
+	mlxsw_driver->trap_fini(mlxsw_core, trap, trap_ctx);
+}
+
+static int mlxsw_devlink_trap_action_set(struct devlink *devlink,
+					 const struct devlink_trap *trap,
+					 enum devlink_trap_action action)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+	if (!mlxsw_driver->trap_action_set)
+		return -EOPNOTSUPP;
+	return mlxsw_driver->trap_action_set(mlxsw_core, trap, action);
+}
+
+static int
+mlxsw_devlink_trap_group_init(struct devlink *devlink,
+			      const struct devlink_trap_group *group)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+	if (!mlxsw_driver->trap_group_init)
+		return -EOPNOTSUPP;
+	return mlxsw_driver->trap_group_init(mlxsw_core, group);
 }
 
 static const struct devlink_ops mlxsw_devlink_ops = {
-	.reload				= mlxsw_devlink_core_bus_device_reload,
+	.reload_down		= mlxsw_devlink_core_bus_device_reload_down,
+	.reload_up		= mlxsw_devlink_core_bus_device_reload_up,
 	.port_type_set			= mlxsw_devlink_port_type_set,
 	.port_split			= mlxsw_devlink_port_split,
 	.port_unsplit			= mlxsw_devlink_port_unsplit,
@@ -963,12 +1086,19 @@
 	.sb_occ_max_clear		= mlxsw_devlink_sb_occ_max_clear,
 	.sb_occ_port_pool_get		= mlxsw_devlink_sb_occ_port_pool_get,
 	.sb_occ_tc_port_bind_get	= mlxsw_devlink_sb_occ_tc_port_bind_get,
+	.info_get			= mlxsw_devlink_info_get,
+	.flash_update			= mlxsw_devlink_flash_update,
+	.trap_init			= mlxsw_devlink_trap_init,
+	.trap_fini			= mlxsw_devlink_trap_fini,
+	.trap_action_set		= mlxsw_devlink_trap_action_set,
+	.trap_group_init		= mlxsw_devlink_trap_group_init,
 };
 
-int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
-				   const struct mlxsw_bus *mlxsw_bus,
-				   void *bus_priv, bool reload,
-				   struct devlink *devlink)
+static int
+__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
+				 const struct mlxsw_bus *mlxsw_bus,
+				 void *bus_priv, bool reload,
+				 struct devlink *devlink)
 {
 	const char *device_kind = mlxsw_bus_info->device_kind;
 	struct mlxsw_core *mlxsw_core;
@@ -1035,6 +1165,18 @@
 			goto err_devlink_register;
 	}
 
+	if (mlxsw_driver->params_register && !reload) {
+		err = mlxsw_driver->params_register(mlxsw_core);
+		if (err)
+			goto err_register_params;
+	}
+
+	if (mlxsw_driver->init) {
+		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+		if (err)
+			goto err_driver_init;
+	}
+
 	err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
 	if (err)
 		goto err_hwmon_init;
@@ -1044,19 +1186,23 @@
 	if (err)
 		goto err_thermal_init;
 
-	if (mlxsw_driver->init) {
-		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
-		if (err)
-			goto err_driver_init;
-	}
+	if (mlxsw_driver->params_register)
+		devlink_params_publish(devlink);
+
+	if (!reload)
+		devlink_reload_enable(devlink);
 
 	return 0;
 
-err_driver_init:
-	mlxsw_thermal_fini(mlxsw_core->thermal);
 err_thermal_init:
 	mlxsw_hwmon_fini(mlxsw_core->hwmon);
 err_hwmon_init:
+	if (mlxsw_core->driver->fini)
+		mlxsw_core->driver->fini(mlxsw_core);
+err_driver_init:
+	if (mlxsw_driver->params_unregister && !reload)
+		mlxsw_driver->params_unregister(mlxsw_core);
+err_register_params:
 	if (!reload)
 		devlink_unregister(devlink);
 err_devlink_register:
@@ -1076,6 +1222,29 @@
 err_devlink_alloc:
 	return err;
 }
+
+int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
+				   const struct mlxsw_bus *mlxsw_bus,
+				   void *bus_priv, bool reload,
+				   struct devlink *devlink)
+{
+	bool called_again = false;
+	int err;
+
+again:
+	err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus,
+					       bus_priv, reload, devlink);
+	/* -EAGAIN is returned when the FW was just updated. The FW needs
+	 * a reset, so let's try to call __mlxsw_core_bus_device_register()
+	 * again.
+	 */
+	if (err == -EAGAIN && !called_again) {
+		called_again = true;
+		goto again;
+	}
+
+	return err;
+}
 EXPORT_SYMBOL(mlxsw_core_bus_device_register);
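
For the retry above to trigger, some step inside __mlxsw_core_bus_device_register() has to fail with -EAGAIN after flashing newer firmware. A hedged sketch of what such a driver-side step could look like (the example_* helpers are hypothetical, not part of this patch; mlxsw_core_fw_flash_start()/end() are added further below):

	static int example_fw_rev_validate(struct mlxsw_core *mlxsw_core)
	{
		if (example_fw_rev_is_supported(mlxsw_core))	/* hypothetical */
			return 0;

		/* Flash the bundled firmware, then ask the core to reset the
		 * device and run the whole registration once more.
		 */
		mlxsw_core_fw_flash_start(mlxsw_core);
		example_fw_flash(mlxsw_core);			/* hypothetical */
		mlxsw_core_fw_flash_end(mlxsw_core);
		return -EAGAIN;
	}
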
 
 void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
@@ -1083,7 +1252,9 @@
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
-	if (mlxsw_core->reload_fail) {
+	if (!reload)
+		devlink_reload_disable(devlink);
+	if (devlink_is_reload_failed(devlink)) {
 		if (!reload)
 			/* Only the parts that were not de-initialized in the
 			 * failed reload attempt need to be de-initialized.
@@ -1093,10 +1264,14 @@
 			return;
 	}
 
-	if (mlxsw_core->driver->fini)
-		mlxsw_core->driver->fini(mlxsw_core);
+	if (mlxsw_core->driver->params_unregister)
+		devlink_params_unpublish(devlink);
 	mlxsw_thermal_fini(mlxsw_core->thermal);
 	mlxsw_hwmon_fini(mlxsw_core->hwmon);
+	if (mlxsw_core->driver->fini)
+		mlxsw_core->driver->fini(mlxsw_core);
+	if (mlxsw_core->driver->params_unregister && !reload)
+		mlxsw_core->driver->params_unregister(mlxsw_core);
 	if (!reload)
 		devlink_unregister(devlink);
 	mlxsw_emad_fini(mlxsw_core);
@@ -1109,6 +1284,8 @@
 	return;
 
 reload_fail_deinit:
+	if (mlxsw_core->driver->params_unregister)
+		mlxsw_core->driver->params_unregister(mlxsw_core);
 	devlink_unregister(devlink);
 	devlink_resources_unregister(devlink, NULL);
 	devlink_free(devlink);
@@ -1131,6 +1308,15 @@
 }
 EXPORT_SYMBOL(mlxsw_core_skb_transmit);
 
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+				struct sk_buff *skb, u8 local_port)
+{
+	if (mlxsw_core->driver->ptp_transmitted)
+		mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb,
+						    local_port);
+}
+EXPORT_SYMBOL(mlxsw_core_ptp_transmitted);
+
 static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a,
 				   const struct mlxsw_rx_listener *rxl_b)
 {
@@ -1354,6 +1540,18 @@
 }
 EXPORT_SYMBOL(mlxsw_core_trap_unregister);
 
+int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core,
+			       const struct mlxsw_listener *listener,
+			       enum mlxsw_reg_hpkt_action action)
+{
+	char hpkt_pl[MLXSW_REG_HPKT_LEN];
+
+	mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id,
+			    listener->trap_group, listener->is_ctrl);
+	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+}
+EXPORT_SYMBOL(mlxsw_core_trap_action_set);
+
 static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core)
 {
 	return atomic64_inc_return(&mlxsw_core->emad.tid);
@@ -1418,13 +1616,17 @@
 	if (trans->retries)
 		dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n",
 			 trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid);
-	if (err)
+	if (err) {
 		dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n",
 			trans->tid, trans->reg->id,
 			mlxsw_reg_id_str(trans->reg->id),
 			mlxsw_core_reg_access_type_str(trans->type),
 			trans->emad_status,
 			mlxsw_emad_op_tlv_status_str(trans->emad_status));
+		trace_devlink_hwerr(priv_to_devlink(mlxsw_core),
+				    trans->emad_status,
+				    mlxsw_emad_op_tlv_status_str(trans->emad_status));
+	}
 
 	list_del(&trans->bulk_list);
 	kfree_rcu(trans, rcu);
@@ -1667,7 +1869,12 @@
 }
 EXPORT_SYMBOL(mlxsw_core_res_get);
 
-int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port)
+static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port,
+				  enum devlink_port_flavour flavour,
+				  u32 port_number, bool split,
+				  u32 split_port_subnumber,
+				  const unsigned char *switch_id,
+				  unsigned char switch_id_len)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 	struct mlxsw_core_port *mlxsw_core_port =
@@ -1676,14 +1883,16 @@
 	int err;
 
 	mlxsw_core_port->local_port = local_port;
+	devlink_port_attrs_set(devlink_port, flavour, port_number,
+			       split, split_port_subnumber,
+			       switch_id, switch_id_len);
 	err = devlink_port_register(devlink, devlink_port, local_port);
 	if (err)
 		memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
 	return err;
 }
-EXPORT_SYMBOL(mlxsw_core_port_init);
 
-void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
+static void __mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
 	struct mlxsw_core_port *mlxsw_core_port =
 					&mlxsw_core->ports[local_port];
@@ -1692,20 +1901,61 @@
 	devlink_port_unregister(devlink_port);
 	memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
 }
+
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port,
+			 u32 port_number, bool split,
+			 u32 split_port_subnumber,
+			 const unsigned char *switch_id,
+			 unsigned char switch_id_len)
+{
+	return __mlxsw_core_port_init(mlxsw_core, local_port,
+				      DEVLINK_PORT_FLAVOUR_PHYSICAL,
+				      port_number, split, split_port_subnumber,
+				      switch_id, switch_id_len);
+}
+EXPORT_SYMBOL(mlxsw_core_port_init);
+
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
+{
+	__mlxsw_core_port_fini(mlxsw_core, local_port);
+}
 EXPORT_SYMBOL(mlxsw_core_port_fini);
 
+int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core,
+			     void *port_driver_priv,
+			     const unsigned char *switch_id,
+			     unsigned char switch_id_len)
+{
+	struct mlxsw_core_port *mlxsw_core_port =
+				&mlxsw_core->ports[MLXSW_PORT_CPU_PORT];
+	int err;
+
+	err = __mlxsw_core_port_init(mlxsw_core, MLXSW_PORT_CPU_PORT,
+				     DEVLINK_PORT_FLAVOUR_CPU,
+				     0, false, 0,
+				     switch_id, switch_id_len);
+	if (err)
+		return err;
+
+	mlxsw_core_port->port_driver_priv = port_driver_priv;
+	return 0;
+}
+EXPORT_SYMBOL(mlxsw_core_cpu_port_init);
+
+void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core)
+{
+	__mlxsw_core_port_fini(mlxsw_core, MLXSW_PORT_CPU_PORT);
+}
+EXPORT_SYMBOL(mlxsw_core_cpu_port_fini);
+
 void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
-			     void *port_driver_priv, struct net_device *dev,
-			     u32 port_number, bool split,
-			     u32 split_port_subnumber)
+			     void *port_driver_priv, struct net_device *dev)
 {
 	struct mlxsw_core_port *mlxsw_core_port =
 					&mlxsw_core->ports[local_port];
 	struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
 
 	mlxsw_core_port->port_driver_priv = port_driver_priv;
-	devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
-			       port_number, split, split_port_subnumber);
 	devlink_port_type_eth_set(devlink_port, dev);
 }
 EXPORT_SYMBOL(mlxsw_core_port_eth_set);
@@ -1745,16 +1995,18 @@
 }
 EXPORT_SYMBOL(mlxsw_core_port_type_get);
 
-int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core,
-				       u8 local_port, char *name, size_t len)
+struct devlink_port *
+mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
+				 u8 local_port)
 {
 	struct mlxsw_core_port *mlxsw_core_port =
 					&mlxsw_core->ports[local_port];
 	struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
 
-	return devlink_port_get_phys_port_name(devlink_port, name, len);
+	return devlink_port;
 }
-EXPORT_SYMBOL(mlxsw_core_port_get_phys_port_name);
+EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get);
 
 static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
 				    const char *buf, size_t size)
@@ -1854,14 +2106,75 @@
 }
 EXPORT_SYMBOL(mlxsw_core_kvd_sizes_get);
 
+void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core)
+{
+	mlxsw_core->fw_flash_in_progress = true;
+}
+EXPORT_SYMBOL(mlxsw_core_fw_flash_start);
+
+void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core)
+{
+	mlxsw_core->fw_flash_in_progress = false;
+}
+EXPORT_SYMBOL(mlxsw_core_fw_flash_end);
+
+int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox,
+			       struct mlxsw_res *res)
+{
+	int index, i;
+	u64 data;
+	u16 id;
+	int err;
+
+	if (!res)
+		return 0;
+
+	mlxsw_cmd_mbox_zero(mbox);
+
+	for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES;
+	     index++) {
+		err = mlxsw_cmd_query_resources(mlxsw_core, mbox, index);
+		if (err)
+			return err;
+
+		for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) {
+			id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i);
+			data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i);
+
+			if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID)
+				return 0;
+
+			mlxsw_res_parse(res, id, data);
+		}
+	}
+
+	/* If after MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES queries we still
+	 * didn't get MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID, something went
+	 * wrong in the FW.
+	 */
+	return -EIO;
+}
+EXPORT_SYMBOL(mlxsw_core_resources_query);
+
+u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core)
+{
+	return mlxsw_core->bus->read_frc_h(mlxsw_core->bus_priv);
+}
+EXPORT_SYMBOL(mlxsw_core_read_frc_h);
+
+u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core)
+{
+	return mlxsw_core->bus->read_frc_l(mlxsw_core->bus_priv);
+}
+EXPORT_SYMBOL(mlxsw_core_read_frc_l);
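
Because the 64-bit free-running counter is exposed as two independent 32-bit reads, a consumer has to guard against the low word wrapping between them. A minimal sketch using the usual high/low/high re-read idiom (the helper name is illustrative):

	static u64 example_read_frc(struct mlxsw_core *mlxsw_core)
	{
		u32 hi, lo;

		/* Re-read the high word until it is stable, so a low-word
		 * wraparound between the two reads cannot yield a torn value.
		 */
		do {
			hi = mlxsw_core_read_frc_h(mlxsw_core);
			lo = mlxsw_core_read_frc_l(mlxsw_core);
		} while (hi != mlxsw_core_read_frc_h(mlxsw_core));

		return ((u64) hi << 32) | lo;
	}
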
+
 static int __init mlxsw_core_module_init(void)
 {
 	int err;
 
-	mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0);
+	mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
 	if (!mlxsw_wq)
 		return -ENOMEM;
-	mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM,
+	mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
 					    mlxsw_core_driver_name);
 	if (!mlxsw_owq) {
 		err = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index c35be47..5d7d2ab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -28,6 +28,8 @@
 
 void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
 
+bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core);
+
 int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver);
 void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver);
 
@@ -46,6 +48,8 @@
 				  const struct mlxsw_tx_info *tx_info);
 int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
 			    const struct mlxsw_tx_info *tx_info);
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+				struct sk_buff *skb, u8 local_port);
 
 struct mlxsw_rx_listener {
 	void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
@@ -124,6 +128,9 @@
 void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
 				const struct mlxsw_listener *listener,
 				void *priv);
+int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core,
+			       const struct mlxsw_listener *listener,
+			       enum mlxsw_reg_hpkt_action action);
 
 typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload,
 				  size_t payload_len, unsigned long cb_priv);
@@ -164,24 +171,34 @@
 				  u16 lag_id, u8 local_port);
 
 void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port);
-int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port);
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port,
+			 u32 port_number, bool split,
+			 u32 split_port_subnumber,
+			 const unsigned char *switch_id,
+			 unsigned char switch_id_len);
 void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port);
+int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core,
+			     void *port_driver_priv,
+			     const unsigned char *switch_id,
+			     unsigned char switch_id_len);
+void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core);
 void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
-			     void *port_driver_priv, struct net_device *dev,
-			     u32 port_number, bool split,
-			     u32 split_port_subnumber);
+			     void *port_driver_priv, struct net_device *dev);
 void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
 			    void *port_driver_priv);
 void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
 			   void *port_driver_priv);
 enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
 						u8 local_port);
-int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core,
-				       u8 local_port, char *name, size_t len);
+struct devlink_port *
+mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
+				 u8 local_port);
 
 int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
 bool mlxsw_core_schedule_work(struct work_struct *work);
 void mlxsw_core_flush_owq(void);
+int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox,
+			       struct mlxsw_res *res);
 
 #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8
 
@@ -249,13 +266,14 @@
 			   struct devlink_sb_pool_info *pool_info);
 	int (*sb_pool_set)(struct mlxsw_core *mlxsw_core,
 			   unsigned int sb_index, u16 pool_index, u32 size,
-			   enum devlink_sb_threshold_type threshold_type);
+			   enum devlink_sb_threshold_type threshold_type,
+			   struct netlink_ext_ack *extack);
 	int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
 				unsigned int sb_index, u16 pool_index,
 				u32 *p_threshold);
 	int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port,
 				unsigned int sb_index, u16 pool_index,
-				u32 threshold);
+				u32 threshold, struct netlink_ext_ack *extack);
 	int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
 				   unsigned int sb_index, u16 tc_index,
 				   enum devlink_sb_pool_type pool_type,
@@ -263,7 +281,8 @@
 	int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port,
 				   unsigned int sb_index, u16 tc_index,
 				   enum devlink_sb_pool_type pool_type,
-				   u16 pool_index, u32 threshold);
+				   u16 pool_index, u32 threshold,
+				   struct netlink_ext_ack *extack);
 	int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core,
 			       unsigned int sb_index);
 	int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core,
@@ -275,6 +294,18 @@
 				       unsigned int sb_index, u16 tc_index,
 				       enum devlink_sb_pool_type pool_type,
 				       u32 *p_cur, u32 *p_max);
+	int (*flash_update)(struct mlxsw_core *mlxsw_core,
+			    const char *file_name, const char *component,
+			    struct netlink_ext_ack *extack);
+	int (*trap_init)(struct mlxsw_core *mlxsw_core,
+			 const struct devlink_trap *trap, void *trap_ctx);
+	void (*trap_fini)(struct mlxsw_core *mlxsw_core,
+			  const struct devlink_trap *trap, void *trap_ctx);
+	int (*trap_action_set)(struct mlxsw_core *mlxsw_core,
+			       const struct devlink_trap *trap,
+			       enum devlink_trap_action action);
+	int (*trap_group_init)(struct mlxsw_core *mlxsw_core,
+			       const struct devlink_trap_group *group);
 	void (*txhdr_construct)(struct sk_buff *skb,
 				const struct mlxsw_tx_info *tx_info);
 	int (*resources_register)(struct mlxsw_core *mlxsw_core);
@@ -282,6 +313,15 @@
 			     const struct mlxsw_config_profile *profile,
 			     u64 *p_single_size, u64 *p_double_size,
 			     u64 *p_linear_size);
+	int (*params_register)(struct mlxsw_core *mlxsw_core);
+	void (*params_unregister)(struct mlxsw_core *mlxsw_core);
+
+	/* Notify a driver that a timestamped packet was transmitted. Driver
+	 * is responsible for freeing the passed-in SKB.
+	 */
+	void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core,
+				struct sk_buff *skb, u8 local_port);
+
 	u8 txhdr_len;
 	const struct mlxsw_config_profile *profile;
 	bool res_query_enabled;
@@ -292,6 +332,12 @@
 			     u64 *p_single_size, u64 *p_double_size,
 			     u64 *p_linear_size);
 
+void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core);
+void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core);
+
+u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core);
+u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core);
+
 bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
 			  enum mlxsw_res_id res_id);
 
@@ -322,6 +368,8 @@
 			char *in_mbox, size_t in_mbox_size,
 			char *out_mbox, size_t out_mbox_size,
 			u8 *p_status);
+	u32 (*read_frc_h)(void *bus_priv);
+	u32 (*read_frc_l)(void *bus_priv);
 	u8 features;
 };
 
@@ -339,6 +387,8 @@
 	struct mlxsw_fw_rev fw_rev;
 	u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN];
 	u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
+	u8 low_frequency:1,
+	   read_frc_capable:1;
 };
 
 struct mlxsw_hwmon;
@@ -389,4 +439,19 @@
 
 #endif
 
+enum mlxsw_devlink_param_id {
+	MLXSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+	MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
+};
+
+struct mlxsw_skb_cb {
+	struct mlxsw_tx_info tx_info;
+};
+
+static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct mlxsw_skb_cb) > sizeof(skb->cb));
+	return (struct mlxsw_skb_cb *) skb->cb;
+}
+
 #endif
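
Usage of the new control-block helper is the usual skb->cb stash/restore pattern; a short illustrative fragment (the surrounding transmit and completion code is assumed, not shown in this patch):

	/* On transmit: stash the tx_info on the skb. The BUILD_BUG_ON in
	 * mlxsw_skb_cb() guarantees the private struct fits in the generic
	 * 48-byte skb->cb scratch area.
	 */
	mlxsw_skb_cb(skb)->tx_info = *tx_info;

	/* On completion: read it back from the same skb. */
	tx_info = &mlxsw_skb_cb(skb)->tx_info;
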
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index 785bf01..feb4672 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -30,8 +30,9 @@
 
 			elinst = &block->instances[j];
 			if (elinst->type != elinst->info->type ||
-			    elinst->item.size.bits !=
-			    elinst->info->item.size.bits)
+			    (!elinst->avoid_size_check &&
+			     elinst->item.size.bits !=
+			     elinst->info->item.size.bits))
 				return false;
 		}
 	}
@@ -236,12 +237,10 @@
 			  struct mlxsw_afk_element_usage *elusage)
 {
 	struct mlxsw_afk_key_info *key_info;
-	size_t alloc_size;
 	int err;
 
-	alloc_size = sizeof(*key_info) +
-		     sizeof(key_info->blocks[0]) * mlxsw_afk->max_blocks;
-	key_info = kzalloc(alloc_size, GFP_KERNEL);
+	key_info = kzalloc(struct_size(key_info, blocks, mlxsw_afk->max_blocks),
+			   GFP_KERNEL);
 	if (!key_info)
 		return ERR_PTR(-ENOMEM);
 	err = mlxsw_afk_picker(mlxsw_afk, key_info, elusage);
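
The struct_size() conversion above is mechanical; the helper from <linux/overflow.h> computes the same trailing-array size as the removed open-coded math, but saturates instead of wrapping:

	/* struct_size(key_info, blocks, n) evaluates to
	 *	sizeof(*key_info) + n * sizeof(key_info->blocks[0])
	 * except that an overflowing multiplication yields SIZE_MAX, so the
	 * following kzalloc() fails cleanly instead of under-allocating.
	 */
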
@@ -387,12 +386,12 @@
 
 static void mlxsw_sp_afk_encode_u32(const struct mlxsw_item *storage_item,
 				    const struct mlxsw_item *output_item,
-				    char *storage, char *output)
+				    char *storage, char *output, int diff)
 {
 	u32 value;
 
 	value = __mlxsw_item_get32(storage, storage_item, 0);
-	__mlxsw_item_set32(output, output_item, 0, value);
+	__mlxsw_item_set32(output, output_item, 0, value + diff);
 }
 
 static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item,
@@ -408,14 +407,14 @@
 
 static void
 mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
-			char *output, char *storage)
+			char *output, char *storage, int u32_diff)
 {
 	const struct mlxsw_item *storage_item = &elinst->info->item;
 	const struct mlxsw_item *output_item = &elinst->item;
 
 	if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
 		mlxsw_sp_afk_encode_u32(storage_item, output_item,
-					storage, output);
+					storage, output, u32_diff);
 	else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
 		mlxsw_sp_afk_encode_buf(storage_item, output_item,
 					storage, output);
@@ -426,15 +425,17 @@
 void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
 		      struct mlxsw_afk_key_info *key_info,
 		      struct mlxsw_afk_element_values *values,
-		      char *key, char *mask, int block_start, int block_end)
+		      char *key, char *mask)
 {
+	unsigned int blocks_count =
+			mlxsw_afk_key_info_blocks_count_get(key_info);
 	char block_mask[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE];
 	char block_key[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE];
 	const struct mlxsw_afk_element_inst *elinst;
 	enum mlxsw_afk_element element;
 	int block_index, i;
 
-	for (i = block_start; i <= block_end; i++) {
+	for (i = 0; i < blocks_count; i++) {
 		memset(block_key, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE);
 		memset(block_mask, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE);
 
@@ -446,15 +447,24 @@
 				continue;
 
 			mlxsw_sp_afk_encode_one(elinst, block_key,
-						values->storage.key);
+						values->storage.key,
+						elinst->u32_key_diff);
 			mlxsw_sp_afk_encode_one(elinst, block_mask,
-						values->storage.mask);
+						values->storage.mask, 0);
 		}
 
-		if (key)
-			mlxsw_afk->ops->encode_block(block_key, i, key);
-		if (mask)
-			mlxsw_afk->ops->encode_block(block_mask, i, mask);
+		mlxsw_afk->ops->encode_block(key, i, block_key);
+		mlxsw_afk->ops->encode_block(mask, i, block_mask);
 	}
 }
 EXPORT_SYMBOL(mlxsw_afk_encode);
+
+void mlxsw_afk_clear(struct mlxsw_afk *mlxsw_afk, char *key,
+		     int block_start, int block_end)
+{
+	int i;
+
+	for (i = block_start; i <= block_end; i++)
+		mlxsw_afk->ops->clear_block(key, i);
+}
+EXPORT_SYMBOL(mlxsw_afk_clear);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index c29c045..cb229b5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -33,6 +33,8 @@
 	MLXSW_AFK_ELEMENT_IP_TTL_,
 	MLXSW_AFK_ELEMENT_IP_ECN,
 	MLXSW_AFK_ELEMENT_IP_DSCP,
+	MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
+	MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
 	MLXSW_AFK_ELEMENT_MAX,
 };
 
@@ -72,7 +74,7 @@
  * define an internal storage geometry.
  */
 static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
-	MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8),
+	MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16),
 	MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2),
 	MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4),
 	MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2),
@@ -87,6 +89,8 @@
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
 	MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
+	MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3),
+	MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8),
 	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
 	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
 	MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
@@ -103,9 +107,14 @@
 	const struct mlxsw_afk_element_info *info;
 	enum mlxsw_afk_element_type type;
 	struct mlxsw_item item; /* element geometry in block */
+	int u32_key_diff; /* Diff added to the u32 element value before it
+			   * is written to the key, for values that need
+			   * adjustment on encode.
+			   */
+	bool avoid_size_check;
 };
 
-#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size)		\
+#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset,			\
+			       _shift, _size, _u32_key_diff, _avoid_size_check)	\
 	{									\
 		.info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element],	\
 		.type = _type,							\
@@ -115,15 +124,24 @@
 			.size = {.bits = _size},				\
 			.name = #_element,					\
 		},								\
+		.u32_key_diff = _u32_key_diff,					\
+		.avoid_size_check = _avoid_size_check,				\
 	}
 
 #define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size)		\
 	MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32,			\
-			       _element, _offset, _shift, _size)
+			       _element, _offset, _shift, _size, 0, false)
+
+#define MLXSW_AFK_ELEMENT_INST_EXT_U32(_element, _offset,			\
+				       _shift, _size, _key_diff,		\
+				       _avoid_size_check)			\
+	MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32,			\
+			       _element, _offset, _shift, _size,		\
+			       _key_diff, _avoid_size_check)
 
 #define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size)			\
 	MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF,			\
-			       _element, _offset, 0, _size)
+			       _element, _offset, 0, _size, 0, false)
 
 struct mlxsw_afk_block {
 	u16 encoding; /* block ID */
@@ -188,7 +206,8 @@
 struct mlxsw_afk_ops {
 	const struct mlxsw_afk_block *blocks;
 	unsigned int blocks_count;
-	void (*encode_block)(char *block, int block_index, char *output);
+	void (*encode_block)(char *output, int block_index, char *block);
+	void (*clear_block)(char *output, int block_index);
 };
 
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
@@ -228,6 +247,8 @@
 void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
 		      struct mlxsw_afk_key_info *key_info,
 		      struct mlxsw_afk_element_values *values,
-		      char *key, char *mask, int block_start, int block_end);
+		      char *key, char *mask);
+void mlxsw_afk_clear(struct mlxsw_afk *mlxsw_afk, char *key,
+		     int block_start, int block_end);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
new file mode 100644
index 0000000..d2c7ce6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/sfp.h>
+
+#include "core.h"
+#include "core_env.h"
+#include "item.h"
+#include "reg.h"
+
+static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
+					  bool *qsfp)
+{
+	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
+	char mcia_pl[MLXSW_REG_MCIA_LEN];
+	u8 ident;
+	int err;
+
+	mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
+			    MLXSW_REG_MCIA_I2C_ADDR_LOW);
+	err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
+	if (err)
+		return err;
+	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+	ident = eeprom_tmp[0];
+	switch (ident) {
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
+		*qsfp = false;
+		break;
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: /* fall-through */
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: /* fall-through */
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD:
+		*qsfp = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
+			      u16 offset, u16 size, void *data,
+			      unsigned int *p_read_size)
+{
+	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
+	char mcia_pl[MLXSW_REG_MCIA_LEN];
+	u16 i2c_addr;
+	int status;
+	int err;
+
+	size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE);
+
+	if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH &&
+	    offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
+		/* Cross-page read: clamp to the end of the low page
+		 * (offset 256).
+		 */
+		size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset;
+
+	i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW;
+	if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) {
+		i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH;
+		offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH;
+	}
+
+	mlxsw_reg_mcia_pack(mcia_pl, module, 0, 0, offset, size, i2c_addr);
+
+	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
+	if (err)
+		return err;
+
+	status = mlxsw_reg_mcia_status_get(mcia_pl);
+	if (status)
+		return -EIO;
+
+	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+	memcpy(data, eeprom_tmp, size);
+	*p_read_size = size;
+
+	return 0;
+}
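
A worked example of the page handling above, for a request that straddles the page boundary (MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH is the 256-byte page size):

	/* 1st call: offset 250, size 20 -> clipped to size 6; read from
	 *           i2c_addr MLXSW_REG_MCIA_I2C_ADDR_LOW at offset 250.
	 * 2nd call: offset 256, size 14 -> remapped to
	 *           i2c_addr MLXSW_REG_MCIA_I2C_ADDR_HIGH at offset 0.
	 * The caller's read loop advances by *p_read_size, so the two calls
	 * together return the requested 20 bytes.
	 */
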
+
+int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
+					 int off, int *temp)
+{
+	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
+	union {
+		u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE];
+		u16 temp;
+	} temp_thresh;
+	char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int module_temp;
+	bool qsfp;
+	int err;
+
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+			    false, false);
+	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err)
+		return err;
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL);
+	if (!module_temp) {
+		*temp = 0;
+		return 0;
+	}
+
+	/* Read Free Side Device Temperature Thresholds from page 03h
+	 * (MSB at lower byte address).
+	 * Bytes:
+	 * 128-129 - Temp High Alarm (SFP_TEMP_HIGH_ALARM);
+	 * 130-131 - Temp Low Alarm (SFP_TEMP_LOW_ALARM);
+	 * 132-133 - Temp High Warning (SFP_TEMP_HIGH_WARN);
+	 * 134-135 - Temp Low Warning (SFP_TEMP_LOW_WARN);
+	 */
+
+	/* Validate module identifier value. */
+	err = mlxsw_env_validate_cable_ident(core, module, &qsfp);
+	if (err)
+		return err;
+
+	if (qsfp)
+		mlxsw_reg_mcia_pack(mcia_pl, module, 0,
+				    MLXSW_REG_MCIA_TH_PAGE_NUM,
+				    MLXSW_REG_MCIA_TH_PAGE_OFF + off,
+				    MLXSW_REG_MCIA_TH_ITEM_SIZE,
+				    MLXSW_REG_MCIA_I2C_ADDR_LOW);
+	else
+		mlxsw_reg_mcia_pack(mcia_pl, module, 0,
+				    MLXSW_REG_MCIA_PAGE0_LO,
+				    off, MLXSW_REG_MCIA_TH_ITEM_SIZE,
+				    MLXSW_REG_MCIA_I2C_ADDR_HIGH);
+
+	err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+	memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE);
+	*temp = temp_thresh.temp * 1000;
+
+	return 0;
+}
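
A worked decode of the threshold read above, assuming a little-endian host (which is what the union read relies on):

	/* SFF-8636 stores the threshold MSB-first in units of 1/256 C, so a
	 * 70 C high-warning threshold arrives as buf[0] = 0x46, buf[1] = 0x00
	 * (70 * 256). Reading those bytes through the u16 union member on a
	 * little-endian host yields 0x0046 = 70, which the final
	 * multiplication scales to 70000 millidegrees; the fractional byte
	 * is effectively discarded.
	 */
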
+
+int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+			      struct ethtool_modinfo *modinfo)
+{
+	u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
+	u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE;
+	u8 module_rev_id, module_id, diag_mon;
+	unsigned int read_size;
+	int err;
+
+	err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset,
+					    module_info, &read_size);
+	if (err)
+		return err;
+
+	if (read_size < offset)
+		return -EIO;
+
+	module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID];
+	module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID];
+
+	switch (module_id) {
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP:
+		modinfo->type       = ETH_MODULE_SFF_8436;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		break;
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28:
+		if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 ||
+		    module_rev_id >=
+		    MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) {
+			modinfo->type       = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+		} else {
+			modinfo->type       = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		}
+		break;
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
+		/* Verify if transceiver provides diagnostic monitoring page */
+		err = mlxsw_env_query_module_eeprom(mlxsw_core, module,
+						    SFP_DIAGMON, 1, &diag_mon,
+						    &read_size);
+		if (err)
+			return err;
+
+		if (read_size < 1)
+			return -EIO;
+
+		modinfo->type       = ETH_MODULE_SFF_8472;
+		if (diag_mon)
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		else
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mlxsw_env_get_module_info);
+
+int mlxsw_env_get_module_eeprom(struct net_device *netdev,
+				struct mlxsw_core *mlxsw_core, int module,
+				struct ethtool_eeprom *ee, u8 *data)
+{
+	int offset = ee->offset;
+	unsigned int read_size;
+	int i = 0;
+	int err;
+
+	if (!ee->len)
+		return -EINVAL;
+
+	memset(data, 0, ee->len);
+
+	while (i < ee->len) {
+		err = mlxsw_env_query_module_eeprom(mlxsw_core, module, offset,
+						    ee->len - i, data + i,
+						    &read_size);
+		if (err) {
+			netdev_err(netdev, "Eeprom query failed\n");
+			return err;
+		}
+
+		i += read_size;
+		offset += read_size;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mlxsw_env_get_module_eeprom);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
new file mode 100644
index 0000000..064d0e7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_CORE_ENV_H
+#define _MLXSW_CORE_ENV_H
+
+int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
+					 int off, int *temp);
+
+int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+			      struct ethtool_modinfo *modinfo);
+
+int mlxsw_env_get_module_eeprom(struct net_device *netdev,
+				struct mlxsw_core *mlxsw_core, int module,
+				struct ethtool_eeprom *ee, u8 *data);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index e04e816..5b00726 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -7,8 +7,10 @@
 #include <linux/sysfs.h>
 #include <linux/hwmon.h>
 #include <linux/err.h>
+#include <linux/sfp.h>
 
 #include "core.h"
+#include "core_env.h"
 
 #define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127
 #define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \
@@ -21,6 +23,14 @@
 	char name[32];
 };
 
+static int mlxsw_hwmon_get_attr_index(int index, int count)
+{
+	if (index >= count)
+		return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+
+	return index;
+}
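
A quick worked example of the mapping above (the gearbox base constant is defined in reg.h; its exact value is not material here, and the counts below are illustrative):

	/* With count = 66 (ASIC + module sensors) and attribute index 67:
	 * 67 >= 66, so the helper returns
	 *	67 % 66 + MLXSW_REG_MTMP_GBOX_INDEX_MIN,
	 * i.e. the second gearbox sensor in the MTMP index space. Indexes
	 * below count are passed through unchanged.
	 */
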
+
 struct mlxsw_hwmon {
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
@@ -30,6 +40,8 @@
 	struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1];
 	struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
 	unsigned int attrs_count;
+	u8 sensor_count;
+	u8 module_sensor_count;
 };
 
 static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -40,18 +52,19 @@
 			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
-	unsigned int temp;
+	int temp, index;
 	int err;
 
-	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
-			    false, false);
+	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+					   mlxsw_hwmon->module_sensor_count);
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
 		return err;
 	}
 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
-	return sprintf(buf, "%u\n", temp);
+	return sprintf(buf, "%d\n", temp);
 }
 
 static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
@@ -62,18 +75,19 @@
 			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
-	unsigned int temp_max;
+	int temp_max, index;
 	int err;
 
-	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
-			    false, false);
+	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+					   mlxsw_hwmon->module_sensor_count);
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
 		return err;
 	}
 	mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL);
-	return sprintf(buf, "%u\n", temp_max);
+	return sprintf(buf, "%d\n", temp_max);
 }
 
 static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
@@ -85,6 +99,7 @@
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
 	unsigned long val;
+	int index;
 	int err;
 
 	err = kstrtoul(buf, 10, &val);
@@ -93,7 +108,9 @@
 	if (val != 1)
 		return -EINVAL;
 
-	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true);
+	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+					   mlxsw_hwmon->module_sensor_count);
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
 	err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n");
@@ -121,6 +138,27 @@
 	return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl));
 }
 
+static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char fore_pl[MLXSW_REG_FORE_LEN];
+	bool fault;
+	int err;
+
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(fore), fore_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n");
+		return err;
+	}
+	mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault);
+
+	return sprintf(buf, "%u\n", fault);
+}
+
 static ssize_t mlxsw_hwmon_pwm_show(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
@@ -167,12 +205,155 @@
 	return len;
 }
 
+static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	u8 module;
+	int temp;
+	int err;
+
+	module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+			    false, false);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err)
+		return err;
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+
+	return sprintf(buf, "%d\n", temp);
+}
+
+static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev,
+						  struct device_attribute *attr,
+						  char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
+	u8 module, fault;
+	u16 temp;
+	int err;
+
+	module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+	mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
+			    1);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
+	if (err) {
+		dev_err(dev, "Failed to query module temperature sensor\n");
+		return err;
+	}
+
+	mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
+
+	/* Decode the module sensor status reported via MTBR. */
+	switch (temp) {
+	case MLXSW_REG_MTBR_BAD_SENS_INFO:
+		/* An untrusted cable is connected, so temperature readings
+		 * from its sensor are unreliable.
+		 */
+		fault = 1;
+		break;
+	case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
+	case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
+	case MLXSW_REG_MTBR_INDEX_NA:
+	default:
+		fault = 0;
+		break;
+	}
+
+	return sprintf(buf, "%u\n", fault);
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_critical_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	int temp;
+	u8 module;
+	int err;
+
+	module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+	err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
+						   SFP_TEMP_HIGH_WARN, &temp);
+	if (err) {
+		dev_err(dev, "Failed to query module temperature thresholds\n");
+		return err;
+	}
+
+	return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_emergency_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	u8 module;
+	int temp;
+	int err;
+
+	module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+	err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
+						   SFP_TEMP_HIGH_ALARM, &temp);
+	if (err) {
+		dev_err(dev, "Failed to query module temperature thresholds\n");
+		return err;
+	}
+
+	return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_label_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+
+	return sprintf(buf, "front panel %03u\n",
+		       mlwsw_hwmon_attr->type_index);
+}
+
+static ssize_t
+mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	int index = mlwsw_hwmon_attr->type_index -
+		    mlxsw_hwmon->module_sensor_count + 1;
+
+	return sprintf(buf, "gearbox %03u\n", index);
+}
+
 enum mlxsw_hwmon_attr_type {
 	MLXSW_HWMON_ATTR_TYPE_TEMP,
 	MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
 	MLXSW_HWMON_ATTR_TYPE_TEMP_RST,
 	MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+	MLXSW_HWMON_ATTR_TYPE_FAN_FAULT,
 	MLXSW_HWMON_ATTR_TYPE_PWM,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
 };
 
 static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
@@ -209,6 +390,12 @@
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "fan%u_input", num + 1);
 		break;
+	case MLXSW_HWMON_ATTR_TYPE_FAN_FAULT:
+		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_fault_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "fan%u_fault", num + 1);
+		break;
 	case MLXSW_HWMON_ATTR_TYPE_PWM:
 		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show;
 		mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store;
@@ -216,6 +403,47 @@
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "pwm%u", num + 1);
 		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE:
+		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_module_temp_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_input", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT:
+		mlxsw_hwmon_attr->dev_attr.show =
+					mlxsw_hwmon_module_temp_fault_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_fault", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT:
+		mlxsw_hwmon_attr->dev_attr.show =
+			mlxsw_hwmon_module_temp_critical_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_crit", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG:
+		mlxsw_hwmon_attr->dev_attr.show =
+			mlxsw_hwmon_module_temp_emergency_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_emergency", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL:
+		mlxsw_hwmon_attr->dev_attr.show =
+			mlxsw_hwmon_module_temp_label_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_label", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL:
+		mlxsw_hwmon_attr->dev_attr.show =
+			mlxsw_hwmon_gbox_temp_label_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_label", num + 1);
+		break;
 	default:
 		WARN_ON(1);
 	}
@@ -233,7 +461,6 @@
 {
 	char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0};
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
-	u8 sensor_count;
 	int i;
 	int err;
 
@@ -242,8 +469,8 @@
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n");
 		return err;
 	}
-	sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
-	for (i = 0; i < sensor_count; i++) {
+	mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
+	for (i = 0; i < mlxsw_hwmon->sensor_count; i++) {
 		mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true);
 		err = mlxsw_reg_write(mlxsw_hwmon->core,
 				      MLXSW_REG(mtmp), mtmp_pl);
@@ -280,10 +507,14 @@
 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
 	num = 0;
 	for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) {
-		if (tacho_active & BIT(type_index))
+		if (tacho_active & BIT(type_index)) {
 			mlxsw_hwmon_attr_add(mlxsw_hwmon,
 					     MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+					     type_index, num);
+			mlxsw_hwmon_attr_add(mlxsw_hwmon,
+					     MLXSW_HWMON_ATTR_TYPE_FAN_FAULT,
 					     type_index, num++);
+		}
 	}
 	num = 0;
 	for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) {
@@ -295,6 +526,104 @@
 	return 0;
 }
 
+static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core);
+	char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0};
+	int i, index;
+	u8 width;
+	int err;
+
+	if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core))
+		return 0;
+
+	/* Add extra attributes for module temperature. Module sensor
+	 * indexes start at sensor_count, since all indexes below
+	 * sensor_count are already taken by the sensors exposed through
+	 * the MTMP register by mlxsw_hwmon_temp_init().
+	 */
+	index = mlxsw_hwmon->sensor_count;
+	for (i = 1; i < module_count; i++) {
+		mlxsw_reg_pmlp_pack(pmlp_pl, i);
+		err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp),
+				      pmlp_pl);
+		if (err) {
+			dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n",
+				i);
+			return err;
+		}
+		width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+		if (!width)
+			continue;
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index,
+				     index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT,
+				     index, index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
+				     index, index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
+				     index, index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
+				     index, index);
+		index++;
+	}
+	mlxsw_hwmon->module_sensor_count = index;
+
+	return 0;
+}
+
+static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	int index, max_index, sensor_index;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	u8 gbox_num;
+	int err;
+
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL);
+	if (!gbox_num)
+		return 0;
+
+	index = mlxsw_hwmon->module_sensor_count;
+	max_index = mlxsw_hwmon->module_sensor_count + gbox_num;
+	while (index < max_index) {
+		sensor_index = index % mlxsw_hwmon->module_sensor_count +
+			       MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+		mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
+		err = mlxsw_reg_write(mlxsw_hwmon->core,
+				      MLXSW_REG(mtmp), mtmp_pl);
+		if (err) {
+			dev_err(mlxsw_hwmon->bus_info->dev, "Failed to set up temp sensor number %d\n",
+				sensor_index);
+			return err;
+		}
+		mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP,
+				     index, index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index,
+				     index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index,
+				     index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
+				     index, index);
+		index++;
+	}
+
+	return 0;
+}
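
Illustrative sketch (not part of the patch): how the loop above maps the
extended hwmon indexes onto MTMP gearbox sensor indexes. The value used for
MLXSW_REG_MTMP_GBOX_INDEX_MIN is an assumed placeholder.

#include <stdio.h>

#define GBOX_INDEX_MIN	256	/* assumed stand-in for MLXSW_REG_MTMP_GBOX_INDEX_MIN */

int main(void)
{
	int module_sensor_count = 10;	/* indexes 0..9 already in use */
	int gbox_num = 4;
	int index;

	for (index = module_sensor_count;
	     index < module_sensor_count + gbox_num; index++)
		printf("hwmon temp%d -> MTMP sensor %d\n", index + 1,
		       index % module_sensor_count + GBOX_INDEX_MIN);
	return 0;
}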
+
 int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 		     const struct mlxsw_bus_info *mlxsw_bus_info,
 		     struct mlxsw_hwmon **p_hwmon)
@@ -317,6 +646,14 @@
 	if (err)
 		goto err_fans_init;
 
+	err = mlxsw_hwmon_module_init(mlxsw_hwmon);
+	if (err)
+		goto err_temp_module_init;
+
+	err = mlxsw_hwmon_gearbox_init(mlxsw_hwmon);
+	if (err)
+		goto err_temp_gearbox_init;
+
 	mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
 	mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
 
@@ -333,6 +670,8 @@
 	return 0;
 
 err_hwmon_register:
+err_temp_gearbox_init:
+err_temp_module_init:
 err_fans_init:
 err_temp_init:
 	kfree(mlxsw_hwmon);
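
The new attributes are exposed as regular hwmon sysfs files. A minimal
userspace sketch reading one of them; the hwmon instance number and the
attribute name are assumptions for illustration.

#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/class/hwmon/hwmon1/fan1_fault", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("fan1_fault: %s", buf);	/* "1" reports a faulty fan */
	fclose(f);
	return 0;
}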
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 6d29dc4..35a1dc8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -9,17 +9,49 @@
 #include <linux/sysfs.h>
 #include <linux/thermal.h>
 #include <linux/err.h>
+#include <linux/sfp.h>
 
 #include "core.h"
+#include "core_env.h"
 
 #define MLXSW_THERMAL_POLL_INT	1000	/* ms */
-#define MLXSW_THERMAL_MAX_TEMP	110000	/* 110C */
+#define MLXSW_THERMAL_SLOW_POLL_INT	20000	/* ms */
+#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
+#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
+#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
+#define MLXSW_THERMAL_ASIC_TEMP_CRIT	110000	/* 110C */
+#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
+#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
+#define MLXSW_THERMAL_ZONE_MAX_NAME	16
+#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
 #define MLXSW_THERMAL_MAX_STATE	10
 #define MLXSW_THERMAL_MAX_DUTY	255
+/* Minimum and maximum allowed fan speed in percent: from 20% to 100%. Values
+ * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10, are used to set
+ * the dynamic fan speed minimum. For example, a value of 14 (40%) sets the
+ * cooling levels vector to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, yielding PWM
+ * speeds in percent of 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100.
+ */
+#define MLXSW_THERMAL_SPEED_MIN		(MLXSW_THERMAL_MAX_STATE + 2)
+#define MLXSW_THERMAL_SPEED_MAX		(MLXSW_THERMAL_MAX_STATE * 2)
+#define MLXSW_THERMAL_SPEED_MIN_LEVEL	2		/* 20% */
+
+/* External cooling devices, allowed for binding to mlxsw thermal zones. */
+static char * const mlxsw_thermal_external_allowed_cdev[] = {
+	"mlxreg_fan",
+};
+
+enum mlxsw_thermal_trips {
+	MLXSW_THERMAL_TEMP_TRIP_NORM,
+	MLXSW_THERMAL_TEMP_TRIP_HIGH,
+	MLXSW_THERMAL_TEMP_TRIP_HOT,
+	MLXSW_THERMAL_TEMP_TRIP_CRIT,
+};
 
 struct mlxsw_thermal_trip {
 	int	type;
 	int	temp;
+	int	hyst;
 	int	min_state;
 	int	max_state;
 };
@@ -27,32 +59,29 @@
 static const struct mlxsw_thermal_trip default_thermal_trips[] = {
 	{	/* In range - 0-40% PWM */
 		.type		= THERMAL_TRIP_ACTIVE,
-		.temp		= 75000,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
+		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
 		.min_state	= 0,
 		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
 	},
-	{	/* High - 40-100% PWM */
-		.type		= THERMAL_TRIP_ACTIVE,
-		.temp		= 80000,
-		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
-		.max_state	= MLXSW_THERMAL_MAX_STATE,
-	},
 	{
-		/* Very high - 100% PWM */
+		/* In range - 40-100% PWM */
 		.type		= THERMAL_TRIP_ACTIVE,
-		.temp		= 85000,
-		.min_state	= MLXSW_THERMAL_MAX_STATE,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
+		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
+		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	},
 	{	/* Warning */
 		.type		= THERMAL_TRIP_HOT,
-		.temp		= 105000,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
+		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
 		.min_state	= MLXSW_THERMAL_MAX_STATE,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	},
 	{	/* Critical - soft poweroff */
 		.type		= THERMAL_TRIP_CRITICAL,
-		.temp		= MLXSW_THERMAL_MAX_TEMP,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
 		.min_state	= MLXSW_THERMAL_MAX_STATE,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	}
@@ -63,13 +92,30 @@
 /* Make sure all trips are writable */
 #define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
 
+struct mlxsw_thermal;
+
+struct mlxsw_thermal_module {
+	struct mlxsw_thermal *parent;
+	struct thermal_zone_device *tzdev;
+	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
+	enum thermal_device_mode mode;
+	int module; /* Module or gearbox number */
+};
+
 struct mlxsw_thermal {
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
 	struct thermal_zone_device *tzdev;
+	int polling_delay;
 	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
+	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
 	enum thermal_device_mode mode;
+	struct mlxsw_thermal_module *tz_module_arr;
+	struct mlxsw_thermal_module *tz_gearbox_arr;
+	u8 tz_gearbox_num;
+	unsigned int tz_highest_score;
+	struct thermal_zone_device *tz_highest_dev;
 };
 
 static inline u8 mlxsw_state_to_duty(int state)
@@ -93,9 +139,95 @@
 		if (thermal->cdevs[i] == cdev)
 			return i;
 
+	/* Allow mlxsw thermal zone binding to an external cooling device */
+	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
+		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
+			    sizeof(cdev->type)))
+			return 0;
+	}
+
 	return -ENODEV;
 }
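
For reference, cooling states map linearly onto PWM duty cycles in the
0..MLXSW_THERMAL_MAX_DUTY range. A standalone sketch of the round-trip,
assuming the linear rounding used by mlxsw_state_to_duty() and
mlxsw_duty_to_state() above.

#include <stdio.h>

#define MAX_STATE	10	/* MLXSW_THERMAL_MAX_STATE */
#define MAX_DUTY	255	/* MLXSW_THERMAL_MAX_DUTY */

static unsigned int state_to_duty(unsigned int state)
{
	return (state * MAX_DUTY + MAX_STATE / 2) / MAX_STATE;
}

static unsigned int duty_to_state(unsigned int duty)
{
	return (duty * MAX_STATE + MAX_DUTY / 2) / MAX_DUTY;
}

int main(void)
{
	unsigned int state;

	for (state = 0; state <= MAX_STATE; state++)
		printf("state %2u <-> duty %3u (round-trip %u)\n", state,
		       state_to_duty(state),
		       duty_to_state(state_to_duty(state)));
	return 0;
}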
 
+static void
+mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
+{
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
+}
+
+static int
+mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
+				  struct mlxsw_thermal_module *tz)
+{
+	int crit_temp, emerg_temp;
+	int err;
+
+	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
+						   SFP_TEMP_HIGH_WARN,
+						   &crit_temp);
+	if (err)
+		return err;
+
+	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
+						   SFP_TEMP_HIGH_ALARM,
+						   &emerg_temp);
+	if (err)
+		return err;
+
+	/* According to the system thermal requirements, the thermal zones are
+	 * defined with four trip points. The critical and emergency
+	 * temperature thresholds provided by the QSFP module are set as the
+	 * "active" and "hot" trip points, while the "normal" and "critical"
+	 * trip points are derived from "active" and "hot" by subtracting or
+	 * adding double the hysteresis value.
+	 */
+	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
+		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
+					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
+	else
+		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
+	if (emerg_temp > crit_temp)
+		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
+					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
+	else
+		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp;
+
+	return 0;
+}
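
A worked standalone example of the derivation above, using assumed module
thresholds of 70C (SFP_TEMP_HIGH_WARN) and 80C (SFP_TEMP_HIGH_ALARM).

#include <stdio.h>

#define HYST		5000		/* MLXSW_THERMAL_HYSTERESIS_TEMP */
#define TEMP_SHIFT	(HYST * 2)	/* MLXSW_THERMAL_MODULE_TEMP_SHIFT */

int main(void)
{
	int crit_temp = 70000;	/* assumed high-warning threshold */
	int emerg_temp = 80000;	/* assumed high-alarm threshold */
	int norm, high, hot, crit;

	norm = crit_temp >= TEMP_SHIFT ? crit_temp - TEMP_SHIFT : crit_temp;
	high = crit_temp;
	hot  = emerg_temp;
	crit = emerg_temp > crit_temp ? emerg_temp + TEMP_SHIFT : emerg_temp;

	/* prints: norm=60000 high=70000 hot=80000 crit=90000 */
	printf("norm=%d high=%d hot=%d crit=%d\n", norm, high, hot, crit);
	return 0;
}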
+
+static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
+					  struct thermal_zone_device *tzdev,
+					  struct mlxsw_thermal_trip *trips,
+					  int temp)
+{
+	struct mlxsw_thermal_trip *trip = trips;
+	unsigned int score, delta, i, shift = 1;
+
+	/* Calculate the thermal zone score. If the temperature is above
+	 * the critical threshold, the score is MLXSW_THERMAL_TEMP_SCORE_MAX.
+	 */
+	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
+	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
+	     i++, trip++) {
+		if (temp < trip->temp) {
+			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
+			score = delta * shift;
+			break;
+		}
+		shift *= 256;
+	}
+
+	if (score > thermal->tz_highest_score) {
+		thermal->tz_highest_score = score;
+		thermal->tz_highest_dev = tzdev;
+	}
+}
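
A standalone sketch of the score computation with the default ASIC trip
temperatures. At 80C the first trip is already exceeded, so the score lands
in the second 256-wide band.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))
#define NUM_TRIPS		4

int main(void)
{
	int trips[NUM_TRIPS] = { 75000, 85000, 105000, 110000 };
	unsigned int score = 0xffffffffu, shift = 1, delta;
	int temp = 80000, i;

	for (i = 0; i < NUM_TRIPS; i++) {
		if (temp < trips[i]) {
			delta = DIV_ROUND_CLOSEST(temp, trips[i] - temp);
			score = delta * shift;
			break;
		}
		shift *= 256;
	}
	/* prints: temp=80000 score=4096 (16 * 256) */
	printf("temp=%d score=%u\n", temp, score);
	return 0;
}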
+
 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
 			      struct thermal_cooling_device *cdev)
 {
@@ -162,7 +294,7 @@
 	mutex_lock(&tzdev->lock);
 
 	if (mode == THERMAL_DEVICE_ENABLED)
-		tzdev->polling_delay = MLXSW_THERMAL_POLL_INT;
+		tzdev->polling_delay = thermal->polling_delay;
 	else
 		tzdev->polling_delay = 0;
 
@@ -180,7 +312,7 @@
 	struct mlxsw_thermal *thermal = tzdev->devdata;
 	struct device *dev = thermal->bus_info->dev;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
-	unsigned int temp;
+	int temp;
 	int err;
 
 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
@@ -191,8 +323,11 @@
 		return err;
 	}
 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	if (temp > 0)
+		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
+					      temp);
 
-	*p_temp = (int) temp;
+	*p_temp = temp;
 	return 0;
 }
 
@@ -227,13 +362,47 @@
 	struct mlxsw_thermal *thermal = tzdev->devdata;
 
 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
-	    temp > MLXSW_THERMAL_MAX_TEMP)
+	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
 		return -EINVAL;
 
 	thermal->trips[trip].temp = temp;
 	return 0;
 }
 
+static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
+				       int trip, int *p_hyst)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	*p_hyst = thermal->trips[trip].hyst;
+	return 0;
+}
+
+static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
+				       int trip, int hyst)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	thermal->trips[trip].hyst = hyst;
+	return 0;
+}
+
+static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
+				   int trip, enum thermal_trend *trend)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	if (tzdev == thermal->tz_highest_dev)
+		return 1;
+
+	*trend = THERMAL_TREND_STABLE;
+	return 0;
+}
+
 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
 	.bind = mlxsw_thermal_bind,
 	.unbind = mlxsw_thermal_unbind,
@@ -243,6 +412,237 @@
 	.get_trip_type	= mlxsw_thermal_get_trip_type,
 	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
 	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
+	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
+	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
+	.get_trend	= mlxsw_thermal_trend_get,
+};
+
+static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
+				     struct thermal_cooling_device *cdev)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	int i, j, err;
+
+	/* If the cooling device is one of ours bind it */
+	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+		return 0;
+
+	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
+
+		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
+						       trip->max_state,
+						       trip->min_state,
+						       THERMAL_WEIGHT_DEFAULT);
+		if (err < 0)
+			goto err_bind_cooling_device;
+	}
+	return 0;
+
+err_bind_cooling_device:
+	for (j = i - 1; j >= 0; j--)
+		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
+	return err;
+}
+
+static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
+				       struct thermal_cooling_device *cdev)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	int i;
+	int err;
+
+	/* If the cooling device is one of ours unbind it */
+	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+		return 0;
+
+	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
+		WARN_ON(err);
+	}
+	return err;
+}
+
+static int mlxsw_thermal_module_mode_get(struct thermal_zone_device *tzdev,
+					 enum thermal_device_mode *mode)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	*mode = tz->mode;
+
+	return 0;
+}
+
+static int mlxsw_thermal_module_mode_set(struct thermal_zone_device *tzdev,
+					 enum thermal_device_mode mode)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+
+	mutex_lock(&tzdev->lock);
+
+	if (mode == THERMAL_DEVICE_ENABLED)
+		tzdev->polling_delay = thermal->polling_delay;
+	else
+		tzdev->polling_delay = 0;
+
+	mutex_unlock(&tzdev->lock);
+
+	tz->mode = mode;
+	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
+
+	return 0;
+}
+
+static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
+					 int *p_temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	struct device *dev = thermal->bus_info->dev;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	int temp;
+	int err;
+
+	/* Read module temperature. */
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
+			    tz->module, false, false);
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		/* Do not return an error: in case of a broken module
+		 * sensor, doing so would flood the log with error messages.
+		 */
+		temp = 0;
+		*p_temp = (int) temp;
+		return 0;
+	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	*p_temp = temp;
+
+	if (!temp)
+		return 0;
+
+	/* Update trip points. */
+	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
+	if (!err && temp > 0)
+		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
+
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
+				   enum thermal_trip_type *p_type)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	*p_type = tz->trips[trip].type;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
+				   int trip, int *p_temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	*p_temp = tz->trips[trip].temp;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
+				   int trip, int temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
+	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
+		return -EINVAL;
+
+	tz->trips[trip].temp = temp;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
+				   int *p_hyst)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	*p_hyst = tz->trips[trip].hyst;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
+				   int hyst)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	tz->trips[trip].hyst = hyst;
+	return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
+	.bind		= mlxsw_thermal_module_bind,
+	.unbind		= mlxsw_thermal_module_unbind,
+	.get_mode	= mlxsw_thermal_module_mode_get,
+	.set_mode	= mlxsw_thermal_module_mode_set,
+	.get_temp	= mlxsw_thermal_module_temp_get,
+	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
+	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
+	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
+	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
+	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
+	.get_trend	= mlxsw_thermal_trend_get,
+};
+
+static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
+					  int *p_temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	u16 index;
+	int temp;
+	int err;
+
+	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	if (temp > 0)
+		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
+
+	*p_temp = temp;
+	return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
+	.bind		= mlxsw_thermal_module_bind,
+	.unbind		= mlxsw_thermal_module_unbind,
+	.get_mode	= mlxsw_thermal_module_mode_get,
+	.set_mode	= mlxsw_thermal_module_mode_set,
+	.get_temp	= mlxsw_thermal_gearbox_temp_get,
+	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
+	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
+	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
+	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
+	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
+	.get_trend	= mlxsw_thermal_trend_get,
 };
 
 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
@@ -285,12 +685,51 @@
 	struct mlxsw_thermal *thermal = cdev->devdata;
 	struct device *dev = thermal->bus_info->dev;
 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
-	int err, idx;
+	unsigned long cur_state, i;
+	int idx;
+	u8 duty;
+	int err;
 
 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
 	if (idx < 0)
 		return idx;
 
+	/* Verify whether this request changes the allowed dynamic fan speed
+	 * minimum. If it does, update the cooling levels accordingly, and
+	 * update the state if the current state is below the newly requested
+	 * minimum. For example, if the current state is 5 and the minimum
+	 * state is to be changed from 4 to 6, thermal->cooling_levels[0..5]
+	 * will all be changed from 4 to 6, and state 5
+	 * (thermal->cooling_levels[4]) will be overwritten.
+	 */
+	if (state >= MLXSW_THERMAL_SPEED_MIN &&
+	    state <= MLXSW_THERMAL_SPEED_MAX) {
+		state -= MLXSW_THERMAL_MAX_STATE;
+		for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
+			thermal->cooling_levels[i] = max(state, i);
+
+		mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
+		err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+		if (err)
+			return err;
+
+		duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
+		cur_state = mlxsw_duty_to_state(duty);
+
+		/* If the current fan state is already above the requested
+		 * minimum, keep it; otherwise raise it to the new minimum.
+		 */
+		if (state < cur_state)
+			return 0;
+
+		state = cur_state;
+	}
+
+	if (state > MLXSW_THERMAL_MAX_STATE)
+		return -EINVAL;
+
+	/* Normalize the state to the valid speed range. */
+	state = thermal->cooling_levels[state];
 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
 	if (err) {
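
A standalone sketch of the dynamic-minimum handling above: a requested state
of 14 decodes to minimum level 4 (40%), and every cooling level below 4 is
clamped up to it.

#include <stdio.h>

#define MAX_STATE	10			/* MLXSW_THERMAL_MAX_STATE */
#define SPEED_MIN	(MAX_STATE + 2)		/* MLXSW_THERMAL_SPEED_MIN */
#define SPEED_MAX	(MAX_STATE * 2)		/* MLXSW_THERMAL_SPEED_MAX */

int main(void)
{
	unsigned long cooling_levels[MAX_STATE + 1];
	unsigned long state = 14, i;	/* request a 40% dynamic minimum */

	if (state >= SPEED_MIN && state <= SPEED_MAX) {
		state -= MAX_STATE;	/* 14 -> minimum level 4 */
		for (i = 0; i <= MAX_STATE; i++)
			cooling_levels[i] = state > i ? state : i;
	}
	/* prints: 4 4 4 4 4 5 6 7 8 9 10 */
	for (i = 0; i <= MAX_STATE; i++)
		printf("%lu ", cooling_levels[i]);
	printf("\n");
	return 0;
}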
@@ -306,6 +745,217 @@
 	.set_cur_state	= mlxsw_thermal_set_cur_state,
 };
 
+static int
+mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
+{
+	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
+	int err;
+
+	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
+		 module_tz->module + 1);
+	module_tz->tzdev = thermal_zone_device_register(tz_name,
+							MLXSW_THERMAL_NUM_TRIPS,
+							MLXSW_THERMAL_TRIP_MASK,
+							module_tz,
+							&mlxsw_thermal_module_ops,
+							NULL, 0, 0);
+	if (IS_ERR(module_tz->tzdev)) {
+		err = PTR_ERR(module_tz->tzdev);
+		return err;
+	}
+
+	module_tz->mode = THERMAL_DEVICE_ENABLED;
+	return 0;
+}
+
+static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
+{
+	thermal_zone_device_unregister(tzdev);
+}
+
+static int
+mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
+			  struct mlxsw_thermal *thermal, u8 local_port)
+{
+	struct mlxsw_thermal_module *module_tz;
+	char pmlp_pl[MLXSW_REG_PMLP_LEN];
+	u8 width, module;
+	int err;
+
+	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+	err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl);
+	if (err)
+		return err;
+
+	width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+	if (!width)
+		return 0;
+
+	module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+	module_tz = &thermal->tz_module_arr[module];
+	/* Skip if parent is already set (case of port split). */
+	if (module_tz->parent)
+		return 0;
+	module_tz->module = module;
+	module_tz->parent = thermal;
+	memcpy(module_tz->trips, default_thermal_trips,
+	       sizeof(thermal->trips));
+	/* Initialize all trip points. */
+	mlxsw_thermal_module_trips_reset(module_tz);
+	/* Update trip points according to the module data. */
+	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
+}
+
+static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
+{
+	if (module_tz && module_tz->tzdev) {
+		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
+		module_tz->tzdev = NULL;
+		module_tz->parent = NULL;
+	}
+}
+
+static int
+mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
+			   struct mlxsw_thermal *thermal)
+{
+	unsigned int module_count = mlxsw_core_max_ports(core);
+	struct mlxsw_thermal_module *module_tz;
+	int i, err;
+
+	if (!mlxsw_core_res_query_enabled(core))
+		return 0;
+
+	thermal->tz_module_arr = kcalloc(module_count,
+					 sizeof(*thermal->tz_module_arr),
+					 GFP_KERNEL);
+	if (!thermal->tz_module_arr)
+		return -ENOMEM;
+
+	for (i = 1; i < module_count; i++) {
+		err = mlxsw_thermal_module_init(dev, core, thermal, i);
+		if (err)
+			goto err_unreg_tz_module_arr;
+	}
+
+	for (i = 0; i < module_count - 1; i++) {
+		module_tz = &thermal->tz_module_arr[i];
+		if (!module_tz->parent)
+			continue;
+		err = mlxsw_thermal_module_tz_init(module_tz);
+		if (err)
+			goto err_unreg_tz_module_arr;
+	}
+
+	return 0;
+
+err_unreg_tz_module_arr:
+	for (i = module_count - 1; i >= 0; i--)
+		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
+	kfree(thermal->tz_module_arr);
+	return err;
+}
+
+static void
+mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
+{
+	unsigned int module_count = mlxsw_core_max_ports(thermal->core);
+	int i;
+
+	if (!mlxsw_core_res_query_enabled(thermal->core))
+		return;
+
+	for (i = module_count - 1; i >= 0; i--)
+		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
+	kfree(thermal->tz_module_arr);
+}
+
+static int
+mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
+{
+	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
+
+	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
+		 gearbox_tz->module + 1);
+	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
+						MLXSW_THERMAL_NUM_TRIPS,
+						MLXSW_THERMAL_TRIP_MASK,
+						gearbox_tz,
+						&mlxsw_thermal_gearbox_ops,
+						NULL, 0, 0);
+	if (IS_ERR(gearbox_tz->tzdev))
+		return PTR_ERR(gearbox_tz->tzdev);
+
+	gearbox_tz->mode = THERMAL_DEVICE_ENABLED;
+	return 0;
+}
+
+static void
+mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
+{
+	thermal_zone_device_unregister(gearbox_tz->tzdev);
+}
+
+static int
+mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
+			     struct mlxsw_thermal *thermal)
+{
+	struct mlxsw_thermal_module *gearbox_tz;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	int i;
+	int err;
+
+	if (!mlxsw_core_res_query_enabled(core))
+		return 0;
+
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL);
+	if (!thermal->tz_gearbox_num)
+		return 0;
+
+	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
+					  sizeof(*thermal->tz_gearbox_arr),
+					  GFP_KERNEL);
+	if (!thermal->tz_gearbox_arr)
+		return -ENOMEM;
+
+	for (i = 0; i < thermal->tz_gearbox_num; i++) {
+		gearbox_tz = &thermal->tz_gearbox_arr[i];
+		memcpy(gearbox_tz->trips, default_thermal_trips,
+		       sizeof(thermal->trips));
+		gearbox_tz->module = i;
+		gearbox_tz->parent = thermal;
+		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
+		if (err)
+			goto err_unreg_tz_gearbox;
+	}
+
+	return 0;
+
+err_unreg_tz_gearbox:
+	for (i--; i >= 0; i--)
+		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
+	kfree(thermal->tz_gearbox_arr);
+	return err;
+}
+
+static void
+mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
+{
+	int i;
+
+	if (!mlxsw_core_res_query_enabled(thermal->core))
+		return;
+
+	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
+		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
+	kfree(thermal->tz_gearbox_arr);
+}
+
 int mlxsw_thermal_init(struct mlxsw_core *core,
 		       const struct mlxsw_bus_info *bus_info,
 		       struct mlxsw_thermal **p_thermal)
@@ -358,8 +1008,9 @@
 		if (pwm_active & BIT(i)) {
 			struct thermal_cooling_device *cdev;
 
-			cdev = thermal_cooling_device_register("Fan", thermal,
-							&mlxsw_cooling_ops);
+			cdev = thermal_cooling_device_register("mlxsw_fan",
+							       thermal,
+							       &mlxsw_cooling_ops);
 			if (IS_ERR(cdev)) {
 				err = PTR_ERR(cdev);
 				dev_err(dev, "Failed to register cooling device\n");
@@ -369,22 +1020,47 @@
 		}
 	}
 
+	/* Initialize cooling levels per PWM state. */
+	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
+		thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
+						 i);
+
+	thermal->polling_delay = bus_info->low_frequency ?
+				 MLXSW_THERMAL_SLOW_POLL_INT :
+				 MLXSW_THERMAL_POLL_INT;
+
 	thermal->tzdev = thermal_zone_device_register("mlxsw",
 						      MLXSW_THERMAL_NUM_TRIPS,
 						      MLXSW_THERMAL_TRIP_MASK,
 						      thermal,
 						      &mlxsw_thermal_ops,
 						      NULL, 0,
-						      MLXSW_THERMAL_POLL_INT);
+						      thermal->polling_delay);
 	if (IS_ERR(thermal->tzdev)) {
 		err = PTR_ERR(thermal->tzdev);
 		dev_err(dev, "Failed to register thermal zone\n");
 		goto err_unreg_cdevs;
 	}
 
+	err = mlxsw_thermal_modules_init(dev, core, thermal);
+	if (err)
+		goto err_unreg_tzdev;
+
+	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
+	if (err)
+		goto err_unreg_modules_tzdev;
+
 	thermal->mode = THERMAL_DEVICE_ENABLED;
 	*p_thermal = thermal;
 	return 0;
+
+err_unreg_modules_tzdev:
+	mlxsw_thermal_modules_fini(thermal);
+err_unreg_tzdev:
+	if (thermal->tzdev) {
+		thermal_zone_device_unregister(thermal->tzdev);
+		thermal->tzdev = NULL;
+	}
 err_unreg_cdevs:
 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
 		if (thermal->cdevs[i])
@@ -398,6 +1074,8 @@
 {
 	int i;
 
+	mlxsw_thermal_gearboxes_fini(thermal);
+	mlxsw_thermal_modules_fini(thermal);
 	if (thermal->tzdev) {
 		thermal_zone_device_unregister(thermal->tzdev);
 		thermal->tzdev = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 798bd5a..95f408d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -14,14 +14,17 @@
 #include "cmd.h"
 #include "core.h"
 #include "i2c.h"
+#include "resources.h"
 
 #define MLXSW_I2C_CIR2_BASE		0x72000
 #define MLXSW_I2C_CIR_STATUS_OFF	0x18
 #define MLXSW_I2C_CIR2_OFF_STATUS	(MLXSW_I2C_CIR2_BASE + \
 					 MLXSW_I2C_CIR_STATUS_OFF)
 #define MLXSW_I2C_OPMOD_SHIFT		12
+#define MLXSW_I2C_EVENT_BIT_SHIFT	22
 #define MLXSW_I2C_GO_BIT_SHIFT		23
 #define MLXSW_I2C_CIR_CTRL_STATUS_SHIFT	24
+#define MLXSW_I2C_EVENT_BIT		BIT(MLXSW_I2C_EVENT_BIT_SHIFT)
 #define MLXSW_I2C_GO_BIT		BIT(MLXSW_I2C_GO_BIT_SHIFT)
 #define MLXSW_I2C_GO_OPMODE		BIT(MLXSW_I2C_OPMOD_SHIFT)
 #define MLXSW_I2C_SET_IMM_CMD		(MLXSW_I2C_GO_OPMODE | \
@@ -33,17 +36,20 @@
 #define MLXSW_I2C_TLV_HDR_SIZE		0x10
 #define MLXSW_I2C_ADDR_WIDTH		4
 #define MLXSW_I2C_PUSH_CMD_SIZE		(MLXSW_I2C_ADDR_WIDTH + 4)
+#define MLXSW_I2C_SET_EVENT_CMD		(MLXSW_I2C_EVENT_BIT)
+#define MLXSW_I2C_PUSH_EVENT_CMD	(MLXSW_I2C_GO_BIT | \
+					 MLXSW_I2C_SET_EVENT_CMD)
 #define MLXSW_I2C_READ_SEMA_SIZE	4
 #define MLXSW_I2C_PREP_SIZE		(MLXSW_I2C_ADDR_WIDTH + 28)
 #define MLXSW_I2C_MBOX_SIZE		20
 #define MLXSW_I2C_MBOX_OUT_PARAM_OFF	12
-#define MLXSW_I2C_MAX_BUFF_SIZE		32
 #define MLXSW_I2C_MBOX_OFFSET_BITS	20
 #define MLXSW_I2C_MBOX_SIZE_BITS	12
 #define MLXSW_I2C_ADDR_BUF_SIZE		4
-#define MLXSW_I2C_BLK_MAX		32
+#define MLXSW_I2C_BLK_DEF		32
 #define MLXSW_I2C_RETRY			5
 #define MLXSW_I2C_TIMEOUT_MSECS		5000
+#define MLXSW_I2C_MAX_DATA_SIZE		256
 
 /**
  * struct mlxsw_i2c - device private data:
@@ -55,6 +61,7 @@
  * @dev: I2C device;
  * @core: switch core pointer;
  * @bus_info: bus info block;
+ * @block_size: maximum block size allowed to pass to the underlying layer;
  */
 struct mlxsw_i2c {
 	struct {
@@ -67,6 +74,7 @@
 	struct device *dev;
 	struct mlxsw_core *core;
 	struct mlxsw_bus_info bus_info;
+	u16 block_size;
 };
 
 #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) {	\
@@ -167,7 +175,7 @@
 	return err > 0 ? 0 : err;
 }
 
-/* Routine posts a command to ASIC though mail box. */
+/* Routine posts a command to the ASIC through the mailbox. */
 static int mlxsw_i2c_write_cmd(struct i2c_client *client,
 			       struct mlxsw_i2c *mlxsw_i2c,
 			       int immediate)
@@ -213,6 +221,66 @@
 	return 0;
 }
 
+/* Routine posts an initialization command to the ASIC through the mailbox. */
+static int
+mlxsw_i2c_write_init_cmd(struct i2c_client *client,
+			 struct mlxsw_i2c *mlxsw_i2c, u16 opcode, u32 in_mod)
+{
+	__be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = {
+		0, cpu_to_be32(MLXSW_I2C_PUSH_EVENT_CMD)
+	};
+	__be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = {
+		0, 0, 0, 0, 0, 0,
+		cpu_to_be32(client->adapter->nr & 0xffff),
+		cpu_to_be32(MLXSW_I2C_SET_EVENT_CMD)
+	};
+	struct i2c_msg push_cmd =
+		MLXSW_I2C_WRITE_MSG(client, push_cmd_buf,
+				    MLXSW_I2C_PUSH_CMD_SIZE);
+	struct i2c_msg prep_cmd =
+		MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE);
+	u8 status;
+	int err;
+
+	push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_EVENT_CMD | opcode);
+	prep_cmd_buf[3] = cpu_to_be32(in_mod);
+	prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_GO_BIT | opcode);
+	mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf,
+				 MLXSW_I2C_CIR2_BASE);
+	mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf,
+				 MLXSW_I2C_CIR2_OFF_STATUS);
+
+	/* Prepare Command Interface Register for transaction */
+	err = i2c_transfer(client->adapter, &prep_cmd, 1);
+	if (err < 0)
+		return err;
+	else if (err != 1)
+		return -EIO;
+
+	/* Write out Command Interface Register GO bit to push transaction */
+	err = i2c_transfer(client->adapter, &push_cmd, 1);
+	if (err < 0)
+		return err;
+	else if (err != 1)
+		return -EIO;
+
+	/* Wait until go bit is cleared. */
+	err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
+	if (err) {
+		dev_err(&client->dev, "HW semaphore is not released");
+		return err;
+	}
+
+	/* Validate transaction completion status. */
+	if (status) {
+		dev_err(&client->dev, "Bad transaction completion status %x\n",
+			status);
+		return -EIO;
+	}
+
+	return 0;
+}
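
Illustrative sketch of how the control words written to the Command
Interface Register are composed; the opcode value is a hypothetical
placeholder.

#include <stdio.h>

#define EVENT_BIT	(1u << 22)	/* MLXSW_I2C_EVENT_BIT */
#define GO_BIT		(1u << 23)	/* MLXSW_I2C_GO_BIT */

int main(void)
{
	unsigned int opcode = 0x40;	/* hypothetical opcode */

	printf("prep word: 0x%08x\n", GO_BIT | opcode);
	printf("push word: 0x%08x\n", GO_BIT | EVENT_BIT | opcode);
	return 0;
}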
+
 /* Routine obtains mail box offsets from ASIC register space. */
 static int mlxsw_i2c_get_mbox(struct i2c_client *client,
 			      struct mlxsw_i2c *mlxsw_i2c)
@@ -248,20 +316,26 @@
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
 	unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
-	u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
 	int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
 	unsigned long end;
+	u8 *tran_buf;
 	struct i2c_msg write_tran =
-		MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+		MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE);
 	int err;
 
+	tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE,
+			   GFP_KERNEL);
+	if (!tran_buf)
+		return -ENOMEM;
+
+	write_tran.buf = tran_buf;
 	for (i = 0; i < num; i++) {
-		chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
-			     MLXSW_I2C_BLK_MAX : in_mbox_size;
+		chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ?
+			     mlxsw_i2c->block_size : in_mbox_size;
 		write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
 		mlxsw_i2c_set_slave_addr(tran_buf, off);
 		memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
-		       MLXSW_I2C_BLK_MAX * i, chunk_size);
+		       mlxsw_i2c->block_size * i, chunk_size);
 
 		j = 0;
 		end = jiffies + timeout;
@@ -275,9 +349,9 @@
 			 (j++ < MLXSW_I2C_RETRY));
 
 		if (err != 1) {
 			if (!err)
 				err = -EIO;
-			return err;
+			goto mlxsw_i2c_write_exit;
 		}
 
 		off += chunk_size;
@@ -288,30 +363,33 @@
 	err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
 	if (err) {
 		dev_err(&client->dev, "Could not start transaction");
-		return -EIO;
+		err = -EIO;
+		goto mlxsw_i2c_write_exit;
 	}
 
 	/* Wait until go bit is cleared. */
 	err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
 	if (err) {
 		dev_err(&client->dev, "HW semaphore is not released");
-		return err;
+		goto mlxsw_i2c_write_exit;
 	}
 
 	/* Validate transaction completion status. */
 	if (*p_status) {
 		dev_err(&client->dev, "Bad transaction completion status %x\n",
 			*p_status);
-		return -EIO;
+		err = -EIO;
 	}
 
-	return 0;
+mlxsw_i2c_write_exit:
+	kfree(tran_buf);
+	return err;
 }
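
A standalone sketch of the chunking arithmetic used above: a payload is
split into block_size pieces, with the chunk count rounded up when there is
a remainder.

#include <stdio.h>

int main(void)
{
	int block_size = 32;	/* mlxsw_i2c->block_size */
	int reg_size = 100;	/* bytes to transfer */
	int num = reg_size / block_size + (reg_size % block_size ? 1 : 0);
	int i, off = 0, left = reg_size, chunk;

	for (i = 0; i < num; i++) {
		chunk = left > block_size ? block_size : left;
		printf("chunk %d: offset %d, %d bytes\n", i, off, chunk);
		off += chunk;
		left -= chunk;
	}
	return 0;
}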
 
 /* Routine executes I2C command. */
 static int
-mlxsw_i2c_cmd(struct device *dev, size_t in_mbox_size, u8 *in_mbox,
-	      size_t out_mbox_size, u8 *out_mbox, u8 *status)
+mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
+	      u8 *in_mbox, size_t out_mbox_size, u8 *out_mbox, u8 *status)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
@@ -326,31 +404,47 @@
 
 	WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));
 
-	reg_size = mlxsw_i2c_get_reg_size(in_mbox);
-	num = reg_size / MLXSW_I2C_BLK_MAX;
-	if (reg_size % MLXSW_I2C_BLK_MAX)
-		num++;
+	if (in_mbox) {
+		reg_size = mlxsw_i2c_get_reg_size(in_mbox);
+		num = reg_size / mlxsw_i2c->block_size;
+		if (reg_size % mlxsw_i2c->block_size)
+			num++;
 
-	if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
-		dev_err(&client->dev, "Could not acquire lock");
-		return -EINVAL;
-	}
+		if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
+			dev_err(&client->dev, "Could not acquire lock");
+			return -EINVAL;
+		}
 
-	err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status);
-	if (err)
-		goto cmd_fail;
+		err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status);
+		if (err)
+			goto cmd_fail;
 
-	/* No out mailbox is case of write transaction. */
-	if (!out_mbox) {
-		mutex_unlock(&mlxsw_i2c->cmd.lock);
-		return 0;
+		/* An absent output mailbox means a write transaction. */
+		if (!out_mbox) {
+			mutex_unlock(&mlxsw_i2c->cmd.lock);
+			return 0;
+		}
+	} else {
+		/* An absent input mailbox means an initialization query command. */
+		reg_size = MLXSW_I2C_MAX_DATA_SIZE;
+		num = reg_size / mlxsw_i2c->block_size;
+
+		if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
+			dev_err(&client->dev, "Could not acquire lock");
+			return -EINVAL;
+		}
+
+		err = mlxsw_i2c_write_init_cmd(client, mlxsw_i2c, opcode,
+					       in_mod);
+		if (err)
+			goto cmd_fail;
 	}
 
 	/* Send read transaction to get output mailbox content. */
 	read_tran[1].buf = out_mbox;
 	for (i = 0; i < num; i++) {
-		chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
-			     MLXSW_I2C_BLK_MAX : reg_size;
+		chunk_size = (reg_size > mlxsw_i2c->block_size) ?
+			     mlxsw_i2c->block_size : reg_size;
 		read_tran[1].len = chunk_size;
 		mlxsw_i2c_set_slave_addr(tran_buf, off);
 
@@ -395,8 +489,8 @@
 {
 	struct mlxsw_i2c *mlxsw_i2c = bus_priv;
 
-	return mlxsw_i2c_cmd(mlxsw_i2c->dev, in_mbox_size, in_mbox,
-			     out_mbox_size, out_mbox, status);
+	return mlxsw_i2c_cmd(mlxsw_i2c->dev, opcode, in_mod, in_mbox_size,
+			     in_mbox, out_mbox_size, out_mbox, status);
 }
 
 static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv,
@@ -414,13 +508,34 @@
 static int
 mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 	       const struct mlxsw_config_profile *profile,
-	       struct mlxsw_res *resources)
+	       struct mlxsw_res *res)
 {
 	struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+	char *mbox;
+	int err;
 
 	mlxsw_i2c->core = mlxsw_core;
 
-	return 0;
+	mbox = mlxsw_cmd_mbox_alloc();
+	if (!mbox)
+		return -ENOMEM;
+
+	err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
+	if (err)
+		goto mbox_put;
+
+	mlxsw_i2c->bus_info.fw_rev.major =
+		mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
+	mlxsw_i2c->bus_info.fw_rev.minor =
+		mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
+	mlxsw_i2c->bus_info.fw_rev.subminor =
+		mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
+
+	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
+
+mbox_put:
+	mlxsw_cmd_mbox_free(mbox);
+	return err;
 }
 
 static void mlxsw_i2c_fini(void *bus_priv)
@@ -442,6 +557,7 @@
 static int mlxsw_i2c_probe(struct i2c_client *client,
 			   const struct i2c_device_id *id)
 {
+	const struct i2c_adapter_quirks *quirks = client->adapter->quirks;
 	struct mlxsw_i2c *mlxsw_i2c;
 	u8 status;
 	int err;
@@ -450,6 +566,22 @@
 	if (!mlxsw_i2c)
 		return -ENOMEM;
 
+	if (quirks) {
+		if ((quirks->max_read_len &&
+		     quirks->max_read_len < MLXSW_I2C_BLK_DEF) ||
+		    (quirks->max_write_len &&
+		     quirks->max_write_len < MLXSW_I2C_BLK_DEF)) {
+			dev_err(&client->dev, "Insufficient transaction buffer length\n");
+			return -EOPNOTSUPP;
+		}
+
+		mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF,
+					      min_t(u16, quirks->max_read_len,
+						    quirks->max_write_len));
+	} else {
+		mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF;
+	}
+
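
A standalone sketch of the block size selection above, assuming both quirk
limits are non-zero.

#include <stdio.h>

#define BLK_DEF	32	/* MLXSW_I2C_BLK_DEF */

static unsigned short pick_block_size(unsigned short max_read,
				      unsigned short max_write)
{
	unsigned short lim = max_read < max_write ? max_read : max_write;

	/* max_t(u16, BLK_DEF, min_t(u16, max_read, max_write)) */
	return lim > BLK_DEF ? lim : BLK_DEF;
}

int main(void)
{
	printf("%u\n", pick_block_size(64, 128));	/* -> 64 */
	printf("%u\n", pick_block_size(256, 96));	/* -> 96 */
	return 0;
}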
 	i2c_set_clientdata(client, mlxsw_i2c);
 	mutex_init(&mlxsw_i2c->cmd.lock);
 
@@ -503,6 +635,7 @@
 	mlxsw_i2c->bus_info.device_kind = id->name;
 	mlxsw_i2c->bus_info.device_name = client->name;
 	mlxsw_i2c->bus_info.dev = &client->dev;
+	mlxsw_i2c->bus_info.low_frequency = true;
 	mlxsw_i2c->dev = &client->dev;
 
 	err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index 5a6c445..471b0ca 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -1,6 +1,9 @@
 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
-/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+/* Copyright (c) 2016-2019 Mellanox Technologies. All rights reserved */
 
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -8,59 +11,403 @@
 #include <linux/types.h>
 
 #include "core.h"
+#include "core_env.h"
 #include "i2c.h"
 
-static const char mlxsw_minimal_driver_name[] = "mlxsw_minimal";
+static const char mlxsw_m_driver_name[] = "mlxsw_minimal";
 
-static const struct mlxsw_config_profile mlxsw_minimal_config_profile;
+struct mlxsw_m_port;
 
-static struct mlxsw_driver mlxsw_minimal_driver = {
-	.kind		= mlxsw_minimal_driver_name,
-	.priv_size	= 1,
-	.profile	= &mlxsw_minimal_config_profile,
+struct mlxsw_m {
+	struct mlxsw_m_port **ports;
+	int *module_to_port;
+	struct mlxsw_core *core;
+	const struct mlxsw_bus_info *bus_info;
+	u8 base_mac[ETH_ALEN];
+	u8 max_ports;
 };
 
-static const struct i2c_device_id mlxsw_minimal_i2c_id[] = {
+struct mlxsw_m_port {
+	struct net_device *dev;
+	struct mlxsw_m *mlxsw_m;
+	u8 local_port;
+	u8 module;
+};
+
+static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m)
+{
+	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
+	int err;
+
+	err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(spad), spad_pl);
+	if (err)
+		return err;
+	mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_m->base_mac);
+	return 0;
+}
+
+static int mlxsw_m_port_dummy_open_stop(struct net_device *dev)
+{
+	return 0;
+}
+
+static struct devlink_port *
+mlxsw_m_port_get_devlink_port(struct net_device *dev)
+{
+	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev);
+	struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m;
+
+	return mlxsw_core_port_devlink_port_get(mlxsw_m->core,
+						mlxsw_m_port->local_port);
+}
+
+static const struct net_device_ops mlxsw_m_port_netdev_ops = {
+	.ndo_open		= mlxsw_m_port_dummy_open_stop,
+	.ndo_stop		= mlxsw_m_port_dummy_open_stop,
+	.ndo_get_devlink_port	= mlxsw_m_port_get_devlink_port,
+};
+
+static void mlxsw_m_module_get_drvinfo(struct net_device *dev,
+				       struct ethtool_drvinfo *drvinfo)
+{
+	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev);
+	struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m;
+
+	strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind,
+		sizeof(drvinfo->driver));
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%d.%d.%d",
+		 mlxsw_m->bus_info->fw_rev.major,
+		 mlxsw_m->bus_info->fw_rev.minor,
+		 mlxsw_m->bus_info->fw_rev.subminor);
+	strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name,
+		sizeof(drvinfo->bus_info));
+}
+
+static int mlxsw_m_get_module_info(struct net_device *netdev,
+				   struct ethtool_modinfo *modinfo)
+{
+	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
+	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
+
+	return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo);
+}
+
+static int
+mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
+			  u8 *data)
+{
+	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
+	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
+
+	return mlxsw_env_get_module_eeprom(netdev, core, mlxsw_m_port->module,
+					   ee, data);
+}
+
+static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
+	.get_drvinfo		= mlxsw_m_module_get_drvinfo,
+	.get_module_info	= mlxsw_m_get_module_info,
+	.get_module_eeprom	= mlxsw_m_get_module_eeprom,
+};
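
These ethtool ops back the standard module-EEPROM ioctls. A minimal
userspace sketch querying the module info through SIOCETHTOOL; the interface
name is an assumption.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_modinfo modinfo = { .cmd = ETHTOOL_GMODULEINFO };
	struct ifreq ifr = {0};
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* assumed name */
	ifr.ifr_data = (char *)&modinfo;
	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("eeprom type %u, length %u\n", modinfo.type,
		       modinfo.eeprom_len);
	close(fd);
	return 0;
}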
+
+static int
+mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u8 local_port,
+			     u8 *p_module, u8 *p_width)
+{
+	char pmlp_pl[MLXSW_REG_PMLP_LEN];
+	int err;
+
+	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+	err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(pmlp), pmlp_pl);
+	if (err)
+		return err;
+	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+
+	return 0;
+}
+
+static int
+mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port)
+{
+	struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m;
+	struct net_device *dev = mlxsw_m_port->dev;
+	char ppad_pl[MLXSW_REG_PPAD_LEN];
+	int err;
+
+	mlxsw_reg_ppad_pack(ppad_pl, false, 0);
+	err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(ppad), ppad_pl);
+	if (err)
+		return err;
+	mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, dev->dev_addr);
+	/* The last byte of the base MAC address is guaranteed
+	 * to be such that it does not overflow when the module
+	 * number is added.
+	 */
+	dev->dev_addr[ETH_ALEN - 1] += mlxsw_m_port->module + 1;
+	return 0;
+}
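
A standalone sketch of the per-module address derivation above, using an
arbitrary example base MAC.

#include <stdio.h>

int main(void)
{
	/* arbitrary example base MAC, as read from the PPAD register */
	unsigned char mac[6] = { 0x24, 0x8a, 0x07, 0x00, 0x00, 0x10 };
	int module = 2;	/* third front-panel module */

	mac[5] += module + 1;	/* prints ...:00:13 */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}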
+
+static int
+mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module)
+{
+	struct mlxsw_m_port *mlxsw_m_port;
+	struct net_device *dev;
+	int err;
+
+	err = mlxsw_core_port_init(mlxsw_m->core, local_port,
+				   module + 1, false, 0,
+				   mlxsw_m->base_mac,
+				   sizeof(mlxsw_m->base_mac));
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to init core port\n",
+			local_port);
+		return err;
+	}
+
+	dev = alloc_etherdev(sizeof(struct mlxsw_m_port));
+	if (!dev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev);
+	mlxsw_m_port = netdev_priv(dev);
+	mlxsw_m_port->dev = dev;
+	mlxsw_m_port->mlxsw_m = mlxsw_m;
+	mlxsw_m_port->local_port = local_port;
+	mlxsw_m_port->module = module;
+
+	dev->netdev_ops = &mlxsw_m_port_netdev_ops;
+	dev->ethtool_ops = &mlxsw_m_port_ethtool_ops;
+
+	err = mlxsw_m_port_dev_addr_get(mlxsw_m_port);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Port %d: Unable to get port mac address\n",
+			mlxsw_m_port->local_port);
+		goto err_dev_addr_get;
+	}
+
+	netif_carrier_off(dev);
+	mlxsw_m->ports[local_port] = mlxsw_m_port;
+	err = register_netdev(dev);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to register netdev\n",
+			mlxsw_m_port->local_port);
+		goto err_register_netdev;
+	}
+
+	mlxsw_core_port_eth_set(mlxsw_m->core, mlxsw_m_port->local_port,
+				mlxsw_m_port, dev);
+
+	return 0;
+
+err_register_netdev:
+	mlxsw_m->ports[local_port] = NULL;
+	free_netdev(dev);
+err_dev_addr_get:
+err_alloc_etherdev:
+	mlxsw_core_port_fini(mlxsw_m->core, local_port);
+	return err;
+}
+
+static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u8 local_port)
+{
+	struct mlxsw_m_port *mlxsw_m_port = mlxsw_m->ports[local_port];
+
+	mlxsw_core_port_clear(mlxsw_m->core, local_port, mlxsw_m);
+	unregister_netdev(mlxsw_m_port->dev); /* This calls ndo_stop */
+	mlxsw_m->ports[local_port] = NULL;
+	free_netdev(mlxsw_m_port->dev);
+	mlxsw_core_port_fini(mlxsw_m->core, local_port);
+}
+
+static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u8 local_port,
+				   u8 *last_module)
+{
+	u8 module, width;
+	int err;
+
+	/* Fill out the module to local port mapping array */
+	err = mlxsw_m_port_module_info_get(mlxsw_m, local_port, &module,
+					   &width);
+	if (err)
+		return err;
+
+	if (!width)
+		return 0;
+	/* Skip if the port belongs to the same module cluster */
+	if (module == *last_module)
+		return 0;
+	*last_module = module;
+	mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports;
+
+	return 0;
+}
+
+static void mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 module)
+{
+	mlxsw_m->module_to_port[module] = -1;
+}
+
+static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+{
+	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
+	u8 last_module = max_ports;
+	int i;
+	int err;
+
+	mlxsw_m->ports = kcalloc(max_ports, sizeof(*mlxsw_m->ports),
+				 GFP_KERNEL);
+	if (!mlxsw_m->ports)
+		return -ENOMEM;
+
+	mlxsw_m->module_to_port = kmalloc_array(max_ports, sizeof(int),
+						GFP_KERNEL);
+	if (!mlxsw_m->module_to_port) {
+		err = -ENOMEM;
+		goto err_module_to_port_alloc;
+	}
+
+	/* Invalidate the entries of module to local port mapping array */
+	for (i = 0; i < max_ports; i++)
+		mlxsw_m->module_to_port[i] = -1;
+
+	/* Fill out module to local port mapping array */
+	for (i = 1; i < max_ports; i++) {
+		err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
+		if (err)
+			goto err_module_to_port_map;
+	}
+
+	/* Create port objects for each valid entry */
+	for (i = 0; i < mlxsw_m->max_ports; i++) {
+		if (mlxsw_m->module_to_port[i] > 0) {
+			err = mlxsw_m_port_create(mlxsw_m,
+						  mlxsw_m->module_to_port[i],
+						  i);
+			if (err)
+				goto err_module_to_port_create;
+		}
+	}
+
+	return 0;
+
+err_module_to_port_create:
+	for (i--; i >= 0; i--) {
+		if (mlxsw_m->module_to_port[i] > 0)
+			mlxsw_m_port_remove(mlxsw_m,
+					    mlxsw_m->module_to_port[i]);
+	}
+	i = max_ports;
+err_module_to_port_map:
+	for (i--; i > 0; i--)
+		mlxsw_m_port_module_unmap(mlxsw_m, i);
+	kfree(mlxsw_m->module_to_port);
+err_module_to_port_alloc:
+	kfree(mlxsw_m->ports);
+	return err;
+}
+
+static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+{
+	int i;
+
+	for (i = 0; i < mlxsw_m->max_ports; i++) {
+		if (mlxsw_m->module_to_port[i] > 0) {
+			mlxsw_m_port_remove(mlxsw_m,
+					    mlxsw_m->module_to_port[i]);
+			mlxsw_m_port_module_unmap(mlxsw_m, i);
+		}
+	}
+
+	kfree(mlxsw_m->module_to_port);
+	kfree(mlxsw_m->ports);
+}
+
+static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
+			const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+	struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
+	int err;
+
+	mlxsw_m->core = mlxsw_core;
+	mlxsw_m->bus_info = mlxsw_bus_info;
+
+	err = mlxsw_m_base_mac_get(mlxsw_m);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n");
+		return err;
+	}
+
+	err = mlxsw_m_ports_create(mlxsw_m);
+	if (err) {
+		dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
+{
+	struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
+
+	mlxsw_m_ports_remove(mlxsw_m);
+}
+
+static const struct mlxsw_config_profile mlxsw_m_config_profile;
+
+static struct mlxsw_driver mlxsw_m_driver = {
+	.kind			= mlxsw_m_driver_name,
+	.priv_size		= sizeof(struct mlxsw_m),
+	.init			= mlxsw_m_init,
+	.fini			= mlxsw_m_fini,
+	.profile		= &mlxsw_m_config_profile,
+	.res_query_enabled	= true,
+};
+
+static const struct i2c_device_id mlxsw_m_i2c_id[] = {
 	{ "mlxsw_minimal", 0},
 	{ },
 };
 
-static struct i2c_driver mlxsw_minimal_i2c_driver = {
+static struct i2c_driver mlxsw_m_i2c_driver = {
 	.driver.name = "mlxsw_minimal",
 	.class = I2C_CLASS_HWMON,
-	.id_table = mlxsw_minimal_i2c_id,
+	.id_table = mlxsw_m_i2c_id,
 };
 
-static int __init mlxsw_minimal_module_init(void)
+static int __init mlxsw_m_module_init(void)
 {
 	int err;
 
-	err = mlxsw_core_driver_register(&mlxsw_minimal_driver);
+	err = mlxsw_core_driver_register(&mlxsw_m_driver);
 	if (err)
 		return err;
 
-	err = mlxsw_i2c_driver_register(&mlxsw_minimal_i2c_driver);
+	err = mlxsw_i2c_driver_register(&mlxsw_m_i2c_driver);
 	if (err)
 		goto err_i2c_driver_register;
 
 	return 0;
 
 err_i2c_driver_register:
-	mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+	mlxsw_core_driver_unregister(&mlxsw_m_driver);
 
 	return err;
 }
 
-static void __exit mlxsw_minimal_module_exit(void)
+static void __exit mlxsw_m_module_exit(void)
 {
-	mlxsw_i2c_driver_unregister(&mlxsw_minimal_i2c_driver);
-	mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+	mlxsw_i2c_driver_unregister(&mlxsw_m_i2c_driver);
+	mlxsw_core_driver_unregister(&mlxsw_m_driver);
 }
 
-module_init(mlxsw_minimal_module_init);
-module_exit(mlxsw_minimal_module_exit);
+module_init(mlxsw_m_module_init);
+module_exit(mlxsw_m_module_exit);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox minimal driver");
-MODULE_DEVICE_TABLE(i2c, mlxsw_minimal_i2c_id);
+MODULE_DEVICE_TABLE(i2c, mlxsw_m_i2c_id);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 5890fdf..615455a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -102,6 +102,7 @@
 struct mlxsw_pci {
 	struct pci_dev *pdev;
 	u8 __iomem *hw_addr;
+	u64 free_running_clock_offset;
 	struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
 	u32 doorbell_offset;
 	struct mlxsw_core *core;
@@ -507,17 +508,28 @@
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
 	struct mlxsw_pci_queue_elem_info *elem_info;
+	struct mlxsw_tx_info tx_info;
 	char *wqe;
 	struct sk_buff *skb;
 	int i;
 
 	spin_lock(&q->lock);
 	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+	tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info;
 	skb = elem_info->u.sdq.skb;
 	wqe = elem_info->elem;
 	for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
 		mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
-	dev_kfree_skb_any(skb);
+
+	if (unlikely(!tx_info.is_emad &&
+		     skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb,
+					   tx_info.local_port);
+		skb = NULL;
+	}
+
+	if (skb)
+		dev_kfree_skb_any(skb);
 	elem_info->u.sdq.skb = NULL;
 
 	if (q->consumer_counter++ != consumer_counter_limit)
@@ -604,29 +616,31 @@
 		u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
 		u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
 		u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
+		char ncqe[MLXSW_PCI_CQE_SIZE_MAX];
+
+		memcpy(ncqe, cqe, q->elem_size);
+		mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 
 		if (sendq) {
 			struct mlxsw_pci_queue *sdq;
 
 			sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn);
 			mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
-						 wqe_counter, cqe);
+						 wqe_counter, ncqe);
 			q->u.cq.comp_sdq_count++;
 		} else {
 			struct mlxsw_pci_queue *rdq;
 
 			rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn);
 			mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
-						 wqe_counter, q->u.cq.v, cqe);
+						 wqe_counter, q->u.cq.v, ncqe);
 			q->u.cq.comp_rdq_count++;
 		}
 		if (++items == credits)
 			break;
 	}
-	if (items) {
-		mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+	if (items)
 		mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
-	}
 }
 
 static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
@@ -833,7 +847,6 @@
 					     &mem_item->mapaddr);
 	if (!mem_item->buf)
 		return -ENOMEM;
-	memset(mem_item->buf, 0, mem_item->size);
 
 	q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL);
 	if (!q->elem_info) {
@@ -1037,42 +1050,6 @@
 	mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask);
 }
 
-static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
-				     struct mlxsw_res *res)
-{
-	int index, i;
-	u64 data;
-	u16 id;
-	int err;
-
-	if (!res)
-		return 0;
-
-	mlxsw_cmd_mbox_zero(mbox);
-
-	for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES;
-	     index++) {
-		err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index);
-		if (err)
-			return err;
-
-		for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) {
-			id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i);
-			data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i);
-
-			if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID)
-				return 0;
-
-			mlxsw_res_parse(res, id, data);
-		}
-	}
-
-	/* If after MLXSW_RESOURCES_QUERY_MAX_QUERIES we still didn't get
-	 * MLXSW_RESOURCES_TABLE_END_ID, something went bad in the FW.
-	 */
-	return -EIO;
-}
-
 static int
 mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_pci *mlxsw_pci,
 				const struct mlxsw_config_profile *profile,
@@ -1365,10 +1342,10 @@
 		u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
 
 		if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
-			break;
+			return 0;
 		cond_resched();
 	} while (time_before(jiffies, end));
-	return 0;
+	return -EBUSY;
 }
 
 static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
@@ -1448,6 +1425,15 @@
 	mlxsw_pci->doorbell_offset =
 		mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox);
 
+	if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) {
+		dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n");
+		err = -EINVAL;
+		goto err_fr_rn_clk_bar;
+	}
+
+	mlxsw_pci->free_running_clock_offset =
+		mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox);
+
 	num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
 	err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
 	if (err)
@@ -1457,7 +1443,7 @@
 	if (err)
 		goto err_boardinfo;
 
-	err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res);
+	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
 	if (err)
 		goto err_query_resources;
 
@@ -1503,6 +1489,7 @@
 err_boardinfo:
 	mlxsw_pci_fw_area_fini(mlxsw_pci);
 err_fw_area_init:
+err_fr_rn_clk_bar:
 err_doorbell_page_bar:
 err_iface_rev:
 err_query_fw:
@@ -1571,6 +1558,7 @@
 		err = -EAGAIN;
 		goto unlock;
 	}
+	mlxsw_skb_cb(skb)->tx_info = *tx_info;
 	elem_info->u.sdq.skb = skb;
 
 	wqe = elem_info->elem;
@@ -1594,6 +1582,9 @@
 			goto unmap_frags;
 	}
 
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
 	/* Set unused sq entries byte count to zero. */
 	for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
 		mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
@@ -1706,6 +1697,24 @@
 	return err;
 }
 
+static u32 mlxsw_pci_read_frc_h(void *bus_priv)
+{
+	struct mlxsw_pci *mlxsw_pci = bus_priv;
+	u64 frc_offset;
+
+	frc_offset = mlxsw_pci->free_running_clock_offset;
+	return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_H(frc_offset));
+}
+
+static u32 mlxsw_pci_read_frc_l(void *bus_priv)
+{
+	struct mlxsw_pci *mlxsw_pci = bus_priv;
+	u64 frc_offset;
+
+	frc_offset = mlxsw_pci->free_running_clock_offset;
+	return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_L(frc_offset));
+}
+
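The two callbacks above only expose 32-bit halves of the free-running counter. A minimal sketch of how a consumer could stitch them into a 64-bit value, assuming the usual high/low/high re-read to catch a low-word wrap between the reads (example_read_frc is illustrative, not the driver's actual PTP consumer):

static u64 example_read_frc(const struct mlxsw_bus *bus, void *bus_priv)
{
	u32 frc_h, frc_l, frc_h2;

	do {
		frc_h = bus->read_frc_h(bus_priv);	/* high word */
		frc_l = bus->read_frc_l(bus_priv);	/* low word */
		frc_h2 = bus->read_frc_h(bus_priv);	/* re-check high */
	} while (frc_h != frc_h2);	/* low word wrapped; retry */

	return (u64)frc_h << 32 | frc_l;
}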
 static const struct mlxsw_bus mlxsw_pci_bus = {
 	.kind			= "pci",
 	.init			= mlxsw_pci_init,
@@ -1713,6 +1722,8 @@
 	.skb_transmit_busy	= mlxsw_pci_skb_transmit_busy,
 	.skb_transmit		= mlxsw_pci_skb_transmit,
 	.cmd_exec		= mlxsw_pci_cmd_exec,
+	.read_frc_h		= mlxsw_pci_read_frc_h,
+	.read_frc_l		= mlxsw_pci_read_frc_l,
 	.features		= MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
 };
 
@@ -1720,7 +1731,6 @@
 {
 	const char *driver_name = pdev->driver->name;
 	struct mlxsw_pci *mlxsw_pci;
-	bool called_again = false;
 	int err;
 
 	mlxsw_pci = kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL);
@@ -1775,20 +1785,13 @@
 	mlxsw_pci->bus_info.device_kind = driver_name;
 	mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
 	mlxsw_pci->bus_info.dev = &pdev->dev;
+	mlxsw_pci->bus_info.read_frc_capable = true;
 	mlxsw_pci->id = id;
 
-again:
 	err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
 					     &mlxsw_pci_bus, mlxsw_pci, false,
 					     NULL);
-	/* -EAGAIN is returned in case the FW was updated. FW needs
-	 * a reset, so lets try to call mlxsw_core_bus_device_register()
-	 * again.
-	 */
-	if (err == -EAGAIN && !called_again) {
-		called_again = true;
-		goto again;
-	} else if (err) {
+	if (err) {
 		dev_err(&pdev->dev, "cannot register bus device\n");
 		goto err_bus_device_register;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index 946339e..5b13236 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -9,6 +9,7 @@
 #define PCI_DEVICE_ID_MELLANOX_SWITCHX2		0xc738
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM		0xcb84
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM2	0xcf6c
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM3	0xcf70
 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB		0xcb20
 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB2	0xcf08
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index 83f452b..e57e42e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
 
 #define MLXSW_PCI_SW_RESET			0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT		BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	5000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	20000
 #define MLXSW_PCI_SW_RESET_WAIT_MSECS		100
 #define MLXSW_PCI_FW_READY			0xA1844
 #define MLXSW_PCI_FW_READY_MASK			0xFFFF
@@ -43,6 +43,9 @@
 #define MLXSW_PCI_DOORBELL(offset, type_offset, num)	\
 	((offset) + (type_offset) + (num) * 4)
 
+#define MLXSW_PCI_FREE_RUNNING_CLOCK_H(offset)	(offset)
+#define MLXSW_PCI_FREE_RUNNING_CLOCK_L(offset)	((offset) + 4)
+
 #define MLXSW_PCI_CQS_MAX	96
 #define MLXSW_PCI_EQS_COUNT	2
 #define MLXSW_PCI_EQ_ASYNC_NUM	0
@@ -53,6 +56,7 @@
 #define MLXSW_PCI_WQE_SIZE	32 /* 32 bytes per element */
 #define MLXSW_PCI_CQE01_SIZE	16 /* 16 bytes per element */
 #define MLXSW_PCI_CQE2_SIZE	32 /* 32 bytes per element */
+#define MLXSW_PCI_CQE_SIZE_MAX	MLXSW_PCI_CQE2_SIZE
 #define MLXSW_PCI_EQE_SIZE	16 /* 16 bytes per element */
 #define MLXSW_PCI_WQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
 #define MLXSW_PCI_CQE01_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE)
@@ -221,7 +225,7 @@
 MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
 
 /* pci_eqe_cqn
- * Completion Queue that triggeret this EQE.
+ * Completion Queue that triggered this EQE.
  */
 MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6e8b619..5494cf9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -295,6 +295,7 @@
 	MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0,
 	MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1,
 	MLXSW_REG_SFD_REC_TYPE_MULTICAST = 0x2,
+	MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL = 0xC,
 };
 
 /* reg_sfd_rec_type
@@ -525,6 +526,61 @@
 	mlxsw_reg_sfd_mc_mid_set(payload, rec_index, mid);
 }
 
+/* reg_sfd_uc_tunnel_uip_msb
+ * When protocol is IPv4, the most significant byte of the underlay IPv4
+ * destination IP.
+ * When protocol is IPv6, reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_msb, MLXSW_REG_SFD_BASE_LEN, 24,
+		     8, MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_tunnel_fid
+ * Filtering ID.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_fid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+		     MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+enum mlxsw_reg_sfd_uc_tunnel_protocol {
+	MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4,
+	MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV6,
+};
+
+/* reg_sfd_uc_tunnel_protocol
+ * IP protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_protocol, MLXSW_REG_SFD_BASE_LEN, 27,
+		     1, MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+/* reg_sfd_uc_tunnel_uip_lsb
+ * When protocol is IPv4, the least significant bytes of the underlay
+ * IPv4 destination IP.
+ * When protocol is IPv6, pointer to the underlay IPv6 destination IP
+ * which is configured by RIPS.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_lsb, MLXSW_REG_SFD_BASE_LEN, 0,
+		     24, MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void
+mlxsw_reg_sfd_uc_tunnel_pack(char *payload, int rec_index,
+			     enum mlxsw_reg_sfd_rec_policy policy,
+			     const char *mac, u16 fid,
+			     enum mlxsw_reg_sfd_rec_action action, u32 uip,
+			     enum mlxsw_reg_sfd_uc_tunnel_protocol proto)
+{
+	mlxsw_reg_sfd_rec_pack(payload, rec_index,
+			       MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL, mac,
+			       action);
+	mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy);
+	mlxsw_reg_sfd_uc_tunnel_uip_msb_set(payload, rec_index, uip >> 24);
+	mlxsw_reg_sfd_uc_tunnel_uip_lsb_set(payload, rec_index, uip);
+	mlxsw_reg_sfd_uc_tunnel_fid_set(payload, rec_index, fid);
+	mlxsw_reg_sfd_uc_tunnel_protocol_set(payload, rec_index, proto);
+}
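As a usage sketch only: writing one unicast-tunnel FDB record could look as follows, assuming MLXSW_REG_SFD_LEN, mlxsw_reg_sfd_pack() and the op/policy/action enumerators declared earlier in this file, plus mlxsw_reg_write() from core.h.

static int example_sfd_uc_tunnel_write(struct mlxsw_core *core,
				       const char *mac, u16 fid, u32 uip)
{
	char *sfd_pl;
	int err;

	/* SFD payloads are large, so allocate rather than use the stack. */
	sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
	if (!sfd_pl)
		return -ENOMEM;

	mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_WRITE_EDIT, 0);
	mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0,
				     MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY,
				     mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
				     uip,
				     MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4);
	err = mlxsw_reg_write(core, MLXSW_REG(sfd), sfd_pl);
	kfree(sfd_pl);
	return err;
}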
+
 /* SFN - Switch FDB Notification Register
  * -------------------------------------------
  * The switch provides notifications on newly learned FDB entries and
@@ -585,6 +641,10 @@
 	MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7,
 	/* Aged-out MAC address on a LAG port. */
 	MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8,
+	/* Learned unicast tunnel record. */
+	MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL = 0xD,
+	/* Aged-out unicast tunnel record. */
+	MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL = 0xE,
 };
 
 /* reg_sfn_rec_type
@@ -648,6 +708,66 @@
 	*p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index);
 }
 
+/* reg_sfn_uc_tunnel_uip_msb
+ * When protocol is IPv4, the most significant byte of the underlay IPv4
+ * address of the remote VTEP.
+ * When protocol is IPv6, reserved.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_msb, MLXSW_REG_SFN_BASE_LEN, 24,
+		     8, MLXSW_REG_SFN_REC_LEN, 0x08, false);
+
+enum mlxsw_reg_sfn_uc_tunnel_protocol {
+	MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV4,
+	MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV6,
+};
+
+/* reg_sfn_uc_tunnel_protocol
+ * IP protocol.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_protocol, MLXSW_REG_SFN_BASE_LEN, 27,
+		     1, MLXSW_REG_SFN_REC_LEN, 0x0C, false);
+
+/* reg_sfn_uc_tunnel_uip_lsb
+ * When protocol is IPv4, the least significant bytes of the underlay
+ * IPv4 address of the remote VTEP.
+ * When protocol is IPv6, ipv6_id to be queried from TNIPSD.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_lsb, MLXSW_REG_SFN_BASE_LEN, 0,
+		     24, MLXSW_REG_SFN_REC_LEN, 0x0C, false);
+
+enum mlxsw_reg_sfn_tunnel_port {
+	MLXSW_REG_SFN_TUNNEL_PORT_NVE,
+	MLXSW_REG_SFN_TUNNEL_PORT_VPLS,
+	MLXSW_REG_SFN_TUNNEL_FLEX_TUNNEL0,
+	MLXSW_REG_SFN_TUNNEL_FLEX_TUNNEL1,
+};
+
+/* reg_sfn_uc_tunnel_port
+ * Tunnel port.
+ * Reserved on Spectrum.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, tunnel_port, MLXSW_REG_SFN_BASE_LEN, 0, 4,
+		     MLXSW_REG_SFN_REC_LEN, 0x10, false);
+
+static inline void
+mlxsw_reg_sfn_uc_tunnel_unpack(char *payload, int rec_index, char *mac,
+			       u16 *p_fid, u32 *p_uip,
+			       enum mlxsw_reg_sfn_uc_tunnel_protocol *p_proto)
+{
+	u32 uip_msb, uip_lsb;
+
+	mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac);
+	*p_fid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index);
+	uip_msb = mlxsw_reg_sfn_uc_tunnel_uip_msb_get(payload, rec_index);
+	uip_lsb = mlxsw_reg_sfn_uc_tunnel_uip_lsb_get(payload, rec_index);
+	*p_uip = uip_msb << 24 | uip_lsb;
+	*p_proto = mlxsw_reg_sfn_uc_tunnel_protocol_get(payload, rec_index);
+}
+
 /* SPMS - Switch Port MSTP/RSTP State Register
  * -------------------------------------------
  * Configures the spanning tree state of a physical port.
@@ -877,7 +997,7 @@
 	MLXSW_REG_ZERO(spaft, payload);
 	mlxsw_reg_spaft_local_port_set(payload, local_port);
 	mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged);
-	mlxsw_reg_spaft_allow_prio_tagged_set(payload, true);
+	mlxsw_reg_spaft_allow_prio_tagged_set(payload, allow_untagged);
 	mlxsw_reg_spaft_allow_tagged_set(payload, true);
 }
 
@@ -1069,6 +1189,8 @@
 	MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
 	MLXSW_REG_SFDF_FLUSH_PER_LAG,
 	MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+	MLXSW_REG_SFDF_FLUSH_PER_NVE,
+	MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID,
 };
 
 /* reg_sfdf_flush_type
@@ -1079,6 +1201,10 @@
  * 3 - All FID dynamic entries pointing to port are flushed.
  * 4 - All dynamic entries pointing to LAG are flushed.
  * 5 - All FID dynamic entries pointing to LAG are flushed.
+ * 6 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are
+ *     flushed.
+ * 7 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are
+ *     flushed, per FID.
  * Access: RW
  */
 MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
@@ -1315,12 +1441,19 @@
  */
 MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20);
 
-static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash)
+/* reg_slcr_seed
+ * LAG seed value. The seed is the same for all ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, seed, 0x08, 0, 32);
+
+static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash, u32 seed)
 {
 	MLXSW_REG_ZERO(slcr, payload);
 	mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL);
 	mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC);
 	mlxsw_reg_slcr_lag_hash_set(payload, lag_hash);
+	mlxsw_reg_slcr_seed_set(payload, seed);
 }
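With the new argument every caller must now supply a seed explicitly. A plausible, purely illustrative initialization derives it from the base MAC so it is stable per device yet differs across devices; mlxsw_reg_write() and MLXSW_REG_SLCR_LEN are assumed from elsewhere in the driver:

#include <linux/jhash.h>
#include <linux/etherdevice.h>

static int example_lag_hash_init(struct mlxsw_core *core,
				 const u8 base_mac[ETH_ALEN], u16 lag_hash)
{
	char slcr_pl[MLXSW_REG_SLCR_LEN];
	u32 seed;

	/* Stable for a given device, different across devices. */
	seed = jhash(base_mac, ETH_ALEN, 0);
	mlxsw_reg_slcr_pack(slcr_pl, lag_hash, seed);
	return mlxsw_reg_write(core, MLXSW_REG(slcr), slcr_pl);
}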
 
 /* SLCOR - Switch LAG Collector Register
@@ -2066,6 +2199,14 @@
  */
 MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16);
 
+/* reg_pagt_multi
+ * Multi-ACL
+ * 0 - This ACL is the last ACL in the multi-ACL
+ * 1 - This ACL is part of a multi-ACL
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pagt, multi, 0x30, 31, 1, 0x04, 0x00, false);
+
 /* reg_pagt_acl_id
  * ACL identifier
  * Access: RW
@@ -2079,12 +2220,13 @@
 }
 
 static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index,
-					      u16 acl_id)
+					      u16 acl_id, bool multi)
 {
 	u8 size = mlxsw_reg_pagt_size_get(payload);
 
 	if (index >= size)
 		mlxsw_reg_pagt_size_set(payload, index + 1);
+	mlxsw_reg_pagt_multi_set(payload, index, multi);
 	mlxsw_reg_pagt_acl_id_set(payload, index, acl_id);
 }
 
@@ -2362,6 +2504,43 @@
 	*p_a = mlxsw_reg_pefa_a_get(payload);
 }
 
+/* PEMRBT - Policy-Engine Multicast Router Binding Table Register
+ * --------------------------------------------------------------
+ * This register is used for binding Multicast router to an ACL group
+ * that serves the MC router.
+ * This register is not supported by SwitchX/-2 and Spectrum.
+ */
+#define MLXSW_REG_PEMRBT_ID 0x3014
+#define MLXSW_REG_PEMRBT_LEN 0x14
+
+MLXSW_REG_DEFINE(pemrbt, MLXSW_REG_PEMRBT_ID, MLXSW_REG_PEMRBT_LEN);
+
+enum mlxsw_reg_pemrbt_protocol {
+	MLXSW_REG_PEMRBT_PROTO_IPV4,
+	MLXSW_REG_PEMRBT_PROTO_IPV6,
+};
+
+/* reg_pemrbt_protocol
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pemrbt, protocol, 0x00, 0, 1);
+
+/* reg_pemrbt_group_id
+ * ACL group identifier.
+ * Range 0..cap_max_acl_groups-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pemrbt, group_id, 0x10, 0, 16);
+
+static inline void
+mlxsw_reg_pemrbt_pack(char *payload, enum mlxsw_reg_pemrbt_protocol protocol,
+		      u16 group_id)
+{
+	MLXSW_REG_ZERO(pemrbt, payload);
+	mlxsw_reg_pemrbt_protocol_set(payload, protocol);
+	mlxsw_reg_pemrbt_group_id_set(payload, group_id);
+}
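Binding, say, the IPv4 multicast-router ACL group is then a plain pack-and-write; a sketch (mlxsw_reg_write() is assumed from core.h):

static int example_mr_acl_group_bind(struct mlxsw_core *core, u16 group_id)
{
	char pemrbt_pl[MLXSW_REG_PEMRBT_LEN];

	mlxsw_reg_pemrbt_pack(pemrbt_pl, MLXSW_REG_PEMRBT_PROTO_IPV4,
			      group_id);
	return mlxsw_reg_write(core, MLXSW_REG(pemrbt), pemrbt_pl);
}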
+
 /* PTCE-V2 - Policy-Engine TCAM Entry Register Version 2
  * -----------------------------------------------------
  * This register is used for accessing rules within a TCAM region.
@@ -2573,7 +2752,7 @@
 	mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank);
 	mlxsw_reg_perpt_erpt_index_set(payload, erpt_index);
 	mlxsw_reg_perpt_key_size_set(payload, key_size);
-	mlxsw_reg_perpt_bf_bypass_set(payload, true);
+	mlxsw_reg_perpt_bf_bypass_set(payload, false);
 	mlxsw_reg_perpt_erp_id_set(payload, erp_id);
 	mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank);
 	mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index);
@@ -2765,8 +2944,9 @@
 					u32 priority,
 					const char *tcam_region_info,
 					const char *key, u8 erp_id,
-					bool large_exists, u32 lkey_id,
-					u32 action_pointer)
+					u16 delta_start, u8 delta_mask,
+					u8 delta_value, bool large_exists,
+					u32 lkey_id, u32 action_pointer)
 {
 	MLXSW_REG_ZERO(ptce3, payload);
 	mlxsw_reg_ptce3_v_set(payload, valid);
@@ -2775,6 +2955,9 @@
 	mlxsw_reg_ptce3_tcam_region_info_memcpy_to(payload, tcam_region_info);
 	mlxsw_reg_ptce3_flex2_key_blocks_memcpy_to(payload, key);
 	mlxsw_reg_ptce3_erp_id_set(payload, erp_id);
+	mlxsw_reg_ptce3_delta_start_set(payload, delta_start);
+	mlxsw_reg_ptce3_delta_mask_set(payload, delta_mask);
+	mlxsw_reg_ptce3_delta_value_set(payload, delta_value);
 	mlxsw_reg_ptce3_large_exists_set(payload, large_exists);
 	mlxsw_reg_ptce3_large_entry_key_id_set(payload, lkey_id);
 	mlxsw_reg_ptce3_action_pointer_set(payload, action_pointer);
@@ -2832,7 +3015,7 @@
 	mlxsw_reg_percr_region_id_set(payload, region_id);
 	mlxsw_reg_percr_atcam_ignore_prune_set(payload, false);
 	mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false);
-	mlxsw_reg_percr_bf_bypass_set(payload, true);
+	mlxsw_reg_percr_bf_bypass_set(payload, false);
 }
 
 /* PERERP - Policy-Engine Region eRP Register
@@ -2921,6 +3104,72 @@
 	mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id);
 }
 
+/* PEABFE - Policy-Engine Algorithmic Bloom Filter Entries Register
+ * ----------------------------------------------------------------
+ * This register configures the Bloom filter entries.
+ */
+#define MLXSW_REG_PEABFE_ID 0x3022
+#define MLXSW_REG_PEABFE_BASE_LEN 0x10
+#define MLXSW_REG_PEABFE_BF_REC_LEN 0x4
+#define MLXSW_REG_PEABFE_BF_REC_MAX_COUNT 256
+#define MLXSW_REG_PEABFE_LEN (MLXSW_REG_PEABFE_BASE_LEN + \
+			      MLXSW_REG_PEABFE_BF_REC_LEN * \
+			      MLXSW_REG_PEABFE_BF_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(peabfe, MLXSW_REG_PEABFE_ID, MLXSW_REG_PEABFE_LEN);
+
+/* reg_peabfe_size
+ * Number of BF entries to be updated.
+ * Range 1..256
+ * Access: Op
+ */
+MLXSW_ITEM32(reg, peabfe, size, 0x00, 0, 9);
+
+/* reg_peabfe_bf_entry_state
+ * Bloom filter state
+ * 0 - Clear
+ * 1 - Set
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_state,
+		     MLXSW_REG_PEABFE_BASE_LEN,	31, 1,
+		     MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+/* reg_peabfe_bf_entry_bank
+ * Bloom filter bank ID
+ * Range 0..cap_max_erp_table_banks-1
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_bank,
+		     MLXSW_REG_PEABFE_BASE_LEN,	24, 4,
+		     MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+/* reg_peabfe_bf_entry_index
+ * Bloom filter entry index
+ * Range 0..2^cap_max_bf_log-1
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_index,
+		     MLXSW_REG_PEABFE_BASE_LEN,	0, 24,
+		     MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_peabfe_pack(char *payload)
+{
+	MLXSW_REG_ZERO(peabfe, payload);
+}
+
+static inline void mlxsw_reg_peabfe_rec_pack(char *payload, int rec_index,
+					     u8 state, u8 bank, u32 bf_index)
+{
+	u8 num_rec = mlxsw_reg_peabfe_size_get(payload);
+
+	if (rec_index >= num_rec)
+		mlxsw_reg_peabfe_size_set(payload, rec_index + 1);
+	mlxsw_reg_peabfe_bf_entry_state_set(payload, rec_index, state);
+	mlxsw_reg_peabfe_bf_entry_bank_set(payload, rec_index, bank);
+	mlxsw_reg_peabfe_bf_entry_index_set(payload, rec_index, bf_index);
+}
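Since mlxsw_reg_peabfe_rec_pack() bumps the size field as records are appended, setting a batch of Bloom filter entries is just a loop over the records; a sketch:

static void example_peabfe_set_entries(char *peabfe_pl, u8 bank,
				       const u32 *bf_indexes, int count)
{
	int i;

	mlxsw_reg_peabfe_pack(peabfe_pl);
	for (i = 0; i < count; i++)
		/* state 1 - set the entry (0 would clear it) */
		mlxsw_reg_peabfe_rec_pack(peabfe_pl, i, 1, bank,
					  bf_indexes[i]);
}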
+
 /* IEDR - Infrastructure Entry Delete Register
  * ----------------------------------------------------
  * This register is used for deleting entries from the entry tables.
@@ -3215,7 +3464,7 @@
  * Configures the ETS elements.
  */
 #define MLXSW_REG_QEEC_ID 0x400D
-#define MLXSW_REG_QEEC_LEN 0x1C
+#define MLXSW_REG_QEEC_LEN 0x20
 
 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
 
@@ -3257,6 +3506,27 @@
  */
 MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
 
+/* reg_qeec_mise
+ * Min shaper configuration enable. Enables configuration of the min
+ * shaper on this ETS element
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+
+/* reg_qeec_ptps
+ * PTP shaper
+ * 0: regular shaper mode
+ * 1: PTP oriented shaper
+ * Allowed only for hierarchy 0
+ * Not supported for CPU port
+ * Note that ptps mode may affect the shaper rates of all hierarchies
+ * Supported only on Spectrum-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1);
+
 enum {
 	MLXSW_REG_QEEC_BYTES_MODE,
 	MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3273,6 +3543,17 @@
  */
 MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
 
+/* The smallest permitted min shaper rate. */
+#define MLXSW_REG_QEEC_MIS_MIN	200000		/* Kbps */
+
+/* reg_qeec_min_shaper_rate
+ * Min shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
+
 /* reg_qeec_mase
  * Max shaper configuration enable. Enables configuration of the max
  * shaper on this ETS element.
@@ -3332,6 +3613,16 @@
 	mlxsw_reg_qeec_next_element_index_set(payload, next_index);
 }
 
+static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port,
+					    bool ptps)
+{
+	MLXSW_REG_ZERO(qeec, payload);
+	mlxsw_reg_qeec_local_port_set(payload, local_port);
+	mlxsw_reg_qeec_element_hierarchy_set(payload,
+					     MLXSW_REG_QEEC_HIERARCY_PORT);
+	mlxsw_reg_qeec_ptps_set(payload, ptps);
+}
+
 /* QRWE - QoS ReWrite Enable
  * -------------------------
  * This register configures the rewrite enable per receive port.
@@ -3545,6 +3836,112 @@
 	mlxsw_reg_qtctm_mc_set(payload, mc);
 }
 
+/* QPSC - QoS PTP Shaper Configuration Register
+ * --------------------------------------------
+ * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1.
+ * Supported only on Spectrum-1.
+ */
+#define MLXSW_REG_QPSC_ID 0x401B
+#define MLXSW_REG_QPSC_LEN 0x28
+
+MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN);
+
+enum mlxsw_reg_qpsc_port_speed {
+	MLXSW_REG_QPSC_PORT_SPEED_100M,
+	MLXSW_REG_QPSC_PORT_SPEED_1G,
+	MLXSW_REG_QPSC_PORT_SPEED_10G,
+	MLXSW_REG_QPSC_PORT_SPEED_25G,
+};
+
+/* reg_qpsc_port_speed
+ * Port speed.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4);
+
+/* reg_qpsc_shaper_time_exp
+ * The base-time-interval for updating the shapers' tokens (for all hierarchies).
+ * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec
+ * shaper_rate = 64bit * shaper_inc / shaper_update_rate
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4);
+
+/* reg_qpsc_shaper_time_mantissa
+ * The base-time-interval for updating the shapers' tokens (for all hierarchies).
+ * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec
+ * shaper_rate = 64bit * shaper_inc / shaper_update_rate
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5);
+
+/* reg_qpsc_shaper_inc
+ * Number of tokens added to shaper on each update.
+ * Units of 8B.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5);
+
+/* reg_qpsc_shaper_bs
+ * Max shaper Burst size.
+ * Burst size is 2 ^ max_shaper_bs * 512 [bits]
+ * Range is: 5..25 (from 2KB..2GB)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6);
+
+/* reg_qpsc_ptsc_we
+ * Write enable to port_to_shaper_credits.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1);
+
+/* reg_qpsc_port_to_shaper_credits
+ * For split ports: range 1..57
+ * For non-split ports: range 1..112
+ * Written only when ptsc_we is set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8);
+
+/* reg_qpsc_ing_timestamp_inc
+ * Ingress timestamp increment.
+ * 2's complement.
+ * The timestamp of MTPPTR at ingress will be incremented by this value. Global
+ * value for all ports.
+ * Same units as used by MTPPTR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32);
+
+/* reg_qpsc_egr_timestamp_inc
+ * Egress timestamp increment.
+ * 2's complement.
+ * The timestamp of MTPPTR at egress will be incremented by this value. Global
+ * value for all ports.
+ * Same units as used by MTPPTR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32);
+
+static inline void
+mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed,
+		    u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc,
+		    u8 shaper_bs, u8 port_to_shaper_credits,
+		    int ing_timestamp_inc, int egr_timestamp_inc)
+{
+	MLXSW_REG_ZERO(qpsc, payload);
+	mlxsw_reg_qpsc_port_speed_set(payload, port_speed);
+	mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp);
+	mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa);
+	mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc);
+	mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs);
+	mlxsw_reg_qpsc_ptsc_we_set(payload, true);
+	mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits);
+	mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc);
+	mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc);
+}
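To make the shaper formulas above concrete, one worked and purely illustrative configuration:

/* With shaper_time_exp = 0 and shaper_time_mantissa = 0, tokens are
 * updated every 2^0 * (1 + 0) * 32 ns = 32 ns. With shaper_inc = 1,
 * each update adds one 8-byte (64-bit) token, giving a shaped rate of
 * 64 bit / 32 ns = 2 Gbps.
 */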
+
 /* PMLP - Ports Module to Local Port Register
  * ------------------------------------------
  * Configures the assignment of modules to local ports.
@@ -3702,6 +4099,25 @@
  */
 MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
 
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M				BIT(0)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII			BIT(1)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII			BIT(2)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R				BIT(3)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G			BIT(4)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G		BIT(5)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR		BIT(6)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2	BIT(7)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR	BIT(8)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4		BIT(9)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2		BIT(10)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4		BIT(12)
+
+/* reg_ptys_ext_eth_proto_cap
+ * Extended Ethernet port supported speeds and protocols.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
+
 #define MLXSW_REG_PTYS_ETH_SPEED_SGMII			BIT(0)
 #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX		BIT(1)
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4		BIT(2)
@@ -3710,7 +4126,6 @@
 #define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2		BIT(5)
 #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4		BIT(6)
 #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4		BIT(7)
-#define MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4		BIT(8)
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR		BIT(12)
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR		BIT(13)
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR		BIT(14)
@@ -3756,6 +4171,12 @@
  */
 MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16);
 
+/* reg_ptys_ext_eth_proto_admin
+ * Extended speed and protocol to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ext_eth_proto_admin, 0x14, 0, 32);
+
 /* reg_ptys_eth_proto_admin
  * Speed and protocol to set port to.
  * Access: RW
@@ -3774,6 +4195,12 @@
  */
 MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16);
 
+/* reg_ptys_ext_eth_proto_oper
+ * The extended current speed and protocol configured for the port.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ext_eth_proto_oper, 0x20, 0, 32);
+
 /* reg_ptys_eth_proto_oper
  * The current speed and protocol configured for the port.
  * Access: RO
@@ -3792,12 +4219,23 @@
  */
 MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
 
-/* reg_ptys_eth_proto_lp_advertise
- * The protocols that were advertised by the link partner during
- * autonegotiation.
+enum mlxsw_reg_ptys_connector_type {
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_TP,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_AUI,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_BNC,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_MII,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_FIBRE,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_DA,
+	MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_OTHER,
+};
+
+/* reg_ptys_connector_type
+ * Connector type indication.
  * Access: RO
  */
-MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
+MLXSW_ITEM32(reg, ptys, connector_type, 0x2C, 0, 4);
 
 static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
 					   u32 proto_admin, bool autoneg)
@@ -3809,17 +4247,46 @@
 	mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
 }
 
+static inline void mlxsw_reg_ptys_ext_eth_pack(char *payload, u8 local_port,
+					       u32 proto_admin, bool autoneg)
+{
+	MLXSW_REG_ZERO(ptys, payload);
+	mlxsw_reg_ptys_local_port_set(payload, local_port);
+	mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH);
+	mlxsw_reg_ptys_ext_eth_proto_admin_set(payload, proto_admin);
+	mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
+}
+
 static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
 					     u32 *p_eth_proto_cap,
-					     u32 *p_eth_proto_adm,
+					     u32 *p_eth_proto_admin,
 					     u32 *p_eth_proto_oper)
 {
 	if (p_eth_proto_cap)
-		*p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload);
-	if (p_eth_proto_adm)
-		*p_eth_proto_adm = mlxsw_reg_ptys_eth_proto_admin_get(payload);
+		*p_eth_proto_cap =
+			mlxsw_reg_ptys_eth_proto_cap_get(payload);
+	if (p_eth_proto_admin)
+		*p_eth_proto_admin =
+			mlxsw_reg_ptys_eth_proto_admin_get(payload);
 	if (p_eth_proto_oper)
-		*p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload);
+		*p_eth_proto_oper =
+			mlxsw_reg_ptys_eth_proto_oper_get(payload);
+}
+
+static inline void mlxsw_reg_ptys_ext_eth_unpack(char *payload,
+						 u32 *p_eth_proto_cap,
+						 u32 *p_eth_proto_admin,
+						 u32 *p_eth_proto_oper)
+{
+	if (p_eth_proto_cap)
+		*p_eth_proto_cap =
+			mlxsw_reg_ptys_ext_eth_proto_cap_get(payload);
+	if (p_eth_proto_admin)
+		*p_eth_proto_admin =
+			mlxsw_reg_ptys_ext_eth_proto_admin_get(payload);
+	if (p_eth_proto_oper)
+		*p_eth_proto_oper =
+			mlxsw_reg_ptys_ext_eth_proto_oper_get(payload);
 }
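A query sketch tying the extended pack/unpack pair together; MLXSW_REG_PTYS_LEN and mlxsw_reg_query() are assumed from elsewhere in the driver, and packing with proto_admin 0 and autoneg false mirrors how the non-extended register is read before a query:

static int example_port_ext_speeds_get(struct mlxsw_core *core, u8 local_port,
				       u32 *p_cap, u32 *p_admin, u32 *p_oper)
{
	char ptys_pl[MLXSW_REG_PTYS_LEN];
	int err;

	mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, 0, false);
	err = mlxsw_reg_query(core, MLXSW_REG(ptys), ptys_pl);
	if (err)
		return err;
	mlxsw_reg_ptys_ext_eth_unpack(ptys_pl, p_cap, p_admin, p_oper);
	return 0;
}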
 
 static inline void mlxsw_reg_ptys_ib_pack(char *payload, u8 local_port,
@@ -4142,8 +4609,11 @@
 
 enum mlxsw_reg_ppcnt_grp {
 	MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0,
+	MLXSW_REG_PPCNT_RFC_2863_CNT = 0x1,
 	MLXSW_REG_PPCNT_RFC_2819_CNT = 0x2,
+	MLXSW_REG_PPCNT_RFC_3635_CNT = 0x3,
 	MLXSW_REG_PPCNT_EXT_CNT = 0x5,
+	MLXSW_REG_PPCNT_DISCARD_CNT = 0x6,
 	MLXSW_REG_PPCNT_PRIO_CNT = 0x10,
 	MLXSW_REG_PPCNT_TC_CNT = 0x11,
 	MLXSW_REG_PPCNT_TC_CONG_TC = 0x13,
@@ -4158,6 +4628,7 @@
  * 0x2: RFC 2819 Counters
  * 0x3: RFC 3635 Counters
  * 0x5: Ethernet Extended Counters
+ * 0x6: Ethernet Discard Counters
  * 0x8: Link Level Retransmission Counters
  * 0x10: Per Priority Counters
  * 0x11: Per Traffic Class Counters
@@ -4301,8 +4772,46 @@
 MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted,
 	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64);
 
+/* Ethernet RFC 2863 Counter Group */
+
+/* reg_ppcnt_if_in_discards
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, if_in_discards,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64);
+
+/* reg_ppcnt_if_out_discards
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, if_out_discards,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64);
+
+/* reg_ppcnt_if_out_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, if_out_errors,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64);
+
 /* Ethernet RFC 2819 Counter Group */
 
+/* reg_ppcnt_ether_stats_undersize_pkts
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_undersize_pkts,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64);
+
+/* reg_ppcnt_ether_stats_oversize_pkts
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_oversize_pkts,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64);
+
+/* reg_ppcnt_ether_stats_fragments
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_fragments,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64);
+
 /* reg_ppcnt_ether_stats_pkts64octets
  * Access: RO
  */
@@ -4363,6 +4872,32 @@
 MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts8192to10239octets,
 	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0xA0, 0, 64);
 
+/* Ethernet RFC 3635 Counter Group */
+
+/* reg_ppcnt_dot3stats_fcs_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, dot3stats_fcs_errors,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
+
+/* reg_ppcnt_dot3stats_symbol_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, dot3stats_symbol_errors,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
+
+/* reg_ppcnt_dot3control_in_unknown_opcodes
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, dot3control_in_unknown_opcodes,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
+
+/* reg_ppcnt_dot3in_pause_frames
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, dot3in_pause_frames,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
+
 /* Ethernet Extended Counter Group Counters */
 
 /* reg_ppcnt_ecn_marked
@@ -4371,6 +4906,80 @@
 MLXSW_ITEM64(reg, ppcnt, ecn_marked,
 	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
 
+/* Ethernet Discard Counter Group Counters */
+
+/* reg_ppcnt_ingress_general
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ingress_general,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
+
+/* reg_ppcnt_ingress_policy_engine
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ingress_policy_engine,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
+
+/* reg_ppcnt_ingress_vlan_membership
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ingress_vlan_membership,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64);
+
+/* reg_ppcnt_ingress_tag_frame_type
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ingress_tag_frame_type,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64);
+
+/* reg_ppcnt_egress_vlan_membership
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, egress_vlan_membership,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64);
+
+/* reg_ppcnt_loopback_filter
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, loopback_filter,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64);
+
+/* reg_ppcnt_egress_general
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, egress_general,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64);
+
+/* reg_ppcnt_egress_hoq
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, egress_hoq,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64);
+
+/* reg_ppcnt_egress_policy_engine
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, egress_policy_engine,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64);
+
+/* reg_ppcnt_ingress_tx_link_down
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ingress_tx_link_down,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
+
+/* reg_ppcnt_egress_stp_filter
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, egress_stp_filter,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
+
+/* reg_ppcnt_egress_sll
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, egress_sll,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
+
 /* Ethernet Per Priority Group Counters */
 
 /* reg_ppcnt_rx_octets
@@ -4728,6 +5337,42 @@
 	mlxsw_reg_pspa_sub_port_set(payload, 0);
 }
 
+/* PPLR - Port Physical Loopback Register
+ * --------------------------------------
+ * This register allows configuration of the port's loopback mode.
+ */
+#define MLXSW_REG_PPLR_ID 0x5018
+#define MLXSW_REG_PPLR_LEN 0x8
+
+MLXSW_REG_DEFINE(pplr, MLXSW_REG_PPLR_ID, MLXSW_REG_PPLR_LEN);
+
+/* reg_pplr_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pplr, local_port, 0x00, 16, 8);
+
+/* Phy local loopback. When set, the port's egress traffic is looped back
+ * to the receiver and the port transmitter is disabled.
+ */
+#define MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL BIT(1)
+
+/* reg_pplr_lb_en
+ * Loopback enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pplr, lb_en, 0x04, 0, 8);
+
+static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port,
+				       bool phy_local)
+{
+	MLXSW_REG_ZERO(pplr, payload);
+	mlxsw_reg_pplr_local_port_set(payload, local_port);
+	mlxsw_reg_pplr_lb_en_set(payload,
+				 phy_local ?
+				 MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0);
+}
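Toggling PHY-local loopback on a port is then a one-shot write; a sketch (mlxsw_reg_write() assumed from core.h):

static int example_phy_loopback_set(struct mlxsw_core *core, u8 local_port,
				    bool enable)
{
	char pplr_pl[MLXSW_REG_PPLR_LEN];

	mlxsw_reg_pplr_pack(pplr_pl, local_port, enable);
	return mlxsw_reg_write(core, MLXSW_REG(pplr), pplr_pl);
}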
+
 /* HTGT - Host Trap Group Table
  * ----------------------------
  * Configures the properties for forwarding to CPU.
@@ -4773,6 +5418,17 @@
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
+
+	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
+	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
+};
+
+enum mlxsw_reg_htgt_discard_trap_group {
+	MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
 };
 
 /* reg_htgt_trap_group
@@ -4910,6 +5566,8 @@
 	MLXSW_REG_HPKT_ACTION_DISCARD,
 	MLXSW_REG_HPKT_ACTION_SOFT_DISCARD,
 	MLXSW_REG_HPKT_ACTION_TRAP_AND_SOFT_DISCARD,
+	MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU,
+	MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT = 15,
 };
 
 /* reg_hpkt_action
@@ -4920,6 +5578,8 @@
  * 3 - Discard.
  * 4 - Soft discard (allow other traps to act on the packet).
  * 5 - Trap and soft discard (allow other traps to overwrite this trap).
+ * 6 - Trap to CPU (CPU receives sole copy) and count it as error.
+ * 15 - Restore the firmware's default action.
  * Access: RW
  *
  * Note: Must be set to 0 (forward) for event trap IDs, as they are already
@@ -5263,6 +5923,8 @@
 	MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
 	/* IPinIP IPv6 underlay Unicast */
 	MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
+	/* IPinIP generic - used for Spectrum-2 underlay RIF */
+	MLXSW_REG_RITR_LOOPBACK_GENERIC,
 };
 
 /* reg_ritr_loopback_protocol
@@ -5303,6 +5965,13 @@
  */
 MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
 
+/* reg_ritr_loopback_ipip_underlay_rif
+ * Underlay ingress router interface.
+ * Reserved for Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16);
+
 /* reg_ritr_loopback_ipip_usip*
  * Encapsulation Underlay source IP.
  * Access: RW
@@ -5418,11 +6087,12 @@
 mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
 			    enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
 			    enum mlxsw_reg_ritr_loopback_ipip_options options,
-			    u16 uvr_id, u32 gre_key)
+			    u16 uvr_id, u16 underlay_rif, u32 gre_key)
 {
 	mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
 	mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
 	mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
+	mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif);
 	mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
 }
 
@@ -5430,12 +6100,12 @@
 mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
 			    enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
 			    enum mlxsw_reg_ritr_loopback_ipip_options options,
-			    u16 uvr_id, u32 usip, u32 gre_key)
+			    u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key)
 {
 	mlxsw_reg_ritr_loopback_protocol_set(payload,
 				    MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
 	mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
-						 uvr_id, gre_key);
+						 uvr_id, underlay_rif, gre_key);
 	mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
 }
 
@@ -6797,6 +7467,13 @@
  */
 MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
 
+/* reg_rtdp_egress_router_interface
+ * Underlay egress router interface.
+ * Valid range is from 0 to cap_max_router_interfaces - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16);
+
 /* IPinIP */
 
 /* reg_rtdp_ipip_irif
@@ -7446,6 +8123,35 @@
 		*p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload);
 }
 
+/* FORE - Fan Out of Range Event Register
+ * --------------------------------------
+ * This register reports the status of the controlled fans compared to the
+ * range defined by the MFSL register.
+ */
+#define MLXSW_REG_FORE_ID 0x9007
+#define MLXSW_REG_FORE_LEN 0x0C
+
+MLXSW_REG_DEFINE(fore, MLXSW_REG_FORE_ID, MLXSW_REG_FORE_LEN);
+
+/* reg_fore_fan_under_limit
+ * Fan speed is below the low limit defined in MFSL register. Each bit relates
+ * to a single tachometer and indicates the specific tachometer reading is
+ * below the threshold.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, fore, fan_under_limit, 0x00, 16, 10);
+
+static inline void mlxsw_reg_fore_unpack(char *payload, u8 tacho,
+					 bool *fault)
+{
+	u16 limit;
+
+	if (fault) {
+		limit = mlxsw_reg_fore_fan_under_limit_get(payload);
+		*fault = limit & BIT(tacho);
+	}
+}
+
 /* MTCAP - Management Temperature Capabilities
  * -------------------------------------------
  * This register exposes the capabilities of the device and
@@ -7474,16 +8180,21 @@
 
 MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
 
+#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64
+#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256
 /* reg_mtmp_sensor_index
  * Sensors index to access.
  * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially
  * (module 0 is mapped to sensor_index 64).
  * Access: Index
  */
-MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12);
 
 /* Convert to milli degrees Celsius */
-#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125)
+#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \
+					  ((v_) >= 0) ? ((v_) * 125) : \
+					  ((s16)((GENMASK(15, 0) + (v_) + 1) \
+					   * 125)); })
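Two worked readings, since the macro open-codes sign handling and relies on the final s16 truncation:

/* Illustrative only: a raw reading of 200 gives 200 * 125 = 25000
 * milli-degrees (25 degrees). A raw reading of -8 takes the negative
 * branch: (0xFFFF + (-8) + 1) * 125 = 65528 * 125 = 8191000, which
 * truncated to s16 is -1000 milli-degrees (-1 degree), as expected.
 */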
 
 /* reg_mtmp_temperature
  * Temperature reading from the sensor. Reading is in 0.125 Celsius
@@ -7542,7 +8253,7 @@
  */
 MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
 
-static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
+static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index,
 				       bool max_temp_enable,
 				       bool max_temp_reset)
 {
@@ -7554,11 +8265,10 @@
 						    MLXSW_REG_MTMP_THRESH_HI);
 }
 
-static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
-					 unsigned int *p_max_temp,
-					 char *sensor_name)
+static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp,
+					 int *p_max_temp, char *sensor_name)
 {
-	u16 temp;
+	s16 temp;
 
 	if (p_temp) {
 		temp = mlxsw_reg_mtmp_temperature_get(payload);
@@ -7572,6 +8282,80 @@
 		mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
 }
 
+/* MTBR - Management Temperature Bulk Register
+ * -------------------------------------------
+ * This register is used for bulk temperature reading.
+ */
+#define MLXSW_REG_MTBR_ID 0x900F
+#define MLXSW_REG_MTBR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_MTBR_REC_LEN 0x04 /* record length */
+#define MLXSW_REG_MTBR_REC_MAX_COUNT 47 /* firmware limitation */
+#define MLXSW_REG_MTBR_LEN (MLXSW_REG_MTBR_BASE_LEN +	\
+			    MLXSW_REG_MTBR_REC_LEN *	\
+			    MLXSW_REG_MTBR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
+
+/* reg_mtbr_base_sensor_index
+ * Base sensors index to access (0 - ASIC sensor, 1-63 - ambient sensors,
+ * 64-127 are mapped to the SFP+/QSFP modules sequentially).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12);
+
+/* reg_mtbr_num_rec
+ * Request: Number of records to read
+ * Response: Number of records read
+ * See above description for more details.
+ * Range 1..255
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8);
+
+/* reg_mtbr_rec_max_temp
+ * The highest measured temperature from the sensor.
+ * When the bit mte is cleared, the field max_temperature is reserved.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
+		     16, MLXSW_REG_MTBR_REC_LEN, 0x00, false);
+
+/* reg_mtbr_rec_temp
+ * Temperature reading from the sensor. Reading is in 0.125 Celsius
+ * degrees units.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
+		     MLXSW_REG_MTBR_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
+				       u8 num_rec)
+{
+	MLXSW_REG_ZERO(mtbr, payload);
+	mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index);
+	mlxsw_reg_mtbr_num_rec_set(payload, num_rec);
+}
+
+/* Error codes from temperature reading */
+enum mlxsw_reg_mtbr_temp_status {
+	MLXSW_REG_MTBR_NO_CONN		= 0x8000,
+	MLXSW_REG_MTBR_NO_TEMP_SENS	= 0x8001,
+	MLXSW_REG_MTBR_INDEX_NA		= 0x8002,
+	MLXSW_REG_MTBR_BAD_SENS_INFO	= 0x8003,
+};
+
+/* Base index for reading modules temperature */
+#define MLXSW_REG_MTBR_BASE_MODULE_INDEX 64
+
+static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind,
+					      u16 *p_temp, u16 *p_max_temp)
+{
+	if (p_temp)
+		*p_temp = mlxsw_reg_mtbr_rec_temp_get(payload, rec_ind);
+	if (p_max_temp)
+		*p_max_temp = mlxsw_reg_mtbr_rec_max_temp_get(payload, rec_ind);
+}
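Because the status codes share the temperature field, a consumer must filter them before treating the value as a reading; a sketch:

static bool example_mtbr_temp_is_valid(u16 temp)
{
	switch (temp) {
	case MLXSW_REG_MTBR_NO_CONN:
	case MLXSW_REG_MTBR_NO_TEMP_SENS:
	case MLXSW_REG_MTBR_INDEX_NA:
	case MLXSW_REG_MTBR_BAD_SENS_INFO:
		return false;	/* status code, not a temperature */
	default:
		return true;	/* reading in 0.125 Celsius units */
	}
}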
+
 /* MCIA - Management Cable Info Access
  * -----------------------------------
  * MCIA register is used to access the SFP+ and QSFP connector's EPROM.
@@ -7626,13 +8410,41 @@
  */
 MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
 
-#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48
+#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH	256
+#define MLXSW_REG_MCIA_EEPROM_SIZE		48
+#define MLXSW_REG_MCIA_I2C_ADDR_LOW		0x50
+#define MLXSW_REG_MCIA_I2C_ADDR_HIGH		0x51
+#define MLXSW_REG_MCIA_PAGE0_LO_OFF		0xa0
+#define MLXSW_REG_MCIA_TH_ITEM_SIZE		2
+#define MLXSW_REG_MCIA_TH_PAGE_NUM		3
+#define MLXSW_REG_MCIA_PAGE0_LO			0
+#define MLXSW_REG_MCIA_TH_PAGE_OFF		0x80
+
+enum mlxsw_reg_mcia_eeprom_module_info_rev_id {
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC	= 0x00,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436	= 0x01,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636	= 0x03,
+};
+
+enum mlxsw_reg_mcia_eeprom_module_info_id {
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP	= 0x03,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP	= 0x0C,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS	= 0x0D,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28	= 0x11,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD	= 0x18,
+};
+
+enum mlxsw_reg_mcia_eeprom_module_info {
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE,
+};
 
 /* reg_mcia_eeprom
  * Bytes to read/write.
  * Access: RW
  */
-MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
 
 static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
 				       u8 page_number, u16 device_addr,
@@ -7903,6 +8715,60 @@
 	mlxsw_reg_mpar_pa_id_set(payload, pa_id);
 }
 
+/* MGIR - Management General Information Register
+ * ----------------------------------------------
+ * MGIR register allows software to query the hardware and firmware general
+ * information.
+ */
+#define MLXSW_REG_MGIR_ID 0x9020
+#define MLXSW_REG_MGIR_LEN 0x9C
+
+MLXSW_REG_DEFINE(mgir, MLXSW_REG_MGIR_ID, MLXSW_REG_MGIR_LEN);
+
+/* reg_mgir_hw_info_device_hw_revision
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, hw_info_device_hw_revision, 0x0, 16, 16);
+
+#define MLXSW_REG_MGIR_FW_INFO_PSID_SIZE 16
+
+/* reg_mgir_fw_info_psid
+ * PSID (ASCII string).
+ * Access: RO
+ */
+MLXSW_ITEM_BUF(reg, mgir, fw_info_psid, 0x30, MLXSW_REG_MGIR_FW_INFO_PSID_SIZE);
+
+/* reg_mgir_fw_info_extended_major
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_major, 0x44, 0, 32);
+
+/* reg_mgir_fw_info_extended_minor
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_minor, 0x48, 0, 32);
+
+/* reg_mgir_fw_info_extended_sub_minor
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_sub_minor, 0x4C, 0, 32);
+
+static inline void mlxsw_reg_mgir_pack(char *payload)
+{
+	MLXSW_REG_ZERO(mgir, payload);
+}
+
+static inline void
+mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,
+		      u32 *fw_major, u32 *fw_minor, u32 *fw_sub_minor)
+{
+	*hw_rev = mlxsw_reg_mgir_hw_info_device_hw_revision_get(payload);
+	mlxsw_reg_mgir_fw_info_psid_memcpy_from(payload, fw_info_psid);
+	*fw_major = mlxsw_reg_mgir_fw_info_extended_major_get(payload);
+	*fw_minor = mlxsw_reg_mgir_fw_info_extended_minor_get(payload);
+	*fw_sub_minor = mlxsw_reg_mgir_fw_info_extended_sub_minor_get(payload);
+}
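Reading the firmware version is then a straightforward query; a sketch assuming mlxsw_reg_query() from core.h:

static int example_fw_version_get(struct mlxsw_core *core, u32 *p_major,
				  u32 *p_minor, u32 *p_sub_minor)
{
	char mgir_pl[MLXSW_REG_MGIR_LEN];
	char psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE];
	u32 hw_rev;
	int err;

	mlxsw_reg_mgir_pack(mgir_pl);
	err = mlxsw_reg_query(core, MLXSW_REG(mgir), mgir_pl);
	if (err)
		return err;
	mlxsw_reg_mgir_unpack(mgir_pl, &hw_rev, psid, p_major, p_minor,
			      p_sub_minor);
	return 0;
}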
+
 /* MRSR - Management Reset and Shutdown Register
  * ---------------------------------------------
  * MRSR register is used to reset or shutdown the switch or
@@ -7968,6 +8834,107 @@
 					   MLXSW_REG_MLCR_DURATION_MAX : 0);
 }
 
+/* MTPPS - Management Pulse Per Second Register
+ * --------------------------------------------
+ * This register provides the device PPS capabilities, configures the PPS in
+ * and out modules, and holds the PPS-in time stamp.
+ */
+#define MLXSW_REG_MTPPS_ID 0x9053
+#define MLXSW_REG_MTPPS_LEN 0x3C
+
+MLXSW_REG_DEFINE(mtpps, MLXSW_REG_MTPPS_ID, MLXSW_REG_MTPPS_LEN);
+
+/* reg_mtpps_enable
+ * Enables the PPS functionality on the specific pin.
+ * A boolean variable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, enable, 0x20, 31, 1);
+
+enum mlxsw_reg_mtpps_pin_mode {
+	MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN = 0x2,
+};
+
+/* reg_mtpps_pin_mode
+ * Pin mode to be used. The mode must comply with the supported modes of the
+ * requested pin.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, pin_mode, 0x20, 8, 4);
+
+#define MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN	7
+
+/* reg_mtpps_pin
+ * Pin to be configured or queried out of the supported pins.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpps, pin, 0x20, 0, 8);
+
+/* reg_mtpps_time_stamp
+ * When pin_mode = pps_in, the latched device time when it was triggered from
+ * the external GPIO pin.
+ * When pin_mode = pps_out or virtual_pin or pps_out_and_virtual_pin, the target
+ * time to generate next output signal.
+ * Time is in units of device clock.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mtpps, time_stamp, 0x28, 0, 64);
+
+static inline void
+mlxsw_reg_mtpps_vpin_pack(char *payload, u64 time_stamp)
+{
+	MLXSW_REG_ZERO(mtpps, payload);
+	mlxsw_reg_mtpps_pin_set(payload, MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN);
+	mlxsw_reg_mtpps_pin_mode_set(payload,
+				     MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN);
+	mlxsw_reg_mtpps_enable_set(payload, true);
+	mlxsw_reg_mtpps_time_stamp_set(payload, time_stamp);
+}
+
+/* MTUTC - Management UTC Register
+ * -------------------------------
+ * Configures the HW UTC counter.
+ */
+#define MLXSW_REG_MTUTC_ID 0x9055
+#define MLXSW_REG_MTUTC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mtutc, MLXSW_REG_MTUTC_ID, MLXSW_REG_MTUTC_LEN);
+
+enum mlxsw_reg_mtutc_operation {
+	MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC = 0,
+	MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ = 3,
+};
+
+/* reg_mtutc_operation
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtutc, operation, 0x00, 0, 4);
+
+/* reg_mtutc_freq_adjustment
+ * Frequency adjustment: Every PPS the HW frequency will be
+ * adjusted by this value. Units of HW clock, where HW counts
+ * 10^9 HW clocks for 1 HW second.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtutc, freq_adjustment, 0x04, 0, 32);
+
+/* reg_mtutc_utc_sec
+ * UTC seconds.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mtutc, utc_sec, 0x10, 0, 32);
+
+static inline void
+mlxsw_reg_mtutc_pack(char *payload, enum mlxsw_reg_mtutc_operation oper,
+		     u32 freq_adj, u32 utc_sec)
+{
+	MLXSW_REG_ZERO(mtutc, payload);
+	mlxsw_reg_mtutc_operation_set(payload, oper);
+	mlxsw_reg_mtutc_freq_adjustment_set(payload, freq_adj);
+	mlxsw_reg_mtutc_utc_sec_set(payload, utc_sec);
+}
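Since the device counts 10^9 clocks per HW second, freq_adjustment is effectively in parts per billion, so a frequency-adjust path can pass a ppb delta through unchanged; a sketch (not the actual PTP callback, mlxsw_reg_write() assumed from core.h):

static int example_ptp_adjfreq(struct mlxsw_core *core, s32 ppb)
{
	char mtutc_pl[MLXSW_REG_MTUTC_LEN];

	mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ,
			     ppb, 0);
	return mlxsw_reg_write(core, MLXSW_REG(mtutc), mtutc_pl);
}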
+
 /* MCQI - Management Component Query Information
  * ---------------------------------------------
  * This register allows querying information about firmware components.
@@ -8279,6 +9246,769 @@
 	mlxsw_reg_mgpc_opcode_set(payload, opcode);
 }
 
+/* MPRS - Monitoring Parsing State Register
+ * ----------------------------------------
+ * The MPRS register is used for setting up the parsing for hash,
+ * policy-engine and routing.
+ */
+#define MLXSW_REG_MPRS_ID 0x9083
+#define MLXSW_REG_MPRS_LEN 0x14
+
+MLXSW_REG_DEFINE(mprs, MLXSW_REG_MPRS_ID, MLXSW_REG_MPRS_LEN);
+
+/* reg_mprs_parsing_depth
+ * Minimum parsing depth.
+ * Need to enlarge parsing depth according to L3, MPLS, tunnels, ACL
+ * rules, traps, hash, etc. Default is 96 bytes. Reserved when SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mprs, parsing_depth, 0x00, 0, 16);
+
+/* reg_mprs_parsing_en
+ * Parsing enable.
+ * Bit 0 - Enable parsing of NVE of types VxLAN, VxLAN-GPE, GENEVE and
+ * NVGRE. Default is enabled. Reserved when SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mprs, parsing_en, 0x04, 0, 16);
+
+/* reg_mprs_vxlan_udp_dport
+ * VxLAN UDP destination port.
+ * Used for identifying VxLAN packets and for dport field in
+ * encapsulation. Default is 4789.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mprs, vxlan_udp_dport, 0x10, 0, 16);
+
+static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
+				       u16 vxlan_udp_dport)
+{
+	MLXSW_REG_ZERO(mprs, payload);
+	mlxsw_reg_mprs_parsing_depth_set(payload, parsing_depth);
+	mlxsw_reg_mprs_parsing_en_set(payload, true);
+	mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
+}
+
+/* MOGCR - Monitoring Global Configuration Register
+ * ------------------------------------------------
+ */
+#define MLXSW_REG_MOGCR_ID 0x9086
+#define MLXSW_REG_MOGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN);
+
+/* reg_mogcr_ptp_iftc
+ * PTP Ingress FIFO Trap Clear
+ * The PTP_ING_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved on IB switches and on SwitchX/-2 and Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
+
+/* reg_mogcr_ptp_eftc
+ * PTP Egress FIFO Trap Clear
+ * The PTP_EGR_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved on IB switches and on SwitchX/-2 and Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);
+
+/* MTPPPC - Time Precision Packet Port Configuration
+ * -------------------------------------------------
+ * This register serves for configuration of which PTP messages should be
+ * timestamped. This is a global configuration, despite the register name.
+ *
+ * Reserved when Spectrum-2.
+ */
+#define MLXSW_REG_MTPPPC_ID 0x9090
+#define MLXSW_REG_MTPPPC_LEN 0x28
+
+MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN);
+
+/* reg_mtpppc_ing_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at ingress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16);
+
+/* reg_mtpppc_egr_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at egress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr)
+{
+	MLXSW_REG_ZERO(mtpppc, payload);
+	mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing);
+	mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
+}
+
+/* MTPPTR - Time Precision Packet Timestamping Reading
+ * ---------------------------------------------------
+ * The MTPPTR is used for reading the per port PTP timestamp FIFO.
+ * There is a trap for packets which are latched to the timestamp FIFO, thus the
+ * SW knows which FIFO to read. Note that packets enter the FIFO before being
+ * trapped. The sequence number is used to synchronize the timestamp FIFO
+ * entries and the trapped packets.
+ * Reserved when Spectrum-2.
+ */
+
+#define MLXSW_REG_MTPPTR_ID 0x9091
+#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */
+#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4
+#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN +		\
+		    MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN);
+
+/* reg_mtpptr_local_port
+ * Not supported for CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_mtpptr_dir {
+	MLXSW_REG_MTPPTR_DIR_INGRESS,
+	MLXSW_REG_MTPPTR_DIR_EGRESS,
+};
+
+/* reg_mtpptr_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1);
+
+/* reg_mtpptr_clr
+ * Clear the records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1);
+
+/* reg_mtpptr_num_rec
+ * Number of valid records in the response
+ * Range 0.. cap_ptp_timestamp_fifo
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4);
+
+/* reg_mtpptr_rec_message_type
+ * MessageType field as defined by IEEE 1588 Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type,
+		     MLXSW_REG_MTPPTR_BASE_LEN, 8, 4,
+		     MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_domain_number
+ * DomainNumber field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number,
+		     MLXSW_REG_MTPPTR_BASE_LEN, 0, 8,
+		     MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_sequence_id
+ * SequenceId field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id,
+		     MLXSW_REG_MTPPTR_BASE_LEN, 0, 16,
+		     MLXSW_REG_MTPPTR_REC_LEN, 0x4, false);
+
+/* reg_mtpptr_rec_timestamp_high
+ * Timestamp of when the PTP packet has passed through the port Units of PLL
+ * clock time.
+ * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high,
+		     MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+		     MLXSW_REG_MTPPTR_REC_LEN, 0x8, false);
+
+/* reg_mtpptr_rec_timestamp_low
+ * See rec_timestamp_high.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low,
+		     MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+		     MLXSW_REG_MTPPTR_REC_LEN, 0xC, false);
+
+static inline void mlxsw_reg_mtpptr_unpack(const char *payload,
+					   unsigned int rec,
+					   u8 *p_message_type,
+					   u8 *p_domain_number,
+					   u16 *p_sequence_id,
+					   u64 *p_timestamp)
+{
+	u32 timestamp_high, timestamp_low;
+
+	*p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec);
+	*p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec);
+	*p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec);
+	timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec);
+	timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec);
+	*p_timestamp = (u64)timestamp_high << 32 | timestamp_low;
+}
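/* Illustrative sketch of draining one port's ingress timestamp FIFO with
 * the accessors above; the helper name and the query flow are assumptions,
 * not part of this patch.
 */
static int example_mtpptr_read_fifo(struct mlxsw_core *core, u8 local_port)
{
	char mtpptr_pl[MLXSW_REG_MTPPTR_LEN];
	u8 message_type, domain_number;
	unsigned int num_rec, i;
	u16 sequence_id;
	u64 timestamp;
	int err;

	MLXSW_REG_ZERO(mtpptr, mtpptr_pl);
	mlxsw_reg_mtpptr_local_port_set(mtpptr_pl, local_port);
	mlxsw_reg_mtpptr_dir_set(mtpptr_pl, MLXSW_REG_MTPPTR_DIR_INGRESS);
	mlxsw_reg_mtpptr_clr_set(mtpptr_pl, 1);	/* pop the records we read */

	err = mlxsw_reg_query(core, MLXSW_REG(mtpptr), mtpptr_pl);
	if (err)
		return err;

	num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl);
	for (i = 0; i < num_rec; i++)
		mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type,
					&domain_number, &sequence_id,
					&timestamp);
	return 0;
}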
+
+/* MTPTPT - Monitoring Precision Time Protocol Trap Register
+ * ---------------------------------------------------------
+ * This register is used for configuring under which trap to deliver PTP
+ * packets depending on the type of the packet.
+ */
+#define MLXSW_REG_MTPTPT_ID 0x9092
+#define MLXSW_REG_MTPTPT_LEN 0x08
+
+MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN);
+
+enum mlxsw_reg_mtptpt_trap_id {
+	MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+	MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+};
+
+/* reg_mtptpt_trap_id
+ * Trap id.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4);
+
+/* reg_mtptpt_message_type
+ * Bitwise vector of PTP message types to trap. This is a necessary but
+ * not sufficient condition, since trapping must also be enabled per port;
+ * see MTPPPC.
+ * Message types are defined by IEEE 1588. Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mtptptp_pack(char *payload,
+					  enum mlxsw_reg_mtptpt_trap_id trap_id,
+					  u16 message_type)
+{
+	MLXSW_REG_ZERO(mtptpt, payload);
+	mlxsw_reg_mtptpt_trap_id_set(payload, trap_id);
+	mlxsw_reg_mtptpt_message_type_set(payload, message_type);
+}
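/* Sketch of pairing MTPTPT with MTPPPC, since the message_type comment above
 * notes that both are required. The helper name is an assumption chosen for
 * illustration.
 */
static int example_mtptpt_set_ptp0(struct mlxsw_core *core, u16 message_type)
{
	char mtptpt_pl[MLXSW_REG_MTPTPT_LEN];

	mlxsw_reg_mtptptp_pack(mtptpt_pl, MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
			       message_type);
	return mlxsw_reg_write(core, MLXSW_REG(mtptpt), mtptpt_pl);
}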
+
+/* MGPIR - Management General Peripheral Information Register
+ * ----------------------------------------------------------
+ * MGPIR register allows software to query the hardware and
+ * firmware general information of peripheral entities.
+ */
+#define MLXSW_REG_MGPIR_ID 0x9100
+#define MLXSW_REG_MGPIR_LEN 0xA0
+
+MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN);
+
+enum mlxsw_reg_mgpir_device_type {
+	MLXSW_REG_MGPIR_DEVICE_TYPE_NONE,
+	MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
+};
+
+/* device_type
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4);
+
+/* devices_per_flash
+ * Number of devices of device_type per flash (a flash can be shared by a few
+ * devices).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
+
+/* num_of_devices
+ * Number of devices of device_type.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+
+static inline void mlxsw_reg_mgpir_pack(char *payload)
+{
+	MLXSW_REG_ZERO(mgpir, payload);
+}
+
+static inline void
+mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
+		       enum mlxsw_reg_mgpir_device_type *device_type,
+		       u8 *devices_per_flash)
+{
+	if (num_of_devices)
+		*num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
+	if (device_type)
+		*device_type = mlxsw_reg_mgpir_device_type_get(payload);
+	if (devices_per_flash)
+		*devices_per_flash =
+				mlxsw_reg_mgpir_devices_per_flash_get(payload);
+}
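/* Sketch of the pack/query/unpack round trip (the helper name is an
 * assumption): count the gearbox dies present in the system.
 */
static int example_mgpir_gearbox_count(struct mlxsw_core *core, u8 *count)
{
	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
	enum mlxsw_reg_mgpir_device_type device_type;
	int err;

	mlxsw_reg_mgpir_pack(mgpir_pl);
	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
	if (err)
		return err;

	mlxsw_reg_mgpir_unpack(mgpir_pl, count, &device_type, NULL);
	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE)
		*count = 0;
	return 0;
}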
+
+/* TNGCR - Tunneling NVE General Configuration Register
+ * ----------------------------------------------------
+ * The TNGCR register is used for setting up the NVE Tunneling configuration.
+ */
+#define MLXSW_REG_TNGCR_ID 0xA001
+#define MLXSW_REG_TNGCR_LEN 0x44
+
+MLXSW_REG_DEFINE(tngcr, MLXSW_REG_TNGCR_ID, MLXSW_REG_TNGCR_LEN);
+
+enum mlxsw_reg_tngcr_type {
+	MLXSW_REG_TNGCR_TYPE_VXLAN,
+	MLXSW_REG_TNGCR_TYPE_VXLAN_GPE,
+	MLXSW_REG_TNGCR_TYPE_GENEVE,
+	MLXSW_REG_TNGCR_TYPE_NVGRE,
+};
+
+/* reg_tngcr_type
+ * Tunnel type for encapsulation and decapsulation. The types are mutually
+ * exclusive.
+ * Note: For Spectrum the NVE parsing must be enabled in MPRS.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, type, 0x00, 0, 4);
+
+/* reg_tngcr_nve_valid
+ * The VTEP is valid. Allows adding FDB entries for tunnel encapsulation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_valid, 0x04, 31, 1);
+
+/* reg_tngcr_nve_ttl_uc
+ * The TTL for NVE tunnel encapsulation underlay unicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_ttl_uc, 0x04, 0, 8);
+
+/* reg_tngcr_nve_ttl_mc
+ * The TTL for NVE tunnel encapsulation underlay multicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_ttl_mc, 0x08, 0, 8);
+
+enum {
+	/* Do not copy flow label. Calculate flow label using nve_flh. */
+	MLXSW_REG_TNGCR_FL_NO_COPY,
+	/* Copy flow label from inner packet if packet is IPv6 and
+	 * encapsulation is by IPv6. Otherwise, calculate flow label using
+	 * nve_flh.
+	 */
+	MLXSW_REG_TNGCR_FL_COPY,
+};
+
+/* reg_tngcr_nve_flc
+ * For NVE tunnel encapsulation: Flow label copy from inner packet.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_flc, 0x0C, 25, 1);
+
+enum {
+	/* Flow label is static. In Spectrum this means '0'. Spectrum-2
+	 * uses {nve_fl_prefix, nve_fl_suffix}.
+	 */
+	MLXSW_REG_TNGCR_FL_NO_HASH,
+	/* 8 LSBs of the flow label are calculated from ECMP hash of the
+	 * inner packet. 12 MSBs are configured by nve_fl_prefix.
+	 */
+	MLXSW_REG_TNGCR_FL_HASH,
+};
+
+/* reg_tngcr_nve_flh
+ * NVE flow label hash.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_flh, 0x0C, 24, 1);
+
+/* reg_tngcr_nve_fl_prefix
+ * NVE flow label prefix. Constant 12 MSBs of the flow label.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_fl_prefix, 0x0C, 8, 12);
+
+/* reg_tngcr_nve_fl_suffix
+ * NVE flow label suffix. Constant 8 LSBs of the flow label.
+ * Reserved when nve_flh=1 and for Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_fl_suffix, 0x0C, 0, 8);
+
+enum {
+	/* Source UDP port is fixed (default '0') */
+	MLXSW_REG_TNGCR_UDP_SPORT_NO_HASH,
+	/* Source UDP port is calculated based on hash */
+	MLXSW_REG_TNGCR_UDP_SPORT_HASH,
+};
+
+/* reg_tngcr_nve_udp_sport_type
+ * NVE UDP source port type.
+ * Spectrum uses LAG hash (SLCRv2). Spectrum-2 uses ECMP hash (RECRv2).
+ * When the source UDP port is calculated based on hash, the 8 LSBs
+ * are taken from the hash and the 8 MSBs are configured by
+ * nve_udp_sport_prefix.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_udp_sport_type, 0x10, 24, 1);
+
+/* reg_tngcr_nve_udp_sport_prefix
+ * NVE UDP source port prefix. Constant 8 MSBs of the UDP source port.
+ * Reserved when NVE type is NVGRE.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_udp_sport_prefix, 0x10, 8, 8);
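/* Worked example (values are assumptions for illustration): with
 * nve_udp_sport_prefix = 0xC0 and an ECMP/LAG hash yielding 0x5A for the
 * 8 LSBs, the resulting UDP source port is (0xC0 << 8) | 0x5A = 0xC05A,
 * i.e. 49242.
 */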
+
+/* reg_tngcr_nve_group_size_mc
+ * The number of sequential linked lists of MC entries. The first linked
+ * list is configured by SFD.underlay_mc_ptr.
+ * Valid values: 1, 2, 4, 8, 16, 32, 64
+ * The linked lists are configured by TNUMT.
+ * The hash is set by LAG hash.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_group_size_mc, 0x18, 0, 8);
+
+/* reg_tngcr_nve_group_size_flood
+ * The number of sequential linked lists of flooding entries. The first
+ * linked list is configured by SFMR.nve_tunnel_flood_ptr.
+ * Valid values: 1, 2, 4, 8, 16, 32, 64
+ * The linked lists are configured by TNUMT.
+ * The hash is set by LAG hash.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_group_size_flood, 0x1C, 0, 8);
+
+/* reg_tngcr_learn_enable
+ * During decapsulation, whether to learn from NVE port.
+ * Reserved when Spectrum-2. See TNPC.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, learn_enable, 0x20, 31, 1);
+
+/* reg_tngcr_underlay_virtual_router
+ * Underlay virtual router.
+ * Reserved when Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, underlay_virtual_router, 0x20, 0, 16);
+
+/* reg_tngcr_underlay_rif
+ * Underlay ingress router interface. RIF type should be loopback generic.
+ * Reserved when Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, underlay_rif, 0x24, 0, 16);
+
+/* reg_tngcr_usipv4
+ * Underlay source IPv4 address of the NVE.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, usipv4, 0x28, 0, 32);
+
+/* reg_tngcr_usipv6
+ * Underlay source IPv6 address of the NVE. For Spectrum, must not be
+ * modified under traffic of NVE tunneling encapsulation.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, tngcr, usipv6, 0x30, 16);
+
+static inline void mlxsw_reg_tngcr_pack(char *payload,
+					enum mlxsw_reg_tngcr_type type,
+					bool valid, u8 ttl)
+{
+	MLXSW_REG_ZERO(tngcr, payload);
+	mlxsw_reg_tngcr_type_set(payload, type);
+	mlxsw_reg_tngcr_nve_valid_set(payload, valid);
+	mlxsw_reg_tngcr_nve_ttl_uc_set(payload, ttl);
+	mlxsw_reg_tngcr_nve_ttl_mc_set(payload, ttl);
+	mlxsw_reg_tngcr_nve_flc_set(payload, MLXSW_REG_TNGCR_FL_NO_COPY);
+	mlxsw_reg_tngcr_nve_flh_set(payload, 0);
+	mlxsw_reg_tngcr_nve_udp_sport_type_set(payload,
+					       MLXSW_REG_TNGCR_UDP_SPORT_HASH);
+	mlxsw_reg_tngcr_nve_udp_sport_prefix_set(payload, 0);
+	mlxsw_reg_tngcr_nve_group_size_mc_set(payload, 1);
+	mlxsw_reg_tngcr_nve_group_size_flood_set(payload, 1);
+}
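/* A minimal sketch on top of mlxsw_reg_tngcr_pack() above (values are
 * assumptions): validate a VXLAN VTEP with TTL 255 and set the underlay
 * IPv4 source address, keeping the defaults the pack helper chooses.
 */
static int example_tngcr_vxlan_init(struct mlxsw_core *core, u32 usipv4)
{
	char tngcr_pl[MLXSW_REG_TNGCR_LEN];

	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, 255);
	mlxsw_reg_tngcr_usipv4_set(tngcr_pl, usipv4);
	return mlxsw_reg_write(core, MLXSW_REG(tngcr), tngcr_pl);
}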
+
+/* TNUMT - Tunneling NVE Underlay Multicast Table Register
+ * -------------------------------------------------------
+ * The TNUMT register is for building the underlay MC table. It is used
+ * for MC, flooding and BC traffic into the NVE tunnel.
+ */
+#define MLXSW_REG_TNUMT_ID 0xA003
+#define MLXSW_REG_TNUMT_LEN 0x20
+
+MLXSW_REG_DEFINE(tnumt, MLXSW_REG_TNUMT_ID, MLXSW_REG_TNUMT_LEN);
+
+enum mlxsw_reg_tnumt_record_type {
+	MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
+	MLXSW_REG_TNUMT_RECORD_TYPE_IPV6,
+	MLXSW_REG_TNUMT_RECORD_TYPE_LABEL,
+};
+
+/* reg_tnumt_record_type
+ * Record type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, record_type, 0x00, 28, 4);
+
+enum mlxsw_reg_tnumt_tunnel_port {
+	MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
+	MLXSW_REG_TNUMT_TUNNEL_PORT_VPLS,
+	MLXSW_REG_TNUMT_TUNNEL_FLEX_TUNNEL0,
+	MLXSW_REG_TNUMT_TUNNEL_FLEX_TUNNEL1,
+};
+
+/* reg_tnumt_tunnel_port
+ * Tunnel port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, tunnel_port, 0x00, 24, 4);
+
+/* reg_tnumt_underlay_mc_ptr
+ * Index to the underlay multicast table.
+ * For Spectrum the index is to the KVD linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tnumt, underlay_mc_ptr, 0x00, 0, 24);
+
+/* reg_tnumt_vnext
+ * The next_underlay_mc_ptr is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, vnext, 0x04, 31, 1);
+
+/* reg_tnumt_next_underlay_mc_ptr
+ * The next index to the underlay multicast table.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, next_underlay_mc_ptr, 0x04, 0, 24);
+
+/* reg_tnumt_record_size
+ * Number of IP addresses in the record.
+ * Range is 1..cap_max_nve_mc_entries_ipv{4,6}
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, record_size, 0x08, 0, 3);
+
+/* reg_tnumt_udip
+ * The underlay IPv4 addresses. udip[i] is reserved if i >= record_size.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, tnumt, udip, 0x0C, 0, 32, 0x04, 0x00, false);
+
+/* reg_tnumt_udip_ptr
+ * The pointer to the underlay IPv6 addresses. udip_ptr[i] is reserved if
+ * i >= record_size. The IPv6 addresses are configured by RIPS.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, tnumt, udip_ptr, 0x0C, 0, 24, 0x04, 0x00, false);
+
+static inline void mlxsw_reg_tnumt_pack(char *payload,
+					enum mlxsw_reg_tnumt_record_type type,
+					enum mlxsw_reg_tnumt_tunnel_port tport,
+					u32 underlay_mc_ptr, bool vnext,
+					u32 next_underlay_mc_ptr,
+					u8 record_size)
+{
+	MLXSW_REG_ZERO(tnumt, payload);
+	mlxsw_reg_tnumt_record_type_set(payload, type);
+	mlxsw_reg_tnumt_tunnel_port_set(payload, tport);
+	mlxsw_reg_tnumt_underlay_mc_ptr_set(payload, underlay_mc_ptr);
+	mlxsw_reg_tnumt_vnext_set(payload, vnext);
+	mlxsw_reg_tnumt_next_underlay_mc_ptr_set(payload, next_underlay_mc_ptr);
+	mlxsw_reg_tnumt_record_size_set(payload, record_size);
+}
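/* Sketch (pointer and address values are assumptions): build a two-record
 * chain of underlay IPv4 multicast entries; the second record terminates
 * the linked list by clearing vnext.
 */
static void example_tnumt_pack_chain(char *first_pl, char *second_pl)
{
	mlxsw_reg_tnumt_pack(first_pl, MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
			     MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
			     0 /* underlay_mc_ptr */, true,
			     1 /* next_underlay_mc_ptr */, 1);
	mlxsw_reg_tnumt_udip_set(first_pl, 0, 0xe0000001);	/* 224.0.0.1 */

	mlxsw_reg_tnumt_pack(second_pl, MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
			     MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
			     1, false, 0, 1);
	mlxsw_reg_tnumt_udip_set(second_pl, 0, 0xe0000002);	/* 224.0.0.2 */
}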
+
+/* TNQCR - Tunneling NVE QoS Configuration Register
+ * ------------------------------------------------
+ * The TNQCR register configures how QoS is set in encapsulation into the
+ * underlay network.
+ */
+#define MLXSW_REG_TNQCR_ID 0xA010
+#define MLXSW_REG_TNQCR_LEN 0x0C
+
+MLXSW_REG_DEFINE(tnqcr, MLXSW_REG_TNQCR_ID, MLXSW_REG_TNQCR_LEN);
+
+/* reg_tnqcr_enc_set_dscp
+ * For encapsulation: How to set DSCP field:
+ * 0 - Copy the DSCP from the overlay (inner) IP header to the underlay
+ * (outer) IP header. If there is no IP header, use TNQDR.dscp
+ * 1 - Set the DSCP field as TNQDR.dscp
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnqcr, enc_set_dscp, 0x04, 28, 1);
+
+static inline void mlxsw_reg_tnqcr_pack(char *payload)
+{
+	MLXSW_REG_ZERO(tnqcr, payload);
+	mlxsw_reg_tnqcr_enc_set_dscp_set(payload, 0);
+}
+
+/* TNQDR - Tunneling NVE QoS Default Register
+ * ------------------------------------------
+ * The TNQDR register configures the default QoS settings for NVE
+ * encapsulation.
+ */
+#define MLXSW_REG_TNQDR_ID 0xA011
+#define MLXSW_REG_TNQDR_LEN 0x08
+
+MLXSW_REG_DEFINE(tnqdr, MLXSW_REG_TNQDR_ID, MLXSW_REG_TNQDR_LEN);
+
+/* reg_tnqdr_local_port
+ * Local port number (receive port). CPU port is supported.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tnqdr, local_port, 0x00, 16, 8);
+
+/* reg_tnqdr_dscp
+ * For encapsulation, the default DSCP.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnqdr, dscp, 0x04, 0, 6);
+
+static inline void mlxsw_reg_tnqdr_pack(char *payload, u8 local_port)
+{
+	MLXSW_REG_ZERO(tnqdr, payload);
+	mlxsw_reg_tnqdr_local_port_set(payload, local_port);
+	mlxsw_reg_tnqdr_dscp_set(payload, 0);
+}
+
+/* TNEEM - Tunneling NVE Encapsulation ECN Mapping Register
+ * --------------------------------------------------------
+ * The TNEEM register maps the ECN of the IP header at the ingress to the
+ * encapsulation onto the ECN of the underlay network.
+ */
+#define MLXSW_REG_TNEEM_ID 0xA012
+#define MLXSW_REG_TNEEM_LEN 0x0C
+
+MLXSW_REG_DEFINE(tneem, MLXSW_REG_TNEEM_ID, MLXSW_REG_TNEEM_LEN);
+
+/* reg_tneem_overlay_ecn
+ * ECN of the IP header in the overlay network.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tneem, overlay_ecn, 0x04, 24, 2);
+
+/* reg_tneem_underlay_ecn
+ * ECN of the IP header in the underlay network.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tneem, underlay_ecn, 0x04, 16, 2);
+
+static inline void mlxsw_reg_tneem_pack(char *payload, u8 overlay_ecn,
+					u8 underlay_ecn)
+{
+	MLXSW_REG_ZERO(tneem, payload);
+	mlxsw_reg_tneem_overlay_ecn_set(payload, overlay_ecn);
+	mlxsw_reg_tneem_underlay_ecn_set(payload, underlay_ecn);
+}
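/* Sketch: program an identity encapsulation mapping (copy the overlay ECN
 * code point into the underlay header), one TNEEM write per ECN code point
 * (Not-ECT=0, ECT(1)=1, ECT(0)=2, CE=3). Whether identity is the desired
 * policy is an assumption for illustration.
 */
static int example_tneem_identity_map(struct mlxsw_core *core)
{
	char tneem_pl[MLXSW_REG_TNEEM_LEN];
	int err;
	u8 ecn;

	for (ecn = 0; ecn < 4; ecn++) {
		mlxsw_reg_tneem_pack(tneem_pl, ecn, ecn);
		err = mlxsw_reg_write(core, MLXSW_REG(tneem), tneem_pl);
		if (err)
			return err;
	}
	return 0;
}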
+
+/* TNDEM - Tunneling NVE Decapsulation ECN Mapping Register
+ * --------------------------------------------------------
+ * The TNDEM register configures the ECN actions that are performed during
+ * decapsulation.
+ */
+#define MLXSW_REG_TNDEM_ID 0xA013
+#define MLXSW_REG_TNDEM_LEN 0x0C
+
+MLXSW_REG_DEFINE(tndem, MLXSW_REG_TNDEM_ID, MLXSW_REG_TNDEM_LEN);
+
+/* reg_tndem_underlay_ecn
+ * ECN field of the IP header in the underlay network.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tndem, underlay_ecn, 0x04, 24, 2);
+
+/* reg_tndem_overlay_ecn
+ * ECN field of the IP header in the overlay network.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tndem, overlay_ecn, 0x04, 16, 2);
+
+/* reg_tndem_eip_ecn
+ * Egress IP ECN. ECN field of the IP header of the packet that exits the
+ * decapsulation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tndem, eip_ecn, 0x04, 8, 2);
+
+/* reg_tndem_trap_en
+ * Trap enable:
+ * 0 - No trap due to decap ECN
+ * 1 - Trap enable with trap_id
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tndem, trap_en, 0x08, 28, 4);
+
+/* reg_tndem_trap_id
+ * Trap ID. Either DECAP_ECN0 or DECAP_ECN1.
+ * Reserved when trap_en is '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tndem, trap_id, 0x08, 0, 9);
+
+static inline void mlxsw_reg_tndem_pack(char *payload, u8 underlay_ecn,
+					u8 overlay_ecn, u8 ecn, bool trap_en,
+					u16 trap_id)
+{
+	MLXSW_REG_ZERO(tndem, payload);
+	mlxsw_reg_tndem_underlay_ecn_set(payload, underlay_ecn);
+	mlxsw_reg_tndem_overlay_ecn_set(payload, overlay_ecn);
+	mlxsw_reg_tndem_eip_ecn_set(payload, ecn);
+	mlxsw_reg_tndem_trap_en_set(payload, trap_en);
+	mlxsw_reg_tndem_trap_id_set(payload, trap_id);
+}
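/* Sketch of one TNDEM entry (the policy choice is an assumption): an
 * underlay CE mark (3) received over an overlay ECT(0) (2) packet
 * propagates CE to the decapsulated packet, without raising a trap.
 */
static int example_tndem_ce_propagate(struct mlxsw_core *core)
{
	char tndem_pl[MLXSW_REG_TNDEM_LEN];

	mlxsw_reg_tndem_pack(tndem_pl, 3, 2, 3, false, 0);
	return mlxsw_reg_write(core, MLXSW_REG(tndem), tndem_pl);
}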
+
+/* TNPC - Tunnel Port Configuration Register
+ * -----------------------------------------
+ * The TNPC register is used for tunnel port configuration.
+ * Reserved when Spectrum.
+ */
+#define MLXSW_REG_TNPC_ID 0xA020
+#define MLXSW_REG_TNPC_LEN 0x18
+
+MLXSW_REG_DEFINE(tnpc, MLXSW_REG_TNPC_ID, MLXSW_REG_TNPC_LEN);
+
+enum mlxsw_reg_tnpc_tunnel_port {
+	MLXSW_REG_TNPC_TUNNEL_PORT_NVE,
+	MLXSW_REG_TNPC_TUNNEL_PORT_VPLS,
+	MLXSW_REG_TNPC_TUNNEL_FLEX_TUNNEL0,
+	MLXSW_REG_TNPC_TUNNEL_FLEX_TUNNEL1,
+};
+
+/* reg_tnpc_tunnel_port
+ * Tunnel port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tnpc, tunnel_port, 0x00, 0, 4);
+
+/* reg_tnpc_learn_enable_v6
+ * During IPv6 underlay decapsulation, whether to learn from tunnel port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnpc, learn_enable_v6, 0x04, 1, 1);
+
+/* reg_tnpc_learn_enable_v4
+ * During IPv4 underlay decapsulation, whether to learn from tunnel port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnpc, learn_enable_v4, 0x04, 0, 1);
+
+static inline void mlxsw_reg_tnpc_pack(char *payload,
+				       enum mlxsw_reg_tnpc_tunnel_port tport,
+				       bool learn_enable)
+{
+	MLXSW_REG_ZERO(tnpc, payload);
+	mlxsw_reg_tnpc_tunnel_port_set(payload, tport);
+	mlxsw_reg_tnpc_learn_enable_v4_set(payload, learn_enable);
+	mlxsw_reg_tnpc_learn_enable_v6_set(payload, learn_enable);
+}
+
 /* TIGCR - Tunneling IPinIP General Configuration Register
  * -------------------------------------------------------
  * The TIGCR register is used for setting up the IPinIP Tunnel configuration.
@@ -8336,8 +10066,15 @@
  */
 MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4);
 
+/* reg_sbpr_infi_size
+ * Size is infinite.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbpr, infi_size, 0x04, 31, 1);
+
 /* reg_sbpr_size
  * Pool size in buffer cells.
+ * Reserved when infi_size = 1.
  * Access: RW
  */
 MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24);
@@ -8355,13 +10092,15 @@
 
 static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool,
 				       enum mlxsw_reg_sbxx_dir dir,
-				       enum mlxsw_reg_sbpr_mode mode, u32 size)
+				       enum mlxsw_reg_sbpr_mode mode, u32 size,
+				       bool infi_size)
 {
 	MLXSW_REG_ZERO(sbpr, payload);
 	mlxsw_reg_sbpr_pool_set(payload, pool);
 	mlxsw_reg_sbpr_dir_set(payload, dir);
 	mlxsw_reg_sbpr_mode_set(payload, mode);
 	mlxsw_reg_sbpr_size_set(payload, size);
+	mlxsw_reg_sbpr_infi_size_set(payload, infi_size);
 }
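/* Sketch of the new infi_size flag (the helper name is an assumption; the
 * mode/dir enum values are defined earlier in this file): declare an
 * infinitely sized ingress pool. size is reserved when infi_size is set,
 * so 0 is passed.
 */
static int example_sbpr_infinite(struct mlxsw_core *core, u8 pool)
{
	char sbpr_pl[MLXSW_REG_SBPR_LEN];

	mlxsw_reg_sbpr_pack(sbpr_pl, pool, MLXSW_REG_SBXX_DIR_INGRESS,
			    MLXSW_REG_SBPR_MODE_DYNAMIC, 0, true);
	return mlxsw_reg_write(core, MLXSW_REG(sbpr), sbpr_pl);
}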
 
 /* SBCM - Shared Buffer Class Management Register
@@ -8409,6 +10148,12 @@
 #define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1
 #define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14
 
+/* reg_sbcm_infi_max
+ * Max buffer is infinite.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbcm, infi_max, 0x1C, 31, 1);
+
 /* reg_sbcm_max_buff
  * When the pool associated to the port-pg/tclass is configured to
  * static, Maximum buffer size for the limiter configured in cells.
@@ -8418,6 +10163,7 @@
  * 0: 0
  * i: (1/128)*2^(i-1), for i=1..14
  * 0xFF: Infinity
+ * Reserved when infi_max = 1.
  * Access: RW
  */
 MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24);
@@ -8430,7 +10176,8 @@
 
 static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
 				       enum mlxsw_reg_sbxx_dir dir,
-				       u32 min_buff, u32 max_buff, u8 pool)
+				       u32 min_buff, u32 max_buff,
+				       bool infi_max, u8 pool)
 {
 	MLXSW_REG_ZERO(sbcm, payload);
 	mlxsw_reg_sbcm_local_port_set(payload, local_port);
@@ -8438,6 +10185,7 @@
 	mlxsw_reg_sbcm_dir_set(payload, dir);
 	mlxsw_reg_sbcm_min_buff_set(payload, min_buff);
 	mlxsw_reg_sbcm_max_buff_set(payload, max_buff);
+	mlxsw_reg_sbcm_infi_max_set(payload, infi_max);
 	mlxsw_reg_sbcm_pool_set(payload, pool);
 }
 
@@ -8748,8 +10496,10 @@
 	MLXSW_REG(ppbs),
 	MLXSW_REG(prcr),
 	MLXSW_REG(pefa),
+	MLXSW_REG(pemrbt),
 	MLXSW_REG(ptce2),
 	MLXSW_REG(perpt),
+	MLXSW_REG(peabfe),
 	MLXSW_REG(perar),
 	MLXSW_REG(ptce3),
 	MLXSW_REG(percr),
@@ -8763,6 +10513,7 @@
 	MLXSW_REG(qpdsm),
 	MLXSW_REG(qpdpm),
 	MLXSW_REG(qtctm),
+	MLXSW_REG(qpsc),
 	MLXSW_REG(pmlp),
 	MLXSW_REG(pmtu),
 	MLXSW_REG(ptys),
@@ -8774,6 +10525,7 @@
 	MLXSW_REG(pptb),
 	MLXSW_REG(pbmc),
 	MLXSW_REG(pspa),
+	MLXSW_REG(pplr),
 	MLXSW_REG(htgt),
 	MLXSW_REG(hpkt),
 	MLXSW_REG(rgcr),
@@ -8798,18 +10550,36 @@
 	MLXSW_REG(mfsc),
 	MLXSW_REG(mfsm),
 	MLXSW_REG(mfsl),
+	MLXSW_REG(fore),
 	MLXSW_REG(mtcap),
 	MLXSW_REG(mtmp),
+	MLXSW_REG(mtbr),
 	MLXSW_REG(mcia),
 	MLXSW_REG(mpat),
 	MLXSW_REG(mpar),
+	MLXSW_REG(mgir),
 	MLXSW_REG(mrsr),
 	MLXSW_REG(mlcr),
+	MLXSW_REG(mtpps),
+	MLXSW_REG(mtutc),
 	MLXSW_REG(mpsc),
 	MLXSW_REG(mcqi),
 	MLXSW_REG(mcc),
 	MLXSW_REG(mcda),
 	MLXSW_REG(mgpc),
+	MLXSW_REG(mprs),
+	MLXSW_REG(mogcr),
+	MLXSW_REG(mtpppc),
+	MLXSW_REG(mtpptr),
+	MLXSW_REG(mtptpt),
+	MLXSW_REG(mgpir),
+	MLXSW_REG(tngcr),
+	MLXSW_REG(tnumt),
+	MLXSW_REG(tnqcr),
+	MLXSW_REG(tnqdr),
+	MLXSW_REG(tneem),
+	MLXSW_REG(tndem),
+	MLXSW_REG(tnpc),
 	MLXSW_REG(tigcr),
 	MLXSW_REG(sbpr),
 	MLXSW_REG(sbcm),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 79a31de..33a9fc9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -24,8 +24,11 @@
 	MLXSW_RES_ID_MAX_SYSTEM_PORT,
 	MLXSW_RES_ID_MAX_LAG,
 	MLXSW_RES_ID_MAX_LAG_MEMBERS,
+	MLXSW_RES_ID_LOCAL_PORTS_IN_1X,
+	MLXSW_RES_ID_LOCAL_PORTS_IN_2X,
 	MLXSW_RES_ID_MAX_BUFFER_SIZE,
 	MLXSW_RES_ID_CELL_SIZE,
+	MLXSW_RES_ID_MAX_HEADROOM_SIZE,
 	MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS,
 	MLXSW_RES_ID_ACL_MAX_TCAM_RULES,
 	MLXSW_RES_ID_ACL_MAX_REGIONS,
@@ -41,11 +44,14 @@
 	MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB,
 	MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB,
 	MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB,
+	MLXSW_RES_ID_ACL_MAX_BF_LOG,
 	MLXSW_RES_ID_MAX_CPU_POLICERS,
 	MLXSW_RES_ID_MAX_VRS,
 	MLXSW_RES_ID_MAX_RIFS,
 	MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES,
 	MLXSW_RES_ID_MAX_LPM_TREES,
+	MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4,
+	MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6,
 
 	/* Internal resources.
 	 * Determined by the SW, not queried from the HW.
@@ -74,8 +80,11 @@
 	[MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502,
 	[MLXSW_RES_ID_MAX_LAG] = 0x2520,
 	[MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
+	[MLXSW_RES_ID_LOCAL_PORTS_IN_1X] = 0x2610,
+	[MLXSW_RES_ID_LOCAL_PORTS_IN_2X] = 0x2611,
 	[MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802,	/* Bytes */
 	[MLXSW_RES_ID_CELL_SIZE] = 0x2803,	/* Bytes */
+	[MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811,	/* Bytes */
 	[MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901,
 	[MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902,
 	[MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903,
@@ -91,11 +100,14 @@
 	[MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951,
 	[MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952,
 	[MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953,
+	[MLXSW_RES_ID_ACL_MAX_BF_LOG] = 0x2960,
 	[MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
 	[MLXSW_RES_ID_MAX_VRS] = 0x2C01,
 	[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
 	[MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10,
 	[MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
+	[MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4] = 0x2E02,
+	[MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6] = 0x2E03,
 };
 
 struct mlxsw_res {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ada644d..dcf9562 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -21,6 +21,7 @@
 #include <linux/dcbnl.h>
 #include <linux/inetdevice.h>
 #include <linux/netlink.h>
+#include <linux/jhash.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
@@ -31,6 +32,7 @@
 #include "spectrum.h"
 #include "pci.h"
 #include "core.h"
+#include "core_env.h"
 #include "reg.h"
 #include "port.h"
 #include "trap.h"
@@ -39,13 +41,14 @@
 #include "spectrum_dpipe.h"
 #include "spectrum_acl_flex_actions.h"
 #include "spectrum_span.h"
+#include "spectrum_ptp.h"
 #include "../mlxfw/mlxfw.h"
 
 #define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
 
 #define MLXSW_SP1_FWREV_MAJOR 13
-#define MLXSW_SP1_FWREV_MINOR 1703
-#define MLXSW_SP1_FWREV_SUBMINOR 4
+#define MLXSW_SP1_FWREV_MINOR 2000
+#define MLXSW_SP1_FWREV_SUBMINOR 1886
 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
 
 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
@@ -62,8 +65,16 @@
 
 static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
 static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
+static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3";
 static const char mlxsw_sp_driver_version[] = "1.0";
 
+static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = {
+	0xff, 0xff, 0xff, 0xff, 0xfc, 0x00
+};
+static const unsigned char mlxsw_sp2_mac_mask[ETH_ALEN] = {
+	0xff, 0xff, 0xff, 0xff, 0xf0, 0x00
+};
+
 /* tx_hdr_version
  * Tx header version.
  * Must be set to 1.
@@ -137,6 +148,39 @@
 	struct mlxsw_sp *mlxsw_sp;
 };
 
+struct mlxsw_sp_ptp_ops {
+	struct mlxsw_sp_ptp_clock *
+		(*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev);
+	void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock);
+
+	struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp);
+	void (*fini)(struct mlxsw_sp_ptp_state *ptp_state);
+
+	/* Notify the driver that a received packet might be a PTP packet.
+	 * The driver is responsible for freeing the passed-in SKB.
+	 */
+	void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			u8 local_port);
+
+	/* Notify the driver that a timestamped packet was transmitted. The
+	 * driver is responsible for freeing the passed-in SKB.
+	 */
+	void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			    u8 local_port);
+
+	int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct hwtstamp_config *config);
+	int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct hwtstamp_config *config);
+	void (*shaper_work)(struct work_struct *work);
+	int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp,
+			   struct ethtool_ts_info *info);
+	int (*get_stats_count)(void);
+	void (*get_stats_strings)(u8 **p);
+	void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
+			  u64 *data, int data_index);
+};
+
 static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
 				    u16 component_index, u32 *p_max_size,
 				    u8 *p_align_bits, u16 *p_max_write_size)
@@ -285,6 +329,19 @@
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
 }
 
+static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev,
+				   const char *msg, const char *comp_name,
+				   u32 done_bytes, u32 total_bytes)
+{
+	struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+		container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+
+	devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core),
+					   msg, comp_name,
+					   done_bytes, total_bytes);
+}
+
 static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = {
 	.component_query	= mlxsw_sp_component_query,
 	.fsm_lock		= mlxsw_sp_fsm_lock,
@@ -294,11 +351,13 @@
 	.fsm_activate		= mlxsw_sp_fsm_activate,
 	.fsm_query_state	= mlxsw_sp_fsm_query_state,
 	.fsm_cancel		= mlxsw_sp_fsm_cancel,
-	.fsm_release		= mlxsw_sp_fsm_release
+	.fsm_release		= mlxsw_sp_fsm_release,
+	.status_notify		= mlxsw_sp_status_notify,
 };
 
 static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
-				   const struct firmware *firmware)
+				   const struct firmware *firmware,
+				   struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = {
 		.mlxfw_dev = {
@@ -308,8 +367,16 @@
 		},
 		.mlxsw_sp = mlxsw_sp
 	};
+	int err;
 
-	return mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware);
+	mlxsw_core_fw_flash_start(mlxsw_sp->core);
+	devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core));
+	err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev,
+				   firmware, extack);
+	devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core));
+	mlxsw_core_fw_flash_end(mlxsw_sp->core);
+
+	return err;
 }
 
 static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
@@ -317,6 +384,7 @@
 	const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev;
 	const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev;
 	const char *fw_filename = mlxsw_sp->fw_filename;
+	union devlink_param_value value;
 	const struct firmware *firmware;
 	int err;
 
@@ -324,6 +392,15 @@
 	if (!req_rev || !fw_filename)
 		return 0;
 
+	/* Don't check if devlink 'fw_load_policy' param is 'flash' */
+	err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_sp->core),
+						 DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+						 &value);
+	if (err)
+		return err;
+	if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH)
+		return 0;
+
 	/* Validate driver & FW are compatible */
 	if (rev->major != req_rev->major) {
 		WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n",
@@ -331,7 +408,10 @@
 		return -EINVAL;
 	}
 	if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
-	    MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor))
+	    MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) &&
+	    (rev->minor > req_rev->minor ||
+	     (rev->minor == req_rev->minor &&
+	      rev->subminor >= req_rev->subminor)))
 		return 0;
 
 	dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
@@ -347,7 +427,7 @@
 		return err;
 	}
 
-	err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware);
+	err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL);
 	release_firmware(firmware);
 	if (err)
 		dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n");
@@ -361,6 +441,27 @@
 		return 0;
 }
 
+static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core,
+				 const char *file_name, const char *component,
+				 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct firmware *firmware;
+	int err;
+
+	if (component)
+		return -EOPNOTSUPP;
+
+	err = request_firmware_direct(&firmware, file_name,
+				      mlxsw_sp->bus_info->dev);
+	if (err)
+		return err;
+	err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack);
+	release_firmware(firmware);
+
+	return err;
+}
+
 int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
 			      unsigned int counter_index, u64 *packets,
 			      u64 *bytes)
@@ -711,6 +812,8 @@
 	u64 len;
 	int err;
 
+	memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
 	if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info))
 		return NETDEV_TX_BUSY;
 
@@ -826,8 +929,12 @@
 	u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0;
 	u16 delay = !!my_pfc ? my_pfc->delay : 0;
 	char pbmc_pl[MLXSW_REG_PBMC_LEN];
+	u32 taken_headroom_cells = 0;
+	u32 max_headroom_cells;
 	int i, j, err;
 
+	max_headroom_cells = mlxsw_sp_sb_max_headroom_cells(mlxsw_sp);
+
 	mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
 	if (err)
@@ -836,8 +943,10 @@
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		bool configure = false;
 		bool pfc = false;
+		u16 thres_cells;
+		u16 delay_cells;
+		u16 total_cells;
 		bool lossy;
-		u16 thres;
 
 		for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) {
 			if (prio_tc[j] == i) {
@@ -851,10 +960,17 @@
 			continue;
 
 		lossy = !(pfc || pause_en);
-		thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu);
-		delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc,
-						  pause_en);
-		mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy);
+		thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu);
+		delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay,
+							pfc, pause_en);
+		total_cells = thres_cells + delay_cells;
+
+		taken_headroom_cells += total_cells;
+		if (taken_headroom_cells > max_headroom_cells)
+			return -ENOBUFS;
+
+		mlxsw_sp_pg_buf_pack(pbmc_pl, i, total_cells,
+				     thres_cells, lossy);
 	}
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
@@ -1114,22 +1230,40 @@
 	return 0;
 }
 
-static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port)
+static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+				     bool flush_default)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, *tmp;
 
 	list_for_each_entry_safe(mlxsw_sp_port_vlan, tmp,
-				 &mlxsw_sp_port->vlans_list, list)
-		mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+				 &mlxsw_sp_port->vlans_list, list) {
+		if (!flush_default &&
+		    mlxsw_sp_port_vlan->vid == MLXSW_SP_DEFAULT_VID)
+			continue;
+		mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
+	}
 }
 
-static struct mlxsw_sp_port_vlan *
+static void
+mlxsw_sp_port_vlan_cleanup(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+	if (mlxsw_sp_port_vlan->bridge_port)
+		mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
+	else if (mlxsw_sp_port_vlan->fid)
+		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+}
+
+struct mlxsw_sp_port_vlan *
 mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-	bool untagged = vid == 1;
+	bool untagged = vid == MLXSW_SP_DEFAULT_VID;
 	int err;
 
+	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+	if (mlxsw_sp_port_vlan)
+		return ERR_PTR(-EEXIST);
+
 	err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, untagged);
 	if (err)
 		return ERR_PTR(err);
@@ -1141,7 +1275,6 @@
 	}
 
 	mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port;
-	mlxsw_sp_port_vlan->ref_count = 1;
 	mlxsw_sp_port_vlan->vid = vid;
 	list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list);
 
@@ -1152,46 +1285,17 @@
 	return ERR_PTR(err);
 }
 
-static void
-mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
 	u16 vid = mlxsw_sp_port_vlan->vid;
 
+	mlxsw_sp_port_vlan_cleanup(mlxsw_sp_port_vlan);
 	list_del(&mlxsw_sp_port_vlan->list);
 	kfree(mlxsw_sp_port_vlan);
 	mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
 }
 
-struct mlxsw_sp_port_vlan *
-mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
-{
-	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-
-	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
-	if (mlxsw_sp_port_vlan) {
-		mlxsw_sp_port_vlan->ref_count++;
-		return mlxsw_sp_port_vlan;
-	}
-
-	return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid);
-}
-
-void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
-{
-	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
-
-	if (--mlxsw_sp_port_vlan->ref_count != 0)
-		return;
-
-	if (mlxsw_sp_port_vlan->bridge_port)
-		mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
-	else if (fid)
-		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
-
-	mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
-}
-
 static int mlxsw_sp_port_add_vid(struct net_device *dev,
 				 __be16 __always_unused proto, u16 vid)
 {
@@ -1203,7 +1307,7 @@
 	if (!vid)
 		return 0;
 
-	return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid));
+	return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid));
 }
 
 static int mlxsw_sp_port_kill_vid(struct net_device *dev,
@@ -1221,21 +1325,11 @@
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
 	if (!mlxsw_sp_port_vlan)
 		return 0;
-	mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+	mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
 
 	return 0;
 }
 
-static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name,
-					    size_t len)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-
-	return mlxsw_core_port_get_phys_port_name(mlxsw_sp_port->mlxsw_sp->core,
-						  mlxsw_sp_port->local_port,
-						  name, len);
-}
-
 static struct mlxsw_sp_port_mall_tc_entry *
 mlxsw_sp_port_mall_tc_entry_find(struct mlxsw_sp_port *port,
 				 unsigned long cookie) {
@@ -1251,21 +1345,19 @@
 static int
 mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 				      struct mlxsw_sp_port_mall_mirror_tc_entry *mirror,
-				      const struct tc_action *a,
+				      const struct flow_action_entry *act,
 				      bool ingress)
 {
 	enum mlxsw_sp_span_type span_type;
-	struct net_device *to_dev;
 
-	to_dev = tcf_mirred_dev(a);
-	if (!to_dev) {
+	if (!act->dev) {
 		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
 		return -EINVAL;
 	}
 
 	mirror->ingress = ingress;
 	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, act->dev, span_type,
 					true, &mirror->span_id);
 }
 
@@ -1284,7 +1376,7 @@
 static int
 mlxsw_sp_port_add_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port,
 				      struct tc_cls_matchall_offload *cls,
-				      const struct tc_action *a,
+				      const struct flow_action_entry *act,
 				      bool ingress)
 {
 	int err;
@@ -1295,18 +1387,18 @@
 		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
 		return -EEXIST;
 	}
-	if (tcf_sample_rate(a) > MLXSW_REG_MPSC_RATE_MAX) {
+	if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
 		netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
 		return -EOPNOTSUPP;
 	}
 
 	rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
-			   tcf_sample_psample_group(a));
-	mlxsw_sp_port->sample->truncate = tcf_sample_truncate(a);
-	mlxsw_sp_port->sample->trunc_size = tcf_sample_trunc_size(a);
-	mlxsw_sp_port->sample->rate = tcf_sample_rate(a);
+			   act->sample.psample_group);
+	mlxsw_sp_port->sample->truncate = act->sample.truncate;
+	mlxsw_sp_port->sample->trunc_size = act->sample.trunc_size;
+	mlxsw_sp_port->sample->rate = act->sample.rate;
 
-	err = mlxsw_sp_port_sample_set(mlxsw_sp_port, true, tcf_sample_rate(a));
+	err = mlxsw_sp_port_sample_set(mlxsw_sp_port, true, act->sample.rate);
 	if (err)
 		goto err_port_sample_set;
 	return 0;
@@ -1332,11 +1424,10 @@
 {
 	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
 	__be16 protocol = f->common.protocol;
-	const struct tc_action *a;
-	LIST_HEAD(actions);
+	struct flow_action_entry *act;
 	int err;
 
-	if (!tcf_exts_has_one_action(f->exts)) {
+	if (!flow_offload_has_one_action(&f->rule->action)) {
 		netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
 		return -EOPNOTSUPP;
 	}
@@ -1346,19 +1437,21 @@
 		return -ENOMEM;
 	mall_tc_entry->cookie = f->cookie;
 
-	a = tcf_exts_first_action(f->exts);
+	act = &f->rule->action.entries[0];
 
-	if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
+	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
 		struct mlxsw_sp_port_mall_mirror_tc_entry *mirror;
 
 		mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR;
 		mirror = &mall_tc_entry->mirror;
 		err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port,
-							    mirror, a, ingress);
-	} else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) {
+							    mirror, act,
+							    ingress);
+	} else if (act->id == FLOW_ACTION_SAMPLE &&
+		   protocol == htons(ETH_P_ALL)) {
 		mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
 		err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f,
-							    a, ingress);
+							    act, ingress);
 	} else {
 		err = -EOPNOTSUPP;
 	}
@@ -1420,21 +1513,21 @@
 
 static int
 mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block,
-			     struct tc_cls_flower_offload *f)
+			     struct flow_cls_offload *f)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_acl_block_mlxsw_sp(acl_block);
 
 	switch (f->command) {
-	case TC_CLSFLOWER_REPLACE:
+	case FLOW_CLS_REPLACE:
 		return mlxsw_sp_flower_replace(mlxsw_sp, acl_block, f);
-	case TC_CLSFLOWER_DESTROY:
+	case FLOW_CLS_DESTROY:
 		mlxsw_sp_flower_destroy(mlxsw_sp, acl_block, f);
 		return 0;
-	case TC_CLSFLOWER_STATS:
+	case FLOW_CLS_STATS:
 		return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f);
-	case TC_CLSFLOWER_TMPLT_CREATE:
+	case FLOW_CLS_TMPLT_CREATE:
 		return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f);
-	case TC_CLSFLOWER_TMPLT_DESTROY:
+	case FLOW_CLS_TMPLT_DESTROY:
 		mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f);
 		return 0;
 	default:
@@ -1497,35 +1590,47 @@
 	}
 }
 
+static void mlxsw_sp_tc_block_flower_release(void *cb_priv)
+{
+	struct mlxsw_sp_acl_block *acl_block = cb_priv;
+
+	mlxsw_sp_acl_block_destroy(acl_block);
+}
+
+static LIST_HEAD(mlxsw_sp_block_cb_list);
+
 static int
 mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
-				    struct tcf_block *block, bool ingress,
-				    struct netlink_ext_ack *extack)
+				    struct flow_block_offload *f, bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_acl_block *acl_block;
-	struct tcf_block_cb *block_cb;
+	struct flow_block_cb *block_cb;
+	bool register_block = false;
 	int err;
 
-	block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
-				       mlxsw_sp);
+	block_cb = flow_block_cb_lookup(f->block,
+					mlxsw_sp_setup_tc_block_cb_flower,
+					mlxsw_sp);
 	if (!block_cb) {
-		acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, block->net);
+		acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
 		if (!acl_block)
 			return -ENOMEM;
-		block_cb = __tcf_block_cb_register(block,
-						   mlxsw_sp_setup_tc_block_cb_flower,
-						   mlxsw_sp, acl_block, extack);
+		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
+					       mlxsw_sp, acl_block,
+					       mlxsw_sp_tc_block_flower_release);
 		if (IS_ERR(block_cb)) {
+			mlxsw_sp_acl_block_destroy(acl_block);
 			err = PTR_ERR(block_cb);
 			goto err_cb_register;
 		}
+		register_block = true;
 	} else {
-		acl_block = tcf_block_cb_priv(block_cb);
+		acl_block = flow_block_cb_priv(block_cb);
 	}
-	tcf_block_cb_incref(block_cb);
+	flow_block_cb_incref(block_cb);
 	err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block,
-				      mlxsw_sp_port, ingress);
+				      mlxsw_sp_port, ingress, f->extack);
 	if (err)
 		goto err_block_bind;
 
@@ -1534,28 +1639,32 @@
 	else
 		mlxsw_sp_port->eg_acl_block = acl_block;
 
+	if (register_block) {
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
+	}
+
 	return 0;
 
 err_block_bind:
-	if (!tcf_block_cb_decref(block_cb)) {
-		__tcf_block_cb_unregister(block, block_cb);
+	if (!flow_block_cb_decref(block_cb))
+		flow_block_cb_free(block_cb);
 err_cb_register:
-		mlxsw_sp_acl_block_destroy(acl_block);
-	}
 	return err;
 }
 
 static void
 mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct tcf_block *block, bool ingress)
+				      struct flow_block_offload *f, bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_acl_block *acl_block;
-	struct tcf_block_cb *block_cb;
+	struct flow_block_cb *block_cb;
 	int err;
 
-	block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
-				       mlxsw_sp);
+	block_cb = flow_block_cb_lookup(f->block,
+					mlxsw_sp_setup_tc_block_cb_flower,
+					mlxsw_sp);
 	if (!block_cb)
 		return;
 
@@ -1564,50 +1673,63 @@
 	else
 		mlxsw_sp_port->eg_acl_block = NULL;
 
-	acl_block = tcf_block_cb_priv(block_cb);
+	acl_block = flow_block_cb_priv(block_cb);
 	err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
 					mlxsw_sp_port, ingress);
-	if (!err && !tcf_block_cb_decref(block_cb)) {
-		__tcf_block_cb_unregister(block, block_cb);
-		mlxsw_sp_acl_block_destroy(acl_block);
+	if (!err && !flow_block_cb_decref(block_cb)) {
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
 	}
 }
 
 static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
-				   struct tc_block_offload *f)
+				   struct flow_block_offload *f)
 {
-	tc_setup_cb_t *cb;
+	struct flow_block_cb *block_cb;
+	flow_setup_cb_t *cb;
 	bool ingress;
 	int err;
 
-	if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
 		cb = mlxsw_sp_setup_tc_block_cb_matchall_ig;
 		ingress = true;
-	} else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+	} else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
 		cb = mlxsw_sp_setup_tc_block_cb_matchall_eg;
 		ingress = false;
 	} else {
 		return -EOPNOTSUPP;
 	}
 
+	f->driver_block_list = &mlxsw_sp_block_cb_list;
+
 	switch (f->command) {
-	case TC_BLOCK_BIND:
-		err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
-					    mlxsw_sp_port, f->extack);
-		if (err)
-			return err;
-		err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port,
-							  f->block, ingress,
-							  f->extack);
+	case FLOW_BLOCK_BIND:
+		if (flow_block_cb_is_busy(cb, mlxsw_sp_port,
+					  &mlxsw_sp_block_cb_list))
+			return -EBUSY;
+
+		block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port,
+					       mlxsw_sp_port, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+		err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, f,
+							  ingress);
 		if (err) {
-			tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+			flow_block_cb_free(block_cb);
 			return err;
 		}
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
 		return 0;
-	case TC_BLOCK_UNBIND:
+	case FLOW_BLOCK_UNBIND:
 		mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
-						      f->block, ingress);
-		tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+						      f, ingress);
+		block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -1652,6 +1774,25 @@
 	return 0;
 }
 
+static int mlxsw_sp_feature_loopback(struct net_device *dev, bool enable)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	char pplr_pl[MLXSW_REG_PPLR_LEN];
+	int err;
+
+	if (netif_running(dev))
+		mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
+
+	mlxsw_reg_pplr_pack(pplr_pl, mlxsw_sp_port->local_port, enable);
+	err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pplr),
+			      pplr_pl);
+
+	if (netif_running(dev))
+		mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
+
+	return err;
+}
+
 typedef int (*mlxsw_sp_feature_handler)(struct net_device *dev, bool enable);
 
 static int mlxsw_sp_handle_feature(struct net_device *dev,
@@ -1683,8 +1824,89 @@
 static int mlxsw_sp_set_features(struct net_device *dev,
 				 netdev_features_t features)
 {
-	return mlxsw_sp_handle_feature(dev, features, NETIF_F_HW_TC,
+	netdev_features_t oper_features = dev->features;
+	int err = 0;
+
+	err |= mlxsw_sp_handle_feature(dev, features, NETIF_F_HW_TC,
 				       mlxsw_sp_feature_hw_tc);
+	err |= mlxsw_sp_handle_feature(dev, features, NETIF_F_LOOPBACK,
+				       mlxsw_sp_feature_loopback);
+
+	if (err) {
+		dev->features = oper_features;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct devlink_port *
+mlxsw_sp_port_get_devlink_port(struct net_device *dev)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+	return mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
+						mlxsw_sp_port->local_port);
+}
+
+static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				      struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port,
+							     &config);
+	if (err)
+		return err;
+
+	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+				      struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port,
+							     &config);
+	if (err)
+		return err;
+
+	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct hwtstamp_config config = {0};
+
+	mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config);
+}
+
+static int
+mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr);
+	case SIOCGHWTSTAMP:
+		return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
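/* Userspace sketch of exercising the new ioctl path above (the interface
 * name and helper are assumptions): request TX timestamping and PTPv2
 * event RX filtering via SIOCSHWTSTAMP, then read the active configuration
 * back via SIOCGHWTSTAMP.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

static int example_enable_hwtstamp(int sock_fd, const char *ifname)
{
	struct hwtstamp_config cfg = {
		.tx_type = HWTSTAMP_TX_ON,
		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
	};
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;

	if (ioctl(sock_fd, SIOCSHWTSTAMP, &ifr) < 0)
		return -1;
	/* cfg is rewritten in place with the configuration in effect */
	return ioctl(sock_fd, SIOCGHWTSTAMP, &ifr);
}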
 
 static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
@@ -1700,8 +1922,9 @@
 	.ndo_get_offload_stats	= mlxsw_sp_port_get_offload_stats,
 	.ndo_vlan_rx_add_vid	= mlxsw_sp_port_add_vid,
 	.ndo_vlan_rx_kill_vid	= mlxsw_sp_port_kill_vid,
-	.ndo_get_phys_port_name	= mlxsw_sp_port_get_phys_port_name,
 	.ndo_set_features	= mlxsw_sp_set_features,
+	.ndo_get_devlink_port	= mlxsw_sp_port_get_devlink_port,
+	.ndo_do_ioctl		= mlxsw_sp_port_ioctl,
 };
 
 static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
@@ -1872,8 +2095,38 @@
 
 #define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats)
 
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2863_stats[] = {
+	{
+		.str = "if_in_discards",
+		.getter = mlxsw_reg_ppcnt_if_in_discards_get,
+	},
+	{
+		.str = "if_out_discards",
+		.getter = mlxsw_reg_ppcnt_if_out_discards_get,
+	},
+	{
+		.str = "if_out_errors",
+		.getter = mlxsw_reg_ppcnt_if_out_errors_get,
+	},
+};
+
+#define MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN \
+	ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2863_stats)
+
 static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2819_stats[] = {
 	{
+		.str = "ether_stats_undersize_pkts",
+		.getter = mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get,
+	},
+	{
+		.str = "ether_stats_oversize_pkts",
+		.getter = mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get,
+	},
+	{
+		.str = "ether_stats_fragments",
+		.getter = mlxsw_reg_ppcnt_ether_stats_fragments_get,
+	},
+	{
 		.str = "ether_pkts64octets",
 		.getter = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get,
 	},
@@ -1918,6 +2171,82 @@
 #define MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN \
 	ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2819_stats)
 
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_3635_stats[] = {
+	{
+		.str = "dot3stats_fcs_errors",
+		.getter = mlxsw_reg_ppcnt_dot3stats_fcs_errors_get,
+	},
+	{
+		.str = "dot3stats_symbol_errors",
+		.getter = mlxsw_reg_ppcnt_dot3stats_symbol_errors_get,
+	},
+	{
+		.str = "dot3control_in_unknown_opcodes",
+		.getter = mlxsw_reg_ppcnt_dot3control_in_unknown_opcodes_get,
+	},
+	{
+		.str = "dot3in_pause_frames",
+		.getter = mlxsw_reg_ppcnt_dot3in_pause_frames_get,
+	},
+};
+
+#define MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN \
+	ARRAY_SIZE(mlxsw_sp_port_hw_rfc_3635_stats)
+
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_discard_stats[] = {
+	{
+		.str = "discard_ingress_general",
+		.getter = mlxsw_reg_ppcnt_ingress_general_get,
+	},
+	{
+		.str = "discard_ingress_policy_engine",
+		.getter = mlxsw_reg_ppcnt_ingress_policy_engine_get,
+	},
+	{
+		.str = "discard_ingress_vlan_membership",
+		.getter = mlxsw_reg_ppcnt_ingress_vlan_membership_get,
+	},
+	{
+		.str = "discard_ingress_tag_frame_type",
+		.getter = mlxsw_reg_ppcnt_ingress_tag_frame_type_get,
+	},
+	{
+		.str = "discard_egress_vlan_membership",
+		.getter = mlxsw_reg_ppcnt_egress_vlan_membership_get,
+	},
+	{
+		.str = "discard_loopback_filter",
+		.getter = mlxsw_reg_ppcnt_loopback_filter_get,
+	},
+	{
+		.str = "discard_egress_general",
+		.getter = mlxsw_reg_ppcnt_egress_general_get,
+	},
+	{
+		.str = "discard_egress_hoq",
+		.getter = mlxsw_reg_ppcnt_egress_hoq_get,
+	},
+	{
+		.str = "discard_egress_policy_engine",
+		.getter = mlxsw_reg_ppcnt_egress_policy_engine_get,
+	},
+	{
+		.str = "discard_ingress_tx_link_down",
+		.getter = mlxsw_reg_ppcnt_ingress_tx_link_down_get,
+	},
+	{
+		.str = "discard_egress_stp_filter",
+		.getter = mlxsw_reg_ppcnt_egress_stp_filter_get,
+	},
+	{
+		.str = "discard_egress_sll",
+		.getter = mlxsw_reg_ppcnt_egress_sll_get,
+	},
+};
+
+#define MLXSW_SP_PORT_HW_DISCARD_STATS_LEN \
+	ARRAY_SIZE(mlxsw_sp_port_hw_discard_stats)
+
 static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = {
 	{
 		.str = "rx_octets_prio",
@@ -1970,7 +2299,10 @@
 #define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats)
 
 #define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \
+					 MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \
 					 MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \
+					 MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN + \
+					 MLXSW_SP_PORT_HW_DISCARD_STATS_LEN + \
 					 (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \
 					  IEEE_8021QAZ_MAX_TCS) + \
 					 (MLXSW_SP_PORT_HW_TC_STATS_LEN * \
@@ -1981,7 +2313,7 @@
 	int i;
 
 	for (i = 0; i < MLXSW_SP_PORT_HW_PRIO_STATS_LEN; i++) {
-		snprintf(*p, ETH_GSTRING_LEN, "%s_%d",
+		snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d",
 			 mlxsw_sp_port_hw_prio_stats[i].str, prio);
 		*p += ETH_GSTRING_LEN;
 	}
@@ -1992,7 +2324,7 @@
 	int i;
 
 	for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) {
-		snprintf(*p, ETH_GSTRING_LEN, "%s_%d",
+		snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d",
 			 mlxsw_sp_port_hw_tc_stats[i].str, tc);
 		*p += ETH_GSTRING_LEN;
 	}
@@ -2001,6 +2333,7 @@
 static void mlxsw_sp_port_get_strings(struct net_device *dev,
 				      u32 stringset, u8 *data)
 {
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	u8 *p = data;
 	int i;
 
@@ -2011,18 +2344,38 @@
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
+
+		for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; i++) {
+			memcpy(p, mlxsw_sp_port_hw_rfc_2863_stats[i].str,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
 		for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; i++) {
 			memcpy(p, mlxsw_sp_port_hw_rfc_2819_stats[i].str,
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
 
+		for (i = 0; i < MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; i++) {
+			memcpy(p, mlxsw_sp_port_hw_rfc_3635_stats[i].str,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; i++) {
+			memcpy(p, mlxsw_sp_port_hw_discard_stats[i].str,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
 		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
 			mlxsw_sp_port_get_prio_strings(&p, i);
 
 		for (i = 0; i < TC_MAX_QUEUE; i++)
 			mlxsw_sp_port_get_tc_strings(&p, i);
 
+		mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p);
 		break;
 	}
 }
@@ -2059,10 +2412,22 @@
 		*p_hw_stats = mlxsw_sp_port_hw_stats;
 		*p_len = MLXSW_SP_PORT_HW_STATS_LEN;
 		break;
+	case MLXSW_REG_PPCNT_RFC_2863_CNT:
+		*p_hw_stats = mlxsw_sp_port_hw_rfc_2863_stats;
+		*p_len = MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN;
+		break;
 	case MLXSW_REG_PPCNT_RFC_2819_CNT:
 		*p_hw_stats = mlxsw_sp_port_hw_rfc_2819_stats;
 		*p_len = MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN;
 		break;
+	case MLXSW_REG_PPCNT_RFC_3635_CNT:
+		*p_hw_stats = mlxsw_sp_port_hw_rfc_3635_stats;
+		*p_len = MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN;
+		break;
+	case MLXSW_REG_PPCNT_DISCARD_CNT:
+		*p_hw_stats = mlxsw_sp_port_hw_discard_stats;
+		*p_len = MLXSW_SP_PORT_HW_DISCARD_STATS_LEN;
+		break;
 	case MLXSW_REG_PPCNT_PRIO_CNT:
 		*p_hw_stats = mlxsw_sp_port_hw_prio_stats;
 		*p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN;
@@ -2105,6 +2470,7 @@
 static void mlxsw_sp_port_get_stats(struct net_device *dev,
 				    struct ethtool_stats *stats, u64 *data)
 {
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	int i, data_index = 0;
 
 	/* IEEE 802.3 Counters */
@@ -2112,11 +2478,26 @@
 				  data, data_index);
 	data_index = MLXSW_SP_PORT_HW_STATS_LEN;
 
+	/* RFC 2863 Counters */
+	__mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2863_CNT, 0,
+				  data, data_index);
+	data_index += MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN;
+
 	/* RFC 2819 Counters */
 	__mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, 0,
 				  data, data_index);
 	data_index += MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN;
 
+	/* RFC 3635 Counters */
+	__mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_3635_CNT, 0,
+				  data, data_index);
+	data_index += MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN;
+
+	/* Discard Counters */
+	__mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_DISCARD_CNT, 0,
+				  data, data_index);
+	data_index += MLXSW_SP_PORT_HW_DISCARD_STATS_LEN;
+
 	/* Per-Priority Counters */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		__mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i,
@@ -2130,25 +2511,33 @@
 					  data, data_index);
 		data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN;
 	}
+
+	/* PTP counters */
+	mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port,
+						    data, data_index);
+	data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count();
 }
 
 static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset)
 {
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
 	switch (sset) {
 	case ETH_SS_STATS:
-		return MLXSW_SP_PORT_ETHTOOL_STATS_LEN;
+		return MLXSW_SP_PORT_ETHTOOL_STATS_LEN +
+		       mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count();
 	default:
 		return -EOPNOTSUPP;
 	}
 }
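+/* get_sset_count(), get_strings() and get_stats() must agree on the PTP
+ * contribution; all three defer to the same per-ASIC ptp_ops, so the counts
+ * stay consistent by construction.
+ */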
 
-struct mlxsw_sp_port_link_mode {
+struct mlxsw_sp1_port_link_mode {
 	enum ethtool_link_mode_bit_indices mask_ethtool;
 	u32 mask;
 	u32 speed;
 };
 
-static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = {
+static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
 		.mask_ethtool	= ETHTOOL_LINK_MODE_100baseT_Full_BIT,
@@ -2220,11 +2609,6 @@
 		.speed		= SPEED_25000,
 	},
 	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
-		.mask_ethtool	= ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
-		.speed		= SPEED_25000,
-	},
-	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2,
 		.mask_ethtool	= ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
 		.speed		= SPEED_50000,
@@ -2240,26 +2624,6 @@
 		.speed		= SPEED_50000,
 	},
 	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT,
-		.speed		= SPEED_56000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT,
-		.speed		= SPEED_56000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT,
-		.speed		= SPEED_56000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT,
-		.speed		= SPEED_56000,
-	},
-	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4,
 		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
 		.speed		= SPEED_100000,
@@ -2281,11 +2645,12 @@
 	},
 };
 
-#define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode)
+#define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode)
 
 static void
-mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto,
-				  struct ethtool_link_ksettings *cmd)
+mlxsw_sp1_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp,
+				   u32 ptys_eth_proto,
+				   struct ethtool_link_ksettings *cmd)
 {
 	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
 			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
@@ -2303,162 +2668,663 @@
 		ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);
 }
 
-static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode)
+static void
+mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
+			 u8 width, unsigned long *mode)
 {
 	int i;
 
-	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask)
-			__set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+	for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+		if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask)
+			__set_bit(mlxsw_sp1_port_link_mode[i].mask_ethtool,
 				  mode);
 	}
 }
 
-static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
-					    struct ethtool_link_ksettings *cmd)
+static u32
+mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto)
 {
-	u32 speed = SPEED_UNKNOWN;
-	u8 duplex = DUPLEX_UNKNOWN;
 	int i;
 
-	if (!carrier_ok)
-		goto out;
-
-	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) {
-			speed = mlxsw_sp_port_link_mode[i].speed;
-			duplex = DUPLEX_FULL;
-			break;
-		}
+	for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+		if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask)
+			return mlxsw_sp1_port_link_mode[i].speed;
 	}
-out:
-	cmd->base.speed = speed;
-	cmd->base.duplex = duplex;
+
+	return SPEED_UNKNOWN;
 }
 
-static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto)
+static void
+mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok,
+				 u32 ptys_eth_proto,
+				 struct ethtool_link_ksettings *cmd)
 {
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_SGMII))
-		return PORT_FIBRE;
+	cmd->base.speed = SPEED_UNKNOWN;
+	cmd->base.duplex = DUPLEX_UNKNOWN;
 
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4))
-		return PORT_DA;
+	if (!carrier_ok)
+		return;
 
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
-			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4))
-		return PORT_NONE;
-
-	return PORT_OTHER;
+	cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto);
+	if (cmd->base.speed != SPEED_UNKNOWN)
+		cmd->base.duplex = DUPLEX_FULL;
 }
 
 static u32
-mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd)
+mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width,
+			      const struct ethtool_link_ksettings *cmd)
 {
 	u32 ptys_proto = 0;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+	for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+		if (test_bit(mlxsw_sp1_port_link_mode[i].mask_ethtool,
 			     cmd->link_modes.advertising))
-			ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
+			ptys_proto |= mlxsw_sp1_port_link_mode[i].mask;
 	}
 	return ptys_proto;
 }
 
-static u32 mlxsw_sp_to_ptys_speed(u32 speed)
+static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u8 width,
+				   u32 speed)
 {
 	u32 ptys_proto = 0;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (speed == mlxsw_sp_port_link_mode[i].speed)
-			ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
+	for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+		if (speed == mlxsw_sp1_port_link_mode[i].speed)
+			ptys_proto |= mlxsw_sp1_port_link_mode[i].mask;
 	}
 	return ptys_proto;
 }
 
-static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed)
+static u32
+mlxsw_sp1_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed)
 {
 	u32 ptys_proto = 0;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (mlxsw_sp_port_link_mode[i].speed <= upper_speed)
-			ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
+	for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+		if (mlxsw_sp1_port_link_mode[i].speed <= upper_speed)
+			ptys_proto |= mlxsw_sp1_port_link_mode[i].mask;
 	}
 	return ptys_proto;
 }
 
-static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap,
-					     struct ethtool_link_ksettings *cmd)
+static int
+mlxsw_sp1_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+			  u32 *base_speed)
 {
+	*base_speed = MLXSW_SP_PORT_BASE_SPEED_25G;
+	return 0;
+}
+
+static void
+mlxsw_sp1_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload,
+			    u8 local_port, u32 proto_admin, bool autoneg)
+{
+	mlxsw_reg_ptys_eth_pack(payload, local_port, proto_admin, autoneg);
+}
+
+static void
+mlxsw_sp1_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload,
+			      u32 *p_eth_proto_cap, u32 *p_eth_proto_admin,
+			      u32 *p_eth_proto_oper)
+{
+	mlxsw_reg_ptys_eth_unpack(payload, p_eth_proto_cap, p_eth_proto_admin,
+				  p_eth_proto_oper);
+}
+
+static const struct mlxsw_sp_port_type_speed_ops
+mlxsw_sp1_port_type_speed_ops = {
+	.from_ptys_supported_port	= mlxsw_sp1_from_ptys_supported_port,
+	.from_ptys_link			= mlxsw_sp1_from_ptys_link,
+	.from_ptys_speed		= mlxsw_sp1_from_ptys_speed,
+	.from_ptys_speed_duplex		= mlxsw_sp1_from_ptys_speed_duplex,
+	.to_ptys_advert_link		= mlxsw_sp1_to_ptys_advert_link,
+	.to_ptys_speed			= mlxsw_sp1_to_ptys_speed,
+	.to_ptys_upper_speed		= mlxsw_sp1_to_ptys_upper_speed,
+	.port_speed_base		= mlxsw_sp1_port_speed_base,
+	.reg_ptys_eth_pack		= mlxsw_sp1_reg_ptys_eth_pack,
+	.reg_ptys_eth_unpack		= mlxsw_sp1_reg_ptys_eth_unpack,
+};
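+/* Spectrum-1 speaks the legacy PTYS Ethernet protocol masks, one ethtool
+ * mode per mask bit. Spectrum-2 (below) uses the extended masks, where one
+ * PTYS bit covers a whole group of ethtool modes. Callers reach both
+ * implementations only through mlxsw_sp->port_type_speed_ops.
+ */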
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_sgmii_100m[] = {
+	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_sgmii_100m)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_1000base_x_sgmii[] = {
+	ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_1000base_x_sgmii)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii[] = {
+	ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_5gbase_r[] = {
+	ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_5gbase_r)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g[] = {
+	ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+	ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
+	ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+	ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+	ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+	ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g[] = {
+	ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+	ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+	ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+	ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr[] = {
+	ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+	ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+	ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2[] = {
+	ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+	ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+	ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr[] = {
+	ETHTOOL_LINK_MODE_50000baseKR_Full_BIT,
+	ETHTOOL_LINK_MODE_50000baseSR_Full_BIT,
+	ETHTOOL_LINK_MODE_50000baseCR_Full_BIT,
+	ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+	ETHTOOL_LINK_MODE_50000baseDR_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4[] = {
+	ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2[] = {
+	ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT,
+	ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2)
+
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = {
+	ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT,
+	ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT,
+	ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
+	ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
+	ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4)
+
+#define MLXSW_SP_PORT_MASK_WIDTH_1X	BIT(0)
+#define MLXSW_SP_PORT_MASK_WIDTH_2X	BIT(1)
+#define MLXSW_SP_PORT_MASK_WIDTH_4X	BIT(2)
+
+static u8 mlxsw_sp_port_mask_width_get(u8 width)
+{
+	switch (width) {
+	case 1:
+		return MLXSW_SP_PORT_MASK_WIDTH_1X;
+	case 2:
+		return MLXSW_SP_PORT_MASK_WIDTH_2X;
+	case 4:
+		return MLXSW_SP_PORT_MASK_WIDTH_4X;
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
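+/* The width masks let a single table entry declare which port widths (1, 2
+ * or 4 lanes) may use a given speed: e.g. 40G below requires all four
+ * lanes, while 50G has both a two-lane (CR2/KR2) and a one-lane (CR/KR)
+ * entry.
+ */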
+
+struct mlxsw_sp2_port_link_mode {
+	const enum ethtool_link_mode_bit_indices *mask_ethtool;
+	int m_ethtool_len;
+	u32 mask;
+	u32 speed;
+	u8 mask_width;
+};
+
+static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_sgmii_100m,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
+				  MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_100,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_1000base_x_sgmii,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
+				  MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_1000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
+				  MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_2500,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_5gbase_r,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
+				  MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_5000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
+				  MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_10000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_40000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
+				  MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_25000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_2X |
+				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_50000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X,
+		.speed		= SPEED_50000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_100000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_2X,
+		.speed		= SPEED_100000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.speed		= SPEED_200000,
+	},
+};
+
+#define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode)
+
+static void
+mlxsw_sp2_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp,
+				   u32 ptys_eth_proto,
+				   struct ethtool_link_ksettings *cmd)
+{
+	ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);
+}
+
+static void
+mlxsw_sp2_set_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode,
+			  unsigned long *mode)
+{
+	int i;
+
+	for (i = 0; i < link_mode->m_ethtool_len; i++)
+		__set_bit(link_mode->mask_ethtool[i], mode);
+}
+
+static void
+mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
+			 u8 width, unsigned long *mode)
+{
+	u8 mask_width = mlxsw_sp_port_mask_width_get(width);
+	int i;
+
+	for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+		if ((ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) &&
+		    (mask_width & mlxsw_sp2_port_link_mode[i].mask_width))
+			mlxsw_sp2_set_bit_ethtool(&mlxsw_sp2_port_link_mode[i],
+						  mode);
+	}
+}
+
+static u32
+mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+		if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask)
+			return mlxsw_sp2_port_link_mode[i].speed;
+	}
+
+	return SPEED_UNKNOWN;
+}
+
+static void
+mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok,
+				 u32 ptys_eth_proto,
+				 struct ethtool_link_ksettings *cmd)
+{
+	cmd->base.speed = SPEED_UNKNOWN;
+	cmd->base.duplex = DUPLEX_UNKNOWN;
+
+	if (!carrier_ok)
+		return;
+
+	cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto);
+	if (cmd->base.speed != SPEED_UNKNOWN)
+		cmd->base.duplex = DUPLEX_FULL;
+}
+
+static bool
+mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode,
+			   const unsigned long *mode)
+{
+	int cnt = 0;
+	int i;
+
+	for (i = 0; i < link_mode->m_ethtool_len; i++) {
+		if (test_bit(link_mode->mask_ethtool[i], mode))
+			cnt++;
+	}
+
+	return cnt == link_mode->m_ethtool_len;
+}
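+/* Because one Spectrum-2 PTYS bit stands for several ethtool modes, a mode
+ * group is advertised to the device only when userspace requested every
+ * ethtool bit in that group.
+ */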
+
+static u32
+mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width,
+			      const struct ethtool_link_ksettings *cmd)
+{
+	u8 mask_width = mlxsw_sp_port_mask_width_get(width);
+	u32 ptys_proto = 0;
+	int i;
+
+	for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+		if ((mask_width & mlxsw_sp2_port_link_mode[i].mask_width) &&
+		    mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i],
+					       cmd->link_modes.advertising))
+			ptys_proto |= mlxsw_sp2_port_link_mode[i].mask;
+	}
+	return ptys_proto;
+}
+
+static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp,
+				   u8 width, u32 speed)
+{
+	u8 mask_width = mlxsw_sp_port_mask_width_get(width);
+	u32 ptys_proto = 0;
+	int i;
+
+	for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+		if ((speed == mlxsw_sp2_port_link_mode[i].speed) &&
+		    (mask_width & mlxsw_sp2_port_link_mode[i].mask_width))
+			ptys_proto |= mlxsw_sp2_port_link_mode[i].mask;
+	}
+	return ptys_proto;
+}
+
+static u32
+mlxsw_sp2_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed)
+{
+	u32 ptys_proto = 0;
+	int i;
+
+	for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+		if (mlxsw_sp2_port_link_mode[i].speed <= upper_speed)
+			ptys_proto |= mlxsw_sp2_port_link_mode[i].mask;
+	}
+	return ptys_proto;
+}
+
+static int
+mlxsw_sp2_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+			  u32 *base_speed)
+{
+	char ptys_pl[MLXSW_REG_PTYS_LEN];
+	u32 eth_proto_cap;
+	int err;
+
+	/* In Spectrum-2, the speed of 1x can change from port to port, so query
+	 * it from firmware.
+	 */
+	mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, 0, false);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+	if (err)
+		return err;
+	mlxsw_reg_ptys_ext_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+
+	if (eth_proto_cap &
+	    MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR) {
+		*base_speed = MLXSW_SP_PORT_BASE_SPEED_50G;
+		return 0;
+	}
+
+	if (eth_proto_cap &
+	    MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR) {
+		*base_speed = MLXSW_SP_PORT_BASE_SPEED_25G;
+		return 0;
+	}
+
+	return -EIO;
+}
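+/* Reaching -EIO means the port reports neither a 50G nor a 25G single-lane
+ * capability, which is not expected on this ASIC.
+ */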
+
+static void
+mlxsw_sp2_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload,
+			    u8 local_port, u32 proto_admin,
+			    bool autoneg)
+{
+	mlxsw_reg_ptys_ext_eth_pack(payload, local_port, proto_admin, autoneg);
+}
+
+static void
+mlxsw_sp2_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload,
+			      u32 *p_eth_proto_cap, u32 *p_eth_proto_admin,
+			      u32 *p_eth_proto_oper)
+{
+	mlxsw_reg_ptys_ext_eth_unpack(payload, p_eth_proto_cap,
+				      p_eth_proto_admin, p_eth_proto_oper);
+}
+
+static const struct mlxsw_sp_port_type_speed_ops
+mlxsw_sp2_port_type_speed_ops = {
+	.from_ptys_supported_port	= mlxsw_sp2_from_ptys_supported_port,
+	.from_ptys_link			= mlxsw_sp2_from_ptys_link,
+	.from_ptys_speed		= mlxsw_sp2_from_ptys_speed,
+	.from_ptys_speed_duplex		= mlxsw_sp2_from_ptys_speed_duplex,
+	.to_ptys_advert_link		= mlxsw_sp2_to_ptys_advert_link,
+	.to_ptys_speed			= mlxsw_sp2_to_ptys_speed,
+	.to_ptys_upper_speed		= mlxsw_sp2_to_ptys_upper_speed,
+	.port_speed_base		= mlxsw_sp2_port_speed_base,
+	.reg_ptys_eth_pack		= mlxsw_sp2_reg_ptys_eth_pack,
+	.reg_ptys_eth_unpack		= mlxsw_sp2_reg_ptys_eth_unpack,
+};
+
+static void
+mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap,
+				 u8 width, struct ethtool_link_ksettings *cmd)
+{
+	const struct mlxsw_sp_port_type_speed_ops *ops;
+
+	ops = mlxsw_sp->port_type_speed_ops;
+
 	ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause);
 	ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
 	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
 
-	mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd);
-	mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported);
+	ops->from_ptys_supported_port(mlxsw_sp, eth_proto_cap, cmd);
+	ops->from_ptys_link(mlxsw_sp, eth_proto_cap, width,
+			    cmd->link_modes.supported);
 }
 
-static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg,
-					     struct ethtool_link_ksettings *cmd)
+static void
+mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp,
+				 u32 eth_proto_admin, bool autoneg, u8 width,
+				 struct ethtool_link_ksettings *cmd)
 {
+	const struct mlxsw_sp_port_type_speed_ops *ops;
+
+	ops = mlxsw_sp->port_type_speed_ops;
+
 	if (!autoneg)
 		return;
 
 	ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
-	mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising);
+	ops->from_ptys_link(mlxsw_sp, eth_proto_admin, width,
+			    cmd->link_modes.advertising);
 }
 
-static void
-mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status,
-				    struct ethtool_link_ksettings *cmd)
+static u8
+mlxsw_sp_port_connector_port(enum mlxsw_reg_ptys_connector_type connector_type)
 {
-	if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp)
-		return;
-
-	ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg);
-	mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising);
+	switch (connector_type) {
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR:
+		return PORT_OTHER;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE:
+		return PORT_NONE;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_TP:
+		return PORT_TP;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_AUI:
+		return PORT_AUI;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_BNC:
+		return PORT_BNC;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_MII:
+		return PORT_MII;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_FIBRE:
+		return PORT_FIBRE;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_DA:
+		return PORT_DA;
+	case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_OTHER:
+		return PORT_OTHER;
+	default:
+		WARN_ON_ONCE(1);
+		return PORT_OTHER;
+	}
 }
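+/* The connector type is now taken directly from the PTYS register instead
+ * of being inferred from the operational protocol mask.
+ */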
 
 static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
 					    struct ethtool_link_ksettings *cmd)
 {
-	u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp;
+	u32 eth_proto_cap, eth_proto_admin, eth_proto_oper;
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	const struct mlxsw_sp_port_type_speed_ops *ops;
 	char ptys_pl[MLXSW_REG_PTYS_LEN];
-	u8 autoneg_status;
+	u8 connector_type;
 	bool autoneg;
 	int err;
 
+	ops = mlxsw_sp->port_type_speed_ops;
+
 	autoneg = mlxsw_sp_port->link.autoneg;
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
+	ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
+			       0, false);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
-	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
-				  &eth_proto_oper);
+	ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap,
+				 &eth_proto_admin, &eth_proto_oper);
 
-	mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd);
+	mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap,
+					 mlxsw_sp_port->mapping.width, cmd);
 
-	mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd);
-
-	eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl);
-	autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl);
-	mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd);
+	mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg,
+					 mlxsw_sp_port->mapping.width, cmd);
 
 	cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
-	cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper);
-	mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper,
-					cmd);
+	connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl);
+	cmd->base.port = mlxsw_sp_port_connector_port(connector_type);
+	ops->from_ptys_speed_duplex(mlxsw_sp, netif_carrier_ok(dev),
+				    eth_proto_oper, cmd);
 
 	return 0;
 }
@@ -2469,21 +3335,27 @@
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	const struct mlxsw_sp_port_type_speed_ops *ops;
 	char ptys_pl[MLXSW_REG_PTYS_LEN];
 	u32 eth_proto_cap, eth_proto_new;
 	bool autoneg;
 	int err;
 
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
+	ops = mlxsw_sp->port_type_speed_ops;
+
+	ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
+			       0, false);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
-	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+	ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, NULL, NULL);
 
 	autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
 	eth_proto_new = autoneg ?
-		mlxsw_sp_to_ptys_advert_link(cmd) :
-		mlxsw_sp_to_ptys_speed(cmd->base.speed);
+		ops->to_ptys_advert_link(mlxsw_sp, mlxsw_sp_port->mapping.width,
+					 cmd) :
+		ops->to_ptys_speed(mlxsw_sp, mlxsw_sp_port->mapping.width,
+				   cmd->base.speed);
 
 	eth_proto_new = eth_proto_new & eth_proto_cap;
 	if (!eth_proto_new) {
@@ -2491,159 +3363,35 @@
 		return -EINVAL;
 	}
 
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
-				eth_proto_new, autoneg);
+	ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
+			       eth_proto_new, autoneg);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
 
+	mlxsw_sp_port->link.autoneg = autoneg;
+
 	if (!netif_running(dev))
 		return 0;
 
-	mlxsw_sp_port->link.autoneg = autoneg;
-
 	mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
 	mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
 
 	return 0;
 }
 
-static int mlxsw_sp_flash_device(struct net_device *dev,
-				 struct ethtool_flash *flash)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	const struct firmware *firmware;
-	int err;
-
-	if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
-		return -EOPNOTSUPP;
-
-	dev_hold(dev);
-	rtnl_unlock();
-
-	err = request_firmware_direct(&firmware, flash->data, &dev->dev);
-	if (err)
-		goto out;
-	err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware);
-	release_firmware(firmware);
-out:
-	rtnl_lock();
-	dev_put(dev);
-	return err;
-}
-
-#define MLXSW_SP_I2C_ADDR_LOW 0x50
-#define MLXSW_SP_I2C_ADDR_HIGH 0x51
-#define MLXSW_SP_EEPROM_PAGE_LENGTH 256
-
-static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
-					u16 offset, u16 size, void *data,
-					unsigned int *p_read_size)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE];
-	char mcia_pl[MLXSW_REG_MCIA_LEN];
-	u16 i2c_addr;
-	int status;
-	int err;
-
-	size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
-
-	if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH &&
-	    offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH)
-		/* Cross pages read, read until offset 256 in low page */
-		size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset;
-
-	i2c_addr = MLXSW_SP_I2C_ADDR_LOW;
-	if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) {
-		i2c_addr = MLXSW_SP_I2C_ADDR_HIGH;
-		offset -= MLXSW_SP_EEPROM_PAGE_LENGTH;
-	}
-
-	mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module,
-			    0, 0, offset, size, i2c_addr);
-
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl);
-	if (err)
-		return err;
-
-	status = mlxsw_reg_mcia_status_get(mcia_pl);
-	if (status)
-		return -EIO;
-
-	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
-	memcpy(data, eeprom_tmp, size);
-	*p_read_size = size;
-
-	return 0;
-}
-
-enum mlxsw_sp_eeprom_module_info_rev_id {
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC      = 0x00,
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436       = 0x01,
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636       = 0x03,
-};
-
-enum mlxsw_sp_eeprom_module_info_id {
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP              = 0x03,
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP             = 0x0C,
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS        = 0x0D,
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28           = 0x11,
-};
-
-enum mlxsw_sp_eeprom_module_info {
-	MLXSW_SP_EEPROM_MODULE_INFO_ID,
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID,
-	MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
-};
-
 static int mlxsw_sp_get_module_info(struct net_device *netdev,
 				    struct ethtool_modinfo *modinfo)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
-	u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE];
-	u8 module_rev_id, module_id;
-	unsigned int read_size;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
-	err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0,
-					   MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
-					   module_info, &read_size);
-	if (err)
-		return err;
+	err = mlxsw_env_get_module_info(mlxsw_sp->core,
+					mlxsw_sp_port->mapping.module,
+					modinfo);
 
-	if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE)
-		return -EIO;
-
-	module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID];
-	module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID];
-
-	switch (module_id) {
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP:
-		modinfo->type       = ETH_MODULE_SFF_8436;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
-		break;
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS:
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28:
-		if (module_id  == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 ||
-		    module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) {
-			modinfo->type       = ETH_MODULE_SFF_8636;
-			modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
-		} else {
-			modinfo->type       = ETH_MODULE_SFF_8436;
-			modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
-		}
-		break;
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP:
-		modinfo->type       = ETH_MODULE_SFF_8472;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
+	return err;
 }
 
 static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
@@ -2651,30 +3399,23 @@
 				      u8 *data)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
-	int offset = ee->offset;
-	unsigned int read_size;
-	int i = 0;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
-	if (!ee->len)
-		return -EINVAL;
+	err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
+					  mlxsw_sp_port->mapping.module, ee,
+					  data);
 
-	memset(data, 0, ee->len);
+	return err;
+}
 
-	while (i < ee->len) {
-		err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, offset,
-						   ee->len - i, data + i,
-						   &read_size);
-		if (err) {
-			netdev_err(mlxsw_sp_port->dev, "Eeprom query failed\n");
-			return err;
-		}
+static int
+mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 
-		i += read_size;
-		offset += read_size;
-	}
-
-	return 0;
+	return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info);
 }
 
 static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
@@ -2688,22 +3429,33 @@
 	.get_sset_count		= mlxsw_sp_port_get_sset_count,
 	.get_link_ksettings	= mlxsw_sp_port_get_link_ksettings,
 	.set_link_ksettings	= mlxsw_sp_port_set_link_ksettings,
-	.flash_device		= mlxsw_sp_flash_device,
 	.get_module_info	= mlxsw_sp_get_module_info,
 	.get_module_eeprom	= mlxsw_sp_get_module_eeprom,
+	.get_ts_info		= mlxsw_sp_get_ts_info,
 };
 
 static int
 mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	u32 upper_speed = MLXSW_SP_PORT_BASE_SPEED * width;
+	const struct mlxsw_sp_port_type_speed_ops *ops;
 	char ptys_pl[MLXSW_REG_PTYS_LEN];
 	u32 eth_proto_admin;
+	u32 upper_speed;
+	u32 base_speed;
+	int err;
 
-	eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
-				eth_proto_admin, mlxsw_sp_port->link.autoneg);
+	ops = mlxsw_sp->port_type_speed_ops;
+
+	err = ops->port_speed_base(mlxsw_sp, mlxsw_sp_port->local_port,
+				   &base_speed);
+	if (err)
+		return err;
+	upper_speed = base_speed * width;
+
+	eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed);
+	ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
+			       eth_proto_admin, mlxsw_sp_port->link.autoneg);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 }
 
@@ -2736,6 +3488,21 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
 }
 
+static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				    enum mlxsw_reg_qeec_hr hr, u8 index,
+				    u8 next_index, u32 minrate)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+	mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+			    next_index);
+	mlxsw_reg_qeec_mise_set(qeec_pl, true);
+	mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
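+/* mise/min_shaper_rate program the QEEC minimum shaper. It is applied
+ * below to the multicast TCs (8..15), presumably so that multicast traffic
+ * cannot be starved by unicast on the shared port.
+ */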
+
 int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			      u8 switch_prio, u8 tclass)
 {
@@ -2776,13 +3543,14 @@
 		err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
 					    MLXSW_REG_QEEC_HIERARCY_TC,
 					    i + 8, i,
-					    false, 0);
+					    true, 100);
 		if (err)
 			return err;
 	}
 
-	/* Make sure the max shaper is disabled in all hierarchies that
-	 * support it.
+	/* Make sure the max shaper is disabled in all hierarchies that support
+	 * it. Note that this disables ptps (PTP shaper), but that is intended
+	 * for the initial configuration.
 	 */
 	err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
 					    MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0,
@@ -2804,6 +3572,23 @@
 						    MLXSW_REG_QEEC_MAS_DIS);
 		if (err)
 			return err;
+
+		err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+						    MLXSW_REG_QEEC_HIERARCY_TC,
+						    i + 8, i,
+						    MLXSW_REG_QEEC_MAS_DIS);
+		if (err)
+			return err;
+	}
+
+	/* Configure the min shaper for multicast TCs. */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
+					       MLXSW_REG_QEEC_HIERARCY_TC,
+					       i + 8, i,
+					       MLXSW_REG_QEEC_MIS_MIN);
+		if (err)
+			return err;
 	}
 
 	/* Map all priorities to traffic class 0. */
@@ -2834,7 +3619,10 @@
 	struct net_device *dev;
 	int err;
 
-	err = mlxsw_core_port_init(mlxsw_sp->core, local_port);
+	err = mlxsw_core_port_init(mlxsw_sp->core, local_port,
+				   module + 1, split, lane / width,
+				   mlxsw_sp->base_mac,
+				   sizeof(mlxsw_sp->base_mac));
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n",
 			local_port);
@@ -2851,7 +3639,7 @@
 	mlxsw_sp_port->dev = dev;
 	mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp_port->local_port = local_port;
-	mlxsw_sp_port->pvid = 1;
+	mlxsw_sp_port->pvid = MLXSW_SP_DEFAULT_VID;
 	mlxsw_sp_port->split = split;
 	mlxsw_sp_port->mapping.module = module;
 	mlxsw_sp_port->mapping.width = width;
@@ -2905,7 +3693,7 @@
 
 	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG |
 			 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
-	dev->hw_features |= NETIF_F_HW_TC;
+	dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK;
 
 	dev->min_mtu = 0;
 	dev->max_mtu = ETH_MAX_MTU;
@@ -2983,15 +3771,41 @@
 		goto err_port_qdiscs_init;
 	}
 
-	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
+	err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 0, VLAN_N_VID - 1, false,
+				     false);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to clear VLAN filter\n",
+			mlxsw_sp_port->local_port);
+		goto err_port_vlan_clear;
+	}
+
+	err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
+			mlxsw_sp_port->local_port);
+		goto err_port_nve_init;
+	}
+
+	err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set PVID\n",
+			mlxsw_sp_port->local_port);
+		goto err_port_pvid_set;
+	}
+
+	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port,
+						       MLXSW_SP_DEFAULT_VID);
 	if (IS_ERR(mlxsw_sp_port_vlan)) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
 			mlxsw_sp_port->local_port);
 		err = PTR_ERR(mlxsw_sp_port_vlan);
-		goto err_port_vlan_get;
+		goto err_port_vlan_create;
 	}
+	mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan;
 
-	mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+	INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw,
+			  mlxsw_sp->ptp_ops->shaper_work);
+
 	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
 	err = register_netdev(dev);
 	if (err) {
@@ -3001,16 +3815,18 @@
 	}
 
 	mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port,
-				mlxsw_sp_port, dev, module + 1,
-				mlxsw_sp_port->split, lane / width);
+				mlxsw_sp_port, dev);
 	mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0);
 	return 0;
 
 err_register_netdev:
 	mlxsw_sp->ports[local_port] = NULL;
-	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
-	mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
-err_port_vlan_get:
+	mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
+err_port_vlan_create:
+err_port_pvid_set:
+	mlxsw_sp_port_nve_fini(mlxsw_sp_port);
+err_port_nve_init:
+err_port_vlan_clear:
 	mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
 err_port_qdiscs_init:
 	mlxsw_sp_port_fids_fini(mlxsw_sp_port);
@@ -3045,11 +3861,13 @@
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 
 	cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
+	cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw);
+	mlxsw_sp_port_ptp_clear(mlxsw_sp_port);
 	mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
 	mlxsw_sp->ports[local_port] = NULL;
-	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
-	mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+	mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true);
+	mlxsw_sp_port_nve_fini(mlxsw_sp_port);
 	mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
 	mlxsw_sp_port_fids_fini(mlxsw_sp_port);
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
@@ -3063,6 +3881,45 @@
 	mlxsw_core_port_fini(mlxsw_sp->core, local_port);
 }
 
+static int mlxsw_sp_cpu_port_create(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	int err;
+
+	mlxsw_sp_port = kzalloc(sizeof(*mlxsw_sp_port), GFP_KERNEL);
+	if (!mlxsw_sp_port)
+		return -ENOMEM;
+
+	mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
+	mlxsw_sp_port->local_port = MLXSW_PORT_CPU_PORT;
+
+	err = mlxsw_core_cpu_port_init(mlxsw_sp->core,
+				       mlxsw_sp_port,
+				       mlxsw_sp->base_mac,
+				       sizeof(mlxsw_sp->base_mac));
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize core CPU port\n");
+		goto err_core_cpu_port_init;
+	}
+
+	mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = mlxsw_sp_port;
+	return 0;
+
+err_core_cpu_port_init:
+	kfree(mlxsw_sp_port);
+	return err;
+}
+
+static void mlxsw_sp_cpu_port_remove(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port =
+				mlxsw_sp->ports[MLXSW_PORT_CPU_PORT];
+
+	mlxsw_core_cpu_port_fini(mlxsw_sp->core);
+	mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = NULL;
+	kfree(mlxsw_sp_port);
+}
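+/* The CPU port has no netdev and no module mapping; it only needs a
+ * minimal mlxsw_sp_port so that mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] and
+ * the core bookkeeping have a valid entry.
+ */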
+
 static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
 	return mlxsw_sp->ports[local_port] != NULL;
@@ -3075,6 +3932,7 @@
 	for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
 		if (mlxsw_sp_port_created(mlxsw_sp, i))
 			mlxsw_sp_port_remove(mlxsw_sp, i);
+	mlxsw_sp_cpu_port_remove(mlxsw_sp);
 	kfree(mlxsw_sp->port_to_module);
 	kfree(mlxsw_sp->ports);
 }
@@ -3099,6 +3957,10 @@
 		goto err_port_to_module_alloc;
 	}
 
+	err = mlxsw_sp_cpu_port_create(mlxsw_sp);
+	if (err)
+		goto err_cpu_port_create;
+
 	for (i = 1; i < max_ports; i++) {
 		/* Mark as invalid */
 		mlxsw_sp->port_to_module[i] = -1;
@@ -3122,6 +3984,8 @@
 	for (i--; i >= 1; i--)
 		if (mlxsw_sp_port_created(mlxsw_sp, i))
 			mlxsw_sp_port_remove(mlxsw_sp, i);
+	mlxsw_sp_cpu_port_remove(mlxsw_sp);
+err_cpu_port_create:
 	kfree(mlxsw_sp->port_to_module);
 err_port_to_module_alloc:
 	kfree(mlxsw_sp->ports);
@@ -3136,14 +4000,14 @@
 }
 
 static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
-				      u8 module, unsigned int count)
+				      u8 module, unsigned int count, u8 offset)
 {
 	u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
 	int err, i;
 
 	for (i = 0; i < count; i++) {
-		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true,
-					   module, width, i * width);
+		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * offset,
+					   true, module, width, i * width);
 		if (err)
 			goto err_port_create;
 	}
@@ -3152,8 +4016,8 @@
 
 err_port_create:
 	for (i--; i >= 0; i--)
-		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
-			mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
+			mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 	return err;
 }
 
@@ -3184,11 +4048,19 @@
 			       struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	u8 local_ports_in_1x, local_ports_in_2x, offset;
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	u8 module, cur_width, base_port;
 	int i;
 	int err;
 
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) ||
+	    !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X))
+		return -EIO;
+
+	local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X);
+	local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X);
+
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
@@ -3214,13 +4086,15 @@
 
 	/* Make sure we have enough slave (even) ports for the split. */
 	if (count == 2) {
+		offset = local_ports_in_2x;
 		base_port = local_port;
-		if (mlxsw_sp->ports[base_port + 1]) {
+		if (mlxsw_sp->ports[base_port + local_ports_in_2x]) {
 			netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
 			NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
 			return -EINVAL;
 		}
 	} else {
+		offset = local_ports_in_1x;
 		base_port = mlxsw_sp_cluster_base_port_get(local_port);
 		if (mlxsw_sp->ports[base_port + 1] ||
 		    mlxsw_sp->ports[base_port + 3]) {
@@ -3231,10 +4105,11 @@
 	}
 
 	for (i = 0; i < count; i++)
-		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
-			mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
+			mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 
-	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count);
+	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count,
+					 offset);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n");
 		goto err_port_split_create;
@@ -3251,11 +4126,19 @@
 				 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	u8 local_ports_in_1x, local_ports_in_2x, offset;
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	u8 cur_width, base_port;
 	unsigned int count;
 	int i;
 
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) ||
+	    !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X))
+		return -EIO;
+
+	local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X);
+	local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X);
+
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
@@ -3273,6 +4156,11 @@
 	cur_width = mlxsw_sp_port->mapping.width;
 	count = cur_width == 1 ? 4 : 2;
 
+	if (count == 2)
+		offset = local_ports_in_2x;
+	else
+		offset = local_ports_in_1x;
+
 	base_port = mlxsw_sp_cluster_base_port_get(local_port);
 
 	/* Determine which ports to remove. */
@@ -3280,8 +4168,8 @@
 		base_port = base_port + 2;
 
 	for (i = 0; i < count; i++)
-		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
-			mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
+			mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 
 	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 
@@ -3305,14 +4193,55 @@
 	if (status == MLXSW_PORT_OPER_STATUS_UP) {
 		netdev_info(mlxsw_sp_port->dev, "link up\n");
 		netif_carrier_on(mlxsw_sp_port->dev);
+		mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0);
 	} else {
 		netdev_info(mlxsw_sp_port->dev, "link down\n");
 		netif_carrier_off(mlxsw_sp_port->dev);
 	}
 }
 
-static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
-					      u8 local_port, void *priv)
+static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp,
+					  char *mtpptr_pl, bool ingress)
+{
+	u8 local_port;
+	u8 num_rec;
+	int i;
+
+	local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl);
+	num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl);
+	for (i = 0; i < num_rec; i++) {
+		u8 domain_number;
+		u8 message_type;
+		u16 sequence_id;
+		u64 timestamp;
+
+		mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type,
+					&domain_number, &sequence_id,
+					&timestamp);
+		mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port,
+					    message_type, domain_number,
+					    sequence_id, timestamp);
+	}
+}
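+/* A single MTPPTR event may carry several timestamp records; each record
+ * identifies its packet by message type, domain number and sequence id,
+ * which mlxsw_sp1_ptp_got_timestamp() presumably uses to match the
+ * timestamp back to a queued skb.
+ */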
+
+static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg,
+					      char *mtpptr_pl, void *priv)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true);
+}
+
+static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg,
+					      char *mtpptr_pl, void *priv)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false);
+}
+
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+				       u8 local_port, void *priv)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -3343,10 +4272,10 @@
 	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
 }
 
-static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb,
+static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb,
 					      u8 local_port, void *priv)
 {
-	skb->offload_mr_fwd_mark = 1;
+	skb->offload_l3_fwd_mark = 1;
 	skb->offload_fwd_mark = 1;
 	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
 }
@@ -3386,6 +4315,14 @@
 	consume_skb(skb);
 }
 
+static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port,
+				     void *priv)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
+}
+
 #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
 	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
 		  _is_ctrl, SP_##_trap_group, DISCARD)
@@ -3394,8 +4331,8 @@
 	MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action,	\
 		_is_ctrl, SP_##_trap_group, DISCARD)
 
-#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
-	MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action,	\
+#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
+	MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action,	\
 		_is_ctrl, SP_##_trap_group, DISCARD)
 
 #define MLXSW_SP_EVENTL(_func, _trap_id)		\
@@ -3407,7 +4344,8 @@
 	/* L2 traps */
 	MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
 	MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
-	MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
+		  false, SP_LLDP, DISCARD),
 	MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
 	MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
 	MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
@@ -3428,7 +4366,7 @@
 	/* L3 traps */
 	MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
 	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
@@ -3459,6 +4397,7 @@
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
 	/* PKT Sample trap */
@@ -3471,7 +4410,20 @@
 	MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
 	MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
 	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
-	MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
+	MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
+	/* NVE traps */
+	MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
+	MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false),
+	/* PTP traps */
+	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU,
+		  false, SP_PTP0, DISCARD),
+	MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false),
+};
+
+static const struct mlxsw_listener mlxsw_sp1_listener[] = {
+	/* Events */
+	MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0),
+	MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0),
 };
 
 static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
@@ -3499,6 +4451,7 @@
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
 			rate = 128;
 			burst_size = 7;
 			break;
@@ -3519,8 +4472,16 @@
 			burst_size = 7;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
-			rate = 4 * 1024;
-			burst_size = 4;
+			rate = 1024;
+			burst_size = 7;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
+			rate = 24 * 1024;
+			burst_size = 12;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
+			rate = 19 * 1024;
+			burst_size = 12;
 			break;
 		default:
 			continue;
@@ -3560,6 +4521,7 @@
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
 			priority = 5;
 			tc = 5;
 			break;
@@ -3577,6 +4539,7 @@
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
 			priority = 2;
 			tc = 2;
 			break;
@@ -3584,6 +4547,7 @@
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
 			priority = 1;
 			tc = 1;
 			break;
@@ -3609,22 +4573,16 @@
 	return 0;
 }
 
-static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_listener listeners[],
+				   size_t listeners_count)
 {
 	int i;
 	int err;
 
-	err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
-	if (err)
-		return err;
-
-	err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
-	if (err)
-		return err;
-
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+	for (i = 0; i < listeners_count; i++) {
 		err = mlxsw_core_trap_register(mlxsw_sp->core,
-					       &mlxsw_sp_listener[i],
+					       &listeners[i],
 					       mlxsw_sp);
 		if (err)
 			goto err_listener_register;
@@ -3635,28 +4593,73 @@
 err_listener_register:
 	for (i--; i >= 0; i--) {
 		mlxsw_core_trap_unregister(mlxsw_sp->core,
-					   &mlxsw_sp_listener[i],
+					   &listeners[i],
 					   mlxsw_sp);
 	}
 	return err;
 }
 
-static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp,
+				      const struct mlxsw_listener listeners[],
+				      size_t listeners_count)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+	for (i = 0; i < listeners_count; i++) {
 		mlxsw_core_trap_unregister(mlxsw_sp->core,
-					   &mlxsw_sp_listener[i],
+					   &listeners[i],
 					   mlxsw_sp);
 	}
 }
 
+static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int err;
+
+	err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
+				      ARRAY_SIZE(mlxsw_sp_listener));
+	if (err)
+		return err;
+
+	err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners,
+				      mlxsw_sp->listeners_count);
+	if (err)
+		goto err_extra_traps_init;
+
+	return 0;
+
+err_extra_traps_init:
+	mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+				  ARRAY_SIZE(mlxsw_sp_listener));
+	return err;
+}
+
+static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners,
+				  mlxsw_sp->listeners_count);
+	mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+				  ARRAY_SIZE(mlxsw_sp_listener));
+}
+
+#define MLXSW_SP_LAG_SEED_INIT 0xcafecafe
+
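+/* Seeding the LAG hash with a jhash of the base MAC gives each switch a
+ * different hash function, presumably to avoid hash polarization when the
+ * same flows traverse several devices.
+ */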
 static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
 {
 	char slcr_pl[MLXSW_REG_SLCR_LEN];
+	u32 seed;
 	int err;
 
+	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac),
+		     MLXSW_SP_LAG_SEED_INIT);
 	mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
 				     MLXSW_REG_SLCR_LAG_HASH_DMAC |
 				     MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
@@ -3665,7 +4668,7 @@
 				     MLXSW_REG_SLCR_LAG_HASH_DIP |
 				     MLXSW_REG_SLCR_LAG_HASH_SPORT |
 				     MLXSW_REG_SLCR_LAG_HASH_DPORT |
-				     MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
+				     MLXSW_REG_SLCR_LAG_HASH_IPPROTO, seed);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
 	if (err)
 		return err;
@@ -3699,6 +4702,38 @@
 	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
 }
 
+static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
+	.clock_init	= mlxsw_sp1_ptp_clock_init,
+	.clock_fini	= mlxsw_sp1_ptp_clock_fini,
+	.init		= mlxsw_sp1_ptp_init,
+	.fini		= mlxsw_sp1_ptp_fini,
+	.receive	= mlxsw_sp1_ptp_receive,
+	.transmitted	= mlxsw_sp1_ptp_transmitted,
+	.hwtstamp_get	= mlxsw_sp1_ptp_hwtstamp_get,
+	.hwtstamp_set	= mlxsw_sp1_ptp_hwtstamp_set,
+	.shaper_work	= mlxsw_sp1_ptp_shaper_work,
+	.get_ts_info	= mlxsw_sp1_ptp_get_ts_info,
+	.get_stats_count = mlxsw_sp1_get_stats_count,
+	.get_stats_strings = mlxsw_sp1_get_stats_strings,
+	.get_stats	= mlxsw_sp1_get_stats,
+};
+
+static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
+	.clock_init	= mlxsw_sp2_ptp_clock_init,
+	.clock_fini	= mlxsw_sp2_ptp_clock_fini,
+	.init		= mlxsw_sp2_ptp_init,
+	.fini		= mlxsw_sp2_ptp_fini,
+	.receive	= mlxsw_sp2_ptp_receive,
+	.transmitted	= mlxsw_sp2_ptp_transmitted,
+	.hwtstamp_get	= mlxsw_sp2_ptp_hwtstamp_get,
+	.hwtstamp_set	= mlxsw_sp2_ptp_hwtstamp_set,
+	.shaper_work	= mlxsw_sp2_ptp_shaper_work,
+	.get_ts_info	= mlxsw_sp2_ptp_get_ts_info,
+	.get_stats_count = mlxsw_sp2_get_stats_count,
+	.get_stats_strings = mlxsw_sp2_get_stats_strings,
+	.get_stats	= mlxsw_sp2_get_stats,
+};
+
 static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 				    unsigned long event, void *ptr);
 
@@ -3739,6 +4774,12 @@
 		goto err_traps_init;
 	}
 
+	err = mlxsw_sp_devlink_traps_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize devlink traps\n");
+		goto err_devlink_traps_init;
+	}
+
 	err = mlxsw_sp_buffers_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize buffers\n");
@@ -3778,12 +4819,46 @@
 		goto err_afa_init;
 	}
 
+	err = mlxsw_sp_nve_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n");
+		goto err_nve_init;
+	}
+
+	err = mlxsw_sp_acl_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
+		goto err_acl_init;
+	}
+
 	err = mlxsw_sp_router_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
 		goto err_router_init;
 	}
 
+	if (mlxsw_sp->bus_info->read_frc_capable) {
+		/* NULL is a valid return value from clock_init */
+		mlxsw_sp->clock =
+			mlxsw_sp->ptp_ops->clock_init(mlxsw_sp,
+						      mlxsw_sp->bus_info->dev);
+		if (IS_ERR(mlxsw_sp->clock)) {
+			err = PTR_ERR(mlxsw_sp->clock);
+			dev_err(mlxsw_sp->bus_info->dev, "Failed to init ptp clock\n");
+			goto err_ptp_clock_init;
+		}
+	}
+
+	if (mlxsw_sp->clock) {
+		/* NULL is a valid return value from ptp_ops->init */
+		mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp);
+		if (IS_ERR(mlxsw_sp->ptp_state)) {
+			err = PTR_ERR(mlxsw_sp->ptp_state);
+			dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n");
+			goto err_ptp_init;
+		}
+	}
+
 	/* Initialize netdevice notifier after router and SPAN are initialized,
 	 * so that the event handler can use router structures and call SPAN
 	 * respin.
@@ -3795,12 +4870,6 @@
 		goto err_netdev_notifier;
 	}
 
-	err = mlxsw_sp_acl_init(mlxsw_sp);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
-		goto err_acl_init;
-	}
-
 	err = mlxsw_sp_dpipe_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n");
@@ -3818,12 +4887,20 @@
 err_ports_create:
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
 err_dpipe_init:
-	mlxsw_sp_acl_fini(mlxsw_sp);
-err_acl_init:
 	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
+	if (mlxsw_sp->clock)
+		mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
+err_ptp_init:
+	if (mlxsw_sp->clock)
+		mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+err_ptp_clock_init:
 	mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+	mlxsw_sp_acl_fini(mlxsw_sp);
+err_acl_init:
+	mlxsw_sp_nve_fini(mlxsw_sp);
+err_nve_init:
 	mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
 	mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3836,6 +4913,8 @@
 err_lag_init:
 	mlxsw_sp_buffers_fini(mlxsw_sp);
 err_buffers_init:
+	mlxsw_sp_devlink_traps_fini(mlxsw_sp);
+err_devlink_traps_init:
 	mlxsw_sp_traps_fini(mlxsw_sp);
 err_traps_init:
 	mlxsw_sp_fids_fini(mlxsw_sp);
@@ -3856,6 +4935,14 @@
 	mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
 	mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+	mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
+	mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask;
+	mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
+	mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
+	mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
+	mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops;
+	mlxsw_sp->listeners = mlxsw_sp1_listener;
+	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
@@ -3870,6 +4957,12 @@
 	mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
 	mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+	mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
+	mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
+	mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
+	mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
+	mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
+	mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
@@ -3880,30 +4973,40 @@
 
 	mlxsw_sp_ports_remove(mlxsw_sp);
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
-	mlxsw_sp_acl_fini(mlxsw_sp);
 	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+	if (mlxsw_sp->clock) {
+		mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
+		mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+	}
 	mlxsw_sp_router_fini(mlxsw_sp);
+	mlxsw_sp_acl_fini(mlxsw_sp);
+	mlxsw_sp_nve_fini(mlxsw_sp);
 	mlxsw_sp_afa_fini(mlxsw_sp);
 	mlxsw_sp_counter_pool_fini(mlxsw_sp);
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
 	mlxsw_sp_span_fini(mlxsw_sp);
 	mlxsw_sp_lag_fini(mlxsw_sp);
 	mlxsw_sp_buffers_fini(mlxsw_sp);
+	mlxsw_sp_devlink_traps_fini(mlxsw_sp);
 	mlxsw_sp_traps_fini(mlxsw_sp);
 	mlxsw_sp_fids_fini(mlxsw_sp);
 	mlxsw_sp_kvdl_fini(mlxsw_sp);
 }
 
+/* Per-FID flood tables are used for both "true" 802.1D FIDs and emulated
+ * 802.1Q FIDs, so the tables must accommodate all 802.1D FIDs plus one
+ * emulated FID per usable VLAN (VIDs 1..4094).
+ */
+#define MLXSW_SP_FID_FLOOD_TABLE_SIZE	(MLXSW_SP_FID_8021D_MAX + \
+					 VLAN_VID_MASK - 1)
+
 static const struct mlxsw_config_profile mlxsw_sp1_config_profile = {
 	.used_max_mid			= 1,
 	.max_mid			= MLXSW_SP_MID_MAX,
 	.used_flood_tables		= 1,
 	.used_flood_mode		= 1,
 	.flood_mode			= 3,
-	.max_fid_offset_flood_tables	= 3,
-	.fid_offset_flood_table_size	= VLAN_N_VID - 1,
 	.max_fid_flood_tables		= 3,
-	.fid_flood_table_size		= MLXSW_SP_FID_8021D_MAX,
+	.fid_flood_table_size		= MLXSW_SP_FID_FLOOD_TABLE_SIZE,
 	.used_max_ib_mc			= 1,
 	.max_ib_mc			= 0,
 	.used_max_pkey			= 1,
@@ -3926,10 +5029,8 @@
 	.used_flood_tables		= 1,
 	.used_flood_mode		= 1,
 	.flood_mode			= 3,
-	.max_fid_offset_flood_tables	= 3,
-	.fid_offset_flood_table_size	= VLAN_N_VID - 1,
 	.max_fid_flood_tables		= 3,
-	.fid_flood_table_size		= MLXSW_SP_FID_8021D_MAX,
+	.fid_flood_table_size		= MLXSW_SP_FID_FLOOD_TABLE_SIZE,
 	.used_max_ib_mc			= 1,
 	.max_ib_mc			= 0,
 	.used_max_pkey			= 1,
@@ -4044,6 +5145,26 @@
 	return 0;
 }
 
+static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	struct devlink_resource_size_params kvd_size_params;
+	u32 kvd_size;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE))
+		return -EIO;
+
+	kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
+	devlink_resource_size_params_init(&kvd_size_params, kvd_size, kvd_size,
+					  MLXSW_SP_KVD_GRANULARITY,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+
+	return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
+					 kvd_size, MLXSW_SP_RESOURCE_KVD,
+					 DEVLINK_RESOURCE_ID_PARENT_TOP,
+					 &kvd_size_params);
+}
+
 static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
 {
 	return mlxsw_sp1_resources_kvd_register(mlxsw_core);
@@ -4051,7 +5172,7 @@
 
 static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
 {
-	return 0;
+	return mlxsw_sp2_resources_kvd_register(mlxsw_core);
 }
 
 static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
@@ -4109,6 +5230,126 @@
 	return 0;
 }
 
+static int
+mlxsw_sp_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id,
+					       union devlink_param_value val,
+					       struct netlink_ext_ack *extack)
+{
+	if ((val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER) &&
+	    (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH)) {
+		NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct devlink_param mlxsw_sp_devlink_params[] = {
+	DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY,
+			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+			      NULL, NULL,
+			      mlxsw_sp_devlink_param_fw_load_policy_validate),
+};
+
+static int mlxsw_sp_params_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	union devlink_param_value value;
+	int err;
+
+	err = devlink_params_register(devlink, mlxsw_sp_devlink_params,
+				      ARRAY_SIZE(mlxsw_sp_devlink_params));
+	if (err)
+		return err;
+
+	value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER;
+	devlink_param_driverinit_value_set(devlink,
+					   DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+					   value);
+	return 0;
+}
+
+static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core)
+{
+	devlink_params_unregister(priv_to_devlink(mlxsw_core),
+				  mlxsw_sp_devlink_params,
+				  ARRAY_SIZE(mlxsw_sp_devlink_params));
+}
+
+static int
+mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id,
+					     struct devlink_param_gset_ctx *ctx)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+	ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp);
+	return 0;
+}
+
+static int
+mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id,
+					     struct devlink_param_gset_ctx *ctx)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+	return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32);
+}
+
+static const struct devlink_param mlxsw_sp2_devlink_params[] = {
+	DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
+			     "acl_region_rehash_interval",
+			     DEVLINK_PARAM_TYPE_U32,
+			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			     mlxsw_sp_params_acl_region_rehash_intrvl_get,
+			     mlxsw_sp_params_acl_region_rehash_intrvl_set,
+			     NULL),
+};
+
+static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	union devlink_param_value value;
+	int err;
+
+	err = mlxsw_sp_params_register(mlxsw_core);
+	if (err)
+		return err;
+
+	err = devlink_params_register(devlink, mlxsw_sp2_devlink_params,
+				      ARRAY_SIZE(mlxsw_sp2_devlink_params));
+	if (err)
+		goto err_devlink_params_register;
+
+	value.vu32 = 0;
+	devlink_param_driverinit_value_set(devlink,
+					   MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
+					   value);
+	return 0;
+
+err_devlink_params_register:
+	mlxsw_sp_params_unregister(mlxsw_core);
+	return err;
+}
+
+static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core)
+{
+	devlink_params_unregister(priv_to_devlink(mlxsw_core),
+				  mlxsw_sp2_devlink_params,
+				  ARRAY_SIZE(mlxsw_sp2_devlink_params));
+	mlxsw_sp_params_unregister(mlxsw_core);
+}
+
+static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+				     struct sk_buff *skb, u8 local_port)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
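+	/* Strip the device Tx header (MLXSW_TXHDR_LEN bytes) before handing
+	 * the skb to the per-ASIC PTP handler.
+	 */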
+	skb_pull(skb, MLXSW_TXHDR_LEN);
+	mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port);
+}
+
 static struct mlxsw_driver mlxsw_sp1_driver = {
 	.kind				= mlxsw_sp1_driver_name,
 	.priv_size			= sizeof(struct mlxsw_sp),
@@ -4127,9 +5368,17 @@
 	.sb_occ_max_clear		= mlxsw_sp_sb_occ_max_clear,
 	.sb_occ_port_pool_get		= mlxsw_sp_sb_occ_port_pool_get,
 	.sb_occ_tc_port_bind_get	= mlxsw_sp_sb_occ_tc_port_bind_get,
+	.flash_update			= mlxsw_sp_flash_update,
+	.trap_init			= mlxsw_sp_trap_init,
+	.trap_fini			= mlxsw_sp_trap_fini,
+	.trap_action_set		= mlxsw_sp_trap_action_set,
+	.trap_group_init		= mlxsw_sp_trap_group_init,
 	.txhdr_construct		= mlxsw_sp_txhdr_construct,
 	.resources_register		= mlxsw_sp1_resources_register,
 	.kvd_sizes_get			= mlxsw_sp_kvd_sizes_get,
+	.params_register		= mlxsw_sp_params_register,
+	.params_unregister		= mlxsw_sp_params_unregister,
+	.ptp_transmitted		= mlxsw_sp_ptp_transmitted,
 	.txhdr_len			= MLXSW_TXHDR_LEN,
 	.profile			= &mlxsw_sp1_config_profile,
 	.res_query_enabled		= true,
@@ -4153,8 +5402,49 @@
 	.sb_occ_max_clear		= mlxsw_sp_sb_occ_max_clear,
 	.sb_occ_port_pool_get		= mlxsw_sp_sb_occ_port_pool_get,
 	.sb_occ_tc_port_bind_get	= mlxsw_sp_sb_occ_tc_port_bind_get,
+	.flash_update			= mlxsw_sp_flash_update,
+	.trap_init			= mlxsw_sp_trap_init,
+	.trap_fini			= mlxsw_sp_trap_fini,
+	.trap_action_set		= mlxsw_sp_trap_action_set,
+	.trap_group_init		= mlxsw_sp_trap_group_init,
 	.txhdr_construct		= mlxsw_sp_txhdr_construct,
 	.resources_register		= mlxsw_sp2_resources_register,
+	.params_register		= mlxsw_sp2_params_register,
+	.params_unregister		= mlxsw_sp2_params_unregister,
+	.ptp_transmitted		= mlxsw_sp_ptp_transmitted,
+	.txhdr_len			= MLXSW_TXHDR_LEN,
+	.profile			= &mlxsw_sp2_config_profile,
+	.res_query_enabled		= true,
+};
+
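+/* Spectrum-3 reuses the Spectrum-2 init path, config profile and resource
+ * registration; at this point only the driver name and PCI device ID
+ * differ.
+ */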
+static struct mlxsw_driver mlxsw_sp3_driver = {
+	.kind				= mlxsw_sp3_driver_name,
+	.priv_size			= sizeof(struct mlxsw_sp),
+	.init				= mlxsw_sp2_init,
+	.fini				= mlxsw_sp_fini,
+	.basic_trap_groups_set		= mlxsw_sp_basic_trap_groups_set,
+	.port_split			= mlxsw_sp_port_split,
+	.port_unsplit			= mlxsw_sp_port_unsplit,
+	.sb_pool_get			= mlxsw_sp_sb_pool_get,
+	.sb_pool_set			= mlxsw_sp_sb_pool_set,
+	.sb_port_pool_get		= mlxsw_sp_sb_port_pool_get,
+	.sb_port_pool_set		= mlxsw_sp_sb_port_pool_set,
+	.sb_tc_pool_bind_get		= mlxsw_sp_sb_tc_pool_bind_get,
+	.sb_tc_pool_bind_set		= mlxsw_sp_sb_tc_pool_bind_set,
+	.sb_occ_snapshot		= mlxsw_sp_sb_occ_snapshot,
+	.sb_occ_max_clear		= mlxsw_sp_sb_occ_max_clear,
+	.sb_occ_port_pool_get		= mlxsw_sp_sb_occ_port_pool_get,
+	.sb_occ_tc_port_bind_get	= mlxsw_sp_sb_occ_tc_port_bind_get,
+	.flash_update			= mlxsw_sp_flash_update,
+	.trap_init			= mlxsw_sp_trap_init,
+	.trap_fini			= mlxsw_sp_trap_fini,
+	.trap_action_set		= mlxsw_sp_trap_action_set,
+	.trap_group_init		= mlxsw_sp_trap_group_init,
+	.txhdr_construct		= mlxsw_sp_txhdr_construct,
+	.resources_register		= mlxsw_sp2_resources_register,
+	.params_register		= mlxsw_sp2_params_register,
+	.params_unregister		= mlxsw_sp2_params_unregister,
+	.ptp_transmitted		= mlxsw_sp_ptp_transmitted,
 	.txhdr_len			= MLXSW_TXHDR_LEN,
 	.profile			= &mlxsw_sp2_config_profile,
 	.res_query_enabled		= true,
@@ -4230,6 +5520,25 @@
 	dev_put(mlxsw_sp_port->dev);
 }
 
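+/* When a port leaves a LAG, the LAG device and its directly linked uppers
+ * may still be enslaved to bridges; make the port leave those bridges as
+ * well.
+ */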
+static void
+mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port,
+				 struct net_device *lag_dev)
+{
+	struct net_device *br_dev = netdev_master_upper_dev_get(lag_dev);
+	struct net_device *upper_dev;
+	struct list_head *iter;
+
+	if (netif_is_bridge_port(lag_dev))
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, br_dev);
+
+	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+		if (!netif_is_bridge_port(upper_dev))
+			continue;
+		br_dev = netdev_master_upper_dev_get(upper_dev);
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, br_dev);
+	}
+}
+
 static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
 {
 	char sldr_pl[MLXSW_REG_SLDR_LEN];
@@ -4357,7 +5666,6 @@
 				  struct net_device *lag_dev)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 	struct mlxsw_sp_upper *lag;
 	u16 lag_id;
 	u8 port_index;
@@ -4380,9 +5688,6 @@
 	err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
 	if (err)
 		goto err_col_port_add;
-	err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id);
-	if (err)
-		goto err_col_port_enable;
 
 	mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index,
 				   mlxsw_sp_port->local_port);
@@ -4391,14 +5696,11 @@
 	lag->ref_count++;
 
 	/* Port is no longer usable as a router interface */
-	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1);
-	if (mlxsw_sp_port_vlan->fid)
-		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+	if (mlxsw_sp_port->default_vlan->fid)
+		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan);
 
 	return 0;
 
-err_col_port_enable:
-	mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
 err_col_port_add:
 	if (!lag->ref_count)
 		mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
@@ -4417,11 +5719,15 @@
 	lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
 	WARN_ON(lag->ref_count == 0);
 
-	mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id);
 	mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
 
 	/* Any VLANs configured on the port are no longer valid */
-	mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+	mlxsw_sp_port_vlan_flush(mlxsw_sp_port, false);
+	mlxsw_sp_port_vlan_cleanup(mlxsw_sp_port->default_vlan);
+	/* Make the LAG and its directly linked uppers leave the bridges
+	 * they are members of
+	 */
+	mlxsw_sp_port_lag_uppers_cleanup(mlxsw_sp_port, lag_dev);
 
 	if (lag->ref_count == 1)
 		mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
@@ -4431,9 +5737,8 @@
 	mlxsw_sp_port->lagged = 0;
 	lag->ref_count--;
 
-	mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
 	/* Make sure untagged frames are allowed to ingress */
-	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+	mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID);
 }
 
 static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -4458,21 +5763,56 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
 }
 
-static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port,
-				       bool lag_tx_enabled)
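+/* In LACP terms, the collector gates Rx over the LAG and the distributor
+ * gates Tx; enable the collector before the distributor and tear them down
+ * in the reverse order.
+ */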
+static int
+mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-	if (lag_tx_enabled)
-		return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port,
-						  mlxsw_sp_port->lag_id);
-	else
-		return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port,
-						     mlxsw_sp_port->lag_id);
+	int err;
+
+	err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port,
+					   mlxsw_sp_port->lag_id);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+	if (err)
+		goto err_dist_port_add;
+
+	return 0;
+
+err_dist_port_add:
+	mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+	return err;
+}
+
+static int
+mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	int err;
+
+	err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port,
+					    mlxsw_sp_port->lag_id);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port,
+					    mlxsw_sp_port->lag_id);
+	if (err)
+		goto err_col_port_disable;
+
+	return 0;
+
+err_col_port_disable:
+	mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+	return err;
 }
 
 static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
 				     struct netdev_lag_lower_state_info *info)
 {
-	return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
+	if (info->tx_enabled)
+		return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port);
+	else
+		return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
 }
 
 static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -4511,7 +5851,7 @@
 	err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true);
 	if (err)
 		goto err_port_stp_set;
-	err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
+	err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 1, VLAN_N_VID - 2,
 				     true, false);
 	if (err)
 		goto err_port_vlan_set;
@@ -4543,12 +5883,73 @@
 		mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
 					       vid, true);
 
-	mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
+	mlxsw_sp_port_vlan_set(mlxsw_sp_port, 1, VLAN_N_VID - 2,
 			       false, false);
 	mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
 	mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
 }
 
+static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev)
+{
+	unsigned int num_vxlans = 0;
+	struct net_device *dev;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(br_dev, dev, iter) {
+		if (netif_is_vxlan(dev))
+			num_vxlans++;
+	}
+
+	return num_vxlans > 1;
+}
+
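+/* In a VLAN-aware bridge with multiple VxLAN devices, no two of them may
+ * use the same VLAN as PVID and egress untagged; collect the mapped VIDs
+ * in a bitmap and fail on the first duplicate.
+ */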
+static bool mlxsw_sp_bridge_vxlan_vlan_is_valid(struct net_device *br_dev)
+{
+	DECLARE_BITMAP(vlans, VLAN_N_VID) = {0};
+	struct net_device *dev;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(br_dev, dev, iter) {
+		u16 pvid;
+		int err;
+
+		if (!netif_is_vxlan(dev))
+			continue;
+
+		err = mlxsw_sp_vxlan_mapped_vid(dev, &pvid);
+		if (err || !pvid)
+			continue;
+
+		if (test_and_set_bit(pvid, vlans))
+			return false;
+	}
+
+	return true;
+}
+
+static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
+					   struct netlink_ext_ack *extack)
+{
+	if (br_multicast_enabled(br_dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Multicast cannot be enabled on a bridge with a VxLAN device");
+		return false;
+	}
+
+	if (!br_vlan_enabled(br_dev) &&
+	    mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge");
+		return false;
+	}
+
+	if (br_vlan_enabled(br_dev) &&
+	    !mlxsw_sp_bridge_vxlan_vlan_is_valid(br_dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices cannot have the same VLAN as PVID and egress untagged");
+		return false;
+	}
+
+	return true;
+}
+
 static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 					       struct net_device *dev,
 					       unsigned long event, void *ptr)
@@ -4578,6 +5979,11 @@
 		}
 		if (!info->linking)
 			break;
+		if (netif_is_bridge_master(upper_dev) &&
+		    !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+		    mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+		    !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+			return -EOPNOTSUPP;
 		if (netdev_has_any_upper_dev(upper_dev) &&
 		    (!netif_is_bridge_master(upper_dev) ||
 		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4611,11 +6017,6 @@
 			NL_SET_ERR_MSG_MOD(extack, "Cannot put a VLAN on an OVS port");
 			return -EINVAL;
 		}
-		if (is_vlan_dev(upper_dev) &&
-		    vlan_dev_vlan_id(upper_dev) == 1) {
-			NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic");
-			return -EINVAL;
-		}
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
@@ -4630,12 +6031,14 @@
 							   lower_dev,
 							   upper_dev);
 		} else if (netif_is_lag_master(upper_dev)) {
-			if (info->linking)
+			if (info->linking) {
 				err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
 							     upper_dev);
-			else
+			} else {
+				mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
 				mlxsw_sp_port_lag_leave(mlxsw_sp_port,
 							upper_dev);
+			}
 		} else if (netif_is_ovs_master(upper_dev)) {
 			if (info->linking)
 				err = mlxsw_sp_port_ovs_join(mlxsw_sp_port);
@@ -4644,6 +6047,16 @@
 		} else if (netif_is_macvlan(upper_dev)) {
 			if (!info->linking)
 				mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+		} else if (is_vlan_dev(upper_dev)) {
+			struct net_device *br_dev;
+
+			if (!netif_is_bridge_port(upper_dev))
+				break;
+			if (info->linking)
+				break;
+			br_dev = netdev_master_upper_dev_get(upper_dev);
+			mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev,
+						   br_dev);
 		}
 		break;
 	}
@@ -4735,6 +6148,11 @@
 		}
 		if (!info->linking)
 			break;
+		if (netif_is_bridge_master(upper_dev) &&
+		    !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+		    mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+		    !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+			return -EOPNOTSUPP;
 		if (netdev_has_any_upper_dev(upper_dev) &&
 		    (!netif_is_bridge_master(upper_dev) ||
 		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4795,6 +6213,48 @@
 	return 0;
 }
 
+static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
+						struct net_device *br_dev,
+						unsigned long event, void *ptr,
+						u16 vid)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper_dev;
+
+	if (!mlxsw_sp)
+		return 0;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (!netif_is_macvlan(upper_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+			return -EOPNOTSUPP;
+		}
+		if (!info->linking)
+			break;
+		if (netif_is_macvlan(upper_dev) &&
+		    !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+			return -EOPNOTSUPP;
+		}
+		break;
+	case NETDEV_CHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (info->linking)
+			break;
+		if (netif_is_macvlan(upper_dev))
+			mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+		break;
+	}
+
+	return 0;
+}
+
 static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
 					 unsigned long event, void *ptr)
 {
@@ -4808,6 +6268,9 @@
 		return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev,
 							      real_dev, event,
 							      ptr, vid);
+	else if (netif_is_bridge_master(real_dev))
+		return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, real_dev,
+							    event, ptr, vid);
 
 	return 0;
 }
@@ -4881,6 +6344,74 @@
 	return netif_is_l3_master(info->upper_dev);
 }
 
+static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
+					  struct net_device *dev,
+					  unsigned long event, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *cu_info;
+	struct netdev_notifier_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper_dev;
+
+	extack = netdev_notifier_info_to_extack(info);
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		cu_info = container_of(info,
+				       struct netdev_notifier_changeupper_info,
+				       info);
+		upper_dev = cu_info->upper_dev;
+		if (!netif_is_bridge_master(upper_dev))
+			return 0;
+		if (!mlxsw_sp_lower_get(upper_dev))
+			return 0;
+		if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+			return -EOPNOTSUPP;
+		if (cu_info->linking) {
+			if (!netif_running(dev))
+				return 0;
+			/* When the bridge is VLAN-aware, the VNI of the VxLAN
+			 * device needs to be mapped to a VLAN, but at this
+			 * point no VLANs are configured on the VxLAN device
+			 */
+			if (br_vlan_enabled(upper_dev))
+				return 0;
+			return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
+							  dev, 0, extack);
+		} else {
+			/* VLANs were already flushed, which triggered the
+			 * necessary cleanup
+			 */
+			if (br_vlan_enabled(upper_dev))
+				return 0;
+			mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev);
+		}
+		break;
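+	/* A VxLAN device that was down while being enslaved to the bridge is
+	 * only joined once it is brought up, and is left again on down.
+	 */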
+	case NETDEV_PRE_UP:
+		upper_dev = netdev_master_upper_dev_get(dev);
+		if (!upper_dev)
+			return 0;
+		if (!netif_is_bridge_master(upper_dev))
+			return 0;
+		if (!mlxsw_sp_lower_get(upper_dev))
+			return 0;
+		return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, 0,
+						  extack);
+	case NETDEV_DOWN:
+		upper_dev = netdev_master_upper_dev_get(dev);
+		if (!upper_dev)
+			return 0;
+		if (!netif_is_bridge_master(upper_dev))
+			return 0;
+		if (!mlxsw_sp_lower_get(upper_dev))
+			return 0;
+		mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev);
+		break;
+	}
+
+	return 0;
+}
+
 static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
 				    unsigned long event, void *ptr)
 {
@@ -4897,14 +6428,18 @@
 	}
 	mlxsw_sp_span_respin(mlxsw_sp);
 
+	if (netif_is_vxlan(dev))
+		err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
 	if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
 						       event, ptr);
 	else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
 		err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
 						       event, ptr);
-	else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
-		err = mlxsw_sp_netdevice_router_port_event(dev);
+	else if (event == NETDEV_PRE_CHANGEADDR ||
+		 event == NETDEV_CHANGEADDR ||
+		 event == NETDEV_CHANGEMTU)
+		err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
 	else if (mlxsw_sp_is_vrf_event(event, ptr))
 		err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
 	else if (mlxsw_sp_port_dev_check(dev))
@@ -4925,18 +6460,10 @@
 	.notifier_call = mlxsw_sp_inetaddr_valid_event,
 };
 
-static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_inetaddr_event,
-};
-
 static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = {
 	.notifier_call = mlxsw_sp_inet6addr_valid_event,
 };
 
-static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_inet6addr_event,
-};
-
 static const struct pci_device_id mlxsw_sp1_pci_id_table[] = {
 	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
 	{0, },
@@ -4957,14 +6484,22 @@
 	.id_table = mlxsw_sp2_pci_id_table,
 };
 
+static const struct pci_device_id mlxsw_sp3_pci_id_table[] = {
+	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM3), 0},
+	{0, },
+};
+
+static struct pci_driver mlxsw_sp3_pci_driver = {
+	.name = mlxsw_sp3_driver_name,
+	.id_table = mlxsw_sp3_pci_id_table,
+};
+
 static int __init mlxsw_sp_module_init(void)
 {
 	int err;
 
 	register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
-	register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
-	register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 
 	err = mlxsw_core_driver_register(&mlxsw_sp1_driver);
 	if (err)
@@ -4974,6 +6509,10 @@
 	if (err)
 		goto err_sp2_core_driver_register;
 
+	err = mlxsw_core_driver_register(&mlxsw_sp3_driver);
+	if (err)
+		goto err_sp3_core_driver_register;
+
 	err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver);
 	if (err)
 		goto err_sp1_pci_driver_register;
@@ -4982,31 +6521,37 @@
 	if (err)
 		goto err_sp2_pci_driver_register;
 
+	err = mlxsw_pci_driver_register(&mlxsw_sp3_pci_driver);
+	if (err)
+		goto err_sp3_pci_driver_register;
+
 	return 0;
 
-err_sp2_pci_driver_register:
+err_sp3_pci_driver_register:
 	mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
+err_sp2_pci_driver_register:
+	mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
 err_sp1_pci_driver_register:
+	mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
+err_sp3_core_driver_register:
 	mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
 err_sp2_core_driver_register:
 	mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
 err_sp1_core_driver_register:
-	unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 	unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
-	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
 	return err;
 }
 
 static void __exit mlxsw_sp_module_exit(void)
 {
+	mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver);
 	mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
 	mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+	mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
 	mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
 	mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
-	unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 	unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
-	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
 }
 
@@ -5018,4 +6563,5 @@
 MODULE_DESCRIPTION("Mellanox Spectrum driver");
 MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
 MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table);
 MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 3cdb7ac..b2a0028 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -8,6 +8,7 @@
 #include <linux/netdevice.h>
 #include <linux/rhashtable.h>
 #include <linux/bitops.h>
+#include <linux/if_bridge.h>
 #include <linux/if_vlan.h>
 #include <linux/list.h>
 #include <linux/dcbnl.h>
@@ -16,6 +17,7 @@
 #include <net/psample.h>
 #include <net/pkt_cls.h>
 #include <net/red.h>
+#include <net/vxlan.h>
 
 #include "port.h"
 #include "core.h"
@@ -23,13 +25,16 @@
 #include "core_acl_flex_actions.h"
 #include "reg.h"
 
+#define MLXSW_SP_DEFAULT_VID (VLAN_N_VID - 1)
+
 #define MLXSW_SP_FID_8021D_MAX 1024
 
 #define MLXSW_SP_MID_MAX 7000
 
 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
 
-#define MLXSW_SP_PORT_BASE_SPEED 25000	/* Mb/s */
+#define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */
+#define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */
 
 #define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */
 #define MLXSW_SP_KVD_GRANULARITY 128
@@ -55,6 +60,8 @@
 struct mlxsw_sp_port;
 struct mlxsw_sp_rif;
 struct mlxsw_sp_span_entry;
+enum mlxsw_sp_l3proto;
+union mlxsw_sp_l3addr;
 
 struct mlxsw_sp_upper {
 	struct net_device *dev;
@@ -69,6 +76,11 @@
 	MLXSW_SP_RIF_TYPE_MAX,
 };
 
+struct mlxsw_sp_rif_ops;
+
+extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
+extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
+
 enum mlxsw_sp_fid_type {
 	MLXSW_SP_FID_TYPE_8021Q,
 	MLXSW_SP_FID_TYPE_8021D,
@@ -77,6 +89,10 @@
 	MLXSW_SP_FID_TYPE_MAX,
 };
 
+enum mlxsw_sp_nve_type {
+	MLXSW_SP_NVE_TYPE_VXLAN,
+};
+
 struct mlxsw_sp_mid {
 	struct list_head list;
 	unsigned char addr[ETH_ALEN];
@@ -113,15 +129,22 @@
 struct mlxsw_sp_counter_pool;
 struct mlxsw_sp_fid_core;
 struct mlxsw_sp_kvdl;
+struct mlxsw_sp_nve;
 struct mlxsw_sp_kvdl_ops;
 struct mlxsw_sp_mr_tcam_ops;
 struct mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_nve_ops;
+struct mlxsw_sp_sb_vals;
+struct mlxsw_sp_port_type_speed_ops;
+struct mlxsw_sp_ptp_state;
+struct mlxsw_sp_ptp_ops;
 
 struct mlxsw_sp {
 	struct mlxsw_sp_port **ports;
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
 	unsigned char base_mac[ETH_ALEN];
+	const unsigned char *mac_mask;
 	struct mlxsw_sp_upper *lags;
 	int *port_to_module;
 	struct mlxsw_sp_sb *sb;
@@ -132,7 +155,10 @@
 	struct mlxsw_sp_acl *acl;
 	struct mlxsw_sp_fid_core *fid_core;
 	struct mlxsw_sp_kvdl *kvdl;
+	struct mlxsw_sp_nve *nve;
 	struct notifier_block netdevice_nb;
+	struct mlxsw_sp_ptp_clock *clock;
+	struct mlxsw_sp_ptp_state *ptp_state;
 
 	struct mlxsw_sp_counter_pool *counter_pool;
 	struct {
@@ -146,6 +172,13 @@
 	const struct mlxsw_afk_ops *afk_ops;
 	const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
 	const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
+	const struct mlxsw_sp_nve_ops **nve_ops_arr;
+	const struct mlxsw_sp_rif_ops **rif_ops_arr;
+	const struct mlxsw_sp_sb_vals *sb_vals;
+	const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
+	const struct mlxsw_sp_ptp_ops *ptp_ops;
+	const struct mlxsw_listener *listeners;
+	size_t listeners_count;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -177,7 +210,6 @@
 	struct list_head list;
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct mlxsw_sp_fid *fid;
-	unsigned int ref_count;
 	u16 vid;
 	struct mlxsw_sp_bridge_port *bridge_port;
 	struct list_head bridge_vlan_node;
@@ -193,6 +225,16 @@
 	u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
 };
 
+struct mlxsw_sp_ptp_port_dir_stats {
+	u64 packets;
+	u64 timestamps;
+};
+
+struct mlxsw_sp_ptp_port_stats {
+	struct mlxsw_sp_ptp_port_dir_stats rx_gcd;
+	struct mlxsw_sp_ptp_port_dir_stats tx_gcd;
+};
+
 struct mlxsw_sp_port {
 	struct net_device *dev;
 	struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats;
@@ -228,13 +270,84 @@
 	} periodic_hw_stats;
 	struct mlxsw_sp_port_sample *sample;
 	struct list_head vlans_list;
+	struct mlxsw_sp_port_vlan *default_vlan;
 	struct mlxsw_sp_qdisc *root_qdisc;
 	struct mlxsw_sp_qdisc *tclass_qdiscs;
 	unsigned acl_rule_count;
 	struct mlxsw_sp_acl_block *ing_acl_block;
 	struct mlxsw_sp_acl_block *eg_acl_block;
+	struct {
+		struct delayed_work shaper_dw;
+		struct hwtstamp_config hwtstamp_config;
+		u16 ing_types;
+		u16 egr_types;
+		struct mlxsw_sp_ptp_port_stats stats;
+	} ptp;
 };
 
+struct mlxsw_sp_port_type_speed_ops {
+	void (*from_ptys_supported_port)(struct mlxsw_sp *mlxsw_sp,
+					 u32 ptys_eth_proto,
+					 struct ethtool_link_ksettings *cmd);
+	void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
+			       u8 width, unsigned long *mode);
+	u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto);
+	void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp,
+				       bool carrier_ok, u32 ptys_eth_proto,
+				       struct ethtool_link_ksettings *cmd);
+	u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width,
+				   const struct ethtool_link_ksettings *cmd);
+	u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed);
+	u32 (*to_ptys_upper_speed)(struct mlxsw_sp *mlxsw_sp, u32 upper_speed);
+	int (*port_speed_base)(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+			       u32 *base_speed);
+	void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload,
+				  u8 local_port, u32 proto_admin, bool autoneg);
+	void (*reg_ptys_eth_unpack)(struct mlxsw_sp *mlxsw_sp, char *payload,
+				    u32 *p_eth_proto_cap,
+				    u32 *p_eth_proto_admin,
+				    u32 *p_eth_proto_oper);
+};
+
+static inline struct net_device *
+mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev)
+{
+	struct net_device *dev;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(br_dev, dev, iter) {
+		if (netif_is_vxlan(dev))
+			return dev;
+	}
+
+	return NULL;
+}
+
+static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev)
+{
+	return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev);
+}
+
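+/* Resolve the VLAN which is both PVID and egress untagged on the VxLAN
+ * bridge port, i.e. the VLAN to which the VNI is mapped; *p_vid is set to
+ * 0 when no such VLAN exists.
+ */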
+static inline int
+mlxsw_sp_vxlan_mapped_vid(const struct net_device *vxlan_dev, u16 *p_vid)
+{
+	struct bridge_vlan_info vinfo;
+	u16 vid = 0;
+	int err;
+
+	err = br_vlan_get_pvid(vxlan_dev, &vid);
+	if (err || !vid)
+		goto out;
+
+	err = br_vlan_get_info(vxlan_dev, vid, &vinfo);
+	if (err || !(vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED))
+		vid = 0;
+
+out:
+	*p_vid = vid;
+	return err;
+}
+
 static inline bool
 mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
 {
@@ -283,13 +396,14 @@
 			 struct devlink_sb_pool_info *pool_info);
 int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
 			 unsigned int sb_index, u16 pool_index, u32 size,
-			 enum devlink_sb_threshold_type threshold_type);
+			 enum devlink_sb_threshold_type threshold_type,
+			 struct netlink_ext_ack *extack);
 int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
 			      unsigned int sb_index, u16 pool_index,
 			      u32 *p_threshold);
 int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
 			      unsigned int sb_index, u16 pool_index,
-			      u32 threshold);
+			      u32 threshold, struct netlink_ext_ack *extack);
 int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
 				 unsigned int sb_index, u16 tc_index,
 				 enum devlink_sb_pool_type pool_type,
@@ -297,7 +411,8 @@
 int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
 				 unsigned int sb_index, u16 tc_index,
 				 enum devlink_sb_pool_type pool_type,
-				 u16 pool_index, u32 threshold);
+				 u16 pool_index, u32 threshold,
+				 struct netlink_ext_ack *extack);
 int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
 			     unsigned int sb_index);
 int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
@@ -311,12 +426,14 @@
 				     u32 *p_cur, u32 *p_max);
 u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells);
 u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes);
+u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp);
+
+extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals;
+extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals;
 
 /* spectrum_switchdev.c */
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp);
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port);
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
 			bool adding);
 void
@@ -330,8 +447,21 @@
 				struct net_device *br_dev);
 bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
 					 const struct net_device *br_dev);
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+			       const struct net_device *br_dev,
+			       const struct net_device *vxlan_dev, u16 vid,
+			       struct netlink_ext_ack *extack);
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+				 const struct net_device *vxlan_dev);
+struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
+					     const struct net_device *br_dev,
+					     u16 vid,
+					     struct netlink_ext_ack *extack);
+extern struct notifier_block mlxsw_sp_switchdev_notifier;
 
 /* spectrum.c */
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+				       u8 local_port, void *priv);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
 			  bool dwrr, u8 dwrr_weight);
@@ -351,8 +481,8 @@
 				   bool learn_enable);
 int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 struct mlxsw_sp_port_vlan *
-mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
-void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
+mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
+void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
 int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
 			   u16 vid_end, bool is_member, bool untagged);
 int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
@@ -383,17 +513,25 @@
 #endif
 
 /* spectrum_router.c */
+enum mlxsw_sp_l3proto {
+	MLXSW_SP_L3_PROTO_IPV4,
+	MLXSW_SP_L3_PROTO_IPV6,
+#define MLXSW_SP_L3_PROTO_MAX	(MLXSW_SP_L3_PROTO_IPV6 + 1)
+};
+
+union mlxsw_sp_l3addr {
+	__be32 addr4;
+	struct in6_addr addr6;
+};
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
+int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
+					 unsigned long event, void *ptr);
 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
 			      const struct net_device *macvlan_dev);
-int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
-			    unsigned long event, void *ptr);
 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
 				  unsigned long event, void *ptr);
-int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
-			     unsigned long event, void *ptr);
 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
 				   unsigned long event, void *ptr);
 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
@@ -413,9 +551,24 @@
 				 struct netdev_notifier_info *info);
 void
 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
-void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
 				 struct net_device *dev);
+struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+					      const struct net_device *dev);
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
+struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+				      enum mlxsw_sp_l3proto ul_proto,
+				      const union mlxsw_sp_l3addr *ul_sip,
+				      u32 tunnel_index);
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+				      enum mlxsw_sp_l3proto ul_proto,
+				      const union mlxsw_sp_l3addr *ul_sip);
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+				u16 *vr_id);
+int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+			       u16 *ul_rif_index);
+void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index);
 
 /* spectrum_kvdl.c */
 enum mlxsw_sp_kvdl_entry_type {
@@ -423,6 +576,7 @@
 	MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
 	MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
 	MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+	MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT,
 };
 
 static inline unsigned int
@@ -433,6 +587,7 @@
 	case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */
 	case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */
 	case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT: /* fall through */
 	default:
 		return 1;
 	}
@@ -479,6 +634,8 @@
 	unsigned int priority;
 	struct mlxsw_afk_element_values values;
 	struct mlxsw_afa_block *act_block;
+	u8 action_created:1,
+	   egress_bind_blocker:1;
 	unsigned int counter_index;
 };
 
@@ -488,6 +645,17 @@
 /* spectrum_acl.c */
 enum mlxsw_sp_acl_profile {
 	MLXSW_SP_ACL_PROFILE_FLOWER,
+	MLXSW_SP_ACL_PROFILE_MR,
+};
+
+struct mlxsw_sp_acl_block {
+	struct list_head binding_list;
+	struct mlxsw_sp_acl_ruleset *ruleset_zero;
+	struct mlxsw_sp *mlxsw_sp;
+	unsigned int rule_count;
+	unsigned int disable_count;
+	unsigned int egress_blocker_rule_count;
+	struct net *net;
 };
 
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
@@ -502,7 +670,8 @@
 int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
 			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress);
+			    bool ingress,
+			    struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_acl_block *block,
 			      struct mlxsw_sp_port *mlxsw_sp_port,
@@ -522,7 +691,8 @@
 u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset);
 
 struct mlxsw_sp_acl_rule_info *
-mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl);
+mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl,
+			  struct mlxsw_afa_block *afa_block);
 void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei);
 void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
@@ -566,6 +736,7 @@
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
 			 struct mlxsw_sp_acl_ruleset *ruleset,
 			 unsigned long cookie,
+			 struct mlxsw_afa_block *afa_block,
 			 struct netlink_ext_ack *extack);
 void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_acl_rule *rule);
@@ -573,6 +744,9 @@
 			  struct mlxsw_sp_acl_rule *rule);
 void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_acl_rule *rule);
+int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_acl_rule *rule,
+				     struct mlxsw_afa_block *afa_block);
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp,
 			 struct mlxsw_sp_acl_ruleset *ruleset,
@@ -587,6 +761,8 @@
 
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
+u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val);
 
 /* spectrum_acl_tcam.c */
 struct mlxsw_sp_acl_tcam;
@@ -601,10 +777,13 @@
 	size_t region_priv_size;
 	int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv,
 			   void *tcam_priv,
-			   struct mlxsw_sp_acl_tcam_region *region);
+			   struct mlxsw_sp_acl_tcam_region *region,
+			   void *hints_priv);
 	void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv);
 	int (*region_associate)(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam_region *region);
+	void * (*region_rehash_hints_get)(void *region_priv);
+	void (*region_rehash_hints_put)(void *hints_priv);
 	size_t chunk_priv_size;
 	void (*chunk_init)(void *region_priv, void *chunk_priv,
 			   unsigned int priority);
@@ -617,6 +796,9 @@
 	void (*entry_del)(struct mlxsw_sp *mlxsw_sp,
 			  void *region_priv, void *chunk_priv,
 			  void *entry_priv);
+	int (*entry_action_replace)(struct mlxsw_sp *mlxsw_sp,
+				    void *region_priv, void *entry_priv,
+				    struct mlxsw_sp_acl_rule_info *rulei);
 	int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp,
 				  void *region_priv, void *entry_priv,
 				  bool *activity);
@@ -639,19 +821,19 @@
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
-			    struct tc_cls_flower_offload *f);
+			    struct flow_cls_offload *f);
 void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
 			     struct mlxsw_sp_acl_block *block,
-			     struct tc_cls_flower_offload *f);
+			     struct flow_cls_offload *f);
 int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
 			  struct mlxsw_sp_acl_block *block,
-			  struct tc_cls_flower_offload *f);
+			  struct flow_cls_offload *f);
 int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_block *block,
-				 struct tc_cls_flower_offload *f);
+				 struct flow_cls_offload *f);
 void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_acl_block *block,
-				   struct tc_cls_flower_offload *f);
+				   struct flow_cls_offload *f);
 
 /* spectrum_qdisc.c */
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
@@ -662,6 +844,26 @@
 			   struct tc_prio_qopt_offload *p);
 
 /* spectrum_fid.c */
+bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);
+bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid);
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
+						  u16 fid_index);
+int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex);
+int mlxsw_sp_fid_nve_type(const struct mlxsw_sp_fid *fid,
+			  enum mlxsw_sp_nve_type *p_type);
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+						__be32 vni);
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni);
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+				     u32 nve_flood_index);
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid);
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type,
+			 __be32 vni, int nve_ifindex);
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid);
+void mlxsw_sp_fid_fdb_clear_offload(const struct mlxsw_sp_fid *fid,
+				    const struct net_device *nve_dev);
 int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
 			   enum mlxsw_sp_flood_type packet_type, u8 local_port,
 			   bool member);
@@ -669,10 +871,10 @@
 			      struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid,
 				 struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
-enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid);
 u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid);
 enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid);
 void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif);
+struct mlxsw_sp_rif *mlxsw_sp_fid_rif(const struct mlxsw_sp_fid *fid);
 enum mlxsw_sp_rif_type
 mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp,
 			   enum mlxsw_sp_fid_type type);
@@ -680,6 +882,10 @@
 struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid);
 struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
 					    int br_ifindex);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_lookup(struct mlxsw_sp *mlxsw_sp,
+					       u16 vid);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+					       int br_ifindex);
 struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
 					   u16 rif_index);
 struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp);
@@ -725,4 +931,55 @@
 /* spectrum2_mr_tcam.c */
 extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
 
+/* spectrum_nve.c */
+struct mlxsw_sp_nve_params {
+	enum mlxsw_sp_nve_type type;
+	__be32 vni;
+	const struct net_device *dev;
+};
+
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[];
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[];
+
+int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip,
+				    enum mlxsw_sp_l3proto proto,
+				    union mlxsw_sp_l3addr *addr);
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_fid *fid,
+			      enum mlxsw_sp_l3proto proto,
+			      union mlxsw_sp_l3addr *addr);
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_fid *fid,
+			       enum mlxsw_sp_l3proto proto,
+			       union mlxsw_sp_l3addr *addr);
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+				      u32 tb_id, __be32 addr);
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+			    struct mlxsw_sp_nve_params *params,
+			    struct netlink_ext_ack *extack);
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_fid *fid);
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp);
+
+/* spectrum_nve_vxlan.c */
+int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp);
+
+/* spectrum_trap.c */
+int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core,
+		       const struct devlink_trap *trap, void *trap_ctx);
+void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core,
+			const struct devlink_trap *trap, void *trap_ctx);
+int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core,
+			     const struct devlink_trap *trap,
+			     enum devlink_trap_action action);
+int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
+			     const struct devlink_trap_group *group);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
index 2a9eac9..3a636f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
@@ -67,7 +67,7 @@
 	mlxsw_sp_acl_ctcam_chunk_init(&region->cregion,
 				      &region->catchall.cchunk,
 				      MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
-	rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+	rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl, NULL);
 	if (IS_ERR(rulei)) {
 		err = PTR_ERR(rulei);
 		goto err_rulei_create;
@@ -112,7 +112,8 @@
 static int
 mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
 			       void *tcam_priv,
-			       struct mlxsw_sp_acl_tcam_region *_region)
+			       struct mlxsw_sp_acl_tcam_region *_region,
+			       void *hints_priv)
 {
 	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
 	int err;
@@ -193,6 +194,14 @@
 }
 
 static int
+mlxsw_sp1_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					void *region_priv, void *entry_priv,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	return -EOPNOTSUPP;
+}
+
+static int
 mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
 					     struct mlxsw_sp_acl_tcam_region *_region,
 					     unsigned int offset,
@@ -240,5 +249,6 @@
 	.entry_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_entry),
 	.entry_add		= mlxsw_sp1_acl_tcam_entry_add,
 	.entry_del		= mlxsw_sp1_acl_tcam_entry_del,
+	.entry_action_replace	= mlxsw_sp1_acl_tcam_entry_action_replace,
 	.entry_activity_get	= mlxsw_sp1_acl_tcam_entry_activity_get,
 };
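
Spectrum-1 cannot update an action in place, so it fills the new entry_action_replace callback with a stub returning -EOPNOTSUPP, letting common code call through the ops table unconditionally. A standalone sketch of that pattern, with illustrative names:

#include <errno.h>
#include <stdio.h>

struct tcam_ops {
	int (*entry_action_replace)(void *entry);
};

/* Stub for a generation without in-place action update support. */
static int gen1_entry_action_replace(void *entry)
{
	(void)entry;
	return -EOPNOTSUPP;
}

static const struct tcam_ops gen1_ops = {
	.entry_action_replace = gen1_entry_action_replace,
};

int main(void)
{
	int err = gen1_ops.entry_action_replace(NULL);

	printf("replace returned %d\n", err);	/* -EOPNOTSUPP */
	return 0;
}
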
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
index 8ca77f3..6c66a0f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
@@ -34,15 +34,15 @@
 {
 	struct mlxsw_sp_acl_atcam_region *aregion;
 	struct mlxsw_sp_acl_atcam_entry *aentry;
-	struct mlxsw_sp_acl_erp *erp;
+	struct mlxsw_sp_acl_erp_mask *erp_mask;
 
 	aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion);
 	aentry = mlxsw_sp_acl_tcam_centry_aentry(centry);
 
-	erp = mlxsw_sp_acl_erp_get(aregion, mask, true);
-	if (IS_ERR(erp))
-		return PTR_ERR(erp);
-	aentry->erp = erp;
+	erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, true);
+	if (IS_ERR(erp_mask))
+		return PTR_ERR(erp_mask);
+	aentry->erp_mask = erp_mask;
 
 	return 0;
 }
@@ -57,7 +57,7 @@
 	aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion);
 	aentry = mlxsw_sp_acl_tcam_centry_aentry(centry);
 
-	mlxsw_sp_acl_erp_put(aregion, aentry->erp);
+	mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask);
 }
 
 static const struct mlxsw_sp_acl_ctcam_region_ops
@@ -139,7 +139,8 @@
 static int
 mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
 			       void *tcam_priv,
-			       struct mlxsw_sp_acl_tcam_region *_region)
+			       struct mlxsw_sp_acl_tcam_region *_region,
+			       void *hints_priv)
 {
 	struct mlxsw_sp2_acl_tcam_region *region = region_priv;
 	struct mlxsw_sp2_acl_tcam *tcam = tcam_priv;
@@ -147,7 +148,8 @@
 	region->region = _region;
 
 	return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam,
-					      &region->aregion, _region,
+					      &region->aregion,
+					      _region, hints_priv,
 					      &mlxsw_sp2_acl_ctcam_region_ops);
 }
 
@@ -166,6 +168,18 @@
 	return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id);
 }
 
+static void *mlxsw_sp2_acl_tcam_region_rehash_hints_get(void *region_priv)
+{
+	struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+
+	return mlxsw_sp_acl_atcam_rehash_hints_get(&region->aregion);
+}
+
+static void mlxsw_sp2_acl_tcam_region_rehash_hints_put(void *hints_priv)
+{
+	mlxsw_sp_acl_atcam_rehash_hints_put(hints_priv);
+}
+
 static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
 					  unsigned int priority)
 {
@@ -211,6 +225,20 @@
 }
 
 static int
+mlxsw_sp2_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					void *region_priv, void *entry_priv,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+	entry->act_block = rulei->act_block;
+	return mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp,
+						       &region->aregion,
+						       &entry->aentry, rulei);
+}
+
+static int
 mlxsw_sp2_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
 				      void *region_priv, void *entry_priv,
 				      bool *activity)
@@ -229,11 +257,14 @@
 	.region_init		= mlxsw_sp2_acl_tcam_region_init,
 	.region_fini		= mlxsw_sp2_acl_tcam_region_fini,
 	.region_associate	= mlxsw_sp2_acl_tcam_region_associate,
+	.region_rehash_hints_get = mlxsw_sp2_acl_tcam_region_rehash_hints_get,
+	.region_rehash_hints_put = mlxsw_sp2_acl_tcam_region_rehash_hints_put,
 	.chunk_priv_size	= sizeof(struct mlxsw_sp2_acl_tcam_chunk),
 	.chunk_init		= mlxsw_sp2_acl_tcam_chunk_init,
 	.chunk_fini		= mlxsw_sp2_acl_tcam_chunk_fini,
 	.entry_priv_size	= sizeof(struct mlxsw_sp2_acl_tcam_entry),
 	.entry_add		= mlxsw_sp2_acl_tcam_entry_add,
 	.entry_del		= mlxsw_sp2_acl_tcam_entry_del,
+	.entry_action_replace	= mlxsw_sp2_acl_tcam_entry_action_replace,
 	.entry_activity_get	= mlxsw_sp2_acl_tcam_entry_activity_get,
 };
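
The new region_rehash_hints_get/put callbacks hand an opaque pointer from an old region into the init of its replacement. A minimal model of that flow — hypothetical types, with a NULL hints pointer meaning "no hints":

#include <stdio.h>
#include <stdlib.h>

struct hints {
	int nmasks;	/* e.g. mask layout learned from the old region */
};

static void *rehash_hints_get(void)
{
	struct hints *h = malloc(sizeof(*h));

	if (h)
		h->nmasks = 4;
	return h;
}

static void rehash_hints_put(void *hints_priv)
{
	free(hints_priv);
}

static int region_init(void *hints_priv)
{
	const struct hints *h = hints_priv;

	/* NULL means "no hints": lay the region out from scratch. */
	printf("init with %d hinted masks\n", h ? h->nmasks : 0);
	return 0;
}

int main(void)
{
	void *hints = rehash_hints_get();

	region_init(hints);
	rehash_hints_put(hints);
	return 0;
}
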
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
index 68c8b14..8d14770 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
@@ -35,6 +35,7 @@
 				 MAX_KVD_ACTION_SETS),
 	MLXSW_SP2_KVDL_PART_INFO(PBS, 0x24, KVD_SIZE, KVD_SIZE),
 	MLXSW_SP2_KVDL_PART_INFO(MCRIGR, 0x26, KVD_SIZE, KVD_SIZE),
+	MLXSW_SP2_KVDL_PART_INFO(TNUMT, 0x29, KVD_SIZE, KVD_SIZE),
 };
 
 #define MLXSW_SP2_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp2_kvdl_parts_info)
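
The KVDL parts are described by a macro-built table whose length is taken with ARRAY_SIZE(), so adding the TNUMT part is a single new row. A userspace sketch of that table idiom, with hypothetical fields:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct part_info {
	const char *name;
	unsigned int resource_id;
};

#define PART_INFO(_name, _id) { .name = #_name, .resource_id = (_id) }

static const struct part_info parts_info[] = {
	PART_INFO(PBS, 0x24),
	PART_INFO(MCRIGR, 0x26),
	PART_INFO(TNUMT, 0x29),	/* newly added part */
};

int main(void)
{
	size_t i;

	/* The table length follows the initializer automatically. */
	for (i = 0; i < ARRAY_SIZE(parts_info); i++)
		printf("%-6s -> 0x%02x\n", parts_info[i].name,
		       parts_info[i].resource_id);
	return 0;
}
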
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
index 4dd6247..e31ec75 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
@@ -7,6 +7,201 @@
 #include "spectrum.h"
 #include "spectrum_mr.h"
 
+struct mlxsw_sp2_mr_tcam {
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_acl_block *acl_block;
+	struct mlxsw_sp_acl_ruleset *ruleset4;
+	struct mlxsw_sp_acl_ruleset *ruleset6;
+};
+
+struct mlxsw_sp2_mr_route {
+	struct mlxsw_sp2_mr_tcam *mr_tcam;
+};
+
+static struct mlxsw_sp_acl_ruleset *
+mlxsw_sp2_mr_tcam_proto_ruleset(struct mlxsw_sp2_mr_tcam *mr_tcam,
+				enum mlxsw_sp_l3proto proto)
+{
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		return mr_tcam->ruleset4;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		return mr_tcam->ruleset6;
+	}
+	return NULL;
+}
+
+static int mlxsw_sp2_mr_tcam_bind_group(struct mlxsw_sp *mlxsw_sp,
+					enum mlxsw_reg_pemrbt_protocol protocol,
+					struct mlxsw_sp_acl_ruleset *ruleset)
+{
+	char pemrbt_pl[MLXSW_REG_PEMRBT_LEN];
+	u16 group_id;
+
+	group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
+
+	mlxsw_reg_pemrbt_pack(pemrbt_pl, protocol, group_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pemrbt), pemrbt_pl);
+}
+
+static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv4[] = {
+		MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
+		MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+		MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+		MLXSW_AFK_ELEMENT_DST_IP_0_31,
+};
+
+static int mlxsw_sp2_mr_tcam_ipv4_init(struct mlxsw_sp2_mr_tcam *mr_tcam)
+{
+	struct mlxsw_afk_element_usage elusage;
+	int err;
+
+	/* Initialize IPv4 ACL group. */
+	mlxsw_afk_element_usage_fill(&elusage,
+				     mlxsw_sp2_mr_tcam_usage_ipv4,
+				     ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv4));
+	mr_tcam->ruleset4 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp,
+						     mr_tcam->acl_block,
+						     MLXSW_SP_L3_PROTO_IPV4,
+						     MLXSW_SP_ACL_PROFILE_MR,
+						     &elusage);
+
+	if (IS_ERR(mr_tcam->ruleset4))
+		return PTR_ERR(mr_tcam->ruleset4);
+
+	/* MC Router groups should be bound before routes are inserted. */
+	err = mlxsw_sp2_mr_tcam_bind_group(mr_tcam->mlxsw_sp,
+					   MLXSW_REG_PEMRBT_PROTO_IPV4,
+					   mr_tcam->ruleset4);
+	if (err)
+		goto err_bind_group;
+
+	return 0;
+
+err_bind_group:
+	mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset4);
+	return err;
+}
+
+static void mlxsw_sp2_mr_tcam_ipv4_fini(struct mlxsw_sp2_mr_tcam *mr_tcam)
+{
+	mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset4);
+}
+
+static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv6[] = {
+		MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
+		MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+		MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+		MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+		MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+		MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+		MLXSW_AFK_ELEMENT_DST_IP_96_127,
+		MLXSW_AFK_ELEMENT_DST_IP_64_95,
+		MLXSW_AFK_ELEMENT_DST_IP_32_63,
+		MLXSW_AFK_ELEMENT_DST_IP_0_31,
+};
+
+static int mlxsw_sp2_mr_tcam_ipv6_init(struct mlxsw_sp2_mr_tcam *mr_tcam)
+{
+	struct mlxsw_afk_element_usage elusage;
+	int err;
+
+	/* Initialize IPv6 ACL group. */
+	mlxsw_afk_element_usage_fill(&elusage,
+				     mlxsw_sp2_mr_tcam_usage_ipv6,
+				     ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv6));
+	mr_tcam->ruleset6 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp,
+						     mr_tcam->acl_block,
+						     MLXSW_SP_L3_PROTO_IPV6,
+						     MLXSW_SP_ACL_PROFILE_MR,
+						     &elusage);
+
+	if (IS_ERR(mr_tcam->ruleset6))
+		return PTR_ERR(mr_tcam->ruleset6);
+
+	/* MC Router groups should be bound before routes are inserted. */
+	err = mlxsw_sp2_mr_tcam_bind_group(mr_tcam->mlxsw_sp,
+					   MLXSW_REG_PEMRBT_PROTO_IPV6,
+					   mr_tcam->ruleset6);
+	if (err)
+		goto err_bind_group;
+
+	return 0;
+
+err_bind_group:
+	mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset6);
+	return err;
+}
+
+static void mlxsw_sp2_mr_tcam_ipv6_fini(struct mlxsw_sp2_mr_tcam *mr_tcam)
+{
+	mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset6);
+}
+
+static void
+mlxsw_sp2_mr_tcam_rule_parse4(struct mlxsw_sp_acl_rule_info *rulei,
+			      struct mlxsw_sp_mr_route_key *key)
+{
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+				       (char *) &key->source.addr4,
+				       (char *) &key->source_mask.addr4, 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+				       (char *) &key->group.addr4,
+				       (char *) &key->group_mask.addr4, 4);
+}
+
+static void
+mlxsw_sp2_mr_tcam_rule_parse6(struct mlxsw_sp_acl_rule_info *rulei,
+			      struct mlxsw_sp_mr_route_key *key)
+{
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+				       &key->source.addr6.s6_addr[0x0],
+				       &key->source_mask.addr6.s6_addr[0x0], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+				       &key->source.addr6.s6_addr[0x4],
+				       &key->source_mask.addr6.s6_addr[0x4], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+				       &key->source.addr6.s6_addr[0x8],
+				       &key->source_mask.addr6.s6_addr[0x8], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+				       &key->source.addr6.s6_addr[0xc],
+				       &key->source_mask.addr6.s6_addr[0xc], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127,
+				       &key->group.addr6.s6_addr[0x0],
+				       &key->group_mask.addr6.s6_addr[0x0], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95,
+				       &key->group.addr6.s6_addr[0x4],
+				       &key->group_mask.addr6.s6_addr[0x4], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63,
+				       &key->group.addr6.s6_addr[0x8],
+				       &key->group_mask.addr6.s6_addr[0x8], 4);
+	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+				       &key->group.addr6.s6_addr[0xc],
+				       &key->group_mask.addr6.s6_addr[0xc], 4);
+}
+
+static void
+mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule,
+			     struct mlxsw_sp_mr_route_key *key,
+			     unsigned int priority)
+{
+	struct mlxsw_sp_acl_rule_info *rulei;
+
+	rulei = mlxsw_sp_acl_rule_rulei(rule);
+	rulei->priority = priority;
+	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7,
+				       key->vrid, GENMASK(7, 0));
+	mlxsw_sp_acl_rulei_keymask_u32(rulei,
+				       MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10,
+				       key->vrid >> 8, GENMASK(2, 0));
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key);
+	case MLXSW_SP_L3_PROTO_IPV6:
+		return mlxsw_sp2_mr_tcam_rule_parse6(rulei, key);
+	}
+}
+
 static int
 mlxsw_sp2_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
 			       void *route_priv,
@@ -14,7 +209,33 @@
 			       struct mlxsw_afa_block *afa_block,
 			       enum mlxsw_sp_mr_route_prio prio)
 {
+	struct mlxsw_sp2_mr_route *mr_route = route_priv;
+	struct mlxsw_sp2_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp_acl_ruleset *ruleset;
+	struct mlxsw_sp_acl_rule *rule;
+	int err;
+
+	mr_route->mr_tcam = mr_tcam;
+	ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto);
+	if (WARN_ON(!ruleset))
+		return -EINVAL;
+
+	rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset,
+					(unsigned long) route_priv, afa_block,
+					NULL);
+	if (IS_ERR(rule))
+		return PTR_ERR(rule);
+
+	mlxsw_sp2_mr_tcam_rule_parse(rule, key, prio);
+	err = mlxsw_sp_acl_rule_add(mlxsw_sp, rule);
+	if (err)
+		goto err_rule_add;
+
 	return 0;
+
+err_rule_add:
+	mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
+	return err;
 }
 
 static void
@@ -22,6 +243,21 @@
 				void *route_priv,
 				struct mlxsw_sp_mr_route_key *key)
 {
+	struct mlxsw_sp2_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp_acl_ruleset *ruleset;
+	struct mlxsw_sp_acl_rule *rule;
+
+	ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto);
+	if (WARN_ON(!ruleset))
+		return;
+
+	rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset,
+					(unsigned long) route_priv);
+	if (WARN_ON(!rule))
+		return;
+
+	mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
+	mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
 }
 
 static int
@@ -30,21 +266,64 @@
 			       struct mlxsw_sp_mr_route_key *key,
 			       struct mlxsw_afa_block *afa_block)
 {
-	return 0;
+	struct mlxsw_sp2_mr_route *mr_route = route_priv;
+	struct mlxsw_sp2_mr_tcam *mr_tcam = mr_route->mr_tcam;
+	struct mlxsw_sp_acl_ruleset *ruleset;
+	struct mlxsw_sp_acl_rule *rule;
+
+	ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto);
+	if (WARN_ON(!ruleset))
+		return -EINVAL;
+
+	rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset,
+					(unsigned long) route_priv);
+	if (WARN_ON(!rule))
+		return -EINVAL;
+
+	return mlxsw_sp_acl_rule_action_replace(mlxsw_sp, rule, afa_block);
 }
 
 static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+	struct mlxsw_sp2_mr_tcam *mr_tcam = priv;
+	int err;
+
+	mr_tcam->mlxsw_sp = mlxsw_sp;
+	mr_tcam->acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, NULL);
+	if (!mr_tcam->acl_block)
+		return -ENOMEM;
+
+	err = mlxsw_sp2_mr_tcam_ipv4_init(mr_tcam);
+	if (err)
+		goto err_ipv4_init;
+
+	err = mlxsw_sp2_mr_tcam_ipv6_init(mr_tcam);
+	if (err)
+		goto err_ipv6_init;
+
 	return 0;
+
+err_ipv6_init:
+	mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam);
+err_ipv4_init:
+	mlxsw_sp_acl_block_destroy(mr_tcam->acl_block);
+	return err;
 }
 
 static void mlxsw_sp2_mr_tcam_fini(void *priv)
 {
+	struct mlxsw_sp2_mr_tcam *mr_tcam = priv;
+
+	mlxsw_sp2_mr_tcam_ipv6_fini(mr_tcam);
+	mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam);
+	mlxsw_sp_acl_block_destroy(mr_tcam->acl_block);
 }
 
 const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops = {
+	.priv_size = sizeof(struct mlxsw_sp2_mr_tcam),
 	.init = mlxsw_sp2_mr_tcam_init,
 	.fini = mlxsw_sp2_mr_tcam_fini,
+	.route_priv_size = sizeof(struct mlxsw_sp2_mr_route),
 	.route_create = mlxsw_sp2_mr_tcam_route_create,
 	.route_destroy = mlxsw_sp2_mr_tcam_route_destroy,
 	.route_update = mlxsw_sp2_mr_tcam_route_update,
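
mlxsw_sp2_mr_tcam_rule_parse() splits the 11-bit virtual-router ID across two key elements, masking bits 0-7 and 8-10 separately. A standalone arithmetic check of that split; GENMASK() is reimplemented here for illustration (the kernel macro lives in linux/bits.h):

#include <stdio.h>

/* 32-bit rendition of the kernel's GENMASK(). */
#define GENMASK(h, l) ((~0u << (l)) & (~0u >> (31 - (h))))

int main(void)
{
	unsigned int vrid = 0x5a3;			/* 11-bit example */
	unsigned int lo = vrid & GENMASK(7, 0);		/* VIRT_ROUTER_0_7 */
	unsigned int hi = (vrid >> 8) & GENMASK(2, 0);	/* VIRT_ROUTER_8_10 */

	printf("vrid=0x%03x -> lo=0x%02x hi=0x%x\n", vrid, lo, hi);
	printf("reassembled=0x%03x\n", (hi << 8) | lo);	/* lossless */
	return 0;
}
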
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index c4f9238..150b3a1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -45,14 +45,6 @@
 	bool ingress;
 };
 
-struct mlxsw_sp_acl_block {
-	struct list_head binding_list;
-	struct mlxsw_sp_acl_ruleset *ruleset_zero;
-	struct mlxsw_sp *mlxsw_sp;
-	unsigned int rule_count;
-	unsigned int disable_count;
-};
-
 struct mlxsw_sp_acl_ruleset_ht_key {
 	struct mlxsw_sp_acl_block *block;
 	u32 chain_index;
@@ -221,6 +213,7 @@
 		return NULL;
 	INIT_LIST_HEAD(&block->binding_list);
 	block->mlxsw_sp = mlxsw_sp;
+	block->net = net;
 	return block;
 }
 
@@ -246,7 +239,8 @@
 int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
 			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress)
+			    bool ingress,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_acl_block_binding *binding;
 	int err;
@@ -254,6 +248,11 @@
 	if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
 		return -EEXIST;
 
+	if (!ingress && block->egress_blocker_rule_count) {
+		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
+		return -EOPNOTSUPP;
+	}
+
 	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
 	if (!binding)
 		return -ENOMEM;
@@ -435,7 +434,8 @@
 }
 
 struct mlxsw_sp_acl_rule_info *
-mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl)
+mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl,
+			  struct mlxsw_afa_block *afa_block)
 {
 	struct mlxsw_sp_acl_rule_info *rulei;
 	int err;
@@ -443,11 +443,18 @@
 	rulei = kzalloc(sizeof(*rulei), GFP_KERNEL);
 	if (!rulei)
 		return NULL;
+
+	if (afa_block) {
+		rulei->act_block = afa_block;
+		return rulei;
+	}
+
 	rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa);
 	if (IS_ERR(rulei->act_block)) {
 		err = PTR_ERR(rulei->act_block);
 		goto err_afa_block_create;
 	}
+	rulei->action_created = 1;
 	return rulei;
 
 err_afa_block_create:
@@ -457,7 +464,8 @@
 
 void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei)
 {
-	mlxsw_afa_block_destroy(rulei->act_block);
+	if (rulei->action_created)
+		mlxsw_afa_block_destroy(rulei->act_block);
 	kfree(rulei);
 }
 
@@ -469,7 +477,7 @@
 void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
 				 unsigned int priority)
 {
-	rulei->priority = priority >> 16;
+	rulei->priority = priority;
 }
 
 void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
@@ -579,7 +587,7 @@
 {
 	u8 ethertype;
 
-	if (action == TCA_VLAN_ACT_MODIFY) {
+	if (action == FLOW_ACTION_VLAN_MANGLE) {
 		switch (proto) {
 		case ETH_P_8021Q:
 			ethertype = 0;
@@ -623,6 +631,7 @@
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
 			 struct mlxsw_sp_acl_ruleset *ruleset,
 			 unsigned long cookie,
+			 struct mlxsw_afa_block *afa_block,
 			 struct netlink_ext_ack *extack)
 {
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
@@ -630,7 +639,7 @@
 	int err;
 
 	mlxsw_sp_acl_ruleset_ref_inc(ruleset);
-	rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp),
+	rule = kzalloc(sizeof(*rule) + ops->rule_priv_size,
 		       GFP_KERNEL);
 	if (!rule) {
 		err = -ENOMEM;
@@ -639,7 +648,7 @@
 	rule->cookie = cookie;
 	rule->ruleset = ruleset;
 
-	rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+	rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl, afa_block);
 	if (IS_ERR(rule->rulei)) {
 		err = PTR_ERR(rule->rulei);
 		goto err_rulei_create;
@@ -669,6 +678,7 @@
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
 	int err;
 
 	err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
@@ -686,14 +696,14 @@
 		 * one, to be directly bound to device. The rest of the
 		 * rulesets are bound by "Goto action set".
 		 */
-		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
-						      ruleset->ht_key.block);
+		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
 		if (err)
 			goto err_ruleset_block_bind;
 	}
 
 	list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
-	ruleset->ht_key.block->rule_count++;
+	block->rule_count++;
+	block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker;
 	return 0;
 
 err_ruleset_block_bind:
@@ -709,7 +719,9 @@
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
 
+	block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
 	ruleset->ht_key.block->rule_count--;
 	list_del(&rule->list);
 	if (!ruleset->ht_key.chain_index &&
@@ -721,6 +733,20 @@
 	ops->rule_del(mlxsw_sp, rule->priv);
 }
 
+int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_acl_rule *rule,
+				     struct mlxsw_afa_block *afa_block)
+{
+	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
+	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+	struct mlxsw_sp_acl_rule_info *rulei;
+
+	rulei = mlxsw_sp_acl_rule_rulei(rule);
+	rulei->act_block = afa_block;
+
+	return ops->rule_action_replace(mlxsw_sp, rule->priv, rule->rulei);
+}
+
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp,
 			 struct mlxsw_sp_acl_ruleset *ruleset,
@@ -781,7 +807,7 @@
 			       msecs_to_jiffies(interval));
 }
 
-static void mlxsw_sp_acl_rul_activity_update_work(struct work_struct *work)
+static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work)
 {
 	struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl,
 						rule_activity_update.dw.work);
@@ -860,7 +886,7 @@
 
 	/* Create the delayed work for the rule activity_update */
 	INIT_DELAYED_WORK(&acl->rule_activity_update.dw,
-			  mlxsw_sp_acl_rul_activity_update_work);
+			  mlxsw_sp_acl_rule_activity_update_work);
 	acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS;
 	mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0);
 	return 0;
@@ -888,3 +914,19 @@
 	mlxsw_afk_destroy(acl->afk);
 	kfree(acl);
 }
+
+u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+
+	return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp,
+							   &acl->tcam);
+}
+
+int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val)
+{
+	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+
+	return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp,
+							   &acl->tcam, val);
+}
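
mlxsw_sp_acl_rulei_create() now either borrows a caller-supplied action block or creates its own, and mlxsw_sp_acl_rulei_destroy() frees the block only in the latter case. A standalone sketch of that ownership rule, using userspace stand-ins rather than driver types:

#include <stdio.h>
#include <stdlib.h>

struct act_block { int dummy; };

struct rule_info {
	struct act_block *act_block;
	unsigned int action_created:1;
};

static struct rule_info *rulei_create(struct act_block *afa_block)
{
	struct rule_info *rulei = calloc(1, sizeof(*rulei));

	if (!rulei)
		return NULL;
	if (afa_block) {
		rulei->act_block = afa_block;	/* borrowed, not owned */
		return rulei;
	}
	rulei->act_block = calloc(1, sizeof(*rulei->act_block));
	if (!rulei->act_block) {
		free(rulei);
		return NULL;
	}
	rulei->action_created = 1;		/* owned: free on destroy */
	return rulei;
}

static void rulei_destroy(struct rule_info *rulei)
{
	if (rulei->action_created)
		free(rulei->act_block);
	free(rulei);
}

int main(void)
{
	struct act_block shared = {0};
	struct rule_info *borrowing = rulei_create(&shared);
	struct rule_info *owning = rulei_create(NULL);

	rulei_destroy(borrowing);	/* must not free 'shared' */
	rulei_destroy(owning);		/* frees its own block */
	return 0;
}
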
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
index 2dda028..ded4cf6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
@@ -7,6 +7,8 @@
 #include <linux/gfp.h>
 #include <linux/refcount.h>
 #include <linux/rhashtable.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mlxsw.h>
 
 #include "reg.h"
 #include "core.h"
@@ -14,8 +16,8 @@
 #include "spectrum_acl_tcam.h"
 #include "core_acl_flex_keys.h"
 
-#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START	6
-#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END	11
+#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_START	0
+#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_END	5
 
 struct mlxsw_sp_acl_atcam_lkey_id_ht_key {
 	char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* MSB blocks */
@@ -34,7 +36,7 @@
 	void (*fini)(struct mlxsw_sp_acl_atcam_region *aregion);
 	struct mlxsw_sp_acl_atcam_lkey_id *
 		(*lkey_id_get)(struct mlxsw_sp_acl_atcam_region *aregion,
-			       struct mlxsw_sp_acl_rule_info *rulei, u8 erp_id);
+			       char *enc_key, u8 erp_id);
 	void (*lkey_id_put)(struct mlxsw_sp_acl_atcam_region *aregion,
 			    struct mlxsw_sp_acl_atcam_lkey_id *lkey_id);
 };
@@ -64,7 +66,7 @@
 static bool
 mlxsw_sp_acl_atcam_is_centry(const struct mlxsw_sp_acl_atcam_entry *aentry)
 {
-	return mlxsw_sp_acl_erp_is_ctcam_erp(aentry->erp);
+	return mlxsw_sp_acl_erp_mask_is_ctcam(aentry->erp_mask);
 }
 
 static int
@@ -90,8 +92,7 @@
 
 static struct mlxsw_sp_acl_atcam_lkey_id *
 mlxsw_sp_acl_atcam_generic_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion,
-				       struct mlxsw_sp_acl_rule_info *rulei,
-				       u8 erp_id)
+				       char *enc_key, u8 erp_id)
 {
 	struct mlxsw_sp_acl_atcam_region_generic *region_generic;
 
@@ -220,8 +221,7 @@
 
 static struct mlxsw_sp_acl_atcam_lkey_id *
 mlxsw_sp_acl_atcam_12kb_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion,
-				    struct mlxsw_sp_acl_rule_info *rulei,
-				    u8 erp_id)
+				    char *enc_key, u8 erp_id)
 {
 	struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
 	struct mlxsw_sp_acl_tcam_region *region = aregion->region;
@@ -230,9 +230,10 @@
 	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
 	struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
 
-	mlxsw_afk_encode(afk, region->key_info, &rulei->values, ht_key.enc_key,
-			 NULL, MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START,
-			 MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END);
+	memcpy(ht_key.enc_key, enc_key, sizeof(ht_key.enc_key));
+	mlxsw_afk_clear(afk, ht_key.enc_key,
+			MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_START,
+			MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_END);
 	ht_key.erp_id = erp_id;
 	lkey_id = rhashtable_lookup_fast(&region_12kb->lkey_ht, &ht_key,
 					 mlxsw_sp_acl_atcam_lkey_id_ht_params);
@@ -317,6 +318,7 @@
 			       struct mlxsw_sp_acl_atcam *atcam,
 			       struct mlxsw_sp_acl_atcam_region *aregion,
 			       struct mlxsw_sp_acl_tcam_region *region,
+			       void *hints_priv,
 			       const struct mlxsw_sp_acl_ctcam_region_ops *ops)
 {
 	int err;
@@ -324,6 +326,7 @@
 	aregion->region = region;
 	aregion->atcam = atcam;
 	mlxsw_sp_acl_atcam_region_type_init(aregion);
+	INIT_LIST_HEAD(&aregion->entries_list);
 
 	err = rhashtable_init(&aregion->entries_ht,
 			      &mlxsw_sp_acl_atcam_entries_ht_params);
@@ -332,7 +335,7 @@
 	err = aregion->ops->init(aregion);
 	if (err)
 		goto err_ops_init;
-	err = mlxsw_sp_acl_erp_region_init(aregion);
+	err = mlxsw_sp_acl_erp_region_init(aregion, hints_priv);
 	if (err)
 		goto err_erp_region_init;
 	err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion,
@@ -357,6 +360,7 @@
 	mlxsw_sp_acl_erp_region_fini(aregion);
 	aregion->ops->fini(aregion);
 	rhashtable_destroy(&aregion->entries_ht);
+	WARN_ON(!list_empty(&aregion->entries_list));
 }
 
 void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
@@ -379,7 +383,7 @@
 				       struct mlxsw_sp_acl_rule_info *rulei)
 {
 	struct mlxsw_sp_acl_tcam_region *region = aregion->region;
-	u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp);
+	u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask);
 	struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
 	char ptce3_pl[MLXSW_REG_PTCE3_LEN];
 	u32 kvdl_index, priority;
@@ -389,7 +393,7 @@
 	if (err)
 		return err;
 
-	lkey_id = aregion->ops->lkey_id_get(aregion, rulei, erp_id);
+	lkey_id = aregion->ops->lkey_id_get(aregion, aentry->enc_key, erp_id);
 	if (IS_ERR(lkey_id))
 		return PTR_ERR(lkey_id);
 	aentry->lkey_id = lkey_id;
@@ -397,7 +401,10 @@
 	kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block);
 	mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE,
 			     priority, region->tcam_region_info,
-			     aentry->ht_key.enc_key, erp_id,
+			     aentry->enc_key, erp_id,
+			     aentry->delta_info.start,
+			     aentry->delta_info.mask,
+			     aentry->delta_info.value,
 			     refcount_read(&lkey_id->refcnt) != 1, lkey_id->id,
 			     kvdl_index);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
@@ -418,18 +425,50 @@
 {
 	struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id;
 	struct mlxsw_sp_acl_tcam_region *region = aregion->region;
-	u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp);
+	u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask);
 	char ptce3_pl[MLXSW_REG_PTCE3_LEN];
 
 	mlxsw_reg_ptce3_pack(ptce3_pl, false, MLXSW_REG_PTCE3_OP_WRITE_WRITE, 0,
-			     region->tcam_region_info, aentry->ht_key.enc_key,
-			     erp_id, refcount_read(&lkey_id->refcnt) != 1,
+			     region->tcam_region_info,
+			     aentry->enc_key, erp_id,
+			     aentry->delta_info.start,
+			     aentry->delta_info.mask,
+			     aentry->delta_info.value,
+			     refcount_read(&lkey_id->refcnt) != 1,
 			     lkey_id->id, 0);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
 	aregion->ops->lkey_id_put(aregion, lkey_id);
 }
 
 static int
+mlxsw_sp_acl_atcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					       struct mlxsw_sp_acl_atcam_region *aregion,
+					       struct mlxsw_sp_acl_atcam_entry *aentry,
+					       struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id;
+	u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask);
+	struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+	char ptce3_pl[MLXSW_REG_PTCE3_LEN];
+	u32 kvdl_index, priority;
+	int err;
+
+	err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, true);
+	if (err)
+		return err;
+	kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block);
+	mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_UPDATE,
+			     priority, region->tcam_region_info,
+			     aentry->enc_key, erp_id,
+			     aentry->delta_info.start,
+			     aentry->delta_info.mask,
+			     aentry->delta_info.value,
+			     refcount_read(&lkey_id->refcnt) != 1, lkey_id->id,
+			     kvdl_index);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
+}
+
+static int
 __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_acl_atcam_region *aregion,
 			       struct mlxsw_sp_acl_atcam_entry *aentry,
@@ -438,19 +477,37 @@
 	struct mlxsw_sp_acl_tcam_region *region = aregion->region;
 	char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 };
 	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
-	struct mlxsw_sp_acl_erp *erp;
-	unsigned int blocks_count;
+	const struct mlxsw_sp_acl_erp_delta *delta;
+	struct mlxsw_sp_acl_erp_mask *erp_mask;
 	int err;
 
-	blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
 	mlxsw_afk_encode(afk, region->key_info, &rulei->values,
-			 aentry->ht_key.enc_key, mask, 0, blocks_count - 1);
+			 aentry->ht_key.full_enc_key, mask);
 
-	erp = mlxsw_sp_acl_erp_get(aregion, mask, false);
-	if (IS_ERR(erp))
-		return PTR_ERR(erp);
-	aentry->erp = erp;
-	aentry->ht_key.erp_id = mlxsw_sp_acl_erp_id(erp);
+	erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, false);
+	if (IS_ERR(erp_mask))
+		return PTR_ERR(erp_mask);
+	aentry->erp_mask = erp_mask;
+	aentry->ht_key.erp_id = mlxsw_sp_acl_erp_mask_erp_id(erp_mask);
+	memcpy(aentry->enc_key, aentry->ht_key.full_enc_key,
+	       sizeof(aentry->enc_key));
+
+	/* Compute all needed delta information and clear the delta bits
+	 * from the encoded key.
+	 */
+	delta = mlxsw_sp_acl_erp_delta(aentry->erp_mask);
+	aentry->delta_info.start = mlxsw_sp_acl_erp_delta_start(delta);
+	aentry->delta_info.mask = mlxsw_sp_acl_erp_delta_mask(delta);
+	aentry->delta_info.value =
+		mlxsw_sp_acl_erp_delta_value(delta,
+					     aentry->ht_key.full_enc_key);
+	mlxsw_sp_acl_erp_delta_clear(delta, aentry->enc_key);
+
+	/* Add the rule to the list of A-TCAM rules, assuming it
+	 * is destined for the A-TCAM. If the rule does not fit
+	 * into the A-TCAM it will be removed from the list.
+	 */
+	list_add(&aentry->list, &aregion->entries_list);
 
 	/* We can't insert identical rules into the A-TCAM, so fail and
 	 * let the rule spill into C-TCAM
@@ -461,6 +518,13 @@
 	if (err)
 		goto err_rhashtable_insert;
 
+	/* Bloom filter must be updated here, before inserting the rule into
+	 * the A-TCAM.
+	 */
+	err = mlxsw_sp_acl_erp_bf_insert(mlxsw_sp, aregion, erp_mask, aentry);
+	if (err)
+		goto err_bf_insert;
+
 	err = mlxsw_sp_acl_atcam_region_entry_insert(mlxsw_sp, aregion, aentry,
 						     rulei);
 	if (err)
@@ -469,10 +533,13 @@
 	return 0;
 
 err_rule_insert:
+	mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, erp_mask, aentry);
+err_bf_insert:
 	rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
 			       mlxsw_sp_acl_atcam_entries_ht_params);
 err_rhashtable_insert:
-	mlxsw_sp_acl_erp_put(aregion, erp);
+	list_del(&aentry->list);
+	mlxsw_sp_acl_erp_mask_put(aregion, erp_mask);
 	return err;
 }
 
@@ -482,9 +549,21 @@
 			       struct mlxsw_sp_acl_atcam_entry *aentry)
 {
 	mlxsw_sp_acl_atcam_region_entry_remove(mlxsw_sp, aregion, aentry);
+	mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, aentry->erp_mask, aentry);
 	rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
 			       mlxsw_sp_acl_atcam_entries_ht_params);
-	mlxsw_sp_acl_erp_put(aregion, aentry->erp);
+	list_del(&aentry->list);
+	mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask);
+}
+
+static int
+__mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					  struct mlxsw_sp_acl_atcam_region *aregion,
+					  struct mlxsw_sp_acl_atcam_entry *aentry,
+					  struct mlxsw_sp_acl_rule_info *rulei)
+{
+	return mlxsw_sp_acl_atcam_region_entry_action_replace(mlxsw_sp, aregion,
+							      aentry, rulei);
 }
 
 int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
@@ -502,6 +581,7 @@
 	/* It is possible we failed to add the rule to the A-TCAM due to
 	 * exceeded number of masks. Try to spill into C-TCAM.
 	 */
+	trace_mlxsw_sp_acl_atcam_entry_add_ctcam_spill(mlxsw_sp, aregion);
 	err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion,
 					   &achunk->cchunk, &aentry->centry,
 					   rulei, true);
@@ -523,6 +603,27 @@
 		__mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, aregion, aentry);
 }
 
+int
+mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_acl_atcam_region *aregion,
+					struct mlxsw_sp_acl_atcam_entry *aentry,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	int err;
+
+	if (mlxsw_sp_acl_atcam_is_centry(aentry))
+		err = mlxsw_sp_acl_ctcam_entry_action_replace(mlxsw_sp,
+							      &aregion->cregion,
+							      &aentry->centry,
+							      rulei);
+	else
+		err = __mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp,
+								aregion, aentry,
+								rulei);
+
+	return err;
+}
+
 int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_atcam *atcam)
 {
@@ -534,3 +635,14 @@
 {
 	mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam);
 }
+
+void *
+mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+	return mlxsw_sp_acl_erp_rehash_hints_get(aregion);
+}
+
+void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv)
+{
+	mlxsw_sp_acl_erp_rehash_hints_put(hints_priv);
+}
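
Each entry now carries both the full encoded key and a lookup copy with the delta bits cleared; the delta's start, mask and value are kept aside in delta_info. A simplified single-byte model of that split, with illustrative field names:

#include <stdio.h>
#include <string.h>

struct delta_info {
	unsigned int start;	/* byte offset of the delta in the key */
	unsigned char mask;	/* delta bits within that byte */
	unsigned char value;	/* delta bits taken from the full key */
};

int main(void)
{
	unsigned char full_key[4] = { 0xde, 0xad, 0xbe, 0xef };
	unsigned char enc_key[4];
	struct delta_info d = { .start = 2, .mask = 0x0f };

	memcpy(enc_key, full_key, sizeof(enc_key));

	/* Keep the delta value aside, then clear those bits from the
	 * key that is actually written to the A-TCAM.
	 */
	d.value = full_key[d.start] & d.mask;
	enc_key[d.start] &= (unsigned char)~d.mask;

	printf("delta value=0x%x, lookup byte=0x%02x\n", d.value,
	       enc_key[d.start]);
	return 0;
}
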
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
new file mode 100644
index 0000000..3a2de13
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/mutex.h>
+
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp_acl_bf {
+	struct mutex lock; /* Protects Bloom Filter updates. */
+	unsigned int bank_size;
+	refcount_t refcnt[0];
+};
+
+/* The Bloom filter uses a CRC-16 hash over chunks of data, each containing
+ * 4 key blocks, the eRP ID and the region ID. In Spectrum-2, a region key
+ * is composed of up to 12 key blocks, so there can be up to 3 chunks in the
+ * Bloom filter key, depending on the actual number of key blocks used in
+ * the region. The layout of the Bloom filter key is as follows:
+ *
+ * +-------------------------+------------------------+------------------------+
+ * | Chunk 2 Key blocks 11-8 | Chunk 1 Key blocks 7-4 | Chunk 0 Key blocks 3-0 |
+ * +-------------------------+------------------------+------------------------+
+ */
+#define MLXSW_BLOOM_KEY_CHUNKS 3
+#define MLXSW_BLOOM_KEY_LEN 69
+
+/* Each chunk is 23 bytes: 18 bytes hold 4 key blocks of 36 bits each,
+ * 2 bytes hold the eRP ID and region ID, and 3 bytes are zero padding.
+ * The layout of each chunk is as follows:
+ *
+ * +---------+----------------------+-----------------------------------+
+ * | 3 bytes |        2 bytes       |              18 bytes             |
+ * +---------+-----------+----------+-----------------------------------+
+ * | 183:158 |  157:148  | 147:144  |               143:0               |
+ * +---------+-----------+----------+-----------------------------------+
+ * |    0    | region ID |  eRP ID  |      4 Key blocks (18 Bytes)      |
+ * +---------+-----------+----------+-----------------------------------+
+ */
+#define MLXSW_BLOOM_CHUNK_PAD_BYTES 3
+#define MLXSW_BLOOM_CHUNK_KEY_BYTES 18
+#define MLXSW_BLOOM_KEY_CHUNK_BYTES 23
+
+/* The offset of the key block within a chunk is 5 bytes as it comes after
+ * 3 bytes of zero padding and 16 bits of region ID and eRP ID.
+ */
+#define MLXSW_BLOOM_CHUNK_KEY_OFFSET 5
+
+/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
+ * which are copied from the entry's encoded key. Since the encoded key
+ * contains padding, key block 11 starts at offset 2. Key block 7, used
+ * in chunk 1, starts at offset 20, as 4 key blocks occupy 18 bytes.
+ * This array defines the key offsets for easy access when copying key
+ * blocks from the entry key to a Bloom filter chunk.
+ */
+static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
+
+/* This table is just the CRC of each possible byte. It is
+ * computed, MSB first, for the Bloom filter polynomial
+ * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
+ * the implicit x^16).
+ */
+static const u16 mlxsw_sp_acl_bf_crc_tab[256] = {
+0x0000, 0x8529, 0x8f7b, 0x0a52, 0x9bdf, 0x1ef6, 0x14a4, 0x918d,
+0xb297, 0x37be, 0x3dec, 0xb8c5, 0x2948, 0xac61, 0xa633, 0x231a,
+0xe007, 0x652e, 0x6f7c, 0xea55, 0x7bd8, 0xfef1, 0xf4a3, 0x718a,
+0x5290, 0xd7b9, 0xddeb, 0x58c2, 0xc94f, 0x4c66, 0x4634, 0xc31d,
+0x4527, 0xc00e, 0xca5c, 0x4f75, 0xdef8, 0x5bd1, 0x5183, 0xd4aa,
+0xf7b0, 0x7299, 0x78cb, 0xfde2, 0x6c6f, 0xe946, 0xe314, 0x663d,
+0xa520, 0x2009, 0x2a5b, 0xaf72, 0x3eff, 0xbbd6, 0xb184, 0x34ad,
+0x17b7, 0x929e, 0x98cc, 0x1de5, 0x8c68, 0x0941, 0x0313, 0x863a,
+0x8a4e, 0x0f67, 0x0535, 0x801c, 0x1191, 0x94b8, 0x9eea, 0x1bc3,
+0x38d9, 0xbdf0, 0xb7a2, 0x328b, 0xa306, 0x262f, 0x2c7d, 0xa954,
+0x6a49, 0xef60, 0xe532, 0x601b, 0xf196, 0x74bf, 0x7eed, 0xfbc4,
+0xd8de, 0x5df7, 0x57a5, 0xd28c, 0x4301, 0xc628, 0xcc7a, 0x4953,
+0xcf69, 0x4a40, 0x4012, 0xc53b, 0x54b6, 0xd19f, 0xdbcd, 0x5ee4,
+0x7dfe, 0xf8d7, 0xf285, 0x77ac, 0xe621, 0x6308, 0x695a, 0xec73,
+0x2f6e, 0xaa47, 0xa015, 0x253c, 0xb4b1, 0x3198, 0x3bca, 0xbee3,
+0x9df9, 0x18d0, 0x1282, 0x97ab, 0x0626, 0x830f, 0x895d, 0x0c74,
+0x91b5, 0x149c, 0x1ece, 0x9be7, 0x0a6a, 0x8f43, 0x8511, 0x0038,
+0x2322, 0xa60b, 0xac59, 0x2970, 0xb8fd, 0x3dd4, 0x3786, 0xb2af,
+0x71b2, 0xf49b, 0xfec9, 0x7be0, 0xea6d, 0x6f44, 0x6516, 0xe03f,
+0xc325, 0x460c, 0x4c5e, 0xc977, 0x58fa, 0xddd3, 0xd781, 0x52a8,
+0xd492, 0x51bb, 0x5be9, 0xdec0, 0x4f4d, 0xca64, 0xc036, 0x451f,
+0x6605, 0xe32c, 0xe97e, 0x6c57, 0xfdda, 0x78f3, 0x72a1, 0xf788,
+0x3495, 0xb1bc, 0xbbee, 0x3ec7, 0xaf4a, 0x2a63, 0x2031, 0xa518,
+0x8602, 0x032b, 0x0979, 0x8c50, 0x1ddd, 0x98f4, 0x92a6, 0x178f,
+0x1bfb, 0x9ed2, 0x9480, 0x11a9, 0x8024, 0x050d, 0x0f5f, 0x8a76,
+0xa96c, 0x2c45, 0x2617, 0xa33e, 0x32b3, 0xb79a, 0xbdc8, 0x38e1,
+0xfbfc, 0x7ed5, 0x7487, 0xf1ae, 0x6023, 0xe50a, 0xef58, 0x6a71,
+0x496b, 0xcc42, 0xc610, 0x4339, 0xd2b4, 0x579d, 0x5dcf, 0xd8e6,
+0x5edc, 0xdbf5, 0xd1a7, 0x548e, 0xc503, 0x402a, 0x4a78, 0xcf51,
+0xec4b, 0x6962, 0x6330, 0xe619, 0x7794, 0xf2bd, 0xf8ef, 0x7dc6,
+0xbedb, 0x3bf2, 0x31a0, 0xb489, 0x2504, 0xa02d, 0xaa7f, 0x2f56,
+0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1,
+};
+
+static u16 mlxsw_sp_acl_bf_crc_byte(u16 crc, u8 c)
+{
+	return (crc << 8) ^ mlxsw_sp_acl_bf_crc_tab[(crc >> 8) ^ c];
+}
+
+static u16 mlxsw_sp_acl_bf_crc(const u8 *buffer, size_t len)
+{
+	u16 crc = 0;
+
+	while (len--)
+		crc = mlxsw_sp_acl_bf_crc_byte(crc, *buffer++);
+	return crc;
+}
+
+static void
+mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+			   struct mlxsw_sp_acl_atcam_entry *aentry,
+			   char *output, u8 *len)
+{
+	struct mlxsw_afk_key_info *key_info = aregion->region->key_info;
+	u8 chunk_index, chunk_count, block_count;
+	char *chunk = output;
+	__be16 erp_region_id;
+
+	block_count = mlxsw_afk_key_info_blocks_count_get(key_info);
+	chunk_count = 1 + ((block_count - 1) >> 2);
+	erp_region_id = cpu_to_be16(aentry->ht_key.erp_id |
+				   (aregion->region->id << 4));
+	for (chunk_index = MLXSW_BLOOM_KEY_CHUNKS - chunk_count;
+	     chunk_index < MLXSW_BLOOM_KEY_CHUNKS; chunk_index++) {
+		memset(chunk, 0, MLXSW_BLOOM_CHUNK_PAD_BYTES);
+		memcpy(chunk + MLXSW_BLOOM_CHUNK_PAD_BYTES, &erp_region_id,
+		       sizeof(erp_region_id));
+		memcpy(chunk + MLXSW_BLOOM_CHUNK_KEY_OFFSET,
+		       &aentry->enc_key[chunk_key_offsets[chunk_index]],
+		       MLXSW_BLOOM_CHUNK_KEY_BYTES);
+		chunk += MLXSW_BLOOM_KEY_CHUNK_BYTES;
+	}
+	*len = chunk_count * MLXSW_BLOOM_KEY_CHUNK_BYTES;
+}
+
+static unsigned int
+mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
+				     unsigned int erp_bank,
+				     unsigned int bf_index)
+{
+	return erp_bank * bf->bank_size + bf_index;
+}
+
+static unsigned int
+mlxsw_sp_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
+			  struct mlxsw_sp_acl_atcam_region *aregion,
+			  struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	char bf_key[MLXSW_BLOOM_KEY_LEN];
+	u8 bf_size;
+
+	mlxsw_sp_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
+	return mlxsw_sp_acl_bf_crc(bf_key, bf_size);
+}
+
+int
+mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_acl_bf *bf,
+			  struct mlxsw_sp_acl_atcam_region *aregion,
+			  unsigned int erp_bank,
+			  struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	unsigned int rule_index;
+	char *peabfe_pl;
+	u16 bf_index;
+	int err;
+
+	mutex_lock(&bf->lock);
+
+	bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
+	rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
+							  bf_index);
+
+	if (refcount_inc_not_zero(&bf->refcnt[rule_index])) {
+		err = 0;
+		goto unlock;
+	}
+
+	peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL);
+	if (!peabfe_pl) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	mlxsw_reg_peabfe_pack(peabfe_pl);
+	mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 1, erp_bank, bf_index);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl);
+	kfree(peabfe_pl);
+	if (err)
+		goto unlock;
+
+	refcount_set(&bf->refcnt[rule_index], 1);
+	err = 0;
+
+unlock:
+	mutex_unlock(&bf->lock);
+	return err;
+}
+
+void
+mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_acl_bf *bf,
+			  struct mlxsw_sp_acl_atcam_region *aregion,
+			  unsigned int erp_bank,
+			  struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	unsigned int rule_index;
+	char *peabfe_pl;
+	u16 bf_index;
+
+	mutex_lock(&bf->lock);
+
+	bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
+	rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
+							  bf_index);
+
+	if (refcount_dec_and_test(&bf->refcnt[rule_index])) {
+		peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL);
+		if (!peabfe_pl)
+			goto unlock;
+
+		mlxsw_reg_peabfe_pack(peabfe_pl);
+		mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 0, erp_bank, bf_index);
+		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl);
+		kfree(peabfe_pl);
+	}
+
+unlock:
+	mutex_unlock(&bf->lock);
+}
+
+struct mlxsw_sp_acl_bf *
+mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks)
+{
+	struct mlxsw_sp_acl_bf *bf;
+	unsigned int bf_bank_size;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_BF_LOG))
+		return ERR_PTR(-EIO);
+
+	/* Bloom filter size per erp_table_bank is 2^ACL_MAX_BF_LOG. */
+	bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG);
+	bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks),
+		     GFP_KERNEL);
+	if (!bf)
+		return ERR_PTR(-ENOMEM);
+
+	bf->bank_size = bf_bank_size;
+	mutex_init(&bf->lock);
+
+	return bf;
+}
+
+void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
+{
+	mutex_destroy(&bf->lock);
+	kfree(bf);
+}
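
For reference, the 256-entry table above is the standard MSB-first CRC-16 table for polynomial 0x8529. A bit-at-a-time sketch that reproduces the table entries (byte 0x01 hashes to 0x8529) and therefore matches the per-byte step (crc << 8) ^ tab[(crc >> 8) ^ c] — offered for verification, not as driver code:

#include <stdio.h>
#include <stddef.h>

/* Bit-at-a-time MSB-first CRC-16 for polynomial 0x8529. */
static unsigned short bf_crc16(const unsigned char *buf, size_t len)
{
	unsigned short crc = 0;
	size_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= (unsigned short)(buf[i] << 8);
		for (bit = 0; bit < 8; bit++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8529
					     : crc << 1;
	}
	return crc;
}

int main(void)
{
	unsigned char byte = 0x01;

	printf("crc(0x01) = 0x%04x\n", bf_crc16(&byte, 1));	/* 0x8529 */
	return 0;
}
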
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
index e3c6fe8..05680a7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
@@ -46,7 +46,6 @@
 	struct mlxsw_sp_acl_tcam_region *region = cregion->region;
 	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
 	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-	unsigned int blocks_count;
 	char *act_set;
 	u32 priority;
 	char *mask;
@@ -63,9 +62,7 @@
 			     centry->parman_item.index, priority);
 	key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
 	mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
-	blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
-	mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask, 0,
-			 blocks_count - 1);
+	mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask);
 
 	err = cregion->ops->entry_insert(cregion, centry, mask);
 	if (err)
@@ -75,7 +72,15 @@
 	act_set = mlxsw_afa_block_first_set(rulei->act_block);
 	mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
 
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+	if (err)
+		goto err_ptce2_write;
+
+	return 0;
+
+err_ptce2_write:
+	cregion->ops->entry_remove(cregion, centry);
+	return err;
 }
 
 static void
@@ -92,6 +97,26 @@
 	cregion->ops->entry_remove(cregion, centry);
 }
 
+static int
+mlxsw_sp_acl_ctcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					       struct mlxsw_sp_acl_ctcam_region *cregion,
+					       struct mlxsw_sp_acl_ctcam_entry *centry,
+					       struct mlxsw_afa_block *afa_block,
+					       unsigned int priority)
+{
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+	char *act_set;
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_UPDATE,
+			     cregion->region->tcam_region_info,
+			     centry->parman_item.index, priority);
+
+	act_set = mlxsw_afa_block_first_set(afa_block);
+	mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+}
+
 static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv,
 						   unsigned long new_count)
 {
@@ -194,3 +220,14 @@
 	parman_item_remove(cregion->parman, &cchunk->parman_prio,
 			   &centry->parman_item);
 }
+
+int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					    struct mlxsw_sp_acl_ctcam_region *cregion,
+					    struct mlxsw_sp_acl_ctcam_entry *centry,
+					    struct mlxsw_sp_acl_rule_info *rulei)
+{
+	return mlxsw_sp_acl_ctcam_region_entry_action_replace(mlxsw_sp, cregion,
+							      centry,
+							      rulei->act_block,
+							      rulei->priority);
+}
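
mlxsw_sp_acl_ctcam_entry_action_replace() rewrites only the action set at the entry's existing index and priority, the hardware analogue being the PTCE2 WRITE_UPDATE op. A toy in-memory model of those semantics:

#include <stdio.h>

struct centry {
	unsigned int index;	/* parman item index, unchanged */
	unsigned int priority;	/* unchanged by a replace */
	const char *act_set;	/* the only field a replace touches */
};

static int entry_action_replace(struct centry *centry, const char *act_set)
{
	centry->act_set = act_set;	/* models WRITE_UPDATE */
	return 0;
}

int main(void)
{
	struct centry e = { .index = 7, .priority = 100, .act_set = "drop" };

	entry_action_replace(&e, "trap");
	printf("entry %u prio %u action %s\n", e.index, e.priority, e.act_set);
	return 0;
}
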
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
index 0a4fd3c..4c98950 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
@@ -7,7 +7,8 @@
 #include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
-#include <linux/rhashtable.h>
+#include <linux/mutex.h>
+#include <linux/objagg.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
@@ -24,11 +25,14 @@
 	unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1];
 	struct gen_pool *erp_tables;
 	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_acl_bf *bf;
 	unsigned int num_erp_banks;
 };
 
 struct mlxsw_sp_acl_erp_key {
 	char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN];
+#define __MASK_LEN 0x38
+#define __MASK_IDX(i) (__MASK_LEN - (i) - 1)
 	bool ctcam;
 };
 
@@ -36,10 +40,8 @@
 	struct mlxsw_sp_acl_erp_key key;
 	u8 id;
 	u8 index;
-	refcount_t refcnt;
 	DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
 	struct list_head list;
-	struct rhash_head ht_node;
 	struct mlxsw_sp_acl_erp_table *erp_table;
 };
 
@@ -53,7 +55,6 @@
 	DECLARE_BITMAP(erp_id_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION);
 	DECLARE_BITMAP(erp_index_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION);
 	struct list_head atcam_erps_list;
-	struct rhashtable erp_ht;
 	struct mlxsw_sp_acl_erp_core *erp_core;
 	struct mlxsw_sp_acl_atcam_region *aregion;
 	const struct mlxsw_sp_acl_erp_table_ops *ops;
@@ -61,12 +62,9 @@
 	unsigned int num_atcam_erps;
 	unsigned int num_max_atcam_erps;
 	unsigned int num_ctcam_erps;
-};
-
-static const struct rhashtable_params mlxsw_sp_acl_erp_ht_params = {
-	.key_len = sizeof(struct mlxsw_sp_acl_erp_key),
-	.key_offset = offsetof(struct mlxsw_sp_acl_erp, key),
-	.head_offset = offsetof(struct mlxsw_sp_acl_erp, ht_node),
+	unsigned int num_deltas;
+	struct objagg *objagg;
+	struct mutex objagg_lock; /* guards objagg manipulation */
 };
 
 struct mlxsw_sp_acl_erp_table_ops {
@@ -119,14 +117,17 @@
 	.erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy,
 };
 
-bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp)
+static bool
+mlxsw_sp_acl_erp_table_is_used(const struct mlxsw_sp_acl_erp_table *erp_table)
 {
-	return erp->key.ctcam;
+	return erp_table->ops != &erp_single_mask_ops &&
+	       erp_table->ops != &erp_no_mask_ops;
 }
 
-u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp)
+static unsigned int
+mlxsw_sp_acl_erp_bank_get(const struct mlxsw_sp_acl_erp *erp)
 {
-	return erp->id;
+	return erp->index % erp->erp_table->erp_core->num_erp_banks;
 }
 
 static unsigned int
@@ -194,12 +195,15 @@
 
 static int
 mlxsw_sp_acl_erp_master_mask_set(struct mlxsw_sp_acl_erp_table *erp_table,
-				 const struct mlxsw_sp_acl_erp *erp)
+				 struct mlxsw_sp_acl_erp_key *key)
 {
+	DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
 	unsigned long bit;
 	int err;
 
-	for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+	bitmap_from_arr32(mask_bitmap, (u32 *) key->mask,
+			  MLXSW_SP_ACL_TCAM_MASK_LEN);
+	for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
 		mlxsw_sp_acl_erp_master_mask_bit_set(bit,
 						     &erp_table->master_mask);
 
@@ -210,7 +214,7 @@
 	return 0;
 
 err_master_mask_update:
-	for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+	for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
 		mlxsw_sp_acl_erp_master_mask_bit_clear(bit,
 						       &erp_table->master_mask);
 	return err;
@@ -218,12 +222,15 @@
 
 static int
 mlxsw_sp_acl_erp_master_mask_clear(struct mlxsw_sp_acl_erp_table *erp_table,
-				   const struct mlxsw_sp_acl_erp *erp)
+				   struct mlxsw_sp_acl_erp_key *key)
 {
+	DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
 	unsigned long bit;
 	int err;
 
-	for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+	bitmap_from_arr32(mask_bitmap, (u32 *) key->mask,
+			  MLXSW_SP_ACL_TCAM_MASK_LEN);
+	for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
 		mlxsw_sp_acl_erp_master_mask_bit_clear(bit,
 						       &erp_table->master_mask);
 
@@ -234,7 +241,7 @@
 	return 0;
 
 err_master_mask_update:
-	for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+	for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
 		mlxsw_sp_acl_erp_master_mask_bit_set(bit,
 						     &erp_table->master_mask);
 	return err;
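
The master-mask helpers above view the eRP key mask as a bitmap (bitmap_from_arr32()) and walk its set bits (for_each_set_bit()). A small userspace model of that walk, with a plain array standing in for the kernel bitmap helpers:

#include <stdio.h>

#define MASK_BITS 32

int main(void)
{
	unsigned int key_mask = 0x8000000fu;	/* example eRP key mask */
	unsigned int usage[MASK_BITS] = { 0 };
	int bit;

	/* Stand-in for bitmap_from_arr32() + for_each_set_bit(). */
	for (bit = 0; bit < MASK_BITS; bit++)
		if (key_mask & (1u << bit))
			usage[bit]++;	/* master-mask bit now in use */

	for (bit = 0; bit < MASK_BITS; bit++)
		if (usage[bit])
			printf("mask bit %d in use\n", bit);
	return 0;
}
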
@@ -256,26 +263,16 @@
 		goto err_erp_id_get;
 
 	memcpy(&erp->key, key, sizeof(*key));
-	bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask,
-			  MLXSW_SP_ACL_TCAM_MASK_LEN);
 	list_add(&erp->list, &erp_table->atcam_erps_list);
-	refcount_set(&erp->refcnt, 1);
 	erp_table->num_atcam_erps++;
 	erp->erp_table = erp_table;
 
-	err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp);
+	err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &erp->key);
 	if (err)
 		goto err_master_mask_set;
 
-	err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node,
-				     mlxsw_sp_acl_erp_ht_params);
-	if (err)
-		goto err_rhashtable_insert;
-
 	return erp;
 
-err_rhashtable_insert:
-	mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
 err_master_mask_set:
 	erp_table->num_atcam_erps--;
 	list_del(&erp->list);
@@ -290,9 +287,7 @@
 {
 	struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
 
-	rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
-			       mlxsw_sp_acl_erp_ht_params);
-	mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+	mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key);
 	erp_table->num_atcam_erps--;
 	list_del(&erp->list);
 	mlxsw_sp_acl_erp_id_put(erp_table, erp->id);
@@ -525,6 +520,48 @@
 }
 
 static int
+mlxsw_acl_erp_table_bf_add(struct mlxsw_sp_acl_erp_table *erp_table,
+			   struct mlxsw_sp_acl_erp *erp)
+{
+	struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion;
+	unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+	struct mlxsw_sp_acl_atcam_entry *aentry;
+	int err;
+
+	list_for_each_entry(aentry, &aregion->entries_list, list) {
+		err = mlxsw_sp_acl_bf_entry_add(aregion->region->mlxsw_sp,
+						erp_table->erp_core->bf,
+						aregion, erp_bank, aentry);
+		if (err)
+			goto bf_entry_add_err;
+	}
+
+	return 0;
+
+bf_entry_add_err:
+	list_for_each_entry_continue_reverse(aentry, &aregion->entries_list,
+					     list)
+		mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp,
+					  erp_table->erp_core->bf,
+					  aregion, erp_bank, aentry);
+	return err;
+}
+
+static void
+mlxsw_acl_erp_table_bf_del(struct mlxsw_sp_acl_erp_table *erp_table,
+			   struct mlxsw_sp_acl_erp *erp)
+{
+	struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion;
+	unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+	struct mlxsw_sp_acl_atcam_entry *aentry;
+
+	list_for_each_entry_reverse(aentry, &aregion->entries_list, list)
+		mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp,
+					  erp_table->erp_core->bf,
+					  aregion, erp_bank, aentry);
+}
+
+static int
 mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table)
 {
 	struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
@@ -548,16 +585,24 @@
 		goto err_table_master_rp;
 	}
 
-	/* Maintain the same eRP bank for the master RP, so that we
-	 * wouldn't need to update the bloom filter
+	/* Make sure the master RP is using a valid index, as
+	 * only a single eRP row is currently allocated.
 	 */
-	master_rp->index = master_rp->index % erp_core->num_erp_banks;
+	master_rp->index = 0;
 	__set_bit(master_rp->index, erp_table->erp_index_bitmap);
 
 	err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp);
 	if (err)
 		goto err_table_master_rp_add;
 
+	/* Update the Bloom filter before enabling the eRP table, as rules
+	 * on the master RP were not added to the Bloom filter up to this
+	 * point.
+	 */
+	err = mlxsw_acl_erp_table_bf_add(erp_table, master_rp);
+	if (err)
+		goto err_table_bf_add;
+
 	err = mlxsw_sp_acl_erp_table_enable(erp_table, false);
 	if (err)
 		goto err_table_enable;
@@ -565,6 +610,8 @@
 	return 0;
 
 err_table_enable:
+	mlxsw_acl_erp_table_bf_del(erp_table, master_rp);
+err_table_bf_add:
 	mlxsw_sp_acl_erp_table_erp_del(master_rp);
 err_table_master_rp_add:
 	__clear_bit(master_rp->index, erp_table->erp_index_bitmap);
@@ -585,6 +632,7 @@
 	master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
 	if (!master_rp)
 		return;
+	mlxsw_acl_erp_table_bf_del(erp_table, master_rp);
 	mlxsw_sp_acl_erp_table_erp_del(master_rp);
 	__clear_bit(master_rp->index, erp_table->erp_index_bitmap);
 	mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps,
@@ -647,9 +695,55 @@
 	mlxsw_sp_acl_erp_table_enable(erp_table, false);
 }
 
-static void
-mlxsw_sp_acl_erp_ctcam_table_ops_set(struct mlxsw_sp_acl_erp_table *erp_table)
+static int
+__mlxsw_sp_acl_erp_table_other_inc(struct mlxsw_sp_acl_erp_table *erp_table,
+				   unsigned int *inc_num)
 {
+	int err;
+
+	/* If there are C-TCAM eRPs or deltas in use we need to transition
+	 * the region to use the eRP table, if that was not done already.
+	 */
+	if (!mlxsw_sp_acl_erp_table_is_used(erp_table)) {
+		err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
+		if (err)
+			return err;
+	}
+
+	/* When C-TCAM or deltas are used, the eRP table must be used */
+	if (erp_table->ops != &erp_multiple_masks_ops)
+		erp_table->ops = &erp_multiple_masks_ops;
+
+	(*inc_num)++;
+
+	return 0;
+}
+
+static int mlxsw_sp_acl_erp_ctcam_inc(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+	return __mlxsw_sp_acl_erp_table_other_inc(erp_table,
+						  &erp_table->num_ctcam_erps);
+}
+
+static int mlxsw_sp_acl_erp_delta_inc(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+	return __mlxsw_sp_acl_erp_table_other_inc(erp_table,
+						  &erp_table->num_deltas);
+}
+
+static void
+__mlxsw_sp_acl_erp_table_other_dec(struct mlxsw_sp_acl_erp_table *erp_table,
+				   unsigned int *dec_num)
+{
+	(*dec_num)--;
+
+	/* If there are no C-TCAM eRPs or deltas in use, the state we
+	 * transition to depends on the number of A-TCAM eRPs currently
+	 * in use.
+	 */
+	if (erp_table->num_ctcam_erps > 0 || erp_table->num_deltas > 0)
+		return;
+
 	switch (erp_table->num_atcam_erps) {
 	case 2:
 		/* Keep using the eRP table, but correctly set the
@@ -683,9 +777,21 @@
 	}
 }
 
+static void mlxsw_sp_acl_erp_ctcam_dec(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+	__mlxsw_sp_acl_erp_table_other_dec(erp_table,
+					   &erp_table->num_ctcam_erps);
+}
+
+static void mlxsw_sp_acl_erp_delta_dec(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+	__mlxsw_sp_acl_erp_table_other_dec(erp_table,
+					   &erp_table->num_deltas);
+}
+
 static struct mlxsw_sp_acl_erp *
-__mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
-				     struct mlxsw_sp_acl_erp_key *key)
+mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+				   struct mlxsw_sp_acl_erp_key *key)
 {
 	struct mlxsw_sp_acl_erp *erp;
 	int err;
@@ -697,89 +803,41 @@
 	memcpy(&erp->key, key, sizeof(*key));
 	bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask,
 			  MLXSW_SP_ACL_TCAM_MASK_LEN);
-	refcount_set(&erp->refcnt, 1);
-	erp_table->num_ctcam_erps++;
+
+	err = mlxsw_sp_acl_erp_ctcam_inc(erp_table);
+	if (err)
+		goto err_erp_ctcam_inc;
+
 	erp->erp_table = erp_table;
 
-	err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp);
+	err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &erp->key);
 	if (err)
 		goto err_master_mask_set;
 
-	err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node,
-				     mlxsw_sp_acl_erp_ht_params);
-	if (err)
-		goto err_rhashtable_insert;
-
 	err = mlxsw_sp_acl_erp_region_ctcam_enable(erp_table);
 	if (err)
 		goto err_erp_region_ctcam_enable;
 
-	/* When C-TCAM is used, the eRP table must be used */
-	erp_table->ops = &erp_multiple_masks_ops;
-
 	return erp;
 
 err_erp_region_ctcam_enable:
-	rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
-			       mlxsw_sp_acl_erp_ht_params);
-err_rhashtable_insert:
-	mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+	mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key);
 err_master_mask_set:
-	erp_table->num_ctcam_erps--;
+	mlxsw_sp_acl_erp_ctcam_dec(erp_table);
+err_erp_ctcam_inc:
 	kfree(erp);
 	return ERR_PTR(err);
 }
 
-static struct mlxsw_sp_acl_erp *
-mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
-				   struct mlxsw_sp_acl_erp_key *key)
-{
-	struct mlxsw_sp_acl_erp *erp;
-	int err;
-
-	/* There is a special situation where we need to spill rules
-	 * into the C-TCAM, yet the region is still using a master
-	 * mask and thus not performing a lookup in the C-TCAM. This
-	 * can happen when two rules that only differ in priority - and
-	 * thus sharing the same key - are programmed. In this case
-	 * we transition the region to use an eRP table
-	 */
-	err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
-	if (err)
-		return ERR_PTR(err);
-
-	erp = __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
-	if (IS_ERR(erp)) {
-		err = PTR_ERR(erp);
-		goto err_erp_create;
-	}
-
-	return erp;
-
-err_erp_create:
-	mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
-	return ERR_PTR(err);
-}
-
 static void
 mlxsw_sp_acl_erp_ctcam_mask_destroy(struct mlxsw_sp_acl_erp *erp)
 {
 	struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
 
 	mlxsw_sp_acl_erp_region_ctcam_disable(erp_table);
-	rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
-			       mlxsw_sp_acl_erp_ht_params);
-	mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
-	erp_table->num_ctcam_erps--;
+	mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key);
+	mlxsw_sp_acl_erp_ctcam_dec(erp_table);
 	kfree(erp);
-
-	/* Once the last C-TCAM eRP was destroyed, the state we
-	 * transition to depends on the number of A-TCAM eRPs currently
-	 * in use
-	 */
-	if (erp_table->num_ctcam_erps > 0)
-		return;
-	mlxsw_sp_acl_erp_ctcam_table_ops_set(erp_table);
 }
 
 static struct mlxsw_sp_acl_erp *
@@ -790,7 +848,7 @@
 	int err;
 
 	if (key->ctcam)
-		return __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+		return mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
 
 	/* Expand the eRP table for the new eRP, if needed */
 	err = mlxsw_sp_acl_erp_table_expand(erp_table);
@@ -838,7 +896,8 @@
 	mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
 	mlxsw_sp_acl_erp_generic_destroy(erp);
 
-	if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0)
+	if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0 &&
+	    erp_table->num_deltas == 0)
 		erp_table->ops = &erp_two_masks_ops;
 }
 
@@ -940,46 +999,323 @@
 	WARN_ON(1);
 }
 
-struct mlxsw_sp_acl_erp *
-mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion,
-		     const char *mask, bool ctcam)
+struct mlxsw_sp_acl_erp_mask *
+mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion,
+			  const char *mask, bool ctcam)
 {
 	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
 	struct mlxsw_sp_acl_erp_key key;
-	struct mlxsw_sp_acl_erp *erp;
-
-	/* eRPs are allocated from a shared resource, but currently all
-	 * allocations are done under RTNL.
-	 */
-	ASSERT_RTNL();
+	struct objagg_obj *objagg_obj;
 
 	memcpy(key.mask, mask, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
 	key.ctcam = ctcam;
-	erp = rhashtable_lookup_fast(&erp_table->erp_ht, &key,
-				     mlxsw_sp_acl_erp_ht_params);
-	if (erp) {
-		refcount_inc(&erp->refcnt);
-		return erp;
-	}
-
-	return erp_table->ops->erp_create(erp_table, &key);
+	mutex_lock(&erp_table->objagg_lock);
+	objagg_obj = objagg_obj_get(erp_table->objagg, &key);
+	mutex_unlock(&erp_table->objagg_lock);
+	if (IS_ERR(objagg_obj))
+		return ERR_CAST(objagg_obj);
+	return (struct mlxsw_sp_acl_erp_mask *) objagg_obj;
 }
 
-void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion,
-			  struct mlxsw_sp_acl_erp *erp)
+void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion,
+			       struct mlxsw_sp_acl_erp_mask *erp_mask)
 {
+	struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
 	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
 
-	ASSERT_RTNL();
-
-	if (!refcount_dec_and_test(&erp->refcnt))
-		return;
-
-	erp_table->ops->erp_destroy(erp_table, erp);
+	mutex_lock(&erp_table->objagg_lock);
+	objagg_obj_put(erp_table->objagg, objagg_obj);
+	mutex_unlock(&erp_table->objagg_lock);
 }
 
+int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_acl_atcam_region *aregion,
+			       struct mlxsw_sp_acl_erp_mask *erp_mask,
+			       struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+	const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj);
+	unsigned int erp_bank;
+
+	if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table))
+		return 0;
+
+	erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+	return mlxsw_sp_acl_bf_entry_add(mlxsw_sp,
+					erp->erp_table->erp_core->bf,
+					aregion, erp_bank, aentry);
+}
+
+void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_acl_atcam_region *aregion,
+				struct mlxsw_sp_acl_erp_mask *erp_mask,
+				struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+	const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj);
+	unsigned int erp_bank;
+
+	if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table))
+		return;
+
+	erp_bank = mlxsw_sp_acl_erp_bank_get(erp);
+	mlxsw_sp_acl_bf_entry_del(mlxsw_sp,
+				  erp->erp_table->erp_core->bf,
+				  aregion, erp_bank, aentry);
+}
+
+bool
+mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask)
+{
+	struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+	const struct mlxsw_sp_acl_erp_key *key = objagg_obj_raw(objagg_obj);
+
+	return key->ctcam;
+}
+
+u8 mlxsw_sp_acl_erp_mask_erp_id(const struct mlxsw_sp_acl_erp_mask *erp_mask)
+{
+	struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+	const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj);
+
+	return erp->id;
+}
+
+struct mlxsw_sp_acl_erp_delta {
+	struct mlxsw_sp_acl_erp_key key;
+	u16 start;
+	u8 mask;
+};
+
+u16 mlxsw_sp_acl_erp_delta_start(const struct mlxsw_sp_acl_erp_delta *delta)
+{
+	return delta->start;
+}
+
+u8 mlxsw_sp_acl_erp_delta_mask(const struct mlxsw_sp_acl_erp_delta *delta)
+{
+	return delta->mask;
+}
+
+u8 mlxsw_sp_acl_erp_delta_value(const struct mlxsw_sp_acl_erp_delta *delta,
+				const char *enc_key)
+{
+	u16 start = delta->start;
+	u8 mask = delta->mask;
+	u16 tmp;
+
+	if (!mask)
+		return 0;
+
+	tmp = (unsigned char) enc_key[__MASK_IDX(start / 8)];
+	if (start / 8 + 1 < __MASK_LEN)
+		tmp |= (unsigned char) enc_key[__MASK_IDX(start / 8 + 1)] << 8;
+	tmp >>= start % 8;
+	tmp &= mask;
+	return tmp;
+}
+
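
mlxsw_sp_acl_erp_delta_value() extracts up to eight delta bits that may straddle a byte boundary: it loads two adjacent key bytes into a 16-bit value, shifts the pair down by start % 8, and masks. A worked sketch with flat byte indexing (the real code indexes through __MASK_IDX, which is elided here along with the bound check on start / 8 + 1):

#include <assert.h>
#include <stdint.h>

/* Hypothetical flat-layout variant of mlxsw_sp_acl_erp_delta_value(). */
static uint8_t delta_value(uint16_t start, uint8_t mask, const uint8_t *key)
{
	uint16_t tmp;

	if (!mask)
		return 0;
	tmp = key[start / 8];
	tmp |= (uint16_t) key[start / 8 + 1] << 8;	/* next byte */
	tmp >>= start % 8;
	return tmp & mask;
}

int main(void)
{
	/* Bits 11..15 live in byte 1; byte 2 supplies the overflow. */
	uint8_t key[4] = { 0x00, 0xa8, 0x01, 0x00 };

	/* start = 11, mask = 0x1f: byte1 | byte2 << 8 = 0x01a8,
	 * shift right by 3 -> 0x35, mask with 0x1f -> 0x15.
	 */
	assert(delta_value(11, 0x1f, key) == 0x15);
	return 0;
}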
+void mlxsw_sp_acl_erp_delta_clear(const struct mlxsw_sp_acl_erp_delta *delta,
+				  const char *enc_key)
+{
+	u16 start = delta->start;
+	u8 mask = delta->mask;
+	unsigned char *byte;
+	u16 tmp;
+
+	tmp = mask;
+	tmp <<= start % 8;
+	tmp = ~tmp;
+
+	byte = (unsigned char *) &enc_key[__MASK_IDX(start / 8)];
+	*byte &= tmp & 0xff;
+	if (start / 8 + 1 < __MASK_LEN) {
+		byte = (unsigned char *) &enc_key[__MASK_IDX(start / 8 + 1)];
+		*byte &= (tmp >> 8) & 0xff;
+	}
+}
+
+static const struct mlxsw_sp_acl_erp_delta
+mlxsw_sp_acl_erp_delta_default = {};
+
+const struct mlxsw_sp_acl_erp_delta *
+mlxsw_sp_acl_erp_delta(const struct mlxsw_sp_acl_erp_mask *erp_mask)
+{
+	struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
+	const struct mlxsw_sp_acl_erp_delta *delta;
+
+	delta = objagg_obj_delta_priv(objagg_obj);
+	if (!delta)
+		delta = &mlxsw_sp_acl_erp_delta_default;
+	return delta;
+}
+
+static int
+mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key,
+			    const struct mlxsw_sp_acl_erp_key *key,
+			    u16 *delta_start, u8 *delta_mask)
+{
+	int offset = 0;
+	int si = -1;
+	u16 pmask;
+	u16 mask;
+	int i;
+
+	/* The difference between 2 masks can be up to 8 consecutive bits. */
+	for (i = 0; i < __MASK_LEN; i++) {
+		if (parent_key->mask[__MASK_IDX(i)] == key->mask[__MASK_IDX(i)])
+			continue;
+		if (si == -1)
+			si = i;
+		else if (si != i - 1)
+			return -EINVAL;
+	}
+	if (si == -1) {
+		/* The masks are the same; this can happen when eRPs with
+		 * the same mask were created in both the A-TCAM and C-TCAM.
+		 * The only condition under which this can happen is the
+		 * insertion of identical rules. A delta is not possible here.
+		 */
+		return -EINVAL;
+	}
+	pmask = (unsigned char) parent_key->mask[__MASK_IDX(si)];
+	mask = (unsigned char) key->mask[__MASK_IDX(si)];
+	if (si + 1 < __MASK_LEN) {
+		pmask |= (unsigned char) parent_key->mask[__MASK_IDX(si + 1)] << 8;
+		mask |= (unsigned char) key->mask[__MASK_IDX(si + 1)] << 8;
+	}
+
+	if ((pmask ^ mask) & pmask)
+		return -EINVAL;
+	mask &= ~pmask;
+	while (!(mask & (1 << offset)))
+		offset++;
+	while (!(mask & 1))
+		mask >>= 1;
+	if (mask & 0xff00)
+		return -EINVAL;
+
+	*delta_start = si * 8 + offset;
+	*delta_mask = mask;
+
+	return 0;
+}
+
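
mlxsw_sp_acl_erp_delta_fill() accepts a parent/child mask pair only when the child is a superset of the parent and the extra bits fit into eight consecutive positions within two adjacent bytes. A condensed, flat-indexed sketch of the core computation with a worked example (parent byte 0xf0 and child byte 0xfc at byte index 2 yield delta_start 18 and delta_mask 0x03):

#include <assert.h>
#include <stdint.h>

/* Hypothetical flat-layout core of mlxsw_sp_acl_erp_delta_fill(): pmask
 * and mask each hold two adjacent mask bytes (low byte = byte si).
 */
static int delta_fill(uint16_t pmask, uint16_t mask, int si,
		      uint16_t *start, uint8_t *out_mask)
{
	int offset = 0;

	if ((pmask ^ mask) & pmask)	/* child must be a superset */
		return -1;
	mask &= ~pmask;			/* keep only the extra bits */
	while (!(mask & (1 << offset)))
		offset++;
	mask >>= offset;		/* normalize to bit 0 */
	if (mask & 0xff00)		/* more than 8 bits of delta */
		return -1;
	*start = si * 8 + offset;
	*out_mask = mask;
	return 0;
}

int main(void)
{
	uint16_t start;
	uint8_t mask;

	/* Parent byte 2 = 0xf0, child byte 2 = 0xfc. */
	assert(delta_fill(0x00f0, 0x00fc, 2, &start, &mask) == 0);
	assert(start == 18 && mask == 0x03);	/* bits 18..19 differ */
	return 0;
}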
+static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj,
+					 const void *obj)
+{
+	const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj;
+	const struct mlxsw_sp_acl_erp_key *key = obj;
+	u16 delta_start;
+	u8 delta_mask;
+	int err;
+
+	err = mlxsw_sp_acl_erp_delta_fill(parent_key, key,
+					  &delta_start, &delta_mask);
+	return err ? false : true;
+}
+
+static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2)
+{
+	const struct mlxsw_sp_acl_erp_key *key1 = obj1;
+	const struct mlxsw_sp_acl_erp_key *key2 = obj2;
+
+	/* For hints purposes, two objects are considered equal
+	 * if their masks are the same; the "ctcam" value does
+	 * not matter.
+	 */
+	return memcmp(key1->mask, key2->mask, sizeof(key1->mask));
+}
+
+static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj,
+					   void *obj)
+{
+	struct mlxsw_sp_acl_erp_key *parent_key = parent_obj;
+	struct mlxsw_sp_acl_atcam_region *aregion = priv;
+	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+	struct mlxsw_sp_acl_erp_key *key = obj;
+	struct mlxsw_sp_acl_erp_delta *delta;
+	u16 delta_start;
+	u8 delta_mask;
+	int err;
+
+	if (parent_key->ctcam || key->ctcam)
+		return ERR_PTR(-EINVAL);
+	err = mlxsw_sp_acl_erp_delta_fill(parent_key, key,
+					  &delta_start, &delta_mask);
+	if (err)
+		return ERR_PTR(-EINVAL);
+
+	delta = kzalloc(sizeof(*delta), GFP_KERNEL);
+	if (!delta)
+		return ERR_PTR(-ENOMEM);
+	delta->start = delta_start;
+	delta->mask = delta_mask;
+
+	err = mlxsw_sp_acl_erp_delta_inc(erp_table);
+	if (err)
+		goto err_erp_delta_inc;
+
+	memcpy(&delta->key, key, sizeof(*key));
+	err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &delta->key);
+	if (err)
+		goto err_master_mask_set;
+
+	return delta;
+
+err_master_mask_set:
+	mlxsw_sp_acl_erp_delta_dec(erp_table);
+err_erp_delta_inc:
+	kfree(delta);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv)
+{
+	struct mlxsw_sp_acl_erp_delta *delta = delta_priv;
+	struct mlxsw_sp_acl_atcam_region *aregion = priv;
+	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+
+	mlxsw_sp_acl_erp_master_mask_clear(erp_table, &delta->key);
+	mlxsw_sp_acl_erp_delta_dec(erp_table);
+	kfree(delta);
+}
+
+static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj,
+					  unsigned int root_id)
+{
+	struct mlxsw_sp_acl_atcam_region *aregion = priv;
+	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+	struct mlxsw_sp_acl_erp_key *key = obj;
+
+	if (!key->ctcam &&
+	    root_id != OBJAGG_OBJ_ROOT_ID_INVALID &&
+	    root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION)
+		return ERR_PTR(-ENOBUFS);
+	return erp_table->ops->erp_create(erp_table, key);
+}
+
+static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv)
+{
+	struct mlxsw_sp_acl_atcam_region *aregion = priv;
+	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+
+	erp_table->ops->erp_destroy(erp_table, root_priv);
+}
+
+static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = {
+	.obj_size = sizeof(struct mlxsw_sp_acl_erp_key),
+	.delta_check = mlxsw_sp_acl_erp_delta_check,
+	.hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp,
+	.delta_create = mlxsw_sp_acl_erp_delta_create,
+	.delta_destroy = mlxsw_sp_acl_erp_delta_destroy,
+	.root_create = mlxsw_sp_acl_erp_root_create,
+	.root_destroy = mlxsw_sp_acl_erp_root_destroy,
+};
+
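
From here on, eRP lifetime is driven by the objagg callbacks above instead of the removed rhashtable and open-coded refcounts: objagg_obj_get() returns an existing object, nests the new key as a delta under a compatible root (delta_check()/delta_create()), or creates a new root via root_create(), which lands in erp_create. A toy model of that get contract, with put() and error handling elided (this is not the kernel objagg API):

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

struct toy_obj {
	int key;
	int refcount;
	struct toy_obj *parent;		/* NULL if this is a root */
	struct toy_obj *next;
};

static struct toy_obj *toy_list;

static int toy_delta_check(int root_key, int key)
{
	/* Toy rule: a "delta" is allowed when keys differ in one bit. */
	return __builtin_popcount(root_key ^ key) == 1;
}

static struct toy_obj *toy_get(int key)
{
	struct toy_obj *o, *root = NULL;

	for (o = toy_list; o; o = o->next) {
		if (o->key == key) {		/* already aggregated */
			o->refcount++;
			return o;
		}
		if (!o->parent && !root && toy_delta_check(o->key, key))
			root = o;		/* candidate root for a delta */
	}
	o = calloc(1, sizeof(*o));
	o->key = key;
	o->refcount = 1;
	o->parent = root;		/* delta if a root fits, else root */
	if (root)
		root->refcount++;	/* a delta pins its root */
	o->next = toy_list;
	toy_list = o;
	return o;
}

int main(void)
{
	struct toy_obj *a = toy_get(0x0f);	/* becomes a root */
	struct toy_obj *b = toy_get(0x0e);	/* one bit off: nests as delta */

	assert(!a->parent && b->parent == a && a->refcount == 2);
	return 0;
}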
 static struct mlxsw_sp_acl_erp_table *
-mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion)
+mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion,
+			      struct objagg_hints *hints)
 {
 	struct mlxsw_sp_acl_erp_table *erp_table;
 	int err;
@@ -988,18 +1324,22 @@
 	if (!erp_table)
 		return ERR_PTR(-ENOMEM);
 
-	err = rhashtable_init(&erp_table->erp_ht, &mlxsw_sp_acl_erp_ht_params);
-	if (err)
-		goto err_rhashtable_init;
+	erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops,
+					  hints, aregion);
+	if (IS_ERR(erp_table->objagg)) {
+		err = PTR_ERR(erp_table->objagg);
+		goto err_objagg_create;
+	}
 
 	erp_table->erp_core = aregion->atcam->erp_core;
 	erp_table->ops = &erp_no_mask_ops;
 	INIT_LIST_HEAD(&erp_table->atcam_erps_list);
 	erp_table->aregion = aregion;
+	mutex_init(&erp_table->objagg_lock);
 
 	return erp_table;
 
-err_rhashtable_init:
+err_objagg_create:
 	kfree(erp_table);
 	return ERR_PTR(err);
 }
@@ -1008,7 +1348,8 @@
 mlxsw_sp_acl_erp_table_destroy(struct mlxsw_sp_acl_erp_table *erp_table)
 {
 	WARN_ON(!list_empty(&erp_table->atcam_erps_list));
-	rhashtable_destroy(&erp_table->erp_ht);
+	mutex_destroy(&erp_table->objagg_lock);
+	objagg_destroy(erp_table->objagg);
 	kfree(erp_table);
 }
 
@@ -1033,12 +1374,93 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
 }
 
-int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion)
+static int
+mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_acl_atcam_region *aregion,
+			     struct objagg_hints *hints, bool *p_rehash_needed)
 {
-	struct mlxsw_sp_acl_erp_table *erp_table;
+	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+	const struct objagg_stats *ostats;
+	const struct objagg_stats *hstats;
 	int err;
 
-	erp_table = mlxsw_sp_acl_erp_table_create(aregion);
+	*p_rehash_needed = false;
+
+	mutex_lock(&erp_table->objagg_lock);
+	ostats = objagg_stats_get(erp_table->objagg);
+	mutex_unlock(&erp_table->objagg_lock);
+	if (IS_ERR(ostats)) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n");
+		return PTR_ERR(ostats);
+	}
+
+	hstats = objagg_hints_stats_get(hints);
+	if (IS_ERR(hstats)) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP hints stats\n");
+		err = PTR_ERR(hstats);
+		goto err_hints_stats_get;
+	}
+
+	/* Very basic criterion for now. */
+	if (hstats->root_count < ostats->root_count)
+		*p_rehash_needed = true;
+
+	err = 0;
+
+	objagg_stats_put(hstats);
+err_hints_stats_get:
+	objagg_stats_put(ostats);
+	return err;
+}
+
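
The "very basic criterion" is: rehash only when the hinted aggregation strictly reduces the number of root eRPs, since each root costs a physical eRP entry; when there is no improvement, the caller below returns -EAGAIN. Restated as a check with example values:

#include <assert.h>

/* Hedged restatement of the criterion: rehash pays off only when the
 * hinted aggregation needs strictly fewer roots than are in use now.
 */
static int rehash_needed(unsigned int hinted_roots, unsigned int cur_roots)
{
	return hinted_roots < cur_roots;
}

int main(void)
{
	assert(rehash_needed(3, 5));	/* 5 roots now, 3 possible: migrate */
	assert(!rehash_needed(5, 5));	/* no gain: caller sees -EAGAIN */
	return 0;
}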
+void *
+mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+	struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+	struct objagg_hints *hints;
+	bool rehash_needed;
+	int err;
+
+	mutex_lock(&erp_table->objagg_lock);
+	hints = objagg_hints_get(erp_table->objagg,
+				 OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
+	mutex_unlock(&erp_table->objagg_lock);
+	if (IS_ERR(hints)) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n");
+		return ERR_CAST(hints);
+	}
+	err = mlxsw_sp_acl_erp_hints_check(mlxsw_sp, aregion, hints,
+					   &rehash_needed);
+	if (err)
+		goto errout;
+
+	if (!rehash_needed) {
+		err = -EAGAIN;
+		goto errout;
+	}
+	return hints;
+
+errout:
+	objagg_hints_put(hints);
+	return ERR_PTR(err);
+}
+
+void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv)
+{
+	struct objagg_hints *hints = hints_priv;
+
+	objagg_hints_put(hints);
+}
+
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion,
+				 void *hints_priv)
+{
+	struct mlxsw_sp_acl_erp_table *erp_table;
+	struct objagg_hints *hints = hints_priv;
+	int err;
+
+	erp_table = mlxsw_sp_acl_erp_table_create(aregion, hints);
 	if (IS_ERR(erp_table))
 		return PTR_ERR(erp_table);
 	aregion->erp_table = erp_table;
@@ -1118,6 +1540,12 @@
 	if (err)
 		goto err_gen_pool_add;
 
+	erp_core->bf = mlxsw_sp_acl_bf_init(mlxsw_sp, erp_core->num_erp_banks);
+	if (IS_ERR(erp_core->bf)) {
+		err = PTR_ERR(erp_core->bf);
+		goto err_bf_init;
+	}
+
 	/* Different regions require masks of different sizes */
 	err = mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core);
 	if (err)
@@ -1126,6 +1554,8 @@
 	return 0;
 
 err_erp_tables_sizes_query:
+	mlxsw_sp_acl_bf_fini(erp_core->bf);
+err_bf_init:
 err_gen_pool_add:
 	gen_pool_destroy(erp_core->erp_tables);
 	return err;
@@ -1134,6 +1564,7 @@
 static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_acl_erp_core *erp_core)
 {
+	mlxsw_sp_acl_bf_fini(erp_core->bf);
 	gen_pool_destroy(erp_core->erp_tables);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index d409b09..279c241 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -12,7 +12,7 @@
 	MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4),
 	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
 	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
@@ -20,7 +20,7 @@
 	MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4),
 	MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
 	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
@@ -32,13 +32,13 @@
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
 	MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
 	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
 	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4),
 	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
 };
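
The four hunks above widen SRC_SYS_PORT from 8 to 16 bits in the Spectrum-1 key blocks; with an 8-bit element, source system ports above 255 could not be expressed in the flex key at all, and 16 bits presumably matches the width of the underlying field. A quick sanity check of what each width can encode:

#include <assert.h>
#include <stdint.h>

/* Maximum value an n-bit flex-key element can carry. */
static uint32_t elem_max(unsigned int bits)
{
	return (1u << bits) - 1;
}

int main(void)
{
	assert(elem_max(8) == 255);	/* old width: ports > 255 unmatchable */
	assert(elem_max(16) == 65535);	/* new width: full 16-bit port space */
	return 0;
}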
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
@@ -98,8 +98,8 @@
 
 #define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16
 
-static void mlxsw_sp1_afk_encode_block(char *block, int block_index,
-				       char *output)
+static void mlxsw_sp1_afk_encode_block(char *output, int block_index,
+				       char *block)
 {
 	unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE;
 	char *output_indexed = output + offset;
@@ -107,10 +107,19 @@
 	memcpy(output_indexed, block, MLXSW_SP1_AFK_KEY_BLOCK_SIZE);
 }
 
+static void mlxsw_sp1_afk_clear_block(char *output, int block_index)
+{
+	unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE;
+	char *output_indexed = output + offset;
+
+	memset(output_indexed, 0, MLXSW_SP1_AFK_KEY_BLOCK_SIZE);
+}
+
 const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
 	.blocks		= mlxsw_sp1_afk_blocks,
 	.blocks_count	= ARRAY_SIZE(mlxsw_sp1_afk_blocks),
 	.encode_block	= mlxsw_sp1_afk_encode_block,
+	.clear_block	= mlxsw_sp1_afk_clear_block,
 };
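
Note that encode_block()'s parameters were reordered to (output, block_index, block) so it mirrors the new clear_block(output, block_index); on Spectrum-1 both are a plain memcpy/memset over a fixed 16-byte slot per block. A minimal sketch of the pair:

#include <assert.h>
#include <string.h>

#define BLOCK_SIZE 16	/* MLXSW_SP1_AFK_KEY_BLOCK_SIZE */

/* Each block owns a fixed 16-byte slot in the output key. */
static void encode_block(char *output, int block_index, const char *block)
{
	memcpy(output + block_index * BLOCK_SIZE, block, BLOCK_SIZE);
}

static void clear_block(char *output, int block_index)
{
	memset(output + block_index * BLOCK_SIZE, 0, BLOCK_SIZE);
}

int main(void)
{
	char key[3 * BLOCK_SIZE] = { 0 };
	char block[BLOCK_SIZE] = { [0] = 0x5a };

	encode_block(key, 1, block);
	assert(key[BLOCK_SIZE] == 0x5a);
	clear_block(key, 1);
	assert(key[BLOCK_SIZE] == 0);
	return 0;
}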
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = {
@@ -140,7 +149,7 @@
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = {
 	MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
-	MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */
+	MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 8, -1, true), /* RX_ACL_SYSTEM_PORT */
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = {
@@ -158,6 +167,11 @@
 	MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x04, 16, 8),
 };
 
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = {
+	MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_0_7, 0x04, 24, 8),
+	MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_8_10, 0x00, 0, 3),
+};
+
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
 	MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x04, 4),
 };
@@ -201,6 +215,7 @@
 	MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0),
 	MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1),
 	MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2),
+	MLXSW_AFK_BLOCK(0x3C, mlxsw_sp_afk_element_info_ipv4_4),
 	MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0),
 	MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1),
 	MLXSW_AFK_BLOCK(0x42, mlxsw_sp_afk_element_info_ipv6_2),
@@ -263,10 +278,9 @@
 	MLXSW_SP2_AFK_BLOCK_LAYOUT(block11, 0x00, 12),
 };
 
-static void mlxsw_sp2_afk_encode_block(char *block, int block_index,
-				       char *output)
+static void __mlxsw_sp2_afk_block_value_set(char *output, int block_index,
+					    u64 block_value)
 {
-	u64 block_value = mlxsw_sp2_afk_block_value_get(block);
 	const struct mlxsw_sp2_afk_block_layout *block_layout;
 
 	if (WARN_ON(block_index < 0 ||
@@ -278,8 +292,22 @@
 			   &block_layout->item, 0, block_value);
 }
 
+static void mlxsw_sp2_afk_encode_block(char *output, int block_index,
+				       char *block)
+{
+	u64 block_value = mlxsw_sp2_afk_block_value_get(block);
+
+	__mlxsw_sp2_afk_block_value_set(output, block_index, block_value);
+}
+
+static void mlxsw_sp2_afk_clear_block(char *output, int block_index)
+{
+	__mlxsw_sp2_afk_block_value_set(output, block_index, 0);
+}
+
 const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = {
 	.blocks		= mlxsw_sp2_afk_blocks,
 	.blocks_count	= ARRAY_SIZE(mlxsw_sp2_afk_blocks),
 	.encode_block	= mlxsw_sp2_afk_encode_block,
+	.clear_block	= mlxsw_sp2_afk_clear_block,
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index e171513..e993159 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -8,6 +8,8 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <linux/mutex.h>
+#include <trace/events/mlxsw.h>
 
 #include "reg.h"
 #include "core.h"
@@ -23,6 +25,10 @@
 	return ops->priv_size;
 }
 
+#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */
+#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 3000 /* ms */
+#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS 100 /* number of entries */
+
 int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_acl_tcam *tcam)
 {
@@ -33,6 +39,11 @@
 	size_t alloc_size;
 	int err;
 
+	mutex_init(&tcam->lock);
+	tcam->vregion_rehash_intrvl =
+			MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT;
+	INIT_LIST_HEAD(&tcam->vregion_list);
+
 	max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core,
 					      ACL_MAX_TCAM_REGIONS);
 	max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS);
@@ -76,6 +87,7 @@
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 
+	mutex_destroy(&tcam->lock);
 	ops->fini(mlxsw_sp, tcam->priv);
 	kfree(tcam->used_groups);
 	kfree(tcam->used_regions);
@@ -95,8 +107,9 @@
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE))
 		return -EIO;
 
-	max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE);
-	if (rulei->priority > max_priority)
+	/* Priority range is 1..cap_kvd_size-1. */
+	max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE) - 1;
+	if (rulei->priority >= max_priority)
 		return -EINVAL;
 
 	/* Unlike in TC, in HW, higher number means higher priority. */
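
A quick numeric check of the tightened bound: with cap_kvd_size = 1024, max_priority is 1023, so TC priorities 1..1022 pass the check. Assuming the usual inversion that follows this comment (hw = max_priority - priority, not shown in this hunk), the hardware priority then stays in 1..1022 and never reaches 0:

#include <assert.h>

int main(void)
{
	unsigned int kvd_size = 1024;			/* example value */
	unsigned int max_priority = kvd_size - 1;	/* 1023 */

	/* priority >= max_priority is rejected, so 1..1022 remain valid. */
	assert(1022 < max_priority);
	assert(max_priority - 1 == 1022);	/* TC prio 1 -> HW 1022 */
	assert(max_priority - 1022 == 1);	/* TC prio 1022 -> HW 1 */
	return 0;
}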
@@ -152,37 +165,99 @@
 struct mlxsw_sp_acl_tcam_group {
 	struct mlxsw_sp_acl_tcam *tcam;
 	u16 id;
+	struct mutex lock; /* guards region list updates */
 	struct list_head region_list;
 	unsigned int region_count;
-	struct rhashtable chunk_ht;
-	struct mlxsw_sp_acl_tcam_group_ops *ops;
+};
+
+struct mlxsw_sp_acl_tcam_vgroup {
+	struct mlxsw_sp_acl_tcam_group group;
+	struct list_head vregion_list;
+	struct rhashtable vchunk_ht;
 	const struct mlxsw_sp_acl_tcam_pattern *patterns;
 	unsigned int patterns_count;
 	bool tmplt_elusage_set;
 	struct mlxsw_afk_element_usage tmplt_elusage;
+	bool vregion_rehash_enabled;
 };
 
-struct mlxsw_sp_acl_tcam_chunk {
-	struct list_head list; /* Member of a TCAM region */
-	struct rhash_head ht_node; /* Member of a chunk HT */
-	unsigned int priority; /* Priority within the region and group */
-	struct mlxsw_sp_acl_tcam_group *group;
+struct mlxsw_sp_acl_tcam_rehash_ctx {
+	void *hints_priv;
+	bool this_is_rollback;
+	struct mlxsw_sp_acl_tcam_vchunk *current_vchunk; /* vchunk being
+							  * currently migrated.
+							  */
+	struct mlxsw_sp_acl_tcam_ventry *start_ventry; /* ventry to start
+							* migration from in
+							* a vchunk being
+							* currently migrated.
+							*/
+	struct mlxsw_sp_acl_tcam_ventry *stop_ventry; /* ventry to stop
+						       * migration at in
+						       * a vchunk being
+						       * currently migrated.
+						       */
+};
+
+struct mlxsw_sp_acl_tcam_vregion {
+	struct mutex lock; /* Protects consistency of region, region2 pointers
+			    * and vchunk_list.
+			    */
 	struct mlxsw_sp_acl_tcam_region *region;
+	struct mlxsw_sp_acl_tcam_region *region2; /* Used during migration */
+	struct list_head list; /* Member of a TCAM group */
+	struct list_head tlist; /* Member of a TCAM */
+	struct list_head vchunk_list; /* List of vchunks under this vregion */
+	struct mlxsw_afk_key_info *key_info;
+	struct mlxsw_sp_acl_tcam *tcam;
+	struct mlxsw_sp_acl_tcam_vgroup *vgroup;
+	struct {
+		struct delayed_work dw;
+		struct mlxsw_sp_acl_tcam_rehash_ctx ctx;
+	} rehash;
+	struct mlxsw_sp *mlxsw_sp;
 	unsigned int ref_count;
+};
+
+struct mlxsw_sp_acl_tcam_vchunk;
+
+struct mlxsw_sp_acl_tcam_chunk {
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+	struct mlxsw_sp_acl_tcam_region *region;
 	unsigned long priv[0];
 	/* priv has to be always the last item */
 };
 
+struct mlxsw_sp_acl_tcam_vchunk {
+	struct mlxsw_sp_acl_tcam_chunk *chunk;
+	struct mlxsw_sp_acl_tcam_chunk *chunk2; /* Used during migration */
+	struct list_head list; /* Member of a TCAM vregion */
+	struct rhash_head ht_node; /* Member of a chunk HT */
+	struct list_head ventry_list;
+	unsigned int priority; /* Priority within the vregion and group */
+	struct mlxsw_sp_acl_tcam_vgroup *vgroup;
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+	unsigned int ref_count;
+};
+
 struct mlxsw_sp_acl_tcam_entry {
+	struct mlxsw_sp_acl_tcam_ventry *ventry;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
 	unsigned long priv[0];
 	/* priv has to be always the last item */
 };
 
-static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = {
+struct mlxsw_sp_acl_tcam_ventry {
+	struct mlxsw_sp_acl_tcam_entry *entry;
+	struct list_head list; /* Member of a TCAM vchunk */
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+	struct mlxsw_sp_acl_rule_info *rulei;
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = {
 	.key_len = sizeof(unsigned int),
-	.key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority),
-	.head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node),
+	.key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority),
+	.head_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, ht_node),
 	.automatic_shrinking = true,
 };
 
@@ -194,55 +269,90 @@
 	int acl_index = 0;
 
 	mlxsw_reg_pagt_pack(pagt_pl, group->id);
-	list_for_each_entry(region, &group->region_list, list)
-		mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id);
+	list_for_each_entry(region, &group->region_list, list) {
+		bool multi = false;
+
+		/* Check if the next entry in the list has the same vregion. */
+		if (region->list.next != &group->region_list &&
+		    list_next_entry(region, list)->vregion == region->vregion)
+			multi = true;
+		mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++,
+					   region->id, multi);
+	}
 	mlxsw_reg_pagt_size_set(pagt_pl, acl_index);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl);
 }
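
During rehash a vregion temporarily owns two physical regions (region and region2) that sit next to each other in the group list; the multi bit tells the PAGT register that a region and its successor form one logical lookup. A sketch of the sibling detection over a plain array:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct region {
	int id;
	int vregion_id;		/* stands in for the vregion pointer */
};

/* multi is set for every region whose successor shares its vregion,
 * mirroring the loop in mlxsw_sp_acl_tcam_group_update() above.
 */
static bool region_multi(const struct region *r, size_t i, size_t n)
{
	return i + 1 < n && r[i + 1].vregion_id == r[i].vregion_id;
}

int main(void)
{
	/* Regions 10 and 11 belong to one vregion mid-rehash. */
	struct region r[] = { { 10, 1 }, { 11, 1 }, { 12, 2 } };

	assert(region_multi(r, 0, 3));	/* 10: successor is a sibling */
	assert(!region_multi(r, 1, 3));	/* 11: last of its vregion */
	assert(!region_multi(r, 2, 3));
	return 0;
}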
 
 static int
-mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_tcam *tcam,
-			    struct mlxsw_sp_acl_tcam_group *group,
-			    const struct mlxsw_sp_acl_tcam_pattern *patterns,
-			    unsigned int patterns_count,
-			    struct mlxsw_afk_element_usage *tmplt_elusage)
+mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp_acl_tcam *tcam,
+			    struct mlxsw_sp_acl_tcam_group *group)
 {
 	int err;
 
 	group->tcam = tcam;
-	group->patterns = patterns;
-	group->patterns_count = patterns_count;
-	if (tmplt_elusage) {
-		group->tmplt_elusage_set = true;
-		memcpy(&group->tmplt_elusage, tmplt_elusage,
-		       sizeof(group->tmplt_elusage));
-	}
+	mutex_init(&group->lock);
 	INIT_LIST_HEAD(&group->region_list);
+
 	err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id);
 	if (err)
 		return err;
 
-	err = rhashtable_init(&group->chunk_ht,
-			      &mlxsw_sp_acl_tcam_chunk_ht_params);
+	return 0;
+}
+
+static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp_acl_tcam_group *group)
+{
+	struct mlxsw_sp_acl_tcam *tcam = group->tcam;
+
+	mutex_destroy(&group->lock);
+	mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
+	WARN_ON(!list_empty(&group->region_list));
+}
+
+static int
+mlxsw_sp_acl_tcam_vgroup_add(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_acl_tcam *tcam,
+			     struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+			     const struct mlxsw_sp_acl_tcam_pattern *patterns,
+			     unsigned int patterns_count,
+			     struct mlxsw_afk_element_usage *tmplt_elusage,
+			     bool vregion_rehash_enabled)
+{
+	int err;
+
+	vgroup->patterns = patterns;
+	vgroup->patterns_count = patterns_count;
+	vgroup->vregion_rehash_enabled = vregion_rehash_enabled;
+
+	if (tmplt_elusage) {
+		vgroup->tmplt_elusage_set = true;
+		memcpy(&vgroup->tmplt_elusage, tmplt_elusage,
+		       sizeof(vgroup->tmplt_elusage));
+	}
+	INIT_LIST_HEAD(&vgroup->vregion_list);
+
+	err = mlxsw_sp_acl_tcam_group_add(tcam, &vgroup->group);
+	if (err)
+		return err;
+
+	err = rhashtable_init(&vgroup->vchunk_ht,
+			      &mlxsw_sp_acl_tcam_vchunk_ht_params);
 	if (err)
 		goto err_rhashtable_init;
 
 	return 0;
 
 err_rhashtable_init:
-	mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
+	mlxsw_sp_acl_tcam_group_del(&vgroup->group);
 	return err;
 }
 
-static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_acl_tcam_group *group)
+static void
+mlxsw_sp_acl_tcam_vgroup_del(struct mlxsw_sp_acl_tcam_vgroup *vgroup)
 {
-	struct mlxsw_sp_acl_tcam *tcam = group->tcam;
-
-	rhashtable_destroy(&group->chunk_ht);
-	mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
-	WARN_ON(!list_empty(&group->region_list));
+	rhashtable_destroy(&vgroup->vchunk_ht);
+	mlxsw_sp_acl_tcam_group_del(&vgroup->group);
+	WARN_ON(!list_empty(&vgroup->vregion_list));
 }
 
 static int
@@ -282,76 +392,76 @@
 }
 
 static unsigned int
-mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
-	struct mlxsw_sp_acl_tcam_chunk *chunk;
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
 
-	if (list_empty(&region->chunk_list))
+	if (list_empty(&vregion->vchunk_list))
 		return 0;
-	/* As a priority of a region, return priority of the first chunk */
-	chunk = list_first_entry(&region->chunk_list, typeof(*chunk), list);
-	return chunk->priority;
+	/* As a priority of a vregion, return priority of the first vchunk */
+	vchunk = list_first_entry(&vregion->vchunk_list,
+				  typeof(*vchunk), list);
+	return vchunk->priority;
 }
 
 static unsigned int
-mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
-	struct mlxsw_sp_acl_tcam_chunk *chunk;
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
 
-	if (list_empty(&region->chunk_list))
+	if (list_empty(&vregion->vchunk_list))
 		return 0;
-	chunk = list_last_entry(&region->chunk_list, typeof(*chunk), list);
-	return chunk->priority;
-}
-
-static void
-mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group,
-				 struct mlxsw_sp_acl_tcam_region *region)
-{
-	struct mlxsw_sp_acl_tcam_region *region2;
-	struct list_head *pos;
-
-	/* Position the region inside the list according to priority */
-	list_for_each(pos, &group->region_list) {
-		region2 = list_entry(pos, typeof(*region2), list);
-		if (mlxsw_sp_acl_tcam_region_prio(region2) >
-		    mlxsw_sp_acl_tcam_region_prio(region))
-			break;
-	}
-	list_add_tail(&region->list, pos);
-	group->region_count++;
-}
-
-static void
-mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group,
-				 struct mlxsw_sp_acl_tcam_region *region)
-{
-	group->region_count--;
-	list_del(&region->list);
+	vchunk = list_last_entry(&vregion->vchunk_list,
+				 typeof(*vchunk), list);
+	return vchunk->priority;
 }
 
 static int
 mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_acl_tcam_group *group,
-				      struct mlxsw_sp_acl_tcam_region *region)
+				      struct mlxsw_sp_acl_tcam_region *region,
+				      unsigned int priority,
+				      struct mlxsw_sp_acl_tcam_region *next_region)
 {
+	struct mlxsw_sp_acl_tcam_region *region2;
+	struct list_head *pos;
 	int err;
 
-	if (group->region_count == group->tcam->max_group_size)
-		return -ENOBUFS;
+	mutex_lock(&group->lock);
+	if (group->region_count == group->tcam->max_group_size) {
+		err = -ENOBUFS;
+		goto err_region_count_check;
+	}
 
-	mlxsw_sp_acl_tcam_group_list_add(group, region);
+	if (next_region) {
+		/* If the next region is defined, place the new one
+		 * before it. The next one is a sibling.
+		 */
+		pos = &next_region->list;
+	} else {
+		/* Position the region inside the list according to priority */
+		list_for_each(pos, &group->region_list) {
+			region2 = list_entry(pos, typeof(*region2), list);
+			if (mlxsw_sp_acl_tcam_vregion_prio(region2->vregion) >
+			    priority)
+				break;
+		}
+	}
+	list_add_tail(&region->list, pos);
+	region->group = group;
 
 	err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
 	if (err)
 		goto err_group_update;
-	region->group = group;
 
+	group->region_count++;
+	mutex_unlock(&group->lock);
 	return 0;
 
 err_group_update:
-	mlxsw_sp_acl_tcam_group_list_del(group, region);
-	mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+	list_del(&region->list);
+err_region_count_check:
+	mutex_unlock(&group->lock);
 	return err;
 }
 
@@ -361,67 +471,115 @@
 {
 	struct mlxsw_sp_acl_tcam_group *group = region->group;
 
-	mlxsw_sp_acl_tcam_group_list_del(group, region);
+	mutex_lock(&group->lock);
+	list_del(&region->list);
+	group->region_count--;
 	mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+	mutex_unlock(&group->lock);
 }
 
-static struct mlxsw_sp_acl_tcam_region *
-mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group,
-				    unsigned int priority,
-				    struct mlxsw_afk_element_usage *elusage,
-				    bool *p_need_split)
+static int
+mlxsw_sp_acl_tcam_vgroup_vregion_attach(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+					struct mlxsw_sp_acl_tcam_vregion *vregion,
+					unsigned int priority)
 {
-	struct mlxsw_sp_acl_tcam_region *region, *region2;
+	struct mlxsw_sp_acl_tcam_vregion *vregion2;
 	struct list_head *pos;
-	bool issubset;
+	int err;
 
-	list_for_each(pos, &group->region_list) {
-		region = list_entry(pos, typeof(*region), list);
-
-		/* First, check if the requested priority does not rather belong
-		 * under some of the next regions.
-		 */
-		if (pos->next != &group->region_list) { /* not last */
-			region2 = list_entry(pos->next, typeof(*region2), list);
-			if (priority >= mlxsw_sp_acl_tcam_region_prio(region2))
-				continue;
-		}
-
-		issubset = mlxsw_afk_key_info_subset(region->key_info, elusage);
-
-		/* If requested element usage would not fit and the priority
-		 * is lower than the currently inspected region we cannot
-		 * use this region, so return NULL to indicate new region has
-		 * to be created.
-		 */
-		if (!issubset &&
-		    priority < mlxsw_sp_acl_tcam_region_prio(region))
-			return NULL;
-
-		/* If requested element usage would not fit and the priority
-		 * is higher than the currently inspected region we cannot
-		 * use this region. There is still some hope that the next
-		 * region would be the fit. So let it be processed and
-		 * eventually break at the check right above this.
-		 */
-		if (!issubset &&
-		    priority > mlxsw_sp_acl_tcam_region_max_prio(region))
-			continue;
-
-		/* Indicate if the region needs to be split in order to add
-		 * the requested priority. Split is needed when requested
-		 * element usage won't fit into the found region.
-		 */
-		*p_need_split = !issubset;
-		return region;
+	/* Position the vregion inside the list according to priority */
+	list_for_each(pos, &vgroup->vregion_list) {
+		vregion2 = list_entry(pos, typeof(*vregion2), list);
+		if (mlxsw_sp_acl_tcam_vregion_prio(vregion2) > priority)
+			break;
 	}
-	return NULL; /* New region has to be created. */
+	list_add_tail(&vregion->list, pos);
+
+	err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, &vgroup->group,
+						    vregion->region,
+						    priority, NULL);
+	if (err)
+		goto err_region_attach;
+
+	return 0;
+
+err_region_attach:
+	list_del(&vregion->list);
+	return err;
 }
 
 static void
-mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group,
-				     struct mlxsw_afk_element_usage *elusage,
-				     struct mlxsw_afk_element_usage *out)
+mlxsw_sp_acl_tcam_vgroup_vregion_detach(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+	list_del(&vregion->list);
+	if (vregion->region2)
+		mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp,
+						      vregion->region2);
+	mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region);
+}
+
+static struct mlxsw_sp_acl_tcam_vregion *
+mlxsw_sp_acl_tcam_vgroup_vregion_find(struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+				      unsigned int priority,
+				      struct mlxsw_afk_element_usage *elusage,
+				      bool *p_need_split)
+{
+	struct mlxsw_sp_acl_tcam_vregion *vregion, *vregion2;
+	struct list_head *pos;
+	bool issubset;
+
+	list_for_each(pos, &vgroup->vregion_list) {
+		vregion = list_entry(pos, typeof(*vregion), list);
+
+		/* First, check whether the requested priority actually belongs
+		 * under one of the next vregions instead.
+		 */
+		if (pos->next != &vgroup->vregion_list) { /* not last */
+			vregion2 = list_entry(pos->next, typeof(*vregion2),
+					      list);
+			if (priority >=
+			    mlxsw_sp_acl_tcam_vregion_prio(vregion2))
+				continue;
+		}
+
+		issubset = mlxsw_afk_key_info_subset(vregion->key_info,
+						     elusage);
+
+		/* If the requested element usage would not fit and the priority
+		 * is lower than the currently inspected vregion, we cannot
+		 * use this vregion, so return NULL to indicate a new vregion
+		 * has to be created.
+		 */
+		if (!issubset &&
+		    priority < mlxsw_sp_acl_tcam_vregion_prio(vregion))
+			return NULL;
+
+		/* If the requested element usage would not fit and the priority
+		 * is higher than the currently inspected vregion, we cannot
+		 * use this vregion. There is still some hope that the next
+		 * vregion will fit, so let it be processed and eventually
+		 * break at the check right above this.
+		 */
+		if (!issubset &&
+		    priority > mlxsw_sp_acl_tcam_vregion_max_prio(vregion))
+			continue;
+
+		/* Indicate if the vregion needs to be split in order to add
+		 * the requested priority. Split is needed when requested
+		 * element usage won't fit into the found vregion.
+		 */
+		*p_need_split = !issubset;
+		return vregion;
+	}
+	return NULL; /* New vregion has to be created. */
+}
+
+static void
+mlxsw_sp_acl_tcam_vgroup_use_patterns(struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+				      struct mlxsw_afk_element_usage *elusage,
+				      struct mlxsw_afk_element_usage *out)
 {
 	const struct mlxsw_sp_acl_tcam_pattern *pattern;
 	int i;
@@ -429,14 +587,14 @@
 	/* In case the template is set, we don't have to look up the pattern
 	 * and just use the template.
 	 */
-	if (group->tmplt_elusage_set) {
-		memcpy(out, &group->tmplt_elusage, sizeof(*out));
+	if (vgroup->tmplt_elusage_set) {
+		memcpy(out, &vgroup->tmplt_elusage, sizeof(*out));
 		WARN_ON(!mlxsw_afk_element_usage_subset(elusage, out));
 		return;
 	}
 
-	for (i = 0; i < group->patterns_count; i++) {
-		pattern = &group->patterns[i];
+	for (i = 0; i < vgroup->patterns_count; i++) {
+		pattern = &vgroup->patterns[i];
 		mlxsw_afk_element_usage_fill(out, pattern->elements,
 					     pattern->elements_count);
 		if (mlxsw_afk_element_usage_subset(elusage, out))
@@ -510,24 +668,19 @@
 static struct mlxsw_sp_acl_tcam_region *
 mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam *tcam,
-				struct mlxsw_afk_element_usage *elusage)
+				struct mlxsw_sp_acl_tcam_vregion *vregion,
+				void *hints_priv)
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
 	struct mlxsw_sp_acl_tcam_region *region;
 	int err;
 
 	region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL);
 	if (!region)
 		return ERR_PTR(-ENOMEM);
-	INIT_LIST_HEAD(&region->chunk_list);
 	region->mlxsw_sp = mlxsw_sp;
-
-	region->key_info = mlxsw_afk_key_info_get(afk, elusage);
-	if (IS_ERR(region->key_info)) {
-		err = PTR_ERR(region->key_info);
-		goto err_key_info_get;
-	}
+	region->vregion = vregion;
+	region->key_info = vregion->key_info;
 
 	err = mlxsw_sp_acl_tcam_region_id_get(tcam, &region->id);
 	if (err)
@@ -546,7 +699,8 @@
 	if (err)
 		goto err_tcam_region_enable;
 
-	err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region);
+	err = ops->region_init(mlxsw_sp, region->priv, tcam->priv,
+			       region, hints_priv);
 	if (err)
 		goto err_tcam_region_init;
 
@@ -560,8 +714,6 @@
 err_tcam_region_associate:
 	mlxsw_sp_acl_tcam_region_id_put(tcam, region->id);
 err_region_id_get:
-	mlxsw_afk_key_info_put(region->key_info);
-err_key_info_get:
 	kfree(region);
 	return ERR_PTR(err);
 }
@@ -575,116 +727,247 @@
 	ops->region_fini(mlxsw_sp, region->priv);
 	mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
 	mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
-	mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id);
-	mlxsw_afk_key_info_put(region->key_info);
+	mlxsw_sp_acl_tcam_region_id_put(region->group->tcam,
+					region->id);
 	kfree(region);
 }
 
-static int
-mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_acl_tcam_group *group,
-			      unsigned int priority,
-			      struct mlxsw_afk_element_usage *elusage,
-			      struct mlxsw_sp_acl_tcam_chunk *chunk)
+static void
+mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
-	struct mlxsw_sp_acl_tcam_region *region;
-	bool region_created = false;
-	bool need_split;
-	int err;
+	unsigned long interval = vregion->tcam->vregion_rehash_intrvl;
 
-	region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage,
-						     &need_split);
-	if (region && need_split) {
-		/* According to priority, the chunk should belong to an
-		 * existing region. However, this chunk needs elements
-		 * that region does not contain. We need to split the existing
-		 * region into two and create a new region for this chunk
-		 * in between. This is not supported now.
-		 */
-		return -EOPNOTSUPP;
-	}
-	if (!region) {
-		struct mlxsw_afk_element_usage region_elusage;
-
-		mlxsw_sp_acl_tcam_group_use_patterns(group, elusage,
-						     &region_elusage);
-		region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam,
-							 &region_elusage);
-		if (IS_ERR(region))
-			return PTR_ERR(region);
-		region_created = true;
-	}
-
-	chunk->region = region;
-	list_add_tail(&chunk->list, &region->chunk_list);
-
-	if (!region_created)
-		return 0;
-
-	err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region);
-	if (err)
-		goto err_group_region_attach;
-
-	return 0;
-
-err_group_region_attach:
-	mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region);
-	return err;
+	if (!interval)
+		return;
+	mlxsw_core_schedule_dw(&vregion->rehash.dw,
+			       msecs_to_jiffies(interval));
 }
 
 static void
-mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp,
-				struct mlxsw_sp_acl_tcam_chunk *chunk)
-{
-	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
+mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_vregion *vregion,
+				 int *credits);
 
-	list_del(&chunk->list);
-	if (list_empty(&region->chunk_list)) {
-		mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region);
-		mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region);
+static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work)
+{
+	struct mlxsw_sp_acl_tcam_vregion *vregion =
+		container_of(work, struct mlxsw_sp_acl_tcam_vregion,
+			     rehash.dw.work);
+	int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS;
+
+	mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits);
+	if (credits < 0)
+		/* Rehash ran out of credits, so it was interrupted.
+		 * Schedule the work as soon as possible to continue.
+		 */
+		mlxsw_core_schedule_dw(&vregion->rehash.dw, 0);
+	else
+		mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion);
+}
+
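
The credit mechanism bounds how much migration a single delayed-work invocation may do (MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS entries per pass): the rehash routine spends credits per migrated entry, and a negative balance means it was interrupted mid-migration and must be rescheduled immediately instead of after the normal interval. A userspace sketch of the pattern:

#include <assert.h>

#define REHASH_CREDITS 100

/* Pretend migration: consumes one credit per entry, stops when broke. */
static void migrate(int *remaining_entries, int *credits)
{
	while (*remaining_entries > 0) {
		if (*credits < 0)
			return;		/* interrupted: caller reschedules */
		(*remaining_entries)--;
		(*credits)--;
	}
}

int main(void)
{
	int entries = 250, passes = 0;

	/* Each "work invocation" gets a fresh credit budget. */
	while (entries > 0) {
		int credits = REHASH_CREDITS;

		migrate(&entries, &credits);
		passes++;
		if (credits < 0)
			continue;	/* schedule_dw(..., 0): run again now */
		break;			/* done: back to the normal interval */
	}
	assert(entries == 0 && passes == 3);	/* 250 entries, ~100 per pass */
	return 0;
}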
+static void
+mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk)
+{
+	struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion;
+
+	/* If a rule was added to or deleted from a vchunk that is currently
+	 * under rehash migration, we have to reset the ventry pointers
+	 * to make sure all rules are properly migrated.
+	 */
+	if (vregion->rehash.ctx.current_vchunk == vchunk) {
+		vregion->rehash.ctx.start_ventry = NULL;
+		vregion->rehash.ctx.stop_ventry = NULL;
 	}
 }
 
+static void
+mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+	/* If a vchunk was added to or deleted from a vregion, we have to
+	 * reset the current vchunk pointer to make sure all vchunks
+	 * are properly migrated.
+	 */
+	vregion->rehash.ctx.current_vchunk = NULL;
+}
+
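
These two hooks handle classic iterator invalidation: the rehash work walks the vchunk and ventry lists incrementally across invocations, remembering its position in rehash.ctx, and any concurrent insert or delete makes the saved cursors unsafe, so the affected cursor is dropped and that list is rescanned from the top on the next pass. A tiny sketch of the rule:

#include <assert.h>

/* A list walker that resumes from a saved position must forget that
 * position whenever the list changes under it.
 */
struct cursor {
	int pos;	/* -1 means "start from the beginning" */
};

static void list_changed(struct cursor *c)
{
	c->pos = -1;	/* same idea as current_vchunk = NULL above */
}

int main(void)
{
	struct cursor c = { .pos = 7 };	/* mid-walk */

	list_changed(&c);		/* concurrent add/delete */
	assert(c.pos == -1);		/* next pass rescans from the top */
	return 0;
}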
+static struct mlxsw_sp_acl_tcam_vregion *
+mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+				 unsigned int priority,
+				 struct mlxsw_afk_element_usage *elusage)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+	struct mlxsw_sp_acl_tcam *tcam = vgroup->group.tcam;
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+	int err;
+
+	vregion = kzalloc(sizeof(*vregion), GFP_KERNEL);
+	if (!vregion)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&vregion->vchunk_list);
+	mutex_init(&vregion->lock);
+	vregion->tcam = tcam;
+	vregion->mlxsw_sp = mlxsw_sp;
+	vregion->vgroup = vgroup;
+	vregion->ref_count = 1;
+
+	vregion->key_info = mlxsw_afk_key_info_get(afk, elusage);
+	if (IS_ERR(vregion->key_info)) {
+		err = PTR_ERR(vregion->key_info);
+		goto err_key_info_get;
+	}
+
+	vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam,
+							  vregion, NULL);
+	if (IS_ERR(vregion->region)) {
+		err = PTR_ERR(vregion->region);
+		goto err_region_create;
+	}
+
+	err = mlxsw_sp_acl_tcam_vgroup_vregion_attach(mlxsw_sp, vgroup, vregion,
+						      priority);
+	if (err)
+		goto err_vgroup_vregion_attach;
+
+	if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) {
+		/* Create the delayed work for vregion periodic rehash */
+		INIT_DELAYED_WORK(&vregion->rehash.dw,
+				  mlxsw_sp_acl_tcam_vregion_rehash_work);
+		mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion);
+		mutex_lock(&tcam->lock);
+		list_add_tail(&vregion->tlist, &tcam->vregion_list);
+		mutex_unlock(&tcam->lock);
+	}
+
+	return vregion;
+
+err_vgroup_vregion_attach:
+	mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region);
+err_region_create:
+	mlxsw_afk_key_info_put(vregion->key_info);
+err_key_info_get:
+	kfree(vregion);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+	struct mlxsw_sp_acl_tcam_vgroup *vgroup = vregion->vgroup;
+	struct mlxsw_sp_acl_tcam *tcam = vregion->tcam;
+
+	if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) {
+		mutex_lock(&tcam->lock);
+		list_del(&vregion->tlist);
+		mutex_unlock(&tcam->lock);
+		cancel_delayed_work_sync(&vregion->rehash.dw);
+	}
+	mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion);
+	if (vregion->region2)
+		mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region2);
+	mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region);
+	mlxsw_afk_key_info_put(vregion->key_info);
+	mutex_destroy(&vregion->lock);
+	kfree(vregion);
+}
+
+u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_acl_tcam *tcam)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+	u32 vregion_rehash_intrvl;
+
+	if (WARN_ON(!ops->region_rehash_hints_get))
+		return 0;
+	vregion_rehash_intrvl = tcam->vregion_rehash_intrvl;
+	return vregion_rehash_intrvl;
+}
+
+int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_acl_tcam *tcam,
+						u32 val)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+
+	if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val)
+		return -EINVAL;
+	if (WARN_ON(!ops->region_rehash_hints_get))
+		return -EOPNOTSUPP;
+	tcam->vregion_rehash_intrvl = val;
+	mutex_lock(&tcam->lock);
+	list_for_each_entry(vregion, &tcam->vregion_list, tlist) {
+		if (val)
+			mlxsw_core_schedule_dw(&vregion->rehash.dw, 0);
+		else
+			cancel_delayed_work_sync(&vregion->rehash.dw);
+	}
+	mutex_unlock(&tcam->lock);
+	return 0;
+}
+
+static struct mlxsw_sp_acl_tcam_vregion *
+mlxsw_sp_acl_tcam_vregion_get(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+			      unsigned int priority,
+			      struct mlxsw_afk_element_usage *elusage)
+{
+	struct mlxsw_afk_element_usage vregion_elusage;
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+	bool need_split;
+
+	vregion = mlxsw_sp_acl_tcam_vgroup_vregion_find(vgroup, priority,
+							elusage, &need_split);
+	if (vregion) {
+		if (need_split) {
+			/* According to priority, the new vchunk should belong
+			 * to an existing vregion. However, this vchunk needs
+			 * elements that the vregion does not contain. We would
+			 * need to split the existing vregion into two and
+			 * create a new vregion for the new vchunk in between.
+			 * This is not supported yet.
+			 */
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+		vregion->ref_count++;
+		return vregion;
+	}
+
+	mlxsw_sp_acl_tcam_vgroup_use_patterns(vgroup, elusage,
+					      &vregion_elusage);
+
+	return mlxsw_sp_acl_tcam_vregion_create(mlxsw_sp, vgroup, priority,
+						&vregion_elusage);
+}
+
+static void
+mlxsw_sp_acl_tcam_vregion_put(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+	if (--vregion->ref_count)
+		return;
+	mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion);
+}
+
 static struct mlxsw_sp_acl_tcam_chunk *
 mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
-			       struct mlxsw_sp_acl_tcam_group *group,
-			       unsigned int priority,
-			       struct mlxsw_afk_element_usage *elusage)
+			       struct mlxsw_sp_acl_tcam_vchunk *vchunk,
+			       struct mlxsw_sp_acl_tcam_region *region)
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
-	int err;
-
-	if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
-		return ERR_PTR(-EINVAL);
 
 	chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
 	if (!chunk)
 		return ERR_PTR(-ENOMEM);
-	chunk->priority = priority;
-	chunk->group = group;
-	chunk->ref_count = 1;
+	chunk->vchunk = vchunk;
+	chunk->region = region;
 
-	err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority,
-					    elusage, chunk);
-	if (err)
-		goto err_chunk_assoc;
-
-	ops->chunk_init(chunk->region->priv, chunk->priv, priority);
-
-	err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node,
-				     mlxsw_sp_acl_tcam_chunk_ht_params);
-	if (err)
-		goto err_rhashtable_insert;
-
+	ops->chunk_init(region->priv, chunk->priv, vchunk->priority);
 	return chunk;
-
-err_rhashtable_insert:
-	ops->chunk_fini(chunk->priv);
-	mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
-err_chunk_assoc:
-	kfree(chunk);
-	return ERR_PTR(err);
 }
 
 static void
@@ -692,90 +975,168 @@
 				struct mlxsw_sp_acl_tcam_chunk *chunk)
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-	struct mlxsw_sp_acl_tcam_group *group = chunk->group;
 
-	rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node,
-			       mlxsw_sp_acl_tcam_chunk_ht_params);
 	ops->chunk_fini(chunk->priv);
-	mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
 	kfree(chunk);
 }
 
-static struct mlxsw_sp_acl_tcam_chunk *
-mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_tcam_group *group,
-			    unsigned int priority,
-			    struct mlxsw_afk_element_usage *elusage)
+static struct mlxsw_sp_acl_tcam_vchunk *
+mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+				unsigned int priority,
+				struct mlxsw_afk_element_usage *elusage)
 {
-	struct mlxsw_sp_acl_tcam_chunk *chunk;
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+	int err;
 
-	chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority,
-				       mlxsw_sp_acl_tcam_chunk_ht_params);
-	if (chunk) {
-		if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info,
+	if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
+		return ERR_PTR(-EINVAL);
+
+	vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL);
+	if (!vchunk)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&vchunk->ventry_list);
+	vchunk->priority = priority;
+	vchunk->vgroup = vgroup;
+	vchunk->ref_count = 1;
+
+	vregion = mlxsw_sp_acl_tcam_vregion_get(mlxsw_sp, vgroup,
+						priority, elusage);
+	if (IS_ERR(vregion)) {
+		err = PTR_ERR(vregion);
+		goto err_vregion_get;
+	}
+
+	vchunk->vregion = vregion;
+
+	err = rhashtable_insert_fast(&vgroup->vchunk_ht, &vchunk->ht_node,
+				     mlxsw_sp_acl_tcam_vchunk_ht_params);
+	if (err)
+		goto err_rhashtable_insert;
+
+	mutex_lock(&vregion->lock);
+	vchunk->chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk,
+						       vchunk->vregion->region);
+	if (IS_ERR(vchunk->chunk)) {
+		mutex_unlock(&vregion->lock);
+		err = PTR_ERR(vchunk->chunk);
+		goto err_chunk_create;
+	}
+
+	mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion);
+	list_add_tail(&vchunk->list, &vregion->vchunk_list);
+	mutex_unlock(&vregion->lock);
+
+	return vchunk;
+
+err_chunk_create:
+	rhashtable_remove_fast(&vgroup->vchunk_ht, &vchunk->ht_node,
+			       mlxsw_sp_acl_tcam_vchunk_ht_params);
+err_rhashtable_insert:
+	mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vregion);
+err_vregion_get:
+	kfree(vchunk);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_vchunk *vchunk)
+{
+	struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion;
+	struct mlxsw_sp_acl_tcam_vgroup *vgroup = vchunk->vgroup;
+
+	mutex_lock(&vregion->lock);
+	mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion);
+	list_del(&vchunk->list);
+	if (vchunk->chunk2)
+		mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2);
+	mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk);
+	mutex_unlock(&vregion->lock);
+	rhashtable_remove_fast(&vgroup->vchunk_ht, &vchunk->ht_node,
+			       mlxsw_sp_acl_tcam_vchunk_ht_params);
+	mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vchunk->vregion);
+	kfree(vchunk);
+}
+
+static struct mlxsw_sp_acl_tcam_vchunk *
+mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+			     unsigned int priority,
+			     struct mlxsw_afk_element_usage *elusage)
+{
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+
+	vchunk = rhashtable_lookup_fast(&vgroup->vchunk_ht, &priority,
+					mlxsw_sp_acl_tcam_vchunk_ht_params);
+	if (vchunk) {
+		if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info,
 						       elusage)))
 			return ERR_PTR(-EINVAL);
-		chunk->ref_count++;
-		return chunk;
+		vchunk->ref_count++;
+		return vchunk;
 	}
-	return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group,
-					      priority, elusage);
+	return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, vgroup,
+					       priority, elusage);
 }
 
-static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_acl_tcam_chunk *chunk)
+static void
+mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_acl_tcam_vchunk *vchunk)
 {
-	if (--chunk->ref_count)
+	if (--vchunk->ref_count)
 		return;
-	mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk);
+	mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk);
 }
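
The get/put pair above is the standard get-or-create refcount idiom: look the vchunk up by priority, take a reference if it exists, otherwise create it with ref_count = 1, and let the last put destroy it. A minimal sketch of the idiom in plain C (the rhashtable lookup is stubbed out; none of these names are mlxsw's):

#include <stdlib.h>

struct obj { unsigned int prio; unsigned int ref_count; };

/* Stub standing in for the rhashtable lookup keyed by priority. */
static struct obj *obj_lookup(unsigned int prio) { (void)prio; return NULL; }

static struct obj *obj_get(unsigned int prio)
{
	struct obj *o = obj_lookup(prio);

	if (o) {			/* already exists: take a reference */
		o->ref_count++;
		return o;
	}
	o = calloc(1, sizeof(*o));	/* create with the first reference */
	if (!o)
		return NULL;
	o->prio = prio;
	o->ref_count = 1;
	return o;
}

static void obj_put(struct obj *o)
{
	if (--o->ref_count)		/* still referenced elsewhere */
		return;
	free(o);			/* last put destroys the object */
}

int main(void)
{
	struct obj *o = obj_get(1);

	if (o)
		obj_put(o);
	return 0;
}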
 
-static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp)
+static struct mlxsw_sp_acl_tcam_entry *
+mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_acl_tcam_ventry *ventry,
+			       struct mlxsw_sp_acl_tcam_chunk *chunk)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+	struct mlxsw_sp_acl_tcam_entry *entry;
+	int err;
+
+	entry = kzalloc(sizeof(*entry) + ops->entry_priv_size, GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+	entry->ventry = ventry;
+	entry->chunk = chunk;
+
+	err = ops->entry_add(mlxsw_sp, chunk->region->priv, chunk->priv,
+			     entry->priv, ventry->rulei);
+	if (err)
+		goto err_entry_add;
+
+	return entry;
+
+err_entry_add:
+	kfree(entry);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+					    struct mlxsw_sp_acl_tcam_entry *entry)
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 
-	return ops->entry_priv_size;
+	ops->entry_del(mlxsw_sp, entry->chunk->region->priv,
+		       entry->chunk->priv, entry->priv);
+	kfree(entry);
 }
 
-static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
-				       struct mlxsw_sp_acl_tcam_group *group,
+static int
+mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_region *region,
 				       struct mlxsw_sp_acl_tcam_entry *entry,
 				       struct mlxsw_sp_acl_rule_info *rulei)
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-	struct mlxsw_sp_acl_tcam_chunk *chunk;
-	struct mlxsw_sp_acl_tcam_region *region;
-	int err;
 
-	chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority,
-					    &rulei->values.elusage);
-	if (IS_ERR(chunk))
-		return PTR_ERR(chunk);
-
-	region = chunk->region;
-
-	err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv,
-			     entry->priv, rulei);
-	if (err)
-		goto err_entry_add;
-	entry->chunk = chunk;
-
-	return 0;
-
-err_entry_add:
-	mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
-	return err;
-}
-
-static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_acl_tcam_entry *entry)
-{
-	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-	struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
-	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
-
-	ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv);
-	mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
+	return ops->entry_action_replace(mlxsw_sp, region->priv,
+					 entry->priv, rulei);
 }
 
 static int
@@ -784,13 +1145,374 @@
 				     bool *activity)
 {
 	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-	struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
-	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-	return ops->entry_activity_get(mlxsw_sp, region->priv,
+	return ops->entry_activity_get(mlxsw_sp, entry->chunk->region->priv,
 				       entry->priv, activity);
 }
 
+static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_acl_tcam_vgroup *vgroup,
+					struct mlxsw_sp_acl_tcam_ventry *ventry,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+	int err;
+
+	vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, vgroup, rulei->priority,
+					      &rulei->values.elusage);
+	if (IS_ERR(vchunk))
+		return PTR_ERR(vchunk);
+
+	ventry->vchunk = vchunk;
+	ventry->rulei = rulei;
+	vregion = vchunk->vregion;
+
+	mutex_lock(&vregion->lock);
+	ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry,
+						       vchunk->chunk);
+	if (IS_ERR(ventry->entry)) {
+		mutex_unlock(&vregion->lock);
+		err = PTR_ERR(ventry->entry);
+		goto err_entry_create;
+	}
+
+	list_add_tail(&ventry->list, &vchunk->ventry_list);
+	mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(vchunk);
+	mutex_unlock(&vregion->lock);
+
+	return 0;
+
+err_entry_create:
+	mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk);
+	return err;
+}
+
+static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp,
+					 struct mlxsw_sp_acl_tcam_ventry *ventry)
+{
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk;
+	struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion;
+
+	mutex_lock(&vregion->lock);
+	mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(vchunk);
+	list_del(&ventry->list);
+	mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry);
+	mutex_unlock(&vregion->lock);
+	mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk);
+}
+
+static int
+mlxsw_sp_acl_tcam_ventry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_acl_tcam_ventry *ventry,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk;
+
+	return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp,
+						      vchunk->vregion->region,
+						      ventry->entry, rulei);
+}
+
+static int
+mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_acl_tcam_ventry *ventry,
+				      bool *activity)
+{
+	return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp,
+						    ventry->entry, activity);
+}
+
+static int
+mlxsw_sp_acl_tcam_ventry_migrate(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_ventry *ventry,
+				 struct mlxsw_sp_acl_tcam_chunk *chunk,
+				 int *credits)
+{
+	struct mlxsw_sp_acl_tcam_entry *new_entry;
+
+	/* First, check whether the entry is already where we want it to be. */
+	if (ventry->entry->chunk == chunk)
+		return 0;
+
+	if (--(*credits) < 0)
+		return 0;
+
+	new_entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, chunk);
+	if (IS_ERR(new_entry))
+		return PTR_ERR(new_entry);
+	mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry);
+	ventry->entry = new_entry;
+	return 0;
+}
+
+static int
+mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_vchunk *vchunk,
+				       struct mlxsw_sp_acl_tcam_region *region,
+				       struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+	struct mlxsw_sp_acl_tcam_chunk *new_chunk;
+
+	new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region);
+	if (IS_ERR(new_chunk))
+		return PTR_ERR(new_chunk);
+	vchunk->chunk2 = vchunk->chunk;
+	vchunk->chunk = new_chunk;
+	ctx->current_vchunk = vchunk;
+	ctx->start_ventry = NULL;
+	ctx->stop_ventry = NULL;
+	return 0;
+}
+
+static void
+mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_acl_tcam_vchunk *vchunk,
+				     struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+	mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2);
+	vchunk->chunk2 = NULL;
+	ctx->current_vchunk = NULL;
+}
+
+static int
+mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_acl_tcam_vchunk *vchunk,
+				     struct mlxsw_sp_acl_tcam_region *region,
+				     struct mlxsw_sp_acl_tcam_rehash_ctx *ctx,
+				     int *credits)
+{
+	struct mlxsw_sp_acl_tcam_ventry *ventry;
+	int err;
+
+	if (vchunk->chunk->region != region) {
+		err = mlxsw_sp_acl_tcam_vchunk_migrate_start(mlxsw_sp, vchunk,
+							     region, ctx);
+		if (err)
+			return err;
+	} else if (!vchunk->chunk2) {
+		/* The chunk is already as it should be; nothing to do. */
+		return 0;
+	}
+
+	/* If the migration was interrupted, the ventry to resume from is
+	 * stored in the context.
+	 */
+	if (ctx->start_ventry)
+		ventry = ctx->start_ventry;
+	else
+		ventry = list_first_entry(&vchunk->ventry_list,
+					  typeof(*ventry), list);
+
+	list_for_each_entry_from(ventry, &vchunk->ventry_list, list) {
+		/* During rollback, once we reach the ventry that failed
+		 * to migrate, we are done.
+		 */
+		if (ventry == ctx->stop_ventry)
+			break;
+
+		err = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry,
+						       vchunk->chunk, credits);
+		if (err) {
+			if (ctx->this_is_rollback) {
+				/* Save the ventry we stopped at and try
+				 * to continue later on.
+				 */
+				ctx->start_ventry = ventry;
+				return err;
+			}
+			/* Swap the chunk and chunk2 pointers so the follow-up
+			 * rollback call will see the original chunk pointer
+			 * in vchunk->chunk.
+			 */
+			swap(vchunk->chunk, vchunk->chunk2);
+			/* The rollback has to be done from the beginning of
+			 * the chunk, which is why start_ventry is set to NULL.
+			 * However, we know where to stop the rollback:
+			 * at the current ventry.
+			 */
+			ctx->start_ventry = NULL;
+			ctx->stop_ventry = ventry;
+			return err;
+		} else if (*credits < 0) {
+			/* We are out of credits; the rest of the ventries
+			 * will be migrated later. Save the ventry
+			 * we stopped at.
+			 */
+			ctx->start_ventry = ventry;
+			return 0;
+		}
+	}
+
+	mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx);
+	return 0;
+}
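
The function above is a resumable, credit-limited loop with bounded rollback: start_ventry is the cursor to resume from, stop_ventry bounds a rollback pass, and swapping chunk/chunk2 makes the follow-up invocation walk back over the original chunk. The following self-contained C sketch reproduces that control flow with a plain singly-linked list and a stubbed move helper; the names are illustrative, not mlxsw's:

#include <stdbool.h>
#include <stddef.h>

struct entry { int id; struct entry *next; };

struct migrate_ctx {
	struct entry *start;	/* resume cursor, like ctx->start_ventry */
	struct entry *stop;	/* rollback boundary, like ctx->stop_ventry */
	bool rollback;		/* like ctx->this_is_rollback */
};

/* Stub for moving one entry; fails on a chosen id to force a rollback. */
static int move_entry(struct entry *e, int fail_id)
{
	return e->id == fail_id ? -1 : 0;
}

static int migrate(struct entry *head, struct migrate_ctx *ctx,
		   int *credits, int fail_id)
{
	struct entry *e = ctx->start ? ctx->start : head;

	for (; e; e = e->next) {
		/* During rollback, stop at the entry that failed. */
		if (e == ctx->stop)
			break;
		/* Out of budget: remember where to resume and yield. */
		if (--(*credits) < 0) {
			ctx->start = e;
			return 0;
		}
		if (move_entry(e, fail_id)) {
			if (ctx->rollback) {
				/* Failed while rolling back; retry later. */
				ctx->start = e;
				return -1;
			}
			/* Roll back from the start, up to this entry. */
			ctx->start = NULL;
			ctx->stop = e;
			ctx->rollback = true;
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct entry c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct migrate_ctx ctx = { NULL, NULL, false };
	int credits = 10;

	if (migrate(&a, &ctx, &credits, 2))		/* fails at id 2 */
		migrate(&a, &ctx, &credits, -1);	/* rollback pass */
	return 0;
}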
+
+static int
+mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_acl_tcam_vregion *vregion,
+				     struct mlxsw_sp_acl_tcam_rehash_ctx *ctx,
+				     int *credits)
+{
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+	int err;
+
+	/* If the migration was interrupted, the vchunk we were working on
+	 * is stored in the context.
+	 */
+	if (ctx->current_vchunk)
+		vchunk = ctx->current_vchunk;
+	else
+		vchunk = list_first_entry(&vregion->vchunk_list,
+					  typeof(*vchunk), list);
+
+	list_for_each_entry_from(vchunk, &vregion->vchunk_list, list) {
+		err = mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk,
+							   vregion->region,
+							   ctx, credits);
+		if (err || *credits < 0)
+			return err;
+	}
+	return 0;
+}
+
+static int
+mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_tcam_vregion *vregion,
+				  struct mlxsw_sp_acl_tcam_rehash_ctx *ctx,
+				  int *credits)
+{
+	int err, err2;
+
+	trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion);
+	mutex_lock(&vregion->lock);
+	err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
+						   ctx, credits);
+	if (err) {
+		/* If the migration was not successful, we need to swap back
+		 * so the original region pointer is assigned to
+		 * vregion->region again.
+		 */
+		swap(vregion->region, vregion->region2);
+		ctx->current_vchunk = NULL;
+		ctx->this_is_rollback = true;
+		err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
+							    ctx, credits);
+		if (err2) {
+			trace_mlxsw_sp_acl_tcam_vregion_rehash_rollback_failed(mlxsw_sp,
+									       vregion);
+			dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n");
+			/* Let the rollback continue later on. */
+		}
+	}
+	mutex_unlock(&vregion->lock);
+	trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion);
+	return err;
+}
+
+static bool
+mlxsw_sp_acl_tcam_vregion_rehash_in_progress(const struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+	return ctx->hints_priv;
+}
+
+static int
+mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_vregion *vregion,
+				       struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+	unsigned int priority = mlxsw_sp_acl_tcam_vregion_prio(vregion);
+	struct mlxsw_sp_acl_tcam_region *new_region;
+	void *hints_priv;
+	int err;
+
+	trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion);
+
+	hints_priv = ops->region_rehash_hints_get(vregion->region->priv);
+	if (IS_ERR(hints_priv))
+		return PTR_ERR(hints_priv);
+
+	new_region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam,
+						     vregion, hints_priv);
+	if (IS_ERR(new_region)) {
+		err = PTR_ERR(new_region);
+		goto err_region_create;
+	}
+
+	/* vregion->region contains the pointer to the new region
+	 * we are going to migrate to.
+	 */
+	vregion->region2 = vregion->region;
+	vregion->region = new_region;
+	err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp,
+						    vregion->region2->group,
+						    new_region, priority,
+						    vregion->region2);
+	if (err)
+		goto err_group_region_attach;
+
+	ctx->hints_priv = hints_priv;
+	ctx->this_is_rollback = false;
+
+	return 0;
+
+err_group_region_attach:
+	vregion->region = vregion->region2;
+	vregion->region2 = NULL;
+	mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, new_region);
+err_region_create:
+	ops->region_rehash_hints_put(hints_priv);
+	return err;
+}
+
+static void
+mlxsw_sp_acl_tcam_vregion_rehash_end(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_acl_tcam_vregion *vregion,
+				     struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+	struct mlxsw_sp_acl_tcam_region *unused_region = vregion->region2;
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	vregion->region2 = NULL;
+	mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, unused_region);
+	mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, unused_region);
+	ops->region_rehash_hints_put(ctx->hints_priv);
+	ctx->hints_priv = NULL;
+}
+
+static void
+mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_vregion *vregion,
+				 int *credits)
+{
+	struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx;
+	int err;
+
+	/* Check whether the previous rehash work was interrupted;
+	 * if so, we have to continue it now.
+	 * If not, start a new rehash.
+	 */
+	if (!mlxsw_sp_acl_tcam_vregion_rehash_in_progress(ctx)) {
+		err = mlxsw_sp_acl_tcam_vregion_rehash_start(mlxsw_sp,
+							     vregion, ctx);
+		if (err) {
+			if (err != -EAGAIN)
+				dev_err(mlxsw_sp->bus_info->dev, "Failed to get rehash hints\n");
+			return;
+		}
+	}
+
+	err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion,
+						ctx, credits);
+	if (err)
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+
+	if (*credits >= 0)
+		mlxsw_sp_acl_tcam_vregion_rehash_end(mlxsw_sp, vregion, ctx);
+}
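
Taken together, rehash_start/migrate/rehash_end form a driver that can be re-invoked from deferred work until it completes: hints_priv doubles as the in-progress flag, and teardown happens only once a pass finishes within its credit budget. A minimal sketch of that shape, with stubbed phases and hypothetical names:

#include <stddef.h>

struct job { void *in_progress; };		/* like ctx->hints_priv */

static int job_start(struct job *j) { j->in_progress = j; return 0; }
static int job_work(struct job *j, int *credits) { (void)j; --*credits; return 0; }
static void job_end(struct job *j) { j->in_progress = NULL; }

/* Call repeatedly (e.g. from a delayed work item) until it finishes. */
static void job_step(struct job *j, int credits)
{
	if (!j->in_progress && job_start(j))
		return;		/* could not start; try again later */
	if (job_work(j, &credits))
		return;		/* failed; state is kept for rollback */
	if (credits >= 0)
		job_end(j);	/* finished within the budget */
}

int main(void)
{
	struct job j = { NULL };

	job_step(&j, 10);
	return j.in_progress != NULL;
}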
+
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
 	MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
 	MLXSW_AFK_ELEMENT_DMAC_32_47,
@@ -841,11 +1563,11 @@
 	ARRAY_SIZE(mlxsw_sp_acl_tcam_patterns)
 
 struct mlxsw_sp_acl_tcam_flower_ruleset {
-	struct mlxsw_sp_acl_tcam_group group;
+	struct mlxsw_sp_acl_tcam_vgroup vgroup;
 };
 
 struct mlxsw_sp_acl_tcam_flower_rule {
-	struct mlxsw_sp_acl_tcam_entry entry;
+	struct mlxsw_sp_acl_tcam_ventry ventry;
 };
 
 static int
@@ -856,10 +1578,10 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group,
-					   mlxsw_sp_acl_tcam_patterns,
-					   MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
-					   tmplt_elusage);
+	return mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup,
+					    mlxsw_sp_acl_tcam_patterns,
+					    MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
+					    tmplt_elusage, true);
 }
 
 static void
@@ -868,7 +1590,7 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group);
+	mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup);
 }
 
 static int
@@ -879,7 +1601,7 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->group,
+	return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->vgroup.group,
 					    mlxsw_sp_port, ingress);
 }
 
@@ -891,7 +1613,7 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group,
+	mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->vgroup.group,
 				       mlxsw_sp_port, ingress);
 }
 
@@ -900,13 +1622,7 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
-}
-
-static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
-{
-	return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
-	       mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
+	return mlxsw_sp_acl_tcam_group_id(&ruleset->vgroup.group);
 }
 
 static int
@@ -917,8 +1633,8 @@
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 	struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-	return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group,
-					   &rule->entry, rulei);
+	return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->vgroup,
+					    &rule->ventry, rulei);
 }
 
 static void
@@ -926,7 +1642,15 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-	mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry);
+	mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry);
+}
+
+static int
+mlxsw_sp_acl_tcam_flower_rule_action_replace(struct mlxsw_sp *mlxsw_sp,
+					     void *rule_priv,
+					     struct mlxsw_sp_acl_rule_info *rulei)
+{
+	return -EOPNOTSUPP;
 }
 
 static int
@@ -935,8 +1659,8 @@
 {
 	struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-	return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry,
-						    activity);
+	return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry,
+						     activity);
 }
 
 static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
@@ -946,15 +1670,152 @@
 	.ruleset_bind		= mlxsw_sp_acl_tcam_flower_ruleset_bind,
 	.ruleset_unbind		= mlxsw_sp_acl_tcam_flower_ruleset_unbind,
 	.ruleset_group_id	= mlxsw_sp_acl_tcam_flower_ruleset_group_id,
-	.rule_priv_size		= mlxsw_sp_acl_tcam_flower_rule_priv_size,
+	.rule_priv_size		= sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
 	.rule_add		= mlxsw_sp_acl_tcam_flower_rule_add,
 	.rule_del		= mlxsw_sp_acl_tcam_flower_rule_del,
+	.rule_action_replace	= mlxsw_sp_acl_tcam_flower_rule_action_replace,
 	.rule_activity_get	= mlxsw_sp_acl_tcam_flower_rule_activity_get,
 };
 
+struct mlxsw_sp_acl_tcam_mr_ruleset {
+	struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+	struct mlxsw_sp_acl_tcam_vgroup vgroup;
+};
+
+struct mlxsw_sp_acl_tcam_mr_rule {
+	struct mlxsw_sp_acl_tcam_ventry ventry;
+};
+
+static int
+mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam *tcam,
+				 void *ruleset_priv,
+				 struct mlxsw_afk_element_usage *tmplt_elusage)
+{
+	struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
+	int err;
+
+	err = mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup,
+					   mlxsw_sp_acl_tcam_patterns,
+					   MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
+					   tmplt_elusage, false);
+	if (err)
+		return err;
+
+	/* For most TCAM clients it would make sense to take a TCAM chunk
+	 * only when the first rule is written. This is not the case for the
+	 * multicast router, which must be bound to a specific ACL group ID
+	 * that has to exist in HW before the multicast router is initialized.
+	 */
+	ruleset->vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp,
+						       &ruleset->vgroup, 1,
+						       tmplt_elusage);
+	if (IS_ERR(ruleset->vchunk)) {
+		err = PTR_ERR(ruleset->vchunk);
+		goto err_chunk_get;
+	}
+
+	return 0;
+
+err_chunk_get:
+	mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup);
+	return err;
+}
+
+static void
+mlxsw_sp_acl_tcam_mr_ruleset_del(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv)
+{
+	struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
+
+	mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, ruleset->vchunk);
+	mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup);
+}
+
+static int
+mlxsw_sp_acl_tcam_mr_ruleset_bind(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+				  struct mlxsw_sp_port *mlxsw_sp_port,
+				  bool ingress)
+{
+	/* Binding is done when initializing the multicast router */
+	return 0;
+}
+
+static void
+mlxsw_sp_acl_tcam_mr_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
+				    void *ruleset_priv,
+				    struct mlxsw_sp_port *mlxsw_sp_port,
+				    bool ingress)
+{
+}
+
+static u16
+mlxsw_sp_acl_tcam_mr_ruleset_group_id(void *ruleset_priv)
+{
+	struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
+
+	return mlxsw_sp_acl_tcam_group_id(&ruleset->vgroup.group);
+}
+
+static int
+mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+			      void *rule_priv,
+			      struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
+	struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
+
+	return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->vgroup,
+					   &rule->ventry, rulei);
+}
+
+static void
+mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv)
+{
+	struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
+
+	mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry);
+}
+
+static int
+mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp,
+					 void *rule_priv,
+					 struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
+
+	return mlxsw_sp_acl_tcam_ventry_action_replace(mlxsw_sp, &rule->ventry,
+						       rulei);
+}
+
+static int
+mlxsw_sp_acl_tcam_mr_rule_activity_get(struct mlxsw_sp *mlxsw_sp,
+				       void *rule_priv, bool *activity)
+{
+	struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
+
+	return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry,
+						     activity);
+}
+
+static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = {
+	.ruleset_priv_size	= sizeof(struct mlxsw_sp_acl_tcam_mr_ruleset),
+	.ruleset_add		= mlxsw_sp_acl_tcam_mr_ruleset_add,
+	.ruleset_del		= mlxsw_sp_acl_tcam_mr_ruleset_del,
+	.ruleset_bind		= mlxsw_sp_acl_tcam_mr_ruleset_bind,
+	.ruleset_unbind		= mlxsw_sp_acl_tcam_mr_ruleset_unbind,
+	.ruleset_group_id	= mlxsw_sp_acl_tcam_mr_ruleset_group_id,
+	.rule_priv_size		= sizeof(struct mlxsw_sp_acl_tcam_mr_rule),
+	.rule_add		= mlxsw_sp_acl_tcam_mr_rule_add,
+	.rule_del		= mlxsw_sp_acl_tcam_mr_rule_del,
+	.rule_action_replace	= mlxsw_sp_acl_tcam_mr_rule_action_replace,
+	.rule_activity_get	= mlxsw_sp_acl_tcam_mr_rule_activity_get,
+};
+
 static const struct mlxsw_sp_acl_profile_ops *
 mlxsw_sp_acl_tcam_profile_ops_arr[] = {
 	[MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops,
+	[MLXSW_SP_ACL_PROFILE_MR] = &mlxsw_sp_acl_tcam_mr_ops,
 };
 
 const struct mlxsw_sp_acl_profile_ops *
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
index 219a4e2..5965913 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -17,6 +17,9 @@
 	unsigned long *used_groups;  /* bit array */
 	unsigned int max_groups;
 	unsigned int max_group_size;
+	struct mutex lock; /* guards vregion list */
+	struct list_head vregion_list;
+	u32 vregion_rehash_intrvl;   /* ms */
 	unsigned long priv[0];
 	/* priv has to be always the last item */
 };
@@ -26,6 +29,11 @@
 			   struct mlxsw_sp_acl_tcam *tcam);
 void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_tcam *tcam);
+u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_acl_tcam *tcam);
+int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_acl_tcam *tcam,
+						u32 val);
 int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_acl_rule_info *rulei,
 				   u32 *priority, bool fillup_priority);
@@ -43,11 +51,13 @@
 			       struct mlxsw_sp_port *mlxsw_sp_port,
 			       bool ingress);
 	u16 (*ruleset_group_id)(void *ruleset_priv);
-	size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp);
+	size_t rule_priv_size;
 	int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
 			void *ruleset_priv, void *rule_priv,
 			struct mlxsw_sp_acl_rule_info *rulei);
 	void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
+	int (*rule_action_replace)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
+				   struct mlxsw_sp_acl_rule_info *rulei);
 	int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
 				 bool *activity);
 };
@@ -65,11 +75,12 @@
 	(MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE)
 
 struct mlxsw_sp_acl_tcam_group;
+struct mlxsw_sp_acl_tcam_vregion;
 
 struct mlxsw_sp_acl_tcam_region {
-	struct list_head list; /* Member of a TCAM group */
-	struct list_head chunk_list; /* List of chunks under this region */
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
 	struct mlxsw_sp_acl_tcam_group *group;
+	struct list_head list; /* Member of a TCAM group */
 	enum mlxsw_reg_ptar_key_type key_type;
 	u16 id; /* ACL ID and region ID - they are same */
 	char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
@@ -121,6 +132,10 @@
 				  struct mlxsw_sp_acl_ctcam_region *cregion,
 				  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
 				  struct mlxsw_sp_acl_ctcam_entry *centry);
+int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					    struct mlxsw_sp_acl_ctcam_region *cregion,
+					    struct mlxsw_sp_acl_ctcam_entry *centry,
+					    struct mlxsw_sp_acl_rule_info *rulei);
 static inline unsigned int
 mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry)
 {
@@ -144,6 +159,7 @@
 
 struct mlxsw_sp_acl_atcam_region {
 	struct rhashtable entries_ht; /* A-TCAM only */
+	struct list_head entries_list; /* A-TCAM only */
 	struct mlxsw_sp_acl_ctcam_region cregion;
 	const struct mlxsw_sp_acl_atcam_region_ops *ops;
 	struct mlxsw_sp_acl_tcam_region *region;
@@ -154,7 +170,9 @@
 };
 
 struct mlxsw_sp_acl_atcam_entry_ht_key {
-	char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */
+	char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded
+								 * key.
+								 */
 	u8 erp_id;
 };
 
@@ -164,10 +182,19 @@
 
 struct mlxsw_sp_acl_atcam_entry {
 	struct rhash_head ht_node;
+	struct list_head list; /* Member in entries_list */
 	struct mlxsw_sp_acl_atcam_entry_ht_key ht_key;
+	char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key,
+							    * minus delta bits.
+							    */
+	struct {
+		u16 start;
+		u8 mask;
+		u8 value;
+	} delta_info;
 	struct mlxsw_sp_acl_ctcam_entry centry;
 	struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
-	struct mlxsw_sp_acl_erp *erp;
+	struct mlxsw_sp_acl_erp_mask *erp_mask;
 };
 
 static inline struct mlxsw_sp_acl_atcam_region *
@@ -189,6 +216,7 @@
 			       struct mlxsw_sp_acl_atcam *atcam,
 			       struct mlxsw_sp_acl_atcam_region *aregion,
 			       struct mlxsw_sp_acl_tcam_region *region,
+			       void *hints_priv,
 			       const struct mlxsw_sp_acl_ctcam_region_ops *ops);
 void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
 void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
@@ -204,25 +232,74 @@
 				  struct mlxsw_sp_acl_atcam_region *aregion,
 				  struct mlxsw_sp_acl_atcam_chunk *achunk,
 				  struct mlxsw_sp_acl_atcam_entry *aentry);
+int mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+					    struct mlxsw_sp_acl_atcam_region *aregion,
+					    struct mlxsw_sp_acl_atcam_entry *aentry,
+					    struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_atcam *atcam);
 void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
 			     struct mlxsw_sp_acl_atcam *atcam);
+void *
+mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv);
 
-struct mlxsw_sp_acl_erp;
+struct mlxsw_sp_acl_erp_delta;
 
-bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp);
-u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp);
-struct mlxsw_sp_acl_erp *
-mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion,
-		     const char *mask, bool ctcam);
-void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion,
-			  struct mlxsw_sp_acl_erp *erp);
-int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion);
+u16 mlxsw_sp_acl_erp_delta_start(const struct mlxsw_sp_acl_erp_delta *delta);
+u8 mlxsw_sp_acl_erp_delta_mask(const struct mlxsw_sp_acl_erp_delta *delta);
+u8 mlxsw_sp_acl_erp_delta_value(const struct mlxsw_sp_acl_erp_delta *delta,
+				const char *enc_key);
+void mlxsw_sp_acl_erp_delta_clear(const struct mlxsw_sp_acl_erp_delta *delta,
+				  const char *enc_key);
+
+struct mlxsw_sp_acl_erp_mask;
+
+bool
+mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask);
+u8 mlxsw_sp_acl_erp_mask_erp_id(const struct mlxsw_sp_acl_erp_mask *erp_mask);
+const struct mlxsw_sp_acl_erp_delta *
+mlxsw_sp_acl_erp_delta(const struct mlxsw_sp_acl_erp_mask *erp_mask);
+struct mlxsw_sp_acl_erp_mask *
+mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion,
+			  const char *mask, bool ctcam);
+void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion,
+			       struct mlxsw_sp_acl_erp_mask *erp_mask);
+int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_acl_atcam_region *aregion,
+			       struct mlxsw_sp_acl_erp_mask *erp_mask,
+			       struct mlxsw_sp_acl_atcam_entry *aentry);
+void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_acl_atcam_region *aregion,
+				struct mlxsw_sp_acl_erp_mask *erp_mask,
+				struct mlxsw_sp_acl_atcam_entry *aentry);
+void *
+mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv);
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion,
+				 void *hints_priv);
 void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
 int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_acl_atcam *atcam);
 void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_atcam *atcam);
 
+struct mlxsw_sp_acl_bf;
+
+int
+mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_acl_bf *bf,
+			  struct mlxsw_sp_acl_atcam_region *aregion,
+			  unsigned int erp_bank,
+			  struct mlxsw_sp_acl_atcam_entry *aentry);
+void
+mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_acl_bf *bf,
+			  struct mlxsw_sp_acl_atcam_region *aregion,
+			  unsigned int erp_bank,
+			  struct mlxsw_sp_acl_atcam_entry *aentry);
+struct mlxsw_sp_acl_bf *
+mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks);
+void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 3589432..b9eeae3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -6,6 +6,7 @@
 #include <linux/dcbnl.h>
 #include <linux/if_ether.h>
 #include <linux/list.h>
+#include <linux/netlink.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -15,6 +16,8 @@
 struct mlxsw_sp_sb_pr {
 	enum mlxsw_reg_sbpr_mode mode;
 	u32 size;
+	u8 freeze_mode:1,
+	   freeze_size:1;
 };
 
 struct mlxsw_cp_sb_occ {
@@ -25,28 +28,96 @@
 struct mlxsw_sp_sb_cm {
 	u32 min_buff;
 	u32 max_buff;
-	u8 pool;
+	u16 pool_index;
 	struct mlxsw_cp_sb_occ occ;
+	u8 freeze_pool:1,
+	   freeze_thresh:1;
 };
 
+#define MLXSW_SP_SB_INFI -1U
+
 struct mlxsw_sp_sb_pm {
 	u32 min_buff;
 	u32 max_buff;
 	struct mlxsw_cp_sb_occ occ;
 };
 
-#define MLXSW_SP_SB_POOL_COUNT	4
-#define MLXSW_SP_SB_TC_COUNT	8
+struct mlxsw_sp_sb_mm {
+	u32 min_buff;
+	u32 max_buff;
+	u16 pool_index;
+};
+
+struct mlxsw_sp_sb_pool_des {
+	enum mlxsw_reg_sbxx_dir dir;
+	u8 pool;
+};
+
+#define MLXSW_SP_SB_POOL_ING		0
+#define MLXSW_SP_SB_POOL_EGR		4
+#define MLXSW_SP_SB_POOL_EGR_MC		8
+#define MLXSW_SP_SB_POOL_ING_CPU	9
+#define MLXSW_SP_SB_POOL_EGR_CPU	10
+
+static const struct mlxsw_sp_sb_pool_des mlxsw_sp1_sb_pool_dess[] = {
+	{MLXSW_REG_SBXX_DIR_INGRESS, 0},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 1},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 2},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 3},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 0},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 1},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 2},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 3},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 15},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 4},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 4},
+};
+
+static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = {
+	{MLXSW_REG_SBXX_DIR_INGRESS, 0},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 1},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 2},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 3},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 0},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 1},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 2},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 3},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 15},
+	{MLXSW_REG_SBXX_DIR_INGRESS, 4},
+	{MLXSW_REG_SBXX_DIR_EGRESS, 4},
+};
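
With this change, shared-buffer code refers to pools through a single u16 pool_index into the descriptor tables above; the (direction, pool number) pair is recovered by lookup, so e.g. index 8 (MLXSW_SP_SB_POOL_EGR_MC) resolves to egress pool 15 on both ASIC generations. A compilable illustration of the lookup, with simplified enum names rather than the mlxsw ones:

#include <stdio.h>

enum dir { DIR_INGRESS, DIR_EGRESS };
struct pool_des { enum dir dir; unsigned char pool; };

/* Mirrors mlxsw_sp1_sb_pool_dess / mlxsw_sp2_sb_pool_dess above. */
static const struct pool_des dess[] = {
	{ DIR_INGRESS, 0 }, { DIR_INGRESS, 1 }, { DIR_INGRESS, 2 },
	{ DIR_INGRESS, 3 }, { DIR_EGRESS, 0 }, { DIR_EGRESS, 1 },
	{ DIR_EGRESS, 2 }, { DIR_EGRESS, 3 }, { DIR_EGRESS, 15 },
	{ DIR_INGRESS, 4 }, { DIR_EGRESS, 4 },
};

int main(void)
{
	const struct pool_des *des = &dess[8];	/* MLXSW_SP_SB_POOL_EGR_MC */

	printf("dir=%s pool=%u\n",
	       des->dir == DIR_EGRESS ? "egress" : "ingress", des->pool);
	return 0;
}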
+
+#define MLXSW_SP_SB_ING_TC_COUNT 8
+#define MLXSW_SP_SB_EG_TC_COUNT 16
 
 struct mlxsw_sp_sb_port {
-	struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
-	struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
+	struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT];
+	struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT];
+	struct mlxsw_sp_sb_pm *pms;
 };
 
 struct mlxsw_sp_sb {
-	struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT];
+	struct mlxsw_sp_sb_pr *prs;
 	struct mlxsw_sp_sb_port *ports;
 	u32 cell_size;
+	u32 max_headroom_cells;
+	u64 sb_size;
+};
+
+struct mlxsw_sp_sb_vals {
+	unsigned int pool_count;
+	const struct mlxsw_sp_sb_pool_des *pool_dess;
+	const struct mlxsw_sp_sb_pm *pms;
+	const struct mlxsw_sp_sb_pm *pms_cpu;
+	const struct mlxsw_sp_sb_pr *prs;
+	const struct mlxsw_sp_sb_mm *mms;
+	const struct mlxsw_sp_sb_cm *cms_ingress;
+	const struct mlxsw_sp_sb_cm *cms_egress;
+	const struct mlxsw_sp_sb_cm *cms_cpu;
+	unsigned int mms_count;
+	unsigned int cms_ingress_count;
+	unsigned int cms_egress_count;
+	unsigned int cms_cpu_count;
 };
 
 u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells)
@@ -59,96 +130,132 @@
 	return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size);
 }
 
-static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp,
-						 u8 pool,
-						 enum mlxsw_reg_sbxx_dir dir)
+u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp)
 {
-	return &mlxsw_sp->sb->prs[dir][pool];
+	return mlxsw_sp->sb->max_headroom_cells;
+}
+
+static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp,
+						 u16 pool_index)
+{
+	return &mlxsw_sp->sb->prs[pool_index];
+}
+
+static bool mlxsw_sp_sb_cm_exists(u8 pg_buff, enum mlxsw_reg_sbxx_dir dir)
+{
+	if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
+		return pg_buff < MLXSW_SP_SB_ING_TC_COUNT;
+	else
+		return pg_buff < MLXSW_SP_SB_EG_TC_COUNT;
 }
 
 static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp,
 						 u8 local_port, u8 pg_buff,
 						 enum mlxsw_reg_sbxx_dir dir)
 {
-	return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff];
+	struct mlxsw_sp_sb_port *sb_port = &mlxsw_sp->sb->ports[local_port];
+
+	WARN_ON(!mlxsw_sp_sb_cm_exists(pg_buff, dir));
+	if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
+		return &sb_port->ing_cms[pg_buff];
+	else
+		return &sb_port->eg_cms[pg_buff];
 }
 
 static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp,
-						 u8 local_port, u8 pool,
-						 enum mlxsw_reg_sbxx_dir dir)
+						 u8 local_port, u16 pool_index)
 {
-	return &mlxsw_sp->sb->ports[local_port].pms[dir][pool];
+	return &mlxsw_sp->sb->ports[local_port].pms[pool_index];
 }
 
-static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool,
-				enum mlxsw_reg_sbxx_dir dir,
-				enum mlxsw_reg_sbpr_mode mode, u32 size)
+static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
+				enum mlxsw_reg_sbpr_mode mode,
+				u32 size, bool infi_size)
 {
+	const struct mlxsw_sp_sb_pool_des *des =
+		&mlxsw_sp->sb_vals->pool_dess[pool_index];
 	char sbpr_pl[MLXSW_REG_SBPR_LEN];
 	struct mlxsw_sp_sb_pr *pr;
 	int err;
 
-	mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size);
+	mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode,
+			    size, infi_size);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl);
 	if (err)
 		return err;
 
-	pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+	if (infi_size)
+		size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp->sb->sb_size);
+	pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
 	pr->mode = mode;
 	pr->size = size;
 	return 0;
 }
 
 static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				u8 pg_buff, enum mlxsw_reg_sbxx_dir dir,
-				u32 min_buff, u32 max_buff, u8 pool)
+				u8 pg_buff, u32 min_buff, u32 max_buff,
+				bool infi_max, u16 pool_index)
 {
+	const struct mlxsw_sp_sb_pool_des *des =
+		&mlxsw_sp->sb_vals->pool_dess[pool_index];
 	char sbcm_pl[MLXSW_REG_SBCM_LEN];
+	struct mlxsw_sp_sb_cm *cm;
 	int err;
 
-	mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir,
-			    min_buff, max_buff, pool);
+	mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir,
+			    min_buff, max_buff, infi_max, des->pool);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl);
 	if (err)
 		return err;
-	if (pg_buff < MLXSW_SP_SB_TC_COUNT) {
-		struct mlxsw_sp_sb_cm *cm;
 
-		cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir);
+	if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) {
+		if (infi_max)
+			max_buff = mlxsw_sp_bytes_cells(mlxsw_sp,
+							mlxsw_sp->sb->sb_size);
+
+		cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff,
+					des->dir);
 		cm->min_buff = min_buff;
 		cm->max_buff = max_buff;
-		cm->pool = pool;
+		cm->pool_index = pool_index;
 	}
 	return 0;
 }
 
 static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				u8 pool, enum mlxsw_reg_sbxx_dir dir,
-				u32 min_buff, u32 max_buff)
+				u16 pool_index, u32 min_buff, u32 max_buff)
 {
+	const struct mlxsw_sp_sb_pool_des *des =
+		&mlxsw_sp->sb_vals->pool_dess[pool_index];
 	char sbpm_pl[MLXSW_REG_SBPM_LEN];
 	struct mlxsw_sp_sb_pm *pm;
 	int err;
 
-	mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false,
+	mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false,
 			    min_buff, max_buff);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl);
 	if (err)
 		return err;
 
-	pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
+	pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index);
 	pm->min_buff = min_buff;
 	pm->max_buff = max_buff;
 	return 0;
 }
 
 static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				    u8 pool, enum mlxsw_reg_sbxx_dir dir,
-				    struct list_head *bulk_list)
+				    u16 pool_index, struct list_head *bulk_list)
 {
+	const struct mlxsw_sp_sb_pool_des *des =
+		&mlxsw_sp->sb_vals->pool_dess[pool_index];
 	char sbpm_pl[MLXSW_REG_SBPM_LEN];
 
-	mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0);
+	if (local_port == MLXSW_PORT_CPU_PORT &&
+	    des->dir == MLXSW_REG_SBXX_DIR_INGRESS)
+		return 0;
+
+	mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir,
+			    true, 0, 0);
 	return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
 				     bulk_list, NULL, 0);
 }
@@ -163,38 +270,44 @@
 }
 
 static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				    u8 pool, enum mlxsw_reg_sbxx_dir dir,
-				    struct list_head *bulk_list)
+				    u16 pool_index, struct list_head *bulk_list)
 {
+	const struct mlxsw_sp_sb_pool_des *des =
+		&mlxsw_sp->sb_vals->pool_dess[pool_index];
 	char sbpm_pl[MLXSW_REG_SBPM_LEN];
 	struct mlxsw_sp_sb_pm *pm;
 
-	pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
-	mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0);
+	if (local_port == MLXSW_PORT_CPU_PORT &&
+	    des->dir == MLXSW_REG_SBXX_DIR_INGRESS)
+		return 0;
+
+	pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index);
+	mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir,
+			    false, 0, 0);
 	return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
 				     bulk_list,
 				     mlxsw_sp_sb_pm_occ_query_cb,
 				     (unsigned long) pm);
 }
 
-static const u16 mlxsw_sp_pbs[] = {
-	[0] = 2 * ETH_FRAME_LEN,
-	[9] = 2 * MLXSW_PORT_MAX_MTU,
-};
-
-#define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs)
+/* 1/4 of the headroom necessary for a 100Gbps port and a 100m cable. */
+#define MLXSW_SP_PB_HEADROOM 25632
 #define MLXSW_SP_PB_UNUSED 8
 
 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	const u32 pbs[] = {
+		[0] = MLXSW_SP_PB_HEADROOM * mlxsw_sp_port->mapping.width,
+		[9] = MLXSW_PORT_MAX_MTU,
+	};
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char pbmc_pl[MLXSW_REG_PBMC_LEN];
 	int i;
 
 	mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port,
 			    0xffff, 0xffff / 2);
-	for (i = 0; i < MLXSW_SP_PBS_LEN; i++) {
-		u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]);
+	for (i = 0; i < ARRAY_SIZE(pbs); i++) {
+		u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, pbs[i]);
 
 		if (i == MLXSW_SP_PB_UNUSED)
 			continue;
@@ -227,140 +340,267 @@
 	return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port);
 }
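
Buffer 0 is now sized per port by scaling the headroom constant by the port's width and converting the result to cells. A worked example under assumed numbers (a 4-lane port and a 96-byte cell; the cell size is not stated by this patch):

#include <stdio.h>

#define PB_HEADROOM 25632	/* 1/4 headroom, 100Gbps port, 100m cable */
#define CELL_SIZE 96		/* assumed cell size in bytes */

int main(void)
{
	unsigned int width = 4;				/* assumed 4-lane port */
	unsigned int bytes = PB_HEADROOM * width;	/* 102528 bytes */
	/* Same rounding as mlxsw_sp_bytes_cells(): DIV_ROUND_UP. */
	unsigned int cells = (bytes + CELL_SIZE - 1) / CELL_SIZE;

	printf("%u bytes -> %u cells\n", bytes, cells);	/* -> 1068 cells */
	return 0;
}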
 
+static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_sb_port *sb_port)
+{
+	struct mlxsw_sp_sb_pm *pms;
+
+	pms = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*pms),
+		      GFP_KERNEL);
+	if (!pms)
+		return -ENOMEM;
+	sb_port->pms = pms;
+	return 0;
+}
+
+static void mlxsw_sp_sb_port_fini(struct mlxsw_sp_sb_port *sb_port)
+{
+	kfree(sb_port->pms);
+}
+
 static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp)
 {
 	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+	struct mlxsw_sp_sb_pr *prs;
+	int i;
+	int err;
 
 	mlxsw_sp->sb->ports = kcalloc(max_ports,
 				      sizeof(struct mlxsw_sp_sb_port),
 				      GFP_KERNEL);
 	if (!mlxsw_sp->sb->ports)
 		return -ENOMEM;
+
+	prs = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*prs),
+		      GFP_KERNEL);
+	if (!prs) {
+		err = -ENOMEM;
+		goto err_alloc_prs;
+	}
+	mlxsw_sp->sb->prs = prs;
+
+	for (i = 0; i < max_ports; i++) {
+		err = mlxsw_sp_sb_port_init(mlxsw_sp, &mlxsw_sp->sb->ports[i]);
+		if (err)
+			goto err_sb_port_init;
+	}
+
 	return 0;
+
+err_sb_port_init:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]);
+	kfree(mlxsw_sp->sb->prs);
+err_alloc_prs:
+	kfree(mlxsw_sp->sb->ports);
+	return err;
 }
 
 static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+	int i;
+
+	for (i = max_ports - 1; i >= 0; i--)
+		mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]);
+	kfree(mlxsw_sp->sb->prs);
 	kfree(mlxsw_sp->sb->ports);
 }
 
-#define MLXSW_SP_SB_PR_INGRESS_SIZE	12440000
-#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000)
-#define MLXSW_SP_SB_PR_EGRESS_SIZE	13232000
-
 #define MLXSW_SP_SB_PR(_mode, _size)	\
 	{				\
 		.mode = _mode,		\
 		.size = _size,		\
 	}
 
-static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = {
+#define MLXSW_SP_SB_PR_EXT(_mode, _size, _freeze_mode, _freeze_size)	\
+	{								\
+		.mode = _mode,						\
+		.size = _size,						\
+		.freeze_mode = _freeze_mode,				\
+		.freeze_size = _freeze_size,				\
+	}
+
+#define MLXSW_SP1_SB_PR_INGRESS_SIZE	12440000
+#define MLXSW_SP1_SB_PR_EGRESS_SIZE	13232000
+#define MLXSW_SP1_SB_PR_CPU_SIZE	(256 * 1000)
+
+/* Order according to mlxsw_sp1_sb_pool_dess */
+static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-		       MLXSW_SP_SB_PR_INGRESS_SIZE),
+		       MLXSW_SP1_SB_PR_INGRESS_SIZE),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-		       MLXSW_SP_SB_PR_INGRESS_MNG_SIZE),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+			   MLXSW_SP1_SB_PR_EGRESS_SIZE, true, false),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI,
+			   true, true),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+			   MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+			   MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
 };
 
-#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress)
+#define MLXSW_SP2_SB_PR_INGRESS_SIZE	35297568
+#define MLXSW_SP2_SB_PR_EGRESS_SIZE	35297568
+#define MLXSW_SP2_SB_PR_CPU_SIZE	(256 * 1000)
 
-static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = {
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE),
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+/* Order according to mlxsw_sp2_sb_pool_dess */
+static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
+		       MLXSW_SP2_SB_PR_INGRESS_SIZE),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+			   MLXSW_SP2_SB_PR_EGRESS_SIZE, true, false),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI,
+			   true, true),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+			   MLXSW_SP2_SB_PR_CPU_SIZE, true, false),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
+			   MLXSW_SP2_SB_PR_CPU_SIZE, true, false),
 };
 
-#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress)
-
-static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
-				  enum mlxsw_reg_sbxx_dir dir,
-				  const struct mlxsw_sp_sb_pr *prs,
-				  size_t prs_len)
+static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
+				const struct mlxsw_sp_sb_pr *prs,
+				size_t prs_len)
 {
 	int i;
 	int err;
 
 	for (i = 0; i < prs_len; i++) {
-		u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size);
+		u32 size = prs[i].size;
+		u32 size_cells;
 
-		err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size);
+		if (size == MLXSW_SP_SB_INFI) {
+			err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
+						   0, true);
+		} else {
+			size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
+			err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
+						   size_cells, false);
+		}
 		if (err)
 			return err;
 	}
 	return 0;
 }
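
MLXSW_SP_SB_INFI is a sentinel (-1U), not a real byte count: an infinite pool is written with infi_size set and a size of 0, while finite sizes are converted from bytes to cells first. A small sketch of that dispatch, with the register write stubbed out and an assumed 96-byte cell:

#include <stdio.h>

#define SB_INFI (~0U)		/* sentinel, like MLXSW_SP_SB_INFI */
#define CELL_SIZE 96		/* assumed cell size in bytes */

/* Stub standing in for mlxsw_sp_sb_pr_write(). */
static int pr_write(unsigned int size_cells, int infi_size)
{
	printf("size=%u infi=%d\n", size_cells, infi_size);
	return 0;
}

static int prs_init(const unsigned int *sizes, int n)
{
	int i, err;

	for (i = 0; i < n; i++) {
		if (sizes[i] == SB_INFI)
			err = pr_write(0, 1);
		else
			err = pr_write((sizes[i] + CELL_SIZE - 1) / CELL_SIZE,
				       0);
		if (err)
			return err;
	}
	return 0;
}

int main(void)
{
	const unsigned int sizes[] = { 13232000, SB_INFI };

	return prs_init(sizes, 2);
}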
 
-static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp)
-{
-	int err;
-
-	err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS,
-				     mlxsw_sp_sb_prs_ingress,
-				     MLXSW_SP_SB_PRS_INGRESS_LEN);
-	if (err)
-		return err;
-	return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS,
-				      mlxsw_sp_sb_prs_egress,
-				      MLXSW_SP_SB_PRS_EGRESS_LEN);
-}
-
 #define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool)	\
 	{						\
 		.min_buff = _min_buff,			\
 		.max_buff = _max_buff,			\
-		.pool = _pool,				\
+		.pool_index = _pool,			\
 	}
 
-static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
-	MLXSW_SP_SB_CM(10000, 8, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
-	MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */
-	MLXSW_SP_SB_CM(20000, 1, 3),
+#define MLXSW_SP_SB_CM_ING(_min_buff, _max_buff)	\
+	{						\
+		.min_buff = _min_buff,			\
+		.max_buff = _max_buff,			\
+		.pool_index = MLXSW_SP_SB_POOL_ING,	\
+	}
+
+#define MLXSW_SP_SB_CM_EGR(_min_buff, _max_buff)	\
+	{						\
+		.min_buff = _min_buff,			\
+		.max_buff = _max_buff,			\
+		.pool_index = MLXSW_SP_SB_POOL_EGR,	\
+	}
+
+#define MLXSW_SP_SB_CM_EGR_MC(_min_buff, _max_buff)	\
+	{						\
+		.min_buff = _min_buff,			\
+		.max_buff = _max_buff,			\
+		.pool_index = MLXSW_SP_SB_POOL_EGR_MC,	\
+		.freeze_pool = true,			\
+		.freeze_thresh = true,			\
+	}
+
+static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_ingress[] = {
+	MLXSW_SP_SB_CM_ING(10000, 8),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, 0), /* dummy, this PG does not exist */
+	MLXSW_SP_SB_CM(10000, 8, MLXSW_SP_SB_POOL_ING_CPU),
 };
 
-#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress)
-
-static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(0, 140000, 15),
-	MLXSW_SP_SB_CM(1, 0xff, 0),
+static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_ingress[] = {
+	MLXSW_SP_SB_CM_ING(0, 7),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_CM_ING(0, 0), /* dummy, this PG does not exist */
+	MLXSW_SP_SB_CM(10000, 8, MLXSW_SP_SB_POOL_ING_CPU),
 };
 
-#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress)
+static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_egress[] = {
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR(1500, 9),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR(1, 0xff),
+};
 
-#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0)
+static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = {
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR(0, 7),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR_MC(0, MLXSW_SP_SB_INFI),
+	MLXSW_SP_SB_CM_EGR(1, 0xff),
+};
+
+#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, MLXSW_SP_SB_POOL_EGR_CPU)
 
 static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
 	MLXSW_SP_CPU_PORT_SB_CM,
-	MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
-	MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
-	MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
-	MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
-	MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
 	MLXSW_SP_CPU_PORT_SB_CM,
-	MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
@@ -387,30 +627,49 @@
 	MLXSW_SP_CPU_PORT_SB_CM,
 };
 
-#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \
-	ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms)
+static bool
+mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index)
+{
+	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
+
+	return pr->mode == MLXSW_REG_SBPR_MODE_STATIC;
+}
 
 static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				  enum mlxsw_reg_sbxx_dir dir,
 				  const struct mlxsw_sp_sb_cm *cms,
 				  size_t cms_len)
 {
+	const struct mlxsw_sp_sb_vals *sb_vals = mlxsw_sp->sb_vals;
 	int i;
 	int err;
 
 	for (i = 0; i < cms_len; i++) {
 		const struct mlxsw_sp_sb_cm *cm;
 		u32 min_buff;
+		u32 max_buff;
 
 		if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS)
 			continue; /* PG number 8 does not exist, skip it */
 		cm = &cms[i];
-		/* All pools are initialized using dynamic thresholds,
-		 * therefore 'max_buff' isn't specified in cells.
-		 */
+		if (WARN_ON(sb_vals->pool_dess[cm->pool_index].dir != dir))
+			continue;
+
 		min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff);
-		err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir,
-					   min_buff, cm->max_buff, cm->pool);
+		max_buff = cm->max_buff;
+		if (max_buff == MLXSW_SP_SB_INFI) {
+			err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i,
+						   min_buff, 0,
+						   true, cm->pool_index);
+		} else {
+			if (mlxsw_sp_sb_pool_is_static(mlxsw_sp,
+						       cm->pool_index))
+				max_buff = mlxsw_sp_bytes_cells(mlxsw_sp,
+								max_buff);
+			err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i,
+						   min_buff, max_buff,
+						   false, cm->pool_index);
+		}
 		if (err)
 			return err;
 	}
@@ -419,27 +678,28 @@
 
 static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
-	err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp,
+	err = __mlxsw_sp_sb_cms_init(mlxsw_sp,
 				     mlxsw_sp_port->local_port,
 				     MLXSW_REG_SBXX_DIR_INGRESS,
-				     mlxsw_sp_sb_cms_ingress,
-				     MLXSW_SP_SB_CMS_INGRESS_LEN);
+				     mlxsw_sp->sb_vals->cms_ingress,
+				     mlxsw_sp->sb_vals->cms_ingress_count);
 	if (err)
 		return err;
 	return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp,
 				      mlxsw_sp_port->local_port,
 				      MLXSW_REG_SBXX_DIR_EGRESS,
-				      mlxsw_sp_sb_cms_egress,
-				      MLXSW_SP_SB_CMS_EGRESS_LEN);
+				      mlxsw_sp->sb_vals->cms_egress,
+				      mlxsw_sp->sb_vals->cms_egress_count);
 }
 
 static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp)
 {
 	return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS,
-				      mlxsw_sp_cpu_port_sb_cms,
-				      MLXSW_SP_CPU_PORT_SB_MCS_LEN);
+				      mlxsw_sp->sb_vals->cms_cpu,
+				      mlxsw_sp->sb_vals->cms_cpu_count);
 }
 
 #define MLXSW_SP_SB_PM(_min_buff, _max_buff)	\
@@ -448,38 +708,73 @@
 		.max_buff = _max_buff,		\
 	}
 
-static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = {
+/* Order according to mlxsw_sp1_sb_pool_dess */
+static const struct mlxsw_sp_sb_pm mlxsw_sp1_sb_pms[] = {
 	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
 	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
 	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
-	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
-};
-
-#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress)
-
-static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = {
+	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
 	MLXSW_SP_SB_PM(0, 7),
 	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
 	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
 	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+	MLXSW_SP_SB_PM(10000, 90000),
+	MLXSW_SP_SB_PM(0, 8),	/* 50% occupancy */
+	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
 };
 
-#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress)
+/* Order according to mlxsw_sp2_sb_pool_dess */
+static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = {
+	MLXSW_SP_SB_PM(0, 7),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 7),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(10000, 90000),
+	MLXSW_SP_SB_PM(0, 8),	/* 50% occupancy */
+	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+};
 
-static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				       enum mlxsw_reg_sbxx_dir dir,
-				       const struct mlxsw_sp_sb_pm *pms,
-				       size_t pms_len)
+/* Order according to mlxsw_sp*_sb_pool_dess */
+static const struct mlxsw_sp_sb_pm mlxsw_sp_cpu_port_sb_pms[] = {
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, 90000),
+	MLXSW_SP_SB_PM(0, 0),
+	MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
+};
+
+static int mlxsw_sp_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				const struct mlxsw_sp_sb_pm *pms,
+				bool skip_ingress)
 {
-	int i;
-	int err;
+	int i, err;
 
-	for (i = 0; i < pms_len; i++) {
-		const struct mlxsw_sp_sb_pm *pm;
+	for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
+		const struct mlxsw_sp_sb_pm *pm = &pms[i];
+		const struct mlxsw_sp_sb_pool_des *des;
+		u32 max_buff;
+		u32 min_buff;
 
-		pm = &pms[i];
-		err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir,
-					   pm->min_buff, pm->max_buff);
+		des = &mlxsw_sp->sb_vals->pool_dess[i];
+		if (skip_ingress && des->dir == MLXSW_REG_SBXX_DIR_INGRESS)
+			continue;
+
+		min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff);
+		max_buff = pm->max_buff;
+		if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i))
+			max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, max_buff);
+		err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, min_buff,
+					   max_buff);
 		if (err)
 			return err;
 	}
@@ -488,72 +783,62 @@
 
 static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-	int err;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 
-	err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
-					  mlxsw_sp_port->local_port,
-					  MLXSW_REG_SBXX_DIR_INGRESS,
-					  mlxsw_sp_sb_pms_ingress,
-					  MLXSW_SP_SB_PMS_INGRESS_LEN);
-	if (err)
-		return err;
-	return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
-					   mlxsw_sp_port->local_port,
-					   MLXSW_REG_SBXX_DIR_EGRESS,
-					   mlxsw_sp_sb_pms_egress,
-					   MLXSW_SP_SB_PMS_EGRESS_LEN);
+	return mlxsw_sp_sb_pms_init(mlxsw_sp, mlxsw_sp_port->local_port,
+				    mlxsw_sp->sb_vals->pms, false);
 }
 
-struct mlxsw_sp_sb_mm {
-	u32 min_buff;
-	u32 max_buff;
-	u8 pool;
-};
+static int mlxsw_sp_cpu_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp)
+{
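+	/* Ingress quotas are not supported for the CPU port, so only
+	 * egress pools are initialized for it (skip_ingress).
+	 */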
+	return mlxsw_sp_sb_pms_init(mlxsw_sp, 0, mlxsw_sp->sb_vals->pms_cpu,
+				    true);
+}
 
-#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool)	\
+#define MLXSW_SP_SB_MM(_min_buff, _max_buff)		\
 	{						\
 		.min_buff = _min_buff,			\
 		.max_buff = _max_buff,			\
-		.pool = _pool,				\
+		.pool_index = MLXSW_SP_SB_POOL_EGR,	\
 	}
 
 static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
-	MLXSW_SP_SB_MM(20000, 0xff, 0),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
+	MLXSW_SP_SB_MM(0, 6),
 };
 
-#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms)
-
 static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
 {
 	char sbmm_pl[MLXSW_REG_SBMM_LEN];
 	int i;
 	int err;
 
-	for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) {
+	for (i = 0; i < mlxsw_sp->sb_vals->mms_count; i++) {
+		const struct mlxsw_sp_sb_pool_des *des;
 		const struct mlxsw_sp_sb_mm *mc;
 		u32 min_buff;
 
-		mc = &mlxsw_sp_sb_mms[i];
-		/* All pools are initialized using dynamic thresholds,
-		 * therefore 'max_buff' isn't specified in cells.
+		mc = &mlxsw_sp->sb_vals->mms[i];
+		des = &mlxsw_sp->sb_vals->pool_dess[mc->pool_index];
+		/* All pools used by sb_mm's are initialized using dynamic
+		 * thresholds, therefore 'max_buff' isn't specified in cells.
 		 */
 		min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff);
 		mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff,
-				    mc->pool);
+				    des->pool);
 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl);
 		if (err)
 			return err;
@@ -561,9 +846,59 @@
 	return 0;
 }
 
+static void mlxsw_sp_pool_count(struct mlxsw_sp *mlxsw_sp,
+				u16 *p_ingress_len, u16 *p_egress_len)
+{
+	int i;
+
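+	/* devlink registers ingress and egress pool counts separately,
+	 * so split the flat pool descriptor array by direction.
+	 */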
+	for (i = 0; i < mlxsw_sp->sb_vals->pool_count; ++i) {
+		if (mlxsw_sp->sb_vals->pool_dess[i].dir ==
+		    MLXSW_REG_SBXX_DIR_INGRESS)
+			(*p_ingress_len)++;
+		else
+			(*p_egress_len)++;
+	}
+
+	WARN(*p_egress_len == 0, "No egress pools\n");
+}
+
+const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = {
+	.pool_count = ARRAY_SIZE(mlxsw_sp1_sb_pool_dess),
+	.pool_dess = mlxsw_sp1_sb_pool_dess,
+	.pms = mlxsw_sp1_sb_pms,
+	.pms_cpu = mlxsw_sp_cpu_port_sb_pms,
+	.prs = mlxsw_sp1_sb_prs,
+	.mms = mlxsw_sp_sb_mms,
+	.cms_ingress = mlxsw_sp1_sb_cms_ingress,
+	.cms_egress = mlxsw_sp1_sb_cms_egress,
+	.cms_cpu = mlxsw_sp_cpu_port_sb_cms,
+	.mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms),
+	.cms_ingress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_ingress),
+	.cms_egress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_egress),
+	.cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms),
+};
+
+const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = {
+	.pool_count = ARRAY_SIZE(mlxsw_sp2_sb_pool_dess),
+	.pool_dess = mlxsw_sp2_sb_pool_dess,
+	.pms = mlxsw_sp2_sb_pms,
+	.pms_cpu = mlxsw_sp_cpu_port_sb_pms,
+	.prs = mlxsw_sp2_sb_prs,
+	.mms = mlxsw_sp_sb_mms,
+	.cms_ingress = mlxsw_sp2_sb_cms_ingress,
+	.cms_egress = mlxsw_sp2_sb_cms_egress,
+	.cms_cpu = mlxsw_sp_cpu_port_sb_cms,
+	.mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms),
+	.cms_ingress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_ingress),
+	.cms_egress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_egress),
+	.cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms),
+};
+
 int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
 {
-	u64 sb_size;
+	u32 max_headroom_size;
+	u16 ing_pool_count = 0;
+	u16 eg_pool_count = 0;
 	int err;
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
@@ -571,30 +906,45 @@
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
 		return -EIO;
-	sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE);
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE))
+		return -EIO;
 
 	mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL);
 	if (!mlxsw_sp->sb)
 		return -ENOMEM;
 	mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
+	mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+						   MAX_BUFFER_SIZE);
+	max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+					       MAX_HEADROOM_SIZE);
+	/* Round down, because this limit must not be overstepped. */
+	mlxsw_sp->sb->max_headroom_cells = max_headroom_size /
+						mlxsw_sp->sb->cell_size;
 
 	err = mlxsw_sp_sb_ports_init(mlxsw_sp);
 	if (err)
 		goto err_sb_ports_init;
-	err = mlxsw_sp_sb_prs_init(mlxsw_sp);
+	err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs,
+				   mlxsw_sp->sb_vals->pool_count);
 	if (err)
 		goto err_sb_prs_init;
 	err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp);
 	if (err)
 		goto err_sb_cpu_port_sb_cms_init;
+	err = mlxsw_sp_cpu_port_sb_pms_init(mlxsw_sp);
+	if (err)
+		goto err_sb_cpu_port_pms_init;
 	err = mlxsw_sp_sb_mms_init(mlxsw_sp);
 	if (err)
 		goto err_sb_mms_init;
-	err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size,
-				  MLXSW_SP_SB_POOL_COUNT,
-				  MLXSW_SP_SB_POOL_COUNT,
-				  MLXSW_SP_SB_TC_COUNT,
-				  MLXSW_SP_SB_TC_COUNT);
+	mlxsw_sp_pool_count(mlxsw_sp, &ing_pool_count, &eg_pool_count);
+	err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0,
+				  mlxsw_sp->sb->sb_size,
+				  ing_pool_count,
+				  eg_pool_count,
+				  MLXSW_SP_SB_ING_TC_COUNT,
+				  MLXSW_SP_SB_EG_TC_COUNT);
 	if (err)
 		goto err_devlink_sb_register;
 
@@ -602,6 +952,7 @@
 
 err_devlink_sb_register:
 err_sb_mms_init:
+err_sb_cpu_port_pms_init:
 err_sb_cpu_port_sb_cms_init:
 err_sb_prs_init:
 	mlxsw_sp_sb_ports_fini(mlxsw_sp);
@@ -632,84 +983,82 @@
 	return err;
 }
 
-static u8 pool_get(u16 pool_index)
-{
-	return pool_index % MLXSW_SP_SB_POOL_COUNT;
-}
-
-static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir)
-{
-	u16 pool_index;
-
-	pool_index = pool;
-	if (dir == MLXSW_REG_SBXX_DIR_EGRESS)
-		pool_index += MLXSW_SP_SB_POOL_COUNT;
-	return pool_index;
-}
-
-static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index)
-{
-	return pool_index < MLXSW_SP_SB_POOL_COUNT ?
-	       MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS;
-}
-
 int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
 			 unsigned int sb_index, u16 pool_index,
 			 struct devlink_sb_pool_info *pool_info)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-	u8 pool = pool_get(pool_index);
-	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
-	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+	enum mlxsw_reg_sbxx_dir dir;
+	struct mlxsw_sp_sb_pr *pr;
 
+	dir = mlxsw_sp->sb_vals->pool_dess[pool_index].dir;
+	pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
 	pool_info->pool_type = (enum devlink_sb_pool_type) dir;
 	pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size);
 	pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
+	pool_info->cell_size = mlxsw_sp->sb->cell_size;
 	return 0;
 }
 
 int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
 			 unsigned int sb_index, u16 pool_index, u32 size,
-			 enum devlink_sb_threshold_type threshold_type)
+			 enum devlink_sb_threshold_type threshold_type,
+			 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size);
-	u8 pool = pool_get(pool_index);
-	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
+	const struct mlxsw_sp_sb_pr *pr;
 	enum mlxsw_reg_sbpr_mode mode;
 
-	if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
-		return -EINVAL;
-
 	mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
-	return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
+	pr = &mlxsw_sp->sb_vals->prs[pool_index];
+
+	if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Exceeded shared buffer size");
+		return -EINVAL;
+	}
+
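+	/* Frozen pools must keep the driver's default threshold type
+	 * and size.
+	 */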
+	if (pr->freeze_mode && pr->mode != mode) {
+		NL_SET_ERR_MSG_MOD(extack, "Changing this pool's threshold type is forbidden");
+		return -EINVAL;
+	}
+
+	if (pr->freeze_size && pr->size != size) {
+		NL_SET_ERR_MSG_MOD(extack, "Changing this pool's size is forbidden");
+		return -EINVAL;
+	}
+
+	return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode,
+				    pool_size, false);
 }
 
 #define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */
 
-static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool,
-				     enum mlxsw_reg_sbxx_dir dir, u32 max_buff)
+static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
+				     u32 max_buff)
 {
-	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
 
 	if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC)
 		return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
 	return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff);
 }
 
-static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool,
-				    enum mlxsw_reg_sbxx_dir dir, u32 threshold,
-				    u32 *p_max_buff)
+static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
+				    u32 threshold, u32 *p_max_buff,
+				    struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
 
 	if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) {
 		int val;
 
 		val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
 		if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN ||
-		    val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX)
+		    val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX) {
+			NL_SET_ERR_MSG_MOD(extack, "Invalid dynamic threshold value");
 			return -EINVAL;
+		}
 		*p_max_buff = val;
 	} else {
 		*p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold);
@@ -725,35 +1074,36 @@
 			mlxsw_core_port_driver_priv(mlxsw_core_port);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
-	u8 pool = pool_get(pool_index);
-	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
 	struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
-						       pool, dir);
+						       pool_index);
 
-	*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir,
+	*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool_index,
 						 pm->max_buff);
 	return 0;
 }
 
 int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
 			      unsigned int sb_index, u16 pool_index,
-			      u32 threshold)
+			      u32 threshold, struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port =
 			mlxsw_core_port_driver_priv(mlxsw_core_port);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
-	u8 pool = pool_get(pool_index);
-	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
 	u32 max_buff;
 	int err;
 
-	err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
-				       threshold, &max_buff);
+	if (local_port == MLXSW_PORT_CPU_PORT) {
+		NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's threshold is forbidden");
+		return -EINVAL;
+	}
+
+	err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
+				       threshold, &max_buff, extack);
 	if (err)
 		return err;
 
-	return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir,
+	return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool_index,
 				    0, max_buff);
 }
 
@@ -771,41 +1121,65 @@
 	struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
 						       pg_buff, dir);
 
-	*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir,
+	*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool_index,
 						 cm->max_buff);
-	*p_pool_index = pool_index_get(cm->pool, dir);
+	*p_pool_index = cm->pool_index;
 	return 0;
 }
 
 int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
 				 unsigned int sb_index, u16 tc_index,
 				 enum devlink_sb_pool_type pool_type,
-				 u16 pool_index, u32 threshold)
+				 u16 pool_index, u32 threshold,
+				 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port =
 			mlxsw_core_port_driver_priv(mlxsw_core_port);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
+	const struct mlxsw_sp_sb_cm *cm;
 	u8 pg_buff = tc_index;
 	enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
-	u8 pool = pool_get(pool_index);
 	u32 max_buff;
 	int err;
 
-	if (dir != dir_get(pool_index))
+	if (local_port == MLXSW_PORT_CPU_PORT) {
+		NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's binding is forbidden");
 		return -EINVAL;
+	}
 
-	err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
-				       threshold, &max_buff);
+	if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) {
+		NL_SET_ERR_MSG_MOD(extack, "Binding egress TC to ingress pool and vice versa is forbidden");
+		return -EINVAL;
+	}
+
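+	/* Look up this TC's default binding; frozen bindings must keep
+	 * their default pool and threshold.
+	 */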
+	if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
+		cm = &mlxsw_sp->sb_vals->cms_ingress[tc_index];
+	else
+		cm = &mlxsw_sp->sb_vals->cms_egress[tc_index];
+
+	if (cm->freeze_pool && cm->pool_index != pool_index) {
+		NL_SET_ERR_MSG_MOD(extack, "Binding this TC to a different pool is forbidden");
+		return -EINVAL;
+	}
+
+	if (cm->freeze_thresh && cm->max_buff != threshold) {
+		NL_SET_ERR_MSG_MOD(extack, "Changing this TC's threshold is forbidden");
+		return -EINVAL;
+	}
+
+	err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
+				       threshold, &max_buff, extack);
 	if (err)
 		return err;
 
-	return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
-				    0, max_buff, pool);
+	return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff,
+				    0, max_buff, false, pool_index);
 }
 
 #define MASKED_COUNT_MAX \
-	(MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2))
+	(MLXSW_REG_SBSR_REC_MAX_COUNT / \
+	 (MLXSW_SP_SB_ING_TC_COUNT + MLXSW_SP_SB_EG_TC_COUNT))
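+/* MASKED_COUNT_MAX is the number of ports whose per-TC occupancy
+ * records fit in a single SBSR query.
+ */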
 
 struct mlxsw_sp_sb_sr_occ_query_cb_ctx {
 	u8 masked_count;
@@ -831,7 +1205,12 @@
 	     local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
 		if (!mlxsw_sp->ports[local_port])
 			continue;
-		for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+		if (local_port == MLXSW_PORT_CPU_PORT) {
+			/* Ingress quotas are not supported for the CPU port */
+			masked_count++;
+			continue;
+		}
+		for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) {
 			cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
 						MLXSW_REG_SBXX_DIR_INGRESS);
 			mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
@@ -845,7 +1224,7 @@
 	     local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
 		if (!mlxsw_sp->ports[local_port])
 			continue;
-		for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+		for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) {
 			cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
 						MLXSW_REG_SBXX_DIR_EGRESS);
 			mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
@@ -866,7 +1245,7 @@
 	char *sbsr_pl;
 	u8 masked_count;
 	u8 local_port_1;
-	u8 local_port = 0;
+	u8 local_port;
 	int i;
 	int err;
 	int err2;
@@ -875,28 +1254,26 @@
 	if (!sbsr_pl)
 		return -ENOMEM;
 
+	local_port = MLXSW_PORT_CPU_PORT;
 next_batch:
-	local_port++;
 	local_port_1 = local_port;
 	masked_count = 0;
 	mlxsw_reg_sbsr_pack(sbsr_pl, false);
-	for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+	for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++)
 		mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
+	for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++)
 		mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
-	}
 	for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
 		if (!mlxsw_sp->ports[local_port])
 			continue;
-		mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
+		if (local_port != MLXSW_PORT_CPU_PORT) {
+			/* Ingress quotas are not supported for the CPU port */
+			mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl,
+							     local_port, 1);
+		}
 		mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
-		for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
+		for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
 			err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
-						       MLXSW_REG_SBXX_DIR_INGRESS,
-						       &bulk_list);
-			if (err)
-				goto out;
-			err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
-						       MLXSW_REG_SBXX_DIR_EGRESS,
 						       &bulk_list);
 			if (err)
 				goto out;
@@ -914,8 +1291,10 @@
 				    cb_priv);
 	if (err)
 		goto out;
-	if (local_port < mlxsw_core_max_ports(mlxsw_core))
+	if (local_port < mlxsw_core_max_ports(mlxsw_core)) {
+		local_port++;
 		goto next_batch;
+	}
 
 out:
 	err2 = mlxsw_reg_trans_bulk_wait(&bulk_list);
@@ -932,7 +1311,7 @@
 	LIST_HEAD(bulk_list);
 	char *sbsr_pl;
 	unsigned int masked_count;
-	u8 local_port = 0;
+	u8 local_port;
 	int i;
 	int err;
 	int err2;
@@ -941,27 +1320,25 @@
 	if (!sbsr_pl)
 		return -ENOMEM;
 
+	local_port = MLXSW_PORT_CPU_PORT;
 next_batch:
-	local_port++;
 	masked_count = 0;
 	mlxsw_reg_sbsr_pack(sbsr_pl, true);
-	for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+	for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++)
 		mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
+	for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++)
 		mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
-	}
 	for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
 		if (!mlxsw_sp->ports[local_port])
 			continue;
-		mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
+		if (local_port != MLXSW_PORT_CPU_PORT) {
+			/* Ingress quotas are not supported for the CPU port */
+			mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl,
+							     local_port, 1);
+		}
 		mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
-		for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
+		for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
 			err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
-						       MLXSW_REG_SBXX_DIR_INGRESS,
-						       &bulk_list);
-			if (err)
-				goto out;
-			err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
-						       MLXSW_REG_SBXX_DIR_EGRESS,
 						       &bulk_list);
 			if (err)
 				goto out;
@@ -975,8 +1352,10 @@
 				    &bulk_list, NULL, 0);
 	if (err)
 		goto out;
-	if (local_port < mlxsw_core_max_ports(mlxsw_core))
+	if (local_port < mlxsw_core_max_ports(mlxsw_core)) {
+		local_port++;
 		goto next_batch;
+	}
 
 out:
 	err2 = mlxsw_reg_trans_bulk_wait(&bulk_list);
@@ -994,10 +1373,8 @@
 			mlxsw_core_port_driver_priv(mlxsw_core_port);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
-	u8 pool = pool_get(pool_index);
-	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
 	struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
-						       pool, dir);
+						       pool_index);
 
 	*p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur);
 	*p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index b25048c..21296fa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -408,14 +408,6 @@
 	have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port,
 							&prio_map);
 
-	if (!have_dscp) {
-		err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
-					MLXSW_REG_QPTS_TRUST_STATE_PCP);
-		if (err)
-			netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
-		return err;
-	}
-
 	mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio,
 					    &dscp_map);
 	err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port,
@@ -432,6 +424,14 @@
 		return err;
 	}
 
+	if (!have_dscp) {
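+		/* The prio/DSCP maps above were refreshed first, so
+		 * removing the last DSCP rule does not leave stale
+		 * mappings behind when falling back to trust-L2.
+		 */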
+		err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
+					MLXSW_REG_QPTS_TRUST_STATE_PCP);
+		if (err)
+			netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
+		return err;
+	}
+
 	err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
 					     MLXSW_REG_QPTS_TRUST_STATE_DSCP);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 41e607a..4993381 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -220,7 +220,7 @@
 	for (; i < rif_count; i++) {
 		struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
 
-		if (!rif)
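+		/* Skip RIFs that are not associated with a netdevice. */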
+		if (!rif || !mlxsw_sp_rif_dev(rif))
 			continue;
 		err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif,
 					      counters_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
index e689576..246dbb3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
@@ -4,24 +4,9 @@
 #ifndef _MLXSW_PIPELINE_H_
 #define _MLXSW_PIPELINE_H_
 
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
-
 int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp);
 
-#else
-
-static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
-{
-	return 0;
-}
-
-static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
-{
-}
-
-#endif
-
 #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
 #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4"
 #define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6"
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index 715d24f..8df3cb2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -6,6 +6,7 @@
 #include <linux/if_vlan.h>
 #include <linux/if_bridge.h>
 #include <linux/netdevice.h>
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 
 #include "spectrum.h"
@@ -14,6 +15,8 @@
 struct mlxsw_sp_fid_family;
 
 struct mlxsw_sp_fid_core {
+	struct rhashtable fid_ht;
+	struct rhashtable vni_ht;
 	struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
 	unsigned int *port_fid_mappings;
 };
@@ -24,6 +27,15 @@
 	unsigned int ref_count;
 	u16 fid_index;
 	struct mlxsw_sp_fid_family *fid_family;
+	struct rhash_head ht_node;
+
+	struct rhash_head vni_ht_node;
+	enum mlxsw_sp_nve_type nve_type;
+	__be32 vni;
+	u32 nve_flood_index;
+	int nve_ifindex;
+	u8 vni_valid:1,
+	   nve_flood_index_valid:1;
 };
 
 struct mlxsw_sp_fid_8021q {
@@ -36,6 +48,18 @@
 	int br_ifindex;
 };
 
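+/* Hash tables for looking up a FID by its index and by its VNI; the key
+ * layout is taken from struct mlxsw_sp_fid via sizeof_field()/offsetof().
+ */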
+static const struct rhashtable_params mlxsw_sp_fid_ht_params = {
+	.key_len = sizeof_field(struct mlxsw_sp_fid, fid_index),
+	.key_offset = offsetof(struct mlxsw_sp_fid, fid_index),
+	.head_offset = offsetof(struct mlxsw_sp_fid, ht_node),
+};
+
+static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
+	.key_len = sizeof_field(struct mlxsw_sp_fid, vni),
+	.key_offset = offsetof(struct mlxsw_sp_fid, vni),
+	.head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node),
+};
+
 struct mlxsw_sp_flood_table {
 	enum mlxsw_sp_flood_type packet_type;
 	enum mlxsw_reg_sfgc_bridge_type bridge_type;
@@ -56,6 +80,13 @@
 			    struct mlxsw_sp_port *port, u16 vid);
 	void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
 			       struct mlxsw_sp_port *port, u16 vid);
+	int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+	void (*vni_clear)(struct mlxsw_sp_fid *fid);
+	int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
+				   u32 nve_flood_index);
+	void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
+	void (*fdb_clear_offload)(const struct mlxsw_sp_fid *fid,
+				  const struct net_device *nve_dev);
 };
 
 struct mlxsw_sp_fid_family {
@@ -70,6 +101,7 @@
 	enum mlxsw_sp_rif_type rif_type;
 	const struct mlxsw_sp_fid_ops *ops;
 	struct mlxsw_sp *mlxsw_sp;
+	u8 lag_vid_valid:1;
 };
 
 static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
@@ -94,6 +126,179 @@
 	[MLXSW_SP_FLOOD_TYPE_MC]	= mlxsw_sp_sfgc_mc_packet_types,
 };
 
+bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index)
+{
+	enum mlxsw_sp_fid_type fid_type = MLXSW_SP_FID_TYPE_DUMMY;
+	struct mlxsw_sp_fid_family *fid_family;
+
+	fid_family = mlxsw_sp->fid_core->fid_family_arr[fid_type];
+
+	return fid_family->start_index == fid_index;
+}
+
+bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid)
+{
+	return fid->fid_family->lag_vid_valid;
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
+						  u16 fid_index)
+{
+	struct mlxsw_sp_fid *fid;
+
+	fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->fid_ht, &fid_index,
+				     mlxsw_sp_fid_ht_params);
+	if (fid)
+		fid->ref_count++;
+
+	return fid;
+}
+
+int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex)
+{
+	if (!fid->vni_valid)
+		return -EINVAL;
+
+	*nve_ifindex = fid->nve_ifindex;
+
+	return 0;
+}
+
+int mlxsw_sp_fid_nve_type(const struct mlxsw_sp_fid *fid,
+			  enum mlxsw_sp_nve_type *p_type)
+{
+	if (!fid->vni_valid)
+		return -EINVAL;
+
+	*p_type = fid->nve_type;
+
+	return 0;
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+						__be32 vni)
+{
+	struct mlxsw_sp_fid *fid;
+
+	fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni,
+				     mlxsw_sp_fid_vni_ht_params);
+	if (fid)
+		fid->ref_count++;
+
+	return fid;
+}
+
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni)
+{
+	if (!fid->vni_valid)
+		return -EINVAL;
+
+	*vni = fid->vni;
+
+	return 0;
+}
+
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+				     u32 nve_flood_index)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	int err;
+
+	if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+		return -EINVAL;
+
+	err = ops->nve_flood_index_set(fid, nve_flood_index);
+	if (err)
+		return err;
+
+	fid->nve_flood_index = nve_flood_index;
+	fid->nve_flood_index_valid = true;
+
+	return 0;
+}
+
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+	if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+		return;
+
+	fid->nve_flood_index_valid = false;
+	ops->nve_flood_index_clear(fid);
+}
+
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
+{
+	return fid->nve_flood_index_valid;
+}
+
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type,
+			 __be32 vni, int nve_ifindex)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+	int err;
+
+	if (WARN_ON(!ops->vni_set || fid->vni_valid))
+		return -EINVAL;
+
+	fid->nve_type = type;
+	fid->nve_ifindex = nve_ifindex;
+	fid->vni = vni;
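+	/* Reserve the VNI in the hash table before programming it to the
+	 * device; unwound at err_vni_set below on failure.
+	 */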
+	err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
+					    &fid->vni_ht_node,
+					    mlxsw_sp_fid_vni_ht_params);
+	if (err)
+		return err;
+
+	err = ops->vni_set(fid, vni);
+	if (err)
+		goto err_vni_set;
+
+	fid->vni_valid = true;
+
+	return 0;
+
+err_vni_set:
+	rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+			       mlxsw_sp_fid_vni_ht_params);
+	return err;
+}
+
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+
+	if (WARN_ON(!ops->vni_clear || !fid->vni_valid))
+		return;
+
+	fid->vni_valid = false;
+	ops->vni_clear(fid);
+	rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+			       mlxsw_sp_fid_vni_ht_params);
+}
+
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid)
+{
+	return fid->vni_valid;
+}
+
+void mlxsw_sp_fid_fdb_clear_offload(const struct mlxsw_sp_fid *fid,
+				    const struct net_device *nve_dev)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+	if (ops->fdb_clear_offload)
+		ops->fdb_clear_offload(fid, nve_dev);
+}
+
 static const struct mlxsw_sp_flood_table *
 mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid,
 				enum mlxsw_sp_flood_type packet_type)
@@ -154,11 +359,6 @@
 	fid->fid_family->ops->port_vid_unmap(fid, mlxsw_sp_port, vid);
 }
 
-enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid)
-{
-	return fid->fid_family->rif_type;
-}
-
 u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid)
 {
 	return fid->fid_index;
@@ -174,6 +374,11 @@
 	fid->rif = rif;
 }
 
+struct mlxsw_sp_rif *mlxsw_sp_fid_rif(const struct mlxsw_sp_fid *fid)
+{
+	return fid->rif;
+}
+
 enum mlxsw_sp_rif_type
 mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp,
 			   enum mlxsw_sp_fid_type type)
@@ -217,6 +422,21 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
 }
 
+static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+			       __be32 vni, bool vni_valid, u32 nve_flood_index,
+			       bool nve_flood_index_valid)
+{
+	char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+	mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index,
+			    0);
+	mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid);
+	mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni));
+	mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid);
+	mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
 static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
 				u16 vid, bool valid)
 {
@@ -393,6 +613,8 @@
 
 static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid)
 {
+	if (fid->vni_valid)
+		mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid);
 	mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
 }
 
@@ -421,7 +643,7 @@
 
 static u16 mlxsw_sp_fid_8021d_flood_index(const struct mlxsw_sp_fid *fid)
 {
-	return fid->fid_index - fid->fid_family->start_index;
+	return fid->fid_index - VLAN_N_VID;
 }
 
 static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -531,6 +753,48 @@
 				    mlxsw_sp_port->local_port, vid, false);
 }
 
+static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+	return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni,
+				   true, fid->nve_flood_index,
+				   fid->nve_flood_index_valid);
+}
+
+static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+	mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false,
+			    fid->nve_flood_index, fid->nve_flood_index_valid);
+}
+
+static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+						  u32 nve_flood_index)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+	return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index,
+				   fid->vni, fid->vni_valid, nve_flood_index,
+				   true);
+}
+
+static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+	mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni,
+			    fid->vni_valid, 0, false);
+}
+
+static void
+mlxsw_sp_fid_8021d_fdb_clear_offload(const struct mlxsw_sp_fid *fid,
+				     const struct net_device *nve_dev)
+{
+	br_fdb_clear_offload(nve_dev, 0);
+}
+
 static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
 	.setup			= mlxsw_sp_fid_8021d_setup,
 	.configure		= mlxsw_sp_fid_8021d_configure,
@@ -540,6 +804,11 @@
 	.flood_index		= mlxsw_sp_fid_8021d_flood_index,
 	.port_vid_map		= mlxsw_sp_fid_8021d_port_vid_map,
 	.port_vid_unmap		= mlxsw_sp_fid_8021d_port_vid_unmap,
+	.vni_set		= mlxsw_sp_fid_8021d_vni_set,
+	.vni_clear		= mlxsw_sp_fid_8021d_vni_clear,
+	.nve_flood_index_set	= mlxsw_sp_fid_8021d_nve_flood_index_set,
+	.nve_flood_index_clear	= mlxsw_sp_fid_8021d_nve_flood_index_clear,
+	.fdb_clear_offload	= mlxsw_sp_fid_8021d_fdb_clear_offload,
 };
 
 static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = {
@@ -573,6 +842,48 @@
 	.nr_flood_tables	= ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables),
 	.rif_type		= MLXSW_SP_RIF_TYPE_FID,
 	.ops			= &mlxsw_sp_fid_8021d_ops,
+	.lag_vid_valid		= 1,
+};
+
+static void
+mlxsw_sp_fid_8021q_fdb_clear_offload(const struct mlxsw_sp_fid *fid,
+				     const struct net_device *nve_dev)
+{
+	br_fdb_clear_offload(nve_dev, mlxsw_sp_fid_8021q_vid(fid));
+}
+
+static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_emu_ops = {
+	.setup			= mlxsw_sp_fid_8021q_setup,
+	.configure		= mlxsw_sp_fid_8021d_configure,
+	.deconfigure		= mlxsw_sp_fid_8021d_deconfigure,
+	.index_alloc		= mlxsw_sp_fid_8021d_index_alloc,
+	.compare		= mlxsw_sp_fid_8021q_compare,
+	.flood_index		= mlxsw_sp_fid_8021d_flood_index,
+	.port_vid_map		= mlxsw_sp_fid_8021d_port_vid_map,
+	.port_vid_unmap		= mlxsw_sp_fid_8021d_port_vid_unmap,
+	.vni_set		= mlxsw_sp_fid_8021d_vni_set,
+	.vni_clear		= mlxsw_sp_fid_8021d_vni_clear,
+	.nve_flood_index_set	= mlxsw_sp_fid_8021d_nve_flood_index_set,
+	.nve_flood_index_clear	= mlxsw_sp_fid_8021d_nve_flood_index_clear,
+	.fdb_clear_offload	= mlxsw_sp_fid_8021q_fdb_clear_offload,
+};
+
+/* There are 4K-2 emulated 802.1Q FIDs, starting right after the 802.1D FIDs */
+#define MLXSW_SP_FID_8021Q_EMU_START	(VLAN_N_VID + MLXSW_SP_FID_8021D_MAX)
+#define MLXSW_SP_FID_8021Q_EMU_END	(MLXSW_SP_FID_8021Q_EMU_START + \
+					 VLAN_VID_MASK - 2)
+
+/* Range and flood configuration must match mlxsw_config_profile */
+static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_emu_family = {
+	.type			= MLXSW_SP_FID_TYPE_8021Q,
+	.fid_size		= sizeof(struct mlxsw_sp_fid_8021q),
+	.start_index		= MLXSW_SP_FID_8021Q_EMU_START,
+	.end_index		= MLXSW_SP_FID_8021Q_EMU_END,
+	.flood_tables		= mlxsw_sp_fid_8021d_flood_tables,
+	.nr_flood_tables	= ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables),
+	.rif_type		= MLXSW_SP_RIF_TYPE_VLAN,
+	.ops			= &mlxsw_sp_fid_8021q_emu_ops,
+	.lag_vid_valid		= 1,
 };
 
 static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid)
@@ -696,18 +1007,36 @@
 static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = {
 	.type			= MLXSW_SP_FID_TYPE_DUMMY,
 	.fid_size		= sizeof(struct mlxsw_sp_fid),
-	.start_index		= MLXSW_SP_RFID_BASE - 1,
-	.end_index		= MLXSW_SP_RFID_BASE - 1,
+	.start_index		= VLAN_N_VID - 1,
+	.end_index		= VLAN_N_VID - 1,
 	.ops			= &mlxsw_sp_fid_dummy_ops,
 };
 
 static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = {
-	[MLXSW_SP_FID_TYPE_8021Q]	= &mlxsw_sp_fid_8021q_family,
+	[MLXSW_SP_FID_TYPE_8021Q]	= &mlxsw_sp_fid_8021q_emu_family,
 	[MLXSW_SP_FID_TYPE_8021D]	= &mlxsw_sp_fid_8021d_family,
 	[MLXSW_SP_FID_TYPE_RFID]	= &mlxsw_sp_fid_rfid_family,
 	[MLXSW_SP_FID_TYPE_DUMMY]	= &mlxsw_sp_fid_dummy_family,
 };
 
+static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp,
+						enum mlxsw_sp_fid_type type,
+						const void *arg)
+{
+	struct mlxsw_sp_fid_family *fid_family;
+	struct mlxsw_sp_fid *fid;
+
+	fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
+	list_for_each_entry(fid, &fid_family->fids_list, list) {
+		if (!fid->fid_family->ops->compare(fid, arg))
+			continue;
+		fid->ref_count++;
+		return fid;
+	}
+
+	return NULL;
+}
+
 static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
 					     enum mlxsw_sp_fid_type type,
 					     const void *arg)
@@ -717,14 +1046,11 @@
 	u16 fid_index;
 	int err;
 
-	fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
-	list_for_each_entry(fid, &fid_family->fids_list, list) {
-		if (!fid->fid_family->ops->compare(fid, arg))
-			continue;
-		fid->ref_count++;
+	fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg);
+	if (fid)
 		return fid;
-	}
 
+	fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
 	fid = kzalloc(fid_family->fid_size, GFP_KERNEL);
 	if (!fid)
 		return ERR_PTR(-ENOMEM);
@@ -743,10 +1069,17 @@
 	if (err)
 		goto err_configure;
 
+	err = rhashtable_insert_fast(&mlxsw_sp->fid_core->fid_ht, &fid->ht_node,
+				     mlxsw_sp_fid_ht_params);
+	if (err)
+		goto err_rhashtable_insert;
+
 	list_add(&fid->list, &fid_family->fids_list);
 	fid->ref_count++;
 	return fid;
 
+err_rhashtable_insert:
+	fid->fid_family->ops->deconfigure(fid);
 err_configure:
 	__clear_bit(fid_index - fid_family->start_index,
 		    fid_family->fids_bitmap);
@@ -758,19 +1091,18 @@
 void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid)
 {
 	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
 
-	if (--fid->ref_count == 1 && fid->rif) {
-		/* Destroy the associated RIF and let it drop the last
-		 * reference on the FID.
-		 */
-		return mlxsw_sp_rif_destroy(fid->rif);
-	} else if (fid->ref_count == 0) {
-		list_del(&fid->list);
-		fid->fid_family->ops->deconfigure(fid);
-		__clear_bit(fid->fid_index - fid_family->start_index,
-			    fid_family->fids_bitmap);
-		kfree(fid);
-	}
+	if (--fid->ref_count != 0)
+		return;
+
+	list_del(&fid->list);
+	rhashtable_remove_fast(&mlxsw_sp->fid_core->fid_ht,
+			       &fid->ht_node, mlxsw_sp_fid_ht_params);
+	fid->fid_family->ops->deconfigure(fid);
+	__clear_bit(fid->fid_index - fid_family->start_index,
+		    fid_family->fids_bitmap);
+	kfree(fid);
 }
 
 struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid)
@@ -784,6 +1116,19 @@
 	return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex);
 }
 
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_lookup(struct mlxsw_sp *mlxsw_sp,
+					       u16 vid)
+{
+	return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021Q, &vid);
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+					       int br_ifindex)
+{
+	return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D,
+				   &br_ifindex);
+}
+
 struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
 					   u16 rif_index)
 {
@@ -853,8 +1198,7 @@
 
 	fid_family->mlxsw_sp = mlxsw_sp;
 	INIT_LIST_HEAD(&fid_family->fids_list);
-	fid_family->fids_bitmap = kcalloc(BITS_TO_LONGS(nr_fids),
-					  sizeof(unsigned long), GFP_KERNEL);
+	fid_family->fids_bitmap = bitmap_zalloc(nr_fids, GFP_KERNEL);
 	if (!fid_family->fids_bitmap) {
 		err = -ENOMEM;
 		goto err_alloc_fids_bitmap;
@@ -871,7 +1215,7 @@
 	return 0;
 
 err_fid_flood_tables_init:
-	kfree(fid_family->fids_bitmap);
+	bitmap_free(fid_family->fids_bitmap);
 err_alloc_fids_bitmap:
 	kfree(fid_family);
 	return err;
@@ -882,7 +1226,7 @@
 			       struct mlxsw_sp_fid_family *fid_family)
 {
 	mlxsw_sp->fid_core->fid_family_arr[fid_family->type] = NULL;
-	kfree(fid_family->fids_bitmap);
+	bitmap_free(fid_family->fids_bitmap);
 	WARN_ON_ONCE(!list_empty(&fid_family->fids_list));
 	kfree(fid_family);
 }
@@ -918,6 +1262,14 @@
 		return -ENOMEM;
 	mlxsw_sp->fid_core = fid_core;
 
+	err = rhashtable_init(&fid_core->fid_ht, &mlxsw_sp_fid_ht_params);
+	if (err)
+		goto err_rhashtable_fid_init;
+
+	err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params);
+	if (err)
+		goto err_rhashtable_vni_init;
+
 	fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int),
 					      GFP_KERNEL);
 	if (!fid_core->port_fid_mappings) {
@@ -944,6 +1296,10 @@
 	}
 	kfree(fid_core->port_fid_mappings);
 err_alloc_port_fid_mappings:
+	rhashtable_destroy(&fid_core->vni_ht);
+err_rhashtable_vni_init:
+	rhashtable_destroy(&fid_core->fid_ht);
+err_rhashtable_fid_init:
 	kfree(fid_core);
 	return err;
 }
@@ -957,5 +1313,7 @@
 		mlxsw_sp_fid_family_unregister(mlxsw_sp,
 					       fid_core->fid_family_arr[i]);
 	kfree(fid_core->port_fid_mappings);
+	rhashtable_destroy(&fid_core->vni_ht);
+	rhashtable_destroy(&fid_core->fid_ht);
 	kfree(fid_core);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 8d21197..b607919 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -17,13 +17,14 @@
 static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_acl_block *block,
 					 struct mlxsw_sp_acl_rule_info *rulei,
-					 struct tcf_exts *exts,
+					 struct flow_action *flow_action,
 					 struct netlink_ext_ack *extack)
 {
-	const struct tc_action *a;
+	const struct flow_action_entry *act;
+	int mirror_act_count = 0;
 	int err, i;
 
-	if (!tcf_exts_has_actions(exts))
+	if (!flow_action_has_entries(flow_action))
 		return 0;
 
 	/* Count action is inserted first */
@@ -31,27 +32,31 @@
 	if (err)
 		return err;
 
-	tcf_exts_for_each_action(i, a, exts) {
-		if (is_tcf_gact_ok(a)) {
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_ACCEPT:
 			err = mlxsw_sp_acl_rulei_act_terminate(rulei);
 			if (err) {
 				NL_SET_ERR_MSG_MOD(extack, "Cannot append terminate action");
 				return err;
 			}
-		} else if (is_tcf_gact_shot(a)) {
+			break;
+		case FLOW_ACTION_DROP:
 			err = mlxsw_sp_acl_rulei_act_drop(rulei);
 			if (err) {
 				NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action");
 				return err;
 			}
-		} else if (is_tcf_gact_trap(a)) {
+			break;
+		case FLOW_ACTION_TRAP:
 			err = mlxsw_sp_acl_rulei_act_trap(rulei);
 			if (err) {
 				NL_SET_ERR_MSG_MOD(extack, "Cannot append trap action");
 				return err;
 			}
-		} else if (is_tcf_gact_goto_chain(a)) {
-			u32 chain_index = tcf_gact_goto_chain_index(a);
+			break;
+		case FLOW_ACTION_GOTO: {
+			u32 chain_index = act->chain_index;
 			struct mlxsw_sp_acl_ruleset *ruleset;
 			u16 group_id;
 
@@ -67,11 +72,23 @@
 				NL_SET_ERR_MSG_MOD(extack, "Cannot append jump action");
 				return err;
 			}
-		} else if (is_tcf_mirred_egress_redirect(a)) {
+			}
+			break;
+		case FLOW_ACTION_REDIRECT: {
 			struct net_device *out_dev;
 			struct mlxsw_sp_fid *fid;
 			u16 fid_index;
 
+			if (mlxsw_sp_acl_block_is_egress_bound(block)) {
+				NL_SET_ERR_MSG_MOD(extack, "Redirect action is not supported on egress");
+				return -EOPNOTSUPP;
+			}
+
+			/* Forbid the block containing this rulei from
+			 * being bound to egress in the future.
+			 */
+			rulei->egress_bind_blocker = 1;
+
 			fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp);
 			fid_index = mlxsw_sp_fid_index(fid);
 			err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei,
@@ -79,29 +96,38 @@
 			if (err)
 				return err;
 
-			out_dev = tcf_mirred_dev(a);
+			out_dev = act->dev;
 			err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei,
 							 out_dev, extack);
 			if (err)
 				return err;
-		} else if (is_tcf_mirred_egress_mirror(a)) {
-			struct net_device *out_dev = tcf_mirred_dev(a);
+			}
+			break;
+		case FLOW_ACTION_MIRRED: {
+			struct net_device *out_dev = act->dev;
+
+			if (mirror_act_count++) {
+				NL_SET_ERR_MSG_MOD(extack, "Multiple mirror actions per rule are not supported");
+				return -EOPNOTSUPP;
+			}
 
 			err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei,
 							    block, out_dev,
 							    extack);
 			if (err)
 				return err;
-		} else if (is_tcf_vlan(a)) {
-			u16 proto = be16_to_cpu(tcf_vlan_push_proto(a));
-			u32 action = tcf_vlan_action(a);
-			u8 prio = tcf_vlan_push_prio(a);
-			u16 vid = tcf_vlan_push_vid(a);
+			}
+			break;
+		case FLOW_ACTION_VLAN_MANGLE: {
+			u16 proto = be16_to_cpu(act->vlan.proto);
+			u8 prio = act->vlan.prio;
+			u16 vid = act->vlan.vid;
 
 			return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei,
-							   action, vid,
+							   act->id, vid,
 							   proto, prio, extack);
-		} else {
+			}
+		default:
 			NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
 			dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");
 			return -EOPNOTSUPP;
@@ -110,72 +136,106 @@
 	return 0;
 }
 
-static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
-				       struct tc_cls_flower_offload *f)
+static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
+				      struct flow_cls_offload *f,
+				      struct mlxsw_sp_acl_block *block)
 {
-	struct flow_dissector_key_ipv4_addrs *key =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-					  f->key);
-	struct flow_dissector_key_ipv4_addrs *mask =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-					  f->mask);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct net_device *ingress_dev;
+	struct flow_match_meta match;
+
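+	/* A meta match on the ingress ifindex is translated into a match
+	 * on the corresponding local port of this device.
+	 */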
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
+		return 0;
+
+	flow_rule_match_meta(rule, &match);
+	if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask");
+		return -EINVAL;
+	}
+
+	ingress_dev = __dev_get_by_index(block->net,
+					 match.key->ingress_ifindex);
+	if (!ingress_dev) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on");
+		return -EINVAL;
+	}
+
+	if (!mlxsw_sp_port_dev_check(ingress_dev)) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port");
+		return -EINVAL;
+	}
+
+	mlxsw_sp_port = netdev_priv(ingress_dev);
+	if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device");
+		return -EINVAL;
+	}
+
+	mlxsw_sp_acl_rulei_keymask_u32(rulei,
+				       MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
+				       mlxsw_sp_port->local_port,
+				       0xFFFFFFFF);
+	return 0;
+}
+
+static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
+				       struct flow_cls_offload *f)
+{
+	struct flow_match_ipv4_addrs match;
+
+	flow_rule_match_ipv4_addrs(f->rule, &match);
 
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
-				       (char *) &key->src,
-				       (char *) &mask->src, 4);
+				       (char *) &match.key->src,
+				       (char *) &match.mask->src, 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
-				       (char *) &key->dst,
-				       (char *) &mask->dst, 4);
+				       (char *) &match.key->dst,
+				       (char *) &match.mask->dst, 4);
 }
 
 static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
-				       struct tc_cls_flower_offload *f)
+				       struct flow_cls_offload *f)
 {
-	struct flow_dissector_key_ipv6_addrs *key =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-					  f->key);
-	struct flow_dissector_key_ipv6_addrs *mask =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-					  f->mask);
+	struct flow_match_ipv6_addrs match;
+
+	flow_rule_match_ipv6_addrs(f->rule, &match);
 
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127,
-				       &key->src.s6_addr[0x0],
-				       &mask->src.s6_addr[0x0], 4);
+				       &match.key->src.s6_addr[0x0],
+				       &match.mask->src.s6_addr[0x0], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95,
-				       &key->src.s6_addr[0x4],
-				       &mask->src.s6_addr[0x4], 4);
+				       &match.key->src.s6_addr[0x4],
+				       &match.mask->src.s6_addr[0x4], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63,
-				       &key->src.s6_addr[0x8],
-				       &mask->src.s6_addr[0x8], 4);
+				       &match.key->src.s6_addr[0x8],
+				       &match.mask->src.s6_addr[0x8], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
-				       &key->src.s6_addr[0xC],
-				       &mask->src.s6_addr[0xC], 4);
+				       &match.key->src.s6_addr[0xC],
+				       &match.mask->src.s6_addr[0xC], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127,
-				       &key->dst.s6_addr[0x0],
-				       &mask->dst.s6_addr[0x0], 4);
+				       &match.key->dst.s6_addr[0x0],
+				       &match.mask->dst.s6_addr[0x0], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95,
-				       &key->dst.s6_addr[0x4],
-				       &mask->dst.s6_addr[0x4], 4);
+				       &match.key->dst.s6_addr[0x4],
+				       &match.mask->dst.s6_addr[0x4], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63,
-				       &key->dst.s6_addr[0x8],
-				       &mask->dst.s6_addr[0x8], 4);
+				       &match.key->dst.s6_addr[0x8],
+				       &match.mask->dst.s6_addr[0x8], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
-				       &key->dst.s6_addr[0xC],
-				       &mask->dst.s6_addr[0xC], 4);
+				       &match.key->dst.s6_addr[0xC],
+				       &match.mask->dst.s6_addr[0xC], 4);
 }
 
 static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_acl_rule_info *rulei,
-				       struct tc_cls_flower_offload *f,
+				       struct flow_cls_offload *f,
 				       u8 ip_proto)
 {
-	struct flow_dissector_key_ports *key, *mask;
+	const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_match_ports match;
 
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS))
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS))
 		return 0;
 
 	if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
@@ -184,27 +244,25 @@
 		return -EINVAL;
 	}
 
-	key = skb_flow_dissector_target(f->dissector,
-					FLOW_DISSECTOR_KEY_PORTS,
-					f->key);
-	mask = skb_flow_dissector_target(f->dissector,
-					 FLOW_DISSECTOR_KEY_PORTS,
-					 f->mask);
+	flow_rule_match_ports(rule, &match);
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT,
-				       ntohs(key->dst), ntohs(mask->dst));
+				       ntohs(match.key->dst),
+				       ntohs(match.mask->dst));
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT,
-				       ntohs(key->src), ntohs(mask->src));
+				       ntohs(match.key->src),
+				       ntohs(match.mask->src));
 	return 0;
 }
 
 static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_acl_rule_info *rulei,
-				     struct tc_cls_flower_offload *f,
+				     struct flow_cls_offload *f,
 				     u8 ip_proto)
 {
-	struct flow_dissector_key_tcp *key, *mask;
+	const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_match_tcp match;
 
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP))
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP))
 		return 0;
 
 	if (ip_proto != IPPROTO_TCP) {
@@ -213,25 +271,29 @@
 		return -EINVAL;
 	}
 
-	key = skb_flow_dissector_target(f->dissector,
-					FLOW_DISSECTOR_KEY_TCP,
-					f->key);
-	mask = skb_flow_dissector_target(f->dissector,
-					 FLOW_DISSECTOR_KEY_TCP,
-					 f->mask);
+	flow_rule_match_tcp(rule, &match);
+
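+	/* 0x0E00 masks the reserved bits of the TCP flags word. */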
+	if (match.mask->flags & htons(0x0E00)) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "TCP flags match not supported on reserved bits");
+		dev_err(mlxsw_sp->bus_info->dev, "TCP flags match not supported on reserved bits\n");
+		return -EINVAL;
+	}
+
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS,
-				       ntohs(key->flags), ntohs(mask->flags));
+				       ntohs(match.key->flags),
+				       ntohs(match.mask->flags));
 	return 0;
 }
 
 static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_acl_rule_info *rulei,
-				    struct tc_cls_flower_offload *f,
+				    struct flow_cls_offload *f,
 				    u16 n_proto)
 {
-	struct flow_dissector_key_ip *key, *mask;
+	const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_match_ip match;
 
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP))
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP))
 		return 0;
 
 	if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
@@ -240,20 +302,18 @@
 		return -EINVAL;
 	}
 
-	key = skb_flow_dissector_target(f->dissector,
-					FLOW_DISSECTOR_KEY_IP,
-					f->key);
-	mask = skb_flow_dissector_target(f->dissector,
-					 FLOW_DISSECTOR_KEY_IP,
-					 f->mask);
+	flow_rule_match_ip(rule, &match);
+
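+	/* TTL is taken as-is, while the TOS octet is split into ECN (the
+	 * two low bits) and DSCP (the six high bits, hence the shift by
+	 * two), each programmed as a separate flex key element.
+	 */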
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_,
-				       key->ttl, mask->ttl);
+				       match.key->ttl, match.mask->ttl);
 
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN,
-				       key->tos & 0x3, mask->tos & 0x3);
+				       match.key->tos & 0x3,
+				       match.mask->tos & 0x3);
 
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
-				       key->tos >> 6, mask->tos >> 6);
+				       match.key->tos >> 2,
+				       match.mask->tos >> 2);
 
 	return 0;
 }
@@ -261,16 +321,19 @@
 static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_block *block,
 				 struct mlxsw_sp_acl_rule_info *rulei,
-				 struct tc_cls_flower_offload *f)
+				 struct flow_cls_offload *f)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 n_proto_mask = 0;
 	u16 n_proto_key = 0;
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 	int err;
 
-	if (f->dissector->used_keys &
-	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	if (dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
+	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
@@ -286,25 +349,23 @@
 
 	mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
-		addr_type = key->addr_type;
+	err = mlxsw_sp_flower_parse_meta(rulei, f, block);
+	if (err)
+		return err;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		n_proto_key = ntohs(key->n_proto);
-		n_proto_mask = ntohs(mask->n_proto);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
 
 		if (n_proto_key == ETH_P_ALL) {
 			n_proto_key = 0;
@@ -314,60 +375,59 @@
 					       MLXSW_AFK_ELEMENT_ETHERTYPE,
 					       n_proto_key, n_proto_mask);
 
-		ip_proto = key->ip_proto;
+		ip_proto = match.key->ip_proto;
 		mlxsw_sp_acl_rulei_keymask_u32(rulei,
 					       MLXSW_AFK_ELEMENT_IP_PROTO,
-					       key->ip_proto, mask->ip_proto);
+					       match.key->ip_proto,
+					       match.mask->ip_proto);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
+		flow_rule_match_eth_addrs(rule, &match);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_DMAC_32_47,
-					       key->dst, mask->dst, 2);
+					       match.key->dst,
+					       match.mask->dst, 2);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_DMAC_0_31,
-					       key->dst + 2, mask->dst + 2, 4);
+					       match.key->dst + 2,
+					       match.mask->dst + 2, 4);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_SMAC_32_47,
-					       key->src, mask->src, 2);
+					       match.key->src,
+					       match.mask->src, 2);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_SMAC_0_31,
-					       key->src + 2, mask->src + 2, 4);
+					       match.key->src + 2,
+					       match.mask->src + 2, 4);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
+		flow_rule_match_vlan(rule, &match);
 		if (mlxsw_sp_acl_block_is_egress_bound(block)) {
 			NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress");
 			return -EOPNOTSUPP;
 		}
-		if (mask->vlan_id != 0)
+
+		/* Forbid the block holding this rulei from being
+		 * bound to egress in the future.
+		 */
+		rulei->egress_bind_blocker = 1;
+
+		if (match.mask->vlan_id != 0)
 			mlxsw_sp_acl_rulei_keymask_u32(rulei,
 						       MLXSW_AFK_ELEMENT_VID,
-						       key->vlan_id,
-						       mask->vlan_id);
-		if (mask->vlan_priority != 0)
+						       match.key->vlan_id,
+						       match.mask->vlan_id);
+		if (match.mask->vlan_priority != 0)
 			mlxsw_sp_acl_rulei_keymask_u32(rulei,
 						       MLXSW_AFK_ELEMENT_PCP,
-						       key->vlan_priority,
-						       mask->vlan_priority);
+						       match.key->vlan_priority,
+						       match.mask->vlan_priority);
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
@@ -387,13 +447,14 @@
 	if (err)
 		return err;
 
-	return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, f->exts,
+	return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei,
+					     &f->rule->action,
 					     f->common.extack);
 }
 
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
-			    struct tc_cls_flower_offload *f)
+			    struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_rule_info *rulei;
 	struct mlxsw_sp_acl_ruleset *ruleset;
@@ -406,7 +467,7 @@
 	if (IS_ERR(ruleset))
 		return PTR_ERR(ruleset);
 
-	rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie,
+	rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie, NULL,
 					f->common.extack);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
@@ -440,7 +501,7 @@
 
 void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
 			     struct mlxsw_sp_acl_block *block,
-			     struct tc_cls_flower_offload *f)
+			     struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
 	struct mlxsw_sp_acl_rule *rule;
@@ -462,7 +523,7 @@
 
 int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
 			  struct mlxsw_sp_acl_block *block,
-			  struct tc_cls_flower_offload *f)
+			  struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
 	struct mlxsw_sp_acl_rule *rule;
@@ -486,7 +547,7 @@
 	if (err)
 		goto err_rule_get_stats;
 
-	tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+	flow_stats_update(&f->stats, bytes, packets, lastuse);
 
 	mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
 	return 0;
@@ -498,7 +559,7 @@
 
 int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_block *block,
-				 struct tc_cls_flower_offload *f)
+				 struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
 	struct mlxsw_sp_acl_rule_info rulei;
@@ -519,7 +580,7 @@
 
 void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_acl_block *block,
-				   struct tc_cls_flower_offload *f)
+				   struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 00db26c..6400cd6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -145,6 +145,7 @@
 				     struct mlxsw_sp_ipip_entry *ipip_entry)
 {
 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+	u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb);
 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
 	struct ip_tunnel_parm parms;
 	unsigned int type_check;
@@ -157,6 +158,7 @@
 	ikey = mlxsw_sp_ipip_parms4_ikey(parms);
 
 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
+	mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id);
 
 	type_check = has_ikey ?
 		MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
new file mode 100644
index 0000000..17f334b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_nve.h"
+
+const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = {
+	[MLXSW_SP_NVE_TYPE_VXLAN]	= &mlxsw_sp1_nve_vxlan_ops,
+};
+
+const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = {
+	[MLXSW_SP_NVE_TYPE_VXLAN]	= &mlxsw_sp2_nve_vxlan_ops,
+};
+
+struct mlxsw_sp_nve_mc_entry;
+struct mlxsw_sp_nve_mc_record;
+struct mlxsw_sp_nve_mc_list;
+
+struct mlxsw_sp_nve_mc_record_ops {
+	enum mlxsw_reg_tnumt_record_type type;
+	int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record,
+			 struct mlxsw_sp_nve_mc_entry *mc_entry,
+			 const union mlxsw_sp_l3addr *addr);
+	void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record,
+			  const struct mlxsw_sp_nve_mc_entry *mc_entry);
+	void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record,
+			  const struct mlxsw_sp_nve_mc_entry *mc_entry,
+			  char *tnumt_pl, unsigned int entry_index);
+	bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record,
+			      const struct mlxsw_sp_nve_mc_entry *mc_entry,
+			      const union mlxsw_sp_l3addr *addr);
+};
+
+struct mlxsw_sp_nve_mc_list_key {
+	u16 fid_index;
+};
+
+struct mlxsw_sp_nve_mc_ipv6_entry {
+	struct in6_addr addr6;
+	u32 addr6_kvdl_index;
+};
+
+struct mlxsw_sp_nve_mc_entry {
+	union {
+		__be32 addr4;
+		struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry;
+	};
+	u8 valid:1;
+};
+
+struct mlxsw_sp_nve_mc_record {
+	struct list_head list;
+	enum mlxsw_sp_l3proto proto;
+	unsigned int num_entries;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_nve_mc_list *mc_list;
+	const struct mlxsw_sp_nve_mc_record_ops *ops;
+	u32 kvdl_index;
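+	/* Flexible array of entries, allocated with struct_size() for the
+	 * per-protocol maximum number of entries.
+	 */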
+	struct mlxsw_sp_nve_mc_entry entries[0];
+};
+
+struct mlxsw_sp_nve_mc_list {
+	struct list_head records_list;
+	struct rhash_head ht_node;
+	struct mlxsw_sp_nve_mc_list_key key;
+};
+
+static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = {
+	.key_len = sizeof(struct mlxsw_sp_nve_mc_list_key),
+	.key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key),
+	.head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node),
+};
+
+static int
+mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+				      struct mlxsw_sp_nve_mc_entry *mc_entry,
+				      const union mlxsw_sp_l3addr *addr)
+{
+	mc_entry->addr4 = addr->addr4;
+
+	return 0;
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+				      const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+				      const struct mlxsw_sp_nve_mc_entry *mc_entry,
+				      char *tnumt_pl, unsigned int entry_index)
+{
+	u32 udip = be32_to_cpu(mc_entry->addr4);
+
+	mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+					  const struct mlxsw_sp_nve_mc_entry *mc_entry,
+					  const union mlxsw_sp_l3addr *addr)
+{
+	return mc_entry->addr4 == addr->addr4;
+}
+
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv4_ops = {
+	.type		= MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
+	.entry_add	= &mlxsw_sp_nve_mc_record_ipv4_entry_add,
+	.entry_del	= &mlxsw_sp_nve_mc_record_ipv4_entry_del,
+	.entry_set	= &mlxsw_sp_nve_mc_record_ipv4_entry_set,
+	.entry_compare	= &mlxsw_sp_nve_mc_record_ipv4_entry_compare,
+};
+
+static int
+mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+				      struct mlxsw_sp_nve_mc_entry *mc_entry,
+				      const union mlxsw_sp_l3addr *addr)
+{
+	WARN_ON(1);
+
+	return -EINVAL;
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+				      const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+				      const struct mlxsw_sp_nve_mc_entry *mc_entry,
+				      char *tnumt_pl, unsigned int entry_index)
+{
+	u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index;
+
+	mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+					  const struct mlxsw_sp_nve_mc_entry *mc_entry,
+					  const union mlxsw_sp_l3addr *addr)
+{
+	return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6);
+}
+
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv6_ops = {
+	.type		= MLXSW_REG_TNUMT_RECORD_TYPE_IPV6,
+	.entry_add	= &mlxsw_sp_nve_mc_record_ipv6_entry_add,
+	.entry_del	= &mlxsw_sp_nve_mc_record_ipv6_entry_del,
+	.entry_set	= &mlxsw_sp_nve_mc_record_ipv6_entry_set,
+	.entry_compare	= &mlxsw_sp_nve_mc_record_ipv6_entry_compare,
+};
+
+static const struct mlxsw_sp_nve_mc_record_ops *
+mlxsw_sp_nve_mc_record_ops_arr[] = {
+	[MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops,
+	[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops,
+};
+
+int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip,
+				    enum mlxsw_sp_l3proto proto,
+				    union mlxsw_sp_l3addr *addr)
+{
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		addr->addr4 = cpu_to_be32(uip);
+		return 0;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp,
+			  const struct mlxsw_sp_nve_mc_list_key *key)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+	return rhashtable_lookup_fast(&nve->mc_list_ht, key,
+				      mlxsw_sp_nve_mc_list_ht_params);
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp,
+			    const struct mlxsw_sp_nve_mc_list_key *key)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+	struct mlxsw_sp_nve_mc_list *mc_list;
+	int err;
+
+	mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL);
+	if (!mc_list)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&mc_list->records_list);
+	mc_list->key = *key;
+
+	err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node,
+				     mlxsw_sp_nve_mc_list_ht_params);
+	if (err)
+		goto err_rhashtable_insert;
+
+	return mc_list;
+
+err_rhashtable_insert:
+	kfree(mc_list);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp,
+					 struct mlxsw_sp_nve_mc_list *mc_list)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+	rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node,
+			       mlxsw_sp_nve_mc_list_ht_params);
+	WARN_ON(!list_empty(&mc_list->records_list));
+	kfree(mc_list);
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp,
+			 const struct mlxsw_sp_nve_mc_list_key *key)
+{
+	struct mlxsw_sp_nve_mc_list *mc_list;
+
+	mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key);
+	if (mc_list)
+		return mc_list;
+
+	return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key);
+}
+
+static void
+mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp,
+			 struct mlxsw_sp_nve_mc_list *mc_list)
+{
+	if (!list_empty(&mc_list->records_list))
+		return;
+	mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_nve_mc_list *mc_list,
+			      enum mlxsw_sp_l3proto proto)
+{
+	unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto];
+	struct mlxsw_sp_nve_mc_record *mc_record;
+	int err;
+
+	mc_record = kzalloc(struct_size(mc_record, entries, num_max_entries),
+			    GFP_KERNEL);
+	if (!mc_record)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+				  &mc_record->kvdl_index);
+	if (err)
+		goto err_kvdl_alloc;
+
+	mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto];
+	mc_record->mlxsw_sp = mlxsw_sp;
+	mc_record->mc_list = mc_list;
+	mc_record->proto = proto;
+	list_add_tail(&mc_record->list, &mc_list->records_list);
+
+	return mc_record;
+
+err_kvdl_alloc:
+	kfree(mc_record);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+	struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+
+	list_del(&mc_record->list);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+			   mc_record->kvdl_index);
+	WARN_ON(mc_record->num_entries);
+	kfree(mc_record);
+}
+
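+/* Return a record of the given protocol that still has room for another
+ * entry, preferring the newest records at the tail of the list, or create
+ * a fresh record at the tail when none has a free slot.
+ */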
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_nve_mc_list *mc_list,
+			   enum mlxsw_sp_l3proto proto)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record;
+
+	list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) {
+		unsigned int num_entries = mc_record->num_entries;
+		struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+		if (mc_record->proto == proto &&
+		    num_entries < nve->num_max_mc_entries[proto])
+			return mc_record;
+	}
+
+	return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto);
+}
+
+static void
+mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+	if (mc_record->num_entries != 0)
+		return;
+
+	mlxsw_sp_nve_mc_record_destroy(mc_record);
+}
+
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+	struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+	unsigned int num_max_entries;
+	int i;
+
+	num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+	for (i = 0; i < num_max_entries; i++) {
+		if (mc_record->entries[i].valid)
+			continue;
+		return &mc_record->entries[i];
+	}
+
+	return NULL;
+}
+
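+/* Re-encode the record into the device via the TNUMT register. Records of
+ * a list are chained: each record carries the KVDL index of the record
+ * that follows it, so a refresh rewrites both the valid entries and the
+ * link to the next record.
+ */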
+static int
+mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+	enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type;
+	struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+	struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+	char tnumt_pl[MLXSW_REG_TNUMT_LEN];
+	unsigned int num_max_entries;
+	unsigned int num_entries = 0;
+	u32 next_kvdl_index = 0;
+	bool next_valid = false;
+	int i;
+
+	if (!list_is_last(&mc_record->list, &mc_list->records_list)) {
+		struct mlxsw_sp_nve_mc_record *next_record;
+
+		next_record = list_next_entry(mc_record, list);
+		next_kvdl_index = next_record->kvdl_index;
+		next_valid = true;
+	}
+
+	mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
+			     mc_record->kvdl_index, next_valid,
+			     next_kvdl_index, mc_record->num_entries);
+
+	num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto];
+	for (i = 0; i < num_max_entries; i++) {
+		struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+		mc_entry = &mc_record->entries[i];
+		if (!mc_entry->valid)
+			continue;
+		mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl,
+					  num_entries++);
+	}
+
+	WARN_ON(num_entries != mc_record->num_entries);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+	struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+	struct mlxsw_sp_nve_mc_record *first_record;
+
+	first_record = list_first_entry(&mc_list->records_list,
+					struct mlxsw_sp_nve_mc_record, list);
+
+	return mc_record == first_record;
+}
+
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record,
+			   union mlxsw_sp_l3addr *addr)
+{
+	struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+	unsigned int num_max_entries;
+	int i;
+
+	num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+	for (i = 0; i < num_max_entries; i++) {
+		struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+		mc_entry = &mc_record->entries[i];
+		if (!mc_entry->valid)
+			continue;
+		if (mc_record->ops->entry_compare(mc_record, mc_entry, addr))
+			return mc_entry;
+	}
+
+	return NULL;
+}
+
+static int
+mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record,
+			      union mlxsw_sp_l3addr *addr)
+{
+	struct mlxsw_sp_nve_mc_entry *mc_entry = NULL;
+	int err;
+
+	mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record);
+	if (WARN_ON(!mc_entry))
+		return -EINVAL;
+
+	err = mc_record->ops->entry_add(mc_record, mc_entry, addr);
+	if (err)
+		return err;
+	mc_record->num_entries++;
+	mc_entry->valid = true;
+
+	err = mlxsw_sp_nve_mc_record_refresh(mc_record);
+	if (err)
+		goto err_record_refresh;
+
+	/* If this is a new record and not the first one, then we need to
+	 * update the next pointer of the previous record
+	 */
+	if (mc_record->num_entries != 1 ||
+	    mlxsw_sp_nve_mc_record_is_first(mc_record))
+		return 0;
+
+	err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list));
+	if (err)
+		goto err_prev_record_refresh;
+
+	return 0;
+
+err_prev_record_refresh:
+err_record_refresh:
+	mc_entry->valid = false;
+	mc_record->num_entries--;
+	mc_record->ops->entry_del(mc_record, mc_entry);
+	return err;
+}
+
+static void
+mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record,
+				 struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+	struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+
+	mc_entry->valid = false;
+	mc_record->num_entries--;
+
+	/* When the record continues to exist, we only need to invalidate
+	 * the requested entry
+	 */
+	if (mc_record->num_entries != 0) {
+		mlxsw_sp_nve_mc_record_refresh(mc_record);
+		mc_record->ops->entry_del(mc_record, mc_entry);
+		return;
+	}
+
+	/* If the record needs to be deleted, but it is not the first,
+	 * then we need to make sure that the previous record no longer
+	 * points to it. Remove the deleted record from the list to
+	 * reflect that and then re-add it at the end, so that it can be
+	 * properly removed by the record destruction code
+	 */
+	if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) {
+		struct mlxsw_sp_nve_mc_record *prev_record;
+
+		prev_record = list_prev_entry(mc_record, list);
+		list_del(&mc_record->list);
+		mlxsw_sp_nve_mc_record_refresh(prev_record);
+		list_add_tail(&mc_record->list, &mc_list->records_list);
+		mc_record->ops->entry_del(mc_record, mc_entry);
+		return;
+	}
+
+	/* If the first record needs to be deleted, but the list is not
+	 * singular, then the second record needs to be written at the
+	 * first record's address, as this address is stored as a property
+	 * of the FID
+	 */
+	if (mlxsw_sp_nve_mc_record_is_first(mc_record) &&
+	    !list_is_singular(&mc_list->records_list)) {
+		struct mlxsw_sp_nve_mc_record *next_record;
+
+		next_record = list_next_entry(mc_record, list);
+		swap(mc_record->kvdl_index, next_record->kvdl_index);
+		mlxsw_sp_nve_mc_record_refresh(next_record);
+		mc_record->ops->entry_del(mc_record, mc_entry);
+		return;
+	}
+
+	/* The only case left is a singular list whose last remaining
+	 * record needs to be deleted. Simply delete the entry
+	 */
+	mc_record->ops->entry_del(mc_record, mc_entry);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list,
+			    enum mlxsw_sp_l3proto proto,
+			    union mlxsw_sp_l3addr *addr,
+			    struct mlxsw_sp_nve_mc_entry **mc_entry)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record;
+
+	list_for_each_entry(mc_record, &mc_list->records_list, list) {
+		if (mc_record->proto != proto)
+			continue;
+
+		*mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr);
+		if (*mc_entry)
+			return mc_record;
+	}
+
+	return NULL;
+}
+
+static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_nve_mc_list *mc_list,
+				       enum mlxsw_sp_l3proto proto,
+				       union mlxsw_sp_l3addr *addr)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record;
+	int err;
+
+	mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto);
+	if (IS_ERR(mc_record))
+		return PTR_ERR(mc_record);
+
+	err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr);
+	if (err)
+		goto err_ip_add;
+
+	return 0;
+
+err_ip_add:
+	mlxsw_sp_nve_mc_record_put(mc_record);
+	return err;
+}
+
+static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_nve_mc_list *mc_list,
+					enum mlxsw_sp_l3proto proto,
+					union mlxsw_sp_l3addr *addr)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record;
+	struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+	mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr,
+						&mc_entry);
+	if (!mc_record)
+		return;
+
+	mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
+	mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+static int
+mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid,
+				 struct mlxsw_sp_nve_mc_list *mc_list)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record;
+
+	/* The address of the first record in the list is a property of
+	 * the FID and we never change it. It only needs to be set when
+	 * a new list is created
+	 */
+	if (mlxsw_sp_fid_nve_flood_index_is_set(fid))
+		return 0;
+
+	mc_record = list_first_entry(&mc_list->records_list,
+				     struct mlxsw_sp_nve_mc_record, list);
+
+	return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index);
+}
+
+static void
+mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid,
+				   struct mlxsw_sp_nve_mc_list *mc_list)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record;
+
+	/* The address of the first record needs to be invalidated only when
+	 * the last record is about to be removed
+	 */
+	if (!list_is_singular(&mc_list->records_list))
+		return;
+
+	mc_record = list_first_entry(&mc_list->records_list,
+				     struct mlxsw_sp_nve_mc_record, list);
+	if (mc_record->num_entries != 1)
+		return;
+
+	return mlxsw_sp_fid_nve_flood_index_clear(fid);
+}
+
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_fid *fid,
+			      enum mlxsw_sp_l3proto proto,
+			      union mlxsw_sp_l3addr *addr)
+{
+	struct mlxsw_sp_nve_mc_list_key key = { 0 };
+	struct mlxsw_sp_nve_mc_list *mc_list;
+	int err;
+
+	key.fid_index = mlxsw_sp_fid_index(fid);
+	mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key);
+	if (IS_ERR(mc_list))
+		return PTR_ERR(mc_list);
+
+	err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr);
+	if (err)
+		goto err_add_ip;
+
+	err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list);
+	if (err)
+		goto err_fid_flood_index_set;
+
+	return 0;
+
+err_fid_flood_index_set:
+	mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+err_add_ip:
+	mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+	return err;
+}
+
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_fid *fid,
+			       enum mlxsw_sp_l3proto proto,
+			       union mlxsw_sp_l3addr *addr)
+{
+	struct mlxsw_sp_nve_mc_list_key key = { 0 };
+	struct mlxsw_sp_nve_mc_list *mc_list;
+
+	key.fid_index = mlxsw_sp_fid_index(fid);
+	mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+	if (!mc_list)
+		return;
+
+	mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list);
+	mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+	mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+static void
+mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+	struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+	unsigned int num_max_entries;
+	int i;
+
+	num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+	for (i = 0; i < num_max_entries; i++) {
+		struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i];
+
+		if (!mc_entry->valid)
+			continue;
+		mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
+	}
+
+	WARN_ON(mc_record->num_entries);
+	mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_nve_mc_record *mc_record, *tmp;
+	struct mlxsw_sp_nve_mc_list_key key = { 0 };
+	struct mlxsw_sp_nve_mc_list *mc_list;
+
+	if (!mlxsw_sp_fid_nve_flood_index_is_set(fid))
+		return;
+
+	mlxsw_sp_fid_nve_flood_index_clear(fid);
+
+	key.fid_index = mlxsw_sp_fid_index(fid);
+	mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+	if (WARN_ON(!mc_list))
+		return;
+
+	list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list)
+		mlxsw_sp_nve_mc_record_delete(mc_record);
+
+	WARN_ON(!list_empty(&mc_list->records_list));
+	mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
+{
+	WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0);
+
+	return mlxsw_sp->nve->tunnel_index;
+}
+
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+				      u32 tb_id, __be32 addr)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+	struct mlxsw_sp_nve_config *config = &nve->config;
+
+	if (nve->num_nve_tunnels &&
+	    config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 &&
+	    config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id)
+		return true;
+
+	return false;
+}
+
+static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_nve_config *config)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+	const struct mlxsw_sp_nve_ops *ops;
+	int err;
+
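+	/* The device supports a single NVE configuration that is shared by
+	 * all users; only the first caller performs the actual setup, while
+	 * subsequent callers merely take a reference.
+	 */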
+	if (nve->num_nve_tunnels++ != 0)
+		return 0;
+
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+				  &nve->tunnel_index);
+	if (err)
+		goto err_kvdl_alloc;
+
+	ops = nve->nve_ops_arr[config->type];
+	err = ops->init(nve, config);
+	if (err)
+		goto err_ops_init;
+
+	return 0;
+
+err_ops_init:
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+			   nve->tunnel_index);
+err_kvdl_alloc:
+	nve->num_nve_tunnels--;
+	return err;
+}
+
+static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+	const struct mlxsw_sp_nve_ops *ops;
+
+	ops = nve->nve_ops_arr[nve->config.type];
+
+	if (mlxsw_sp->nve->num_nve_tunnels == 1) {
+		ops->fini(nve);
+		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+				   nve->tunnel_index);
+		memset(&nve->config, 0, sizeof(nve->config));
+	}
+	nve->num_nve_tunnels--;
+}
+
+static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp,
+					  u16 fid_index)
+{
+	char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+	mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID);
+	mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static void mlxsw_sp_nve_fdb_clear_offload(struct mlxsw_sp *mlxsw_sp,
+					   const struct mlxsw_sp_fid *fid,
+					   const struct net_device *nve_dev,
+					   __be32 vni)
+{
+	const struct mlxsw_sp_nve_ops *ops;
+	enum mlxsw_sp_nve_type type;
+
+	if (WARN_ON(mlxsw_sp_fid_nve_type(fid, &type)))
+		return;
+
+	ops = mlxsw_sp->nve->nve_ops_arr[type];
+	ops->fdb_clear_offload(nve_dev, vni);
+}
+
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+			    struct mlxsw_sp_nve_params *params,
+			    struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+	const struct mlxsw_sp_nve_ops *ops;
+	struct mlxsw_sp_nve_config config;
+	int err;
+
+	ops = nve->nve_ops_arr[params->type];
+
+	if (!ops->can_offload(nve, params->dev, extack))
+		return -EINVAL;
+
+	memset(&config, 0, sizeof(config));
+	ops->nve_config(nve, params->dev, &config);
+	if (nve->num_nve_tunnels &&
+	    memcmp(&config, &nve->config, sizeof(config))) {
+		NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration");
+		return -EINVAL;
+	}
+
+	err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel");
+		return err;
+	}
+
+	err = mlxsw_sp_fid_vni_set(fid, params->type, params->vni,
+				   params->dev->ifindex);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID");
+		goto err_fid_vni_set;
+	}
+
+	nve->config = config;
+
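+	/* FDB entries may have been added to the NVE device before the FID
+	 * was assigned a VNI; replay them so that they are reflected to the
+	 * device.
+	 */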
+	err = ops->fdb_replay(params->dev, params->vni, extack);
+	if (err)
+		goto err_fdb_replay;
+
+	return 0;
+
+err_fdb_replay:
+	mlxsw_sp_fid_vni_clear(fid);
+err_fid_vni_set:
+	mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+	return err;
+}
+
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_fid *fid)
+{
+	u16 fid_index = mlxsw_sp_fid_index(fid);
+	struct net_device *nve_dev;
+	int nve_ifindex;
+	__be32 vni;
+
+	mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid);
+	mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index);
+
+	if (WARN_ON(mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex) ||
+		    mlxsw_sp_fid_vni(fid, &vni)))
+		goto out;
+
+	nve_dev = dev_get_by_index(&init_net, nve_ifindex);
+	if (!nve_dev)
+		goto out;
+
+	mlxsw_sp_nve_fdb_clear_offload(mlxsw_sp, fid, nve_dev, vni);
+	mlxsw_sp_fid_fdb_clear_offload(fid, nve_dev);
+
+	dev_put(nve_dev);
+
+out:
+	mlxsw_sp_fid_vni_clear(fid);
+	mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+}
+
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char tnqdr_pl[MLXSW_REG_TNQDR_LEN];
+
+	mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl);
+}
+
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+}
+
+static int mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp)
+{
+	char tnqcr_pl[MLXSW_REG_TNQCR_LEN];
+
+	mlxsw_reg_tnqcr_pack(tnqcr_pl);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl);
+}
+
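+/* Program the TNEEM register so that encapsulation uses the same
+ * inner-to-outer ECN mapping as the INET_ECN_encapsulate() helper
+ * (RFC 6040): the inner value is copied, except for CE, which is
+ * encapsulated as ECT(0).
+ */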
+static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+
+	/* Iterate over inner ECN values */
+	for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
+		u8 outer_ecn = INET_ECN_encapsulate(0, i);
+		char tneem_pl[MLXSW_REG_TNEEM_LEN];
+		int err;
+
+		mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn);
+		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem),
+				      tneem_pl);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
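+/* Program a single TNDEM entry: __INET_ECN_decapsulate() computes the new
+ * inner ECN value for the inner/outer pair according to RFC 6040 and
+ * flags the combinations that should not be forwarded, which are trapped
+ * to the CPU instead.
+ */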
+static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp,
+					 u8 inner_ecn, u8 outer_ecn)
+{
+	char tndem_pl[MLXSW_REG_TNDEM_LEN];
+	bool trap_en, set_ce = false;
+	u8 new_inner_ecn;
+
+	trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
+	new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn;
+
+	mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn,
+			     trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl);
+}
+
+static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+
+	/* Iterate over inner ECN values */
+	for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
+		int j;
+
+		/* Iterate over outer ECN values */
+		for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) {
+			int err;
+
+			err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int err;
+
+	err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp);
+	if (err)
+		return err;
+
+	return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp);
+}
+
+static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp)
+{
+	unsigned int max;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) ||
+	    !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6))
+		return -EIO;
+	max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4);
+	mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max;
+	max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6);
+	mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max;
+
+	return 0;
+}
+
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_nve *nve;
+	int err;
+
+	nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL);
+	if (!nve)
+		return -ENOMEM;
+	mlxsw_sp->nve = nve;
+	nve->mlxsw_sp = mlxsw_sp;
+	nve->nve_ops_arr = mlxsw_sp->nve_ops_arr;
+
+	err = rhashtable_init(&nve->mc_list_ht,
+			      &mlxsw_sp_nve_mc_list_ht_params);
+	if (err)
+		goto err_rhashtable_init;
+
+	err = mlxsw_sp_nve_qos_init(mlxsw_sp);
+	if (err)
+		goto err_nve_qos_init;
+
+	err = mlxsw_sp_nve_ecn_init(mlxsw_sp);
+	if (err)
+		goto err_nve_ecn_init;
+
+	err = mlxsw_sp_nve_resources_query(mlxsw_sp);
+	if (err)
+		goto err_nve_resources_query;
+
+	return 0;
+
+err_nve_resources_query:
+err_nve_ecn_init:
+err_nve_qos_init:
+	rhashtable_destroy(&nve->mc_list_ht);
+err_rhashtable_init:
+	mlxsw_sp->nve = NULL;
+	kfree(nve);
+	return err;
+}
+
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	WARN_ON(mlxsw_sp->nve->num_nve_tunnels);
+	rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht);
+	kfree(mlxsw_sp->nve);
+	mlxsw_sp->nve = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
new file mode 100644
index 0000000..12f664f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_NVE_H
+#define _MLXSW_SPECTRUM_NVE_H
+
+#include <linux/netlink.h>
+#include <linux/rhashtable.h>
+
+#include "spectrum.h"
+
+struct mlxsw_sp_nve_config {
+	enum mlxsw_sp_nve_type type;
+	u8 ttl;
+	u8 learning_en:1;
+	__be16 udp_dport;
+	__be32 flowlabel;
+	u32 ul_tb_id;
+	enum mlxsw_sp_l3proto ul_proto;
+	union mlxsw_sp_l3addr ul_sip;
+};
+
+struct mlxsw_sp_nve {
+	struct mlxsw_sp_nve_config config;
+	struct rhashtable mc_list_ht;
+	struct mlxsw_sp *mlxsw_sp;
+	const struct mlxsw_sp_nve_ops **nve_ops_arr;
+	unsigned int num_nve_tunnels;	/* Protected by RTNL */
+	unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
+	u32 tunnel_index;
+	u16 ul_rif_index;	/* Reserved for Spectrum */
+	unsigned int inc_parsing_depth_refs;
+};
+
+struct mlxsw_sp_nve_ops {
+	enum mlxsw_sp_nve_type type;
+	bool (*can_offload)(const struct mlxsw_sp_nve *nve,
+			    const struct net_device *dev,
+			    struct netlink_ext_ack *extack);
+	void (*nve_config)(const struct mlxsw_sp_nve *nve,
+			   const struct net_device *dev,
+			   struct mlxsw_sp_nve_config *config);
+	int (*init)(struct mlxsw_sp_nve *nve,
+		    const struct mlxsw_sp_nve_config *config);
+	void (*fini)(struct mlxsw_sp_nve *nve);
+	int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni,
+			  struct netlink_ext_ack *extack);
+	void (*fdb_clear_offload)(const struct net_device *nve_dev, __be32 vni);
+};
+
+extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops;
+extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
new file mode 100644
index 0000000..05517c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <net/vxlan.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_nve.h"
+
+/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
+ *
+ * In the worst case - where we have a VLAN tag on the outer Ethernet
+ * header and IPv6 in overlay and underlay - we need to parse 128 bytes
+ */
+#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
+#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
+
+#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS	(VXLAN_F_UDP_ZERO_CSUM_TX | \
+						 VXLAN_F_LEARN)
+
+static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+					   const struct net_device *dev,
+					   struct netlink_ext_ack *extack)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_config *cfg = &vxlan->cfg;
+
+	if (cfg->saddr.sa.sa_family != AF_INET) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
+		return false;
+	}
+
+	if (vxlan_addr_multicast(&cfg->remote_ip)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
+		return false;
+	}
+
+	if (vxlan_addr_any(&cfg->saddr)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified");
+		return false;
+	}
+
+	if (cfg->remote_ifindex) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported");
+		return false;
+	}
+
+	if (cfg->port_min || cfg->port_max) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported");
+		return false;
+	}
+
+	if (cfg->tos != 1) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit");
+		return false;
+	}
+
+	if (cfg->flags & VXLAN_F_TTL_INHERIT) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit");
+		return false;
+	}
+
+	if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
+		return false;
+	}
+
+	if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
+		return false;
+	}
+
+	if (cfg->ttl == 0) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0");
+		return false;
+	}
+
+	if (cfg->label != 0) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0");
+		return false;
+	}
+
+	return true;
+}
+
+static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
+				      const struct net_device *dev,
+				      struct mlxsw_sp_nve_config *config)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_config *cfg = &vxlan->cfg;
+
+	config->type = MLXSW_SP_NVE_TYPE_VXLAN;
+	config->ttl = cfg->ttl;
+	config->flowlabel = cfg->label;
+	config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0;
+	config->ul_tb_id = RT_TABLE_MAIN;
+	config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
+	config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
+	config->udp_dport = cfg->dst_port;
+}
+
+static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+				      unsigned int parsing_depth,
+				      __be16 udp_dport)
+{
+	char mprs_pl[MLXSW_REG_MPRS_LEN];
+
+	mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+}
+
+static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+				    __be16 udp_dport)
+{
+	int parsing_depth = mlxsw_sp->nve->inc_parsing_depth_refs ?
+				MLXSW_SP_NVE_VXLAN_PARSING_DEPTH :
+				MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH;
+
+	return __mlxsw_sp_nve_parsing_set(mlxsw_sp, parsing_depth, udp_dport);
+}
+
+static int
+__mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp,
+				     __be16 udp_dport)
+{
+	int err;
+
+	mlxsw_sp->nve->inc_parsing_depth_refs++;
+
+	err = mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
+	if (err)
+		goto err_nve_parsing_set;
+	return 0;
+
+err_nve_parsing_set:
+	mlxsw_sp->nve->inc_parsing_depth_refs--;
+	return err;
+}
+
+static void
+__mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp,
+				     __be16 udp_dport)
+{
+	mlxsw_sp->nve->inc_parsing_depth_refs--;
+	mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
+}
+
+int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp)
+{
+	__be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
+
+	return __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, udp_dport);
+}
+
+void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp)
+{
+	__be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
+
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, udp_dport);
+}
+
+static void
+mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
+				  const struct mlxsw_sp_nve_config *config)
+{
+	u8 udp_sport;
+
+	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
+			     config->ttl);
+	/* VxLAN driver's default UDP source port range is 32768 (0x8000)
+	 * to 60999 (0xee47). Set the upper 8 bits of the UDP source port
+	 * to a random number between 0x80 and 0xee
+	 */
+	get_random_bytes(&udp_sport, sizeof(udp_sport));
+	udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
+	mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
+	mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
+}
+
+static int
+mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
+			       const struct mlxsw_sp_nve_config *config)
+{
+	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+	u16 ul_vr_id;
+	int err;
+
+	err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id,
+					  &ul_vr_id);
+	if (err)
+		return err;
+
+	mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config);
+	mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en);
+	mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
+{
+	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+
+	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
+
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
+					unsigned int tunnel_index)
+{
+	char rtdp_pl[MLXSW_REG_RTDP_LEN];
+
+	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+				    const struct mlxsw_sp_nve_config *config)
+{
+	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+	int err;
+
+	err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
+	if (err)
+		return err;
+
+	err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
+	if (err)
+		goto err_config_set;
+
+	err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index);
+	if (err)
+		goto err_rtdp_set;
+
+	err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
+						config->ul_proto,
+						&config->ul_sip,
+						nve->tunnel_index);
+	if (err)
+		goto err_promote_decap;
+
+	return 0;
+
+err_promote_decap:
+err_rtdp_set:
+	mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+err_config_set:
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+	return err;
+}
+
+static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+	struct mlxsw_sp_nve_config *config = &nve->config;
+	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+
+	mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
+					 config->ul_proto, &config->ul_sip);
+	mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+}
+
+static int
+mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni,
+			      struct netlink_ext_ack *extack)
+{
+	if (WARN_ON(!netif_is_vxlan(nve_dev)))
+		return -EINVAL;
+	return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier,
+				extack);
+}
+
+static void
+mlxsw_sp_nve_vxlan_clear_offload(const struct net_device *nve_dev, __be32 vni)
+{
+	if (WARN_ON(!netif_is_vxlan(nve_dev)))
+		return;
+	vxlan_fdb_clear_offload(nve_dev, vni);
+}
+
+const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = {
+	.type		= MLXSW_SP_NVE_TYPE_VXLAN,
+	.can_offload	= mlxsw_sp_nve_vxlan_can_offload,
+	.nve_config	= mlxsw_sp_nve_vxlan_config,
+	.init		= mlxsw_sp1_nve_vxlan_init,
+	.fini		= mlxsw_sp1_nve_vxlan_fini,
+	.fdb_replay	= mlxsw_sp_nve_vxlan_fdb_replay,
+	.fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload,
+};
+
+static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp,
+					     bool learning_en)
+{
+	char tnpc_pl[MLXSW_REG_TNPC_LEN];
+
+	mlxsw_reg_tnpc_pack(tnpc_pl, MLXSW_REG_TNPC_TUNNEL_PORT_NVE,
+			    learning_en);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnpc), tnpc_pl);
+}
+
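+/* Unlike Spectrum-1, Spectrum-2 binds the tunnel to an underlay RIF
+ * instead of a virtual router, and controls learning through the
+ * dedicated TNPC register rather than a TNGCR field.
+ */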
+static int
+mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
+			       const struct mlxsw_sp_nve_config *config)
+{
+	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+	u16 ul_rif_index;
+	int err;
+
+	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, config->ul_tb_id,
+					 &ul_rif_index);
+	if (err)
+		return err;
+	mlxsw_sp->nve->ul_rif_index = ul_rif_index;
+
+	err = mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, config->learning_en);
+	if (err)
+		goto err_vxlan_learning_set;
+
+	mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config);
+	mlxsw_reg_tngcr_underlay_rif_set(tngcr_pl, ul_rif_index);
+
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+	if (err)
+		goto err_tngcr_write;
+
+	return 0;
+
+err_tngcr_write:
+	mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false);
+err_vxlan_learning_set:
+	mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index);
+	return err;
+}
+
+static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
+{
+	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+
+	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+	mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false);
+	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->nve->ul_rif_index);
+}
+
+static int mlxsw_sp2_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
+					unsigned int tunnel_index,
+					u16 ul_rif_index)
+{
+	char rtdp_pl[MLXSW_REG_RTDP_LEN];
+
+	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
+	mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_index);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+				    const struct mlxsw_sp_nve_config *config)
+{
+	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+	int err;
+
+	err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
+	if (err)
+		return err;
+
+	err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config);
+	if (err)
+		goto err_config_set;
+
+	err = mlxsw_sp2_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index,
+					   nve->ul_rif_index);
+	if (err)
+		goto err_rtdp_set;
+
+	err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
+						config->ul_proto,
+						&config->ul_sip,
+						nve->tunnel_index);
+	if (err)
+		goto err_promote_decap;
+
+	return 0;
+
+err_promote_decap:
+err_rtdp_set:
+	mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
+err_config_set:
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+	return err;
+}
+
+static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+	struct mlxsw_sp_nve_config *config = &nve->config;
+	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+
+	mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
+					 config->ul_proto, &config->ul_sip);
+	mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+}
+
+const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
+	.type		= MLXSW_SP_NVE_TYPE_VXLAN,
+	.can_offload	= mlxsw_sp_nve_vxlan_can_offload,
+	.nve_config	= mlxsw_sp_nve_vxlan_config,
+	.init		= mlxsw_sp2_nve_vxlan_init,
+	.fini		= mlxsw_sp2_nve_vxlan_fini,
+	.fdb_replay	= mlxsw_sp_nve_vxlan_fdb_replay,
+	.fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
new file mode 100644
index 0000000..ec2ff3d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -0,0 +1,1167 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/clocksource.h>
+#include <linux/timecounter.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/rhashtable.h>
+#include <linux/ptp_classify.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+
+#include "spectrum.h"
+#include "spectrum_ptp.h"
+#include "core.h"
+
+#define MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT	29
+#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ		156257 /* 6.4nSec */
+#define MLXSW_SP1_PTP_CLOCK_MASK		64
+
+#define MLXSW_SP1_PTP_HT_GC_INTERVAL		500 /* ms */
+
+/* How long, approximately, should the unmatched entries stay in the hash table
+ * before they are collected. Should be evenly divisible by the GC interval.
+ */
+#define MLXSW_SP1_PTP_HT_GC_TIMEOUT		1000 /* ms */
+
+struct mlxsw_sp_ptp_state {
+	struct mlxsw_sp *mlxsw_sp;
+	struct rhltable unmatched_ht;
+	spinlock_t unmatched_lock; /* protects the HT */
+	struct delayed_work ht_gc_dw;
+	u32 gc_cycle;
+};
+
+struct mlxsw_sp1_ptp_key {
+	u8 local_port;
+	u8 message_type;
+	u16 sequence_id;
+	u8 domain_number;
+	bool ingress;
+};
+
+struct mlxsw_sp1_ptp_unmatched {
+	struct mlxsw_sp1_ptp_key key;
+	struct rhlist_head ht_node;
+	struct rcu_head rcu;
+	struct sk_buff *skb;
+	u64 timestamp;
+	u32 gc_cycle;
+};
+
+static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = {
+	.key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key),
+	.key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key),
+	.head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node),
+};
+
+struct mlxsw_sp_ptp_clock {
+	struct mlxsw_core *core;
+	spinlock_t lock; /* protect this structure */
+	struct cyclecounter cycles;
+	struct timecounter tc;
+	u32 nominal_c_mult;
+	struct ptp_clock *ptp;
+	struct ptp_clock_info ptp_info;
+	unsigned long overflow_period;
+	struct delayed_work overflow_work;
+};
+
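+/* Read the 64-bit free-running counter. The high and low 32 bits come
+ * from separate registers, so sample the high half before and after the
+ * low half and re-read the low half if the counter wrapped in between.
+ */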
+static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp_ptp_clock *clock,
+				    struct ptp_system_timestamp *sts)
+{
+	struct mlxsw_core *mlxsw_core = clock->core;
+	u32 frc_h1, frc_h2, frc_l;
+
+	frc_h1 = mlxsw_core_read_frc_h(mlxsw_core);
+	ptp_read_system_prets(sts);
+	frc_l = mlxsw_core_read_frc_l(mlxsw_core);
+	ptp_read_system_postts(sts);
+	frc_h2 = mlxsw_core_read_frc_h(mlxsw_core);
+
+	if (frc_h1 != frc_h2) {
+		/* wrap around */
+		ptp_read_system_prets(sts);
+		frc_l = mlxsw_core_read_frc_l(mlxsw_core);
+		ptp_read_system_postts(sts);
+	}
+
+	return (u64) frc_l | (u64) frc_h2 << 32;
+}
+
+static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc)
+{
+	struct mlxsw_sp_ptp_clock *clock =
+		container_of(cc, struct mlxsw_sp_ptp_clock, cycles);
+
+	return __mlxsw_sp1_ptp_read_frc(clock, NULL) & cc->mask;
+}
+
+static int
+mlxsw_sp1_ptp_phc_adjfreq(struct mlxsw_sp_ptp_clock *clock, int freq_adj)
+{
+	struct mlxsw_core *mlxsw_core = clock->core;
+	char mtutc_pl[MLXSW_REG_MTUTC_LEN];
+
+	mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ,
+			     freq_adj, 0);
+	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl);
+}
+
+static u64 mlxsw_sp1_ptp_ns2cycles(const struct timecounter *tc, u64 nsec)
+{
+	u64 cycles = (u64) nsec;
+
+	cycles <<= tc->cc->shift;
+	cycles = div_u64(cycles, tc->cc->mult);
+
+	return cycles;
+}
+
+static int
+mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec)
+{
+	struct mlxsw_core *mlxsw_core = clock->core;
+	u64 next_sec, next_sec_in_nsec, cycles;
+	char mtutc_pl[MLXSW_REG_MTUTC_LEN];
+	char mtpps_pl[MLXSW_REG_MTPPS_LEN];
+	int err;
+
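+	/* The UTC time can only be set at a whole-second boundary: compute
+	 * the cycle count of the next second, arm the MTPPS virtual pin
+	 * with it and instruct MTUTC to set the time when that second
+	 * starts.
+	 */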
+	next_sec = div_u64(nsec, NSEC_PER_SEC) + 1;
+	next_sec_in_nsec = next_sec * NSEC_PER_SEC;
+
+	spin_lock_bh(&clock->lock);
+	cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec);
+	spin_unlock_bh(&clock->lock);
+
+	mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles);
+	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mtutc_pack(mtutc_pl,
+			     MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC,
+			     0, next_sec);
+	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl);
+}
+
+static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct mlxsw_sp_ptp_clock *clock =
+		container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+	int neg_adj = 0;
+	u32 diff;
+	u64 adj;
+	s32 ppb;
+
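+	/* scaled_ppm carries a 16-bit binary fraction; convert it to whole
+	 * parts per billion before folding the adjustment into the cycle
+	 * counter multiplier and mirroring it to the hardware.
+	 */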
+	ppb = scaled_ppm_to_ppb(scaled_ppm);
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+
+	adj = clock->nominal_c_mult;
+	adj *= ppb;
+	diff = div_u64(adj, NSEC_PER_SEC);
+
+	spin_lock_bh(&clock->lock);
+	timecounter_read(&clock->tc);
+	clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
+				       clock->nominal_c_mult + diff;
+	spin_unlock_bh(&clock->lock);
+
+	return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? -ppb : ppb);
+}
+
+static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct mlxsw_sp_ptp_clock *clock =
+		container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+	u64 nsec;
+
+	spin_lock_bh(&clock->lock);
+	timecounter_adjtime(&clock->tc, delta);
+	nsec = timecounter_read(&clock->tc);
+	spin_unlock_bh(&clock->lock);
+
+	return mlxsw_sp1_ptp_phc_settime(clock, nsec);
+}
+
+static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct mlxsw_sp_ptp_clock *clock =
+		container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+	u64 cycles, nsec;
+
+	spin_lock_bh(&clock->lock);
+	cycles = __mlxsw_sp1_ptp_read_frc(clock, sts);
+	nsec = timecounter_cyc2time(&clock->tc, cycles);
+	spin_unlock_bh(&clock->lock);
+
+	*ts = ns_to_timespec64(nsec);
+
+	return 0;
+}
+
+static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp,
+				 const struct timespec64 *ts)
+{
+	struct mlxsw_sp_ptp_clock *clock =
+		container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+	u64 nsec = timespec64_to_ns(ts);
+
+	spin_lock_bh(&clock->lock);
+	timecounter_init(&clock->tc, &clock->cycles, nsec);
+	nsec = timecounter_read(&clock->tc);
+	spin_unlock_bh(&clock->lock);
+
+	return mlxsw_sp1_ptp_phc_settime(clock, nsec);
+}
+
+static const struct ptp_clock_info mlxsw_sp1_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "mlxsw_sp_clock",
+	.max_adj	= 100000000,
+	.adjfine	= mlxsw_sp1_ptp_adjfine,
+	.adjtime	= mlxsw_sp1_ptp_adjtime,
+	.gettimex64	= mlxsw_sp1_ptp_gettimex,
+	.settime64	= mlxsw_sp1_ptp_settime,
+};
+
+static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mlxsw_sp_ptp_clock *clock;
+
+	clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work);
+
+	spin_lock_bh(&clock->lock);
+	timecounter_read(&clock->tc);
+	spin_unlock_bh(&clock->lock);
+	mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period);
+}
+
+struct mlxsw_sp_ptp_clock *
+mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev)
+{
+	u64 overflow_cycles, nsec, frac = 0;
+	struct mlxsw_sp_ptp_clock *clock;
+	int err;
+
+	clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+	if (!clock)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&clock->lock);
+	clock->cycles.read = mlxsw_sp1_ptp_read_frc;
+	clock->cycles.shift = MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT;
+	clock->cycles.mult = clocksource_khz2mult(MLXSW_SP1_PTP_CLOCK_FREQ_KHZ,
+						  clock->cycles.shift);
+	clock->nominal_c_mult = clock->cycles.mult;
+	clock->cycles.mask = CLOCKSOURCE_MASK(MLXSW_SP1_PTP_CLOCK_MASK);
+	clock->core = mlxsw_sp->core;
+
+	timecounter_init(&clock->tc, &clock->cycles,
+			 ktime_to_ns(ktime_get_real()));
+
+	/* Calculate the period for the overflow watchdog, to make sure the
+	 * counter is read at least twice every wrap-around.
+	 * The period is the minimum of the max HW cycle count (the clock
+	 * source mask) and the max number of cycles that can be multiplied
+	 * by the clock multiplier without the result exceeding 64 bits.
+	 */
+	overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
+	overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
+
+	nsec = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, 0, &frac);
+	clock->overflow_period = nsecs_to_jiffies(nsec);
+
+	INIT_DELAYED_WORK(&clock->overflow_work, mlxsw_sp1_ptp_clock_overflow);
+	mlxsw_core_schedule_dw(&clock->overflow_work, 0);
+
+	clock->ptp_info = mlxsw_sp1_ptp_clock_info;
+	clock->ptp = ptp_clock_register(&clock->ptp_info, dev);
+	if (IS_ERR(clock->ptp)) {
+		err = PTR_ERR(clock->ptp);
+		dev_err(dev, "ptp_clock_register failed %d\n", err);
+		goto err_ptp_clock_register;
+	}
+
+	return clock;
+
+err_ptp_clock_register:
+	cancel_delayed_work_sync(&clock->overflow_work);
+	kfree(clock);
+	return ERR_PTR(err);
+}
+
+void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
+{
+	ptp_clock_unregister(clock->ptp);
+	cancel_delayed_work_sync(&clock->overflow_work);
+	kfree(clock);
+}
+
+static int mlxsw_sp_ptp_parse(struct sk_buff *skb,
+			      u8 *p_domain_number,
+			      u8 *p_message_type,
+			      u16 *p_sequence_id)
+{
+	unsigned int offset = 0;
+	unsigned int ptp_class;
+	u8 *data;
+
+	data = skb_mac_header(skb);
+	ptp_class = ptp_classify_raw(skb);
+
+	switch (ptp_class & PTP_CLASS_VMASK) {
+	case PTP_CLASS_V1:
+	case PTP_CLASS_V2:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (ptp_class & PTP_CLASS_VLAN)
+		offset += VLAN_HLEN;
+
+	switch (ptp_class & PTP_CLASS_PMASK) {
+	case PTP_CLASS_IPV4:
+		offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+		break;
+	case PTP_CLASS_IPV6:
+		offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+		break;
+	case PTP_CLASS_L2:
+		offset += ETH_HLEN;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* PTP header is 34 bytes. */
+	if (skb->len < offset + 34)
+		return -EINVAL;
+
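+	/* Per the PTPv2 common header: messageType is the low nibble of
+	 * byte 0, domainNumber is byte 4, and sequenceId is the big-endian
+	 * u16 at bytes 30-31.
+	 */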
+	*p_message_type = data[offset] & 0x0f;
+	*p_domain_number = data[offset + 4];
+	*p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31];
+	return 0;
+}
+
+/* Returns 0 on successful insertion, or a negative error code otherwise. */
+static int
+mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp1_ptp_key key,
+			     struct sk_buff *skb,
+			     u64 timestamp)
+{
+	int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL;
+	struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state;
+	struct mlxsw_sp1_ptp_unmatched *unmatched;
+	int err;
+
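+	/* An entry lives for GC_TIMEOUT; expressing the timeout as a number
+	 * of GC cycles lets the collector compare against a single counter.
+	 */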
+	unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC);
+	if (!unmatched)
+		return -ENOMEM;
+
+	unmatched->key = key;
+	unmatched->skb = skb;
+	unmatched->timestamp = timestamp;
+	unmatched->gc_cycle = ptp_state->gc_cycle + cycles;
+
+	err = rhltable_insert(&ptp_state->unmatched_ht, &unmatched->ht_node,
+			      mlxsw_sp1_ptp_unmatched_ht_params);
+	if (err)
+		kfree(unmatched);
+
+	return err;
+}
+
+static struct mlxsw_sp1_ptp_unmatched *
+mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp1_ptp_key key, int *p_length)
+{
+	struct mlxsw_sp1_ptp_unmatched *unmatched, *last = NULL;
+	struct rhlist_head *tmp, *list;
+	int length = 0;
+
+	list = rhltable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key,
+			       mlxsw_sp1_ptp_unmatched_ht_params);
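+	/* Take the last entry in the chain: assuming rhltable links new
+	 * duplicates at the head, that is the oldest entry for this key.
+	 */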
+	rhl_for_each_entry_rcu(unmatched, tmp, list, ht_node) {
+		last = unmatched;
+		length++;
+	}
+
+	*p_length = length;
+	return last;
+}
+
+static int
+mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+	return rhltable_remove(&mlxsw_sp->ptp_state->unmatched_ht,
+			       &unmatched->ht_node,
+			       mlxsw_sp1_ptp_unmatched_ht_params);
+}
+
+/* This function is called in the following scenarios:
+ *
+ * 1) When a packet is matched with its timestamp.
+ * 2) In several situations where it is necessary to immediately pass on
+ *    an SKB without a timestamp.
+ * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish().
+ *    This case is similar to 2) above.
+ */
+static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp,
+					struct sk_buff *skb, u8 local_port,
+					bool ingress,
+					struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	/* Between capturing the packet and finishing it, there is a window of
+	 * opportunity for the originating port to go away (e.g. due to a
+	 * split). Also make sure the SKB device reference is still valid.
+	 */
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) {
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	if (ingress) {
+		if (hwtstamps)
+			*skb_hwtstamps(skb) = *hwtstamps;
+		mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+	} else {
+		/* skb_tstamp_tx() allows hwtstamps to be NULL. */
+		skb_tstamp_tx(skb, hwtstamps);
+		dev_kfree_skb_any(skb);
+	}
+}
+
+static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp1_ptp_key key,
+				       struct sk_buff *skb,
+				       u64 timestamp)
+{
+	struct skb_shared_hwtstamps hwtstamps;
+	u64 nsec;
+
+	spin_lock_bh(&mlxsw_sp->clock->lock);
+	nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp);
+	spin_unlock_bh(&mlxsw_sp->clock->lock);
+
+	hwtstamps.hwtstamp = ns_to_ktime(nsec);
+	mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+				    key.local_port, key.ingress, &hwtstamps);
+}
+
+static void
+mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+	if (unmatched->skb && unmatched->timestamp)
+		mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key,
+					   unmatched->skb,
+					   unmatched->timestamp);
+	else if (unmatched->skb)
+		mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb,
+					    unmatched->key.local_port,
+					    unmatched->key.ingress, NULL);
+	kfree_rcu(unmatched, rcu);
+}
+
+static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg)
+{
+	struct mlxsw_sp1_ptp_unmatched *unmatched = ptr;
+
+	/* This is invoked at a point where the ports are gone already. There
+	 * is nothing to do with whatever is left in the HT except free it.
+	 */
+	if (unmatched->skb)
+		dev_kfree_skb_any(unmatched->skb);
+	kfree_rcu(unmatched, rcu);
+}
+
+static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp1_ptp_key key,
+				    struct sk_buff *skb, u64 timestamp)
+{
+	struct mlxsw_sp1_ptp_unmatched *unmatched;
+	int length;
+	int err;
+
+	rcu_read_lock();
+
+	spin_lock(&mlxsw_sp->ptp_state->unmatched_lock);
+
+	unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key, &length);
+	if (skb && unmatched && unmatched->timestamp) {
+		unmatched->skb = skb;
+	} else if (timestamp && unmatched && unmatched->skb) {
+		unmatched->timestamp = timestamp;
+	} else {
+		/* Either there is no entry to match, or the one that is
+		 * there is incompatible.
+		 */
+		if (length < 100)
+			err = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
+							   skb, timestamp);
+		else
+			err = -E2BIG;
+		if (err && skb)
+			mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+						    key.local_port,
+						    key.ingress, NULL);
+		unmatched = NULL;
+	}
+
+	if (unmatched) {
+		err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched);
+		WARN_ON_ONCE(err);
+	}
+
+	spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock);
+
+	if (unmatched)
+		mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched);
+
+	rcu_read_unlock();
+}
+
+static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp,
+				     struct sk_buff *skb, u8 local_port,
+				     bool ingress)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp1_ptp_key key;
+	u8 types;
+	int err;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto immediate;
+
+	types = ingress ? mlxsw_sp_port->ptp.ing_types :
+			  mlxsw_sp_port->ptp.egr_types;
+	if (!types)
+		goto immediate;
+
+	memset(&key, 0, sizeof(key));
+	key.local_port = local_port;
+	key.ingress = ingress;
+
+	err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type,
+				 &key.sequence_id);
+	if (err)
+		goto immediate;
+
+	/* For packets of message types whose timestamping was not enabled on
+	 * this port, don't bother trying to match a timestamp.
+	 */
+	if (!((1 << key.message_type) & types))
+		goto immediate;
+
+	mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0);
+	return;
+
+immediate:
+	mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL);
+}
+
+void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+				 u8 local_port, u8 message_type,
+				 u8 domain_number, u16 sequence_id,
+				 u64 timestamp)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp1_ptp_key key;
+	u8 types;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		return;
+
+	types = ingress ? mlxsw_sp_port->ptp.ing_types :
+			  mlxsw_sp_port->ptp.egr_types;
+
+	/* For message types whose timestamping was not enabled on this port,
+	 * don't bother with the timestamp.
+	 */
+	if (!((1 << message_type) & types))
+		return;
+
+	memset(&key, 0, sizeof(key));
+	key.local_port = local_port;
+	key.domain_number = domain_number;
+	key.message_type = message_type;
+	key.sequence_id = sequence_id;
+	key.ingress = ingress;
+
+	mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp);
+}
+
+void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			   u8 local_port)
+{
+	skb_reset_mac_header(skb);
+	mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true);
+}
+
+void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+			       struct sk_buff *skb, u8 local_port)
+{
+	mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false);
+}
+
+static void
+mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state,
+			    struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+	struct mlxsw_sp_ptp_port_dir_stats *stats;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	int err;
+
+	/* If an unmatched entry has an SKB, it has to be handed over to the
+	 * networking stack. This is usually done from a trap handler, which is
+	 * invoked in a softirq context. Here we are going to do it in process
+	 * context. If that were to be interrupted by a softirq, it could cause
+	 * a deadlock when an attempt is made to take an already-taken lock
+	 * somewhere along the sending path. Disable softirqs to prevent this.
+	 */
+	local_bh_disable();
+
+	spin_lock(&ptp_state->unmatched_lock);
+	err = rhltable_remove(&ptp_state->unmatched_ht, &unmatched->ht_node,
+			      mlxsw_sp1_ptp_unmatched_ht_params);
+	spin_unlock(&ptp_state->unmatched_lock);
+
+	if (err)
+		/* The packet was matched with a timestamp during the walk. */
+		goto out;
+
+	mlxsw_sp_port = ptp_state->mlxsw_sp->ports[unmatched->key.local_port];
+	if (mlxsw_sp_port) {
+		stats = unmatched->key.ingress ?
+			&mlxsw_sp_port->ptp.stats.rx_gcd :
+			&mlxsw_sp_port->ptp.stats.tx_gcd;
+		if (unmatched->skb)
+			stats->packets++;
+		else
+			stats->timestamps++;
+	}
+
+	/* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While
+	 * the comment at that function states that it can only be called in
+	 * soft IRQ context, this pattern of local_bh_disable() +
+	 * netif_receive_skb(), in process context, is seen elsewhere in the
+	 * kernel, notably in pktgen.
+	 */
+	mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched);
+
+out:
+	local_bh_enable();
+}
+
+static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mlxsw_sp1_ptp_unmatched *unmatched;
+	struct mlxsw_sp_ptp_state *ptp_state;
+	struct rhashtable_iter iter;
+	u32 gc_cycle;
+	void *obj;
+
+	ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw);
+	gc_cycle = ptp_state->gc_cycle++;
+
+	rhltable_walk_enter(&ptp_state->unmatched_ht, &iter);
+	rhashtable_walk_start(&iter);
+	while ((obj = rhashtable_walk_next(&iter))) {
+		if (IS_ERR(obj))
+			continue;
+
+		unmatched = obj;
+		if (unmatched->gc_cycle <= gc_cycle)
+			mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched);
+	}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+
+	mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
+			       MLXSW_SP1_PTP_HT_GC_INTERVAL);
+}
+
+static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp,
+				   enum mlxsw_reg_mtptpt_trap_id trap_id,
+				   u16 message_type)
+{
+	char mtptpt_pl[MLXSW_REG_MTPTPT_LEN];
+
+	mlxsw_reg_mtptpt_pack(mtptpt_pl, trap_id, message_type);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl);
+}
+
+static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp,
+					      bool clr)
+{
+	char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0};
+	int err;
+
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr);
+	mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+}
+
+static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp,
+				    u16 ing_types, u16 egr_types)
+{
+	char mtpppc_pl[MLXSW_REG_MTPPPC_LEN];
+
+	mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl);
+}
+
+struct mlxsw_sp1_ptp_shaper_params {
+	u32 ethtool_speed;
+	enum mlxsw_reg_qpsc_port_speed port_speed;
+	u8 shaper_time_exp;
+	u8 shaper_time_mantissa;
+	u8 shaper_inc;
+	u8 shaper_bs;
+	u8 port_to_shaper_credits;
+	int ing_timestamp_inc;
+	int egr_timestamp_inc;
+};
+
+static const struct mlxsw_sp1_ptp_shaper_params
+mlxsw_sp1_ptp_shaper_params[] = {
+	{
+		.ethtool_speed		= SPEED_100,
+		.port_speed		= MLXSW_REG_QPSC_PORT_SPEED_100M,
+		.shaper_time_exp	= 4,
+		.shaper_time_mantissa	= 12,
+		.shaper_inc		= 9,
+		.shaper_bs		= 1,
+		.port_to_shaper_credits	= 1,
+		.ing_timestamp_inc	= -313,
+		.egr_timestamp_inc	= 313,
+	},
+	{
+		.ethtool_speed		= SPEED_1000,
+		.port_speed		= MLXSW_REG_QPSC_PORT_SPEED_1G,
+		.shaper_time_exp	= 0,
+		.shaper_time_mantissa	= 12,
+		.shaper_inc		= 6,
+		.shaper_bs		= 0,
+		.port_to_shaper_credits	= 1,
+		.ing_timestamp_inc	= -35,
+		.egr_timestamp_inc	= 35,
+	},
+	{
+		.ethtool_speed		= SPEED_10000,
+		.port_speed		= MLXSW_REG_QPSC_PORT_SPEED_10G,
+		.shaper_time_exp	= 0,
+		.shaper_time_mantissa	= 2,
+		.shaper_inc		= 14,
+		.shaper_bs		= 1,
+		.port_to_shaper_credits	= 1,
+		.ing_timestamp_inc	= -11,
+		.egr_timestamp_inc	= 11,
+	},
+	{
+		.ethtool_speed		= SPEED_25000,
+		.port_speed		= MLXSW_REG_QPSC_PORT_SPEED_25G,
+		.shaper_time_exp	= 0,
+		.shaper_time_mantissa	= 0,
+		.shaper_inc		= 11,
+		.shaper_bs		= 1,
+		.port_to_shaper_credits	= 1,
+		.ing_timestamp_inc	= -14,
+		.egr_timestamp_inc	= 14,
+	},
+};
+
+#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params)
+
+static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp)
+{
+	const struct mlxsw_sp1_ptp_shaper_params *params;
+	char qpsc_pl[MLXSW_REG_QPSC_LEN];
+	int i, err;
+
+	for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) {
+		params = &mlxsw_sp1_ptp_shaper_params[i];
+		mlxsw_reg_qpsc_pack(qpsc_pl, params->port_speed,
+				    params->shaper_time_exp,
+				    params->shaper_time_mantissa,
+				    params->shaper_inc, params->shaper_bs,
+				    params->port_to_shaper_credits,
+				    params->ing_timestamp_inc,
+				    params->egr_timestamp_inc);
+		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_ptp_state *ptp_state;
+	u16 message_type;
+	int err;
+
+	err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp);
+	if (err)
+		return ERR_PTR(err);
+
+	ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL);
+	if (!ptp_state)
+		return ERR_PTR(-ENOMEM);
+	ptp_state->mlxsw_sp = mlxsw_sp;
+
+	spin_lock_init(&ptp_state->unmatched_lock);
+
+	err = rhltable_init(&ptp_state->unmatched_ht,
+			    &mlxsw_sp1_ptp_unmatched_ht_params);
+	if (err)
+		goto err_hashtable_init;
+
+	/* Deliver these message types as PTP0. */
+	message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) |
+		       BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) |
+		       BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) |
+		       BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP);
+	err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+				      message_type);
+	if (err)
+		goto err_mtptpt_set;
+
+	/* Everything else is PTP1. */
+	message_type = ~message_type;
+	err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+				      message_type);
+	if (err)
+		goto err_mtptpt1_set;
+
+	err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true);
+	if (err)
+		goto err_fifo_clr;
+
+	INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc);
+	mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
+			       MLXSW_SP1_PTP_HT_GC_INTERVAL);
+	return ptp_state;
+
+err_fifo_clr:
+	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+err_mtptpt1_set:
+	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+err_mtptpt_set:
+	rhltable_destroy(&ptp_state->unmatched_ht);
+err_hashtable_init:
+	kfree(ptp_state);
+	return ERR_PTR(err);
+}
+
+void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+	struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp;
+
+	cancel_delayed_work_sync(&ptp_state->ht_gc_dw);
+	mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0);
+	mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false);
+	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+	rhltable_free_and_destroy(&ptp_state->unmatched_ht,
+				  &mlxsw_sp1_ptp_unmatched_free_fn, NULL);
+	kfree(ptp_state);
+}
+
+int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+			       struct hwtstamp_config *config)
+{
+	*config = mlxsw_sp_port->ptp.hwtstamp_config;
+	return 0;
+}
+
+static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config,
+					  u16 *p_ing_types, u16 *p_egr_types,
+					  enum hwtstamp_rx_filters *p_rx_filter)
+{
+	enum hwtstamp_rx_filters rx_filter = config->rx_filter;
+	enum hwtstamp_tx_types tx_type = config->tx_type;
+	u16 ing_types = 0x00;
+	u16 egr_types = 0x00;
+
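+	/* ing_types / egr_types are bitmasks over PTP messageType values:
+	 * BIT(0) Sync, BIT(1) Delay_Req, BIT(2) Pdelay_Req, BIT(3)
+	 * Pdelay_Resp, so 0x0f covers all event messages.
+	 */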
+	switch (tx_type) {
+	case HWTSTAMP_TX_OFF:
+		egr_types = 0x00;
+		break;
+	case HWTSTAMP_TX_ON:
+		egr_types = 0xff;
+		break;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		return -ERANGE;
+	}
+
+	switch (rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		ing_types = 0x00;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+		ing_types = 0x01;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		ing_types = 0x02;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		ing_types = 0x0f;
+		break;
+	case HWTSTAMP_FILTER_ALL:
+		ing_types = 0xff;
+		break;
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_NTP_ALL:
+		return -ERANGE;
+	}
+
+	*p_ing_types = ing_types;
+	*p_egr_types = egr_types;
+	*p_rx_filter = rx_filter;
+	return 0;
+}
+
+static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port,
+				       u16 ing_types, u16 egr_types)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_port *tmp;
+	u16 orig_ing_types = 0;
+	u16 orig_egr_types = 0;
+	int err;
+	int i;
+
+	/* MTPPPC configures timestamping globally, not per port. Find the
+	 * configuration that contains all configured timestamping requests.
+	 */
+	for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
+		tmp = mlxsw_sp->ports[i];
+		if (tmp) {
+			orig_ing_types |= tmp->ptp.ing_types;
+			orig_egr_types |= tmp->ptp.egr_types;
+		}
+		if (tmp && tmp != mlxsw_sp_port) {
+			ing_types |= tmp->ptp.ing_types;
+			egr_types |= tmp->ptp.egr_types;
+		}
+	}
+
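+	/* Parsing depth is a global resource: take it on the first port that
+	 * enables timestamping, and release it when the last one disables.
+	 */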
+	if ((ing_types || egr_types) && !(orig_ing_types || orig_egr_types)) {
+		err = mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp);
+		if (err) {
+			netdev_err(mlxsw_sp_port->dev, "Failed to increase parsing depth\n");
+			return err;
+		}
+	}
+	if (!(ing_types || egr_types) && (orig_ing_types || orig_egr_types))
+		mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp);
+
+	return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp,
+				       ing_types, egr_types);
+}
+
+static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types;
+}
+
+static int
+mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+	mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char ptys_pl[MLXSW_REG_PTYS_LEN];
+	u32 eth_proto_oper, speed;
+	bool ptps = false;
+	int err, i;
+
+	if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port))
+		return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false);
+
+	port_type_speed_ops = mlxsw_sp->port_type_speed_ops;
+	port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl,
+					       mlxsw_sp_port->local_port, 0,
+					       false);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+	if (err)
+		return err;
+	port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL,
+						 &eth_proto_oper);
+
+	speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper);
+	for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) {
+		if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) {
+			ptps = true;
+			break;
+		}
+	}
+
+	return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps);
+}
+
+void mlxsw_sp1_ptp_shaper_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	int err;
+
+	mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port,
+				     ptp.shaper_dw);
+
+	if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port))
+		return;
+
+	err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port);
+	if (err)
+		netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n");
+}
+
+int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+			       struct hwtstamp_config *config)
+{
+	enum hwtstamp_rx_filters rx_filter;
+	u16 ing_types;
+	u16 egr_types;
+	int err;
+
+	err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types,
+					     &rx_filter);
+	if (err)
+		return err;
+
+	err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types);
+	if (err)
+		return err;
+
+	mlxsw_sp_port->ptp.hwtstamp_config = *config;
+	mlxsw_sp_port->ptp.ing_types = ing_types;
+	mlxsw_sp_port->ptp.egr_types = egr_types;
+
+	err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port);
+	if (err)
+		return err;
+
+	/* Notify the ioctl caller what we are actually timestamping. */
+	config->rx_filter = rx_filter;
+
+	return 0;
+}
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+			      struct ethtool_ts_info *info)
+{
+	info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp);
+
+	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+			 BIT(HWTSTAMP_TX_ON);
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_ALL);
+
+	return 0;
+}
+
+struct mlxsw_sp_ptp_port_stat {
+	char str[ETH_GSTRING_LEN];
+	ptrdiff_t offset;
+};
+
+#define MLXSW_SP_PTP_PORT_STAT(NAME, FIELD)				\
+	{								\
+		.str = NAME,						\
+		.offset = offsetof(struct mlxsw_sp_ptp_port_stats,	\
+				    FIELD),				\
+	}
+
+static const struct mlxsw_sp_ptp_port_stat mlxsw_sp_ptp_port_stats[] = {
+	MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_packets",    rx_gcd.packets),
+	MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_timestamps", rx_gcd.timestamps),
+	MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_packets",    tx_gcd.packets),
+	MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_timestamps", tx_gcd.timestamps),
+};
+
+#undef MLXSW_SP_PTP_PORT_STAT
+
+#define MLXSW_SP_PTP_PORT_STATS_LEN \
+	ARRAY_SIZE(mlxsw_sp_ptp_port_stats)
+
+int mlxsw_sp1_get_stats_count(void)
+{
+	return MLXSW_SP_PTP_PORT_STATS_LEN;
+}
+
+void mlxsw_sp1_get_stats_strings(u8 **p)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) {
+		memcpy(*p, mlxsw_sp_ptp_port_stats[i].str,
+		       ETH_GSTRING_LEN);
+		*p += ETH_GSTRING_LEN;
+	}
+}
+
+void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+			 u64 *data, int data_index)
+{
+	void *stats = &mlxsw_sp_port->ptp.stats;
+	ptrdiff_t offset;
+	int i;
+
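+	/* Each stat is a u64 field of struct mlxsw_sp_ptp_port_stats,
+	 * addressed through the offsetof() table above.
+	 */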
+	data += data_index;
+	for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) {
+		offset = mlxsw_sp_ptp_port_stats[i].offset;
+		*data++ = *(u64 *)(stats + offset);
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
new file mode 100644
index 0000000..8c38657
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_PTP_H
+#define _MLXSW_SPECTRUM_PTP_H
+
+#include <linux/device.h>
+#include <linux/rhashtable.h>
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+struct mlxsw_sp_ptp_clock;
+
+enum {
+	MLXSW_SP_PTP_MESSAGE_TYPE_SYNC,
+	MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ,
+	MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ,
+	MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP,
+};
+
+static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info)
+{
+	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE;
+	info->phc_index = -1;
+	return 0;
+}
+
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+
+struct mlxsw_sp_ptp_clock *
+mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev);
+
+void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock);
+
+struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp);
+
+void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state);
+
+void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			   u8 local_port);
+
+void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+			       struct sk_buff *skb, u8 local_port);
+
+void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+				 u8 local_port, u8 message_type,
+				 u8 domain_number, u16 sequence_id,
+				 u64 timestamp);
+
+int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+			       struct hwtstamp_config *config);
+
+int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+			       struct hwtstamp_config *config);
+
+void mlxsw_sp1_ptp_shaper_work(struct work_struct *work);
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+			      struct ethtool_ts_info *info);
+
+int mlxsw_sp1_get_stats_count(void);
+void mlxsw_sp1_get_stats_strings(u8 **p);
+void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+			 u64 *data, int data_index);
+
+#else
+
+static inline struct mlxsw_sp_ptp_clock *
+mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev)
+{
+	return NULL;
+}
+
+static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
+{
+}
+
+static inline struct mlxsw_sp_ptp_state *
+mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+	return NULL;
+}
+
+static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+}
+
+static inline void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp,
+					 struct sk_buff *skb, u8 local_port)
+{
+	mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+}
+
+static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+					     struct sk_buff *skb, u8 local_port)
+{
+	dev_kfree_skb_any(skb);
+}
+
+static inline void
+mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+			    u8 local_port, u8 message_type,
+			    u8 domain_number,
+			    u16 sequence_id, u64 timestamp)
+{
+}
+
+static inline int
+mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct hwtstamp_config *config)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct hwtstamp_config *config)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work)
+{
+}
+
+static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+					    struct ethtool_ts_info *info)
+{
+	return mlxsw_sp_ptp_get_ts_info_noptp(info);
+}
+
+static inline int mlxsw_sp1_get_stats_count(void)
+{
+	return 0;
+}
+
+static inline void mlxsw_sp1_get_stats_strings(u8 **p)
+{
+}
+
+static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+				       u64 *data, int data_index)
+{
+}
+#endif
+
+static inline struct mlxsw_sp_ptp_clock *
+mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev)
+{
+	return NULL;
+}
+
+static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
+{
+}
+
+static inline struct mlxsw_sp_ptp_state *
+mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+	return NULL;
+}
+
+static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+}
+
+static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp,
+					 struct sk_buff *skb, u8 local_port)
+{
+	mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+}
+
+static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+					     struct sk_buff *skb, u8 local_port)
+{
+	dev_kfree_skb_any(skb);
+}
+
+static inline int
+mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct hwtstamp_config *config)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct hwtstamp_config *config)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work)
+{
+}
+
+static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+					    struct ethtool_ts_info *info)
+{
+	return mlxsw_sp_ptp_get_ts_info_noptp(info);
+}
+
+static inline int mlxsw_sp2_get_stats_count(void)
+{
+	return 0;
+}
+
+static inline void mlxsw_sp2_get_stats_strings(u8 **p)
+{
+}
+
+static inline void mlxsw_sp2_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+				       u64 *data, int data_index)
+{
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2ab9cf2..39d600c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -13,13 +13,15 @@
 #include <linux/socket.h>
 #include <linux/route.h>
 #include <linux/gcd.h>
-#include <linux/random.h>
 #include <linux/if_macvlan.h>
+#include <linux/refcount.h>
+#include <linux/jhash.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
 #include <net/ip_fib.h>
 #include <net/ip6_fib.h>
+#include <net/nexthop.h>
 #include <net/fib_rules.h>
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
@@ -70,6 +72,8 @@
 	bool aborted;
 	struct notifier_block fib_nb;
 	struct notifier_block netevent_nb;
+	struct notifier_block inetaddr_nb;
+	struct notifier_block inet6addr_nb;
 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
 };
@@ -77,7 +81,7 @@
 struct mlxsw_sp_rif {
 	struct list_head nexthop_list;
 	struct list_head neigh_list;
-	struct net_device *dev;
+	struct net_device *dev; /* NULL for underlay RIF */
 	struct mlxsw_sp_fid *fid;
 	unsigned char addr[ETH_ALEN];
 	int mtu;
@@ -104,6 +108,7 @@
 
 struct mlxsw_sp_rif_subport {
 	struct mlxsw_sp_rif common;
+	refcount_t ref_count;
 	union {
 		u16 system_port;
 		u16 lag_id;
@@ -116,6 +121,7 @@
 	struct mlxsw_sp_rif common;
 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
+	u16 ul_rif_id; /* Reserved for Spectrum. */
 };
 
 struct mlxsw_sp_rif_params_ipip_lb {
@@ -136,6 +142,7 @@
 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 };
 
+static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_lpm_tree *lpm_tree);
@@ -358,6 +365,7 @@
 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
 
 	/* This is a special case of local delivery, where a packet should be
 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
@@ -366,6 +374,7 @@
 	 * encapsulating entries.)
 	 */
 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
+	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
 };
 
 struct mlxsw_sp_nexthop_group;
@@ -434,6 +443,8 @@
 	struct mlxsw_sp_fib *fib4;
 	struct mlxsw_sp_fib *fib6;
 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
+	struct mlxsw_sp_rif *ul_rif;
+	refcount_t ul_rif_refcnt;
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -741,6 +752,19 @@
 	return NULL;
 }
 
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+				u16 *vr_id)
+{
+	struct mlxsw_sp_vr *vr;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+	if (!vr)
+		return -ESRCH;
+	*vr_id = vr->id;
+
+	return 0;
+}
+
 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
 					    enum mlxsw_sp_l3proto proto)
 {
@@ -970,7 +994,7 @@
 	if (d)
 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
 	else
-		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
+		return RT_TABLE_MAIN;
 }
 
 static struct mlxsw_sp_rif *
@@ -1128,6 +1152,52 @@
 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
 }
 
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+				     enum mlxsw_sp_l3proto proto,
+				     const union mlxsw_sp_l3addr *addr,
+				     enum mlxsw_sp_fib_entry_type type)
+{
+	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_fib_node *fib_node;
+	unsigned char addr_prefix_len;
+	struct mlxsw_sp_fib *fib;
+	struct mlxsw_sp_vr *vr;
+	const void *addrp;
+	size_t addr_len;
+	u32 addr4;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+	if (!vr)
+		return NULL;
+	fib = mlxsw_sp_vr_fib(vr, proto);
+
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		addr4 = be32_to_cpu(addr->addr4);
+		addrp = &addr4;
+		addr_len = 4;
+		addr_prefix_len = 32;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+
+	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
+					    addr_prefix_len);
+	if (!fib_node || list_empty(&fib_node->entry_list))
+		return NULL;
+
+	fib_entry = list_first_entry(&fib_node->entry_list,
+				     struct mlxsw_sp_fib_entry, list);
+	if (fib_entry->type != type)
+		return NULL;
+
+	return fib_entry;
+}
+
 /* Given an IPIP entry, find the corresponding decap route. */
 static struct mlxsw_sp_fib_entry *
 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
@@ -1215,15 +1285,12 @@
 {
 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
-	struct net_device *ipip_ul_dev;
 
 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
 		return false;
 
-	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
-						 ul_tb_id, ipip_entry) &&
-	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
+						 ul_tb_id, ipip_entry);
 }
 
 /* Given decap parameters, find the corresponding IPIP entry. */
@@ -1375,8 +1442,8 @@
 }
 
 static int
-mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
-			struct mlxsw_sp_vr *ul_vr, bool enable)
+mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
+			u16 ul_rif_id, bool enable)
 {
 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
 	struct mlxsw_sp_rif *rif = &lb_rif->common;
@@ -1391,7 +1458,7 @@
 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
-			    ul_vr->id, saddr4, lb_cf.okey);
+			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
 		break;
 
 	case MLXSW_SP_L3_PROTO_IPV6:
@@ -1406,14 +1473,13 @@
 {
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
-	struct mlxsw_sp_vr *ul_vr;
 	int err = 0;
 
 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
 	if (ipip_entry) {
 		lb_rif = ipip_entry->ol_lb;
-		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
-		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
+					      lb_rif->ul_rif_id, true);
 		if (err)
 			goto out;
 		lb_rif->common.mtu = ol_dev->mtu;
@@ -1532,27 +1598,10 @@
 {
 	struct mlxsw_sp_ipip_entry *ipip_entry =
 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
-	enum mlxsw_sp_l3proto ul_proto;
-	union mlxsw_sp_l3addr saddr;
-	u32 ul_tb_id;
 
 	if (!ipip_entry)
 		return 0;
 
-	/* For flat configuration cases, moving overlay to a different VRF might
-	 * cause local address conflict, and the conflicting tunnels need to be
-	 * demoted.
-	 */
-	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
-	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
-	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
-	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
-						 saddr, ul_tb_id,
-						 ipip_entry)) {
-		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
-		return 0;
-	}
-
 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
 						   true, false, false, extack);
 }
@@ -1765,6 +1814,56 @@
 	return 0;
 }
 
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+				      enum mlxsw_sp_l3proto ul_proto,
+				      const union mlxsw_sp_l3addr *ul_sip,
+				      u32 tunnel_index)
+{
+	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+	struct mlxsw_sp_fib_entry *fib_entry;
+	int err;
+
+	/* It is valid to create a tunnel with a local IP and only later
+	 * assign this IP address to a local interface
+	 */
+	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+							 ul_proto, ul_sip,
+							 type);
+	if (!fib_entry)
+		return 0;
+
+	fib_entry->decap.tunnel_index = tunnel_index;
+	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+
+	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+	if (err)
+		goto err_fib_entry_update;
+
+	return 0;
+
+err_fib_entry_update:
+	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+	return err;
+}
+
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+				      enum mlxsw_sp_l3proto ul_proto,
+				      const union mlxsw_sp_l3addr *ul_sip)
+{
+	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+	struct mlxsw_sp_fib_entry *fib_entry;
+
+	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+							 ul_proto, ul_sip,
+							 type);
+	if (!fib_entry)
+		return;
+
+	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+}
+
 struct mlxsw_sp_neigh_key {
 	struct neighbour *n;
 };
@@ -2248,7 +2347,7 @@
 static void
 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_neigh_entry *neigh_entry,
-			      bool removing);
+			      bool removing, bool dead);
 
 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
 {
@@ -2256,7 +2355,7 @@
 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
 }
 
-static void
+static int
 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_neigh_entry *neigh_entry,
 				enum mlxsw_reg_rauht_op op)
@@ -2270,10 +2369,10 @@
 	if (neigh_entry->counter_valid)
 		mlxsw_reg_rauht_pack_counter(rauht_pl,
 					     neigh_entry->counter_index);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
 }
 
-static void
+static int
 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_neigh_entry *neigh_entry,
 				enum mlxsw_reg_rauht_op op)
@@ -2287,7 +2386,7 @@
 	if (neigh_entry->counter_valid)
 		mlxsw_reg_rauht_pack_counter(rauht_pl,
 					     neigh_entry->counter_index);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
 }
 
 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
@@ -2309,20 +2408,33 @@
 			    struct mlxsw_sp_neigh_entry *neigh_entry,
 			    bool adding)
 {
+	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
+	int err;
+
 	if (!adding && !neigh_entry->connected)
 		return;
 	neigh_entry->connected = adding;
 	if (neigh_entry->key.n->tbl->family == AF_INET) {
-		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
-						mlxsw_sp_rauht_op(adding));
+		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
+						      op);
+		if (err)
+			return;
 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
 			return;
-		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
-						mlxsw_sp_rauht_op(adding));
+		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
+						      op);
+		if (err)
+			return;
 	} else {
 		WARN_ON_ONCE(1);
+		return;
 	}
+
+	if (adding)
+		neigh_entry->key.n->flags |= NTF_OFFLOADED;
+	else
+		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
 }
 
 void
@@ -2379,7 +2491,8 @@
 
 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
-	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
+	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
+				      dead);
 
 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
@@ -2758,12 +2871,13 @@
 		return false;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
 		struct in6_addr *gw;
 		int ifindex, weight;
 
-		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
-		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
-		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
+		ifindex = fib6_nh->fib_nh_dev->ifindex;
+		weight = fib6_nh->fib_nh_weight;
+		gw = &fib6_nh->fib_nh_gw6;
 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
 							 weight))
 			return false;
@@ -2812,7 +2926,7 @@
 		val = nh_grp->count;
 		for (i = 0; i < nh_grp->count; i++) {
 			nh = &nh_grp->nexthops[i];
-			val ^= nh->ifindex;
+			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
 		}
 		return jhash(&val, sizeof(val), seed);
 	default:
@@ -2829,8 +2943,8 @@
 	struct net_device *dev;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
-		val ^= dev->ifindex;
+		dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
+		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
 	}
 
 	return jhash(&val, sizeof(val), seed);
@@ -3343,13 +3457,79 @@
 	nh->update = 1;
 }
 
+static int
+mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+	struct neighbour *n, *old_n = neigh_entry->key.n;
+	struct mlxsw_sp_nexthop *nh;
+	bool entry_connected;
+	u8 nud_state, dead;
+	int err;
+
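+	/* The neighbour being replaced is dead; look up or create a live one
+	 * with the same address, so the nexthops keep a valid reference.
+	 */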
+	nh = list_first_entry(&neigh_entry->nexthop_list,
+			      struct mlxsw_sp_nexthop, neigh_list_node);
+
+	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
+	if (!n) {
+		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+				 nh->rif->dev);
+		if (IS_ERR(n))
+			return PTR_ERR(n);
+		neigh_event_send(n, NULL);
+	}
+
+	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
+	neigh_entry->key.n = n;
+	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+	if (err)
+		goto err_neigh_entry_insert;
+
+	read_lock_bh(&n->lock);
+	nud_state = n->nud_state;
+	dead = n->dead;
+	read_unlock_bh(&n->lock);
+	entry_connected = nud_state & NUD_VALID && !dead;
+
+	list_for_each_entry(nh, &neigh_entry->nexthop_list,
+			    neigh_list_node) {
+		neigh_release(old_n);
+		neigh_clone(n);
+		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+	}
+
+	neigh_release(n);
+
+	return 0;
+
+err_neigh_entry_insert:
+	neigh_entry->key.n = old_n;
+	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+	neigh_release(n);
+	return err;
+}
+
 static void
 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_neigh_entry *neigh_entry,
-			      bool removing)
+			      bool removing, bool dead)
 {
 	struct mlxsw_sp_nexthop *nh;
 
+	if (list_empty(&neigh_entry->nexthop_list))
+		return;
+
+	if (dead) {
+		int err;
+
+		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
+							  neigh_entry);
+		if (err)
+			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
+		return;
+	}
+
 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
 			    neigh_list_node) {
 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
@@ -3495,7 +3675,7 @@
 					const struct fib_nh *fib_nh,
 					enum mlxsw_sp_ipip_type *p_ipipt)
 {
-	struct net_device *dev = fib_nh->nh_dev;
+	struct net_device *dev = fib_nh->fib_nh_dev;
 
 	return dev &&
 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
@@ -3522,7 +3702,7 @@
 				       struct fib_nh *fib_nh)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
-	struct net_device *dev = fib_nh->nh_dev;
+	struct net_device *dev = fib_nh->fib_nh_dev;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct mlxsw_sp_rif *rif;
 	int err;
@@ -3566,18 +3746,18 @@
 				  struct mlxsw_sp_nexthop *nh,
 				  struct fib_nh *fib_nh)
 {
-	struct net_device *dev = fib_nh->nh_dev;
+	struct net_device *dev = fib_nh->fib_nh_dev;
 	struct in_device *in_dev;
 	int err;
 
 	nh->nh_grp = nh_grp;
 	nh->key.fib_nh = fib_nh;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	nh->nh_weight = fib_nh->nh_weight;
+	nh->nh_weight = fib_nh->fib_nh_weight;
 #else
 	nh->nh_weight = 1;
 #endif
-	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
+	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
 	if (err)
 		return err;
@@ -3590,7 +3770,7 @@
 
 	in_dev = __in_dev_get_rtnl(dev);
 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
+	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
 		return 0;
 
 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
@@ -3687,25 +3867,25 @@
 }
 
 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
-				   const struct fib_info *fi)
+				   struct fib_info *fi)
 {
-	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
-	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
+	const struct fib_nh *nh = fib_info_nh(fi, 0);
+
+	return nh->fib_nh_scope == RT_SCOPE_LINK ||
+	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
 }
 
 static struct mlxsw_sp_nexthop_group *
 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 {
+	unsigned int nhs = fib_info_num_path(fi);
 	struct mlxsw_sp_nexthop_group *nh_grp;
 	struct mlxsw_sp_nexthop *nh;
 	struct fib_nh *fib_nh;
-	size_t alloc_size;
 	int i;
 	int err;
 
-	alloc_size = sizeof(*nh_grp) +
-		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
-	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+	nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
 	if (!nh_grp)
 		return ERR_PTR(-ENOMEM);
 	nh_grp->priv = fi;
@@ -3713,11 +3893,11 @@
 	nh_grp->neigh_tbl = &arp_tbl;
 
 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
-	nh_grp->count = fi->fib_nhs;
+	nh_grp->count = nhs;
 	fib_info_hold(fi);
 	for (i = 0; i < nh_grp->count; i++) {
 		nh = &nh_grp->nexthops[i];
-		fib_nh = &fi->fib_nh[i];
+		fib_nh = fib_info_nh(fi, i);
 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
 		if (err)
 			goto err_nexthop4_init;
@@ -3814,7 +3994,9 @@
 		return !!nh_group->adj_index_valid;
 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
 		return !!nh_group->nh_rif;
+	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
 		return true;
 	default:
 		return false;
@@ -3831,9 +4013,9 @@
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
-		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
+		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
-				    &rt->fib6_nh.nh_gw))
+				    &rt->fib6_nh->fib_nh_gw6))
 			return nh;
 		continue;
 	}
@@ -3848,8 +4030,10 @@
 	int i;
 
 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
-	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
-		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
+	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
+	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
+		nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
 		return;
 	}
 
@@ -3857,9 +4041,9 @@
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
 
 		if (nh->offloaded)
-			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
 		else
-			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -3875,7 +4059,7 @@
 	for (i = 0; i < nh_grp->count; i++) {
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
 
-		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+		nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -3888,21 +4072,23 @@
 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
 				  common);
 
-	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
+	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
-				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
+				 list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
 		return;
 	}
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
 		struct mlxsw_sp_nexthop *nh;
 
 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
 		if (nh && nh->offloaded)
-			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
+			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
 		else
-			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
+			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -3917,7 +4103,7 @@
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
-		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
+		rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -4056,6 +4242,19 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
+static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
+					   struct mlxsw_sp_fib_entry *fib_entry,
+					   enum mlxsw_reg_ralue_op op)
+{
+	enum mlxsw_reg_ralue_trap_action trap_action;
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
+	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
 static int
 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_fib_entry *fib_entry,
@@ -4072,6 +4271,18 @@
 				      fib_entry->decap.tunnel_index);
 }
 
+static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
+					   struct mlxsw_sp_fib_entry *fib_entry,
+					   enum mlxsw_reg_ralue_op op)
+{
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
+					   fib_entry->decap.tunnel_index);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_fib_entry *fib_entry,
 				   enum mlxsw_reg_ralue_op op)
@@ -4083,9 +4294,13 @@
 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
+	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
+		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
 							fib_entry, op);
+	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
+		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
 	}
 	return -EINVAL;
 }
@@ -4120,8 +4335,9 @@
 			     const struct fib_entry_notifier_info *fen_info,
 			     struct mlxsw_sp_fib_entry *fib_entry)
 {
+	struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
-	struct net_device *dev = fen_info->fi->fib_dev;
+	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct fib_info *fi = fen_info->fi;
 
@@ -4135,12 +4351,23 @@
 							     fib_entry,
 							     ipip_entry);
 		}
+		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
+						     dip.addr4)) {
+			u32 t_index;
+
+			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
+			fib_entry->decap.tunnel_index = t_index;
+			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+			return 0;
+		}
 		/* fall through */
 	case RTN_BROADCAST:
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
 		return 0;
+	case RTN_BLACKHOLE:
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
+		return 0;
 	case RTN_UNREACHABLE: /* fall through */
-	case RTN_BLACKHOLE: /* fall through */
 	case RTN_PROHIBIT:
 		/* Packets hitting these routes need to be trapped, but
 		 * can do so with a lower priority than packets directed
@@ -4754,7 +4981,8 @@
 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
 {
 	/* RTF_CACHE routes are ignored */
-	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+	return !(rt->fib6_flags & RTF_ADDRCONF) &&
+		rt->fib6_nh->fib_nh_gw_family;
 }
 
 static struct fib6_info *
@@ -4813,8 +5041,8 @@
 					const struct fib6_info *rt,
 					enum mlxsw_sp_ipip_type *ret)
 {
-	return rt->fib6_nh.nh_dev &&
-	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
+	return rt->fib6_nh->fib_nh_dev &&
+	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
 }
 
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
@@ -4824,7 +5052,7 @@
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
-	struct net_device *dev = rt->fib6_nh.nh_dev;
+	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
 	struct mlxsw_sp_rif *rif;
 	int err;
 
@@ -4867,11 +5095,11 @@
 				  struct mlxsw_sp_nexthop *nh,
 				  const struct fib6_info *rt)
 {
-	struct net_device *dev = rt->fib6_nh.nh_dev;
+	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
 
 	nh->nh_grp = nh_grp;
-	nh->nh_weight = rt->fib6_nh.nh_weight;
-	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
+	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
+	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
 
 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -4894,7 +5122,7 @@
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
 				    const struct fib6_info *rt)
 {
-	return rt->fib6_flags & RTF_GATEWAY ||
+	return rt->fib6_nh->fib_nh_gw_family ||
 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
 }
 
@@ -4905,13 +5133,11 @@
 	struct mlxsw_sp_nexthop_group *nh_grp;
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 	struct mlxsw_sp_nexthop *nh;
-	size_t alloc_size;
 	int i = 0;
 	int err;
 
-	alloc_size = sizeof(*nh_grp) +
-		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
-	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+	nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
+			 GFP_KERNEL);
 	if (!nh_grp)
 		return ERR_PTR(-ENOMEM);
 	INIT_LIST_HEAD(&nh_grp->fib_list);
@@ -5035,17 +5261,21 @@
 static int
 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct fib6_info *rt)
+				struct fib6_info **rt_arr, unsigned int nrt6)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
-	int err;
+	int err, i;
 
-	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
-	if (IS_ERR(mlxsw_sp_rt6))
-		return PTR_ERR(mlxsw_sp_rt6);
+	for (i = 0; i < nrt6; i++) {
+		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
+		if (IS_ERR(mlxsw_sp_rt6)) {
+			err = PTR_ERR(mlxsw_sp_rt6);
+			goto err_rt6_create;
+		}
 
-	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
-	fib6_entry->nrt6++;
+		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+		fib6_entry->nrt6++;
+	}
 
 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
 	if (err)
@@ -5054,27 +5284,38 @@
 	return 0;
 
 err_nexthop6_group_update:
-	fib6_entry->nrt6--;
-	list_del(&mlxsw_sp_rt6->list);
-	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	i = nrt6;
+err_rt6_create:
+	for (i--; i >= 0; i--) {
+		fib6_entry->nrt6--;
+		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
+					       struct mlxsw_sp_rt6, list);
+		list_del(&mlxsw_sp_rt6->list);
+		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	}
 	return err;
 }
 
 static void
 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct fib6_info *rt)
+				struct fib6_info **rt_arr, unsigned int nrt6)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+	int i;
 
-	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
-	if (WARN_ON(!mlxsw_sp_rt6))
-		return;
+	for (i = 0; i < nrt6; i++) {
+		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
+							   rt_arr[i]);
+		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
+			continue;
 
-	fib6_entry->nrt6--;
-	list_del(&mlxsw_sp_rt6->list);
+		fib6_entry->nrt6--;
+		list_del(&mlxsw_sp_rt6->list);
+		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	}
+
 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
-	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
 }
 
 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
@@ -5089,6 +5330,8 @@
 	 */
 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+	else if (rt->fib6_type == RTN_BLACKHOLE)
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
 	else if (rt->fib6_flags & RTF_REJECT)
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
@@ -5113,29 +5356,32 @@
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_fib_node *fib_node,
-			   struct fib6_info *rt)
+			   struct fib6_info **rt_arr, unsigned int nrt6)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_entry *fib_entry;
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
-	int err;
+	int err, i;
 
 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
 	if (!fib6_entry)
 		return ERR_PTR(-ENOMEM);
 	fib_entry = &fib6_entry->common;
 
-	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
-	if (IS_ERR(mlxsw_sp_rt6)) {
-		err = PTR_ERR(mlxsw_sp_rt6);
-		goto err_rt6_create;
+	INIT_LIST_HEAD(&fib6_entry->rt6_list);
+
+	for (i = 0; i < nrt6; i++) {
+		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
+		if (IS_ERR(mlxsw_sp_rt6)) {
+			err = PTR_ERR(mlxsw_sp_rt6);
+			goto err_rt6_create;
+		}
+		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+		fib6_entry->nrt6++;
 	}
 
-	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
+	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
 
-	INIT_LIST_HEAD(&fib6_entry->rt6_list);
-	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
-	fib6_entry->nrt6 = 1;
 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
 	if (err)
 		goto err_nexthop6_group_get;
@@ -5145,9 +5391,15 @@
 	return fib6_entry;
 
 err_nexthop6_group_get:
-	list_del(&mlxsw_sp_rt6->list);
-	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	i = nrt6;
 err_rt6_create:
+	for (i--; i >= 0; i--) {
+		fib6_entry->nrt6--;
+		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
+					       struct mlxsw_sp_rt6, list);
+		list_del(&mlxsw_sp_rt6->list);
+		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+	}
 	kfree(fib6_entry);
 	return ERR_PTR(err);
 }
@@ -5190,16 +5442,16 @@
 
 static int
 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
-			       bool replace)
+			       bool *p_replace)
 {
 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
 	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
-	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
 
-	if (replace && WARN_ON(!fib6_entry))
-		return -EINVAL;
+	if (*p_replace && !fib6_entry)
+		*p_replace = false;
 
 	if (fib6_entry) {
 		list_add_tail(&new6_entry->common.list,
@@ -5234,11 +5486,11 @@
 
 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_fib6_entry *fib6_entry,
-					 bool replace)
+					 bool *p_replace)
 {
 	int err;
 
-	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
 	if (err)
 		return err;
 
@@ -5311,10 +5563,12 @@
 }
 
 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
-				    struct fib6_info *rt, bool replace)
+				    struct fib6_info **rt_arr,
+				    unsigned int nrt6, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
+	struct fib6_info *rt = rt_arr[0];
 	int err;
 
 	if (mlxsw_sp->router->aborted)
@@ -5339,19 +5593,21 @@
 	 */
 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
 	if (fib6_entry) {
-		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
+						      rt_arr, nrt6);
 		if (err)
 			goto err_fib6_entry_nexthop_add;
 		return 0;
 	}
 
-	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
+						nrt6);
 	if (IS_ERR(fib6_entry)) {
 		err = PTR_ERR(fib6_entry);
 		goto err_fib6_entry_create;
 	}
 
-	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
 	if (err)
 		goto err_fib6_node_entry_link;
 
@@ -5368,10 +5624,12 @@
 }
 
 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
-				     struct fib6_info *rt)
+				     struct fib6_info **rt_arr,
+				     unsigned int nrt6)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
+	struct fib6_info *rt = rt_arr[0];
 
 	if (mlxsw_sp->router->aborted)
 		return;
@@ -5383,11 +5641,12 @@
 	if (WARN_ON(!fib6_entry))
 		return;
 
-	/* If route is part of a multipath entry, but not the last one
-	 * removed, then only reduce its nexthop group.
+	/* If not all the nexthops are deleted, then only reduce the nexthop
+	 * group.
 	 */
-	if (!list_is_singular(&fib6_entry->rt6_list)) {
-		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+	if (nrt6 != fib6_entry->nrt6) {
+		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
+						nrt6);
 		return;
 	}
 
@@ -5648,10 +5907,15 @@
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
 }
 
+struct mlxsw_sp_fib6_event_work {
+	struct fib6_info **rt_arr;
+	unsigned int nrt6;
+};
+
 struct mlxsw_sp_fib_event_work {
 	struct work_struct work;
 	union {
-		struct fib6_entry_notifier_info fen6_info;
+		struct mlxsw_sp_fib6_event_work fib6_work;
 		struct fib_entry_notifier_info fen_info;
 		struct fib_rule_notifier_info fr_info;
 		struct fib_nh_notifier_info fnh_info;
@@ -5662,6 +5926,54 @@
 	unsigned long event;
 };
 
+static int
+mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
+			       struct fib6_entry_notifier_info *fen6_info)
+{
+	struct fib6_info *rt = fen6_info->rt;
+	struct fib6_info **rt_arr;
+	struct fib6_info *iter;
+	unsigned int nrt6;
+	int i = 0;
+
+	nrt6 = fen6_info->nsiblings + 1;
+
+	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
+	if (!rt_arr)
+		return -ENOMEM;
+
+	fib6_work->rt_arr = rt_arr;
+	fib6_work->nrt6 = nrt6;
+
+	rt_arr[0] = rt;
+	fib6_info_hold(rt);
+
+	if (!fen6_info->nsiblings)
+		return 0;
+
+	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
+		if (i == fen6_info->nsiblings)
+			break;
+
+		rt_arr[i + 1] = iter;
+		fib6_info_hold(iter);
+		i++;
+	}
+	WARN_ON_ONCE(i != fen6_info->nsiblings);
+
+	return 0;
+}
+
+static void
+mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
+{
+	int i;
+
+	for (i = 0; i < fib6_work->nrt6; i++)
+		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
+	kfree(fib6_work->rt_arr);
+}
+
 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 {
 	struct mlxsw_sp_fib_event_work *fib_work =
@@ -5720,18 +6032,21 @@
 
 	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
-	case FIB_EVENT_ENTRY_APPEND: /* fall through */
 	case FIB_EVENT_ENTRY_ADD:
 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
-					       fib_work->fen6_info.rt, replace);
+					       fib_work->fib6_work.rt_arr,
+					       fib_work->fib6_work.nrt6,
+					       replace);
 		if (err)
 			mlxsw_sp_router_fib_abort(mlxsw_sp);
-		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
-		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
-		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+		mlxsw_sp_router_fib6_del(mlxsw_sp,
+					 fib_work->fib6_work.rt_arr,
+					 fib_work->fib6_work.nrt6);
+		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
 		break;
 	case FIB_EVENT_RULE_ADD:
 		/* if we get here, a rule was added that we do not support.
@@ -5820,22 +6135,26 @@
 	}
 }
 
-static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
-				       struct fib_notifier_info *info)
+static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+				      struct fib_notifier_info *info)
 {
 	struct fib6_entry_notifier_info *fen6_info;
+	int err;
 
 	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
-	case FIB_EVENT_ENTRY_APPEND: /* fall through */
 	case FIB_EVENT_ENTRY_ADD: /* fall through */
 	case FIB_EVENT_ENTRY_DEL:
 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
 					 info);
-		fib_work->fen6_info = *fen6_info;
-		fib6_info_hold(fib_work->fen6_info.rt);
+		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
+						     fen6_info);
+		if (err)
+			return err;
 		break;
 	}
+
+	return 0;
 }
 
 static void
@@ -5876,6 +6195,10 @@
 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
 	rule = fr_info->rule;
 
+	/* Rule only affects locally generated traffic */
+	if (rule->iifindex == info->net->loopback_dev->ifindex)
+		return 0;
+
 	switch (info->family) {
 	case AF_INET:
 		if (!fib4_rule_default(rule) && !rule->l3mdev)
@@ -5927,10 +6250,34 @@
 			return notifier_from_errno(err);
 		break;
 	case FIB_EVENT_ENTRY_ADD:
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_APPEND:  /* fall through */
 		if (router->aborted) {
 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
 			return notifier_from_errno(-EINVAL);
 		}
+		if (info->family == AF_INET) {
+			struct fib_entry_notifier_info *fen_info = ptr;
+
+			if (fen_info->fi->fib_nh_is_v6) {
+				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
+				return notifier_from_errno(-EINVAL);
+			}
+			if (fen_info->fi->nh) {
+				NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+				return notifier_from_errno(-EINVAL);
+			}
+		} else if (info->family == AF_INET6) {
+			struct fib6_entry_notifier_info *fen6_info;
+
+			fen6_info = container_of(info,
+						 struct fib6_entry_notifier_info,
+						 info);
+			if (fen6_info->rt->nh) {
+				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
+				return notifier_from_errno(-EINVAL);
+			}
+		}
 		break;
 	}
 
@@ -5948,7 +6295,9 @@
 		break;
 	case AF_INET6:
 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
-		mlxsw_sp_router_fib6_event(fib_work, info);
+		err = mlxsw_sp_router_fib6_event(fib_work, info);
+		if (err)
+			goto err_fib_event;
 		break;
 	case RTNL_FAMILY_IP6MR:
 	case RTNL_FAMILY_IPMR:
@@ -5960,6 +6309,10 @@
 	mlxsw_core_schedule_work(&fib_work->work);
 
 	return NOTIFY_DONE;
+
+err_fib_event:
+	kfree(fib_work);
+	return NOTIFY_BAD;
 }
 
 struct mlxsw_sp_rif *
@@ -5983,7 +6336,7 @@
 
 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-	if (WARN_ON_ONCE(err))
+	if (err)
 		return err;
 
 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
@@ -6086,10 +6439,12 @@
 
 	INIT_LIST_HEAD(&rif->nexthop_list);
 	INIT_LIST_HEAD(&rif->neigh_list);
-	ether_addr_copy(rif->addr, l3_dev->dev_addr);
-	rif->mtu = l3_dev->mtu;
+	if (l3_dev) {
+		ether_addr_copy(rif->addr, l3_dev->dev_addr);
+		rif->mtu = l3_dev->mtu;
+		rif->dev = l3_dev;
+	}
 	rif->vr_id = vr_id;
-	rif->dev = l3_dev;
 	rif->rif_index = rif_index;
 
 	return rif;
@@ -6113,7 +6468,19 @@
 
 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
 {
-	return lb_rif->ul_vr_id;
+	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
+	struct mlxsw_sp_vr *ul_vr;
+
+	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
+	if (WARN_ON(IS_ERR(ul_vr)))
+		return 0;
+
+	return ul_vr->id;
+}
+
+u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+	return lb_rif->ul_rif_id;
 }
 
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
@@ -6146,7 +6513,7 @@
 	int i, err;
 
 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
-	ops = mlxsw_sp->router->rif_ops_arr[type];
+	ops = mlxsw_sp->rif_ops_arr[type];
 
 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
 	if (IS_ERR(vr))
@@ -6164,6 +6531,8 @@
 		err = -ENOMEM;
 		goto err_rif_alloc;
 	}
+	dev_hold(rif->dev);
+	mlxsw_sp->router->rifs[rif_index] = rif;
 	rif->mlxsw_sp = mlxsw_sp;
 	rif->ops = ops;
 
@@ -6190,7 +6559,6 @@
 	}
 
 	mlxsw_sp_rif_counters_alloc(rif);
-	mlxsw_sp->router->rifs[rif_index] = rif;
 
 	return rif;
 
@@ -6202,6 +6570,8 @@
 	if (fid)
 		mlxsw_sp_fid_put(fid);
 err_fid_get:
+	mlxsw_sp->router->rifs[rif_index] = NULL;
+	dev_put(rif->dev);
 	kfree(rif);
 err_rif_alloc:
 err_rif_index_alloc:
@@ -6210,7 +6580,7 @@
 	return ERR_PTR(err);
 }
 
-void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
+static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
 {
 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
@@ -6221,7 +6591,6 @@
 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
 
-	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
 	mlxsw_sp_rif_counters_free(rif);
 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
@@ -6229,6 +6598,8 @@
 	if (fid)
 		/* Loopback RIFs are not associated with a FID. */
 		mlxsw_sp_fid_put(fid);
+	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
+	dev_put(rif->dev);
 	kfree(rif);
 	vr->rif_count--;
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
@@ -6259,6 +6630,40 @@
 		params->system_port = mlxsw_sp_port->local_port;
 }
 
+static struct mlxsw_sp_rif_subport *
+mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
+{
+	return container_of(rif, struct mlxsw_sp_rif_subport, common);
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
+			 const struct mlxsw_sp_rif_params *params,
+			 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_rif_subport *rif_subport;
+	struct mlxsw_sp_rif *rif;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
+	if (!rif)
+		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
+
+	rif_subport = mlxsw_sp_rif_subport_rif(rif);
+	refcount_inc(&rif_subport->ref_count);
+	return rif;
+}
+
+static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_rif_subport *rif_subport;
+
+	rif_subport = mlxsw_sp_rif_subport_rif(rif);
+	if (!refcount_dec_and_test(&rif_subport->ref_count))
+		return;
+
+	mlxsw_sp_rif_destroy(rif);
+}
+
 static int
 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
 			       struct net_device *l3_dev,
@@ -6266,22 +6671,18 @@
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_rif_params params = {
+		.dev = l3_dev,
+	};
 	u16 vid = mlxsw_sp_port_vlan->vid;
 	struct mlxsw_sp_rif *rif;
 	struct mlxsw_sp_fid *fid;
 	int err;
 
-	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
-	if (!rif) {
-		struct mlxsw_sp_rif_params params = {
-			.dev = l3_dev,
-		};
-
-		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
-		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
-		if (IS_ERR(rif))
-			return PTR_ERR(rif);
-	}
+	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
+	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
+	if (IS_ERR(rif))
+		return PTR_ERR(rif);
 
 	/* FID was already created, just take a reference */
 	fid = rif->ops->fid_get(rif, extack);
@@ -6308,6 +6709,7 @@
 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
 err_fid_port_vid_map:
 	mlxsw_sp_fid_put(fid);
+	mlxsw_sp_rif_subport_put(rif);
 	return err;
 }
 
@@ -6316,6 +6718,7 @@
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
 	u16 vid = mlxsw_sp_port_vlan->vid;
 
 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
@@ -6325,10 +6728,8 @@
 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
-	/* If router port holds the last reference on the rFID, then the
-	 * associated Sub-port RIF will be destroyed.
-	 */
 	mlxsw_sp_fid_put(fid);
+	mlxsw_sp_rif_subport_put(rif);
 }
 
 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
@@ -6364,8 +6765,8 @@
 	    netif_is_ovs_port(port_dev))
 		return 0;
 
-	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
-						 extack);
+	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
+						 MLXSW_SP_DEFAULT_VID, extack);
 }
 
 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
@@ -6398,15 +6799,15 @@
 	if (netif_is_bridge_port(lag_dev))
 		return 0;
 
-	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
-					     extack);
+	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
+					     MLXSW_SP_DEFAULT_VID, extack);
 }
 
-static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
+static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
+					  struct net_device *l3_dev,
 					  unsigned long event,
 					  struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
 	struct mlxsw_sp_rif_params params = {
 		.dev = l3_dev,
 	};
@@ -6427,7 +6828,8 @@
 	return 0;
 }
 
-static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
+static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
+					struct net_device *vlan_dev,
 					unsigned long event,
 					struct netlink_ext_ack *extack)
 {
@@ -6444,7 +6846,8 @@
 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
 						     vid, extack);
 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
-		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
+		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
+						      extack);
 
 	return 0;
 }
@@ -6545,16 +6948,11 @@
 			    mlxsw_sp_fid_index(rif->fid), false);
 }
 
-static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
+static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
+					   struct net_device *macvlan_dev,
 					   unsigned long event,
 					   struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp *mlxsw_sp;
-
-	mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
-	if (!mlxsw_sp)
-		return 0;
-
 	switch (event) {
 	case NETDEV_UP:
 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
@@ -6566,7 +6964,35 @@
 	return 0;
 }
 
-static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
+static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
+					       struct net_device *dev,
+					       const unsigned char *dev_addr,
+					       struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_rif *rif;
+	int i;
+
+	/* A RIF is not created for macvlan netdevs. Their MAC is used to
+	 * populate the FDB.
+	 */
+	if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
+		return 0;
+
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+		rif = mlxsw_sp->router->rifs[i];
+		if (rif && rif->dev && rif->dev != dev &&
+		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
+					     mlxsw_sp->mac_mask)) {
+			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
+				     struct net_device *dev,
 				     unsigned long event,
 				     struct netlink_ext_ack *extack)
 {
@@ -6575,21 +7001,24 @@
 	else if (netif_is_lag_master(dev))
 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
 	else if (netif_is_bridge_master(dev))
-		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
+		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
+						      extack);
 	else if (is_vlan_dev(dev))
-		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
+		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
+						    extack);
 	else if (netif_is_macvlan(dev))
-		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
+		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
+						       extack);
 	else
 		return 0;
 }
 
-int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
-			    unsigned long event, void *ptr)
+static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
 {
 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
 	struct net_device *dev = ifa->ifa_dev->dev;
-	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_router *router;
 	struct mlxsw_sp_rif *rif;
 	int err = 0;
 
@@ -6597,15 +7026,12 @@
 	if (event == NETDEV_UP)
 		goto out;
 
-	mlxsw_sp = mlxsw_sp_lower_get(dev);
-	if (!mlxsw_sp)
-		goto out;
-
-	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
+	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
-	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
+	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
 out:
 	return notifier_from_errno(err);
 }
@@ -6627,13 +7053,19 @@
 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
-	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
+	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
+						  ivi->extack);
+	if (err)
+		goto out;
+
+	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
 out:
 	return notifier_from_errno(err);
 }
 
 struct mlxsw_sp_inet6addr_event_work {
 	struct work_struct work;
+	struct mlxsw_sp *mlxsw_sp;
 	struct net_device *dev;
 	unsigned long event;
 };
@@ -6642,21 +7074,18 @@
 {
 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
+	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
 	struct net_device *dev = inet6addr_work->dev;
 	unsigned long event = inet6addr_work->event;
-	struct mlxsw_sp *mlxsw_sp;
 	struct mlxsw_sp_rif *rif;
 
 	rtnl_lock();
-	mlxsw_sp = mlxsw_sp_lower_get(dev);
-	if (!mlxsw_sp)
-		goto out;
 
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
-	__mlxsw_sp_inetaddr_event(dev, event, NULL);
+	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
 out:
 	rtnl_unlock();
 	dev_put(dev);
@@ -6664,25 +7093,25 @@
 }
 
 /* Called with rcu_read_lock() */
-int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
-			     unsigned long event, void *ptr)
+static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
+				    unsigned long event, void *ptr)
 {
 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
 	struct net_device *dev = if6->idev->dev;
+	struct mlxsw_sp_router *router;
 
 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
 	if (event == NETDEV_UP)
 		return NOTIFY_DONE;
 
-	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
-		return NOTIFY_DONE;
-
 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
 	if (!inet6addr_work)
 		return NOTIFY_BAD;
 
+	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
+	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
 	inet6addr_work->dev = dev;
 	inet6addr_work->event = event;
 	dev_hold(dev);
@@ -6708,7 +7137,12 @@
 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
-	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
+	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
+						  i6vi->extack);
+	if (err)
+		goto out;
+
+	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
 out:
 	return notifier_from_errno(err);
 }
@@ -6730,20 +7164,14 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 }
 
-int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
+static int
+mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_rif *rif)
 {
-	struct mlxsw_sp *mlxsw_sp;
-	struct mlxsw_sp_rif *rif;
+	struct net_device *dev = rif->dev;
 	u16 fid_index;
 	int err;
 
-	mlxsw_sp = mlxsw_sp_lower_get(dev);
-	if (!mlxsw_sp)
-		return 0;
-
-	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
-	if (!rif)
-		return 0;
 	fid_index = mlxsw_sp_fid_index(rif->fid);
 
 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
@@ -6787,6 +7215,41 @@
 	return err;
 }
 
+static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
+			    struct netdev_notifier_pre_changeaddr_info *info)
+{
+	struct netlink_ext_ack *extack;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+	return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
+						   info->dev_addr, extack);
+}
+
+int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
+					 unsigned long event, void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_rif *rif;
+
+	mlxsw_sp = mlxsw_sp_lower_get(dev);
+	if (!mlxsw_sp)
+		return 0;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!rif)
+		return 0;
+
+	switch (event) {
+	case NETDEV_CHANGEMTU: /* fall through */
+	case NETDEV_CHANGEADDR:
+		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
+	case NETDEV_PRE_CHANGEADDR:
+		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
+	}
+
+	return 0;
+}
+
 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
 				  struct net_device *l3_dev,
 				  struct netlink_ext_ack *extack)
@@ -6798,9 +7261,10 @@
 	 */
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
 	if (rif)
-		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
+		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
+					  extack);
 
-	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
+	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
 }
 
 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
@@ -6811,7 +7275,7 @@
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
 	if (!rif)
 		return;
-	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
+	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
 }
 
 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
@@ -6865,18 +7329,13 @@
 					     __mlxsw_sp_rif_macvlan_flush, rif);
 }
 
-static struct mlxsw_sp_rif_subport *
-mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
-{
-	return container_of(rif, struct mlxsw_sp_rif_subport, common);
-}
-
 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
 				       const struct mlxsw_sp_rif_params *params)
 {
 	struct mlxsw_sp_rif_subport *rif_subport;
 
 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
+	refcount_set(&rif_subport->ref_count, 1);
 	rif_subport->vid = params->vid;
 	rif_subport->lag = params->lag;
 	if (params->lag)
@@ -7031,11 +7490,15 @@
 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
 			  struct netlink_ext_ack *extack)
 {
+	struct net_device *br_dev = rif->dev;
 	u16 vid;
 	int err;
 
 	if (is_vlan_dev(rif->dev)) {
 		vid = vlan_dev_vlan_id(rif->dev);
+		br_dev = vlan_dev_real_dev(rif->dev);
+		if (WARN_ON(!netif_is_bridge_master(br_dev)))
+			return ERR_PTR(-EINVAL);
 	} else {
 		err = br_vlan_get_pvid(rif->dev, &vid);
 		if (err < 0 || !vid) {
@@ -7044,7 +7507,7 @@
 		}
 	}
 
-	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
+	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
 }
 
 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
@@ -7061,7 +7524,8 @@
 
 	info.addr = mac;
 	info.vid = vid;
-	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
+				 NULL);
 }
 
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
@@ -7134,7 +7598,7 @@
 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
 			 struct netlink_ext_ack *extack)
 {
-	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
+	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
 }
 
 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
@@ -7148,7 +7612,8 @@
 
 	info.addr = mac;
 	info.vid = 0;
-	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
+				 NULL);
 }
 
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
@@ -7160,6 +7625,15 @@
 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
 };
 
+static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
+	.type			= MLXSW_SP_RIF_TYPE_VLAN,
+	.rif_size		= sizeof(struct mlxsw_sp_rif),
+	.configure		= mlxsw_sp_rif_fid_configure,
+	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
+	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
+	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
+};
+
 static struct mlxsw_sp_rif_ipip_lb *
 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
 {
@@ -7180,7 +7654,7 @@
 }
 
 static int
-mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
+mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
@@ -7192,11 +7666,12 @@
 	if (IS_ERR(ul_vr))
 		return PTR_ERR(ul_vr);
 
-	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
 	if (err)
 		goto err_loopback_op;
 
 	lb_rif->ul_vr_id = ul_vr->id;
+	lb_rif->ul_rif_id = 0;
 	++ul_vr->rif_count;
 	return 0;
 
@@ -7205,32 +7680,213 @@
 	return err;
 }
 
-static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
+static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 	struct mlxsw_sp_vr *ul_vr;
 
 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
-	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
+	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
 
 	--ul_vr->rif_count;
 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
 }
 
-static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
+static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
-	.configure		= mlxsw_sp_rif_ipip_lb_configure,
-	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
+	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
+	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
 };
 
-static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
+const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
-	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
+	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
-	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
+	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
+};
+
+static int
+mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
+{
+	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+	char ritr_pl[MLXSW_REG_RITR_LEN];
+
+	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
+			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
+	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
+					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
+		       struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_rif *ul_rif;
+	u16 rif_index;
+	int err;
+
+	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
+		return ERR_PTR(err);
+	}
+
+	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
+	if (!ul_rif)
+		return ERR_PTR(-ENOMEM);
+
+	mlxsw_sp->router->rifs[rif_index] = ul_rif;
+	ul_rif->mlxsw_sp = mlxsw_sp;
+	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
+	if (err)
+		goto ul_rif_op_err;
+
+	return ul_rif;
+
+ul_rif_op_err:
+	mlxsw_sp->router->rifs[rif_index] = NULL;
+	kfree(ul_rif);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
+{
+	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+
+	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
+	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
+	kfree(ul_rif);
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+		    struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_vr *vr;
+	int err;
+
+	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
+	if (IS_ERR(vr))
+		return ERR_CAST(vr);
+
+	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
+		return vr->ul_rif;
+
+	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
+	if (IS_ERR(vr->ul_rif)) {
+		err = PTR_ERR(vr->ul_rif);
+		goto err_ul_rif_create;
+	}
+
+	vr->rif_count++;
+	refcount_set(&vr->ul_rif_refcnt, 1);
+
+	return vr->ul_rif;
+
+err_ul_rif_create:
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
+{
+	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+	struct mlxsw_sp_vr *vr;
+
+	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
+
+	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
+		return;
+
+	vr->rif_count--;
+	mlxsw_sp_ul_rif_destroy(ul_rif);
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+			       u16 *ul_rif_index)
+{
+	struct mlxsw_sp_rif *ul_rif;
+
+	ASSERT_RTNL();
+
+	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
+	if (IS_ERR(ul_rif))
+		return PTR_ERR(ul_rif);
+	*ul_rif_index = ul_rif->rif_index;
+
+	return 0;
+}
+
+void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
+{
+	struct mlxsw_sp_rif *ul_rif;
+
+	ASSERT_RTNL();
+
+	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
+	if (WARN_ON(!ul_rif))
+		return;
+
+	mlxsw_sp_ul_rif_put(ul_rif);
+}
+
+static int
+mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
+	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+	struct mlxsw_sp_rif *ul_rif;
+	int err;
+
+	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
+	if (IS_ERR(ul_rif))
+		return PTR_ERR(ul_rif);
+
+	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
+	if (err)
+		goto err_loopback_op;
+
+	lb_rif->ul_vr_id = 0;
+	lb_rif->ul_rif_id = ul_rif->rif_index;
+
+	return 0;
+
+err_loopback_op:
+	mlxsw_sp_ul_rif_put(ul_rif);
+	return err;
+}
+
+static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+	struct mlxsw_sp_rif *ul_rif;
+
+	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
+	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
+	mlxsw_sp_ul_rif_put(ul_rif);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
+	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
+	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
+	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
+	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
+	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
+};
+
+const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
+	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
+	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
+	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
+	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
 };
 
 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
@@ -7243,8 +7899,6 @@
 	if (!mlxsw_sp->router->rifs)
 		return -ENOMEM;
 
-	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
-
 	return 0;
 }
 
@@ -7348,7 +8002,7 @@
 	char recr2_pl[MLXSW_REG_RECR2_LEN];
 	u32 seed;
 
-	get_random_bytes(&seed, sizeof(seed));
+	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
 	mlxsw_reg_recr2_pack(recr2_pl, seed);
 	mlxsw_sp_mp4_hash_init(recr2_pl);
 	mlxsw_sp_mp6_hash_init(recr2_pl);
@@ -7419,6 +8073,16 @@
 	mlxsw_sp->router = router;
 	router->mlxsw_sp = mlxsw_sp;
 
+	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
+	err = register_inetaddr_notifier(&router->inetaddr_nb);
+	if (err)
+		goto err_register_inetaddr_notifier;
+
+	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
+	err = register_inet6addr_notifier(&router->inet6addr_nb);
+	if (err)
+		goto err_register_inet6addr_notifier;
+
 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
 	err = __mlxsw_sp_router_init(mlxsw_sp);
 	if (err)
@@ -7504,6 +8168,10 @@
 err_rifs_init:
 	__mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+	unregister_inet6addr_notifier(&router->inet6addr_nb);
+err_register_inet6addr_notifier:
+	unregister_inetaddr_notifier(&router->inetaddr_nb);
+err_register_inetaddr_notifier:
 	kfree(mlxsw_sp->router);
 	return err;
 }
@@ -7521,5 +8189,7 @@
 	mlxsw_sp_ipips_fini(mlxsw_sp);
 	mlxsw_sp_rifs_fini(mlxsw_sp);
 	__mlxsw_sp_router_fini(mlxsw_sp);
+	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
+	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
 	kfree(mlxsw_sp->router);
 }
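
[Editor's note -- annotation, not part of the patch] The nexthop-group hunk above replaces an open-coded `sizeof(*nh_grp) + nrt6 * sizeof(struct mlxsw_sp_nexthop)` with `struct_size()`, the kernel helper for sizing a struct that ends in a flexible array member (the real macro also guards the multiply-add against overflow). A minimal userspace sketch of the same allocation pattern, with a simplified stand-in for the kernel macro:

#include <stdio.h>
#include <stdlib.h>

struct nexthop {
	unsigned int ifindex;
};

struct nexthop_group {
	unsigned int count;
	struct nexthop nexthops[];	/* flexible array member */
};

/* Simplified stand-in for the kernel's struct_size(); the real macro
 * saturates on arithmetic overflow instead of silently wrapping.
 */
#define STRUCT_SIZE(p, member, n) \
	(sizeof(*(p)) + (size_t)(n) * sizeof(*(p)->member))

int main(void)
{
	unsigned int n = 4;
	struct nexthop_group *grp;

	/* One allocation covers the header and all n array elements. */
	grp = calloc(1, STRUCT_SIZE(grp, nexthops, n));
	if (!grp)
		return 1;
	grp->count = n;
	printf("allocated %zu bytes for %u nexthops\n",
	       STRUCT_SIZE(grp, nexthops, n), grp->count);
	free(grp);
	return 0;
}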
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 1a60391..cc1de91 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -7,17 +7,6 @@
 #include "spectrum.h"
 #include "reg.h"
 
-enum mlxsw_sp_l3proto {
-	MLXSW_SP_L3_PROTO_IPV4,
-	MLXSW_SP_L3_PROTO_IPV6,
-#define MLXSW_SP_L3_PROTO_MAX	(MLXSW_SP_L3_PROTO_IPV6 + 1)
-};
-
-union mlxsw_sp_l3addr {
-	__be32 addr4;
-	struct in6_addr addr6;
-};
-
 struct mlxsw_sp_rif_ipip_lb;
 struct mlxsw_sp_rif_ipip_lb_config {
 	enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
@@ -35,18 +24,15 @@
 struct mlxsw_sp_nexthop;
 struct mlxsw_sp_ipip_entry;
 
-struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
-					      const struct net_device *dev);
 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
 					   u16 rif_index);
 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
+u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif);
 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
-u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
-struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_rif *rif,
 				   enum mlxsw_sp_rif_counter_dir dir,
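
[Editor's note -- annotation, not part of the patch] The header hunk above drops the `mlxsw_sp_l3proto` enum and `mlxsw_sp_l3addr` union from spectrum_router.h. The router code in this patch still uses both (e.g. the NVE decap lookup), so the definitions presumably move to a shared header -- the destination is outside the hunks shown here. The type itself is the usual discriminated union for dual-stack addresses; a self-contained sketch of the pattern:

#include <stdio.h>
#include <arpa/inet.h>

enum l3proto {
	L3_PROTO_IPV4,
	L3_PROTO_IPV6,
};

union l3addr {
	struct in_addr addr4;
	struct in6_addr addr6;
};

/* The enum tag travels alongside the union and selects the active member. */
static void print_l3addr(enum l3proto proto, const union l3addr *addr)
{
	char buf[INET6_ADDRSTRLEN] = "";

	switch (proto) {
	case L3_PROTO_IPV4:
		inet_ntop(AF_INET, &addr->addr4, buf, sizeof(buf));
		break;
	case L3_PROTO_IPV6:
		inet_ntop(AF_INET6, &addr->addr6, buf, sizeof(buf));
		break;
	}
	printf("%s\n", buf);
}

int main(void)
{
	union l3addr a;

	inet_pton(AF_INET, "192.0.2.1", &a.addr4);
	print_l3addr(L3_PROTO_IPV4, &a);
	return 0;
}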
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index d965fd2..560a60e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -305,7 +305,7 @@
 
 	parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
 	ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
-			    0, 0, parms.link, tun->fwmark);
+			    0, 0, parms.link, tun->fwmark, 0);
 
 	rt = ip_route_output_key(tun->net, &fl4);
 	if (IS_ERR(rt))
@@ -316,7 +316,11 @@
 
 	dev = rt->dst.dev;
 	*saddrp = fl4.saddr;
-	*daddrp = rt->rt_gateway;
+	if (rt->rt_gw_family == AF_INET)
+		*daddrp = rt->rt_gw4;
+	/* cannot offload if route has an IPv6 gateway */
+	else if (rt->rt_gw_family == AF_INET6)
+		dev = NULL;
 
 out:
 	ip_rt_put(rt);
@@ -383,7 +387,7 @@
 }
 
 static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
-	.can_handle = is_gretap_dev,
+	.can_handle = netif_is_gretap,
 	.parms = mlxsw_sp_span_entry_gretap4_parms,
 	.configure = mlxsw_sp_span_entry_gretap4_configure,
 	.deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
@@ -484,7 +488,7 @@
 
 static const
 struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
-	.can_handle = is_ip6gretap_dev,
+	.can_handle = netif_is_ip6gretap,
 	.parms = mlxsw_sp_span_entry_gretap6_parms,
 	.configure = mlxsw_sp_span_entry_gretap6_configure,
 	.deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 4eb64cb..5ecb451 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -15,9 +15,9 @@
 #include <linux/rtnetlink.h>
 #include <linux/netlink.h>
 #include <net/switchdev.h>
+#include <net/vxlan.h>
 
 #include "spectrum_span.h"
-#include "spectrum_router.h"
 #include "spectrum_switchdev.h"
 #include "spectrum.h"
 #include "core.h"
@@ -84,9 +84,17 @@
 	void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
 			   struct mlxsw_sp_bridge_port *bridge_port,
 			   struct mlxsw_sp_port *mlxsw_sp_port);
+	int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device,
+			  const struct net_device *vxlan_dev, u16 vid,
+			  struct netlink_ext_ack *extack);
 	struct mlxsw_sp_fid *
 		(*fid_get)(struct mlxsw_sp_bridge_device *bridge_device,
-			   u16 vid);
+			   u16 vid, struct netlink_ext_ack *extack);
+	struct mlxsw_sp_fid *
+		(*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device,
+			      u16 vid);
+	u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device,
+		       const struct mlxsw_sp_fid *fid);
 };
 
 static int
@@ -282,30 +290,6 @@
 	kfree(bridge_port);
 }
 
-static bool
-mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port *
-				    bridge_port)
-{
-	struct net_device *dev = bridge_port->dev;
-	struct mlxsw_sp *mlxsw_sp;
-
-	if (is_vlan_dev(dev))
-		mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev));
-	else
-		mlxsw_sp = mlxsw_sp_lower_get(dev);
-
-	/* In case ports were pulled from out of a bridged LAG, then
-	 * it's possible the reference count isn't zero, yet the bridge
-	 * port should be destroyed, as it's no longer an upper of ours.
-	 */
-	if (!mlxsw_sp && list_empty(&bridge_port->vlans_list))
-		return true;
-	else if (bridge_port->ref_count == 0)
-		return true;
-	else
-		return false;
-}
-
 static struct mlxsw_sp_bridge_port *
 mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge,
 			 struct net_device *brport_dev)
@@ -343,8 +327,7 @@
 {
 	struct mlxsw_sp_bridge_device *bridge_device;
 
-	bridge_port->ref_count--;
-	if (!mlxsw_sp_bridge_port_should_destroy(bridge_port))
+	if (--bridge_port->ref_count != 0)
 		return;
 	bridge_device = bridge_port->bridge_device;
 	mlxsw_sp_bridge_port_destroy(bridge_port);
@@ -448,46 +431,6 @@
 		mlxsw_sp_bridge_vlan_destroy(bridge_vlan);
 }
 
-static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge,
-					   struct net_device *dev,
-					   unsigned long *brport_flags)
-{
-	struct mlxsw_sp_bridge_port *bridge_port;
-
-	bridge_port = mlxsw_sp_bridge_port_find(bridge, dev);
-	if (WARN_ON(!bridge_port))
-		return;
-
-	memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags));
-}
-
-static int mlxsw_sp_port_attr_get(struct net_device *dev,
-				  struct switchdev_attr *attr)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac);
-		memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac,
-		       attr->u.ppid.id_len);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-		mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev,
-					       &attr->u.brport_flags);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
-					       BR_MCAST_FLOOD;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
 static int
 mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				  struct mlxsw_sp_bridge_vlan *bridge_vlan,
@@ -637,6 +580,17 @@
 	return err;
 }
 
+static int
+mlxsw_sp_port_attr_br_pre_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				    struct switchdev_trans *trans,
+				    unsigned long brport_flags)
+{
+	if (brport_flags & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
 					   struct switchdev_trans *trans,
 					   struct net_device *orig_dev,
@@ -883,6 +837,11 @@
 						       attr->orig_dev,
 						       attr->u.stp_state);
 		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		err = mlxsw_sp_port_attr_br_pre_flags_set(mlxsw_sp_port,
+							  trans,
+							  attr->u.brport_flags);
+		break;
 	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
 		err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans,
 						      attr->orig_dev,
@@ -925,7 +884,8 @@
 
 static int
 mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
-			    struct mlxsw_sp_bridge_port *bridge_port)
+			    struct mlxsw_sp_bridge_port *bridge_port,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
 	struct mlxsw_sp_bridge_device *bridge_device;
@@ -935,7 +895,7 @@
 	int err;
 
 	bridge_device = bridge_port->bridge_device;
-	fid = bridge_device->ops->fid_get(bridge_device, vid);
+	fid = bridge_device->ops->fid_get(bridge_device, vid, extack);
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
@@ -1003,7 +963,8 @@
 
 static int
 mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
-			       struct mlxsw_sp_bridge_port *bridge_port)
+			       struct mlxsw_sp_bridge_port *bridge_port,
+			       struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
 	struct mlxsw_sp_bridge_vlan *bridge_vlan;
@@ -1011,12 +972,11 @@
 	int err;
 
 	/* No need to continue if only VLAN flags were changed */
-	if (mlxsw_sp_port_vlan->bridge_port) {
-		mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+	if (mlxsw_sp_port_vlan->bridge_port)
 		return 0;
-	}
 
-	err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port);
+	err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port,
+					  extack);
 	if (err)
 		return err;
 
@@ -1093,16 +1053,29 @@
 static int
 mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_bridge_port *bridge_port,
-			      u16 vid, bool is_untagged, bool is_pvid)
+			      u16 vid, bool is_untagged, bool is_pvid,
+			      struct netlink_ext_ack *extack)
 {
 	u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid);
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 	u16 old_pvid = mlxsw_sp_port->pvid;
 	int err;
 
-	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid);
-	if (IS_ERR(mlxsw_sp_port_vlan))
-		return PTR_ERR(mlxsw_sp_port_vlan);
+	/* The only valid scenario in which a port-vlan already exists is if
+	 * the VLAN flags were changed and the port-vlan is associated with
+	 * the correct bridge port.
+	 */
+	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+	if (mlxsw_sp_port_vlan &&
+	    mlxsw_sp_port_vlan->bridge_port != bridge_port)
+		return -EEXIST;
+
+	if (!mlxsw_sp_port_vlan) {
+		mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port,
+							       vid);
+		if (IS_ERR(mlxsw_sp_port_vlan))
+			return PTR_ERR(mlxsw_sp_port_vlan);
+	}
 
 	err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true,
 				     is_untagged);
@@ -1113,7 +1086,8 @@
 	if (err)
 		goto err_port_pvid_set;
 
-	err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port);
+	err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port,
+					     extack);
 	if (err)
 		goto err_port_vlan_bridge_join;
 
@@ -1124,7 +1098,7 @@
 err_port_pvid_set:
 	mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
 err_port_vlan_set:
-	mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+	mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
 	return err;
 }
 
@@ -1163,7 +1137,8 @@
 
 static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
 				   const struct switchdev_obj_port_vlan *vlan,
-				   struct switchdev_trans *trans)
+				   struct switchdev_trans *trans,
+				   struct netlink_ext_ack *extack)
 {
 	bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
@@ -1185,7 +1160,7 @@
 		return err;
 	}
 
-	if (switchdev_trans_ph_prepare(trans))
+	if (switchdev_trans_ph_commit(trans))
 		return 0;
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
@@ -1200,7 +1175,7 @@
 
 		err = mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port,
 						    vid, flag_untagged,
-						    flag_pvid);
+						    flag_pvid, extack);
 		if (err)
 			return err;
 	}
@@ -1234,7 +1209,7 @@
 static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic)
 {
 	return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
-			 MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY;
+			 MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG;
 }
 
 static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
@@ -1243,10 +1218,55 @@
 			MLXSW_REG_SFD_OP_WRITE_REMOVE;
 }
 
+static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp,
+					  const char *mac, u16 fid,
+					  enum mlxsw_sp_l3proto proto,
+					  const union mlxsw_sp_l3addr *addr,
+					  bool adding, bool dynamic)
+{
+	enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto;
+	char *sfd_pl;
+	u8 num_rec;
+	u32 uip;
+	int err;
+
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		uip = be32_to_cpu(addr->addr4);
+		sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+	default:
+		WARN_ON(1);
+		return -EOPNOTSUPP;
+	}
+
+	sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+	if (!sfd_pl)
+		return -ENOMEM;
+
+	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+	mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0,
+				     mlxsw_sp_sfd_rec_policy(dynamic), mac, fid,
+				     MLXSW_REG_SFD_REC_ACTION_NOP, uip,
+				     sfd_proto);
+	num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+	if (err)
+		goto out;
+
+	if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+		err = -EBUSY;
+
+out:
+	kfree(sfd_pl);
+	return err;
+}
+
 static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				     const char *mac, u16 fid, bool adding,
 				     enum mlxsw_reg_sfd_rec_action action,
-				     bool dynamic)
+				     enum mlxsw_reg_sfd_rec_policy policy)
 {
 	char *sfd_pl;
 	u8 num_rec;
@@ -1257,8 +1277,7 @@
 		return -ENOMEM;
 
 	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
-	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
-			      mac, fid, action, local_port);
+	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port);
 	num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
 	if (err)
@@ -1277,7 +1296,8 @@
 				   bool dynamic)
 {
 	return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding,
-					 MLXSW_REG_SFD_REC_ACTION_NOP, dynamic);
+					 MLXSW_REG_SFD_REC_ACTION_NOP,
+					 mlxsw_sp_sfd_rec_policy(dynamic));
 }
 
 int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
@@ -1285,7 +1305,7 @@
 {
 	return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding,
 					 MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER,
-					 false);
+					 MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY);
 }
 
 static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
@@ -1610,7 +1630,7 @@
 	u16 fid_index;
 	int err = 0;
 
-	if (switchdev_trans_ph_prepare(trans))
+	if (switchdev_trans_ph_commit(trans))
 		return 0;
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
@@ -1724,7 +1744,8 @@
 
 static int mlxsw_sp_port_obj_add(struct net_device *dev,
 				 const struct switchdev_obj *obj,
-				 struct switchdev_trans *trans)
+				 struct switchdev_trans *trans,
+				 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	const struct switchdev_obj_port_vlan *vlan;
@@ -1733,7 +1754,8 @@
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
-		err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans);
+		err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans,
+					      extack);
 
 		if (switchdev_trans_ph_prepare(trans)) {
 			/* The event is emitted before the changes are actually
@@ -1761,7 +1783,7 @@
 mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_bridge_port *bridge_port, u16 vid)
 {
-	u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : vid;
+	u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : mlxsw_sp_port->pvid;
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
@@ -1771,7 +1793,7 @@
 	mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
 	mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid);
 	mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
-	mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+	mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
 }
 
 static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -1916,32 +1938,20 @@
 	return NULL;
 }
 
-static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
-	.switchdev_port_attr_get	= mlxsw_sp_port_attr_get,
-	.switchdev_port_attr_set	= mlxsw_sp_port_attr_set,
-	.switchdev_port_obj_add		= mlxsw_sp_port_obj_add,
-	.switchdev_port_obj_del		= mlxsw_sp_port_obj_del,
-};
-
 static int
 mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 				struct mlxsw_sp_bridge_port *bridge_port,
 				struct mlxsw_sp_port *mlxsw_sp_port,
 				struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-
 	if (is_vlan_dev(bridge_port->dev)) {
 		NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
 		return -EINVAL;
 	}
 
-	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1);
-	if (WARN_ON(!mlxsw_sp_port_vlan))
-		return -EINVAL;
-
-	/* Let VLAN-aware bridge take care of its own VLANs */
-	mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+	/* Port is no longer usable as a router interface */
+	if (mlxsw_sp_port->default_vlan->fid)
+		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan);
 
 	return 0;
 }
@@ -1951,24 +1961,150 @@
 				 struct mlxsw_sp_bridge_port *bridge_port,
 				 struct mlxsw_sp_port *mlxsw_sp_port)
 {
-	mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
 	/* Make sure untagged frames are allowed to ingress */
-	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+	mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID);
+}
+
+static int
+mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+				 const struct net_device *vxlan_dev, u16 vid,
+				 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+	struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+	struct mlxsw_sp_nve_params params = {
+		.type = MLXSW_SP_NVE_TYPE_VXLAN,
+		.vni = vxlan->cfg.vni,
+		.dev = vxlan_dev,
+	};
+	struct mlxsw_sp_fid *fid;
+	int err;
+
+	/* If the VLAN is 0, we need to find the VLAN that is configured as
+	 * PVID and egress untagged on the bridge port of the VxLAN device.
+	 * It is possible no such VLAN exists
+	 */
+	if (!vid) {
+		err = mlxsw_sp_vxlan_mapped_vid(vxlan_dev, &vid);
+		if (err || !vid)
+			return err;
+	}
+
+	/* If no other port is a member of the VLAN, then the FID does not exist.
+	 * NVE will be enabled on the FID once a port joins the VLAN
+	 */
+	fid = mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid);
+	if (!fid)
+		return 0;
+
+	if (mlxsw_sp_fid_vni_is_set(fid)) {
+		NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID");
+		err = -EINVAL;
+		goto err_vni_exists;
+	}
+
+	err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
+	if (err)
+		goto err_nve_fid_enable;
+
+	/* The tunnel port does not hold a reference on the FID. Only
+	 * local ports and the router port do.
+	 */
+	mlxsw_sp_fid_put(fid);
+
+	return 0;
+
+err_nve_fid_enable:
+err_vni_exists:
+	mlxsw_sp_fid_put(fid);
+	return err;
+}
+
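+/* Return the VxLAN device on the bridge, if any, that has VLAN 'vid'
+ * configured as PVID and egress untagged
+ */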
+static struct net_device *
+mlxsw_sp_bridge_8021q_vxlan_dev_find(struct net_device *br_dev, u16 vid)
+{
+	struct net_device *dev;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(br_dev, dev, iter) {
+		u16 pvid;
+		int err;
+
+		if (!netif_is_vxlan(dev))
+			continue;
+
+		err = mlxsw_sp_vxlan_mapped_vid(dev, &pvid);
+		if (err || pvid != vid)
+			continue;
+
+		return dev;
+	}
+
+	return NULL;
 }
 
 static struct mlxsw_sp_fid *
 mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
-			      u16 vid)
+			      u16 vid, struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+	struct net_device *vxlan_dev;
+	struct mlxsw_sp_fid *fid;
+	int err;
+
+	fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid);
+	if (IS_ERR(fid))
+		return fid;
+
+	if (mlxsw_sp_fid_vni_is_set(fid))
+		return fid;
+
+	/* Find the VxLAN device that has the specified VLAN configured as
+	 * PVID and egress untagged. There can be at most one such device
+	 */
+	vxlan_dev = mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev,
+							 vid);
+	if (!vxlan_dev)
+		return fid;
+
+	if (!netif_running(vxlan_dev))
+		return fid;
+
+	err = mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, vxlan_dev, vid,
+					       extack);
+	if (err)
+		goto err_vxlan_join;
+
+	return fid;
+
+err_vxlan_join:
+	mlxsw_sp_fid_put(fid);
+	return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+				 u16 vid)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
 
-	return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid);
+	return mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid);
+}
+
+static u16
+mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+			      const struct mlxsw_sp_fid *fid)
+{
+	return mlxsw_sp_fid_8021q_vid(fid);
 }
 
 static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = {
 	.port_join	= mlxsw_sp_bridge_8021q_port_join,
 	.port_leave	= mlxsw_sp_bridge_8021q_port_leave,
+	.vxlan_join	= mlxsw_sp_bridge_8021q_vxlan_join,
 	.fid_get	= mlxsw_sp_bridge_8021q_fid_get,
+	.fid_lookup	= mlxsw_sp_bridge_8021q_fid_lookup,
+	.fid_vid	= mlxsw_sp_bridge_8021q_fid_vid,
 };
 
 static bool
@@ -1998,7 +2134,7 @@
 	struct net_device *dev = bridge_port->dev;
 	u16 vid;
 
-	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
+	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
 	if (WARN_ON(!mlxsw_sp_port_vlan))
 		return -EINVAL;
@@ -2012,7 +2148,8 @@
 	if (mlxsw_sp_port_vlan->fid)
 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
 
-	return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port);
+	return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port,
+					      extack);
 }
 
 static void
@@ -2024,27 +2161,120 @@
 	struct net_device *dev = bridge_port->dev;
 	u16 vid;
 
-	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
+	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
-	if (!mlxsw_sp_port_vlan)
+	if (!mlxsw_sp_port_vlan || !mlxsw_sp_port_vlan->bridge_port)
 		return;
 
 	mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
 }
 
+static int
+mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+				 const struct net_device *vxlan_dev, u16 vid,
+				 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+	struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+	struct mlxsw_sp_nve_params params = {
+		.type = MLXSW_SP_NVE_TYPE_VXLAN,
+		.vni = vxlan->cfg.vni,
+		.dev = vxlan_dev,
+	};
+	struct mlxsw_sp_fid *fid;
+	int err;
+
+	fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+	if (!fid) {
+		NL_SET_ERR_MSG_MOD(extack, "Did not find a corresponding FID");
+		return -EINVAL;
+	}
+
+	if (mlxsw_sp_fid_vni_is_set(fid)) {
+		NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID");
+		err = -EINVAL;
+		goto err_vni_exists;
+	}
+
+	err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
+	if (err)
+		goto err_nve_fid_enable;
+
+	/* The tunnel port does not hold a reference on the FID. Only
+	 * local ports and the router port do.
+	 */
+	mlxsw_sp_fid_put(fid);
+
+	return 0;
+
+err_nve_fid_enable:
+err_vni_exists:
+	mlxsw_sp_fid_put(fid);
+	return err;
+}
+
 static struct mlxsw_sp_fid *
 mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
-			      u16 vid)
+			      u16 vid, struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+	struct net_device *vxlan_dev;
+	struct mlxsw_sp_fid *fid;
+	int err;
+
+	fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+	if (IS_ERR(fid))
+		return fid;
+
+	if (mlxsw_sp_fid_vni_is_set(fid))
+		return fid;
+
+	vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev);
+	if (!vxlan_dev)
+		return fid;
+
+	if (!netif_running(vxlan_dev))
+		return fid;
+
+	err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, 0,
+					       extack);
+	if (err)
+		goto err_vxlan_join;
+
+	return fid;
+
+err_vxlan_join:
+	mlxsw_sp_fid_put(fid);
+	return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+				 u16 vid)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
 
-	return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+	/* The only valid VLAN for a VLAN-unaware bridge is 0 */
+	if (vid)
+		return NULL;
+
+	return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+}
+
+static u16
+mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+			      const struct mlxsw_sp_fid *fid)
+{
+	return 0;
 }
 
 static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
 	.port_join	= mlxsw_sp_bridge_8021d_port_join,
 	.port_leave	= mlxsw_sp_bridge_8021d_port_leave,
+	.vxlan_join	= mlxsw_sp_bridge_8021d_vxlan_join,
 	.fid_get	= mlxsw_sp_bridge_8021d_fid_get,
+	.fid_lookup	= mlxsw_sp_bridge_8021d_fid_lookup,
+	.fid_vid	= mlxsw_sp_bridge_8021d_fid_vid,
 };
 
 int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -2094,16 +2324,126 @@
 	mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
 }
 
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+			       const struct net_device *br_dev,
+			       const struct net_device *vxlan_dev, u16 vid,
+			       struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_bridge_device *bridge_device;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (WARN_ON(!bridge_device))
+		return -EINVAL;
+
+	return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
+					      extack);
+}
+
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+				 const struct net_device *vxlan_dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+	struct mlxsw_sp_fid *fid;
+
+	/* If the VxLAN device is down, then the FID does not have a VNI */
+	fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan->cfg.vni);
+	if (!fid)
+		return;
+
+	mlxsw_sp_nve_fid_disable(mlxsw_sp, fid);
+	mlxsw_sp_fid_put(fid);
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
+					     const struct net_device *br_dev,
+					     u16 vid,
+					     struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_bridge_device *bridge_device;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (WARN_ON(!bridge_device))
+		return ERR_PTR(-EINVAL);
+
+	return bridge_device->ops->fid_get(bridge_device, vid, extack);
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
+				      enum mlxsw_sp_l3proto *proto,
+				      union mlxsw_sp_l3addr *addr)
+{
+	if (vxlan_addr->sa.sa_family == AF_INET) {
+		addr->addr4 = vxlan_addr->sin.sin_addr.s_addr;
+		*proto = MLXSW_SP_L3_PROTO_IPV4;
+	} else {
+		addr->addr6 = vxlan_addr->sin6.sin6_addr;
+		*proto = MLXSW_SP_L3_PROTO_IPV6;
+	}
+}
+
+static void
+mlxsw_sp_switchdev_addr_vxlan_convert(enum mlxsw_sp_l3proto proto,
+				      const union mlxsw_sp_l3addr *addr,
+				      union vxlan_addr *vxlan_addr)
+{
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		vxlan_addr->sa.sa_family = AF_INET;
+		vxlan_addr->sin.sin_addr.s_addr = addr->addr4;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		vxlan_addr->sa.sa_family = AF_INET6;
+		vxlan_addr->sin6.sin6_addr = addr->addr6;
+		break;
+	}
+}
+
+static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev,
+					      const char *mac,
+					      enum mlxsw_sp_l3proto proto,
+					      union mlxsw_sp_l3addr *addr,
+					      __be32 vni, bool adding)
+{
+	struct switchdev_notifier_vxlan_fdb_info info;
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	enum switchdev_notifier_type type;
+
+	type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE :
+			SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE;
+	mlxsw_sp_switchdev_addr_vxlan_convert(proto, addr, &info.remote_ip);
+	info.remote_port = vxlan->cfg.dst_port;
+	info.remote_vni = vni;
+	info.remote_ifindex = 0;
+	ether_addr_copy(info.eth_addr, mac);
+	info.vni = vni;
+	info.offloaded = adding;
+	call_switchdev_notifiers(type, dev, &info.info, NULL);
+}
+
+static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev,
+					    const char *mac,
+					    enum mlxsw_sp_l3proto proto,
+					    union mlxsw_sp_l3addr *addr,
+					    __be32 vni,
+					    bool adding)
+{
+	if (netif_is_vxlan(dev))
+		mlxsw_sp_fdb_vxlan_call_notifiers(dev, mac, proto, addr, vni,
+						  adding);
+}
+
 static void
 mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
 			    const char *mac, u16 vid,
-			    struct net_device *dev)
+			    struct net_device *dev, bool offloaded)
 {
 	struct switchdev_notifier_fdb_info info;
 
 	info.addr = mac;
 	info.vid = vid;
-	call_switchdev_notifiers(type, dev, &info.info);
+	info.offloaded = offloaded;
+	call_switchdev_notifiers(type, dev, &info.info, NULL);
 }
 
 static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
@@ -2128,6 +2468,9 @@
 		goto just_remove;
 	}
 
+	if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid))
+		goto just_remove;
+
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
 	if (!mlxsw_sp_port_vlan) {
 		netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
@@ -2154,7 +2497,7 @@
 	if (!do_notification)
 		return;
 	type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
-	mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+	mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
 
 	return;
 
@@ -2187,6 +2530,9 @@
 		goto just_remove;
 	}
 
+	if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid))
+		goto just_remove;
+
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
 	if (!mlxsw_sp_port_vlan) {
 		netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
@@ -2201,7 +2547,8 @@
 
 	bridge_device = bridge_port->bridge_device;
 	vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0;
-	lag_vid = mlxsw_sp_port_vlan->vid;
+	lag_vid = mlxsw_sp_fid_lag_vid_valid(mlxsw_sp_port_vlan->fid) ?
+		  mlxsw_sp_port_vlan->vid : 0;
 
 do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
@@ -2214,7 +2561,7 @@
 	if (!do_notification)
 		return;
 	type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
-	mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+	mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
 
 	return;
 
@@ -2224,6 +2571,122 @@
 	goto do_fdb_op;
 }
 
+static int
+__mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp,
+					    const struct mlxsw_sp_fid *fid,
+					    bool adding,
+					    struct net_device **nve_dev,
+					    u16 *p_vid, __be32 *p_vni)
+{
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct net_device *br_dev, *dev;
+	int nve_ifindex;
+	int err;
+
+	err = mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_fid_vni(fid, p_vni);
+	if (err)
+		return err;
+
+	dev = __dev_get_by_index(&init_net, nve_ifindex);
+	if (!dev)
+		return -EINVAL;
+	*nve_dev = dev;
+
+	if (!netif_running(dev))
+		return -EINVAL;
+
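+	/* Only process learned entries if learning is enabled on both the
+	 * bridge port and the VxLAN device
+	 */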
+	if (adding && !br_port_flag_is_set(dev, BR_LEARNING))
+		return -EINVAL;
+
+	if (adding && netif_is_vxlan(dev)) {
+		struct vxlan_dev *vxlan = netdev_priv(dev);
+
+		if (!(vxlan->cfg.flags & VXLAN_F_LEARN))
+			return -EINVAL;
+	}
+
+	br_dev = netdev_master_upper_dev_get(dev);
+	if (!br_dev)
+		return -EINVAL;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return -EINVAL;
+
+	*p_vid = bridge_device->ops->fid_vid(bridge_device, fid);
+
+	return 0;
+}
+
+static void mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp,
+						      char *sfn_pl,
+						      int rec_index,
+						      bool adding)
+{
+	enum mlxsw_reg_sfn_uc_tunnel_protocol sfn_proto;
+	enum switchdev_notifier_type type;
+	struct net_device *nve_dev;
+	union mlxsw_sp_l3addr addr;
+	struct mlxsw_sp_fid *fid;
+	char mac[ETH_ALEN];
+	u16 fid_index, vid;
+	__be32 vni;
+	u32 uip;
+	int err;
+
+	mlxsw_reg_sfn_uc_tunnel_unpack(sfn_pl, rec_index, mac, &fid_index,
+				       &uip, &sfn_proto);
+
+	fid = mlxsw_sp_fid_lookup_by_index(mlxsw_sp, fid_index);
+	if (!fid)
+		goto err_fid_lookup;
+
+	err = mlxsw_sp_nve_learned_ip_resolve(mlxsw_sp, uip,
+					      (enum mlxsw_sp_l3proto) sfn_proto,
+					      &addr);
+	if (err)
+		goto err_ip_resolve;
+
+	err = __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, fid, adding,
+							  &nve_dev, &vid, &vni);
+	if (err)
+		goto err_fdb_process;
+
+	err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index,
+					     (enum mlxsw_sp_l3proto) sfn_proto,
+					     &addr, adding, true);
+	if (err)
+		goto err_fdb_op;
+
+	mlxsw_sp_fdb_nve_call_notifiers(nve_dev, mac,
+					(enum mlxsw_sp_l3proto) sfn_proto,
+					&addr, vni, adding);
+
+	type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE :
+			SWITCHDEV_FDB_DEL_TO_BRIDGE;
+	mlxsw_sp_fdb_call_notifiers(type, mac, vid, nve_dev, adding);
+
+	mlxsw_sp_fid_put(fid);
+
+	return;
+
+err_fdb_op:
+err_fdb_process:
+err_ip_resolve:
+	mlxsw_sp_fid_put(fid);
+err_fid_lookup:
+	/* Remove an FDB entry in case we cannot process it. Otherwise the
+	 * device will keep sending the same notification over and over again.
+	 */
+	mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index,
+				       (enum mlxsw_sp_l3proto) sfn_proto, &addr,
+				       false, true);
+}
+
 static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
 					    char *sfn_pl, int rec_index)
 {
@@ -2244,6 +2707,14 @@
 		mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
 						    rec_index, false);
 		break;
+	case MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL:
+		mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl,
+							  rec_index, true);
+		break;
+	case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL:
+		mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl,
+							  rec_index, false);
+		break;
 	}
 }
 
@@ -2290,12 +2761,114 @@
 
 struct mlxsw_sp_switchdev_event_work {
 	struct work_struct work;
-	struct switchdev_notifier_fdb_info fdb_info;
+	union {
+		struct switchdev_notifier_fdb_info fdb_info;
+		struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+	};
 	struct net_device *dev;
 	unsigned long event;
 };
 
-static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
+static void
+mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
+					  struct mlxsw_sp_switchdev_event_work *
+					  switchdev_work,
+					  struct mlxsw_sp_fid *fid, __be32 vni)
+{
+	struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct net_device *dev = switchdev_work->dev;
+	enum mlxsw_sp_l3proto proto;
+	union mlxsw_sp_l3addr addr;
+	int err;
+
+	fdb_info = &switchdev_work->fdb_info;
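+	/* Look up the entry in the VxLAN device's own FDB to obtain the
+	 * remote IP the MAC should be forwarded to
+	 */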
+	err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info);
+	if (err)
+		return;
+
+	mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip,
+					      &proto, &addr);
+
+	switch (switchdev_work->event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+						     vxlan_fdb_info.eth_addr,
+						     mlxsw_sp_fid_index(fid),
+						     proto, &addr, true, false);
+		if (err)
+			return;
+		vxlan_fdb_info.offloaded = true;
+		call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+					 &vxlan_fdb_info.info, NULL);
+		mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+					    vxlan_fdb_info.eth_addr,
+					    fdb_info->vid, dev, true);
+		break;
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+						     vxlan_fdb_info.eth_addr,
+						     mlxsw_sp_fid_index(fid),
+						     proto, &addr, false,
+						     false);
+		vxlan_fdb_info.offloaded = false;
+		call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+					 &vxlan_fdb_info.info, NULL);
+		break;
+	}
+}
+
+static void
+mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work *
+					switchdev_work)
+{
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct net_device *dev = switchdev_work->dev;
+	struct net_device *br_dev;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_fid *fid;
+	__be32 vni;
+	int err;
+
+	if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE &&
+	    switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE)
+		return;
+
+	if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE &&
+	    !switchdev_work->fdb_info.added_by_user)
+		return;
+
+	if (!netif_running(dev))
+		return;
+	br_dev = netdev_master_upper_dev_get(dev);
+	if (!br_dev)
+		return;
+	if (!netif_is_bridge_master(br_dev))
+		return;
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		return;
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return;
+
+	fid = bridge_device->ops->fid_lookup(bridge_device,
+					     switchdev_work->fdb_info.vid);
+	if (!fid)
+		return;
+
+	err = mlxsw_sp_fid_vni(fid, &vni);
+	if (err)
+		goto out;
+
+	mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid,
+						  vni);
+
+out:
+	mlxsw_sp_fid_put(fid);
+}
+
+static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
 {
 	struct mlxsw_sp_switchdev_event_work *switchdev_work =
 		container_of(work, struct mlxsw_sp_switchdev_event_work, work);
@@ -2305,6 +2878,11 @@
 	int err;
 
 	rtnl_lock();
+	if (netif_is_vxlan(dev)) {
+		mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work);
+		goto out;
+	}
+
 	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
 	if (!mlxsw_sp_port)
 		goto out;
@@ -2319,7 +2897,7 @@
 			break;
 		mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
 					    fdb_info->addr,
-					    fdb_info->vid, dev);
+					    fdb_info->vid, dev, true);
 		break;
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
 		fdb_info = &switchdev_work->fdb_info;
@@ -2342,22 +2920,231 @@
 	dev_put(dev);
 }
 
+static void
+mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_switchdev_event_work *
+				 switchdev_work)
+{
+	struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct net_device *dev = switchdev_work->dev;
+	u8 all_zeros_mac[ETH_ALEN] = { 0 };
+	enum mlxsw_sp_l3proto proto;
+	union mlxsw_sp_l3addr addr;
+	struct net_device *br_dev;
+	struct mlxsw_sp_fid *fid;
+	u16 vid;
+	int err;
+
+	vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+	br_dev = netdev_master_upper_dev_get(dev);
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return;
+
+	fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+	if (!fid)
+		return;
+
+	mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+					      &proto, &addr);
+
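+	/* An all-zeros MAC denotes the default remote; program its
+	 * destination IP as a flood entry rather than a unicast FDB record
+	 */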
+	if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+		err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr);
+		if (err) {
+			mlxsw_sp_fid_put(fid);
+			return;
+		}
+		vxlan_fdb_info->offloaded = true;
+		call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+					 &vxlan_fdb_info->info, NULL);
+		mlxsw_sp_fid_put(fid);
+		return;
+	}
+
+	/* The device has a single FDB table, whereas Linux has two - one
+	 * in the bridge driver and another in the VxLAN driver. We only
+	 * program an entry to the device if the MAC points to the VxLAN
+	 * device in the bridge's FDB table
+	 */
+	vid = bridge_device->ops->fid_vid(bridge_device, fid);
+	if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev)
+		goto err_br_fdb_find;
+
+	err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+					     mlxsw_sp_fid_index(fid), proto,
+					     &addr, true, false);
+	if (err)
+		goto err_fdb_tunnel_uc_op;
+	vxlan_fdb_info->offloaded = true;
+	call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+				 &vxlan_fdb_info->info, NULL);
+	mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+				    vxlan_fdb_info->eth_addr, vid, dev, true);
+
+	mlxsw_sp_fid_put(fid);
+
+	return;
+
+err_fdb_tunnel_uc_op:
+err_br_fdb_find:
+	mlxsw_sp_fid_put(fid);
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_switchdev_event_work *
+				 switchdev_work)
+{
+	struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct net_device *dev = switchdev_work->dev;
+	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+	u8 all_zeros_mac[ETH_ALEN] = { 0 };
+	enum mlxsw_sp_l3proto proto;
+	union mlxsw_sp_l3addr addr;
+	struct mlxsw_sp_fid *fid;
+	u16 vid;
+
+	vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return;
+
+	fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+	if (!fid)
+		return;
+
+	mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+					      &proto, &addr);
+
+	if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+		mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr);
+		mlxsw_sp_fid_put(fid);
+		return;
+	}
+
+	mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+				       mlxsw_sp_fid_index(fid), proto, &addr,
+				       false, false);
+	vid = bridge_device->ops->fid_vid(bridge_device, fid);
+	mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+				    vxlan_fdb_info->eth_addr, vid, dev, false);
+
+	mlxsw_sp_fid_put(fid);
+}
+
+static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work)
+{
+	struct mlxsw_sp_switchdev_event_work *switchdev_work =
+		container_of(work, struct mlxsw_sp_switchdev_event_work, work);
+	struct net_device *dev = switchdev_work->dev;
+	struct mlxsw_sp *mlxsw_sp;
+	struct net_device *br_dev;
+
+	rtnl_lock();
+
+	if (!netif_running(dev))
+		goto out;
+	br_dev = netdev_master_upper_dev_get(dev);
+	if (!br_dev)
+		goto out;
+	if (!netif_is_bridge_master(br_dev))
+		goto out;
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		goto out;
+
+	switch (switchdev_work->event) {
+	case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
+		mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work);
+		break;
+	case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+		mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work);
+		break;
+	}
+
+out:
+	rtnl_unlock();
+	kfree(switchdev_work);
+	dev_put(dev);
+}
+
+static int
+mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work *
+				      switchdev_work,
+				      struct switchdev_notifier_info *info)
+{
+	struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev);
+	struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+	struct vxlan_config *cfg = &vxlan->cfg;
+	struct netlink_ext_ack *extack;
+
+	extack = switchdev_notifier_info_to_extack(info);
+	vxlan_fdb_info = container_of(info,
+				      struct switchdev_notifier_vxlan_fdb_info,
+				      info);
+
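+	/* Only FDB entries that use the VxLAN device's default parameters
+	 * can be offloaded
+	 */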
+	if (vxlan_fdb_info->remote_port != cfg->dst_port) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default remote port is not supported");
+		return -EOPNOTSUPP;
+	}
+	if (vxlan_fdb_info->remote_vni != cfg->vni ||
+	    vxlan_fdb_info->vni != cfg->vni) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default VNI is not supported");
+		return -EOPNOTSUPP;
+	}
+	if (vxlan_fdb_info->remote_ifindex) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Local interface is not supported");
+		return -EOPNOTSUPP;
+	}
+	if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast MAC addresses not supported");
+		return -EOPNOTSUPP;
+	}
+	if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast destination IP is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	switchdev_work->vxlan_fdb_info = *vxlan_fdb_info;
+
+	return 0;
+}
+
 /* Called under rcu_read_lock() */
 static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 				    unsigned long event, void *ptr)
 {
 	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 	struct mlxsw_sp_switchdev_event_work *switchdev_work;
-	struct switchdev_notifier_fdb_info *fdb_info = ptr;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct switchdev_notifier_info *info = ptr;
+	struct net_device *br_dev;
+	int err;
 
-	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
+	if (event == SWITCHDEV_PORT_ATTR_SET) {
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     mlxsw_sp_port_dev_check,
+						     mlxsw_sp_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	/* Tunnel devices are not our uppers, so check their master instead */
+	br_dev = netdev_master_upper_dev_get_rcu(dev);
+	if (!br_dev)
+		return NOTIFY_DONE;
+	if (!netif_is_bridge_master(br_dev))
+		return NOTIFY_DONE;
+	if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev))
 		return NOTIFY_DONE;
 
 	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
 	if (!switchdev_work)
 		return NOTIFY_BAD;
 
-	INIT_WORK(&switchdev_work->work, mlxsw_sp_switchdev_event_work);
 	switchdev_work->dev = dev;
 	switchdev_work->event = event;
 
@@ -2366,6 +3153,11 @@
 	case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */
 	case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
 	case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+		fdb_info = container_of(info,
+					struct switchdev_notifier_fdb_info,
+					info);
+		INIT_WORK(&switchdev_work->work,
+			  mlxsw_sp_switchdev_bridge_fdb_event_work);
 		memcpy(&switchdev_work->fdb_info, ptr,
 		       sizeof(switchdev_work->fdb_info));
 		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
@@ -2379,6 +3171,16 @@
 		 */
 		dev_hold(dev);
 		break;
+	case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */
+	case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+		INIT_WORK(&switchdev_work->work,
+			  mlxsw_sp_switchdev_vxlan_fdb_event_work);
+		err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work,
+							    info);
+		if (err)
+			goto err_vxlan_work_prepare;
+		dev_hold(dev);
+		break;
 	default:
 		kfree(switchdev_work);
 		return NOTIFY_DONE;
@@ -2388,15 +3190,285 @@
 
 	return NOTIFY_DONE;
 
+err_vxlan_work_prepare:
 err_addr_alloc:
 	kfree(switchdev_work);
 	return NOTIFY_BAD;
 }
 
-static struct notifier_block mlxsw_sp_switchdev_notifier = {
+struct notifier_block mlxsw_sp_switchdev_notifier = {
 	.notifier_call = mlxsw_sp_switchdev_event,
 };
 
+static int
+mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_bridge_device *bridge_device,
+				  const struct net_device *vxlan_dev, u16 vid,
+				  bool flag_untagged, bool flag_pvid,
+				  struct netlink_ext_ack *extack)
+{
+	struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+	__be32 vni = vxlan->cfg.vni;
+	struct mlxsw_sp_fid *fid;
+	u16 old_vid;
+	int err;
+
+	/* We cannot have the same VLAN as PVID and egress untagged on multiple
+	 * VxLAN devices. Note that we get this notification before the VLAN is
+	 * actually added to the bridge's database, so it is not possible for
+	 * the lookup function to return 'vxlan_dev'
+	 */
+	if (flag_untagged && flag_pvid &&
+	    mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, vid)) {
+		NL_SET_ERR_MSG_MOD(extack, "VLAN already mapped to a different VNI");
+		return -EINVAL;
+	}
+
+	if (!netif_running(vxlan_dev))
+		return 0;
+
+	/* First case: FID is not associated with this VNI, but the new VLAN
+	 * is both PVID and egress untagged. Need to enable NVE on the FID, if
+	 * it exists
+	 */
+	fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vni);
+	if (!fid) {
+		if (!flag_untagged || !flag_pvid)
+			return 0;
+		return mlxsw_sp_bridge_8021q_vxlan_join(bridge_device,
+							vxlan_dev, vid, extack);
+	}
+
+	/* Second case: FID is associated with the VNI and the VLAN associated
+	 * with the FID is the same as the notified VLAN. This means the flags
+	 * (PVID / egress untagged) were toggled and that NVE should be
+	 * disabled on the FID
+	 */
+	old_vid = mlxsw_sp_fid_8021q_vid(fid);
+	if (vid == old_vid) {
+		if (WARN_ON(flag_untagged && flag_pvid)) {
+			mlxsw_sp_fid_put(fid);
+			return -EINVAL;
+		}
+		mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+		mlxsw_sp_fid_put(fid);
+		return 0;
+	}
+
+	/* Third case: A new VLAN was configured on the VxLAN device, but this
+	 * VLAN is not PVID, so there is nothing to do.
+	 */
+	if (!flag_pvid) {
+		mlxsw_sp_fid_put(fid);
+		return 0;
+	}
+
+	/* Fourth case: The new VLAN is PVID, which means the VLAN currently
+	 * mapped to the VNI should be unmapped
+	 */
+	mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+	mlxsw_sp_fid_put(fid);
+
+	/* Fifth case: The new VLAN is also egress untagged, which means the
+	 * VLAN needs to be mapped to the VNI
+	 */
+	if (!flag_untagged)
+		return 0;
+
+	err = mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, vxlan_dev, vid,
+					       extack);
+	if (err)
+		goto err_vxlan_join;
+
+	return 0;
+
+err_vxlan_join:
+	mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, vxlan_dev, old_vid,
+					 NULL);
+	return err;
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_bridge_device *bridge_device,
+				  const struct net_device *vxlan_dev, u16 vid)
+{
+	struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+	__be32 vni = vxlan->cfg.vni;
+	struct mlxsw_sp_fid *fid;
+
+	if (!netif_running(vxlan_dev))
+		return;
+
+	fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vni);
+	if (!fid)
+		return;
+
+	/* A VLAN different from the one mapped to the VNI is being deleted */
+	if (mlxsw_sp_fid_8021q_vid(fid) != vid)
+		goto out;
+
+	mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+
+out:
+	mlxsw_sp_fid_put(fid);
+}
+
+static int
+mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev,
+				   struct switchdev_notifier_port_obj_info *
+				   port_obj_info)
+{
+	struct switchdev_obj_port_vlan *vlan =
+		SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj);
+	bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+	bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+	struct switchdev_trans *trans = port_obj_info->trans;
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct netlink_ext_ack *extack;
+	struct mlxsw_sp *mlxsw_sp;
+	struct net_device *br_dev;
+	u16 vid;
+
+	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+	br_dev = netdev_master_upper_dev_get(vxlan_dev);
+	if (!br_dev)
+		return 0;
+
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		return 0;
+
+	port_obj_info->handled = true;
+
+	if (switchdev_trans_ph_commit(trans))
+		return 0;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return -EINVAL;
+
+	if (!bridge_device->vlan_enabled)
+		return 0;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+		int err;
+
+		err = mlxsw_sp_switchdev_vxlan_vlan_add(mlxsw_sp, bridge_device,
+							vxlan_dev, vid,
+							flag_untagged,
+							flag_pvid, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_vlans_del(struct net_device *vxlan_dev,
+				   struct switchdev_notifier_port_obj_info *
+				   port_obj_info)
+{
+	struct switchdev_obj_port_vlan *vlan =
+		SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj);
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct mlxsw_sp *mlxsw_sp;
+	struct net_device *br_dev;
+	u16 vid;
+
+	br_dev = netdev_master_upper_dev_get(vxlan_dev);
+	if (!br_dev)
+		return;
+
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		return;
+
+	port_obj_info->handled = true;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return;
+
+	if (!bridge_device->vlan_enabled)
+		return;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++)
+		mlxsw_sp_switchdev_vxlan_vlan_del(mlxsw_sp, bridge_device,
+						  vxlan_dev, vid);
+}
+
+static int
+mlxsw_sp_switchdev_handle_vxlan_obj_add(struct net_device *vxlan_dev,
+					struct switchdev_notifier_port_obj_info *
+					port_obj_info)
+{
+	int err = 0;
+
+	switch (port_obj_info->obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev,
+							 port_obj_info);
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static void
+mlxsw_sp_switchdev_handle_vxlan_obj_del(struct net_device *vxlan_dev,
+					struct switchdev_notifier_port_obj_info *
+					port_obj_info)
+{
+	switch (port_obj_info->obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		mlxsw_sp_switchdev_vxlan_vlans_del(vxlan_dev, port_obj_info);
+		break;
+	default:
+		break;
+	}
+}
+
+static int mlxsw_sp_switchdev_blocking_event(struct notifier_block *unused,
+					     unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err = 0;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		if (netif_is_vxlan(dev))
+			err = mlxsw_sp_switchdev_handle_vxlan_obj_add(dev, ptr);
+		else
+			err = switchdev_handle_port_obj_add(dev, ptr,
+							mlxsw_sp_port_dev_check,
+							mlxsw_sp_port_obj_add);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_OBJ_DEL:
+		if (netif_is_vxlan(dev))
+			mlxsw_sp_switchdev_handle_vxlan_obj_del(dev, ptr);
+		else
+			err = switchdev_handle_port_obj_del(dev, ptr,
+							mlxsw_sp_port_dev_check,
+							mlxsw_sp_port_obj_del);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     mlxsw_sp_port_dev_check,
+						     mlxsw_sp_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block mlxsw_sp_switchdev_blocking_notifier = {
+	.notifier_call = mlxsw_sp_switchdev_blocking_event,
+};
+
 u8
 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port)
 {
@@ -2406,6 +3478,7 @@
 static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge;
+	struct notifier_block *nb;
 	int err;
 
 	err = mlxsw_sp_ageing_set(mlxsw_sp, MLXSW_SP_DEFAULT_AGEING_TIME);
@@ -2420,17 +3493,33 @@
 		return err;
 	}
 
+	nb = &mlxsw_sp_switchdev_blocking_notifier;
+	err = register_switchdev_blocking_notifier(nb);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to register switchdev blocking notifier\n");
+		goto err_register_switchdev_blocking_notifier;
+	}
+
 	INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
 	bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
 	mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
 	return 0;
+
+err_register_switchdev_blocking_notifier:
+	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
+	return err;
 }
 
 static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	cancel_delayed_work_sync(&mlxsw_sp->bridge->fdb_notify.dw);
-	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
+	struct notifier_block *nb;
 
+	cancel_delayed_work_sync(&mlxsw_sp->bridge->fdb_notify.dw);
+
+	nb = &mlxsw_sp_switchdev_blocking_notifier;
+	unregister_switchdev_blocking_notifier(nb);
+
+	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
 }
 
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
@@ -2458,11 +3547,3 @@
 	kfree(mlxsw_sp->bridge);
 }
 
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
-}
-
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
new file mode 100644
index 0000000..7c03b66
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <net/devlink.h>
+#include <uapi/linux/devlink.h>
+
+#include "core.h"
+#include "reg.h"
+#include "spectrum.h"
+
+#define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
+
+static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
+				      void *priv);
+
+#define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
+	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
+			     DEVLINK_TRAP_GROUP_GENERIC(_group_id),	      \
+			     MLXSW_SP_TRAP_METADATA)
+
+#define MLXSW_SP_RXL_DISCARD(_id, _group_id)				      \
+	MLXSW_RXL(mlxsw_sp_rx_drop_listener, DISCARD_##_id, SET_FW_DEFAULT,   \
+		  false, SP_##_group_id, DISCARD)
+
+static struct devlink_trap mlxsw_sp_traps_arr[] = {
+	MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS),
+	MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
+	MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
+	MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
+	MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
+	MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
+};
+
+static struct mlxsw_listener mlxsw_sp_listeners_arr[] = {
+	MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS),
+};
+
+/* Mapping between hardware trap and devlink trap. Multiple hardware traps can
+ * be mapped to the same devlink trap. Order is according to
+ * 'mlxsw_sp_listeners_arr'.
+ */
+static u16 mlxsw_sp_listener_devlink_map[] = {
+	DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
+	DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH,
+	DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
+	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
+	DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER,
+};
+
+static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+				u8 local_port,
+				struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_port_pcpu_stats *pcpu_stats;
+
+	if (unlikely(!mlxsw_sp_port)) {
+		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n",
+				     local_port);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	skb->dev = mlxsw_sp_port->dev;
+
+	pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats);
+	u64_stats_update_begin(&pcpu_stats->syncp);
+	pcpu_stats->rx_packets++;
+	pcpu_stats->rx_bytes += skb->len;
+	u64_stats_update_end(&pcpu_stats->syncp);
+
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	return 0;
+}
+
+static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
+				      void *trap_ctx)
+{
+	struct devlink_port *in_devlink_port;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp;
+	struct devlink *devlink;
+
+	mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+
+	if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port))
+		return;
+
+	devlink = priv_to_devlink(mlxsw_sp->core);
+	in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
+							   local_port);
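+	/* eth_type_trans() pulled the Ethernet header; push it back so the
+	 * packet is reported to devlink as it was received
+	 */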
+	skb_push(skb, ETH_HLEN);
+	devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
+	consume_skb(skb);
+}
+
+int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
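+	/* The devlink map must be kept in sync with the listeners array */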
+	if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) !=
+		    ARRAY_SIZE(mlxsw_sp_listeners_arr)))
+		return -EINVAL;
+
+	return devlink_traps_register(devlink, mlxsw_sp_traps_arr,
+				      ARRAY_SIZE(mlxsw_sp_traps_arr),
+				      mlxsw_sp);
+}
+
+void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+	devlink_traps_unregister(devlink, mlxsw_sp_traps_arr,
+				 ARRAY_SIZE(mlxsw_sp_traps_arr));
+}
+
+int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core,
+		       const struct devlink_trap *trap, void *trap_ctx)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+		struct mlxsw_listener *listener;
+		int err;
+
+		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+			continue;
+		listener = &mlxsw_sp_listeners_arr[i];
+
+		err = mlxsw_core_trap_register(mlxsw_core, listener, trap_ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core,
+			const struct devlink_trap *trap, void *trap_ctx)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+		struct mlxsw_listener *listener;
+
+		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+			continue;
+		listener = &mlxsw_sp_listeners_arr[i];
+
+		mlxsw_core_trap_unregister(mlxsw_core, listener, trap_ctx);
+	}
+}
+
+int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core,
+			     const struct devlink_trap *trap,
+			     enum devlink_trap_action action)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+		enum mlxsw_reg_hpkt_action hw_action;
+		struct mlxsw_listener *listener;
+		int err;
+
+		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+			continue;
+		listener = &mlxsw_sp_listeners_arr[i];
+
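+		/* Translate the devlink trap action to the matching
+		 * hardware action
+		 */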
+		switch (action) {
+		case DEVLINK_TRAP_ACTION_DROP:
+			hw_action = MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT;
+			break;
+		case DEVLINK_TRAP_ACTION_TRAP:
+			hw_action = MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		err = mlxsw_core_trap_action_set(mlxsw_core, listener,
+						 hw_action);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
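+/* Policer used to rate limit the discard trap group */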
+#define MLXSW_SP_DISCARD_POLICER_ID	(MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1)
+
+static int
+mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp,
+				 const struct devlink_trap_group *group)
+{
+	enum mlxsw_reg_qpcr_ir_units ir_units;
+	char qpcr_pl[MLXSW_REG_QPCR_LEN];
+	u16 policer_id;
+	u8 burst_size;
+	bool is_bytes;
+	u32 rate;
+
+	switch (group->id) {
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS:
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		ir_units = MLXSW_REG_QPCR_IR_UNITS_M;
+		is_bytes = false;
+		rate = 10 * 1024; /* 10Kpps */
+		burst_size = 7;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	mlxsw_reg_qpcr_pack(qpcr_pl, policer_id, ir_units, is_bytes, rate,
+			    burst_size);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl);
+}
+
+static int
+__mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp,
+			   const struct devlink_trap_group *group)
+{
+	char htgt_pl[MLXSW_REG_HTGT_LEN];
+	u8 priority, tc, group_id;
+	u16 policer_id;
+
+	switch (group->id) {
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS:
+		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS;
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		priority = 0;
+		tc = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	mlxsw_reg_htgt_pack(htgt_pl, group_id, policer_id, priority, tc);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
+}
+
+int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
+			     const struct devlink_trap_group *group)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	int err;
+
+	err = mlxsw_sp_trap_group_policer_init(mlxsw_sp, group);
+	if (err)
+		return err;
+
+	err = __mlxsw_sp_trap_group_init(mlxsw_sp, group);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
index bcf2e79..0d9356b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -30,6 +30,7 @@
 	struct mlxsw_sib_port **ports;
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
+	u8 hw_id[ETH_ALEN];
 };
 
 struct mlxsw_sib_port {
@@ -102,6 +103,18 @@
 	mlxsw_tx_v1_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
 }
 
+static int mlxsw_sib_hw_id_get(struct mlxsw_sib *mlxsw_sib)
+{
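+	/* The switch base MAC, queried via the SPAD register, is used as the
+	 * hardware ID of the device's ports
+	 */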
+	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
+	int err;
+
+	err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(spad), spad_pl);
+	if (err)
+		return err;
+	mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sib->hw_id);
+	return 0;
+}
+
 static int
 mlxsw_sib_port_admin_status_set(struct mlxsw_sib_port *mlxsw_sib_port,
 				bool is_up)
@@ -267,7 +280,9 @@
 {
 	int err;
 
-	err = mlxsw_core_port_init(mlxsw_sib->core, local_port);
+	err = mlxsw_core_port_init(mlxsw_sib->core, local_port,
+				   module + 1, false, 0,
+				   mlxsw_sib->hw_id, sizeof(mlxsw_sib->hw_id));
 	if (err) {
 		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n",
 			local_port);
@@ -439,6 +454,12 @@
 	mlxsw_sib->core = mlxsw_core;
 	mlxsw_sib->bus_info = mlxsw_bus_info;
 
+	err = mlxsw_sib_hw_id_get(mlxsw_sib);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Failed to get switch HW ID\n");
+		return err;
+	}
+
 	err = mlxsw_sib_ports_create(mlxsw_sib);
 	if (err) {
 		dev_err(mlxsw_sib->bus_info->dev, "Failed to create ports\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 2d4f213..1c14c05 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -11,7 +11,6 @@
 #include <linux/device.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
-#include <net/switchdev.h>
 
 #include "pci.h"
 #include "core.h"
@@ -300,6 +299,8 @@
 	u64 len;
 	int err;
 
+	memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
 	if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
 		return NETDEV_TX_BUSY;
 
@@ -380,14 +381,14 @@
 	stats->tx_dropped	= tx_dropped;
 }
 
-static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name,
-					    size_t len)
+static struct devlink_port *
+mlxsw_sx_port_get_devlink_port(struct net_device *dev)
 {
 	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
 
-	return mlxsw_core_port_get_phys_port_name(mlxsw_sx_port->mlxsw_sx->core,
-						  mlxsw_sx_port->local_port,
-						  name, len);
+	return mlxsw_core_port_devlink_port_get(mlxsw_sx->core,
+						mlxsw_sx_port->local_port);
 }
 
 static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
@@ -396,7 +397,7 @@
 	.ndo_start_xmit		= mlxsw_sx_port_xmit,
 	.ndo_change_mtu		= mlxsw_sx_port_change_mtu,
 	.ndo_get_stats64	= mlxsw_sx_port_get_stats64,
-	.ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name,
+	.ndo_get_devlink_port	= mlxsw_sx_port_get_devlink_port,
 };
 
 static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
@@ -636,12 +637,6 @@
 		.speed		= 50000,
 	},
 	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.supported	= SUPPORTED_56000baseKR4_Full,
-		.advertised	= ADVERTISED_56000baseKR4_Full,
-		.speed		= 56000,
-	},
-	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
 				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
 				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
@@ -901,28 +896,6 @@
 	.set_link_ksettings	= mlxsw_sx_port_set_link_ksettings,
 };
 
-static int mlxsw_sx_port_attr_get(struct net_device *dev,
-				  struct switchdev_attr *attr)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id);
-		memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
-static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = {
-	.switchdev_port_attr_get	= mlxsw_sx_port_attr_get,
-};
-
 static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx)
 {
 	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
@@ -1034,7 +1007,6 @@
 
 	dev->netdev_ops = &mlxsw_sx_port_netdev_ops;
 	dev->ethtool_ops = &mlxsw_sx_port_ethtool_ops;
-	dev->switchdev_ops = &mlxsw_sx_port_switchdev_ops;
 
 	err = mlxsw_sx_port_dev_addr_get(mlxsw_sx_port);
 	if (err) {
@@ -1113,7 +1085,7 @@
 	}
 
 	mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
-				mlxsw_sx_port, dev, module + 1, false, 0);
+				mlxsw_sx_port, dev);
 	mlxsw_sx->ports[local_port] = mlxsw_sx_port;
 	return 0;
 
@@ -1138,7 +1110,9 @@
 {
 	int err;
 
-	err = mlxsw_core_port_init(mlxsw_sx->core, local_port);
+	err = mlxsw_core_port_init(mlxsw_sx->core, local_port,
+				   module + 1, false, 0,
+				   mlxsw_sx->hw_id, sizeof(mlxsw_sx->hw_id));
 	if (err) {
 		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
 			local_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 5302072..7618f08 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -17,6 +17,8 @@
 	MLXSW_TRAP_ID_MVRP = 0x15,
 	MLXSW_TRAP_ID_RPVST = 0x16,
 	MLXSW_TRAP_ID_DHCP = 0x19,
+	MLXSW_TRAP_ID_PTP0 = 0x28,
+	MLXSW_TRAP_ID_PTP1 = 0x29,
 	MLXSW_TRAP_ID_IGMP_QUERY = 0x30,
 	MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31,
 	MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32,
@@ -24,6 +26,7 @@
 	MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
 	MLXSW_TRAP_ID_PKT_SAMPLE = 0x38,
 	MLXSW_TRAP_ID_FID_MISS = 0x3D,
+	MLXSW_TRAP_ID_DECAP_ECN0 = 0x40,
 	MLXSW_TRAP_ID_ARPBC = 0x50,
 	MLXSW_TRAP_ID_ARPUC = 0x51,
 	MLXSW_TRAP_ID_MTUERROR = 0x52,
@@ -59,8 +62,17 @@
 	MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
 	MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
 	MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
+	MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8,
+	MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
+	MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140,
+	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW = 0x148,
+	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VLAN = 0x149,
+	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_STP = 0x14A,
+	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_UC = 0x150,
+	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_MC_NULL = 0x151,
+	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_LB = 0x152,
 	MLXSW_TRAP_ID_ACL0 = 0x1C0,
 	/* Multicast trap used for routes with trap action */
 	MLXSW_TRAP_ID_ACL1 = 0x1C1,
@@ -73,6 +85,10 @@
 enum mlxsw_event_trap_id {
 	/* Port Up/Down event generated by hardware */
 	MLXSW_TRAP_ID_PUDE = 0x8,
+	/* PTP Ingress FIFO has a new entry */
+	MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D,
+	/* PTP Egress FIFO has a new entry */
+	MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E,
 };
 
 #endif /* _MLXSW_TRAP_H */
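Note on the trap additions above: each MLXSW_TRAP_ID_DISCARD_* value identifies a distinct hardware drop reason. A minimal, hedged sketch of mapping such IDs to readable names for diagnostics — the trap_id_name() helper is hypothetical and the name strings are illustrative, not necessarily mlxsw's actual devlink trap names:

/* Illustrative only: map a subset of the new discard trap IDs to names.
 * mlxsw itself wires these IDs into its devlink trap support rather
 * than a lookup table like this.
 */
#include <stdio.h>

enum {
	TRAP_ID_DISCARD_ING_PACKET_SMAC_MC    = 0x140,
	TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW = 0x148,
	TRAP_ID_DISCARD_ING_SWITCH_VLAN       = 0x149,
	TRAP_ID_DISCARD_ING_SWITCH_STP        = 0x14A,
	TRAP_ID_DISCARD_LOOKUP_SWITCH_UC      = 0x150,
};

static const char *trap_id_name(int id)
{
	switch (id) {
	case TRAP_ID_DISCARD_ING_PACKET_SMAC_MC:    return "source_mac_is_multicast";
	case TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW: return "vlan_tag_mismatch";
	case TRAP_ID_DISCARD_ING_SWITCH_VLAN:       return "ingress_vlan_filter";
	case TRAP_ID_DISCARD_ING_SWITCH_STP:        return "ingress_spanning_tree_filter";
	case TRAP_ID_DISCARD_LOOKUP_SWITCH_UC:      return "port_list_is_empty";
	default:                                    return "unknown";
	}
}

int main(void)
{
	printf("0x%x -> %s\n", TRAP_ID_DISCARD_ING_SWITCH_STP,
	       trap_id_name(TRAP_ID_DISCARD_ING_SWITCH_STP));
	return 0;
}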
diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig
index b7e2f49..b9c4d48 100644
--- a/drivers/net/ethernet/micrel/Kconfig
+++ b/drivers/net/ethernet/micrel/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Micrel device configuration
 #
@@ -5,8 +6,7 @@
 config NET_VENDOR_MICREL
 	bool "Micrel devices"
 	default y
-	depends on (HAS_IOMEM && DMA_ENGINE) || SPI || PCI || HAS_IOMEM || \
-		   (ARM && ARCH_KS8695)
+	depends on (HAS_IOMEM && DMA_ENGINE) || SPI || PCI || HAS_IOMEM
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -17,14 +17,6 @@
 
 if NET_VENDOR_MICREL
 
-config ARM_KS8695_ETHER
-	tristate "KS8695 Ethernet support"
-	depends on ARM && ARCH_KS8695
-	select MII
-	---help---
-	  If you wish to compile a kernel for the KS8695 and want to
-	  use the internal ethernet then you should answer Y to this.
-
 config KS8842
 	tristate "Micrel KSZ8841/42 with generic bus interface"
 	depends on HAS_IOMEM && DMA_ENGINE
diff --git a/drivers/net/ethernet/micrel/Makefile b/drivers/net/ethernet/micrel/Makefile
index 848fc1c..6d8ac55 100644
--- a/drivers/net/ethernet/micrel/Makefile
+++ b/drivers/net/ethernet/micrel/Makefile
@@ -3,7 +3,6 @@
 # Makefile for the Micrel network device drivers.
 #
 
-obj-$(CONFIG_ARM_KS8695_ETHER) += ks8695net.o
 obj-$(CONFIG_KS8842) += ks8842.o
 obj-$(CONFIG_KS8851) += ks8851.o
 obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o
diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c
deleted file mode 100644
index bd51e05..0000000
--- a/drivers/net/ethernet/micrel/ks8695net.c
+++ /dev/null
@@ -1,1641 +0,0 @@
-/*
- * Micrel KS8695 (Centaur) Ethernet.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
- * General Public License for more details.
- *
- * Copyright 2008 Simtec Electronics
- *		  Daniel Silverstone <dsilvers@simtec.co.uk>
- *		  Vincent Sanders <vince@simtec.co.uk>
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/interrupt.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/crc32.h>
-#include <linux/mii.h>
-#include <linux/ethtool.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-
-#include <asm/irq.h>
-
-#include <mach/regs-switch.h>
-#include <mach/regs-misc.h>
-#include <asm/mach/irq.h>
-#include <mach/regs-irq.h>
-
-#include "ks8695net.h"
-
-#define MODULENAME	"ks8695_ether"
-#define MODULEVERSION	"1.02"
-
-/*
- * Transmit and device reset timeout, default 5 seconds.
- */
-static int watchdog = 5000;
-
-/* Hardware structures */
-
-/**
- *	struct rx_ring_desc - Receive descriptor ring element
- *	@status: The status of the descriptor element (e.g. who owns it)
- *	@length: The number of bytes in the block pointed to by data_ptr
- *	@data_ptr: The physical address of the data block to receive into
- *	@next_desc: The physical address of the next descriptor element.
- */
-struct rx_ring_desc {
-	__le32	status;
-	__le32	length;
-	__le32	data_ptr;
-	__le32	next_desc;
-};
-
-/**
- *	struct tx_ring_desc - Transmit descriptor ring element
- *	@owner: Who owns the descriptor
- *	@status: The descriptor status/control flags, including the number
- *		 of bytes in the block pointed to by data_ptr
- *	@data_ptr: The physical address of the data block to transmit from
- *	@next_desc: The physical address of the next descriptor element.
- */
-struct tx_ring_desc {
-	__le32	owner;
-	__le32	status;
-	__le32	data_ptr;
-	__le32	next_desc;
-};
-
-/**
- *	struct ks8695_skbuff - sk_buff wrapper for rx/tx rings.
- *	@skb: The buffer in the ring
- *	@dma_ptr: The mapped DMA pointer of the buffer
- *	@length: The number of bytes mapped to dma_ptr
- */
-struct ks8695_skbuff {
-	struct sk_buff	*skb;
-	dma_addr_t	dma_ptr;
-	u32		length;
-};
-
-/* Private device structure */
-
-#define MAX_TX_DESC 8
-#define MAX_TX_DESC_MASK 0x7
-#define MAX_RX_DESC 16
-#define MAX_RX_DESC_MASK 0xf
-
-/* The NAPI weight should be greater than the number of RX DMA buffers */
-#define NAPI_WEIGHT   64
-
-#define MAX_RXBUF_SIZE 0x700
-
-#define TX_RING_DMA_SIZE (sizeof(struct tx_ring_desc) * MAX_TX_DESC)
-#define RX_RING_DMA_SIZE (sizeof(struct rx_ring_desc) * MAX_RX_DESC)
-#define RING_DMA_SIZE (TX_RING_DMA_SIZE + RX_RING_DMA_SIZE)
-
-/**
- *	enum ks8695_dtype - Device type
- *	@KS8695_DTYPE_WAN: This device is a WAN interface
- *	@KS8695_DTYPE_LAN: This device is a LAN interface
- *	@KS8695_DTYPE_HPNA: This device is an HPNA interface
- */
-enum ks8695_dtype {
-	KS8695_DTYPE_WAN,
-	KS8695_DTYPE_LAN,
-	KS8695_DTYPE_HPNA,
-};
-
-/**
- *	struct ks8695_priv - Private data for the KS8695 Ethernet
- *	@in_suspend: Flag to indicate if we're suspending/resuming
- *	@ndev: The net_device for this interface
- *	@dev: The platform device object for this interface
- *	@dtype: The type of this device
- *	@io_regs: The ioremapped registers for this interface
- *	@napi: The NAPI structure used for RX packet processing
- *	@rx_irq_name: The textual name of the RX IRQ from the platform data
- *	@tx_irq_name: The textual name of the TX IRQ from the platform data
- *	@link_irq_name: The textual name of the link IRQ from the
- *			platform data if available
- *	@rx_irq: The IRQ number for the RX IRQ
- *	@tx_irq: The IRQ number for the TX IRQ
- *	@link_irq: The IRQ number for the link IRQ if available
- *	@regs_req: The resource request for the registers region
- *	@phyiface_req: The resource request for the phy/switch region
- *		       if available
- *	@phyiface_regs: The ioremapped registers for the phy/switch if available
- *	@ring_base: The base pointer of the dma coherent memory for the rings
- *	@ring_base_dma: The DMA mapped equivalent of ring_base
- *	@tx_ring: The pointer in ring_base of the TX ring
- *	@tx_ring_used: The number of slots in the TX ring which are occupied
- *	@tx_ring_next_slot: The next slot to fill in the TX ring
- *	@tx_ring_dma: The DMA mapped equivalent of tx_ring
- *	@tx_buffers: The sk_buff mappings for the TX ring
- *	@txq_lock: A lock to protect the tx_buffers tx_ring_used etc variables
- *	@rx_ring: The pointer in ring_base of the RX ring
- *	@rx_ring_dma: The DMA mapped equivalent of rx_ring
- *	@rx_buffers: The sk_buff mappings for the RX ring
- *	@next_rx_desc_read: The next RX descriptor to read from on IRQ
- *	@rx_lock: A lock to protect the RX IRQ handling
- *	@msg_enable: The flags for which messages to emit
- */
-struct ks8695_priv {
-	int in_suspend;
-	struct net_device *ndev;
-	struct device *dev;
-	enum ks8695_dtype dtype;
-	void __iomem *io_regs;
-
-	struct napi_struct	napi;
-
-	const char *rx_irq_name, *tx_irq_name, *link_irq_name;
-	int rx_irq, tx_irq, link_irq;
-
-	struct resource *regs_req, *phyiface_req;
-	void __iomem *phyiface_regs;
-
-	void *ring_base;
-	dma_addr_t ring_base_dma;
-
-	struct tx_ring_desc *tx_ring;
-	int tx_ring_used;
-	int tx_ring_next_slot;
-	dma_addr_t tx_ring_dma;
-	struct ks8695_skbuff tx_buffers[MAX_TX_DESC];
-	spinlock_t txq_lock;
-
-	struct rx_ring_desc *rx_ring;
-	dma_addr_t rx_ring_dma;
-	struct ks8695_skbuff rx_buffers[MAX_RX_DESC];
-	int next_rx_desc_read;
-	spinlock_t rx_lock;
-
-	int msg_enable;
-};
-
-/* Register access */
-
-/**
- *	ks8695_readreg - Read from a KS8695 ethernet register
- *	@ksp: The device to read from
- *	@reg: The register to read
- */
-static inline u32
-ks8695_readreg(struct ks8695_priv *ksp, int reg)
-{
-	return readl(ksp->io_regs + reg);
-}
-
-/**
- *	ks8695_writereg - Write to a KS8695 ethernet register
- *	@ksp: The device to write to
- *	@reg: The register to write
- *	@value: The value to write to the register
- */
-static inline void
-ks8695_writereg(struct ks8695_priv *ksp, int reg, u32 value)
-{
-	writel(value, ksp->io_regs + reg);
-}
-
-/* Utility functions */
-
-/**
- *	ks8695_port_type - Retrieve port-type as user-friendly string
- *	@ksp: The device to return the type for
- *
- *	Returns a string indicating which of the WAN, LAN or HPNA
- *	ports this device is likely to represent.
- */
-static const char *
-ks8695_port_type(struct ks8695_priv *ksp)
-{
-	switch (ksp->dtype) {
-	case KS8695_DTYPE_LAN:
-		return "LAN";
-	case KS8695_DTYPE_WAN:
-		return "WAN";
-	case KS8695_DTYPE_HPNA:
-		return "HPNA";
-	}
-
-	return "UNKNOWN";
-}
-
-/**
- *	ks8695_update_mac - Update the MAC registers in the device
- *	@ksp: The device to update
- *
- *	Updates the MAC registers in the KS8695 device from the address in the
- *	net_device structure associated with this interface.
- */
-static void
-ks8695_update_mac(struct ks8695_priv *ksp)
-{
-	/* Update the HW with the MAC from the net_device */
-	struct net_device *ndev = ksp->ndev;
-	u32 machigh, maclow;
-
-	maclow	= ((ndev->dev_addr[2] << 24) | (ndev->dev_addr[3] << 16) |
-		   (ndev->dev_addr[4] <<  8) | (ndev->dev_addr[5] <<  0));
-	machigh = ((ndev->dev_addr[0] <<  8) | (ndev->dev_addr[1] <<  0));
-
-	ks8695_writereg(ksp, KS8695_MAL, maclow);
-	ks8695_writereg(ksp, KS8695_MAH, machigh);
-
-}
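The function above splits the station address across two registers: MAH holds bytes 0-1, MAL bytes 2-5. A standalone round-trip of that packing, assuming plain userspace C with no kernel dependencies:

/* Round-trip the MAH/MAL packing used above (illustrative, userspace C). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void mac_pack(const uint8_t a[6], uint32_t *machigh, uint32_t *maclow)
{
	*maclow  = ((uint32_t)a[2] << 24) | ((uint32_t)a[3] << 16) |
		   ((uint32_t)a[4] <<  8) |  (uint32_t)a[5];
	*machigh = ((uint32_t)a[0] <<  8) |  (uint32_t)a[1];
}

static void mac_unpack(uint32_t machigh, uint32_t maclow, uint8_t a[6])
{
	a[0] = (machigh >>  8) & 0xFF;
	a[1] =  machigh        & 0xFF;
	a[2] = (maclow  >> 24) & 0xFF;
	a[3] = (maclow  >> 16) & 0xFF;
	a[4] = (maclow  >>  8) & 0xFF;
	a[5] =  maclow         & 0xFF;
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x23, 0xae, 0x01, 0x02, 0x03 };
	uint8_t out[6];
	uint32_t hi, lo;

	mac_pack(mac, &hi, &lo);
	mac_unpack(hi, lo, out);
	printf("round-trip %s\n", memcmp(mac, out, 6) == 0 ? "ok" : "FAILED");
	return 0;
}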
-
-/**
- *	ks8695_refill_rxbuffers - Re-fill the RX buffer ring
- *	@ksp: The device to refill
- *
- *	Iterates the RX ring of the device looking for empty slots.
- *	For each empty slot, we allocate and map a new SKB and give it
- *	to the hardware.
- *	This can be called from interrupt context safely.
- */
-static void
-ks8695_refill_rxbuffers(struct ks8695_priv *ksp)
-{
-	/* Run around the RX ring, filling in any missing sk_buff's */
-	int buff_n;
-
-	for (buff_n = 0; buff_n < MAX_RX_DESC; ++buff_n) {
-		if (!ksp->rx_buffers[buff_n].skb) {
-			struct sk_buff *skb =
-				netdev_alloc_skb(ksp->ndev, MAX_RXBUF_SIZE);
-			dma_addr_t mapping;
-
-			ksp->rx_buffers[buff_n].skb = skb;
-			if (skb == NULL) {
-				/* Failed to allocate one, perhaps
-				 * we'll try again later.
-				 */
-				break;
-			}
-
-			mapping = dma_map_single(ksp->dev, skb->data,
-						 MAX_RXBUF_SIZE,
-						 DMA_FROM_DEVICE);
-			if (unlikely(dma_mapping_error(ksp->dev, mapping))) {
-				/* Failed to DMA map this SKB, try later */
-				dev_kfree_skb_irq(skb);
-				ksp->rx_buffers[buff_n].skb = NULL;
-				break;
-			}
-			ksp->rx_buffers[buff_n].dma_ptr = mapping;
-			ksp->rx_buffers[buff_n].length = MAX_RXBUF_SIZE;
-
-			/* Record this into the DMA ring */
-			ksp->rx_ring[buff_n].data_ptr = cpu_to_le32(mapping);
-			ksp->rx_ring[buff_n].length =
-				cpu_to_le32(MAX_RXBUF_SIZE);
-
-			wmb();
-
-			/* And give ownership over to the hardware */
-			ksp->rx_ring[buff_n].status = cpu_to_le32(RDES_OWN);
-		}
-	}
-}
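The refill loop above follows the usual descriptor hand-off discipline: fill in every field the device reads, issue a write barrier, and only then set the OWN bit, so the hardware never observes a half-initialised descriptor. A userspace model of that publish step, using a C11 release store as a stand-in for the kernel's wmb():

/* Userspace model of the descriptor hand-off: publish all fields, then
 * release-store the OWN flag. atomic_store_explicit(..., release) stands
 * in for wmb(); the "hardware" side would pair it with an acquire load.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define DESC_OWN (1u << 31)

struct desc {
	_Atomic uint32_t status;
	uint32_t length;
	uint32_t data_ptr;
};

static void give_to_hw(struct desc *d, uint32_t buf, uint32_t len)
{
	d->data_ptr = buf;	/* fill in everything the device reads... */
	d->length = len;
	/* ...then publish ownership last, with release ordering */
	atomic_store_explicit(&d->status, DESC_OWN, memory_order_release);
}

int main(void)
{
	static struct desc d;

	give_to_hw(&d, 0x1000, 1536);
	printf("status=0x%x len=%u\n", (unsigned)atomic_load(&d.status),
	       (unsigned)d.length);
	return 0;
}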
-
-/* Maximum number of multicast addresses which the KS8695 HW supports */
-#define KS8695_NR_ADDRESSES	16
-
-/**
- *	ks8695_init_partial_multicast - Init the mcast addr registers
- *	@ksp: The device to initialise
- *	@ndev: The network device whose multicast address list to use
- *
- *	This routine is a helper for ks8695_set_multicast - it writes
- *	the additional-address registers in the KS8695 ethernet device
- *	and cleans up any others left behind.
- */
-static void
-ks8695_init_partial_multicast(struct ks8695_priv *ksp,
-			      struct net_device *ndev)
-{
-	u32 low, high;
-	int i;
-	struct netdev_hw_addr *ha;
-
-	i = 0;
-	netdev_for_each_mc_addr(ha, ndev) {
-		/* Ran out of space in chip? */
-		BUG_ON(i == KS8695_NR_ADDRESSES);
-
-		low = (ha->addr[2] << 24) | (ha->addr[3] << 16) |
-		      (ha->addr[4] << 8) | (ha->addr[5]);
-		high = (ha->addr[0] << 8) | (ha->addr[1]);
-
-		ks8695_writereg(ksp, KS8695_AAL_(i), low);
-		ks8695_writereg(ksp, KS8695_AAH_(i), AAH_E | high);
-		i++;
-	}
-
-	/* Clear the remaining Additional Station Addresses */
-	for (; i < KS8695_NR_ADDRESSES; i++) {
-		ks8695_writereg(ksp, KS8695_AAL_(i), 0);
-		ks8695_writereg(ksp, KS8695_AAH_(i), 0);
-	}
-}
-
-/* Interrupt handling */
-
-/**
- *	ks8695_tx_irq - Transmit IRQ handler
- *	@irq: The IRQ which went off (ignored)
- *	@dev_id: The net_device for the interrupt
- *
- *	Process the TX ring, clearing out any transmitted slots.
- *	Allows the net_device to pass us new packets once slots are
- *	freed.
- */
-static irqreturn_t
-ks8695_tx_irq(int irq, void *dev_id)
-{
-	struct net_device *ndev = (struct net_device *)dev_id;
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	int buff_n;
-
-	for (buff_n = 0; buff_n < MAX_TX_DESC; ++buff_n) {
-		if (ksp->tx_buffers[buff_n].skb &&
-		    !(ksp->tx_ring[buff_n].owner & cpu_to_le32(TDES_OWN))) {
-			rmb();
-			/* An SKB which is not owned by HW is present */
-			/* Update the stats for the net_device */
-			ndev->stats.tx_packets++;
-			ndev->stats.tx_bytes += ksp->tx_buffers[buff_n].length;
-
-			/* Free the packet from the ring */
-			ksp->tx_ring[buff_n].data_ptr = 0;
-
-			/* Free the sk_buff */
-			dma_unmap_single(ksp->dev,
-					 ksp->tx_buffers[buff_n].dma_ptr,
-					 ksp->tx_buffers[buff_n].length,
-					 DMA_TO_DEVICE);
-			dev_kfree_skb_irq(ksp->tx_buffers[buff_n].skb);
-			ksp->tx_buffers[buff_n].skb = NULL;
-			ksp->tx_ring_used--;
-		}
-	}
-
-	netif_wake_queue(ndev);
-
-	return IRQ_HANDLED;
-}
-
-/**
- *	ks8695_get_rx_enable_bit - Get rx interrupt enable/status bit
- *	@ksp: Private data for the KS8695 Ethernet
- *
- *	Per the KS8695 documentation:
- *	Interrupt Enable Register (offset 0xE204)
- *	    Bit 29: WAN MAC Receive Interrupt Enable
- *	    Bit 16: LAN MAC Receive Interrupt Enable
- *	Interrupt Status Register (offset 0xF208)
- *	    Bit 29: WAN MAC Receive Status
- *	    Bit 16: LAN MAC Receive Status
- *	The RX interrupt enable/status bit number is therefore the same
- *	as the RX IRQ number.
- */
-static inline u32 ks8695_get_rx_enable_bit(struct ks8695_priv *ksp)
-{
-	return ksp->rx_irq;
-}
-
-/**
- *	ks8695_rx_irq - Receive IRQ handler
- *	@irq: The IRQ which went off (ignored)
- *	@dev_id: The net_device for the interrupt
- *
- *	Inform NAPI that packet reception needs to be scheduled
- */
-
-static irqreturn_t
-ks8695_rx_irq(int irq, void *dev_id)
-{
-	struct net_device *ndev = (struct net_device *)dev_id;
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	spin_lock(&ksp->rx_lock);
-
-	if (napi_schedule_prep(&ksp->napi)) {
-		unsigned long status = readl(KS8695_IRQ_VA + KS8695_INTEN);
-		unsigned long mask_bit = 1 << ks8695_get_rx_enable_bit(ksp);
-		/*disable rx interrupt*/
-		status &= ~mask_bit;
-		writel(status , KS8695_IRQ_VA + KS8695_INTEN);
-		__napi_schedule(&ksp->napi);
-	}
-
-	spin_unlock(&ksp->rx_lock);
-	return IRQ_HANDLED;
-}
-
-/**
- *	ks8695_rx - Receive packets, called from the NAPI poll method
- *	@ksp: Private data for the KS8695 Ethernet
- *	@budget: Number of packets allowed to process
- */
-static int ks8695_rx(struct ks8695_priv *ksp, int budget)
-{
-	struct net_device *ndev = ksp->ndev;
-	struct sk_buff *skb;
-	int buff_n;
-	u32 flags;
-	int pktlen;
-	int received = 0;
-
-	buff_n = ksp->next_rx_desc_read;
-	while (received < budget
-			&& ksp->rx_buffers[buff_n].skb
-			&& (!(ksp->rx_ring[buff_n].status &
-					cpu_to_le32(RDES_OWN)))) {
-			rmb();
-			flags = le32_to_cpu(ksp->rx_ring[buff_n].status);
-
-			/* Found an SKB which we own, this means we
-			 * received a packet
-			 */
-			if ((flags & (RDES_FS | RDES_LS)) !=
-			    (RDES_FS | RDES_LS)) {
-				/* This packet is not the first and
-				 * the last segment.  Therefore it is
-				 * a "spanning" packet and we can't
-				 * handle it
-				 */
-				goto rx_failure;
-			}
-
-			if (flags & (RDES_ES | RDES_RE)) {
-				/* It's an error packet */
-				ndev->stats.rx_errors++;
-				if (flags & RDES_TL)
-					ndev->stats.rx_length_errors++;
-				if (flags & RDES_RF)
-					ndev->stats.rx_length_errors++;
-				if (flags & RDES_CE)
-					ndev->stats.rx_crc_errors++;
-				if (flags & RDES_RE)
-					ndev->stats.rx_missed_errors++;
-
-				goto rx_failure;
-			}
-
-			pktlen = flags & RDES_FLEN;
-			pktlen -= 4; /* Drop the CRC */
-
-			/* Retrieve the sk_buff */
-			skb = ksp->rx_buffers[buff_n].skb;
-
-			/* Clear it from the ring */
-			ksp->rx_buffers[buff_n].skb = NULL;
-			ksp->rx_ring[buff_n].data_ptr = 0;
-
-			/* Unmap the SKB */
-			dma_unmap_single(ksp->dev,
-					 ksp->rx_buffers[buff_n].dma_ptr,
-					 ksp->rx_buffers[buff_n].length,
-					 DMA_FROM_DEVICE);
-
-			/* Relinquish the SKB to the network layer */
-			skb_put(skb, pktlen);
-			skb->protocol = eth_type_trans(skb, ndev);
-			napi_gro_receive(&ksp->napi, skb);
-
-			/* Record stats */
-			ndev->stats.rx_packets++;
-			ndev->stats.rx_bytes += pktlen;
-			goto rx_finished;
-
-rx_failure:
-			/* This ring entry is an error, but we can
-			 * re-use the skb
-			 */
-			/* Give the ring entry back to the hardware */
-			ksp->rx_ring[buff_n].status = cpu_to_le32(RDES_OWN);
-rx_finished:
-			received++;
-			buff_n = (buff_n + 1) & MAX_RX_DESC_MASK;
-	}
-
-	/* And note which RX descriptor we last did */
-	ksp->next_rx_desc_read = buff_n;
-
-	/* And refill the buffers */
-	ks8695_refill_rxbuffers(ksp);
-
-	/* Kick the RX DMA engine, in case it became suspended */
-	ks8695_writereg(ksp, KS8695_DRSC, 0);
-
-	return received;
-}
-
-
-/**
- *	ks8695_poll - Receive packets via the NAPI poll method
- *	@napi: The NAPI instance embedded in the device's private data
- *	@budget: The maximum number of packets to process in this call
- *
- *	Invoked by the network core when it requests new packets
- *	from the driver.
- */
-static int ks8695_poll(struct napi_struct *napi, int budget)
-{
-	struct ks8695_priv *ksp = container_of(napi, struct ks8695_priv, napi);
-	unsigned long isr = readl(KS8695_IRQ_VA + KS8695_INTEN);
-	unsigned long mask_bit = 1 << ks8695_get_rx_enable_bit(ksp);
-	int work_done;
-
-	work_done = ks8695_rx(ksp, budget);
-
-	if (work_done < budget && napi_complete_done(napi, work_done)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&ksp->rx_lock, flags);
-		/* enable rx interrupt */
-		writel(isr | mask_bit, KS8695_IRQ_VA + KS8695_INTEN);
-		spin_unlock_irqrestore(&ksp->rx_lock, flags);
-	}
-	return work_done;
-}
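ks8695_rx() and ks8695_poll() together implement the standard NAPI contract: consume at most `budget` packets, and only when the ring drains early complete NAPI and re-enable the RX interrupt. A compilable model of that contract — rx_pending(), handle_one_packet() and enable_rx_irq() are hypothetical stubs standing in for the driver's ring and register accesses:

/* Minimal model of the NAPI budget contract used above. */
#include <stdio.h>

static int pending = 5;			/* pretend 5 packets are queued */
static int rx_pending(void)		{ return pending > 0; }
static int handle_one_packet(void)	{ pending--; return 1; }
static void enable_rx_irq(void)		{ puts("re-enable RX IRQ"); }

static int poll_model(int budget)
{
	int work_done = 0;

	while (work_done < budget && rx_pending())
		work_done += handle_one_packet();

	if (work_done < budget) {
		/* ring drained: leave polling mode, let IRQs resume */
		enable_rx_irq();
	}
	return work_done;	/* == budget keeps us in polled mode */
}

int main(void)
{
	printf("work_done=%d\n", poll_model(64));
	return 0;
}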
-
-/**
- *	ks8695_link_irq - Link change IRQ handler
- *	@irq: The IRQ which went off (ignored)
- *	@dev_id: The net_device for the interrupt
- *
- *	The WAN interface can generate an IRQ when the link changes,
- *	report this to the net layer and the user.
- */
-static irqreturn_t
-ks8695_link_irq(int irq, void *dev_id)
-{
-	struct net_device *ndev = (struct net_device *)dev_id;
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	u32 ctrl;
-
-	ctrl = readl(ksp->phyiface_regs + KS8695_WMC);
-	if (ctrl & WMC_WLS) {
-		netif_carrier_on(ndev);
-		if (netif_msg_link(ksp))
-			dev_info(ksp->dev,
-				 "%s: Link is now up (10%sMbps/%s-duplex)\n",
-				 ndev->name,
-				 (ctrl & WMC_WSS) ? "0" : "",
-				 (ctrl & WMC_WDS) ? "Full" : "Half");
-	} else {
-		netif_carrier_off(ndev);
-		if (netif_msg_link(ksp))
-			dev_info(ksp->dev, "%s: Link is now down.\n",
-				 ndev->name);
-	}
-
-	return IRQ_HANDLED;
-}
-
-
-/* KS8695 Device functions */
-
-/**
- *	ks8695_reset - Reset a KS8695 ethernet interface
- *	@ksp: The interface to reset
- *
- *	Perform an engine reset of the interface and re-program it
- *	with sensible defaults.
- */
-static void
-ks8695_reset(struct ks8695_priv *ksp)
-{
-	int reset_timeout = watchdog;
-	/* Issue the reset via the TX DMA control register */
-	ks8695_writereg(ksp, KS8695_DTXC, DTXC_TRST);
-	while (reset_timeout--) {
-		if (!(ks8695_readreg(ksp, KS8695_DTXC) & DTXC_TRST))
-			break;
-		msleep(1);
-	}
-
-	if (reset_timeout < 0) {
-		dev_crit(ksp->dev,
-			 "Timeout waiting for DMA engines to reset\n");
-		/* And blithely carry on */
-	}
-
-	/* Definitely wait long enough before attempting to program
-	 * the engines
-	 */
-	msleep(10);
-
-	/* RX: unicast and broadcast */
-	ks8695_writereg(ksp, KS8695_DRXC, DRXC_RU | DRXC_RB);
-	/* TX: pad and add CRC */
-	ks8695_writereg(ksp, KS8695_DTXC, DTXC_TEP | DTXC_TAC);
-}
-
-/**
- *	ks8695_shutdown - Shut down a KS8695 ethernet interface
- *	@ksp: The interface to shut down
- *
- *	This disables packet RX/TX, cleans up IRQs, drains the rings,
- *	and basically places the interface into a clean shutdown
- *	state.
- */
-static void
-ks8695_shutdown(struct ks8695_priv *ksp)
-{
-	u32 ctrl;
-	int buff_n;
-
-	/* Disable packet transmission */
-	ctrl = ks8695_readreg(ksp, KS8695_DTXC);
-	ks8695_writereg(ksp, KS8695_DTXC, ctrl & ~DTXC_TE);
-
-	/* Disable packet reception */
-	ctrl = ks8695_readreg(ksp, KS8695_DRXC);
-	ks8695_writereg(ksp, KS8695_DRXC, ctrl & ~DRXC_RE);
-
-	/* Release the IRQs */
-	free_irq(ksp->rx_irq, ksp->ndev);
-	free_irq(ksp->tx_irq, ksp->ndev);
-	if (ksp->link_irq != -1)
-		free_irq(ksp->link_irq, ksp->ndev);
-
-	/* Throw away any pending TX packets */
-	for (buff_n = 0; buff_n < MAX_TX_DESC; ++buff_n) {
-		if (ksp->tx_buffers[buff_n].skb) {
-			/* Remove this SKB from the TX ring */
-			ksp->tx_ring[buff_n].owner = 0;
-			ksp->tx_ring[buff_n].status = 0;
-			ksp->tx_ring[buff_n].data_ptr = 0;
-
-			/* Unmap and bin this SKB */
-			dma_unmap_single(ksp->dev,
-					 ksp->tx_buffers[buff_n].dma_ptr,
-					 ksp->tx_buffers[buff_n].length,
-					 DMA_TO_DEVICE);
-			dev_kfree_skb_irq(ksp->tx_buffers[buff_n].skb);
-			ksp->tx_buffers[buff_n].skb = NULL;
-		}
-	}
-
-	/* Purge the RX buffers */
-	for (buff_n = 0; buff_n < MAX_RX_DESC; ++buff_n) {
-		if (ksp->rx_buffers[buff_n].skb) {
-			/* Remove the SKB from the RX ring */
-			ksp->rx_ring[buff_n].status = 0;
-			ksp->rx_ring[buff_n].data_ptr = 0;
-
-			/* Unmap and bin the SKB */
-			dma_unmap_single(ksp->dev,
-					 ksp->rx_buffers[buff_n].dma_ptr,
-					 ksp->rx_buffers[buff_n].length,
-					 DMA_FROM_DEVICE);
-			dev_kfree_skb_irq(ksp->rx_buffers[buff_n].skb);
-			ksp->rx_buffers[buff_n].skb = NULL;
-		}
-	}
-}
-
-
-/**
- *	ks8695_setup_irq - IRQ setup helper function
- *	@irq: The IRQ number to claim
- *	@irq_name: The name to give the IRQ claimant
- *	@handler: The function to call to handle the IRQ
- *	@ndev: The net_device to pass in as the dev_id argument to the handler
- *
- *	Return 0 on success.
- */
-static int
-ks8695_setup_irq(int irq, const char *irq_name,
-		 irq_handler_t handler, struct net_device *ndev)
-{
-	int ret;
-
-	ret = request_irq(irq, handler, IRQF_SHARED, irq_name, ndev);
-
-	if (ret) {
-		dev_err(&ndev->dev, "failure to request IRQ %d\n", irq);
-		return ret;
-	}
-
-	return 0;
-}
-
-/**
- *	ks8695_init_net - Initialise a KS8695 ethernet interface
- *	@ksp: The interface to initialise
- *
- *	This routine fills the RX ring, initialises the DMA engines,
- *	allocates the IRQs and then starts the packet TX and RX
- *	engines.
- */
-static int
-ks8695_init_net(struct ks8695_priv *ksp)
-{
-	int ret;
-	u32 ctrl;
-
-	ks8695_refill_rxbuffers(ksp);
-
-	/* Initialise the DMA engines */
-	ks8695_writereg(ksp, KS8695_RDLB, (u32) ksp->rx_ring_dma);
-	ks8695_writereg(ksp, KS8695_TDLB, (u32) ksp->tx_ring_dma);
-
-	/* Request the IRQs */
-	ret = ks8695_setup_irq(ksp->rx_irq, ksp->rx_irq_name,
-			       ks8695_rx_irq, ksp->ndev);
-	if (ret)
-		return ret;
-	ret = ks8695_setup_irq(ksp->tx_irq, ksp->tx_irq_name,
-			       ks8695_tx_irq, ksp->ndev);
-	if (ret)
-		return ret;
-	if (ksp->link_irq != -1) {
-		ret = ks8695_setup_irq(ksp->link_irq, ksp->link_irq_name,
-				       ks8695_link_irq, ksp->ndev);
-		if (ret)
-			return ret;
-	}
-
-	/* Set up the ring indices */
-	ksp->next_rx_desc_read = 0;
-	ksp->tx_ring_next_slot = 0;
-	ksp->tx_ring_used = 0;
-
-	/* Bring up transmission */
-	ctrl = ks8695_readreg(ksp, KS8695_DTXC);
-	/* Enable packet transmission */
-	ks8695_writereg(ksp, KS8695_DTXC, ctrl | DTXC_TE);
-
-	/* Bring up the reception */
-	ctrl = ks8695_readreg(ksp, KS8695_DRXC);
-	/* Enable packet reception */
-	ks8695_writereg(ksp, KS8695_DRXC, ctrl | DRXC_RE);
-	/* And start the DMA engine */
-	ks8695_writereg(ksp, KS8695_DRSC, 0);
-
-	/* All done */
-	return 0;
-}
-
-/**
- *	ks8695_release_device - HW resource release for KS8695 e-net
- *	@ksp: The device to be freed
- *
- *	This releases the I/O memory regions, DMA-coherent regions, etc.,
- *	which were allocated in ks8695_probe.
- */
-static void
-ks8695_release_device(struct ks8695_priv *ksp)
-{
-	/* Unmap the registers */
-	iounmap(ksp->io_regs);
-	if (ksp->phyiface_regs)
-		iounmap(ksp->phyiface_regs);
-
-	/* And release the request */
-	release_resource(ksp->regs_req);
-	kfree(ksp->regs_req);
-	if (ksp->phyiface_req) {
-		release_resource(ksp->phyiface_req);
-		kfree(ksp->phyiface_req);
-	}
-
-	/* Free the ring buffers */
-	dma_free_coherent(ksp->dev, RING_DMA_SIZE,
-			  ksp->ring_base, ksp->ring_base_dma);
-}
-
-/* Ethtool support */
-
-/**
- *	ks8695_get_msglevel - Get the messages enabled for emission
- *	@ndev: The network device to read from
- */
-static u32
-ks8695_get_msglevel(struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	return ksp->msg_enable;
-}
-
-/**
- *	ks8695_set_msglevel - Set the messages enabled for emission
- *	@ndev: The network device to configure
- *	@value: The messages to set for emission
- */
-static void
-ks8695_set_msglevel(struct net_device *ndev, u32 value)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	ksp->msg_enable = value;
-}
-
-/**
- *	ks8695_wan_get_link_ksettings - Get device-specific settings.
- *	@ndev: The network device to read settings from
- *	@cmd: The ethtool structure to read into
- */
-static int
-ks8695_wan_get_link_ksettings(struct net_device *ndev,
-			      struct ethtool_link_ksettings *cmd)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	u32 ctrl;
-	u32 supported, advertising;
-
-	/* All ports on the KS8695 support these... */
-	supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
-			  SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
-			  SUPPORTED_TP | SUPPORTED_MII);
-
-	advertising = ADVERTISED_TP | ADVERTISED_MII;
-	cmd->base.port = PORT_MII;
-	supported |= (SUPPORTED_Autoneg | SUPPORTED_Pause);
-	cmd->base.phy_address = 0;
-
-	ctrl = readl(ksp->phyiface_regs + KS8695_WMC);
-	if ((ctrl & WMC_WAND) == 0) {
-		/* auto-negotiation is enabled */
-		advertising |= ADVERTISED_Autoneg;
-		if (ctrl & WMC_WANA100F)
-			advertising |= ADVERTISED_100baseT_Full;
-		if (ctrl & WMC_WANA100H)
-			advertising |= ADVERTISED_100baseT_Half;
-		if (ctrl & WMC_WANA10F)
-			advertising |= ADVERTISED_10baseT_Full;
-		if (ctrl & WMC_WANA10H)
-			advertising |= ADVERTISED_10baseT_Half;
-		if (ctrl & WMC_WANAP)
-			advertising |= ADVERTISED_Pause;
-		cmd->base.autoneg = AUTONEG_ENABLE;
-
-		cmd->base.speed = (ctrl & WMC_WSS) ? SPEED_100 : SPEED_10;
-		cmd->base.duplex = (ctrl & WMC_WDS) ?
-			DUPLEX_FULL : DUPLEX_HALF;
-	} else {
-		/* auto-negotiation is disabled */
-		cmd->base.autoneg = AUTONEG_DISABLE;
-
-		cmd->base.speed = (ctrl & WMC_WANF100) ?
-					    SPEED_100 : SPEED_10;
-		cmd->base.duplex = (ctrl & WMC_WANFF) ?
-			DUPLEX_FULL : DUPLEX_HALF;
-	}
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
-
-	return 0;
-}
-
-/**
- *	ks8695_wan_set_link_ksettings - Set device-specific settings.
- *	@ndev: The network device to configure
- *	@cmd: The settings to configure
- */
-static int
-ks8695_wan_set_link_ksettings(struct net_device *ndev,
-			      const struct ethtool_link_ksettings *cmd)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	u32 ctrl;
-	u32 advertising;
-
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						cmd->link_modes.advertising);
-
-	if ((cmd->base.speed != SPEED_10) && (cmd->base.speed != SPEED_100))
-		return -EINVAL;
-	if ((cmd->base.duplex != DUPLEX_HALF) &&
-	    (cmd->base.duplex != DUPLEX_FULL))
-		return -EINVAL;
-	if (cmd->base.port != PORT_MII)
-		return -EINVAL;
-	if ((cmd->base.autoneg != AUTONEG_DISABLE) &&
-	    (cmd->base.autoneg != AUTONEG_ENABLE))
-		return -EINVAL;
-
-	if (cmd->base.autoneg == AUTONEG_ENABLE) {
-		if ((advertising & (ADVERTISED_10baseT_Half |
-				ADVERTISED_10baseT_Full |
-				ADVERTISED_100baseT_Half |
-				ADVERTISED_100baseT_Full)) == 0)
-			return -EINVAL;
-
-		ctrl = readl(ksp->phyiface_regs + KS8695_WMC);
-
-		ctrl &= ~(WMC_WAND | WMC_WANA100F | WMC_WANA100H |
-			  WMC_WANA10F | WMC_WANA10H);
-		if (advertising & ADVERTISED_100baseT_Full)
-			ctrl |= WMC_WANA100F;
-		if (advertising & ADVERTISED_100baseT_Half)
-			ctrl |= WMC_WANA100H;
-		if (advertising & ADVERTISED_10baseT_Full)
-			ctrl |= WMC_WANA10F;
-		if (advertising & ADVERTISED_10baseT_Half)
-			ctrl |= WMC_WANA10H;
-
-		/* force a re-negotiation */
-		ctrl |= WMC_WANR;
-		writel(ctrl, ksp->phyiface_regs + KS8695_WMC);
-	} else {
-		ctrl = readl(ksp->phyiface_regs + KS8695_WMC);
-
-		/* disable auto-negotiation */
-		ctrl |= WMC_WAND;
-		ctrl &= ~(WMC_WANF100 | WMC_WANFF);
-
-		if (cmd->base.speed == SPEED_100)
-			ctrl |= WMC_WANF100;
-		if (cmd->base.duplex == DUPLEX_FULL)
-			ctrl |= WMC_WANFF;
-
-		writel(ctrl, ksp->phyiface_regs + KS8695_WMC);
-	}
-
-	return 0;
-}
-
-/**
- *	ks8695_wan_nwayreset - Restart the autonegotiation on the port.
- *	@ndev: The network device to restart autonegotiation on
- */
-static int
-ks8695_wan_nwayreset(struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	u32 ctrl;
-
-	ctrl = readl(ksp->phyiface_regs + KS8695_WMC);
-
-	if ((ctrl & WMC_WAND) == 0)
-		writel(ctrl | WMC_WANR,
-		       ksp->phyiface_regs + KS8695_WMC);
-	else
-		/* auto-negotiation not enabled */
-		return -EINVAL;
-
-	return 0;
-}
-
-/**
- *	ks8695_wan_get_pause - Retrieve network pause/flow-control advertising
- *	@ndev: The device to retrieve settings from
- *	@param: The structure to fill out with the information
- */
-static void
-ks8695_wan_get_pause(struct net_device *ndev, struct ethtool_pauseparam *param)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	u32 ctrl;
-
-	ctrl = readl(ksp->phyiface_regs + KS8695_WMC);
-
-	/* advertise Pause */
-	param->autoneg = (ctrl & WMC_WANAP);
-
-	/* current Rx Flow-control */
-	ctrl = ks8695_readreg(ksp, KS8695_DRXC);
-	param->rx_pause = (ctrl & DRXC_RFCE);
-
-	/* current Tx Flow-control */
-	ctrl = ks8695_readreg(ksp, KS8695_DTXC);
-	param->tx_pause = (ctrl & DTXC_TFCE);
-}
-
-/**
- *	ks8695_get_drvinfo - Retrieve driver information
- *	@ndev: The network device to retrieve info about
- *	@info: The info structure to fill out.
- */
-static void
-ks8695_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info)
-{
-	strlcpy(info->driver, MODULENAME, sizeof(info->driver));
-	strlcpy(info->version, MODULEVERSION, sizeof(info->version));
-	strlcpy(info->bus_info, dev_name(ndev->dev.parent),
-		sizeof(info->bus_info));
-}
-
-static const struct ethtool_ops ks8695_ethtool_ops = {
-	.get_msglevel	= ks8695_get_msglevel,
-	.set_msglevel	= ks8695_set_msglevel,
-	.get_drvinfo	= ks8695_get_drvinfo,
-};
-
-static const struct ethtool_ops ks8695_wan_ethtool_ops = {
-	.get_msglevel	= ks8695_get_msglevel,
-	.set_msglevel	= ks8695_set_msglevel,
-	.nway_reset	= ks8695_wan_nwayreset,
-	.get_link	= ethtool_op_get_link,
-	.get_pauseparam = ks8695_wan_get_pause,
-	.get_drvinfo	= ks8695_get_drvinfo,
-	.get_link_ksettings = ks8695_wan_get_link_ksettings,
-	.set_link_ksettings = ks8695_wan_set_link_ksettings,
-};
-
-/* Network device interface functions */
-
-/**
- *	ks8695_set_mac - Update MAC in net dev and HW
- *	@ndev: The network device to update
- *	@addr: The new MAC address to set
- */
-static int
-ks8695_set_mac(struct net_device *ndev, void *addr)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	struct sockaddr *address = addr;
-
-	if (!is_valid_ether_addr(address->sa_data))
-		return -EADDRNOTAVAIL;
-
-	memcpy(ndev->dev_addr, address->sa_data, ndev->addr_len);
-
-	ks8695_update_mac(ksp);
-
-	dev_dbg(ksp->dev, "%s: Updated MAC address to %pM\n",
-		ndev->name, ndev->dev_addr);
-
-	return 0;
-}
-
-/**
- *	ks8695_set_multicast - Set up the multicast behaviour of the interface
- *	@ndev: The net_device to configure
- *
- *	This routine, called by the net layer, configures promiscuity
- *	and multicast reception behaviour for the interface.
- */
-static void
-ks8695_set_multicast(struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	u32 ctrl;
-
-	ctrl = ks8695_readreg(ksp, KS8695_DRXC);
-
-	if (ndev->flags & IFF_PROMISC) {
-		/* enable promiscuous mode */
-		ctrl |= DRXC_RA;
-	} else {
-		/* disable promiscuous mode */
-		ctrl &= ~DRXC_RA;
-	}
-
-	if (ndev->flags & IFF_ALLMULTI) {
-		/* enable all multicast mode */
-		ctrl |= DRXC_RM;
-	} else if (netdev_mc_count(ndev) > KS8695_NR_ADDRESSES) {
-		/* more specific multicast addresses than can be
-		 * handled in hardware
-		 */
-		ctrl |= DRXC_RM;
-	} else {
-		/* enable specific multicasts */
-		ctrl &= ~DRXC_RM;
-		ks8695_init_partial_multicast(ksp, ndev);
-	}
-
-	ks8695_writereg(ksp, KS8695_DRXC, ctrl);
-}
-
-/**
- *	ks8695_timeout - Handle a network tx/rx timeout.
- *	@ndev: The net_device which timed out.
- *
- *	A network transaction timed out; reset the device.
- */
-static void
-ks8695_timeout(struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	netif_stop_queue(ndev);
-	ks8695_shutdown(ksp);
-
-	ks8695_reset(ksp);
-
-	ks8695_update_mac(ksp);
-
-	/* We ignore the return value: since this managed to initialise
-	 * before, it will probably be okay to initialise again.
-	 */
-	ks8695_init_net(ksp);
-
-	/* Reconfigure promiscuity etc */
-	ks8695_set_multicast(ndev);
-
-	/* And start the TX queue once more */
-	netif_start_queue(ndev);
-}
-
-/**
- *	ks8695_start_xmit - Start a packet transmission
- *	@skb: The packet to transmit
- *	@ndev: The network device to send the packet on
- *
- *	This routine, called by the net layer, takes ownership of the
- *	sk_buff and adds it to the TX ring. It then kicks the TX DMA
- *	engine to ensure transmission begins.
- */
-static int
-ks8695_start_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	int buff_n;
-	dma_addr_t dmap;
-
-	spin_lock_irq(&ksp->txq_lock);
-
-	if (ksp->tx_ring_used == MAX_TX_DESC) {
-		/* Somehow we got entered when we have no room */
-		spin_unlock_irq(&ksp->txq_lock);
-		return NETDEV_TX_BUSY;
-	}
-
-	buff_n = ksp->tx_ring_next_slot;
-
-	BUG_ON(ksp->tx_buffers[buff_n].skb);
-
-	dmap = dma_map_single(ksp->dev, skb->data, skb->len, DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(ksp->dev, dmap))) {
-		/* Failed to DMA map this SKB, give it back for now */
-		spin_unlock_irq(&ksp->txq_lock);
-		dev_dbg(ksp->dev, "%s: Could not map DMA memory for "\
-			"transmission, trying later\n", ndev->name);
-		return NETDEV_TX_BUSY;
-	}
-
-	ksp->tx_buffers[buff_n].dma_ptr = dmap;
-	/* Mapped okay, store the buffer pointer and length for later */
-	ksp->tx_buffers[buff_n].skb = skb;
-	ksp->tx_buffers[buff_n].length = skb->len;
-
-	/* Fill out the TX descriptor */
-	ksp->tx_ring[buff_n].data_ptr =
-		cpu_to_le32(ksp->tx_buffers[buff_n].dma_ptr);
-	ksp->tx_ring[buff_n].status =
-		cpu_to_le32(TDES_IC | TDES_FS | TDES_LS |
-			    (skb->len & TDES_TBS));
-
-	wmb();
-
-	/* Hand it over to the hardware */
-	ksp->tx_ring[buff_n].owner = cpu_to_le32(TDES_OWN);
-
-	if (++ksp->tx_ring_used == MAX_TX_DESC)
-		netif_stop_queue(ndev);
-
-	/* Kick the TX DMA in case it decided to go IDLE */
-	ks8695_writereg(ksp, KS8695_DTSC, 0);
-
-	/* And update the next ring slot */
-	ksp->tx_ring_next_slot = (buff_n + 1) & MAX_TX_DESC_MASK;
-
-	spin_unlock_irq(&ksp->txq_lock);
-	return NETDEV_TX_OK;
-}
-
-/**
- *	ks8695_stop - Stop (shutdown) a KS8695 ethernet interface
- *	@ndev: The net_device to stop
- *
- *	This disables the TX queue and cleans up a KS8695 ethernet
- *	device.
- */
-static int
-ks8695_stop(struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	netif_stop_queue(ndev);
-	napi_disable(&ksp->napi);
-
-	ks8695_shutdown(ksp);
-
-	return 0;
-}
-
-/**
- *	ks8695_open - Open (bring up) a KS8695 ethernet interface
- *	@ndev: The net_device to open
- *
- *	This resets, configures the MAC, initialises the RX ring and
- *	DMA engines and starts the TX queue for a KS8695 ethernet
- *	device.
- */
-static int
-ks8695_open(struct net_device *ndev)
-{
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-	int ret;
-
-	ks8695_reset(ksp);
-
-	ks8695_update_mac(ksp);
-
-	ret = ks8695_init_net(ksp);
-	if (ret) {
-		ks8695_shutdown(ksp);
-		return ret;
-	}
-
-	napi_enable(&ksp->napi);
-	netif_start_queue(ndev);
-
-	return 0;
-}
-
-/* Platform device driver */
-
-/**
- *	ks8695_init_switch - Init LAN switch to known good defaults.
- *	@ksp: The device to initialise
- *
- *	This initialises the LAN switch in the KS8695 to a known-good
- *	set of defaults.
- */
-static void
-ks8695_init_switch(struct ks8695_priv *ksp)
-{
-	u32 ctrl;
-
-	/* Default value for SEC0 according to datasheet */
-	ctrl = 0x40819e00;
-
-	/* LED0 = Link	 LED1 = Link/Activity */
-	ctrl &= ~(SEC0_LLED1S | SEC0_LLED0S);
-	ctrl |= (LLED0S_LINK | LLED1S_LINK_ACTIVITY);
-
-	/* Enable Switch */
-	ctrl |= SEC0_ENABLE;
-
-	writel(ctrl, ksp->phyiface_regs + KS8695_SEC0);
-
-	/* Defaults for SEC1 */
-	writel(0x9400100, ksp->phyiface_regs + KS8695_SEC1);
-}
-
-/**
- *	ks8695_init_wan_phy - Initialise the WAN PHY to sensible defaults
- *	@ksp: The device to initialise
- *
- *	This initialises a KS8695's WAN phy to sensible values for
- *	autonegotiation etc.
- */
-static void
-ks8695_init_wan_phy(struct ks8695_priv *ksp)
-{
-	u32 ctrl;
-
-	/* Support auto-negotiation */
-	ctrl = (WMC_WANAP | WMC_WANA100F | WMC_WANA100H |
-		WMC_WANA10F | WMC_WANA10H);
-
-	/* LED0 = Activity, LED1 = Link */
-	ctrl |= (WLED0S_ACTIVITY | WLED1S_LINK);
-
-	/* Restart Auto-negotiation */
-	ctrl |= WMC_WANR;
-
-	writel(ctrl, ksp->phyiface_regs + KS8695_WMC);
-
-	writel(0, ksp->phyiface_regs + KS8695_WPPM);
-	writel(0, ksp->phyiface_regs + KS8695_PPS);
-}
-
-static const struct net_device_ops ks8695_netdev_ops = {
-	.ndo_open		= ks8695_open,
-	.ndo_stop		= ks8695_stop,
-	.ndo_start_xmit		= ks8695_start_xmit,
-	.ndo_tx_timeout		= ks8695_timeout,
-	.ndo_set_mac_address	= ks8695_set_mac,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_rx_mode	= ks8695_set_multicast,
-};
-
-/**
- *	ks8695_probe - Probe and initialise a KS8695 ethernet interface
- *	@pdev: The platform device to probe
- *
- *	Initialise a KS8695 ethernet device from platform data.
- *
- *	This driver requires at least one IORESOURCE_MEM for the
- *	registers and two IORESOURCE_IRQ for the RX and TX IRQs
- *	respectively. It can optionally take an additional
- *	IORESOURCE_MEM for the switch or phy in the case of the lan or
- *	wan ports, and an IORESOURCE_IRQ for the link IRQ for the wan
- *	port.
- */
-static int
-ks8695_probe(struct platform_device *pdev)
-{
-	struct ks8695_priv *ksp;
-	struct net_device *ndev;
-	struct resource *regs_res, *phyiface_res;
-	struct resource *rxirq_res, *txirq_res, *linkirq_res;
-	int ret = 0;
-	int buff_n;
-	bool inv_mac_addr = false;
-	u32 machigh, maclow;
-
-	/* Initialise a net_device */
-	ndev = alloc_etherdev(sizeof(struct ks8695_priv));
-	if (!ndev)
-		return -ENOMEM;
-
-	SET_NETDEV_DEV(ndev, &pdev->dev);
-
-	dev_dbg(&pdev->dev, "ks8695_probe() called\n");
-
-	/* Configure our private structure a little */
-	ksp = netdev_priv(ndev);
-
-	ksp->dev = &pdev->dev;
-	ksp->ndev = ndev;
-	ksp->msg_enable = NETIF_MSG_LINK;
-
-	/* Retrieve resources */
-	regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	phyiface_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
-	rxirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	txirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
-	linkirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 2);
-
-	if (!(regs_res && rxirq_res && txirq_res)) {
-		dev_err(ksp->dev, "insufficient resources\n");
-		ret = -ENOENT;
-		goto failure;
-	}
-
-	ksp->regs_req = request_mem_region(regs_res->start,
-					   resource_size(regs_res),
-					   pdev->name);
-
-	if (!ksp->regs_req) {
-		dev_err(ksp->dev, "cannot claim register space\n");
-		ret = -EIO;
-		goto failure;
-	}
-
-	ksp->io_regs = ioremap(regs_res->start, resource_size(regs_res));
-
-	if (!ksp->io_regs) {
-		dev_err(ksp->dev, "failed to ioremap registers\n");
-		ret = -EINVAL;
-		goto failure;
-	}
-
-	if (phyiface_res) {
-		ksp->phyiface_req =
-			request_mem_region(phyiface_res->start,
-					   resource_size(phyiface_res),
-					   phyiface_res->name);
-
-		if (!ksp->phyiface_req) {
-			dev_err(ksp->dev,
-				"cannot claim switch register space\n");
-			ret = -EIO;
-			goto failure;
-		}
-
-		ksp->phyiface_regs = ioremap(phyiface_res->start,
-					     resource_size(phyiface_res));
-
-		if (!ksp->phyiface_regs) {
-			dev_err(ksp->dev,
-				"failed to ioremap switch registers\n");
-			ret = -EINVAL;
-			goto failure;
-		}
-	}
-
-	ksp->rx_irq = rxirq_res->start;
-	ksp->rx_irq_name = rxirq_res->name ? rxirq_res->name : "Ethernet RX";
-	ksp->tx_irq = txirq_res->start;
-	ksp->tx_irq_name = txirq_res->name ? txirq_res->name : "Ethernet TX";
-	ksp->link_irq = (linkirq_res ? linkirq_res->start : -1);
-	ksp->link_irq_name = (linkirq_res && linkirq_res->name) ?
-		linkirq_res->name : "Ethernet Link";
-
-	/* driver system setup */
-	ndev->netdev_ops = &ks8695_netdev_ops;
-	ndev->watchdog_timeo	 = msecs_to_jiffies(watchdog);
-
-	netif_napi_add(ndev, &ksp->napi, ks8695_poll, NAPI_WEIGHT);
-
-	/* Retrieve the default MAC addr from the chip. */
-	/* The bootloader should have left it in there for us. */
-
-	machigh = ks8695_readreg(ksp, KS8695_MAH);
-	maclow = ks8695_readreg(ksp, KS8695_MAL);
-
-	ndev->dev_addr[0] = (machigh >> 8) & 0xFF;
-	ndev->dev_addr[1] = machigh & 0xFF;
-	ndev->dev_addr[2] = (maclow >> 24) & 0xFF;
-	ndev->dev_addr[3] = (maclow >> 16) & 0xFF;
-	ndev->dev_addr[4] = (maclow >> 8) & 0xFF;
-	ndev->dev_addr[5] = maclow & 0xFF;
-
-	if (!is_valid_ether_addr(ndev->dev_addr))
-		inv_mac_addr = true;
-
-	/* In order to be efficient memory-wise, we allocate both
-	 * rings in one go.
-	 */
-	ksp->ring_base = dma_alloc_coherent(&pdev->dev, RING_DMA_SIZE,
-					    &ksp->ring_base_dma, GFP_KERNEL);
-	if (!ksp->ring_base) {
-		ret = -ENOMEM;
-		goto failure;
-	}
-
-	/* Specify the TX DMA ring buffer */
-	ksp->tx_ring = ksp->ring_base;
-	ksp->tx_ring_dma = ksp->ring_base_dma;
-
-	/* And initialise the queue's lock */
-	spin_lock_init(&ksp->txq_lock);
-	spin_lock_init(&ksp->rx_lock);
-
-	/* Specify the RX DMA ring buffer */
-	ksp->rx_ring = ksp->ring_base + TX_RING_DMA_SIZE;
-	ksp->rx_ring_dma = ksp->ring_base_dma + TX_RING_DMA_SIZE;
-
-	/* Zero the descriptor rings */
-	memset(ksp->tx_ring, 0, TX_RING_DMA_SIZE);
-	memset(ksp->rx_ring, 0, RX_RING_DMA_SIZE);
-
-	/* Build the rings */
-	for (buff_n = 0; buff_n < MAX_TX_DESC; ++buff_n) {
-		ksp->tx_ring[buff_n].next_desc =
-			cpu_to_le32(ksp->tx_ring_dma +
-				    (sizeof(struct tx_ring_desc) *
-				     ((buff_n + 1) & MAX_TX_DESC_MASK)));
-	}
-
-	for (buff_n = 0; buff_n < MAX_RX_DESC; ++buff_n) {
-		ksp->rx_ring[buff_n].next_desc =
-			cpu_to_le32(ksp->rx_ring_dma +
-				    (sizeof(struct rx_ring_desc) *
-				     ((buff_n + 1) & MAX_RX_DESC_MASK)));
-	}
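Because MAX_TX_DESC and MAX_RX_DESC are powers of two, the `(buff_n + 1) & MASK` arithmetic above links the last descriptor back to slot 0, making each ring circular. A tiny demonstration of the wraparound:

/* Demonstrates the circular linking above: with a power-of-two ring,
 * (i + 1) & MASK wraps the final descriptor's next pointer to slot 0.
 */
#include <stdio.h>

#define RING_SIZE 16			/* MAX_RX_DESC */
#define RING_MASK (RING_SIZE - 1)	/* MAX_RX_DESC_MASK */

int main(void)
{
	for (int i = 0; i < RING_SIZE; i++)
		printf("desc %2d -> next %2d\n", i, (i + 1) & RING_MASK);
	return 0;
}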
-
-	/* Initialise the port (physically) */
-	if (ksp->phyiface_regs && ksp->link_irq == -1) {
-		ks8695_init_switch(ksp);
-		ksp->dtype = KS8695_DTYPE_LAN;
-		ndev->ethtool_ops = &ks8695_ethtool_ops;
-	} else if (ksp->phyiface_regs && ksp->link_irq != -1) {
-		ks8695_init_wan_phy(ksp);
-		ksp->dtype = KS8695_DTYPE_WAN;
-		ndev->ethtool_ops = &ks8695_wan_ethtool_ops;
-	} else {
-		/* No initialisation since HPNA does not have a PHY */
-		ksp->dtype = KS8695_DTYPE_HPNA;
-		ndev->ethtool_ops = &ks8695_ethtool_ops;
-	}
-
-	/* And bring up the net_device with the net core */
-	platform_set_drvdata(pdev, ndev);
-	ret = register_netdev(ndev);
-
-	if (ret == 0) {
-		if (inv_mac_addr)
-			dev_warn(ksp->dev, "%s: Invalid ethernet MAC address. Please set using ip\n",
-				 ndev->name);
-		dev_info(ksp->dev, "ks8695 ethernet (%s) MAC: %pM\n",
-			 ks8695_port_type(ksp), ndev->dev_addr);
-	} else {
-		/* Report the failure to register the net_device */
-		dev_err(ksp->dev, "ks8695net: failed to register netdev.\n");
-		goto failure;
-	}
-
-	/* All is well */
-	return 0;
-
-	/* Error exit path */
-failure:
-	ks8695_release_device(ksp);
-	free_netdev(ndev);
-
-	return ret;
-}
-
-/**
- *	ks8695_drv_suspend - Suspend a KS8695 ethernet platform device.
- *	@pdev: The device to suspend
- *	@state: The suspend state
- *
- *	This routine detaches and shuts down a KS8695 ethernet device.
- */
-static int
-ks8695_drv_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	ksp->in_suspend = 1;
-
-	if (netif_running(ndev)) {
-		netif_device_detach(ndev);
-		ks8695_shutdown(ksp);
-	}
-
-	return 0;
-}
-
-/**
- *	ks8695_drv_resume - Resume a KS8695 ethernet platform device.
- *	@pdev: The device to resume
- *
- *	This routine re-initialises and re-attaches a KS8695 ethernet
- *	device.
- */
-static int
-ks8695_drv_resume(struct platform_device *pdev)
-{
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	if (netif_running(ndev)) {
-		ks8695_reset(ksp);
-		ks8695_init_net(ksp);
-		ks8695_set_multicast(ndev);
-		netif_device_attach(ndev);
-	}
-
-	ksp->in_suspend = 0;
-
-	return 0;
-}
-
-/**
- *	ks8695_drv_remove - Remove a KS8695 net device on driver unload.
- *	@pdev: The platform device to remove
- *
- *	This unregisters and releases a KS8695 ethernet device.
- */
-static int
-ks8695_drv_remove(struct platform_device *pdev)
-{
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct ks8695_priv *ksp = netdev_priv(ndev);
-
-	netif_napi_del(&ksp->napi);
-
-	unregister_netdev(ndev);
-	ks8695_release_device(ksp);
-	free_netdev(ndev);
-
-	dev_dbg(&pdev->dev, "released and freed device\n");
-	return 0;
-}
-
-static struct platform_driver ks8695_driver = {
-	.driver = {
-		.name	= MODULENAME,
-	},
-	.probe		= ks8695_probe,
-	.remove		= ks8695_drv_remove,
-	.suspend	= ks8695_drv_suspend,
-	.resume		= ks8695_drv_resume,
-};
-
-module_platform_driver(ks8695_driver);
-
-MODULE_AUTHOR("Simtec Electronics");
-MODULE_DESCRIPTION("Micrel KS8695 (Centaur) Ethernet driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" MODULENAME);
-
-module_param(watchdog, int, 0400);
-MODULE_PARM_DESC(watchdog, "transmit timeout in milliseconds");
diff --git a/drivers/net/ethernet/micrel/ks8695net.h b/drivers/net/ethernet/micrel/ks8695net.h
deleted file mode 100644
index b18fad4..0000000
--- a/drivers/net/ethernet/micrel/ks8695net.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Micrel KS8695 (Centaur) Ethernet.
- *
- * Copyright 2008 Simtec Electronics
- *		  Daniel Silverstone <dsilvers@simtec.co.uk>
- *		  Vincent Sanders <vince@simtec.co.uk>
- */
-
-#ifndef KS8695NET_H
-#define KS8695NET_H
-
-/* Receive descriptor flags */
-#define RDES_OWN	(1 << 31)	/* Ownership */
-#define RDES_FS		(1 << 30)	/* First Descriptor */
-#define RDES_LS		(1 << 29)	/* Last Descriptor */
-#define RDES_IPE	(1 << 28)	/* IP Checksum error */
-#define RDES_TCPE	(1 << 27)	/* TCP Checksum error */
-#define RDES_UDPE	(1 << 26)	/* UDP Checksum error */
-#define RDES_ES		(1 << 25)	/* Error summary */
-#define RDES_MF		(1 << 24)	/* Multicast Frame */
-#define RDES_RE		(1 << 19)	/* MII Error reported */
-#define RDES_TL		(1 << 18)	/* Frame too Long */
-#define RDES_RF		(1 << 17)	/* Runt Frame */
-#define RDES_CE		(1 << 16)	/* CRC error */
-#define RDES_FT		(1 << 15)	/* Frame Type */
-#define RDES_FLEN	(0x7ff)		/* Frame Length */
-
-#define RDES_RER	(1 << 25)	/* Receive End of Ring */
-#define RDES_RBS	(0x7ff)		/* Receive Buffer Size */
-
-/* Transmit descriptor flags */
-
-#define TDES_OWN	(1 << 31)	/* Ownership */
-
-#define TDES_IC		(1 << 31)	/* Interrupt on Completion */
-#define TDES_FS		(1 << 30)	/* First Segment */
-#define TDES_LS		(1 << 29)	/* Last Segment */
-#define TDES_IPCKG	(1 << 28)	/* IP Checksum generate */
-#define TDES_TCPCKG	(1 << 27)	/* TCP Checksum generate */
-#define TDES_UDPCKG	(1 << 26)	/* UDP Checksum generate */
-#define TDES_TER	(1 << 25)	/* Transmit End of Ring */
-#define TDES_TBS	(0x7ff)		/* Transmit Buffer Size */
-
-/*
- * Network controller register offsets
- */
-#define KS8695_DTXC		(0x00)		/* DMA Transmit Control */
-#define KS8695_DRXC		(0x04)		/* DMA Receive Control */
-#define KS8695_DTSC		(0x08)		/* DMA Transmit Start Command */
-#define KS8695_DRSC		(0x0c)		/* DMA Receive Start Command */
-#define KS8695_TDLB		(0x10)		/* Transmit Descriptor List
-						 * Base Address
-						 */
-#define KS8695_RDLB		(0x14)		/* Receive Descriptor List
-						 * Base Address
-						 */
-#define KS8695_MAL		(0x18)		/* MAC Station Address Low */
-#define KS8695_MAH		(0x1c)		/* MAC Station Address High */
-#define KS8695_AAL_(n)		(0x80 + ((n)*8))	/* MAC Additional
-							 * Station Address
-							 * (0..15) Low
-							 */
-#define KS8695_AAH_(n)		(0x84 + ((n)*8))	/* MAC Additional
-							 * Station Address
-							 * (0..15) High
-							 */
-
-
-/* DMA Transmit Control Register */
-#define DTXC_TRST		(1    << 31)	/* Soft Reset */
-#define DTXC_TBS		(0x3f << 24)	/* Transmit Burst Size */
-#define DTXC_TUCG		(1    << 18)	/* Transmit UDP
-						 * Checksum Generate
-						 */
-#define DTXC_TTCG		(1    << 17)	/* Transmit TCP
-						 * Checksum Generate
-						 */
-#define DTXC_TICG		(1    << 16)	/* Transmit IP
-						 * Checksum Generate
-						 */
-#define DTXC_TFCE		(1    <<  9)	/* Transmit Flow
-						 * Control Enable
-						 */
-#define DTXC_TLB		(1    <<  8)	/* Loopback mode */
-#define DTXC_TEP		(1    <<  2)	/* Transmit Enable Padding */
-#define DTXC_TAC		(1    <<  1)	/* Transmit Add CRC */
-#define DTXC_TE			(1    <<  0)	/* TX Enable */
-
-/* DMA Receive Control Register */
-#define DRXC_RBS		(0x3f << 24)	/* Receive Burst Size */
-#define DRXC_RUCC		(1    << 18)	/* Receive UDP Checksum check */
-#define DRXC_RTCG		(1    << 17)	/* Receive TCP Checksum check */
-#define DRXC_RICG		(1    << 16)	/* Receive IP Checksum check */
-#define DRXC_RFCE		(1    <<  9)	/* Receive Flow Control
-						 * Enable
-						 */
-#define DRXC_RB			(1    <<  6)	/* Receive Broadcast */
-#define DRXC_RM			(1    <<  5)	/* Receive Multicast */
-#define DRXC_RU			(1    <<  4)	/* Receive Unicast */
-#define DRXC_RERR		(1    <<  3)	/* Receive Error Frame */
-#define DRXC_RA			(1    <<  2)	/* Receive All */
-#define DRXC_RE			(1    <<  0)	/* RX Enable */
-
-/* Additional Station Address High */
-#define AAH_E			(1    << 31)	/* Address Enabled */
-
-#endif /* KS8695NET_H */
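For reference, an illustrative decode of an RX status word against the RDES_* flags above, mirroring the checks ks8695_rx() performed (first/last segment, error summary, then frame length minus the 4-byte CRC):

/* Illustrative decode of an RX descriptor status word, mirroring the
 * checks in ks8695_rx() against the RDES_* flags defined above.
 */
#include <stdio.h>
#include <stdint.h>

#define RDES_FS   (1u << 30)
#define RDES_LS   (1u << 29)
#define RDES_ES   (1u << 25)
#define RDES_RE   (1u << 19)
#define RDES_FLEN 0x7ffu

/* Returns payload length (CRC stripped), or -1 if unusable. */
static int rx_frame_len(uint32_t status)
{
	if ((status & (RDES_FS | RDES_LS)) != (RDES_FS | RDES_LS))
		return -1;		/* spanning packet: unsupported */
	if (status & (RDES_ES | RDES_RE))
		return -1;		/* error summary / MII error */
	return (int)(status & RDES_FLEN) - 4;	/* drop trailing CRC */
}

int main(void)
{
	uint32_t ok = RDES_FS | RDES_LS | 64;	/* 64-byte frame, no errors */
	printf("len=%d\n", rx_frame_len(ok));	/* prints len=60 */
	return 0;
}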
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index e3d7c74..da329ca 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * ks8842.c timberdale KS8842 ethernet driver
  * Copyright (c) 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 /* Supports:
@@ -592,9 +580,7 @@
 		dma_unmap_single(adapter->dev, sg_dma_address(sg),
 			DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
 	sg_dma_address(sg) = 0;
-	if (ctl->skb)
-		dev_kfree_skb(ctl->skb);
-
+	dev_kfree_skb(ctl->skb);
 	ctl->skb = NULL;
 
 	printk(KERN_ERR DRV_NAME": Failed to start RX DMA: %d\n", err);
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index bd6e901..33305c9 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* drivers/net/ethernet/micrel/ks8851.c
  *
  * Copyright 2009 Simtec Electronics
  *	http://www.simtec.co.uk/
  *	Ben Dooks <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -142,6 +139,12 @@
 
 static int msg_enable;
 
+/* SPI frame opcodes */
+#define KS_SPIOP_RD	(0x00)
+#define KS_SPIOP_WR	(0x40)
+#define KS_SPIOP_RXFIFO	(0x80)
+#define KS_SPIOP_TXFIFO	(0xC0)
+
 /* shift for byte-enable data */
 #define BYTE_EN(_x)	((_x) << 2)
 
@@ -419,8 +422,8 @@
 	const u8 *mac_addr;
 
 	mac_addr = of_get_mac_address(ks->spidev->dev.of_node);
-	if (mac_addr) {
-		memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr)) {
+		ether_addr_copy(dev->dev_addr, mac_addr);
 		ks8851_write_mac_addr(dev);
 		return;
 	}
@@ -535,9 +538,8 @@
 		/* set dma read address */
 		ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00);
 
-		/* start the packet dma process, and set auto-dequeue rx */
-		ks8851_wrreg16(ks, KS_RXQCR,
-			       ks->rc_rxqcr | RXQCR_SDA | RXQCR_ADRFE);
+		/* start DMA access */
+		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
 
 		if (rxlen > 4) {
 			unsigned int rxalign;
@@ -568,7 +570,8 @@
 			}
 		}
 
-		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
+		/* end DMA access and dequeue packet */
+		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF);
 	}
 }
 
@@ -785,6 +788,15 @@
 static int ks8851_net_open(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	int ret;
+
+	ret = request_threaded_irq(dev->irq, NULL, ks8851_irq,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   dev->name, ks);
+	if (ret < 0) {
+		netdev_err(dev, "failed to get irq\n");
+		return ret;
+	}
 
 	/* lock the card, even if we may not actually be doing anything
 	 * else at the moment */
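The hunk above, together with the free_irq() addition in the -899 hunk below, moves IRQ ownership from probe/remove to open/stop, so the interrupt line is only claimed while the interface is up. A condensed sketch of that lifecycle — kernel-context code, not the driver's literal implementation, with error handling trimmed:

/* Sketch only: request the IRQ when the interface opens, release it on
 * stop. Mirrors the pattern the hunks above adopt; compiles only in
 * kernel context (linux/interrupt.h, linux/netdevice.h).
 */
static int open_sketch(struct net_device *dev)
{
	struct ks8851_net *ks = netdev_priv(dev);
	int ret;

	ret = request_threaded_irq(dev->irq, NULL, ks8851_irq,
				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
				   dev->name, ks);
	if (ret < 0)
		return ret;		/* nothing to unwind yet */

	/* ... power up and start the MAC ... */
	return 0;
}

static int stop_sketch(struct net_device *dev)
{
	struct ks8851_net *ks = netdev_priv(dev);

	/* ... stop the MAC, flush queued work ... */
	free_irq(dev->irq, ks);
	return 0;
}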
@@ -849,6 +861,7 @@
 	netif_dbg(ks, ifup, ks->netdev, "network device up\n");
 
 	mutex_unlock(&ks->lock);
+	mii_check_link(&ks->mii);
 	return 0;
 }
 
@@ -899,6 +912,8 @@
 		dev_kfree_skb(txb);
 	}
 
+	free_irq(dev->irq, ks);
+
 	return 0;
 }
 
@@ -1508,6 +1523,7 @@
 
 	spi_set_drvdata(spi, ks);
 
+	netif_carrier_off(ks->netdev);
 	ndev->if_port = IF_PORT_100BASET;
 	ndev->netdev_ops = &ks8851_netdev_ops;
 	ndev->irq = spi->irq;
@@ -1529,14 +1545,6 @@
 	ks8851_read_selftest(ks);
 	ks8851_init_mac(ks);
 
-	ret = request_threaded_irq(spi->irq, NULL, ks8851_irq,
-				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				   ndev->name, ks);
-	if (ret < 0) {
-		dev_err(&spi->dev, "failed to get irq\n");
-		goto err_irq;
-	}
-
 	ret = register_netdev(ndev);
 	if (ret) {
 		dev_err(&spi->dev, "failed to register network device\n");
@@ -1549,14 +1557,10 @@
 
 	return 0;
 
-
 err_netdev:
-	free_irq(ndev->irq, ks);
-
-err_irq:
+err_id:
 	if (gpio_is_valid(gpio))
 		gpio_set_value(gpio, 0);
-err_id:
 	regulator_disable(ks->vdd_reg);
 err_reg:
 	regulator_disable(ks->vdd_io);
@@ -1574,7 +1578,6 @@
 		dev_info(&spi->dev, "remove\n");
 
 	unregister_netdev(priv->netdev);
-	free_irq(spi->irq, priv);
 	if (gpio_is_valid(priv->gpio))
 		gpio_set_value(priv->gpio, 0);
 	regulator_disable(priv->vdd_reg);
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index 852256e..8f834ae 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -1,19 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* drivers/net/ethernet/micrel/ks8851.h
  *
  * Copyright 2009 Simtec Electronics
  *      Ben Dooks <ben@simtec.co.uk>
  *
  * KS8851 register definitions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
 */
 
 #define KS_CCR					0x08
+#define CCR_LE					(1 << 10)   /* KSZ8851-16MLL */
 #define CCR_EEPROM				(1 << 9)
-#define CCR_SPI					(1 << 8)
-#define CCR_32PIN				(1 << 0)
+#define CCR_SPI					(1 << 8)    /* KSZ8851SNL    */
+#define CCR_8BIT				(1 << 7)    /* KSZ8851-16MLL */
+#define CCR_16BIT				(1 << 6)    /* KSZ8851-16MLL */
+#define CCR_32BIT				(1 << 5)    /* KSZ8851-16MLL */
+#define CCR_SHARED				(1 << 4)    /* KSZ8851-16MLL */
+#define CCR_48PIN				(1 << 1)    /* KSZ8851-16MLL */
+#define CCR_32PIN				(1 << 0)    /* KSZ8851SNL    */
 
 /* MAC address registers */
 #define KS_MAR(_m)				(0x15 - (_m))
@@ -112,13 +115,13 @@
 #define RXCR1_RXE				(1 << 0)
 
 #define KS_RXCR2				0x76
-#define RXCR2_SRDBL_MASK			(0x7 << 5)
-#define RXCR2_SRDBL_SHIFT			(5)
-#define RXCR2_SRDBL_4B				(0x0 << 5)
-#define RXCR2_SRDBL_8B				(0x1 << 5)
-#define RXCR2_SRDBL_16B				(0x2 << 5)
-#define RXCR2_SRDBL_32B				(0x3 << 5)
-#define RXCR2_SRDBL_FRAME			(0x4 << 5)
+#define RXCR2_SRDBL_MASK			(0x7 << 5)  /* KSZ8851SNL    */
+#define RXCR2_SRDBL_SHIFT			(5)	    /* KSZ8851SNL    */
+#define RXCR2_SRDBL_4B				(0x0 << 5)  /* KSZ8851SNL    */
+#define RXCR2_SRDBL_8B				(0x1 << 5)  /* KSZ8851SNL    */
+#define RXCR2_SRDBL_16B				(0x2 << 5)  /* KSZ8851SNL    */
+#define RXCR2_SRDBL_32B				(0x3 << 5)  /* KSZ8851SNL    */
+#define RXCR2_SRDBL_FRAME			(0x4 << 5)  /* KSZ8851SNL    */
 #define RXCR2_IUFFP				(1 << 4)
 #define RXCR2_RXIUFCEZ				(1 << 3)
 #define RXCR2_UDPLFE				(1 << 2)
@@ -143,8 +146,10 @@
 #define RXFSHR_RXCE				(1 << 0)
 
 #define KS_RXFHBCR				0x7E
+#define RXFHBCR_CNT_MASK			(0xfff << 0)
+
 #define KS_TXQCR				0x80
-#define TXQCR_AETFE				(1 << 2)
+#define TXQCR_AETFE				(1 << 2)    /* KSZ8851SNL    */
 #define TXQCR_TXQMAM				(1 << 1)
 #define TXQCR_METFE				(1 << 0)
 
@@ -167,6 +172,10 @@
 
 #define KS_RXFDPR				0x86
 #define RXFDPR_RXFPAI				(1 << 14)
+#define RXFDPR_WST				(1 << 12)   /* KSZ8851-16MLL */
+#define RXFDPR_EMS				(1 << 11)   /* KSZ8851-16MLL */
+#define RXFDPR_RXFP_MASK			(0x7ff << 0)
+#define RXFDPR_RXFP_SHIFT			(0)
 
 #define KS_RXDTTR				0x8C
 #define KS_RXDBCTR				0x8E
@@ -184,7 +193,7 @@
 #define IRQ_RXMPDI				(1 << 4)
 #define IRQ_LDI					(1 << 3)
 #define IRQ_EDI					(1 << 2)
-#define IRQ_SPIBEI				(1 << 1)
+#define IRQ_SPIBEI				(1 << 1)    /* KSZ8851SNL    */
 #define IRQ_DEDI				(1 << 0)
 
 #define KS_RXFCTR				0x9C
@@ -257,42 +266,37 @@
 #define KS_P1ANLPR				0xEE
 
 #define KS_P1SCLMD				0xF4
-#define P1SCLMD_LEDOFF				(1 << 15)
-#define P1SCLMD_TXIDS				(1 << 14)
-#define P1SCLMD_RESTARTAN			(1 << 13)
-#define P1SCLMD_DISAUTOMDIX			(1 << 10)
-#define P1SCLMD_FORCEMDIX			(1 << 9)
-#define P1SCLMD_AUTONEGEN			(1 << 7)
-#define P1SCLMD_FORCE100			(1 << 6)
-#define P1SCLMD_FORCEFDX			(1 << 5)
-#define P1SCLMD_ADV_FLOW			(1 << 4)
-#define P1SCLMD_ADV_100BT_FDX			(1 << 3)
-#define P1SCLMD_ADV_100BT_HDX			(1 << 2)
-#define P1SCLMD_ADV_10BT_FDX			(1 << 1)
-#define P1SCLMD_ADV_10BT_HDX			(1 << 0)
 
 #define KS_P1CR					0xF6
-#define P1CR_HP_MDIX				(1 << 15)
-#define P1CR_REV_POL				(1 << 13)
-#define P1CR_OP_100M				(1 << 10)
-#define P1CR_OP_FDX				(1 << 9)
-#define P1CR_OP_MDI				(1 << 7)
-#define P1CR_AN_DONE				(1 << 6)
-#define P1CR_LINK_GOOD				(1 << 5)
-#define P1CR_PNTR_FLOW				(1 << 4)
-#define P1CR_PNTR_100BT_FDX			(1 << 3)
-#define P1CR_PNTR_100BT_HDX			(1 << 2)
-#define P1CR_PNTR_10BT_FDX			(1 << 1)
-#define P1CR_PNTR_10BT_HDX			(1 << 0)
+#define P1CR_LEDOFF				(1 << 15)
+#define P1CR_TXIDS				(1 << 14)
+#define P1CR_RESTARTAN				(1 << 13)
+#define P1CR_DISAUTOMDIX			(1 << 10)
+#define P1CR_FORCEMDIX				(1 << 9)
+#define P1CR_AUTONEGEN				(1 << 7)
+#define P1CR_FORCE100				(1 << 6)
+#define P1CR_FORCEFDX				(1 << 5)
+#define P1CR_ADV_FLOW				(1 << 4)
+#define P1CR_ADV_100BT_FDX			(1 << 3)
+#define P1CR_ADV_100BT_HDX			(1 << 2)
+#define P1CR_ADV_10BT_FDX			(1 << 1)
+#define P1CR_ADV_10BT_HDX			(1 << 0)
+
+#define KS_P1SR					0xF8
+#define P1SR_HP_MDIX				(1 << 15)
+#define P1SR_REV_POL				(1 << 13)
+#define P1SR_OP_100M				(1 << 10)
+#define P1SR_OP_FDX				(1 << 9)
+#define P1SR_OP_MDI				(1 << 7)
+#define P1SR_AN_DONE				(1 << 6)
+#define P1SR_LINK_GOOD				(1 << 5)
+#define P1SR_PNTR_FLOW				(1 << 4)
+#define P1SR_PNTR_100BT_FDX			(1 << 3)
+#define P1SR_PNTR_100BT_HDX			(1 << 2)
+#define P1SR_PNTR_10BT_FDX			(1 << 1)
+#define P1SR_PNTR_10BT_HDX			(1 << 0)
 
 /* TX Frame control */
-
 #define TXFR_TXIC				(1 << 15)
 #define TXFR_TXFID_MASK				(0x3f << 0)
 #define TXFR_TXFID_SHIFT			(0)
-
-/* SPI frame opcodes */
-#define KS_SPIOP_RD				(0x00)
-#define KS_SPIOP_WR				(0x40)
-#define KS_SPIOP_RXFIFO				(0x80)
-#define KS_SPIOP_TXFIFO				(0xC0)
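
The header rewrite also corrects two long-standing misnomers: the bits previously defined under P1SCLMD_* are really the port-1 control fields at 0xF6 (now P1CR_*), and the old P1CR_* status bits actually live in the port-1 status register at 0xF8 (now KS_P1SR/P1SR_*). With the corrected names, a link-state check reads like this sketch (ks8851_rdreg16 is the driver's register accessor):

	u16 p1sr = ks8851_rdreg16(ks, KS_P1SR);

	if (p1sr & P1SR_LINK_GOOD)
		netif_carrier_on(ks->netdev);
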
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 0e9719f..a41a90c 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /**
  * drivers/net/ethernet/micrel/ks8851_mll.c
  * Copyright (c) 2009 Micrel Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 /* Supports:
@@ -40,6 +28,8 @@
 #include <linux/of_device.h>
 #include <linux/of_net.h>
 
+#include "ks8851.h"
+
 #define	DRV_NAME	"ks8851_mll"
 
 static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
@@ -48,319 +38,10 @@
 #define TX_BUF_SIZE			2000
 #define RX_BUF_SIZE			2000
 
-#define KS_CCR				0x08
-#define CCR_EEPROM			(1 << 9)
-#define CCR_SPI				(1 << 8)
-#define CCR_8BIT			(1 << 7)
-#define CCR_16BIT			(1 << 6)
-#define CCR_32BIT			(1 << 5)
-#define CCR_SHARED			(1 << 4)
-#define CCR_32PIN			(1 << 0)
-
-/* MAC address registers */
-#define KS_MARL				0x10
-#define KS_MARM				0x12
-#define KS_MARH				0x14
-
-#define KS_OBCR				0x20
-#define OBCR_ODS_16MA			(1 << 6)
-
-#define KS_EEPCR			0x22
-#define EEPCR_EESA			(1 << 4)
-#define EEPCR_EESB			(1 << 3)
-#define EEPCR_EEDO			(1 << 2)
-#define EEPCR_EESCK			(1 << 1)
-#define EEPCR_EECS			(1 << 0)
-
-#define KS_MBIR				0x24
-#define MBIR_TXMBF			(1 << 12)
-#define MBIR_TXMBFA			(1 << 11)
-#define MBIR_RXMBF			(1 << 4)
-#define MBIR_RXMBFA			(1 << 3)
-
-#define KS_GRR				0x26
-#define GRR_QMU				(1 << 1)
-#define GRR_GSR				(1 << 0)
-
-#define KS_WFCR				0x2A
-#define WFCR_MPRXE			(1 << 7)
-#define WFCR_WF3E			(1 << 3)
-#define WFCR_WF2E			(1 << 2)
-#define WFCR_WF1E			(1 << 1)
-#define WFCR_WF0E			(1 << 0)
-
-#define KS_WF0CRC0			0x30
-#define KS_WF0CRC1			0x32
-#define KS_WF0BM0			0x34
-#define KS_WF0BM1			0x36
-#define KS_WF0BM2			0x38
-#define KS_WF0BM3			0x3A
-
-#define KS_WF1CRC0			0x40
-#define KS_WF1CRC1			0x42
-#define KS_WF1BM0			0x44
-#define KS_WF1BM1			0x46
-#define KS_WF1BM2			0x48
-#define KS_WF1BM3			0x4A
-
-#define KS_WF2CRC0			0x50
-#define KS_WF2CRC1			0x52
-#define KS_WF2BM0			0x54
-#define KS_WF2BM1			0x56
-#define KS_WF2BM2			0x58
-#define KS_WF2BM3			0x5A
-
-#define KS_WF3CRC0			0x60
-#define KS_WF3CRC1			0x62
-#define KS_WF3BM0			0x64
-#define KS_WF3BM1			0x66
-#define KS_WF3BM2			0x68
-#define KS_WF3BM3			0x6A
-
-#define KS_TXCR				0x70
-#define TXCR_TCGICMP			(1 << 8)
-#define TXCR_TCGUDP			(1 << 7)
-#define TXCR_TCGTCP			(1 << 6)
-#define TXCR_TCGIP			(1 << 5)
-#define TXCR_FTXQ			(1 << 4)
-#define TXCR_TXFCE			(1 << 3)
-#define TXCR_TXPE			(1 << 2)
-#define TXCR_TXCRC			(1 << 1)
-#define TXCR_TXE			(1 << 0)
-
-#define KS_TXSR				0x72
-#define TXSR_TXLC			(1 << 13)
-#define TXSR_TXMC			(1 << 12)
-#define TXSR_TXFID_MASK			(0x3f << 0)
-#define TXSR_TXFID_SHIFT		(0)
-#define TXSR_TXFID_GET(_v)		(((_v) >> 0) & 0x3f)
-
-
-#define KS_RXCR1			0x74
-#define RXCR1_FRXQ			(1 << 15)
-#define RXCR1_RXUDPFCC			(1 << 14)
-#define RXCR1_RXTCPFCC			(1 << 13)
-#define RXCR1_RXIPFCC			(1 << 12)
-#define RXCR1_RXPAFMA			(1 << 11)
-#define RXCR1_RXFCE			(1 << 10)
-#define RXCR1_RXEFE			(1 << 9)
-#define RXCR1_RXMAFMA			(1 << 8)
-#define RXCR1_RXBE			(1 << 7)
-#define RXCR1_RXME			(1 << 6)
-#define RXCR1_RXUE			(1 << 5)
-#define RXCR1_RXAE			(1 << 4)
-#define RXCR1_RXINVF			(1 << 1)
-#define RXCR1_RXE			(1 << 0)
 #define RXCR1_FILTER_MASK    		(RXCR1_RXINVF | RXCR1_RXAE | \
 					 RXCR1_RXMAFMA | RXCR1_RXPAFMA)
-
-#define KS_RXCR2			0x76
-#define RXCR2_SRDBL_MASK		(0x7 << 5)
-#define RXCR2_SRDBL_SHIFT		(5)
-#define RXCR2_SRDBL_4B			(0x0 << 5)
-#define RXCR2_SRDBL_8B			(0x1 << 5)
-#define RXCR2_SRDBL_16B			(0x2 << 5)
-#define RXCR2_SRDBL_32B			(0x3 << 5)
-/* #define RXCR2_SRDBL_FRAME		(0x4 << 5) */
-#define RXCR2_IUFFP			(1 << 4)
-#define RXCR2_RXIUFCEZ			(1 << 3)
-#define RXCR2_UDPLFE			(1 << 2)
-#define RXCR2_RXICMPFCC			(1 << 1)
-#define RXCR2_RXSAF			(1 << 0)
-
-#define KS_TXMIR			0x78
-
-#define KS_RXFHSR			0x7C
-#define RXFSHR_RXFV			(1 << 15)
-#define RXFSHR_RXICMPFCS		(1 << 13)
-#define RXFSHR_RXIPFCS			(1 << 12)
-#define RXFSHR_RXTCPFCS			(1 << 11)
-#define RXFSHR_RXUDPFCS			(1 << 10)
-#define RXFSHR_RXBF			(1 << 7)
-#define RXFSHR_RXMF			(1 << 6)
-#define RXFSHR_RXUF			(1 << 5)
-#define RXFSHR_RXMR			(1 << 4)
-#define RXFSHR_RXFT			(1 << 3)
-#define RXFSHR_RXFTL			(1 << 2)
-#define RXFSHR_RXRF			(1 << 1)
-#define RXFSHR_RXCE			(1 << 0)
-#define	RXFSHR_ERR			(RXFSHR_RXCE | RXFSHR_RXRF |\
-					RXFSHR_RXFTL | RXFSHR_RXMR |\
-					RXFSHR_RXICMPFCS | RXFSHR_RXIPFCS |\
-					RXFSHR_RXTCPFCS)
-#define KS_RXFHBCR			0x7E
-#define RXFHBCR_CNT_MASK		0x0FFF
-
-#define KS_TXQCR			0x80
-#define TXQCR_AETFE			(1 << 2)
-#define TXQCR_TXQMAM			(1 << 1)
-#define TXQCR_METFE			(1 << 0)
-
-#define KS_RXQCR			0x82
-#define RXQCR_RXDTTS			(1 << 12)
-#define RXQCR_RXDBCTS			(1 << 11)
-#define RXQCR_RXFCTS			(1 << 10)
-#define RXQCR_RXIPHTOE			(1 << 9)
-#define RXQCR_RXDTTE			(1 << 7)
-#define RXQCR_RXDBCTE			(1 << 6)
-#define RXQCR_RXFCTE			(1 << 5)
-#define RXQCR_ADRFE			(1 << 4)
-#define RXQCR_SDA			(1 << 3)
-#define RXQCR_RRXEF			(1 << 0)
 #define RXQCR_CMD_CNTL                	(RXQCR_RXFCTE|RXQCR_ADRFE)
 
-#define KS_TXFDPR			0x84
-#define TXFDPR_TXFPAI			(1 << 14)
-#define TXFDPR_TXFP_MASK		(0x7ff << 0)
-#define TXFDPR_TXFP_SHIFT		(0)
-
-#define KS_RXFDPR			0x86
-#define RXFDPR_RXFPAI			(1 << 14)
-
-#define KS_RXDTTR			0x8C
-#define KS_RXDBCTR			0x8E
-
-#define KS_IER				0x90
-#define KS_ISR				0x92
-#define IRQ_LCI				(1 << 15)
-#define IRQ_TXI				(1 << 14)
-#define IRQ_RXI				(1 << 13)
-#define IRQ_RXOI			(1 << 11)
-#define IRQ_TXPSI			(1 << 9)
-#define IRQ_RXPSI			(1 << 8)
-#define IRQ_TXSAI			(1 << 6)
-#define IRQ_RXWFDI			(1 << 5)
-#define IRQ_RXMPDI			(1 << 4)
-#define IRQ_LDI				(1 << 3)
-#define IRQ_EDI				(1 << 2)
-#define IRQ_SPIBEI			(1 << 1)
-#define IRQ_DEDI			(1 << 0)
-
-#define KS_RXFCTR			0x9C
-#define RXFCTR_THRESHOLD_MASK     	0x00FF
-
-#define KS_RXFC				0x9D
-#define RXFCTR_RXFC_MASK		(0xff << 8)
-#define RXFCTR_RXFC_SHIFT		(8)
-#define RXFCTR_RXFC_GET(_v)		(((_v) >> 8) & 0xff)
-#define RXFCTR_RXFCT_MASK		(0xff << 0)
-#define RXFCTR_RXFCT_SHIFT		(0)
-
-#define KS_TXNTFSR			0x9E
-
-#define KS_MAHTR0			0xA0
-#define KS_MAHTR1			0xA2
-#define KS_MAHTR2			0xA4
-#define KS_MAHTR3			0xA6
-
-#define KS_FCLWR			0xB0
-#define KS_FCHWR			0xB2
-#define KS_FCOWR			0xB4
-
-#define KS_CIDER			0xC0
-#define CIDER_ID			0x8870
-#define CIDER_REV_MASK			(0x7 << 1)
-#define CIDER_REV_SHIFT			(1)
-#define CIDER_REV_GET(_v)		(((_v) >> 1) & 0x7)
-
-#define KS_CGCR				0xC6
-#define KS_IACR				0xC8
-#define IACR_RDEN			(1 << 12)
-#define IACR_TSEL_MASK			(0x3 << 10)
-#define IACR_TSEL_SHIFT			(10)
-#define IACR_TSEL_MIB			(0x3 << 10)
-#define IACR_ADDR_MASK			(0x1f << 0)
-#define IACR_ADDR_SHIFT			(0)
-
-#define KS_IADLR			0xD0
-#define KS_IAHDR			0xD2
-
-#define KS_PMECR			0xD4
-#define PMECR_PME_DELAY			(1 << 14)
-#define PMECR_PME_POL			(1 << 12)
-#define PMECR_WOL_WAKEUP		(1 << 11)
-#define PMECR_WOL_MAGICPKT		(1 << 10)
-#define PMECR_WOL_LINKUP		(1 << 9)
-#define PMECR_WOL_ENERGY		(1 << 8)
-#define PMECR_AUTO_WAKE_EN		(1 << 7)
-#define PMECR_WAKEUP_NORMAL		(1 << 6)
-#define PMECR_WKEVT_MASK		(0xf << 2)
-#define PMECR_WKEVT_SHIFT		(2)
-#define PMECR_WKEVT_GET(_v)		(((_v) >> 2) & 0xf)
-#define PMECR_WKEVT_ENERGY		(0x1 << 2)
-#define PMECR_WKEVT_LINK		(0x2 << 2)
-#define PMECR_WKEVT_MAGICPKT		(0x4 << 2)
-#define PMECR_WKEVT_FRAME		(0x8 << 2)
-#define PMECR_PM_MASK			(0x3 << 0)
-#define PMECR_PM_SHIFT			(0)
-#define PMECR_PM_NORMAL			(0x0 << 0)
-#define PMECR_PM_ENERGY			(0x1 << 0)
-#define PMECR_PM_SOFTDOWN		(0x2 << 0)
-#define PMECR_PM_POWERSAVE		(0x3 << 0)
-
-/* Standard MII PHY data */
-#define KS_P1MBCR			0xE4
-#define P1MBCR_FORCE_FDX		(1 << 8)
-
-#define KS_P1MBSR			0xE6
-#define P1MBSR_AN_COMPLETE		(1 << 5)
-#define P1MBSR_AN_CAPABLE		(1 << 3)
-#define P1MBSR_LINK_UP			(1 << 2)
-
-#define KS_PHY1ILR			0xE8
-#define KS_PHY1IHR			0xEA
-#define KS_P1ANAR			0xEC
-#define KS_P1ANLPR			0xEE
-
-#define KS_P1SCLMD			0xF4
-#define P1SCLMD_LEDOFF			(1 << 15)
-#define P1SCLMD_TXIDS			(1 << 14)
-#define P1SCLMD_RESTARTAN		(1 << 13)
-#define P1SCLMD_DISAUTOMDIX		(1 << 10)
-#define P1SCLMD_FORCEMDIX		(1 << 9)
-#define P1SCLMD_AUTONEGEN		(1 << 7)
-#define P1SCLMD_FORCE100		(1 << 6)
-#define P1SCLMD_FORCEFDX		(1 << 5)
-#define P1SCLMD_ADV_FLOW		(1 << 4)
-#define P1SCLMD_ADV_100BT_FDX		(1 << 3)
-#define P1SCLMD_ADV_100BT_HDX		(1 << 2)
-#define P1SCLMD_ADV_10BT_FDX		(1 << 1)
-#define P1SCLMD_ADV_10BT_HDX		(1 << 0)
-
-#define KS_P1CR				0xF6
-#define P1CR_HP_MDIX			(1 << 15)
-#define P1CR_REV_POL			(1 << 13)
-#define P1CR_OP_100M			(1 << 10)
-#define P1CR_OP_FDX			(1 << 9)
-#define P1CR_OP_MDI			(1 << 7)
-#define P1CR_AN_DONE			(1 << 6)
-#define P1CR_LINK_GOOD			(1 << 5)
-#define P1CR_PNTR_FLOW			(1 << 4)
-#define P1CR_PNTR_100BT_FDX		(1 << 3)
-#define P1CR_PNTR_100BT_HDX		(1 << 2)
-#define P1CR_PNTR_10BT_FDX		(1 << 1)
-#define P1CR_PNTR_10BT_HDX		(1 << 0)
-
-/* TX Frame control */
-
-#define TXFR_TXIC			(1 << 15)
-#define TXFR_TXFID_MASK			(0x3f << 0)
-#define TXFR_TXFID_SHIFT		(0)
-
-#define KS_P1SR				0xF8
-#define P1SR_HP_MDIX			(1 << 15)
-#define P1SR_REV_POL			(1 << 13)
-#define P1SR_OP_100M			(1 << 10)
-#define P1SR_OP_FDX			(1 << 9)
-#define P1SR_OP_MDI			(1 << 7)
-#define P1SR_AN_DONE			(1 << 6)
-#define P1SR_LINK_GOOD			(1 << 5)
-#define P1SR_PNTR_FLOW			(1 << 4)
-#define P1SR_PNTR_100BT_FDX		(1 << 3)
-#define P1SR_PNTR_100BT_HDX		(1 << 2)
-#define P1SR_PNTR_10BT_FDX		(1 << 1)
-#define P1SR_PNTR_10BT_HDX		(1 << 0)
-
 #define	ENUM_BUS_NONE			0
 #define	ENUM_BUS_8BIT			1
 #define	ENUM_BUS_16BIT			2
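
The large removal above deletes ks8851_mll.c's private copy of the QMU register map; the shared ks8851.h (included in an earlier hunk) is now the single source of the register layout for both the SPI and the 16-bit-bus drivers, and only driver-policy composites stay local, roughly:

	#include "ks8851.h"			/* hardware register layout */

	/* driver-local policy masks remain in the .c file */
	#define RXCR1_FILTER_MASK	(RXCR1_RXINVF | RXCR1_RXAE | \
					 RXCR1_RXMAFMA | RXCR1_RXPAFMA)
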
@@ -1021,9 +702,9 @@
  * spin_lock_irqsave is required because tx and rx should be mutually exclusive.
  * So while tx is in progress, prevent the IRQ from firing.
  */
-static int ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
-	int retv = NETDEV_TX_OK;
+	netdev_tx_t retv = NETDEV_TX_OK;
 	struct ks_net *ks = netdev_priv(netdev);
 
 	disable_irq(netdev->irq);
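
The signature change matches the prototype in struct net_device_ops: .ndo_start_xmit returns netdev_tx_t, and returning a plain int trips type-checking tools on newer kernels. The shape the core expects, as a sketch:

	static netdev_tx_t sketch_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		if (tx_queue_full)		/* placeholder condition */
			return NETDEV_TX_BUSY;	/* core requeues the skb */
		/* ... hand skb to the hardware ... */
		return NETDEV_TX_OK;		/* skb consumed */
	}
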
@@ -1475,7 +1156,7 @@
 	ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI);
 
 	/* Setup Receive Frame Threshold - 1 frame (RXFCTFC) */
-	ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_THRESHOLD_MASK);
+	ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_RXFCT_MASK);
 
 	/* Setup RxQ Command Control (RXQCR) */
 	ks->rc_rxqcr = RXQCR_CMD_CNTL;
@@ -1488,7 +1169,7 @@
 	 */
 
 	w = ks_rdreg16(ks, KS_P1MBCR);
-	w &= ~P1MBCR_FORCE_FDX;
+	w &= ~BMCR_FULLDPLX;
 	ks_wrreg16(ks, KS_P1MBCR, w);
 
 	w = TXCR_TXFCE | TXCR_TXPE | TXCR_TXCRC | TXCR_TCGIP;
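
KS_P1MBCR mirrors the standard MII Basic Mode Control Register, so the generic BMCR_* names from <linux/mii.h> can replace the private P1MBCR_FORCE_FDX; both are bit 8. The equivalence, spelled out (ks_rdreg16/ks_wrreg16 come from the driver):

	#include <linux/mii.h>

	/* BMCR_FULLDPLX == 0x0100 == the old P1MBCR_FORCE_FDX (1 << 8) */
	u16 w = ks_rdreg16(ks, KS_P1MBCR);

	w &= ~BMCR_FULLDPLX;		/* stop forcing full duplex */
	ks_wrreg16(ks, KS_P1MBCR, w);
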
@@ -1544,7 +1225,6 @@
 static int ks8851_probe(struct platform_device *pdev)
 {
 	int err;
-	struct resource *io_d, *io_c;
 	struct net_device *netdev;
 	struct ks_net *ks;
 	u16 id, data;
@@ -1559,15 +1239,13 @@
 	ks = netdev_priv(netdev);
 	ks->netdev = netdev;
 
-	io_d = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ks->hw_addr = devm_ioremap_resource(&pdev->dev, io_d);
+	ks->hw_addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ks->hw_addr)) {
 		err = PTR_ERR(ks->hw_addr);
 		goto err_free;
 	}
 
-	io_c = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	ks->hw_addr_cmd = devm_ioremap_resource(&pdev->dev, io_c);
+	ks->hw_addr_cmd = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(ks->hw_addr_cmd)) {
 		err = PTR_ERR(ks->hw_addr_cmd);
 		goto err_free;
@@ -1629,13 +1307,13 @@
 	ks_setup_int(ks);
 
 	data = ks_rdreg16(ks, KS_OBCR);
-	ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16MA);
+	ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA);
 
 	/* overwriting the default MAC address */
 	if (pdev->dev.of_node) {
 		mac = of_get_mac_address(pdev->dev.of_node);
-		if (mac)
-			memcpy(ks->mac_addr, mac, ETH_ALEN);
+		if (!IS_ERR(mac))
+			ether_addr_copy(ks->mac_addr, mac);
 	} else {
 		struct ks8851_mll_platform_data *pdata;
 
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index ebbdfb9..e102e15 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /**
  * drivers/net/ethernet/micrel/ksz884x.c - Micrel KSZ8841/2 PCI Ethernet driver
  *
  * Copyright (c) 2009-2010 Micrel, Inc.
  * 	Tristram Ha <Tristram.Ha@micrel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -2174,7 +2166,7 @@
 	num = (data & BROADCAST_STORM_RATE_HI);
 	num <<= 8;
 	num |= (data & BROADCAST_STORM_RATE_LO) >> 8;
-	num = (num * 100 + BROADCAST_STORM_VALUE / 2) / BROADCAST_STORM_VALUE;
+	num = DIV_ROUND_CLOSEST(num * 100, BROADCAST_STORM_VALUE);
 	*percent = (u8) num;
 }
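
DIV_ROUND_CLOSEST() states the round-to-nearest intent directly instead of the hand-written `(x + d/2) / d`. A self-contained userspace illustration of the same arithmetic (the divisor 672 is made up; the driver divides by BROADCAST_STORM_VALUE, and the kernel macro additionally copes with negative dividends):

	#include <stdio.h>

	/* simplified, non-negative case of the kernel macro */
	#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

	int main(void)
	{
		/* 14800 / 672 = 22.02..., rounds to 22 */
		printf("%d\n", DIV_ROUND_CLOSEST(148 * 100, 672));
		return 0;
	}
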
 
diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig
index 16bd3f4..45fe41f 100644
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Microchip network device configuration
 #
@@ -5,7 +6,6 @@
 config NET_VENDOR_MICROCHIP
 	bool "Microchip devices"
 	default y
-	depends on SPI
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
diff --git a/drivers/net/ethernet/microchip/Makefile b/drivers/net/ethernet/microchip/Makefile
index 538926d..da60354 100644
--- a/drivers/net/ethernet/microchip/Makefile
+++ b/drivers/net/ethernet/microchip/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Microchip network device drivers.
 #
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index f6ecfa7..0567e4f 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Microchip ENC28J60 ethernet driver (MAC + PHY)
  *
@@ -5,11 +6,6 @@
  * Author: Claudio Lanconelli <lanconelli.claudio@eptar.com>
  * based on enc28j60.c written by David Anders for 2.4 kernel version
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
  * $Id: enc28j60.c,v 1.22 2007/12/20 10:47:01 claudio Exp $
  */
 
@@ -18,9 +14,9 @@
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/interrupt.h>
+#include <linux/property.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -28,7 +24,6 @@
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
-#include <linux/of_net.h>
 
 #include "enc28j60_hw.h"
 
@@ -41,10 +36,11 @@
 	(NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN | NETIF_MSG_LINK)
 
 /* Buffer size required for the largest SPI transfer (i.e., reading a
- * frame). */
+ * frame).
+ */
 #define SPI_TRANSFER_BUF_LEN	(4 + MAX_FRAMELEN)
 
-#define TX_TIMEOUT	(4 * HZ)
+#define TX_TIMEOUT		(4 * HZ)
 
 /* Max TX retries in case of collision as suggested by errata datasheet */
 #define MAX_TX_RETRYCOUNT	16
@@ -83,11 +79,12 @@
 
 /*
  * SPI read buffer
- * wait for the SPI transfer and copy received data to destination
+ * Wait for the SPI transfer and copy received data to destination.
  */
 static int
 spi_read_buf(struct enc28j60_net *priv, int len, u8 *data)
 {
+	struct device *dev = &priv->spi->dev;
 	u8 *rx_buf = priv->spi_transfer_buf + 4;
 	u8 *tx_buf = priv->spi_transfer_buf;
 	struct spi_transfer tx = {
@@ -113,8 +110,8 @@
 		ret = msg.status;
 	}
 	if (ret && netif_msg_drv(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n",
-			__func__, ret);
+		dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n",
+			   __func__, ret);
 
 	return ret;
 }
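
The logging conversions in this file all follow one pattern: raw printk(KERN_DEBUG DRV_NAME ": ...") calls become dev_printk()/netdev_* variants, so each message is automatically prefixed with the SPI device or interface name. dev_printk(KERN_DEBUG, ...) rather than dev_dbg() is used deliberately, because the driver gates verbosity itself through netif_msg_*() and dev_dbg() would additionally depend on dynamic debug. Side by side:

	/* before */
	printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n", __func__, ret);
	/* after: message reads "enc28j60 spi0.0: ..." or similar */
	dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n", __func__, ret);
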
@@ -122,9 +119,9 @@
 /*
  * SPI write buffer
  */
-static int spi_write_buf(struct enc28j60_net *priv, int len,
-			 const u8 *data)
+static int spi_write_buf(struct enc28j60_net *priv, int len, const u8 *data)
 {
+	struct device *dev = &priv->spi->dev;
 	int ret;
 
 	if (len > SPI_TRANSFER_BUF_LEN - 1 || len <= 0)
@@ -134,8 +131,8 @@
 		memcpy(&priv->spi_transfer_buf[1], data, len);
 		ret = spi_write(priv->spi, priv->spi_transfer_buf, len + 1);
 		if (ret && netif_msg_drv(priv))
-			printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n",
-				__func__, ret);
+			dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n",
+				   __func__, ret);
 	}
 	return ret;
 }
@@ -143,9 +140,9 @@
 /*
  * basic SPI read operation
  */
-static u8 spi_read_op(struct enc28j60_net *priv, u8 op,
-			   u8 addr)
+static u8 spi_read_op(struct enc28j60_net *priv, u8 op, u8 addr)
 {
+	struct device *dev = &priv->spi->dev;
 	u8 tx_buf[2];
 	u8 rx_buf[4];
 	u8 val = 0;
@@ -159,8 +156,8 @@
 	tx_buf[0] = op | (addr & ADDR_MASK);
 	ret = spi_write_then_read(priv->spi, tx_buf, 1, rx_buf, slen);
 	if (ret)
-		printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n",
-			__func__, ret);
+		dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n",
+			   __func__, ret);
 	else
 		val = rx_buf[slen - 1];
 
@@ -170,28 +167,25 @@
 /*
  * basic SPI write operation
  */
-static int spi_write_op(struct enc28j60_net *priv, u8 op,
-			u8 addr, u8 val)
+static int spi_write_op(struct enc28j60_net *priv, u8 op, u8 addr, u8 val)
 {
+	struct device *dev = &priv->spi->dev;
 	int ret;
 
 	priv->spi_transfer_buf[0] = op | (addr & ADDR_MASK);
 	priv->spi_transfer_buf[1] = val;
 	ret = spi_write(priv->spi, priv->spi_transfer_buf, 2);
 	if (ret && netif_msg_drv(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n",
-			__func__, ret);
+		dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n",
+			   __func__, ret);
 	return ret;
 }
 
 static void enc28j60_soft_reset(struct enc28j60_net *priv)
 {
-	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__);
-
 	spi_write_op(priv, ENC28J60_SOFT_RESET, 0, ENC28J60_SOFT_RESET);
 	/* Errata workaround #1, CLKRDY check is unreliable,
-	 * delay at least 1 mS instead */
+	 * delay at least 1 ms instead */
 	udelay(2000);
 }
 
@@ -203,7 +197,7 @@
 	u8 b = (addr & BANK_MASK) >> 5;
 
 	/* These registers (EIE, EIR, ESTAT, ECON2, ECON1)
-	 * are present in all banks, no need to switch bank
+	 * are present in all banks, no need to switch bank.
 	 */
 	if (addr >= EIE && addr <= ECON1)
 		return;
@@ -242,15 +236,13 @@
 /*
  * Register bit field Set
  */
-static void nolock_reg_bfset(struct enc28j60_net *priv,
-				      u8 addr, u8 mask)
+static void nolock_reg_bfset(struct enc28j60_net *priv, u8 addr, u8 mask)
 {
 	enc28j60_set_bank(priv, addr);
 	spi_write_op(priv, ENC28J60_BIT_FIELD_SET, addr, mask);
 }
 
-static void locked_reg_bfset(struct enc28j60_net *priv,
-				      u8 addr, u8 mask)
+static void locked_reg_bfset(struct enc28j60_net *priv, u8 addr, u8 mask)
 {
 	mutex_lock(&priv->lock);
 	nolock_reg_bfset(priv, addr, mask);
@@ -260,15 +252,13 @@
 /*
  * Register bit field Clear
  */
-static void nolock_reg_bfclr(struct enc28j60_net *priv,
-				      u8 addr, u8 mask)
+static void nolock_reg_bfclr(struct enc28j60_net *priv, u8 addr, u8 mask)
 {
 	enc28j60_set_bank(priv, addr);
 	spi_write_op(priv, ENC28J60_BIT_FIELD_CLR, addr, mask);
 }
 
-static void locked_reg_bfclr(struct enc28j60_net *priv,
-				      u8 addr, u8 mask)
+static void locked_reg_bfclr(struct enc28j60_net *priv, u8 addr, u8 mask)
 {
 	mutex_lock(&priv->lock);
 	nolock_reg_bfclr(priv, addr, mask);
@@ -278,15 +268,13 @@
 /*
  * Register byte read
  */
-static int nolock_regb_read(struct enc28j60_net *priv,
-				     u8 address)
+static int nolock_regb_read(struct enc28j60_net *priv, u8 address)
 {
 	enc28j60_set_bank(priv, address);
 	return spi_read_op(priv, ENC28J60_READ_CTRL_REG, address);
 }
 
-static int locked_regb_read(struct enc28j60_net *priv,
-				     u8 address)
+static int locked_regb_read(struct enc28j60_net *priv, u8 address)
 {
 	int ret;
 
@@ -300,8 +288,7 @@
 /*
  * Register word read
  */
-static int nolock_regw_read(struct enc28j60_net *priv,
-				     u8 address)
+static int nolock_regw_read(struct enc28j60_net *priv, u8 address)
 {
 	int rl, rh;
 
@@ -312,8 +299,7 @@
 	return (rh << 8) | rl;
 }
 
-static int locked_regw_read(struct enc28j60_net *priv,
-				     u8 address)
+static int locked_regw_read(struct enc28j60_net *priv, u8 address)
 {
 	int ret;
 
@@ -327,15 +313,13 @@
 /*
  * Register byte write
  */
-static void nolock_regb_write(struct enc28j60_net *priv,
-				       u8 address, u8 data)
+static void nolock_regb_write(struct enc28j60_net *priv, u8 address, u8 data)
 {
 	enc28j60_set_bank(priv, address);
 	spi_write_op(priv, ENC28J60_WRITE_CTRL_REG, address, data);
 }
 
-static void locked_regb_write(struct enc28j60_net *priv,
-				       u8 address, u8 data)
+static void locked_regb_write(struct enc28j60_net *priv, u8 address, u8 data)
 {
 	mutex_lock(&priv->lock);
 	nolock_regb_write(priv, address, data);
@@ -345,8 +329,7 @@
 /*
  * Register word write
  */
-static void nolock_regw_write(struct enc28j60_net *priv,
-				       u8 address, u16 data)
+static void nolock_regw_write(struct enc28j60_net *priv, u8 address, u16 data)
 {
 	enc28j60_set_bank(priv, address);
 	spi_write_op(priv, ENC28J60_WRITE_CTRL_REG, address, (u8) data);
@@ -354,8 +337,7 @@
 		     (u8) (data >> 8));
 }
 
-static void locked_regw_write(struct enc28j60_net *priv,
-				       u8 address, u16 data)
+static void locked_regw_write(struct enc28j60_net *priv, u8 address, u16 data)
 {
 	mutex_lock(&priv->lock);
 	nolock_regw_write(priv, address, data);
@@ -364,20 +346,23 @@
 
 /*
  * Buffer memory read
- * Select the starting address and execute a SPI buffer read
+ * Select the starting address and execute a SPI buffer read.
  */
-static void enc28j60_mem_read(struct enc28j60_net *priv,
-				     u16 addr, int len, u8 *data)
+static void enc28j60_mem_read(struct enc28j60_net *priv, u16 addr, int len,
+			      u8 *data)
 {
 	mutex_lock(&priv->lock);
 	nolock_regw_write(priv, ERDPTL, addr);
 #ifdef CONFIG_ENC28J60_WRITEVERIFY
 	if (netif_msg_drv(priv)) {
+		struct device *dev = &priv->spi->dev;
 		u16 reg;
+
 		reg = nolock_regw_read(priv, ERDPTL);
 		if (reg != addr)
-			printk(KERN_DEBUG DRV_NAME ": %s() error writing ERDPT "
-				"(0x%04x - 0x%04x)\n", __func__, reg, addr);
+			dev_printk(KERN_DEBUG, dev,
+				   "%s() error writing ERDPT (0x%04x - 0x%04x)\n",
+				   __func__, reg, addr);
 	}
 #endif
 	spi_read_buf(priv, len, data);
@@ -390,6 +375,8 @@
 static void
 enc28j60_packet_write(struct enc28j60_net *priv, int len, const u8 *data)
 {
+	struct device *dev = &priv->spi->dev;
+
 	mutex_lock(&priv->lock);
 	/* Set the write pointer to start of transmit buffer area */
 	nolock_regw_write(priv, EWRPTL, TXSTART_INIT);
@@ -398,9 +385,9 @@
 		u16 reg;
 		reg = nolock_regw_read(priv, EWRPTL);
 		if (reg != TXSTART_INIT)
-			printk(KERN_DEBUG DRV_NAME
-				": %s() ERWPT:0x%04x != 0x%04x\n",
-				__func__, reg, TXSTART_INIT);
+			dev_printk(KERN_DEBUG, dev,
+				   "%s() ERWPT:0x%04x != 0x%04x\n",
+				   __func__, reg, TXSTART_INIT);
 	}
 #endif
 	/* Set the TXND pointer to correspond to the packet size given */
@@ -408,30 +395,28 @@
 	/* write per-packet control byte */
 	spi_write_op(priv, ENC28J60_WRITE_BUF_MEM, 0, 0x00);
 	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME
-			": %s() after control byte ERWPT:0x%04x\n",
-			__func__, nolock_regw_read(priv, EWRPTL));
+		dev_printk(KERN_DEBUG, dev,
+			   "%s() after control byte ERWPT:0x%04x\n",
+			   __func__, nolock_regw_read(priv, EWRPTL));
 	/* copy the packet into the transmit buffer */
 	spi_write_buf(priv, len, data);
 	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME
-			 ": %s() after write packet ERWPT:0x%04x, len=%d\n",
-			 __func__, nolock_regw_read(priv, EWRPTL), len);
+		dev_printk(KERN_DEBUG, dev,
+			   "%s() after write packet ERWPT:0x%04x, len=%d\n",
+			   __func__, nolock_regw_read(priv, EWRPTL), len);
 	mutex_unlock(&priv->lock);
 }
 
-static unsigned long msec20_to_jiffies;
-
 static int poll_ready(struct enc28j60_net *priv, u8 reg, u8 mask, u8 val)
 {
-	unsigned long timeout = jiffies + msec20_to_jiffies;
+	struct device *dev = &priv->spi->dev;
+	unsigned long timeout = jiffies + msecs_to_jiffies(20);
 
 	/* 20 msec timeout read */
 	while ((nolock_regb_read(priv, reg) & mask) != val) {
 		if (time_after(jiffies, timeout)) {
 			if (netif_msg_drv(priv))
-				dev_dbg(&priv->spi->dev,
-					"reg %02x ready timeout!\n", reg);
+				dev_dbg(dev, "reg %02x ready timeout!\n", reg);
 			return -ETIMEDOUT;
 		}
 		cpu_relax();
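
The static msec20_to_jiffies cache, filled in at module init, is gone; msecs_to_jiffies(20) is cheap enough to evaluate at the call site, and dropping it removes the only work the custom module_init() did (see the module_spi_driver() conversion at the end of this file). The resulting timeout idiom:

	unsigned long timeout = jiffies + msecs_to_jiffies(20);

	while (!done()) {		/* done() is a stand-in condition */
		if (time_after(jiffies, timeout))
			return -ETIMEDOUT;
		cpu_relax();
	}
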
@@ -449,7 +434,7 @@
 
 /*
  * PHY register read
- * PHY registers are not accessed directly, but through the MII
+ * PHY registers are not accessed directly, but through the MII.
  */
 static u16 enc28j60_phy_read(struct enc28j60_net *priv, u8 address)
 {
@@ -465,7 +450,7 @@
 	/* quit reading */
 	nolock_regb_write(priv, MICMD, 0x00);
 	/* return the data */
-	ret  = nolock_regw_read(priv, MIRDL);
+	ret = nolock_regw_read(priv, MIRDL);
 	mutex_unlock(&priv->lock);
 
 	return ret;
@@ -494,13 +479,13 @@
 {
 	int ret;
 	struct enc28j60_net *priv = netdev_priv(ndev);
+	struct device *dev = &priv->spi->dev;
 
 	mutex_lock(&priv->lock);
 	if (!priv->hw_enable) {
 		if (netif_msg_drv(priv))
-			printk(KERN_INFO DRV_NAME
-				": %s: Setting MAC address to %pM\n",
-				ndev->name, ndev->dev_addr);
+			dev_info(dev, "%s: Setting MAC address to %pM\n",
+				 ndev->name, ndev->dev_addr);
 		/* NOTE: MAC address in ENC28J60 is byte-backward */
 		nolock_regb_write(priv, MAADR5, ndev->dev_addr[0]);
 		nolock_regb_write(priv, MAADR4, ndev->dev_addr[1]);
@@ -511,9 +496,9 @@
 		ret = 0;
 	} else {
 		if (netif_msg_drv(priv))
-			printk(KERN_DEBUG DRV_NAME
-				": %s() Hardware must be disabled to set "
-				"Mac address\n", __func__);
+			dev_printk(KERN_DEBUG, dev,
+				   "%s() Hardware must be disabled to set MAC address\n",
+				   __func__);
 		ret = -EBUSY;
 	}
 	mutex_unlock(&priv->lock);
@@ -532,7 +517,7 @@
 	if (!is_valid_ether_addr(address->sa_data))
 		return -EADDRNOTAVAIL;
 
-	memcpy(dev->dev_addr, address->sa_data, dev->addr_len);
+	ether_addr_copy(dev->dev_addr, address->sa_data);
 	return enc28j60_set_hw_macaddr(dev);
 }
 
@@ -541,33 +526,36 @@
  */
 static void enc28j60_dump_regs(struct enc28j60_net *priv, const char *msg)
 {
+	struct device *dev = &priv->spi->dev;
+
 	mutex_lock(&priv->lock);
-	printk(KERN_DEBUG DRV_NAME " %s\n"
-		"HwRevID: 0x%02x\n"
-		"Cntrl: ECON1 ECON2 ESTAT  EIR  EIE\n"
-		"       0x%02x  0x%02x  0x%02x  0x%02x  0x%02x\n"
-		"MAC  : MACON1 MACON3 MACON4\n"
-		"       0x%02x   0x%02x   0x%02x\n"
-		"Rx   : ERXST  ERXND  ERXWRPT ERXRDPT ERXFCON EPKTCNT MAMXFL\n"
-		"       0x%04x 0x%04x 0x%04x  0x%04x  "
-		"0x%02x    0x%02x    0x%04x\n"
-		"Tx   : ETXST  ETXND  MACLCON1 MACLCON2 MAPHSUP\n"
-		"       0x%04x 0x%04x 0x%02x     0x%02x     0x%02x\n",
-		msg, nolock_regb_read(priv, EREVID),
-		nolock_regb_read(priv, ECON1), nolock_regb_read(priv, ECON2),
-		nolock_regb_read(priv, ESTAT), nolock_regb_read(priv, EIR),
-		nolock_regb_read(priv, EIE), nolock_regb_read(priv, MACON1),
-		nolock_regb_read(priv, MACON3), nolock_regb_read(priv, MACON4),
-		nolock_regw_read(priv, ERXSTL), nolock_regw_read(priv, ERXNDL),
-		nolock_regw_read(priv, ERXWRPTL),
-		nolock_regw_read(priv, ERXRDPTL),
-		nolock_regb_read(priv, ERXFCON),
-		nolock_regb_read(priv, EPKTCNT),
-		nolock_regw_read(priv, MAMXFLL), nolock_regw_read(priv, ETXSTL),
-		nolock_regw_read(priv, ETXNDL),
-		nolock_regb_read(priv, MACLCON1),
-		nolock_regb_read(priv, MACLCON2),
-		nolock_regb_read(priv, MAPHSUP));
+	dev_printk(KERN_DEBUG, dev,
+		   " %s\n"
+		   "HwRevID: 0x%02x\n"
+		   "Cntrl: ECON1 ECON2 ESTAT  EIR  EIE\n"
+		   "       0x%02x  0x%02x  0x%02x  0x%02x  0x%02x\n"
+		   "MAC  : MACON1 MACON3 MACON4\n"
+		   "       0x%02x   0x%02x   0x%02x\n"
+		   "Rx   : ERXST  ERXND  ERXWRPT ERXRDPT ERXFCON EPKTCNT MAMXFL\n"
+		   "       0x%04x 0x%04x 0x%04x  0x%04x  "
+		   "0x%02x    0x%02x    0x%04x\n"
+		   "Tx   : ETXST  ETXND  MACLCON1 MACLCON2 MAPHSUP\n"
+		   "       0x%04x 0x%04x 0x%02x     0x%02x     0x%02x\n",
+		   msg, nolock_regb_read(priv, EREVID),
+		   nolock_regb_read(priv, ECON1), nolock_regb_read(priv, ECON2),
+		   nolock_regb_read(priv, ESTAT), nolock_regb_read(priv, EIR),
+		   nolock_regb_read(priv, EIE), nolock_regb_read(priv, MACON1),
+		   nolock_regb_read(priv, MACON3), nolock_regb_read(priv, MACON4),
+		   nolock_regw_read(priv, ERXSTL), nolock_regw_read(priv, ERXNDL),
+		   nolock_regw_read(priv, ERXWRPTL),
+		   nolock_regw_read(priv, ERXRDPTL),
+		   nolock_regb_read(priv, ERXFCON),
+		   nolock_regb_read(priv, EPKTCNT),
+		   nolock_regw_read(priv, MAMXFLL), nolock_regw_read(priv, ETXSTL),
+		   nolock_regw_read(priv, ETXNDL),
+		   nolock_regb_read(priv, MACLCON1),
+		   nolock_regb_read(priv, MACLCON2),
+		   nolock_regb_read(priv, MAPHSUP));
 	mutex_unlock(&priv->lock);
 }
 
@@ -599,12 +587,13 @@
 
 static void nolock_rxfifo_init(struct enc28j60_net *priv, u16 start, u16 end)
 {
+	struct device *dev = &priv->spi->dev;
 	u16 erxrdpt;
 
 	if (start > 0x1FFF || end > 0x1FFF || start > end) {
 		if (netif_msg_drv(priv))
-			printk(KERN_ERR DRV_NAME ": %s(%d, %d) RXFIFO "
-				"bad parameters!\n", __func__, start, end);
+			dev_err(dev, "%s(%d, %d) RXFIFO bad parameters!\n",
+				__func__, start, end);
 		return;
 	}
 	/* set receive buffer start + end */
@@ -617,10 +606,12 @@
 
 static void nolock_txfifo_init(struct enc28j60_net *priv, u16 start, u16 end)
 {
+	struct device *dev = &priv->spi->dev;
+
 	if (start > 0x1FFF || end > 0x1FFF || start > end) {
 		if (netif_msg_drv(priv))
-			printk(KERN_ERR DRV_NAME ": %s(%d, %d) TXFIFO "
-				"bad parameters!\n", __func__, start, end);
+			dev_err(dev, "%s(%d, %d) TXFIFO bad parameters!\n",
+				__func__, start, end);
 		return;
 	}
 	/* set transmit buffer start + end */
@@ -630,14 +621,15 @@
 
 /*
  * Low power mode shrinks power consumption about 100x, so we'd like
- * the chip to be in that mode whenever it's inactive.  (However, we
- * can't stay in lowpower mode during suspend with WOL active.)
+ * the chip to be in that mode whenever it's inactive. (However, we
+ * can't stay in low power mode during suspend with WOL active.)
  */
 static void enc28j60_lowpower(struct enc28j60_net *priv, bool is_low)
 {
+	struct device *dev = &priv->spi->dev;
+
 	if (netif_msg_drv(priv))
-		dev_dbg(&priv->spi->dev, "%s power...\n",
-				is_low ? "low" : "high");
+		dev_dbg(dev, "%s power...\n", is_low ? "low" : "high");
 
 	mutex_lock(&priv->lock);
 	if (is_low) {
@@ -656,11 +648,12 @@
 
 static int enc28j60_hw_init(struct enc28j60_net *priv)
 {
+	struct device *dev = &priv->spi->dev;
 	u8 reg;
 
 	if (netif_msg_drv(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() - %s\n", __func__,
-			priv->full_duplex ? "FullDuplex" : "HalfDuplex");
+		dev_printk(KERN_DEBUG, dev, "%s() - %s\n", __func__,
+			   priv->full_duplex ? "FullDuplex" : "HalfDuplex");
 
 	mutex_lock(&priv->lock);
 	/* first reset the chip */
@@ -682,15 +675,15 @@
 	/*
 	 * Check the RevID.
 	 * If it's 0x00 or 0xFF probably the enc28j60 is not mounted or
-	 * damaged
+	 * damaged.
 	 */
 	reg = locked_regb_read(priv, EREVID);
 	if (netif_msg_drv(priv))
-		printk(KERN_INFO DRV_NAME ": chip RevID: 0x%02x\n", reg);
+		dev_info(dev, "chip RevID: 0x%02x\n", reg);
 	if (reg == 0x00 || reg == 0xff) {
 		if (netif_msg_drv(priv))
-			printk(KERN_DEBUG DRV_NAME ": %s() Invalid RevId %d\n",
-				__func__, reg);
+			dev_printk(KERN_DEBUG, dev, "%s() Invalid RevId %d\n",
+				   __func__, reg);
 		return 0;
 	}
 
@@ -723,7 +716,7 @@
 	/*
 	 * MACLCON1 (default)
 	 * MACLCON2 (default)
-	 * Set the maximum packet size which the controller will accept
+	 * Set the maximum packet size which the controller will accept.
 	 */
 	locked_regw_write(priv, MAMXFLL, MAX_FRAMELEN);
 
@@ -750,10 +743,12 @@
 
 static void enc28j60_hw_enable(struct enc28j60_net *priv)
 {
+	struct device *dev = &priv->spi->dev;
+
 	/* enable interrupts */
 	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() enabling interrupts.\n",
-			__func__);
+		dev_printk(KERN_DEBUG, dev, "%s() enabling interrupts.\n",
+			   __func__);
 
 	enc28j60_phy_write(priv, PHIE, PHIE_PGEIE | PHIE_PLNKIE);
 
@@ -772,7 +767,7 @@
 static void enc28j60_hw_disable(struct enc28j60_net *priv)
 {
 	mutex_lock(&priv->lock);
-	/* disable interrutps and packet reception */
+	/* disable interrupts and packet reception */
 	nolock_regb_write(priv, EIE, 0x00);
 	nolock_reg_bfclr(priv, ECON1, ECON1_RXEN);
 	priv->hw_enable = false;
@@ -793,14 +788,12 @@
 			priv->full_duplex = (duplex == DUPLEX_FULL);
 		else {
 			if (netif_msg_link(priv))
-				dev_warn(&ndev->dev,
-					"unsupported link setting\n");
+				netdev_warn(ndev, "unsupported link setting\n");
 			ret = -EOPNOTSUPP;
 		}
 	} else {
 		if (netif_msg_link(priv))
-			dev_warn(&ndev->dev, "Warning: hw must be disabled "
-				"to set link mode\n");
+			netdev_warn(ndev, "Warning: hw must be disabled to set link mode\n");
 		ret = -EBUSY;
 	}
 	return ret;
@@ -811,21 +804,23 @@
  */
 static void enc28j60_read_tsv(struct enc28j60_net *priv, u8 tsv[TSV_SIZE])
 {
+	struct device *dev = &priv->spi->dev;
 	int endptr;
 
 	endptr = locked_regw_read(priv, ETXNDL);
 	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME ": reading TSV at addr:0x%04x\n",
-			 endptr + 1);
+		dev_printk(KERN_DEBUG, dev, "reading TSV at addr:0x%04x\n",
+			   endptr + 1);
 	enc28j60_mem_read(priv, endptr + 1, TSV_SIZE, tsv);
 }
 
 static void enc28j60_dump_tsv(struct enc28j60_net *priv, const char *msg,
-				u8 tsv[TSV_SIZE])
+			      u8 tsv[TSV_SIZE])
 {
+	struct device *dev = &priv->spi->dev;
 	u16 tmp1, tmp2;
 
-	printk(KERN_DEBUG DRV_NAME ": %s - TSV:\n", msg);
+	dev_printk(KERN_DEBUG, dev, "%s - TSV:\n", msg);
 	tmp1 = tsv[1];
 	tmp1 <<= 8;
 	tmp1 |= tsv[0];
@@ -834,30 +829,32 @@
 	tmp2 <<= 8;
 	tmp2 |= tsv[4];
 
-	printk(KERN_DEBUG DRV_NAME ": ByteCount: %d, CollisionCount: %d,"
-		" TotByteOnWire: %d\n", tmp1, tsv[2] & 0x0f, tmp2);
-	printk(KERN_DEBUG DRV_NAME ": TxDone: %d, CRCErr:%d, LenChkErr: %d,"
-		" LenOutOfRange: %d\n", TSV_GETBIT(tsv, TSV_TXDONE),
-		TSV_GETBIT(tsv, TSV_TXCRCERROR),
-		TSV_GETBIT(tsv, TSV_TXLENCHKERROR),
-		TSV_GETBIT(tsv, TSV_TXLENOUTOFRANGE));
-	printk(KERN_DEBUG DRV_NAME ": Multicast: %d, Broadcast: %d, "
-		"PacketDefer: %d, ExDefer: %d\n",
-		TSV_GETBIT(tsv, TSV_TXMULTICAST),
-		TSV_GETBIT(tsv, TSV_TXBROADCAST),
-		TSV_GETBIT(tsv, TSV_TXPACKETDEFER),
-		TSV_GETBIT(tsv, TSV_TXEXDEFER));
-	printk(KERN_DEBUG DRV_NAME ": ExCollision: %d, LateCollision: %d, "
-		 "Giant: %d, Underrun: %d\n",
-		 TSV_GETBIT(tsv, TSV_TXEXCOLLISION),
-		 TSV_GETBIT(tsv, TSV_TXLATECOLLISION),
-		 TSV_GETBIT(tsv, TSV_TXGIANT), TSV_GETBIT(tsv, TSV_TXUNDERRUN));
-	printk(KERN_DEBUG DRV_NAME ": ControlFrame: %d, PauseFrame: %d, "
-		 "BackPressApp: %d, VLanTagFrame: %d\n",
-		 TSV_GETBIT(tsv, TSV_TXCONTROLFRAME),
-		 TSV_GETBIT(tsv, TSV_TXPAUSEFRAME),
-		 TSV_GETBIT(tsv, TSV_BACKPRESSUREAPP),
-		 TSV_GETBIT(tsv, TSV_TXVLANTAGFRAME));
+	dev_printk(KERN_DEBUG, dev,
+		   "ByteCount: %d, CollisionCount: %d, TotByteOnWire: %d\n",
+		   tmp1, tsv[2] & 0x0f, tmp2);
+	dev_printk(KERN_DEBUG, dev,
+		   "TxDone: %d, CRCErr:%d, LenChkErr: %d, LenOutOfRange: %d\n",
+		   TSV_GETBIT(tsv, TSV_TXDONE),
+		   TSV_GETBIT(tsv, TSV_TXCRCERROR),
+		   TSV_GETBIT(tsv, TSV_TXLENCHKERROR),
+		   TSV_GETBIT(tsv, TSV_TXLENOUTOFRANGE));
+	dev_printk(KERN_DEBUG, dev,
+		   "Multicast: %d, Broadcast: %d, PacketDefer: %d, ExDefer: %d\n",
+		   TSV_GETBIT(tsv, TSV_TXMULTICAST),
+		   TSV_GETBIT(tsv, TSV_TXBROADCAST),
+		   TSV_GETBIT(tsv, TSV_TXPACKETDEFER),
+		   TSV_GETBIT(tsv, TSV_TXEXDEFER));
+	dev_printk(KERN_DEBUG, dev,
+		   "ExCollision: %d, LateCollision: %d, Giant: %d, Underrun: %d\n",
+		   TSV_GETBIT(tsv, TSV_TXEXCOLLISION),
+		   TSV_GETBIT(tsv, TSV_TXLATECOLLISION),
+		   TSV_GETBIT(tsv, TSV_TXGIANT), TSV_GETBIT(tsv, TSV_TXUNDERRUN));
+	dev_printk(KERN_DEBUG, dev,
+		   "ControlFrame: %d, PauseFrame: %d, BackPressApp: %d, VLanTagFrame: %d\n",
+		   TSV_GETBIT(tsv, TSV_TXCONTROLFRAME),
+		   TSV_GETBIT(tsv, TSV_TXPAUSEFRAME),
+		   TSV_GETBIT(tsv, TSV_BACKPRESSUREAPP),
+		   TSV_GETBIT(tsv, TSV_TXVLANTAGFRAME));
 }
 
 /*
@@ -866,27 +863,29 @@
 static void enc28j60_dump_rsv(struct enc28j60_net *priv, const char *msg,
 			      u16 pk_ptr, int len, u16 sts)
 {
-	printk(KERN_DEBUG DRV_NAME ": %s - NextPk: 0x%04x - RSV:\n",
-		msg, pk_ptr);
-	printk(KERN_DEBUG DRV_NAME ": ByteCount: %d, DribbleNibble: %d\n", len,
-		 RSV_GETBIT(sts, RSV_DRIBBLENIBBLE));
-	printk(KERN_DEBUG DRV_NAME ": RxOK: %d, CRCErr:%d, LenChkErr: %d,"
-		 " LenOutOfRange: %d\n", RSV_GETBIT(sts, RSV_RXOK),
-		 RSV_GETBIT(sts, RSV_CRCERROR),
-		 RSV_GETBIT(sts, RSV_LENCHECKERR),
-		 RSV_GETBIT(sts, RSV_LENOUTOFRANGE));
-	printk(KERN_DEBUG DRV_NAME ": Multicast: %d, Broadcast: %d, "
-		 "LongDropEvent: %d, CarrierEvent: %d\n",
-		 RSV_GETBIT(sts, RSV_RXMULTICAST),
-		 RSV_GETBIT(sts, RSV_RXBROADCAST),
-		 RSV_GETBIT(sts, RSV_RXLONGEVDROPEV),
-		 RSV_GETBIT(sts, RSV_CARRIEREV));
-	printk(KERN_DEBUG DRV_NAME ": ControlFrame: %d, PauseFrame: %d,"
-		 " UnknownOp: %d, VLanTagFrame: %d\n",
-		 RSV_GETBIT(sts, RSV_RXCONTROLFRAME),
-		 RSV_GETBIT(sts, RSV_RXPAUSEFRAME),
-		 RSV_GETBIT(sts, RSV_RXUNKNOWNOPCODE),
-		 RSV_GETBIT(sts, RSV_RXTYPEVLAN));
+	struct device *dev = &priv->spi->dev;
+
+	dev_printk(KERN_DEBUG, dev, "%s - NextPk: 0x%04x - RSV:\n", msg, pk_ptr);
+	dev_printk(KERN_DEBUG, dev, "ByteCount: %d, DribbleNibble: %d\n",
+		   len, RSV_GETBIT(sts, RSV_DRIBBLENIBBLE));
+	dev_printk(KERN_DEBUG, dev,
+		   "RxOK: %d, CRCErr:%d, LenChkErr: %d, LenOutOfRange: %d\n",
+		   RSV_GETBIT(sts, RSV_RXOK),
+		   RSV_GETBIT(sts, RSV_CRCERROR),
+		   RSV_GETBIT(sts, RSV_LENCHECKERR),
+		   RSV_GETBIT(sts, RSV_LENOUTOFRANGE));
+	dev_printk(KERN_DEBUG, dev,
+		   "Multicast: %d, Broadcast: %d, LongDropEvent: %d, CarrierEvent: %d\n",
+		   RSV_GETBIT(sts, RSV_RXMULTICAST),
+		   RSV_GETBIT(sts, RSV_RXBROADCAST),
+		   RSV_GETBIT(sts, RSV_RXLONGEVDROPEV),
+		   RSV_GETBIT(sts, RSV_CARRIEREV));
+	dev_printk(KERN_DEBUG, dev,
+		   "ControlFrame: %d, PauseFrame: %d, UnknownOp: %d, VLanTagFrame: %d\n",
+		   RSV_GETBIT(sts, RSV_RXCONTROLFRAME),
+		   RSV_GETBIT(sts, RSV_RXPAUSEFRAME),
+		   RSV_GETBIT(sts, RSV_RXUNKNOWNOPCODE),
+		   RSV_GETBIT(sts, RSV_RXTYPEVLAN));
 }
 
 static void dump_packet(const char *msg, int len, const char *data)
@@ -904,20 +903,20 @@
 static void enc28j60_hw_rx(struct net_device *ndev)
 {
 	struct enc28j60_net *priv = netdev_priv(ndev);
+	struct device *dev = &priv->spi->dev;
 	struct sk_buff *skb = NULL;
 	u16 erxrdpt, next_packet, rxstat;
 	u8 rsv[RSV_SIZE];
 	int len;
 
 	if (netif_msg_rx_status(priv))
-		printk(KERN_DEBUG DRV_NAME ": RX pk_addr:0x%04x\n",
-			priv->next_pk_ptr);
+		netdev_printk(KERN_DEBUG, ndev, "RX pk_addr:0x%04x\n",
+			      priv->next_pk_ptr);
 
 	if (unlikely(priv->next_pk_ptr > RXEND_INIT)) {
 		if (netif_msg_rx_err(priv))
-			dev_err(&ndev->dev,
-				"%s() Invalid packet address!! 0x%04x\n",
-				__func__, priv->next_pk_ptr);
+			netdev_err(ndev, "%s() Invalid packet address!! 0x%04x\n",
+				   __func__, priv->next_pk_ptr);
 		/* packet address corrupted: reset RX logic */
 		mutex_lock(&priv->lock);
 		nolock_reg_bfclr(priv, ECON1, ECON1_RXEN);
@@ -950,7 +949,7 @@
 
 	if (!RSV_GETBIT(rxstat, RSV_RXOK) || len > MAX_FRAMELEN) {
 		if (netif_msg_rx_err(priv))
-			dev_err(&ndev->dev, "Rx Error (%04x)\n", rxstat);
+			netdev_err(ndev, "Rx Error (%04x)\n", rxstat);
 		ndev->stats.rx_errors++;
 		if (RSV_GETBIT(rxstat, RSV_CRCERROR))
 			ndev->stats.rx_crc_errors++;
@@ -962,8 +961,7 @@
 		skb = netdev_alloc_skb(ndev, len + NET_IP_ALIGN);
 		if (!skb) {
 			if (netif_msg_rx_err(priv))
-				dev_err(&ndev->dev,
-					"out of memory for Rx'd frame\n");
+				netdev_err(ndev, "out of memory for Rx'd frame\n");
 			ndev->stats.rx_dropped++;
 		} else {
 			skb_reserve(skb, NET_IP_ALIGN);
@@ -983,12 +981,12 @@
 	/*
 	 * Move the RX read pointer to the start of the next
 	 * received packet.
-	 * This frees the memory we just read out
+	 * This frees the memory we just read out.
 	 */
 	erxrdpt = erxrdpt_workaround(next_packet, RXSTART_INIT, RXEND_INIT);
 	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() ERXRDPT:0x%04x\n",
-			__func__, erxrdpt);
+		dev_printk(KERN_DEBUG, dev, "%s() ERXRDPT:0x%04x\n",
+			   __func__, erxrdpt);
 
 	mutex_lock(&priv->lock);
 	nolock_regw_write(priv, ERXRDPTL, erxrdpt);
@@ -997,9 +995,9 @@
 		u16 reg;
 		reg = nolock_regw_read(priv, ERXRDPTL);
 		if (reg != erxrdpt)
-			printk(KERN_DEBUG DRV_NAME ": %s() ERXRDPT verify "
-				"error (0x%04x - 0x%04x)\n", __func__,
-				reg, erxrdpt);
+			dev_printk(KERN_DEBUG, dev,
+				   "%s() ERXRDPT verify error (0x%04x - 0x%04x)\n",
+				   __func__, reg, erxrdpt);
 	}
 #endif
 	priv->next_pk_ptr = next_packet;
@@ -1013,6 +1011,7 @@
  */
 static int enc28j60_get_free_rxfifo(struct enc28j60_net *priv)
 {
+	struct net_device *ndev = priv->netdev;
 	int epkcnt, erxst, erxnd, erxwr, erxrd;
 	int free_space;
 
@@ -1035,8 +1034,8 @@
 	}
 	mutex_unlock(&priv->lock);
 	if (netif_msg_rx_status(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() free_space = %d\n",
-			__func__, free_space);
+		netdev_printk(KERN_DEBUG, ndev, "%s() free_space = %d\n",
+			      __func__, free_space);
 	return free_space;
 }
 
@@ -1046,24 +1045,25 @@
 static void enc28j60_check_link_status(struct net_device *ndev)
 {
 	struct enc28j60_net *priv = netdev_priv(ndev);
+	struct device *dev = &priv->spi->dev;
 	u16 reg;
 	int duplex;
 
 	reg = enc28j60_phy_read(priv, PHSTAT2);
 	if (netif_msg_hw(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() PHSTAT1: %04x, "
-			"PHSTAT2: %04x\n", __func__,
-			enc28j60_phy_read(priv, PHSTAT1), reg);
+		dev_printk(KERN_DEBUG, dev,
+			   "%s() PHSTAT1: %04x, PHSTAT2: %04x\n", __func__,
+			   enc28j60_phy_read(priv, PHSTAT1), reg);
 	duplex = reg & PHSTAT2_DPXSTAT;
 
 	if (reg & PHSTAT2_LSTAT) {
 		netif_carrier_on(ndev);
 		if (netif_msg_ifup(priv))
-			dev_info(&ndev->dev, "link up - %s\n",
-				duplex ? "Full duplex" : "Half duplex");
+			netdev_info(ndev, "link up - %s\n",
+				    duplex ? "Full duplex" : "Half duplex");
 	} else {
 		if (netif_msg_ifdown(priv))
-			dev_info(&ndev->dev, "link down\n");
+			netdev_info(ndev, "link down\n");
 		netif_carrier_off(ndev);
 	}
 }
@@ -1089,8 +1089,8 @@
 
 /*
  * RX handler
- * ignore PKTIF because is unreliable! (look at the errata datasheet)
- * check EPKTCNT is the suggested workaround.
+ * Ignore PKTIF because it is unreliable! (See the errata datasheet.)
+ * Checking EPKTCNT is the suggested workaround.
  * We don't need to clear the interrupt flag; that happens automatically when
  * enc28j60_hw_rx() decrements the packet counter.
  * Returns how many packets were processed.
@@ -1102,13 +1102,14 @@
 
 	pk_counter = locked_regb_read(priv, EPKTCNT);
 	if (pk_counter && netif_msg_intr(priv))
-		printk(KERN_DEBUG DRV_NAME ": intRX, pk_cnt: %d\n", pk_counter);
+		netdev_printk(KERN_DEBUG, ndev, "intRX, pk_cnt: %d\n",
+			      pk_counter);
 	if (pk_counter > priv->max_pk_counter) {
 		/* update statistics */
 		priv->max_pk_counter = pk_counter;
 		if (netif_msg_rx_status(priv) && priv->max_pk_counter > 1)
-			printk(KERN_DEBUG DRV_NAME ": RX max_pk_cnt: %d\n",
-				priv->max_pk_counter);
+			netdev_printk(KERN_DEBUG, ndev, "RX max_pk_cnt: %d\n",
+				      priv->max_pk_counter);
 	}
 	ret = pk_counter;
 	while (pk_counter-- > 0)
@@ -1124,8 +1125,6 @@
 	struct net_device *ndev = priv->netdev;
 	int intflags, loop;
 
-	if (netif_msg_intr(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__);
 	/* disable further interrupts */
 	locked_reg_bfclr(priv, EIE, EIE_INTIE);
 
@@ -1136,16 +1135,16 @@
 		if ((intflags & EIR_DMAIF) != 0) {
 			loop++;
 			if (netif_msg_intr(priv))
-				printk(KERN_DEBUG DRV_NAME
-					": intDMA(%d)\n", loop);
+				netdev_printk(KERN_DEBUG, ndev, "intDMA(%d)\n",
+					      loop);
 			locked_reg_bfclr(priv, EIR, EIR_DMAIF);
 		}
 		/* LINK changed handler */
 		if ((intflags & EIR_LINKIF) != 0) {
 			loop++;
 			if (netif_msg_intr(priv))
-				printk(KERN_DEBUG DRV_NAME
-					": intLINK(%d)\n", loop);
+				netdev_printk(KERN_DEBUG, ndev, "intLINK(%d)\n",
+					      loop);
 			enc28j60_check_link_status(ndev);
 			/* read PHIR to clear the flag */
 			enc28j60_phy_read(priv, PHIR);
@@ -1156,13 +1155,12 @@
 			bool err = false;
 			loop++;
 			if (netif_msg_intr(priv))
-				printk(KERN_DEBUG DRV_NAME
-					": intTX(%d)\n", loop);
+				netdev_printk(KERN_DEBUG, ndev, "intTX(%d)\n",
+					      loop);
 			priv->tx_retry_count = 0;
 			if (locked_regb_read(priv, ESTAT) & ESTAT_TXABRT) {
 				if (netif_msg_tx_err(priv))
-					dev_err(&ndev->dev,
-						"Tx Error (aborted)\n");
+					netdev_err(ndev, "Tx Error (aborted)\n");
 				err = true;
 			}
 			if (netif_msg_tx_done(priv)) {
@@ -1179,8 +1177,8 @@
 
 			loop++;
 			if (netif_msg_intr(priv))
-				printk(KERN_DEBUG DRV_NAME
-					": intTXErr(%d)\n", loop);
+				netdev_printk(KERN_DEBUG, ndev, "intTXErr(%d)\n",
+					      loop);
 			locked_reg_bfclr(priv, ECON1, ECON1_TXRTS);
 			enc28j60_read_tsv(priv, tsv);
 			if (netif_msg_tx_err(priv))
@@ -1194,9 +1192,9 @@
 			/* Transmit Late collision check for retransmit */
 			if (TSV_GETBIT(tsv, TSV_TXLATECOLLISION)) {
 				if (netif_msg_tx_err(priv))
-					printk(KERN_DEBUG DRV_NAME
-						": LateCollision TXErr (%d)\n",
-						priv->tx_retry_count);
+					netdev_printk(KERN_DEBUG, ndev,
+						      "LateCollision TXErr (%d)\n",
+						      priv->tx_retry_count);
 				if (priv->tx_retry_count++ < MAX_TX_RETRYCOUNT)
 					locked_reg_bfset(priv, ECON1,
 							   ECON1_TXRTS);
@@ -1210,13 +1208,12 @@
 		if ((intflags & EIR_RXERIF) != 0) {
 			loop++;
 			if (netif_msg_intr(priv))
-				printk(KERN_DEBUG DRV_NAME
-					": intRXErr(%d)\n", loop);
+				netdev_printk(KERN_DEBUG, ndev, "intRXErr(%d)\n",
+					      loop);
 			/* Check free FIFO space to flag RX overrun */
 			if (enc28j60_get_free_rxfifo(priv) <= 0) {
 				if (netif_msg_rx_err(priv))
-					printk(KERN_DEBUG DRV_NAME
-						": RX Overrun\n");
+					netdev_printk(KERN_DEBUG, ndev, "RX Overrun\n");
 				ndev->stats.rx_dropped++;
 			}
 			locked_reg_bfclr(priv, EIR, EIR_RXERIF);
@@ -1228,8 +1225,6 @@
 
 	/* re-enable interrupts */
 	locked_reg_bfset(priv, EIE, EIE_INTIE);
-	if (netif_msg_intr(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() exit\n", __func__);
 }
 
 /*
@@ -1239,11 +1234,13 @@
  */
 static void enc28j60_hw_tx(struct enc28j60_net *priv)
 {
+	struct net_device *ndev = priv->netdev;
+
 	BUG_ON(!priv->tx_skb);
 
 	if (netif_msg_tx_queued(priv))
-		printk(KERN_DEBUG DRV_NAME
-			": Tx Packet Len:%d\n", priv->tx_skb->len);
+		netdev_printk(KERN_DEBUG, ndev, "Tx Packet Len:%d\n",
+			      priv->tx_skb->len);
 
 	if (netif_msg_pktdata(priv))
 		dump_packet(__func__,
@@ -1253,6 +1250,7 @@
 #ifdef CONFIG_ENC28J60_WRITEVERIFY
 	/* readback and verify written data */
 	if (netif_msg_drv(priv)) {
+		struct device *dev = &priv->spi->dev;
 		int test_len, k;
 		u8 test_buf[64]; /* limit the test to the first 64 bytes */
 		int okflag;
@@ -1266,16 +1264,14 @@
 		okflag = 1;
 		for (k = 0; k < test_len; k++) {
 			if (priv->tx_skb->data[k] != test_buf[k]) {
-				printk(KERN_DEBUG DRV_NAME
-					 ": Error, %d location differ: "
-					 "0x%02x-0x%02x\n", k,
-					 priv->tx_skb->data[k], test_buf[k]);
+				dev_printk(KERN_DEBUG, dev,
+					   "Error, %d location differ: 0x%02x-0x%02x\n",
+					   k, priv->tx_skb->data[k], test_buf[k]);
 				okflag = 0;
 			}
 		}
 		if (!okflag)
-			printk(KERN_DEBUG DRV_NAME ": Tx write buffer, "
-				"verify ERROR!\n");
+			dev_printk(KERN_DEBUG, dev, "Tx write buffer, verify ERROR!\n");
 	}
 #endif
 	/* set TX request flag */
@@ -1287,14 +1283,11 @@
 {
 	struct enc28j60_net *priv = netdev_priv(dev);
 
-	if (netif_msg_tx_queued(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__);
-
 	/* If some error occurs while trying to transmit this
 	 * packet, you should return '1' from this function.
 	 * In such a case you _may not_ do anything to the
 	 * SKB, it is still owned by the network queueing
-	 * layer when an error is returned.  This means you
+	 * layer when an error is returned. This means you
 	 * may not modify any SKB fields, you may not free
 	 * the SKB, etc.
 	 */
@@ -1337,7 +1330,7 @@
 	struct enc28j60_net *priv = netdev_priv(ndev);
 
 	if (netif_msg_timer(priv))
-		dev_err(&ndev->dev, DRV_NAME " tx timeout\n");
+		netdev_err(ndev, "tx timeout\n");
 
 	ndev->stats.tx_errors++;
 	/* can't restart safely under softirq */
@@ -1356,13 +1349,9 @@
 {
 	struct enc28j60_net *priv = netdev_priv(dev);
 
-	if (netif_msg_drv(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__);
-
 	if (!is_valid_ether_addr(dev->dev_addr)) {
 		if (netif_msg_ifup(priv))
-			dev_err(&dev->dev, "invalid MAC address %pM\n",
-				dev->dev_addr);
+			netdev_err(dev, "invalid MAC address %pM\n", dev->dev_addr);
 		return -EADDRNOTAVAIL;
 	}
 	/* Reset the hardware here (and take it out of low power mode) */
@@ -1370,7 +1359,7 @@
 	enc28j60_hw_disable(priv);
 	if (!enc28j60_hw_init(priv)) {
 		if (netif_msg_ifup(priv))
-			dev_err(&dev->dev, "hw_reset() failed\n");
+			netdev_err(dev, "hw_reset() failed\n");
 		return -EINVAL;
 	}
 	/* Update the MAC address (in case user has changed it) */
@@ -1392,9 +1381,6 @@
 {
 	struct enc28j60_net *priv = netdev_priv(dev);
 
-	if (netif_msg_drv(priv))
-		printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__);
-
 	enc28j60_hw_disable(priv);
 	enc28j60_lowpower(priv, true);
 	netif_stop_queue(dev);
@@ -1415,16 +1401,16 @@
 
 	if (dev->flags & IFF_PROMISC) {
 		if (netif_msg_link(priv))
-			dev_info(&dev->dev, "promiscuous mode\n");
+			netdev_info(dev, "promiscuous mode\n");
 		priv->rxfilter = RXFILTER_PROMISC;
 	} else if ((dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev)) {
 		if (netif_msg_link(priv))
-			dev_info(&dev->dev, "%smulticast mode\n",
-				(dev->flags & IFF_ALLMULTI) ? "all-" : "");
+			netdev_info(dev, "%smulticast mode\n",
+				    (dev->flags & IFF_ALLMULTI) ? "all-" : "");
 		priv->rxfilter = RXFILTER_MULTI;
 	} else {
 		if (netif_msg_link(priv))
-			dev_info(&dev->dev, "normal mode\n");
+			netdev_info(dev, "normal mode\n");
 		priv->rxfilter = RXFILTER_NORMAL;
 	}
 
@@ -1436,20 +1422,21 @@
 {
 	struct enc28j60_net *priv =
 		container_of(work, struct enc28j60_net, setrx_work);
+	struct device *dev = &priv->spi->dev;
 
 	if (priv->rxfilter == RXFILTER_PROMISC) {
 		if (netif_msg_drv(priv))
-			printk(KERN_DEBUG DRV_NAME ": promiscuous mode\n");
+			dev_printk(KERN_DEBUG, dev, "promiscuous mode\n");
 		locked_regb_write(priv, ERXFCON, 0x00);
 	} else if (priv->rxfilter == RXFILTER_MULTI) {
 		if (netif_msg_drv(priv))
-			printk(KERN_DEBUG DRV_NAME ": multicast mode\n");
+			dev_printk(KERN_DEBUG, dev, "multicast mode\n");
 		locked_regb_write(priv, ERXFCON,
 					ERXFCON_UCEN | ERXFCON_CRCEN |
 					ERXFCON_BCEN | ERXFCON_MCEN);
 	} else {
 		if (netif_msg_drv(priv))
-			printk(KERN_DEBUG DRV_NAME ": normal mode\n");
+			dev_printk(KERN_DEBUG, dev, "normal mode\n");
 		locked_regb_write(priv, ERXFCON,
 					ERXFCON_UCEN | ERXFCON_CRCEN |
 					ERXFCON_BCEN);
@@ -1468,7 +1455,7 @@
 		enc28j60_net_close(ndev);
 		ret = enc28j60_net_open(ndev);
 		if (unlikely(ret)) {
-			dev_info(&ndev->dev, " could not restart %d\n", ret);
+			netdev_info(ndev, "could not restart %d\n", ret);
 			dev_close(ndev);
 		}
 	}
@@ -1552,14 +1539,13 @@
 
 static int enc28j60_probe(struct spi_device *spi)
 {
+	unsigned char macaddr[ETH_ALEN];
 	struct net_device *dev;
 	struct enc28j60_net *priv;
-	const void *macaddr;
 	int ret = 0;
 
 	if (netif_msg_drv(&debug))
-		dev_info(&spi->dev, DRV_NAME " Ethernet driver %s loaded\n",
-			DRV_VERSION);
+		dev_info(&spi->dev, "Ethernet driver %s loaded\n", DRV_VERSION);
 
 	dev = alloc_etherdev(sizeof(struct enc28j60_net));
 	if (!dev) {
@@ -1570,8 +1556,7 @@
 
 	priv->netdev = dev;	/* priv to netdev reference */
 	priv->spi = spi;	/* priv to spi reference */
-	priv->msg_enable = netif_msg_init(debug.msg_enable,
-						ENC28J60_MSG_DEFAULT);
+	priv->msg_enable = netif_msg_init(debug.msg_enable, ENC28J60_MSG_DEFAULT);
 	mutex_init(&priv->lock);
 	INIT_WORK(&priv->tx_work, enc28j60_tx_work_handler);
 	INIT_WORK(&priv->setrx_work, enc28j60_setrx_work_handler);
@@ -1582,13 +1567,12 @@
 
 	if (!enc28j60_chipset_init(dev)) {
 		if (netif_msg_probe(priv))
-			dev_info(&spi->dev, DRV_NAME " chip not found\n");
+			dev_info(&spi->dev, "chip not found\n");
 		ret = -EIO;
 		goto error_irq;
 	}
 
-	macaddr = of_get_mac_address(spi->dev.of_node);
-	if (macaddr)
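+	/* Editor's note: device_get_mac_address() reads the address from
+	 * either DT or ACPI firmware nodes, unlike the OF-only helper it
+	 * replaces.
+	 */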
+	if (device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr)))
 		ether_addr_copy(dev->dev_addr, macaddr);
 	else
 		eth_hw_addr_random(dev);
@@ -1600,8 +1584,8 @@
 	ret = request_irq(spi->irq, enc28j60_irq, 0, DRV_NAME, priv);
 	if (ret < 0) {
 		if (netif_msg_probe(priv))
-			dev_err(&spi->dev, DRV_NAME ": request irq %d failed "
-				"(ret = %d)\n", spi->irq, ret);
+			dev_err(&spi->dev, "request irq %d failed (ret = %d)\n",
+				spi->irq, ret);
 		goto error_irq;
 	}
 
@@ -1616,11 +1600,10 @@
 	ret = register_netdev(dev);
 	if (ret) {
 		if (netif_msg_probe(priv))
-			dev_err(&spi->dev, "register netdev " DRV_NAME
-				" failed (ret = %d)\n", ret);
+			dev_err(&spi->dev, "register netdev failed (ret = %d)\n",
+				ret);
 		goto error_register;
 	}
-	dev_info(&dev->dev, DRV_NAME " driver registered\n");
 
 	return 0;
 
@@ -1636,9 +1619,6 @@
 {
 	struct enc28j60_net *priv = spi_get_drvdata(spi);
 
-	if (netif_msg_drv(priv))
-		printk(KERN_DEBUG DRV_NAME ": remove\n");
-
 	unregister_netdev(priv->netdev);
 	free_irq(spi->irq, priv);
 	free_netdev(priv->netdev);
@@ -1660,26 +1640,11 @@
 	.probe = enc28j60_probe,
 	.remove = enc28j60_remove,
 };
-
-static int __init enc28j60_init(void)
-{
-	msec20_to_jiffies = msecs_to_jiffies(20);
-
-	return spi_register_driver(&enc28j60_driver);
-}
-
-module_init(enc28j60_init);
-
-static void __exit enc28j60_exit(void)
-{
-	spi_unregister_driver(&enc28j60_driver);
-}
-
-module_exit(enc28j60_exit);
+module_spi_driver(enc28j60_driver);
 
 MODULE_DESCRIPTION(DRV_NAME " ethernet driver");
 MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>");
 MODULE_LICENSE("GPL");
 module_param_named(debug, debug.msg_enable, int, 0);
-MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., ffff=all)");
+MODULE_PARM_DESC(debug, "Debug verbosity level, in number of bits set (0=none, ..., 31=all)");
 MODULE_ALIAS("spi:" DRV_NAME);
diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index 44bb04d..1f496fa 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /**
  * Register map access API - ENCX24J600 support
  *
  * Copyright 2015 Gridpoint
  *
  * Author: Jon Ringle <jringle@gridpoint.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index f831238..52c41d1 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /**
  * Microchip ENCX24J600 ethernet driver
  *
  * Copyright (C) 2015 Gridpoint
  * Author: Jon Ringle <jringle@gridpoint.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
  */
 
 #include <linux/device.h>
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index 07c1eb6..3a0b289 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -14,61 +14,138 @@
 #define EEPROM_INDICATOR_1		    (0xA5)
 #define EEPROM_INDICATOR_2		    (0xAA)
 #define EEPROM_MAC_OFFSET		    (0x01)
-#define MAX_EEPROM_SIZE			    512
+#define MAX_EEPROM_SIZE			    (512)
+#define MAX_OTP_SIZE			    (1024)
 #define OTP_INDICATOR_1			    (0xF3)
 #define OTP_INDICATOR_2			    (0xF7)
 
+static int lan743x_otp_power_up(struct lan743x_adapter *adapter)
+{
+	u32 reg_value;
+
+	reg_value = lan743x_csr_read(adapter, OTP_PWR_DN);
+
+	if (reg_value & OTP_PWR_DN_PWRDN_N_) {
+		/* clear the power-down bit */
+		reg_value &= ~OTP_PWR_DN_PWRDN_N_;
+		lan743x_csr_write(adapter, OTP_PWR_DN, reg_value);
+
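+		/* No ready flag is polled here; a fixed delay stands in
+		 * for the (unspecified) OTP power-up time.
+		 */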
+		usleep_range(100, 20000);
+	}
+
+	return 0;
+}
+
+static void lan743x_otp_power_down(struct lan743x_adapter *adapter)
+{
+	u32 reg_value;
+
+	reg_value = lan743x_csr_read(adapter, OTP_PWR_DN);
+	if (!(reg_value & OTP_PWR_DN_PWRDN_N_)) {
+		/* set power down bit */
+		reg_value |= OTP_PWR_DN_PWRDN_N_;
+		lan743x_csr_write(adapter, OTP_PWR_DN, reg_value);
+	}
+}
+
+static void lan743x_otp_set_address(struct lan743x_adapter *adapter,
+				    u32 address)
+{
+	lan743x_csr_write(adapter, OTP_ADDR_HIGH, (address >> 8) & 0x03);
+	lan743x_csr_write(adapter, OTP_ADDR_LOW, address & 0xFF);
+}
+
+static void lan743x_otp_read_go(struct lan743x_adapter *adapter)
+{
+	lan743x_csr_write(adapter, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
+	lan743x_csr_write(adapter, OTP_CMD_GO, OTP_CMD_GO_GO_);
+}
+
+static int lan743x_otp_wait_till_not_busy(struct lan743x_adapter *adapter)
+{
+	unsigned long timeout;
+	u32 reg_val;
+
+	timeout = jiffies + HZ;
+	do {
+		if (time_after(jiffies, timeout)) {
+			netif_warn(adapter, drv, adapter->netdev,
+				   "Timeout on OTP_STATUS completion\n");
+			return -EIO;
+		}
+		udelay(1);
+		reg_val = lan743x_csr_read(adapter, OTP_STATUS);
+	} while (reg_val & OTP_STATUS_BUSY_);
+
+	return 0;
+}
+
+static int lan743x_otp_read(struct lan743x_adapter *adapter, u32 offset,
+			    u32 length, u8 *data)
+{
+	int ret;
+	int i;
+
+	if (offset + length > MAX_OTP_SIZE)
+		return -EINVAL;
+
+	ret = lan743x_otp_power_up(adapter);
+	if (ret < 0)
+		return ret;
+
+	ret = lan743x_otp_wait_till_not_busy(adapter);
+	if (ret < 0)
+		return ret;
+
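+	/* The OTP is read one byte at a time: latch the address, issue a
+	 * READ + GO command, then poll busy before sampling OTP_READ_DATA.
+	 */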
+	for (i = 0; i < length; i++) {
+		lan743x_otp_set_address(adapter, offset + i);
+
+		lan743x_otp_read_go(adapter);
+		ret = lan743x_otp_wait_till_not_busy(adapter);
+		if (ret < 0)
+			return ret;
+		data[i] = lan743x_csr_read(adapter, OTP_READ_DATA);
+	}
+
+	lan743x_otp_power_down(adapter);
+
+	return 0;
+}
+
 static int lan743x_otp_write(struct lan743x_adapter *adapter, u32 offset,
 			     u32 length, u8 *data)
 {
-	unsigned long timeout;
-	u32 buf;
+	int ret;
 	int i;
 
-	buf = lan743x_csr_read(adapter, OTP_PWR_DN);
+	if (offset + length > MAX_OTP_SIZE)
+		return -EINVAL;
 
-	if (buf & OTP_PWR_DN_PWRDN_N_) {
-		/* clear it and wait to be cleared */
-		lan743x_csr_write(adapter, OTP_PWR_DN, 0);
+	ret = lan743x_otp_power_up(adapter);
+	if (ret < 0)
+		return ret;
 
-		timeout = jiffies + HZ;
-		do {
-			udelay(1);
-			buf = lan743x_csr_read(adapter, OTP_PWR_DN);
-			if (time_after(jiffies, timeout)) {
-				netif_warn(adapter, drv, adapter->netdev,
-					   "timeout on OTP_PWR_DN completion\n");
-				return -EIO;
-			}
-		} while (buf & OTP_PWR_DN_PWRDN_N_);
-	}
+	ret = lan743x_otp_wait_till_not_busy(adapter);
+	if (ret < 0)
+		return ret;
 
 	/* set to BYTE program mode */
 	lan743x_csr_write(adapter, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
 
 	for (i = 0; i < length; i++) {
-		lan743x_csr_write(adapter, OTP_ADDR1,
-				  ((offset + i) >> 8) &
-				  OTP_ADDR1_15_11_MASK_);
-		lan743x_csr_write(adapter, OTP_ADDR2,
-				  ((offset + i) &
-				  OTP_ADDR2_10_3_MASK_));
+		lan743x_otp_set_address(adapter, offset + i);
+
 		lan743x_csr_write(adapter, OTP_PRGM_DATA, data[i]);
 		lan743x_csr_write(adapter, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
 		lan743x_csr_write(adapter, OTP_CMD_GO, OTP_CMD_GO_GO_);
 
-		timeout = jiffies + HZ;
-		do {
-			udelay(1);
-			buf = lan743x_csr_read(adapter, OTP_STATUS);
-			if (time_after(jiffies, timeout)) {
-				netif_warn(adapter, drv, adapter->netdev,
-					   "Timeout on OTP_STATUS completion\n");
-				return -EIO;
-			}
-		} while (buf & OTP_STATUS_BUSY_);
+		ret = lan743x_otp_wait_till_not_busy(adapter);
+		if (ret < 0)
+			return ret;
 	}
 
+	lan743x_otp_power_down(adapter);
+
 	return 0;
 }
 
@@ -120,6 +197,9 @@
 	u32 val;
 	int i;
 
+	if (offset + length > MAX_EEPROM_SIZE)
+		return -EINVAL;
+
 	retval = lan743x_eeprom_confirm_not_busy(adapter);
 	if (retval)
 		return retval;
@@ -148,6 +228,9 @@
 	u32 val;
 	int i;
 
+	if (offset + length > MAX_EEPROM_SIZE)
+		return -EINVAL;
+
 	retval = lan743x_eeprom_confirm_not_busy(adapter);
 	if (retval)
 		return retval;
@@ -207,6 +290,11 @@
 
 static int lan743x_ethtool_get_eeprom_len(struct net_device *netdev)
 {
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP)
+		return MAX_OTP_SIZE;
+
 	return MAX_EEPROM_SIZE;
 }
 
@@ -214,8 +302,14 @@
 				      struct ethtool_eeprom *ee, u8 *data)
 {
 	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
 
-	return lan743x_eeprom_read(adapter, ee->offset, ee->len, data);
+	if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP)
+		ret = lan743x_otp_read(adapter, ee->offset, ee->len, data);
+	else
+		ret = lan743x_eeprom_read(adapter, ee->offset, ee->len, data);
+
+	return ret;
 }
 
 static int lan743x_ethtool_set_eeprom(struct net_device *netdev,
@@ -224,17 +318,18 @@
 	struct lan743x_adapter *adapter = netdev_priv(netdev);
 	int ret = -EINVAL;
 
-	if (ee->magic == LAN743X_EEPROM_MAGIC)
-		ret = lan743x_eeprom_write(adapter, ee->offset, ee->len,
-					   data);
-	/* Beware!  OTP is One Time Programming ONLY!
-	 * So do some strict condition check before messing up
-	 */
-	else if ((ee->magic == LAN743X_OTP_MAGIC) &&
-		 (ee->offset == 0) &&
-		 (ee->len == MAX_EEPROM_SIZE) &&
-		 (data[0] == OTP_INDICATOR_1))
-		ret = lan743x_otp_write(adapter, ee->offset, ee->len, data);
+	if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) {
+		/* Beware! OTP is One Time Programming ONLY! */
+		if (ee->magic == LAN743X_OTP_MAGIC) {
+			ret = lan743x_otp_write(adapter, ee->offset,
+						ee->len, data);
+		}
+	} else {
+		if (ee->magic == LAN743X_EEPROM_MAGIC) {
+			ret = lan743x_eeprom_write(adapter, ee->offset,
+						   ee->len, data);
+		}
+	}
 
 	return ret;
 }
@@ -360,6 +455,10 @@
 	STAT_TX_COUNTER_ROLLOVER_STATUS
 };
 
+static const char lan743x_priv_flags_strings[][ETH_GSTRING_LEN] = {
+	"OTP_ACCESS",
+};
+
 static void lan743x_ethtool_get_strings(struct net_device *netdev,
 					u32 stringset, u8 *data)
 {
@@ -375,6 +474,10 @@
 		       lan743x_set2_hw_cnt_strings,
 		       sizeof(lan743x_set2_hw_cnt_strings));
 		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, lan743x_priv_flags_strings,
+		       sizeof(lan743x_priv_flags_strings));
+		break;
 	}
 }
 
@@ -399,6 +502,22 @@
 	}
 }
 
+static u32 lan743x_ethtool_get_priv_flags(struct net_device *netdev)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->flags;
+}
+
+static int lan743x_ethtool_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	adapter->flags = flags;
+
+	return 0;
+}
+
 static int lan743x_ethtool_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
@@ -411,6 +530,8 @@
 		ret += ARRAY_SIZE(lan743x_set2_hw_cnt_strings);
 		return ret;
 	}
+	case ETH_SS_PRIV_FLAGS:
+		return ARRAY_SIZE(lan743x_priv_flags_strings);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -705,6 +826,8 @@
 	.set_eeprom = lan743x_ethtool_set_eeprom,
 	.get_strings = lan743x_ethtool_get_strings,
 	.get_ethtool_stats = lan743x_ethtool_get_ethtool_stats,
+	.get_priv_flags = lan743x_ethtool_get_priv_flags,
+	.set_priv_flags = lan743x_ethtool_set_priv_flags,
 	.get_sset_count = lan743x_ethtool_get_sset_count,
 	.get_rxnfc = lan743x_ethtool_get_rxnfc,
 	.get_rxfh_key_size = lan743x_ethtool_get_rxfh_key_size,
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index aaedf10..a43140f 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -585,8 +585,7 @@
 
 		if (adapter->csr.flags &
 		   LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
-			flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
-				LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+			flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
 				LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
 				LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
 				LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
@@ -599,12 +598,6 @@
 			/* map TX interrupt to vector */
 			int_vec_map1 |= INT_VEC_MAP1_TX_VEC_(index, vector);
 			lan743x_csr_write(adapter, INT_VEC_MAP1, int_vec_map1);
-			if (flags &
-			    LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
-				int_vec_en_auto_clr |= INT_VEC_EN_(vector);
-				lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
-						  int_vec_en_auto_clr);
-			}
 
 			/* Remove TX interrupt from shared mask */
 			intr->vector_list[0].int_mask &= ~int_bit;
@@ -802,14 +795,8 @@
 	u32 mac_addr_hi = 0;
 	u32 mac_addr_lo = 0;
 	u32 data;
-	int ret;
 
 	netdev = adapter->netdev;
-	lan743x_csr_write(adapter, MAC_CR, MAC_CR_RST_);
-	ret = lan743x_csr_wait_for_bit(adapter, MAC_CR, MAC_CR_RST_,
-				       0, 1000, 20000, 100);
-	if (ret)
-		return ret;
 
 	/* setup auto duplex, and speed detection */
 	data = lan743x_csr_read(adapter, MAC_CR);
@@ -968,13 +955,10 @@
 
 		memset(&ksettings, 0, sizeof(ksettings));
 		phy_ethtool_get_link_ksettings(netdev, &ksettings);
-		local_advertisement = phy_read(phydev, MII_ADVERTISE);
-		if (local_advertisement < 0)
-			return;
-
-		remote_advertisement = phy_read(phydev, MII_LPA);
-		if (remote_advertisement < 0)
-			return;
+		local_advertisement =
+			linkmode_adv_to_mii_adv_t(phydev->advertising);
+		remote_advertisement =
+			linkmode_adv_to_mii_adv_t(phydev->lp_advertising);
 
 		lan743x_phy_update_flowcontrol(adapter,
 					       ksettings.base.duplex,
@@ -999,7 +983,6 @@
 	struct phy_device *phydev;
 	struct net_device *netdev;
 	int ret = -EIO;
-	u32 mii_adv;
 
 	netdev = adapter->netdev;
 	phydev = phy_find_first(adapter->mdiobus);
@@ -1013,13 +996,11 @@
 		goto return_error;
 
 	/* MAC doesn't support 1000T Half */
-	phydev->supported &= ~SUPPORTED_1000baseT_Half;
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 
 	/* support both flow controls */
+	phy_support_asym_pause(phydev);
 	phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
-	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-	mii_adv = (u32)mii_advertise_flowctrl(phy->fc_request_control);
-	phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
 	phy->fc_autoneg = phydev->autoneg;
 
 	phy_start(phydev);
@@ -1412,7 +1393,8 @@
 }
 
 static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
-				     unsigned int frame_length)
+				     unsigned int frame_length,
+				     int nr_frags)
 {
 	/* called only from within lan743x_tx_xmit_frame.
 	 * assuming tx->ring_lock has already been acquired.
@@ -1422,6 +1404,10 @@
 
 	/* wrap up previous descriptor */
 	tx->frame_data0 |= TX_DESC_DATA0_EXT_;
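+	/* With no fragments to follow, this extension descriptor is also
+	 * the last descriptor of the frame, so it must carry the
+	 * last-segment and interrupt-on-completion flags itself.
+	 */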
+	if (nr_frags <= 0) {
+		tx->frame_data0 |= TX_DESC_DATA0_LS_;
+		tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+	}
 	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
 	tx_descriptor->data0 = tx->frame_data0;
 
@@ -1448,7 +1434,7 @@
 }
 
 static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx,
-					 const struct skb_frag_struct *fragment,
+					 const skb_frag_t *fragment,
 					 unsigned int frame_length)
 {
 	/* called only from within lan743x_tx_xmit_frame
@@ -1526,8 +1512,11 @@
 	u32 tx_tail_flags = 0;
 
 	/* wrap up previous descriptor */
-	tx->frame_data0 |= TX_DESC_DATA0_LS_;
-	tx->frame_data0 |= TX_DESC_DATA0_IOC_;
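+	/* Only plain data descriptors take LS/IOC here; an LSO extension
+	 * descriptor has already been flagged in
+	 * lan743x_tx_frame_add_lso().
+	 */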
+	if ((tx->frame_data0 & TX_DESC_DATA0_DTYPE_MASK_) ==
+	    TX_DESC_DATA0_DTYPE_DATA_) {
+		tx->frame_data0 |= TX_DESC_DATA0_LS_;
+		tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+	}
 
 	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
 	buffer_info = &tx->buffer_info[tx->frame_tail];
@@ -1612,15 +1601,14 @@
 	}
 
 	if (gso)
-		lan743x_tx_frame_add_lso(tx, frame_length);
+		lan743x_tx_frame_add_lso(tx, frame_length, nr_frags);
 
 	if (nr_frags <= 0)
 		goto finish;
 
 	for (j = 0; j < nr_frags; j++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag = &(skb_shinfo(skb)->frags[j]);
 
-		frag = &(skb_shinfo(skb)->frags[j]);
 		if (lan743x_tx_frame_add_fragment(tx, frag, frame_length)) {
 			/* upon error no need to call
 			 *	lan743x_tx_frame_end
@@ -1906,7 +1894,17 @@
 	return ((++index) % rx->ring_size);
 }
 
-static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index)
+static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx)
+{
+	int length;
+
+	length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING);
+	return __netdev_alloc_skb(rx->adapter->netdev,
+				  length, GFP_ATOMIC | GFP_DMA);
+}
+
+static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index,
+					struct sk_buff *skb)
 {
 	struct lan743x_rx_buffer_info *buffer_info;
 	struct lan743x_rx_descriptor *descriptor;
@@ -1915,9 +1913,7 @@
 	length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING);
 	descriptor = &rx->ring_cpu_ptr[index];
 	buffer_info = &rx->buffer_info[index];
-	buffer_info->skb = __netdev_alloc_skb(rx->adapter->netdev,
-					      length,
-					      GFP_ATOMIC | GFP_DMA);
+	buffer_info->skb = skb;
 	if (!(buffer_info->skb))
 		return -ENOMEM;
 	buffer_info->dma_ptr = dma_map_single(&rx->adapter->pdev->dev,
@@ -2064,8 +2060,19 @@
 		/* packet is available */
 		if (first_index == last_index) {
 			/* single buffer packet */
+			struct sk_buff *new_skb = NULL;
 			int packet_length;
 
+			new_skb = lan743x_rx_allocate_skb(rx);
+			if (!new_skb) {
+				/* Failed to allocate the next skb: memory
+				 * is very low, so drop this packet and
+				 * reuse the current buffer.
+				 */
+				lan743x_rx_reuse_ring_element(rx, first_index);
+				goto process_extension;
+			}
+
 			buffer_info = &rx->buffer_info[first_index];
 			skb = buffer_info->skb;
 			descriptor = &rx->ring_cpu_ptr[first_index];
@@ -2085,7 +2092,7 @@
 			skb_put(skb, packet_length - 4);
 			skb->protocol = eth_type_trans(skb,
 						       rx->adapter->netdev);
-			lan743x_rx_allocate_ring_element(rx, first_index);
+			lan743x_rx_init_ring_element(rx, first_index, new_skb);
 		} else {
 			int index = first_index;
 
@@ -2098,26 +2105,23 @@
 			if (first_index <= last_index) {
 				while ((index >= first_index) &&
 				       (index <= last_index)) {
-					lan743x_rx_release_ring_element(rx,
-									index);
-					lan743x_rx_allocate_ring_element(rx,
-									 index);
+					lan743x_rx_reuse_ring_element(rx,
+								      index);
 					index = lan743x_rx_next_index(rx,
 								      index);
 				}
 			} else {
 				while ((index >= first_index) ||
 				       (index <= last_index)) {
-					lan743x_rx_release_ring_element(rx,
-									index);
-					lan743x_rx_allocate_ring_element(rx,
-									 index);
+					lan743x_rx_reuse_ring_element(rx,
+								      index);
 					index = lan743x_rx_next_index(rx,
 								      index);
 				}
 			}
 		}
 
+process_extension:
 		if (extension_index >= 0) {
 			descriptor = &rx->ring_cpu_ptr[extension_index];
 			buffer_info = &rx->buffer_info[extension_index];
@@ -2168,9 +2172,8 @@
 	}
 	count = 0;
 	while (count < weight) {
-		int rx_process_result = -1;
+		int rx_process_result = lan743x_rx_process_packet(rx);
 
-		rx_process_result = lan743x_rx_process_packet(rx);
 		if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) {
 			count++;
 		} else if (rx_process_result ==
@@ -2294,7 +2297,9 @@
 
 	rx->last_head = 0;
 	for (index = 0; index < rx->ring_size; index++) {
-		ret = lan743x_rx_allocate_ring_element(rx, index);
+		struct sk_buff *new_skb = lan743x_rx_allocate_skb(rx);
+
+		ret = lan743x_rx_init_ring_element(rx, index, new_skb);
 		if (ret)
 			goto cleanup;
 	}
@@ -2722,8 +2727,9 @@
 	snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE,
 		 "pci-%s", pci_name(adapter->pdev));
 
-	/* set to internal PHY id */
-	adapter->mdiobus->phy_mask = ~(u32)BIT(1);
+	if ((adapter->csr.id_rev & ID_REV_ID_MASK_) == ID_REV_ID_LAN7430_)
+		/* LAN7430 uses internal phy at address 1 */
+		adapter->mdiobus->phy_mask = ~(u32)BIT(1);
 
 	/* register mdiobus */
 	ret = mdiobus_register(adapter->mdiobus);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 2d6eea1..3b02eea 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -26,6 +26,8 @@
 #define FPGA_REV_GET_MAJOR_(fpga_rev)	((fpga_rev) & 0x000000FF)
 
 #define HW_CFG					(0x010)
+#define HW_CFG_RELOAD_TYPE_ALL_			(0x00000FC0)
+#define HW_CFG_EE_OTP_RELOAD_			BIT(4)
 #define HW_CFG_LRST_				BIT(1)
 
 #define PMT_CTL					(0x014)
@@ -453,17 +455,19 @@
 #define OTP_PWR_DN				(0x1000)
 #define OTP_PWR_DN_PWRDN_N_			BIT(0)
 
-#define OTP_ADDR1				(0x1004)
-#define OTP_ADDR1_15_11_MASK_			(0x1F)
-
-#define OTP_ADDR2				(0x1008)
-#define OTP_ADDR2_10_3_MASK_			(0xFF)
+#define OTP_ADDR_HIGH				(0x1004)
+#define OTP_ADDR_LOW				(0x1008)
 
 #define OTP_PRGM_DATA				(0x1010)
 
 #define OTP_PRGM_MODE				(0x1014)
 #define OTP_PRGM_MODE_BYTE_			BIT(0)
 
+#define OTP_READ_DATA				(0x1018)
+
+#define OTP_FUNC_CMD				(0x1020)
+#define OTP_FUNC_CMD_READ_			BIT(0)
+
 #define OTP_TST_CMD				(0x1024)
 #define OTP_TST_CMD_PRGVRFY_			BIT(3)
 
@@ -713,6 +717,9 @@
 	struct lan743x_phy      phy;
 	struct lan743x_tx       tx[LAN743X_MAX_TX_CHANNELS];
 	struct lan743x_rx       rx[LAN743X_MAX_RX_CHANNELS];
+
+#define LAN743X_ADAPTER_FLAG_OTP		BIT(0)
+	u32			flags;
 };
 
 #define LAN743X_COMPONENT_FLAG_RX(channel)  BIT(20 + (channel))
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index ccdf912..e8fe9a9 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -429,6 +429,10 @@
 	int pulse_width = 0;
 	int perout_bit = 0;
 
+	/* Reject requests with unsupported flags */
+	if (perout->flags)
+		return -EOPNOTSUPP;
+
 	if (!on) {
 		lan743x_ptp_perout_off(adapter);
 		return 0;
@@ -963,8 +967,7 @@
 		index++) {
 		struct sk_buff *skb = ptp->tx_ts_skb_queue[index];
 
-		if (skb)
-			dev_kfree_skb(skb);
+		dev_kfree_skb(skb);
 		ptp->tx_ts_skb_queue[index] = NULL;
 		ptp->tx_ts_seconds_queue[index] = 0;
 		ptp->tx_ts_nseconds_queue[index] = 0;
@@ -977,8 +980,8 @@
 	lan743x_ptp_disable(adapter);
 }
 
-void lan743x_ptp_set_sync_ts_insert(struct lan743x_adapter *adapter,
-				    bool ts_insert_enable)
+static void lan743x_ptp_set_sync_ts_insert(struct lan743x_adapter *adapter,
+					   bool ts_insert_enable)
 {
 	u32 ptp_tx_mod = lan743x_csr_read(adapter, PTP_TX_MOD);
 
diff --git a/drivers/net/ethernet/moxa/Kconfig b/drivers/net/ethernet/moxa/Kconfig
index 5b531da..1a7cacb 100644
--- a/drivers/net/ethernet/moxa/Kconfig
+++ b/drivers/net/ethernet/moxa/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # MOXART device configuration
 #
diff --git a/drivers/net/ethernet/moxa/Makefile b/drivers/net/ethernet/moxa/Makefile
index aa3c73e..864e179 100644
--- a/drivers/net/ethernet/moxa/Makefile
+++ b/drivers/net/ethernet/moxa/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the MOXART network device drivers.
 #
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index b34055a..e165175 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -81,11 +81,13 @@
 				 priv->rx_buf_size, DMA_FROM_DEVICE);
 
 	if (priv->tx_desc_base)
-		dma_free_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM,
+		dma_free_coherent(&priv->pdev->dev,
+				  TX_REG_DESC_SIZE * TX_DESC_NUM,
 				  priv->tx_desc_base, priv->tx_base);
 
 	if (priv->rx_desc_base)
-		dma_free_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM,
+		dma_free_coherent(&priv->pdev->dev,
+				  RX_REG_DESC_SIZE * RX_DESC_NUM,
 				  priv->rx_desc_base, priv->rx_base);
 
 	kfree(priv->tx_buf_base);
@@ -298,7 +300,7 @@
 		ndev->stats.tx_packets++;
 		ndev->stats.tx_bytes += priv->tx_skb[tx_tail]->len;
 
-		dev_kfree_skb_irq(priv->tx_skb[tx_tail]);
+		dev_consume_skb_irq(priv->tx_skb[tx_tail]);
 		priv->tx_skb[tx_tail] = NULL;
 
 		tx_tail = TX_NEXT(tx_tail);
@@ -476,6 +478,7 @@
 
 	priv = netdev_priv(ndev);
 	priv->ndev = ndev;
+	priv->pdev = pdev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ndev->base_addr = res->start;
@@ -491,7 +494,7 @@
 	priv->tx_buf_size = TX_BUF_SIZE;
 	priv->rx_buf_size = RX_BUF_SIZE;
 
-	priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE *
+	priv->tx_desc_base = dma_alloc_coherent(&pdev->dev, TX_REG_DESC_SIZE *
 						TX_DESC_NUM, &priv->tx_base,
 						GFP_DMA | GFP_KERNEL);
 	if (!priv->tx_desc_base) {
@@ -499,7 +502,7 @@
 		goto init_fail;
 	}
 
-	priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE *
+	priv->rx_desc_base = dma_alloc_coherent(&pdev->dev, RX_REG_DESC_SIZE *
 						RX_DESC_NUM, &priv->rx_base,
 						GFP_DMA | GFP_KERNEL);
 	if (!priv->rx_desc_base) {
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h
index bee608b..bf4c302 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -292,6 +292,7 @@
 #define LINK_STATUS		0x4
 
 struct moxart_mac_priv_t {
+	struct platform_device *pdev;
 	void __iomem *base;
 	unsigned int reg_maccr;
 	unsigned int reg_imr;
diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
index 36c8462..bcec058 100644
--- a/drivers/net/ethernet/mscc/Kconfig
+++ b/drivers/net/ethernet/mscc/Kconfig
@@ -23,6 +23,8 @@
 config MSCC_OCELOT_SWITCH_OCELOT
 	tristate "Ocelot switch driver on Ocelot"
 	depends on MSCC_OCELOT_SWITCH
+	depends on GENERIC_PHY
+	depends on OF_NET
 	help
 	  This driver supports the Ocelot network switch device as present on
 	  the Ocelot SoCs.
diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile
index cb52a3b..9a36c26 100644
--- a/drivers/net/ethernet/mscc/Makefile
+++ b/drivers/net/ethernet/mscc/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: (GPL-2.0 OR MIT)
 obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
 mscc_ocelot_common-y := ocelot.o ocelot_io.o
-mscc_ocelot_common-y += ocelot_regs.o
+mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o ocelot_flower.o
 obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index ed4e298..672ea13 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -14,13 +14,19 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/skbuff.h>
+#include <linux/iopoll.h>
 #include <net/arp.h>
 #include <net/netevent.h>
 #include <net/rtnetlink.h>
 #include <net/switchdev.h>
 
 #include "ocelot.h"
+#include "ocelot_ace.h"
+
+#define TABLE_UPDATE_SLEEP_US 10
+#define TABLE_UPDATE_TIMEOUT_US 100000
 
 /* MAC table entry types.
  * ENTRYTYPE_NORMAL is subject to aging.
@@ -41,23 +47,20 @@
 	enum macaccess_entry_type type;
 };
 
+static inline u32 ocelot_mact_read_macaccess(struct ocelot *ocelot)
+{
+	return ocelot_read(ocelot, ANA_TABLES_MACACCESS);
+}
+
 static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
 {
-	unsigned int val, timeout = 10;
+	u32 val;
 
-	/* Wait for the issued mac table command to be completed, or timeout.
-	 * When the command read from  ANA_TABLES_MACACCESS is
-	 * MACACCESS_CMD_IDLE, the issued command completed successfully.
-	 */
-	do {
-		val = ocelot_read(ocelot, ANA_TABLES_MACACCESS);
-		val &= ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M;
-	} while (val != MACACCESS_CMD_IDLE && timeout--);
-
-	if (!timeout)
-		return -ETIMEDOUT;
-
-	return 0;
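+	/* Poll ANA_TABLES_MACACCESS until the issued MAC table command
+	 * returns to MACACCESS_CMD_IDLE, or time out after 100 ms.
+	 */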
+	return readx_poll_timeout(ocelot_mact_read_macaccess,
+		ocelot, val,
+		(val & ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M) ==
+		MACACCESS_CMD_IDLE,
+		TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
 }
 
 static void ocelot_mact_select(struct ocelot *ocelot,
@@ -129,23 +132,28 @@
 	ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
 }
 
+static void ocelot_vcap_enable(struct ocelot *ocelot, struct ocelot_port *port)
+{
+	ocelot_write_gix(ocelot, ANA_PORT_VCAP_S2_CFG_S2_ENA |
+			 ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(0xa),
+			 ANA_PORT_VCAP_S2_CFG, port->chip_port);
+}
+
+static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot)
+{
+	return ocelot_read(ocelot, ANA_TABLES_VLANACCESS);
+}
+
 static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
 {
-	unsigned int val, timeout = 10;
+	u32 val;
 
-	/* Wait for the issued vlan table command to be completed, or timeout.
-	 * When the command read from ANA_TABLES_VLANACCESS is
-	 * VLANACCESS_CMD_IDLE, the issued command completed successfully.
-	 */
-	do {
-		val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS);
-		val &= ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M;
-	} while (val != ANA_TABLES_VLANACCESS_CMD_IDLE && timeout--);
-
-	if (!timeout)
-		return -ETIMEDOUT;
-
-	return 0;
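+	/* Poll ANA_TABLES_VLANACCESS until the issued VLAN table command
+	 * returns to VLANACCESS_CMD_IDLE, or time out after 100 ms.
+	 */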
+	return readx_poll_timeout(ocelot_vlant_read_vlanaccess,
+		ocelot,
+		val,
+		(val & ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M) ==
+		ANA_TABLES_VLANACCESS_CMD_IDLE,
+		TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
 }
 
 static int ocelot_vlant_set_mask(struct ocelot *ocelot, u16 vid, u32 mask)
@@ -253,8 +261,15 @@
 		port->pvid = vid;
 
 	/* Untagged egress vlan classification */
-	if (untagged)
+	if (untagged && port->vid != vid) {
+		if (port->vid) {
+			dev_err(ocelot->dev,
+				"Port already has a native VLAN: %d\n",
+				port->vid);
+			return -EBUSY;
+		}
 		port->vid = vid;
+	}
 
 	ocelot_vlan_port_apply(ocelot, port);
 
@@ -482,8 +497,17 @@
 			 ANA_PORT_PORT_CFG_PORTID_VAL(port->chip_port),
 			 ANA_PORT_PORT_CFG, port->chip_port);
 
+	if (port->serdes) {
+		err = phy_set_mode_ext(port->serdes, PHY_MODE_ETHERNET,
+				       port->phy_mode);
+		if (err) {
+			netdev_err(dev, "Could not set mode of SerDes\n");
+			return err;
+		}
+	}
+
 	err = phy_connect_direct(dev, port->phy, &ocelot_port_adjust_link,
-				 PHY_INTERFACE_MODE_NA);
+				 port->phy_mode);
 	if (err) {
 		netdev_err(dev, "Could not attach to PHY\n");
 		return err;
@@ -522,7 +546,7 @@
  */
 static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info)
 {
-	ifh[0] = IFH_INJ_BYPASS;
+	ifh[0] = IFH_INJ_BYPASS | ((0x1ff & info->rew_op) << 21);
 	ifh[1] = (0xf00 & info->port) >> 8;
 	ifh[2] = (0xff & info->port) << 24;
 	ifh[3] = (info->tag_type << 16) | info->vid;
@@ -532,6 +556,7 @@
 
 static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	struct ocelot_port *port = netdev_priv(dev);
 	struct ocelot *ocelot = port->ocelot;
 	u32 val, ifh[IFH_LEN];
@@ -550,6 +575,14 @@
 	info.port = BIT(port->chip_port);
 	info.tag_type = IFH_TAG_TYPE_C;
 	info.vid = skb_vlan_tag_get(skb);
+
+	/* Check if timestamping is needed */
+	if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) {
+		info.rew_op = port->ptp_cmd;
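+		/* Two-step PTP carries a 2-bit timestamp ID (hence the
+		 * "% 4") in bits 4:3 of the rewriter op, so the stamp
+		 * can later be matched to the skb queued below.
+		 */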
+		if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+			info.rew_op |= (port->ts_id % 4) << 3;
+	}
+
 	ocelot_gen_ifh(ifh, &info);
 
 	for (i = 0; i < IFH_LEN; i++)
@@ -580,50 +613,77 @@
 
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
-	dev_kfree_skb_any(skb);
 
+	if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP &&
+	    port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
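+		/* The skb must outlive this function: keep it on the
+		 * port's list until the TX timestamp has been read back
+		 * from hardware, and only then free it.
+		 */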
+		struct ocelot_skb *oskb =
+			kzalloc(sizeof(struct ocelot_skb), GFP_ATOMIC);
+
+		if (unlikely(!oskb))
+			goto out;
+
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+		oskb->skb = skb;
+		oskb->id = port->ts_id % 4;
+		port->ts_id++;
+
+		list_add_tail(&oskb->head, &port->skbs);
+
+		return NETDEV_TX_OK;
+	}
+
+out:
+	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }
 
-static void ocelot_mact_mc_reset(struct ocelot_port *port)
+void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts)
 {
-	struct ocelot *ocelot = port->ocelot;
-	struct netdev_hw_addr *ha, *n;
+	unsigned long flags;
+	u32 val;
 
-	/* Free and forget all the MAC addresses stored in the port private mc
-	 * list. These are mc addresses that were previously added by calling
-	 * ocelot_mact_mc_add().
-	 */
-	list_for_each_entry_safe(ha, n, &port->mc, list) {
-		ocelot_mact_forget(ocelot, ha->addr, port->pvid);
-		list_del(&ha->list);
-		kfree(ha);
-	}
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	/* Read current PTP time to get seconds */
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+	ts->tv_sec = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+
+	/* Read packet HW timestamp from FIFO */
+	val = ocelot_read(ocelot, SYS_PTP_TXSTAMP);
+	ts->tv_nsec = SYS_PTP_TXSTAMP_PTP_TXSTAMP(val);
+
+	/* The FIFO stores only bit 0 of the seconds counter; if it
+	 * differs from the current time, the second has rolled over
+	 * since the timestamp was taken, so step back by one.
+	 */
+	if ((ts->tv_sec & 0x1) != !!(val & SYS_PTP_TXSTAMP_PTP_TXSTAMP_SEC))
+		ts->tv_sec--;
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+}
+EXPORT_SYMBOL(ocelot_get_hwtimestamp);
+
+static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr)
+{
+	struct ocelot_port *port = netdev_priv(dev);
+
+	return ocelot_mact_forget(port->ocelot, addr, port->pvid);
 }
 
-static int ocelot_mact_mc_add(struct ocelot_port *port,
-			      struct netdev_hw_addr *hw_addr)
+static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr)
 {
-	struct ocelot *ocelot = port->ocelot;
-	struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL);
+	struct ocelot_port *port = netdev_priv(dev);
 
-	if (!ha)
-		return -ENOMEM;
-
-	memcpy(ha, hw_addr, sizeof(*ha));
-	list_add_tail(&ha->list, &port->mc);
-
-	ocelot_mact_learn(ocelot, PGID_CPU, ha->addr, port->pvid,
-			  ENTRYTYPE_LOCKED);
-
-	return 0;
+	return ocelot_mact_learn(port->ocelot, PGID_CPU, addr, port->pvid,
+				 ENTRYTYPE_LOCKED);
 }
 
 static void ocelot_set_rx_mode(struct net_device *dev)
 {
 	struct ocelot_port *port = netdev_priv(dev);
 	struct ocelot *ocelot = port->ocelot;
-	struct netdev_hw_addr *ha;
 	int i;
 	u32 val;
 
@@ -635,13 +695,7 @@
 	for (i = ocelot->num_phys_ports + 1; i < PGID_CPU; i++)
 		ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
 
-	/* Handle the device multicast addresses. First remove all the
-	 * previously installed addresses and then add the latest ones to the
-	 * mac table.
-	 */
-	ocelot_mact_mc_reset(port);
-	netdev_for_each_mc_addr(ha, dev)
-		ocelot_mact_mc_add(port, ha);
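+	/* Let the core sync the netdev's multicast list into the MAC
+	 * table via the ocelot_mc_sync()/ocelot_mc_unsync() callbacks.
+	 */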
+	__dev_mc_sync(dev, ocelot_mc_sync, ocelot_mc_unsync);
 }
 
 static int ocelot_port_get_phys_port_name(struct net_device *dev,
@@ -713,7 +767,8 @@
 
 static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			  struct net_device *dev, const unsigned char *addr,
-			  u16 vid, u16 flags)
+			  u16 vid, u16 flags,
+			  struct netlink_ext_ack *extack)
 {
 	struct ocelot_port *port = netdev_priv(dev);
 	struct ocelot *ocelot = port->ocelot;
@@ -733,7 +788,7 @@
 	}
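+	/* User-installed entries are static; ENTRYTYPE_LOCKED exempts
+	 * them from the aging applied to ENTRYTYPE_NORMAL.
+	 */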
 
 	return ocelot_mact_learn(ocelot, port->chip_port, addr, vid,
-				 ENTRYTYPE_NORMAL);
+				 ENTRYTYPE_LOCKED);
 }
 
 static int ocelot_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
@@ -886,7 +941,7 @@
 static int ocelot_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 				  u16 vid)
 {
-	return ocelot_vlan_vid_add(dev, vid, false, true);
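+	/* VIDs added via 8021q are egress-tagged, so they cannot claim
+	 * the port's single native (untagged) VLAN.
+	 */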
+	return ocelot_vlan_vid_add(dev, vid, false, false);
 }
 
 static int ocelot_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
@@ -901,12 +956,122 @@
 	struct ocelot_port *port = netdev_priv(dev);
 	netdev_features_t changed = dev->features ^ features;
 
+	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
+	    port->tc.offload_cnt) {
+		netdev_err(dev,
+			   "Cannot disable HW TC offload while offloads active\n");
+		return -EBUSY;
+	}
+
 	if (changed & NETIF_F_HW_VLAN_CTAG_FILTER)
 		ocelot_vlan_mode(port, features);
 
 	return 0;
 }
 
+static int ocelot_get_port_parent_id(struct net_device *dev,
+				     struct netdev_phys_item_id *ppid)
+{
+	struct ocelot_port *ocelot_port = netdev_priv(dev);
+	struct ocelot *ocelot = ocelot_port->ocelot;
+
+	ppid->id_len = sizeof(ocelot->base_mac);
+	memcpy(&ppid->id, &ocelot->base_mac, ppid->id_len);
+
+	return 0;
+}
+
+static int ocelot_hwstamp_get(struct ocelot_port *port, struct ifreq *ifr)
+{
+	struct ocelot *ocelot = port->ocelot;
+
+	return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
+			    sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int ocelot_hwstamp_set(struct ocelot_port *port, struct ifreq *ifr)
+{
+	struct ocelot *ocelot = port->ocelot;
+	struct hwtstamp_config cfg;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (cfg.flags)
+		return -EINVAL;
+
+	/* Tx type sanity check */
+	switch (cfg.tx_type) {
+	case HWTSTAMP_TX_ON:
+		port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
+		break;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		/* IFH_REW_OP_ONE_STEP_PTP only updates the correction
+		 * field; the origin time must be updated here, hence
+		 * IFH_REW_OP_ORIGIN_PTP.
+		 */
+		port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP;
+		break;
+	case HWTSTAMP_TX_OFF:
+		port->ptp_cmd = 0;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	mutex_lock(&ocelot->ptp_lock);
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	default:
+		mutex_unlock(&ocelot->ptp_lock);
+		return -ERANGE;
+	}
+
+	/* Commit back the result & save it */
+	memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
+	mutex_unlock(&ocelot->ptp_lock);
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct ocelot_port *port = netdev_priv(dev);
+	struct ocelot *ocelot = port->ocelot;
+
+	/* The function is only used for PTP operations for now */
+	if (!ocelot->ptp)
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return ocelot_hwstamp_set(port, ifr);
+	case SIOCGHWTSTAMP:
+		return ocelot_hwstamp_get(port, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops ocelot_port_netdev_ops = {
 	.ndo_open			= ocelot_port_open,
 	.ndo_stop			= ocelot_port_stop,
@@ -921,6 +1086,9 @@
 	.ndo_vlan_rx_add_vid		= ocelot_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid		= ocelot_vlan_rx_kill_vid,
 	.ndo_set_features		= ocelot_set_features,
+	.ndo_get_port_parent_id		= ocelot_get_port_parent_id,
+	.ndo_setup_tc			= ocelot_setup_tc,
+	.ndo_do_ioctl			= ocelot_ioctl,
 };
 
 static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
@@ -937,10 +1105,8 @@
 		       ETH_GSTRING_LEN);
 }
 
-static void ocelot_check_stats(struct work_struct *work)
+static void ocelot_update_stats(struct ocelot *ocelot)
 {
-	struct delayed_work *del_work = to_delayed_work(work);
-	struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work);
 	int i, j;
 
 	mutex_lock(&ocelot->stats_lock);
@@ -964,11 +1130,19 @@
 		}
 	}
 
-	cancel_delayed_work(&ocelot->stats_work);
+	mutex_unlock(&ocelot->stats_lock);
+}
+
+static void ocelot_check_stats_work(struct work_struct *work)
+{
+	struct delayed_work *del_work = to_delayed_work(work);
+	struct ocelot *ocelot = container_of(del_work, struct ocelot,
+					     stats_work);
+
+	ocelot_update_stats(ocelot);
+
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
-
-	mutex_unlock(&ocelot->stats_lock);
 }
 
 static void ocelot_get_ethtool_stats(struct net_device *dev,
@@ -979,7 +1153,7 @@
 	int i;
 
 	/* check and update now */
-	ocelot_check_stats(&ocelot->stats_work.work);
+	ocelot_update_stats(ocelot);
 
 	/* Copy all counters */
 	for (i = 0; i < ocelot->num_stats; i++)
@@ -996,33 +1170,39 @@
 	return ocelot->num_stats;
 }
 
+static int ocelot_get_ts_info(struct net_device *dev,
+			      struct ethtool_ts_info *info)
+{
+	struct ocelot_port *ocelot_port = netdev_priv(dev);
+	struct ocelot *ocelot = ocelot_port->ocelot;
+
+	if (!ocelot->ptp)
+		return ethtool_op_get_ts_info(dev, info);
+
+	info->phc_index = ocelot->ptp_clock ?
+			  ptp_clock_index(ocelot->ptp_clock) : -1;
+	info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+				 SOF_TIMESTAMPING_RX_SOFTWARE |
+				 SOF_TIMESTAMPING_SOFTWARE |
+				 SOF_TIMESTAMPING_TX_HARDWARE |
+				 SOF_TIMESTAMPING_RX_HARDWARE |
+				 SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
+			 BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+
+	return 0;
+}
+
 static const struct ethtool_ops ocelot_ethtool_ops = {
 	.get_strings		= ocelot_get_strings,
 	.get_ethtool_stats	= ocelot_get_ethtool_stats,
 	.get_sset_count		= ocelot_get_sset_count,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_ts_info		= ocelot_get_ts_info,
 };
 
-static int ocelot_port_attr_get(struct net_device *dev,
-				struct switchdev_attr *attr)
-{
-	struct ocelot_port *ocelot_port = netdev_priv(dev);
-	struct ocelot *ocelot = ocelot_port->ocelot;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(ocelot->base_mac);
-		memcpy(&attr->u.ppid.id, &ocelot->base_mac,
-		       attr->u.ppid.id_len);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
 static int ocelot_port_attr_stp_state_set(struct ocelot_port *ocelot_port,
 					  struct switchdev_trans *trans,
 					  u8 state)
@@ -1279,7 +1459,8 @@
 
 static int ocelot_port_obj_add(struct net_device *dev,
 			       const struct switchdev_obj *obj,
-			       struct switchdev_trans *trans)
+			       struct switchdev_trans *trans,
+			       struct netlink_ext_ack *extack)
 {
 	int ret = 0;
 
@@ -1320,13 +1501,6 @@
 	return ret;
 }
 
-static const struct switchdev_ops ocelot_port_switchdev_ops = {
-	.switchdev_port_attr_get	= ocelot_port_attr_get,
-	.switchdev_port_attr_set	= ocelot_port_attr_set,
-	.switchdev_port_obj_add		= ocelot_port_obj_add,
-	.switchdev_port_obj_del		= ocelot_port_obj_del,
-};
-
 static int ocelot_port_bridge_join(struct ocelot_port *ocelot_port,
 				   struct net_device *bridge)
 {
@@ -1506,9 +1680,6 @@
 	struct ocelot_port *ocelot_port = netdev_priv(dev);
 	int err = 0;
 
-	if (!ocelot_netdevice_dev_check(dev))
-		return 0;
-
 	switch (event) {
 	case NETDEV_CHANGEUPPER:
 		if (netif_is_bridge_master(info->upper_dev)) {
@@ -1545,12 +1716,16 @@
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	int ret = 0;
 
+	if (!ocelot_netdevice_dev_check(dev))
+		return 0;
+
 	if (event == NETDEV_PRECHANGEUPPER &&
 	    netif_is_lag_master(info->upper_dev)) {
 		struct netdev_lag_upper_info *lag_upper_info = info->upper_info;
 		struct netlink_ext_ack *extack;
 
-		if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+		if (lag_upper_info &&
+		    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
 			extack = netdev_notifier_info_to_extack(&info->info);
 			NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
 
@@ -1581,6 +1756,251 @@
 };
 EXPORT_SYMBOL(ocelot_netdevice_nb);
 
+static int ocelot_switchdev_event(struct notifier_block *unused,
+				  unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err;
+
+	switch (event) {
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     ocelot_netdevice_dev_check,
+						     ocelot_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block ocelot_switchdev_nb __read_mostly = {
+	.notifier_call = ocelot_switchdev_event,
+};
+EXPORT_SYMBOL(ocelot_switchdev_nb);
+
+static int ocelot_switchdev_blocking_event(struct notifier_block *unused,
+					   unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err;
+
+	switch (event) {
+		/* Blocking events. */
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = switchdev_handle_port_obj_add(dev, ptr,
+						    ocelot_netdevice_dev_check,
+						    ocelot_port_obj_add);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = switchdev_handle_port_obj_del(dev, ptr,
+						    ocelot_netdevice_dev_check,
+						    ocelot_port_obj_del);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     ocelot_netdevice_dev_check,
+						     ocelot_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = {
+	.notifier_call = ocelot_switchdev_blocking_event,
+};
+EXPORT_SYMBOL(ocelot_switchdev_blocking_nb);
+
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	unsigned long flags;
+	time64_t s;
+	u32 val;
+	s64 ns;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	s = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN) & 0xffff;
+	s <<= 32;
+	s += ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+	ns = ocelot_read_rix(ocelot, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+
+	/* Deal with negative values: the nanosecond register encodes
+	 * -16..-1 ns as 0x3ffffff0..0x3fffffff, so fold such readings
+	 * back into the previous second (999999984 == 10^9 - 16).
+	 */
+	if (ns >= 0x3ffffff0 && ns <= 0x3fffffff) {
+		s--;
+		ns &= 0xf;
+		ns += 999999984;
+	}
+
+	set_normalized_timespec64(ts, s, ns);
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_gettime64);
+
+static int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
+				const struct timespec64 *ts)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	ocelot_write_rix(ocelot, lower_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_LSB,
+			 TOD_ACC_PIN);
+	ocelot_write_rix(ocelot, upper_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_MSB,
+			 TOD_ACC_PIN);
+	ocelot_write_rix(ocelot, ts->tv_nsec, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_LOAD);
+
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+}
+
+static int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
+		struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+		unsigned long flags;
+		u32 val;
+
+		spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN);
+		ocelot_write_rix(ocelot, delta, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_DELTA);
+
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+		spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	} else {
+		/* Fall back using ocelot_ptp_settime64 which is not exact. */
+		struct timespec64 ts;
+		u64 now;
+
+		ocelot_ptp_gettime64(ptp, &ts);
+
+		now = ktime_to_ns(timespec64_to_ktime(ts));
+		ts = ns_to_timespec64(now + delta);
+
+		ocelot_ptp_settime64(ptp, &ts);
+	}
+	return 0;
+}
+
+static int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	u32 unit = 0, direction = 0;
+	unsigned long flags;
+	u64 adj = 0;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	if (!scaled_ppm)
+		goto disable_adj;
+
+	if (scaled_ppm < 0) {
+		direction = PTP_CFG_CLK_ADJ_CFG_DIR;
+		scaled_ppm = -scaled_ppm;
+	}
+
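+	/* scaled_ppm has a 16-bit binary fraction, so adj works out to
+	 * 10^9 / ppm -- presumably the interval between single-unit
+	 * corrections expected by PTP_CLK_CFG_ADJ_FREQ.
+	 */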
+	adj = PSEC_PER_SEC << 16;
+	do_div(adj, scaled_ppm);
+	do_div(adj, 1000);
+
+	/* If the adjustment value is too large, use ns instead */
+	if (adj >= (1L << 30)) {
+		unit = PTP_CFG_CLK_ADJ_FREQ_NS;
+		do_div(adj, 1000);
+	}
+
+	/* Still too big */
+	if (adj >= (1L << 30))
+		goto disable_adj;
+
+	ocelot_write(ocelot, unit | adj, PTP_CLK_CFG_ADJ_FREQ);
+	ocelot_write(ocelot, PTP_CFG_CLK_ADJ_CFG_ENA | direction,
+		     PTP_CLK_CFG_ADJ_CFG);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+
+disable_adj:
+	ocelot_write(ocelot, 0, PTP_CLK_CFG_ADJ_CFG);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+}
+
+static struct ptp_clock_info ocelot_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "ocelot ptp",
+	.max_adj	= 0x7fffffff,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.gettime64	= ocelot_ptp_gettime64,
+	.settime64	= ocelot_ptp_settime64,
+	.adjtime	= ocelot_ptp_adjtime,
+	.adjfine	= ocelot_ptp_adjfine,
+};
+
+static int ocelot_init_timestamp(struct ocelot *ocelot)
+{
+	ocelot->ptp_info = ocelot_ptp_clock_info;
+	ocelot->ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
+	if (IS_ERR(ocelot->ptp_clock))
+		return PTR_ERR(ocelot->ptp_clock);
+	/* Check if PHC support is missing at the configuration level */
+	if (!ocelot->ptp_clock)
+		return 0;
+
+	ocelot_write(ocelot, SYS_PTP_CFG_PTP_STAMP_WID(30), SYS_PTP_CFG);
+	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_LOW);
+	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_HIGH);
+
+	ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC);
+
+	/* There is no device reconfiguration; PTP Rx stamping is always
+	 * enabled.
+	 */
+	ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+	return 0;
+}
+
 int ocelot_probe_port(struct ocelot *ocelot, u8 port,
 		      void __iomem *regs,
 		      struct phy_device *phy)
@@ -1599,21 +2019,22 @@
 	ocelot_port->regs = regs;
 	ocelot_port->chip_port = port;
 	ocelot_port->phy = phy;
-	INIT_LIST_HEAD(&ocelot_port->mc);
 	ocelot->ports[port] = ocelot_port;
 
 	dev->netdev_ops = &ocelot_port_netdev_ops;
 	dev->ethtool_ops = &ocelot_ethtool_ops;
-	dev->switchdev_ops = &ocelot_port_switchdev_ops;
 
-	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS |
+		NETIF_F_HW_TC;
+	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
 
 	memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN);
 	dev->dev_addr[ETH_ALEN - 1] += port;
 	ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid,
 			  ENTRYTYPE_LOCKED);
 
+	INIT_LIST_HEAD(&ocelot_port->skbs);
+
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(ocelot->dev, "register_netdev failed\n");
@@ -1623,6 +2044,9 @@
 	/* Basic L2 initialization */
 	ocelot_vlan_port_apply(ocelot, ocelot_port);
 
+	/* Enable vcap lookups */
+	ocelot_vcap_enable(ocelot, ocelot_port);
+
 	return 0;
 
 err_register_netdev:
@@ -1634,7 +2058,7 @@
 int ocelot_init(struct ocelot *ocelot)
 {
 	u32 port;
-	int i, cpu = ocelot->num_phys_ports;
+	int i, ret, cpu = ocelot->num_phys_ports;
 	char queue_name[32];
 
 	ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
@@ -1649,6 +2073,8 @@
 		return -ENOMEM;
 
 	mutex_init(&ocelot->stats_lock);
+	mutex_init(&ocelot->ptp_lock);
+	spin_lock_init(&ocelot->ptp_clock_lock);
 	snprintf(queue_name, sizeof(queue_name), "%s-stats",
 		 dev_name(ocelot->dev));
 	ocelot->stats_queue = create_singlethread_workqueue(queue_name);
@@ -1657,6 +2083,7 @@
 
 	ocelot_mact_init(ocelot);
 	ocelot_vlan_init(ocelot);
+	ocelot_ace_init(ocelot);
 
 	for (port = 0; port < ocelot->num_phys_ports; port++) {
 		/* Clear all counters (5 groups) */
@@ -1758,17 +2185,46 @@
 				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
 				 ANA_CPUQ_8021_CFG, i);
 
-	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats);
+	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
+
+	if (ocelot->ptp) {
+		ret = ocelot_init_timestamp(ocelot);
+		if (ret) {
+			dev_err(ocelot->dev,
+				"Timestamp initialization failed\n");
+			return ret;
+		}
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(ocelot_init);
 
 void ocelot_deinit(struct ocelot *ocelot)
 {
+	struct list_head *pos, *tmp;
+	struct ocelot_port *port;
+	struct ocelot_skb *entry;
+	int i;
+
+	cancel_delayed_work(&ocelot->stats_work);
 	destroy_workqueue(ocelot->stats_queue);
 	mutex_destroy(&ocelot->stats_lock);
+	ocelot_ace_deinit();
+
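+	/* Free any two-step PTP skbs still waiting for a TX timestamp. */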
+	for (i = 0; i < ocelot->num_phys_ports; i++) {
+		port = ocelot->ports[i];
+
+		list_for_each_safe(pos, tmp, &port->skbs) {
+			entry = list_entry(pos, struct ocelot_skb, head);
+
+			list_del(pos);
+			dev_kfree_skb_any(entry->skb);
+			kfree(entry);
+		}
+	}
 }
 EXPORT_SYMBOL(ocelot_deinit);
 
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 616bec3..06ac806 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -11,16 +11,21 @@
 #include <linux/bitops.h>
 #include <linux/etherdevice.h>
 #include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/phy/phy.h>
 #include <linux/platform_device.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/regmap.h>
 
 #include "ocelot_ana.h"
 #include "ocelot_dev.h"
-#include "ocelot_hsio.h"
 #include "ocelot_qsys.h"
 #include "ocelot_rew.h"
 #include "ocelot_sys.h"
 #include "ocelot_qs.h"
+#include "ocelot_tc.h"
+#include "ocelot_ptp.h"
 
 #define PGID_AGGR    64
 #define PGID_SRC     80
@@ -36,14 +41,17 @@
 
 #define OCELOT_STATS_CHECK_DELAY (2 * HZ)
 
+#define OCELOT_PTP_QUEUE_SZ	128
+
 #define IFH_LEN 4
 
 struct frame_info {
 	u32 len;
 	u16 port;
 	u16 vid;
-	u8 cpuq;
 	u8 tag_type;
+	u16 rew_op;
+	u32 timestamp;	/* rew_val */
 };
 
 #define IFH_INJ_BYPASS	BIT(31)
@@ -52,6 +60,12 @@
 #define IFH_TAG_TYPE_C 0
 #define IFH_TAG_TYPE_S 1
 
+#define IFH_REW_OP_NOOP			0x0
+#define IFH_REW_OP_DSCP			0x1
+#define IFH_REW_OP_ONE_STEP_PTP		0x2
+#define IFH_REW_OP_TWO_STEP_PTP		0x3
+#define IFH_REW_OP_ORIGIN_PTP		0x5
+
 #define OCELOT_SPEED_2500 0
 #define OCELOT_SPEED_1000 1
 #define OCELOT_SPEED_100  2
@@ -67,7 +81,9 @@
 	QSYS,
 	REW,
 	SYS,
+	S2,
 	HSIO,
+	PTP,
 	TARGET_MAX,
 };
 
@@ -333,79 +349,20 @@
 	SYS_CM_DATA_RD,
 	SYS_CM_OP,
 	SYS_CM_DATA,
-	HSIO_PLL5G_CFG0 = HSIO << TARGET_OFFSET,
-	HSIO_PLL5G_CFG1,
-	HSIO_PLL5G_CFG2,
-	HSIO_PLL5G_CFG3,
-	HSIO_PLL5G_CFG4,
-	HSIO_PLL5G_CFG5,
-	HSIO_PLL5G_CFG6,
-	HSIO_PLL5G_STATUS0,
-	HSIO_PLL5G_STATUS1,
-	HSIO_PLL5G_BIST_CFG0,
-	HSIO_PLL5G_BIST_CFG1,
-	HSIO_PLL5G_BIST_CFG2,
-	HSIO_PLL5G_BIST_STAT0,
-	HSIO_PLL5G_BIST_STAT1,
-	HSIO_RCOMP_CFG0,
-	HSIO_RCOMP_STATUS,
-	HSIO_SYNC_ETH_CFG,
-	HSIO_SYNC_ETH_PLL_CFG,
-	HSIO_S1G_DES_CFG,
-	HSIO_S1G_IB_CFG,
-	HSIO_S1G_OB_CFG,
-	HSIO_S1G_SER_CFG,
-	HSIO_S1G_COMMON_CFG,
-	HSIO_S1G_PLL_CFG,
-	HSIO_S1G_PLL_STATUS,
-	HSIO_S1G_DFT_CFG0,
-	HSIO_S1G_DFT_CFG1,
-	HSIO_S1G_DFT_CFG2,
-	HSIO_S1G_TP_CFG,
-	HSIO_S1G_RC_PLL_BIST_CFG,
-	HSIO_S1G_MISC_CFG,
-	HSIO_S1G_DFT_STATUS,
-	HSIO_S1G_MISC_STATUS,
-	HSIO_MCB_S1G_ADDR_CFG,
-	HSIO_S6G_DIG_CFG,
-	HSIO_S6G_DFT_CFG0,
-	HSIO_S6G_DFT_CFG1,
-	HSIO_S6G_DFT_CFG2,
-	HSIO_S6G_TP_CFG0,
-	HSIO_S6G_TP_CFG1,
-	HSIO_S6G_RC_PLL_BIST_CFG,
-	HSIO_S6G_MISC_CFG,
-	HSIO_S6G_OB_ANEG_CFG,
-	HSIO_S6G_DFT_STATUS,
-	HSIO_S6G_ERR_CNT,
-	HSIO_S6G_MISC_STATUS,
-	HSIO_S6G_DES_CFG,
-	HSIO_S6G_IB_CFG,
-	HSIO_S6G_IB_CFG1,
-	HSIO_S6G_IB_CFG2,
-	HSIO_S6G_IB_CFG3,
-	HSIO_S6G_IB_CFG4,
-	HSIO_S6G_IB_CFG5,
-	HSIO_S6G_OB_CFG,
-	HSIO_S6G_OB_CFG1,
-	HSIO_S6G_SER_CFG,
-	HSIO_S6G_COMMON_CFG,
-	HSIO_S6G_PLL_CFG,
-	HSIO_S6G_ACJTAG_CFG,
-	HSIO_S6G_GP_CFG,
-	HSIO_S6G_IB_STATUS0,
-	HSIO_S6G_IB_STATUS1,
-	HSIO_S6G_ACJTAG_STATUS,
-	HSIO_S6G_PLL_STATUS,
-	HSIO_S6G_REVID,
-	HSIO_MCB_S6G_ADDR_CFG,
-	HSIO_HW_CFG,
-	HSIO_HW_QSGMII_CFG,
-	HSIO_HW_QSGMII_STAT,
-	HSIO_CLK_CFG,
-	HSIO_TEMP_SENSOR_CTRL,
-	HSIO_TEMP_SENSOR_CFG,
-	HSIO_TEMP_SENSOR_STAT,
+	S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET,
+	S2_CORE_MV_CFG,
+	S2_CACHE_ENTRY_DAT,
+	S2_CACHE_MASK_DAT,
+	S2_CACHE_ACTION_DAT,
+	S2_CACHE_CNT_DAT,
+	S2_CACHE_TG_DAT,
+	PTP_PIN_CFG = PTP << TARGET_OFFSET,
+	PTP_PIN_TOD_SEC_MSB,
+	PTP_PIN_TOD_SEC_LSB,
+	PTP_PIN_TOD_NSEC,
+	PTP_CFG_MISC,
+	PTP_CLK_CFG_ADJ_CFG,
+	PTP_CLK_CFG_ADJ_FREQ,
 };
 
 enum ocelot_regfield {
@@ -456,6 +413,13 @@
 	REGFIELD_MAX
 };
 
+enum ocelot_clk_pins {
+	ALT_PPS_PIN	= 1,
+	EXT_CLK_PIN,
+	ALT_LDST_PIN,
+	TOD_ACC_PIN
+};
+
 struct ocelot_multicast {
 	struct list_head list;
 	unsigned char addr[ETH_ALEN];
@@ -505,6 +469,13 @@
 	u64 *stats;
 	struct delayed_work stats_work;
 	struct workqueue_struct *stats_queue;
+
+	u8 ptp:1;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_info;
+	struct hwtstamp_config hwtstamp_config;
+	struct mutex ptp_lock; /* Protects the PTP interface state */
+	spinlock_t ptp_clock_lock; /* Protects the PTP clock */
 };
 
 struct ocelot_port {
@@ -513,10 +484,6 @@
 	struct phy_device *phy;
 	void __iomem *regs;
 	u8 chip_port;
-	/* Keep a track of the mc addresses added to the mac table, so that they
-	 * can be removed when needed.
-	 */
-	struct list_head mc;
 
 	/* Ingress default VLAN (pvid) */
 	u16 pvid;
@@ -527,6 +494,21 @@
 	u8 vlan_aware;
 
 	u64 *stats;
+
+	phy_interface_t phy_mode;
+	struct phy *serdes;
+
+	struct ocelot_port_tc tc;
+
+	u8 ptp_cmd;
+	struct list_head skbs;
+	u8 ts_id;
+};
+
+struct ocelot_skb {
+	struct list_head head;
+	struct sk_buff *skb;
+	u8 id;
 };
 
 u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
@@ -541,7 +523,7 @@
 #define ocelot_write_rix(ocelot, val, reg, ri) __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri))
 #define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0)
 
-void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 mask,
+void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
 		     u32 offset);
 #define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
 #define ocelot_rmw_gix(ocelot, val, m, reg, gi) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi))
@@ -568,5 +550,10 @@
 		      struct phy_device *phy);
 
 extern struct notifier_block ocelot_netdevice_nb;
+extern struct notifier_block ocelot_switchdev_nb;
+extern struct notifier_block ocelot_switchdev_blocking_nb;
+
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
+void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts);
 
 #endif
diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c
new file mode 100644
index 0000000..86fc6e6
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_ace.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch driver
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include <linux/iopoll.h>
+#include <linux/proc_fs.h>
+
+#include "ocelot_ace.h"
+#include "ocelot_vcap.h"
+#include "ocelot_s2.h"
+
+#define OCELOT_POLICER_DISCARD 0x17f
+
+static struct ocelot_acl_block *acl_block;
+
+struct vcap_props {
+	const char *name; /* Symbolic name */
+	u16 tg_width; /* Type-group width (in bits) */
+	u16 sw_count; /* Sub word count */
+	u16 entry_count; /* Entry count */
+	u16 entry_words; /* Number of entry words */
+	u16 entry_width; /* Entry width (in bits) */
+	u16 action_count; /* Action count */
+	u16 action_words; /* Number of action words */
+	u16 action_width; /* Action width (in bits) */
+	u16 action_type_width; /* Action type width (in bits) */
+	struct {
+		u16 width; /* Action width for this type (in bits) */
+		u16 count; /* Action type sub word count */
+	} action_table[2];
+	u16 counter_words; /* Number of counter words */
+	u16 counter_width; /* Counter width (in bits) */
+};
+
+#define ENTRY_WIDTH 32
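+/* Number of 32-bit words needed to hold x bits (division rounded up) */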
+#define BITS_TO_32BIT(x) (1 + (((x) - 1) / ENTRY_WIDTH))
+
+static const struct vcap_props vcap_is2 = {
+	.name = "IS2",
+	.tg_width = 2,
+	.sw_count = 4,
+	.entry_count = VCAP_IS2_CNT,
+	.entry_words = BITS_TO_32BIT(VCAP_IS2_ENTRY_WIDTH),
+	.entry_width = VCAP_IS2_ENTRY_WIDTH,
+	.action_count = (VCAP_IS2_CNT + VCAP_PORT_CNT + 2),
+	.action_words = BITS_TO_32BIT(VCAP_IS2_ACTION_WIDTH),
+	.action_width = (VCAP_IS2_ACTION_WIDTH),
+	.action_type_width = 1,
+	.action_table = {
+		{
+			.width = (IS2_AO_ACL_ID + IS2_AL_ACL_ID),
+			.count = 2
+		},
+		{
+			.width = 6,
+			.count = 4
+		},
+	},
+	.counter_words = BITS_TO_32BIT(4 * ENTRY_WIDTH),
+	.counter_width = ENTRY_WIDTH,
+};
+
+enum vcap_sel {
+	VCAP_SEL_ENTRY = 0x1,
+	VCAP_SEL_ACTION = 0x2,
+	VCAP_SEL_COUNTER = 0x4,
+	VCAP_SEL_ALL = 0x7,
+};
+
+enum vcap_cmd {
+	VCAP_CMD_WRITE = 0, /* Copy from Cache to TCAM */
+	VCAP_CMD_READ = 1, /* Copy from TCAM to Cache */
+	VCAP_CMD_MOVE_UP = 2, /* Move <count> up */
+	VCAP_CMD_MOVE_DOWN = 3, /* Move <count> down */
+	VCAP_CMD_INITIALIZE = 4, /* Write all (from cache) */
+};
+
+#define VCAP_ENTRY_WIDTH 12 /* Max entry width (32bit words) */
+#define VCAP_COUNTER_WIDTH 4 /* Max counter width (32bit words) */
+
+struct vcap_data {
+	u32 entry[VCAP_ENTRY_WIDTH]; /* ENTRY_DAT */
+	u32 mask[VCAP_ENTRY_WIDTH]; /* MASK_DAT */
+	u32 action[VCAP_ENTRY_WIDTH]; /* ACTION_DAT */
+	u32 counter[VCAP_COUNTER_WIDTH]; /* CNT_DAT */
+	u32 tg; /* TG_DAT */
+	u32 type; /* Action type */
+	u32 tg_sw; /* Current type-group */
+	u32 cnt; /* Current counter */
+	u32 key_offset; /* Current entry offset */
+	u32 action_offset; /* Current action offset */
+	u32 counter_offset; /* Current counter offset */
+	u32 tg_value; /* Current type-group value */
+	u32 tg_mask; /* Current type-group mask */
+};
+
+static u32 vcap_s2_read_update_ctrl(struct ocelot *oc)
+{
+	return ocelot_read(oc, S2_CORE_UPDATE_CTRL);
+}
+
+static void vcap_cmd(struct ocelot *oc, u16 ix, int cmd, int sel)
+{
+	u32 value = (S2_CORE_UPDATE_CTRL_UPDATE_CMD(cmd) |
+		     S2_CORE_UPDATE_CTRL_UPDATE_ADDR(ix) |
+		     S2_CORE_UPDATE_CTRL_UPDATE_SHOT);
+
+	if ((sel & VCAP_SEL_ENTRY) && ix >= vcap_is2.entry_count)
+		return;
+
+	if (!(sel & VCAP_SEL_ENTRY))
+		value |= S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS;
+
+	if (!(sel & VCAP_SEL_ACTION))
+		value |= S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS;
+
+	if (!(sel & VCAP_SEL_COUNTER))
+		value |= S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS;
+
+	ocelot_write(oc, value, S2_CORE_UPDATE_CTRL);
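+	/* Wait for the one-shot UPDATE bit to self-clear, indicating that
+	 * the command has completed.
+	 */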
+	readx_poll_timeout(vcap_s2_read_update_ctrl, oc, value,
+				(value & S2_CORE_UPDATE_CTRL_UPDATE_SHOT) == 0,
+				10, 100000);
+}
+
+/* Convert from 0-based row to VCAP entry row and run command */
+static void vcap_row_cmd(struct ocelot *oc, u32 row, int cmd, int sel)
+{
+	vcap_cmd(oc, vcap_is2.entry_count - row - 1, cmd, sel);
+}
+
+static void vcap_entry2cache(struct ocelot *oc, struct vcap_data *data)
+{
+	u32 i;
+
+	for (i = 0; i < vcap_is2.entry_words; i++) {
+		ocelot_write_rix(oc, data->entry[i], S2_CACHE_ENTRY_DAT, i);
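+		/* The mask is stored inverted in the cache */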
+		ocelot_write_rix(oc, ~data->mask[i], S2_CACHE_MASK_DAT, i);
+	}
+	ocelot_write(oc, data->tg, S2_CACHE_TG_DAT);
+}
+
+static void vcap_cache2entry(struct ocelot *oc, struct vcap_data *data)
+{
+	u32 i;
+
+	for (i = 0; i < vcap_is2.entry_words; i++) {
+		data->entry[i] = ocelot_read_rix(oc, S2_CACHE_ENTRY_DAT, i);
+		/* Invert mask */
+		data->mask[i] = ~ocelot_read_rix(oc, S2_CACHE_MASK_DAT, i);
+	}
+	data->tg = ocelot_read(oc, S2_CACHE_TG_DAT);
+}
+
+static void vcap_action2cache(struct ocelot *oc, struct vcap_data *data)
+{
+	u32 i, width, mask;
+
+	/* Encode action type */
+	width = vcap_is2.action_type_width;
+	if (width) {
+		mask = GENMASK(width, 0);
+		data->action[0] = ((data->action[0] & ~mask) | data->type);
+	}
+
+	for (i = 0; i < vcap_is2.action_words; i++)
+		ocelot_write_rix(oc, data->action[i], S2_CACHE_ACTION_DAT, i);
+
+	for (i = 0; i < vcap_is2.counter_words; i++)
+		ocelot_write_rix(oc, data->counter[i], S2_CACHE_CNT_DAT, i);
+}
+
+static void vcap_cache2action(struct ocelot *oc, struct vcap_data *data)
+{
+	u32 i, width;
+
+	for (i = 0; i < vcap_is2.action_words; i++)
+		data->action[i] = ocelot_read_rix(oc, S2_CACHE_ACTION_DAT, i);
+
+	for (i = 0; i < vcap_is2.counter_words; i++)
+		data->counter[i] = ocelot_read_rix(oc, S2_CACHE_CNT_DAT, i);
+
+	/* Extract action type */
+	width = vcap_is2.action_type_width;
+	data->type = (width ? (data->action[0] & GENMASK(width, 0)) : 0);
+}
+
+/* Calculate offsets for entry */
+static void is2_data_get(struct vcap_data *data, int ix)
+{
+	u32 i, col, offset, count, cnt, base, width = vcap_is2.tg_width;
+
+	count = (data->tg_sw == VCAP_TG_HALF ? 2 : 4);
+	col = (ix % 2);
+	cnt = (vcap_is2.sw_count / count);
+	base = (vcap_is2.sw_count - col * cnt - cnt);
+	data->tg_value = 0;
+	data->tg_mask = 0;
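+	/* Set the type-group for every subword occupied by this entry */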
+	for (i = 0; i < cnt; i++) {
+		offset = ((base + i) * width);
+		data->tg_value |= (data->tg_sw << offset);
+		data->tg_mask |= GENMASK(offset + width - 1, offset);
+	}
+
+	/* Calculate key/action/counter offsets */
+	col = (count - col - 1);
+	data->key_offset = (base * vcap_is2.entry_width) / vcap_is2.sw_count;
+	data->counter_offset = (cnt * col * vcap_is2.counter_width);
+	i = data->type;
+	width = vcap_is2.action_table[i].width;
+	cnt = vcap_is2.action_table[i].count;
+	data->action_offset =
+		(((cnt * col * width) / count) + vcap_is2.action_type_width);
+}
+
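+/* Read/write helpers for bit fields packed into an array of 32-bit words */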
+static void vcap_data_set(u32 *data, u32 offset, u32 len, u32 value)
+{
+	u32 i, v, m;
+
+	for (i = 0; i < len; i++, offset++) {
+		v = data[offset / ENTRY_WIDTH];
+		m = (1 << (offset % ENTRY_WIDTH));
+		if (value & (1 << i))
+			v |= m;
+		else
+			v &= ~m;
+		data[offset / ENTRY_WIDTH] = v;
+	}
+}
+
+static u32 vcap_data_get(u32 *data, u32 offset, u32 len)
+{
+	u32 i, v, m, value = 0;
+
+	for (i = 0; i < len; i++, offset++) {
+		v = data[offset / ENTRY_WIDTH];
+		m = (1 << (offset % ENTRY_WIDTH));
+		if (v & m)
+			value |= (1 << i);
+	}
+	return value;
+}
+
+static void vcap_key_set(struct vcap_data *data, u32 offset, u32 width,
+			 u32 value, u32 mask)
+{
+	vcap_data_set(data->entry, offset + data->key_offset, width, value);
+	vcap_data_set(data->mask, offset + data->key_offset, width, mask);
+}
+
+static void vcap_key_bytes_set(struct vcap_data *data, u32 offset, u8 *val,
+			       u8 *msk, u32 count)
+{
+	u32 i, j, n = 0, value = 0, mask = 0;
+
+	/* Data wider than 32 bits is split into chunks of at most 32 bits.
+	 * The 32 LSBs of the data are written to the 32 MSBs of the TCAM.
+	 */
+	offset += (count * 8);
+	for (i = 0; i < count; i++) {
+		j = (count - i - 1);
+		value += (val[j] << n);
+		mask += (msk[j] << n);
+		n += 8;
+		if (n == ENTRY_WIDTH || (i + 1) == count) {
+			offset -= n;
+			vcap_key_set(data, offset, n, value, mask);
+			n = 0;
+			value = 0;
+			mask = 0;
+		}
+	}
+}
+
+static void vcap_key_l4_port_set(struct vcap_data *data, u32 offset,
+				 struct ocelot_vcap_udp_tcp *port)
+{
+	vcap_key_set(data, offset, 16, port->value, port->mask);
+}
+
+static void vcap_key_bit_set(struct vcap_data *data, u32 offset,
+			     enum ocelot_vcap_bit val)
+{
+	vcap_key_set(data, offset, 1, val == OCELOT_VCAP_BIT_1 ? 1 : 0,
+		     val == OCELOT_VCAP_BIT_ANY ? 0 : 1);
+}
+
+#define VCAP_KEY_SET(fld, val, msk) \
+	vcap_key_set(&data, IS2_HKO_##fld, IS2_HKL_##fld, val, msk)
+#define VCAP_KEY_ANY_SET(fld) \
+	vcap_key_set(&data, IS2_HKO_##fld, IS2_HKL_##fld, 0, 0)
+#define VCAP_KEY_BIT_SET(fld, val) vcap_key_bit_set(&data, IS2_HKO_##fld, val)
+#define VCAP_KEY_BYTES_SET(fld, val, msk) \
+	vcap_key_bytes_set(&data, IS2_HKO_##fld, val, msk, IS2_HKL_##fld / 8)
+
+static void vcap_action_set(struct vcap_data *data, u32 offset, u32 width,
+			    u32 value)
+{
+	vcap_data_set(data->action, offset + data->action_offset, width, value);
+}
+
+#define VCAP_ACT_SET(fld, val) \
+	vcap_action_set(data, IS2_AO_##fld, IS2_AL_##fld, val)
+
+static void is2_action_set(struct vcap_data *data,
+			   enum ocelot_ace_action action)
+{
+	switch (action) {
+	case OCELOT_ACL_ACTION_DROP:
+		VCAP_ACT_SET(PORT_MASK, 0x0);
+		VCAP_ACT_SET(MASK_MODE, 0x1);
+		VCAP_ACT_SET(POLICE_ENA, 0x1);
+		VCAP_ACT_SET(POLICE_IDX, OCELOT_POLICER_DISCARD);
+		VCAP_ACT_SET(CPU_QU_NUM, 0x0);
+		VCAP_ACT_SET(CPU_COPY_ENA, 0x0);
+		break;
+	case OCELOT_ACL_ACTION_TRAP:
+		VCAP_ACT_SET(PORT_MASK, 0x0);
+		VCAP_ACT_SET(MASK_MODE, 0x1);
+		VCAP_ACT_SET(POLICE_ENA, 0x0);
+		VCAP_ACT_SET(POLICE_IDX, 0x0);
+		VCAP_ACT_SET(CPU_QU_NUM, 0x0);
+		VCAP_ACT_SET(CPU_COPY_ENA, 0x1);
+		break;
+	}
+}
+
+static void is2_entry_set(struct ocelot *ocelot, int ix,
+			  struct ocelot_ace_rule *ace)
+{
+	u32 val, msk, type, type_mask = 0xf, i, count;
+	struct ocelot_ace_vlan *tag = &ace->vlan;
+	struct ocelot_vcap_u64 payload;
+	struct vcap_data data;
+	int row = (ix / 2);
+
+	memset(&payload, 0, sizeof(payload));
+	memset(&data, 0, sizeof(data));
+
+	/* Read row */
+	vcap_row_cmd(ocelot, row, VCAP_CMD_READ, VCAP_SEL_ALL);
+	vcap_cache2entry(ocelot, &data);
+	vcap_cache2action(ocelot, &data);
+
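+	/* Half-width keys are used, so two entries share each TCAM row */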
+	data.tg_sw = VCAP_TG_HALF;
+	is2_data_get(&data, ix);
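+	/* A zero priority leaves the type-group cleared, which invalidates
+	 * the entry (this is how deleted entries are erased).
+	 */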
+	data.tg = (data.tg & ~data.tg_mask);
+	if (ace->prio != 0)
+		data.tg |= data.tg_value;
+
+	data.type = IS2_ACTION_TYPE_NORMAL;
+
+	VCAP_KEY_ANY_SET(PAG);
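+	/* Match only frames received on this port: all other bits of the
+	 * ingress port mask must be zero.
+	 */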
+	VCAP_KEY_SET(IGR_PORT_MASK, 0, ~BIT(ace->chip_port));
+	VCAP_KEY_BIT_SET(FIRST, OCELOT_VCAP_BIT_1);
+	VCAP_KEY_BIT_SET(HOST_MATCH, OCELOT_VCAP_BIT_ANY);
+	VCAP_KEY_BIT_SET(L2_MC, ace->dmac_mc);
+	VCAP_KEY_BIT_SET(L2_BC, ace->dmac_bc);
+	VCAP_KEY_BIT_SET(VLAN_TAGGED, tag->tagged);
+	VCAP_KEY_SET(VID, tag->vid.value, tag->vid.mask);
+	VCAP_KEY_SET(PCP, tag->pcp.value[0], tag->pcp.mask[0]);
+	VCAP_KEY_BIT_SET(DEI, tag->dei);
+
+	switch (ace->type) {
+	case OCELOT_ACE_TYPE_ETYPE: {
+		struct ocelot_ace_frame_etype *etype = &ace->frame.etype;
+
+		type = IS2_TYPE_ETYPE;
+		VCAP_KEY_BYTES_SET(L2_DMAC, etype->dmac.value,
+				   etype->dmac.mask);
+		VCAP_KEY_BYTES_SET(L2_SMAC, etype->smac.value,
+				   etype->smac.mask);
+		VCAP_KEY_BYTES_SET(MAC_ETYPE_ETYPE, etype->etype.value,
+				   etype->etype.mask);
+		VCAP_KEY_ANY_SET(MAC_ETYPE_L2_PAYLOAD); /* Clear unused bits */
+		vcap_key_bytes_set(&data, IS2_HKO_MAC_ETYPE_L2_PAYLOAD,
+				   etype->data.value, etype->data.mask, 2);
+		break;
+	}
+	case OCELOT_ACE_TYPE_LLC: {
+		struct ocelot_ace_frame_llc *llc = &ace->frame.llc;
+
+		type = IS2_TYPE_LLC;
+		VCAP_KEY_BYTES_SET(L2_DMAC, llc->dmac.value, llc->dmac.mask);
+		VCAP_KEY_BYTES_SET(L2_SMAC, llc->smac.value, llc->smac.mask);
+		for (i = 0; i < 4; i++) {
+			payload.value[i] = llc->llc.value[i];
+			payload.mask[i] = llc->llc.mask[i];
+		}
+		VCAP_KEY_BYTES_SET(MAC_LLC_L2_LLC, payload.value, payload.mask);
+		break;
+	}
+	case OCELOT_ACE_TYPE_SNAP: {
+		struct ocelot_ace_frame_snap *snap = &ace->frame.snap;
+
+		type = IS2_TYPE_SNAP;
+		VCAP_KEY_BYTES_SET(L2_DMAC, snap->dmac.value, snap->dmac.mask);
+		VCAP_KEY_BYTES_SET(L2_SMAC, snap->smac.value, snap->smac.mask);
+		VCAP_KEY_BYTES_SET(MAC_SNAP_L2_SNAP,
+				   ace->frame.snap.snap.value,
+				   ace->frame.snap.snap.mask);
+		break;
+	}
+	case OCELOT_ACE_TYPE_ARP: {
+		struct ocelot_ace_frame_arp *arp = &ace->frame.arp;
+
+		type = IS2_TYPE_ARP;
+		VCAP_KEY_BYTES_SET(MAC_ARP_L2_SMAC, arp->smac.value,
+				   arp->smac.mask);
+		VCAP_KEY_BIT_SET(MAC_ARP_ARP_ADDR_SPACE_OK, arp->ethernet);
+		VCAP_KEY_BIT_SET(MAC_ARP_ARP_PROTO_SPACE_OK, arp->ip);
+		VCAP_KEY_BIT_SET(MAC_ARP_ARP_LEN_OK, arp->length);
+		VCAP_KEY_BIT_SET(MAC_ARP_ARP_TGT_MATCH, arp->dmac_match);
+		VCAP_KEY_BIT_SET(MAC_ARP_ARP_SENDER_MATCH, arp->smac_match);
+		VCAP_KEY_BIT_SET(MAC_ARP_ARP_OPCODE_UNKNOWN, arp->unknown);
+
+		/* The OPCODE bits are inverted: bit 0 is the reply flag,
+		 * bit 1 is the RARP flag.
+		 */
+		val = ((arp->req == OCELOT_VCAP_BIT_0 ? 1 : 0) |
+		       (arp->arp == OCELOT_VCAP_BIT_0 ? 2 : 0));
+		msk = ((arp->req == OCELOT_VCAP_BIT_ANY ? 0 : 1) |
+		       (arp->arp == OCELOT_VCAP_BIT_ANY ? 0 : 2));
+		VCAP_KEY_SET(MAC_ARP_ARP_OPCODE, val, msk);
+		vcap_key_bytes_set(&data, IS2_HKO_MAC_ARP_L3_IP4_DIP,
+				   arp->dip.value.addr, arp->dip.mask.addr, 4);
+		vcap_key_bytes_set(&data, IS2_HKO_MAC_ARP_L3_IP4_SIP,
+				   arp->sip.value.addr, arp->sip.mask.addr, 4);
+		VCAP_KEY_ANY_SET(MAC_ARP_DIP_EQ_SIP);
+		break;
+	}
+	case OCELOT_ACE_TYPE_IPV4:
+	case OCELOT_ACE_TYPE_IPV6: {
+		enum ocelot_vcap_bit sip_eq_dip, sport_eq_dport, seq_zero, tcp;
+		enum ocelot_vcap_bit ttl, fragment, options, tcp_ack, tcp_urg;
+		enum ocelot_vcap_bit tcp_fin, tcp_syn, tcp_rst, tcp_psh;
+		struct ocelot_ace_frame_ipv4 *ipv4 = NULL;
+		struct ocelot_ace_frame_ipv6 *ipv6 = NULL;
+		struct ocelot_vcap_udp_tcp *sport, *dport;
+		struct ocelot_vcap_ipv4 sip, dip;
+		struct ocelot_vcap_u8 proto, ds;
+		struct ocelot_vcap_u48 *ip_data;
+
+		if (ace->type == OCELOT_ACE_TYPE_IPV4) {
+			ipv4 = &ace->frame.ipv4;
+			ttl = ipv4->ttl;
+			fragment = ipv4->fragment;
+			options = ipv4->options;
+			proto = ipv4->proto;
+			ds = ipv4->ds;
+			ip_data = &ipv4->data;
+			sip = ipv4->sip;
+			dip = ipv4->dip;
+			sport = &ipv4->sport;
+			dport = &ipv4->dport;
+			tcp_fin = ipv4->tcp_fin;
+			tcp_syn = ipv4->tcp_syn;
+			tcp_rst = ipv4->tcp_rst;
+			tcp_psh = ipv4->tcp_psh;
+			tcp_ack = ipv4->tcp_ack;
+			tcp_urg = ipv4->tcp_urg;
+			sip_eq_dip = ipv4->sip_eq_dip;
+			sport_eq_dport = ipv4->sport_eq_dport;
+			seq_zero = ipv4->seq_zero;
+		} else {
+			ipv6 = &ace->frame.ipv6;
+			ttl = ipv6->ttl;
+			fragment = OCELOT_VCAP_BIT_ANY;
+			options = OCELOT_VCAP_BIT_ANY;
+			proto = ipv6->proto;
+			ds = ipv6->ds;
+			ip_data = &ipv6->data;
+			for (i = 0; i < 8; i++) {
+				val = ipv6->sip.value[i + 8];
+				msk = ipv6->sip.mask[i + 8];
+				if (i < 4) {
+					dip.value.addr[i] = val;
+					dip.mask.addr[i] = msk;
+				} else {
+					sip.value.addr[i - 4] = val;
+					sip.mask.addr[i - 4] = msk;
+				}
+			}
+			sport = &ipv6->sport;
+			dport = &ipv6->dport;
+			tcp_fin = ipv6->tcp_fin;
+			tcp_syn = ipv6->tcp_syn;
+			tcp_rst = ipv6->tcp_rst;
+			tcp_psh = ipv6->tcp_psh;
+			tcp_ack = ipv6->tcp_ack;
+			tcp_urg = ipv6->tcp_urg;
+			sip_eq_dip = ipv6->sip_eq_dip;
+			sport_eq_dport = ipv6->sport_eq_dport;
+			seq_zero = ipv6->seq_zero;
+		}
+
+		VCAP_KEY_BIT_SET(IP4,
+				 ipv4 ? OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0);
+		VCAP_KEY_BIT_SET(L3_FRAGMENT, fragment);
+		VCAP_KEY_ANY_SET(L3_FRAG_OFS_GT0);
+		VCAP_KEY_BIT_SET(L3_OPTIONS, options);
+		VCAP_KEY_BIT_SET(L3_TTL_GT0, ttl);
+		VCAP_KEY_BYTES_SET(L3_TOS, ds.value, ds.mask);
+		vcap_key_bytes_set(&data, IS2_HKO_L3_IP4_DIP, dip.value.addr,
+				   dip.mask.addr, 4);
+		vcap_key_bytes_set(&data, IS2_HKO_L3_IP4_SIP, sip.value.addr,
+				   sip.mask.addr, 4);
+		VCAP_KEY_BIT_SET(DIP_EQ_SIP, sip_eq_dip);
+		val = proto.value[0];
+		msk = proto.mask[0];
+		type = IS2_TYPE_IP_UDP_TCP;
+		if (msk == 0xff && (val == 6 || val == 17)) {
+			/* UDP/TCP protocol match */
+			tcp = (val == 6 ?
+			       OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_TCP, tcp);
+			vcap_key_l4_port_set(&data,
+					     IS2_HKO_IP4_TCP_UDP_L4_DPORT,
+					     dport);
+			vcap_key_l4_port_set(&data,
+					     IS2_HKO_IP4_TCP_UDP_L4_SPORT,
+					     sport);
+			VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_RNG);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_SPORT_EQ_DPORT,
+					 sport_eq_dport);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_SEQUENCE_EQ0, seq_zero);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_FIN, tcp_fin);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_SYN, tcp_syn);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_RST, tcp_rst);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_PSH, tcp_psh);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_ACK, tcp_ack);
+			VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_URG, tcp_urg);
+			VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_1588_DOM);
+			VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_1588_VER);
+		} else {
+			if (msk == 0) {
+				/* Any IP protocol match */
+				type_mask = IS2_TYPE_MASK_IP_ANY;
+			} else {
+				/* Non-UDP/TCP protocol match */
+				type = IS2_TYPE_IP_OTHER;
+				for (i = 0; i < 6; i++) {
+					payload.value[i] = ip_data->value[i];
+					payload.mask[i] = ip_data->mask[i];
+				}
+			}
+			VCAP_KEY_BYTES_SET(IP4_OTHER_L3_PROTO, proto.value,
+					   proto.mask);
+			VCAP_KEY_BYTES_SET(IP4_OTHER_L3_PAYLOAD, payload.value,
+					   payload.mask);
+		}
+		break;
+	}
+	case OCELOT_ACE_TYPE_ANY:
+	default:
+		type = 0;
+		type_mask = 0;
+		count = (vcap_is2.entry_width / 2);
+		for (i = (IS2_HKO_PCP + IS2_HKL_PCP); i < count;
+		     i += ENTRY_WIDTH) {
+			/* Clear entry data */
+			vcap_key_set(&data, i, min(32u, count - i), 0, 0);
+		}
+		break;
+	}
+
+	VCAP_KEY_SET(TYPE, type, type_mask);
+	is2_action_set(&data, ace->action);
+	vcap_data_set(data.counter, data.counter_offset, vcap_is2.counter_width,
+		      ace->stats.pkts);
+
+	/* Write row */
+	vcap_entry2cache(ocelot, &data);
+	vcap_action2cache(ocelot, &data);
+	vcap_row_cmd(ocelot, row, VCAP_CMD_WRITE, VCAP_SEL_ALL);
+}
+
+static void is2_entry_get(struct ocelot_ace_rule *rule, int ix)
+{
+	struct ocelot *op = rule->port->ocelot;
+	struct vcap_data data;
+	int row = (ix / 2);
+	u32 cnt;
+
+	vcap_row_cmd(op, row, VCAP_CMD_READ, VCAP_SEL_COUNTER);
+	vcap_cache2action(op, &data);
+	data.tg_sw = VCAP_TG_HALF;
+	is2_data_get(&data, ix);
+	cnt = vcap_data_get(data.counter, data.counter_offset,
+			    vcap_is2.counter_width);
+
+	rule->stats.pkts = cnt;
+}
+
+static void ocelot_ace_rule_add(struct ocelot_acl_block *block,
+				struct ocelot_ace_rule *rule)
+{
+	struct ocelot_ace_rule *tmp;
+	struct list_head *pos, *n;
+
+	block->count++;
+
+	if (list_empty(&block->rules)) {
+		list_add(&rule->list, &block->rules);
+		return;
+	}
+
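+	/* Insert the rule just before the first existing rule with a higher
+	 * priority value, keeping the list sorted by priority.
+	 */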
+	list_for_each_safe(pos, n, &block->rules) {
+		tmp = list_entry(pos, struct ocelot_ace_rule, list);
+		if (rule->prio < tmp->prio)
+			break;
+	}
+	list_add(&rule->list, pos->prev);
+}
+
+static int ocelot_ace_rule_get_index_id(struct ocelot_acl_block *block,
+					struct ocelot_ace_rule *rule)
+{
+	struct ocelot_ace_rule *tmp;
+	int index = -1;
+
+	list_for_each_entry(tmp, &block->rules, list) {
+		++index;
+		if (rule->id == tmp->id)
+			break;
+	}
+	return index;
+}
+
+static struct ocelot_ace_rule*
+ocelot_ace_rule_get_rule_index(struct ocelot_acl_block *block, int index)
+{
+	struct ocelot_ace_rule *tmp;
+	int i = 0;
+
+	list_for_each_entry(tmp, &block->rules, list) {
+		if (i == index)
+			return tmp;
+		++i;
+	}
+
+	return NULL;
+}
+
+int ocelot_ace_rule_offload_add(struct ocelot_ace_rule *rule)
+{
+	struct ocelot_ace_rule *ace;
+	int i, index;
+
+	/* Add rule to the linked list */
+	ocelot_ace_rule_add(acl_block, rule);
+
+	/* Get the index of the inserted rule */
+	index = ocelot_ace_rule_get_index_id(acl_block, rule);
+
+	/* Move the rules down to make room for the new rule */
+	for (i = acl_block->count - 1; i > index; i--) {
+		ace = ocelot_ace_rule_get_rule_index(acl_block, i);
+		is2_entry_set(rule->port->ocelot, i, ace);
+	}
+
+	/* Now insert the new rule */
+	is2_entry_set(rule->port->ocelot, index, rule);
+	return 0;
+}
+
+static void ocelot_ace_rule_del(struct ocelot_acl_block *block,
+				struct ocelot_ace_rule *rule)
+{
+	struct ocelot_ace_rule *tmp;
+	struct list_head *pos, *q;
+
+	list_for_each_safe(pos, q, &block->rules) {
+		tmp = list_entry(pos, struct ocelot_ace_rule, list);
+		if (tmp->id == rule->id) {
+			list_del(pos);
+			kfree(tmp);
+		}
+	}
+
+	block->count--;
+}
+
+int ocelot_ace_rule_offload_del(struct ocelot_ace_rule *rule)
+{
+	struct ocelot_ace_rule del_ace;
+	struct ocelot_ace_rule *ace;
+	int i, index;
+
+	memset(&del_ace, 0, sizeof(del_ace));
+
+	/* Get the index of the rule */
+	index = ocelot_ace_rule_get_index_id(acl_block, rule);
+
+	/* Delete rule */
+	ocelot_ace_rule_del(acl_block, rule);
+
+	/* Move up all the rules that follow the deleted rule */
+	for (i = index; i < acl_block->count; i++) {
+		ace = ocelot_ace_rule_get_rule_index(acl_block, i);
+		is2_entry_set(rule->port->ocelot, i, ace);
+	}
+
+	/* Now delete the last rule, because it is duplicated */
+	is2_entry_set(rule->port->ocelot, acl_block->count, &del_ace);
+
+	return 0;
+}
+
+int ocelot_ace_rule_stats_update(struct ocelot_ace_rule *rule)
+{
+	struct ocelot_ace_rule *tmp;
+	int index;
+
+	index = ocelot_ace_rule_get_index_id(acl_block, rule);
+	is2_entry_get(rule, index);
+
+	/* After we get the result we need to clear the counters */
+	tmp = ocelot_ace_rule_get_rule_index(acl_block, index);
+	tmp->stats.pkts = 0;
+	is2_entry_set(rule->port->ocelot, index, tmp);
+
+	return 0;
+}
+
+static struct ocelot_acl_block *ocelot_acl_block_create(struct ocelot *ocelot)
+{
+	struct ocelot_acl_block *block;
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (!block)
+		return NULL;
+
+	INIT_LIST_HEAD(&block->rules);
+	block->count = 0;
+	block->ocelot = ocelot;
+
+	return block;
+}
+
+static void ocelot_acl_block_destroy(struct ocelot_acl_block *block)
+{
+	kfree(block);
+}
+
+int ocelot_ace_init(struct ocelot *ocelot)
+{
+	struct vcap_data data;
+
+	memset(&data, 0, sizeof(data));
+	vcap_entry2cache(ocelot, &data);
+	ocelot_write(ocelot, vcap_is2.entry_count, S2_CORE_MV_CFG);
+	vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, VCAP_SEL_ENTRY);
+
+	vcap_action2cache(ocelot, &data);
+	ocelot_write(ocelot, vcap_is2.action_count, S2_CORE_MV_CFG);
+	vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE,
+		 VCAP_SEL_ACTION | VCAP_SEL_COUNTER);
+
+	/* Create a policer that discards every frame assigned to it.
+	 * This policer is used as the action in ACL rules that drop
+	 * frames.
+	 */
+	ocelot_write_gix(ocelot, 0x299, ANA_POL_MODE_CFG,
+			 OCELOT_POLICER_DISCARD);
+	ocelot_write_gix(ocelot, 0x1, ANA_POL_PIR_CFG,
+			 OCELOT_POLICER_DISCARD);
+	ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_PIR_STATE,
+			 OCELOT_POLICER_DISCARD);
+	ocelot_write_gix(ocelot, 0x0, ANA_POL_CIR_CFG,
+			 OCELOT_POLICER_DISCARD);
+	ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_CIR_STATE,
+			 OCELOT_POLICER_DISCARD);
+
+	acl_block = ocelot_acl_block_create(ocelot);
+
+	return 0;
+}
+
+void ocelot_ace_deinit(void)
+{
+	ocelot_acl_block_destroy(acl_block);
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_ace.h b/drivers/net/ethernet/mscc/ocelot_ace.h
new file mode 100644
index 0000000..e98944c
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_ace.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_ACE_H_
+#define _MSCC_OCELOT_ACE_H_
+
+#include "ocelot.h"
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+
+struct ocelot_ipv4 {
+	u8 addr[4];
+};
+
+enum ocelot_vcap_bit {
+	OCELOT_VCAP_BIT_ANY,
+	OCELOT_VCAP_BIT_0,
+	OCELOT_VCAP_BIT_1
+};
+
+struct ocelot_vcap_u8 {
+	u8 value[1];
+	u8 mask[1];
+};
+
+struct ocelot_vcap_u16 {
+	u8 value[2];
+	u8 mask[2];
+};
+
+struct ocelot_vcap_u24 {
+	u8 value[3];
+	u8 mask[3];
+};
+
+struct ocelot_vcap_u32 {
+	u8 value[4];
+	u8 mask[4];
+};
+
+struct ocelot_vcap_u40 {
+	u8 value[5];
+	u8 mask[5];
+};
+
+struct ocelot_vcap_u48 {
+	u8 value[6];
+	u8 mask[6];
+};
+
+struct ocelot_vcap_u64 {
+	u8 value[8];
+	u8 mask[8];
+};
+
+struct ocelot_vcap_u128 {
+	u8 value[16];
+	u8 mask[16];
+};
+
+struct ocelot_vcap_vid {
+	u16 value;
+	u16 mask;
+};
+
+struct ocelot_vcap_ipv4 {
+	struct ocelot_ipv4 value;
+	struct ocelot_ipv4 mask;
+};
+
+struct ocelot_vcap_udp_tcp {
+	u16 value;
+	u16 mask;
+};
+
+enum ocelot_ace_type {
+	OCELOT_ACE_TYPE_ANY,
+	OCELOT_ACE_TYPE_ETYPE,
+	OCELOT_ACE_TYPE_LLC,
+	OCELOT_ACE_TYPE_SNAP,
+	OCELOT_ACE_TYPE_ARP,
+	OCELOT_ACE_TYPE_IPV4,
+	OCELOT_ACE_TYPE_IPV6
+};
+
+struct ocelot_ace_vlan {
+	struct ocelot_vcap_vid vid;    /* VLAN ID (12 bit) */
+	struct ocelot_vcap_u8  pcp;    /* PCP (3 bit) */
+	enum ocelot_vcap_bit dei;    /* DEI */
+	enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */
+};
+
+struct ocelot_ace_frame_etype {
+	struct ocelot_vcap_u48 dmac;
+	struct ocelot_vcap_u48 smac;
+	struct ocelot_vcap_u16 etype;
+	struct ocelot_vcap_u16 data; /* MAC data */
+};
+
+struct ocelot_ace_frame_llc {
+	struct ocelot_vcap_u48 dmac;
+	struct ocelot_vcap_u48 smac;
+
+	/* LLC header: DSAP at byte 0, SSAP at byte 1, Control at byte 2 */
+	struct ocelot_vcap_u32 llc;
+};
+
+struct ocelot_ace_frame_snap {
+	struct ocelot_vcap_u48 dmac;
+	struct ocelot_vcap_u48 smac;
+
+	/* SNAP header: Organization Code at byte 0, Type at byte 3 */
+	struct ocelot_vcap_u40 snap;
+};
+
+struct ocelot_ace_frame_arp {
+	struct ocelot_vcap_u48 smac;
+	enum ocelot_vcap_bit arp;	/* Opcode ARP/RARP */
+	enum ocelot_vcap_bit req;	/* Opcode request/reply */
+	enum ocelot_vcap_bit unknown;    /* Opcode unknown */
+	enum ocelot_vcap_bit smac_match; /* Sender MAC matches SMAC */
+	enum ocelot_vcap_bit dmac_match; /* Target MAC matches DMAC */
+
+	/* Protocol address length is 4, hardware address length is 6 */
+	enum ocelot_vcap_bit length;
+
+	enum ocelot_vcap_bit ip;       /* Protocol address type IP */
+	enum ocelot_vcap_bit ethernet; /* Hardware address type Ethernet */
+	struct ocelot_vcap_ipv4 sip;     /* Sender IP address */
+	struct ocelot_vcap_ipv4 dip;     /* Target IP address */
+};
+
+struct ocelot_ace_frame_ipv4 {
+	enum ocelot_vcap_bit ttl;      /* TTL zero */
+	enum ocelot_vcap_bit fragment; /* Fragment */
+	enum ocelot_vcap_bit options;  /* Header options */
+	struct ocelot_vcap_u8 ds;
+	struct ocelot_vcap_u8 proto;      /* Protocol */
+	struct ocelot_vcap_ipv4 sip;      /* Source IP address */
+	struct ocelot_vcap_ipv4 dip;      /* Destination IP address */
+	struct ocelot_vcap_u48 data;      /* Not UDP/TCP: IP data */
+	struct ocelot_vcap_udp_tcp sport; /* UDP/TCP: Source port */
+	struct ocelot_vcap_udp_tcp dport; /* UDP/TCP: Destination port */
+	enum ocelot_vcap_bit tcp_fin;
+	enum ocelot_vcap_bit tcp_syn;
+	enum ocelot_vcap_bit tcp_rst;
+	enum ocelot_vcap_bit tcp_psh;
+	enum ocelot_vcap_bit tcp_ack;
+	enum ocelot_vcap_bit tcp_urg;
+	enum ocelot_vcap_bit sip_eq_dip;     /* SIP equals DIP  */
+	enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT  */
+	enum ocelot_vcap_bit seq_zero;       /* TCP sequence number is zero */
+};
+
+struct ocelot_ace_frame_ipv6 {
+	struct ocelot_vcap_u8 proto; /* IPv6 protocol */
+	struct ocelot_vcap_u128 sip; /* IPv6 source (byte 0-7 ignored) */
+	enum ocelot_vcap_bit ttl;  /* TTL zero */
+	struct ocelot_vcap_u8 ds;
+	struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */
+	struct ocelot_vcap_udp_tcp sport;
+	struct ocelot_vcap_udp_tcp dport;
+	enum ocelot_vcap_bit tcp_fin;
+	enum ocelot_vcap_bit tcp_syn;
+	enum ocelot_vcap_bit tcp_rst;
+	enum ocelot_vcap_bit tcp_psh;
+	enum ocelot_vcap_bit tcp_ack;
+	enum ocelot_vcap_bit tcp_urg;
+	enum ocelot_vcap_bit sip_eq_dip;     /* SIP equals DIP  */
+	enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT  */
+	enum ocelot_vcap_bit seq_zero;       /* TCP sequence number is zero */
+};
+
+enum ocelot_ace_action {
+	OCELOT_ACL_ACTION_DROP,
+	OCELOT_ACL_ACTION_TRAP,
+};
+
+struct ocelot_ace_stats {
+	u64 bytes;
+	u64 pkts;
+	u64 used;
+};
+
+struct ocelot_ace_rule {
+	struct list_head list;
+	struct ocelot_port *port;
+
+	u16 prio;
+	u32 id;
+
+	enum ocelot_ace_action action;
+	struct ocelot_ace_stats stats;
+	int chip_port;
+
+	enum ocelot_vcap_bit dmac_mc;
+	enum ocelot_vcap_bit dmac_bc;
+	struct ocelot_ace_vlan vlan;
+
+	enum ocelot_ace_type type;
+	union {
+		/* OCELOT_ACE_TYPE_ANY: no specific fields */
+		struct ocelot_ace_frame_etype etype;
+		struct ocelot_ace_frame_llc llc;
+		struct ocelot_ace_frame_snap snap;
+		struct ocelot_ace_frame_arp arp;
+		struct ocelot_ace_frame_ipv4 ipv4;
+		struct ocelot_ace_frame_ipv6 ipv6;
+	} frame;
+};
+
+struct ocelot_acl_block {
+	struct list_head rules;
+	struct ocelot *ocelot;
+	int count;
+};
+
+int ocelot_ace_rule_offload_add(struct ocelot_ace_rule *rule);
+int ocelot_ace_rule_offload_del(struct ocelot_ace_rule *rule);
+int ocelot_ace_rule_stats_update(struct ocelot_ace_rule *rule);
+
+int ocelot_ace_init(struct ocelot *ocelot);
+void ocelot_ace_deinit(void);
+
+int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port,
+				      struct flow_block_offload *f);
+void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port,
+					 struct flow_block_offload *f);
+
+#endif /* _MSCC_OCELOT_ACE_H_ */
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 3cdf63e..aac1151 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -6,31 +6,37 @@
  */
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/of_net.h>
 #include <linux/netdevice.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
+#include <linux/mfd/syscon.h>
 #include <linux/skbuff.h>
+#include <net/switchdev.h>
 
 #include "ocelot.h"
 
-static int ocelot_parse_ifh(u32 *ifh, struct frame_info *info)
+#define IFH_EXTRACT_BITFIELD64(x, o, w) (((x) >> (o)) & GENMASK_ULL((w) - 1, 0))
+
+static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
 {
-	int i;
 	u8 llen, wlen;
+	u64 ifh[2];
 
-	/* The IFH is in network order, switch to CPU order */
-	for (i = 0; i < IFH_LEN; i++)
-		ifh[i] = ntohl((__force __be32)ifh[i]);
+	ifh[0] = be64_to_cpu(((__force __be64 *)_ifh)[0]);
+	ifh[1] = be64_to_cpu(((__force __be64 *)_ifh)[1]);
 
-	wlen = (ifh[1] >> 7) & 0xff;
-	llen = (ifh[1] >> 15) & 0x3f;
+	wlen = IFH_EXTRACT_BITFIELD64(ifh[0], 7, 8);
+	llen = IFH_EXTRACT_BITFIELD64(ifh[0], 15, 6);
+
 	info->len = OCELOT_BUFFER_CELL_SZ * wlen + llen - 80;
 
-	info->port = (ifh[2] & GENMASK(14, 11)) >> 11;
+	info->timestamp = IFH_EXTRACT_BITFIELD64(ifh[0], 21, 32);
 
-	info->cpuq = (ifh[3] & GENMASK(27, 20)) >> 20;
-	info->tag_type = (ifh[3] & BIT(16)) >> 16;
-	info->vid = ifh[3] & GENMASK(11, 0);
+	info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
+
+	info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16, 1);
+	info->vid = IFH_EXTRACT_BITFIELD64(ifh[1], 0, 12);
 
 	return 0;
 }
@@ -88,13 +94,14 @@
 		return IRQ_NONE;
 
 	do {
-		struct sk_buff *skb;
+		struct skb_shared_hwtstamps *shhwtstamps;
+		u64 tod_in_ns, full_ts_in_ns;
+		struct frame_info info = {};
 		struct net_device *dev;
-		u32 *buf;
+		u32 ifh[4], val, *buf;
+		struct timespec64 ts;
 		int sz, len, buf_len;
-		u32 ifh[4];
-		u32 val;
-		struct frame_info info;
+		struct sk_buff *skb;
 
 		for (i = 0; i < IFH_LEN; i++) {
 			err = ocelot_rx_frame_word(ocelot, grp, true, &ifh[i]);
@@ -126,16 +133,37 @@
 			len += sz;
 		} while (len < buf_len);
 
-		/* Read the FCS and discard it */
+		/* Read the FCS */
 		sz = ocelot_rx_frame_word(ocelot, grp, false, &val);
 		/* Update the statistics if part of the FCS was read before */
 		len -= ETH_FCS_LEN - sz;
 
+		if (unlikely(dev->features & NETIF_F_RXFCS)) {
+			buf = (u32 *)skb_put(skb, ETH_FCS_LEN);
+			*buf = val;
+		}
+
 		if (sz < 0) {
 			err = sz;
 			break;
 		}
 
+		if (ocelot->ptp) {
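+			/* The IFH carries only the low 32 bits of the Rx
+			 * timestamp; rebuild the full 64-bit value from the
+			 * current PTP time, borrowing from the upper word if
+			 * the lower one has wrapped since the frame arrived.
+			 */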
+			ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
+
+			tod_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec);
+			if ((tod_in_ns & 0xffffffff) < info.timestamp)
+				full_ts_in_ns = (((tod_in_ns >> 32) - 1) << 32) |
+						info.timestamp;
+			else
+				full_ts_in_ns = (tod_in_ns & GENMASK_ULL(63, 32)) |
+						info.timestamp;
+
+			shhwtstamps = skb_hwtstamps(skb);
+			memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+			shhwtstamps->hwtstamp = full_ts_in_ns;
+		}
+
 		/* Everything we see on an interface that is in the HW bridge
 		 * has already been forwarded.
 		 */
@@ -155,6 +183,66 @@
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t ocelot_ptp_rdy_irq_handler(int irq, void *arg)
+{
+	int budget = OCELOT_PTP_QUEUE_SZ;
+	struct ocelot *ocelot = arg;
+
+	while (budget--) {
+		struct skb_shared_hwtstamps shhwtstamps;
+		struct list_head *pos, *tmp;
+		struct sk_buff *skb = NULL;
+		struct ocelot_skb *entry;
+		struct ocelot_port *port;
+		struct timespec64 ts;
+		u32 val, id, txport;
+
+		val = ocelot_read(ocelot, SYS_PTP_STATUS);
+
+		/* Check if a timestamp can be retrieved */
+		if (!(val & SYS_PTP_STATUS_PTP_MESS_VLD))
+			break;
+
+		WARN_ON(val & SYS_PTP_STATUS_PTP_OVFL);
+
+		/* Retrieve the ts ID and Tx port */
+		id = SYS_PTP_STATUS_PTP_MESS_ID_X(val);
+		txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val);
+
+		/* Retrieve its associated skb */
+		port = ocelot->ports[txport];
+
+		list_for_each_safe(pos, tmp, &port->skbs) {
+			entry = list_entry(pos, struct ocelot_skb, head);
+			if (entry->id != id)
+				continue;
+
+			skb = entry->skb;
+
+			list_del(pos);
+			kfree(entry);
+		}
+
+		/* Advance to the next timestamp */
+		ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
+
+		if (unlikely(!skb))
+			continue;
+
+		/* Get the h/w timestamp */
+		ocelot_get_hwtimestamp(ocelot, &ts);
+
+		/* Set the timestamp into the skb */
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+		shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+		skb_tstamp_tx(skb, &shhwtstamps);
+
+		dev_kfree_skb_any(skb);
+	}
+
+	return IRQ_HANDLED;
+}
+
 static const struct of_device_id mscc_ocelot_match[] = {
 	{ .compatible = "mscc,vsc7514-switch" },
 	{ }
@@ -163,23 +251,26 @@
 
 static int mscc_ocelot_probe(struct platform_device *pdev)
 {
-	int err, irq;
-	unsigned int i;
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *ports, *portnp;
+	int err, irq_xtr, irq_ptp_rdy;
 	struct ocelot *ocelot;
+	struct regmap *hsio;
+	unsigned int i;
 	u32 val;
 
 	struct {
 		enum ocelot_target id;
 		char *name;
+		u8 optional:1;
 	} res[] = {
 		{ SYS, "sys" },
 		{ REW, "rew" },
 		{ QSYS, "qsys" },
 		{ ANA, "ana" },
 		{ QS, "qs" },
-		{ HSIO, "hsio" },
+		{ S2, "s2" },
+		{ PTP, "ptp", 1 },
 	};
 
 	if (!np && !pdev->dev.platform_data)
@@ -196,26 +287,53 @@
 		struct regmap *target;
 
 		target = ocelot_io_platform_init(ocelot, pdev, res[i].name);
-		if (IS_ERR(target))
+		if (IS_ERR(target)) {
+			if (res[i].optional) {
+				ocelot->targets[res[i].id] = NULL;
+				continue;
+			}
+
 			return PTR_ERR(target);
+		}
 
 		ocelot->targets[res[i].id] = target;
 	}
 
+	hsio = syscon_regmap_lookup_by_compatible("mscc,ocelot-hsio");
+	if (IS_ERR(hsio)) {
+		dev_err(&pdev->dev, "missing hsio syscon\n");
+		return PTR_ERR(hsio);
+	}
+
+	ocelot->targets[HSIO] = hsio;
+
 	err = ocelot_chip_init(ocelot);
 	if (err)
 		return err;
 
-	irq = platform_get_irq_byname(pdev, "xtr");
-	if (irq < 0)
+	irq_xtr = platform_get_irq_byname(pdev, "xtr");
+	if (irq_xtr < 0)
 		return -ENODEV;
 
-	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+	err = devm_request_threaded_irq(&pdev->dev, irq_xtr, NULL,
 					ocelot_xtr_irq_handler, IRQF_ONESHOT,
 					"frame extraction", ocelot);
 	if (err)
 		return err;
 
+	irq_ptp_rdy = platform_get_irq_byname(pdev, "ptp_rdy");
+	if (irq_ptp_rdy > 0 && ocelot->targets[PTP]) {
+		err = devm_request_threaded_irq(&pdev->dev, irq_ptp_rdy, NULL,
+						ocelot_ptp_rdy_irq_handler,
+						IRQF_ONESHOT, "ptp ready",
+						ocelot);
+		if (err)
+			return err;
+
+		/* Both the PTP interrupt and the PTP bank are available */
+		ocelot->ptp = 1;
+	}
+
 	regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_INIT], 1);
 	regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_ENA], 1);
 
@@ -244,20 +362,14 @@
 	INIT_LIST_HEAD(&ocelot->multicast);
 	ocelot_init(ocelot);
 
-	ocelot_rmw(ocelot, HSIO_HW_CFG_DEV1G_4_MODE |
-		     HSIO_HW_CFG_DEV1G_6_MODE |
-		     HSIO_HW_CFG_DEV1G_9_MODE,
-		     HSIO_HW_CFG_DEV1G_4_MODE |
-		     HSIO_HW_CFG_DEV1G_6_MODE |
-		     HSIO_HW_CFG_DEV1G_9_MODE,
-		     HSIO_HW_CFG);
-
 	for_each_available_child_of_node(ports, portnp) {
 		struct device_node *phy_node;
 		struct phy_device *phy;
 		struct resource *res;
+		struct phy *serdes;
 		void __iomem *regs;
 		char res_name[8];
+		int phy_mode;
 		u32 port;
 
 		if (of_property_read_u32(portnp, "reg", &port))
@@ -276,23 +388,70 @@
 			continue;
 
 		phy = of_phy_find_device(phy_node);
+		of_node_put(phy_node);
 		if (!phy)
 			continue;
 
 		err = ocelot_probe_port(ocelot, port, regs, phy);
 		if (err) {
-			dev_err(&pdev->dev, "failed to probe ports\n");
-			goto err_probe_ports;
+			of_node_put(portnp);
+			goto out_put_ports;
 		}
+
+		phy_mode = of_get_phy_mode(portnp);
+		if (phy_mode < 0)
+			ocelot->ports[port]->phy_mode = PHY_INTERFACE_MODE_NA;
+		else
+			ocelot->ports[port]->phy_mode = phy_mode;
+
+		switch (ocelot->ports[port]->phy_mode) {
+		case PHY_INTERFACE_MODE_NA:
+			continue;
+		case PHY_INTERFACE_MODE_SGMII:
+			break;
+		case PHY_INTERFACE_MODE_QSGMII:
+			/* Ensure clock signals and speed are set on all
+			 * QSGMII links
+			 */
+			ocelot_port_writel(ocelot->ports[port],
+					   DEV_CLOCK_CFG_LINK_SPEED
+					   (OCELOT_SPEED_1000),
+					   DEV_CLOCK_CFG);
+			break;
+		default:
+			dev_err(ocelot->dev,
+				"invalid phy mode for port%d, (Q)SGMII only\n",
+				port);
+			of_node_put(portnp);
+			err = -EINVAL;
+			goto out_put_ports;
+		}
+
+		serdes = devm_of_phy_get(ocelot->dev, portnp, NULL);
+		if (IS_ERR(serdes)) {
+			err = PTR_ERR(serdes);
+			if (err == -EPROBE_DEFER)
+				dev_dbg(ocelot->dev, "deferring probe\n");
+			else
+				dev_err(ocelot->dev,
+					"missing SerDes phys for port%d\n",
+					port);
+
+			of_node_put(portnp);
+			goto out_put_ports;
+		}
+
+		ocelot->ports[port]->serdes = serdes;
 	}
 
 	register_netdevice_notifier(&ocelot_netdevice_nb);
+	register_switchdev_notifier(&ocelot_switchdev_nb);
+	register_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb);
 
 	dev_info(&pdev->dev, "Ocelot switch probed\n");
 
-	return 0;
-
-err_probe_ports:
+out_put_ports:
+	of_node_put(ports);
 	return err;
 }
 
@@ -301,6 +460,8 @@
 	struct ocelot *ocelot = platform_get_drvdata(pdev);
 
 	ocelot_deinit(ocelot);
+	unregister_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb);
+	unregister_switchdev_notifier(&ocelot_switchdev_nb);
 	unregister_netdevice_notifier(&ocelot_netdevice_nb);
 
 	return 0;
diff --git a/drivers/net/ethernet/mscc/ocelot_dev_gmii.h b/drivers/net/ethernet/mscc/ocelot_dev_gmii.h
deleted file mode 100644
index 6aa40ea..0000000
--- a/drivers/net/ethernet/mscc/ocelot_dev_gmii.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
-/*
- * Microsemi Ocelot Switch driver
- *
- * Copyright (c) 2017 Microsemi Corporation
- */
-
-#ifndef _MSCC_OCELOT_DEV_GMII_H_
-#define _MSCC_OCELOT_DEV_GMII_H_
-
-#define DEV_GMII_PORT_MODE_CLOCK_CFG                      0x0
-
-#define DEV_GMII_PORT_MODE_CLOCK_CFG_MAC_TX_RST           BIT(5)
-#define DEV_GMII_PORT_MODE_CLOCK_CFG_MAC_RX_RST           BIT(4)
-#define DEV_GMII_PORT_MODE_CLOCK_CFG_PORT_RST             BIT(3)
-#define DEV_GMII_PORT_MODE_CLOCK_CFG_PHY_RST              BIT(2)
-#define DEV_GMII_PORT_MODE_CLOCK_CFG_LINK_SPEED(x)        ((x) & GENMASK(1, 0))
-#define DEV_GMII_PORT_MODE_CLOCK_CFG_LINK_SPEED_M         GENMASK(1, 0)
-
-#define DEV_GMII_PORT_MODE_PORT_MISC                      0x4
-
-#define DEV_GMII_PORT_MODE_PORT_MISC_MPLS_RX_ENA          BIT(5)
-#define DEV_GMII_PORT_MODE_PORT_MISC_FWD_ERROR_ENA        BIT(4)
-#define DEV_GMII_PORT_MODE_PORT_MISC_FWD_PAUSE_ENA        BIT(3)
-#define DEV_GMII_PORT_MODE_PORT_MISC_FWD_CTRL_ENA         BIT(2)
-#define DEV_GMII_PORT_MODE_PORT_MISC_GMII_LOOP_ENA        BIT(1)
-#define DEV_GMII_PORT_MODE_PORT_MISC_DEV_LOOP_ENA         BIT(0)
-
-#define DEV_GMII_PORT_MODE_EVENTS                         0x8
-
-#define DEV_GMII_PORT_MODE_EEE_CFG                        0xc
-
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_ENA                BIT(22)
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_AGE(x)       (((x) << 15) & GENMASK(21, 15))
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_AGE_M        GENMASK(21, 15)
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_AGE_X(x)     (((x) & GENMASK(21, 15)) >> 15)
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_WAKEUP(x)    (((x) << 8) & GENMASK(14, 8))
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_WAKEUP_M     GENMASK(14, 8)
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_WAKEUP_X(x)  (((x) & GENMASK(14, 8)) >> 8)
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_HOLDOFF(x)   (((x) << 1) & GENMASK(7, 1))
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_HOLDOFF_M    GENMASK(7, 1)
-#define DEV_GMII_PORT_MODE_EEE_CFG_EEE_TIMER_HOLDOFF_X(x) (((x) & GENMASK(7, 1)) >> 1)
-#define DEV_GMII_PORT_MODE_EEE_CFG_PORT_LPI               BIT(0)
-
-#define DEV_GMII_PORT_MODE_RX_PATH_DELAY                  0x10
-
-#define DEV_GMII_PORT_MODE_TX_PATH_DELAY                  0x14
-
-#define DEV_GMII_PORT_MODE_PTP_PREDICT_CFG                0x18
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_ENA_CFG               0x1c
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_ENA_CFG_RX_ENA        BIT(4)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_ENA_CFG_TX_ENA        BIT(0)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG              0x20
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG_FC_WORD_SYNC_ENA BIT(8)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG_GIGA_MODE_ENA BIT(4)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_MODE_CFG_FDX_ENA      BIT(0)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_MAXLEN_CFG            0x24
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG              0x28
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_TAG_ID(x)    (((x) << 16) & GENMASK(31, 16))
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_TAG_ID_M     GENMASK(31, 16)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_TAG_ID_X(x)  (((x) & GENMASK(31, 16)) >> 16)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_PB_ENA       BIT(1)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_ADV_CHK_CFG           0x2c
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_ADV_CHK_CFG_LEN_DROP_ENA BIT(0)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG               0x30
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK BIT(17)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_REDUCED_TX_IFG BIT(16)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_TX_IFG(x)     (((x) << 8) & GENMASK(12, 8))
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_TX_IFG_M      GENMASK(12, 8)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_TX_IFG_X(x)   (((x) & GENMASK(12, 8)) >> 8)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG2(x)    (((x) << 4) & GENMASK(7, 4))
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG2_M     GENMASK(7, 4)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG2_X(x)  (((x) & GENMASK(7, 4)) >> 4)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG1(x)    ((x) & GENMASK(3, 0))
-#define DEV_GMII_MAC_CFG_STATUS_MAC_IFG_CFG_RX_IFG1_M     GENMASK(3, 0)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG               0x34
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_BYPASS_COL_SYNC BIT(26)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_OB_ENA        BIT(25)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_WEXC_DIS      BIT(24)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED(x)       (((x) << 16) & GENMASK(23, 16))
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED_M        GENMASK(23, 16)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED_X(x)     (((x) & GENMASK(23, 16)) >> 16)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_SEED_LOAD     BIT(12)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA BIT(8)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_LATE_COL_POS(x) ((x) & GENMASK(6, 0))
-#define DEV_GMII_MAC_CFG_STATUS_MAC_HDX_CFG_LATE_COL_POS_M GENMASK(6, 0)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_DBG_CFG               0x38
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_DBG_CFG_TBI_MODE      BIT(4)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_DBG_CFG_IFG_CRS_EXT_CHK_ENA BIT(0)
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_FC_MAC_LOW_CFG        0x3c
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_FC_MAC_HIGH_CFG       0x40
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY                0x44
-
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_IPG_SHRINK_STICKY BIT(9)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_PREAM_SHRINK_STICKY BIT(8)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_CARRIER_EXT_STICKY BIT(7)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_CARRIER_EXT_ERR_STICKY BIT(6)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_RX_JUNK_STICKY BIT(5)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_RETRANSMIT_STICKY BIT(4)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_JAM_STICKY  BIT(3)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_FIFO_OFLW_STICKY BIT(2)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_FRM_LEN_OVR_STICKY BIT(1)
-#define DEV_GMII_MAC_CFG_STATUS_MAC_STICKY_TX_ABORT_STICKY BIT(0)
-
-#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG                  0x48
-
-#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG_MM_RX_ENA        BIT(0)
-#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG_MM_TX_ENA        BIT(4)
-#define DEV_GMII_MM_CONFIG_ENABLE_CONFIG_KEEP_S_AFTER_D   BIT(8)
-
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG                   0x4c
-
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_DIS    BIT(0)
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_TIME(x) (((x) << 4) & GENMASK(11, 4))
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_TIME_M GENMASK(11, 4)
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_PRM_VERIFY_TIME_X(x) (((x) & GENMASK(11, 4)) >> 4)
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_VERIF_TIMER_UNITS(x) (((x) << 12) & GENMASK(13, 12))
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_VERIF_TIMER_UNITS_M GENMASK(13, 12)
-#define DEV_GMII_MM_CONFIG_VERIF_CONFIG_VERIF_TIMER_UNITS_X(x) (((x) & GENMASK(13, 12)) >> 12)
-
-#define DEV_GMII_MM_STATISTICS_MM_STATUS                  0x50
-
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_ACTIVE_STATUS BIT(0)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_ACTIVE_STICKY BIT(4)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_VERIFY_STATE(x) (((x) << 8) & GENMASK(10, 8))
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_VERIFY_STATE_M GENMASK(10, 8)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_PRMPT_VERIFY_STATE_X(x) (((x) & GENMASK(10, 8)) >> 8)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_UNEXP_RX_PFRM_STICKY BIT(12)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_UNEXP_TX_PFRM_STICKY BIT(16)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_MM_RX_FRAME_STATUS BIT(20)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_MM_TX_FRAME_STATUS BIT(24)
-#define DEV_GMII_MM_STATISTICS_MM_STATUS_MM_TX_PRMPT_STATUS BIT(28)
-
-#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
new file mode 100644
index 0000000..b894bc0
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch driver
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+
+#include "ocelot_ace.h"
+
+struct ocelot_port_block {
+	struct ocelot_acl_block *block;
+	struct ocelot_port *port;
+};
+
+static int ocelot_flower_parse_action(struct flow_cls_offload *f,
+				      struct ocelot_ace_rule *rule)
+{
+	const struct flow_action_entry *a;
+	int i;
+
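+	/* Only rules with exactly one action are supported */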
+	if (f->rule->action.num_entries != 1)
+		return -EOPNOTSUPP;
+
+	flow_action_for_each(i, a, &f->rule->action) {
+		switch (a->id) {
+		case FLOW_ACTION_DROP:
+			rule->action = OCELOT_ACL_ACTION_DROP;
+			break;
+		case FLOW_ACTION_TRAP:
+			rule->action = OCELOT_ACL_ACTION_TRAP;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static int ocelot_flower_parse(struct flow_cls_offload *f,
+			       struct ocelot_ace_rule *ocelot_rule)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
+
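+	/* Reject matches on dissector keys the hardware cannot offload */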
+	if (dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+		u16 proto = ntohs(f->common.protocol);
+
+		/* The hardware supports MAC matches only for the MAC_ETYPE
+		 * key, so bail out if any other matches (ports, TCP flags,
+		 * etc.) are requested.
+		 */
+		if ((dissector->used_keys &
+		    (BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+		     BIT(FLOW_DISSECTOR_KEY_BASIC) |
+		     BIT(FLOW_DISSECTOR_KEY_CONTROL))) !=
+		    (BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+		     BIT(FLOW_DISSECTOR_KEY_BASIC) |
+		     BIT(FLOW_DISSECTOR_KEY_CONTROL)))
+			return -EOPNOTSUPP;
+
+		if (proto == ETH_P_IP ||
+		    proto == ETH_P_IPV6 ||
+		    proto == ETH_P_ARP)
+			return -EOPNOTSUPP;
+
+		flow_rule_match_eth_addrs(rule, &match);
+		ocelot_rule->type = OCELOT_ACE_TYPE_ETYPE;
+		ether_addr_copy(ocelot_rule->frame.etype.dmac.value,
+				match.key->dst);
+		ether_addr_copy(ocelot_rule->frame.etype.smac.value,
+				match.key->src);
+		ether_addr_copy(ocelot_rule->frame.etype.dmac.mask,
+				match.mask->dst);
+		ether_addr_copy(ocelot_rule->frame.etype.smac.mask,
+				match.mask->src);
+		goto finished_key_parsing;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		if (ntohs(match.key->n_proto) == ETH_P_IP) {
+			ocelot_rule->type = OCELOT_ACE_TYPE_IPV4;
+			ocelot_rule->frame.ipv4.proto.value[0] =
+				match.key->ip_proto;
+			ocelot_rule->frame.ipv4.proto.mask[0] =
+				match.mask->ip_proto;
+		}
+		if (ntohs(match.key->n_proto) == ETH_P_IPV6) {
+			ocelot_rule->type = OCELOT_ACE_TYPE_IPV6;
+			ocelot_rule->frame.ipv6.proto.value[0] =
+				match.key->ip_proto;
+			ocelot_rule->frame.ipv6.proto.mask[0] =
+				match.mask->ip_proto;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) &&
+	    ntohs(f->common.protocol) == ETH_P_IP) {
+		struct flow_match_ipv4_addrs match;
+		u8 *tmp;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		tmp = &ocelot_rule->frame.ipv4.sip.value.addr[0];
+		memcpy(tmp, &match.key->src, 4);
+
+		tmp = &ocelot_rule->frame.ipv4.sip.mask.addr[0];
+		memcpy(tmp, &match.mask->src, 4);
+
+		tmp = &ocelot_rule->frame.ipv4.dip.value.addr[0];
+		memcpy(tmp, &match.key->dst, 4);
+
+		tmp = &ocelot_rule->frame.ipv4.dip.mask.addr[0];
+		memcpy(tmp, &match.mask->dst, 4);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) &&
+	    ntohs(f->common.protocol) == ETH_P_IPV6) {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		ocelot_rule->frame.ipv4.sport.value = ntohs(match.key->src);
+		ocelot_rule->frame.ipv4.sport.mask = ntohs(match.mask->src);
+		ocelot_rule->frame.ipv4.dport.value = ntohs(match.key->dst);
+		ocelot_rule->frame.ipv4.dport.mask = ntohs(match.mask->dst);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		ocelot_rule->type = OCELOT_ACE_TYPE_ANY;
+		ocelot_rule->vlan.vid.value = match.key->vlan_id;
+		ocelot_rule->vlan.vid.mask = match.mask->vlan_id;
+		ocelot_rule->vlan.pcp.value[0] = match.key->vlan_priority;
+		ocelot_rule->vlan.pcp.mask[0] = match.mask->vlan_priority;
+	}
+
+finished_key_parsing:
+	ocelot_rule->prio = f->common.prio;
+	ocelot_rule->id = f->cookie;
+	return ocelot_flower_parse_action(f, ocelot_rule);
+}
+
+static
+struct ocelot_ace_rule *ocelot_ace_rule_create(struct flow_cls_offload *f,
+					       struct ocelot_port_block *block)
+{
+	struct ocelot_ace_rule *rule;
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return NULL;
+
+	rule->port = block->port;
+	rule->chip_port = block->port->chip_port;
+	return rule;
+}
+
+static int ocelot_flower_replace(struct flow_cls_offload *f,
+				 struct ocelot_port_block *port_block)
+{
+	struct ocelot_ace_rule *rule;
+	int ret;
+
+	rule = ocelot_ace_rule_create(f, port_block);
+	if (!rule)
+		return -ENOMEM;
+
+	ret = ocelot_flower_parse(f, rule);
+	if (ret) {
+		kfree(rule);
+		return ret;
+	}
+
+	ret = ocelot_ace_rule_offload_add(rule);
+	if (ret)
+		return ret;
+
+	port_block->port->tc.offload_cnt++;
+	return 0;
+}
+
+static int ocelot_flower_destroy(struct flow_cls_offload *f,
+				 struct ocelot_port_block *port_block)
+{
+	struct ocelot_ace_rule rule;
+	int ret;
+
+	rule.prio = f->common.prio;
+	rule.port = port_block->port;
+	rule.id = f->cookie;
+
+	ret = ocelot_ace_rule_offload_del(&rule);
+	if (ret)
+		return ret;
+
+	port_block->port->tc.offload_cnt--;
+	return 0;
+}
+
+static int ocelot_flower_stats_update(struct flow_cls_offload *f,
+				      struct ocelot_port_block *port_block)
+{
+	struct ocelot_ace_rule rule;
+	int ret;
+
+	rule.prio = f->common.prio;
+	rule.port = port_block->port;
+	rule.id = f->cookie;
+	ret = ocelot_ace_rule_stats_update(&rule);
+	if (ret)
+		return ret;
+
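+	/* The hardware counts packets only; bytes and lastused are reported
+	 * as zero.
+	 */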
+	flow_stats_update(&f->stats, 0x0, rule.stats.pkts, 0x0);
+	return 0;
+}
+
+static int ocelot_setup_tc_cls_flower(struct flow_cls_offload *f,
+				      struct ocelot_port_block *port_block)
+{
+	switch (f->command) {
+	case FLOW_CLS_REPLACE:
+		return ocelot_flower_replace(f, port_block);
+	case FLOW_CLS_DESTROY:
+		return ocelot_flower_destroy(f, port_block);
+	case FLOW_CLS_STATS:
+		return ocelot_flower_stats_update(f, port_block);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ocelot_setup_tc_block_cb_flower(enum tc_setup_type type,
+					   void *type_data, void *cb_priv)
+{
+	struct ocelot_port_block *port_block = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(port_block->port->dev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return ocelot_setup_tc_cls_flower(type_data, cb_priv);
+	case TC_SETUP_CLSMATCHALL:
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static struct ocelot_port_block *
+ocelot_port_block_create(struct ocelot_port *port)
+{
+	struct ocelot_port_block *port_block;
+
+	port_block = kzalloc(sizeof(*port_block), GFP_KERNEL);
+	if (!port_block)
+		return NULL;
+
+	port_block->port = port;
+
+	return port_block;
+}
+
+static void ocelot_port_block_destroy(struct ocelot_port_block *block)
+{
+	kfree(block);
+}
+
+static void ocelot_tc_block_unbind(void *cb_priv)
+{
+	struct ocelot_port_block *port_block = cb_priv;
+
+	ocelot_port_block_destroy(port_block);
+}
+
+int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port,
+				      struct flow_block_offload *f)
+{
+	struct ocelot_port_block *port_block;
+	struct flow_block_cb *block_cb;
+	int ret;
+
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+		return -EOPNOTSUPP;
+
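+	/* Reuse the block callback if this block is already bound to the
+	 * port; otherwise allocate one with a fresh port_block as its
+	 * private data.
+	 */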
+	block_cb = flow_block_cb_lookup(f->block,
+					ocelot_setup_tc_block_cb_flower, port);
+	if (!block_cb) {
+		port_block = ocelot_port_block_create(port);
+		if (!port_block)
+			return -ENOMEM;
+
+		block_cb = flow_block_cb_alloc(ocelot_setup_tc_block_cb_flower,
+					       port, port_block,
+					       ocelot_tc_block_unbind);
+		if (IS_ERR(block_cb)) {
+			ret = PTR_ERR(block_cb);
+			goto err_cb_register;
+		}
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, f->driver_block_list);
+	} else {
+		port_block = flow_block_cb_priv(block_cb);
+	}
+
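+	/* Each binding takes a reference on the shared block callback. */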
+	flow_block_cb_incref(block_cb);
+	return 0;
+
+err_cb_register:
+	ocelot_port_block_destroy(port_block);
+
+	return ret;
+}
+
+void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port,
+					 struct flow_block_offload *f)
+{
+	struct flow_block_cb *block_cb;
+
+	block_cb = flow_block_cb_lookup(f->block,
+					ocelot_setup_tc_block_cb_flower, port);
+	if (!block_cb)
+		return;
+
+	if (!flow_block_cb_decref(block_cb)) {
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+	}
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_hsio.h b/drivers/net/ethernet/mscc/ocelot_hsio.h
deleted file mode 100644
index d93ddec..0000000
--- a/drivers/net/ethernet/mscc/ocelot_hsio.h
+++ /dev/null
@@ -1,785 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
-/*
- * Microsemi Ocelot Switch driver
- *
- * Copyright (c) 2017 Microsemi Corporation
- */
-
-#ifndef _MSCC_OCELOT_HSIO_H_
-#define _MSCC_OCELOT_HSIO_H_
-
-#define HSIO_PLL5G_CFG0_ENA_ROT                           BIT(31)
-#define HSIO_PLL5G_CFG0_ENA_LANE                          BIT(30)
-#define HSIO_PLL5G_CFG0_ENA_CLKTREE                       BIT(29)
-#define HSIO_PLL5G_CFG0_DIV4                              BIT(28)
-#define HSIO_PLL5G_CFG0_ENA_LOCK_FINE                     BIT(27)
-#define HSIO_PLL5G_CFG0_SELBGV820(x)                      (((x) << 23) & GENMASK(26, 23))
-#define HSIO_PLL5G_CFG0_SELBGV820_M                       GENMASK(26, 23)
-#define HSIO_PLL5G_CFG0_SELBGV820_X(x)                    (((x) & GENMASK(26, 23)) >> 23)
-#define HSIO_PLL5G_CFG0_LOOP_BW_RES(x)                    (((x) << 18) & GENMASK(22, 18))
-#define HSIO_PLL5G_CFG0_LOOP_BW_RES_M                     GENMASK(22, 18)
-#define HSIO_PLL5G_CFG0_LOOP_BW_RES_X(x)                  (((x) & GENMASK(22, 18)) >> 18)
-#define HSIO_PLL5G_CFG0_SELCPI(x)                         (((x) << 16) & GENMASK(17, 16))
-#define HSIO_PLL5G_CFG0_SELCPI_M                          GENMASK(17, 16)
-#define HSIO_PLL5G_CFG0_SELCPI_X(x)                       (((x) & GENMASK(17, 16)) >> 16)
-#define HSIO_PLL5G_CFG0_ENA_VCO_CONTRH                    BIT(15)
-#define HSIO_PLL5G_CFG0_ENA_CP1                           BIT(14)
-#define HSIO_PLL5G_CFG0_ENA_VCO_BUF                       BIT(13)
-#define HSIO_PLL5G_CFG0_ENA_BIAS                          BIT(12)
-#define HSIO_PLL5G_CFG0_CPU_CLK_DIV(x)                    (((x) << 6) & GENMASK(11, 6))
-#define HSIO_PLL5G_CFG0_CPU_CLK_DIV_M                     GENMASK(11, 6)
-#define HSIO_PLL5G_CFG0_CPU_CLK_DIV_X(x)                  (((x) & GENMASK(11, 6)) >> 6)
-#define HSIO_PLL5G_CFG0_CORE_CLK_DIV(x)                   ((x) & GENMASK(5, 0))
-#define HSIO_PLL5G_CFG0_CORE_CLK_DIV_M                    GENMASK(5, 0)
-
-#define HSIO_PLL5G_CFG1_ENA_DIRECT                        BIT(18)
-#define HSIO_PLL5G_CFG1_ROT_SPEED                         BIT(17)
-#define HSIO_PLL5G_CFG1_ROT_DIR                           BIT(16)
-#define HSIO_PLL5G_CFG1_READBACK_DATA_SEL                 BIT(15)
-#define HSIO_PLL5G_CFG1_RC_ENABLE                         BIT(14)
-#define HSIO_PLL5G_CFG1_RC_CTRL_DATA(x)                   (((x) << 6) & GENMASK(13, 6))
-#define HSIO_PLL5G_CFG1_RC_CTRL_DATA_M                    GENMASK(13, 6)
-#define HSIO_PLL5G_CFG1_RC_CTRL_DATA_X(x)                 (((x) & GENMASK(13, 6)) >> 6)
-#define HSIO_PLL5G_CFG1_QUARTER_RATE                      BIT(5)
-#define HSIO_PLL5G_CFG1_PWD_TX                            BIT(4)
-#define HSIO_PLL5G_CFG1_PWD_RX                            BIT(3)
-#define HSIO_PLL5G_CFG1_OUT_OF_RANGE_RECAL_ENA            BIT(2)
-#define HSIO_PLL5G_CFG1_HALF_RATE                         BIT(1)
-#define HSIO_PLL5G_CFG1_FORCE_SET_ENA                     BIT(0)
-
-#define HSIO_PLL5G_CFG2_ENA_TEST_MODE                     BIT(30)
-#define HSIO_PLL5G_CFG2_ENA_PFD_IN_FLIP                   BIT(29)
-#define HSIO_PLL5G_CFG2_ENA_VCO_NREF_TESTOUT              BIT(28)
-#define HSIO_PLL5G_CFG2_ENA_FBTESTOUT                     BIT(27)
-#define HSIO_PLL5G_CFG2_ENA_RCPLL                         BIT(26)
-#define HSIO_PLL5G_CFG2_ENA_CP2                           BIT(25)
-#define HSIO_PLL5G_CFG2_ENA_CLK_BYPASS1                   BIT(24)
-#define HSIO_PLL5G_CFG2_AMPC_SEL(x)                       (((x) << 16) & GENMASK(23, 16))
-#define HSIO_PLL5G_CFG2_AMPC_SEL_M                        GENMASK(23, 16)
-#define HSIO_PLL5G_CFG2_AMPC_SEL_X(x)                     (((x) & GENMASK(23, 16)) >> 16)
-#define HSIO_PLL5G_CFG2_ENA_CLK_BYPASS                    BIT(15)
-#define HSIO_PLL5G_CFG2_PWD_AMPCTRL_N                     BIT(14)
-#define HSIO_PLL5G_CFG2_ENA_AMPCTRL                       BIT(13)
-#define HSIO_PLL5G_CFG2_ENA_AMP_CTRL_FORCE                BIT(12)
-#define HSIO_PLL5G_CFG2_FRC_FSM_POR                       BIT(11)
-#define HSIO_PLL5G_CFG2_DISABLE_FSM_POR                   BIT(10)
-#define HSIO_PLL5G_CFG2_GAIN_TEST(x)                      (((x) << 5) & GENMASK(9, 5))
-#define HSIO_PLL5G_CFG2_GAIN_TEST_M                       GENMASK(9, 5)
-#define HSIO_PLL5G_CFG2_GAIN_TEST_X(x)                    (((x) & GENMASK(9, 5)) >> 5)
-#define HSIO_PLL5G_CFG2_EN_RESET_OVERRUN                  BIT(4)
-#define HSIO_PLL5G_CFG2_EN_RESET_LIM_DET                  BIT(3)
-#define HSIO_PLL5G_CFG2_EN_RESET_FRQ_DET                  BIT(2)
-#define HSIO_PLL5G_CFG2_DISABLE_FSM                       BIT(1)
-#define HSIO_PLL5G_CFG2_ENA_GAIN_TEST                     BIT(0)
-
-#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL(x)               (((x) << 22) & GENMASK(23, 22))
-#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL_M                GENMASK(23, 22)
-#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL_X(x)             (((x) & GENMASK(23, 22)) >> 22)
-#define HSIO_PLL5G_CFG3_TESTOUT_SEL(x)                    (((x) << 19) & GENMASK(21, 19))
-#define HSIO_PLL5G_CFG3_TESTOUT_SEL_M                     GENMASK(21, 19)
-#define HSIO_PLL5G_CFG3_TESTOUT_SEL_X(x)                  (((x) & GENMASK(21, 19)) >> 19)
-#define HSIO_PLL5G_CFG3_ENA_ANA_TEST_OUT                  BIT(18)
-#define HSIO_PLL5G_CFG3_ENA_TEST_OUT                      BIT(17)
-#define HSIO_PLL5G_CFG3_SEL_FBDCLK                        BIT(16)
-#define HSIO_PLL5G_CFG3_SEL_CML_CMOS_PFD                  BIT(15)
-#define HSIO_PLL5G_CFG3_RST_FB_N                          BIT(14)
-#define HSIO_PLL5G_CFG3_FORCE_VCO_CONTRH                  BIT(13)
-#define HSIO_PLL5G_CFG3_FORCE_LO                          BIT(12)
-#define HSIO_PLL5G_CFG3_FORCE_HI                          BIT(11)
-#define HSIO_PLL5G_CFG3_FORCE_ENA                         BIT(10)
-#define HSIO_PLL5G_CFG3_FORCE_CP                          BIT(9)
-#define HSIO_PLL5G_CFG3_FBDIVSEL_TST_ENA                  BIT(8)
-#define HSIO_PLL5G_CFG3_FBDIVSEL(x)                       ((x) & GENMASK(7, 0))
-#define HSIO_PLL5G_CFG3_FBDIVSEL_M                        GENMASK(7, 0)
-
-#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL(x)                   (((x) << 16) & GENMASK(23, 16))
-#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL_M                    GENMASK(23, 16)
-#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL_X(x)                 (((x) & GENMASK(23, 16)) >> 16)
-#define HSIO_PLL5G_CFG4_IB_CTRL(x)                        ((x) & GENMASK(15, 0))
-#define HSIO_PLL5G_CFG4_IB_CTRL_M                         GENMASK(15, 0)
-
-#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL(x)                   (((x) << 16) & GENMASK(23, 16))
-#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL_M                    GENMASK(23, 16)
-#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL_X(x)                 (((x) & GENMASK(23, 16)) >> 16)
-#define HSIO_PLL5G_CFG5_OB_CTRL(x)                        ((x) & GENMASK(15, 0))
-#define HSIO_PLL5G_CFG5_OB_CTRL_M                         GENMASK(15, 0)
-
-#define HSIO_PLL5G_CFG6_REFCLK_SEL_SRC                    BIT(23)
-#define HSIO_PLL5G_CFG6_REFCLK_SEL(x)                     (((x) << 20) & GENMASK(22, 20))
-#define HSIO_PLL5G_CFG6_REFCLK_SEL_M                      GENMASK(22, 20)
-#define HSIO_PLL5G_CFG6_REFCLK_SEL_X(x)                   (((x) & GENMASK(22, 20)) >> 20)
-#define HSIO_PLL5G_CFG6_REFCLK_SRC                        BIT(19)
-#define HSIO_PLL5G_CFG6_POR_DEL_SEL(x)                    (((x) << 16) & GENMASK(17, 16))
-#define HSIO_PLL5G_CFG6_POR_DEL_SEL_M                     GENMASK(17, 16)
-#define HSIO_PLL5G_CFG6_POR_DEL_SEL_X(x)                  (((x) & GENMASK(17, 16)) >> 16)
-#define HSIO_PLL5G_CFG6_DIV125REF_SEL(x)                  (((x) << 8) & GENMASK(15, 8))
-#define HSIO_PLL5G_CFG6_DIV125REF_SEL_M                   GENMASK(15, 8)
-#define HSIO_PLL5G_CFG6_DIV125REF_SEL_X(x)                (((x) & GENMASK(15, 8)) >> 8)
-#define HSIO_PLL5G_CFG6_ENA_REFCLKC2                      BIT(7)
-#define HSIO_PLL5G_CFG6_ENA_FBCLKC2                       BIT(6)
-#define HSIO_PLL5G_CFG6_DDR_CLK_DIV(x)                    ((x) & GENMASK(5, 0))
-#define HSIO_PLL5G_CFG6_DDR_CLK_DIV_M                     GENMASK(5, 0)
-
-#define HSIO_PLL5G_STATUS0_RANGE_LIM                      BIT(12)
-#define HSIO_PLL5G_STATUS0_OUT_OF_RANGE_ERR               BIT(11)
-#define HSIO_PLL5G_STATUS0_CALIBRATION_ERR                BIT(10)
-#define HSIO_PLL5G_STATUS0_CALIBRATION_DONE               BIT(9)
-#define HSIO_PLL5G_STATUS0_READBACK_DATA(x)               (((x) << 1) & GENMASK(8, 1))
-#define HSIO_PLL5G_STATUS0_READBACK_DATA_M                GENMASK(8, 1)
-#define HSIO_PLL5G_STATUS0_READBACK_DATA_X(x)             (((x) & GENMASK(8, 1)) >> 1)
-#define HSIO_PLL5G_STATUS0_LOCK_STATUS                    BIT(0)
-
-#define HSIO_PLL5G_STATUS1_SIG_DEL(x)                     (((x) << 21) & GENMASK(28, 21))
-#define HSIO_PLL5G_STATUS1_SIG_DEL_M                      GENMASK(28, 21)
-#define HSIO_PLL5G_STATUS1_SIG_DEL_X(x)                   (((x) & GENMASK(28, 21)) >> 21)
-#define HSIO_PLL5G_STATUS1_GAIN_STAT(x)                   (((x) << 16) & GENMASK(20, 16))
-#define HSIO_PLL5G_STATUS1_GAIN_STAT_M                    GENMASK(20, 16)
-#define HSIO_PLL5G_STATUS1_GAIN_STAT_X(x)                 (((x) & GENMASK(20, 16)) >> 16)
-#define HSIO_PLL5G_STATUS1_FBCNT_DIF(x)                   (((x) << 4) & GENMASK(13, 4))
-#define HSIO_PLL5G_STATUS1_FBCNT_DIF_M                    GENMASK(13, 4)
-#define HSIO_PLL5G_STATUS1_FBCNT_DIF_X(x)                 (((x) & GENMASK(13, 4)) >> 4)
-#define HSIO_PLL5G_STATUS1_FSM_STAT(x)                    (((x) << 1) & GENMASK(3, 1))
-#define HSIO_PLL5G_STATUS1_FSM_STAT_M                     GENMASK(3, 1)
-#define HSIO_PLL5G_STATUS1_FSM_STAT_X(x)                  (((x) & GENMASK(3, 1)) >> 1)
-#define HSIO_PLL5G_STATUS1_FSM_LOCK                       BIT(0)
-
-#define HSIO_PLL5G_BIST_CFG0_PLLB_START_BIST              BIT(31)
-#define HSIO_PLL5G_BIST_CFG0_PLLB_MEAS_MODE               BIT(30)
-#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT(x)          (((x) << 20) & GENMASK(23, 20))
-#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT_M           GENMASK(23, 20)
-#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT_X(x)        (((x) & GENMASK(23, 20)) >> 20)
-#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT(x)          (((x) << 16) & GENMASK(19, 16))
-#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT_M           GENMASK(19, 16)
-#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT_X(x)        (((x) & GENMASK(19, 16)) >> 16)
-#define HSIO_PLL5G_BIST_CFG0_PLLB_DIV_FACTOR_PRE(x)       ((x) & GENMASK(15, 0))
-#define HSIO_PLL5G_BIST_CFG0_PLLB_DIV_FACTOR_PRE_M        GENMASK(15, 0)
-
-#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT(x)            (((x) << 4) & GENMASK(7, 4))
-#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT_M             GENMASK(7, 4)
-#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT_X(x)          (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_PLL5G_BIST_STAT0_PLLB_BUSY                   BIT(2)
-#define HSIO_PLL5G_BIST_STAT0_PLLB_DONE_N                 BIT(1)
-#define HSIO_PLL5G_BIST_STAT0_PLLB_FAIL                   BIT(0)
-
-#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT(x)             (((x) << 16) & GENMASK(31, 16))
-#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT_M              GENMASK(31, 16)
-#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT_X(x)           (((x) & GENMASK(31, 16)) >> 16)
-#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_REF_DIFF(x)        ((x) & GENMASK(15, 0))
-#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_REF_DIFF_M         GENMASK(15, 0)
-
-#define HSIO_RCOMP_CFG0_PWD_ENA                           BIT(13)
-#define HSIO_RCOMP_CFG0_RUN_CAL                           BIT(12)
-#define HSIO_RCOMP_CFG0_SPEED_SEL(x)                      (((x) << 10) & GENMASK(11, 10))
-#define HSIO_RCOMP_CFG0_SPEED_SEL_M                       GENMASK(11, 10)
-#define HSIO_RCOMP_CFG0_SPEED_SEL_X(x)                    (((x) & GENMASK(11, 10)) >> 10)
-#define HSIO_RCOMP_CFG0_MODE_SEL(x)                       (((x) << 8) & GENMASK(9, 8))
-#define HSIO_RCOMP_CFG0_MODE_SEL_M                        GENMASK(9, 8)
-#define HSIO_RCOMP_CFG0_MODE_SEL_X(x)                     (((x) & GENMASK(9, 8)) >> 8)
-#define HSIO_RCOMP_CFG0_FORCE_ENA                         BIT(4)
-#define HSIO_RCOMP_CFG0_RCOMP_VAL(x)                      ((x) & GENMASK(3, 0))
-#define HSIO_RCOMP_CFG0_RCOMP_VAL_M                       GENMASK(3, 0)
-
-#define HSIO_RCOMP_STATUS_BUSY                            BIT(12)
-#define HSIO_RCOMP_STATUS_DELTA_ALERT                     BIT(7)
-#define HSIO_RCOMP_STATUS_RCOMP(x)                        ((x) & GENMASK(3, 0))
-#define HSIO_RCOMP_STATUS_RCOMP_M                         GENMASK(3, 0)
-
-#define HSIO_SYNC_ETH_CFG_RSZ                             0x4
-
-#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC(x)             (((x) << 4) & GENMASK(7, 4))
-#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC_M              GENMASK(7, 4)
-#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC_X(x)           (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV(x)             (((x) << 1) & GENMASK(3, 1))
-#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV_M              GENMASK(3, 1)
-#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV_X(x)           (((x) & GENMASK(3, 1)) >> 1)
-#define HSIO_SYNC_ETH_CFG_RECO_CLK_ENA                    BIT(0)
-
-#define HSIO_SYNC_ETH_PLL_CFG_PLL_AUTO_SQUELCH_ENA        BIT(0)
-
-#define HSIO_S1G_DES_CFG_DES_PHS_CTRL(x)                  (((x) << 13) & GENMASK(16, 13))
-#define HSIO_S1G_DES_CFG_DES_PHS_CTRL_M                   GENMASK(16, 13)
-#define HSIO_S1G_DES_CFG_DES_PHS_CTRL_X(x)                (((x) & GENMASK(16, 13)) >> 13)
-#define HSIO_S1G_DES_CFG_DES_CPMD_SEL(x)                  (((x) << 11) & GENMASK(12, 11))
-#define HSIO_S1G_DES_CFG_DES_CPMD_SEL_M                   GENMASK(12, 11)
-#define HSIO_S1G_DES_CFG_DES_CPMD_SEL_X(x)                (((x) & GENMASK(12, 11)) >> 11)
-#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL(x)                 (((x) << 8) & GENMASK(10, 8))
-#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL_M                  GENMASK(10, 8)
-#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL_X(x)               (((x) & GENMASK(10, 8)) >> 8)
-#define HSIO_S1G_DES_CFG_DES_BW_ANA(x)                    (((x) << 5) & GENMASK(7, 5))
-#define HSIO_S1G_DES_CFG_DES_BW_ANA_M                     GENMASK(7, 5)
-#define HSIO_S1G_DES_CFG_DES_BW_ANA_X(x)                  (((x) & GENMASK(7, 5)) >> 5)
-#define HSIO_S1G_DES_CFG_DES_SWAP_ANA                     BIT(4)
-#define HSIO_S1G_DES_CFG_DES_BW_HYST(x)                   (((x) << 1) & GENMASK(3, 1))
-#define HSIO_S1G_DES_CFG_DES_BW_HYST_M                    GENMASK(3, 1)
-#define HSIO_S1G_DES_CFG_DES_BW_HYST_X(x)                 (((x) & GENMASK(3, 1)) >> 1)
-#define HSIO_S1G_DES_CFG_DES_SWAP_HYST                    BIT(0)
-
-#define HSIO_S1G_IB_CFG_IB_FX100_ENA                      BIT(27)
-#define HSIO_S1G_IB_CFG_ACJTAG_HYST(x)                    (((x) << 24) & GENMASK(26, 24))
-#define HSIO_S1G_IB_CFG_ACJTAG_HYST_M                     GENMASK(26, 24)
-#define HSIO_S1G_IB_CFG_ACJTAG_HYST_X(x)                  (((x) & GENMASK(26, 24)) >> 24)
-#define HSIO_S1G_IB_CFG_IB_DET_LEV(x)                     (((x) << 19) & GENMASK(21, 19))
-#define HSIO_S1G_IB_CFG_IB_DET_LEV_M                      GENMASK(21, 19)
-#define HSIO_S1G_IB_CFG_IB_DET_LEV_X(x)                   (((x) & GENMASK(21, 19)) >> 19)
-#define HSIO_S1G_IB_CFG_IB_HYST_LEV                       BIT(14)
-#define HSIO_S1G_IB_CFG_IB_ENA_CMV_TERM                   BIT(13)
-#define HSIO_S1G_IB_CFG_IB_ENA_DC_COUPLING                BIT(12)
-#define HSIO_S1G_IB_CFG_IB_ENA_DETLEV                     BIT(11)
-#define HSIO_S1G_IB_CFG_IB_ENA_HYST                       BIT(10)
-#define HSIO_S1G_IB_CFG_IB_ENA_OFFSET_COMP                BIT(9)
-#define HSIO_S1G_IB_CFG_IB_EQ_GAIN(x)                     (((x) << 6) & GENMASK(8, 6))
-#define HSIO_S1G_IB_CFG_IB_EQ_GAIN_M                      GENMASK(8, 6)
-#define HSIO_S1G_IB_CFG_IB_EQ_GAIN_X(x)                   (((x) & GENMASK(8, 6)) >> 6)
-#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ(x)             (((x) << 4) & GENMASK(5, 4))
-#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ_M              GENMASK(5, 4)
-#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ_X(x)           (((x) & GENMASK(5, 4)) >> 4)
-#define HSIO_S1G_IB_CFG_IB_RESISTOR_CTRL(x)               ((x) & GENMASK(3, 0))
-#define HSIO_S1G_IB_CFG_IB_RESISTOR_CTRL_M                GENMASK(3, 0)
-
-#define HSIO_S1G_OB_CFG_OB_SLP(x)                         (((x) << 17) & GENMASK(18, 17))
-#define HSIO_S1G_OB_CFG_OB_SLP_M                          GENMASK(18, 17)
-#define HSIO_S1G_OB_CFG_OB_SLP_X(x)                       (((x) & GENMASK(18, 17)) >> 17)
-#define HSIO_S1G_OB_CFG_OB_AMP_CTRL(x)                    (((x) << 13) & GENMASK(16, 13))
-#define HSIO_S1G_OB_CFG_OB_AMP_CTRL_M                     GENMASK(16, 13)
-#define HSIO_S1G_OB_CFG_OB_AMP_CTRL_X(x)                  (((x) & GENMASK(16, 13)) >> 13)
-#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL(x)               (((x) << 10) & GENMASK(12, 10))
-#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL_M                GENMASK(12, 10)
-#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL_X(x)             (((x) & GENMASK(12, 10)) >> 10)
-#define HSIO_S1G_OB_CFG_OB_DIS_VCM_CTRL                   BIT(9)
-#define HSIO_S1G_OB_CFG_OB_EN_MEAS_VREG                   BIT(8)
-#define HSIO_S1G_OB_CFG_OB_VCM_CTRL(x)                    (((x) << 4) & GENMASK(7, 4))
-#define HSIO_S1G_OB_CFG_OB_VCM_CTRL_M                     GENMASK(7, 4)
-#define HSIO_S1G_OB_CFG_OB_VCM_CTRL_X(x)                  (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_S1G_OB_CFG_OB_RESISTOR_CTRL(x)               ((x) & GENMASK(3, 0))
-#define HSIO_S1G_OB_CFG_OB_RESISTOR_CTRL_M                GENMASK(3, 0)
-
-#define HSIO_S1G_SER_CFG_SER_IDLE                         BIT(9)
-#define HSIO_S1G_SER_CFG_SER_DEEMPH                       BIT(8)
-#define HSIO_S1G_SER_CFG_SER_CPMD_SEL                     BIT(7)
-#define HSIO_S1G_SER_CFG_SER_SWAP_CPMD                    BIT(6)
-#define HSIO_S1G_SER_CFG_SER_ALISEL(x)                    (((x) << 4) & GENMASK(5, 4))
-#define HSIO_S1G_SER_CFG_SER_ALISEL_M                     GENMASK(5, 4)
-#define HSIO_S1G_SER_CFG_SER_ALISEL_X(x)                  (((x) & GENMASK(5, 4)) >> 4)
-#define HSIO_S1G_SER_CFG_SER_ENHYS                        BIT(3)
-#define HSIO_S1G_SER_CFG_SER_BIG_WIN                      BIT(2)
-#define HSIO_S1G_SER_CFG_SER_EN_WIN                       BIT(1)
-#define HSIO_S1G_SER_CFG_SER_ENALI                        BIT(0)
-
-#define HSIO_S1G_COMMON_CFG_SYS_RST                       BIT(31)
-#define HSIO_S1G_COMMON_CFG_SE_AUTO_SQUELCH_ENA           BIT(21)
-#define HSIO_S1G_COMMON_CFG_ENA_LANE                      BIT(18)
-#define HSIO_S1G_COMMON_CFG_PWD_RX                        BIT(17)
-#define HSIO_S1G_COMMON_CFG_PWD_TX                        BIT(16)
-#define HSIO_S1G_COMMON_CFG_LANE_CTRL(x)                  (((x) << 13) & GENMASK(15, 13))
-#define HSIO_S1G_COMMON_CFG_LANE_CTRL_M                   GENMASK(15, 13)
-#define HSIO_S1G_COMMON_CFG_LANE_CTRL_X(x)                (((x) & GENMASK(15, 13)) >> 13)
-#define HSIO_S1G_COMMON_CFG_ENA_DIRECT                    BIT(12)
-#define HSIO_S1G_COMMON_CFG_ENA_ELOOP                     BIT(11)
-#define HSIO_S1G_COMMON_CFG_ENA_FLOOP                     BIT(10)
-#define HSIO_S1G_COMMON_CFG_ENA_ILOOP                     BIT(9)
-#define HSIO_S1G_COMMON_CFG_ENA_PLOOP                     BIT(8)
-#define HSIO_S1G_COMMON_CFG_HRATE                         BIT(7)
-#define HSIO_S1G_COMMON_CFG_IF_MODE                       BIT(0)
-
-#define HSIO_S1G_PLL_CFG_PLL_ENA_FB_DIV2                  BIT(22)
-#define HSIO_S1G_PLL_CFG_PLL_ENA_RC_DIV2                  BIT(21)
-#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA(x)             (((x) << 8) & GENMASK(15, 8))
-#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA_M              GENMASK(15, 8)
-#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA_X(x)           (((x) & GENMASK(15, 8)) >> 8)
-#define HSIO_S1G_PLL_CFG_PLL_FSM_ENA                      BIT(7)
-#define HSIO_S1G_PLL_CFG_PLL_FSM_FORCE_SET_ENA            BIT(6)
-#define HSIO_S1G_PLL_CFG_PLL_FSM_OOR_RECAL_ENA            BIT(5)
-#define HSIO_S1G_PLL_CFG_PLL_RB_DATA_SEL                  BIT(3)
-
-#define HSIO_S1G_PLL_STATUS_PLL_CAL_NOT_DONE              BIT(12)
-#define HSIO_S1G_PLL_STATUS_PLL_CAL_ERR                   BIT(11)
-#define HSIO_S1G_PLL_STATUS_PLL_OUT_OF_RANGE_ERR          BIT(10)
-#define HSIO_S1G_PLL_STATUS_PLL_RB_DATA(x)                ((x) & GENMASK(7, 0))
-#define HSIO_S1G_PLL_STATUS_PLL_RB_DATA_M                 GENMASK(7, 0)
-
-#define HSIO_S1G_DFT_CFG0_LAZYBIT                         BIT(31)
-#define HSIO_S1G_DFT_CFG0_INV_DIS                         BIT(23)
-#define HSIO_S1G_DFT_CFG0_PRBS_SEL(x)                     (((x) << 20) & GENMASK(21, 20))
-#define HSIO_S1G_DFT_CFG0_PRBS_SEL_M                      GENMASK(21, 20)
-#define HSIO_S1G_DFT_CFG0_PRBS_SEL_X(x)                   (((x) & GENMASK(21, 20)) >> 20)
-#define HSIO_S1G_DFT_CFG0_TEST_MODE(x)                    (((x) << 16) & GENMASK(18, 16))
-#define HSIO_S1G_DFT_CFG0_TEST_MODE_M                     GENMASK(18, 16)
-#define HSIO_S1G_DFT_CFG0_TEST_MODE_X(x)                  (((x) & GENMASK(18, 16)) >> 16)
-#define HSIO_S1G_DFT_CFG0_RX_PHS_CORR_DIS                 BIT(4)
-#define HSIO_S1G_DFT_CFG0_RX_PDSENS_ENA                   BIT(3)
-#define HSIO_S1G_DFT_CFG0_RX_DFT_ENA                      BIT(2)
-#define HSIO_S1G_DFT_CFG0_TX_DFT_ENA                      BIT(0)
-
-#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
-#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL_M                GENMASK(17, 8)
-#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
-#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
-#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ_M                  GENMASK(7, 4)
-#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_S1G_DFT_CFG1_TX_JI_ENA                       BIT(3)
-#define HSIO_S1G_DFT_CFG1_TX_WAVEFORM_SEL                 BIT(2)
-#define HSIO_S1G_DFT_CFG1_TX_FREQOFF_DIR                  BIT(1)
-#define HSIO_S1G_DFT_CFG1_TX_FREQOFF_ENA                  BIT(0)
-
-#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
-#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL_M                GENMASK(17, 8)
-#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
-#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
-#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ_M                  GENMASK(7, 4)
-#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_S1G_DFT_CFG2_RX_JI_ENA                       BIT(3)
-#define HSIO_S1G_DFT_CFG2_RX_WAVEFORM_SEL                 BIT(2)
-#define HSIO_S1G_DFT_CFG2_RX_FREQOFF_DIR                  BIT(1)
-#define HSIO_S1G_DFT_CFG2_RX_FREQOFF_ENA                  BIT(0)
-
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_ENA             BIT(20)
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH(x)     (((x) << 16) & GENMASK(17, 16))
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_M      GENMASK(17, 16)
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_X(x)   (((x) & GENMASK(17, 16)) >> 16)
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH(x)         (((x) << 8) & GENMASK(15, 8))
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_M          GENMASK(15, 8)
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_X(x)       (((x) & GENMASK(15, 8)) >> 8)
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_LOW(x)          ((x) & GENMASK(7, 0))
-#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_LOW_M           GENMASK(7, 0)
-
-#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE(x)          (((x) << 11) & GENMASK(12, 11))
-#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE_M           GENMASK(12, 11)
-#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE_X(x)        (((x) & GENMASK(12, 11)) >> 11)
-#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_SWAP             BIT(10)
-#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_MODE             BIT(9)
-#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_ENA              BIT(8)
-#define HSIO_S1G_MISC_CFG_RX_LPI_MODE_ENA                 BIT(5)
-#define HSIO_S1G_MISC_CFG_TX_LPI_MODE_ENA                 BIT(4)
-#define HSIO_S1G_MISC_CFG_RX_DATA_INV_ENA                 BIT(3)
-#define HSIO_S1G_MISC_CFG_TX_DATA_INV_ENA                 BIT(2)
-#define HSIO_S1G_MISC_CFG_LANE_RST                        BIT(0)
-
-#define HSIO_S1G_DFT_STATUS_PLL_BIST_NOT_DONE             BIT(7)
-#define HSIO_S1G_DFT_STATUS_PLL_BIST_FAILED               BIT(6)
-#define HSIO_S1G_DFT_STATUS_PLL_BIST_TIMEOUT_ERR          BIT(5)
-#define HSIO_S1G_DFT_STATUS_BIST_ACTIVE                   BIT(3)
-#define HSIO_S1G_DFT_STATUS_BIST_NOSYNC                   BIT(2)
-#define HSIO_S1G_DFT_STATUS_BIST_COMPLETE_N               BIT(1)
-#define HSIO_S1G_DFT_STATUS_BIST_ERROR                    BIT(0)
-
-#define HSIO_S1G_MISC_STATUS_DES_100FX_PHASE_SEL          BIT(0)
-
-#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_WR_ONE_SHOT        BIT(31)
-#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_RD_ONE_SHOT        BIT(30)
-#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_ADDR(x)            ((x) & GENMASK(8, 0))
-#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_ADDR_M             GENMASK(8, 0)
-
-#define HSIO_S6G_DIG_CFG_GP(x)                            (((x) << 16) & GENMASK(18, 16))
-#define HSIO_S6G_DIG_CFG_GP_M                             GENMASK(18, 16)
-#define HSIO_S6G_DIG_CFG_GP_X(x)                          (((x) & GENMASK(18, 16)) >> 16)
-#define HSIO_S6G_DIG_CFG_TX_BIT_DOUBLING_MODE_ENA         BIT(7)
-#define HSIO_S6G_DIG_CFG_SIGDET_TESTMODE                  BIT(6)
-#define HSIO_S6G_DIG_CFG_SIGDET_AST(x)                    (((x) << 3) & GENMASK(5, 3))
-#define HSIO_S6G_DIG_CFG_SIGDET_AST_M                     GENMASK(5, 3)
-#define HSIO_S6G_DIG_CFG_SIGDET_AST_X(x)                  (((x) & GENMASK(5, 3)) >> 3)
-#define HSIO_S6G_DIG_CFG_SIGDET_DST(x)                    ((x) & GENMASK(2, 0))
-#define HSIO_S6G_DIG_CFG_SIGDET_DST_M                     GENMASK(2, 0)
-
-#define HSIO_S6G_DFT_CFG0_LAZYBIT                         BIT(31)
-#define HSIO_S6G_DFT_CFG0_INV_DIS                         BIT(23)
-#define HSIO_S6G_DFT_CFG0_PRBS_SEL(x)                     (((x) << 20) & GENMASK(21, 20))
-#define HSIO_S6G_DFT_CFG0_PRBS_SEL_M                      GENMASK(21, 20)
-#define HSIO_S6G_DFT_CFG0_PRBS_SEL_X(x)                   (((x) & GENMASK(21, 20)) >> 20)
-#define HSIO_S6G_DFT_CFG0_TEST_MODE(x)                    (((x) << 16) & GENMASK(18, 16))
-#define HSIO_S6G_DFT_CFG0_TEST_MODE_M                     GENMASK(18, 16)
-#define HSIO_S6G_DFT_CFG0_TEST_MODE_X(x)                  (((x) & GENMASK(18, 16)) >> 16)
-#define HSIO_S6G_DFT_CFG0_RX_PHS_CORR_DIS                 BIT(4)
-#define HSIO_S6G_DFT_CFG0_RX_PDSENS_ENA                   BIT(3)
-#define HSIO_S6G_DFT_CFG0_RX_DFT_ENA                      BIT(2)
-#define HSIO_S6G_DFT_CFG0_TX_DFT_ENA                      BIT(0)
-
-#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
-#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL_M                GENMASK(17, 8)
-#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
-#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
-#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ_M                  GENMASK(7, 4)
-#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_S6G_DFT_CFG1_TX_JI_ENA                       BIT(3)
-#define HSIO_S6G_DFT_CFG1_TX_WAVEFORM_SEL                 BIT(2)
-#define HSIO_S6G_DFT_CFG1_TX_FREQOFF_DIR                  BIT(1)
-#define HSIO_S6G_DFT_CFG1_TX_FREQOFF_ENA                  BIT(0)
-
-#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL(x)               (((x) << 8) & GENMASK(17, 8))
-#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL_M                GENMASK(17, 8)
-#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL_X(x)             (((x) & GENMASK(17, 8)) >> 8)
-#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ(x)                 (((x) << 4) & GENMASK(7, 4))
-#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ_M                  GENMASK(7, 4)
-#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ_X(x)               (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_S6G_DFT_CFG2_RX_JI_ENA                       BIT(3)
-#define HSIO_S6G_DFT_CFG2_RX_WAVEFORM_SEL                 BIT(2)
-#define HSIO_S6G_DFT_CFG2_RX_FREQOFF_DIR                  BIT(1)
-#define HSIO_S6G_DFT_CFG2_RX_FREQOFF_ENA                  BIT(0)
-
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_ENA             BIT(20)
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH(x)     (((x) << 16) & GENMASK(19, 16))
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_M      GENMASK(19, 16)
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_X(x)   (((x) & GENMASK(19, 16)) >> 16)
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH(x)         (((x) << 8) & GENMASK(15, 8))
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_M          GENMASK(15, 8)
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_X(x)       (((x) & GENMASK(15, 8)) >> 8)
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_LOW(x)          ((x) & GENMASK(7, 0))
-#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_LOW_M           GENMASK(7, 0)
-
-#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK(x)                 (((x) << 13) & GENMASK(14, 13))
-#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK_M                  GENMASK(14, 13)
-#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK_X(x)               (((x) & GENMASK(14, 13)) >> 13)
-#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE(x)          (((x) << 11) & GENMASK(12, 11))
-#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE_M           GENMASK(12, 11)
-#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE_X(x)        (((x) & GENMASK(12, 11)) >> 11)
-#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_SWAP             BIT(10)
-#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_MODE             BIT(9)
-#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_ENA              BIT(8)
-#define HSIO_S6G_MISC_CFG_RX_BUS_FLIP_ENA                 BIT(7)
-#define HSIO_S6G_MISC_CFG_TX_BUS_FLIP_ENA                 BIT(6)
-#define HSIO_S6G_MISC_CFG_RX_LPI_MODE_ENA                 BIT(5)
-#define HSIO_S6G_MISC_CFG_TX_LPI_MODE_ENA                 BIT(4)
-#define HSIO_S6G_MISC_CFG_RX_DATA_INV_ENA                 BIT(3)
-#define HSIO_S6G_MISC_CFG_TX_DATA_INV_ENA                 BIT(2)
-#define HSIO_S6G_MISC_CFG_LANE_RST                        BIT(0)
-
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0(x)               (((x) << 23) & GENMASK(28, 23))
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0_M                GENMASK(28, 23)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0_X(x)             (((x) & GENMASK(28, 23)) >> 23)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1(x)               (((x) << 18) & GENMASK(22, 18))
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1_M                GENMASK(22, 18)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1_X(x)             (((x) & GENMASK(22, 18)) >> 18)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC(x)                (((x) << 13) & GENMASK(17, 13))
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC_M                 GENMASK(17, 13)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC_X(x)              (((x) & GENMASK(17, 13)) >> 13)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS(x)             (((x) << 6) & GENMASK(8, 6))
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS_M              GENMASK(8, 6)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS_X(x)           (((x) & GENMASK(8, 6)) >> 6)
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_LEV(x)                 ((x) & GENMASK(5, 0))
-#define HSIO_S6G_OB_ANEG_CFG_AN_OB_LEV_M                  GENMASK(5, 0)
-
-#define HSIO_S6G_DFT_STATUS_PRBS_SYNC_STAT                BIT(8)
-#define HSIO_S6G_DFT_STATUS_PLL_BIST_NOT_DONE             BIT(7)
-#define HSIO_S6G_DFT_STATUS_PLL_BIST_FAILED               BIT(6)
-#define HSIO_S6G_DFT_STATUS_PLL_BIST_TIMEOUT_ERR          BIT(5)
-#define HSIO_S6G_DFT_STATUS_BIST_ACTIVE                   BIT(3)
-#define HSIO_S6G_DFT_STATUS_BIST_NOSYNC                   BIT(2)
-#define HSIO_S6G_DFT_STATUS_BIST_COMPLETE_N               BIT(1)
-#define HSIO_S6G_DFT_STATUS_BIST_ERROR                    BIT(0)
-
-#define HSIO_S6G_MISC_STATUS_DES_100FX_PHASE_SEL          BIT(0)
-
-#define HSIO_S6G_DES_CFG_DES_PHS_CTRL(x)                  (((x) << 13) & GENMASK(16, 13))
-#define HSIO_S6G_DES_CFG_DES_PHS_CTRL_M                   GENMASK(16, 13)
-#define HSIO_S6G_DES_CFG_DES_PHS_CTRL_X(x)                (((x) & GENMASK(16, 13)) >> 13)
-#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL(x)                 (((x) << 10) & GENMASK(12, 10))
-#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL_M                  GENMASK(12, 10)
-#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL_X(x)               (((x) & GENMASK(12, 10)) >> 10)
-#define HSIO_S6G_DES_CFG_DES_CPMD_SEL(x)                  (((x) << 8) & GENMASK(9, 8))
-#define HSIO_S6G_DES_CFG_DES_CPMD_SEL_M                   GENMASK(9, 8)
-#define HSIO_S6G_DES_CFG_DES_CPMD_SEL_X(x)                (((x) & GENMASK(9, 8)) >> 8)
-#define HSIO_S6G_DES_CFG_DES_BW_HYST(x)                   (((x) << 5) & GENMASK(7, 5))
-#define HSIO_S6G_DES_CFG_DES_BW_HYST_M                    GENMASK(7, 5)
-#define HSIO_S6G_DES_CFG_DES_BW_HYST_X(x)                 (((x) & GENMASK(7, 5)) >> 5)
-#define HSIO_S6G_DES_CFG_DES_SWAP_HYST                    BIT(4)
-#define HSIO_S6G_DES_CFG_DES_BW_ANA(x)                    (((x) << 1) & GENMASK(3, 1))
-#define HSIO_S6G_DES_CFG_DES_BW_ANA_M                     GENMASK(3, 1)
-#define HSIO_S6G_DES_CFG_DES_BW_ANA_X(x)                  (((x) & GENMASK(3, 1)) >> 1)
-#define HSIO_S6G_DES_CFG_DES_SWAP_ANA                     BIT(0)
-
-#define HSIO_S6G_IB_CFG_IB_SOFSI(x)                       (((x) << 29) & GENMASK(30, 29))
-#define HSIO_S6G_IB_CFG_IB_SOFSI_M                        GENMASK(30, 29)
-#define HSIO_S6G_IB_CFG_IB_SOFSI_X(x)                     (((x) & GENMASK(30, 29)) >> 29)
-#define HSIO_S6G_IB_CFG_IB_VBULK_SEL                      BIT(28)
-#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ(x)                    (((x) << 24) & GENMASK(27, 24))
-#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ_M                     GENMASK(27, 24)
-#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ_X(x)                  (((x) & GENMASK(27, 24)) >> 24)
-#define HSIO_S6G_IB_CFG_IB_ICML_ADJ(x)                    (((x) << 20) & GENMASK(23, 20))
-#define HSIO_S6G_IB_CFG_IB_ICML_ADJ_M                     GENMASK(23, 20)
-#define HSIO_S6G_IB_CFG_IB_ICML_ADJ_X(x)                  (((x) & GENMASK(23, 20)) >> 20)
-#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL(x)               (((x) << 18) & GENMASK(19, 18))
-#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL_M                GENMASK(19, 18)
-#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL_X(x)             (((x) & GENMASK(19, 18)) >> 18)
-#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL(x)             (((x) << 15) & GENMASK(17, 15))
-#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL_M              GENMASK(17, 15)
-#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL_X(x)           (((x) & GENMASK(17, 15)) >> 15)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP(x)              (((x) << 13) & GENMASK(14, 13))
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP_M               GENMASK(14, 13)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP_X(x)            (((x) & GENMASK(14, 13)) >> 13)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID(x)             (((x) << 11) & GENMASK(12, 11))
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID_M              GENMASK(12, 11)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID_X(x)           (((x) & GENMASK(12, 11)) >> 11)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP(x)              (((x) << 9) & GENMASK(10, 9))
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP_M               GENMASK(10, 9)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP_X(x)            (((x) & GENMASK(10, 9)) >> 9)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET(x)          (((x) << 7) & GENMASK(8, 7))
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET_M           GENMASK(8, 7)
-#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET_X(x)        (((x) & GENMASK(8, 7)) >> 7)
-#define HSIO_S6G_IB_CFG_IB_ANA_TEST_ENA                   BIT(6)
-#define HSIO_S6G_IB_CFG_IB_SIG_DET_ENA                    BIT(5)
-#define HSIO_S6G_IB_CFG_IB_CONCUR                         BIT(4)
-#define HSIO_S6G_IB_CFG_IB_CAL_ENA                        BIT(3)
-#define HSIO_S6G_IB_CFG_IB_SAM_ENA                        BIT(2)
-#define HSIO_S6G_IB_CFG_IB_EQZ_ENA                        BIT(1)
-#define HSIO_S6G_IB_CFG_IB_REG_ENA                        BIT(0)
-
-#define HSIO_S6G_IB_CFG1_IB_TJTAG(x)                      (((x) << 17) & GENMASK(21, 17))
-#define HSIO_S6G_IB_CFG1_IB_TJTAG_M                       GENMASK(21, 17)
-#define HSIO_S6G_IB_CFG1_IB_TJTAG_X(x)                    (((x) & GENMASK(21, 17)) >> 17)
-#define HSIO_S6G_IB_CFG1_IB_TSDET(x)                      (((x) << 12) & GENMASK(16, 12))
-#define HSIO_S6G_IB_CFG1_IB_TSDET_M                       GENMASK(16, 12)
-#define HSIO_S6G_IB_CFG1_IB_TSDET_X(x)                    (((x) & GENMASK(16, 12)) >> 12)
-#define HSIO_S6G_IB_CFG1_IB_SCALY(x)                      (((x) << 8) & GENMASK(11, 8))
-#define HSIO_S6G_IB_CFG1_IB_SCALY_M                       GENMASK(11, 8)
-#define HSIO_S6G_IB_CFG1_IB_SCALY_X(x)                    (((x) & GENMASK(11, 8)) >> 8)
-#define HSIO_S6G_IB_CFG1_IB_FILT_HP                       BIT(7)
-#define HSIO_S6G_IB_CFG1_IB_FILT_MID                      BIT(6)
-#define HSIO_S6G_IB_CFG1_IB_FILT_LP                       BIT(5)
-#define HSIO_S6G_IB_CFG1_IB_FILT_OFFSET                   BIT(4)
-#define HSIO_S6G_IB_CFG1_IB_FRC_HP                        BIT(3)
-#define HSIO_S6G_IB_CFG1_IB_FRC_MID                       BIT(2)
-#define HSIO_S6G_IB_CFG1_IB_FRC_LP                        BIT(1)
-#define HSIO_S6G_IB_CFG1_IB_FRC_OFFSET                    BIT(0)
-
-#define HSIO_S6G_IB_CFG2_IB_TINFV(x)                      (((x) << 27) & GENMASK(29, 27))
-#define HSIO_S6G_IB_CFG2_IB_TINFV_M                       GENMASK(29, 27)
-#define HSIO_S6G_IB_CFG2_IB_TINFV_X(x)                    (((x) & GENMASK(29, 27)) >> 27)
-#define HSIO_S6G_IB_CFG2_IB_OINFI(x)                      (((x) << 22) & GENMASK(26, 22))
-#define HSIO_S6G_IB_CFG2_IB_OINFI_M                       GENMASK(26, 22)
-#define HSIO_S6G_IB_CFG2_IB_OINFI_X(x)                    (((x) & GENMASK(26, 22)) >> 22)
-#define HSIO_S6G_IB_CFG2_IB_TAUX(x)                       (((x) << 19) & GENMASK(21, 19))
-#define HSIO_S6G_IB_CFG2_IB_TAUX_M                        GENMASK(21, 19)
-#define HSIO_S6G_IB_CFG2_IB_TAUX_X(x)                     (((x) & GENMASK(21, 19)) >> 19)
-#define HSIO_S6G_IB_CFG2_IB_OINFS(x)                      (((x) << 16) & GENMASK(18, 16))
-#define HSIO_S6G_IB_CFG2_IB_OINFS_M                       GENMASK(18, 16)
-#define HSIO_S6G_IB_CFG2_IB_OINFS_X(x)                    (((x) & GENMASK(18, 16)) >> 16)
-#define HSIO_S6G_IB_CFG2_IB_OCALS(x)                      (((x) << 10) & GENMASK(15, 10))
-#define HSIO_S6G_IB_CFG2_IB_OCALS_M                       GENMASK(15, 10)
-#define HSIO_S6G_IB_CFG2_IB_OCALS_X(x)                    (((x) & GENMASK(15, 10)) >> 10)
-#define HSIO_S6G_IB_CFG2_IB_TCALV(x)                      (((x) << 5) & GENMASK(9, 5))
-#define HSIO_S6G_IB_CFG2_IB_TCALV_M                       GENMASK(9, 5)
-#define HSIO_S6G_IB_CFG2_IB_TCALV_X(x)                    (((x) & GENMASK(9, 5)) >> 5)
-#define HSIO_S6G_IB_CFG2_IB_UMAX(x)                       (((x) << 3) & GENMASK(4, 3))
-#define HSIO_S6G_IB_CFG2_IB_UMAX_M                        GENMASK(4, 3)
-#define HSIO_S6G_IB_CFG2_IB_UMAX_X(x)                     (((x) & GENMASK(4, 3)) >> 3)
-#define HSIO_S6G_IB_CFG2_IB_UREG(x)                       ((x) & GENMASK(2, 0))
-#define HSIO_S6G_IB_CFG2_IB_UREG_M                        GENMASK(2, 0)
-
-#define HSIO_S6G_IB_CFG3_IB_INI_HP(x)                     (((x) << 18) & GENMASK(23, 18))
-#define HSIO_S6G_IB_CFG3_IB_INI_HP_M                      GENMASK(23, 18)
-#define HSIO_S6G_IB_CFG3_IB_INI_HP_X(x)                   (((x) & GENMASK(23, 18)) >> 18)
-#define HSIO_S6G_IB_CFG3_IB_INI_MID(x)                    (((x) << 12) & GENMASK(17, 12))
-#define HSIO_S6G_IB_CFG3_IB_INI_MID_M                     GENMASK(17, 12)
-#define HSIO_S6G_IB_CFG3_IB_INI_MID_X(x)                  (((x) & GENMASK(17, 12)) >> 12)
-#define HSIO_S6G_IB_CFG3_IB_INI_LP(x)                     (((x) << 6) & GENMASK(11, 6))
-#define HSIO_S6G_IB_CFG3_IB_INI_LP_M                      GENMASK(11, 6)
-#define HSIO_S6G_IB_CFG3_IB_INI_LP_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
-#define HSIO_S6G_IB_CFG3_IB_INI_OFFSET(x)                 ((x) & GENMASK(5, 0))
-#define HSIO_S6G_IB_CFG3_IB_INI_OFFSET_M                  GENMASK(5, 0)
-
-#define HSIO_S6G_IB_CFG4_IB_MAX_HP(x)                     (((x) << 18) & GENMASK(23, 18))
-#define HSIO_S6G_IB_CFG4_IB_MAX_HP_M                      GENMASK(23, 18)
-#define HSIO_S6G_IB_CFG4_IB_MAX_HP_X(x)                   (((x) & GENMASK(23, 18)) >> 18)
-#define HSIO_S6G_IB_CFG4_IB_MAX_MID(x)                    (((x) << 12) & GENMASK(17, 12))
-#define HSIO_S6G_IB_CFG4_IB_MAX_MID_M                     GENMASK(17, 12)
-#define HSIO_S6G_IB_CFG4_IB_MAX_MID_X(x)                  (((x) & GENMASK(17, 12)) >> 12)
-#define HSIO_S6G_IB_CFG4_IB_MAX_LP(x)                     (((x) << 6) & GENMASK(11, 6))
-#define HSIO_S6G_IB_CFG4_IB_MAX_LP_M                      GENMASK(11, 6)
-#define HSIO_S6G_IB_CFG4_IB_MAX_LP_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
-#define HSIO_S6G_IB_CFG4_IB_MAX_OFFSET(x)                 ((x) & GENMASK(5, 0))
-#define HSIO_S6G_IB_CFG4_IB_MAX_OFFSET_M                  GENMASK(5, 0)
-
-#define HSIO_S6G_IB_CFG5_IB_MIN_HP(x)                     (((x) << 18) & GENMASK(23, 18))
-#define HSIO_S6G_IB_CFG5_IB_MIN_HP_M                      GENMASK(23, 18)
-#define HSIO_S6G_IB_CFG5_IB_MIN_HP_X(x)                   (((x) & GENMASK(23, 18)) >> 18)
-#define HSIO_S6G_IB_CFG5_IB_MIN_MID(x)                    (((x) << 12) & GENMASK(17, 12))
-#define HSIO_S6G_IB_CFG5_IB_MIN_MID_M                     GENMASK(17, 12)
-#define HSIO_S6G_IB_CFG5_IB_MIN_MID_X(x)                  (((x) & GENMASK(17, 12)) >> 12)
-#define HSIO_S6G_IB_CFG5_IB_MIN_LP(x)                     (((x) << 6) & GENMASK(11, 6))
-#define HSIO_S6G_IB_CFG5_IB_MIN_LP_M                      GENMASK(11, 6)
-#define HSIO_S6G_IB_CFG5_IB_MIN_LP_X(x)                   (((x) & GENMASK(11, 6)) >> 6)
-#define HSIO_S6G_IB_CFG5_IB_MIN_OFFSET(x)                 ((x) & GENMASK(5, 0))
-#define HSIO_S6G_IB_CFG5_IB_MIN_OFFSET_M                  GENMASK(5, 0)
-
-#define HSIO_S6G_OB_CFG_OB_IDLE                           BIT(31)
-#define HSIO_S6G_OB_CFG_OB_ENA1V_MODE                     BIT(30)
-#define HSIO_S6G_OB_CFG_OB_POL                            BIT(29)
-#define HSIO_S6G_OB_CFG_OB_POST0(x)                       (((x) << 23) & GENMASK(28, 23))
-#define HSIO_S6G_OB_CFG_OB_POST0_M                        GENMASK(28, 23)
-#define HSIO_S6G_OB_CFG_OB_POST0_X(x)                     (((x) & GENMASK(28, 23)) >> 23)
-#define HSIO_S6G_OB_CFG_OB_PREC(x)                        (((x) << 18) & GENMASK(22, 18))
-#define HSIO_S6G_OB_CFG_OB_PREC_M                         GENMASK(22, 18)
-#define HSIO_S6G_OB_CFG_OB_PREC_X(x)                      (((x) & GENMASK(22, 18)) >> 18)
-#define HSIO_S6G_OB_CFG_OB_R_ADJ_MUX                      BIT(17)
-#define HSIO_S6G_OB_CFG_OB_R_ADJ_PDR                      BIT(16)
-#define HSIO_S6G_OB_CFG_OB_POST1(x)                       (((x) << 11) & GENMASK(15, 11))
-#define HSIO_S6G_OB_CFG_OB_POST1_M                        GENMASK(15, 11)
-#define HSIO_S6G_OB_CFG_OB_POST1_X(x)                     (((x) & GENMASK(15, 11)) >> 11)
-#define HSIO_S6G_OB_CFG_OB_R_COR                          BIT(10)
-#define HSIO_S6G_OB_CFG_OB_SEL_RCTRL                      BIT(9)
-#define HSIO_S6G_OB_CFG_OB_SR_H                           BIT(8)
-#define HSIO_S6G_OB_CFG_OB_SR(x)                          (((x) << 4) & GENMASK(7, 4))
-#define HSIO_S6G_OB_CFG_OB_SR_M                           GENMASK(7, 4)
-#define HSIO_S6G_OB_CFG_OB_SR_X(x)                        (((x) & GENMASK(7, 4)) >> 4)
-#define HSIO_S6G_OB_CFG_OB_RESISTOR_CTRL(x)               ((x) & GENMASK(3, 0))
-#define HSIO_S6G_OB_CFG_OB_RESISTOR_CTRL_M                GENMASK(3, 0)
-
-#define HSIO_S6G_OB_CFG1_OB_ENA_CAS(x)                    (((x) << 6) & GENMASK(8, 6))
-#define HSIO_S6G_OB_CFG1_OB_ENA_CAS_M                     GENMASK(8, 6)
-#define HSIO_S6G_OB_CFG1_OB_ENA_CAS_X(x)                  (((x) & GENMASK(8, 6)) >> 6)
-#define HSIO_S6G_OB_CFG1_OB_LEV(x)                        ((x) & GENMASK(5, 0))
-#define HSIO_S6G_OB_CFG1_OB_LEV_M                         GENMASK(5, 0)
-
-#define HSIO_S6G_SER_CFG_SER_4TAP_ENA                     BIT(8)
-#define HSIO_S6G_SER_CFG_SER_CPMD_SEL                     BIT(7)
-#define HSIO_S6G_SER_CFG_SER_SWAP_CPMD                    BIT(6)
-#define HSIO_S6G_SER_CFG_SER_ALISEL(x)                    (((x) << 4) & GENMASK(5, 4))
-#define HSIO_S6G_SER_CFG_SER_ALISEL_M                     GENMASK(5, 4)
-#define HSIO_S6G_SER_CFG_SER_ALISEL_X(x)                  (((x) & GENMASK(5, 4)) >> 4)
-#define HSIO_S6G_SER_CFG_SER_ENHYS                        BIT(3)
-#define HSIO_S6G_SER_CFG_SER_BIG_WIN                      BIT(2)
-#define HSIO_S6G_SER_CFG_SER_EN_WIN                       BIT(1)
-#define HSIO_S6G_SER_CFG_SER_ENALI                        BIT(0)
-
-#define HSIO_S6G_COMMON_CFG_SYS_RST                       BIT(17)
-#define HSIO_S6G_COMMON_CFG_SE_DIV2_ENA                   BIT(16)
-#define HSIO_S6G_COMMON_CFG_SE_AUTO_SQUELCH_ENA           BIT(15)
-#define HSIO_S6G_COMMON_CFG_ENA_LANE                      BIT(14)
-#define HSIO_S6G_COMMON_CFG_PWD_RX                        BIT(13)
-#define HSIO_S6G_COMMON_CFG_PWD_TX                        BIT(12)
-#define HSIO_S6G_COMMON_CFG_LANE_CTRL(x)                  (((x) << 9) & GENMASK(11, 9))
-#define HSIO_S6G_COMMON_CFG_LANE_CTRL_M                   GENMASK(11, 9)
-#define HSIO_S6G_COMMON_CFG_LANE_CTRL_X(x)                (((x) & GENMASK(11, 9)) >> 9)
-#define HSIO_S6G_COMMON_CFG_ENA_DIRECT                    BIT(8)
-#define HSIO_S6G_COMMON_CFG_ENA_ELOOP                     BIT(7)
-#define HSIO_S6G_COMMON_CFG_ENA_FLOOP                     BIT(6)
-#define HSIO_S6G_COMMON_CFG_ENA_ILOOP                     BIT(5)
-#define HSIO_S6G_COMMON_CFG_ENA_PLOOP                     BIT(4)
-#define HSIO_S6G_COMMON_CFG_HRATE                         BIT(3)
-#define HSIO_S6G_COMMON_CFG_QRATE                         BIT(2)
-#define HSIO_S6G_COMMON_CFG_IF_MODE(x)                    ((x) & GENMASK(1, 0))
-#define HSIO_S6G_COMMON_CFG_IF_MODE_M                     GENMASK(1, 0)
-
-#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS(x)                  (((x) << 16) & GENMASK(17, 16))
-#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS_M                   GENMASK(17, 16)
-#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS_X(x)                (((x) & GENMASK(17, 16)) >> 16)
-#define HSIO_S6G_PLL_CFG_PLL_DIV4                         BIT(15)
-#define HSIO_S6G_PLL_CFG_PLL_ENA_ROT                      BIT(14)
-#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA(x)             (((x) << 6) & GENMASK(13, 6))
-#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA_M              GENMASK(13, 6)
-#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA_X(x)           (((x) & GENMASK(13, 6)) >> 6)
-#define HSIO_S6G_PLL_CFG_PLL_FSM_ENA                      BIT(5)
-#define HSIO_S6G_PLL_CFG_PLL_FSM_FORCE_SET_ENA            BIT(4)
-#define HSIO_S6G_PLL_CFG_PLL_FSM_OOR_RECAL_ENA            BIT(3)
-#define HSIO_S6G_PLL_CFG_PLL_RB_DATA_SEL                  BIT(2)
-#define HSIO_S6G_PLL_CFG_PLL_ROT_DIR                      BIT(1)
-#define HSIO_S6G_PLL_CFG_PLL_ROT_FRQ                      BIT(0)
-
-#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_DATA_N            BIT(5)
-#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_DATA_P            BIT(4)
-#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_CLK               BIT(3)
-#define HSIO_S6G_ACJTAG_CFG_OB_DIRECT                     BIT(2)
-#define HSIO_S6G_ACJTAG_CFG_ACJTAG_ENA                    BIT(1)
-#define HSIO_S6G_ACJTAG_CFG_JTAG_CTRL_ENA                 BIT(0)
-
-#define HSIO_S6G_GP_CFG_GP_MSB(x)                         (((x) << 16) & GENMASK(31, 16))
-#define HSIO_S6G_GP_CFG_GP_MSB_M                          GENMASK(31, 16)
-#define HSIO_S6G_GP_CFG_GP_MSB_X(x)                       (((x) & GENMASK(31, 16)) >> 16)
-#define HSIO_S6G_GP_CFG_GP_LSB(x)                         ((x) & GENMASK(15, 0))
-#define HSIO_S6G_GP_CFG_GP_LSB_M                          GENMASK(15, 0)
-
-#define HSIO_S6G_IB_STATUS0_IB_CAL_DONE                   BIT(8)
-#define HSIO_S6G_IB_STATUS0_IB_HP_GAIN_ACT                BIT(7)
-#define HSIO_S6G_IB_STATUS0_IB_MID_GAIN_ACT               BIT(6)
-#define HSIO_S6G_IB_STATUS0_IB_LP_GAIN_ACT                BIT(5)
-#define HSIO_S6G_IB_STATUS0_IB_OFFSET_ACT                 BIT(4)
-#define HSIO_S6G_IB_STATUS0_IB_OFFSET_VLD                 BIT(3)
-#define HSIO_S6G_IB_STATUS0_IB_OFFSET_ERR                 BIT(2)
-#define HSIO_S6G_IB_STATUS0_IB_OFFSDIR                    BIT(1)
-#define HSIO_S6G_IB_STATUS0_IB_SIG_DET                    BIT(0)
-
-#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT(x)            (((x) << 18) & GENMASK(23, 18))
-#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT_M             GENMASK(23, 18)
-#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT_X(x)          (((x) & GENMASK(23, 18)) >> 18)
-#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT(x)           (((x) << 12) & GENMASK(17, 12))
-#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT_M            GENMASK(17, 12)
-#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT_X(x)         (((x) & GENMASK(17, 12)) >> 12)
-#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT(x)            (((x) << 6) & GENMASK(11, 6))
-#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT_M             GENMASK(11, 6)
-#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT_X(x)          (((x) & GENMASK(11, 6)) >> 6)
-#define HSIO_S6G_IB_STATUS1_IB_OFFSET_STAT(x)             ((x) & GENMASK(5, 0))
-#define HSIO_S6G_IB_STATUS1_IB_OFFSET_STAT_M              GENMASK(5, 0)
-
-#define HSIO_S6G_ACJTAG_STATUS_ACJTAG_CAPT_DATA_N         BIT(2)
-#define HSIO_S6G_ACJTAG_STATUS_ACJTAG_CAPT_DATA_P         BIT(1)
-#define HSIO_S6G_ACJTAG_STATUS_IB_DIRECT                  BIT(0)
-
-#define HSIO_S6G_PLL_STATUS_PLL_CAL_NOT_DONE              BIT(10)
-#define HSIO_S6G_PLL_STATUS_PLL_CAL_ERR                   BIT(9)
-#define HSIO_S6G_PLL_STATUS_PLL_OUT_OF_RANGE_ERR          BIT(8)
-#define HSIO_S6G_PLL_STATUS_PLL_RB_DATA(x)                ((x) & GENMASK(7, 0))
-#define HSIO_S6G_PLL_STATUS_PLL_RB_DATA_M                 GENMASK(7, 0)
-
-#define HSIO_S6G_REVID_SERDES_REV(x)                      (((x) << 26) & GENMASK(31, 26))
-#define HSIO_S6G_REVID_SERDES_REV_M                       GENMASK(31, 26)
-#define HSIO_S6G_REVID_SERDES_REV_X(x)                    (((x) & GENMASK(31, 26)) >> 26)
-#define HSIO_S6G_REVID_RCPLL_REV(x)                       (((x) << 21) & GENMASK(25, 21))
-#define HSIO_S6G_REVID_RCPLL_REV_M                        GENMASK(25, 21)
-#define HSIO_S6G_REVID_RCPLL_REV_X(x)                     (((x) & GENMASK(25, 21)) >> 21)
-#define HSIO_S6G_REVID_SER_REV(x)                         (((x) << 16) & GENMASK(20, 16))
-#define HSIO_S6G_REVID_SER_REV_M                          GENMASK(20, 16)
-#define HSIO_S6G_REVID_SER_REV_X(x)                       (((x) & GENMASK(20, 16)) >> 16)
-#define HSIO_S6G_REVID_DES_REV(x)                         (((x) << 10) & GENMASK(15, 10))
-#define HSIO_S6G_REVID_DES_REV_M                          GENMASK(15, 10)
-#define HSIO_S6G_REVID_DES_REV_X(x)                       (((x) & GENMASK(15, 10)) >> 10)
-#define HSIO_S6G_REVID_OB_REV(x)                          (((x) << 5) & GENMASK(9, 5))
-#define HSIO_S6G_REVID_OB_REV_M                           GENMASK(9, 5)
-#define HSIO_S6G_REVID_OB_REV_X(x)                        (((x) & GENMASK(9, 5)) >> 5)
-#define HSIO_S6G_REVID_IB_REV(x)                          ((x) & GENMASK(4, 0))
-#define HSIO_S6G_REVID_IB_REV_M                           GENMASK(4, 0)
-
-#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_WR_ONE_SHOT        BIT(31)
-#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_RD_ONE_SHOT        BIT(30)
-#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_ADDR(x)            ((x) & GENMASK(24, 0))
-#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_ADDR_M             GENMASK(24, 0)
-
-#define HSIO_HW_CFG_DEV2G5_10_MODE                        BIT(6)
-#define HSIO_HW_CFG_DEV1G_9_MODE                          BIT(5)
-#define HSIO_HW_CFG_DEV1G_6_MODE                          BIT(4)
-#define HSIO_HW_CFG_DEV1G_5_MODE                          BIT(3)
-#define HSIO_HW_CFG_DEV1G_4_MODE                          BIT(2)
-#define HSIO_HW_CFG_PCIE_ENA                              BIT(1)
-#define HSIO_HW_CFG_QSGMII_ENA                            BIT(0)
-
-#define HSIO_HW_QSGMII_CFG_SHYST_DIS                      BIT(3)
-#define HSIO_HW_QSGMII_CFG_E_DET_ENA                      BIT(2)
-#define HSIO_HW_QSGMII_CFG_USE_I1_ENA                     BIT(1)
-#define HSIO_HW_QSGMII_CFG_FLIP_LANES                     BIT(0)
-
-#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS(x)           (((x) << 1) & GENMASK(6, 1))
-#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS_M            GENMASK(6, 1)
-#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS_X(x)         (((x) & GENMASK(6, 1)) >> 1)
-#define HSIO_HW_QSGMII_STAT_SYNC                          BIT(0)
-
-#define HSIO_CLK_CFG_CLKDIV_PHY(x)                        (((x) << 1) & GENMASK(8, 1))
-#define HSIO_CLK_CFG_CLKDIV_PHY_M                         GENMASK(8, 1)
-#define HSIO_CLK_CFG_CLKDIV_PHY_X(x)                      (((x) & GENMASK(8, 1)) >> 1)
-#define HSIO_CLK_CFG_CLKDIV_PHY_DIS                       BIT(0)
-
-#define HSIO_TEMP_SENSOR_CTRL_FORCE_TEMP_RD               BIT(5)
-#define HSIO_TEMP_SENSOR_CTRL_FORCE_RUN                   BIT(4)
-#define HSIO_TEMP_SENSOR_CTRL_FORCE_NO_RST                BIT(3)
-#define HSIO_TEMP_SENSOR_CTRL_FORCE_POWER_UP              BIT(2)
-#define HSIO_TEMP_SENSOR_CTRL_FORCE_CLK                   BIT(1)
-#define HSIO_TEMP_SENSOR_CTRL_SAMPLE_ENA                  BIT(0)
-
-#define HSIO_TEMP_SENSOR_CFG_RUN_WID(x)                   (((x) << 8) & GENMASK(15, 8))
-#define HSIO_TEMP_SENSOR_CFG_RUN_WID_M                    GENMASK(15, 8)
-#define HSIO_TEMP_SENSOR_CFG_RUN_WID_X(x)                 (((x) & GENMASK(15, 8)) >> 8)
-#define HSIO_TEMP_SENSOR_CFG_SAMPLE_PER(x)                ((x) & GENMASK(7, 0))
-#define HSIO_TEMP_SENSOR_CFG_SAMPLE_PER_M                 GENMASK(7, 0)
-
-#define HSIO_TEMP_SENSOR_STAT_TEMP_VALID                  BIT(8)
-#define HSIO_TEMP_SENSOR_STAT_TEMP(x)                     ((x) & GENMASK(7, 0))
-#define HSIO_TEMP_SENSOR_STAT_TEMP_M                      GENMASK(7, 0)
-
-#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c
new file mode 100644
index 0000000..701e82d
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_police.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include "ocelot_police.h"
+
+enum mscc_qos_rate_mode {
+	MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */
+	MSCC_QOS_RATE_MODE_LINE, /* Measure line rate in kbps incl. IPG */
+	MSCC_QOS_RATE_MODE_DATA, /* Measure data rate in kbps excl. IPG */
+	MSCC_QOS_RATE_MODE_FRAME, /* Measure frame rate in fps */
+	__MSCC_QOS_RATE_MODE_END,
+	NUM_MSCC_QOS_RATE_MODE = __MSCC_QOS_RATE_MODE_END,
+	MSCC_QOS_RATE_MODE_MAX = __MSCC_QOS_RATE_MODE_END - 1,
+};
+
+/* Types for ANA:POL[0-192]:POL_MODE_CFG.FRM_MODE */
+#define POL_MODE_LINERATE   0 /* Incl IPG. Unit: 33 1/3 kbps, 4096 bytes */
+#define POL_MODE_DATARATE   1 /* Excl IPG. Unit: 33 1/3 kbps, 4096 bytes */
+#define POL_MODE_FRMRATE_HI 2 /* Unit: 33 1/3 fps, 32.8 frames */
+#define POL_MODE_FRMRATE_LO 3 /* Unit: 1/3 fps, 0.3 frames */
+
+/* Policer indexes */
+#define POL_IX_PORT    0    /* 0-11    : Port policers */
+#define POL_IX_QUEUE   32   /* 32-127  : Queue policers */
+
+/* Default policer order */
+#define POL_ORDER 0x1d3 /* Ocelot policer order: Serial (QoS -> Port -> VCAP) */
+
+struct qos_policer_conf {
+	enum mscc_qos_rate_mode mode;
+	bool dlb; /* Enable DLB (dual leaky bucket) mode */
+	bool cf;  /* Coupling flag (ignored in SLB mode) */
+	u32  cir; /* CIR in kbps/fps (ignored in SLB mode) */
+	u32  cbs; /* CBS in bytes/frames (ignored in SLB mode) */
+	u32  pir; /* PIR in kbps/fps */
+	u32  pbs; /* PBS in bytes/frames */
+	u8   ipg; /* Size of IPG when MSCC_QOS_RATE_MODE_LINE is chosen */
+};
+
+static int qos_policer_conf_set(struct ocelot_port *port, u32 pol_ix,
+				struct qos_policer_conf *conf)
+{
+	u32 cf = 0, cir_ena = 0, frm_mode = POL_MODE_LINERATE;
+	u32 cir = 0, cbs = 0, pir = 0, pbs = 0;
+	bool cir_discard = false, pir_discard = false;
+	struct ocelot *ocelot = port->ocelot;
+	u32 pbs_max = 0, cbs_max = 0;
+	u8 ipg = 20;
+	u32 value;
+
+	pir = conf->pir;
+	pbs = conf->pbs;
+
+	switch (conf->mode) {
+	case MSCC_QOS_RATE_MODE_LINE:
+	case MSCC_QOS_RATE_MODE_DATA:
+		if (conf->mode == MSCC_QOS_RATE_MODE_LINE) {
+			frm_mode = POL_MODE_LINERATE;
+			ipg = min_t(u8, GENMASK(4, 0), conf->ipg);
+		} else {
+			frm_mode = POL_MODE_DATARATE;
+		}
+		if (conf->dlb) {
+			cir_ena = 1;
+			cir = conf->cir;
+			cbs = conf->cbs;
+			if (cir == 0 && cbs == 0) {
+				/* Discard CIR frames */
+				cir_discard = 1;
+			} else {
+				cir = DIV_ROUND_UP(cir, 100);
+				cir *= 3; /* 33 1/3 kbps */
+				cbs = DIV_ROUND_UP(cbs, 4096);
+				cbs = (cbs ? cbs : 1); /* No zero burst size */
+				cbs_max = 60; /* Limit burst size */
+				cf = conf->cf;
+				if (cf)
+					pir += conf->cir;
+			}
+		}
+		if (pir == 0 && pbs == 0) {
+			/* Discard PIR frames */
+			pir_discard = 1;
+		} else {
+			pir = DIV_ROUND_UP(pir, 100);
+			pir *= 3;  /* 33 1/3 kbps */
+			pbs = DIV_ROUND_UP(pbs, 4096);
+			pbs = (pbs ? pbs : 1); /* No zero burst size */
+			pbs_max = 60; /* Limit burst size */
+		}
+		break;
+	case MSCC_QOS_RATE_MODE_FRAME:
+		if (pir >= 100) {
+			frm_mode = POL_MODE_FRMRATE_HI;
+			pir = DIV_ROUND_UP(pir, 100);
+			pir *= 3;  /* 33 1/3 fps */
+			pbs = (pbs * 10) / 328; /* 32.8 frames */
+			pbs = (pbs ? pbs : 1); /* No zero burst size */
+			pbs_max = GENMASK(6, 0); /* Limit burst size */
+		} else {
+			frm_mode = POL_MODE_FRMRATE_LO;
+			if (pir == 0 && pbs == 0) {
+				/* Discard all frames */
+				pir_discard = 1;
+				cir_discard = 1;
+			} else {
+				pir *= 3; /* 1/3 fps */
+				pbs = (pbs * 10) / 3; /* 0.3 frames */
+				pbs = (pbs ? pbs : 1); /* No zero burst size */
+				pbs_max = 61; /* Limit burst size */
+			}
+		}
+		break;
+	default: /* MSCC_QOS_RATE_MODE_DISABLED */
+		/* Disable policer using maximum rate and zero burst */
+		pir = GENMASK(15, 0);
+		pbs = 0;
+		break;
+	}
+
+	/* Check limits */
+	if (pir > GENMASK(15, 0)) {
+		netdev_err(port->dev, "Invalid pir\n");
+		return -EINVAL;
+	}
+
+	if (cir > GENMASK(15, 0)) {
+		netdev_err(port->dev, "Invalid cir\n");
+		return -EINVAL;
+	}
+
+	if (pbs > pbs_max) {
+		netdev_err(port->dev, "Invalid pbs\n");
+		return -EINVAL;
+	}
+
+	if (cbs > cbs_max) {
+		netdev_err(port->dev, "Invalid cbs\n");
+		return -EINVAL;
+	}
+
+	value = (ANA_POL_MODE_CFG_IPG_SIZE(ipg) |
+		 ANA_POL_MODE_CFG_FRM_MODE(frm_mode) |
+		 (cf ? ANA_POL_MODE_CFG_DLB_COUPLED : 0) |
+		 (cir_ena ? ANA_POL_MODE_CFG_CIR_ENA : 0) |
+		 ANA_POL_MODE_CFG_OVERSHOOT_ENA);
+
+	ocelot_write_gix(ocelot, value, ANA_POL_MODE_CFG, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 ANA_POL_PIR_CFG_PIR_RATE(pir) |
+			 ANA_POL_PIR_CFG_PIR_BURST(pbs),
+			 ANA_POL_PIR_CFG, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 (pir_discard ? GENMASK(22, 0) : 0),
+			 ANA_POL_PIR_STATE, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 ANA_POL_CIR_CFG_CIR_RATE(cir) |
+			 ANA_POL_CIR_CFG_CIR_BURST(cbs),
+			 ANA_POL_CIR_CFG, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 (cir_discard ? GENMASK(22, 0) : 0),
+			 ANA_POL_CIR_STATE, pol_ix);
+
+	return 0;
+}
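The unit conversion in qos_policer_conf_set() is the subtle part: rates are
programmed in steps of 33 1/3 kbps (or fps) and bursts in 4096-byte units.
A minimal standalone sketch of the arithmetic with illustrative values;
DIV_ROUND_UP is re-defined so the snippet builds in userspace:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned int rate_kbps = 10000;   /* 10 Mbit/s */
		unsigned int burst_bytes = 65536; /* 64 KiB */

		/* 1 rate unit = 100/3 kbps, so ceil(rate / 100) * 3 units */
		unsigned int rate_units = DIV_ROUND_UP(rate_kbps, 100) * 3;
		/* 1 burst unit = 4096 bytes; zero burst is not allowed */
		unsigned int burst_units = DIV_ROUND_UP(burst_bytes, 4096);

		if (!burst_units)
			burst_units = 1;

		printf("rate %u units, burst %u units\n",
		       rate_units, burst_units);
		return 0;
	}

For 10 Mbit/s this gives 300 rate units (300 * 33 1/3 kbps = 10000 kbps)
and 16 burst units.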
+
+int ocelot_port_policer_add(struct ocelot_port *port,
+			    struct ocelot_policer *pol)
+{
+	struct ocelot *ocelot = port->ocelot;
+	struct qos_policer_conf pp = { 0 };
+	int err;
+
+	if (!pol)
+		return -EINVAL;
+
+	pp.mode = MSCC_QOS_RATE_MODE_DATA;
+	pp.pir = pol->rate;
+	pp.pbs = pol->burst;
+
+	netdev_dbg(port->dev,
+		   "%s: port %u pir %u kbps, pbs %u bytes\n",
+		   __func__, port->chip_port, pp.pir, pp.pbs);
+
+	err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp);
+	if (err)
+		return err;
+
+	ocelot_rmw_gix(ocelot,
+		       ANA_PORT_POL_CFG_PORT_POL_ENA |
+		       ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER),
+		       ANA_PORT_POL_CFG_PORT_POL_ENA |
+		       ANA_PORT_POL_CFG_POL_ORDER_M,
+		       ANA_PORT_POL_CFG, port->chip_port);
+
+	return 0;
+}
+
+int ocelot_port_policer_del(struct ocelot_port *port)
+{
+	struct ocelot *ocelot = port->ocelot;
+	struct qos_policer_conf pp = { 0 };
+	int err;
+
+	netdev_dbg(port->dev, "%s: port %u\n", __func__, port->chip_port);
+
+	pp.mode = MSCC_QOS_RATE_MODE_DISABLED;
+
+	err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp);
+	if (err)
+		return err;
+
+	ocelot_rmw_gix(ocelot,
+		       ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER),
+		       ANA_PORT_POL_CFG_PORT_POL_ENA |
+		       ANA_PORT_POL_CFG_POL_ORDER_M,
+		       ANA_PORT_POL_CFG, port->chip_port);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h
new file mode 100644
index 0000000..d1137f7
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_police.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_POLICE_H_
+#define _MSCC_OCELOT_POLICE_H_
+
+#include "ocelot.h"
+
+struct ocelot_policer {
+	u32 rate; /* kilobit per second */
+	u32 burst; /* bytes */
+};
+
+int ocelot_port_policer_add(struct ocelot_port *port,
+			    struct ocelot_policer *pol);
+
+int ocelot_port_policer_del(struct ocelot_port *port);
+
+#endif /* _MSCC_OCELOT_POLICE_H_ */
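A short sketch of how a caller might drive this two-function API; the port
pointer and the numbers are illustrative, not taken from the patch:

	/* Install a 5 Mbit/s, 32 KiB-burst ingress policer, then remove it. */
	static int example_policer(struct ocelot_port *port)
	{
		struct ocelot_policer pol = {
			.rate = 5000,   /* kilobits per second */
			.burst = 32768, /* bytes */
		};
		int err;

		err = ocelot_port_policer_add(port, &pol);
		if (err)
			return err;

		/* ... ingress traffic on the port is now rate-limited ... */

		return ocelot_port_policer_del(port);
	}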
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.h b/drivers/net/ethernet/mscc/ocelot_ptp.h
new file mode 100644
index 0000000..9ede14a
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * License: Dual MIT/GPL
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_PTP_H_
+#define _MSCC_OCELOT_PTP_H_
+
+#define PTP_PIN_CFG_RSZ			0x20
+#define PTP_PIN_TOD_SEC_MSB_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_SEC_LSB_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_NSEC_RSZ		PTP_PIN_CFG_RSZ
+
+#define PTP_PIN_CFG_DOM			BIT(0)
+#define PTP_PIN_CFG_SYNC		BIT(2)
+#define PTP_PIN_CFG_ACTION(x)		((x) << 3)
+#define PTP_PIN_CFG_ACTION_MASK		PTP_PIN_CFG_ACTION(0x7)
+
+enum {
+	PTP_PIN_ACTION_IDLE = 0,
+	PTP_PIN_ACTION_LOAD,
+	PTP_PIN_ACTION_SAVE,
+	PTP_PIN_ACTION_CLOCK,
+	PTP_PIN_ACTION_DELTA,
+	PTP_PIN_ACTION_NOSYNC,
+	PTP_PIN_ACTION_SYNC,
+};
+
+#define PTP_CFG_MISC_PTP_EN		BIT(2)
+
+#define PSEC_PER_SEC			1000000000000LL
+
+#define PTP_CFG_CLK_ADJ_CFG_ENA		BIT(0)
+#define PTP_CFG_CLK_ADJ_CFG_DIR		BIT(1)
+
+#define PTP_CFG_CLK_ADJ_FREQ_NS		BIT(30)
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c
index e334b40..e59977d 100644
--- a/drivers/net/ethernet/mscc/ocelot_regs.c
+++ b/drivers/net/ethernet/mscc/ocelot_regs.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2017 Microsemi Corporation
  */
 #include "ocelot.h"
+#include <soc/mscc/ocelot_hsio.h>
 
 static const u32 ocelot_ana_regmap[] = {
 	REG(ANA_ADVLEARN,                  0x009000),
@@ -102,82 +103,6 @@
 	REG(QS_INH_DBG,                    0x000048),
 };
 
-static const u32 ocelot_hsio_regmap[] = {
-	REG(HSIO_PLL5G_CFG0,               0x000000),
-	REG(HSIO_PLL5G_CFG1,               0x000004),
-	REG(HSIO_PLL5G_CFG2,               0x000008),
-	REG(HSIO_PLL5G_CFG3,               0x00000c),
-	REG(HSIO_PLL5G_CFG4,               0x000010),
-	REG(HSIO_PLL5G_CFG5,               0x000014),
-	REG(HSIO_PLL5G_CFG6,               0x000018),
-	REG(HSIO_PLL5G_STATUS0,            0x00001c),
-	REG(HSIO_PLL5G_STATUS1,            0x000020),
-	REG(HSIO_PLL5G_BIST_CFG0,          0x000024),
-	REG(HSIO_PLL5G_BIST_CFG1,          0x000028),
-	REG(HSIO_PLL5G_BIST_CFG2,          0x00002c),
-	REG(HSIO_PLL5G_BIST_STAT0,         0x000030),
-	REG(HSIO_PLL5G_BIST_STAT1,         0x000034),
-	REG(HSIO_RCOMP_CFG0,               0x000038),
-	REG(HSIO_RCOMP_STATUS,             0x00003c),
-	REG(HSIO_SYNC_ETH_CFG,             0x000040),
-	REG(HSIO_SYNC_ETH_PLL_CFG,         0x000048),
-	REG(HSIO_S1G_DES_CFG,              0x00004c),
-	REG(HSIO_S1G_IB_CFG,               0x000050),
-	REG(HSIO_S1G_OB_CFG,               0x000054),
-	REG(HSIO_S1G_SER_CFG,              0x000058),
-	REG(HSIO_S1G_COMMON_CFG,           0x00005c),
-	REG(HSIO_S1G_PLL_CFG,              0x000060),
-	REG(HSIO_S1G_PLL_STATUS,           0x000064),
-	REG(HSIO_S1G_DFT_CFG0,             0x000068),
-	REG(HSIO_S1G_DFT_CFG1,             0x00006c),
-	REG(HSIO_S1G_DFT_CFG2,             0x000070),
-	REG(HSIO_S1G_TP_CFG,               0x000074),
-	REG(HSIO_S1G_RC_PLL_BIST_CFG,      0x000078),
-	REG(HSIO_S1G_MISC_CFG,             0x00007c),
-	REG(HSIO_S1G_DFT_STATUS,           0x000080),
-	REG(HSIO_S1G_MISC_STATUS,          0x000084),
-	REG(HSIO_MCB_S1G_ADDR_CFG,         0x000088),
-	REG(HSIO_S6G_DIG_CFG,              0x00008c),
-	REG(HSIO_S6G_DFT_CFG0,             0x000090),
-	REG(HSIO_S6G_DFT_CFG1,             0x000094),
-	REG(HSIO_S6G_DFT_CFG2,             0x000098),
-	REG(HSIO_S6G_TP_CFG0,              0x00009c),
-	REG(HSIO_S6G_TP_CFG1,              0x0000a0),
-	REG(HSIO_S6G_RC_PLL_BIST_CFG,      0x0000a4),
-	REG(HSIO_S6G_MISC_CFG,             0x0000a8),
-	REG(HSIO_S6G_OB_ANEG_CFG,          0x0000ac),
-	REG(HSIO_S6G_DFT_STATUS,           0x0000b0),
-	REG(HSIO_S6G_ERR_CNT,              0x0000b4),
-	REG(HSIO_S6G_MISC_STATUS,          0x0000b8),
-	REG(HSIO_S6G_DES_CFG,              0x0000bc),
-	REG(HSIO_S6G_IB_CFG,               0x0000c0),
-	REG(HSIO_S6G_IB_CFG1,              0x0000c4),
-	REG(HSIO_S6G_IB_CFG2,              0x0000c8),
-	REG(HSIO_S6G_IB_CFG3,              0x0000cc),
-	REG(HSIO_S6G_IB_CFG4,              0x0000d0),
-	REG(HSIO_S6G_IB_CFG5,              0x0000d4),
-	REG(HSIO_S6G_OB_CFG,               0x0000d8),
-	REG(HSIO_S6G_OB_CFG1,              0x0000dc),
-	REG(HSIO_S6G_SER_CFG,              0x0000e0),
-	REG(HSIO_S6G_COMMON_CFG,           0x0000e4),
-	REG(HSIO_S6G_PLL_CFG,              0x0000e8),
-	REG(HSIO_S6G_ACJTAG_CFG,           0x0000ec),
-	REG(HSIO_S6G_GP_CFG,               0x0000f0),
-	REG(HSIO_S6G_IB_STATUS0,           0x0000f4),
-	REG(HSIO_S6G_IB_STATUS1,           0x0000f8),
-	REG(HSIO_S6G_ACJTAG_STATUS,        0x0000fc),
-	REG(HSIO_S6G_PLL_STATUS,           0x000100),
-	REG(HSIO_S6G_REVID,                0x000104),
-	REG(HSIO_MCB_S6G_ADDR_CFG,         0x000108),
-	REG(HSIO_HW_CFG,                   0x00010c),
-	REG(HSIO_HW_QSGMII_CFG,            0x000110),
-	REG(HSIO_HW_QSGMII_STAT,           0x000114),
-	REG(HSIO_CLK_CFG,                  0x000118),
-	REG(HSIO_TEMP_SENSOR_CTRL,         0x00011c),
-	REG(HSIO_TEMP_SENSOR_CFG,          0x000120),
-	REG(HSIO_TEMP_SENSOR_STAT,         0x000124),
-};
-
 static const u32 ocelot_qsys_regmap[] = {
 	REG(QSYS_PORT_MODE,                0x011200),
 	REG(QSYS_SWITCH_PORT_MODE,         0x011234),
@@ -299,13 +224,34 @@
 	REG(SYS_PTP_CFG,                   0x0006c4),
 };
 
+static const u32 ocelot_s2_regmap[] = {
+	REG(S2_CORE_UPDATE_CTRL,           0x000000),
+	REG(S2_CORE_MV_CFG,                0x000004),
+	REG(S2_CACHE_ENTRY_DAT,            0x000008),
+	REG(S2_CACHE_MASK_DAT,             0x000108),
+	REG(S2_CACHE_ACTION_DAT,           0x000208),
+	REG(S2_CACHE_CNT_DAT,              0x000308),
+	REG(S2_CACHE_TG_DAT,               0x000388),
+};
+
+static const u32 ocelot_ptp_regmap[] = {
+	REG(PTP_PIN_CFG,                   0x000000),
+	REG(PTP_PIN_TOD_SEC_MSB,           0x000004),
+	REG(PTP_PIN_TOD_SEC_LSB,           0x000008),
+	REG(PTP_PIN_TOD_NSEC,              0x00000c),
+	REG(PTP_CFG_MISC,                  0x0000a0),
+	REG(PTP_CLK_CFG_ADJ_CFG,           0x0000a4),
+	REG(PTP_CLK_CFG_ADJ_FREQ,          0x0000a8),
+};
+
 static const u32 *ocelot_regmap[] = {
 	[ANA] = ocelot_ana_regmap,
 	[QS] = ocelot_qs_regmap,
-	[HSIO] = ocelot_hsio_regmap,
 	[QSYS] = ocelot_qsys_regmap,
 	[REW] = ocelot_rew_regmap,
 	[SYS] = ocelot_sys_regmap,
+	[S2] = ocelot_s2_regmap,
+	[PTP] = ocelot_ptp_regmap,
 };
 
 static const struct reg_field ocelot_regfields[] = {
@@ -453,9 +399,11 @@
 	/* Configure PLL5. This will need a proper CCF driver
 	 * The values are coming from the VTSS API for Ocelot
 	 */
-	ocelot_write(ocelot, HSIO_PLL5G_CFG4_IB_CTRL(0x7600) |
-		     HSIO_PLL5G_CFG4_IB_BIAS_CTRL(0x8), HSIO_PLL5G_CFG4);
-	ocelot_write(ocelot, HSIO_PLL5G_CFG0_CORE_CLK_DIV(0x11) |
+	regmap_write(ocelot->targets[HSIO], HSIO_PLL5G_CFG4,
+		     HSIO_PLL5G_CFG4_IB_CTRL(0x7600) |
+		     HSIO_PLL5G_CFG4_IB_BIAS_CTRL(0x8));
+	regmap_write(ocelot->targets[HSIO], HSIO_PLL5G_CFG0,
+		     HSIO_PLL5G_CFG0_CORE_CLK_DIV(0x11) |
 		     HSIO_PLL5G_CFG0_CPU_CLK_DIV(2) |
 		     HSIO_PLL5G_CFG0_ENA_BIAS |
 		     HSIO_PLL5G_CFG0_ENA_VCO_BUF |
@@ -465,13 +413,14 @@
 		     HSIO_PLL5G_CFG0_SELBGV820(4) |
 		     HSIO_PLL5G_CFG0_DIV4 |
 		     HSIO_PLL5G_CFG0_ENA_CLKTREE |
-		     HSIO_PLL5G_CFG0_ENA_LANE, HSIO_PLL5G_CFG0);
-	ocelot_write(ocelot, HSIO_PLL5G_CFG2_EN_RESET_FRQ_DET |
+		     HSIO_PLL5G_CFG0_ENA_LANE);
+	regmap_write(ocelot->targets[HSIO], HSIO_PLL5G_CFG2,
+		     HSIO_PLL5G_CFG2_EN_RESET_FRQ_DET |
 		     HSIO_PLL5G_CFG2_EN_RESET_OVERRUN |
 		     HSIO_PLL5G_CFG2_GAIN_TEST(0x8) |
 		     HSIO_PLL5G_CFG2_ENA_AMPCTRL |
 		     HSIO_PLL5G_CFG2_PWD_AMPCTRL_N |
-		     HSIO_PLL5G_CFG2_AMPC_SEL(0x10), HSIO_PLL5G_CFG2);
+		     HSIO_PLL5G_CFG2_AMPC_SEL(0x10));
 }
 
 int ocelot_chip_init(struct ocelot *ocelot)
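The HSIO registers now sit behind their own regmap target instead of the
driver's ocelot_write() helper (the block is shared with the SerDes PHY
driver), and the argument order flips: regmap_write() takes the register
before the value. A sketch of the matching read and read-modify-write
patterns, using standard regmap calls:

	unsigned int val;

	/* Read back a PLL5G register through the HSIO regmap. */
	regmap_read(ocelot->targets[HSIO], HSIO_PLL5G_CFG0, &val);

	/* Set one bit without disturbing the rest of the register. */
	regmap_update_bits(ocelot->targets[HSIO], HSIO_PLL5G_CFG0,
			   HSIO_PLL5G_CFG0_ENA_BIAS,  /* mask */
			   HSIO_PLL5G_CFG0_ENA_BIAS); /* value */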
diff --git a/drivers/net/ethernet/mscc/ocelot_s2.h b/drivers/net/ethernet/mscc/ocelot_s2.h
new file mode 100644
index 0000000..80107be
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_s2.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ * Copyright (c) 2018 Microsemi Corporation
+ */
+
+#ifndef _OCELOT_S2_CORE_H_
+#define _OCELOT_S2_CORE_H_
+
+#define S2_CORE_UPDATE_CTRL_UPDATE_CMD(x)      (((x) << 22) & GENMASK(24, 22))
+#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_M       GENMASK(24, 22)
+#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_X(x)    (((x) & GENMASK(24, 22)) >> 22)
+#define S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS   BIT(21)
+#define S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS  BIT(20)
+#define S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS     BIT(19)
+#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR(x)     (((x) << 3) & GENMASK(18, 3))
+#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_M      GENMASK(18, 3)
+#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_X(x)   (((x) & GENMASK(18, 3)) >> 3)
+#define S2_CORE_UPDATE_CTRL_UPDATE_SHOT        BIT(2)
+#define S2_CORE_UPDATE_CTRL_CLEAR_CACHE        BIT(1)
+#define S2_CORE_UPDATE_CTRL_MV_TRAFFIC_IGN     BIT(0)
+
+#define S2_CORE_MV_CFG_MV_NUM_POS(x)           (((x) << 16) & GENMASK(31, 16))
+#define S2_CORE_MV_CFG_MV_NUM_POS_M            GENMASK(31, 16)
+#define S2_CORE_MV_CFG_MV_NUM_POS_X(x)         (((x) & GENMASK(31, 16)) >> 16)
+#define S2_CORE_MV_CFG_MV_SIZE(x)              ((x) & GENMASK(15, 0))
+#define S2_CORE_MV_CFG_MV_SIZE_M               GENMASK(15, 0)
+
+#define S2_CACHE_ENTRY_DAT_RSZ                 0x4
+
+#define S2_CACHE_MASK_DAT_RSZ                  0x4
+
+#define S2_CACHE_ACTION_DAT_RSZ                0x4
+
+#define S2_CACHE_CNT_DAT_RSZ                   0x4
+
+#define S2_STICKY_VCAP_ROW_DELETED_STICKY      BIT(0)
+
+#define S2_BIST_CTRL_TCAM_BIST                 BIT(1)
+#define S2_BIST_CTRL_TCAM_INIT                 BIT(0)
+
+#define S2_BIST_CFG_TCAM_BIST_SOE_ENA          BIT(8)
+#define S2_BIST_CFG_TCAM_HCG_DIS               BIT(7)
+#define S2_BIST_CFG_TCAM_CG_DIS                BIT(6)
+#define S2_BIST_CFG_TCAM_BIAS(x)               ((x) & GENMASK(5, 0))
+#define S2_BIST_CFG_TCAM_BIAS_M                GENMASK(5, 0)
+
+#define S2_BIST_STAT_BIST_RT_ERR               BIT(15)
+#define S2_BIST_STAT_BIST_PENC_ERR             BIT(14)
+#define S2_BIST_STAT_BIST_COMP_ERR             BIT(13)
+#define S2_BIST_STAT_BIST_ADDR_ERR             BIT(12)
+#define S2_BIST_STAT_BIST_BL1E_ERR             BIT(11)
+#define S2_BIST_STAT_BIST_BL1_ERR              BIT(10)
+#define S2_BIST_STAT_BIST_BL0E_ERR             BIT(9)
+#define S2_BIST_STAT_BIST_BL0_ERR              BIT(8)
+#define S2_BIST_STAT_BIST_PH1_ERR              BIT(7)
+#define S2_BIST_STAT_BIST_PH0_ERR              BIT(6)
+#define S2_BIST_STAT_BIST_PV1_ERR              BIT(5)
+#define S2_BIST_STAT_BIST_PV0_ERR              BIT(4)
+#define S2_BIST_STAT_BIST_RUN                  BIT(3)
+#define S2_BIST_STAT_BIST_ERR                  BIT(2)
+#define S2_BIST_STAT_BIST_BUSY                 BIT(1)
+#define S2_BIST_STAT_TCAM_RDY                  BIT(0)
+
+#endif /* _OCELOT_S2_CORE_H_ */
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
new file mode 100644
index 0000000..16a6db7
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch TC driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include "ocelot_tc.h"
+#include "ocelot_police.h"
+#include "ocelot_ace.h"
+#include <net/pkt_cls.h>
+
+static int ocelot_setup_tc_cls_matchall(struct ocelot_port *port,
+					struct tc_cls_matchall_offload *f,
+					bool ingress)
+{
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct ocelot_policer pol = { 0 };
+	struct flow_action_entry *action;
+	int err;
+
+	netdev_dbg(port->dev, "%s: port %u command %d cookie %lu\n",
+		   __func__, port->chip_port, f->command, f->cookie);
+
+	if (!ingress) {
+		NL_SET_ERR_MSG_MOD(extack, "Only ingress is supported");
+		return -EOPNOTSUPP;
+	}
+
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		if (!flow_offload_has_one_action(&f->rule->action)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only one action is supported");
+			return -EOPNOTSUPP;
+		}
+
+		if (port->tc.block_shared) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Rate limit is not supported on shared blocks");
+			return -EOPNOTSUPP;
+		}
+
+		action = &f->rule->action.entries[0];
+
+		if (action->id != FLOW_ACTION_POLICE) {
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
+			return -EOPNOTSUPP;
+		}
+
+		if (port->tc.police_id && port->tc.police_id != f->cookie) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only one policer per port is supported\n");
+			return -EEXIST;
+		}
+
+		pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8;
+		pol.burst = (u32)div_u64(action->police.rate_bytes_ps *
+					 PSCHED_NS2TICKS(action->police.burst),
+					 PSCHED_TICKS_PER_SEC);
+
+		err = ocelot_port_policer_add(port, &pol);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Could not add policer\n");
+			return err;
+		}
+
+		port->tc.police_id = f->cookie;
+		port->tc.offload_cnt++;
+		return 0;
+	case TC_CLSMATCHALL_DESTROY:
+		if (port->tc.police_id != f->cookie)
+			return -ENOENT;
+
+		err = ocelot_port_policer_del(port);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Could not delete policer\n");
+			return err;
+		}
+		port->tc.police_id = 0;
+		port->tc.offload_cnt--;
+		return 0;
+	case TC_CLSMATCHALL_STATS: /* fall through */
+	default:
+		return -EOPNOTSUPP;
+	}
+}
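The unit conversion in the REPLACE case deserves a note: tc delivers the
police rate in bytes per second and the burst as a time in psched ticks,
while struct ocelot_policer wants kbps and bytes. PSCHED_NS2TICKS() and
PSCHED_TICKS_PER_SEC share the same 64 ns granularity, so the burst
computation reduces to rate * burst_time. A sketch with illustrative
numbers:

	u64 rate_bytes_ps = 1250000; /* 10 Mbit/s expressed in bytes/s */
	u64 burst_ns = 8000000;      /* 8 ms worth of tokens */

	u32 rate_kbps = (u32)div_u64(rate_bytes_ps, 1000) * 8;  /* 10000 */
	u32 burst_bytes = (u32)div_u64(rate_bytes_ps * burst_ns,
				       NSEC_PER_SEC);            /* 10000 */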
+
+static int ocelot_setup_tc_block_cb(enum tc_setup_type type,
+				    void *type_data,
+				    void *cb_priv, bool ingress)
+{
+	struct ocelot_port *port = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(port->dev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSMATCHALL %s\n",
+			   ingress ? "ingress" : "egress");
+
+		return ocelot_setup_tc_cls_matchall(port, type_data, ingress);
+	case TC_SETUP_CLSFLOWER:
+		return 0;
+	default:
+		netdev_dbg(port->dev, "tc_block_cb: type %d %s\n",
+			   type,
+			   ingress ? "ingress" : "egress");
+
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ocelot_setup_tc_block_cb_ig(enum tc_setup_type type,
+				       void *type_data,
+				       void *cb_priv)
+{
+	return ocelot_setup_tc_block_cb(type, type_data,
+					cb_priv, true);
+}
+
+static int ocelot_setup_tc_block_cb_eg(enum tc_setup_type type,
+				       void *type_data,
+				       void *cb_priv)
+{
+	return ocelot_setup_tc_block_cb(type, type_data,
+					cb_priv, false);
+}
+
+static LIST_HEAD(ocelot_block_cb_list);
+
+static int ocelot_setup_tc_block(struct ocelot_port *port,
+				 struct flow_block_offload *f)
+{
+	struct flow_block_cb *block_cb;
+	flow_setup_cb_t *cb;
+	int err;
+
+	netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n",
+		   f->command, f->binder_type);
+
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+		cb = ocelot_setup_tc_block_cb_ig;
+		port->tc.block_shared = f->block_shared;
+	} else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+		cb = ocelot_setup_tc_block_cb_eg;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	f->driver_block_list = &ocelot_block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list))
+			return -EBUSY;
+
+		block_cb = flow_block_cb_alloc(cb, port, port, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		err = ocelot_setup_tc_block_flower_bind(port, f);
+		if (err < 0) {
+			flow_block_cb_free(block_cb);
+			return err;
+		}
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, f->driver_block_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, port);
+		if (!block_cb)
+			return -ENOENT;
+
+		ocelot_setup_tc_block_flower_unbind(port, f);
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		    void *type_data)
+{
+	struct ocelot_port *port = netdev_priv(dev);
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return ocelot_setup_tc_block(port, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
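With ocelot_setup_tc() wired up as the port's ndo_setup_tc, the matchall
path above is what a user-space sequence along these lines ends up
exercising (port name illustrative):

	tc qdisc add dev swp0 clsact
	tc filter add dev swp0 ingress matchall \
		action police rate 10mbit burst 10k

The police action is translated into a struct ocelot_policer and
programmed through ocelot_port_policer_add().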
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.h b/drivers/net/ethernet/mscc/ocelot_tc.h
new file mode 100644
index 0000000..61757c2
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_tc.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_TC_H_
+#define _MSCC_OCELOT_TC_H_
+
+#include <linux/netdevice.h>
+
+struct ocelot_port_tc {
+	bool block_shared;
+	unsigned long offload_cnt;
+
+	unsigned long police_id;
+};
+
+int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		    void *type_data);
+
+#endif /* _MSCC_OCELOT_TC_H_ */
diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h
new file mode 100644
index 0000000..e22eac1
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_vcap.h
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Microsemi Ocelot Switch driver
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _OCELOT_VCAP_H_
+#define _OCELOT_VCAP_H_
+
+/* =================================================================
+ *  VCAP Common
+ * =================================================================
+ */
+
+/* VCAP Type-Group values */
+#define VCAP_TG_NONE 0 /* Entry is invalid */
+#define VCAP_TG_FULL 1 /* Full entry */
+#define VCAP_TG_HALF 2 /* Half entry */
+#define VCAP_TG_QUARTER 3 /* Quarter entry */
+
+/* =================================================================
+ *  VCAP IS2
+ * =================================================================
+ */
+
+#define VCAP_IS2_CNT 64
+#define VCAP_IS2_ENTRY_WIDTH 376
+#define VCAP_IS2_ACTION_WIDTH 99
+#define VCAP_PORT_CNT 11
+
+/* IS2 half key types */
+#define IS2_TYPE_ETYPE 0
+#define IS2_TYPE_LLC 1
+#define IS2_TYPE_SNAP 2
+#define IS2_TYPE_ARP 3
+#define IS2_TYPE_IP_UDP_TCP 4
+#define IS2_TYPE_IP_OTHER 5
+#define IS2_TYPE_IPV6 6
+#define IS2_TYPE_OAM 7
+#define IS2_TYPE_SMAC_SIP6 8
+#define IS2_TYPE_ANY 100 /* Pseudo type */
+
+/* IS2 half key type mask for matching any IP */
+#define IS2_TYPE_MASK_IP_ANY 0xe
+
+/* IS2 action types */
+#define IS2_ACTION_TYPE_NORMAL 0
+#define IS2_ACTION_TYPE_SMAC_SIP 1
+
+/* IS2 MASK_MODE values */
+#define IS2_ACT_MASK_MODE_NONE 0
+#define IS2_ACT_MASK_MODE_FILTER 1
+#define IS2_ACT_MASK_MODE_POLICY 2
+#define IS2_ACT_MASK_MODE_REDIR 3
+
+/* IS2 REW_OP values */
+#define IS2_ACT_REW_OP_NONE 0
+#define IS2_ACT_REW_OP_PTP_ONE 2
+#define IS2_ACT_REW_OP_PTP_TWO 3
+#define IS2_ACT_REW_OP_SPECIAL 8
+#define IS2_ACT_REW_OP_PTP_ORG 9
+#define IS2_ACT_REW_OP_PTP_ONE_SUB_DELAY_1 (IS2_ACT_REW_OP_PTP_ONE | (1 << 3))
+#define IS2_ACT_REW_OP_PTP_ONE_SUB_DELAY_2 (IS2_ACT_REW_OP_PTP_ONE | (2 << 3))
+#define IS2_ACT_REW_OP_PTP_ONE_ADD_DELAY (IS2_ACT_REW_OP_PTP_ONE | (1 << 5))
+#define IS2_ACT_REW_OP_PTP_ONE_ADD_SUB BIT(7)
+
+#define VCAP_PORT_WIDTH 4
+
+/* IS2 quarter key - SMAC_SIP4 */
+#define IS2_QKO_IGR_PORT 0
+#define IS2_QKL_IGR_PORT VCAP_PORT_WIDTH
+#define IS2_QKO_L2_SMAC (IS2_QKO_IGR_PORT + IS2_QKL_IGR_PORT)
+#define IS2_QKL_L2_SMAC 48
+#define IS2_QKO_L3_IP4_SIP (IS2_QKO_L2_SMAC + IS2_QKL_L2_SMAC)
+#define IS2_QKL_L3_IP4_SIP 32
+
+/* IS2 half key - common */
+#define IS2_HKO_TYPE 0
+#define IS2_HKL_TYPE 4
+#define IS2_HKO_FIRST (IS2_HKO_TYPE + IS2_HKL_TYPE)
+#define IS2_HKL_FIRST 1
+#define IS2_HKO_PAG (IS2_HKO_FIRST + IS2_HKL_FIRST)
+#define IS2_HKL_PAG 8
+#define IS2_HKO_IGR_PORT_MASK (IS2_HKO_PAG + IS2_HKL_PAG)
+#define IS2_HKL_IGR_PORT_MASK (VCAP_PORT_CNT + 1)
+#define IS2_HKO_SERVICE_FRM (IS2_HKO_IGR_PORT_MASK + IS2_HKL_IGR_PORT_MASK)
+#define IS2_HKL_SERVICE_FRM 1
+#define IS2_HKO_HOST_MATCH (IS2_HKO_SERVICE_FRM + IS2_HKL_SERVICE_FRM)
+#define IS2_HKL_HOST_MATCH 1
+#define IS2_HKO_L2_MC (IS2_HKO_HOST_MATCH + IS2_HKL_HOST_MATCH)
+#define IS2_HKL_L2_MC 1
+#define IS2_HKO_L2_BC (IS2_HKO_L2_MC + IS2_HKL_L2_MC)
+#define IS2_HKL_L2_BC 1
+#define IS2_HKO_VLAN_TAGGED (IS2_HKO_L2_BC + IS2_HKL_L2_BC)
+#define IS2_HKL_VLAN_TAGGED 1
+#define IS2_HKO_VID (IS2_HKO_VLAN_TAGGED + IS2_HKL_VLAN_TAGGED)
+#define IS2_HKL_VID 12
+#define IS2_HKO_DEI (IS2_HKO_VID + IS2_HKL_VID)
+#define IS2_HKL_DEI 1
+#define IS2_HKO_PCP (IS2_HKO_DEI + IS2_HKL_DEI)
+#define IS2_HKL_PCP 3
+
+/* IS2 half key - MAC_ETYPE/MAC_LLC/MAC_SNAP/OAM common */
+#define IS2_HKO_L2_DMAC (IS2_HKO_PCP + IS2_HKL_PCP)
+#define IS2_HKL_L2_DMAC 48
+#define IS2_HKO_L2_SMAC (IS2_HKO_L2_DMAC + IS2_HKL_L2_DMAC)
+#define IS2_HKL_L2_SMAC 48
+
+/* IS2 half key - MAC_ETYPE */
+#define IS2_HKO_MAC_ETYPE_ETYPE (IS2_HKO_L2_SMAC + IS2_HKL_L2_SMAC)
+#define IS2_HKL_MAC_ETYPE_ETYPE 16
+#define IS2_HKO_MAC_ETYPE_L2_PAYLOAD                                           \
+	(IS2_HKO_MAC_ETYPE_ETYPE + IS2_HKL_MAC_ETYPE_ETYPE)
+#define IS2_HKL_MAC_ETYPE_L2_PAYLOAD 27
+
+/* IS2 half key - MAC_LLC */
+#define IS2_HKO_MAC_LLC_L2_LLC IS2_HKO_MAC_ETYPE_ETYPE
+#define IS2_HKL_MAC_LLC_L2_LLC 40
+
+/* IS2 half key - MAC_SNAP */
+#define IS2_HKO_MAC_SNAP_L2_SNAP IS2_HKO_MAC_ETYPE_ETYPE
+#define IS2_HKL_MAC_SNAP_L2_SNAP 40
+
+/* IS2 half key - ARP */
+#define IS2_HKO_MAC_ARP_L2_SMAC IS2_HKO_L2_DMAC
+#define IS2_HKL_MAC_ARP_L2_SMAC 48
+#define IS2_HKO_MAC_ARP_ARP_ADDR_SPACE_OK                                      \
+	(IS2_HKO_MAC_ARP_L2_SMAC + IS2_HKL_MAC_ARP_L2_SMAC)
+#define IS2_HKL_MAC_ARP_ARP_ADDR_SPACE_OK 1
+#define IS2_HKO_MAC_ARP_ARP_PROTO_SPACE_OK                                     \
+	(IS2_HKO_MAC_ARP_ARP_ADDR_SPACE_OK + IS2_HKL_MAC_ARP_ARP_ADDR_SPACE_OK)
+#define IS2_HKL_MAC_ARP_ARP_PROTO_SPACE_OK 1
+#define IS2_HKO_MAC_ARP_ARP_LEN_OK                                             \
+	(IS2_HKO_MAC_ARP_ARP_PROTO_SPACE_OK +                                  \
+	 IS2_HKL_MAC_ARP_ARP_PROTO_SPACE_OK)
+#define IS2_HKL_MAC_ARP_ARP_LEN_OK 1
+#define IS2_HKO_MAC_ARP_ARP_TGT_MATCH                                          \
+	(IS2_HKO_MAC_ARP_ARP_LEN_OK + IS2_HKL_MAC_ARP_ARP_LEN_OK)
+#define IS2_HKL_MAC_ARP_ARP_TGT_MATCH 1
+#define IS2_HKO_MAC_ARP_ARP_SENDER_MATCH                                       \
+	(IS2_HKO_MAC_ARP_ARP_TGT_MATCH + IS2_HKL_MAC_ARP_ARP_TGT_MATCH)
+#define IS2_HKL_MAC_ARP_ARP_SENDER_MATCH 1
+#define IS2_HKO_MAC_ARP_ARP_OPCODE_UNKNOWN                                     \
+	(IS2_HKO_MAC_ARP_ARP_SENDER_MATCH + IS2_HKL_MAC_ARP_ARP_SENDER_MATCH)
+#define IS2_HKL_MAC_ARP_ARP_OPCODE_UNKNOWN 1
+#define IS2_HKO_MAC_ARP_ARP_OPCODE                                             \
+	(IS2_HKO_MAC_ARP_ARP_OPCODE_UNKNOWN +                                  \
+	 IS2_HKL_MAC_ARP_ARP_OPCODE_UNKNOWN)
+#define IS2_HKL_MAC_ARP_ARP_OPCODE 2
+#define IS2_HKO_MAC_ARP_L3_IP4_DIP                                             \
+	(IS2_HKO_MAC_ARP_ARP_OPCODE + IS2_HKL_MAC_ARP_ARP_OPCODE)
+#define IS2_HKL_MAC_ARP_L3_IP4_DIP 32
+#define IS2_HKO_MAC_ARP_L3_IP4_SIP                                             \
+	(IS2_HKO_MAC_ARP_L3_IP4_DIP + IS2_HKL_MAC_ARP_L3_IP4_DIP)
+#define IS2_HKL_MAC_ARP_L3_IP4_SIP 32
+#define IS2_HKO_MAC_ARP_DIP_EQ_SIP                                             \
+	(IS2_HKO_MAC_ARP_L3_IP4_SIP + IS2_HKL_MAC_ARP_L3_IP4_SIP)
+#define IS2_HKL_MAC_ARP_DIP_EQ_SIP 1
+
+/* IS2 half key - IP4_TCP_UDP/IP4_OTHER common */
+#define IS2_HKO_IP4 IS2_HKO_L2_DMAC
+#define IS2_HKL_IP4 1
+#define IS2_HKO_L3_FRAGMENT (IS2_HKO_IP4 + IS2_HKL_IP4)
+#define IS2_HKL_L3_FRAGMENT 1
+#define IS2_HKO_L3_FRAG_OFS_GT0 (IS2_HKO_L3_FRAGMENT + IS2_HKL_L3_FRAGMENT)
+#define IS2_HKL_L3_FRAG_OFS_GT0 1
+#define IS2_HKO_L3_OPTIONS (IS2_HKO_L3_FRAG_OFS_GT0 + IS2_HKL_L3_FRAG_OFS_GT0)
+#define IS2_HKL_L3_OPTIONS 1
+#define IS2_HKO_L3_TTL_GT0 (IS2_HKO_L3_OPTIONS + IS2_HKL_L3_OPTIONS)
+#define IS2_HKL_L3_TTL_GT0 1
+#define IS2_HKO_L3_TOS (IS2_HKO_L3_TTL_GT0 + IS2_HKL_L3_TTL_GT0)
+#define IS2_HKL_L3_TOS 8
+#define IS2_HKO_L3_IP4_DIP (IS2_HKO_L3_TOS + IS2_HKL_L3_TOS)
+#define IS2_HKL_L3_IP4_DIP 32
+#define IS2_HKO_L3_IP4_SIP (IS2_HKO_L3_IP4_DIP + IS2_HKL_L3_IP4_DIP)
+#define IS2_HKL_L3_IP4_SIP 32
+#define IS2_HKO_DIP_EQ_SIP (IS2_HKO_L3_IP4_SIP + IS2_HKL_L3_IP4_SIP)
+#define IS2_HKL_DIP_EQ_SIP 1
+
+/* IS2 half key - IP4_TCP_UDP */
+#define IS2_HKO_IP4_TCP_UDP_TCP (IS2_HKO_DIP_EQ_SIP + IS2_HKL_DIP_EQ_SIP)
+#define IS2_HKL_IP4_TCP_UDP_TCP 1
+#define IS2_HKO_IP4_TCP_UDP_L4_DPORT                                           \
+	(IS2_HKO_IP4_TCP_UDP_TCP + IS2_HKL_IP4_TCP_UDP_TCP)
+#define IS2_HKL_IP4_TCP_UDP_L4_DPORT 16
+#define IS2_HKO_IP4_TCP_UDP_L4_SPORT                                           \
+	(IS2_HKO_IP4_TCP_UDP_L4_DPORT + IS2_HKL_IP4_TCP_UDP_L4_DPORT)
+#define IS2_HKL_IP4_TCP_UDP_L4_SPORT 16
+#define IS2_HKO_IP4_TCP_UDP_L4_RNG                                             \
+	(IS2_HKO_IP4_TCP_UDP_L4_SPORT + IS2_HKL_IP4_TCP_UDP_L4_SPORT)
+#define IS2_HKL_IP4_TCP_UDP_L4_RNG 8
+#define IS2_HKO_IP4_TCP_UDP_SPORT_EQ_DPORT                                     \
+	(IS2_HKO_IP4_TCP_UDP_L4_RNG + IS2_HKL_IP4_TCP_UDP_L4_RNG)
+#define IS2_HKL_IP4_TCP_UDP_SPORT_EQ_DPORT 1
+#define IS2_HKO_IP4_TCP_UDP_SEQUENCE_EQ0                                       \
+	(IS2_HKO_IP4_TCP_UDP_SPORT_EQ_DPORT +                                  \
+	 IS2_HKL_IP4_TCP_UDP_SPORT_EQ_DPORT)
+#define IS2_HKL_IP4_TCP_UDP_SEQUENCE_EQ0 1
+#define IS2_HKO_IP4_TCP_UDP_L4_FIN                                             \
+	(IS2_HKO_IP4_TCP_UDP_SEQUENCE_EQ0 + IS2_HKL_IP4_TCP_UDP_SEQUENCE_EQ0)
+#define IS2_HKL_IP4_TCP_UDP_L4_FIN 1
+#define IS2_HKO_IP4_TCP_UDP_L4_SYN                                             \
+	(IS2_HKO_IP4_TCP_UDP_L4_FIN + IS2_HKL_IP4_TCP_UDP_L4_FIN)
+#define IS2_HKL_IP4_TCP_UDP_L4_SYN 1
+#define IS2_HKO_IP4_TCP_UDP_L4_RST                                             \
+	(IS2_HKO_IP4_TCP_UDP_L4_SYN + IS2_HKL_IP4_TCP_UDP_L4_SYN)
+#define IS2_HKL_IP4_TCP_UDP_L4_RST 1
+#define IS2_HKO_IP4_TCP_UDP_L4_PSH                                             \
+	(IS2_HKO_IP4_TCP_UDP_L4_RST + IS2_HKL_IP4_TCP_UDP_L4_RST)
+#define IS2_HKL_IP4_TCP_UDP_L4_PSH 1
+#define IS2_HKO_IP4_TCP_UDP_L4_ACK                                             \
+	(IS2_HKO_IP4_TCP_UDP_L4_PSH + IS2_HKL_IP4_TCP_UDP_L4_PSH)
+#define IS2_HKL_IP4_TCP_UDP_L4_ACK 1
+#define IS2_HKO_IP4_TCP_UDP_L4_URG                                             \
+	(IS2_HKO_IP4_TCP_UDP_L4_ACK + IS2_HKL_IP4_TCP_UDP_L4_ACK)
+#define IS2_HKL_IP4_TCP_UDP_L4_URG 1
+#define IS2_HKO_IP4_TCP_UDP_L4_1588_DOM                                        \
+	(IS2_HKO_IP4_TCP_UDP_L4_URG + IS2_HKL_IP4_TCP_UDP_L4_URG)
+#define IS2_HKL_IP4_TCP_UDP_L4_1588_DOM 8
+#define IS2_HKO_IP4_TCP_UDP_L4_1588_VER                                        \
+	(IS2_HKO_IP4_TCP_UDP_L4_1588_DOM + IS2_HKL_IP4_TCP_UDP_L4_1588_DOM)
+#define IS2_HKL_IP4_TCP_UDP_L4_1588_VER 4
+
+/* IS2 half key - IP4_OTHER */
+#define IS2_HKO_IP4_OTHER_L3_PROTO IS2_HKO_IP4_TCP_UDP_TCP
+#define IS2_HKL_IP4_OTHER_L3_PROTO 8
+#define IS2_HKO_IP4_OTHER_L3_PAYLOAD                                           \
+	(IS2_HKO_IP4_OTHER_L3_PROTO + IS2_HKL_IP4_OTHER_L3_PROTO)
+#define IS2_HKL_IP4_OTHER_L3_PAYLOAD 56
+
+/* IS2 half key - IP6_STD */
+#define IS2_HKO_IP6_STD_L3_TTL_GT0 IS2_HKO_L2_DMAC
+#define IS2_HKL_IP6_STD_L3_TTL_GT0 1
+#define IS2_HKO_IP6_STD_L3_IP6_SIP                                             \
+	(IS2_HKO_IP6_STD_L3_TTL_GT0 + IS2_HKL_IP6_STD_L3_TTL_GT0)
+#define IS2_HKL_IP6_STD_L3_IP6_SIP 128
+#define IS2_HKO_IP6_STD_L3_PROTO                                               \
+	(IS2_HKO_IP6_STD_L3_IP6_SIP + IS2_HKL_IP6_STD_L3_IP6_SIP)
+#define IS2_HKL_IP6_STD_L3_PROTO 8
+
+/* IS2 half key - OAM */
+#define IS2_HKO_OAM_OAM_MEL_FLAGS IS2_HKO_MAC_ETYPE_ETYPE
+#define IS2_HKL_OAM_OAM_MEL_FLAGS 7
+#define IS2_HKO_OAM_OAM_VER                                                    \
+	(IS2_HKO_OAM_OAM_MEL_FLAGS + IS2_HKL_OAM_OAM_MEL_FLAGS)
+#define IS2_HKL_OAM_OAM_VER 5
+#define IS2_HKO_OAM_OAM_OPCODE (IS2_HKO_OAM_OAM_VER + IS2_HKL_OAM_OAM_VER)
+#define IS2_HKL_OAM_OAM_OPCODE 8
+#define IS2_HKO_OAM_OAM_FLAGS (IS2_HKO_OAM_OAM_OPCODE + IS2_HKL_OAM_OAM_OPCODE)
+#define IS2_HKL_OAM_OAM_FLAGS 8
+#define IS2_HKO_OAM_OAM_MEPID (IS2_HKO_OAM_OAM_FLAGS + IS2_HKL_OAM_OAM_FLAGS)
+#define IS2_HKL_OAM_OAM_MEPID 16
+#define IS2_HKO_OAM_OAM_CCM_CNTS_EQ0                                           \
+	(IS2_HKO_OAM_OAM_MEPID + IS2_HKL_OAM_OAM_MEPID)
+#define IS2_HKL_OAM_OAM_CCM_CNTS_EQ0 1
+
+/* IS2 half key - SMAC_SIP6 */
+#define IS2_HKO_SMAC_SIP6_IGR_PORT IS2_HKL_TYPE
+#define IS2_HKL_SMAC_SIP6_IGR_PORT VCAP_PORT_WIDTH
+#define IS2_HKO_SMAC_SIP6_L2_SMAC                                              \
+	(IS2_HKO_SMAC_SIP6_IGR_PORT + IS2_HKL_SMAC_SIP6_IGR_PORT)
+#define IS2_HKL_SMAC_SIP6_L2_SMAC 48
+#define IS2_HKO_SMAC_SIP6_L3_IP6_SIP                                           \
+	(IS2_HKO_SMAC_SIP6_L2_SMAC + IS2_HKL_SMAC_SIP6_L2_SMAC)
+#define IS2_HKL_SMAC_SIP6_L3_IP6_SIP 128
+
+/* IS2 full key - common */
+#define IS2_FKO_TYPE 0
+#define IS2_FKL_TYPE 2
+#define IS2_FKO_FIRST (IS2_FKO_TYPE + IS2_FKL_TYPE)
+#define IS2_FKL_FIRST 1
+#define IS2_FKO_PAG (IS2_FKO_FIRST + IS2_FKL_FIRST)
+#define IS2_FKL_PAG 8
+#define IS2_FKO_IGR_PORT_MASK (IS2_FKO_PAG + IS2_FKL_PAG)
+#define IS2_FKL_IGR_PORT_MASK (VCAP_PORT_CNT + 1)
+#define IS2_FKO_SERVICE_FRM (IS2_FKO_IGR_PORT_MASK + IS2_FKL_IGR_PORT_MASK)
+#define IS2_FKL_SERVICE_FRM 1
+#define IS2_FKO_HOST_MATCH (IS2_FKO_SERVICE_FRM + IS2_FKL_SERVICE_FRM)
+#define IS2_FKL_HOST_MATCH 1
+#define IS2_FKO_L2_MC (IS2_FKO_HOST_MATCH + IS2_FKL_HOST_MATCH)
+#define IS2_FKL_L2_MC 1
+#define IS2_FKO_L2_BC (IS2_FKO_L2_MC + IS2_FKL_L2_MC)
+#define IS2_FKL_L2_BC 1
+#define IS2_FKO_VLAN_TAGGED (IS2_FKO_L2_BC + IS2_FKL_L2_BC)
+#define IS2_FKL_VLAN_TAGGED 1
+#define IS2_FKO_VID (IS2_FKO_VLAN_TAGGED + IS2_FKL_VLAN_TAGGED)
+#define IS2_FKL_VID 12
+#define IS2_FKO_DEI (IS2_FKO_VID + IS2_FKL_VID)
+#define IS2_FKL_DEI 1
+#define IS2_FKO_PCP (IS2_FKO_DEI + IS2_FKL_DEI)
+#define IS2_FKL_PCP 3
+
+/* IS2 full key - IP6_TCP_UDP/IP6_OTHER common */
+#define IS2_FKO_L3_TTL_GT0 (IS2_FKO_PCP + IS2_FKL_PCP)
+#define IS2_FKL_L3_TTL_GT0 1
+#define IS2_FKO_L3_TOS (IS2_FKO_L3_TTL_GT0 + IS2_FKL_L3_TTL_GT0)
+#define IS2_FKL_L3_TOS 8
+#define IS2_FKO_L3_IP6_DIP (IS2_FKO_L3_TOS + IS2_FKL_L3_TOS)
+#define IS2_FKL_L3_IP6_DIP 128
+#define IS2_FKO_L3_IP6_SIP (IS2_FKO_L3_IP6_DIP + IS2_FKL_L3_IP6_DIP)
+#define IS2_FKL_L3_IP6_SIP 128
+#define IS2_FKO_DIP_EQ_SIP (IS2_FKO_L3_IP6_SIP + IS2_FKL_L3_IP6_SIP)
+#define IS2_FKL_DIP_EQ_SIP 1
+
+/* IS2 full key - IP6_TCP_UDP */
+#define IS2_FKO_IP6_TCP_UDP_TCP (IS2_FKO_DIP_EQ_SIP + IS2_FKL_DIP_EQ_SIP)
+#define IS2_FKL_IP6_TCP_UDP_TCP 1
+#define IS2_FKO_IP6_TCP_UDP_L4_DPORT                                           \
+	(IS2_FKO_IP6_TCP_UDP_TCP + IS2_FKL_IP6_TCP_UDP_TCP)
+#define IS2_FKL_IP6_TCP_UDP_L4_DPORT 16
+#define IS2_FKO_IP6_TCP_UDP_L4_SPORT                                           \
+	(IS2_FKO_IP6_TCP_UDP_L4_DPORT + IS2_FKL_IP6_TCP_UDP_L4_DPORT)
+#define IS2_FKL_IP6_TCP_UDP_L4_SPORT 16
+#define IS2_FKO_IP6_TCP_UDP_L4_RNG                                             \
+	(IS2_FKO_IP6_TCP_UDP_L4_SPORT + IS2_FKL_IP6_TCP_UDP_L4_SPORT)
+#define IS2_FKL_IP6_TCP_UDP_L4_RNG 8
+#define IS2_FKO_IP6_TCP_UDP_SPORT_EQ_DPORT                                     \
+	(IS2_FKO_IP6_TCP_UDP_L4_RNG + IS2_FKL_IP6_TCP_UDP_L4_RNG)
+#define IS2_FKL_IP6_TCP_UDP_SPORT_EQ_DPORT 1
+#define IS2_FKO_IP6_TCP_UDP_SEQUENCE_EQ0                                       \
+	(IS2_FKO_IP6_TCP_UDP_SPORT_EQ_DPORT +                                  \
+	 IS2_FKL_IP6_TCP_UDP_SPORT_EQ_DPORT)
+#define IS2_FKL_IP6_TCP_UDP_SEQUENCE_EQ0 1
+#define IS2_FKO_IP6_TCP_UDP_L4_FIN                                             \
+	(IS2_FKO_IP6_TCP_UDP_SEQUENCE_EQ0 + IS2_FKL_IP6_TCP_UDP_SEQUENCE_EQ0)
+#define IS2_FKL_IP6_TCP_UDP_L4_FIN 1
+#define IS2_FKO_IP6_TCP_UDP_L4_SYN                                             \
+	(IS2_FKO_IP6_TCP_UDP_L4_FIN + IS2_FKL_IP6_TCP_UDP_L4_FIN)
+#define IS2_FKL_IP6_TCP_UDP_L4_SYN 1
+#define IS2_FKO_IP6_TCP_UDP_L4_RST                                             \
+	(IS2_FKO_IP6_TCP_UDP_L4_SYN + IS2_FKL_IP6_TCP_UDP_L4_SYN)
+#define IS2_FKL_IP6_TCP_UDP_L4_RST 1
+#define IS2_FKO_IP6_TCP_UDP_L4_PSH                                             \
+	(IS2_FKO_IP6_TCP_UDP_L4_RST + IS2_FKL_IP6_TCP_UDP_L4_RST)
+#define IS2_FKL_IP6_TCP_UDP_L4_PSH 1
+#define IS2_FKO_IP6_TCP_UDP_L4_ACK                                             \
+	(IS2_FKO_IP6_TCP_UDP_L4_PSH + IS2_FKL_IP6_TCP_UDP_L4_PSH)
+#define IS2_FKL_IP6_TCP_UDP_L4_ACK 1
+#define IS2_FKO_IP6_TCP_UDP_L4_URG                                             \
+	(IS2_FKO_IP6_TCP_UDP_L4_ACK + IS2_FKL_IP6_TCP_UDP_L4_ACK)
+#define IS2_FKL_IP6_TCP_UDP_L4_URG 1
+#define IS2_FKO_IP6_TCP_UDP_L4_1588_DOM                                        \
+	(IS2_FKO_IP6_TCP_UDP_L4_URG + IS2_FKL_IP6_TCP_UDP_L4_URG)
+#define IS2_FKL_IP6_TCP_UDP_L4_1588_DOM 8
+#define IS2_FKO_IP6_TCP_UDP_L4_1588_VER                                        \
+	(IS2_FKO_IP6_TCP_UDP_L4_1588_DOM + IS2_FKL_IP6_TCP_UDP_L4_1588_DOM)
+#define IS2_FKL_IP6_TCP_UDP_L4_1588_VER 4
+
+/* IS2 full key - IP6_OTHER */
+#define IS2_FKO_IP6_OTHER_L3_PROTO IS2_FKO_IP6_TCP_UDP_TCP
+#define IS2_FKL_IP6_OTHER_L3_PROTO 8
+#define IS2_FKO_IP6_OTHER_L3_PAYLOAD                                           \
+	(IS2_FKO_IP6_OTHER_L3_PROTO + IS2_FKL_IP6_OTHER_L3_PROTO)
+#define IS2_FKL_IP6_OTHER_L3_PAYLOAD 56
+
+/* IS2 full key - CUSTOM */
+#define IS2_FKO_CUSTOM_CUSTOM_TYPE IS2_FKO_L3_TTL_GT0
+#define IS2_FKL_CUSTOM_CUSTOM_TYPE 1
+#define IS2_FKO_CUSTOM_CUSTOM                                                  \
+	(IS2_FKO_CUSTOM_CUSTOM_TYPE + IS2_FKL_CUSTOM_CUSTOM_TYPE)
+#define IS2_FKL_CUSTOM_CUSTOM 320
+
+/* IS2 action - BASE_TYPE */
+#define IS2_AO_HIT_ME_ONCE 0
+#define IS2_AL_HIT_ME_ONCE 1
+#define IS2_AO_CPU_COPY_ENA (IS2_AO_HIT_ME_ONCE + IS2_AL_HIT_ME_ONCE)
+#define IS2_AL_CPU_COPY_ENA 1
+#define IS2_AO_CPU_QU_NUM (IS2_AO_CPU_COPY_ENA + IS2_AL_CPU_COPY_ENA)
+#define IS2_AL_CPU_QU_NUM 3
+#define IS2_AO_MASK_MODE (IS2_AO_CPU_QU_NUM + IS2_AL_CPU_QU_NUM)
+#define IS2_AL_MASK_MODE 2
+#define IS2_AO_MIRROR_ENA (IS2_AO_MASK_MODE + IS2_AL_MASK_MODE)
+#define IS2_AL_MIRROR_ENA 1
+#define IS2_AO_LRN_DIS (IS2_AO_MIRROR_ENA + IS2_AL_MIRROR_ENA)
+#define IS2_AL_LRN_DIS 1
+#define IS2_AO_POLICE_ENA (IS2_AO_LRN_DIS + IS2_AL_LRN_DIS)
+#define IS2_AL_POLICE_ENA 1
+#define IS2_AO_POLICE_IDX (IS2_AO_POLICE_ENA + IS2_AL_POLICE_ENA)
+#define IS2_AL_POLICE_IDX 9
+#define IS2_AO_POLICE_VCAP_ONLY (IS2_AO_POLICE_IDX + IS2_AL_POLICE_IDX)
+#define IS2_AL_POLICE_VCAP_ONLY 1
+#define IS2_AO_PORT_MASK (IS2_AO_POLICE_VCAP_ONLY + IS2_AL_POLICE_VCAP_ONLY)
+#define IS2_AL_PORT_MASK VCAP_PORT_CNT
+#define IS2_AO_REW_OP (IS2_AO_PORT_MASK + IS2_AL_PORT_MASK)
+#define IS2_AL_REW_OP 9
+#define IS2_AO_LM_CNT_DIS (IS2_AO_REW_OP + IS2_AL_REW_OP)
+#define IS2_AL_LM_CNT_DIS 1
+#define IS2_AO_ISDX_ENA                                                        \
+	(IS2_AO_LM_CNT_DIS + IS2_AL_LM_CNT_DIS + 1) /* Reserved bit */
+#define IS2_AL_ISDX_ENA 1
+#define IS2_AO_ACL_ID (IS2_AO_ISDX_ENA + IS2_AL_ISDX_ENA)
+#define IS2_AL_ACL_ID 6
+
+/* IS2 action - SMAC_SIP */
+#define IS2_AO_SMAC_SIP_CPU_COPY_ENA 0
+#define IS2_AL_SMAC_SIP_CPU_COPY_ENA 1
+#define IS2_AO_SMAC_SIP_CPU_QU_NUM 1
+#define IS2_AL_SMAC_SIP_CPU_QU_NUM 3
+#define IS2_AO_SMAC_SIP_FWD_KILL_ENA 4
+#define IS2_AL_SMAC_SIP_FWD_KILL_ENA 1
+#define IS2_AO_SMAC_SIP_HOST_MATCH 5
+#define IS2_AL_SMAC_SIP_HOST_MATCH 1
+
+#endif /* _OCELOT_VCAP_H_ */
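The _KO/_KL pairs above are bit offsets and bit widths inside a packed
VCAP key, each field starting where the previous one ends. A hypothetical
helper (not part of this patch) showing how such a field would be written
into a key stored as an array of 32-bit words:

	/* Illustrative only: place a width-bit value at bit offset in key[]. */
	static void vcap_key_field_set(u32 *key, int offset, int width,
				       u32 value)
	{
		int i;

		for (i = 0; i < width; i++) {
			int bit = offset + i;

			key[bit / 32] &= ~BIT(bit % 32);
			if (value & BIT(i))
				key[bit / 32] |= BIT(bit % 32);
		}
	}

	/* e.g. match L4 destination port 80 in an IP4 TCP/UDP half key:
	 * vcap_key_field_set(key, IS2_HKO_IP4_TCP_UDP_L4_DPORT,
	 *		      IS2_HKL_IP4_TCP_UDP_L4_DPORT, 80);
	 */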
diff --git a/drivers/net/ethernet/myricom/Kconfig b/drivers/net/ethernet/myricom/Kconfig
index 9645c72..6bc993e 100644
--- a/drivers/net/ethernet/myricom/Kconfig
+++ b/drivers/net/ethernet/myricom/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Myricom device configuration
 #
diff --git a/drivers/net/ethernet/myricom/Makefile b/drivers/net/ethernet/myricom/Makefile
index 296c0a1..122fbd9 100644
--- a/drivers/net/ethernet/myricom/Makefile
+++ b/drivers/net/ethernet/myricom/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Myricom network device drivers.
 #
diff --git a/drivers/net/ethernet/myricom/myri10ge/Makefile b/drivers/net/ethernet/myricom/myri10ge/Makefile
index 5df8916..8d9585c 100644
--- a/drivers/net/ethernet/myricom/myri10ge/Makefile
+++ b/drivers/net/ethernet/myricom/myri10ge/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Myricom Myri-10G ethernet driver
 #
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index b2d2ec8..c979f38 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -70,7 +70,6 @@
 #include <net/tcp.h>
 #include <asm/byteorder.h>
 #include <asm/processor.h>
-#include <net/busy_poll.h>
 
 #include "myri10ge_mcp.h"
 #include "myri10ge_mcp_gen_header.h"
@@ -1287,7 +1286,7 @@
 {
 	u8 *va;
 	struct vlan_ethhdr *veh;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	__wsum vsum;
 
 	va = addr;
@@ -1307,8 +1306,8 @@
 		skb->len -= VLAN_HLEN;
 		skb->data_len -= VLAN_HLEN;
 		frag = skb_shinfo(skb)->frags;
-		frag->page_offset += VLAN_HLEN;
-		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
+		skb_frag_off_add(frag, VLAN_HLEN);
+		skb_frag_size_sub(frag, VLAN_HLEN);
 	}
 }
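The accessors used above replace direct pokes at the fragment fields now
that struct skb_frag_struct is gone and fragments are plain bio_vecs.
Their effect, open-coded for reference (a sketch of the helpers in
include/linux/skbuff.h as of this kernel, not a copy):

	/* skb_frag_off_add(frag, delta)  ~  frag->bv_offset += delta; */
	/* skb_frag_size_sub(frag, delta) ~  frag->bv_len    -= delta; */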
 
@@ -1319,7 +1318,7 @@
 {
 	struct myri10ge_priv *mgp = ss->mgp;
 	struct sk_buff *skb;
-	struct skb_frag_struct *rx_frags;
+	skb_frag_t *rx_frags;
 	struct myri10ge_rx_buf *rx;
 	int i, idx, remainder, bytes;
 	struct pci_dev *pdev = mgp->pdev;
@@ -1352,7 +1351,7 @@
 		return 0;
 	}
 	rx_frags = skb_shinfo(skb)->frags;
-	/* Fill skb_frag_struct(s) with data from our receive */
+	/* Fill skb_frag_t(s) with data from our receive */
 	for (i = 0, remainder = len; remainder > 0; i++) {
 		myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
 		skb_fill_page_desc(skb, i, rx->info[idx].page,
@@ -1365,8 +1364,8 @@
 	}
 
 	/* remove padding */
-	rx_frags[0].page_offset += MXGEFW_PAD;
-	rx_frags[0].size -= MXGEFW_PAD;
+	skb_frag_off_add(&rx_frags[0], MXGEFW_PAD);
+	skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD);
 	len -= MXGEFW_PAD;
 
 	skb->len = len;
@@ -1409,7 +1408,7 @@
 		if (skb) {
 			ss->stats.tx_bytes += skb->len;
 			ss->stats.tx_packets++;
-			dev_kfree_skb_irq(skb);
+			dev_consume_skb_irq(skb);
 			if (len)
 				pci_unmap_single(pdev,
 						 dma_unmap_addr(&tx->info[idx],
@@ -1440,7 +1439,6 @@
 			tx->queue_active = 0;
 			put_be32(htonl(1), tx->send_stop);
 			mb();
-			mmiowb();
 		}
 		__netif_tx_unlock(dev_queue);
 	}
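The mmiowb() deletions here and in the s2io hunk further down are part of
a kernel-wide cleanup: since v5.2 the barrier is implied by spin_unlock(),
so drivers no longer issue it by hand.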
@@ -2630,7 +2628,7 @@
 	struct myri10ge_slice_state *ss;
 	struct mcp_kreq_ether_send *req;
 	struct myri10ge_tx_buf *tx;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	struct netdev_queue *netdev_queue;
 	dma_addr_t bus;
 	u32 low;
@@ -2862,7 +2860,6 @@
 		tx->queue_active = 1;
 		put_be32(htonl(1), tx->send_go);
 		mb();
-		mmiowb();
 	}
 	tx->pkt_start++;
 	if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
@@ -3040,7 +3037,6 @@
 static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct myri10ge_priv *mgp = netdev_priv(dev);
-	int error = 0;
 
 	netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
 	if (mgp->running) {
@@ -3052,7 +3048,7 @@
 	} else
 		dev->mtu = new_mtu;
 
-	return error;
+	return 0;
 }
 
 /*
@@ -3605,9 +3601,9 @@
 	for (i = 0; i < mgp->num_slices; i++) {
 		ss = &mgp->ss[i];
 		bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry);
-		ss->rx_done.entry = dma_zalloc_coherent(&pdev->dev, bytes,
-							&ss->rx_done.bus,
-							GFP_KERNEL);
+		ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
+						       &ss->rx_done.bus,
+						       GFP_KERNEL);
 		if (ss->rx_done.entry == NULL)
 			goto abort;
 		bytes = sizeof(*ss->fw_stats);
@@ -3922,7 +3918,7 @@
 	 * setup (if available). */
 	status = myri10ge_request_irq(mgp);
 	if (status != 0)
-		goto abort_with_firmware;
+		goto abort_with_slices;
 	myri10ge_free_irq(mgp);
 
 	/* Save configuration space to be restored if the
diff --git a/drivers/net/ethernet/natsemi/Kconfig b/drivers/net/ethernet/natsemi/Kconfig
index 017fb23..c519c1f 100644
--- a/drivers/net/ethernet/natsemi/Kconfig
+++ b/drivers/net/ethernet/natsemi/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # National Semiconductor device configuration
 #
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index b9a1a9f..1a2634c 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -2173,7 +2173,7 @@
 					np->tx_skbuff[entry]->len,
 					PCI_DMA_TODEVICE);
 		/* Free the original skb. */
-		dev_kfree_skb_irq(np->tx_skbuff[entry]);
+		dev_consume_skb_irq(np->tx_skbuff[entry]);
 		np->tx_skbuff[entry] = NULL;
 	}
 	if (netif_queue_stopped(dev) &&
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 958fced..6af9a7e 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 #define VERSION "0.23"
 /* ns83820.c by Benjamin LaHaise with contributions.
  *
@@ -10,21 +11,6 @@
  *
  * Mmmm, chocolate vanilla mocha...
  *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- *
  * ChangeLog
  * =========
  *	20010414	0.1 - created
@@ -1003,7 +989,7 @@
 					addr,
 					len,
 					PCI_DMA_TODEVICE);
-			dev_kfree_skb_irq(skb);
+			dev_consume_skb_irq(skb);
 			atomic_dec(&dev->nr_tx_skbs);
 		} else
 			pci_unmap_page(dev->pci_dev,
@@ -1869,56 +1855,28 @@
 static void ns83820_probe_phy(struct net_device *ndev)
 {
 	struct ns83820 *dev = PRIV(ndev);
-	static int first;
-	int i;
-#define MII_PHYIDR1	0x02
-#define MII_PHYIDR2	0x03
+	int j;
+	unsigned a, b;
 
-#if 0
-	if (!first) {
-		unsigned tmp;
-		ns83820_mii_read_reg(dev, 1, 0x09);
-		ns83820_mii_write_reg(dev, 1, 0x10, 0x0d3e);
-
-		tmp = ns83820_mii_read_reg(dev, 1, 0x00);
-		ns83820_mii_write_reg(dev, 1, 0x00, tmp | 0x8000);
-		udelay(1300);
-		ns83820_mii_read_reg(dev, 1, 0x09);
+	for (j = 0; j < 0x16; j += 4) {
+		dprintk("%s: [0x%02x] %04x %04x %04x %04x\n",
+			ndev->name, j,
+			ns83820_mii_read_reg(dev, 1, 0 + j),
+			ns83820_mii_read_reg(dev, 1, 1 + j),
+			ns83820_mii_read_reg(dev, 1, 2 + j),
+			ns83820_mii_read_reg(dev, 1, 3 + j)
+			);
 	}
-#endif
-	first = 1;
 
-	for (i=1; i<2; i++) {
-		int j;
-		unsigned a, b;
-		a = ns83820_mii_read_reg(dev, i, MII_PHYIDR1);
-		b = ns83820_mii_read_reg(dev, i, MII_PHYIDR2);
+	/* read firmware version: memory addr is 0x8402 and 0x8403 */
+	ns83820_mii_write_reg(dev, 1, 0x16, 0x000d);
+	ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e);
+	a = ns83820_mii_read_reg(dev, 1, 0x1d);
 
-		//printk("%s: phy %d: 0x%04x 0x%04x\n",
-		//	ndev->name, i, a, b);
-
-		for (j=0; j<0x16; j+=4) {
-			dprintk("%s: [0x%02x] %04x %04x %04x %04x\n",
-				ndev->name, j,
-				ns83820_mii_read_reg(dev, i, 0 + j),
-				ns83820_mii_read_reg(dev, i, 1 + j),
-				ns83820_mii_read_reg(dev, i, 2 + j),
-				ns83820_mii_read_reg(dev, i, 3 + j)
-				);
-		}
-	}
-	{
-		unsigned a, b;
-		/* read firmware version: memory addr is 0x8402 and 0x8403 */
-		ns83820_mii_write_reg(dev, 1, 0x16, 0x000d);
-		ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e);
-		a = ns83820_mii_read_reg(dev, 1, 0x1d);
-
-		ns83820_mii_write_reg(dev, 1, 0x16, 0x000d);
-		ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e);
-		b = ns83820_mii_read_reg(dev, 1, 0x1d);
-		dprintk("version: 0x%04x 0x%04x\n", a, b);
-	}
+	ns83820_mii_write_reg(dev, 1, 0x16, 0x000d);
+	ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e);
+	b = ns83820_mii_read_reg(dev, 1, 0x1d);
+	dprintk("version: 0x%04x 0x%04x\n", a, b);
 }
 #endif
 
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index c805dcb..b339125 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sonic.c
  *
@@ -231,9 +232,9 @@
 
 	laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE);
 	if (!laddr) {
-		printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name);
-		dev_kfree_skb(skb);
-		return NETDEV_TX_BUSY;
+		pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
 	}
 
 	sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0);       /* clear status */
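The switch from NETDEV_TX_BUSY to NETDEV_TX_OK is a behavioral fix, not
cosmetics: NETDEV_TX_BUSY tells the core to requeue the same skb and try
again, which livelocks when the failure (a DMA mapping error here) will
not clear on retry. The convention for unrecoverable per-packet errors is
to drop and report the packet as consumed; a sketch of the general
pattern, with hard_failure() as a placeholder:

	static netdev_tx_t example_xmit(struct sk_buff *skb,
					struct net_device *dev)
	{
		if (hard_failure(skb, dev)) { /* e.g. DMA mapping error */
			dev_kfree_skb_any(skb);
			dev->stats.tx_dropped++;
			return NETDEV_TX_OK; /* consumed: do not requeue */
		}
		/* ... fill descriptors and kick the hardware ... */
		return NETDEV_TX_OK;
	}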
@@ -328,7 +329,7 @@
 				}
 
 				/* We must free the original skb */
-				dev_kfree_skb_irq(lp->tx_skb[entry]);
+				dev_consume_skb_irq(lp->tx_skb[entry]);
 				lp->tx_skb[entry] = NULL;
 				/* and unmap DMA buffer */
 				dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE);
diff --git a/drivers/net/ethernet/neterion/Kconfig b/drivers/net/ethernet/neterion/Kconfig
index c26e0f7..5e630f3 100644
--- a/drivers/net/ethernet/neterion/Kconfig
+++ b/drivers/net/ethernet/neterion/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Exar device configuration
 #
@@ -26,7 +27,7 @@
 	  on its age.
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/s2io.txt>.
+	  <file:Documentation/networking/device_drivers/neterion/s2io.txt>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called s2io.
@@ -41,7 +42,7 @@
 	  labeled as either one, depending on its age.
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/vxge.txt>.
+	  <file:Documentation/networking/device_drivers/neterion/vxge.txt>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called vxge.
diff --git a/drivers/net/ethernet/neterion/Makefile b/drivers/net/ethernet/neterion/Makefile
index 70c8058..87ede8a 100644
--- a/drivers/net/ethernet/neterion/Makefile
+++ b/drivers/net/ethernet/neterion/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Exar network device drivers.
 #
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index b8983e7..e0b2bf3 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -75,6 +75,7 @@
 #include <linux/tcp.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include <net/tcp.h>
@@ -491,7 +492,7 @@
 };
 
 /* A simplifier macro used both by init and free shared_mem Fns(). */
-#define TXD_MEM_PAGE_CNT(len, per_each) ((len+per_each - 1) / per_each)
+#define TXD_MEM_PAGE_CNT(len, per_each) DIV_ROUND_UP(len, per_each)
 
 /* netqueue manipulation helper functions */
 static inline void s2io_stop_all_tx_queue(struct s2io_nic *sp)
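
(The TXD_MEM_PAGE_CNT rewrite above is behavior-preserving: DIV_ROUND_UP() from <linux/kernel.h> expands to the same ceiling division, with the operand parenthesization handled centrally instead of by hand. For example:

	/* DIV_ROUND_UP(n, d) == (((n) + (d) - 1) / (d)), so:
	 *   DIV_ROUND_UP(10, 4) == 3   10 TxDs at 4 per page need 3 pages
	 *   DIV_ROUND_UP(8, 4)  == 2   exact multiples are unchanged
	 */
)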
@@ -746,7 +747,6 @@
 				return -ENOMEM;
 			}
 			mem_allocated += size;
-			memset(tmp_v_addr, 0, size);
 
 			size = sizeof(struct rxd_info) *
 				rxd_count[nic->rxd_mode];
@@ -3054,7 +3054,7 @@
 
 		/* Updating the statistics block */
 		swstats->mem_freed += skb->truesize;
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 
 		get_info.offset++;
 		if (get_info.offset == get_info.fifo_len + 1)
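
(Several hunks in this update, in 3c515.c, sonic.c, vxge-main.c, and here, swap dev_kfree_skb_irq() for dev_consume_skb_irq() on tx-completion paths. Both free the skb from hard-irq context; they differ only in which tracepoint fires, so drop-monitoring tools stop reporting healthy completions as packet drops. The rule of thumb, sketched with a hypothetical status flag:

	/* tx completion handler (illustrative; TX_COMPLETE_OK is made up) */
	if (desc_status & TX_COMPLETE_OK)
		dev_consume_skb_irq(skb);	/* transmitted: not a drop */
	else
		dev_kfree_skb_irq(skb);		/* error path: a real drop */
)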
@@ -3679,11 +3679,9 @@
 		writeq(nic->msix_info[i].data, &bar0->xmsi_data);
 		val64 = (s2BIT(7) | s2BIT(15) | vBIT(msix_index, 26, 6));
 		writeq(val64, &bar0->xmsi_access);
-		if (wait_for_msix_trans(nic, msix_index)) {
+		if (wait_for_msix_trans(nic, msix_index))
 			DBG_PRINT(ERR_DBG, "%s: index: %d failed\n",
 				  __func__, msix_index);
-			continue;
-		}
 	}
 }
 
@@ -4154,8 +4152,6 @@
 
 	writeq(val64, &tx_fifo->List_Control);
 
-	mmiowb();
-
 	put_off++;
 	if (put_off == fifo->tx_curr_put_info.fifo_len + 1)
 		put_off = 0;
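
(The mmiowb() deletions here and in the vxge hunks below track the kernel-wide retirement of that primitive: as of the v5.2 mmiowb rework, the ordering it provided is guaranteed by spin_unlock() on the architectures that need it, so drivers no longer insert it by hand. Roughly, assuming the doorbell write is done under a spinlock as in this tx path:

	spin_lock_irqsave(&fifo->tx_lock, flags);
	writeq(val64, &tx_fifo->List_Control);	/* posted MMIO write */
	/* mmiowb() used to go here */
	spin_unlock_irqrestore(&fifo->tx_lock, flags);	/* now implies it */
)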
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index 1a24a72..0a921f3 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -10,6 +10,7 @@
  * system is licensed under the GPL.
  * See the file COPYING in this distribution for more information.
  ************************************************************************/
+#include <linux/io-64-nonatomic-lo-hi.h>
 #ifndef _S2IO_H
 #define _S2IO_H
 
@@ -970,27 +971,6 @@
 #define RESET_ERROR 1
 #define CMD_ERROR   2
 
-/*  OS related system calls */
-#ifndef readq
-static inline u64 readq(void __iomem *addr)
-{
-	u64 ret = 0;
-	ret = readl(addr + 4);
-	ret <<= 32;
-	ret |= readl(addr);
-
-	return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void writeq(u64 val, void __iomem *addr)
-{
-	writel((u32) (val), addr);
-	writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
 /*
  * Some registers have to be written in a particular order to
  * ensure correct hardware operation. The macro SPECIAL_REG_WRITE
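
(The readq()/writeq() fallbacks deleted above, and the identical pair removed from vxge-config.h further down, are replaced by the new <linux/io-64-nonatomic-lo-hi.h> include, whose generic helpers emulate a 64-bit MMIO access as two 32-bit ones, low word first. Note the removed local readq() read the high word first, which is exactly the ordering choice the kernel's lo-hi/hi-lo header pair exists to make explicit. A simplified sketch of what the lo-hi header effectively provides:

	#ifndef readq
	static inline u64 readq(const volatile void __iomem *addr)
	{
		u32 low = readl(addr);
		u32 high = readl(addr + 4);

		return low | ((u64)high << 32);
	}
	#endif
)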
diff --git a/drivers/net/ethernet/neterion/vxge/Makefile b/drivers/net/ethernet/neterion/vxge/Makefile
index b625e2c..0820e81 100644
--- a/drivers/net/ethernet/neterion/vxge/Makefile
+++ b/drivers/net/ethernet/neterion/vxge/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for Exar Corp's X3100 Series 10 GbE PCIe I/O
 # Virtualized Server Adapter linux driver
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c
index 398011c..51cd57a 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c
@@ -13,6 +13,7 @@
  ******************************************************************************/
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
 
@@ -807,7 +808,7 @@
 	struct vxge_hw_device_date *fw_date = &hw_info->fw_date;
 	struct vxge_hw_device_version *flash_version = &hw_info->flash_version;
 	struct vxge_hw_device_date *flash_date = &hw_info->flash_date;
-	u64 data0, data1 = 0, steer_ctrl = 0;
+	u64 data0 = 0, data1 = 0, steer_ctrl = 0;
 	enum vxge_hw_status status;
 
 	status = vxge_hw_vpath_fw_api(vpath,
@@ -2365,6 +2366,7 @@
 				dma_object->addr))) {
 			vxge_os_dma_free(devh->pdev, memblock,
 				&dma_object->acc_handle);
+			memblock = NULL;
 			goto exit;
 		}
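
(The added memblock = NULL above keeps the error path from returning a stale pointer: once vxge_os_dma_free() has run, the exit label would otherwise hand the freed address back to the caller. Generic shape of the fix, with hypothetical helpers:

	buf = alloc_buffer();
	if (buf && map_buffer(buf) != 0) {
		free_buffer(buf);
		buf = NULL;	/* otherwise the caller receives a dangling
				 * pointer it may dereference or double-free */
	}
	return buf;
)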
 
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h
index d743a37..e678ba3 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h
@@ -2011,26 +2011,6 @@
 void
 vxge_hw_vpath_rx_doorbell_init(struct __vxge_hw_vpath_handle *vp);
 
-#ifndef readq
-static inline u64 readq(void __iomem *addr)
-{
-	u64 ret = 0;
-	ret = readl(addr + 4);
-	ret <<= 32;
-	ret |= readl(addr);
-
-	return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void writeq(u64 val, void __iomem *addr)
-{
-	writel((u32) (val), addr);
-	writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
 static inline void __vxge_hw_pio_mem_write32_upper(u32 val, void __iomem *addr)
 {
 	writel(val, addr + 4);
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 5ae3fa8..1d334f2 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -114,7 +114,7 @@
 
 		/* free SKBs */
 		for (temp = completed; temp != skb_ptr; temp++)
-			dev_kfree_skb_irq(*temp);
+			dev_consume_skb_irq(*temp);
 	} while (more);
 }
 
@@ -1826,7 +1826,6 @@
 		vxge_hw_channel_msix_unmask(
 				(struct __vxge_hw_channel *)ring->handle,
 				ring->rx_vector_no);
-		mmiowb();
 	}
 
 	/* We are copying and returning the local variable, in case if after
@@ -2234,8 +2233,6 @@
 	vxge_hw_channel_msix_unmask((struct __vxge_hw_channel *)fifo->handle,
 				    fifo->tx_vector_no);
 
-	mmiowb();
-
 	return IRQ_HANDLED;
 }
 
@@ -2272,14 +2269,12 @@
 		 */
 		vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id);
 		vxge_hw_vpath_msix_clear(vdev->vpaths[i].handle, msix_id);
-		mmiowb();
 
 		status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle,
 			vdev->exec_mode);
 		if (status == VXGE_HW_OK) {
 			vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle,
 						  msix_id);
-			mmiowb();
 			continue;
 		}
 		vxge_debug_intr(VXGE_ERR,
@@ -2553,7 +2548,7 @@
 			vxge_debug_init(VXGE_ERR,
 				"%s: Defaulting to INTA",
 				vdev->ndev->name);
-				goto INTA_MODE;
+			goto INTA_MODE;
 		}
 
 		msix_idx = (vdev->vpaths[0].handle->vpath->vp_id *
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
index 0c3b5de..709d20d 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
@@ -12,6 +12,7 @@
  * Copyright(c) 2002-2010 Exar Corp.
  ******************************************************************************/
 #include <linux/etherdevice.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/prefetch.h>
 
 #include "vxge-traffic.h"
@@ -1398,11 +1399,7 @@
 		VXGE_HW_NODBW_GET_NO_SNOOP(no_snoop),
 		&fifo->nofl_db->control_0);
 
-	mmiowb();
-
 	writeq(txdl_ptr, &fifo->nofl_db->txdl_ptr);
-
-	mmiowb();
 }
 
 /**
@@ -1694,17 +1691,10 @@
  */
 void vxge_hw_fifo_txdl_free(struct __vxge_hw_fifo *fifo, void *txdlh)
 {
-	struct __vxge_hw_fifo_txdl_priv *txdl_priv;
-	u32 max_frags;
 	struct __vxge_hw_channel *channel;
 
 	channel = &fifo->channel;
 
-	txdl_priv = __vxge_hw_fifo_txdl_priv(fifo,
-			(struct vxge_hw_fifo_txd *)txdlh);
-
-	max_frags = fifo->config->max_frags;
-
 	vxge_hw_channel_dtr_free(channel, txdlh);
 }
 
@@ -2261,7 +2251,7 @@
 {
 	struct __vxge_hw_device *hldev = vp->vpath->hldev;
 
-	if ((hldev->config.intr_mode == VXGE_HW_INTR_MODE_MSIX_ONE_SHOT))
+	if (hldev->config.intr_mode == VXGE_HW_INTR_MODE_MSIX_ONE_SHOT)
 		__vxge_hw_pio_mem_write32_upper(
 			(u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
 			&hldev->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index 66f15b0..bac5be4 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Netronome device configuration
 #
@@ -19,7 +20,8 @@
 	tristate "Netronome(R) NFP4000/NFP6000 NIC driver"
 	depends on PCI && PCI_MSI
 	depends on VXLAN || VXLAN=n
-	depends on MAY_USE_DEVLINK
+	depends on TLS && TLS_DEVICE || TLS_DEVICE=n
+	select NET_DEVLINK
 	---help---
 	  This driver supports the Netronome(R) NFP4000/NFP6000 based
 	  cards working as an advanced Ethernet NIC.  It works with both
diff --git a/drivers/net/ethernet/netronome/Makefile b/drivers/net/ethernet/netronome/Makefile
index 7fb3b84..d9a3948 100644
--- a/drivers/net/ethernet/netronome/Makefile
+++ b/drivers/net/ethernet/netronome/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Netronome network device drivers
 #
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 4afb103..d31772a 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -15,6 +15,9 @@
 	    nfpcore/nfp_resource.o \
 	    nfpcore/nfp_rtsym.o \
 	    nfpcore/nfp_target.o \
+	    ccm.o \
+	    ccm_mbox.o \
+	    devlink_param.o \
 	    nfp_asm.o \
 	    nfp_app.o \
 	    nfp_app_nic.o \
@@ -33,6 +36,11 @@
 	    nfp_shared_buf.o \
 	    nic/main.o
 
+ifeq ($(CONFIG_TLS_DEVICE),y)
+nfp-objs += \
+	    crypto/tls.o
+endif
+
 ifeq ($(CONFIG_NFP_APP_FLOWER),y)
 nfp-objs += \
 	    flower/action.o \
@@ -42,7 +50,8 @@
 	    flower/match.o \
 	    flower/metadata.o \
 	    flower/offload.o \
-	    flower/tunnel_conf.o
+	    flower/tunnel_conf.o \
+	    flower/qos_conf.o
 endif
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
@@ -56,7 +65,9 @@
 
 ifeq ($(CONFIG_NFP_APP_ABM_NIC),y)
 nfp-objs += \
+	    abm/cls.o \
 	    abm/ctrl.o \
+	    abm/qdisc.o \
 	    abm/main.o
 endif
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c
new file mode 100644
index 0000000..9f8a1f6
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfp_app.h"
+#include "../nfp_net_repr.h"
+#include "main.h"
+
+struct nfp_abm_u32_match {
+	u32 handle;
+	u32 band;
+	u8 mask;
+	u8 val;
+	struct list_head list;
+};
+
+static bool
+nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
+			__be16 proto, struct netlink_ext_ack *extack)
+{
+	struct tc_u32_key *k;
+	unsigned int tos_off;
+
+	if (knode->exts && tcf_exts_has_actions(knode->exts)) {
+		NL_SET_ERR_MSG_MOD(extack, "action offload not supported");
+		return false;
+	}
+	if (knode->link_handle) {
+		NL_SET_ERR_MSG_MOD(extack, "linking not supported");
+		return false;
+	}
+	if (knode->sel->flags != TC_U32_TERMINAL) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "flags must be equal to TC_U32_TERMINAL");
+		return false;
+	}
+	if (knode->sel->off || knode->sel->offshift || knode->sel->offmask ||
+	    knode->sel->offoff || knode->fshift) {
+		NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported");
+		return false;
+	}
+	if (knode->sel->hoff || knode->sel->hmask) {
+		NL_SET_ERR_MSG_MOD(extack, "hashing not supported");
+		return false;
+	}
+	if (knode->val || knode->mask) {
+		NL_SET_ERR_MSG_MOD(extack, "matching on mark not supported");
+		return false;
+	}
+	if (knode->res && knode->res->class) {
+		NL_SET_ERR_MSG_MOD(extack, "setting non-0 class not supported");
+		return false;
+	}
+	if (knode->res && knode->res->classid >= abm->num_bands) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "classid higher than number of bands");
+		return false;
+	}
+	if (knode->sel->nkeys != 1) {
+		NL_SET_ERR_MSG_MOD(extack, "exactly one key required");
+		return false;
+	}
+
+	switch (proto) {
+	case htons(ETH_P_IP):
+		tos_off = 16;
+		break;
+	case htons(ETH_P_IPV6):
+		tos_off = 20;
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "only IP and IPv6 supported as filter protocol");
+		return false;
+	}
+
+	k = &knode->sel->keys[0];
+	if (k->offmask) {
+		NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported");
+		return false;
+	}
+	if (k->off) {
+		NL_SET_ERR_MSG_MOD(extack, "only DSCP fields can be matched");
+		return false;
+	}
+	if (k->val & ~k->mask) {
+		NL_SET_ERR_MSG_MOD(extack, "mask does not cover the key");
+		return false;
+	}
+	if (be32_to_cpu(k->mask) >> tos_off & ~abm->dscp_mask) {
+		NL_SET_ERR_MSG_MOD(extack, "only high DSCP class selector bits can be used");
+		nfp_err(abm->app->cpp,
+			"u32 offload: requested mask %x FW can support only %x\n",
+			be32_to_cpu(k->mask) >> tos_off, abm->dscp_mask);
+		return false;
+	}
+
+	return true;
+}
+
+/* This filter list -> map conversion is O(n * m), we expect single digit or
+ * low double digit number of prios and likewise for the filters.  Also u32
+ * doesn't report stats, so it's really only setup time cost.
+ */
+static unsigned int
+nfp_abm_find_band_for_prio(struct nfp_abm_link *alink, unsigned int prio)
+{
+	struct nfp_abm_u32_match *iter;
+
+	list_for_each_entry(iter, &alink->dscp_map, list)
+		if ((prio & iter->mask) == iter->val)
+			return iter->band;
+
+	return alink->def_band;
+}
+
+static int nfp_abm_update_band_map(struct nfp_abm_link *alink)
+{
+	unsigned int i, bits_per_prio, prios_per_word, base_shift;
+	struct nfp_abm *abm = alink->abm;
+	u32 field_mask;
+
+	alink->has_prio = !list_empty(&alink->dscp_map);
+
+	bits_per_prio = roundup_pow_of_two(order_base_2(abm->num_bands));
+	field_mask = (1 << bits_per_prio) - 1;
+	prios_per_word = sizeof(u32) * BITS_PER_BYTE / bits_per_prio;
+
+	/* FW mask applies from top bits */
+	base_shift = 8 - order_base_2(abm->num_prios);
+
+	for (i = 0; i < abm->num_prios; i++) {
+		unsigned int offset;
+		u32 *word;
+		u8 band;
+
+		word = &alink->prio_map[i / prios_per_word];
+		offset = (i % prios_per_word) * bits_per_prio;
+
+		band = nfp_abm_find_band_for_prio(alink, i << base_shift);
+
+		*word &= ~(field_mask << offset);
+		*word |= band << offset;
+	}
+
+	/* Qdisc offload status may change if has_prio changed */
+	nfp_abm_qdisc_offload_update(alink);
+
+	return nfp_abm_ctrl_prio_map_update(alink, alink->prio_map);
+}
+
+static void
+nfp_abm_u32_knode_delete(struct nfp_abm_link *alink,
+			 struct tc_cls_u32_knode *knode)
+{
+	struct nfp_abm_u32_match *iter;
+
+	list_for_each_entry(iter, &alink->dscp_map, list)
+		if (iter->handle == knode->handle) {
+			list_del(&iter->list);
+			kfree(iter);
+			nfp_abm_update_band_map(alink);
+			return;
+		}
+}
+
+static int
+nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
+			  struct tc_cls_u32_knode *knode,
+			  __be16 proto, struct netlink_ext_ack *extack)
+{
+	struct nfp_abm_u32_match *match = NULL, *iter;
+	unsigned int tos_off;
+	u8 mask, val;
+	int err;
+
+	if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) {
+		err = -EOPNOTSUPP;
+		goto err_delete;
+	}
+
+	tos_off = proto == htons(ETH_P_IP) ? 16 : 20;
+
+	/* Extract the DSCP Class Selector bits */
+	val = be32_to_cpu(knode->sel->keys[0].val) >> tos_off & 0xff;
+	mask = be32_to_cpu(knode->sel->keys[0].mask) >> tos_off & 0xff;
+
+	/* Check that there is no conflicting mapping and find match by handle */
+	list_for_each_entry(iter, &alink->dscp_map, list) {
+		u32 cmask;
+
+		if (iter->handle == knode->handle) {
+			match = iter;
+			continue;
+		}
+
+		cmask = iter->mask & mask;
+		if ((iter->val & cmask) == (val & cmask) &&
+		    iter->band != knode->res->classid) {
+			NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter");
+			err = -EOPNOTSUPP;
+			goto err_delete;
+		}
+	}
+
+	if (!match) {
+		match = kzalloc(sizeof(*match), GFP_KERNEL);
+		if (!match) {
+			err = -ENOMEM;
+			goto err_delete;
+		}
+
+		list_add(&match->list, &alink->dscp_map);
+	}
+	match->handle = knode->handle;
+	match->band = knode->res->classid;
+	match->mask = mask;
+	match->val = val;
+
+	err = nfp_abm_update_band_map(alink);
+	if (err)
+		goto err_delete;
+
+	return 0;
+
+err_delete:
+	nfp_abm_u32_knode_delete(alink, knode);
+	return err;
+}
+
+static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type,
+				     void *type_data, void *cb_priv)
+{
+	struct tc_cls_u32_offload *cls_u32 = type_data;
+	struct nfp_repr *repr = cb_priv;
+	struct nfp_abm_link *alink;
+
+	alink = repr->app_priv;
+
+	if (type != TC_SETUP_CLSU32) {
+		NL_SET_ERR_MSG_MOD(cls_u32->common.extack,
+				   "only offload of u32 classifier supported");
+		return -EOPNOTSUPP;
+	}
+	if (!tc_cls_can_offload_and_chain0(repr->netdev, &cls_u32->common))
+		return -EOPNOTSUPP;
+
+	if (cls_u32->common.protocol != htons(ETH_P_IP) &&
+	    cls_u32->common.protocol != htons(ETH_P_IPV6)) {
+		NL_SET_ERR_MSG_MOD(cls_u32->common.extack,
+				   "only IP and IPv6 supported as filter protocol");
+		return -EOPNOTSUPP;
+	}
+
+	switch (cls_u32->command) {
+	case TC_CLSU32_NEW_KNODE:
+	case TC_CLSU32_REPLACE_KNODE:
+		return nfp_abm_u32_knode_replace(alink, &cls_u32->knode,
+						 cls_u32->common.protocol,
+						 cls_u32->common.extack);
+	case TC_CLSU32_DELETE_KNODE:
+		nfp_abm_u32_knode_delete(alink, &cls_u32->knode);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(nfp_abm_block_cb_list);
+
+int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr,
+			    struct flow_block_offload *f)
+{
+	return flow_block_cb_setup_simple(f, &nfp_abm_block_cb_list,
+					  nfp_abm_setup_tc_block_cb,
+					  repr, repr, true);
+}
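
(A note on the two magic shifts in the new cls.c: the u32 key matches the first 32-bit word of the IP header, where the IPv4 TOS byte occupies bits 23:16 (hence tos_off = 16) and the IPv6 traffic class occupies bits 27:20 (tos_off = 20). nfp_abm_update_band_map() then packs one band id per priority into u32 words. Worked example with assumed values, 4 bands, so 2 bits per priority and 16 priorities per word:

	u32 word = 0;
	unsigned int bits_per_prio = 2;	/* roundup_pow_of_two(order_base_2(4)) */
	unsigned int prios_per_word = 32 / bits_per_prio;	/* 16 */
	u32 field_mask = (1 << bits_per_prio) - 1;		/* 0x3 */
	unsigned int i = 5, band = 3;
	unsigned int offset = (i % prios_per_word) * bits_per_prio;	/* 10 */

	word &= ~(field_mask << offset);
	word |= band << offset;		/* word == 0x3 << 10 == 0xc00 */
)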
diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index b157ccd..69e84ff 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -1,38 +1,9 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
+#include <linux/bitops.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 
 #include "../nfpcore/nfp_cpp.h"
 #include "../nfpcore/nfp_nffw.h"
@@ -42,47 +13,58 @@
 #include "../nfp_net.h"
 #include "main.h"
 
-#define NFP_QLVL_SYM_NAME	"_abi_nfd_out_q_lvls_%u"
+#define NFP_NUM_PRIOS_SYM_NAME	"_abi_pci_dscp_num_prio_%u"
+#define NFP_NUM_BANDS_SYM_NAME	"_abi_pci_dscp_num_band_%u"
+#define NFP_ACT_MASK_SYM_NAME	"_abi_nfd_out_q_actions_%u"
+
+#define NFP_RED_SUPPORT_SYM_NAME	"_abi_nfd_out_red_offload_%u"
+
+#define NFP_QLVL_SYM_NAME	"_abi_nfd_out_q_lvls_%u%s"
 #define NFP_QLVL_STRIDE		16
 #define NFP_QLVL_BLOG_BYTES	0
 #define NFP_QLVL_BLOG_PKTS	4
 #define NFP_QLVL_THRS		8
+#define NFP_QLVL_ACT		12
 
-#define NFP_QMSTAT_SYM_NAME	"_abi_nfdqm%u_stats"
+#define NFP_QMSTAT_SYM_NAME	"_abi_nfdqm%u_stats%s"
 #define NFP_QMSTAT_STRIDE	32
 #define NFP_QMSTAT_NON_STO	0
 #define NFP_QMSTAT_STO		8
 #define NFP_QMSTAT_DROP		16
 #define NFP_QMSTAT_ECN		24
 
-static unsigned long long
-nfp_abm_q_lvl_thrs(struct nfp_abm_link *alink, unsigned int queue)
-{
-	return alink->abm->q_lvls->addr +
-		(alink->queue_base + queue) * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
-}
+#define NFP_Q_STAT_SYM_NAME	"_abi_nfd_rxq_stats%u%s"
+#define NFP_Q_STAT_STRIDE	16
+#define NFP_Q_STAT_PKTS		0
+#define NFP_Q_STAT_BYTES	8
+
+#define NFP_NET_ABM_MBOX_CMD		NFP_NET_CFG_MBOX_SIMPLE_CMD
+#define NFP_NET_ABM_MBOX_RET		NFP_NET_CFG_MBOX_SIMPLE_RET
+#define NFP_NET_ABM_MBOX_DATALEN	NFP_NET_CFG_MBOX_SIMPLE_VAL
+#define NFP_NET_ABM_MBOX_RESERVED	(NFP_NET_CFG_MBOX_SIMPLE_VAL + 4)
+#define NFP_NET_ABM_MBOX_DATA		(NFP_NET_CFG_MBOX_SIMPLE_VAL + 8)
 
 static int
 nfp_abm_ctrl_stat(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
-		  unsigned int stride, unsigned int offset, unsigned int i,
-		  bool is_u64, u64 *res)
+		  unsigned int stride, unsigned int offset, unsigned int band,
+		  unsigned int queue, bool is_u64, u64 *res)
 {
 	struct nfp_cpp *cpp = alink->abm->app->cpp;
-	u32 val32, mur;
-	u64 val, addr;
+	u64 val, sym_offset;
+	unsigned int qid;
+	u32 val32;
 	int err;
 
-	mur = NFP_CPP_ATOMIC_RD(sym->target, sym->domain);
+	qid = band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue;
 
-	addr = sym->addr + (alink->queue_base + i) * stride + offset;
+	sym_offset = qid * stride + offset;
 	if (is_u64)
-		err = nfp_cpp_readq(cpp, mur, addr, &val);
+		err = __nfp_rtsym_readq(cpp, sym, 3, 0, sym_offset, &val);
 	else
-		err = nfp_cpp_readl(cpp, mur, addr, &val32);
+		err = __nfp_rtsym_readl(cpp, sym, 3, 0, sym_offset, &val32);
 	if (err) {
-		nfp_err(cpp,
-			"RED offload reading stat failed on vNIC %d queue %d\n",
-			alink->id, i);
+		nfp_err(cpp, "RED offload reading stat failed on vNIC %d band %d queue %d (+ %d)\n",
+			alink->id, band, queue, alink->queue_base);
 		return err;
 	}
 
@@ -90,176 +72,179 @@
 	return 0;
 }
 
-static int
-nfp_abm_ctrl_stat_all(struct nfp_abm_link *alink, const struct nfp_rtsym *sym,
-		      unsigned int stride, unsigned int offset, bool is_u64,
-		      u64 *res)
+int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val)
 {
-	u64 val, sum = 0;
-	unsigned int i;
+	struct nfp_cpp *cpp = abm->app->cpp;
+	u64 sym_offset;
 	int err;
 
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		err = nfp_abm_ctrl_stat(alink, sym, stride, offset, i,
-					is_u64, &val);
-		if (err)
-			return err;
-		sum += val;
-	}
+	__clear_bit(id, abm->threshold_undef);
+	if (abm->thresholds[id] == val)
+		return 0;
 
-	*res = sum;
-	return 0;
-}
-
-int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i, u32 val)
-{
-	struct nfp_cpp *cpp = alink->abm->app->cpp;
-	u32 muw;
-	int err;
-
-	muw = NFP_CPP_ATOMIC_WR(alink->abm->q_lvls->target,
-				alink->abm->q_lvls->domain);
-
-	err = nfp_cpp_writel(cpp, muw, nfp_abm_q_lvl_thrs(alink, i), val);
+	sym_offset = id * NFP_QLVL_STRIDE + NFP_QLVL_THRS;
+	err = __nfp_rtsym_writel(cpp, abm->q_lvls, 4, 0, sym_offset, val);
 	if (err) {
-		nfp_err(cpp, "RED offload setting level failed on vNIC %d queue %d\n",
-			alink->id, i);
+		nfp_err(cpp,
+			"RED offload setting level failed on subqueue %d\n",
+			id);
 		return err;
 	}
 
+	abm->thresholds[id] = val;
 	return 0;
 }
 
-int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val)
+int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int band,
+			   unsigned int queue, u32 val)
 {
-	int i, err;
+	unsigned int threshold;
 
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		err = nfp_abm_ctrl_set_q_lvl(alink, i, val);
-		if (err)
-			return err;
+	threshold = band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue;
+
+	return __nfp_abm_ctrl_set_q_lvl(alink->abm, threshold, val);
+}
+
+int __nfp_abm_ctrl_set_q_act(struct nfp_abm *abm, unsigned int id,
+			     enum nfp_abm_q_action act)
+{
+	struct nfp_cpp *cpp = abm->app->cpp;
+	u64 sym_offset;
+	int err;
+
+	if (abm->actions[id] == act)
+		return 0;
+
+	sym_offset = id * NFP_QLVL_STRIDE + NFP_QLVL_ACT;
+	err = __nfp_rtsym_writel(cpp, abm->q_lvls, 4, 0, sym_offset, act);
+	if (err) {
+		nfp_err(cpp,
+			"RED offload setting action failed on subqueue %d\n",
+			id);
+		return err;
 	}
 
+	abm->actions[id] = act;
 	return 0;
 }
 
-u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i)
+int nfp_abm_ctrl_set_q_act(struct nfp_abm_link *alink, unsigned int band,
+			   unsigned int queue, enum nfp_abm_q_action act)
 {
-	u64 val;
+	unsigned int qid;
 
-	if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, NFP_QMSTAT_STRIDE,
-			      NFP_QMSTAT_NON_STO, i, true, &val))
-		return 0;
-	return val;
+	qid = band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue;
+
+	return __nfp_abm_ctrl_set_q_act(alink->abm, qid, act);
 }
 
-u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i)
+u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int queue)
 {
-	u64 val;
+	unsigned int band;
+	u64 val, sum = 0;
 
-	if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats, NFP_QMSTAT_STRIDE,
-			      NFP_QMSTAT_STO, i, true, &val))
-		return 0;
-	return val;
+	for (band = 0; band < alink->abm->num_bands; band++) {
+		if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
+				      NFP_QMSTAT_STRIDE, NFP_QMSTAT_NON_STO,
+				      band, queue, true, &val))
+			return 0;
+		sum += val;
+	}
+
+	return sum;
 }
 
-int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
-			      struct nfp_alink_stats *stats)
+u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int queue)
+{
+	unsigned int band;
+	u64 val, sum = 0;
+
+	for (band = 0; band < alink->abm->num_bands; band++) {
+		if (nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
+				      NFP_QMSTAT_STRIDE, NFP_QMSTAT_STO,
+				      band, queue, true, &val))
+			return 0;
+		sum += val;
+	}
+
+	return sum;
+}
+
+static int
+nfp_abm_ctrl_stat_basic(struct nfp_abm_link *alink, unsigned int band,
+			unsigned int queue, unsigned int off, u64 *val)
+{
+	if (!nfp_abm_has_prio(alink->abm)) {
+		if (!band) {
+			unsigned int id = alink->queue_base + queue;
+
+			*val = nn_readq(alink->vnic,
+					NFP_NET_CFG_RXR_STATS(id) + off);
+		} else {
+			*val = 0;
+		}
+
+		return 0;
+	} else {
+		return nfp_abm_ctrl_stat(alink, alink->abm->q_stats,
+					 NFP_Q_STAT_STRIDE, off, band, queue,
+					 true, val);
+	}
+}
+
+int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int band,
+			      unsigned int queue, struct nfp_alink_stats *stats)
 {
 	int err;
 
-	stats->tx_pkts = nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
-	stats->tx_bytes = nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
+	err = nfp_abm_ctrl_stat_basic(alink, band, queue, NFP_Q_STAT_PKTS,
+				      &stats->tx_pkts);
+	if (err)
+		return err;
 
-	err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls,
-				NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
-				i, false, &stats->backlog_bytes);
+	err = nfp_abm_ctrl_stat_basic(alink, band, queue, NFP_Q_STAT_BYTES,
+				      &stats->tx_bytes);
+	if (err)
+		return err;
+
+	err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls, NFP_QLVL_STRIDE,
+				NFP_QLVL_BLOG_BYTES, band, queue, false,
+				&stats->backlog_bytes);
 	if (err)
 		return err;
 
 	err = nfp_abm_ctrl_stat(alink, alink->abm->q_lvls,
 				NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
-				i, false, &stats->backlog_pkts);
+				band, queue, false, &stats->backlog_pkts);
 	if (err)
 		return err;
 
 	err = nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
 				NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				i, true, &stats->drops);
+				band, queue, true, &stats->drops);
 	if (err)
 		return err;
 
 	return nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
 				 NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				 i, true, &stats->overlimits);
+				 band, queue, true, &stats->overlimits);
 }
 
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats)
-{
-	u64 pkts = 0, bytes = 0;
-	int i, err;
-
-	for (i = 0; i < alink->vnic->max_rx_rings; i++) {
-		pkts += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i));
-		bytes += nn_readq(alink->vnic, NFP_NET_CFG_RXR_STATS(i) + 8);
-	}
-	stats->tx_pkts = pkts;
-	stats->tx_bytes = bytes;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_BYTES,
-				    false, &stats->backlog_bytes);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->q_lvls,
-				    NFP_QLVL_STRIDE, NFP_QLVL_BLOG_PKTS,
-				    false, &stats->backlog_pkts);
-	if (err)
-		return err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &stats->drops);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &stats->overlimits);
-}
-
-int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
+int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink,
+			       unsigned int band, unsigned int queue,
 			       struct nfp_alink_xstats *xstats)
 {
 	int err;
 
 	err = nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
 				NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				i, true, &xstats->pdrop);
+				band, queue, true, &xstats->pdrop);
 	if (err)
 		return err;
 
 	return nfp_abm_ctrl_stat(alink, alink->abm->qm_stats,
 				 NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				 i, true, &xstats->ecn_marked);
-}
-
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats)
-{
-	int err;
-
-	err = nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				    NFP_QMSTAT_STRIDE, NFP_QMSTAT_DROP,
-				    true, &xstats->pdrop);
-	if (err)
-		return err;
-
-	return nfp_abm_ctrl_stat_all(alink, alink->abm->qm_stats,
-				     NFP_QMSTAT_STRIDE, NFP_QMSTAT_ECN,
-				     true, &xstats->ecn_marked);
+				 band, queue, true, &xstats->ecn_marked);
 }
 
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm)
@@ -274,10 +259,68 @@
 			    NULL, 0, NULL, 0);
 }
 
-void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink)
+int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed)
+{
+	const u32 cmd = NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET;
+	struct nfp_net *nn = alink->vnic;
+	unsigned int i;
+	int err;
+
+	err = nfp_net_mbox_lock(nn, alink->abm->prio_map_len);
+	if (err)
+		return err;
+
+	/* Write data_len and wipe reserved */
+	nn_writeq(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATALEN,
+		  alink->abm->prio_map_len);
+
+	for (i = 0; i < alink->abm->prio_map_len; i += sizeof(u32))
+		nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATA + i,
+			  packed[i / sizeof(u32)]);
+
+	err = nfp_net_mbox_reconfig_and_unlock(nn, cmd);
+	if (err)
+		nfp_err(alink->abm->app->cpp,
+			"setting DSCP -> VQ map failed with error %d\n", err);
+	return err;
+}
+
+static int nfp_abm_ctrl_prio_check_params(struct nfp_abm_link *alink)
+{
+	struct nfp_abm *abm = alink->abm;
+	struct nfp_net *nn = alink->vnic;
+	unsigned int min_mbox_sz;
+
+	if (!nfp_abm_has_prio(alink->abm))
+		return 0;
+
+	min_mbox_sz = NFP_NET_ABM_MBOX_DATA + alink->abm->prio_map_len;
+	if (nn->tlv_caps.mbox_len < min_mbox_sz) {
+		nfp_err(abm->app->pf->cpp, "vNIC mailbox too small for prio offload: %u, need: %u\n",
+			nn->tlv_caps.mbox_len, min_mbox_sz);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int nfp_abm_ctrl_read_params(struct nfp_abm_link *alink)
 {
 	alink->queue_base = nn_readl(alink->vnic, NFP_NET_CFG_START_RXQ);
 	alink->queue_base /= alink->vnic->stride_rx;
+
+	return nfp_abm_ctrl_prio_check_params(alink);
+}
+
+static unsigned int nfp_abm_ctrl_prio_map_size(struct nfp_abm *abm)
+{
+	unsigned int size;
+
+	size = roundup_pow_of_two(order_base_2(abm->num_bands));
+	size = DIV_ROUND_UP(size * abm->num_prios, BITS_PER_BYTE);
+	size = round_up(size, sizeof(u32));
+
+	return size;
 }
 
 static const struct nfp_rtsym *
@@ -290,10 +333,10 @@
 		nfp_err(pf->cpp, "Symbol '%s' not found\n", name);
 		return ERR_PTR(-ENOENT);
 	}
-	if (sym->size != size) {
+	if (nfp_rtsym_size(sym) != size) {
 		nfp_err(pf->cpp,
 			"Symbol '%s' wrong size: expected %u got %llu\n",
-			name, size, sym->size);
+			name, size, nfp_rtsym_size(sym));
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -301,33 +344,86 @@
 }
 
 static const struct nfp_rtsym *
-nfp_abm_ctrl_find_q_rtsym(struct nfp_pf *pf, const char *name,
-			  unsigned int size)
+nfp_abm_ctrl_find_q_rtsym(struct nfp_abm *abm, const char *name_fmt,
+			  size_t size)
 {
-	return nfp_abm_ctrl_find_rtsym(pf, name, size * NFP_NET_MAX_RX_RINGS);
+	char pf_symbol[64];
+
+	size = array3_size(size, abm->num_bands, NFP_NET_MAX_RX_RINGS);
+	snprintf(pf_symbol, sizeof(pf_symbol), name_fmt,
+		 abm->pf_id, nfp_abm_has_prio(abm) ? "_per_band" : "");
+
+	return nfp_abm_ctrl_find_rtsym(abm->app->pf, pf_symbol, size);
 }
 
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm)
 {
 	struct nfp_pf *pf = abm->app->pf;
 	const struct nfp_rtsym *sym;
-	unsigned int pf_id;
-	char pf_symbol[64];
+	int res;
 
-	pf_id =	nfp_cppcore_pcie_unit(pf->cpp);
-	abm->pf_id = pf_id;
+	abm->pf_id = nfp_cppcore_pcie_unit(pf->cpp);
 
-	snprintf(pf_symbol, sizeof(pf_symbol), NFP_QLVL_SYM_NAME, pf_id);
-	sym = nfp_abm_ctrl_find_q_rtsym(pf, pf_symbol, NFP_QLVL_STRIDE);
+	/* Check if Qdisc offloads are supported */
+	res = nfp_pf_rtsym_read_optional(pf, NFP_RED_SUPPORT_SYM_NAME, 1);
+	if (res < 0)
+		return res;
+	abm->red_support = res;
+
+	/* Read count of prios and prio bands */
+	res = nfp_pf_rtsym_read_optional(pf, NFP_NUM_BANDS_SYM_NAME, 1);
+	if (res < 0)
+		return res;
+	abm->num_bands = res;
+
+	res = nfp_pf_rtsym_read_optional(pf, NFP_NUM_PRIOS_SYM_NAME, 1);
+	if (res < 0)
+		return res;
+	abm->num_prios = res;
+
+	/* Read available actions */
+	res = nfp_pf_rtsym_read_optional(pf, NFP_ACT_MASK_SYM_NAME,
+					 BIT(NFP_ABM_ACT_MARK_DROP));
+	if (res < 0)
+		return res;
+	abm->action_mask = res;
+
+	abm->prio_map_len = nfp_abm_ctrl_prio_map_size(abm);
+	abm->dscp_mask = GENMASK(7, 8 - order_base_2(abm->num_prios));
+
+	/* Check values are sane, U16_MAX is arbitrarily chosen as max */
+	if (!is_power_of_2(abm->num_bands) || !is_power_of_2(abm->num_prios) ||
+	    abm->num_bands > U16_MAX || abm->num_prios > U16_MAX ||
+	    (abm->num_bands == 1) != (abm->num_prios == 1)) {
+		nfp_err(pf->cpp,
+			"invalid priomap description num bands: %u and num prios: %u\n",
+			abm->num_bands, abm->num_prios);
+		return -EINVAL;
+	}
+
+	/* Find level and stat symbols */
+	if (!abm->red_support)
+		return 0;
+
+	sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_QLVL_SYM_NAME,
+					NFP_QLVL_STRIDE);
 	if (IS_ERR(sym))
 		return PTR_ERR(sym);
 	abm->q_lvls = sym;
 
-	snprintf(pf_symbol, sizeof(pf_symbol), NFP_QMSTAT_SYM_NAME, pf_id);
-	sym = nfp_abm_ctrl_find_q_rtsym(pf, pf_symbol, NFP_QMSTAT_STRIDE);
+	sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_QMSTAT_SYM_NAME,
+					NFP_QMSTAT_STRIDE);
 	if (IS_ERR(sym))
 		return PTR_ERR(sym);
 	abm->qm_stats = sym;
 
+	if (nfp_abm_has_prio(abm)) {
+		sym = nfp_abm_ctrl_find_q_rtsym(abm, NFP_Q_STAT_SYM_NAME,
+						NFP_Q_STAT_STRIDE);
+		if (IS_ERR(sym))
+			return PTR_ERR(sym);
+		abm->q_stats = sym;
+	}
+
 	return 0;
 }
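
(nfp_abm_ctrl_prio_map_size() above sizes the packed map that cls.c fills in: bits per priority (rounded up to a power of two), times the number of priorities, rounded up to whole bytes and then to whole u32 words, since nfp_abm_ctrl_prio_map_update() writes it to the vNIC mailbox one u32 at a time. With assumed values num_bands = 4 and num_prios = 64:

	size = roundup_pow_of_two(order_base_2(4));	/* 2 bits per prio */
	size = DIV_ROUND_UP(2 * 64, BITS_PER_BYTE);	/* 128 bits -> 16 bytes */
	size = round_up(16, sizeof(u32));		/* already u32-aligned */
)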
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index b84a6c2..9183b3e 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -1,46 +1,14 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
+#include <linux/bitmap.h>
 #include <linux/etherdevice.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
-#include <net/pkt_cls.h>
-#include <net/pkt_sched.h>
-#include <net/red.h>
 
 #include "../nfpcore/nfp.h"
 #include "../nfpcore/nfp_cpp.h"
@@ -59,269 +27,6 @@
 }
 
 static int
-__nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		     u32 handle, unsigned int qs, u32 init_val)
-{
-	struct nfp_port *port = nfp_port_from_netdev(netdev);
-	int ret;
-
-	ret = nfp_abm_ctrl_set_all_q_lvls(alink, init_val);
-	memset(alink->qdiscs, 0, sizeof(*alink->qdiscs) * alink->num_qdiscs);
-
-	alink->parent = handle;
-	alink->num_qdiscs = qs;
-	port->tc_offload_cnt = qs;
-
-	return ret;
-}
-
-static void
-nfp_abm_reset_root(struct net_device *netdev, struct nfp_abm_link *alink,
-		   u32 handle, unsigned int qs)
-{
-	__nfp_abm_reset_root(netdev, alink, handle, qs, ~0);
-}
-
-static int
-nfp_abm_red_find(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	unsigned int i = TC_H_MIN(opt->parent) - 1;
-
-	if (opt->parent == TC_H_ROOT)
-		i = 0;
-	else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent))
-		i = TC_H_MIN(opt->parent) - 1;
-	else
-		return -EOPNOTSUPP;
-
-	if (i >= alink->num_qdiscs || opt->handle != alink->qdiscs[i].handle)
-		return -EOPNOTSUPP;
-
-	return i;
-}
-
-static void
-nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
-		    u32 handle)
-{
-	unsigned int i;
-
-	for (i = 0; i < alink->num_qdiscs; i++)
-		if (handle == alink->qdiscs[i].handle)
-			break;
-	if (i == alink->num_qdiscs)
-		return;
-
-	if (alink->parent == TC_H_ROOT) {
-		nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-	} else {
-		nfp_abm_ctrl_set_q_lvl(alink, i, ~0);
-		memset(&alink->qdiscs[i], 0, sizeof(*alink->qdiscs));
-	}
-}
-
-static int
-nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
-		    struct tc_red_qopt_offload *opt)
-{
-	bool existing;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	existing = i >= 0;
-
-	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {
-		nfp_warn(alink->abm->app->cpp,
-			 "RED offload failed - unsupported parameters\n");
-		err = -EINVAL;
-		goto err_destroy;
-	}
-
-	if (existing) {
-		if (alink->parent == TC_H_ROOT)
-			err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
-		else
-			err = nfp_abm_ctrl_set_q_lvl(alink, i, opt->set.min);
-		if (err)
-			goto err_destroy;
-		return 0;
-	}
-
-	if (opt->parent == TC_H_ROOT) {
-		i = 0;
-		err = __nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 1,
-					   opt->set.min);
-	} else if (TC_H_MAJ(alink->parent) == TC_H_MAJ(opt->parent)) {
-		i = TC_H_MIN(opt->parent) - 1;
-		err = nfp_abm_ctrl_set_q_lvl(alink, i, opt->set.min);
-	} else {
-		return -EINVAL;
-	}
-	/* Set the handle to try full clean up, in case IO failed */
-	alink->qdiscs[i].handle = opt->handle;
-	if (err)
-		goto err_destroy;
-
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink, &alink->qdiscs[i].stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i,
-						&alink->qdiscs[i].stats);
-	if (err)
-		goto err_destroy;
-
-	if (opt->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink,
-					       &alink->qdiscs[i].xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i,
-						 &alink->qdiscs[i].xstats);
-	if (err)
-		goto err_destroy;
-
-	alink->qdiscs[i].stats.backlog_pkts = 0;
-	alink->qdiscs[i].stats.backlog_bytes = 0;
-
-	return 0;
-err_destroy:
-	/* If the qdisc keeps on living, but we can't offload undo changes */
-	if (existing) {
-		opt->set.qstats->qlen -= alink->qdiscs[i].stats.backlog_pkts;
-		opt->set.qstats->backlog -=
-			alink->qdiscs[i].stats.backlog_bytes;
-	}
-	nfp_abm_red_destroy(netdev, alink, opt->handle);
-
-	return err;
-}
-
-static void
-nfp_abm_update_stats(struct nfp_alink_stats *new, struct nfp_alink_stats *old,
-		     struct tc_qopt_offload_stats *stats)
-{
-	_bstats_update(stats->bstats, new->tx_bytes - old->tx_bytes,
-		       new->tx_pkts - old->tx_pkts);
-	stats->qstats->qlen += new->backlog_pkts - old->backlog_pkts;
-	stats->qstats->backlog += new->backlog_bytes - old->backlog_bytes;
-	stats->qstats->overlimits += new->overlimits - old->overlimits;
-	stats->qstats->drops += new->drops - old->drops;
-}
-
-static int
-nfp_abm_red_stats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	struct nfp_alink_stats *prev_stats;
-	struct nfp_alink_stats stats;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_stats = &alink->qdiscs[i].stats;
-
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_stats(alink, &stats);
-	else
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-	if (err)
-		return err;
-
-	nfp_abm_update_stats(&stats, prev_stats, &opt->stats);
-
-	*prev_stats = stats;
-
-	return 0;
-}
-
-static int
-nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
-{
-	struct nfp_alink_xstats *prev_xstats;
-	struct nfp_alink_xstats xstats;
-	int i, err;
-
-	i = nfp_abm_red_find(alink, opt);
-	if (i < 0)
-		return i;
-	prev_xstats = &alink->qdiscs[i].xstats;
-
-	if (alink->parent == TC_H_ROOT)
-		err = nfp_abm_ctrl_read_xstats(alink, &xstats);
-	else
-		err = nfp_abm_ctrl_read_q_xstats(alink, i, &xstats);
-	if (err)
-		return err;
-
-	opt->xstats->forced_mark += xstats.ecn_marked - prev_xstats->ecn_marked;
-	opt->xstats->pdrop += xstats.pdrop - prev_xstats->pdrop;
-
-	*prev_xstats = xstats;
-
-	return 0;
-}
-
-static int
-nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
-		     struct tc_red_qopt_offload *opt)
-{
-	switch (opt->command) {
-	case TC_RED_REPLACE:
-		return nfp_abm_red_replace(netdev, alink, opt);
-	case TC_RED_DESTROY:
-		nfp_abm_red_destroy(netdev, alink, opt->handle);
-		return 0;
-	case TC_RED_STATS:
-		return nfp_abm_red_stats(alink, opt);
-	case TC_RED_XSTATS:
-		return nfp_abm_red_xstats(alink, opt);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int
-nfp_abm_mq_stats(struct nfp_abm_link *alink, struct tc_mq_qopt_offload *opt)
-{
-	struct nfp_alink_stats stats;
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < alink->num_qdiscs; i++) {
-		if (alink->qdiscs[i].handle == TC_H_UNSPEC)
-			continue;
-
-		err = nfp_abm_ctrl_read_q_stats(alink, i, &stats);
-		if (err)
-			return err;
-
-		nfp_abm_update_stats(&stats, &alink->qdiscs[i].stats,
-				     &opt->stats);
-	}
-
-	return 0;
-}
-
-static int
-nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
-		    struct tc_mq_qopt_offload *opt)
-{
-	switch (opt->command) {
-	case TC_MQ_CREATE:
-		nfp_abm_reset_root(netdev, alink, opt->handle,
-				   alink->total_queues);
-		return 0;
-	case TC_MQ_DESTROY:
-		if (opt->handle == alink->parent)
-			nfp_abm_reset_root(netdev, alink, TC_H_ROOT, 0);
-		return 0;
-	case TC_MQ_STATS:
-		return nfp_abm_mq_stats(alink, opt);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int
 nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
 		 enum tc_setup_type type, void *type_data)
 {
@@ -333,16 +38,23 @@
 		return -EOPNOTSUPP;
 
 	switch (type) {
+	case TC_SETUP_ROOT_QDISC:
+		return nfp_abm_setup_root(netdev, repr->app_priv, type_data);
 	case TC_SETUP_QDISC_MQ:
 		return nfp_abm_setup_tc_mq(netdev, repr->app_priv, type_data);
 	case TC_SETUP_QDISC_RED:
 		return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data);
+	case TC_SETUP_QDISC_GRED:
+		return nfp_abm_setup_tc_gred(netdev, repr->app_priv, type_data);
+	case TC_SETUP_BLOCK:
+		return nfp_abm_setup_cls_block(netdev, repr, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
-static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
+static struct net_device *
+nfp_abm_repr_get(struct nfp_app *app, u32 port_id, bool *redir_egress)
 {
 	enum nfp_repr_type rtype;
 	struct nfp_reprs *reprs;
@@ -415,7 +127,9 @@
 
 	reprs = nfp_reprs_get_locked(app, rtype);
 	WARN(nfp_repr_get_locked(app, reprs, alink->id), "duplicate repr");
+	rtnl_lock();
 	rcu_assign_pointer(reprs->reprs[alink->id], netdev);
+	rtnl_unlock();
 
 	nfp_info(app->cpp, "%s Port %d Representor(%s) created\n",
 		 ptype == NFP_PORT_PF_PORT ? "PCIe" : "Phys",
@@ -441,7 +155,9 @@
 	netdev = nfp_repr_get_locked(app, reprs, alink->id);
 	if (!netdev)
 		return;
+	rtnl_lock();
 	rcu_assign_pointer(reprs->reprs[alink->id], NULL);
+	rtnl_unlock();
 	synchronize_rcu();
 	/* Cast to make sure nfp_repr_clean_and_free() takes a nfp_repr */
 	nfp_repr_clean_and_free((struct nfp_repr *)netdev_priv(netdev));
@@ -492,6 +208,9 @@
 	struct nfp_net *nn;
 	int err;
 
+	if (!abm->red_support)
+		return -EOPNOTSUPP;
+
 	err = nfp_abm_ctrl_qm_enable(abm);
 	if (err)
 		return err;
@@ -540,8 +259,9 @@
 {
 	struct nfp_eth_table_port *eth_port = &pf->eth_tbl->ports[id];
 	u8 mac_addr[ETH_ALEN];
-	const char *mac_str;
-	char name[32];
+	struct nfp_nsp *nsp;
+	char hwinfo[32];
+	int err;
 
 	if (id > pf->eth_tbl->count) {
 		nfp_warn(pf->cpp, "No entry for persistent MAC address\n");
@@ -549,22 +269,37 @@
 		return;
 	}
 
-	snprintf(name, sizeof(name), "eth%u.mac.pf%u",
+	snprintf(hwinfo, sizeof(hwinfo), "eth%u.mac.pf%u",
 		 eth_port->eth_index, abm->pf_id);
 
-	mac_str = nfp_hwinfo_lookup(pf->hwinfo, name);
-	if (!mac_str) {
-		nfp_warn(pf->cpp, "Can't lookup persistent MAC address (%s)\n",
-			 name);
+	nsp = nfp_nsp_open(pf->cpp);
+	if (IS_ERR(nsp)) {
+		nfp_warn(pf->cpp, "Failed to access the NSP for persistent MAC address: %ld\n",
+			 PTR_ERR(nsp));
 		eth_hw_addr_random(nn->dp.netdev);
 		return;
 	}
 
-	if (sscanf(mac_str, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+	if (!nfp_nsp_has_hwinfo_lookup(nsp)) {
+		nfp_warn(pf->cpp, "NSP doesn't support PF MAC generation\n");
+		eth_hw_addr_random(nn->dp.netdev);
+		return;
+	}
+
+	err = nfp_nsp_hwinfo_lookup(nsp, hwinfo, sizeof(hwinfo));
+	nfp_nsp_close(nsp);
+	if (err) {
+		nfp_warn(pf->cpp, "Reading persistent MAC address failed: %d\n",
+			 err);
+		eth_hw_addr_random(nn->dp.netdev);
+		return;
+	}
+
+	if (sscanf(hwinfo, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
 		   &mac_addr[0], &mac_addr[1], &mac_addr[2],
 		   &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) {
 		nfp_warn(pf->cpp, "Can't parse persistent MAC address (%s)\n",
-			 mac_str);
+			 hwinfo);
 		eth_hw_addr_random(nn->dp.netdev);
 		return;
 	}
@@ -588,31 +323,34 @@
 	alink->abm = abm;
 	alink->vnic = nn;
 	alink->id = id;
-	alink->parent = TC_H_ROOT;
 	alink->total_queues = alink->vnic->max_rx_rings;
-	alink->qdiscs = kvcalloc(alink->total_queues, sizeof(*alink->qdiscs),
-				 GFP_KERNEL);
-	if (!alink->qdiscs) {
-		err = -ENOMEM;
+
+	INIT_LIST_HEAD(&alink->dscp_map);
+
+	err = nfp_abm_ctrl_read_params(alink);
+	if (err)
 		goto err_free_alink;
-	}
+
+	alink->prio_map = kzalloc(abm->prio_map_len, GFP_KERNEL);
+	if (!alink->prio_map)
+		goto err_free_alink;
 
 	/* This is a multi-host app, make sure MAC/PHY is up, but don't
 	 * make the MAC/PHY state follow the state of any of the ports.
 	 */
 	err = nfp_eth_set_configured(app->cpp, eth_port->index, true);
 	if (err < 0)
-		goto err_free_qdiscs;
+		goto err_free_priomap;
 
 	netif_keep_dst(nn->dp.netdev);
 
 	nfp_abm_vnic_set_mac(app->pf, abm, nn, id);
-	nfp_abm_ctrl_read_params(alink);
+	INIT_RADIX_TREE(&alink->qdiscs, GFP_KERNEL);
 
 	return 0;
 
-err_free_qdiscs:
-	kvfree(alink->qdiscs);
+err_free_priomap:
+	kfree(alink->prio_map);
 err_free_alink:
 	kfree(alink);
 	return err;
@@ -623,10 +361,20 @@
 	struct nfp_abm_link *alink = nn->app_priv;
 
 	nfp_abm_kill_reprs(alink->abm, alink);
-	kvfree(alink->qdiscs);
+	WARN(!radix_tree_empty(&alink->qdiscs), "left over qdiscs\n");
+	kfree(alink->prio_map);
 	kfree(alink);
 }
 
+static int nfp_abm_vnic_init(struct nfp_app *app, struct nfp_net *nn)
+{
+	struct nfp_abm_link *alink = nn->app_priv;
+
+	if (nfp_abm_has_prio(alink->abm))
+		return nfp_abm_ctrl_prio_map_update(alink, alink->prio_map);
+	return 0;
+}
+
 static u64 *
 nfp_abm_port_get_stats(struct nfp_app *app, struct nfp_port *port, u64 *data)
 {
@@ -674,6 +422,21 @@
 	return data;
 }
 
+static int nfp_abm_fw_init_reset(struct nfp_abm *abm)
+{
+	unsigned int i;
+
+	if (!abm->red_support)
+		return 0;
+
+	for (i = 0; i < abm->num_bands * NFP_NET_MAX_RX_RINGS; i++) {
+		__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+		__nfp_abm_ctrl_set_q_act(abm, i, NFP_ABM_ACT_DROP);
+	}
+
+	return nfp_abm_ctrl_qm_disable(abm);
+}
+
 static int nfp_abm_init(struct nfp_app *app)
 {
 	struct nfp_pf *pf = app->pf;
@@ -705,15 +468,31 @@
 	if (err)
 		goto err_free_abm;
 
-	/* We start in legacy mode, make sure advanced queuing is disabled */
-	err = nfp_abm_ctrl_qm_disable(abm);
-	if (err)
+	err = -ENOMEM;
+	abm->num_thresholds = array_size(abm->num_bands, NFP_NET_MAX_RX_RINGS);
+	abm->threshold_undef = bitmap_zalloc(abm->num_thresholds, GFP_KERNEL);
+	if (!abm->threshold_undef)
 		goto err_free_abm;
 
+	abm->thresholds = kvcalloc(abm->num_thresholds,
+				   sizeof(*abm->thresholds), GFP_KERNEL);
+	if (!abm->thresholds)
+		goto err_free_thresh_umap;
+
+	abm->actions = kvcalloc(abm->num_thresholds, sizeof(*abm->actions),
+				GFP_KERNEL);
+	if (!abm->actions)
+		goto err_free_thresh;
+
+	/* We start in legacy mode, make sure advanced queuing is disabled */
+	err = nfp_abm_fw_init_reset(abm);
+	if (err)
+		goto err_free_act;
+
 	err = -ENOMEM;
 	reprs = nfp_reprs_alloc(pf->max_data_vnics);
 	if (!reprs)
-		goto err_free_abm;
+		goto err_free_act;
 	RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs);
 
 	reprs = nfp_reprs_alloc(pf->max_data_vnics);
@@ -725,6 +504,12 @@
 
 err_free_phys:
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+err_free_act:
+	kvfree(abm->actions);
+err_free_thresh:
+	kvfree(abm->thresholds);
+err_free_thresh_umap:
+	bitmap_free(abm->threshold_undef);
 err_free_abm:
 	kfree(abm);
 	app->priv = NULL;
@@ -738,6 +523,9 @@
 	nfp_abm_eswitch_clean_up(abm);
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+	bitmap_free(abm->threshold_undef);
+	kvfree(abm->actions);
+	kvfree(abm->thresholds);
 	kfree(abm);
 	app->priv = NULL;
 }
@@ -751,6 +539,7 @@
 
 	.vnic_alloc	= nfp_abm_vnic_alloc,
 	.vnic_free	= nfp_abm_vnic_free,
+	.vnic_init	= nfp_abm_vnic_init,
 
 	.port_get_stats		= nfp_abm_port_get_stats,
 	.port_get_stats_count	= nfp_abm_port_get_stats_count,
@@ -761,5 +550,5 @@
 	.eswitch_mode_get	= nfp_abm_eswitch_mode_get,
 	.eswitch_mode_set	= nfp_abm_eswitch_mode_set,
 
-	.repr_get	= nfp_abm_repr_get,
+	.dev_get	= nfp_abm_repr_get,
 };
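
(main.c now resets the firmware state up front: nfp_abm_fw_init_reset() walks every band/queue pair, setting the threshold to NFP_ABM_LVL_INFINITY and the action to plain drop. Because the values are cached in abm->thresholds and abm->actions, the __nfp_abm_ctrl_set_*() helpers in ctrl.c can short-circuit repeat writes; a sketch of that cache-aware setter shape, condensed from the ctrl.c hunks above:

	if (abm->thresholds[id] == val)
		return 0;			/* already programmed */
	err = write_threshold_to_fw(id, val);	/* hypothetical wrapper */
	if (err)
		return err;
	abm->thresholds[id] = val;
	return 0;
)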
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 934a708..48746c9 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -1,41 +1,22 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #ifndef __NFP_ABM_H__
 #define __NFP_ABM_H__ 1
 
+#include <linux/bits.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
 #include <net/devlink.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+
+/* Dump of 64 PRIOs and 256 REDs seems to take 850us on Xeon v4 @ 2.20GHz;
+ * 2.5ms / 400Hz seems more than sufficient for stats resolution.
+ */
+#define NFP_ABM_STATS_REFRESH_IVAL	(2500 * 1000) /* ns */
+
+#define NFP_ABM_LVL_INFINITY		S32_MAX
 
 struct nfp_app;
 struct nfp_net;
@@ -43,21 +24,64 @@
 #define NFP_ABM_PORTID_TYPE	GENMASK(23, 16)
 #define NFP_ABM_PORTID_ID	GENMASK(7, 0)
 
+/* The possible actions if thresholds are exceeded */
+enum nfp_abm_q_action {
+	/* mark if ECN capable, otherwise drop */
+	NFP_ABM_ACT_MARK_DROP		= 0,
+	/* mark if ECN capable, otherwise goto QM */
+	NFP_ABM_ACT_MARK_QUEUE		= 1,
+	NFP_ABM_ACT_DROP		= 2,
+	NFP_ABM_ACT_QUEUE		= 3,
+	NFP_ABM_ACT_NOQUEUE		= 4,
+};
+
 /**
  * struct nfp_abm - ABM NIC app structure
  * @app:	back pointer to nfp_app
  * @pf_id:	ID of our PF link
+ *
+ * @red_support:	is RED offload supported
+ * @num_prios:	number of supported DSCP priorities
+ * @num_bands:	number of supported DSCP priority bands
+ * @action_mask:	bitmask of supported actions
+ *
+ * @thresholds:		current threshold configuration
+ * @threshold_undef:	bitmap of thresholds which have not been set
+ * @actions:		current FW action configuration
+ * @num_thresholds:	number of @thresholds and bits in @threshold_undef
+ *
+ * @prio_map_len:	computed length of FW priority map (in bytes)
+ * @dscp_mask:		mask FW will apply on DSCP field
+ *
  * @eswitch_mode:	devlink eswitch mode, advanced functions only visible
  *			in switchdev mode
+ *
  * @q_lvls:	queue level control area
  * @qm_stats:	queue statistics symbol
+ * @q_stats:	basic queue statistics (only in per-band case)
  */
 struct nfp_abm {
 	struct nfp_app *app;
 	unsigned int pf_id;
+
+	unsigned int red_support;
+	unsigned int num_prios;
+	unsigned int num_bands;
+	unsigned int action_mask;
+
+	u32 *thresholds;
+	unsigned long *threshold_undef;
+	u8 *actions;
+	size_t num_thresholds;
+
+	unsigned int prio_map_len;
+	u8 dscp_mask;
+
 	enum devlink_eswitch_mode eswitch_mode;
+
 	const struct nfp_rtsym *q_lvls;
 	const struct nfp_rtsym *qm_stats;
+	const struct nfp_rtsym *q_stats;
 };
 
 /**
@@ -88,16 +112,76 @@
 	u64 pdrop;
 };
 
+enum nfp_qdisc_type {
+	NFP_QDISC_NONE = 0,
+	NFP_QDISC_MQ,
+	NFP_QDISC_RED,
+	NFP_QDISC_GRED,
+};
+
+#define NFP_QDISC_UNTRACKED	((struct nfp_qdisc *)1UL)
+
 /**
- * struct nfp_red_qdisc - representation of single RED Qdisc
- * @handle:	handle of currently offloaded RED Qdisc
- * @stats:	statistics from last refresh
- * @xstats:	base of extended statistics
+ * struct nfp_qdisc - tracked TC Qdisc
+ * @netdev:		netdev on which Qdisc was created
+ * @type:		Qdisc type
+ * @handle:		handle of this Qdisc
+ * @parent_handle:	handle of the parent (unreliable if Qdisc was grafted)
+ * @use_cnt:		number of attachment points in the hierarchy
+ * @num_children:	current size of the @children array
+ * @children:		pointers to children
+ *
+ * @params_ok:		parameters of this Qdisc are OK for offload
+ * @offload_mark:	offload refresh state - selected for offload
+ * @offloaded:		Qdisc is currently offloaded to the HW
+ *
+ * @mq:			MQ Qdisc specific parameters and state
+ * @mq.stats:		current stats of the MQ Qdisc
+ * @mq.prev_stats:	previously reported @mq.stats
+ *
+ * @red:		RED Qdisc specific parameters and state
+ * @red.num_bands:	Number of valid entries in the @red.band table
+ * @red.band:		Per-band array of RED instances
+ * @red.band.ecn:		ECN marking is enabled (rather than drop)
+ * @red.band.threshold:		ECN marking threshold
+ * @red.band.stats:		current stats of the RED Qdisc
+ * @red.band.prev_stats:	previously reported @red.band.stats
+ * @red.band.xstats:		extended stats for RED - current
+ * @red.band.prev_xstats:	extended stats for RED - previously reported
  */
-struct nfp_red_qdisc {
+struct nfp_qdisc {
+	struct net_device *netdev;
+	enum nfp_qdisc_type type;
 	u32 handle;
-	struct nfp_alink_stats stats;
-	struct nfp_alink_xstats xstats;
+	u32 parent_handle;
+	unsigned int use_cnt;
+	unsigned int num_children;
+	struct nfp_qdisc **children;
+
+	bool params_ok;
+	bool offload_mark;
+	bool offloaded;
+
+	union {
+		/* NFP_QDISC_MQ */
+		struct {
+			struct nfp_alink_stats stats;
+			struct nfp_alink_stats prev_stats;
+		} mq;
+		/* TC_SETUP_QDISC_RED, TC_SETUP_QDISC_GRED */
+		struct {
+			unsigned int num_bands;
+
+			struct {
+				bool ecn;
+				u32 threshold;
+				struct nfp_alink_stats stats;
+				struct nfp_alink_stats prev_stats;
+				struct nfp_alink_xstats xstats;
+				struct nfp_alink_xstats prev_xstats;
+			} band[MAX_DPs];
+		} red;
+	};
 };
 
 /**
@@ -107,9 +191,17 @@
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
  * @total_queues:	number of PF queues
- * @parent:	handle of expected parent, i.e. handle of MQ, or TC_H_ROOT
- * @num_qdiscs:	number of currently used qdiscs
- * @qdiscs:	array of qdiscs
+ *
+ * @last_stats_update:	ktime of last stats update
+ *
+ * @prio_map:		current map of priorities
+ * @has_prio:		@prio_map is valid
+ *
+ * @def_band:		default band to use
+ * @dscp_map:		list of DSCP to band mappings
+ *
+ * @root_qdisc:	pointer to the current root of the Qdisc hierarchy
+ * @qdiscs:	all qdiscs, keyed by the major part of their handle
  */
 struct nfp_abm_link {
 	struct nfp_abm *abm;
@@ -117,26 +209,65 @@
 	unsigned int id;
 	unsigned int queue_base;
 	unsigned int total_queues;
-	u32 parent;
-	unsigned int num_qdiscs;
-	struct nfp_red_qdisc *qdiscs;
+
+	u64 last_stats_update;
+
+	u32 *prio_map;
+	bool has_prio;
+
+	u8 def_band;
+	struct list_head dscp_map;
+
+	struct nfp_qdisc *root_qdisc;
+	struct radix_tree_root qdiscs;
 };
 
-void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
+static inline bool nfp_abm_has_prio(struct nfp_abm *abm)
+{
+	return abm->num_bands > 1;
+}
+
+static inline bool nfp_abm_has_drop(struct nfp_abm *abm)
+{
+	return abm->action_mask & BIT(NFP_ABM_ACT_DROP);
+}
+
+static inline bool nfp_abm_has_mark(struct nfp_abm *abm)
+{
+	return abm->action_mask & BIT(NFP_ABM_ACT_MARK_DROP);
+}
+
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink);
+int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct tc_root_qopt_offload *opt);
+int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
+			 struct tc_red_qopt_offload *opt);
+int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+			struct tc_mq_qopt_offload *opt);
+int nfp_abm_setup_tc_gred(struct net_device *netdev, struct nfp_abm_link *alink,
+			  struct tc_gred_qopt_offload *opt);
+int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr,
+			    struct flow_block_offload *opt);
+
+int nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
 int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm);
-int nfp_abm_ctrl_set_all_q_lvls(struct nfp_abm_link *alink, u32 val);
-int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int i,
-			   u32 val);
-int nfp_abm_ctrl_read_stats(struct nfp_abm_link *alink,
-			    struct nfp_alink_stats *stats);
-int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink, unsigned int i,
+int __nfp_abm_ctrl_set_q_lvl(struct nfp_abm *abm, unsigned int id, u32 val);
+int nfp_abm_ctrl_set_q_lvl(struct nfp_abm_link *alink, unsigned int band,
+			   unsigned int queue, u32 val);
+int __nfp_abm_ctrl_set_q_act(struct nfp_abm *abm, unsigned int id,
+			     enum nfp_abm_q_action act);
+int nfp_abm_ctrl_set_q_act(struct nfp_abm_link *alink, unsigned int band,
+			   unsigned int queue, enum nfp_abm_q_action act);
+int nfp_abm_ctrl_read_q_stats(struct nfp_abm_link *alink,
+			      unsigned int band, unsigned int queue,
 			      struct nfp_alink_stats *stats);
-int nfp_abm_ctrl_read_xstats(struct nfp_abm_link *alink,
-			     struct nfp_alink_xstats *xstats);
-int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink, unsigned int i,
+int nfp_abm_ctrl_read_q_xstats(struct nfp_abm_link *alink,
+			       unsigned int band, unsigned int queue,
 			       struct nfp_alink_xstats *xstats);
 u64 nfp_abm_ctrl_stat_non_sto(struct nfp_abm_link *alink, unsigned int i);
 u64 nfp_abm_ctrl_stat_sto(struct nfp_abm_link *alink, unsigned int i);
 int nfp_abm_ctrl_qm_enable(struct nfp_abm *abm);
 int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm);
+void nfp_abm_prio_map_update(struct nfp_abm *abm);
+int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
new file mode 100644
index 0000000..2473fb5
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+
+#include <linux/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+#include <net/red.h>
+
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "../nfp_port.h"
+#include "main.h"
+
+static bool nfp_abm_qdisc_is_red(struct nfp_qdisc *qdisc)
+{
+	return qdisc->type == NFP_QDISC_RED || qdisc->type == NFP_QDISC_GRED;
+}
+
+static bool nfp_abm_qdisc_child_valid(struct nfp_qdisc *qdisc, unsigned int id)
+{
+	return qdisc->children[id] &&
+	       qdisc->children[id] != NFP_QDISC_UNTRACKED;
+}
+
+static void *nfp_abm_qdisc_tree_deref_slot(void __rcu **slot)
+{
+	return rtnl_dereference(*slot);
+}
+
+static void
+nfp_abm_stats_propagate(struct nfp_alink_stats *parent,
+			struct nfp_alink_stats *child)
+{
+	parent->tx_pkts		+= child->tx_pkts;
+	parent->tx_bytes	+= child->tx_bytes;
+	parent->backlog_pkts	+= child->backlog_pkts;
+	parent->backlog_bytes	+= child->backlog_bytes;
+	parent->overlimits	+= child->overlimits;
+	parent->drops		+= child->drops;
+}
+
+static void
+nfp_abm_stats_update_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			 unsigned int queue)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	unsigned int i;
+	int err;
+
+	if (!qdisc->offloaded)
+		return;
+
+	for (i = 0; i < qdisc->red.num_bands; i++) {
+		err = nfp_abm_ctrl_read_q_stats(alink, i, queue,
+						&qdisc->red.band[i].stats);
+		if (err)
+			nfp_err(cpp, "RED stats (%d, %d) read failed with error %d\n",
+				i, queue, err);
+
+		err = nfp_abm_ctrl_read_q_xstats(alink, i, queue,
+						 &qdisc->red.band[i].xstats);
+		if (err)
+			nfp_err(cpp, "RED xstats (%d, %d) read failed with error %d\n",
+				i, queue, err);
+	}
+}
+
+static void
+nfp_abm_stats_update_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	unsigned int i;
+
+	if (qdisc->type != NFP_QDISC_MQ)
+		return;
+
+	for (i = 0; i < alink->total_queues; i++)
+		if (nfp_abm_qdisc_child_valid(qdisc, i))
+			nfp_abm_stats_update_red(alink, qdisc->children[i], i);
+}
+
+static void __nfp_abm_stats_update(struct nfp_abm_link *alink, u64 time_now)
+{
+	alink->last_stats_update = time_now;
+	if (alink->root_qdisc)
+		nfp_abm_stats_update_mq(alink, alink->root_qdisc);
+}
+
+static void nfp_abm_stats_update(struct nfp_abm_link *alink)
+{
+	u64 now;
+
+	/* Limit the frequency of updates - stats of non-leaf qdiscs are a sum
+	 * of all their leaves, so we would read the same stat multiple times
+	 * for every dump.
+	 */
+	now = ktime_get();
+	if (now - alink->last_stats_update < NFP_ABM_STATS_REFRESH_IVAL)
+		return;
+
+	__nfp_abm_stats_update(alink, now);
+}
+
+static void
+nfp_abm_qdisc_unlink_children(struct nfp_qdisc *qdisc,
+			      unsigned int start, unsigned int end)
+{
+	unsigned int i;
+
+	for (i = start; i < end; i++)
+		if (nfp_abm_qdisc_child_valid(qdisc, i)) {
+			qdisc->children[i]->use_cnt--;
+			qdisc->children[i] = NULL;
+		}
+}
+
+static void
+nfp_abm_qdisc_offload_stop(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	unsigned int i;
+
+	/* Don't complain when qdisc is getting unlinked */
+	if (qdisc->use_cnt)
+		nfp_warn(alink->abm->app->cpp, "Offload of '%08x' stopped\n",
+			 qdisc->handle);
+
+	if (!nfp_abm_qdisc_is_red(qdisc))
+		return;
+
+	for (i = 0; i < qdisc->red.num_bands; i++) {
+		qdisc->red.band[i].stats.backlog_pkts = 0;
+		qdisc->red.band[i].stats.backlog_bytes = 0;
+	}
+}
+
+static int
+__nfp_abm_stats_init(struct nfp_abm_link *alink, unsigned int band,
+		     unsigned int queue, struct nfp_alink_stats *prev_stats,
+		     struct nfp_alink_xstats *prev_xstats)
+{
+	u64 backlog_pkts, backlog_bytes;
+	int err;
+
+	/* Don't touch the backlog; it can only be reset after it has been
+	 * reported back to the tc qdisc stats.
+	 */
+	backlog_pkts = prev_stats->backlog_pkts;
+	backlog_bytes = prev_stats->backlog_bytes;
+
+	err = nfp_abm_ctrl_read_q_stats(alink, band, queue, prev_stats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED stats init (%d, %d) failed with error %d\n",
+			band, queue, err);
+		return err;
+	}
+
+	err = nfp_abm_ctrl_read_q_xstats(alink, band, queue, prev_xstats);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"RED xstats init (%d, %d) failed with error %d\n",
+			band, queue, err);
+		return err;
+	}
+
+	prev_stats->backlog_pkts = backlog_pkts;
+	prev_stats->backlog_bytes = backlog_bytes;
+	return 0;
+}
+
+static int
+nfp_abm_stats_init(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+		   unsigned int queue)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < qdisc->red.num_bands; i++) {
+		err = __nfp_abm_stats_init(alink, i, queue,
+					   &qdisc->red.band[i].prev_stats,
+					   &qdisc->red.band[i].prev_xstats);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+nfp_abm_offload_compile_red(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc,
+			    unsigned int queue)
+{
+	bool good_red, good_gred;
+	unsigned int i;
+
+	good_red = qdisc->type == NFP_QDISC_RED &&
+		   qdisc->params_ok &&
+		   qdisc->use_cnt == 1 &&
+		   !alink->has_prio &&
+		   !qdisc->children[0];
+	good_gred = qdisc->type == NFP_QDISC_GRED &&
+		    qdisc->params_ok &&
+		    qdisc->use_cnt == 1;
+	qdisc->offload_mark = good_red || good_gred;
+
+	/* If we are starting offload, init prev_stats */
+	if (qdisc->offload_mark && !qdisc->offloaded)
+		if (nfp_abm_stats_init(alink, qdisc, queue))
+			qdisc->offload_mark = false;
+
+	if (!qdisc->offload_mark)
+		return;
+
+	for (i = 0; i < alink->abm->num_bands; i++) {
+		enum nfp_abm_q_action act;
+
+		nfp_abm_ctrl_set_q_lvl(alink, i, queue,
+				       qdisc->red.band[i].threshold);
+		act = qdisc->red.band[i].ecn ?
+			NFP_ABM_ACT_MARK_DROP : NFP_ABM_ACT_DROP;
+		nfp_abm_ctrl_set_q_act(alink, i, queue, act);
+	}
+}
+
+static void
+nfp_abm_offload_compile_mq(struct nfp_abm_link *alink, struct nfp_qdisc *qdisc)
+{
+	unsigned int i;
+
+	qdisc->offload_mark = qdisc->type == NFP_QDISC_MQ;
+	if (!qdisc->offload_mark)
+		return;
+
+	for (i = 0; i < alink->total_queues; i++) {
+		struct nfp_qdisc *child = qdisc->children[i];
+
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
+			continue;
+
+		nfp_abm_offload_compile_red(alink, child, i);
+	}
+}
+
+void nfp_abm_qdisc_offload_update(struct nfp_abm_link *alink)
+{
+	struct nfp_abm *abm = alink->abm;
+	struct radix_tree_iter iter;
+	struct nfp_qdisc *qdisc;
+	void __rcu **slot;
+	size_t i;
+
+	/* Mark all thresholds as unconfigured */
+	for (i = 0; i < abm->num_bands; i++)
+		__bitmap_set(abm->threshold_undef,
+			     i * NFP_NET_MAX_RX_RINGS + alink->queue_base,
+			     alink->total_queues);
+
+	/* Clear offload marks */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		qdisc->offload_mark = false;
+	}
+
+	if (alink->root_qdisc)
+		nfp_abm_offload_compile_mq(alink, alink->root_qdisc);
+
+	/* Refresh offload status */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		qdisc = nfp_abm_qdisc_tree_deref_slot(slot);
+		if (!qdisc->offload_mark && qdisc->offloaded)
+			nfp_abm_qdisc_offload_stop(alink, qdisc);
+		qdisc->offloaded = qdisc->offload_mark;
+	}
+
+	/* Reset the unconfigured thresholds */
+	for (i = 0; i < abm->num_thresholds; i++)
+		if (test_bit(i, abm->threshold_undef))
+			__nfp_abm_ctrl_set_q_lvl(abm, i, NFP_ABM_LVL_INFINITY);
+
+	__nfp_abm_stats_update(alink, ktime_get());
+}
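The bitmap arithmetic above reflects the band-major layout of the FW threshold
table: band i of this link covers slots i * NFP_NET_MAX_RX_RINGS + queue_base
through i * NFP_NET_MAX_RX_RINGS + queue_base + total_queues - 1. A sketch of
the per-queue slot computation implied by that layout (hypothetical helper,
for illustration only):

/* Hypothetical helper mirroring the band-major indexing used above. */
static unsigned int
nfp_abm_threshold_slot_example(const struct nfp_abm_link *alink,
			       unsigned int band, unsigned int queue)
{
	return band * NFP_NET_MAX_RX_RINGS + alink->queue_base + queue;
}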
+
+static void
+nfp_abm_qdisc_clear_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct nfp_qdisc *qdisc)
+{
+	struct radix_tree_iter iter;
+	unsigned int mq_refs = 0;
+	void __rcu **slot;
+
+	if (!qdisc->use_cnt)
+		return;
+	/* MQ doesn't generate destroy notifications reliably, so we need
+	 * special handling of MQ's children.
+	 */
+	if (qdisc->type == NFP_QDISC_MQ &&
+	    qdisc == alink->root_qdisc &&
+	    netdev->reg_state == NETREG_UNREGISTERING)
+		return;
+
+	/* Count refs held by MQ instances and clear pointers */
+	radix_tree_for_each_slot(slot, &alink->qdiscs, &iter, 0) {
+		struct nfp_qdisc *mq = nfp_abm_qdisc_tree_deref_slot(slot);
+		unsigned int i;
+
+		if (mq->type != NFP_QDISC_MQ || mq->netdev != netdev)
+			continue;
+		for (i = 0; i < mq->num_children; i++)
+			if (mq->children[i] == qdisc) {
+				mq->children[i] = NULL;
+				mq_refs++;
+			}
+	}
+
+	WARN(qdisc->use_cnt != mq_refs, "non-zero qdisc use count: %d (- %d)\n",
+	     qdisc->use_cnt, mq_refs);
+}
+
+static void
+nfp_abm_qdisc_free(struct net_device *netdev, struct nfp_abm_link *alink,
+		   struct nfp_qdisc *qdisc)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+
+	if (!qdisc)
+		return;
+	nfp_abm_qdisc_clear_mq(netdev, alink, qdisc);
+	WARN_ON(radix_tree_delete(&alink->qdiscs,
+				  TC_H_MAJ(qdisc->handle)) != qdisc);
+
+	kfree(qdisc->children);
+	kfree(qdisc);
+
+	port->tc_offload_cnt--;
+}
+
+static struct nfp_qdisc *
+nfp_abm_qdisc_alloc(struct net_device *netdev, struct nfp_abm_link *alink,
+		    enum nfp_qdisc_type type, u32 parent_handle, u32 handle,
+		    unsigned int children)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	struct nfp_qdisc *qdisc;
+	int err;
+
+	qdisc = kzalloc(sizeof(*qdisc), GFP_KERNEL);
+	if (!qdisc)
+		return NULL;
+
+	if (children) {
+		qdisc->children = kcalloc(children, sizeof(void *), GFP_KERNEL);
+		if (!qdisc->children)
+			goto err_free_qdisc;
+	}
+
+	qdisc->netdev = netdev;
+	qdisc->type = type;
+	qdisc->parent_handle = parent_handle;
+	qdisc->handle = handle;
+	qdisc->num_children = children;
+
+	err = radix_tree_insert(&alink->qdiscs, TC_H_MAJ(qdisc->handle), qdisc);
+	if (err) {
+		nfp_err(alink->abm->app->cpp,
+			"Qdisc insertion into radix tree failed: %d\n", err);
+		goto err_free_child_tbl;
+	}
+
+	port->tc_offload_cnt++;
+	return qdisc;
+
+err_free_child_tbl:
+	kfree(qdisc->children);
+err_free_qdisc:
+	kfree(qdisc);
+	return NULL;
+}
+
+static struct nfp_qdisc *
+nfp_abm_qdisc_find(struct nfp_abm_link *alink, u32 handle)
+{
+	return radix_tree_lookup(&alink->qdiscs, TC_H_MAJ(handle));
+}
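Lookups go by the major part of the 32-bit TC handle (TC_H_MAJ() masks off the
low 16 bits), matching the key used at insertion time. For example, handles
0x80010000 and 0x80010001 share the same upper 16 bits and therefore resolve
to the same tracked qdisc - a hypothetical illustration, not part of the patch:

/* Hypothetical demonstration of the major-keyed lookup. */
static void nfp_abm_qdisc_find_example(struct nfp_abm_link *alink)
{
	struct nfp_qdisc *a = nfp_abm_qdisc_find(alink, 0x80010000);
	struct nfp_qdisc *b = nfp_abm_qdisc_find(alink, 0x80010001);

	WARN_ON(a != b);	/* same TC_H_MAJ(), same radix tree entry */
}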
+
+static int
+nfp_abm_qdisc_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		      enum nfp_qdisc_type type, u32 parent_handle, u32 handle,
+		      unsigned int children, struct nfp_qdisc **qdisc)
+{
+	*qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (*qdisc) {
+		if (WARN_ON((*qdisc)->type != type))
+			return -EINVAL;
+		return 1;
+	}
+
+	*qdisc = nfp_abm_qdisc_alloc(netdev, alink, type, parent_handle, handle,
+				     children);
+	return *qdisc ? 0 : -ENOMEM;
+}
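Note the three-way return convention: a negative errno on failure, 0 when the
qdisc was freshly allocated, and 1 when a matching qdisc already existed. The
RED replace path below relies on this via its "if (!ret)" branch. A sketch of
the expected caller shape (hypothetical, for illustration):

/* Hypothetical caller shape for nfp_abm_qdisc_replace(). */
static int nfp_abm_replace_example(struct net_device *netdev,
				   struct nfp_abm_link *alink,
				   u32 parent, u32 handle)
{
	struct nfp_qdisc *qdisc;
	int ret;

	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_RED, parent,
				    handle, 1, &qdisc);
	if (ret < 0)	/* allocation failure or type mismatch */
		return ret;
	if (!ret)	/* freshly allocated - first-time init goes here */
		qdisc->children[0] = NFP_QDISC_UNTRACKED;
	return 0;
}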
+
+static void
+nfp_abm_qdisc_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
+		      u32 handle)
+{
+	struct nfp_qdisc *qdisc;
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return;
+
+	/* We don't get TC_SETUP_ROOT_QDISC w/ MQ when netdev is unregistered */
+	if (alink->root_qdisc == qdisc)
+		qdisc->use_cnt--;
+
+	nfp_abm_qdisc_unlink_children(qdisc, 0, qdisc->num_children);
+	nfp_abm_qdisc_free(netdev, alink, qdisc);
+
+	if (alink->root_qdisc == qdisc) {
+		alink->root_qdisc = NULL;
+		/* Only a root change matters; other changes are handled via
+		 * the graft notification.
+		 */
+		nfp_abm_qdisc_offload_update(alink);
+	}
+}
+
+static int
+nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
+		    unsigned int id)
+{
+	struct nfp_qdisc *parent, *child;
+
+	parent = nfp_abm_qdisc_find(alink, handle);
+	if (!parent)
+		return 0;
+
+	if (WARN(id >= parent->num_children,
+		 "graft child out of bound %d >= %d\n",
+		 id, parent->num_children))
+		return -EINVAL;
+
+	nfp_abm_qdisc_unlink_children(parent, id, id + 1);
+
+	child = nfp_abm_qdisc_find(alink, child_handle);
+	if (child)
+		child->use_cnt++;
+	else
+		child = NFP_QDISC_UNTRACKED;
+	parent->children[id] = child;
+
+	nfp_abm_qdisc_offload_update(alink);
+
+	return 0;
+}
+
+static void
+nfp_abm_stats_calculate(struct nfp_alink_stats *new,
+			struct nfp_alink_stats *old,
+			struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_queue *qstats)
+{
+	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
+		       new->tx_pkts - old->tx_pkts);
+	qstats->qlen += new->backlog_pkts - old->backlog_pkts;
+	qstats->backlog += new->backlog_bytes - old->backlog_bytes;
+	qstats->overlimits += new->overlimits - old->overlimits;
+	qstats->drops += new->drops - old->drops;
+}
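The stats model throughout this file is snapshot-and-delta: "stats" holds
absolute HW counters, "prev_stats" the values last reported to TC, and each
dump pushes the difference and then advances the baseline. With hypothetical
numbers: if the HW packet counter reads 1000 and 900 were already reported,
the dump adds 100 and records 1000 as the new baseline. A minimal sketch of
the pattern:

/* Hypothetical wrapper showing the report-then-advance pattern. */
static void nfp_abm_report_example(struct nfp_alink_stats *cur,
				   struct nfp_alink_stats *prev,
				   struct gnet_stats_basic_packed *bstats,
				   struct gnet_stats_queue *qstats)
{
	nfp_abm_stats_calculate(cur, prev, bstats, qstats);
	*prev = *cur;	/* the next dump reports only the new delta */
}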
+
+static void
+nfp_abm_stats_red_calculate(struct nfp_alink_xstats *new,
+			    struct nfp_alink_xstats *old,
+			    struct red_stats *stats)
+{
+	stats->forced_mark += new->ecn_marked - old->ecn_marked;
+	stats->pdrop += new->pdrop - old->pdrop;
+}
+
+static int
+nfp_abm_gred_stats(struct nfp_abm_link *alink, u32 handle,
+		   struct tc_gred_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc;
+	unsigned int i;
+
+	nfp_abm_stats_update(alink);
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+	/* If the qdisc offload has stopped, we may need to adjust the backlog
+	 * counters back, so carry on even if the qdisc is not currently
+	 * offloaded.
+	 */
+
+	for (i = 0; i < qdisc->red.num_bands; i++) {
+		if (!stats->xstats[i])
+			continue;
+
+		nfp_abm_stats_calculate(&qdisc->red.band[i].stats,
+					&qdisc->red.band[i].prev_stats,
+					&stats->bstats[i], &stats->qstats[i]);
+		qdisc->red.band[i].prev_stats = qdisc->red.band[i].stats;
+
+		nfp_abm_stats_red_calculate(&qdisc->red.band[i].xstats,
+					    &qdisc->red.band[i].prev_xstats,
+					    stats->xstats[i]);
+		qdisc->red.band[i].prev_xstats = qdisc->red.band[i].xstats;
+	}
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
+}
+
+static bool
+nfp_abm_gred_check_params(struct nfp_abm_link *alink,
+			  struct tc_gred_qopt_offload *opt)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	struct nfp_abm *abm = alink->abm;
+	unsigned int i;
+
+	if (opt->set.grio_on || opt->set.wred_on) {
+		nfp_warn(cpp, "GRED offload failed - GRIO and WRED not supported (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.dp_def != alink->def_band) {
+		nfp_warn(cpp, "GRED offload failed - default band must be %d (p:%08x h:%08x)\n",
+			 alink->def_band, opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.dp_cnt != abm->num_bands) {
+		nfp_warn(cpp, "GRED offload failed - band count must be %d (p:%08x h:%08x)\n",
+			 abm->num_bands, opt->parent, opt->handle);
+		return false;
+	}
+
+	for (i = 0; i < abm->num_bands; i++) {
+		struct tc_gred_vq_qopt_offload_params *band = &opt->set.tab[i];
+
+		if (!band->present)
+			return false;
+		if (!band->is_ecn && !nfp_abm_has_drop(abm)) {
+			nfp_warn(cpp, "GRED offload failed - drop is not supported (ECN option required) (p:%08x h:%08x vq:%d)\n",
+				 opt->parent, opt->handle, i);
+			return false;
+		}
+		if (band->is_ecn && !nfp_abm_has_mark(abm)) {
+			nfp_warn(cpp, "GRED offload failed - ECN marking not supported (p:%08x h:%08x vq:%d)\n",
+				 opt->parent, opt->handle, i);
+			return false;
+		}
+		if (band->is_harddrop) {
+			nfp_warn(cpp, "GRED offload failed - harddrop is not supported (p:%08x h:%08x vq:%d)\n",
+				 opt->parent, opt->handle, i);
+			return false;
+		}
+		if (band->min != band->max) {
+			nfp_warn(cpp, "GRED offload failed - threshold mismatch (p:%08x h:%08x vq:%d)\n",
+				 opt->parent, opt->handle, i);
+			return false;
+		}
+		if (band->min > S32_MAX) {
+			nfp_warn(cpp, "GRED offload failed - threshold too large %d > %d (p:%08x h:%08x vq:%d)\n",
+				 band->min, S32_MAX, opt->parent, opt->handle,
+				 i);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int
+nfp_abm_gred_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		     struct tc_gred_qopt_offload *opt)
+{
+	struct nfp_qdisc *qdisc;
+	unsigned int i;
+	int ret;
+
+	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_GRED, opt->parent,
+				    opt->handle, 0, &qdisc);
+	if (ret < 0)
+		return ret;
+
+	qdisc->params_ok = nfp_abm_gred_check_params(alink, opt);
+	if (qdisc->params_ok) {
+		qdisc->red.num_bands = opt->set.dp_cnt;
+		for (i = 0; i < qdisc->red.num_bands; i++) {
+			qdisc->red.band[i].ecn = opt->set.tab[i].is_ecn;
+			qdisc->red.band[i].threshold = opt->set.tab[i].min;
+		}
+	}
+
+	if (qdisc->use_cnt)
+		nfp_abm_qdisc_offload_update(alink);
+
+	return 0;
+}
+
+int nfp_abm_setup_tc_gred(struct net_device *netdev, struct nfp_abm_link *alink,
+			  struct tc_gred_qopt_offload *opt)
+{
+	switch (opt->command) {
+	case TC_GRED_REPLACE:
+		return nfp_abm_gred_replace(netdev, alink, opt);
+	case TC_GRED_DESTROY:
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
+		return 0;
+	case TC_GRED_STATS:
+		return nfp_abm_gred_stats(alink, opt->handle, &opt->stats);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+nfp_abm_red_xstats(struct nfp_abm_link *alink, struct tc_red_qopt_offload *opt)
+{
+	struct nfp_qdisc *qdisc;
+
+	nfp_abm_stats_update(alink);
+
+	qdisc = nfp_abm_qdisc_find(alink, opt->handle);
+	if (!qdisc || !qdisc->offloaded)
+		return -EOPNOTSUPP;
+
+	nfp_abm_stats_red_calculate(&qdisc->red.band[0].xstats,
+				    &qdisc->red.band[0].prev_xstats,
+				    opt->xstats);
+	qdisc->red.band[0].prev_xstats = qdisc->red.band[0].xstats;
+	return 0;
+}
+
+static int
+nfp_abm_red_stats(struct nfp_abm_link *alink, u32 handle,
+		  struct tc_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc;
+
+	nfp_abm_stats_update(alink);
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+	/* If the qdisc offload has stopped, we may need to adjust the backlog
+	 * counters back, so carry on even if the qdisc is not currently
+	 * offloaded.
+	 */
+
+	nfp_abm_stats_calculate(&qdisc->red.band[0].stats,
+				&qdisc->red.band[0].prev_stats,
+				stats->bstats, stats->qstats);
+	qdisc->red.band[0].prev_stats = qdisc->red.band[0].stats;
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
+}
+
+static bool
+nfp_abm_red_check_params(struct nfp_abm_link *alink,
+			 struct tc_red_qopt_offload *opt)
+{
+	struct nfp_cpp *cpp = alink->abm->app->cpp;
+	struct nfp_abm *abm = alink->abm;
+
+	if (!opt->set.is_ecn && !nfp_abm_has_drop(abm)) {
+		nfp_warn(cpp, "RED offload failed - drop is not supported (ECN option required) (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.is_ecn && !nfp_abm_has_mark(abm)) {
+		nfp_warn(cpp, "RED offload failed - ECN marking not supported (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.is_harddrop) {
+		nfp_warn(cpp, "RED offload failed - harddrop is not supported (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.min != opt->set.max) {
+		nfp_warn(cpp, "RED offload failed - unsupported min/max parameters (p:%08x h:%08x)\n",
+			 opt->parent, opt->handle);
+		return false;
+	}
+	if (opt->set.min > NFP_ABM_LVL_INFINITY) {
+		nfp_warn(cpp, "RED offload failed - threshold too large %d > %d (p:%08x h:%08x)\n",
+			 opt->set.min, NFP_ABM_LVL_INFINITY, opt->parent,
+			 opt->handle);
+		return false;
+	}
+
+	return true;
+}
+
+static int
+nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		    struct tc_red_qopt_offload *opt)
+{
+	struct nfp_qdisc *qdisc;
+	int ret;
+
+	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_RED, opt->parent,
+				    opt->handle, 1, &qdisc);
+	if (ret < 0)
+		return ret;
+
+	/* If limit != 0, the child gets reset */
+	if (opt->set.limit) {
+		if (nfp_abm_qdisc_child_valid(qdisc, 0))
+			qdisc->children[0]->use_cnt--;
+		qdisc->children[0] = NULL;
+	} else {
+		/* A Qdisc that was just allocated without a limit will use
+		 * noop_qdisc, i.e. a black hole.
+		 */
+		if (!ret)
+			qdisc->children[0] = NFP_QDISC_UNTRACKED;
+	}
+
+	qdisc->params_ok = nfp_abm_red_check_params(alink, opt);
+	if (qdisc->params_ok) {
+		qdisc->red.num_bands = 1;
+		qdisc->red.band[0].ecn = opt->set.is_ecn;
+		qdisc->red.band[0].threshold = opt->set.min;
+	}
+
+	if (qdisc->use_cnt == 1)
+		nfp_abm_qdisc_offload_update(alink);
+
+	return 0;
+}
+
+int nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
+			 struct tc_red_qopt_offload *opt)
+{
+	switch (opt->command) {
+	case TC_RED_REPLACE:
+		return nfp_abm_red_replace(netdev, alink, opt);
+	case TC_RED_DESTROY:
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
+		return 0;
+	case TC_RED_STATS:
+		return nfp_abm_red_stats(alink, opt->handle, &opt->stats);
+	case TC_RED_XSTATS:
+		return nfp_abm_red_xstats(alink, opt);
+	case TC_RED_GRAFT:
+		return nfp_abm_qdisc_graft(alink, opt->handle,
+					   opt->child_handle, 0);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+nfp_abm_mq_create(struct net_device *netdev, struct nfp_abm_link *alink,
+		  struct tc_mq_qopt_offload *opt)
+{
+	struct nfp_qdisc *qdisc;
+	int ret;
+
+	ret = nfp_abm_qdisc_replace(netdev, alink, NFP_QDISC_MQ,
+				    TC_H_ROOT, opt->handle, alink->total_queues,
+				    &qdisc);
+	if (ret < 0)
+		return ret;
+
+	qdisc->params_ok = true;
+	qdisc->offloaded = true;
+	nfp_abm_qdisc_offload_update(alink);
+	return 0;
+}
+
+static int
+nfp_abm_mq_stats(struct nfp_abm_link *alink, u32 handle,
+		 struct tc_qopt_offload_stats *stats)
+{
+	struct nfp_qdisc *qdisc, *red;
+	unsigned int i, j;
+
+	qdisc = nfp_abm_qdisc_find(alink, handle);
+	if (!qdisc)
+		return -EOPNOTSUPP;
+
+	nfp_abm_stats_update(alink);
+
+	/* MQ stats are summed over the children in the core, so we need
+	 * to add up the unreported child values.
+	 */
+	memset(&qdisc->mq.stats, 0, sizeof(qdisc->mq.stats));
+	memset(&qdisc->mq.prev_stats, 0, sizeof(qdisc->mq.prev_stats));
+
+	for (i = 0; i < qdisc->num_children; i++) {
+		if (!nfp_abm_qdisc_child_valid(qdisc, i))
+			continue;
+
+		if (!nfp_abm_qdisc_is_red(qdisc->children[i]))
+			continue;
+		red = qdisc->children[i];
+
+		for (j = 0; j < red->red.num_bands; j++) {
+			nfp_abm_stats_propagate(&qdisc->mq.stats,
+						&red->red.band[j].stats);
+			nfp_abm_stats_propagate(&qdisc->mq.prev_stats,
+						&red->red.band[j].prev_stats);
+		}
+	}
+
+	nfp_abm_stats_calculate(&qdisc->mq.stats, &qdisc->mq.prev_stats,
+				stats->bstats, stats->qstats);
+
+	return qdisc->offloaded ? 0 : -EOPNOTSUPP;
+}
+
+int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink,
+			struct tc_mq_qopt_offload *opt)
+{
+	switch (opt->command) {
+	case TC_MQ_CREATE:
+		return nfp_abm_mq_create(netdev, alink, opt);
+	case TC_MQ_DESTROY:
+		nfp_abm_qdisc_destroy(netdev, alink, opt->handle);
+		return 0;
+	case TC_MQ_STATS:
+		return nfp_abm_mq_stats(alink, opt->handle, &opt->stats);
+	case TC_MQ_GRAFT:
+		return nfp_abm_qdisc_graft(alink, opt->handle,
+					   opt->graft_params.child_handle,
+					   opt->graft_params.queue);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int nfp_abm_setup_root(struct net_device *netdev, struct nfp_abm_link *alink,
+		       struct tc_root_qopt_offload *opt)
+{
+	if (opt->ingress)
+		return -EOPNOTSUPP;
+	if (alink->root_qdisc)
+		alink->root_qdisc->use_cnt--;
+	alink->root_qdisc = nfp_abm_qdisc_find(alink, opt->handle);
+	if (alink->root_qdisc)
+		alink->root_qdisc->use_cnt++;
+
+	nfp_abm_qdisc_offload_update(alink);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/Makefile b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
deleted file mode 100644
index 805fa28..0000000
--- a/drivers/net/ethernet/netronome/nfp/bpf/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 2572a4b..0e2db6e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -1,83 +1,19 @@
-/*
- * Copyright (C) 2017-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/bpf.h>
 #include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/jiffies.h>
 #include <linux/skbuff.h>
-#include <linux/wait.h>
+#include <linux/timekeeping.h>
 
+#include "../ccm.h"
 #include "../nfp_app.h"
 #include "../nfp_net.h"
 #include "fw.h"
 #include "main.h"
 
-#define NFP_BPF_TAG_ALLOC_SPAN	(U16_MAX / 4)
-
-static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
-{
-	u16 used_tags;
-
-	used_tags = bpf->tag_alloc_next - bpf->tag_alloc_last;
-
-	return used_tags > NFP_BPF_TAG_ALLOC_SPAN;
-}
-
-static int nfp_bpf_alloc_tag(struct nfp_app_bpf *bpf)
-{
-	/* All FW communication for BPF is request-reply.  To make sure we
-	 * don't reuse the message ID too early after timeout - limit the
-	 * number of requests in flight.
-	 */
-	if (nfp_bpf_all_tags_busy(bpf)) {
-		cmsg_warn(bpf, "all FW request contexts busy!\n");
-		return -EAGAIN;
-	}
-
-	WARN_ON(__test_and_set_bit(bpf->tag_alloc_next, bpf->tag_allocator));
-	return bpf->tag_alloc_next++;
-}
-
-static void nfp_bpf_free_tag(struct nfp_app_bpf *bpf, u16 tag)
-{
-	WARN_ON(!__test_and_clear_bit(tag, bpf->tag_allocator));
-
-	while (!test_bit(bpf->tag_alloc_last, bpf->tag_allocator) &&
-	       bpf->tag_alloc_last != bpf->tag_alloc_next)
-		bpf->tag_alloc_last++;
-}
-
 static struct sk_buff *
 nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
 {
@@ -89,158 +25,32 @@
 	return skb;
 }
 
-static struct sk_buff *
-nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
+static unsigned int
+nfp_bpf_cmsg_map_req_size(struct nfp_app_bpf *bpf, unsigned int n)
 {
 	unsigned int size;
 
 	size = sizeof(struct cmsg_req_map_op);
-	size += sizeof(struct cmsg_key_value_pair) * n;
+	size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
 
-	return nfp_bpf_cmsg_alloc(bpf, size);
-}
-
-static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
-{
-	struct cmsg_hdr *hdr;
-
-	hdr = (struct cmsg_hdr *)skb->data;
-
-	return hdr->type;
-}
-
-static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
-{
-	struct cmsg_hdr *hdr;
-
-	hdr = (struct cmsg_hdr *)skb->data;
-
-	return be16_to_cpu(hdr->tag);
-}
-
-static struct sk_buff *__nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
-{
-	unsigned int msg_tag;
-	struct sk_buff *skb;
-
-	skb_queue_walk(&bpf->cmsg_replies, skb) {
-		msg_tag = nfp_bpf_cmsg_get_tag(skb);
-		if (msg_tag == tag) {
-			nfp_bpf_free_tag(bpf, tag);
-			__skb_unlink(skb, &bpf->cmsg_replies);
-			return skb;
-		}
-	}
-
-	return NULL;
-}
-
-static struct sk_buff *nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
-{
-	struct sk_buff *skb;
-
-	nfp_ctrl_lock(bpf->app->ctrl);
-	skb = __nfp_bpf_reply(bpf, tag);
-	nfp_ctrl_unlock(bpf->app->ctrl);
-
-	return skb;
-}
-
-static struct sk_buff *nfp_bpf_reply_drop_tag(struct nfp_app_bpf *bpf, u16 tag)
-{
-	struct sk_buff *skb;
-
-	nfp_ctrl_lock(bpf->app->ctrl);
-	skb = __nfp_bpf_reply(bpf, tag);
-	if (!skb)
-		nfp_bpf_free_tag(bpf, tag);
-	nfp_ctrl_unlock(bpf->app->ctrl);
-
-	return skb;
+	return size;
 }
 
 static struct sk_buff *
-nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
-			int tag)
+nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
 {
-	struct sk_buff *skb;
-	int i, err;
-
-	for (i = 0; i < 50; i++) {
-		udelay(4);
-		skb = nfp_bpf_reply(bpf, tag);
-		if (skb)
-			return skb;
-	}
-
-	err = wait_event_interruptible_timeout(bpf->cmsg_wq,
-					       skb = nfp_bpf_reply(bpf, tag),
-					       msecs_to_jiffies(5000));
-	/* We didn't get a response - try last time and atomically drop
-	 * the tag even if no response is matched.
-	 */
-	if (!skb)
-		skb = nfp_bpf_reply_drop_tag(bpf, tag);
-	if (err < 0) {
-		cmsg_warn(bpf, "%s waiting for response to 0x%02x: %d\n",
-			  err == ERESTARTSYS ? "interrupted" : "error",
-			  type, err);
-		return ERR_PTR(err);
-	}
-	if (!skb) {
-		cmsg_warn(bpf, "timeout waiting for response to 0x%02x\n",
-			  type);
-		return ERR_PTR(-ETIMEDOUT);
-	}
-
-	return skb;
+	return nfp_bpf_cmsg_alloc(bpf, nfp_bpf_cmsg_map_req_size(bpf, n));
 }
 
-static struct sk_buff *
-nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
-			 enum nfp_bpf_cmsg_type type, unsigned int reply_size)
+static unsigned int
+nfp_bpf_cmsg_map_reply_size(struct nfp_app_bpf *bpf, unsigned int n)
 {
-	struct cmsg_hdr *hdr;
-	int tag;
+	unsigned int size;
 
-	nfp_ctrl_lock(bpf->app->ctrl);
-	tag = nfp_bpf_alloc_tag(bpf);
-	if (tag < 0) {
-		nfp_ctrl_unlock(bpf->app->ctrl);
-		dev_kfree_skb_any(skb);
-		return ERR_PTR(tag);
-	}
+	size = sizeof(struct cmsg_reply_map_op);
+	size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
 
-	hdr = (void *)skb->data;
-	hdr->ver = CMSG_MAP_ABI_VERSION;
-	hdr->type = type;
-	hdr->tag = cpu_to_be16(tag);
-
-	__nfp_app_ctrl_tx(bpf->app, skb);
-
-	nfp_ctrl_unlock(bpf->app->ctrl);
-
-	skb = nfp_bpf_cmsg_wait_reply(bpf, type, tag);
-	if (IS_ERR(skb))
-		return skb;
-
-	hdr = (struct cmsg_hdr *)skb->data;
-	if (hdr->type != __CMSG_REPLY(type)) {
-		cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
-			  hdr->type, __CMSG_REPLY(type));
-		goto err_free;
-	}
-	/* 0 reply_size means caller will do the validation */
-	if (reply_size && skb->len != reply_size) {
-		cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
-			  type, skb->len, reply_size);
-		goto err_free;
-	}
-
-	return skb;
-err_free:
-	dev_kfree_skb_any(skb);
-	return ERR_PTR(-EIO);
+	return size;
 }
 
 static int
@@ -288,8 +98,8 @@
 	req->map_type = cpu_to_be32(map->map_type);
 	req->map_flags = 0;
 
-	skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_ALLOC,
-				       sizeof(*reply));
+	skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_ALLOC,
+				  sizeof(*reply));
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -323,8 +133,8 @@
 	req = (void *)skb->data;
 	req->tid = cpu_to_be32(nfp_map->tid);
 
-	skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_FREE,
-				       sizeof(*reply));
+	skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_FREE,
+				  sizeof(*reply));
 	if (IS_ERR(skb)) {
 		cmsg_warn(bpf, "leaking map - I/O error\n");
 		return;
@@ -338,147 +148,329 @@
 	dev_consume_skb_any(skb);
 }
 
+static void *
+nfp_bpf_ctrl_req_key(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req,
+		     unsigned int n)
+{
+	return &req->data[bpf->cmsg_key_sz * n + bpf->cmsg_val_sz * n];
+}
+
+static void *
+nfp_bpf_ctrl_req_val(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req,
+		     unsigned int n)
+{
+	return &req->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
+}
+
+static void *
+nfp_bpf_ctrl_reply_key(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
+		       unsigned int n)
+{
+	return &reply->data[bpf->cmsg_key_sz * n + bpf->cmsg_val_sz * n];
+}
+
+static void *
+nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
+		       unsigned int n)
+{
+	return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
+}
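The four accessors above index into a packed array of variable-sized
(key, value) pairs: entry n starts at byte n * (cmsg_key_sz + cmsg_val_sz),
key first, value immediately after. With the fixed ABIv2 sizes from fw.h
(CMSG_MAP_KEY_LW = CMSG_MAP_VALUE_LW = 16 words, i.e. 64 bytes each), the
layout works out as follows (illustrative):

/* Worked offsets, assuming cmsg_key_sz = cmsg_val_sz = 64 bytes:
 *
 *	entry 0: key @   0, value @  64
 *	entry 1: key @ 128, value @ 192
 *	entry n: key @ n * 128, value @ n * 128 + 64
 */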
+
+static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
+{
+	return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
+	       op == NFP_CCM_TYPE_BPF_MAP_DELETE;
+}
+
+static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
+{
+	return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
+	       op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
+{
+	return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
+	       op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static unsigned int
+nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+			  const u8 *key, u8 *out_key, u8 *out_value,
+			  u32 *cache_gen)
+{
+	struct bpf_map *map = &nfp_map->offmap->map;
+	struct nfp_app_bpf *bpf = nfp_map->bpf;
+	unsigned int i, count, n_entries;
+	struct cmsg_reply_map_op *reply;
+
+	n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
+
+	spin_lock(&nfp_map->cache_lock);
+	*cache_gen = nfp_map->cache_gen;
+	if (nfp_map->cache_blockers)
+		n_entries = 1;
+
+	if (nfp_bpf_ctrl_op_cache_invalidate(op))
+		goto exit_block;
+	if (!nfp_bpf_ctrl_op_cache_capable(op))
+		goto exit_unlock;
+
+	if (!nfp_map->cache)
+		goto exit_unlock;
+	if (nfp_map->cache_to < ktime_get_ns())
+		goto exit_invalidate;
+
+	reply = (void *)nfp_map->cache->data;
+	count = be32_to_cpu(reply->count);
+
+	for (i = 0; i < count; i++) {
+		void *cached_key;
+
+		cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
+		if (memcmp(cached_key, key, map->key_size))
+			continue;
+
+		if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
+			memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
+			       map->value_size);
+		if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
+			if (i + 1 == count)
+				break;
+
+			memcpy(out_key,
+			       nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
+			       map->key_size);
+		}
+
+		n_entries = 0;
+		goto exit_unlock;
+	}
+	goto exit_unlock;
+
+exit_block:
+	nfp_map->cache_blockers++;
+exit_invalidate:
+	dev_consume_skb_any(nfp_map->cache);
+	nfp_map->cache = NULL;
+exit_unlock:
+	spin_unlock(&nfp_map->cache_lock);
+	return n_entries;
+}
+
+static void
+nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+			  struct sk_buff *skb, u32 cache_gen)
+{
+	bool blocker, filler;
+
+	blocker = nfp_bpf_ctrl_op_cache_invalidate(op);
+	filler = nfp_bpf_ctrl_op_cache_fill(op);
+	if (blocker || filler) {
+		u64 to = 0;
+
+		if (filler)
+			to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS;
+
+		spin_lock(&nfp_map->cache_lock);
+		if (blocker) {
+			nfp_map->cache_blockers--;
+			nfp_map->cache_gen++;
+		}
+		if (filler && !nfp_map->cache_blockers &&
+		    nfp_map->cache_gen == cache_gen) {
+			nfp_map->cache_to = to;
+			swap(nfp_map->cache, skb);
+		}
+		spin_unlock(&nfp_map->cache_lock);
+	}
+
+	dev_consume_skb_any(skb);
+}
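The generation counter closes a race between cache fillers and invalidating
ops: a filler only installs its reply if no blocker is in flight and cache_gen
still matches the snapshot taken in the get path. A condensed timeline of the
race being prevented (illustrative):

/* Illustrative interleaving:
 *
 *	CPU A: GETNEXT starts, snapshots cache_gen == 5
 *	CPU B: UPDATE starts, cache_blockers++, cached skb dropped
 *	CPU B: UPDATE completes, cache_blockers--, cache_gen = 6
 *	CPU A: GETNEXT reply arrives; its contents may predate the UPDATE,
 *	       but cache_gen (6) != snapshot (5), so the reply is consumed
 *	       without being installed in the cache.
 */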
+
 static int
-nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
-		      enum nfp_bpf_cmsg_type op,
+nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
 		      u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
 {
 	struct nfp_bpf_map *nfp_map = offmap->dev_priv;
+	unsigned int n_entries, reply_entries, count;
 	struct nfp_app_bpf *bpf = nfp_map->bpf;
 	struct bpf_map *map = &offmap->map;
 	struct cmsg_reply_map_op *reply;
 	struct cmsg_req_map_op *req;
 	struct sk_buff *skb;
+	u32 cache_gen;
 	int err;
 
 	/* FW messages have no space for more than 32 bits of flags */
 	if (flags >> 32)
 		return -EOPNOTSUPP;
 
+	/* Handle op cache */
+	n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
+					      out_value, &cache_gen);
+	if (!n_entries)
+		return 0;
+
 	skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
-	if (!skb)
-		return -ENOMEM;
+	if (!skb) {
+		err = -ENOMEM;
+		goto err_cache_put;
+	}
 
 	req = (void *)skb->data;
 	req->tid = cpu_to_be32(nfp_map->tid);
-	req->count = cpu_to_be32(1);
+	req->count = cpu_to_be32(n_entries);
 	req->flags = cpu_to_be32(flags);
 
 	/* Copy inputs */
 	if (key)
-		memcpy(&req->elem[0].key, key, map->key_size);
+		memcpy(nfp_bpf_ctrl_req_key(bpf, req, 0), key, map->key_size);
 	if (value)
-		memcpy(&req->elem[0].value, value, map->value_size);
+		memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
+		       map->value_size);
 
-	skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
-				       sizeof(*reply) + sizeof(*reply->elem));
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
+	skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(skb);
+		goto err_cache_put;
+	}
+
+	if (skb->len < sizeof(*reply)) {
+		cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
+			  op, skb->len);
+		err = -EIO;
+		goto err_free;
+	}
 
 	reply = (void *)skb->data;
+	count = be32_to_cpu(reply->count);
 	err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+	/* FW responds with a message sized to hold the good entries,
+	 * plus one extra entry if there was an error.
+	 */
+	reply_entries = count + !!err;
+	if (n_entries > 1 && count)
+		err = 0;
 	if (err)
 		goto err_free;
 
+	if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
+		cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
+			  op, skb->len, reply_entries);
+		err = -EIO;
+		goto err_free;
+	}
+
 	/* Copy outputs */
 	if (out_key)
-		memcpy(out_key, &reply->elem[0].key, map->key_size);
+		memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
+		       map->key_size);
 	if (out_value)
-		memcpy(out_value, &reply->elem[0].value, map->value_size);
+		memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
+		       map->value_size);
 
-	dev_consume_skb_any(skb);
+	nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
 
 	return 0;
 err_free:
 	dev_kfree_skb_any(skb);
+err_cache_put:
+	nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
 	return err;
 }
 
 int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
 			      void *key, void *value, u64 flags)
 {
-	return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_UPDATE,
+	return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_UPDATE,
 				     key, value, flags, NULL, NULL);
 }
 
 int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key)
 {
-	return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_DELETE,
+	return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_DELETE,
 				     key, NULL, 0, NULL, NULL);
 }
 
 int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
 			      void *key, void *value)
 {
-	return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_LOOKUP,
+	return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_LOOKUP,
 				     key, NULL, 0, NULL, value);
 }
 
 int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
 				void *next_key)
 {
-	return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETFIRST,
+	return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETFIRST,
 				     NULL, NULL, 0, next_key, NULL);
 }
 
 int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
 			       void *key, void *next_key)
 {
-	return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETNEXT,
+	return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETNEXT,
 				     key, NULL, 0, next_key, NULL);
 }
 
+unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
+{
+	return max(nfp_bpf_cmsg_map_req_size(bpf, 1),
+		   nfp_bpf_cmsg_map_reply_size(bpf, 1));
+}
+
+unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
+{
+	return max3(NFP_NET_DEFAULT_MTU,
+		    nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
+		    nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
+}
+
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
+{
+	unsigned int mtu, req_max, reply_max, entry_sz;
+
+	mtu = bpf->app->ctrl->dp.mtu;
+	entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
+	req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
+	reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
+
+	return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
+}
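As a worked example, assume (hypothetically) a control channel MTU of 1500
bytes and the fixed 64-byte keys and values, so entry_sz = 128; with a 4-byte
nfp_ccm_hdr, both the request and reply fixed headers come to 16 bytes:

/* Hypothetical numbers:
 *
 *	req_max   = (1500 - 16) / 128 = 11
 *	reply_max = (1500 - 16) / 128 = 11
 *
 * so the cache batches min3(11, 11, NFP_BPF_MAP_CACHE_CNT) entries.
 */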
+
 void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
 {
 	struct nfp_app_bpf *bpf = app->priv;
-	unsigned int tag;
 
 	if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) {
 		cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len);
-		goto err_free;
+		dev_kfree_skb_any(skb);
+		return;
 	}
 
-	if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
+	if (nfp_ccm_get_type(skb) == NFP_CCM_TYPE_BPF_BPF_EVENT) {
 		if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
 			dev_consume_skb_any(skb);
 		else
 			dev_kfree_skb_any(skb);
-		return;
 	}
 
-	nfp_ctrl_lock(bpf->app->ctrl);
-
-	tag = nfp_bpf_cmsg_get_tag(skb);
-	if (unlikely(!test_bit(tag, bpf->tag_allocator))) {
-		cmsg_warn(bpf, "cmsg drop - no one is waiting for tag %u!\n",
-			  tag);
-		goto err_unlock;
-	}
-
-	__skb_queue_tail(&bpf->cmsg_replies, skb);
-	wake_up_interruptible_all(&bpf->cmsg_wq);
-
-	nfp_ctrl_unlock(bpf->app->ctrl);
-
-	return;
-err_unlock:
-	nfp_ctrl_unlock(bpf->app->ctrl);
-err_free:
-	dev_kfree_skb_any(skb);
+	nfp_ccm_rx(&bpf->ccm, skb);
 }
 
 void
 nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
 {
+	const struct nfp_ccm_hdr *hdr = data;
 	struct nfp_app_bpf *bpf = app->priv;
-	const struct cmsg_hdr *hdr = data;
 
 	if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
 		cmsg_warn(bpf, "cmsg drop - too short %d!\n", len);
 		return;
 	}
 
-	if (hdr->type == CMSG_TYPE_BPF_EVENT)
+	if (hdr->type == NFP_CCM_TYPE_BPF_BPF_EVENT)
 		nfp_bpf_event_output(bpf, data, len);
 	else
 		cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index e4f9b7e..a83a0ad 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -1,41 +1,12 @@
-/*
- * Copyright (C) 2017-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #ifndef NFP_BPF_FW_H
 #define NFP_BPF_FW_H 1
 
 #include <linux/bitops.h>
 #include <linux/types.h>
+#include "../ccm.h"
 
 /* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
  * our FW ABI.  In that case we will do translation in the driver.
@@ -52,6 +23,8 @@
 	NFP_BPF_CAP_TYPE_RANDOM		= 4,
 	NFP_BPF_CAP_TYPE_QUEUE_SELECT	= 5,
 	NFP_BPF_CAP_TYPE_ADJUST_TAIL	= 6,
+	NFP_BPF_CAP_TYPE_ABI_VERSION	= 7,
+	NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT	= 8,
 };
 
 struct nfp_bpf_cap_tlv_func {
@@ -81,23 +54,8 @@
 /*
  * Types defined for map related control messages
  */
-#define CMSG_MAP_ABI_VERSION		1
 
-enum nfp_bpf_cmsg_type {
-	CMSG_TYPE_MAP_ALLOC	= 1,
-	CMSG_TYPE_MAP_FREE	= 2,
-	CMSG_TYPE_MAP_LOOKUP	= 3,
-	CMSG_TYPE_MAP_UPDATE	= 4,
-	CMSG_TYPE_MAP_DELETE	= 5,
-	CMSG_TYPE_MAP_GETNEXT	= 6,
-	CMSG_TYPE_MAP_GETFIRST	= 7,
-	CMSG_TYPE_BPF_EVENT	= 8,
-	__CMSG_TYPE_MAP_MAX,
-};
-
-#define CMSG_TYPE_MAP_REPLY_BIT		7
-#define __CMSG_REPLY(req)		(BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
-
+/* BPF ABIv2 fixed-length control message fields */
 #define CMSG_MAP_KEY_LW			16
 #define CMSG_MAP_VALUE_LW		16
 
@@ -112,19 +70,13 @@
 	CMSG_RC_ERR_MAP_E2BIG		= 7,
 };
 
-struct cmsg_hdr {
-	u8 type;
-	u8 ver;
-	__be16 tag;
-};
-
 struct cmsg_reply_map_simple {
-	struct cmsg_hdr hdr;
+	struct nfp_ccm_hdr hdr;
 	__be32 rc;
 };
 
 struct cmsg_req_map_alloc_tbl {
-	struct cmsg_hdr hdr;
+	struct nfp_ccm_hdr hdr;
 	__be32 key_size;		/* in bytes */
 	__be32 value_size;		/* in bytes */
 	__be32 max_entries;
@@ -138,7 +90,7 @@
 };
 
 struct cmsg_req_map_free_tbl {
-	struct cmsg_hdr hdr;
+	struct nfp_ccm_hdr hdr;
 	__be32 tid;
 };
 
@@ -147,28 +99,23 @@
 	__be32 count;
 };
 
-struct cmsg_key_value_pair {
-	__be32 key[CMSG_MAP_KEY_LW];
-	__be32 value[CMSG_MAP_VALUE_LW];
-};
-
 struct cmsg_req_map_op {
-	struct cmsg_hdr hdr;
+	struct nfp_ccm_hdr hdr;
 	__be32 tid;
 	__be32 count;
 	__be32 flags;
-	struct cmsg_key_value_pair elem[0];
+	u8 data[0];
 };
 
 struct cmsg_reply_map_op {
 	struct cmsg_reply_map_simple reply_hdr;
 	__be32 count;
 	__be32 resv;
-	struct cmsg_key_value_pair elem[0];
+	u8 data[0];
 };
 
 struct cmsg_bpf_event {
-	struct cmsg_hdr hdr;
+	struct nfp_ccm_hdr hdr;
 	__be32 cpu_id;
 	__be64 map_ptr;
 	__be32 data_size;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index eff57f7..5afcb3c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2016-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
 
 #define pr_fmt(fmt)	"NFP net bpf: " fmt
 
@@ -267,6 +237,38 @@
 }
 
 static void
+__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
+	      u8 defer, bool dst_lmextn, bool src_lmextn)
+{
+	u64 insn;
+
+	insn = OP_BR_ALU_BASE |
+		FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
+		FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
+		FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
+		FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
+		FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
+		      reg.src_lmextn);
+}
+
+static void
 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
 	     enum immed_width width, bool invert,
 	     enum immed_shift shift, bool wr_both,
@@ -326,7 +328,18 @@
 		return;
 	}
 
-	if (sc == SHF_SC_L_SHF)
+	/* The NFP shift instruction has a quirk: if the shift direction is
+	 * left, a shift amount of 1 to 31 is specified as 32 minus the amount
+	 * to shift - e.g. a left shift by 3 is encoded as 29.
+	 *
+	 * No such adjustment is needed for an indirect shift, whose shift
+	 * amount is 0.  If we did the subtraction, amount 0 would become 32,
+	 * which would eventually be encoded the same as 0 since only the low
+	 * 5 bits are encoded - but 32 would fail the later FIELD_PREP check
+	 * against the shift mask (0x1f), because it is out of the mask's
+	 * range.
+	 */
+	if (sc == SHF_SC_L_SHF && shift)
 		shift = 32 - shift;
 
 	insn = OP_SHF_BASE |
@@ -610,6 +623,13 @@
 }
 
 static void
+wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst)
+{
+	if (meta->flags & FLAG_INSN_DO_ZEXT)
+		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+}
+
+static void
 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
 	       enum nfp_relo_type relo)
 {
@@ -845,7 +865,8 @@
 }
 
 static int
-data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
+data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset,
+	u8 dst_gpr, int size)
 {
 	unsigned int i;
 	u16 shift, sz;
@@ -868,14 +889,15 @@
 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
 
 	if (i < 2)
-		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
+		wrp_zext(nfp_prog, meta, dst_gpr);
 
 	return 0;
 }
 
 static int
-data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
-		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
+data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		   u8 dst_gpr, swreg lreg, swreg rreg, int size,
+		   enum cmd_mode mode)
 {
 	unsigned int i;
 	u8 mask, sz;
@@ -898,33 +920,34 @@
 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
 
 	if (i < 2)
-		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
+		wrp_zext(nfp_prog, meta, dst_gpr);
 
 	return 0;
 }
 
 static int
-data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
-			  u8 dst_gpr, u8 size)
+data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+			  u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
 {
-	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
-				  size, CMD_MODE_32b);
+	return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr),
+				  offset, size, CMD_MODE_32b);
 }
 
 static int
-data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
-			  u8 dst_gpr, u8 size)
+data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+			  u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
 {
 	swreg rega, regb;
 
 	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
 
-	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
+	return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb,
 				  size, CMD_MODE_40b_BA);
 }
 
 static int
-construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
+construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		      u16 offset, u16 src, u8 size)
 {
 	swreg tmp_reg;
 
@@ -940,10 +963,12 @@
 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
 
 	/* Load data */
-	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
+	return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size);
 }
 
-static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
+static int
+construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  u16 offset, u8 size)
 {
 	swreg tmp_reg;
 
@@ -954,7 +979,7 @@
 
 	/* Load data */
 	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
-	return data_ld(nfp_prog, tmp_reg, 0, size);
+	return data_ld(nfp_prog, meta, tmp_reg, 0, size);
 }
 
 static int
@@ -1137,8 +1162,8 @@
 	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
 	     bool clr_gpr, lmem_step step)
 {
-	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
-	bool first = true, last;
+	s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
+	bool first = true, narrow_ld, last;
 	bool needs_inc = false;
 	swreg stack_off_reg;
 	u8 prev_gpr = 255;
@@ -1146,7 +1171,8 @@
 	bool lm3 = true;
 	int ret;
 
-	if (meta->ptr_not_const) {
+	if (meta->ptr_not_const ||
+	    meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
 		/* Use of the last encountered ptr_off is OK, they all have
 		 * the same alignment.  Depend on low bits of value being
 		 * discarded when written to LMaddr register.
@@ -1183,14 +1209,23 @@
 
 		needs_inc = true;
 	}
+
+	narrow_ld = clr_gpr && size < 8;
+
 	if (lm3) {
+		unsigned int nop_cnt;
+
 		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
-		/* For size < 4 one slot will be filled by zeroing of upper. */
-		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
+		/* For size < 4 one slot will be filled by the zeroing of the
+		 * upper half, but be careful: that zeroing could have been
+		 * eliminated by the zext optimization.
+		 */
+		nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3;
+		wrp_nops(nfp_prog, nop_cnt);
 	}
 
-	if (clr_gpr && size < 8)
-		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
+	if (narrow_ld)
+		wrp_zext(nfp_prog, meta, gpr);
 
 	while (size) {
 		u32 slice_end;
@@ -1263,7 +1298,7 @@
 	u64 imm = insn->imm; /* sign extend */
 
 	if (skip) {
-		meta->skip = true;
+		meta->flags |= FLAG_INSN_SKIP_NOOP;
 		return 0;
 	}
 
@@ -1288,17 +1323,13 @@
 
 static int
 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-	      enum alu_op alu_op, bool skip)
+	      enum alu_op alu_op)
 {
 	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
 
-	if (skip) {
-		meta->skip = true;
-		return 0;
-	}
-
-	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
-	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+	wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm);
+	wrp_zext(nfp_prog, meta, dst);
 
 	return 0;
 }
@@ -1310,7 +1341,7 @@
 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
 
 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
-	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+	wrp_zext(nfp_prog, meta, dst);
 
 	return 0;
 }
@@ -1331,8 +1362,9 @@
 
 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
 			 insn->src_reg * 2, br_mask, insn->off);
-	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
-			 insn->src_reg * 2 + 1, br_mask, insn->off);
+	if (is_mbpf_jmp64(meta))
+		wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
+				 insn->src_reg * 2 + 1, br_mask, insn->off);
 
 	return 0;
 }
@@ -1387,13 +1419,15 @@
 	else
 		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
 
-	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
-	if (!code->swap)
-		emit_alu(nfp_prog, reg_none(),
-			 reg_a(reg + 1), carry_op, tmp_reg);
-	else
-		emit_alu(nfp_prog, reg_none(),
-			 tmp_reg, carry_op, reg_a(reg + 1));
+	if (is_mbpf_jmp64(meta)) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		if (!code->swap)
+			emit_alu(nfp_prog, reg_none(),
+				 reg_a(reg + 1), carry_op, tmp_reg);
+		else
+			emit_alu(nfp_prog, reg_none(),
+				 tmp_reg, carry_op, reg_a(reg + 1));
+	}
 
 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
@@ -1420,8 +1454,9 @@
 	}
 
 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
-	emit_alu(nfp_prog, reg_none(),
-		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
+	if (is_mbpf_jmp64(meta))
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
 	return 0;
@@ -1695,7 +1730,7 @@
 	s64 lm_off;
 
 	/* We only have to reload LM0 if the key is not at start of stack */
-	lm_off = nfp_prog->stack_depth;
+	lm_off = nfp_prog->stack_frame_depth;
 	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
 	load_lm_ptr = meta->arg2.var_off || lm_off;
 
@@ -1808,10 +1843,10 @@
 		swreg stack_depth_reg;
 
 		stack_depth_reg = ur_load_imm_any(nfp_prog,
-						  nfp_prog->stack_depth,
+						  nfp_prog->stack_frame_depth,
 						  stack_imm(nfp_prog));
-		emit_alu(nfp_prog, reg_both(dst),
-			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
+		emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog),
+			 ALU_OP_ADD, stack_depth_reg);
 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
 	} else {
 		wrp_reg_mov(nfp_prog, dst, src);
@@ -1960,6 +1995,9 @@
  */
 static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
 {
+	if (!shift_amt)
+		return 0;
+
 	if (shift_amt < 32) {
 		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
 			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
@@ -2072,6 +2110,9 @@
  */
 static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
 {
+	if (!shift_amt)
+		return 0;
+
 	if (shift_amt < 32) {
 		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
 			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
@@ -2173,6 +2214,9 @@
  */
 static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
 {
+	if (!shift_amt)
+		return 0;
+
 	if (shift_amt < 32) {
 		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
 			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
@@ -2306,7 +2350,7 @@
 
 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
 }
 
 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2316,7 +2360,7 @@
 
 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
 }
 
 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2326,7 +2370,7 @@
 
 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
 }
 
 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2336,7 +2380,7 @@
 
 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
 }
 
 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2346,7 +2390,7 @@
 
 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
 }
 
 static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2374,23 +2418,132 @@
 	u8 dst = meta->insn.dst_reg * 2;
 
 	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
-	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+	wrp_zext(nfp_prog, meta, dst);
 
 	return 0;
 }
 
+static int
+__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
+	   u8 shift_amt)
+{
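+	/* 32-bit arithmetic right shift; clear the high half of the result
+	 * afterwards if zero-extension is still required.
+	 */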
+	if (shift_amt) {
+		/* Set signedness bit (MSB of result). */
+		emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
+			 reg_imm(0));
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
+	}
+	wrp_zext(nfp_prog, meta, dst);
+
+	return 0;
+}
+
+static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin_src;
+	umax = meta->umax_src;
+	if (umin == umax)
+		return __ashr_imm(nfp_prog, meta, dst, umin);
+
+	src = insn->src_reg * 2;
+	/* NOTE: the first insn will set both the indirect shift amount
+	 * (source A) and the signedness bit (MSB of the result).
+	 */
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+		       reg_b(dst), SHF_SC_R_SHF);
+	wrp_zext(nfp_prog, meta, dst);
+
+	return 0;
+}
+
+static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	return __ashr_imm(nfp_prog, meta, dst, insn->imm);
+}
+
+static int
+__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
+	  u8 shift_amt)
+{
+	if (shift_amt)
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
+	wrp_zext(nfp_prog, meta, dst);
+	return 0;
+}
+
+static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	return __shr_imm(nfp_prog, meta, dst, insn->imm);
+}
+
+static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin_src;
+	umax = meta->umax_src;
+	if (umin == umax)
+		return __shr_imm(nfp_prog, meta, dst, umin);
+
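+	/* Load the shift amount into source A; the indirect shift takes its
+	 * amount from there, as noted in ashr_reg() above.
+	 */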
+	src = insn->src_reg * 2;
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_R_SHF);
+	wrp_zext(nfp_prog, meta, dst);
+	return 0;
+}
+
+static int
+__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
+	  u8 shift_amt)
+{
+	if (shift_amt)
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
+	wrp_zext(nfp_prog, meta, dst);
+	return 0;
+}
+
 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
 
-	if (!insn->imm)
-		return 1; /* TODO: zero shift means indirect */
+	return __shl_imm(nfp_prog, meta, dst, insn->imm);
+}
 
-	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
-		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
-		 SHF_SC_L_SHF, insn->imm);
-	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
 
+	dst = insn->dst_reg * 2;
+	umin = meta->umin_src;
+	umax = meta->umax_src;
+	if (umin == umax)
+		return __shl_imm(nfp_prog, meta, dst, umin);
+
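+	/* Reuse the low-word step of the 64-bit left shift for the 32-bit
+	 * case, then zero-extend as needed.
+	 */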
+	src = insn->src_reg * 2;
+	shl_reg64_lt32_low(nfp_prog, dst, src);
+	wrp_zext(nfp_prog, meta, dst);
 	return 0;
 }
 
@@ -2452,34 +2605,34 @@
 
 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
+	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1);
 }
 
 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
+	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2);
 }
 
 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
+	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4);
 }
 
 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
 				     meta->insn.src_reg * 2, 1);
 }
 
 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
 				     meta->insn.src_reg * 2, 2);
 }
 
 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
 				     meta->insn.src_reg * 2, 4);
 }
 
@@ -2557,7 +2710,7 @@
 
 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
 
-	return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
+	return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2,
 					 tmp_reg, meta->insn.dst_reg * 2, size);
 }
 
@@ -2569,7 +2722,7 @@
 
 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
 
-	return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
+	return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2,
 					 tmp_reg, meta->insn.dst_reg * 2, size);
 }
 
@@ -2630,7 +2783,7 @@
 	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
 
 	if (!len_mid) {
-		wrp_immed(nfp_prog, dst_hi, 0);
+		wrp_zext(nfp_prog, meta, dst_gpr);
 		return 0;
 	}
 
@@ -2638,7 +2791,7 @@
 
 	if (size <= REG_WIDTH) {
 		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
-		wrp_immed(nfp_prog, dst_hi, 0);
+		wrp_zext(nfp_prog, meta, dst_gpr);
 	} else {
 		swreg src_hi = reg_xfer(idx + 2);
 
@@ -2669,10 +2822,10 @@
 
 	if (size < REG_WIDTH) {
 		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
-		wrp_immed(nfp_prog, dst_hi, 0);
+		wrp_zext(nfp_prog, meta, dst_gpr);
 	} else if (size == REG_WIDTH) {
 		wrp_mov(nfp_prog, dst_lo, src_lo);
-		wrp_immed(nfp_prog, dst_hi, 0);
+		wrp_zext(nfp_prog, meta, dst_gpr);
 	} else {
 		swreg src_hi = reg_xfer(idx + 1);
 
@@ -3002,30 +3155,37 @@
 	return 0;
 }
 
+static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
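+	/* JMP32 JEQ with immediate: XOR only the low 32 bits of dst against
+	 * the immediate and branch if the result is zero.
+	 */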
+	const struct bpf_insn *insn = &meta->insn;
+	swreg tmp_reg;
+
+	tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
+	u8 dst_gpr = insn->dst_reg * 2;
 	swreg tmp_reg;
 
-	if (!imm) {
-		meta->skip = true;
-		return 0;
-	}
-
-	if (imm & ~0U) {
-		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	emit_alu(nfp_prog, imm_b(nfp_prog),
+		 reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
+	/* The upper word of the mask can only be 0 or ~0 due to sign
+	 * extension, so either ignore it or OR the whole thing in.
+	 */
+	if (is_mbpf_jmp64(meta) && imm >> 32) {
 		emit_alu(nfp_prog, reg_none(),
-			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
-		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+			 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
 	}
-
-	if (imm >> 32) {
-		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
-		emit_alu(nfp_prog, reg_none(),
-			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
-		emit_br(nfp_prog, BR_BNE, insn->off, 0);
-	}
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
 
 	return 0;
 }
@@ -3034,11 +3194,16 @@
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
+	bool is_jmp32 = is_mbpf_jmp32(meta);
 	swreg tmp_reg;
 
 	if (!imm) {
-		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
-			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
+		if (is_jmp32)
+			emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
+				 reg_b(insn->dst_reg * 2));
+		else
+			emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
+				 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
 		return 0;
 	}
@@ -3048,6 +3213,9 @@
 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
 
+	if (is_jmp32)
+		return 0;
+
 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
 	emit_alu(nfp_prog, reg_none(),
 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
@@ -3062,10 +3230,13 @@
 
 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
-	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
-		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
-	emit_alu(nfp_prog, reg_none(),
-		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
+	if (is_mbpf_jmp64(meta)) {
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
+			 reg_b(insn->src_reg * 2 + 1));
+		emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
+			 imm_b(nfp_prog));
+	}
 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
 
 	return 0;
@@ -3081,7 +3252,93 @@
 	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
 }
 
-static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int
+bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u32 ret_tgt, stack_depth, offset_br;
+	swreg tmp_reg;
+
+	stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
+	/* Space for saving the return address is accounted for by the callee,
+	 * so stack_depth can be zero for the main function.
+	 */
+	if (stack_depth) {
+		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
+					  stack_imm(nfp_prog));
+		emit_alu(nfp_prog, stack_reg(nfp_prog),
+			 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
+		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
+			    NFP_CSR_ACT_LM_ADDR0);
+	}
+
+	/* Two cases for jumping to the callee:
+	 *
+	 * - If callee uses and needs to save R6~R9 then:
+	 *     1. Put the start offset of the callee into imm_b(). This will
+	 *        require a fixup step, as we do not necessarily know this
+	 *        address yet.
+	 *     2. Put the return address from the callee to the caller into
+	 *        register ret_reg().
+	 *     3. (After defer slots are consumed) Jump to the subroutine that
+	 *        pushes the registers to the stack.
+	 *   The subroutine acts as a trampoline, and returns to the address in
+	 *   imm_b(), i.e. jumps to the callee.
+	 *
+	 * - If callee does not need to save R6~R9 then just load return
+	 *   address to the caller in ret_reg(), and jump to the callee
+	 *   directly.
+	 *
+	 * Using ret_reg() to pass the return address to the callee is set here
+	 * as a convention. The callee can then push this address onto its
+	 * stack frame in its prologue. The advantages of passing the return
+	 * address through ret_reg(), instead of pushing it to the stack right
+	 * here, are the following:
+	 * - It looks cleaner.
+	 * - If the called function is called multiple times, we get a smaller
+	 *   program size.
+	 * - We save the two no-op instructions that would otherwise have to
+	 *   be added just before the emit_br() when the stack depth is
+	 *   non-zero.
+	 * - If we ever find a register to hold the return address during the
+	 *   whole execution of the callee, we will not have to push the
+	 *   return address to the stack for leaf functions.
+	 */
+	if (!meta->jmp_dst) {
+		pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
+		return -ELOOP;
+	}
+	if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
+		ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
+		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
+			     RELO_BR_GO_CALL_PUSH_REGS);
+		offset_br = nfp_prog_current_offset(nfp_prog);
+		wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
+	} else {
+		ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
+		emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
+		offset_br = nfp_prog_current_offset(nfp_prog);
+	}
+	wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
+		return -EINVAL;
+
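+	/* After the callee has returned, move the stack pointer (LM_ADDR0)
+	 * back to the caller's frame; as with other LM address CSR writes,
+	 * no-ops are needed before the new address is usable.
+	 */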
+	if (stack_depth) {
+		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
+					  stack_imm(nfp_prog));
+		emit_alu(nfp_prog, stack_reg(nfp_prog),
+			 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
+		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
+			    NFP_CSR_ACT_LM_ADDR0);
+		wrp_nops(nfp_prog, 3);
+	}
+
+	meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
+	meta->num_insns_after_br -= offset_br;
+
+	return 0;
+}
+
+static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	switch (meta->insn.imm) {
 	case BPF_FUNC_xdp_adjust_head:
@@ -3102,6 +3359,19 @@
 	}
 }
 
+static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (is_mbpf_pseudo_call(meta))
+		return bpf_to_bpf_call(nfp_prog, meta);
+	else
+		return helper_call(nfp_prog, meta);
+}
+
+static bool nfp_is_main_function(struct nfp_insn_meta *meta)
+{
+	return meta->subprog_idx == 0;
+}
+
 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
@@ -3109,6 +3379,39 @@
 	return 0;
 }
 
+static int
+nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
+		/* Pop R6~R9 from the stack via the related subroutine.
+		 * We have loaded the return address to the caller into
+		 * ret_reg().  This means that the subroutine does not come
+		 * back here; we make it jump back to the subprogram caller
+		 * directly!
+		 */
+		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
+			     RELO_BR_GO_CALL_POP_REGS);
+		/* Pop return address from the stack. */
+		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
+	} else {
+		/* Pop return address from the stack. */
+		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
+		/* No callee-saved registers were used by the subprogram, so
+		 * jump straight back to the caller.
+		 */
+		emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
+	}
+
+	return 0;
+}
+
+static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (nfp_is_main_function(meta))
+		return goto_out(nfp_prog, meta);
+	else
+		return nfp_subprog_epilogue(nfp_prog, meta);
+}
+
 static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
 	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
@@ -3150,7 +3453,12 @@
 	[BPF_ALU | BPF_DIV | BPF_X] =	div_reg,
 	[BPF_ALU | BPF_DIV | BPF_K] =	div_imm,
 	[BPF_ALU | BPF_NEG] =		neg_reg,
+	[BPF_ALU | BPF_LSH | BPF_X] =	shl_reg,
 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
+	[BPF_ALU | BPF_RSH | BPF_X] =	shr_reg,
+	[BPF_ALU | BPF_RSH | BPF_K] =	shr_imm,
+	[BPF_ALU | BPF_ARSH | BPF_X] =	ashr_reg,
+	[BPF_ALU | BPF_ARSH | BPF_K] =	ashr_imm,
 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
@@ -3196,22 +3504,62 @@
 	[BPF_JMP | BPF_JSLE | BPF_X] =  cmp_reg,
 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
+	[BPF_JMP32 | BPF_JEQ | BPF_K] =	jeq32_imm,
+	[BPF_JMP32 | BPF_JGT | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JGE | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JLT | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JLE | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm,
+	[BPF_JMP32 | BPF_JNE | BPF_K] =	jne_imm,
+	[BPF_JMP32 | BPF_JEQ | BPF_X] =	jeq_reg,
+	[BPF_JMP32 | BPF_JGT | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JGE | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JLT | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JLE | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg,
+	[BPF_JMP32 | BPF_JNE | BPF_X] =	jne_reg,
 	[BPF_JMP | BPF_CALL] =		call,
-	[BPF_JMP | BPF_EXIT] =		goto_out,
+	[BPF_JMP | BPF_EXIT] =		jmp_exit,
 };
 
 /* --- Assembler logic --- */
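+/* For calls that go through the register-saving subroutine, the callee start
+ * offset is carried in an immediate emitted right after the branch; it was
+ * emitted as 0 and is patched here once the destination offset is known.
+ */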
+static int
+nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		     struct nfp_insn_meta *jmp_dst, u32 br_idx)
+{
+	if (immed_get_value(nfp_prog->prog[br_idx + 1])) {
+		pr_err("BUG: failed to fix up callee register saving\n");
+		return -EINVAL;
+	}
+
+	immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off);
+
+	return 0;
+}
+
 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
 {
 	struct nfp_insn_meta *meta, *jmp_dst;
 	u32 idx, br_idx;
+	int err;
 
 	list_for_each_entry(meta, &nfp_prog->insns, l) {
-		if (meta->skip)
+		if (meta->flags & FLAG_INSN_SKIP_MASK)
 			continue;
-		if (meta->insn.code == (BPF_JMP | BPF_CALL))
+		if (!is_mbpf_jmp(meta))
 			continue;
-		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
+		if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
+		    !nfp_is_main_function(meta))
+			continue;
+		if (is_mbpf_helper_call(meta))
 			continue;
 
 		if (list_is_last(&meta->l, &nfp_prog->insns))
@@ -3219,14 +3567,26 @@
 		else
 			br_idx = list_next_entry(meta, l)->off - 1;
 
+		/* For a BPF-to-BPF function call, a stack adjustment sequence
+		 * is generated after the return instruction. Therefore, we
+		 * must subtract the length of this sequence so that br_idx
+		 * points to where the "branch" NFP instruction is expected
+		 * to be.
+		 */
+		if (is_mbpf_pseudo_call(meta))
+			br_idx -= meta->num_insns_after_br;
+
 		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
 			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
 			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
 			return -ELOOP;
 		}
+
+		if (meta->insn.code == (BPF_JMP | BPF_EXIT))
+			continue;
+
 		/* Leave special branches for later */
 		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
-		    RELO_BR_REL)
+		    RELO_BR_REL && !is_mbpf_pseudo_call(meta))
 			continue;
 
 		if (!meta->jmp_dst) {
@@ -3236,11 +3596,23 @@
 
 		jmp_dst = meta->jmp_dst;
 
-		if (jmp_dst->skip) {
+		if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
 			pr_err("Branch landing on removed instruction!!\n");
 			return -ELOOP;
 		}
 
+		if (is_mbpf_pseudo_call(meta) &&
+		    nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
+			err = nfp_fixup_immed_relo(nfp_prog, meta,
+						   jmp_dst, br_idx);
+			if (err)
+				return err;
+		}
+
+		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
+		    RELO_BR_REL)
+			continue;
+
 		for (idx = meta->off; idx <= br_idx; idx++) {
 			if (!nfp_is_br(nfp_prog->prog[idx]))
 				continue;
@@ -3258,6 +3630,27 @@
 		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
 }
 
+static void
+nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	/* Save the return address onto the stack. */
+	wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
+}
+
+static void
+nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth;
+
+	nfp_prog->stack_frame_depth = round_up(depth, 4);
+	nfp_subprog_prologue(nfp_prog, meta);
+}
+
+bool nfp_is_subprog_start(struct nfp_insn_meta *meta)
+{
+	return meta->flags & FLAG_INSN_IS_SUBPROG_START;
+}
+
 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
 {
 	/* TC direct-action mode:
@@ -3348,6 +3741,67 @@
 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
 }
 
+static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
+{
+	unsigned int idx;
+
+	for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
+		if (nfp_prog->subprog[idx].needs_reg_push)
+			return true;
+
+	return false;
+}
+
+static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
+{
+	u8 reg;
+
+	/* Subroutine: Save all callee saved registers (R6 ~ R9).
+	 * imm_b() holds the return address.
+	 */
+	nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
+	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
+		u8 adj = (reg - BPF_REG_0) * 2;
+		u8 idx = (reg - BPF_REG_6) * 2;
+
+		/* The first slot in the stack frame is used to push the return
+		 * address in bpf_to_bpf_call(), so start just after it.
+		 */
+		wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));
+
+		if (reg == BPF_REG_8)
+			/* Prepare to jump back, last 3 insns use defer slots */
+			emit_rtn(nfp_prog, imm_b(nfp_prog), 3);
+
+		wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
+	}
+}
+
+static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
+{
+	u8 reg;
+
+	/* Subroutine: Restore all callee saved registers (R6 ~ R9).
+	 * ret_reg() holds the return address.
+	 */
+	nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
+	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
+		u8 adj = (reg - BPF_REG_0) * 2;
+		u8 idx = (reg - BPF_REG_6) * 2;
+
+		/* The first slot in the stack frame holds the return address,
+		 * so start popping just after it.
+		 */
+		wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));
+
+		if (reg == BPF_REG_8)
+			/* Prepare to jump back, last 3 insns use defer slots */
+			emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);
+
+		wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
+	}
+}
+
 static void nfp_outro(struct nfp_prog *nfp_prog)
 {
 	switch (nfp_prog->type) {
@@ -3360,13 +3814,23 @@
 	default:
 		WARN_ON(1);
 	}
+
+	if (!nfp_prog_needs_callee_reg_save(nfp_prog))
+		return;
+
+	nfp_push_callee_registers(nfp_prog);
+	nfp_pop_callee_registers(nfp_prog);
 }
 
 static int nfp_translate(struct nfp_prog *nfp_prog)
 {
 	struct nfp_insn_meta *meta;
+	unsigned int depth;
 	int err;
 
+	depth = nfp_prog->subprog[0].stack_depth;
+	nfp_prog->stack_frame_depth = round_up(depth, 4);
+
 	nfp_intro(nfp_prog);
 	if (nfp_prog->error)
 		return nfp_prog->error;
@@ -3376,7 +3840,13 @@
 
 		meta->off = nfp_prog_current_offset(nfp_prog);
 
-		if (meta->skip) {
+		if (nfp_is_subprog_start(meta)) {
+			nfp_start_subprog(nfp_prog, meta);
+			if (nfp_prog->error)
+				return nfp_prog->error;
+		}
+
+		if (meta->flags & FLAG_INSN_SKIP_MASK) {
 			nfp_prog->n_translated++;
 			continue;
 		}
@@ -3424,10 +3894,10 @@
 		/* Programs start with R6 = R1 but we ignore the skb pointer */
 		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
 		    insn.src_reg == 1 && insn.dst_reg == 6)
-			meta->skip = true;
+			meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
 
 		/* Return as soon as something doesn't match */
-		if (!meta->skip)
+		if (!(meta->flags & FLAG_INSN_SKIP_MASK))
 			return;
 	}
 }
@@ -3442,19 +3912,17 @@
 	list_for_each_entry(meta, &nfp_prog->insns, l) {
 		struct bpf_insn insn = meta->insn;
 
-		if (meta->skip)
+		if (meta->flags & FLAG_INSN_SKIP_MASK)
 			continue;
 
-		if (BPF_CLASS(insn.code) != BPF_ALU &&
-		    BPF_CLASS(insn.code) != BPF_ALU64 &&
-		    BPF_CLASS(insn.code) != BPF_JMP)
+		if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
 			continue;
 		if (BPF_SRC(insn.code) != BPF_K)
 			continue;
 		if (insn.imm >= 0)
 			continue;
 
-		if (BPF_CLASS(insn.code) == BPF_JMP) {
+		if (is_mbpf_jmp(meta)) {
 			switch (BPF_OP(insn.code)) {
 			case BPF_JGE:
 			case BPF_JSGE:
@@ -3516,7 +3984,7 @@
 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
 			continue;
 
-		meta2->skip = true;
+		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
 	}
 }
 
@@ -3556,8 +4024,8 @@
 		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
 			continue;
 
-		meta2->skip = true;
-		meta3->skip = true;
+		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
+		meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
 	}
 }
 
@@ -3752,7 +4220,8 @@
 				}
 
 				head_ld_meta->paired_st = &head_st_meta->insn;
-				head_st_meta->skip = true;
+				head_st_meta->flags |=
+					FLAG_INSN_SKIP_PREC_DEPENDENT;
 			} else {
 				head_ld_meta->ldst_gather_len = 0;
 			}
@@ -3785,8 +4254,8 @@
 			head_ld_meta = meta1;
 			head_st_meta = meta2;
 		} else {
-			meta1->skip = true;
-			meta2->skip = true;
+			meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
+			meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
 		}
 
 		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
@@ -3811,7 +4280,7 @@
 		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
 			cache_avail = false;
 
-		if (meta->skip)
+		if (meta->flags & FLAG_INSN_SKIP_MASK)
 			continue;
 
 		insn = &meta->insn;
@@ -3897,7 +4366,7 @@
 	}
 
 	list_for_each_entry(meta, &nfp_prog->insns, l) {
-		if (meta->skip)
+		if (meta->flags & FLAG_INSN_SKIP_MASK)
 			continue;
 
 		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
@@ -3933,7 +4402,8 @@
 	u32 id;
 
 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
-		if (meta1->skip || meta2->skip)
+		if (meta1->flags & FLAG_INSN_SKIP_MASK ||
+		    meta2->flags & FLAG_INSN_SKIP_MASK)
 			continue;
 
 		if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
@@ -4012,26 +4482,41 @@
 	return ret;
 }
 
-void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
+void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog)
 {
 	struct nfp_insn_meta *meta;
 
 	/* Another pass to record jump information. */
 	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		struct nfp_insn_meta *dst_meta;
 		u64 code = meta->insn.code;
+		unsigned int dst_idx;
+		bool pseudo_call;
 
-		if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
-		    BPF_OP(code) != BPF_CALL) {
-			struct nfp_insn_meta *dst_meta;
-			unsigned short dst_indx;
+		if (!is_mbpf_jmp(meta))
+			continue;
+		if (BPF_OP(code) == BPF_EXIT)
+			continue;
+		if (is_mbpf_helper_call(meta))
+			continue;
 
-			dst_indx = meta->n + 1 + meta->insn.off;
-			dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
-						     cnt);
+		/* If the opcode is BPF_CALL at this point, this can only be a
+		 * BPF-to-BPF call (a.k.a. pseudo call).
+		 */
+		pseudo_call = BPF_OP(code) == BPF_CALL;
 
-			meta->jmp_dst = dst_meta;
-			dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
-		}
+		if (pseudo_call)
+			dst_idx = meta->n + 1 + meta->insn.imm;
+		else
+			dst_idx = meta->n + 1 + meta->insn.off;
+
+		dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx);
+
+		if (pseudo_call)
+			dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START;
+
+		dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
+		meta->jmp_dst = dst_meta;
 	}
 }
 
@@ -4054,6 +4539,7 @@
 	for (i = 0; i < nfp_prog->prog_len; i++) {
 		enum nfp_relo_type special;
 		u32 val;
+		u16 off;
 
 		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
 		switch (special) {
@@ -4070,6 +4556,24 @@
 			br_set_offset(&prog[i],
 				      nfp_prog->tgt_abort + bv->start_off);
 			break;
+		case RELO_BR_GO_CALL_PUSH_REGS:
+			if (!nfp_prog->tgt_call_push_regs) {
+				pr_err("BUG: failed to detect subprogram registers needs\n");
+				err = -EINVAL;
+				goto err_free_prog;
+			}
+			off = nfp_prog->tgt_call_push_regs + bv->start_off;
+			br_set_offset(&prog[i], off);
+			break;
+		case RELO_BR_GO_CALL_POP_REGS:
+			if (!nfp_prog->tgt_call_pop_regs) {
+				pr_err("BUG: failed to detect subprogram registers needs\n");
+				err = -EINVAL;
+				goto err_free_prog;
+			}
+			off = nfp_prog->tgt_call_pop_regs + bv->start_off;
+			br_set_offset(&prog[i], off);
+			break;
 		case RELO_BR_NEXT_PKT:
 			br_set_offset(&prog[i], bv->tgt_done);
 			break;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 970af07..8f73277 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <net/pkt_cls.h>
 
@@ -54,11 +24,14 @@
 static bool nfp_net_ebpf_capable(struct nfp_net *nn)
 {
 #ifdef __LITTLE_ENDIAN
-	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
-	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
-		return true;
-#endif
+	struct nfp_app_bpf *bpf = nn->app->priv;
+
+	return nn->cap & NFP_NET_CFG_CTRL_BPF &&
+	       bpf->abi_version &&
+	       nn_readb(nn, NFP_NET_CFG_BPF_ABI) == bpf->abi_version;
+#else
 	return false;
+#endif
 }
 
 static int
@@ -187,35 +160,19 @@
 	return 0;
 }
 
-static int nfp_bpf_setup_tc_block(struct net_device *netdev,
-				  struct tc_block_offload *f)
-{
-	struct nfp_net *nn = netdev_priv(netdev);
-
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block,
-					     nfp_bpf_setup_tc_block_cb,
-					     nn, nn, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block,
-					nfp_bpf_setup_tc_block_cb,
-					nn);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(nfp_bpf_block_cb_list);
 
 static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
 			    enum tc_setup_type type, void *type_data)
 {
+	struct nfp_net *nn = netdev_priv(netdev);
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return nfp_bpf_setup_tc_block(netdev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &nfp_bpf_block_cb_list,
+						  nfp_bpf_setup_tc_block_cb,
+						  nn, nn, true);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -342,6 +299,34 @@
 	return 0;
 }
 
+static int
+nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
+				 u32 length)
+{
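+	/* Firmware can pack multiple map entries into a single cmsg. */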
+	bpf->cmsg_multi_ent = true;
+	return 0;
+}
+
+static int
+nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
+			      u32 length)
+{
+	if (length < 4) {
+		nfp_err(bpf->app->cpp, "truncated ABI version TLV: %d\n",
+			length);
+		return -EINVAL;
+	}
+
+	bpf->abi_version = readl(value);
+	if (bpf->abi_version < 2 || bpf->abi_version > 3) {
+		nfp_warn(bpf->app->cpp, "unsupported BPF ABI version: %d\n",
+			 bpf->abi_version);
+		bpf->abi_version = 0;
+	}
+
+	return 0;
+}
+
 static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 {
 	struct nfp_cpp *cpp = app->pf->cpp;
@@ -393,6 +378,16 @@
 							  length))
 				goto err_release_free;
 			break;
+		case NFP_BPF_CAP_TYPE_ABI_VERSION:
+			if (nfp_bpf_parse_cap_abi_version(app->priv, value,
+							  length))
+				goto err_release_free;
+			break;
+		case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
+			if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
+							     length))
+				goto err_release_free;
+			break;
 		default:
 			nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
 			break;
@@ -414,6 +409,11 @@
 	return -EINVAL;
 }
 
+static void nfp_bpf_init_capabilities(struct nfp_app_bpf *bpf)
+{
+	bpf->abi_version = 2; /* Original BPF ABI version */
+}
+
 static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
 {
 	struct nfp_app_bpf *bpf = app->priv;
@@ -428,6 +428,25 @@
 	bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev);
 }
 
+static int nfp_bpf_start(struct nfp_app *app)
+{
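+	/* Check the control channel MTU against cmsg requirements and size
+	 * the map-entry cache according to firmware support.
+	 */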
+	struct nfp_app_bpf *bpf = app->priv;
+
+	if (app->ctrl->dp.mtu < nfp_bpf_ctrl_cmsg_min_mtu(bpf)) {
+		nfp_err(bpf->app->cpp,
+			"ctrl channel MTU below min required %u < %u\n",
+			app->ctrl->dp.mtu, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
+		return -EINVAL;
+	}
+
+	if (bpf->cmsg_multi_ent)
+		bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf);
+	else
+		bpf->cmsg_cache_cnt = 1;
+
+	return 0;
+}
+
 static int nfp_bpf_init(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf;
@@ -439,19 +458,32 @@
 	bpf->app = app;
 	app->priv = bpf;
 
-	skb_queue_head_init(&bpf->cmsg_replies);
-	init_waitqueue_head(&bpf->cmsg_wq);
 	INIT_LIST_HEAD(&bpf->map_list);
 
+	err = nfp_ccm_init(&bpf->ccm, app);
+	if (err)
+		goto err_free_bpf;
+
 	err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
 	if (err)
-		goto err_free_bpf;
+		goto err_clean_ccm;
+
+	nfp_bpf_init_capabilities(bpf);
 
 	err = nfp_bpf_parse_capabilities(app);
 	if (err)
 		goto err_free_neutral_maps;
 
-	bpf->bpf_dev = bpf_offload_dev_create();
+	if (bpf->abi_version < 3) {
+		bpf->cmsg_key_sz = CMSG_MAP_KEY_LW * 4;
+		bpf->cmsg_val_sz = CMSG_MAP_VALUE_LW * 4;
+	} else {
+		bpf->cmsg_key_sz = bpf->maps.max_key_sz;
+		bpf->cmsg_val_sz = bpf->maps.max_val_sz;
+		app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
+	}
+
+	bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf);
 	err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
 	if (err)
 		goto err_free_neutral_maps;
@@ -460,22 +492,19 @@
 
 err_free_neutral_maps:
 	rhashtable_destroy(&bpf->maps_neutral);
+err_clean_ccm:
+	nfp_ccm_clean(&bpf->ccm);
 err_free_bpf:
 	kfree(bpf);
 	return err;
 }
 
-static void nfp_check_rhashtable_empty(void *ptr, void *arg)
-{
-	WARN_ON_ONCE(1);
-}
-
 static void nfp_bpf_clean(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf = app->priv;
 
 	bpf_offload_dev_destroy(bpf->bpf_dev);
-	WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
+	nfp_ccm_clean(&bpf->ccm);
 	WARN_ON(!list_empty(&bpf->map_list));
 	WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
 	rhashtable_free_and_destroy(&bpf->maps_neutral,
@@ -491,6 +520,7 @@
 
 	.init		= nfp_bpf_init,
 	.clean		= nfp_bpf_clean,
+	.start		= nfp_bpf_start,
 
 	.check_mtu	= nfp_bpf_check_mtu,
 
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index dbd0098..fac9c6f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2016-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
 
 #ifndef __NFP_BPF_H__
 #define __NFP_BPF_H__ 1
@@ -44,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 
+#include "../ccm.h"
 #include "../nfp_asm.h"
 #include "fw.h"
 
@@ -61,6 +32,8 @@
 	/* internal jumps to parts of the outro */
 	RELO_BR_GO_OUT,
 	RELO_BR_GO_ABORT,
+	RELO_BR_GO_CALL_PUSH_REGS,
+	RELO_BR_GO_CALL_POP_REGS,
 	/* external jumps to fixed addresses */
 	RELO_BR_NEXT_PKT,
 	RELO_BR_HELPER,
@@ -104,6 +77,7 @@
 #define imma_a(np)	reg_a(STATIC_REG_IMMA)
 #define imma_b(np)	reg_b(STATIC_REG_IMMA)
 #define imm_both(np)	reg_both(STATIC_REG_IMM)
+#define ret_reg(np)	imm_a(np)
 
 #define NFP_BPF_ABI_FLAGS	reg_imm(0)
 #define   NFP_BPF_ABI_FLAG_MARK	1
@@ -111,15 +85,12 @@
 /**
  * struct nfp_app_bpf - bpf app priv structure
  * @app:		backpointer to the app
+ * @ccm:		common control message handler data
  *
  * @bpf_dev:		BPF offload device handle
  *
- * @tag_allocator:	bitmap of control message tags in use
- * @tag_alloc_next:	next tag bit to allocate
- * @tag_alloc_last:	next tag bit to be freed
- *
- * @cmsg_replies:	received cmsg replies waiting to be consumed
- * @cmsg_wq:		work queue for waiting for cmsg replies
+ * @cmsg_key_sz:	size of key in cmsg element array
+ * @cmsg_val_sz:	size of value in cmsg element array
  *
  * @map_list:		list of offloaded maps
  * @maps_in_use:	number of currently offloaded maps
@@ -127,6 +98,9 @@
  *
  * @maps_neutral:	hash table of offload-neutral maps (on pointer)
  *
+ * @abi_version:	global BPF ABI version
+ * @cmsg_cache_cnt:	number of entries to read for caching
+ *
  * @adjust_head:	adjust head capability
  * @adjust_head.flags:		extra flags for adjust head
  * @adjust_head.off_min:	minimal packet offset within buffer required
@@ -151,18 +125,18 @@
  * @pseudo_random:	FW initialized the pseudo-random machinery (CSRs)
  * @queue_select:	BPF can set the RX queue ID in packet vector
  * @adjust_tail:	BPF can simply trunc packet size for adjust tail
+ * @cmsg_multi_ent:	FW can pack multiple map entries in a single cmsg
  */
 struct nfp_app_bpf {
 	struct nfp_app *app;
+	struct nfp_ccm ccm;
 
 	struct bpf_offload_dev *bpf_dev;
 
-	DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
-	u16 tag_alloc_next;
-	u16 tag_alloc_last;
+	unsigned int cmsg_key_sz;
+	unsigned int cmsg_val_sz;
 
-	struct sk_buff_head cmsg_replies;
-	struct wait_queue_head cmsg_wq;
+	unsigned int cmsg_cache_cnt;
 
 	struct list_head map_list;
 	unsigned int maps_in_use;
@@ -170,6 +144,8 @@
 
 	struct rhashtable maps_neutral;
 
+	u32 abi_version;
+
 	struct nfp_bpf_cap_adjust_head {
 		u32 flags;
 		int off_min;
@@ -197,6 +173,7 @@
 	bool pseudo_random;
 	bool queue_select;
 	bool adjust_tail;
+	bool cmsg_multi_ent;
 };
 
 enum nfp_bpf_map_use {
@@ -206,11 +183,26 @@
 	NFP_MAP_USE_ATOMIC_CNT,
 };
 
+struct nfp_bpf_map_word {
+	unsigned char type		:4;
+	unsigned char non_zero_update	:1;
+};
+
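+/* Map-entry cache: how many entries are fetched per lookup for caching and
+ * how long (in ns) a cached reply remains valid.
+ */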
+#define NFP_BPF_MAP_CACHE_CNT		4U
+#define NFP_BPF_MAP_CACHE_TIME_NS	(250 * 1000)
+
 /**
  * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
  * @offmap:	pointer to the offloaded BPF map
  * @bpf:	back pointer to bpf app private structure
  * @tid:	table id identifying map on datapath
+ *
+ * @cache_lock:	protects @cache_blockers, @cache_to, @cache
+ * @cache_blockers:	number of ops in flight which block caching
+ * @cache_gen:	counter incremented by every blocker on exit
+ * @cache_to:	time when cache will no longer be valid (ns)
+ * @cache:	skb with cached response
+ *
  * @l:		link on the nfp_app_bpf->map_list list
  * @use_map:	map of how the value is used (in 4B chunks)
  */
@@ -218,8 +210,15 @@
 	struct bpf_offloaded_map *offmap;
 	struct nfp_app_bpf *bpf;
 	u32 tid;
+
+	spinlock_t cache_lock;
+	u32 cache_blockers;
+	u32 cache_gen;
+	u64 cache_to;
+	struct sk_buff *cache;
+
 	struct list_head l;
-	enum nfp_bpf_map_use use_map[];
+	struct nfp_bpf_map_word use_map[];
 };
 
 struct nfp_bpf_neutral_map {
@@ -252,7 +251,21 @@
 	bool var_off;
 };
 
-#define FLAG_INSN_IS_JUMP_DST	BIT(0)
+#define FLAG_INSN_IS_JUMP_DST			BIT(0)
+#define FLAG_INSN_IS_SUBPROG_START		BIT(1)
+#define FLAG_INSN_PTR_CALLER_STACK_FRAME	BIT(2)
+/* Instruction is pointless, noop even on its own */
+#define FLAG_INSN_SKIP_NOOP			BIT(3)
+/* Instruction is optimized out based on preceding instructions */
+#define FLAG_INSN_SKIP_PREC_DEPENDENT		BIT(4)
+/* Instruction is optimized out by the verifier */
+#define FLAG_INSN_SKIP_VERIFIER_OPT		BIT(5)
+/* Instruction needs to zero-extend the result into the high 32 bits */
+#define FLAG_INSN_DO_ZEXT			BIT(6)
+
+#define FLAG_INSN_SKIP_MASK		(FLAG_INSN_SKIP_NOOP | \
+					 FLAG_INSN_SKIP_PREC_DEPENDENT | \
+					 FLAG_INSN_SKIP_VERIFIER_OPT)
 
 /**
  * struct nfp_insn_meta - BPF instruction wrapper
@@ -269,6 +282,7 @@
  * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
  * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB
+ * @num_insns_after_br: number of insns following a branch jump, used for fixup
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
@@ -279,7 +293,7 @@
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @flags: eBPF instruction extra optimization flags
- * @skip: skip this instruction (optimized out)
+ * @subprog_idx: index of subprogram to which the instruction belongs
  * @double_cb: callback for second part of the instruction
  * @l: link on nfp_prog->insns list
  */
@@ -304,6 +318,7 @@
 		struct {
 			struct nfp_insn_meta *jmp_dst;
 			bool jump_neg_op;
+			u32 num_insns_after_br; /* only for BPF-to-BPF calls */
 		};
 		/* function calls */
 		struct {
@@ -325,7 +340,7 @@
 	unsigned int off;
 	unsigned short n;
 	unsigned short flags;
-	bool skip;
+	unsigned short subprog_idx;
 	instr_cb_t double_cb;
 
 	struct list_head l;
@@ -363,6 +378,21 @@
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
 }
 
+static inline bool is_mbpf_jmp32(const struct nfp_insn_meta *meta)
+{
+	return mbpf_class(meta) == BPF_JMP32;
+}
+
+static inline bool is_mbpf_jmp64(const struct nfp_insn_meta *meta)
+{
+	return mbpf_class(meta) == BPF_JMP;
+}
+
+static inline bool is_mbpf_jmp(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_jmp32(meta) || is_mbpf_jmp64(meta);
+}
+
 static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
 {
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
@@ -413,23 +443,71 @@
 	return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV;
 }
 
+static inline bool is_mbpf_cond_jump(const struct nfp_insn_meta *meta)
+{
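+	/* All JMP32 instructions are conditional; JMP instructions are,
+	 * except for JA, EXIT and CALL.
+	 */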
+	u8 op;
+
+	if (is_mbpf_jmp32(meta))
+		return true;
+
+	if (!is_mbpf_jmp64(meta))
+		return false;
+
+	op = mbpf_op(meta);
+	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+}
+
+static inline bool is_mbpf_helper_call(const struct nfp_insn_meta *meta)
+{
+	struct bpf_insn insn = meta->insn;
+
+	return insn.code == (BPF_JMP | BPF_CALL) &&
+		insn.src_reg != BPF_PSEUDO_CALL;
+}
+
+static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta)
+{
+	struct bpf_insn insn = meta->insn;
+
+	return insn.code == (BPF_JMP | BPF_CALL) &&
+		insn.src_reg == BPF_PSEUDO_CALL;
+}
+
+#define STACK_FRAME_ALIGN 64
+
+/**
+ * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info
+ * @stack_depth:	maximum stack depth used by this sub-program
+ * @needs_reg_push:	whether sub-program uses callee-saved registers
+ */
+struct nfp_bpf_subprog_info {
+	u16 stack_depth;
+	u8 needs_reg_push : 1;
+};
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure
  * @prog: machine code
  * @prog_len: number of valid instructions in @prog array
  * @__prog_alloc_len: alloc size of @prog array
+ * @stack_size: total amount of stack used
  * @verifier_meta: temporary storage for verifier's insn meta
  * @type: BPF program type
  * @last_bpf_off: address of the last instruction translated from BPF
  * @tgt_out: jump target for normal exit
  * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
+ * @tgt_call_push_regs: jump target for subroutine for saving R6~R9 to stack
+ * @tgt_call_pop_regs: jump target for subroutine used for restoring R6~R9
  * @n_translated: number of successfully translated instructions (for errors)
  * @error: error code if something went wrong
- * @stack_depth: max stack depth from the verifier
+ * @stack_frame_depth: max stack depth for current frame
  * @adjust_head_location: if program has single adjust head call - the insn no.
  * @map_records_cnt: the number of map pointers recorded for this prog
+ * @subprog_cnt: number of sub-programs, including main function
  * @map_records: the map record pointers from bpf->maps_neutral
+ * @subprog: pointer to an array of objects holding info about sub-programs
+ * @n_insns: number of instructions on @insns list
  * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
  */
 struct nfp_prog {
@@ -439,6 +517,8 @@
 	unsigned int prog_len;
 	unsigned int __prog_alloc_len;
 
+	unsigned int stack_size;
+
 	struct nfp_insn_meta *verifier_meta;
 
 	enum bpf_prog_type type;
@@ -446,16 +526,21 @@
 	unsigned int last_bpf_off;
 	unsigned int tgt_out;
 	unsigned int tgt_abort;
+	unsigned int tgt_call_push_regs;
+	unsigned int tgt_call_pop_regs;
 
 	unsigned int n_translated;
 	int error;
 
-	unsigned int stack_depth;
+	unsigned int stack_frame_depth;
 	unsigned int adjust_head_location;
 
 	unsigned int map_records_cnt;
+	unsigned int subprog_cnt;
 	struct nfp_bpf_neutral_map **map_records;
+	struct nfp_bpf_subprog_info *subprog;
 
+	unsigned int n_insns;
 	struct list_head insns;
 };
 
@@ -471,11 +556,20 @@
 	unsigned int tgt_done;
 };
 
-void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt);
+bool nfp_is_subprog_start(struct nfp_insn_meta *meta);
+void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog);
 int nfp_bpf_jit(struct nfp_prog *prog);
 bool nfp_bpf_supported_opcode(u8 code);
 
-extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
+int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
+		    int prev_insn_idx);
+int nfp_bpf_finalize(struct bpf_verifier_env *env);
+
+int nfp_bpf_opt_replace_insn(struct bpf_verifier_env *env, u32 off,
+			     struct bpf_insn *insn);
+int nfp_bpf_opt_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
+
+extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops;
 
 struct netdev_bpf;
 struct nfp_app;
@@ -488,10 +582,13 @@
 
 struct nfp_insn_meta *
 nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-		  unsigned int insn_idx, unsigned int n_insns);
+		  unsigned int insn_idx);
 
 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
 
+unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf);
+unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf);
 long long int
 nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
 void
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 1ccd637..88fab6a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2016-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_net_offload.c
@@ -52,6 +22,7 @@
 #include <net/tc_act/tc_mirred.h>
 
 #include "main.h"
+#include "../ccm.h"
 #include "../nfp_app.h"
 #include "../nfp_net_ctrl.h"
 #include "../nfp_net.h"
@@ -63,9 +34,6 @@
 	struct nfp_bpf_neutral_map *record;
 	int err;
 
-	/* Map record paths are entered via ndo, update side is protected. */
-	ASSERT_RTNL();
-
 	/* Reuse path - other offloaded program is already tracking this map. */
 	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
 					nfp_bpf_maps_neutral_params);
@@ -114,8 +82,6 @@
 	bool freed = false;
 	int i;
 
-	ASSERT_RTNL();
-
 	for (i = 0; i < nfp_prog->map_records_cnt; i++) {
 		if (--nfp_prog->map_records[i]->count) {
 			nfp_prog->map_records[i] = NULL;
@@ -198,8 +164,9 @@
 
 		list_add_tail(&meta->l, &nfp_prog->insns);
 	}
+	nfp_prog->n_insns = cnt;
 
-	nfp_bpf_jit_prepare(nfp_prog, cnt);
+	nfp_bpf_jit_prepare(nfp_prog);
 
 	return 0;
 }
@@ -208,6 +175,8 @@
 {
 	struct nfp_insn_meta *meta, *tmp;
 
+	kfree(nfp_prog->subprog);
+
 	list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
 		list_del(&meta->l);
 		kfree(meta);
@@ -215,11 +184,8 @@
 	kfree(nfp_prog);
 }
 
-static int
-nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
-		      struct netdev_bpf *bpf)
+static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
 {
-	struct bpf_prog *prog = bpf->verifier.prog;
 	struct nfp_prog *nfp_prog;
 	int ret;
 
@@ -230,14 +196,13 @@
 
 	INIT_LIST_HEAD(&nfp_prog->insns);
 	nfp_prog->type = prog->type;
-	nfp_prog->bpf = app->priv;
+	nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev);
 
 	ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len);
 	if (ret)
 		goto err_free;
 
 	nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog);
-	bpf->verifier.ops = &nfp_bpf_analyzer_ops;
 
 	return 0;
 
@@ -247,20 +212,16 @@
 	return ret;
 }
 
-static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
+static int nfp_bpf_translate(struct bpf_prog *prog)
 {
+	struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
-	unsigned int stack_size;
 	unsigned int max_instr;
 	int err;
 
-	stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
-	if (prog->aux->stack_depth > stack_size) {
-		nn_info(nn, "stack too large: program %dB > FW stack %dB\n",
-			prog->aux->stack_depth, stack_size);
-		return -EOPNOTSUPP;
-	}
-	nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4);
+	/* We depend on dead code elimination succeeding */
+	if (prog->aux->offload->opt_failed)
+		return -EINVAL;
 
 	max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
 	nfp_prog->__prog_alloc_len = max_instr * sizeof(u64);
@@ -279,15 +240,13 @@
 	return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
 }
 
-static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
+static void nfp_bpf_destroy(struct bpf_prog *prog)
 {
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
 
 	kvfree(nfp_prog->prog);
 	nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
 	nfp_prog_free(nfp_prog);
-
-	return 0;
 }
 
 /* Atomic engine requires values to be in big endian, we need to byte swap
@@ -299,10 +258,25 @@
 	unsigned int i;
 
 	for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
-		if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT)
+		if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT)
 			word[i] = (__force u32)cpu_to_be32(word[i]);
 }
 
+/* Mark the value as unsafely initialized in case it is later used in
+ * atomic operations and we didn't byte swap something that is not
+ * byte-swap neutral.
+ */
+static void
+nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value)
+{
+	u32 *word = value;
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
+		if (nfp_map->use_map[i].type == NFP_MAP_UNUSED &&
+		    word[i] != (__force u32)cpu_to_be32(word[i]))
+			nfp_map->use_map[i].non_zero_update = 1;
+}
+
 static int
 nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap,
 			 void *key, void *value)
@@ -322,6 +296,7 @@
 			 void *key, void *value, u64 flags)
 {
 	nfp_map_bpf_byte_swap(offmap->dev_priv, value);
+	nfp_map_bpf_byte_swap_record(offmap->dev_priv, value);
 	return nfp_bpf_ctrl_update_entry(offmap, key, value, flags);
 }
 
@@ -410,6 +385,7 @@
 	offmap->dev_priv = nfp_map;
 	nfp_map->offmap = offmap;
 	nfp_map->bpf = bpf;
+	spin_lock_init(&nfp_map->cache_lock);
 
 	res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
 	if (res < 0) {
@@ -432,6 +408,8 @@
 	struct nfp_bpf_map *nfp_map = offmap->dev_priv;
 
 	nfp_bpf_ctrl_free_map(bpf, nfp_map);
+	dev_consume_skb_any(nfp_map->cache);
+	WARN_ON_ONCE(nfp_map->cache_blockers);
 	list_del_init(&nfp_map->l);
 	bpf->map_elems_in_use -= offmap->map.max_entries;
 	bpf->maps_in_use--;
@@ -443,12 +421,6 @@
 int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
 {
 	switch (bpf->command) {
-	case BPF_OFFLOAD_VERIFIER_PREP:
-		return nfp_bpf_verifier_prep(app, nn, bpf);
-	case BPF_OFFLOAD_TRANSLATE:
-		return nfp_bpf_translate(nn, bpf->offload.prog);
-	case BPF_OFFLOAD_DESTROY:
-		return nfp_bpf_destroy(nn, bpf->offload.prog);
 	case BPF_OFFLOAD_MAP_ALLOC:
 		return nfp_bpf_map_alloc(app->priv, bpf->offmap);
 	case BPF_OFFLOAD_MAP_FREE:
@@ -484,7 +456,7 @@
 
 	if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
 		return -EINVAL;
-	if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
+	if (cbe->hdr.ver != NFP_CCM_ABI_VERSION)
 		return -EINVAL;
 
 	rcu_read_lock();
@@ -510,14 +482,27 @@
 		 struct netlink_ext_ack *extack)
 {
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
-	unsigned int max_mtu;
+	unsigned int fw_mtu, pkt_off, max_stack, max_prog_len;
 	dma_addr_t dma_addr;
 	void *img;
 	int err;
 
-	max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
-	if (max_mtu < nn->dp.netdev->mtu) {
-		NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary");
+	fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+	pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu);
+	if (fw_mtu < pkt_off) {
+		NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary");
+		return -EOPNOTSUPP;
+	}
+
+	max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
+	if (nfp_prog->stack_size > max_stack) {
+		NL_SET_ERR_MSG_MOD(extack, "stack too large");
+		return -EOPNOTSUPP;
+	}
+
+	max_prog_len = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+	if (nfp_prog->prog_len > max_prog_len) {
+		NL_SET_ERR_MSG_MOD(extack, "program too long");
 		return -EOPNOTSUPP;
 	}
 
@@ -609,3 +594,13 @@
 
 	return 0;
 }
+
+const struct bpf_prog_offload_ops nfp_bpf_dev_ops = {
+	.insn_hook	= nfp_verify_insn,
+	.finalize	= nfp_bpf_finalize,
+	.replace_insn	= nfp_bpf_opt_replace_insn,
+	.remove_insns	= nfp_bpf_opt_remove_insns,
+	.prepare	= nfp_bpf_verifier_prep,
+	.translate	= nfp_bpf_translate,
+	.destroy	= nfp_bpf_destroy,
+};
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index a6e9248..e92ee51 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -1,43 +1,15 @@
-/*
- * Copyright (C) 2016-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
 
 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/kernel.h>
+#include <linux/netdevice.h>
 #include <linux/pkt_cls.h>
 
 #include "../nfp_app.h"
 #include "../nfp_main.h"
+#include "../nfp_net.h"
 #include "fw.h"
 #include "main.h"
 
@@ -46,15 +18,15 @@
 
 struct nfp_insn_meta *
 nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-		  unsigned int insn_idx, unsigned int n_insns)
+		  unsigned int insn_idx)
 {
 	unsigned int forward, backward, i;
 
 	backward = meta->n - insn_idx;
 	forward = insn_idx - meta->n;
 
-	if (min(forward, backward) > n_insns - insn_idx - 1) {
-		backward = n_insns - insn_idx - 1;
+	if (min(forward, backward) > nfp_prog->n_insns - insn_idx - 1) {
+		backward = nfp_prog->n_insns - insn_idx - 1;
 		meta = nfp_prog_last_meta(nfp_prog);
 	}
 	if (min(forward, backward) > insn_idx && backward > insn_idx) {
@@ -108,6 +80,46 @@
 	nfp_prog->adjust_head_location = location;
 }
 
+static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env)
+{
+	const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
+	const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3;
+	struct bpf_offloaded_map *offmap;
+	struct bpf_func_state *state;
+	struct nfp_bpf_map *nfp_map;
+	int off, i;
+
+	state = env->cur_state->frame[reg3->frameno];
+
+	/* We need to record each time an update happens with non-zero words,
+	 * in case such a word is later used in atomic operations.
+	 * This implicitly depends on nfp_bpf_stack_arg_ok(reg3) having run
+	 * before us.
+	 */
+
+	offmap = map_to_offmap(reg1->map_ptr);
+	nfp_map = offmap->dev_priv;
+	off = reg3->off + reg3->var_off.value;
+
+	for (i = 0; i < offmap->map.value_size; i++) {
+		struct bpf_stack_state *stack_entry;
+		unsigned int soff;
+
+		soff = -(off + i) - 1;
+		stack_entry = &state->stack[soff / BPF_REG_SIZE];
+		if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO)
+			continue;
+
+		if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) {
+			pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n",
+				i, soff);
+			return false;
+		}
+		nfp_map->use_map[i / 4].non_zero_update = 1;
+	}
+
+	return true;
+}
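+
+/* A worked example of the slot arithmetic above (illustration only):
+ * with reg3 pointing at stack offset off == -16 and i == 0, soff is
+ * -(-16 + 0) - 1 == 15, so the loop checks state->stack[1].slot_type[7]
+ * (15 / BPF_REG_SIZE == 1 and 15 % BPF_REG_SIZE == 7, BPF_REG_SIZE
+ * being 8).
+ */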
+
 static int
 nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
 		     const struct bpf_reg_state *reg,
@@ -155,8 +167,9 @@
 }
 
 static int
-nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
-		   struct nfp_insn_meta *meta)
+nfp_bpf_check_helper_call(struct nfp_prog *nfp_prog,
+			  struct bpf_verifier_env *env,
+			  struct nfp_insn_meta *meta)
 {
 	const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
 	const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
@@ -198,7 +211,8 @@
 					 bpf->helpers.map_update, reg1) ||
 		    !nfp_bpf_stack_arg_ok("map_update", env, reg2,
 					  meta->func_id ? &meta->arg2 : NULL) ||
-		    !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL))
+		    !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) ||
+		    !nfp_bpf_map_update_value_ok(env))
 			return -EOPNOTSUPP;
 		break;
 
@@ -333,6 +347,9 @@
 {
 	s32 old_off, new_off;
 
+	if (reg->frameno != env->cur_state->curframe)
+		meta->flags |= FLAG_INSN_PTR_CALLER_STACK_FRAME;
+
 	if (!tnum_is_const(reg->var_off)) {
 		pr_vlog(env, "variable ptr stack access\n");
 		return -EINVAL;
@@ -376,15 +393,22 @@
 			  struct nfp_bpf_map *nfp_map,
 			  unsigned int off, enum nfp_bpf_map_use use)
 {
-	if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED &&
-	    nfp_map->use_map[off / 4] != use) {
+	if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED &&
+	    nfp_map->use_map[off / 4].type != use) {
 		pr_vlog(env, "map value use type conflict %s vs %s off: %u\n",
-			nfp_bpf_map_use_name(nfp_map->use_map[off / 4]),
+			nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type),
 			nfp_bpf_map_use_name(use), off);
 		return -EOPNOTSUPP;
 	}
 
-	nfp_map->use_map[off / 4] = use;
+	if (nfp_map->use_map[off / 4].non_zero_update &&
+	    use == NFP_MAP_USE_ATOMIC_CNT) {
+		pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n",
+			off);
+		return -EOPNOTSUPP;
+	}
+
+	nfp_map->use_map[off / 4].type = use;
 
 	return 0;
 }
@@ -599,13 +623,13 @@
 	return 0;
 }
 
-static int
-nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx,
+		    int prev_insn_idx)
 {
 	struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
 	struct nfp_insn_meta *meta = nfp_prog->verifier_meta;
 
-	meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len);
+	meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx);
 	nfp_prog->verifier_meta = meta;
 
 	if (!nfp_bpf_supported_opcode(meta->insn.code)) {
@@ -620,8 +644,8 @@
 		return -EINVAL;
 	}
 
-	if (meta->insn.code == (BPF_JMP | BPF_CALL))
-		return nfp_bpf_check_call(nfp_prog, env, meta);
+	if (is_mbpf_helper_call(meta))
+		return nfp_bpf_check_helper_call(nfp_prog, env, meta);
 	if (meta->insn.code == (BPF_JMP | BPF_EXIT))
 		return nfp_bpf_check_exit(nfp_prog, env);
 
@@ -640,6 +664,195 @@
 	return 0;
 }
 
-const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
-	.insn_hook = nfp_verify_insn,
-};
+static int
+nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env,
+				struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+	int index = 0;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (nfp_is_subprog_start(meta))
+			index++;
+		meta->subprog_idx = index;
+
+		if (meta->insn.dst_reg >= BPF_REG_6 &&
+		    meta->insn.dst_reg <= BPF_REG_9)
+			nfp_prog->subprog[index].needs_reg_push = 1;
+	}
+
+	if (index + 1 != nfp_prog->subprog_cnt) {
+		pr_vlog(env, "BUG: number of processed BPF functions is not consistent (processed %d, expected %d)\n",
+			index + 1, nfp_prog->subprog_cnt);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static unsigned int nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta = nfp_prog_first_meta(nfp_prog);
+	unsigned int max_depth = 0, depth = 0, frame = 0;
+	struct nfp_insn_meta *ret_insn[MAX_CALL_FRAMES];
+	unsigned short frame_depths[MAX_CALL_FRAMES];
+	unsigned short ret_prog[MAX_CALL_FRAMES];
+	unsigned short idx = meta->subprog_idx;
+
+	/* Inspired by check_max_stack_depth() in the kernel verifier.
+	 * Starting from main subprogram, walk all instructions and recursively
+	 * walk all callees that given subprogram can call. Since recursion is
+	 * prevented by the kernel verifier, this algorithm only needs a local
+	 * stack of MAX_CALL_FRAMES to remember callsites.
+	 */
+process_subprog:
+	frame_depths[frame] = nfp_prog->subprog[idx].stack_depth;
+	frame_depths[frame] = round_up(frame_depths[frame], STACK_FRAME_ALIGN);
+	depth += frame_depths[frame];
+	max_depth = max(max_depth, depth);
+
+continue_subprog:
+	for (; meta != nfp_prog_last_meta(nfp_prog) && meta->subprog_idx == idx;
+	     meta = nfp_meta_next(meta)) {
+		if (!is_mbpf_pseudo_call(meta))
+			continue;
+
+		/* We found a call to a subprogram. Remember instruction to
+		 * return to and subprog id.
+		 */
+		ret_insn[frame] = nfp_meta_next(meta);
+		ret_prog[frame] = idx;
+
+		/* Find the callee and start processing it. */
+		meta = nfp_bpf_goto_meta(nfp_prog, meta,
+					 meta->n + 1 + meta->insn.imm);
+		idx = meta->subprog_idx;
+		frame++;
+		goto process_subprog;
+	}
+	/* End of for() loop means the last instruction of the subprog was
+	 * reached. If we popped all stack frames, return; otherwise, go on
+	 * processing remaining instructions from the caller.
+	 */
+	if (frame == 0)
+		return max_depth;
+
+	depth -= frame_depths[frame];
+	frame--;
+	meta = ret_insn[frame];
+	idx = ret_prog[frame];
+	goto continue_subprog;
+}
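+
+/* An illustrative trace of the walk above (example numbers, assuming
+ * STACK_FRAME_ALIGN is 64): if main uses 40B of stack and calls
+ * subprog A (24B), which in turn calls subprog B (16B), every frame
+ * rounds up to 64B and the deepest chain yields max_depth == 192B,
+ * which is then checked against the FW stack limit in
+ * nfp_bpf_finalize().
+ */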
+
+static void nfp_bpf_insn_flag_zext(struct nfp_prog *nfp_prog,
+				   struct bpf_insn_aux_data *aux)
+{
+	struct nfp_insn_meta *meta;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (aux[meta->n].zext_dst)
+			meta->flags |= FLAG_INSN_DO_ZEXT;
+	}
+}
+
+int nfp_bpf_finalize(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *info;
+	struct nfp_prog *nfp_prog;
+	unsigned int max_stack;
+	struct nfp_net *nn;
+	int i;
+
+	nfp_prog = env->prog->aux->offload->dev_priv;
+	nfp_prog->subprog_cnt = env->subprog_cnt;
+	nfp_prog->subprog = kcalloc(nfp_prog->subprog_cnt,
+				    sizeof(nfp_prog->subprog[0]), GFP_KERNEL);
+	if (!nfp_prog->subprog)
+		return -ENOMEM;
+
+	nfp_assign_subprog_idx_and_regs(env, nfp_prog);
+
+	info = env->subprog_info;
+	for (i = 0; i < nfp_prog->subprog_cnt; i++) {
+		nfp_prog->subprog[i].stack_depth = info[i].stack_depth;
+
+		if (i == 0)
+			continue;
+
+		/* Account for size of return address. */
+		nfp_prog->subprog[i].stack_depth += REG_WIDTH;
+		/* Account for size of saved registers, if necessary. */
+		if (nfp_prog->subprog[i].needs_reg_push)
+			nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4;
+	}
+
+	nn = netdev_priv(env->prog->aux->offload->netdev);
+	max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
+	nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog);
+	if (nfp_prog->stack_size > max_stack) {
+		pr_vlog(env, "stack too large: program %dB > FW stack %dB\n",
+			nfp_prog->stack_size, max_stack);
+		return -EOPNOTSUPP;
+	}
+
+	nfp_bpf_insn_flag_zext(nfp_prog, env->insn_aux_data);
+	return 0;
+}
+
+int nfp_bpf_opt_replace_insn(struct bpf_verifier_env *env, u32 off,
+			     struct bpf_insn *insn)
+{
+	struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct nfp_insn_meta *meta = nfp_prog->verifier_meta;
+
+	meta = nfp_bpf_goto_meta(nfp_prog, meta, aux_data[off].orig_idx);
+	nfp_prog->verifier_meta = meta;
+
+	/* conditional jump to jump conversion */
+	if (is_mbpf_cond_jump(meta) &&
+	    insn->code == (BPF_JMP | BPF_JA | BPF_K)) {
+		unsigned int tgt_off;
+
+		tgt_off = off + insn->off + 1;
+
+		if (!insn->off) {
+			meta->jmp_dst = list_next_entry(meta, l);
+			meta->jump_neg_op = false;
+		} else if (meta->jmp_dst->n != aux_data[tgt_off].orig_idx) {
+			pr_vlog(env, "branch hard wire at %d changes target %d -> %d\n",
+				off, meta->jmp_dst->n,
+				aux_data[tgt_off].orig_idx);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
+	pr_vlog(env, "unsupported instruction replacement %hhx -> %hhx\n",
+		meta->insn.code, insn->code);
+	return -EINVAL;
+}
+
+int nfp_bpf_opt_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct nfp_insn_meta *meta = nfp_prog->verifier_meta;
+	unsigned int i;
+
+	meta = nfp_bpf_goto_meta(nfp_prog, meta, aux_data[off].orig_idx);
+
+	for (i = 0; i < cnt; i++) {
+		if (WARN_ON_ONCE(&meta->l == &nfp_prog->insns))
+			return -EINVAL;
+
+		/* doesn't count if it already has the flag */
+		if (meta->flags & FLAG_INSN_SKIP_VERIFIER_OPT)
+			i--;
+
+		meta->flags |= FLAG_INSN_SKIP_VERIFIER_OPT;
+		meta = list_next_entry(meta, l);
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/ccm.c b/drivers/net/ethernet/netronome/nfp/ccm.c
new file mode 100644
index 0000000..71afd11
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/ccm.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2019 Netronome Systems, Inc. */
+
+#include <linux/bitops.h>
+
+#include "ccm.h"
+#include "nfp_app.h"
+#include "nfp_net.h"
+
+#define ccm_warn(app, msg...)	nn_dp_warn(&(app)->ctrl->dp, msg)
+
+#define NFP_CCM_TAG_ALLOC_SPAN	(U16_MAX / 4)
+
+static bool nfp_ccm_all_tags_busy(struct nfp_ccm *ccm)
+{
+	u16 used_tags;
+
+	used_tags = ccm->tag_alloc_next - ccm->tag_alloc_last;
+
+	return used_tags > NFP_CCM_TAG_ALLOC_SPAN;
+}
+
+static int nfp_ccm_alloc_tag(struct nfp_ccm *ccm)
+{
+	/* CCM is used for FW communication, which is request-reply.  To make
+	 * sure we don't reuse a message ID too early after a timeout, limit
+	 * the number of requests in flight.
+	 */
+	if (unlikely(nfp_ccm_all_tags_busy(ccm))) {
+		ccm_warn(ccm->app, "all FW request contexts busy!\n");
+		return -EAGAIN;
+	}
+
+	WARN_ON(__test_and_set_bit(ccm->tag_alloc_next, ccm->tag_allocator));
+	return ccm->tag_alloc_next++;
+}
+
+static void nfp_ccm_free_tag(struct nfp_ccm *ccm, u16 tag)
+{
+	WARN_ON(!__test_and_clear_bit(tag, ccm->tag_allocator));
+
+	while (!test_bit(ccm->tag_alloc_last, ccm->tag_allocator) &&
+	       ccm->tag_alloc_last != ccm->tag_alloc_next)
+		ccm->tag_alloc_last++;
+}
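+
+/* Example of the rolling tag window (illustration only): after
+ * allocating tags 5, 6 and 7 the window is [5, 8).  Freeing 6 clears
+ * its bit but tag_alloc_last stays at 5; once 5 is also freed,
+ * tag_alloc_last skips over 5 and the already-free 6 up to 7.  The u16
+ * arithmetic in nfp_ccm_all_tags_busy() handles wrap-around at 65536.
+ */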
+
+static struct sk_buff *__nfp_ccm_reply(struct nfp_ccm *ccm, u16 tag)
+{
+	unsigned int msg_tag;
+	struct sk_buff *skb;
+
+	skb_queue_walk(&ccm->replies, skb) {
+		msg_tag = nfp_ccm_get_tag(skb);
+		if (msg_tag == tag) {
+			nfp_ccm_free_tag(ccm, tag);
+			__skb_unlink(skb, &ccm->replies);
+			return skb;
+		}
+	}
+
+	return NULL;
+}
+
+static struct sk_buff *
+nfp_ccm_reply(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag)
+{
+	struct sk_buff *skb;
+
+	nfp_ctrl_lock(app->ctrl);
+	skb = __nfp_ccm_reply(ccm, tag);
+	nfp_ctrl_unlock(app->ctrl);
+
+	return skb;
+}
+
+static struct sk_buff *
+nfp_ccm_reply_drop_tag(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag)
+{
+	struct sk_buff *skb;
+
+	nfp_ctrl_lock(app->ctrl);
+	skb = __nfp_ccm_reply(ccm, tag);
+	if (!skb)
+		nfp_ccm_free_tag(ccm, tag);
+	nfp_ctrl_unlock(app->ctrl);
+
+	return skb;
+}
+
+static struct sk_buff *
+nfp_ccm_wait_reply(struct nfp_ccm *ccm, struct nfp_app *app,
+		   enum nfp_ccm_type type, int tag)
+{
+	struct sk_buff *skb;
+	int i, err;
+
+	for (i = 0; i < 50; i++) {
+		udelay(4);
+		skb = nfp_ccm_reply(ccm, app, tag);
+		if (skb)
+			return skb;
+	}
+
+	err = wait_event_interruptible_timeout(ccm->wq,
+					       skb = nfp_ccm_reply(ccm, app,
+								   tag),
+					       msecs_to_jiffies(5000));
+	/* We didn't get a response - try one last time and atomically drop
+	 * the tag even if no response is matched.
+	 */
+	if (!skb)
+		skb = nfp_ccm_reply_drop_tag(ccm, app, tag);
+	if (err < 0) {
+		ccm_warn(app, "%s waiting for response to 0x%02x: %d\n",
+			 err == -ERESTARTSYS ? "interrupted" : "error",
+			 type, err);
+		return ERR_PTR(err);
+	}
+	if (!skb) {
+		ccm_warn(app, "timeout waiting for response to 0x%02x\n", type);
+		return ERR_PTR(-ETIMEDOUT);
+	}
+
+	return skb;
+}
+
+struct sk_buff *
+nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb,
+		    enum nfp_ccm_type type, unsigned int reply_size)
+{
+	struct nfp_app *app = ccm->app;
+	struct nfp_ccm_hdr *hdr;
+	int reply_type, tag;
+
+	nfp_ctrl_lock(app->ctrl);
+	tag = nfp_ccm_alloc_tag(ccm);
+	if (tag < 0) {
+		nfp_ctrl_unlock(app->ctrl);
+		dev_kfree_skb_any(skb);
+		return ERR_PTR(tag);
+	}
+
+	hdr = (void *)skb->data;
+	hdr->ver = NFP_CCM_ABI_VERSION;
+	hdr->type = type;
+	hdr->tag = cpu_to_be16(tag);
+
+	__nfp_app_ctrl_tx(app, skb);
+
+	nfp_ctrl_unlock(app->ctrl);
+
+	skb = nfp_ccm_wait_reply(ccm, app, type, tag);
+	if (IS_ERR(skb))
+		return skb;
+
+	reply_type = nfp_ccm_get_type(skb);
+	if (reply_type != __NFP_CCM_REPLY(type)) {
+		ccm_warn(app, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
+			 reply_type, __NFP_CCM_REPLY(type));
+		goto err_free;
+	}
+	/* 0 reply_size means caller will do the validation */
+	if (reply_size && skb->len != reply_size) {
+		ccm_warn(app, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
+			 type, skb->len, reply_size);
+		goto err_free;
+	}
+
+	return skb;
+err_free:
+	dev_kfree_skb_any(skb);
+	return ERR_PTR(-EIO);
+}
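+
+/* Sketch of a typical caller (illustrative only, not a real user of
+ * this helper; "req" and "reply" are hypothetical message structs):
+ *
+ *	skb = nfp_app_ctrl_msg_alloc(app, sizeof(*req), GFP_KERNEL);
+ *	req = (void *)skb->data;	// fill in fields past the hdr
+ *	skb = nfp_ccm_communicate(ccm, skb, NFP_CCM_TYPE_BPF_MAP_FREE,
+ *				  sizeof(*reply));
+ *	if (IS_ERR(skb))		// skb is consumed on error
+ *		return PTR_ERR(skb);
+ */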
+
+void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb)
+{
+	struct nfp_app *app = ccm->app;
+	unsigned int tag;
+
+	if (unlikely(skb->len < sizeof(struct nfp_ccm_hdr))) {
+		ccm_warn(app, "cmsg drop - too short %d!\n", skb->len);
+		goto err_free;
+	}
+
+	nfp_ctrl_lock(app->ctrl);
+
+	tag = nfp_ccm_get_tag(skb);
+	if (unlikely(!test_bit(tag, ccm->tag_allocator))) {
+		ccm_warn(app, "cmsg drop - no one is waiting for tag %u!\n",
+			 tag);
+		goto err_unlock;
+	}
+
+	__skb_queue_tail(&ccm->replies, skb);
+	wake_up_interruptible_all(&ccm->wq);
+
+	nfp_ctrl_unlock(app->ctrl);
+	return;
+
+err_unlock:
+	nfp_ctrl_unlock(app->ctrl);
+err_free:
+	dev_kfree_skb_any(skb);
+}
+
+int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app)
+{
+	ccm->app = app;
+	skb_queue_head_init(&ccm->replies);
+	init_waitqueue_head(&ccm->wq);
+	return 0;
+}
+
+void nfp_ccm_clean(struct nfp_ccm *ccm)
+{
+	WARN_ON(!skb_queue_empty(&ccm->replies));
+}
diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h
new file mode 100644
index 0000000..a460c75
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/ccm.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2016-2019 Netronome Systems, Inc. */
+
+#ifndef NFP_CCM_H
+#define NFP_CCM_H 1
+
+#include <linux/bitmap.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+
+struct nfp_app;
+struct nfp_net;
+
+/* Firmware ABI */
+
+enum nfp_ccm_type {
+	NFP_CCM_TYPE_BPF_MAP_ALLOC	= 1,
+	NFP_CCM_TYPE_BPF_MAP_FREE	= 2,
+	NFP_CCM_TYPE_BPF_MAP_LOOKUP	= 3,
+	NFP_CCM_TYPE_BPF_MAP_UPDATE	= 4,
+	NFP_CCM_TYPE_BPF_MAP_DELETE	= 5,
+	NFP_CCM_TYPE_BPF_MAP_GETNEXT	= 6,
+	NFP_CCM_TYPE_BPF_MAP_GETFIRST	= 7,
+	NFP_CCM_TYPE_BPF_BPF_EVENT	= 8,
+	NFP_CCM_TYPE_CRYPTO_RESET	= 9,
+	NFP_CCM_TYPE_CRYPTO_ADD		= 10,
+	NFP_CCM_TYPE_CRYPTO_DEL		= 11,
+	NFP_CCM_TYPE_CRYPTO_UPDATE	= 12,
+	__NFP_CCM_TYPE_MAX,
+};
+
+#define NFP_CCM_ABI_VERSION		1
+
+#define NFP_CCM_TYPE_REPLY_BIT		7
+#define __NFP_CCM_REPLY(req)		(BIT(NFP_CCM_TYPE_REPLY_BIT) | (req))
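+/* e.g. __NFP_CCM_REPLY(NFP_CCM_TYPE_BPF_MAP_LOOKUP) == BIT(7) | 3 ==
+ * 0x83 - replies echo the request type with the reply bit set.
+ */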
+
+struct nfp_ccm_hdr {
+	union {
+		struct {
+			u8 type;
+			u8 ver;
+			__be16 tag;
+		};
+		__be32 raw;
+	};
+};
+
+static inline u8 nfp_ccm_get_type(struct sk_buff *skb)
+{
+	struct nfp_ccm_hdr *hdr;
+
+	hdr = (struct nfp_ccm_hdr *)skb->data;
+
+	return hdr->type;
+}
+
+static inline __be16 __nfp_ccm_get_tag(struct sk_buff *skb)
+{
+	struct nfp_ccm_hdr *hdr;
+
+	hdr = (struct nfp_ccm_hdr *)skb->data;
+
+	return hdr->tag;
+}
+
+static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb)
+{
+	return be16_to_cpu(__nfp_ccm_get_tag(skb));
+}
+
+#define NFP_NET_MBOX_TLV_TYPE		GENMASK(31, 16)
+#define NFP_NET_MBOX_TLV_LEN		GENMASK(15, 0)
+
+enum nfp_ccm_mbox_tlv_type {
+	NFP_NET_MBOX_TLV_TYPE_UNKNOWN	= 0,
+	NFP_NET_MBOX_TLV_TYPE_END	= 1,
+	NFP_NET_MBOX_TLV_TYPE_MSG	= 2,
+	NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP	= 3,
+	NFP_NET_MBOX_TLV_TYPE_RESV	= 4,
+};
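+
+/* A TLV header is a single 32-bit word.  For example (illustration), a
+ * MSG TLV carrying a 12-byte cmsg is built as:
+ *
+ *	FIELD_PREP(NFP_NET_MBOX_TLV_TYPE, NFP_NET_MBOX_TLV_TYPE_MSG) |
+ *	FIELD_PREP(NFP_NET_MBOX_TLV_LEN, 12)
+ *
+ * which is 0x0002000c, with the payload words following the header.
+ */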
+
+/* Implementation */
+
+/**
+ * struct nfp_ccm - common control message handling
+ * @app:		APP handle
+ *
+ * @tag_allocator:	bitmap of control message tags in use
+ * @tag_alloc_next:	next tag bit to allocate
+ * @tag_alloc_last:	next tag bit to be freed
+ *
+ * @replies:		received cmsg replies waiting to be consumed
+ * @wq:			work queue for waiting for cmsg replies
+ */
+struct nfp_ccm {
+	struct nfp_app *app;
+
+	DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
+	u16 tag_alloc_next;
+	u16 tag_alloc_last;
+
+	struct sk_buff_head replies;
+	wait_queue_head_t wq;
+};
+
+int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app);
+void nfp_ccm_clean(struct nfp_ccm *ccm);
+void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb);
+struct sk_buff *
+nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb,
+		    enum nfp_ccm_type type, unsigned int reply_size);
+
+int nfp_ccm_mbox_alloc(struct nfp_net *nn);
+void nfp_ccm_mbox_free(struct nfp_net *nn);
+int nfp_ccm_mbox_init(struct nfp_net *nn);
+void nfp_ccm_mbox_clean(struct nfp_net *nn);
+bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size);
+struct sk_buff *
+nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size,
+		       unsigned int reply_size, gfp_t flags);
+int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb,
+			       enum nfp_ccm_type type,
+			       unsigned int reply_size,
+			       unsigned int max_reply_size, bool critical);
+int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb,
+			     enum nfp_ccm_type type,
+			     unsigned int reply_size,
+			     unsigned int max_reply_size);
+int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb,
+		      enum nfp_ccm_type type, unsigned int max_reply_size);
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
new file mode 100644
index 0000000..f0783aa
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/bitfield.h>
+#include <linux/io.h>
+#include <linux/skbuff.h>
+
+#include "ccm.h"
+#include "nfp_net.h"
+
+/* CCM messages via the mailbox.  CMSGs get wrapped into simple TLVs
+ * and copied into the mailbox.  Multiple messages can be copied to
+ * form a batch.  Threads come in with a CMSG formed in an skb, then
+ * enqueue that skb onto the request queue.  If the thread's skb is
+ * first in queue this thread will handle the mailbox operation.  It
+ * copies up to 64 messages into the mailbox (making sure that both
+ * requests and replies will fit).  After FW is done processing the
+ * batch it copies the data out and wakes waiting threads.
+ * If a thread is waiting it either gets its message completed
+ * (response is copied into the same skb as the request, overwriting
+ * it), or becomes the first in queue.
+ * Completions and next-to-run are signaled via the control buffer
+ * to limit potential cache line bounces.
+ */
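+
+/* For illustration, a two-message batch laid out in the mailbox looks
+ * roughly like this (after the common command header word):
+ *
+ *	[MSG len=a][a bytes][RESV][MSG len=b][b bytes][RESV][END]
+ *
+ * where a RESV TLV follows any message whose reply may be larger than
+ * the request, so FW can overwrite each request with its reply in
+ * place.
+ */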
+
+#define NFP_CCM_MBOX_BATCH_LIMIT	64
+#define NFP_CCM_TIMEOUT			(NFP_NET_POLL_TIMEOUT * 1000)
+#define NFP_CCM_MAX_QLEN		1024
+
+enum nfp_net_mbox_cmsg_state {
+	NFP_NET_MBOX_CMSG_STATE_QUEUED,
+	NFP_NET_MBOX_CMSG_STATE_NEXT,
+	NFP_NET_MBOX_CMSG_STATE_BUSY,
+	NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND,
+	NFP_NET_MBOX_CMSG_STATE_DONE,
+};
+
+/**
+ * struct nfp_ccm_mbox_cmsg_cb - CCM mailbox specific info
+ * @state:	processing state (/stage) of the message
+ * @err:	error encountered during processing if any
+ * @max_len:	max(request_len, reply_len)
+ * @exp_reply:	expected reply length (0 means don't validate)
+ * @posted:	the message was posted and nobody waits for the reply
+ */
+struct nfp_ccm_mbox_cmsg_cb {
+	enum nfp_net_mbox_cmsg_state state;
+	int err;
+	unsigned int max_len;
+	unsigned int exp_reply;
+	bool posted;
+};
+
+static u32 nfp_ccm_mbox_max_msg(struct nfp_net *nn)
+{
+	return round_down(nn->tlv_caps.mbox_len, 4) -
+		NFP_NET_CFG_MBOX_SIMPLE_VAL - /* common mbox command header */
+		4 * 2; /* Msg TLV plus End TLV headers */
+}
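+
+/* E.g. with a 4096-byte mailbox (an example value, the real size comes
+ * from the TLV caps) the largest single message is 4096 - 4 - 8 ==
+ * 4084 bytes.
+ */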
+
+static void
+nfp_ccm_mbox_msg_init(struct sk_buff *skb, unsigned int exp_reply, int max_len)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	cb->state = NFP_NET_MBOX_CMSG_STATE_QUEUED;
+	cb->err = 0;
+	cb->max_len = max_len;
+	cb->exp_reply = exp_reply;
+	cb->posted = false;
+}
+
+static int nfp_ccm_mbox_maxlen(const struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	return cb->max_len;
+}
+
+static bool nfp_ccm_mbox_done(struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	return cb->state == NFP_NET_MBOX_CMSG_STATE_DONE;
+}
+
+static bool nfp_ccm_mbox_in_progress(struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	return cb->state != NFP_NET_MBOX_CMSG_STATE_QUEUED &&
+	       cb->state != NFP_NET_MBOX_CMSG_STATE_NEXT;
+}
+
+static void nfp_ccm_mbox_set_busy(struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	cb->state = NFP_NET_MBOX_CMSG_STATE_BUSY;
+}
+
+static bool nfp_ccm_mbox_is_posted(struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	return cb->posted;
+}
+
+static void nfp_ccm_mbox_mark_posted(struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	cb->posted = true;
+}
+
+static bool nfp_ccm_mbox_is_first(struct nfp_net *nn, struct sk_buff *skb)
+{
+	return skb_queue_is_first(&nn->mbox_cmsg.queue, skb);
+}
+
+static bool nfp_ccm_mbox_should_run(struct nfp_net *nn, struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	return cb->state == NFP_NET_MBOX_CMSG_STATE_NEXT;
+}
+
+static void nfp_ccm_mbox_mark_next_runner(struct nfp_net *nn)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb;
+	struct sk_buff *skb;
+
+	skb = skb_peek(&nn->mbox_cmsg.queue);
+	if (!skb)
+		return;
+
+	cb = (void *)skb->cb;
+	cb->state = NFP_NET_MBOX_CMSG_STATE_NEXT;
+	if (cb->posted)
+		queue_work(nn->mbox_cmsg.workq, &nn->mbox_cmsg.runq_work);
+}
+
+static void
+nfp_ccm_mbox_write_tlv(struct nfp_net *nn, u32 off, u32 type, u32 len)
+{
+	nn_writel(nn, off,
+		  FIELD_PREP(NFP_NET_MBOX_TLV_TYPE, type) |
+		  FIELD_PREP(NFP_NET_MBOX_TLV_LEN, len));
+}
+
+static void nfp_ccm_mbox_copy_in(struct nfp_net *nn, struct sk_buff *last)
+{
+	struct sk_buff *skb;
+	int reserve, i, cnt;
+	__be32 *data;
+	u32 off, len;
+
+	off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL;
+	skb = __skb_peek(&nn->mbox_cmsg.queue);
+	while (true) {
+		nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_MSG,
+				       skb->len);
+		off += 4;
+
+		/* Write data word by word, skb->data should be aligned */
+		data = (__be32 *)skb->data;
+		cnt = skb->len / 4;
+		for (i = 0 ; i < cnt; i++) {
+			nn_writel(nn, off, be32_to_cpu(data[i]));
+			off += 4;
+		}
+		if (skb->len & 3) {
+			__be32 tmp = 0;
+
+			memcpy(&tmp, &data[i], skb->len & 3);
+			nn_writel(nn, off, be32_to_cpu(tmp));
+			off += 4;
+		}
+
+		/* Reserve space if reply is bigger */
+		len = round_up(skb->len, 4);
+		reserve = nfp_ccm_mbox_maxlen(skb) - len;
+		if (reserve > 0) {
+			nfp_ccm_mbox_write_tlv(nn, off,
+					       NFP_NET_MBOX_TLV_TYPE_RESV,
+					       reserve);
+			off += 4 + reserve;
+		}
+
+		if (skb == last)
+			break;
+		skb = skb_queue_next(&nn->mbox_cmsg.queue, skb);
+	}
+
+	nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_END, 0);
+}
+
+static struct sk_buff *
+nfp_ccm_mbox_find_req(struct nfp_net *nn, __be16 tag, struct sk_buff *last)
+{
+	struct sk_buff *skb;
+
+	skb = __skb_peek(&nn->mbox_cmsg.queue);
+	while (true) {
+		if (__nfp_ccm_get_tag(skb) == tag)
+			return skb;
+
+		if (skb == last)
+			return NULL;
+		skb = skb_queue_next(&nn->mbox_cmsg.queue, skb);
+	}
+}
+
+static void nfp_ccm_mbox_copy_out(struct nfp_net *nn, struct sk_buff *last)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb;
+	u8 __iomem *data, *end;
+	struct sk_buff *skb;
+
+	data = nn->dp.ctrl_bar + nn->tlv_caps.mbox_off +
+		NFP_NET_CFG_MBOX_SIMPLE_VAL;
+	end = data + nn->tlv_caps.mbox_len;
+
+	while (true) {
+		unsigned int length, offset, type;
+		struct nfp_ccm_hdr hdr;
+		u32 tlv_hdr;
+
+		tlv_hdr = readl(data);
+		type = FIELD_GET(NFP_NET_MBOX_TLV_TYPE, tlv_hdr);
+		length = FIELD_GET(NFP_NET_MBOX_TLV_LEN, tlv_hdr);
+		offset = data - nn->dp.ctrl_bar;
+
+		/* Advance past the header */
+		data += 4;
+
+		if (data + length > end) {
+			nn_dp_warn(&nn->dp, "mailbox oversized TLV type:%d offset:%u len:%u\n",
+				   type, offset, length);
+			break;
+		}
+
+		if (type == NFP_NET_MBOX_TLV_TYPE_END)
+			break;
+		if (type == NFP_NET_MBOX_TLV_TYPE_RESV)
+			goto next_tlv;
+		if (type != NFP_NET_MBOX_TLV_TYPE_MSG &&
+		    type != NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) {
+			nn_dp_warn(&nn->dp, "mailbox unknown TLV type:%d offset:%u len:%u\n",
+				   type, offset, length);
+			break;
+		}
+
+		if (length < 4) {
+			nn_dp_warn(&nn->dp, "mailbox msg too short to contain header TLV type:%d offset:%u len:%u\n",
+				   type, offset, length);
+			break;
+		}
+
+		hdr.raw = cpu_to_be32(readl(data));
+
+		skb = nfp_ccm_mbox_find_req(nn, hdr.tag, last);
+		if (!skb) {
+			nn_dp_warn(&nn->dp, "mailbox request not found:%u\n",
+				   be16_to_cpu(hdr.tag));
+			break;
+		}
+		cb = (void *)skb->cb;
+
+		if (type == NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) {
+			nn_dp_warn(&nn->dp,
+				   "mailbox msg not supported type:%d\n",
+				   nfp_ccm_get_type(skb));
+			cb->err = -EIO;
+			goto next_tlv;
+		}
+
+		if (hdr.type != __NFP_CCM_REPLY(nfp_ccm_get_type(skb))) {
+			nn_dp_warn(&nn->dp, "mailbox msg reply wrong type:%u expected:%lu\n",
+				   hdr.type,
+				   __NFP_CCM_REPLY(nfp_ccm_get_type(skb)));
+			cb->err = -EIO;
+			goto next_tlv;
+		}
+		if (cb->exp_reply && length != cb->exp_reply) {
+			nn_dp_warn(&nn->dp, "mailbox msg reply wrong size type:%u expected:%u have:%u\n",
+				   hdr.type, cb->exp_reply, length);
+			cb->err = -EIO;
+			goto next_tlv;
+		}
+		if (length > cb->max_len) {
+			nn_dp_warn(&nn->dp, "mailbox msg oversized reply type:%u max:%u have:%u\n",
+				   hdr.type, cb->max_len, length);
+			cb->err = -EIO;
+			goto next_tlv;
+		}
+
+		if (!cb->posted) {
+			__be32 *skb_data;
+			int i, cnt;
+
+			if (length <= skb->len)
+				__skb_trim(skb, length);
+			else
+				skb_put(skb, length - skb->len);
+
+			/* We overcopy here slightly, but that's okay,
+			 * the skb is large enough, and the garbage will
+			 * be ignored (beyond skb->len).
+			 */
+			skb_data = (__be32 *)skb->data;
+			memcpy(skb_data, &hdr, 4);
+
+			cnt = DIV_ROUND_UP(length, 4);
+			for (i = 1 ; i < cnt; i++)
+				skb_data[i] = cpu_to_be32(readl(data + i * 4));
+		}
+
+		cb->state = NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND;
+next_tlv:
+		data += round_up(length, 4);
+		if (data + 4 > end) {
+			nn_dp_warn(&nn->dp,
+				   "reached end of MBOX without END TLV\n");
+			break;
+		}
+	}
+
+	smp_wmb(); /* order the skb->data vs. cb->state */
+	spin_lock_bh(&nn->mbox_cmsg.queue.lock);
+	do {
+		skb = __skb_dequeue(&nn->mbox_cmsg.queue);
+		cb = (void *)skb->cb;
+
+		if (cb->state != NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND) {
+			cb->err = -ENOENT;
+			smp_wmb(); /* order the cb->err vs. cb->state */
+		}
+		cb->state = NFP_NET_MBOX_CMSG_STATE_DONE;
+
+		if (cb->posted) {
+			if (cb->err)
+				nn_dp_warn(&nn->dp,
+					   "mailbox posted msg failed type:%u err:%d\n",
+					   nfp_ccm_get_type(skb), cb->err);
+			dev_consume_skb_any(skb);
+		}
+	} while (skb != last);
+
+	nfp_ccm_mbox_mark_next_runner(nn);
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+}
+
+static void
+nfp_ccm_mbox_mark_all_err(struct nfp_net *nn, struct sk_buff *last, int err)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb;
+	struct sk_buff *skb;
+
+	spin_lock_bh(&nn->mbox_cmsg.queue.lock);
+	do {
+		skb = __skb_dequeue(&nn->mbox_cmsg.queue);
+		cb = (void *)skb->cb;
+
+		cb->err = err;
+		smp_wmb(); /* order the cb->err vs. cb->state */
+		cb->state = NFP_NET_MBOX_CMSG_STATE_DONE;
+	} while (skb != last);
+
+	nfp_ccm_mbox_mark_next_runner(nn);
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+}
+
+static void nfp_ccm_mbox_run_queue_unlock(struct nfp_net *nn)
+	__releases(&nn->mbox_cmsg.queue.lock)
+{
+	int space = nn->tlv_caps.mbox_len - NFP_NET_CFG_MBOX_SIMPLE_VAL;
+	struct sk_buff *skb, *last;
+	int cnt, err;
+
+	space -= 4; /* for End TLV */
+
+	/* First skb must fit, because it's ours and we checked it fits */
+	cnt = 1;
+	last = skb = __skb_peek(&nn->mbox_cmsg.queue);
+	space -= 4 + nfp_ccm_mbox_maxlen(skb);
+
+	while (!skb_queue_is_last(&nn->mbox_cmsg.queue, last)) {
+		skb = skb_queue_next(&nn->mbox_cmsg.queue, last);
+		space -= 4 + nfp_ccm_mbox_maxlen(skb);
+		if (space < 0)
+			break;
+		last = skb;
+		nfp_ccm_mbox_set_busy(skb);
+		cnt++;
+		if (cnt == NFP_CCM_MBOX_BATCH_LIMIT)
+			break;
+	}
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+
+	/* Now we own all skbs marked in progress; new requests may arrive
+	 * at the end of the queue.
+	 */
+
+	nn_ctrl_bar_lock(nn);
+
+	nfp_ccm_mbox_copy_in(nn, last);
+
+	err = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_TLV_CMSG);
+	if (!err)
+		nfp_ccm_mbox_copy_out(nn, last);
+	else
+		nfp_ccm_mbox_mark_all_err(nn, last, -EIO);
+
+	nn_ctrl_bar_unlock(nn);
+
+	wake_up_all(&nn->mbox_cmsg.wq);
+}
+
+static int nfp_ccm_mbox_skb_return(struct sk_buff *skb)
+{
+	struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb;
+
+	if (cb->err)
+		dev_kfree_skb_any(skb);
+	return cb->err;
+}
+
+/* If the wait timed out but the command is already in progress we have
+ * to wait until it finishes.  The runner has ownership of the skbs
+ * marked as busy.
+ */
+static int
+nfp_ccm_mbox_unlink_unlock(struct nfp_net *nn, struct sk_buff *skb,
+			   enum nfp_ccm_type type)
+	__releases(&nn->mbox_cmsg.queue.lock)
+{
+	bool was_first;
+
+	if (nfp_ccm_mbox_in_progress(skb)) {
+		spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+
+		wait_event(nn->mbox_cmsg.wq, nfp_ccm_mbox_done(skb));
+		smp_rmb(); /* pairs with smp_wmb() after data is written */
+		return nfp_ccm_mbox_skb_return(skb);
+	}
+
+	was_first = nfp_ccm_mbox_should_run(nn, skb);
+	__skb_unlink(skb, &nn->mbox_cmsg.queue);
+	if (was_first)
+		nfp_ccm_mbox_mark_next_runner(nn);
+
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+
+	if (was_first)
+		wake_up_all(&nn->mbox_cmsg.wq);
+
+	nn_dp_warn(&nn->dp, "time out waiting for mbox response to 0x%02x\n",
+		   type);
+	return -ETIMEDOUT;
+}
+
+static int
+nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb,
+			 enum nfp_ccm_type type,
+			 unsigned int reply_size, unsigned int max_reply_size,
+			 gfp_t flags)
+{
+	const unsigned int mbox_max = nfp_ccm_mbox_max_msg(nn);
+	unsigned int max_len;
+	ssize_t undersize;
+	int err;
+
+	if (unlikely(!(nn->tlv_caps.mbox_cmsg_types & BIT(type)))) {
+		nn_dp_warn(&nn->dp,
+			   "message type %d not supported by mailbox\n", type);
+		return -EINVAL;
+	}
+
+	/* If the reply size is unknown, assume it will take the entire
+	 * mailbox; callers should do their best to ensure this never
+	 * happens.
+	 */
+	if (!max_reply_size)
+		max_reply_size = mbox_max;
+	max_reply_size = round_up(max_reply_size, 4);
+
+	/* Make sure we can fit the entire reply into the skb,
+	 * and that we don't have to slow down the mbox handler
+	 * with allocations.
+	 */
+	undersize = max_reply_size - (skb_end_pointer(skb) - skb->data);
+	if (undersize > 0) {
+		err = pskb_expand_head(skb, 0, undersize, flags);
+		if (err) {
+			nn_dp_warn(&nn->dp,
+				   "can't allocate reply buffer for mailbox\n");
+			return err;
+		}
+	}
+
+	/* Make sure that request and response both fit into the mailbox */
+	max_len = max(max_reply_size, round_up(skb->len, 4));
+	if (max_len > mbox_max) {
+		nn_dp_warn(&nn->dp,
+			   "message too big for the mailbox: %u/%u vs %u\n",
+			   skb->len, max_reply_size, mbox_max);
+		return -EMSGSIZE;
+	}
+
+	nfp_ccm_mbox_msg_init(skb, reply_size, max_len);
+
+	return 0;
+}
+
+static int
+nfp_ccm_mbox_msg_enqueue(struct nfp_net *nn, struct sk_buff *skb,
+			 enum nfp_ccm_type type, bool critical)
+{
+	struct nfp_ccm_hdr *hdr;
+
+	assert_spin_locked(&nn->mbox_cmsg.queue.lock);
+
+	if (!critical && nn->mbox_cmsg.queue.qlen >= NFP_CCM_MAX_QLEN) {
+		nn_dp_warn(&nn->dp, "mailbox request queue too long\n");
+		return -EBUSY;
+	}
+
+	hdr = (void *)skb->data;
+	hdr->ver = NFP_CCM_ABI_VERSION;
+	hdr->type = type;
+	hdr->tag = cpu_to_be16(nn->mbox_cmsg.tag++);
+
+	__skb_queue_tail(&nn->mbox_cmsg.queue, skb);
+
+	return 0;
+}
+
+int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb,
+			       enum nfp_ccm_type type,
+			       unsigned int reply_size,
+			       unsigned int max_reply_size, bool critical)
+{
+	int err;
+
+	err = nfp_ccm_mbox_msg_prepare(nn, skb, type, reply_size,
+				       max_reply_size, GFP_KERNEL);
+	if (err)
+		goto err_free_skb;
+
+	spin_lock_bh(&nn->mbox_cmsg.queue.lock);
+
+	err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, critical);
+	if (err)
+		goto err_unlock;
+
+	/* First in queue takes the mailbox lock and processes the batch */
+	if (!nfp_ccm_mbox_is_first(nn, skb)) {
+		bool to;
+
+		spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+
+		to = !wait_event_timeout(nn->mbox_cmsg.wq,
+					 nfp_ccm_mbox_done(skb) ||
+					 nfp_ccm_mbox_should_run(nn, skb),
+					 msecs_to_jiffies(NFP_CCM_TIMEOUT));
+
+		/* fast path for those completed by another thread */
+		if (nfp_ccm_mbox_done(skb)) {
+			smp_rmb(); /* pairs with wmb after data is written */
+			return nfp_ccm_mbox_skb_return(skb);
+		}
+
+		spin_lock_bh(&nn->mbox_cmsg.queue.lock);
+
+		if (!nfp_ccm_mbox_is_first(nn, skb)) {
+			WARN_ON(!to);
+
+			err = nfp_ccm_mbox_unlink_unlock(nn, skb, type);
+			if (err)
+				goto err_free_skb;
+			return 0;
+		}
+	}
+
+	/* run queue expects the lock held */
+	nfp_ccm_mbox_run_queue_unlock(nn);
+	return nfp_ccm_mbox_skb_return(skb);
+
+err_unlock:
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+err_free_skb:
+	dev_kfree_skb_any(skb);
+	return err;
+}
+
+int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb,
+			     enum nfp_ccm_type type,
+			     unsigned int reply_size,
+			     unsigned int max_reply_size)
+{
+	return __nfp_ccm_mbox_communicate(nn, skb, type, reply_size,
+					  max_reply_size, false);
+}
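+
+/* Sketch of a waiting caller (illustrative; "req" and "reply" are
+ * hypothetical structs - see the TLS code for a real user):
+ *
+ *	skb = nfp_ccm_mbox_msg_alloc(nn, sizeof(*req), sizeof(*reply),
+ *				     GFP_KERNEL);
+ *	req = (void *)skb->data;	// fill in fields past the hdr
+ *	err = nfp_ccm_mbox_communicate(nn, skb, type, sizeof(*reply),
+ *				       sizeof(*reply));
+ *	// on success the reply has overwritten the request in skb->data
+ *	// and the caller consumes the skb when done
+ */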
+
+static void nfp_ccm_mbox_post_runq_work(struct work_struct *work)
+{
+	struct sk_buff *skb;
+	struct nfp_net *nn;
+
+	nn = container_of(work, struct nfp_net, mbox_cmsg.runq_work);
+
+	spin_lock_bh(&nn->mbox_cmsg.queue.lock);
+
+	skb = __skb_peek(&nn->mbox_cmsg.queue);
+	if (WARN_ON(!skb || !nfp_ccm_mbox_is_posted(skb) ||
+		    !nfp_ccm_mbox_should_run(nn, skb))) {
+		spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+		return;
+	}
+
+	nfp_ccm_mbox_run_queue_unlock(nn);
+}
+
+static void nfp_ccm_mbox_post_wait_work(struct work_struct *work)
+{
+	struct sk_buff *skb;
+	struct nfp_net *nn;
+	int err;
+
+	nn = container_of(work, struct nfp_net, mbox_cmsg.wait_work);
+
+	skb = skb_peek(&nn->mbox_cmsg.queue);
+	if (WARN_ON(!skb || !nfp_ccm_mbox_is_posted(skb)))
+		/* Should never happen, so it's unclear what to do here... */
+		goto exit_unlock_wake;
+
+	err = nfp_net_mbox_reconfig_wait_posted(nn);
+	if (!err)
+		nfp_ccm_mbox_copy_out(nn, skb);
+	else
+		nfp_ccm_mbox_mark_all_err(nn, skb, -EIO);
+exit_unlock_wake:
+	nn_ctrl_bar_unlock(nn);
+	wake_up_all(&nn->mbox_cmsg.wq);
+}
+
+int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb,
+		      enum nfp_ccm_type type, unsigned int max_reply_size)
+{
+	int err;
+
+	err = nfp_ccm_mbox_msg_prepare(nn, skb, type, 0, max_reply_size,
+				       GFP_ATOMIC);
+	if (err)
+		goto err_free_skb;
+
+	nfp_ccm_mbox_mark_posted(skb);
+
+	spin_lock_bh(&nn->mbox_cmsg.queue.lock);
+
+	err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, false);
+	if (err)
+		goto err_unlock;
+
+	if (nfp_ccm_mbox_is_first(nn, skb)) {
+		if (nn_ctrl_bar_trylock(nn)) {
+			nfp_ccm_mbox_copy_in(nn, skb);
+			nfp_net_mbox_reconfig_post(nn,
+						   NFP_NET_CFG_MBOX_CMD_TLV_CMSG);
+			queue_work(nn->mbox_cmsg.workq,
+				   &nn->mbox_cmsg.wait_work);
+		} else {
+			nfp_ccm_mbox_mark_next_runner(nn);
+		}
+	}
+
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+
+	return 0;
+
+err_unlock:
+	spin_unlock_bh(&nn->mbox_cmsg.queue.lock);
+err_free_skb:
+	dev_kfree_skb_any(skb);
+	return err;
+}
+
+struct sk_buff *
+nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size,
+		       unsigned int reply_size, gfp_t flags)
+{
+	unsigned int max_size;
+	struct sk_buff *skb;
+
+	if (!reply_size)
+		max_size = nfp_ccm_mbox_max_msg(nn);
+	else
+		max_size = max(req_size, reply_size);
+	max_size = round_up(max_size, 4);
+
+	skb = alloc_skb(max_size, flags);
+	if (!skb)
+		return NULL;
+
+	skb_put(skb, req_size);
+
+	return skb;
+}
+
+bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size)
+{
+	return nfp_ccm_mbox_max_msg(nn) >= size;
+}
+
+int nfp_ccm_mbox_init(struct nfp_net *nn)
+{
+	return 0;
+}
+
+void nfp_ccm_mbox_clean(struct nfp_net *nn)
+{
+	drain_workqueue(nn->mbox_cmsg.workq);
+}
+
+int nfp_ccm_mbox_alloc(struct nfp_net *nn)
+{
+	skb_queue_head_init(&nn->mbox_cmsg.queue);
+	init_waitqueue_head(&nn->mbox_cmsg.wq);
+	INIT_WORK(&nn->mbox_cmsg.wait_work, nfp_ccm_mbox_post_wait_work);
+	INIT_WORK(&nn->mbox_cmsg.runq_work, nfp_ccm_mbox_post_runq_work);
+
+	nn->mbox_cmsg.workq = alloc_workqueue("nfp-ccm-mbox", WQ_UNBOUND, 0);
+	if (!nn->mbox_cmsg.workq)
+		return -ENOMEM;
+	return 0;
+}
+
+void nfp_ccm_mbox_free(struct nfp_net *nn)
+{
+	destroy_workqueue(nn->mbox_cmsg.workq);
+	WARN_ON(!skb_queue_empty(&nn->mbox_cmsg.queue));
+}
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
new file mode 100644
index 0000000..60372dd
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#ifndef NFP_CRYPTO_H
+#define NFP_CRYPTO_H 1
+
+struct nfp_net_tls_offload_ctx {
+	__be32 fw_handle[2];
+
+	u8 rx_end[0];
+	/* Tx only fields follow - Rx side does not have enough driver state
+	 * to fit these
+	 */
+
+	u32 next_seq;
+};
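+
+/* The rx_end[0] marker lets RX users size-check only the fields above
+ * it, e.g. (a hypothetical check, mirroring the TX-side BUILD_BUG_ON):
+ *
+ *	BUILD_BUG_ON(offsetof(struct nfp_net_tls_offload_ctx, rx_end) >
+ *		     TLS_DRIVER_STATE_SIZE_RX);
+ */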
+
+#ifdef CONFIG_TLS_DEVICE
+int nfp_net_tls_init(struct nfp_net *nn);
+#else
+static inline int nfp_net_tls_init(struct nfp_net *nn)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h
new file mode 100644
index 0000000..67413d9
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#ifndef NFP_CRYPTO_FW_H
+#define NFP_CRYPTO_FW_H 1
+
+#include "../ccm.h"
+
+#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC	0
+#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC	1
+
+struct nfp_crypto_reply_simple {
+	struct nfp_ccm_hdr hdr;
+	__be32 error;
+};
+
+struct nfp_crypto_req_reset {
+	struct nfp_ccm_hdr hdr;
+	__be32 ep_id;
+};
+
+#define NFP_NET_TLS_IPVER		GENMASK(15, 12)
+#define NFP_NET_TLS_VLAN		GENMASK(11, 0)
+#define NFP_NET_TLS_VLAN_UNUSED			4095
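+/* e.g. an IPv6 session with no VLAN is encoded by
+ * nfp_net_tls_set_ipver_vlan() as FIELD_PREP(NFP_NET_TLS_IPVER, 6) |
+ * FIELD_PREP(NFP_NET_TLS_VLAN, NFP_NET_TLS_VLAN_UNUSED) == 0x6fff,
+ * stored big-endian on the wire.
+ */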
+
+struct nfp_crypto_req_add_front {
+	struct nfp_ccm_hdr hdr;
+	__be32 ep_id;
+	u8 resv[3];
+	u8 opcode;
+	u8 key_len;
+	__be16 ipver_vlan __packed;
+	u8 l4_proto;
+#define NFP_NET_TLS_NON_ADDR_KEY_LEN	8
+	u8 l3_addrs[0];
+};
+
+struct nfp_crypto_req_add_back {
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 key[8];
+	__be32 salt;
+	__be32 iv[2];
+	__be32 counter;
+	__be32 rec_no[2];
+	__be32 tcp_seq;
+};
+
+struct nfp_crypto_req_add_v4 {
+	struct nfp_crypto_req_add_front front;
+	__be32 src_ip;
+	__be32 dst_ip;
+	struct nfp_crypto_req_add_back back;
+};
+
+struct nfp_crypto_req_add_v6 {
+	struct nfp_crypto_req_add_front front;
+	__be32 src_ip[4];
+	__be32 dst_ip[4];
+	struct nfp_crypto_req_add_back back;
+};
+
+struct nfp_crypto_reply_add {
+	struct nfp_ccm_hdr hdr;
+	__be32 error;
+	__be32 handle[2];
+};
+
+struct nfp_crypto_req_del {
+	struct nfp_ccm_hdr hdr;
+	__be32 ep_id;
+	__be32 handle[2];
+};
+
+struct nfp_crypto_req_update {
+	struct nfp_ccm_hdr hdr;
+	__be32 ep_id;
+	u8 resv[3];
+	u8 opcode;
+	__be32 handle[2];
+	__be32 rec_no[2];
+	__be32 tcp_seq;
+};
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c
new file mode 100644
index 0000000..96a96b3
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/bitfield.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <net/tls.h>
+
+#include "../ccm.h"
+#include "../nfp_net.h"
+#include "crypto.h"
+#include "fw.h"
+
+#define NFP_NET_TLS_CCM_MBOX_OPS_MASK		\
+	(BIT(NFP_CCM_TYPE_CRYPTO_RESET) |	\
+	 BIT(NFP_CCM_TYPE_CRYPTO_ADD) |		\
+	 BIT(NFP_CCM_TYPE_CRYPTO_DEL) |		\
+	 BIT(NFP_CCM_TYPE_CRYPTO_UPDATE))
+
+#define NFP_NET_TLS_OPCODE_MASK_RX			\
+	BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC)
+
+#define NFP_NET_TLS_OPCODE_MASK_TX			\
+	BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC)
+
+#define NFP_NET_TLS_OPCODE_MASK						\
+	(NFP_NET_TLS_OPCODE_MASK_RX | NFP_NET_TLS_OPCODE_MASK_TX)
+
+static void nfp_net_crypto_set_op(struct nfp_net *nn, u8 opcode, bool on)
+{
+	u32 off, val;
+
+	off = nn->tlv_caps.crypto_enable_off + round_down(opcode / 8, 4);
+
+	val = nn_readl(nn, off);
+	if (on)
+		val |= BIT(opcode & 31);
+	else
+		val &= ~BIT(opcode & 31);
+	nn_writel(nn, off, val);
+}
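+
+/* Addressing example (illustration only): opcode 1, the RX decrypt op,
+ * lands at byte offset round_down(1 / 8, 4) == 0 from
+ * crypto_enable_off, at bit (1 & 31) == 1; opcode 33 would land in the
+ * second 32-bit word, also at bit 1.
+ */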
+
+static bool
+__nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add,
+			       enum tls_offload_ctx_dir direction)
+{
+	u8 opcode;
+	int cnt;
+
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+		opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC;
+		nn->ktls_tx_conn_cnt += add;
+		cnt = nn->ktls_tx_conn_cnt;
+		nn->dp.ktls_tx = !!nn->ktls_tx_conn_cnt;
+	} else {
+		opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC;
+		nn->ktls_rx_conn_cnt += add;
+		cnt = nn->ktls_rx_conn_cnt;
+	}
+
+	/* Care only about 0 -> 1 and 1 -> 0 transitions */
+	if (cnt > 1)
+		return false;
+
+	nfp_net_crypto_set_op(nn, opcode, cnt);
+	return true;
+}
+
+static int
+nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add,
+			     enum tls_offload_ctx_dir direction)
+{
+	int ret = 0;
+
+	/* Use the BAR lock to protect the connection counts */
+	nn_ctrl_bar_lock(nn);
+	if (__nfp_net_tls_conn_cnt_changed(nn, add, direction)) {
+		ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO);
+		/* Undo the cnt adjustment if the reconfig failed */
+		if (ret)
+			__nfp_net_tls_conn_cnt_changed(nn, -add, direction);
+	}
+	nn_ctrl_bar_unlock(nn);
+
+	return ret;
+}
+
+static int
+nfp_net_tls_conn_add(struct nfp_net *nn, enum tls_offload_ctx_dir direction)
+{
+	return nfp_net_tls_conn_cnt_changed(nn, 1, direction);
+}
+
+static int
+nfp_net_tls_conn_remove(struct nfp_net *nn, enum tls_offload_ctx_dir direction)
+{
+	return nfp_net_tls_conn_cnt_changed(nn, -1, direction);
+}
+
+static struct sk_buff *
+nfp_net_tls_alloc_simple(struct nfp_net *nn, size_t req_sz, gfp_t flags)
+{
+	return nfp_ccm_mbox_msg_alloc(nn, req_sz,
+				      sizeof(struct nfp_crypto_reply_simple),
+				      flags);
+}
+
+static int
+nfp_net_tls_communicate_simple(struct nfp_net *nn, struct sk_buff *skb,
+			       const char *name, enum nfp_ccm_type type)
+{
+	struct nfp_crypto_reply_simple *reply;
+	int err;
+
+	err = __nfp_ccm_mbox_communicate(nn, skb, type,
+					 sizeof(*reply), sizeof(*reply),
+					 type == NFP_CCM_TYPE_CRYPTO_DEL);
+	if (err) {
+		nn_dp_warn(&nn->dp, "failed to %s TLS: %d\n", name, err);
+		return err;
+	}
+
+	reply = (void *)skb->data;
+	err = -be32_to_cpu(reply->error);
+	if (err)
+		nn_dp_warn(&nn->dp, "failed to %s TLS, fw replied: %d\n",
+			   name, err);
+	dev_consume_skb_any(skb);
+
+	return err;
+}
+
+static void nfp_net_tls_del_fw(struct nfp_net *nn, __be32 *fw_handle)
+{
+	struct nfp_crypto_req_del *req;
+	struct sk_buff *skb;
+
+	skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL);
+	if (!skb)
+		return;
+
+	req = (void *)skb->data;
+	req->ep_id = 0;
+	memcpy(req->handle, fw_handle, sizeof(req->handle));
+
+	nfp_net_tls_communicate_simple(nn, skb, "delete",
+				       NFP_CCM_TYPE_CRYPTO_DEL);
+}
+
+static void
+nfp_net_tls_set_ipver_vlan(struct nfp_crypto_req_add_front *front, u8 ipver)
+{
+	front->ipver_vlan = cpu_to_be16(FIELD_PREP(NFP_NET_TLS_IPVER, ipver) |
+					FIELD_PREP(NFP_NET_TLS_VLAN,
+						   NFP_NET_TLS_VLAN_UNUSED));
+}
+
+static void
+nfp_net_tls_assign_conn_id(struct nfp_net *nn,
+			   struct nfp_crypto_req_add_front *front)
+{
+	u32 len;
+	u64 id;
+
+	id = atomic64_inc_return(&nn->ktls_conn_id_gen);
+	len = front->key_len - NFP_NET_TLS_NON_ADDR_KEY_LEN;
+
+	memcpy(front->l3_addrs, &id, sizeof(id));
+	memset(front->l3_addrs + sizeof(id), 0, len - sizeof(id));
+}
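+
+/* Layout note (a sketch of what the code above assumes): for TX the
+ * firmware does not match on L3 addresses, so the l3_addrs bytes are
+ * reused to carry a locally generated 64-bit connection ID, and the rest
+ * of the address area (key_len - NFP_NET_TLS_NON_ADDR_KEY_LEN bytes in
+ * total) is zeroed.
+ */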
+
+static struct nfp_crypto_req_add_back *
+nfp_net_tls_set_ipv4(struct nfp_net *nn, struct nfp_crypto_req_add_v4 *req,
+		     struct sock *sk, int direction)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	req->front.key_len += sizeof(__be32) * 2;
+
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+		nfp_net_tls_assign_conn_id(nn, &req->front);
+	} else {
+		req->src_ip = inet->inet_daddr;
+		req->dst_ip = inet->inet_saddr;
+	}
+
+	return &req->back;
+}
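+
+/* Direction note: the RX rule is written from the point of view of
+ * packets arriving from the peer, so the socket's remote address
+ * (inet_daddr) becomes the rule's source and the local address its
+ * destination; nfp_net_tls_set_ipv6() and nfp_net_tls_set_l4() below
+ * apply the same swap.
+ */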
+
+static struct nfp_crypto_req_add_back *
+nfp_net_tls_set_ipv6(struct nfp_net *nn, struct nfp_crypto_req_add_v6 *req,
+		     struct sock *sk, int direction)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	req->front.key_len += sizeof(struct in6_addr) * 2;
+
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+		nfp_net_tls_assign_conn_id(nn, &req->front);
+	} else {
+		memcpy(req->src_ip, &sk->sk_v6_daddr, sizeof(req->src_ip));
+		memcpy(req->dst_ip, &np->saddr, sizeof(req->dst_ip));
+	}
+
+#endif
+	return &req->back;
+}
+
+static void
+nfp_net_tls_set_l4(struct nfp_crypto_req_add_front *front,
+		   struct nfp_crypto_req_add_back *back, struct sock *sk,
+		   int direction)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	front->l4_proto = IPPROTO_TCP;
+
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+		back->src_port = 0;
+		back->dst_port = 0;
+	} else {
+		back->src_port = inet->inet_dport;
+		back->dst_port = inet->inet_sport;
+	}
+}
+
+static u8 nfp_tls_1_2_dir_to_opcode(enum tls_offload_ctx_dir direction)
+{
+	switch (direction) {
+	case TLS_OFFLOAD_CTX_DIR_TX:
+		return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC;
+	case TLS_OFFLOAD_CTX_DIR_RX:
+		return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC;
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+
+static bool
+nfp_net_cipher_supported(struct nfp_net *nn, u16 cipher_type,
+			 enum tls_offload_ctx_dir direction)
+{
+	u8 bit;
+
+	switch (cipher_type) {
+	case TLS_CIPHER_AES_GCM_128:
+		if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+			bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC;
+		else
+			bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC;
+		break;
+	default:
+		return false;
+	}
+
+	return nn->tlv_caps.crypto_ops & BIT(bit);
+}
+
+static int
+nfp_net_tls_add(struct net_device *netdev, struct sock *sk,
+		enum tls_offload_ctx_dir direction,
+		struct tls_crypto_info *crypto_info,
+		u32 start_offload_tcp_sn)
+{
+	struct tls12_crypto_info_aes_gcm_128 *tls_ci;
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_crypto_req_add_front *front;
+	struct nfp_net_tls_offload_ctx *ntls;
+	struct nfp_crypto_req_add_back *back;
+	struct nfp_crypto_reply_add *reply;
+	struct sk_buff *skb;
+	size_t req_sz;
+	void *req;
+	bool ipv6;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct nfp_net_tls_offload_ctx) >
+		     TLS_DRIVER_STATE_SIZE_TX);
+	BUILD_BUG_ON(offsetof(struct nfp_net_tls_offload_ctx, rx_end) >
+		     TLS_DRIVER_STATE_SIZE_RX);
+
+	if (!nfp_net_cipher_supported(nn, crypto_info->cipher_type, direction))
+		return -EOPNOTSUPP;
+
+	switch (sk->sk_family) {
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		if (sk->sk_ipv6only ||
+		    ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) {
+			req_sz = sizeof(struct nfp_crypto_req_add_v6);
+			ipv6 = true;
+			break;
+		}
+#endif
+		/* fall through */
+	case AF_INET:
+		req_sz = sizeof(struct nfp_crypto_req_add_v4);
+		ipv6 = false;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = nfp_net_tls_conn_add(nn, direction);
+	if (err)
+		return err;
+
+	skb = nfp_ccm_mbox_msg_alloc(nn, req_sz, sizeof(*reply), GFP_KERNEL);
+	if (!skb) {
+		err = -ENOMEM;
+		goto err_conn_remove;
+	}
+
+	front = (void *)skb->data;
+	front->ep_id = 0;
+	front->key_len = NFP_NET_TLS_NON_ADDR_KEY_LEN;
+	front->opcode = nfp_tls_1_2_dir_to_opcode(direction);
+	memset(front->resv, 0, sizeof(front->resv));
+
+	nfp_net_tls_set_ipver_vlan(front, ipv6 ? 6 : 4);
+
+	req = (void *)skb->data;
+	if (ipv6)
+		back = nfp_net_tls_set_ipv6(nn, req, sk, direction);
+	else
+		back = nfp_net_tls_set_ipv4(nn, req, sk, direction);
+
+	nfp_net_tls_set_l4(front, back, sk, direction);
+
+	back->counter = 0;
+	back->tcp_seq = cpu_to_be32(start_offload_tcp_sn);
+
+	tls_ci = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+	memcpy(back->key, tls_ci->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+	memset(&back->key[TLS_CIPHER_AES_GCM_128_KEY_SIZE / 4], 0,
+	       sizeof(back->key) - TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+	memcpy(back->iv, tls_ci->iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
+	memcpy(&back->salt, tls_ci->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	memcpy(back->rec_no, tls_ci->rec_seq, sizeof(tls_ci->rec_seq));
+
+	/* Get an extra ref on the skb so we can wipe the key afterwards */
+	skb_get(skb);
+
+	err = nfp_ccm_mbox_communicate(nn, skb, NFP_CCM_TYPE_CRYPTO_ADD,
+				       sizeof(*reply), sizeof(*reply));
+	reply = (void *)skb->data;
+
+	/* We depend on the CCM mailbox code not reallocating the skb we
+	 * sent, so that we can clear the key material out of memory.
+	 */
+	if (!WARN_ON_ONCE((u8 *)back < skb->head ||
+			  (u8 *)back > skb_end_pointer(skb)) &&
+	    !WARN_ON_ONCE((u8 *)&reply[1] > (u8 *)back))
+		memzero_explicit(back, sizeof(*back));
+	dev_consume_skb_any(skb); /* the extra ref from skb_get() above */
+
+	if (err) {
+		nn_dp_warn(&nn->dp, "failed to add TLS: %d (%d)\n",
+			   err, direction == TLS_OFFLOAD_CTX_DIR_TX);
+		/* communicate frees skb on error */
+		goto err_conn_remove;
+	}
+
+	err = -be32_to_cpu(reply->error);
+	if (err) {
+		if (err == -ENOSPC) {
+			if (!atomic_fetch_inc(&nn->ktls_no_space))
+				nn_info(nn, "HW TLS table full\n");
+		} else {
+			nn_dp_warn(&nn->dp,
+				   "failed to add TLS, FW replied: %d\n", err);
+		}
+		goto err_free_skb;
+	}
+
+	if (!reply->handle[0] && !reply->handle[1]) {
+		nn_dp_warn(&nn->dp, "FW returned NULL handle\n");
+		err = -EINVAL;
+		goto err_fw_remove;
+	}
+
+	ntls = tls_driver_ctx(sk, direction);
+	memcpy(ntls->fw_handle, reply->handle, sizeof(ntls->fw_handle));
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+		ntls->next_seq = start_offload_tcp_sn;
+	dev_consume_skb_any(skb);
+
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+		return 0;
+
+	tls_offload_rx_resync_set_type(sk,
+				       TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT);
+	return 0;
+
+err_fw_remove:
+	nfp_net_tls_del_fw(nn, reply->handle);
+err_free_skb:
+	dev_consume_skb_any(skb);
+err_conn_remove:
+	nfp_net_tls_conn_remove(nn, direction);
+	return err;
+}
+
+static void
+nfp_net_tls_del(struct net_device *netdev, struct tls_context *tls_ctx,
+		enum tls_offload_ctx_dir direction)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_tls_offload_ctx *ntls;
+
+	nfp_net_tls_conn_remove(nn, direction);
+
+	ntls = __tls_driver_ctx(tls_ctx, direction);
+	nfp_net_tls_del_fw(nn, ntls->fw_handle);
+}
+
+static int
+nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq,
+		   u8 *rcd_sn, enum tls_offload_ctx_dir direction)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_tls_offload_ctx *ntls;
+	struct nfp_crypto_req_update *req;
+	struct sk_buff *skb;
+	gfp_t flags;
+	int err;
+
+	flags = direction == TLS_OFFLOAD_CTX_DIR_TX ? GFP_KERNEL : GFP_ATOMIC;
+	skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), flags);
+	if (!skb)
+		return -ENOMEM;
+
+	ntls = tls_driver_ctx(sk, direction);
+	req = (void *)skb->data;
+	req->ep_id = 0;
+	req->opcode = nfp_tls_1_2_dir_to_opcode(direction);
+	memset(req->resv, 0, sizeof(req->resv));
+	memcpy(req->handle, ntls->fw_handle, sizeof(ntls->fw_handle));
+	req->tcp_seq = cpu_to_be32(seq);
+	memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no));
+
+	if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+		err = nfp_net_tls_communicate_simple(nn, skb, "sync",
+						     NFP_CCM_TYPE_CRYPTO_UPDATE);
+		if (err)
+			return err;
+		ntls->next_seq = seq;
+	} else {
+		nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE,
+				  sizeof(struct nfp_crypto_reply_simple));
+	}
+
+	return 0;
+}
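+
+/* Context note: TX resync is requested from a sleepable context, so the
+ * request is allocated with GFP_KERNEL and sent synchronously; RX resync
+ * can fire from softirq context, hence GFP_ATOMIC and the fire-and-forget
+ * nfp_ccm_mbox_post() with no completion to wait for.
+ */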
+
+static const struct tlsdev_ops nfp_net_tls_ops = {
+	.tls_dev_add = nfp_net_tls_add,
+	.tls_dev_del = nfp_net_tls_del,
+	.tls_dev_resync = nfp_net_tls_resync,
+};
+
+static int nfp_net_tls_reset(struct nfp_net *nn)
+{
+	struct nfp_crypto_req_reset *req;
+	struct sk_buff *skb;
+
+	skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	req = (void *)skb->data;
+	req->ep_id = 0;
+
+	return nfp_net_tls_communicate_simple(nn, skb, "reset",
+					      NFP_CCM_TYPE_CRYPTO_RESET);
+}
+
+int nfp_net_tls_init(struct nfp_net *nn)
+{
+	struct net_device *netdev = nn->dp.netdev;
+	int err;
+
+	if (!(nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK))
+		return 0;
+
+	if ((nn->tlv_caps.mbox_cmsg_types & NFP_NET_TLS_CCM_MBOX_OPS_MASK) !=
+	    NFP_NET_TLS_CCM_MBOX_OPS_MASK)
+		return 0;
+
+	if (!nfp_ccm_mbox_fits(nn, sizeof(struct nfp_crypto_req_add_v6))) {
+		nn_warn(nn, "disabling TLS offload - mbox too small: %d\n",
+			nn->tlv_caps.mbox_len);
+		return 0;
+	}
+
+	err = nfp_net_tls_reset(nn);
+	if (err)
+		return err;
+
+	nn_ctrl_bar_lock(nn);
+	nn_writel(nn, nn->tlv_caps.crypto_enable_off, 0);
+	err = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO);
+	nn_ctrl_bar_unlock(nn);
+	if (err)
+		return err;
+
+	if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_RX) {
+		netdev->hw_features |= NETIF_F_HW_TLS_RX;
+		netdev->features |= NETIF_F_HW_TLS_RX;
+	}
+	if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_TX) {
+		netdev->hw_features |= NETIF_F_HW_TLS_TX;
+		netdev->features |= NETIF_F_HW_TLS_TX;
+	}
+
+	netdev->tlsdev_ops = &nfp_net_tls_ops;
+
+	return 0;
+}
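+
+/* Usage sketch (interface name illustrative): once advertised, the
+ * offloads show up as ordinary netdev features and can be toggled from
+ * userspace, e.g.:
+ *
+ *   # ethtool -k eth0 | grep tls
+ *   tls-hw-tx-offload: on
+ *   tls-hw-rx-offload: on
+ *   # ethtool -K eth0 tls-hw-tx-offload off
+ */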
diff --git a/drivers/net/ethernet/netronome/nfp/devlink_param.c b/drivers/net/ethernet/netronome/nfp/devlink_param.c
new file mode 100644
index 0000000..3649183
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/devlink_param.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <net/devlink.h>
+
+#include "nfpcore/nfp.h"
+#include "nfpcore/nfp_nsp.h"
+#include "nfp_main.h"
+
+/**
+ * struct nfp_devlink_param_u8_arg - Devlink u8 parameter get/set arguments
+ * @hwinfo_name:	HWinfo key name
+ * @default_hi_val:	Default HWinfo value if HWinfo doesn't exist
+ * @invalid_dl_val:	Devlink value to use if HWinfo is unknown/invalid.
+ *			-errno if there is no unknown/invalid value available
+ * @hi_to_dl:	HWinfo to devlink value mapping
+ * @dl_to_hi:	Devlink to HWinfo value mapping
+ * @max_dl_val:	Maximum devlink value supported, for validation only
+ * @max_hi_val:	Maximum HWinfo value supported, for validation only
+ */
+struct nfp_devlink_param_u8_arg {
+	const char *hwinfo_name;
+	const char *default_hi_val;
+	int invalid_dl_val;
+	u8 hi_to_dl[4];
+	u8 dl_to_hi[4];
+	u8 max_dl_val;
+	u8 max_hi_val;
+};
+
+static const struct nfp_devlink_param_u8_arg nfp_devlink_u8_args[] = {
+	[DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY] = {
+		.hwinfo_name = "app_fw_from_flash",
+		.default_hi_val = NFP_NSP_APP_FW_LOAD_DEFAULT,
+		.invalid_dl_val =
+			DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN,
+		.hi_to_dl = {
+			[NFP_NSP_APP_FW_LOAD_DISK] =
+				DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+			[NFP_NSP_APP_FW_LOAD_FLASH] =
+				DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
+			[NFP_NSP_APP_FW_LOAD_PREF] =
+				DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
+		},
+		.dl_to_hi = {
+			[DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER] =
+				NFP_NSP_APP_FW_LOAD_PREF,
+			[DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH] =
+				NFP_NSP_APP_FW_LOAD_FLASH,
+			[DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK] =
+				NFP_NSP_APP_FW_LOAD_DISK,
+		},
+		.max_dl_val = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+		.max_hi_val = NFP_NSP_APP_FW_LOAD_PREF,
+	},
+	[DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE] = {
+		.hwinfo_name = "abi_drv_reset",
+		.default_hi_val = NFP_NSP_DRV_RESET_DEFAULT,
+		.invalid_dl_val =
+			DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN,
+		.hi_to_dl = {
+			[NFP_NSP_DRV_RESET_ALWAYS] =
+				DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS,
+			[NFP_NSP_DRV_RESET_NEVER] =
+				DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER,
+			[NFP_NSP_DRV_RESET_DISK] =
+				DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK,
+		},
+		.dl_to_hi = {
+			[DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS] =
+				NFP_NSP_DRV_RESET_ALWAYS,
+			[DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER] =
+				NFP_NSP_DRV_RESET_NEVER,
+			[DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK] =
+				NFP_NSP_DRV_RESET_DISK,
+		},
+		.max_dl_val = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK,
+		.max_hi_val = NFP_NSP_DRV_RESET_NEVER,
+	}
+};
+
+static int
+nfp_devlink_param_u8_get(struct devlink *devlink, u32 id,
+			 struct devlink_param_gset_ctx *ctx)
+{
+	const struct nfp_devlink_param_u8_arg *arg;
+	struct nfp_pf *pf = devlink_priv(devlink);
+	struct nfp_nsp *nsp;
+	char hwinfo[32];
+	long value;
+	int err;
+
+	if (id >= ARRAY_SIZE(nfp_devlink_u8_args))
+		return -EOPNOTSUPP;
+
+	arg = &nfp_devlink_u8_args[id];
+
+	nsp = nfp_nsp_open(pf->cpp);
+	if (IS_ERR(nsp)) {
+		err = PTR_ERR(nsp);
+		nfp_warn(pf->cpp, "can't access NSP: %d\n", err);
+		return err;
+	}
+
+	snprintf(hwinfo, sizeof(hwinfo), "%s", arg->hwinfo_name);
+	err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo),
+					     arg->default_hi_val);
+	if (err) {
+		nfp_warn(pf->cpp, "HWinfo lookup failed: %d\n", err);
+		goto exit_close_nsp;
+	}
+
+	err = kstrtol(hwinfo, 0, &value);
+	if (err || value < 0 || value > arg->max_hi_val) {
+		nfp_warn(pf->cpp, "HWinfo '%s' value %li invalid\n",
+			 arg->hwinfo_name, value);
+
+		if (arg->invalid_dl_val >= 0) {
+			ctx->val.vu8 = arg->invalid_dl_val;
+			err = 0;
+		} else {
+			err = arg->invalid_dl_val;
+		}
+
+		goto exit_close_nsp;
+	}
+
+	ctx->val.vu8 = arg->hi_to_dl[value];
+
+exit_close_nsp:
+	nfp_nsp_close(nsp);
+	return err;
+}
+
+static int
+nfp_devlink_param_u8_set(struct devlink *devlink, u32 id,
+			 struct devlink_param_gset_ctx *ctx)
+{
+	const struct nfp_devlink_param_u8_arg *arg;
+	struct nfp_pf *pf = devlink_priv(devlink);
+	struct nfp_nsp *nsp;
+	char hwinfo[32];
+	int err;
+
+	if (id >= ARRAY_SIZE(nfp_devlink_u8_args))
+		return -EOPNOTSUPP;
+
+	arg = &nfp_devlink_u8_args[id];
+
+	nsp = nfp_nsp_open(pf->cpp);
+	if (IS_ERR(nsp)) {
+		err = PTR_ERR(nsp);
+		nfp_warn(pf->cpp, "can't access NSP: %d\n", err);
+		return err;
+	}
+
+	/* Note the value has already been validated. */
+	snprintf(hwinfo, sizeof(hwinfo), "%s=%u",
+		 arg->hwinfo_name, arg->dl_to_hi[ctx->val.vu8]);
+	err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo));
+	if (err)
+		nfp_warn(pf->cpp, "HWinfo set failed: %d\n", err);
+
+	nfp_nsp_close(nsp);
+	return err;
+}
+
+static int
+nfp_devlink_param_u8_validate(struct devlink *devlink, u32 id,
+			      union devlink_param_value val,
+			      struct netlink_ext_ack *extack)
+{
+	const struct nfp_devlink_param_u8_arg *arg;
+
+	if (id >= ARRAY_SIZE(nfp_devlink_u8_args))
+		return -EOPNOTSUPP;
+
+	arg = &nfp_devlink_u8_args[id];
+
+	if (val.vu8 > arg->max_dl_val) {
+		NL_SET_ERR_MSG_MOD(extack, "parameter out of range");
+		return -EINVAL;
+	}
+
+	if (val.vu8 == arg->invalid_dl_val) {
+		NL_SET_ERR_MSG_MOD(extack, "unknown/invalid value specified");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct devlink_param nfp_devlink_params[] = {
+	DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY,
+			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      nfp_devlink_param_u8_get,
+			      nfp_devlink_param_u8_set,
+			      nfp_devlink_param_u8_validate),
+	DEVLINK_PARAM_GENERIC(RESET_DEV_ON_DRV_PROBE,
+			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      nfp_devlink_param_u8_get,
+			      nfp_devlink_param_u8_set,
+			      nfp_devlink_param_u8_validate),
+};
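+
+/* Usage sketch (PCI address illustrative): both parameters are generic
+ * devlink parameters in the PERMANENT cmode, so the standard devlink CLI
+ * drives them; value 1 below selects
+ * DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH per the mapping above:
+ *
+ *   # devlink dev param set pci/0000:04:00.0 \
+ *             name fw_load_policy value 1 cmode permanent
+ *   # devlink dev param show pci/0000:04:00.0 name fw_load_policy
+ */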
+
+static int nfp_devlink_supports_params(struct nfp_pf *pf)
+{
+	struct nfp_nsp *nsp;
+	bool supported;
+	int err;
+
+	nsp = nfp_nsp_open(pf->cpp);
+	if (IS_ERR(nsp)) {
+		err = PTR_ERR(nsp);
+		dev_err(&pf->pdev->dev, "Failed to access the NSP: %d\n", err);
+		return err;
+	}
+
+	supported = nfp_nsp_has_hwinfo_lookup(nsp) &&
+		    nfp_nsp_has_hwinfo_set(nsp);
+
+	nfp_nsp_close(nsp);
+	return supported;
+}
+
+int nfp_devlink_params_register(struct nfp_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	int err;
+
+	err = nfp_devlink_supports_params(pf);
+	if (err <= 0)
+		return err;
+
+	err = devlink_params_register(devlink, nfp_devlink_params,
+				      ARRAY_SIZE(nfp_devlink_params));
+	if (err)
+		return err;
+
+	devlink_params_publish(devlink);
+	return 0;
+}
+
+void nfp_devlink_params_unregister(struct nfp_pf *pf)
+{
+	int err;
+
+	err = nfp_devlink_supports_params(pf);
+	if (err <= 0)
+		return;
+
+	devlink_params_unregister(priv_to_devlink(pf), nfp_devlink_params,
+				  ARRAY_SIZE(nfp_devlink_params));
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/Makefile b/drivers/net/ethernet/netronome/nfp/flower/Makefile
deleted file mode 100644
index 805fa28..0000000
--- a/drivers/net/ethernet/netronome/nfp/flower/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 7a1e9cd..1b019fd 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -1,43 +1,13 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
-#include <net/geneve.h>
+#include <linux/mpls.h>
 #include <net/pkt_cls.h>
-#include <net/switchdev.h>
 #include <net/tc_act/tc_csum.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_mpls.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
@@ -57,6 +27,80 @@
 						 NFP_FL_TUNNEL_KEY | \
 						 NFP_FL_TUNNEL_GENEVE_OPT)
 
+static int
+nfp_fl_push_mpls(struct nfp_fl_push_mpls *push_mpls,
+		 const struct flow_action_entry *act,
+		 struct netlink_ext_ack *extack)
+{
+	size_t act_size = sizeof(struct nfp_fl_push_mpls);
+	u32 mpls_lse = 0;
+
+	push_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_MPLS;
+	push_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+	/* BOS is optional in the TC action but required for offload. */
+	if (act->mpls_push.bos != ACT_MPLS_BOS_NOT_SET) {
+		mpls_lse |= act->mpls_push.bos << MPLS_LS_S_SHIFT;
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: BOS field must explicitly be set for MPLS push");
+		return -EOPNOTSUPP;
+	}
+
+	/* Leave MPLS TC at its default value of 0 if not explicitly set. */
+	if (act->mpls_push.tc != ACT_MPLS_TC_NOT_SET)
+		mpls_lse |= act->mpls_push.tc << MPLS_LS_TC_SHIFT;
+
+	/* Proto, label and TTL are enforced and verified for MPLS push. */
+	mpls_lse |= act->mpls_push.label << MPLS_LS_LABEL_SHIFT;
+	mpls_lse |= act->mpls_push.ttl << MPLS_LS_TTL_SHIFT;
+	push_mpls->ethtype = act->mpls_push.proto;
+	push_mpls->lse = cpu_to_be32(mpls_lse);
+
+	return 0;
+}
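+
+/* Worked example: for "tc ... action mpls push label 100 bos 1 ttl 64"
+ * the label lands at MPLS_LS_LABEL_SHIFT (12), BOS at MPLS_LS_S_SHIFT (8)
+ * and TTL at MPLS_LS_TTL_SHIFT (0), giving
+ * lse = (100 << 12) | (1 << 8) | 64 = 0x64140 before the cpu_to_be32().
+ */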
+
+static void
+nfp_fl_pop_mpls(struct nfp_fl_pop_mpls *pop_mpls,
+		const struct flow_action_entry *act)
+{
+	size_t act_size = sizeof(struct nfp_fl_pop_mpls);
+
+	pop_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_POP_MPLS;
+	pop_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+	pop_mpls->ethtype = act->mpls_pop.proto;
+}
+
+static void
+nfp_fl_set_mpls(struct nfp_fl_set_mpls *set_mpls,
+		const struct flow_action_entry *act)
+{
+	size_t act_size = sizeof(struct nfp_fl_set_mpls);
+	u32 mpls_lse = 0, mpls_mask = 0;
+
+	set_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_SET_MPLS;
+	set_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+	if (act->mpls_mangle.label != ACT_MPLS_LABEL_NOT_SET) {
+		mpls_lse |= act->mpls_mangle.label << MPLS_LS_LABEL_SHIFT;
+		mpls_mask |= MPLS_LS_LABEL_MASK;
+	}
+	if (act->mpls_mangle.tc != ACT_MPLS_TC_NOT_SET) {
+		mpls_lse |= act->mpls_mangle.tc << MPLS_LS_TC_SHIFT;
+		mpls_mask |= MPLS_LS_TC_MASK;
+	}
+	if (act->mpls_mangle.bos != ACT_MPLS_BOS_NOT_SET) {
+		mpls_lse |= act->mpls_mangle.bos << MPLS_LS_S_SHIFT;
+		mpls_mask |= MPLS_LS_S_MASK;
+	}
+	if (act->mpls_mangle.ttl) {
+		mpls_lse |= act->mpls_mangle.ttl << MPLS_LS_TTL_SHIFT;
+		mpls_mask |= MPLS_LS_TTL_MASK;
+	}
+
+	set_mpls->lse = cpu_to_be32(mpls_lse);
+	set_mpls->lse_mask = cpu_to_be32(mpls_mask);
+}
+
 static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
 {
 	size_t act_size = sizeof(struct nfp_fl_pop_vlan);
@@ -68,7 +112,7 @@
 
 static void
 nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
-		 const struct tc_action *action)
+		 const struct flow_action_entry *act)
 {
 	size_t act_size = sizeof(struct nfp_fl_push_vlan);
 	u16 tmp_push_vlan_tci;
@@ -76,30 +120,32 @@
 	push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN;
 	push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 	push_vlan->reserved = 0;
-	push_vlan->vlan_tpid = tcf_vlan_push_proto(action);
+	push_vlan->vlan_tpid = act->vlan.proto;
 
 	tmp_push_vlan_tci =
-		FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) |
-		FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) |
-		NFP_FL_PUSH_VLAN_CFI;
+		FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) |
+		FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid);
 	push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
 }
 
 static int
-nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action,
-	       struct nfp_fl_payload *nfp_flow, int act_len)
+nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act,
+	       struct nfp_fl_payload *nfp_flow, int act_len,
+	       struct netlink_ext_ack *extack)
 {
 	size_t act_size = sizeof(struct nfp_fl_pre_lag);
 	struct nfp_fl_pre_lag *pre_lag;
 	struct net_device *out_dev;
 	int err;
 
-	out_dev = tcf_mirred_dev(action);
+	out_dev = act->dev;
 	if (!out_dev || !netif_is_lag_master(out_dev))
 		return 0;
 
-	if (act_len + act_size > NFP_FL_MAX_A_SIZ)
+	if (act_len + act_size > NFP_FL_MAX_A_SIZ) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at LAG action");
 		return -EOPNOTSUPP;
+	}
 
 	/* Pre_lag action must be first on action list.
 	 * If other actions already exist they need pushed forward.
@@ -109,7 +155,7 @@
 			nfp_flow->action_data, act_len);
 
 	pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data;
-	err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag);
+	err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag, extack);
 	if (err)
 		return err;
 
@@ -121,26 +167,13 @@
 	return act_size;
 }
 
-static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
-					 enum nfp_flower_tun_type tun_type)
-{
-	if (!out_dev->rtnl_link_ops)
-		return false;
-
-	if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan"))
-		return tun_type == NFP_FL_TUNNEL_VXLAN;
-
-	if (!strcmp(out_dev->rtnl_link_ops->kind, "geneve"))
-		return tun_type == NFP_FL_TUNNEL_GENEVE;
-
-	return false;
-}
-
 static int
 nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
-	      const struct tc_action *action, struct nfp_fl_payload *nfp_flow,
+	      const struct flow_action_entry *act,
+	      struct nfp_fl_payload *nfp_flow,
 	      bool last, struct net_device *in_dev,
-	      enum nfp_flower_tun_type tun_type, int *tun_out_cnt)
+	      enum nfp_flower_tun_type tun_type, int *tun_out_cnt,
+	      bool pkt_host, struct netlink_ext_ack *extack)
 {
 	size_t act_size = sizeof(struct nfp_fl_output);
 	struct nfp_flower_priv *priv = app->priv;
@@ -150,19 +183,25 @@
 	output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT;
 	output->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 
-	out_dev = tcf_mirred_dev(action);
-	if (!out_dev)
+	out_dev = act->dev;
+	if (!out_dev) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid egress interface for mirred action");
 		return -EOPNOTSUPP;
+	}
 
 	tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0;
 
 	if (tun_type) {
 		/* Verify the egress netdev matches the tunnel type. */
-		if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type))
+		if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface does not match the required tunnel type");
 			return -EOPNOTSUPP;
+		}
 
-		if (*tun_out_cnt)
+		if (*tun_out_cnt) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot offload more than one tunnel mirred output per filter");
 			return -EOPNOTSUPP;
+		}
 		(*tun_out_cnt)++;
 
 		output->flags = cpu_to_be16(tmp_flags |
@@ -174,41 +213,87 @@
 
 		output->flags = cpu_to_be16(tmp_flags);
 		gid = nfp_flower_lag_get_output_id(app, out_dev);
-		if (gid < 0)
+		if (gid < 0) {
+			NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot find group id for LAG action");
 			return gid;
+		}
 		output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid);
+	} else if (nfp_flower_internal_port_can_offload(app, out_dev)) {
+		if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules not supported in loaded firmware");
+			return -EOPNOTSUPP;
+		}
+
+		if (nfp_flow->pre_tun_rule.dev || !pkt_host) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules require single egress dev and ptype HOST action");
+			return -EOPNOTSUPP;
+		}
+
+		nfp_flow->pre_tun_rule.dev = out_dev;
+
+		return 0;
 	} else {
 		/* Set action output parameters. */
 		output->flags = cpu_to_be16(tmp_flags);
 
-		/* Only offload if egress ports are on the same device as the
-		 * ingress port.
-		 */
-		if (!switchdev_port_same_parent_id(in_dev, out_dev))
+		if (nfp_netdev_is_nfp_repr(in_dev)) {
+			/* Confirm ingress and egress are on same device. */
+			if (!netdev_port_same_parent_id(in_dev, out_dev)) {
+				NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress and egress interfaces are on different devices");
+				return -EOPNOTSUPP;
+			}
+		}
+
+		if (!nfp_netdev_is_nfp_repr(out_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface is not an nfp port");
 			return -EOPNOTSUPP;
-		if (!nfp_netdev_is_nfp_repr(out_dev))
-			return -EOPNOTSUPP;
+		}
 
 		output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
-		if (!output->port)
+		if (!output->port) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid port id for egress interface");
 			return -EOPNOTSUPP;
+		}
 	}
 	nfp_flow->meta.shortcut = output->port;
 
 	return 0;
 }
 
-static enum nfp_flower_tun_type
-nfp_fl_get_tun_from_act_l4_port(struct nfp_app *app,
-				const struct tc_action *action)
+static bool
+nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx)
 {
-	struct ip_tunnel_info *tun = tcf_tunnel_info(action);
+	struct flow_action_entry *act = flow->rule->action.entries;
+	int num_act = flow->rule->action.num_entries;
+	int act_idx;
+
+	/* Pre-parse the action list for the next mirred or redirect action */
+	for (act_idx = start_idx + 1; act_idx < num_act; act_idx++)
+		if (act[act_idx].id == FLOW_ACTION_REDIRECT ||
+		    act[act_idx].id == FLOW_ACTION_MIRRED)
+			return netif_is_gretap(act[act_idx].dev);
+
+	return false;
+}
+
+static enum nfp_flower_tun_type
+nfp_fl_get_tun_from_act(struct nfp_app *app,
+			struct flow_cls_offload *flow,
+			const struct flow_action_entry *act, int act_idx)
+{
+	const struct ip_tunnel_info *tun = act->tunnel;
 	struct nfp_flower_priv *priv = app->priv;
 
+	/* Determine the tunnel type based on the egress netdev
+	 * in the mirred action, for tunnels without an L4 header.
+	 */
+	if (nfp_flower_tun_is_gre(flow, act_idx))
+		return NFP_FL_TUNNEL_GRE;
+
 	switch (tun->key.tp_dst) {
-	case htons(NFP_FL_VXLAN_PORT):
+	case htons(IANA_VXLAN_UDP_PORT):
 		return NFP_FL_TUNNEL_VXLAN;
-	case htons(NFP_FL_GENEVE_PORT):
+	case htons(GENEVE_UDP_PORT):
 		if (priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)
 			return NFP_FL_TUNNEL_GENEVE;
 		/* FALLTHROUGH */
@@ -240,9 +325,10 @@
 
 static int
 nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
-			   const struct tc_action *action)
+			   const struct flow_action_entry *act,
+			   struct netlink_ext_ack *extack)
 {
-	struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+	struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel;
 	int opt_len, opt_cnt, act_start, tot_push_len;
 	u8 *src = ip_tunnel_info_opts(ip_tun);
 
@@ -258,20 +344,26 @@
 		struct geneve_opt *opt = (struct geneve_opt *)src;
 
 		opt_cnt++;
-		if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT)
+		if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed number of geneve options exceeded");
 			return -EOPNOTSUPP;
+		}
 
 		tot_push_len += sizeof(struct nfp_fl_push_geneve) +
 			       opt->length * 4;
-		if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT)
+		if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options");
 			return -EOPNOTSUPP;
+		}
 
 		opt_len -= sizeof(struct geneve_opt) + opt->length * 4;
 		src += sizeof(struct geneve_opt) + opt->length * 4;
 	}
 
-	if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ)
+	if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options");
 		return -EOPNOTSUPP;
+	}
 
 	act_start = *list_len;
 	*list_len += tot_push_len;
@@ -302,15 +394,14 @@
 }
 
 static int
-nfp_fl_set_ipv4_udp_tun(struct nfp_app *app,
-			struct nfp_fl_set_ipv4_udp_tun *set_tun,
-			const struct tc_action *action,
-			struct nfp_fl_pre_tunnel *pre_tun,
-			enum nfp_flower_tun_type tun_type,
-			struct net_device *netdev)
+nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun,
+		    const struct flow_action_entry *act,
+		    struct nfp_fl_pre_tunnel *pre_tun,
+		    enum nfp_flower_tun_type tun_type,
+		    struct net_device *netdev, struct netlink_ext_ack *extack)
 {
-	size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
-	struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+	size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun);
+	const struct ip_tunnel_info *ip_tun = act->tunnel;
 	struct nfp_flower_priv *priv = app->priv;
 	u32 tmp_set_ip_tun_type_index = 0;
 	/* Currently support one pre-tunnel so index is always 0. */
@@ -321,8 +412,10 @@
 		     NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT);
 	if (ip_tun->options_len &&
 	    (tun_type != NFP_FL_TUNNEL_GENEVE ||
-	    !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)))
+	    !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve options offload");
 		return -EOPNOTSUPP;
+	}
 
 	set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
 	set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
@@ -362,8 +455,10 @@
 	set_tun->tos = ip_tun->key.tos;
 
 	if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) ||
-	    ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS)
+	    ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload");
 		return -EOPNOTSUPP;
+	}
 	set_tun->tun_flags = ip_tun->key.tun_flags;
 
 	if (tun_type == NFP_FL_TUNNEL_GENEVE) {
@@ -390,19 +485,23 @@
 }
 
 static int
-nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off,
-	       struct nfp_fl_set_eth *set_eth)
+nfp_fl_set_eth(const struct flow_action_entry *act, u32 off,
+	       struct nfp_fl_set_eth *set_eth, struct netlink_ext_ack *extack)
 {
 	u32 exact, mask;
 
-	if (off + 4 > ETH_ALEN * 2)
+	if (off + 4 > ETH_ALEN * 2) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action");
 		return -EOPNOTSUPP;
+	}
 
-	mask = ~tcf_pedit_mask(action, idx);
-	exact = tcf_pedit_val(action, idx);
+	mask = ~act->mangle.mask;
+	exact = act->mangle.val;
 
-	if (exact & ~mask)
+	if (exact & ~mask) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action");
 		return -EOPNOTSUPP;
+	}
 
 	nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off],
 			    &set_eth->eth_addr_mask[off]);
@@ -414,38 +513,90 @@
 	return 0;
 }
 
+struct ipv4_ttl_word {
+	__u8	ttl;
+	__u8	protocol;
+	__sum16	check;
+};
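+
+/* Overlay note: pedit hands the driver 32-bit words, and the word
+ * starting at offsetof(struct iphdr, ttl) == 8 spans the ttl, protocol
+ * and check fields; this helper struct mirrors that word so the
+ * protocol/check mask bits can be rejected below.
+ */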
+
 static int
-nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
-	       struct nfp_fl_set_ip4_addrs *set_ip_addr)
+nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off,
+	       struct nfp_fl_set_ip4_addrs *set_ip_addr,
+	       struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos,
+	       struct netlink_ext_ack *extack)
 {
+	struct ipv4_ttl_word *ttl_word_mask;
+	struct ipv4_ttl_word *ttl_word;
+	struct iphdr *tos_word_mask;
+	struct iphdr *tos_word;
 	__be32 exact, mask;
 
 	/* We are expecting tcf_pedit to return a big endian value */
-	mask = (__force __be32)~tcf_pedit_mask(action, idx);
-	exact = (__force __be32)tcf_pedit_val(action, idx);
+	mask = (__force __be32)~act->mangle.mask;
+	exact = (__force __be32)act->mangle.val;
 
-	if (exact & ~mask)
+	if (exact & ~mask) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 action");
 		return -EOPNOTSUPP;
+	}
 
 	switch (off) {
 	case offsetof(struct iphdr, daddr):
 		set_ip_addr->ipv4_dst_mask |= mask;
 		set_ip_addr->ipv4_dst &= ~mask;
 		set_ip_addr->ipv4_dst |= exact & mask;
+		set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS;
+		set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >>
+					   NFP_FL_LW_SIZ;
 		break;
 	case offsetof(struct iphdr, saddr):
 		set_ip_addr->ipv4_src_mask |= mask;
 		set_ip_addr->ipv4_src &= ~mask;
 		set_ip_addr->ipv4_src |= exact & mask;
+		set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS;
+		set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >>
+					   NFP_FL_LW_SIZ;
+		break;
+	case offsetof(struct iphdr, ttl):
+		ttl_word_mask = (struct ipv4_ttl_word *)&mask;
+		ttl_word = (struct ipv4_ttl_word *)&exact;
+
+		if (ttl_word_mask->protocol || ttl_word_mask->check) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 ttl action");
+			return -EOPNOTSUPP;
+		}
+
+		set_ip_ttl_tos->ipv4_ttl_mask |= ttl_word_mask->ttl;
+		set_ip_ttl_tos->ipv4_ttl &= ~ttl_word_mask->ttl;
+		set_ip_ttl_tos->ipv4_ttl |= ttl_word->ttl & ttl_word_mask->ttl;
+		set_ip_ttl_tos->head.jump_id =
+			NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS;
+		set_ip_ttl_tos->head.len_lw = sizeof(*set_ip_ttl_tos) >>
+					      NFP_FL_LW_SIZ;
+		break;
+	case round_down(offsetof(struct iphdr, tos), 4):
+		tos_word_mask = (struct iphdr *)&mask;
+		tos_word = (struct iphdr *)&exact;
+
+		if (tos_word_mask->version || tos_word_mask->ihl ||
+		    tos_word_mask->tot_len) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 tos action");
+			return -EOPNOTSUPP;
+		}
+
+		set_ip_ttl_tos->ipv4_tos_mask |= tos_word_mask->tos;
+		set_ip_ttl_tos->ipv4_tos &= ~tos_word_mask->tos;
+		set_ip_ttl_tos->ipv4_tos |= tos_word->tos & tos_word_mask->tos;
+		set_ip_ttl_tos->head.jump_id =
+			NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS;
+		set_ip_ttl_tos->head.len_lw = sizeof(*set_ip_ttl_tos) >>
+					      NFP_FL_LW_SIZ;
 		break;
 	default:
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv4 header");
 		return -EOPNOTSUPP;
 	}
 
-	set_ip_addr->reserved = cpu_to_be16(0);
-	set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS;
-	set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ;
-
 	return 0;
 }
 
@@ -462,23 +613,77 @@
 	ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ;
 }
 
+struct ipv6_hop_limit_word {
+	__be16 payload_len;
+	u8 nexthdr;
+	u8 hop_limit;
+};
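+
+/* Overlay note: the 32-bit pedit word starting at
+ * offsetof(struct ipv6hdr, payload_len) == 4 covers payload_len (2
+ * bytes), nexthdr and hop_limit; this helper struct maps that word out so
+ * that only the hop_limit byte may be masked in, as checked below.
+ */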
+
 static int
-nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
+nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask,
+				    struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl,
+				    struct netlink_ext_ack *extack)
+{
+	struct ipv6_hop_limit_word *fl_hl_mask;
+	struct ipv6_hop_limit_word *fl_hl;
+
+	switch (off) {
+	case offsetof(struct ipv6hdr, payload_len):
+		fl_hl_mask = (struct ipv6_hop_limit_word *)&mask;
+		fl_hl = (struct ipv6_hop_limit_word *)&exact;
+
+		if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 hop limit action");
+			return -EOPNOTSUPP;
+		}
+
+		ip_hl_fl->ipv6_hop_limit_mask |= fl_hl_mask->hop_limit;
+		ip_hl_fl->ipv6_hop_limit &= ~fl_hl_mask->hop_limit;
+		ip_hl_fl->ipv6_hop_limit |= fl_hl->hop_limit &
+					    fl_hl_mask->hop_limit;
+		break;
+	case round_down(offsetof(struct ipv6hdr, flow_lbl), 4):
+		if (mask & ~IPV6_FLOW_LABEL_MASK ||
+		    exact & ~IPV6_FLOW_LABEL_MASK) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow label action");
+			return -EOPNOTSUPP;
+		}
+
+		ip_hl_fl->ipv6_label_mask |= mask;
+		ip_hl_fl->ipv6_label &= ~mask;
+		ip_hl_fl->ipv6_label |= exact & mask;
+		break;
+	}
+
+	ip_hl_fl->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL;
+	ip_hl_fl->head.len_lw = sizeof(*ip_hl_fl) >> NFP_FL_LW_SIZ;
+
+	return 0;
+}
+
+static int
+nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off,
 	       struct nfp_fl_set_ipv6_addr *ip_dst,
-	       struct nfp_fl_set_ipv6_addr *ip_src)
+	       struct nfp_fl_set_ipv6_addr *ip_src,
+	       struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl,
+	       struct netlink_ext_ack *extack)
 {
 	__be32 exact, mask;
+	int err = 0;
 	u8 word;
 
 	/* We are expecting tcf_pedit to return a big endian value */
-	mask = (__force __be32)~tcf_pedit_mask(action, idx);
-	exact = (__force __be32)tcf_pedit_val(action, idx);
+	mask = (__force __be32)~act->mangle.mask;
+	exact = (__force __be32)act->mangle.val;
 
-	if (exact & ~mask)
+	if (exact & ~mask) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 action");
 		return -EOPNOTSUPP;
+	}
 
 	if (off < offsetof(struct ipv6hdr, saddr)) {
-		return -EOPNOTSUPP;
+		err = nfp_fl_set_ip6_hop_limit_flow_label(off, exact, mask,
+							  ip_hl_fl, extack);
 	} else if (off < offsetof(struct ipv6hdr, daddr)) {
 		word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact);
 		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word,
@@ -489,26 +694,32 @@
 		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word,
 				      exact, mask, ip_dst);
 	} else {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv6 header");
 		return -EOPNOTSUPP;
 	}
 
-	return 0;
+	return err;
 }
 
 static int
-nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off,
-		 struct nfp_fl_set_tport *set_tport, int opcode)
+nfp_fl_set_tport(const struct flow_action_entry *act, u32 off,
+		 struct nfp_fl_set_tport *set_tport, int opcode,
+		 struct netlink_ext_ack *extack)
 {
 	u32 exact, mask;
 
-	if (off)
+	if (off) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of L4 header");
 		return -EOPNOTSUPP;
+	}
 
-	mask = ~tcf_pedit_mask(action, idx);
-	exact = tcf_pedit_val(action, idx);
+	mask = ~act->mangle.mask;
+	exact = act->mangle.val;
 
-	if (exact & ~mask)
+	if (exact & ~mask) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit L4 action");
 		return -EOPNOTSUPP;
+	}
 
 	nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val,
 			    set_tport->tp_port_mask);
@@ -538,122 +749,107 @@
 	}
 }
 
-static int
-nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
-	     char *nfp_action, int *a_len, u32 *csum_updated)
-{
+struct nfp_flower_pedit_acts {
 	struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src;
+	struct nfp_fl_set_ipv6_tc_hl_fl set_ip6_tc_hl_fl;
+	struct nfp_fl_set_ip4_ttl_tos set_ip_ttl_tos;
 	struct nfp_fl_set_ip4_addrs set_ip_addr;
 	struct nfp_fl_set_tport set_tport;
 	struct nfp_fl_set_eth set_eth;
-	enum pedit_header_type htype;
-	int idx, nkeys, err;
+};
+
+static int
+nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action,
+		     int *a_len, struct nfp_flower_pedit_acts *set_act,
+		     u32 *csum_updated)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 	size_t act_size = 0;
-	u32 offset, cmd;
 	u8 ip_proto = 0;
 
-	memset(&set_ip6_dst, 0, sizeof(set_ip6_dst));
-	memset(&set_ip6_src, 0, sizeof(set_ip6_src));
-	memset(&set_ip_addr, 0, sizeof(set_ip_addr));
-	memset(&set_tport, 0, sizeof(set_tport));
-	memset(&set_eth, 0, sizeof(set_eth));
-	nkeys = tcf_pedit_nkeys(action);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-	for (idx = 0; idx < nkeys; idx++) {
-		cmd = tcf_pedit_cmd(action, idx);
-		htype = tcf_pedit_htype(action, idx);
-		offset = tcf_pedit_offset(action, idx);
-
-		if (cmd != TCA_PEDIT_KEY_EX_CMD_SET)
-			return -EOPNOTSUPP;
-
-		switch (htype) {
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
-			err = nfp_fl_set_eth(action, idx, offset, &set_eth);
-			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
-			err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr);
-			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
-			err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst,
-					     &set_ip6_src);
-			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
-			err = nfp_fl_set_tport(action, idx, offset, &set_tport,
-					       NFP_FL_ACTION_OPCODE_SET_TCP);
-			break;
-		case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
-			err = nfp_fl_set_tport(action, idx, offset, &set_tport,
-					       NFP_FL_ACTION_OPCODE_SET_UDP);
-			break;
-		default:
-			return -EOPNOTSUPP;
-		}
-		if (err)
-			return err;
+		flow_rule_match_basic(rule, &match);
+		ip_proto = match.key->ip_proto;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *basic;
-
-		basic = skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  flow->key);
-		ip_proto = basic->ip_proto;
-	}
-
-	if (set_eth.head.len_lw) {
-		act_size = sizeof(set_eth);
-		memcpy(nfp_action, &set_eth, act_size);
+	if (set_act->set_eth.head.len_lw) {
+		act_size = sizeof(set_act->set_eth);
+		memcpy(nfp_action, &set_act->set_eth, act_size);
 		*a_len += act_size;
 	}
-	if (set_ip_addr.head.len_lw) {
+
+	if (set_act->set_ip_ttl_tos.head.len_lw) {
 		nfp_action += act_size;
-		act_size = sizeof(set_ip_addr);
-		memcpy(nfp_action, &set_ip_addr, act_size);
+		act_size = sizeof(set_act->set_ip_ttl_tos);
+		memcpy(nfp_action, &set_act->set_ip_ttl_tos, act_size);
 		*a_len += act_size;
 
 		/* Hardware will automatically fix IPv4 and TCP/UDP checksum. */
 		*csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
 				nfp_fl_csum_l4_to_flag(ip_proto);
 	}
-	if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
-		/* TC compiles set src and dst IPv6 address as a single action,
-		 * the hardware requires this to be 2 separate actions.
-		 */
+
+	if (set_act->set_ip_addr.head.len_lw) {
 		nfp_action += act_size;
-		act_size = sizeof(set_ip6_src);
-		memcpy(nfp_action, &set_ip6_src, act_size);
+		act_size = sizeof(set_act->set_ip_addr);
+		memcpy(nfp_action, &set_act->set_ip_addr, act_size);
 		*a_len += act_size;
 
-		act_size = sizeof(set_ip6_dst);
-		memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst,
-		       act_size);
-		*a_len += act_size;
+		/* Hardware will automatically fix IPv4 and TCP/UDP checksum. */
+		*csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
+				nfp_fl_csum_l4_to_flag(ip_proto);
+	}
 
-		/* Hardware will automatically fix TCP/UDP checksum. */
-		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
-	} else if (set_ip6_dst.head.len_lw) {
+	if (set_act->set_ip6_tc_hl_fl.head.len_lw) {
 		nfp_action += act_size;
-		act_size = sizeof(set_ip6_dst);
-		memcpy(nfp_action, &set_ip6_dst, act_size);
-		*a_len += act_size;
-
-		/* Hardware will automatically fix TCP/UDP checksum. */
-		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
-	} else if (set_ip6_src.head.len_lw) {
-		nfp_action += act_size;
-		act_size = sizeof(set_ip6_src);
-		memcpy(nfp_action, &set_ip6_src, act_size);
+		act_size = sizeof(set_act->set_ip6_tc_hl_fl);
+		memcpy(nfp_action, &set_act->set_ip6_tc_hl_fl, act_size);
 		*a_len += act_size;
 
 		/* Hardware will automatically fix TCP/UDP checksum. */
 		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
 	}
-	if (set_tport.head.len_lw) {
+
+	if (set_act->set_ip6_dst.head.len_lw &&
+	    set_act->set_ip6_src.head.len_lw) {
+		/* TC compiles set src and dst IPv6 address as a single action,
+		 * the hardware requires this to be 2 separate actions.
+		 */
 		nfp_action += act_size;
-		act_size = sizeof(set_tport);
-		memcpy(nfp_action, &set_tport, act_size);
+		act_size = sizeof(set_act->set_ip6_src);
+		memcpy(nfp_action, &set_act->set_ip6_src, act_size);
+		*a_len += act_size;
+
+		act_size = sizeof(set_act->set_ip6_dst);
+		memcpy(&nfp_action[sizeof(set_act->set_ip6_src)],
+		       &set_act->set_ip6_dst, act_size);
+		*a_len += act_size;
+
+		/* Hardware will automatically fix TCP/UDP checksum. */
+		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
+	} else if (set_act->set_ip6_dst.head.len_lw) {
+		nfp_action += act_size;
+		act_size = sizeof(set_act->set_ip6_dst);
+		memcpy(nfp_action, &set_act->set_ip6_dst, act_size);
+		*a_len += act_size;
+
+		/* Hardware will automatically fix TCP/UDP checksum. */
+		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
+	} else if (set_act->set_ip6_src.head.len_lw) {
+		nfp_action += act_size;
+		act_size = sizeof(set_act->set_ip6_src);
+		memcpy(nfp_action, &set_act->set_ip6_src, act_size);
+		*a_len += act_size;
+
+		/* Hardware will automatically fix TCP/UDP checksum. */
+		*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
+	}
+	if (set_act->set_tport.head.len_lw) {
+		nfp_action += act_size;
+		act_size = sizeof(set_act->set_tport);
+		memcpy(nfp_action, &set_act->set_tport, act_size);
 		*a_len += act_size;
 
 		/* Hardware will automatically fix TCP/UDP checksum. */
@@ -664,11 +860,47 @@
 }
 
 static int
-nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a,
+nfp_fl_pedit(const struct flow_action_entry *act,
+	     struct flow_cls_offload *flow, char *nfp_action, int *a_len,
+	     u32 *csum_updated, struct nfp_flower_pedit_acts *set_act,
+	     struct netlink_ext_ack *extack)
+{
+	enum flow_action_mangle_base htype;
+	u32 offset;
+
+	htype = act->mangle.htype;
+	offset = act->mangle.offset;
+
+	switch (htype) {
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+		return nfp_fl_set_eth(act, offset, &set_act->set_eth, extack);
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+		return nfp_fl_set_ip4(act, offset, &set_act->set_ip_addr,
+				      &set_act->set_ip_ttl_tos, extack);
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+		return nfp_fl_set_ip6(act, offset, &set_act->set_ip6_dst,
+				      &set_act->set_ip6_src,
+				      &set_act->set_ip6_tc_hl_fl, extack);
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+		return nfp_fl_set_tport(act, offset, &set_act->set_tport,
+					NFP_FL_ACTION_OPCODE_SET_TCP, extack);
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+		return nfp_fl_set_tport(act, offset, &set_act->set_tport,
+					NFP_FL_ACTION_OPCODE_SET_UDP, extack);
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported header");
+		return -EOPNOTSUPP;
+	}
+}
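+
+/* Mapping sketch: a command such as "tc ... action pedit ex munge ip ttl
+ * set 64" arrives here as a FLOW_ACTION_MANGLE entry with htype
+ * TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 and offset offsetof(struct iphdr, ttl),
+ * and is routed to nfp_fl_set_ip4() above.
+ */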
+
+static int
+nfp_flower_output_action(struct nfp_app *app,
+			 const struct flow_action_entry *act,
 			 struct nfp_fl_payload *nfp_fl, int *a_len,
 			 struct net_device *netdev, bool last,
 			 enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
-			 int *out_cnt, u32 *csum_updated)
+			 int *out_cnt, u32 *csum_updated, bool pkt_host,
+			 struct netlink_ext_ack *extack)
 {
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_fl_output *output;
@@ -677,15 +909,19 @@
 	/* If csum_updated has not been reset by now, it means HW will
 	 * incorrectly update csums when they are not requested.
 	 */
-	if (*csum_updated)
+	if (*csum_updated) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: set actions without updating checksums are not supported");
 		return -EOPNOTSUPP;
+	}
 
-	if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+	if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: mirred output increases action list size beyond the allowed maximum");
 		return -EOPNOTSUPP;
+	}
 
 	output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
-	err = nfp_fl_output(app, output, a, nfp_fl, last, netdev, *tun_type,
-			    tun_out_cnt);
+	err = nfp_fl_output(app, output, act, nfp_fl, last, netdev, *tun_type,
+			    tun_out_cnt, pkt_host, extack);
 	if (err)
 		return err;
 
@@ -695,11 +931,13 @@
 		/* nfp_fl_pre_lag returns -err or size of prelag action added.
 		 * This will be 0 if it is not egressing to a lag dev.
 		 */
-		prelag_size = nfp_fl_pre_lag(app, a, nfp_fl, *a_len);
-		if (prelag_size < 0)
+		prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len, extack);
+		if (prelag_size < 0) {
 			return prelag_size;
-		else if (prelag_size > 0 && (!last || *out_cnt))
+		} else if (prelag_size > 0 && (!last || *out_cnt)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: LAG action has to be last action in action list");
 			return -EOPNOTSUPP;
+		}
 
 		*a_len += prelag_size;
 	}
@@ -709,117 +947,235 @@
 }
 
 static int
-nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
-		       struct tc_cls_flower_offload *flow,
+nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
+		       struct flow_cls_offload *flow,
 		       struct nfp_fl_payload *nfp_fl, int *a_len,
 		       struct net_device *netdev,
 		       enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
-		       int *out_cnt, u32 *csum_updated)
+		       int *out_cnt, u32 *csum_updated,
+		       struct nfp_flower_pedit_acts *set_act, bool *pkt_host,
+		       struct netlink_ext_ack *extack, int act_idx)
 {
-	struct nfp_fl_set_ipv4_udp_tun *set_tun;
+	struct nfp_fl_set_ipv4_tun *set_tun;
 	struct nfp_fl_pre_tunnel *pre_tun;
 	struct nfp_fl_push_vlan *psh_v;
+	struct nfp_fl_push_mpls *psh_m;
 	struct nfp_fl_pop_vlan *pop_v;
+	struct nfp_fl_pop_mpls *pop_m;
+	struct nfp_fl_set_mpls *set_m;
 	int err;
 
-	if (is_tcf_gact_shot(a)) {
+	switch (act->id) {
+	case FLOW_ACTION_DROP:
 		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP);
-	} else if (is_tcf_mirred_egress_redirect(a)) {
-		err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
+		break;
+	case FLOW_ACTION_REDIRECT_INGRESS:
+	case FLOW_ACTION_REDIRECT:
+		err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev,
 					       true, tun_type, tun_out_cnt,
-					       out_cnt, csum_updated);
+					       out_cnt, csum_updated, *pkt_host,
+					       extack);
 		if (err)
 			return err;
-
-	} else if (is_tcf_mirred_egress_mirror(a)) {
-		err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
+		break;
+	case FLOW_ACTION_MIRRED_INGRESS:
+	case FLOW_ACTION_MIRRED:
+		err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev,
 					       false, tun_type, tun_out_cnt,
-					       out_cnt, csum_updated);
+					       out_cnt, csum_updated, *pkt_host,
+					       extack);
 		if (err)
 			return err;
-
-	} else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
-		if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ)
+		break;
+	case FLOW_ACTION_VLAN_POP:
+		if (*a_len +
+		    sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop vlan");
 			return -EOPNOTSUPP;
+		}
 
 		pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len];
 		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV);
 
 		nfp_fl_pop_vlan(pop_v);
 		*a_len += sizeof(struct nfp_fl_pop_vlan);
-	} else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
-		if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ)
+		break;
+	case FLOW_ACTION_VLAN_PUSH:
+		if (*a_len +
+		    sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push vlan");
 			return -EOPNOTSUPP;
+		}
 
 		psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len];
 		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
 
-		nfp_fl_push_vlan(psh_v, a);
+		nfp_fl_push_vlan(psh_v, act);
 		*a_len += sizeof(struct nfp_fl_push_vlan);
-	} else if (is_tcf_tunnel_set(a)) {
-		struct ip_tunnel_info *ip_tun = tcf_tunnel_info(a);
-		struct nfp_repr *repr = netdev_priv(netdev);
+		break;
+	case FLOW_ACTION_TUNNEL_ENCAP: {
+		const struct ip_tunnel_info *ip_tun = act->tunnel;
 
-		*tun_type = nfp_fl_get_tun_from_act_l4_port(repr->app, a);
-		if (*tun_type == NFP_FL_TUNNEL_NONE)
+		*tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx);
+		if (*tun_type == NFP_FL_TUNNEL_NONE) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list");
 			return -EOPNOTSUPP;
+		}
 
-		if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS)
+		if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel flags in action list");
 			return -EOPNOTSUPP;
+		}
 
 		/* Pre-tunnel action is required for tunnel encap.
 		 * This checks for next hop entries on NFP.
 		 * If none, the packet falls back before applying other actions.
 		 */
 		if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
-		    sizeof(struct nfp_fl_set_ipv4_udp_tun) > NFP_FL_MAX_A_SIZ)
+		    sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap");
 			return -EOPNOTSUPP;
+		}
 
 		pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len);
 		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
 		*a_len += sizeof(struct nfp_fl_pre_tunnel);
 
-		err = nfp_fl_push_geneve_options(nfp_fl, a_len, a);
+		err = nfp_fl_push_geneve_options(nfp_fl, a_len, act, extack);
 		if (err)
 			return err;
 
 		set_tun = (void *)&nfp_fl->action_data[*a_len];
-		err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun,
-					      *tun_type, netdev);
+		err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun,
+					  *tun_type, netdev, extack);
 		if (err)
 			return err;
-		*a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);
-	} else if (is_tcf_tunnel_release(a)) {
+		*a_len += sizeof(struct nfp_fl_set_ipv4_tun);
+		}
+		break;
+	case FLOW_ACTION_TUNNEL_DECAP:
 		/* Tunnel decap is handled by default so accept action. */
 		return 0;
-	} else if (is_tcf_pedit(a)) {
-		if (nfp_fl_pedit(a, flow, &nfp_fl->action_data[*a_len],
-				 a_len, csum_updated))
+	case FLOW_ACTION_MANGLE:
+		if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len],
+				 a_len, csum_updated, set_act, extack))
 			return -EOPNOTSUPP;
-	} else if (is_tcf_csum(a)) {
+		break;
+	case FLOW_ACTION_CSUM:
 		/* csum action requests recalc of something we have not fixed */
-		if (tcf_csum_update_flags(a) & ~*csum_updated)
+		if (act->csum_flags & ~*csum_updated) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported csum update action in action list");
 			return -EOPNOTSUPP;
+		}
 		/* If we will correctly fix the csum we can remove it from the
 		 * csum update list. Which will later be used to check support.
 		 */
-		*csum_updated &= ~tcf_csum_update_flags(a);
-	} else {
+		*csum_updated &= ~act->csum_flags;
+		break;
+	case FLOW_ACTION_MPLS_PUSH:
+		if (*a_len +
+		    sizeof(struct nfp_fl_push_mpls) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push MPLS");
+			return -EOPNOTSUPP;
+		}
+
+		psh_m = (struct nfp_fl_push_mpls *)&nfp_fl->action_data[*a_len];
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+		err = nfp_fl_push_mpls(psh_m, act, extack);
+		if (err)
+			return err;
+		*a_len += sizeof(struct nfp_fl_push_mpls);
+		break;
+	case FLOW_ACTION_MPLS_POP:
+		if (*a_len +
+		    sizeof(struct nfp_fl_pop_mpls) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop MPLS");
+			return -EOPNOTSUPP;
+		}
+
+		pop_m = (struct nfp_fl_pop_mpls *)&nfp_fl->action_data[*a_len];
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+		nfp_fl_pop_mpls(pop_m, act);
+		*a_len += sizeof(struct nfp_fl_pop_mpls);
+		break;
+	case FLOW_ACTION_MPLS_MANGLE:
+		if (*a_len +
+		    sizeof(struct nfp_fl_set_mpls) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at set MPLS");
+			return -EOPNOTSUPP;
+		}
+
+		set_m = (struct nfp_fl_set_mpls *)&nfp_fl->action_data[*a_len];
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+		nfp_fl_set_mpls(set_m, act);
+		*a_len += sizeof(struct nfp_fl_set_mpls);
+		break;
+	case FLOW_ACTION_PTYPE:
+		/* TC ptype skbedit sets PACKET_HOST for ingress redirect. */
+		if (act->ptype != PACKET_HOST)
+			return -EOPNOTSUPP;
+
+		*pkt_host = true;
+		break;
+	default:
 		/* Currently we do not handle any other actions. */
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list");
 		return -EOPNOTSUPP;
 	}
 
 	return 0;
 }
 
+static bool nfp_fl_check_mangle_start(struct flow_action *flow_act,
+				      int current_act_idx)
+{
+	struct flow_action_entry current_act;
+	struct flow_action_entry prev_act;
+
+	current_act = flow_act->entries[current_act_idx];
+	if (current_act.id != FLOW_ACTION_MANGLE)
+		return false;
+
+	if (current_act_idx == 0)
+		return true;
+
+	prev_act = flow_act->entries[current_act_idx - 1];
+
+	return prev_act.id != FLOW_ACTION_MANGLE;
+}
+
+static bool nfp_fl_check_mangle_end(struct flow_action *flow_act,
+				    int current_act_idx)
+{
+	struct flow_action_entry current_act;
+	struct flow_action_entry next_act;
+
+	current_act = flow_act->entries[current_act_idx];
+	if (current_act.id != FLOW_ACTION_MANGLE)
+		return false;
+
+	if (current_act_idx == flow_act->num_entries - 1)
+		return true;
+
+	next_act = flow_act->entries[current_act_idx + 1];
+
+	return next_act.id != FLOW_ACTION_MANGLE;
+}
+
 int nfp_flower_compile_action(struct nfp_app *app,
-			      struct tc_cls_flower_offload *flow,
+			      struct flow_cls_offload *flow,
 			      struct net_device *netdev,
-			      struct nfp_fl_payload *nfp_flow)
+			      struct nfp_fl_payload *nfp_flow,
+			      struct netlink_ext_ack *extack)
 {
 	int act_len, act_cnt, err, tun_out_cnt, out_cnt, i;
+	struct nfp_flower_pedit_acts set_act;
 	enum nfp_flower_tun_type tun_type;
-	const struct tc_action *a;
+	struct flow_action_entry *act;
+	bool pkt_host = false;
 	u32 csum_updated = 0;
 
 	memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
@@ -830,13 +1186,20 @@
 	tun_out_cnt = 0;
 	out_cnt = 0;
 
-	tcf_exts_for_each_action(i, a, flow->exts) {
-		err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len,
+	flow_action_for_each(i, act, &flow->rule->action) {
+		if (nfp_fl_check_mangle_start(&flow->rule->action, i))
+			memset(&set_act, 0, sizeof(set_act));
+		err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len,
 					     netdev, &tun_type, &tun_out_cnt,
-					     &out_cnt, &csum_updated);
+					     &out_cnt, &csum_updated,
+					     &set_act, &pkt_host, extack, i);
 		if (err)
 			return err;
 		act_cnt++;
+		if (nfp_fl_check_mangle_end(&flow->rule->action, i))
+			nfp_fl_commit_mangle(flow,
+					     &nfp_flow->action_data[act_len],
+					     &act_len, &set_act, &csum_updated);
 	}
 
 	/* We optimise when the action list is small, this can unfortunately
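
The pair of helpers added above brackets runs of consecutive FLOW_ACTION_MANGLE entries, so the pedit accumulator is zeroed once at the start of a run and nfp_fl_commit_mangle() is called once at its end. A minimal standalone sketch of that boundary detection (hypothetical names, not driver code; it assumes the last-entry check compares against num_entries - 1):

/* Detect where a run of consecutive mangle actions starts and ends. */
#include <stdbool.h>
#include <stdio.h>

enum act_id { ACT_OUTPUT, ACT_MANGLE, ACT_CSUM };

static bool mangle_start(const enum act_id *ids, int i)
{
	return ids[i] == ACT_MANGLE && (i == 0 || ids[i - 1] != ACT_MANGLE);
}

static bool mangle_end(const enum act_id *ids, int n, int i)
{
	return ids[i] == ACT_MANGLE && (i == n - 1 || ids[i + 1] != ACT_MANGLE);
}

int main(void)
{
	enum act_id ids[] = { ACT_MANGLE, ACT_MANGLE, ACT_CSUM, ACT_MANGLE };
	int n = sizeof(ids) / sizeof(ids[0]);

	for (int i = 0; i < n; i++) {
		if (mangle_start(ids, i))
			printf("run starts at %d: reset accumulator\n", i);
		if (mangle_end(ids, n, i))
			printf("run ends at %d: commit accumulator\n", i);
	}
	return 0;
}
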
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index cb85652..05981b5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
 #include <linux/netdevice.h>
@@ -75,11 +45,9 @@
 {
 	struct nfp_flower_cmsg_mac_repr *msg;
 	struct sk_buff *skb;
-	unsigned int size;
 
-	size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]);
-	skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR,
-				    GFP_KERNEL);
+	skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports),
+				    NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL);
 	if (!skb)
 		return NULL;
 
@@ -191,7 +159,7 @@
 
 	rtnl_lock();
 	rcu_read_lock();
-	netdev = nfp_app_repr_get(app, be32_to_cpu(msg->portnum));
+	netdev = nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL);
 	rcu_read_unlock();
 	if (!netdev) {
 		nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
@@ -224,7 +192,7 @@
 	msg = nfp_flower_cmsg_get_data(skb);
 
 	rcu_read_lock();
-	exists = !!nfp_app_repr_get(app, be32_to_cpu(msg->portnum));
+	exists = !!nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL);
 	rcu_read_unlock();
 	if (!exists) {
 		nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
@@ -233,7 +201,51 @@
 	}
 
 	atomic_inc(&priv->reify_replies);
-	wake_up_interruptible(&priv->reify_wait_queue);
+	wake_up(&priv->reify_wait_queue);
+}
+
+static void
+nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb)
+{
+	unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
+	struct nfp_flower_cmsg_merge_hint *msg;
+	struct nfp_fl_payload *sub_flows[2];
+	int err, i, flow_cnt;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	/* msg->count is zero-based; the message carries count + 1 entries. */
+	flow_cnt = msg->count + 1;
+
+	if (msg_len < struct_size(msg, flow, flow_cnt)) {
+		nfp_flower_cmsg_warn(app, "Merge hint ctrl msg too short - %d bytes but expect %zd\n",
+				     msg_len, struct_size(msg, flow, flow_cnt));
+		return;
+	}
+
+	if (flow_cnt != 2) {
+		nfp_flower_cmsg_warn(app, "Merge hint contains %d flows - two are expected\n",
+				     flow_cnt);
+		return;
+	}
+
+	rtnl_lock();
+	for (i = 0; i < flow_cnt; i++) {
+		u32 ctx = be32_to_cpu(msg->flow[i].host_ctx);
+
+		sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx);
+		if (!sub_flows[i]) {
+			nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n");
+			goto err_rtnl_unlock;
+		}
+	}
+
+	err = nfp_flower_merge_offloaded_flows(app, sub_flows[0], sub_flows[1]);
+	/* Only warn on memory fail. Hint veto will not break functionality. */
+	if (err == -ENOMEM)
+		nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n");
+
+err_rtnl_unlock:
+	rtnl_unlock();
 }
 
 static void
@@ -248,18 +260,24 @@
 
 	type = cmsg_hdr->type;
 	switch (type) {
-	case NFP_FLOWER_CMSG_TYPE_PORT_REIFY:
-		nfp_flower_cmsg_portreify_rx(app, skb);
-		break;
 	case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
 		nfp_flower_cmsg_portmod_rx(app, skb);
 		break;
+	case NFP_FLOWER_CMSG_TYPE_MERGE_HINT:
+		if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE) {
+			nfp_flower_cmsg_merge_hint_rx(app, skb);
+			break;
+		}
+		goto err_default;
 	case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
 		nfp_tunnel_request_route(app, skb);
 		break;
 	case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
 		nfp_tunnel_keep_alive(app, skb);
 		break;
+	case NFP_FLOWER_CMSG_TYPE_QOS_STATS:
+		nfp_flower_stats_rlim_reply(app, skb);
+		break;
 	case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG:
 		if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
 			skb_stored = nfp_flower_lag_unprocessed_msg(app, skb);
@@ -267,6 +285,7 @@
 		}
 		/* fall through */
 	default:
+err_default:
 		nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
 				     type);
 		goto out;
@@ -306,8 +325,7 @@
 	struct nfp_flower_priv *priv = app->priv;
 	struct sk_buff_head *skb_head;
 
-	if (type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY ||
-	    type == NFP_FLOWER_CMSG_TYPE_PORT_MOD)
+	if (type == NFP_FLOWER_CMSG_TYPE_PORT_MOD)
 		skb_head = &priv->cmsg_skbs_high;
 	else
 		skb_head = &priv->cmsg_skbs_low;
@@ -346,6 +364,10 @@
 	} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) {
 		/* Acks from the NFP that the route is added - ignore. */
 		dev_consume_skb_any(skb);
+	} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) {
+		/* Handle REIFY acks outside wq to prevent RTNL conflict. */
+		nfp_flower_cmsg_portreify_rx(app, skb);
+		dev_consume_skb_any(skb);
 	} else {
 		nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type);
 	}
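
Both hunks above replace open-coded sizeof(*msg) + n * sizeof(elem) arithmetic with struct_size() from <linux/overflow.h>, which performs the same calculation with saturation on overflow. A sketch of the pattern, assuming a hypothetical demo_msg layout (the driver's real structs differ):

/* Allocate a header plus trailing flexible array in one checked expression. */
#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_msg {
	u8 reserved[3];
	u8 count;
	__be32 flow[];		/* trailing flexible array member */
};

static struct demo_msg *demo_alloc(u8 n)
{
	struct demo_msg *msg;

	/* struct_size(msg, flow, n) == sizeof(*msg) + n * sizeof(msg->flow[0]),
	 * saturating to SIZE_MAX on overflow so kzalloc() fails rather than
	 * under-allocating.
	 */
	msg = kzalloc(struct_size(msg, flow, n), GFP_KERNEL);
	if (msg)
		msg->count = n;

	return msg;
}
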
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 325954b..7eb2ec8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #ifndef NFP_FLOWER_CMSG_H
 #define NFP_FLOWER_CMSG_H
@@ -38,6 +8,8 @@
 #include <linux/skbuff.h>
 #include <linux/types.h>
 #include <net/geneve.h>
+#include <net/gre.h>
+#include <net/vxlan.h>
 
 #include "../nfp_app.h"
 #include "../nfpcore/nfp_cpp.h"
@@ -51,11 +23,12 @@
 #define NFP_FLOWER_LAYER_CT		BIT(6)
 #define NFP_FLOWER_LAYER_VXLAN		BIT(7)
 
+#define NFP_FLOWER_LAYER2_GRE		BIT(0)
 #define NFP_FLOWER_LAYER2_GENEVE	BIT(5)
 #define NFP_FLOWER_LAYER2_GENEVE_OP	BIT(6)
 
 #define NFP_FLOWER_MASK_VLAN_PRIO	GENMASK(15, 13)
-#define NFP_FLOWER_MASK_VLAN_CFI	BIT(12)
+#define NFP_FLOWER_MASK_VLAN_PRESENT	BIT(12)
 #define NFP_FLOWER_MASK_VLAN_VID	GENMASK(11, 0)
 
 #define NFP_FLOWER_MASK_MPLS_LB		GENMASK(31, 12)
@@ -66,6 +39,9 @@
 #define NFP_FL_IP_FRAG_FIRST		BIT(7)
 #define NFP_FL_IP_FRAGMENTED		BIT(6)
 
+/* GRE Tunnel flags */
+#define NFP_FL_GRE_FLAG_KEY		BIT(2)
+
 /* Compressed HW representation of TCP Flags */
 #define NFP_FL_TCP_FLAG_URG		BIT(4)
 #define NFP_FL_TCP_FLAG_PSH		BIT(3)
@@ -92,11 +68,16 @@
 #define NFP_FL_ACTION_OPCODE_OUTPUT		0
 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN		1
 #define NFP_FL_ACTION_OPCODE_POP_VLAN		2
+#define NFP_FL_ACTION_OPCODE_PUSH_MPLS		3
+#define NFP_FL_ACTION_OPCODE_POP_MPLS		4
 #define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL	6
 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET	7
+#define NFP_FL_ACTION_OPCODE_SET_MPLS		8
 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS	9
+#define NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS	10
 #define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC	11
 #define NFP_FL_ACTION_OPCODE_SET_IPV6_DST	12
+#define NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL	13
 #define NFP_FL_ACTION_OPCODE_SET_UDP		14
 #define NFP_FL_ACTION_OPCODE_SET_TCP		15
 #define NFP_FL_ACTION_OPCODE_PRE_LAG		16
@@ -109,9 +90,10 @@
 #define NFP_FL_OUT_FLAGS_TYPE_IDX	GENMASK(2, 0)
 
 #define NFP_FL_PUSH_VLAN_PRIO		GENMASK(15, 13)
-#define NFP_FL_PUSH_VLAN_CFI		BIT(12)
 #define NFP_FL_PUSH_VLAN_VID		GENMASK(11, 0)
 
+#define IPV6_FLOW_LABEL_MASK		cpu_to_be32(0x000fffff)
+
 /* LAG ports */
 #define NFP_FL_LAG_OUT			0xC0DE0000
 
@@ -122,6 +104,9 @@
 
 #define NFP_FLOWER_WORKQ_MAX_SKBS	30000
 
+/* Cmsg reply (empirical) timeout */
+#define NFP_FL_REPLY_TIMEOUT		msecs_to_jiffies(40)
+
 #define nfp_flower_cmsg_warn(app, fmt, args...)                         \
 	do {                                                            \
 		if (net_ratelimit())                                    \
@@ -130,6 +115,7 @@
 
 enum nfp_flower_tun_type {
 	NFP_FL_TUNNEL_NONE =	0,
+	NFP_FL_TUNNEL_GRE =	1,
 	NFP_FL_TUNNEL_VXLAN =	2,
 	NFP_FL_TUNNEL_GENEVE =	4,
 };
@@ -155,6 +141,26 @@
 	__be32 ipv4_dst;
 };
 
+struct nfp_fl_set_ip4_ttl_tos {
+	struct nfp_fl_act_head head;
+	u8 ipv4_ttl_mask;
+	u8 ipv4_tos_mask;
+	u8 ipv4_ttl;
+	u8 ipv4_tos;
+	__be16 reserved;
+};
+
+struct nfp_fl_set_ipv6_tc_hl_fl {
+	struct nfp_fl_act_head head;
+	u8 ipv6_tc_mask;
+	u8 ipv6_hop_limit_mask;
+	__be16 reserved;
+	u8 ipv6_tc;
+	u8 ipv6_hop_limit;
+	__be32 ipv6_label_mask;
+	__be32 ipv6_label;
+};
+
 struct nfp_fl_set_ipv6_addr {
 	struct nfp_fl_act_head head;
 	__be16 reserved;
@@ -206,7 +212,7 @@
 	__be32 extra[3];
 };
 
-struct nfp_fl_set_ipv4_udp_tun {
+struct nfp_fl_set_ipv4_tun {
 	struct nfp_fl_act_head head;
 	__be16 reserved;
 	__be64 tun_id __packed;
@@ -214,7 +220,8 @@
 	__be16 tun_flags;
 	u8 ttl;
 	u8 tos;
-	__be32 extra;
+	__be16 outer_vlan_tpid;
+	__be16 outer_vlan_tci;
 	u8 tun_len;
 	u8 res2;
 	__be16 tun_proto;
@@ -229,6 +236,24 @@
 	u8 opt_data[];
 };
 
+struct nfp_fl_push_mpls {
+	struct nfp_fl_act_head head;
+	__be16 ethtype;
+	__be32 lse;
+};
+
+struct nfp_fl_pop_mpls {
+	struct nfp_fl_act_head head;
+	__be16 ethtype;
+};
+
+struct nfp_fl_set_mpls {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	__be32 lse_mask;
+	__be32 lse;
+};
+
 /* Metadata with L2 (1W/4B)
  * ----------------------------------------------------------------
  *    3                   2                   1
@@ -357,6 +382,16 @@
 	struct in6_addr ipv6_dst;
 };
 
+struct nfp_flower_tun_ipv4 {
+	__be32 src;
+	__be32 dst;
+};
+
+struct nfp_flower_tun_ip_ext {
+	u8 tos;
+	u8 ttl;
+};
+
 /* Flow Frame IPv4 UDP TUNNEL --> Tunnel details (4W/16B)
  * -----------------------------------------------------------------
  *    3                   2                   1
@@ -374,15 +409,42 @@
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct nfp_flower_ipv4_udp_tun {
-	__be32 ip_src;
-	__be32 ip_dst;
+	struct nfp_flower_tun_ipv4 ipv4;
 	__be16 reserved1;
-	u8 tos;
-	u8 ttl;
+	struct nfp_flower_tun_ip_ext ip_ext;
 	__be32 reserved2;
 	__be32 tun_id;
 };
 
+/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B)
+ * -----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         ipv4_addr_src                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         ipv4_addr_dst                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           tun_flags           |       tos     |       ttl     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |            Reserved           |           Ethertype           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                              Key                              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           Reserved                            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+struct nfp_flower_ipv4_gre_tun {
+	struct nfp_flower_tun_ipv4 ipv4;
+	__be16 tun_flags;
+	struct nfp_flower_tun_ip_ext ip_ext;
+	__be16 reserved1;
+	__be16 ethertype;
+	__be32 tun_key;
+	__be32 reserved2;
+};
+
 struct nfp_flower_geneve_options {
 	u8 data[NFP_FL_MAX_GENEVE_OPT_KEY];
 };
@@ -405,11 +467,13 @@
 /* Types defined for port related control messages  */
 enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_FLOW_ADD =		0,
+	NFP_FLOWER_CMSG_TYPE_FLOW_MOD =		1,
 	NFP_FLOWER_CMSG_TYPE_FLOW_DEL =		2,
 	NFP_FLOWER_CMSG_TYPE_LAG_CONFIG =	4,
 	NFP_FLOWER_CMSG_TYPE_PORT_REIFY =	6,
 	NFP_FLOWER_CMSG_TYPE_MAC_REPR =		7,
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
+	NFP_FLOWER_CMSG_TYPE_MERGE_HINT =	9,
 	NFP_FLOWER_CMSG_TYPE_NO_NEIGH =		10,
 	NFP_FLOWER_CMSG_TYPE_TUN_MAC =		11,
 	NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS =	12,
@@ -417,6 +481,10 @@
 	NFP_FLOWER_CMSG_TYPE_TUN_IPS =		14,
 	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
 	NFP_FLOWER_CMSG_TYPE_PORT_ECHO =	16,
+	NFP_FLOWER_CMSG_TYPE_QOS_MOD =		18,
+	NFP_FLOWER_CMSG_TYPE_QOS_DEL =		19,
+	NFP_FLOWER_CMSG_TYPE_QOS_STATS =	20,
+	NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE =	21,
 	NFP_FLOWER_CMSG_TYPE_MAX =		32,
 };
 
@@ -454,6 +522,16 @@
 
 #define NFP_FLOWER_CMSG_PORTREIFY_INFO_EXIST	BIT(0)
 
+/* NFP_FLOWER_CMSG_TYPE_MERGE_HINT */
+struct nfp_flower_cmsg_merge_hint {
+	u8 reserved[3];
+	u8 count;
+	struct {
+		__be32 host_ctx;
+		__be64 host_cookie;
+	} __packed flow[0];
+};
+
 enum nfp_flower_cmsg_port_type {
 	NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC =	0x0,
 	NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT =	0x1,
@@ -476,6 +554,13 @@
 #define NFP_FLOWER_CMSG_PORT_PCIE_Q		GENMASK(5, 0)
 #define NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM	GENMASK(7, 0)
 
+static inline u32 nfp_flower_internal_port_get_port_id(u8 internal_port)
+{
+	return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, internal_port) |
+		FIELD_PREP(NFP_FLOWER_CMSG_PORT_TYPE,
+			   NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT);
+}
+
 static inline u32 nfp_flower_cmsg_phys_port(u8 phys_port)
 {
 	return FIELD_PREP(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, phys_port) |
@@ -505,6 +590,36 @@
 	return skb->len - NFP_FLOWER_CMSG_HLEN;
 }
 
+static inline bool
+nfp_fl_netdev_is_tunnel_type(struct net_device *netdev,
+			     enum nfp_flower_tun_type tun_type)
+{
+	if (netif_is_vxlan(netdev))
+		return tun_type == NFP_FL_TUNNEL_VXLAN;
+	if (netif_is_gretap(netdev))
+		return tun_type == NFP_FL_TUNNEL_GRE;
+	if (netif_is_geneve(netdev))
+		return tun_type == NFP_FL_TUNNEL_GENEVE;
+
+	return false;
+}
+
+static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev)
+{
+	if (!netdev->rtnl_link_ops)
+		return false;
+	if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
+		return true;
+	if (netif_is_vxlan(netdev))
+		return true;
+	if (netif_is_geneve(netdev))
+		return true;
+	if (netif_is_gretap(netdev))
+		return true;
+
+	return false;
+}
+
 struct sk_buff *
 nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports);
 void
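
The new nfp_flower_internal_port_get_port_id() helper above packs the internal port number and port type into a 32-bit port id with FIELD_PREP(), and nfp_flower_dev_get() in main.c later unpacks it with FIELD_GET(). A sketch of that round trip with stand-in masks (only the low byte mirrors the real NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM layout; the type field position is assumed):

/* Pack and unpack bitfields defined by GENMASK() masks. */
#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define DEMO_PORT_NUM	GENMASK(7, 0)
#define DEMO_PORT_TYPE	GENMASK(31, 28)

static u32 demo_port_encode(u8 type, u8 num)
{
	/* FIELD_PREP() shifts each value into its mask's bit position. */
	return FIELD_PREP(DEMO_PORT_TYPE, type) |
	       FIELD_PREP(DEMO_PORT_NUM, num);
}

static u8 demo_port_num(u32 port_id)
{
	/* FIELD_GET() masks the field out and shifts it back down. */
	return FIELD_GET(DEMO_PORT_NUM, port_id);
}
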
diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
index bf10598..63907ae 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #include "main.h"
 
@@ -186,7 +156,8 @@
 
 int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
 				       struct net_device *master,
-				       struct nfp_fl_pre_lag *pre_act)
+				       struct nfp_fl_pre_lag *pre_act,
+				       struct netlink_ext_ack *extack)
 {
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_fl_lag_group *group = NULL;
@@ -197,6 +168,7 @@
 							  master);
 	if (!group) {
 		mutex_unlock(&priv->nfp_lag.lock);
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action");
 		return -ENOENT;
 	}
 
@@ -502,17 +474,25 @@
 	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
 }
 
-static int
+static void
 nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
 				 struct net_device *master)
 {
 	struct nfp_fl_lag_group *group;
+	struct nfp_flower_priv *priv;
+
+	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+	if (!netif_is_bond_master(master))
+		return;
 
 	mutex_lock(&lag->lock);
 	group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
 	if (!group) {
 		mutex_unlock(&lag->lock);
-		return -ENOENT;
+		nfp_warn(priv->app->cpp, "untracked bond got unregistered %s\n",
+			 netdev_name(master));
+		return;
 	}
 
 	group->to_remove = true;
@@ -520,7 +500,6 @@
 	mutex_unlock(&lag->lock);
 
 	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
-	return 0;
 }
 
 static int
@@ -605,7 +584,7 @@
 	return 0;
 }
 
-static int
+static void
 nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
 			  struct netdev_notifier_changelowerstate_info *info)
 {
@@ -616,18 +595,18 @@
 	unsigned long *flags;
 
 	if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
-		return 0;
+		return;
 
 	lag_lower_info = info->lower_state_info;
 	if (!lag_lower_info)
-		return 0;
+		return;
 
 	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
 	repr = netdev_priv(netdev);
 
 	/* Verify that the repr is associated with this app. */
 	if (repr->app != priv->app)
-		return 0;
+		return;
 
 	repr_priv = repr->app_priv;
 	flags = &repr_priv->lag_port_flags;
@@ -647,20 +626,15 @@
 	mutex_unlock(&lag->lock);
 
 	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
-	return 0;
 }
 
-static int
-nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event,
-			void *ptr)
+int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv,
+				struct net_device *netdev,
+				unsigned long event, void *ptr)
 {
-	struct net_device *netdev;
-	struct nfp_fl_lag *lag;
+	struct nfp_fl_lag *lag = &priv->nfp_lag;
 	int err;
 
-	netdev = netdev_notifier_info_to_dev(ptr);
-	lag = container_of(nb, struct nfp_fl_lag, lag_nb);
-
 	switch (event) {
 	case NETDEV_CHANGEUPPER:
 		err = nfp_fl_lag_changeupper_event(lag, ptr);
@@ -668,17 +642,11 @@
 			return NOTIFY_BAD;
 		return NOTIFY_OK;
 	case NETDEV_CHANGELOWERSTATE:
-		err = nfp_fl_lag_changels_event(lag, netdev, ptr);
-		if (err)
-			return NOTIFY_BAD;
+		nfp_fl_lag_changels_event(lag, netdev, ptr);
 		return NOTIFY_OK;
 	case NETDEV_UNREGISTER:
-		if (netif_is_bond_master(netdev)) {
-			err = nfp_fl_lag_schedule_group_delete(lag, netdev);
-			if (err)
-				return NOTIFY_BAD;
-			return NOTIFY_OK;
-		}
+		nfp_fl_lag_schedule_group_delete(lag, netdev);
+		return NOTIFY_OK;
 	}
 
 	return NOTIFY_DONE;
@@ -703,8 +671,6 @@
 
 	/* 0 is a reserved batch version so increment to first valid value. */
 	nfp_fl_increment_version(lag);
-
-	lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event;
 }
 
 void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
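
The rework above folds the LAG notifier into a single app-level handler: nfp_flower_lag_netdev_event() now returns NOTIFY_* codes to a caller (nfp_flower_netdev_event() in main.c) that chains several sub-handlers. A sketch of that convention with hypothetical sub-handlers: NOTIFY_OK and NOTIFY_DONE let the chain continue, while NOTIFY_BAD and NOTIFY_STOP carry NOTIFY_STOP_MASK and end it.

#include <linux/notifier.h>

/* Hypothetical sub-handlers; real ones would inspect event and ptr. */
static int demo_lag_event(unsigned long event, void *ptr)
{
	return NOTIFY_DONE;	/* event not consumed, keep going */
}

static int demo_tun_event(unsigned long event, void *ptr)
{
	return NOTIFY_OK;
}

static int demo_netdev_event(unsigned long event, void *ptr)
{
	int ret;

	ret = demo_lag_event(event, ptr);
	if (ret & NOTIFY_STOP_MASK)	/* NOTIFY_BAD/NOTIFY_STOP end the chain */
		return ret;

	return demo_tun_event(event, ptr);
}
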
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index e57d237..d8ad934 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/etherdevice.h>
 #include <linux/lockdep.h>
@@ -52,6 +22,9 @@
 
 #define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
 
+#define NFP_MIN_INT_PORT_ID	1
+#define NFP_MAX_INT_PORT_ID	256
+
 static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 {
 	return "FLOWER";
@@ -62,6 +35,178 @@
 	return DEVLINK_ESWITCH_MODE_SWITCHDEV;
 }
 
+static int
+nfp_flower_lookup_internal_port_id(struct nfp_flower_priv *priv,
+				   struct net_device *netdev)
+{
+	struct net_device *entry;
+	int i, id = 0;
+
+	rcu_read_lock();
+	idr_for_each_entry(&priv->internal_ports.port_ids, entry, i)
+		if (entry == netdev) {
+			id = i;
+			break;
+		}
+	rcu_read_unlock();
+
+	return id;
+}
+
+static int
+nfp_flower_get_internal_port_id(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	int id;
+
+	id = nfp_flower_lookup_internal_port_id(priv, netdev);
+	if (id > 0)
+		return id;
+
+	idr_preload(GFP_ATOMIC);
+	spin_lock_bh(&priv->internal_ports.lock);
+	id = idr_alloc(&priv->internal_ports.port_ids, netdev,
+		       NFP_MIN_INT_PORT_ID, NFP_MAX_INT_PORT_ID, GFP_ATOMIC);
+	spin_unlock_bh(&priv->internal_ports.lock);
+	idr_preload_end();
+
+	return id;
+}
+
+u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app,
+				       struct net_device *netdev)
+{
+	int ext_port;
+
+	if (nfp_netdev_is_nfp_repr(netdev)) {
+		return nfp_repr_get_port_id(netdev);
+	} else if (nfp_flower_internal_port_can_offload(app, netdev)) {
+		ext_port = nfp_flower_get_internal_port_id(app, netdev);
+		if (ext_port < 0)
+			return 0;
+
+		return nfp_flower_internal_port_get_port_id(ext_port);
+	}
+
+	return 0;
+}
+
+static struct net_device *
+nfp_flower_get_netdev_from_internal_port_id(struct nfp_app *app, int port_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct net_device *netdev;
+
+	rcu_read_lock();
+	netdev = idr_find(&priv->internal_ports.port_ids, port_id);
+	rcu_read_unlock();
+
+	return netdev;
+}
+
+static void
+nfp_flower_free_internal_port_id(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	int id;
+
+	id = nfp_flower_lookup_internal_port_id(priv, netdev);
+	if (!id)
+		return;
+
+	spin_lock_bh(&priv->internal_ports.lock);
+	idr_remove(&priv->internal_ports.port_ids, id);
+	spin_unlock_bh(&priv->internal_ports.lock);
+}
+
+static int
+nfp_flower_internal_port_event_handler(struct nfp_app *app,
+				       struct net_device *netdev,
+				       unsigned long event)
+{
+	if (event == NETDEV_UNREGISTER &&
+	    nfp_flower_internal_port_can_offload(app, netdev))
+		nfp_flower_free_internal_port_id(app, netdev);
+
+	return NOTIFY_OK;
+}
+
+static void nfp_flower_internal_port_init(struct nfp_flower_priv *priv)
+{
+	spin_lock_init(&priv->internal_ports.lock);
+	idr_init(&priv->internal_ports.port_ids);
+}
+
+static void nfp_flower_internal_port_cleanup(struct nfp_flower_priv *priv)
+{
+	idr_destroy(&priv->internal_ports.port_ids);
+}
+
+static struct nfp_flower_non_repr_priv *
+nfp_flower_non_repr_priv_lookup(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_flower_non_repr_priv *entry;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry(entry, &priv->non_repr_priv, list)
+		if (entry->netdev == netdev)
+			return entry;
+
+	return NULL;
+}
+
+void
+__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv)
+{
+	non_repr_priv->ref_count++;
+}
+
+struct nfp_flower_non_repr_priv *
+nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_flower_non_repr_priv *entry;
+
+	entry = nfp_flower_non_repr_priv_lookup(app, netdev);
+	if (entry)
+		goto inc_ref;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+
+	entry->netdev = netdev;
+	list_add(&entry->list, &priv->non_repr_priv);
+
+inc_ref:
+	__nfp_flower_non_repr_priv_get(entry);
+	return entry;
+}
+
+void
+__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv)
+{
+	if (--non_repr_priv->ref_count)
+		return;
+
+	list_del(&non_repr_priv->list);
+	kfree(non_repr_priv);
+}
+
+void
+nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_flower_non_repr_priv *entry;
+
+	entry = nfp_flower_non_repr_priv_lookup(app, netdev);
+	if (!entry)
+		return;
+
+	__nfp_flower_non_repr_priv_put(entry);
+}
+
 static enum nfp_repr_type
 nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port)
 {
@@ -84,12 +229,21 @@
 }
 
 static struct net_device *
-nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
+nfp_flower_dev_get(struct nfp_app *app, u32 port_id, bool *redir_egress)
 {
 	enum nfp_repr_type repr_type;
 	struct nfp_reprs *reprs;
 	u8 port = 0;
 
+	/* Check if the port is internal. */
+	if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port_id) ==
+	    NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT) {
+		if (redir_egress)
+			*redir_egress = true;
+		port = FIELD_GET(NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM, port_id);
+		return nfp_flower_get_netdev_from_internal_port_id(app, port);
+	}
+
 	repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port);
 	if (repr_type > NFP_REPR_TYPE_MAX)
 		return NULL;
@@ -137,16 +291,14 @@
 nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl)
 {
 	struct nfp_flower_priv *priv = app->priv;
-	int err;
 
 	if (!tot_repl)
 		return 0;
 
 	lockdep_assert_held(&app->pf->lock);
-	err = wait_event_interruptible_timeout(priv->reify_wait_queue,
-					       atomic_read(replies) >= tot_repl,
-					       msecs_to_jiffies(10));
-	if (err <= 0) {
+	if (!wait_event_timeout(priv->reify_wait_queue,
+				atomic_read(replies) >= tot_repl,
+				NFP_FL_REPLY_TIMEOUT)) {
 		nfp_warn(app->cpp, "Not all reprs responded to reify\n");
 		return -EIO;
 	}
@@ -176,23 +328,12 @@
 	return nfp_flower_cmsg_portmod(repr, false, repr->netdev->mtu, false);
 }
 
-static int
-nfp_flower_repr_netdev_init(struct nfp_app *app, struct net_device *netdev)
-{
-	return tc_setup_cb_egdev_register(netdev,
-					  nfp_flower_setup_tc_egress_cb,
-					  netdev_priv(netdev));
-}
-
 static void
 nfp_flower_repr_netdev_clean(struct nfp_app *app, struct net_device *netdev)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
 
 	kfree(repr->app_priv);
-
-	tc_setup_cb_egdev_unregister(netdev, nfp_flower_setup_tc_egress_cb,
-				     netdev_priv(netdev));
 }
 
 static void
@@ -259,11 +400,13 @@
 		repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
 		if (!repr_priv) {
 			err = -ENOMEM;
+			nfp_repr_free(repr);
 			goto err_reprs_clean;
 		}
 
 		nfp_repr = netdev_priv(repr);
 		nfp_repr->app_priv = repr_priv;
+		repr_priv->nfp_repr = nfp_repr;
 
 		/* For now we only support 1 PF */
 		WARN_ON(repr_type == NFP_REPR_TYPE_PF && i);
@@ -271,6 +414,7 @@
 		port = nfp_port_alloc(app, port_type, repr);
 		if (IS_ERR(port)) {
 			err = PTR_ERR(port);
+			kfree(repr_priv);
 			nfp_repr_free(repr);
 			goto err_reprs_clean;
 		}
@@ -291,6 +435,7 @@
 		err = nfp_repr_init(app, repr,
 				    port_id, port, priv->nn->dp.netdev);
 		if (err) {
+			kfree(repr_priv);
 			nfp_port_free(port);
 			nfp_repr_free(repr);
 			goto err_reprs_clean;
@@ -373,20 +518,24 @@
 		repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
 		if (!repr_priv) {
 			err = -ENOMEM;
+			nfp_repr_free(repr);
 			goto err_reprs_clean;
 		}
 
 		nfp_repr = netdev_priv(repr);
 		nfp_repr->app_priv = repr_priv;
+		repr_priv->nfp_repr = nfp_repr;
 
 		port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
 		if (IS_ERR(port)) {
 			err = PTR_ERR(port);
+			kfree(repr_priv);
 			nfp_repr_free(repr);
 			goto err_reprs_clean;
 		}
 		err = nfp_port_init_phy_port(app->pf, app, port, i);
 		if (err) {
+			kfree(repr_priv);
 			nfp_port_free(port);
 			nfp_repr_free(repr);
 			goto err_reprs_clean;
@@ -399,6 +548,7 @@
 		err = nfp_repr_init(app, repr,
 				    cmsg_port_id, port, priv->nn->dp.netdev);
 		if (err) {
+			kfree(repr_priv);
 			nfp_port_free(port);
 			nfp_repr_free(repr);
 			goto err_reprs_clean;
@@ -517,9 +667,9 @@
 
 static int nfp_flower_init(struct nfp_app *app)
 {
+	u64 version, features, ctx_count, num_mems;
 	const struct nfp_pf *pf = app->pf;
 	struct nfp_flower_priv *app_priv;
-	u64 version, features;
 	int err;
 
 	if (!pf->eth_tbl) {
@@ -543,6 +693,33 @@
 		return err;
 	}
 
+	num_mems = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_SPLIT",
+				     &err);
+	if (err) {
+		nfp_warn(app->cpp,
+			 "FlowerNIC: unsupported host context memory: %d\n",
+			 err);
+		err = 0;
+		num_mems = 1;
+	}
+
+	if (!FIELD_FIT(NFP_FL_STAT_ID_MU_NUM, num_mems) || !num_mems) {
+		nfp_warn(app->cpp,
+			 "FlowerNIC: invalid host context memory: %llu\n",
+			 num_mems);
+		return -EINVAL;
+	}
+
+	ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT",
+				      &err);
+	if (err) {
+		nfp_warn(app->cpp,
+			 "FlowerNIC: unsupported host context count: %d\n",
+			 err);
+		err = 0;
+		ctx_count = BIT(17);
+	}
+
 	/* We need to ensure hardware has enough flower capabilities. */
 	if (version != NFP_FLOWER_ALLOWED_VER) {
 		nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n");
@@ -553,6 +730,9 @@
 	if (!app_priv)
 		return -ENOMEM;
 
+	app_priv->total_mem_units = num_mems;
+	app_priv->active_mem_unit = 0;
+	app_priv->stats_ring_size = roundup_pow_of_two(ctx_count);
 	app->priv = app_priv;
 	app_priv->app = app;
 	skb_queue_head_init(&app_priv->cmsg_skbs_high);
@@ -563,7 +743,7 @@
 	init_waitqueue_head(&app_priv->mtu_conf.wait_q);
 	spin_lock_init(&app_priv->mtu_conf.lock);
 
-	err = nfp_flower_metadata_init(app);
+	err = nfp_flower_metadata_init(app, ctx_count, num_mems);
 	if (err)
 		goto err_free_app_priv;
 
@@ -587,8 +767,34 @@
 		goto err_cleanup_metadata;
 	}
 
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) {
+		/* Tell the firmware that the driver supports flow merging. */
+		err = nfp_rtsym_write_le(app->pf->rtbl,
+					 "_abi_flower_merge_hint_enable", 1);
+		if (!err) {
+			app_priv->flower_ext_feats |= NFP_FL_FEATS_FLOW_MERGE;
+			nfp_flower_internal_port_init(app_priv);
+		} else if (err == -ENOENT) {
+			nfp_warn(app->cpp, "Flow merge not supported by FW.\n");
+		} else {
+			goto err_lag_clean;
+		}
+	} else {
+		nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n");
+	}
+
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)
+		nfp_flower_qos_init(app);
+
+	INIT_LIST_HEAD(&app_priv->indr_block_cb_priv);
+	INIT_LIST_HEAD(&app_priv->non_repr_priv);
+	app_priv->pre_tun_rule_cnt = 0;
+
 	return 0;
 
+err_lag_clean:
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+		nfp_flower_lag_cleanup(&app_priv->nfp_lag);
 err_cleanup_metadata:
 	nfp_flower_metadata_cleanup(app);
 err_free_app_priv:
@@ -604,9 +810,15 @@
 	skb_queue_purge(&app_priv->cmsg_skbs_low);
 	flush_work(&app_priv->cmsg_work);
 
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)
+		nfp_flower_qos_cleanup(app);
+
 	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
 		nfp_flower_lag_cleanup(&app_priv->nfp_lag);
 
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE)
+		nfp_flower_internal_port_cleanup(app_priv);
+
 	nfp_flower_metadata_cleanup(app);
 	vfree(app->priv);
 	app->priv = NULL;
@@ -629,7 +841,7 @@
 {
 	struct nfp_flower_priv *app_priv = app->priv;
 	struct nfp_repr *repr = netdev_priv(netdev);
-	int err, ack;
+	int err;
 
 	/* Only need to config FW for physical port MTU change. */
 	if (repr->port->type != NFP_PORT_PHYS_PORT)
@@ -656,11 +868,9 @@
 	}
 
 	/* Wait for fw to ack the change. */
-	ack = wait_event_timeout(app_priv->mtu_conf.wait_q,
-				 nfp_flower_check_ack(app_priv),
-				 msecs_to_jiffies(10));
-
-	if (!ack) {
+	if (!wait_event_timeout(app_priv->mtu_conf.wait_q,
+				nfp_flower_check_ack(app_priv),
+				NFP_FL_REPLY_TIMEOUT)) {
 		spin_lock_bh(&app_priv->mtu_conf.lock);
 		app_priv->mtu_conf.requested_val = 0;
 		spin_unlock_bh(&app_priv->mtu_conf.lock);
@@ -680,10 +890,6 @@
 		err = nfp_flower_lag_reset(&app_priv->nfp_lag);
 		if (err)
 			return err;
-
-		err = register_netdevice_notifier(&app_priv->nfp_lag.lag_nb);
-		if (err)
-			return err;
 	}
 
 	return nfp_tunnel_config_start(app);
@@ -691,14 +897,33 @@
 
 static void nfp_flower_stop(struct nfp_app *app)
 {
-	struct nfp_flower_priv *app_priv = app->priv;
-
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
-		unregister_netdevice_notifier(&app_priv->nfp_lag.lag_nb);
-
 	nfp_tunnel_config_stop(app);
 }
 
+static int
+nfp_flower_netdev_event(struct nfp_app *app, struct net_device *netdev,
+			unsigned long event, void *ptr)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+	int ret;
+
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+		ret = nfp_flower_lag_netdev_event(app_priv, netdev, event, ptr);
+		if (ret & NOTIFY_STOP_MASK)
+			return ret;
+	}
+
+	ret = nfp_flower_reg_indir_block_handler(app, netdev, event);
+	if (ret & NOTIFY_STOP_MASK)
+		return ret;
+
+	ret = nfp_flower_internal_port_event_handler(app, netdev, event);
+	if (ret & NOTIFY_STOP_MASK)
+		return ret;
+
+	return nfp_tunnel_mac_event_handler(app, netdev, event, ptr);
+}
+
 const struct nfp_app_type app_flower = {
 	.id		= NFP_APP_FLOWER_NIC,
 	.name		= "flower",
@@ -717,7 +942,6 @@
 	.vnic_init	= nfp_flower_vnic_init,
 	.vnic_clean	= nfp_flower_vnic_clean,
 
-	.repr_init	= nfp_flower_repr_netdev_init,
 	.repr_preclean	= nfp_flower_repr_netdev_preclean,
 	.repr_clean	= nfp_flower_repr_netdev_clean,
 
@@ -727,13 +951,15 @@
 	.start		= nfp_flower_start,
 	.stop		= nfp_flower_stop,
 
+	.netdev_event	= nfp_flower_netdev_event,
+
 	.ctrl_msg_rx	= nfp_flower_cmsg_rx,
 
 	.sriov_enable	= nfp_flower_sriov_enable,
 	.sriov_disable	= nfp_flower_sriov_disable,
 
 	.eswitch_mode_get  = eswitch_mode_get,
-	.repr_get	= nfp_flower_repr_get,
+	.dev_get	= nfp_flower_dev_get,
 
 	.setup_tc	= nfp_flower_setup_tc,
 };
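
The internal port id code added to main.c above follows the standard IDR recipe: preload outside the lock so idr_alloc() needs no sleeping allocation inside it, allocate an id in [NFP_MIN_INT_PORT_ID, NFP_MAX_INT_PORT_ID), and resolve ids back to pointers locklessly under RCU. A condensed sketch with hypothetical names:

#include <linux/idr.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

static DEFINE_IDR(demo_ids);
static DEFINE_SPINLOCK(demo_lock);

static int demo_id_alloc(void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);	/* prefill so the locked alloc is atomic */
	spin_lock_bh(&demo_lock);
	id = idr_alloc(&demo_ids, ptr, 1, 256, GFP_ATOMIC);
	spin_unlock_bh(&demo_lock);
	idr_preload_end();

	return id;			/* negative errno on failure */
}

static void *demo_id_find(int id)
{
	void *ptr;

	rcu_read_lock();
	ptr = idr_find(&demo_ids, id);	/* lockless lookup under RCU */
	rcu_read_unlock();

	return ptr;
}
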
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 81d941a..31d9459 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -1,43 +1,15 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #ifndef __NFP_FLOWER_H__
 #define __NFP_FLOWER_H__ 1
 
 #include "cmsg.h"
+#include "../nfp_net.h"
 
 #include <linux/circ_buf.h>
 #include <linux/hashtable.h>
+#include <linux/rhashtable.h>
 #include <linux/time64.h>
 #include <linux/types.h>
 #include <net/pkt_cls.h>
@@ -49,11 +21,11 @@
 struct net_device;
 struct nfp_app;
 
-#define NFP_FL_STATS_CTX_DONT_CARE	cpu_to_be32(0xffffffff)
-#define NFP_FL_STATS_ENTRY_RS		BIT(20)
-#define NFP_FL_STATS_ELEM_RS		4
-#define NFP_FL_REPEATED_HASH_MAX	BIT(17)
-#define NFP_FLOWER_HASH_BITS		19
+#define NFP_FL_STAT_ID_MU_NUM		GENMASK(31, 22)
+#define NFP_FL_STAT_ID_STAT		GENMASK(21, 0)
+
+#define NFP_FL_STATS_ELEM_RS		FIELD_SIZEOF(struct nfp_fl_stats_id, \
+						     init_unalloc)
 #define NFP_FLOWER_MASK_ENTRY_RS	256
 #define NFP_FLOWER_MASK_ELEMENT_RS	1
 #define NFP_FLOWER_MASK_HASH_BITS	10
@@ -63,14 +35,15 @@
 #define NFP_FL_MASK_REUSE_TIME_NS	40000
 #define NFP_FL_MASK_ID_LOCATION		1
 
-#define NFP_FL_VXLAN_PORT		4789
-#define NFP_FL_GENEVE_PORT		6081
-
 /* Extra features bitmap. */
 #define NFP_FL_FEATS_GENEVE		BIT(0)
 #define NFP_FL_NBI_MTU_SETTING		BIT(1)
 #define NFP_FL_FEATS_GENEVE_OPT		BIT(2)
 #define NFP_FL_FEATS_VLAN_PCP		BIT(3)
+#define NFP_FL_FEATS_VF_RLIM		BIT(4)
+#define NFP_FL_FEATS_FLOW_MOD		BIT(5)
+#define NFP_FL_FEATS_PRE_TUN_RULES	BIT(6)
+#define NFP_FL_FEATS_FLOW_MERGE		BIT(30)
 #define NFP_FL_FEATS_LAG		BIT(31)
 
 struct nfp_fl_mask_id {
@@ -86,6 +59,26 @@
 };
 
 /**
+ * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads
+ * @offloaded_macs:	Hashtable of the offloaded MAC addresses
+ * @ipv4_off_list:	List of IPv4 addresses to offload
+ * @neigh_off_list:	List of neighbour offloads
+ * @ipv4_off_lock:	Lock for the IPv4 address list
+ * @neigh_off_lock:	Lock for the neighbour address list
+ * @mac_off_ids:	IDA to manage id assignment for offloaded MACs
+ * @neigh_nb:		Notifier to monitor neighbour state
+ */
+struct nfp_fl_tunnel_offloads {
+	struct rhashtable offloaded_macs;
+	struct list_head ipv4_off_list;
+	struct list_head neigh_off_list;
+	struct mutex ipv4_off_lock;
+	spinlock_t neigh_off_lock;
+	struct ida mac_off_ids;
+	struct notifier_block neigh_nb;
+};
+
+/**
  * struct nfp_mtu_conf - manage MTU setting
  * @portnum:		NFP port number of repr with requested MTU change
  * @requested_val:	MTU value requested for repr
@@ -103,7 +96,6 @@
 
 /**
  * struct nfp_fl_lag - Flower APP priv data for link aggregation
- * @lag_nb:		Notifier to track master/slave events
  * @work:		Work queue for writing configs to the HW
  * @lock:		Lock to protect lag_group_list
  * @group_list:		List of all master/slave groups offloaded
@@ -116,7 +108,6 @@
  *			retransmission
  */
 struct nfp_fl_lag {
-	struct notifier_block lag_nb;
 	struct delayed_work work;
 	struct mutex lock;
 	struct list_head group_list;
@@ -129,6 +120,16 @@
 };
 
 /**
+ * struct nfp_fl_internal_ports - Flower APP priv data for additional ports
+ * @port_ids:	Assignment of ids to any additional ports
+ * @lock:	Lock for extra ports list
+ */
+struct nfp_fl_internal_ports {
+	struct idr port_ids;
+	spinlock_t lock;
+};
+
+/**
  * struct nfp_flower_priv - Flower APP per-vNIC priv data
  * @app:		Back pointer to app
  * @nn:			Pointer to vNIC
@@ -138,29 +139,31 @@
  * @stats_ids:		List of free stats ids
  * @mask_ids:		List of free mask ids
  * @mask_table:		Hash table used to store masks
+ * @stats_ring_size:	Maximum number of allowed stats ids
  * @flow_table:		Hash table used to store flower rules
+ * @stats:		Stored stats updates for flower rules
+ * @stats_lock:		Lock for flower rule stats updates
+ * @stats_ctx_table:	Hash table to map stats contexts to its flow rule
  * @cmsg_work:		Workqueue for control messages processing
  * @cmsg_skbs_high:	List of higher priority skbs for control message
  *			processing
  * @cmsg_skbs_low:	List of lower priority skbs for control message
  *			processing
- * @nfp_mac_off_list:	List of MAC addresses to offload
- * @nfp_mac_index_list:	List of unique 8-bit indexes for non NFP netdevs
- * @nfp_ipv4_off_list:	List of IPv4 addresses to offload
- * @nfp_neigh_off_list:	List of neighbour offloads
- * @nfp_mac_off_lock:	Lock for the MAC address list
- * @nfp_mac_index_lock:	Lock for the MAC index list
- * @nfp_ipv4_off_lock:	Lock for the IPv4 address list
- * @nfp_neigh_off_lock:	Lock for the neighbour address list
- * @nfp_mac_off_ids:	IDA to manage id assignment for offloaded macs
- * @nfp_mac_off_count:	Number of MACs in address list
- * @nfp_tun_mac_nb:	Notifier to monitor link state
- * @nfp_tun_neigh_nb:	Notifier to monitor neighbour state
+ * @tun:		Tunnel offload data
  * @reify_replies:	atomically stores the number of replies received
  *			from firmware for repr reify
  * @reify_wait_queue:	wait queue for repr reify response counting
  * @mtu_conf:		Configuration of repr MTU value
  * @nfp_lag:		Link aggregation data block
+ * @indr_block_cb_priv:	List of priv data passed to indirect block cbs
+ * @non_repr_priv:	List of offloaded non-repr ports and their priv data
+ * @active_mem_unit:	Current active memory unit for flower rules
+ * @total_mem_units:	Total number of available memory units for flower rules
+ * @internal_ports:	Internal port ids used in offloaded rules
+ * @qos_stats_work:	Workqueue for qos stats processing
+ * @qos_rate_limiters:	Current active qos rate limiters
+ * @qos_stats_lock:	Lock on qos stats updates
+ * @pre_tun_rule_cnt:	Number of pre-tunnel rules offloaded
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -171,34 +174,80 @@
 	struct nfp_fl_stats_id stats_ids;
 	struct nfp_fl_mask_id mask_ids;
 	DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
-	DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
+	u32 stats_ring_size;
+	struct rhashtable flow_table;
+	struct nfp_fl_stats *stats;
+	spinlock_t stats_lock; /* lock stats */
+	struct rhashtable stats_ctx_table;
 	struct work_struct cmsg_work;
 	struct sk_buff_head cmsg_skbs_high;
 	struct sk_buff_head cmsg_skbs_low;
-	struct list_head nfp_mac_off_list;
-	struct list_head nfp_mac_index_list;
-	struct list_head nfp_ipv4_off_list;
-	struct list_head nfp_neigh_off_list;
-	struct mutex nfp_mac_off_lock;
-	struct mutex nfp_mac_index_lock;
-	struct mutex nfp_ipv4_off_lock;
-	spinlock_t nfp_neigh_off_lock;
-	struct ida nfp_mac_off_ids;
-	int nfp_mac_off_count;
-	struct notifier_block nfp_tun_mac_nb;
-	struct notifier_block nfp_tun_neigh_nb;
+	struct nfp_fl_tunnel_offloads tun;
 	atomic_t reify_replies;
 	wait_queue_head_t reify_wait_queue;
 	struct nfp_mtu_conf mtu_conf;
 	struct nfp_fl_lag nfp_lag;
+	struct list_head indr_block_cb_priv;
+	struct list_head non_repr_priv;
+	unsigned int active_mem_unit;
+	unsigned int total_mem_units;
+	struct nfp_fl_internal_ports internal_ports;
+	struct delayed_work qos_stats_work;
+	unsigned int qos_rate_limiters;
+	spinlock_t qos_stats_lock; /* Protect the qos stats */
+	int pre_tun_rule_cnt;
+};
+
+/**
+ * struct nfp_fl_qos - Flower APP priv data for quality of service
+ * @netdev_port_id:	NFP port number of repr with qos info
+ * @curr_stats:		Currently stored stats updates for qos info
+ * @prev_stats:		Previously stored updates for qos info
+ * @last_update:	Stored time when last stats were updated
+ */
+struct nfp_fl_qos {
+	u32 netdev_port_id;
+	struct nfp_stat_pair curr_stats;
+	struct nfp_stat_pair prev_stats;
+	u64 last_update;
 };
 
 /**
  * struct nfp_flower_repr_priv - Flower APP per-repr priv data
+ * @nfp_repr:		Back pointer to nfp_repr
  * @lag_port_flags:	Extended port flags to record lag state of repr
+ * @mac_offloaded:	Flag indicating a MAC address is offloaded for repr
+ * @offloaded_mac_addr:	MAC address that has been offloaded for repr
+ * @block_shared:	Flag indicating if offload applies to shared blocks
+ * @mac_list:		List entry of reprs that share the same offloaded MAC
+ * @qos_table:		Stored info on filters implementing qos
+ * @on_bridge:		Indicates if the repr is attached to a bridge
  */
 struct nfp_flower_repr_priv {
+	struct nfp_repr *nfp_repr;
 	unsigned long lag_port_flags;
+	bool mac_offloaded;
+	u8 offloaded_mac_addr[ETH_ALEN];
+	bool block_shared;
+	struct list_head mac_list;
+	struct nfp_fl_qos qos_table;
+	bool on_bridge;
+};
+
+/**
+ * struct nfp_flower_non_repr_priv - Priv data for non-repr offloaded ports
+ * @list:		List entry of offloaded reprs
+ * @netdev:		Pointer to non-repr net_device
+ * @ref_count:		Number of references held for this priv data
+ * @mac_offloaded:	Flag indicating a MAC address is offloaded for device
+ * @offloaded_mac_addr:	MAC address that has been offloaded for dev
+ */
+struct nfp_flower_non_repr_priv {
+	struct list_head list;
+	struct net_device *netdev;
+	int ref_count;
+	bool mac_offloaded;
+	u8 offloaded_mac_addr[ETH_ALEN];
 };
 
 struct nfp_fl_key_ls {
@@ -227,18 +276,41 @@
 struct nfp_fl_payload {
 	struct nfp_fl_rule_metadata meta;
 	unsigned long tc_flower_cookie;
-	struct hlist_node link;
+	struct rhash_head fl_node;
 	struct rcu_head rcu;
-	spinlock_t lock; /* lock stats */
-	struct nfp_fl_stats stats;
 	__be32 nfp_tun_ipv4_addr;
 	struct net_device *ingress_dev;
 	char *unmasked_data;
 	char *mask_data;
 	char *action_data;
-	bool ingress_offload;
+	struct list_head linked_flows;
+	bool in_hw;
+	struct {
+		struct net_device *dev;
+		__be16 vlan_tci;
+		__be16 port_idx;
+	} pre_tun_rule;
 };
 
+struct nfp_fl_payload_link {
+	/* A link contains a pointer to a merge flow and an associated sub_flow.
+	 * Each merge flow will feature in 2 links to its underlying sub_flows.
+	 * A sub_flow will have at least 1 link to a merge flow or more if it
+	 * has been used to create multiple merge flows.
+	 *
+	 * For a merge flow, 'linked_flows' in its nfp_fl_payload struct lists
+	 * all links to sub_flows (sub_flow.flow) via merge.list.
+	 * For a sub_flow, 'linked_flows' gives all links to merge flows it has
+	 * formed (merge_flow.flow) via sub_flow.list.
+	 */
+	struct {
+		struct list_head list;
+		struct nfp_fl_payload *flow;
+	} merge_flow, sub_flow;
+};
+
+extern const struct rhashtable_params nfp_flower_table_params;
+
 struct nfp_fl_stats_frame {
 	__be32 stats_con_id;
 	__be32 pkt_count;
@@ -246,30 +318,72 @@
 	__be64 stats_cookie;
 };
 
-int nfp_flower_metadata_init(struct nfp_app *app);
+static inline bool
+nfp_flower_internal_port_can_offload(struct nfp_app *app,
+				     struct net_device *netdev)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+
+	if (!(app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE))
+		return false;
+	if (!netdev->rtnl_link_ops)
+		return false;
+	if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
+		return true;
+
+	return false;
+}
+
+/* The address of the merged flow acts as its cookie.
+ * Cookies supplied to us by TC flower are also addresses of allocated
+ * memory, so this scheme should not generate any collisions.
+ */
+static inline bool nfp_flower_is_merge_flow(struct nfp_fl_payload *flow_pay)
+{
+	return flow_pay->tc_flower_cookie == (unsigned long)flow_pay;
+}
+
+static inline bool nfp_flower_is_supported_bridge(struct net_device *netdev)
+{
+	return netif_is_ovs_master(netdev);
+}
+
+int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
+			     unsigned int host_ctx_split);
 void nfp_flower_metadata_cleanup(struct nfp_app *app);
 
 int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
 			enum tc_setup_type type, void *type_data);
-int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
+				     struct nfp_fl_payload *sub_flow1,
+				     struct nfp_fl_payload *sub_flow2);
+int nfp_flower_compile_flow_match(struct nfp_app *app,
+				  struct flow_cls_offload *flow,
 				  struct nfp_fl_key_ls *key_ls,
 				  struct net_device *netdev,
 				  struct nfp_fl_payload *nfp_flow,
-				  enum nfp_flower_tun_type tun_type);
+				  enum nfp_flower_tun_type tun_type,
+				  struct netlink_ext_ack *extack);
 int nfp_flower_compile_action(struct nfp_app *app,
-			      struct tc_cls_flower_offload *flow,
+			      struct flow_cls_offload *flow,
 			      struct net_device *netdev,
-			      struct nfp_fl_payload *nfp_flow);
-int nfp_compile_flow_metadata(struct nfp_app *app,
-			      struct tc_cls_flower_offload *flow,
 			      struct nfp_fl_payload *nfp_flow,
-			      struct net_device *netdev);
+			      struct netlink_ext_ack *extack);
+int nfp_compile_flow_metadata(struct nfp_app *app,
+			      struct flow_cls_offload *flow,
+			      struct nfp_fl_payload *nfp_flow,
+			      struct net_device *netdev,
+			      struct netlink_ext_ack *extack);
+void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv,
+				struct nfp_fl_payload *nfp_flow);
 int nfp_modify_flow_metadata(struct nfp_app *app,
 			     struct nfp_fl_payload *nfp_flow);
 
 struct nfp_fl_payload *
 nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
-			   struct net_device *netdev, __be32 host_ctx);
+			   struct net_device *netdev);
+struct nfp_fl_payload *
+nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id);
 struct nfp_fl_payload *
 nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
 
@@ -277,21 +391,47 @@
 
 int nfp_tunnel_config_start(struct nfp_app *app);
 void nfp_tunnel_config_stop(struct nfp_app *app);
-void nfp_tunnel_write_macs(struct nfp_app *app);
+int nfp_tunnel_mac_event_handler(struct nfp_app *app,
+				 struct net_device *netdev,
+				 unsigned long event, void *ptr);
 void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
 void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
 void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
 void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
-int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
-				  void *cb_priv);
 void nfp_flower_lag_init(struct nfp_fl_lag *lag);
 void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag);
 int nfp_flower_lag_reset(struct nfp_fl_lag *lag);
+int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv,
+				struct net_device *netdev,
+				unsigned long event, void *ptr);
 bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb);
 int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
 				       struct net_device *master,
-				       struct nfp_fl_pre_lag *pre_act);
+				       struct nfp_fl_pre_lag *pre_act,
+				       struct netlink_ext_ack *extack);
 int nfp_flower_lag_get_output_id(struct nfp_app *app,
 				 struct net_device *master);
+void nfp_flower_qos_init(struct nfp_app *app);
+void nfp_flower_qos_cleanup(struct nfp_app *app);
+int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev,
+				 struct tc_cls_matchall_offload *flow);
+void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb);
+int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
+				       struct net_device *netdev,
+				       unsigned long event);
 
+void
+__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv);
+struct nfp_flower_non_repr_priv *
+nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev);
+void
+__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv);
+void
+nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev);
+u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app,
+				       struct net_device *netdev);
+int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
+				 struct nfp_fl_payload *flow);
+int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app,
+				     struct nfp_fl_payload *flow);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 17acb8c..9cc3ba1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
 #include <net/pkt_cls.h>
@@ -38,32 +8,41 @@
 #include "main.h"
 
 static void
-nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame,
-			    struct tc_cls_flower_offload *flow, u8 key_type,
-			    bool mask_version)
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
+			    struct nfp_flower_meta_tci *msk,
+			    struct flow_cls_offload *flow, u8 key_type)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_vlan *flow_vlan;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 	u16 tmp_tci;
 
-	memset(frame, 0, sizeof(struct nfp_flower_meta_tci));
-	/* Populate the metadata frame. */
-	frame->nfp_flow_key_layer = key_type;
-	frame->mask_id = ~0;
+	memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
+	memset(msk, 0, sizeof(struct nfp_flower_meta_tci));
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		flow_vlan = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_VLAN,
-						      target);
+	/* Populate the metadata frame. */
+	ext->nfp_flow_key_layer = key_type;
+	ext->mask_id = ~0;
+
+	msk->nfp_flow_key_layer = key_type;
+	msk->mask_id = ~0;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
 		/* Populate the tci field. */
-		if (flow_vlan->vlan_id || flow_vlan->vlan_priority) {
-			tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
-					     flow_vlan->vlan_priority) |
-				  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
-					     flow_vlan->vlan_id) |
-				  NFP_FLOWER_MASK_VLAN_CFI;
-			frame->tci = cpu_to_be16(tmp_tci);
-		}
+		tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+		tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+				      match.key->vlan_priority) |
+			   FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+				      match.key->vlan_id);
+		ext->tci = cpu_to_be16(tmp_tci);
+
+		tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+		tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+				      match.mask->vlan_priority) |
+			   FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+				      match.mask->vlan_id);
+		msk->tci = cpu_to_be16(tmp_tci);
 	}
 }
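
Every compile_* helper in this file now follows the same convention: 'ext'
receives the unmasked key and 'msk' the mask, written side by side in one
pass. Assuming the usual key/mask match semantics, the hardware-side test per
field reduces to the following (illustrative predicate, not driver code):

static bool example_field_matches(__be16 pkt, __be16 ext, __be16 msk)
{
	/* Match when the packet field agrees with the key on masked bits. */
	return ((pkt ^ ext) & msk) == 0;
}
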
 
@@ -75,273 +54,346 @@
 
 static int
 nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
-			bool mask_version, enum nfp_flower_tun_type tun_type)
+			bool mask_version, enum nfp_flower_tun_type tun_type,
+			struct netlink_ext_ack *extack)
 {
 	if (mask_version) {
 		frame->in_port = cpu_to_be32(~0);
 		return 0;
 	}
 
-	if (tun_type)
+	if (tun_type) {
 		frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
-	else
+	} else {
+		if (!cmsg_port) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid ingress interface for match offload");
+			return -EOPNOTSUPP;
+		}
 		frame->in_port = cpu_to_be32(cmsg_port);
+	}
 
 	return 0;
 }
 
 static void
-nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
-		       struct tc_cls_flower_offload *flow,
-		       bool mask_version)
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
+		       struct nfp_flower_mac_mpls *msk,
+		       struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_eth_addrs *addr;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_mac_mpls));
+	memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
+	memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						 target);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
 		/* Populate mac frame. */
-		ether_addr_copy(frame->mac_dst, &addr->dst[0]);
-		ether_addr_copy(frame->mac_src, &addr->src[0]);
+		ether_addr_copy(ext->mac_dst, &match.key->dst[0]);
+		ether_addr_copy(ext->mac_src, &match.key->src[0]);
+		ether_addr_copy(msk->mac_dst, &match.mask->dst[0]);
+		ether_addr_copy(msk->mac_src, &match.mask->src[0]);
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
-		struct flow_dissector_key_mpls *mpls;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
+		struct flow_match_mpls match;
 		u32 t_mpls;
 
-		mpls = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_MPLS,
-						 target);
-
-		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) |
+		flow_rule_match_mpls(rule, &match);
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.key->mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.key->mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.key->mpls_bos) |
 			 NFP_FLOWER_MASK_MPLS_Q;
-
-		frame->mpls_lse = cpu_to_be32(t_mpls);
-	} else if (dissector_uses_key(flow->dissector,
-				      FLOW_DISSECTOR_KEY_BASIC)) {
+		ext->mpls_lse = cpu_to_be32(t_mpls);
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.mask->mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.mask->mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.mask->mpls_bos) |
+			 NFP_FLOWER_MASK_MPLS_Q;
+		msk->mpls_lse = cpu_to_be32(t_mpls);
+	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 		/* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
 		 * bit, which indicates an mpls ether type but without any
 		 * mpls fields.
 		 */
-		struct flow_dissector_key_basic *key_basic;
+		struct flow_match_basic match;
 
-		key_basic = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_BASIC,
-						      flow->key);
-		if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
-		    key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC))
-			frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+		flow_rule_match_basic(rule, &match);
+		if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
+		    match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) {
+			ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+			msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+		}
 	}
 }
 
 static void
-nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
-			 struct tc_cls_flower_offload *flow,
-			 bool mask_version)
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
+			 struct nfp_flower_tp_ports *msk,
+			 struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ports *tp;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_tp_ports));
+	memset(ext, 0, sizeof(struct nfp_flower_tp_ports));
+	memset(msk, 0, sizeof(struct nfp_flower_tp_ports));
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		tp = skb_flow_dissector_target(flow->dissector,
-					       FLOW_DISSECTOR_KEY_PORTS,
-					       target);
-		frame->port_src = tp->src;
-		frame->port_dst = tp->dst;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		ext->port_src = match.key->src;
+		ext->port_dst = match.key->dst;
+		msk->port_src = match.mask->src;
+		msk->port_dst = match.mask->dst;
 	}
 }
 
 static void
-nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *frame,
-			  struct tc_cls_flower_offload *flow,
-			  bool mask_version)
+nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
+			  struct nfp_flower_ip_ext *msk,
+			  struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *basic;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		basic = skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  target);
-		frame->proto = basic->ip_proto;
+		flow_rule_match_basic(rule, &match);
+		ext->proto = match.key->ip_proto;
+		msk->proto = match.mask->ip_proto;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *flow_ip;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
 
-		flow_ip = skb_flow_dissector_target(flow->dissector,
-						    FLOW_DISSECTOR_KEY_IP,
-						    target);
-		frame->tos = flow_ip->tos;
-		frame->ttl = flow_ip->ttl;
+		flow_rule_match_ip(rule, &match);
+		ext->tos = match.key->tos;
+		ext->ttl = match.key->ttl;
+		msk->tos = match.mask->tos;
+		msk->ttl = match.mask->ttl;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
-		struct flow_dissector_key_tcp *tcp;
-		u32 tcp_flags;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+		u16 tcp_flags, tcp_flags_mask;
+		struct flow_match_tcp match;
 
-		tcp = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_TCP, target);
-		tcp_flags = be16_to_cpu(tcp->flags);
+		flow_rule_match_tcp(rule, &match);
+		tcp_flags = be16_to_cpu(match.key->flags);
+		tcp_flags_mask = be16_to_cpu(match.mask->flags);
 
 		if (tcp_flags & TCPHDR_FIN)
-			frame->flags |= NFP_FL_TCP_FLAG_FIN;
+			ext->flags |= NFP_FL_TCP_FLAG_FIN;
+		if (tcp_flags_mask & TCPHDR_FIN)
+			msk->flags |= NFP_FL_TCP_FLAG_FIN;
+
 		if (tcp_flags & TCPHDR_SYN)
-			frame->flags |= NFP_FL_TCP_FLAG_SYN;
+			ext->flags |= NFP_FL_TCP_FLAG_SYN;
+		if (tcp_flags_mask & TCPHDR_SYN)
+			msk->flags |= NFP_FL_TCP_FLAG_SYN;
+
 		if (tcp_flags & TCPHDR_RST)
-			frame->flags |= NFP_FL_TCP_FLAG_RST;
+			ext->flags |= NFP_FL_TCP_FLAG_RST;
+		if (tcp_flags_mask & TCPHDR_RST)
+			msk->flags |= NFP_FL_TCP_FLAG_RST;
+
 		if (tcp_flags & TCPHDR_PSH)
-			frame->flags |= NFP_FL_TCP_FLAG_PSH;
+			ext->flags |= NFP_FL_TCP_FLAG_PSH;
+		if (tcp_flags_mask & TCPHDR_PSH)
+			msk->flags |= NFP_FL_TCP_FLAG_PSH;
+
 		if (tcp_flags & TCPHDR_URG)
-			frame->flags |= NFP_FL_TCP_FLAG_URG;
+			ext->flags |= NFP_FL_TCP_FLAG_URG;
+		if (tcp_flags_mask & TCPHDR_URG)
+			msk->flags |= NFP_FL_TCP_FLAG_URG;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		key = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_CONTROL,
-						target);
-		if (key->flags & FLOW_DIS_IS_FRAGMENT)
-			frame->flags |= NFP_FL_IP_FRAGMENTED;
-		if (key->flags & FLOW_DIS_FIRST_FRAG)
-			frame->flags |= NFP_FL_IP_FRAG_FIRST;
+		flow_rule_match_control(rule, &match);
+		if (match.key->flags & FLOW_DIS_IS_FRAGMENT)
+			ext->flags |= NFP_FL_IP_FRAGMENTED;
+		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT)
+			msk->flags |= NFP_FL_IP_FRAGMENTED;
+		if (match.key->flags & FLOW_DIS_FIRST_FRAG)
+			ext->flags |= NFP_FL_IP_FRAG_FIRST;
+		if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
+			msk->flags |= NFP_FL_IP_FRAG_FIRST;
 	}
 }
 
 static void
-nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
-			struct tc_cls_flower_offload *flow,
-			bool mask_version)
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
+			struct nfp_flower_ipv4 *msk,
+			struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv4_addrs *addr;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+	struct flow_match_ipv4_addrs match;
 
-	memset(frame, 0, sizeof(struct nfp_flower_ipv4));
+	memset(ext, 0, sizeof(struct nfp_flower_ipv4));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv4));
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						 target);
-		frame->ipv4_src = addr->src;
-		frame->ipv4_dst = addr->dst;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		flow_rule_match_ipv4_addrs(rule, &match);
+		ext->ipv4_src = match.key->src;
+		ext->ipv4_dst = match.key->dst;
+		msk->ipv4_src = match.mask->src;
+		msk->ipv4_dst = match.mask->dst;
 	}
 
-	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
+	nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
 }
 
 static void
-nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
-			struct tc_cls_flower_offload *flow,
-			bool mask_version)
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
+			struct nfp_flower_ipv6 *msk,
+			struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv6_addrs *addr;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_ipv6));
+	memset(ext, 0, sizeof(struct nfp_flower_ipv6));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv6));
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						 target);
-		frame->ipv6_src = addr->src;
-		frame->ipv6_dst = addr->dst;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+		ext->ipv6_src = match.key->src;
+		ext->ipv6_dst = match.key->dst;
+		msk->ipv6_src = match.mask->src;
+		msk->ipv6_dst = match.mask->dst;
 	}
 
-	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
+	nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
 }
 
 static int
-nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow,
-			      bool mask_version)
+nfp_flower_compile_geneve_opt(void *ext, void *msk,
+			      struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_enc_opts *opts;
+	struct flow_match_enc_opts match;
 
-	opts = skb_flow_dissector_target(flow->dissector,
-					 FLOW_DISSECTOR_KEY_ENC_OPTS,
-					 target);
-	memcpy(key_buf, opts->data, opts->len);
+	flow_rule_match_enc_opts(flow->rule, &match);
+	memcpy(ext, match.key->data, match.key->len);
+	memcpy(msk, match.mask->data, match.mask->len);
 
 	return 0;
 }
 
 static void
-nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
-				struct tc_cls_flower_offload *flow,
-				bool mask_version)
+nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext,
+				  struct nfp_flower_tun_ipv4 *msk,
+				  struct flow_cls_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv4_addrs *tun_ips;
-	struct flow_dissector_key_keyid *vni;
-	struct flow_dissector_key_ip *ip;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		u32 temp_vni;
-
-		vni = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_ENC_KEYID,
-						target);
-		temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET;
-		frame->tun_id = cpu_to_be32(temp_vni);
-	}
-
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
-		tun_ips =
-		   skb_flow_dissector_target(flow->dissector,
-					     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-					     target);
-		frame->ip_src = tun_ips->src;
-		frame->ip_dst = tun_ips->dst;
-	}
-
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
-		ip = skb_flow_dissector_target(flow->dissector,
-					       FLOW_DISSECTOR_KEY_ENC_IP,
-					       target);
-		frame->tos = ip->tos;
-		frame->ttl = ip->ttl;
+		flow_rule_match_enc_ipv4_addrs(rule, &match);
+		ext->src = match.key->src;
+		ext->dst = match.key->dst;
+		msk->src = match.mask->src;
+		msk->dst = match.mask->dst;
 	}
 }
 
-int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+static void
+nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext,
+			      struct nfp_flower_tun_ip_ext *msk,
+			      struct flow_cls_offload *flow)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_enc_ip(rule, &match);
+		ext->tos = match.key->tos;
+		ext->ttl = match.key->ttl;
+		msk->tos = match.mask->tos;
+		msk->ttl = match.mask->ttl;
+	}
+}
+
+static void
+nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
+				struct nfp_flower_ipv4_gre_tun *msk,
+				struct flow_cls_offload *flow)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+
+	memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+
+	/* NVGRE is the only supported GRE tunnel type */
+	ext->ethertype = cpu_to_be16(ETH_P_TEB);
+	msk->ethertype = cpu_to_be16(~0);
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
+
+		flow_rule_match_enc_keyid(rule, &match);
+		ext->tun_key = match.key->keyid;
+		msk->tun_key = match.mask->keyid;
+
+		ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
+		msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
+	}
+
+	nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow);
+	nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+}
+
+static void
+nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
+				struct nfp_flower_ipv4_udp_tun *msk,
+				struct flow_cls_offload *flow)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+
+	memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
+		u32 temp_vni;
+
+		flow_rule_match_enc_keyid(rule, &match);
+		temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET;
+		ext->tun_id = cpu_to_be32(temp_vni);
+		temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET;
+		msk->tun_id = cpu_to_be32(temp_vni);
+	}
+
+	nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow);
+	nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+}
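
The VNI handling above is worth spelling out: the 24-bit VNI is shifted into
the upper bytes of the 32-bit tun_id field, identically for key and mask. A
one-line equivalent (illustrative helper; NFP_FL_TUN_VNI_OFFSET is defined in
this driver's cmsg header):

static __be32 example_pack_vni(__be32 keyid)
{
	/* e.g. with an offset of 8, keyid 0x00123456 becomes 0x12345600 */
	return cpu_to_be32(be32_to_cpu(keyid) << NFP_FL_TUN_VNI_OFFSET);
}
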
+
+int nfp_flower_compile_flow_match(struct nfp_app *app,
+				  struct flow_cls_offload *flow,
 				  struct nfp_fl_key_ls *key_ls,
 				  struct net_device *netdev,
 				  struct nfp_fl_payload *nfp_flow,
-				  enum nfp_flower_tun_type tun_type)
+				  enum nfp_flower_tun_type tun_type,
+				  struct netlink_ext_ack *extack)
 {
-	struct nfp_repr *netdev_repr;
+	u32 port_id;
 	int err;
 	u8 *ext;
 	u8 *msk;
 
+	port_id = nfp_flower_get_port_id_from_netdev(app, netdev);
+
 	memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
 	memset(nfp_flow->mask_data, 0, key_ls->key_size);
 
 	ext = nfp_flow->unmasked_data;
 	msk = nfp_flow->mask_data;
 
-	/* Populate Exact Metadata. */
 	nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext,
-				    flow, key_ls->key_layer, false);
-	/* Populate Mask Metadata. */
-	nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)msk,
-				    flow, key_ls->key_layer, true);
+				    (struct nfp_flower_meta_tci *)msk,
+				    flow, key_ls->key_layer);
 	ext += sizeof(struct nfp_flower_meta_tci);
 	msk += sizeof(struct nfp_flower_meta_tci);
 
@@ -357,15 +409,13 @@
 
 	/* Populate Exact Port data. */
 	err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
-				      nfp_repr_get_port_id(netdev),
-				      false, tun_type);
+				      port_id, false, tun_type, extack);
 	if (err)
 		return err;
 
 	/* Populate Mask Port Data. */
 	err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
-				      nfp_repr_get_port_id(netdev),
-				      true, tun_type);
+				      port_id, true, tun_type, extack);
 	if (err)
 		return err;
 
@@ -373,79 +423,69 @@
 	msk += sizeof(struct nfp_flower_in_port);
 
 	if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
-		/* Populate Exact MAC Data. */
 		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
-				       flow, false);
-		/* Populate Mask MAC Data. */
-		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk,
-				       flow, true);
+				       (struct nfp_flower_mac_mpls *)msk,
+				       flow);
 		ext += sizeof(struct nfp_flower_mac_mpls);
 		msk += sizeof(struct nfp_flower_mac_mpls);
 	}
 
 	if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) {
-		/* Populate Exact TP Data. */
 		nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext,
-					 flow, false);
-		/* Populate Mask TP Data. */
-		nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk,
-					 flow, true);
+					 (struct nfp_flower_tp_ports *)msk,
+					 flow);
 		ext += sizeof(struct nfp_flower_tp_ports);
 		msk += sizeof(struct nfp_flower_tp_ports);
 	}
 
 	if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) {
-		/* Populate Exact IPv4 Data. */
 		nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext,
-					flow, false);
-		/* Populate Mask IPv4 Data. */
-		nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk,
-					flow, true);
+					(struct nfp_flower_ipv4 *)msk,
+					flow);
 		ext += sizeof(struct nfp_flower_ipv4);
 		msk += sizeof(struct nfp_flower_ipv4);
 	}
 
 	if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) {
-		/* Populate Exact IPv4 Data. */
 		nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext,
-					flow, false);
-		/* Populate Mask IPv4 Data. */
-		nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk,
-					flow, true);
+					(struct nfp_flower_ipv6 *)msk,
+					flow);
 		ext += sizeof(struct nfp_flower_ipv6);
 		msk += sizeof(struct nfp_flower_ipv6);
 	}
 
+	if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+		__be32 tun_dst;
+
+		nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow);
+		tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst;
+		ext += sizeof(struct nfp_flower_ipv4_gre_tun);
+		msk += sizeof(struct nfp_flower_ipv4_gre_tun);
+
+		/* Store the tunnel destination in the rule data.
+		 * This must be present and be an exact match.
+		 */
+		nfp_flow->nfp_tun_ipv4_addr = tun_dst;
+		nfp_tunnel_add_ipv4_off(app, tun_dst);
+	}
+
 	if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN ||
 	    key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
 		__be32 tun_dst;
 
-		/* Populate Exact VXLAN Data. */
-		nfp_flower_compile_ipv4_udp_tun((void *)ext, flow, false);
-		/* Populate Mask VXLAN Data. */
-		nfp_flower_compile_ipv4_udp_tun((void *)msk, flow, true);
-		tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ip_dst;
+		nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow);
+		tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst;
 		ext += sizeof(struct nfp_flower_ipv4_udp_tun);
 		msk += sizeof(struct nfp_flower_ipv4_udp_tun);
 
-		/* Configure tunnel end point MAC. */
-		if (nfp_netdev_is_nfp_repr(netdev)) {
-			netdev_repr = netdev_priv(netdev);
-			nfp_tunnel_write_macs(netdev_repr->app);
-
-			/* Store the tunnel destination in the rule data.
-			 * This must be present and be an exact match.
-			 */
-			nfp_flow->nfp_tun_ipv4_addr = tun_dst;
-			nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
-		}
+		/* Store the tunnel destination in the rule data.
+		 * This must be present and be an exact match.
+		 */
+		nfp_flow->nfp_tun_ipv4_addr = tun_dst;
+		nfp_tunnel_add_ipv4_off(app, tun_dst);
 
 		if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
-			err = nfp_flower_compile_geneve_opt(ext, flow, false);
-			if (err)
-				return err;
-
-			err = nfp_flower_compile_geneve_opt(msk, flow, true);
+			err = nfp_flower_compile_geneve_opt(ext, msk, flow);
 			if (err)
 				return err;
 		}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index c098730..7c4a15e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -1,39 +1,10 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/hash.h>
 #include <linux/hashtable.h>
 #include <linux/jhash.h>
+#include <linux/math64.h>
 #include <linux/vmalloc.h>
 #include <net/pkt_cls.h>
 
@@ -48,6 +19,23 @@
 	u8 mask_id;
 };
 
+struct nfp_fl_flow_table_cmp_arg {
+	struct net_device *netdev;
+	unsigned long cookie;
+};
+
+struct nfp_fl_stats_ctx_to_flow {
+	struct rhash_head ht_node;
+	u32 stats_cxt;
+	struct nfp_fl_payload *flow;
+};
+
+static const struct rhashtable_params stats_ctx_table_params = {
+	.key_offset	= offsetof(struct nfp_fl_stats_ctx_to_flow, stats_cxt),
+	.head_offset	= offsetof(struct nfp_fl_stats_ctx_to_flow, ht_node),
+	.key_len	= sizeof(u32),
+};
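
With key_len set and no explicit hashfn, rhashtable hashes the raw u32 with
its default (jhash) and reads the key from the object at key_offset. A minimal
insertion sketch for this stats-context reverse map (hypothetical helper; the
patch's real call site is in nfp_compile_flow_metadata below):

static int example_stats_ctx_insert(struct rhashtable *tbl,
				    struct nfp_fl_stats_ctx_to_flow *entry)
{
	/* entry->stats_cxt must be populated before insertion, since the
	 * key is taken from the object itself at key_offset.
	 */
	return rhashtable_insert_fast(tbl, &entry->ht_node,
				      stats_ctx_table_params);
}
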
+
 static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
 {
 	struct nfp_flower_priv *priv = app->priv;
@@ -55,14 +43,14 @@
 
 	ring = &priv->stats_ids.free_list;
 	/* Check if buffer is full. */
-	if (!CIRC_SPACE(ring->head, ring->tail, NFP_FL_STATS_ENTRY_RS *
-			NFP_FL_STATS_ELEM_RS -
+	if (!CIRC_SPACE(ring->head, ring->tail,
+			priv->stats_ring_size * NFP_FL_STATS_ELEM_RS -
 			NFP_FL_STATS_ELEM_RS + 1))
 		return -ENOBUFS;
 
 	memcpy(&ring->buf[ring->head], &stats_context_id, NFP_FL_STATS_ELEM_RS);
 	ring->head = (ring->head + NFP_FL_STATS_ELEM_RS) %
-		     (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+		     (priv->stats_ring_size * NFP_FL_STATS_ELEM_RS);
 
 	return 0;
 }
@@ -74,11 +62,20 @@
 	struct circ_buf *ring;
 
 	ring = &priv->stats_ids.free_list;
-	freed_stats_id = NFP_FL_STATS_ENTRY_RS;
+	freed_stats_id = priv->stats_ring_size;
 	/* Check for unallocated entries first. */
 	if (priv->stats_ids.init_unalloc > 0) {
-		*stats_context_id = priv->stats_ids.init_unalloc - 1;
-		priv->stats_ids.init_unalloc--;
+		if (priv->active_mem_unit == priv->total_mem_units) {
+			priv->stats_ids.init_unalloc--;
+			priv->active_mem_unit = 0;
+		}
+
+		*stats_context_id =
+			FIELD_PREP(NFP_FL_STAT_ID_STAT,
+				   priv->stats_ids.init_unalloc - 1) |
+			FIELD_PREP(NFP_FL_STAT_ID_MU_NUM,
+				   priv->active_mem_unit);
+		priv->active_mem_unit++;
 		return 0;
 	}
 
@@ -92,7 +89,7 @@
 	*stats_context_id = temp_stats_id;
 	memcpy(&ring->buf[ring->tail], &freed_stats_id, NFP_FL_STATS_ELEM_RS);
 	ring->tail = (ring->tail + NFP_FL_STATS_ELEM_RS) %
-		     (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+		     (priv->stats_ring_size * NFP_FL_STATS_ELEM_RS);
 
 	return 0;
 }
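
Decoding sketch for the packed stats id allocated above (illustrative helper;
FIELD_GET is the bitfield.h inverse of the FIELD_PREP calls used here):

static void example_decode_stats_id(u32 stats_context_id, u32 *stat_idx,
				    u32 *mem_unit)
{
	/* Split the id back into its stat index and memory unit number. */
	*stat_idx = FIELD_GET(NFP_FL_STAT_ID_STAT, stats_context_id);
	*mem_unit = FIELD_GET(NFP_FL_STAT_ID_MU_NUM, stats_context_id);
}
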
@@ -100,58 +97,38 @@
 /* Must be called with either RTNL or rcu_read_lock */
 struct nfp_fl_payload *
 nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
-			   struct net_device *netdev, __be32 host_ctx)
+			   struct net_device *netdev)
 {
+	struct nfp_fl_flow_table_cmp_arg flower_cmp_arg;
 	struct nfp_flower_priv *priv = app->priv;
-	struct nfp_fl_payload *flower_entry;
 
-	hash_for_each_possible_rcu(priv->flow_table, flower_entry, link,
-				   tc_flower_cookie)
-		if (flower_entry->tc_flower_cookie == tc_flower_cookie &&
-		    (!netdev || flower_entry->ingress_dev == netdev) &&
-		    (host_ctx == NFP_FL_STATS_CTX_DONT_CARE ||
-		     flower_entry->meta.host_ctx_id == host_ctx))
-			return flower_entry;
+	flower_cmp_arg.netdev = netdev;
+	flower_cmp_arg.cookie = tc_flower_cookie;
 
-	return NULL;
-}
-
-static void
-nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
-{
-	struct nfp_fl_payload *nfp_flow;
-	unsigned long flower_cookie;
-
-	flower_cookie = be64_to_cpu(stats->stats_cookie);
-
-	rcu_read_lock();
-	nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL,
-					      stats->stats_con_id);
-	if (!nfp_flow)
-		goto exit_rcu_unlock;
-
-	spin_lock(&nfp_flow->lock);
-	nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count);
-	nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count);
-	nfp_flow->stats.used = jiffies;
-	spin_unlock(&nfp_flow->lock);
-
-exit_rcu_unlock:
-	rcu_read_unlock();
+	return rhashtable_lookup_fast(&priv->flow_table, &flower_cmp_arg,
+				      nfp_flower_table_params);
 }
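
A hypothetical caller honouring the locking comment above:

static bool example_flow_in_hw(struct nfp_app *app, unsigned long cookie,
			       struct net_device *netdev)
{
	struct nfp_fl_payload *flow;
	bool in_hw = false;

	/* The lookup and any dereference of the result must stay inside
	 * the RCU read-side critical section (or hold RTNL instead).
	 */
	rcu_read_lock();
	flow = nfp_flower_search_fl_table(app, cookie, netdev);
	if (flow)
		in_hw = flow->in_hw;
	rcu_read_unlock();

	return in_hw;
}
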
 
 void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
 {
 	unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
-	struct nfp_fl_stats_frame *stats_frame;
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_stats_frame *stats;
 	unsigned char *msg;
+	u32 ctx_id;
 	int i;
 
 	msg = nfp_flower_cmsg_get_data(skb);
 
-	stats_frame = (struct nfp_fl_stats_frame *)msg;
-	for (i = 0; i < msg_len / sizeof(*stats_frame); i++)
-		nfp_flower_update_stats(app, stats_frame + i);
+	spin_lock(&priv->stats_lock);
+	for (i = 0; i < msg_len / sizeof(*stats); i++) {
+		stats = (struct nfp_fl_stats_frame *)msg + i;
+		ctx_id = be32_to_cpu(stats->stats_con_id);
+		priv->stats[ctx_id].pkts += be32_to_cpu(stats->pkt_count);
+		priv->stats[ctx_id].bytes += be64_to_cpu(stats->byte_count);
+		priv->stats[ctx_id].used = jiffies;
+	}
+	spin_unlock(&priv->stats_lock);
 }
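
The matching read side takes the same stats_lock; a consumer sketch
(hypothetical helper), assuming the reader runs in process context while the
writer above runs in BH context, hence the _bh variant here:

static void example_read_stats(struct nfp_flower_priv *priv, u32 ctx_id,
			       struct nfp_fl_stats *out)
{
	/* Snapshot the per-context counters under the stats lock. */
	spin_lock_bh(&priv->stats_lock);
	*out = priv->stats[ctx_id];
	spin_unlock_bh(&priv->stats_lock);
}
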
 
 static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
@@ -299,9 +276,6 @@
 	if (!mask_entry)
 		return false;
 
-	if (meta_flags)
-		*meta_flags &= ~NFP_FL_META_FLAG_MANAGE_MASK;
-
 	*mask_id = mask_entry->mask_id;
 	mask_entry->ref_cnt--;
 	if (!mask_entry->ref_cnt) {
@@ -316,28 +290,55 @@
 }
 
 int nfp_compile_flow_metadata(struct nfp_app *app,
-			      struct tc_cls_flower_offload *flow,
+			      struct flow_cls_offload *flow,
 			      struct nfp_fl_payload *nfp_flow,
-			      struct net_device *netdev)
+			      struct net_device *netdev,
+			      struct netlink_ext_ack *extack)
 {
+	struct nfp_fl_stats_ctx_to_flow *ctx_entry;
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_fl_payload *check_entry;
 	u8 new_mask_id;
 	u32 stats_cxt;
+	int err;
 
-	if (nfp_get_stats_entry(app, &stats_cxt))
-		return -ENOENT;
+	err = nfp_get_stats_entry(app, &stats_cxt);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate new stats context");
+		return err;
+	}
 
 	nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt);
 	nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
+	nfp_flow->ingress_dev = netdev;
+
+	ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL);
+	if (!ctx_entry) {
+		err = -ENOMEM;
+		goto err_release_stats;
+	}
+
+	ctx_entry->stats_cxt = stats_cxt;
+	ctx_entry->flow = nfp_flow;
+
+	if (rhashtable_insert_fast(&priv->stats_ctx_table, &ctx_entry->ht_node,
+				   stats_ctx_table_params)) {
+		err = -ENOMEM;
+		goto err_free_ctx_entry;
+	}
 
 	new_mask_id = 0;
 	if (!nfp_check_mask_add(app, nfp_flow->mask_data,
 				nfp_flow->meta.mask_len,
 				&nfp_flow->meta.flags, &new_mask_id)) {
-		if (nfp_release_stats_entry(app, stats_cxt))
-			return -EINVAL;
-		return -ENOENT;
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id");
+		if (nfp_release_stats_entry(app, stats_cxt)) {
+			NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context");
+			err = -EINVAL;
+			goto err_remove_rhash;
+		}
+		err = -ENOENT;
+		goto err_remove_rhash;
 	}
 
 	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
@@ -345,56 +346,156 @@
 
 	/* Update flow payload with mask ids. */
 	nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
-	nfp_flow->stats.pkts = 0;
-	nfp_flow->stats.bytes = 0;
-	nfp_flow->stats.used = jiffies;
+	priv->stats[stats_cxt].pkts = 0;
+	priv->stats[stats_cxt].bytes = 0;
+	priv->stats[stats_cxt].used = jiffies;
 
-	check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev,
-						 NFP_FL_STATS_CTX_DONT_CARE);
+	check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev);
 	if (check_entry) {
-		if (nfp_release_stats_entry(app, stats_cxt))
-			return -EINVAL;
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry");
+		if (nfp_release_stats_entry(app, stats_cxt)) {
+			NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context");
+			err = -EINVAL;
+			goto err_remove_mask;
+		}
 
 		if (!nfp_check_mask_remove(app, nfp_flow->mask_data,
 					   nfp_flow->meta.mask_len,
-					   NULL, &new_mask_id))
-			return -EINVAL;
+					   NULL, &new_mask_id)) {
+			NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id");
+			err = -EINVAL;
+			goto err_remove_mask;
+		}
 
-		return -EEXIST;
+		err = -EEXIST;
+		goto err_remove_mask;
 	}
 
 	return 0;
+
+err_remove_mask:
+	nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len,
+			      NULL, &new_mask_id);
+err_remove_rhash:
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table,
+					    &ctx_entry->ht_node,
+					    stats_ctx_table_params));
+err_free_ctx_entry:
+	kfree(ctx_entry);
+err_release_stats:
+	nfp_release_stats_entry(app, stats_cxt);
+
+	return err;
+}
+
+void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv,
+				struct nfp_fl_payload *nfp_flow)
+{
+	nfp_flow->meta.flags &= ~NFP_FL_META_FLAG_MANAGE_MASK;
+	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+	priv->flower_version++;
 }
 
 int nfp_modify_flow_metadata(struct nfp_app *app,
 			     struct nfp_fl_payload *nfp_flow)
 {
+	struct nfp_fl_stats_ctx_to_flow *ctx_entry;
 	struct nfp_flower_priv *priv = app->priv;
 	u8 new_mask_id = 0;
 	u32 temp_ctx_id;
 
+	__nfp_modify_flow_metadata(priv, nfp_flow);
+
 	nfp_check_mask_remove(app, nfp_flow->mask_data,
 			      nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
 			      &new_mask_id);
 
-	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
-	priv->flower_version++;
-
 	/* Update flow payload with mask ids. */
 	nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
 
-	/* Release the stats ctx id. */
+	/* Release the stats ctx id and ctx to flow table entry. */
 	temp_ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
 
+	ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &temp_ctx_id,
+					   stats_ctx_table_params);
+	if (!ctx_entry)
+		return -ENOENT;
+
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table,
+					    &ctx_entry->ht_node,
+					    stats_ctx_table_params));
+	kfree(ctx_entry);
+
 	return nfp_release_stats_entry(app, temp_ctx_id);
 }
 
-int nfp_flower_metadata_init(struct nfp_app *app)
+struct nfp_fl_payload *
+nfp_flower_get_fl_payload_from_ctx(struct nfp_app *app, u32 ctx_id)
 {
+	struct nfp_fl_stats_ctx_to_flow *ctx_entry;
 	struct nfp_flower_priv *priv = app->priv;
 
+	ctx_entry = rhashtable_lookup_fast(&priv->stats_ctx_table, &ctx_id,
+					   stats_ctx_table_params);
+	if (!ctx_entry)
+		return NULL;
+
+	return ctx_entry->flow;
+}
+
+static int nfp_fl_obj_cmpfn(struct rhashtable_compare_arg *arg,
+			    const void *obj)
+{
+	const struct nfp_fl_flow_table_cmp_arg *cmp_arg = arg->key;
+	const struct nfp_fl_payload *flow_entry = obj;
+
+	if (flow_entry->ingress_dev == cmp_arg->netdev)
+		return flow_entry->tc_flower_cookie != cmp_arg->cookie;
+
+	return 1;
+}
+
+static u32 nfp_fl_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct nfp_fl_payload *flower_entry = data;
+
+	return jhash2((u32 *)&flower_entry->tc_flower_cookie,
+		      sizeof(flower_entry->tc_flower_cookie) / sizeof(u32),
+		      seed);
+}
+
+static u32 nfp_fl_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct nfp_fl_flow_table_cmp_arg *cmp_arg = data;
+
+	return jhash2((u32 *)&cmp_arg->cookie,
+		      sizeof(cmp_arg->cookie) / sizeof(u32), seed);
+}
+
+const struct rhashtable_params nfp_flower_table_params = {
+	.head_offset		= offsetof(struct nfp_fl_payload, fl_node),
+	.hashfn			= nfp_fl_key_hashfn,
+	.obj_cmpfn		= nfp_fl_obj_cmpfn,
+	.obj_hashfn		= nfp_fl_obj_hashfn,
+	.automatic_shrinking	= true,
+};
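
Note the split above: both hash functions fold only the cookie, while
nfp_fl_obj_cmpfn additionally filters on the ingress netdev (returning 0 only
on a full match). The effective equality test is therefore (illustrative):

static bool
example_flow_table_equal(const struct nfp_fl_payload *flow,
			 const struct nfp_fl_flow_table_cmp_arg *arg)
{
	/* Same-cookie flows on different netdevs share a bucket but
	 * remain distinct entries.
	 */
	return flow->ingress_dev == arg->netdev &&
	       flow->tc_flower_cookie == arg->cookie;
}
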
+
+int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
+			     unsigned int host_num_mems)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	int err, stats_size;
+
 	hash_init(priv->mask_table);
-	hash_init(priv->flow_table);
+
+	err = rhashtable_init(&priv->flow_table, &nfp_flower_table_params);
+	if (err)
+		return err;
+
+	err = rhashtable_init(&priv->stats_ctx_table, &stats_ctx_table_params);
+	if (err)
+		goto err_free_flow_table;
+
 	get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
 
 	/* Init ring buffer and unallocated mask_ids. */
@@ -402,7 +503,7 @@
 		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
 			      NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
 	if (!priv->mask_ids.mask_id_free_list.buf)
-		return -ENOMEM;
+		goto err_free_stats_ctx_table;
 
 	priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
 
@@ -416,18 +517,33 @@
 	/* Init ring buffer and unallocated stats_ids. */
 	priv->stats_ids.free_list.buf =
 		vmalloc(array_size(NFP_FL_STATS_ELEM_RS,
-				   NFP_FL_STATS_ENTRY_RS));
+				   priv->stats_ring_size));
 	if (!priv->stats_ids.free_list.buf)
 		goto err_free_last_used;
 
-	priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX;
+	priv->stats_ids.init_unalloc = div_u64(host_ctx_count, host_num_mems);
+
+	stats_size = FIELD_PREP(NFP_FL_STAT_ID_STAT, host_ctx_count) |
+		     FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, host_num_mems - 1);
+	priv->stats = kvmalloc_array(stats_size, sizeof(struct nfp_fl_stats),
+				     GFP_KERNEL);
+	if (!priv->stats)
+		goto err_free_ring_buf;
+
+	spin_lock_init(&priv->stats_lock);
 
 	return 0;
 
+err_free_ring_buf:
+	vfree(priv->stats_ids.free_list.buf);
 err_free_last_used:
 	kfree(priv->mask_ids.last_used);
 err_free_mask_id:
 	kfree(priv->mask_ids.mask_id_free_list.buf);
+err_free_stats_ctx_table:
+	rhashtable_destroy(&priv->stats_ctx_table);
+err_free_flow_table:
+	rhashtable_destroy(&priv->flow_table);
 	return -ENOMEM;
 }
 
@@ -438,6 +554,11 @@
 	if (!priv)
 		return;
 
+	rhashtable_free_and_destroy(&priv->flow_table,
+				    nfp_check_rhashtable_empty, NULL);
+	rhashtable_free_and_destroy(&priv->stats_ctx_table,
+				    nfp_check_rhashtable_empty, NULL);
+	kvfree(priv->stats);
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	kfree(priv->mask_ids.last_used);
 	vfree(priv->stats_ids.free_list.buf);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index bd19624..987ae22 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/skbuff.h>
 #include <net/devlink.h>
@@ -82,15 +52,40 @@
 
 #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
 	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
-	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
-	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS))
+
+#define NFP_FLOWER_MERGE_FIELDS \
+	(NFP_FLOWER_LAYER_PORT | \
+	 NFP_FLOWER_LAYER_MAC | \
+	 NFP_FLOWER_LAYER_TP | \
+	 NFP_FLOWER_LAYER_IPV4 | \
+	 NFP_FLOWER_LAYER_IPV6)
+
+#define NFP_FLOWER_PRE_TUN_RULE_FIELDS \
+	(NFP_FLOWER_LAYER_PORT | \
+	 NFP_FLOWER_LAYER_MAC | \
+	 NFP_FLOWER_LAYER_IPV4)
+
+struct nfp_flower_merge_check {
+	union {
+		struct {
+			__be16 tci;
+			struct nfp_flower_mac_mpls l2;
+			struct nfp_flower_tp_ports l4;
+			union {
+				struct nfp_flower_ipv4 ipv4;
+				struct nfp_flower_ipv6 ipv6;
+			};
+		};
+		unsigned long vals[8];
+	};
+};
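
The vals[] overlay lets the merge-conflict check run a machine word at a time
rather than field by field; an assumed scan over it looks like:

static bool
example_merge_check_clear(const struct nfp_flower_merge_check *merge)
{
	int i;

	/* Any bit still set means a field survived the merge subtraction. */
	for (i = 0; i < ARRAY_SIZE(merge->vals); i++)
		if (merge->vals[i])
			return false;

	return true;
}
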
 
 static int
-nfp_flower_xmit_flow(struct net_device *netdev,
-		     struct nfp_fl_payload *nfp_flow, u8 mtype)
+nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
+		     u8 mtype)
 {
 	u32 meta_len, key_len, mask_len, act_len, tot_len;
-	struct nfp_repr *priv = netdev_priv(netdev);
 	struct sk_buff *skb;
 	unsigned char *msg;
 
@@ -108,7 +103,7 @@
 	nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ;
 	nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ;
 
-	skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype, GFP_KERNEL);
+	skb = nfp_flower_cmsg_alloc(app, tot_len, mtype, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
@@ -126,28 +121,38 @@
 	nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ;
 	nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ;
 
-	nfp_ctrl_tx(priv->app->ctrl, skb);
+	nfp_ctrl_tx(app->ctrl, skb);
 
 	return 0;
 }
 
-static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
+static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f)
 {
-	return dissector_uses_key(f->dissector,
-				  FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
-		dissector_uses_key(f->dissector,
-				   FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
-		dissector_uses_key(f->dissector,
-				   FLOW_DISSECTOR_KEY_PORTS) ||
-		dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+	return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
+}
+
+static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+	return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
 }
 
 static int
 nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
-			  u32 *key_layer_two, int *key_size)
+			  u32 *key_layer_two, int *key_size,
+			  struct netlink_ext_ack *extack)
 {
-	if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY)
+	if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length");
 		return -EOPNOTSUPP;
+	}
 
 	if (enc_opts->len > 0) {
 		*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP;
@@ -158,147 +163,186 @@
 }
 
 static int
-nfp_flower_calculate_key_layers(struct nfp_app *app,
-				struct nfp_fl_key_ls *ret_key_ls,
-				struct tc_cls_flower_offload *flow,
-				bool egress,
-				enum nfp_flower_tun_type *tun_type)
+nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
+			      struct flow_dissector_key_enc_opts *enc_op,
+			      u32 *key_layer_two, u8 *key_layer, int *key_size,
+			      struct nfp_flower_priv *priv,
+			      enum nfp_flower_tun_type *tun_type,
+			      struct netlink_ext_ack *extack)
 {
-	struct flow_dissector_key_basic *mask_basic = NULL;
-	struct flow_dissector_key_basic *key_basic = NULL;
+	int err;
+
+	switch (enc_ports->dst) {
+	case htons(IANA_VXLAN_UDP_PORT):
+		*tun_type = NFP_FL_TUNNEL_VXLAN;
+		*key_layer |= NFP_FLOWER_LAYER_VXLAN;
+		*key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+		if (enc_op) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels");
+			return -EOPNOTSUPP;
+		}
+		break;
+	case htons(GENEVE_UDP_PORT):
+		if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload");
+			return -EOPNOTSUPP;
+		}
+		*tun_type = NFP_FL_TUNNEL_GENEVE;
+		*key_layer |= NFP_FLOWER_LAYER_EXT_META;
+		*key_size += sizeof(struct nfp_flower_ext_meta);
+		*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
+		*key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+		if (!enc_op)
+			break;
+		if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload");
+			return -EOPNOTSUPP;
+		}
+		err = nfp_flower_calc_opt_layer(enc_op, key_layer_two,
+						key_size, extack);
+		if (err)
+			return err;
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
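
The dispatch above keys purely on the well-known UDP destination ports
(IANA_VXLAN_UDP_PORT is 4789, GENEVE_UDP_PORT is 6081). A compact view of the
accepted set (hypothetical helper):

static bool example_is_offloadable_udp_tun_port(__be16 dst)
{
	return dst == htons(IANA_VXLAN_UDP_PORT) ||
	       dst == htons(GENEVE_UDP_PORT);
}
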
+
+static int
+nfp_flower_calculate_key_layers(struct nfp_app *app,
+				struct net_device *netdev,
+				struct nfp_fl_key_ls *ret_key_ls,
+				struct flow_cls_offload *flow,
+				enum nfp_flower_tun_type *tun_type,
+				struct netlink_ext_ack *extack)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_match_basic basic = { NULL, NULL };
 	struct nfp_flower_priv *priv = app->priv;
 	u32 key_layer_two;
 	u8 key_layer;
 	int key_size;
 	int err;
 
-	if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
+	if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match not supported");
 		return -EOPNOTSUPP;
+	}
 
 	/* If any tun dissector is used then the required set must be used. */
-	if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
-	    (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
-	    != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+	if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
+	    (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+	    != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported");
 		return -EOPNOTSUPP;
+	}
 
 	key_layer_two = 0;
 	key_layer = NFP_FLOWER_LAYER_PORT;
 	key_size = sizeof(struct nfp_flower_meta_tci) +
 		   sizeof(struct nfp_flower_in_port);
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS) ||
-	    dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS) ||
+	    flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
 		key_layer |= NFP_FLOWER_LAYER_MAC;
 		key_size += sizeof(struct nfp_flower_mac_mpls);
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *flow_vlan;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan vlan;
 
-		flow_vlan = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_VLAN,
-						      flow->mask);
+		flow_rule_match_vlan(rule, &vlan);
 		if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) &&
-		    flow_vlan->vlan_priority)
+		    vlan.key->vlan_priority) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload");
 			return -EOPNOTSUPP;
+		}
 	}
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
-		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
-		struct flow_dissector_key_ports *mask_enc_ports = NULL;
-		struct flow_dissector_key_enc_opts *enc_op = NULL;
-		struct flow_dissector_key_ports *enc_ports = NULL;
-		struct flow_dissector_key_control *mask_enc_ctl =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-						  flow->mask);
-		struct flow_dissector_key_control *enc_ctl =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-						  flow->key);
-		if (!egress)
-			return -EOPNOTSUPP;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_match_enc_opts enc_op = { NULL, NULL };
+		struct flow_match_ipv4_addrs ipv4_addrs;
+		struct flow_match_control enc_ctl;
+		struct flow_match_ports enc_ports;
 
-		if (mask_enc_ctl->addr_type != 0xffff ||
-		    enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+		flow_rule_match_enc_control(rule, &enc_ctl);
+
+		if (enc_ctl.mask->addr_type != 0xffff) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported");
 			return -EOPNOTSUPP;
+		}
+		if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported");
+			return -EOPNOTSUPP;
+		}
 
 		/* These fields are already verified as used. */
-		mask_ipv4 =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-						  flow->mask);
-		if (mask_ipv4->dst != cpu_to_be32(~0))
+		flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs);
+		if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported");
 			return -EOPNOTSUPP;
-
-		mask_enc_ports =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_PORTS,
-						  flow->mask);
-		enc_ports =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_PORTS,
-						  flow->key);
-
-		if (mask_enc_ports->dst != cpu_to_be16(~0))
-			return -EOPNOTSUPP;
-
-		if (dissector_uses_key(flow->dissector,
-				       FLOW_DISSECTOR_KEY_ENC_OPTS)) {
-			enc_op = skb_flow_dissector_target(flow->dissector,
-							   FLOW_DISSECTOR_KEY_ENC_OPTS,
-							   flow->key);
 		}
 
-		switch (enc_ports->dst) {
-		case htons(NFP_FL_VXLAN_PORT):
-			*tun_type = NFP_FL_TUNNEL_VXLAN;
-			key_layer |= NFP_FLOWER_LAYER_VXLAN;
-			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+			flow_rule_match_enc_opts(rule, &enc_op);
 
-			if (enc_op)
-				return -EOPNOTSUPP;
-			break;
-		case htons(NFP_FL_GENEVE_PORT):
-			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE))
-				return -EOPNOTSUPP;
-			*tun_type = NFP_FL_TUNNEL_GENEVE;
-			key_layer |= NFP_FLOWER_LAYER_EXT_META;
-			key_size += sizeof(struct nfp_flower_ext_meta);
-			key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
-			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
 
-			if (!enc_op)
-				break;
-			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))
+		if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+			/* check if GRE, which has no enc_ports */
+			if (netif_is_gretap(netdev)) {
+				*tun_type = NFP_FL_TUNNEL_GRE;
+				key_layer |= NFP_FLOWER_LAYER_EXT_META;
+				key_size += sizeof(struct nfp_flower_ext_meta);
+				key_layer_two |= NFP_FLOWER_LAYER2_GRE;
+				key_size +=
+					sizeof(struct nfp_flower_ipv4_gre_tun);
+
+				if (enc_op.key) {
+					NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels");
+					return -EOPNOTSUPP;
+				}
+			} else {
+				NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels");
 				return -EOPNOTSUPP;
-			err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two,
-							&key_size);
+			}
+		} else {
+			flow_rule_match_enc_ports(rule, &enc_ports);
+			if (enc_ports.mask->dst != cpu_to_be16(~0)) {
+				NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported");
+				return -EOPNOTSUPP;
+			}
+
+			err = nfp_flower_calc_udp_tun_layer(enc_ports.key,
+							    enc_op.key,
+							    &key_layer_two,
+							    &key_layer,
+							    &key_size, priv,
+							    tun_type, extack);
 			if (err)
 				return err;
-			break;
-		default:
-			return -EOPNOTSUPP;
+
+			/* Ensure the ingress netdev matches the expected
+			 * tun type.
+			 */
+			if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) {
+				NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type");
+				return -EOPNOTSUPP;
+			}
 		}
-	} else if (egress) {
-		/* Reject non tunnel matches offloaded to egress repr. */
-		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		mask_basic = skb_flow_dissector_target(flow->dissector,
-						       FLOW_DISSECTOR_KEY_BASIC,
-						       flow->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC))
+		flow_rule_match_basic(rule, &basic);
 
-		key_basic = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_BASIC,
-						      flow->key);
-	}
-
-	if (mask_basic && mask_basic->n_proto) {
+	if (basic.mask && basic.mask->n_proto) {
 		/* Ethernet type is present in the key. */
-		switch (key_basic->n_proto) {
+		switch (basic.key->n_proto) {
 		case cpu_to_be16(ETH_P_IP):
 			key_layer |= NFP_FLOWER_LAYER_IPV4;
 			key_size += sizeof(struct nfp_flower_ipv4);
@@ -313,6 +357,7 @@
 		 * because we rely on it to get to the host.
 		 */
 		case cpu_to_be16(ETH_P_ARP):
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ARP not supported");
 			return -EOPNOTSUPP;
 
 		case cpu_to_be16(ETH_P_MPLS_UC):
@@ -328,18 +373,16 @@
 			break;
 
 		default:
-			/* Other ethtype - we need check the masks for the
-			 * remainder of the key to ensure we can offload.
-			 */
-			if (nfp_flower_check_higher_than_mac(flow))
-				return -EOPNOTSUPP;
-			break;
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported");
+			return -EOPNOTSUPP;
 		}
+	} else if (nfp_flower_check_higher_than_mac(flow)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType");
+		return -EOPNOTSUPP;
 	}
 
-	if (mask_basic && mask_basic->ip_proto) {
-		/* Ethernet type is present in the key. */
-		switch (key_basic->ip_proto) {
+	if (basic.mask && basic.mask->ip_proto) {
+		switch (basic.key->ip_proto) {
 		case IPPROTO_TCP:
 		case IPPROTO_UDP:
 		case IPPROTO_SCTP:
@@ -348,52 +391,73 @@
 			key_layer |= NFP_FLOWER_LAYER_TP;
 			key_size += sizeof(struct nfp_flower_tp_ports);
 			break;
-		default:
-			/* Other ip proto - we need check the masks for the
-			 * remainder of the key to ensure we can offload.
-			 */
-			return -EOPNOTSUPP;
 		}
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
-		struct flow_dissector_key_tcp *tcp;
+	if (!(key_layer & NFP_FLOWER_LAYER_TP) &&
+	    nfp_flower_check_higher_than_l3(flow)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type");
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_match_tcp tcp;
 		u32 tcp_flags;
 
-		tcp = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_TCP,
-						flow->key);
-		tcp_flags = be16_to_cpu(tcp->flags);
+		flow_rule_match_tcp(rule, &tcp);
+		tcp_flags = be16_to_cpu(tcp.key->flags);
 
-		if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
+		if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: no match support for selected TCP flags");
 			return -EOPNOTSUPP;
+		}
 
 		/* We only support PSH and URG flags when either
 		 * FIN, SYN or RST is present as well.
 		 */
 		if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
-		    !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
+		    !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: PSH and URG is only supported when used with FIN, SYN or RST");
 			return -EOPNOTSUPP;
+		}
 
-		/* We need to store TCP flags in the IPv4 key space, thus
-		 * we need to ensure we include a IPv4 key layer if we have
-		 * not done so already.
+		/* We need to store TCP flags in either the IPv4 or IPv6 key
+		 * space, thus we need to ensure we include an IPv4/IPv6 key
+		 * layer if we have not done so already.
 		 */
-		if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
-			key_layer |= NFP_FLOWER_LAYER_IPV4;
-			key_size += sizeof(struct nfp_flower_ipv4);
+		if (!basic.key) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on L3 protocol");
+			return -EOPNOTSUPP;
+		}
+
+		if (!(key_layer & NFP_FLOWER_LAYER_IPV4) &&
+		    !(key_layer & NFP_FLOWER_LAYER_IPV6)) {
+			switch (basic.key->n_proto) {
+			case cpu_to_be16(ETH_P_IP):
+				key_layer |= NFP_FLOWER_LAYER_IPV4;
+				key_size += sizeof(struct nfp_flower_ipv4);
+				break;
+
+			case cpu_to_be16(ETH_P_IPV6):
+				key_layer |= NFP_FLOWER_LAYER_IPV6;
+				key_size += sizeof(struct nfp_flower_ipv6);
+				break;
+
+			default:
+				NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on IPv4/IPv6");
+				return -EOPNOTSUPP;
+			}
 		}
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key_ctl;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control ctl;
 
-		key_ctl = skb_flow_dissector_target(flow->dissector,
-						    FLOW_DISSECTOR_KEY_CONTROL,
-						    flow->key);
-
-		if (key_ctl->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS)
+		flow_rule_match_control(rule, &ctl);
+		if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on unknown control flag");
 			return -EOPNOTSUPP;
+		}
 	}
 
 	ret_key_ls->key_layer = key_layer;
@@ -404,7 +468,7 @@
 }
 
 static struct nfp_fl_payload *
-nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress)
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
 {
 	struct nfp_fl_payload *flow_pay;
 
@@ -428,9 +492,9 @@
 
 	flow_pay->nfp_tun_ipv4_addr = 0;
 	flow_pay->meta.flags = 0;
-	spin_lock_init(&flow_pay->lock);
-
-	flow_pay->ingress_offload = !egress;
+	INIT_LIST_HEAD(&flow_pay->linked_flows);
+	flow_pay->in_hw = false;
+	flow_pay->pre_tun_rule.dev = NULL;
 
 	return flow_pay;
 
@@ -443,12 +507,606 @@
 	return NULL;
 }
 
+static int
+nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow,
+				     struct nfp_flower_merge_check *merge,
+				     u8 *last_act_id, int *act_out)
+{
+	struct nfp_fl_set_ipv6_tc_hl_fl *ipv6_tc_hl_fl;
+	struct nfp_fl_set_ip4_ttl_tos *ipv4_ttl_tos;
+	struct nfp_fl_set_ip4_addrs *ipv4_add;
+	struct nfp_fl_set_ipv6_addr *ipv6_add;
+	struct nfp_fl_push_vlan *push_vlan;
+	struct nfp_fl_set_tport *tport;
+	struct nfp_fl_set_eth *eth;
+	struct nfp_fl_act_head *a;
+	unsigned int act_off = 0;
+	u8 act_id = 0;
+	u8 *ports;
+	int i;
+
+	while (act_off < flow->meta.act_len) {
+		a = (struct nfp_fl_act_head *)&flow->action_data[act_off];
+		act_id = a->jump_id;
+
+		switch (act_id) {
+		case NFP_FL_ACTION_OPCODE_OUTPUT:
+			if (act_out)
+				(*act_out)++;
+			break;
+		case NFP_FL_ACTION_OPCODE_PUSH_VLAN:
+			push_vlan = (struct nfp_fl_push_vlan *)a;
+			if (push_vlan->vlan_tci)
+				merge->tci = cpu_to_be16(0xffff);
+			break;
+		case NFP_FL_ACTION_OPCODE_POP_VLAN:
+			merge->tci = cpu_to_be16(0);
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL:
+			/* New tunnel header means l2 to l4 can be matched. */
+			eth_broadcast_addr(&merge->l2.mac_dst[0]);
+			eth_broadcast_addr(&merge->l2.mac_src[0]);
+			memset(&merge->l4, 0xff,
+			       sizeof(struct nfp_flower_tp_ports));
+			memset(&merge->ipv4, 0xff,
+			       sizeof(struct nfp_flower_ipv4));
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_ETHERNET:
+			eth = (struct nfp_fl_set_eth *)a;
+			for (i = 0; i < ETH_ALEN; i++)
+				merge->l2.mac_dst[i] |= eth->eth_addr_mask[i];
+			for (i = 0; i < ETH_ALEN; i++)
+				merge->l2.mac_src[i] |=
+					eth->eth_addr_mask[ETH_ALEN + i];
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS:
+			ipv4_add = (struct nfp_fl_set_ip4_addrs *)a;
+			merge->ipv4.ipv4_src |= ipv4_add->ipv4_src_mask;
+			merge->ipv4.ipv4_dst |= ipv4_add->ipv4_dst_mask;
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS:
+			ipv4_ttl_tos = (struct nfp_fl_set_ip4_ttl_tos *)a;
+			merge->ipv4.ip_ext.ttl |= ipv4_ttl_tos->ipv4_ttl_mask;
+			merge->ipv4.ip_ext.tos |= ipv4_ttl_tos->ipv4_tos_mask;
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_IPV6_SRC:
+			ipv6_add = (struct nfp_fl_set_ipv6_addr *)a;
+			for (i = 0; i < 4; i++)
+				merge->ipv6.ipv6_src.in6_u.u6_addr32[i] |=
+					ipv6_add->ipv6[i].mask;
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_IPV6_DST:
+			ipv6_add = (struct nfp_fl_set_ipv6_addr *)a;
+			for (i = 0; i < 4; i++)
+				merge->ipv6.ipv6_dst.in6_u.u6_addr32[i] |=
+					ipv6_add->ipv6[i].mask;
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL:
+			ipv6_tc_hl_fl = (struct nfp_fl_set_ipv6_tc_hl_fl *)a;
+			merge->ipv6.ip_ext.ttl |=
+				ipv6_tc_hl_fl->ipv6_hop_limit_mask;
+			merge->ipv6.ip_ext.tos |= ipv6_tc_hl_fl->ipv6_tc_mask;
+			merge->ipv6.ipv6_flow_label_exthdr |=
+				ipv6_tc_hl_fl->ipv6_label_mask;
+			break;
+		case NFP_FL_ACTION_OPCODE_SET_UDP:
+		case NFP_FL_ACTION_OPCODE_SET_TCP:
+			tport = (struct nfp_fl_set_tport *)a;
+			ports = (u8 *)&merge->l4.port_src;
+			for (i = 0; i < 4; i++)
+				ports[i] |= tport->tp_port_mask[i];
+			break;
+		case NFP_FL_ACTION_OPCODE_PRE_TUNNEL:
+		case NFP_FL_ACTION_OPCODE_PRE_LAG:
+		case NFP_FL_ACTION_OPCODE_PUSH_GENEVE:
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+
+		act_off += a->len_lw << NFP_FL_LW_SIZ;
+	}
+
+	if (last_act_id)
+		*last_act_id = act_id;
+
+	return 0;
+}
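
/* Editor's sketch, not part of the patch: each set-style action handled in
 * the switch above ORs its write mask into the merge-check structure, so any
 * field the first flow rewrites counts as "covered" for the subset test that
 * follows. A minimal userspace model for a 6-byte MAC field; all names here
 * are illustrative, not the driver's.
 */
#include <stdint.h>
#include <stdio.h>

#define MAC_LEN 6

int main(void)
{
	uint8_t merge_mac_dst[MAC_LEN] = { 0 };	/* bits covered so far */
	/* hypothetical set_eth action rewriting the low three octets */
	uint8_t set_eth_mask[MAC_LEN] = { 0, 0, 0, 0xff, 0xff, 0xff };
	int i;

	for (i = 0; i < MAC_LEN; i++)
		merge_mac_dst[i] |= set_eth_mask[i];	/* rewritten => covered */

	for (i = 0; i < MAC_LEN; i++)
		printf("%02x%s", merge_mac_dst[i],
		       i == MAC_LEN - 1 ? "\n" : ":");
	return 0;
}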
+
+static int
+nfp_flower_populate_merge_match(struct nfp_fl_payload *flow,
+				struct nfp_flower_merge_check *merge,
+				bool extra_fields)
+{
+	struct nfp_flower_meta_tci *meta_tci;
+	u8 *mask = flow->mask_data;
+	u8 key_layer, match_size;
+
+	memset(merge, 0, sizeof(struct nfp_flower_merge_check));
+
+	meta_tci = (struct nfp_flower_meta_tci *)mask;
+	key_layer = meta_tci->nfp_flow_key_layer;
+
+	if (key_layer & ~NFP_FLOWER_MERGE_FIELDS && !extra_fields)
+		return -EOPNOTSUPP;
+
+	merge->tci = meta_tci->tci;
+	mask += sizeof(struct nfp_flower_meta_tci);
+
+	if (key_layer & NFP_FLOWER_LAYER_EXT_META)
+		mask += sizeof(struct nfp_flower_ext_meta);
+
+	mask += sizeof(struct nfp_flower_in_port);
+
+	if (key_layer & NFP_FLOWER_LAYER_MAC) {
+		match_size = sizeof(struct nfp_flower_mac_mpls);
+		memcpy(&merge->l2, mask, match_size);
+		mask += match_size;
+	}
+
+	if (key_layer & NFP_FLOWER_LAYER_TP) {
+		match_size = sizeof(struct nfp_flower_tp_ports);
+		memcpy(&merge->l4, mask, match_size);
+		mask += match_size;
+	}
+
+	if (key_layer & NFP_FLOWER_LAYER_IPV4) {
+		match_size = sizeof(struct nfp_flower_ipv4);
+		memcpy(&merge->ipv4, mask, match_size);
+	}
+
+	if (key_layer & NFP_FLOWER_LAYER_IPV6) {
+		match_size = sizeof(struct nfp_flower_ipv6);
+		memcpy(&merge->ipv6, mask, match_size);
+	}
+
+	return 0;
+}
+
+static int
+nfp_flower_can_merge(struct nfp_fl_payload *sub_flow1,
+		     struct nfp_fl_payload *sub_flow2)
+{
+	/* Two flows can be merged if sub_flow2 only matches on bits that are
+	 * either matched by sub_flow1 or set by a sub_flow1 action. This
+	 * ensures that every packet that hits sub_flow1 and recirculates is
+	 * guaranteed to hit sub_flow2.
+	 */
+	struct nfp_flower_merge_check sub_flow1_merge, sub_flow2_merge;
+	int err, act_out = 0;
+	u8 last_act_id = 0;
+
+	err = nfp_flower_populate_merge_match(sub_flow1, &sub_flow1_merge,
+					      true);
+	if (err)
+		return err;
+
+	err = nfp_flower_populate_merge_match(sub_flow2, &sub_flow2_merge,
+					      false);
+	if (err)
+		return err;
+
+	err = nfp_flower_update_merge_with_actions(sub_flow1, &sub_flow1_merge,
+						   &last_act_id, &act_out);
+	if (err)
+		return err;
+
+	/* Must only be 1 output action and it must be the last in sequence. */
+	if (act_out != 1 || last_act_id != NFP_FL_ACTION_OPCODE_OUTPUT)
+		return -EOPNOTSUPP;
+
+	/* Reject merge if sub_flow2 matches on something that is not matched
+	 * on or set in an action by sub_flow1.
+	 */
+	err = bitmap_andnot(sub_flow2_merge.vals, sub_flow2_merge.vals,
+			    sub_flow1_merge.vals,
+			    sizeof(struct nfp_flower_merge_check) * 8);
+	if (err)
+		return -EINVAL;
+
+	return 0;
+}
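
/* Editor's sketch, not part of the patch: nfp_flower_can_merge() boils down
 * to a bitmap subset test -- sub_flow2 may only match on bits that sub_flow1
 * already matches on or sets via an action. Modelled here with a single
 * 64-bit mask standing in for struct nfp_flower_merge_check.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the bitmap_andnot() check: mergeable iff sub2 AND-NOT sub1 == 0. */
static bool can_merge(uint64_t sub1_covered, uint64_t sub2_match)
{
	return (sub2_match & ~sub1_covered) == 0;
}

int main(void)
{
	uint64_t sub1 = 0xff00ff00;	/* fields matched or set by flow 1 */

	printf("%d\n", can_merge(sub1, 0x0f000f00));	/* subset -> 1 */
	printf("%d\n", can_merge(sub1, 0x0f0000ff));	/* extra bits -> 0 */
	return 0;
}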
+
+static unsigned int
+nfp_flower_copy_pre_actions(char *act_dst, char *act_src, int len,
+			    bool *tunnel_act)
+{
+	unsigned int act_off = 0, act_len;
+	struct nfp_fl_act_head *a;
+	u8 act_id = 0;
+
+	while (act_off < len) {
+		a = (struct nfp_fl_act_head *)&act_src[act_off];
+		act_len = a->len_lw << NFP_FL_LW_SIZ;
+		act_id = a->jump_id;
+
+		switch (act_id) {
+		case NFP_FL_ACTION_OPCODE_PRE_TUNNEL:
+			if (tunnel_act)
+				*tunnel_act = true;
+			/* fall through */
+		case NFP_FL_ACTION_OPCODE_PRE_LAG:
+			memcpy(act_dst + act_off, act_src + act_off, act_len);
+			break;
+		default:
+			return act_off;
+		}
+
+		act_off += act_len;
+	}
+
+	return act_off;
+}
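
/* Editor's sketch, not part of the patch: the action copies above walk a
 * TLV-style list whose length field counts 32-bit longwords, converted to
 * bytes with a shift (NFP_FL_LW_SIZ in the in-tree headers). A userspace
 * walk over such a buffer; the two-byte header layout is illustrative.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LW_SHIFT 2	/* longwords -> bytes, assuming len_lw << 2 */

struct act_head {
	uint8_t jump_id;	/* action opcode */
	uint8_t len_lw;		/* total action length in longwords */
};

static void walk_actions(const uint8_t *acts, unsigned int len)
{
	unsigned int off = 0;

	while (off < len) {
		struct act_head a;

		memcpy(&a, acts + off, sizeof(a));
		printf("opcode %u at offset %u, %u bytes\n",
		       a.jump_id, off, a.len_lw << LW_SHIFT);
		off += a.len_lw << LW_SHIFT;	/* step over whole action */
	}
}

int main(void)
{
	/* two fake actions: opcode 1 (1 LW = 4 B), opcode 2 (2 LW = 8 B) */
	uint8_t buf[12] = { 1, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0 };

	walk_actions(buf, sizeof(buf));
	return 0;
}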
+
+static int
+nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan)
+{
+	struct nfp_fl_act_head *a;
+	unsigned int act_off = 0;
+
+	while (act_off < len) {
+		a = (struct nfp_fl_act_head *)&acts[act_off];
+
+		if (a->jump_id == NFP_FL_ACTION_OPCODE_PUSH_VLAN && !act_off)
+			*vlan = (struct nfp_fl_push_vlan *)a;
+		else if (a->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT)
+			return -EOPNOTSUPP;
+
+		act_off += a->len_lw << NFP_FL_LW_SIZ;
+	}
+
+	/* Ensure any VLAN push also has an egress action. */
+	if (*vlan && act_off <= sizeof(struct nfp_fl_push_vlan))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int
+nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan)
+{
+	struct nfp_fl_set_ipv4_tun *tun;
+	struct nfp_fl_act_head *a;
+	unsigned int act_off = 0;
+
+	while (act_off < len) {
+		a = (struct nfp_fl_act_head *)&acts[act_off];
+
+		if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) {
+			tun = (struct nfp_fl_set_ipv4_tun *)a;
+			tun->outer_vlan_tpid = vlan->vlan_tpid;
+			tun->outer_vlan_tci = vlan->vlan_tci;
+
+			return 0;
+		}
+
+		act_off += a->len_lw << NFP_FL_LW_SIZ;
+	}
+
+	/* Return error if no tunnel action is found. */
+	return -EOPNOTSUPP;
+}
+
+static int
+nfp_flower_merge_action(struct nfp_fl_payload *sub_flow1,
+			struct nfp_fl_payload *sub_flow2,
+			struct nfp_fl_payload *merge_flow)
+{
+	unsigned int sub1_act_len, sub2_act_len, pre_off1, pre_off2;
+	struct nfp_fl_push_vlan *post_tun_push_vlan = NULL;
+	bool tunnel_act = false;
+	char *merge_act;
+	int err;
+
+	/* The last action of sub_flow1 must be output - do not merge this. */
+	sub1_act_len = sub_flow1->meta.act_len - sizeof(struct nfp_fl_output);
+	sub2_act_len = sub_flow2->meta.act_len;
+
+	if (!sub2_act_len)
+		return -EINVAL;
+
+	if (sub1_act_len + sub2_act_len > NFP_FL_MAX_A_SIZ)
+		return -EINVAL;
+
+	/* A shortcut can only be applied if there is a single action. */
+	if (sub1_act_len)
+		merge_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+	else
+		merge_flow->meta.shortcut = sub_flow2->meta.shortcut;
+
+	merge_flow->meta.act_len = sub1_act_len + sub2_act_len;
+	merge_act = merge_flow->action_data;
+
+	/* Copy any pre-actions to the start of merge flow action list. */
+	pre_off1 = nfp_flower_copy_pre_actions(merge_act,
+					       sub_flow1->action_data,
+					       sub1_act_len, &tunnel_act);
+	merge_act += pre_off1;
+	sub1_act_len -= pre_off1;
+	pre_off2 = nfp_flower_copy_pre_actions(merge_act,
+					       sub_flow2->action_data,
+					       sub2_act_len, NULL);
+	merge_act += pre_off2;
+	sub2_act_len -= pre_off2;
+
+	/* FW does a tunnel push when egressing; therefore, if sub_flow 1 pushes
+	 * a tunnel, there are restrictions on which sub_flow 2 actions lead to a
+	 * valid merge.
+	 */
+	if (tunnel_act) {
+		char *post_tun_acts = &sub_flow2->action_data[pre_off2];
+
+		err = nfp_fl_verify_post_tun_acts(post_tun_acts, sub2_act_len,
+						  &post_tun_push_vlan);
+		if (err)
+			return err;
+
+		if (post_tun_push_vlan) {
+			pre_off2 += sizeof(*post_tun_push_vlan);
+			sub2_act_len -= sizeof(*post_tun_push_vlan);
+		}
+	}
+
+	/* Copy remaining actions from sub_flows 1 and 2. */
+	memcpy(merge_act, sub_flow1->action_data + pre_off1, sub1_act_len);
+
+	if (post_tun_push_vlan) {
+		/* Update tunnel action in merge to include VLAN push. */
+		err = nfp_fl_push_vlan_after_tun(merge_act, sub1_act_len,
+						 post_tun_push_vlan);
+		if (err)
+			return err;
+
+		merge_flow->meta.act_len -= sizeof(*post_tun_push_vlan);
+	}
+
+	merge_act += sub1_act_len;
+	memcpy(merge_act, sub_flow2->action_data + pre_off2, sub2_act_len);
+
+	return 0;
+}
+
+/* Flow link code should only be accessed under RTNL. */
+static void nfp_flower_unlink_flow(struct nfp_fl_payload_link *link)
+{
+	list_del(&link->merge_flow.list);
+	list_del(&link->sub_flow.list);
+	kfree(link);
+}
+
+static void nfp_flower_unlink_flows(struct nfp_fl_payload *merge_flow,
+				    struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *link;
+
+	list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list)
+		if (link->sub_flow.flow == sub_flow) {
+			nfp_flower_unlink_flow(link);
+			return;
+		}
+}
+
+static int nfp_flower_link_flows(struct nfp_fl_payload *merge_flow,
+				 struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *link;
+
+	link = kmalloc(sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	link->merge_flow.flow = merge_flow;
+	list_add_tail(&link->merge_flow.list, &merge_flow->linked_flows);
+	link->sub_flow.flow = sub_flow;
+	list_add_tail(&link->sub_flow.list, &sub_flow->linked_flows);
+
+	return 0;
+}
+
+/**
+ * nfp_flower_merge_offloaded_flows() - Merge 2 existing flows to single flow.
+ * @app:	Pointer to the APP handle
+ * @sub_flow1:	Initial flow matched to produce merge hint
+ * @sub_flow2:	Post recirculation flow matched in merge hint
+ *
+ * Combines 2 flows (if valid) into a single flow, removing the initial from hw
+ * and offloading the new, merged flow.
+ *
+ * Return: negative value on error, 0 on success.
+ */
+int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
+				     struct nfp_fl_payload *sub_flow1,
+				     struct nfp_fl_payload *sub_flow2)
+{
+	struct flow_cls_offload merge_tc_off = {};
+	struct nfp_flower_priv *priv = app->priv;
+	struct netlink_ext_ack *extack = NULL;
+	struct nfp_fl_payload *merge_flow;
+	struct nfp_fl_key_ls merge_key_ls;
+	int err;
+
+	ASSERT_RTNL();
+
+	extack = merge_tc_off.common.extack;
+	if (sub_flow1 == sub_flow2 ||
+	    nfp_flower_is_merge_flow(sub_flow1) ||
+	    nfp_flower_is_merge_flow(sub_flow2))
+		return -EINVAL;
+
+	err = nfp_flower_can_merge(sub_flow1, sub_flow2);
+	if (err)
+		return err;
+
+	merge_key_ls.key_size = sub_flow1->meta.key_len;
+
+	merge_flow = nfp_flower_allocate_new(&merge_key_ls);
+	if (!merge_flow)
+		return -ENOMEM;
+
+	merge_flow->tc_flower_cookie = (unsigned long)merge_flow;
+	merge_flow->ingress_dev = sub_flow1->ingress_dev;
+
+	memcpy(merge_flow->unmasked_data, sub_flow1->unmasked_data,
+	       sub_flow1->meta.key_len);
+	memcpy(merge_flow->mask_data, sub_flow1->mask_data,
+	       sub_flow1->meta.mask_len);
+
+	err = nfp_flower_merge_action(sub_flow1, sub_flow2, merge_flow);
+	if (err)
+		goto err_destroy_merge_flow;
+
+	err = nfp_flower_link_flows(merge_flow, sub_flow1);
+	if (err)
+		goto err_destroy_merge_flow;
+
+	err = nfp_flower_link_flows(merge_flow, sub_flow2);
+	if (err)
+		goto err_unlink_sub_flow1;
+
+	merge_tc_off.cookie = merge_flow->tc_flower_cookie;
+	err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow,
+					merge_flow->ingress_dev, extack);
+	if (err)
+		goto err_unlink_sub_flow2;
+
+	err = rhashtable_insert_fast(&priv->flow_table, &merge_flow->fl_node,
+				     nfp_flower_table_params);
+	if (err)
+		goto err_release_metadata;
+
+	err = nfp_flower_xmit_flow(app, merge_flow,
+				   NFP_FLOWER_CMSG_TYPE_FLOW_MOD);
+	if (err)
+		goto err_remove_rhash;
+
+	merge_flow->in_hw = true;
+	sub_flow1->in_hw = false;
+
+	return 0;
+
+err_remove_rhash:
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+					    &merge_flow->fl_node,
+					    nfp_flower_table_params));
+err_release_metadata:
+	nfp_modify_flow_metadata(app, merge_flow);
+err_unlink_sub_flow2:
+	nfp_flower_unlink_flows(merge_flow, sub_flow2);
+err_unlink_sub_flow1:
+	nfp_flower_unlink_flows(merge_flow, sub_flow1);
+err_destroy_merge_flow:
+	kfree(merge_flow->action_data);
+	kfree(merge_flow->mask_data);
+	kfree(merge_flow->unmasked_data);
+	kfree(merge_flow);
+	return err;
+}
+
+/**
+ * nfp_flower_validate_pre_tun_rule() - Verify a flow as a pre-tunnel rule.
+ * @app:	Pointer to the APP handle
+ * @flow:	Pointer to NFP flow representation of rule
+ * @extack:	Netlink extended ACK report
+ *
+ * Verifies the flow as a pre-tunnel rule.
+ *
+ * Return: negative value on error, 0 if verified.
+ */
+static int
+nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
+				 struct nfp_fl_payload *flow,
+				 struct netlink_ext_ack *extack)
+{
+	struct nfp_flower_meta_tci *meta_tci;
+	struct nfp_flower_mac_mpls *mac;
+	struct nfp_fl_act_head *act;
+	u8 *mask = flow->mask_data;
+	bool vlan = false;
+	int act_offset;
+	u8 key_layer;
+
+	meta_tci = (struct nfp_flower_meta_tci *)flow->unmasked_data;
+	if (meta_tci->tci & cpu_to_be16(NFP_FLOWER_MASK_VLAN_PRESENT)) {
+		u16 vlan_tci = be16_to_cpu(meta_tci->tci);
+
+		vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT;
+		flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci);
+		vlan = true;
+	} else {
+		flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff);
+	}
+
+	key_layer = meta_tci->nfp_flow_key_layer;
+	if (key_layer & ~NFP_FLOWER_PRE_TUN_RULE_FIELDS) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: too many match fields");
+		return -EOPNOTSUPP;
+	}
+
+	if (!(key_layer & NFP_FLOWER_LAYER_MAC)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MAC fields match required");
+		return -EOPNOTSUPP;
+	}
+
+	/* Skip fields known to exist. */
+	mask += sizeof(struct nfp_flower_meta_tci);
+	mask += sizeof(struct nfp_flower_in_port);
+
+	/* Ensure destination MAC address is fully matched. */
+	mac = (struct nfp_flower_mac_mpls *)mask;
+	if (!is_broadcast_ether_addr(&mac->mac_dst[0])) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC field must not be masked");
+		return -EOPNOTSUPP;
+	}
+
+	if (key_layer & NFP_FLOWER_LAYER_IPV4) {
+		int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags);
+		int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto);
+		int i;
+
+		mask += sizeof(struct nfp_flower_mac_mpls);
+
+		/* Ensure proto and flags are the only IP layer fields. */
+		for (i = 0; i < sizeof(struct nfp_flower_ipv4); i++)
+			if (mask[i] && i != ip_flags && i != ip_proto) {
+				NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header");
+				return -EOPNOTSUPP;
+			}
+	}
+
+	/* Action must be a single egress or pop_vlan and egress. */
+	act_offset = 0;
+	act = (struct nfp_fl_act_head *)&flow->action_data[act_offset];
+	if (vlan) {
+		if (act->jump_id != NFP_FL_ACTION_OPCODE_POP_VLAN) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on VLAN must have VLAN pop as first action");
+			return -EOPNOTSUPP;
+		}
+
+		act_offset += act->len_lw << NFP_FL_LW_SIZ;
+		act = (struct nfp_fl_act_head *)&flow->action_data[act_offset];
+	}
+
+	if (act->jump_id != NFP_FL_ACTION_OPCODE_OUTPUT) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: non egress action detected where egress was expected");
+		return -EOPNOTSUPP;
+	}
+
+	act_offset += act->len_lw << NFP_FL_LW_SIZ;
+
+	/* Ensure there are no more actions after egress. */
+	if (act_offset != flow->meta.act_len) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: egress is not the last action");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
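
/* Editor's sketch, not part of the patch: the validator above accepts
 * exactly "optional pop_vlan, then one output, then nothing". A userspace
 * model of that sequence check; the opcode IDs and struct layout below are
 * illustrative, not the driver's real encoding.
 */
#include <stdbool.h>
#include <stdio.h>

enum { OP_POP_VLAN = 1, OP_OUTPUT = 2 };

struct act { int op; };

static bool pre_tun_acts_ok(const struct act *acts, int n, bool vlan_match)
{
	int i = 0;

	if (vlan_match) {	/* VLAN match => pop must come first */
		if (i >= n || acts[i].op != OP_POP_VLAN)
			return false;
		i++;
	}
	if (i >= n || acts[i].op != OP_OUTPUT)	/* then a single egress */
		return false;
	i++;
	return i == n;		/* and nothing after it */
}

int main(void)
{
	struct act ok[] = { { OP_POP_VLAN }, { OP_OUTPUT } };
	struct act bad[] = { { OP_OUTPUT }, { OP_OUTPUT } };

	printf("%d %d\n", pre_tun_acts_ok(ok, 2, true),
	       pre_tun_acts_ok(bad, 2, false));	/* 1 0 */
	return 0;
}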
+
 /**
  * nfp_flower_add_offload() - Adds a new flow to hardware.
  * @app:	Pointer to the APP handle
  * @netdev:	netdev structure.
  * @flow:	TC flower classifier offload structure.
- * @egress:	NFP netdev is the egress.
  *
  * Adds a new flow to the repeated hash structure and action payload.
  *
@@ -456,73 +1114,86 @@
  */
 static int
 nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
-		       struct tc_cls_flower_offload *flow, bool egress)
+		       struct flow_cls_offload *flow)
 {
 	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
-	struct nfp_port *port = nfp_port_from_netdev(netdev);
 	struct nfp_flower_priv *priv = app->priv;
+	struct netlink_ext_ack *extack = NULL;
 	struct nfp_fl_payload *flow_pay;
 	struct nfp_fl_key_ls *key_layer;
-	struct net_device *ingr_dev;
+	struct nfp_port *port = NULL;
 	int err;
 
-	ingr_dev = egress ? NULL : netdev;
-	flow_pay = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
-					      NFP_FL_STATS_CTX_DONT_CARE);
-	if (flow_pay) {
-		/* Ignore as duplicate if it has been added by different cb. */
-		if (flow_pay->ingress_offload && egress)
-			return 0;
-		else
-			return -EOPNOTSUPP;
-	}
+	extack = flow->common.extack;
+	if (nfp_netdev_is_nfp_repr(netdev))
+		port = nfp_port_from_netdev(netdev);
 
 	key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
 	if (!key_layer)
 		return -ENOMEM;
 
-	err = nfp_flower_calculate_key_layers(app, key_layer, flow, egress,
-					      &tun_type);
+	err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow,
+					      &tun_type, extack);
 	if (err)
 		goto err_free_key_ls;
 
-	flow_pay = nfp_flower_allocate_new(key_layer, egress);
+	flow_pay = nfp_flower_allocate_new(key_layer);
 	if (!flow_pay) {
 		err = -ENOMEM;
 		goto err_free_key_ls;
 	}
 
-	flow_pay->ingress_dev = egress ? NULL : netdev;
-
-	err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay,
-					    tun_type);
+	err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev,
+					    flow_pay, tun_type, extack);
 	if (err)
 		goto err_destroy_flow;
 
-	err = nfp_flower_compile_action(app, flow, netdev, flow_pay);
+	err = nfp_flower_compile_action(app, flow, netdev, flow_pay, extack);
 	if (err)
 		goto err_destroy_flow;
 
-	err = nfp_compile_flow_metadata(app, flow, flow_pay,
-					flow_pay->ingress_dev);
+	if (flow_pay->pre_tun_rule.dev) {
+		err = nfp_flower_validate_pre_tun_rule(app, flow_pay, extack);
+		if (err)
+			goto err_destroy_flow;
+	}
+
+	err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack);
 	if (err)
 		goto err_destroy_flow;
 
-	err = nfp_flower_xmit_flow(netdev, flow_pay,
-				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
-	if (err)
-		goto err_destroy_flow;
-
-	INIT_HLIST_NODE(&flow_pay->link);
 	flow_pay->tc_flower_cookie = flow->cookie;
-	hash_add_rcu(priv->flow_table, &flow_pay->link, flow->cookie);
-	port->tc_offload_cnt++;
+	err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
+				     nfp_flower_table_params);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot insert flow into tables for offloads");
+		goto err_release_metadata;
+	}
+
+	if (flow_pay->pre_tun_rule.dev)
+		err = nfp_flower_xmit_pre_tun_flow(app, flow_pay);
+	else
+		err = nfp_flower_xmit_flow(app, flow_pay,
+					   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+	if (err)
+		goto err_remove_rhash;
+
+	if (port)
+		port->tc_offload_cnt++;
+
+	flow_pay->in_hw = true;
 
 	/* Deallocate flow payload when flower rule has been destroyed. */
 	kfree(key_layer);
 
 	return 0;
 
+err_remove_rhash:
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+					    &flow_pay->fl_node,
+					    nfp_flower_table_params));
+err_release_metadata:
+	nfp_modify_flow_metadata(app, flow_pay);
 err_destroy_flow:
 	kfree(flow_pay->action_data);
 	kfree(flow_pay->mask_data);
@@ -533,61 +1204,190 @@
 	return err;
 }
 
+static void
+nfp_flower_remove_merge_flow(struct nfp_app *app,
+			     struct nfp_fl_payload *del_sub_flow,
+			     struct nfp_fl_payload *merge_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload_link *link, *temp;
+	struct nfp_fl_payload *origin;
+	bool mod = false;
+	int err;
+
+	link = list_first_entry(&merge_flow->linked_flows,
+				struct nfp_fl_payload_link, merge_flow.list);
+	origin = link->sub_flow.flow;
+
+	/* Re-add the rule the merge had overwritten if it has not been deleted. */
+	if (origin != del_sub_flow)
+		mod = true;
+
+	err = nfp_modify_flow_metadata(app, merge_flow);
+	if (err) {
+		nfp_flower_cmsg_warn(app, "Metadata fail for merge flow delete.\n");
+		goto err_free_links;
+	}
+
+	if (!mod) {
+		err = nfp_flower_xmit_flow(app, merge_flow,
+					   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+		if (err) {
+			nfp_flower_cmsg_warn(app, "Failed to delete merged flow.\n");
+			goto err_free_links;
+		}
+	} else {
+		__nfp_modify_flow_metadata(priv, origin);
+		err = nfp_flower_xmit_flow(app, origin,
+					   NFP_FLOWER_CMSG_TYPE_FLOW_MOD);
+		if (err)
+			nfp_flower_cmsg_warn(app, "Failed to revert merge flow.\n");
+		origin->in_hw = true;
+	}
+
+err_free_links:
+	/* Clean any links connected with the merged flow. */
+	list_for_each_entry_safe(link, temp, &merge_flow->linked_flows,
+				 merge_flow.list)
+		nfp_flower_unlink_flow(link);
+
+	kfree(merge_flow->action_data);
+	kfree(merge_flow->mask_data);
+	kfree(merge_flow->unmasked_data);
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+					    &merge_flow->fl_node,
+					    nfp_flower_table_params));
+	kfree_rcu(merge_flow, rcu);
+}
+
+static void
+nfp_flower_del_linked_merge_flows(struct nfp_app *app,
+				  struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *link, *temp;
+
+	/* Remove any merge flow formed from the deleted sub_flow. */
+	list_for_each_entry_safe(link, temp, &sub_flow->linked_flows,
+				 sub_flow.list)
+		nfp_flower_remove_merge_flow(app, sub_flow,
+					     link->merge_flow.flow);
+}
+
 /**
  * nfp_flower_del_offload() - Removes a flow from hardware.
  * @app:	Pointer to the APP handle
  * @netdev:	netdev structure.
  * @flow:	TC flower classifier offload structure
- * @egress:	Netdev is the egress dev.
  *
  * Removes a flow from the repeated hash structure and clears the
- * action payload.
+ * action payload. Any flows merged from this are also deleted.
  *
  * Return: negative value on error, 0 if removed successfully.
  */
 static int
 nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
-		       struct tc_cls_flower_offload *flow, bool egress)
+		       struct flow_cls_offload *flow)
 {
-	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	struct nfp_flower_priv *priv = app->priv;
+	struct netlink_ext_ack *extack = NULL;
 	struct nfp_fl_payload *nfp_flow;
-	struct net_device *ingr_dev;
+	struct nfp_port *port = NULL;
 	int err;
 
-	ingr_dev = egress ? NULL : netdev;
-	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
-					      NFP_FL_STATS_CTX_DONT_CARE);
-	if (!nfp_flow)
-		return egress ? 0 : -ENOENT;
+	extack = flow->common.extack;
+	if (nfp_netdev_is_nfp_repr(netdev))
+		port = nfp_port_from_netdev(netdev);
+
+	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev);
+	if (!nfp_flow) {
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot remove flow that does not exist");
+		return -ENOENT;
+	}
 
 	err = nfp_modify_flow_metadata(app, nfp_flow);
 	if (err)
-		goto err_free_flow;
+		goto err_free_merge_flow;
 
 	if (nfp_flow->nfp_tun_ipv4_addr)
 		nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
 
-	err = nfp_flower_xmit_flow(netdev, nfp_flow,
-				   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
-	if (err)
-		goto err_free_flow;
+	if (!nfp_flow->in_hw) {
+		err = 0;
+		goto err_free_merge_flow;
+	}
 
-err_free_flow:
-	hash_del_rcu(&nfp_flow->link);
-	port->tc_offload_cnt--;
+	if (nfp_flow->pre_tun_rule.dev)
+		err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow);
+	else
+		err = nfp_flower_xmit_flow(app, nfp_flow,
+					   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+	/* Fall through on error. */
+
+err_free_merge_flow:
+	nfp_flower_del_linked_merge_flows(app, nfp_flow);
+	if (port)
+		port->tc_offload_cnt--;
 	kfree(nfp_flow->action_data);
 	kfree(nfp_flow->mask_data);
 	kfree(nfp_flow->unmasked_data);
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+					    &nfp_flow->fl_node,
+					    nfp_flower_table_params));
 	kfree_rcu(nfp_flow, rcu);
 	return err;
 }
 
+static void
+__nfp_flower_update_merge_stats(struct nfp_app *app,
+				struct nfp_fl_payload *merge_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload_link *link;
+	struct nfp_fl_payload *sub_flow;
+	u64 pkts, bytes, used;
+	u32 ctx_id;
+
+	ctx_id = be32_to_cpu(merge_flow->meta.host_ctx_id);
+	pkts = priv->stats[ctx_id].pkts;
+	/* Do not cycle subflows if no stats to distribute. */
+	if (!pkts)
+		return;
+	bytes = priv->stats[ctx_id].bytes;
+	used = priv->stats[ctx_id].used;
+
+	/* Reset stats for the merge flow. */
+	priv->stats[ctx_id].pkts = 0;
+	priv->stats[ctx_id].bytes = 0;
+
+	/* The merge flow has received stats updates from firmware.
+	 * Distribute these stats to all subflows that form the merge.
+	 * The stats will then be collected by TC via the subflows.
+	 */
+	list_for_each_entry(link, &merge_flow->linked_flows, merge_flow.list) {
+		sub_flow = link->sub_flow.flow;
+		ctx_id = be32_to_cpu(sub_flow->meta.host_ctx_id);
+		priv->stats[ctx_id].pkts += pkts;
+		priv->stats[ctx_id].bytes += bytes;
+		priv->stats[ctx_id].used = max_t(u64, used,
+						 priv->stats[ctx_id].used);
+	}
+}
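
/* Editor's sketch, not part of the patch: a merge flow owns one firmware
 * stats context, but TC queries the original subflows, so the merge counters
 * are fanned out to every linked subflow and the 'used' timestamp is kept as
 * a maximum. Modelled in userspace with a fixed array of subflows; the names
 * are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

struct flow_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t used;	/* last-use timestamp, jiffies-like */
};

static void distribute(struct flow_stats *merge, struct flow_stats *subs,
		       int n_subs)
{
	int i;

	if (!merge->pkts)	/* nothing to hand out */
		return;

	for (i = 0; i < n_subs; i++) {
		subs[i].pkts += merge->pkts;
		subs[i].bytes += merge->bytes;
		if (merge->used > subs[i].used)	/* keep the newest stamp */
			subs[i].used = merge->used;
	}
	merge->pkts = 0;	/* reset so deltas are not double counted */
	merge->bytes = 0;
}

int main(void)
{
	struct flow_stats merge = { 10, 640, 5000 };
	struct flow_stats subs[2] = { { 1, 64, 4000 }, { 0, 0, 0 } };

	distribute(&merge, subs, 2);
	printf("sub0: %llu pkts, used %llu\n",
	       (unsigned long long)subs[0].pkts,
	       (unsigned long long)subs[0].used);
	return 0;
}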
+
+static void
+nfp_flower_update_merge_stats(struct nfp_app *app,
+			      struct nfp_fl_payload *sub_flow)
+{
+	struct nfp_fl_payload_link *link;
+
+	/* Get merge flows that the subflow forms to distribute their stats. */
+	list_for_each_entry(link, &sub_flow->linked_flows, sub_flow.list)
+		__nfp_flower_update_merge_stats(app, link->merge_flow.flow);
+}
+
 /**
  * nfp_flower_get_stats() - Populates flow stats obtained from hardware.
  * @app:	Pointer to the APP handle
  * @netdev:	Netdev structure.
  * @flow:	TC flower classifier offload structure
- * @egress:	Netdev is the egress dev.
  *
  * Populates a flow statistics structure which corresponds to a
  * specific flow.
@@ -596,62 +1396,51 @@
  */
 static int
 nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
-		     struct tc_cls_flower_offload *flow, bool egress)
+		     struct flow_cls_offload *flow)
 {
+	struct nfp_flower_priv *priv = app->priv;
+	struct netlink_ext_ack *extack = NULL;
 	struct nfp_fl_payload *nfp_flow;
-	struct net_device *ingr_dev;
+	u32 ctx_id;
 
-	ingr_dev = egress ? NULL : netdev;
-	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
-					      NFP_FL_STATS_CTX_DONT_CARE);
-	if (!nfp_flow)
+	extack = flow->common.extack;
+	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev);
+	if (!nfp_flow) {
+		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot dump stats for flow that does not exist");
 		return -EINVAL;
+	}
 
-	if (nfp_flow->ingress_offload && egress)
-		return 0;
+	ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
 
-	spin_lock_bh(&nfp_flow->lock);
-	tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes,
-			      nfp_flow->stats.pkts, nfp_flow->stats.used);
+	spin_lock_bh(&priv->stats_lock);
+	/* If request is for a sub_flow, update stats from merged flows. */
+	if (!list_empty(&nfp_flow->linked_flows))
+		nfp_flower_update_merge_stats(app, nfp_flow);
 
-	nfp_flow->stats.pkts = 0;
-	nfp_flow->stats.bytes = 0;
-	spin_unlock_bh(&nfp_flow->lock);
+	flow_stats_update(&flow->stats, priv->stats[ctx_id].bytes,
+			  priv->stats[ctx_id].pkts, priv->stats[ctx_id].used);
+
+	priv->stats[ctx_id].pkts = 0;
+	priv->stats[ctx_id].bytes = 0;
+	spin_unlock_bh(&priv->stats_lock);
 
 	return 0;
 }
 
 static int
 nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
-			struct tc_cls_flower_offload *flower, bool egress)
+			struct flow_cls_offload *flower)
 {
 	if (!eth_proto_is_802_3(flower->common.protocol))
 		return -EOPNOTSUPP;
 
 	switch (flower->command) {
-	case TC_CLSFLOWER_REPLACE:
-		return nfp_flower_add_offload(app, netdev, flower, egress);
-	case TC_CLSFLOWER_DESTROY:
-		return nfp_flower_del_offload(app, netdev, flower, egress);
-	case TC_CLSFLOWER_STATS:
-		return nfp_flower_get_stats(app, netdev, flower, egress);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
-				  void *cb_priv)
-{
-	struct nfp_repr *repr = cb_priv;
-
-	if (!tc_cls_can_offload_and_chain0(repr->netdev, type_data))
-		return -EOPNOTSUPP;
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return nfp_flower_repr_offload(repr->app, repr->netdev,
-					       type_data, true);
+	case FLOW_CLS_REPLACE:
+		return nfp_flower_add_offload(app, netdev, flower);
+	case FLOW_CLS_DESTROY:
+		return nfp_flower_del_offload(app, netdev, flower);
+	case FLOW_CLS_STATS:
+		return nfp_flower_get_stats(app, netdev, flower);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -668,29 +1457,54 @@
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return nfp_flower_repr_offload(repr->app, repr->netdev,
-					       type_data, false);
+					       type_data);
+	case TC_SETUP_CLSMATCHALL:
+		return nfp_flower_setup_qos_offload(repr->app, repr->netdev,
+						    type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
+static LIST_HEAD(nfp_block_cb_list);
+
 static int nfp_flower_setup_tc_block(struct net_device *netdev,
-				     struct tc_block_offload *f)
+				     struct flow_block_offload *f)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
+	struct nfp_flower_repr_priv *repr_priv;
+	struct flow_block_cb *block_cb;
 
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
+	repr_priv = repr->app_priv;
+	repr_priv->block_shared = f->block_shared;
+	f->driver_block_list = &nfp_block_cb_list;
+
 	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block,
-					     nfp_flower_setup_tc_block_cb,
-					     repr, repr, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block,
-					nfp_flower_setup_tc_block_cb,
-					repr);
+	case FLOW_BLOCK_BIND:
+		if (flow_block_cb_is_busy(nfp_flower_setup_tc_block_cb, repr,
+					  &nfp_block_cb_list))
+			return -EBUSY;
+
+		block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb,
+					       repr, repr, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &nfp_block_cb_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		block_cb = flow_block_cb_lookup(f->block,
+						nfp_flower_setup_tc_block_cb,
+						repr);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -707,3 +1521,154 @@
 		return -EOPNOTSUPP;
 	}
 }
+
+struct nfp_flower_indr_block_cb_priv {
+	struct net_device *netdev;
+	struct nfp_app *app;
+	struct list_head list;
+};
+
+static struct nfp_flower_indr_block_cb_priv *
+nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app,
+				     struct net_device *netdev)
+{
+	struct nfp_flower_indr_block_cb_priv *cb_priv;
+	struct nfp_flower_priv *priv = app->priv;
+
+	/* All callback list access should be protected by RTNL. */
+	ASSERT_RTNL();
+
+	list_for_each_entry(cb_priv, &priv->indr_block_cb_priv, list)
+		if (cb_priv->netdev == netdev)
+			return cb_priv;
+
+	return NULL;
+}
+
+static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type,
+					  void *type_data, void *cb_priv)
+{
+	struct nfp_flower_indr_block_cb_priv *priv = cb_priv;
+	struct flow_cls_offload *flower = type_data;
+
+	if (flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return nfp_flower_repr_offload(priv->app, priv->netdev,
+					       type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void nfp_flower_setup_indr_tc_release(void *cb_priv)
+{
+	struct nfp_flower_indr_block_cb_priv *priv = cb_priv;
+
+	list_del(&priv->list);
+	kfree(priv);
+}
+
+static int
+nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
+			       struct flow_block_offload *f)
+{
+	struct nfp_flower_indr_block_cb_priv *cb_priv;
+	struct nfp_flower_priv *priv = app->priv;
+	struct flow_block_cb *block_cb;
+
+	if ((f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+	     !nfp_flower_internal_port_can_offload(app, netdev)) ||
+	    (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS &&
+	     nfp_flower_internal_port_can_offload(app, netdev)))
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev);
+		if (cb_priv &&
+		    flow_block_cb_is_busy(nfp_flower_setup_indr_block_cb,
+					  cb_priv,
+					  &nfp_block_cb_list))
+			return -EBUSY;
+
+		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
+		if (!cb_priv)
+			return -ENOMEM;
+
+		cb_priv->netdev = netdev;
+		cb_priv->app = app;
+		list_add(&cb_priv->list, &priv->indr_block_cb_priv);
+
+		block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb,
+					       cb_priv, cb_priv,
+					       nfp_flower_setup_indr_tc_release);
+		if (IS_ERR(block_cb)) {
+			list_del(&cb_priv->list);
+			kfree(cb_priv);
+			return PTR_ERR(block_cb);
+		}
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &nfp_block_cb_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev);
+		if (!cb_priv)
+			return -ENOENT;
+
+		block_cb = flow_block_cb_lookup(f->block,
+						nfp_flower_setup_indr_block_cb,
+						cb_priv);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int
+nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
+			    enum tc_setup_type type, void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return nfp_flower_setup_indr_tc_block(netdev, cb_priv,
+						      type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
+				       struct net_device *netdev,
+				       unsigned long event)
+{
+	int err;
+
+	if (!nfp_fl_is_netdev_to_offload(netdev))
+		return NOTIFY_OK;
+
+	if (event == NETDEV_REGISTER) {
+		err = __flow_indr_block_cb_register(netdev, app,
+						    nfp_flower_indr_setup_tc_cb,
+						    app);
+		if (err)
+			nfp_flower_cmsg_warn(app,
+					     "Indirect block reg failed - %s\n",
+					     netdev->name);
+	} else if (event == NETDEV_UNREGISTER) {
+		__flow_indr_block_cb_unregister(netdev,
+						nfp_flower_indr_setup_tc_cb,
+						app);
+	}
+
+	return NOTIFY_OK;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
new file mode 100644
index 0000000..124a43d
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/math64.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_port.h"
+
+#define NFP_FL_QOS_UPDATE		msecs_to_jiffies(1000)
+
+struct nfp_police_cfg_head {
+	__be32 flags_opts;
+	__be32 port;
+};
+
+/* Police cmsg for configuring a trTCM traffic conditioner (8W/32B)
+ * See RFC 2698 for more details.
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                          Flag options                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                          Port Ingress                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        Token Bucket Peak                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     Token Bucket Committed                    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         Peak Burst Size                       |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Committed Burst Size                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Peak Information Rate                    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                    Committed Information Rate                 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_police_config {
+	struct nfp_police_cfg_head head;
+	__be32 bkt_tkn_p;
+	__be32 bkt_tkn_c;
+	__be32 pbs;
+	__be32 cbs;
+	__be32 pir;
+	__be32 cir;
+};
+
+struct nfp_police_stats_reply {
+	struct nfp_police_cfg_head head;
+	__be64 pass_bytes;
+	__be64 pass_pkts;
+	__be64 drop_bytes;
+	__be64 drop_pkts;
+};
+
+static int
+nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
+				struct tc_cls_matchall_offload *flow,
+				struct netlink_ext_ack *extack)
+{
+	struct flow_action_entry *action = &flow->rule->action.entries[0];
+	struct nfp_flower_priv *fl_priv = app->priv;
+	struct nfp_flower_repr_priv *repr_priv;
+	struct nfp_police_config *config;
+	struct nfp_repr *repr;
+	struct sk_buff *skb;
+	u32 netdev_port_id;
+	u64 burst, rate;
+
+	if (!nfp_netdev_is_nfp_repr(netdev)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port");
+		return -EOPNOTSUPP;
+	}
+	repr = netdev_priv(netdev);
+	repr_priv = repr->app_priv;
+
+	if (repr_priv->block_shared) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on shared blocks");
+		return -EOPNOTSUPP;
+	}
+
+	if (repr->port->type != NFP_PORT_VF_PORT) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on non-VF ports");
+		return -EOPNOTSUPP;
+	}
+
+	if (!flow_offload_has_one_action(&flow->rule->action)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires a single action");
+		return -EOPNOTSUPP;
+	}
+
+	if (flow->common.prio != 1) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority");
+		return -EOPNOTSUPP;
+	}
+
+	if (action->id != FLOW_ACTION_POLICE) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires police action");
+		return -EOPNOTSUPP;
+	}
+
+	rate = action->police.rate_bytes_ps;
+	burst = div_u64(rate * PSCHED_NS2TICKS(action->police.burst),
+			PSCHED_TICKS_PER_SEC);
+	netdev_port_id = nfp_repr_get_port_id(netdev);
+
+	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config),
+				    NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	config = nfp_flower_cmsg_get_data(skb);
+	memset(config, 0, sizeof(struct nfp_police_config));
+	config->head.port = cpu_to_be32(netdev_port_id);
+	config->bkt_tkn_p = cpu_to_be32(burst);
+	config->bkt_tkn_c = cpu_to_be32(burst);
+	config->pbs = cpu_to_be32(burst);
+	config->cbs = cpu_to_be32(burst);
+	config->pir = cpu_to_be32(rate);
+	config->cir = cpu_to_be32(rate);
+	nfp_ctrl_tx(repr->app->ctrl, skb);
+
+	repr_priv->qos_table.netdev_port_id = netdev_port_id;
+	fl_priv->qos_rate_limiters++;
+	if (fl_priv->qos_rate_limiters == 1)
+		schedule_delayed_work(&fl_priv->qos_stats_work,
+				      NFP_FL_QOS_UPDATE);
+
+	return 0;
+}
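
/* Editor's sketch, not part of the patch: the burst programmed above is the
 * policer's rate scaled by the burst duration. The kernel path converts via
 * psched ticks; this userspace model assumes the burst is already expressed
 * in nanoseconds, which keeps the arithmetic visible.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* bytes = rate (bytes/s) * burst window (ns) / ns per second */
static uint64_t burst_bytes(uint64_t rate_bytes_ps, uint64_t burst_ns)
{
	return rate_bytes_ps * burst_ns / NSEC_PER_SEC;
}

int main(void)
{
	/* 1 Gbit/s (125 MB/s) with a 10 us burst window -> 1250 bytes */
	printf("%llu\n",
	       (unsigned long long)burst_bytes(125000000ULL, 10000ULL));
	return 0;
}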
+
+static int
+nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev,
+			       struct tc_cls_matchall_offload *flow,
+			       struct netlink_ext_ack *extack)
+{
+	struct nfp_flower_priv *fl_priv = app->priv;
+	struct nfp_flower_repr_priv *repr_priv;
+	struct nfp_police_config *config;
+	struct nfp_repr *repr;
+	struct sk_buff *skb;
+	u32 netdev_port_id;
+
+	if (!nfp_netdev_is_nfp_repr(netdev)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port");
+		return -EOPNOTSUPP;
+	}
+	repr = netdev_priv(netdev);
+
+	netdev_port_id = nfp_repr_get_port_id(netdev);
+	repr_priv = repr->app_priv;
+
+	if (!repr_priv->qos_table.netdev_port_id) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot remove qos entry that does not exist");
+		return -EOPNOTSUPP;
+	}
+
+	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config),
+				    NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Clear all qos associated data for this interface */
+	memset(&repr_priv->qos_table, 0, sizeof(struct nfp_fl_qos));
+	fl_priv->qos_rate_limiters--;
+	if (!fl_priv->qos_rate_limiters)
+		cancel_delayed_work_sync(&fl_priv->qos_stats_work);
+
+	config = nfp_flower_cmsg_get_data(skb);
+	memset(config, 0, sizeof(struct nfp_police_config));
+	config->head.port = cpu_to_be32(netdev_port_id);
+	nfp_ctrl_tx(repr->app->ctrl, skb);
+
+	return 0;
+}
+
+void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_flower_priv *fl_priv = app->priv;
+	struct nfp_flower_repr_priv *repr_priv;
+	struct nfp_police_stats_reply *msg;
+	struct nfp_stat_pair *curr_stats;
+	struct nfp_stat_pair *prev_stats;
+	struct net_device *netdev;
+	struct nfp_repr *repr;
+	u32 netdev_port_id;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	netdev_port_id = be32_to_cpu(msg->head.port);
+	rcu_read_lock();
+	netdev = nfp_app_dev_get(app, netdev_port_id, NULL);
+	if (!netdev)
+		goto exit_unlock_rcu;
+
+	repr = netdev_priv(netdev);
+	repr_priv = repr->app_priv;
+	curr_stats = &repr_priv->qos_table.curr_stats;
+	prev_stats = &repr_priv->qos_table.prev_stats;
+
+	spin_lock_bh(&fl_priv->qos_stats_lock);
+	curr_stats->pkts = be64_to_cpu(msg->pass_pkts) +
+			   be64_to_cpu(msg->drop_pkts);
+	curr_stats->bytes = be64_to_cpu(msg->pass_bytes) +
+			    be64_to_cpu(msg->drop_bytes);
+
+	if (!repr_priv->qos_table.last_update) {
+		prev_stats->pkts = curr_stats->pkts;
+		prev_stats->bytes = curr_stats->bytes;
+	}
+
+	repr_priv->qos_table.last_update = jiffies;
+	spin_unlock_bh(&fl_priv->qos_stats_lock);
+
+exit_unlock_rcu:
+	rcu_read_unlock();
+}
+
+static void
+nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv,
+			      u32 netdev_port_id)
+{
+	struct nfp_police_cfg_head *head;
+	struct sk_buff *skb;
+
+	skb = nfp_flower_cmsg_alloc(fl_priv->app,
+				    sizeof(struct nfp_police_cfg_head),
+				    NFP_FLOWER_CMSG_TYPE_QOS_STATS,
+				    GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	head = nfp_flower_cmsg_get_data(skb);
+	memset(head, 0, sizeof(struct nfp_police_cfg_head));
+	head->port = cpu_to_be32(netdev_port_id);
+
+	nfp_ctrl_tx(fl_priv->app->ctrl, skb);
+}
+
+static void
+nfp_flower_stats_rlim_request_all(struct nfp_flower_priv *fl_priv)
+{
+	struct nfp_reprs *repr_set;
+	int i;
+
+	rcu_read_lock();
+	repr_set = rcu_dereference(fl_priv->app->reprs[NFP_REPR_TYPE_VF]);
+	if (!repr_set)
+		goto exit_unlock_rcu;
+
+	for (i = 0; i < repr_set->num_reprs; i++) {
+		struct net_device *netdev;
+
+		netdev = rcu_dereference(repr_set->reprs[i]);
+		if (netdev) {
+			struct nfp_repr *priv = netdev_priv(netdev);
+			struct nfp_flower_repr_priv *repr_priv;
+			u32 netdev_port_id;
+
+			repr_priv = priv->app_priv;
+			netdev_port_id = repr_priv->qos_table.netdev_port_id;
+			if (!netdev_port_id)
+				continue;
+
+			nfp_flower_stats_rlim_request(fl_priv, netdev_port_id);
+		}
+	}
+
+exit_unlock_rcu:
+	rcu_read_unlock();
+}
+
+static void update_stats_cache(struct work_struct *work)
+{
+	struct delayed_work *delayed_work;
+	struct nfp_flower_priv *fl_priv;
+
+	delayed_work = to_delayed_work(work);
+	fl_priv = container_of(delayed_work, struct nfp_flower_priv,
+			       qos_stats_work);
+
+	nfp_flower_stats_rlim_request_all(fl_priv);
+	schedule_delayed_work(&fl_priv->qos_stats_work, NFP_FL_QOS_UPDATE);
+}
+
+static int
+nfp_flower_stats_rate_limiter(struct nfp_app *app, struct net_device *netdev,
+			      struct tc_cls_matchall_offload *flow,
+			      struct netlink_ext_ack *extack)
+{
+	struct nfp_flower_priv *fl_priv = app->priv;
+	struct nfp_flower_repr_priv *repr_priv;
+	struct nfp_stat_pair *curr_stats;
+	struct nfp_stat_pair *prev_stats;
+	u64 diff_bytes, diff_pkts;
+	struct nfp_repr *repr;
+
+	if (!nfp_netdev_is_nfp_repr(netdev)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port");
+		return -EOPNOTSUPP;
+	}
+	repr = netdev_priv(netdev);
+
+	repr_priv = repr->app_priv;
+	if (!repr_priv->qos_table.netdev_port_id) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot find qos entry for stats update");
+		return -EOPNOTSUPP;
+	}
+
+	spin_lock_bh(&fl_priv->qos_stats_lock);
+	curr_stats = &repr_priv->qos_table.curr_stats;
+	prev_stats = &repr_priv->qos_table.prev_stats;
+	diff_pkts = curr_stats->pkts - prev_stats->pkts;
+	diff_bytes = curr_stats->bytes - prev_stats->bytes;
+	prev_stats->pkts = curr_stats->pkts;
+	prev_stats->bytes = curr_stats->bytes;
+	spin_unlock_bh(&fl_priv->qos_stats_lock);
+
+	flow_stats_update(&flow->stats, diff_bytes, diff_pkts,
+			  repr_priv->qos_table.last_update);
+	return 0;
+}
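
/* Editor's sketch, not part of the patch: stats are reported to TC as
 * deltas -- the difference between the cached firmware counters and the
 * values handed out last time. Unsigned subtraction keeps the delta correct
 * even across a u64 wrap. Modelled in userspace.
 */
#include <stdint.h>
#include <stdio.h>

struct stat_pair {
	uint64_t pkts;
	uint64_t bytes;
};

static void stats_delta(const struct stat_pair *curr, struct stat_pair *prev,
			struct stat_pair *out)
{
	out->pkts = curr->pkts - prev->pkts;	/* wrap-safe on uint64_t */
	out->bytes = curr->bytes - prev->bytes;
	*prev = *curr;				/* next query starts here */
}

int main(void)
{
	struct stat_pair prev = { 100, 6400 }, curr = { 150, 9600 }, d;

	stats_delta(&curr, &prev, &d);
	printf("pkts=%llu bytes=%llu\n",
	       (unsigned long long)d.pkts, (unsigned long long)d.bytes);
	return 0;
}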
+
+void nfp_flower_qos_init(struct nfp_app *app)
+{
+	struct nfp_flower_priv *fl_priv = app->priv;
+
+	spin_lock_init(&fl_priv->qos_stats_lock);
+	INIT_DELAYED_WORK(&fl_priv->qos_stats_work, &update_stats_cache);
+}
+
+void nfp_flower_qos_cleanup(struct nfp_app *app)
+{
+	struct nfp_flower_priv *fl_priv = app->priv;
+
+	cancel_delayed_work_sync(&fl_priv->qos_stats_work);
+}
+
+int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev,
+				 struct tc_cls_matchall_offload *flow)
+{
+	struct netlink_ext_ack *extack = flow->common.extack;
+	struct nfp_flower_priv *fl_priv = app->priv;
+
+	if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support qos rate limit offload");
+		return -EOPNOTSUPP;
+	}
+
+	switch (flow->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return nfp_flower_install_rate_limiter(app, netdev, flow,
+						       extack);
+	case TC_CLSMATCHALL_DESTROY:
+		return nfp_flower_remove_rate_limiter(app, netdev, flow,
+						      extack);
+	case TC_CLSMATCHALL_STATS:
+		return nfp_flower_stats_rate_limiter(app, netdev, flow,
+						     extack);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
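
The qos hunks above cache cumulative firmware counters and report only deltas to TC: update_stats_cache() polls the firmware from a delayed workqueue, while nfp_flower_stats_rate_limiter() subtracts the previously reported counters under qos_stats_lock. A minimal standalone sketch of that delta-reporting pattern follows; the type and function names are illustrative, not driver symbols.

	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct stat_pair {
		u64 pkts;
		u64 bytes;
	};

	struct qos_entry {
		spinlock_t lock;
		struct stat_pair curr;	/* filled in from firmware stats messages */
		struct stat_pair prev;	/* counters already reported to TC */
	};

	/* Report only what changed since the last query, then advance prev. */
	static void qos_report_delta(struct qos_entry *e, u64 *d_pkts, u64 *d_bytes)
	{
		spin_lock_bh(&e->lock);
		*d_pkts = e->curr.pkts - e->prev.pkts;
		*d_bytes = e->curr.bytes - e->prev.bytes;
		e->prev = e->curr;
		spin_unlock_bh(&e->lock);
	}

Keeping prev alongside curr means a lost or delayed stats query never double-counts traffic: the next query simply reports a larger delta.
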
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 382bb93..2600ce4 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
@@ -45,6 +15,24 @@
 
 #define NFP_FL_MAX_ROUTES               32
 
+#define NFP_TUN_PRE_TUN_RULE_LIMIT	32
+#define NFP_TUN_PRE_TUN_RULE_DEL	0x1
+#define NFP_TUN_PRE_TUN_IDX_BIT		0x8
+
+/**
+ * struct nfp_tun_pre_tun_rule - rule matched before decap
+ * @flags:		options for the rule offload
+ * @port_idx:		index of destination MAC address for the rule
+ * @vlan_tci:		VLAN info associated with MAC
+ * @host_ctx_id:	stats context of rule to update
+ */
+struct nfp_tun_pre_tun_rule {
+	__be32 flags;
+	__be16 port_idx;
+	__be16 vlan_tci;
+	__be32 host_ctx_id;
+};
+
 /**
  * struct nfp_tun_active_tuns - periodic message of active tunnels
  * @seq:		sequence number of the message
@@ -128,47 +116,53 @@
 	struct list_head list;
 };
 
-/**
- * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP
- * @reserved:	reserved for future use
- * @count:	number of MAC addresses in the message
- * @addresses.index:	index of MAC address in the lookup table
- * @addresses.addr:	interface MAC address
- * @addresses:	series of MACs to offload
- */
-struct nfp_tun_mac_addr {
-	__be16 reserved;
-	__be16 count;
-	struct index_mac_addr {
-		__be16 index;
-		u8 addr[ETH_ALEN];
-	} addresses[];
-};
+#define NFP_TUN_MAC_OFFLOAD_DEL_FLAG	0x2
 
 /**
- * struct nfp_tun_mac_offload_entry - list of MACs to offload
- * @index:	index of MAC address for offloading
+ * struct nfp_tun_mac_addr_offload - configure MAC address of tunnel EP on NFP
+ * @flags:	MAC address offload options
+ * @count:	number of MAC addresses in the message (should be 1)
+ * @index:	index of MAC address in the lookup table
  * @addr:	interface MAC address
- * @list:	list pointer
  */
-struct nfp_tun_mac_offload_entry {
+struct nfp_tun_mac_addr_offload {
+	__be16 flags;
+	__be16 count;
 	__be16 index;
 	u8 addr[ETH_ALEN];
-	struct list_head list;
+};
+
+enum nfp_flower_mac_offload_cmd {
+	NFP_TUNNEL_MAC_OFFLOAD_ADD =		0,
+	NFP_TUNNEL_MAC_OFFLOAD_DEL =		1,
+	NFP_TUNNEL_MAC_OFFLOAD_MOD =		2,
 };
 
 #define NFP_MAX_MAC_INDEX       0xff
 
 /**
- * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id
- * @ifindex:	netdev ifindex of the device
- * @index:	index of netdevs mac on NFP
- * @list:	list pointer
+ * struct nfp_tun_offloaded_mac - hashtable entry for an offloaded MAC
+ * @ht_node:		Hashtable entry
+ * @addr:		Offloaded MAC address
+ * @index:		Offloaded index for given MAC address
+ * @ref_count:		Number of devs using this MAC address
+ * @repr_list:		List of reprs sharing this MAC address
+ * @bridge_count:	Number of bridge/internal devs with MAC
  */
-struct nfp_tun_mac_non_nfp_idx {
-	int ifindex;
-	u8 index;
-	struct list_head list;
+struct nfp_tun_offloaded_mac {
+	struct rhash_head ht_node;
+	u8 addr[ETH_ALEN];
+	u16 index;
+	int ref_count;
+	struct list_head repr_list;
+	int bridge_count;
+};
+
+static const struct rhashtable_params offloaded_macs_params = {
+	.key_offset	= offsetof(struct nfp_tun_offloaded_mac, addr),
+	.head_offset	= offsetof(struct nfp_tun_offloaded_mac, ht_node),
+	.key_len	= ETH_ALEN,
+	.automatic_shrinking	= true,
 };
 
 void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
@@ -188,16 +182,16 @@
 	}
 
 	pay_len = nfp_flower_cmsg_get_data_len(skb);
-	if (pay_len != sizeof(struct nfp_tun_active_tuns) +
-	    sizeof(struct route_ip_info) * count) {
+	if (pay_len != struct_size(payload, tun_info, count)) {
 		nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n");
 		return;
 	}
 
+	rcu_read_lock();
 	for (i = 0; i < count; i++) {
 		ipv4_addr = payload->tun_info[i].ipv4;
 		port = be32_to_cpu(payload->tun_info[i].egress_port);
-		netdev = nfp_app_repr_get(app, port);
+		netdev = nfp_app_dev_get(app, port, NULL);
 		if (!netdev)
 			continue;
 
@@ -209,18 +203,7 @@
 		neigh_event_send(n, NULL);
 		neigh_release(n);
 	}
-}
-
-static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
-{
-	if (!netdev->rtnl_link_ops)
-		return false;
-	if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
-		return true;
-	if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
-		return true;
-
-	return false;
+	rcu_read_unlock();
 }
 
 static int
@@ -247,15 +230,15 @@
 	struct nfp_ipv4_route_entry *entry;
 	struct list_head *ptr, *storage;
 
-	spin_lock_bh(&priv->nfp_neigh_off_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+	spin_lock_bh(&priv->tun.neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
 		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
 		if (entry->ipv4_addr == ipv4_addr) {
-			spin_unlock_bh(&priv->nfp_neigh_off_lock);
+			spin_unlock_bh(&priv->tun.neigh_off_lock);
 			return true;
 		}
 	}
-	spin_unlock_bh(&priv->nfp_neigh_off_lock);
+	spin_unlock_bh(&priv->tun.neigh_off_lock);
 	return false;
 }
 
@@ -265,24 +248,24 @@
 	struct nfp_ipv4_route_entry *entry;
 	struct list_head *ptr, *storage;
 
-	spin_lock_bh(&priv->nfp_neigh_off_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+	spin_lock_bh(&priv->tun.neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
 		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
 		if (entry->ipv4_addr == ipv4_addr) {
-			spin_unlock_bh(&priv->nfp_neigh_off_lock);
+			spin_unlock_bh(&priv->tun.neigh_off_lock);
 			return;
 		}
 	}
 	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry) {
-		spin_unlock_bh(&priv->nfp_neigh_off_lock);
+		spin_unlock_bh(&priv->tun.neigh_off_lock);
 		nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n");
 		return;
 	}
 
 	entry->ipv4_addr = ipv4_addr;
-	list_add_tail(&entry->list, &priv->nfp_neigh_off_list);
-	spin_unlock_bh(&priv->nfp_neigh_off_lock);
+	list_add_tail(&entry->list, &priv->tun.neigh_off_list);
+	spin_unlock_bh(&priv->tun.neigh_off_lock);
 }
 
 static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
@@ -291,8 +274,8 @@
 	struct nfp_ipv4_route_entry *entry;
 	struct list_head *ptr, *storage;
 
-	spin_lock_bh(&priv->nfp_neigh_off_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+	spin_lock_bh(&priv->tun.neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
 		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
 		if (entry->ipv4_addr == ipv4_addr) {
 			list_del(&entry->list);
@@ -300,7 +283,7 @@
 			break;
 		}
 	}
-	spin_unlock_bh(&priv->nfp_neigh_off_lock);
+	spin_unlock_bh(&priv->tun.neigh_off_lock);
 }
 
 static void
@@ -308,9 +291,10 @@
 		    struct flowi4 *flow, struct neighbour *neigh, gfp_t flag)
 {
 	struct nfp_tun_neigh payload;
+	u32 port_id;
 
-	/* Only offload representor IPv4s for now. */
-	if (!nfp_netdev_is_nfp_repr(netdev))
+	port_id = nfp_flower_get_port_id_from_netdev(app, netdev);
+	if (!port_id)
 		return;
 
 	memset(&payload, 0, sizeof(struct nfp_tun_neigh));
@@ -328,7 +312,7 @@
 	payload.src_ipv4 = flow->saddr;
 	ether_addr_copy(payload.src_addr, netdev->dev_addr);
 	neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
-	payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev));
+	payload.port_id = cpu_to_be32(port_id);
 	/* Add destination of new route to NFP cache. */
 	nfp_tun_add_route_to_cache(app, payload.dst_ipv4);
 
@@ -364,13 +348,13 @@
 
 	flow.daddr = *(__be32 *)n->primary_key;
 
-	/* Only concerned with route changes for representors. */
-	if (!nfp_netdev_is_nfp_repr(n->dev))
-		return NOTIFY_DONE;
-
-	app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb);
+	app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb);
 	app = app_priv->app;
 
+	if (!nfp_netdev_is_nfp_repr(n->dev) &&
+	    !nfp_flower_internal_port_can_offload(app, n->dev))
+		return NOTIFY_DONE;
+
 	/* Only concerned with changes to routes already added to NFP. */
 	if (!nfp_tun_has_route(app, flow.daddr))
 		return NOTIFY_DONE;
@@ -404,9 +388,10 @@
 
 	payload = nfp_flower_cmsg_get_data(skb);
 
-	netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port));
+	rcu_read_lock();
+	netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL);
 	if (!netdev)
-		goto route_fail_warning;
+		goto fail_rcu_unlock;
 
 	flow.daddr = payload->ipv4_addr;
 	flow.flowi4_proto = IPPROTO_UDP;
@@ -416,21 +401,23 @@
 	rt = ip_route_output_key(dev_net(netdev), &flow);
 	err = PTR_ERR_OR_ZERO(rt);
 	if (err)
-		goto route_fail_warning;
+		goto fail_rcu_unlock;
 #else
-	goto route_fail_warning;
+	goto fail_rcu_unlock;
 #endif
 
 	/* Get the neighbour entry for the lookup */
 	n = dst_neigh_lookup(&rt->dst, &flow.daddr);
 	ip_rt_put(rt);
 	if (!n)
-		goto route_fail_warning;
-	nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_KERNEL);
+		goto fail_rcu_unlock;
+	nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC);
 	neigh_release(n);
+	rcu_read_unlock();
 	return;
 
-route_fail_warning:
+fail_rcu_unlock:
+	rcu_read_unlock();
 	nfp_flower_cmsg_warn(app, "Requested route not found.\n");
 }
 
@@ -443,11 +430,11 @@
 	int count;
 
 	memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr));
-	mutex_lock(&priv->nfp_ipv4_off_lock);
+	mutex_lock(&priv->tun.ipv4_off_lock);
 	count = 0;
-	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+	list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
 		if (count >= NFP_FL_IPV4_ADDRS_MAX) {
-			mutex_unlock(&priv->nfp_ipv4_off_lock);
+			mutex_unlock(&priv->tun.ipv4_off_lock);
 			nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n");
 			return;
 		}
@@ -455,7 +442,7 @@
 		payload.ipv4_addr[count++] = entry->ipv4_addr;
 	}
 	payload.count = cpu_to_be32(count);
-	mutex_unlock(&priv->nfp_ipv4_off_lock);
+	mutex_unlock(&priv->tun.ipv4_off_lock);
 
 	nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS,
 				 sizeof(struct nfp_tun_ipv4_addr),
@@ -468,26 +455,26 @@
 	struct nfp_ipv4_addr_entry *entry;
 	struct list_head *ptr, *storage;
 
-	mutex_lock(&priv->nfp_ipv4_off_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+	mutex_lock(&priv->tun.ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
 		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
 		if (entry->ipv4_addr == ipv4) {
 			entry->ref_count++;
-			mutex_unlock(&priv->nfp_ipv4_off_lock);
+			mutex_unlock(&priv->tun.ipv4_off_lock);
 			return;
 		}
 	}
 
 	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry) {
-		mutex_unlock(&priv->nfp_ipv4_off_lock);
+		mutex_unlock(&priv->tun.ipv4_off_lock);
 		nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n");
 		return;
 	}
 	entry->ipv4_addr = ipv4;
 	entry->ref_count = 1;
-	list_add_tail(&entry->list, &priv->nfp_ipv4_off_list);
-	mutex_unlock(&priv->nfp_ipv4_off_lock);
+	list_add_tail(&entry->list, &priv->tun.ipv4_off_list);
+	mutex_unlock(&priv->tun.ipv4_off_lock);
 
 	nfp_tun_write_ipv4_list(app);
 }
@@ -498,8 +485,8 @@
 	struct nfp_ipv4_addr_entry *entry;
 	struct list_head *ptr, *storage;
 
-	mutex_lock(&priv->nfp_ipv4_off_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+	mutex_lock(&priv->tun.ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
 		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
 		if (entry->ipv4_addr == ipv4) {
 			entry->ref_count--;
@@ -510,297 +497,568 @@
 			break;
 		}
 	}
-	mutex_unlock(&priv->nfp_ipv4_off_lock);
+	mutex_unlock(&priv->tun.ipv4_off_lock);
 
 	nfp_tun_write_ipv4_list(app);
 }
 
-void nfp_tunnel_write_macs(struct nfp_app *app)
+static int
+__nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del)
 {
-	struct nfp_flower_priv *priv = app->priv;
-	struct nfp_tun_mac_offload_entry *entry;
-	struct nfp_tun_mac_addr *payload;
-	struct list_head *ptr, *storage;
-	int mac_count, err, pay_size;
+	struct nfp_tun_mac_addr_offload payload;
 
-	mutex_lock(&priv->nfp_mac_off_lock);
-	if (!priv->nfp_mac_off_count) {
-		mutex_unlock(&priv->nfp_mac_off_lock);
-		return;
-	}
+	memset(&payload, 0, sizeof(payload));
 
-	pay_size = sizeof(struct nfp_tun_mac_addr) +
-		   sizeof(struct index_mac_addr) * priv->nfp_mac_off_count;
+	if (del)
+		payload.flags = cpu_to_be16(NFP_TUN_MAC_OFFLOAD_DEL_FLAG);
 
-	payload = kzalloc(pay_size, GFP_KERNEL);
-	if (!payload) {
-		mutex_unlock(&priv->nfp_mac_off_lock);
-		return;
-	}
+	/* FW supports multiple MACs per cmsg, but we restrict it to one. */
+	payload.count = cpu_to_be16(1);
+	payload.index = cpu_to_be16(idx);
+	ether_addr_copy(payload.addr, mac);
 
-	payload->count = cpu_to_be16(priv->nfp_mac_off_count);
-
-	mac_count = 0;
-	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
-		entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
-				   list);
-		payload->addresses[mac_count].index = entry->index;
-		ether_addr_copy(payload->addresses[mac_count].addr,
-				entry->addr);
-		mac_count++;
-	}
-
-	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC,
-				       pay_size, payload, GFP_KERNEL);
-
-	kfree(payload);
-
-	if (err) {
-		mutex_unlock(&priv->nfp_mac_off_lock);
-		/* Write failed so retain list for future retry. */
-		return;
-	}
-
-	/* If list was successfully offloaded, flush it. */
-	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
-		entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
-				   list);
-		list_del(&entry->list);
-		kfree(entry);
-	}
-
-	priv->nfp_mac_off_count = 0;
-	mutex_unlock(&priv->nfp_mac_off_lock);
+	return nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC,
+					sizeof(struct nfp_tun_mac_addr_offload),
+					&payload, GFP_KERNEL);
 }
 
-static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex)
+static bool nfp_tunnel_port_is_phy_repr(int port)
 {
-	struct nfp_flower_priv *priv = app->priv;
-	struct nfp_tun_mac_non_nfp_idx *entry;
-	struct list_head *ptr, *storage;
-	int idx;
-
-	mutex_lock(&priv->nfp_mac_index_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
-		entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
-		if (entry->ifindex == ifindex) {
-			idx = entry->index;
-			mutex_unlock(&priv->nfp_mac_index_lock);
-			return idx;
-		}
-	}
-
-	idx = ida_simple_get(&priv->nfp_mac_off_ids, 0,
-			     NFP_MAX_MAC_INDEX, GFP_KERNEL);
-	if (idx < 0) {
-		mutex_unlock(&priv->nfp_mac_index_lock);
-		return idx;
-	}
-
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry) {
-		mutex_unlock(&priv->nfp_mac_index_lock);
-		return -ENOMEM;
-	}
-	entry->ifindex = ifindex;
-	entry->index = idx;
-	list_add_tail(&entry->list, &priv->nfp_mac_index_list);
-	mutex_unlock(&priv->nfp_mac_index_lock);
-
-	return idx;
-}
-
-static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex)
-{
-	struct nfp_flower_priv *priv = app->priv;
-	struct nfp_tun_mac_non_nfp_idx *entry;
-	struct list_head *ptr, *storage;
-
-	mutex_lock(&priv->nfp_mac_index_lock);
-	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
-		entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
-		if (entry->ifindex == ifindex) {
-			ida_simple_remove(&priv->nfp_mac_off_ids,
-					  entry->index);
-			list_del(&entry->list);
-			kfree(entry);
-			break;
-		}
-	}
-	mutex_unlock(&priv->nfp_mac_index_lock);
-}
-
-static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev,
-					    struct nfp_app *app)
-{
-	struct nfp_flower_priv *priv = app->priv;
-	struct nfp_tun_mac_offload_entry *entry;
-	u16 nfp_mac_idx;
-	int port = 0;
-
-	/* Check if MAC should be offloaded. */
-	if (!is_valid_ether_addr(netdev->dev_addr))
-		return;
-
-	if (nfp_netdev_is_nfp_repr(netdev))
-		port = nfp_repr_get_port_id(netdev);
-	else if (!nfp_tun_is_netdev_to_offload(netdev))
-		return;
-
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry) {
-		nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n");
-		return;
-	}
-
 	if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
-	    NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) {
-		nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
-	} else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
-		   NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) {
-		port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port);
-		nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT;
-	} else {
-		/* Must assign our own unique 8-bit index. */
-		int idx = nfp_tun_get_mac_idx(app, netdev->ifindex);
+	    NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT)
+		return true;
 
-		if (idx < 0) {
-			nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n");
-			kfree(entry);
-			return;
-		}
-		nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
-	}
-
-	entry->index = cpu_to_be16(nfp_mac_idx);
-	ether_addr_copy(entry->addr, netdev->dev_addr);
-
-	mutex_lock(&priv->nfp_mac_off_lock);
-	priv->nfp_mac_off_count++;
-	list_add_tail(&entry->list, &priv->nfp_mac_off_list);
-	mutex_unlock(&priv->nfp_mac_off_lock);
+	return false;
 }
 
-static int nfp_tun_mac_event_handler(struct notifier_block *nb,
-				     unsigned long event, void *ptr)
+static u16 nfp_tunnel_get_mac_idx_from_phy_port_id(int port)
 {
-	struct nfp_flower_priv *app_priv;
-	struct net_device *netdev;
-	struct nfp_app *app;
+	return port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
+}
 
-	if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) {
-		app_priv = container_of(nb, struct nfp_flower_priv,
-					nfp_tun_mac_nb);
-		app = app_priv->app;
-		netdev = netdev_notifier_info_to_dev(ptr);
+static u16 nfp_tunnel_get_global_mac_idx_from_ida(int id)
+{
+	return id << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
+}
 
-		/* If non-nfp netdev then free its offload index. */
-		if (nfp_tun_is_netdev_to_offload(netdev))
-			nfp_tun_del_mac_idx(app, netdev->ifindex);
-	} else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR ||
-		   event == NETDEV_REGISTER) {
-		app_priv = container_of(nb, struct nfp_flower_priv,
-					nfp_tun_mac_nb);
-		app = app_priv->app;
-		netdev = netdev_notifier_info_to_dev(ptr);
+static int nfp_tunnel_get_ida_from_global_mac_idx(u16 nfp_mac_idx)
+{
+	return nfp_mac_idx >> 8;
+}
 
-		nfp_tun_add_to_mac_offload_list(netdev, app);
+static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx)
+{
+	return (nfp_mac_idx & 0xff) == NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
+}
 
-		/* Force a list write to keep NFP up to date. */
-		nfp_tunnel_write_macs(app);
+static struct nfp_tun_offloaded_mac *
+nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, u8 *mac)
+{
+	struct nfp_flower_priv *priv = app->priv;
+
+	return rhashtable_lookup_fast(&priv->tun.offloaded_macs, mac,
+				      offloaded_macs_params);
+}
+
+static void
+nfp_tunnel_offloaded_macs_inc_ref_and_link(struct nfp_tun_offloaded_mac *entry,
+					   struct net_device *netdev, bool mod)
+{
+	if (nfp_netdev_is_nfp_repr(netdev)) {
+		struct nfp_flower_repr_priv *repr_priv;
+		struct nfp_repr *repr;
+
+		repr = netdev_priv(netdev);
+		repr_priv = repr->app_priv;
+
+		/* If modifying MAC, remove repr from old list first. */
+		if (mod)
+			list_del(&repr_priv->mac_list);
+
+		list_add_tail(&repr_priv->mac_list, &entry->repr_list);
+	} else if (nfp_flower_is_supported_bridge(netdev)) {
+		entry->bridge_count++;
+	}
+
+	entry->ref_count++;
+}
+
+static int
+nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
+			  int port, bool mod)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	int ida_idx = NFP_MAX_MAC_INDEX, err;
+	struct nfp_tun_offloaded_mac *entry;
+	u16 nfp_mac_idx = 0;
+
+	entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
+	if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) {
+		if (entry->bridge_count ||
+		    !nfp_flower_is_supported_bridge(netdev)) {
+			nfp_tunnel_offloaded_macs_inc_ref_and_link(entry,
+								   netdev, mod);
+			return 0;
+		}
+
+		/* MAC is global but matches need to go to pre_tun table. */
+		nfp_mac_idx = entry->index | NFP_TUN_PRE_TUN_IDX_BIT;
+	}
+
+	if (!nfp_mac_idx) {
+		/* Assign a global index if non-repr or MAC is now shared. */
+		if (entry || !port) {
+			ida_idx = ida_simple_get(&priv->tun.mac_off_ids, 0,
+						 NFP_MAX_MAC_INDEX, GFP_KERNEL);
+			if (ida_idx < 0)
+				return ida_idx;
+
+			nfp_mac_idx =
+				nfp_tunnel_get_global_mac_idx_from_ida(ida_idx);
+
+			if (nfp_flower_is_supported_bridge(netdev))
+				nfp_mac_idx |= NFP_TUN_PRE_TUN_IDX_BIT;
+
+		} else {
+			nfp_mac_idx =
+				nfp_tunnel_get_mac_idx_from_phy_port_id(port);
+		}
+	}
+
+	if (!entry) {
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry) {
+			err = -ENOMEM;
+			goto err_free_ida;
+		}
+
+		ether_addr_copy(entry->addr, netdev->dev_addr);
+		INIT_LIST_HEAD(&entry->repr_list);
+
+		if (rhashtable_insert_fast(&priv->tun.offloaded_macs,
+					   &entry->ht_node,
+					   offloaded_macs_params)) {
+			err = -ENOMEM;
+			goto err_free_entry;
+		}
+	}
+
+	err = __nfp_tunnel_offload_mac(app, netdev->dev_addr,
+				       nfp_mac_idx, false);
+	if (err) {
+		/* If not shared then free. */
+		if (!entry->ref_count)
+			goto err_remove_hash;
+		goto err_free_ida;
+	}
+
+	entry->index = nfp_mac_idx;
+	nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod);
+
+	return 0;
+
+err_remove_hash:
+	rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node,
+			       offloaded_macs_params);
+err_free_entry:
+	kfree(entry);
+err_free_ida:
+	if (ida_idx != NFP_MAX_MAC_INDEX)
+		ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
+
+	return err;
+}
+
+static int
+nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
+			  u8 *mac, bool mod)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_flower_repr_priv *repr_priv;
+	struct nfp_tun_offloaded_mac *entry;
+	struct nfp_repr *repr;
+	int ida_idx;
+
+	entry = nfp_tunnel_lookup_offloaded_macs(app, mac);
+	if (!entry)
+		return 0;
+
+	entry->ref_count--;
+	/* If del is part of a mod then mac_list is still in use elsewhere. */
+	if (nfp_netdev_is_nfp_repr(netdev) && !mod) {
+		repr = netdev_priv(netdev);
+		repr_priv = repr->app_priv;
+		list_del(&repr_priv->mac_list);
+	}
+
+	if (nfp_flower_is_supported_bridge(netdev)) {
+		entry->bridge_count--;
+
+		if (!entry->bridge_count && entry->ref_count) {
+			u16 nfp_mac_idx;
+
+			nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
+			if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx,
+						     false)) {
+				nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n",
+						     netdev_name(netdev));
+				return 0;
+			}
+
+			entry->index = nfp_mac_idx;
+			return 0;
+		}
+	}
+
+	/* If MAC is now used by only 1 repr, set the offloaded MAC index to port. */
+	if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) {
+		u16 nfp_mac_idx;
+		int port, err;
+
+		repr_priv = list_first_entry(&entry->repr_list,
+					     struct nfp_flower_repr_priv,
+					     mac_list);
+		repr = repr_priv->nfp_repr;
+		port = nfp_repr_get_port_id(repr->netdev);
+		nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port);
+		err = __nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false);
+		if (err) {
+			nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n",
+					     netdev_name(netdev));
+			return 0;
+		}
+
+		ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
+		ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
+		entry->index = nfp_mac_idx;
+		return 0;
+	}
+
+	if (entry->ref_count)
+		return 0;
+
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs,
+					    &entry->ht_node,
+					    offloaded_macs_params));
+	/* If MAC has global ID then extract and free the ida entry. */
+	if (nfp_tunnel_is_mac_idx_global(entry->index)) {
+		ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
+		ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
+	}
+
+	kfree(entry);
+
+	return __nfp_tunnel_offload_mac(app, mac, 0, true);
+}
+
+static int
+nfp_tunnel_offload_mac(struct nfp_app *app, struct net_device *netdev,
+		       enum nfp_flower_mac_offload_cmd cmd)
+{
+	struct nfp_flower_non_repr_priv *nr_priv = NULL;
+	bool non_repr = false, *mac_offloaded;
+	u8 *off_mac = NULL;
+	int err, port = 0;
+
+	if (nfp_netdev_is_nfp_repr(netdev)) {
+		struct nfp_flower_repr_priv *repr_priv;
+		struct nfp_repr *repr;
+
+		repr = netdev_priv(netdev);
+		if (repr->app != app)
+			return 0;
+
+		repr_priv = repr->app_priv;
+		if (repr_priv->on_bridge)
+			return 0;
+
+		mac_offloaded = &repr_priv->mac_offloaded;
+		off_mac = &repr_priv->offloaded_mac_addr[0];
+		port = nfp_repr_get_port_id(netdev);
+		if (!nfp_tunnel_port_is_phy_repr(port))
+			return 0;
+	} else if (nfp_fl_is_netdev_to_offload(netdev)) {
+		nr_priv = nfp_flower_non_repr_priv_get(app, netdev);
+		if (!nr_priv)
+			return -ENOMEM;
+
+		mac_offloaded = &nr_priv->mac_offloaded;
+		off_mac = &nr_priv->offloaded_mac_addr[0];
+		non_repr = true;
+	} else {
+		return 0;
+	}
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		err = -EINVAL;
+		goto err_put_non_repr_priv;
+	}
+
+	if (cmd == NFP_TUNNEL_MAC_OFFLOAD_MOD && !*mac_offloaded)
+		cmd = NFP_TUNNEL_MAC_OFFLOAD_ADD;
+
+	switch (cmd) {
+	case NFP_TUNNEL_MAC_OFFLOAD_ADD:
+		err = nfp_tunnel_add_shared_mac(app, netdev, port, false);
+		if (err)
+			goto err_put_non_repr_priv;
+
+		if (non_repr)
+			__nfp_flower_non_repr_priv_get(nr_priv);
+
+		*mac_offloaded = true;
+		ether_addr_copy(off_mac, netdev->dev_addr);
+		break;
+	case NFP_TUNNEL_MAC_OFFLOAD_DEL:
+		/* Only attempt delete if add was successful. */
+		if (!*mac_offloaded)
+			break;
+
+		if (non_repr)
+			__nfp_flower_non_repr_priv_put(nr_priv);
+
+		*mac_offloaded = false;
+
+		err = nfp_tunnel_del_shared_mac(app, netdev, netdev->dev_addr,
+						false);
+		if (err)
+			goto err_put_non_repr_priv;
+
+		break;
+	case NFP_TUNNEL_MAC_OFFLOAD_MOD:
+		/* Ignore if changing to the same address. */
+		if (ether_addr_equal(netdev->dev_addr, off_mac))
+			break;
+
+		err = nfp_tunnel_add_shared_mac(app, netdev, port, true);
+		if (err)
+			goto err_put_non_repr_priv;
+
+		/* Delete the previous MAC address. */
+		err = nfp_tunnel_del_shared_mac(app, netdev, off_mac, true);
+		if (err)
+			nfp_flower_cmsg_warn(app, "Failed to remove offload of replaced MAC addr on %s.\n",
+					     netdev_name(netdev));
+
+		ether_addr_copy(off_mac, netdev->dev_addr);
+		break;
+	default:
+		err = -EINVAL;
+		goto err_put_non_repr_priv;
+	}
+
+	if (non_repr)
+		__nfp_flower_non_repr_priv_put(nr_priv);
+
+	return 0;
+
+err_put_non_repr_priv:
+	if (non_repr)
+		__nfp_flower_non_repr_priv_put(nr_priv);
+
+	return err;
+}
+
+int nfp_tunnel_mac_event_handler(struct nfp_app *app,
+				 struct net_device *netdev,
+				 unsigned long event, void *ptr)
+{
+	int err;
+
+	if (event == NETDEV_DOWN) {
+		err = nfp_tunnel_offload_mac(app, netdev,
+					     NFP_TUNNEL_MAC_OFFLOAD_DEL);
+		if (err)
+			nfp_flower_cmsg_warn(app, "Failed to delete offload MAC on %s.\n",
+					     netdev_name(netdev));
+	} else if (event == NETDEV_UP) {
+		err = nfp_tunnel_offload_mac(app, netdev,
+					     NFP_TUNNEL_MAC_OFFLOAD_ADD);
+		if (err)
+			nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n",
+					     netdev_name(netdev));
+	} else if (event == NETDEV_CHANGEADDR) {
+		/* Only offload addr change if netdev is already up. */
+		if (!(netdev->flags & IFF_UP))
+			return NOTIFY_OK;
+
+		err = nfp_tunnel_offload_mac(app, netdev,
+					     NFP_TUNNEL_MAC_OFFLOAD_MOD);
+		if (err)
+			nfp_flower_cmsg_warn(app, "Failed to offload MAC change on %s.\n",
+					     netdev_name(netdev));
+	} else if (event == NETDEV_CHANGEUPPER) {
+		/* If a repr is attached to a bridge then tunnel packets
+		 * entering the physical port are directed through the bridge
+		 * datapath and cannot be directly detunneled. Therefore,
+		 * associated offloaded MACs and indexes should not be used
+		 * by fw for detunneling.
+		 */
+		struct netdev_notifier_changeupper_info *info = ptr;
+		struct net_device *upper = info->upper_dev;
+		struct nfp_flower_repr_priv *repr_priv;
+		struct nfp_repr *repr;
+
+		if (!nfp_netdev_is_nfp_repr(netdev) ||
+		    !nfp_flower_is_supported_bridge(upper))
+			return NOTIFY_OK;
+
+		repr = netdev_priv(netdev);
+		if (repr->app != app)
+			return NOTIFY_OK;
+
+		repr_priv = repr->app_priv;
+
+		if (info->linking) {
+			if (nfp_tunnel_offload_mac(app, netdev,
+						   NFP_TUNNEL_MAC_OFFLOAD_DEL))
+				nfp_flower_cmsg_warn(app, "Failed to delete offloaded MAC on %s.\n",
+						     netdev_name(netdev));
+			repr_priv->on_bridge = true;
+		} else {
+			repr_priv->on_bridge = false;
+
+			if (!(netdev->flags & IFF_UP))
+				return NOTIFY_OK;
+
+			if (nfp_tunnel_offload_mac(app, netdev,
+						   NFP_TUNNEL_MAC_OFFLOAD_ADD))
+				nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n",
+						     netdev_name(netdev));
+		}
 	}
 	return NOTIFY_OK;
 }
 
+int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
+				 struct nfp_fl_payload *flow)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+	struct nfp_tun_offloaded_mac *mac_entry;
+	struct nfp_tun_pre_tun_rule payload;
+	struct net_device *internal_dev;
+	int err;
+
+	if (app_priv->pre_tun_rule_cnt == NFP_TUN_PRE_TUN_RULE_LIMIT)
+		return -ENOSPC;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule));
+
+	internal_dev = flow->pre_tun_rule.dev;
+	payload.vlan_tci = flow->pre_tun_rule.vlan_tci;
+	payload.host_ctx_id = flow->meta.host_ctx_id;
+
+	/* Lookup MAC index for the pre-tunnel rule egress device.
+	 * Note that because the device is always an internal port, it will
+	 * have a constant global index, so it does not need to be tracked.
+	 */
+	mac_entry = nfp_tunnel_lookup_offloaded_macs(app,
+						     internal_dev->dev_addr);
+	if (!mac_entry)
+		return -ENOENT;
+
+	payload.port_idx = cpu_to_be16(mac_entry->index);
+
+	/* Copy MAC id and VLAN to flow - dev may not exist at delete time. */
+	flow->pre_tun_rule.vlan_tci = payload.vlan_tci;
+	flow->pre_tun_rule.port_idx = payload.port_idx;
+
+	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE,
+				       sizeof(struct nfp_tun_pre_tun_rule),
+				       (unsigned char *)&payload, GFP_KERNEL);
+	if (err)
+		return err;
+
+	app_priv->pre_tun_rule_cnt++;
+
+	return 0;
+}
+
+int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app,
+				     struct nfp_fl_payload *flow)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+	struct nfp_tun_pre_tun_rule payload;
+	u32 tmp_flags = 0;
+	int err;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_pre_tun_rule));
+
+	tmp_flags |= NFP_TUN_PRE_TUN_RULE_DEL;
+	payload.flags = cpu_to_be32(tmp_flags);
+	payload.vlan_tci = flow->pre_tun_rule.vlan_tci;
+	payload.port_idx = flow->pre_tun_rule.port_idx;
+
+	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE,
+				       sizeof(struct nfp_tun_pre_tun_rule),
+				       (unsigned char *)&payload, GFP_KERNEL);
+	if (err)
+		return err;
+
+	app_priv->pre_tun_rule_cnt--;
+
+	return 0;
+}
+
 int nfp_tunnel_config_start(struct nfp_app *app)
 {
 	struct nfp_flower_priv *priv = app->priv;
-	struct net_device *netdev;
 	int err;
 
-	/* Initialise priv data for MAC offloading. */
-	priv->nfp_mac_off_count = 0;
-	mutex_init(&priv->nfp_mac_off_lock);
-	INIT_LIST_HEAD(&priv->nfp_mac_off_list);
-	priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler;
-	mutex_init(&priv->nfp_mac_index_lock);
-	INIT_LIST_HEAD(&priv->nfp_mac_index_list);
-	ida_init(&priv->nfp_mac_off_ids);
+	/* Initialise rhash for MAC offload tracking. */
+	err = rhashtable_init(&priv->tun.offloaded_macs,
+			      &offloaded_macs_params);
+	if (err)
+		return err;
+
+	ida_init(&priv->tun.mac_off_ids);
 
 	/* Initialise priv data for IPv4 offloading. */
-	mutex_init(&priv->nfp_ipv4_off_lock);
-	INIT_LIST_HEAD(&priv->nfp_ipv4_off_list);
+	mutex_init(&priv->tun.ipv4_off_lock);
+	INIT_LIST_HEAD(&priv->tun.ipv4_off_list);
 
 	/* Initialise priv data for neighbour offloading. */
-	spin_lock_init(&priv->nfp_neigh_off_lock);
-	INIT_LIST_HEAD(&priv->nfp_neigh_off_list);
-	priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
+	spin_lock_init(&priv->tun.neigh_off_lock);
+	INIT_LIST_HEAD(&priv->tun.neigh_off_list);
+	priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
 
-	err = register_netdevice_notifier(&priv->nfp_tun_mac_nb);
-	if (err)
-		goto err_free_mac_ida;
-
-	err = register_netevent_notifier(&priv->nfp_tun_neigh_nb);
-	if (err)
-		goto err_unreg_mac_nb;
-
-	/* Parse netdevs already registered for MACs that need offloaded. */
-	rtnl_lock();
-	for_each_netdev(&init_net, netdev)
-		nfp_tun_add_to_mac_offload_list(netdev, app);
-	rtnl_unlock();
+	err = register_netevent_notifier(&priv->tun.neigh_nb);
+	if (err) {
+		rhashtable_free_and_destroy(&priv->tun.offloaded_macs,
+					    nfp_check_rhashtable_empty, NULL);
+		return err;
+	}
 
 	return 0;
-
-err_unreg_mac_nb:
-	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
-err_free_mac_ida:
-	ida_destroy(&priv->nfp_mac_off_ids);
-	return err;
 }
 
 void nfp_tunnel_config_stop(struct nfp_app *app)
 {
-	struct nfp_tun_mac_offload_entry *mac_entry;
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_ipv4_route_entry *route_entry;
-	struct nfp_tun_mac_non_nfp_idx *mac_idx;
 	struct nfp_ipv4_addr_entry *ip_entry;
 	struct list_head *ptr, *storage;
 
-	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
-	unregister_netevent_notifier(&priv->nfp_tun_neigh_nb);
+	unregister_netevent_notifier(&priv->tun.neigh_nb);
 
-	/* Free any memory that may be occupied by MAC list. */
-	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
-		mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
-				       list);
-		list_del(&mac_entry->list);
-		kfree(mac_entry);
-	}
-
-	/* Free any memory that may be occupied by MAC index list. */
-	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
-		mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx,
-				     list);
-		list_del(&mac_idx->list);
-		kfree(mac_idx);
-	}
-
-	ida_destroy(&priv->nfp_mac_off_ids);
+	ida_destroy(&priv->tun.mac_off_ids);
 
 	/* Free any memory that may be occupied by ipv4 list. */
-	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+	list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
 		ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
 		list_del(&ip_entry->list);
 		kfree(ip_entry);
 	}
 
 	/* Free any memory that may be occupied by the route list. */
-	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+	list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
 		route_entry = list_entry(ptr, struct nfp_ipv4_route_entry,
 					 list);
 		list_del(&route_entry->list);
 		kfree(route_entry);
 	}
+
+	/* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */
+	rhashtable_free_and_destroy(&priv->tun.offloaded_macs,
+				    nfp_check_rhashtable_empty, NULL);
 }
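
The tunnel_conf.c rewrite above replaces the per-netdev MAC lists with a single rhashtable keyed on the 6-byte address (offloaded_macs_params), so a MAC shared by several netdevs is offloaded once and reference-counted. A minimal sketch of that lookup-or-insert-and-ref pattern, using the in-kernel rhashtable API with illustrative names:

	#include <linux/etherdevice.h>
	#include <linux/rhashtable.h>
	#include <linux/slab.h>

	struct mac_entry {
		struct rhash_head node;
		u8 addr[ETH_ALEN];	/* hash key */
		int ref_count;
	};

	static const struct rhashtable_params mac_params = {
		.key_offset	= offsetof(struct mac_entry, addr),
		.head_offset	= offsetof(struct mac_entry, node),
		.key_len	= ETH_ALEN,
		.automatic_shrinking	= true,
	};

	/* Take a reference on mac, inserting a fresh entry on first use. */
	static int mac_get(struct rhashtable *ht, const u8 *mac)
	{
		struct mac_entry *e;

		e = rhashtable_lookup_fast(ht, mac, mac_params);
		if (!e) {
			e = kzalloc(sizeof(*e), GFP_KERNEL);
			if (!e)
				return -ENOMEM;
			ether_addr_copy(e->addr, mac);
			if (rhashtable_insert_fast(ht, &e->node, mac_params)) {
				kfree(e);
				return -ENOMEM;
			}
		}
		e->ref_count++;
		return 0;
	}

The driver's real version layers more on top of this (global vs. port-based indexes, repr lists, bridge counts), but the hash-then-refcount core is the same.
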
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_abi.h b/drivers/net/ethernet/netronome/nfp/nfp_abi.h
index 8b56c27..dd359a4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_abi.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_abi.h
@@ -1,36 +1,5 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #ifndef __NFP_ABI__
 #define __NFP_ABI__ 1
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 8607d09..3a97328 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/bug.h>
 #include <linux/lockdep.h>
@@ -60,6 +30,11 @@
 #endif
 };
 
+void nfp_check_rhashtable_empty(void *ptr, void *arg)
+{
+	WARN_ON_ONCE(1);
+}
+
 struct nfp_app *nfp_app_from_netdev(struct net_device *netdev)
 {
 	if (nfp_netdev_is_nfp_net(netdev)) {
@@ -156,11 +131,100 @@
 	struct nfp_reprs *old;
 
 	old = nfp_reprs_get_locked(app, type);
+	rtnl_lock();
 	rcu_assign_pointer(app->reprs[type], reprs);
+	rtnl_unlock();
 
 	return old;
 }
 
+static void
+nfp_app_netdev_feat_change(struct nfp_app *app, struct net_device *netdev)
+{
+	struct nfp_net *nn;
+	unsigned int type;
+
+	if (!nfp_netdev_is_nfp_net(netdev))
+		return;
+	nn = netdev_priv(netdev);
+	if (nn->app != app)
+		return;
+
+	for (type = 0; type < __NFP_REPR_TYPE_MAX; type++) {
+		struct nfp_reprs *reprs;
+		unsigned int i;
+
+		reprs = rtnl_dereference(app->reprs[type]);
+		if (!reprs)
+			continue;
+
+		for (i = 0; i < reprs->num_reprs; i++) {
+			struct net_device *repr;
+
+			repr = rtnl_dereference(reprs->reprs[i]);
+			if (!repr)
+				continue;
+
+			nfp_repr_transfer_features(repr, netdev);
+		}
+	}
+}
+
+static int
+nfp_app_netdev_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct net_device *netdev;
+	struct nfp_app *app;
+
+	netdev = netdev_notifier_info_to_dev(ptr);
+	app = container_of(nb, struct nfp_app, netdev_nb);
+
+	/* Handle events common code is interested in */
+	switch (event) {
+	case NETDEV_FEAT_CHANGE:
+		nfp_app_netdev_feat_change(app, netdev);
+		break;
+	}
+
+	/* Call offload specific handlers */
+	if (app->type->netdev_event)
+		return app->type->netdev_event(app, netdev, event, ptr);
+	return NOTIFY_DONE;
+}
+
+int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
+{
+	int err;
+
+	app->ctrl = ctrl;
+
+	if (app->type->start) {
+		err = app->type->start(app);
+		if (err)
+			return err;
+	}
+
+	app->netdev_nb.notifier_call = nfp_app_netdev_event;
+	err = register_netdevice_notifier(&app->netdev_nb);
+	if (err)
+		goto err_app_stop;
+
+	return 0;
+
+err_app_stop:
+	if (app->type->stop)
+		app->type->stop(app);
+	return err;
+}
+
+void nfp_app_stop(struct nfp_app *app)
+{
+	unregister_netdevice_notifier(&app->netdev_nb);
+
+	if (app->type->stop)
+		app->type->stop(app);
+}
+
 struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
 {
 	struct nfp_app *app;
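
nfp_app_start() above now registers one driver-wide netdevice notifier: common events such as NETDEV_FEAT_CHANGE are handled first (propagating PF feature changes to every representor), and everything is then forwarded to the app type's optional netdev_event hook. A stripped-down sketch of that registration pattern, with hypothetical my_* names:

	#include <linux/netdevice.h>
	#include <linux/notifier.h>

	struct my_app {
		struct notifier_block netdev_nb;
	};

	static int my_netdev_event(struct notifier_block *nb,
				   unsigned long event, void *ptr)
	{
		struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
		struct my_app *app = container_of(nb, struct my_app, netdev_nb);

		if (event == NETDEV_FEAT_CHANGE)
			netdev_info(netdev, "features changed (app %p)\n", app);

		return NOTIFY_DONE;
	}

	static int my_app_start(struct my_app *app)
	{
		app->netdev_nb.notifier_call = my_netdev_event;
		return register_netdevice_notifier(&app->netdev_nb);
	}

This is also why the flower app can drop its private nfp_tun_mac notifier: MAC events now arrive through the shared netdev_event hook.
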
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 4e1eb33..76d13af 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #ifndef _NFP_APP_H
 #define _NFP_APP_H 1
@@ -40,6 +10,8 @@
 
 #include "nfp_net_repr.h"
 
+#define NFP_APP_CTRL_MTU_MAX	U32_MAX
+
 struct bpf_prog;
 struct net_device;
 struct netdev_bpf;
@@ -97,6 +69,7 @@
  * @port_get_stats_strings:	get strings for extra statistics
  * @start:	start application logic
  * @stop:	stop application logic
+ * @netdev_event:	Netdevice notifier event handler
  * @ctrl_msg_rx:    control message handler
  * @ctrl_msg_rx_raw:	handler for control messages from data queues
  * @setup_tc:	setup TC ndo
@@ -106,7 +79,7 @@
  * @eswitch_mode_set:    set SR-IOV eswitch mode (under pf->lock)
  * @sriov_enable: app-specific sriov initialisation
  * @sriov_disable: app-specific sriov clean-up
- * @repr_get:	get representor netdev
+ * @dev_get:	get representor or internal port representing netdev
  */
 struct nfp_app_type {
 	enum nfp_app_id id;
@@ -150,6 +123,9 @@
 	int (*start)(struct nfp_app *app);
 	void (*stop)(struct nfp_app *app);
 
+	int (*netdev_event)(struct nfp_app *app, struct net_device *netdev,
+			    unsigned long event, void *ptr);
+
 	void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
 	void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data,
 				unsigned int len);
@@ -167,7 +143,8 @@
 
 	enum devlink_eswitch_mode (*eswitch_mode_get)(struct nfp_app *app);
 	int (*eswitch_mode_set)(struct nfp_app *app, u16 mode);
-	struct net_device *(*repr_get)(struct nfp_app *app, u32 id);
+	struct net_device *(*dev_get)(struct nfp_app *app, u32 id,
+				      bool *redir_egress);
 };
 
 /**
@@ -178,6 +155,8 @@
  * @ctrl:	pointer to ctrl vNIC struct
  * @reprs:	array of pointers to representors
  * @type:	pointer to const application ops and info
+ * @ctrl_mtu:	MTU to set on the control vNIC (set in .init())
+ * @netdev_nb:	Netdevice notifier block
  * @priv:	app-specific priv data
  */
 struct nfp_app {
@@ -189,9 +168,14 @@
 	struct nfp_reprs __rcu *reprs[NFP_REPR_TYPE_MAX + 1];
 
 	const struct nfp_app_type *type;
+	unsigned int ctrl_mtu;
+
+	struct notifier_block netdev_nb;
+
 	void *priv;
 };
 
+void nfp_check_rhashtable_empty(void *ptr, void *arg);
 bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
 bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
 
@@ -289,21 +273,6 @@
 	return app->type->repr_change_mtu(app, netdev, new_mtu);
 }
 
-static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
-{
-	app->ctrl = ctrl;
-	if (!app->type->start)
-		return 0;
-	return app->type->start(app);
-}
-
-static inline void nfp_app_stop(struct nfp_app *app)
-{
-	if (!app->type->stop)
-		return;
-	app->type->stop(app);
-}
-
 static inline const char *nfp_app_name(struct nfp_app *app)
 {
 	if (!app)
@@ -429,12 +398,14 @@
 		app->type->sriov_disable(app);
 }
 
-static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id)
+static inline
+struct net_device *nfp_app_dev_get(struct nfp_app *app, u32 id,
+				   bool *redir_egress)
 {
-	if (unlikely(!app || !app->type->repr_get))
+	if (unlikely(!app || !app->type->dev_get))
 		return NULL;
 
-	return app->type->repr_get(app, id);
+	return app->type->dev_get(app, id, redir_egress);
 }
 
 struct nfp_app *nfp_app_from_netdev(struct net_device *netdev);
@@ -455,6 +426,8 @@
 
 struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id);
 void nfp_app_free(struct nfp_app *app);
+int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl);
+void nfp_app_stop(struct nfp_app *app);
 
 /* Callbacks shared between apps */
 
@@ -463,4 +436,6 @@
 int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
 				   struct nfp_net *nn, unsigned int id);
 
+struct devlink_port *nfp_devlink_get_devlink_port(struct net_device *netdev);
+
 #endif
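
With repr_get replaced by dev_get, a port lookup can now resolve to an internal port as well as a representor, and the bool out-parameter tells the caller whether traffic must be redirected back out on the egress path; callers that don't care may pass NULL, as nfp_tunnel_keep_alive() does above. A hypothetical caller sketch (handle_port_rx() is not a driver symbol):

	static void handle_port_rx(struct nfp_app *app, u32 port_id)
	{
		bool redir_egress = false;
		struct net_device *netdev;

		rcu_read_lock();	/* the repr table is RCU-protected */
		netdev = nfp_app_dev_get(app, port_id, &redir_egress);
		if (netdev && redir_egress)
			netdev_dbg(netdev, "deliver via egress path\n");
		rcu_read_unlock();
	}
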
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
index e2dfe4f..f119277 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include "nfpcore/nfp_cpp.h"
 #include "nfpcore/nfp_nsp.h"
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index cc6ace2..b04b836 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
 
 #include <linux/bitops.h>
 #include <linux/errno.h>
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index fad0e62..648c281 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
 
 #ifndef __NFP_ASM_H__
 #define __NFP_ASM_H__ 1
@@ -82,6 +52,15 @@
 #define OP_BR_BIT_ADDR_LO	OP_BR_ADDR_LO
 #define OP_BR_BIT_ADDR_HI	OP_BR_ADDR_HI
 
+#define OP_BR_ALU_BASE		0x0e800000000ULL
+#define OP_BR_ALU_BASE_MASK	0x0ff80000000ULL
+#define OP_BR_ALU_A_SRC		0x000000003ffULL
+#define OP_BR_ALU_B_SRC		0x000000ffc00ULL
+#define OP_BR_ALU_DEFBR		0x00000300000ULL
+#define OP_BR_ALU_IMM_HI	0x0007fc00000ULL
+#define OP_BR_ALU_SRC_LMEXTN	0x40000000000ULL
+#define OP_BR_ALU_DST_LMEXTN	0x80000000000ULL
+
 static inline bool nfp_is_br(u64 insn)
 {
 	return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
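
The new OP_BR_ALU_* constants above are masks over a 64-bit instruction word, so fields would normally be packed and unpacked with the <linux/bitfield.h> helpers, mirroring the nfp_is_br() test shown in the hunk. A hypothetical encoder sketch built only on those masks (the operand widths and function names are placeholders, not driver symbols):

	#include <linux/bitfield.h>
	#include <linux/types.h>

	/* Pack operands into the branch-on-ALU fields of one instruction. */
	static u64 encode_br_alu(u16 a_src, u16 b_src, u16 imm_hi)
	{
		return OP_BR_ALU_BASE |
		       FIELD_PREP(OP_BR_ALU_A_SRC, a_src) |
		       FIELD_PREP(OP_BR_ALU_B_SRC, b_src) |
		       FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi);
	}

	/* Recognize such an instruction, analogous to nfp_is_br() above. */
	static bool nfp_is_br_alu(u64 insn)
	{
		return (insn & OP_BR_ALU_BASE_MASK) == OP_BR_ALU_BASE;
	}
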
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index e9a4179..c50fce4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -1,39 +1,10 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/rtnetlink.h>
 #include <net/devlink.h>
 
+#include "nfpcore/nfp.h"
 #include "nfpcore/nfp_nsp.h"
 #include "nfp_app.h"
 #include "nfp_main.h"
@@ -173,7 +144,8 @@
 static int
 nfp_devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
 			u16 pool_index,
-			u32 size, enum devlink_sb_threshold_type threshold_type)
+			u32 size, enum devlink_sb_threshold_type threshold_type,
+			struct netlink_ext_ack *extack)
 {
 	struct nfp_pf *pf = devlink_priv(devlink);
 
@@ -188,7 +160,8 @@
 	return nfp_app_eswitch_mode_get(pf->app, mode);
 }
 
-static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+					struct netlink_ext_ack *extack)
 {
 	struct nfp_pf *pf = devlink_priv(devlink);
 	int ret;
@@ -200,6 +173,173 @@
 	return ret;
 }
 
+static const struct nfp_devlink_versions_simple {
+	const char *key;
+	const char *hwinfo;
+} nfp_devlink_versions_hwinfo[] = {
+	{ DEVLINK_INFO_VERSION_GENERIC_BOARD_ID,	"assembly.partno", },
+	{ DEVLINK_INFO_VERSION_GENERIC_BOARD_REV,	"assembly.revision", },
+	{ DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", },
+	{ "board.model", /* code name */		"assembly.model", },
+};
+
+static int
+nfp_devlink_versions_get_hwinfo(struct nfp_pf *pf, struct devlink_info_req *req)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(nfp_devlink_versions_hwinfo); i++) {
+		const struct nfp_devlink_versions_simple *info;
+		const char *val;
+
+		info = &nfp_devlink_versions_hwinfo[i];
+
+		val = nfp_hwinfo_lookup(pf->hwinfo, info->hwinfo);
+		if (!val)
+			continue;
+
+		err = devlink_info_version_fixed_put(req, info->key, val);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const struct nfp_devlink_versions {
+	enum nfp_nsp_versions id;
+	const char *key;
+} nfp_devlink_versions_nsp[] = {
+	{ NFP_VERSIONS_BUNDLE,	"fw.bundle_id", },
+	{ NFP_VERSIONS_BSP,	DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, },
+	{ NFP_VERSIONS_CPLD,	"fw.cpld", },
+	{ NFP_VERSIONS_APP,	DEVLINK_INFO_VERSION_GENERIC_FW_APP, },
+	{ NFP_VERSIONS_UNDI,	DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, },
+	{ NFP_VERSIONS_NCSI,	DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, },
+	{ NFP_VERSIONS_CFGR,	"chip.init", },
+};
+
+static int
+nfp_devlink_versions_get_nsp(struct devlink_info_req *req, bool flash,
+			     const u8 *buf, unsigned int size)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(nfp_devlink_versions_nsp); i++) {
+		const struct nfp_devlink_versions *info;
+		const char *version;
+
+		info = &nfp_devlink_versions_nsp[i];
+
+		version = nfp_nsp_versions_get(info->id, flash, buf, size);
+		if (IS_ERR(version)) {
+			if (PTR_ERR(version) == -ENOENT)
+				continue;
+			else
+				return PTR_ERR(version);
+		}
+
+		if (flash)
+			err = devlink_info_version_stored_put(req, info->key,
+							      version);
+		else
+			err = devlink_info_version_running_put(req, info->key,
+							       version);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int
+nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+		     struct netlink_ext_ack *extack)
+{
+	struct nfp_pf *pf = devlink_priv(devlink);
+	const char *sn, *vendor, *part;
+	struct nfp_nsp *nsp;
+	char *buf = NULL;
+	int err;
+
+	err = devlink_info_driver_name_put(req, "nfp");
+	if (err)
+		return err;
+
+	vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor");
+	part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno");
+	sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial");
+	if (vendor && part && sn) {
+		char *buf;
+
+		buf = kmalloc(strlen(vendor) + strlen(part) + strlen(sn) + 1,
+			      GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		buf[0] = '\0';
+		strcat(buf, vendor);
+		strcat(buf, part);
+		strcat(buf, sn);
+
+		err = devlink_info_serial_number_put(req, buf);
+		kfree(buf);
+		if (err)
+			return err;
+	}
+
+	nsp = nfp_nsp_open(pf->cpp);
+	if (IS_ERR(nsp)) {
+		NL_SET_ERR_MSG_MOD(extack, "can't access NSP");
+		return PTR_ERR(nsp);
+	}
+
+	if (nfp_nsp_has_versions(nsp)) {
+		buf = kzalloc(NFP_NSP_VERSION_BUFSZ, GFP_KERNEL);
+		if (!buf) {
+			err = -ENOMEM;
+			goto err_close_nsp;
+		}
+
+		err = nfp_nsp_versions(nsp, buf, NFP_NSP_VERSION_BUFSZ);
+		if (err)
+			goto err_free_buf;
+
+		err = nfp_devlink_versions_get_nsp(req, false,
+						   buf, NFP_NSP_VERSION_BUFSZ);
+		if (err)
+			goto err_free_buf;
+
+		err = nfp_devlink_versions_get_nsp(req, true,
+						   buf, NFP_NSP_VERSION_BUFSZ);
+		if (err)
+			goto err_free_buf;
+
+		kfree(buf);
+	}
+
+	nfp_nsp_close(nsp);
+
+	return nfp_devlink_versions_get_hwinfo(pf, req);
+
+err_free_buf:
+	kfree(buf);
+err_close_nsp:
+	nfp_nsp_close(nsp);
+	return err;
+}
+
+static int
+nfp_devlink_flash_update(struct devlink *devlink, const char *path,
+			 const char *component, struct netlink_ext_ack *extack)
+{
+	if (component)
+		return -EOPNOTSUPP;
+	return nfp_flash_update_common(devlink_priv(devlink), path, extack);
+}
+
 const struct devlink_ops nfp_devlink_ops = {
 	.port_split		= nfp_devlink_port_split,
 	.port_unsplit		= nfp_devlink_port_unsplit,
@@ -207,12 +347,16 @@
 	.sb_pool_set		= nfp_devlink_sb_pool_set,
 	.eswitch_mode_get	= nfp_devlink_eswitch_mode_get,
 	.eswitch_mode_set	= nfp_devlink_eswitch_mode_set,
+	.info_get		= nfp_devlink_info_get,
+	.flash_update		= nfp_devlink_flash_update,
 };
 
 int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
 {
 	struct nfp_eth_table_port eth_port;
 	struct devlink *devlink;
+	const u8 *serial;
+	int serial_len;
 	int ret;
 
 	rtnl_lock();
@@ -221,10 +365,10 @@
 	if (ret)
 		return ret;
 
-	devlink_port_type_eth_set(&port->dl_port, port->netdev);
+	serial_len = nfp_cpp_serial(port->app->cpp, &serial);
 	devlink_port_attrs_set(&port->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
 			       eth_port.label_port, eth_port.is_split,
-			       eth_port.label_subport);
+			       eth_port.label_subport, serial, serial_len);
 
 	devlink = priv_to_devlink(app->pf);
 
@@ -235,3 +379,24 @@
 {
 	devlink_port_unregister(&port->dl_port);
 }
+
+void nfp_devlink_port_type_eth_set(struct nfp_port *port)
+{
+	devlink_port_type_eth_set(&port->dl_port, port->netdev);
+}
+
+void nfp_devlink_port_type_clear(struct nfp_port *port)
+{
+	devlink_port_type_clear(&port->dl_port);
+}
+
+struct devlink_port *nfp_devlink_get_devlink_port(struct net_device *netdev)
+{
+	struct nfp_port *port;
+
+	port = nfp_port_from_netdev(netdev);
+	if (!port)
+		return NULL;
+
+	return &port->dl_port;
+}
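The .info_get callback added above reports three classes of versions to devlink: fixed (hwinfo-backed board identity that never changes at runtime), running (what the NSP says is currently executing) and stored (what sits in flash, e.g. after a flash_update). A minimal sketch of the callback shape the devlink core expects, with hypothetical driver name and version strings:

#include <net/devlink.h>

static int
demo_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
		      struct netlink_ext_ack *extack)
{
	int err;

	err = devlink_info_driver_name_put(req, "demo");
	if (err)
		return err;

	/* Fixed: immutable board identity. */
	err = devlink_info_version_fixed_put(req,
			DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, "DEMO-0001");
	if (err)
		return err;

	/* Running vs. stored lets userspace see both the active firmware
	 * and the image that will take effect after the next reset.
	 */
	err = devlink_info_version_running_put(req,
			DEVLINK_INFO_VERSION_GENERIC_FW_APP, "1.0.0");
	if (err)
		return err;
	return devlink_info_version_stored_put(req,
			DEVLINK_INFO_VERSION_GENERIC_FW_APP, "1.1.0");
}

Userspace retrieves all of this with "devlink dev info"; a component-less "devlink dev flash" lands in .flash_update, which is why nfp_devlink_flash_update above rejects any non-NULL component.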
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
index f0dcf45..5cabb1a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017 Netronome Systems, Inc. */
 
 #include <linux/kernel.h>
 #include <linux/bitops.h>
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 4a540c5..4d282fc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_main.c
@@ -68,6 +38,10 @@
 	  PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID,
 	  PCI_ANY_ID, 0,
 	},
+	{ PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NETRONOME_NFP5000,
+	  PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID,
+	  PCI_ANY_ID, 0,
+	},
 	{ PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NETRONOME_NFP4000,
 	  PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID,
 	  PCI_ANY_ID, 0,
@@ -112,23 +86,18 @@
 int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length,
 		 void *out_data, u64 out_length)
 {
-	unsigned long long addr;
 	unsigned long err_at;
 	u64 max_data_sz;
 	u32 val = 0;
-	u32 cpp_id;
 	int n, err;
 
 	if (!pf->mbox)
 		return -EOPNOTSUPP;
 
-	cpp_id = NFP_CPP_ISLAND_ID(pf->mbox->target, NFP_CPP_ACTION_RW, 0,
-				   pf->mbox->domain);
-	addr = pf->mbox->addr;
-	max_data_sz = pf->mbox->size - NFP_MBOX_SYM_MIN_SIZE;
+	max_data_sz = nfp_rtsym_size(pf->mbox) - NFP_MBOX_SYM_MIN_SIZE;
 
 	/* Check if cmd field is clear */
-	err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val);
+	err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_CMD, &val);
 	if (err || val) {
 		nfp_warn(pf->cpp, "failed to issue command (%u): %u, err: %d\n",
 			 cmd, val, err);
@@ -136,30 +105,29 @@
 	}
 
 	in_length = min(in_length, max_data_sz);
-	n = nfp_cpp_write(pf->cpp, cpp_id, addr + NFP_MBOX_DATA,
-			  in_data, in_length);
+	n = nfp_rtsym_write(pf->cpp, pf->mbox, NFP_MBOX_DATA, in_data,
+			    in_length);
 	if (n != in_length)
 		return -EIO;
 	/* Write data_len and wipe reserved */
-	err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN,
-			     in_length);
+	err = nfp_rtsym_writeq(pf->cpp, pf->mbox, NFP_MBOX_DATA_LEN, in_length);
 	if (err)
 		return err;
 
 	/* Read back for ordering */
-	err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val);
+	err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_DATA_LEN, &val);
 	if (err)
 		return err;
 
 	/* Write cmd and wipe return value */
-	err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, cmd);
+	err = nfp_rtsym_writeq(pf->cpp, pf->mbox, NFP_MBOX_CMD, cmd);
 	if (err)
 		return err;
 
 	err_at = jiffies + 5 * HZ;
 	while (true) {
 		/* Wait for command to go to 0 (NFP_MBOX_NO_CMD) */
-		err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val);
+		err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_CMD, &val);
 		if (err)
 			return err;
 		if (!val)
@@ -172,18 +140,18 @@
 	}
 
 	/* Copy output if any (could be error info, do it before reading ret) */
-	err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val);
+	err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_DATA_LEN, &val);
 	if (err)
 		return err;
 
 	out_length = min_t(u32, val, min(out_length, max_data_sz));
-	n = nfp_cpp_read(pf->cpp, cpp_id, addr + NFP_MBOX_DATA,
-			 out_data, out_length);
+	n = nfp_rtsym_read(pf->cpp, pf->mbox, NFP_MBOX_DATA,
+			   out_data, out_length);
 	if (n != out_length)
 		return -EIO;
 
 	/* Check if there is an error */
-	err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_RET, &val);
+	err = nfp_rtsym_readl(pf->cpp, pf->mbox, NFP_MBOX_RET, &val);
 	if (err)
 		return err;
 	if (val)
@@ -326,12 +294,56 @@
 
 static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
+	if (!pci_get_drvdata(pdev))
+		return -ENOENT;
+
 	if (num_vfs == 0)
 		return nfp_pcie_sriov_disable(pdev);
 	else
 		return nfp_pcie_sriov_enable(pdev, num_vfs);
 }
 
+int nfp_flash_update_common(struct nfp_pf *pf, const char *path,
+			    struct netlink_ext_ack *extack)
+{
+	struct device *dev = &pf->pdev->dev;
+	const struct firmware *fw;
+	struct nfp_nsp *nsp;
+	int err;
+
+	nsp = nfp_nsp_open(pf->cpp);
+	if (IS_ERR(nsp)) {
+		err = PTR_ERR(nsp);
+		if (extack)
+			NL_SET_ERR_MSG_MOD(extack, "can't access NSP");
+		else
+			dev_err(dev, "Failed to access the NSP: %d\n", err);
+		return err;
+	}
+
+	err = request_firmware_direct(&fw, path, dev);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "unable to read flash file from disk");
+		goto exit_close_nsp;
+	}
+
+	dev_info(dev, "Please be patient while writing flash image: %s\n",
+		 path);
+
+	err = nfp_nsp_write_flash(nsp, fw);
+	if (err < 0)
+		goto exit_release_fw;
+	dev_info(dev, "Finished writing flash image\n");
+	err = 0;
+
+exit_release_fw:
+	release_firmware(fw);
+exit_close_nsp:
+	nfp_nsp_close(nsp);
+	return err;
+}
+
 static const struct firmware *
 nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name)
 {
@@ -340,7 +352,7 @@
 
 	err = request_firmware_direct(&fw, name, &pdev->dev);
 	nfp_info(pf->cpp, "  %s: %s\n",
-		 name, err ? "not found" : "found, loading...");
+		 name, err ? "not found" : "found");
 	if (err)
 		return NULL;
 
@@ -418,8 +430,35 @@
 	return nfp_net_fw_request(pdev, pf, fw_name);
 }
 
+static int
+nfp_get_fw_policy_value(struct pci_dev *pdev, struct nfp_nsp *nsp,
+			const char *key, const char *default_val, int max_val,
+			int *value)
+{
+	char hwinfo[64];
+	long hi_val;
+	int err;
+
+	snprintf(hwinfo, sizeof(hwinfo), key);
+	err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo),
+					     default_val);
+	if (err)
+		return err;
+
+	err = kstrtol(hwinfo, 0, &hi_val);
+	if (err || hi_val < 0 || hi_val > max_val) {
+		dev_warn(&pdev->dev,
+			 "Invalid value '%s' from '%s', ignoring\n",
+			 hwinfo, key);
+		err = kstrtol(default_val, 0, &hi_val);
+	}
+
+	*value = hi_val;
+	return err;
+}
+
 /**
- * nfp_net_fw_load() - Load the firmware image
+ * nfp_fw_load() - Load the firmware image
  * @pdev:       PCI Device structure
  * @pf:		NFP PF Device structure
  * @nsp:	NFP SP handle
@@ -429,42 +468,107 @@
 static int
 nfp_fw_load(struct pci_dev *pdev, struct nfp_pf *pf, struct nfp_nsp *nsp)
 {
-	const struct firmware *fw;
+	bool do_reset, fw_loaded = false;
+	const struct firmware *fw = NULL;
+	int err, reset, policy, ifcs = 0;
+	char *token, *ptr;
+	char hwinfo[64];
 	u16 interface;
-	int err;
+
+	snprintf(hwinfo, sizeof(hwinfo), "abi_drv_load_ifc");
+	err = nfp_nsp_hwinfo_lookup_optional(nsp, hwinfo, sizeof(hwinfo),
+					     NFP_NSP_DRV_LOAD_IFC_DEFAULT);
+	if (err)
+		return err;
 
 	interface = nfp_cpp_interface(pf->cpp);
-	if (NFP_CPP_INTERFACE_UNIT_of(interface) != 0) {
-		/* Only Unit 0 should reset or load firmware */
+	ptr = hwinfo;
+	while ((token = strsep(&ptr, ","))) {
+		unsigned long interface_hi;
+
+		err = kstrtoul(token, 0, &interface_hi);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Failed to parse interface '%s': %d\n",
+				token, err);
+			return err;
+		}
+
+		ifcs++;
+		if (interface == interface_hi)
+			break;
+	}
+
+	if (!token) {
 		dev_info(&pdev->dev, "Firmware will be loaded by partner\n");
 		return 0;
 	}
 
+	err = nfp_get_fw_policy_value(pdev, nsp, "abi_drv_reset",
+				      NFP_NSP_DRV_RESET_DEFAULT,
+				      NFP_NSP_DRV_RESET_NEVER, &reset);
+	if (err)
+		return err;
+
+	err = nfp_get_fw_policy_value(pdev, nsp, "app_fw_from_flash",
+				      NFP_NSP_APP_FW_LOAD_DEFAULT,
+				      NFP_NSP_APP_FW_LOAD_PREF, &policy);
+	if (err)
+		return err;
+
 	fw = nfp_net_fw_find(pdev, pf);
-	if (!fw)
-		return 0;
+	do_reset = reset == NFP_NSP_DRV_RESET_ALWAYS ||
+		   (fw && reset == NFP_NSP_DRV_RESET_DISK);
 
-	dev_info(&pdev->dev, "Soft-reset, loading FW image\n");
-	err = nfp_nsp_device_soft_reset(nsp);
-	if (err < 0) {
-		dev_err(&pdev->dev, "Failed to soft reset the NFP: %d\n",
-			err);
-		goto exit_release_fw;
+	if (do_reset) {
+		dev_info(&pdev->dev, "Soft-resetting the NFP\n");
+		err = nfp_nsp_device_soft_reset(nsp);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"Failed to soft reset the NFP: %d\n", err);
+			goto exit_release_fw;
+		}
 	}
 
-	err = nfp_nsp_load_fw(nsp, fw);
+	if (fw && policy != NFP_NSP_APP_FW_LOAD_FLASH) {
+		if (nfp_nsp_has_fw_loaded(nsp) && nfp_nsp_fw_loaded(nsp))
+			goto exit_release_fw;
 
-	if (err < 0) {
-		dev_err(&pdev->dev, "FW loading failed: %d\n", err);
-		goto exit_release_fw;
+		err = nfp_nsp_load_fw(nsp, fw);
+		if (err < 0) {
+			dev_err(&pdev->dev, "FW loading failed: %d\n",
+				err);
+			goto exit_release_fw;
+		}
+		dev_info(&pdev->dev, "Finished loading FW image\n");
+		fw_loaded = true;
+	} else if (policy != NFP_NSP_APP_FW_LOAD_DISK &&
+		   nfp_nsp_has_stored_fw_load(nsp)) {
+
+		/* Don't propagate this error to stick with legacy driver
+		 * behavior, failure will be detected later during init.
+		 */
+		if (!nfp_nsp_load_stored_fw(nsp))
+			dev_info(&pdev->dev, "Finished loading stored FW image\n");
+
+		/* Don't flag the fw_loaded in this case since other devices
+		 * may reuse the firmware when configured this way
+		 */
+	} else {
+		dev_warn(&pdev->dev, "Didn't load firmware, please update flash or reconfigure card\n");
 	}
 
-	dev_info(&pdev->dev, "Finished loading FW image\n");
-
 exit_release_fw:
 	release_firmware(fw);
 
-	return err < 0 ? err : 1;
+	/* We don't want to unload firmware when other devices may still be
+	 * dependent on it, which could be the case if there are multiple
+	 * devices that could load firmware.
+	 */
+	if (fw_loaded && ifcs == 1)
+		pf->unload_fw_on_remove = true;
+
+	return err < 0 ? err : fw_loaded;
 }
 
 static void
@@ -566,9 +670,9 @@
 	/* Optional per-PCI PF mailbox */
 	snprintf(pf_symbol, sizeof(pf_symbol), NFP_MBOX_SYM_NAME, pf_id);
 	pf->mbox = nfp_rtsym_lookup(pf->rtbl, pf_symbol);
-	if (pf->mbox && pf->mbox->size < NFP_MBOX_SYM_MIN_SIZE) {
+	if (pf->mbox && nfp_rtsym_size(pf->mbox) < NFP_MBOX_SYM_MIN_SIZE) {
 		nfp_err(pf->cpp, "PF mailbox symbol too small: %llu < %d\n",
-			pf->mbox->size, NFP_MBOX_SYM_MIN_SIZE);
+			nfp_rtsym_size(pf->mbox), NFP_MBOX_SYM_MIN_SIZE);
 		return -EINVAL;
 	}
 
@@ -582,6 +686,10 @@
 	struct nfp_pf *pf;
 	int err;
 
+	if (pdev->vendor == PCI_VENDOR_ID_NETRONOME &&
+	    pdev->device == PCI_DEVICE_ID_NETRONOME_NFP6000_VF)
+		dev_warn(&pdev->dev, "Binding NFP VF device to the NFP PF driver, the VF driver is called 'nfp_netvf'\n");
+
 	err = pci_enable_device(pdev);
 	if (err < 0)
 		return err;
@@ -686,7 +794,7 @@
 err_fw_unload:
 	kfree(pf->rtbl);
 	nfp_mip_close(pf->mip);
-	if (pf->fw_loaded)
+	if (pf->unload_fw_on_remove)
 		nfp_fw_unload(pf);
 	kfree(pf->eth_tbl);
 	kfree(pf->nspi);
@@ -709,9 +817,13 @@
 	return err;
 }
 
-static void nfp_pci_remove(struct pci_dev *pdev)
+static void __nfp_pci_shutdown(struct pci_dev *pdev, bool unload_fw)
 {
-	struct nfp_pf *pf = pci_get_drvdata(pdev);
+	struct nfp_pf *pf;
+
+	pf = pci_get_drvdata(pdev);
+	if (!pf)
+		return;
 
 	nfp_hwmon_unregister(pf);
 
@@ -722,7 +834,7 @@
 	vfree(pf->dumpspec);
 	kfree(pf->rtbl);
 	nfp_mip_close(pf->mip);
-	if (pf->fw_loaded)
+	if (unload_fw && pf->unload_fw_on_remove)
 		nfp_fw_unload(pf);
 
 	destroy_workqueue(pf->wq);
@@ -738,11 +850,22 @@
 	pci_disable_device(pdev);
 }
 
+static void nfp_pci_remove(struct pci_dev *pdev)
+{
+	__nfp_pci_shutdown(pdev, true);
+}
+
+static void nfp_pci_shutdown(struct pci_dev *pdev)
+{
+	__nfp_pci_shutdown(pdev, false);
+}
+
 static struct pci_driver nfp_pci_driver = {
 	.name			= nfp_driver_name,
 	.id_table		= nfp_pci_device_ids,
 	.probe			= nfp_pci_probe,
 	.remove			= nfp_pci_remove,
+	.shutdown		= nfp_pci_shutdown,
 	.sriov_configure	= nfp_pcie_sriov_configure,
 };
 
@@ -782,6 +905,8 @@
 module_init(nfp_main_init);
 module_exit(nfp_main_exit);
 
+MODULE_FIRMWARE("netronome/nic_AMDA0058-0011_2x40.nffw");
+MODULE_FIRMWARE("netronome/nic_AMDA0058-0012_2x40.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0081-0001_1x40.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0081-0001_4x10.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0096-0001_2x10.nffw");
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index 595b3dc..5d5812f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_main.h
@@ -94,6 +64,7 @@
  * @limit_vfs:		Number of VFs supported by firmware (~0 for PCI limit)
  * @num_vfs:		Number of SR-IOV VFs enabled
  * @fw_loaded:		Is the firmware loaded?
+ * @unload_fw_on_remove:Do we need to unload firmware on driver removal?
  * @ctrl_vnic:		Pointer to the control vNIC if available
  * @mip:		MIP handle
  * @rtbl:		RTsym table
@@ -140,6 +111,7 @@
 	unsigned int num_vfs;
 
 	bool fw_loaded;
+	bool unload_fw_on_remove;
 
 	struct nfp_net *ctrl_vnic;
 
@@ -194,6 +166,8 @@
 		 unsigned int min_size, struct nfp_cpp_area **area);
 int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length,
 		 void *out_data, u64 out_length);
+int nfp_flash_update_common(struct nfp_pf *pf, const char *path,
+			    struct netlink_ext_ack *extack);
 
 enum nfp_dump_diag {
 	NFP_DUMP_NSP_DIAG = 0,
@@ -213,4 +187,7 @@
 int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb,
 			    u16 pool_index, u32 size,
 			    enum devlink_sb_threshold_type threshold_type);
+
+int nfp_devlink_params_register(struct nfp_pf *pf);
+void nfp_devlink_params_unregister(struct nfp_pf *pf);
 #endif /* NFP_MAIN_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 439e6ff..250f510 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_net.h
@@ -42,11 +12,14 @@
 #ifndef _NFP_NET_H_
 #define _NFP_NET_H_
 
+#include <linux/atomic.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
 #include <net/xdp.h>
 
 #include "nfp_net_ctrl.h"
@@ -93,7 +66,7 @@
 #define NFP_NET_MAX_DMA_BITS	40
 
 /* Default size for MTU and freelist buffer sizes */
-#define NFP_NET_DEFAULT_MTU		1500
+#define NFP_NET_DEFAULT_MTU		1500U
 
 /* Maximum number of bytes prepended to a packet */
 #define NFP_NET_MAX_PREPEND		64
@@ -188,6 +161,7 @@
 			__le16 data_len; /* Length of frame + meta data */
 		} __packed;
 		__le32 vals[4];
+		__le64 vals8[2];
 	};
 };
 
@@ -267,7 +241,7 @@
 #define PCIE_DESC_RX_I_TCP_CSUM_OK	cpu_to_le16(BIT(11))
 #define PCIE_DESC_RX_I_UDP_CSUM		cpu_to_le16(BIT(10))
 #define PCIE_DESC_RX_I_UDP_CSUM_OK	cpu_to_le16(BIT(9))
-#define PCIE_DESC_RX_BPF		cpu_to_le16(BIT(8))
+#define PCIE_DESC_RX_DECRYPTED		cpu_to_le16(BIT(8))
 #define PCIE_DESC_RX_EOP		cpu_to_le16(BIT(7))
 #define PCIE_DESC_RX_IP4_CSUM		cpu_to_le16(BIT(6))
 #define PCIE_DESC_RX_IP4_CSUM_OK	cpu_to_le16(BIT(5))
@@ -394,6 +368,7 @@
  * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK
  * @hw_csum_rx_complete: Counter of packets with CHECKSUM_COMPLETE reported
  * @hw_csum_rx_error:	 Counter of packets with bad checksums
+ * @hw_tls_rx:	    Number of packets with TLS decrypted by hardware
  * @tx_sync:	    Seqlock for atomic updates of TX stats
  * @tx_pkts:	    Number of Transmitted packets
  * @tx_bytes:	    Number of Transmitted bytes
@@ -401,6 +376,11 @@
  * @hw_csum_tx_inner:	 Counter of inner TX checksum offload requests
  * @tx_gather:	    Counter of packets with Gather DMA
  * @tx_lso:	    Counter of LSO packets sent
+ * @hw_tls_tx:	    Counter of TLS packets sent with crypto offloaded to HW
+ * @tls_tx_fallback:	Counter of TLS packets sent which had to be encrypted
+ *			by the fallback path because packets came out of order
+ * @tls_tx_no_fallback:	Counter of TLS packets not sent because the fallback
+ *			path could not encrypt them
  * @tx_errors:	    How many TX errors were encountered
  * @tx_busy:        How often was TX busy (no space)?
  * @rx_replace_buf_alloc_fail:	Counter of RX buffer allocation failures
@@ -421,7 +401,7 @@
 		struct {
 			struct tasklet_struct tasklet;
 			struct sk_buff_head queue;
-			struct spinlock lock;
+			spinlock_t lock;
 		};
 	};
 
@@ -437,22 +417,30 @@
 	u64 hw_csum_rx_ok;
 	u64 hw_csum_rx_inner_ok;
 	u64 hw_csum_rx_complete;
+	u64 hw_tls_rx;
+
+	u64 hw_csum_rx_error;
+	u64 rx_replace_buf_alloc_fail;
 
 	struct nfp_net_tx_ring *xdp_ring;
 
 	struct u64_stats_sync tx_sync;
 	u64 tx_pkts;
 	u64 tx_bytes;
-	u64 hw_csum_tx;
+
+	u64 ____cacheline_aligned_in_smp hw_csum_tx;
 	u64 hw_csum_tx_inner;
 	u64 tx_gather;
 	u64 tx_lso;
+	u64 hw_tls_tx;
 
-	u64 hw_csum_rx_error;
-	u64 rx_replace_buf_alloc_fail;
+	u64 tls_tx_fallback;
+	u64 tls_tx_no_fallback;
 	u64 tx_errors;
 	u64 tx_busy;
 
+	/* Cold data follows */
+
 	u32 irq_vector;
 	irq_handler_t handler;
 	char name[IFNAMSIZ + 8];
@@ -487,6 +475,7 @@
  * @netdev:		Backpointer to net_device structure
  * @is_vf:		Is the driver attached to a VF?
  * @chained_metadata_format:  Firemware will use new metadata format
+ * @ktls_tx:		Is kTLS TX enabled?
  * @rx_dma_dir:		Mapping direction for RX buffers
  * @rx_dma_off:		Offset at which DMA packets (for XDP headroom)
  * @rx_offset:		Offset in the RX buffers where packet data starts
@@ -511,6 +500,7 @@
 
 	u8 is_vf:1;
 	u8 chained_metadata_format:1;
+	u8 ktls_tx:1;
 
 	u8 rx_dma_dir;
 	u8 rx_offset;
@@ -568,11 +558,17 @@
  * @shared_handler:     Handler for shared interrupts
  * @shared_name:        Name for shared interrupt
  * @me_freq_mhz:        ME clock_freq (MHz)
- * @reconfig_lock:	Protects HW reconfiguration request regs/machinery
+ * @reconfig_lock:	Protects @reconfig_posted, @reconfig_timer_active,
+ *			@reconfig_sync_present and HW reconfiguration request
+ *			regs/machinery from async requests (sync must take
+ *			@bar_lock)
  * @reconfig_posted:	Pending reconfig bits coming from async sources
  * @reconfig_timer_active:  Timer for reading reconfiguration results is pending
  * @reconfig_sync_present:  Some thread is performing synchronous reconfig
  * @reconfig_timer:	Timer for async reading of reconfig results
+ * @reconfig_in_progress_update:	Update FW is processing now (debug only)
+ * @bar_lock:		vNIC config BAR access lock, protects: update,
+ *			mailbox area, crypto TLV
  * @link_up:            Is the link up?
  * @link_status_lock:	Protects @link_* and ensures atomicity with BAR reading
  * @rx_coalesce_usecs:      RX interrupt moderation usecs delay parameter
@@ -585,6 +581,18 @@
  * @tx_bar:             Pointer to mapped TX queues
  * @rx_bar:             Pointer to mapped FL/RX queues
  * @tlv_caps:		Parsed TLV capabilities
+ * @ktls_tx_conn_cnt:	Number of offloaded kTLS TX connections
+ * @ktls_rx_conn_cnt:	Number of offloaded kTLS RX connections
+ * @ktls_conn_id_gen:	Trivial generator for kTLS connection ids (for TX)
+ * @ktls_no_space:	Counter of firmware rejecting kTLS connection due to
+ *			lack of space
+ * @mbox_cmsg:		Common Control Message via vNIC mailbox state
+ * @mbox_cmsg.queue:	CCM mbox queue of pending messages
+ * @mbox_cmsg.wq:	CCM mbox wait queue of waiting processes
+ * @mbox_cmsg.workq:	CCM mbox work queue for @wait_work and @runq_work
+ * @mbox_cmsg.wait_work:    CCM mbox posted msg reconfig wait work
+ * @mbox_cmsg.runq_work:    CCM mbox posted msg queue runner work
+ * @mbox_cmsg.tag:	CCM mbox message tag allocator
  * @debugfs_dir:	Device directory in debugfs
  * @vnic_list:		Entry on device vNIC list
  * @pdev:		Backpointer to PCI device
@@ -641,6 +649,9 @@
 	bool reconfig_timer_active;
 	bool reconfig_sync_present;
 	struct timer_list reconfig_timer;
+	u32 reconfig_in_progress_update;
+
+	struct semaphore bar_lock;
 
 	u32 rx_coalesce_usecs;
 	u32 rx_coalesce_max_frames;
@@ -657,6 +668,22 @@
 
 	struct nfp_net_tlv_caps tlv_caps;
 
+	unsigned int ktls_tx_conn_cnt;
+	unsigned int ktls_rx_conn_cnt;
+
+	atomic64_t ktls_conn_id_gen;
+
+	atomic_t ktls_no_space;
+
+	struct {
+		struct sk_buff_head queue;
+		wait_queue_head_t wq;
+		struct workqueue_struct *workq;
+		struct work_struct wait_work;
+		struct work_struct runq_work;
+		u16 tag;
+	} mbox_cmsg;
+
 	struct dentry *debugfs_dir;
 
 	struct list_head vnic_list;
@@ -866,6 +893,21 @@
 	spin_unlock_bh(&nn->r_vecs[0].lock);
 }
 
+static inline void nn_ctrl_bar_lock(struct nfp_net *nn)
+{
+	down(&nn->bar_lock);
+}
+
+static inline bool nn_ctrl_bar_trylock(struct nfp_net *nn)
+{
+	return !down_trylock(&nn->bar_lock);
+}
+
+static inline void nn_ctrl_bar_unlock(struct nfp_net *nn)
+{
+	up(&nn->bar_lock);
+}
+
 /* Globals */
 extern const char nfp_driver_version[];
 
@@ -881,7 +923,7 @@
 			    void __iomem *ctrl_bar);
 
 struct nfp_net *
-nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev,
+nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
 	      unsigned int max_tx_rings, unsigned int max_rx_rings);
 void nfp_net_free(struct nfp_net *nn);
 
@@ -893,11 +935,17 @@
 
 void nfp_net_set_ethtool_ops(struct net_device *netdev);
 void nfp_net_info(struct nfp_net *nn);
+int __nfp_net_reconfig(struct nfp_net *nn, u32 update);
 int nfp_net_reconfig(struct nfp_net *nn, u32 update);
 unsigned int nfp_net_rss_key_sz(struct nfp_net *nn);
 void nfp_net_rss_write_itbl(struct nfp_net *nn);
 void nfp_net_rss_write_key(struct nfp_net *nn);
 void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
+int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size);
+int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd);
+int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd);
+void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update);
+int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn);
 
 unsigned int
 nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
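The new bar_lock semaphore and the nn_ctrl_bar_lock()/nfp_net_mbox_lock() helpers above serialize every control-BAR update (the update word, the mailbox area and the crypto TLV). The intended calling sequence, condensed from the VLAN-filter callbacks reworked later in this patch; demo_mbox_cmd() itself is illustrative:

/* Issue one mailbox command under bar_lock (condensed sketch). */
static int demo_mbox_cmd(struct nfp_net *nn, u32 cmd, u16 vid)
{
	int err;

	/* Checks the mailbox TLV is large enough, then takes bar_lock. */
	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
	if (err)
		return err;

	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);

	/* Runs the mailbox reconfig and drops bar_lock on all paths. */
	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
}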
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index c6d29fd..61aabff 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_net_common.c
@@ -66,15 +36,17 @@
 #include <linux/vmalloc.h>
 #include <linux/ktime.h>
 
-#include <net/switchdev.h>
+#include <net/tls.h>
 #include <net/vxlan.h>
 
 #include "nfpcore/nfp_nsp.h"
+#include "ccm.h"
 #include "nfp_app.h"
 #include "nfp_net_ctrl.h"
 #include "nfp_net.h"
 #include "nfp_net_sriov.h"
 #include "nfp_port.h"
+#include "crypto/crypto.h"
 
 /**
  * nfp_net_get_fw_version() - Read and parse the FW version
@@ -131,6 +103,7 @@
 	/* ensure update is written before pinging HW */
 	nn_pci_flush(nn);
 	nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
+	nn->reconfig_in_progress_update = update;
 }
 
 /* Pass 0 as update to run posted reconfigs. */
@@ -153,30 +126,51 @@
 	if (reg == 0)
 		return true;
 	if (reg & NFP_NET_CFG_UPDATE_ERR) {
-		nn_err(nn, "Reconfig error: 0x%08x\n", reg);
+		nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
+		       reg, nn->reconfig_in_progress_update,
+		       nn_readl(nn, NFP_NET_CFG_CTRL));
 		return true;
 	} else if (last_check) {
-		nn_err(nn, "Reconfig timeout: 0x%08x\n", reg);
+		nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
+		       reg, nn->reconfig_in_progress_update,
+		       nn_readl(nn, NFP_NET_CFG_CTRL));
 		return true;
 	}
 
 	return false;
 }
 
-static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 {
 	bool timed_out = false;
+	int i;
 
-	/* Poll update field, waiting for NFP to ack the config */
+	/* Poll update field, waiting for NFP to ack the config.
+	 * Do an opportunistic wait-busy loop, afterward sleep.
+	 */
+	for (i = 0; i < 50; i++) {
+		if (nfp_net_reconfig_check_done(nn, false))
+			return false;
+		udelay(4);
+	}
+
 	while (!nfp_net_reconfig_check_done(nn, timed_out)) {
-		msleep(1);
+		usleep_range(250, 500);
 		timed_out = time_is_before_eq_jiffies(deadline);
 	}
 
+	return timed_out;
+}
+
+static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+{
+	if (__nfp_net_reconfig_wait(nn, deadline))
+		return -EIO;
+
 	if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
 		return -EIO;
 
-	return timed_out ? -EIO : 0;
+	return 0;
 }
 
 static void nfp_net_reconfig_timer(struct timer_list *t)
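__nfp_net_reconfig_wait above now spins briefly before sleeping: most reconfigs complete within microseconds, so only slow ones pay for scheduler round-trips. A generic sketch of that hybrid busy-then-sleep poll, with a hypothetical done() predicate standing in for nfp_net_reconfig_check_done():

#include <linux/delay.h>
#include <linux/jiffies.h>

/* Returns true on timeout; done() is a hypothetical completion check. */
static bool demo_poll_hybrid(bool (*done)(void), unsigned long deadline)
{
	int i;

	/* Opportunistic busy-wait, ~200 usec worst case. */
	for (i = 0; i < 50; i++) {
		if (done())
			return false;
		udelay(4);
	}

	/* Fall back to sleeping, bounded by @deadline (in jiffies). */
	while (!done()) {
		if (time_is_before_eq_jiffies(deadline))
			return true;
		usleep_range(250, 500);
	}

	return false;
}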
@@ -236,6 +230,7 @@
 
 	spin_lock_bh(&nn->reconfig_lock);
 
+	WARN_ON(nn->reconfig_sync_present);
 	nn->reconfig_sync_present = true;
 
 	if (nn->reconfig_timer_active) {
@@ -269,7 +264,7 @@
 }
 
 /**
- * nfp_net_reconfig() - Reconfigure the firmware
+ * __nfp_net_reconfig() - Reconfigure the firmware
  * @nn:      NFP Net device to reconfigure
  * @update:  The value for the update field in the BAR config
  *
@@ -279,7 +274,7 @@
  *
  * Return: Negative errno on error, 0 on success
  */
-int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+int __nfp_net_reconfig(struct nfp_net *nn, u32 update)
 {
 	int ret;
 
@@ -300,8 +295,31 @@
 	return ret;
 }
 
+int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+{
+	int ret;
+
+	nn_ctrl_bar_lock(nn);
+	ret = __nfp_net_reconfig(nn, update);
+	nn_ctrl_bar_unlock(nn);
+
+	return ret;
+}
+
+int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size)
+{
+	if (nn->tlv_caps.mbox_len < NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size) {
+		nn_err(nn, "mailbox too small for %u of data (%u)\n",
+		       data_size, nn->tlv_caps.mbox_len);
+		return -EIO;
+	}
+
+	nn_ctrl_bar_lock(nn);
+	return 0;
+}
+
 /**
- * nfp_net_reconfig_mbox() - Reconfigure the firmware via the mailbox
+ * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox
  * @nn:        NFP Net device to reconfigure
  * @mbox_cmd:  The value for the mailbox command
  *
@@ -309,19 +327,14 @@
  *
  * Return: Negative errno on error, 0 on success
  */
-static int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
+int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd)
 {
 	u32 mbox = nn->tlv_caps.mbox_off;
 	int ret;
 
-	if (!nfp_net_has_mbox(&nn->tlv_caps)) {
-		nn_err(nn, "no mailbox present, command: %u\n", mbox_cmd);
-		return -EIO;
-	}
-
 	nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 
-	ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
+	ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
 	if (ret) {
 		nn_err(nn, "Mailbox update error\n");
 		return ret;
@@ -330,6 +343,33 @@
 	return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 }
 
+void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd)
+{
+	u32 mbox = nn->tlv_caps.mbox_off;
+
+	nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
+
+	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX);
+}
+
+int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn)
+{
+	u32 mbox = nn->tlv_caps.mbox_off;
+
+	nfp_net_reconfig_wait_posted(nn);
+
+	return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
+}
+
+int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
+{
+	int ret;
+
+	ret = nfp_net_mbox_reconfig(nn, mbox_cmd);
+	nn_ctrl_bar_unlock(nn);
+	return ret;
+}
+
 /* Interrupt configuration and handling
  */
 
@@ -677,27 +717,29 @@
  * @txbuf: Pointer to driver soft TX descriptor
  * @txd: Pointer to HW TX descriptor
  * @skb: Pointer to SKB
+ * @md_bytes: Prepend length
  *
  * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
  * Return error on packet header greater than maximum supported LSO header size.
  */
 static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
 			   struct nfp_net_tx_buf *txbuf,
-			   struct nfp_net_tx_desc *txd, struct sk_buff *skb)
+			   struct nfp_net_tx_desc *txd, struct sk_buff *skb,
+			   u32 md_bytes)
 {
-	u32 hdrlen;
+	u32 l3_offset, l4_offset, hdrlen;
 	u16 mss;
 
 	if (!skb_is_gso(skb))
 		return;
 
 	if (!skb->encapsulation) {
-		txd->l3_offset = skb_network_offset(skb);
-		txd->l4_offset = skb_transport_offset(skb);
+		l3_offset = skb_network_offset(skb);
+		l4_offset = skb_transport_offset(skb);
 		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	} else {
-		txd->l3_offset = skb_inner_network_offset(skb);
-		txd->l4_offset = skb_inner_transport_offset(skb);
+		l3_offset = skb_inner_network_offset(skb);
+		l4_offset = skb_inner_transport_offset(skb);
 		hdrlen = skb_inner_transport_header(skb) - skb->data +
 			inner_tcp_hdrlen(skb);
 	}
@@ -706,7 +748,9 @@
 	txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
 
 	mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
-	txd->lso_hdrlen = hdrlen;
+	txd->l3_offset = l3_offset - md_bytes;
+	txd->l4_offset = l4_offset - md_bytes;
+	txd->lso_hdrlen = hdrlen - md_bytes;
 	txd->mss = cpu_to_le16(mss);
 	txd->flags |= PCIE_DESC_TX_LSO;
 
@@ -778,6 +822,99 @@
 	u64_stats_update_end(&r_vec->tx_sync);
 }
 
+static struct sk_buff *
+nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
+	       struct sk_buff *skb, u64 *tls_handle, int *nr_frags)
+{
+#ifdef CONFIG_TLS_DEVICE
+	struct nfp_net_tls_offload_ctx *ntls;
+	struct sk_buff *nskb;
+	bool resync_pending;
+	u32 datalen, seq;
+
+	if (likely(!dp->ktls_tx))
+		return skb;
+	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+		return skb;
+
+	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+	seq = ntohl(tcp_hdr(skb)->seq);
+	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
+	resync_pending = tls_offload_tx_resync_pending(skb->sk);
+	if (unlikely(resync_pending || ntls->next_seq != seq)) {
+		/* Pure ACK out of order already */
+		if (!datalen)
+			return skb;
+
+		u64_stats_update_begin(&r_vec->tx_sync);
+		r_vec->tls_tx_fallback++;
+		u64_stats_update_end(&r_vec->tx_sync);
+
+		nskb = tls_encrypt_skb(skb);
+		if (!nskb) {
+			u64_stats_update_begin(&r_vec->tx_sync);
+			r_vec->tls_tx_no_fallback++;
+			u64_stats_update_end(&r_vec->tx_sync);
+			return NULL;
+		}
+		/* encryption wasn't necessary */
+		if (nskb == skb)
+			return skb;
+		/* we don't re-check ring space */
+		if (unlikely(skb_is_nonlinear(nskb))) {
+			nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n");
+			u64_stats_update_begin(&r_vec->tx_sync);
+			r_vec->tx_errors++;
+			u64_stats_update_end(&r_vec->tx_sync);
+			dev_kfree_skb_any(nskb);
+			return NULL;
+		}
+
+		/* jump forward, a TX may have gotten lost, need to sync TX */
+		if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4)
+			tls_offload_tx_resync_request(nskb->sk);
+
+		*nr_frags = 0;
+		return nskb;
+	}
+
+	if (datalen) {
+		u64_stats_update_begin(&r_vec->tx_sync);
+		if (!skb_is_gso(skb))
+			r_vec->hw_tls_tx++;
+		else
+			r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs;
+		u64_stats_update_end(&r_vec->tx_sync);
+	}
+
+	memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
+	ntls->next_seq += datalen;
+#endif
+	return skb;
+}
+
+static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle)
+{
+#ifdef CONFIG_TLS_DEVICE
+	struct nfp_net_tls_offload_ctx *ntls;
+	u32 datalen, seq;
+
+	if (!tls_handle)
+		return;
+	if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)))
+		return;
+
+	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+	seq = ntohl(tcp_hdr(skb)->seq);
+
+	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
+	if (ntls->next_seq == seq + datalen)
+		ntls->next_seq = seq;
+	else
+		WARN_ON_ONCE(1);
+#endif
+}
+
 static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
 {
 	wmb();
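nfp_net_tls_tx() above keeps hardware kTLS TX on the fast path only for in-sequence payload: the driver tracks the next TCP sequence number it expects per offloaded socket, and anything out of order (typically retransmissions) is encrypted by the kernel's software fallback via tls_encrypt_skb() instead. A condensed model of that decision; every name here is hypothetical:

#include <linux/types.h>

struct demo_tls_ctx {
	u32 next_seq;	/* next TCP sequence the HW expects to encrypt */
};

/* True when the segment may stay on the HW fast path; mirrors the
 * ordering check in nfp_net_tls_tx().
 */
static bool demo_tls_tx_hw_ok(struct demo_tls_ctx *ctx, u32 seq,
			      u32 datalen, bool resync_pending)
{
	if (resync_pending || ctx->next_seq != seq)
		return false;	/* out of order: software fallback */

	ctx->next_seq += datalen;	/* stay in lockstep with the HW */
	return true;
}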
@@ -785,24 +922,47 @@
 	tx_ring->wr_ptr_add = 0;
 }
 
-static int nfp_net_prep_port_id(struct sk_buff *skb)
+static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle)
 {
 	struct metadata_dst *md_dst = skb_metadata_dst(skb);
 	unsigned char *data;
+	u32 meta_id = 0;
+	int md_bytes;
 
-	if (likely(!md_dst))
+	if (likely(!md_dst && !tls_handle))
 		return 0;
-	if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
-		return 0;
+	if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) {
+		if (!tls_handle)
+			return 0;
+		md_dst = NULL;
+	}
 
-	if (unlikely(skb_cow_head(skb, 8)))
+	md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8;
+
+	if (unlikely(skb_cow_head(skb, md_bytes)))
 		return -ENOMEM;
 
-	data = skb_push(skb, 8);
-	put_unaligned_be32(NFP_NET_META_PORTID, data);
-	put_unaligned_be32(md_dst->u.port_info.port_id, data + 4);
+	meta_id = 0;
+	data = skb_push(skb, md_bytes) + md_bytes;
+	if (md_dst) {
+		data -= 4;
+		put_unaligned_be32(md_dst->u.port_info.port_id, data);
+		meta_id = NFP_NET_META_PORTID;
+	}
+	if (tls_handle) {
+		/* conn handle is opaque, we just use u64 to be able to quickly
+		 * compare it to zero
+		 */
+		data -= 8;
+		memcpy(data, &tls_handle, sizeof(tls_handle));
+		meta_id <<= NFP_NET_META_FIELD_SIZE;
+		meta_id |= NFP_NET_META_CONN_HANDLE;
+	}
 
-	return 8;
+	data -= 4;
+	put_unaligned_be32(meta_id, data);
+
+	return md_bytes;
 }
 
 /**
@@ -815,29 +975,30 @@
 static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
-	const struct skb_frag_struct *frag;
-	struct nfp_net_tx_desc *txd, txdg;
+	const skb_frag_t *frag;
 	int f, nr_frags, wr_idx, md_bytes;
 	struct nfp_net_tx_ring *tx_ring;
 	struct nfp_net_r_vector *r_vec;
 	struct nfp_net_tx_buf *txbuf;
+	struct nfp_net_tx_desc *txd;
 	struct netdev_queue *nd_q;
 	struct nfp_net_dp *dp;
 	dma_addr_t dma_addr;
 	unsigned int fsize;
+	u64 tls_handle = 0;
 	u16 qidx;
 
 	dp = &nn->dp;
 	qidx = skb_get_queue_mapping(skb);
 	tx_ring = &dp->tx_rings[qidx];
 	r_vec = tx_ring->r_vec;
-	nd_q = netdev_get_tx_queue(dp->netdev, qidx);
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
 
 	if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
 		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
 			   qidx, tx_ring->wr_p, tx_ring->rd_p);
+		nd_q = netdev_get_tx_queue(dp->netdev, qidx);
 		netif_tx_stop_queue(nd_q);
 		nfp_net_tx_xmit_more_flush(tx_ring);
 		u64_stats_update_begin(&r_vec->tx_sync);
@@ -846,18 +1007,21 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	md_bytes = nfp_net_prep_port_id(skb);
-	if (unlikely(md_bytes < 0)) {
+	skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
+	if (unlikely(!skb)) {
 		nfp_net_tx_xmit_more_flush(tx_ring);
-		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
 
+	md_bytes = nfp_net_prep_tx_meta(skb, tls_handle);
+	if (unlikely(md_bytes < 0))
+		goto err_flush;
+
 	/* Start with the head skbuf */
 	dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
 				  DMA_TO_DEVICE);
 	if (dma_mapping_error(dp->dev, dma_addr))
-		goto err_free;
+		goto err_dma_err;
 
 	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
 
@@ -881,7 +1045,7 @@
 	txd->lso_hdrlen = 0;
 
 	/* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
-	nfp_net_tx_tso(r_vec, txbuf, txd, skb);
+	nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
 	nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
 	if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
 		txd->flags |= PCIE_DESC_TX_VLAN;
@@ -890,8 +1054,10 @@
 
 	/* Gather DMA */
 	if (nr_frags > 0) {
+		__le64 second_half;
+
 		/* all descs must match except for in addr, length and eop */
-		txdg = *txd;
+		second_half = txd->vals8[1];
 
 		for (f = 0; f < nr_frags; f++) {
 			frag = &skb_shinfo(skb)->frags[f];
@@ -908,11 +1074,11 @@
 			tx_ring->txbufs[wr_idx].fidx = f;
 
 			txd = &tx_ring->txds[wr_idx];
-			*txd = txdg;
 			txd->dma_len = cpu_to_le16(fsize);
 			nfp_desc_set_dma_addr(txd, dma_addr);
-			txd->offset_eop |=
-				(f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0;
+			txd->offset_eop = md_bytes |
+				((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
+			txd->vals8[1] = second_half;
 		}
 
 		u64_stats_update_begin(&r_vec->tx_sync);
@@ -920,16 +1086,16 @@
 		u64_stats_update_end(&r_vec->tx_sync);
 	}
 
-	netdev_tx_sent_queue(nd_q, txbuf->real_len);
-
 	skb_tx_timestamp(skb);
 
+	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
+
 	tx_ring->wr_p += nr_frags + 1;
 	if (nfp_net_tx_ring_should_stop(tx_ring))
 		nfp_net_tx_ring_stop(nd_q, tx_ring);
 
 	tx_ring->wr_ptr_add += nr_frags + 1;
-	if (!skb->xmit_more || netif_xmit_stopped(nd_q))
+	if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
 		nfp_net_tx_xmit_more_flush(tx_ring);
 
 	return NETDEV_TX_OK;
@@ -951,12 +1117,14 @@
 	tx_ring->txbufs[wr_idx].skb = NULL;
 	tx_ring->txbufs[wr_idx].dma_addr = 0;
 	tx_ring->txbufs[wr_idx].fidx = -2;
-err_free:
+err_dma_err:
 	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
+err_flush:
 	nfp_net_tx_xmit_more_flush(tx_ring);
 	u64_stats_update_begin(&r_vec->tx_sync);
 	r_vec->tx_errors++;
 	u64_stats_update_end(&r_vec->tx_sync);
+	nfp_net_tls_tx_undo(skb, tls_handle);
 	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }
@@ -970,14 +1138,10 @@
 {
 	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
-	const struct skb_frag_struct *frag;
 	struct netdev_queue *nd_q;
 	u32 done_pkts = 0, done_bytes = 0;
-	struct sk_buff *skb;
-	int todo, nr_frags;
 	u32 qcp_rd_p;
-	int fidx;
-	int idx;
+	int todo;
 
 	if (tx_ring->wr_p == tx_ring->rd_p)
 		return;
@@ -991,26 +1155,33 @@
 	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
 
 	while (todo--) {
-		idx = D_IDX(tx_ring, tx_ring->rd_p++);
+		const skb_frag_t *frag;
+		struct nfp_net_tx_buf *tx_buf;
+		struct sk_buff *skb;
+		int fidx, nr_frags;
+		int idx;
 
-		skb = tx_ring->txbufs[idx].skb;
+		idx = D_IDX(tx_ring, tx_ring->rd_p++);
+		tx_buf = &tx_ring->txbufs[idx];
+
+		skb = tx_buf->skb;
 		if (!skb)
 			continue;
 
 		nr_frags = skb_shinfo(skb)->nr_frags;
-		fidx = tx_ring->txbufs[idx].fidx;
+		fidx = tx_buf->fidx;
 
 		if (fidx == -1) {
 			/* unmap head */
-			dma_unmap_single(dp->dev, tx_ring->txbufs[idx].dma_addr,
+			dma_unmap_single(dp->dev, tx_buf->dma_addr,
 					 skb_headlen(skb), DMA_TO_DEVICE);
 
-			done_pkts += tx_ring->txbufs[idx].pkt_cnt;
-			done_bytes += tx_ring->txbufs[idx].real_len;
+			done_pkts += tx_buf->pkt_cnt;
+			done_bytes += tx_buf->real_len;
 		} else {
 			/* unmap fragment */
 			frag = &skb_shinfo(skb)->frags[fidx];
-			dma_unmap_page(dp->dev, tx_ring->txbufs[idx].dma_addr,
+			dma_unmap_page(dp->dev, tx_buf->dma_addr,
 				       skb_frag_size(frag), DMA_TO_DEVICE);
 		}
 
@@ -1018,9 +1189,9 @@
 		if (fidx == nr_frags - 1)
 			napi_consume_skb(skb, budget);
 
-		tx_ring->txbufs[idx].dma_addr = 0;
-		tx_ring->txbufs[idx].skb = NULL;
-		tx_ring->txbufs[idx].fidx = -2;
+		tx_buf->dma_addr = 0;
+		tx_buf->skb = NULL;
+		tx_buf->fidx = -2;
 	}
 
 	tx_ring->qcp_rd_p = qcp_rd_p;
@@ -1099,7 +1270,7 @@
 static void
 nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
 {
-	const struct skb_frag_struct *frag;
+	const skb_frag_t *frag;
 	struct netdev_queue *nd_q;
 
 	while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) {
@@ -1652,6 +1823,7 @@
 		struct nfp_net_rx_buf *rxbuf;
 		struct nfp_net_rx_desc *rxd;
 		struct nfp_meta_parsed meta;
+		bool redir_egress = false;
 		struct net_device *netdev;
 		dma_addr_t new_dma_addr;
 		u32 meta_len_xdp = 0;
@@ -1787,13 +1959,16 @@
 			struct nfp_net *nn;
 
 			nn = netdev_priv(dp->netdev);
-			netdev = nfp_app_repr_get(nn->app, meta.portid);
+			netdev = nfp_app_dev_get(nn->app, meta.portid,
+						 &redir_egress);
 			if (unlikely(!netdev)) {
 				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
 						NULL);
 				continue;
 			}
-			nfp_repr_inc_rx_stats(netdev, pkt_len);
+
+			if (nfp_netdev_is_nfp_repr(netdev))
+				nfp_repr_inc_rx_stats(netdev, pkt_len);
 		}
 
 		skb = build_skb(rxbuf->frag, true_bufsz);
@@ -1822,13 +1997,29 @@
 
 		nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb);
 
+#ifdef CONFIG_TLS_DEVICE
+		if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) {
+			skb->decrypted = true;
+			u64_stats_update_begin(&r_vec->rx_sync);
+			r_vec->hw_tls_rx++;
+			u64_stats_update_end(&r_vec->rx_sync);
+		}
+#endif
+
 		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 					       le16_to_cpu(rxd->rxd.vlan));
 		if (meta_len_xdp)
 			skb_metadata_set(skb, meta_len_xdp);
 
-		napi_gro_receive(&rx_ring->r_vec->napi, skb);
+		if (likely(!redir_egress)) {
+			napi_gro_receive(&rx_ring->r_vec->napi, skb);
+		} else {
+			skb->dev = netdev;
+			skb_reset_network_header(skb);
+			__skb_push(skb, ETH_HLEN);
+			dev_queue_xmit(skb);
+		}
 	}
 
 	if (xdp_prog) {
@@ -2094,10 +2285,10 @@
 {
 	struct nfp_net_r_vector *r_vec = (void *)arg;
 
-	spin_lock_bh(&r_vec->lock);
+	spin_lock(&r_vec->lock);
 	nfp_net_tx_complete(r_vec->tx_ring, 0);
 	__nfp_ctrl_tx_queued(r_vec);
-	spin_unlock_bh(&r_vec->lock);
+	spin_unlock(&r_vec->lock);
 
 	if (nfp_ctrl_rx(r_vec)) {
 		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
@@ -2186,10 +2377,14 @@
 	tx_ring->cnt = dp->txd_cnt;
 
 	tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));
-	tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
-					    &tx_ring->dma, GFP_KERNEL);
-	if (!tx_ring->txds)
+	tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size,
+					   &tx_ring->dma,
+					   GFP_KERNEL | __GFP_NOWARN);
+	if (!tx_ring->txds) {
+		netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
+			    tx_ring->cnt);
 		goto err_alloc;
+	}
 
 	tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
 				   GFP_KERNEL);
@@ -2340,10 +2535,14 @@
 
 	rx_ring->cnt = dp->rxd_cnt;
 	rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
-	rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
-					    &rx_ring->dma, GFP_KERNEL);
-	if (!rx_ring->rxds)
+	rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size,
+					   &rx_ring->dma,
+					   GFP_KERNEL | __GFP_NOWARN);
+	if (!rx_ring->rxds) {
+		netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
+			    rx_ring->cnt);
 		goto err_alloc;
+	}
 
 	rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
 				   GFP_KERNEL);
@@ -3120,7 +3319,9 @@
 static int
 nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
 {
+	const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD;
 	struct nfp_net *nn = netdev_priv(netdev);
+	int err;
 
 	/* Priority tagged packets with vlan id 0 are processed by the
 	 * NFP as untagged packets
@@ -3128,17 +3329,23 @@
 	 */
 	if (!vid)
 		return 0;
 
+	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
+	if (err)
+		return err;
+
 	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
 	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
 		  ETH_P_8021Q);
 
-	return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD);
+	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
 }
 
 static int
 nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
 {
+	const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL;
 	struct nfp_net *nn = netdev_priv(netdev);
+	int err;
 
 	/* Priority tagged packets with vlan id 0 are processed by the
 	 * NFP as untagged packets
@@ -3146,11 +3353,15 @@
 	 */
 	if (!vid)
 		return 0;
 
+	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
+	if (err)
+		return err;
+
 	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
 	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
 		  ETH_P_8021Q);
 
-	return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
+	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
 }
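Both VLAN handlers now share the same mailbox transaction shape; a sketch of the common pattern (the helper name is illustrative, the calls are the driver's own):

	static int nfp_net_mbox_vlan_filter(struct nfp_net *nn, u16 vid, u32 cmd)
	{
		int err;

		/* Serialize mailbox users; fails if the area is too small */
		err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
		if (err)
			return err;

		/* Stage the arguments ... */
		nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID,
			  vid);
		nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
			  ETH_P_8021Q);

		/* ... then fire the command and drop the lock in one step */
		return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
	}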
 
 static void nfp_net_stat64(struct net_device *netdev,
@@ -3159,6 +3370,7 @@
 	struct nfp_net *nn = netdev_priv(netdev);
 	int r;
 
+	/* Collect software stats */
 	for (r = 0; r < nn->max_r_vecs; r++) {
 		struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
 		u64 data[3];
@@ -3184,6 +3396,14 @@
 		stats->tx_bytes += data[1];
 		stats->tx_errors += data[2];
 	}
+
+	/* Add in device stats */
+	stats->multicast += nn_readq(nn, NFP_NET_CFG_STATS_RX_MC_FRAMES);
+	stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS);
+	stats->rx_errors += nn_readq(nn, NFP_NET_CFG_STATS_RX_ERRORS);
+
+	stats->tx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_TX_DISCARDS);
+	stats->tx_errors += nn_readq(nn, NFP_NET_CFG_STATS_TX_ERRORS);
 }
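The per-ring snapshot loops sit between the hunks above; a hedged fragment of the scheme (counter names follow the driver's r_vec layout and are assumed here):

	unsigned int start;
	u64 pkts;

	/* Software counters: retry the read until no writer updated the
	 * u64_stats seqcount mid-snapshot, so the datapath never blocks.
	 */
	do {
		start = u64_stats_fetch_begin(&r_vec->rx_sync);
		pkts = r_vec->rx_pkts;
	} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
	stats->rx_packets += pkts;

	/* Device counters: read straight from the control BAR and added
	 * on top, as in the hunk above.
	 */
	stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS);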
 
 static int nfp_net_set_features(struct net_device *netdev,
@@ -3288,7 +3508,10 @@
 		hdrlen = skb_inner_transport_header(skb) - skb->data +
 			inner_tcp_hdrlen(skb);
 
-		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ))
+		/* Assume worst case scenario of having longest possible
+		 * metadata prepend - 8B
+		 */
+		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
 			features &= ~NETIF_F_GSO_MASK;
 	}
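A worked form of the check above; the non-encapsulated branch, the helper name, and the constant's value (255 in nfp_net.h) are taken from surrounding driver code and should be treated as illustrative:

	static netdev_features_t
	nfp_lso_hdr_check(struct sk_buff *skb, netdev_features_t features)
	{
		unsigned int hdrlen;

		if (skb->encapsulation)
			hdrlen = skb_inner_transport_header(skb) - skb->data +
				 inner_tcp_hdrlen(skb);
		else
			hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);

		/* With NFP_NET_LSO_MAX_HDR_SZ == 255 and an 8B metadata
		 * prepend, anything above 247B of headers cannot be
		 * offloaded.
		 */
		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
			features &= ~NETIF_F_GSO_MASK;
		return features;
	}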
 
@@ -3321,8 +3544,11 @@
 	struct nfp_net *nn = netdev_priv(netdev);
 	int n;
 
+	/* If port is defined, devlink_port is registered and devlink core
+	 * is taking care of name formatting.
+	 */
 	if (nn->port)
-		return nfp_port_get_phys_port_name(netdev, name, len);
+		return -EOPNOTSUPP;
 
 	if (nn->dp.is_vf || nn->vnic_no_name)
 		return -EOPNOTSUPP;
@@ -3514,6 +3740,7 @@
 	.ndo_set_vf_mac         = nfp_app_set_vf_mac,
 	.ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
 	.ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
+	.ndo_set_vf_trust	= nfp_app_set_vf_trust,
 	.ndo_get_vf_config	= nfp_app_get_vf_config,
 	.ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
 	.ndo_setup_tc		= nfp_port_setup_tc,
@@ -3527,6 +3754,7 @@
 	.ndo_udp_tunnel_add	= nfp_net_add_vxlan_port,
 	.ndo_udp_tunnel_del	= nfp_net_del_vxlan_port,
 	.ndo_bpf		= nfp_net_xdp,
+	.ndo_get_devlink_port	= nfp_devlink_get_devlink_port,
 };
 
 /**
@@ -3543,7 +3771,7 @@
 		nn->fw_ver.resv, nn->fw_ver.class,
 		nn->fw_ver.major, nn->fw_ver.minor,
 		nn->max_mtu);
-	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 		nn->cap,
 		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
 		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
@@ -3559,7 +3787,6 @@
 		nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS1 "     : "",
 		nn->cap & NFP_NET_CFG_CTRL_RSS2     ? "RSS2 "     : "",
 		nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
-		nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "",
 		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
 		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
 		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
@@ -3573,6 +3800,7 @@
 /**
  * nfp_net_alloc() - Allocate netdev and related structure
  * @pdev:         PCI device
+ * @ctrl_bar:     PCI IOMEM with vNIC config memory
  * @needs_netdev: Whether to allocate a netdev for this vNIC
  * @max_tx_rings: Maximum number of TX rings supported by device
  * @max_rx_rings: Maximum number of RX rings supported by device
@@ -3583,11 +3811,12 @@
  *
  * Return: NFP Net device structure, or ERR_PTR on error.
  */
-struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev,
-			      unsigned int max_tx_rings,
-			      unsigned int max_rx_rings)
+struct nfp_net *
+nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
+	      unsigned int max_tx_rings, unsigned int max_rx_rings)
 {
 	struct nfp_net *nn;
+	int err;
 
 	if (needs_netdev) {
 		struct net_device *netdev;
@@ -3607,6 +3836,7 @@
 	}
 
 	nn->dp.dev = &pdev->dev;
+	nn->dp.ctrl_bar = ctrl_bar;
 	nn->pdev = pdev;
 
 	nn->max_tx_rings = max_tx_rings;
@@ -3624,12 +3854,30 @@
 	nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
 	nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
 
+	sema_init(&nn->bar_lock, 1);
+
 	spin_lock_init(&nn->reconfig_lock);
 	spin_lock_init(&nn->link_status_lock);
 
 	timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
 
+	err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
+				     &nn->tlv_caps);
+	if (err)
+		goto err_free_nn;
+
+	err = nfp_ccm_mbox_alloc(nn);
+	if (err)
+		goto err_free_nn;
+
 	return nn;
+
+err_free_nn:
+	if (nn->dp.netdev)
+		free_netdev(nn->dp.netdev);
+	else
+		vfree(nn);
+	return ERR_PTR(err);
 }
 
 /**
@@ -3639,6 +3887,8 @@
 void nfp_net_free(struct nfp_net *nn)
 {
 	WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
+	nfp_ccm_mbox_free(nn);
+
 	if (nn->dp.netdev)
 		free_netdev(nn->dp.netdev);
 	else
@@ -3751,15 +4001,18 @@
 	}
 	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)
 		netdev->hw_features |= NETIF_F_RXHASH;
-	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
-	    nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
+	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) {
 		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
-			netdev->hw_features |= NETIF_F_GSO_GRE |
-					       NETIF_F_GSO_UDP_TUNNEL;
-		nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
-
-		netdev->hw_enc_features = netdev->hw_features;
+			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+		nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN;
 	}
+	if (nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
+		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
+			netdev->hw_features |= NETIF_F_GSO_GRE;
+		nn->dp.ctrl |= NFP_NET_CFG_CTRL_NVGRE;
+	}
+	if (nn->cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
+		netdev->hw_enc_features = netdev->hw_features;
 
 	netdev->vlan_features = netdev->hw_features;
 
@@ -3793,8 +4046,6 @@
 	netdev->netdev_ops = &nfp_net_netdev_ops;
 	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
 
-	SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
-
 	/* MTU range: 68 - hw-specific max */
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = nn->max_mtu;
@@ -3864,10 +4115,13 @@
 		return err;
 
 	/* Set default MTU and Freelist buffer size */
-	if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
+	if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
+		nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu);
+	} else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
 		nn->dp.mtu = nn->max_mtu;
-	else
+	} else {
 		nn->dp.mtu = NFP_NET_DEFAULT_MTU;
+	}
 	nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
 
 	if (nfp_app_ctrl_uses_data_vnics(nn->app))
@@ -3889,14 +4143,6 @@
 		nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
 	}
 
-	err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
-				     &nn->tlv_caps);
-	if (err)
-		return err;
-
-	if (nn->dp.netdev)
-		nfp_net_netdev_init(nn);
-
 	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
 	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
 
@@ -3909,11 +4155,27 @@
 	if (err)
 		return err;
 
+	if (nn->dp.netdev) {
+		nfp_net_netdev_init(nn);
+
+		err = nfp_ccm_mbox_init(nn);
+		if (err)
+			return err;
+
+		err = nfp_net_tls_init(nn);
+		if (err)
+			goto err_clean_mbox;
+	}
+
 	nfp_net_vecs_init(nn);
 
 	if (!nn->dp.netdev)
 		return 0;
 	return register_netdev(nn->dp.netdev);
+
+err_clean_mbox:
+	nfp_ccm_mbox_clean(nn);
+	return err;
 }
 
 /**
@@ -3926,5 +4188,6 @@
 		return;
 
 	unregister_netdev(nn->dp.netdev);
+	nfp_ccm_mbox_clean(nn);
 	nfp_net_reconfig_wait_posted(nn);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
index 1f9149b..d835c14 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
 #include <linux/device.h>
@@ -71,8 +41,8 @@
 		data += 4;
 
 		if (length % NFP_NET_CFG_TLV_LENGTH_INC) {
-			dev_err(dev, "TLV size not multiple of %u len:%u\n",
-				NFP_NET_CFG_TLV_LENGTH_INC, length);
+			dev_err(dev, "TLV size not multiple of %u offset:%u len:%u\n",
+				NFP_NET_CFG_TLV_LENGTH_INC, offset, length);
 			return -EINVAL;
 		}
 		if (data + length > end) {
@@ -91,14 +61,14 @@
 			if (!length)
 				return 0;
 
-			dev_err(dev, "END TLV should be empty, has len:%d\n",
-				length);
+			dev_err(dev, "END TLV should be empty, has offset:%u len:%d\n",
+				offset, length);
 			return -EINVAL;
 		case NFP_NET_CFG_TLV_TYPE_ME_FREQ:
 			if (length != 4) {
 				dev_err(dev,
-					"ME FREQ TLV should be 4B, is %dB\n",
-					length);
+					"ME FREQ TLV should be 4B, is %dB offset:%u\n",
+					length, offset);
 				return -EINVAL;
 			}
 
@@ -113,6 +83,37 @@
 				caps->mbox_len = length;
 			}
 			break;
+		case NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0:
+		case NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1:
+			dev_warn(dev,
+				 "experimental TLV type:%u offset:%u len:%u\n",
+				 FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr),
+				 offset, length);
+			break;
+		case NFP_NET_CFG_TLV_TYPE_REPR_CAP:
+			if (length < 4) {
+				dev_err(dev, "REPR CAP TLV short %dB < 4B offset:%u\n",
+					length, offset);
+				return -EINVAL;
+			}
+
+			caps->repr_cap = readl(data);
+			break;
+		case NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES:
+			if (length >= 4)
+				caps->mbox_cmsg_types = readl(data);
+			break;
+		case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS:
+			if (length < 32) {
+				dev_err(dev,
+					"CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n",
+					length, offset);
+				return -EINVAL;
+			}
+
+			caps->crypto_ops = readl(data);
+			caps->crypto_enable_off = data - ctrl_mem + 16;
+			break;
 		default:
 			if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr))
 				break;
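For orientation, a hedged reconstruction of the parse loop these cases extend; the loop framing and the NFP_NET_CFG_TLV_BASE / NFP_NET_CFG_TLV_HEADER_LENGTH names are inferred from the checks above, not quoted driver code:

	u8 __iomem *data = ctrl_mem + NFP_NET_CFG_TLV_BASE;
	unsigned int length, offset;
	u32 hdr;

	while (data + 4 <= end) {
		hdr = readl(data);
		if (!hdr)	/* an all-zero header terminates the list */
			break;

		length = FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr);
		offset = data - ctrl_mem;	/* reported in the error messages */
		data += 4;	/* step over the 4B header to the value */

		if (length % NFP_NET_CFG_TLV_LENGTH_INC ||
		    data + length > end)
			return -EINVAL;	/* misaligned or truncated TLV */

		switch (FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr)) {
		/* per-type handling as in the hunk above */
		default:
			if (FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr))
				return -EINVAL;	/* unknown but mandatory */
			break;
		}

		data += length;	/* advance to the next TLV */
	}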
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 44d3ea7..ee6b24e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_net_ctrl.h
@@ -74,6 +44,7 @@
 #define NFP_NET_META_MARK		2
 #define NFP_NET_META_PORTID		5
 #define NFP_NET_META_CSUM		6 /* checksum complete type */
+#define NFP_NET_META_CONN_HANDLE	7
 
 #define NFP_META_PORT_ID_CTRL		~0U
 
@@ -134,8 +105,6 @@
 #define   NFP_NET_CFG_CTRL_RINGPRIO	  (0x1 << 19) /* Ring priorities */
 #define   NFP_NET_CFG_CTRL_MSIXAUTO	  (0x1 << 20) /* MSI-X auto-masking */
 #define   NFP_NET_CFG_CTRL_TXRWB	  (0x1 << 21) /* Write-back of TX ring*/
-#define   NFP_NET_CFG_CTRL_L2SWITCH	  (0x1 << 22) /* L2 Switch */
-#define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define   NFP_NET_CFG_CTRL_VXLAN	  (0x1 << 24) /* VXLAN tunnel support */
 #define   NFP_NET_CFG_CTRL_NVGRE	  (0x1 << 25) /* NVGRE tunnel support */
 #define   NFP_NET_CFG_CTRL_BPF		  (0x1 << 27) /* BPF offload capable */
@@ -160,7 +129,6 @@
 #define   NFP_NET_CFG_UPDATE_TXRPRIO	  (0x1 <<  3) /* TX Ring prio change */
 #define   NFP_NET_CFG_UPDATE_RXRPRIO	  (0x1 <<  4) /* RX Ring prio change */
 #define   NFP_NET_CFG_UPDATE_MSIX	  (0x1 <<  5) /* MSI-X change */
-#define   NFP_NET_CFG_UPDATE_L2SWITCH	  (0x1 <<  6) /* Switch changes */
 #define   NFP_NET_CFG_UPDATE_RESET	  (0x1 <<  7) /* Update due to FLR */
 #define   NFP_NET_CFG_UPDATE_IRQMOD	  (0x1 <<  8) /* IRQ mod change */
 #define   NFP_NET_CFG_UPDATE_VXLAN	  (0x1 <<  9) /* VXLAN port change */
@@ -168,6 +136,7 @@
 #define   NFP_NET_CFG_UPDATE_MACADDR	  (0x1 << 11) /* MAC address change */
 #define   NFP_NET_CFG_UPDATE_MBOX	  (0x1 << 12) /* Mailbox update */
 #define   NFP_NET_CFG_UPDATE_VF		  (0x1 << 13) /* VF settings change */
+#define   NFP_NET_CFG_UPDATE_CRYPTO	  (0x1 << 14) /* Crypto on/off */
 #define   NFP_NET_CFG_UPDATE_ERR	  (0x1 << 31) /* An error occurred */
 #define NFP_NET_CFG_TXRS_ENABLE		0x0008
 #define NFP_NET_CFG_RXRS_ENABLE		0x0010
@@ -264,7 +233,6 @@
  * %NFP_NET_CFG_BPF_ADDR:	DMA address of the buffer with JITed BPF code
  */
 #define NFP_NET_CFG_BPF_ABI		0x0080
-#define   NFP_NET_BPF_ABI		2
 #define NFP_NET_CFG_BPF_CAP		0x0081
 #define   NFP_NET_BPF_CAP_RELO		(1 << 0) /* seamless reload */
 #define NFP_NET_CFG_BPF_MAX_LEN		0x0082
@@ -423,11 +391,13 @@
 #define NFP_NET_CFG_MBOX_SIMPLE_CMD	0x0
 #define NFP_NET_CFG_MBOX_SIMPLE_RET	0x4
 #define NFP_NET_CFG_MBOX_SIMPLE_VAL	0x8
-#define NFP_NET_CFG_MBOX_SIMPLE_LEN	0x12
 
 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1
 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2
 
+#define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET	5
+#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG			6
+
 /**
  * VLAN filtering using general use mailbox
  * %NFP_NET_CFG_VLAN_FILTER:		Base address of VLAN filter mailbox
@@ -489,12 +459,37 @@
  * %NFP_NET_CFG_TLV_TYPE_MBOX:
  * Variable, mailbox area.  Overwrites the default location which is
  * %NFP_NET_CFG_MBOX_BASE and length %NFP_NET_CFG_MBOX_VAL_MAX_SZ.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0:
+ * %NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1:
+ * Variable, experimental IDs.  IDs designated for internal development and
+ * experiments before a stable TLV ID has been allocated to a feature.  Should
+ * never be present in production firmware.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_REPR_CAP:
+ * Single word, equivalent of %NFP_NET_CFG_CAP for representors, features which
+ * can be used on representors.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES:
+ * Variable, bitmap of control message types supported by the mailbox handler.
+ * Bit 0 corresponds to message type 0, bit 1 to 1, etc.  Control messages are
+ * encapsulated into simple TLVs, with an end TLV and written to the Mailbox.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS:
+ * 8 words, bitmaps of supported and enabled crypto operations.
+ * First 16B (4 words) contains a bitmap of supported crypto operations,
+ * and next 16B contain the enabled operations.
  */
 #define NFP_NET_CFG_TLV_TYPE_UNKNOWN		0
 #define NFP_NET_CFG_TLV_TYPE_RESERVED		1
 #define NFP_NET_CFG_TLV_TYPE_END		2
 #define NFP_NET_CFG_TLV_TYPE_ME_FREQ		3
 #define NFP_NET_CFG_TLV_TYPE_MBOX		4
+#define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0	5
+#define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1	6
+#define NFP_NET_CFG_TLV_TYPE_REPR_CAP		7
+#define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES	10
+#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS		11 /* see crypto/fw.h */
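The crypto_enable_off arithmetic in the parser (value offset + 16) follows directly from the layout documented above:

	/* CRYPTO_OPS TLV value, byte offsets from the start of the value:
	 *
	 *   [ 0 .. 15]  supported-operations bitmap (4 x 32-bit words)
	 *   [16 .. 31]  enabled-operations bitmap   (4 x 32-bit words)
	 *
	 * Only the first supported word is cached via readl(data); the
	 * enable region is later addressed at value + 16.
	 */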
 
 struct device;
 
@@ -503,19 +498,21 @@
  * @me_freq_mhz:	ME clock_freq (MHz)
  * @mbox_off:		vNIC mailbox area offset
  * @mbox_len:		vNIC mailbox area length
+ * @repr_cap:		capabilities for representors
+ * @mbox_cmsg_types:	cmsgs which can be passed through the mailbox
+ * @crypto_ops:		supported crypto operations
+ * @crypto_enable_off:	offset of crypto ops enable region
  */
 struct nfp_net_tlv_caps {
 	u32 me_freq_mhz;
 	unsigned int mbox_off;
 	unsigned int mbox_len;
+	u32 repr_cap;
+	u32 mbox_cmsg_types;
+	u32 crypto_ops;
+	unsigned int crypto_enable_off;
 };
 
 int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
 			   struct nfp_net_tlv_caps *caps);
-
-static inline bool nfp_net_has_mbox(struct nfp_net_tlv_caps *caps)
-{
-	return caps->mbox_len >= NFP_NET_CFG_MBOX_SIMPLE_LEN;
-}
-
 #endif /* _NFP_NET_CTRL_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
index bb8ed46..769ceef 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/ethtool.h>
 #include <linux/vmalloc.h>
@@ -188,25 +158,21 @@
 	const struct nfp_rtsym *specsym;
 	struct nfp_dumpspec *dumpspec;
 	int bytes_read;
-	u32 cpp_id;
+	u64 sym_size;
 
 	specsym = nfp_rtsym_lookup(rtbl, NFP_DUMP_SPEC_RTSYM);
 	if (!specsym)
 		return NULL;
+	sym_size = nfp_rtsym_size(specsym);
 
 	/* expected size of this buffer is in the order of tens of kilobytes */
-	dumpspec = vmalloc(sizeof(*dumpspec) + specsym->size);
+	dumpspec = vmalloc(sizeof(*dumpspec) + sym_size);
 	if (!dumpspec)
 		return NULL;
+	dumpspec->size = sym_size;
 
-	dumpspec->size = specsym->size;
-
-	cpp_id = NFP_CPP_ISLAND_ID(specsym->target, NFP_CPP_ACTION_RW, 0,
-				   specsym->domain);
-
-	bytes_read = nfp_cpp_read(cpp, cpp_id, specsym->addr, dumpspec->data,
-				  specsym->size);
-	if (bytes_read != specsym->size) {
+	bytes_read = nfp_rtsym_read(cpp, specsym, 0, dumpspec->data, sym_size);
+	if (bytes_read != sym_size) {
 		vfree(dumpspec);
 		nfp_warn(cpp, "Debug dump specification read failed.\n");
 		return NULL;
@@ -266,7 +232,6 @@
 	struct nfp_dumpspec_rtsym *spec_rtsym;
 	const struct nfp_rtsym *sym;
 	u32 tl_len, key_len;
-	u32 size;
 
 	spec_rtsym = (struct nfp_dumpspec_rtsym *)spec;
 	tl_len = be32_to_cpu(spec->length);
@@ -278,13 +243,8 @@
 	if (!sym)
 		return nfp_dump_error_tlv_size(spec);
 
-	if (sym->type == NFP_RTSYM_TYPE_ABS)
-		size = sizeof(sym->addr);
-	else
-		size = sym->size;
-
 	return ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1) +
-	       ALIGN8(size);
+	       ALIGN8(nfp_rtsym_size(sym));
 }
 
 static int
@@ -644,7 +604,6 @@
 	const struct nfp_rtsym *sym;
 	u32 tl_len, key_len;
 	int bytes_read;
-	u32 cpp_id;
 	void *dest;
 	int err;
 
@@ -657,11 +616,7 @@
 	if (!sym)
 		return nfp_dump_error_tlv(&spec->tl, -ENOENT, dump);
 
-	if (sym->type == NFP_RTSYM_TYPE_ABS)
-		sym_size = sizeof(sym->addr);
-	else
-		sym_size = sym->size;
-
+	sym_size = nfp_rtsym_size(sym);
 	header_size =
 		ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1);
 	total_size = header_size + ALIGN8(sym_size);
@@ -676,23 +631,20 @@
 	memcpy(dump_header->rtsym, spec->rtsym, key_len + 1);
 	dump_header->cpp.dump_length = cpu_to_be32(sym_size);
 
-	if (sym->type == NFP_RTSYM_TYPE_ABS) {
-		*(u64 *)dest = sym->addr;
-	} else {
+	if (sym->type != NFP_RTSYM_TYPE_ABS) {
 		cpp_params.target = sym->target;
 		cpp_params.action = NFP_CPP_ACTION_RW;
 		cpp_params.token  = 0;
 		cpp_params.island = sym->domain;
-		cpp_id = nfp_get_numeric_cpp_id(&cpp_params);
 		dump_header->cpp.cpp_id = cpp_params;
 		dump_header->cpp.offset = cpu_to_be32(sym->addr);
-		bytes_read = nfp_cpp_read(pf->cpp, cpp_id, sym->addr, dest,
-					  sym_size);
-		if (bytes_read != sym_size) {
-			if (bytes_read >= 0)
-				bytes_read = -EIO;
-			dump_header->error = cpu_to_be32(bytes_read);
-		}
+	}
+
+	bytes_read = nfp_rtsym_read(pf->cpp, sym, 0, dest, sym_size);
+	if (bytes_read != sym_size) {
+		if (bytes_read >= 0)
+			bytes_read = -EIO;
+		dump_header->error = cpu_to_be32(bytes_read);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index 099b63d..553c708 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 #include <linux/debugfs.h>
 #include <linux/module.h>
 #include <linux/rtnetlink.h>
@@ -38,7 +8,7 @@
 
 static struct dentry *nfp_dir;
 
-static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
+static int nfp_rx_q_show(struct seq_file *file, void *data)
 {
 	struct nfp_net_r_vector *r_vec = file->private;
 	struct nfp_net_rx_ring *rx_ring;
@@ -95,31 +65,12 @@
 	rtnl_unlock();
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(nfp_rx_q);
 
-static int nfp_net_debugfs_rx_q_open(struct inode *inode, struct file *f)
-{
-	return single_open(f, nfp_net_debugfs_rx_q_read, inode->i_private);
-}
+static int nfp_tx_q_show(struct seq_file *file, void *data);
+DEFINE_SHOW_ATTRIBUTE(nfp_tx_q);
 
-static const struct file_operations nfp_rx_q_fops = {
-	.owner = THIS_MODULE,
-	.open = nfp_net_debugfs_rx_q_open,
-	.release = single_release,
-	.read = seq_read,
-	.llseek = seq_lseek
-};
-
-static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f);
-
-static const struct file_operations nfp_tx_q_fops = {
-	.owner = THIS_MODULE,
-	.open = nfp_net_debugfs_tx_q_open,
-	.release = single_release,
-	.read = seq_read,
-	.llseek = seq_lseek
-};
-
-static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
+static int nfp_tx_q_show(struct seq_file *file, void *data)
 {
 	struct nfp_net_r_vector *r_vec = file->private;
 	struct nfp_net_tx_ring *tx_ring;
@@ -188,18 +139,11 @@
 	return 0;
 }
 
-static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f)
+static int nfp_xdp_q_show(struct seq_file *file, void *data)
 {
-	return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private);
+	return nfp_tx_q_show(file, data);
 }
-
-static const struct file_operations nfp_xdp_q_fops = {
-	.owner = THIS_MODULE,
-	.open = nfp_net_debugfs_tx_q_open,
-	.release = single_release,
-	.read = seq_read,
-	.llseek = seq_lseek
-};
+DEFINE_SHOW_ATTRIBUTE(nfp_xdp_q);
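DEFINE_SHOW_ATTRIBUTE() from <linux/seq_file.h> generates exactly the boilerplate this file's hunks delete; roughly, DEFINE_SHOW_ATTRIBUTE(nfp_rx_q) expands to:

	static int nfp_rx_q_open(struct inode *inode, struct file *file)
	{
		return single_open(file, nfp_rx_q_show, inode->i_private);
	}

	static const struct file_operations nfp_rx_q_fops = {
		.owner		= THIS_MODULE,
		.open		= nfp_rx_q_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= single_release,
	};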
 
 void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir)
 {
@@ -215,19 +159,13 @@
 	else
 		strcpy(name, "ctrl-vnic");
 	nn->debugfs_dir = debugfs_create_dir(name, ddir);
-	if (IS_ERR_OR_NULL(nn->debugfs_dir))
-		return;
 
 	/* Create queue debugging sub-tree */
 	queues = debugfs_create_dir("queue", nn->debugfs_dir);
-	if (IS_ERR_OR_NULL(queues))
-		return;
 
 	rx = debugfs_create_dir("rx", queues);
 	tx = debugfs_create_dir("tx", queues);
 	xdp = debugfs_create_dir("xdp", queues);
-	if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx) || IS_ERR_OR_NULL(xdp))
-		return;
 
 	for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) {
 		sprintf(name, "%d", i);
@@ -246,16 +184,7 @@
 
 struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev)
 {
-	struct dentry *dev_dir;
-
-	if (IS_ERR_OR_NULL(nfp_dir))
-		return NULL;
-
-	dev_dir = debugfs_create_dir(pci_name(pdev), nfp_dir);
-	if (IS_ERR_OR_NULL(dev_dir))
-		return NULL;
-
-	return dev_dir;
+	return debugfs_create_dir(pci_name(pdev), nfp_dir);
 }
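A note on the dropped error checks in this file (rationale per the kernel-wide debugfs cleanup):

	/* debugfs_create_dir() never returns NULL, and the debugfs API
	 * accepts an error pointer as a parent directory and fails
	 * gracefully, so the IS_ERR_OR_NULL() guards around dentry
	 * results were redundant.
	 */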
 
 void nfp_net_debugfs_dir_clean(struct dentry **dir)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 6a79c8e..1b840ee 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_net_ethtool.c
@@ -48,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/ethtool.h>
 #include <linux/firmware.h>
+#include <linux/sfp.h>
 
 #include "nfpcore/nfp.h"
 #include "nfpcore/nfp_nsp.h"
@@ -179,8 +150,11 @@
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
 #define NN_ET_SWITCH_STATS_LEN 9
-#define NN_RVEC_GATHER_STATS	9
+#define NN_RVEC_GATHER_STATS	13
 #define NN_RVEC_PER_Q_STATS	3
+#define NN_CTRL_PATH_STATS	1
+
+#define SFP_SFF_REV_COMPLIANCE	1
 
 static void nfp_net_get_nspinfo(struct nfp_app *app, char *version)
 {
@@ -450,7 +424,8 @@
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
-	return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS;
+	return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS +
+		NN_CTRL_PATH_STATS;
 }
 
 static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
@@ -469,10 +444,16 @@
 	data = nfp_pr_et(data, "hw_rx_csum_complete");
 	data = nfp_pr_et(data, "hw_rx_csum_err");
 	data = nfp_pr_et(data, "rx_replace_buf_alloc_fail");
+	data = nfp_pr_et(data, "rx_tls_decrypted_packets");
 	data = nfp_pr_et(data, "hw_tx_csum");
 	data = nfp_pr_et(data, "hw_tx_inner_csum");
 	data = nfp_pr_et(data, "tx_gather");
 	data = nfp_pr_et(data, "tx_lso");
+	data = nfp_pr_et(data, "tx_tls_encrypted_packets");
+	data = nfp_pr_et(data, "tx_tls_ooo");
+	data = nfp_pr_et(data, "tx_tls_drop_no_sync_data");
+
+	data = nfp_pr_et(data, "hw_tls_no_space");
 
 	return data;
 }
@@ -495,16 +476,20 @@
 			tmp[2] = nn->r_vecs[i].hw_csum_rx_complete;
 			tmp[3] = nn->r_vecs[i].hw_csum_rx_error;
 			tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail;
+			tmp[5] = nn->r_vecs[i].hw_tls_rx;
 		} while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start));
 
 		do {
 			start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync);
 			data[1] = nn->r_vecs[i].tx_pkts;
 			data[2] = nn->r_vecs[i].tx_busy;
-			tmp[5] = nn->r_vecs[i].hw_csum_tx;
-			tmp[6] = nn->r_vecs[i].hw_csum_tx_inner;
-			tmp[7] = nn->r_vecs[i].tx_gather;
-			tmp[8] = nn->r_vecs[i].tx_lso;
+			tmp[6] = nn->r_vecs[i].hw_csum_tx;
+			tmp[7] = nn->r_vecs[i].hw_csum_tx_inner;
+			tmp[8] = nn->r_vecs[i].tx_gather;
+			tmp[9] = nn->r_vecs[i].tx_lso;
+			tmp[10] = nn->r_vecs[i].hw_tls_tx;
+			tmp[11] = nn->r_vecs[i].tls_tx_fallback;
+			tmp[12] = nn->r_vecs[i].tls_tx_no_fallback;
 		} while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start));
 
 		data += NN_RVEC_PER_Q_STATS;
@@ -516,6 +501,8 @@
 	for (j = 0; j < NN_RVEC_GATHER_STATS; j++)
 		*data++ = gathered_stats[j];
 
+	*data++ = atomic_read(&nn->ktls_no_space);
+
 	return data;
 }
 
@@ -1126,6 +1113,130 @@
 					    buffer);
 }
 
+static int
+nfp_port_get_module_info(struct net_device *netdev,
+			 struct ethtool_modinfo *modinfo)
+{
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_port *port;
+	unsigned int read_len;
+	struct nfp_nsp *nsp;
+	int err = 0;
+	u8 data;
+
+	port = nfp_port_from_netdev(netdev);
+	eth_port = nfp_port_get_eth_port(port);
+	if (!eth_port)
+		return -EOPNOTSUPP;
+
+	nsp = nfp_nsp_open(port->app->cpp);
+	if (IS_ERR(nsp)) {
+		err = PTR_ERR(nsp);
+		netdev_err(netdev, "Failed to access the NSP: %d\n", err);
+		return err;
+	}
+
+	if (!nfp_nsp_has_read_module_eeprom(nsp)) {
+		netdev_info(netdev, "reading module EEPROM not supported. Please update flash\n");
+		err = -EOPNOTSUPP;
+		goto exit_close_nsp;
+	}
+
+	switch (eth_port->interface) {
+	case NFP_INTERFACE_SFP:
+	case NFP_INTERFACE_SFP28:
+		err = nfp_nsp_read_module_eeprom(nsp, eth_port->eth_index,
+						 SFP_SFF8472_COMPLIANCE, &data,
+						 1, &read_len);
+		if (err < 0)
+			goto exit_close_nsp;
+
+		if (!data) {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		}
+		break;
+	case NFP_INTERFACE_QSFP:
+		err = nfp_nsp_read_module_eeprom(nsp, eth_port->eth_index,
+						 SFP_SFF_REV_COMPLIANCE, &data,
+						 1, &read_len);
+		if (err < 0)
+			goto exit_close_nsp;
+
+		if (data < 0x3) {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+		}
+		break;
+	case NFP_INTERFACE_QSFP28:
+		modinfo->type = ETH_MODULE_SFF_8636;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+		break;
+	default:
+		netdev_err(netdev, "Unsupported module 0x%x detected\n",
+			   eth_port->interface);
+		err = -EINVAL;
+	}
+
+exit_close_nsp:
+	nfp_nsp_close(nsp);
+	return err;
+}
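For readers without the SFF specifications to hand, the compliance-byte decision implemented above; EEPROM lengths per the ETH_MODULE_SFF_*_LEN constants in ethtool.h:

	/* interface     probe byte                  -> modinfo type, length
	 * SFP, SFP28    SFF-8472 compliance == 0    -> SFF-8079,  256B
	 *               otherwise                   -> SFF-8472,  512B
	 * QSFP          revision compliance < 0x3   -> SFF-8436,  256B
	 *               otherwise                   -> SFF-8636,  256B
	 * QSFP28        (always)                    -> SFF-8636,  256B
	 */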
+
+static int
+nfp_port_get_module_eeprom(struct net_device *netdev,
+			   struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_port *port;
+	struct nfp_nsp *nsp;
+	int err;
+
+	port = nfp_port_from_netdev(netdev);
+	eth_port = __nfp_port_get_eth_port(port);
+	if (!eth_port)
+		return -EOPNOTSUPP;
+
+	nsp = nfp_nsp_open(port->app->cpp);
+	if (IS_ERR(nsp)) {
+		err = PTR_ERR(nsp);
+		netdev_err(netdev, "Failed to access the NSP: %d\n", err);
+		return err;
+	}
+
+	if (!nfp_nsp_has_read_module_eeprom(nsp)) {
+		netdev_info(netdev, "reading module EEPROM not supported. Please update flash\n");
+		err = -EOPNOTSUPP;
+		goto exit_close_nsp;
+	}
+
+	err = nfp_nsp_read_module_eeprom(nsp, eth_port->eth_index,
+					 eeprom->offset, data, eeprom->len,
+					 &eeprom->len);
+	if (err < 0) {
+		if (eeprom->len) {
+			netdev_warn(netdev,
+				    "Incomplete read from module EEPROM: %d\n",
+				     err);
+			err = 0;
+		} else {
+			netdev_err(netdev,
+				   "Reading from module EEPROM failed: %d\n",
+				   err);
+		}
+	}
+
+exit_close_nsp:
+	nfp_nsp_close(nsp);
+	return err;
+}
+
 static int nfp_net_set_coalesce(struct net_device *netdev,
 				struct ethtool_coalesce *ec)
 {
@@ -1264,57 +1375,6 @@
 	return nfp_net_set_num_rings(nn, total_rx, total_tx);
 }
 
-static int
-nfp_net_flash_device(struct net_device *netdev, struct ethtool_flash *flash)
-{
-	const struct firmware *fw;
-	struct nfp_app *app;
-	struct nfp_nsp *nsp;
-	struct device *dev;
-	int err;
-
-	if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
-		return -EOPNOTSUPP;
-
-	app = nfp_app_from_netdev(netdev);
-	if (!app)
-		return -EOPNOTSUPP;
-
-	dev = &app->pdev->dev;
-
-	nsp = nfp_nsp_open(app->cpp);
-	if (IS_ERR(nsp)) {
-		err = PTR_ERR(nsp);
-		dev_err(dev, "Failed to access the NSP: %d\n", err);
-		return err;
-	}
-
-	err = request_firmware_direct(&fw, flash->data, dev);
-	if (err)
-		goto exit_close_nsp;
-
-	dev_info(dev, "Please be patient while writing flash image: %s\n",
-		 flash->data);
-	dev_hold(netdev);
-	rtnl_unlock();
-
-	err = nfp_nsp_write_flash(nsp, fw);
-	if (err < 0) {
-		dev_err(dev, "Flash write failed: %d\n", err);
-		goto exit_rtnl_lock;
-	}
-	dev_info(dev, "Finished writing flash image\n");
-
-exit_rtnl_lock:
-	rtnl_lock();
-	dev_put(netdev);
-	release_firmware(fw);
-
-exit_close_nsp:
-	nfp_nsp_close(nsp);
-	return err;
-}
-
 static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.get_drvinfo		= nfp_net_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
@@ -1325,7 +1385,6 @@
 	.get_sset_count		= nfp_net_get_sset_count,
 	.get_rxnfc		= nfp_net_get_rxnfc,
 	.set_rxnfc		= nfp_net_set_rxnfc,
-	.flash_device		= nfp_net_flash_device,
 	.get_rxfh_indir_size	= nfp_net_get_rxfh_indir_size,
 	.get_rxfh_key_size	= nfp_net_get_rxfh_key_size,
 	.get_rxfh		= nfp_net_get_rxfh,
@@ -1335,6 +1394,8 @@
 	.set_dump		= nfp_app_set_dump,
 	.get_dump_flag		= nfp_app_get_dump_flag,
 	.get_dump_data		= nfp_app_get_dump_data,
+	.get_module_info	= nfp_port_get_module_info,
+	.get_module_eeprom	= nfp_port_get_module_eeprom,
 	.get_coalesce           = nfp_net_get_coalesce,
 	.set_coalesce           = nfp_net_set_coalesce,
 	.get_channels		= nfp_net_get_channels,
@@ -1351,10 +1412,11 @@
 	.get_strings		= nfp_port_get_strings,
 	.get_ethtool_stats	= nfp_port_get_stats,
 	.get_sset_count		= nfp_port_get_sset_count,
-	.flash_device		= nfp_net_flash_device,
 	.set_dump		= nfp_app_set_dump,
 	.get_dump_flag		= nfp_app_get_dump_flag,
 	.get_dump_data		= nfp_app_get_dump_data,
+	.get_module_info	= nfp_port_get_module_info,
+	.get_module_eeprom	= nfp_port_get_module_eeprom,
 	.get_link_ksettings	= nfp_net_get_link_ksettings,
 	.set_link_ksettings	= nfp_net_set_link_ksettings,
 	.get_fecparam		= nfp_port_get_fecparam,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 28516ee..921db40 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_net_main.c
@@ -146,13 +116,13 @@
 	n_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS);
 
 	/* Allocate and initialise the vNIC */
-	nn = nfp_net_alloc(pf->pdev, needs_netdev, n_tx_rings, n_rx_rings);
+	nn = nfp_net_alloc(pf->pdev, ctrl_bar, needs_netdev,
+			   n_tx_rings, n_rx_rings);
 	if (IS_ERR(nn))
 		return nn;
 
 	nn->app = pf->app;
 	nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar);
-	nn->dp.ctrl_bar = ctrl_bar;
 	nn->tx_bar = qc_bar + tx_base * NFP_QCP_QUEUE_ADDR_SZ;
 	nn->rx_bar = qc_bar + rx_base * NFP_QCP_QUEUE_ADDR_SZ;
 	nn->dp.is_vf = 0;
@@ -180,34 +150,39 @@
 
 	nn->id = id;
 
-	err = nfp_net_init(nn);
-	if (err)
-		return err;
-
-	nfp_net_debugfs_vnic_add(nn, pf->ddir);
-
 	if (nn->port) {
 		err = nfp_devlink_port_register(pf->app, nn->port);
 		if (err)
-			goto err_dfs_clean;
+			return err;
 	}
 
+	err = nfp_net_init(nn);
+	if (err)
+		goto err_devlink_port_clean;
+
+	nfp_net_debugfs_vnic_add(nn, pf->ddir);
+
+	if (nn->port)
+		nfp_devlink_port_type_eth_set(nn->port);
+
 	nfp_net_info(nn);
 
 	if (nfp_net_is_data_vnic(nn)) {
 		err = nfp_app_vnic_init(pf->app, nn);
 		if (err)
-			goto err_devlink_port_clean;
+			goto err_devlink_port_type_clean;
 	}
 
 	return 0;
 
+err_devlink_port_type_clean:
+	if (nn->port)
+		nfp_devlink_port_type_clear(nn->port);
+	nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
+	nfp_net_clean(nn);
 err_devlink_port_clean:
 	if (nn->port)
 		nfp_devlink_port_unregister(nn->port);
-err_dfs_clean:
-	nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
-	nfp_net_clean(nn);
 	return err;
 }
 
@@ -230,10 +205,8 @@
 		ctrl_bar += NFP_PF_CSR_SLICE_SIZE;
 
 		/* Kill the vNIC if app init marked it as invalid */
-		if (nn->port && nn->port->type == NFP_PORT_INVALID) {
+		if (nn->port && nn->port->type == NFP_PORT_INVALID)
 			nfp_net_pf_free_vnic(pf, nn);
-			continue;
-		}
 	}
 
 	if (list_empty(&pf->vnics))
@@ -251,9 +224,11 @@
 	if (nfp_net_is_data_vnic(nn))
 		nfp_app_vnic_clean(pf->app, nn);
 	if (nn->port)
-		nfp_devlink_port_unregister(nn->port);
+		nfp_devlink_port_type_clear(nn->port);
 	nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
 	nfp_net_clean(nn);
+	if (nn->port)
+		nfp_devlink_port_unregister(nn->port);
 }
 
 static int nfp_net_pf_alloc_irqs(struct nfp_pf *pf)
@@ -470,8 +445,8 @@
 
 static int nfp_net_pci_map_mem(struct nfp_pf *pf)
 {
+	u32 min_size, cpp_id;
 	u8 __iomem *mem;
-	u32 min_size;
 	int err;
 
 	min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
@@ -519,9 +494,9 @@
 		pf->vfcfg_tbl2 = NULL;
 	}
 
-	mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0,
-			       NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ,
-			       &pf->qc_area);
+	cpp_id = NFP_CPP_ISLAND_ID(0, NFP_CPP_ACTION_RW, 0, 0);
+	mem = nfp_cpp_map_area(pf->cpp, "net.qc", cpp_id, NFP_PCIE_QUEUE(0),
+			       NFP_QCP_QUEUE_AREA_SZ, &pf->qc_area);
 	if (IS_ERR(mem)) {
 		nfp_err(pf->cpp, "Failed to map Queue Controller area.\n");
 		err = PTR_ERR(mem);
@@ -734,6 +709,10 @@
 	if (err)
 		goto err_devlink_unreg;
 
+	err = nfp_devlink_params_register(pf);
+	if (err)
+		goto err_shared_buf_unreg;
+
 	mutex_lock(&pf->lock);
 	pf->ddir = nfp_net_debugfs_device_add(pf->pdev);
 
@@ -767,6 +746,8 @@
 err_clean_ddir:
 	nfp_net_debugfs_dir_clean(&pf->ddir);
 	mutex_unlock(&pf->lock);
+	nfp_devlink_params_unregister(pf);
+err_shared_buf_unreg:
 	nfp_shared_buf_unregister(pf);
 err_devlink_unreg:
 	cancel_work_sync(&pf->port_refresh_work);
@@ -796,6 +777,7 @@
 
 	mutex_unlock(&pf->lock);
 
+	nfp_devlink_params_unregister(pf);
 	nfp_shared_buf_unregister(pf);
 	devlink_unregister(priv_to_devlink(pf));
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 18a09cd..79d72c8 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -1,46 +1,16 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/etherdevice.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/lockdep.h>
 #include <net/dst_metadata.h>
-#include <net/switchdev.h>
 
 #include "nfpcore/nfp_cpp.h"
 #include "nfpcore/nfp_nsp.h"
 #include "nfp_app.h"
 #include "nfp_main.h"
+#include "nfp_net.h"
 #include "nfp_net_ctrl.h"
 #include "nfp_net_repr.h"
 #include "nfp_net_sriov.h"
@@ -225,7 +195,7 @@
 	ret = dev_queue_xmit(skb);
 	nfp_repr_inc_tx_stats(netdev, len, ret);
 
-	return ret;
+	return NETDEV_TX_OK;
 }
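Why NETDEV_TX_OK rather than the dev_queue_xmit() result (a general netdev rule, not driver-specific):

	/* dev_queue_xmit() returns NET_XMIT_* queueing codes, which are
	 * not valid netdev_tx_t values for an ndo_start_xmit handler to
	 * return.  The skb has been consumed either way, so NETDEV_TX_OK
	 * is the only correct return; drops are still accounted via the
	 * stats update above.
	 */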
 
 static int nfp_repr_stop(struct net_device *netdev)
@@ -261,6 +231,27 @@
 	return err;
 }
 
+static netdev_features_t
+nfp_repr_fix_features(struct net_device *netdev, netdev_features_t features)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+	netdev_features_t old_features = features;
+	netdev_features_t lower_features;
+	struct net_device *lower_dev;
+
+	lower_dev = repr->dst->u.port_info.lower_dev;
+
+	lower_features = lower_dev->features;
+	if (lower_features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
+		lower_features |= NETIF_F_HW_CSUM;
+
+	features = netdev_intersect_features(features, lower_features);
+	features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_HW_TC);
+	features |= NETIF_F_LLTX;
+
+	return features;
+}
+
 const struct net_device_ops nfp_repr_netdev_ops = {
 	.ndo_init		= nfp_app_ndo_init,
 	.ndo_uninit		= nfp_app_ndo_uninit,
@@ -276,12 +267,30 @@
 	.ndo_set_vf_mac		= nfp_app_set_vf_mac,
 	.ndo_set_vf_vlan	= nfp_app_set_vf_vlan,
 	.ndo_set_vf_spoofchk	= nfp_app_set_vf_spoofchk,
+	.ndo_set_vf_trust	= nfp_app_set_vf_trust,
 	.ndo_get_vf_config	= nfp_app_get_vf_config,
 	.ndo_set_vf_link_state	= nfp_app_set_vf_link_state,
+	.ndo_fix_features	= nfp_repr_fix_features,
 	.ndo_set_features	= nfp_port_set_features,
 	.ndo_set_mac_address    = eth_mac_addr,
+	.ndo_get_port_parent_id	= nfp_port_get_port_parent_id,
+	.ndo_get_devlink_port	= nfp_devlink_get_devlink_port,
 };
 
+void
+nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+
+	if (repr->dst->u.port_info.lower_dev != lower)
+		return;
+
+	netdev->gso_max_size = lower->gso_max_size;
+	netdev->gso_max_segs = lower->gso_max_segs;
+
+	netdev_update_features(netdev);
+}
+
 static void nfp_repr_clean(struct nfp_repr *repr)
 {
 	unregister_netdev(repr->netdev);
@@ -290,31 +299,15 @@
 	nfp_port_free(repr->port);
 }
 
-static struct lock_class_key nfp_repr_netdev_xmit_lock_key;
-static struct lock_class_key nfp_repr_netdev_addr_lock_key;
-
-static void nfp_repr_set_lockdep_class_one(struct net_device *dev,
-					   struct netdev_queue *txq,
-					   void *_unused)
-{
-	lockdep_set_class(&txq->_xmit_lock, &nfp_repr_netdev_xmit_lock_key);
-}
-
-static void nfp_repr_set_lockdep_class(struct net_device *dev)
-{
-	lockdep_set_class(&dev->addr_list_lock, &nfp_repr_netdev_addr_lock_key);
-	netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL);
-}
-
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 		  u32 cmsg_port_id, struct nfp_port *port,
 		  struct net_device *pf_netdev)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
+	struct nfp_net *nn = netdev_priv(pf_netdev);
+	u32 repr_cap = nn->tlv_caps.repr_cap;
 	int err;
 
-	nfp_repr_set_lockdep_class(netdev);
-
 	repr->port = port;
 	repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
 	if (!repr->dst)
@@ -327,7 +320,54 @@
 
 	netdev->max_mtu = pf_netdev->max_mtu;
 
-	SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+	/* Set features the lower device can support with representors */
+	if (repr_cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
+		netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+	netdev->hw_features = NETIF_F_HIGHDMA;
+	if (repr_cap & NFP_NET_CFG_CTRL_RXCSUM_ANY)
+		netdev->hw_features |= NETIF_F_RXCSUM;
+	if (repr_cap & NFP_NET_CFG_CTRL_TXCSUM)
+		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	if (repr_cap & NFP_NET_CFG_CTRL_GATHER)
+		netdev->hw_features |= NETIF_F_SG;
+	if ((repr_cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
+	    repr_cap & NFP_NET_CFG_CTRL_LSO2)
+		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
+	if (repr_cap & NFP_NET_CFG_CTRL_RSS_ANY)
+		netdev->hw_features |= NETIF_F_RXHASH;
+	if (repr_cap & NFP_NET_CFG_CTRL_VXLAN) {
+		if (repr_cap & NFP_NET_CFG_CTRL_LSO)
+			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+	}
+	if (repr_cap & NFP_NET_CFG_CTRL_NVGRE) {
+		if (repr_cap & NFP_NET_CFG_CTRL_LSO)
+			netdev->hw_features |= NETIF_F_GSO_GRE;
+	}
+	if (repr_cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
+		netdev->hw_enc_features = netdev->hw_features;
+
+	netdev->vlan_features = netdev->hw_features;
+
+	if (repr_cap & NFP_NET_CFG_CTRL_RXVLAN)
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+	if (repr_cap & NFP_NET_CFG_CTRL_TXVLAN) {
+		if (repr_cap & NFP_NET_CFG_CTRL_LSO2)
+			netdev_warn(netdev, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
+		else
+			netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+	}
+	if (repr_cap & NFP_NET_CFG_CTRL_CTAG_FILTER)
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	netdev->features = netdev->hw_features;
+
+	/* Advertise but disable TSO by default. */
+	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
+	netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS;
+
+	netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
+	netdev->features |= NETIF_F_LLTX;
 
 	if (nfp_app_has_tc(app)) {
 		netdev->features |= NETIF_F_HW_TC;
@@ -472,7 +512,9 @@
 			continue;
 
 		nfp_app_repr_preclean(app, netdev);
+		rtnl_lock();
 		rcu_assign_pointer(reprs->reprs[i], NULL);
+		rtnl_unlock();
 		synchronize_rcu();
 		nfp_repr_clean(repr);
 	}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index 1bf2b18..e0f13df 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #ifndef NFP_NET_REPR_H
 #define NFP_NET_REPR_H
@@ -122,6 +92,8 @@
 		    unsigned int id);
 
 void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
+void
+nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower);
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 		  u32 cmsg_port_id, struct nfp_port *port,
 		  struct net_device *pf_netdev);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
index 8b1b962..3fdaaf8 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General Public License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2019 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
 #include <linux/errno.h>
@@ -176,6 +146,30 @@
 				    "spoofchk");
 }
 
+int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool enable)
+{
+	struct nfp_app *app = nfp_app_from_netdev(netdev);
+	unsigned int vf_offset;
+	u8 vf_ctrl;
+	int err;
+
+	err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_TRUST,
+				  "trust");
+	if (err)
+		return err;
+
+	/* Write trust control bit to VF entry in VF config symbol */
+	vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ +
+		NFP_NET_VF_CFG_CTRL;
+	vf_ctrl = readb(app->pf->vfcfg_tbl2 + vf_offset);
+	vf_ctrl &= ~NFP_NET_VF_CFG_CTRL_TRUST;
+	vf_ctrl |= FIELD_PREP(NFP_NET_VF_CFG_CTRL_TRUST, enable);
+	writeb(vf_ctrl, app->pf->vfcfg_tbl2 + vf_offset);
+
+	return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_TRUST,
+				    "trust");
+}
+
 int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
 			      int link_state)
 {
@@ -243,6 +237,7 @@
 	ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tci);
 
 	ivi->spoofchk = FIELD_GET(NFP_NET_VF_CFG_CTRL_SPOOF, flags);
+	ivi->trusted = FIELD_GET(NFP_NET_VF_CFG_CTRL_TRUST, flags);
 	ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags);
 
 	return 0;
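
The new nfp_app_set_vf_trust() above performs a read-modify-write on a single bit of the per-VF control byte before asking the firmware mailbox to apply it. A minimal stand-alone sketch of that bit manipulation, with the mask value mirrored from nfp_net_sriov.h (all driver plumbing omitted):

#include <stdint.h>
#include <stdio.h>

#define VF_CFG_CTRL_TRUST	0x8	/* mirrors NFP_NET_VF_CFG_CTRL_TRUST */

/* Same clear-then-set sequence as the driver's readb()/writeb() pair. */
static uint8_t set_trust(uint8_t vf_ctrl, int enable)
{
	vf_ctrl &= ~VF_CFG_CTRL_TRUST;
	if (enable)
		vf_ctrl |= VF_CFG_CTRL_TRUST;
	return vf_ctrl;
}

int main(void)
{
	printf("0x%02x\n", set_trust(0x04, 1));	/* spoofchk bit kept: 0x0c */
	printf("0x%02x\n", set_trust(0x0c, 0));	/* trust cleared: 0x04 */
	return 0;
}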
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
index e9df9d1..a3db0cb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2019 Netronome Systems, Inc. */
 
 #ifndef _NFP_NET_SRIOV_H_
 #define _NFP_NET_SRIOV_H_
@@ -49,12 +19,14 @@
 #define   NFP_NET_VF_CFG_MB_CAP_VLAN			  (0x1 << 1)
 #define   NFP_NET_VF_CFG_MB_CAP_SPOOF			  (0x1 << 2)
 #define   NFP_NET_VF_CFG_MB_CAP_LINK_STATE		  (0x1 << 3)
+#define   NFP_NET_VF_CFG_MB_CAP_TRUST			  (0x1 << 4)
 #define NFP_NET_VF_CFG_MB_RET				0x2
 #define NFP_NET_VF_CFG_MB_UPD				0x4
 #define   NFP_NET_VF_CFG_MB_UPD_MAC			  (0x1 << 0)
 #define   NFP_NET_VF_CFG_MB_UPD_VLAN			  (0x1 << 1)
 #define   NFP_NET_VF_CFG_MB_UPD_SPOOF			  (0x1 << 2)
 #define   NFP_NET_VF_CFG_MB_UPD_LINK_STATE		  (0x1 << 3)
+#define   NFP_NET_VF_CFG_MB_UPD_TRUST			  (0x1 << 4)
 #define NFP_NET_VF_CFG_MB_VF_NUM			0x7
 
 /* VF config entry
@@ -65,6 +37,7 @@
 #define   NFP_NET_VF_CFG_MAC_HI				  0x0
 #define   NFP_NET_VF_CFG_MAC_LO				  0x6
 #define NFP_NET_VF_CFG_CTRL				0x4
+#define   NFP_NET_VF_CFG_CTRL_TRUST			  0x8
 #define   NFP_NET_VF_CFG_CTRL_SPOOF			  0x4
 #define   NFP_NET_VF_CFG_CTRL_LINK_STATE		  0x3
 #define     NFP_NET_VF_CFG_LS_MODE_AUTO			    0
@@ -78,6 +51,7 @@
 int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 			__be16 vlan_proto);
 int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool setting);
 int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
 			      int link_state);
 int nfp_app_get_vf_config(struct net_device *netdev, int vf,
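
The trust bit joins the existing control-byte layout, which the driver reads and writes through FIELD_GET()/FIELD_PREP(); those macros shift a value by the mask's lowest set bit. A compilable stand-alone illustration — field_get()/field_prep() below are hypothetical stand-ins for the <linux/bitfield.h> macros, using a GCC/Clang builtin:

#include <stdint.h>
#include <stdio.h>

#define field_get(mask, reg)	(((reg) & (mask)) >> __builtin_ctz(mask))
#define field_prep(mask, val)	(((val) << __builtin_ctz(mask)) & (mask))

#define CTRL_TRUST	0x8	/* single bit 3 */
#define CTRL_SPOOF	0x4	/* single bit 2 */
#define CTRL_LINK_STATE	0x3	/* two-bit field, bits 1:0 */

int main(void)
{
	uint8_t flags = field_prep(CTRL_TRUST, 1) |
			field_prep(CTRL_LINK_STATE, 2);

	printf("trusted=%u spoofchk=%u linkstate=%u\n",
	       (unsigned)field_get(CTRL_TRUST, flags),
	       (unsigned)field_get(CTRL_SPOOF, flags),
	       (unsigned)field_get(CTRL_LINK_STATE, flags));
	return 0;	/* prints: trusted=1 spoofchk=0 linkstate=2 */
}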
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index 68928c8..e4977cd 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_netvf_main.c
@@ -202,7 +172,7 @@
 	rx_bar_off = NFP_PCIE_QUEUE(startq);
 
 	/* Allocate and initialise the netdev */
-	nn = nfp_net_alloc(pdev, true, max_tx_rings, max_rx_rings);
+	nn = nfp_net_alloc(pdev, ctrl_bar, true, max_tx_rings, max_rx_rings);
 	if (IS_ERR(nn)) {
 		err = PTR_ERR(nn);
 		goto err_ctrl_unmap;
@@ -210,7 +180,6 @@
 	vf->nn = nn;
 
 	nn->fw_ver = fw_ver;
-	nn->dp.ctrl_bar = ctrl_bar;
 	nn->dp.is_vf = 1;
 	nn->stride_tx = stride;
 	nn->stride_rx = stride;
@@ -313,8 +282,14 @@
 
 static void nfp_netvf_pci_remove(struct pci_dev *pdev)
 {
-	struct nfp_net_vf *vf = pci_get_drvdata(pdev);
-	struct nfp_net *nn = vf->nn;
+	struct nfp_net_vf *vf;
+	struct nfp_net *nn;
+
+	vf = pci_get_drvdata(pdev);
+	if (!vf)
+		return;
+
+	nn = vf->nn;
 
 	/* Note, the order is slightly different from above as we need
 	 * to keep the nn pointer around till we have freed everything.
@@ -348,4 +323,5 @@
 	.id_table    = nfp_netvf_pci_device_ids,
 	.probe       = nfp_netvf_pci_probe,
 	.remove      = nfp_netvf_pci_remove,
+	.shutdown    = nfp_netvf_pci_remove,
 };
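
Pointing .shutdown at the remove handler means the callback may now be reached when no private data is attached, which is what the new NULL check above guards against. A hedged sketch of the general pattern; the example_* names are placeholders, not this driver's API:

/* Assumes <linux/pci.h>; illustrative only. */
static void example_pci_teardown(struct pci_dev *pdev)
{
	struct example_priv *priv = pci_get_drvdata(pdev);

	if (!priv)			/* nothing attached: bail out safely */
		return;

	/* ... release resources ... */
	pci_set_drvdata(pdev, NULL);	/* make a second call a no-op */
}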
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 9c12981..93c5bfc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -1,39 +1,8 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
-#include <net/switchdev.h>
 
 #include "nfpcore/nfp_cpp.h"
 #include "nfpcore/nfp_nsp.h"
@@ -61,34 +30,22 @@
 	return NULL;
 }
 
-static int
-nfp_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr)
+int nfp_port_get_port_parent_id(struct net_device *netdev,
+				struct netdev_phys_item_id *ppid)
 {
 	struct nfp_port *port;
+	const u8 *serial;
 
 	port = nfp_port_from_netdev(netdev);
 	if (!port)
 		return -EOPNOTSUPP;
 
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: {
-		const u8 *serial;
-		/* N.B: attr->u.ppid.id is binary data */
-		attr->u.ppid.id_len = nfp_cpp_serial(port->app->cpp, &serial);
-		memcpy(&attr->u.ppid.id, serial, attr->u.ppid.id_len);
-		break;
-	}
-	default:
-		return -EOPNOTSUPP;
-	}
+	ppid->id_len = nfp_cpp_serial(port->app->cpp, &serial);
+	memcpy(&ppid->id, serial, ppid->id_len);
 
 	return 0;
 }
 
-const struct switchdev_ops nfp_port_switchdev_ops = {
-	.switchdev_port_attr_get	= nfp_port_attr_get,
-};
-
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 		      void *type_data)
 {
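
With nfp_port_switchdev_ops removed, the new nfp_port_get_port_parent_id() is reached through the equivalent netdev op instead. A sketch of that wiring, assuming a net_device_ops table like the driver's (the table below is illustrative, not copied from this patch):

static const struct net_device_ops example_netdev_ops = {
	/* ... */
	.ndo_get_port_parent_id	= nfp_port_get_port_parent_id,
};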
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index 51f10ae..d7fd203 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #ifndef _NFP_PORT_H_
 #define _NFP_PORT_H_
@@ -37,6 +7,7 @@
 #include <net/devlink.h>
 
 struct net_device;
+struct netdev_phys_item_id;
 struct nfp_app;
 struct nfp_pf;
 struct nfp_port;
@@ -120,7 +91,6 @@
 };
 
 extern const struct ethtool_ops nfp_port_ethtool_ops;
-extern const struct switchdev_ops nfp_port_switchdev_ops;
 
 __printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...);
 
@@ -136,6 +106,8 @@
 nfp_port_set_features(struct net_device *netdev, netdev_features_t features);
 
 struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
+int nfp_port_get_port_parent_id(struct net_device *netdev,
+				struct netdev_phys_item_id *ppid);
 struct nfp_port *
 nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id);
 struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port);
@@ -159,6 +131,8 @@
 
 int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port);
 void nfp_devlink_port_unregister(struct nfp_port *port);
+void nfp_devlink_port_type_eth_set(struct nfp_port *port);
+void nfp_devlink_port_type_clear(struct nfp_port *port);
 
 /**
  * Mac stats (0x0000 - 0x0200)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
index 0ecd837..ea2e3f8 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
@@ -1,36 +1,5 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (C) 2018 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
 
 #include <linux/kernel.h>
 #include <net/devlink.h>
@@ -79,6 +48,7 @@
 	pool_info->pool_type = le32_to_cpu(get_data.pool_type);
 	pool_info->threshold_type = le32_to_cpu(get_data.threshold_type);
 	pool_info->size = le32_to_cpu(get_data.size) * unit_size;
+	pool_info->cell_size = unit_size;
 
 	return 0;
 }
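
Reporting cell_size alongside the byte-based pool size lets devlink user space convert between bytes and allocation units. A tiny stand-alone illustration with invented numbers:

#include <stdio.h>

int main(void)
{
	unsigned int cell_size = 2048;	/* unit_size reported above */
	unsigned int size_cells = 512;	/* raw size from firmware */
	unsigned int size_bytes = size_cells * cell_size;

	printf("%u cells of %u B = %u B\n",
	       size_cells, cell_size, size_bytes);	/* 1048576 B */
	return 0;
}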
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
deleted file mode 100644
index 805fa28..0000000
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h
index 6cee638..afab6f0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 #ifndef NFP_CRC32_H
 #define NFP_CRC32_H
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
index f44d0a8..db94b0b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp.h
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
deleted file mode 100644
index 805fa28..0000000
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h
index 0e497a6..4a12133 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp6000.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 #ifndef NFP6000_NFP6000_H
 #define NFP6000_NFP6000_H
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h
index 40fb199..9a86ec1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/nfp_xpb.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 /*
  * nfp_xpb.h
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index c8d0b10..85d46f2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp6000_pcie.c
@@ -138,6 +108,7 @@
 
 /* The number of explicit BARs to reserve.
  * Minimum is 0, maximum is 4 on the NFP6000.
+ * The NFP3800 can have only one per PF.
  */
 #define NFP_PCIE_EXPLICIT_BARS		2
 
@@ -589,8 +560,8 @@
 			NFP_PCIE_BAR_PCIE2CPP_MapType_EXPLICIT3),
 	};
 	char status_msg[196] = {};
+	int i, err, bars_free;
 	struct nfp_bar *bar;
-	int i, bars_free;
 	int expl_groups;
 	char *msg, *end;
 
@@ -643,6 +614,8 @@
 		bar->iomem = ioremap_nocache(nfp_bar_resource_start(bar),
 					     nfp_bar_resource_len(bar));
 	if (bar->iomem) {
+		int pf;
+
 		msg += snprintf(msg, end - msg,	"0.0: General/MSI-X SRAM, ");
 		atomic_inc(&bar->refcnt);
 		bars_free--;
@@ -651,22 +624,40 @@
 
 		nfp->expl.data = bar->iomem + NFP_PCIE_SRAM + 0x1000;
 
-		if (nfp->pdev->device == PCI_DEVICE_ID_NETRONOME_NFP4000 ||
-		    nfp->pdev->device == PCI_DEVICE_ID_NETRONOME_NFP6000) {
-			nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(0);
-		} else {
-			int pf = nfp->pdev->devfn & 7;
-
+		switch (nfp->pdev->device) {
+		case PCI_DEVICE_ID_NETRONOME_NFP3800:
+			pf = nfp->pdev->devfn & 7;
 			nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(pf);
+			break;
+		case PCI_DEVICE_ID_NETRONOME_NFP4000:
+		case PCI_DEVICE_ID_NETRONOME_NFP5000:
+		case PCI_DEVICE_ID_NETRONOME_NFP6000:
+			nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(0);
+			break;
+		default:
+			dev_err(nfp->dev, "Unsupported device ID: %04hx!\n",
+				nfp->pdev->device);
+			err = -EINVAL;
+			goto err_unmap_bar0;
 		}
 		nfp->iomem.em = bar->iomem + NFP_PCIE_EM;
 	}
 
-	if (nfp->pdev->device == PCI_DEVICE_ID_NETRONOME_NFP4000 ||
-	    nfp->pdev->device == PCI_DEVICE_ID_NETRONOME_NFP6000)
-		expl_groups = 4;
-	else
+	switch (nfp->pdev->device) {
+	case PCI_DEVICE_ID_NETRONOME_NFP3800:
 		expl_groups = 1;
+		break;
+	case PCI_DEVICE_ID_NETRONOME_NFP4000:
+	case PCI_DEVICE_ID_NETRONOME_NFP5000:
+	case PCI_DEVICE_ID_NETRONOME_NFP6000:
+		expl_groups = 4;
+		break;
+	default:
+		dev_err(nfp->dev, "Unsupported device ID: %04hx!\n",
+			nfp->pdev->device);
+		err = -EINVAL;
+		goto err_unmap_bar0;
+	}
 
 	/* Configure, and lock, BAR0.1 for PCIe XPB (MSI-X PBA) */
 	bar = &nfp->bar[1];
@@ -711,6 +702,11 @@
 	dev_info(nfp->dev, "%sfree: %d/%d\n", status_msg, bars_free, nfp->bars);
 
 	return 0;
+
+err_unmap_bar0:
+	if (nfp->bar[0].iomem)
+		iounmap(nfp->bar[0].iomem);
+	return err;
 }
 
 static void disable_bars(struct nfp6000_pcie *nfp)
@@ -1327,7 +1323,7 @@
 
 	/*  Finished with card initialization. */
 	dev_info(&pdev->dev,
-		 "Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n");
+		 "Netronome Flow Processor NFP4000/NFP5000/NFP6000 PCIe Card Probe\n");
 	pcie_print_link_status(pdev);
 
 	nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
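
In the NFP3800 case above, the per-PF BAR window is selected by the PCI function number, i.e. the low three bits of devfn (what the kernel's PCI_FUNC() extracts). A stand-alone check of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int devfn = 0x0a;	/* device 1, function 2 */

	printf("pf = %u\n", devfn & 7);	/* prints: pf = 2 */
	return 0;
}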
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h
index 245d8aa..6d1bffa 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 /*
  * nfp6000_pcie.h
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h
index 31fe922..3d172e2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_arm.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 /*
  * nfp_arm.h
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index c338d53..2dd0f58 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_cpp.h
@@ -56,9 +26,16 @@
 	dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
 #define nfp_dbg(cpp, fmt, args...) \
 	dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
+#define nfp_printk(level, cpp, fmt, args...) \
+	dev_printk(level, nfp_cpp_device(cpp)->parent,	\
+		   NFP_SUBSYS ": " fmt,	## args)
 
 #define PCI_64BIT_BAR_COUNT             3
 
+/* NFP hardware vendor/device ids.
+ */
+#define PCI_DEVICE_ID_NETRONOME_NFP3800	0x3800
+
 #define NFP_CPP_NUM_TARGETS             16
 /* Max size of area it should be safe to request */
 #define NFP_CPP_SAFE_AREA_SIZE		SZ_2M
@@ -226,6 +203,7 @@
 u32 nfp_cpp_model(struct nfp_cpp *cpp);
 u16 nfp_cpp_interface(struct nfp_cpp *cpp);
 int nfp_cpp_serial(struct nfp_cpp *cpp, const u8 **serial);
+unsigned int nfp_cpp_mu_locality_lsb(struct nfp_cpp *cpp);
 
 struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp,
 						  u32 cpp_id,
@@ -286,8 +264,8 @@
 		   unsigned long long address, u64 value);
 
 u8 __iomem *
-nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
-		 u64 addr, unsigned long size, struct nfp_cpp_area **area);
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, u32 cpp_id, u64 addr,
+		 unsigned long size, struct nfp_cpp_area **area);
 
 struct nfp_cpp_mutex;
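
The new nfp_printk() forwards a caller-chosen log level to dev_printk() on the CPP device's parent, complementing the fixed-level wrappers defined above it. Usage sketch (cpp is assumed valid; reply is a made-up local):

	nfp_printk(KERN_WARNING, cpp, "unexpected mailbox reply: 0x%08x\n", reply);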
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 73de57a..94994a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_cppcore.c
@@ -75,6 +45,7 @@
  * @interface:		chip interface id we are using to reach it
  * @serial:		chip serial number
  * @imb_cat_table:	CPP Mapping Table
+ * @mu_locality_lsb:	MU access type bit offset
  *
  * Following fields use explicit locking:
  * @resource_list:	NFP CPP resource list
@@ -100,6 +71,7 @@
 	wait_queue_head_t waitq;
 
 	u32 imb_cat_table[16];
+	unsigned int mu_locality_lsb;
 
 	struct mutex area_cache_mutex;
 	struct list_head area_cache_list;
@@ -266,6 +238,34 @@
 	return sizeof(cpp->serial);
 }
 
+#define NFP_IMB_TGTADDRESSMODECFG_MODE_of(_x)		(((_x) >> 13) & 0x7)
+#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE		BIT(12)
+#define   NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_32_BIT	0
+#define   NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_40_BIT	BIT(12)
+
+static int nfp_cpp_set_mu_locality_lsb(struct nfp_cpp *cpp)
+{
+	unsigned int mode, addr40;
+	u32 imbcppat;
+	int res;
+
+	imbcppat = cpp->imb_cat_table[NFP_CPP_TARGET_MU];
+	mode = NFP_IMB_TGTADDRESSMODECFG_MODE_of(imbcppat);
+	addr40 = !!(imbcppat & NFP_IMB_TGTADDRESSMODECFG_ADDRMODE);
+
+	res = nfp_cppat_mu_locality_lsb(mode, addr40);
+	if (res < 0)
+		return res;
+	cpp->mu_locality_lsb = res;
+
+	return 0;
+}
+
+unsigned int nfp_cpp_mu_locality_lsb(struct nfp_cpp *cpp)
+{
+	return cpp->mu_locality_lsb;
+}
+
 /**
  * nfp_cpp_area_alloc_with_name() - allocate a new CPP area
  * @cpp:	CPP device handle
@@ -1241,6 +1241,12 @@
 	nfp_cpp_readl(cpp, arm, NFP_ARM_GCSR + NFP_ARM_GCSR_SOFTMODEL3,
 		      &mask[1]);
 
+	err = nfp_cpp_set_mu_locality_lsb(cpp);
+	if (err < 0) {
+		dev_err(parent,	"Can't calculate MU locality bit offset\n");
+		goto err_out;
+	}
+
 	dev_info(cpp->dev.parent, "Model: 0x%08x, SN: %pM, Ifc: 0x%04x\n",
 		 nfp_cpp_model(cpp), cpp->serial, nfp_cpp_interface(cpp));
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
index 20bad05..85734c6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_cpplib.c
@@ -54,8 +24,9 @@
 /* NFP6000 PL */
 #define NFP_PL_DEVICE_ID			0x00000004
 #define   NFP_PL_DEVICE_ID_MASK			GENMASK(7, 0)
-
-#define NFP6000_ARM_GCSR_SOFTMODEL0		0x00400144
+#define   NFP_PL_DEVICE_PART_MASK		GENMASK(31, 16)
+#define NFP_PL_DEVICE_MODEL_MASK		(NFP_PL_DEVICE_PART_MASK | \
+						 NFP_PL_DEVICE_ID_MASK)
 
 /**
  * nfp_cpp_readl() - Read a u32 word from a CPP location
@@ -150,22 +121,17 @@
  */
 int nfp_cpp_model_autodetect(struct nfp_cpp *cpp, u32 *model)
 {
-	const u32 arm_id = NFP_CPP_ID(NFP_CPP_TARGET_ARM, 0, 0);
 	u32 reg;
 	int err;
 
-	err = nfp_cpp_readl(cpp, arm_id, NFP6000_ARM_GCSR_SOFTMODEL0, model);
-	if (err < 0)
-		return err;
-
-	/* The PL's PluDeviceID revision code is authoratative */
-	*model &= ~0xff;
 	err = nfp_xpb_readl(cpp, NFP_XPB_DEVICE(1, 1, 16) + NFP_PL_DEVICE_ID,
 			    &reg);
 	if (err < 0)
 		return err;
 
-	*model |= (NFP_PL_DEVICE_ID_MASK & reg) - 0x10;
+	*model = reg & NFP_PL_DEVICE_MODEL_MASK;
+	if (*model & NFP_PL_DEVICE_ID_MASK)
+		*model -= 0x10;
 
 	return 0;
 }
@@ -294,8 +260,7 @@
  * nfp_cpp_map_area() - Helper function to map an area
  * @cpp:    NFP CPP handler
  * @name:   Name for the area
- * @domain: CPP domain
- * @target: CPP target
+ * @cpp_id: CPP ID for operation
  * @addr:   CPP address
  * @size:   Size of the area
  * @area:   Area handle (output)
@@ -306,15 +271,12 @@
  * Return: Pointer to memory mapped area or ERR_PTR
  */
 u8 __iomem *
-nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
-		 u64 addr, unsigned long size, struct nfp_cpp_area **area)
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, u32 cpp_id, u64 addr,
+		 unsigned long size, struct nfp_cpp_area **area)
 {
 	u8 __iomem *res;
-	u32 dest;
 
-	dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, domain);
-
-	*area = nfp_cpp_area_alloc_acquire(cpp, name, dest, addr, size);
+	*area = nfp_cpp_area_alloc_acquire(cpp, name, cpp_id, addr, size);
 	if (!*area)
 		goto err_eio;
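
The reworked autodetect builds the model number from the PL device register alone: part number in bits 31:16, revision code in bits 7:0, with the revision rebased by 0x10 so code 0x10 becomes stepping 0. Worked example with an invented register value:

#include <stdint.h>
#include <stdio.h>

#define ID_MASK		0x000000ffu		/* GENMASK(7, 0)   */
#define PART_MASK	0xffff0000u		/* GENMASK(31, 16) */
#define MODEL_MASK	(PART_MASK | ID_MASK)

int main(void)
{
	uint32_t reg = 0x60100011;	/* part 0x6010, revision code 0x11 */
	uint32_t model = reg & MODEL_MASK;

	if (model & ID_MASK)
		model -= 0x10;		/* rebase: code 0x10 -> stepping 0 */

	printf("model=0x%08x\n", model);	/* prints: model=0x60100001 */
	return 0;
}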
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c
index 063a9a6..f05dd34 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 /* Parse the hwinfo table that the ARM firmware builds in the ARM scratch SRAM
  * after chip reset.
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c
index 5f193fe..79e1794 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mip.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 /*
  * nfp_mip.c
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
index c88bf67..7bc17b9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 #include <linux/delay.h>
 #include <linux/device.h>
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
index 4051086..d4e0254 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_nffw.c
@@ -156,29 +126,6 @@
 	return (mip_off_hi & 0xFF) << 32 | le32_to_cpu(fi->mip_offset_lo);
 }
 
-#define NFP_IMB_TGTADDRESSMODECFG_MODE_of(_x)		(((_x) >> 13) & 0x7)
-#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE		BIT(12)
-#define   NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_32_BIT	0
-#define   NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_40_BIT	BIT(12)
-
-static int nfp_mip_mu_locality_lsb(struct nfp_cpp *cpp)
-{
-	unsigned int mode, addr40;
-	u32 xpbaddr, imbcppat;
-	int err;
-
-	/* Hardcoded XPB IMB Base, island 0 */
-	xpbaddr = 0x000a0000 + NFP_CPP_TARGET_MU * 4;
-	err = nfp_xpb_readl(cpp, xpbaddr, &imbcppat);
-	if (err < 0)
-		return err;
-
-	mode = NFP_IMB_TGTADDRESSMODECFG_MODE_of(imbcppat);
-	addr40 = !!(imbcppat & NFP_IMB_TGTADDRESSMODECFG_ADDRMODE);
-
-	return nfp_cppat_mu_locality_lsb(mode, addr40);
-}
-
 static unsigned int
 nffw_res_fwinfos(struct nfp_nffw_info_data *fwinf, struct nffw_fwinfo **arr)
 {
@@ -304,14 +251,7 @@
 	*off = nffw_fwinfo_mip_offset_get(fwinfo);
 
 	if (nffw_fwinfo_mip_mu_da_get(fwinfo)) {
-		int locality_off;
-
-		if (NFP_CPP_ID_TARGET_of(*cpp_id) != NFP_CPP_TARGET_MU)
-			return 0;
-
-		locality_off = nfp_mip_mu_locality_lsb(state->cpp);
-		if (locality_off < 0)
-			return locality_off;
+		int locality_off = nfp_cpp_mu_locality_lsb(state->cpp);
 
 		*off &= ~(NFP_MU_ADDR_ACCESS_TYPE_MASK << locality_off);
 		*off |= NFP_MU_ADDR_ACCESS_TYPE_DIRECT << locality_off;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
index df599d5..49a4d3f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_nffw.h
@@ -61,10 +31,12 @@
 
 /* Implemented in nfp_rtsym.c */
 
-#define NFP_RTSYM_TYPE_NONE		0
-#define NFP_RTSYM_TYPE_OBJECT		1
-#define NFP_RTSYM_TYPE_FUNCTION		2
-#define NFP_RTSYM_TYPE_ABS		3
+enum nfp_rtsym_type {
+	NFP_RTSYM_TYPE_NONE	= 0,
+	NFP_RTSYM_TYPE_OBJECT	= 1,
+	NFP_RTSYM_TYPE_FUNCTION	= 2,
+	NFP_RTSYM_TYPE_ABS	= 3,
+};
 
 #define NFP_RTSYM_TARGET_NONE		0
 #define NFP_RTSYM_TARGET_LMEM		-1
@@ -83,7 +55,7 @@
 	const char *name;
 	u64 addr;
 	u64 size;
-	int type;
+	enum nfp_rtsym_type type;
 	int target;
 	int domain;
 };
@@ -98,6 +70,32 @@
 const struct nfp_rtsym *
 nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name);
 
+u64 nfp_rtsym_size(const struct nfp_rtsym *rtsym);
+int __nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		     u8 action, u8 token, u64 off, void *buf, size_t len);
+int nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		   void *buf, size_t len);
+int __nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		      u8 action, u8 token, u64 off, u32 *value);
+int nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		    u32 *value);
+int __nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		      u8 action, u8 token, u64 off, u64 *value);
+int nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		    u64 *value);
+int __nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		      u8 action, u8 token, u64 off, void *buf, size_t len);
+int nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		    void *buf, size_t len);
+int __nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		       u8 action, u8 token, u64 off, u32 value);
+int nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		     u32 value);
+int __nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		       u8 action, u8 token, u64 off, u64 value);
+int nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		     u64 value);
+
 u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name,
 		      int *error);
 int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name,
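
A hedged usage sketch for the accessors declared above: resolve a symbol by name, then read one 32-bit word at its start. The symbol name and surrounding variables are illustrative, not taken from this patch:

	const struct nfp_rtsym *sym;
	u32 val;
	int err;

	sym = nfp_rtsym_lookup(rtbl, "_example_counter");	/* name made up */
	if (!sym)
		return -ENOENT;

	err = nfp_rtsym_readl(cpp, sym, 0, &val);	/* 32-bit read at offset 0 */
	if (err)
		return err;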
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index 2abee0f..f18e787 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_nsp.c
@@ -37,11 +7,13 @@
  *         Jason McMullan <jason.mcmullan@netronome.com>
  */
 
+#include <asm/unaligned.h>
 #include <linux/bitfield.h>
 #include <linux/delay.h>
 #include <linux/firmware.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/overflow.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
@@ -66,6 +38,7 @@
 #define NSP_COMMAND		0x08
 #define   NSP_COMMAND_OPTION	GENMASK_ULL(63, 32)
 #define   NSP_COMMAND_CODE	GENMASK_ULL(31, 16)
+#define   NSP_COMMAND_DMA_BUF	BIT_ULL(1)
 #define   NSP_COMMAND_START	BIT_ULL(0)
 
 /* CPP address to retrieve the data from */
@@ -78,8 +51,12 @@
 #define   NSP_DFLT_BUFFER_ADDRESS	GENMASK_ULL(39, 0)
 
 #define NSP_DFLT_BUFFER_CONFIG	0x20
+#define   NSP_DFLT_BUFFER_DMA_CHUNK_ORDER	GENMASK_ULL(63, 58)
+#define   NSP_DFLT_BUFFER_SIZE_4KB	GENMASK_ULL(15, 8)
 #define   NSP_DFLT_BUFFER_SIZE_MB	GENMASK_ULL(7, 0)
 
+#define NFP_CAP_CMD_DMA_SG	0x28
+
 #define NSP_MAGIC		0xab10
 #define NSP_MAJOR		0
 #define NSP_MINOR		8
@@ -87,6 +64,23 @@
 #define NSP_CODE_MAJOR		GENMASK(15, 12)
 #define NSP_CODE_MINOR		GENMASK(11, 0)
 
+#define NFP_FW_LOAD_RET_MAJOR	GENMASK(15, 8)
+#define NFP_FW_LOAD_RET_MINOR	GENMASK(23, 16)
+
+#define NFP_HWINFO_LOOKUP_SIZE	GENMASK(11, 0)
+
+#define NFP_VERSIONS_SIZE	GENMASK(11, 0)
+#define NFP_VERSIONS_CNT_OFF	0
+#define NFP_VERSIONS_BSP_OFF	2
+#define NFP_VERSIONS_CPLD_OFF	6
+#define NFP_VERSIONS_APP_OFF	10
+#define NFP_VERSIONS_BUNDLE_OFF	14
+#define NFP_VERSIONS_UNDI_OFF	18
+#define NFP_VERSIONS_NCSI_OFF	22
+#define NFP_VERSIONS_CFGR_OFF	26
+
+#define NSP_SFF_EEPROM_BLOCK_LEN	8
+
 enum nfp_nsp_cmd {
 	SPCODE_NOOP		= 0, /* No operation */
 	SPCODE_SOFT_RESET	= 1, /* Soft reset the NFP */
@@ -100,6 +94,22 @@
 	SPCODE_NSP_WRITE_FLASH	= 11, /* Load and flash image from buffer */
 	SPCODE_NSP_SENSORS	= 12, /* Read NSP sensor(s) */
 	SPCODE_NSP_IDENTIFY	= 13, /* Read NSP version */
+	SPCODE_FW_STORED	= 16, /* If no FW loaded, load flash app FW */
+	SPCODE_HWINFO_LOOKUP	= 17, /* Lookup HWinfo with overwrites etc. */
+	SPCODE_HWINFO_SET	= 18, /* Set HWinfo entry */
+	SPCODE_FW_LOADED	= 19, /* Is application firmware loaded */
+	SPCODE_VERSIONS		= 21, /* Report FW versions */
+	SPCODE_READ_SFF_EEPROM	= 22, /* Read module EEPROM */
+};
+
+struct nfp_nsp_dma_buf {
+	__le32 chunk_cnt;
+	__le32 reserved[3];
+	struct {
+		__le32 size;
+		__le32 reserved;
+		__le64 addr;
+	} descs[];
 };
 
 static const struct {
@@ -127,6 +137,43 @@
 	void *entries;
 };
 
+/**
+ * struct nfp_nsp_command_arg - NFP command argument structure
+ * @code:	NFP SP Command Code
+ * @dma:	@buf points to a host buffer, not NSP buffer
+ * @timeout_sec: Timeout value to wait for completion in seconds
+ * @option:	NFP SP Command Argument
+ * @buf:	NFP SP Buffer Address
+ * @error_cb:	Callback for interpreting option if error occurred
+ * @error_quiet: Don't print command error/warning. Protocol errors are
+ *		still logged.
+ */
+struct nfp_nsp_command_arg {
+	u16 code;
+	bool dma;
+	unsigned int timeout_sec;
+	u32 option;
+	u64 buf;
+	void (*error_cb)(struct nfp_nsp *state, u32 ret_val);
+	bool error_quiet;
+};
+
+/**
+ * struct nfp_nsp_command_buf_arg - NFP command with buffer argument structure
+ * @arg:	NFP command argument structure
+ * @in_buf:	Buffer with data for input
+ * @in_size:	Size of @in_buf
+ * @out_buf:	Buffer for output data
+ * @out_size:	Size of @out_buf
+ */
+struct nfp_nsp_command_buf_arg {
+	struct nfp_nsp_command_arg arg;
+	const void *in_buf;
+	unsigned int in_size;
+	void *out_buf;
+	unsigned int out_size;
+};
+
 struct nfp_cpp *nfp_nsp_cpp(struct nfp_nsp *state)
 {
 	return state->cpp;
@@ -199,11 +246,16 @@
 	state->ver.major = FIELD_GET(NSP_STATUS_MAJOR, reg);
 	state->ver.minor = FIELD_GET(NSP_STATUS_MINOR, reg);
 
-	if (state->ver.major != NSP_MAJOR || state->ver.minor < NSP_MINOR) {
+	if (state->ver.major != NSP_MAJOR) {
 		nfp_err(cpp, "Unsupported ABI %hu.%hu\n",
 			state->ver.major, state->ver.minor);
 		return -EINVAL;
 	}
+	if (state->ver.minor < NSP_MINOR) {
+		nfp_err(cpp, "ABI too old to support NIC operation (%u.%hu < %u.%u), please update the management FW on the flash\n",
+			NSP_MAJOR, state->ver.minor, NSP_MAJOR, NSP_MINOR);
+		return -EINVAL;
+	}
 
 	if (reg & NSP_STATUS_BUSY) {
 		nfp_err(cpp, "Service processor busy!\n");
@@ -291,11 +343,7 @@
 /**
  * __nfp_nsp_command() - Execute a command on the NFP Service Processor
  * @state:	NFP SP state
- * @code:	NFP SP Command Code
- * @option:	NFP SP Command Argument
- * @buff_cpp:	NFP SP Buffer CPP Address info
- * @buff_addr:	NFP SP Buffer Host address
- * @timeout_sec:Timeout value to wait for completion in seconds
+ * @arg:	NFP command argument structure
  *
  * Return: 0 for success with no result
  *
@@ -308,8 +356,7 @@
  *	-ETIMEDOUT if the NSP took longer than @timeout_sec seconds to complete
  */
 static int
-__nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option, u32 buff_cpp,
-		  u64 buff_addr, u32 timeout_sec)
+__nfp_nsp_command(struct nfp_nsp *state, const struct nfp_nsp_command_arg *arg)
 {
 	u64 reg, ret_val, nsp_base, nsp_buffer, nsp_status, nsp_command;
 	struct nfp_cpp *cpp = state->cpp;
@@ -326,22 +373,14 @@
 	if (err)
 		return err;
 
-	if (!FIELD_FIT(NSP_BUFFER_CPP, buff_cpp >> 8) ||
-	    !FIELD_FIT(NSP_BUFFER_ADDRESS, buff_addr)) {
-		nfp_err(cpp, "Host buffer out of reach %08x %016llx\n",
-			buff_cpp, buff_addr);
-		return -EINVAL;
-	}
-
-	err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_buffer,
-			     FIELD_PREP(NSP_BUFFER_CPP, buff_cpp >> 8) |
-			     FIELD_PREP(NSP_BUFFER_ADDRESS, buff_addr));
+	err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_buffer, arg->buf);
 	if (err < 0)
 		return err;
 
 	err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_command,
-			     FIELD_PREP(NSP_COMMAND_OPTION, option) |
-			     FIELD_PREP(NSP_COMMAND_CODE, code) |
+			     FIELD_PREP(NSP_COMMAND_OPTION, arg->option) |
+			     FIELD_PREP(NSP_COMMAND_CODE, arg->code) |
+			     FIELD_PREP(NSP_COMMAND_DMA_BUF, arg->dma) |
 			     FIELD_PREP(NSP_COMMAND_START, 1));
 	if (err < 0)
 		return err;
@@ -351,16 +390,16 @@
 			       NSP_COMMAND_START, 0, NFP_NSP_TIMEOUT_DEFAULT);
 	if (err) {
 		nfp_err(cpp, "Error %d waiting for code 0x%04x to start\n",
-			err, code);
+			err, arg->code);
 		return err;
 	}
 
 	/* Wait for NSP_STATUS_BUSY to go to 0 */
 	err = nfp_nsp_wait_reg(cpp, &reg, nsp_cpp, nsp_status, NSP_STATUS_BUSY,
-			       0, timeout_sec);
+			       0, arg->timeout_sec ?: NFP_NSP_TIMEOUT_DEFAULT);
 	if (err) {
 		nfp_err(cpp, "Error %d waiting for code 0x%04x to complete\n",
-			err, code);
+			err, arg->code);
 		return err;
 	}
 
@@ -371,55 +410,38 @@
 
 	err = FIELD_GET(NSP_STATUS_RESULT, reg);
 	if (err) {
-		nfp_warn(cpp, "Result (error) code set: %d (%d) command: %d\n",
-			 -err, (int)ret_val, code);
-		nfp_nsp_print_extended_error(state, ret_val);
+		if (!arg->error_quiet)
+			nfp_warn(cpp, "Result (error) code set: %d (%d) command: %d\n",
+				 -err, (int)ret_val, arg->code);
+
+		if (arg->error_cb)
+			arg->error_cb(state, ret_val);
+		else
+			nfp_nsp_print_extended_error(state, ret_val);
 		return -err;
 	}
 
 	return ret_val;
 }
 
-static int
-nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option, u32 buff_cpp,
-		u64 buff_addr)
+static int nfp_nsp_command(struct nfp_nsp *state, u16 code)
 {
-	return __nfp_nsp_command(state, code, option, buff_cpp, buff_addr,
-				 NFP_NSP_TIMEOUT_DEFAULT);
+	const struct nfp_nsp_command_arg arg = {
+		.code		= code,
+	};
+
+	return __nfp_nsp_command(state, &arg);
 }
 
 static int
-__nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
-		      const void *in_buf, unsigned int in_size, void *out_buf,
-		      unsigned int out_size, u32 timeout_sec)
+nfp_nsp_command_buf_def(struct nfp_nsp *nsp,
+			struct nfp_nsp_command_buf_arg *arg)
 {
 	struct nfp_cpp *cpp = nsp->cpp;
-	unsigned int max_size;
 	u64 reg, cpp_buf;
-	int ret, err;
+	int err, ret;
 	u32 cpp_id;
 
-	if (nsp->ver.minor < 13) {
-		nfp_err(cpp, "NSP: Code 0x%04x with buffer not supported (ABI %hu.%hu)\n",
-			code, nsp->ver.major, nsp->ver.minor);
-		return -EOPNOTSUPP;
-	}
-
-	err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res),
-			    nfp_resource_address(nsp->res) +
-			    NSP_DFLT_BUFFER_CONFIG,
-			    &reg);
-	if (err < 0)
-		return err;
-
-	max_size = max(in_size, out_size);
-	if (FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M < max_size) {
-		nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%llu < %u)\n",
-			code, FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M,
-			max_size);
-		return -EINVAL;
-	}
-
 	err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res),
 			    nfp_resource_address(nsp->res) +
 			    NSP_DFLT_BUFFER,
@@ -430,27 +452,36 @@
 	cpp_id = FIELD_GET(NSP_DFLT_BUFFER_CPP, reg) << 8;
 	cpp_buf = FIELD_GET(NSP_DFLT_BUFFER_ADDRESS, reg);
 
-	if (in_buf && in_size) {
-		err = nfp_cpp_write(cpp, cpp_id, cpp_buf, in_buf, in_size);
+	if (arg->in_buf && arg->in_size) {
+		err = nfp_cpp_write(cpp, cpp_id, cpp_buf,
+				    arg->in_buf, arg->in_size);
 		if (err < 0)
 			return err;
 	}
 	/* Zero out remaining part of the buffer */
-	if (out_buf && out_size && out_size > in_size) {
-		memset(out_buf, 0, out_size - in_size);
-		err = nfp_cpp_write(cpp, cpp_id, cpp_buf + in_size,
-				    out_buf, out_size - in_size);
+	if (arg->out_buf && arg->out_size && arg->out_size > arg->in_size) {
+		err = nfp_cpp_write(cpp, cpp_id, cpp_buf + arg->in_size,
+				    arg->out_buf, arg->out_size - arg->in_size);
 		if (err < 0)
 			return err;
 	}
 
-	ret = __nfp_nsp_command(nsp, code, option, cpp_id, cpp_buf,
-				timeout_sec);
+	if (!FIELD_FIT(NSP_BUFFER_CPP, cpp_id >> 8) ||
+	    !FIELD_FIT(NSP_BUFFER_ADDRESS, cpp_buf)) {
+		nfp_err(cpp, "Buffer out of reach %08x %016llx\n",
+			cpp_id, cpp_buf);
+		return -EINVAL;
+	}
+
+	arg->arg.buf = FIELD_PREP(NSP_BUFFER_CPP, cpp_id >> 8) |
+		       FIELD_PREP(NSP_BUFFER_ADDRESS, cpp_buf);
+	ret = __nfp_nsp_command(nsp, &arg->arg);
 	if (ret < 0)
 		return ret;
 
-	if (out_buf && out_size) {
-		err = nfp_cpp_read(cpp, cpp_id, cpp_buf, out_buf, out_size);
+	if (arg->out_buf && arg->out_size) {
+		err = nfp_cpp_read(cpp, cpp_id, cpp_buf,
+				   arg->out_buf, arg->out_size);
 		if (err < 0)
 			return err;
 	}
@@ -459,13 +490,207 @@
 }
 
 static int
-nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
-		    const void *in_buf, unsigned int in_size, void *out_buf,
-		    unsigned int out_size)
+nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp,
+			   struct nfp_nsp_command_buf_arg *arg,
+			   unsigned int max_size, unsigned int chunk_order,
+			   unsigned int dma_order)
 {
-	return __nfp_nsp_command_buf(nsp, code, option, in_buf, in_size,
-				     out_buf, out_size,
-				     NFP_NSP_TIMEOUT_DEFAULT);
+	struct nfp_cpp *cpp = nsp->cpp;
+	struct nfp_nsp_dma_buf *desc;
+	struct {
+		dma_addr_t dma_addr;
+		unsigned long len;
+		void *chunk;
+	} *chunks;
+	size_t chunk_size, dma_size;
+	dma_addr_t dma_desc;
+	struct device *dev;
+	unsigned long off;
+	int i, ret, nseg;
+	size_t desc_sz;
+
+	chunk_size = BIT_ULL(chunk_order);
+	dma_size = BIT_ULL(dma_order);
+	nseg = DIV_ROUND_UP(max_size, chunk_size);
+
+	chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+	if (!chunks)
+		return -ENOMEM;
+
+	off = 0;
+	ret = -ENOMEM;
+	for (i = 0; i < nseg; i++) {
+		unsigned long coff;
+
+		chunks[i].chunk = kmalloc(chunk_size,
+					  GFP_KERNEL | __GFP_NOWARN);
+		if (!chunks[i].chunk)
+			goto exit_free_prev;
+
+		chunks[i].len = min_t(u64, chunk_size, max_size - off);
+
+		coff = 0;
+		if (arg->in_size > off) {
+			coff = min_t(u64, arg->in_size - off, chunk_size);
+			memcpy(chunks[i].chunk, arg->in_buf + off, coff);
+		}
+		memset(chunks[i].chunk + coff, 0, chunk_size - coff);
+
+		off += chunks[i].len;
+	}
+
+	dev = nfp_cpp_device(cpp)->parent;
+
+	for (i = 0; i < nseg; i++) {
+		dma_addr_t addr;
+
+		addr = dma_map_single(dev, chunks[i].chunk, chunks[i].len,
+				      DMA_BIDIRECTIONAL);
+		chunks[i].dma_addr = addr;
+
+		ret = dma_mapping_error(dev, addr);
+		if (ret)
+			goto exit_unmap_prev;
+
+		if (WARN_ONCE(round_down(addr, dma_size) !=
+			      round_down(addr + chunks[i].len - 1, dma_size),
+			      "unaligned DMA address: %pad %lu %zd\n",
+			      &addr, chunks[i].len, dma_size)) {
+			ret = -EFAULT;
+			i++;
+			goto exit_unmap_prev;
+		}
+	}
+
+	desc_sz = struct_size(desc, descs, nseg);
+	desc = kmalloc(desc_sz, GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto exit_unmap_all;
+	}
+
+	desc->chunk_cnt = cpu_to_le32(nseg);
+	for (i = 0; i < nseg; i++) {
+		desc->descs[i].size = cpu_to_le32(chunks[i].len);
+		desc->descs[i].addr = cpu_to_le64(chunks[i].dma_addr);
+	}
+
+	dma_desc = dma_map_single(dev, desc, desc_sz, DMA_TO_DEVICE);
+	ret = dma_mapping_error(dev, dma_desc);
+	if (ret)
+		goto exit_free_desc;
+
+	arg->arg.dma = true;
+	arg->arg.buf = dma_desc;
+	ret = __nfp_nsp_command(nsp, &arg->arg);
+	if (ret < 0)
+		goto exit_unmap_desc;
+
+	i = 0;
+	off = 0;
+	while (off < arg->out_size) {
+		unsigned int len;
+
+		len = min_t(u64, chunks[i].len, arg->out_size - off);
+		memcpy(arg->out_buf + off, chunks[i].chunk, len);
+		off += len;
+		i++;
+	}
+
+exit_unmap_desc:
+	dma_unmap_single(dev, dma_desc, desc_sz, DMA_TO_DEVICE);
+exit_free_desc:
+	kfree(desc);
+exit_unmap_all:
+	i = nseg;
+exit_unmap_prev:
+	while (--i >= 0)
+		dma_unmap_single(dev, chunks[i].dma_addr, chunks[i].len,
+				 DMA_BIDIRECTIONAL);
+	i = nseg;
+exit_free_prev:
+	while (--i >= 0)
+		kfree(chunks[i].chunk);
+	kfree(chunks);
+	if (ret < 0)
+		nfp_err(cpp, "NSP: SG DMA failed for command 0x%04x: %d (sz:%d cord:%d)\n",
+			arg->arg.code, ret, max_size, chunk_order);
+	return ret;
+}
+
+static int
+nfp_nsp_command_buf_dma(struct nfp_nsp *nsp,
+			struct nfp_nsp_command_buf_arg *arg,
+			unsigned int max_size, unsigned int dma_order)
+{
+	unsigned int chunk_order, buf_order;
+	struct nfp_cpp *cpp = nsp->cpp;
+	bool sg_ok;
+	u64 reg;
+	int err;
+
+	buf_order = order_base_2(roundup_pow_of_two(max_size));
+
+	err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res),
+			    nfp_resource_address(nsp->res) + NFP_CAP_CMD_DMA_SG,
+			    &reg);
+	if (err < 0)
+		return err;
+	sg_ok = reg & BIT_ULL(arg->arg.code - 1);
+
+	if (!sg_ok) {
+		if (buf_order > dma_order) {
+			nfp_err(cpp, "NSP: can't service non-SG DMA for command 0x%04x\n",
+				arg->arg.code);
+			return -ENOMEM;
+		}
+		chunk_order = buf_order;
+	} else {
+		chunk_order = min_t(unsigned int, dma_order, PAGE_SHIFT);
+	}
+
+	return nfp_nsp_command_buf_dma_sg(nsp, arg, max_size, chunk_order,
+					  dma_order);
+}
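
To make the order arithmetic above concrete, a worked example with
illustrative numbers:

/* Worked example (numbers chosen for illustration):
 *   max_size  = 3 MB -> buf_order = order_base_2(4 MB) = 22
 *   dma_order = 20   (1 MB max per contiguous DMA chunk)
 * non-SG: buf_order (22) > dma_order (20) -> command refused (-ENOMEM)
 * SG:     chunk_order = min(20, PAGE_SHIFT = 12) = 12 on 4 KB-page systems
 *         nseg = DIV_ROUND_UP(3 MB, 4 KB) = 768 descriptors
 */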
+
+static int
+nfp_nsp_command_buf(struct nfp_nsp *nsp, struct nfp_nsp_command_buf_arg *arg)
+{
+	unsigned int dma_order, def_size, max_size;
+	struct nfp_cpp *cpp = nsp->cpp;
+	u64 reg;
+	int err;
+
+	if (nsp->ver.minor < 13) {
+		nfp_err(cpp, "NSP: Code 0x%04x with buffer not supported (ABI %hu.%hu)\n",
+			arg->arg.code, nsp->ver.major, nsp->ver.minor);
+		return -EOPNOTSUPP;
+	}
+
+	err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res),
+			    nfp_resource_address(nsp->res) +
+			    NSP_DFLT_BUFFER_CONFIG,
+			    &reg);
+	if (err < 0)
+		return err;
+
+	/* Zero out the undefined part of the output buffer */
+	if (arg->out_buf && arg->out_size && arg->out_size > arg->in_size)
+		memset(arg->out_buf, 0, arg->out_size - arg->in_size);
+
+	max_size = max(arg->in_size, arg->out_size);
+	def_size = FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M +
+		   FIELD_GET(NSP_DFLT_BUFFER_SIZE_4KB, reg) * SZ_4K;
+	dma_order = FIELD_GET(NSP_DFLT_BUFFER_DMA_CHUNK_ORDER, reg);
+	if (def_size >= max_size) {
+		return nfp_nsp_command_buf_def(nsp, arg);
+	} else if (!dma_order) {
+		nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%u < %u)\n",
+			arg->arg.code, def_size, max_size);
+		return -EINVAL;
+	}
+
+	return nfp_nsp_command_buf_dma(nsp, arg, max_size, dma_order);
 }
 
 int nfp_nsp_wait(struct nfp_nsp *state)
@@ -479,7 +704,7 @@
 	for (;;) {
 		const unsigned long start_time = jiffies;
 
-		err = nfp_nsp_command(state, SPCODE_NOOP, 0, 0, 0);
+		err = nfp_nsp_command(state, SPCODE_NOOP);
 		if (err != -EAGAIN)
 			break;
 
@@ -501,53 +726,378 @@
 
 int nfp_nsp_device_soft_reset(struct nfp_nsp *state)
 {
-	return nfp_nsp_command(state, SPCODE_SOFT_RESET, 0, 0, 0);
+	return nfp_nsp_command(state, SPCODE_SOFT_RESET);
 }
 
 int nfp_nsp_mac_reinit(struct nfp_nsp *state)
 {
-	return nfp_nsp_command(state, SPCODE_MAC_INIT, 0, 0, 0);
+	return nfp_nsp_command(state, SPCODE_MAC_INIT);
+}
+
+static void nfp_nsp_load_fw_extended_msg(struct nfp_nsp *state, u32 ret_val)
+{
+	static const char * const major_msg[] = {
+		/* 0 */ "Firmware from driver loaded",
+		/* 1 */ "Firmware from flash loaded",
+		/* 2 */ "Firmware loading failure",
+	};
+	static const char * const minor_msg[] = {
+		/*  0 */ "",
+		/*  1 */ "no named partition on flash",
+		/*  2 */ "error reading from flash",
+		/*  3 */ "can not deflate",
+		/*  4 */ "not a trusted file",
+		/*  5 */ "can not parse FW file",
+		/*  6 */ "MIP not found in FW file",
+		/*  7 */ "null firmware name in MIP",
+		/*  8 */ "FW version none",
+		/*  9 */ "FW build number none",
+		/* 10 */ "no FW selection policy HWInfo key found",
+		/* 11 */ "static FW selection policy",
+		/* 12 */ "FW version has precedence",
+		/* 13 */ "different FW application load requested",
+		/* 14 */ "development build",
+	};
+	unsigned int major, minor;
+	const char *level;
+
+	major = FIELD_GET(NFP_FW_LOAD_RET_MAJOR, ret_val);
+	minor = FIELD_GET(NFP_FW_LOAD_RET_MINOR, ret_val);
+
+	if (!nfp_nsp_has_stored_fw_load(state))
+		return;
+
+	/* Lower the message level in the legacy cases */
+	if (major == 0 && (minor == 0 || minor == 10))
+		level = KERN_DEBUG;
+	else if (major == 2)
+		level = KERN_ERR;
+	else
+		level = KERN_INFO;
+
+	if (major >= ARRAY_SIZE(major_msg))
+		nfp_printk(level, state->cpp, "FW loading status: %x\n",
+			   ret_val);
+	else if (minor >= ARRAY_SIZE(minor_msg))
+		nfp_printk(level, state->cpp, "%s, reason code: %d\n",
+			   major_msg[major], minor);
+	else
+		nfp_printk(level, state->cpp, "%s%c %s\n",
+			   major_msg[major], minor ? ',' : '.',
+			   minor_msg[minor]);
 }
 
 int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw)
 {
-	return nfp_nsp_command_buf(state, SPCODE_FW_LOAD, fw->size, fw->data,
-				   fw->size, NULL, 0);
+	struct nfp_nsp_command_buf_arg load_fw = {
+		{
+			.code		= SPCODE_FW_LOAD,
+			.option		= fw->size,
+			.error_cb	= nfp_nsp_load_fw_extended_msg,
+		},
+		.in_buf		= fw->data,
+		.in_size	= fw->size,
+	};
+	int ret;
+
+	ret = nfp_nsp_command_buf(state, &load_fw);
+	if (ret < 0)
+		return ret;
+
+	nfp_nsp_load_fw_extended_msg(state, ret);
+	return 0;
 }
 
 int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw)
 {
-	/* The flash time is specified to take a maximum of 70s so we add an
-	 * additional factor to this spec time.
-	 */
-	u32 timeout_sec = 2.5 * 70;
+	struct nfp_nsp_command_buf_arg write_flash = {
+		{
+			.code		= SPCODE_NSP_WRITE_FLASH,
+			.option		= fw->size,
+			.timeout_sec	= 900,
+		},
+		.in_buf		= fw->data,
+		.in_size	= fw->size,
+	};
 
-	return __nfp_nsp_command_buf(state, SPCODE_NSP_WRITE_FLASH, fw->size,
-				     fw->data, fw->size, NULL, 0, timeout_sec);
+	return nfp_nsp_command_buf(state, &write_flash);
 }
 
 int nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size)
 {
-	return nfp_nsp_command_buf(state, SPCODE_ETH_RESCAN, size, NULL, 0,
-				   buf, size);
+	struct nfp_nsp_command_buf_arg eth_rescan = {
+		{
+			.code		= SPCODE_ETH_RESCAN,
+			.option		= size,
+		},
+		.out_buf	= buf,
+		.out_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &eth_rescan);
 }
 
 int nfp_nsp_write_eth_table(struct nfp_nsp *state,
 			    const void *buf, unsigned int size)
 {
-	return nfp_nsp_command_buf(state, SPCODE_ETH_CONTROL, size, buf, size,
-				   NULL, 0);
+	struct nfp_nsp_command_buf_arg eth_ctrl = {
+		{
+			.code		= SPCODE_ETH_CONTROL,
+			.option		= size,
+		},
+		.in_buf		= buf,
+		.in_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &eth_ctrl);
 }
 
 int nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size)
 {
-	return nfp_nsp_command_buf(state, SPCODE_NSP_IDENTIFY, size, NULL, 0,
-				   buf, size);
+	struct nfp_nsp_command_buf_arg identify = {
+		{
+			.code		= SPCODE_NSP_IDENTIFY,
+			.option		= size,
+		},
+		.out_buf	= buf,
+		.out_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &identify);
 }
 
 int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask,
 			 void *buf, unsigned int size)
 {
-	return nfp_nsp_command_buf(state, SPCODE_NSP_SENSORS, sensor_mask,
-				   NULL, 0, buf, size);
+	struct nfp_nsp_command_buf_arg sensors = {
+		{
+			.code		= SPCODE_NSP_SENSORS,
+			.option		= sensor_mask,
+		},
+		.out_buf	= buf,
+		.out_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &sensors);
+}
+
+int nfp_nsp_load_stored_fw(struct nfp_nsp *state)
+{
+	const struct nfp_nsp_command_arg arg = {
+		.code		= SPCODE_FW_STORED,
+		.error_cb	= nfp_nsp_load_fw_extended_msg,
+	};
+	int ret;
+
+	ret = __nfp_nsp_command(state, &arg);
+	if (ret < 0)
+		return ret;
+
+	nfp_nsp_load_fw_extended_msg(state, ret);
+	return 0;
+}
+
+static int
+__nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size,
+			bool optional)
+{
+	struct nfp_nsp_command_buf_arg hwinfo_lookup = {
+		{
+			.code		= SPCODE_HWINFO_LOOKUP,
+			.option		= size,
+			.error_quiet	= optional,
+		},
+		.in_buf		= buf,
+		.in_size	= size,
+		.out_buf	= buf,
+		.out_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &hwinfo_lookup);
+}
+
+int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size)
+{
+	int err;
+
+	size = min_t(u32, size, NFP_HWINFO_LOOKUP_SIZE);
+
+	err = __nfp_nsp_hwinfo_lookup(state, buf, size, false);
+	if (err)
+		return err;
+
+	if (strnlen(buf, size) == size) {
+		nfp_err(state->cpp, "NSP HWinfo value not NULL-terminated\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf,
+				   unsigned int size, const char *default_val)
+{
+	int err;
+
+	/* Ensure that the default value is usable irrespective of whether
+	 * it is actually going to be used.
+	 */
+	if (strnlen(default_val, size) == size)
+		return -EINVAL;
+
+	if (!nfp_nsp_has_hwinfo_lookup(state)) {
+		strcpy(buf, default_val);
+		return 0;
+	}
+
+	size = min_t(u32, size, NFP_HWINFO_LOOKUP_SIZE);
+
+	err = __nfp_nsp_hwinfo_lookup(state, buf, size, true);
+	if (err) {
+		if (err == -ENOENT) {
+			strcpy(buf, default_val);
+			return 0;
+		}
+
+		nfp_err(state->cpp, "NSP HWinfo lookup failed: %d\n", err);
+		return err;
+	}
+
+	if (strnlen(buf, size) == size) {
+		nfp_err(state->cpp, "NSP HWinfo value not NULL-terminated\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int nfp_nsp_hwinfo_set(struct nfp_nsp *state, void *buf, unsigned int size)
+{
+	struct nfp_nsp_command_buf_arg hwinfo_set = {
+		{
+			.code		= SPCODE_HWINFO_SET,
+			.option		= size,
+		},
+		.in_buf		= buf,
+		.in_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &hwinfo_set);
+}
+
+int nfp_nsp_fw_loaded(struct nfp_nsp *state)
+{
+	const struct nfp_nsp_command_arg arg = {
+		.code		= SPCODE_FW_LOADED,
+	};
+
+	return __nfp_nsp_command(state, &arg);
+}
+
+int nfp_nsp_versions(struct nfp_nsp *state, void *buf, unsigned int size)
+{
+	struct nfp_nsp_command_buf_arg versions = {
+		{
+			.code		= SPCODE_VERSIONS,
+			.option		= min_t(u32, size, NFP_VERSIONS_SIZE),
+		},
+		.out_buf	= buf,
+		.out_size	= min_t(u32, size, NFP_VERSIONS_SIZE),
+	};
+
+	return nfp_nsp_command_buf(state, &versions);
+}
+
+const char *nfp_nsp_versions_get(enum nfp_nsp_versions id, bool flash,
+				 const u8 *buf, unsigned int size)
+{
+	static const u32 id2off[] = {
+		[NFP_VERSIONS_BSP] =	NFP_VERSIONS_BSP_OFF,
+		[NFP_VERSIONS_CPLD] =	NFP_VERSIONS_CPLD_OFF,
+		[NFP_VERSIONS_APP] =	NFP_VERSIONS_APP_OFF,
+		[NFP_VERSIONS_BUNDLE] =	NFP_VERSIONS_BUNDLE_OFF,
+		[NFP_VERSIONS_UNDI] =	NFP_VERSIONS_UNDI_OFF,
+		[NFP_VERSIONS_NCSI] =	NFP_VERSIONS_NCSI_OFF,
+		[NFP_VERSIONS_CFGR] =	NFP_VERSIONS_CFGR_OFF,
+	};
+	unsigned int field, buf_field_cnt, buf_off;
+
+	if (id >= ARRAY_SIZE(id2off) || !id2off[id])
+		return ERR_PTR(-EINVAL);
+
+	field = id * 2 + flash;
+
+	buf_field_cnt = get_unaligned_le16(buf);
+	if (buf_field_cnt <= field)
+		return ERR_PTR(-ENOENT);
+
+	buf_off = get_unaligned_le16(buf + id2off[id] + flash * 2);
+	if (!buf_off)
+		return ERR_PTR(-ENOENT);
+
+	if (buf_off >= size)
+		return ERR_PTR(-EINVAL);
+	if (strnlen(&buf[buf_off], size - buf_off) == size - buf_off)
+		return ERR_PTR(-EINVAL);
+
+	return (const char *)&buf[buf_off];
+}
+
+static int
+__nfp_nsp_module_eeprom(struct nfp_nsp *state, void *buf, unsigned int size)
+{
+	struct nfp_nsp_command_buf_arg module_eeprom = {
+		{
+			.code		= SPCODE_READ_SFF_EEPROM,
+			.option		= size,
+		},
+		.in_buf		= buf,
+		.in_size	= size,
+		.out_buf	= buf,
+		.out_size	= size,
+	};
+
+	return nfp_nsp_command_buf(state, &module_eeprom);
+}
+
+int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index,
+			       unsigned int offset, void *data,
+			       unsigned int len, unsigned int *read_len)
+{
+	struct eeprom_buf {
+		u8 metalen;
+		__le16 length;
+		__le16 offset;
+		__le16 readlen;
+		u8 eth_index;
+		u8 data[0];
+	} __packed *buf;
+	int bufsz, ret;
+
+	BUILD_BUG_ON(offsetof(struct eeprom_buf, data) % 8);
+
+	/* Buffer must be large enough and rounded to the next block size. */
+	bufsz = struct_size(buf, data, round_up(len, NSP_SFF_EEPROM_BLOCK_LEN));
+	buf = kzalloc(bufsz, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->metalen =
+		offsetof(struct eeprom_buf, data) / NSP_SFF_EEPROM_BLOCK_LEN;
+	buf->length = cpu_to_le16(len);
+	buf->offset = cpu_to_le16(offset);
+	buf->eth_index = eth_index;
+
+	ret = __nfp_nsp_module_eeprom(state, buf, bufsz);
+
+	*read_len = min_t(unsigned int, len, le16_to_cpu(buf->readlen));
+	if (*read_len)
+		memcpy(data, buf->data, *read_len);
+
+	if (!ret && *read_len < len)
+		ret = -EIO;
+
+	kfree(buf);
+
+	return ret;
 }
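
For reference, a sketch of how a caller (say, an ethtool module-EEPROM
handler; the function name here is made up) might consume
nfp_nsp_read_module_eeprom(), which copies whatever bytes arrived and
reports -EIO on a short read:

/* Illustrative caller; assumes nfp_nsp.h is included. */
static int example_get_module_eeprom(struct nfp_nsp *nsp, int eth_index,
				     unsigned int off, u8 *buf,
				     unsigned int len)
{
	unsigned int read_len;
	int err;

	err = nfp_nsp_read_module_eeprom(nsp, eth_index, off, buf, len,
					 &read_len);
	if (err == -EIO && read_len)
		return read_len;	/* short read: partial data copied */
	return err ?: (int)read_len;
}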
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index f23d9e0..1531c18 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 #ifndef NSP_NSP_H
 #define NSP_NSP_H 1
@@ -50,18 +20,58 @@
 int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw);
 int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw);
 int nfp_nsp_mac_reinit(struct nfp_nsp *state);
+int nfp_nsp_load_stored_fw(struct nfp_nsp *state);
+int nfp_nsp_hwinfo_lookup(struct nfp_nsp *state, void *buf, unsigned int size);
+int nfp_nsp_hwinfo_lookup_optional(struct nfp_nsp *state, void *buf,
+				   unsigned int size, const char *default_val);
+int nfp_nsp_hwinfo_set(struct nfp_nsp *state, void *buf, unsigned int size);
+int nfp_nsp_fw_loaded(struct nfp_nsp *state);
+int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index,
+			       unsigned int offset, void *data,
+			       unsigned int len, unsigned int *read_len);
 
 static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state)
 {
 	return nfp_nsp_get_abi_ver_minor(state) > 20;
 }
 
+static inline bool nfp_nsp_has_stored_fw_load(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 23;
+}
+
+static inline bool nfp_nsp_has_hwinfo_lookup(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 24;
+}
+
+static inline bool nfp_nsp_has_hwinfo_set(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 25;
+}
+
+static inline bool nfp_nsp_has_fw_loaded(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 25;
+}
+
+static inline bool nfp_nsp_has_versions(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 27;
+}
+
+static inline bool nfp_nsp_has_read_module_eeprom(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 28;
+}
+
 enum nfp_eth_interface {
 	NFP_INTERFACE_NONE	= 0,
 	NFP_INTERFACE_SFP	= 1,
 	NFP_INTERFACE_SFPP	= 10,
 	NFP_INTERFACE_SFP28	= 28,
 	NFP_INTERFACE_QSFP	= 40,
+	NFP_INTERFACE_RJ45	= 45,
 	NFP_INTERFACE_CXP	= 100,
 	NFP_INTERFACE_QSFP28	= 112,
 };
@@ -92,6 +102,21 @@
 #define NFP_FEC_REED_SOLOMON	BIT(NFP_FEC_REED_SOLOMON_BIT)
 #define NFP_FEC_DISABLED	BIT(NFP_FEC_DISABLED_BIT)
 
+/* Defines the valid values of the 'abi_drv_reset' hwinfo key */
+#define NFP_NSP_DRV_RESET_DISK			0
+#define NFP_NSP_DRV_RESET_ALWAYS		1
+#define NFP_NSP_DRV_RESET_NEVER			2
+#define NFP_NSP_DRV_RESET_DEFAULT		"0"
+
+/* Defines the valid values of the 'app_fw_from_flash' hwinfo key */
+#define NFP_NSP_APP_FW_LOAD_DISK		0
+#define NFP_NSP_APP_FW_LOAD_FLASH		1
+#define NFP_NSP_APP_FW_LOAD_PREF		2
+#define NFP_NSP_APP_FW_LOAD_DEFAULT		"2"
+
+/* Define the default value for the 'abi_drv_load_ifc' key */
+#define NFP_NSP_DRV_LOAD_IFC_DEFAULT		"0x10ff"
+
 /**
  * struct nfp_eth_table - ETH table information
  * @count:	number of table entries
@@ -226,4 +251,19 @@
 int nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id,
 			  long *val);
 
+#define NFP_NSP_VERSION_BUFSZ	1024 /* reasonable size, not in the ABI */
+
+enum nfp_nsp_versions {
+	NFP_VERSIONS_BSP,
+	NFP_VERSIONS_CPLD,
+	NFP_VERSIONS_APP,
+	NFP_VERSIONS_BUNDLE,
+	NFP_VERSIONS_UNDI,
+	NFP_VERSIONS_NCSI,
+	NFP_VERSIONS_CFGR,
+};
+
+int nfp_nsp_versions(struct nfp_nsp *state, void *buf, unsigned int size);
+const char *nfp_nsp_versions_get(enum nfp_nsp_versions id, bool flash,
+				 const u8 *buf, unsigned int size);
 #endif
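
The nfp_nsp_has_*() helpers above gate each new command on the NSP ABI minor
version, so callers are expected to probe capability before issuing the
command. A sketch of that pattern for the versions command (the function name
is hypothetical; the buffer is static only to keep the sketch off the kernel
stack, so it is not reentrant):

static int example_get_bsp_version(struct nfp_nsp *state, const char **bsp)
{
	static u8 versions[NFP_NSP_VERSION_BUFSZ];
	int err;

	if (!nfp_nsp_has_versions(state))
		return -EOPNOTSUPP;	/* management FW too old */

	err = nfp_nsp_versions(state, versions, sizeof(versions));
	if (err)
		return err;

	*bsp = nfp_nsp_versions_get(NFP_VERSIONS_BSP, false,
				    versions, sizeof(versions));
	return PTR_ERR_OR_ZERO(*bsp);
}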
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c
index 5d362f8..0997d12 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017 Netronome Systems, Inc. */
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
index 7ca5896..311a5be 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2017 Netronome Systems, Inc. */
 
 /* Authors: David Brunecz <david.brunecz@netronome.com>
  *          Jakub Kicinski <jakub.kicinski@netronome.com>
@@ -236,6 +206,9 @@
 	if (entry->interface == NFP_INTERFACE_NONE) {
 		entry->port_type = PORT_NONE;
 		return;
+	} else if (entry->interface == NFP_INTERFACE_RJ45) {
+		entry->port_type = PORT_TP;
+		return;
 	}
 
 	if (entry->media == NFP_MEDIA_FIBRE)
@@ -299,8 +272,7 @@
 		goto err;
 	}
 
-	table = kzalloc(sizeof(*table) +
-			sizeof(struct nfp_eth_table_port) * cnt, GFP_KERNEL);
+	table = kzalloc(struct_size(table, ports, cnt), GFP_KERNEL);
 	if (!table)
 		goto err;
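
The struct_size() conversion above is behavior-preserving except on overflow:

	/* absent overflow, these compute the same allocation size: */
	size_t new_sz = struct_size(table, ports, cnt);
	size_t old_sz = sizeof(*table) +
			sizeof(struct nfp_eth_table_port) * cnt;

but struct_size() saturates to SIZE_MAX if the multiply or add would wrap, so
kzalloc() then fails outright instead of returning an under-sized buffer.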
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
index d32af59..ce7492a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_resource.c
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
index 9e34216..75f0124 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_rtsym.c
@@ -39,6 +9,8 @@
  *          Espen Skoglund <espen.skoglund@netronome.com>
  *          Francois H. Theron <francois.theron@netronome.com>
  */
+
+#include <asm/unaligned.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -233,6 +205,229 @@
 	return NULL;
 }
 
+u64 nfp_rtsym_size(const struct nfp_rtsym *sym)
+{
+	switch (sym->type) {
+	case NFP_RTSYM_TYPE_NONE:
+		pr_err("rtsym '%s': type NONE\n", sym->name);
+		return 0;
+	default:
+		pr_warn("rtsym '%s': unknown type: %d\n", sym->name, sym->type);
+		/* fall through */
+	case NFP_RTSYM_TYPE_OBJECT:
+	case NFP_RTSYM_TYPE_FUNCTION:
+		return sym->size;
+	case NFP_RTSYM_TYPE_ABS:
+		return sizeof(u64);
+	}
+}
+
+static int
+nfp_rtsym_to_dest(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		  u8 action, u8 token, u64 off, u32 *cpp_id, u64 *addr)
+{
+	if (sym->type != NFP_RTSYM_TYPE_OBJECT) {
+		nfp_err(cpp, "rtsym '%s': direct access to non-object rtsym\n",
+			sym->name);
+		return -EINVAL;
+	}
+
+	*addr = sym->addr + off;
+
+	if (sym->target == NFP_RTSYM_TARGET_EMU_CACHE) {
+		int locality_off = nfp_cpp_mu_locality_lsb(cpp);
+
+		*addr &= ~(NFP_MU_ADDR_ACCESS_TYPE_MASK << locality_off);
+		*addr |= NFP_MU_ADDR_ACCESS_TYPE_DIRECT << locality_off;
+
+		*cpp_id = NFP_CPP_ISLAND_ID(NFP_CPP_TARGET_MU, action, token,
+					    sym->domain);
+	} else if (sym->target < 0) {
+		nfp_err(cpp, "rtsym '%s': unhandled target encoding: %d\n",
+			sym->name, sym->target);
+		return -EINVAL;
+	} else {
+		*cpp_id = NFP_CPP_ISLAND_ID(sym->target, action, token,
+					    sym->domain);
+	}
+
+	return 0;
+}
+
+int __nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		     u8 action, u8 token, u64 off, void *buf, size_t len)
+{
+	u64 sym_size = nfp_rtsym_size(sym);
+	u32 cpp_id;
+	u64 addr;
+	int err;
+
+	if (off > sym_size) {
+		nfp_err(cpp, "rtsym '%s': read out of bounds: off: %lld + len: %zd > size: %lld\n",
+			sym->name, off, len, sym_size);
+		return -ENXIO;
+	}
+	len = min_t(size_t, len, sym_size - off);
+
+	if (sym->type == NFP_RTSYM_TYPE_ABS) {
+		u8 tmp[8];
+
+		put_unaligned_le64(sym->addr, tmp);
+		memcpy(buf, &tmp[off], len);
+
+		return len;
+	}
+
+	err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr);
+	if (err)
+		return err;
+
+	return nfp_cpp_read(cpp, cpp_id, addr, buf, len);
+}
+
+int nfp_rtsym_read(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		   void *buf, size_t len)
+{
+	return __nfp_rtsym_read(cpp, sym, NFP_CPP_ACTION_RW, 0, off, buf, len);
+}
+
+int __nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		      u8 action, u8 token, u64 off, u32 *value)
+{
+	u32 cpp_id;
+	u64 addr;
+	int err;
+
+	if (off + 4 > nfp_rtsym_size(sym)) {
+		nfp_err(cpp, "rtsym '%s': readl out of bounds: off: %lld + 4 > size: %lld\n",
+			sym->name, off, nfp_rtsym_size(sym));
+		return -ENXIO;
+	}
+
+	err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr);
+	if (err)
+		return err;
+
+	return nfp_cpp_readl(cpp, cpp_id, addr, value);
+}
+
+int nfp_rtsym_readl(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		    u32 *value)
+{
+	return __nfp_rtsym_readl(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value);
+}
+
+int __nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		      u8 action, u8 token, u64 off, u64 *value)
+{
+	u32 cpp_id;
+	u64 addr;
+	int err;
+
+	if (off + 8 > nfp_rtsym_size(sym)) {
+		nfp_err(cpp, "rtsym '%s': readq out of bounds: off: %lld + 8 > size: %lld\n",
+			sym->name, off, nfp_rtsym_size(sym));
+		return -ENXIO;
+	}
+
+	if (sym->type == NFP_RTSYM_TYPE_ABS) {
+		*value = sym->addr;
+		return 0;
+	}
+
+	err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr);
+	if (err)
+		return err;
+
+	return nfp_cpp_readq(cpp, cpp_id, addr, value);
+}
+
+int nfp_rtsym_readq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		    u64 *value)
+{
+	return __nfp_rtsym_readq(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value);
+}
+
+int __nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		      u8 action, u8 token, u64 off, void *buf, size_t len)
+{
+	u64 sym_size = nfp_rtsym_size(sym);
+	u32 cpp_id;
+	u64 addr;
+	int err;
+
+	if (off > sym_size) {
+		nfp_err(cpp, "rtsym '%s': write out of bounds: off: %lld + len: %zd > size: %lld\n",
+			sym->name, off, len, sym_size);
+		return -ENXIO;
+	}
+	len = min_t(size_t, len, sym_size - off);
+
+	err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr);
+	if (err)
+		return err;
+
+	return nfp_cpp_write(cpp, cpp_id, addr, buf, len);
+}
+
+int nfp_rtsym_write(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		    void *buf, size_t len)
+{
+	return __nfp_rtsym_write(cpp, sym, NFP_CPP_ACTION_RW, 0, off, buf, len);
+}
+
+int __nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		       u8 action, u8 token, u64 off, u32 value)
+{
+	u32 cpp_id;
+	u64 addr;
+	int err;
+
+	if (off + 4 > nfp_rtsym_size(sym)) {
+		nfp_err(cpp, "rtsym '%s': writel out of bounds: off: %lld + 4 > size: %lld\n",
+			sym->name, off, nfp_rtsym_size(sym));
+		return -ENXIO;
+	}
+
+	err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr);
+	if (err)
+		return err;
+
+	return nfp_cpp_writel(cpp, cpp_id, addr, value);
+}
+
+int nfp_rtsym_writel(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		     u32 value)
+{
+	return __nfp_rtsym_writel(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value);
+}
+
+int __nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym,
+		       u8 action, u8 token, u64 off, u64 value)
+{
+	u32 cpp_id;
+	u64 addr;
+	int err;
+
+	if (off + 8 > nfp_rtsym_size(sym)) {
+		nfp_err(cpp, "rtsym '%s': writeq out of bounds: off: %lld + 8 > size: %lld\n",
+			sym->name, off, nfp_rtsym_size(sym));
+		return -ENXIO;
+	}
+
+	err = nfp_rtsym_to_dest(cpp, sym, action, token, off, &cpp_id, &addr);
+	if (err)
+		return err;
+
+	return nfp_cpp_writeq(cpp, cpp_id, addr, value);
+}
+
+int nfp_rtsym_writeq(struct nfp_cpp *cpp, const struct nfp_rtsym *sym, u64 off,
+		     u64 value)
+{
+	return __nfp_rtsym_writeq(cpp, sym, NFP_CPP_ACTION_RW, 0, off, value);
+}
+
 /**
  * nfp_rtsym_read_le() - Read a simple unsigned scalar value from symbol
  * @rtbl:	NFP RTsym table
@@ -249,7 +444,7 @@
 		      int *error)
 {
 	const struct nfp_rtsym *sym;
-	u32 val32, id;
+	u32 val32;
 	u64 val;
 	int err;
 
@@ -259,20 +454,18 @@
 		goto exit;
 	}
 
-	id = NFP_CPP_ISLAND_ID(sym->target, NFP_CPP_ACTION_RW, 0, sym->domain);
-
-	switch (sym->size) {
+	switch (nfp_rtsym_size(sym)) {
 	case 4:
-		err = nfp_cpp_readl(rtbl->cpp, id, sym->addr, &val32);
+		err = nfp_rtsym_readl(rtbl->cpp, sym, 0, &val32);
 		val = val32;
 		break;
 	case 8:
-		err = nfp_cpp_readq(rtbl->cpp, id, sym->addr, &val);
+		err = nfp_rtsym_readq(rtbl->cpp, sym, 0, &val);
 		break;
 	default:
 		nfp_err(rtbl->cpp,
-			"rtsym '%s' unsupported or non-scalar size: %lld\n",
-			name, sym->size);
+			"rtsym '%s': unsupported or non-scalar size: %lld\n",
+			name, nfp_rtsym_size(sym));
 		err = -EINVAL;
 		break;
 	}
@@ -303,25 +496,22 @@
 {
 	const struct nfp_rtsym *sym;
 	int err;
-	u32 id;
 
 	sym = nfp_rtsym_lookup(rtbl, name);
 	if (!sym)
 		return -ENOENT;
 
-	id = NFP_CPP_ISLAND_ID(sym->target, NFP_CPP_ACTION_RW, 0, sym->domain);
-
-	switch (sym->size) {
+	switch (nfp_rtsym_size(sym)) {
 	case 4:
-		err = nfp_cpp_writel(rtbl->cpp, id, sym->addr, value);
+		err = nfp_rtsym_writel(rtbl->cpp, sym, 0, value);
 		break;
 	case 8:
-		err = nfp_cpp_writeq(rtbl->cpp, id, sym->addr, value);
+		err = nfp_rtsym_writeq(rtbl->cpp, sym, 0, value);
 		break;
 	default:
 		nfp_err(rtbl->cpp,
-			"rtsym '%s' unsupported or non-scalar size: %lld\n",
-			name, sym->size);
+			"rtsym '%s': unsupported or non-scalar size: %lld\n",
+			name, nfp_rtsym_size(sym));
 		err = -EINVAL;
 		break;
 	}
@@ -335,20 +525,29 @@
 {
 	const struct nfp_rtsym *sym;
 	u8 __iomem *mem;
+	u32 cpp_id;
+	u64 addr;
+	int err;
 
 	sym = nfp_rtsym_lookup(rtbl, name);
 	if (!sym)
 		return (u8 __iomem *)ERR_PTR(-ENOENT);
 
+	err = nfp_rtsym_to_dest(rtbl->cpp, sym, NFP_CPP_ACTION_RW, 0, 0,
+				&cpp_id, &addr);
+	if (err) {
+		nfp_err(rtbl->cpp, "rtsym '%s': mapping failed\n", name);
+		return (u8 __iomem *)ERR_PTR(err);
+	}
+
 	if (sym->size < min_size) {
-		nfp_err(rtbl->cpp, "Symbol %s too small\n", name);
+		nfp_err(rtbl->cpp, "rtsym '%s': too small\n", name);
 		return (u8 __iomem *)ERR_PTR(-EINVAL);
 	}
 
-	mem = nfp_cpp_map_area(rtbl->cpp, id, sym->domain, sym->target,
-			       sym->addr, sym->size, area);
+	mem = nfp_cpp_map_area(rtbl->cpp, id, cpp_id, addr, sym->size, area);
 	if (IS_ERR(mem)) {
-		nfp_err(rtbl->cpp, "Failed to map symbol %s: %ld\n",
+		nfp_err(rtbl->cpp, "rtysm '%s': failed to map: %ld\n",
 			name, PTR_ERR(mem));
 		return mem;
 	}
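
A sketch of the scalar helper that the hunks above redirect through the new
typed accessors; the function and symbol name are made up for illustration:

/* Illustrative; "_example_scalar" is not a real symbol name. */
static u64 example_read_scalar(struct nfp_rtsym_table *rtbl, int *err)
{
	u64 val;

	val = nfp_rtsym_read_le(rtbl, "_example_scalar", err);
	if (*err)	/* -ENOENT if absent, -EINVAL if not 4 or 8 bytes */
		return 0;
	return val;
}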
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c
index 4ea1e58..79470f1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_target.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
 /*
  * nfp_target.c
@@ -39,7 +9,11 @@
  *          Francois H. Theron <francois.theron@netronome.com>
  */
 
+#define pr_fmt(fmt)       "NFP target: " fmt
+
 #include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
 
 #include "nfp_cpp.h"
 
@@ -733,8 +707,10 @@
 	u32 imb;
 	int err;
 
-	if (target < 0 || target >= 16)
+	if (target < 0 || target >= 16) {
+		pr_err("Invalid CPP target: %d\n", target);
 		return -EINVAL;
+	}
 
 	if (island == 0) {
 		/* Already translated */
@@ -753,8 +729,10 @@
 	err = nfp_cppat_addr_encode(cpp_target_address, island, target,
 				    ((imb >> 13) & 7), ((imb >> 12) & 1),
 				    ((imb >> 6)  & 0x3f), ((imb >> 0)  & 0x3f));
-	if (err)
+	if (err) {
+		pr_err("Can't encode CPP address: %d\n", err);
 		return err;
+	}
 
 	*cpp_target_id = NFP_CPP_ID(target,
 				    NFP_CPP_ID_ACTION_of(cpp_island_id),
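
The pr_fmt() definition added at the top of this file prefixes every
subsequent pr_err()/pr_warn() in the translation unit, so the new diagnostics
need no per-call prefix. A sketch of the mechanism (the example function is
hypothetical):

/* pr_fmt() must be defined before linux/printk.h is pulled in */
#define pr_fmt(fmt)	"NFP target: " fmt
#include <linux/printk.h>

static void example(void)
{
	pr_err("Invalid CPP target: %d\n", -1);
	/* logs: "NFP target: Invalid CPP target: -1" */
}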
diff --git a/drivers/net/ethernet/netronome/nfp/nic/Makefile b/drivers/net/ethernet/netronome/nfp/nic/Makefile
deleted file mode 100644
index 805fa28..0000000
--- a/drivers/net/ethernet/netronome/nfp/nic/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nic/main.c b/drivers/net/ethernet/netronome/nfp/nic/main.c
index d5b587f..aea8579 100644
--- a/drivers/net/ethernet/netronome/nfp/nic/main.c
+++ b/drivers/net/ethernet/netronome/nfp/nic/main.c
@@ -1,35 +1,5 @@
-/*
- * Copyright (C) 2017 Netronome Systems, Inc.
- *
- * This software is dual licensed under the GNU General License Version 2,
- * June 1991 as shown in the file COPYING in the top-level directory of this
- * source tree or the BSD 2-Clause License provided below.  You have the
- * option to license this software under the complete terms of either license.
- *
- * The BSD 2-Clause License:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      1. Redistributions of source code must retain the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer.
- *
- *      2. Redistributions in binary form must reproduce the above
- *         copyright notice, this list of conditions and the following
- *         disclaimer in the documentation and/or other materials
- *         provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017 Netronome Systems, Inc. */
 
 #include "../nfpcore/nfp_cpp.h"
 #include "../nfpcore/nfp_nsp.h"
diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c
deleted file mode 100644
index df4188c..0000000
--- a/drivers/net/ethernet/netx-eth.c
+++ /dev/null
@@ -1,508 +0,0 @@
-/*
- * drivers/net/ethernet/netx-eth.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/mii.h>
-
-#include <asm/io.h>
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-#include <mach/pfifo.h>
-#include <mach/xc.h>
-#include <linux/platform_data/eth-netx.h>
-
-/* XC Fifo Offsets */
-#define EMPTY_PTR_FIFO(xcno)    (0 + ((xcno) << 3))	/* Index of the empty pointer FIFO */
-#define IND_FIFO_PORT_HI(xcno)  (1 + ((xcno) << 3))	/* Index of the FIFO where received */
-							/* Data packages are indicated by XC */
-#define IND_FIFO_PORT_LO(xcno)  (2 + ((xcno) << 3))	/* Index of the FIFO where received */
-							/* Data packages are indicated by XC */
-#define REQ_FIFO_PORT_HI(xcno)  (3 + ((xcno) << 3))	/* Index of the FIFO where Data packages */
-							/* have to be indicated by ARM which */
-							/* shall be sent */
-#define REQ_FIFO_PORT_LO(xcno)  (4 + ((xcno) << 3))	/* Index of the FIFO where Data packages */
-							/* have to be indicated by ARM which shall */
-							/* be sent */
-#define CON_FIFO_PORT_HI(xcno)  (5 + ((xcno) << 3))	/* Index of the FIFO where sent Data packages */
-							/* are confirmed */
-#define CON_FIFO_PORT_LO(xcno)  (6 + ((xcno) << 3))	/* Index of the FIFO where sent Data */
-							/* packages are confirmed */
-#define PFIFO_MASK(xcno)        (0x7f << (xcno*8))
-
-#define FIFO_PTR_FRAMELEN_SHIFT 0
-#define FIFO_PTR_FRAMELEN_MASK  (0x7ff << 0)
-#define FIFO_PTR_FRAMELEN(len)  (((len) << 0) & FIFO_PTR_FRAMELEN_MASK)
-#define FIFO_PTR_TIMETRIG       (1<<11)
-#define FIFO_PTR_MULTI_REQ
-#define FIFO_PTR_ORIGIN         (1<<14)
-#define FIFO_PTR_VLAN           (1<<15)
-#define FIFO_PTR_FRAMENO_SHIFT  16
-#define FIFO_PTR_FRAMENO_MASK   (0x3f << 16)
-#define FIFO_PTR_FRAMENO(no)    (((no) << 16) & FIFO_PTR_FRAMENO_MASK)
-#define FIFO_PTR_SEGMENT_SHIFT  22
-#define FIFO_PTR_SEGMENT_MASK   (0xf << 22)
-#define FIFO_PTR_SEGMENT(seg)   (((seg) & 0xf) << 22)
-#define FIFO_PTR_ERROR_SHIFT    28
-#define FIFO_PTR_ERROR_MASK     (0xf << 28)
-
-#define ISR_LINK_STATUS_CHANGE (1<<4)
-#define ISR_IND_LO             (1<<3)
-#define ISR_CON_LO             (1<<2)
-#define ISR_IND_HI             (1<<1)
-#define ISR_CON_HI             (1<<0)
-
-#define ETH_MAC_LOCAL_CONFIG 0x1560
-#define ETH_MAC_4321         0x1564
-#define ETH_MAC_65           0x1568
-
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT 16
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK (0xf<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT)
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT(x) (((x)<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT) & MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK)
-#define LOCAL_CONFIG_LINK_STATUS_IRQ_EN (1<<24)
-#define LOCAL_CONFIG_CON_LO_IRQ_EN (1<<23)
-#define LOCAL_CONFIG_CON_HI_IRQ_EN (1<<22)
-#define LOCAL_CONFIG_IND_LO_IRQ_EN (1<<21)
-#define LOCAL_CONFIG_IND_HI_IRQ_EN (1<<20)
-
-#define CARDNAME "netx-eth"
-
-/* LSB must be zero */
-#define INTERNAL_PHY_ADR 0x1c
-
-struct netx_eth_priv {
-	void                    __iomem *sram_base, *xpec_base, *xmac_base;
-	int                     id;
-	struct mii_if_info      mii;
-	u32                     msg_enable;
-	struct xc               *xc;
-	spinlock_t              lock;
-};
-
-static void netx_eth_set_multicast_list(struct net_device *ndev)
-{
-	/* implement me */
-}
-
-static int
-netx_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	unsigned char *buf = skb->data;
-	unsigned int len = skb->len;
-
-	spin_lock_irq(&priv->lock);
-	memcpy_toio(priv->sram_base + 1560, (void *)buf, len);
-	if (len < 60) {
-		memset_io(priv->sram_base + 1560 + len, 0, 60 - len);
-		len = 60;
-	}
-
-	pfifo_push(REQ_FIFO_PORT_LO(priv->id),
-	           FIFO_PTR_SEGMENT(priv->id) |
-	           FIFO_PTR_FRAMENO(1) |
-	           FIFO_PTR_FRAMELEN(len));
-
-	ndev->stats.tx_packets++;
-	ndev->stats.tx_bytes += skb->len;
-
-	netif_stop_queue(ndev);
-	spin_unlock_irq(&priv->lock);
-	dev_kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-static void netx_eth_receive(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	unsigned int val, frameno, seg, len;
-	unsigned char *data;
-	struct sk_buff *skb;
-
-	val = pfifo_pop(IND_FIFO_PORT_LO(priv->id));
-
-	frameno = (val & FIFO_PTR_FRAMENO_MASK) >> FIFO_PTR_FRAMENO_SHIFT;
-	seg = (val & FIFO_PTR_SEGMENT_MASK) >> FIFO_PTR_SEGMENT_SHIFT;
-	len = (val & FIFO_PTR_FRAMELEN_MASK) >> FIFO_PTR_FRAMELEN_SHIFT;
-
-	skb = netdev_alloc_skb(ndev, len);
-	if (unlikely(skb == NULL)) {
-		ndev->stats.rx_dropped++;
-		return;
-	}
-
-	data = skb_put(skb, len);
-
-	memcpy_fromio(data, priv->sram_base + frameno * 1560, len);
-
-	pfifo_push(EMPTY_PTR_FIFO(priv->id),
-		FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
-
-	skb->protocol = eth_type_trans(skb, ndev);
-	netif_rx(skb);
-	ndev->stats.rx_packets++;
-	ndev->stats.rx_bytes += len;
-}
-
-static irqreturn_t
-netx_eth_interrupt(int irq, void *dev_id)
-{
-	struct net_device *ndev = dev_id;
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	int status;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->lock, flags);
-
-	status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
-	while (status) {
-		int fill_level;
-		writel(status, NETX_PFIFO_XPEC_ISR(priv->id));
-
-		if ((status & ISR_CON_HI) || (status & ISR_IND_HI))
-			printk("%s: unexpected status: 0x%08x\n",
-			    __func__, status);
-
-		fill_level =
-		    readl(NETX_PFIFO_FILL_LEVEL(IND_FIFO_PORT_LO(priv->id)));
-		while (fill_level--)
-			netx_eth_receive(ndev);
-
-		if (status & ISR_CON_LO)
-			netif_wake_queue(ndev);
-
-		if (status & ISR_LINK_STATUS_CHANGE)
-			mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-
-		status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
-	}
-	spin_unlock_irqrestore(&priv->lock, flags);
-	return IRQ_HANDLED;
-}
-
-static int netx_eth_open(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-
-	if (request_irq
-	    (ndev->irq, netx_eth_interrupt, IRQF_SHARED, ndev->name, ndev))
-		return -EAGAIN;
-
-	writel(ndev->dev_addr[0] |
-	       ndev->dev_addr[1]<<8 |
-	       ndev->dev_addr[2]<<16 |
-	       ndev->dev_addr[3]<<24,
-	       priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
-	writel(ndev->dev_addr[4] |
-	       ndev->dev_addr[5]<<8,
-	       priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
-	writel(LOCAL_CONFIG_LINK_STATUS_IRQ_EN |
-		LOCAL_CONFIG_CON_LO_IRQ_EN |
-		LOCAL_CONFIG_CON_HI_IRQ_EN |
-		LOCAL_CONFIG_IND_LO_IRQ_EN |
-		LOCAL_CONFIG_IND_HI_IRQ_EN,
-		priv->xpec_base + NETX_XPEC_RAM_START_OFS +
-		ETH_MAC_LOCAL_CONFIG);
-
-	mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-	netif_start_queue(ndev);
-
-	return 0;
-}
-
-static int netx_eth_close(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-
-	netif_stop_queue(ndev);
-
-	writel(0,
-	    priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_LOCAL_CONFIG);
-
-	free_irq(ndev->irq, ndev);
-
-	return 0;
-}
-
-static void netx_eth_timeout(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	int i;
-
-	printk(KERN_ERR "%s: transmit timed out, resetting\n", ndev->name);
-
-	spin_lock_irq(&priv->lock);
-
-	xc_reset(priv->xc);
-	xc_start(priv->xc);
-
-	for (i=2; i<=18; i++)
-		pfifo_push(EMPTY_PTR_FIFO(priv->id),
-			FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
-	spin_unlock_irq(&priv->lock);
-
-	netif_wake_queue(ndev);
-}
-
-static int
-netx_eth_phy_read(struct net_device *ndev, int phy_id, int reg)
-{
-	unsigned int val;
-
-	val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
-	      MIIMU_REGADDR(reg) | MIIMU_PHY_NRES;
-
-	writel(val, NETX_MIIMU);
-	while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-
-	return readl(NETX_MIIMU) >> 16;
-
-}
-
-static void
-netx_eth_phy_write(struct net_device *ndev, int phy_id, int reg, int value)
-{
-	unsigned int val;
-
-	val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
-	      MIIMU_REGADDR(reg) | MIIMU_PHY_NRES | MIIMU_OPMODE_WRITE |
-	      MIIMU_DATA(value);
-
-	writel(val, NETX_MIIMU);
-	while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-}
-
-static const struct net_device_ops netx_eth_netdev_ops = {
-	.ndo_open		= netx_eth_open,
-	.ndo_stop		= netx_eth_close,
-	.ndo_start_xmit		= netx_eth_hard_start_xmit,
-	.ndo_tx_timeout		= netx_eth_timeout,
-	.ndo_set_rx_mode	= netx_eth_set_multicast_list,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= eth_mac_addr,
-};
-
-static int netx_eth_enable(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	unsigned int mac4321, mac65;
-	int running, i, ret;
-	bool inv_mac_addr = false;
-
-	ndev->netdev_ops = &netx_eth_netdev_ops;
-	ndev->watchdog_timeo = msecs_to_jiffies(5000);
-
-	priv->msg_enable       = NETIF_MSG_LINK;
-	priv->mii.phy_id_mask  = 0x1f;
-	priv->mii.reg_num_mask = 0x1f;
-	priv->mii.force_media  = 0;
-	priv->mii.full_duplex  = 0;
-	priv->mii.dev	     = ndev;
-	priv->mii.mdio_read    = netx_eth_phy_read;
-	priv->mii.mdio_write   = netx_eth_phy_write;
-	priv->mii.phy_id = INTERNAL_PHY_ADR + priv->id;
-
-	running = xc_running(priv->xc);
-	xc_stop(priv->xc);
-
-	/* if the xc engine is already running, assume the bootloader has
-	 * loaded the firmware for us
-	 */
-	if (running) {
-		/* get Node Address from hardware */
-		mac4321 = readl(priv->xpec_base +
-			NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
-		mac65 = readl(priv->xpec_base +
-			NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
-		ndev->dev_addr[0] = mac4321 & 0xff;
-		ndev->dev_addr[1] = (mac4321 >> 8) & 0xff;
-		ndev->dev_addr[2] = (mac4321 >> 16) & 0xff;
-		ndev->dev_addr[3] = (mac4321 >> 24) & 0xff;
-		ndev->dev_addr[4] = mac65 & 0xff;
-		ndev->dev_addr[5] = (mac65 >> 8) & 0xff;
-	} else {
-		if (xc_request_firmware(priv->xc)) {
-			printk(CARDNAME ": requesting firmware failed\n");
-			return -ENODEV;
-		}
-	}
-
-	xc_reset(priv->xc);
-	xc_start(priv->xc);
-
-	if (!is_valid_ether_addr(ndev->dev_addr))
-		inv_mac_addr = true;
-
-	for (i=2; i<=18; i++)
-		pfifo_push(EMPTY_PTR_FIFO(priv->id),
-			FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
-	ret = register_netdev(ndev);
-	if (inv_mac_addr)
-		printk("%s: Invalid ethernet MAC address. Please set using ip\n",
-		       ndev->name);
-
-	return ret;
-}
-
-static int netx_eth_drv_probe(struct platform_device *pdev)
-{
-	struct netx_eth_priv *priv;
-	struct net_device *ndev;
-	struct netxeth_platform_data *pdata;
-	int ret;
-
-	ndev = alloc_etherdev(sizeof (struct netx_eth_priv));
-	if (!ndev) {
-		ret = -ENOMEM;
-		goto exit;
-	}
-	SET_NETDEV_DEV(ndev, &pdev->dev);
-
-	platform_set_drvdata(pdev, ndev);
-
-	priv = netdev_priv(ndev);
-
-	pdata = dev_get_platdata(&pdev->dev);
-	priv->xc = request_xc(pdata->xcno, &pdev->dev);
-	if (!priv->xc) {
-		dev_err(&pdev->dev, "unable to request xc engine\n");
-		ret = -ENODEV;
-		goto exit_free_netdev;
-	}
-
-	ndev->irq = priv->xc->irq;
-	priv->id = pdev->id;
-	priv->xpec_base = priv->xc->xpec_base;
-	priv->xmac_base = priv->xc->xmac_base;
-	priv->sram_base = priv->xc->sram_base;
-
-	spin_lock_init(&priv->lock);
-
-	ret = pfifo_request(PFIFO_MASK(priv->id));
-	if (ret) {
-		printk("unable to request PFIFO\n");
-		goto exit_free_xc;
-	}
-
-	ret = netx_eth_enable(ndev);
-	if (ret)
-		goto exit_free_pfifo;
-
-	return 0;
-exit_free_pfifo:
-	pfifo_free(PFIFO_MASK(priv->id));
-exit_free_xc:
-	free_xc(priv->xc);
-exit_free_netdev:
-	free_netdev(ndev);
-exit:
-	return ret;
-}
-
-static int netx_eth_drv_remove(struct platform_device *pdev)
-{
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-
-	unregister_netdev(ndev);
-	xc_stop(priv->xc);
-	free_xc(priv->xc);
-	free_netdev(ndev);
-	pfifo_free(PFIFO_MASK(priv->id));
-
-	return 0;
-}
-
-static int netx_eth_drv_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	dev_err(&pdev->dev, "suspend not implemented\n");
-	return 0;
-}
-
-static int netx_eth_drv_resume(struct platform_device *pdev)
-{
-	dev_err(&pdev->dev, "resume not implemented\n");
-	return 0;
-}
-
-static struct platform_driver netx_eth_driver = {
-	.probe		= netx_eth_drv_probe,
-	.remove		= netx_eth_drv_remove,
-	.suspend	= netx_eth_drv_suspend,
-	.resume		= netx_eth_drv_resume,
-	.driver		= {
-		.name	= CARDNAME,
-	},
-};
-
-static int __init netx_eth_init(void)
-{
-	unsigned int phy_control, val;
-
-	printk("NetX Ethernet driver\n");
-
-	phy_control = PHY_CONTROL_PHY_ADDRESS(INTERNAL_PHY_ADR>>1) |
-		      PHY_CONTROL_PHY1_MODE(PHY_MODE_ALL) |
-		      PHY_CONTROL_PHY1_AUTOMDIX |
-		      PHY_CONTROL_PHY1_EN |
-		      PHY_CONTROL_PHY0_MODE(PHY_MODE_ALL) |
-		      PHY_CONTROL_PHY0_AUTOMDIX |
-		      PHY_CONTROL_PHY0_EN |
-		      PHY_CONTROL_CLK_XLATIN;
-
-	val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
-	writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
-	writel(phy_control | PHY_CONTROL_RESET, NETX_SYSTEM_PHY_CONTROL);
-	udelay(100);
-
-	val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
-	writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
-	writel(phy_control, NETX_SYSTEM_PHY_CONTROL);
-
-	return platform_driver_register(&netx_eth_driver);
-}
-
-static void __exit netx_eth_cleanup(void)
-{
-	platform_driver_unregister(&netx_eth_driver);
-}
-
-module_init(netx_eth_init);
-module_exit(netx_eth_cleanup);
-
-MODULE_AUTHOR("Sascha Hauer, Pengutronix");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" CARDNAME);
-MODULE_FIRMWARE("xc0.bin");
-MODULE_FIRMWARE("xc1.bin");
-MODULE_FIRMWARE("xc2.bin");
diff --git a/drivers/net/ethernet/ni/Kconfig b/drivers/net/ethernet/ni/Kconfig
index aa41e5f..0122919 100644
--- a/drivers/net/ethernet/ni/Kconfig
+++ b/drivers/net/ethernet/ni/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # National Instruments network device configuration
 #
@@ -10,7 +11,7 @@
 
 	  Note that the answer to this question doesn't directly affect the
 	  kernel: saying N will just cause the configurator to skip all
-	  the questions about National Instrument devices.
+	  the questions about National Instruments devices.
 	  If you say Y, you will be asked for your specific device in the
 	  following questions.
 
@@ -18,8 +19,9 @@
 
 config NI_XGE_MANAGEMENT_ENET
 	tristate "National Instruments XGE management enet support"
-	depends on ARCH_ZYNQ
+	depends on HAS_IOMEM && HAS_DMA
 	select PHYLIB
+	select OF_MDIO if OF
 	help
 	  Simple LAN device for debug or management purposes. Can
 	  support either 10G or 1G PHYs via SFP+ ports.
diff --git a/drivers/net/ethernet/ni/Makefile b/drivers/net/ethernet/ni/Makefile
index 99c6646..b31bbea 100644
--- a/drivers/net/ethernet/ni/Makefile
+++ b/drivers/net/ethernet/ni/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_NI_XGE_MANAGEMENT_ENET) += nixge.o
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 76efed0..2761f3a 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -105,11 +105,17 @@
 #define NIXGE_MAX_JUMBO_FRAME_SIZE \
 	(NIXGE_JUMBO_MTU + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE)
 
+enum nixge_version {
+	NIXGE_V2,
+	NIXGE_V3,
+	NIXGE_VERSION_COUNT
+};
+
 struct nixge_hw_dma_bd {
-	u32 next;
-	u32 reserved1;
-	u32 phys;
-	u32 reserved2;
+	u32 next_lo;
+	u32 next_hi;
+	u32 phys_lo;
+	u32 phys_hi;
 	u32 reserved3;
 	u32 reserved4;
 	u32 cntrl;
@@ -119,11 +125,39 @@
 	u32 app2;
 	u32 app3;
 	u32 app4;
-	u32 sw_id_offset;
-	u32 reserved5;
+	u32 sw_id_offset_lo;
+	u32 sw_id_offset_hi;
 	u32 reserved6;
 };
 
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define nixge_hw_dma_bd_set_addr(bd, field, addr) \
+	do { \
+		(bd)->field##_lo = lower_32_bits((addr)); \
+		(bd)->field##_hi = upper_32_bits((addr)); \
+	} while (0)
+#else
+#define nixge_hw_dma_bd_set_addr(bd, field, addr) \
+	((bd)->field##_lo = lower_32_bits((addr)))
+#endif
+
+#define nixge_hw_dma_bd_set_phys(bd, addr) \
+	nixge_hw_dma_bd_set_addr((bd), phys, (addr))
+
+#define nixge_hw_dma_bd_set_next(bd, addr) \
+	nixge_hw_dma_bd_set_addr((bd), next, (addr))
+
+#define nixge_hw_dma_bd_set_offset(bd, addr) \
+	nixge_hw_dma_bd_set_addr((bd), sw_id_offset, (addr))
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define nixge_hw_dma_bd_get_addr(bd, field) \
+	(dma_addr_t)((((u64)(bd)->field##_hi) << 32) | ((bd)->field##_lo))
+#else
+#define nixge_hw_dma_bd_get_addr(bd, field) \
+	(dma_addr_t)((bd)->field##_lo)
+#endif
+
 struct nixge_tx_skb {
 	struct sk_buff *skb;
 	dma_addr_t mapping;
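
Splitting every descriptor address into _lo/_hi words is what lets one
struct layout serve 32- and 64-bit DMA alike: on CONFIG_PHYS_ADDR_T_64BIT
kernels both halves are significant, while 32-bit builds only ever touch
the low word. The driver's token-pasting macros avoid repeating the #ifdef
for every field; a minimal open-coded sketch of the same split (helper
names hypothetical, not the driver's):

  #include <linux/kernel.h>	/* lower_32_bits()/upper_32_bits() */

  static inline void desc_set_addr(u32 *lo, u32 *hi, dma_addr_t addr)
  {
  	*lo = lower_32_bits(addr);
  #ifdef CONFIG_PHYS_ADDR_T_64BIT
  	*hi = upper_32_bits(addr);
  #endif
  }

  static inline dma_addr_t desc_get_addr(u32 lo, u32 hi)
  {
  #ifdef CONFIG_PHYS_ADDR_T_64BIT
  	return (dma_addr_t)(((u64)hi << 32) | lo);
  #else
  	return (dma_addr_t)lo;
  #endif
  }
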
@@ -176,6 +210,15 @@
 	writel(val, priv->dma_regs + offset);
 }
 
+static void nixge_dma_write_desc_reg(struct nixge_priv *priv, off_t offset,
+				     dma_addr_t addr)
+{
+	writel(lower_32_bits(addr), priv->dma_regs + offset);
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	writel(upper_32_bits(addr), priv->dma_regs + offset + 4);
+#endif
+}
+
 static u32 nixge_dma_read_reg(const struct nixge_priv *priv, off_t offset)
 {
 	return readl(priv->dma_regs + offset);
@@ -202,13 +245,22 @@
 static void nixge_hw_dma_bd_release(struct net_device *ndev)
 {
 	struct nixge_priv *priv = netdev_priv(ndev);
+	dma_addr_t phys_addr;
+	struct sk_buff *skb;
 	int i;
 
 	for (i = 0; i < RX_BD_NUM; i++) {
-		dma_unmap_single(ndev->dev.parent, priv->rx_bd_v[i].phys,
-				 NIXGE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
-		dev_kfree_skb((struct sk_buff *)
-			      (priv->rx_bd_v[i].sw_id_offset));
+		phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
+						     phys);
+
+		dma_unmap_single(ndev->dev.parent, phys_addr,
+				 NIXGE_MAX_JUMBO_FRAME_SIZE,
+				 DMA_FROM_DEVICE);
+
+		skb = (struct sk_buff *)(uintptr_t)
+			nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
+						 sw_id_offset);
+		dev_kfree_skb(skb);
 	}
 
 	if (priv->rx_bd_v)
@@ -231,6 +283,7 @@
 {
 	struct nixge_priv *priv = netdev_priv(ndev);
 	struct sk_buff *skb;
+	dma_addr_t phys;
 	u32 cr;
 	int i;
 
@@ -240,9 +293,9 @@
 	priv->rx_bd_ci = 0;
 
 	/* Allocate the Tx and Rx buffer descriptors. */
-	priv->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
-					    sizeof(*priv->tx_bd_v) * TX_BD_NUM,
-					    &priv->tx_bd_p, GFP_KERNEL);
+	priv->tx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+					   sizeof(*priv->tx_bd_v) * TX_BD_NUM,
+					   &priv->tx_bd_p, GFP_KERNEL);
 	if (!priv->tx_bd_v)
 		goto out;
 
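
dma_zalloc_coherent() was dropped in v5.0 once dma_alloc_coherent() itself
began returning zeroed memory, so this hunk is a mechanical rename with no
behavioural change: the descriptor rings still come back zero-filled, which
is also what leaves the unwritten _hi words safely zero on 32-bit builds.
The current idiom, sketched with hypothetical ring variables:

  struct my_bd *ring;
  dma_addr_t ring_dma;

  /* Since v5.0, dma_alloc_coherent() always zeroes the buffer, so the
   * ring needs no memset() before being handed to hardware. */
  ring = dma_alloc_coherent(dev, NUM_BDS * sizeof(*ring), &ring_dma,
  			    GFP_KERNEL);
  if (!ring)
  	return -ENOMEM;
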
@@ -252,34 +305,37 @@
 	if (!priv->tx_skb)
 		goto out;
 
-	priv->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
-					    sizeof(*priv->rx_bd_v) * RX_BD_NUM,
-					    &priv->rx_bd_p, GFP_KERNEL);
+	priv->rx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+					   sizeof(*priv->rx_bd_v) * RX_BD_NUM,
+					   &priv->rx_bd_p, GFP_KERNEL);
 	if (!priv->rx_bd_v)
 		goto out;
 
 	for (i = 0; i < TX_BD_NUM; i++) {
-		priv->tx_bd_v[i].next = priv->tx_bd_p +
-				      sizeof(*priv->tx_bd_v) *
-				      ((i + 1) % TX_BD_NUM);
+		nixge_hw_dma_bd_set_next(&priv->tx_bd_v[i],
+					 priv->tx_bd_p +
+					 sizeof(*priv->tx_bd_v) *
+					 ((i + 1) % TX_BD_NUM));
 	}
 
 	for (i = 0; i < RX_BD_NUM; i++) {
-		priv->rx_bd_v[i].next = priv->rx_bd_p +
-				      sizeof(*priv->rx_bd_v) *
-				      ((i + 1) % RX_BD_NUM);
+		nixge_hw_dma_bd_set_next(&priv->rx_bd_v[i],
+					 priv->rx_bd_p
+					 + sizeof(*priv->rx_bd_v) *
+					 ((i + 1) % RX_BD_NUM));
 
 		skb = netdev_alloc_skb_ip_align(ndev,
 						NIXGE_MAX_JUMBO_FRAME_SIZE);
 		if (!skb)
 			goto out;
 
-		priv->rx_bd_v[i].sw_id_offset = (u32)skb;
-		priv->rx_bd_v[i].phys =
-			dma_map_single(ndev->dev.parent,
-				       skb->data,
-				       NIXGE_MAX_JUMBO_FRAME_SIZE,
-				       DMA_FROM_DEVICE);
+		nixge_hw_dma_bd_set_offset(&priv->rx_bd_v[i], (uintptr_t)skb);
+		phys = dma_map_single(ndev->dev.parent, skb->data,
+				      NIXGE_MAX_JUMBO_FRAME_SIZE,
+				      DMA_FROM_DEVICE);
+
+		nixge_hw_dma_bd_set_phys(&priv->rx_bd_v[i], phys);
+
 		priv->rx_bd_v[i].cntrl = NIXGE_MAX_JUMBO_FRAME_SIZE;
 	}
 
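
Stashing the sk_buff pointer in the descriptor's sw_id_offset words gives
the completion path a route back from a hardware descriptor to the kernel
object that owns the buffer. The cast now goes through uintptr_t, which is
what makes the pointer-to-u32-pair conversion legal on both 32- and 64-bit
builds (the old (u32)skb cast truncated pointers on 64-bit). The round
trip, sketched with hypothetical field names:

  /* Store: pointer -> integer cookie -> lo/hi descriptor words. */
  u64 cookie = (uintptr_t)skb;

  bd->sw_id_lo = lower_32_bits(cookie);
  bd->sw_id_hi = upper_32_bits(cookie);

  /* Retrieve: lo/hi descriptor words -> integer cookie -> pointer. */
  cookie = ((u64)bd->sw_id_hi << 32) | bd->sw_id_lo;
  skb = (struct sk_buff *)(uintptr_t)cookie;
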
@@ -312,18 +368,18 @@
 	/* Populate the tail pointer and bring the Rx Axi DMA engine out of
 	 * halted state. This will make the Rx side ready for reception.
 	 */
-	nixge_dma_write_reg(priv, XAXIDMA_RX_CDESC_OFFSET, priv->rx_bd_p);
+	nixge_dma_write_desc_reg(priv, XAXIDMA_RX_CDESC_OFFSET, priv->rx_bd_p);
 	cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
 	nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET,
 			    cr | XAXIDMA_CR_RUNSTOP_MASK);
-	nixge_dma_write_reg(priv, XAXIDMA_RX_TDESC_OFFSET, priv->rx_bd_p +
+	nixge_dma_write_desc_reg(priv, XAXIDMA_RX_TDESC_OFFSET, priv->rx_bd_p +
 			    (sizeof(*priv->rx_bd_v) * (RX_BD_NUM - 1)));
 
 	/* Write to the RS (Run-stop) bit in the Tx channel control register.
 	 * Tx channel is now ready to run. But only after we write to the
 	 * tail pointer register that the Tx channel will start transmitting.
 	 */
-	nixge_dma_write_reg(priv, XAXIDMA_TX_CDESC_OFFSET, priv->tx_bd_p);
+	nixge_dma_write_desc_reg(priv, XAXIDMA_TX_CDESC_OFFSET, priv->tx_bd_p);
 	cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
 	nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET,
 			    cr | XAXIDMA_CR_RUNSTOP_MASK);
@@ -451,7 +507,7 @@
 	struct nixge_priv *priv = netdev_priv(ndev);
 	struct nixge_hw_dma_bd *cur_p;
 	struct nixge_tx_skb *tx_skb;
-	dma_addr_t tail_p;
+	dma_addr_t tail_p, cur_phys;
 	skb_frag_t *frag;
 	u32 num_frag;
 	u32 ii;
@@ -466,15 +522,16 @@
 		return NETDEV_TX_OK;
 	}
 
-	cur_p->phys = dma_map_single(ndev->dev.parent, skb->data,
-				     skb_headlen(skb), DMA_TO_DEVICE);
-	if (dma_mapping_error(ndev->dev.parent, cur_p->phys))
+	cur_phys = dma_map_single(ndev->dev.parent, skb->data,
+				  skb_headlen(skb), DMA_TO_DEVICE);
+	if (dma_mapping_error(ndev->dev.parent, cur_phys))
 		goto drop;
+	nixge_hw_dma_bd_set_phys(cur_p, cur_phys);
 
 	cur_p->cntrl = skb_headlen(skb) | XAXIDMA_BD_CTRL_TXSOF_MASK;
 
 	tx_skb->skb = NULL;
-	tx_skb->mapping = cur_p->phys;
+	tx_skb->mapping = cur_phys;
 	tx_skb->size = skb_headlen(skb);
 	tx_skb->mapped_as_page = false;
 
@@ -485,16 +542,17 @@
 		tx_skb = &priv->tx_skb[priv->tx_bd_tail];
 		frag = &skb_shinfo(skb)->frags[ii];
 
-		cur_p->phys = skb_frag_dma_map(ndev->dev.parent, frag, 0,
-					       skb_frag_size(frag),
-					       DMA_TO_DEVICE);
-		if (dma_mapping_error(ndev->dev.parent, cur_p->phys))
+		cur_phys = skb_frag_dma_map(ndev->dev.parent, frag, 0,
+					    skb_frag_size(frag),
+					    DMA_TO_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, cur_phys))
 			goto frag_err;
+		nixge_hw_dma_bd_set_phys(cur_p, cur_phys);
 
 		cur_p->cntrl = skb_frag_size(frag);
 
 		tx_skb->skb = NULL;
-		tx_skb->mapping = cur_p->phys;
+		tx_skb->mapping = cur_phys;
 		tx_skb->size = skb_frag_size(frag);
 		tx_skb->mapped_as_page = true;
 	}
@@ -506,7 +564,7 @@
 
 	tail_p = priv->tx_bd_p + sizeof(*priv->tx_bd_v) * priv->tx_bd_tail;
 	/* Start the transfer */
-	nixge_dma_write_reg(priv, XAXIDMA_TX_TDESC_OFFSET, tail_p);
+	nixge_dma_write_desc_reg(priv, XAXIDMA_TX_TDESC_OFFSET, tail_p);
 	++priv->tx_bd_tail;
 	priv->tx_bd_tail %= TX_BD_NUM;
 
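
The tail-pointer write is the doorbell of this ring protocol: descriptors
filled in coherent memory stay owned by software until TDESC is advanced,
at which point the engine fetches and transmits everything up to and
including the tail. writel(), which the helper uses underneath, is ordered
after the CPU's earlier stores to DMA-coherent memory, so no explicit
wmb() is needed between filling the BDs and publishing them. The submit
tail, condensed from the driver:

  /* Mark the final fragment, then publish the chain with one MMIO
   * doorbell write; prior descriptor stores are ordered before it. */
  cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK;
  tail_p = priv->tx_bd_p + sizeof(*priv->tx_bd_v) * priv->tx_bd_tail;
  nixge_dma_write_desc_reg(priv, XAXIDMA_TX_TDESC_OFFSET, tail_p);
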
@@ -537,7 +595,7 @@
 	struct nixge_priv *priv = netdev_priv(ndev);
 	struct sk_buff *skb, *new_skb;
 	struct nixge_hw_dma_bd *cur_p;
-	dma_addr_t tail_p = 0;
+	dma_addr_t tail_p = 0, cur_phys = 0;
 	u32 packets = 0;
 	u32 length = 0;
 	u32 size = 0;
@@ -549,13 +607,15 @@
 		tail_p = priv->rx_bd_p + sizeof(*priv->rx_bd_v) *
 			 priv->rx_bd_ci;
 
-		skb = (struct sk_buff *)(cur_p->sw_id_offset);
+		skb = (struct sk_buff *)(uintptr_t)
+			nixge_hw_dma_bd_get_addr(cur_p, sw_id_offset);
 
 		length = cur_p->status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
 		if (length > NIXGE_MAX_JUMBO_FRAME_SIZE)
 			length = NIXGE_MAX_JUMBO_FRAME_SIZE;
 
-		dma_unmap_single(ndev->dev.parent, cur_p->phys,
+		dma_unmap_single(ndev->dev.parent,
+				 nixge_hw_dma_bd_get_addr(cur_p, phys),
 				 NIXGE_MAX_JUMBO_FRAME_SIZE,
 				 DMA_FROM_DEVICE);
 
@@ -579,16 +639,17 @@
 		if (!new_skb)
 			return packets;
 
-		cur_p->phys = dma_map_single(ndev->dev.parent, new_skb->data,
-					     NIXGE_MAX_JUMBO_FRAME_SIZE,
-					     DMA_FROM_DEVICE);
-		if (dma_mapping_error(ndev->dev.parent, cur_p->phys)) {
+		cur_phys = dma_map_single(ndev->dev.parent, new_skb->data,
+					  NIXGE_MAX_JUMBO_FRAME_SIZE,
+					  DMA_FROM_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, cur_phys)) {
 			/* FIXME: bail out and clean up */
 			netdev_err(ndev, "Failed to map ...\n");
 		}
+		nixge_hw_dma_bd_set_phys(cur_p, cur_phys);
 		cur_p->cntrl = NIXGE_MAX_JUMBO_FRAME_SIZE;
 		cur_p->status = 0;
-		cur_p->sw_id_offset = (u32)new_skb;
+		nixge_hw_dma_bd_set_offset(cur_p, (uintptr_t)new_skb);
 
 		++priv->rx_bd_ci;
 		priv->rx_bd_ci %= RX_BD_NUM;
@@ -599,7 +660,7 @@
 	ndev->stats.rx_bytes += size;
 
 	if (tail_p)
-		nixge_dma_write_reg(priv, XAXIDMA_RX_TDESC_OFFSET, tail_p);
+		nixge_dma_write_desc_reg(priv, XAXIDMA_RX_TDESC_OFFSET, tail_p);
 
 	return packets;
 }
@@ -637,6 +698,7 @@
 	struct nixge_priv *priv = netdev_priv(_ndev);
 	struct net_device *ndev = _ndev;
 	unsigned int status;
+	dma_addr_t phys;
 	u32 cr;
 
 	status = nixge_dma_read_reg(priv, XAXIDMA_TX_SR_OFFSET);
@@ -650,9 +712,11 @@
 		return IRQ_NONE;
 	}
 	if (status & XAXIDMA_IRQ_ERROR_MASK) {
+		phys = nixge_hw_dma_bd_get_addr(&priv->tx_bd_v[priv->tx_bd_ci],
+						phys);
+
 		netdev_err(ndev, "DMA Tx error 0x%x\n", status);
-		netdev_err(ndev, "Current BD is at: 0x%x\n",
-			   (priv->tx_bd_v[priv->tx_bd_ci]).phys);
+		netdev_err(ndev, "Current BD is at: 0x%llx\n", (u64)phys);
 
 		cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
 		/* Disable coalesce, delay timer and error interrupts */
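
dma_addr_t is 32 bits wide on some configurations and 64 on others, so
printing one takes care: either widen explicitly to u64 and print with
%llx, as the fix above does, or use printk's dedicated %pad specifier,
which takes a pointer to the dma_addr_t. Both forms, sketched:

  dma_addr_t phys;	/* however obtained */

  netdev_err(ndev, "BD at: 0x%llx\n", (u64)phys);  /* explicit widening */
  netdev_err(ndev, "BD at: %pad\n", &phys);        /* %pad wants the address */
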
@@ -678,6 +742,7 @@
 	struct nixge_priv *priv = netdev_priv(_ndev);
 	struct net_device *ndev = _ndev;
 	unsigned int status;
+	dma_addr_t phys;
 	u32 cr;
 
 	status = nixge_dma_read_reg(priv, XAXIDMA_RX_SR_OFFSET);
@@ -697,9 +762,10 @@
 		return IRQ_NONE;
 	}
 	if (status & XAXIDMA_IRQ_ERROR_MASK) {
+		phys = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[priv->rx_bd_ci],
+						phys);
 		netdev_err(ndev, "DMA Rx error 0x%x\n", status);
-		netdev_err(ndev, "Current BD is at: 0x%x\n",
-			   (priv->rx_bd_v[priv->rx_bd_ci]).phys);
+		netdev_err(ndev, "Current BD is at: 0x%llx\n", (u64)phys);
 
 		cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
 		/* Disable coalesce, delay timer and error interrupts */
@@ -735,10 +801,10 @@
 		tx_skb = &lp->tx_skb[i];
 		nixge_tx_skb_unmap(lp, tx_skb);
 
-		cur_p->phys = 0;
+		nixge_hw_dma_bd_set_phys(cur_p, 0);
 		cur_p->cntrl = 0;
 		cur_p->status = 0;
-		cur_p->sw_id_offset = 0;
+		nixge_hw_dma_bd_set_offset(cur_p, 0);
 	}
 
 	for (i = 0; i < RX_BD_NUM; i++) {
@@ -779,18 +845,18 @@
 	/* Populate the tail pointer and bring the Rx Axi DMA engine out of
 	 * halted state. This will make the Rx side ready for reception.
 	 */
-	nixge_dma_write_reg(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p);
+	nixge_dma_write_desc_reg(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p);
 	cr = nixge_dma_read_reg(lp, XAXIDMA_RX_CR_OFFSET);
 	nixge_dma_write_reg(lp, XAXIDMA_RX_CR_OFFSET,
 			    cr | XAXIDMA_CR_RUNSTOP_MASK);
-	nixge_dma_write_reg(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
+	nixge_dma_write_desc_reg(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
 			    (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
 
 	/* Write to the RS (Run-stop) bit in the Tx channel control register.
 	 * Tx channel is now ready to run. But only after we write to the
 	 * tail pointer register that the Tx channel will start transmitting
 	 */
-	nixge_dma_write_reg(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
+	nixge_dma_write_desc_reg(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
 	cr = nixge_dma_read_reg(lp, XAXIDMA_TX_CR_OFFSET);
 	nixge_dma_write_reg(lp, XAXIDMA_TX_CR_OFFSET,
 			    cr | XAXIDMA_CR_RUNSTOP_MASK);
@@ -924,7 +990,7 @@
 				       struct ethtool_drvinfo *ed)
 {
 	strlcpy(ed->driver, "nixge", sizeof(ed->driver));
-	strlcpy(ed->bus_info, "platform", sizeof(ed->driver));
+	strlcpy(ed->bus_info, "platform", sizeof(ed->bus_info));
 }
 
 static int nixge_ethtools_get_coalesce(struct net_device *ndev,
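
The one-character fix above closes a classic bounds bug: the copy into
ed->bus_info was limited by sizeof(ed->driver), i.e. by the size of a
different buffer. Both arrays in struct ethtool_drvinfo happen to be
32 bytes, so the mismatch was latent rather than an overflow, but the rule
it violated stands: the bound must name the actual destination.

  struct ethtool_drvinfo *ed;	/* as in the handler above */

  strlcpy(ed->driver, "nixge", sizeof(ed->driver));        /* bound == dest */
  strlcpy(ed->bus_info, "platform", sizeof(ed->bus_info)); /* bound == dest */
  /* strlcpy(ed->bus_info, "platform", sizeof(ed->driver)) bounded the
   * copy by the wrong buffer. */
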
@@ -1165,11 +1231,60 @@
 	return mac;
 }
 
+/* Match table for of_platform binding */
+static const struct of_device_id nixge_dt_ids[] = {
+	{ .compatible = "ni,xge-enet-2.00", .data = (void *)NIXGE_V2 },
+	{ .compatible = "ni,xge-enet-3.00", .data = (void *)NIXGE_V3 },
+	{},
+};
+MODULE_DEVICE_TABLE(of, nixge_dt_ids);
+
+static int nixge_of_get_resources(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id;
+	enum nixge_version version;
+	struct resource *ctrlres;
+	struct resource *dmares;
+	struct net_device *ndev;
+	struct nixge_priv *priv;
+
+	ndev = platform_get_drvdata(pdev);
+	priv = netdev_priv(ndev);
+	of_id = of_match_node(nixge_dt_ids, pdev->dev.of_node);
+	if (!of_id)
+		return -ENODEV;
+
+	version = (enum nixge_version)of_id->data;
+	if (version <= NIXGE_V2)
+		dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	else
+		dmares = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						      "dma");
+
+	priv->dma_regs = devm_ioremap_resource(&pdev->dev, dmares);
+	if (IS_ERR(priv->dma_regs)) {
+		netdev_err(ndev, "failed to map dma regs\n");
+		return PTR_ERR(priv->dma_regs);
+	}
+	if (version <= NIXGE_V2) {
+		priv->ctrl_regs = priv->dma_regs + NIXGE_REG_CTRL_OFFSET;
+	} else {
+		ctrlres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						       "ctrl");
+		priv->ctrl_regs = devm_ioremap_resource(&pdev->dev, ctrlres);
+	}
+	if (IS_ERR(priv->ctrl_regs)) {
+		netdev_err(ndev, "failed to map ctrl regs\n");
+		return PTR_ERR(priv->ctrl_regs);
+	}
+	return 0;
+}
+
 static int nixge_probe(struct platform_device *pdev)
 {
+	struct device_node *mn, *phy_node;
 	struct nixge_priv *priv;
 	struct net_device *ndev;
-	struct resource *dmares;
 	const u8 *mac_addr;
 	int err;
 
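
Moving the match table above probe() lets the new nixge_of_get_resources()
dispatch on it: each compatible string carries a driver-private version
enum in of_device_id.data, recovered via of_match_node(). V2 devices keep
the original single memory region, with the control registers at a fixed
offset inside the DMA block, while V3 devices describe separate "dma" and
"ctrl" regions by name. The general pattern, with hypothetical names:

  static const struct of_device_id foo_dt_ids[] = {
  	{ .compatible = "vendor,foo-1.00", .data = (void *)FOO_V1 },
  	{ .compatible = "vendor,foo-2.00", .data = (void *)FOO_V2 },
  	{ /* sentinel */ },
  };

  /* in probe: */
  enum foo_version version;
  const struct of_device_id *of_id;

  of_id = of_match_node(foo_dt_ids, pdev->dev.of_node);
  if (!of_id)
  	return -ENODEV;
  version = (enum foo_version)(uintptr_t)of_id->data;
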
@@ -1201,14 +1316,9 @@
 	priv->dev = &pdev->dev;
 
 	netif_napi_add(ndev, &priv->napi, nixge_poll, NAPI_POLL_WEIGHT);
-
-	dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->dma_regs = devm_ioremap_resource(&pdev->dev, dmares);
-	if (IS_ERR(priv->dma_regs)) {
-		netdev_err(ndev, "failed to map dma regs\n");
-		return PTR_ERR(priv->dma_regs);
-	}
-	priv->ctrl_regs = priv->dma_regs + NIXGE_REG_CTRL_OFFSET;
+	err = nixge_of_get_resources(pdev);
+	if (err)
+		return err;
 	__nixge_hw_set_mac_address(ndev);
 
 	priv->tx_irq = platform_get_irq_byname(pdev, "tx");
@@ -1226,36 +1336,50 @@
 	priv->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
 	priv->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
 
-	err = nixge_mdio_setup(priv, pdev->dev.of_node);
-	if (err) {
-		netdev_err(ndev, "error registering mdio bus");
-		goto free_netdev;
+	mn = of_get_child_by_name(pdev->dev.of_node, "mdio");
+	if (mn) {
+		err = nixge_mdio_setup(priv, mn);
+		of_node_put(mn);
+		if (err) {
+			netdev_err(ndev, "error registering mdio bus");
+			goto free_netdev;
+		}
 	}
 
 	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-	if (priv->phy_mode < 0) {
+	if ((int)priv->phy_mode < 0) {
 		netdev_err(ndev, "not find \"phy-mode\" property\n");
 		err = -EINVAL;
 		goto unregister_mdio;
 	}
 
-	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
-	if (!priv->phy_node) {
-		netdev_err(ndev, "not find \"phy-handle\" property\n");
-		err = -EINVAL;
-		goto unregister_mdio;
+	phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (!phy_node && of_phy_is_fixed_link(pdev->dev.of_node)) {
+		err = of_phy_register_fixed_link(pdev->dev.of_node);
+		if (err < 0) {
+			netdev_err(ndev, "broken fixed-link specification\n");
+			goto unregister_mdio;
+		}
+		phy_node = of_node_get(pdev->dev.of_node);
 	}
+	priv->phy_node = phy_node;
 
 	err = register_netdev(priv->ndev);
 	if (err) {
 		netdev_err(ndev, "register_netdev() error (%i)\n", err);
-		goto unregister_mdio;
+		goto free_phy;
 	}
 
 	return 0;
 
+free_phy:
+	if (of_phy_is_fixed_link(pdev->dev.of_node))
+		of_phy_deregister_fixed_link(pdev->dev.of_node);
+	of_node_put(phy_node);
+
 unregister_mdio:
-	mdiobus_unregister(priv->mii_bus);
+	if (priv->mii_bus)
+		mdiobus_unregister(priv->mii_bus);
 
 free_netdev:
 	free_netdev(ndev);
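
Two probe-path relaxations land in this hunk. First, the MDIO bus becomes
optional: it is registered only if the device tree provides an "mdio"
child node, and the error paths correspondingly unregister it only when
priv->mii_bus was set. Second, a missing "phy-handle" is no longer fatal
when the node carries a fixed-link description; a software PHY is
registered instead and the device node itself doubles as the phy handle.
The fixed-link fallback in isolation (np standing for pdev->dev.of_node):

  struct device_node *phy_node;

  phy_node = of_parse_phandle(np, "phy-handle", 0);
  if (!phy_node && of_phy_is_fixed_link(np)) {
  	/* "fixed-link" DT description: emulate the PHY in software. */
  	if (of_phy_register_fixed_link(np) < 0)
  		return -ENODEV;
  	phy_node = of_node_get(np);	/* node doubles as the handle */
  }
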
@@ -1270,20 +1394,18 @@
 
 	unregister_netdev(ndev);
 
-	mdiobus_unregister(priv->mii_bus);
+	if (of_phy_is_fixed_link(pdev->dev.of_node))
+		of_phy_deregister_fixed_link(pdev->dev.of_node);
+	of_node_put(priv->phy_node);
+
+	if (priv->mii_bus)
+		mdiobus_unregister(priv->mii_bus);
 
 	free_netdev(ndev);
 
 	return 0;
 }
 
-/* Match table for of_platform binding */
-static const struct of_device_id nixge_dt_ids[] = {
-	{ .compatible = "ni,xge-enet-2.00", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, nixge_dt_ids);
-
 static struct platform_driver nixge_driver = {
 	.probe		= nixge_probe,
 	.remove		= nixge_remove,
diff --git a/drivers/net/ethernet/nuvoton/Kconfig b/drivers/net/ethernet/nuvoton/Kconfig
deleted file mode 100644
index 71c973f..0000000
--- a/drivers/net/ethernet/nuvoton/Kconfig
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Nuvoton network device configuration
-#
-
-config NET_VENDOR_NUVOTON
-	bool "Nuvoton devices"
-	default y
-	depends on ARM && ARCH_W90X900
-	---help---
-	  If you have a network (Ethernet) card belonging to this class, say Y.
-
-	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about Nuvoton cards. If you say Y, you will be asked
-	  for your specific card in the following questions.
-
-if NET_VENDOR_NUVOTON
-
-config W90P910_ETH
-	tristate "Nuvoton w90p910 Ethernet support"
-	depends on ARM && ARCH_W90X900
-	select PHYLIB
-	select MII
-	---help---
-	  Say Y here if you want to use built-in Ethernet ports
-	  on w90p910 processor.
-
-endif # NET_VENDOR_NUVOTON
diff --git a/drivers/net/ethernet/nuvoton/Makefile b/drivers/net/ethernet/nuvoton/Makefile
deleted file mode 100644
index 171aa04..0000000
--- a/drivers/net/ethernet/nuvoton/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the Nuvoton network device drivers.
-#
-
-obj-$(CONFIG_W90P910_ETH) += w90p910_ether.o
diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c
deleted file mode 100644
index 052b3d2..0000000
--- a/drivers/net/ethernet/nuvoton/w90p910_ether.c
+++ /dev/null
@@ -1,1086 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation;version 2 of the License.
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mii.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/ethtool.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/gfp.h>
-
-#define DRV_MODULE_NAME		"w90p910-emc"
-#define DRV_MODULE_VERSION	"0.1"
-
-/* Ethernet MAC Registers */
-#define REG_CAMCMR		0x00
-#define REG_CAMEN		0x04
-#define REG_CAMM_BASE		0x08
-#define REG_CAML_BASE		0x0c
-#define REG_TXDLSA		0x88
-#define REG_RXDLSA		0x8C
-#define REG_MCMDR		0x90
-#define REG_MIID		0x94
-#define REG_MIIDA		0x98
-#define REG_FFTCR		0x9C
-#define REG_TSDR		0xa0
-#define REG_RSDR		0xa4
-#define REG_DMARFC		0xa8
-#define REG_MIEN		0xac
-#define REG_MISTA		0xb0
-#define REG_CTXDSA		0xcc
-#define REG_CTXBSA		0xd0
-#define REG_CRXDSA		0xd4
-#define REG_CRXBSA		0xd8
-
-/* mac controller bit */
-#define MCMDR_RXON		0x01
-#define MCMDR_ACP		(0x01 << 3)
-#define MCMDR_SPCRC		(0x01 << 5)
-#define MCMDR_TXON		(0x01 << 8)
-#define MCMDR_FDUP		(0x01 << 18)
-#define MCMDR_ENMDC		(0x01 << 19)
-#define MCMDR_OPMOD		(0x01 << 20)
-#define SWR			(0x01 << 24)
-
-/* cam command register */
-#define CAMCMR_AUP		0x01
-#define CAMCMR_AMP		(0x01 << 1)
-#define CAMCMR_ABP		(0x01 << 2)
-#define CAMCMR_CCAM		(0x01 << 3)
-#define CAMCMR_ECMP		(0x01 << 4)
-#define CAM0EN			0x01
-
-/* mac mii controller bit */
-#define MDCCR			(0x0a << 20)
-#define PHYAD			(0x01 << 8)
-#define PHYWR			(0x01 << 16)
-#define PHYBUSY			(0x01 << 17)
-#define PHYPRESP		(0x01 << 18)
-#define CAM_ENTRY_SIZE		0x08
-
-/* rx and tx status */
-#define TXDS_TXCP		(0x01 << 19)
-#define RXDS_CRCE		(0x01 << 17)
-#define RXDS_PTLE		(0x01 << 19)
-#define RXDS_RXGD		(0x01 << 20)
-#define RXDS_ALIE		(0x01 << 21)
-#define RXDS_RP			(0x01 << 22)
-
-/* mac interrupt status*/
-#define MISTA_EXDEF		(0x01 << 19)
-#define MISTA_TXBERR		(0x01 << 24)
-#define MISTA_TDU		(0x01 << 23)
-#define MISTA_RDU		(0x01 << 10)
-#define MISTA_RXBERR		(0x01 << 11)
-
-#define ENSTART			0x01
-#define ENRXINTR		0x01
-#define ENRXGD			(0x01 << 4)
-#define ENRXBERR		(0x01 << 11)
-#define ENTXINTR		(0x01 << 16)
-#define ENTXCP			(0x01 << 18)
-#define ENTXABT			(0x01 << 21)
-#define ENTXBERR		(0x01 << 24)
-#define ENMDC			(0x01 << 19)
-#define PHYBUSY			(0x01 << 17)
-#define MDCCR_VAL		0xa00000
-
-/* rx and tx owner bit */
-#define RX_OWEN_DMA		(0x01 << 31)
-#define RX_OWEN_CPU		(~(0x03 << 30))
-#define TX_OWEN_DMA		(0x01 << 31)
-#define TX_OWEN_CPU		(~(0x01 << 31))
-
-/* tx frame desc controller bit */
-#define MACTXINTEN		0x04
-#define CRCMODE			0x02
-#define PADDINGMODE		0x01
-
-/* fftcr controller bit */
-#define TXTHD 			(0x03 << 8)
-#define BLENGTH			(0x01 << 20)
-
-/* global setting for driver */
-#define RX_DESC_SIZE		50
-#define TX_DESC_SIZE		10
-#define MAX_RBUFF_SZ		0x600
-#define MAX_TBUFF_SZ		0x600
-#define TX_TIMEOUT		(HZ/2)
-#define DELAY			1000
-#define CAM0			0x0
-
-static int w90p910_mdio_read(struct net_device *dev, int phy_id, int reg);
-
-struct w90p910_rxbd {
-	unsigned int sl;
-	unsigned int buffer;
-	unsigned int reserved;
-	unsigned int next;
-};
-
-struct w90p910_txbd {
-	unsigned int mode;
-	unsigned int buffer;
-	unsigned int sl;
-	unsigned int next;
-};
-
-struct recv_pdesc {
-	struct w90p910_rxbd desclist[RX_DESC_SIZE];
-	char recv_buf[RX_DESC_SIZE][MAX_RBUFF_SZ];
-};
-
-struct tran_pdesc {
-	struct w90p910_txbd desclist[TX_DESC_SIZE];
-	char tran_buf[TX_DESC_SIZE][MAX_TBUFF_SZ];
-};
-
-struct  w90p910_ether {
-	struct recv_pdesc *rdesc;
-	struct tran_pdesc *tdesc;
-	dma_addr_t rdesc_phys;
-	dma_addr_t tdesc_phys;
-	struct platform_device *pdev;
-	struct resource *res;
-	struct sk_buff *skb;
-	struct clk *clk;
-	struct clk *rmiiclk;
-	struct mii_if_info mii;
-	struct timer_list check_timer;
-	void __iomem *reg;
-	int rxirq;
-	int txirq;
-	unsigned int cur_tx;
-	unsigned int cur_rx;
-	unsigned int finish_tx;
-	unsigned int rx_packets;
-	unsigned int rx_bytes;
-	unsigned int start_tx_ptr;
-	unsigned int start_rx_ptr;
-	unsigned int linkflag;
-};
-
-static void update_linkspeed_register(struct net_device *dev,
-				unsigned int speed, unsigned int duplex)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = __raw_readl(ether->reg + REG_MCMDR);
-
-	if (speed == SPEED_100) {
-		/* 100 full/half duplex */
-		if (duplex == DUPLEX_FULL) {
-			val |= (MCMDR_OPMOD | MCMDR_FDUP);
-		} else {
-			val |= MCMDR_OPMOD;
-			val &= ~MCMDR_FDUP;
-		}
-	} else {
-		/* 10 full/half duplex */
-		if (duplex == DUPLEX_FULL) {
-			val |= MCMDR_FDUP;
-			val &= ~MCMDR_OPMOD;
-		} else {
-			val &= ~(MCMDR_FDUP | MCMDR_OPMOD);
-		}
-	}
-
-	__raw_writel(val, ether->reg + REG_MCMDR);
-}
-
-static void update_linkspeed(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	struct platform_device *pdev;
-	unsigned int bmsr, bmcr, lpa, speed, duplex;
-
-	pdev = ether->pdev;
-
-	if (!mii_link_ok(&ether->mii)) {
-		ether->linkflag = 0x0;
-		netif_carrier_off(dev);
-		dev_warn(&pdev->dev, "%s: Link down.\n", dev->name);
-		return;
-	}
-
-	if (ether->linkflag == 1)
-		return;
-
-	bmsr = w90p910_mdio_read(dev, ether->mii.phy_id, MII_BMSR);
-	bmcr = w90p910_mdio_read(dev, ether->mii.phy_id, MII_BMCR);
-
-	if (bmcr & BMCR_ANENABLE) {
-		if (!(bmsr & BMSR_ANEGCOMPLETE))
-			return;
-
-		lpa = w90p910_mdio_read(dev, ether->mii.phy_id, MII_LPA);
-
-		if ((lpa & LPA_100FULL) || (lpa & LPA_100HALF))
-			speed = SPEED_100;
-		else
-			speed = SPEED_10;
-
-		if ((lpa & LPA_100FULL) || (lpa & LPA_10FULL))
-			duplex = DUPLEX_FULL;
-		else
-			duplex = DUPLEX_HALF;
-
-	} else {
-		speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
-		duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
-	}
-
-	update_linkspeed_register(dev, speed, duplex);
-
-	dev_info(&pdev->dev, "%s: Link now %i-%s\n", dev->name, speed,
-			(duplex == DUPLEX_FULL) ? "FullDuplex" : "HalfDuplex");
-	ether->linkflag = 0x01;
-
-	netif_carrier_on(dev);
-}
-
-static void w90p910_check_link(struct timer_list *t)
-{
-	struct w90p910_ether *ether = from_timer(ether, t, check_timer);
-	struct net_device *dev = ether->mii.dev;
-
-	update_linkspeed(dev);
-	mod_timer(&ether->check_timer, jiffies + msecs_to_jiffies(1000));
-}
-
-static void w90p910_write_cam(struct net_device *dev,
-				unsigned int x, unsigned char *pval)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int msw, lsw;
-
-	msw = (pval[0] << 24) | (pval[1] << 16) | (pval[2] << 8) | pval[3];
-
-	lsw = (pval[4] << 24) | (pval[5] << 16);
-
-	__raw_writel(lsw, ether->reg + REG_CAML_BASE + x * CAM_ENTRY_SIZE);
-	__raw_writel(msw, ether->reg + REG_CAMM_BASE + x * CAM_ENTRY_SIZE);
-}
-
-static int w90p910_init_desc(struct net_device *dev)
-{
-	struct w90p910_ether *ether;
-	struct w90p910_txbd  *tdesc;
-	struct w90p910_rxbd  *rdesc;
-	struct platform_device *pdev;
-	unsigned int i;
-
-	ether = netdev_priv(dev);
-	pdev = ether->pdev;
-
-	ether->tdesc = dma_alloc_coherent(&pdev->dev, sizeof(struct tran_pdesc),
-					  &ether->tdesc_phys, GFP_KERNEL);
-	if (!ether->tdesc)
-		return -ENOMEM;
-
-	ether->rdesc = dma_alloc_coherent(&pdev->dev, sizeof(struct recv_pdesc),
-					  &ether->rdesc_phys, GFP_KERNEL);
-	if (!ether->rdesc) {
-		dma_free_coherent(&pdev->dev, sizeof(struct tran_pdesc),
-				  ether->tdesc, ether->tdesc_phys);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < TX_DESC_SIZE; i++) {
-		unsigned int offset;
-
-		tdesc = &(ether->tdesc->desclist[i]);
-
-		if (i == TX_DESC_SIZE - 1)
-			offset = offsetof(struct tran_pdesc, desclist[0]);
-		else
-			offset = offsetof(struct tran_pdesc, desclist[i + 1]);
-
-		tdesc->next = ether->tdesc_phys + offset;
-		tdesc->buffer = ether->tdesc_phys +
-			offsetof(struct tran_pdesc, tran_buf[i]);
-		tdesc->sl = 0;
-		tdesc->mode = 0;
-	}
-
-	ether->start_tx_ptr = ether->tdesc_phys;
-
-	for (i = 0; i < RX_DESC_SIZE; i++) {
-		unsigned int offset;
-
-		rdesc = &(ether->rdesc->desclist[i]);
-
-		if (i == RX_DESC_SIZE - 1)
-			offset = offsetof(struct recv_pdesc, desclist[0]);
-		else
-			offset = offsetof(struct recv_pdesc, desclist[i + 1]);
-
-		rdesc->next = ether->rdesc_phys + offset;
-		rdesc->sl = RX_OWEN_DMA;
-		rdesc->buffer = ether->rdesc_phys +
-			offsetof(struct recv_pdesc, recv_buf[i]);
-	  }
-
-	ether->start_rx_ptr = ether->rdesc_phys;
-
-	return 0;
-}
-
-static void w90p910_set_fifo_threshold(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = TXTHD | BLENGTH;
-	__raw_writel(val, ether->reg + REG_FFTCR);
-}
-
-static void w90p910_return_default_idle(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = __raw_readl(ether->reg + REG_MCMDR);
-	val |= SWR;
-	__raw_writel(val, ether->reg + REG_MCMDR);
-}
-
-static void w90p910_trigger_rx(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	__raw_writel(ENSTART, ether->reg + REG_RSDR);
-}
-
-static void w90p910_trigger_tx(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	__raw_writel(ENSTART, ether->reg + REG_TSDR);
-}
-
-static void w90p910_enable_mac_interrupt(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = ENTXINTR | ENRXINTR | ENRXGD | ENTXCP;
-	val |= ENTXBERR | ENRXBERR | ENTXABT;
-
-	__raw_writel(val, ether->reg + REG_MIEN);
-}
-
-static void w90p910_get_and_clear_int(struct net_device *dev,
-							unsigned int *val)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	*val = __raw_readl(ether->reg + REG_MISTA);
-	__raw_writel(*val, ether->reg + REG_MISTA);
-}
-
-static void w90p910_set_global_maccmd(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = __raw_readl(ether->reg + REG_MCMDR);
-	val |= MCMDR_SPCRC | MCMDR_ENMDC | MCMDR_ACP | ENMDC;
-	__raw_writel(val, ether->reg + REG_MCMDR);
-}
-
-static void w90p910_enable_cam(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	w90p910_write_cam(dev, CAM0, dev->dev_addr);
-
-	val = __raw_readl(ether->reg + REG_CAMEN);
-	val |= CAM0EN;
-	__raw_writel(val, ether->reg + REG_CAMEN);
-}
-
-static void w90p910_enable_cam_command(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = CAMCMR_ECMP | CAMCMR_ABP | CAMCMR_AMP;
-	__raw_writel(val, ether->reg + REG_CAMCMR);
-}
-
-static void w90p910_enable_tx(struct net_device *dev, unsigned int enable)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = __raw_readl(ether->reg + REG_MCMDR);
-
-	if (enable)
-		val |= MCMDR_TXON;
-	else
-		val &= ~MCMDR_TXON;
-
-	__raw_writel(val, ether->reg + REG_MCMDR);
-}
-
-static void w90p910_enable_rx(struct net_device *dev, unsigned int enable)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	unsigned int val;
-
-	val = __raw_readl(ether->reg + REG_MCMDR);
-
-	if (enable)
-		val |= MCMDR_RXON;
-	else
-		val &= ~MCMDR_RXON;
-
-	__raw_writel(val, ether->reg + REG_MCMDR);
-}
-
-static void w90p910_set_curdest(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	__raw_writel(ether->start_rx_ptr, ether->reg + REG_RXDLSA);
-	__raw_writel(ether->start_tx_ptr, ether->reg + REG_TXDLSA);
-}
-
-static void w90p910_reset_mac(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	w90p910_enable_tx(dev, 0);
-	w90p910_enable_rx(dev, 0);
-	w90p910_set_fifo_threshold(dev);
-	w90p910_return_default_idle(dev);
-
-	if (!netif_queue_stopped(dev))
-		netif_stop_queue(dev);
-
-	w90p910_init_desc(dev);
-
-	netif_trans_update(dev); /* prevent tx timeout */
-	ether->cur_tx = 0x0;
-	ether->finish_tx = 0x0;
-	ether->cur_rx = 0x0;
-
-	w90p910_set_curdest(dev);
-	w90p910_enable_cam(dev);
-	w90p910_enable_cam_command(dev);
-	w90p910_enable_mac_interrupt(dev);
-	w90p910_enable_tx(dev, 1);
-	w90p910_enable_rx(dev, 1);
-	w90p910_trigger_tx(dev);
-	w90p910_trigger_rx(dev);
-
-	netif_trans_update(dev); /* prevent tx timeout */
-
-	if (netif_queue_stopped(dev))
-		netif_wake_queue(dev);
-}
-
-static void w90p910_mdio_write(struct net_device *dev,
-					int phy_id, int reg, int data)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	struct platform_device *pdev;
-	unsigned int val, i;
-
-	pdev = ether->pdev;
-
-	__raw_writel(data, ether->reg + REG_MIID);
-
-	val = (phy_id << 0x08) | reg;
-	val |= PHYBUSY | PHYWR | MDCCR_VAL;
-	__raw_writel(val, ether->reg + REG_MIIDA);
-
-	for (i = 0; i < DELAY; i++) {
-		if ((__raw_readl(ether->reg + REG_MIIDA) & PHYBUSY) == 0)
-			break;
-	}
-
-	if (i == DELAY)
-		dev_warn(&pdev->dev, "mdio write timed out\n");
-}
-
-static int w90p910_mdio_read(struct net_device *dev, int phy_id, int reg)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	struct platform_device *pdev;
-	unsigned int val, i, data;
-
-	pdev = ether->pdev;
-
-	val = (phy_id << 0x08) | reg;
-	val |= PHYBUSY | MDCCR_VAL;
-	__raw_writel(val, ether->reg + REG_MIIDA);
-
-	for (i = 0; i < DELAY; i++) {
-		if ((__raw_readl(ether->reg + REG_MIIDA) & PHYBUSY) == 0)
-			break;
-	}
-
-	if (i == DELAY) {
-		dev_warn(&pdev->dev, "mdio read timed out\n");
-		data = 0xffff;
-	} else {
-		data = __raw_readl(ether->reg + REG_MIID);
-	}
-
-	return data;
-}
-
-static int w90p910_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *address = addr;
-
-	if (!is_valid_ether_addr(address->sa_data))
-		return -EADDRNOTAVAIL;
-
-	memcpy(dev->dev_addr, address->sa_data, dev->addr_len);
-	w90p910_write_cam(dev, CAM0, dev->dev_addr);
-
-	return 0;
-}
-
-static int w90p910_ether_close(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	struct platform_device *pdev;
-
-	pdev = ether->pdev;
-
-	dma_free_coherent(&pdev->dev, sizeof(struct recv_pdesc),
-					ether->rdesc, ether->rdesc_phys);
-	dma_free_coherent(&pdev->dev, sizeof(struct tran_pdesc),
-					ether->tdesc, ether->tdesc_phys);
-
-	netif_stop_queue(dev);
-
-	del_timer_sync(&ether->check_timer);
-	clk_disable(ether->rmiiclk);
-	clk_disable(ether->clk);
-
-	free_irq(ether->txirq, dev);
-	free_irq(ether->rxirq, dev);
-
-	return 0;
-}
-
-static int w90p910_send_frame(struct net_device *dev,
-					unsigned char *data, int length)
-{
-	struct w90p910_ether *ether;
-	struct w90p910_txbd *txbd;
-	struct platform_device *pdev;
-	unsigned char *buffer;
-
-	ether = netdev_priv(dev);
-	pdev = ether->pdev;
-
-	txbd = &ether->tdesc->desclist[ether->cur_tx];
-	buffer = ether->tdesc->tran_buf[ether->cur_tx];
-
-	if (length > 1514) {
-		dev_err(&pdev->dev, "send data %d bytes, check it\n", length);
-		length = 1514;
-	}
-
-	txbd->sl = length & 0xFFFF;
-
-	memcpy(buffer, data, length);
-
-	txbd->mode = TX_OWEN_DMA | PADDINGMODE | CRCMODE | MACTXINTEN;
-
-	w90p910_enable_tx(dev, 1);
-
-	w90p910_trigger_tx(dev);
-
-	if (++ether->cur_tx >= TX_DESC_SIZE)
-		ether->cur_tx = 0;
-
-	txbd = &ether->tdesc->desclist[ether->cur_tx];
-
-	if (txbd->mode & TX_OWEN_DMA)
-		netif_stop_queue(dev);
-
-	return 0;
-}
-
-static int w90p910_ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	if (!(w90p910_send_frame(dev, skb->data, skb->len))) {
-		ether->skb = skb;
-		dev_kfree_skb_irq(skb);
-		return 0;
-	}
-	return -EAGAIN;
-}
-
-static irqreturn_t w90p910_tx_interrupt(int irq, void *dev_id)
-{
-	struct w90p910_ether *ether;
-	struct w90p910_txbd  *txbd;
-	struct platform_device *pdev;
-	struct net_device *dev;
-	unsigned int cur_entry, entry, status;
-
-	dev = dev_id;
-	ether = netdev_priv(dev);
-	pdev = ether->pdev;
-
-	w90p910_get_and_clear_int(dev, &status);
-
-	cur_entry = __raw_readl(ether->reg + REG_CTXDSA);
-
-	entry = ether->tdesc_phys +
-		offsetof(struct tran_pdesc, desclist[ether->finish_tx]);
-
-	while (entry != cur_entry) {
-		txbd = &ether->tdesc->desclist[ether->finish_tx];
-
-		if (++ether->finish_tx >= TX_DESC_SIZE)
-			ether->finish_tx = 0;
-
-		if (txbd->sl & TXDS_TXCP) {
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += txbd->sl & 0xFFFF;
-		} else {
-			dev->stats.tx_errors++;
-		}
-
-		txbd->sl = 0x0;
-		txbd->mode = 0x0;
-
-		if (netif_queue_stopped(dev))
-			netif_wake_queue(dev);
-
-		entry = ether->tdesc_phys +
-			offsetof(struct tran_pdesc, desclist[ether->finish_tx]);
-	}
-
-	if (status & MISTA_EXDEF) {
-		dev_err(&pdev->dev, "emc defer exceed interrupt\n");
-	} else if (status & MISTA_TXBERR) {
-		dev_err(&pdev->dev, "emc bus error interrupt\n");
-		w90p910_reset_mac(dev);
-	} else if (status & MISTA_TDU) {
-		if (netif_queue_stopped(dev))
-			netif_wake_queue(dev);
-	}
-
-	return IRQ_HANDLED;
-}
-
-static void netdev_rx(struct net_device *dev)
-{
-	struct w90p910_ether *ether;
-	struct w90p910_rxbd *rxbd;
-	struct platform_device *pdev;
-	struct sk_buff *skb;
-	unsigned char *data;
-	unsigned int length, status, val, entry;
-
-	ether = netdev_priv(dev);
-	pdev = ether->pdev;
-
-	rxbd = &ether->rdesc->desclist[ether->cur_rx];
-
-	do {
-		val = __raw_readl(ether->reg + REG_CRXDSA);
-
-		entry = ether->rdesc_phys +
-			offsetof(struct recv_pdesc, desclist[ether->cur_rx]);
-
-		if (val == entry)
-			break;
-
-		status = rxbd->sl;
-		length = status & 0xFFFF;
-
-		if (status & RXDS_RXGD) {
-			data = ether->rdesc->recv_buf[ether->cur_rx];
-			skb = netdev_alloc_skb(dev, length + 2);
-			if (!skb) {
-				dev->stats.rx_dropped++;
-				return;
-			}
-
-			skb_reserve(skb, 2);
-			skb_put(skb, length);
-			skb_copy_to_linear_data(skb, data, length);
-			skb->protocol = eth_type_trans(skb, dev);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += length;
-			netif_rx(skb);
-		} else {
-			dev->stats.rx_errors++;
-
-			if (status & RXDS_RP) {
-				dev_err(&pdev->dev, "rx runt err\n");
-				dev->stats.rx_length_errors++;
-			} else if (status & RXDS_CRCE) {
-				dev_err(&pdev->dev, "rx crc err\n");
-				dev->stats.rx_crc_errors++;
-			} else if (status & RXDS_ALIE) {
-				dev_err(&pdev->dev, "rx alignment err\n");
-				dev->stats.rx_frame_errors++;
-			} else if (status & RXDS_PTLE) {
-				dev_err(&pdev->dev, "rx longer err\n");
-				dev->stats.rx_over_errors++;
-			}
-		}
-
-		rxbd->sl = RX_OWEN_DMA;
-		rxbd->reserved = 0x0;
-
-		if (++ether->cur_rx >= RX_DESC_SIZE)
-			ether->cur_rx = 0;
-
-		rxbd = &ether->rdesc->desclist[ether->cur_rx];
-
-	} while (1);
-}
-
-static irqreturn_t w90p910_rx_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev;
-	struct w90p910_ether  *ether;
-	struct platform_device *pdev;
-	unsigned int status;
-
-	dev = dev_id;
-	ether = netdev_priv(dev);
-	pdev = ether->pdev;
-
-	w90p910_get_and_clear_int(dev, &status);
-
-	if (status & MISTA_RDU) {
-		netdev_rx(dev);
-		w90p910_trigger_rx(dev);
-
-		return IRQ_HANDLED;
-	} else if (status & MISTA_RXBERR) {
-		dev_err(&pdev->dev, "emc rx bus error\n");
-		w90p910_reset_mac(dev);
-	}
-
-	netdev_rx(dev);
-	return IRQ_HANDLED;
-}
-
-static int w90p910_ether_open(struct net_device *dev)
-{
-	struct w90p910_ether *ether;
-	struct platform_device *pdev;
-
-	ether = netdev_priv(dev);
-	pdev = ether->pdev;
-
-	w90p910_reset_mac(dev);
-	w90p910_set_fifo_threshold(dev);
-	w90p910_set_curdest(dev);
-	w90p910_enable_cam(dev);
-	w90p910_enable_cam_command(dev);
-	w90p910_enable_mac_interrupt(dev);
-	w90p910_set_global_maccmd(dev);
-	w90p910_enable_rx(dev, 1);
-
-	clk_enable(ether->rmiiclk);
-	clk_enable(ether->clk);
-
-	ether->rx_packets = 0x0;
-	ether->rx_bytes = 0x0;
-
-	if (request_irq(ether->txirq, w90p910_tx_interrupt,
-						0x0, pdev->name, dev)) {
-		dev_err(&pdev->dev, "register irq tx failed\n");
-		return -EAGAIN;
-	}
-
-	if (request_irq(ether->rxirq, w90p910_rx_interrupt,
-						0x0, pdev->name, dev)) {
-		dev_err(&pdev->dev, "register irq rx failed\n");
-		free_irq(ether->txirq, dev);
-		return -EAGAIN;
-	}
-
-	mod_timer(&ether->check_timer, jiffies + msecs_to_jiffies(1000));
-	netif_start_queue(dev);
-	w90p910_trigger_rx(dev);
-
-	dev_info(&pdev->dev, "%s is OPENED\n", dev->name);
-
-	return 0;
-}
-
-static void w90p910_ether_set_multicast_list(struct net_device *dev)
-{
-	struct w90p910_ether *ether;
-	unsigned int rx_mode;
-
-	ether = netdev_priv(dev);
-
-	if (dev->flags & IFF_PROMISC)
-		rx_mode = CAMCMR_AUP | CAMCMR_AMP | CAMCMR_ABP | CAMCMR_ECMP;
-	else if ((dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev))
-		rx_mode = CAMCMR_AMP | CAMCMR_ABP | CAMCMR_ECMP;
-	else
-		rx_mode = CAMCMR_ECMP | CAMCMR_ABP;
-	__raw_writel(rx_mode, ether->reg + REG_CAMCMR);
-}
-
-static int w90p910_ether_ioctl(struct net_device *dev,
-						struct ifreq *ifr, int cmd)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	struct mii_ioctl_data *data = if_mii(ifr);
-
-	return generic_mii_ioctl(&ether->mii, data, cmd, NULL);
-}
-
-static void w90p910_get_drvinfo(struct net_device *dev,
-					struct ethtool_drvinfo *info)
-{
-	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
-	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
-}
-
-static int w90p910_get_link_ksettings(struct net_device *dev,
-				      struct ethtool_link_ksettings *cmd)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	mii_ethtool_get_link_ksettings(&ether->mii, cmd);
-
-	return 0;
-}
-
-static int w90p910_set_link_ksettings(struct net_device *dev,
-				      const struct ethtool_link_ksettings *cmd)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	return mii_ethtool_set_link_ksettings(&ether->mii, cmd);
-}
-
-static int w90p910_nway_reset(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	return mii_nway_restart(&ether->mii);
-}
-
-static u32 w90p910_get_link(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	return mii_link_ok(&ether->mii);
-}
-
-static const struct ethtool_ops w90p910_ether_ethtool_ops = {
-	.get_drvinfo	= w90p910_get_drvinfo,
-	.nway_reset	= w90p910_nway_reset,
-	.get_link	= w90p910_get_link,
-	.get_link_ksettings = w90p910_get_link_ksettings,
-	.set_link_ksettings = w90p910_set_link_ksettings,
-};
-
-static const struct net_device_ops w90p910_ether_netdev_ops = {
-	.ndo_open		= w90p910_ether_open,
-	.ndo_stop		= w90p910_ether_close,
-	.ndo_start_xmit		= w90p910_ether_start_xmit,
-	.ndo_set_rx_mode	= w90p910_ether_set_multicast_list,
-	.ndo_set_mac_address	= w90p910_set_mac_address,
-	.ndo_do_ioctl		= w90p910_ether_ioctl,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static void __init get_mac_address(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-	struct platform_device *pdev;
-	char addr[ETH_ALEN];
-
-	pdev = ether->pdev;
-
-	addr[0] = 0x00;
-	addr[1] = 0x02;
-	addr[2] = 0xac;
-	addr[3] = 0x55;
-	addr[4] = 0x88;
-	addr[5] = 0xa8;
-
-	if (is_valid_ether_addr(addr))
-		memcpy(dev->dev_addr, &addr, ETH_ALEN);
-	else
-		dev_err(&pdev->dev, "invalid mac address\n");
-}
-
-static int w90p910_ether_setup(struct net_device *dev)
-{
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	dev->netdev_ops = &w90p910_ether_netdev_ops;
-	dev->ethtool_ops = &w90p910_ether_ethtool_ops;
-
-	dev->tx_queue_len = 16;
-	dev->dma = 0x0;
-	dev->watchdog_timeo = TX_TIMEOUT;
-
-	get_mac_address(dev);
-
-	ether->cur_tx = 0x0;
-	ether->cur_rx = 0x0;
-	ether->finish_tx = 0x0;
-	ether->linkflag = 0x0;
-	ether->mii.phy_id = 0x01;
-	ether->mii.phy_id_mask = 0x1f;
-	ether->mii.reg_num_mask = 0x1f;
-	ether->mii.dev = dev;
-	ether->mii.mdio_read = w90p910_mdio_read;
-	ether->mii.mdio_write = w90p910_mdio_write;
-
-	timer_setup(&ether->check_timer, w90p910_check_link, 0);
-
-	return 0;
-}
-
-static int w90p910_ether_probe(struct platform_device *pdev)
-{
-	struct w90p910_ether *ether;
-	struct net_device *dev;
-	int error;
-
-	dev = alloc_etherdev(sizeof(struct w90p910_ether));
-	if (!dev)
-		return -ENOMEM;
-
-	ether = netdev_priv(dev);
-
-	ether->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (ether->res == NULL) {
-		dev_err(&pdev->dev, "failed to get I/O memory\n");
-		error = -ENXIO;
-		goto failed_free;
-	}
-
-	if (!request_mem_region(ether->res->start,
-				resource_size(ether->res), pdev->name)) {
-		dev_err(&pdev->dev, "failed to request I/O memory\n");
-		error = -EBUSY;
-		goto failed_free;
-	}
-
-	ether->reg = ioremap(ether->res->start, resource_size(ether->res));
-	if (ether->reg == NULL) {
-		dev_err(&pdev->dev, "failed to remap I/O memory\n");
-		error = -ENXIO;
-		goto failed_free_mem;
-	}
-
-	ether->txirq = platform_get_irq(pdev, 0);
-	if (ether->txirq < 0) {
-		dev_err(&pdev->dev, "failed to get ether tx irq\n");
-		error = -ENXIO;
-		goto failed_free_io;
-	}
-
-	ether->rxirq = platform_get_irq(pdev, 1);
-	if (ether->rxirq < 0) {
-		dev_err(&pdev->dev, "failed to get ether rx irq\n");
-		error = -ENXIO;
-		goto failed_free_io;
-	}
-
-	platform_set_drvdata(pdev, dev);
-
-	ether->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(ether->clk)) {
-		dev_err(&pdev->dev, "failed to get ether clock\n");
-		error = PTR_ERR(ether->clk);
-		goto failed_free_io;
-	}
-
-	ether->rmiiclk = clk_get(&pdev->dev, "RMII");
-	if (IS_ERR(ether->rmiiclk)) {
-		dev_err(&pdev->dev, "failed to get ether clock\n");
-		error = PTR_ERR(ether->rmiiclk);
-		goto failed_put_clk;
-	}
-
-	ether->pdev = pdev;
-
-	w90p910_ether_setup(dev);
-
-	error = register_netdev(dev);
-	if (error != 0) {
-		dev_err(&pdev->dev, "Register EMC w90p910 FAILED\n");
-		error = -ENODEV;
-		goto failed_put_rmiiclk;
-	}
-
-	return 0;
-failed_put_rmiiclk:
-	clk_put(ether->rmiiclk);
-failed_put_clk:
-	clk_put(ether->clk);
-failed_free_io:
-	iounmap(ether->reg);
-failed_free_mem:
-	release_mem_region(ether->res->start, resource_size(ether->res));
-failed_free:
-	free_netdev(dev);
-	return error;
-}
-
-static int w90p910_ether_remove(struct platform_device *pdev)
-{
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct w90p910_ether *ether = netdev_priv(dev);
-
-	unregister_netdev(dev);
-
-	clk_put(ether->rmiiclk);
-	clk_put(ether->clk);
-
-	iounmap(ether->reg);
-	release_mem_region(ether->res->start, resource_size(ether->res));
-
-	del_timer_sync(&ether->check_timer);
-
-	free_netdev(dev);
-	return 0;
-}
-
-static struct platform_driver w90p910_ether_driver = {
-	.probe		= w90p910_ether_probe,
-	.remove		= w90p910_ether_remove,
-	.driver		= {
-		.name	= "nuc900-emc",
-	},
-};
-
-module_platform_driver(w90p910_ether_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("w90p910 MAC driver!");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-emc");
-
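The whole w90p910/nuc900 Ethernet driver is dropped above, together with its SoC platform support. Its probe routine is still a textbook example of the kernel's goto-based unwind idiom: each acquired resource gets a label, and every failure jumps to the label that releases exactly what has been acquired so far, in reverse order. A minimal sketch of that idiom follows; the function and label names are illustrative, not taken from the deleted driver.

#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *reg;
	int error;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENXIO;

	if (!request_mem_region(res->start, resource_size(res), pdev->name))
		return -EBUSY;

	reg = ioremap(res->start, resource_size(res));
	if (!reg) {
		error = -ENXIO;
		goto failed_free_mem;	/* undo only what already succeeded */
	}

	/* ... later acquisitions add their labels above failed_free_mem ... */
	return 0;

failed_free_mem:
	release_mem_region(res->start, resource_size(res));
	return error;
}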
diff --git a/drivers/net/ethernet/nvidia/Kconfig b/drivers/net/ethernet/nvidia/Kconfig
index 4efc9fe..faacbd1 100644
--- a/drivers/net/ethernet/nvidia/Kconfig
+++ b/drivers/net/ethernet/nvidia/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # NVIDIA network device configuration
 #
diff --git a/drivers/net/ethernet/nvidia/Makefile b/drivers/net/ethernet/nvidia/Makefile
index e079ae5..8935699 100644
--- a/drivers/net/ethernet/nvidia/Makefile
+++ b/drivers/net/ethernet/nvidia/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the NVIDIA network device drivers.
 #
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 1d9b0d4..05d2b47 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * forcedeth: Ethernet driver for NVIDIA nForce media access controllers.
  *
@@ -15,19 +16,6 @@
  *		IRQ rate fixes, bigendian fixes, cleanups, verification)
  * Copyright (c) 2004,2005,2006,2007,2008,2009 NVIDIA Corporation
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
  * This means recovery from netif_stop_queue only happens if the hw timer
@@ -725,6 +713,21 @@
 	struct nv_skb_map *next_tx_ctx;
 };
 
+struct nv_txrx_stats {
+	u64 stat_rx_packets;
+	u64 stat_rx_bytes; /* not always available in HW */
+	u64 stat_rx_missed_errors;
+	u64 stat_rx_dropped;
+	u64 stat_tx_packets; /* not always available in HW */
+	u64 stat_tx_bytes;
+	u64 stat_tx_dropped;
+};
+
+#define nv_txrx_stats_inc(member) \
+		__this_cpu_inc(np->txrx_stats->member)
+#define nv_txrx_stats_add(member, count) \
+		__this_cpu_add(np->txrx_stats->member, (count))
+
 /*
  * SMP locking:
  * All hardware access under netdev_priv(dev)->lock, except the performance
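The hunk above converts forcedeth's software counters to per-CPU storage: alloc_percpu() gives each CPU its own struct nv_txrx_stats, and the __this_cpu_inc()/__this_cpu_add() wrappers bump the local copy without atomic operations or cache-line bouncing between CPUs. A standalone sketch of the write side, with hypothetical type and function names:

#include <linux/percpu.h>

struct ex_stats {
	u64 rx_packets;
	u64 rx_bytes;
};

struct ex_priv {
	struct ex_stats __percpu *stats;	/* one instance per CPU */
};

/* hot path: lock-free bump of this CPU's private counters */
static void ex_count_rx(struct ex_priv *p, unsigned int len)
{
	__this_cpu_inc(p->stats->rx_packets);
	__this_cpu_add(p->stats->rx_bytes, len);
}

Pairing p->stats = alloc_percpu(struct ex_stats) in probe with free_percpu(p->stats) on teardown completes the pattern, mirroring the nv_probe()/nv_remove() hunks further down.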
@@ -809,10 +812,7 @@
 
 	/* RX software stats */
 	struct u64_stats_sync swstats_rx_syncp;
-	u64 stat_rx_packets;
-	u64 stat_rx_bytes; /* not always available in HW */
-	u64 stat_rx_missed_errors;
-	u64 stat_rx_dropped;
+	struct nv_txrx_stats __percpu *txrx_stats;
 
 	/* media detection workaround.
 	 * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
@@ -838,9 +838,6 @@
 
 	/* TX software stats */
 	struct u64_stats_sync swstats_tx_syncp;
-	u64 stat_tx_packets; /* not always available in HW */
-	u64 stat_tx_bytes;
-	u64 stat_tx_dropped;
 
 	/* msi/msi-x fields */
 	u32 msi_flags;
@@ -1733,6 +1730,39 @@
 	}
 }
 
+static void nv_get_stats(int cpu, struct fe_priv *np,
+			 struct rtnl_link_stats64 *storage)
+{
+	struct nv_txrx_stats *src = per_cpu_ptr(np->txrx_stats, cpu);
+	unsigned int syncp_start;
+	u64 rx_packets, rx_bytes, rx_dropped, rx_missed_errors;
+	u64 tx_packets, tx_bytes, tx_dropped;
+
+	do {
+		syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
+		rx_packets       = src->stat_rx_packets;
+		rx_bytes         = src->stat_rx_bytes;
+		rx_dropped       = src->stat_rx_dropped;
+		rx_missed_errors = src->stat_rx_missed_errors;
+	} while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
+
+	storage->rx_packets       += rx_packets;
+	storage->rx_bytes         += rx_bytes;
+	storage->rx_dropped       += rx_dropped;
+	storage->rx_missed_errors += rx_missed_errors;
+
+	do {
+		syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
+		tx_packets  = src->stat_tx_packets;
+		tx_bytes    = src->stat_tx_bytes;
+		tx_dropped  = src->stat_tx_dropped;
+	} while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
+
+	storage->tx_packets += tx_packets;
+	storage->tx_bytes   += tx_bytes;
+	storage->tx_dropped += tx_dropped;
+}
+
 /*
  * nv_get_stats64: dev->ndo_get_stats64 function
  * Get latest stats value from the nic.
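nv_get_stats() above is the matching read side: for each CPU it snapshots the counters inside a u64_stats_sync retry loop, so a 32-bit reader never observes a torn 64-bit value, and accumulates with += into the rtnl_link_stats64 totals. The same pattern in isolation, reusing the hypothetical ex_stats from the previous sketch:

#include <linux/netdevice.h>
#include <linux/u64_stats_sync.h>

static void ex_fold_cpu(struct u64_stats_sync *syncp,
			const struct ex_stats *src,
			struct rtnl_link_stats64 *storage)
{
	unsigned int start;
	u64 packets, bytes;

	do {
		start = u64_stats_fetch_begin_irq(syncp);
		packets = src->rx_packets;	/* re-read if a writer raced us */
		bytes   = src->rx_bytes;
	} while (u64_stats_fetch_retry_irq(syncp, start));

	storage->rx_packets += packets;	/* += accumulates across CPUs */
	storage->rx_bytes   += bytes;
}

The caller then iterates with for_each_online_cpu(cpu) and passes per_cpu_ptr(p->stats, cpu) as src, exactly as the new nv_get_stats64() loop does.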
@@ -1745,7 +1775,7 @@
 	__releases(&netdev_priv(dev)->hwstats_lock)
 {
 	struct fe_priv *np = netdev_priv(dev);
-	unsigned int syncp_start;
+	int cpu;
 
 	/*
 	 * Note: because HW stats are not always available and for
@@ -1758,20 +1788,8 @@
 	 */
 
 	/* software stats */
-	do {
-		syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
-		storage->rx_packets       = np->stat_rx_packets;
-		storage->rx_bytes         = np->stat_rx_bytes;
-		storage->rx_dropped       = np->stat_rx_dropped;
-		storage->rx_missed_errors = np->stat_rx_missed_errors;
-	} while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
-
-	do {
-		syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
-		storage->tx_packets = np->stat_tx_packets;
-		storage->tx_bytes   = np->stat_tx_bytes;
-		storage->tx_dropped = np->stat_tx_dropped;
-	} while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
+	for_each_online_cpu(cpu)
+		nv_get_stats(cpu, np, storage);
 
 	/* If the nic supports hw counters then retrieve latest values */
 	if (np->driver_data & DEV_HAS_STATISTICS_V123) {
@@ -1839,7 +1857,7 @@
 		} else {
 packet_dropped:
 			u64_stats_update_begin(&np->swstats_rx_syncp);
-			np->stat_rx_dropped++;
+			nv_txrx_stats_inc(stat_rx_dropped);
 			u64_stats_update_end(&np->swstats_rx_syncp);
 			return 1;
 		}
@@ -1881,7 +1899,7 @@
 		} else {
 packet_dropped:
 			u64_stats_update_begin(&np->swstats_rx_syncp);
-			np->stat_rx_dropped++;
+			nv_txrx_stats_inc(stat_rx_dropped);
 			u64_stats_update_end(&np->swstats_rx_syncp);
 			return 1;
 		}
@@ -2025,7 +2043,7 @@
 		}
 		if (nv_release_txskb(np, &np->tx_skb[i])) {
 			u64_stats_update_begin(&np->swstats_tx_syncp);
-			np->stat_tx_dropped++;
+			nv_txrx_stats_inc(stat_tx_dropped);
 			u64_stats_update_end(&np->swstats_tx_syncp);
 		}
 		np->tx_skb[i].dma = 0;
@@ -2239,7 +2257,7 @@
 			/* on DMA mapping error - drop the packet */
 			dev_kfree_skb_any(skb);
 			u64_stats_update_begin(&np->swstats_tx_syncp);
-			np->stat_tx_dropped++;
+			nv_txrx_stats_inc(stat_tx_dropped);
 			u64_stats_update_end(&np->swstats_tx_syncp);
 			return NETDEV_TX_OK;
 		}
@@ -2285,7 +2303,7 @@
 				dev_kfree_skb_any(skb);
 				np->put_tx_ctx = start_tx_ctx;
 				u64_stats_update_begin(&np->swstats_tx_syncp);
-				np->stat_tx_dropped++;
+				nv_txrx_stats_inc(stat_tx_dropped);
 				u64_stats_update_end(&np->swstats_tx_syncp);
 				return NETDEV_TX_OK;
 			}
@@ -2396,7 +2414,7 @@
 			/* on DMA mapping error - drop the packet */
 			dev_kfree_skb_any(skb);
 			u64_stats_update_begin(&np->swstats_tx_syncp);
-			np->stat_tx_dropped++;
+			nv_txrx_stats_inc(stat_tx_dropped);
 			u64_stats_update_end(&np->swstats_tx_syncp);
 			return NETDEV_TX_OK;
 		}
@@ -2443,7 +2461,7 @@
 				dev_kfree_skb_any(skb);
 				np->put_tx_ctx = start_tx_ctx;
 				u64_stats_update_begin(&np->swstats_tx_syncp);
-				np->stat_tx_dropped++;
+				nv_txrx_stats_inc(stat_tx_dropped);
 				u64_stats_update_end(&np->swstats_tx_syncp);
 				return NETDEV_TX_OK;
 			}
@@ -2572,9 +2590,12 @@
 					    && !(flags & NV_TX_RETRYCOUNT_MASK))
 						nv_legacybackoff_reseed(dev);
 				} else {
+					unsigned int len;
+
 					u64_stats_update_begin(&np->swstats_tx_syncp);
-					np->stat_tx_packets++;
-					np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+					nv_txrx_stats_inc(stat_tx_packets);
+					len = np->get_tx_ctx->skb->len;
+					nv_txrx_stats_add(stat_tx_bytes, len);
 					u64_stats_update_end(&np->swstats_tx_syncp);
 				}
 				bytes_compl += np->get_tx_ctx->skb->len;
@@ -2589,9 +2610,12 @@
 					    && !(flags & NV_TX2_RETRYCOUNT_MASK))
 						nv_legacybackoff_reseed(dev);
 				} else {
+					unsigned int len;
+
 					u64_stats_update_begin(&np->swstats_tx_syncp);
-					np->stat_tx_packets++;
-					np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+					nv_txrx_stats_inc(stat_tx_packets);
+					len = np->get_tx_ctx->skb->len;
+					nv_txrx_stats_add(stat_tx_bytes, len);
 					u64_stats_update_end(&np->swstats_tx_syncp);
 				}
 				bytes_compl += np->get_tx_ctx->skb->len;
@@ -2639,9 +2663,12 @@
 						nv_legacybackoff_reseed(dev);
 				}
 			} else {
+				unsigned int len;
+
 				u64_stats_update_begin(&np->swstats_tx_syncp);
-				np->stat_tx_packets++;
-				np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+				nv_txrx_stats_inc(stat_tx_packets);
+				len = np->get_tx_ctx->skb->len;
+				nv_txrx_stats_add(stat_tx_bytes, len);
 				u64_stats_update_end(&np->swstats_tx_syncp);
 			}
 
@@ -2818,6 +2845,15 @@
 	}
 }
 
+static void rx_missing_handler(u32 flags, struct fe_priv *np)
+{
+	if (flags & NV_RX_MISSEDFRAME) {
+		u64_stats_update_begin(&np->swstats_rx_syncp);
+		nv_txrx_stats_inc(stat_rx_missed_errors);
+		u64_stats_update_end(&np->swstats_rx_syncp);
+	}
+}
+
 static int nv_rx_process(struct net_device *dev, int limit)
 {
 	struct fe_priv *np = netdev_priv(dev);
@@ -2860,11 +2896,7 @@
 					}
 					/* the rest are hard errors */
 					else {
-						if (flags & NV_RX_MISSEDFRAME) {
-							u64_stats_update_begin(&np->swstats_rx_syncp);
-							np->stat_rx_missed_errors++;
-							u64_stats_update_end(&np->swstats_rx_syncp);
-						}
+						rx_missing_handler(flags, np);
 						dev_kfree_skb(skb);
 						goto next_pkt;
 					}
@@ -2908,8 +2940,8 @@
 		skb->protocol = eth_type_trans(skb, dev);
 		napi_gro_receive(&np->napi, skb);
 		u64_stats_update_begin(&np->swstats_rx_syncp);
-		np->stat_rx_packets++;
-		np->stat_rx_bytes += len;
+		nv_txrx_stats_inc(stat_rx_packets);
+		nv_txrx_stats_add(stat_rx_bytes, len);
 		u64_stats_update_end(&np->swstats_rx_syncp);
 next_pkt:
 		if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
@@ -2994,8 +3026,8 @@
 			}
 			napi_gro_receive(&np->napi, skb);
 			u64_stats_update_begin(&np->swstats_rx_syncp);
-			np->stat_rx_packets++;
-			np->stat_rx_bytes += len;
+			nv_txrx_stats_inc(stat_rx_packets);
+			nv_txrx_stats_add(stat_rx_bytes, len);
 			u64_stats_update_end(&np->swstats_rx_syncp);
 		} else {
 			dev_kfree_skb(skb);
@@ -5663,6 +5695,12 @@
 	SET_NETDEV_DEV(dev, &pci_dev->dev);
 	u64_stats_init(&np->swstats_rx_syncp);
 	u64_stats_init(&np->swstats_tx_syncp);
+	np->txrx_stats = alloc_percpu(struct nv_txrx_stats);
+	if (!np->txrx_stats) {
+		pr_err("np->txrx_stats, alloc memory error.\n");
+		err = -ENOMEM;
+		goto out_alloc_percpu;
+	}
 
 	timer_setup(&np->oom_kick, nv_do_rx_refill, 0);
 	timer_setup(&np->nic_poll, nv_do_nic_poll, 0);
@@ -6072,6 +6110,8 @@
 out_disable:
 	pci_disable_device(pci_dev);
 out_free:
+	free_percpu(np->txrx_stats);
+out_alloc_percpu:
 	free_netdev(dev);
 out:
 	return err;
@@ -6117,6 +6157,9 @@
 static void nv_remove(struct pci_dev *pci_dev)
 {
 	struct net_device *dev = pci_get_drvdata(pci_dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	unregister_netdev(dev);
+
+	free_percpu(np->txrx_stats);
 
@@ -6138,8 +6181,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int nv_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	int i;
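The nv_suspend() change works because pci_set_drvdata() is just dev_set_drvdata() on the PCI device's embedded struct device, so a PM callback can fetch its driver data with the generic accessor and no longer needs to know which bus it sits on. A sketch with hypothetical names:

#include <linux/netdevice.h>
#include <linux/pm.h>

static int ex_suspend(struct device *device)
{
	/* pci_set_drvdata(pdev, ndev) stored the pointer via the embedded
	 * struct device, so the generic accessor retrieves the same value */
	struct net_device *ndev = dev_get_drvdata(device);

	if (!ndev || !netif_running(ndev))
		return 0;

	/* ... quiesce the hardware ... */
	return 0;
}

static SIMPLE_DEV_PM_OPS(ex_pm_ops, ex_suspend, NULL);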
diff --git a/drivers/net/ethernet/nxp/Kconfig b/drivers/net/ethernet/nxp/Kconfig
index 0d9baf9..ee83a71 100644
--- a/drivers/net/ethernet/nxp/Kconfig
+++ b/drivers/net/ethernet/nxp/Kconfig
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config LPC_ENET
-        tristate "NXP ethernet MAC on LPC devices"
-        depends on ARCH_LPC32XX
-        select PHYLIB
-        help
+	tristate "NXP ethernet MAC on LPC devices"
+	depends on ARCH_LPC32XX || COMPILE_TEST
+	select PHYLIB
+	help
 	  Say Y or M here if you want to use the NXP ethernet MAC included on
 	  some NXP LPC devices. You can safely enable this option for LPC32xx
 	  SoC. Also available as a module.
diff --git a/drivers/net/ethernet/nxp/Makefile b/drivers/net/ethernet/nxp/Makefile
index a128114..cba6ddc 100644
--- a/drivers/net/ethernet/nxp/Makefile
+++ b/drivers/net/ethernet/nxp/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_LPC_ENET) += lpc_eth.o
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 08381ef..544012a 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/nxp/lpc_eth.c
  *
@@ -5,48 +6,20 @@
  *
  * Copyright (C) 2010 NXP Semiconductors
  * Copyright (C) 2012 Roland Stigge <stigge@antcom.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/crc32.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
 #include <linux/clk.h>
-#include <linux/workqueue.h>
-#include <linux/netdevice.h>
+#include <linux/crc32.h>
 #include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/phy.h>
-#include <linux/dma-mapping.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
-#include <linux/types.h>
-
-#include <linux/io.h>
-#include <mach/board.h>
-#include <mach/platform.h>
-#include <mach/hardware.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/soc/nxp/lpc32xx-misc.h>
 
 #define MODNAME "lpc-eth"
 #define DRV_VERSION "1.00"
@@ -296,7 +269,7 @@
 #define LPC_FCCR_MIRRORCOUNTERCURRENT(n)	((n) & 0xFFFF)
 
 /*
- * rxfliterctrl, rxfilterwolstatus, and rxfilterwolclear shared
+ * rxfilterctrl, rxfilterwolstatus, and rxfilterwolclear shared
  * register definitions
  */
 #define LPC_RXFLTRW_ACCEPTUNICAST		(1 << 0)
@@ -307,7 +280,7 @@
 #define LPC_RXFLTRW_ACCEPTPERFECT		(1 << 5)
 
 /*
- * rxfliterctrl register definitions
+ * rxfilterctrl register definitions
  */
 #define LPC_RXFLTRWSTS_MAGICPACKETENWOL		(1 << 12)
 #define LPC_RXFLTRWSTS_RXFILTERENWOL		(1 << 13)
@@ -797,10 +770,7 @@
 		return PTR_ERR(phydev);
 	}
 
-	/* mask with MAC supported features */
-	phydev->supported &= PHY_BASIC_FEATURES;
-
-	phydev->advertising = phydev->supported;
+	phy_set_max_speed(phydev, SPEED_100);
 
 	pldat->link = 0;
 	pldat->speed = 0;
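Since phylib converted phydev->supported and ->advertising to linkmode bitmaps, the old open-coded masking with PHY_BASIC_FEATURES no longer works; phy_set_max_speed() is the supported way to cap a 10/100-only MAC, which is what the hunk above switches to. A sketch of the connect path, with illustrative names (ex_adjust_link is the driver's link-change callback, not shown):

#include <linux/of_mdio.h>
#include <linux/phy.h>

static int ex_mii_probe(struct net_device *ndev, struct device_node *phy_np)
{
	struct phy_device *phydev;

	phydev = of_phy_connect(ndev, phy_np, ex_adjust_link, 0,
				PHY_INTERFACE_MODE_RMII);
	if (!phydev)
		return -ENODEV;

	/* trim both supported and advertised modes to <= 100 Mb/s */
	phy_set_max_speed(phydev, SPEED_100);

	return 0;
}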
@@ -1258,28 +1228,22 @@
 
 static int lpc_eth_drv_probe(struct platform_device *pdev)
 {
-	struct resource *res;
-	struct net_device *ndev;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 	struct netdata_local *pldat;
-	struct phy_device *phydev;
+	struct net_device *ndev;
 	dma_addr_t dma_handle;
+	struct resource *res;
 	int irq, ret;
-	u32 tmp;
 
 	/* Setup network interface for RMII or MII mode */
-	tmp = __raw_readl(LPC32XX_CLKPWR_MACCLK_CTRL);
-	tmp &= ~LPC32XX_CLKPWR_MACCTRL_PINS_MSK;
-	if (lpc_phy_interface_mode(&pdev->dev) == PHY_INTERFACE_MODE_MII)
-		tmp |= LPC32XX_CLKPWR_MACCTRL_USE_MII_PINS;
-	else
-		tmp |= LPC32XX_CLKPWR_MACCTRL_USE_RMII_PINS;
-	__raw_writel(tmp, LPC32XX_CLKPWR_MACCLK_CTRL);
+	lpc32xx_set_phy_interface_mode(lpc_phy_interface_mode(dev));
 
 	/* Get platform resources */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	if (!res || irq < 0) {
-		dev_err(&pdev->dev, "error getting resources.\n");
+		dev_err(dev, "error getting resources.\n");
 		ret = -ENXIO;
 		goto err_exit;
 	}
@@ -1287,12 +1251,12 @@
 	/* Allocate net driver data structure */
 	ndev = alloc_etherdev(sizeof(struct netdata_local));
 	if (!ndev) {
-		dev_err(&pdev->dev, "could not allocate device.\n");
+		dev_err(dev, "could not allocate device.\n");
 		ret = -ENOMEM;
 		goto err_exit;
 	}
 
-	SET_NETDEV_DEV(ndev, &pdev->dev);
+	SET_NETDEV_DEV(ndev, dev);
 
 	pldat = netdev_priv(ndev);
 	pldat->pdev = pdev;
@@ -1304,9 +1268,9 @@
 	ndev->irq = irq;
 
 	/* Get clock for the device */
-	pldat->clk = clk_get(&pdev->dev, NULL);
+	pldat->clk = clk_get(dev, NULL);
 	if (IS_ERR(pldat->clk)) {
-		dev_err(&pdev->dev, "error getting clock.\n");
+		dev_err(dev, "error getting clock.\n");
 		ret = PTR_ERR(pldat->clk);
 		goto err_out_free_dev;
 	}
@@ -1319,14 +1283,14 @@
 	/* Map IO space */
 	pldat->net_base = ioremap(res->start, resource_size(res));
 	if (!pldat->net_base) {
-		dev_err(&pdev->dev, "failed to map registers\n");
+		dev_err(dev, "failed to map registers\n");
 		ret = -ENOMEM;
 		goto err_out_disable_clocks;
 	}
 	ret = request_irq(ndev->irq, __lpc_eth_interrupt, 0,
 			  ndev->name, ndev);
 	if (ret) {
-		dev_err(&pdev->dev, "error requesting interrupt.\n");
+		dev_err(dev, "error requesting interrupt.\n");
 		goto err_out_iounmap;
 	}
 
@@ -1338,20 +1302,19 @@
 	/* Get size of DMA buffers/descriptors region */
 	pldat->dma_buff_size = (ENET_TX_DESC + ENET_RX_DESC) * (ENET_MAXF_SIZE +
 		sizeof(struct txrx_desc_t) + sizeof(struct rx_status_t));
-	pldat->dma_buff_base_v = 0;
 
-	if (use_iram_for_net(&pldat->pdev->dev)) {
-		dma_handle = LPC32XX_IRAM_BASE;
-		if (pldat->dma_buff_size <= lpc32xx_return_iram_size())
-			pldat->dma_buff_base_v =
-				io_p2v(LPC32XX_IRAM_BASE);
-		else
+	if (use_iram_for_net(dev)) {
+		if (pldat->dma_buff_size >
+		    lpc32xx_return_iram(&pldat->dma_buff_base_v, &dma_handle)) {
+			pldat->dma_buff_base_v = NULL;
+			pldat->dma_buff_size = 0;
 			netdev_err(ndev,
 				"IRAM not big enough for net buffers, using SDRAM instead.\n");
+		}
 	}
 
-	if (pldat->dma_buff_base_v == 0) {
-		ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (pldat->dma_buff_base_v == NULL) {
+		ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
 		if (ret)
 			goto err_out_free_irq;
 
@@ -1360,7 +1323,7 @@
 		/* Allocate a chunk of memory for the DMA ethernet buffers
 		   and descriptors */
 		pldat->dma_buff_base_v =
-			dma_alloc_coherent(&pldat->pdev->dev,
+			dma_alloc_coherent(dev,
 					   pldat->dma_buff_size, &dma_handle,
 					   GFP_KERNEL);
 		if (pldat->dma_buff_base_v == NULL) {
@@ -1371,13 +1334,14 @@
 	pldat->dma_buff_base_p = dma_handle;
 
 	netdev_dbg(ndev, "IO address space     :%pR\n", res);
-	netdev_dbg(ndev, "IO address size      :%d\n", resource_size(res));
+	netdev_dbg(ndev, "IO address size      :%zd\n",
+			(size_t)resource_size(res));
 	netdev_dbg(ndev, "IO address (mapped)  :0x%p\n",
 			pldat->net_base);
 	netdev_dbg(ndev, "IRQ number           :%d\n", ndev->irq);
-	netdev_dbg(ndev, "DMA buffer size      :%d\n", pldat->dma_buff_size);
-	netdev_dbg(ndev, "DMA buffer P address :0x%08x\n",
-			pldat->dma_buff_base_p);
+	netdev_dbg(ndev, "DMA buffer size      :%zd\n", pldat->dma_buff_size);
+	netdev_dbg(ndev, "DMA buffer P address :%pad\n",
+			&pldat->dma_buff_base_p);
 	netdev_dbg(ndev, "DMA buffer V address :0x%p\n",
 			pldat->dma_buff_base_v);
 
@@ -1385,16 +1349,13 @@
 	__lpc_get_mac(pldat, ndev->dev_addr);
 
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
-		const char *macaddr = of_get_mac_address(pdev->dev.of_node);
-		if (macaddr)
-			memcpy(ndev->dev_addr, macaddr, ETH_ALEN);
+		const char *macaddr = of_get_mac_address(np);
+		if (!IS_ERR(macaddr))
+			ether_addr_copy(ndev->dev_addr, macaddr);
 	}
 	if (!is_valid_ether_addr(ndev->dev_addr))
 		eth_hw_addr_random(ndev);
 
-	/* Reset the ethernet controller */
-	__lpc_eth_reset(pldat);
-
 	/* then shut everything down to save power */
 	__lpc_eth_shutdown(pldat);
 
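The MAC-address hunk above reflects the v5.4-era API: of_get_mac_address() returns an ERR_PTR() on failure rather than NULL, hence the IS_ERR() check, and ether_addr_copy() replaces the open-coded memcpy(). The same fallback chain in isolation (function name is illustrative):

#include <linux/etherdevice.h>
#include <linux/of_net.h>

static void ex_init_mac(struct net_device *ndev, struct device_node *np)
{
	/* v5.4-era API: of_get_mac_address() returns ERR_PTR on failure */
	const char *macaddr = of_get_mac_address(np);

	if (!IS_ERR(macaddr))
		ether_addr_copy(ndev->dev_addr, macaddr);
	if (!is_valid_ether_addr(ndev->dev_addr))
		eth_hw_addr_random(ndev);	/* random locally administered MAC */
}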
@@ -1415,7 +1376,7 @@
 
 	ret = register_netdev(ndev);
 	if (ret) {
-		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
+		dev_err(dev, "Cannot register net device, aborting.\n");
 		goto err_out_dma_unmap;
 	}
 	platform_set_drvdata(pdev, ndev);
@@ -1424,22 +1385,20 @@
 	if (ret)
 		goto err_out_unregister_netdev;
 
-	netdev_info(ndev, "LPC mac at 0x%08x irq %d\n",
-	       res->start, ndev->irq);
+	netdev_info(ndev, "LPC mac at 0x%08lx irq %d\n",
+	       (unsigned long)res->start, ndev->irq);
 
-	phydev = ndev->phydev;
-
-	device_init_wakeup(&pdev->dev, 1);
-	device_set_wakeup_enable(&pdev->dev, 0);
+	device_init_wakeup(dev, 1);
+	device_set_wakeup_enable(dev, 0);
 
 	return 0;
 
 err_out_unregister_netdev:
 	unregister_netdev(ndev);
 err_out_dma_unmap:
-	if (!use_iram_for_net(&pldat->pdev->dev) ||
-	    pldat->dma_buff_size > lpc32xx_return_iram_size())
-		dma_free_coherent(&pldat->pdev->dev, pldat->dma_buff_size,
+	if (!use_iram_for_net(dev) ||
+	    pldat->dma_buff_size > lpc32xx_return_iram(NULL, NULL))
+		dma_free_coherent(dev, pldat->dma_buff_size,
 				  pldat->dma_buff_base_v,
 				  pldat->dma_buff_base_p);
 err_out_free_irq:
@@ -1465,7 +1424,7 @@
 	unregister_netdev(ndev);
 
 	if (!use_iram_for_net(&pldat->pdev->dev) ||
-	    pldat->dma_buff_size > lpc32xx_return_iram_size())
+	    pldat->dma_buff_size > lpc32xx_return_iram(NULL, NULL))
 		dma_free_coherent(&pldat->pdev->dev, pldat->dma_buff_size,
 				  pldat->dma_buff_base_v,
 				  pldat->dma_buff_base_p);
@@ -1534,13 +1493,11 @@
 }
 #endif
 
-#ifdef CONFIG_OF
 static const struct of_device_id lpc_eth_match[] = {
 	{ .compatible = "nxp,lpc-eth" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, lpc_eth_match);
-#endif
 
 static struct platform_driver lpc_eth_driver = {
 	.probe		= lpc_eth_drv_probe,
@@ -1551,7 +1508,7 @@
 #endif
 	.driver		= {
 		.name	= MODNAME,
-		.of_match_table = of_match_ptr(lpc_eth_match),
+		.of_match_table = lpc_eth_match,
 	},
 };
 
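With the #ifdef CONFIG_OF guard removed above, the match table is always built, so the of_match_ptr() wrapper (which compiles to NULL without CONFIG_OF and would leave the table defined but unused) is dropped in favor of a direct reference; this also keeps the new COMPILE_TEST builds warning-free. The resulting shape, with a hypothetical compatible string:

static const struct of_device_id ex_match[] = {
	{ .compatible = "vendor,example" },	/* illustrative compatible */
	{ }
};
MODULE_DEVICE_TABLE(of, ex_match);

static struct platform_driver ex_driver = {
	.driver = {
		.name = "example",
		.of_match_table = ex_match,	/* no of_match_ptr() wrapper */
	},
};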
diff --git a/drivers/net/ethernet/oki-semi/Kconfig b/drivers/net/ethernet/oki-semi/Kconfig
index 5a975af..1c455c6 100644
--- a/drivers/net/ethernet/oki-semi/Kconfig
+++ b/drivers/net/ethernet/oki-semi/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # OKI Semiconductor device configuration
 #
diff --git a/drivers/net/ethernet/oki-semi/Makefile b/drivers/net/ethernet/oki-semi/Makefile
index b6780c8..b97baf9 100644
--- a/drivers/net/ethernet/oki-semi/Makefile
+++ b/drivers/net/ethernet/oki-semi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the OKI Semiconductor device drivers.
 #
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
index 5f7a352..69e11d1 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # OKI Semiconductor device configuration
 #
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Makefile b/drivers/net/ethernet/oki-semi/pch_gbe/Makefile
index 862de0f..c4762b3 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Makefile
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_PCH_GBE) += pch_gbe.o
 
 pch_gbe-y := pch_gbe_phy.o pch_gbe_ethtool.o pch_gbe_param.o
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 44c2f29..32b9d77 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
  * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _PCH_GBE_H_
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index adaa002..1a3008e 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
  * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "pch_gbe.h"
 #include "pch_gbe_phy.h"
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 43c0c10..18e6d87 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
  * Copyright (C) 2010 - 2012 LAPIS SEMICONDUCTOR CO., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "pch_gbe.h"
@@ -27,7 +16,6 @@
 #define DRV_VERSION     "1.01"
 const char pch_driver_version[] = DRV_VERSION;
 
-#define PCI_DEVICE_ID_INTEL_IOH1_GBE	0x8802		/* Pci device ID */
 #define PCH_GBE_MAR_ENTRIES		16
 #define PCH_GBE_SHORT_PKT		64
 #define DSC_INIT16			0xC000
@@ -37,11 +25,9 @@
 #define PCH_GBE_PCI_BAR			1
 #define PCH_GBE_RESERVE_MEMORY		0x200000	/* 2MB */
 
-/* Macros for ML7223 */
-#define PCI_VENDOR_ID_ROHM			0x10db
-#define PCI_DEVICE_ID_ROHM_ML7223_GBE		0x8013
+#define PCI_DEVICE_ID_INTEL_IOH1_GBE		0x8802
 
-/* Macros for ML7831 */
+#define PCI_DEVICE_ID_ROHM_ML7223_GBE		0x8013
 #define PCI_DEVICE_ID_ROHM_ML7831_GBE		0x8802
 
 #define PCH_GBE_TX_WEIGHT         64
@@ -1440,8 +1426,8 @@
 
 	size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY;
 	rx_ring->rx_buff_pool =
-		dma_zalloc_coherent(&pdev->dev, size,
-				    &rx_ring->rx_buff_pool_logic, GFP_KERNEL);
+		dma_alloc_coherent(&pdev->dev, size,
+				   &rx_ring->rx_buff_pool_logic, GFP_KERNEL);
 	if (!rx_ring->rx_buff_pool)
 		return -ENOMEM;
 
@@ -1755,8 +1741,8 @@
 
 	tx_ring->size = tx_ring->count * (int)sizeof(struct pch_gbe_tx_desc);
 
-	tx_ring->desc = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
-					    &tx_ring->dma, GFP_KERNEL);
+	tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
 	if (!tx_ring->desc) {
 		vfree(tx_ring->buffer_info);
 		return -ENOMEM;
@@ -1798,8 +1784,8 @@
 		return -ENOMEM;
 
 	rx_ring->size = rx_ring->count * (int)sizeof(struct pch_gbe_rx_desc);
-	rx_ring->desc =	dma_zalloc_coherent(&pdev->dev, rx_ring->size,
-					    &rx_ring->dma, GFP_KERNEL);
+	rx_ring->desc =	dma_alloc_coherent(&pdev->dev, rx_ring->size,
+						  &rx_ring->dma, GFP_KERNEL);
 	if (!rx_ring->desc) {
 		vfree(rx_ring->buffer_info);
 		return -ENOMEM;
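The dma_zalloc_coherent() conversions in this file are mechanical: since the v5.0 DMA-mapping rework, dma_alloc_coherent() zeroes the buffer unconditionally, which made the zalloc wrapper redundant and led to its removal. A minimal sketch:

#include <linux/dma-mapping.h>

static void *ex_alloc_ring(struct device *dev, size_t size,
			   dma_addr_t *dma_handle)
{
	/* returns zeroed memory; no memset() is needed afterwards */
	return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
}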
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c
index e097e6b..a26966f 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
  * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "pch_gbe.h"
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
index 6b35b57..ed83204 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
  * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "pch_gbe.h"
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.h
index 23ac387..7f7531d 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
  * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #ifndef _PCH_GBE_PHY_H_
 #define _PCH_GBE_PHY_H_
diff --git a/drivers/net/ethernet/packetengines/Kconfig b/drivers/net/ethernet/packetengines/Kconfig
index 1df28f2..ead3750 100644
--- a/drivers/net/ethernet/packetengines/Kconfig
+++ b/drivers/net/ethernet/packetengines/Kconfig
@@ -1,9 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
-# Packet engine device configuration
+# Packet Engines device configuration
 #
 
 config NET_VENDOR_PACKET_ENGINES
-	bool "Packet Engine devices"
+	bool "Packet Engines devices"
 	default y
 	depends on PCI
 	---help---
@@ -11,7 +12,7 @@
 
 	  Note that the answer to this question doesn't directly affect the
 	  kernel: saying N will just cause the configurator to skip all
-	  the questions about packet engine devices. If you say Y, you will
+	  the questions about Packet Engines devices. If you say Y, you will
 	  be asked for your specific card in the following questions.
 
 if NET_VENDOR_PACKET_ENGINES
diff --git a/drivers/net/ethernet/packetengines/Makefile b/drivers/net/ethernet/packetengines/Makefile
index 995ccd0..cf054b7 100644
--- a/drivers/net/ethernet/packetengines/Makefile
+++ b/drivers/net/ethernet/packetengines/Makefile
@@ -1,5 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
-# Makefile for the Packet Engine network device drivers.
+# Makefile for the Packet Engines network device drivers.
 #
 
 obj-$(CONFIG_HAMACHI) += hamachi.o
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index c9529c2..eee883a 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -1337,7 +1337,7 @@
 							leXX_to_cpu(hmp->tx_ring[entry].addr),
 							skb->len,
 							PCI_DMA_TODEVICE);
-						dev_kfree_skb_irq(skb);
+						dev_consume_skb_irq(skb);
 						hmp->tx_skbuff[entry] = NULL;
 					}
 					hmp->tx_ring[entry].status_n_length = 0;
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index 54224d1..5113ee6 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -925,7 +925,7 @@
 			/* Free the original skb. */
 			pci_unmap_single(yp->pci_dev, le32_to_cpu(yp->tx_ring[entry].addr),
 				skb->len, PCI_DMA_TODEVICE);
-			dev_kfree_skb_irq(skb);
+			dev_consume_skb_irq(skb);
 			yp->tx_skbuff[entry] = NULL;
 		}
 		if (yp->tx_full &&
@@ -983,7 +983,7 @@
 				pci_unmap_single(yp->pci_dev,
 					yp->tx_ring[entry<<1].addr, skb->len,
 					PCI_DMA_TODEVICE);
-				dev_kfree_skb_irq(skb);
+				dev_consume_skb_irq(skb);
 				yp->tx_skbuff[entry] = 0;
 				/* Mark status as empty. */
 				yp->tx_status[entry].tx_errs = 0;
@@ -1258,8 +1258,7 @@
 		yp->rx_skbuff[i] = NULL;
 	}
 	for (i = 0; i < TX_RING_SIZE; i++) {
-		if (yp->tx_skbuff[i])
-			dev_kfree_skb(yp->tx_skbuff[i]);
+		dev_kfree_skb(yp->tx_skbuff[i]);
 		yp->tx_skbuff[i] = NULL;
 	}
 
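Two distinct cleanups appear in the yellowfin hunks above. First, successfully transmitted skbs are now released with dev_consume_skb_irq(), which records the skb as consumed rather than dropped, so drop monitors (dropwatch, perf trace) no longer count normal TX completion as packet loss. Second, the NULL check before dev_kfree_skb() is deleted because freeing a NULL skb is a no-op. The distinction, sketched with illustrative names:

#include <linux/skbuff.h>

static void ex_tx_complete(struct sk_buff *skb, bool success)
{
	if (success)
		dev_consume_skb_irq(skb);	/* normal completion: not a drop */
	else
		dev_kfree_skb_irq(skb);		/* error path: visible to drop monitors */
}

dev_kfree_skb() and kfree_skb() both accept NULL and do nothing, which is why the if (yp->tx_skbuff[i]) guard could be removed outright.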
diff --git a/drivers/net/ethernet/pasemi/Kconfig b/drivers/net/ethernet/pasemi/Kconfig
index 7c92e83..f456224 100644
--- a/drivers/net/ethernet/pasemi/Kconfig
+++ b/drivers/net/ethernet/pasemi/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # PA Semi network device configuration
 #
diff --git a/drivers/net/ethernet/pasemi/Makefile b/drivers/net/ethernet/pasemi/Makefile
index 90497ff..f51e614 100644
--- a/drivers/net/ethernet/pasemi/Makefile
+++ b/drivers/net/ethernet/pasemi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the A Semi network device drivers.
 #
diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c
index 8a31a02..be66601 100644
--- a/drivers/net/ethernet/pasemi/pasemi_mac.c
+++ b/drivers/net/ethernet/pasemi/pasemi_mac.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2006-2007 PA Semi, Inc
  *
  * Driver for the PA Semi PWRficient onchip 1G/10G Ethernet MACs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/module.h>
@@ -401,9 +390,9 @@
 	if (pasemi_dma_alloc_ring(&ring->chan, RX_RING_SIZE))
 		goto out_ring_desc;
 
-	ring->buffers = dma_zalloc_coherent(&mac->dma_pdev->dev,
-					    RX_RING_SIZE * sizeof(u64),
-					    &ring->buf_dma, GFP_KERNEL);
+	ring->buffers = dma_alloc_coherent(&mac->dma_pdev->dev,
+					   RX_RING_SIZE * sizeof(u64),
+					   &ring->buf_dma, GFP_KERNEL);
 	if (!ring->buffers)
 		goto out_ring_desc;
 
@@ -1053,7 +1042,6 @@
 
 	dn = pci_device_to_OF_node(mac->pdev);
 	phy_dn = of_parse_phandle(dn, "phy-handle", 0);
-	of_node_put(phy_dn);
 
 	mac->link = 0;
 	mac->speed = 0;
@@ -1062,6 +1050,7 @@
 	phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0,
 				PHY_INTERFACE_MODE_SGMII);
 
+	of_node_put(phy_dn);
 	if (!phydev) {
 		printk(KERN_ERR "%s: Could not attach to phy\n", dev->name);
 		return -ENODEV;
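The pasemi hunks above are a reference-counting fix: of_parse_phandle() returns the node with its refcount elevated, and the old code dropped that reference with of_node_put() before of_phy_connect() had used the node, leaving a window for a use-after-free. Moving the put after the last use closes it. The corrected shape:

	struct device_node *phy_dn;
	struct phy_device *phydev;

	phy_dn = of_parse_phandle(dn, "phy-handle", 0);	/* takes a reference */

	phydev = of_phy_connect(dev, phy_dn, pasemi_adjust_link, 0,
				PHY_INTERFACE_MODE_SGMII);
	of_node_put(phy_dn);	/* drop the reference only after the last use */
	if (!phydev)
		return -ENODEV;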
@@ -1355,7 +1344,7 @@
 	const int nh_off = skb_network_offset(skb);
 	const int nh_len = skb_network_header_len(skb);
 	const int nfrags = skb_shinfo(skb)->nr_frags;
-	int cs_size, i, fill, hdr, cpyhdr, evt;
+	int cs_size, i, fill, hdr, evt;
 	dma_addr_t csdma;
 
 	fund = XCT_FUN_ST | XCT_FUN_RR_8BRES |
@@ -1396,7 +1385,6 @@
 		fill++;
 
 	/* Copy the result into the TCP packet */
-	cpyhdr = fill;
 	CS_DESC(csring, fill++) = XCT_FUN_O | XCT_FUN_FUN(csring->fun) |
 				  XCT_FUN_LLEN(2) | XCT_FUN_SE;
 	CS_DESC(csring, fill++) = XCT_PTR_LEN(2) | XCT_PTR_ADDR(cs_dest) | XCT_PTR_T;
@@ -1716,6 +1704,7 @@
 		err = -ENODEV;
 		goto out;
 	}
+	dma_set_mask(&mac->dma_pdev->dev, DMA_BIT_MASK(64));
 
 	mac->iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
 	if (!mac->iob_pdev) {
@@ -1838,7 +1827,7 @@
 	pci_unregister_driver(&pasemi_mac_driver);
 }
 
-int pasemi_mac_init_module(void)
+static int pasemi_mac_init_module(void)
 {
 	int err;
 
diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.h b/drivers/net/ethernet/pasemi/pasemi_mac.h
index 7c47e26..d7d2068 100644
--- a/drivers/net/ethernet/pasemi/pasemi_mac.h
+++ b/drivers/net/ethernet/pasemi/pasemi_mac.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2006 PA Semi, Inc
  *
  * Driver for the PA6T-1682M onchip 1G/10G Ethernet MACs, soft state and
  * hardware register layouts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef PASEMI_MAC_H
diff --git a/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c b/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c
index d0afc2b..e1a3048 100644
--- a/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c
+++ b/drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2006-2008 PA Semi, Inc
  *
  * Ethtool hooks for the PA Semi PWRficient onchip 1G/10G Ethernet MACs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 
diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
new file mode 100644
index 0000000..d25b88f
--- /dev/null
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2019 Pensando Systems, Inc
+#
+# Pensando device configuration
+#
+
+config NET_VENDOR_PENSANDO
+	bool "Pensando devices"
+	default y
+	help
+	  If you have a network (Ethernet) card belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Pensando cards. If you say Y, you will be asked
+	  for your specific card in the following questions.
+
+if NET_VENDOR_PENSANDO
+
+config IONIC
+	tristate "Pensando Ethernet IONIC Support"
+	depends on 64BIT && PCI
+	select NET_DEVLINK
+	help
+	  This enables the support for the Pensando family of Ethernet
+	  adapters.  More specific information on this driver can be
+	  found in
+	  <file:Documentation/networking/device_drivers/pensando/ionic.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called ionic.
+
+endif # NET_VENDOR_PENSANDO
diff --git a/drivers/net/ethernet/pensando/Makefile b/drivers/net/ethernet/pensando/Makefile
new file mode 100644
index 0000000..21ce749
--- /dev/null
+++ b/drivers/net/ethernet/pensando/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Pensando network device drivers.
+#
+
+obj-$(CONFIG_IONIC) += ionic/
diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile
new file mode 100644
index 0000000..29f304d
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2017 - 2019 Pensando Systems, Inc
+
+obj-$(CONFIG_IONIC) := ionic.o
+
+ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \
+	   ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \
+	   ionic_txrx.o ionic_stats.o
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
new file mode 100644
index 0000000..7a70606
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_H_
+#define _IONIC_H_
+
+struct ionic_lif;
+
+#include "ionic_if.h"
+#include "ionic_dev.h"
+#include "ionic_devlink.h"
+
+#define IONIC_DRV_NAME		"ionic"
+#define IONIC_DRV_DESCRIPTION	"Pensando Ethernet NIC Driver"
+#define IONIC_DRV_VERSION	"0.15.0-k"
+
+#define PCI_VENDOR_ID_PENSANDO			0x1dd8
+
+#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF	0x1002
+#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF	0x1003
+
+#define IONIC_SUBDEV_ID_NAPLES_25	0x4000
+#define IONIC_SUBDEV_ID_NAPLES_100_4	0x4001
+#define IONIC_SUBDEV_ID_NAPLES_100_8	0x4002
+
+#define DEVCMD_TIMEOUT  10
+
+struct ionic {
+	struct pci_dev *pdev;
+	struct device *dev;
+	struct devlink_port dl_port;
+	struct ionic_dev idev;
+	struct mutex dev_cmd_lock;	/* lock for dev_cmd operations */
+	struct dentry *dentry;
+	struct ionic_dev_bar bars[IONIC_BARS_MAX];
+	unsigned int num_bars;
+	struct ionic_identity ident;
+	struct list_head lifs;
+	struct ionic_lif *master_lif;
+	unsigned int nnqs_per_lif;
+	unsigned int neqs_per_lif;
+	unsigned int ntxqs_per_lif;
+	unsigned int nrxqs_per_lif;
+	DECLARE_BITMAP(lifbits, IONIC_LIFS_MAX);
+	unsigned int nintrs;
+	DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
+	struct work_struct nb_work;
+	struct notifier_block nb;
+};
+
+struct ionic_admin_ctx {
+	struct completion work;
+	union ionic_adminq_cmd cmd;
+	union ionic_adminq_comp comp;
+};
+
+int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
+	       ionic_cq_done_cb done_cb, void *done_arg);
+
+int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_set_dma_mask(struct ionic *ionic);
+int ionic_setup(struct ionic *ionic);
+
+int ionic_identify(struct ionic *ionic);
+int ionic_init(struct ionic *ionic);
+int ionic_reset(struct ionic *ionic);
+
+int ionic_port_identify(struct ionic *ionic);
+int ionic_port_init(struct ionic *ionic);
+int ionic_port_reset(struct ionic *ionic);
+
+#endif /* _IONIC_H_ */
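struct ionic above reserves its LIF and interrupt ID spaces as fixed-size bitmaps via DECLARE_BITMAP(lifbits, ...) and DECLARE_BITMAP(intrs, ...). A typical allocator over such a bitmap looks like the following sketch; this is a hypothetical helper illustrating the pattern, not necessarily how the ionic driver implements it:

#include <linux/bitmap.h>
#include <linux/errno.h>

/* claim the lowest free index in a resource bitmap */
static int ex_bitmap_alloc(unsigned long *map, unsigned int size)
{
	unsigned int index = find_first_zero_bit(map, size);

	if (index == size)
		return -ENOSPC;		/* every entry is in use */
	set_bit(index, map);
	return index;
}

Freeing is the mirror image: clear_bit(index, map) returns the ID to the pool.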
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus.h b/drivers/net/ethernet/pensando/ionic/ionic_bus.h
new file mode 100644
index 0000000..2f4d08c
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_BUS_H_
+#define _IONIC_BUS_H_
+
+int ionic_bus_get_irq(struct ionic *ionic, unsigned int num);
+const char *ionic_bus_info(struct ionic *ionic);
+int ionic_bus_alloc_irq_vectors(struct ionic *ionic, unsigned int nintrs);
+void ionic_bus_free_irq_vectors(struct ionic *ionic);
+int ionic_bus_register_driver(void);
+void ionic_bus_unregister_driver(void);
+void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num);
+void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page);
+
+#endif /* _IONIC_BUS_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
new file mode 100644
index 0000000..9a9ab8c
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_debugfs.h"
+
+/* Supported devices */
+static const struct pci_device_id ionic_id_table[] = {
+	{ PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF) },
+	{ PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF) },
+	{ 0, }	/* end of table */
+};
+MODULE_DEVICE_TABLE(pci, ionic_id_table);
+
+int ionic_bus_get_irq(struct ionic *ionic, unsigned int num)
+{
+	return pci_irq_vector(ionic->pdev, num);
+}
+
+const char *ionic_bus_info(struct ionic *ionic)
+{
+	return pci_name(ionic->pdev);
+}
+
+int ionic_bus_alloc_irq_vectors(struct ionic *ionic, unsigned int nintrs)
+{
+	return pci_alloc_irq_vectors(ionic->pdev, nintrs, nintrs,
+				     PCI_IRQ_MSIX);
+}
+
+void ionic_bus_free_irq_vectors(struct ionic *ionic)
+{
+	pci_free_irq_vectors(ionic->pdev);
+}
+
+static int ionic_map_bars(struct ionic *ionic)
+{
+	struct pci_dev *pdev = ionic->pdev;
+	struct device *dev = ionic->dev;
+	struct ionic_dev_bar *bars;
+	unsigned int i, j;
+
+	bars = ionic->bars;
+	ionic->num_bars = 0;
+
+	for (i = 0, j = 0; i < IONIC_BARS_MAX; i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		bars[j].len = pci_resource_len(pdev, i);
+
+		/* only map the whole bar 0 */
+		if (j > 0) {
+			bars[j].vaddr = NULL;
+		} else {
+			bars[j].vaddr = pci_iomap(pdev, i, bars[j].len);
+			if (!bars[j].vaddr) {
+				dev_err(dev,
+					"Cannot memory-map BAR %d, aborting\n",
+					i);
+				return -ENODEV;
+			}
+		}
+
+		bars[j].bus_addr = pci_resource_start(pdev, i);
+		bars[j].res_index = i;
+		ionic->num_bars++;
+		j++;
+	}
+
+	return 0;
+}
+
+static void ionic_unmap_bars(struct ionic *ionic)
+{
+	struct ionic_dev_bar *bars = ionic->bars;
+	unsigned int i;
+
+	for (i = 0; i < IONIC_BARS_MAX; i++) {
+		if (bars[i].vaddr) {
+			iounmap(bars[i].vaddr);
+			bars[i].bus_addr = 0;
+			bars[i].vaddr = NULL;
+			bars[i].len = 0;
+		}
+	}
+}
+
+void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num)
+{
+	return pci_iomap_range(ionic->pdev,
+			       ionic->bars[IONIC_PCI_BAR_DBELL].res_index,
+			       (u64)page_num << PAGE_SHIFT, PAGE_SIZE);
+}
+
+void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
+{
+	iounmap(page);
+}
+
+static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct ionic *ionic;
+	int err;
+
+	ionic = ionic_devlink_alloc(dev);
+	if (!ionic)
+		return -ENOMEM;
+
+	ionic->pdev = pdev;
+	ionic->dev = dev;
+	pci_set_drvdata(pdev, ionic);
+	mutex_init(&ionic->dev_cmd_lock);
+
+	/* Query system for DMA addressing limitation for the device. */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(IONIC_ADDR_LEN));
+	if (err) {
+		dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting.  err=%d\n",
+			err);
+		goto err_out_clear_drvdata;
+	}
+
+	ionic_debugfs_add_dev(ionic);
+
+	/* Setup PCI device */
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(dev, "Cannot enable PCI device: %d, aborting\n", err);
+		goto err_out_debugfs_del_dev;
+	}
+
+	err = pci_request_regions(pdev, IONIC_DRV_NAME);
+	if (err) {
+		dev_err(dev, "Cannot request PCI regions: %d, aborting\n", err);
+		goto err_out_pci_disable_device;
+	}
+
+	pci_set_master(pdev);
+
+	err = ionic_map_bars(ionic);
+	if (err)
+		goto err_out_pci_clear_master;
+
+	/* Configure the device */
+	err = ionic_setup(ionic);
+	if (err) {
+		dev_err(dev, "Cannot setup device: %d, aborting\n", err);
+		goto err_out_unmap_bars;
+	}
+
+	err = ionic_identify(ionic);
+	if (err) {
+		dev_err(dev, "Cannot identify device: %d, aborting\n", err);
+		goto err_out_teardown;
+	}
+
+	err = ionic_init(ionic);
+	if (err) {
+		dev_err(dev, "Cannot init device: %d, aborting\n", err);
+		goto err_out_teardown;
+	}
+
+	/* Configure the ports */
+	err = ionic_port_identify(ionic);
+	if (err) {
+		dev_err(dev, "Cannot identify port: %d, aborting\n", err);
+		goto err_out_reset;
+	}
+
+	err = ionic_port_init(ionic);
+	if (err) {
+		dev_err(dev, "Cannot init port: %d, aborting\n", err);
+		goto err_out_reset;
+	}
+
+	/* Configure LIFs */
+	err = ionic_lif_identify(ionic, IONIC_LIF_TYPE_CLASSIC,
+				 &ionic->ident.lif);
+	if (err) {
+		dev_err(dev, "Cannot identify LIFs: %d, aborting\n", err);
+		goto err_out_port_reset;
+	}
+
+	err = ionic_lifs_size(ionic);
+	if (err) {
+		dev_err(dev, "Cannot size LIFs: %d, aborting\n", err);
+		goto err_out_port_reset;
+	}
+
+	err = ionic_lifs_alloc(ionic);
+	if (err) {
+		dev_err(dev, "Cannot allocate LIFs: %d, aborting\n", err);
+		goto err_out_free_irqs;
+	}
+
+	err = ionic_lifs_init(ionic);
+	if (err) {
+		dev_err(dev, "Cannot init LIFs: %d, aborting\n", err);
+		goto err_out_free_lifs;
+	}
+
+	err = ionic_lifs_register(ionic);
+	if (err) {
+		dev_err(dev, "Cannot register LIFs: %d, aborting\n", err);
+		goto err_out_deinit_lifs;
+	}
+
+	err = ionic_devlink_register(ionic);
+	if (err) {
+		dev_err(dev, "Cannot register devlink: %d\n", err);
+		goto err_out_deregister_lifs;
+	}
+
+	return 0;
+
+err_out_deregister_lifs:
+	ionic_lifs_unregister(ionic);
+err_out_deinit_lifs:
+	ionic_lifs_deinit(ionic);
+err_out_free_lifs:
+	ionic_lifs_free(ionic);
+err_out_free_irqs:
+	ionic_bus_free_irq_vectors(ionic);
+err_out_port_reset:
+	ionic_port_reset(ionic);
+err_out_reset:
+	ionic_reset(ionic);
+err_out_teardown:
+	ionic_dev_teardown(ionic);
+err_out_unmap_bars:
+	ionic_unmap_bars(ionic);
+	pci_release_regions(pdev);
+err_out_pci_clear_master:
+	pci_clear_master(pdev);
+err_out_pci_disable_device:
+	pci_disable_device(pdev);
+err_out_debugfs_del_dev:
+	ionic_debugfs_del_dev(ionic);
+err_out_clear_drvdata:
+	mutex_destroy(&ionic->dev_cmd_lock);
+	ionic_devlink_free(ionic);
+
+	return err;
+}
+
+static void ionic_remove(struct pci_dev *pdev)
+{
+	struct ionic *ionic = pci_get_drvdata(pdev);
+
+	if (!ionic)
+		return;
+
+	ionic_devlink_unregister(ionic);
+	ionic_lifs_unregister(ionic);
+	ionic_lifs_deinit(ionic);
+	ionic_lifs_free(ionic);
+	ionic_bus_free_irq_vectors(ionic);
+	ionic_port_reset(ionic);
+	ionic_reset(ionic);
+	ionic_dev_teardown(ionic);
+	ionic_unmap_bars(ionic);
+	pci_release_regions(pdev);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+	ionic_debugfs_del_dev(ionic);
+	mutex_destroy(&ionic->dev_cmd_lock);
+	ionic_devlink_free(ionic);
+}
+
+static struct pci_driver ionic_driver = {
+	.name = IONIC_DRV_NAME,
+	.id_table = ionic_id_table,
+	.probe = ionic_probe,
+	.remove = ionic_remove,
+};
+
+int ionic_bus_register_driver(void)
+{
+	return pci_register_driver(&ionic_driver);
+}
+
+void ionic_bus_unregister_driver(void)
+{
+	pci_unregister_driver(&ionic_driver);
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
new file mode 100644
index 0000000..bc03cec
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_debugfs.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+static struct dentry *ionic_dir;
+
+void ionic_debugfs_create(void)
+{
+	ionic_dir = debugfs_create_dir(IONIC_DRV_NAME, NULL);
+}
+
+void ionic_debugfs_destroy(void)
+{
+	debugfs_remove_recursive(ionic_dir);
+}
+
+void ionic_debugfs_add_dev(struct ionic *ionic)
+{
+	ionic->dentry = debugfs_create_dir(ionic_bus_info(ionic), ionic_dir);
+}
+
+void ionic_debugfs_del_dev(struct ionic *ionic)
+{
+	debugfs_remove_recursive(ionic->dentry);
+	ionic->dentry = NULL;
+}
+
+static int identity_show(struct seq_file *seq, void *v)
+{
+	struct ionic *ionic = seq->private;
+	struct ionic_identity *ident;
+
+	ident = &ionic->ident;
+
+	seq_printf(seq, "nlifs:            %d\n", ident->dev.nlifs);
+	seq_printf(seq, "nintrs:           %d\n", ident->dev.nintrs);
+	seq_printf(seq, "ndbpgs_per_lif:   %d\n", ident->dev.ndbpgs_per_lif);
+	seq_printf(seq, "intr_coal_mult:   %d\n", ident->dev.intr_coal_mult);
+	seq_printf(seq, "intr_coal_div:    %d\n", ident->dev.intr_coal_div);
+
+	seq_printf(seq, "max_ucast_filters:  %d\n", ident->lif.eth.max_ucast_filters);
+	seq_printf(seq, "max_mcast_filters:  %d\n", ident->lif.eth.max_mcast_filters);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(identity);
+
+void ionic_debugfs_add_ident(struct ionic *ionic)
+{
+	debugfs_create_file("identity", 0400, ionic->dentry,
+			    ionic, &identity_fops);
+}
+
+void ionic_debugfs_add_sizes(struct ionic *ionic)
+{
+	debugfs_create_u32("nlifs", 0400, ionic->dentry,
+			   (u32 *)&ionic->ident.dev.nlifs);
+	debugfs_create_u32("nintrs", 0400, ionic->dentry, &ionic->nintrs);
+
+	debugfs_create_u32("ntxqs_per_lif", 0400, ionic->dentry,
+			   (u32 *)&ionic->ident.lif.eth.config.queue_count[IONIC_QTYPE_TXQ]);
+	debugfs_create_u32("nrxqs_per_lif", 0400, ionic->dentry,
+			   (u32 *)&ionic->ident.lif.eth.config.queue_count[IONIC_QTYPE_RXQ]);
+}
+
+static int q_tail_show(struct seq_file *seq, void *v)
+{
+	struct ionic_queue *q = seq->private;
+
+	seq_printf(seq, "%d\n", q->tail->index);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(q_tail);
+
+static int q_head_show(struct seq_file *seq, void *v)
+{
+	struct ionic_queue *q = seq->private;
+
+	seq_printf(seq, "%d\n", q->head->index);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(q_head);
+
+static int cq_tail_show(struct seq_file *seq, void *v)
+{
+	struct ionic_cq *cq = seq->private;
+
+	seq_printf(seq, "%d\n", cq->tail->index);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cq_tail);
+
+static const struct debugfs_reg32 intr_ctrl_regs[] = {
+	{ .name = "coal_init", .offset = 0, },
+	{ .name = "mask", .offset = 4, },
+	{ .name = "credits", .offset = 8, },
+	{ .name = "mask_on_assert", .offset = 12, },
+	{ .name = "coal_timer", .offset = 16, },
+};
+
+void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+	struct dentry *q_dentry, *cq_dentry, *intr_dentry, *stats_dentry;
+	struct ionic_dev *idev = &lif->ionic->idev;
+	struct debugfs_regset32 *intr_ctrl_regset;
+	struct ionic_intr_info *intr = &qcq->intr;
+	struct debugfs_blob_wrapper *desc_blob;
+	struct device *dev = lif->ionic->dev;
+	struct ionic_queue *q = &qcq->q;
+	struct ionic_cq *cq = &qcq->cq;
+
+	qcq->dentry = debugfs_create_dir(q->name, lif->dentry);
+
+	debugfs_create_x32("total_size", 0400, qcq->dentry, &qcq->total_size);
+	debugfs_create_x64("base_pa", 0400, qcq->dentry, &qcq->base_pa);
+
+	q_dentry = debugfs_create_dir("q", qcq->dentry);
+
+	debugfs_create_u32("index", 0400, q_dentry, &q->index);
+	debugfs_create_x64("base_pa", 0400, q_dentry, &q->base_pa);
+	if (qcq->flags & IONIC_QCQ_F_SG) {
+		debugfs_create_x64("sg_base_pa", 0400, q_dentry,
+				   &q->sg_base_pa);
+		debugfs_create_u32("sg_desc_size", 0400, q_dentry,
+				   &q->sg_desc_size);
+	}
+	debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs);
+	debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size);
+	debugfs_create_u32("pid", 0400, q_dentry, &q->pid);
+	debugfs_create_u32("qid", 0400, q_dentry, &q->hw_index);
+	debugfs_create_u32("qtype", 0400, q_dentry, &q->hw_type);
+	debugfs_create_u64("drop", 0400, q_dentry, &q->drop);
+	debugfs_create_u64("stop", 0400, q_dentry, &q->stop);
+	debugfs_create_u64("wake", 0400, q_dentry, &q->wake);
+
+	debugfs_create_file("tail", 0400, q_dentry, q, &q_tail_fops);
+	debugfs_create_file("head", 0400, q_dentry, q, &q_head_fops);
+
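+	/* the blob wrappers are devm-allocated, so they stay valid for
+	 * the life of the device; debugfs only references them
+	 */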
+	desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL);
+	if (!desc_blob)
+		return;
+	desc_blob->data = q->base;
+	desc_blob->size = (unsigned long)q->num_descs * q->desc_size;
+	debugfs_create_blob("desc_blob", 0400, q_dentry, desc_blob);
+
+	if (qcq->flags & IONIC_QCQ_F_SG) {
+		desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL);
+		if (!desc_blob)
+			return;
+		desc_blob->data = q->sg_base;
+		desc_blob->size = (unsigned long)q->num_descs * q->sg_desc_size;
+		debugfs_create_blob("sg_desc_blob", 0400, q_dentry,
+				    desc_blob);
+	}
+
+	cq_dentry = debugfs_create_dir("cq", qcq->dentry);
+
+	debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa);
+	debugfs_create_u32("num_descs", 0400, cq_dentry, &cq->num_descs);
+	debugfs_create_u32("desc_size", 0400, cq_dentry, &cq->desc_size);
+	debugfs_create_u8("done_color", 0400, cq_dentry,
+			  (u8 *)&cq->done_color);
+
+	debugfs_create_file("tail", 0400, cq_dentry, cq, &cq_tail_fops);
+
+	desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL);
+	if (!desc_blob)
+		return;
+	desc_blob->data = cq->base;
+	desc_blob->size = (unsigned long)cq->num_descs * cq->desc_size;
+	debugfs_create_blob("desc_blob", 0400, cq_dentry, desc_blob);
+
+	if (qcq->flags & IONIC_QCQ_F_INTR) {
+		intr_dentry = debugfs_create_dir("intr", qcq->dentry);
+
+		debugfs_create_u32("index", 0400, intr_dentry,
+				   &intr->index);
+		debugfs_create_u32("vector", 0400, intr_dentry,
+				   &intr->vector);
+
+		intr_ctrl_regset = devm_kzalloc(dev, sizeof(*intr_ctrl_regset),
+						GFP_KERNEL);
+		if (!intr_ctrl_regset)
+			return;
+		intr_ctrl_regset->regs = intr_ctrl_regs;
+		intr_ctrl_regset->nregs = ARRAY_SIZE(intr_ctrl_regs);
+		intr_ctrl_regset->base = &idev->intr_ctrl[intr->index];
+
+		debugfs_create_regset32("intr_ctrl", 0400, intr_dentry,
+					intr_ctrl_regset);
+	}
+
+	if (qcq->flags & IONIC_QCQ_F_NOTIFYQ) {
+		stats_dentry = debugfs_create_dir("notifyblock", qcq->dentry);
+
+		debugfs_create_u64("eid", 0400, stats_dentry,
+				   (u64 *)&lif->info->status.eid);
+		debugfs_create_u16("link_status", 0400, stats_dentry,
+				   (u16 *)&lif->info->status.link_status);
+		debugfs_create_u32("link_speed", 0400, stats_dentry,
+				   (u32 *)&lif->info->status.link_speed);
+		debugfs_create_u16("link_down_count", 0400, stats_dentry,
+				   (u16 *)&lif->info->status.link_down_count);
+	}
+}
+
+static int netdev_show(struct seq_file *seq, void *v)
+{
+	struct net_device *netdev = seq->private;
+
+	seq_printf(seq, "%s\n", netdev->name);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(netdev);
+
+void ionic_debugfs_add_lif(struct ionic_lif *lif)
+{
+	lif->dentry = debugfs_create_dir(lif->name, lif->ionic->dentry);
+	debugfs_create_file("netdev", 0400, lif->dentry,
+			    lif->netdev, &netdev_fops);
+}
+
+void ionic_debugfs_del_lif(struct ionic_lif *lif)
+{
+	debugfs_remove_recursive(lif->dentry);
+	lif->dentry = NULL;
+}
+
+void ionic_debugfs_del_qcq(struct ionic_qcq *qcq)
+{
+	debugfs_remove_recursive(qcq->dentry);
+	qcq->dentry = NULL;
+}
+
+#endif
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h
new file mode 100644
index 0000000..c44ebde
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_DEBUGFS_H_
+#define _IONIC_DEBUGFS_H_
+
+#include <linux/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+void ionic_debugfs_create(void);
+void ionic_debugfs_destroy(void);
+void ionic_debugfs_add_dev(struct ionic *ionic);
+void ionic_debugfs_del_dev(struct ionic *ionic);
+void ionic_debugfs_add_ident(struct ionic *ionic);
+void ionic_debugfs_add_sizes(struct ionic *ionic);
+void ionic_debugfs_add_lif(struct ionic_lif *lif);
+void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq);
+void ionic_debugfs_del_lif(struct ionic_lif *lif);
+void ionic_debugfs_del_qcq(struct ionic_qcq *qcq);
+#else
+static inline void ionic_debugfs_create(void) { }
+static inline void ionic_debugfs_destroy(void) { }
+static inline void ionic_debugfs_add_dev(struct ionic *ionic) { }
+static inline void ionic_debugfs_del_dev(struct ionic *ionic) { }
+static inline void ionic_debugfs_add_ident(struct ionic *ionic) { }
+static inline void ionic_debugfs_add_sizes(struct ionic *ionic) { }
+static inline void ionic_debugfs_add_lif(struct ionic_lif *lif) { }
+static inline void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) { }
+static inline void ionic_debugfs_del_lif(struct ionic_lif *lif) { }
+static inline void ionic_debugfs_del_qcq(struct ionic_qcq *qcq) { }
+#endif
+
+#endif /* _IONIC_DEBUGFS_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
new file mode 100644
index 0000000..d168a64
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/etherdevice.h>
+#include "ionic.h"
+#include "ionic_dev.h"
+#include "ionic_lif.h"
+
+void ionic_init_devinfo(struct ionic *ionic)
+{
+	struct ionic_dev *idev = &ionic->idev;
+
+	idev->dev_info.asic_type = ioread8(&idev->dev_info_regs->asic_type);
+	idev->dev_info.asic_rev = ioread8(&idev->dev_info_regs->asic_rev);
+
+	memcpy_fromio(idev->dev_info.fw_version,
+		      idev->dev_info_regs->fw_version,
+		      IONIC_DEVINFO_FWVERS_BUFLEN);
+
+	memcpy_fromio(idev->dev_info.serial_num,
+		      idev->dev_info_regs->serial_num,
+		      IONIC_DEVINFO_SERIAL_BUFLEN);
+
+	idev->dev_info.fw_version[IONIC_DEVINFO_FWVERS_BUFLEN] = 0;
+	idev->dev_info.serial_num[IONIC_DEVINFO_SERIAL_BUFLEN] = 0;
+
+	dev_dbg(ionic->dev, "fw_version %s\n", idev->dev_info.fw_version);
+}
+
+int ionic_dev_setup(struct ionic *ionic)
+{
+	struct ionic_dev_bar *bar = ionic->bars;
+	unsigned int num_bars = ionic->num_bars;
+	struct ionic_dev *idev = &ionic->idev;
+	struct device *dev = ionic->dev;
+	u32 sig;
+
+	/* BAR0: dev_cmd and interrupts */
+	if (num_bars < 1) {
+		dev_err(dev, "No bars found, aborting\n");
+		return -EFAULT;
+	}
+
+	if (bar->len < IONIC_BAR0_SIZE) {
+		dev_err(dev, "Resource bar size %lu too small, aborting\n",
+			bar->len);
+		return -EFAULT;
+	}
+
+	idev->dev_info_regs = bar->vaddr + IONIC_BAR0_DEV_INFO_REGS_OFFSET;
+	idev->dev_cmd_regs = bar->vaddr + IONIC_BAR0_DEV_CMD_REGS_OFFSET;
+	idev->intr_status = bar->vaddr + IONIC_BAR0_INTR_STATUS_OFFSET;
+	idev->intr_ctrl = bar->vaddr + IONIC_BAR0_INTR_CTRL_OFFSET;
+
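+	/* check the info-region signature before trusting the register layout */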
+	sig = ioread32(&idev->dev_info_regs->signature);
+	if (sig != IONIC_DEV_INFO_SIGNATURE) {
+		dev_err(dev, "Incompatible firmware signature %x", sig);
+		return -EFAULT;
+	}
+
+	ionic_init_devinfo(ionic);
+
+	/* BAR1: doorbells */
+	bar++;
+	if (num_bars < 2) {
+		dev_err(dev, "Doorbell bar missing, aborting\n");
+		return -EFAULT;
+	}
+
+	idev->db_pages = bar->vaddr;
+	idev->phy_db_pages = bar->bus_addr;
+
+	return 0;
+}
+
+void ionic_dev_teardown(struct ionic *ionic)
+{
+	/* placeholder */
+}
+
+/* Devcmd Interface */
+u8 ionic_dev_cmd_status(struct ionic_dev *idev)
+{
+	return ioread8(&idev->dev_cmd_regs->comp.comp.status);
+}
+
+bool ionic_dev_cmd_done(struct ionic_dev *idev)
+{
+	return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE;
+}
+
+void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp)
+{
+	memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp));
+}
+
+void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd)
+{
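+	/* Post the command through the dev_cmd register window: copy in
+	 * the command body, clear the done flag, then ring the doorbell.
+	 * Completion is detected by polling the done flag, as in
+	 * ionic_dev_cmd_done().
+	 */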
+	memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd));
+	iowrite32(0, &idev->dev_cmd_regs->done);
+	iowrite32(1, &idev->dev_cmd_regs->doorbell);
+}
+
+/* Device commands */
+void ionic_dev_cmd_identify(struct ionic_dev *idev, u8 ver)
+{
+	union ionic_dev_cmd cmd = {
+		.identify.opcode = IONIC_CMD_IDENTIFY,
+		.identify.ver = ver,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_init(struct ionic_dev *idev)
+{
+	union ionic_dev_cmd cmd = {
+		.init.opcode = IONIC_CMD_INIT,
+		.init.type = 0,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_reset(struct ionic_dev *idev)
+{
+	union ionic_dev_cmd cmd = {
+		.reset.opcode = IONIC_CMD_RESET,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+/* Port commands */
+void ionic_dev_cmd_port_identify(struct ionic_dev *idev)
+{
+	union ionic_dev_cmd cmd = {
+		.port_init.opcode = IONIC_CMD_PORT_IDENTIFY,
+		.port_init.index = 0,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_init(struct ionic_dev *idev)
+{
+	union ionic_dev_cmd cmd = {
+		.port_init.opcode = IONIC_CMD_PORT_INIT,
+		.port_init.index = 0,
+		.port_init.info_pa = cpu_to_le64(idev->port_info_pa),
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_reset(struct ionic_dev *idev)
+{
+	union ionic_dev_cmd cmd = {
+		.port_reset.opcode = IONIC_CMD_PORT_RESET,
+		.port_reset.index = 0,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_state(struct ionic_dev *idev, u8 state)
+{
+	union ionic_dev_cmd cmd = {
+		.port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+		.port_setattr.index = 0,
+		.port_setattr.attr = IONIC_PORT_ATTR_STATE,
+		.port_setattr.state = state,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_speed(struct ionic_dev *idev, u32 speed)
+{
+	union ionic_dev_cmd cmd = {
+		.port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+		.port_setattr.index = 0,
+		.port_setattr.attr = IONIC_PORT_ATTR_SPEED,
+		.port_setattr.speed = cpu_to_le32(speed),
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable)
+{
+	union ionic_dev_cmd cmd = {
+		.port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+		.port_setattr.index = 0,
+		.port_setattr.attr = IONIC_PORT_ATTR_AUTONEG,
+		.port_setattr.an_enable = an_enable,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type)
+{
+	union ionic_dev_cmd cmd = {
+		.port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+		.port_setattr.index = 0,
+		.port_setattr.attr = IONIC_PORT_ATTR_FEC,
+		.port_setattr.fec_type = fec_type,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
+{
+	union ionic_dev_cmd cmd = {
+		.port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+		.port_setattr.index = 0,
+		.port_setattr.attr = IONIC_PORT_ATTR_PAUSE,
+		.port_setattr.pause_type = pause_type,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+/* LIF commands */
+void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver)
+{
+	union ionic_dev_cmd cmd = {
+		.lif_identify.opcode = IONIC_CMD_LIF_IDENTIFY,
+		.lif_identify.type = type,
+		.lif_identify.ver = ver,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
+			    dma_addr_t info_pa)
+{
+	union ionic_dev_cmd cmd = {
+		.lif_init.opcode = IONIC_CMD_LIF_INIT,
+		.lif_init.index = cpu_to_le16(lif_index),
+		.lif_init.info_pa = cpu_to_le64(info_pa),
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_lif_reset(struct ionic_dev *idev, u16 lif_index)
+{
+	union ionic_dev_cmd cmd = {
+		.lif_init.opcode = IONIC_CMD_LIF_RESET,
+		.lif_init.index = cpu_to_le16(lif_index),
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
+			       u16 lif_index, u16 intr_index)
+{
+	struct ionic_queue *q = &qcq->q;
+	struct ionic_cq *cq = &qcq->cq;
+
+	union ionic_dev_cmd cmd = {
+		.q_init.opcode = IONIC_CMD_Q_INIT,
+		.q_init.lif_index = cpu_to_le16(lif_index),
+		.q_init.type = q->type,
+		.q_init.index = cpu_to_le32(q->index),
+		.q_init.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+					    IONIC_QINIT_F_ENA),
+		.q_init.pid = cpu_to_le16(q->pid),
+		.q_init.intr_index = cpu_to_le16(intr_index),
+		.q_init.ring_size = ilog2(q->num_descs),
+		.q_init.ring_base = cpu_to_le64(q->base_pa),
+		.q_init.cq_ring_base = cpu_to_le64(cq->base_pa),
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
+int ionic_db_page_num(struct ionic_lif *lif, int pid)
+{
+	return (lif->hw_index * lif->dbid_count) + pid;
+}
+
+int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
+		  struct ionic_intr_info *intr,
+		  unsigned int num_descs, size_t desc_size)
+{
+	struct ionic_cq_info *cur;
+	unsigned int ring_size;
+	unsigned int i;
+
+	if (desc_size == 0 || !is_power_of_2(num_descs))
+		return -EINVAL;
+
+	ring_size = ilog2(num_descs);
+	if (ring_size < 2 || ring_size > 16)
+		return -EINVAL;
+
+	cq->lif = lif;
+	cq->bound_intr = intr;
+	cq->num_descs = num_descs;
+	cq->desc_size = desc_size;
+	cq->tail = cq->info;
+	cq->done_color = 1;
+
+	cur = cq->info;
+
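+	/* link the info entries into a ring; 'last' marks the wrap point,
+	 * where the expected done_color flips
+	 */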
+	for (i = 0; i < num_descs; i++) {
+		if (i + 1 == num_descs) {
+			cur->next = cq->info;
+			cur->last = true;
+		} else {
+			cur->next = cur + 1;
+		}
+		cur->index = i;
+		cur++;
+	}
+
+	return 0;
+}
+
+void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa)
+{
+	struct ionic_cq_info *cur;
+	unsigned int i;
+
+	cq->base = base;
+	cq->base_pa = base_pa;
+
+	for (i = 0, cur = cq->info; i < cq->num_descs; i++, cur++)
+		cur->cq_desc = base + (i * cq->desc_size);
+}
+
+void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q)
+{
+	cq->bound_q = q;
+}
+
+unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
+			      ionic_cq_cb cb, ionic_cq_done_cb done_cb,
+			      void *done_arg)
+{
+	unsigned int work_done = 0;
+
+	if (work_to_do == 0)
+		return 0;
+
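+	/* consume entries for as long as the callback reports them done;
+	 * crossing the ring's wrap point flips the expected color bit
+	 */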
+	while (cb(cq, cq->tail)) {
+		if (cq->tail->last)
+			cq->done_color = !cq->done_color;
+		cq->tail = cq->tail->next;
+		DEBUG_STATS_CQE_CNT(cq);
+
+		if (++work_done >= work_to_do)
+			break;
+	}
+
+	if (work_done && done_cb)
+		done_cb(done_arg);
+
+	return work_done;
+}
+
+int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
+		 struct ionic_queue *q, unsigned int index, const char *name,
+		 unsigned int num_descs, size_t desc_size,
+		 size_t sg_desc_size, unsigned int pid)
+{
+	struct ionic_desc_info *cur;
+	unsigned int ring_size;
+	unsigned int i;
+
+	if (desc_size == 0 || !is_power_of_2(num_descs))
+		return -EINVAL;
+
+	ring_size = ilog2(num_descs);
+	if (ring_size < 2 || ring_size > 16)
+		return -EINVAL;
+
+	q->lif = lif;
+	q->idev = idev;
+	q->index = index;
+	q->num_descs = num_descs;
+	q->desc_size = desc_size;
+	q->sg_desc_size = sg_desc_size;
+	q->tail = q->info;
+	q->head = q->tail;
+	q->pid = pid;
+
+	snprintf(q->name, sizeof(q->name), "L%d-%s%u", lif->index, name, index);
+
+	cur = q->info;
+
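+	/* link the descriptor info entries into a ring; 'left' counts
+	 * this entry plus all that remain to the end of the array
+	 */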
+	for (i = 0; i < num_descs; i++) {
+		if (i + 1 == num_descs)
+			cur->next = q->info;
+		else
+			cur->next = cur + 1;
+		cur->index = i;
+		cur->left = num_descs - i;
+		cur++;
+	}
+
+	return 0;
+}
+
+void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
+{
+	struct ionic_desc_info *cur;
+	unsigned int i;
+
+	q->base = base;
+	q->base_pa = base_pa;
+
+	for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
+		cur->desc = base + (i * q->desc_size);
+}
+
+void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
+{
+	struct ionic_desc_info *cur;
+	unsigned int i;
+
+	q->sg_base = base;
+	q->sg_base_pa = base_pa;
+
+	for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
+		cur->sg_desc = base + (i * q->sg_desc_size);
+}
+
+void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
+		  void *cb_arg)
+{
+	struct device *dev = q->lif->ionic->dev;
+	struct ionic_lif *lif = q->lif;
+
+	q->head->cb = cb;
+	q->head->cb_arg = cb_arg;
+	q->head = q->head->next;
+
+	dev_dbg(dev, "lif=%d qname=%s qid=%d qtype=%d p_index=%d ringdb=%d\n",
+		q->lif->index, q->name, q->hw_type, q->hw_index,
+		q->head->index, ring_doorbell);
+
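+	/* the doorbell value combines q->dbval, which identifies the
+	 * queue and is set up at queue init, with the new producer index
+	 */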
+	if (ring_doorbell)
+		ionic_dbell_ring(lif->kern_dbpage, q->hw_type,
+				 q->dbval | q->head->index);
+}
+
+static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
+{
+	unsigned int mask, tail, head;
+
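+	/* ring indexes wrap at num_descs (a power of 2), so the masked
+	 * differences tell us whether pos lies in [tail, head)
+	 */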
+	mask = q->num_descs - 1;
+	tail = q->tail->index;
+	head = q->head->index;
+
+	return ((pos - tail) & mask) < ((head - tail) & mask);
+}
+
+void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
+		     unsigned int stop_index)
+{
+	struct ionic_desc_info *desc_info;
+	ionic_desc_cb cb;
+	void *cb_arg;
+
+	/* check for empty queue */
+	if (q->tail->index == q->head->index)
+		return;
+
+	/* stop index must be for a descriptor that is not yet completed */
+	if (unlikely(!ionic_q_is_posted(q, stop_index)))
+		dev_err(q->lif->ionic->dev,
+			"ionic stop is not posted %s stop %u tail %u head %u\n",
+			q->name, stop_index, q->tail->index, q->head->index);
+
+	do {
+		desc_info = q->tail;
+		q->tail = desc_info->next;
+
+		cb = desc_info->cb;
+		cb_arg = desc_info->cb_arg;
+
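+		/* detach the callback from the slot before running it so
+		 * the entry is clean if it is reposted right away
+		 */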
+		desc_info->cb = NULL;
+		desc_info->cb_arg = NULL;
+
+		if (cb)
+			cb(q, desc_info, cq_info, cb_arg);
+	} while (desc_info->index != stop_index);
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
new file mode 100644
index 0000000..9610aeb
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_DEV_H_
+#define _IONIC_DEV_H_
+
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+#include "ionic_if.h"
+#include "ionic_regs.h"
+
+#define IONIC_MIN_MTU			ETH_MIN_MTU
+#define IONIC_MAX_MTU			9194
+#define IONIC_MAX_TXRX_DESC		16384
+#define IONIC_MIN_TXRX_DESC		16
+#define IONIC_DEF_TXRX_DESC		4096
+#define IONIC_LIFS_MAX			1024
+#define IONIC_ITR_COAL_USEC_DEFAULT	64
+
+#define IONIC_DEV_CMD_REG_VERSION	1
+#define IONIC_DEV_INFO_REG_COUNT	32
+#define IONIC_DEV_CMD_REG_COUNT		32
+
+struct ionic_dev_bar {
+	void __iomem *vaddr;
+	phys_addr_t bus_addr;
+	unsigned long len;
+	int res_index;
+};
+
+/* Registers */
+static_assert(sizeof(struct ionic_intr) == 32);
+
+static_assert(sizeof(struct ionic_doorbell) == 8);
+static_assert(sizeof(struct ionic_intr_status) == 8);
+static_assert(sizeof(union ionic_dev_regs) == 4096);
+static_assert(sizeof(union ionic_dev_info_regs) == 2048);
+static_assert(sizeof(union ionic_dev_cmd_regs) == 2048);
+static_assert(sizeof(struct ionic_lif_stats) == 1024);
+
+static_assert(sizeof(struct ionic_admin_cmd) == 64);
+static_assert(sizeof(struct ionic_admin_comp) == 16);
+static_assert(sizeof(struct ionic_nop_cmd) == 64);
+static_assert(sizeof(struct ionic_nop_comp) == 16);
+
+/* Device commands */
+static_assert(sizeof(struct ionic_dev_identify_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_identify_comp) == 16);
+static_assert(sizeof(struct ionic_dev_init_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_init_comp) == 16);
+static_assert(sizeof(struct ionic_dev_reset_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_reset_comp) == 16);
+static_assert(sizeof(struct ionic_dev_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_getattr_comp) == 16);
+static_assert(sizeof(struct ionic_dev_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_setattr_comp) == 16);
+
+/* Port commands */
+static_assert(sizeof(struct ionic_port_identify_cmd) == 64);
+static_assert(sizeof(struct ionic_port_identify_comp) == 16);
+static_assert(sizeof(struct ionic_port_init_cmd) == 64);
+static_assert(sizeof(struct ionic_port_init_comp) == 16);
+static_assert(sizeof(struct ionic_port_reset_cmd) == 64);
+static_assert(sizeof(struct ionic_port_reset_comp) == 16);
+static_assert(sizeof(struct ionic_port_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_port_getattr_comp) == 16);
+static_assert(sizeof(struct ionic_port_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_port_setattr_comp) == 16);
+
+/* LIF commands */
+static_assert(sizeof(struct ionic_lif_init_cmd) == 64);
+static_assert(sizeof(struct ionic_lif_init_comp) == 16);
+static_assert(sizeof(struct ionic_lif_reset_cmd) == 64);
+static_assert(sizeof(ionic_lif_reset_comp) == 16);
+static_assert(sizeof(struct ionic_lif_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_lif_getattr_comp) == 16);
+static_assert(sizeof(struct ionic_lif_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_lif_setattr_comp) == 16);
+
+static_assert(sizeof(struct ionic_q_init_cmd) == 64);
+static_assert(sizeof(struct ionic_q_init_comp) == 16);
+static_assert(sizeof(struct ionic_q_control_cmd) == 64);
+static_assert(sizeof(ionic_q_control_comp) == 16);
+
+static_assert(sizeof(struct ionic_rx_mode_set_cmd) == 64);
+static_assert(sizeof(ionic_rx_mode_set_comp) == 16);
+static_assert(sizeof(struct ionic_rx_filter_add_cmd) == 64);
+static_assert(sizeof(struct ionic_rx_filter_add_comp) == 16);
+static_assert(sizeof(struct ionic_rx_filter_del_cmd) == 64);
+static_assert(sizeof(ionic_rx_filter_del_comp) == 16);
+
+/* RDMA commands */
+static_assert(sizeof(struct ionic_rdma_reset_cmd) == 64);
+static_assert(sizeof(struct ionic_rdma_queue_cmd) == 64);
+
+/* Events */
+static_assert(sizeof(struct ionic_notifyq_cmd) == 4);
+static_assert(sizeof(union ionic_notifyq_comp) == 64);
+static_assert(sizeof(struct ionic_notifyq_event) == 64);
+static_assert(sizeof(struct ionic_link_change_event) == 64);
+static_assert(sizeof(struct ionic_reset_event) == 64);
+static_assert(sizeof(struct ionic_heartbeat_event) == 64);
+static_assert(sizeof(struct ionic_log_event) == 64);
+
+/* I/O */
+static_assert(sizeof(struct ionic_txq_desc) == 16);
+static_assert(sizeof(struct ionic_txq_sg_desc) == 128);
+static_assert(sizeof(struct ionic_txq_comp) == 16);
+
+static_assert(sizeof(struct ionic_rxq_desc) == 16);
+static_assert(sizeof(struct ionic_rxq_sg_desc) == 128);
+static_assert(sizeof(struct ionic_rxq_comp) == 16);
+
+struct ionic_devinfo {
+	u8 asic_type;
+	u8 asic_rev;
+	char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN + 1];
+	char serial_num[IONIC_DEVINFO_SERIAL_BUFLEN + 1];
+};
+
+struct ionic_dev {
+	union ionic_dev_info_regs __iomem *dev_info_regs;
+	union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
+
+	u64 __iomem *db_pages;
+	dma_addr_t phy_db_pages;
+
+	struct ionic_intr __iomem *intr_ctrl;
+	u64 __iomem *intr_status;
+
+	u32 port_info_sz;
+	struct ionic_port_info *port_info;
+	dma_addr_t port_info_pa;
+
+	struct ionic_devinfo dev_info;
+};
+
+struct ionic_cq_info {
+	void *cq_desc;
+	struct ionic_cq_info *next;
+	unsigned int index;
+	bool last;
+};
+
+struct ionic_queue;
+struct ionic_qcq;
+struct ionic_desc_info;
+
+typedef void (*ionic_desc_cb)(struct ionic_queue *q,
+			      struct ionic_desc_info *desc_info,
+			      struct ionic_cq_info *cq_info, void *cb_arg);
+
+struct ionic_desc_info {
+	void *desc;
+	void *sg_desc;
+	struct ionic_desc_info *next;
+	unsigned int index;
+	unsigned int left;
+	ionic_desc_cb cb;
+	void *cb_arg;
+};
+
+#define QUEUE_NAME_MAX_SZ		32
+
+struct ionic_queue {
+	u64 dbell_count;
+	u64 drop;
+	u64 stop;
+	u64 wake;
+	struct ionic_lif *lif;
+	struct ionic_desc_info *info;
+	struct ionic_desc_info *tail;
+	struct ionic_desc_info *head;
+	struct ionic_dev *idev;
+	unsigned int index;
+	unsigned int type;
+	unsigned int hw_index;
+	unsigned int hw_type;
+	u64 dbval;
+	void *base;
+	void *sg_base;
+	dma_addr_t base_pa;
+	dma_addr_t sg_base_pa;
+	unsigned int num_descs;
+	unsigned int desc_size;
+	unsigned int sg_desc_size;
+	unsigned int pid;
+	char name[QUEUE_NAME_MAX_SZ];
+};
+
+#define INTR_INDEX_NOT_ASSIGNED		-1
+#define INTR_NAME_MAX_SZ		32
+
+struct ionic_intr_info {
+	char name[INTR_NAME_MAX_SZ];
+	unsigned int index;
+	unsigned int vector;
+	u64 rearm_count;
+	unsigned int cpu;
+	cpumask_t affinity_mask;
+};
+
+struct ionic_cq {
+	void *base;
+	dma_addr_t base_pa;
+	struct ionic_lif *lif;
+	struct ionic_cq_info *info;
+	struct ionic_cq_info *tail;
+	struct ionic_queue *bound_q;
+	struct ionic_intr_info *bound_intr;
+	bool done_color;
+	unsigned int num_descs;
+	u64 compl_count;
+	unsigned int desc_size;
+};
+
+struct ionic;
+
+static inline void ionic_intr_init(struct ionic_dev *idev,
+				   struct ionic_intr_info *intr,
+				   unsigned long index)
+{
+	ionic_intr_clean(idev->intr_ctrl, index);
+	intr->index = index;
+}
+
+static inline unsigned int ionic_q_space_avail(struct ionic_queue *q)
+{
+	unsigned int avail = q->tail->index;
+
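+	/* count the free slots between head and tail; one slot is always
+	 * left open so that head == tail unambiguously means empty
+	 */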
+	if (q->head->index >= avail)
+		avail += q->head->left - 1;
+	else
+		avail -= q->head->index + 1;
+
+	return avail;
+}
+
+static inline bool ionic_q_has_space(struct ionic_queue *q, unsigned int want)
+{
+	return ionic_q_space_avail(q) >= want;
+}
+
+void ionic_init_devinfo(struct ionic *ionic);
+int ionic_dev_setup(struct ionic *ionic);
+void ionic_dev_teardown(struct ionic *ionic);
+
+void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd);
+u8 ionic_dev_cmd_status(struct ionic_dev *idev);
+bool ionic_dev_cmd_done(struct ionic_dev *idev);
+void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp);
+
+void ionic_dev_cmd_identify(struct ionic_dev *idev, u8 ver);
+void ionic_dev_cmd_init(struct ionic_dev *idev);
+void ionic_dev_cmd_reset(struct ionic_dev *idev);
+
+void ionic_dev_cmd_port_identify(struct ionic_dev *idev);
+void ionic_dev_cmd_port_init(struct ionic_dev *idev);
+void ionic_dev_cmd_port_reset(struct ionic_dev *idev);
+void ionic_dev_cmd_port_state(struct ionic_dev *idev, u8 state);
+void ionic_dev_cmd_port_speed(struct ionic_dev *idev, u32 speed);
+void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
+void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
+void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
+
+void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
+void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
+			    dma_addr_t addr);
+void ionic_dev_cmd_lif_reset(struct ionic_dev *idev, u16 lif_index);
+void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
+			       u16 lif_index, u16 intr_index);
+
+int ionic_db_page_num(struct ionic_lif *lif, int pid);
+
+int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
+		  struct ionic_intr_info *intr,
+		  unsigned int num_descs, size_t desc_size);
+void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa);
+void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q);
+typedef bool (*ionic_cq_cb)(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
+typedef void (*ionic_cq_done_cb)(void *done_arg);
+unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
+			      ionic_cq_cb cb, ionic_cq_done_cb done_cb,
+			      void *done_arg);
+
+int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
+		 struct ionic_queue *q, unsigned int index, const char *name,
+		 unsigned int num_descs, size_t desc_size,
+		 size_t sg_desc_size, unsigned int pid);
+void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa);
+void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa);
+void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
+		  void *cb_arg);
+void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
+void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
+		     unsigned int stop_index);
+
+#endif /* _IONIC_DEV_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
new file mode 100644
index 0000000..af1647a
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_devlink.h"
+
+static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+			     struct netlink_ext_ack *extack)
+{
+	struct ionic *ionic = devlink_priv(dl);
+	struct ionic_dev *idev = &ionic->idev;
+	char buf[16];
+	int err = 0;
+
+	err = devlink_info_driver_name_put(req, IONIC_DRV_NAME);
+	if (err)
+		goto info_out;
+
+	err = devlink_info_version_running_put(req,
+					       DEVLINK_INFO_VERSION_GENERIC_FW,
+					       idev->dev_info.fw_version);
+	if (err)
+		goto info_out;
+
+	snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type);
+	err = devlink_info_version_fixed_put(req,
+					     DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+					     buf);
+	if (err)
+		goto info_out;
+
+	snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev);
+	err = devlink_info_version_fixed_put(req,
+					     DEVLINK_INFO_VERSION_GENERIC_ASIC_REV,
+					     buf);
+	if (err)
+		goto info_out;
+
+	err = devlink_info_serial_number_put(req, idev->dev_info.serial_num);
+
+info_out:
+	return err;
+}
+
+static const struct devlink_ops ionic_dl_ops = {
+	.info_get	= ionic_dl_info_get,
+};
+
+struct ionic *ionic_devlink_alloc(struct device *dev)
+{
+	struct devlink *dl;
+
+	dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+
+	return devlink_priv(dl);
+}
+
+void ionic_devlink_free(struct ionic *ionic)
+{
+	struct devlink *dl = priv_to_devlink(ionic);
+
+	devlink_free(dl);
+}
+
+int ionic_devlink_register(struct ionic *ionic)
+{
+	struct devlink *dl = priv_to_devlink(ionic);
+	int err;
+
+	err = devlink_register(dl, ionic->dev);
+	if (err) {
+		dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
+		return err;
+	}
+
+	devlink_port_attrs_set(&ionic->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+			       0, false, 0, NULL, 0);
+	err = devlink_port_register(dl, &ionic->dl_port, 0);
+	if (err)
+		dev_err(ionic->dev, "devlink_port_register failed: %d\n", err);
+	else
+		devlink_port_type_eth_set(&ionic->dl_port,
+					  ionic->master_lif->netdev);
+
+	return err;
+}
+
+void ionic_devlink_unregister(struct ionic *ionic)
+{
+	struct devlink *dl = priv_to_devlink(ionic);
+
+	devlink_port_unregister(&ionic->dl_port);
+	devlink_unregister(dl);
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.h b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h
new file mode 100644
index 0000000..0690172
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_DEVLINK_H_
+#define _IONIC_DEVLINK_H_
+
+#include <net/devlink.h>
+
+struct ionic *ionic_devlink_alloc(struct device *dev);
+void ionic_devlink_free(struct ionic *ionic);
+int ionic_devlink_register(struct ionic *ionic);
+void ionic_devlink_unregister(struct ionic *ionic);
+
+#endif /* _IONIC_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
new file mode 100644
index 0000000..7d10265
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_ethtool.h"
+#include "ionic_stats.h"
+
+static const char ionic_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define PRIV_F_SW_DBG_STATS		BIT(0)
+	"sw-dbg-stats",
+};
+#define PRIV_FLAGS_COUNT ARRAY_SIZE(ionic_priv_flags_strings)
+
+static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf)
+{
+	u32 i;
+
+	for (i = 0; i < ionic_num_stats_grps; i++)
+		ionic_stats_groups[i].get_strings(lif, &buf);
+}
+
+static void ionic_get_stats(struct net_device *netdev,
+			    struct ethtool_stats *stats, u64 *buf)
+{
+	struct ionic_lif *lif;
+	u32 i;
+
+	lif = netdev_priv(netdev);
+
+	memset(buf, 0, stats->n_stats * sizeof(*buf));
+	for (i = 0; i < ionic_num_stats_grps; i++)
+		ionic_stats_groups[i].get_values(lif, &buf);
+}
+
+static int ionic_get_stats_count(struct ionic_lif *lif)
+{
+	int i, num_stats = 0;
+
+	for (i = 0; i < ionic_num_stats_grps; i++)
+		num_stats += ionic_stats_groups[i].get_count(lif);
+
+	return num_stats;
+}
+
+static int ionic_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int count = 0;
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		count = ionic_get_stats_count(lif);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		count = PRIV_FLAGS_COUNT;
+		break;
+	}
+	return count;
+}
+
+static void ionic_get_strings(struct net_device *netdev,
+			      u32 sset, u8 *buf)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		ionic_get_stats_strings(lif, buf);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(buf, ionic_priv_flags_strings,
+		       PRIV_FLAGS_COUNT * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static void ionic_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+
+	strlcpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, IONIC_DRV_VERSION, sizeof(drvinfo->version));
+	strlcpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version,
+		sizeof(drvinfo->fw_version));
+	strlcpy(drvinfo->bus_info, ionic_bus_info(ionic),
+		sizeof(drvinfo->bus_info));
+}
+
+static int ionic_get_regs_len(struct net_device *netdev)
+{
+	return (IONIC_DEV_INFO_REG_COUNT + IONIC_DEV_CMD_REG_COUNT) * sizeof(u32);
+}
+
+static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+			   void *p)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	unsigned int size;
+
+	regs->version = IONIC_DEV_CMD_REG_VERSION;
+
+	size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32);
+	memcpy_fromio(p, lif->ionic->idev.dev_info_regs->words, size);
+
+	/* the dev_cmd registers follow the dev_info registers in the buffer */
+	p += size;
+	size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32);
+	memcpy_fromio(p, lif->ionic->idev.dev_cmd_regs->words, size);
+}
+
+static int ionic_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *ks)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_dev *idev = &lif->ionic->idev;
+	int copper_seen = 0;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+
+	/* The port_info data lives in a DMA space that the NIC keeps
+	 * up to date, so there's no need to request it from the NIC;
+	 * we already have a current copy in our memory space.
+	 */
+
+	switch (le16_to_cpu(idev->port_info->status.xcvr.pid)) {
+		/* Copper */
+	case IONIC_XCVR_PID_QSFP_100G_CR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		copper_seen++;
+		break;
+	case IONIC_XCVR_PID_QSFP_40GBASE_CR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		copper_seen++;
+		break;
+	case IONIC_XCVR_PID_SFP_25GBASE_CR_S:
+	case IONIC_XCVR_PID_SFP_25GBASE_CR_L:
+	case IONIC_XCVR_PID_SFP_25GBASE_CR_N:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		copper_seen++;
+		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_AOC:
+	case IONIC_XCVR_PID_SFP_10GBASE_CU:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseCR_Full);
+		copper_seen++;
+		break;
+
+		/* Fibre */
+	case IONIC_XCVR_PID_QSFP_100G_SR4:
+	case IONIC_XCVR_PID_QSFP_100G_AOC:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseSR4_Full);
+		break;
+	case IONIC_XCVR_PID_QSFP_100G_LR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseLR4_ER4_Full);
+		break;
+	case IONIC_XCVR_PID_QSFP_100G_ER4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseLR4_ER4_Full);
+		break;
+	case IONIC_XCVR_PID_QSFP_40GBASE_SR4:
+	case IONIC_XCVR_PID_QSFP_40GBASE_AOC:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
+		break;
+	case IONIC_XCVR_PID_QSFP_40GBASE_LR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_25GBASE_SR:
+	case IONIC_XCVR_PID_SFP_25GBASE_AOC:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_SR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_LR:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_LRM:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLRM_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_ER:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseER_Full);
+		break;
+	case IONIC_XCVR_PID_UNKNOWN:
+		/* This means there's no module plugged in */
+		break;
+	default:
+		dev_info(lif->ionic->dev, "unknown xcvr type pid=%d / 0x%x\n",
+			 le16_to_cpu(idev->port_info->status.xcvr.pid),
+			 le16_to_cpu(idev->port_info->status.xcvr.pid));
+		break;
+	}
+
+	bitmap_copy(ks->link_modes.advertising, ks->link_modes.supported,
+		    __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+	if (idev->port_info->config.fec_type == IONIC_PORT_FEC_TYPE_FC)
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_BASER);
+	else if (idev->port_info->config.fec_type == IONIC_PORT_FEC_TYPE_RS)
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+	ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
+	if (idev->port_info->status.xcvr.phy == IONIC_PHY_TYPE_COPPER ||
+	    copper_seen)
+		ks->base.port = PORT_DA;
+	else if (idev->port_info->status.xcvr.phy == IONIC_PHY_TYPE_FIBER)
+		ks->base.port = PORT_FIBRE;
+	else
+		ks->base.port = PORT_NONE;
+
+	if (ks->base.port != PORT_NONE) {
+		ks->base.speed = le32_to_cpu(lif->info->status.link_speed);
+
+		if (le16_to_cpu(lif->info->status.link_status))
+			ks->base.duplex = DUPLEX_FULL;
+		else
+			ks->base.duplex = DUPLEX_UNKNOWN;
+
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+
+		if (idev->port_info->config.an_enable) {
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     Autoneg);
+			ks->base.autoneg = AUTONEG_ENABLE;
+		}
+	}
+
+	return 0;
+}
+
+static int ionic_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *ks)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+	struct ionic_dev *idev;
+	u32 req_rs, req_fc;
+	u8 fec_type;
+	int err = 0;
+
+	idev = &lif->ionic->idev;
+	fec_type = IONIC_PORT_FEC_TYPE_NONE;
+
+	/* set autoneg */
+	if (ks->base.autoneg != idev->port_info->config.an_enable) {
+		mutex_lock(&ionic->dev_cmd_lock);
+		ionic_dev_cmd_port_autoneg(idev, ks->base.autoneg);
+		err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+		mutex_unlock(&ionic->dev_cmd_lock);
+		if (err)
+			return err;
+	}
+
+	/* set speed */
+	if (ks->base.speed != le32_to_cpu(idev->port_info->config.speed)) {
+		mutex_lock(&ionic->dev_cmd_lock);
+		ionic_dev_cmd_port_speed(idev, ks->base.speed);
+		err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+		mutex_unlock(&ionic->dev_cmd_lock);
+		if (err)
+			return err;
+	}
+
+	/* set FEC */
+	req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS);
+	req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER);
+	if (req_rs && req_fc) {
+		netdev_info(netdev, "Only select one FEC mode at a time\n");
+		return -EINVAL;
+	} else if (req_fc) {
+		fec_type = IONIC_PORT_FEC_TYPE_FC;
+	} else if (req_rs) {
+		fec_type = IONIC_PORT_FEC_TYPE_RS;
+	}
+
+	if (fec_type != idev->port_info->config.fec_type) {
+		mutex_lock(&ionic->dev_cmd_lock);
+		ionic_dev_cmd_port_fec(idev, fec_type);
+		err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+		mutex_unlock(&ionic->dev_cmd_lock);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void ionic_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	u8 pause_type;
+
+	pause->autoneg = 0;
+
+	pause_type = lif->ionic->idev.port_info->config.pause_type;
+	if (pause_type) {
+		pause->rx_pause = pause_type & IONIC_PAUSE_F_RX ? 1 : 0;
+		pause->tx_pause = pause_type & IONIC_PAUSE_F_TX ? 1 : 0;
+	}
+}
+
+static int ionic_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+	u32 requested_pause;
+	int err;
+
+	if (pause->autoneg)
+		return -EOPNOTSUPP;
+
+	/* change both at the same time */
+	requested_pause = IONIC_PORT_PAUSE_TYPE_LINK;
+	if (pause->rx_pause)
+		requested_pause |= IONIC_PAUSE_F_RX;
+	if (pause->tx_pause)
+		requested_pause |= IONIC_PAUSE_F_TX;
+
+	if (requested_pause == lif->ionic->idev.port_info->config.pause_type)
+		return 0;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_port_pause(&lif->ionic->idev, requested_pause);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&ionic->dev_cmd_lock);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int ionic_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *coalesce)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	/* Tx uses Rx interrupt */
+	coalesce->tx_coalesce_usecs = lif->rx_coalesce_usecs;
+	coalesce->rx_coalesce_usecs = lif->rx_coalesce_usecs;
+
+	return 0;
+}
+
+static int ionic_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *coalesce)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_identity *ident;
+	struct ionic_qcq *qcq;
+	unsigned int i;
+	u32 usecs;
+	u32 coal;
+
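+	/* reject any coalescing parameter this device doesn't implement */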
+	if (coalesce->rx_max_coalesced_frames ||
+	    coalesce->rx_coalesce_usecs_irq ||
+	    coalesce->rx_max_coalesced_frames_irq ||
+	    coalesce->tx_max_coalesced_frames ||
+	    coalesce->tx_coalesce_usecs_irq ||
+	    coalesce->tx_max_coalesced_frames_irq ||
+	    coalesce->stats_block_coalesce_usecs ||
+	    coalesce->use_adaptive_rx_coalesce ||
+	    coalesce->use_adaptive_tx_coalesce ||
+	    coalesce->pkt_rate_low ||
+	    coalesce->rx_coalesce_usecs_low ||
+	    coalesce->rx_max_coalesced_frames_low ||
+	    coalesce->tx_coalesce_usecs_low ||
+	    coalesce->tx_max_coalesced_frames_low ||
+	    coalesce->pkt_rate_high ||
+	    coalesce->rx_coalesce_usecs_high ||
+	    coalesce->rx_max_coalesced_frames_high ||
+	    coalesce->tx_coalesce_usecs_high ||
+	    coalesce->tx_max_coalesced_frames_high ||
+	    coalesce->rate_sample_interval)
+		return -EINVAL;
+
+	ident = &lif->ionic->ident;
+	if (ident->dev.intr_coal_div == 0) {
+		netdev_warn(netdev, "bad HW value in dev.intr_coal_div = %d\n",
+			    ident->dev.intr_coal_div);
+		return -EIO;
+	}
+
+	/* Tx uses Rx interrupt, so only change Rx */
+	if (coalesce->tx_coalesce_usecs != lif->rx_coalesce_usecs) {
+		netdev_warn(netdev, "only the rx-usecs can be changed\n");
+		return -EINVAL;
+	}
+
+	coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs);
+
+	if (coal > IONIC_INTR_CTRL_COAL_MAX)
+		return -ERANGE;
+
+	/* If they asked for non-zero and it resolved to zero, bump it up */
+	if (!coal && coalesce->rx_coalesce_usecs)
+		coal = 1;
+
+	/* Convert it back to get device resolution */
+	usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+
+	if (usecs != lif->rx_coalesce_usecs) {
+		lif->rx_coalesce_usecs = usecs;
+
+		if (test_bit(IONIC_LIF_UP, lif->state)) {
+			for (i = 0; i < lif->nxqs; i++) {
+				qcq = lif->rxqcqs[i].qcq;
+				ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+						     qcq->intr.index, coal);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void ionic_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	ring->tx_max_pending = IONIC_MAX_TXRX_DESC;
+	ring->tx_pending = lif->ntxq_descs;
+	ring->rx_max_pending = IONIC_MAX_TXRX_DESC;
+	ring->rx_pending = lif->nrxq_descs;
+}
+
+static int ionic_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	bool running;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
+		netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n");
+		return -EINVAL;
+	}
+
+	if (!is_power_of_2(ring->tx_pending) ||
+	    !is_power_of_2(ring->rx_pending)) {
+		netdev_info(netdev, "Descriptor count must be a power of 2\n");
+		return -EINVAL;
+	}
+
+	/* if nothing to do return success */
+	if (ring->tx_pending == lif->ntxq_descs &&
+	    ring->rx_pending == lif->nrxq_descs)
+		return 0;
+
+	if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
+		return -EBUSY;
+
+	running = test_bit(IONIC_LIF_UP, lif->state);
+	if (running)
+		ionic_stop(netdev);
+
+	lif->ntxq_descs = ring->tx_pending;
+	lif->nrxq_descs = ring->rx_pending;
+
+	if (running)
+		ionic_open(netdev);
+	clear_bit(IONIC_LIF_QUEUE_RESET, lif->state);
+
+	return 0;
+}
+
+static void ionic_get_channels(struct net_device *netdev,
+			       struct ethtool_channels *ch)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	/* report maximum channels */
+	ch->max_combined = lif->ionic->ntxqs_per_lif;
+
+	/* report current channels */
+	ch->combined_count = lif->nxqs;
+}
+
+static int ionic_set_channels(struct net_device *netdev,
+			      struct ethtool_channels *ch)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	bool running;
+
+	if (!ch->combined_count || ch->other_count ||
+	    ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	if (ch->combined_count == lif->nxqs)
+		return 0;
+
+	if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
+		return -EBUSY;
+
+	running = test_bit(IONIC_LIF_UP, lif->state);
+	if (running)
+		ionic_stop(netdev);
+
+	lif->nxqs = ch->combined_count;
+
+	if (running)
+		ionic_open(netdev);
+	clear_bit(IONIC_LIF_QUEUE_RESET, lif->state);
+
+	return 0;
+}
+
+static u32 ionic_get_priv_flags(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state))
+		priv_flags |= PRIV_F_SW_DBG_STATS;
+
+	return priv_flags;
+}
+
+static int ionic_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	clear_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state);
+	if (priv_flags & PRIV_F_SW_DBG_STATS)
+		set_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state);
+
+	return 0;
+}
+
+static int ionic_get_rxnfc(struct net_device *netdev,
+			   struct ethtool_rxnfc *info, u32 *rules)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int err = 0;
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data = lif->nxqs;
+		break;
+	default:
+		netdev_err(netdev, "Command parameter %d is not supported\n",
+			   info->cmd);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static u32 ionic_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	return le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+}
+
+static u32 ionic_get_rxfh_key_size(struct net_device *netdev)
+{
+	return IONIC_RSS_HASH_KEY_SIZE;
+}
+
+static int ionic_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			  u8 *hfunc)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	unsigned int i, tbl_sz;
+
+	if (indir) {
+		tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+		for (i = 0; i < tbl_sz; i++)
+			indir[i] = lif->rss_ind_tbl[i];
+	}
+
+	if (key)
+		memcpy(key, lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE);
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	return 0;
+}
+
+static int ionic_set_rxfh(struct net_device *netdev, const u32 *indir,
+			  const u8 *key, const u8 hfunc)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int err;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	err = ionic_lif_rss_config(lif, lif->rss_types, key, indir);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int ionic_set_tunable(struct net_device *dev,
+			     const struct ethtool_tunable *tuna,
+			     const void *data)
+{
+	struct ionic_lif *lif = netdev_priv(dev);
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		lif->rx_copybreak = *(u32 *)data;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int ionic_get_tunable(struct net_device *netdev,
+			     const struct ethtool_tunable *tuna, void *data)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = lif->rx_copybreak;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int ionic_get_module_info(struct net_device *netdev,
+				 struct ethtool_modinfo *modinfo)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_dev *idev = &lif->ionic->idev;
+	struct ionic_xcvr_status *xcvr;
+
+	xcvr = &idev->port_info->status.xcvr;
+
+	/* report the module data type and length */
+	switch (xcvr->sprom[0]) {
+	case 0x03: /* SFP */
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		break;
+	case 0x0D: /* QSFP */
+	case 0x11: /* QSFP28 */
+		modinfo->type = ETH_MODULE_SFF_8436;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		break;
+	default:
+		netdev_info(netdev, "unknown xcvr type 0x%02x\n",
+			    xcvr->sprom[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static int ionic_get_module_eeprom(struct net_device *netdev,
+				   struct ethtool_eeprom *ee,
+				   u8 *data)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_dev *idev = &lif->ionic->idev;
+	struct ionic_xcvr_status *xcvr;
+	char tbuf[sizeof(xcvr->sprom)];
+	int count = 10;
+	u32 len;
+
+	/* The NIC keeps the module prom up-to-date in the DMA space
+	 * so we can simply copy the module bytes into the data buffer.
+	 */
+	xcvr = &idev->port_info->status.xcvr;
+	len = min_t(u32, sizeof(xcvr->sprom), ee->len);
+
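+	/* read twice and compare, with a bounded retry, in case the NIC
+	 * updates the prom contents mid-copy
+	 */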
+	do {
+		memcpy(data, xcvr->sprom, len);
+		memcpy(tbuf, xcvr->sprom, len);
+
+		/* Let's make sure we got a consistent copy */
+		if (!memcmp(data, tbuf, len))
+			break;
+
+	} while (--count);
+
+	if (!count)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int ionic_nway_reset(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+	int err = 0;
+
+	/* flap the link to force auto-negotiation */
+
+	mutex_lock(&ionic->dev_cmd_lock);
+
+	ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_DOWN);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+
+	if (!err) {
+		ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_UP);
+		err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	}
+
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	return err;
+}
+
+static const struct ethtool_ops ionic_ethtool_ops = {
+	.get_drvinfo		= ionic_get_drvinfo,
+	.get_regs_len		= ionic_get_regs_len,
+	.get_regs		= ionic_get_regs,
+	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= ionic_get_link_ksettings,
+	.get_coalesce		= ionic_get_coalesce,
+	.set_coalesce		= ionic_set_coalesce,
+	.get_ringparam		= ionic_get_ringparam,
+	.set_ringparam		= ionic_set_ringparam,
+	.get_channels		= ionic_get_channels,
+	.set_channels		= ionic_set_channels,
+	.get_strings		= ionic_get_strings,
+	.get_ethtool_stats	= ionic_get_stats,
+	.get_sset_count		= ionic_get_sset_count,
+	.get_priv_flags		= ionic_get_priv_flags,
+	.set_priv_flags		= ionic_set_priv_flags,
+	.get_rxnfc		= ionic_get_rxnfc,
+	.get_rxfh_indir_size	= ionic_get_rxfh_indir_size,
+	.get_rxfh_key_size	= ionic_get_rxfh_key_size,
+	.get_rxfh		= ionic_get_rxfh,
+	.set_rxfh		= ionic_set_rxfh,
+	.get_tunable		= ionic_get_tunable,
+	.set_tunable		= ionic_set_tunable,
+	.get_module_info	= ionic_get_module_info,
+	.get_module_eeprom	= ionic_get_module_eeprom,
+	.get_pauseparam		= ionic_get_pauseparam,
+	.set_pauseparam		= ionic_set_pauseparam,
+	.set_link_ksettings	= ionic_set_link_ksettings,
+	.nway_reset		= ionic_nway_reset,
+};
+
+void ionic_ethtool_set_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ionic_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h
new file mode 100644
index 0000000..38b91b1
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_ETHTOOL_H_
+#define _IONIC_ETHTOOL_H_
+
+void ionic_ethtool_set_ops(struct net_device *netdev);
+
+#endif /* _IONIC_ETHTOOL_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
new file mode 100644
index 0000000..5bfdda1
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -0,0 +1,2482 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* Copyright (c) 2017-2019 Pensando Systems, Inc.  All rights reserved. */
+
+#ifndef _IONIC_IF_H_
+#define _IONIC_IF_H_
+
+#pragma pack(push, 1)
+
+#define IONIC_DEV_INFO_SIGNATURE		0x44455649      /* 'DEVI' */
+#define IONIC_DEV_INFO_VERSION			1
+#define IONIC_IFNAMSIZ				16
+
+/**
+ * Commands
+ */
+enum ionic_cmd_opcode {
+	IONIC_CMD_NOP				= 0,
+
+	/* Device commands */
+	IONIC_CMD_IDENTIFY			= 1,
+	IONIC_CMD_INIT				= 2,
+	IONIC_CMD_RESET				= 3,
+	IONIC_CMD_GETATTR			= 4,
+	IONIC_CMD_SETATTR			= 5,
+
+	/* Port commands */
+	IONIC_CMD_PORT_IDENTIFY			= 10,
+	IONIC_CMD_PORT_INIT			= 11,
+	IONIC_CMD_PORT_RESET			= 12,
+	IONIC_CMD_PORT_GETATTR			= 13,
+	IONIC_CMD_PORT_SETATTR			= 14,
+
+	/* LIF commands */
+	IONIC_CMD_LIF_IDENTIFY			= 20,
+	IONIC_CMD_LIF_INIT			= 21,
+	IONIC_CMD_LIF_RESET			= 22,
+	IONIC_CMD_LIF_GETATTR			= 23,
+	IONIC_CMD_LIF_SETATTR			= 24,
+
+	IONIC_CMD_RX_MODE_SET			= 30,
+	IONIC_CMD_RX_FILTER_ADD			= 31,
+	IONIC_CMD_RX_FILTER_DEL			= 32,
+
+	/* Queue commands */
+	IONIC_CMD_Q_INIT			= 40,
+	IONIC_CMD_Q_CONTROL			= 41,
+
+	/* RDMA commands */
+	IONIC_CMD_RDMA_RESET_LIF		= 50,
+	IONIC_CMD_RDMA_CREATE_EQ		= 51,
+	IONIC_CMD_RDMA_CREATE_CQ		= 52,
+	IONIC_CMD_RDMA_CREATE_ADMINQ		= 53,
+
+	/* QoS commands */
+	IONIC_CMD_QOS_CLASS_IDENTIFY		= 240,
+	IONIC_CMD_QOS_CLASS_INIT		= 241,
+	IONIC_CMD_QOS_CLASS_RESET		= 242,
+
+	/* Firmware commands */
+	IONIC_CMD_FW_DOWNLOAD			= 254,
+	IONIC_CMD_FW_CONTROL			= 255,
+};
+
+/**
+ * Command Return codes
+ */
+enum ionic_status_code {
+	IONIC_RC_SUCCESS	= 0,	/* Success */
+	IONIC_RC_EVERSION	= 1,	/* Incorrect version for request */
+	IONIC_RC_EOPCODE	= 2,	/* Invalid cmd opcode */
+	IONIC_RC_EIO		= 3,	/* I/O error */
+	IONIC_RC_EPERM		= 4,	/* Permission denied */
+	IONIC_RC_EQID		= 5,	/* Bad qid */
+	IONIC_RC_EQTYPE		= 6,	/* Bad qtype */
+	IONIC_RC_ENOENT		= 7,	/* No such element */
+	IONIC_RC_EINTR		= 8,	/* operation interrupted */
+	IONIC_RC_EAGAIN		= 9,	/* Try again */
+	IONIC_RC_ENOMEM		= 10,	/* Out of memory */
+	IONIC_RC_EFAULT		= 11,	/* Bad address */
+	IONIC_RC_EBUSY		= 12,	/* Device or resource busy */
+	IONIC_RC_EEXIST		= 13,	/* object already exists */
+	IONIC_RC_EINVAL		= 14,	/* Invalid argument */
+	IONIC_RC_ENOSPC		= 15,	/* No space left or alloc failure */
+	IONIC_RC_ERANGE		= 16,	/* Parameter out of range */
+	IONIC_RC_BAD_ADDR	= 17,	/* Descriptor contains a bad ptr */
+	IONIC_RC_DEV_CMD	= 18,	/* Device cmd attempted on AdminQ */
+	IONIC_RC_ENOSUPP	= 19,	/* Operation not supported */
+	IONIC_RC_ERROR		= 29,	/* Generic error */
+
+	IONIC_RC_ERDMA		= 30,	/* Generic RDMA error */
+};
+
+enum ionic_notifyq_opcode {
+	IONIC_EVENT_LINK_CHANGE		= 1,
+	IONIC_EVENT_RESET		= 2,
+	IONIC_EVENT_HEARTBEAT		= 3,
+	IONIC_EVENT_LOG			= 4,
+};
+
+/**
+ * struct admin_cmd - General admin command format
+ * @opcode:     Opcode for the command
+ * @lif_index:  LIF index
+ * @cmd_data:   Opcode-specific command bytes
+ */
+struct ionic_admin_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_index;
+	u8     cmd_data[60];
+};
+
+/**
+ * struct admin_comp - General admin command completion format
+ * @status:     The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ *              is the completion.
+ * @cmd_data:   Command-specific bytes.
+ * @color:      Color bit.  (Always 0 for commands issued to the
+ *              Device Cmd Registers.)
+ */
+struct ionic_admin_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	u8     cmd_data[11];
+	u8     color;
+#define IONIC_COMP_COLOR_MASK  0x80
+};
+
+static inline u8 color_match(u8 color, u8 done_color)
+{
+	return (!!(color & IONIC_COMP_COLOR_MASK)) == done_color;
+}
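+
+/*
+ * Illustrative sketch of the color protocol (consumer-side logic assumed,
+ * not defined in this file): the device alternates the color bit it
+ * writes on each pass over a completion ring, so a consumer keeps an
+ * expected done_color and flips it whenever the ring wraps:
+ *
+ *	struct ionic_admin_comp *comp = &ring[tail];
+ *
+ *	while (color_match(comp->color, done_color)) {
+ *		handle_completion(comp);
+ *		if (++tail == num_descs) {
+ *			tail = 0;
+ *			done_color = !done_color;
+ *		}
+ *		comp = &ring[tail];
+ *	}
+ */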
+
+/**
+ * struct nop_cmd - NOP command
+ * @opcode: opcode
+ */
+struct ionic_nop_cmd {
+	u8 opcode;
+	u8 rsvd[63];
+};
+
+/**
+ * struct nop_comp - NOP command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_nop_comp {
+	u8 status;
+	u8 rsvd[15];
+};
+
+/**
+ * struct dev_init_cmd - Device init command
+ * @opcode:    opcode
+ * @type:      device type
+ */
+struct ionic_dev_init_cmd {
+	u8     opcode;
+	u8     type;
+	u8     rsvd[62];
+};
+
+/**
+ * struct init_comp - Device init command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_dev_init_comp {
+	u8 status;
+	u8 rsvd[15];
+};
+
+/**
+ * struct dev_reset_cmd - Device reset command
+ * @opcode: opcode
+ */
+struct ionic_dev_reset_cmd {
+	u8 opcode;
+	u8 rsvd[63];
+};
+
+/**
+ * struct reset_comp - Reset command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_dev_reset_comp {
+	u8 status;
+	u8 rsvd[15];
+};
+
+#define IONIC_IDENTITY_VERSION_1	1
+
+/**
+ * struct dev_identify_cmd - Driver/device identify command
+ * @opcode:  opcode
+ * @ver:     Highest version of identify supported by driver
+ */
+struct ionic_dev_identify_cmd {
+	u8 opcode;
+	u8 ver;
+	u8 rsvd[62];
+};
+
+/**
+ * struct dev_identify_comp - Driver/device identify command completion
+ * @status: The status of the command (enum status_code)
+ * @ver:    Version of identify returned by device
+ */
+struct ionic_dev_identify_comp {
+	u8 status;
+	u8 ver;
+	u8 rsvd[14];
+};
+
+enum ionic_os_type {
+	IONIC_OS_TYPE_LINUX   = 1,
+	IONIC_OS_TYPE_WIN     = 2,
+	IONIC_OS_TYPE_DPDK    = 3,
+	IONIC_OS_TYPE_FREEBSD = 4,
+	IONIC_OS_TYPE_IPXE    = 5,
+	IONIC_OS_TYPE_ESXI    = 6,
+};
+
+/**
+ * union drv_identity - driver identity information
+ * @os_type:          OS type (see enum os_type)
+ * @os_dist:          OS distribution, numeric format
+ * @os_dist_str:      OS distribution, string format
+ * @kernel_ver:       Kernel version, numeric format
+ * @kernel_ver_str:   Kernel version, string format
+ * @driver_ver_str:   Driver version, string format
+ */
+union ionic_drv_identity {
+	struct {
+		__le32 os_type;
+		__le32 os_dist;
+		char   os_dist_str[128];
+		__le32 kernel_ver;
+		char   kernel_ver_str[32];
+		char   driver_ver_str[32];
+	};
+	__le32 words[512];
+};
+
+/**
+ * union dev_identity - device identity information
+ * @version:          Version of device identify
+ * @type:             Identify type (0 for now)
+ * @nports:           Number of ports provisioned
+ * @nlifs:            Number of LIFs provisioned
+ * @nintrs:           Number of interrupts provisioned
+ * @ndbpgs_per_lif:   Number of doorbell pages per LIF
+ * @intr_coal_mult:   Interrupt coalescing multiplication factor.
+ *                    Scale user-supplied interrupt coalescing
+ *                    value in usecs to device units using:
+ *                    device units = usecs * mult / div
+ * @intr_coal_div:    Interrupt coalescing division factor.
+ *                    Scale user-supplied interrupt coalescing
+ *                    value in usecs to device units using:
+ *                    device units = usecs * mult / div
+ */
+union ionic_dev_identity {
+	struct {
+		u8     version;
+		u8     type;
+		u8     rsvd[2];
+		u8     nports;
+		u8     rsvd2[3];
+		__le32 nlifs;
+		__le32 nintrs;
+		__le32 ndbpgs_per_lif;
+		__le32 intr_coal_mult;
+		__le32 intr_coal_div;
+	};
+	__le32 words[512];
+};
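+
+/*
+ * Illustrative sketch (helper name assumed, not part of this interface):
+ * scaling a user-supplied coalescing interval in usecs to device units
+ * with the intr_coal_mult/intr_coal_div factors above.
+ *
+ *	static inline u32 example_coal_usec_to_hw(u32 usecs, u32 mult, u32 div)
+ *	{
+ *		return div ? usecs * mult / div : 0;
+ *	}
+ *
+ * (The zero check guards against an unpopulated divisor.)
+ */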
+
+enum ionic_lif_type {
+	IONIC_LIF_TYPE_CLASSIC = 0,
+	IONIC_LIF_TYPE_MACVLAN = 1,
+	IONIC_LIF_TYPE_NETQUEUE = 2,
+};
+
+/**
+ * struct lif_identify_cmd - lif identify command
+ * @opcode:  opcode
+ * @type:    lif type (enum lif_type)
+ * @ver:     version of identify returned by device
+ */
+struct ionic_lif_identify_cmd {
+	u8 opcode;
+	u8 type;
+	u8 ver;
+	u8 rsvd[61];
+};
+
+/**
+ * struct lif_identify_comp - lif identify command completion
+ * @status:  status of the command (enum status_code)
+ * @ver:     version of identify returned by device
+ */
+struct ionic_lif_identify_comp {
+	u8 status;
+	u8 ver;
+	u8 rsvd2[14];
+};
+
+enum ionic_lif_capability {
+	IONIC_LIF_CAP_ETH        = BIT(0),
+	IONIC_LIF_CAP_RDMA       = BIT(1),
+};
+
+/**
+ * Logical Queue Types
+ */
+enum ionic_logical_qtype {
+	IONIC_QTYPE_ADMINQ  = 0,
+	IONIC_QTYPE_NOTIFYQ = 1,
+	IONIC_QTYPE_RXQ     = 2,
+	IONIC_QTYPE_TXQ     = 3,
+	IONIC_QTYPE_EQ      = 4,
+	IONIC_QTYPE_MAX     = 16,
+};
+
+/**
+ * struct lif_logical_qtype - Descriptor of logical to hardware queue type.
+ * @qtype:          Hardware Queue Type.
+ * @qid_count:      Number of Queue IDs of the logical type.
+ * @qid_base:       Minimum Queue ID of the logical type.
+ */
+struct ionic_lif_logical_qtype {
+	u8     qtype;
+	u8     rsvd[3];
+	__le32 qid_count;
+	__le32 qid_base;
+};
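+
+/*
+ * Note (illustrative): a logical qtype therefore owns the hardware queue
+ * ID range [qid_base, qid_base + qid_count), all serviced by hardware
+ * queue type @qtype.
+ */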
+
+enum ionic_lif_state {
+	IONIC_LIF_DISABLE	= 0,
+	IONIC_LIF_ENABLE	= 1,
+	IONIC_LIF_HANG_RESET	= 2,
+};
+
+/**
+ * LIF configuration
+ * @state:          lif state (enum lif_state)
+ * @name:           lif name
+ * @mtu:            mtu
+ * @mac:            station mac address
+ * @features:       features (enum eth_hw_features)
+ * @queue_count:    queue counts per queue-type
+ */
+union ionic_lif_config {
+	struct {
+		u8     state;
+		u8     rsvd[3];
+		char   name[IONIC_IFNAMSIZ];
+		__le32 mtu;
+		u8     mac[6];
+		u8     rsvd2[2];
+		__le64 features;
+		__le32 queue_count[IONIC_QTYPE_MAX];
+	};
+	__le32 words[64];
+};
+
+/**
+ * struct lif_identity - lif identity information (type-specific)
+ *
+ * @capabilities:   LIF capabilities
+ *
+ * Ethernet:
+ *     @version:          Ethernet identify structure version.
+ *     @features:         Ethernet features supported on this lif type.
+ *     @max_ucast_filters:  Number of perfect unicast addresses supported.
+ *     @max_mcast_filters:  Number of perfect multicast addresses supported.
+ *     @rss_ind_tbl_sz:   Size of the RSS indirection table.
+ *     @min_frame_size:   Minimum size of frames to be sent
+ *     @max_frame_size:   Maximum size of frames to be sent
+ *     @config:           LIF config struct with features, mtu, mac, q counts
+ *
+ * RDMA:
+ *     @version:         RDMA version of opcodes and queue descriptors.
+ *     @qp_opcodes:      Number of rdma queue pair opcodes supported.
+ *     @admin_opcodes:   Number of rdma admin opcodes supported.
+ *     @npts_per_lif:    Page table size per lif
+ *     @nmrs_per_lif:    Number of memory regions per lif
+ *     @nahs_per_lif:    Number of address handles per lif
+ *     @max_stride:      Max work request stride.
+ *     @cl_stride:       Cache line stride.
+ *     @pte_stride:      Page table entry stride.
+ *     @rrq_stride:      Remote RQ work request stride.
+ *     @rsq_stride:      Remote SQ work request stride.
+ *     @dcqcn_profiles:  Number of DCQCN profiles
+ *     @aq_qtype:        RDMA Admin Qtype.
+ *     @sq_qtype:        RDMA Send Qtype.
+ *     @rq_qtype:        RDMA Receive Qtype.
+ *     @cq_qtype:        RDMA Completion Qtype.
+ *     @eq_qtype:        RDMA Event Qtype.
+ */
+union ionic_lif_identity {
+	struct {
+		__le64 capabilities;
+
+		struct {
+			u8 version;
+			u8 rsvd[3];
+			__le32 max_ucast_filters;
+			__le32 max_mcast_filters;
+			__le16 rss_ind_tbl_sz;
+			__le32 min_frame_size;
+			__le32 max_frame_size;
+			u8 rsvd2[106];
+			union ionic_lif_config config;
+		} eth;
+
+		struct {
+			u8 version;
+			u8 qp_opcodes;
+			u8 admin_opcodes;
+			u8 rsvd;
+			__le32 npts_per_lif;
+			__le32 nmrs_per_lif;
+			__le32 nahs_per_lif;
+			u8 max_stride;
+			u8 cl_stride;
+			u8 pte_stride;
+			u8 rrq_stride;
+			u8 rsq_stride;
+			u8 dcqcn_profiles;
+			u8 rsvd_dimensions[10];
+			struct ionic_lif_logical_qtype aq_qtype;
+			struct ionic_lif_logical_qtype sq_qtype;
+			struct ionic_lif_logical_qtype rq_qtype;
+			struct ionic_lif_logical_qtype cq_qtype;
+			struct ionic_lif_logical_qtype eq_qtype;
+		} rdma;
+	};
+	__le32 words[512];
+};
+
+/**
+ * struct lif_init_cmd - LIF init command
+ * @opcode:       opcode
+ * @type:         LIF type (enum lif_type)
+ * @index:        LIF index
+ * @info_pa:      destination address for lif info (struct lif_info)
+ */
+struct ionic_lif_init_cmd {
+	u8     opcode;
+	u8     type;
+	__le16 index;
+	__le32 rsvd;
+	__le64 info_pa;
+	u8     rsvd2[48];
+};
+
+/**
+ * struct lif_init_comp - LIF init command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_lif_init_comp {
+	u8 status;
+	u8 rsvd;
+	__le16 hw_index;
+	u8 rsvd2[12];
+};
+
+/**
+ * struct q_init_cmd - Queue init command
+ * @opcode:       opcode
+ * @type:         Logical queue type
+ * @ver:          Queue version (defines opcode/descriptor scope)
+ * @lif_index:    LIF index
+ * @index:        (lif, qtype) relative admin queue index
+ * @intr_index:   Interrupt control register index
+ * @pid:          Process ID
+ * @flags:
+ *    IRQ:        Interrupt requested on completion
+ *    ENA:        Enable the queue.  If ENA=0 the queue is initialized
+ *                but remains disabled, to be later enabled with the
+ *                Queue Enable command.  If ENA=1, then queue is
+ *                initialized and then enabled.
+ *    SG:         Enable Scatter-Gather on the queue.
+ *    EQ:         Enable the Event Queue.
+ * @cos:          Class of service for this queue.
+ * @ring_size:    Queue ring size, encoded as a log2(size) in number
+ *                of descs.  The actual ring size is (1 << ring_size).
+ *                For example, to select a ring size of 64 descriptors
+ *                write ring_size = 6.  The minimum ring_size value is
+ *                2 for a ring size of 4 descriptors.  The maximum
+ *                ring_size value is 16 for a ring size of 64k
+ *                descriptors.  Values of ring_size <2 and >16 are
+ *                reserved.
+ * @ring_base:    Queue ring base address
+ * @cq_ring_base: Completion queue ring base address
+ * @sg_ring_base: Scatter/Gather ring base address
+ * @eq_index:	  Event queue index
+ */
+struct ionic_q_init_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_index;
+	u8     type;
+	u8     ver;
+	u8     rsvd1[2];
+	__le32 index;
+	__le16 pid;
+	__le16 intr_index;
+	__le16 flags;
+#define IONIC_QINIT_F_IRQ	0x01	/* Request interrupt on completion */
+#define IONIC_QINIT_F_ENA	0x02	/* Enable the queue */
+#define IONIC_QINIT_F_SG	0x04	/* Enable scatter/gather on the queue */
+#define IONIC_QINIT_F_EQ	0x08	/* Enable event queue */
+#define IONIC_QINIT_F_DEBUG 0x80	/* Enable queue debugging */
+	u8     cos;
+	u8     ring_size;
+	__le64 ring_base;
+	__le64 cq_ring_base;
+	__le64 sg_ring_base;
+	__le32 eq_index;
+	u8     rsvd2[16];
+};
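+
+/*
+ * Illustrative example (variable names assumed): encoding a requested
+ * descriptor count into the log2-encoded ring_size field described above.
+ *
+ *	cmd.ring_size = ilog2(num_descs);	(e.g. 64 descs -> 6)
+ *
+ * num_descs must be a power of two between 4 (ring_size = 2) and 64k
+ * (ring_size = 16); other encodings are reserved.
+ */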
+
+/**
+ * struct q_init_comp - Queue init command completion
+ * @status:     The status of the command (enum status_code)
+ * @ver:        Queue version (defines opcode/descriptor scope)
+ * @comp_index: The index in the descriptor ring for which this
+ *              is the completion.
+ * @hw_index:   Hardware Queue ID
+ * @hw_type:    Hardware Queue type
+ * @color:      Color
+ */
+struct ionic_q_init_comp {
+	u8     status;
+	u8     ver;
+	__le16 comp_index;
+	__le32 hw_index;
+	u8     hw_type;
+	u8     rsvd2[6];
+	u8     color;
+};
+
+/* the device's internal addressing uses up to 52 bits */
+#define IONIC_ADDR_LEN		52
+#define IONIC_ADDR_MASK		(BIT_ULL(IONIC_ADDR_LEN) - 1)
+
+enum ionic_txq_desc_opcode {
+	IONIC_TXQ_DESC_OPCODE_CSUM_NONE = 0,
+	IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL = 1,
+	IONIC_TXQ_DESC_OPCODE_CSUM_HW = 2,
+	IONIC_TXQ_DESC_OPCODE_TSO = 3,
+};
+
+/**
+ * struct txq_desc - Ethernet Tx queue descriptor format
+ * @opcode:       Tx operation, see TXQ_DESC_OPCODE_*:
+ *
+ *                   IONIC_TXQ_DESC_OPCODE_CSUM_NONE:
+ *
+ *                      Non-offload send.  No segmentation,
+ *                      fragmentation or checksum calc/insertion is
+ *                      performed by device; packet is prepared
+ *                      to send by software stack and requires
+ *                      no further manipulation from device.
+ *
+ *                   IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL:
+ *
+ *                      Offload 16-bit L4 checksum
+ *                      calculation/insertion.  The device will
+ *                      calculate the L4 checksum value and
+ *                      insert the result in the packet's L4
+ *                      header checksum field.  The L4 checksum
+ *                      is calculated starting at @csum_start bytes
+ *                      into the packet to the end of the packet.
+ *                      The checksum insertion position is given
+ *                      in @csum_offset.  This feature is only
+ *                      applicable to protocols such as TCP, UDP
+ *                      and ICMP where a standard (i.e. the
+ *                      'IP-style' checksum) one's complement
+ *                      16-bit checksum is used, using an IP
+ *                      pseudo-header to seed the calculation.
+ *                      Software will preload the L4 checksum
+ *                      field with the IP pseudo-header checksum.
+ *
+ *                      For tunnel encapsulation, @csum_start and
+ *                      @csum_offset refer to the inner L4
+ *                      header.  Supported tunnels encapsulations
+ *                      are: IPIP, GRE, and UDP.  If the @encap
+ *                      is clear, no further processing by the
+ *                      device is required; software will
+ *                      calculate the outer header checksums.  If
+ *                      the @encap is set, the device will
+ *                      offload the outer header checksums using
+ *                      LCO (local checksum offload) (see
+ *                      Documentation/networking/checksum-offloads.txt
+ *                      for more info).
+ *
+ *                   IONIC_TXQ_DESC_OPCODE_CSUM_HW:
+ *
+ *                      Offload 16-bit checksum computation to hardware.
+ *                      If @csum_l3 is set then the packet's L3 checksum is
+ *                      updated. Similarly, if @csum_l4 is set then the L4
+ *                      checksum is updated. If @encap is set then encap header
+ *                      checksums are also updated.
+ *
+ *                   IONIC_TXQ_DESC_OPCODE_TSO:
+ *
+ *                      Device performs TCP segmentation offload
+ *                      (TSO).  @hdr_len is the number of bytes
+ *                      to the end of TCP header (the offset to
+ *                      the TCP payload).  @mss is the desired
+ *                      MSS, the TCP payload length for each
+ *                      segment.  The device will calculate/
+ *                      insert IP (IPv4 only) and TCP checksums
+ *                      for each segment.  In the first data
+ *                      buffer containing the header template,
+ *                      the driver will set IPv4 checksum to 0
+ *                      and preload TCP checksum with the IP
+ *                      pseudo header calculated with IP length = 0.
+ *
+ *                      Supported tunnel encapsulations are IPIP,
+ *                      layer-3 GRE, and UDP. @hdr_len includes
+ *                      both outer and inner headers.  The driver
+ *                      will set IPv4 checksum to zero and
+ *                      preload TCP checksum with IP pseudo
+ *                      header on the inner header.
+ *
+ *                      TCP ECN offload is supported.  The device
+ *                      will set CWR flag in the first segment if
+ *                      CWR is set in the template header, and
+ *                      clear CWR in remaining segments.
+ * @flags:
+ *                vlan:
+ *                    Insert an L2 VLAN header using @vlan_tci.
+ *                encap:
+ *                    Calculate encap header checksum.
+ *                csum_l3:
+ *                    Compute L3 header checksum.
+ *                csum_l4:
+ *                    Compute L4 header checksum.
+ *                tso_sot:
+ *                    TSO start
+ *                tso_eot:
+ *                    TSO end
+ * @num_sg_elems: Number of scatter-gather elements in SG
+ *                descriptor
+ * @addr:         First data buffer's DMA address.
+ *                (Subsequent data buffers are on txq_sg_desc).
+ * @len:          First data buffer's length, in bytes
+ * @vlan_tci:     VLAN tag to insert in the packet (if requested
+ *                by the @vlan flag).  Includes .1p and .1q tags
+ * @hdr_len:      Length of packet headers, including
+ *                encapsulating outer header, if applicable.
+ *                Valid for opcodes TXQ_DESC_OPCODE_CSUM_PARTIAL and
+ *                TXQ_DESC_OPCODE_TSO.  Should be set to zero for
+ *                all other modes.  For TXQ_DESC_OPCODE_CSUM_PARTIAL,
+ *                @hdr_len is length of headers up to inner-most L4
+ *                header.  For TXQ_DESC_OPCODE_TSO, @hdr_len is up to
+ *                inner-most L4 payload, so inclusive of inner-most
+ *                L4 header.
+ * @mss:          Desired MSS value for TSO.  Only applicable for
+ *                TXQ_DESC_OPCODE_TSO.
+ * @csum_start:   Offset into inner-most L3 header of checksum
+ * @csum_offset:  Offset into inner-most L4 header of checksum
+ */
+
+#define IONIC_TXQ_DESC_OPCODE_MASK		0xf
+#define IONIC_TXQ_DESC_OPCODE_SHIFT		4
+#define IONIC_TXQ_DESC_FLAGS_MASK		0xf
+#define IONIC_TXQ_DESC_FLAGS_SHIFT		0
+#define IONIC_TXQ_DESC_NSGE_MASK		0xf
+#define IONIC_TXQ_DESC_NSGE_SHIFT		8
+#define IONIC_TXQ_DESC_ADDR_MASK		(BIT_ULL(IONIC_ADDR_LEN) - 1)
+#define IONIC_TXQ_DESC_ADDR_SHIFT		12
+
+/* common flags */
+#define IONIC_TXQ_DESC_FLAG_VLAN		0x1
+#define IONIC_TXQ_DESC_FLAG_ENCAP		0x2
+
+/* flags for csum_hw opcode */
+#define IONIC_TXQ_DESC_FLAG_CSUM_L3		0x4
+#define IONIC_TXQ_DESC_FLAG_CSUM_L4		0x8
+
+/* flags for tso opcode */
+#define IONIC_TXQ_DESC_FLAG_TSO_SOT		0x4
+#define IONIC_TXQ_DESC_FLAG_TSO_EOT		0x8
+
+struct ionic_txq_desc {
+	__le64  cmd;
+	__le16  len;
+	union {
+		__le16  vlan_tci;
+		__le16  hword0;
+	};
+	union {
+		__le16  csum_start;
+		__le16  hdr_len;
+		__le16  hword1;
+	};
+	union {
+		__le16  csum_offset;
+		__le16  mss;
+		__le16  hword2;
+	};
+};
+
+static inline u64 encode_txq_desc_cmd(u8 opcode, u8 flags,
+				      u8 nsge, u64 addr)
+{
+	u64 cmd;
+
+	cmd = (opcode & IONIC_TXQ_DESC_OPCODE_MASK) << IONIC_TXQ_DESC_OPCODE_SHIFT;
+	cmd |= (flags & IONIC_TXQ_DESC_FLAGS_MASK) << IONIC_TXQ_DESC_FLAGS_SHIFT;
+	cmd |= (nsge & IONIC_TXQ_DESC_NSGE_MASK) << IONIC_TXQ_DESC_NSGE_SHIFT;
+	cmd |= (addr & IONIC_TXQ_DESC_ADDR_MASK) << IONIC_TXQ_DESC_ADDR_SHIFT;
+
+	return cmd;
+}
+
+static inline void decode_txq_desc_cmd(u64 cmd, u8 *opcode, u8 *flags,
+				       u8 *nsge, u64 *addr)
+{
+	*opcode = (cmd >> IONIC_TXQ_DESC_OPCODE_SHIFT) & IONIC_TXQ_DESC_OPCODE_MASK;
+	*flags = (cmd >> IONIC_TXQ_DESC_FLAGS_SHIFT) & IONIC_TXQ_DESC_FLAGS_MASK;
+	*nsge = (cmd >> IONIC_TXQ_DESC_NSGE_SHIFT) & IONIC_TXQ_DESC_NSGE_MASK;
+	*addr = (cmd >> IONIC_TXQ_DESC_ADDR_SHIFT) & IONIC_TXQ_DESC_ADDR_MASK;
+}
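+
+/*
+ * Illustrative example (dma_addr and the out-params assumed): packing a
+ * single-fragment checksum-offload send into the 64-bit command word,
+ * and unpacking it again.
+ *
+ *	desc->cmd = cpu_to_le64(encode_txq_desc_cmd(
+ *				IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL,
+ *				0, 0, dma_addr));
+ *
+ *	decode_txq_desc_cmd(le64_to_cpu(desc->cmd),
+ *			    &opcode, &flags, &nsge, &addr);
+ *
+ * Only the low IONIC_ADDR_LEN bits of dma_addr are representable, so
+ * the caller's DMA mask must keep addresses within that range.
+ */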
+
+#define IONIC_TX_MAX_SG_ELEMS	8
+#define IONIC_RX_MAX_SG_ELEMS	8
+
+/**
+ * struct txq_sg_desc - Transmit scatter-gather (SG) list
+ * @addr:      DMA address of SG element data buffer
+ * @len:       Length of SG element data buffer, in bytes
+ */
+struct ionic_txq_sg_desc {
+	struct ionic_txq_sg_elem {
+		__le64 addr;
+		__le16 len;
+		__le16 rsvd[3];
+	} elems[IONIC_TX_MAX_SG_ELEMS];
+};
+
+/**
+ * struct txq_comp - Ethernet transmit queue completion descriptor
+ * @status:     The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ *                 is the completion.
+ * @color:      Color bit.
+ */
+struct ionic_txq_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	u8     rsvd2[11];
+	u8     color;
+};
+
+enum ionic_rxq_desc_opcode {
+	IONIC_RXQ_DESC_OPCODE_SIMPLE = 0,
+	IONIC_RXQ_DESC_OPCODE_SG = 1,
+};
+
+/**
+ * struct rxq_desc - Ethernet Rx queue descriptor format
+ * @opcode:       Rx operation, see RXQ_DESC_OPCODE_*:
+ *
+ *                   RXQ_DESC_OPCODE_SIMPLE:
+ *
+ *                      Receive full packet into data buffer
+ *                      starting at @addr.  Results of
+ *                      receive, including actual bytes received,
+ *                      are recorded in Rx completion descriptor.
+ *
+ * @len:          Data buffer's length, in bytes.
+ * @addr:         Data buffer's DMA address
+ */
+struct ionic_rxq_desc {
+	u8     opcode;
+	u8     rsvd[5];
+	__le16 len;
+	__le64 addr;
+};
+
+/**
+ * struct rxq_sg_desc - Receive scatter-gather (SG) list
+ * @addr:      DMA address of SG element data buffer
+ * @len:       Length of SG element data buffer, in bytes
+ */
+struct ionic_rxq_sg_desc {
+	struct ionic_rxq_sg_elem {
+		__le64 addr;
+		__le16 len;
+		__le16 rsvd[3];
+	} elems[IONIC_RX_MAX_SG_ELEMS];
+};
+
+/**
+ * struct rxq_comp - Ethernet receive queue completion descriptor
+ * @status:       The status of the command (enum status_code)
+ * @num_sg_elems: Number of SG elements used by this descriptor
+ * @comp_index:   The index in the descriptor ring for which this
+ *                is the completion.
+ * @rss_hash:     32-bit RSS hash
+ * @csum:         16-bit sum of the packet's L2 payload.
+ *                If the packet's L2 payload is odd length, an extra
+ *                zero-value byte is included in the @csum calculation but
+ *                not included in @len.
+ * @vlan_tci:     VLAN tag stripped from the packet.  Valid if @VLAN is
+ *                set.  Includes .1p and .1q tags.
+ * @len:          Received packet length, in bytes.  Excludes FCS.
+ * @csum_calc:    Whether the L2 payload checksum was computed
+ * @csum_tcp_ok:  The TCP checksum calculated by the device
+ *                matched the checksum in the receive packet's
+ *                TCP header
+ * @csum_tcp_bad: The TCP checksum calculated by the device did
+ *                not match the checksum in the receive packet's
+ *                TCP header.
+ * @csum_udp_ok:  The UDP checksum calculated by the device
+ *                matched the checksum in the receive packet's
+ *                UDP header
+ * @csum_udp_bad: The UDP checksum calculated by the device did
+ *                not match the checksum in the receive packet's
+ *                UDP header.
+ * @csum_ip_ok:   The IPv4 checksum calculated by the device
+ *                matched the checksum in the receive packet's
+ *                first IPv4 header.  If the receive packet
+ *                contains both a tunnel IPv4 header and a
+ *                transport IPv4 header, the device validates the
+ *                checksum for both IPv4 headers.
+ * @csum_ip_bad:  The IPv4 checksum calculated by the device did
+ *                not match the checksum in the receive packet's
+ *                first IPv4 header. If the receive packet
+ *                contains both a tunnel IPv4 header and a
+ *                transport IPv4 header, the device validates the
+ *                checksum for both IP headers.
+ * @VLAN:         VLAN header was stripped and placed in @vlan_tci.
+ * @pkt_type:     Packet type
+ * @color:        Color bit.
+ */
+struct ionic_rxq_comp {
+	u8     status;
+	u8     num_sg_elems;
+	__le16 comp_index;
+	__le32 rss_hash;
+	__le16 csum;
+	__le16 vlan_tci;
+	__le16 len;
+	u8     csum_flags;
+#define IONIC_RXQ_COMP_CSUM_F_TCP_OK	0x01
+#define IONIC_RXQ_COMP_CSUM_F_TCP_BAD	0x02
+#define IONIC_RXQ_COMP_CSUM_F_UDP_OK	0x04
+#define IONIC_RXQ_COMP_CSUM_F_UDP_BAD	0x08
+#define IONIC_RXQ_COMP_CSUM_F_IP_OK	0x10
+#define IONIC_RXQ_COMP_CSUM_F_IP_BAD	0x20
+#define IONIC_RXQ_COMP_CSUM_F_VLAN	0x40
+#define IONIC_RXQ_COMP_CSUM_F_CALC	0x80
+	u8     pkt_type_color;
+#define IONIC_RXQ_COMP_PKT_TYPE_MASK	0x0f
+};
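+
+/*
+ * Illustrative sketch (consumer-side logic assumed): folding csum_flags
+ * into the stack's checksum state when building the skb.
+ *
+ *	if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
+ *		skb->ip_summed = CHECKSUM_COMPLETE;
+ *		skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
+ *	} else if (comp->csum_flags & (IONIC_RXQ_COMP_CSUM_F_TCP_BAD |
+ *				       IONIC_RXQ_COMP_CSUM_F_UDP_BAD |
+ *				       IONIC_RXQ_COMP_CSUM_F_IP_BAD)) {
+ *		...count a checksum error and leave CHECKSUM_NONE...
+ *	}
+ */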
+
+enum ionic_pkt_type {
+	IONIC_PKT_TYPE_NON_IP     = 0x000,
+	IONIC_PKT_TYPE_IPV4       = 0x001,
+	IONIC_PKT_TYPE_IPV4_TCP   = 0x003,
+	IONIC_PKT_TYPE_IPV4_UDP   = 0x005,
+	IONIC_PKT_TYPE_IPV6       = 0x008,
+	IONIC_PKT_TYPE_IPV6_TCP   = 0x018,
+	IONIC_PKT_TYPE_IPV6_UDP   = 0x028,
+};
+
+enum ionic_eth_hw_features {
+	IONIC_ETH_HW_VLAN_TX_TAG	= BIT(0),
+	IONIC_ETH_HW_VLAN_RX_STRIP	= BIT(1),
+	IONIC_ETH_HW_VLAN_RX_FILTER	= BIT(2),
+	IONIC_ETH_HW_RX_HASH		= BIT(3),
+	IONIC_ETH_HW_RX_CSUM		= BIT(4),
+	IONIC_ETH_HW_TX_SG		= BIT(5),
+	IONIC_ETH_HW_RX_SG		= BIT(6),
+	IONIC_ETH_HW_TX_CSUM		= BIT(7),
+	IONIC_ETH_HW_TSO		= BIT(8),
+	IONIC_ETH_HW_TSO_IPV6		= BIT(9),
+	IONIC_ETH_HW_TSO_ECN		= BIT(10),
+	IONIC_ETH_HW_TSO_GRE		= BIT(11),
+	IONIC_ETH_HW_TSO_GRE_CSUM	= BIT(12),
+	IONIC_ETH_HW_TSO_IPXIP4		= BIT(13),
+	IONIC_ETH_HW_TSO_IPXIP6		= BIT(14),
+	IONIC_ETH_HW_TSO_UDP		= BIT(15),
+	IONIC_ETH_HW_TSO_UDP_CSUM	= BIT(16),
+};
+
+/**
+ * struct q_control_cmd - Queue control command
+ * @opcode:     opcode
+ * @type:       Queue type
+ * @lif_index:  LIF index
+ * @index:      Queue index
+ * @oper:       Operation (enum q_control_oper)
+ */
+struct ionic_q_control_cmd {
+	u8     opcode;
+	u8     type;
+	__le16 lif_index;
+	__le32 index;
+	u8     oper;
+	u8     rsvd[55];
+};
+
+typedef struct ionic_admin_comp ionic_q_control_comp;
+
+enum q_control_oper {
+	IONIC_Q_DISABLE		= 0,
+	IONIC_Q_ENABLE		= 1,
+	IONIC_Q_HANG_RESET	= 2,
+};
+
+/**
+ * Physical connection type
+ */
+enum ionic_phy_type {
+	IONIC_PHY_TYPE_NONE	= 0,
+	IONIC_PHY_TYPE_COPPER	= 1,
+	IONIC_PHY_TYPE_FIBER	= 2,
+};
+
+/**
+ * Transceiver status
+ */
+enum ionic_xcvr_state {
+	IONIC_XCVR_STATE_REMOVED	 = 0,
+	IONIC_XCVR_STATE_INSERTED	 = 1,
+	IONIC_XCVR_STATE_PENDING	 = 2,
+	IONIC_XCVR_STATE_SPROM_READ	 = 3,
+	IONIC_XCVR_STATE_SPROM_READ_ERR  = 4,
+};
+
+/**
+ * Supported link modes
+ */
+enum ionic_xcvr_pid {
+	IONIC_XCVR_PID_UNKNOWN           = 0,
+
+	/* CU */
+	IONIC_XCVR_PID_QSFP_100G_CR4     = 1,
+	IONIC_XCVR_PID_QSFP_40GBASE_CR4  = 2,
+	IONIC_XCVR_PID_SFP_25GBASE_CR_S  = 3,
+	IONIC_XCVR_PID_SFP_25GBASE_CR_L  = 4,
+	IONIC_XCVR_PID_SFP_25GBASE_CR_N  = 5,
+
+	/* Fiber */
+	IONIC_XCVR_PID_QSFP_100G_AOC    = 50,
+	IONIC_XCVR_PID_QSFP_100G_ACC    = 51,
+	IONIC_XCVR_PID_QSFP_100G_SR4    = 52,
+	IONIC_XCVR_PID_QSFP_100G_LR4    = 53,
+	IONIC_XCVR_PID_QSFP_100G_ER4    = 54,
+	IONIC_XCVR_PID_QSFP_40GBASE_ER4 = 55,
+	IONIC_XCVR_PID_QSFP_40GBASE_SR4 = 56,
+	IONIC_XCVR_PID_QSFP_40GBASE_LR4 = 57,
+	IONIC_XCVR_PID_QSFP_40GBASE_AOC = 58,
+	IONIC_XCVR_PID_SFP_25GBASE_SR   = 59,
+	IONIC_XCVR_PID_SFP_25GBASE_LR   = 60,
+	IONIC_XCVR_PID_SFP_25GBASE_ER   = 61,
+	IONIC_XCVR_PID_SFP_25GBASE_AOC  = 62,
+	IONIC_XCVR_PID_SFP_10GBASE_SR   = 63,
+	IONIC_XCVR_PID_SFP_10GBASE_LR   = 64,
+	IONIC_XCVR_PID_SFP_10GBASE_LRM  = 65,
+	IONIC_XCVR_PID_SFP_10GBASE_ER   = 66,
+	IONIC_XCVR_PID_SFP_10GBASE_AOC  = 67,
+	IONIC_XCVR_PID_SFP_10GBASE_CU   = 68,
+	IONIC_XCVR_PID_QSFP_100G_CWDM4  = 69,
+	IONIC_XCVR_PID_QSFP_100G_PSM4   = 70,
+};
+
+/**
+ * Port types
+ */
+enum ionic_port_type {
+	IONIC_PORT_TYPE_NONE = 0,  /* port type not configured */
+	IONIC_PORT_TYPE_ETH  = 1,  /* port carries ethernet traffic (inband) */
+	IONIC_PORT_TYPE_MGMT = 2,  /* port carries mgmt traffic (out-of-band) */
+};
+
+/**
+ * Port config state
+ */
+enum ionic_port_admin_state {
+	IONIC_PORT_ADMIN_STATE_NONE = 0,   /* port admin state not configured */
+	IONIC_PORT_ADMIN_STATE_DOWN = 1,   /* port is admin disabled */
+	IONIC_PORT_ADMIN_STATE_UP   = 2,   /* port is admin enabled */
+};
+
+/**
+ * Port operational status
+ */
+enum ionic_port_oper_status {
+	IONIC_PORT_OPER_STATUS_NONE  = 0,	/* port is disabled */
+	IONIC_PORT_OPER_STATUS_UP    = 1,	/* port is linked up */
+	IONIC_PORT_OPER_STATUS_DOWN  = 2,	/* port link status is down */
+};
+
+/**
+ * Ethernet Forward error correction (fec) modes
+ */
+enum ionic_port_fec_type {
+	IONIC_PORT_FEC_TYPE_NONE = 0,		/* Disabled */
+	IONIC_PORT_FEC_TYPE_FC   = 1,		/* FireCode */
+	IONIC_PORT_FEC_TYPE_RS   = 2,		/* ReedSolomon */
+};
+
+/**
+ * Ethernet pause (flow control) modes
+ */
+enum ionic_port_pause_type {
+	IONIC_PORT_PAUSE_TYPE_NONE = 0,	/* Disable Pause */
+	IONIC_PORT_PAUSE_TYPE_LINK = 1,	/* Link level pause */
+	IONIC_PORT_PAUSE_TYPE_PFC  = 2,	/* Priority-Flow control */
+};
+
+/**
+ * Loopback modes
+ */
+enum ionic_port_loopback_mode {
+	IONIC_PORT_LOOPBACK_MODE_NONE = 0,	/* Disable loopback */
+	IONIC_PORT_LOOPBACK_MODE_MAC  = 1,	/* MAC loopback */
+	IONIC_PORT_LOOPBACK_MODE_PHY  = 2,	/* PHY/Serdes loopback */
+};
+
+/**
+ * Transceiver Status information
+ * @state:    Transceiver status (enum xcvr_state)
+ * @phy:      Physical connection type (enum phy_type)
+ * @pid:      Transceiver link mode (enum pid)
+ * @sprom:    Transceiver sprom contents
+ */
+struct ionic_xcvr_status {
+	u8     state;
+	u8     phy;
+	__le16 pid;
+	u8     sprom[256];
+};
+
+/**
+ * Port configuration
+ * @speed:              port speed (in Mbps)
+ * @mtu:                mtu
+ * @state:              port admin state (enum port_admin_state)
+ * @an_enable:          autoneg enable
+ * @fec_type:           fec type (enum port_fec_type)
+ * @pause_type:         pause type (enum port_pause_type)
+ * @loopback_mode:      loopback mode (enum port_loopback_mode)
+ */
+union ionic_port_config {
+	struct {
+#define IONIC_SPEED_100G	100000	/* 100G in Mbps */
+#define IONIC_SPEED_50G		50000	/* 50G in Mbps */
+#define IONIC_SPEED_40G		40000	/* 40G in Mbps */
+#define IONIC_SPEED_25G		25000	/* 25G in Mbps */
+#define IONIC_SPEED_10G		10000	/* 10G in Mbps */
+#define IONIC_SPEED_1G		1000	/* 1G in Mbps */
+		__le32 speed;
+		__le32 mtu;
+		u8     state;
+		u8     an_enable;
+		u8     fec_type;
+#define IONIC_PAUSE_TYPE_MASK		0x0f
+#define IONIC_PAUSE_FLAGS_MASK		0xf0
+#define IONIC_PAUSE_F_TX		0x10
+#define IONIC_PAUSE_F_RX		0x20
+		u8     pause_type;
+		u8     loopback_mode;
+	};
+	__le32 words[64];
+};
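+
+/*
+ * Note (illustrative): pause_type carries the enum ionic_port_pause_type
+ * value in its low nibble and direction flags in the high nibble, per
+ * the IONIC_PAUSE_TYPE_MASK/IONIC_PAUSE_FLAGS_MASK split above, e.g.
+ *
+ *	config->pause_type = IONIC_PORT_PAUSE_TYPE_LINK |
+ *			     IONIC_PAUSE_F_TX | IONIC_PAUSE_F_RX;
+ */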
+
+/**
+ * Port Status information
+ * @status:             link status (enum port_oper_status)
+ * @id:                 port id
+ * @speed:              link speed (in Mbps)
+ * @xcvr:               transceiver status
+ */
+struct ionic_port_status {
+	__le32 id;
+	__le32 speed;
+	u8     status;
+	u8     rsvd[51];
+	struct ionic_xcvr_status  xcvr;
+};
+
+/**
+ * struct port_identify_cmd - Port identify command
+ * @opcode:     opcode
+ * @index:      port index
+ * @ver:        Highest version of identify supported by driver
+ */
+struct ionic_port_identify_cmd {
+	u8 opcode;
+	u8 index;
+	u8 ver;
+	u8 rsvd[61];
+};
+
+/**
+ * struct port_identify_comp - Port identify command completion
+ * @status: The status of the command (enum status_code)
+ * @ver:    Version of identify returned by device
+ */
+struct ionic_port_identify_comp {
+	u8 status;
+	u8 ver;
+	u8 rsvd[14];
+};
+
+/**
+ * struct port_init_cmd - Port initialization command
+ * @opcode:     opcode
+ * @index:      port index
+ * @info_pa:    destination address for port info (struct port_info)
+ */
+struct ionic_port_init_cmd {
+	u8     opcode;
+	u8     index;
+	u8     rsvd[6];
+	__le64 info_pa;
+	u8     rsvd2[48];
+};
+
+/**
+ * struct port_init_comp - Port initialization command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_port_init_comp {
+	u8 status;
+	u8 rsvd[15];
+};
+
+/**
+ * struct port_reset_cmd - Port reset command
+ * @opcode:     opcode
+ * @index:      port index
+ */
+struct ionic_port_reset_cmd {
+	u8 opcode;
+	u8 index;
+	u8 rsvd[62];
+};
+
+/**
+ * struct port_reset_comp - Port reset command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_port_reset_comp {
+	u8 status;
+	u8 rsvd[15];
+};
+
+/**
+ * enum stats_ctl_cmd - List of commands for stats control
+ */
+enum ionic_stats_ctl_cmd {
+	IONIC_STATS_CTL_RESET		= 0,
+};
+
+/**
+ * enum ionic_port_attr - List of device attributes
+ */
+enum ionic_port_attr {
+	IONIC_PORT_ATTR_STATE		= 0,
+	IONIC_PORT_ATTR_SPEED		= 1,
+	IONIC_PORT_ATTR_MTU		= 2,
+	IONIC_PORT_ATTR_AUTONEG		= 3,
+	IONIC_PORT_ATTR_FEC		= 4,
+	IONIC_PORT_ATTR_PAUSE		= 5,
+	IONIC_PORT_ATTR_LOOPBACK	= 6,
+	IONIC_PORT_ATTR_STATS_CTRL	= 7,
+};
+
+/**
+ * struct port_setattr_cmd - Set port attributes on the NIC
+ * @opcode:     Opcode
+ * @index:      port index
+ * @attr:       Attribute type (enum ionic_port_attr)
+ */
+struct ionic_port_setattr_cmd {
+	u8     opcode;
+	u8     index;
+	u8     attr;
+	u8     rsvd;
+	union {
+		u8      state;
+		__le32  speed;
+		__le32  mtu;
+		u8      an_enable;
+		u8      fec_type;
+		u8      pause_type;
+		u8      loopback_mode;
+		u8	stats_ctl;
+		u8      rsvd2[60];
+	};
+};
+
+/**
+ * struct port_setattr_comp - Port set attr command completion
+ * @status:     The status of the command (enum status_code)
+ * @color:      Color bit
+ */
+struct ionic_port_setattr_comp {
+	u8     status;
+	u8     rsvd[14];
+	u8     color;
+};
+
+/**
+ * struct port_getattr_cmd - Get port attributes from the NIC
+ * @opcode:     Opcode
+ * @index:      port index
+ * @attr:       Attribute type (enum ionic_port_attr)
+ */
+struct ionic_port_getattr_cmd {
+	u8     opcode;
+	u8     index;
+	u8     attr;
+	u8     rsvd[61];
+};
+
+/**
+ * struct port_getattr_comp - Port get attr command completion
+ * @status:     The status of the command (enum status_code)
+ * @color:      Color bit
+ */
+struct ionic_port_getattr_comp {
+	u8     status;
+	u8     rsvd[3];
+	union {
+		u8      state;
+		__le32  speed;
+		__le32  mtu;
+		u8      an_enable;
+		u8      fec_type;
+		u8      pause_type;
+		u8      loopback_mode;
+		u8      rsvd2[11];
+	};
+	u8     color;
+};
+
+/**
+ * struct lif_status - Lif status register
+ * @eid:             most recent NotifyQ event id
+ * @port_num:        port the lif is connected to
+ * @link_status:     port status (enum port_oper_status)
+ * @link_speed:      speed of link in Mbps
+ * @link_down_count: number of times link status changes
+ */
+struct ionic_lif_status {
+	__le64 eid;
+	u8     port_num;
+	u8     rsvd;
+	__le16 link_status;
+	__le32 link_speed;		/* units of 1Mbps: e.g. 10000 = 10Gbps */
+	__le16 link_down_count;
+	u8      rsvd2[46];
+};
+
+/**
+ * struct lif_reset_cmd - LIF reset command
+ * @opcode:    opcode
+ * @index:     LIF index
+ */
+struct ionic_lif_reset_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 index;
+	__le32 rsvd2[15];
+};
+
+typedef struct ionic_admin_comp ionic_lif_reset_comp;
+
+enum ionic_dev_state {
+	IONIC_DEV_DISABLE	= 0,
+	IONIC_DEV_ENABLE	= 1,
+	IONIC_DEV_HANG_RESET	= 2,
+};
+
+/**
+ * enum dev_attr - List of device attributes
+ */
+enum ionic_dev_attr {
+	IONIC_DEV_ATTR_STATE    = 0,
+	IONIC_DEV_ATTR_NAME     = 1,
+	IONIC_DEV_ATTR_FEATURES = 2,
+};
+
+/**
+ * struct dev_setattr_cmd - Set Device attributes on the NIC
+ * @opcode:     Opcode
+ * @attr:       Attribute type (enum dev_attr)
+ * @state:      Device state (enum dev_state)
+ * @name:       The bus info, e.g. PCI slot-device-function, 0 terminated
+ * @features:   Device features
+ */
+struct ionic_dev_setattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 rsvd;
+	union {
+		u8      state;
+		char    name[IONIC_IFNAMSIZ];
+		__le64  features;
+		u8      rsvd2[60];
+	};
+};
+
+/**
+ * struct dev_setattr_comp - Device set attr command completion
+ * @status:     The status of the command (enum status_code)
+ * @features:   Device features
+ * @color:      Color bit
+ */
+struct ionic_dev_setattr_comp {
+	u8     status;
+	u8     rsvd[3];
+	union {
+		__le64  features;
+		u8      rsvd2[11];
+	};
+	u8     color;
+};
+
+/**
+ * struct dev_getattr_cmd - Get Device attributes from the NIC
+ * @opcode:     opcode
+ * @attr:       Attribute type (enum dev_attr)
+ */
+struct ionic_dev_getattr_cmd {
+	u8     opcode;
+	u8     attr;
+	u8     rsvd[62];
+};
+
+/**
+ * struct dev_getattr_comp - Device get attr command completion
+ * @status:     The status of the command (enum status_code)
+ * @features:   Device features
+ * @color:      Color bit
+ */
+struct ionic_dev_getattr_comp {
+	u8     status;
+	u8     rsvd[3];
+	union {
+		__le64  features;
+		u8      rsvd2[11];
+	};
+	u8     color;
+};
+
+/**
+ * RSS parameters
+ */
+#define IONIC_RSS_HASH_KEY_SIZE		40
+
+enum ionic_rss_hash_types {
+	IONIC_RSS_TYPE_IPV4	= BIT(0),
+	IONIC_RSS_TYPE_IPV4_TCP	= BIT(1),
+	IONIC_RSS_TYPE_IPV4_UDP	= BIT(2),
+	IONIC_RSS_TYPE_IPV6	= BIT(3),
+	IONIC_RSS_TYPE_IPV6_TCP	= BIT(4),
+	IONIC_RSS_TYPE_IPV6_UDP	= BIT(5),
+};
+
+/**
+ * enum lif_attr - List of LIF attributes
+ */
+enum ionic_lif_attr {
+	IONIC_LIF_ATTR_STATE        = 0,
+	IONIC_LIF_ATTR_NAME         = 1,
+	IONIC_LIF_ATTR_MTU          = 2,
+	IONIC_LIF_ATTR_MAC          = 3,
+	IONIC_LIF_ATTR_FEATURES     = 4,
+	IONIC_LIF_ATTR_RSS          = 5,
+	IONIC_LIF_ATTR_STATS_CTRL   = 6,
+};
+
+/**
+ * struct lif_setattr_cmd - Set LIF attributes on the NIC
+ * @opcode:     Opcode
+ * @type:       Attribute type (enum lif_attr)
+ * @index:      LIF index
+ * @state:      lif state (enum lif_state)
+ * @name:       The netdev name string, 0 terminated
+ * @mtu:        Mtu
+ * @mac:        Station mac
+ * @features:   Features (enum eth_hw_features)
+ * @rss:        RSS properties
+ *              @types:     The hash types to enable (see rss_hash_types).
+ *              @key:       The hash secret key.
+ *              @addr:      Address for the indirection table shared memory.
+ * @stats_ctl:  stats control commands (enum stats_ctl_cmd)
+ */
+struct ionic_lif_setattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 index;
+	union {
+		u8      state;
+		char    name[IONIC_IFNAMSIZ];
+		__le32  mtu;
+		u8      mac[6];
+		__le64  features;
+		struct {
+			__le16 types;
+			u8     key[IONIC_RSS_HASH_KEY_SIZE];
+			u8     rsvd[6];
+			__le64 addr;
+		} rss;
+		u8	stats_ctl;
+		u8      rsvd[60];
+	};
+};
+
+/**
+ * struct lif_setattr_comp - LIF set attr command completion
+ * @status:     The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ *              is the completion.
+ * @features:   features (enum eth_hw_features)
+ * @color:      Color bit
+ */
+struct ionic_lif_setattr_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	union {
+		__le64  features;
+		u8      rsvd2[11];
+	};
+	u8     color;
+};
+
+/**
+ * struct lif_getattr_cmd - Get LIF attributes from the NIC
+ * @opcode:     Opcode
+ * @attr:       Attribute type (enum lif_attr)
+ * @index:      LIF index
+ */
+struct ionic_lif_getattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 index;
+	u8     rsvd[60];
+};
+
+/**
+ * struct lif_getattr_comp - LIF get attr command completion
+ * @status:     The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ *              is the completion.
+ * @state:      lif state (enum lif_state)
+ * @name:       The netdev name string, 0 terminated
+ * @mtu:        Mtu
+ * @mac:        Station mac
+ * @features:   Features (enum eth_hw_features)
+ * @color:      Color bit
+ */
+struct ionic_lif_getattr_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	union {
+		u8      state;
+		__le32  mtu;
+		u8      mac[6];
+		__le64  features;
+		u8      rsvd2[11];
+	};
+	u8     color;
+};
+
+enum ionic_rx_mode {
+	IONIC_RX_MODE_F_UNICAST    = BIT(0),
+	IONIC_RX_MODE_F_MULTICAST  = BIT(1),
+	IONIC_RX_MODE_F_BROADCAST  = BIT(2),
+	IONIC_RX_MODE_F_PROMISC    = BIT(3),
+	IONIC_RX_MODE_F_ALLMULTI   = BIT(4),
+};
+
+/**
+ * struct rx_mode_set_cmd - Set LIF's Rx mode command
+ * @opcode:     opcode
+ * @lif_index:  LIF index
+ * @rx_mode:    Rx mode flags:
+ *                  IONIC_RX_MODE_F_UNICAST: Accept known unicast packets.
+ *                  IONIC_RX_MODE_F_MULTICAST: Accept known multicast packets.
+ *                  IONIC_RX_MODE_F_BROADCAST: Accept broadcast packets.
+ *                  IONIC_RX_MODE_F_PROMISC: Accept any packets.
+ *                  IONIC_RX_MODE_F_ALLMULTI: Accept any multicast packets.
+ */
+struct ionic_rx_mode_set_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_index;
+	__le16 rx_mode;
+	__le16 rsvd2[29];
+};
+
+typedef struct ionic_admin_comp ionic_rx_mode_set_comp;
+
+enum ionic_rx_filter_match_type {
+	IONIC_RX_FILTER_MATCH_VLAN = 0,
+	IONIC_RX_FILTER_MATCH_MAC,
+	IONIC_RX_FILTER_MATCH_MAC_VLAN,
+};
+
+/**
+ * struct rx_filter_add_cmd - Add LIF Rx filter command
+ * @opcode:     opcode
+ * @qtype:      Queue type
+ * @lif_index:  LIF index
+ * @qid:        Queue ID
+ * @match:      Rx filter match type.  (See IONIC_RX_FILTER_MATCH_xxx)
+ * @vlan:       VLAN ID
+ * @addr:       MAC address (network-byte order)
+ */
+struct ionic_rx_filter_add_cmd {
+	u8     opcode;
+	u8     qtype;
+	__le16 lif_index;
+	__le32 qid;
+	__le16 match;
+	union {
+		struct {
+			__le16 vlan;
+		} vlan;
+		struct {
+			u8     addr[6];
+		} mac;
+		struct {
+			__le16 vlan;
+			u8     addr[6];
+		} mac_vlan;
+		u8 rsvd[54];
+	};
+};
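+
+/*
+ * Illustrative example (lif_index and the target netdev assumed): adding
+ * a unicast MAC filter that steers matching frames to receive queue 0.
+ *
+ *	struct ionic_rx_filter_add_cmd cmd = {
+ *		.opcode = IONIC_CMD_RX_FILTER_ADD,
+ *		.lif_index = cpu_to_le16(lif_index),
+ *		.match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
+ *		.qtype = IONIC_QTYPE_RXQ,
+ *		.qid = cpu_to_le32(0),
+ *	};
+ *	ether_addr_copy(cmd.mac.addr, netdev->dev_addr);
+ *
+ * The filter_id returned in the completion below is the handle that
+ * rx_filter_del_cmd later takes to remove the filter.
+ */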
+
+/**
+ * struct rx_filter_add_comp - Add LIF Rx filter command completion
+ * @status:     The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ *              is the completion.
+ * @filter_id:  Filter ID
+ * @color:      Color bit.
+ */
+struct ionic_rx_filter_add_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	__le32 filter_id;
+	u8     rsvd2[7];
+	u8     color;
+};
+
+/**
+ * struct rx_filter_del_cmd - Delete LIF Rx filter command
+ * @opcode:     opcode
+ * @lif_index:  LIF index
+ * @filter_id:  Filter ID
+ */
+struct ionic_rx_filter_del_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_index;
+	__le32 filter_id;
+	u8     rsvd2[56];
+};
+
+typedef struct ionic_admin_comp ionic_rx_filter_del_comp;
+
+/**
+ * struct qos_identify_cmd - QoS identify command
+ * @opcode:    opcode
+ * @ver:     Highest version of identify supported by driver
+ */
+struct ionic_qos_identify_cmd {
+	u8 opcode;
+	u8 ver;
+	u8 rsvd[62];
+};
+
+/**
+ * struct qos_identify_comp - QoS identify command completion
+ * @status: The status of the command (enum status_code)
+ * @ver:    Version of identify returned by device
+ */
+struct ionic_qos_identify_comp {
+	u8 status;
+	u8 ver;
+	u8 rsvd[14];
+};
+
+#define IONIC_QOS_CLASS_MAX		7
+#define IONIC_QOS_CLASS_NAME_SZ		32
+#define IONIC_QOS_DSCP_MAX_VALUES	64
+
+/**
+ * enum qos_class
+ */
+enum ionic_qos_class {
+	IONIC_QOS_CLASS_DEFAULT		= 0,
+	IONIC_QOS_CLASS_USER_DEFINED_1	= 1,
+	IONIC_QOS_CLASS_USER_DEFINED_2	= 2,
+	IONIC_QOS_CLASS_USER_DEFINED_3	= 3,
+	IONIC_QOS_CLASS_USER_DEFINED_4	= 4,
+	IONIC_QOS_CLASS_USER_DEFINED_5	= 5,
+	IONIC_QOS_CLASS_USER_DEFINED_6	= 6,
+};
+
+/**
+ * enum qos_class_type - Traffic classification criteria
+ */
+enum ionic_qos_class_type {
+	IONIC_QOS_CLASS_TYPE_NONE	= 0,
+	IONIC_QOS_CLASS_TYPE_PCP	= 1,	/* Dot1Q pcp */
+	IONIC_QOS_CLASS_TYPE_DSCP	= 2,	/* IP dscp */
+};
+
+/**
+ * enum qos_sched_type - Qos class scheduling type
+ */
+enum ionic_qos_sched_type {
+	IONIC_QOS_SCHED_TYPE_STRICT	= 0,	/* Strict priority */
+	IONIC_QOS_SCHED_TYPE_DWRR	= 1,	/* Deficit weighted round-robin */
+};
+
+/**
+ * union qos_config - Qos configuration structure
+ * @flags:		Configuration flags
+ *	IONIC_QOS_CONFIG_F_ENABLE		enable
+ *	IONIC_QOS_CONFIG_F_DROP			drop/nodrop
+ *	IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP		enable dot1q pcp rewrite
+ *	IONIC_QOS_CONFIG_F_RW_IP_DSCP		enable ip dscp rewrite
+ * @sched_type:		Qos class scheduling type (enum qos_sched_type)
+ * @class_type:		Qos class type (enum qos_class_type)
+ * @pause_type:		Qos pause type (enum qos_pause_type)
+ * @name:		Qos class name
+ * @mtu:		MTU of the class
+ * @pfc_cos:		Class of service for pause frames (valid iff F_DROP is clear)
+ * @dwrr_weight:	Qos class scheduling weight
+ * @strict_rlmt:	Rate limit for strict priority scheduling
+ * @rw_dot1q_pcp:	Rewrite dot1q pcp to this value	(valid iff F_RW_DOT1Q_PCP)
+ * @rw_ip_dscp:		Rewrite ip dscp to this value	(valid iff F_RW_IP_DSCP)
+ * @dot1q_pcp:		Dot1q pcp value
+ * @ndscp:		Number of valid dscp values in the ip_dscp field
+ * @ip_dscp:		IP dscp values
+ */
+union ionic_qos_config {
+	struct {
+#define IONIC_QOS_CONFIG_F_ENABLE		BIT(0)
+#define IONIC_QOS_CONFIG_F_DROP			BIT(1)
+#define IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP		BIT(2)
+#define IONIC_QOS_CONFIG_F_RW_IP_DSCP		BIT(3)
+		u8      flags;
+		u8      sched_type;
+		u8      class_type;
+		u8      pause_type;
+		char    name[IONIC_QOS_CLASS_NAME_SZ];
+		__le32  mtu;
+		/* flow control */
+		u8      pfc_cos;
+		/* scheduler */
+		union {
+			u8      dwrr_weight;
+			__le64  strict_rlmt;
+		};
+		/* marking */
+		union {
+			u8      rw_dot1q_pcp;
+			u8      rw_ip_dscp;
+		};
+		/* classification */
+		union {
+			u8      dot1q_pcp;
+			struct {
+				u8      ndscp;
+				u8      ip_dscp[IONIC_QOS_DSCP_MAX_VALUES];
+			};
+		};
+	};
+	__le32  words[64];
+};
+
+/**
+ * union qos_identity - QoS identity structure
+ * @version:	Version of the identify structure
+ * @type:	QoS system type
+ * @nclasses:	Number of usable QoS classes
+ * @config:	Current configuration of classes
+ */
+union ionic_qos_identity {
+	struct {
+		u8     version;
+		u8     type;
+		u8     rsvd[62];
+		union  ionic_qos_config config[IONIC_QOS_CLASS_MAX];
+	};
+	__le32 words[512];
+};
+
+/**
+ * struct qos_init_cmd - QoS config init command
+ * @opcode:	Opcode
+ * @group:	Qos class id
+ * @info_pa:	destination address for qos info
+ */
+struct ionic_qos_init_cmd {
+	u8     opcode;
+	u8     group;
+	u8     rsvd[6];
+	__le64 info_pa;
+	u8     rsvd1[48];
+};
+
+typedef struct ionic_admin_comp ionic_qos_init_comp;
+
+/**
+ * struct qos_reset_cmd - Qos config reset command
+ * @opcode:	Opcode
+ * @group:	Qos class id
+ */
+struct ionic_qos_reset_cmd {
+	u8    opcode;
+	u8    group;
+	u8    rsvd[62];
+};
+
+typedef struct ionic_admin_comp ionic_qos_reset_comp;
+
+/**
+ * struct fw_download_cmd - Firmware download command
+ * @opcode:	opcode
+ * @addr:	dma address of the firmware buffer
+ * @offset:	offset of the firmware buffer within the full image
+ * @length:	number of valid bytes in the firmware buffer
+ */
+struct ionic_fw_download_cmd {
+	u8     opcode;
+	u8     rsvd[3];
+	__le32 offset;
+	__le64 addr;
+	__le32 length;
+};
+
+typedef struct ionic_admin_comp ionic_fw_download_comp;
+
+enum ionic_fw_control_oper {
+	IONIC_FW_RESET		= 0,	/* Reset firmware */
+	IONIC_FW_INSTALL	= 1,	/* Install firmware */
+	IONIC_FW_ACTIVATE	= 2,	/* Activate firmware */
+};
+
+/**
+ * struct fw_control_cmd - Firmware control command
+ * @opcode:    opcode
+ * @oper:      firmware control operation (enum fw_control_oper)
+ * @slot:      slot to activate
+ */
+struct ionic_fw_control_cmd {
+	u8  opcode;
+	u8  rsvd[3];
+	u8  oper;
+	u8  slot;
+	u8  rsvd1[58];
+};
+
+/**
+ * struct fw_control_comp - Firmware control completion
+ * @status:    status of the command (enum status_code)
+ * @slot:      slot where the firmware was installed
+ */
+struct ionic_fw_control_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	u8     slot;
+	u8     rsvd1[10];
+	u8     color;
+};
+
+/******************************************************************
+ ******************* RDMA Commands ********************************
+ ******************************************************************/
+
+/**
+ * struct rdma_reset_cmd - Reset RDMA LIF cmd
+ * @opcode:        opcode
+ * @lif_index:     lif index
+ *
+ * There is no rdma specific dev command completion struct.  Completion uses
+ * the common struct admin_comp.  Only the status is indicated.  Nonzero status
+ * means the LIF does not support rdma.
+ */
+struct ionic_rdma_reset_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_index;
+	u8     rsvd2[60];
+};
+
+/**
+ * struct rdma_queue_cmd - Create RDMA Queue command
+ * @opcode:        opcode, 52, 53
+ * @lif_index:     lif index
+ * @qid_ver:       (qid | (rdma version << 24))
+ * @cid:           intr, eq_id, or cq_id
+ * @dbid:          doorbell page id
+ * @depth_log2:    log base two of queue depth
+ * @stride_log2:   log base two of queue stride
+ * @dma_addr:      address of the queue memory
+ * @xxx_table_index: temporary, but should not need pgtbl for contig. queues.
+ *
+ * The same command struct is used to create an rdma event queue, completion
+ * queue, or rdma admin queue.  The cid is an interrupt number for an event
+ * queue, an event queue id for a completion queue, or a completion queue id
+ * for an rdma admin queue.
+ *
+ * The queue created via a dev command must be contiguous in dma space.
+ *
+ * The dev commands are intended only to be used during driver initialization,
+ * to create queues supporting the rdma admin queue.  Other queues, and other
+ * types of rdma resources like memory regions, will be created and registered
+ * via the rdma admin queue, and will support a more complete interface
+ * providing scatter gather lists for larger, scattered queue buffers and
+ * memory registration.
+ *
+ * There is no rdma specific dev command completion struct.  Completion uses
+ * the common struct admin_comp.  Only the status is indicated.
+ */
+struct ionic_rdma_queue_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_index;
+	__le32 qid_ver;
+	__le32 cid;
+	__le16 dbid;
+	u8     depth_log2;
+	u8     stride_log2;
+	__le64 dma_addr;
+	u8     rsvd2[36];
+	__le32 xxx_table_index;
+};
+
+/******************************************************************
+ ******************* Notify Events ********************************
+ ******************************************************************/
+
+/**
+ * struct notifyq_event
+ * @eid:   event number
+ * @ecode: event code
+ * @data:  unspecified data about the event
+ *
+ * This is the generic event report struct from which the other
+ * actual events will be formed.
+ */
+struct ionic_notifyq_event {
+	__le64 eid;
+	__le16 ecode;
+	u8     data[54];
+};
+
+/**
+ * struct link_change_event
+ * @eid:		event number
+ * @ecode:		event code = EVENT_OPCODE_LINK_CHANGE
+ * @link_status:	link up or down, with error bits (enum port_status)
+ * @link_speed:		speed of the network link
+ *
+ * Sent when the network link state changes between UP and DOWN
+ */
+struct ionic_link_change_event {
+	__le64 eid;
+	__le16 ecode;
+	__le16 link_status;
+	__le32 link_speed;	/* units of 1Mbps: e.g. 10000 = 10Gbps */
+	u8     rsvd[48];
+};
+
+/**
+ * struct reset_event
+ * @eid:		event number
+ * @ecode:		event code = EVENT_OPCODE_RESET
+ * @reset_code:		reset type
+ * @state:		0=pending, 1=complete, 2=error
+ *
+ * Sent when the NIC or some subsystem is going to be or
+ * has been reset.
+ */
+struct ionic_reset_event {
+	__le64 eid;
+	__le16 ecode;
+	u8     reset_code;
+	u8     state;
+	u8     rsvd[52];
+};
+
+/**
+ * struct heartbeat_event
+ * @eid:	event number
+ * @ecode:	event code = EVENT_OPCODE_HEARTBEAT
+ *
+ * Sent periodically by the NIC to indicate continued health
+ */
+struct ionic_heartbeat_event {
+	__le64 eid;
+	__le16 ecode;
+	u8     rsvd[54];
+};
+
+/**
+ * struct log_event
+ * @eid:	event number
+ * @ecode:	event code = EVENT_OPCODE_LOG
+ * @data:	log data
+ *
+ * Sent to notify the driver of an internal error.
+ */
+struct ionic_log_event {
+	__le64 eid;
+	__le16 ecode;
+	u8     data[54];
+};
+
+/**
+ * struct port_stats
+ */
+struct ionic_port_stats {
+	__le64 frames_rx_ok;
+	__le64 frames_rx_all;
+	__le64 frames_rx_bad_fcs;
+	__le64 frames_rx_bad_all;
+	__le64 octets_rx_ok;
+	__le64 octets_rx_all;
+	__le64 frames_rx_unicast;
+	__le64 frames_rx_multicast;
+	__le64 frames_rx_broadcast;
+	__le64 frames_rx_pause;
+	__le64 frames_rx_bad_length;
+	__le64 frames_rx_undersized;
+	__le64 frames_rx_oversized;
+	__le64 frames_rx_fragments;
+	__le64 frames_rx_jabber;
+	__le64 frames_rx_pripause;
+	__le64 frames_rx_stomped_crc;
+	__le64 frames_rx_too_long;
+	__le64 frames_rx_vlan_good;
+	__le64 frames_rx_dropped;
+	__le64 frames_rx_less_than_64b;
+	__le64 frames_rx_64b;
+	__le64 frames_rx_65b_127b;
+	__le64 frames_rx_128b_255b;
+	__le64 frames_rx_256b_511b;
+	__le64 frames_rx_512b_1023b;
+	__le64 frames_rx_1024b_1518b;
+	__le64 frames_rx_1519b_2047b;
+	__le64 frames_rx_2048b_4095b;
+	__le64 frames_rx_4096b_8191b;
+	__le64 frames_rx_8192b_9215b;
+	__le64 frames_rx_other;
+	__le64 frames_tx_ok;
+	__le64 frames_tx_all;
+	__le64 frames_tx_bad;
+	__le64 octets_tx_ok;
+	__le64 octets_tx_total;
+	__le64 frames_tx_unicast;
+	__le64 frames_tx_multicast;
+	__le64 frames_tx_broadcast;
+	__le64 frames_tx_pause;
+	__le64 frames_tx_pripause;
+	__le64 frames_tx_vlan;
+	__le64 frames_tx_less_than_64b;
+	__le64 frames_tx_64b;
+	__le64 frames_tx_65b_127b;
+	__le64 frames_tx_128b_255b;
+	__le64 frames_tx_256b_511b;
+	__le64 frames_tx_512b_1023b;
+	__le64 frames_tx_1024b_1518b;
+	__le64 frames_tx_1519b_2047b;
+	__le64 frames_tx_2048b_4095b;
+	__le64 frames_tx_4096b_8191b;
+	__le64 frames_tx_8192b_9215b;
+	__le64 frames_tx_other;
+	__le64 frames_tx_pri_0;
+	__le64 frames_tx_pri_1;
+	__le64 frames_tx_pri_2;
+	__le64 frames_tx_pri_3;
+	__le64 frames_tx_pri_4;
+	__le64 frames_tx_pri_5;
+	__le64 frames_tx_pri_6;
+	__le64 frames_tx_pri_7;
+	__le64 frames_rx_pri_0;
+	__le64 frames_rx_pri_1;
+	__le64 frames_rx_pri_2;
+	__le64 frames_rx_pri_3;
+	__le64 frames_rx_pri_4;
+	__le64 frames_rx_pri_5;
+	__le64 frames_rx_pri_6;
+	__le64 frames_rx_pri_7;
+	__le64 tx_pripause_0_1us_count;
+	__le64 tx_pripause_1_1us_count;
+	__le64 tx_pripause_2_1us_count;
+	__le64 tx_pripause_3_1us_count;
+	__le64 tx_pripause_4_1us_count;
+	__le64 tx_pripause_5_1us_count;
+	__le64 tx_pripause_6_1us_count;
+	__le64 tx_pripause_7_1us_count;
+	__le64 rx_pripause_0_1us_count;
+	__le64 rx_pripause_1_1us_count;
+	__le64 rx_pripause_2_1us_count;
+	__le64 rx_pripause_3_1us_count;
+	__le64 rx_pripause_4_1us_count;
+	__le64 rx_pripause_5_1us_count;
+	__le64 rx_pripause_6_1us_count;
+	__le64 rx_pripause_7_1us_count;
+	__le64 rx_pause_1us_count;
+	__le64 frames_tx_truncated;
+};
+
+struct ionic_mgmt_port_stats {
+	__le64 frames_rx_ok;
+	__le64 frames_rx_all;
+	__le64 frames_rx_bad_fcs;
+	__le64 frames_rx_bad_all;
+	__le64 octets_rx_ok;
+	__le64 octets_rx_all;
+	__le64 frames_rx_unicast;
+	__le64 frames_rx_multicast;
+	__le64 frames_rx_broadcast;
+	__le64 frames_rx_pause;
+	__le64 frames_rx_bad_length;
+	__le64 frames_rx_undersized;
+	__le64 frames_rx_oversized;
+	__le64 frames_rx_fragments;
+	__le64 frames_rx_jabber;
+	__le64 frames_rx_64b;
+	__le64 frames_rx_65b_127b;
+	__le64 frames_rx_128b_255b;
+	__le64 frames_rx_256b_511b;
+	__le64 frames_rx_512b_1023b;
+	__le64 frames_rx_1024b_1518b;
+	__le64 frames_rx_gt_1518b;
+	__le64 frames_rx_fifo_full;
+	__le64 frames_tx_ok;
+	__le64 frames_tx_all;
+	__le64 frames_tx_bad;
+	__le64 octets_tx_ok;
+	__le64 octets_tx_total;
+	__le64 frames_tx_unicast;
+	__le64 frames_tx_multicast;
+	__le64 frames_tx_broadcast;
+	__le64 frames_tx_pause;
+};
+
+/**
+ * struct port_identity - port identity structure
+ * @version:        identity structure version
+ * @type:           type of port (enum port_type)
+ * @num_lanes:      number of lanes for the port
+ * @autoneg:        autoneg supported
+ * @min_frame_size: minimum frame size supported
+ * @max_frame_size: maximum frame size supported
+ * @fec_type:       supported fec types
+ * @pause_type:     supported pause types
+ * @loopback_mode:  supported loopback mode
+ * @speeds:         supported speeds
+ * @config:         current port configuration
+ */
+union ionic_port_identity {
+	struct {
+		u8     version;
+		u8     type;
+		u8     num_lanes;
+		u8     autoneg;
+		__le32 min_frame_size;
+		__le32 max_frame_size;
+		u8     fec_type[4];
+		u8     pause_type[2];
+		u8     loopback_mode[2];
+		__le32 speeds[16];
+		u8     rsvd2[44];
+		union ionic_port_config config;
+	};
+	__le32 words[512];
+};
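+
+/* The words[] overlay in the identity unions lets the driver copy
+ * identity data to and from device memory one 32-bit word at a time,
+ * independent of the individual field layout.
+ */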
+
+/**
+ * struct port_info - port info structure
+ * @config:          port configuration
+ * @status:          port status
+ * @stats:           port stats
+ */
+struct ionic_port_info {
+	union ionic_port_config config;
+	struct ionic_port_status status;
+	struct ionic_port_stats stats;
+};
+
+/**
+ * struct lif_stats - LIF packet and error counters
+ */
+struct ionic_lif_stats {
+	/* RX */
+	__le64 rx_ucast_bytes;
+	__le64 rx_ucast_packets;
+	__le64 rx_mcast_bytes;
+	__le64 rx_mcast_packets;
+	__le64 rx_bcast_bytes;
+	__le64 rx_bcast_packets;
+	__le64 rsvd0;
+	__le64 rsvd1;
+	/* RX drops */
+	__le64 rx_ucast_drop_bytes;
+	__le64 rx_ucast_drop_packets;
+	__le64 rx_mcast_drop_bytes;
+	__le64 rx_mcast_drop_packets;
+	__le64 rx_bcast_drop_bytes;
+	__le64 rx_bcast_drop_packets;
+	__le64 rx_dma_error;
+	__le64 rsvd2;
+	/* TX */
+	__le64 tx_ucast_bytes;
+	__le64 tx_ucast_packets;
+	__le64 tx_mcast_bytes;
+	__le64 tx_mcast_packets;
+	__le64 tx_bcast_bytes;
+	__le64 tx_bcast_packets;
+	__le64 rsvd3;
+	__le64 rsvd4;
+	/* TX drops */
+	__le64 tx_ucast_drop_bytes;
+	__le64 tx_ucast_drop_packets;
+	__le64 tx_mcast_drop_bytes;
+	__le64 tx_mcast_drop_packets;
+	__le64 tx_bcast_drop_bytes;
+	__le64 tx_bcast_drop_packets;
+	__le64 tx_dma_error;
+	__le64 rsvd5;
+	/* Rx Queue/Ring drops */
+	__le64 rx_queue_disabled;
+	__le64 rx_queue_empty;
+	__le64 rx_queue_error;
+	__le64 rx_desc_fetch_error;
+	__le64 rx_desc_data_error;
+	__le64 rsvd6;
+	__le64 rsvd7;
+	__le64 rsvd8;
+	/* Tx Queue/Ring drops */
+	__le64 tx_queue_disabled;
+	__le64 tx_queue_error;
+	__le64 tx_desc_fetch_error;
+	__le64 tx_desc_data_error;
+	__le64 rsvd9;
+	__le64 rsvd10;
+	__le64 rsvd11;
+	__le64 rsvd12;
+
+	/* RDMA/ROCE TX */
+	__le64 tx_rdma_ucast_bytes;
+	__le64 tx_rdma_ucast_packets;
+	__le64 tx_rdma_mcast_bytes;
+	__le64 tx_rdma_mcast_packets;
+	__le64 tx_rdma_cnp_packets;
+	__le64 rsvd13;
+	__le64 rsvd14;
+	__le64 rsvd15;
+
+	/* RDMA/ROCE RX */
+	__le64 rx_rdma_ucast_bytes;
+	__le64 rx_rdma_ucast_packets;
+	__le64 rx_rdma_mcast_bytes;
+	__le64 rx_rdma_mcast_packets;
+	__le64 rx_rdma_cnp_packets;
+	__le64 rx_rdma_ecn_packets;
+	__le64 rsvd16;
+	__le64 rsvd17;
+
+	__le64 rsvd18;
+	__le64 rsvd19;
+	__le64 rsvd20;
+	__le64 rsvd21;
+	__le64 rsvd22;
+	__le64 rsvd23;
+	__le64 rsvd24;
+	__le64 rsvd25;
+
+	__le64 rsvd26;
+	__le64 rsvd27;
+	__le64 rsvd28;
+	__le64 rsvd29;
+	__le64 rsvd30;
+	__le64 rsvd31;
+	__le64 rsvd32;
+	__le64 rsvd33;
+
+	__le64 rsvd34;
+	__le64 rsvd35;
+	__le64 rsvd36;
+	__le64 rsvd37;
+	__le64 rsvd38;
+	__le64 rsvd39;
+	__le64 rsvd40;
+	__le64 rsvd41;
+
+	__le64 rsvd42;
+	__le64 rsvd43;
+	__le64 rsvd44;
+	__le64 rsvd45;
+	__le64 rsvd46;
+	__le64 rsvd47;
+	__le64 rsvd48;
+	__le64 rsvd49;
+
+	/* RDMA/ROCE REQ Error/Debugs (768 - 895) */
+	__le64 rdma_req_rx_pkt_seq_err;
+	__le64 rdma_req_rx_rnr_retry_err;
+	__le64 rdma_req_rx_remote_access_err;
+	__le64 rdma_req_rx_remote_inv_req_err;
+	__le64 rdma_req_rx_remote_oper_err;
+	__le64 rdma_req_rx_implied_nak_seq_err;
+	__le64 rdma_req_rx_cqe_err;
+	__le64 rdma_req_rx_cqe_flush_err;
+
+	__le64 rdma_req_rx_dup_responses;
+	__le64 rdma_req_rx_invalid_packets;
+	__le64 rdma_req_tx_local_access_err;
+	__le64 rdma_req_tx_local_oper_err;
+	__le64 rdma_req_tx_memory_mgmt_err;
+	__le64 rsvd52;
+	__le64 rsvd53;
+	__le64 rsvd54;
+
+	/* RDMA/ROCE RESP Error/Debugs (896 - 1023) */
+	__le64 rdma_resp_rx_dup_requests;
+	__le64 rdma_resp_rx_out_of_buffer;
+	__le64 rdma_resp_rx_out_of_seq_pkts;
+	__le64 rdma_resp_rx_cqe_err;
+	__le64 rdma_resp_rx_cqe_flush_err;
+	__le64 rdma_resp_rx_local_len_err;
+	__le64 rdma_resp_rx_inv_request_err;
+	__le64 rdma_resp_rx_local_qp_oper_err;
+
+	__le64 rdma_resp_rx_out_of_atomic_resource;
+	__le64 rdma_resp_tx_pkt_seq_err;
+	__le64 rdma_resp_tx_remote_inv_req_err;
+	__le64 rdma_resp_tx_remote_access_err;
+	__le64 rdma_resp_tx_remote_oper_err;
+	__le64 rdma_resp_tx_rnr_retry_err;
+	__le64 rsvd57;
+	__le64 rsvd58;
+};
+
+/**
+ * struct lif_info - lif info structure
+ */
+struct ionic_lif_info {
+	union ionic_lif_config config;
+	struct ionic_lif_status status;
+	struct ionic_lif_stats stats;
+};
+
+union ionic_dev_cmd {
+	u32 words[16];
+	struct ionic_admin_cmd cmd;
+	struct ionic_nop_cmd nop;
+
+	struct ionic_dev_identify_cmd identify;
+	struct ionic_dev_init_cmd init;
+	struct ionic_dev_reset_cmd reset;
+	struct ionic_dev_getattr_cmd getattr;
+	struct ionic_dev_setattr_cmd setattr;
+
+	struct ionic_port_identify_cmd port_identify;
+	struct ionic_port_init_cmd port_init;
+	struct ionic_port_reset_cmd port_reset;
+	struct ionic_port_getattr_cmd port_getattr;
+	struct ionic_port_setattr_cmd port_setattr;
+
+	struct ionic_lif_identify_cmd lif_identify;
+	struct ionic_lif_init_cmd lif_init;
+	struct ionic_lif_reset_cmd lif_reset;
+
+	struct ionic_qos_identify_cmd qos_identify;
+	struct ionic_qos_init_cmd qos_init;
+	struct ionic_qos_reset_cmd qos_reset;
+
+	struct ionic_q_init_cmd q_init;
+};
+
+union ionic_dev_cmd_comp {
+	u32 words[4];
+	u8 status;
+	struct ionic_admin_comp comp;
+	struct ionic_nop_comp nop;
+
+	struct ionic_dev_identify_comp identify;
+	struct ionic_dev_init_comp init;
+	struct ionic_dev_reset_comp reset;
+	struct ionic_dev_getattr_comp getattr;
+	struct ionic_dev_setattr_comp setattr;
+
+	struct ionic_port_identify_comp port_identify;
+	struct ionic_port_init_comp port_init;
+	struct ionic_port_reset_comp port_reset;
+	struct ionic_port_getattr_comp port_getattr;
+	struct ionic_port_setattr_comp port_setattr;
+
+	struct ionic_lif_identify_comp lif_identify;
+	struct ionic_lif_init_comp lif_init;
+	ionic_lif_reset_comp lif_reset;
+
+	struct ionic_qos_identify_comp qos_identify;
+	ionic_qos_init_comp qos_init;
+	ionic_qos_reset_comp qos_reset;
+
+	struct ionic_q_init_comp q_init;
+};
+
+/**
+ * union dev_info - Device info register format (read-only)
+ * @signature:       Signature value of 0x44455649 ('DEVI').
+ * @version:         Current version of info.
+ * @asic_type:       Asic type.
+ * @asic_rev:        Asic revision.
+ * @fw_status:       Firmware status.
+ * @fw_heartbeat:    Firmware heartbeat counter.
+ * @serial_num:      Serial number.
+ * @fw_version:      Firmware version.
+ */
+union ionic_dev_info_regs {
+#define IONIC_DEVINFO_FWVERS_BUFLEN 32
+#define IONIC_DEVINFO_SERIAL_BUFLEN 32
+	struct {
+		u32    signature;
+		u8     version;
+		u8     asic_type;
+		u8     asic_rev;
+		u8     fw_status;
+		u32    fw_heartbeat;
+		char   fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
+		char   serial_num[IONIC_DEVINFO_SERIAL_BUFLEN];
+	};
+	u32 words[512];
+};
+
+/**
+ * union dev_cmd_regs - Device command register format (read-write)
+ * @doorbell:        Device Cmd Doorbell, write-only.
+ *                   Write a 1 to signal device to process cmd,
+ *                   poll done for completion.
+ * @done:            Done indicator, bit 0 == 1 when command is complete.
+ * @cmd:             Opcode-specific command bytes
+ * @comp:            Opcode-specific response bytes
+ * @data:            Opcode-specific side-data
+ */
+union ionic_dev_cmd_regs {
+	struct {
+		u32                   doorbell;
+		u32                   done;
+		union ionic_dev_cmd         cmd;
+		union ionic_dev_cmd_comp    comp;
+		u8                    rsvd[48];
+		u32                   data[478];
+	};
+	u32 words[512];
+};
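+
+/* Example only, not part of the ABI above: a minimal sketch of the
+ * dev-cmd post/poll sequence implied by the @doorbell/@done
+ * description.  The helper name is hypothetical, the status fetch
+ * assumes the little-endian status byte leads the completion words,
+ * and a real driver would bound the poll with a timeout.
+ */
+static inline u8 ionic_example_dev_cmd_go(union ionic_dev_cmd_regs __iomem *regs,
+					  union ionic_dev_cmd *cmd)
+{
+	unsigned int i;
+
+	/* copy the opcode-specific command words into the register file */
+	for (i = 0; i < ARRAY_SIZE(cmd->words); i++)
+		iowrite32(cmd->words[i], &regs->cmd.words[i]);
+
+	iowrite32(0, &regs->done);	/* clear the done indicator */
+	iowrite32(1, &regs->doorbell);	/* signal the device to process */
+
+	/* poll done bit 0 (IONIC_DEV_CMD_DONE, defined below) */
+	while (!(ioread32(&regs->done) & 0x00000001))
+		cpu_relax();
+
+	return ioread32(&regs->comp.words[0]) & 0xff;	/* status byte */
+}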
+
+/**
+ * union dev_regs - Device register format in for bar 0 page 0
+ * @info:            Device info registers
+ * @devcmd:          Device command registers
+ */
+union ionic_dev_regs {
+	struct {
+		union ionic_dev_info_regs info;
+		union ionic_dev_cmd_regs  devcmd;
+	};
+	__le32 words[1024];
+};
+
+union ionic_adminq_cmd {
+	struct ionic_admin_cmd cmd;
+	struct ionic_nop_cmd nop;
+	struct ionic_q_init_cmd q_init;
+	struct ionic_q_control_cmd q_control;
+	struct ionic_lif_setattr_cmd lif_setattr;
+	struct ionic_lif_getattr_cmd lif_getattr;
+	struct ionic_rx_mode_set_cmd rx_mode_set;
+	struct ionic_rx_filter_add_cmd rx_filter_add;
+	struct ionic_rx_filter_del_cmd rx_filter_del;
+	struct ionic_rdma_reset_cmd rdma_reset;
+	struct ionic_rdma_queue_cmd rdma_queue;
+	struct ionic_fw_download_cmd fw_download;
+	struct ionic_fw_control_cmd fw_control;
+};
+
+union ionic_adminq_comp {
+	struct ionic_admin_comp comp;
+	struct ionic_nop_comp nop;
+	struct ionic_q_init_comp q_init;
+	struct ionic_lif_setattr_comp lif_setattr;
+	struct ionic_lif_getattr_comp lif_getattr;
+	struct ionic_rx_filter_add_comp rx_filter_add;
+	struct ionic_fw_control_comp fw_control;
+};
+
+#define IONIC_BARS_MAX			6
+#define IONIC_PCI_BAR_DBELL		1
+
+/* BAR0 */
+#define IONIC_BAR0_SIZE				0x8000
+
+#define IONIC_BAR0_DEV_INFO_REGS_OFFSET		0x0000
+#define IONIC_BAR0_DEV_CMD_REGS_OFFSET		0x0800
+#define IONIC_BAR0_DEV_CMD_DATA_REGS_OFFSET	0x0c00
+#define IONIC_BAR0_INTR_STATUS_OFFSET		0x1000
+#define IONIC_BAR0_INTR_CTRL_OFFSET		0x2000
+#define IONIC_DEV_CMD_DONE			0x00000001
+
+#define IONIC_ASIC_TYPE_CAPRI			0
+
+/**
+ * struct doorbell - Doorbell register layout
+ * @p_index: Producer index
+ * @ring:    Selects the specific ring of the queue to update.
+ *           Type-specific meaning:
+ *              ring=0: Default producer/consumer queue.
+ *              ring=1: (CQ, EQ) Re-Arm queue.  RDMA CQs
+ *              send events to EQs when armed.  EQs send
+ *              interrupts when armed.
+ * @qid:     The queue id selects the queue destination for the
+ *           producer index and flags.
+ */
+struct ionic_doorbell {
+	__le16 p_index;
+	u8     ring;
+	u8     qid_lo;
+	__le16 qid_hi;
+	u16    rsvd2;
+};
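+
+/* Example only: composing the 64-bit doorbell value implied by the
+ * layout above (little-endian) -- p_index in bits 0..15, ring in
+ * bits 16..23, and the 24-bit qid split across qid_lo/qid_hi in
+ * bits 24..47.  The helper name is hypothetical; the driver's real
+ * doorbell macros express the same packing.
+ */
+static inline u64 ionic_example_dbell_val(u32 qid, u8 ring, u16 p_index)
+{
+	return ((u64)(qid & 0xffffff) << 24) |
+	       ((u64)ring << 16) |
+	       p_index;
+}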
+
+struct ionic_intr_status {
+	u32 status[2];
+};
+
+struct ionic_notifyq_cmd {
+	__le32 data;	/* Not used but needed for qcq structure */
+};
+
+union ionic_notifyq_comp {
+	struct ionic_notifyq_event event;
+	struct ionic_link_change_event link_change;
+	struct ionic_reset_event reset;
+	struct ionic_heartbeat_event heartbeat;
+	struct ionic_log_event log;
+};
+
+/* Deprecated */
+struct ionic_identity {
+	union ionic_drv_identity drv;
+	union ionic_dev_identity dev;
+	union ionic_lif_identity lif;
+	union ionic_port_identity port;
+	union ionic_qos_identity qos;
+};
+
+#pragma pack(pop)
+
+#endif /* _IONIC_IF_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
new file mode 100644
index 0000000..20faa8d
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -0,0 +1,2277 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/printk.h>
+#include <linux/dynamic_debug.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/cpumask.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_txrx.h"
+#include "ionic_ethtool.h"
+#include "ionic_debugfs.h"
+
+static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
+static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
+static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
+static void ionic_link_status_check(struct ionic_lif *lif);
+
+static void ionic_lif_deferred_work(struct work_struct *work)
+{
+	struct ionic_lif *lif = container_of(work, struct ionic_lif, deferred.work);
+	struct ionic_deferred *def = &lif->deferred;
+	struct ionic_deferred_work *w = NULL;
+
+	spin_lock_bh(&def->lock);
+	if (!list_empty(&def->list)) {
+		w = list_first_entry(&def->list,
+				     struct ionic_deferred_work, list);
+		list_del(&w->list);
+	}
+	spin_unlock_bh(&def->lock);
+
+	if (w) {
+		switch (w->type) {
+		case IONIC_DW_TYPE_RX_MODE:
+			ionic_lif_rx_mode(lif, w->rx_mode);
+			break;
+		case IONIC_DW_TYPE_RX_ADDR_ADD:
+			ionic_lif_addr_add(lif, w->addr);
+			break;
+		case IONIC_DW_TYPE_RX_ADDR_DEL:
+			ionic_lif_addr_del(lif, w->addr);
+			break;
+		case IONIC_DW_TYPE_LINK_STATUS:
+			ionic_link_status_check(lif);
+			break;
+		default:
+			break;
+		}
+		kfree(w);
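+		/* one item handled per pass; reschedule to drain the rest */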
+		schedule_work(&def->work);
+	}
+}
+
+static void ionic_lif_deferred_enqueue(struct ionic_deferred *def,
+				       struct ionic_deferred_work *work)
+{
+	spin_lock_bh(&def->lock);
+	list_add_tail(&work->list, &def->list);
+	spin_unlock_bh(&def->lock);
+	schedule_work(&def->work);
+}
+
+static void ionic_link_status_check(struct ionic_lif *lif)
+{
+	struct net_device *netdev = lif->netdev;
+	u16 link_status;
+	bool link_up;
+
+	link_status = le16_to_cpu(lif->info->status.link_status);
+	link_up = link_status == IONIC_PORT_OPER_STATUS_UP;
+
+	/* filter out the no-change cases */
+	if (link_up == netif_carrier_ok(netdev))
+		goto link_out;
+
+	if (link_up) {
+		netdev_info(netdev, "Link up - %d Gbps\n",
+			    le32_to_cpu(lif->info->status.link_speed) / 1000);
+
+		if (test_bit(IONIC_LIF_UP, lif->state)) {
+			netif_tx_wake_all_queues(lif->netdev);
+			netif_carrier_on(netdev);
+		}
+	} else {
+		netdev_info(netdev, "Link down\n");
+
+		/* carrier off first to avoid watchdog timeout */
+		netif_carrier_off(netdev);
+		if (test_bit(IONIC_LIF_UP, lif->state))
+			netif_tx_stop_all_queues(netdev);
+	}
+
+link_out:
+	clear_bit(IONIC_LIF_LINK_CHECK_REQUESTED, lif->state);
+}
+
+static void ionic_link_status_check_request(struct ionic_lif *lif)
+{
+	struct ionic_deferred_work *work;
+
+	/* we only need one request outstanding at a time */
+	if (test_and_set_bit(IONIC_LIF_LINK_CHECK_REQUESTED, lif->state))
+		return;
+
+	if (in_interrupt()) {
+		work = kzalloc(sizeof(*work), GFP_ATOMIC);
+		if (!work)
+			return;
+
+		work->type = IONIC_DW_TYPE_LINK_STATUS;
+		ionic_lif_deferred_enqueue(&lif->deferred, work);
+	} else {
+		ionic_link_status_check(lif);
+	}
+}
+
+static irqreturn_t ionic_isr(int irq, void *data)
+{
+	struct napi_struct *napi = data;
+
+	napi_schedule_irqoff(napi);
+
+	return IRQ_HANDLED;
+}
+
+static int ionic_request_irq(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+	struct ionic_intr_info *intr = &qcq->intr;
+	struct device *dev = lif->ionic->dev;
+	struct ionic_queue *q = &qcq->q;
+	const char *name;
+
+	if (lif->registered)
+		name = lif->netdev->name;
+	else
+		name = dev_name(dev);
+
+	snprintf(intr->name, sizeof(intr->name),
+		 "%s-%s-%s", IONIC_DRV_NAME, name, q->name);
+
+	return devm_request_irq(dev, intr->vector, ionic_isr,
+				0, intr->name, &qcq->napi);
+}
+
+static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr)
+{
+	struct ionic *ionic = lif->ionic;
+	int index;
+
+	index = find_first_zero_bit(ionic->intrs, ionic->nintrs);
+	if (index == ionic->nintrs) {
+		netdev_warn(lif->netdev, "%s: no intr, index=%d nintrs=%d\n",
+			    __func__, index, ionic->nintrs);
+		return -ENOSPC;
+	}
+
+	set_bit(index, ionic->intrs);
+	ionic_intr_init(&ionic->idev, intr, index);
+
+	return 0;
+}
+
+static void ionic_intr_free(struct ionic_lif *lif, int index)
+{
+	if (index != INTR_INDEX_NOT_ASSIGNED && index < lif->ionic->nintrs)
+		clear_bit(index, lif->ionic->intrs);
+}
+
+static int ionic_qcq_enable(struct ionic_qcq *qcq)
+{
+	struct ionic_queue *q = &qcq->q;
+	struct ionic_lif *lif = q->lif;
+	struct ionic_dev *idev;
+	struct device *dev;
+
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.q_control = {
+			.opcode = IONIC_CMD_Q_CONTROL,
+			.lif_index = cpu_to_le16(lif->index),
+			.type = q->type,
+			.index = cpu_to_le32(q->index),
+			.oper = IONIC_Q_ENABLE,
+		},
+	};
+
+	idev = &lif->ionic->idev;
+	dev = lif->ionic->dev;
+
+	dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n",
+		ctx.cmd.q_control.index, ctx.cmd.q_control.type);
+
+	if (qcq->flags & IONIC_QCQ_F_INTR) {
+		irq_set_affinity_hint(qcq->intr.vector,
+				      &qcq->intr.affinity_mask);
+		napi_enable(&qcq->napi);
+		ionic_intr_clean(idev->intr_ctrl, qcq->intr.index);
+		ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+				IONIC_INTR_MASK_CLEAR);
+	}
+
+	return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static int ionic_qcq_disable(struct ionic_qcq *qcq)
+{
+	struct ionic_queue *q = &qcq->q;
+	struct ionic_lif *lif = q->lif;
+	struct ionic_dev *idev;
+	struct device *dev;
+
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.q_control = {
+			.opcode = IONIC_CMD_Q_CONTROL,
+			.lif_index = cpu_to_le16(lif->index),
+			.type = q->type,
+			.index = cpu_to_le32(q->index),
+			.oper = IONIC_Q_DISABLE,
+		},
+	};
+
+	idev = &lif->ionic->idev;
+	dev = lif->ionic->dev;
+
+	dev_dbg(dev, "q_disable.index %d q_disable.qtype %d\n",
+		ctx.cmd.q_control.index, ctx.cmd.q_control.type);
+
+	if (qcq->flags & IONIC_QCQ_F_INTR) {
+		ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+				IONIC_INTR_MASK_SET);
+		synchronize_irq(qcq->intr.vector);
+		irq_set_affinity_hint(qcq->intr.vector, NULL);
+		napi_disable(&qcq->napi);
+	}
+
+	return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+	struct ionic_dev *idev = &lif->ionic->idev;
+	struct device *dev = lif->ionic->dev;
+
+	if (!qcq)
+		return;
+
+	ionic_debugfs_del_qcq(qcq);
+
+	if (!(qcq->flags & IONIC_QCQ_F_INITED))
+		return;
+
+	if (qcq->flags & IONIC_QCQ_F_INTR) {
+		ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+				IONIC_INTR_MASK_SET);
+		devm_free_irq(dev, qcq->intr.vector, &qcq->napi);
+		netif_napi_del(&qcq->napi);
+	}
+
+	qcq->flags &= ~IONIC_QCQ_F_INITED;
+}
+
+static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+	struct device *dev = lif->ionic->dev;
+
+	if (!qcq)
+		return;
+
+	dma_free_coherent(dev, qcq->total_size, qcq->base, qcq->base_pa);
+	qcq->base = NULL;
+	qcq->base_pa = 0;
+
+	if (qcq->flags & IONIC_QCQ_F_INTR)
+		ionic_intr_free(lif, qcq->intr.index);
+
+	devm_kfree(dev, qcq->cq.info);
+	qcq->cq.info = NULL;
+	devm_kfree(dev, qcq->q.info);
+	qcq->q.info = NULL;
+	devm_kfree(dev, qcq);
+}
+
+static void ionic_qcqs_free(struct ionic_lif *lif)
+{
+	struct device *dev = lif->ionic->dev;
+	unsigned int i;
+
+	if (lif->notifyqcq) {
+		ionic_qcq_free(lif, lif->notifyqcq);
+		lif->notifyqcq = NULL;
+	}
+
+	if (lif->adminqcq) {
+		ionic_qcq_free(lif, lif->adminqcq);
+		lif->adminqcq = NULL;
+	}
+
+	for (i = 0; i < lif->nxqs; i++)
+		if (lif->rxqcqs[i].stats)
+			devm_kfree(dev, lif->rxqcqs[i].stats);
+
+	devm_kfree(dev, lif->rxqcqs);
+	lif->rxqcqs = NULL;
+
+	for (i = 0; i < lif->nxqs; i++)
+		if (lif->txqcqs[i].stats)
+			devm_kfree(dev, lif->txqcqs[i].stats);
+
+	devm_kfree(dev, lif->txqcqs);
+	lif->txqcqs = NULL;
+}
+
+static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
+				      struct ionic_qcq *n_qcq)
+{
+	if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) {
+		ionic_intr_free(n_qcq->cq.lif, n_qcq->intr.index);
+		n_qcq->flags &= ~IONIC_QCQ_F_INTR;
+	}
+
+	n_qcq->intr.vector = src_qcq->intr.vector;
+	n_qcq->intr.index = src_qcq->intr.index;
+}
+
+static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
+			   unsigned int index,
+			   const char *name, unsigned int flags,
+			   unsigned int num_descs, unsigned int desc_size,
+			   unsigned int cq_desc_size,
+			   unsigned int sg_desc_size,
+			   unsigned int pid, struct ionic_qcq **qcq)
+{
+	struct ionic_dev *idev = &lif->ionic->idev;
+	u32 q_size, cq_size, sg_size, total_size;
+	struct device *dev = lif->ionic->dev;
+	void *q_base, *cq_base, *sg_base;
+	dma_addr_t cq_base_pa = 0;
+	dma_addr_t sg_base_pa = 0;
+	dma_addr_t q_base_pa = 0;
+	struct ionic_qcq *new;
+	int err;
+
+	*qcq = NULL;
+
+	q_size  = num_descs * desc_size;
+	cq_size = num_descs * cq_desc_size;
+	sg_size = num_descs * sg_desc_size;
+
+	total_size = ALIGN(q_size, PAGE_SIZE) + ALIGN(cq_size, PAGE_SIZE);
+	/* Note: aligning q_size and cq_size separately is not enough,
+	 * because cq_base must itself be page aligned but q_base + q_size
+	 * may not land on a page boundary; add PAGE_SIZE of slack to
+	 * cover the worst-case rounding up of cq_base.
+	 */
+	total_size += PAGE_SIZE;
+	if (flags & IONIC_QCQ_F_SG) {
+		total_size += ALIGN(sg_size, PAGE_SIZE);
+		total_size += PAGE_SIZE;
+	}
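+
+	/* Worked example (illustrative numbers): with 4 KiB pages and
+	 * 1024 16-byte descriptors, q_size = cq_size = 16 KiB, so
+	 * total_size = 16K + 16K + 4K of slack; 128-byte sg descriptors
+	 * would add ALIGN(128K, 4K) plus another page of slack.
+	 */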
+
+	new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
+	if (!new) {
+		netdev_err(lif->netdev, "Cannot allocate queue structure\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	new->flags = flags;
+
+	new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs,
+				   GFP_KERNEL);
+	if (!new->q.info) {
+		netdev_err(lif->netdev, "Cannot allocate queue info\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	new->q.type = type;
+
+	err = ionic_q_init(lif, idev, &new->q, index, name, num_descs,
+			   desc_size, sg_desc_size, pid);
+	if (err) {
+		netdev_err(lif->netdev, "Cannot initialize queue\n");
+		goto err_out;
+	}
+
+	if (flags & IONIC_QCQ_F_INTR) {
+		err = ionic_intr_alloc(lif, &new->intr);
+		if (err) {
+			netdev_warn(lif->netdev, "no intr for %s: %d\n",
+				    name, err);
+			goto err_out;
+		}
+
+		err = ionic_bus_get_irq(lif->ionic, new->intr.index);
+		if (err < 0) {
+			netdev_warn(lif->netdev, "no vector for %s: %d\n",
+				    name, err);
+			goto err_out_free_intr;
+		}
+		new->intr.vector = err;
+		ionic_intr_mask_assert(idev->intr_ctrl, new->intr.index,
+				       IONIC_INTR_MASK_SET);
+
+		new->intr.cpu = new->intr.index % num_online_cpus();
+		if (cpu_online(new->intr.cpu))
+			cpumask_set_cpu(new->intr.cpu,
+					&new->intr.affinity_mask);
+	} else {
+		new->intr.index = INTR_INDEX_NOT_ASSIGNED;
+	}
+
+	new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs,
+				    GFP_KERNEL);
+	if (!new->cq.info) {
+		netdev_err(lif->netdev, "Cannot allocate completion queue info\n");
+		err = -ENOMEM;
+		goto err_out_free_intr;
+	}
+
+	err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size);
+	if (err) {
+		netdev_err(lif->netdev, "Cannot initialize completion queue\n");
+		goto err_out_free_intr;
+	}
+
+	new->base = dma_alloc_coherent(dev, total_size, &new->base_pa,
+				       GFP_KERNEL);
+	if (!new->base) {
+		netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n");
+		err = -ENOMEM;
+		goto err_out_free_intr;
+	}
+
+	new->total_size = total_size;
+
+	q_base = new->base;
+	q_base_pa = new->base_pa;
+
+	cq_base = (void *)ALIGN((uintptr_t)q_base + q_size, PAGE_SIZE);
+	cq_base_pa = ALIGN(q_base_pa + q_size, PAGE_SIZE);
+
+	if (flags & IONIC_QCQ_F_SG) {
+		sg_base = (void *)ALIGN((uintptr_t)cq_base + cq_size,
+					PAGE_SIZE);
+		sg_base_pa = ALIGN(cq_base_pa + cq_size, PAGE_SIZE);
+		ionic_q_sg_map(&new->q, sg_base, sg_base_pa);
+	}
+
+	ionic_q_map(&new->q, q_base, q_base_pa);
+	ionic_cq_map(&new->cq, cq_base, cq_base_pa);
+	ionic_cq_bind(&new->cq, &new->q);
+
+	*qcq = new;
+
+	return 0;
+
+err_out_free_intr:
+	ionic_intr_free(lif, new->intr.index);
+err_out:
+	dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err);
+	return err;
+}
+
+static int ionic_qcqs_alloc(struct ionic_lif *lif)
+{
+	struct device *dev = lif->ionic->dev;
+	unsigned int q_list_size;
+	unsigned int flags;
+	int err;
+	int i;
+
+	flags = IONIC_QCQ_F_INTR;
+	err = ionic_qcq_alloc(lif, IONIC_QTYPE_ADMINQ, 0, "admin", flags,
+			      IONIC_ADMINQ_LENGTH,
+			      sizeof(struct ionic_admin_cmd),
+			      sizeof(struct ionic_admin_comp),
+			      0, lif->kern_pid, &lif->adminqcq);
+	if (err)
+		return err;
+
+	if (lif->ionic->nnqs_per_lif) {
+		flags = IONIC_QCQ_F_NOTIFYQ;
+		err = ionic_qcq_alloc(lif, IONIC_QTYPE_NOTIFYQ, 0, "notifyq",
+				      flags, IONIC_NOTIFYQ_LENGTH,
+				      sizeof(struct ionic_notifyq_cmd),
+				      sizeof(union ionic_notifyq_comp),
+				      0, lif->kern_pid, &lif->notifyqcq);
+		if (err)
+			goto err_out_free_adminqcq;
+
+		/* Let the notifyq ride on the adminq interrupt */
+		ionic_link_qcq_interrupts(lif->adminqcq, lif->notifyqcq);
+	}
+
+	q_list_size = sizeof(*lif->txqcqs) * lif->nxqs;
+	err = -ENOMEM;
+	lif->txqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL);
+	if (!lif->txqcqs)
+		goto err_out_free_notifyqcq;
+	for (i = 0; i < lif->nxqs; i++) {
+		lif->txqcqs[i].stats = devm_kzalloc(dev,
+						    sizeof(struct ionic_q_stats),
+						    GFP_KERNEL);
+		if (!lif->txqcqs[i].stats)
+			goto err_out_free_tx_stats;
+	}
+
+	lif->rxqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL);
+	if (!lif->rxqcqs)
+		goto err_out_free_tx_stats;
+	for (i = 0; i < lif->nxqs; i++) {
+		lif->rxqcqs[i].stats = devm_kzalloc(dev,
+						    sizeof(struct ionic_q_stats),
+						    GFP_KERNEL);
+		if (!lif->rxqcqs[i].stats)
+			goto err_out_free_rx_stats;
+	}
+
+	return 0;
+
+err_out_free_rx_stats:
+	for (i = 0; i < lif->nxqs; i++)
+		if (lif->rxqcqs[i].stats)
+			devm_kfree(dev, lif->rxqcqs[i].stats);
+	devm_kfree(dev, lif->rxqcqs);
+	lif->rxqcqs = NULL;
+err_out_free_tx_stats:
+	for (i = 0; i < lif->nxqs; i++)
+		if (lif->txqcqs[i].stats)
+			devm_kfree(dev, lif->txqcqs[i].stats);
+	devm_kfree(dev, lif->txqcqs);
+	lif->txqcqs = NULL;
+err_out_free_notifyqcq:
+	if (lif->notifyqcq) {
+		ionic_qcq_free(lif, lif->notifyqcq);
+		lif->notifyqcq = NULL;
+	}
+err_out_free_adminqcq:
+	ionic_qcq_free(lif, lif->adminqcq);
+	lif->adminqcq = NULL;
+
+	return err;
+}
+
+static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+	struct device *dev = lif->ionic->dev;
+	struct ionic_queue *q = &qcq->q;
+	struct ionic_cq *cq = &qcq->cq;
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.q_init = {
+			.opcode = IONIC_CMD_Q_INIT,
+			.lif_index = cpu_to_le16(lif->index),
+			.type = q->type,
+			.index = cpu_to_le32(q->index),
+			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+					     IONIC_QINIT_F_SG),
+			.intr_index = cpu_to_le16(lif->rxqcqs[q->index].qcq->intr.index),
+			.pid = cpu_to_le16(q->pid),
+			.ring_size = ilog2(q->num_descs),
+			.ring_base = cpu_to_le64(q->base_pa),
+			.cq_ring_base = cpu_to_le64(cq->base_pa),
+			.sg_ring_base = cpu_to_le64(q->sg_base_pa),
+		},
+	};
+	int err;
+
+	dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid);
+	dev_dbg(dev, "txq_init.index %d\n", ctx.cmd.q_init.index);
+	dev_dbg(dev, "txq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
+	dev_dbg(dev, "txq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	q->hw_type = ctx.comp.q_init.hw_type;
+	q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index);
+	q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+	dev_dbg(dev, "txq->hw_type %d\n", q->hw_type);
+	dev_dbg(dev, "txq->hw_index %d\n", q->hw_index);
+
+	qcq->flags |= IONIC_QCQ_F_INITED;
+
+	ionic_debugfs_add_qcq(lif, qcq);
+
+	return 0;
+}
+
+static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+	struct device *dev = lif->ionic->dev;
+	struct ionic_queue *q = &qcq->q;
+	struct ionic_cq *cq = &qcq->cq;
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.q_init = {
+			.opcode = IONIC_CMD_Q_INIT,
+			.lif_index = cpu_to_le16(lif->index),
+			.type = q->type,
+			.index = cpu_to_le32(q->index),
+			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ),
+			.intr_index = cpu_to_le16(cq->bound_intr->index),
+			.pid = cpu_to_le16(q->pid),
+			.ring_size = ilog2(q->num_descs),
+			.ring_base = cpu_to_le64(q->base_pa),
+			.cq_ring_base = cpu_to_le64(cq->base_pa),
+		},
+	};
+	int err;
+
+	dev_dbg(dev, "rxq_init.pid %d\n", ctx.cmd.q_init.pid);
+	dev_dbg(dev, "rxq_init.index %d\n", ctx.cmd.q_init.index);
+	dev_dbg(dev, "rxq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
+	dev_dbg(dev, "rxq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	q->hw_type = ctx.comp.q_init.hw_type;
+	q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index);
+	q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+	dev_dbg(dev, "rxq->hw_type %d\n", q->hw_type);
+	dev_dbg(dev, "rxq->hw_index %d\n", q->hw_index);
+
+	netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi,
+		       NAPI_POLL_WEIGHT);
+
+	err = ionic_request_irq(lif, qcq);
+	if (err) {
+		netif_napi_del(&qcq->napi);
+		return err;
+	}
+
+	qcq->flags |= IONIC_QCQ_F_INITED;
+
+	ionic_debugfs_add_qcq(lif, qcq);
+
+	return 0;
+}
+
+static bool ionic_notifyq_service(struct ionic_cq *cq,
+				  struct ionic_cq_info *cq_info)
+{
+	union ionic_notifyq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev;
+	struct ionic_queue *q;
+	struct ionic_lif *lif;
+	u64 eid;
+
+	q = cq->bound_q;
+	lif = q->info[0].cb_arg;
+	netdev = lif->netdev;
+	eid = le64_to_cpu(comp->event.eid);
+
+	/* Have we run out of new completions to process? */
+	if (eid <= lif->last_eid)
+		return false;
+
+	lif->last_eid = eid;
+
+	dev_dbg(lif->ionic->dev, "notifyq event:\n");
+	dynamic_hex_dump("event ", DUMP_PREFIX_OFFSET, 16, 1,
+			 comp, sizeof(*comp), true);
+
+	switch (le16_to_cpu(comp->event.ecode)) {
+	case IONIC_EVENT_LINK_CHANGE:
+		ionic_link_status_check_request(lif);
+		break;
+	case IONIC_EVENT_RESET:
+		netdev_info(netdev, "Notifyq IONIC_EVENT_RESET eid=%lld\n",
+			    eid);
+		netdev_info(netdev, "  reset_code=%d state=%d\n",
+			    comp->reset.reset_code,
+			    comp->reset.state);
+		break;
+	default:
+		netdev_warn(netdev, "Notifyq unknown event ecode=%d eid=%lld\n",
+			    comp->event.ecode, eid);
+		break;
+	}
+
+	return true;
+}
+
+static int ionic_notifyq_clean(struct ionic_lif *lif, int budget)
+{
+	struct ionic_dev *idev = &lif->ionic->idev;
+	struct ionic_cq *cq = &lif->notifyqcq->cq;
+	u32 work_done;
+
+	work_done = ionic_cq_service(cq, budget, ionic_notifyq_service,
+				     NULL, NULL);
+	if (work_done)
+		ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+				   work_done, IONIC_INTR_CRED_RESET_COALESCE);
+
+	return work_done;
+}
+
+static bool ionic_adminq_service(struct ionic_cq *cq,
+				 struct ionic_cq_info *cq_info)
+{
+	struct ionic_admin_comp *comp = cq_info->cq_desc;
+
+	if (!color_match(comp->color, cq->done_color))
+		return false;
+
+	ionic_q_service(cq->bound_q, cq_info, le16_to_cpu(comp->comp_index));
+
+	return true;
+}
+
+static int ionic_adminq_napi(struct napi_struct *napi, int budget)
+{
+	struct ionic_lif *lif = napi_to_cq(napi)->lif;
+	int n_work = 0;
+	int a_work = 0;
+
+	if (likely(lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED))
+		n_work = ionic_notifyq_clean(lif, budget);
+	a_work = ionic_napi(napi, budget, ionic_adminq_service, NULL, NULL);
+
+	return max(n_work, a_work);
+}
+
+static void ionic_get_stats64(struct net_device *netdev,
+			      struct rtnl_link_stats64 *ns)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_lif_stats *ls;
+
+	memset(ns, 0, sizeof(*ns));
+	ls = &lif->info->stats;
+
+	ns->rx_packets = le64_to_cpu(ls->rx_ucast_packets) +
+			 le64_to_cpu(ls->rx_mcast_packets) +
+			 le64_to_cpu(ls->rx_bcast_packets);
+
+	ns->tx_packets = le64_to_cpu(ls->tx_ucast_packets) +
+			 le64_to_cpu(ls->tx_mcast_packets) +
+			 le64_to_cpu(ls->tx_bcast_packets);
+
+	ns->rx_bytes = le64_to_cpu(ls->rx_ucast_bytes) +
+		       le64_to_cpu(ls->rx_mcast_bytes) +
+		       le64_to_cpu(ls->rx_bcast_bytes);
+
+	ns->tx_bytes = le64_to_cpu(ls->tx_ucast_bytes) +
+		       le64_to_cpu(ls->tx_mcast_bytes) +
+		       le64_to_cpu(ls->tx_bcast_bytes);
+
+	ns->rx_dropped = le64_to_cpu(ls->rx_ucast_drop_packets) +
+			 le64_to_cpu(ls->rx_mcast_drop_packets) +
+			 le64_to_cpu(ls->rx_bcast_drop_packets);
+
+	ns->tx_dropped = le64_to_cpu(ls->tx_ucast_drop_packets) +
+			 le64_to_cpu(ls->tx_mcast_drop_packets) +
+			 le64_to_cpu(ls->tx_bcast_drop_packets);
+
+	ns->multicast = le64_to_cpu(ls->rx_mcast_packets);
+
+	ns->rx_over_errors = le64_to_cpu(ls->rx_queue_empty);
+
+	ns->rx_missed_errors = le64_to_cpu(ls->rx_dma_error) +
+			       le64_to_cpu(ls->rx_queue_disabled) +
+			       le64_to_cpu(ls->rx_desc_fetch_error) +
+			       le64_to_cpu(ls->rx_desc_data_error);
+
+	ns->tx_aborted_errors = le64_to_cpu(ls->tx_dma_error) +
+				le64_to_cpu(ls->tx_queue_disabled) +
+				le64_to_cpu(ls->tx_desc_fetch_error) +
+				le64_to_cpu(ls->tx_desc_data_error);
+
+	ns->rx_errors = ns->rx_over_errors +
+			ns->rx_missed_errors;
+
+	ns->tx_errors = ns->tx_aborted_errors;
+}
+
+static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_add = {
+			.opcode = IONIC_CMD_RX_FILTER_ADD,
+			.lif_index = cpu_to_le16(lif->index),
+			.match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
+		},
+	};
+	struct ionic_rx_filter *f;
+	int err;
+
+	/* don't bother if we already have it */
+	spin_lock_bh(&lif->rx_filters.lock);
+	f = ionic_rx_filter_by_addr(lif, addr);
+	spin_unlock_bh(&lif->rx_filters.lock);
+	if (f)
+		return 0;
+
+	netdev_dbg(lif->netdev, "rx_filter add ADDR %pM (id %d)\n", addr,
+		   ctx.comp.rx_filter_add.filter_id);
+
+	memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+}
+
+static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_del = {
+			.opcode = IONIC_CMD_RX_FILTER_DEL,
+			.lif_index = cpu_to_le16(lif->index),
+		},
+	};
+	struct ionic_rx_filter *f;
+	int err;
+
+	spin_lock_bh(&lif->rx_filters.lock);
+	f = ionic_rx_filter_by_addr(lif, addr);
+	if (!f) {
+		spin_unlock_bh(&lif->rx_filters.lock);
+		return -ENOENT;
+	}
+
+	ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
+	ionic_rx_filter_free(lif, f);
+	spin_unlock_bh(&lif->rx_filters.lock);
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", addr,
+		   ctx.cmd.rx_filter_del.filter_id);
+
+	return 0;
+}
+
+static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
+{
+	struct ionic *ionic = lif->ionic;
+	struct ionic_deferred_work *work;
+	unsigned int nmfilters;
+	unsigned int nufilters;
+
+	if (add) {
+		/* Do we have space for this filter?  We test the counters
+		 * here before checking the need for deferral so that we
+		 * can return an overflow error to the stack.
+		 */
+		nmfilters = le32_to_cpu(ionic->ident.lif.eth.max_mcast_filters);
+		nufilters = le32_to_cpu(ionic->ident.lif.eth.max_ucast_filters);
+
+		if (is_multicast_ether_addr(addr) && lif->nmcast < nmfilters)
+			lif->nmcast++;
+		else if (!is_multicast_ether_addr(addr) &&
+			 lif->nucast < nufilters)
+			lif->nucast++;
+		else
+			return -ENOSPC;
+	} else {
+		if (is_multicast_ether_addr(addr) && lif->nmcast)
+			lif->nmcast--;
+		else if (!is_multicast_ether_addr(addr) && lif->nucast)
+			lif->nucast--;
+	}
+
+	if (in_interrupt()) {
+		work = kzalloc(sizeof(*work), GFP_ATOMIC);
+		if (!work) {
+			netdev_err(lif->netdev, "%s OOM\n", __func__);
+			return -ENOMEM;
+		}
+		work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD :
+				   IONIC_DW_TYPE_RX_ADDR_DEL;
+		memcpy(work->addr, addr, ETH_ALEN);
+		netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
+			   add ? "add" : "del", addr);
+		ionic_lif_deferred_enqueue(&lif->deferred, work);
+	} else {
+		netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
+			   add ? "add" : "del", addr);
+		if (add)
+			return ionic_lif_addr_add(lif, addr);
+		else
+			return ionic_lif_addr_del(lif, addr);
+	}
+
+	return 0;
+}
+
+static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
+{
+	return ionic_lif_addr(netdev_priv(netdev), addr, true);
+}
+
+static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
+{
+	return ionic_lif_addr(netdev_priv(netdev), addr, false);
+}
+
+static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_mode_set = {
+			.opcode = IONIC_CMD_RX_MODE_SET,
+			.lif_index = cpu_to_le16(lif->index),
+			.rx_mode = cpu_to_le16(rx_mode),
+		},
+	};
+	char buf[128];
+	int err;
+	int i;
+#define REMAIN(__x) (sizeof(buf) - (__x))
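+/* REMAIN(x) is the buffer space left after x bytes have been written,
+ * keeping each successive snprintf() append bounded
+ */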
+
+	i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+		     lif->rx_mode, rx_mode);
+	if (rx_mode & IONIC_RX_MODE_F_UNICAST)
+		i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+	if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
+		i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+	if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
+		i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+	if (rx_mode & IONIC_RX_MODE_F_PROMISC)
+		i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+	if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
+		i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+	netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n",
+			    rx_mode, err);
+	else
+		lif->rx_mode = rx_mode;
+}
+
+static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
+{
+	struct ionic_deferred_work *work;
+
+	if (in_interrupt()) {
+		work = kzalloc(sizeof(*work), GFP_ATOMIC);
+		if (!work) {
+			netdev_err(lif->netdev, "%s OOM\n", __func__);
+			return;
+		}
+		work->type = IONIC_DW_TYPE_RX_MODE;
+		work->rx_mode = rx_mode;
+		netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+		ionic_lif_deferred_enqueue(&lif->deferred, work);
+	} else {
+		ionic_lif_rx_mode(lif, rx_mode);
+	}
+}
+
+static void ionic_set_rx_mode(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_identity *ident;
+	unsigned int nfilters;
+	unsigned int rx_mode;
+
+	ident = &lif->ionic->ident;
+
+	rx_mode = IONIC_RX_MODE_F_UNICAST;
+	rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
+	rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
+	rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
+	rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
+
+	/* sync the unicast addresses, then check for an overflow state:
+	 * if we overflowed the filter table, note it and enable NIC
+	 * PROMISC; if the overflow flag is set but no longer needed,
+	 * clear it and consult the netdev flags to see whether NIC
+	 * PROMISC can be disabled again
+	 */
+	__dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
+	nfilters = le32_to_cpu(ident->lif.eth.max_ucast_filters);
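+	/* the +1 reserves a slot for the port's own station address,
+	 * which netdev_uc_count() does not include
+	 */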
+	if (netdev_uc_count(netdev) + 1 > nfilters) {
+		rx_mode |= IONIC_RX_MODE_F_PROMISC;
+		lif->uc_overflow = true;
+	} else if (lif->uc_overflow) {
+		lif->uc_overflow = false;
+		if (!(netdev->flags & IFF_PROMISC))
+			rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
+	}
+
+	/* same for multicast */
+	__dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
+	nfilters = le32_to_cpu(ident->lif.eth.max_mcast_filters);
+	if (netdev_mc_count(netdev) > nfilters) {
+		rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
+		lif->mc_overflow = true;
+	} else if (lif->mc_overflow) {
+		lif->mc_overflow = false;
+		if (!(netdev->flags & IFF_ALLMULTI))
+			rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
+	}
+
+	if (lif->rx_mode != rx_mode)
+		_ionic_lif_rx_mode(lif, rx_mode);
+}
+
+static __le64 ionic_netdev_features_to_nic(netdev_features_t features)
+{
+	u64 wanted = 0;
+
+	if (features & NETIF_F_HW_VLAN_CTAG_TX)
+		wanted |= IONIC_ETH_HW_VLAN_TX_TAG;
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		wanted |= IONIC_ETH_HW_VLAN_RX_STRIP;
+	if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
+		wanted |= IONIC_ETH_HW_VLAN_RX_FILTER;
+	if (features & NETIF_F_RXHASH)
+		wanted |= IONIC_ETH_HW_RX_HASH;
+	if (features & NETIF_F_RXCSUM)
+		wanted |= IONIC_ETH_HW_RX_CSUM;
+	if (features & NETIF_F_SG)
+		wanted |= IONIC_ETH_HW_TX_SG;
+	if (features & NETIF_F_HW_CSUM)
+		wanted |= IONIC_ETH_HW_TX_CSUM;
+	if (features & NETIF_F_TSO)
+		wanted |= IONIC_ETH_HW_TSO;
+	if (features & NETIF_F_TSO6)
+		wanted |= IONIC_ETH_HW_TSO_IPV6;
+	if (features & NETIF_F_TSO_ECN)
+		wanted |= IONIC_ETH_HW_TSO_ECN;
+	if (features & NETIF_F_GSO_GRE)
+		wanted |= IONIC_ETH_HW_TSO_GRE;
+	if (features & NETIF_F_GSO_GRE_CSUM)
+		wanted |= IONIC_ETH_HW_TSO_GRE_CSUM;
+	if (features & NETIF_F_GSO_IPXIP4)
+		wanted |= IONIC_ETH_HW_TSO_IPXIP4;
+	if (features & NETIF_F_GSO_IPXIP6)
+		wanted |= IONIC_ETH_HW_TSO_IPXIP6;
+	if (features & NETIF_F_GSO_UDP_TUNNEL)
+		wanted |= IONIC_ETH_HW_TSO_UDP;
+	if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM)
+		wanted |= IONIC_ETH_HW_TSO_UDP_CSUM;
+
+	return cpu_to_le64(wanted);
+}
+
+static int ionic_set_nic_features(struct ionic_lif *lif,
+				  netdev_features_t features)
+{
+	struct device *dev = lif->ionic->dev;
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_setattr = {
+			.opcode = IONIC_CMD_LIF_SETATTR,
+			.index = cpu_to_le16(lif->index),
+			.attr = IONIC_LIF_ATTR_FEATURES,
+		},
+	};
+	u64 vlan_flags = IONIC_ETH_HW_VLAN_TX_TAG |
+			 IONIC_ETH_HW_VLAN_RX_STRIP |
+			 IONIC_ETH_HW_VLAN_RX_FILTER;
+	int err;
+
+	ctx.cmd.lif_setattr.features = ionic_netdev_features_to_nic(features);
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	lif->hw_features = le64_to_cpu(ctx.cmd.lif_setattr.features &
+				       ctx.comp.lif_setattr.features);
+
+	if ((vlan_flags & features) &&
+	    !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features)))
+		dev_info_once(lif->ionic->dev, "NIC is not supporting vlan offload, likely in SmartNIC mode\n");
+
+	if (lif->hw_features & IONIC_ETH_HW_VLAN_TX_TAG)
+		dev_dbg(dev, "feature ETH_HW_VLAN_TX_TAG\n");
+	if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_STRIP)
+		dev_dbg(dev, "feature ETH_HW_VLAN_RX_STRIP\n");
+	if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_FILTER)
+		dev_dbg(dev, "feature ETH_HW_VLAN_RX_FILTER\n");
+	if (lif->hw_features & IONIC_ETH_HW_RX_HASH)
+		dev_dbg(dev, "feature ETH_HW_RX_HASH\n");
+	if (lif->hw_features & IONIC_ETH_HW_TX_SG)
+		dev_dbg(dev, "feature ETH_HW_TX_SG\n");
+	if (lif->hw_features & IONIC_ETH_HW_TX_CSUM)
+		dev_dbg(dev, "feature ETH_HW_TX_CSUM\n");
+	if (lif->hw_features & IONIC_ETH_HW_RX_CSUM)
+		dev_dbg(dev, "feature ETH_HW_RX_CSUM\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO)
+		dev_dbg(dev, "feature ETH_HW_TSO\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_IPV6)
+		dev_dbg(dev, "feature ETH_HW_TSO_IPV6\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_ECN)
+		dev_dbg(dev, "feature ETH_HW_TSO_ECN\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_GRE)
+		dev_dbg(dev, "feature ETH_HW_TSO_GRE\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_GRE_CSUM)
+		dev_dbg(dev, "feature ETH_HW_TSO_GRE_CSUM\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP4)
+		dev_dbg(dev, "feature ETH_HW_TSO_IPXIP4\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP6)
+		dev_dbg(dev, "feature ETH_HW_TSO_IPXIP6\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_UDP)
+		dev_dbg(dev, "feature ETH_HW_TSO_UDP\n");
+	if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM)
+		dev_dbg(dev, "feature ETH_HW_TSO_UDP_CSUM\n");
+
+	return 0;
+}
+
+static int ionic_init_nic_features(struct ionic_lif *lif)
+{
+	struct net_device *netdev = lif->netdev;
+	netdev_features_t features;
+	int err;
+
+	/* set up what we expect to support by default */
+	features = NETIF_F_HW_VLAN_CTAG_TX |
+		   NETIF_F_HW_VLAN_CTAG_RX |
+		   NETIF_F_HW_VLAN_CTAG_FILTER |
+		   NETIF_F_RXHASH |
+		   NETIF_F_SG |
+		   NETIF_F_HW_CSUM |
+		   NETIF_F_RXCSUM |
+		   NETIF_F_TSO |
+		   NETIF_F_TSO6 |
+		   NETIF_F_TSO_ECN;
+
+	err = ionic_set_nic_features(lif, features);
+	if (err)
+		return err;
+
+	/* tell the netdev what we actually can support */
+	netdev->features |= NETIF_F_HIGHDMA;
+
+	if (lif->hw_features & IONIC_ETH_HW_VLAN_TX_TAG)
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+	if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_STRIP)
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+	if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_FILTER)
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+	if (lif->hw_features & IONIC_ETH_HW_RX_HASH)
+		netdev->hw_features |= NETIF_F_RXHASH;
+	if (lif->hw_features & IONIC_ETH_HW_TX_SG)
+		netdev->hw_features |= NETIF_F_SG;
+
+	if (lif->hw_features & IONIC_ETH_HW_TX_CSUM)
+		netdev->hw_enc_features |= NETIF_F_HW_CSUM;
+	if (lif->hw_features & IONIC_ETH_HW_RX_CSUM)
+		netdev->hw_enc_features |= NETIF_F_RXCSUM;
+	if (lif->hw_features & IONIC_ETH_HW_TSO)
+		netdev->hw_enc_features |= NETIF_F_TSO;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_IPV6)
+		netdev->hw_enc_features |= NETIF_F_TSO6;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_ECN)
+		netdev->hw_enc_features |= NETIF_F_TSO_ECN;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_GRE)
+		netdev->hw_enc_features |= NETIF_F_GSO_GRE;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_GRE_CSUM)
+		netdev->hw_enc_features |= NETIF_F_GSO_GRE_CSUM;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP4)
+		netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP6)
+		netdev->hw_enc_features |= NETIF_F_GSO_IPXIP6;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_UDP)
+		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+	if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM)
+		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+	netdev->hw_features |= netdev->hw_enc_features;
+	netdev->features |= netdev->hw_features;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	return 0;
+}
+
+static int ionic_set_features(struct net_device *netdev,
+			      netdev_features_t features)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int err;
+
+	netdev_dbg(netdev, "%s: lif->features=0x%08llx new_features=0x%08llx\n",
+		   __func__, (u64)lif->netdev->features, (u64)features);
+
+	err = ionic_set_nic_features(lif, features);
+
+	return err;
+}
+
+static int ionic_set_mac_address(struct net_device *netdev, void *sa)
+{
+	struct sockaddr *addr = sa;
+	u8 *mac;
+	int err;
+
+	mac = (u8 *)addr->sa_data;
+	if (ether_addr_equal(netdev->dev_addr, mac))
+		return 0;
+
+	err = eth_prepare_mac_addr_change(netdev, addr);
+	if (err)
+		return err;
+
+	if (!is_zero_ether_addr(netdev->dev_addr)) {
+		netdev_info(netdev, "deleting mac addr %pM\n",
+			    netdev->dev_addr);
+		ionic_addr_del(netdev, netdev->dev_addr);
+	}
+
+	eth_commit_mac_addr_change(netdev, addr);
+	netdev_info(netdev, "updating mac addr %pM\n", mac);
+
+	return ionic_addr_add(netdev, mac);
+}
+
+static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_setattr = {
+			.opcode = IONIC_CMD_LIF_SETATTR,
+			.index = cpu_to_le16(lif->index),
+			.attr = IONIC_LIF_ATTR_MTU,
+			.mtu = cpu_to_le32(new_mtu),
+		},
+	};
+	int err;
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	netdev->mtu = new_mtu;
+	err = ionic_reset_queues(lif);
+
+	return err;
+}
+
+static void ionic_tx_timeout_work(struct work_struct *ws)
+{
+	struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+
+	netdev_info(lif->netdev, "Tx Timeout recovery\n");
+
+	rtnl_lock();
+	ionic_reset_queues(lif);
+	rtnl_unlock();
+}
+
+static void ionic_tx_timeout(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	schedule_work(&lif->tx_timeout_work);
+}
+
+static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto,
+				 u16 vid)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_add = {
+			.opcode = IONIC_CMD_RX_FILTER_ADD,
+			.lif_index = cpu_to_le16(lif->index),
+			.match = cpu_to_le16(IONIC_RX_FILTER_MATCH_VLAN),
+			.vlan.vlan = cpu_to_le16(vid),
+		},
+	};
+	int err;
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	netdev_dbg(netdev, "rx_filter add VLAN %d (id %d)\n", vid,
+		   ctx.comp.rx_filter_add.filter_id);
+
+	return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+}
+
+static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
+				  u16 vid)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_del = {
+			.opcode = IONIC_CMD_RX_FILTER_DEL,
+			.lif_index = cpu_to_le16(lif->index),
+		},
+	};
+	struct ionic_rx_filter *f;
+
+	spin_lock_bh(&lif->rx_filters.lock);
+
+	f = ionic_rx_filter_by_vlan(lif, vid);
+	if (!f) {
+		spin_unlock_bh(&lif->rx_filters.lock);
+		return -ENOENT;
+	}
+
+	netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", vid,
+		   le32_to_cpu(ctx.cmd.rx_filter_del.filter_id));
+
+	ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
+	ionic_rx_filter_free(lif, f);
+	spin_unlock_bh(&lif->rx_filters.lock);
+
+	return ionic_adminq_post_wait(lif, &ctx);
+}
+
+int ionic_lif_rss_config(struct ionic_lif *lif, const u16 types,
+			 const u8 *key, const u32 *indir)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_setattr = {
+			.opcode = IONIC_CMD_LIF_SETATTR,
+			.attr = IONIC_LIF_ATTR_RSS,
+			.rss.types = cpu_to_le16(types),
+			.rss.addr = cpu_to_le64(lif->rss_ind_tbl_pa),
+		},
+	};
+	unsigned int i, tbl_sz;
+
+	lif->rss_types = types;
+
+	if (key)
+		memcpy(lif->rss_hash_key, key, IONIC_RSS_HASH_KEY_SIZE);
+
+	if (indir) {
+		tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+		for (i = 0; i < tbl_sz; i++)
+			lif->rss_ind_tbl[i] = indir[i];
+	}
+
+	memcpy(ctx.cmd.lif_setattr.rss.key, lif->rss_hash_key,
+	       IONIC_RSS_HASH_KEY_SIZE);
+
+	return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static int ionic_lif_rss_init(struct ionic_lif *lif)
+{
+	u8 rss_key[IONIC_RSS_HASH_KEY_SIZE];
+	unsigned int tbl_sz;
+	unsigned int i;
+
+	netdev_rss_key_fill(rss_key, IONIC_RSS_HASH_KEY_SIZE);
+
+	lif->rss_types = IONIC_RSS_TYPE_IPV4     |
+			 IONIC_RSS_TYPE_IPV4_TCP |
+			 IONIC_RSS_TYPE_IPV4_UDP |
+			 IONIC_RSS_TYPE_IPV6     |
+			 IONIC_RSS_TYPE_IPV6_TCP |
+			 IONIC_RSS_TYPE_IPV6_UDP;
+
+	/* Fill indirection table with 'default' values */
+	tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+	for (i = 0; i < tbl_sz; i++)
+		lif->rss_ind_tbl[i] = ethtool_rxfh_indir_default(i, lif->nxqs);
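+	/* ethtool_rxfh_indir_default(i, n) is simply i % n, spreading
+	 * the table entries round-robin across the nxqs queues
+	 */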
+
+	return ionic_lif_rss_config(lif, lif->rss_types, rss_key, NULL);
+}
+
+static int ionic_lif_rss_deinit(struct ionic_lif *lif)
+{
+	return ionic_lif_rss_config(lif, 0x0, NULL, NULL);
+}
+
+static void ionic_txrx_disable(struct ionic_lif *lif)
+{
+	unsigned int i;
+
+	for (i = 0; i < lif->nxqs; i++) {
+		ionic_qcq_disable(lif->txqcqs[i].qcq);
+		ionic_qcq_disable(lif->rxqcqs[i].qcq);
+	}
+}
+
+static void ionic_txrx_deinit(struct ionic_lif *lif)
+{
+	unsigned int i;
+
+	for (i = 0; i < lif->nxqs; i++) {
+		ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+		ionic_tx_flush(&lif->txqcqs[i].qcq->cq);
+
+		ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
+		ionic_rx_flush(&lif->rxqcqs[i].qcq->cq);
+		ionic_rx_empty(&lif->rxqcqs[i].qcq->q);
+	}
+}
+
+static void ionic_txrx_free(struct ionic_lif *lif)
+{
+	unsigned int i;
+
+	for (i = 0; i < lif->nxqs; i++) {
+		ionic_qcq_free(lif, lif->txqcqs[i].qcq);
+		lif->txqcqs[i].qcq = NULL;
+
+		ionic_qcq_free(lif, lif->rxqcqs[i].qcq);
+		lif->rxqcqs[i].qcq = NULL;
+	}
+}
+
+static int ionic_txrx_alloc(struct ionic_lif *lif)
+{
+	unsigned int flags;
+	unsigned int i;
+	int err = 0;
+	u32 coal;
+
+	flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
+	for (i = 0; i < lif->nxqs; i++) {
+		err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
+				      lif->ntxq_descs,
+				      sizeof(struct ionic_txq_desc),
+				      sizeof(struct ionic_txq_comp),
+				      sizeof(struct ionic_txq_sg_desc),
+				      lif->kern_pid, &lif->txqcqs[i].qcq);
+		if (err)
+			goto err_out;
+
+		lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats;
+	}
+
+	flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR;
+	coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs);
+	for (i = 0; i < lif->nxqs; i++) {
+		err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
+				      lif->nrxq_descs,
+				      sizeof(struct ionic_rxq_desc),
+				      sizeof(struct ionic_rxq_comp),
+				      0, lif->kern_pid, &lif->rxqcqs[i].qcq);
+		if (err)
+			goto err_out;
+
+		lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats;
+
+		ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+				     lif->rxqcqs[i].qcq->intr.index, coal);
+		ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq,
+					  lif->txqcqs[i].qcq);
+	}
+
+	return 0;
+
+err_out:
+	ionic_txrx_free(lif);
+
+	return err;
+}
+
+static int ionic_txrx_init(struct ionic_lif *lif)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < lif->nxqs; i++) {
+		err = ionic_lif_txq_init(lif, lif->txqcqs[i].qcq);
+		if (err)
+			goto err_out;
+
+		err = ionic_lif_rxq_init(lif, lif->rxqcqs[i].qcq);
+		if (err) {
+			ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+			goto err_out;
+		}
+	}
+
+	if (lif->netdev->features & NETIF_F_RXHASH)
+		ionic_lif_rss_init(lif);
+
+	ionic_set_rx_mode(lif->netdev);
+
+	return 0;
+
+err_out:
+	while (i--) {
+		ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+		ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
+	}
+
+	return err;
+}
+
+static int ionic_txrx_enable(struct ionic_lif *lif)
+{
+	int i, err;
+
+	for (i = 0; i < lif->nxqs; i++) {
+		err = ionic_qcq_enable(lif->txqcqs[i].qcq);
+		if (err)
+			goto err_out;
+
+		ionic_rx_fill(&lif->rxqcqs[i].qcq->q);
+		err = ionic_qcq_enable(lif->rxqcqs[i].qcq);
+		if (err) {
+			ionic_qcq_disable(lif->txqcqs[i].qcq);
+			goto err_out;
+		}
+	}
+
+	return 0;
+
+err_out:
+	while (i--) {
+		ionic_qcq_disable(lif->rxqcqs[i].qcq);
+		ionic_qcq_disable(lif->txqcqs[i].qcq);
+	}
+
+	return err;
+}
+
+int ionic_open(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int err;
+
+	netif_carrier_off(netdev);
+
+	err = ionic_txrx_alloc(lif);
+	if (err)
+		return err;
+
+	err = ionic_txrx_init(lif);
+	if (err)
+		goto err_txrx_free;
+
+	err = ionic_txrx_enable(lif);
+	if (err)
+		goto err_txrx_deinit;
+
+	netif_set_real_num_tx_queues(netdev, lif->nxqs);
+	netif_set_real_num_rx_queues(netdev, lif->nxqs);
+
+	set_bit(IONIC_LIF_UP, lif->state);
+
+	ionic_link_status_check_request(lif);
+	if (netif_carrier_ok(netdev))
+		netif_tx_wake_all_queues(netdev);
+
+	return 0;
+
+err_txrx_deinit:
+	ionic_txrx_deinit(lif);
+err_txrx_free:
+	ionic_txrx_free(lif);
+	return err;
+}
+
+int ionic_stop(struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int err = 0;
+
+	if (!test_bit(IONIC_LIF_UP, lif->state)) {
+		dev_dbg(lif->ionic->dev, "%s: %s state=DOWN\n",
+			__func__, lif->name);
+		return 0;
+	}
+	dev_dbg(lif->ionic->dev, "%s: %s state=UP\n", __func__, lif->name);
+	clear_bit(IONIC_LIF_UP, lif->state);
+
+	/* carrier off before disabling queues to avoid watchdog timeout */
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+	netif_tx_disable(netdev);
+
+	ionic_txrx_disable(lif);
+	ionic_txrx_deinit(lif);
+	ionic_txrx_free(lif);
+
+	return err;
+}
+
+static const struct net_device_ops ionic_netdev_ops = {
+	.ndo_open		= ionic_open,
+	.ndo_stop		= ionic_stop,
+	.ndo_start_xmit		= ionic_start_xmit,
+	.ndo_get_stats64	= ionic_get_stats64,
+	.ndo_set_rx_mode	= ionic_set_rx_mode,
+	.ndo_set_features	= ionic_set_features,
+	.ndo_set_mac_address	= ionic_set_mac_address,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_tx_timeout		= ionic_tx_timeout,
+	.ndo_change_mtu		= ionic_change_mtu,
+	.ndo_vlan_rx_add_vid	= ionic_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= ionic_vlan_rx_kill_vid,
+};
+
+int ionic_reset_queues(struct ionic_lif *lif)
+{
+	bool running;
+	int err = 0;
+
+	/* Put off the next watchdog timeout */
+	netif_trans_update(lif->netdev);
+
+	if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
+		return -EBUSY;
+
+	running = netif_running(lif->netdev);
+	if (running)
+		err = ionic_stop(lif->netdev);
+	if (!err && running)
+		ionic_open(lif->netdev);
+
+	clear_bit(IONIC_LIF_QUEUE_RESET, lif->state);
+
+	return err;
+}
+
+static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index)
+{
+	struct device *dev = ionic->dev;
+	struct net_device *netdev;
+	struct ionic_lif *lif;
+	int tbl_sz;
+	u32 coal;
+	int err;
+
+	netdev = alloc_etherdev_mqs(sizeof(*lif),
+				    ionic->ntxqs_per_lif, ionic->nrxqs_per_lif);
+	if (!netdev) {
+		dev_err(dev, "Cannot allocate netdev, aborting\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	SET_NETDEV_DEV(netdev, dev);
+
+	lif = netdev_priv(netdev);
+	lif->netdev = netdev;
+	ionic->master_lif = lif;
+	netdev->netdev_ops = &ionic_netdev_ops;
+	ionic_ethtool_set_ops(netdev);
+
+	netdev->watchdog_timeo = 2 * HZ;
+	netdev->min_mtu = IONIC_MIN_MTU;
+	netdev->max_mtu = IONIC_MAX_MTU;
+
+	lif->neqs = ionic->neqs_per_lif;
+	lif->nxqs = ionic->ntxqs_per_lif;
+
+	lif->ionic = ionic;
+	lif->index = index;
+	lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
+	lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
+
+	/* Convert the default coalesce value to actual hw resolution */
+	coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT);
+	lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+
+	snprintf(lif->name, sizeof(lif->name), "lif%u", index);
+
+	spin_lock_init(&lif->adminq_lock);
+
+	spin_lock_init(&lif->deferred.lock);
+	INIT_LIST_HEAD(&lif->deferred.list);
+	INIT_WORK(&lif->deferred.work, ionic_lif_deferred_work);
+
+	/* allocate lif info */
+	lif->info_sz = ALIGN(sizeof(*lif->info), PAGE_SIZE);
+	lif->info = dma_alloc_coherent(dev, lif->info_sz,
+				       &lif->info_pa, GFP_KERNEL);
+	if (!lif->info) {
+		dev_err(dev, "Failed to allocate lif info, aborting\n");
+		err = -ENOMEM;
+		goto err_out_free_netdev;
+	}
+
+	/* allocate queues */
+	err = ionic_qcqs_alloc(lif);
+	if (err)
+		goto err_out_free_lif_info;
+
+	/* allocate rss indirection table */
+	tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+	lif->rss_ind_tbl_sz = sizeof(*lif->rss_ind_tbl) * tbl_sz;
+	lif->rss_ind_tbl = dma_alloc_coherent(dev, lif->rss_ind_tbl_sz,
+					      &lif->rss_ind_tbl_pa,
+					      GFP_KERNEL);
+
+	if (!lif->rss_ind_tbl) {
+		err = -ENOMEM;
+		dev_err(dev, "Failed to allocate rss indirection table, aborting\n");
+		goto err_out_free_qcqs;
+	}
+
+	list_add_tail(&lif->list, &ionic->lifs);
+
+	return lif;
+
+err_out_free_qcqs:
+	ionic_qcqs_free(lif);
+err_out_free_lif_info:
+	dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa);
+	lif->info = NULL;
+	lif->info_pa = 0;
+err_out_free_netdev:
+	free_netdev(lif->netdev);
+	lif = NULL;
+
+	return ERR_PTR(err);
+}
+
+int ionic_lifs_alloc(struct ionic *ionic)
+{
+	struct ionic_lif *lif;
+
+	INIT_LIST_HEAD(&ionic->lifs);
+
+	/* only build the first lif, others are for later features */
+	set_bit(0, ionic->lifbits);
+	lif = ionic_lif_alloc(ionic, 0);
+
+	return PTR_ERR_OR_ZERO(lif);
+}
+
+static void ionic_lif_reset(struct ionic_lif *lif)
+{
+	struct ionic_dev *idev = &lif->ionic->idev;
+
+	mutex_lock(&lif->ionic->dev_cmd_lock);
+	ionic_dev_cmd_lif_reset(idev, lif->index);
+	ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&lif->ionic->dev_cmd_lock);
+}
+
+static void ionic_lif_free(struct ionic_lif *lif)
+{
+	struct device *dev = lif->ionic->dev;
+
+	/* free rss indirection table */
+	dma_free_coherent(dev, lif->rss_ind_tbl_sz, lif->rss_ind_tbl,
+			  lif->rss_ind_tbl_pa);
+	lif->rss_ind_tbl = NULL;
+	lif->rss_ind_tbl_pa = 0;
+
+	/* free queues */
+	ionic_qcqs_free(lif);
+	ionic_lif_reset(lif);
+
+	/* free lif info */
+	dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa);
+	lif->info = NULL;
+	lif->info_pa = 0;
+
+	/* unmap doorbell page */
+	ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
+	lif->kern_dbpage = NULL;
+	kfree(lif->dbid_inuse);
+	lif->dbid_inuse = NULL;
+
+	/* free netdev & lif */
+	ionic_debugfs_del_lif(lif);
+	list_del(&lif->list);
+	free_netdev(lif->netdev);
+}
+
+void ionic_lifs_free(struct ionic *ionic)
+{
+	struct list_head *cur, *tmp;
+	struct ionic_lif *lif;
+
+	list_for_each_safe(cur, tmp, &ionic->lifs) {
+		lif = list_entry(cur, struct ionic_lif, list);
+
+		ionic_lif_free(lif);
+	}
+}
+
+static void ionic_lif_deinit(struct ionic_lif *lif)
+{
+	if (!test_bit(IONIC_LIF_INITED, lif->state))
+		return;
+
+	clear_bit(IONIC_LIF_INITED, lif->state);
+
+	ionic_rx_filters_deinit(lif);
+	ionic_lif_rss_deinit(lif);
+
+	napi_disable(&lif->adminqcq->napi);
+	ionic_lif_qcq_deinit(lif, lif->notifyqcq);
+	ionic_lif_qcq_deinit(lif, lif->adminqcq);
+
+	ionic_lif_reset(lif);
+}
+
+void ionic_lifs_deinit(struct ionic *ionic)
+{
+	struct list_head *cur, *tmp;
+	struct ionic_lif *lif;
+
+	list_for_each_safe(cur, tmp, &ionic->lifs) {
+		lif = list_entry(cur, struct ionic_lif, list);
+		ionic_lif_deinit(lif);
+	}
+}
+
+static int ionic_lif_adminq_init(struct ionic_lif *lif)
+{
+	struct device *dev = lif->ionic->dev;
+	struct ionic_q_init_comp comp;
+	struct ionic_dev *idev;
+	struct ionic_qcq *qcq;
+	struct ionic_queue *q;
+	int err;
+
+	idev = &lif->ionic->idev;
+	qcq = lif->adminqcq;
+	q = &qcq->q;
+
+	mutex_lock(&lif->ionic->dev_cmd_lock);
+	ionic_dev_cmd_adminq_init(idev, qcq, lif->index, qcq->intr.index);
+	err = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+	ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp);
+	mutex_unlock(&lif->ionic->dev_cmd_lock);
+	if (err) {
+		netdev_err(lif->netdev, "adminq init failed %d\n", err);
+		return err;
+	}
+
+	q->hw_type = comp.hw_type;
+	q->hw_index = le32_to_cpu(comp.hw_index);
+	q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+	dev_dbg(dev, "adminq->hw_type %d\n", q->hw_type);
+	dev_dbg(dev, "adminq->hw_index %d\n", q->hw_index);
+
+	netif_napi_add(lif->netdev, &qcq->napi, ionic_adminq_napi,
+		       NAPI_POLL_WEIGHT);
+
+	err = ionic_request_irq(lif, qcq);
+	if (err) {
+		netdev_warn(lif->netdev, "adminq irq request failed %d\n", err);
+		netif_napi_del(&qcq->napi);
+		return err;
+	}
+
+	napi_enable(&qcq->napi);
+
+	if (qcq->flags & IONIC_QCQ_F_INTR)
+		ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+				IONIC_INTR_MASK_CLEAR);
+
+	qcq->flags |= IONIC_QCQ_F_INITED;
+
+	ionic_debugfs_add_qcq(lif, qcq);
+
+	return 0;
+}
+
+static int ionic_lif_notifyq_init(struct ionic_lif *lif)
+{
+	struct ionic_qcq *qcq = lif->notifyqcq;
+	struct device *dev = lif->ionic->dev;
+	struct ionic_queue *q = &qcq->q;
+	int err;
+
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.q_init = {
+			.opcode = IONIC_CMD_Q_INIT,
+			.lif_index = cpu_to_le16(lif->index),
+			.type = q->type,
+			.index = cpu_to_le32(q->index),
+			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+					     IONIC_QINIT_F_ENA),
+			.intr_index = cpu_to_le16(lif->adminqcq->intr.index),
+			.pid = cpu_to_le16(q->pid),
+			.ring_size = ilog2(q->num_descs),
+			.ring_base = cpu_to_le64(q->base_pa),
+		}
+	};
+
+	dev_dbg(dev, "notifyq_init.pid %d\n", ctx.cmd.q_init.pid);
+	dev_dbg(dev, "notifyq_init.index %d\n", ctx.cmd.q_init.index);
+	dev_dbg(dev, "notifyq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
+	dev_dbg(dev, "notifyq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	q->hw_type = ctx.comp.q_init.hw_type;
+	q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index);
+	q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+	dev_dbg(dev, "notifyq->hw_type %d\n", q->hw_type);
+	dev_dbg(dev, "notifyq->hw_index %d\n", q->hw_index);
+
+	/* preset the callback info */
+	q->info[0].cb_arg = lif;
+
+	qcq->flags |= IONIC_QCQ_F_INITED;
+
+	ionic_debugfs_add_qcq(lif, qcq);
+
+	return 0;
+}
+
+static int ionic_station_set(struct ionic_lif *lif)
+{
+	struct net_device *netdev = lif->netdev;
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_getattr = {
+			.opcode = IONIC_CMD_LIF_GETATTR,
+			.index = cpu_to_le16(lif->index),
+			.attr = IONIC_LIF_ATTR_MAC,
+		},
+	};
+	struct sockaddr addr;
+	int err;
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err)
+		return err;
+
+	memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
+	addr.sa_family = AF_INET;
+	err = eth_prepare_mac_addr_change(netdev, &addr);
+	if (err)
+		return err;
+
+	if (!is_zero_ether_addr(netdev->dev_addr)) {
+		netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n",
+			   netdev->dev_addr);
+		ionic_lif_addr(lif, netdev->dev_addr, false);
+	}
+
+	eth_commit_mac_addr_change(netdev, &addr);
+	netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
+		   netdev->dev_addr);
+	ionic_lif_addr(lif, netdev->dev_addr, true);
+
+	return 0;
+}
+
+static int ionic_lif_init(struct ionic_lif *lif)
+{
+	struct ionic_dev *idev = &lif->ionic->idev;
+	struct device *dev = lif->ionic->dev;
+	struct ionic_lif_init_comp comp;
+	int dbpage_num;
+	int err;
+
+	ionic_debugfs_add_lif(lif);
+
+	mutex_lock(&lif->ionic->dev_cmd_lock);
+	ionic_dev_cmd_lif_init(idev, lif->index, lif->info_pa);
+	err = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+	ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp);
+	mutex_unlock(&lif->ionic->dev_cmd_lock);
+	if (err)
+		return err;
+
+	lif->hw_index = le16_to_cpu(comp.hw_index);
+
+	/* now that we have the hw_index we can figure out our doorbell page */
+	lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+	if (!lif->dbid_count) {
+		dev_err(dev, "No doorbell pages, aborting\n");
+		return -EINVAL;
+	}
+
+	lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
+	if (!lif->dbid_inuse) {
+		dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
+		return -ENOMEM;
+	}
+
+	/* first doorbell id reserved for kernel (dbid aka pid == zero) */
+	set_bit(0, lif->dbid_inuse);
+	lif->kern_pid = 0;
+
+	dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
+	lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
+	if (!lif->kern_dbpage) {
+		dev_err(dev, "Cannot map dbpage, aborting\n");
+		err = -ENOMEM;
+		goto err_out_free_dbid;
+	}
+
+	err = ionic_lif_adminq_init(lif);
+	if (err)
+		goto err_out_adminq_deinit;
+
+	if (lif->ionic->nnqs_per_lif) {
+		err = ionic_lif_notifyq_init(lif);
+		if (err)
+			goto err_out_notifyq_deinit;
+	}
+
+	err = ionic_init_nic_features(lif);
+	if (err)
+		goto err_out_notifyq_deinit;
+
+	err = ionic_rx_filters_init(lif);
+	if (err)
+		goto err_out_notifyq_deinit;
+
+	err = ionic_station_set(lif);
+	if (err)
+		goto err_out_notifyq_deinit;
+
+	lif->rx_copybreak = IONIC_RX_COPYBREAK_DEFAULT;
+
+	set_bit(IONIC_LIF_INITED, lif->state);
+
+	INIT_WORK(&lif->tx_timeout_work, ionic_tx_timeout_work);
+
+	return 0;
+
+err_out_notifyq_deinit:
+	ionic_lif_qcq_deinit(lif, lif->notifyqcq);
+err_out_adminq_deinit:
+	ionic_lif_qcq_deinit(lif, lif->adminqcq);
+	ionic_lif_reset(lif);
+	ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
+	lif->kern_dbpage = NULL;
+err_out_free_dbid:
+	kfree(lif->dbid_inuse);
+	lif->dbid_inuse = NULL;
+
+	return err;
+}
+
+int ionic_lifs_init(struct ionic *ionic)
+{
+	struct list_head *cur, *tmp;
+	struct ionic_lif *lif;
+	int err;
+
+	list_for_each_safe(cur, tmp, &ionic->lifs) {
+		lif = list_entry(cur, struct ionic_lif, list);
+		err = ionic_lif_init(lif);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void ionic_lif_notify_work(struct work_struct *ws)
+{
+}
+
+static void ionic_lif_set_netdev_info(struct ionic_lif *lif)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_setattr = {
+			.opcode = IONIC_CMD_LIF_SETATTR,
+			.index = cpu_to_le16(lif->index),
+			.attr = IONIC_LIF_ATTR_NAME,
+		},
+	};
+
+	strlcpy(ctx.cmd.lif_setattr.name, lif->netdev->name,
+		sizeof(ctx.cmd.lif_setattr.name));
+
+	ionic_adminq_post_wait(lif, &ctx);
+}
+
+static struct ionic_lif *ionic_netdev_lif(struct net_device *netdev)
+{
+	if (!netdev || netdev->netdev_ops->ndo_start_xmit != ionic_start_xmit)
+		return NULL;
+
+	return netdev_priv(netdev);
+}
+
+static int ionic_lif_notify(struct notifier_block *nb,
+			    unsigned long event, void *info)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(info);
+	struct ionic *ionic = container_of(nb, struct ionic, nb);
+	struct ionic_lif *lif = ionic_netdev_lif(ndev);
+
+	if (!lif || lif->ionic != ionic)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_CHANGENAME:
+		ionic_lif_set_netdev_info(lif);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+int ionic_lifs_register(struct ionic *ionic)
+{
+	int err;
+
+	INIT_WORK(&ionic->nb_work, ionic_lif_notify_work);
+
+	ionic->nb.notifier_call = ionic_lif_notify;
+
+	err = register_netdevice_notifier(&ionic->nb);
+	if (err)
+		ionic->nb.notifier_call = NULL;
+
+	/* only register LIF0 for now */
+	err = register_netdev(ionic->master_lif->netdev);
+	if (err) {
+		dev_err(ionic->dev, "Cannot register net device, aborting\n");
+		return err;
+	}
+
+	ionic_link_status_check_request(ionic->master_lif);
+	ionic->master_lif->registered = true;
+
+	return 0;
+}
+
+void ionic_lifs_unregister(struct ionic *ionic)
+{
+	if (ionic->nb.notifier_call) {
+		unregister_netdevice_notifier(&ionic->nb);
+		cancel_work_sync(&ionic->nb_work);
+		ionic->nb.notifier_call = NULL;
+	}
+
+	/* There is only one lif ever registered in the
+	 * current model, so don't bother searching the
+	 * ionic->lifs list for candidates to unregister
+	 */
+	cancel_work_sync(&ionic->master_lif->deferred.work);
+	cancel_work_sync(&ionic->master_lif->tx_timeout_work);
+	if (ionic->master_lif->netdev->reg_state == NETREG_REGISTERED)
+		unregister_netdev(ionic->master_lif->netdev);
+}
+
+int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
+		       union ionic_lif_identity *lid)
+{
+	struct ionic_dev *idev = &ionic->idev;
+	size_t sz;
+	int err;
+
+	sz = min(sizeof(*lid), sizeof(idev->dev_cmd_regs->data));
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_lif_identify(idev, lif_type, IONIC_IDENTITY_VERSION_1);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	memcpy_fromio(lid, &idev->dev_cmd_regs->data, sz);
+	mutex_unlock(&ionic->dev_cmd_lock);
+	if (err)
+		return err;
+
+	dev_dbg(ionic->dev, "capabilities 0x%llx\n",
+		le64_to_cpu(lid->capabilities));
+
+	dev_dbg(ionic->dev, "eth.max_ucast_filters %d\n",
+		le32_to_cpu(lid->eth.max_ucast_filters));
+	dev_dbg(ionic->dev, "eth.max_mcast_filters %d\n",
+		le32_to_cpu(lid->eth.max_mcast_filters));
+	dev_dbg(ionic->dev, "eth.features 0x%llx\n",
+		le64_to_cpu(lid->eth.config.features));
+	dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_ADMINQ] %d\n",
+		le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_ADMINQ]));
+	dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_NOTIFYQ] %d\n",
+		le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_NOTIFYQ]));
+	dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_RXQ] %d\n",
+		le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_RXQ]));
+	dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_TXQ] %d\n",
+		le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_TXQ]));
+	dev_dbg(ionic->dev, "eth.config.name %s\n", lid->eth.config.name);
+	dev_dbg(ionic->dev, "eth.config.mac %pM\n", lid->eth.config.mac);
+	dev_dbg(ionic->dev, "eth.config.mtu %d\n",
+		le32_to_cpu(lid->eth.config.mtu));
+
+	return 0;
+}
+
+int ionic_lifs_size(struct ionic *ionic)
+{
+	struct ionic_identity *ident = &ionic->ident;
+	unsigned int nintrs, dev_nintrs;
+	union ionic_lif_config *lc;
+	unsigned int ntxqs_per_lif;
+	unsigned int nrxqs_per_lif;
+	unsigned int neqs_per_lif;
+	unsigned int nnqs_per_lif;
+	unsigned int nxqs, neqs;
+	unsigned int min_intrs;
+	int err;
+
+	lc = &ident->lif.eth.config;
+	dev_nintrs = le32_to_cpu(ident->dev.nintrs);
+	neqs_per_lif = le32_to_cpu(ident->lif.rdma.eq_qtype.qid_count);
+	nnqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_NOTIFYQ]);
+	ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]);
+	nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]);
+
+	nxqs = min(ntxqs_per_lif, nrxqs_per_lif);
+	nxqs = min(nxqs, num_online_cpus());
+	neqs = min(neqs_per_lif, num_online_cpus());
+
+try_again:
+	/* interrupt usage:
+	 *    1 for master lif adminq/notifyq
+	 *    1 for each CPU for master lif TxRx queue pairs
+	 *    whatever's left is for RDMA queues
+	 */
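+	/* Illustrative walk (hypothetical numbers): with dev_nintrs = 8
+	 * and 16 online CPUs, nxqs and neqs both start at 16, so the
+	 * first pass asks for nintrs = 1 + 16 + 16 = 33.  try_fewer then
+	 * halves nnqs (which doesn't shrink nintrs), then neqs down to 1,
+	 * then nxqs, until nintrs = 1 + 4 + 1 = 6 fits in dev_nintrs.
+	 */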
+	nintrs = 1 + nxqs + neqs;
+	min_intrs = 2;  /* adminq + 1 TxRx queue pair */
+
+	if (nintrs > dev_nintrs)
+		goto try_fewer;
+
+	err = ionic_bus_alloc_irq_vectors(ionic, nintrs);
+	if (err < 0 && err != -ENOSPC) {
+		dev_err(ionic->dev, "Can't get intrs from OS: %d\n", err);
+		return err;
+	}
+	if (err == -ENOSPC)
+		goto try_fewer;
+
+	if (err != nintrs) {
+		ionic_bus_free_irq_vectors(ionic);
+		goto try_fewer;
+	}
+
+	ionic->nnqs_per_lif = nnqs_per_lif;
+	ionic->neqs_per_lif = neqs;
+	ionic->ntxqs_per_lif = nxqs;
+	ionic->nrxqs_per_lif = nxqs;
+	ionic->nintrs = nintrs;
+
+	ionic_debugfs_add_sizes(ionic);
+
+	return 0;
+
+try_fewer:
+	if (nnqs_per_lif > 1) {
+		nnqs_per_lif >>= 1;
+		goto try_again;
+	}
+	if (neqs > 1) {
+		neqs >>= 1;
+		goto try_again;
+	}
+	if (nxqs > 1) {
+		nxqs >>= 1;
+		goto try_again;
+	}
+	dev_err(ionic->dev, "Can't get minimum %d intrs from OS\n", min_intrs);
+	return -ENOSPC;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
new file mode 100644
index 0000000..6a95b42
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_LIF_H_
+#define _IONIC_LIF_H_
+
+#include <linux/pci.h>
+#include "ionic_rx_filter.h"
+
+#define IONIC_ADMINQ_LENGTH	16	/* must be a power of two */
+#define IONIC_NOTIFYQ_LENGTH	64	/* must be a power of two */
+
+#define IONIC_MAX_NUM_NAPI_CNTR		(NAPI_POLL_WEIGHT + 1)
+#define IONIC_MAX_NUM_SG_CNTR		(IONIC_TX_MAX_SG_ELEMS + 1)
+#define IONIC_RX_COPYBREAK_DEFAULT	256
+
+struct ionic_tx_stats {
+	u64 dma_map_err;
+	u64 pkts;
+	u64 bytes;
+	u64 clean;
+	u64 linearize;
+	u64 no_csum;
+	u64 csum;
+	u64 crc32_csum;
+	u64 tso;
+	u64 frags;
+	u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR];
+};
+
+struct ionic_rx_stats {
+	u64 dma_map_err;
+	u64 alloc_err;
+	u64 pkts;
+	u64 bytes;
+	u64 csum_none;
+	u64 csum_complete;
+	u64 csum_error;
+	u64 buffers_posted;
+};
+
+#define IONIC_QCQ_F_INITED		BIT(0)
+#define IONIC_QCQ_F_SG			BIT(1)
+#define IONIC_QCQ_F_INTR		BIT(2)
+#define IONIC_QCQ_F_TX_STATS		BIT(3)
+#define IONIC_QCQ_F_RX_STATS		BIT(4)
+#define IONIC_QCQ_F_NOTIFYQ		BIT(5)
+
+struct ionic_napi_stats {
+	u64 poll_count;
+	u64 work_done_cntr[IONIC_MAX_NUM_NAPI_CNTR];
+};
+
+struct ionic_q_stats {
+	union {
+		struct ionic_tx_stats tx;
+		struct ionic_rx_stats rx;
+	};
+};
+
+struct ionic_qcq {
+	void *base;
+	dma_addr_t base_pa;
+	unsigned int total_size;
+	struct ionic_queue q;
+	struct ionic_cq cq;
+	struct ionic_intr_info intr;
+	struct napi_struct napi;
+	struct ionic_napi_stats napi_stats;
+	struct ionic_q_stats *stats;
+	unsigned int flags;
+	struct dentry *dentry;
+};
+
+struct ionic_qcqst {
+	struct ionic_qcq *qcq;
+	struct ionic_q_stats *stats;
+};
+
+#define q_to_qcq(q)		container_of(q, struct ionic_qcq, q)
+#define q_to_tx_stats(q)	(&q_to_qcq(q)->stats->tx)
+#define q_to_rx_stats(q)	(&q_to_qcq(q)->stats->rx)
+#define napi_to_qcq(napi)	container_of(napi, struct ionic_qcq, napi)
+#define napi_to_cq(napi)	(&napi_to_qcq(napi)->cq)
+
+enum ionic_deferred_work_type {
+	IONIC_DW_TYPE_RX_MODE,
+	IONIC_DW_TYPE_RX_ADDR_ADD,
+	IONIC_DW_TYPE_RX_ADDR_DEL,
+	IONIC_DW_TYPE_LINK_STATUS,
+	IONIC_DW_TYPE_LIF_RESET,
+};
+
+struct ionic_deferred_work {
+	struct list_head list;
+	enum ionic_deferred_work_type type;
+	union {
+		unsigned int rx_mode;
+		u8 addr[ETH_ALEN];
+	};
+};
+
+struct ionic_deferred {
+	spinlock_t lock;		/* lock for deferred work list */
+	struct list_head list;
+	struct work_struct work;
+};
+
+struct ionic_lif_sw_stats {
+	u64 tx_packets;
+	u64 tx_bytes;
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 tx_tso;
+	u64 tx_no_csum;
+	u64 tx_csum;
+	u64 rx_csum_none;
+	u64 rx_csum_complete;
+	u64 rx_csum_error;
+};
+
+enum ionic_lif_state_flags {
+	IONIC_LIF_INITED,
+	IONIC_LIF_SW_DEBUG_STATS,
+	IONIC_LIF_UP,
+	IONIC_LIF_LINK_CHECK_REQUESTED,
+	IONIC_LIF_QUEUE_RESET,
+
+	/* leave this as last */
+	IONIC_LIF_STATE_SIZE
+};
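+
+/* These flags index the lif->state bitmap declared below with
+ * DECLARE_BITMAP(state, IONIC_LIF_STATE_SIZE), so IONIC_LIF_STATE_SIZE
+ * must remain the last entry or a new flag would index past the bitmap.
+ */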
+
+#define IONIC_LIF_NAME_MAX_SZ		32
+struct ionic_lif {
+	char name[IONIC_LIF_NAME_MAX_SZ];
+	struct list_head list;
+	struct net_device *netdev;
+	DECLARE_BITMAP(state, IONIC_LIF_STATE_SIZE);
+	struct ionic *ionic;
+	bool registered;
+	unsigned int index;
+	unsigned int hw_index;
+	unsigned int kern_pid;
+	u64 __iomem *kern_dbpage;
+	spinlock_t adminq_lock;		/* lock for AdminQ operations */
+	struct ionic_qcq *adminqcq;
+	struct ionic_qcq *notifyqcq;
+	struct ionic_qcqst *txqcqs;
+	struct ionic_qcqst *rxqcqs;
+	u64 last_eid;
+	unsigned int neqs;
+	unsigned int nxqs;
+	unsigned int ntxq_descs;
+	unsigned int nrxq_descs;
+	u32 rx_copybreak;
+	unsigned int rx_mode;
+	u64 hw_features;
+	bool mc_overflow;
+	unsigned int nmcast;
+	bool uc_overflow;
+	unsigned int nucast;
+
+	struct ionic_lif_info *info;
+	dma_addr_t info_pa;
+	u32 info_sz;
+
+	u16 rss_types;
+	u8 rss_hash_key[IONIC_RSS_HASH_KEY_SIZE];
+	u8 *rss_ind_tbl;
+	dma_addr_t rss_ind_tbl_pa;
+	u32 rss_ind_tbl_sz;
+
+	struct ionic_rx_filters rx_filters;
+	struct ionic_deferred deferred;
+	unsigned long *dbid_inuse;
+	unsigned int dbid_count;
+	struct dentry *dentry;
+	u32 rx_coalesce_usecs;
+	u32 flags;
+	struct work_struct tx_timeout_work;
+};
+
+#define lif_to_txqcq(lif, i)	((lif)->txqcqs[i].qcq)
+#define lif_to_rxqcq(lif, i)	((lif)->rxqcqs[i].qcq)
+#define lif_to_txstats(lif, i)	((lif)->txqcqs[i].stats->tx)
+#define lif_to_rxstats(lif, i)	((lif)->rxqcqs[i].stats->rx)
+#define lif_to_txq(lif, i)	(&lif_to_txqcq((lif), i)->q)
+#define lif_to_rxq(lif, i)	(&lif_to_rxqcq((lif), i)->q)
+
+static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname)
+{
+	unsigned long tlimit = jiffies + HZ;
+
+	/* poll for up to a second to take ownership of the bit;
+	 * returns nonzero only if this caller set the bit
+	 */
+	while (test_and_set_bit(bitname, lif->state)) {
+		if (!time_before(jiffies, tlimit))
+			return 0;
+		usleep_range(100, 200);
+	}
+
+	return 1;
+}
+
+static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
+{
+	u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult);
+	u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div);
+
+	/* Div-by-zero should never be an issue, but check anyway */
+	if (!div || !mult)
+		return 0;
+
+	/* Round up in case usecs is close to the next hw unit */
+	usecs += (div / mult) >> 1;
+
+	/* Convert from usecs to device units */
+	return (usecs * mult) / div;
+}
+
+static inline u32 ionic_coal_hw_to_usec(struct ionic *ionic, u32 units)
+{
+	u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult);
+	u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div);
+
+	/* Div-by-zero should never be an issue, but check anyway */
+	if (!div || !mult)
+		return 0;
+
+	/* Convert from device units to usec */
+	return (units * div) / mult;
+}
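+
+/* Worked example for the two converters above (illustrative values):
+ * with intr_coal_mult = 1 and intr_coal_div = 10, one hw unit is
+ * 10 usecs.  ionic_coal_usec_to_hw(ionic, 64) rounds up to
+ * (64 + 5) * 1 / 10 = 6 units, and ionic_coal_hw_to_usec(ionic, 6)
+ * comes back as 60 usecs, so a round trip can settle on a nearby
+ * value; this is why ionic_lif_alloc() stores the round-tripped
+ * result in rx_coalesce_usecs rather than the requested default.
+ */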
+
+int ionic_lifs_alloc(struct ionic *ionic);
+void ionic_lifs_free(struct ionic *ionic);
+void ionic_lifs_deinit(struct ionic *ionic);
+int ionic_lifs_init(struct ionic *ionic);
+int ionic_lifs_register(struct ionic *ionic);
+void ionic_lifs_unregister(struct ionic *ionic);
+int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
+		       union ionic_lif_identity *lif_ident);
+int ionic_lifs_size(struct ionic *ionic);
+int ionic_lif_rss_config(struct ionic_lif *lif, u16 types,
+			 const u8 *key, const u32 *indir);
+
+int ionic_open(struct net_device *netdev);
+int ionic_stop(struct net_device *netdev);
+int ionic_reset_queues(struct ionic_lif *lif);
+
+static inline void debug_stats_txq_post(struct ionic_qcq *qcq,
+					struct ionic_txq_desc *desc, bool dbell)
+{
+	u8 num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT)
+						& IONIC_TXQ_DESC_NSGE_MASK);
+
+	qcq->q.dbell_count += dbell;
+
+	if (num_sg_elems > (IONIC_MAX_NUM_SG_CNTR - 1))
+		num_sg_elems = IONIC_MAX_NUM_SG_CNTR - 1;
+
+	qcq->stats->tx.sg_cntr[num_sg_elems]++;
+}
+
+static inline void debug_stats_napi_poll(struct ionic_qcq *qcq,
+					 unsigned int work_done)
+{
+	qcq->napi_stats.poll_count++;
+
+	if (work_done > (IONIC_MAX_NUM_NAPI_CNTR - 1))
+		work_done = IONIC_MAX_NUM_NAPI_CNTR - 1;
+
+	qcq->napi_stats.work_done_cntr[work_done]++;
+}
+
+#define DEBUG_STATS_CQE_CNT(cq)		((cq)->compl_count++)
+#define DEBUG_STATS_RX_BUFF_CNT(qcq)	((qcq)->stats->rx.buffers_posted++)
+#define DEBUG_STATS_INTR_REARM(intr)	((intr)->rearm_count++)
+#define DEBUG_STATS_TXQ_POST(qcq, txdesc, dbell) \
+	debug_stats_txq_post(qcq, txdesc, dbell)
+#define DEBUG_STATS_NAPI_POLL(qcq, work_done) \
+	debug_stats_napi_poll(qcq, work_done)
+
+#endif /* _IONIC_LIF_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
new file mode 100644
index 0000000..aab3114
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/printk.h>
+#include <linux/dynamic_debug.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/utsname.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_debugfs.h"
+
+MODULE_DESCRIPTION(IONIC_DRV_DESCRIPTION);
+MODULE_AUTHOR("Pensando Systems, Inc");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(IONIC_DRV_VERSION);
+
+static const char *ionic_error_to_str(enum ionic_status_code code)
+{
+	switch (code) {
+	case IONIC_RC_SUCCESS:
+		return "IONIC_RC_SUCCESS";
+	case IONIC_RC_EVERSION:
+		return "IONIC_RC_EVERSION";
+	case IONIC_RC_EOPCODE:
+		return "IONIC_RC_EOPCODE";
+	case IONIC_RC_EIO:
+		return "IONIC_RC_EIO";
+	case IONIC_RC_EPERM:
+		return "IONIC_RC_EPERM";
+	case IONIC_RC_EQID:
+		return "IONIC_RC_EQID";
+	case IONIC_RC_EQTYPE:
+		return "IONIC_RC_EQTYPE";
+	case IONIC_RC_ENOENT:
+		return "IONIC_RC_ENOENT";
+	case IONIC_RC_EINTR:
+		return "IONIC_RC_EINTR";
+	case IONIC_RC_EAGAIN:
+		return "IONIC_RC_EAGAIN";
+	case IONIC_RC_ENOMEM:
+		return "IONIC_RC_ENOMEM";
+	case IONIC_RC_EFAULT:
+		return "IONIC_RC_EFAULT";
+	case IONIC_RC_EBUSY:
+		return "IONIC_RC_EBUSY";
+	case IONIC_RC_EEXIST:
+		return "IONIC_RC_EEXIST";
+	case IONIC_RC_EINVAL:
+		return "IONIC_RC_EINVAL";
+	case IONIC_RC_ENOSPC:
+		return "IONIC_RC_ENOSPC";
+	case IONIC_RC_ERANGE:
+		return "IONIC_RC_ERANGE";
+	case IONIC_RC_BAD_ADDR:
+		return "IONIC_RC_BAD_ADDR";
+	case IONIC_RC_DEV_CMD:
+		return "IONIC_RC_DEV_CMD";
+	case IONIC_RC_ERROR:
+		return "IONIC_RC_ERROR";
+	case IONIC_RC_ERDMA:
+		return "IONIC_RC_ERDMA";
+	default:
+		return "IONIC_RC_UNKNOWN";
+	}
+}
+
+static int ionic_error_to_errno(enum ionic_status_code code)
+{
+	switch (code) {
+	case IONIC_RC_SUCCESS:
+		return 0;
+	case IONIC_RC_EVERSION:
+	case IONIC_RC_EQTYPE:
+	case IONIC_RC_EQID:
+	case IONIC_RC_EINVAL:
+		return -EINVAL;
+	case IONIC_RC_EPERM:
+		return -EPERM;
+	case IONIC_RC_ENOENT:
+		return -ENOENT;
+	case IONIC_RC_EAGAIN:
+		return -EAGAIN;
+	case IONIC_RC_ENOMEM:
+		return -ENOMEM;
+	case IONIC_RC_EFAULT:
+		return -EFAULT;
+	case IONIC_RC_EBUSY:
+		return -EBUSY;
+	case IONIC_RC_EEXIST:
+		return -EEXIST;
+	case IONIC_RC_ENOSPC:
+		return -ENOSPC;
+	case IONIC_RC_ERANGE:
+		return -ERANGE;
+	case IONIC_RC_BAD_ADDR:
+		return -EFAULT;
+	case IONIC_RC_EOPCODE:
+	case IONIC_RC_EINTR:
+	case IONIC_RC_DEV_CMD:
+	case IONIC_RC_ERROR:
+	case IONIC_RC_ERDMA:
+	case IONIC_RC_EIO:
+	default:
+		return -EIO;
+	}
+}
+
+static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
+{
+	switch (opcode) {
+	case IONIC_CMD_NOP:
+		return "IONIC_CMD_NOP";
+	case IONIC_CMD_INIT:
+		return "IONIC_CMD_INIT";
+	case IONIC_CMD_RESET:
+		return "IONIC_CMD_RESET";
+	case IONIC_CMD_IDENTIFY:
+		return "IONIC_CMD_IDENTIFY";
+	case IONIC_CMD_GETATTR:
+		return "IONIC_CMD_GETATTR";
+	case IONIC_CMD_SETATTR:
+		return "IONIC_CMD_SETATTR";
+	case IONIC_CMD_PORT_IDENTIFY:
+		return "IONIC_CMD_PORT_IDENTIFY";
+	case IONIC_CMD_PORT_INIT:
+		return "IONIC_CMD_PORT_INIT";
+	case IONIC_CMD_PORT_RESET:
+		return "IONIC_CMD_PORT_RESET";
+	case IONIC_CMD_PORT_GETATTR:
+		return "IONIC_CMD_PORT_GETATTR";
+	case IONIC_CMD_PORT_SETATTR:
+		return "IONIC_CMD_PORT_SETATTR";
+	case IONIC_CMD_LIF_INIT:
+		return "IONIC_CMD_LIF_INIT";
+	case IONIC_CMD_LIF_RESET:
+		return "IONIC_CMD_LIF_RESET";
+	case IONIC_CMD_LIF_IDENTIFY:
+		return "IONIC_CMD_LIF_IDENTIFY";
+	case IONIC_CMD_LIF_SETATTR:
+		return "IONIC_CMD_LIF_SETATTR";
+	case IONIC_CMD_LIF_GETATTR:
+		return "IONIC_CMD_LIF_GETATTR";
+	case IONIC_CMD_RX_MODE_SET:
+		return "IONIC_CMD_RX_MODE_SET";
+	case IONIC_CMD_RX_FILTER_ADD:
+		return "IONIC_CMD_RX_FILTER_ADD";
+	case IONIC_CMD_RX_FILTER_DEL:
+		return "IONIC_CMD_RX_FILTER_DEL";
+	case IONIC_CMD_Q_INIT:
+		return "IONIC_CMD_Q_INIT";
+	case IONIC_CMD_Q_CONTROL:
+		return "IONIC_CMD_Q_CONTROL";
+	case IONIC_CMD_RDMA_RESET_LIF:
+		return "IONIC_CMD_RDMA_RESET_LIF";
+	case IONIC_CMD_RDMA_CREATE_EQ:
+		return "IONIC_CMD_RDMA_CREATE_EQ";
+	case IONIC_CMD_RDMA_CREATE_CQ:
+		return "IONIC_CMD_RDMA_CREATE_CQ";
+	case IONIC_CMD_RDMA_CREATE_ADMINQ:
+		return "IONIC_CMD_RDMA_CREATE_ADMINQ";
+	case IONIC_CMD_FW_DOWNLOAD:
+		return "IONIC_CMD_FW_DOWNLOAD";
+	case IONIC_CMD_FW_CONTROL:
+		return "IONIC_CMD_FW_CONTROL";
+	default:
+		return "DEVCMD_UNKNOWN";
+	}
+}
+
+static void ionic_adminq_flush(struct ionic_lif *lif)
+{
+	struct ionic_queue *adminq = &lif->adminqcq->q;
+
+	spin_lock(&lif->adminq_lock);
+
+	while (adminq->tail != adminq->head) {
+		memset(adminq->tail->desc, 0, sizeof(union ionic_adminq_cmd));
+		adminq->tail->cb = NULL;
+		adminq->tail->cb_arg = NULL;
+		adminq->tail = adminq->tail->next;
+	}
+	spin_unlock(&lif->adminq_lock);
+}
+
+static int ionic_adminq_check_err(struct ionic_lif *lif,
+				  struct ionic_admin_ctx *ctx,
+				  bool timeout)
+{
+	struct net_device *netdev = lif->netdev;
+	const char *opcode_str;
+	const char *status_str;
+	int err = 0;
+
+	if (ctx->comp.comp.status || timeout) {
+		opcode_str = ionic_opcode_to_str(ctx->cmd.cmd.opcode);
+		status_str = ionic_error_to_str(ctx->comp.comp.status);
+		err = timeout ? -ETIMEDOUT :
+				ionic_error_to_errno(ctx->comp.comp.status);
+
+		netdev_err(netdev, "%s (%d) failed: %s (%d)\n",
+			   opcode_str, ctx->cmd.cmd.opcode,
+			   timeout ? "TIMEOUT" : status_str, err);
+
+		if (timeout)
+			ionic_adminq_flush(lif);
+	}
+
+	return err;
+}
+
+static void ionic_adminq_cb(struct ionic_queue *q,
+			    struct ionic_desc_info *desc_info,
+			    struct ionic_cq_info *cq_info, void *cb_arg)
+{
+	struct ionic_admin_ctx *ctx = cb_arg;
+	struct ionic_admin_comp *comp;
+	struct device *dev;
+
+	if (!ctx)
+		return;
+
+	comp = cq_info->cq_desc;
+	dev = &q->lif->netdev->dev;
+
+	memcpy(&ctx->comp, comp, sizeof(*comp));
+
+	dev_dbg(dev, "comp admin queue command:\n");
+	dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
+			 &ctx->comp, sizeof(ctx->comp), true);
+
+	complete_all(&ctx->work);
+}
+
+static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+{
+	struct ionic_queue *adminq = &lif->adminqcq->q;
+	int err = 0;
+
+	WARN_ON(in_interrupt());
+
+	spin_lock(&lif->adminq_lock);
+	if (!ionic_q_has_space(adminq, 1)) {
+		err = -ENOSPC;
+		goto err_out;
+	}
+
+	memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd));
+
+	dev_dbg(&lif->netdev->dev, "post admin queue command:\n");
+	dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1,
+			 &ctx->cmd, sizeof(ctx->cmd), true);
+
+	ionic_q_post(adminq, true, ionic_adminq_cb, ctx);
+
+err_out:
+	spin_unlock(&lif->adminq_lock);
+
+	return err;
+}
+
+int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+{
+	struct net_device *netdev = lif->netdev;
+	unsigned long remaining;
+	const char *name;
+	int err;
+
+	err = ionic_adminq_post(lif, ctx);
+	if (err) {
+		name = ionic_opcode_to_str(ctx->cmd.cmd.opcode);
+		netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
+			   name, ctx->cmd.cmd.opcode, err);
+		return err;
+	}
+
+	remaining = wait_for_completion_timeout(&ctx->work,
+						HZ * (ulong)DEVCMD_TIMEOUT);
+	return ionic_adminq_check_err(lif, ctx, (remaining == 0));
+}
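+
+/* Flow note for ionic_adminq_post_wait(): the command is queued under
+ * adminq_lock by ionic_adminq_post(), the device's completion is copied
+ * back and ctx->work completed by ionic_adminq_cb() from the adminq
+ * napi context, and the caller sleeps here for up to DEVCMD_TIMEOUT
+ * seconds, which is why posting from interrupt context is forbidden
+ * (see the WARN_ON in ionic_adminq_post()).
+ */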
+
+int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
+	       ionic_cq_done_cb done_cb, void *done_arg)
+{
+	struct ionic_qcq *qcq = napi_to_qcq(napi);
+	struct ionic_cq *cq = &qcq->cq;
+	u32 work_done, flags = 0;
+
+	work_done = ionic_cq_service(cq, budget, cb, done_cb, done_arg);
+
+	if (work_done < budget && napi_complete_done(napi, work_done)) {
+		flags |= IONIC_INTR_CRED_UNMASK;
+		DEBUG_STATS_INTR_REARM(cq->bound_intr);
+	}
+
+	if (work_done || flags) {
+		flags |= IONIC_INTR_CRED_RESET_COALESCE;
+		ionic_intr_credits(cq->lif->ionic->idev.intr_ctrl,
+				   cq->bound_intr->index,
+				   work_done, flags);
+	}
+
+	DEBUG_STATS_NAPI_POLL(qcq, work_done);
+
+	return work_done;
+}
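+
+/* Return contract: a return value equal to budget keeps this napi
+ * scheduled; anything less means napi_complete_done() succeeded and the
+ * credits write above unmasked the interrupt, so the next service pass
+ * is driven by hardware rather than by the softirq poll loop.
+ */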
+
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+{
+	struct ionic_dev *idev = &ionic->idev;
+	unsigned long start_time;
+	unsigned long max_wait;
+	unsigned long duration;
+	int opcode;
+	int done;
+	int err;
+
+	WARN_ON(in_interrupt());
+
+	/* Wait for dev cmd to complete, retrying if we get EAGAIN,
+	 * but don't wait any longer than max_seconds.
+	 */
+	max_wait = jiffies + (max_seconds * HZ);
+try_again:
+	start_time = jiffies;
+	do {
+		done = ionic_dev_cmd_done(idev);
+		if (done)
+			break;
+		msleep(20);
+	} while (!done && time_before(jiffies, max_wait));
+	duration = jiffies - start_time;
+
+	opcode = idev->dev_cmd_regs->cmd.cmd.opcode;
+	dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
+		ionic_opcode_to_str(opcode), opcode,
+		done, duration / HZ, duration);
+
+	if (!done && !time_before(jiffies, max_wait)) {
+		dev_warn(ionic->dev, "DEVCMD %s (%d) timeout after %ld secs\n",
+			 ionic_opcode_to_str(opcode), opcode, max_seconds);
+		return -ETIMEDOUT;
+	}
+
+	err = ionic_dev_cmd_status(&ionic->idev);
+	if (err) {
+		if (err == IONIC_RC_EAGAIN && !time_after(jiffies, max_wait)) {
+			dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) retrying...\n",
+				ionic_opcode_to_str(opcode), opcode,
+				ionic_error_to_str(err), err);
+
+			msleep(1000);
+			iowrite32(0, &idev->dev_cmd_regs->done);
+			iowrite32(1, &idev->dev_cmd_regs->doorbell);
+			goto try_again;
+		}
+
+		dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+			ionic_opcode_to_str(opcode), opcode,
+			ionic_error_to_str(err), err);
+
+		return ionic_error_to_errno(err);
+	}
+
+	return 0;
+}
+
+int ionic_setup(struct ionic *ionic)
+{
+	int err;
+
+	err = ionic_dev_setup(ionic);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int ionic_identify(struct ionic *ionic)
+{
+	struct ionic_identity *ident = &ionic->ident;
+	struct ionic_dev *idev = &ionic->idev;
+	size_t sz;
+	int err;
+
+	memset(ident, 0, sizeof(*ident));
+
+	ident->drv.os_type = cpu_to_le32(IONIC_OS_TYPE_LINUX);
+	strncpy(ident->drv.driver_ver_str, IONIC_DRV_VERSION,
+		sizeof(ident->drv.driver_ver_str) - 1);
+
+	mutex_lock(&ionic->dev_cmd_lock);
+
+	sz = min(sizeof(ident->drv), sizeof(idev->dev_cmd_regs->data));
+	memcpy_toio(&idev->dev_cmd_regs->data, &ident->drv, sz);
+
+	ionic_dev_cmd_identify(idev, IONIC_IDENTITY_VERSION_1);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	if (!err) {
+		sz = min(sizeof(ident->dev), sizeof(idev->dev_cmd_regs->data));
+		memcpy_fromio(&ident->dev, &idev->dev_cmd_regs->data, sz);
+	}
+
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	if (err)
+		return err;
+
+	ionic_debugfs_add_ident(ionic);
+
+	return 0;
+}
+
+int ionic_init(struct ionic *ionic)
+{
+	struct ionic_dev *idev = &ionic->idev;
+	int err;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_init(idev);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	return err;
+}
+
+int ionic_reset(struct ionic *ionic)
+{
+	struct ionic_dev *idev = &ionic->idev;
+	int err;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_reset(idev);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	return err;
+}
+
+int ionic_port_identify(struct ionic *ionic)
+{
+	struct ionic_identity *ident = &ionic->ident;
+	struct ionic_dev *idev = &ionic->idev;
+	size_t sz;
+	int err;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+
+	ionic_dev_cmd_port_identify(idev);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	if (!err) {
+		sz = min(sizeof(ident->port), sizeof(idev->dev_cmd_regs->data));
+		memcpy_fromio(&ident->port, &idev->dev_cmd_regs->data, sz);
+	}
+
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	return err;
+}
+
+int ionic_port_init(struct ionic *ionic)
+{
+	struct ionic_identity *ident = &ionic->ident;
+	struct ionic_dev *idev = &ionic->idev;
+	size_t sz;
+	int err;
+
+	if (idev->port_info)
+		return 0;
+
+	idev->port_info_sz = ALIGN(sizeof(*idev->port_info), PAGE_SIZE);
+	idev->port_info = dma_alloc_coherent(ionic->dev, idev->port_info_sz,
+					     &idev->port_info_pa,
+					     GFP_KERNEL);
+	if (!idev->port_info) {
+		dev_err(ionic->dev, "Failed to allocate port info, aborting\n");
+		return -ENOMEM;
+	}
+
+	sz = min(sizeof(ident->port.config), sizeof(idev->dev_cmd_regs->data));
+
+	mutex_lock(&ionic->dev_cmd_lock);
+
+	memcpy_toio(&idev->dev_cmd_regs->data, &ident->port.config, sz);
+	ionic_dev_cmd_port_init(idev);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+
+	ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_UP);
+	(void)ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+
+	mutex_unlock(&ionic->dev_cmd_lock);
+	if (err) {
+		dev_err(ionic->dev, "Failed to init port\n");
+		dma_free_coherent(ionic->dev, idev->port_info_sz,
+				  idev->port_info, idev->port_info_pa);
+		idev->port_info = NULL;
+		idev->port_info_pa = 0;
+	}
+
+	return err;
+}
+
+int ionic_port_reset(struct ionic *ionic)
+{
+	struct ionic_dev *idev = &ionic->idev;
+	int err;
+
+	if (!idev->port_info)
+		return 0;
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_port_reset(idev);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	dma_free_coherent(ionic->dev, idev->port_info_sz,
+			  idev->port_info, idev->port_info_pa);
+
+	idev->port_info = NULL;
+	idev->port_info_pa = 0;
+
+	if (err)
+		dev_err(ionic->dev, "Failed to reset port\n");
+
+	return err;
+}
+
+static int __init ionic_init_module(void)
+{
+	pr_info("%s %s, ver %s\n",
+		IONIC_DRV_NAME, IONIC_DRV_DESCRIPTION, IONIC_DRV_VERSION);
+	ionic_debugfs_create();
+	return ionic_bus_register_driver();
+}
+
+static void __exit ionic_cleanup_module(void)
+{
+	ionic_bus_unregister_driver();
+	ionic_debugfs_destroy();
+
+	pr_info("%s removed\n", IONIC_DRV_NAME);
+}
+
+module_init(ionic_init_module);
+module_exit(ionic_cleanup_module);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_regs.h b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
new file mode 100644
index 0000000..03ee5a3
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* Copyright (c) 2018-2019 Pensando Systems, Inc.  All rights reserved. */
+
+#ifndef IONIC_REGS_H
+#define IONIC_REGS_H
+
+#include <linux/io.h>
+
+/** struct ionic_intr - interrupt control register set.
+ * @coal_init:			coalesce timer initial value.
+ * @mask:			interrupt mask value.
+ * @credits:			interrupt credit count and return.
+ * @mask_assert:		interrupt mask value on assert.
+ * @coal:			coalesce timer time remaining.
+ */
+struct ionic_intr {
+	u32 coal_init;
+	u32 mask;
+	u32 credits;
+	u32 mask_assert;
+	u32 coal;
+	u32 rsvd[3];
+};
+
+#define IONIC_INTR_CTRL_REGS_MAX	2048
+#define IONIC_INTR_CTRL_COAL_MAX	0x3F
+
+/** enum ionic_intr_mask_vals - valid values for mask and mask_assert.
+ * @IONIC_INTR_MASK_CLEAR:	unmask interrupt.
+ * @IONIC_INTR_MASK_SET:	mask interrupt.
+ */
+enum ionic_intr_mask_vals {
+	IONIC_INTR_MASK_CLEAR		= 0,
+	IONIC_INTR_MASK_SET		= 1,
+};
+
+/** enum ionic_intr_credits_bits - bitwise composition of credits values.
+ * @IONIC_INTR_CRED_COUNT:	bit mask of credit count, no shift needed.
+ * @IONIC_INTR_CRED_COUNT_SIGNED: bit mask of credit count, including sign bit.
+ * @IONIC_INTR_CRED_UNMASK:	unmask the interrupt.
+ * @IONIC_INTR_CRED_RESET_COALESCE: reset the coalesce timer.
+ * @IONIC_INTR_CRED_REARM:	unmask the interrupt and reset the coalesce timer.
+ */
+enum ionic_intr_credits_bits {
+	IONIC_INTR_CRED_COUNT		= 0x7fffu,
+	IONIC_INTR_CRED_COUNT_SIGNED	= 0xffffu,
+	IONIC_INTR_CRED_UNMASK		= 0x10000u,
+	IONIC_INTR_CRED_RESET_COALESCE	= 0x20000u,
+	IONIC_INTR_CRED_REARM		= (IONIC_INTR_CRED_UNMASK |
+					   IONIC_INTR_CRED_RESET_COALESCE),
+};
+
+static inline void ionic_intr_coal_init(struct ionic_intr __iomem *intr_ctrl,
+					int intr_idx, u32 coal)
+{
+	iowrite32(coal, &intr_ctrl[intr_idx].coal_init);
+}
+
+static inline void ionic_intr_mask(struct ionic_intr __iomem *intr_ctrl,
+				   int intr_idx, u32 mask)
+{
+	iowrite32(mask, &intr_ctrl[intr_idx].mask);
+}
+
+static inline void ionic_intr_credits(struct ionic_intr __iomem *intr_ctrl,
+				      int intr_idx, u32 cred, u32 flags)
+{
+	if (WARN_ON_ONCE(cred > IONIC_INTR_CRED_COUNT)) {
+		cred = ioread32(&intr_ctrl[intr_idx].credits);
+		cred &= IONIC_INTR_CRED_COUNT_SIGNED;
+	}
+
+	iowrite32(cred | flags, &intr_ctrl[intr_idx].credits);
+}
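+
+/* Example (illustrative values): returning 12 credits while rearming,
+ *	ionic_intr_credits(intr_ctrl, idx, 12, IONIC_INTR_CRED_REARM);
+ * writes 0x3000c to the credits register: the credit count in the low
+ * bits, with the unmask and coalesce-reset flags above them.
+ */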
+
+static inline void ionic_intr_clean(struct ionic_intr __iomem *intr_ctrl,
+				    int intr_idx)
+{
+	u32 cred;
+
+	cred = ioread32(&intr_ctrl[intr_idx].credits);
+	cred &= IONIC_INTR_CRED_COUNT_SIGNED;
+	cred |= IONIC_INTR_CRED_RESET_COALESCE;
+	iowrite32(cred, &intr_ctrl[intr_idx].credits);
+}
+
+static inline void ionic_intr_mask_assert(struct ionic_intr __iomem *intr_ctrl,
+					  int intr_idx, u32 mask)
+{
+	iowrite32(mask, &intr_ctrl[intr_idx].mask_assert);
+}
+
+/** enum ionic_dbell_bits - bitwise composition of dbell values.
+ *
+ * @IONIC_DBELL_QID_MASK:	unshifted mask of valid queue id bits.
+ * @IONIC_DBELL_QID_SHIFT:	queue id shift amount in dbell value.
+ * @IONIC_DBELL_QID:		macro to build QID component of dbell value.
+ *
+ * @IONIC_DBELL_RING_MASK:	unshifted mask of valid ring bits.
+ * @IONIC_DBELL_RING_SHIFT:	ring shift amount in dbell value.
+ * @IONIC_DBELL_RING:		macro to build ring component of dbell value.
+ *
+ * @IONIC_DBELL_RING_0:		ring zero dbell component value.
+ * @IONIC_DBELL_RING_1:		ring one dbell component value.
+ * @IONIC_DBELL_RING_2:		ring two dbell component value.
+ * @IONIC_DBELL_RING_3:		ring three dbell component value.
+ *
+ * @IONIC_DBELL_INDEX_MASK:	bit mask of valid index bits, no shift needed.
+ */
+enum ionic_dbell_bits {
+	IONIC_DBELL_QID_MASK		= 0xffffff,
+	IONIC_DBELL_QID_SHIFT		= 24,
+
+#define IONIC_DBELL_QID(n) \
+	(((u64)(n) & IONIC_DBELL_QID_MASK) << IONIC_DBELL_QID_SHIFT)
+
+	IONIC_DBELL_RING_MASK		= 0x7,
+	IONIC_DBELL_RING_SHIFT		= 16,
+
+#define IONIC_DBELL_RING(n) \
+	(((u64)(n) & IONIC_DBELL_RING_MASK) << IONIC_DBELL_RING_SHIFT)
+
+	IONIC_DBELL_RING_0		= 0,
+	IONIC_DBELL_RING_1		= IONIC_DBELL_RING(1),
+	IONIC_DBELL_RING_2		= IONIC_DBELL_RING(2),
+	IONIC_DBELL_RING_3		= IONIC_DBELL_RING(3),
+
+	IONIC_DBELL_INDEX_MASK		= 0xffff,
+};
+
+static inline void ionic_dbell_ring(u64 __iomem *db_page, int qtype, u64 val)
+{
+	writeq(val, &db_page[qtype]);
+}
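+
+/* Example (illustrative values): ringing hw queue id 5 at producer
+ * index 42 on ring zero composes the value
+ *	IONIC_DBELL_QID(5) | IONIC_DBELL_RING_0 | (42 & IONIC_DBELL_INDEX_MASK)
+ * == 0x500002a, and ionic_dbell_ring() writes it to the doorbell page
+ * slot indexed by the queue's hw_type.
+ */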
+
+#endif /* IONIC_REGS_H */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
new file mode 100644
index 0000000..7a093f1
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_rx_filter.h"
+
+void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f)
+{
+	struct device *dev = lif->ionic->dev;
+
+	hlist_del(&f->by_id);
+	hlist_del(&f->by_hash);
+	devm_kfree(dev, f);
+}
+
+int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_del = {
+			.opcode = IONIC_CMD_RX_FILTER_DEL,
+			.filter_id = cpu_to_le32(f->filter_id),
+		},
+	};
+
+	return ionic_adminq_post_wait(lif, &ctx);
+}
+
+int ionic_rx_filters_init(struct ionic_lif *lif)
+{
+	unsigned int i;
+
+	spin_lock_init(&lif->rx_filters.lock);
+
+	for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) {
+		INIT_HLIST_HEAD(&lif->rx_filters.by_hash[i]);
+		INIT_HLIST_HEAD(&lif->rx_filters.by_id[i]);
+	}
+
+	return 0;
+}
+
+void ionic_rx_filters_deinit(struct ionic_lif *lif)
+{
+	struct ionic_rx_filter *f;
+	struct hlist_head *head;
+	struct hlist_node *tmp;
+	unsigned int i;
+
+	for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) {
+		head = &lif->rx_filters.by_id[i];
+		hlist_for_each_entry_safe(f, tmp, head, by_id)
+			ionic_rx_filter_free(lif, f);
+	}
+}
+
+int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
+			 u32 hash, struct ionic_admin_ctx *ctx)
+{
+	struct device *dev = lif->ionic->dev;
+	struct ionic_rx_filter_add_cmd *ac;
+	struct ionic_rx_filter *f;
+	struct hlist_head *head;
+	unsigned int key;
+
+	ac = &ctx->cmd.rx_filter_add;
+
+	switch (le16_to_cpu(ac->match)) {
+	case IONIC_RX_FILTER_MATCH_VLAN:
+		key = le16_to_cpu(ac->vlan.vlan);
+		break;
+	case IONIC_RX_FILTER_MATCH_MAC:
+		key = *(u32 *)ac->mac.addr;
+		break;
+	case IONIC_RX_FILTER_MATCH_MAC_VLAN:
+		key = le16_to_cpu(ac->mac_vlan.vlan);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	f = devm_kzalloc(dev, sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return -ENOMEM;
+
+	f->flow_id = flow_id;
+	f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id);
+	f->rxq_index = rxq_index;
+	memcpy(&f->cmd, ac, sizeof(f->cmd));
+
+	INIT_HLIST_NODE(&f->by_hash);
+	INIT_HLIST_NODE(&f->by_id);
+
+	spin_lock_bh(&lif->rx_filters.lock);
+
+	key = hash_32(key, IONIC_RX_FILTER_HASH_BITS);
+	head = &lif->rx_filters.by_hash[key];
+	hlist_add_head(&f->by_hash, head);
+
+	key = f->filter_id & IONIC_RX_FILTER_HLISTS_MASK;
+	head = &lif->rx_filters.by_id[key];
+	hlist_add_head(&f->by_id, head);
+
+	spin_unlock_bh(&lif->rx_filters.lock);
+
+	return 0;
+}
+
+struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid)
+{
+	struct ionic_rx_filter *f;
+	struct hlist_head *head;
+	unsigned int key;
+
+	key = hash_32(vid, IONIC_RX_FILTER_HASH_BITS);
+	head = &lif->rx_filters.by_hash[key];
+
+	hlist_for_each_entry(f, head, by_hash) {
+		if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_MATCH_VLAN)
+			continue;
+		if (le16_to_cpu(f->cmd.vlan.vlan) == vid)
+			return f;
+	}
+
+	return NULL;
+}
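+
+/* Lookup sketch (hypothetical caller): lookups hash the same key that
+ * ionic_rx_filter_save() stored under, and the hlists have no RCU
+ * protection, so walks and frees belong under rx_filters.lock:
+ *
+ *	spin_lock_bh(&lif->rx_filters.lock);
+ *	f = ionic_rx_filter_by_vlan(lif, vid);
+ *	if (f)
+ *		ionic_rx_filter_free(lif, f);
+ *	spin_unlock_bh(&lif->rx_filters.lock);
+ */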
+
+struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif,
+						const u8 *addr)
+{
+	struct ionic_rx_filter *f;
+	struct hlist_head *head;
+	unsigned int key;
+
+	key = hash_32(*(u32 *)addr, IONIC_RX_FILTER_HASH_BITS);
+	head = &lif->rx_filters.by_hash[key];
+
+	hlist_for_each_entry(f, head, by_hash) {
+		if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_MATCH_MAC)
+			continue;
+		if (memcmp(addr, f->cmd.mac.addr, ETH_ALEN) == 0)
+			return f;
+	}
+
+	return NULL;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
new file mode 100644
index 0000000..b6aec9c
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_RX_FILTER_H_
+#define _IONIC_RX_FILTER_H_
+
+#define IONIC_RXQ_INDEX_ANY		(0xFFFF)
+struct ionic_rx_filter {
+	u32 flow_id;
+	u32 filter_id;
+	u16 rxq_index;
+	struct ionic_rx_filter_add_cmd cmd;
+	struct hlist_node by_hash;
+	struct hlist_node by_id;
+};
+
+#define IONIC_RX_FILTER_HASH_BITS	10
+#define IONIC_RX_FILTER_HLISTS		BIT(IONIC_RX_FILTER_HASH_BITS)
+#define IONIC_RX_FILTER_HLISTS_MASK	(IONIC_RX_FILTER_HLISTS - 1)
+struct ionic_rx_filters {
+	spinlock_t lock;				    /* filter list lock */
+	struct hlist_head by_hash[IONIC_RX_FILTER_HLISTS];  /* by skb hash */
+	struct hlist_head by_id[IONIC_RX_FILTER_HLISTS];    /* by filter_id */
+};
+
+void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f);
+int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f);
+int ionic_rx_filters_init(struct ionic_lif *lif);
+void ionic_rx_filters_deinit(struct ionic_lif *lif);
+int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
+			 u32 hash, struct ionic_admin_ctx *ctx);
+struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid);
+struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr);
+
+#endif /* _IONIC_RX_FILTER_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
new file mode 100644
index 0000000..03916b6
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_stats.h"
+
+static const struct ionic_stat_desc ionic_lif_stats_desc[] = {
+	IONIC_LIF_STAT_DESC(tx_packets),
+	IONIC_LIF_STAT_DESC(tx_bytes),
+	IONIC_LIF_STAT_DESC(rx_packets),
+	IONIC_LIF_STAT_DESC(rx_bytes),
+	IONIC_LIF_STAT_DESC(tx_tso),
+	IONIC_LIF_STAT_DESC(tx_no_csum),
+	IONIC_LIF_STAT_DESC(tx_csum),
+	IONIC_LIF_STAT_DESC(rx_csum_none),
+	IONIC_LIF_STAT_DESC(rx_csum_complete),
+	IONIC_LIF_STAT_DESC(rx_csum_error),
+};
+
+static const struct ionic_stat_desc ionic_tx_stats_desc[] = {
+	IONIC_TX_STAT_DESC(pkts),
+	IONIC_TX_STAT_DESC(bytes),
+	IONIC_TX_STAT_DESC(clean),
+	IONIC_TX_STAT_DESC(dma_map_err),
+	IONIC_TX_STAT_DESC(linearize),
+	IONIC_TX_STAT_DESC(frags),
+};
+
+static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
+	IONIC_RX_STAT_DESC(pkts),
+	IONIC_RX_STAT_DESC(bytes),
+	IONIC_RX_STAT_DESC(dma_map_err),
+	IONIC_RX_STAT_DESC(alloc_err),
+	IONIC_RX_STAT_DESC(csum_none),
+	IONIC_RX_STAT_DESC(csum_complete),
+	IONIC_RX_STAT_DESC(csum_error),
+};
+
+static const struct ionic_stat_desc ionic_txq_stats_desc[] = {
+	IONIC_TX_Q_STAT_DESC(stop),
+	IONIC_TX_Q_STAT_DESC(wake),
+	IONIC_TX_Q_STAT_DESC(drop),
+	IONIC_TX_Q_STAT_DESC(dbell_count),
+};
+
+static const struct ionic_stat_desc ionic_dbg_cq_stats_desc[] = {
+	IONIC_CQ_STAT_DESC(compl_count),
+};
+
+static const struct ionic_stat_desc ionic_dbg_intr_stats_desc[] = {
+	IONIC_INTR_STAT_DESC(rearm_count),
+};
+
+static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = {
+	IONIC_NAPI_STAT_DESC(poll_count),
+};
+
+#define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc)
+#define IONIC_NUM_TX_STATS ARRAY_SIZE(ionic_tx_stats_desc)
+#define IONIC_NUM_RX_STATS ARRAY_SIZE(ionic_rx_stats_desc)
+#define IONIC_NUM_TX_Q_STATS ARRAY_SIZE(ionic_txq_stats_desc)
+#define IONIC_NUM_DBG_CQ_STATS ARRAY_SIZE(ionic_dbg_cq_stats_desc)
+#define IONIC_NUM_DBG_INTR_STATS ARRAY_SIZE(ionic_dbg_intr_stats_desc)
+#define IONIC_NUM_DBG_NAPI_STATS ARRAY_SIZE(ionic_dbg_napi_stats_desc)
+
+#define MAX_Q(lif)   ((lif)->netdev->real_num_tx_queues)
+
+static void ionic_get_lif_stats(struct ionic_lif *lif,
+				struct ionic_lif_sw_stats *stats)
+{
+	struct ionic_tx_stats *tstats;
+	struct ionic_rx_stats *rstats;
+	struct ionic_qcq *txqcq;
+	struct ionic_qcq *rxqcq;
+	int q_num;
+
+	memset(stats, 0, sizeof(*stats));
+
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+		txqcq = lif_to_txqcq(lif, q_num);
+		if (txqcq && txqcq->stats) {
+			tstats = &txqcq->stats->tx;
+			stats->tx_packets += tstats->pkts;
+			stats->tx_bytes += tstats->bytes;
+			stats->tx_tso += tstats->tso;
+			stats->tx_no_csum += tstats->no_csum;
+			stats->tx_csum += tstats->csum;
+		}
+
+		rxqcq = lif_to_rxqcq(lif, q_num);
+		if (rxqcq && rxqcq->stats) {
+			rstats = &rxqcq->stats->rx;
+			stats->rx_packets += rstats->pkts;
+			stats->rx_bytes += rstats->bytes;
+			stats->rx_csum_none += rstats->csum_none;
+			stats->rx_csum_complete += rstats->csum_complete;
+			stats->rx_csum_error += rstats->csum_error;
+		}
+	}
+}
+
+static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
+{
+	u64 total = 0;
+
+	/* lif stats */
+	total += IONIC_NUM_LIF_STATS;
+
+	/* tx stats */
+	total += MAX_Q(lif) * IONIC_NUM_TX_STATS;
+
+	/* rx stats */
+	total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
+
+	if (test_bit(IONIC_LIF_UP, lif->state) &&
+	    test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+		/* tx debug stats */
+		total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
+				      IONIC_NUM_TX_Q_STATS +
+				      IONIC_NUM_DBG_INTR_STATS +
+				      IONIC_MAX_NUM_SG_CNTR);
+
+		/* rx debug stats */
+		total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
+				      IONIC_NUM_DBG_INTR_STATS +
+				      IONIC_NUM_DBG_NAPI_STATS +
+				      IONIC_MAX_NUM_NAPI_CNTR);
+	}
+
+	return total;
+}
+
+static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
+{
+	int i, q_num;
+
+	for (i = 0; i < IONIC_NUM_LIF_STATS; i++) {
+		snprintf(*buf, ETH_GSTRING_LEN, "%s",
+			 ionic_lif_stats_desc[i].name);
+		*buf += ETH_GSTRING_LEN;
+	}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+		for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
+			snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s",
+				 q_num, ionic_tx_stats_desc[i].name);
+			*buf += ETH_GSTRING_LEN;
+		}
+
+		if (test_bit(IONIC_LIF_UP, lif->state) &&
+		    test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+			for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "txq_%d_%s",
+					 q_num,
+					 ionic_txq_stats_desc[i].name);
+				*buf += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "txq_%d_cq_%s",
+					 q_num,
+					 ionic_dbg_cq_stats_desc[i].name);
+				*buf += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "txq_%d_intr_%s",
+					 q_num,
+					 ionic_dbg_intr_stats_desc[i].name);
+				*buf += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "txq_%d_sg_cntr_%d",
+					 q_num, i);
+				*buf += ETH_GSTRING_LEN;
+			}
+		}
+	}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+		for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
+			snprintf(*buf, ETH_GSTRING_LEN,
+				 "rx_%d_%s",
+				 q_num, ionic_rx_stats_desc[i].name);
+			*buf += ETH_GSTRING_LEN;
+		}
+
+		if (test_bit(IONIC_LIF_UP, lif->state) &&
+		    test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "rxq_%d_cq_%s",
+					 q_num,
+					 ionic_dbg_cq_stats_desc[i].name);
+				*buf += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "rxq_%d_intr_%s",
+					 q_num,
+					 ionic_dbg_intr_stats_desc[i].name);
+				*buf += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "rxq_%d_napi_%s",
+					 q_num,
+					 ionic_dbg_napi_stats_desc[i].name);
+				*buf += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
+				snprintf(*buf, ETH_GSTRING_LEN,
+					 "rxq_%d_napi_work_done_%d",
+					 q_num, i);
+				*buf += ETH_GSTRING_LEN;
+			}
+		}
+	}
+}
+
+static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
+{
+	struct ionic_lif_sw_stats lif_stats;
+	struct ionic_qcq *txqcq, *rxqcq;
+	struct ionic_tx_stats *txstats;
+	struct ionic_rx_stats *rxstats;
+	int i, q_num;
+
+	ionic_get_lif_stats(lif, &lif_stats);
+
+	for (i = 0; i < IONIC_NUM_LIF_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&lif_stats, &ionic_lif_stats_desc[i]);
+		(*buf)++;
+	}
+
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+		txstats = &lif_to_txstats(lif, q_num);
+
+		for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
+			**buf = IONIC_READ_STAT64(txstats,
+						  &ionic_tx_stats_desc[i]);
+			(*buf)++;
+		}
+
+		if (test_bit(IONIC_LIF_UP, lif->state) &&
+		    test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+			txqcq = lif_to_txqcq(lif, q_num);
+			for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
+				**buf = IONIC_READ_STAT64(&txqcq->q,
+						      &ionic_txq_stats_desc[i]);
+				(*buf)++;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+				**buf = IONIC_READ_STAT64(&txqcq->cq,
+						   &ionic_dbg_cq_stats_desc[i]);
+				(*buf)++;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+				**buf = IONIC_READ_STAT64(&txqcq->intr,
+						 &ionic_dbg_intr_stats_desc[i]);
+				(*buf)++;
+			}
+			for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
+				**buf = txstats->sg_cntr[i];
+				(*buf)++;
+			}
+		}
+	}
+
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+		rxstats = &lif_to_rxstats(lif, q_num);
+
+		for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
+			**buf = IONIC_READ_STAT64(rxstats,
+						  &ionic_rx_stats_desc[i]);
+			(*buf)++;
+		}
+
+		if (test_bit(IONIC_LIF_UP, lif->state) &&
+		    test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+			rxqcq = lif_to_rxqcq(lif, q_num);
+			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+				**buf = IONIC_READ_STAT64(&rxqcq->cq,
+						   &ionic_dbg_cq_stats_desc[i]);
+				(*buf)++;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+				**buf = IONIC_READ_STAT64(&rxqcq->intr,
+						 &ionic_dbg_intr_stats_desc[i]);
+				(*buf)++;
+			}
+			for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
+				**buf = IONIC_READ_STAT64(&rxqcq->napi_stats,
+						 &ionic_dbg_napi_stats_desc[i]);
+				(*buf)++;
+			}
+			for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
+				**buf = rxqcq->napi_stats.work_done_cntr[i];
+				(*buf)++;
+			}
+		}
+	}
+}
+
+const struct ionic_stats_group_intf ionic_stats_groups[] = {
+	/* SW Stats group */
+	{
+		.get_strings = ionic_sw_stats_get_strings,
+		.get_values = ionic_sw_stats_get_values,
+		.get_count = ionic_sw_stats_get_count,
+	},
+	/* Add more stat groups here */
+};
+
+const int ionic_num_stats_grps = ARRAY_SIZE(ionic_stats_groups);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.h b/drivers/net/ethernet/pensando/ionic/ionic_stats.h
new file mode 100644
index 0000000..d2c1122
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_STATS_H_
+#define _IONIC_STATS_H_
+
+#define IONIC_STAT_TO_OFFSET(type, stat_name) (offsetof(type, stat_name))
+
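+/* Build a { name, offset } table entry for a struct field, using the
+ * stringified field name as the ethtool stat string.
+ */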
+#define IONIC_STAT_DESC(type, stat_name) { \
+	.name = #stat_name, \
+	.offset = IONIC_STAT_TO_OFFSET(type, stat_name) \
+}
+
+#define IONIC_LIF_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_lif_sw_stats, stat_name)
+
+#define IONIC_TX_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_tx_stats, stat_name)
+
+#define IONIC_RX_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_rx_stats, stat_name)
+
+#define IONIC_TX_Q_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_queue, stat_name)
+
+#define IONIC_CQ_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_cq, stat_name)
+
+#define IONIC_INTR_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_intr_info, stat_name)
+
+#define IONIC_NAPI_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_napi_stats, stat_name)
+
+/* Interface structure for a particular stats group */
+struct ionic_stats_group_intf {
+	void (*get_strings)(struct ionic_lif *lif, u8 **buf);
+	void (*get_values)(struct ionic_lif *lif, u64 **buf);
+	u64 (*get_count)(struct ionic_lif *lif);
+};
+
+extern const struct ionic_stats_group_intf ionic_stats_groups[];
+extern const int ionic_num_stats_grps;
+
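+/* Read a u64 statistic out of a stats struct at the byte offset
+ * recorded in its ionic_stat_desc entry.
+ */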
+#define IONIC_READ_STAT64(base_ptr, desc_ptr) \
+	(*((u64 *)(((u8 *)(base_ptr)) + (desc_ptr)->offset)))
+
+struct ionic_stat_desc {
+	char name[ETH_GSTRING_LEN];
+	u64 offset;
+};
+
+#endif /* _IONIC_STATS_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
new file mode 100644
index 0000000..ab6663d
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -0,0 +1,925 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
+#include <net/ip6_checksum.h>
+
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_txrx.h"
+
+static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+			   struct ionic_cq_info *cq_info, void *cb_arg);
+
+static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell,
+				  ionic_desc_cb cb_func, void *cb_arg)
+{
+	DEBUG_STATS_TXQ_POST(q_to_qcq(q), q->head->desc, ring_dbell);
+
+	ionic_q_post(q, ring_dbell, cb_func, cb_arg);
+}
+
+static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell,
+				  ionic_desc_cb cb_func, void *cb_arg)
+{
+	ionic_q_post(q, ring_dbell, cb_func, cb_arg);
+
+	DEBUG_STATS_RX_BUFF_CNT(q_to_qcq(q));
+}
+
+static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
+{
+	return netdev_get_tx_queue(q->lif->netdev, q->index);
+}
+
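+/* Repost the same rx buffer by copying its bus address and length
+ * into the descriptor at the queue head.
+ */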
+static void ionic_rx_recycle(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+			     struct sk_buff *skb)
+{
+	struct ionic_rxq_desc *old = desc_info->desc;
+	struct ionic_rxq_desc *new = q->head->desc;
+
+	new->addr = old->addr;
+	new->len = old->len;
+
+	ionic_rxq_post(q, true, ionic_rx_clean, skb);
+}
+
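+/* For frames no longer than rx_copybreak, copy the data into a fresh
+ * skb and recycle the original rx buffer in place; larger frames are
+ * unmapped and handed up the stack as-is.
+ */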
+static bool ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+			       struct ionic_cq_info *cq_info, struct sk_buff **skb)
+{
+	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_desc *desc = desc_info->desc;
+	struct net_device *netdev = q->lif->netdev;
+	struct device *dev = q->lif->ionic->dev;
+	struct sk_buff *new_skb;
+	u16 clen, dlen;
+
+	clen = le16_to_cpu(comp->len);
+	dlen = le16_to_cpu(desc->len);
+	if (clen > q->lif->rx_copybreak) {
+		dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
+				 dlen, DMA_FROM_DEVICE);
+		return false;
+	}
+
+	new_skb = netdev_alloc_skb_ip_align(netdev, clen);
+	if (!new_skb) {
+		dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
+				 dlen, DMA_FROM_DEVICE);
+		return false;
+	}
+
+	dma_sync_single_for_cpu(dev, (dma_addr_t)le64_to_cpu(desc->addr),
+				clen, DMA_FROM_DEVICE);
+
+	memcpy(new_skb->data, (*skb)->data, clen);
+
+	ionic_rx_recycle(q, desc_info, *skb);
+	*skb = new_skb;
+
+	return true;
+}
+
+static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+			   struct ionic_cq_info *cq_info, void *cb_arg)
+{
+	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_qcq *qcq = q_to_qcq(q);
+	struct sk_buff *skb = cb_arg;
+	struct ionic_rx_stats *stats;
+	struct net_device *netdev;
+
+	stats = q_to_rx_stats(q);
+	netdev = q->lif->netdev;
+
+	if (comp->status) {
+		ionic_rx_recycle(q, desc_info, skb);
+		return;
+	}
+
+	if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) {
+		/* no packet processing while resetting */
+		ionic_rx_recycle(q, desc_info, skb);
+		return;
+	}
+
+	stats->pkts++;
+	stats->bytes += le16_to_cpu(comp->len);
+
+	ionic_rx_copybreak(q, desc_info, cq_info, &skb);
+
+	skb_put(skb, le16_to_cpu(comp->len));
+	skb->protocol = eth_type_trans(skb, netdev);
+
+	skb_record_rx_queue(skb, q->index);
+
+	if (netdev->features & NETIF_F_RXHASH) {
+		switch (comp->pkt_type_color & IONIC_RXQ_COMP_PKT_TYPE_MASK) {
+		case IONIC_PKT_TYPE_IPV4:
+		case IONIC_PKT_TYPE_IPV6:
+			skb_set_hash(skb, le32_to_cpu(comp->rss_hash),
+				     PKT_HASH_TYPE_L3);
+			break;
+		case IONIC_PKT_TYPE_IPV4_TCP:
+		case IONIC_PKT_TYPE_IPV6_TCP:
+		case IONIC_PKT_TYPE_IPV4_UDP:
+		case IONIC_PKT_TYPE_IPV6_UDP:
+			skb_set_hash(skb, le32_to_cpu(comp->rss_hash),
+				     PKT_HASH_TYPE_L4);
+			break;
+		}
+	}
+
+	if (netdev->features & NETIF_F_RXCSUM) {
+		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
+			skb->ip_summed = CHECKSUM_COMPLETE;
+			skb->csum = (__wsum)le16_to_cpu(comp->csum);
+			stats->csum_complete++;
+		}
+	} else {
+		stats->csum_none++;
+	}
+
+	if ((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) ||
+	    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) ||
+	    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD))
+		stats->csum_error++;
+
+	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       le16_to_cpu(comp->vlan_tci));
+	}
+
+	napi_gro_receive(&qcq->napi, skb);
+}
+
+static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
+{
+	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_queue *q = cq->bound_q;
+	struct ionic_desc_info *desc_info;
+
+	if (!color_match(comp->pkt_type_color, cq->done_color))
+		return false;
+
+	/* check for empty queue */
+	if (q->tail->index == q->head->index)
+		return false;
+
+	desc_info = q->tail;
+	if (desc_info->index != le16_to_cpu(comp->comp_index))
+		return false;
+
+	q->tail = desc_info->next;
+
+	/* clean the related q entry, only one per cq completion */
+	ionic_rx_clean(q, desc_info, cq_info, desc_info->cb_arg);
+
+	desc_info->cb = NULL;
+	desc_info->cb_arg = NULL;
+
+	return true;
+}
+
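+/* Walk completed rx cq entries until the completion color no longer
+ * matches or the limit is reached, flipping the expected color each
+ * time the cq wraps.
+ */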
+static u32 ionic_rx_walk_cq(struct ionic_cq *rxcq, u32 limit)
+{
+	u32 work_done = 0;
+
+	while (ionic_rx_service(rxcq, rxcq->tail)) {
+		if (rxcq->tail->last)
+			rxcq->done_color = !rxcq->done_color;
+		rxcq->tail = rxcq->tail->next;
+		DEBUG_STATS_CQE_CNT(rxcq);
+
+		if (++work_done >= limit)
+			break;
+	}
+
+	return work_done;
+}
+
+void ionic_rx_flush(struct ionic_cq *cq)
+{
+	struct ionic_dev *idev = &cq->lif->ionic->idev;
+	u32 work_done;
+
+	work_done = ionic_rx_walk_cq(cq, cq->num_descs);
+
+	if (work_done)
+		ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+				   work_done, IONIC_INTR_CRED_RESET_COALESCE);
+}
+
+static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, unsigned int len,
+					  dma_addr_t *dma_addr)
+{
+	struct ionic_lif *lif = q->lif;
+	struct ionic_rx_stats *stats;
+	struct net_device *netdev;
+	struct sk_buff *skb;
+	struct device *dev;
+
+	netdev = lif->netdev;
+	dev = lif->ionic->dev;
+	stats = q_to_rx_stats(q);
+	skb = netdev_alloc_skb_ip_align(netdev, len);
+	if (!skb) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
+		return NULL;
+	}
+
+	*dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, *dma_addr)) {
+		dev_kfree_skb(skb);
+		net_warn_ratelimited("%s: DMA single map failed on %s!\n",
+				     netdev->name, q->name);
+		stats->dma_map_err++;
+		return NULL;
+	}
+
+	return skb;
+}
+
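+/* Ring the rx doorbell once per four posted buffers: the stride below
+ * is used as a mask on the descriptor index to batch doorbell writes
+ * while keeping the device fed.
+ */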
+#define IONIC_RX_RING_DOORBELL_STRIDE		((1 << 2) - 1)
+
+void ionic_rx_fill(struct ionic_queue *q)
+{
+	struct net_device *netdev = q->lif->netdev;
+	struct ionic_rxq_desc *desc;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+	bool ring_doorbell;
+	unsigned int len;
+	unsigned int i;
+
+	len = netdev->mtu + ETH_HLEN;
+
+	for (i = ionic_q_space_avail(q); i; i--) {
+		skb = ionic_rx_skb_alloc(q, len, &dma_addr);
+		if (!skb)
+			return;
+
+		desc = q->head->desc;
+		desc->addr = cpu_to_le64(dma_addr);
+		desc->len = cpu_to_le16(len);
+		desc->opcode = IONIC_RXQ_DESC_OPCODE_SIMPLE;
+
+		ring_doorbell = ((q->head->index + 1) &
+				IONIC_RX_RING_DOORBELL_STRIDE) == 0;
+
+		ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, skb);
+	}
+}
+
+static void ionic_rx_fill_cb(void *arg)
+{
+	ionic_rx_fill(arg);
+}
+
+void ionic_rx_empty(struct ionic_queue *q)
+{
+	struct device *dev = q->lif->ionic->dev;
+	struct ionic_desc_info *cur;
+	struct ionic_rxq_desc *desc;
+
+	for (cur = q->tail; cur != q->head; cur = cur->next) {
+		desc = cur->desc;
+		dma_unmap_single(dev, le64_to_cpu(desc->addr),
+				 le16_to_cpu(desc->len), DMA_FROM_DEVICE);
+		dev_kfree_skb(cur->cb_arg);
+		cur->cb_arg = NULL;
+	}
+}
+
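+/* NAPI poll: drain the paired tx cq first, then service up to budget
+ * rx completions, refill the rx ring, and unmask the interrupt when
+ * the poll completes under budget.
+ */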
+int ionic_rx_napi(struct napi_struct *napi, int budget)
+{
+	struct ionic_qcq *qcq = napi_to_qcq(napi);
+	struct ionic_cq *rxcq = napi_to_cq(napi);
+	unsigned int qi = rxcq->bound_q->index;
+	struct ionic_dev *idev;
+	struct ionic_lif *lif;
+	struct ionic_cq *txcq;
+	u32 work_done = 0;
+	u32 flags = 0;
+
+	lif = rxcq->bound_q->lif;
+	idev = &lif->ionic->idev;
+	txcq = &lif->txqcqs[qi].qcq->cq;
+
+	ionic_tx_flush(txcq);
+
+	work_done = ionic_rx_walk_cq(rxcq, budget);
+
+	if (work_done)
+		ionic_rx_fill_cb(rxcq->bound_q);
+
+	if (work_done < budget && napi_complete_done(napi, work_done)) {
+		flags |= IONIC_INTR_CRED_UNMASK;
+		DEBUG_STATS_INTR_REARM(rxcq->bound_intr);
+	}
+
+	if (work_done || flags) {
+		flags |= IONIC_INTR_CRED_RESET_COALESCE;
+		ionic_intr_credits(idev->intr_ctrl, rxcq->bound_intr->index,
+				   work_done, flags);
+	}
+
+	DEBUG_STATS_NAPI_POLL(qcq, work_done);
+
+	return work_done;
+}
+
+static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct device *dev = q->lif->ionic->dev;
+	dma_addr_t dma_addr;
+
+	dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_addr)) {
+		net_warn_ratelimited("%s: DMA single map failed on %s!\n",
+				     q->lif->netdev->name, q->name);
+		stats->dma_map_err++;
+	}
+	return dma_addr;
+}
+
+static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, const skb_frag_t *frag,
+				    size_t offset, size_t len)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct device *dev = q->lif->ionic->dev;
+	dma_addr_t dma_addr;
+
+	dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_addr)) {
+		net_warn_ratelimited("%s: DMA frag map failed on %s!\n",
+				     q->lif->netdev->name, q->name);
+		stats->dma_map_err++;
+	}
+	return dma_addr;
+}
+
+static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+			   struct ionic_cq_info *cq_info, void *cb_arg)
+{
+	struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc;
+	struct ionic_txq_sg_elem *elem = sg_desc->elems;
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct ionic_txq_desc *desc = desc_info->desc;
+	struct device *dev = q->lif->ionic->dev;
+	u8 opcode, flags, nsge;
+	u16 queue_index;
+	unsigned int i;
+	u64 addr;
+
+	decode_txq_desc_cmd(le64_to_cpu(desc->cmd),
+			    &opcode, &flags, &nsge, &addr);
+
+	/* use unmap_single only if either this is not TSO,
+	 * or this is the first descriptor of a TSO
+	 */
+	if (opcode != IONIC_TXQ_DESC_OPCODE_TSO ||
+	    flags & IONIC_TXQ_DESC_FLAG_TSO_SOT)
+		dma_unmap_single(dev, (dma_addr_t)addr,
+				 le16_to_cpu(desc->len), DMA_TO_DEVICE);
+	else
+		dma_unmap_page(dev, (dma_addr_t)addr,
+			       le16_to_cpu(desc->len), DMA_TO_DEVICE);
+
+	for (i = 0; i < nsge; i++, elem++)
+		dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr),
+			       le16_to_cpu(elem->len), DMA_TO_DEVICE);
+
+	if (cb_arg) {
+		struct sk_buff *skb = cb_arg;
+		u32 len = skb->len;
+
+		queue_index = skb_get_queue_mapping(skb);
+		if (unlikely(__netif_subqueue_stopped(q->lif->netdev,
+						      queue_index))) {
+			netif_wake_subqueue(q->lif->netdev, queue_index);
+			q->wake++;
+		}
+		dev_kfree_skb_any(skb);
+		stats->clean++;
+		netdev_tx_completed_queue(q_to_ndq(q), 1, len);
+	}
+}
+
+void ionic_tx_flush(struct ionic_cq *cq)
+{
+	struct ionic_txq_comp *comp = cq->tail->cq_desc;
+	struct ionic_dev *idev = &cq->lif->ionic->idev;
+	struct ionic_queue *q = cq->bound_q;
+	struct ionic_desc_info *desc_info;
+	unsigned int work_done = 0;
+
+	/* walk the completed cq entries */
+	while (work_done < cq->num_descs &&
+	       color_match(comp->color, cq->done_color)) {
+
+		/* clean the related q entries, there could be
+		 * several q entries completed for each cq completion
+		 */
+		do {
+			desc_info = q->tail;
+			q->tail = desc_info->next;
+			ionic_tx_clean(q, desc_info, cq->tail,
+				       desc_info->cb_arg);
+			desc_info->cb = NULL;
+			desc_info->cb_arg = NULL;
+		} while (desc_info->index != le16_to_cpu(comp->comp_index));
+
+		if (cq->tail->last)
+			cq->done_color = !cq->done_color;
+
+		cq->tail = cq->tail->next;
+		comp = cq->tail->cq_desc;
+		DEBUG_STATS_CQE_CNT(cq);
+
+		work_done++;
+	}
+
+	if (work_done)
+		ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+				   work_done, 0);
+}
+
+static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb)
+{
+	int err;
+
+	err = skb_cow_head(skb, 0);
+	if (err)
+		return err;
+
+	if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+		inner_ip_hdr(skb)->check = 0;
+		inner_tcp_hdr(skb)->check =
+			~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
+					   inner_ip_hdr(skb)->daddr,
+					   0, IPPROTO_TCP, 0);
+	} else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
+		inner_tcp_hdr(skb)->check =
+			~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+					 &inner_ipv6_hdr(skb)->daddr,
+					 0, IPPROTO_TCP, 0);
+	}
+
+	return 0;
+}
+
+static int ionic_tx_tcp_pseudo_csum(struct sk_buff *skb)
+{
+	int err;
+
+	err = skb_cow_head(skb, 0);
+	if (err)
+		return err;
+
+	if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+		ip_hdr(skb)->check = 0;
+		tcp_hdr(skb)->check =
+			~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					   ip_hdr(skb)->daddr,
+					   0, IPPROTO_TCP, 0);
+	} else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
+		tcp_hdr(skb)->check =
+			~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					 &ipv6_hdr(skb)->daddr,
+					 0, IPPROTO_TCP, 0);
+	}
+
+	return 0;
+}
+
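+/* Post one TSO descriptor: the SOT/EOT flags mark the first and last
+ * descriptors of the segmented send, and only the final post carries
+ * the skb so completion accounting happens exactly once.
+ */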
+static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc,
+			      struct sk_buff *skb,
+			      dma_addr_t addr, u8 nsge, u16 len,
+			      unsigned int hdrlen, unsigned int mss,
+			      bool outer_csum,
+			      u16 vlan_tci, bool has_vlan,
+			      bool start, bool done)
+{
+	u8 flags = 0;
+	u64 cmd;
+
+	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
+	flags |= outer_csum ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+	flags |= start ? IONIC_TXQ_DESC_FLAG_TSO_SOT : 0;
+	flags |= done ? IONIC_TXQ_DESC_FLAG_TSO_EOT : 0;
+
+	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_TSO, flags, nsge, addr);
+	desc->cmd = cpu_to_le64(cmd);
+	desc->len = cpu_to_le16(len);
+	desc->vlan_tci = cpu_to_le16(vlan_tci);
+	desc->hdr_len = cpu_to_le16(hdrlen);
+	desc->mss = cpu_to_le16(mss);
+
+	if (done) {
+		skb_tx_timestamp(skb);
+		netdev_tx_sent_queue(q_to_ndq(q), skb->len);
+		ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+	} else {
+		ionic_txq_post(q, false, ionic_tx_clean, NULL);
+	}
+}
+
+static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
+						struct ionic_txq_sg_elem **elem)
+{
+	struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc;
+	struct ionic_txq_desc *desc = q->head->desc;
+
+	*elem = sg_desc->elems;
+	return desc;
+}
+
+static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct ionic_desc_info *abort = q->head;
+	struct device *dev = q->lif->ionic->dev;
+	struct ionic_desc_info *rewind = abort;
+	struct ionic_txq_sg_elem *elem;
+	struct ionic_txq_desc *desc;
+	unsigned int frag_left = 0;
+	unsigned int offset = 0;
+	unsigned int len_left;
+	dma_addr_t frag_addr;
+	dma_addr_t desc_addr;
+	unsigned int hdrlen;
+	unsigned int nfrags;
+	unsigned int seglen;
+	u64 total_bytes = 0;
+	u64 total_pkts = 0;
+	unsigned int left;
+	unsigned int len;
+	unsigned int mss;
+	skb_frag_t *frag;
+	bool start, done;
+	bool outer_csum;
+	bool has_vlan;
+	u16 desc_len;
+	u8 desc_nsge;
+	u16 vlan_tci;
+	bool encap;
+	int err;
+
+	mss = skb_shinfo(skb)->gso_size;
+	nfrags = skb_shinfo(skb)->nr_frags;
+	len_left = skb->len - skb_headlen(skb);
+	outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) ||
+		     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+	has_vlan = !!skb_vlan_tag_present(skb);
+	vlan_tci = skb_vlan_tag_get(skb);
+	encap = skb->encapsulation;
+
+	/* Preload inner-most TCP csum field with IP pseudo hdr
+	 * calculated with IP length set to zero.  HW will later
+	 * add in length to each TCP segment resulting from the TSO.
+	 */
+
+	if (encap)
+		err = ionic_tx_tcp_inner_pseudo_csum(skb);
+	else
+		err = ionic_tx_tcp_pseudo_csum(skb);
+	if (err)
+		return err;
+
+	if (encap)
+		hdrlen = skb_inner_transport_header(skb) - skb->data +
+			 inner_tcp_hdrlen(skb);
+	else
+		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+	seglen = hdrlen + mss;
+	left = skb_headlen(skb);
+
+	desc = ionic_tx_tso_next(q, &elem);
+	start = true;
+
+	/* Chop skb->data up into desc segments */
+
+	while (left > 0) {
+		len = min(seglen, left);
+		frag_left = seglen - len;
+		desc_addr = ionic_tx_map_single(q, skb->data + offset, len);
+		if (dma_mapping_error(dev, desc_addr))
+			goto err_out_abort;
+		desc_len = len;
+		desc_nsge = 0;
+		left -= len;
+		offset += len;
+		if (nfrags > 0 && frag_left > 0)
+			continue;
+		done = (nfrags == 0 && left == 0);
+		ionic_tx_tso_post(q, desc, skb,
+				  desc_addr, desc_nsge, desc_len,
+				  hdrlen, mss,
+				  outer_csum,
+				  vlan_tci, has_vlan,
+				  start, done);
+		total_pkts++;
+		total_bytes += start ? len : len + hdrlen;
+		desc = ionic_tx_tso_next(q, &elem);
+		start = false;
+		seglen = mss;
+	}
+
+	/* Chop skb frags into desc segments */
+
+	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
+		offset = 0;
+		left = skb_frag_size(frag);
+		len_left -= left;
+		nfrags--;
+		stats->frags++;
+
+		while (left > 0) {
+			if (frag_left > 0) {
+				len = min(frag_left, left);
+				frag_left -= len;
+				frag_addr = ionic_tx_map_frag(q, frag,
+							      offset, len);
+				if (dma_mapping_error(dev, frag_addr))
+					goto err_out_abort;
+				elem->addr = cpu_to_le64(frag_addr);
+				elem->len = cpu_to_le16(len);
+				elem++;
+				desc_nsge++;
+				left -= len;
+				offset += len;
+				if (nfrags > 0 && frag_left > 0)
+					continue;
+				done = (nfrags == 0 && left == 0);
+				ionic_tx_tso_post(q, desc, skb, desc_addr,
+						  desc_nsge, desc_len,
+						  hdrlen, mss, outer_csum,
+						  vlan_tci, has_vlan,
+						  start, done);
+				total_pkts++;
+				total_bytes += start ? len : len + hdrlen;
+				desc = ionic_tx_tso_next(q, &elem);
+				start = false;
+			} else {
+				len = min(mss, left);
+				frag_left = mss - len;
+				desc_addr = ionic_tx_map_frag(q, frag,
+							      offset, len);
+				if (dma_mapping_error(dev, desc_addr))
+					goto err_out_abort;
+				desc_len = len;
+				desc_nsge = 0;
+				left -= len;
+				offset += len;
+				if (nfrags > 0 && frag_left > 0)
+					continue;
+				done = (nfrags == 0 && left == 0);
+				ionic_tx_tso_post(q, desc, skb, desc_addr,
+						  desc_nsge, desc_len,
+						  hdrlen, mss, outer_csum,
+						  vlan_tci, has_vlan,
+						  start, done);
+				total_pkts++;
+				total_bytes += start ? len : len + hdrlen;
+				desc = ionic_tx_tso_next(q, &elem);
+				start = false;
+			}
+		}
+	}
+
+	stats->pkts += total_pkts;
+	stats->bytes += total_bytes;
+	stats->tso++;
+
+	return 0;
+
+err_out_abort:
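+	/* Unmap any descriptors already built for this skb and restore
+	 * the queue head so the ring is left as we found it.
+	 */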
+	while (rewind->desc != q->head->desc) {
+		ionic_tx_clean(q, rewind, NULL, NULL);
+		rewind = rewind->next;
+	}
+	q->head = abort;
+
+	return -ENOMEM;
+}
+
+static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct ionic_txq_desc *desc = q->head->desc;
+	struct device *dev = q->lif->ionic->dev;
+	dma_addr_t dma_addr;
+	bool has_vlan;
+	u8 flags = 0;
+	bool encap;
+	u64 cmd;
+
+	has_vlan = !!skb_vlan_tag_present(skb);
+	encap = skb->encapsulation;
+
+	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
+	if (dma_mapping_error(dev, dma_addr))
+		return -ENOMEM;
+
+	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
+	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+
+	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL,
+				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
+	desc->cmd = cpu_to_le64(cmd);
+	desc->len = cpu_to_le16(skb_headlen(skb));
+	desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+	desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
+	desc->csum_offset = cpu_to_le16(skb->csum_offset);
+
+	if (skb->csum_not_inet)
+		stats->crc32_csum++;
+	else
+		stats->csum++;
+
+	return 0;
+}
+
+static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct ionic_txq_desc *desc = q->head->desc;
+	struct device *dev = q->lif->ionic->dev;
+	dma_addr_t dma_addr;
+	bool has_vlan;
+	u8 flags = 0;
+	bool encap;
+	u64 cmd;
+
+	has_vlan = !!skb_vlan_tag_present(skb);
+	encap = skb->encapsulation;
+
+	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
+	if (dma_mapping_error(dev, dma_addr))
+		return -ENOMEM;
+
+	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
+	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+
+	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE,
+				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
+	desc->cmd = cpu_to_le64(cmd);
+	desc->len = cpu_to_le16(skb_headlen(skb));
+	desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+
+	stats->no_csum++;
+
+	return 0;
+}
+
+static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
+{
+	struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc;
+	unsigned int len_left = skb->len - skb_headlen(skb);
+	struct ionic_txq_sg_elem *elem = sg_desc->elems;
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct device *dev = q->lif->ionic->dev;
+	dma_addr_t dma_addr;
+	skb_frag_t *frag;
+	u16 len;
+
+	for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) {
+		len = skb_frag_size(frag);
+		elem->len = cpu_to_le16(len);
+		dma_addr = ionic_tx_map_frag(q, frag, 0, len);
+		if (dma_mapping_error(dev, dma_addr))
+			return -ENOMEM;
+		elem->addr = cpu_to_le64(dma_addr);
+		len_left -= len;
+		stats->frags++;
+	}
+
+	return 0;
+}
+
+static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	int err;
+
+	/* set up the initial descriptor */
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		err = ionic_tx_calc_csum(q, skb);
+	else
+		err = ionic_tx_calc_no_csum(q, skb);
+	if (err)
+		return err;
+
+	/* add frags */
+	err = ionic_tx_skb_frags(q, skb);
+	if (err)
+		return err;
+
+	skb_tx_timestamp(skb);
+	stats->pkts++;
+	stats->bytes += skb->len;
+
+	netdev_tx_sent_queue(q_to_ndq(q), skb->len);
+	ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+
+	return 0;
+}
+
+static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
+{
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	int err;
+
+	/* If TSO, need roundup(skb->len/mss) descs */
+	if (skb_is_gso(skb))
+		return (skb->len / skb_shinfo(skb)->gso_size) + 1;
+
+	/* If non-TSO, just need 1 desc and nr_frags sg elems */
+	if (skb_shinfo(skb)->nr_frags <= IONIC_TX_MAX_SG_ELEMS)
+		return 1;
+
+	/* Too many frags, so linearize */
+	err = skb_linearize(skb);
+	if (err)
+		return err;
+
+	stats->linearize++;
+
+	/* Need 1 desc and zero sg elems */
+	return 1;
+}
+
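+/* Stop the tx subqueue when fewer than ndescs descriptors remain,
+ * re-checking after the stop to close the race against ionic_tx_clean.
+ * Returns nonzero if the queue is left stopped.
+ */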
+static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs)
+{
+	int stopped = 0;
+
+	if (unlikely(!ionic_q_has_space(q, ndescs))) {
+		netif_stop_subqueue(q->lif->netdev, q->index);
+		q->stop++;
+		stopped = 1;
+
+		/* Might race with ionic_tx_clean, check again */
+		smp_rmb();
+		if (ionic_q_has_space(q, ndescs)) {
+			netif_wake_subqueue(q->lif->netdev, q->index);
+			stopped = 0;
+		}
+	}
+
+	return stopped;
+}
+
+netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	u16 queue_index = skb_get_queue_mapping(skb);
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_queue *q;
+	int ndescs;
+	int err;
+
+	if (unlikely(!test_bit(IONIC_LIF_UP, lif->state))) {
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (unlikely(!lif_to_txqcq(lif, queue_index)))
+		queue_index = 0;
+	q = lif_to_txq(lif, queue_index);
+
+	ndescs = ionic_tx_descs_needed(q, skb);
+	if (ndescs < 0)
+		goto err_out_drop;
+
+	if (unlikely(ionic_maybe_stop_tx(q, ndescs)))
+		return NETDEV_TX_BUSY;
+
+	if (skb_is_gso(skb))
+		err = ionic_tx_tso(q, skb);
+	else
+		err = ionic_tx(q, skb);
+
+	if (err)
+		goto err_out_drop;
+
+	/* Stop the queue if there aren't enough descriptors for the next
+	 * packet.  Since our SG lists per descriptor take care of most of
+	 * the possible fragmentation, we don't need to have many
+	 * descriptors available.
+	 */
+	ionic_maybe_stop_tx(q, 4);
+
+	return NETDEV_TX_OK;
+
+err_out_drop:
+	q->stop++;
+	q->drop++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
new file mode 100644
index 0000000..53775c6
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_TXRX_H_
+#define _IONIC_TXRX_H_
+
+void ionic_rx_flush(struct ionic_cq *cq);
+void ionic_tx_flush(struct ionic_cq *cq);
+
+void ionic_rx_fill(struct ionic_queue *q);
+void ionic_rx_empty(struct ionic_queue *q);
+int ionic_rx_napi(struct napi_struct *napi, int budget);
+netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+
+#endif /* _IONIC_TXRX_H_ */
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 0ee2490..55a29ec 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # QLogic network device configuration
 #
@@ -65,15 +66,6 @@
 
 	  This data is available via the hwmon sysfs interface.
 
-config QLGE
-	tristate "QLogic QLGE 10Gb Ethernet Driver Support"
-	depends on PCI
-	---help---
-	  This driver supports QLogic ISP8XXX 10Gb Ethernet cards.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called qlge.
-
 config NETXEN_NIC
 	tristate "NetXen Multi port (1/10) Gigabit Ethernet NIC"
 	depends on PCI
@@ -86,6 +78,7 @@
 	depends on PCI
 	select ZLIB_INFLATE
 	select CRC8
+	select NET_DEVLINK
 	---help---
 	  This enables the support for ...
 
diff --git a/drivers/net/ethernet/qlogic/Makefile b/drivers/net/ethernet/qlogic/Makefile
index 6cd2e33..1ae4a07 100644
--- a/drivers/net/ethernet/qlogic/Makefile
+++ b/drivers/net/ethernet/qlogic/Makefile
@@ -5,7 +5,6 @@
 
 obj-$(CONFIG_QLA3XXX) += qla3xxx.o
 obj-$(CONFIG_QLCNIC) += qlcnic/
-obj-$(CONFIG_QLGE) += qlge/
 obj-$(CONFIG_NETXEN_NIC) += netxen/
 obj-$(CONFIG_QED) += qed/
 obj-$(CONFIG_QEDE)+= qede/
diff --git a/drivers/net/ethernet/qlogic/netxen/Makefile b/drivers/net/ethernet/qlogic/netxen/Makefile
index e14e60c..d6e80b3 100644
--- a/drivers/net/ethernet/qlogic/netxen/Makefile
+++ b/drivers/net/ethernet/qlogic/netxen/Makefile
@@ -1,23 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
 # Copyright (C) 2003 - 2009 NetXen, Inc.
 # Copyright (C) 2009 - QLogic Corporation.
 # All rights reserved.
-# 
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#                            
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#                                   
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-# 
-# The full GNU General Public License is included in this distribution
-# in the file called "COPYING".
-# 
 #
 
 
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 0a5e204..3dce769 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -1,24 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #ifndef _NETXEN_NIC_H_
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index 7503aa2..5e9f8ee 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -1,24 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #include "netxen_nic_hw.h"
@@ -458,10 +442,8 @@
 		goto out_free_rq;
 	}
 
-	memset(rq_addr, 0, rq_size);
 	prq = rq_addr;
 
-	memset(rsp_addr, 0, rsp_size);
 	prsp = rsp_addr;
 
 	prq->host_rsp_dma_addr = cpu_to_le64(rsp_phys_addr);
@@ -771,7 +753,6 @@
 		return -ENOMEM;
 	}
 
-	memset(addr, 0, sizeof(struct netxen_ring_ctx));
 	recv_ctx->hwctx = addr;
 	recv_ctx->hwctx->ctx_id = cpu_to_le32(port);
 	recv_ctx->hwctx->cmd_consumer_offset =
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
index 3c1be87..6a2d91d 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
@@ -1,24 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #include <linux/types.h>
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
index a310c2f..09b33e1 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
@@ -1,24 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #ifndef __NETXEN_NIC_HDR_H_
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 52ad806..6e12cd2 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -1,24 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #include <linux/io-64-nonatomic-lo-hi.h>
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.h
index 7433c4d..de73766 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.h
@@ -1,24 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #ifndef __NETXEN_NIC_HW_H_
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
index 0ea141e..94546ed 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
@@ -1,24 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #include <linux/netdevice.h>
@@ -1125,7 +1109,8 @@
 		return -EINVAL;
 	}
 	val = nx_get_bios_version(adapter);
-	netxen_rom_fast_read(adapter, NX_BIOS_VERSION_OFFSET, (int *)&bios);
+	if (netxen_rom_fast_read(adapter, NX_BIOS_VERSION_OFFSET, (int *)&bios))
+		return -EIO;
 	if ((__force u32)val != bios) {
 		dev_err(&pdev->dev, "%s: firmware bios is incompatible\n",
 				fw_name[fw_type]);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 59c70be..c692a41 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1,24 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2003 - 2009 NetXen, Inc.
  * Copyright (C) 2009 - QLogic Corporation.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution
- * in the file called "COPYING".
- *
  */
 
 #include <linux/slab.h>
@@ -1784,11 +1768,6 @@
 	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
 }
 
-static void netxen_io_resume(struct pci_dev *pdev)
-{
-	pci_cleanup_aer_uncorrect_error_status(pdev);
-}
-
 static void netxen_nic_shutdown(struct pci_dev *pdev)
 {
 	struct netxen_adapter *adapter = pci_get_drvdata(pdev);
@@ -2001,7 +1980,7 @@
 		struct sk_buff *skb, struct netxen_cmd_buffer *pbuf)
 {
 	struct netxen_skb_frag *nf;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	int i, nr_frags;
 	dma_addr_t map;
 
@@ -2064,7 +2043,7 @@
 	struct pci_dev *pdev;
 	int i, k;
 	int delta = 0;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 
 	u32 producer;
 	int frag_count;
@@ -3269,6 +3248,7 @@
 		struct net_device *dev, unsigned long event)
 {
 	struct in_device *indev;
+	struct in_ifaddr *ifa;
 
 	if (!netxen_destip_supported(adapter))
 		return;
@@ -3277,7 +3257,8 @@
 	if (!indev)
 		return;
 
-	for_ifa(indev) {
+	rcu_read_lock();
+	in_dev_for_each_ifa_rcu(ifa, indev) {
 		switch (event) {
 		case NETDEV_UP:
 			netxen_list_config_ip(adapter, ifa, NX_IP_UP);
@@ -3288,8 +3269,8 @@
 		default:
 			break;
 		}
-	} endfor_ifa(indev);
-
+	}
+	rcu_read_unlock();
 	in_dev_put(indev);
 }
 
@@ -3465,7 +3446,6 @@
 static const struct pci_error_handlers netxen_err_handler = {
 	.error_detected = netxen_io_error_detected,
 	.slot_reset = netxen_io_slot_reset,
-	.resume = netxen_io_resume,
 };
 
 static struct pci_driver netxen_driver = {
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index a60e1c8..89fe091 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -53,7 +53,7 @@
 extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_MAJOR_VERSION		8
-#define QED_MINOR_VERSION		33
+#define QED_MINOR_VERSION		37
 #define QED_REVISION_VERSION		0
 #define QED_ENGINEERING_VERSION		20
 
@@ -140,6 +140,7 @@
 struct qed_sb_sp_info;
 struct qed_ll2_info;
 struct qed_mcp_info;
+struct qed_llh_info;
 
 struct qed_rt_data {
 	u32	*init_val;
@@ -296,6 +297,12 @@
 	QED_WOL_SUPPORT_PME,
 };
 
+enum qed_db_rec_exec {
+	DB_REC_DRY_RUN,
+	DB_REC_REAL_DEAL,
+	DB_REC_ONCE,
+};
+
 struct qed_hw_info {
 	/* PCI personality */
 	enum qed_pci_personality personality;
@@ -425,6 +432,18 @@
 	u8 num_pf_rls;
 };
 
+#define QED_OVERFLOW_BIT	1
+
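+/* Doorbell recovery state for a hwfn; QED_OVERFLOW_BIT above is
+ * latched in @overflow until the recovery handler has run.
+ */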
+struct qed_db_recovery_info {
+	struct list_head list;
+
+	/* Lock to protect the doorbell recovery mechanism list */
+	spinlock_t lock;
+	bool dorq_attn;
+	u32 db_recovery_counter;
+	unsigned long overflow;
+};
+
 struct storm_stats {
 	u32     address;
 	u32     len;
@@ -478,6 +497,9 @@
 
 	/* Allow DSCP to TC mapping */
 	QED_MF_DSCP_TO_TC_MAP,
+
+	/* Do not insert a vlan tag with id 0 */
+	QED_MF_DONT_ADD_VLAN0_TAG,
 };
 
 enum qed_ufp_mode {
@@ -522,6 +544,7 @@
 
 enum qed_slowpath_wq_flag {
 	QED_SLOWPATH_MFW_TLV_REQ,
+	QED_SLOWPATH_PERIODIC_DB_REC,
 };
 
 struct qed_hwfn {
@@ -539,7 +562,6 @@
 	u8				dp_level;
 	char				name[NAME_SIZE];
 
-	bool				first_on_engine;
 	bool				hw_init_done;
 
 	u8				num_funcs_on_engine;
@@ -623,6 +645,7 @@
 	void				*unzip_buf;
 
 	struct dbg_tools_data		dbg_info;
+	void				*dbg_user_info;
 
 	/* PWM region specific data */
 	u16				wid_count;
@@ -639,6 +662,9 @@
 	/* L2-related */
 	struct qed_l2_info *p_l2_info;
 
+	/* Mechanism for recovering from doorbell drop */
+	struct qed_db_recovery_info db_recovery_info;
+
 	/* Nvm images number and attributes */
 	struct qed_nvm_image_info nvm_info;
 
@@ -651,11 +677,12 @@
 	struct delayed_work iov_task;
 	unsigned long iov_task_flags;
 #endif
-
-	struct z_stream_s		*stream;
+	struct z_stream_s *stream;
+	bool slowpath_wq_active;
 	struct workqueue_struct *slowpath_wq;
 	struct delayed_work slowpath_task;
 	unsigned long slowpath_task_flags;
+	u32 periodic_db_rec_count;
 };
 
 struct pci_params {
@@ -715,6 +742,7 @@
 #define QED_DEV_ID_MASK		0xff00
 #define QED_DEV_ID_MASK_BB	0x1600
 #define QED_DEV_ID_MASK_AH	0x8000
+#define QED_IS_E4(dev)  (QED_IS_BB(dev) || QED_IS_AH(dev))
 
 	u16	chip_num;
 #define CHIP_NUM_MASK                   0xffff
@@ -734,6 +762,7 @@
 #define CHIP_BOND_ID_SHIFT              0
 
 	u8				num_engines;
+	u8				num_ports;
 	u8				num_ports_in_engine;
 	u8				num_funcs_in_port;
 
@@ -774,6 +803,11 @@
 	u8				num_hwfns;
 	struct qed_hwfn			hwfns[MAX_HWFNS_PER_DEVICE];
 
+	/* Engine affinity */
+	u8				l2_affin_hint;
+	u8				fir_affin;
+	u8				iwarp_affin;
+
 	/* SRIOV */
 	struct qed_hw_sriov_info *p_iov_info;
 #define IS_QED_SRIOV(cdev)              (!!(cdev)->p_iov_info)
@@ -785,6 +819,13 @@
 
 	u32				mcp_nvm_resp;
 
+	/* Recovery */
+	bool recov_in_prog;
+
+	/* LLH info */
+	u8 ppfid_bitmap;
+	struct qed_llh_info *p_llh_info;
+
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
@@ -822,6 +863,9 @@
 	u32 rdma_max_inline;
 	u32 rdma_max_srq_sge;
 	u16 tunn_feature_mask;
+
+	struct devlink			*dl;
+	bool				iwarp_cmt;
 };
 
 #define NUM_OF_VFS(dev)         (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \
@@ -870,11 +914,18 @@
 
 void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 int qed_device_num_engines(struct qed_dev *cdev);
-int qed_device_get_port_id(struct qed_dev *cdev);
 void qed_set_fw_mac_addr(__le16 *fw_msb,
 			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
 
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
+#define QED_IS_CMT(dev)		((dev)->num_hwfns > 1)
+/* Macros for getting the engine-affinitized hwfn (FIR: fcoe, iscsi, roce) */
+#define QED_FIR_AFFIN_HWFN(dev)		(&(dev)->hwfns[dev->fir_affin])
+#define QED_IWARP_AFFIN_HWFN(dev)       (&(dev)->hwfns[dev->iwarp_affin])
+#define QED_AFFIN_HWFN(dev)				   \
+	(QED_IS_IWARP_PERSONALITY(QED_LEADING_HWFN(dev)) ? \
+	 QED_IWARP_AFFIN_HWFN(dev) : QED_FIR_AFFIN_HWFN(dev))
+#define QED_AFFIN_HWFN_IDX(dev) (IS_LEAD_HWFN(QED_AFFIN_HWFN(dev)) ? 0 : 1)
 
 /* Flags for indication of required queues */
 #define PQ_FLAGS_RLS    (BIT(0))
@@ -894,7 +945,10 @@
 u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc);
 u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
 
-#define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
+/* doorbell recovery mechanism */
+void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
+bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
 
 /* Other Linux specific common definitions */
 #define DP_NAME(cdev) ((cdev)->name)
@@ -911,13 +965,18 @@
 	writel((u32)val, (void __iomem *)((u8 __iomem *)\
 					  (cdev->doorbells) + (db_addr)))
 
+#define MFW_PORT(_p_hwfn)       ((_p_hwfn)->abs_pf_id %			  \
+				  qed_device_num_ports((_p_hwfn)->cdev))
+int qed_device_num_ports(struct qed_dev *cdev);
+
 /* Prototypes */
 int qed_fill_dev_info(struct qed_dev *cdev,
 		      struct qed_dev_info *dev_info);
-void qed_link_update(struct qed_hwfn *hwfn);
+void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
 u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
+void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
 void qed_get_protocol_stats(struct qed_dev *cdev,
 			    enum qed_mcp_protocol_type type,
 			    union qed_mcp_protocol_stats *stats);
@@ -930,4 +989,6 @@
 			  union qed_mfw_tlv_data *tlv_data);
 
 void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc);
+
+void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn);
 #endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index f1977aa..8e1bdf5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -40,7 +40,6 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/bitops.h>
 #include "qed.h"
 #include "qed_cxt.h"
 #include "qed_dev_api.h"
@@ -937,9 +936,9 @@
 		u32 size = min_t(u32, total_size, psz);
 		void **p_virt = &p_mngr->t2[i].p_virt;
 
-		*p_virt = dma_zalloc_coherent(&p_hwfn->cdev->pdev->dev,
-					      size, &p_mngr->t2[i].p_phys,
-					      GFP_KERNEL);
+		*p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, size,
+					     &p_mngr->t2[i].p_phys,
+					     GFP_KERNEL);
 		if (!p_mngr->t2[i].p_virt) {
 			rc = -ENOMEM;
 			goto t2_fail;
@@ -1055,8 +1054,8 @@
 		u32 size;
 
 		size = min_t(u32, sz_left, p_blk->real_size_in_page);
-		p_virt = dma_zalloc_coherent(&p_hwfn->cdev->pdev->dev, size,
-					     &p_phys, GFP_KERNEL);
+		p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, size,
+					    &p_phys, GFP_KERNEL);
 		if (!p_virt)
 			return -ENOMEM;
 
@@ -2130,17 +2129,18 @@
 					       rdma_tasks);
 		/* no need for break since RoCE coexist with Ethernet */
 	}
+	/* fall through */
 	case QED_PCI_ETH:
 	{
 		struct qed_eth_pf_params *p_params =
 		    &p_hwfn->pf_params.eth_pf_params;
 
-			if (!p_params->num_vf_cons)
-				p_params->num_vf_cons =
-				    ETH_PF_PARAMS_VF_CONS_DEFAULT;
-			qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
-						    p_params->num_cons,
-						    p_params->num_vf_cons);
+		if (!p_params->num_vf_cons)
+			p_params->num_vf_cons =
+			    ETH_PF_PARAMS_VF_CONS_DEFAULT;
+		qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+					    p_params->num_cons,
+					    p_params->num_vf_cons);
 		p_hwfn->p_cxt_mngr->arfs_count = p_params->num_arfs_filters;
 		break;
 	}
@@ -2307,9 +2307,9 @@
 		goto out0;
 	}
 
-	p_virt = dma_zalloc_coherent(&p_hwfn->cdev->pdev->dev,
-				     p_blk->real_size_in_page, &p_phys,
-				     GFP_KERNEL);
+	p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				    p_blk->real_size_in_page, &p_phys,
+				    GFP_KERNEL);
 	if (!p_virt) {
 		rc = -ENOMEM;
 		goto out1;
@@ -2351,7 +2351,8 @@
 
 	/* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */
 	qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry,
-			  reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), 0);
+			  reg_offset, sizeof(ilt_hw_entry) / sizeof(u32),
+			  NULL);
 
 	if (elem_type == QED_ELEM_CXT) {
 		u32 last_cid_allocated = (1 + (iid / elems_per_p)) *
@@ -2457,7 +2458,7 @@
 				  (u64) (uintptr_t) &ilt_hw_entry,
 				  reg_offset,
 				  sizeof(ilt_hw_entry) / sizeof(u32),
-				  0);
+				  NULL);
 	}
 
 	qed_ptt_release(p_hwfn, p_ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 5900a50..5c6a276 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -204,9 +204,7 @@
 	else
 		p_data->arr[type].update = DONT_UPDATE_DCB_DSCP;
 
-	/* Do not add vlan tag 0 when DCB is enabled and port in UFP/OV mode */
-	if ((test_bit(QED_MF_8021Q_TAGGING, &p_hwfn->cdev->mf_bits) ||
-	     test_bit(QED_MF_8021AD_TAGGING, &p_hwfn->cdev->mf_bits)))
+	if (test_bit(QED_MF_DONT_ADD_VLAN0_TAG, &p_hwfn->cdev->mf_bits))
 		p_data->arr[type].dont_add_vlan0 = true;
 
 	/* QM reconf data */
@@ -262,8 +260,9 @@
 		*type = DCBX_PROTOCOL_ROCE_V2;
 	} else {
 		*type = DCBX_MAX_PROTOCOL_TYPE;
-		DP_ERR(p_hwfn, "No action required, App TLV entry = 0x%x\n",
-		       app_prio_bitmap);
+		DP_VERBOSE(p_hwfn, QED_MSG_DCB,
+			   "No action required, App TLV entry = 0x%x\n",
+			   app_prio_bitmap);
 		return false;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 1aa9fc1..859caa6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* QLogic qed NIC Driver
  * Copyright (c) 2015 QLogic Corporation
- *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
  */
 
 #include <linux/module.h>
@@ -1759,6 +1756,15 @@
 	return dword;
 }
 
+/* Sets the value of the specified GRC param */
+static void qed_grc_set_param(struct qed_hwfn *p_hwfn,
+			      enum dbg_grc_params grc_param, u32 val)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+	dev_data->grc.param_val[grc_param] = val;
+}
+
 /* Returns the value of the specified GRC param */
 static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn,
 			     enum dbg_grc_params grc_param)
@@ -2537,7 +2543,7 @@
 	    (len >= s_platform_defs[dev_data->platform_id].dmae_thresh ||
 	     wide_bus)) {
 		if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr),
-				       (u64)(uintptr_t)(dump_buf), len, 0))
+				       (u64)(uintptr_t)(dump_buf), len, NULL))
 			return len;
 		dev_data->use_dmae = 0;
 		DP_VERBOSE(p_hwfn,
@@ -3454,7 +3460,8 @@
 			addr = BYTES_TO_DWORDS(storm->sem_fast_mem_addr +
 					       SEM_FAST_REG_STORM_REG_FILE) +
 			       IOR_SET_OFFSET(set_id);
-			buf[strlen(buf) - 1] = '0' + set_id;
+			if (strlen(buf) > 0)
+				buf[strlen(buf) - 1] = '0' + set_id;
 			offset += qed_grc_dump_mem(p_hwfn,
 						   p_ptt,
 						   dump_buf + offset,
@@ -5121,6 +5128,69 @@
 	return false;
 }
 
+enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum dbg_grc_params grc_param, u32 val)
+{
+	enum dbg_status status;
+	int i;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_DEBUG,
+		   "dbg_grc_config: paramId = %d, val = %d\n", grc_param, val);
+
+	status = qed_dbg_dev_init(p_hwfn, p_ptt);
+	if (status != DBG_STATUS_OK)
+		return status;
+
+	/* Initializes the GRC parameters (if not initialized). Needed in order
+	 * to set the default parameter values for the first time.
+	 */
+	qed_dbg_grc_init_params(p_hwfn);
+
+	if (grc_param >= MAX_DBG_GRC_PARAMS)
+		return DBG_STATUS_INVALID_ARGS;
+	if (val < s_grc_param_defs[grc_param].min ||
+	    val > s_grc_param_defs[grc_param].max)
+		return DBG_STATUS_INVALID_ARGS;
+
+	if (s_grc_param_defs[grc_param].is_preset) {
+		/* Preset param */
+
+		/* Disabling a preset is not allowed. Call
+		 * dbg_grc_set_params_default instead.
+		 */
+		if (!val)
+			return DBG_STATUS_INVALID_ARGS;
+
+		/* Update all params with the preset values */
+		for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) {
+			u32 preset_val;
+
+			/* Skip persistent params */
+			if (s_grc_param_defs[i].is_persistent)
+				continue;
+
+			/* Find preset value */
+			if (grc_param == DBG_GRC_PARAM_EXCLUDE_ALL)
+				preset_val =
+				    s_grc_param_defs[i].exclude_all_preset_val;
+			else if (grc_param == DBG_GRC_PARAM_CRASH)
+				preset_val =
+				    s_grc_param_defs[i].crash_preset_val;
+			else
+				return DBG_STATUS_INVALID_ARGS;
+
+			qed_grc_set_param(p_hwfn,
+					  (enum dbg_grc_params)i, preset_val);
+		}
+	} else {
+		/* Regular param - set its value */
+		qed_grc_set_param(p_hwfn, grc_param, val);
+	}
+
+	return DBG_STATUS_OK;
+}
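+
+/* Illustrative call (hypothetical call site, not part of this patch): apply
+ * the crash preset, which overrides every non-persistent param with its
+ * crash_preset_val:
+ *
+ *	qed_dbg_grc_config(p_hwfn, p_ptt, DBG_GRC_PARAM_CRASH, 1);
+ *
+ * Presets only accept a non-zero val; revert via
+ * qed_dbg_grc_set_params_default().
+ */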
+
 /* Assign default GRC param values */
 void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
 {
@@ -5563,35 +5633,6 @@
 	enum block_id id;
 };
 
-struct mcp_trace_format {
-	u32 data;
-#define MCP_TRACE_FORMAT_MODULE_MASK	0x0000ffff
-#define MCP_TRACE_FORMAT_MODULE_SHIFT	0
-#define MCP_TRACE_FORMAT_LEVEL_MASK	0x00030000
-#define MCP_TRACE_FORMAT_LEVEL_SHIFT	16
-#define MCP_TRACE_FORMAT_P1_SIZE_MASK	0x000c0000
-#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT	18
-#define MCP_TRACE_FORMAT_P2_SIZE_MASK	0x00300000
-#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT	20
-#define MCP_TRACE_FORMAT_P3_SIZE_MASK	0x00c00000
-#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT	22
-#define MCP_TRACE_FORMAT_LEN_MASK	0xff000000
-#define MCP_TRACE_FORMAT_LEN_SHIFT	24
-
-	char *format_str;
-};
-
-/* Meta data structure, generated by a perl script during MFW build. therefore,
- * the structs mcp_trace_meta and mcp_trace_format are duplicated in the perl
- * script.
- */
-struct mcp_trace_meta {
-	u32 modules_num;
-	char **modules;
-	u32 formats_num;
-	struct mcp_trace_format *formats;
-};
-
 /* REG fifo element */
 struct reg_fifo_element {
 	u64 data;
@@ -5714,6 +5755,20 @@
 	enum igu_fifo_addr_types type;
 };
 
+struct mcp_trace_meta {
+	u32 modules_num;
+	char **modules;
+	u32 formats_num;
+	struct mcp_trace_format *formats;
+	bool is_allocated;
+};
+
+/* Debug Tools user data */
+struct dbg_tools_user_data {
+	struct mcp_trace_meta mcp_trace_meta;
+	const u32 *mcp_trace_user_meta_buf;
+};
+
 /******************************** Constants **********************************/
 
 #define MAX_MSG_LEN				1024
@@ -6085,7 +6140,7 @@
 	"no error",
 	"length error",
 	"function disabled",
-	"VF sent command to attnetion address",
+	"VF sent command to attention address",
 	"host sent prod update command",
 	"read of during interrupt register while in MIMD mode",
 	"access to PXP BAR reserved address",
@@ -6137,15 +6192,6 @@
 
 /******************************** Variables **********************************/
 
-/* MCP Trace meta data array - used in case the dump doesn't contain the
- * meta data (e.g. due to no NVRAM access).
- */
-static struct user_dbg_array s_mcp_trace_meta_arr = { NULL, 0 };
-
-/* Parsed MCP Trace meta data info, based on MCP trace meta array */
-static struct mcp_trace_meta s_mcp_trace_meta;
-static bool s_mcp_trace_meta_valid;
-
 /* Temporary buffer, used for print size calculations */
 static char s_temp_buf[MAX_MSG_LEN];
 
@@ -6311,6 +6357,12 @@
 	return dump_offset;
 }
 
+static struct dbg_tools_user_data *
+qed_dbg_get_user_data(struct qed_hwfn *p_hwfn)
+{
+	return (struct dbg_tools_user_data *)p_hwfn->dbg_user_info;
+}
+
 /* Parses the idle check rules and returns the number of characters printed.
  * In case of parsing error, returns 0.
  */
@@ -6570,43 +6622,26 @@
 	return DBG_STATUS_OK;
 }
 
-/* Frees the specified MCP Trace meta data */
-static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
-				    struct mcp_trace_meta *meta)
-{
-	u32 i;
-
-	s_mcp_trace_meta_valid = false;
-
-	/* Release modules */
-	if (meta->modules) {
-		for (i = 0; i < meta->modules_num; i++)
-			kfree(meta->modules[i]);
-		kfree(meta->modules);
-	}
-
-	/* Release formats */
-	if (meta->formats) {
-		for (i = 0; i < meta->formats_num; i++)
-			kfree(meta->formats[i].format_str);
-		kfree(meta->formats);
-	}
-}
-
 /* Allocates and fills MCP Trace meta data based on the specified meta data
  * dump buffer.
  * Returns debug status code.
  */
-static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
-						const u32 *meta_buf,
-						struct mcp_trace_meta *meta)
+static enum dbg_status
+qed_mcp_trace_alloc_meta_data(struct qed_hwfn *p_hwfn,
+			      const u32 *meta_buf)
 {
-	u8 *meta_buf_bytes = (u8 *)meta_buf;
+	struct dbg_tools_user_data *dev_user_data;
 	u32 offset = 0, signature, i;
+	struct mcp_trace_meta *meta;
+	u8 *meta_buf_bytes;
+
+	dev_user_data = qed_dbg_get_user_data(p_hwfn);
+	meta = &dev_user_data->mcp_trace_meta;
+	meta_buf_bytes = (u8 *)meta_buf;
 
 	/* Free the previous meta before loading a new one. */
-	if (s_mcp_trace_meta_valid)
-		qed_mcp_trace_free_meta(p_hwfn, meta);
+	if (meta->is_allocated)
+		qed_mcp_trace_free_meta_data(p_hwfn);
 
 	memset(meta, 0, sizeof(*meta));
 
@@ -6674,7 +6709,7 @@
 				      format_len, format_ptr->format_str);
 	}
 
-	s_mcp_trace_meta_valid = true;
+	meta->is_allocated = true;
 	return DBG_STATUS_OK;
 }
 
@@ -6687,21 +6722,26 @@
  *               buffer.
  * data_size - size in bytes of data to parse.
  * parsed_buf - destination buffer for parsed data.
- * parsed_bytes - size of parsed data in bytes.
+ * parsed_results_bytes - size of parsed data in bytes.
  */
-static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
+static enum dbg_status qed_parse_mcp_trace_buf(struct qed_hwfn *p_hwfn,
+					       u8 *trace_buf,
 					       u32 trace_buf_size,
 					       u32 data_offset,
 					       u32 data_size,
 					       char *parsed_buf,
-					       u32 *parsed_bytes)
+					       u32 *parsed_results_bytes)
 {
+	struct dbg_tools_user_data *dev_user_data;
+	struct mcp_trace_meta *meta;
 	u32 param_mask, param_shift;
 	enum dbg_status status;
 
-	*parsed_bytes = 0;
+	dev_user_data = qed_dbg_get_user_data(p_hwfn);
+	meta = &dev_user_data->mcp_trace_meta;
+	*parsed_results_bytes = 0;
 
-	if (!s_mcp_trace_meta_valid)
+	if (!meta->is_allocated)
 		return DBG_STATUS_MCP_TRACE_BAD_DATA;
 
 	status = DBG_STATUS_OK;
@@ -6723,7 +6763,7 @@
 		format_idx = header & MFW_TRACE_EVENTID_MASK;
 
 		/* Skip message if its index doesn't exist in the meta data */
-		if (format_idx >= s_mcp_trace_meta.formats_num) {
+		if (format_idx >= meta->formats_num) {
 			u8 format_size =
 				(u8)((header & MFW_TRACE_PRM_SIZE_MASK) >>
 				     MFW_TRACE_PRM_SIZE_SHIFT);
@@ -6738,7 +6778,7 @@
 			continue;
 		}
 
-		format_ptr = &s_mcp_trace_meta.formats[format_idx];
+		format_ptr = &meta->formats[format_idx];
 
 		for (i = 0,
 		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK,
@@ -6783,19 +6823,20 @@
 			return DBG_STATUS_MCP_TRACE_BAD_DATA;
 
 		/* Print current message to results buffer */
-		*parsed_bytes +=
-			sprintf(qed_get_buf_ptr(parsed_buf, *parsed_bytes),
+		*parsed_results_bytes +=
+			sprintf(qed_get_buf_ptr(parsed_buf,
+						*parsed_results_bytes),
 				"%s %-8s: ",
 				s_mcp_trace_level_str[format_level],
-				s_mcp_trace_meta.modules[format_module]);
-		*parsed_bytes +=
-		    sprintf(qed_get_buf_ptr(parsed_buf, *parsed_bytes),
+				meta->modules[format_module]);
+		*parsed_results_bytes +=
+		    sprintf(qed_get_buf_ptr(parsed_buf, *parsed_results_bytes),
 			    format_ptr->format_str,
 			    params[0], params[1], params[2]);
 	}
 
 	/* Add string NULL terminator */
-	(*parsed_bytes)++;
+	(*parsed_results_bytes)++;
 
 	return status;
 }
@@ -6803,24 +6844,25 @@
 /* Parses an MCP Trace dump buffer.
  * If result_buf is not NULL, the MCP Trace results are printed to it.
  * In any case, the required results buffer size is assigned to
- * parsed_bytes.
+ * parsed_results_bytes.
  * The parsing status is returned.
  */
 static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 						u32 *dump_buf,
-						char *parsed_buf,
-						u32 *parsed_bytes)
+						char *results_buf,
+						u32 *parsed_results_bytes,
+						bool free_meta_data)
 {
 	const char *section_name, *param_name, *param_str_val;
 	u32 data_size, trace_data_dwords, trace_meta_dwords;
-	u32 offset, results_offset, parsed_buf_bytes;
+	u32 offset, results_offset, results_buf_bytes;
 	u32 param_num_val, num_section_params;
 	struct mcp_trace *trace;
 	enum dbg_status status;
 	const u32 *meta_buf;
 	u8 *trace_buf;
 
-	*parsed_bytes = 0;
+	*parsed_results_bytes = 0;
 
 	/* Read global_params section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6831,7 +6873,7 @@
 	/* Print global params */
 	dump_buf += qed_print_section_params(dump_buf,
 					     num_section_params,
-					     parsed_buf, &results_offset);
+					     results_buf, &results_offset);
 
 	/* Read trace_data section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6846,6 +6888,9 @@
 
 	/* Prepare trace info */
 	trace = (struct mcp_trace *)dump_buf;
+	if (trace->signature != MFW_TRACE_SIGNATURE || !trace->size)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
 	trace_buf = (u8 *)dump_buf + sizeof(*trace);
 	offset = trace->trace_oldest;
 	data_size = qed_cyclic_sub(trace->trace_prod, offset, trace->size);
@@ -6865,31 +6910,39 @@
 	/* Choose meta data buffer */
 	if (!trace_meta_dwords) {
 		/* Dump doesn't include meta data */
-		if (!s_mcp_trace_meta_arr.ptr)
+		struct dbg_tools_user_data *dev_user_data =
+			qed_dbg_get_user_data(p_hwfn);
+
+		if (!dev_user_data->mcp_trace_user_meta_buf)
 			return DBG_STATUS_MCP_TRACE_NO_META;
-		meta_buf = s_mcp_trace_meta_arr.ptr;
+
+		meta_buf = dev_user_data->mcp_trace_user_meta_buf;
 	} else {
 		/* Dump includes meta data */
 		meta_buf = dump_buf;
 	}
 
 	/* Allocate meta data memory */
-	status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &s_mcp_trace_meta);
+	status = qed_mcp_trace_alloc_meta_data(p_hwfn, meta_buf);
 	if (status != DBG_STATUS_OK)
 		return status;
 
-	status = qed_parse_mcp_trace_buf(trace_buf,
+	status = qed_parse_mcp_trace_buf(p_hwfn,
+					 trace_buf,
 					 trace->size,
 					 offset,
 					 data_size,
-					 parsed_buf ?
-					 parsed_buf + results_offset :
+					 results_buf ?
+					 results_buf + results_offset :
 					 NULL,
-					 &parsed_buf_bytes);
+					 &results_buf_bytes);
 	if (status != DBG_STATUS_OK)
 		return status;
 
-	*parsed_bytes = results_offset + parsed_buf_bytes;
+	if (free_meta_data)
+		qed_mcp_trace_free_meta_data(p_hwfn);
+
+	*parsed_results_bytes = results_offset + results_buf_bytes;
 
 	return DBG_STATUS_OK;
 }
@@ -7361,6 +7414,16 @@
 	return DBG_STATUS_OK;
 }
 
+enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn)
+{
+	p_hwfn->dbg_user_info = kzalloc(sizeof(struct dbg_tools_user_data),
+					GFP_KERNEL);
+	if (!p_hwfn->dbg_user_info)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	return DBG_STATUS_OK;
+}
+
 const char *qed_dbg_get_status_str(enum dbg_status status)
 {
 	return (status <
@@ -7397,10 +7460,13 @@
 				       num_errors, num_warnings);
 }
 
-void qed_dbg_mcp_trace_set_meta_data(u32 *data, u32 size)
+void qed_dbg_mcp_trace_set_meta_data(struct qed_hwfn *p_hwfn,
+				     const u32 *meta_buf)
 {
-	s_mcp_trace_meta_arr.ptr = data;
-	s_mcp_trace_meta_arr.size_in_dwords = size;
+	struct dbg_tools_user_data *dev_user_data =
+		qed_dbg_get_user_data(p_hwfn);
+
+	dev_user_data->mcp_trace_user_meta_buf = meta_buf;
 }
 
 enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
@@ -7409,7 +7475,7 @@
 						   u32 *results_buf_size)
 {
 	return qed_parse_mcp_trace_dump(p_hwfn,
-					dump_buf, NULL, results_buf_size);
+					dump_buf, NULL, results_buf_size, true);
 }
 
 enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
@@ -7421,20 +7487,61 @@
 
 	return qed_parse_mcp_trace_dump(p_hwfn,
 					dump_buf,
-					results_buf, &parsed_buf_size);
+					results_buf, &parsed_buf_size, true);
 }
 
-enum dbg_status qed_print_mcp_trace_line(u8 *dump_buf,
+enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
+						 u32 *dump_buf,
+						 char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_mcp_trace_dump(p_hwfn, dump_buf, results_buf,
+					&parsed_buf_size, false);
+}
+
+enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
+					 u8 *dump_buf,
 					 u32 num_dumped_bytes,
 					 char *results_buf)
 {
-	u32 parsed_bytes;
+	u32 parsed_results_bytes;
 
-	return qed_parse_mcp_trace_buf(dump_buf,
+	return qed_parse_mcp_trace_buf(p_hwfn,
+				       dump_buf,
 				       num_dumped_bytes,
 				       0,
 				       num_dumped_bytes,
-				       results_buf, &parsed_bytes);
+				       results_buf, &parsed_results_bytes);
+}
+
+/* Frees the specified MCP Trace meta data */
+void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn)
+{
+	struct dbg_tools_user_data *dev_user_data;
+	struct mcp_trace_meta *meta;
+	u32 i;
+
+	dev_user_data = qed_dbg_get_user_data(p_hwfn);
+	meta = &dev_user_data->mcp_trace_meta;
+	if (!meta->is_allocated)
+		return;
+
+	/* Release modules */
+	if (meta->modules) {
+		for (i = 0; i < meta->modules_num; i++)
+			kfree(meta->modules[i]);
+		kfree(meta->modules);
+	}
+
+	/* Release formats */
+	if (meta->formats) {
+		for (i = 0; i < meta->formats_num; i++)
+			kfree(meta->formats[i].format_str);
+		kfree(meta->formats);
+	}
+
+	meta->is_allocated = false;
 }
 
 enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
@@ -7962,9 +8069,16 @@
 int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 {
 	u8 cur_engine, omit_engine = 0, org_engine;
+	struct qed_hwfn *p_hwfn =
+		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	int grc_params[MAX_DBG_GRC_PARAMS], i;
 	u32 offset = 0, feature_size;
 	int rc;
 
+	for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+		grc_params[i] = dev_data->grc.param_val[i];
+
 	if (cdev->num_hwfns == 1)
 		omit_engine = 1;
 
@@ -8052,6 +8166,9 @@
 			       rc);
 		}
 
+		for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+			dev_data->grc.param_val[i] = grc_params[i];
+
 		/* GRC dump - must be last because when mcp stuck it will
 		 * clutter idle_chk, reg_fifo, ...
 		 */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h
index ea1cc8e..e47e0e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* QLogic qed NIC Driver
  * Copyright (c) 2015 QLogic Corporation
- *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
  */
 
 #ifndef _QED_DEBUGFS_H
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 2f69ee9..a1ebc2b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -66,6 +66,1222 @@
 
 static DEFINE_SPINLOCK(qm_lock);
 
+/******************** Doorbell Recovery *******************/
+/* The doorbell recovery mechanism consists of a list of entries which represent
+ * doorbelling entities (l2 queues, roce sq/rq/cqs, the slowpath spq, etc). Each
+ * entity needs to register with the mechanism and provide the parameters
+ * describing it's doorbell, including a location where last used doorbell data
+ * can be found. The doorbell execute function will traverse the list and
+ * doorbell all of the registered entries.
+ */
+struct qed_db_recovery_entry {
+	struct list_head list_entry;
+	void __iomem *db_addr;
+	void *db_data;
+	enum qed_db_rec_width db_width;
+	enum qed_db_rec_space db_space;
+	u8 hwfn_idx;
+};
+
+/* Display a single doorbell recovery entry */
+static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn,
+				     struct qed_db_recovery_entry *db_entry,
+				     char *action)
+{
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_SPQ,
+		   "(%s: db_entry %p, addr %p, data %p, width %s, %s space, hwfn %d)\n",
+		   action,
+		   db_entry,
+		   db_entry->db_addr,
+		   db_entry->db_data,
+		   db_entry->db_width == DB_REC_WIDTH_32B ? "32b" : "64b",
+		   db_entry->db_space == DB_REC_USER ? "user" : "kernel",
+		   db_entry->hwfn_idx);
+}
+
+/* Doorbell address sanity (address within doorbell bar range) */
+static bool qed_db_rec_sanity(struct qed_dev *cdev,
+			      void __iomem *db_addr,
+			      enum qed_db_rec_width db_width,
+			      void *db_data)
+{
+	u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64;
+
+	/* Make sure doorbell address is within the doorbell bar */
+	if (db_addr < cdev->doorbells ||
+	    (u8 __iomem *)db_addr + width >
+	    (u8 __iomem *)cdev->doorbells + cdev->db_size) {
+		WARN(true,
+		     "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n",
+		     db_addr,
+		     cdev->doorbells,
+		     (u8 __iomem *)cdev->doorbells + cdev->db_size);
+		return false;
+	}
+
+	/* Make sure doorbell data pointer is not null */
+	if (!db_data) {
+		WARN(true, "Illegal doorbell data pointer: %p", db_data);
+		return false;
+	}
+
+	return true;
+}
+
+/* Find hwfn according to the doorbell address */
+static struct qed_hwfn *qed_db_rec_find_hwfn(struct qed_dev *cdev,
+					     void __iomem *db_addr)
+{
+	struct qed_hwfn *p_hwfn;
+
+	/* In CMT doorbell bar is split down the middle between engine 0 and engine 1 */
+	if (cdev->num_hwfns > 1)
+		p_hwfn = db_addr < cdev->hwfns[1].doorbells ?
+		    &cdev->hwfns[0] : &cdev->hwfns[1];
+	else
+		p_hwfn = QED_LEADING_HWFN(cdev);
+
+	return p_hwfn;
+}
+
+/* Add a new entry to the doorbell recovery mechanism */
+int qed_db_recovery_add(struct qed_dev *cdev,
+			void __iomem *db_addr,
+			void *db_data,
+			enum qed_db_rec_width db_width,
+			enum qed_db_rec_space db_space)
+{
+	struct qed_db_recovery_entry *db_entry;
+	struct qed_hwfn *p_hwfn;
+
+	/* Short-circuit VFs, for now */
+	if (IS_VF(cdev)) {
+		DP_VERBOSE(cdev,
+			   QED_MSG_IOV, "db recovery - skipping VF doorbell\n");
+		return 0;
+	}
+
+	/* Sanitize doorbell address */
+	if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data))
+		return -EINVAL;
+
+	/* Obtain hwfn from doorbell address */
+	p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr);
+
+	/* Create entry */
+	db_entry = kzalloc(sizeof(*db_entry), GFP_KERNEL);
+	if (!db_entry) {
+		DP_NOTICE(cdev, "Failed to allocate a db recovery entry\n");
+		return -ENOMEM;
+	}
+
+	/* Populate entry */
+	db_entry->db_addr = db_addr;
+	db_entry->db_data = db_data;
+	db_entry->db_width = db_width;
+	db_entry->db_space = db_space;
+	db_entry->hwfn_idx = p_hwfn->my_id;
+
+	/* Display */
+	qed_db_recovery_dp_entry(p_hwfn, db_entry, "Adding");
+
+	/* Protect the list */
+	spin_lock_bh(&p_hwfn->db_recovery_info.lock);
+	list_add_tail(&db_entry->list_entry, &p_hwfn->db_recovery_info.list);
+	spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
+
+	return 0;
+}
+
+/* Remove an entry from the doorbell recovery mechanism */
+int qed_db_recovery_del(struct qed_dev *cdev,
+			void __iomem *db_addr, void *db_data)
+{
+	struct qed_db_recovery_entry *db_entry = NULL;
+	struct qed_hwfn *p_hwfn;
+	int rc = -EINVAL;
+
+	/* Short-circuit VFs, for now */
+	if (IS_VF(cdev)) {
+		DP_VERBOSE(cdev,
+			   QED_MSG_IOV, "db recovery - skipping VF doorbell\n");
+		return 0;
+	}
+
+	/* Obtain hwfn from doorbell address */
+	p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr);
+
+	/* Protect the list */
+	spin_lock_bh(&p_hwfn->db_recovery_info.lock);
+	list_for_each_entry(db_entry,
+			    &p_hwfn->db_recovery_info.list, list_entry) {
+		/* Search according to db_data addr since db_addr is not unique (RoCE) */
+		if (db_entry->db_data == db_data) {
+			qed_db_recovery_dp_entry(p_hwfn, db_entry, "Deleting");
+			list_del(&db_entry->list_entry);
+			rc = 0;
+			break;
+		}
+	}
+
+	spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
+
+	if (rc == -EINVAL)
+		DP_NOTICE(p_hwfn,
+			  "Failed to find element in list. Key (db_data addr) was %p. db_addr was %p\n",
+			  db_data, db_addr);
+	else
+		kfree(db_entry);
+
+	return rc;
+}
+
+/* Initialize the doorbell recovery mechanism */
+static int qed_db_recovery_setup(struct qed_hwfn *p_hwfn)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Setting up db recovery\n");
+
+	/* Make sure db_size was set in cdev */
+	if (!p_hwfn->cdev->db_size) {
+		DP_ERR(p_hwfn->cdev, "db_size not set\n");
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&p_hwfn->db_recovery_info.list);
+	spin_lock_init(&p_hwfn->db_recovery_info.lock);
+	p_hwfn->db_recovery_info.db_recovery_counter = 0;
+
+	return 0;
+}
+
+/* Destroy the doorbell recovery mechanism */
+static void qed_db_recovery_teardown(struct qed_hwfn *p_hwfn)
+{
+	struct qed_db_recovery_entry *db_entry = NULL;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Tearing down db recovery\n");
+	if (!list_empty(&p_hwfn->db_recovery_info.list)) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_SPQ,
+			   "Doorbell recovery teardown found the doorbell recovery list not empty (expected in disorderly driver unload, e.g. recovery; otherwise some flow probably forgot to call db_recovery_del). Preparing to purge the doorbell recovery list...\n");
+		while (!list_empty(&p_hwfn->db_recovery_info.list)) {
+			db_entry =
+			    list_first_entry(&p_hwfn->db_recovery_info.list,
+					     struct qed_db_recovery_entry,
+					     list_entry);
+			qed_db_recovery_dp_entry(p_hwfn, db_entry, "Purging");
+			list_del(&db_entry->list_entry);
+			kfree(db_entry);
+		}
+	}
+	p_hwfn->db_recovery_info.db_recovery_counter = 0;
+}
+
+/* Print the content of the doorbell recovery mechanism */
+void qed_db_recovery_dp(struct qed_hwfn *p_hwfn)
+{
+	struct qed_db_recovery_entry *db_entry = NULL;
+
+	DP_NOTICE(p_hwfn,
+		  "Displaying doorbell recovery database. Counter was %d\n",
+		  p_hwfn->db_recovery_info.db_recovery_counter);
+
+	/* Protect the list */
+	spin_lock_bh(&p_hwfn->db_recovery_info.lock);
+	list_for_each_entry(db_entry,
+			    &p_hwfn->db_recovery_info.list, list_entry) {
+		qed_db_recovery_dp_entry(p_hwfn, db_entry, "Printing");
+	}
+
+	spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
+}
+
+/* Ring the doorbell of a single doorbell recovery entry */
+static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
+				 struct qed_db_recovery_entry *db_entry)
+{
+	/* Print according to width */
+	if (db_entry->db_width == DB_REC_WIDTH_32B) {
+		DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+			   "ringing doorbell address %p data %x\n",
+			   db_entry->db_addr,
+			   *(u32 *)db_entry->db_data);
+	} else {
+		DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+			   "ringing doorbell address %p data %llx\n",
+			   db_entry->db_addr,
+			   *(u64 *)(db_entry->db_data));
+	}
+
+	/* Sanity */
+	if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr,
+			       db_entry->db_width, db_entry->db_data))
+		return;
+
+	/* Flush the write combined buffer. Since there are multiple doorbelling
+	 * entities using the same address, if we don't flush, a transaction
+	 * could be lost.
+	 */
+	wmb();
+
+	/* Ring the doorbell */
+	if (db_entry->db_width == DB_REC_WIDTH_32B)
+		DIRECT_REG_WR(db_entry->db_addr,
+			      *(u32 *)(db_entry->db_data));
+	else
+		DIRECT_REG_WR64(db_entry->db_addr,
+				*(u64 *)(db_entry->db_data));
+
+	/* Flush the write combined buffer. Next doorbell may come from a
+	 * different entity to the same address...
+	 */
+	wmb();
+}
+
+/* Traverse the doorbell recovery entry list and ring all the doorbells */
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn)
+{
+	struct qed_db_recovery_entry *db_entry = NULL;
+
+	DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n",
+		  p_hwfn->db_recovery_info.db_recovery_counter);
+
+	/* Track amount of times recovery was executed */
+	p_hwfn->db_recovery_info.db_recovery_counter++;
+
+	/* Protect the list */
+	spin_lock_bh(&p_hwfn->db_recovery_info.lock);
+	list_for_each_entry(db_entry,
+			    &p_hwfn->db_recovery_info.list, list_entry)
+		qed_db_recovery_ring(p_hwfn, db_entry);
+	spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
+}
+
+/******************** Doorbell Recovery end ****************/
+
+/********************************** NIG LLH ***********************************/
+
+enum qed_llh_filter_type {
+	QED_LLH_FILTER_TYPE_MAC,
+	QED_LLH_FILTER_TYPE_PROTOCOL,
+};
+
+struct qed_llh_mac_filter {
+	u8 addr[ETH_ALEN];
+};
+
+struct qed_llh_protocol_filter {
+	enum qed_llh_prot_filter_type_t type;
+	u16 source_port_or_eth_type;
+	u16 dest_port;
+};
+
+union qed_llh_filter {
+	struct qed_llh_mac_filter mac;
+	struct qed_llh_protocol_filter protocol;
+};
+
+struct qed_llh_filter_info {
+	bool b_enabled;
+	u32 ref_cnt;
+	enum qed_llh_filter_type type;
+	union qed_llh_filter filter;
+};
+
+struct qed_llh_info {
+	/* Number of LLH filter banks */
+	u8 num_ppfid;
+
+#define MAX_NUM_PPFID   8
+	u8 ppfid_array[MAX_NUM_PPFID];
+
+	/* Array of filter arrays:
+	 * "num_ppfid" elements of filter banks, where each is an array of
+	 * "NIG_REG_LLH_FUNC_FILTER_EN_SIZE" filters.
+	 */
+	struct qed_llh_filter_info **pp_filters;
+};
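+
+/* Shadow indexing (illustrative): filter i of relative ppfid j lives at
+ * pp_filters[j][i]; ppfid_array[j] gives the absolute ppfid used when
+ * accessing the NIG registers.
+ */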
+
+static void qed_llh_free(struct qed_dev *cdev)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	u32 i;
+
+	if (p_llh_info) {
+		if (p_llh_info->pp_filters)
+			for (i = 0; i < p_llh_info->num_ppfid; i++)
+				kfree(p_llh_info->pp_filters[i]);
+
+		kfree(p_llh_info->pp_filters);
+	}
+
+	kfree(p_llh_info);
+	cdev->p_llh_info = NULL;
+}
+
+static int qed_llh_alloc(struct qed_dev *cdev)
+{
+	struct qed_llh_info *p_llh_info;
+	u32 size, i;
+
+	p_llh_info = kzalloc(sizeof(*p_llh_info), GFP_KERNEL);
+	if (!p_llh_info)
+		return -ENOMEM;
+	cdev->p_llh_info = p_llh_info;
+
+	for (i = 0; i < MAX_NUM_PPFID; i++) {
+		if (!(cdev->ppfid_bitmap & (0x1 << i)))
+			continue;
+
+		p_llh_info->ppfid_array[p_llh_info->num_ppfid] = i;
+		DP_VERBOSE(cdev, QED_MSG_SP, "ppfid_array[%d] = %hhd\n",
+			   p_llh_info->num_ppfid, i);
+		p_llh_info->num_ppfid++;
+	}
+
+	size = p_llh_info->num_ppfid * sizeof(*p_llh_info->pp_filters);
+	p_llh_info->pp_filters = kzalloc(size, GFP_KERNEL);
+	if (!p_llh_info->pp_filters)
+		return -ENOMEM;
+
+	size = NIG_REG_LLH_FUNC_FILTER_EN_SIZE *
+	    sizeof(**p_llh_info->pp_filters);
+	for (i = 0; i < p_llh_info->num_ppfid; i++) {
+		p_llh_info->pp_filters[i] = kzalloc(size, GFP_KERNEL);
+		if (!p_llh_info->pp_filters[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int qed_llh_shadow_sanity(struct qed_dev *cdev,
+				 u8 ppfid, u8 filter_idx, const char *action)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+
+	if (ppfid >= p_llh_info->num_ppfid) {
+		DP_NOTICE(cdev,
+			  "LLH shadow [%s]: using ppfid %d while only %d ppfids are available\n",
+			  action, ppfid, p_llh_info->num_ppfid);
+		return -EINVAL;
+	}
+
+	if (filter_idx >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
+		DP_NOTICE(cdev,
+			  "LLH shadow [%s]: using filter_idx %d while only %d filters are available\n",
+			  action, filter_idx, NIG_REG_LLH_FUNC_FILTER_EN_SIZE);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define QED_LLH_INVALID_FILTER_IDX      0xff
+
+static int
+qed_llh_shadow_search_filter(struct qed_dev *cdev,
+			     u8 ppfid,
+			     union qed_llh_filter *p_filter, u8 *p_filter_idx)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+	u8 i;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "search");
+	if (rc)
+		return rc;
+
+	*p_filter_idx = QED_LLH_INVALID_FILTER_IDX;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+		if (!memcmp(p_filter, &p_filters[i].filter,
+			    sizeof(*p_filter))) {
+			*p_filter_idx = i;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+qed_llh_shadow_get_free_idx(struct qed_dev *cdev, u8 ppfid, u8 *p_filter_idx)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+	u8 i;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "get_free_idx");
+	if (rc)
+		return rc;
+
+	*p_filter_idx = QED_LLH_INVALID_FILTER_IDX;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+		if (!p_filters[i].b_enabled) {
+			*p_filter_idx = i;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+__qed_llh_shadow_add_filter(struct qed_dev *cdev,
+			    u8 ppfid,
+			    u8 filter_idx,
+			    enum qed_llh_filter_type type,
+			    union qed_llh_filter *p_filter, u32 *p_ref_cnt)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "add");
+	if (rc)
+		return rc;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	if (!p_filters[filter_idx].ref_cnt) {
+		p_filters[filter_idx].b_enabled = true;
+		p_filters[filter_idx].type = type;
+		memcpy(&p_filters[filter_idx].filter, p_filter,
+		       sizeof(p_filters[filter_idx].filter));
+	}
+
+	*p_ref_cnt = ++p_filters[filter_idx].ref_cnt;
+
+	return 0;
+}
+
+static int
+qed_llh_shadow_add_filter(struct qed_dev *cdev,
+			  u8 ppfid,
+			  enum qed_llh_filter_type type,
+			  union qed_llh_filter *p_filter,
+			  u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+	int rc;
+
+	/* Check if the same filter already exists */
+	rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx);
+	if (rc)
+		return rc;
+
+	/* Find a new entry in case of a new filter */
+	if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+		rc = qed_llh_shadow_get_free_idx(cdev, ppfid, p_filter_idx);
+		if (rc)
+			return rc;
+	}
+
+	/* No free entry was found */
+	if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+		DP_NOTICE(cdev,
+			  "Failed to find an empty LLH filter to utilize [ppfid %d]\n",
+			  ppfid);
+		return -EINVAL;
+	}
+
+	return __qed_llh_shadow_add_filter(cdev, ppfid, *p_filter_idx, type,
+					   p_filter, p_ref_cnt);
+}
+
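+/* Ref-count behavior (illustrative): adding an identical filter twice yields
+ * the same filter_idx with *p_ref_cnt of 1 and then 2; the NIG registers are
+ * programmed only when *p_ref_cnt == 1, as the add paths below do.
+ */
+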
+static int
+__qed_llh_shadow_remove_filter(struct qed_dev *cdev,
+			       u8 ppfid, u8 filter_idx, u32 *p_ref_cnt)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "remove");
+	if (rc)
+		return rc;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	if (!p_filters[filter_idx].ref_cnt) {
+		DP_NOTICE(cdev,
+			  "LLH shadow: trying to remove a filter with ref_cnt=0\n");
+		return -EINVAL;
+	}
+
+	*p_ref_cnt = --p_filters[filter_idx].ref_cnt;
+	if (!p_filters[filter_idx].ref_cnt)
+		memset(&p_filters[filter_idx],
+		       0, sizeof(p_filters[filter_idx]));
+
+	return 0;
+}
+
+static int
+qed_llh_shadow_remove_filter(struct qed_dev *cdev,
+			     u8 ppfid,
+			     union qed_llh_filter *p_filter,
+			     u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+	int rc;
+
+	rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx);
+	if (rc)
+		return rc;
+
+	/* No matching filter was found */
+	if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+		DP_NOTICE(cdev, "Failed to find a filter in the LLH shadow\n");
+		return -EINVAL;
+	}
+
+	return __qed_llh_shadow_remove_filter(cdev, ppfid, *p_filter_idx,
+					      p_ref_cnt);
+}
+
+static int qed_llh_abs_ppfid(struct qed_dev *cdev, u8 ppfid, u8 *p_abs_ppfid)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+
+	if (ppfid >= p_llh_info->num_ppfid) {
+		DP_NOTICE(cdev,
+			  "ppfid %d is not valid, available indices are 0..%hhd\n",
+			  ppfid, p_llh_info->num_ppfid - 1);
+		*p_abs_ppfid = 0;
+		return -EINVAL;
+	}
+
+	*p_abs_ppfid = p_llh_info->ppfid_array[ppfid];
+
+	return 0;
+}
+
+static int
+qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	enum qed_eng eng;
+	u8 ppfid;
+	int rc;
+
+	rc = qed_mcp_get_engine_config(p_hwfn, p_ptt);
+	if (rc != 0 && rc != -EOPNOTSUPP) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to get the engine affinity configuration\n");
+		return rc;
+	}
+
+	/* RoCE PF is bound to a single engine */
+	if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
+		eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+		rc = qed_llh_set_roce_affinity(cdev, eng);
+		if (rc) {
+			DP_NOTICE(cdev,
+				  "Failed to set the RoCE engine affinity\n");
+			return rc;
+		}
+
+		DP_VERBOSE(cdev,
+			   QED_MSG_SP,
+			   "LLH: Set the engine affinity of RoCE packets as %d\n",
+			   eng);
+	}
+
+	/* Storage PF is bound to a single engine while L2 PF uses both */
+	if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn))
+		eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+	else			/* L2_PERSONALITY */
+		eng = QED_BOTH_ENG;
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+		rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+		if (rc) {
+			DP_NOTICE(cdev,
+				  "Failed to set the engine affinity of ppfid %d\n",
+				  ppfid);
+			return rc;
+		}
+	}
+
+	DP_VERBOSE(cdev, QED_MSG_SP,
+		   "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+		   eng);
+
+	return 0;
+}
+
+static int qed_llh_hw_init_pf(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u8 ppfid, abs_ppfid;
+	int rc;
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+		u32 addr;
+
+		rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+		if (rc)
+			return rc;
+
+		addr = NIG_REG_LLH_PPFID2PFID_TBL_0 + abs_ppfid * 0x4;
+		qed_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
+	}
+
+	if (test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+	    !QED_IS_FCOE_PERSONALITY(p_hwfn)) {
+		rc = qed_llh_add_mac_filter(cdev, 0,
+					    p_hwfn->hw_info.hw_mac_addr);
+		if (rc)
+			DP_NOTICE(cdev,
+				  "Failed to add an LLH filter with the primary MAC\n");
+	}
+
+	if (QED_IS_CMT(cdev)) {
+		rc = qed_llh_set_engine_affin(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev)
+{
+	return cdev->p_llh_info->num_ppfid;
+}
+
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_MASK             0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_SHIFT            0
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_MASK         0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_SHIFT        2
+
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, u8 ppfid, enum qed_eng eng)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u32 addr, val, eng_sel;
+	u8 abs_ppfid;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!QED_IS_CMT(cdev))
+		goto out;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto out;
+
+	switch (eng) {
+	case QED_ENG0:
+		eng_sel = 0;
+		break;
+	case QED_ENG1:
+		eng_sel = 1;
+		break;
+	case QED_BOTH_ENG:
+		eng_sel = 2;
+		break;
+	default:
+		DP_NOTICE(cdev, "Invalid affinity value for ppfid [%d]\n", eng);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+	val = qed_rd(p_hwfn, p_ptt, addr);
+	SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE, eng_sel);
+	qed_wr(p_hwfn, p_ptt, addr, val);
+
+	/* The iWARP affinity is set as the affinity of ppfid 0 */
+	if (!ppfid && QED_IS_IWARP_PERSONALITY(p_hwfn))
+		cdev->iwarp_affin = (eng == QED_ENG1) ? 1 : 0;
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u32 addr, val, eng_sel;
+	u8 ppfid, abs_ppfid;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!QED_IS_CMT(cdev))
+		goto out;
+
+	switch (eng) {
+	case QED_ENG0:
+		eng_sel = 0;
+		break;
+	case QED_ENG1:
+		eng_sel = 1;
+		break;
+	case QED_BOTH_ENG:
+		eng_sel = 2;
+		qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL,
+		       0xf);  /* QP bit 15 */
+		break;
+	default:
+		DP_NOTICE(cdev, "Invalid affinity value for RoCE [%d]\n", eng);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+		rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+		if (rc)
+			goto out;
+
+		addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+		val = qed_rd(p_hwfn, p_ptt, addr);
+		SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_ROCE, eng_sel);
+		qed_wr(p_hwfn, p_ptt, addr, val);
+	}
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+struct qed_llh_filter_details {
+	u64 value;
+	u32 mode;
+	u32 protocol_type;
+	u32 hdr_sel;
+	u32 enable;
+};
+
+static int
+qed_llh_access_filter(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt,
+		      u8 abs_ppfid,
+		      u8 filter_idx,
+		      struct qed_llh_filter_details *p_details)
+{
+	struct qed_dmae_params params = {0};
+	u32 addr;
+	u8 pfid;
+	int rc;
+
+	/* The NIG/LLH registers that are accessed in this function have only 16
+	 * rows which are exposed to a PF. I.e. only the 16 filters of its
+	 * default ppfid. Accessing filters of other ppfids requires pretending
+	 * to other PFs.
+	 * The calculation of PPFID->PFID in AH is based on the relative index
+	 * of a PF on its port.
+	 * For BB the pfid is actually the abs_ppfid.
+	 */
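+	/* Worked example (illustrative): on an AH device with 2 ports per
+	 * engine, abs_ppfid 2 on port 1 maps to pfid = 2 * 2 + 1 = 5.
+	 */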
+	if (QED_IS_BB(p_hwfn->cdev))
+		pfid = abs_ppfid;
+	else
+		pfid = abs_ppfid * p_hwfn->cdev->num_ports_in_engine +
+		    MFW_PORT(p_hwfn);
+
+	/* Filter enable - should be done first when removing a filter */
+	if (!p_details->enable) {
+		qed_fid_pretend(p_hwfn, p_ptt,
+				pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+		addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+		qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+
+		qed_fid_pretend(p_hwfn, p_ptt,
+				p_hwfn->rel_pf_id <<
+				PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+	}
+
+	/* Filter value */
+	addr = NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * filter_idx * 0x4;
+
+	params.flags = QED_DMAE_FLAG_PF_DST;
+	params.dst_pfid = pfid;
+	rc = qed_dmae_host2grc(p_hwfn,
+			       p_ptt,
+			       (u64)(uintptr_t)&p_details->value,
+			       addr, 2 /* size_in_dwords */,
+			       &params);
+	if (rc)
+		return rc;
+
+	qed_fid_pretend(p_hwfn, p_ptt,
+			pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+	/* Filter mode */
+	addr = NIG_REG_LLH_FUNC_FILTER_MODE + filter_idx * 0x4;
+	qed_wr(p_hwfn, p_ptt, addr, p_details->mode);
+
+	/* Filter protocol type */
+	addr = NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + filter_idx * 0x4;
+	qed_wr(p_hwfn, p_ptt, addr, p_details->protocol_type);
+
+	/* Filter header select */
+	addr = NIG_REG_LLH_FUNC_FILTER_HDR_SEL + filter_idx * 0x4;
+	qed_wr(p_hwfn, p_ptt, addr, p_details->hdr_sel);
+
+	/* Filter enable - should be done last when adding a filter */
+	if (p_details->enable) {
+		addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+		qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+	}
+
+	qed_fid_pretend(p_hwfn, p_ptt,
+			p_hwfn->rel_pf_id <<
+			PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+	return 0;
+}
+
+static int
+qed_llh_add_filter(struct qed_hwfn *p_hwfn,
+		   struct qed_ptt *p_ptt,
+		   u8 abs_ppfid,
+		   u8 filter_idx, u8 filter_prot_type, u32 high, u32 low)
+{
+	struct qed_llh_filter_details filter_details;
+
+	filter_details.enable = 1;
+	filter_details.value = ((u64)high << 32) | low;
+	filter_details.hdr_sel = 0;
+	filter_details.protocol_type = filter_prot_type;
+	/* Mode: 0 - MAC-address classification; 1 - protocol classification */
+	filter_details.mode = filter_prot_type ? 1 : 0;
+
+	return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+				     &filter_details);
+}
+
+static int
+qed_llh_remove_filter(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx)
+{
+	struct qed_llh_filter_details filter_details = {0};
+
+	return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+				     &filter_details);
+}
+
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+			   u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	union qed_llh_filter filter = {};
+	u8 filter_idx, abs_ppfid;
+	u32 high, low, ref_cnt;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+		goto out;
+
+	memcpy(filter.mac.addr, mac_addr, ETH_ALEN);
+	rc = qed_llh_shadow_add_filter(cdev, ppfid,
+				       QED_LLH_FILTER_TYPE_MAC,
+				       &filter, &filter_idx, &ref_cnt);
+	if (rc)
+		goto err;
+
+	/* Configure the LLH only in case of a new filter */
+	if (ref_cnt == 1) {
+		rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+		if (rc)
+			goto err;
+
+		high = mac_addr[1] | (mac_addr[0] << 8);
+		low = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) |
+		      (mac_addr[2] << 24);
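+		/* e.g. (illustrative): MAC 00:11:22:33:44:55 packs to
+		 * high = 0x0011, low = 0x22334455.
+		 */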
+		rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+					0, high, low);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Added MAC filter [%pM] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(cdev,
+		  "LLH: Failed to add MAC filter [%pM] to ppfid %hhd\n",
+		  mac_addr, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+static int
+qed_llh_protocol_filter_stringify(struct qed_dev *cdev,
+				  enum qed_llh_prot_filter_type_t type,
+				  u16 source_port_or_eth_type,
+				  u16 dest_port, u8 *str, size_t str_len)
+{
+	switch (type) {
+	case QED_LLH_FILTER_ETHERTYPE:
+		snprintf(str, str_len, "Ethertype 0x%04x",
+			 source_port_or_eth_type);
+		break;
+	case QED_LLH_FILTER_TCP_SRC_PORT:
+		snprintf(str, str_len, "TCP src port 0x%04x",
+			 source_port_or_eth_type);
+		break;
+	case QED_LLH_FILTER_UDP_SRC_PORT:
+		snprintf(str, str_len, "UDP src port 0x%04x",
+			 source_port_or_eth_type);
+		break;
+	case QED_LLH_FILTER_TCP_DEST_PORT:
+		snprintf(str, str_len, "TCP dst port 0x%04x", dest_port);
+		break;
+	case QED_LLH_FILTER_UDP_DEST_PORT:
+		snprintf(str, str_len, "UDP dst port 0x%04x", dest_port);
+		break;
+	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+		snprintf(str, str_len, "TCP src/dst ports 0x%04x/0x%04x",
+			 source_port_or_eth_type, dest_port);
+		break;
+	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+		snprintf(str, str_len, "UDP src/dst ports 0x%04x/0x%04x",
+			 source_port_or_eth_type, dest_port);
+		break;
+	default:
+		DP_NOTICE(cdev,
+			  "Non valid LLH protocol filter type %d\n", type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+qed_llh_protocol_filter_to_hilo(struct qed_dev *cdev,
+				enum qed_llh_prot_filter_type_t type,
+				u16 source_port_or_eth_type,
+				u16 dest_port, u32 *p_high, u32 *p_low)
+{
+	*p_high = 0;
+	*p_low = 0;
+
+	switch (type) {
+	case QED_LLH_FILTER_ETHERTYPE:
+		*p_high = source_port_or_eth_type;
+		break;
+	case QED_LLH_FILTER_TCP_SRC_PORT:
+	case QED_LLH_FILTER_UDP_SRC_PORT:
+		*p_low = source_port_or_eth_type << 16;
+		break;
+	case QED_LLH_FILTER_TCP_DEST_PORT:
+	case QED_LLH_FILTER_UDP_DEST_PORT:
+		*p_low = dest_port;
+		break;
+	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+		*p_low = (source_port_or_eth_type << 16) | dest_port;
+		break;
+	default:
+		DP_NOTICE(cdev,
+			  "Non valid LLH protocol filter type %d\n", type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
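+/* Illustrative packings for the helper above (values hypothetical):
+ *	QED_LLH_FILTER_ETHERTYPE, eth_type 0x8906    -> high 0x8906, low 0x0
+ *	QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT,
+ *	    src 0x1234, dst 0x5678                   -> high 0x0, low 0x12345678
+ */
+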
+int
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+			    u8 ppfid,
+			    enum qed_llh_prot_filter_type_t type,
+			    u16 source_port_or_eth_type, u16 dest_port)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u8 filter_idx, abs_ppfid, str[32], type_bitmap;
+	union qed_llh_filter filter = {};
+	u32 high, low, ref_cnt;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+		goto out;
+
+	rc = qed_llh_protocol_filter_stringify(cdev, type,
+					       source_port_or_eth_type,
+					       dest_port, str, sizeof(str));
+	if (rc)
+		goto err;
+
+	filter.protocol.type = type;
+	filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+	filter.protocol.dest_port = dest_port;
+	rc = qed_llh_shadow_add_filter(cdev,
+				       ppfid,
+				       QED_LLH_FILTER_TYPE_PROTOCOL,
+				       &filter, &filter_idx, &ref_cnt);
+	if (rc)
+		goto err;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto err;
+
+	/* Configure the LLH only in case of a new filter */
+	if (ref_cnt == 1) {
+		rc = qed_llh_protocol_filter_to_hilo(cdev, type,
+						     source_port_or_eth_type,
+						     dest_port, &high, &low);
+		if (rc)
+			goto err;
+
+		type_bitmap = 0x1 << type;
+		rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid,
+					filter_idx, type_bitmap, high, low);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Added protocol filter [%s] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(p_hwfn,
+		  "LLH: Failed to add protocol filter [%s] to ppfid %hhd\n",
+		  str, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+			       u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	union qed_llh_filter filter = {};
+	u8 filter_idx, abs_ppfid;
+	int rc = 0;
+	u32 ref_cnt;
+
+	if (!p_ptt)
+		return;
+
+	if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+		goto out;
+
+	ether_addr_copy(filter.mac.addr, mac_addr);
+	rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+					  &ref_cnt);
+	if (rc)
+		goto err;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto err;
+
+	/* Remove from the LLH in case the filter is not in use */
+	if (!ref_cnt) {
+		rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+					   filter_idx);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Removed MAC filter [%pM] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(cdev,
+		  "LLH: Failed to remove MAC filter [%pM] from ppfid %hhd\n",
+		  mac_addr, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+}
+
+void qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+				    u8 ppfid,
+				    enum qed_llh_prot_filter_type_t type,
+				    u16 source_port_or_eth_type, u16 dest_port)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u8 filter_idx, abs_ppfid, str[32];
+	union qed_llh_filter filter = {};
+	int rc = 0;
+	u32 ref_cnt;
+
+	if (!p_ptt)
+		return;
+
+	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+		goto out;
+
+	rc = qed_llh_protocol_filter_stringify(cdev, type,
+					       source_port_or_eth_type,
+					       dest_port, str, sizeof(str));
+	if (rc)
+		goto err;
+
+	filter.protocol.type = type;
+	filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+	filter.protocol.dest_port = dest_port;
+	rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+					  &ref_cnt);
+	if (rc)
+		goto err;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto err;
+
+	/* Remove from the LLH in case the filter is not in use */
+	if (!ref_cnt) {
+		rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+					   filter_idx);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Removed protocol filter [%s] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(cdev,
+		  "LLH: Failed to remove protocol filter [%s] from ppfid %hhd\n",
+		  str, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+}
+
+/******************************* NIG LLH - End ********************************/
+
 #define QED_MIN_DPIS            (4)
 #define QED_MIN_PWM_REGION      (QED_WID_SIZE * QED_MIN_DPIS)
 
@@ -144,6 +1360,12 @@
 	qm_info->wfq_data = NULL;
 }
 
+static void qed_dbg_user_data_free(struct qed_hwfn *p_hwfn)
+{
+	kfree(p_hwfn->dbg_user_info);
+	p_hwfn->dbg_user_info = NULL;
+}
+
 void qed_resc_free(struct qed_dev *cdev)
 {
 	int i;
@@ -160,6 +1382,8 @@
 	kfree(cdev->reset_stats);
 	cdev->reset_stats = NULL;
 
+	qed_llh_free(cdev);
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -187,6 +1411,10 @@
 		qed_l2_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
 		qed_dcbx_info_free(p_hwfn);
+		qed_dbg_user_data_free(p_hwfn);
+
+		/* Destroy doorbell recovery mechanism */
+		qed_db_recovery_teardown(p_hwfn);
 	}
 }
 
@@ -473,19 +1701,19 @@
 
 /* get pq index according to PQ_FLAGS */
 static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
-					   u32 pq_flags)
+					   unsigned long pq_flags)
 {
 	struct qed_qm_info *qm_info = &p_hwfn->qm_info;
 
 	/* Can't have multiple flags set here */
-	if (bitmap_weight((unsigned long *)&pq_flags,
+	if (bitmap_weight(&pq_flags,
 			  sizeof(pq_flags) * BITS_PER_BYTE) > 1) {
-		DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags);
+		DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags);
 		goto err;
 	}
 
 	if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) {
-		DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags);
+		DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags);
 		goto err;
 	}
 
@@ -962,6 +2190,11 @@
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 		u32 n_eqes, num_cons;
 
+		/* Initialize the doorbell recovery mechanism */
+		rc = qed_db_recovery_setup(p_hwfn);
+		if (rc)
+			goto alloc_err;
+
 		/* First allocate the context manager structure */
 		rc = qed_cxt_mngr_alloc(p_hwfn);
 		if (rc)
@@ -1112,6 +2345,17 @@
 		rc = qed_dcbx_info_alloc(p_hwfn);
 		if (rc)
 			goto alloc_err;
+
+		rc = qed_dbg_alloc_user_data(p_hwfn);
+		if (rc)
+			goto alloc_err;
+	}
+
+	rc = qed_llh_alloc(cdev);
+	if (rc) {
+		DP_NOTICE(cdev,
+			  "Failed to allocate memory for the llh_info structure\n");
+		goto alloc_err;
 	}
 
 	cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
@@ -1457,6 +2701,14 @@
 	QED_ROCE_EDPM_MODE_DISABLE = 2,
 };
 
+bool qed_edpm_enabled(struct qed_hwfn *p_hwfn)
+{
+	if (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm)
+		return false;
+
+	return true;
+}
+
 static int
 qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
@@ -1526,13 +2778,13 @@
 	p_hwfn->wid_count = (u16) n_cpus;
 
 	DP_INFO(p_hwfn,
-		"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n",
+		"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s, page_size=%lu\n",
 		norm_regsize,
 		pwm_regsize,
 		p_hwfn->dpi_size,
 		p_hwfn->dpi_count,
-		((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ?
-		"disabled" : "enabled");
+		(!qed_edpm_enabled(p_hwfn)) ?
+		"disabled" : "enabled", PAGE_SIZE);
 
 	if (rc) {
 		DP_ERR(p_hwfn,
@@ -1557,6 +2809,10 @@
 {
 	int rc = 0;
 
+	/* In CMT the gate should be cleared by the 2nd hwfn */
+	if (!QED_IS_CMT(p_hwfn->cdev) || !IS_LEAD_HWFN(p_hwfn))
+		STORE_RT_REG(p_hwfn, NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET, 0);
+
 	rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
 	if (rc)
 		return rc;
@@ -1620,11 +2876,6 @@
 		     (p_hwfn->hw_info.personality == QED_PCI_FCOE) ? 1 : 0);
 	STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_ROCE_RT_OFFSET, 0);
 
-	/* Cleanup chip from previous driver if such remains exist */
-	rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
-	if (rc)
-		return rc;
-
 	/* Sanity check before the PF init sequence that uses DMAE */
 	rc = qed_dmae_sanity(p_hwfn, p_ptt, "pf_phase");
 	if (rc)
@@ -1647,6 +2898,13 @@
 	if (rc)
 		return rc;
 
+	/* Use the leading hwfn since in CMT only NIG #0 is operational */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+		rc = qed_llh_hw_init_pf(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
 	if (b_hw_start) {
 		/* enable interrupts */
 		qed_int_igu_enable(p_hwfn, p_ptt, int_mode);
@@ -1668,17 +2926,15 @@
 	return rc;
 }
 
-static int qed_change_pci_hwfn(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt,
-			       u8 enable)
+int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt, bool b_enable)
 {
-	u32 delay_idx = 0, val, set_val = enable ? 1 : 0;
+	u32 delay_idx = 0, val, set_val = b_enable ? 1 : 0;
 
-	/* Change PF in PXP */
-	qed_wr(p_hwfn, p_ptt,
-	       PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
+	/* Configure the PF's internal FID_enable for master transactions */
+	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
 
-	/* wait until value is set - try for 1 second every 50us */
+	/* Wait until value is set - try for 1 second every 50us */
 	for (delay_idx = 0; delay_idx < 20000; delay_idx++) {
 		val = qed_rd(p_hwfn, p_ptt,
 			     PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER);
@@ -1732,13 +2988,19 @@
 	return 0;
 }
 
+static void qed_pglueb_clear_err(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
+	       BIT(p_hwfn->abs_pf_id));
+}
+
 int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 {
 	struct qed_load_req_params load_req_params;
 	u32 load_code, resp, param, drv_mb_param;
 	bool b_default_mtu = true;
 	struct qed_hwfn *p_hwfn;
-	int rc = 0, mfw_rc, i;
+	int rc = 0, i;
 	u16 ether_type;
 
 	if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
@@ -1753,7 +3015,7 @@
 	}
 
 	for_each_hwfn(cdev, i) {
-		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+		p_hwfn = &cdev->hwfns[i];
 
 		/* If management didn't provide a default, set one of our own */
 		if (!p_hwfn->hw_info.mtu) {
@@ -1766,9 +3028,6 @@
 			continue;
 		}
 
-		/* Enable DMAE in PXP */
-		rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true);
-
 		rc = qed_calc_hw_mode(p_hwfn);
 		if (rc)
 			return rc;
@@ -1805,12 +3064,43 @@
 			   "Load request was sent. Load code: 0x%x\n",
 			   load_code);
 
+		/* Only relevant for recovery:
+		 * Clear the indication after the MFW responds to LOAD_REQ.
+		 */
+		cdev->recov_in_prog = false;
+
 		qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
 
 		qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
 
-		p_hwfn->first_on_engine = (load_code ==
-					   FW_MSG_CODE_DRV_LOAD_ENGINE);
+		/* Clean up chip from previous driver if such remains exist.
+		 * This is not needed when the PF is the first one on the
+		 * engine, since afterwards we are going to init the FW.
+		 */
+		if (load_code != FW_MSG_CODE_DRV_LOAD_ENGINE) {
+			rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
+					       p_hwfn->rel_pf_id, false);
+			if (rc) {
+				DP_NOTICE(p_hwfn, "Final cleanup failed\n");
+				goto load_err;
+			}
+		}
+
+		/* Log and clear previous pglue_b errors if such exist */
+		qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt);
+
+		/* Enable the PF's internal FID_enable in the PXP */
+		rc = qed_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt,
+						true);
+		if (rc)
+			goto load_err;
+
+		/* Clear the pglue_b was_error indication.
+		 * In E4 it must be done after the BME and the internal
+		 * FID_enable for the PF are set, since VDMs may cause the
+		 * indication to be set again.
+		 */
+		qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
 
 		switch (load_code) {
 		case FW_MSG_CODE_DRV_LOAD_ENGINE:
@@ -1841,39 +3131,29 @@
 			break;
 		}
 
-		if (rc)
+		if (rc) {
 			DP_NOTICE(p_hwfn,
 				  "init phase failed for loadcode 0x%x (rc %d)\n",
-				   load_code, rc);
-
-		/* ACK mfw regardless of success or failure of initialization */
-		mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				     DRV_MSG_CODE_LOAD_DONE,
-				     0, &load_code, &param);
-		if (rc)
-			return rc;
-		if (mfw_rc) {
-			DP_NOTICE(p_hwfn, "Failed sending LOAD_DONE command\n");
-			return mfw_rc;
+				  load_code, rc);
+			goto load_err;
 		}
 
-		/* Check if there is a DID mismatch between nvm-cfg/efuse */
-		if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
-			DP_NOTICE(p_hwfn,
-				  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");
+		rc = qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
+		if (rc)
+			return rc;
 
 		/* send DCBX attention request command */
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_DCB,
 			   "sending phony dcbx set command to trigger DCBx attention handling\n");
-		mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				     DRV_MSG_CODE_SET_DCBX,
-				     1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
-				     &load_code, &param);
-		if (mfw_rc) {
+		rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+				 DRV_MSG_CODE_SET_DCBX,
+				 1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
+				 &resp, &param);
+		if (rc) {
 			DP_NOTICE(p_hwfn,
 				  "Failed to send DCBX attention request\n");
-			return mfw_rc;
+			return rc;
 		}
 
 		p_hwfn->hw_init_done = true;
@@ -1922,6 +3202,12 @@
 	}
 
 	return 0;
+
+load_err:
+	/* The MFW load lock should also be released when initialization fails.
+	 */
+	qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
+	return rc;
 }
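
The reworked error handling above funnels every failure that occurs after the LOAD_REQ was granted through the load_err label, so the MFW still receives a LOAD_DONE and releases its load lock. A toy sketch of that unwind shape (the functions are stand-ins, not the driver's):

#include <stdio.h>

/* Once the "lock" (LOAD_REQ) has been granted, every later failure must
 * still send LOAD_DONE before returning. */
static int request_load(void)  { return 0; }
static int init_hw(void)       { return -1; }	/* simulate an init failure */
static void load_done(void)    { printf("LOAD_DONE sent\n"); }

static int hw_init(void)
{
	int rc;

	rc = request_load();
	if (rc)
		return rc;	/* nothing to release yet */

	rc = init_hw();
	if (rc)
		goto load_err;

	load_done();
	return 0;

load_err:
	load_done();	/* release the MFW load lock on failure too */
	return rc;
}

int main(void)
{
	printf("hw_init() = %d\n", hw_init());
	return 0;
}
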
 
 #define QED_HW_STOP_RETRY_LIMIT (10)
@@ -1934,6 +3220,9 @@
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0);
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0);
 
+	if (cdev->recov_in_prog)
+		return;
+
 	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
 		if ((!qed_rd(p_hwfn, p_ptt,
 			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
@@ -1996,12 +3285,14 @@
 		p_hwfn->hw_init_done = false;
 
 		/* Send unload command to MCP */
-		rc = qed_mcp_unload_req(p_hwfn, p_ptt);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
-				  rc);
-			rc2 = -EINVAL;
+		if (!cdev->recov_in_prog) {
+			rc = qed_mcp_unload_req(p_hwfn, p_ptt);
+			if (rc) {
+				DP_NOTICE(p_hwfn,
+					  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
+					  rc);
+				rc2 = -EINVAL;
+			}
 		}
 
 		qed_slowpath_irq_sync(p_hwfn);
@@ -2043,27 +3334,37 @@
 		qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
 		qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
 
-		qed_mcp_unload_done(p_hwfn, p_ptt);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
-				  rc);
-			rc2 = -EINVAL;
+		if (IS_LEAD_HWFN(p_hwfn) &&
+		    test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+		    !QED_IS_FCOE_PERSONALITY(p_hwfn))
+			qed_llh_remove_mac_filter(cdev, 0,
+						  p_hwfn->hw_info.hw_mac_addr);
+
+		if (!cdev->recov_in_prog) {
+			rc = qed_mcp_unload_done(p_hwfn, p_ptt);
+			if (rc) {
+				DP_NOTICE(p_hwfn,
+					  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
+					  rc);
+				rc2 = -EINVAL;
+			}
 		}
 	}
 
-	if (IS_PF(cdev)) {
+	if (IS_PF(cdev) && !cdev->recov_in_prog) {
 		p_hwfn = QED_LEADING_HWFN(cdev);
 		p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;
 
-		/* Disable DMAE in PXP - in CMT, this should only be done for
-		 * first hw-function, and only after all transactions have
-		 * stopped for all active hw-functions.
+		/* Clear the PF's internal FID_enable in the PXP.
+		 * In CMT this should only be done for first hw-function, and
+		 * only after all transactions have stopped for all active
+		 * hw-functions.
 		 */
-		rc = qed_change_pci_hwfn(p_hwfn, p_ptt, false);
+		rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
 		if (rc) {
 			DP_NOTICE(p_hwfn,
-				  "qed_change_pci_hwfn failed. rc = %d.\n", rc);
+				  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
+				  rc);
 			rc2 = -EINVAL;
 		}
 	}
@@ -2163,9 +3464,8 @@
 		       PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0);
 	}
 
-	/* Clean Previous errors if such exist */
-	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-	       PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
+	/* Clean previous pglue_b errors if such exist */
+	qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
 
 	/* enable internal target-read */
 	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@@ -2515,6 +3815,36 @@
 	return 0;
 }
 
+static int qed_hw_get_ppfid_bitmap(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u8 native_ppfid_idx;
+	int rc;
+
+	/* Calculation of BB/AH is different for native_ppfid_idx */
+	if (QED_IS_BB(cdev))
+		native_ppfid_idx = p_hwfn->rel_pf_id;
+	else
+		native_ppfid_idx = p_hwfn->rel_pf_id /
+		    cdev->num_ports_in_engine;
+
+	rc = qed_mcp_get_ppfid_bitmap(p_hwfn, p_ptt);
+	if (rc != 0 && rc != -EOPNOTSUPP)
+		return rc;
+	else if (rc == -EOPNOTSUPP)
+		cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+
+	if (!(cdev->ppfid_bitmap & (0x1 << native_ppfid_idx))) {
+		DP_INFO(p_hwfn,
+			"Fix the PPFID bitmap to include the native PPFID [native_ppfid_idx %hhd, orig_bitmap 0x%hhx]\n",
+			native_ppfid_idx, cdev->ppfid_bitmap);
+		cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+	}
+
+	return 0;
+}
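
As a worked example of the fixup logic above: on AH the native PPFID index divides the relative PF id by the ports per engine, and the bitmap returned by the MFW is forced to contain that index. A minimal standalone sketch with toy values and no hardware access:

#include <stdio.h>
#include <stdint.h>

/* BB uses rel_pf_id directly; AH divides by ports per engine. */
static uint8_t native_ppfid(int is_bb, uint8_t rel_pf_id, uint8_t ports)
{
	return is_bb ? rel_pf_id : rel_pf_id / ports;
}

int main(void)
{
	uint8_t idx = native_ppfid(0 /* AH */, 3, 2);	/* -> 1 */
	uint8_t bitmap = 0x4;	/* pretend the MFW answered 0x4 */

	if (!(bitmap & (1U << idx)))
		bitmap = 1U << idx;	/* force-include the native PPFID */

	printf("native_ppfid_idx=%u bitmap=0x%x\n", idx, bitmap);
	return 0;
}
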
+
 static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_resc_unlock_params resc_unlock_params;
@@ -2572,6 +3902,13 @@
 				"Failed to release the resource lock for the resource allocation commands\n");
 	}
 
+	/* PPFID bitmap */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+		rc = qed_hw_get_ppfid_bitmap(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
 	/* Sanity for ILT */
 	if ((b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_K2)) ||
 	    (!b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB))) {
@@ -2694,6 +4031,9 @@
 	case NVM_CFG1_PORT_DRV_LINK_SPEED_10G:
 		link->speed.forced_speed = 10000;
 		break;
+	case NVM_CFG1_PORT_DRV_LINK_SPEED_20G:
+		link->speed.forced_speed = 20000;
+		break;
 	case NVM_CFG1_PORT_DRV_LINK_SPEED_25G:
 		link->speed.forced_speed = 25000;
 		break;
@@ -2784,12 +4124,14 @@
 			cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS) |
 					BIT(QED_MF_LLH_PROTO_CLSS) |
 					BIT(QED_MF_UFP_SPECIFIC) |
-					BIT(QED_MF_8021Q_TAGGING);
+					BIT(QED_MF_8021Q_TAGGING) |
+					BIT(QED_MF_DONT_ADD_VLAN0_TAG);
 			break;
 		case NVM_CFG1_GLOB_MF_MODE_BD:
 			cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS) |
 					BIT(QED_MF_LLH_PROTO_CLSS) |
-					BIT(QED_MF_8021AD_TAGGING);
+					BIT(QED_MF_8021AD_TAGGING) |
+					BIT(QED_MF_DONT_ADD_VLAN0_TAG);
 			break;
 		case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
 			cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) |
@@ -2896,55 +4238,43 @@
 		   p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine);
 }
 
-static void qed_hw_info_port_num_bb(struct qed_hwfn *p_hwfn,
-				    struct qed_ptt *p_ptt)
-{
-	u32 port_mode;
-
-	port_mode = qed_rd(p_hwfn, p_ptt, CNIG_REG_NW_PORT_MODE_BB);
-
-	if (port_mode < 3) {
-		p_hwfn->cdev->num_ports_in_engine = 1;
-	} else if (port_mode <= 5) {
-		p_hwfn->cdev->num_ports_in_engine = 2;
-	} else {
-		DP_NOTICE(p_hwfn, "PORT MODE: %d not supported\n",
-			  p_hwfn->cdev->num_ports_in_engine);
-
-		/* Default num_ports_in_engine to something */
-		p_hwfn->cdev->num_ports_in_engine = 1;
-	}
-}
-
-static void qed_hw_info_port_num_ah(struct qed_hwfn *p_hwfn,
-				    struct qed_ptt *p_ptt)
-{
-	u32 port;
-	int i;
-
-	p_hwfn->cdev->num_ports_in_engine = 0;
-
-	for (i = 0; i < MAX_NUM_PORTS_K2; i++) {
-		port = qed_rd(p_hwfn, p_ptt,
-			      CNIG_REG_NIG_PORT0_CONF_K2 + (i * 4));
-		if (port & 1)
-			p_hwfn->cdev->num_ports_in_engine++;
-	}
-
-	if (!p_hwfn->cdev->num_ports_in_engine) {
-		DP_NOTICE(p_hwfn, "All NIG ports are inactive\n");
-
-		/* Default num_ports_in_engine to something */
-		p_hwfn->cdev->num_ports_in_engine = 1;
-	}
-}
-
 static void qed_hw_info_port_num(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-	if (QED_IS_BB(p_hwfn->cdev))
-		qed_hw_info_port_num_bb(p_hwfn, p_ptt);
-	else
-		qed_hw_info_port_num_ah(p_hwfn, p_ptt);
+	u32 addr, global_offsize, global_addr, port_mode;
+	struct qed_dev *cdev = p_hwfn->cdev;
+
+	/* In CMT there is always only one port */
+	if (cdev->num_hwfns > 1) {
+		cdev->num_ports_in_engine = 1;
+		cdev->num_ports = 1;
+		return;
+	}
+
+	/* Determine the number of ports per engine */
+	port_mode = qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE);
+	switch (port_mode) {
+	case 0x0:
+		cdev->num_ports_in_engine = 1;
+		break;
+	case 0x1:
+		cdev->num_ports_in_engine = 2;
+		break;
+	case 0x2:
+		cdev->num_ports_in_engine = 4;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Unknown port mode 0x%08x\n", port_mode);
+		cdev->num_ports_in_engine = 1;	/* Default to something */
+		break;
+	}
+
+	/* Get the total number of ports of the device */
+	addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+				    PUBLIC_GLOBAL);
+	global_offsize = qed_rd(p_hwfn, p_ptt, addr);
+	global_addr = SECTION_ADDR(global_offsize, 0);
+	addr = global_addr + offsetof(struct public_global, max_ports);
+	cdev->num_ports = (u8)qed_rd(p_hwfn, p_ptt, addr);
 }
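
A quick standalone restatement of the MISC_REG_PORT_MODE decode used above (the shmem max_ports read is a separate step and is not modeled here):

#include <stdio.h>
#include <stdint.h>

/* Port-mode decode as in the switch above: 0 -> 1 port, 1 -> 2 ports,
 * 2 -> 4 ports, anything else falls back to 1. */
static uint8_t ports_in_engine(uint32_t port_mode)
{
	switch (port_mode) {
	case 0x0: return 1;
	case 0x1: return 2;
	case 0x2: return 4;
	default:  return 1;	/* default to something sane */
	}
}

int main(void)
{
	for (uint32_t m = 0; m <= 3; m++)
		printf("port_mode 0x%x -> %u port(s)\n", m, ports_in_engine(m));
	return 0;
}
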
 
 static void qed_get_eee_caps(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -2982,7 +4312,8 @@
 			return rc;
 	}
 
-	qed_hw_info_port_num(p_hwfn, p_ptt);
+	if (IS_LEAD_HWFN(p_hwfn))
+		qed_hw_info_port_num(p_hwfn, p_ptt);
 
 	qed_mcp_get_capabilities(p_hwfn, p_ptt);
 
@@ -3096,13 +4427,16 @@
 static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 				 void __iomem *p_regview,
 				 void __iomem *p_doorbells,
+				 u64 db_phys_addr,
 				 enum qed_pci_personality personality)
 {
+	struct qed_dev *cdev = p_hwfn->cdev;
 	int rc = 0;
 
 	/* Split PCI bars evenly between hwfns */
 	p_hwfn->regview = p_regview;
 	p_hwfn->doorbells = p_doorbells;
+	p_hwfn->db_phys_addr = db_phys_addr;
 
 	if (IS_VF(p_hwfn->cdev))
 		return qed_vf_hw_prepare(p_hwfn);
@@ -3150,7 +4484,7 @@
 	/* Sending a mailbox to the MFW should be done after qed_get_hw_info()
 	 * is called as it sets the ports number in an engine.
 	 */
-	if (IS_LEAD_HWFN(p_hwfn)) {
+	if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) {
 		rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc)
 			DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
@@ -3198,7 +4532,9 @@
 	/* Initialize the first hwfn - will learn number of hwfns */
 	rc = qed_hw_prepare_single(p_hwfn,
 				   cdev->regview,
-				   cdev->doorbells, personality);
+				   cdev->doorbells,
+				   cdev->db_phys_addr,
+				   personality);
 	if (rc)
 		return rc;
 
@@ -3207,22 +4543,25 @@
 	/* Initialize the rest of the hwfns */
 	if (cdev->num_hwfns > 1) {
 		void __iomem *p_regview, *p_doorbell;
-		u8 __iomem *addr;
+		u64 db_phys_addr;
+		u32 offset;
 
 		/* adjust bar offset for second engine */
-		addr = cdev->regview +
-		       qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
-				       BAR_ID_0) / 2;
-		p_regview = addr;
+		offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+					 BAR_ID_0) / 2;
+		p_regview = cdev->regview + offset;
 
-		addr = cdev->doorbells +
-		       qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
-				       BAR_ID_1) / 2;
-		p_doorbell = addr;
+		offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+					 BAR_ID_1) / 2;
+
+		p_doorbell = cdev->doorbells + offset;
+
+		db_phys_addr = cdev->db_phys_addr + offset;
 
 		/* prepare second hw function */
 		rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
-					   p_doorbell, personality);
+					   p_doorbell, db_phys_addr,
+					   personality);
 
 		/* in case of error, need to free the previously
 		 * initialized hwfn 0.
@@ -3603,269 +4942,6 @@
 	return 0;
 }
 
-static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low,
-				  u8 *p_filter)
-{
-	*p_high = p_filter[1] | (p_filter[0] << 8);
-	*p_low = p_filter[5] | (p_filter[4] << 8) |
-		 (p_filter[3] << 16) | (p_filter[2] << 24);
-}
-
-int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt, u8 *p_filter)
-{
-	u32 high = 0, low = 0, en;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
-		return 0;
-
-	qed_llh_mac_to_filter(&high, &low, p_filter);
-
-	/* Find a free entry and utilize it */
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		en = qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
-		if (en)
-			continue;
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       2 * i * sizeof(u32), low);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), high);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-		       i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
-		break;
-	}
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to find an empty LLH filter to utilize\n");
-		return -EINVAL;
-	}
-
-	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-		   "mac: %pM is added at %d\n",
-		   p_filter, i);
-
-	return 0;
-}
-
-void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt, u8 *p_filter)
-{
-	u32 high = 0, low = 0;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
-		return;
-
-	qed_llh_mac_to_filter(&high, &low, p_filter);
-
-	/* Find the entry and clean it */
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   2 * i * sizeof(u32)) != low)
-			continue;
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   (2 * i + 1) * sizeof(u32)) != high)
-			continue;
-
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), 0);
-
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "mac: %pM is removed from %d\n",
-			   p_filter, i);
-		break;
-	}
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-		DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
-}
-
-int
-qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    u16 source_port_or_eth_type,
-			    u16 dest_port, enum qed_llh_port_filter_type_t type)
-{
-	u32 high = 0, low = 0, en;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
-		return 0;
-
-	switch (type) {
-	case QED_LLH_FILTER_ETHERTYPE:
-		high = source_port_or_eth_type;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_PORT:
-	case QED_LLH_FILTER_UDP_SRC_PORT:
-		low = source_port_or_eth_type << 16;
-		break;
-	case QED_LLH_FILTER_TCP_DEST_PORT:
-	case QED_LLH_FILTER_UDP_DEST_PORT:
-		low = dest_port;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-		low = (source_port_or_eth_type << 16) | dest_port;
-		break;
-	default:
-		DP_NOTICE(p_hwfn,
-			  "Non valid LLH protocol filter type %d\n", type);
-		return -EINVAL;
-	}
-	/* Find a free entry and utilize it */
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		en = qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
-		if (en)
-			continue;
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       2 * i * sizeof(u32), low);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), high);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 1);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-		       i * sizeof(u32), 1 << type);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
-		break;
-	}
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to find an empty LLH filter to utilize\n");
-		return -EINVAL;
-	}
-	switch (type) {
-	case QED_LLH_FILTER_ETHERTYPE:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "ETH type %x is added at %d\n",
-			   source_port_or_eth_type, i);
-		break;
-	case QED_LLH_FILTER_TCP_SRC_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "TCP src port %x is added at %d\n",
-			   source_port_or_eth_type, i);
-		break;
-	case QED_LLH_FILTER_UDP_SRC_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "UDP src port %x is added at %d\n",
-			   source_port_or_eth_type, i);
-		break;
-	case QED_LLH_FILTER_TCP_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "TCP dst port %x is added at %d\n", dest_port, i);
-		break;
-	case QED_LLH_FILTER_UDP_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "UDP dst port %x is added at %d\n", dest_port, i);
-		break;
-	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "TCP src/dst ports %x/%x are added at %d\n",
-			   source_port_or_eth_type, dest_port, i);
-		break;
-	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "UDP src/dst ports %x/%x are added at %d\n",
-			   source_port_or_eth_type, dest_port, i);
-		break;
-	}
-	return 0;
-}
-
-void
-qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt,
-			       u16 source_port_or_eth_type,
-			       u16 dest_port,
-			       enum qed_llh_port_filter_type_t type)
-{
-	u32 high = 0, low = 0;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
-		return;
-
-	switch (type) {
-	case QED_LLH_FILTER_ETHERTYPE:
-		high = source_port_or_eth_type;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_PORT:
-	case QED_LLH_FILTER_UDP_SRC_PORT:
-		low = source_port_or_eth_type << 16;
-		break;
-	case QED_LLH_FILTER_TCP_DEST_PORT:
-	case QED_LLH_FILTER_UDP_DEST_PORT:
-		low = dest_port;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-		low = (source_port_or_eth_type << 16) | dest_port;
-		break;
-	default:
-		DP_NOTICE(p_hwfn,
-			  "Non valid LLH protocol filter type %d\n", type);
-		return;
-	}
-
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		if (!qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)))
-			continue;
-		if (!qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32)))
-			continue;
-		if (!(qed_rd(p_hwfn, p_ptt,
-			     NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-			     i * sizeof(u32)) & BIT(type)))
-			continue;
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   2 * i * sizeof(u32)) != low)
-			continue;
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   (2 * i + 1) * sizeof(u32)) != high)
-			continue;
-
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-		       i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), 0);
-		break;
-	}
-
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-		DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
-}
-
 static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			    u32 hw_addr, void *p_eth_qzone,
 			    size_t eth_qzone_size, u8 timeset)
@@ -4386,23 +5462,9 @@
 	       sizeof(*p_hwfn->qm_info.wfq_data) * p_hwfn->qm_info.num_vports);
 }
 
-int qed_device_num_engines(struct qed_dev *cdev)
+int qed_device_num_ports(struct qed_dev *cdev)
 {
-	return QED_IS_BB(cdev) ? 2 : 1;
-}
-
-static int qed_device_num_ports(struct qed_dev *cdev)
-{
-	/* in CMT always only one port */
-	if (cdev->num_hwfns > 1)
-		return 1;
-
-	return cdev->num_ports_in_engine * qed_device_num_engines(cdev);
-}
-
-int qed_device_get_port_id(struct qed_dev *cdev)
-{
-	return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev);
+	return cdev->num_ports;
 }
 
 void qed_set_fw_mac_addr(__le16 *fw_msb,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index defdda1..47376d4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -241,11 +241,17 @@
 #define QED_DMAE_FLAG_VF_SRC		0x00000002
 #define QED_DMAE_FLAG_VF_DST		0x00000004
 #define QED_DMAE_FLAG_COMPLETION_DST	0x00000008
+#define QED_DMAE_FLAG_PORT		0x00000010
+#define QED_DMAE_FLAG_PF_SRC		0x00000020
+#define QED_DMAE_FLAG_PF_DST		0x00000040
 
 struct qed_dmae_params {
 	u32 flags; /* consists of QED_DMAE_FLAG_* values */
 	u8 src_vfid;
 	u8 dst_vfid;
+	u8 port_id;
+	u8 src_pfid;
+	u8 dst_pfid;
 };
 
 /**
@@ -257,7 +263,7 @@
  * @param source_addr
  * @param grc_addr (dmae_data_offset)
  * @param size_in_dwords
- * @param flags (one of the flags defined above)
+ * @param p_params (default parameters will be used in case of NULL)
  */
 int
 qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
@@ -265,7 +271,7 @@
 		  u64 source_addr,
 		  u32 grc_addr,
 		  u32 size_in_dwords,
-		  u32 flags);
+		  struct qed_dmae_params *p_params);
 
  /**
  * @brief qed_dmae_grc2host - Read data from dmae data offset
@@ -275,11 +281,11 @@
  * @param grc_addr (dmae_data_offset)
  * @param dest_addr
  * @param size_in_dwords
- * @param flags - one of the flags defined above
+ * @param p_params (default parameters will be used in case of NULL)
  */
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		      u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords,
-		      u32 flags);
+		      struct qed_dmae_params *p_params);
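
The DMAE entry points now take an optional parameter struct instead of a raw flags word, with NULL selecting defaults. A toy model of that convention, mirroring the QED_DMAE_FLAGS_IS_SET() check that appears in qed_hw.c later in this patch (all names here are stand-ins):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* NULL means "all defaults"; a non-NULL struct opts into specific
 * overrides via flag bits (0x20 matches QED_DMAE_FLAG_PF_SRC). */
#define FLAG_PF_SRC 0x20

struct dmae_params {
	uint32_t flags;
	uint8_t src_pfid;
};

static void host2grc(const struct dmae_params *p)
{
	uint8_t src_pfid = (p && (p->flags & FLAG_PF_SRC)) ? p->src_pfid
							   : 7 /* own PF */;
	printf("DMAE issued with src_pfid=%u\n", src_pfid);
}

int main(void)
{
	struct dmae_params params = { .flags = FLAG_PF_SRC, .src_pfid = 2 };

	host2grc(NULL);		/* defaults: use our own PF id */
	host2grc(&params);	/* override the source PF */
	return 0;
}
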
 
 /**
  * @brief qed_dmae_host2host - copy data from to source address
@@ -290,7 +296,7 @@
  * @param source_addr
  * @param dest_addr
  * @param size_in_dwords
- * @param params
+ * @param p_params (default parameters will be used in case of NULL)
  */
 int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
@@ -368,26 +374,66 @@
 		   u8 *dst_id);
 
 /**
- * @brief qed_llh_add_mac_filter - configures a MAC filter in llh
+ * @brief qed_llh_get_num_ppfid - Return the number of LLH filter banks
+ *	allocated to the PF.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_filter - MAC to add
+ * @param cdev
+ *
+ * @return u8 - Number of LLH filter banks
  */
-int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt, u8 *p_filter);
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev);
+
+enum qed_eng {
+	QED_ENG0,
+	QED_ENG1,
+	QED_BOTH_ENG,
+};
 
 /**
- * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh
+ * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given
+ *	LLH filter bank.
  *
- * @param p_hwfn
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param eng
+ *
+ * @return int
+ */
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev,
+			       u8 ppfid, enum qed_eng eng);
+
+/**
+ * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity
+ *
+ * @param cdev
+ * @param eng
+ *
+ * @return int
+ */
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng);
+
+/**
+ * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter
+ *	bank.
+ *
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param mac_addr - MAC to add
+ */
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+			   u8 ppfid, u8 mac_addr[ETH_ALEN]);
+
+/**
+ * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given
+ *	filter bank.
+ *
  * @param p_ptt
  * @param p_filter - MAC to remove
  */
-void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt, u8 *p_filter);
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+			       u8 ppfid, u8 mac_addr[ETH_ALEN]);
 
-enum qed_llh_port_filter_type_t {
+enum qed_llh_prot_filter_type_t {
 	QED_LLH_FILTER_ETHERTYPE,
 	QED_LLH_FILTER_TCP_SRC_PORT,
 	QED_LLH_FILTER_TCP_DEST_PORT,
@@ -398,36 +444,37 @@
 };
 
 /**
- * @brief qed_llh_add_protocol_filter - configures a protocol filter in llh
+ * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the
+ *	given filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - type of filters and comparing
  * @param source_port_or_eth_type - source port or ethertype to add
  * @param dest_port - destination port to add
- * @param type - type of filters and comparing
  */
 int
-qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    u16 source_port_or_eth_type,
-			    u16 dest_port,
-			    enum qed_llh_port_filter_type_t type);
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+			    u8 ppfid,
+			    enum qed_llh_prot_filter_type_t type,
+			    u16 source_port_or_eth_type, u16 dest_port);
 
 /**
- * @brief qed_llh_remove_protocol_filter - remove a protocol filter in llh
+ * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from
+ *	the given filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - type of filters and comparing
  * @param source_port_or_eth_type - source port or ethertype to add
  * @param dest_port - destination port to add
- * @param type - type of filters and comparing
  */
 void
-qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt,
-			       u16 source_port_or_eth_type,
-			       u16 dest_port,
-			       enum qed_llh_port_filter_type_t type);
+qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+			       u8 ppfid,
+			       enum qed_llh_prot_filter_type_t type,
+			       u16 source_port_or_eth_type, u16 dest_port);
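
The LLH API now addresses a filter bank by (cdev, ppfid) rather than by a per-hwfn ptt. A toy model of the per-bank slot allocation the new signatures imply (illustrative only; the real banks live in NIG registers):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ETH_ALEN 6
#define BANKS 2
#define SLOTS 4

/* Each PPFID owns its own bank of filter slots. */
static uint8_t bank[BANKS][SLOTS][ETH_ALEN];
static int used[BANKS][SLOTS];

static int add_mac_filter(uint8_t ppfid, const uint8_t mac[ETH_ALEN])
{
	for (int i = 0; i < SLOTS; i++)
		if (!used[ppfid][i]) {
			memcpy(bank[ppfid][i], mac, ETH_ALEN);
			used[ppfid][i] = 1;
			return 0;
		}
	return -1;	/* no free slot in this bank */
}

int main(void)
{
	const uint8_t mac[ETH_ALEN] = { 0, 0x0e, 0x1e, 0xaa, 0xbb, 0xcc };

	printf("add to ppfid 0: %d\n", add_mac_filter(0, mac));
	return 0;
}
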
 
 /**
  * @brief Cleanup of previous driver remains prior to load
@@ -472,6 +519,46 @@
 int
 qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
 
+/**
+ * @brief qed_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param b_enable - true/false
+ *
+ * @return int
+ */
+int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt, bool b_enable);
+
+/**
+ * @brief qed_db_recovery_add - add doorbell information to the doorbell
+ * recovery mechanism.
+ *
+ * @param cdev
+ * @param db_addr - doorbell address
+ * @param db_data - address of where db_data is stored
+ * @param db_width - doorbell is 32b or 64b
+ * @param db_space - doorbell recovery addresses are user or kernel space
+ */
+int qed_db_recovery_add(struct qed_dev *cdev,
+			void __iomem *db_addr,
+			void *db_data,
+			enum qed_db_rec_width db_width,
+			enum qed_db_rec_space db_space);
+
+/**
+ * @brief qed_db_recovery_del - remove doorbell information from the doorbell
+ * recovery mechanism. db_data serves as key (db_addr is not unique).
+ *
+ * @param cdev
+ * @param db_addr - doorbell address
+ * @param db_data - address where db_data is stored. Serves as key for the
+ *                  entry to delete.
+ */
+int qed_db_recovery_del(struct qed_dev *cdev,
+			void __iomem *db_addr, void *db_data);
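
Because db_data is documented above as the lookup key (db_addr is not unique), removal must match on the data pointer. A small self-contained model of that contract:

#include <stdio.h>

/* Toy recovery list: the same db_addr may appear more than once, but each
 * db_data entry is unique and serves as the key. */
#define MAX_ENTRIES 4

struct entry { void *db_addr; void *db_data; };
static struct entry list[MAX_ENTRIES];
static int count;

static int recovery_add(void *db_addr, void *db_data)
{
	if (count == MAX_ENTRIES)
		return -1;
	list[count++] = (struct entry){ db_addr, db_data };
	return 0;
}

static int recovery_del(void *db_data)
{
	for (int i = 0; i < count; i++)
		if (list[i].db_data == db_data) {	/* key match */
			list[i] = list[--count];
			return 0;
		}
	return -1;
}

int main(void)
{
	static int data_a, data_b;
	static char doorbell[8];	/* same "address" for both entries */

	recovery_add(doorbell, &data_a);
	recovery_add(doorbell, &data_b);
	printf("del(&data_a) = %d, entries left = %d\n",
	       recovery_del(&data_a), count);
	return 0;
}
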
 
 const char *qed_hw_get_resc_name(enum qed_resources res_id);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 46dc93d..de31a38 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -745,7 +745,7 @@
 static int qed_fill_fcoe_dev_info(struct qed_dev *cdev,
 				  struct qed_dev_fcoe_info *info)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
 	int rc;
 
 	memset(info, 0, sizeof(*info));
@@ -806,15 +806,15 @@
 		return -EINVAL;
 	}
 
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+	p_ptt = qed_ptt_acquire(QED_AFFIN_HWFN(cdev));
 	if (!p_ptt)
 		return -EAGAIN;
 
 	/* Stop the fcoe */
-	rc = qed_sp_fcoe_func_stop(QED_LEADING_HWFN(cdev), p_ptt,
+	rc = qed_sp_fcoe_func_stop(QED_AFFIN_HWFN(cdev), p_ptt,
 				   QED_SPQ_MODE_EBLOCK, NULL);
 	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+	qed_ptt_release(QED_AFFIN_HWFN(cdev), p_ptt);
 
 	return rc;
 }
@@ -828,8 +828,8 @@
 		return 0;
 	}
 
-	rc = qed_sp_fcoe_func_start(QED_LEADING_HWFN(cdev),
-				    QED_SPQ_MODE_EBLOCK, NULL);
+	rc = qed_sp_fcoe_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				    NULL);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to start fcoe\n");
 		return rc;
@@ -849,7 +849,7 @@
 			return -ENOMEM;
 		}
 
-		rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), tid_info);
+		rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
 		if (rc) {
 			DP_NOTICE(cdev, "Failed to gather task information\n");
 			qed_fcoe_stop(cdev);
@@ -884,7 +884,7 @@
 	}
 
 	/* Acquire the connection */
-	rc = qed_fcoe_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+	rc = qed_fcoe_acquire_connection(QED_AFFIN_HWFN(cdev), NULL,
 					 &hash_con->con);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to acquire Connection\n");
@@ -898,7 +898,7 @@
 	hash_add(cdev->connections, &hash_con->node, *handle);
 
 	if (p_doorbell)
-		*p_doorbell = qed_fcoe_get_db_addr(QED_LEADING_HWFN(cdev),
+		*p_doorbell = qed_fcoe_get_db_addr(QED_AFFIN_HWFN(cdev),
 						   *handle);
 
 	return 0;
@@ -916,7 +916,7 @@
 	}
 
 	hlist_del(&hash_con->node);
-	qed_fcoe_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+	qed_fcoe_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
 	kfree(hash_con);
 
 	return 0;
@@ -971,7 +971,7 @@
 	con->d_id.addr_mid = conn_info->d_id.addr_mid;
 	con->d_id.addr_lo = conn_info->d_id.addr_lo;
 
-	return qed_sp_fcoe_conn_offload(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_fcoe_conn_offload(QED_AFFIN_HWFN(cdev), con,
 					QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -992,13 +992,13 @@
 	con = hash_con->con;
 	con->terminate_params = terminate_params;
 
-	return qed_sp_fcoe_conn_destroy(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_fcoe_conn_destroy(QED_AFFIN_HWFN(cdev), con,
 					QED_SPQ_MODE_EBLOCK, NULL);
 }
 
 static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
 {
-	return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
+	return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
 }
 
 void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index a713826..cf3ceb6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -274,7 +274,8 @@
 	u8 mf_si_mcast_accept_all;
 	struct core_rx_action_on_error action_on_error;
 	u8 gsi_offload_flag;
-	u8 reserved[6];
+	u8 wipe_inner_vlan_pri_en;
+	u8 reserved[5];
 };
 
 /* Ramrod data for rx queue stop ramrod */
@@ -351,7 +352,8 @@
 	__le16 pbl_size;
 	__le16 qm_pq_id;
 	u8 gsi_offload_flag;
-	u8 resrved[3];
+	u8 vport_id;
+	u8 resrved[2];
 };
 
 /* Ramrod data for tx queue stop ramrod */
@@ -914,6 +916,16 @@
 	__le16 reserved1;
 };
 
+/* Update RSS indirection table entry command */
+struct eth_tstorm_rss_update_data {
+	u8 valid;
+	u8 vport_id;
+	u8 ind_table_index;
+	u8 reserved;
+	__le16 ind_table_value;
+	__le16 reserved1;
+};
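
An illustrative fill of the new RSS indirection update entry; the field semantics are inferred from the names, and the byte-swap helper is a stand-in for the kernel's cpu_to_le16() since the firmware consumes little-endian fields:

#include <stdio.h>
#include <stdint.h>

struct rss_update {
	uint8_t valid;
	uint8_t vport_id;
	uint8_t ind_table_index;
	uint8_t reserved;
	uint16_t ind_table_value;	/* little-endian on the wire */
	uint16_t reserved1;
};

static uint16_t to_le16(uint16_t v)
{
	const union { uint16_t u; uint8_t b[2]; } probe = { 1 };

	return probe.b[0] ? v : (uint16_t)((v << 8) | (v >> 8));
}

int main(void)
{
	struct rss_update cmd = {
		.valid = 1,
		.vport_id = 3,
		.ind_table_index = 17,
		.ind_table_value = to_le16(5),	/* new queue for entry 17 */
	};

	printf("update entry %u of vport %u\n",
	       cmd.ind_table_index, cmd.vport_id);
	return 0;
}
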
+
 struct eth_ustorm_per_pf_stat {
 	struct regpair rcv_lb_ucast_bytes;
 	struct regpair rcv_lb_mcast_bytes;
@@ -1241,6 +1253,10 @@
 	u8 rl_id_first;
 	u8 rl_id_last;
 	u8 rl_dc_qcn_flg;
+	u8 dcqcn_reset_alpha_on_idle;
+	u8 rl_bc_stage_th;
+	u8 rl_timer_stage_th;
+	u8 reserved1;
 	__le32 rl_bc_rate;
 	__le16 rl_max_rate;
 	__le16 rl_r_ai;
@@ -1249,7 +1265,7 @@
 	__le32 dcqcn_k_us;
 	__le32 dcqcn_timeuot_us;
 	__le32 qcn_timeuot_us;
-	__le32 reserved[2];
+	__le32 reserved2;
 };
 
 /* Slowpath Element (SPQE) */
@@ -3008,6 +3024,21 @@
  */
 bool qed_read_fw_info(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt, struct fw_info *fw_info);
+/**
+ * @brief qed_dbg_grc_config - Sets the value of a GRC parameter.
+ *
+ * @param p_hwfn -	HW device data
+ * @param grc_param -	GRC parameter
+ * @param val -		Value to set.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- grc_param is invalid
+ *	- val is outside the allowed boundaries
+ */
+enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum dbg_grc_params grc_param, u32 val);
 
 /**
  * @brief qed_dbg_grc_set_params_default - Reverts all GRC parameters to their
@@ -3322,6 +3353,25 @@
 enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
 				   struct dbg_attn_block_result *results);
 
+/******************************* Data Types **********************************/
+
+struct mcp_trace_format {
+	u32 data;
+#define MCP_TRACE_FORMAT_MODULE_MASK	0x0000ffff
+#define MCP_TRACE_FORMAT_MODULE_SHIFT	0
+#define MCP_TRACE_FORMAT_LEVEL_MASK	0x00030000
+#define MCP_TRACE_FORMAT_LEVEL_SHIFT	16
+#define MCP_TRACE_FORMAT_P1_SIZE_MASK	0x000c0000
+#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT	18
+#define MCP_TRACE_FORMAT_P2_SIZE_MASK	0x00300000
+#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT	20
+#define MCP_TRACE_FORMAT_P3_SIZE_MASK	0x00c00000
+#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT	22
+#define MCP_TRACE_FORMAT_LEN_MASK	0xff000000
+#define MCP_TRACE_FORMAT_LEN_SHIFT	24
+	char *format_str;
+};
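
The packed data word above unpacks with the usual mask/shift pattern; a minimal standalone example using the masks defined in the struct (the helper macro is local to this sketch):

#include <stdio.h>
#include <stdint.h>

#define GET_FIELD(val, mask, shift) (((val) & (mask)) >> (shift))

int main(void)
{
	uint32_t data = 0x12050007;	/* sample packed word */

	printf("module=%u level=%u p1_size=%u len=%u\n",
	       GET_FIELD(data, 0x0000ffffU, 0),   /* MODULE  */
	       GET_FIELD(data, 0x00030000U, 16),  /* LEVEL   */
	       GET_FIELD(data, 0x000c0000U, 18),  /* P1_SIZE */
	       GET_FIELD(data, 0xff000000U, 24)); /* LEN     */
	return 0;
}
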
+
 /******************************** Constants **********************************/
 
 #define MAX_NAME_LEN	16
@@ -3337,6 +3387,13 @@
 enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr);
 
 /**
+ * @brief qed_dbg_alloc_user_data - Allocates user debug data.
+ *
+ * @param p_hwfn -		 HW device data
+ */
+enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn);
+
+/**
  * @brief qed_dbg_get_status_str - Returns a string for the specified status.
  *
  * @param status - a debug status code.
@@ -3381,8 +3438,7 @@
 					   u32 *num_warnings);
 
 /**
- * @brief qed_dbg_mcp_trace_set_meta_data - Sets a pointer to the MCP Trace
- *	meta data.
+ * @brief qed_dbg_mcp_trace_set_meta_data - Sets the MCP Trace meta data.
  *
  * Needed in case the MCP Trace dump doesn't contain the meta data (e.g. due to
  * no NVRAM access).
@@ -3390,7 +3446,8 @@
- * @param data - pointer to MCP Trace meta data
- * @param size - size of MCP Trace meta data in dwords
+ * @param p_hwfn - HW device data
+ * @param meta_buf - pointer to MCP Trace meta data
  */
-void qed_dbg_mcp_trace_set_meta_data(u32 *data, u32 size);
+void qed_dbg_mcp_trace_set_meta_data(struct qed_hwfn *p_hwfn,
+				     const u32 *meta_buf);
 
 /**
  * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
@@ -3425,19 +3482,45 @@
 					    char *results_buf);
 
 /**
+ * @brief qed_print_mcp_trace_results_cont - Prints MCP Trace results, and
+ * keeps the MCP trace meta data allocated, to support continuous MCP Trace
+ * parsing. After the continuous parsing ends, qed_mcp_trace_free_meta_data
+ * should be called to free the meta data.
+ *
+ * @param p_hwfn -	      HW device data
+ * @param dump_buf -	      mcp trace dump buffer, starting from the header.
+ * @param results_buf -	      buffer for printing the mcp trace results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
+						 u32 *dump_buf,
+						 char *results_buf);
+
+/**
  * @brief qed_print_mcp_trace_line - Prints MCP Trace results for a single line
  *
+ * @param p_hwfn -	      HW device data
  * @param dump_buf -	      mcp trace dump buffer, starting from the header.
  * @param num_dumped_bytes -  number of bytes that were dumped.
  * @param results_buf -	      buffer for printing the mcp trace results.
  *
  * @return error if the parsing fails, ok otherwise.
  */
-enum dbg_status qed_print_mcp_trace_line(u8 *dump_buf,
+enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
+					 u8 *dump_buf,
 					 u32 num_dumped_bytes,
 					 char *results_buf);
 
 /**
+ * @brief qed_mcp_trace_free_meta_data - Frees the MCP Trace meta data.
+ * Should be called after continuous MCP Trace parsing.
+ *
+ * @param p_hwfn - HW device data
+ */
+void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn);
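
Usage shape implied by the two declarations above: call the _cont parser as new trace chunks arrive, then free the cached meta data exactly once at the end. Toy stand-ins, not the driver functions:

#include <stdio.h>

static void print_results_cont(int chunk) { printf("parsed chunk %d\n", chunk); }
static void free_meta_data(void)          { printf("meta data freed\n"); }

int main(void)
{
	for (int chunk = 0; chunk < 3; chunk++)
		print_results_cont(chunk);	/* meta stays allocated */
	free_meta_data();			/* once, after the last call */
	return 0;
}
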
+
+/**
  * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
  *	for reg_fifo results (in bytes).
  *
@@ -4303,154 +4386,161 @@
 	(IRO[29].base + ((pf_id) * IRO[29].m1))
 #define ETH_RX_RATE_LIMIT_SIZE				(IRO[29].size)
 
+/* RSS indirection table entry update command per PF offset in TSTORM PF BAR0.
+ * Use eth_tstorm_rss_update_data for update.
+ */
+#define TSTORM_ETH_RSS_UPDATE_OFFSET(pf_id) \
+	(IRO[30].base + ((pf_id) * IRO[30].m1))
+#define TSTORM_ETH_RSS_UPDATE_SIZE			(IRO[30].size)
+
 /* Xstorm queue zone */
 #define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \
-	(IRO[30].base + ((queue_id) * IRO[30].m1))
-#define XSTORM_ETH_QUEUE_ZONE_SIZE			(IRO[30].size)
+	(IRO[31].base + ((queue_id) * IRO[31].m1))
+#define XSTORM_ETH_QUEUE_ZONE_SIZE			(IRO[31].size)
 
 /* Ystorm cqe producer */
 #define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) \
-	(IRO[31].base + ((rss_id) * IRO[31].m1))
-#define YSTORM_TOE_CQ_PROD_SIZE				(IRO[31].size)
+	(IRO[32].base + ((rss_id) * IRO[32].m1))
+#define YSTORM_TOE_CQ_PROD_SIZE				(IRO[32].size)
 
 /* Ustorm cqe producer */
 #define USTORM_TOE_CQ_PROD_OFFSET(rss_id) \
-	(IRO[32].base + ((rss_id) * IRO[32].m1))
-#define USTORM_TOE_CQ_PROD_SIZE				(IRO[32].size)
+	(IRO[33].base + ((rss_id) * IRO[33].m1))
+#define USTORM_TOE_CQ_PROD_SIZE				(IRO[33].size)
 
 /* Ustorm grq producer */
 #define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) \
-	(IRO[33].base + ((pf_id) * IRO[33].m1))
-#define USTORM_TOE_GRQ_PROD_SIZE			(IRO[33].size)
+	(IRO[34].base + ((pf_id) * IRO[34].m1))
+#define USTORM_TOE_GRQ_PROD_SIZE			(IRO[34].size)
 
 /* Tstorm cmdq-cons of given command queue-id */
 #define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \
-	(IRO[34].base + ((cmdq_queue_id) * IRO[34].m1))
-#define TSTORM_SCSI_CMDQ_CONS_SIZE			(IRO[34].size)
+	(IRO[35].base + ((cmdq_queue_id) * IRO[35].m1))
+#define TSTORM_SCSI_CMDQ_CONS_SIZE			(IRO[35].size)
 
 /* Tstorm (reflects M-Storm) bdq-external-producer of given function ID,
  * BDqueue-id.
  */
 #define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
-	(IRO[35].base + ((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2))
-#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[35].size)
+	(IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
+#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[36].size)
 
 /* Mstorm bdq-external-producer of given BDQ resource ID, BDqueue-id */
 #define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
-	(IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
-#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[36].size)
+	(IRO[37].base + ((func_id) * IRO[37].m1) + ((bdq_id) * IRO[37].m2))
+#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[37].size)
 
 /* Tstorm iSCSI RX stats */
 #define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
-	(IRO[37].base + ((pf_id) * IRO[37].m1))
-#define TSTORM_ISCSI_RX_STATS_SIZE			(IRO[37].size)
+	(IRO[38].base + ((pf_id) * IRO[38].m1))
+#define TSTORM_ISCSI_RX_STATS_SIZE			(IRO[38].size)
 
 /* Mstorm iSCSI RX stats */
 #define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
-	(IRO[38].base + ((pf_id) * IRO[38].m1))
-#define MSTORM_ISCSI_RX_STATS_SIZE			(IRO[38].size)
+	(IRO[39].base + ((pf_id) * IRO[39].m1))
+#define MSTORM_ISCSI_RX_STATS_SIZE			(IRO[39].size)
 
 /* Ustorm iSCSI RX stats */
 #define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
-	(IRO[39].base + ((pf_id) * IRO[39].m1))
-#define USTORM_ISCSI_RX_STATS_SIZE			(IRO[39].size)
+	(IRO[40].base + ((pf_id) * IRO[40].m1))
+#define USTORM_ISCSI_RX_STATS_SIZE			(IRO[40].size)
 
 /* Xstorm iSCSI TX stats */
 #define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
-	(IRO[40].base + ((pf_id) * IRO[40].m1))
-#define XSTORM_ISCSI_TX_STATS_SIZE			(IRO[40].size)
+	(IRO[41].base + ((pf_id) * IRO[41].m1))
+#define XSTORM_ISCSI_TX_STATS_SIZE			(IRO[41].size)
 
 /* Ystorm iSCSI TX stats */
 #define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
-	(IRO[41].base + ((pf_id) * IRO[41].m1))
-#define YSTORM_ISCSI_TX_STATS_SIZE			(IRO[41].size)
+	(IRO[42].base + ((pf_id) * IRO[42].m1))
+#define YSTORM_ISCSI_TX_STATS_SIZE			(IRO[42].size)
 
 /* Pstorm iSCSI TX stats */
 #define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
-	(IRO[42].base + ((pf_id) * IRO[42].m1))
-#define PSTORM_ISCSI_TX_STATS_SIZE			(IRO[42].size)
+	(IRO[43].base + ((pf_id) * IRO[43].m1))
+#define PSTORM_ISCSI_TX_STATS_SIZE			(IRO[43].size)
 
 /* Tstorm FCoE RX stats */
 #define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) \
-	(IRO[43].base + ((pf_id) * IRO[43].m1))
-#define TSTORM_FCOE_RX_STATS_SIZE			(IRO[43].size)
+	(IRO[44].base + ((pf_id) * IRO[44].m1))
+#define TSTORM_FCOE_RX_STATS_SIZE			(IRO[44].size)
 
 /* Pstorm FCoE TX stats */
 #define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) \
-	(IRO[44].base + ((pf_id) * IRO[44].m1))
-#define PSTORM_FCOE_TX_STATS_SIZE			(IRO[44].size)
+	(IRO[45].base + ((pf_id) * IRO[45].m1))
+#define PSTORM_FCOE_TX_STATS_SIZE			(IRO[45].size)
 
 /* Pstorm RDMA queue statistics */
 #define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
-	(IRO[45].base + ((rdma_stat_counter_id) * IRO[45].m1))
-#define PSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[45].size)
+	(IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1))
+#define PSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[46].size)
 
 /* Tstorm RDMA queue statistics */
 #define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
-	(IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1))
-#define TSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[46].size)
+	(IRO[47].base + ((rdma_stat_counter_id) * IRO[47].m1))
+#define TSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[47].size)
 
 /* Xstorm error level for assert */
 #define XSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[47].base +	((pf_id) * IRO[47].m1))
-#define XSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[47].size)
+	(IRO[48].base +	((pf_id) * IRO[48].m1))
+#define XSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[48].size)
 
 /* Ystorm error level for assert */
 #define YSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[48].base + ((pf_id) * IRO[48].m1))
-#define YSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[48].size)
+	(IRO[49].base + ((pf_id) * IRO[49].m1))
+#define YSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[49].size)
 
 /* Pstorm error level for assert */
 #define PSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[49].base +	((pf_id) * IRO[49].m1))
-#define PSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[49].size)
+	(IRO[50].base +	((pf_id) * IRO[50].m1))
+#define PSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[50].size)
 
 /* Tstorm error level for assert */
 #define TSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[50].base +	((pf_id) * IRO[50].m1))
-#define TSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[50].size)
+	(IRO[51].base +	((pf_id) * IRO[51].m1))
+#define TSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[51].size)
 
 /* Mstorm error level for assert */
 #define MSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[51].base + ((pf_id) * IRO[51].m1))
-#define MSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[51].size)
+	(IRO[52].base + ((pf_id) * IRO[52].m1))
+#define MSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[52].size)
 
 /* Ustorm error level for assert */
 #define USTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[52].base + ((pf_id) * IRO[52].m1))
-#define USTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[52].size)
+	(IRO[53].base + ((pf_id) * IRO[53].m1))
+#define USTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[53].size)
 
 /* Xstorm iWARP rxmit stats */
 #define XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) \
-	(IRO[53].base +	((pf_id) * IRO[53].m1))
-#define XSTORM_IWARP_RXMIT_STATS_SIZE			(IRO[53].size)
+	(IRO[54].base +	((pf_id) * IRO[54].m1))
+#define XSTORM_IWARP_RXMIT_STATS_SIZE			(IRO[54].size)
 
 /* Tstorm RoCE Event Statistics */
 #define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) \
-	(IRO[54].base + ((roce_pf_id) * IRO[54].m1))
-#define TSTORM_ROCE_EVENTS_STAT_SIZE			(IRO[54].size)
+	(IRO[55].base + ((roce_pf_id) * IRO[55].m1))
+#define TSTORM_ROCE_EVENTS_STAT_SIZE			(IRO[55].size)
 
 /* DCQCN Received Statistics */
 #define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) \
-	(IRO[55].base + ((roce_pf_id) * IRO[55].m1))
-#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE		(IRO[55].size)
+	(IRO[56].base + ((roce_pf_id) * IRO[56].m1))
+#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE		(IRO[56].size)
 
 /* RoCE Error Statistics */
 #define YSTORM_ROCE_ERROR_STATS_OFFSET(roce_pf_id) \
-	(IRO[56].base + ((roce_pf_id) * IRO[56].m1))
-#define YSTORM_ROCE_ERROR_STATS_SIZE			(IRO[56].size)
+	(IRO[57].base + ((roce_pf_id) * IRO[57].m1))
+#define YSTORM_ROCE_ERROR_STATS_SIZE			(IRO[57].size)
 
 /* DCQCN Sent Statistics */
 #define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) \
-	(IRO[57].base + ((roce_pf_id) * IRO[57].m1))
-#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE		(IRO[57].size)
+	(IRO[58].base + ((roce_pf_id) * IRO[58].m1))
+#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE		(IRO[58].size)
 
 /* RoCE CQEs Statistics */
 #define USTORM_ROCE_CQE_STATS_OFFSET(roce_pf_id) \
-	(IRO[58].base + ((roce_pf_id) * IRO[58].m1))
-#define USTORM_ROCE_CQE_STATS_SIZE			(IRO[58].size)
+	(IRO[59].base + ((roce_pf_id) * IRO[59].m1))
+#define USTORM_ROCE_CQE_STATS_SIZE			(IRO[59].size)
 
-static const struct iro iro_arr[59] = {
+static const struct iro iro_arr[60] = {
 	{0x0, 0x0, 0x0, 0x0, 0x8},
 	{0x4cb8, 0x88, 0x0, 0x0, 0x88},
 	{0x6530, 0x20, 0x0, 0x0, 0x20},
@@ -4461,14 +4551,14 @@
 	{0x84, 0x8, 0x0, 0x0, 0x2},
 	{0x4c48, 0x0, 0x0, 0x0, 0x78},
 	{0x3e38, 0x0, 0x0, 0x0, 0x78},
-	{0x2b78, 0x0, 0x0, 0x0, 0x78},
+	{0x3ef8, 0x0, 0x0, 0x0, 0x78},
 	{0x4c40, 0x0, 0x0, 0x0, 0x78},
 	{0x4998, 0x0, 0x0, 0x0, 0x78},
 	{0x7f50, 0x0, 0x0, 0x0, 0x78},
 	{0xa28, 0x8, 0x0, 0x0, 0x8},
 	{0x6210, 0x10, 0x0, 0x0, 0x10},
 	{0xb820, 0x30, 0x0, 0x0, 0x30},
-	{0x96c0, 0x30, 0x0, 0x0, 0x30},
+	{0xa990, 0x30, 0x0, 0x0, 0x30},
 	{0x4b68, 0x80, 0x0, 0x0, 0x40},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0x53a8, 0x80, 0x4, 0x0, 0x4},
@@ -4476,11 +4566,12 @@
 	{0x4ba8, 0x80, 0x0, 0x0, 0x20},
 	{0x8158, 0x40, 0x0, 0x0, 0x30},
 	{0xe770, 0x60, 0x0, 0x0, 0x60},
-	{0x2d10, 0x80, 0x0, 0x0, 0x38},
-	{0xf2b8, 0x78, 0x0, 0x0, 0x78},
+	{0x4090, 0x80, 0x0, 0x0, 0x38},
+	{0xfea8, 0x78, 0x0, 0x0, 0x78},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0xaf20, 0x0, 0x0, 0x0, 0xf0},
 	{0xb010, 0x8, 0x0, 0x0, 0x8},
+	{0xc00, 0x8, 0x0, 0x0, 0x8},
 	{0x1f8, 0x8, 0x0, 0x0, 0x8},
 	{0xac0, 0x8, 0x0, 0x0, 0x8},
 	{0x2578, 0x8, 0x0, 0x0, 0x8},
@@ -4492,23 +4583,23 @@
 	{0x12908, 0x18, 0x0, 0x0, 0x10},
 	{0x11aa8, 0x40, 0x0, 0x0, 0x18},
 	{0xa588, 0x50, 0x0, 0x0, 0x20},
-	{0x8700, 0x40, 0x0, 0x0, 0x28},
-	{0x10300, 0x18, 0x0, 0x0, 0x10},
+	{0x8f00, 0x40, 0x0, 0x0, 0x28},
+	{0x10e30, 0x18, 0x0, 0x0, 0x10},
 	{0xde48, 0x48, 0x0, 0x0, 0x38},
-	{0x10768, 0x20, 0x0, 0x0, 0x20},
-	{0x2d48, 0x80, 0x0, 0x0, 0x10},
+	{0x11298, 0x20, 0x0, 0x0, 0x20},
+	{0x40c8, 0x80, 0x0, 0x0, 0x10},
 	{0x5048, 0x10, 0x0, 0x0, 0x10},
 	{0xc748, 0x8, 0x0, 0x0, 0x1},
-	{0xa128, 0x8, 0x0, 0x0, 0x1},
-	{0x10f00, 0x8, 0x0, 0x0, 0x1},
+	{0xa928, 0x8, 0x0, 0x0, 0x1},
+	{0x11a30, 0x8, 0x0, 0x0, 0x1},
 	{0xf030, 0x8, 0x0, 0x0, 0x1},
 	{0x13028, 0x8, 0x0, 0x0, 0x1},
 	{0x12c58, 0x8, 0x0, 0x0, 0x1},
 	{0xc9b8, 0x30, 0x0, 0x0, 0x10},
 	{0xed90, 0x28, 0x0, 0x0, 0x28},
-	{0xa520, 0x18, 0x0, 0x0, 0x18},
-	{0xa6a0, 0x8, 0x0, 0x0, 0x8},
-	{0x13108, 0x8, 0x0, 0x0, 0x8},
+	{0xad20, 0x18, 0x0, 0x0, 0x18},
+	{0xaea0, 0x8, 0x0, 0x0, 0x8},
+	{0x13c38, 0x8, 0x0, 0x0, 0x8},
 	{0x13c50, 0x18, 0x0, 0x0, 0x18},
 };
 
@@ -5661,6 +5752,14 @@
 	MAX_ETH_FILTER_TYPE
 };
 
+/* inner to inner vlan priority translation configurations */
+struct eth_in_to_in_pri_map_cfg {
+	u8 inner_vlan_pri_remap_en;
+	u8 reserved[7];
+	u8 non_rdma_in_to_in_pri_map[8];
+	u8 rdma_in_to_in_pri_map[8];
+};
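
A toy view of the inner-to-inner VLAN priority remap: an 8-entry table indexed by the original PCP (0-7), as the non_rdma_in_to_in_pri_map[8] layout suggests. The swap-4-and-5 table below is made up purely for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint8_t pri_map[8] = { 0, 1, 2, 3, 5, 4, 6, 7 };

	for (uint8_t pcp = 0; pcp < 8; pcp++)
		printf("inner PCP %u -> %u\n", pcp, pri_map[pcp]);
	return 0;
}
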
+
 /* Eth IPv4 Fragment Type */
 enum eth_ipv4_frag_type {
 	ETH_IPV4_NOT_FRAG,
@@ -6018,6 +6117,14 @@
 	struct regpair reserved1[5];
 };
 
+/* Inner to Inner VLAN priority map update mode */
+enum update_in_to_in_pri_map_mode_enum {
+	ETH_IN_TO_IN_PRI_MAP_UPDATE_DISABLED,
+	ETH_IN_TO_IN_PRI_MAP_UPDATE_NON_RDMA_TBL,
+	ETH_IN_TO_IN_PRI_MAP_UPDATE_RDMA_TBL,
+	MAX_UPDATE_IN_TO_IN_PRI_MAP_MODE_ENUM
+};
+
 /* Ramrod data for vport update ramrod */
 struct vport_filter_update_ramrod_data {
 	struct eth_filter_cmd_header filter_cmd_hdr;
@@ -6048,7 +6155,8 @@
 	u8 zero_placement_offset;
 	u8 ctl_frame_mac_check_en;
 	u8 ctl_frame_ethtype_check_en;
-	u8 reserved[1];
+	u8 wipe_inner_vlan_pri_en;
+	struct eth_in_to_in_pri_map_cfg in_to_in_vlan_pri_map_cfg;
 };
 
 /* Ramrod data for vport stop ramrod */
@@ -6100,7 +6208,9 @@
 	u8 update_ctl_frame_checks_en_flg;
 	u8 ctl_frame_mac_check_en;
 	u8 ctl_frame_ethtype_check_en;
-	u8 reserved[15];
+	u8 update_in_to_in_pri_map_mode;
+	u8 in_to_in_pri_map[8];
+	u8 reserved[6];
 };
 
 struct vport_update_ramrod_mcast {
@@ -6929,11 +7039,6 @@
 	struct regpair temp[4];
 };
 
-/* The roce task context of Ustorm */
-struct ustorm_rdma_task_st_ctx {
-	struct regpair temp[2];
-};
-
 struct e4_ustorm_rdma_task_ag_ctx {
 	u8 reserved;
 	u8 state;
@@ -7007,8 +7112,6 @@
 	struct e4_mstorm_rdma_task_ag_ctx mstorm_ag_context;
 	struct mstorm_rdma_task_st_ctx mstorm_st_context;
 	struct rdif_task_context rdif_context;
-	struct ustorm_rdma_task_st_ctx ustorm_st_context;
-	struct regpair ustorm_st_padding[2];
 	struct e4_ustorm_rdma_task_ag_ctx ustorm_ag_context;
 };
 
@@ -7388,7 +7491,7 @@
 #define E4_USTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK		0x1
 #define E4_USTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
-	u8 byte3;
+	u8 nvmf_only;
 	__le16 conn_dpi;
 	__le16 word1;
 	__le32 cq_cons;
@@ -7831,7 +7934,12 @@
 	struct regpair qp_handle_for_cqe;
 	struct regpair qp_handle_for_async;
 	u8 stats_counter_id;
-	u8 reserved3[7];
+	u8 reserved3[6];
+	u8 flags2;
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_MASK			0x1
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_SHIFT			0
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK			0x7F
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT			1
 	__le16 regular_latency_phy_queue;
 	__le16 dpi;
 };
@@ -7954,6 +8062,7 @@
 	ROCE_EVENT_DESTROY_QP,
 	ROCE_EVENT_CREATE_UD_QP,
 	ROCE_EVENT_DESTROY_UD_QP,
+	ROCE_EVENT_FUNC_UPDATE,
 	MAX_ROCE_EVENT_OPCODE
 };
 
@@ -7962,7 +8071,13 @@
 	u8 ll2_queue_id;
 	u8 cnp_vlan_priority;
 	u8 cnp_dscp;
-	u8 reserved;
+	u8 flags;
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_NP_EN_MASK		0x1
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_NP_EN_SHIFT		0
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_RP_EN_MASK		0x1
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_RP_EN_SHIFT		1
+#define ROCE_INIT_FUNC_PARAMS_RESERVED0_MASK		0x3F
+#define ROCE_INIT_FUNC_PARAMS_RESERVED0_SHIFT		2
 	__le32 cnp_send_timeout;
 	__le16 rl_offset;
 	u8 rl_count_log;
@@ -8109,9 +8224,24 @@
 	ROCE_RAMROD_DESTROY_QP,
 	ROCE_RAMROD_CREATE_UD_QP,
 	ROCE_RAMROD_DESTROY_UD_QP,
+	ROCE_RAMROD_FUNC_UPDATE,
 	MAX_ROCE_RAMROD_CMD_ID
 };
 
+/* RoCE func init ramrod data */
+struct roce_update_func_params {
+	u8 cnp_vlan_priority;
+	u8 cnp_dscp;
+	__le16 flags;
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_NP_EN_MASK	0x1
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_NP_EN_SHIFT	0
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_RP_EN_MASK	0x1
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_RP_EN_SHIFT	1
+#define ROCE_UPDATE_FUNC_PARAMS_RESERVED0_MASK		0x3FFF
+#define ROCE_UPDATE_FUNC_PARAMS_RESERVED0_SHIFT		2
+	__le32 cnp_send_timeout;
+};
+
 struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 	u8 reserved0;
 	u8 state;
@@ -12092,11 +12222,56 @@
 	u32 transceiver_data;
 #define ETH_TRANSCEIVER_STATE_MASK	0x000000FF
 #define ETH_TRANSCEIVER_STATE_SHIFT	0x00000000
+#define ETH_TRANSCEIVER_STATE_OFFSET	0x00000000
 #define ETH_TRANSCEIVER_STATE_UNPLUGGED	0x00000000
 #define ETH_TRANSCEIVER_STATE_PRESENT	0x00000001
 #define ETH_TRANSCEIVER_STATE_VALID	0x00000003
 #define ETH_TRANSCEIVER_STATE_UPDATING	0x00000008
-
+#define ETH_TRANSCEIVER_TYPE_MASK       0x0000FF00
+#define ETH_TRANSCEIVER_TYPE_OFFSET     0x8
+#define ETH_TRANSCEIVER_TYPE_NONE                       0x00
+#define ETH_TRANSCEIVER_TYPE_UNKNOWN                    0xFF
+#define ETH_TRANSCEIVER_TYPE_1G_PCC                     0x01
+#define ETH_TRANSCEIVER_TYPE_1G_ACC                     0x02
+#define ETH_TRANSCEIVER_TYPE_1G_LX                      0x03
+#define ETH_TRANSCEIVER_TYPE_1G_SX                      0x04
+#define ETH_TRANSCEIVER_TYPE_10G_SR                     0x05
+#define ETH_TRANSCEIVER_TYPE_10G_LR                     0x06
+#define ETH_TRANSCEIVER_TYPE_10G_LRM                    0x07
+#define ETH_TRANSCEIVER_TYPE_10G_ER                     0x08
+#define ETH_TRANSCEIVER_TYPE_10G_PCC                    0x09
+#define ETH_TRANSCEIVER_TYPE_10G_ACC                    0x0a
+#define ETH_TRANSCEIVER_TYPE_XLPPI                      0x0b
+#define ETH_TRANSCEIVER_TYPE_40G_LR4                    0x0c
+#define ETH_TRANSCEIVER_TYPE_40G_SR4                    0x0d
+#define ETH_TRANSCEIVER_TYPE_40G_CR4                    0x0e
+#define ETH_TRANSCEIVER_TYPE_100G_AOC                   0x0f
+#define ETH_TRANSCEIVER_TYPE_100G_SR4                   0x10
+#define ETH_TRANSCEIVER_TYPE_100G_LR4                   0x11
+#define ETH_TRANSCEIVER_TYPE_100G_ER4                   0x12
+#define ETH_TRANSCEIVER_TYPE_100G_ACC                   0x13
+#define ETH_TRANSCEIVER_TYPE_100G_CR4                   0x14
+#define ETH_TRANSCEIVER_TYPE_4x10G_SR                   0x15
+#define ETH_TRANSCEIVER_TYPE_25G_CA_N                   0x16
+#define ETH_TRANSCEIVER_TYPE_25G_ACC_S                  0x17
+#define ETH_TRANSCEIVER_TYPE_25G_CA_S                   0x18
+#define ETH_TRANSCEIVER_TYPE_25G_ACC_M                  0x19
+#define ETH_TRANSCEIVER_TYPE_25G_CA_L                   0x1a
+#define ETH_TRANSCEIVER_TYPE_25G_ACC_L                  0x1b
+#define ETH_TRANSCEIVER_TYPE_25G_SR                     0x1c
+#define ETH_TRANSCEIVER_TYPE_25G_LR                     0x1d
+#define ETH_TRANSCEIVER_TYPE_25G_AOC                    0x1e
+#define ETH_TRANSCEIVER_TYPE_4x10G                      0x1f
+#define ETH_TRANSCEIVER_TYPE_4x25G_CR                   0x20
+#define ETH_TRANSCEIVER_TYPE_1000BASET                  0x21
+#define ETH_TRANSCEIVER_TYPE_10G_BASET                  0x22
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR      0x30
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR      0x31
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR      0x32
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR     0x33
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR     0x34
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR     0x35
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC    0x36
 	u32 wol_info;
 	u32 wol_pkt_len;
 	u32 wol_pkt_details;
@@ -12161,7 +12336,7 @@
 #define FUNC_MF_CFG_MAX_BW_DEFAULT	0x00640000
 
 	u32 status;
-#define FUNC_STATUS_VLINK_DOWN		0x00000001
+#define FUNC_STATUS_VIRTUAL_LINK_UP	0x00000001
 
 	u32 mac_upper;
 #define FUNC_MF_CFG_UPPERMAC_MASK	0x0000ffff
@@ -12420,6 +12595,8 @@
 #define DRV_MSG_CODE_BW_UPDATE_ACK		0x32000000
 #define DRV_MSG_CODE_NIG_DRAIN			0x30000000
 #define DRV_MSG_CODE_S_TAG_UPDATE_ACK		0x3b000000
+#define DRV_MSG_CODE_GET_NVM_CFG_OPTION		0x003e0000
+#define DRV_MSG_CODE_SET_NVM_CFG_OPTION		0x003f0000
 #define DRV_MSG_CODE_INITIATE_PF_FLR            0x02010000
 #define DRV_MSG_CODE_VF_DISABLED_DONE		0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX		0xc0010000
@@ -12452,8 +12629,10 @@
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE		0x00200000
-#define DRV_MSG_CODE_RESOURCE_CMD	0x00230000
+#define DRV_MSG_CODE_RESOURCE_CMD		0x00230000
 #define DRV_MSG_CODE_GET_TLV_DONE		0x002f0000
+#define DRV_MSG_CODE_GET_ENGINE_CONFIG		0x00370000
+#define DRV_MSG_CODE_GET_PPFID_BITMAP		0x43000000
 
 #define RESOURCE_CMD_REQ_RESC_MASK		0x0000001F
 #define RESOURCE_CMD_REQ_RESC_SHIFT		0
@@ -12495,6 +12674,7 @@
 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK		0x000000FF
 #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT		3
 
+#define DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_MBI     0x3
 #define DRV_MB_PARAM_NVM_LEN_OFFSET		24
 
 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT	0
@@ -12583,6 +12763,22 @@
 #define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_MASK		0x0000FFFF
 #define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_OFFSET	0
 #define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE		0x00000002
+#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK		0x00010000
+
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT		0
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK		0x0000FFFF
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT		16
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_MASK		0x00010000
+#define DRV_MB_PARAM_NVM_CFG_OPTION_INIT_SHIFT		17
+#define DRV_MB_PARAM_NVM_CFG_OPTION_INIT_MASK		0x00020000
+#define DRV_MB_PARAM_NVM_CFG_OPTION_COMMIT_SHIFT	18
+#define DRV_MB_PARAM_NVM_CFG_OPTION_COMMIT_MASK		0x00040000
+#define DRV_MB_PARAM_NVM_CFG_OPTION_FREE_SHIFT		19
+#define DRV_MB_PARAM_NVM_CFG_OPTION_FREE_MASK		0x00080000
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_SEL_SHIFT	20
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_SEL_MASK	0x00100000
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_ID_SHIFT	24
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_ID_MASK	0x0f000000
 
 	u32 fw_mb_header;
 #define FW_MSG_CODE_MASK			0xffff0000
@@ -12634,10 +12830,24 @@
 #define FW_MB_PARAM_GET_PF_RDMA_BOTH		0x3
 
 /* get MFW feature support response */
+#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ	0x00000001
 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE		0x00000002
+#define FW_MB_PARAM_FEATURE_SUPPORT_VLINK	0x00010000
 
 #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR	(1 << 0)
 
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_MASK	0x00000001
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_SHIFT	0
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_MASK	0x00000002
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_SHIFT	1
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_MASK		0x00000004
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_SHIFT	2
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_MASK		0x00000008
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_SHIFT	3
+
+#define FW_MB_PARAM_PPFID_BITMAP_MASK	0xFF
+#define FW_MB_PARAM_PPFID_BITMAP_SHIFT	0
+
 	u32 drv_pulse_mb;
 #define DRV_PULSE_SEQ_MASK			0x00007fff
 #define DRV_PULSE_SYSTEM_TIME_MASK		0xffff0000
@@ -12652,6 +12862,11 @@
 	union drv_union_data union_data;
 };
 
+#define FW_MB_PARAM_NVM_PUT_FILE_REQ_OFFSET_MASK	0x00ffffff
+#define FW_MB_PARAM_NVM_PUT_FILE_REQ_OFFSET_SHIFT	0
+#define FW_MB_PARAM_NVM_PUT_FILE_REQ_SIZE_MASK		0xff000000
+#define FW_MB_PARAM_NVM_PUT_FILE_REQ_SIZE_SHIFT		24
+
 enum MFW_DRV_MSG_TYPE {
 	MFW_DRV_MSG_LINK_CHANGE,
 	MFW_DRV_MSG_FLR_FW_ACK_FAILED,
@@ -12659,7 +12874,7 @@
 	MFW_DRV_MSG_LLDP_DATA_UPDATED,
 	MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
 	MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
-	MFW_DRV_MSG_RESERVED4,
+	MFW_DRV_MSG_ERROR_RECOVERY,
 	MFW_DRV_MSG_BW_UPDATE,
 	MFW_DRV_MSG_S_TAG_UPDATE,
 	MFW_DRV_MSG_GET_LAN_STATS,
@@ -12669,8 +12884,9 @@
 	MFW_DRV_MSG_BW_UPDATE10,
 	MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
 	MFW_DRV_MSG_BW_UPDATE11,
-	MFW_DRV_MSG_OEM_CFG_UPDATE,
+	MFW_DRV_MSG_RESERVED,
 	MFW_DRV_MSG_GET_TLV_REQ,
+	MFW_DRV_MSG_OEM_CFG_UPDATE,
 	MFW_DRV_MSG_MAX
 };
 
@@ -13040,6 +13256,7 @@
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_OFFSET		0
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G		0x1
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G		0x2
+#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G		0x4
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G		0x8
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G		0x10
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G		0x20
@@ -13050,6 +13267,7 @@
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG			0x0
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_1G				0x1
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_10G			0x2
+#define NVM_CFG1_PORT_DRV_LINK_SPEED_20G			0x3
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_25G			0x4
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_40G			0x5
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_50G			0x6
@@ -13080,6 +13298,13 @@
 	u32 transceiver_00;
 	u32 device_ids;
 	u32 board_cfg;
+#define NVM_CFG1_PORT_PORT_TYPE_MASK                            0x000000FF
+#define NVM_CFG1_PORT_PORT_TYPE_OFFSET                          0
+#define NVM_CFG1_PORT_PORT_TYPE_UNDEFINED                       0x0
+#define NVM_CFG1_PORT_PORT_TYPE_MODULE                          0x1
+#define NVM_CFG1_PORT_PORT_TYPE_BACKPLANE                       0x2
+#define NVM_CFG1_PORT_PORT_TYPE_EXT_PHY                         0x3
+#define NVM_CFG1_PORT_PORT_TYPE_MODULE_SLAVE                    0x4
 	u32 mnm_10g_cap;
 	u32 mnm_10g_ctrl;
 	u32 mnm_10g_misc;
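
The DRV_MB_PARAM_NVM_CFG_OPTION_* and FW_MB_PARAM_* additions above follow this header's usual mask/shift convention for packing several fields into one u32 mailbox word. A minimal userspace sketch of composing such a parameter (not part of the patch; define names are shortened here and the option id 0x12 / entity id 3 are illustrative values only):

#include <stdint.h>
#include <stdio.h>

#define NVM_CFG_OPTION_ID_SHIFT		0
#define NVM_CFG_OPTION_ID_MASK		0x0000FFFF
#define NVM_CFG_OPTION_ENTITY_ID_SHIFT	24
#define NVM_CFG_OPTION_ENTITY_ID_MASK	0x0f000000

int main(void)
{
	uint32_t param = 0;

	/* Shift the field into place, then mask so an out-of-range value
	 * cannot corrupt a neighboring field.
	 */
	param |= (0x12u << NVM_CFG_OPTION_ID_SHIFT) & NVM_CFG_OPTION_ID_MASK;
	param |= (3u << NVM_CFG_OPTION_ENTITY_ID_SHIFT) &
		 NVM_CFG_OPTION_ENTITY_ID_MASK;

	printf("mailbox param = 0x%08x\n", (unsigned int)param); /* 0x03000012 */
	return 0;
}

Carrying both a _SHIFT and a _MASK per field is what makes this masking-after-shift idiom possible at every call site.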
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 70504dc..a4de9e3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -392,11 +392,15 @@
 }
 
 /* DMAE */
+#define QED_DMAE_FLAGS_IS_SET(params, flag) \
+	((params) != NULL && ((params)->flags & QED_DMAE_FLAG_##flag))
+
 static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 			    const u8 is_src_type_grc,
 			    const u8 is_dst_type_grc,
 			    struct qed_dmae_params *p_params)
 {
+	u8 src_pfid, dst_pfid, port_id;
 	u16 opcode_b = 0;
 	u32 opcode = 0;
 
@@ -407,14 +411,18 @@
 	opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC
 				   : DMAE_CMD_SRC_MASK_PCIE) <<
 		   DMAE_CMD_SRC_SHIFT;
-	opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) <<
+	src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ?
+		   p_params->src_pfid : p_hwfn->rel_pf_id;
+	opcode |= ((src_pfid & DMAE_CMD_SRC_PF_ID_MASK) <<
 		   DMAE_CMD_SRC_PF_ID_SHIFT);
 
 	/* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */
 	opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC
 				   : DMAE_CMD_DST_MASK_PCIE) <<
 		   DMAE_CMD_DST_SHIFT;
-	opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) <<
+	dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_DST) ?
+		   p_params->dst_pfid : p_hwfn->rel_pf_id;
+	opcode |= ((dst_pfid & DMAE_CMD_DST_PF_ID_MASK) <<
 		   DMAE_CMD_DST_PF_ID_SHIFT);
 
 	/* Whether to write a completion word to the completion destination:
@@ -425,12 +433,14 @@
 	opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
 		   DMAE_CMD_SRC_ADDR_RESET_SHIFT);
 
-	if (p_params->flags & QED_DMAE_FLAG_COMPLETION_DST)
+	if (QED_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST))
 		opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT);
 
 	opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT);
 
-	opcode |= ((p_hwfn->port_id) << DMAE_CMD_PORT_ID_SHIFT);
+	port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT)) ?
+		   p_params->port_id : p_hwfn->port_id;
+	opcode |= (port_id << DMAE_CMD_PORT_ID_SHIFT);
 
 	/* reset source address in next go */
 	opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
@@ -441,7 +451,7 @@
 		   DMAE_CMD_DST_ADDR_RESET_SHIFT);
 
 	/* SRC/DST VFID: all 1's - pf, otherwise VF id */
-	if (p_params->flags & QED_DMAE_FLAG_VF_SRC) {
+	if (QED_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) {
 		opcode |= 1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT;
 		opcode_b |= p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT;
 	} else {
@@ -449,7 +459,7 @@
 			    DMAE_CMD_SRC_VF_ID_SHIFT;
 	}
 
-	if (p_params->flags & QED_DMAE_FLAG_VF_DST) {
+	if (QED_DMAE_FLAGS_IS_SET(p_params, VF_DST)) {
 		opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT;
 		opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT;
 	} else {
@@ -703,6 +713,17 @@
 	int qed_status = 0;
 	u32 offset = 0;
 
+	if (p_hwfn->cdev->recov_in_prog) {
+		DP_VERBOSE(p_hwfn,
+			   NETIF_MSG_HW,
+			   "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n",
+			   src_addr, src_type, dst_addr, dst_type,
+			   size_in_dwords);
+
+		/* Let the flow complete w/o any error handling */
+		return 0;
+	}
+
 	qed_dmae_opcode(p_hwfn,
 			(src_type == QED_DMAE_ADDRESS_GRC),
 			(dst_type == QED_DMAE_ADDRESS_GRC),
@@ -722,7 +743,7 @@
 	for (i = 0; i <= cnt_split; i++) {
 		offset = length_limit * i;
 
-		if (!(p_params->flags & QED_DMAE_FLAG_RW_REPL_SRC)) {
+		if (!QED_DMAE_FLAGS_IS_SET(p_params, RW_REPL_SRC)) {
 			if (src_type == QED_DMAE_ADDRESS_GRC)
 				src_addr_split = src_addr + offset;
 			else
@@ -760,14 +781,12 @@
 
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
-		  u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
+		      u64 source_addr, u32 grc_addr, u32 size_in_dwords,
+		      struct qed_dmae_params *p_params)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
-	struct qed_dmae_params params;
 	int rc;
 
-	memset(&params, 0, sizeof(struct qed_dmae_params));
-	params.flags = flags;
 
 	mutex_lock(&p_hwfn->dmae_info.mutex);
 
@@ -775,7 +794,7 @@
 				      grc_addr_in_dw,
 				      QED_DMAE_ADDRESS_HOST_VIRT,
 				      QED_DMAE_ADDRESS_GRC,
-				      size_in_dwords, &params);
+				      size_in_dwords, p_params);
 
 	mutex_unlock(&p_hwfn->dmae_info.mutex);
 
@@ -785,21 +804,19 @@
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
 		      u32 grc_addr,
-		      dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+		      dma_addr_t dest_addr, u32 size_in_dwords,
+		      struct qed_dmae_params *p_params)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
-	struct qed_dmae_params params;
 	int rc;
 
-	memset(&params, 0, sizeof(struct qed_dmae_params));
-	params.flags = flags;
 
 	mutex_lock(&p_hwfn->dmae_info.mutex);
 
 	rc = qed_dmae_execute_command(p_hwfn, p_ptt, grc_addr_in_dw,
 				      dest_addr, QED_DMAE_ADDRESS_GRC,
 				      QED_DMAE_ADDRESS_HOST_VIRT,
-				      size_in_dwords, &params);
+				      size_in_dwords, p_params);
 
 	mutex_unlock(&p_hwfn->dmae_info.mutex);
 
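
The QED_DMAE_FLAGS_IS_SET() macro introduced earlier in this file is what lets the reworked qed_dmae_host2grc()/qed_dmae_grc2host() accept p_params == NULL for default behavior: every flag test short-circuits to "not set" instead of dereferencing a null pointer. A standalone sketch of the pattern (struct and flag names are simplified stand-ins, not the driver's):

#include <stdbool.h>
#include <stddef.h>

struct dmae_params {
	unsigned int flags;
#define DMAE_FLAG_VF_SRC	0x1
#define DMAE_FLAG_VF_DST	0x2
};

/* Token-pastes the flag name, and tolerates a NULL params pointer. */
#define DMAE_FLAGS_IS_SET(params, flag) \
	((params) != NULL && ((params)->flags & DMAE_FLAG_##flag))

static bool wants_vf_src(const struct dmae_params *p)
{
	return DMAE_FLAGS_IS_SET(p, VF_SRC);	/* false when p == NULL */
}
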
@@ -831,7 +848,6 @@
 		    struct qed_ptt *p_ptt, const char *phase)
 {
 	u32 size = PAGE_SIZE / 2, val;
-	struct qed_dmae_params params;
 	int rc = 0;
 	dma_addr_t p_phys;
 	void *p_virt;
@@ -864,9 +880,8 @@
 		   (u64)p_phys,
 		   p_virt, (u64)(p_phys + size), (u8 *)p_virt + size, size);
 
-	memset(&params, 0, sizeof(params));
 	rc = qed_dmae_host2host(p_hwfn, p_ptt, p_phys, p_phys + size,
-				size / 4 /* size_in_dwords */, &params);
+				size / 4, NULL);
 	if (rc) {
 		DP_NOTICE(p_hwfn,
 			  "DMAE sanity [%s]: qed_dmae_host2host() failed. rc = %d.\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 34193c2..a868d7f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -131,7 +131,7 @@
 
 		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 				       (uintptr_t)(p_init_val + i),
-				       addr + (i << 2), segment, 0);
+				       addr + (i << 2), segment, NULL);
 		if (rc)
 			return rc;
 
@@ -194,7 +194,7 @@
 	} else {
 		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 				       (uintptr_t)(buf + dmae_data_offset),
-				       addr, size, 0);
+				       addr, size, NULL);
 	}
 
 	return rc;
@@ -205,6 +205,7 @@
 			      u32 addr, u32 fill, u32 fill_count)
 {
 	static u32 zero_buffer[DMAE_MAX_RW_SIZE];
+	struct qed_dmae_params params = {};
 
 	memset(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE);
 
@@ -214,10 +215,10 @@
 	 * 3. p_hwfb->temp_data,
 	 * 4. fill_count
 	 */
-
+	params.flags = QED_DMAE_FLAG_RW_REPL_SRC;
 	return qed_dmae_host2grc(p_hwfn, p_ptt,
 				 (uintptr_t)(&zero_buffer[0]),
-				 addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
+				 addr, fill_count, &params);
 }
 
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
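
qed_init_fill_dmae() above now carries QED_DMAE_FLAG_RW_REPL_SRC in a qed_dmae_params struct rather than a raw flags word. The flag asks the DMAE engine to replicate the first source dword instead of advancing through the source buffer, turning a copy command into a fill. A userspace model of those semantics (illustration only, not driver code):

#include <stddef.h>
#include <stdint.h>

static void dmae_fill_model(uint32_t *dst, const uint32_t *src,
			    size_t fill_count, int repl_src)
{
	for (size_t i = 0; i < fill_count; i++)
		dst[i] = repl_src ? src[0] : src[i];	/* replicate vs copy */
}

Pointing src at the zero_buffer above with repl_src set reproduces the zero-fill of GRC memory that this helper performs.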
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index b22f464..9f51136 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -255,138 +255,298 @@
 #define PGLUE_ATTENTION_ICPL_VALID		(1 << 23)
 #define PGLUE_ATTENTION_ZLR_VALID		(1 << 25)
 #define PGLUE_ATTENTION_ILT_VALID		(1 << 23)
-static int qed_pglub_rbc_attn_cb(struct qed_hwfn *p_hwfn)
+
+int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt)
 {
 	u32 tmp;
 
-	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		     PGLUE_B_REG_TX_ERR_WR_DETAILS2);
+	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS2);
 	if (tmp & PGLUE_ATTENTION_VALID) {
 		u32 addr_lo, addr_hi, details;
 
-		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_TX_ERR_WR_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_TX_ERR_WR_ADD_63_32);
-		details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		details = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_TX_ERR_WR_DETAILS);
 
-		DP_INFO(p_hwfn,
-			"Illegal write by chip to [%08x:%08x] blocked.\n"
-			"Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
-			"Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
-			addr_hi, addr_lo, details,
-			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
-			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
-			GET_FIELD(details,
-				  PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
-			tmp,
-			GET_FIELD(tmp,
-				  PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
-			GET_FIELD(tmp,
-				  PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
-			GET_FIELD(tmp,
-				  PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
+		DP_NOTICE(p_hwfn,
+			  "Illegal write by chip to [%08x:%08x] blocked.\n"
+			  "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
+			  "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
+			  addr_hi, addr_lo, details,
+			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
+			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
+			  GET_FIELD(details,
+				    PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
+			  tmp,
+			  GET_FIELD(tmp,
+				    PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
+			  GET_FIELD(tmp,
+				    PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
+			  GET_FIELD(tmp,
+				    PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
 	}
 
-	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		     PGLUE_B_REG_TX_ERR_RD_DETAILS2);
+	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS2);
 	if (tmp & PGLUE_ATTENTION_RD_VALID) {
 		u32 addr_lo, addr_hi, details;
 
-		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_TX_ERR_RD_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_TX_ERR_RD_ADD_63_32);
-		details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		details = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_TX_ERR_RD_DETAILS);
 
-		DP_INFO(p_hwfn,
-			"Illegal read by chip from [%08x:%08x] blocked.\n"
-			" Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
-			" Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
-			addr_hi, addr_lo, details,
-			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
-			(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
-			GET_FIELD(details,
-				  PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
-			tmp,
-			GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
-									 : 0,
-			GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
-			GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
-									: 0);
+		DP_NOTICE(p_hwfn,
+			  "Illegal read by chip from [%08x:%08x] blocked.\n"
+			  "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
+			  "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
+			  addr_hi, addr_lo, details,
+			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
+			  (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
+			  GET_FIELD(details,
+				    PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
+			  tmp,
+			  GET_FIELD(tmp,
+				    PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
+			  GET_FIELD(tmp,
+				    PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
+			  GET_FIELD(tmp,
+				    PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
 	}
 
-	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		     PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
+	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
 	if (tmp & PGLUE_ATTENTION_ICPL_VALID)
-		DP_INFO(p_hwfn, "ICPL error - %08x\n", tmp);
+		DP_NOTICE(p_hwfn, "ICPL error - %08x\n", tmp);
 
-	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		     PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
+	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
 	if (tmp & PGLUE_ATTENTION_ZLR_VALID) {
 		u32 addr_hi, addr_lo;
 
-		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_MASTER_ZLR_ERR_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_MASTER_ZLR_ERR_ADD_63_32);
 
-		DP_INFO(p_hwfn, "ZLR eror - %08x [Address %08x:%08x]\n",
-			tmp, addr_hi, addr_lo);
+		DP_NOTICE(p_hwfn, "ZLR error - %08x [Address %08x:%08x]\n",
+			  tmp, addr_hi, addr_lo);
 	}
 
-	tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-		     PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
+	tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
 	if (tmp & PGLUE_ATTENTION_ILT_VALID) {
 		u32 addr_hi, addr_lo, details;
 
-		addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_lo = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_VF_ILT_ERR_ADD_31_0);
-		addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		addr_hi = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_VF_ILT_ERR_ADD_63_32);
-		details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+		details = qed_rd(p_hwfn, p_ptt,
 				 PGLUE_B_REG_VF_ILT_ERR_DETAILS);
 
-		DP_INFO(p_hwfn,
-			"ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
-			details, tmp, addr_hi, addr_lo);
+		DP_NOTICE(p_hwfn,
+			  "ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
+			  details, tmp, addr_hi, addr_lo);
 	}
 
 	/* Clear the indications */
-	qed_wr(p_hwfn, p_hwfn->p_dpc_ptt,
-	       PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2));
+	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_LATCHED_ERRORS_CLR, BIT(2));
 
 	return 0;
 }
 
-#define QED_DORQ_ATTENTION_REASON_MASK	(0xfffff)
-#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
-#define QED_DORQ_ATTENTION_SIZE_MASK	(0x7f)
-#define QED_DORQ_ATTENTION_SIZE_SHIFT	(16)
-static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
+static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
 {
-	u32 reason;
+	return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
+}
 
-	reason = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, DORQ_REG_DB_DROP_REASON) &
-			QED_DORQ_ATTENTION_REASON_MASK;
-	if (reason) {
-		u32 details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-				     DORQ_REG_DB_DROP_DETAILS);
+#define QED_DORQ_ATTENTION_REASON_MASK		(0xfffff)
+#define QED_DORQ_ATTENTION_OPAQUE_MASK		(0xffff)
+#define QED_DORQ_ATTENTION_OPAQUE_SHIFT		(0x0)
+#define QED_DORQ_ATTENTION_SIZE_MASK		(0x7f)
+#define QED_DORQ_ATTENTION_SIZE_SHIFT		(16)
 
-		DP_INFO(p_hwfn->cdev,
-			"DORQ db_drop: address 0x%08x Opaque FID 0x%04x Size [bytes] 0x%08x Reason: 0x%08x\n",
-			qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-			       DORQ_REG_DB_DROP_DETAILS_ADDRESS),
-			(u16)(details & QED_DORQ_ATTENTION_OPAQUE_MASK),
-			GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
-			reason);
+#define QED_DB_REC_COUNT                        1000
+#define QED_DB_REC_INTERVAL                     100
+
+static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt)
+{
+	u32 count = QED_DB_REC_COUNT;
+	u32 usage = 1;
+
+	/* Flush any pending (e)dpms as they may never arrive */
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
+
+	/* Wait for usage to zero or for the count to run out. This is needed
+	 * since EDPM doorbell transactions can take multiple 64b cycles and as
+	 * such can "split" over PCI. The doorbell drop can thus happen with
+	 * half an EDPM in the queue and the other half dropped. Another EDPM
+	 * doorbell to the same address (from the doorbell recovery mechanism
+	 * or from the doorbelling entity) could have its first half dropped
+	 * and its second half interpreted as a continuation of the first. To
+	 * prevent such malformed doorbells from reaching the device, flush
+	 * the queue before releasing the overflow sticky indication.
+	 */
+	while (count-- && usage) {
+		usage = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_USAGE_CNT);
+		udelay(QED_DB_REC_INTERVAL);
 	}
 
+	/* should have been depleted by now */
+	if (usage) {
+		DP_NOTICE(p_hwfn->cdev,
+			  "DB recovery: doorbell usage failed to zero after %d usec. usage was %x\n",
+			  QED_DB_REC_INTERVAL * QED_DB_REC_COUNT, usage);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 attn_ovfl, cur_ovfl;
+	int rc;
+
+	attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT,
+				       &p_hwfn->db_recovery_info.overflow);
+	cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+	if (!cur_ovfl && !attn_ovfl)
+		return 0;
+
+	DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n",
+		  attn_ovfl, cur_ovfl);
+
+	if (cur_ovfl && !p_hwfn->db_bar_no_edpm) {
+		rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
+	/* Release overflow sticky indication (stop silently dropping everything) */
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+
+	/* Repeat all last doorbells (doorbell drop recovery) */
+	qed_db_recovery_execute(p_hwfn);
+
+	return 0;
+}
+
+static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn)
+{
+	struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+	u32 overflow;
+	int rc;
+
+	overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+	if (!overflow)
+		goto out;
+
+	/* Run PF doorbell recovery in next periodic handler */
+	set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow);
+
+	if (!p_hwfn->db_bar_no_edpm) {
+		rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
+		if (rc)
+			goto out;
+	}
+
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+out:
+	/* Schedule the handler even if overflow was not detected */
+	qed_periodic_db_rec_start(p_hwfn);
+}
+
+static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
+{
+	u32 int_sts, first_drop_reason, details, address, all_drops_reason;
+	struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+
+	/* int_sts may be zero since all PFs were interrupted for doorbell
+	 * overflow, but another PF already handled it; we can abort here. If
+	 * this PF also requires overflow recovery, we will be interrupted
+	 * again. The masked almost-full indication may also be set; ignore it.
+	 */
+	int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+	if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
+		return 0;
+
+	DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
+
+	/* check if db_drop or overflow happened */
+	if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
+		       DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
+		/* Obtain data about db drop/overflow */
+		first_drop_reason = qed_rd(p_hwfn, p_ptt,
+					   DORQ_REG_DB_DROP_REASON) &
+		    QED_DORQ_ATTENTION_REASON_MASK;
+		details = qed_rd(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS);
+		address = qed_rd(p_hwfn, p_ptt,
+				 DORQ_REG_DB_DROP_DETAILS_ADDRESS);
+		all_drops_reason = qed_rd(p_hwfn, p_ptt,
+					  DORQ_REG_DB_DROP_DETAILS_REASON);
+
+		/* Log info */
+		DP_NOTICE(p_hwfn->cdev,
+			  "Doorbell drop occurred\n"
+			  "Address\t\t0x%08x\t(second BAR address)\n"
+			  "FID\t\t0x%04x\t\t(Opaque FID)\n"
+			  "Size\t\t0x%04x\t\t(in bytes)\n"
+			  "1st drop reason\t0x%08x\t(details on first drop since last handling)\n"
+			  "Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n",
+			  address,
+			  GET_FIELD(details, QED_DORQ_ATTENTION_OPAQUE),
+			  GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
+			  first_drop_reason, all_drops_reason);
+
+		/* Clear the doorbell drop details and prepare for next drop */
+		qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
+
+		/* Mark the interrupt as handled (note: even if the drop was
+		 * due to a reason other than overflow, we mark it as handled).
+		 */
+		qed_wr(p_hwfn,
+		       p_ptt,
+		       DORQ_REG_INT_STS_WR,
+		       DORQ_REG_INT_STS_DB_DROP |
+		       DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR);
+
+		/* If there are no indications other than drop indications, success */
+		if ((int_sts & ~(DORQ_REG_INT_STS_DB_DROP |
+				 DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR |
+				 DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) == 0)
+			return 0;
+	}
+
+	/* Some other indication was present - non-recoverable */
+	DP_INFO(p_hwfn, "DORQ fatal attention\n");
+
 	return -EINVAL;
 }
 
+static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
+{
+	p_hwfn->db_recovery_info.dorq_attn = true;
+	qed_dorq_attn_overflow(p_hwfn);
+
+	return qed_dorq_attn_int_sts(p_hwfn);
+}
+
+static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn)
+{
+	if (p_hwfn->db_recovery_info.dorq_attn)
+		goto out;
+
+	/* Call DORQ callback if the attention was missed */
+	qed_dorq_attn_cb(p_hwfn);
+out:
+	p_hwfn->db_recovery_info.dorq_attn = false;
+}
+
 /* Instead of major changes to the data-structure, we have some 'special'
  * identifiers for sources that changed meaning between adapters.
  */
@@ -422,7 +582,7 @@
 			{"PGLUE misc_flr", ATTENTION_SINGLE,
 			 NULL, MAX_BLOCK_ID},
 			{"PGLUE B RBC", ATTENTION_PAR_INT,
-			 qed_pglub_rbc_attn_cb, BLOCK_PGLUE_B},
+			 qed_pglueb_rbc_attn_cb, BLOCK_PGLUE_B},
 			{"PGLUE misc_mctp", ATTENTION_SINGLE,
 			 NULL, MAX_BLOCK_ID},
 			{"Flash event", ATTENTION_SINGLE, NULL, MAX_BLOCK_ID},
@@ -654,18 +814,12 @@
 {
 	u16 rc = 0, index;
 
-	/* Make certain HW write took affect */
-	mmiowb();
-
 	index = le16_to_cpu(p_sb_desc->sb_attn->sb_index);
 	if (p_sb_desc->index != index) {
 		p_sb_desc->index	= index;
 		rc		      = QED_SB_ATT_IDX;
 	}
 
-	/* Make certain we got a consistent view with HW */
-	mmiowb();
-
 	return rc;
 }
 
@@ -939,7 +1093,7 @@
 						snprintf(bit_name, 30,
 							 p_aeu->bit_name, num);
 					else
-						strncpy(bit_name,
+						strlcpy(bit_name,
 							p_aeu->bit_name, 30);
 
 					/* We now need to pass bitmask in its
@@ -960,6 +1114,9 @@
 		}
 	}
 
+	/* Handle missed DORQ attention */
+	qed_dorq_attn_handler(p_hwfn);
+
 	/* Clear IGU indication for the deasserted bits */
 	DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview +
 				    GTT_BAR0_MAP_REG_IGU_CMD +
@@ -1050,7 +1207,6 @@
 	/* Both segments (interrupts & acks) are written to same place address;
 	 * Need to guarantee all commands will be received (in-order) by HW.
 	 */
-	mmiowb();
 	barrier();
 }
 
@@ -1352,10 +1508,10 @@
 
 		qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&phys_addr,
 				  CAU_REG_SB_ADDR_MEMORY +
-				  igu_sb_id * sizeof(u64), 2, 0);
+				  igu_sb_id * sizeof(u64), 2, NULL);
 		qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry,
 				  CAU_REG_SB_VAR_MEMORY +
-				  igu_sb_id * sizeof(u64), 2, 0);
+				  igu_sb_id * sizeof(u64), 2, NULL);
 	} else {
 		/* Initialize Status Block Address */
 		STORE_RT_REG_AGG(p_hwfn,
@@ -1685,9 +1841,6 @@
 	qed_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0xfff);
 	qed_wr(p_hwfn, p_ptt, IGU_REG_ATTENTION_ENABLE, 0xfff);
 
-	/* Flush the writes to IGU */
-	mmiowb();
-
 	/* Unmask AEU signals toward IGU */
 	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff);
 }
@@ -1751,9 +1904,6 @@
 
 	qed_wr(p_hwfn, p_ptt, IGU_REG_COMMAND_REG_CTRL, cmd_ctrl);
 
-	/* Flush the write to IGU */
-	mmiowb();
-
 	/* calculate where to read the status bit from */
 	sb_bit = 1 << (igu_sb_id % 32);
 	sb_bit_addr = igu_sb_id / 32 * sizeof(u32);
@@ -2212,7 +2362,7 @@
 
 	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
 			       sb_id * sizeof(u64),
-			       (u64)(uintptr_t)&sb_entry, 2, 0);
+			       (u64)(uintptr_t)&sb_entry, 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 		return rc;
@@ -2226,7 +2376,7 @@
 	rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 			       (u64)(uintptr_t)&sb_entry,
 			       CAU_REG_SB_VAR_MEMORY +
-			       sb_id * sizeof(u64), 2, 0);
+			       sb_id * sizeof(u64), 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_host2grc failed %d\n", rc);
 		return rc;
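
The doorbell-recovery flow added above (qed_db_rec_flush_queue(), qed_db_rec_handler(), qed_dorq_attn_overflow()) depends on strict ordering: drain pending EDPM doorbells, only then clear DORQ_REG_PF_OVFL_STICKY, and only then replay the recorded doorbells. A condensed, compilable model of that ordering (the callees are stand-in names for the driver steps, not real qed functions):

int flush_doorbell_queue(void);		/* polls the usage count toward zero */
void clear_overflow_sticky(void);	/* device resumes accepting doorbells */
void replay_recorded_doorbells(void);	/* re-issues the last doorbells */

static int db_recovery_model(int overflow_sticky, int edpm_enabled)
{
	if (!overflow_sticky)
		return 0;

	/* 1. Drain half-transferred EDPM doorbells before anything else. */
	if (edpm_enabled && flush_doorbell_queue())
		return -1;	/* queue never drained; stay overflowed */

	/* 2. Only now release the sticky indication... */
	clear_overflow_sticky();

	/* 3. ...and replay what was dropped while it was set. */
	replay_recorded_doorbells();

	return 0;
}
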
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 54b4ee0..d473b52 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -190,6 +190,16 @@
  */
 void qed_int_disable_post_isr_release(struct qed_dev *cdev);
 
+/**
+ * @brief - Doorbell Recovery handler.
+ *          Run doorbell recovery in case of PF overflow (and flush DORQ if
+ *          needed).
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
 #define QED_CAU_DEF_RX_TIMER_RES 0
 #define QED_CAU_DEF_TX_TIMER_RES 0
 
@@ -421,4 +431,7 @@
 
 #define QED_MAPPING_MEMORY_SIZE(dev)	(NUM_OF_SBS(dev))
 
+int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685..5585c18 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1082,7 +1082,7 @@
 static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
 				   struct qed_dev_iscsi_info *info)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
 
 	int rc;
 
@@ -1141,8 +1141,8 @@
 	}
 
 	/* Stop the iscsi */
-	rc = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev),
-				    QED_SPQ_MODE_EBLOCK, NULL);
+	rc = qed_sp_iscsi_func_stop(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				    NULL);
 	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
 
 	return rc;
@@ -1161,9 +1161,8 @@
 		return 0;
 	}
 
-	rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev),
-				     QED_SPQ_MODE_EBLOCK, NULL, event_context,
-				     async_event_cb);
+	rc = qed_sp_iscsi_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				     NULL, event_context, async_event_cb);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to start iscsi\n");
 		return rc;
@@ -1182,8 +1181,7 @@
 		return -ENOMEM;
 	}
 
-	rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev),
-				      tid_info);
+	rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to gather task information\n");
 		qed_iscsi_stop(cdev);
@@ -1215,7 +1213,7 @@
 		return -ENOMEM;
 
 	/* Acquire the connection */
-	rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+	rc = qed_iscsi_acquire_connection(QED_AFFIN_HWFN(cdev), NULL,
 					  &hash_con->con);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to acquire Connection\n");
@@ -1229,7 +1227,7 @@
 	hash_add(cdev->connections, &hash_con->node, *handle);
 
 	if (p_doorbell)
-		*p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev),
+		*p_doorbell = qed_iscsi_get_db_addr(QED_AFFIN_HWFN(cdev),
 						    *handle);
 
 	return 0;
@@ -1247,7 +1245,7 @@
 	}
 
 	hlist_del(&hash_con->node);
-	qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+	qed_iscsi_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
 	kfree(hash_con);
 
 	return 0;
@@ -1324,7 +1322,7 @@
 	/* Set default values on other connection fields */
 	con->offl_flags = 0x1;
 
-	return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_iscsi_conn_offload(QED_AFFIN_HWFN(cdev), con,
 					 QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -1351,7 +1349,7 @@
 	con->first_seq_length = conn_info->first_seq_length;
 	con->exp_stat_sn = conn_info->exp_stat_sn;
 
-	return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_iscsi_conn_update(QED_AFFIN_HWFN(cdev), con,
 					QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -1366,8 +1364,7 @@
 		return -EINVAL;
 	}
 
-	return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev),
-					  hash_con->con,
+	return qed_sp_iscsi_conn_clear_sq(QED_AFFIN_HWFN(cdev), hash_con->con,
 					  QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -1385,14 +1382,13 @@
 
 	hash_con->con->abortive_dsconnect = abrt_conn;
 
-	return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev),
-					   hash_con->con,
+	return qed_sp_iscsi_conn_terminate(QED_AFFIN_HWFN(cdev), hash_con->con,
 					   QED_SPQ_MODE_EBLOCK, NULL);
 }
 
 static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
 {
-	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+	return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
 }
 
 static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1407,8 +1403,7 @@
 		return -EINVAL;
 	}
 
-	return qed_sp_iscsi_mac_update(QED_LEADING_HWFN(cdev),
-				       hash_con->con,
+	return qed_sp_iscsi_mac_update(QED_AFFIN_HWFN(cdev), hash_con->con,
 				       QED_SPQ_MODE_EBLOCK, NULL);
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index e860bdf..65ec16a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -63,7 +63,12 @@
 #define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
 
 #define QED_IWARP_INVALID_TCP_CID	0xffffffff
-#define QED_IWARP_RCV_WND_SIZE_DEF	(256 * 1024)
+
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_2P (200 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_4P (100 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_2P (150 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_4P (90 * 1024)
+
 #define QED_IWARP_RCV_WND_SIZE_MIN	(0xffff)
 #define TIMESTAMP_HEADER_SIZE		(12)
 #define QED_IWARP_MAX_FIN_RT_DEFAULT	(2)
@@ -377,7 +382,7 @@
 	}
 }
 
-const static char *iwarp_state_names[] = {
+static const char * const iwarp_state_names[] = {
 	"IDLE",
 	"RTS",
 	"TERMINATE",
@@ -532,7 +537,8 @@
 
 	/* Make sure ep is closed before returning and freeing memory. */
 	if (ep) {
-		while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+		while (READ_ONCE(ep->state) != QED_IWARP_EP_CLOSED &&
+		       wait_count++ < 200)
 			msleep(100);
 
 		if (ep->state != QED_IWARP_EP_CLOSED)
@@ -935,9 +941,8 @@
 	}
 	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 
-	list_del(&ep->list_entry);
-	list_add_tail(&ep->list_entry,
-		      &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+	list_move_tail(&ep->list_entry,
+		       &p_hwfn->p_rdma_info->iwarp.ep_free_list);
 
 	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 }
@@ -1023,8 +1028,6 @@
 
 	params.ep_context = ep;
 
-	ep->state = QED_IWARP_EP_CLOSED;
-
 	switch (fw_return_code) {
 	case RDMA_RETURN_OK:
 		ep->qp->max_rd_atomic_req = ep->cm_info.ord;
@@ -1084,6 +1087,10 @@
 		break;
 	}
 
+	if (fw_return_code != RDMA_RETURN_OK)
+		/* paired with READ_ONCE in destroy_qp */
+		smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
+
 	ep->event_cb(ep->cb_context, &params);
 
 	/* on passive side, if there is no associated QP (REJECT) we need to
@@ -1689,6 +1696,15 @@
 
 	eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0);
 
+	if (!ether_addr_equal(ethh->h_dest,
+			      p_hwfn->p_rdma_info->iwarp.mac_addr)) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "Got unexpected mac %pM instead of %pM\n",
+			   ethh->h_dest, p_hwfn->p_rdma_info->iwarp.mac_addr);
+		return -EINVAL;
+	}
+
 	ether_addr_copy(remote_mac_addr, ethh->h_source);
 	ether_addr_copy(local_mac_addr, ethh->h_dest);
 
@@ -2270,8 +2286,8 @@
 		if (rc == -EBUSY)
 			break;
 
-		list_del(&mpa_buf->list_entry);
-		list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list);
+		list_move_tail(&mpa_buf->list_entry,
+			       &iwarp_info->mpa_buf_list);
 
 		if (rc) {	/* different error, don't continue */
 			DP_NOTICE(p_hwfn, "process pkts failed rc=%d\n", rc);
@@ -2520,7 +2536,7 @@
 		memset(fpdu, 0, sizeof(*fpdu));
 }
 
-static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn)
 {
 	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
 	int rc = 0;
@@ -2555,8 +2571,9 @@
 		iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
 	}
 
-	qed_llh_remove_mac_filter(p_hwfn,
-				  p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
+	qed_llh_remove_mac_filter(p_hwfn->cdev, 0,
+				  p_hwfn->p_rdma_info->iwarp.mac_addr);
+
 	return rc;
 }
 
@@ -2601,12 +2618,12 @@
 static int
 qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 		    struct qed_rdma_start_in_params *params,
-		    struct qed_ptt *p_ptt)
+		    u32 rcv_wnd_size)
 {
 	struct qed_iwarp_info *iwarp_info;
 	struct qed_ll2_acquire_data data;
 	struct qed_ll2_cbs cbs;
-	u32 mpa_buff_size;
+	u32 buff_size;
 	u16 n_ooo_bufs;
 	int rc = 0;
 	int i;
@@ -2620,7 +2637,7 @@
 
 	ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr);
 
-	rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+	rc = qed_llh_add_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 	if (rc)
 		return rc;
 
@@ -2629,11 +2646,12 @@
 	cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
 	cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
 	cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+	cbs.slowpath_cb = NULL;
 	cbs.cookie = p_hwfn;
 
 	memset(&data, 0, sizeof(data));
 	data.input.conn_type = QED_LL2_TYPE_IWARP;
-	data.input.mtu = QED_IWARP_MAX_SYN_PKT_SIZE;
+	data.input.mtu = params->max_mtu;
 	data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE;
 	data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE;
 	data.input.tx_max_bds_per_packet = 1;	/* will never be fragmented */
@@ -2645,7 +2663,7 @@
 	rc = qed_ll2_acquire_connection(p_hwfn, &data);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
-		qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+		qed_llh_remove_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 		return rc;
 	}
 
@@ -2655,9 +2673,10 @@
 		goto err;
 	}
 
+	buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu);
 	rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
 					 QED_IWARP_LL2_SYN_RX_SIZE,
-					 QED_IWARP_MAX_SYN_PKT_SIZE,
+					 buff_size,
 					 iwarp_info->ll2_syn_handle);
 	if (rc)
 		goto err;
@@ -2666,7 +2685,7 @@
 	data.input.conn_type = QED_LL2_TYPE_OOO;
 	data.input.mtu = params->max_mtu;
 
-	n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) /
+	n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) /
 		     iwarp_info->max_mtu;
 	n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE);
 
@@ -2699,6 +2718,8 @@
 	data.input.rx_num_desc = n_ooo_bufs * 2;
 	data.input.tx_num_desc = data.input.rx_num_desc;
 	data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU;
+	data.input.tx_tc = PKT_LB_TC;
+	data.input.tx_dest = QED_LL2_TX_DEST_LB;
 	data.p_connection_handle = &iwarp_info->ll2_mpa_handle;
 	data.input.secondary_queue = true;
 	data.cbs = &cbs;
@@ -2711,10 +2732,9 @@
 	if (rc)
 		goto err;
 
-	mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu);
 	rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
 					 data.input.rx_num_desc,
-					 mpa_buff_size,
+					 buff_size,
 					 iwarp_info->ll2_mpa_handle);
 	if (rc)
 		goto err;
@@ -2727,7 +2747,7 @@
 
 	iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps;
 
-	iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL);
+	iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL);
 	if (!iwarp_info->mpa_intermediate_buf)
 		goto err;
 
@@ -2749,21 +2769,35 @@
 			      &iwarp_info->mpa_buf_list);
 	return rc;
 err:
-	qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+	qed_iwarp_ll2_stop(p_hwfn);
 
 	return rc;
 }
 
-int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+static struct {
+	u32 two_ports;
+	u32 four_ports;
+} qed_iwarp_rcv_wnd_size[MAX_CHIP_IDS] = {
+	{QED_IWARP_RCV_WND_SIZE_DEF_BB_2P, QED_IWARP_RCV_WND_SIZE_DEF_BB_4P},
+	{QED_IWARP_RCV_WND_SIZE_DEF_AH_2P, QED_IWARP_RCV_WND_SIZE_DEF_AH_4P}
+};
+
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
 		    struct qed_rdma_start_in_params *params)
 {
+	struct qed_dev *cdev = p_hwfn->cdev;
 	struct qed_iwarp_info *iwarp_info;
+	enum chip_ids chip_id;
 	u32 rcv_wnd_size;
 
 	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
 
 	iwarp_info->tcp_flags = QED_IWARP_TS_EN;
-	rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF;
+
+	chip_id = QED_IS_BB(cdev) ? CHIP_BB : CHIP_K2;
+	rcv_wnd_size = (qed_device_num_ports(cdev) == 4) ?
+		qed_iwarp_rcv_wnd_size[chip_id].four_ports :
+		qed_iwarp_rcv_wnd_size[chip_id].two_ports;
 
 	/* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */
 	iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) -
@@ -2786,10 +2820,10 @@
 				  qed_iwarp_async_event);
 	qed_ooo_setup(p_hwfn);
 
-	return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
+	return qed_iwarp_ll2_start(p_hwfn, params, rcv_wnd_size);
 }
 
-int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn)
 {
 	int rc;
 
@@ -2800,7 +2834,7 @@
 
 	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
 
-	return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+	return qed_iwarp_ll2_stop(p_hwfn);
 }
 
 static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
@@ -2817,7 +2851,9 @@
 	params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ?
 			 0 : -ECONNRESET;
 
-	ep->state = QED_IWARP_EP_CLOSED;
+	/* paired with READ_ONCE in destroy_qp */
+	smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
+
 	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 	list_del(&ep->list_entry);
 	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
@@ -2906,7 +2942,8 @@
 	params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
 	params.ep_context = ep;
 	params.cm_info = &ep->cm_info;
-	ep->state = QED_IWARP_EP_CLOSED;
+	/* paired with READ_ONCE in destroy_qp */
+	smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
 
 	switch (fw_return_code) {
 	case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET:
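
qed_iwarp_setup() above now picks the default receive window per chip family and port count, and the TCP window-scale factor is still derived from it relative to QED_IWARP_RCV_WND_SIZE_MIN, as in the existing `ilog2(rcv_wnd_size) - ilog2(QED_IWARP_RCV_WND_SIZE_MIN)` line. A standalone sketch of that computation (plain-C stand-in for the kernel's ilog2(); 200 * 1024 is the new BB 2-port default):

#include <stdint.h>

#define RCV_WND_SIZE_MIN 0xffff

static unsigned int ilog2_u32(uint32_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int rcv_wnd_scale(uint32_t rcv_wnd_size)
{
	/* 200KB default: ilog2(204800) = 17, ilog2(0xffff) = 15, scale = 2 */
	return ilog2_u32(rcv_wnd_size) - ilog2_u32(RCV_WND_SIZE_MIN);
}
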
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index b8f612d..c1b2057 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -46,7 +46,6 @@
 
 #define QED_IWARP_LL2_SYN_TX_SIZE       (128)
 #define QED_IWARP_LL2_SYN_RX_SIZE       (256)
-#define QED_IWARP_MAX_SYN_PKT_SIZE      (128)
 
 #define QED_IWARP_LL2_OOO_DEF_TX_SIZE   (256)
 #define QED_IWARP_MAX_OOO		(16)
@@ -184,13 +183,13 @@
 
 int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
 
-int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
 		    struct qed_rdma_start_in_params *params);
 
 void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
 			      struct iwarp_init_func_ramrod_data *p_ramrod);
 
-int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn);
 
 void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 67c02ea..1a5fc2a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -609,6 +609,10 @@
 			  (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) &&
 			   !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED)));
 
+		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL,
+			  (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) &&
+			   !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED)));
+
 		SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL,
 			  !!(accept_filter & QED_ACCEPT_BCAST));
 
@@ -744,6 +748,11 @@
 		return rc;
 	}
 
+	if (p_params->update_ctl_frame_check) {
+		p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en;
+		p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en;
+	}
+
 	/* Update mcast bins for VFs, PF doesn't use this functionality */
 	qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params);
 
@@ -1622,10 +1631,9 @@
 	}
 }
 
-static void __qed_get_vport_pstats(struct qed_hwfn *p_hwfn,
-				   struct qed_ptt *p_ptt,
-				   struct qed_eth_stats *p_stats,
-				   u16 statistics_bin)
+static noinline_for_stack void
+__qed_get_vport_pstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		       struct qed_eth_stats *p_stats, u16 statistics_bin)
 {
 	struct eth_pstorm_per_queue_stat pstats;
 	u32 pstats_addr = 0, pstats_len = 0;
@@ -1652,10 +1660,9 @@
 	    HILO_64_REGPAIR(pstats.error_drop_pkts);
 }
 
-static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn,
-				   struct qed_ptt *p_ptt,
-				   struct qed_eth_stats *p_stats,
-				   u16 statistics_bin)
+static noinline_for_stack void
+__qed_get_vport_tstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		       struct qed_eth_stats *p_stats, u16 statistics_bin)
 {
 	struct tstorm_per_port_stat tstats;
 	u32 tstats_addr, tstats_len;
@@ -1700,10 +1707,9 @@
 	}
 }
 
-static void __qed_get_vport_ustats(struct qed_hwfn *p_hwfn,
-				   struct qed_ptt *p_ptt,
-				   struct qed_eth_stats *p_stats,
-				   u16 statistics_bin)
+static noinline_for_stack void
+__qed_get_vport_ustats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		       struct qed_eth_stats *p_stats, u16 statistics_bin)
 {
 	struct eth_ustorm_per_queue_stat ustats;
 	u32 ustats_addr = 0, ustats_len = 0;
@@ -1742,10 +1748,9 @@
 	}
 }
 
-static void __qed_get_vport_mstats(struct qed_hwfn *p_hwfn,
-				   struct qed_ptt *p_ptt,
-				   struct qed_eth_stats *p_stats,
-				   u16 statistics_bin)
+static noinline_for_stack void
+__qed_get_vport_mstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		       struct qed_eth_stats *p_stats, u16 statistics_bin)
 {
 	struct eth_mstorm_per_queue_stat mstats;
 	u32 mstats_addr = 0, mstats_len = 0;
@@ -1771,9 +1776,9 @@
 	    HILO_64_REGPAIR(mstats.tpa_coalesced_bytes);
 }
 
-static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn,
-				       struct qed_ptt *p_ptt,
-				       struct qed_eth_stats *p_stats)
+static noinline_for_stack void
+__qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			   struct qed_eth_stats *p_stats)
 {
 	struct qed_eth_stats_common *p_common = &p_stats->common;
 	struct port_stats port_stats;
@@ -1889,6 +1894,7 @@
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 		struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
 						    :  NULL;
+		bool b_get_port_stats;
 
 		if (IS_PF(cdev)) {
 			/* The main vport index is relative first */
@@ -1903,8 +1909,9 @@
 			continue;
 		}
 
+		b_get_port_stats = IS_PF(cdev) && IS_LEAD_HWFN(p_hwfn);
 		__qed_get_vport_stats(p_hwfn, p_ptt, stats, fw_vport,
-				      IS_PF(cdev) ? true : false);
+				      b_get_port_stats);
 
 out:
 		if (IS_PF(cdev) && p_ptt)
@@ -2100,7 +2107,7 @@
 
 	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
 			       p_cid->sb_igu_id * sizeof(u64),
-			       (u64)(uintptr_t)&sb_entry, 2, 0);
+			       (u64)(uintptr_t)&sb_entry, 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 		return rc;
@@ -2133,7 +2140,7 @@
 
 	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
 			       p_cid->sb_igu_id * sizeof(u64),
-			       (u64)(uintptr_t)&sb_entry, 2, 0);
+			       (u64)(uintptr_t)&sb_entry, 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 		return rc;
@@ -2207,7 +2214,7 @@
 			u16 num_queues = 0;
 
 			/* Since the feature controls only queue-zones,
-			 * make sure we have the contexts [rx, tx, xdp] to
+			 * make sure we have the contexts [rx, xdp, tcs] to
 			 * match.
 			 */
 			for_each_hwfn(cdev, i) {
@@ -2217,7 +2224,8 @@
 				u16 cids;
 
 				cids = hwfn->pf_params.eth_pf_params.num_cons;
-				num_queues += min_t(u16, l2_queues, cids / 3);
+				cids /= (2 + info->num_tc);
+				num_queues += min_t(u16, l2_queues, cids);
 			}
 
 			/* queues might theoretically be >256, but interrupts'
@@ -2688,7 +2696,8 @@
 	if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) {
 		accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED |
 						 QED_ACCEPT_MCAST_UNMATCHED;
-		accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
+		accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED |
+						 QED_ACCEPT_MCAST_UNMATCHED;
 	} else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) {
 		accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
 		accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
@@ -2860,7 +2869,8 @@
 	p_hwfn = p_cid->p_owner;
 	rc = qed_get_queue_coalesce(p_hwfn, coal, handle);
 	if (rc)
-		DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n");
+		DP_VERBOSE(cdev, QED_MSG_DEBUG,
+			   "Unable to read queue coalescing\n");
 
 	return rc;
 }
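
The queue-count change above replaces the fixed divisor of 3 with (2 + num_tc): each L2 queue-zone needs an Rx context, an XDP context, and one Tx context per traffic class. A worked standalone example of the new arithmetic (values illustrative):

static unsigned short max_l2_queues(unsigned short l2_queues,
				    unsigned short cids,
				    unsigned char num_tc)
{
	/* Each queue-zone needs rx + xdp + one tx context per TC. */
	cids /= (unsigned short)(2 + num_tc);
	return l2_queues < cids ? l2_queues : cids;
}

With 192 contexts and 4 TCs a hwfn can back at most 192 / 6 = 32 queue-zones, where the old fixed divisor of 3 would have claimed 64.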
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 8d80f10..7127d5a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -219,6 +219,9 @@
 	struct qed_rss_params		*rss_params;
 	struct qed_filter_accept_flags	accept_flags;
 	struct qed_sge_tpa_params	*sge_tpa_params;
+	u8				update_ctl_frame_check;
+	u8				mac_chk_en;
+	u8				ethtype_chk_en;
 };
 
 int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 14ac9ca..19a1a58 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -63,8 +63,8 @@
 #include "qed_sp.h"
 #include "qed_rdma.h"
 
-#define QED_LL2_RX_REGISTERED(ll2)	((ll2)->rx_queue.b_cb_registred)
-#define QED_LL2_TX_REGISTERED(ll2)	((ll2)->tx_queue.b_cb_registred)
+#define QED_LL2_RX_REGISTERED(ll2)	((ll2)->rx_queue.b_cb_registered)
+#define QED_LL2_TX_REGISTERED(ll2)	((ll2)->tx_queue.b_cb_registered)
 
 #define QED_LL2_TX_SIZE (256)
 #define QED_LL2_RX_SIZE (4096)
@@ -239,9 +239,8 @@
 	buffer->phys_addr = new_phys_addr;
 
 out_post:
-	rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle,
-				    buffer->phys_addr, 0,  buffer, 1);
-
+	rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle,
+				    buffer->phys_addr, 0, buffer, 1);
 	if (rc)
 		qed_ll2_dealloc_buffer(cdev, buffer);
 }
@@ -796,7 +795,18 @@
 		tx_pkt.vlan = p_buffer->vlan;
 		tx_pkt.bd_flags = bd_flags;
 		tx_pkt.l4_hdr_offset_w = l4_hdr_offset_w;
-		tx_pkt.tx_dest = p_ll2_conn->tx_dest;
+		switch (p_ll2_conn->tx_dest) {
+		case CORE_TX_DEST_NW:
+			tx_pkt.tx_dest = QED_LL2_TX_DEST_NW;
+			break;
+		case CORE_TX_DEST_LB:
+			tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+			break;
+		case CORE_TX_DEST_DROP:
+		default:
+			tx_pkt.tx_dest = QED_LL2_TX_DEST_DROP;
+			break;
+		}
 		tx_pkt.first_frag = first_frag;
 		tx_pkt.first_frag_len = p_buffer->packet_length;
 		tx_pkt.cookie = p_buffer;
@@ -915,16 +925,15 @@
 	return 0;
 }
 
-static void qed_ll2_stop_ooo(struct qed_dev *cdev)
+static void qed_ll2_stop_ooo(struct qed_hwfn *p_hwfn)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+	u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
 
-	DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n",
-		   *handle);
+	DP_VERBOSE(p_hwfn, (QED_MSG_STORAGE | QED_MSG_LL2),
+		   "Stopping LL2 OOO queue [%02x]\n", *handle);
 
-	qed_ll2_terminate_connection(hwfn, *handle);
-	qed_ll2_release_connection(hwfn, *handle);
+	qed_ll2_terminate_connection(p_hwfn, *handle);
+	qed_ll2_release_connection(p_hwfn, *handle);
 	*handle = QED_LL2_UNUSED_HANDLE;
 }
 
@@ -1074,7 +1083,14 @@
 
 	p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable;
 
-	return qed_spq_post(p_hwfn, p_ent, NULL);
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		return rc;
+
+	rc = qed_db_recovery_add(p_hwfn->cdev, p_tx->doorbell_addr,
+				 &p_tx->db_msg, DB_REC_WIDTH_32B,
+				 DB_REC_KERNEL);
+	return rc;
 }
 
 static int qed_sp_ll2_rx_queue_stop(struct qed_hwfn *p_hwfn,
@@ -1108,9 +1124,11 @@
 static int qed_sp_ll2_tx_queue_stop(struct qed_hwfn *p_hwfn,
 				    struct qed_ll2_info *p_ll2_conn)
 {
+	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
 	int rc = -EINVAL;
+	qed_db_recovery_del(p_hwfn->cdev, p_tx->doorbell_addr, &p_tx->db_msg);
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -1404,7 +1422,7 @@
 				    &p_hwfn->p_ll2_info[i],
 				    &p_ll2_info->rx_queue.rx_sb_index,
 				    &p_ll2_info->rx_queue.p_fw_cons);
-		p_ll2_info->rx_queue.b_cb_registred = true;
+		p_ll2_info->rx_queue.b_cb_registered = true;
 	}
 
 	if (data->input.tx_num_desc) {
@@ -1413,7 +1431,7 @@
 				    &p_hwfn->p_ll2_info[i],
 				    &p_ll2_info->tx_queue.tx_sb_index,
 				    &p_ll2_info->tx_queue.p_fw_cons);
-		p_ll2_info->tx_queue.b_cb_registred = true;
+		p_ll2_info->tx_queue.b_cb_registered = true;
 	}
 
 	*data->p_connection_handle = i;
@@ -1531,6 +1549,12 @@
 	p_tx->doorbell_addr = (u8 __iomem *)p_hwfn->doorbells +
 					    qed_db_addr(p_ll2_conn->cid,
 							DQ_DEMS_LEGACY);
+	/* prepare db data */
+	SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
+	SET_FIELD(p_tx->db_msg.params, CORE_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_CORE_TX_BD_PROD_CMD);
+	p_tx->db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
 
 	rc = qed_ll2_establish_connection_rx(p_hwfn, p_ll2_conn);
 	if (rc)
@@ -1547,12 +1572,12 @@
 
 	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
 		if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
-			qed_llh_add_protocol_filter(p_hwfn, p_ptt,
-						    ETH_P_FCOE, 0,
-						    QED_LLH_FILTER_ETHERTYPE);
-		qed_llh_add_protocol_filter(p_hwfn, p_ptt,
-					    ETH_P_FIP, 0,
-					    QED_LLH_FILTER_ETHERTYPE);
+			qed_llh_add_protocol_filter(p_hwfn->cdev, 0,
+						    QED_LLH_FILTER_ETHERTYPE,
+						    ETH_P_FCOE, 0);
+		qed_llh_add_protocol_filter(p_hwfn->cdev, 0,
+					    QED_LLH_FILTER_ETHERTYPE,
+					    ETH_P_FIP, 0);
 	}
 
 out:
@@ -1592,6 +1617,10 @@
 	cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain);
 	rx_prod.bd_prod = cpu_to_le16(bd_prod);
 	rx_prod.cqe_prod = cpu_to_le16(cq_prod);
+
+	/* Make sure chain element is updated before ringing the doorbell */
+	dma_wmb();
+
 	DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod));
 }
 
@@ -1769,7 +1798,6 @@
 	bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw;
 	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
 	struct qed_ll2_tx_packet *p_pkt = NULL;
-	struct core_db_data db_msg = { 0, 0, 0 };
 	u16 bd_prod;
 
 	/* If there are missing BDs, don't do anything now */
@@ -1798,24 +1826,19 @@
 		list_move_tail(&p_pkt->list_entry, &p_tx->active_descq);
 	}
 
-	SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
-	SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
-	SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_VAL_SEL,
-		  DQ_XCM_CORE_TX_BD_PROD_CMD);
-	db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
-	db_msg.spq_prod = cpu_to_le16(bd_prod);
+	p_tx->db_msg.spq_prod = cpu_to_le16(bd_prod);
 
 	/* Make sure the BDs data is updated before ringing the doorbell */
 	wmb();
 
-	DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&db_msg));
+	DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&p_tx->db_msg));
 
 	DP_VERBOSE(p_hwfn,
 		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
 		   "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n",
 		   p_ll2_conn->queue_id,
 		   p_ll2_conn->cid,
-		   p_ll2_conn->input.conn_type, db_msg.spq_prod);
+		   p_ll2_conn->input.conn_type, p_tx->db_msg.spq_prod);
 }
 
 int qed_ll2_prepare_tx_packet(void *cxt,
@@ -1929,7 +1952,7 @@
 
 	/* Stop Tx & Rx of connection, if needed */
 	if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
-		p_ll2_conn->tx_queue.b_cb_registred = false;
+		p_ll2_conn->tx_queue.b_cb_registered = false;
 		smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
 		rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn);
 		if (rc)
@@ -1940,7 +1963,7 @@
 	}
 
 	if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
-		p_ll2_conn->rx_queue.b_cb_registred = false;
+		p_ll2_conn->rx_queue.b_cb_registered = false;
 		smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
 		rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn);
 		if (rc)
@@ -1955,12 +1978,12 @@
 
 	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
 		if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
-			qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
-						       ETH_P_FCOE, 0,
-						      QED_LLH_FILTER_ETHERTYPE);
-		qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
-					       ETH_P_FIP, 0,
-					       QED_LLH_FILTER_ETHERTYPE);
+			qed_llh_remove_protocol_filter(p_hwfn->cdev, 0,
+						       QED_LLH_FILTER_ETHERTYPE,
+						       ETH_P_FCOE, 0);
+		qed_llh_remove_protocol_filter(p_hwfn->cdev, 0,
+					       QED_LLH_FILTER_ETHERTYPE,
+					       ETH_P_FIP, 0);
 	}
 
 out:
@@ -2061,12 +2084,12 @@
 			TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)),
 			sizeof(port_stats));
 
-	p_stats->gsi_invalid_hdr = HILO_64_REGPAIR(port_stats.gsi_invalid_hdr);
-	p_stats->gsi_invalid_pkt_length =
+	p_stats->gsi_invalid_hdr += HILO_64_REGPAIR(port_stats.gsi_invalid_hdr);
+	p_stats->gsi_invalid_pkt_length +=
 	    HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length);
-	p_stats->gsi_unsupported_pkt_typ =
+	p_stats->gsi_unsupported_pkt_typ +=
 	    HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ);
-	p_stats->gsi_crcchksm_error =
+	p_stats->gsi_crcchksm_error +=
 	    HILO_64_REGPAIR(port_stats.gsi_crcchksm_error);
 }
 
@@ -2084,9 +2107,9 @@
 		      CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid);
 	qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats));
 
-	p_stats->packet_too_big_discard =
+	p_stats->packet_too_big_discard +=
 			HILO_64_REGPAIR(tstats.packet_too_big_discard);
-	p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard);
+	p_stats->no_buff_discard += HILO_64_REGPAIR(tstats.no_buff_discard);
 }
 
 static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn,
@@ -2103,12 +2126,12 @@
 		      CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid);
 	qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats));
 
-	p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
-	p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
-	p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
-	p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
-	p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
-	p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
+	p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
+	p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
+	p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
+	p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
+	p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
+	p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
 }
 
 static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
@@ -2125,23 +2148,21 @@
 		      CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id);
 	qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats));
 
-	p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes);
-	p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes);
-	p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes);
-	p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts);
-	p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts);
-	p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts);
+	p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes);
+	p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes);
+	p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes);
+	p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts);
+	p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts);
+	p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts);
 }
 
-int qed_ll2_get_stats(void *cxt,
-		      u8 connection_handle, struct qed_ll2_stats *p_stats)
+static int __qed_ll2_get_stats(void *cxt, u8 connection_handle,
+			       struct qed_ll2_stats *p_stats)
 {
 	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 	struct qed_ptt *p_ptt;
 
-	memset(p_stats, 0, sizeof(*p_stats));
-
 	if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) ||
 	    !p_hwfn->p_ll2_info)
 		return -EINVAL;
@@ -2156,15 +2177,26 @@
 
 	if (p_ll2_conn->input.gsi_enable)
 		_qed_ll2_get_port_stats(p_hwfn, p_ptt, p_stats);
+
 	_qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
 	_qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
 	if (p_ll2_conn->tx_stats_en)
 		_qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
 
 	qed_ptt_release(p_hwfn, p_ptt);
+
 	return 0;
 }
 
+int qed_ll2_get_stats(void *cxt,
+		      u8 connection_handle, struct qed_ll2_stats *p_stats)
+{
+	memset(p_stats, 0, sizeof(*p_stats));
+	return __qed_ll2_get_stats(cxt, connection_handle, p_stats);
+}
+
 static void qed_ll2b_release_rx_packet(void *cxt,
 				       u8 connection_handle,
 				       void *cookie,
@@ -2191,7 +2223,7 @@
 	.tx_release_cb = &qed_ll2b_complete_tx_packet,
 };
 
-static void qed_ll2_set_conn_data(struct qed_dev *cdev,
+static void qed_ll2_set_conn_data(struct qed_hwfn *p_hwfn,
 				  struct qed_ll2_acquire_data *data,
 				  struct qed_ll2_params *params,
 				  enum qed_ll2_conn_type conn_type,
@@ -2207,7 +2239,7 @@
 	data->input.tx_num_desc = QED_LL2_TX_SIZE;
 	data->p_connection_handle = handle;
 	data->cbs = &ll2_cbs;
-	ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
+	ll2_cbs.cookie = p_hwfn;
 
 	if (lb) {
 		data->input.tx_tc = PKT_LB_TC;
@@ -2218,74 +2250,102 @@
 	}
 }
 
-static int qed_ll2_start_ooo(struct qed_dev *cdev,
+static int qed_ll2_start_ooo(struct qed_hwfn *p_hwfn,
 			     struct qed_ll2_params *params)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+	u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
 	struct qed_ll2_acquire_data data;
 	int rc;
 
-	qed_ll2_set_conn_data(cdev, &data, params,
+	qed_ll2_set_conn_data(p_hwfn, &data, params,
 			      QED_LL2_TYPE_OOO, handle, true);
 
-	rc = qed_ll2_acquire_connection(hwfn, &data);
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
 	if (rc) {
-		DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
+		DP_INFO(p_hwfn, "Failed to acquire LL2 OOO connection\n");
 		goto out;
 	}
 
-	rc = qed_ll2_establish_connection(hwfn, *handle);
+	rc = qed_ll2_establish_connection(p_hwfn, *handle);
 	if (rc) {
-		DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
+		DP_INFO(p_hwfn, "Failed to establish LL2 OOO connection\n");
 		goto fail;
 	}
 
 	return 0;
 
 fail:
-	qed_ll2_release_connection(hwfn, *handle);
+	qed_ll2_release_connection(p_hwfn, *handle);
 out:
 	*handle = QED_LL2_UNUSED_HANDLE;
 	return rc;
 }
 
-static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
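+/* In CMT mode, a storage PF may be affined to engine 1 while LL2 must
+ * also run on engine 0; this helper identifies such a PF.
+ */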
+static bool qed_ll2_is_storage_eng1(struct qed_dev *cdev)
 {
-	struct qed_ll2_buffer *buffer, *tmp_buffer;
-	enum qed_ll2_conn_type conn_type;
-	struct qed_ll2_acquire_data data;
-	struct qed_ptt *p_ptt;
-	int rc, i;
+	return (QED_IS_FCOE_PERSONALITY(QED_LEADING_HWFN(cdev)) ||
+		QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev))) &&
+		(QED_AFFIN_HWFN(cdev) != QED_LEADING_HWFN(cdev));
+}
 
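+/* Terminate and release the LL2 connection of a single HW function */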
+static int __qed_ll2_stop(struct qed_hwfn *p_hwfn)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	int rc;
 
-	/* Initialize LL2 locks & lists */
-	INIT_LIST_HEAD(&cdev->ll2->list);
-	spin_lock_init(&cdev->ll2->lock);
-	cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
-			     L1_CACHE_BYTES + params->mtu;
+	rc = qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle);
+	if (rc)
+		DP_INFO(cdev, "Failed to terminate LL2 connection\n");
 
-	/*Allocate memory for LL2 */
-	DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n",
-		cdev->ll2->rx_size);
-	for (i = 0; i < QED_LL2_RX_SIZE; i++) {
-		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
-		if (!buffer) {
-			DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
-			goto fail;
-		}
+	qed_ll2_release_connection(p_hwfn, cdev->ll2->handle);
 
-		rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
-					  &buffer->phys_addr);
-		if (rc) {
-			kfree(buffer);
-			goto fail;
-		}
+	return rc;
+}
 
-		list_add_tail(&buffer->list, &cdev->ll2->list);
+static int qed_ll2_stop(struct qed_dev *cdev)
+{
+	bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+	int rc = 0, rc2 = 0;
+
+	if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
+		return 0;
+
+	qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address);
+	eth_zero_addr(cdev->ll2_mac_address);
+
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+		qed_ll2_stop_ooo(p_hwfn);
+
+	/* In CMT mode, LL2 is always started on engine 0 for a storage PF */
+	if (b_is_storage_eng1) {
+		rc2 = __qed_ll2_stop(QED_LEADING_HWFN(cdev));
+		if (rc2)
+			DP_NOTICE(QED_LEADING_HWFN(cdev),
+				  "Failed to stop LL2 on engine 0\n");
 	}
 
-	switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
+	rc = __qed_ll2_stop(p_hwfn);
+	if (rc)
+		DP_NOTICE(p_hwfn, "Failed to stop LL2\n");
+
+	qed_ll2_kill_buffers(cdev);
+
+	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
+
+	return rc | rc2;
+}
+
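+/* Acquire and establish an LL2 connection on a single engine, and post
+ * the preallocated Rx buffers to the firmware.
+ */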
+static int __qed_ll2_start(struct qed_hwfn *p_hwfn,
+			   struct qed_ll2_params *params)
+{
+	struct qed_ll2_buffer *buffer, *tmp_buffer;
+	struct qed_dev *cdev = p_hwfn->cdev;
+	enum qed_ll2_conn_type conn_type;
+	struct qed_ll2_acquire_data data;
+	int rc, rx_cnt;
+
+	switch (p_hwfn->hw_info.personality) {
 	case QED_PCI_FCOE:
 		conn_type = QED_LL2_TYPE_FCOE;
 		break;
@@ -2296,33 +2356,34 @@
 		conn_type = QED_LL2_TYPE_ROCE;
 		break;
 	default:
 		conn_type = QED_LL2_TYPE_TEST;
 	}
 
-	qed_ll2_set_conn_data(cdev, &data, params, conn_type,
+	qed_ll2_set_conn_data(p_hwfn, &data, params, conn_type,
 			      &cdev->ll2->handle, false);
 
-	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
 	if (rc) {
-		DP_INFO(cdev, "Failed to acquire LL2 connection\n");
-		goto fail;
+		DP_INFO(p_hwfn, "Failed to acquire LL2 connection\n");
+		return rc;
 	}
 
-	rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev),
-					  cdev->ll2->handle);
+	rc = qed_ll2_establish_connection(p_hwfn, cdev->ll2->handle);
 	if (rc) {
-		DP_INFO(cdev, "Failed to establish LL2 connection\n");
-		goto release_fail;
+		DP_INFO(p_hwfn, "Failed to establish LL2 connection\n");
+		goto release_conn;
 	}
 
 	/* Post all Rx buffers to FW */
 	spin_lock_bh(&cdev->ll2->lock);
+	rx_cnt = cdev->ll2->rx_cnt;
 	list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) {
-		rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev),
+		rc = qed_ll2_post_rx_buffer(p_hwfn,
 					    cdev->ll2->handle,
 					    buffer->phys_addr, 0, buffer, 1);
 		if (rc) {
-			DP_INFO(cdev,
+			DP_INFO(p_hwfn,
 				"Failed to post an Rx buffer; Deleting it\n");
 			dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
 					 cdev->ll2->rx_size, DMA_FROM_DEVICE);
@@ -2330,115 +2391,147 @@
 			list_del(&buffer->list);
 			kfree(buffer);
 		} else {
-			cdev->ll2->rx_cnt++;
+			rx_cnt++;
 		}
 	}
 	spin_unlock_bh(&cdev->ll2->lock);
 
-	if (!cdev->ll2->rx_cnt) {
-		DP_INFO(cdev, "Failed passing even a single Rx buffer\n");
-		goto release_terminate;
+	if (rx_cnt == cdev->ll2->rx_cnt) {
+		DP_NOTICE(p_hwfn, "Failed passing even a single Rx buffer\n");
+		goto terminate_conn;
 	}
+	cdev->ll2->rx_cnt = rx_cnt;
+
+	return 0;
+
+terminate_conn:
+	qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle);
+release_conn:
+	qed_ll2_release_connection(p_hwfn, cdev->ll2->handle);
+	return rc;
+}
+
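+/* Allocate the Rx buffers and start LL2 on the affined engine (and also
+ * on engine 0 for a storage PF in CMT mode), then add the LLH MAC filter.
+ */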
+static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
+{
+	bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+	struct qed_ll2_buffer *buffer;
+	int rx_num_desc, i, rc;
 
 	if (!is_valid_ether_addr(params->ll2_mac_address)) {
-		DP_INFO(cdev, "Invalid Ethernet address\n");
-		goto release_terminate;
+		DP_NOTICE(cdev, "Invalid Ethernet address\n");
+		return -EINVAL;
 	}
 
-	if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) {
-		DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
-		rc = qed_ll2_start_ooo(cdev, params);
+	WARN_ON(!cdev->ll2->cbs);
+
+	/* Initialize LL2 locks & lists */
+	INIT_LIST_HEAD(&cdev->ll2->list);
+	spin_lock_init(&cdev->ll2->lock);
+
+	cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
+			     L1_CACHE_BYTES + params->mtu;
+
+	/* Allocate memory for LL2.
+	 * In CMT mode, a storage PF affinitized to engine 1 also starts LL2
+	 * on engine 0 and thus needs twice the buffers.
+	 */
+	rx_num_desc = QED_LL2_RX_SIZE * (b_is_storage_eng1 ? 2 : 1);
+	DP_INFO(cdev, "Allocating %d LL2 buffers of size %08x bytes\n",
+		rx_num_desc, cdev->ll2->rx_size);
+	for (i = 0; i < rx_num_desc; i++) {
+		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		if (!buffer) {
+			DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
+			rc = -ENOMEM;
+			goto err0;
+		}
+
+		rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
+					  &buffer->phys_addr);
 		if (rc) {
-			DP_INFO(cdev,
-				"Failed to initialize the OOO LL2 queue\n");
-			goto release_terminate;
+			kfree(buffer);
+			goto err0;
+		}
+
+		list_add_tail(&buffer->list, &cdev->ll2->list);
+	}
+
+	rc = __qed_ll2_start(p_hwfn, params);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to start LL2\n");
+		goto err0;
+	}
+
+	/* In CMT mode, LL2 must always be started on engine 0 for a storage
+	 * PF, since broadcast/multicast packets are routed to engine 0.
+	 */
+	if (b_is_storage_eng1) {
+		rc = __qed_ll2_start(QED_LEADING_HWFN(cdev), params);
+		if (rc) {
+			DP_NOTICE(QED_LEADING_HWFN(cdev),
+				  "Failed to start LL2 on engine 0\n");
+			goto err1;
 		}
 	}
 
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
-	if (!p_ptt) {
-		DP_INFO(cdev, "Failed to acquire PTT\n");
-		goto release_terminate;
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) {
+		DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
+		rc = qed_ll2_start_ooo(p_hwfn, params);
+		if (rc) {
+			DP_NOTICE(cdev, "Failed to start OOO LL2\n");
+			goto err2;
+		}
 	}
 
-	rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
-				    params->ll2_mac_address);
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+	rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address);
 	if (rc) {
-		DP_ERR(cdev, "Failed to allocate LLH filter\n");
-		goto release_terminate_all;
+		DP_NOTICE(cdev, "Failed to add an LLH filter\n");
+		goto err3;
 	}
 
 	ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
+
 	return 0;
 
-release_terminate_all:
-
-release_terminate:
-	qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-release_fail:
-	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-fail:
+err3:
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+		qed_ll2_stop_ooo(p_hwfn);
+err2:
+	if (b_is_storage_eng1)
+		__qed_ll2_stop(QED_LEADING_HWFN(cdev));
+err1:
+	__qed_ll2_stop(p_hwfn);
+err0:
 	qed_ll2_kill_buffers(cdev);
 	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
-	return -EINVAL;
-}
-
-static int qed_ll2_stop(struct qed_dev *cdev)
-{
-	struct qed_ptt *p_ptt;
-	int rc;
-
-	if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
-		return 0;
-
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
-	if (!p_ptt) {
-		DP_INFO(cdev, "Failed to acquire PTT\n");
-		goto fail;
-	}
-
-	qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
-				  cdev->ll2_mac_address);
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
-	eth_zero_addr(cdev->ll2_mac_address);
-
-	if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI)
-		qed_ll2_stop_ooo(cdev);
-
-	rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
-					  cdev->ll2->handle);
-	if (rc)
-		DP_INFO(cdev, "Failed to terminate LL2 connection\n");
-
-	qed_ll2_kill_buffers(cdev);
-
-	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
-
 	return rc;
-fail:
-	return -EINVAL;
 }
 
 static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
 			      unsigned long xmit_flags)
 {
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
 	struct qed_ll2_tx_pkt_info pkt;
 	const skb_frag_t *frag;
+	u8 flags = 0, nr_frags;
 	int rc = -EINVAL, i;
 	dma_addr_t mapping;
 	u16 vlan = 0;
-	u8 flags = 0;
 
 	if (unlikely(skb->ip_summed != CHECKSUM_NONE)) {
 		DP_INFO(cdev, "Cannot transmit a checksummed packet\n");
 		return -EINVAL;
 	}
 
-	if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) {
+	/* Cache number of fragments from SKB since SKB may be freed by
+	 * the completion routine after calling qed_ll2_prepare_tx_packet()
+	 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) {
 		DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n",
-		       1 + skb_shinfo(skb)->nr_frags);
+		       1 + nr_frags);
 		return -EINVAL;
 	}
 
@@ -2460,7 +2553,7 @@
 	}
 
 	memset(&pkt, 0, sizeof(pkt));
-	pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags;
+	pkt.num_of_bds = 1 + nr_frags;
 	pkt.vlan = vlan;
 	pkt.bd_flags = flags;
 	pkt.tx_dest = QED_LL2_TX_DEST_NW;
@@ -2471,12 +2564,17 @@
 	    test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags))
 		pkt.remove_stag = true;
 
-	rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
+	/* qed_ll2_prepare_tx_packet() may actually send the packet if the
+	 * skb has no fragments, in which case the completion routine may run
+	 * and free the SKB; do not dereference the SKB beyond this point
+	 * unless it has fragments.
+	 */
+	rc = qed_ll2_prepare_tx_packet(p_hwfn, cdev->ll2->handle,
 				       &pkt, 1);
 	if (rc)
 		goto err;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+	for (i = 0; i < nr_frags; i++) {
 		frag = &skb_shinfo(skb)->frags[i];
 
 		mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0,
@@ -2485,16 +2583,17 @@
 		if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) {
 			DP_NOTICE(cdev,
 				  "Unable to map frag - dropping packet\n");
+			rc = -ENOMEM;
 			goto err;
 		}
 
-		rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev),
+		rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn,
 						       cdev->ll2->handle,
 						       mapping,
 						       skb_frag_size(frag));
 
 		/* if failed not much to do here, partial packet has been posted
-		 * we can't free memory, will need to wait for completion.
+		 * we can't free memory, will need to wait for completion
 		 */
 		if (rc)
 			goto err2;
@@ -2504,18 +2603,37 @@
 
 err:
 	dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE);
-
 err2:
 	return rc;
 }
 
 static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats)
 {
+	bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+	int rc;
+
 	if (!cdev->ll2)
 		return -EINVAL;
 
-	return qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
-				 cdev->ll2->handle, stats);
+	rc = qed_ll2_get_stats(p_hwfn, cdev->ll2->handle, stats);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to get LL2 stats\n");
+		return rc;
+	}
+
+	/* In CMT mode, LL2 is always started on engine 0 for a storage PF */
+	if (b_is_storage_eng1) {
+		rc = __qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
+					 cdev->ll2->handle, stats);
+		if (rc) {
+			DP_NOTICE(QED_LEADING_HWFN(cdev),
+				  "Failed to get LL2 stats on engine 0\n");
+			return rc;
+		}
+	}
+
+	return 0;
 }
 
 const struct qed_ll2_ops qed_ll2_ops_pass = {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index f658170..5f01fbd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -79,7 +79,7 @@
 	struct qed_chain rxq_chain;
 	struct qed_chain rcq_chain;
 	u8 rx_sb_index;
-	bool b_cb_registred;
+	bool b_cb_registered;
 	__le16 *p_fw_cons;
 	struct list_head active_descq;
 	struct list_head free_descq;
@@ -93,7 +93,7 @@
 	spinlock_t lock;
 	struct qed_chain txq_chain;
 	u8 tx_sb_index;
-	bool b_cb_registred;
+	bool b_cb_registered;
 	__le16 *p_fw_cons;
 	struct list_head active_descq;
 	struct list_head free_descq;
@@ -103,6 +103,7 @@
 	struct qed_ll2_tx_packet cur_completing_packet;
 	u16 cur_completing_bd_idx;
 	void __iomem *doorbell_addr;
+	struct core_db_data db_msg;
 	u16 bds_idx;
 	u16 cur_send_frag_num;
 	u16 cur_completing_frag_num;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index cf3b0e3..38f7f40 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -48,6 +48,7 @@
 #include <linux/crc32.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
+#include <net/devlink.h>
 
 #include "qed.h"
 #include "qed_sriov.h"
@@ -58,6 +59,7 @@
 #include "qed_iscsi.h"
 
 #include "qed_mcp.h"
+#include "qed_reg_addr.h"
 #include "qed_hw.h"
 #include "qed_selftest.h"
 #include "qed_debug.h"
@@ -65,6 +67,9 @@
 #define QED_ROCE_QPS			(8192)
 #define QED_ROCE_DPIS			(8)
 #define QED_RDMA_SRQS                   QED_ROCE_QPS
+#define QED_NVM_CFG_GET_FLAGS		0xA
+#define QED_NVM_CFG_GET_PF_FLAGS	0x1A
+#define QED_NVM_CFG_MAX_ATTRS		50
 
 static char version[] =
 	"QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -280,6 +285,8 @@
 		if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
 			dev_info->wol_support = true;
 
+		dev_info->smart_an = qed_mcp_is_smart_an_supported(p_hwfn);
+
 		dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
 	} else {
 		qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
@@ -339,6 +346,107 @@
 	return 0;
 }
 
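+/* Driver-private data attached to the devlink instance */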
+struct qed_devlink {
+	struct qed_dev *cdev;
+};
+
+enum qed_devlink_param_id {
+	QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+	QED_DEVLINK_PARAM_ID_IWARP_CMT,
+};
+
+static int qed_dl_param_get(struct devlink *dl, u32 id,
+			    struct devlink_param_gset_ctx *ctx)
+{
+	struct qed_devlink *qed_dl;
+	struct qed_dev *cdev;
+
+	qed_dl = devlink_priv(dl);
+	cdev = qed_dl->cdev;
+	ctx->val.vbool = cdev->iwarp_cmt;
+
+	return 0;
+}
+
+static int qed_dl_param_set(struct devlink *dl, u32 id,
+			    struct devlink_param_gset_ctx *ctx)
+{
+	struct qed_devlink *qed_dl;
+	struct qed_dev *cdev;
+
+	qed_dl = devlink_priv(dl);
+	cdev = qed_dl->cdev;
+	cdev->iwarp_cmt = ctx->val.vbool;
+
+	return 0;
+}
+
+static const struct devlink_param qed_devlink_params[] = {
+	DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT,
+			     "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL,
+			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			     qed_dl_param_get, qed_dl_param_set, NULL),
+};
+
+static const struct devlink_ops qed_dl_ops;
+
+static int qed_devlink_register(struct qed_dev *cdev)
+{
+	union devlink_param_value value;
+	struct qed_devlink *qed_dl;
+	struct devlink *dl;
+	int rc;
+
+	dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl));
+	if (!dl)
+		return -ENOMEM;
+
+	qed_dl = devlink_priv(dl);
+
+	cdev->dl = dl;
+	qed_dl->cdev = cdev;
+
+	rc = devlink_register(dl, &cdev->pdev->dev);
+	if (rc)
+		goto err_free;
+
+	rc = devlink_params_register(dl, qed_devlink_params,
+				     ARRAY_SIZE(qed_devlink_params));
+	if (rc)
+		goto err_unregister;
+
+	value.vbool = false;
+	devlink_param_driverinit_value_set(dl,
+					   QED_DEVLINK_PARAM_ID_IWARP_CMT,
+					   value);
+
+	devlink_params_publish(dl);
+	cdev->iwarp_cmt = false;
+
+	return 0;
+
+err_unregister:
+	devlink_unregister(dl);
+
+err_free:
+	cdev->dl = NULL;
+	devlink_free(dl);
+
+	return rc;
+}
+
+static void qed_devlink_unregister(struct qed_dev *cdev)
+{
+	if (!cdev->dl)
+		return;
+
+	devlink_params_unregister(cdev->dl, qed_devlink_params,
+				  ARRAY_SIZE(qed_devlink_params));
+
+	devlink_unregister(cdev->dl);
+	devlink_free(cdev->dl);
+}
+
 /* probing */
 static struct qed_dev *qed_probe(struct pci_dev *pdev,
 				 struct qed_probe_params *params)
@@ -358,6 +466,8 @@
 
 	qed_init_dp(cdev, params->dp_module, params->dp_level);
 
+	cdev->recov_in_prog = params->recov_in_prog;
+
 	rc = qed_init_pci(cdev, pdev);
 	if (rc) {
 		DP_ERR(cdev, "init pci failed\n");
@@ -365,6 +475,12 @@
 	}
 	DP_INFO(cdev, "PCI init completed successfully\n");
 
+	rc = qed_devlink_register(cdev);
+	if (rc) {
+		DP_INFO(cdev, "Failed to register devlink.\n");
+		goto err2;
+	}
+
 	rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT);
 	if (rc) {
 		DP_ERR(cdev, "hw prepare failed\n");
@@ -394,6 +510,8 @@
 
 	qed_set_power_state(cdev, PCI_D3hot);
 
+	qed_devlink_unregister(cdev);
+
 	qed_free_cdev(cdev);
 }
 
@@ -965,9 +1083,47 @@
 	}
 }
 
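+/* The periodic doorbell recovery is rescheduled up to
+ * QED_PERIODIC_DB_REC_COUNT times, QED_PERIODIC_DB_REC_INTERVAL_MS apart;
+ * the wait parameters bound how long qed_slowpath_wq_stop() waits for the
+ * last run to complete.
+ */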
+#define QED_PERIODIC_DB_REC_COUNT		10
+#define QED_PERIODIC_DB_REC_INTERVAL_MS		100
+#define QED_PERIODIC_DB_REC_INTERVAL \
+	msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS)
+#define QED_PERIODIC_DB_REC_WAIT_COUNT		10
+#define QED_PERIODIC_DB_REC_WAIT_INTERVAL \
+	(QED_PERIODIC_DB_REC_INTERVAL_MS / QED_PERIODIC_DB_REC_WAIT_COUNT)
+
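+/* Mark the requested task as pending and queue the slowpath work, unless
+ * the workqueue has stopped accepting new work.
+ */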
+static int qed_slowpath_delayed_work(struct qed_hwfn *hwfn,
+				     enum qed_slowpath_wq_flag wq_flag,
+				     unsigned long delay)
+{
+	if (!hwfn->slowpath_wq_active)
+		return -EINVAL;
+
+	/* Memory barrier for setting atomic bit */
+	smp_mb__before_atomic();
+	set_bit(wq_flag, &hwfn->slowpath_task_flags);
+	smp_mb__after_atomic();
+	queue_delayed_work(hwfn->slowpath_wq, &hwfn->slowpath_task, delay);
+
+	return 0;
+}
+
+void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn)
+{
+	/* Reset periodic Doorbell Recovery counter */
+	p_hwfn->periodic_db_rec_count = QED_PERIODIC_DB_REC_COUNT;
+
+	/* Don't schedule periodic Doorbell Recovery if already scheduled */
+	if (test_bit(QED_SLOWPATH_PERIODIC_DB_REC,
+		     &p_hwfn->slowpath_task_flags))
+		return;
+
+	qed_slowpath_delayed_work(p_hwfn, QED_SLOWPATH_PERIODIC_DB_REC,
+				  QED_PERIODIC_DB_REC_INTERVAL);
+}
+
 static void qed_slowpath_wq_stop(struct qed_dev *cdev)
 {
-	int i;
+	int i, sleep_count = QED_PERIODIC_DB_REC_WAIT_COUNT;
 
 	if (IS_VF(cdev))
 		return;
@@ -976,6 +1132,15 @@
 		if (!cdev->hwfns[i].slowpath_wq)
 			continue;
 
+		/* Stop queuing new delayed works */
+		cdev->hwfns[i].slowpath_wq_active = false;
+
+		/* Wait until the last periodic doorbell recovery is executed */
+		while (test_bit(QED_SLOWPATH_PERIODIC_DB_REC,
+				&cdev->hwfns[i].slowpath_task_flags) &&
+		       sleep_count--)
+			msleep(QED_PERIODIC_DB_REC_WAIT_INTERVAL);
+
 		flush_workqueue(cdev->hwfns[i].slowpath_wq);
 		destroy_workqueue(cdev->hwfns[i].slowpath_wq);
 	}
@@ -988,7 +1153,10 @@
 	struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
 
 	if (!ptt) {
-		queue_delayed_work(hwfn->slowpath_wq, &hwfn->slowpath_task, 0);
+		if (hwfn->slowpath_wq_active)
+			queue_delayed_work(hwfn->slowpath_wq,
+					   &hwfn->slowpath_task, 0);
+
 		return;
 	}
 
@@ -996,6 +1164,15 @@
 			       &hwfn->slowpath_task_flags))
 		qed_mfw_process_tlv_req(hwfn, ptt);
 
+	if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
+			       &hwfn->slowpath_task_flags)) {
+		qed_db_rec_handler(hwfn, ptt);
+		if (hwfn->periodic_db_rec_count--)
+			qed_slowpath_delayed_work(hwfn,
+						  QED_SLOWPATH_PERIODIC_DB_REC,
+						  QED_PERIODIC_DB_REC_INTERVAL);
+	}
+
 	qed_ptt_release(hwfn, ptt);
 }
 
@@ -1022,6 +1199,7 @@
 		}
 
 		INIT_DELAYED_WORK(&hwfn->slowpath_task, qed_slowpath_task);
+		hwfn->slowpath_wq_active = true;
 	}
 
 	return 0;
@@ -1150,7 +1328,7 @@
 					      &drv_version);
 		if (rc) {
 			DP_NOTICE(cdev, "Failed sending drv version command\n");
-			return rc;
+			goto err4;
 		}
 	}
 
@@ -1158,6 +1336,8 @@
 
 	return 0;
 
+err4:
+	qed_ll2_dealloc_if(cdev);
 err3:
 	qed_hw_stop(cdev);
 err2:
@@ -1236,26 +1416,21 @@
 {
 	struct qed_hwfn *p_hwfn;
 	struct qed_ptt *p_ptt;
-	int hwfn_index;
 	u16 rel_sb_id;
-	u8 n_hwfns;
 	u32 rc;
 
-	/* RoCE uses single engine and CMT uses two engines. When using both
-	 * we force only a single engine. Storage uses only engine 0 too.
-	 */
-	if (type == QED_SB_TYPE_L2_QUEUE)
-		n_hwfns = cdev->num_hwfns;
-	else
-		n_hwfns = 1;
-
-	hwfn_index = sb_id % n_hwfns;
-	p_hwfn = &cdev->hwfns[hwfn_index];
-	rel_sb_id = sb_id / n_hwfns;
+	/* RoCE/Storage use a single engine in CMT mode while L2 uses both */
+	if (type == QED_SB_TYPE_L2_QUEUE) {
+		p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns];
+		rel_sb_id = sb_id / cdev->num_hwfns;
+	} else {
+		p_hwfn = QED_AFFIN_HWFN(cdev);
+		rel_sb_id = sb_id;
+	}
 
 	DP_VERBOSE(cdev, NETIF_MSG_INTR,
 		   "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n",
-		   hwfn_index, rel_sb_id, sb_id);
+		   IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id);
 
 	if (IS_PF(p_hwfn->cdev)) {
 		p_ptt = qed_ptt_acquire(p_hwfn);
@@ -1274,20 +1449,26 @@
 }
 
 static u32 qed_sb_release(struct qed_dev *cdev,
-			  struct qed_sb_info *sb_info, u16 sb_id)
+			  struct qed_sb_info *sb_info,
+			  u16 sb_id,
+			  enum qed_sb_type type)
 {
 	struct qed_hwfn *p_hwfn;
-	int hwfn_index;
 	u16 rel_sb_id;
 	u32 rc;
 
-	hwfn_index = sb_id % cdev->num_hwfns;
-	p_hwfn = &cdev->hwfns[hwfn_index];
-	rel_sb_id = sb_id / cdev->num_hwfns;
+	/* RoCE/Storage use a single engine in CMT mode while L2 uses both */
+	if (type == QED_SB_TYPE_L2_QUEUE) {
+		p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns];
+		rel_sb_id = sb_id / cdev->num_hwfns;
+	} else {
+		p_hwfn = QED_AFFIN_HWFN(cdev);
+		rel_sb_id = sb_id;
+	}
 
 	DP_VERBOSE(cdev, NETIF_MSG_INTR,
 		   "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n",
-		   hwfn_index, rel_sb_id, sb_id);
+		   IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id);
 
 	rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id);
 
@@ -1304,6 +1485,7 @@
 	struct qed_hwfn *hwfn;
 	struct qed_mcp_link_params *link_params;
 	struct qed_ptt *ptt;
+	u32 sup_caps;
 	int rc;
 
 	if (!cdev)
@@ -1330,23 +1512,50 @@
 		link_params->speed.autoneg = params->autoneg;
 	if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) {
 		link_params->speed.advertised_speeds = 0;
-		if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) ||
-		    (params->adv_speeds & QED_LM_1000baseT_Full_BIT))
+		sup_caps = QED_LM_1000baseT_Full_BIT |
+			   QED_LM_1000baseKX_Full_BIT |
+			   QED_LM_1000baseX_Full_BIT;
+		if (params->adv_speeds & sup_caps)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
-		if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
+		sup_caps = QED_LM_10000baseT_Full_BIT |
+			   QED_LM_10000baseKR_Full_BIT |
+			   QED_LM_10000baseKX4_Full_BIT |
+			   QED_LM_10000baseR_FEC_BIT |
+			   QED_LM_10000baseCR_Full_BIT |
+			   QED_LM_10000baseSR_Full_BIT |
+			   QED_LM_10000baseLR_Full_BIT |
+			   QED_LM_10000baseLRM_Full_BIT;
+		if (params->adv_speeds & sup_caps)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
-		if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT)
+		if (params->adv_speeds & QED_LM_20000baseKR2_Full_BIT)
+			link_params->speed.advertised_speeds |=
+				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G;
+		sup_caps = QED_LM_25000baseKR_Full_BIT |
+			   QED_LM_25000baseCR_Full_BIT |
+			   QED_LM_25000baseSR_Full_BIT;
+		if (params->adv_speeds & sup_caps)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
-		if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT)
+		sup_caps = QED_LM_40000baseLR4_Full_BIT |
+			   QED_LM_40000baseKR4_Full_BIT |
+			   QED_LM_40000baseCR4_Full_BIT |
+			   QED_LM_40000baseSR4_Full_BIT;
+		if (params->adv_speeds & sup_caps)
 			link_params->speed.advertised_speeds |=
-			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
-		if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT)
+				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+		sup_caps = QED_LM_50000baseKR2_Full_BIT |
+			   QED_LM_50000baseCR2_Full_BIT |
+			   QED_LM_50000baseSR2_Full_BIT;
+		if (params->adv_speeds & sup_caps)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
-		if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT)
+		sup_caps = QED_LM_100000baseKR4_Full_BIT |
+			   QED_LM_100000baseSR4_Full_BIT |
+			   QED_LM_100000baseCR4_Full_BIT |
+			   QED_LM_100000baseLR4_ER4_Full_BIT;
+		if (params->adv_speeds & sup_caps)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G;
 	}
@@ -1459,12 +1668,154 @@
 	return 0;
 }
 
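+/* Translate NVM speed capabilities into QED_LM_* link modes based on the
+ * reported media and transceiver type.
+ */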
+static void qed_fill_link_capability(struct qed_hwfn *hwfn,
+				     struct qed_ptt *ptt, u32 capability,
+				     u32 *if_capability)
+{
+	u32 media_type, tcvr_state, tcvr_type;
+	u32 speed_mask, board_cfg;
+
+	if (qed_mcp_get_media_type(hwfn, ptt, &media_type))
+		media_type = MEDIA_UNSPECIFIED;
+
+	if (qed_mcp_get_transceiver_data(hwfn, ptt, &tcvr_state, &tcvr_type))
+		tcvr_type = ETH_TRANSCEIVER_STATE_UNPLUGGED;
+
+	if (qed_mcp_trans_speed_mask(hwfn, ptt, &speed_mask))
+		speed_mask = 0xFFFFFFFF;
+
+	if (qed_mcp_get_board_config(hwfn, ptt, &board_cfg))
+		board_cfg = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED;
+
+	DP_VERBOSE(hwfn->cdev, NETIF_MSG_DRV,
+		   "Media_type = 0x%x tcvr_state = 0x%x tcvr_type = 0x%x speed_mask = 0x%x board_cfg = 0x%x\n",
+		   media_type, tcvr_state, tcvr_type, speed_mask, board_cfg);
+
+	switch (media_type) {
+	case MEDIA_DA_TWINAX:
+		*if_capability |= QED_LM_FIBRE_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
+		/* For DAC media, multiple speed capabilities are supported */
+		capability = capability & speed_mask;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
+			*if_capability |= QED_LM_1000baseKX_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
+			*if_capability |= QED_LM_10000baseCR_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+			*if_capability |= QED_LM_40000baseCR4_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+			*if_capability |= QED_LM_25000baseCR_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+			*if_capability |= QED_LM_50000baseCR2_Full_BIT;
+		if (capability &
+			NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
+			*if_capability |= QED_LM_100000baseCR4_Full_BIT;
+		break;
+	case MEDIA_BASE_T:
+		*if_capability |= QED_LM_TP_BIT;
+		if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_EXT_PHY) {
+			if (capability &
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
+				*if_capability |= QED_LM_1000baseT_Full_BIT;
+			}
+			if (capability &
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) {
+				*if_capability |= QED_LM_10000baseT_Full_BIT;
+			}
+		}
+		if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_MODULE) {
+			*if_capability |= QED_LM_FIBRE_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_1000BASET)
+				*if_capability |= QED_LM_1000baseT_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_BASET)
+				*if_capability |= QED_LM_10000baseT_Full_BIT;
+		}
+		break;
+	case MEDIA_SFP_1G_FIBER:
+	case MEDIA_SFPP_10G_FIBER:
+	case MEDIA_XFP_FIBER:
+	case MEDIA_MODULE_FIBER:
+		*if_capability |= QED_LM_FIBRE_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
+			if ((tcvr_type == ETH_TRANSCEIVER_TYPE_1G_LX) ||
+			    (tcvr_type == ETH_TRANSCEIVER_TYPE_1G_SX))
+				*if_capability |= QED_LM_1000baseKX_Full_BIT;
+		}
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_SR)
+				*if_capability |= QED_LM_10000baseSR_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LR)
+				*if_capability |= QED_LM_10000baseLR_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LRM)
+				*if_capability |= QED_LM_10000baseLRM_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_ER)
+				*if_capability |= QED_LM_10000baseR_FEC_BIT;
+		}
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_25G_SR)
+				*if_capability |= QED_LM_25000baseSR_Full_BIT;
+		}
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_LR4)
+				*if_capability |= QED_LM_40000baseLR4_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_SR4)
+				*if_capability |= QED_LM_40000baseSR4_Full_BIT;
+		}
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+			*if_capability |= QED_LM_50000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_100G_SR4)
+				*if_capability |= QED_LM_100000baseSR4_Full_BIT;
+		}
+
+		break;
+	case MEDIA_KR:
+		*if_capability |= QED_LM_Backplane_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
+			*if_capability |= QED_LM_1000baseKX_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
+			*if_capability |= QED_LM_10000baseKR_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+			*if_capability |= QED_LM_25000baseKR_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+			*if_capability |= QED_LM_40000baseKR4_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+			*if_capability |= QED_LM_50000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
+			*if_capability |= QED_LM_100000baseKR4_Full_BIT;
+		break;
+	case MEDIA_UNSPECIFIED:
+	case MEDIA_NOT_PRESENT:
+		DP_VERBOSE(hwfn->cdev, QED_MSG_DEBUG,
+			   "Unknown media and transceiver type\n");
+		break;
+	}
+}
+
 static void qed_fill_link(struct qed_hwfn *hwfn,
+			  struct qed_ptt *ptt,
 			  struct qed_link_output *if_link)
 {
+	struct qed_mcp_link_capabilities link_caps;
 	struct qed_mcp_link_params params;
 	struct qed_mcp_link_state link;
-	struct qed_mcp_link_capabilities link_caps;
 	u32 media_type;
 
 	memset(if_link, 0, sizeof(*if_link));
@@ -1480,7 +1831,6 @@
 		if_link->link_up = true;
 
 	/* TODO - at the moment assume supported and advertised speed equal */
-	if_link->supported_caps = QED_LM_FIBRE_BIT;
 	if (link_caps.default_speed_autoneg)
 		if_link->supported_caps |= QED_LM_Autoneg_BIT;
 	if (params.pause.autoneg ||
@@ -1495,52 +1845,20 @@
 		if_link->advertised_caps |= QED_LM_Autoneg_BIT;
 	else
 		if_link->advertised_caps &= ~QED_LM_Autoneg_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
-		    QED_LM_1000baseT_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
-		if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT;
 
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->supported_caps |= QED_LM_1000baseT_Half_BIT |
-		    QED_LM_1000baseT_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
-		if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT;
+	/* Fill link advertised capabilities */
+	qed_fill_link_capability(hwfn, ptt, params.speed.advertised_speeds,
+				 &if_link->advertised_caps);
+	/* Fill link supported capabilities */
+	qed_fill_link_capability(hwfn, ptt, link_caps.speed_capabilities,
+				 &if_link->supported_caps);
 
 	if (link.link_up)
 		if_link->speed = link.speed;
 
 	/* TODO - fill duplex properly */
 	if_link->duplex = DUPLEX_FULL;
-	qed_mcp_get_media_type(hwfn->cdev, &media_type);
+	qed_mcp_get_media_type(hwfn, ptt, &media_type);
 	if_link->port = qed_get_port_type(media_type);
 
 	if_link->autoneg = params.speed.autoneg;
@@ -1553,12 +1871,13 @@
 		if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
 
 	/* Link partner capabilities */
-	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD)
-		if_link->lp_caps |= QED_LM_1000baseT_Half_BIT;
-	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD)
+	if (link.partner_adv_speed &
+	    QED_LINK_PARTNER_SPEED_1G_FD)
 		if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
 		if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_20G)
+		if_link->lp_caps |= QED_LM_20000baseKR2_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G)
 		if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G)
@@ -1596,21 +1915,34 @@
 static void qed_get_current_link(struct qed_dev *cdev,
 				 struct qed_link_output *if_link)
 {
+	struct qed_hwfn *hwfn;
+	struct qed_ptt *ptt;
 	int i;
 
-	qed_fill_link(&cdev->hwfns[0], if_link);
+	hwfn = &cdev->hwfns[0];
+	if (IS_PF(cdev)) {
+		ptt = qed_ptt_acquire(hwfn);
+		if (ptt) {
+			qed_fill_link(hwfn, ptt, if_link);
+			qed_ptt_release(hwfn, ptt);
+		} else {
+			DP_NOTICE(hwfn, "Failed to fill link; No PTT\n");
+		}
+	} else {
+		qed_fill_link(hwfn, NULL, if_link);
+	}
 
 	for_each_hwfn(cdev, i)
 		qed_inform_vf_link_state(&cdev->hwfns[i]);
 }
 
-void qed_link_update(struct qed_hwfn *hwfn)
+void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt)
 {
 	void *cookie = hwfn->cdev->ops_cookie;
 	struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common;
 	struct qed_link_output if_link;
 
-	qed_fill_link(hwfn, &if_link);
+	qed_fill_link(hwfn, ptt, &if_link);
 	qed_inform_vf_link_state(hwfn);
 
 	if (IS_LEAD_HWFN(hwfn) && cookie)
@@ -1791,21 +2123,30 @@
  * 0B  |                       0x3 [command index]                            |
  * 4B  | b'0: check_response?   | b'1-31  reserved                            |
  * 8B  | File-type |                   reserved                               |
+ * 12B |                    Image length in bytes                             |
  *     \----------------------------------------------------------------------/
  *     Start a new file of the provided type
  */
 static int qed_nvm_flash_image_file_start(struct qed_dev *cdev,
 					  const u8 **data, bool *check_resp)
 {
+	u32 file_type, file_size = 0;
 	int rc;
 
 	*data += 4;
 	*check_resp = !!(**data & BIT(0));
 	*data += 4;
+	file_type = **data;
 
 	DP_VERBOSE(cdev, NETIF_MSG_DRV,
-		   "About to start a new file of type %02x\n", **data);
-	rc = qed_mcp_nvm_put_file_begin(cdev, **data);
+		   "About to start a new file of type %02x\n", file_type);
+	if (file_type == DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_MBI) {
+		*data += 4;
+		file_size = *((u32 *)(*data));
+	}
+
+	rc = qed_mcp_nvm_write(cdev, QED_PUT_FILE_BEGIN, file_type,
+			       (u8 *)(&file_size), 4);
 	*data += 4;
 
 	return rc;
@@ -1895,6 +2236,135 @@
 	return 0;
 }
 
+/* Binary file format -
+ *     /----------------------------------------------------------------------\
+ * 0B  |                       0x5 [command index]                            |
+ * 4B  | Number of config attributes     |          Reserved                  |
+ * 4B  | Config ID                       | Entity ID      | Length            |
+ * 4B  | Value                                                                |
+ *     |                                                                      |
+ *     \----------------------------------------------------------------------/
+ * There can be several cfg_id-entity_id-Length-Value sets as specified by
+ * 'Number of config attributes'.
+ *
+ * The API parses config attributes from the user-provided buffer and flashes
+ * them to the respective NVM path using the Management FW interface.
+ */
+static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 entity_id, len, buf[32];
+	bool need_nvm_init = true;
+	struct qed_ptt *ptt;
+	u16 cfg_id, count;
+	int rc = 0, i;
+	u32 flags;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	/* NVM CFG ID attribute header */
+	*data += 4;
+	count = *((u16 *)*data);
+	*data += 4;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "Read config ids: num_attrs = %d\n", count);
+	/* NVM CFG ID attributes. Start loop index from 1 to avoid additional
+	 * arithmetic operations in the implementation.
+	 */
+	for (i = 1; i <= count; i++) {
+		cfg_id = *((u16 *)*data);
+		*data += 2;
+		entity_id = **data;
+		(*data)++;
+		len = **data;
+		(*data)++;
+		memcpy(buf, *data, len);
+		*data += len;
+
+		flags = 0;
+		if (need_nvm_init) {
+			flags |= QED_NVM_CFG_OPTION_INIT;
+			need_nvm_init = false;
+		}
+
+		/* Commit to flash and free the resources */
+		if (!(i % QED_NVM_CFG_MAX_ATTRS) || i == count) {
+			flags |= QED_NVM_CFG_OPTION_COMMIT |
+				 QED_NVM_CFG_OPTION_FREE;
+			need_nvm_init = true;
+		}
+
+		if (entity_id)
+			flags |= QED_NVM_CFG_OPTION_ENTITY_SEL;
+
+		DP_VERBOSE(cdev, NETIF_MSG_DRV,
+			   "cfg_id = %d entity = %d len = %d\n", cfg_id,
+			   entity_id, len);
+		rc = qed_mcp_nvm_set_cfg(hwfn, ptt, cfg_id, entity_id, flags,
+					 buf, len);
+		if (rc) {
+			DP_ERR(cdev, "Error %d configuring %d\n", rc, cfg_id);
+			break;
+		}
+	}
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
+#define QED_MAX_NVM_BUF_LEN	32
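+/* Query the length of an NVM config attribute, falling back to the maximal
+ * buffer length if it cannot be read.
+ */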
+static int qed_nvm_flash_cfg_len(struct qed_dev *cdev, u32 cmd)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 buf[QED_MAX_NVM_BUF_LEN];
+	struct qed_ptt *ptt;
+	u32 len;
+	int rc;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return QED_MAX_NVM_BUF_LEN;
+
+	rc = qed_mcp_nvm_get_cfg(hwfn, ptt, cmd, 0, QED_NVM_CFG_GET_FLAGS, buf,
+				 &len);
+	if (rc || !len) {
+		DP_ERR(cdev, "Error %d reading %d\n", rc, cmd);
+		len = QED_MAX_NVM_BUF_LEN;
+	}
+
+	qed_ptt_release(hwfn, ptt);
+
+	return len;
+}
+
+static int qed_nvm_flash_cfg_read(struct qed_dev *cdev, u8 **data,
+				  u32 cmd, u32 entity_id)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	u32 flags, len;
+	int rc = 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "Read config cmd = %d entity id %d\n", cmd, entity_id);
+	flags = entity_id ? QED_NVM_CFG_GET_PF_FLAGS : QED_NVM_CFG_GET_FLAGS;
+	rc = qed_mcp_nvm_get_cfg(hwfn, ptt, cmd, entity_id, flags, *data, &len);
+	if (rc)
+		DP_ERR(cdev, "Error %d reading %d\n", rc, cmd);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
 static int qed_nvm_flash(struct qed_dev *cdev, const char *name)
 {
 	const struct firmware *image;
@@ -1936,6 +2406,9 @@
 			rc = qed_nvm_flash_image_access(cdev, &data,
 							&check_resp);
 			break;
+		case QED_NVM_FLASH_CMD_NVM_CFG_ID:
+			rc = qed_nvm_flash_cfg_write(cdev, &data);
+			break;
 		default:
 			DP_ERR(cdev, "Unknown command %08x\n", cmd_type);
 			rc = -EINVAL;
@@ -1986,6 +2459,15 @@
 	return qed_mcp_get_nvm_image(hwfn, type, buf, len);
 }
 
+void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
+{
+	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
+	void *cookie = p_hwfn->cdev->ops_cookie;
+
+	if (ops && ops->schedule_recovery_handler)
+		ops->schedule_recovery_handler(cookie);
+}
+
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
 			    void *handle)
 {
@@ -2009,6 +2491,23 @@
 	return status;
 }
 
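+/* Trigger the recovery process on the leading HW function */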
+static int qed_recovery_process(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	rc = qed_start_recovery_process(p_hwfn, p_ptt);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 static int qed_update_wol(struct qed_dev *cdev, bool enabled)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@@ -2125,6 +2624,31 @@
 	return rc;
 }
 
+static int qed_set_grc_config(struct qed_dev *cdev, u32 cfg_id, u32 val)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int rc = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	rc = qed_dbg_grc_config(hwfn, ptt, cfg_id, val);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
+static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev)
+{
+	return QED_AFFIN_HWFN_IDX(cdev);
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_memory = &qed_selftest_memory,
 	.selftest_interrupt = &qed_selftest_interrupt,
@@ -2163,11 +2687,19 @@
 	.nvm_get_image = &qed_nvm_get_image,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
+	.recovery_process = &qed_recovery_process,
+	.recovery_prolog = &qed_recovery_prolog,
 	.update_drv_state = &qed_update_drv_state,
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
 	.update_wol = &qed_update_wol,
+	.db_recovery_add = &qed_db_recovery_add,
+	.db_recovery_del = &qed_db_recovery_del,
 	.read_module_eeprom = &qed_read_module_eeprom,
+	.get_affin_hwfn_idx = &qed_get_affin_hwfn_idx,
+	.read_nvm_cfg = &qed_nvm_flash_cfg_read,
+	.read_nvm_cfg_len = &qed_nvm_flash_cfg_len,
+	.set_grc_config = &qed_set_grc_config,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 58c7eb9..36ddb89 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1070,6 +1070,27 @@
 	return 0;
 }
 
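+/* Inform the MFW that the driver load sequence has completed */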
+int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 resp = 0, param = 0;
+	int rc;
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_DONE, 0, &resp,
+			 &param);
+	if (rc) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to send a LOAD_DONE command, rc = %d\n", rc);
+		return rc;
+	}
+
+	/* Check if there is a DID mismatch between nvm-cfg/efuse */
+	if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
+		DP_NOTICE(p_hwfn,
+			  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");
+
+	return 0;
+}
+
 int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_mb_params mb_params;
@@ -1247,6 +1268,52 @@
 		p_link->eee_lp_adv_caps |= QED_EEE_10G_ADV;
 }
 
+static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct public_func *p_data, int pfid)
+{
+	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+					PUBLIC_FUNC);
+	u32 mfw_path_offsize = qed_rd(p_hwfn, p_ptt, addr);
+	u32 func_addr;
+	u32 i, size;
+
+	func_addr = SECTION_ADDR(mfw_path_offsize, pfid);
+	memset(p_data, 0, sizeof(*p_data));
+
+	size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
+	for (i = 0; i < size / sizeof(u32); i++)
+		((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
+					    func_addr + (i << 2));
+	return size;
+}
+
+static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
+				  struct public_func *p_shmem_info)
+{
+	struct qed_mcp_function_info *p_info;
+
+	p_info = &p_hwfn->mcp_info->func_info;
+
+	p_info->bandwidth_min = QED_MFW_GET_FIELD(p_shmem_info->config,
+						  FUNC_MF_CFG_MIN_BW);
+	if (p_info->bandwidth_min < 1 || p_info->bandwidth_min > 100) {
+		DP_INFO(p_hwfn,
+			"bandwidth minimum out of bounds [%02x]. Set to 1\n",
+			p_info->bandwidth_min);
+		p_info->bandwidth_min = 1;
+	}
+
+	p_info->bandwidth_max = QED_MFW_GET_FIELD(p_shmem_info->config,
+						  FUNC_MF_CFG_MAX_BW);
+	if (p_info->bandwidth_max < 1 || p_info->bandwidth_max > 100) {
+		DP_INFO(p_hwfn,
+			"bandwidth maximum out of bounds [%02x]. Set to 100\n",
+			p_info->bandwidth_max);
+		p_info->bandwidth_max = 100;
+	}
+}
+
 static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
 				       struct qed_ptt *p_ptt, bool b_reset)
 {
@@ -1274,10 +1341,29 @@
 		goto out;
 	}
 
-	if (p_hwfn->b_drv_link_init)
-		p_link->link_up = !!(status & LINK_STATUS_LINK_UP);
-	else
+	if (p_hwfn->b_drv_link_init) {
+		/* Link indication with modern MFW arrives as per-PF
+		 * indication.
+		 */
+		if (p_hwfn->mcp_info->capabilities &
+		    FW_MB_PARAM_FEATURE_SUPPORT_VLINK) {
+			struct public_func shmem_info;
+
+			qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
+					       MCP_PF_ID(p_hwfn));
+			p_link->link_up = !!(shmem_info.status &
+					     FUNC_STATUS_VIRTUAL_LINK_UP);
+			qed_read_pf_bandwidth(p_hwfn, &shmem_info);
+			DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
+				   "Virtual link_up = %d\n", p_link->link_up);
+		} else {
+			p_link->link_up = !!(status & LINK_STATUS_LINK_UP);
+			DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
+				   "Physical link_up = %d\n", p_link->link_up);
+		}
+	} else {
 		p_link->link_up = false;
+	}
 
 	p_link->full_duplex = true;
 	switch ((status & LINK_STATUS_SPEED_AND_DUPLEX_MASK)) {
@@ -1382,7 +1468,7 @@
 	if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE)
 		qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link);
 
-	qed_link_update(p_hwfn);
+	qed_link_update(p_hwfn, p_ptt);
 out:
 	spin_unlock_bh(&p_hwfn->mcp_info->link_lock);
 }
@@ -1463,6 +1549,60 @@
 	return 0;
 }
 
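+/* Read the per-path process-kill counter from the MFW shmem */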
+u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt)
+{
+	u32 path_offsize_addr, path_offsize, path_addr, proc_kill_cnt;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	path_offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+						 PUBLIC_PATH);
+	path_offsize = qed_rd(p_hwfn, p_ptt, path_offsize_addr);
+	path_addr = SECTION_ADDR(path_offsize, QED_PATH_ID(p_hwfn));
+
+	proc_kill_cnt = qed_rd(p_hwfn, p_ptt,
+			       path_addr +
+			       offsetof(struct public_path, process_kill)) &
+			PROCESS_KILL_COUNTER_MASK;
+
+	return proc_kill_cnt;
+}
+
+static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u32 proc_kill_cnt;
+
+	/* Prevent possible attentions/interrupts during the recovery handling
+	 * and until its load phase, during which they will be re-enabled.
+	 */
+	qed_int_igu_disable_int(p_hwfn, p_ptt);
+
+	DP_NOTICE(p_hwfn, "Received a process kill indication\n");
+
+	/* The following operations should be done once, and thus in CMT mode
+	 * are carried out by only the first HW function.
+	 */
+	if (p_hwfn != QED_LEADING_HWFN(cdev))
+		return;
+
+	if (cdev->recov_in_prog) {
+		DP_NOTICE(p_hwfn,
+			  "Ignoring the indication since a recovery process is already in progress\n");
+		return;
+	}
+
+	cdev->recov_in_prog = true;
+
+	proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt);
+	DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt);
+
+	qed_schedule_recovery_handler(p_hwfn);
+}
+
 static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
 					struct qed_ptt *p_ptt,
 					enum MFW_DRV_MSG_TYPE type)
@@ -1504,53 +1644,6 @@
 	qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
 }
 
-static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
-				  struct public_func *p_shmem_info)
-{
-	struct qed_mcp_function_info *p_info;
-
-	p_info = &p_hwfn->mcp_info->func_info;
-
-	p_info->bandwidth_min = (p_shmem_info->config &
-				 FUNC_MF_CFG_MIN_BW_MASK) >>
-					FUNC_MF_CFG_MIN_BW_SHIFT;
-	if (p_info->bandwidth_min < 1 || p_info->bandwidth_min > 100) {
-		DP_INFO(p_hwfn,
-			"bandwidth minimum out of bounds [%02x]. Set to 1\n",
-			p_info->bandwidth_min);
-		p_info->bandwidth_min = 1;
-	}
-
-	p_info->bandwidth_max = (p_shmem_info->config &
-				 FUNC_MF_CFG_MAX_BW_MASK) >>
-					FUNC_MF_CFG_MAX_BW_SHIFT;
-	if (p_info->bandwidth_max < 1 || p_info->bandwidth_max > 100) {
-		DP_INFO(p_hwfn,
-			"bandwidth maximum out of bounds [%02x]. Set to 100\n",
-			p_info->bandwidth_max);
-		p_info->bandwidth_max = 100;
-	}
-}
-
-static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
-				  struct qed_ptt *p_ptt,
-				  struct public_func *p_data, int pfid)
-{
-	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
-					PUBLIC_FUNC);
-	u32 mfw_path_offsize = qed_rd(p_hwfn, p_ptt, addr);
-	u32 func_addr = SECTION_ADDR(mfw_path_offsize, pfid);
-	u32 i, size;
-
-	memset(p_data, 0, sizeof(*p_data));
-
-	size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
-	for (i = 0; i < size / sizeof(u32); i++)
-		((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
-					    func_addr + (i << 2));
-	return size;
-}
-
 static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_function_info *p_info;
@@ -1601,7 +1694,7 @@
 		qed_sp_pf_update_stag(p_hwfn);
 	}
 
-	DP_VERBOSE(p_hwfn, QED_MSG_SP, "ovlan  = %d hw_mode = 0x%x\n",
+	DP_VERBOSE(p_hwfn, QED_MSG_SP, "ovlan = %d hw_mode = 0x%x\n",
 		   p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode);
 
 	/* Acknowledge the MFW */
@@ -1623,7 +1716,9 @@
 	val = (port_cfg & OEM_CFG_CHANNEL_TYPE_MASK) >>
 		OEM_CFG_CHANNEL_TYPE_OFFSET;
 	if (val != OEM_CFG_CHANNEL_TYPE_STAGGED)
-		DP_NOTICE(p_hwfn, "Incorrect UFP Channel type  %d\n", val);
+		DP_NOTICE(p_hwfn,
+			  "Incorrect UFP Channel type %d port_id 0x%02x\n",
+			  val, MFW_PORT(p_hwfn));
 
 	val = (port_cfg & OEM_CFG_SCHED_TYPE_MASK) >> OEM_CFG_SCHED_TYPE_OFFSET;
 	if (val == OEM_CFG_SCHED_TYPE_ETS) {
@@ -1632,7 +1727,9 @@
 		p_hwfn->ufp_info.mode = QED_UFP_MODE_VNIC_BW;
 	} else {
 		p_hwfn->ufp_info.mode = QED_UFP_MODE_UNKNOWN;
-		DP_NOTICE(p_hwfn, "Unknown UFP scheduling mode %d\n", val);
+		DP_NOTICE(p_hwfn,
+			  "Unknown UFP scheduling mode %d port_id 0x%02x\n",
+			  val, MFW_PORT(p_hwfn));
 	}
 
 	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
@@ -1647,13 +1744,15 @@
 		p_hwfn->ufp_info.pri_type = QED_UFP_PRI_OS;
 	} else {
 		p_hwfn->ufp_info.pri_type = QED_UFP_PRI_UNKNOWN;
-		DP_NOTICE(p_hwfn, "Unknown Host priority control %d\n", val);
+		DP_NOTICE(p_hwfn,
+			  "Unknown Host priority control %d port_id 0x%02x\n",
+			  val, MFW_PORT(p_hwfn));
 	}
 
 	DP_NOTICE(p_hwfn,
-		  "UFP shmem config: mode = %d tc = %d pri_type = %d\n",
-		  p_hwfn->ufp_info.mode,
-		  p_hwfn->ufp_info.tc, p_hwfn->ufp_info.pri_type);
+		  "UFP shmem config: mode = %d tc = %d pri_type = %d port_id 0x%02x\n",
+		  p_hwfn->ufp_info.mode, p_hwfn->ufp_info.tc,
+		  p_hwfn->ufp_info.pri_type, MFW_PORT(p_hwfn));
 }
 
 static int
@@ -1734,6 +1833,9 @@
 		case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
 			qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
 			break;
+		case MFW_DRV_MSG_ERROR_RECOVERY:
+			qed_mcp_handle_process_kill(p_hwfn, p_ptt);
+			break;
 		case MFW_DRV_MSG_GET_LAN_STATS:
 		case MFW_DRV_MSG_GET_FCOE_STATS:
 		case MFW_DRV_MSG_GET_ISCSI_STATS:
@@ -1849,12 +1951,12 @@
 	return 0;
 }
 
-int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
+int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
+			   struct qed_ptt *p_ptt, u32 *p_media_type)
 {
-	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
-	struct qed_ptt  *p_ptt;
+	*p_media_type = MEDIA_UNSPECIFIED;
 
-	if (IS_VF(cdev))
+	if (IS_VF(p_hwfn->cdev))
 		return -EINVAL;
 
 	if (!qed_mcp_is_init(p_hwfn)) {
@@ -1862,16 +1964,198 @@
 		return -EBUSY;
 	}
 
-	*p_media_type = MEDIA_UNSPECIFIED;
+	if (!p_ptt) {
+		*p_media_type = MEDIA_UNSPECIFIED;
+		return -EINVAL;
+	}
 
-	p_ptt = qed_ptt_acquire(p_hwfn);
-	if (!p_ptt)
+	*p_media_type = qed_rd(p_hwfn, p_ptt,
+			       p_hwfn->mcp_info->port_addr +
+			       offsetof(struct public_port,
+					media_type));
+
+	return 0;
+}
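
With this refactor, qed_mcp_get_media_type() no longer acquires a PTT window internally; the caller owns that lifetime. A minimal caller sketch under that assumption (the helper name qed_report_media_type() is hypothetical; qed_ptt_acquire()/qed_ptt_release() are the pairing the removed lines above used):

	static int qed_report_media_type(struct qed_hwfn *p_hwfn)
	{
		struct qed_ptt *p_ptt;
		u32 media_type;
		int rc;

		/* the caller, not the MCP helper, acquires and releases the PTT */
		p_ptt = qed_ptt_acquire(p_hwfn);
		if (!p_ptt)
			return -EBUSY;

		rc = qed_mcp_get_media_type(p_hwfn, p_ptt, &media_type);
		if (!rc)
			DP_INFO(p_hwfn, "Media type: 0x%08x\n", media_type);

		qed_ptt_release(p_hwfn, p_ptt);
		return rc;
	}

This lets callers that already hold a PTT (e.g. link-change handlers) avoid a second acquire.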
+
+int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *p_transceiver_state,
+				 u32 *p_transceiver_type)
+{
+	u32 transceiver_info;
+
+	*p_transceiver_type = ETH_TRANSCEIVER_TYPE_NONE;
+	*p_transceiver_state = ETH_TRANSCEIVER_STATE_UPDATING;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
 		return -EBUSY;
+	}
 
-	*p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
-			       offsetof(struct public_port, media_type));
+	transceiver_info = qed_rd(p_hwfn, p_ptt,
+				  p_hwfn->mcp_info->port_addr +
+				  offsetof(struct public_port,
+					   transceiver_data));
 
-	qed_ptt_release(p_hwfn, p_ptt);
+	*p_transceiver_state = (transceiver_info &
+				ETH_TRANSCEIVER_STATE_MASK) >>
+				ETH_TRANSCEIVER_STATE_OFFSET;
+
+	if (*p_transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT)
+		*p_transceiver_type = (transceiver_info &
+				       ETH_TRANSCEIVER_TYPE_MASK) >>
+				       ETH_TRANSCEIVER_TYPE_OFFSET;
+	else
+		*p_transceiver_type = ETH_TRANSCEIVER_TYPE_UNKNOWN;
+
+	return 0;
+}
+
+static bool qed_is_transceiver_ready(u32 transceiver_state,
+				     u32 transceiver_type)
+{
+	if ((transceiver_state & ETH_TRANSCEIVER_STATE_PRESENT) &&
+	    ((transceiver_state & ETH_TRANSCEIVER_STATE_UPDATING) == 0x0) &&
+	    (transceiver_type != ETH_TRANSCEIVER_TYPE_NONE))
+		return true;
+
+	return false;
+}
+
+int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_speed_mask)
+{
+	u32 transceiver_type, transceiver_state;
+	int ret;
+
+	ret = qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
+					   &transceiver_type);
+	if (ret)
+		return ret;
+
+	if (!qed_is_transceiver_ready(transceiver_state, transceiver_type))
+		return -EINVAL;
+
+	switch (transceiver_type) {
+	case ETH_TRANSCEIVER_TYPE_1G_LX:
+	case ETH_TRANSCEIVER_TYPE_1G_SX:
+	case ETH_TRANSCEIVER_TYPE_1G_PCC:
+	case ETH_TRANSCEIVER_TYPE_1G_ACC:
+	case ETH_TRANSCEIVER_TYPE_1000BASET:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_10G_SR:
+	case ETH_TRANSCEIVER_TYPE_10G_LR:
+	case ETH_TRANSCEIVER_TYPE_10G_LRM:
+	case ETH_TRANSCEIVER_TYPE_10G_ER:
+	case ETH_TRANSCEIVER_TYPE_10G_PCC:
+	case ETH_TRANSCEIVER_TYPE_10G_ACC:
+	case ETH_TRANSCEIVER_TYPE_4x10G:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_40G_LR4:
+	case ETH_TRANSCEIVER_TYPE_40G_SR4:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_100G_AOC:
+	case ETH_TRANSCEIVER_TYPE_100G_SR4:
+	case ETH_TRANSCEIVER_TYPE_100G_LR4:
+	case ETH_TRANSCEIVER_TYPE_100G_ER4:
+	case ETH_TRANSCEIVER_TYPE_100G_ACC:
+		*p_speed_mask =
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_25G_SR:
+	case ETH_TRANSCEIVER_TYPE_25G_LR:
+	case ETH_TRANSCEIVER_TYPE_25G_AOC:
+	case ETH_TRANSCEIVER_TYPE_25G_ACC_S:
+	case ETH_TRANSCEIVER_TYPE_25G_ACC_M:
+	case ETH_TRANSCEIVER_TYPE_25G_ACC_L:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_25G_CA_N:
+	case ETH_TRANSCEIVER_TYPE_25G_CA_S:
+	case ETH_TRANSCEIVER_TYPE_25G_CA_L:
+	case ETH_TRANSCEIVER_TYPE_4x25G_CR:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_40G_CR4:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_100G_CR4:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR:
+		*p_speed_mask =
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC:
+		*p_speed_mask =
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_XLPPI:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_10G_BASET:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	default:
+		DP_INFO(p_hwfn, "Unknown transceiver type 0x%x\n",
+			transceiver_type);
+		*p_speed_mask = 0xff;
+		break;
+	}
+
+	return 0;
+}
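
qed_mcp_trans_speed_mask() reports what the plugged module itself can do; a caller would typically intersect this with the speeds the port advertises. A sketch, where params is an illustrative qed_mcp_link_params instance (the speed.advertised_speeds field layout is an assumption):

	u32 speed_mask;

	if (!qed_mcp_trans_speed_mask(p_hwfn, p_ptt, &speed_mask))
		/* e.g. a 25G CA (copper) module yields the 25G | 10G | 1G mask */
		params.speed.advertised_speeds &= speed_mask;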
+
+int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_board_config)
+{
+	u32 nvm_cfg_addr, nvm_cfg1_offset, port_cfg_addr;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+		return -EBUSY;
+	}
+	if (!p_ptt) {
+		*p_board_config = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED;
+		return -EINVAL;
+	}
+
+	nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0);
+	nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4);
+	port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+			offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]);
+	*p_board_config = qed_rd(p_hwfn, p_ptt,
+				 port_cfg_addr +
+				 offsetof(struct nvm_cfg1_port,
+					  board_cfg));
 
 	return 0;
 }
@@ -2097,6 +2381,43 @@
 	return 0;
 }
 
+int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+
+	if (cdev->recov_in_prog) {
+		DP_NOTICE(p_hwfn,
+			  "Avoid triggering a recovery since such a process is already in progress\n");
+		return -EAGAIN;
+	}
+
+	DP_NOTICE(p_hwfn, "Triggering a recovery process\n");
+	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1);
+
+	return 0;
+}
+
+#define QED_RECOVERY_PROLOG_SLEEP_MS    100
+
+int qed_recovery_prolog(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = p_hwfn->p_main_ptt;
+	int rc;
+
+	/* Allow ongoing PCIe transactions to complete */
+	msleep(QED_RECOVERY_PROLOG_SLEEP_MS);
+
+	/* Clear the PF's internal FID_enable in the PXP */
+	rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
+	if (rc)
+		DP_NOTICE(p_hwfn,
+			  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
+			  rc);
+
+	return rc;
+}
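
Per the qed_mcp.h comment added further below, qed_recovery_prolog() must be a recovery handler's first step and needs process context (it sleeps in msleep()). A hypothetical handler skeleton illustrating the ordering (qed_handle_recovery() is an illustrative name, not a driver function):

	static void qed_handle_recovery(struct qed_dev *cdev)
	{
		if (qed_recovery_prolog(cdev))	/* must run first */
			return;

		/* ...tear the device down and re-initialize it... */
	}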
+
 static int
 qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u8 vf_id, u8 num)
@@ -2539,24 +2860,6 @@
 	return 0;
 }
 
-int qed_mcp_nvm_put_file_begin(struct qed_dev *cdev, u32 addr)
-{
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_ptt *p_ptt;
-	u32 resp, param;
-	int rc;
-
-	p_ptt = qed_ptt_acquire(p_hwfn);
-	if (!p_ptt)
-		return -EBUSY;
-	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_PUT_FILE_BEGIN, addr,
-			 &resp, &param);
-	cdev->mcp_nvm_resp = resp;
-	qed_ptt_release(p_hwfn, p_ptt);
-
-	return rc;
-}
-
 int qed_mcp_nvm_write(struct qed_dev *cdev,
 		      u32 cmd, u32 addr, u8 *p_buf, u32 len)
 {
@@ -2570,6 +2873,9 @@
 		return -EBUSY;
 
 	switch (cmd) {
+	case QED_PUT_FILE_BEGIN:
+		nvm_cmd = DRV_MSG_CODE_NVM_PUT_FILE_BEGIN;
+		break;
 	case QED_PUT_FILE_DATA:
 		nvm_cmd = DRV_MSG_CODE_NVM_PUT_FILE_DATA;
 		break;
@@ -2582,10 +2888,14 @@
 		goto out;
 	}
 
+	buf_size = min_t(u32, (len - buf_idx), MCP_DRV_NVM_BUF_LEN);
 	while (buf_idx < len) {
-		buf_size = min_t(u32, (len - buf_idx), MCP_DRV_NVM_BUF_LEN);
-		nvm_offset = ((buf_size << DRV_MB_PARAM_NVM_LEN_OFFSET) |
-			      addr) + buf_idx;
+		if (cmd == QED_PUT_FILE_BEGIN)
+			nvm_offset = addr;
+		else
+			nvm_offset = ((buf_size <<
+				       DRV_MB_PARAM_NVM_LEN_OFFSET) | addr) +
+				       buf_idx;
 		rc = qed_mcp_nvm_wr_cmd(p_hwfn, p_ptt, nvm_cmd, nvm_offset,
 					&resp, &param, buf_size,
 					(u32 *)&p_buf[buf_idx]);
@@ -2610,7 +2920,19 @@
 		if (buf_idx % 0x1000 > (buf_idx + buf_size) % 0x1000)
 			usleep_range(1000, 2000);
 
-		buf_idx += buf_size;
+		/* For MBI upgrade, MFW response includes the next buffer offset
+		 * to be delivered to MFW.
+		 */
+		if (param && cmd == QED_PUT_FILE_DATA) {
+			buf_idx = QED_MFW_GET_FIELD(param,
+					FW_MB_PARAM_NVM_PUT_FILE_REQ_OFFSET);
+			buf_size = QED_MFW_GET_FIELD(param,
+					 FW_MB_PARAM_NVM_PUT_FILE_REQ_SIZE);
+		} else {
+			buf_idx += buf_size;
+			buf_size = min_t(u32, (len - buf_idx),
+					 MCP_DRV_NVM_BUF_LEN);
+		}
 	}
 
 	cdev->mcp_nvm_resp = resp;
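
The new non-linear loop above is easiest to see with concrete numbers (values illustrative only):

	/*
	 * Suppose len = 0x4000 and the chunk at buf_idx 0x1000 was just sent.
	 * If MFW's reply encodes offset 0x3000 and size 0x40 in 'param', the
	 * next iteration resumes at buf_idx = 0x3000 with buf_size = 0x40;
	 * otherwise buf_idx advances linearly by buf_size, as before.
	 */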
@@ -3332,6 +3654,12 @@
 	}
 }
 
+bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn)
+{
+	return !!(p_hwfn->mcp_info->capabilities &
+		  FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ);
+}
+
 int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 mcp_resp;
@@ -3351,8 +3679,135 @@
 {
 	u32 mcp_resp, mcp_param, features;
 
-	features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE;
+	features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE |
+		   DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK;
 
 	return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
 			   features, &mcp_resp, &mcp_param);
 }
+
+int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_mb_params mb_params = {0};
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u8 fir_valid, l2_valid;
+	int rc;
+
+	mb_params.cmd = DRV_MSG_CODE_GET_ENGINE_CONFIG;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The get_engine_config command is unsupported by the MFW\n");
+		return -EOPNOTSUPP;
+	}
+
+	fir_valid = QED_MFW_GET_FIELD(mb_params.mcp_param,
+				      FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID);
+	if (fir_valid)
+		cdev->fir_affin =
+		    QED_MFW_GET_FIELD(mb_params.mcp_param,
+				      FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE);
+
+	l2_valid = QED_MFW_GET_FIELD(mb_params.mcp_param,
+				     FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID);
+	if (l2_valid)
+		cdev->l2_affin_hint =
+		    QED_MFW_GET_FIELD(mb_params.mcp_param,
+				      FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE);
+
+	DP_INFO(p_hwfn,
+		"Engine affinity config: FIR={valid %hhd, value %hhd}, L2_hint={valid %hhd, value %hhd}\n",
+		fir_valid, cdev->fir_affin, l2_valid, cdev->l2_affin_hint);
+
+	return 0;
+}
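
QED_MFW_GET_FIELD()/QED_MFW_SET_FIELD() are the mailbox-parameter accessors used throughout this hunk; assuming the usual token-pasted _MASK/_SHIFT companion defines, they expand roughly to the following (a sketch of the idiom, not the exact qed.h definitions):

	#define GET_FIELD_SKETCH(val, field) \
		(((val) & field##_MASK) >> field##_SHIFT)

	#define SET_FIELD_SKETCH(val, field, v)				\
		do {							\
			(val) &= ~field##_MASK;				\
			(val) |= ((v) << field##_SHIFT) & field##_MASK;	\
		} while (0)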
+
+int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_mb_params mb_params = {0};
+	struct qed_dev *cdev = p_hwfn->cdev;
+	int rc;
+
+	mb_params.cmd = DRV_MSG_CODE_GET_PPFID_BITMAP;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The get_ppfid_bitmap command is unsupported by the MFW\n");
+		return -EOPNOTSUPP;
+	}
+
+	cdev->ppfid_bitmap = QED_MFW_GET_FIELD(mb_params.mcp_param,
+					       FW_MB_PARAM_PPFID_BITMAP);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_SP, "PPFID bitmap 0x%hhx\n",
+		   cdev->ppfid_bitmap);
+
+	return 0;
+}
+
+int qed_mcp_nvm_get_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+			u32 *p_len)
+{
+	u32 mb_param = 0, resp, param;
+	int rc;
+
+	QED_MFW_SET_FIELD(mb_param, DRV_MB_PARAM_NVM_CFG_OPTION_ID, option_id);
+	if (flags & QED_NVM_CFG_OPTION_INIT)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_INIT, 1);
+	if (flags & QED_NVM_CFG_OPTION_FREE)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_FREE, 1);
+	if (flags & QED_NVM_CFG_OPTION_ENTITY_SEL) {
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_SEL, 1);
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_ID,
+				  entity_id);
+	}
+
+	rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+				DRV_MSG_CODE_GET_NVM_CFG_OPTION,
+				mb_param, &resp, &param, p_len, (u32 *)p_buf);
+
+	return rc;
+}
+
+int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+			u32 len)
+{
+	u32 mb_param = 0, resp, param;
+
+	QED_MFW_SET_FIELD(mb_param, DRV_MB_PARAM_NVM_CFG_OPTION_ID, option_id);
+	if (flags & QED_NVM_CFG_OPTION_ALL)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ALL, 1);
+	if (flags & QED_NVM_CFG_OPTION_INIT)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_INIT, 1);
+	if (flags & QED_NVM_CFG_OPTION_COMMIT)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_COMMIT, 1);
+	if (flags & QED_NVM_CFG_OPTION_FREE)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_FREE, 1);
+	if (flags & QED_NVM_CFG_OPTION_ENTITY_SEL) {
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_SEL, 1);
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_ID,
+				  entity_id);
+	}
+
+	return qed_mcp_nvm_wr_cmd(p_hwfn, p_ptt,
+				  DRV_MSG_CODE_SET_NVM_CFG_OPTION,
+				  mb_param, &resp, &param, len, (u32 *)p_buf);
+}
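
A plausible caller of qed_mcp_nvm_set_cfg(), matching the QED_NVM_CFG_OPTION_* flags added to qed_mcp.h below (the option id and value are illustrative):

	u32 val = 1;
	u16 option_id = 0x1;	/* illustrative NVM option id */
	u16 flags = QED_NVM_CFG_OPTION_ALL | QED_NVM_CFG_OPTION_INIT |
		    QED_NVM_CFG_OPTION_COMMIT | QED_NVM_CFG_OPTION_FREE;
	int rc;

	/* program the option across all entities, then commit it to NVM */
	rc = qed_mcp_nvm_set_cfg(p_hwfn, p_ptt, option_id, 0, flags,
				 (u8 *)&val, sizeof(val));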
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 85e6b39..9c4c276 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -251,6 +251,12 @@
 	struct qed_mfw_tlv_iscsi iscsi;
 };
 
+#define QED_NVM_CFG_OPTION_ALL		BIT(0)
+#define QED_NVM_CFG_OPTION_INIT		BIT(1)
+#define QED_NVM_CFG_OPTION_COMMIT       BIT(2)
+#define QED_NVM_CFG_OPTION_FREE		BIT(3)
+#define QED_NVM_CFG_OPTION_ENTITY_SEL	BIT(4)
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -322,14 +328,61 @@
  * @brief Get media type value of the port.
  *
 * @param p_hwfn
+ * @param p_ptt
 * @param media_type - media type value
  *
  * @return int -
 *      0 - Operation was successful.
  *      -EBUSY - Operation failed
  */
-int qed_mcp_get_media_type(struct qed_dev      *cdev,
-			   u32                  *media_type);
+int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
+			   struct qed_ptt *p_ptt, u32 *media_type);
+
+/**
+ * @brief Get transceiver data of the port.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_transceiver_state - transceiver state.
+ * @param p_transceiver_type - transceiver type value
+ *
+ * @return int -
+ *      0 - Operation was successful.
+ *      -EBUSY - Operation failed
+ */
+int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *p_transceiver_state,
+				 u32 *p_transceiver_type);
+
+/**
+ * @brief Get transceiver supported speed mask.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_speed_mask - Bit mask of all supported speeds.
+ *
+ * @return int -
+ *      0 - Operation was successful.
+ *      -EBUSY - Operation failed
+ */
+int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_speed_mask);
+
+/**
+ * @brief Get board configuration.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_board_config - Board config.
+ *
+ * @return int -
+ *      0 - Operation was successful.
+ *      -EBUSY - Operation failed
+ */
+int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_board_config);
 
 /**
  * @brief General function for sending commands to the MCP
@@ -394,6 +447,38 @@
 			 struct qed_mcp_drv_version *p_ver);
 
 /**
+ * @brief Read the MFW process kill counter
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return u32
+ */
+u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt);
+
+/**
+ * @brief Trigger a recovery process
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *
+ * @return int
+ */
+int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief A recovery handler must call this function as its first step.
+ *        It is assumed that the handler is not run from an interrupt context.
+ *
+ *  @param cdev
+ *
+ * @return int
+ */
+int qed_recovery_prolog(struct qed_dev *cdev);
+
+/**
  * @brief Notify MFW about the change in base device properties
  *
  *  @param p_hwfn
@@ -496,16 +581,6 @@
 		      u32 cmd, u32 addr, u8 *p_buf, u32 len);
 
 /**
- * @brief Put file begin
- *
- *  @param cdev
- *  @param addr - nvm offset
- *
- * @return int - 0 - operation was successful.
- */
-int qed_mcp_nvm_put_file_begin(struct qed_dev *cdev, u32 addr);
-
-/**
  * @brief Check latest response
  *
  *  @param cdev
@@ -622,10 +697,6 @@
 					    rel_pfid)
 #define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
 
-#define MFW_PORT(_p_hwfn)       ((_p_hwfn)->abs_pf_id %			  \
-				 ((_p_hwfn)->cdev->num_ports_in_engine * \
-				  qed_device_num_engines((_p_hwfn)->cdev)))
-
 struct qed_mcp_info {
 	/* List for mailbox commands which were sent and wait for a response */
 	struct list_head			cmd_list;
@@ -764,6 +835,16 @@
 		     struct qed_load_req_params *p_params);
 
 /**
+ * @brief Sends a LOAD_DONE message to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return int - 0 - Operation was successful.
+ */
+int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
  * @brief Sends a UNLOAD_REQ message to the MFW
  *
  * @param p_hwfn
@@ -1069,6 +1150,16 @@
 				    struct qed_resc_unlock_params *p_unlock,
 				    enum qed_resc_lock
 				    resource, bool b_is_permanent);
+
+/**
+ * @brief - Return whether management firmware support smart AN
+ *
+ * @param p_hwfn
+ *
+ * @return bool - true if feature is supported.
+ */
+bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn);
+
 /**
  * @brief Learn of supported MFW features; To be done during early init
  *
@@ -1101,4 +1192,49 @@
  */
 int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn);
 
+/**
+ * @brief Get the engine affinity configuration.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Get the PPFID bitmap.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Get NVM config attribute value.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param option_id
+ * @param entity_id
+ * @param flags
+ * @param p_buf
+ * @param p_len
+ */
+int qed_mcp_nvm_get_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+			u32 *p_len);
+
+/**
+ * @brief Set NVM config attribute value.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param option_id
+ * @param entity_id
+ * @param flags
+ * @param p_buf
+ * @param len
+ */
+int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+			u32 len);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 6172354..ffac4ac 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -211,9 +211,8 @@
 			if (!p_buffer)
 				break;
 
-			list_del(&p_buffer->list_entry);
-			list_add_tail(&p_buffer->list_entry,
-				      &p_ooo_info->free_buffers_list);
+			list_move_tail(&p_buffer->list_entry,
+				       &p_ooo_info->free_buffers_list);
 		}
 		list_add_tail(&p_isle->list_entry,
 			      &p_ooo_info->free_isles_list);
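
These qed_ooo.c hunks are a mechanical conversion: list_move_tail() from <linux/list.h> performs exactly the deleted two-call sequence in one step.

	/* equivalent forms */
	list_del(&p_buffer->list_entry);
	list_add_tail(&p_buffer->list_entry, &p_ooo_info->free_buffers_list);

	list_move_tail(&p_buffer->list_entry, &p_ooo_info->free_buffers_list);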
@@ -247,9 +246,8 @@
 				if (!p_buffer)
 					break;
 
-			list_del(&p_buffer->list_entry);
-				list_add_tail(&p_buffer->list_entry,
-					      &p_ooo_info->free_buffers_list);
+				list_move_tail(&p_buffer->list_entry,
+					       &p_ooo_info->free_buffers_list);
 			}
 			list_add_tail(&p_isle->list_entry,
 				      &p_ooo_info->free_isles_list);
@@ -353,11 +351,9 @@
 			  struct qed_ooo_info *p_ooo_info,
 			  u32 cid, u8 drop_isle, u8 drop_size)
 {
-	struct qed_ooo_archipelago *p_archipelago = NULL;
 	struct qed_ooo_isle *p_isle = NULL;
 	u8 isle_idx;
 
-	p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
 	for (isle_idx = 0; isle_idx < drop_size; isle_idx++) {
 		p_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, drop_isle);
 		if (!p_isle) {
@@ -462,7 +458,6 @@
 void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
 			struct qed_ooo_info *p_ooo_info, u32 cid, u8 left_isle)
 {
-	struct qed_ooo_archipelago *p_archipelago = NULL;
 	struct qed_ooo_isle *p_right_isle = NULL;
 	struct qed_ooo_isle *p_left_isle = NULL;
 
@@ -475,7 +470,6 @@
 		return;
 	}
 
-	p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
 	list_del(&p_right_isle->list_entry);
 	p_ooo_info->cur_isles_number--;
 	if (left_isle) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index 5a90d69..0dacf2c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -44,10 +44,12 @@
 /* Add/subtract the Adjustment_Value when making a Drift adjustment */
 #define QED_DRIFT_CNTR_DIRECTION_SHIFT		31
 #define QED_TIMESTAMP_MASK			BIT(16)
+/* Param mask for the hardware to detect/timestamp unicast PTP packets */
+#define QED_PTP_UCAST_PARAM_MASK		0xF
 
 static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn)
 {
-	switch (qed_device_get_port_id(p_hwfn->cdev)) {
+	switch (MFW_PORT(p_hwfn)) {
 	case 0:
 		return QED_RESC_LOCK_PTP_PORT0;
 	case 1:
@@ -157,7 +159,8 @@
 	*timestamp = 0;
 	val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID);
 	if (!(val & QED_TIMESTAMP_MASK)) {
-		DP_INFO(p_hwfn, "Invalid Tx timestamp, buf_seqid = %d\n", val);
+		DP_VERBOSE(p_hwfn, QED_MSG_DEBUG,
+			   "Invalid Tx timestamp, buf_seqid = %08x\n", val);
 		return -EINVAL;
 	}
 
@@ -242,7 +245,8 @@
 		return -EINVAL;
 	}
 
-	qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0);
+	qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK,
+	       QED_PTP_UCAST_PARAM_MASK);
 	qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, rule_mask);
 	qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, enable_cfg);
 
@@ -252,7 +256,8 @@
 		qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3FFF);
 	} else {
 		qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, enable_cfg);
-		qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0);
+		qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK,
+		       QED_PTP_UCAST_PARAM_MASK);
 		qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, rule_mask);
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 7873d6d..38b1f40 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -442,7 +442,7 @@
 	/* Vendor specific information */
 	dev->vendor_id = cdev->vendor_id;
 	dev->vendor_part_id = cdev->device_id;
-	dev->hw_ver = 0;
+	dev->hw_ver = cdev->chip_rev;
 	dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) |
 		      (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION);
 
@@ -530,9 +530,8 @@
 	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1);
 
 	/* Check atomic operations support in PCI configuration space. */
-	pci_read_config_dword(cdev->pdev,
-			      cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2,
-			      &pci_status_control);
+	pcie_capability_read_dword(cdev->pdev, PCI_EXP_DEVCTL2,
+				   &pci_status_control);
 
 	if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
 		SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
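
pcie_capability_read_dword() takes an offset relative to the device's PCIe capability and locates the capability itself, so the open-coded pdev->pcie_cap arithmetic goes away:

	u32 devctl2;

	/* PCI_EXP_DEVCTL2 is an offset within the PCIe capability */
	pcie_capability_read_dword(pdev, PCI_EXP_DEVCTL2, &devctl2);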
@@ -700,7 +699,7 @@
 		return rc;
 
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
-		rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+		rc = qed_iwarp_setup(p_hwfn, params);
 		if (rc)
 			return rc;
 	} else {
@@ -742,7 +741,7 @@
 	       (ll2_ethertype_en & 0xFFFE));
 
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
-		rc = qed_iwarp_stop(p_hwfn, p_ptt);
+		rc = qed_iwarp_stop(p_hwfn);
 		if (rc) {
 			qed_ptt_release(p_hwfn, p_ptt);
 			return rc;
@@ -799,11 +798,10 @@
 	/* Calculate the corresponding DPI address */
 	dpi_start_offset = p_hwfn->dpi_start_offset;
 
-	out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells +
-				     dpi_start_offset +
-				     ((out_params->dpi) * p_hwfn->dpi_size));
+	out_params->dpi_addr = p_hwfn->doorbells + dpi_start_offset +
+			       out_params->dpi * p_hwfn->dpi_size;
 
-	out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr +
+	out_params->dpi_phys_addr = p_hwfn->db_phys_addr +
 				    dpi_start_offset +
 				    ((out_params->dpi) * p_hwfn->dpi_size);
 
@@ -818,14 +816,17 @@
 {
 	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
 	struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port;
+	struct qed_mcp_link_state *p_link_output;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n");
 
-	/* Link may have changed */
-	p_port->port_state = p_hwfn->mcp_info->link_output.link_up ?
-			     QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+	/* The link state is saved only for the leading hwfn */
+	p_link_output = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output;
 
-	p_port->link_speed = p_hwfn->mcp_info->link_output.speed;
+	p_port->port_state = p_link_output->link_up ? QED_RDMA_PORT_UP
+	    : QED_RDMA_PORT_DOWN;
+
+	p_port->link_speed = p_link_output->speed;
 
 	p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE;
 
@@ -870,7 +871,7 @@
 static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
 				  struct qed_dev_rdma_info *info)
 {
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
 
 	memset(info, 0, sizeof(*info));
 
@@ -889,9 +890,9 @@
 	int feat_num;
 
 	if (cdev->num_hwfns > 1)
-		feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE);
+		feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE);
 	else
-		feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) *
+		feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE) *
 			   cdev->num_hwfns;
 
 	return feat_num;
@@ -899,7 +900,7 @@
 
 static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev)
 {
-	int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ);
+	int n_cnq = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_RDMA_CNQ);
 	int n_msix = cdev->int_params.rdma_msix_cnt;
 
 	return min_t(int, n_cnq, n_msix);
@@ -1653,7 +1654,7 @@
 
 static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
 {
-	return QED_LEADING_HWFN(cdev);
+	return QED_AFFIN_HWFN(cdev);
 }
 
 static int qed_rdma_modify_srq(void *rdma_cxt,
@@ -1881,7 +1882,7 @@
 static int qed_rdma_init(struct qed_dev *cdev,
 			 struct qed_rdma_start_in_params *params)
 {
-	return qed_rdma_start(QED_LEADING_HWFN(cdev), params);
+	return qed_rdma_start(QED_AFFIN_HWFN(cdev), params);
 }
 
 static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
@@ -1899,23 +1900,12 @@
 				       u8 *old_mac_address,
 				       u8 *new_mac_address)
 {
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_ptt *p_ptt;
 	int rc = 0;
 
-	p_ptt = qed_ptt_acquire(p_hwfn);
-	if (!p_ptt) {
-		DP_ERR(cdev,
-		       "qed roce ll2 mac filter set: failed to acquire PTT\n");
-		return -EINVAL;
-	}
-
 	if (old_mac_address)
-		qed_llh_remove_mac_filter(p_hwfn, p_ptt, old_mac_address);
+		qed_llh_remove_mac_filter(cdev, 0, old_mac_address);
 	if (new_mac_address)
-		rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, new_mac_address);
-
-	qed_ptt_release(p_hwfn, p_ptt);
+		rc = qed_llh_add_mac_filter(cdev, 0, new_mac_address);
 
 	if (rc)
 		DP_ERR(cdev,
@@ -1924,6 +1914,36 @@
 	return rc;
 }
 
+static int qed_iwarp_set_engine_affin(struct qed_dev *cdev, bool b_reset)
+{
+	enum qed_eng eng;
+	u8 ppfid = 0;
+	int rc;
+
+	/* Make sure iwarp cmt mode is enabled before setting affinity */
+	if (!cdev->iwarp_cmt)
+		return -EINVAL;
+
+	if (b_reset)
+		eng = QED_BOTH_ENG;
+	else
+		eng = cdev->l2_affin_hint ? QED_ENG1 : QED_ENG0;
+
+	rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+	if (rc) {
+		DP_NOTICE(cdev,
+			  "Failed to set the engine affinity of ppfid %d\n",
+			  ppfid);
+		return rc;
+	}
+
+	DP_VERBOSE(cdev, (QED_MSG_RDMA | QED_MSG_SP),
+		   "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+		   eng);
+
+	return 0;
+}
+
 static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.common = &qed_common_ops_pass,
 	.fill_dev_info = &qed_fill_rdma_dev_info,
@@ -1963,6 +1983,7 @@
 	.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
 	.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
 	.ll2_get_stats = &qed_ll2_get_stats,
+	.iwarp_set_engine_affin = &qed_iwarp_set_engine_affin,
 	.iwarp_connect = &qed_iwarp_connect,
 	.iwarp_create_listen = &qed_iwarp_create_listen,
 	.iwarp_destroy_listen = &qed_iwarp_destroy_listen,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 2440970..60f850c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -254,6 +254,10 @@
 	0x500840UL
 #define NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR \
 	0x50196cUL
+#define NIG_REG_LLH_PPFID2PFID_TBL_0 \
+	0x501970UL
+#define NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL	\
+	0x50
 #define NIG_REG_LLH_CLS_TYPE_DUALMODE \
 	0x501964UL
 #define NIG_REG_LLH_FUNC_TAG_EN 0x5019b0UL
@@ -518,6 +522,8 @@
 	0x180824UL
 #define  MISC_REG_AEU_GENERAL_ATTN_0 \
 	0x008400UL
+#define MISC_REG_AEU_GENERAL_ATTN_35 \
+	0x00848cUL
 #define  CAU_REG_SB_ADDR_MEMORY \
 	0x1c8000UL
 #define  CAU_REG_SB_VAR_MEMORY \
@@ -1243,6 +1249,56 @@
 	0x1701534UL
 #define TSEM_REG_DBG_FORCE_FRAME \
 	0x1701538UL
+#define DORQ_REG_PF_USAGE_CNT \
+	0x1009c0UL
+#define DORQ_REG_PF_OVFL_STICKY	\
+	0x1009d0UL
+#define DORQ_REG_DPM_FORCE_ABORT \
+	0x1009d8UL
+#define DORQ_REG_INT_STS \
+	0x100180UL
+#define DORQ_REG_INT_STS_ADDRESS_ERROR \
+	(0x1UL << 0)
+#define DORQ_REG_INT_STS_WR \
+	0x100188UL
+#define DORQ_REG_DB_DROP_DETAILS_REL \
+	0x100a28UL
+#define DORQ_REG_INT_STS_ADDRESS_ERROR_SHIFT \
+	0
+#define DORQ_REG_INT_STS_DB_DROP \
+		(0x1UL << 1)
+#define DORQ_REG_INT_STS_DB_DROP_SHIFT \
+	1
+#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR \
+		(0x1UL << 2)
+#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR_SHIFT \
+	2
+#define DORQ_REG_INT_STS_DORQ_FIFO_AFULL\
+		(0x1UL << 3)
+#define DORQ_REG_INT_STS_DORQ_FIFO_AFULL_SHIFT \
+	3
+#define DORQ_REG_INT_STS_CFC_BYP_VALIDATION_ERR \
+		(0x1UL << 4)
+#define DORQ_REG_INT_STS_CFC_BYP_VALIDATION_ERR_SHIFT \
+	4
+#define DORQ_REG_INT_STS_CFC_LD_RESP_ERR \
+		(0x1UL << 5)
+#define DORQ_REG_INT_STS_CFC_LD_RESP_ERR_SHIFT \
+	5
+#define DORQ_REG_INT_STS_XCM_DONE_CNT_ERR \
+		(0x1UL << 6)
+#define DORQ_REG_INT_STS_XCM_DONE_CNT_ERR_SHIFT	\
+	6
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_OVFL_ERR \
+		(0x1UL << 7)
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_OVFL_ERR_SHIFT	\
+	7
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_UNDER_ERR \
+		(0x1UL << 8)
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_UNDER_ERR_SHIFT \
+	8
+#define DORQ_REG_DB_DROP_DETAILS_REASON	\
+	0x100a20UL
 #define MSEM_REG_DBG_SELECT \
 	0x1801528UL
 #define MSEM_REG_DBG_DWORD_ENABLE \
@@ -1574,6 +1630,8 @@
 #define PHY_PCIE_REG_PHY1_K2_E5 \
 	0x624000UL
 #define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL
+#define NIG_REG_PPF_TO_ENGINE_SEL 0x508900UL
+#define NIG_REG_PPF_TO_ENGINE_SEL_SIZE 8
 #define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL
 #define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL
 #define DORQ_REG_PF_DPM_ENABLE 0x100510UL
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 3157c0d..96ab77a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -227,7 +227,9 @@
 	u32			comp_count;
 
 	u32			cid;
-	qed_spq_async_comp_cb async_comp_cb[MAX_PROTOCOL_TYPE];
+	u32			db_addr_offset;
+	struct core_db_data	db_data;
+	qed_spq_async_comp_cb	async_comp_cb[MAX_PROTOCOL_TYPE];
 };
 
 /**
@@ -380,6 +382,7 @@
  * @param p_hwfn
  */
 void qed_consq_free(struct qed_hwfn *p_hwfn);
+int qed_spq_pend_post(struct qed_hwfn *p_hwfn);
 
 /**
  * @file
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 888274f..7e0b795 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -588,7 +588,7 @@
 {
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	int rc = -EINVAL;
+	int rc;
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -604,6 +604,9 @@
 
 	p_ent->ramrod.pf_update.update_mf_vlan_flag = true;
 	p_ent->ramrod.pf_update.mf_vlan = cpu_to_le16(p_hwfn->hw_info.ovlan);
+	if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
+		p_ent->ramrod.pf_update.mf_vlan |=
+			cpu_to_le16(((u16)p_hwfn->ufp_info.tc << 13));
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 7106ad1..f5f3c03 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -252,9 +252,9 @@
 			   struct qed_spq *p_spq, struct qed_spq_entry *p_ent)
 {
 	struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
+	struct core_db_data *p_db_data = &p_spq->db_data;
 	u16 echo = qed_chain_get_prod_idx(p_chain);
 	struct slow_path_element	*elem;
-	struct core_db_data		db;
 
 	p_ent->elem.hdr.echo	= cpu_to_le16(echo);
 	elem = qed_chain_produce(p_chain);
@@ -266,27 +266,22 @@
 	*elem = p_ent->elem; /* struct assignment */
 
 	/* send a doorbell on the slow hwfn session */
-	memset(&db, 0, sizeof(db));
-	SET_FIELD(db.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
-	SET_FIELD(db.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
-	SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
-		  DQ_XCM_CORE_SPQ_PROD_CMD);
-	db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
-	db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain));
+	p_db_data->spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain));
 
 	/* make sure the SPQE is updated before the doorbell */
 	wmb();
 
-	DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db);
+	DOORBELL(p_hwfn, p_spq->db_addr_offset, *(u32 *)p_db_data);
 
 	/* make sure the doorbell is rung */
 	wmb();
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
 		   "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n",
-		   qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY),
-		   p_spq->cid, db.params, db.agg_flags,
-		   qed_chain_get_prod_idx(p_chain));
+		   p_spq->db_addr_offset,
+		   p_spq->cid,
+		   p_db_data->params,
+		   p_db_data->agg_flags, qed_chain_get_prod_idx(p_chain));
 
 	return 0;
 }
@@ -346,9 +341,6 @@
 		   USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id);
 
 	REG_WR16(p_hwfn, addr, prod);
-
-	/* keep prod updates ordered */
-	mmiowb();
 }
 
 int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
@@ -402,6 +394,11 @@
 
 	qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain));
 
+	/* Attempt to post pending requests */
+	spin_lock_bh(&p_hwfn->p_spq->lock);
+	rc = qed_spq_pend_post(p_hwfn);
+	spin_unlock_bh(&p_hwfn->p_spq->lock);
+
 	return rc;
 }
 
@@ -490,8 +487,11 @@
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 	struct qed_spq_entry *p_virt = NULL;
+	struct core_db_data *p_db_data;
+	void __iomem *db_addr;
 	dma_addr_t p_phys = 0;
 	u32 i, capacity;
+	int rc;
 
 	INIT_LIST_HEAD(&p_spq->pending);
 	INIT_LIST_HEAD(&p_spq->completion_pending);
@@ -528,6 +528,25 @@
 
 	/* reset the chain itself */
 	qed_chain_reset(&p_spq->chain);
+
+	/* Initialize the address/data of the SPQ doorbell */
+	p_spq->db_addr_offset = qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY);
+	p_db_data = &p_spq->db_data;
+	memset(p_db_data, 0, sizeof(*p_db_data));
+	SET_FIELD(p_db_data->params, CORE_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(p_db_data->params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_MAX);
+	SET_FIELD(p_db_data->params, CORE_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_CORE_SPQ_PROD_CMD);
+	p_db_data->agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
+
+	/* Register the SPQ doorbell with the doorbell recovery mechanism */
+	db_addr = (void __iomem *)((u8 __iomem *)p_hwfn->doorbells +
+				   p_spq->db_addr_offset);
+	rc = qed_db_recovery_add(p_hwfn->cdev, db_addr, &p_spq->db_data,
+				 DB_REC_WIDTH_32B, DB_REC_KERNEL);
+	if (rc)
+		DP_INFO(p_hwfn,
+			"Failed to register the SPQ doorbell with the doorbell recovery mechanism\n");
 }
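
Conceptually, a doorbell registered with the recovery mechanism can be re-rung after an engine recovery by replaying the cached value to the cached address; a sketch of that idea (not the actual qed recovery code), with DB_REC_WIDTH_32B indicating a 32-bit write:

	/* illustrative replay of a 32-bit doorbell entry */
	writel(*(u32 *)&p_spq->db_data, db_addr);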
 
 int qed_spq_alloc(struct qed_hwfn *p_hwfn)
@@ -575,11 +594,17 @@
 void qed_spq_free(struct qed_hwfn *p_hwfn)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
+	void __iomem *db_addr;
 	u32 capacity;
 
 	if (!p_spq)
 		return;
 
+	/* Delete the SPQ doorbell from the doorbell recovery mechanism */
+	db_addr = (void __iomem *)((u8 __iomem *)p_hwfn->doorbells +
+				   p_spq->db_addr_offset);
+	qed_db_recovery_del(p_hwfn->cdev, db_addr, &p_spq->db_data);
+
 	if (p_spq->p_virt) {
 		capacity = qed_chain_get_capacity(&p_spq->chain);
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
@@ -730,8 +755,7 @@
 	       !list_empty(head)) {
 		struct qed_spq_entry *p_ent =
 			list_first_entry(head, struct qed_spq_entry, list);
-		list_del(&p_ent->list);
-		list_add_tail(&p_ent->list, &p_spq->completion_pending);
+		list_move_tail(&p_ent->list, &p_spq->completion_pending);
 		p_spq->comp_sent_count++;
 
 		rc = qed_spq_hw_post(p_hwfn, p_spq, p_ent);
@@ -745,7 +769,7 @@
 	return 0;
 }
 
-static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
+int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 	struct qed_spq_entry *p_ent = NULL;
@@ -768,6 +792,17 @@
 				 SPQ_HIGH_PRI_RESERVE_DEFAULT);
 }
 
+static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent,
+				       u8 *fw_return_code)
+{
+	if (!fw_return_code)
+		return;
+
+	if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE ||
+	    p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP)
+		*fw_return_code = RDMA_RETURN_OK;
+}
+
 /* Avoid overriding of SPQ entries when getting out-of-order completions, by
  * marking the completions in a bitmap and increasing the chain consumer only
  * for the first successive completed entries.
@@ -803,6 +838,17 @@
 		return -EINVAL;
 	}
 
+	if (p_hwfn->cdev->recov_in_prog) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_SPQ,
+			   "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n",
+			   p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id);
+
+		/* Let the flow complete w/o any error handling */
+		qed_spq_recov_set_ret_code(p_ent, fw_return_code);
+		return 0;
+	}
+
 	/* Complete the entry */
 	rc = qed_spq_fill_entry(p_hwfn, p_ent);
 
@@ -883,7 +929,6 @@
 	struct qed_spq_entry	*p_ent = NULL;
 	struct qed_spq_entry	*tmp;
 	struct qed_spq_entry	*found = NULL;
-	int			rc;
 
 	if (!p_hwfn)
 		return -EINVAL;
@@ -941,12 +986,7 @@
 		 */
 		qed_spq_return_entry(p_hwfn, found);
 
-	/* Attempt to post pending requests */
-	spin_lock_bh(&p_spq->lock);
-	rc = qed_spq_pend_post(p_hwfn);
-	spin_unlock_bh(&p_spq->lock);
-
-	return rc;
+	return 0;
 }
 
 int qed_consq_alloc(struct qed_hwfn *p_hwfn)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index ca6290f..dcb5c91 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -917,10 +917,11 @@
 		/* Configure igu sb in CAU which were marked valid */
 		qed_init_cau_sb_entry(p_hwfn, &sb_entry,
 				      p_hwfn->rel_pf_id, vf->abs_vf_id, 1);
+
 		qed_dmae_host2grc(p_hwfn, p_ptt,
 				  (u64)(uintptr_t)&sb_entry,
 				  CAU_REG_SB_VAR_MEMORY +
-				  p_block->igu_sb_id * sizeof(u64), 2, 0);
+				  p_block->igu_sb_id * sizeof(u64), 2, NULL);
 	}
 
 	vf->num_sbs = (u8) num_rx_queues;
@@ -1591,7 +1592,7 @@
 			p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN;
 		} else {
 			DP_INFO(p_hwfn,
-				"VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n",
+				"VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n",
 				vf->abs_vf_id,
 				req->vfdev_info.eth_fp_hsi_major,
 				req->vfdev_info.eth_fp_hsi_minor,
@@ -1969,7 +1970,9 @@
 	params.vport_id = vf->vport_id;
 	params.max_buffers_per_cqe = start->max_buffers_per_cqe;
 	params.mtu = vf->mtu;
-	params.check_mac = true;
+
+	/* Non-trusted VFs should enable control frame filtering */
+	params.check_mac = !vf->p_vf_info.is_trusted_configured;
 
 	rc = qed_sp_eth_vport_start(p_hwfn, &params);
 	if (rc) {
@@ -2002,7 +2005,7 @@
 	    (qed_iov_validate_active_txq(p_hwfn, vf))) {
 		vf->b_malicious = true;
 		DP_NOTICE(p_hwfn,
-			  "VF [%02x] - considered malicious; Unable to stop RX/TX queuess\n",
+			  "VF [%02x] - considered malicious; Unable to stop RX/TX queues\n",
 			  vf->abs_vf_id);
 		status = PFVF_STATUS_MALICIOUS;
 		goto out;
@@ -4447,6 +4450,13 @@
 	if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled)
 		pci_disable_sriov(cdev->pdev);
 
+	if (cdev->recov_in_prog) {
+		DP_VERBOSE(cdev,
+			   QED_MSG_IOV,
+			   "Skip SRIOV disable operations in the device since a recovery is in progress\n");
+		goto out;
+	}
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[i];
 		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
@@ -4486,7 +4496,7 @@
 
 		qed_ptt_release(hwfn, ptt);
 	}
-
+out:
 	qed_iov_set_vfs_to_disable(cdev, false);
 
 	return 0;
@@ -5130,6 +5140,9 @@
 		params.opaque_fid = vf->opaque_fid;
 		params.vport_id = vf->vport_id;
 
+		params.update_ctl_frame_check = 1;
+		params.mac_chk_en = !vf_info->is_trusted_configured;
+
 		if (vf_info->rx_accept_mode & mask) {
 			flags->update_rx_mode_config = 1;
 			flags->rx_accept_filter = vf_info->rx_accept_mode;
@@ -5147,7 +5160,8 @@
 		}
 
 		if (flags->update_rx_mode_config ||
-		    flags->update_tx_mode_config)
+		    flags->update_tx_mode_config ||
+		    params.update_ctl_frame_check)
 			qed_sp_vport_update(hwfn, &params,
 					    QED_SPQ_MODE_EBLOCK, NULL);
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index be118d0..856051f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -231,7 +231,7 @@
 {
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_IOV,
-		   "PF unwilling to fullill resource request: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x] cids [%02x/%02x]. Try PF recommended amount\n",
+		   "PF unwilling to fulfill resource request: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x] cids [%02x/%02x]. Try PF recommended amount\n",
 		   p_req->num_rxqs,
 		   p_resp->num_rxqs,
 		   p_req->num_rxqs,
@@ -261,6 +261,7 @@
 	struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp;
 	struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info;
 	struct vf_pf_resc_request *p_resc;
+	u8 retry_cnt = VF_ACQUIRE_THRESH;
 	bool resources_acquired = false;
 	struct vfpf_acquire_tlv *req;
 	int rc = 0, attempts = 0;
@@ -314,6 +315,15 @@
 
 		/* send acquire request */
 		rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+
+		/* Re-try acquire in case of vf-pf hw channel timeout */
+		if (retry_cnt && rc == -EBUSY) {
+			DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+				   "VF retrying to acquire due to VPC timeout\n");
+			retry_cnt--;
+			continue;
+		}
+
 		if (rc)
 			goto exit;
 
@@ -1688,7 +1698,7 @@
 	ops->ports_update(cookie, vxlan_port, geneve_port);
 
 	/* Always update link configuration according to bulletin */
-	qed_link_update(hwfn);
+	qed_link_update(hwfn, NULL);
 }
 
 void qed_iov_vf_task(struct work_struct *work)
diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile
index 75408fb..3fc91d1 100644
--- a/drivers/net/ethernet/qlogic/qede/Makefile
+++ b/drivers/net/ethernet/qlogic/qede/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_QEDE) := qede.o
 
 qede-y := qede_main.o qede_fp.o qede_filter.o qede_ethtool.o qede_ptp.o
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 6a4d266..c303a92 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -56,7 +56,7 @@
 #include <net/tc_act/tc_gact.h>
 
 #define QEDE_MAJOR_VERSION		8
-#define QEDE_MINOR_VERSION		33
+#define QEDE_MINOR_VERSION		37
 #define QEDE_REVISION_VERSION		0
 #define QEDE_ENGINEERING_VERSION	20
 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "."	\
@@ -92,6 +92,7 @@
 	u64 non_coalesced_pkts;
 	u64 coalesced_bytes;
 	u64 link_change_count;
+	u64 ptp_skip_txts;
 
 	/* port */
 	u64 rx_64_byte_packets;
@@ -162,12 +163,34 @@
 	struct list_head entry;
 	struct list_head rdma_event_list;
 	struct workqueue_struct *rdma_wq;
+	bool exp_recovery;
 };
 
 struct qede_ptp;
 
 #define QEDE_RFS_MAX_FLTR	256
 
+enum qede_flags_bit {
+	QEDE_FLAGS_IS_VF = 0,
+	QEDE_FLAGS_LINK_REQUESTED,
+	QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+	QEDE_FLAGS_TX_TIMESTAMPING_EN
+};
+
+#define QEDE_DUMP_MAX_ARGS 4
+enum qede_dump_cmd {
+	QEDE_DUMP_CMD_NONE = 0,
+	QEDE_DUMP_CMD_NVM_CFG,
+	QEDE_DUMP_CMD_GRCDUMP,
+	QEDE_DUMP_CMD_MAX
+};
+
+struct qede_dump_info {
+	enum qede_dump_cmd cmd;
+	u8 num_args;
+	u32 args[QEDE_DUMP_MAX_ARGS];
+};
+
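
With the flags converted from BIT() masks to bit numbers in enum qede_flags_bit, callers are expected to use the atomic bitops on edev->flags; for instance (call sites illustrative):

	set_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);

	if (test_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags))
		/* Tx timestamping path runs only while the bit is set */
		qede_ptp_tx_ts(edev, skb);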
 struct qede_dev {
 	struct qed_dev			*cdev;
 	struct net_device		*ndev;
@@ -177,13 +200,11 @@
 	u8				dp_level;
 
 	unsigned long flags;
-#define QEDE_FLAG_IS_VF			BIT(0)
-#define IS_VF(edev)	(!!((edev)->flags & QEDE_FLAG_IS_VF))
-#define QEDE_TX_TIMESTAMPING_EN		BIT(1)
-#define QEDE_FLAGS_PTP_TX_IN_PRORGESS	BIT(2)
+#define IS_VF(edev)	(test_bit(QEDE_FLAGS_IS_VF, &(edev)->flags))
 
 	const struct qed_eth_ops	*ops;
 	struct qede_ptp			*ptp;
+	u64				ptp_skip_txts;
 
 	struct qed_dev_eth_info dev_info;
 #define QEDE_MAX_RSS_CNT(edev)	((edev)->dev_info.num_queues)
@@ -255,11 +276,13 @@
 	struct qede_rdma_dev		rdma_info;
 
 	struct bpf_prog *xdp_prog;
+	struct qede_dump_info		dump_info;
 };
 
 enum QEDE_STATE {
 	QEDE_STATE_CLOSED,
 	QEDE_STATE_OPEN,
+	QEDE_STATE_RECOVERY,
 };
 
 #define HILO_U64(hi, lo)		((((u64)(hi)) << 32) + (lo))
@@ -377,6 +400,7 @@
 
 	u64 xmit_pkts;
 	u64 stopped_cnt;
+	u64 tx_mem_alloc_err;
 
 	__le16 *hw_cons_ptr;
 
@@ -440,7 +464,7 @@
 	struct qede_tx_queue	*txq;
 	struct qede_tx_queue	*xdp_tx;
 
-#define VEC_NAME_SIZE	(sizeof(((struct net_device *)0)->name) + 8)
+#define VEC_NAME_SIZE  (FIELD_SIZEOF(struct net_device, name) + 8)
 	char	name[VEC_NAME_SIZE];
 };
 
@@ -457,6 +481,7 @@
 #define QEDE_CSUM_UNNECESSARY		BIT(1)
 #define QEDE_TUNN_CSUM_UNNECESSARY	BIT(2)
 
+#define QEDE_SP_RECOVERY		0
 #define QEDE_SP_RX_MODE			1
 
 #ifdef CONFIG_RFS_ACCEL
@@ -489,6 +514,8 @@
 
 /* Datapath functions definition */
 netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev);
+u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
+		      struct net_device *sb_dev);
 netdev_features_t qede_features_check(struct sk_buff *skb,
 				      struct net_device *dev,
 				      netdev_features_t features);
@@ -539,7 +566,7 @@
 void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count);
 void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
 int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto,
-			    struct tc_cls_flower_offload *f);
+			    struct flow_cls_offload *f);
 
 #define RX_RING_SIZE_POW	13
 #define RX_RING_SIZE		((u16)BIT(RX_RING_SIZE_POW))
diff --git a/drivers/net/ethernet/qlogic/qede/qede_dcbnl.c b/drivers/net/ethernet/qlogic/qede/qede_dcbnl.c
index 6e7747b..e6e844a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_dcbnl.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_dcbnl.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* QLogic qede NIC Driver
 * Copyright (c) 2015 QLogic Corporation
-*
-* This software is available under the terms of the GNU General Public License
-* (GPL) Version 2, available from the file COPYING in the main directory of
-* this source tree.
 */
 
 #include <linux/types.h>
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 19652cd..8a426af 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -48,6 +48,8 @@
 	 {QEDE_RQSTAT_OFFSET(stat_name), QEDE_RQSTAT_STRING(stat_name)}
 
 #define QEDE_SELFTEST_POLL_COUNT 100
+#define QEDE_DUMP_VERSION	0x1
+#define QEDE_DUMP_NVM_ARG_COUNT	2
 
 static const struct {
 	u64 offset;
@@ -73,6 +75,7 @@
 } qede_tqstats_arr[] = {
 	QEDE_TQSTAT(xmit_pkts),
 	QEDE_TQSTAT(stopped_cnt),
+	QEDE_TQSTAT(tx_mem_alloc_err),
 };
 
 #define QEDE_STAT_OFFSET(stat_name, type, base) \
@@ -173,6 +176,7 @@
 	QEDE_STAT(coalesced_bytes),
 
 	QEDE_STAT(link_change_count),
+	QEDE_STAT(ptp_skip_txts),
 };
 
 #define QEDE_NUM_STATS	ARRAY_SIZE(qede_stats_arr)
@@ -185,11 +189,13 @@
 
 enum {
 	QEDE_PRI_FLAG_CMT,
+	QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */
 	QEDE_PRI_FLAG_LEN,
 };
 
 static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
 	"Coupled-Function",
+	"SmartAN capable",
 };
 
 enum qede_ethtool_tests {
@@ -403,8 +409,15 @@
 static u32 qede_get_priv_flags(struct net_device *dev)
 {
 	struct qede_dev *edev = netdev_priv(dev);
+	u32 flags = 0;
 
-	return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT;
+	if (edev->dev_info.common.num_hwfns > 1)
+		flags |= BIT(QEDE_PRI_FLAG_CMT);
+
+	if (edev->dev_info.common.smart_an)
+		flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
+
+	return flags;
 }
 
 struct qede_link_mode_mapping {
@@ -417,14 +430,39 @@
 	{QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
 	{QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
 	{QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
-	{QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT},
 	{QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+	{QED_LM_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT},
+	{QED_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
+	{QED_LM_Backplane_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
+	{QED_LM_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT},
+	{QED_LM_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT},
 	{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
-	{QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+	{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+	{QED_LM_10000baseR_FEC_BIT, ETHTOOL_LINK_MODE_10000baseR_FEC_BIT},
+	{QED_LM_20000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT},
+	{QED_LM_40000baseKR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT},
+	{QED_LM_40000baseCR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT},
+	{QED_LM_40000baseSR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT},
 	{QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
+	{QED_LM_25000baseCR_Full_BIT, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT},
+	{QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+	{QED_LM_25000baseSR_Full_BIT, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT},
+	{QED_LM_50000baseCR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT},
 	{QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT},
 	{QED_LM_100000baseKR4_Full_BIT,
-	 ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+		ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+	{QED_LM_100000baseSR4_Full_BIT,
+		ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT},
+	{QED_LM_100000baseCR4_Full_BIT,
+		ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT},
+	{QED_LM_100000baseLR4_ER4_Full_BIT,
+		ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT},
+	{QED_LM_50000baseSR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT},
+	{QED_LM_1000baseX_Full_BIT, ETHTOOL_LINK_MODE_1000baseX_Full_BIT},
+	{QED_LM_10000baseCR_Full_BIT, ETHTOOL_LINK_MODE_10000baseCR_Full_BIT},
+	{QED_LM_10000baseSR_Full_BIT, ETHTOOL_LINK_MODE_10000baseSR_Full_BIT},
+	{QED_LM_10000baseLR_Full_BIT, ETHTOOL_LINK_MODE_10000baseLR_Full_BIT},
+	{QED_LM_10000baseLRM_Full_BIT, ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT},
 };
 
 #define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name)	\
@@ -494,6 +532,7 @@
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qed_link_output current_link;
 	struct qed_link_params params;
+	u32 sup_caps;
 
 	if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
 		DP_INFO(edev, "Link settings are not allowed to be changed\n");
@@ -520,52 +559,85 @@
 		params.forced_speed = base->speed;
 		switch (base->speed) {
 		case SPEED_1000:
-			if (!(current_link.supported_caps &
-			      QED_LM_1000baseT_Full_BIT)) {
+			sup_caps = QED_LM_1000baseT_Full_BIT |
+					QED_LM_1000baseKX_Full_BIT |
+					QED_LM_1000baseX_Full_BIT;
+			if (!(current_link.supported_caps & sup_caps)) {
 				DP_INFO(edev, "1G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = QED_LM_1000baseT_Full_BIT;
+			params.adv_speeds = current_link.supported_caps &
+						sup_caps;
 			break;
 		case SPEED_10000:
-			if (!(current_link.supported_caps &
-			      QED_LM_10000baseKR_Full_BIT)) {
+			sup_caps = QED_LM_10000baseT_Full_BIT |
+					QED_LM_10000baseKR_Full_BIT |
+					QED_LM_10000baseKX4_Full_BIT |
+					QED_LM_10000baseR_FEC_BIT |
+					QED_LM_10000baseCR_Full_BIT |
+					QED_LM_10000baseSR_Full_BIT |
+					QED_LM_10000baseLR_Full_BIT |
+					QED_LM_10000baseLRM_Full_BIT;
+			if (!(current_link.supported_caps & sup_caps)) {
 				DP_INFO(edev, "10G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = QED_LM_10000baseKR_Full_BIT;
+			params.adv_speeds = current_link.supported_caps &
+						sup_caps;
+			break;
+		case SPEED_20000:
+			if (!(current_link.supported_caps &
+			    QED_LM_20000baseKR2_Full_BIT)) {
+				DP_INFO(edev, "20G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_20000baseKR2_Full_BIT;
 			break;
 		case SPEED_25000:
-			if (!(current_link.supported_caps &
-			      QED_LM_25000baseKR_Full_BIT)) {
+			sup_caps = QED_LM_25000baseKR_Full_BIT |
+					QED_LM_25000baseCR_Full_BIT |
+					QED_LM_25000baseSR_Full_BIT;
+			if (!(current_link.supported_caps & sup_caps)) {
 				DP_INFO(edev, "25G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = QED_LM_25000baseKR_Full_BIT;
+			params.adv_speeds = current_link.supported_caps &
+						sup_caps;
 			break;
 		case SPEED_40000:
-			if (!(current_link.supported_caps &
-			      QED_LM_40000baseLR4_Full_BIT)) {
+			sup_caps = QED_LM_40000baseLR4_Full_BIT |
+					QED_LM_40000baseKR4_Full_BIT |
+					QED_LM_40000baseCR4_Full_BIT |
+					QED_LM_40000baseSR4_Full_BIT;
+			if (!(current_link.supported_caps & sup_caps)) {
 				DP_INFO(edev, "40G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = QED_LM_40000baseLR4_Full_BIT;
+			params.adv_speeds = current_link.supported_caps &
+						sup_caps;
 			break;
 		case SPEED_50000:
-			if (!(current_link.supported_caps &
-			      QED_LM_50000baseKR2_Full_BIT)) {
+			sup_caps = QED_LM_50000baseKR2_Full_BIT |
+					QED_LM_50000baseCR2_Full_BIT |
+					QED_LM_50000baseSR2_Full_BIT;
+			if (!(current_link.supported_caps & sup_caps)) {
 				DP_INFO(edev, "50G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = QED_LM_50000baseKR2_Full_BIT;
+			params.adv_speeds = current_link.supported_caps &
+						sup_caps;
 			break;
 		case SPEED_100000:
-			if (!(current_link.supported_caps &
-			      QED_LM_100000baseKR4_Full_BIT)) {
+			sup_caps = QED_LM_100000baseKR4_Full_BIT |
+					QED_LM_100000baseSR4_Full_BIT |
+					QED_LM_100000baseCR4_Full_BIT |
+					QED_LM_100000baseLR4_ER4_Full_BIT;
+			if (!(current_link.supported_caps & sup_caps)) {
 				DP_INFO(edev, "100G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = QED_LM_100000baseKR4_Full_BIT;
+			params.adv_speeds = current_link.supported_caps &
+						sup_caps;
 			break;
 		default:
 			DP_INFO(edev, "Unsupported speed %u\n", base->speed);
@@ -584,9 +656,9 @@
 {
 	char mfw[ETHTOOL_FWVERS_LEN], storm[ETHTOOL_FWVERS_LEN];
 	struct qede_dev *edev = netdev_priv(ndev);
+	char mbi[ETHTOOL_FWVERS_LEN];
 
 	strlcpy(info->driver, "qede", sizeof(info->driver));
-	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
 
 	snprintf(storm, ETHTOOL_FWVERS_LEN, "%d.%d.%d.%d",
 		 edev->dev_info.common.fw_major,
@@ -600,13 +672,27 @@
 		 (edev->dev_info.common.mfw_rev >> 8) & 0xFF,
 		 edev->dev_info.common.mfw_rev & 0xFF);
 
-	if ((strlen(storm) + strlen(mfw) + strlen("mfw storm  ")) <
-	    sizeof(info->fw_version)) {
+	if ((strlen(storm) + strlen(DRV_MODULE_VERSION) + strlen("[storm]  ")) <
+	    sizeof(info->version))
+		snprintf(info->version, sizeof(info->version),
+			 "%s [storm %s]", DRV_MODULE_VERSION, storm);
+	else
+		snprintf(info->version, sizeof(info->version),
+			 "%s %s", DRV_MODULE_VERSION, storm);
+
+	if (edev->dev_info.common.mbi_version) {
+		snprintf(mbi, ETHTOOL_FWVERS_LEN, "%d.%d.%d",
+			 (edev->dev_info.common.mbi_version &
+			  QED_MBI_VERSION_2_MASK) >> QED_MBI_VERSION_2_OFFSET,
+			 (edev->dev_info.common.mbi_version &
+			  QED_MBI_VERSION_1_MASK) >> QED_MBI_VERSION_1_OFFSET,
+			 (edev->dev_info.common.mbi_version &
+			  QED_MBI_VERSION_0_MASK) >> QED_MBI_VERSION_0_OFFSET);
 		snprintf(info->fw_version, sizeof(info->fw_version),
-			 "mfw %s storm %s", mfw, storm);
+			 "mbi %s [mfw %s]", mbi, mfw);
 	} else {
 		snprintf(info->fw_version, sizeof(info->fw_version),
-			 "%s %s", mfw, storm);
+			 "mfw %s", mfw);
 	}
 
 	strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info));
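The drvinfo hunk above moves the driver version into info->version alongside the storm firmware string and, when the NVM carries one, decodes a three-part MBI image version out of a single 32-bit word with mask-and-shift. A standalone sketch of that decoding, assuming one byte per field (the actual QED_MBI_VERSION_* masks and offsets are defined in the qed headers):

	#include <stdint.h>
	#include <stdio.h>

	#define MBI_VERSION_2_MASK	0x00ff0000	/* assumed layout */
	#define MBI_VERSION_2_OFFSET	16
	#define MBI_VERSION_1_MASK	0x0000ff00
	#define MBI_VERSION_1_OFFSET	8
	#define MBI_VERSION_0_MASK	0x000000ff
	#define MBI_VERSION_0_OFFSET	0

	int main(void)
	{
		uint32_t mbi_version = 0x000f0a03;	/* decodes to 15.10.3 */
		char mbi[16];

		snprintf(mbi, sizeof(mbi), "%u.%u.%u",
			 (mbi_version & MBI_VERSION_2_MASK) >> MBI_VERSION_2_OFFSET,
			 (mbi_version & MBI_VERSION_1_MASK) >> MBI_VERSION_1_OFFSET,
			 (mbi_version & MBI_VERSION_0_MASK) >> MBI_VERSION_0_OFFSET);
		printf("mbi %s\n", mbi);
		return 0;
	}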
@@ -1458,14 +1544,6 @@
 	barrier();
 	writel(txq->tx_db.raw, txq->doorbell_addr);
 
-	/* mmiowb is needed to synchronize doorbell writes from more than one
-	 * processor. It guarantees that the write arrives to the device before
-	 * the queue lock is released and another start_xmit is called (possibly
-	 * on another CPU). Without this barrier, the next doorbell can bypass
-	 * this doorbell. This is applicable to IA64/Altix systems.
-	 */
-	mmiowb();
-
 	for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) {
 		if (qede_txq_has_work(txq))
 			break;
@@ -1595,8 +1673,11 @@
 	/* Wait for loopback configuration to apply */
 	msleep_interruptible(500);
 
-	/* prepare the loopback packet */
-	pkt_size = edev->ndev->mtu + ETH_HLEN;
+	/* Setting max packet size to 1.5K to avoid data being split over
+	 * multiple BDs in cases where MTU > PAGE_SIZE.
+	 */
+	pkt_size = (((edev->ndev->mtu < ETH_DATA_LEN) ?
+		     edev->ndev->mtu : ETH_DATA_LEN) + ETH_HLEN);
 
 	skb = netdev_alloc_skb(edev->ndev, pkt_size);
 	if (!skb) {
@@ -1894,6 +1975,117 @@
 	return rc;
 }
 
+static int qede_set_dump(struct net_device *dev, struct ethtool_dump *val)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	int rc = 0;
+
+	if (edev->dump_info.cmd == QEDE_DUMP_CMD_NONE) {
+		if (val->flag > QEDE_DUMP_CMD_MAX) {
+			DP_ERR(edev, "Invalid command %d\n", val->flag);
+			return -EINVAL;
+		}
+		edev->dump_info.cmd = val->flag;
+		edev->dump_info.num_args = 0;
+		return 0;
+	}
+
+	if (edev->dump_info.num_args == QEDE_DUMP_MAX_ARGS) {
+		DP_ERR(edev, "Arg count = %d\n", edev->dump_info.num_args);
+		return -EINVAL;
+	}
+
+	switch (edev->dump_info.cmd) {
+	case QEDE_DUMP_CMD_NVM_CFG:
+		edev->dump_info.args[edev->dump_info.num_args] = val->flag;
+		edev->dump_info.num_args++;
+		break;
+	case QEDE_DUMP_CMD_GRCDUMP:
+		rc = edev->ops->common->set_grc_config(edev->cdev,
+						       val->flag, 1);
+		break;
+	default:
+		break;
+	}
+
+	return rc;
+}
+
+static int qede_get_dump_flag(struct net_device *dev,
+			      struct ethtool_dump *dump)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+
+	if (!edev->ops || !edev->ops->common) {
+		DP_ERR(edev, "Edev ops not populated\n");
+		return -EINVAL;
+	}
+
+	dump->version = QEDE_DUMP_VERSION;
+	switch (edev->dump_info.cmd) {
+	case QEDE_DUMP_CMD_NVM_CFG:
+		dump->flag = QEDE_DUMP_CMD_NVM_CFG;
+		dump->len = edev->ops->common->read_nvm_cfg_len(edev->cdev,
+						edev->dump_info.args[0]);
+		break;
+	case QEDE_DUMP_CMD_GRCDUMP:
+		dump->flag = QEDE_DUMP_CMD_GRCDUMP;
+		dump->len = edev->ops->common->dbg_all_data_size(edev->cdev);
+		break;
+	default:
+		DP_ERR(edev, "Invalid cmd = %d\n", edev->dump_info.cmd);
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(edev, QED_MSG_DEBUG,
+		   "dump->version = 0x%x dump->flag = %d dump->len = %d\n",
+		   dump->version, dump->flag, dump->len);
+	return 0;
+}
+
+static int qede_get_dump_data(struct net_device *dev,
+			      struct ethtool_dump *dump, void *buf)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	int rc = 0;
+
+	if (!edev->ops || !edev->ops->common) {
+		DP_ERR(edev, "Edev ops not populated\n");
+		rc = -EINVAL;
+		goto err;
+	}
+
+	switch (edev->dump_info.cmd) {
+	case QEDE_DUMP_CMD_NVM_CFG:
+		if (edev->dump_info.num_args != QEDE_DUMP_NVM_ARG_COUNT) {
+			DP_ERR(edev, "Arg count = %d required = %d\n",
+			       edev->dump_info.num_args,
+			       QEDE_DUMP_NVM_ARG_COUNT);
+			rc = -EINVAL;
+			goto err;
+		}
+		rc = edev->ops->common->read_nvm_cfg(edev->cdev, (u8 **)&buf,
+						      edev->dump_info.args[0],
+						      edev->dump_info.args[1]);
+		break;
+	case QEDE_DUMP_CMD_GRCDUMP:
+		memset(buf, 0, dump->len);
+		rc = edev->ops->common->dbg_all_data(edev->cdev, buf);
+		break;
+	default:
+		DP_ERR(edev, "Invalid cmd = %d\n", edev->dump_info.cmd);
+		rc = -EINVAL;
+		break;
+	}
+
+err:
+	edev->dump_info.cmd = QEDE_DUMP_CMD_NONE;
+	edev->dump_info.num_args = 0;
+	memset(edev->dump_info.args, 0, sizeof(edev->dump_info.args));
+
+	return rc;
+}
+
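The three callbacks above implement the ethtool dump interface: qede_set_dump() first latches a command and then treats each further call as one argument to it, qede_get_dump_flag() reports the resulting buffer length, and qede_get_dump_data() produces the dump and resets the state. A user-space mock of the argument-latching protocol, with local constants standing in for the QEDE_DUMP_* values:

	#include <stdio.h>

	enum { DUMP_CMD_NONE = 0, DUMP_CMD_NVM_CFG = 1, DUMP_CMD_MAX = 2 };
	#define DUMP_MAX_ARGS 4

	static struct {
		int cmd;
		unsigned int num_args;
		unsigned int args[DUMP_MAX_ARGS];
	} dump_info;

	static int set_dump(unsigned int flag)
	{
		if (dump_info.cmd == DUMP_CMD_NONE) {
			if (flag > DUMP_CMD_MAX)
				return -1;	/* invalid command */
			dump_info.cmd = flag;
			dump_info.num_args = 0;
			return 0;
		}
		if (dump_info.num_args == DUMP_MAX_ARGS)
			return -1;		/* too many arguments */
		dump_info.args[dump_info.num_args++] = flag;
		return 0;
	}

	int main(void)
	{
		set_dump(DUMP_CMD_NVM_CFG);	/* first call picks the command */
		set_dump(0x1d);			/* later calls feed arguments */
		set_dump(0);
		printf("cmd=%d args=%u,%u\n", dump_info.cmd,
		       dump_info.args[0], dump_info.args[1]);
		return 0;
	}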
 static const struct ethtool_ops qede_ethtool_ops = {
 	.get_link_ksettings = qede_get_link_ksettings,
 	.set_link_ksettings = qede_set_link_ksettings,
@@ -1935,6 +2127,9 @@
 	.get_tunable = qede_get_tunable,
 	.set_tunable = qede_set_tunable,
 	.flash_device = qede_flash_device,
+	.get_dump_flag = qede_get_dump_flag,
+	.get_dump_data = qede_get_dump_data,
+	.set_dump = qede_set_dump,
 };
 
 static const struct ethtool_ops qede_vf_ethtool_ops = {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index b16ce7d..9a6a9a0 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1665,198 +1665,6 @@
 	return 0;
 }
 
-static int qede_flow_spec_to_tuple_ipv4_common(struct qede_dev *edev,
-					       struct qede_arfs_tuple *t,
-					       struct ethtool_rx_flow_spec *fs)
-{
-	if ((fs->h_u.tcp_ip4_spec.ip4src &
-	     fs->m_u.tcp_ip4_spec.ip4src) != fs->h_u.tcp_ip4_spec.ip4src) {
-		DP_INFO(edev, "Don't support IP-masks\n");
-		return -EOPNOTSUPP;
-	}
-
-	if ((fs->h_u.tcp_ip4_spec.ip4dst &
-	     fs->m_u.tcp_ip4_spec.ip4dst) != fs->h_u.tcp_ip4_spec.ip4dst) {
-		DP_INFO(edev, "Don't support IP-masks\n");
-		return -EOPNOTSUPP;
-	}
-
-	if ((fs->h_u.tcp_ip4_spec.psrc &
-	     fs->m_u.tcp_ip4_spec.psrc) != fs->h_u.tcp_ip4_spec.psrc) {
-		DP_INFO(edev, "Don't support port-masks\n");
-		return -EOPNOTSUPP;
-	}
-
-	if ((fs->h_u.tcp_ip4_spec.pdst &
-	     fs->m_u.tcp_ip4_spec.pdst) != fs->h_u.tcp_ip4_spec.pdst) {
-		DP_INFO(edev, "Don't support port-masks\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (fs->h_u.tcp_ip4_spec.tos) {
-		DP_INFO(edev, "Don't support tos\n");
-		return -EOPNOTSUPP;
-	}
-
-	t->eth_proto = htons(ETH_P_IP);
-	t->src_ipv4 = fs->h_u.tcp_ip4_spec.ip4src;
-	t->dst_ipv4 = fs->h_u.tcp_ip4_spec.ip4dst;
-	t->src_port = fs->h_u.tcp_ip4_spec.psrc;
-	t->dst_port = fs->h_u.tcp_ip4_spec.pdst;
-
-	return qede_set_v4_tuple_to_profile(edev, t);
-}
-
-static int qede_flow_spec_to_tuple_tcpv4(struct qede_dev *edev,
-					 struct qede_arfs_tuple *t,
-					 struct ethtool_rx_flow_spec *fs)
-{
-	t->ip_proto = IPPROTO_TCP;
-
-	if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int qede_flow_spec_to_tuple_udpv4(struct qede_dev *edev,
-					 struct qede_arfs_tuple *t,
-					 struct ethtool_rx_flow_spec *fs)
-{
-	t->ip_proto = IPPROTO_UDP;
-
-	if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int qede_flow_spec_to_tuple_ipv6_common(struct qede_dev *edev,
-					       struct qede_arfs_tuple *t,
-					       struct ethtool_rx_flow_spec *fs)
-{
-	struct in6_addr zero_addr;
-
-	memset(&zero_addr, 0, sizeof(zero_addr));
-
-	if ((fs->h_u.tcp_ip6_spec.psrc &
-	     fs->m_u.tcp_ip6_spec.psrc) != fs->h_u.tcp_ip6_spec.psrc) {
-		DP_INFO(edev, "Don't support port-masks\n");
-		return -EOPNOTSUPP;
-	}
-
-	if ((fs->h_u.tcp_ip6_spec.pdst &
-	     fs->m_u.tcp_ip6_spec.pdst) != fs->h_u.tcp_ip6_spec.pdst) {
-		DP_INFO(edev, "Don't support port-masks\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (fs->h_u.tcp_ip6_spec.tclass) {
-		DP_INFO(edev, "Don't support tclass\n");
-		return -EOPNOTSUPP;
-	}
-
-	t->eth_proto = htons(ETH_P_IPV6);
-	memcpy(&t->src_ipv6, &fs->h_u.tcp_ip6_spec.ip6src,
-	       sizeof(struct in6_addr));
-	memcpy(&t->dst_ipv6, &fs->h_u.tcp_ip6_spec.ip6dst,
-	       sizeof(struct in6_addr));
-	t->src_port = fs->h_u.tcp_ip6_spec.psrc;
-	t->dst_port = fs->h_u.tcp_ip6_spec.pdst;
-
-	return qede_set_v6_tuple_to_profile(edev, t, &zero_addr);
-}
-
-static int qede_flow_spec_to_tuple_tcpv6(struct qede_dev *edev,
-					 struct qede_arfs_tuple *t,
-					 struct ethtool_rx_flow_spec *fs)
-{
-	t->ip_proto = IPPROTO_TCP;
-
-	if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int qede_flow_spec_to_tuple_udpv6(struct qede_dev *edev,
-					 struct qede_arfs_tuple *t,
-					 struct ethtool_rx_flow_spec *fs)
-{
-	t->ip_proto = IPPROTO_UDP;
-
-	if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int qede_flow_spec_to_tuple(struct qede_dev *edev,
-				   struct qede_arfs_tuple *t,
-				   struct ethtool_rx_flow_spec *fs)
-{
-	memset(t, 0, sizeof(*t));
-
-	if (qede_flow_spec_validate_unused(edev, fs))
-		return -EOPNOTSUPP;
-
-	switch ((fs->flow_type & ~FLOW_EXT)) {
-	case TCP_V4_FLOW:
-		return qede_flow_spec_to_tuple_tcpv4(edev, t, fs);
-	case UDP_V4_FLOW:
-		return qede_flow_spec_to_tuple_udpv4(edev, t, fs);
-	case TCP_V6_FLOW:
-		return qede_flow_spec_to_tuple_tcpv6(edev, t, fs);
-	case UDP_V6_FLOW:
-		return qede_flow_spec_to_tuple_udpv6(edev, t, fs);
-	default:
-		DP_VERBOSE(edev, NETIF_MSG_IFUP,
-			   "Can't support flow of type %08x\n", fs->flow_type);
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
-static int qede_flow_spec_validate(struct qede_dev *edev,
-				   struct ethtool_rx_flow_spec *fs,
-				   struct qede_arfs_tuple *t)
-{
-	if (fs->location >= QEDE_RFS_MAX_FLTR) {
-		DP_INFO(edev, "Location out-of-bounds\n");
-		return -EINVAL;
-	}
-
-	/* Check location isn't already in use */
-	if (test_bit(fs->location, edev->arfs->arfs_fltr_bmap)) {
-		DP_INFO(edev, "Location already in use\n");
-		return -EINVAL;
-	}
-
-	/* Check if the filtering-mode could support the filter */
-	if (edev->arfs->filter_count &&
-	    edev->arfs->mode != t->mode) {
-		DP_INFO(edev,
-			"flow_spec would require filtering mode %08x, but %08x is configured\n",
-			t->mode, edev->arfs->filter_count);
-		return -EINVAL;
-	}
-
-	/* If drop requested then no need to validate other data */
-	if (fs->ring_cookie == RX_CLS_FLOW_DISC)
-		return 0;
-
-	if (ethtool_get_flow_spec_ring_vf(fs->ring_cookie))
-		return 0;
-
-	if (fs->ring_cookie >= QEDE_RSS_COUNT(edev)) {
-		DP_INFO(edev, "Queue out-of-bounds\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 /* Must be called while qede lock is held */
 static struct qede_arfs_fltr_node *
 qede_flow_find_fltr(struct qede_dev *edev, struct qede_arfs_tuple *t)
@@ -1896,72 +1704,6 @@
 			   "Configuring N-tuple for VF 0x%02x\n", n->vfid - 1);
 }
 
-int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
-{
-	struct ethtool_rx_flow_spec *fsp = &info->fs;
-	struct qede_arfs_fltr_node *n;
-	struct qede_arfs_tuple t;
-	int min_hlen, rc;
-
-	__qede_lock(edev);
-
-	if (!edev->arfs) {
-		rc = -EPERM;
-		goto unlock;
-	}
-
-	/* Translate the flow specification into something fittign our DB */
-	rc = qede_flow_spec_to_tuple(edev, &t, fsp);
-	if (rc)
-		goto unlock;
-
-	/* Make sure location is valid and filter isn't already set */
-	rc = qede_flow_spec_validate(edev, fsp, &t);
-	if (rc)
-		goto unlock;
-
-	if (qede_flow_find_fltr(edev, &t)) {
-		rc = -EINVAL;
-		goto unlock;
-	}
-
-	n = kzalloc(sizeof(*n), GFP_KERNEL);
-	if (!n) {
-		rc = -ENOMEM;
-		goto unlock;
-	}
-
-	min_hlen = qede_flow_get_min_header_size(&t);
-	n->data = kzalloc(min_hlen, GFP_KERNEL);
-	if (!n->data) {
-		kfree(n);
-		rc = -ENOMEM;
-		goto unlock;
-	}
-
-	n->sw_id = fsp->location;
-	set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap);
-	n->buf_len = min_hlen;
-
-	memcpy(&n->tuple, &t, sizeof(n->tuple));
-
-	qede_flow_set_destination(edev, n, fsp);
-
-	/* Build a minimal header according to the flow */
-	n->tuple.build_hdr(&n->tuple, n->data);
-
-	rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0);
-	if (rc)
-		goto unlock;
-
-	qede_configure_arfs_fltr(edev, n, n->rxq_id, true);
-	rc = qede_poll_arfs_filter_config(edev, n);
-unlock:
-	__qede_unlock(edev);
-
-	return rc;
-}
-
 int qede_delete_flow_filter(struct qede_dev *edev, u64 cookie)
 {
 	struct qede_arfs_fltr_node *fltr = NULL;
@@ -2004,190 +1746,172 @@
 }
 
 static int qede_parse_actions(struct qede_dev *edev,
-			      struct tcf_exts *exts)
+			      struct flow_action *flow_action)
 {
-	int rc = -EINVAL, num_act = 0, i;
-	const struct tc_action *a;
-	bool is_drop = false;
+	const struct flow_action_entry *act;
+	int i;
 
-	if (!tcf_exts_has_actions(exts)) {
-		DP_NOTICE(edev, "No tc actions received\n");
-		return rc;
+	if (!flow_action_has_entries(flow_action)) {
+		DP_NOTICE(edev, "No actions received\n");
+		return -EINVAL;
 	}
 
-	tcf_exts_for_each_action(i, a, exts) {
-		num_act++;
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
+			break;
+		case FLOW_ACTION_QUEUE:
+			if (act->queue.vf)
+				break;
 
-		if (is_tcf_gact_shot(a))
-			is_drop = true;
-	}
-
-	if (num_act == 1 && is_drop)
-		return 0;
-
-	return rc;
-}
-
-static int
-qede_tc_parse_ports(struct qede_dev *edev,
-		    struct tc_cls_flower_offload *f,
-		    struct qede_arfs_tuple *t)
-{
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key, *mask;
-
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_PORTS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_PORTS,
-						 f->mask);
-
-		if ((key->src && mask->src != U16_MAX) ||
-		    (key->dst && mask->dst != U16_MAX)) {
-			DP_NOTICE(edev, "Do not support ports masks\n");
+			if (act->queue.index >= QEDE_RSS_COUNT(edev)) {
+				DP_INFO(edev, "Queue out-of-bounds\n");
+				return -EINVAL;
+			}
+			break;
+		default:
 			return -EINVAL;
 		}
-
-		t->src_port = key->src;
-		t->dst_port = key->dst;
 	}
 
 	return 0;
 }
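qede_parse_actions() above accepts only a drop action or a queue action whose index falls inside the RSS queue range, now iterated through the flow_action API instead of tcf_exts. The same validation logic as a self-contained sketch, with local types in place of struct flow_action:

	#include <stdio.h>

	enum act_id { ACT_DROP, ACT_QUEUE, ACT_MIRRED };

	struct act_entry {
		enum act_id id;
		unsigned int queue_index;
	};

	static int parse_actions(const struct act_entry *acts, int n, int rss_count)
	{
		int i;

		if (n == 0)
			return -1;	/* no actions received */

		for (i = 0; i < n; i++) {
			switch (acts[i].id) {
			case ACT_DROP:
				break;
			case ACT_QUEUE:
				if (acts[i].queue_index >= (unsigned int)rss_count)
					return -1;	/* queue out-of-bounds */
				break;
			default:
				return -1;	/* unsupported action */
			}
		}
		return 0;
	}

	int main(void)
	{
		struct act_entry acts[] = { { ACT_QUEUE, 3 } };

		printf("%s\n", parse_actions(acts, 1, 8) ? "rejected" : "accepted");
		return 0;
	}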
 
 static int
-qede_tc_parse_v6_common(struct qede_dev *edev,
-			struct tc_cls_flower_offload *f,
-			struct qede_arfs_tuple *t)
+qede_flow_parse_ports(struct qede_dev *edev, struct flow_rule *rule,
+		      struct qede_arfs_tuple *t)
+{
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if ((match.key->src && match.mask->src != U16_MAX) ||
+		    (match.key->dst && match.mask->dst != U16_MAX)) {
+			DP_NOTICE(edev, "Do not support port masks\n");
+			return -EINVAL;
+		}
+
+		t->src_port = match.key->src;
+		t->dst_port = match.key->dst;
+	}
+
+	return 0;
+}
+
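qede_flow_parse_ports() and the address parsers below enforce exact-match semantics: a field is either left at zero (ignored) or its mask must be all-ones, since partial masks are not supported. The rule in isolation:

	#include <stdint.h>
	#include <stdio.h>

	static int check_exact(uint16_t key, uint16_t mask)
	{
		/* a set key with anything less than a full mask is rejected */
		return (key && mask != UINT16_MAX) ? -1 : 0;
	}

	int main(void)
	{
		printf("%d\n", check_exact(0x1234, 0xffff));	/* 0: exact match */
		printf("%d\n", check_exact(0x1234, 0xff00));	/* -1: partial mask */
		printf("%d\n", check_exact(0, 0));		/* 0: field ignored */
		return 0;
	}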
+static int
+qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule,
+			  struct qede_arfs_tuple *t)
 {
 	struct in6_addr zero_addr, addr;
 
 	memset(&zero_addr, 0, sizeof(addr));
 	memset(&addr, 0xff, sizeof(addr));
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
-		struct flow_dissector_key_ipv6_addrs *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						 f->mask);
-
-		if ((memcmp(&key->src, &zero_addr, sizeof(addr)) &&
-		     memcmp(&mask->src, &addr, sizeof(addr))) ||
-		    (memcmp(&key->dst, &zero_addr, sizeof(addr)) &&
-		     memcmp(&mask->dst, &addr, sizeof(addr)))) {
+		flow_rule_match_ipv6_addrs(rule, &match);
+		if ((memcmp(&match.key->src, &zero_addr, sizeof(addr)) &&
+		     memcmp(&match.mask->src, &addr, sizeof(addr))) ||
+		    (memcmp(&match.key->dst, &zero_addr, sizeof(addr)) &&
+		     memcmp(&match.mask->dst, &addr, sizeof(addr)))) {
 			DP_NOTICE(edev,
 				  "Do not support IPv6 address prefix/mask\n");
 			return -EINVAL;
 		}
 
-		memcpy(&t->src_ipv6, &key->src, sizeof(addr));
-		memcpy(&t->dst_ipv6, &key->dst, sizeof(addr));
+		memcpy(&t->src_ipv6, &match.key->src, sizeof(addr));
+		memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr));
 	}
 
-	if (qede_tc_parse_ports(edev, f, t))
+	if (qede_flow_parse_ports(edev, rule, t))
 		return -EINVAL;
 
 	return qede_set_v6_tuple_to_profile(edev, t, &zero_addr);
 }
 
 static int
-qede_tc_parse_v4_common(struct qede_dev *edev,
-			struct tc_cls_flower_offload *f,
+qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule,
 			struct qede_arfs_tuple *t)
 {
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		struct flow_dissector_key_ipv4_addrs *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						 f->mask);
-
-		if ((key->src && mask->src != U32_MAX) ||
-		    (key->dst && mask->dst != U32_MAX)) {
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if ((match.key->src && match.mask->src != U32_MAX) ||
+		    (match.key->dst && match.mask->dst != U32_MAX)) {
 			DP_NOTICE(edev, "Do not support ipv4 prefix/masks\n");
 			return -EINVAL;
 		}
 
-		t->src_ipv4 = key->src;
-		t->dst_ipv4 = key->dst;
+		t->src_ipv4 = match.key->src;
+		t->dst_ipv4 = match.key->dst;
 	}
 
-	if (qede_tc_parse_ports(edev, f, t))
+	if (qede_flow_parse_ports(edev, rule, t))
 		return -EINVAL;
 
 	return qede_set_v4_tuple_to_profile(edev, t);
 }
 
 static int
-qede_tc_parse_tcp_v6(struct qede_dev *edev,
-		     struct tc_cls_flower_offload *f,
+qede_flow_parse_tcp_v6(struct qede_dev *edev, struct flow_rule *rule,
 		     struct qede_arfs_tuple *tuple)
 {
 	tuple->ip_proto = IPPROTO_TCP;
 	tuple->eth_proto = htons(ETH_P_IPV6);
 
-	return qede_tc_parse_v6_common(edev, f, tuple);
+	return qede_flow_parse_v6_common(edev, rule, tuple);
 }
 
 static int
-qede_tc_parse_tcp_v4(struct qede_dev *edev,
-		     struct tc_cls_flower_offload *f,
+qede_flow_parse_tcp_v4(struct qede_dev *edev, struct flow_rule *rule,
 		     struct qede_arfs_tuple *tuple)
 {
 	tuple->ip_proto = IPPROTO_TCP;
 	tuple->eth_proto = htons(ETH_P_IP);
 
-	return qede_tc_parse_v4_common(edev, f, tuple);
+	return qede_flow_parse_v4_common(edev, rule, tuple);
 }
 
 static int
-qede_tc_parse_udp_v6(struct qede_dev *edev,
-		     struct tc_cls_flower_offload *f,
+qede_flow_parse_udp_v6(struct qede_dev *edev, struct flow_rule *rule,
 		     struct qede_arfs_tuple *tuple)
 {
 	tuple->ip_proto = IPPROTO_UDP;
 	tuple->eth_proto = htons(ETH_P_IPV6);
 
-	return qede_tc_parse_v6_common(edev, f, tuple);
+	return qede_flow_parse_v6_common(edev, rule, tuple);
 }
 
 static int
-qede_tc_parse_udp_v4(struct qede_dev *edev,
-		     struct tc_cls_flower_offload *f,
+qede_flow_parse_udp_v4(struct qede_dev *edev, struct flow_rule *rule,
 		     struct qede_arfs_tuple *tuple)
 {
 	tuple->ip_proto = IPPROTO_UDP;
 	tuple->eth_proto = htons(ETH_P_IP);
 
-	return qede_tc_parse_v4_common(edev, f, tuple);
+	return qede_flow_parse_v4_common(edev, rule, tuple);
 }
 
 static int
-qede_parse_flower_attr(struct qede_dev *edev, __be16 proto,
-		       struct tc_cls_flower_offload *f,
-		       struct qede_arfs_tuple *tuple)
+qede_parse_flow_attr(struct qede_dev *edev, __be16 proto,
+		     struct flow_rule *rule, struct qede_arfs_tuple *tuple)
 {
+	struct flow_dissector *dissector = rule->match.dissector;
 	int rc = -EINVAL;
 	u8 ip_proto = 0;
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_PORTS))) {
 		DP_NOTICE(edev, "Unsupported key set:0x%x\n",
-			  f->dissector->used_keys);
+			  dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
@@ -2197,31 +1921,29 @@
 		return -EPROTONOSUPPORT;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_BASIC,
-						f->key);
-		ip_proto = key->ip_proto;
+		flow_rule_match_basic(rule, &match);
+		ip_proto = match.key->ip_proto;
 	}
 
 	if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP))
-		rc = qede_tc_parse_tcp_v4(edev, f, tuple);
+		rc = qede_flow_parse_tcp_v4(edev, rule, tuple);
 	else if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IPV6))
-		rc = qede_tc_parse_tcp_v6(edev, f, tuple);
+		rc = qede_flow_parse_tcp_v6(edev, rule, tuple);
 	else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP))
-		rc = qede_tc_parse_udp_v4(edev, f, tuple);
+		rc = qede_flow_parse_udp_v4(edev, rule, tuple);
 	else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IPV6))
-		rc = qede_tc_parse_udp_v6(edev, f, tuple);
+		rc = qede_flow_parse_udp_v6(edev, rule, tuple);
 	else
-		DP_NOTICE(edev, "Invalid tc protocol request\n");
+		DP_NOTICE(edev, "Invalid protocol request\n");
 
 	return rc;
 }
 
 int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto,
-			    struct tc_cls_flower_offload *f)
+			    struct flow_cls_offload *f)
 {
 	struct qede_arfs_fltr_node *n;
 	int min_hlen, rc = -EINVAL;
@@ -2235,7 +1957,7 @@
 	}
 
 	/* parse flower attribute and prepare filter */
-	if (qede_parse_flower_attr(edev, proto, f, &t))
+	if (qede_parse_flow_attr(edev, proto, f->rule, &t))
 		goto unlock;
 
 	/* Validate profile mode and number of filters */
@@ -2248,7 +1970,7 @@
 	}
 
 	/* parse tc actions and get the vf_id */
-	if (qede_parse_actions(edev, f->exts))
+	if (qede_parse_actions(edev, &f->rule->action))
 		goto unlock;
 
 	if (qede_flow_find_fltr(edev, &t)) {
@@ -2290,3 +2012,141 @@
 	__qede_unlock(edev);
 	return rc;
 }
+
+static int qede_flow_spec_validate(struct qede_dev *edev,
+				   struct flow_action *flow_action,
+				   struct qede_arfs_tuple *t,
+				   __u32 location)
+{
+	if (location >= QEDE_RFS_MAX_FLTR) {
+		DP_INFO(edev, "Location out-of-bounds\n");
+		return -EINVAL;
+	}
+
+	/* Check location isn't already in use */
+	if (test_bit(location, edev->arfs->arfs_fltr_bmap)) {
+		DP_INFO(edev, "Location already in use\n");
+		return -EINVAL;
+	}
+
+	/* Check if the filtering-mode could support the filter */
+	if (edev->arfs->filter_count &&
+	    edev->arfs->mode != t->mode) {
+		DP_INFO(edev,
+			"flow_spec would require filtering mode %08x, but %08x is configured\n",
+			t->mode, edev->arfs->mode);
+		return -EINVAL;
+	}
+
+	if (qede_parse_actions(edev, flow_action))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int qede_flow_spec_to_rule(struct qede_dev *edev,
+				  struct qede_arfs_tuple *t,
+				  struct ethtool_rx_flow_spec *fs)
+{
+	struct ethtool_rx_flow_spec_input input = {};
+	struct ethtool_rx_flow_rule *flow;
+	__be16 proto;
+	int err = 0;
+
+	if (qede_flow_spec_validate_unused(edev, fs))
+		return -EOPNOTSUPP;
+
+	switch ((fs->flow_type & ~FLOW_EXT)) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		proto = htons(ETH_P_IP);
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		proto = htons(ETH_P_IPV6);
+		break;
+	default:
+		DP_VERBOSE(edev, NETIF_MSG_IFUP,
+			   "Can't support flow of type %08x\n", fs->flow_type);
+		return -EOPNOTSUPP;
+	}
+
+	input.fs = fs;
+	flow = ethtool_rx_flow_rule_create(&input);
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
+
+	if (qede_parse_flow_attr(edev, proto, flow->rule, t)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	/* Make sure location is valid and filter isn't already set */
+	err = qede_flow_spec_validate(edev, &flow->rule->action, t,
+				      fs->location);
+err_out:
+	ethtool_rx_flow_rule_destroy(flow);
+	return err;
+
+}
+
+int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
+{
+	struct ethtool_rx_flow_spec *fsp = &info->fs;
+	struct qede_arfs_fltr_node *n;
+	struct qede_arfs_tuple t;
+	int min_hlen, rc;
+
+	__qede_lock(edev);
+
+	if (!edev->arfs) {
+		rc = -EPERM;
+		goto unlock;
+	}
+
+	/* Translate the flow specification into something fitting our DB */
+	rc = qede_flow_spec_to_rule(edev, &t, fsp);
+	if (rc)
+		goto unlock;
+
+	if (qede_flow_find_fltr(edev, &t)) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	n = kzalloc(sizeof(*n), GFP_KERNEL);
+	if (!n) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	min_hlen = qede_flow_get_min_header_size(&t);
+	n->data = kzalloc(min_hlen, GFP_KERNEL);
+	if (!n->data) {
+		kfree(n);
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	n->sw_id = fsp->location;
+	set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap);
+	n->buf_len = min_hlen;
+
+	memcpy(&n->tuple, &t, sizeof(n->tuple));
+
+	qede_flow_set_destination(edev, n, fsp);
+
+	/* Build a minimal header according to the flow */
+	n->tuple.build_hdr(&n->tuple, n->data);
+
+	rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0);
+	if (rc)
+		goto unlock;
+
+	qede_configure_arfs_fltr(edev, n, n->rxq_id, true);
+	rc = qede_poll_arfs_filter_config(edev, n);
+unlock:
+	__qede_unlock(edev);
+
+	return rc;
+}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 1a78027..004c0bf 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -580,14 +580,6 @@
 
 	internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
 			(u32 *)&rx_prods);
-
-	/* mmiowb is needed to synchronize doorbell writes from more than one
-	 * processor. It guarantees that the write arrives to the device before
-	 * the napi lock is released and another qede_poll is called (possibly
-	 * on another CPU). Without this barrier, the next doorbell can bypass
-	 * this doorbell. This is applicable to IA64/Altix systems.
-	 */
-	mmiowb();
 }
 
 static void qede_get_rxhash(struct sk_buff *skb, u8 bitfields, __le32 rss_hash)
@@ -787,8 +779,7 @@
 			return NULL;
 
 		skb_reserve(skb, pad);
-		memcpy(skb_put(skb, len),
-		       page_address(bd->data) + offset, len);
+		skb_put_data(skb, page_address(bd->data) + offset, len);
 		qede_reuse_page(rxq, bd);
 		goto out;
 	}
@@ -1466,8 +1457,8 @@
 #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
 	if (qede_pkt_req_lin(skb, xmit_type)) {
 		if (skb_linearize(skb)) {
-			DP_NOTICE(edev,
-				  "SKB linearization failed - silently dropping this SKB\n");
+			txq->tx_mem_alloc_err++;
+
 			dev_kfree_skb_any(skb);
 			return NETDEV_TX_OK;
 		}
@@ -1665,12 +1656,12 @@
 	txq->tx_db.data.bd_prod =
 		cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
 
-	if (!skb->xmit_more || netif_xmit_stopped(netdev_txq))
+	if (!netdev_xmit_more() || netif_xmit_stopped(netdev_txq))
 		qede_update_tx_producer(txq);
 
 	if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
 		      < (MAX_SKB_FRAGS + 1))) {
-		if (skb->xmit_more)
+		if (netdev_xmit_more())
 			qede_update_tx_producer(txq);
 
 		netif_tx_stop_queue(netdev_txq);
@@ -1695,6 +1686,18 @@
 	return NETDEV_TX_OK;
 }
 
+u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
+		      struct net_device *sb_dev)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	int total_txq;
+
+	total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc;
+
+	return QEDE_TSS_COUNT(edev) ?
+		netdev_pick_tx(dev, skb, NULL) % total_txq : 0;
+}
+
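qede_select_queue() folds the traffic-class dimension into the queue pick: the stack's default choice is taken modulo tss_count * num_tc so it always lands inside the enlarged TX queue range. Illustrated below with a plain hash standing in for netdev_pick_tx():

	#include <stdio.h>

	static unsigned int select_queue(unsigned int hash, unsigned int tss_count,
					 unsigned int num_tc)
	{
		unsigned int total_txq = tss_count * num_tc;

		/* no TX queues at all: fall back to queue 0 */
		return tss_count ? hash % total_txq : 0;
	}

	int main(void)
	{
		/* 8 queue-pairs and 4 traffic classes give 32 tx queues */
		printf("queue %u\n", select_queue(0xdeadbeef, 8, 4));
		return 0;
	}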
 /* 8B udp header + 8B base tunnel header + 32B option length */
 #define QEDE_MAX_TUN_HDR_LEN 48
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 46d0f2e..a220cc7 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -133,23 +133,12 @@
 static void qede_remove(struct pci_dev *pdev);
 static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
+static void qede_schedule_recovery_handler(void *dev);
+static void qede_recovery_handler(struct qede_dev *edev);
 static void qede_get_eth_tlv_data(void *edev, void *data);
 static void qede_get_generic_tlv_data(void *edev,
 				      struct qed_generic_tlvs *data);
 
-/* The qede lock is used to protect driver state change and driver flows that
- * are not reentrant.
- */
-void __qede_lock(struct qede_dev *edev)
-{
-	mutex_lock(&edev->qede_lock);
-}
-
-void __qede_unlock(struct qede_dev *edev)
-{
-	mutex_unlock(&edev->qede_lock);
-}
-
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
 			    __be16 vlan_proto)
@@ -231,6 +220,7 @@
 		.arfs_filter_op = qede_arfs_filter_op,
 #endif
 		.link_update = qede_link_update,
+		.schedule_recovery_handler = qede_schedule_recovery_handler,
 		.get_generic_tlv_data = qede_get_generic_tlv_data,
 		.get_protocol_tlv_data = qede_get_eth_tlv_data,
 	},
@@ -400,6 +390,7 @@
 	p_common->brb_discards = stats.common.brb_discards;
 	p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames;
 	p_common->link_change_count = stats.common.link_change_count;
+	p_common->ptp_skip_txts = edev->ptp_skip_txts;
 
 	if (QEDE_IS_BB(edev)) {
 		struct qede_stats_bb *p_bb = &edev->stats.bb;
@@ -557,13 +548,13 @@
 }
 
 static int
-qede_set_flower(struct qede_dev *edev, struct tc_cls_flower_offload *f,
+qede_set_flower(struct qede_dev *edev, struct flow_cls_offload *f,
 		__be16 proto)
 {
 	switch (f->command) {
-	case TC_CLSFLOWER_REPLACE:
+	case FLOW_CLS_REPLACE:
 		return qede_add_tc_flower_fltr(edev, proto, f);
-	case TC_CLSFLOWER_DESTROY:
+	case FLOW_CLS_DESTROY:
 		return qede_delete_flow_filter(edev, f->cookie);
 	default:
 		return -EOPNOTSUPP;
@@ -573,7 +564,7 @@
 static int qede_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 				  void *cb_priv)
 {
-	struct tc_cls_flower_offload *f;
+	struct flow_cls_offload *f;
 	struct qede_dev *edev = cb_priv;
 
 	if (!tc_cls_can_offload_and_chain0(edev->ndev, type_data))
@@ -588,24 +579,7 @@
 	}
 }
 
-static int qede_setup_tc_block(struct qede_dev *edev,
-			       struct tc_block_offload *f)
-{
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block,
-					     qede_setup_tc_block_cb,
-					     edev, edev, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, qede_setup_tc_block_cb, edev);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(qede_block_cb_list);
 
 static int
 qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type,
@@ -616,7 +590,10 @@
 
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return qede_setup_tc_block(edev, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &qede_block_cb_list,
+						  qede_setup_tc_block_cb,
+						  edev, edev, true);
 	case TC_SETUP_QDISC_MQPRIO:
 		mqprio = type_data;
 
@@ -631,6 +608,7 @@
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
 	.ndo_start_xmit = qede_start_xmit,
+	.ndo_select_queue = qede_select_queue,
 	.ndo_set_rx_mode = qede_set_rx_mode,
 	.ndo_set_mac_address = qede_set_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
@@ -666,6 +644,7 @@
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
 	.ndo_start_xmit = qede_start_xmit,
+	.ndo_select_queue = qede_select_queue,
 	.ndo_set_rx_mode = qede_set_rx_mode,
 	.ndo_set_mac_address = qede_set_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
@@ -684,6 +663,7 @@
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
 	.ndo_start_xmit = qede_start_xmit,
+	.ndo_select_queue = qede_select_queue,
 	.ndo_set_rx_mode = qede_set_rx_mode,
 	.ndo_set_mac_address = qede_set_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
@@ -950,11 +930,57 @@
 	return -ENOMEM;
 }
 
+/* The qede lock is used to protect driver state change and driver flows that
+ * are not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+	mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+	mutex_unlock(&edev->qede_lock);
+}
+
+/* This version of the lock should be used when acquiring the RTNL lock is also
+ * needed in addition to the internal qede lock.
+ */
+static void qede_lock(struct qede_dev *edev)
+{
+	rtnl_lock();
+	__qede_lock(edev);
+}
+
+static void qede_unlock(struct qede_dev *edev)
+{
+	__qede_unlock(edev);
+	rtnl_unlock();
+}
+
 static void qede_sp_task(struct work_struct *work)
 {
 	struct qede_dev *edev = container_of(work, struct qede_dev,
 					     sp_task.work);
 
+	/* The locking scheme depends on the specific flag:
+	 * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
+	 * ensure that ongoing flows are ended and new ones are not started.
+	 * In other cases - only the internal qede lock should be acquired.
+	 */
+
+	if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+		/* SRIOV must be disabled outside the lock to avoid a deadlock.
+		 * The recovery of the active VFs is currently not supported.
+		 */
+		qede_sriov_configure(edev->pdev, 0);
+#endif
+		qede_lock(edev);
+		qede_recovery_handler(edev);
+		qede_unlock(edev);
+	}
+
 	__qede_lock(edev);
 
 	if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@@ -1031,6 +1057,7 @@
 
 enum qede_probe_mode {
 	QEDE_PROBE_NORMAL,
+	QEDE_PROBE_RECOVERY,
 };
 
 static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
@@ -1051,6 +1078,7 @@
 	probe_params.dp_module = dp_module;
 	probe_params.dp_level = dp_level;
 	probe_params.is_vf = is_vf;
+	probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
 	cdev = qed_ops->common->probe(pdev, &probe_params);
 	if (!cdev) {
 		rc = -ENODEV;
@@ -1078,40 +1106,52 @@
 	if (rc)
 		goto err2;
 
-	edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
-				   dp_level);
-	if (!edev) {
-		rc = -ENOMEM;
-		goto err2;
+	if (mode != QEDE_PROBE_RECOVERY) {
+		edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
+					   dp_level);
+		if (!edev) {
+			rc = -ENOMEM;
+			goto err2;
+		}
+	} else {
+		struct net_device *ndev = pci_get_drvdata(pdev);
+
+		edev = netdev_priv(ndev);
+		edev->cdev = cdev;
+		memset(&edev->stats, 0, sizeof(edev->stats));
+		memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
 	}
 
 	if (is_vf)
-		edev->flags |= QEDE_FLAG_IS_VF;
+		set_bit(QEDE_FLAGS_IS_VF, &edev->flags);
 
 	qede_init_ndev(edev);
 
-	rc = qede_rdma_dev_add(edev);
+	rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY));
 	if (rc)
 		goto err3;
 
-	/* Prepare the lock prior to the registration of the netdev,
-	 * as once it's registered we might reach flows requiring it
-	 * [it's even possible to reach a flow needing it directly
-	 * from there, although it's unlikely].
-	 */
-	INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
-	mutex_init(&edev->qede_lock);
-	rc = register_netdev(edev->ndev);
-	if (rc) {
-		DP_NOTICE(edev, "Cannot register net-device\n");
-		goto err4;
+	if (mode != QEDE_PROBE_RECOVERY) {
+		/* Prepare the lock prior to the registration of the netdev,
+		 * as once it's registered we might reach flows requiring it
+		 * [it's even possible to reach a flow needing it directly
+		 * from there, although it's unlikely].
+		 */
+		INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
+		mutex_init(&edev->qede_lock);
+
+		rc = register_netdev(edev->ndev);
+		if (rc) {
+			DP_NOTICE(edev, "Cannot register net-device\n");
+			goto err4;
+		}
 	}
 
 	edev->ops->common->set_name(cdev, edev->ndev->name);
 
 	/* PTP not supported on VFs */
 	if (!is_vf)
-		qede_ptp_enable(edev, true);
+		qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
 
 	edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
@@ -1126,7 +1166,7 @@
 	return 0;
 
 err4:
-	qede_rdma_dev_remove(edev);
+	qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY));
 err3:
 	free_netdev(edev->ndev);
 err2:
@@ -1162,26 +1202,39 @@
 
 enum qede_remove_mode {
 	QEDE_REMOVE_NORMAL,
+	QEDE_REMOVE_RECOVERY,
 };
 
 static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 {
 	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct qede_dev *edev = netdev_priv(ndev);
-	struct qed_dev *cdev = edev->cdev;
+	struct qede_dev *edev;
+	struct qed_dev *cdev;
+
+	if (!ndev) {
+		dev_info(&pdev->dev, "Device has already been removed\n");
+		return;
+	}
+
+	edev = netdev_priv(ndev);
+	cdev = edev->cdev;
 
 	DP_INFO(edev, "Starting qede_remove\n");
 
-	qede_rdma_dev_remove(edev);
-	unregister_netdev(ndev);
-	cancel_delayed_work_sync(&edev->sp_task);
+	qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
+
+	if (mode != QEDE_REMOVE_RECOVERY) {
+		unregister_netdev(ndev);
+
+		cancel_delayed_work_sync(&edev->sp_task);
+
+		edev->ops->common->set_power_state(cdev, PCI_D0);
+
+		pci_set_drvdata(pdev, NULL);
+	}
 
 	qede_ptp_disable(edev);
 
-	edev->ops->common->set_power_state(cdev, PCI_D0);
-
-	pci_set_drvdata(pdev, NULL);
-
 	/* Use global ops since we've freed edev */
 	qed_ops->common->slowpath_stop(cdev);
 	if (system_state == SYSTEM_POWER_OFF)
@@ -1194,7 +1247,8 @@
 	 * [e.g., QED register callbacks] won't break anything when
 	 * accessing the netdevice.
 	 */
-	 free_netdev(ndev);
+	if (mode != QEDE_REMOVE_RECOVERY)
+		free_netdev(ndev);
 
 	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
@@ -1247,7 +1301,8 @@
 			     u16 sb_id)
 {
 	if (sb_info->sb_virt) {
-		edev->ops->common->sb_release(edev->cdev, sb_info, sb_id);
+		edev->ops->common->sb_release(edev->cdev, sb_info, sb_id,
+					      QED_SB_TYPE_L2_QUEUE);
 		dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt),
 				  (void *)sb_info->sb_virt, sb_info->sb_phys);
 		memset(sb_info, 0, sizeof(*sb_info));
@@ -1539,6 +1594,58 @@
 	return 0;
 }
 
+static void qede_empty_tx_queue(struct qede_dev *edev,
+				struct qede_tx_queue *txq)
+{
+	unsigned int pkts_compl = 0, bytes_compl = 0;
+	struct netdev_queue *netdev_txq;
+	int rc, len = 0;
+
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
+
+	while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+	       qed_chain_get_prod_idx(&txq->tx_pbl)) {
+		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+			   "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+			   txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
+			   qed_chain_get_prod_idx(&txq->tx_pbl));
+
+		rc = qede_free_tx_pkt(edev, txq, &len);
+		if (rc) {
+			DP_NOTICE(edev,
+				  "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+				  txq->index,
+				  qed_chain_get_cons_idx(&txq->tx_pbl),
+				  qed_chain_get_prod_idx(&txq->tx_pbl));
+			break;
+		}
+
+		bytes_compl += len;
+		pkts_compl++;
+		txq->sw_tx_cons++;
+	}
+
+	netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
+}
+
+static void qede_empty_tx_queues(struct qede_dev *edev)
+{
+	int i;
+
+	for_each_queue(i)
+		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				struct qede_fastpath *fp;
+
+				fp = &edev->fp_array[i];
+				qede_empty_tx_queue(edev,
+						    &fp->txq[cos]);
+			}
+		}
+}
+
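qede_empty_tx_queue() walks the PBL chain from consumer to producer, freeing one packet per step and reporting the completions to the stack, which is what lets a recovery unload reclaim in-flight buffers without hardware help. The drain loop reduced to a standalone ring model:

	#include <stdio.h>

	#define RING_SIZE 8

	struct txq {
		unsigned int cons, prod;
		int len[RING_SIZE];
	};

	static int free_tx_pkt(struct txq *q, int *len)
	{
		*len = q->len[q->cons % RING_SIZE];
		return *len < 0 ? -1 : 0;	/* negative marks a bad entry */
	}

	int main(void)
	{
		struct txq q = { .cons = 0, .prod = 3, .len = { 100, 1500, 60 } };
		unsigned int pkts = 0, bytes = 0;
		int len;

		while (q.cons != q.prod) {
			if (free_tx_pkt(&q, &len))
				break;		/* stop early on failure */
			bytes += len;
			pkts++;
			q.cons++;
		}
		printf("completed %u pkts / %u bytes\n", pkts, bytes);
		return 0;
	}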
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
@@ -1774,6 +1881,10 @@
 static int qede_stop_txq(struct qede_dev *edev,
 			 struct qede_tx_queue *txq, int rss_id)
 {
+	/* delete doorbell from doorbell recovery mechanism */
+	edev->ops->common->db_recovery_del(edev->cdev, txq->doorbell_addr,
+					   &txq->tx_db);
+
 	return edev->ops->q_tx_stop(edev->cdev, rss_id, txq->handle);
 }
 
@@ -1910,6 +2021,11 @@
 		  DQ_XCM_ETH_TX_BD_PROD_CMD);
 	txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
 
+	/* register doorbell with doorbell recovery mechanism */
+	rc = edev->ops->common->db_recovery_add(edev->cdev, txq->doorbell_addr,
+						&txq->tx_db, DB_REC_WIDTH_32B,
+						DB_REC_KERNEL);
+
 	return rc;
 }
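The db_recovery_add/db_recovery_del pair above enrolls each TX doorbell with qed's doorbell-recovery machinery, which keeps the doorbell address together with a pointer to its shadow value so every doorbell can be re-rung from the shadows after a doorbell FIFO overflow. A toy model of such a registry (the real API also takes width and space parameters):

	#include <stdio.h>

	#define MAX_DB 4

	struct db_entry {
		unsigned int *db_addr;	/* doorbell register (simulated) */
		unsigned int *shadow;	/* last value the driver wrote */
	};

	static struct db_entry db_list[MAX_DB];
	static int db_count;

	static void db_recovery_add(unsigned int *addr, unsigned int *shadow)
	{
		db_list[db_count].db_addr = addr;
		db_list[db_count].shadow = shadow;
		db_count++;
	}

	static void db_recovery_execute(void)
	{
		int i;

		for (i = 0; i < db_count; i++)
			*db_list[i].db_addr = *db_list[i].shadow;	/* re-ring */
	}

	int main(void)
	{
		unsigned int db_reg = 0, tx_db = 0x1234;

		db_recovery_add(&db_reg, &tx_db);
		db_recovery_execute();
		printf("doorbell replayed: 0x%x\n", db_reg);
		return 0;
	}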
 
@@ -2044,6 +2160,7 @@
 
 enum qede_unload_mode {
 	QEDE_UNLOAD_NORMAL,
+	QEDE_UNLOAD_RECOVERY,
 };
 
 static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@@ -2057,7 +2174,10 @@
 	if (!is_locked)
 		__qede_lock(edev);
 
-	edev->state = QEDE_STATE_CLOSED;
+	clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
+
+	if (mode != QEDE_UNLOAD_RECOVERY)
+		edev->state = QEDE_STATE_CLOSED;
 
 	qede_rdma_dev_event_close(edev);
 
@@ -2065,17 +2185,20 @@
 	netif_tx_disable(edev->ndev);
 	netif_carrier_off(edev->ndev);
 
-	/* Reset the link */
-	memset(&link_params, 0, sizeof(link_params));
-	link_params.link_up = false;
-	edev->ops->common->set_link(edev->cdev, &link_params);
-	rc = qede_stop_queues(edev);
-	if (rc) {
-		qede_sync_free_irqs(edev);
-		goto out;
-	}
+	if (mode != QEDE_UNLOAD_RECOVERY) {
+		/* Reset the link */
+		memset(&link_params, 0, sizeof(link_params));
+		link_params.link_up = false;
+		edev->ops->common->set_link(edev->cdev, &link_params);
 
-	DP_INFO(edev, "Stopped Queues\n");
+		rc = qede_stop_queues(edev);
+		if (rc) {
+			qede_sync_free_irqs(edev);
+			goto out;
+		}
+
+		DP_INFO(edev, "Stopped Queues\n");
+	}
 
 	qede_vlan_mark_nonconfigured(edev);
 	edev->ops->fastpath_stop(edev->cdev);
@@ -2091,18 +2214,28 @@
 
 	qede_napi_disable_remove(edev);
 
+	if (mode == QEDE_UNLOAD_RECOVERY)
+		qede_empty_tx_queues(edev);
+
 	qede_free_mem_load(edev);
 	qede_free_fp_array(edev);
 
 out:
 	if (!is_locked)
 		__qede_unlock(edev);
+
+	if (mode != QEDE_UNLOAD_RECOVERY)
+		DP_NOTICE(edev, "Link is down\n");
+
+	edev->ptp_skip_txts = 0;
+
 	DP_INFO(edev, "Ending qede unload\n");
 }
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
 	QEDE_LOAD_RELOAD,
+	QEDE_LOAD_RECOVERY,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@@ -2163,6 +2296,8 @@
 	/* Program un-configured VLANs */
 	qede_configure_vlan_filters(edev);
 
+	set_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
+
 	/* Ask for link-up using current configuration */
 	memset(&link_params, 0, sizeof(link_params));
 	link_params.link_up = true;
@@ -2258,8 +2393,8 @@
 {
 	struct qede_dev *edev = dev;
 
-	if (!netif_running(edev->ndev)) {
-		DP_VERBOSE(edev, NETIF_MSG_LINK, "Interface is not running\n");
+	if (!test_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags)) {
+		DP_VERBOSE(edev, NETIF_MSG_LINK, "Interface is not ready\n");
 		return;
 	}
 
@@ -2280,6 +2415,77 @@
 	}
 }
 
+static void qede_schedule_recovery_handler(void *dev)
+{
+	struct qede_dev *edev = dev;
+
+	if (edev->state == QEDE_STATE_RECOVERY) {
+		DP_NOTICE(edev,
+			  "Avoid scheduling a recovery handler since already in recovery state\n");
+		return;
+	}
+
+	set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
+
+	DP_INFO(edev, "Scheduled a recovery handler\n");
+}
+
+static void qede_recovery_failed(struct qede_dev *edev)
+{
+	netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
+
+	netif_device_detach(edev->ndev);
+
+	if (edev->cdev)
+		edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
+}
+
+static void qede_recovery_handler(struct qede_dev *edev)
+{
+	u32 curr_state = edev->state;
+	int rc;
+
+	DP_NOTICE(edev, "Starting a recovery process\n");
+
+	/* No need to acquire the qede_lock first since it is acquired by
+	 * qede_sp_task before calling this function.
+	 */
+	edev->state = QEDE_STATE_RECOVERY;
+
+	edev->ops->common->recovery_prolog(edev->cdev);
+
+	if (curr_state == QEDE_STATE_OPEN)
+		qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
+
+	__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
+
+	rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
+			  IS_VF(edev), QEDE_PROBE_RECOVERY);
+	if (rc) {
+		edev->cdev = NULL;
+		goto err;
+	}
+
+	if (curr_state == QEDE_STATE_OPEN) {
+		rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
+		if (rc)
+			goto err;
+
+		qede_config_rx_mode(edev->ndev);
+		udp_tunnel_get_rx_info(edev->ndev);
+	}
+
+	edev->state = curr_state;
+
+	DP_NOTICE(edev, "Recovery handling is done\n");
+
+	return;
+
+err:
+	qede_recovery_failed(edev);
+}
+
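qede_recovery_handler() replays the driver lifecycle under the lock: unload in recovery mode if the device was open, remove and re-probe the qed device, then reload and restore the previous state, with any failure landing in qede_recovery_failed(). The control flow as a toy trace; the two flags are knobs for exercising the error path:

	#include <stdio.h>

	enum state { STATE_CLOSED, STATE_OPEN, STATE_RECOVERY };

	int main(void)
	{
		enum state curr = STATE_OPEN;	/* state before the fault */
		int probe_ok = 1, load_ok = 1;	/* flip to hit the error path */

		if (curr == STATE_OPEN)
			printf("qede_unload(QEDE_UNLOAD_RECOVERY)\n");
		printf("__qede_remove(QEDE_REMOVE_RECOVERY)\n");

		if (!probe_ok)
			goto err;
		printf("__qede_probe(QEDE_PROBE_RECOVERY)\n");

		if (curr == STATE_OPEN) {
			if (!load_ok)
				goto err;
			printf("qede_load(QEDE_LOAD_RECOVERY)\n");
		}

		printf("state restored to %d, recovery done\n", curr);
		return 0;

	err:
		printf("recovery failed: detach netdev, power down\n");
		return 1;
	}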
 static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index 013ff56..f815435 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 #include "qede_ptp.h"
+#define QEDE_PTP_TX_TIMEOUT (2 * HZ)
 
 struct qede_ptp {
 	const struct qed_eth_ptp_ops	*ops;
@@ -38,6 +39,7 @@
 	struct timecounter		tc;
 	struct ptp_clock		*clock;
 	struct work_struct		work;
+	unsigned long			ptp_tx_start;
 	struct qede_dev			*edev;
 	struct sk_buff			*tx_skb;
 
@@ -160,18 +162,30 @@
 	struct qede_dev *edev;
 	struct qede_ptp *ptp;
 	u64 timestamp, ns;
+	bool timedout;
 	int rc;
 
 	ptp = container_of(work, struct qede_ptp, work);
 	edev = ptp->edev;
+	timedout = time_is_before_jiffies(ptp->ptp_tx_start +
+					  QEDE_PTP_TX_TIMEOUT);
 
 	/* Read Tx timestamp registers */
 	spin_lock_bh(&ptp->lock);
 	rc = ptp->ops->read_tx_ts(edev->cdev, &timestamp);
 	spin_unlock_bh(&ptp->lock);
 	if (rc) {
-		/* Reschedule to keep checking for a valid timestamp value */
-		schedule_work(&ptp->work);
+		if (unlikely(timedout)) {
+			DP_INFO(edev, "Tx timestamp is not recorded\n");
+			dev_kfree_skb_any(ptp->tx_skb);
+			ptp->tx_skb = NULL;
+			clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+					 &edev->flags);
+			edev->ptp_skip_txts++;
+		} else {
+			/* Reschedule to keep checking for a valid TS value */
+			schedule_work(&ptp->work);
+		}
 		return;
 	}
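The hunk above bounds the Tx-timestamp polling: the work item keeps rescheduling itself while the register read fails, but once two seconds (QEDE_PTP_TX_TIMEOUT) have passed since the skb was queued it drops the skb and counts a skipped timestamp instead of polling forever. The decision in plain C, with time(2) standing in for jiffies:

	#include <stdio.h>
	#include <time.h>

	#define PTP_TX_TIMEOUT 2	/* seconds, mirroring 2 * HZ */

	int main(void)
	{
		time_t tx_start = time(NULL) - 3;	/* queued 3s ago */
		int ts_ready = 0;			/* hw never delivered */

		if (!ts_ready) {
			if (time(NULL) > tx_start + PTP_TX_TIMEOUT)
				printf("Tx timestamp is not recorded, dropping skb\n");
			else
				printf("rescheduling poll\n");
		}
		return 0;
	}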
 
@@ -223,12 +237,12 @@
 
 	switch (ptp->tx_type) {
 	case HWTSTAMP_TX_ON:
-		edev->flags |= QEDE_TX_TIMESTAMPING_EN;
+		set_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags);
 		tx_type = QED_PTP_HWTSTAMP_TX_ON;
 		break;
 
 	case HWTSTAMP_TX_OFF:
-		edev->flags &= ~QEDE_TX_TIMESTAMPING_EN;
+		clear_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags);
 		tx_type = QED_PTP_HWTSTAMP_TX_OFF;
 		break;
 
@@ -490,18 +504,17 @@
 
 	ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev);
 	if (IS_ERR(ptp->clock)) {
-		rc = -EINVAL;
 		DP_ERR(edev, "PTP clock registration failed\n");
+		qede_ptp_disable(edev);
+		rc = -EINVAL;
 		goto err2;
 	}
 
 	return 0;
 
-err2:
-	qede_ptp_disable(edev);
-	ptp->clock = NULL;
 err1:
 	kfree(ptp);
+err2:
 	edev->ptp = NULL;
 
 	return rc;
@@ -515,19 +528,28 @@
 	if (!ptp)
 		return;
 
-	if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags))
+	if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+				  &edev->flags)) {
+		DP_ERR(edev, "Timestamping in progress\n");
+		edev->ptp_skip_txts++;
 		return;
+	}
 
-	if (unlikely(!(edev->flags & QEDE_TX_TIMESTAMPING_EN))) {
-		DP_NOTICE(edev,
-			  "Tx timestamping was not enabled, this packet will not be timestamped\n");
+	if (unlikely(!test_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags))) {
+		DP_ERR(edev,
+		       "Tx timestamping was not enabled, this packet will not be timestamped\n");
+		clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags);
+		edev->ptp_skip_txts++;
 	} else if (unlikely(ptp->tx_skb)) {
-		DP_NOTICE(edev,
-			  "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+		DP_ERR(edev,
+		       "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+		clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags);
+		edev->ptp_skip_txts++;
 	} else {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		/* schedule check for Tx timestamp */
 		ptp->tx_skb = skb_get(skb);
+		ptp->ptp_tx_start = jiffies;
 		schedule_work(&ptp->work);
 	}
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 1900bf7..ffabc2d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -50,6 +50,8 @@
 	if (!qedr_drv)
 		return;
 
+	/* Leftovers from previous error recovery */
+	edev->rdma_info.exp_recovery = false;
 	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
 						 edev->ndev);
 }
@@ -87,21 +89,26 @@
 	destroy_workqueue(edev->rdma_info.rdma_wq);
 }
 
-int qede_rdma_dev_add(struct qede_dev *edev)
+int qede_rdma_dev_add(struct qede_dev *edev, bool recovery)
 {
-	int rc = 0;
+	int rc;
 
-	if (qede_rdma_supported(edev)) {
-		rc = qede_rdma_create_wq(edev);
-		if (rc)
-			return rc;
+	if (!qede_rdma_supported(edev))
+		return 0;
 
-		INIT_LIST_HEAD(&edev->rdma_info.entry);
-		mutex_lock(&qedr_dev_list_lock);
-		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-		_qede_rdma_dev_add(edev);
-		mutex_unlock(&qedr_dev_list_lock);
-	}
+	/* Cannot start qedr while recovering since it wasn't fully stopped */
+	if (recovery)
+		return 0;
+
+	rc = qede_rdma_create_wq(edev);
+	if (rc)
+		return rc;
+
+	INIT_LIST_HEAD(&edev->rdma_info.entry);
+	mutex_lock(&qedr_dev_list_lock);
+	list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+	_qede_rdma_dev_add(edev);
+	mutex_unlock(&qedr_dev_list_lock);
 
 	return rc;
 }
@@ -110,19 +117,30 @@
 {
 	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
 		qedr_drv->remove(edev->rdma_info.qedr_dev);
-	edev->rdma_info.qedr_dev = NULL;
 }
 
-void qede_rdma_dev_remove(struct qede_dev *edev)
+void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery)
 {
 	if (!qede_rdma_supported(edev))
 		return;
 
-	qede_rdma_destroy_wq(edev);
-	mutex_lock(&qedr_dev_list_lock);
-	_qede_rdma_dev_remove(edev);
-	list_del(&edev->rdma_info.entry);
-	mutex_unlock(&qedr_dev_list_lock);
+	/* Cannot remove qedr while recovering since it wasn't fully stopped */
+	if (!recovery) {
+		qede_rdma_destroy_wq(edev);
+		mutex_lock(&qedr_dev_list_lock);
+		if (!edev->rdma_info.exp_recovery)
+			_qede_rdma_dev_remove(edev);
+		edev->rdma_info.qedr_dev = NULL;
+		list_del(&edev->rdma_info.entry);
+		mutex_unlock(&qedr_dev_list_lock);
+	} else {
+		if (!edev->rdma_info.exp_recovery) {
+			mutex_lock(&qedr_dev_list_lock);
+			_qede_rdma_dev_remove(edev);
+			mutex_unlock(&qedr_dev_list_lock);
+		}
+		edev->rdma_info.exp_recovery = true;
+	}
 }
 
 static void _qede_rdma_dev_open(struct qede_dev *edev)
@@ -204,7 +222,8 @@
 
 	mutex_lock(&qedr_dev_list_lock);
 	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-		if (edev->rdma_info.qedr_dev)
+		/* If device has experienced recovery it was already removed */
+		if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
 			_qede_rdma_dev_remove(edev);
 	}
 	qedr_drv = NULL;
@@ -284,6 +303,10 @@
 {
 	struct qede_rdma_event_work *event_node;
 
+	/* If a recovery was experienced, avoid adding the event */
+	if (edev->rdma_info.exp_recovery)
+		return;
+
 	if (!edev->rdma_info.qedr_dev)
 		return;
 
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 10b075b..b4b8ba0 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -1858,7 +1858,6 @@
 		wmb();
 		writel_relaxed(qdev->small_buf_q_producer_index,
 			       &port_regs->CommonRegs.rxSmallQProducerIndex);
-		mmiowb();
 	}
 }
 
@@ -2788,6 +2787,7 @@
 				netdev_err(qdev->ndev,
 					   "PCI mapping failed with error: %d\n",
 					   err);
+				dev_kfree_skb_irq(skb);
 				ql_free_large_buffers(qdev);
 				return -ENOMEM;
 			}
@@ -3886,6 +3886,12 @@
 	netif_stop_queue(ndev);
 
 	qdev->workqueue = create_singlethread_workqueue(ndev->name);
+	if (!qdev->workqueue) {
+		unregister_netdev(ndev);
+		err = -ENOMEM;
+		goto err_out_iounmap;
+	}
+
 	INIT_DELAYED_WORK(&qdev->reset_work, ql_reset_work);
 	INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work);
 	INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 0c443ea..374a4d4 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -497,7 +497,7 @@
 	u16 board_type;
 	u16 supported_type;
 
-	u16 link_speed;
+	u32 link_speed;
 	u16 link_duplex;
 	u16 link_autoneg;
 	u16 module_type;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index a79d84f..2a53328 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -4233,7 +4233,6 @@
 {
 	struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
 
-	pci_cleanup_aer_uncorrect_error_status(pdev);
 	if (test_and_clear_bit(__QLCNIC_AER, &adapter->state))
 		qlcnic_83xx_aer_start_poll_work(adapter);
 }
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index d344e9d..af38d3d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -434,14 +434,14 @@
 	*(tx_ring->hw_consumer) = 0;
 
 	rq_size = SIZEOF_HOSTRQ_TX(struct qlcnic_hostrq_tx_ctx);
-	rq_addr = dma_zalloc_coherent(&adapter->pdev->dev, rq_size,
-				      &rq_phys_addr, GFP_KERNEL);
+	rq_addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size,
+				     &rq_phys_addr, GFP_KERNEL);
 	if (!rq_addr)
 		return -ENOMEM;
 
 	rsp_size = SIZEOF_CARDRSP_TX(struct qlcnic_cardrsp_tx_ctx);
-	rsp_addr = dma_zalloc_coherent(&adapter->pdev->dev, rsp_size,
-				       &rsp_phys_addr, GFP_KERNEL);
+	rsp_addr = dma_alloc_coherent(&adapter->pdev->dev, rsp_size,
+				      &rsp_phys_addr, GFP_KERNEL);
 	if (!rsp_addr) {
 		err = -ENOMEM;
 		goto out_free_rq;
@@ -855,8 +855,8 @@
 	struct qlcnic_cmd_args cmd;
 	size_t  nic_size = sizeof(struct qlcnic_info_le);
 
-	nic_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, nic_size,
-					    &nic_dma_t, GFP_KERNEL);
+	nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size,
+					   &nic_dma_t, GFP_KERNEL);
 	if (!nic_info_addr)
 		return -ENOMEM;
 
@@ -909,8 +909,8 @@
 	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return err;
 
-	nic_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, nic_size,
-					    &nic_dma_t, GFP_KERNEL);
+	nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size,
+					   &nic_dma_t, GFP_KERNEL);
 	if (!nic_info_addr)
 		return -ENOMEM;
 
@@ -964,8 +964,8 @@
 	void *pci_info_addr;
 	int err = 0, i;
 
-	pci_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, pci_size,
-					    &pci_info_dma_t, GFP_KERNEL);
+	pci_info_addr = dma_alloc_coherent(&adapter->pdev->dev, pci_size,
+					   &pci_info_dma_t, GFP_KERNEL);
 	if (!pci_info_addr)
 		return -ENOMEM;
 
@@ -1078,8 +1078,8 @@
 		return -EIO;
 	}
 
-	stats_addr = dma_zalloc_coherent(&adapter->pdev->dev, stats_size,
-					 &stats_dma_t, GFP_KERNEL);
+	stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size,
+					&stats_dma_t, GFP_KERNEL);
 	if (!stats_addr)
 		return -ENOMEM;
 
@@ -1134,8 +1134,8 @@
 	if (mac_stats == NULL)
 		return -ENOMEM;
 
-	stats_addr = dma_zalloc_coherent(&adapter->pdev->dev, stats_size,
-					 &stats_dma_t, GFP_KERNEL);
+	stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size,
+					&stats_dma_t, GFP_KERNEL);
 	if (!stats_addr)
 		return -ENOMEM;
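The dma_zalloc_coherent() to dma_alloc_coherent() substitutions throughout this file are behaviorally neutral: dma_alloc_coherent() itself returns zeroed memory on modern kernels, which made the zeroing wrapper redundant and led to its removal. A user-space analogue of the same collapse:

	#include <stdlib.h>
	#include <string.h>

	/* old-style wrapper: explicit zeroing on top of a plain allocator */
	static void *zalloc(size_t size)
	{
		void *p = malloc(size);

		if (p)
			memset(p, 0, size);
		return p;
	}

	int main(void)
	{
		/* calloc() already zeroes, making the wrapper redundant,
		 * the same way dma_alloc_coherent() made
		 * dma_zalloc_coherent() redundant.
		 */
		void *a = zalloc(64);
		void *b = calloc(1, 64);

		free(a);
		free(b);
		return 0;
	}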
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
index 4b76c69..834208e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
@@ -883,7 +883,7 @@
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
 	if (!test_bit(QLCNIC_DCB_STATE, &adapter->dcb->state))
-		return 0;
+		return 1;
 
 	switch (capid) {
 	case DCB_CAP_ATTR_PG:
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 3b0adda..a4cd6f2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1048,6 +1048,8 @@
 
 	for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) {
 		skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE);
+		if (!skb)
+			break;
 		qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
 		skb_put(skb, QLCNIC_ILB_PKT_SIZE);
 		adapter->ahw->diag_cnt = 0;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 9647578..ac61f61 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -459,7 +459,7 @@
 			 struct cmd_desc_type0 *first_desc, struct sk_buff *skb,
 			 struct qlcnic_host_tx_ring *tx_ring)
 {
-	u8 l4proto, opcode = 0, hdr_len = 0;
+	u8 l4proto, opcode = 0, hdr_len = 0, tag_vlan = 0;
 	u16 flags = 0, vlan_tci = 0;
 	int copied, offset, copy_len, size;
 	struct cmd_desc_type0 *hwdesc;
@@ -472,14 +472,16 @@
 		flags = QLCNIC_FLAGS_VLAN_TAGGED;
 		vlan_tci = ntohs(vh->h_vlan_TCI);
 		protocol = ntohs(vh->h_vlan_encapsulated_proto);
+		tag_vlan = 1;
 	} else if (skb_vlan_tag_present(skb)) {
 		flags = QLCNIC_FLAGS_VLAN_OOB;
 		vlan_tci = skb_vlan_tag_get(skb);
+		tag_vlan = 1;
 	}
 	if (unlikely(adapter->tx_pvid)) {
-		if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED))
+		if (tag_vlan && !(adapter->flags & QLCNIC_TAGGING_ENABLED))
 			return -EIO;
-		if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED))
+		if (tag_vlan && (adapter->flags & QLCNIC_TAGGING_ENABLED))
 			goto set_flags;
 
 		flags = QLCNIC_FLAGS_VLAN_OOB;
@@ -579,7 +581,7 @@
 			     struct qlcnic_cmd_buffer *pbuf)
 {
 	struct qlcnic_skb_frag *nf;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	int i, nr_frags;
 	dma_addr_t map;
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index dbd4801..c07438d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -396,7 +396,8 @@
 
 static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			struct net_device *netdev,
-			const unsigned char *addr, u16 vid, u16 flags)
+			const unsigned char *addr, u16 vid, u16 flags,
+			struct netlink_ext_ack *extack)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int err = 0;
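The extra netlink_ext_ack argument tracks the ndo_fdb_add prototype change; handlers can attach a human-readable reason that netlink carries back to userspace alongside the errno. A hypothetical sketch (the fdb_supported() capability check is invented for illustration):

	static int example_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
				   struct net_device *netdev,
				   const unsigned char *addr, u16 vid,
				   u16 flags, struct netlink_ext_ack *extack)
	{
		if (!fdb_supported(netdev)) {	/* hypothetical helper */
			NL_SET_ERR_MSG(extack, "FDB add not supported");
			return -EOPNOTSUPP;
		}
		return 0;
	}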
@@ -2993,10 +2994,8 @@
 static inline void dump_tx_ring_desc(struct qlcnic_host_tx_ring *tx_ring)
 {
 	int i;
-	struct cmd_desc_type0 *tx_desc_info;
 
 	for (i = 0; i < tx_ring->num_desc; i++) {
-		tx_desc_info = &tx_ring->desc_head[i];
 		pr_info("TX Desc: %d\n", i);
 		print_hex_dump(KERN_INFO, "TX: ", DUMP_PREFIX_OFFSET, 16, 1,
 			       &tx_ring->desc_head[i],
@@ -3930,7 +3929,6 @@
 	u32 state;
 	struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
 
-	pci_cleanup_aer_uncorrect_error_status(pdev);
 	state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
 	if (state == QLCNIC_DEV_READY && test_and_clear_bit(__QLCNIC_AER,
 							    &adapter->state))
@@ -4009,19 +4007,12 @@
 			  int queue_type)
 {
 	struct net_device *netdev = adapter->netdev;
-	u8 max_hw_rings = 0;
 	char buf[8];
-	int cur_rings;
 
-	if (queue_type == QLCNIC_RX_QUEUE) {
-		max_hw_rings = adapter->max_sds_rings;
-		cur_rings = adapter->drv_sds_rings;
+	if (queue_type == QLCNIC_RX_QUEUE)
 		strcpy(buf, "SDS");
-	} else if (queue_type == QLCNIC_TX_QUEUE) {
-		max_hw_rings = adapter->max_tx_rings;
-		cur_rings = adapter->drv_tx_rings;
+	else
 		strcpy(buf, "Tx");
-	}
 
 	if (!is_power_of_2(ring_cnt)) {
 		netdev_err(netdev, "%s rings value should be a power of 2\n",
@@ -4128,13 +4119,14 @@
 qlcnic_config_indev_addr(struct qlcnic_adapter *adapter,
 			struct net_device *dev, unsigned long event)
 {
+	const struct in_ifaddr *ifa;
 	struct in_device *indev;
 
 	indev = in_dev_get(dev);
 	if (!indev)
 		return;
 
-	for_ifa(indev) {
+	in_dev_for_each_ifa_rtnl(ifa, indev) {
 		switch (event) {
 		case NETDEV_UP:
 			qlcnic_config_ipaddr(adapter,
@@ -4147,7 +4139,7 @@
 		default:
 			break;
 		}
-	} endfor_ifa(indev);
+	}
 
 	in_dev_put(indev);
 }
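for_ifa()/endfor_ifa() were open-coded list walks with the locking left implicit; the replacement iterator names its locking context (RTNL here; an _rcu variant exists for softirq paths). A minimal sketch of the new idiom:

	const struct in_ifaddr *ifa;
	struct in_device *indev = in_dev_get(dev);

	if (!indev)
		return;

	in_dev_for_each_ifa_rtnl(ifa, indev) {	/* caller holds RTNL */
		pr_info("%s: %pI4\n", dev->name, &ifa->ifa_address);
	}

	in_dev_put(indev);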
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 77e386e..f7c2f32 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -904,13 +904,11 @@
 				     u32 *hdr, u32 *pay, u32 size)
 {
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
-	u32 fw_mbx;
 	u8 i, max = 2, hdr_size, j;
 
 	hdr_size = (sizeof(struct qlcnic_bc_hdr) / sizeof(u32));
 	max = (size / sizeof(u32)) + hdr_size;
 
-	fw_mbx = readl(QLCNIC_MBX_FW(ahw, 0));
 	for (i = 2, j = 0; j < hdr_size; i++, j++)
 		*(hdr++) = readl(QLCNIC_MBX_FW(ahw, i));
 	for (; j < max; i++, j++)
@@ -936,7 +934,7 @@
 static int qlcnic_sriov_issue_bc_post(struct qlcnic_bc_trans *trans, u8 type)
 {
 	struct qlcnic_vf_info *vf = trans->vf;
-	u32 pay_size, hdr_size;
+	u32 pay_size;
 	u32 *hdr, *pay;
 	int ret;
 	u8 pci_func = trans->func_id;
@@ -947,14 +945,12 @@
 	if (type == QLC_BC_COMMAND) {
 		hdr = (u32 *)(trans->req_hdr + trans->curr_req_frag);
 		pay = (u32 *)(trans->req_pay + trans->curr_req_frag);
-		hdr_size = (sizeof(struct qlcnic_bc_hdr) / sizeof(u32));
 		pay_size = qlcnic_sriov_get_bc_paysize(trans->req_pay_size,
 						       trans->curr_req_frag);
 		pay_size = (pay_size / sizeof(u32));
 	} else {
 		hdr = (u32 *)(trans->rsp_hdr + trans->curr_rsp_frag);
 		pay = (u32 *)(trans->rsp_pay + trans->curr_rsp_frag);
-		hdr_size = (sizeof(struct qlcnic_bc_hdr) / sizeof(u32));
 		pay_size = qlcnic_sriov_get_bc_paysize(trans->rsp_pay_size,
 						       trans->curr_rsp_frag);
 		pay_size = (pay_size / sizeof(u32));
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 50eaafa..5632da0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1066,10 +1066,7 @@
 {
 	struct qlcnic_vf_info *vf = trans->vf;
 	struct qlcnic_adapter *adapter = vf->adapter;
-	int err = -EIO;
-	u8 op;
-
-	op =  cmd->req.arg[1] & 0xff;
+	int err;
 
 	cmd->req.arg[1] |= vf->vp->handle << 16;
 	cmd->req.arg[1] |= BIT_31;
@@ -1339,14 +1336,13 @@
 {
 	struct qlcnic_vf_info *vf = trans->vf;
 	struct qlcnic_vport *vp = vf->vp;
-	u8 cmd_op, mode = vp->vlan_mode;
+	u8 mode = vp->vlan_mode;
 	struct qlcnic_adapter *adapter;
 	struct qlcnic_sriov *sriov;
 
 	adapter = vf->adapter;
 	sriov = adapter->ahw->sriov;
 
-	cmd_op = trans->req_hdr->cmd_op;
 	cmd->rsp.arg[0] |= 1 << 25;
 
 	/* For 84xx adapter in case of PVID , PFD should send vlan mode as
diff --git a/drivers/net/ethernet/qlogic/qlge/Makefile b/drivers/net/ethernet/qlogic/qlge/Makefile
deleted file mode 100644
index 8a19765..0000000
--- a/drivers/net/ethernet/qlogic/qlge/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Qlogic 10GbE PCI Express ethernet driver
-#
-
-obj-$(CONFIG_QLGE) += qlge.o
-
-qlge-objs := qlge_main.o qlge_dbg.o qlge_mpi.o qlge_ethtool.o
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
deleted file mode 100644
index 3e71b65..0000000
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ /dev/null
@@ -1,2354 +0,0 @@
-/*
- * QLogic QLA41xx NIC HBA Driver
- * Copyright (c)  2003-2006 QLogic Corporation
- *
- * See LICENSE.qlge for copyright and licensing details.
- */
-#ifndef _QLGE_H_
-#define _QLGE_H_
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/if_vlan.h>
-
-/*
- * General definitions...
- */
-#define DRV_NAME  	"qlge"
-#define DRV_STRING 	"QLogic 10 Gigabit PCI-E Ethernet Driver "
-#define DRV_VERSION	"1.00.00.35"
-
-#define WQ_ADDR_ALIGN	0x3	/* 4 byte alignment */
-
-#define QLGE_VENDOR_ID    0x1077
-#define QLGE_DEVICE_ID_8012	0x8012
-#define QLGE_DEVICE_ID_8000	0x8000
-#define QLGE_MEZZ_SSYS_ID_068	0x0068
-#define QLGE_MEZZ_SSYS_ID_180	0x0180
-#define MAX_CPUS 8
-#define MAX_TX_RINGS MAX_CPUS
-#define MAX_RX_RINGS ((MAX_CPUS * 2) + 1)
-
-#define NUM_TX_RING_ENTRIES	256
-#define NUM_RX_RING_ENTRIES	256
-
-#define NUM_SMALL_BUFFERS   512
-#define NUM_LARGE_BUFFERS   512
-#define DB_PAGE_SIZE 4096
-
-/* Calculate the number of (4k) pages required to
- * contain a buffer queue of the given length.
- */
-#define MAX_DB_PAGES_PER_BQ(x) \
-		(((x * sizeof(u64)) / DB_PAGE_SIZE) + \
-		(((x * sizeof(u64)) % DB_PAGE_SIZE) ? 1 : 0))
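MAX_DB_PAGES_PER_BQ() is a ceiling division: each buffer-queue entry is one u64 pointer, so x entries need ceil(x * 8 / 4096) doorbell pages (the unparenthesized x means the macro is only safe for simple arguments like the constants above). Worked values, assuming those definitions:

	/* MAX_DB_PAGES_PER_BQ(512)  -> (4096 / 4096) + 0 = 1 page             */
	/* MAX_DB_PAGES_PER_BQ(513)  -> (4104 / 4096) + 1 = 2 pages (8 B over) */
	/* MAX_DB_PAGES_PER_BQ(1024) -> (8192 / 4096) + 0 = 2 pages            */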
-
-#define RX_RING_SHADOW_SPACE	(sizeof(u64) + \
-		MAX_DB_PAGES_PER_BQ(NUM_SMALL_BUFFERS) * sizeof(u64) + \
-		MAX_DB_PAGES_PER_BQ(NUM_LARGE_BUFFERS) * sizeof(u64))
-#define LARGE_BUFFER_MAX_SIZE 8192
-#define LARGE_BUFFER_MIN_SIZE 2048
-
-#define MAX_CQ 128
-#define DFLT_COALESCE_WAIT 100	/* 100 usec wait for coalescing */
-#define MAX_INTER_FRAME_WAIT 10	/* 10 usec max interframe-wait for coalescing */
-#define DFLT_INTER_FRAME_WAIT (MAX_INTER_FRAME_WAIT/2)
-#define UDELAY_COUNT 3
-#define UDELAY_DELAY 100
-
-
-#define TX_DESC_PER_IOCB 8
-
-#if ((MAX_SKB_FRAGS - TX_DESC_PER_IOCB) + 2) > 0
-#define TX_DESC_PER_OAL ((MAX_SKB_FRAGS - TX_DESC_PER_IOCB) + 2)
-#else /* all other page sizes */
-#define TX_DESC_PER_OAL 0
-#endif
-
-/* Word shifting for converting 64-bit
- * address to a series of 16-bit words.
- * This is used for some MPI firmware
- * mailbox commands.
- */
-#define LSW(x)  ((u16)(x))
-#define MSW(x)  ((u16)((u32)(x) >> 16))
-#define LSD(x)  ((u32)((u64)(x)))
-#define MSD(x)  ((u32)((((u64)(x)) >> 32)))
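These macros slice a 64-bit value into the 16- and 32-bit words that MPI mailbox registers expect. For a hypothetical DMA address:

	u64 addr = 0x0000112233445566ULL;

	u32 lo = LSD(addr);	/* 0x33445566 */
	u32 hi = MSD(addr);	/* 0x00001122 */
	u16 w0 = LSW(lo);	/* 0x5566 */
	u16 w1 = MSW(lo);	/* 0x3344 */
	u16 w2 = LSW(hi);	/* 0x1122 */
	u16 w3 = MSW(hi);	/* 0x0000 */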
-
-/* MPI test register definitions. This register
- * is used for determining alternate NIC function's
- * PCI->func number.
- */
-enum {
-	MPI_TEST_FUNC_PORT_CFG = 0x1002,
-	MPI_TEST_FUNC_PRB_CTL = 0x100e,
-		MPI_TEST_FUNC_PRB_EN = 0x18a20000,
-	MPI_TEST_FUNC_RST_STS = 0x100a,
-		MPI_TEST_FUNC_RST_FRC = 0x00000003,
-	MPI_TEST_NIC_FUNC_MASK = 0x00000007,
-	MPI_TEST_NIC1_FUNCTION_ENABLE = (1 << 0),
-	MPI_TEST_NIC1_FUNCTION_MASK = 0x0000000e,
-	MPI_TEST_NIC1_FUNC_SHIFT = 1,
-	MPI_TEST_NIC2_FUNCTION_ENABLE = (1 << 4),
-	MPI_TEST_NIC2_FUNCTION_MASK = 0x000000e0,
-	MPI_TEST_NIC2_FUNC_SHIFT = 5,
-	MPI_TEST_FC1_FUNCTION_ENABLE = (1 << 8),
-	MPI_TEST_FC1_FUNCTION_MASK	= 0x00000e00,
-	MPI_TEST_FC1_FUNCTION_SHIFT = 9,
-	MPI_TEST_FC2_FUNCTION_ENABLE = (1 << 12),
-	MPI_TEST_FC2_FUNCTION_MASK = 0x0000e000,
-	MPI_TEST_FC2_FUNCTION_SHIFT = 13,
-
-	MPI_NIC_READ = 0x00000000,
-	MPI_NIC_REG_BLOCK = 0x00020000,
-	MPI_NIC_FUNCTION_SHIFT = 6,
-};
-
-/*
- * Processor Address Register (PROC_ADDR) bit definitions.
- */
-enum {
-
-	/* Misc. stuff */
-	MAILBOX_COUNT = 16,
-	MAILBOX_TIMEOUT = 5,
-
-	PROC_ADDR_RDY = (1 << 31),
-	PROC_ADDR_R = (1 << 30),
-	PROC_ADDR_ERR = (1 << 29),
-	PROC_ADDR_DA = (1 << 28),
-	PROC_ADDR_FUNC0_MBI = 0x00001180,
-	PROC_ADDR_FUNC0_MBO = (PROC_ADDR_FUNC0_MBI + MAILBOX_COUNT),
-	PROC_ADDR_FUNC0_CTL = 0x000011a1,
-	PROC_ADDR_FUNC2_MBI = 0x00001280,
-	PROC_ADDR_FUNC2_MBO = (PROC_ADDR_FUNC2_MBI + MAILBOX_COUNT),
-	PROC_ADDR_FUNC2_CTL = 0x000012a1,
-	PROC_ADDR_MPI_RISC = 0x00000000,
-	PROC_ADDR_MDE = 0x00010000,
-	PROC_ADDR_REGBLOCK = 0x00020000,
-	PROC_ADDR_RISC_REG = 0x00030000,
-};
-
-/*
- * System Register (SYS) bit definitions.
- */
-enum {
-	SYS_EFE = (1 << 0),
-	SYS_FAE = (1 << 1),
-	SYS_MDC = (1 << 2),
-	SYS_DST = (1 << 3),
-	SYS_DWC = (1 << 4),
-	SYS_EVW = (1 << 5),
-	SYS_OMP_DLY_MASK = 0x3f000000,
-	/*
-	 * There are no values defined as of edit #15.
-	 */
-	SYS_ODI = (1 << 14),
-};
-
-/*
- *  Reset/Failover Register (RST_FO) bit definitions.
- */
-enum {
-	RST_FO_TFO = (1 << 0),
-	RST_FO_RR_MASK = 0x00060000,
-	RST_FO_RR_CQ_CAM = 0x00000000,
-	RST_FO_RR_DROP = 0x00000002,
-	RST_FO_RR_DQ = 0x00000004,
-	RST_FO_RR_RCV_FUNC_CQ = 0x00000006,
-	RST_FO_FRB = (1 << 12),
-	RST_FO_MOP = (1 << 13),
-	RST_FO_REG = (1 << 14),
-	RST_FO_FR = (1 << 15),
-};
-
-/*
- * Function Specific Control Register (FSC) bit definitions.
- */
-enum {
-	FSC_DBRST_MASK = 0x00070000,
-	FSC_DBRST_256 = 0x00000000,
-	FSC_DBRST_512 = 0x00000001,
-	FSC_DBRST_768 = 0x00000002,
-	FSC_DBRST_1024 = 0x00000003,
-	FSC_DBL_MASK = 0x00180000,
-	FSC_DBL_DBRST = 0x00000000,
-	FSC_DBL_MAX_PLD = 0x00000008,
-	FSC_DBL_MAX_BRST = 0x00000010,
-	FSC_DBL_128_BYTES = 0x00000018,
-	FSC_EC = (1 << 5),
-	FSC_EPC_MASK = 0x00c00000,
-	FSC_EPC_INBOUND = (1 << 6),
-	FSC_EPC_OUTBOUND = (1 << 7),
-	FSC_VM_PAGESIZE_MASK = 0x07000000,
-	FSC_VM_PAGE_2K = 0x00000100,
-	FSC_VM_PAGE_4K = 0x00000200,
-	FSC_VM_PAGE_8K = 0x00000300,
-	FSC_VM_PAGE_64K = 0x00000600,
-	FSC_SH = (1 << 11),
-	FSC_DSB = (1 << 12),
-	FSC_STE = (1 << 13),
-	FSC_FE = (1 << 15),
-};
-
-/*
- *  Host Command Status Register (CSR) bit definitions.
- */
-enum {
-	CSR_ERR_STS_MASK = 0x0000003f,
-	/*
-	 * There are no values defined as of edit #15.
-	 */
-	CSR_RR = (1 << 8),
-	CSR_HRI = (1 << 9),
-	CSR_RP = (1 << 10),
-	CSR_CMD_PARM_SHIFT = 22,
-	CSR_CMD_NOP = 0x00000000,
-	CSR_CMD_SET_RST = 0x10000000,
-	CSR_CMD_CLR_RST = 0x20000000,
-	CSR_CMD_SET_PAUSE = 0x30000000,
-	CSR_CMD_CLR_PAUSE = 0x40000000,
-	CSR_CMD_SET_H2R_INT = 0x50000000,
-	CSR_CMD_CLR_H2R_INT = 0x60000000,
-	CSR_CMD_PAR_EN = 0x70000000,
-	CSR_CMD_SET_BAD_PAR = 0x80000000,
-	CSR_CMD_CLR_BAD_PAR = 0x90000000,
-	CSR_CMD_CLR_R2PCI_INT = 0xa0000000,
-};
-
-/*
- *  Configuration Register (CFG) bit definitions.
- */
-enum {
-	CFG_LRQ = (1 << 0),
-	CFG_DRQ = (1 << 1),
-	CFG_LR = (1 << 2),
-	CFG_DR = (1 << 3),
-	CFG_LE = (1 << 5),
-	CFG_LCQ = (1 << 6),
-	CFG_DCQ = (1 << 7),
-	CFG_Q_SHIFT = 8,
-	CFG_Q_MASK = 0x7f000000,
-};
-
-/*
- *  Status Register (STS) bit definitions.
- */
-enum {
-	STS_FE = (1 << 0),
-	STS_PI = (1 << 1),
-	STS_PL0 = (1 << 2),
-	STS_PL1 = (1 << 3),
-	STS_PI0 = (1 << 4),
-	STS_PI1 = (1 << 5),
-	STS_FUNC_ID_MASK = 0x000000c0,
-	STS_FUNC_ID_SHIFT = 6,
-	STS_F0E = (1 << 8),
-	STS_F1E = (1 << 9),
-	STS_F2E = (1 << 10),
-	STS_F3E = (1 << 11),
-	STS_NFE = (1 << 12),
-};
-
-/*
- * Interrupt Enable Register (INTR_EN) bit definitions.
- */
-enum {
-	INTR_EN_INTR_MASK = 0x007f0000,
-	INTR_EN_TYPE_MASK = 0x03000000,
-	INTR_EN_TYPE_ENABLE = 0x00000100,
-	INTR_EN_TYPE_DISABLE = 0x00000200,
-	INTR_EN_TYPE_READ = 0x00000300,
-	INTR_EN_IHD = (1 << 13),
-	INTR_EN_IHD_MASK = (INTR_EN_IHD << 16),
-	INTR_EN_EI = (1 << 14),
-	INTR_EN_EN = (1 << 15),
-};
-
-/*
- * Interrupt Mask Register (INTR_MASK) bit definitions.
- */
-enum {
-	INTR_MASK_PI = (1 << 0),
-	INTR_MASK_HL0 = (1 << 1),
-	INTR_MASK_LH0 = (1 << 2),
-	INTR_MASK_HL1 = (1 << 3),
-	INTR_MASK_LH1 = (1 << 4),
-	INTR_MASK_SE = (1 << 5),
-	INTR_MASK_LSC = (1 << 6),
-	INTR_MASK_MC = (1 << 7),
-	INTR_MASK_LINK_IRQS = INTR_MASK_LSC | INTR_MASK_SE | INTR_MASK_MC,
-};
-
-/*
- *  Register (REV_ID) bit definitions.
- */
-enum {
-	REV_ID_MASK = 0x0000000f,
-	REV_ID_NICROLL_SHIFT = 0,
-	REV_ID_NICREV_SHIFT = 4,
-	REV_ID_XGROLL_SHIFT = 8,
-	REV_ID_XGREV_SHIFT = 12,
-	REV_ID_CHIPREV_SHIFT = 28,
-};
-
-/*
- *  Force ECC Error Register (FRC_ECC_ERR) bit definitions.
- */
-enum {
-	FRC_ECC_ERR_VW = (1 << 12),
-	FRC_ECC_ERR_VB = (1 << 13),
-	FRC_ECC_ERR_NI = (1 << 14),
-	FRC_ECC_ERR_NO = (1 << 15),
-	FRC_ECC_PFE_SHIFT = 16,
-	FRC_ECC_ERR_DO = (1 << 18),
-	FRC_ECC_P14 = (1 << 19),
-};
-
-/*
- *  Error Status Register (ERR_STS) bit definitions.
- */
-enum {
-	ERR_STS_NOF = (1 << 0),
-	ERR_STS_NIF = (1 << 1),
-	ERR_STS_DRP = (1 << 2),
-	ERR_STS_XGP = (1 << 3),
-	ERR_STS_FOU = (1 << 4),
-	ERR_STS_FOC = (1 << 5),
-	ERR_STS_FOF = (1 << 6),
-	ERR_STS_FIU = (1 << 7),
-	ERR_STS_FIC = (1 << 8),
-	ERR_STS_FIF = (1 << 9),
-	ERR_STS_MOF = (1 << 10),
-	ERR_STS_TA = (1 << 11),
-	ERR_STS_MA = (1 << 12),
-	ERR_STS_MPE = (1 << 13),
-	ERR_STS_SCE = (1 << 14),
-	ERR_STS_STE = (1 << 15),
-	ERR_STS_FOW = (1 << 16),
-	ERR_STS_UE = (1 << 17),
-	ERR_STS_MCH = (1 << 26),
-	ERR_STS_LOC_SHIFT = 27,
-};
-
-/*
- *  RAM Debug Address Register (RAM_DBG_ADDR) bit definitions.
- */
-enum {
-	RAM_DBG_ADDR_FW = (1 << 30),
-	RAM_DBG_ADDR_FR = (1 << 31),
-};
-
-/*
- * Semaphore Register (SEM) bit definitions.
- */
-enum {
-	/*
-	 * Example:
-	 * reg = SEM_XGMAC0_MASK | (SEM_SET << SEM_XGMAC0_SHIFT)
-	 */
-	SEM_CLEAR = 0,
-	SEM_SET = 1,
-	SEM_FORCE = 3,
-	SEM_XGMAC0_SHIFT = 0,
-	SEM_XGMAC1_SHIFT = 2,
-	SEM_ICB_SHIFT = 4,
-	SEM_MAC_ADDR_SHIFT = 6,
-	SEM_FLASH_SHIFT = 8,
-	SEM_PROBE_SHIFT = 10,
-	SEM_RT_IDX_SHIFT = 12,
-	SEM_PROC_REG_SHIFT = 14,
-	SEM_XGMAC0_MASK = 0x00030000,
-	SEM_XGMAC1_MASK = 0x000c0000,
-	SEM_ICB_MASK = 0x00300000,
-	SEM_MAC_ADDR_MASK = 0x00c00000,
-	SEM_FLASH_MASK = 0x03000000,
-	SEM_PROBE_MASK = 0x0c000000,
-	SEM_RT_IDX_MASK = 0x30000000,
-	SEM_PROC_REG_MASK = 0xc0000000,
-};
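Each resource owns a 2-bit state field in the low half of SEM and a matching write-enable mask in the high half, so one register write updates a single field without disturbing the others. Generalizing the example in the comment (helper name is illustrative):

	static u32 sem_write_value(u32 mask, int shift, u32 state)
	{
		return mask | (state << shift);
	}

	/* e.g. claiming the flash semaphore:
	 * sem_write_value(SEM_FLASH_MASK, SEM_FLASH_SHIFT, SEM_SET)
	 *   == 0x03000000 | (1 << 8) == 0x03000100
	 */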
-
-/*
- *  10G MAC Address  Register (XGMAC_ADDR) bit definitions.
- */
-enum {
-	XGMAC_ADDR_RDY = (1 << 31),
-	XGMAC_ADDR_R = (1 << 30),
-	XGMAC_ADDR_XME = (1 << 29),
-
-	/* XGMAC control registers */
-	PAUSE_SRC_LO = 0x00000100,
-	PAUSE_SRC_HI = 0x00000104,
-	GLOBAL_CFG = 0x00000108,
-	GLOBAL_CFG_RESET = (1 << 0),
-	GLOBAL_CFG_JUMBO = (1 << 6),
-	GLOBAL_CFG_TX_STAT_EN = (1 << 10),
-	GLOBAL_CFG_RX_STAT_EN = (1 << 11),
-	TX_CFG = 0x0000010c,
-	TX_CFG_RESET = (1 << 0),
-	TX_CFG_EN = (1 << 1),
-	TX_CFG_PREAM = (1 << 2),
-	RX_CFG = 0x00000110,
-	RX_CFG_RESET = (1 << 0),
-	RX_CFG_EN = (1 << 1),
-	RX_CFG_PREAM = (1 << 2),
-	FLOW_CTL = 0x0000011c,
-	PAUSE_OPCODE = 0x00000120,
-	PAUSE_TIMER = 0x00000124,
-	PAUSE_FRM_DEST_LO = 0x00000128,
-	PAUSE_FRM_DEST_HI = 0x0000012c,
-	MAC_TX_PARAMS = 0x00000134,
-	MAC_TX_PARAMS_JUMBO = (1 << 31),
-	MAC_TX_PARAMS_SIZE_SHIFT = 16,
-	MAC_RX_PARAMS = 0x00000138,
-	MAC_SYS_INT = 0x00000144,
-	MAC_SYS_INT_MASK = 0x00000148,
-	MAC_MGMT_INT = 0x0000014c,
-	MAC_MGMT_IN_MASK = 0x00000150,
-	EXT_ARB_MODE = 0x000001fc,
-
-	/* XGMAC TX statistics  registers */
-	TX_PKTS = 0x00000200,
-	TX_BYTES = 0x00000208,
-	TX_MCAST_PKTS = 0x00000210,
-	TX_BCAST_PKTS = 0x00000218,
-	TX_UCAST_PKTS = 0x00000220,
-	TX_CTL_PKTS = 0x00000228,
-	TX_PAUSE_PKTS = 0x00000230,
-	TX_64_PKT = 0x00000238,
-	TX_65_TO_127_PKT = 0x00000240,
-	TX_128_TO_255_PKT = 0x00000248,
-	TX_256_511_PKT = 0x00000250,
-	TX_512_TO_1023_PKT = 0x00000258,
-	TX_1024_TO_1518_PKT = 0x00000260,
-	TX_1519_TO_MAX_PKT = 0x00000268,
-	TX_UNDERSIZE_PKT = 0x00000270,
-	TX_OVERSIZE_PKT = 0x00000278,
-
-	/* XGMAC statistics control registers */
-	RX_HALF_FULL_DET = 0x000002a0,
-	TX_HALF_FULL_DET = 0x000002a4,
-	RX_OVERFLOW_DET = 0x000002a8,
-	TX_OVERFLOW_DET = 0x000002ac,
-	RX_HALF_FULL_MASK = 0x000002b0,
-	TX_HALF_FULL_MASK = 0x000002b4,
-	RX_OVERFLOW_MASK = 0x000002b8,
-	TX_OVERFLOW_MASK = 0x000002bc,
-	STAT_CNT_CTL = 0x000002c0,
-	STAT_CNT_CTL_CLEAR_TX = (1 << 0),
-	STAT_CNT_CTL_CLEAR_RX = (1 << 1),
-	AUX_RX_HALF_FULL_DET = 0x000002d0,
-	AUX_TX_HALF_FULL_DET = 0x000002d4,
-	AUX_RX_OVERFLOW_DET = 0x000002d8,
-	AUX_TX_OVERFLOW_DET = 0x000002dc,
-	AUX_RX_HALF_FULL_MASK = 0x000002f0,
-	AUX_TX_HALF_FULL_MASK = 0x000002f4,
-	AUX_RX_OVERFLOW_MASK = 0x000002f8,
-	AUX_TX_OVERFLOW_MASK = 0x000002fc,
-
-	/* XGMAC RX statistics  registers */
-	RX_BYTES = 0x00000300,
-	RX_BYTES_OK = 0x00000308,
-	RX_PKTS = 0x00000310,
-	RX_PKTS_OK = 0x00000318,
-	RX_BCAST_PKTS = 0x00000320,
-	RX_MCAST_PKTS = 0x00000328,
-	RX_UCAST_PKTS = 0x00000330,
-	RX_UNDERSIZE_PKTS = 0x00000338,
-	RX_OVERSIZE_PKTS = 0x00000340,
-	RX_JABBER_PKTS = 0x00000348,
-	RX_UNDERSIZE_FCERR_PKTS = 0x00000350,
-	RX_DROP_EVENTS = 0x00000358,
-	RX_FCERR_PKTS = 0x00000360,
-	RX_ALIGN_ERR = 0x00000368,
-	RX_SYMBOL_ERR = 0x00000370,
-	RX_MAC_ERR = 0x00000378,
-	RX_CTL_PKTS = 0x00000380,
-	RX_PAUSE_PKTS = 0x00000388,
-	RX_64_PKTS = 0x00000390,
-	RX_65_TO_127_PKTS = 0x00000398,
-	RX_128_255_PKTS = 0x000003a0,
-	RX_256_511_PKTS = 0x000003a8,
-	RX_512_TO_1023_PKTS = 0x000003b0,
-	RX_1024_TO_1518_PKTS = 0x000003b8,
-	RX_1519_TO_MAX_PKTS = 0x000003c0,
-	RX_LEN_ERR_PKTS = 0x000003c8,
-
-	/* XGMAC MDIO control registers */
-	MDIO_TX_DATA = 0x00000400,
-	MDIO_RX_DATA = 0x00000410,
-	MDIO_CMD = 0x00000420,
-	MDIO_PHY_ADDR = 0x00000430,
-	MDIO_PORT = 0x00000440,
-	MDIO_STATUS = 0x00000450,
-
-	XGMAC_REGISTER_END = 0x00000740,
-};
-
-/*
- *  Enhanced Transmission Schedule Registers (NIC_ETS,CNA_ETS) bit definitions.
- */
-enum {
-	ETS_QUEUE_SHIFT = 29,
-	ETS_REF = (1 << 26),
-	ETS_RS = (1 << 27),
-	ETS_P = (1 << 28),
-	ETS_FC_COS_SHIFT = 23,
-};
-
-/*
- *  Flash Address Register (FLASH_ADDR) bit definitions.
- */
-enum {
-	FLASH_ADDR_RDY = (1 << 31),
-	FLASH_ADDR_R = (1 << 30),
-	FLASH_ADDR_ERR = (1 << 29),
-};
-
-/*
- *  Stop CQ Processing Register (CQ_STOP) bit definitions.
- */
-enum {
-	CQ_STOP_QUEUE_MASK = (0x007f0000),
-	CQ_STOP_TYPE_MASK = (0x03000000),
-	CQ_STOP_TYPE_START = 0x00000100,
-	CQ_STOP_TYPE_STOP = 0x00000200,
-	CQ_STOP_TYPE_READ = 0x00000300,
-	CQ_STOP_EN = (1 << 15),
-};
-
-/*
- *  MAC Protocol Address Index Register (MAC_ADDR_IDX) bit definitions.
- */
-enum {
-	MAC_ADDR_IDX_SHIFT = 4,
-	MAC_ADDR_TYPE_SHIFT = 16,
-	MAC_ADDR_TYPE_COUNT = 10,
-	MAC_ADDR_TYPE_MASK = 0x000f0000,
-	MAC_ADDR_TYPE_CAM_MAC = 0x00000000,
-	MAC_ADDR_TYPE_MULTI_MAC = 0x00010000,
-	MAC_ADDR_TYPE_VLAN = 0x00020000,
-	MAC_ADDR_TYPE_MULTI_FLTR = 0x00030000,
-	MAC_ADDR_TYPE_FC_MAC = 0x00040000,
-	MAC_ADDR_TYPE_MGMT_MAC = 0x00050000,
-	MAC_ADDR_TYPE_MGMT_VLAN = 0x00060000,
-	MAC_ADDR_TYPE_MGMT_V4 = 0x00070000,
-	MAC_ADDR_TYPE_MGMT_V6 = 0x00080000,
-	MAC_ADDR_TYPE_MGMT_TU_DP = 0x00090000,
-	MAC_ADDR_ADR = (1 << 25),
-	MAC_ADDR_RS = (1 << 26),
-	MAC_ADDR_E = (1 << 27),
-	MAC_ADDR_MR = (1 << 30),
-	MAC_ADDR_MW = (1 << 31),
-	MAX_MULTICAST_ENTRIES = 32,
-
-	/* Entry count and words per entry
-	 * for each address type in the filter.
-	 */
-	MAC_ADDR_MAX_CAM_ENTRIES = 512,
-	MAC_ADDR_MAX_CAM_WCOUNT = 3,
-	MAC_ADDR_MAX_MULTICAST_ENTRIES = 32,
-	MAC_ADDR_MAX_MULTICAST_WCOUNT = 2,
-	MAC_ADDR_MAX_VLAN_ENTRIES = 4096,
-	MAC_ADDR_MAX_VLAN_WCOUNT = 1,
-	MAC_ADDR_MAX_MCAST_FLTR_ENTRIES = 4096,
-	MAC_ADDR_MAX_MCAST_FLTR_WCOUNT = 1,
-	MAC_ADDR_MAX_FC_MAC_ENTRIES = 4,
-	MAC_ADDR_MAX_FC_MAC_WCOUNT = 2,
-	MAC_ADDR_MAX_MGMT_MAC_ENTRIES = 8,
-	MAC_ADDR_MAX_MGMT_MAC_WCOUNT = 2,
-	MAC_ADDR_MAX_MGMT_VLAN_ENTRIES = 16,
-	MAC_ADDR_MAX_MGMT_VLAN_WCOUNT = 1,
-	MAC_ADDR_MAX_MGMT_V4_ENTRIES = 4,
-	MAC_ADDR_MAX_MGMT_V4_WCOUNT = 1,
-	MAC_ADDR_MAX_MGMT_V6_ENTRIES = 4,
-	MAC_ADDR_MAX_MGMT_V6_WCOUNT = 4,
-	MAC_ADDR_MAX_MGMT_TU_DP_ENTRIES = 4,
-	MAC_ADDR_MAX_MGMT_TU_DP_WCOUNT = 1,
-};
-
-/*
- *  MAC Protocol Address Index Register (SPLT_HDR) bit definitions.
- */
-enum {
-	SPLT_HDR_EP = (1 << 31),
-};
-
-/*
- *  FCoE Receive Configuration Register (FC_RCV_CFG) bit definitions.
- */
-enum {
-	FC_RCV_CFG_ECT = (1 << 15),
-	FC_RCV_CFG_DFH = (1 << 20),
-	FC_RCV_CFG_DVF = (1 << 21),
-	FC_RCV_CFG_RCE = (1 << 27),
-	FC_RCV_CFG_RFE = (1 << 28),
-	FC_RCV_CFG_TEE = (1 << 29),
-	FC_RCV_CFG_TCE = (1 << 30),
-	FC_RCV_CFG_TFE = (1 << 31),
-};
-
-/*
- *  NIC Receive Configuration Register (NIC_RCV_CFG) bit definitions.
- */
-enum {
-	NIC_RCV_CFG_PPE = (1 << 0),
-	NIC_RCV_CFG_VLAN_MASK = 0x00060000,
-	NIC_RCV_CFG_VLAN_ALL = 0x00000000,
-	NIC_RCV_CFG_VLAN_MATCH_ONLY = 0x00000002,
-	NIC_RCV_CFG_VLAN_MATCH_AND_NON = 0x00000004,
-	NIC_RCV_CFG_VLAN_NONE_AND_NON = 0x00000006,
-	NIC_RCV_CFG_RV = (1 << 3),
-	NIC_RCV_CFG_DFQ_MASK = (0x7f000000),
-	NIC_RCV_CFG_DFQ_SHIFT = 8,
-	NIC_RCV_CFG_DFQ = 0,	/* HARDCODE default queue to 0. */
-};
-
-/*
- *   Mgmt Receive Configuration Register (MGMT_RCV_CFG) bit definitions.
- */
-enum {
-	MGMT_RCV_CFG_ARP = (1 << 0),
-	MGMT_RCV_CFG_DHC = (1 << 1),
-	MGMT_RCV_CFG_DHS = (1 << 2),
-	MGMT_RCV_CFG_NP = (1 << 3),
-	MGMT_RCV_CFG_I6N = (1 << 4),
-	MGMT_RCV_CFG_I6R = (1 << 5),
-	MGMT_RCV_CFG_DH6 = (1 << 6),
-	MGMT_RCV_CFG_UD1 = (1 << 7),
-	MGMT_RCV_CFG_UD0 = (1 << 8),
-	MGMT_RCV_CFG_BCT = (1 << 9),
-	MGMT_RCV_CFG_MCT = (1 << 10),
-	MGMT_RCV_CFG_DM = (1 << 11),
-	MGMT_RCV_CFG_RM = (1 << 12),
-	MGMT_RCV_CFG_STL = (1 << 13),
-	MGMT_RCV_CFG_VLAN_MASK = 0xc0000000,
-	MGMT_RCV_CFG_VLAN_ALL = 0x00000000,
-	MGMT_RCV_CFG_VLAN_MATCH_ONLY = 0x00004000,
-	MGMT_RCV_CFG_VLAN_MATCH_AND_NON = 0x00008000,
-	MGMT_RCV_CFG_VLAN_NONE_AND_NON = 0x0000c000,
-};
-
-/*
- *  Routing Index Register (RT_IDX) bit definitions.
- */
-enum {
-	RT_IDX_IDX_SHIFT = 8,
-	RT_IDX_TYPE_MASK = 0x000f0000,
-	RT_IDX_TYPE_SHIFT = 16,
-	RT_IDX_TYPE_RT = 0x00000000,
-	RT_IDX_TYPE_RT_INV = 0x00010000,
-	RT_IDX_TYPE_NICQ = 0x00020000,
-	RT_IDX_TYPE_NICQ_INV = 0x00030000,
-	RT_IDX_DST_MASK = 0x00700000,
-	RT_IDX_DST_RSS = 0x00000000,
-	RT_IDX_DST_CAM_Q = 0x00100000,
-	RT_IDX_DST_COS_Q = 0x00200000,
-	RT_IDX_DST_DFLT_Q = 0x00300000,
-	RT_IDX_DST_DEST_Q = 0x00400000,
-	RT_IDX_RS = (1 << 26),
-	RT_IDX_E = (1 << 27),
-	RT_IDX_MR = (1 << 30),
-	RT_IDX_MW = (1 << 31),
-
-	/* Nic Queue format - type 2 bits */
-	RT_IDX_BCAST = (1 << 0),
-	RT_IDX_MCAST = (1 << 1),
-	RT_IDX_MCAST_MATCH = (1 << 2),
-	RT_IDX_MCAST_REG_MATCH = (1 << 3),
-	RT_IDX_MCAST_HASH_MATCH = (1 << 4),
-	RT_IDX_FC_MACH = (1 << 5),
-	RT_IDX_ETH_FCOE = (1 << 6),
-	RT_IDX_CAM_HIT = (1 << 7),
-	RT_IDX_CAM_BIT0 = (1 << 8),
-	RT_IDX_CAM_BIT1 = (1 << 9),
-	RT_IDX_VLAN_TAG = (1 << 10),
-	RT_IDX_VLAN_MATCH = (1 << 11),
-	RT_IDX_VLAN_FILTER = (1 << 12),
-	RT_IDX_ETH_SKIP1 = (1 << 13),
-	RT_IDX_ETH_SKIP2 = (1 << 14),
-	RT_IDX_BCAST_MCAST_MATCH = (1 << 15),
-	RT_IDX_802_3 = (1 << 16),
-	RT_IDX_LLDP = (1 << 17),
-	RT_IDX_UNUSED018 = (1 << 18),
-	RT_IDX_UNUSED019 = (1 << 19),
-	RT_IDX_UNUSED20 = (1 << 20),
-	RT_IDX_UNUSED21 = (1 << 21),
-	RT_IDX_ERR = (1 << 22),
-	RT_IDX_VALID = (1 << 23),
-	RT_IDX_TU_CSUM_ERR = (1 << 24),
-	RT_IDX_IP_CSUM_ERR = (1 << 25),
-	RT_IDX_MAC_ERR = (1 << 26),
-	RT_IDX_RSS_TCP6 = (1 << 27),
-	RT_IDX_RSS_TCP4 = (1 << 28),
-	RT_IDX_RSS_IPV6 = (1 << 29),
-	RT_IDX_RSS_IPV4 = (1 << 30),
-	RT_IDX_RSS_MATCH = (1 << 31),
-
-	/* Hierarchy for the NIC Queue Mask */
-	RT_IDX_ALL_ERR_SLOT = 0,
-	RT_IDX_MAC_ERR_SLOT = 0,
-	RT_IDX_IP_CSUM_ERR_SLOT = 1,
-	RT_IDX_TCP_UDP_CSUM_ERR_SLOT = 2,
-	RT_IDX_BCAST_SLOT = 3,
-	RT_IDX_MCAST_MATCH_SLOT = 4,
-	RT_IDX_ALLMULTI_SLOT = 5,
-	RT_IDX_UNUSED6_SLOT = 6,
-	RT_IDX_UNUSED7_SLOT = 7,
-	RT_IDX_RSS_MATCH_SLOT = 8,
-	RT_IDX_RSS_IPV4_SLOT = 8,
-	RT_IDX_RSS_IPV6_SLOT = 9,
-	RT_IDX_RSS_TCP4_SLOT = 10,
-	RT_IDX_RSS_TCP6_SLOT = 11,
-	RT_IDX_CAM_HIT_SLOT = 12,
-	RT_IDX_UNUSED013 = 13,
-	RT_IDX_UNUSED014 = 14,
-	RT_IDX_PROMISCUOUS_SLOT = 15,
-	RT_IDX_MAX_RT_SLOTS = 8,
-	RT_IDX_MAX_NIC_SLOTS = 16,
-};
-
-/*
- * Serdes Address Register (XG_SERDES_ADDR) bit definitions.
- */
-enum {
-	XG_SERDES_ADDR_RDY = (1 << 31),
-	XG_SERDES_ADDR_R = (1 << 30),
-
-	XG_SERDES_ADDR_STS = 0x00001E06,
-	XG_SERDES_ADDR_XFI1_PWR_UP = 0x00000005,
-	XG_SERDES_ADDR_XFI2_PWR_UP = 0x0000000a,
-	XG_SERDES_ADDR_XAUI_PWR_DOWN = 0x00000001,
-
-	/* Serdes coredump definitions. */
-	XG_SERDES_XAUI_AN_START = 0x00000000,
-	XG_SERDES_XAUI_AN_END = 0x00000034,
-	XG_SERDES_XAUI_HSS_PCS_START = 0x00000800,
-	XG_SERDES_XAUI_HSS_PCS_END = 0x0000880,
-	XG_SERDES_XFI_AN_START = 0x00001000,
-	XG_SERDES_XFI_AN_END = 0x00001034,
-	XG_SERDES_XFI_TRAIN_START = 0x10001050,
-	XG_SERDES_XFI_TRAIN_END = 0x1000107C,
-	XG_SERDES_XFI_HSS_PCS_START = 0x00001800,
-	XG_SERDES_XFI_HSS_PCS_END = 0x00001838,
-	XG_SERDES_XFI_HSS_TX_START = 0x00001c00,
-	XG_SERDES_XFI_HSS_TX_END = 0x00001c1f,
-	XG_SERDES_XFI_HSS_RX_START = 0x00001c40,
-	XG_SERDES_XFI_HSS_RX_END = 0x00001c5f,
-	XG_SERDES_XFI_HSS_PLL_START = 0x00001e00,
-	XG_SERDES_XFI_HSS_PLL_END = 0x00001e1f,
-};
-
-/*
- *  NIC Probe Mux Address Register (PRB_MX_ADDR) bit definitions.
- */
-enum {
-	PRB_MX_ADDR_ARE = (1 << 16),
-	PRB_MX_ADDR_UP = (1 << 15),
-	PRB_MX_ADDR_SWP = (1 << 14),
-
-	/* Module select values. */
-	PRB_MX_ADDR_MAX_MODS = 21,
-	PRB_MX_ADDR_MOD_SEL_SHIFT = 9,
-	PRB_MX_ADDR_MOD_SEL_TBD = 0,
-	PRB_MX_ADDR_MOD_SEL_IDE1 = 1,
-	PRB_MX_ADDR_MOD_SEL_IDE2 = 2,
-	PRB_MX_ADDR_MOD_SEL_FRB = 3,
-	PRB_MX_ADDR_MOD_SEL_ODE1 = 4,
-	PRB_MX_ADDR_MOD_SEL_ODE2 = 5,
-	PRB_MX_ADDR_MOD_SEL_DA1 = 6,
-	PRB_MX_ADDR_MOD_SEL_DA2 = 7,
-	PRB_MX_ADDR_MOD_SEL_IMP1 = 8,
-	PRB_MX_ADDR_MOD_SEL_IMP2 = 9,
-	PRB_MX_ADDR_MOD_SEL_OMP1 = 10,
-	PRB_MX_ADDR_MOD_SEL_OMP2 = 11,
-	PRB_MX_ADDR_MOD_SEL_ORS1 = 12,
-	PRB_MX_ADDR_MOD_SEL_ORS2 = 13,
-	PRB_MX_ADDR_MOD_SEL_REG = 14,
-	PRB_MX_ADDR_MOD_SEL_MAC1 = 16,
-	PRB_MX_ADDR_MOD_SEL_MAC2 = 17,
-	PRB_MX_ADDR_MOD_SEL_VQM1 = 18,
-	PRB_MX_ADDR_MOD_SEL_VQM2 = 19,
-	PRB_MX_ADDR_MOD_SEL_MOP = 20,
-	/* Bit fields indicating which modules
-	 * are valid for each clock domain.
-	 */
-	PRB_MX_ADDR_VALID_SYS_MOD = 0x000f7ff7,
-	PRB_MX_ADDR_VALID_PCI_MOD = 0x000040c1,
-	PRB_MX_ADDR_VALID_XGM_MOD = 0x00037309,
-	PRB_MX_ADDR_VALID_FC_MOD = 0x00003001,
-	PRB_MX_ADDR_VALID_TOTAL = 34,
-
-	/* Clock domain values. */
-	PRB_MX_ADDR_CLOCK_SHIFT = 6,
-	PRB_MX_ADDR_SYS_CLOCK = 0,
-	PRB_MX_ADDR_PCI_CLOCK = 2,
-	PRB_MX_ADDR_FC_CLOCK = 5,
-	PRB_MX_ADDR_XGM_CLOCK = 6,
-
-	PRB_MX_ADDR_MAX_MUX = 64,
-};
-
-/*
- * Control Register Set Map
- */
-enum {
-	PROC_ADDR = 0,		/* Use semaphore */
-	PROC_DATA = 0x04,	/* Use semaphore */
-	SYS = 0x08,
-	RST_FO = 0x0c,
-	FSC = 0x10,
-	CSR = 0x14,
-	LED = 0x18,
-	ICB_RID = 0x1c,		/* Use semaphore */
-	ICB_L = 0x20,		/* Use semaphore */
-	ICB_H = 0x24,		/* Use semaphore */
-	CFG = 0x28,
-	BIOS_ADDR = 0x2c,
-	STS = 0x30,
-	INTR_EN = 0x34,
-	INTR_MASK = 0x38,
-	ISR1 = 0x3c,
-	ISR2 = 0x40,
-	ISR3 = 0x44,
-	ISR4 = 0x48,
-	REV_ID = 0x4c,
-	FRC_ECC_ERR = 0x50,
-	ERR_STS = 0x54,
-	RAM_DBG_ADDR = 0x58,
-	RAM_DBG_DATA = 0x5c,
-	ECC_ERR_CNT = 0x60,
-	SEM = 0x64,
-	GPIO_1 = 0x68,		/* Use semaphore */
-	GPIO_2 = 0x6c,		/* Use semaphore */
-	GPIO_3 = 0x70,		/* Use semaphore */
-	RSVD2 = 0x74,
-	XGMAC_ADDR = 0x78,	/* Use semaphore */
-	XGMAC_DATA = 0x7c,	/* Use semaphore */
-	NIC_ETS = 0x80,
-	CNA_ETS = 0x84,
-	FLASH_ADDR = 0x88,	/* Use semaphore */
-	FLASH_DATA = 0x8c,	/* Use semaphore */
-	CQ_STOP = 0x90,
-	PAGE_TBL_RID = 0x94,
-	WQ_PAGE_TBL_LO = 0x98,
-	WQ_PAGE_TBL_HI = 0x9c,
-	CQ_PAGE_TBL_LO = 0xa0,
-	CQ_PAGE_TBL_HI = 0xa4,
-	MAC_ADDR_IDX = 0xa8,	/* Use semaphore */
-	MAC_ADDR_DATA = 0xac,	/* Use semaphore */
-	COS_DFLT_CQ1 = 0xb0,
-	COS_DFLT_CQ2 = 0xb4,
-	ETYPE_SKIP1 = 0xb8,
-	ETYPE_SKIP2 = 0xbc,
-	SPLT_HDR = 0xc0,
-	FC_PAUSE_THRES = 0xc4,
-	NIC_PAUSE_THRES = 0xc8,
-	FC_ETHERTYPE = 0xcc,
-	FC_RCV_CFG = 0xd0,
-	NIC_RCV_CFG = 0xd4,
-	FC_COS_TAGS = 0xd8,
-	NIC_COS_TAGS = 0xdc,
-	MGMT_RCV_CFG = 0xe0,
-	RT_IDX = 0xe4,
-	RT_DATA = 0xe8,
-	RSVD7 = 0xec,
-	XG_SERDES_ADDR = 0xf0,
-	XG_SERDES_DATA = 0xf4,
-	PRB_MX_ADDR = 0xf8,	/* Use semaphore */
-	PRB_MX_DATA = 0xfc,	/* Use semaphore */
-};
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#define SMALL_BUFFER_SIZE 256
-#define SMALL_BUF_MAP_SIZE SMALL_BUFFER_SIZE
-#define SPLT_SETTING  FSC_DBRST_1024
-#define SPLT_LEN 0
-#define QLGE_SB_PAD 0
-#else
-#define SMALL_BUFFER_SIZE 512
-#define SMALL_BUF_MAP_SIZE (SMALL_BUFFER_SIZE / 2)
-#define SPLT_SETTING  FSC_SH
-#define SPLT_LEN (SPLT_HDR_EP | \
-	min(SMALL_BUF_MAP_SIZE, 1023))
-#define QLGE_SB_PAD 32
-#endif
-
-/*
- * CAM output format.
- */
-enum {
-	CAM_OUT_ROUTE_FC = 0,
-	CAM_OUT_ROUTE_NIC = 1,
-	CAM_OUT_FUNC_SHIFT = 2,
-	CAM_OUT_RV = (1 << 4),
-	CAM_OUT_SH = (1 << 15),
-	CAM_OUT_CQ_ID_SHIFT = 5,
-};
-
-/*
- * Mailbox  definitions
- */
-enum {
-	/* Asynchronous Event Notifications */
-	AEN_SYS_ERR = 0x00008002,
-	AEN_LINK_UP = 0x00008011,
-	AEN_LINK_DOWN = 0x00008012,
-	AEN_IDC_CMPLT = 0x00008100,
-	AEN_IDC_REQ = 0x00008101,
-	AEN_IDC_EXT = 0x00008102,
-	AEN_DCBX_CHG = 0x00008110,
-	AEN_AEN_LOST = 0x00008120,
-	AEN_AEN_SFP_IN = 0x00008130,
-	AEN_AEN_SFP_OUT = 0x00008131,
-	AEN_FW_INIT_DONE = 0x00008400,
-	AEN_FW_INIT_FAIL = 0x00008401,
-
-	/* Mailbox Command Opcodes. */
-	MB_CMD_NOP = 0x00000000,
-	MB_CMD_EX_FW = 0x00000002,
-	MB_CMD_MB_TEST = 0x00000006,
-	MB_CMD_CSUM_TEST = 0x00000007,	/* Verify Checksum */
-	MB_CMD_ABOUT_FW = 0x00000008,
-	MB_CMD_COPY_RISC_RAM = 0x0000000a,
-	MB_CMD_LOAD_RISC_RAM = 0x0000000b,
-	MB_CMD_DUMP_RISC_RAM = 0x0000000c,
-	MB_CMD_WRITE_RAM = 0x0000000d,
-	MB_CMD_INIT_RISC_RAM = 0x0000000e,
-	MB_CMD_READ_RAM = 0x0000000f,
-	MB_CMD_STOP_FW = 0x00000014,
-	MB_CMD_MAKE_SYS_ERR = 0x0000002a,
-	MB_CMD_WRITE_SFP = 0x00000030,
-	MB_CMD_READ_SFP = 0x00000031,
-	MB_CMD_INIT_FW = 0x00000060,
-	MB_CMD_GET_IFCB = 0x00000061,
-	MB_CMD_GET_FW_STATE = 0x00000069,
-	MB_CMD_IDC_REQ = 0x00000100,	/* Inter-Driver Communication */
-	MB_CMD_IDC_ACK = 0x00000101,	/* Inter-Driver Communication */
-	MB_CMD_SET_WOL_MODE = 0x00000110,	/* Wake On Lan */
-	MB_WOL_DISABLE = 0,
-	MB_WOL_MAGIC_PKT = (1 << 1),
-	MB_WOL_FLTR = (1 << 2),
-	MB_WOL_UCAST = (1 << 3),
-	MB_WOL_MCAST = (1 << 4),
-	MB_WOL_BCAST = (1 << 5),
-	MB_WOL_LINK_UP = (1 << 6),
-	MB_WOL_LINK_DOWN = (1 << 7),
-	MB_WOL_MODE_ON = (1 << 16),		/* Wake on Lan Mode on */
-	MB_CMD_SET_WOL_FLTR = 0x00000111,	/* Wake On Lan Filter */
-	MB_CMD_CLEAR_WOL_FLTR = 0x00000112, /* Wake On Lan Filter */
-	MB_CMD_SET_WOL_MAGIC = 0x00000113,	/* Wake On Lan Magic Packet */
-	MB_CMD_CLEAR_WOL_MAGIC = 0x00000114,/* Wake On Lan Magic Packet */
-	MB_CMD_SET_WOL_IMMED = 0x00000115,
-	MB_CMD_PORT_RESET = 0x00000120,
-	MB_CMD_SET_PORT_CFG = 0x00000122,
-	MB_CMD_GET_PORT_CFG = 0x00000123,
-	MB_CMD_GET_LINK_STS = 0x00000124,
-	MB_CMD_SET_LED_CFG = 0x00000125, /* Set LED Configuration Register */
-		QL_LED_BLINK = 0x03e803e8,
-	MB_CMD_GET_LED_CFG = 0x00000126, /* Get LED Configuration Register */
-	MB_CMD_SET_MGMNT_TFK_CTL = 0x00000160, /* Set Mgmnt Traffic Control */
-	MB_SET_MPI_TFK_STOP = (1 << 0),
-	MB_SET_MPI_TFK_RESUME = (1 << 1),
-	MB_CMD_GET_MGMNT_TFK_CTL = 0x00000161, /* Get Mgmnt Traffic Control */
-	MB_GET_MPI_TFK_STOPPED = (1 << 0),
-	MB_GET_MPI_TFK_FIFO_EMPTY = (1 << 1),
-	/* Sub-commands for IDC request.
-	 * This describes the reason for the
-	 * IDC request.
-	 */
-	MB_CMD_IOP_NONE = 0x0000,
-	MB_CMD_IOP_PREP_UPDATE_MPI	= 0x0001,
-	MB_CMD_IOP_COMP_UPDATE_MPI	= 0x0002,
-	MB_CMD_IOP_PREP_LINK_DOWN	= 0x0010,
-	MB_CMD_IOP_DVR_START	 = 0x0100,
-	MB_CMD_IOP_FLASH_ACC	 = 0x0101,
-	MB_CMD_IOP_RESTART_MPI	= 0x0102,
-	MB_CMD_IOP_CORE_DUMP_MPI	= 0x0103,
-
-	/* Mailbox Command Status. */
-	MB_CMD_STS_GOOD = 0x00004000,	/* Success. */
-	MB_CMD_STS_INTRMDT = 0x00001000,	/* Intermediate Complete. */
-	MB_CMD_STS_INVLD_CMD = 0x00004001,	/* Invalid. */
-	MB_CMD_STS_XFC_ERR = 0x00004002,	/* Interface Error. */
-	MB_CMD_STS_CSUM_ERR = 0x00004003,	/* Csum Error. */
-	MB_CMD_STS_ERR = 0x00004005,	/* System Error. */
-	MB_CMD_STS_PARAM_ERR = 0x00004006,	/* Parameter Error. */
-};
-
-struct mbox_params {
-	u32 mbox_in[MAILBOX_COUNT];
-	u32 mbox_out[MAILBOX_COUNT];
-	int in_count;
-	int out_count;
-};
-
-struct flash_params_8012 {
-	u8 dev_id_str[4];
-	__le16 size;
-	__le16 csum;
-	__le16 ver;
-	__le16 sub_dev_id;
-	u8 mac_addr[6];
-	__le16 res;
-};
-
-/* The 8000 device's flash uses a different structure
- * at a different offset in flash.
- */
-#define FUNC0_FLASH_OFFSET 0x140200
-#define FUNC1_FLASH_OFFSET 0x140600
-
-/* Flash related data structures. */
-struct flash_params_8000 {
-	u8 dev_id_str[4];	/* "8000" */
-	__le16 ver;
-	__le16 size;
-	__le16 csum;
-	__le16 reserved0;
-	__le16 total_size;
-	__le16 entry_count;
-	u8 data_type0;
-	u8 data_size0;
-	u8 mac_addr[6];
-	u8 data_type1;
-	u8 data_size1;
-	u8 mac_addr1[6];
-	u8 data_type2;
-	u8 data_size2;
-	__le16 vlan_id;
-	u8 data_type3;
-	u8 data_size3;
-	__le16 last;
-	u8 reserved1[464];
-	__le16	subsys_ven_id;
-	__le16	subsys_dev_id;
-	u8 reserved2[4];
-};
-
-union flash_params {
-	struct flash_params_8012 flash_params_8012;
-	struct flash_params_8000 flash_params_8000;
-};
-
-/*
- * doorbell space for the rx ring context
- */
-struct rx_doorbell_context {
-	u32 cnsmr_idx;		/* 0x00 */
-	u32 valid;		/* 0x04 */
-	u32 reserved[4];	/* 0x08-0x14 */
-	u32 lbq_prod_idx;	/* 0x18 */
-	u32 sbq_prod_idx;	/* 0x1c */
-};
-
-/*
- * doorbell space for the tx ring context
- */
-struct tx_doorbell_context {
-	u32 prod_idx;		/* 0x00 */
-	u32 valid;		/* 0x04 */
-	u32 reserved[4];	/* 0x08-0x14 */
-	u32 lbq_prod_idx;	/* 0x18 */
-	u32 sbq_prod_idx;	/* 0x1c */
-};
-
-/* DATA STRUCTURES SHARED WITH HARDWARE. */
-struct tx_buf_desc {
-	__le64 addr;
-	__le32 len;
-#define TX_DESC_LEN_MASK	0x000fffff
-#define TX_DESC_C	0x40000000
-#define TX_DESC_E	0x80000000
-} __packed;
-
-/*
- * IOCB Definitions...
- */
-
-#define OPCODE_OB_MAC_IOCB 			0x01
-#define OPCODE_OB_MAC_TSO_IOCB		0x02
-#define OPCODE_IB_MAC_IOCB			0x20
-#define OPCODE_IB_MPI_IOCB			0x21
-#define OPCODE_IB_AE_IOCB			0x3f
-
-struct ob_mac_iocb_req {
-	u8 opcode;
-	u8 flags1;
-#define OB_MAC_IOCB_REQ_OI	0x01
-#define OB_MAC_IOCB_REQ_I	0x02
-#define OB_MAC_IOCB_REQ_D	0x08
-#define OB_MAC_IOCB_REQ_F	0x10
-	u8 flags2;
-	u8 flags3;
-#define OB_MAC_IOCB_DFP	0x02
-#define OB_MAC_IOCB_V	0x04
-	__le32 reserved1[2];
-	__le16 frame_len;
-#define OB_MAC_IOCB_LEN_MASK 0x3ffff
-	__le16 reserved2;
-	u32 tid;
-	u32 txq_idx;
-	__le32 reserved3;
-	__le16 vlan_tci;
-	__le16 reserved4;
-	struct tx_buf_desc tbd[TX_DESC_PER_IOCB];
-} __packed;
-
-struct ob_mac_iocb_rsp {
-	u8 opcode;		/* */
-	u8 flags1;		/* */
-#define OB_MAC_IOCB_RSP_OI	0x01	/* */
-#define OB_MAC_IOCB_RSP_I	0x02	/* */
-#define OB_MAC_IOCB_RSP_E	0x08	/* */
-#define OB_MAC_IOCB_RSP_S	0x10	/* too Short */
-#define OB_MAC_IOCB_RSP_L	0x20	/* too Large */
-#define OB_MAC_IOCB_RSP_P	0x40	/* Padded */
-	u8 flags2;		/* */
-	u8 flags3;		/* */
-#define OB_MAC_IOCB_RSP_B	0x80	/* */
-	u32 tid;
-	u32 txq_idx;
-	__le32 reserved[13];
-} __packed;
-
-struct ob_mac_tso_iocb_req {
-	u8 opcode;
-	u8 flags1;
-#define OB_MAC_TSO_IOCB_OI	0x01
-#define OB_MAC_TSO_IOCB_I	0x02
-#define OB_MAC_TSO_IOCB_D	0x08
-#define OB_MAC_TSO_IOCB_IP4	0x40
-#define OB_MAC_TSO_IOCB_IP6	0x80
-	u8 flags2;
-#define OB_MAC_TSO_IOCB_LSO	0x20
-#define OB_MAC_TSO_IOCB_UC	0x40
-#define OB_MAC_TSO_IOCB_TC	0x80
-	u8 flags3;
-#define OB_MAC_TSO_IOCB_IC	0x01
-#define OB_MAC_TSO_IOCB_DFP	0x02
-#define OB_MAC_TSO_IOCB_V	0x04
-	__le32 reserved1[2];
-	__le32 frame_len;
-	u32 tid;
-	u32 txq_idx;
-	__le16 total_hdrs_len;
-	__le16 net_trans_offset;
-#define OB_MAC_TRANSPORT_HDR_SHIFT 6
-	__le16 vlan_tci;
-	__le16 mss;
-	struct tx_buf_desc tbd[TX_DESC_PER_IOCB];
-} __packed;
-
-struct ob_mac_tso_iocb_rsp {
-	u8 opcode;
-	u8 flags1;
-#define OB_MAC_TSO_IOCB_RSP_OI	0x01
-#define OB_MAC_TSO_IOCB_RSP_I	0x02
-#define OB_MAC_TSO_IOCB_RSP_E	0x08
-#define OB_MAC_TSO_IOCB_RSP_S	0x10
-#define OB_MAC_TSO_IOCB_RSP_L	0x20
-#define OB_MAC_TSO_IOCB_RSP_P	0x40
-	u8 flags2;		/* */
-	u8 flags3;		/* */
-#define OB_MAC_TSO_IOCB_RSP_B	0x8000
-	u32 tid;
-	u32 txq_idx;
-	__le32 reserved2[13];
-} __packed;
-
-struct ib_mac_iocb_rsp {
-	u8 opcode;		/* 0x20 */
-	u8 flags1;
-#define IB_MAC_IOCB_RSP_OI	0x01	/* Override intr delay */
-#define IB_MAC_IOCB_RSP_I	0x02	/* Disable Intr Generation */
-#define IB_MAC_CSUM_ERR_MASK 0x1c	/* A mask to use for csum errs */
-#define IB_MAC_IOCB_RSP_TE	0x04	/* Checksum error */
-#define IB_MAC_IOCB_RSP_NU	0x08	/* No checksum rcvd */
-#define IB_MAC_IOCB_RSP_IE	0x10	/* IPv4 checksum error */
-#define IB_MAC_IOCB_RSP_M_MASK	0x60	/* Multicast info */
-#define IB_MAC_IOCB_RSP_M_NONE	0x00	/* Not mcast frame */
-#define IB_MAC_IOCB_RSP_M_HASH	0x20	/* HASH mcast frame */
-#define IB_MAC_IOCB_RSP_M_REG 	0x40	/* Registered mcast frame */
-#define IB_MAC_IOCB_RSP_M_PROM 	0x60	/* Promiscuous mcast frame */
-#define IB_MAC_IOCB_RSP_B	0x80	/* Broadcast frame */
-	u8 flags2;
-#define IB_MAC_IOCB_RSP_P	0x01	/* Promiscuous frame */
-#define IB_MAC_IOCB_RSP_V	0x02	/* Vlan tag present */
-#define IB_MAC_IOCB_RSP_ERR_MASK	0x1c	/*  */
-#define IB_MAC_IOCB_RSP_ERR_CODE_ERR	0x04
-#define IB_MAC_IOCB_RSP_ERR_OVERSIZE	0x08
-#define IB_MAC_IOCB_RSP_ERR_UNDERSIZE	0x10
-#define IB_MAC_IOCB_RSP_ERR_PREAMBLE	0x14
-#define IB_MAC_IOCB_RSP_ERR_FRAME_LEN	0x18
-#define IB_MAC_IOCB_RSP_ERR_CRC		0x1c
-#define IB_MAC_IOCB_RSP_U	0x20	/* UDP packet */
-#define IB_MAC_IOCB_RSP_T	0x40	/* TCP packet */
-#define IB_MAC_IOCB_RSP_FO	0x80	/* Failover port */
-	u8 flags3;
-#define IB_MAC_IOCB_RSP_RSS_MASK	0x07	/* RSS mask */
-#define IB_MAC_IOCB_RSP_M_NONE	0x00	/* No RSS match */
-#define IB_MAC_IOCB_RSP_M_IPV4	0x04	/* IPv4 RSS match */
-#define IB_MAC_IOCB_RSP_M_IPV6	0x02	/* IPv6 RSS match */
-#define IB_MAC_IOCB_RSP_M_TCP_V4 	0x05	/* TCP with IPv4 */
-#define IB_MAC_IOCB_RSP_M_TCP_V6 	0x03	/* TCP with IPv6 */
-#define IB_MAC_IOCB_RSP_V4	0x08	/* IPV4 */
-#define IB_MAC_IOCB_RSP_V6	0x10	/* IPV6 */
-#define IB_MAC_IOCB_RSP_IH	0x20	/* Split after IP header */
-#define IB_MAC_IOCB_RSP_DS	0x40	/* data is in small buffer */
-#define IB_MAC_IOCB_RSP_DL	0x80	/* data is in large buffer */
-	__le32 data_len;	/* */
-	__le64 data_addr;	/* */
-	__le32 rss;		/* */
-	__le16 vlan_id;		/* 12 bits */
-#define IB_MAC_IOCB_RSP_C	0x1000	/* VLAN CFI bit */
-#define IB_MAC_IOCB_RSP_COS_SHIFT	12	/* class of service value */
-#define IB_MAC_IOCB_RSP_VLAN_MASK	0x0ffff
-
-	__le16 reserved1;
-	__le32 reserved2[6];
-	u8 reserved3[3];
-	u8 flags4;
-#define IB_MAC_IOCB_RSP_HV	0x20
-#define IB_MAC_IOCB_RSP_HS	0x40
-#define IB_MAC_IOCB_RSP_HL	0x80
-	__le32 hdr_len;		/* */
-	__le64 hdr_addr;	/* */
-} __packed;
-
-struct ib_ae_iocb_rsp {
-	u8 opcode;
-	u8 flags1;
-#define IB_AE_IOCB_RSP_OI		0x01
-#define IB_AE_IOCB_RSP_I		0x02
-	u8 event;
-#define LINK_UP_EVENT              0x00
-#define LINK_DOWN_EVENT            0x01
-#define CAM_LOOKUP_ERR_EVENT       0x06
-#define SOFT_ECC_ERROR_EVENT       0x07
-#define MGMT_ERR_EVENT             0x08
-#define TEN_GIG_MAC_EVENT          0x09
-#define GPI0_H2L_EVENT       	0x10
-#define GPI0_L2H_EVENT       	0x20
-#define GPI1_H2L_EVENT       	0x11
-#define GPI1_L2H_EVENT       	0x21
-#define PCI_ERR_ANON_BUF_RD        0x40
-	u8 q_id;
-	__le32 reserved[15];
-} __packed;
-
-/*
- * These structures are for generic
- * handling of ib and ob iocbs.
- */
-struct ql_net_rsp_iocb {
-	u8 opcode;
-	u8 flags0;
-	__le16 length;
-	__le32 tid;
-	__le32 reserved[14];
-} __packed;
-
-struct net_req_iocb {
-	u8 opcode;
-	u8 flags0;
-	__le16 flags1;
-	__le32 tid;
-	__le32 reserved1[30];
-} __packed;
-
-/*
- * tx ring initialization control block for chip.
- * It is defined as:
- * "Work Queue Initialization Control Block"
- */
-struct wqicb {
-	__le16 len;
-#define Q_LEN_V		(1 << 4)
-#define Q_LEN_CPP_CONT	0x0000
-#define Q_LEN_CPP_16	0x0001
-#define Q_LEN_CPP_32	0x0002
-#define Q_LEN_CPP_64	0x0003
-#define Q_LEN_CPP_512	0x0006
-	__le16 flags;
-#define Q_PRI_SHIFT	1
-#define Q_FLAGS_LC	0x1000
-#define Q_FLAGS_LB	0x2000
-#define Q_FLAGS_LI	0x4000
-#define Q_FLAGS_LO	0x8000
-	__le16 cq_id_rss;
-#define Q_CQ_ID_RSS_RV 0x8000
-	__le16 rid;
-	__le64 addr;
-	__le64 cnsmr_idx_addr;
-} __packed;
-
-/*
- * rx ring initialization control block for chip.
- * It is defined as:
- * "Completion Queue Initialization Control Block"
- */
-struct cqicb {
-	u8 msix_vect;
-	u8 reserved1;
-	u8 reserved2;
-	u8 flags;
-#define FLAGS_LV	0x08
-#define FLAGS_LS	0x10
-#define FLAGS_LL	0x20
-#define FLAGS_LI	0x40
-#define FLAGS_LC	0x80
-	__le16 len;
-#define LEN_V		(1 << 4)
-#define LEN_CPP_CONT	0x0000
-#define LEN_CPP_32	0x0001
-#define LEN_CPP_64	0x0002
-#define LEN_CPP_128	0x0003
-	__le16 rid;
-	__le64 addr;
-	__le64 prod_idx_addr;
-	__le16 pkt_delay;
-	__le16 irq_delay;
-	__le64 lbq_addr;
-	__le16 lbq_buf_size;
-	__le16 lbq_len;		/* entry count */
-	__le64 sbq_addr;
-	__le16 sbq_buf_size;
-	__le16 sbq_len;		/* entry count */
-} __packed;
-
-struct ricb {
-	u8 base_cq;
-#define RSS_L4K 0x80
-	u8 flags;
-#define RSS_L6K 0x01
-#define RSS_LI  0x02
-#define RSS_LB  0x04
-#define RSS_LM  0x08
-#define RSS_RI4 0x10
-#define RSS_RT4 0x20
-#define RSS_RI6 0x40
-#define RSS_RT6 0x80
-	__le16 mask;
-	u8 hash_cq_id[1024];
-	__le32 ipv6_hash_key[10];
-	__le32 ipv4_hash_key[4];
-} __packed;
-
-/* SOFTWARE/DRIVER DATA STRUCTURES. */
-
-struct oal {
-	struct tx_buf_desc oal[TX_DESC_PER_OAL];
-};
-
-struct map_list {
-	DEFINE_DMA_UNMAP_ADDR(mapaddr);
-	DEFINE_DMA_UNMAP_LEN(maplen);
-};
-
-struct tx_ring_desc {
-	struct sk_buff *skb;
-	struct ob_mac_iocb_req *queue_entry;
-	u32 index;
-	struct oal oal;
-	struct map_list map[MAX_SKB_FRAGS + 2];
-	int map_cnt;
-	struct tx_ring_desc *next;
-};
-
-struct page_chunk {
-	struct page *page;	/* master page */
-	char *va;		/* virt addr for this chunk */
-	u64 map;		/* mapping for master */
-	unsigned int offset;	/* offset for this chunk */
-	unsigned int last_flag; /* flag set for last chunk in page */
-};
-
-struct bq_desc {
-	union {
-		struct page_chunk pg_chunk;
-		struct sk_buff *skb;
-	} p;
-	__le64 *addr;
-	u32 index;
-	DEFINE_DMA_UNMAP_ADDR(mapaddr);
-	DEFINE_DMA_UNMAP_LEN(maplen);
-};
-
-#define QL_TXQ_IDX(qdev, skb) (smp_processor_id()%(qdev->tx_ring_count))
-
-struct tx_ring {
-	/*
-	 * queue info.
-	 */
-	struct wqicb wqicb;	/* structure used to inform chip of new queue */
-	void *wq_base;		/* pci_alloc:virtual addr for tx */
-	dma_addr_t wq_base_dma;	/* pci_alloc:dma addr for tx */
-	__le32 *cnsmr_idx_sh_reg;	/* shadow copy of consumer idx */
-	dma_addr_t cnsmr_idx_sh_reg_dma;	/* dma-shadow copy of consumer */
-	u32 wq_size;		/* size in bytes of queue area */
-	u32 wq_len;		/* number of entries in queue */
-	void __iomem *prod_idx_db_reg;	/* doorbell area index reg at offset 0x00 */
-	void __iomem *valid_db_reg;	/* doorbell area valid reg at offset 0x04 */
-	u16 prod_idx;		/* current value for prod idx */
-	u16 cq_id;		/* completion (rx) queue for tx completions */
-	u8 wq_id;		/* queue id for this entry */
-	u8 reserved1[3];
-	struct tx_ring_desc *q;	/* descriptor list for the queue */
-	spinlock_t lock;
-	atomic_t tx_count;	/* counts down for every outstanding IO */
-	struct delayed_work tx_work;
-	struct ql_adapter *qdev;
-	u64 tx_packets;
-	u64 tx_bytes;
-	u64 tx_errors;
-};
-
-/*
- * Type of inbound queue.
- */
-enum {
-	DEFAULT_Q = 2,		/* Handles slow queue and chip/MPI events. */
-	TX_Q = 3,		/* Handles outbound completions. */
-	RX_Q = 4,		/* Handles inbound completions. */
-};
-
-struct rx_ring {
-	struct cqicb cqicb;	/* The chip's completion queue init control block. */
-
-	/* Completion queue elements. */
-	void *cq_base;
-	dma_addr_t cq_base_dma;
-	u32 cq_size;
-	u32 cq_len;
-	u16 cq_id;
-	__le32 *prod_idx_sh_reg;	/* Shadowed producer register. */
-	dma_addr_t prod_idx_sh_reg_dma;
-	void __iomem *cnsmr_idx_db_reg;	/* PCI doorbell mem area + 0 */
-	u32 cnsmr_idx;		/* current sw idx */
-	struct ql_net_rsp_iocb *curr_entry;	/* next entry on queue */
-	void __iomem *valid_db_reg;	/* PCI doorbell mem area + 0x04 */
-
-	/* Large buffer queue elements. */
-	u32 lbq_len;		/* entry count */
-	u32 lbq_size;		/* size in bytes of queue */
-	u32 lbq_buf_size;
-	void *lbq_base;
-	dma_addr_t lbq_base_dma;
-	void *lbq_base_indirect;
-	dma_addr_t lbq_base_indirect_dma;
-	struct page_chunk pg_chunk; /* current page for chunks */
-	struct bq_desc *lbq;	/* array of control blocks */
-	void __iomem *lbq_prod_idx_db_reg;	/* PCI doorbell mem area + 0x18 */
-	u32 lbq_prod_idx;	/* current sw prod idx */
-	u32 lbq_curr_idx;	/* next entry we expect */
-	u32 lbq_clean_idx;	/* beginning of new descs */
-	u32 lbq_free_cnt;	/* free buffer desc cnt */
-
-	/* Small buffer queue elements. */
-	u32 sbq_len;		/* entry count */
-	u32 sbq_size;		/* size in bytes of queue */
-	u32 sbq_buf_size;
-	void *sbq_base;
-	dma_addr_t sbq_base_dma;
-	void *sbq_base_indirect;
-	dma_addr_t sbq_base_indirect_dma;
-	struct bq_desc *sbq;	/* array of control blocks */
-	void __iomem *sbq_prod_idx_db_reg; /* PCI doorbell mem area + 0x1c */
-	u32 sbq_prod_idx;	/* current sw prod idx */
-	u32 sbq_curr_idx;	/* next entry we expect */
-	u32 sbq_clean_idx;	/* beginning of new descs */
-	u32 sbq_free_cnt;	/* free buffer desc cnt */
-
-	/* Misc. handler elements. */
-	u32 type;		/* Type of queue, tx, rx. */
-	u32 irq;		/* Which vector this ring is assigned. */
-	u32 cpu;		/* Which CPU this should run on. */
-	char name[IFNAMSIZ + 5];
-	struct napi_struct napi;
-	u8 reserved;
-	struct ql_adapter *qdev;
-	u64 rx_packets;
-	u64 rx_multicast;
-	u64 rx_bytes;
-	u64 rx_dropped;
-	u64 rx_errors;
-};
-
-/*
- * RSS Initialization Control Block
- */
-struct hash_id {
-	u8 value[4];
-};
-
-struct nic_stats {
-	/*
-	 * These stats come from offset 200h to 278h
-	 * in the XGMAC register.
-	 */
-	u64 tx_pkts;
-	u64 tx_bytes;
-	u64 tx_mcast_pkts;
-	u64 tx_bcast_pkts;
-	u64 tx_ucast_pkts;
-	u64 tx_ctl_pkts;
-	u64 tx_pause_pkts;
-	u64 tx_64_pkt;
-	u64 tx_65_to_127_pkt;
-	u64 tx_128_to_255_pkt;
-	u64 tx_256_511_pkt;
-	u64 tx_512_to_1023_pkt;
-	u64 tx_1024_to_1518_pkt;
-	u64 tx_1519_to_max_pkt;
-	u64 tx_undersize_pkt;
-	u64 tx_oversize_pkt;
-
-	/*
-	 * These stats come from offset 300h to 3C8h
-	 * in the XGMAC register.
-	 */
-	u64 rx_bytes;
-	u64 rx_bytes_ok;
-	u64 rx_pkts;
-	u64 rx_pkts_ok;
-	u64 rx_bcast_pkts;
-	u64 rx_mcast_pkts;
-	u64 rx_ucast_pkts;
-	u64 rx_undersize_pkts;
-	u64 rx_oversize_pkts;
-	u64 rx_jabber_pkts;
-	u64 rx_undersize_fcerr_pkts;
-	u64 rx_drop_events;
-	u64 rx_fcerr_pkts;
-	u64 rx_align_err;
-	u64 rx_symbol_err;
-	u64 rx_mac_err;
-	u64 rx_ctl_pkts;
-	u64 rx_pause_pkts;
-	u64 rx_64_pkts;
-	u64 rx_65_to_127_pkts;
-	u64 rx_128_255_pkts;
-	u64 rx_256_511_pkts;
-	u64 rx_512_to_1023_pkts;
-	u64 rx_1024_to_1518_pkts;
-	u64 rx_1519_to_max_pkts;
-	u64 rx_len_err_pkts;
-	/* Receive Mac Err stats */
-	u64 rx_code_err;
-	u64 rx_oversize_err;
-	u64 rx_undersize_err;
-	u64 rx_preamble_err;
-	u64 rx_frame_len_err;
-	u64 rx_crc_err;
-	u64 rx_err_count;
-	/*
-	 * These stats come from offset 500h to 5C8h
-	 * in the XGMAC register.
-	 */
-	u64 tx_cbfc_pause_frames0;
-	u64 tx_cbfc_pause_frames1;
-	u64 tx_cbfc_pause_frames2;
-	u64 tx_cbfc_pause_frames3;
-	u64 tx_cbfc_pause_frames4;
-	u64 tx_cbfc_pause_frames5;
-	u64 tx_cbfc_pause_frames6;
-	u64 tx_cbfc_pause_frames7;
-	u64 rx_cbfc_pause_frames0;
-	u64 rx_cbfc_pause_frames1;
-	u64 rx_cbfc_pause_frames2;
-	u64 rx_cbfc_pause_frames3;
-	u64 rx_cbfc_pause_frames4;
-	u64 rx_cbfc_pause_frames5;
-	u64 rx_cbfc_pause_frames6;
-	u64 rx_cbfc_pause_frames7;
-	u64 rx_nic_fifo_drop;
-};
-
-/* Firmware coredump internal register address/length pairs. */
-enum {
-	MPI_CORE_REGS_ADDR = 0x00030000,
-	MPI_CORE_REGS_CNT = 127,
-	MPI_CORE_SH_REGS_CNT = 16,
-	TEST_REGS_ADDR = 0x00001000,
-	TEST_REGS_CNT = 23,
-	RMII_REGS_ADDR = 0x00001040,
-	RMII_REGS_CNT = 64,
-	FCMAC1_REGS_ADDR = 0x00001080,
-	FCMAC2_REGS_ADDR = 0x000010c0,
-	FCMAC_REGS_CNT = 64,
-	FC1_MBX_REGS_ADDR = 0x00001100,
-	FC2_MBX_REGS_ADDR = 0x00001240,
-	FC_MBX_REGS_CNT = 64,
-	IDE_REGS_ADDR = 0x00001140,
-	IDE_REGS_CNT = 64,
-	NIC1_MBX_REGS_ADDR = 0x00001180,
-	NIC2_MBX_REGS_ADDR = 0x00001280,
-	NIC_MBX_REGS_CNT = 64,
-	SMBUS_REGS_ADDR = 0x00001200,
-	SMBUS_REGS_CNT = 64,
-	I2C_REGS_ADDR = 0x00001fc0,
-	I2C_REGS_CNT = 64,
-	MEMC_REGS_ADDR = 0x00003000,
-	MEMC_REGS_CNT = 256,
-	PBUS_REGS_ADDR = 0x00007c00,
-	PBUS_REGS_CNT = 256,
-	MDE_REGS_ADDR = 0x00010000,
-	MDE_REGS_CNT = 6,
-	CODE_RAM_ADDR = 0x00020000,
-	CODE_RAM_CNT = 0x2000,
-	MEMC_RAM_ADDR = 0x00100000,
-	MEMC_RAM_CNT = 0x2000,
-};
-
-#define MPI_COREDUMP_COOKIE 0x5555aaaa
-struct mpi_coredump_global_header {
-	u32	cookie;
-	u8	idString[16];
-	u32	timeLo;
-	u32	timeHi;
-	u32	imageSize;
-	u32	headerSize;
-	u8	info[220];
-};
-
-struct mpi_coredump_segment_header {
-	u32	cookie;
-	u32	segNum;
-	u32	segSize;
-	u32	extra;
-	u8	description[16];
-};
-
-/* Firmware coredump header segment numbers. */
-enum {
-	CORE_SEG_NUM = 1,
-	TEST_LOGIC_SEG_NUM = 2,
-	RMII_SEG_NUM = 3,
-	FCMAC1_SEG_NUM = 4,
-	FCMAC2_SEG_NUM = 5,
-	FC1_MBOX_SEG_NUM = 6,
-	IDE_SEG_NUM = 7,
-	NIC1_MBOX_SEG_NUM = 8,
-	SMBUS_SEG_NUM = 9,
-	FC2_MBOX_SEG_NUM = 10,
-	NIC2_MBOX_SEG_NUM = 11,
-	I2C_SEG_NUM = 12,
-	MEMC_SEG_NUM = 13,
-	PBUS_SEG_NUM = 14,
-	MDE_SEG_NUM = 15,
-	NIC1_CONTROL_SEG_NUM = 16,
-	NIC2_CONTROL_SEG_NUM = 17,
-	NIC1_XGMAC_SEG_NUM = 18,
-	NIC2_XGMAC_SEG_NUM = 19,
-	WCS_RAM_SEG_NUM = 20,
-	MEMC_RAM_SEG_NUM = 21,
-	XAUI_AN_SEG_NUM = 22,
-	XAUI_HSS_PCS_SEG_NUM = 23,
-	XFI_AN_SEG_NUM = 24,
-	XFI_TRAIN_SEG_NUM = 25,
-	XFI_HSS_PCS_SEG_NUM = 26,
-	XFI_HSS_TX_SEG_NUM = 27,
-	XFI_HSS_RX_SEG_NUM = 28,
-	XFI_HSS_PLL_SEG_NUM = 29,
-	MISC_NIC_INFO_SEG_NUM = 30,
-	INTR_STATES_SEG_NUM = 31,
-	CAM_ENTRIES_SEG_NUM = 32,
-	ROUTING_WORDS_SEG_NUM = 33,
-	ETS_SEG_NUM = 34,
-	PROBE_DUMP_SEG_NUM = 35,
-	ROUTING_INDEX_SEG_NUM = 36,
-	MAC_PROTOCOL_SEG_NUM = 37,
-	XAUI2_AN_SEG_NUM = 38,
-	XAUI2_HSS_PCS_SEG_NUM = 39,
-	XFI2_AN_SEG_NUM = 40,
-	XFI2_TRAIN_SEG_NUM = 41,
-	XFI2_HSS_PCS_SEG_NUM = 42,
-	XFI2_HSS_TX_SEG_NUM = 43,
-	XFI2_HSS_RX_SEG_NUM = 44,
-	XFI2_HSS_PLL_SEG_NUM = 45,
-	SEM_REGS_SEG_NUM = 50
-
-};
-
-/* There are 64 generic NIC registers. */
-#define NIC_REGS_DUMP_WORD_COUNT		64
-/* XGMAC word count. */
-#define XGMAC_DUMP_WORD_COUNT		(XGMAC_REGISTER_END / 4)
-/* Word counts for the SERDES blocks. */
-#define XG_SERDES_XAUI_AN_COUNT		14
-#define XG_SERDES_XAUI_HSS_PCS_COUNT	33
-#define XG_SERDES_XFI_AN_COUNT		14
-#define XG_SERDES_XFI_TRAIN_COUNT		12
-#define XG_SERDES_XFI_HSS_PCS_COUNT	15
-#define XG_SERDES_XFI_HSS_TX_COUNT		32
-#define XG_SERDES_XFI_HSS_RX_COUNT		32
-#define XG_SERDES_XFI_HSS_PLL_COUNT	32
-
-/* There are 2 CNA ETS and 8 NIC ETS registers. */
-#define ETS_REGS_DUMP_WORD_COUNT		10
-
-/* Each probe mux entry stores the probe type plus 64 entries
- * that are each 64 bits in length. There are a total of
- * 34 (PRB_MX_ADDR_VALID_TOTAL) valid probes.
- */
-#define PRB_MX_ADDR_PRB_WORD_COUNT		(1 + (PRB_MX_ADDR_MAX_MUX * 2))
-#define PRB_MX_DUMP_TOT_COUNT		(PRB_MX_ADDR_PRB_WORD_COUNT * \
-							PRB_MX_ADDR_VALID_TOTAL)
-/* Each routing entry consists of 4 32-bit words.
- * They are route type, index, index word, and result.
- * There are 2 route blocks with 8 entries each and
- * 2 NIC blocks with 16 entries each.
- * The total is 48 entries with 4 words each.
- */
-#define RT_IDX_DUMP_ENTRIES			48
-#define RT_IDX_DUMP_WORDS_PER_ENTRY	4
-#define RT_IDX_DUMP_TOT_WORDS		(RT_IDX_DUMP_ENTRIES * \
-						RT_IDX_DUMP_WORDS_PER_ENTRY)
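Both dump sizes are plain arithmetic over constants defined earlier in this header; spelled out:

	/* Probe mux: 1 type word + (64 probes * 2 words) = 129 words each; */
	/* 129 words * 34 valid probes                    = 4386 words      */

	/* Routing: (2 route blocks * 8) + (2 NIC blocks * 16) = 48 entries; */
	/* 48 entries * 4 words                                = 192 words   */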
-/* There are 10 address blocks in filter, each with
- * different entry counts and different word-count-per-entry.
- */
-#define MAC_ADDR_DUMP_ENTRIES \
-	((MAC_ADDR_MAX_CAM_ENTRIES * MAC_ADDR_MAX_CAM_WCOUNT) + \
-	(MAC_ADDR_MAX_MULTICAST_ENTRIES * MAC_ADDR_MAX_MULTICAST_WCOUNT) + \
-	(MAC_ADDR_MAX_VLAN_ENTRIES * MAC_ADDR_MAX_VLAN_WCOUNT) + \
-	(MAC_ADDR_MAX_MCAST_FLTR_ENTRIES * MAC_ADDR_MAX_MCAST_FLTR_WCOUNT) + \
-	(MAC_ADDR_MAX_FC_MAC_ENTRIES * MAC_ADDR_MAX_FC_MAC_WCOUNT) + \
-	(MAC_ADDR_MAX_MGMT_MAC_ENTRIES * MAC_ADDR_MAX_MGMT_MAC_WCOUNT) + \
-	(MAC_ADDR_MAX_MGMT_VLAN_ENTRIES * MAC_ADDR_MAX_MGMT_VLAN_WCOUNT) + \
-	(MAC_ADDR_MAX_MGMT_V4_ENTRIES * MAC_ADDR_MAX_MGMT_V4_WCOUNT) + \
-	(MAC_ADDR_MAX_MGMT_V6_ENTRIES * MAC_ADDR_MAX_MGMT_V6_WCOUNT) + \
-	(MAC_ADDR_MAX_MGMT_TU_DP_ENTRIES * MAC_ADDR_MAX_MGMT_TU_DP_WCOUNT))
-#define MAC_ADDR_DUMP_WORDS_PER_ENTRY	2
-#define MAC_ADDR_DUMP_TOT_WORDS		(MAC_ADDR_DUMP_ENTRIES * \
-						MAC_ADDR_DUMP_WORDS_PER_ENTRY)
-/* Maximum of 4 functions whose semaphore registers are
- * in the coredump.
- */
-#define MAX_SEMAPHORE_FUNCTIONS		4
-/* Defines for accessing the MPI shadow registers. */
-#define RISC_124		0x0003007c
-#define RISC_127		0x0003007f
-#define SHADOW_OFFSET	0xb0000000
-#define SHADOW_REG_SHIFT	20
-
-struct ql_nic_misc {
-	u32 rx_ring_count;
-	u32 tx_ring_count;
-	u32 intr_count;
-	u32 function;
-};
-
-struct ql_reg_dump {
-
-	/* segment 0 */
-	struct mpi_coredump_global_header mpi_global_header;
-
-	/* segment 16 */
-	struct mpi_coredump_segment_header nic_regs_seg_hdr;
-	u32 nic_regs[64];
-
-	/* segment 30 */
-	struct mpi_coredump_segment_header misc_nic_seg_hdr;
-	struct ql_nic_misc misc_nic_info;
-
-	/* segment 31 */
-	/* one interrupt state for each CQ */
-	struct mpi_coredump_segment_header intr_states_seg_hdr;
-	u32 intr_states[MAX_CPUS];
-
-	/* segment 32 */
-	/* 3 cam words each for 16 unicast,
-	 * 2 cam words for each of 32 multicast.
-	 */
-	struct mpi_coredump_segment_header cam_entries_seg_hdr;
-	u32 cam_entries[(16 * 3) + (32 * 3)];
-
-	/* segment 33 */
-	struct mpi_coredump_segment_header nic_routing_words_seg_hdr;
-	u32 nic_routing_words[16];
-
-	/* segment 34 */
-	struct mpi_coredump_segment_header ets_seg_hdr;
-	u32 ets[8+2];
-};
-
-struct ql_mpi_coredump {
-	/* segment 0 */
-	struct mpi_coredump_global_header mpi_global_header;
-
-	/* segment 1 */
-	struct mpi_coredump_segment_header core_regs_seg_hdr;
-	u32 mpi_core_regs[MPI_CORE_REGS_CNT];
-	u32 mpi_core_sh_regs[MPI_CORE_SH_REGS_CNT];
-
-	/* segment 2 */
-	struct mpi_coredump_segment_header test_logic_regs_seg_hdr;
-	u32 test_logic_regs[TEST_REGS_CNT];
-
-	/* segment 3 */
-	struct mpi_coredump_segment_header rmii_regs_seg_hdr;
-	u32 rmii_regs[RMII_REGS_CNT];
-
-	/* segment 4 */
-	struct mpi_coredump_segment_header fcmac1_regs_seg_hdr;
-	u32 fcmac1_regs[FCMAC_REGS_CNT];
-
-	/* segment 5 */
-	struct mpi_coredump_segment_header fcmac2_regs_seg_hdr;
-	u32 fcmac2_regs[FCMAC_REGS_CNT];
-
-	/* segment 6 */
-	struct mpi_coredump_segment_header fc1_mbx_regs_seg_hdr;
-	u32 fc1_mbx_regs[FC_MBX_REGS_CNT];
-
-	/* segment 7 */
-	struct mpi_coredump_segment_header ide_regs_seg_hdr;
-	u32 ide_regs[IDE_REGS_CNT];
-
-	/* segment 8 */
-	struct mpi_coredump_segment_header nic1_mbx_regs_seg_hdr;
-	u32 nic1_mbx_regs[NIC_MBX_REGS_CNT];
-
-	/* segment 9 */
-	struct mpi_coredump_segment_header smbus_regs_seg_hdr;
-	u32 smbus_regs[SMBUS_REGS_CNT];
-
-	/* segment 10 */
-	struct mpi_coredump_segment_header fc2_mbx_regs_seg_hdr;
-	u32 fc2_mbx_regs[FC_MBX_REGS_CNT];
-
-	/* segment 11 */
-	struct mpi_coredump_segment_header nic2_mbx_regs_seg_hdr;
-	u32 nic2_mbx_regs[NIC_MBX_REGS_CNT];
-
-	/* segment 12 */
-	struct mpi_coredump_segment_header i2c_regs_seg_hdr;
-	u32 i2c_regs[I2C_REGS_CNT];
-	/* segment 13 */
-	struct mpi_coredump_segment_header memc_regs_seg_hdr;
-	u32 memc_regs[MEMC_REGS_CNT];
-
-	/* segment 14 */
-	struct mpi_coredump_segment_header pbus_regs_seg_hdr;
-	u32 pbus_regs[PBUS_REGS_CNT];
-
-	/* segment 15 */
-	struct mpi_coredump_segment_header mde_regs_seg_hdr;
-	u32 mde_regs[MDE_REGS_CNT];
-
-	/* segment 16 */
-	struct mpi_coredump_segment_header nic_regs_seg_hdr;
-	u32 nic_regs[NIC_REGS_DUMP_WORD_COUNT];
-
-	/* segment 17 */
-	struct mpi_coredump_segment_header nic2_regs_seg_hdr;
-	u32 nic2_regs[NIC_REGS_DUMP_WORD_COUNT];
-
-	/* segment 18 */
-	struct mpi_coredump_segment_header xgmac1_seg_hdr;
-	u32 xgmac1[XGMAC_DUMP_WORD_COUNT];
-
-	/* segment 19 */
-	struct mpi_coredump_segment_header xgmac2_seg_hdr;
-	u32 xgmac2[XGMAC_DUMP_WORD_COUNT];
-
-	/* segment 20 */
-	struct mpi_coredump_segment_header code_ram_seg_hdr;
-	u32 code_ram[CODE_RAM_CNT];
-
-	/* segment 21 */
-	struct mpi_coredump_segment_header memc_ram_seg_hdr;
-	u32 memc_ram[MEMC_RAM_CNT];
-
-	/* segment 22 */
-	struct mpi_coredump_segment_header xaui_an_hdr;
-	u32 serdes_xaui_an[XG_SERDES_XAUI_AN_COUNT];
-
-	/* segment 23 */
-	struct mpi_coredump_segment_header xaui_hss_pcs_hdr;
-	u32 serdes_xaui_hss_pcs[XG_SERDES_XAUI_HSS_PCS_COUNT];
-
-	/* segment 24 */
-	struct mpi_coredump_segment_header xfi_an_hdr;
-	u32 serdes_xfi_an[XG_SERDES_XFI_AN_COUNT];
-
-	/* segment 25 */
-	struct mpi_coredump_segment_header xfi_train_hdr;
-	u32 serdes_xfi_train[XG_SERDES_XFI_TRAIN_COUNT];
-
-	/* segment 26 */
-	struct mpi_coredump_segment_header xfi_hss_pcs_hdr;
-	u32 serdes_xfi_hss_pcs[XG_SERDES_XFI_HSS_PCS_COUNT];
-
-	/* segment 27 */
-	struct mpi_coredump_segment_header xfi_hss_tx_hdr;
-	u32 serdes_xfi_hss_tx[XG_SERDES_XFI_HSS_TX_COUNT];
-
-	/* segment 28 */
-	struct mpi_coredump_segment_header xfi_hss_rx_hdr;
-	u32 serdes_xfi_hss_rx[XG_SERDES_XFI_HSS_RX_COUNT];
-
-	/* segment 29 */
-	struct mpi_coredump_segment_header xfi_hss_pll_hdr;
-	u32 serdes_xfi_hss_pll[XG_SERDES_XFI_HSS_PLL_COUNT];
-
-	/* segment 30 */
-	struct mpi_coredump_segment_header misc_nic_seg_hdr;
-	struct ql_nic_misc misc_nic_info;
-
-	/* segment 31 */
-	/* one interrupt state for each CQ */
-	struct mpi_coredump_segment_header intr_states_seg_hdr;
-	u32 intr_states[MAX_RX_RINGS];
-
-	/* segment 32 */
-	/* 3 CAM words for each of 16 unicast entries and
-	 * 2 CAM words for each of 32 multicast entries; the
-	 * array below is sized at 3 words per entry for both.
-	 */
-	struct mpi_coredump_segment_header cam_entries_seg_hdr;
-	u32 cam_entries[(16 * 3) + (32 * 3)];
-
-	/* segment 33 */
-	struct mpi_coredump_segment_header nic_routing_words_seg_hdr;
-	u32 nic_routing_words[16];
-
-	/* segment 34 */
-	struct mpi_coredump_segment_header ets_seg_hdr;
-	u32 ets[ETS_REGS_DUMP_WORD_COUNT];
-
-	/* segment 35 */
-	struct mpi_coredump_segment_header probe_dump_seg_hdr;
-	u32 probe_dump[PRB_MX_DUMP_TOT_COUNT];
-
-	/* segment 36 */
-	struct mpi_coredump_segment_header routing_reg_seg_hdr;
-	u32 routing_regs[RT_IDX_DUMP_TOT_WORDS];
-
-	/* segment 37 */
-	struct mpi_coredump_segment_header mac_prot_reg_seg_hdr;
-	u32 mac_prot_regs[MAC_ADDR_DUMP_TOT_WORDS];
-
-	/* segment 38 */
-	struct mpi_coredump_segment_header xaui2_an_hdr;
-	u32 serdes2_xaui_an[XG_SERDES_XAUI_AN_COUNT];
-
-	/* segment 39 */
-	struct mpi_coredump_segment_header xaui2_hss_pcs_hdr;
-	u32 serdes2_xaui_hss_pcs[XG_SERDES_XAUI_HSS_PCS_COUNT];
-
-	/* segment 40 */
-	struct mpi_coredump_segment_header xfi2_an_hdr;
-	u32 serdes2_xfi_an[XG_SERDES_XFI_AN_COUNT];
-
-	/* segment 41 */
-	struct mpi_coredump_segment_header xfi2_train_hdr;
-	u32 serdes2_xfi_train[XG_SERDES_XFI_TRAIN_COUNT];
-
-	/* segment 42 */
-	struct mpi_coredump_segment_header xfi2_hss_pcs_hdr;
-	u32 serdes2_xfi_hss_pcs[XG_SERDES_XFI_HSS_PCS_COUNT];
-
-	/* segment 43 */
-	struct mpi_coredump_segment_header xfi2_hss_tx_hdr;
-	u32 serdes2_xfi_hss_tx[XG_SERDES_XFI_HSS_TX_COUNT];
-
-	/* segment 44 */
-	struct mpi_coredump_segment_header xfi2_hss_rx_hdr;
-	u32 serdes2_xfi_hss_rx[XG_SERDES_XFI_HSS_RX_COUNT];
-
-	/* segment 45 */
-	struct mpi_coredump_segment_header xfi2_hss_pll_hdr;
-	u32 serdes2_xfi_hss_pll[XG_SERDES_XFI_HSS_PLL_COUNT];
-
-	/* segment 50 */
-	/* semaphore registers, one per function (MAX_SEMAPHORE_FUNCTIONS) */
-	struct mpi_coredump_segment_header sem_regs_seg_hdr;
-	u32 sem_regs[MAX_SEMAPHORE_FUNCTIONS];
-};
-
-/*
- * The intr_context structure is used during initialization
- * to hook the interrupts.  In a single-IRQ environment it is
- * also used as the context for the ISR.
- */
-struct intr_context {
-	struct ql_adapter *qdev;
-	u32 intr;
-	u32 irq_mask;		/* Mask of which rings the vector services. */
-	u32 hooked;
-	u32 intr_en_mask;	/* value/mask used to enable this intr */
-	u32 intr_dis_mask;	/* value/mask used to disable this intr */
-	u32 intr_read_mask;	/* value/mask used to read this intr */
-	char name[IFNAMSIZ * 2];
-	atomic_t irq_cnt;	/* irq_cnt is used in single vector
-				 * environment.  It's incremented for each
-				 * irq handler that is scheduled.  When each
-				 * handler finishes it decrements irq_cnt and
-				 * enables interrupts if it's zero. */
-	irq_handler_t handler;
-};
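-/* A minimal sketch of the irq_cnt protocol described above (the
- * helper below is hypothetical and not part of the driver): in a
- * single-vector environment the last scheduled handler to finish
- * re-enables the interrupt.
- *
- *	static void example_isr_done(struct intr_context *ctx)
- *	{
- *		if (atomic_dec_and_test(&ctx->irq_cnt))
- *			ql_enable_completion_interrupt(ctx->qdev, ctx->intr);
- *	}
- */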
-
-/* adapter flags definitions. */
-enum {
-	QL_ADAPTER_UP = 0,	/* Adapter has been brought up. */
-	QL_LEGACY_ENABLED = 1,
-	QL_MSI_ENABLED = 2,
-	QL_MSIX_ENABLED = 3,
-	QL_DMA64 = 4,
-	QL_PROMISCUOUS = 5,
-	QL_ALLMULTI = 6,
-	QL_PORT_CFG = 7,
-	QL_CAM_RT_SET = 8,
-	QL_SELFTEST = 9,
-	QL_LB_LINK_UP = 10,
-	QL_FRC_COREDUMP = 11,
-	QL_EEH_FATAL = 12,
-	QL_ASIC_RECOVERY = 14, /* We are in ASIC recovery. */
-};
-
-/* link_status bit definitions */
-enum {
-	STS_LOOPBACK_MASK = 0x00000700,
-	STS_LOOPBACK_PCS = 0x00000100,
-	STS_LOOPBACK_HSS = 0x00000200,
-	STS_LOOPBACK_EXT = 0x00000300,
-	STS_PAUSE_MASK = 0x000000c0,
-	STS_PAUSE_STD = 0x00000040,
-	STS_PAUSE_PRI = 0x00000080,
-	STS_SPEED_MASK = 0x00000038,
-	STS_SPEED_100Mb = 0x00000000,
-	STS_SPEED_1Gb = 0x00000008,
-	STS_SPEED_10Gb = 0x00000010,
-	STS_LINK_TYPE_MASK = 0x00000007,
-	STS_LINK_TYPE_XFI = 0x00000001,
-	STS_LINK_TYPE_XAUI = 0x00000002,
-	STS_LINK_TYPE_XFI_BP = 0x00000003,
-	STS_LINK_TYPE_XAUI_BP = 0x00000004,
-	STS_LINK_TYPE_10GBASET = 0x00000005,
-};
-
-/* link_config bit definitions */
-enum {
-	CFG_JUMBO_FRAME_SIZE = 0x00010000,
-	CFG_PAUSE_MASK = 0x00000060,
-	CFG_PAUSE_STD = 0x00000020,
-	CFG_PAUSE_PRI = 0x00000040,
-	CFG_DCBX = 0x00000010,
-	CFG_LOOPBACK_MASK = 0x00000007,
-	CFG_LOOPBACK_PCS = 0x00000002,
-	CFG_LOOPBACK_HSS = 0x00000004,
-	CFG_LOOPBACK_EXT = 0x00000006,
-	CFG_DEFAULT_MAX_FRAME_SIZE = 0x00002580,
-};
-
-struct nic_operations {
-
-	int (*get_flash) (struct ql_adapter *);
-	int (*port_initialize) (struct ql_adapter *);
-};
-
-/*
- * The main Adapter structure definition.
- * This structure has all fields relevant to the hardware.
- */
-struct ql_adapter {
-	struct ricb ricb;
-	unsigned long flags;
-	u32 wol;
-
-	struct nic_stats nic_stats;
-
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-
-	/* PCI Configuration information for this device */
-	struct pci_dev *pdev;
-	struct net_device *ndev;	/* Parent NET device */
-
-	/* Hardware information */
-	u32 chip_rev_id;
-	u32 fw_rev_id;
-	u32 func;		/* PCI function for this adapter */
-	u32 alt_func;		/* PCI function for alternate adapter */
-	u32 port;		/* Port number of this adapter */
-
-	spinlock_t adapter_lock;
-	spinlock_t hw_lock;
-	spinlock_t stats_lock;
-
-	/* PCI Bus Relative Register Addresses */
-	void __iomem *reg_base;
-	void __iomem *doorbell_area;
-	u32 doorbell_area_size;
-
-	u32 msg_enable;
-
-	/* Page for Shadow Registers */
-	void *rx_ring_shadow_reg_area;
-	dma_addr_t rx_ring_shadow_reg_dma;
-	void *tx_ring_shadow_reg_area;
-	dma_addr_t tx_ring_shadow_reg_dma;
-
-	u32 mailbox_in;
-	u32 mailbox_out;
-	struct mbox_params idc_mbc;
-	struct mutex	mpi_mutex;
-
-	int tx_ring_size;
-	int rx_ring_size;
-	u32 intr_count;
-	struct msix_entry *msi_x_entry;
-	struct intr_context intr_context[MAX_RX_RINGS];
-
-	int tx_ring_count;	/* One per online CPU. */
-	u32 rss_ring_count;	/* One per irq vector.  */
-	/*
-	 * rx_ring_count =
-	 *  (CPU count * outbound completion rx_ring) +
-	 *  (irq_vector_cnt * inbound (RSS) completion rx_ring)
-	 */
-	int rx_ring_count;
-	int ring_mem_size;
-	void *ring_mem;
-
-	struct rx_ring rx_ring[MAX_RX_RINGS];
-	struct tx_ring tx_ring[MAX_TX_RINGS];
-	unsigned int lbq_buf_order;
-
-	int rx_csum;
-	u32 default_rx_queue;
-
-	u16 rx_coalesce_usecs;	/* cqicb->int_delay */
-	u16 rx_max_coalesced_frames;	/* cqicb->pkt_int_delay */
-	u16 tx_coalesce_usecs;	/* cqicb->int_delay */
-	u16 tx_max_coalesced_frames;	/* cqicb->pkt_int_delay */
-
-	u32 xg_sem_mask;
-	u32 port_link_up;
-	u32 port_init;
-	u32 link_status;
-	struct ql_mpi_coredump *mpi_coredump;
-	u32 core_is_dumped;
-	u32 link_config;
-	u32 led_config;
-	u32 max_frame_size;
-
-	union flash_params flash;
-
-	struct workqueue_struct *workqueue;
-	struct delayed_work asic_reset_work;
-	struct delayed_work mpi_reset_work;
-	struct delayed_work mpi_work;
-	struct delayed_work mpi_port_cfg_work;
-	struct delayed_work mpi_idc_work;
-	struct delayed_work mpi_core_to_log;
-	struct completion ide_completion;
-	const struct nic_operations *nic_ops;
-	u16 device_id;
-	struct timer_list timer;
-	atomic_t lb_count;
-	/* Keep local copy of current mac address. */
-	char current_mac_addr[ETH_ALEN];
-};
-
-/*
- * Typical Register accessor for memory mapped device.
- */
-static inline u32 ql_read32(const struct ql_adapter *qdev, int reg)
-{
-	return readl(qdev->reg_base + reg);
-}
-
-/*
- * Typical Register accessor for memory mapped device.
- */
-static inline void ql_write32(const struct ql_adapter *qdev, int reg, u32 val)
-{
-	writel(val, qdev->reg_base + reg);
-}
-
-/*
- * Doorbell Registers:
- * Doorbell registers are virtual registers in the PCI memory space.
- * The space is allocated by the chip during PCI initialization.  The
- * device driver finds the doorbell address in BAR 3 in PCI config space.
- * The registers are used to control outbound and inbound queues. For
- * example, the producer index for an outbound queue.  Each queue uses
- * 1 4k chunk of memory.  The lower half of the space is for outbound
- * queues. The upper half is for inbound queues.
- */
-static inline void ql_write_db_reg(u32 val, void __iomem *addr)
-{
-	writel(val, addr);
-	mmiowb();
-}
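-/* Usage sketch (the field names are assumptions for illustration
- * only): a TX producer index update is a single doorbell write, e.g.
- *
- *	ql_write_db_reg(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
- */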
-
-/*
- * Relaxed variant of ql_write_db_reg() above: the same doorbell
- * write, but without the mmiowb() barrier.
- * Caller has to guarantee ordering.
- */
-static inline void ql_write_db_reg_relaxed(u32 val, void __iomem *addr)
-{
-	writel_relaxed(val, addr);
-}
-
-/*
- * Shadow Registers:
- * Outbound queues have a consumer index that is maintained by the chip.
- * Inbound queues have a producer index that is maintained by the chip.
- * For lower overhead, these registers are "shadowed" to host memory
- * which allows the device driver to track the queue progress without
- * PCI reads. When an entry is placed on an inbound queue, the chip will
- * update the relevant index register and then copy the value to the
- * shadow register in host memory.
- */
-static inline u32 ql_read_sh_reg(__le32  *addr)
-{
-	u32 reg;
-	reg =  le32_to_cpu(*addr);
-	rmb();
-	return reg;
-}
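-/* Usage sketch (the field name is an assumption for illustration
- * only): the inbound completion producer index is polled from its
- * shadow copy instead of issuing a PCI read, e.g.
- *
- *	u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
- */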
-
-extern char qlge_driver_name[];
-extern const char qlge_driver_version[];
-extern const struct ethtool_ops qlge_ethtool_ops;
-
-int ql_sem_spinlock(struct ql_adapter *qdev, u32 sem_mask);
-void ql_sem_unlock(struct ql_adapter *qdev, u32 sem_mask);
-int ql_read_xgmac_reg(struct ql_adapter *qdev, u32 reg, u32 *data);
-int ql_get_mac_addr_reg(struct ql_adapter *qdev, u32 type, u16 index,
-			u32 *value);
-int ql_get_routing_reg(struct ql_adapter *qdev, u32 index, u32 *value);
-int ql_write_cfg(struct ql_adapter *qdev, void *ptr, int size, u32 bit,
-		 u16 q_id);
-void ql_queue_fw_error(struct ql_adapter *qdev);
-void ql_mpi_work(struct work_struct *work);
-void ql_mpi_reset_work(struct work_struct *work);
-void ql_mpi_core_to_log(struct work_struct *work);
-int ql_wait_reg_rdy(struct ql_adapter *qdev, u32 reg, u32 bit, u32 ebit);
-void ql_queue_asic_error(struct ql_adapter *qdev);
-u32 ql_enable_completion_interrupt(struct ql_adapter *qdev, u32 intr);
-void ql_set_ethtool_ops(struct net_device *ndev);
-int ql_read_xgmac_reg64(struct ql_adapter *qdev, u32 reg, u64 *data);
-void ql_mpi_idc_work(struct work_struct *work);
-void ql_mpi_port_cfg_work(struct work_struct *work);
-int ql_mb_get_fw_state(struct ql_adapter *qdev);
-int ql_cam_route_initialize(struct ql_adapter *qdev);
-int ql_read_mpi_reg(struct ql_adapter *qdev, u32 reg, u32 *data);
-int ql_write_mpi_reg(struct ql_adapter *qdev, u32 reg, u32 data);
-int ql_unpause_mpi_risc(struct ql_adapter *qdev);
-int ql_pause_mpi_risc(struct ql_adapter *qdev);
-int ql_hard_reset_mpi_risc(struct ql_adapter *qdev);
-int ql_soft_reset_mpi_risc(struct ql_adapter *qdev);
-int ql_dump_risc_ram_area(struct ql_adapter *qdev, void *buf, u32 ram_addr,
-			  int word_count);
-int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump);
-int ql_mb_about_fw(struct ql_adapter *qdev);
-int ql_mb_wol_set_magic(struct ql_adapter *qdev, u32 enable_wol);
-int ql_mb_wol_mode(struct ql_adapter *qdev, u32 wol);
-int ql_mb_set_led_cfg(struct ql_adapter *qdev, u32 led_config);
-int ql_mb_get_led_cfg(struct ql_adapter *qdev);
-void ql_link_on(struct ql_adapter *qdev);
-void ql_link_off(struct ql_adapter *qdev);
-int ql_mb_set_mgmnt_traffic_ctl(struct ql_adapter *qdev, u32 control);
-int ql_mb_get_port_cfg(struct ql_adapter *qdev);
-int ql_mb_set_port_cfg(struct ql_adapter *qdev);
-int ql_wait_fifo_empty(struct ql_adapter *qdev);
-void ql_get_dump(struct ql_adapter *qdev, void *buff);
-netdev_tx_t ql_lb_send(struct sk_buff *skb, struct net_device *ndev);
-void ql_check_lb_frame(struct ql_adapter *, struct sk_buff *);
-int ql_own_firmware(struct ql_adapter *qdev);
-int ql_clean_lb_rx_ring(struct rx_ring *rx_ring, int budget);
-
-/* #define QL_ALL_DUMP */
-/* #define QL_REG_DUMP */
-/* #define QL_DEV_DUMP */
-/* #define QL_CB_DUMP */
-/* #define QL_IB_DUMP */
-/* #define QL_OB_DUMP */
-
-#ifdef QL_REG_DUMP
-void ql_dump_xgmac_control_regs(struct ql_adapter *qdev);
-void ql_dump_routing_entries(struct ql_adapter *qdev);
-void ql_dump_regs(struct ql_adapter *qdev);
-#define QL_DUMP_REGS(qdev) ql_dump_regs(qdev)
-#define QL_DUMP_ROUTE(qdev) ql_dump_routing_entries(qdev)
-#define QL_DUMP_XGMAC_CONTROL_REGS(qdev) ql_dump_xgmac_control_regs(qdev)
-#else
-#define QL_DUMP_REGS(qdev)
-#define QL_DUMP_ROUTE(qdev)
-#define QL_DUMP_XGMAC_CONTROL_REGS(qdev)
-#endif
-
-#ifdef QL_STAT_DUMP
-void ql_dump_stat(struct ql_adapter *qdev);
-#define QL_DUMP_STAT(qdev) ql_dump_stat(qdev)
-#else
-#define QL_DUMP_STAT(qdev)
-#endif
-
-#ifdef QL_DEV_DUMP
-void ql_dump_qdev(struct ql_adapter *qdev);
-#define QL_DUMP_QDEV(qdev) ql_dump_qdev(qdev)
-#else
-#define QL_DUMP_QDEV(qdev)
-#endif
-
-#ifdef QL_CB_DUMP
-void ql_dump_wqicb(struct wqicb *wqicb);
-void ql_dump_tx_ring(struct tx_ring *tx_ring);
-void ql_dump_ricb(struct ricb *ricb);
-void ql_dump_cqicb(struct cqicb *cqicb);
-void ql_dump_rx_ring(struct rx_ring *rx_ring);
-void ql_dump_hw_cb(struct ql_adapter *qdev, int size, u32 bit, u16 q_id);
-#define QL_DUMP_RICB(ricb) ql_dump_ricb(ricb)
-#define QL_DUMP_WQICB(wqicb) ql_dump_wqicb(wqicb)
-#define QL_DUMP_TX_RING(tx_ring) ql_dump_tx_ring(tx_ring)
-#define QL_DUMP_CQICB(cqicb) ql_dump_cqicb(cqicb)
-#define QL_DUMP_RX_RING(rx_ring) ql_dump_rx_ring(rx_ring)
-#define QL_DUMP_HW_CB(qdev, size, bit, q_id) \
-		ql_dump_hw_cb(qdev, size, bit, q_id)
-#else
-#define QL_DUMP_RICB(ricb)
-#define QL_DUMP_WQICB(wqicb)
-#define QL_DUMP_TX_RING(tx_ring)
-#define QL_DUMP_CQICB(cqicb)
-#define QL_DUMP_RX_RING(rx_ring)
-#define QL_DUMP_HW_CB(qdev, size, bit, q_id)
-#endif
-
-#ifdef QL_OB_DUMP
-void ql_dump_tx_desc(struct tx_buf_desc *tbd);
-void ql_dump_ob_mac_iocb(struct ob_mac_iocb_req *ob_mac_iocb);
-void ql_dump_ob_mac_rsp(struct ob_mac_iocb_rsp *ob_mac_rsp);
-#define QL_DUMP_OB_MAC_IOCB(ob_mac_iocb) ql_dump_ob_mac_iocb(ob_mac_iocb)
-#define QL_DUMP_OB_MAC_RSP(ob_mac_rsp) ql_dump_ob_mac_rsp(ob_mac_rsp)
-#else
-#define QL_DUMP_OB_MAC_IOCB(ob_mac_iocb)
-#define QL_DUMP_OB_MAC_RSP(ob_mac_rsp)
-#endif
-
-#ifdef QL_IB_DUMP
-void ql_dump_ib_mac_rsp(struct ib_mac_iocb_rsp *ib_mac_rsp);
-#define QL_DUMP_IB_MAC_RSP(ib_mac_rsp) ql_dump_ib_mac_rsp(ib_mac_rsp)
-#else
-#define QL_DUMP_IB_MAC_RSP(ib_mac_rsp)
-#endif
-
-#ifdef	QL_ALL_DUMP
-void ql_dump_all(struct ql_adapter *qdev);
-#define QL_DUMP_ALL(qdev) ql_dump_all(qdev)
-#else
-#define QL_DUMP_ALL(qdev)
-#endif
-
-#endif /* _QLGE_H_ */
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
deleted file mode 100644
index 31389ab..0000000
--- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
+++ /dev/null
@@ -1,2024 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/slab.h>
-
-#include "qlge.h"
-
-/* Read a NIC register from the alternate function. */
-static u32 ql_read_other_func_reg(struct ql_adapter *qdev,
-						u32 reg)
-{
-	u32 register_to_read;
-	u32 reg_val;
-	unsigned int status = 0;
-
-	register_to_read = MPI_NIC_REG_BLOCK
-				| MPI_NIC_READ
-				| (qdev->alt_func << MPI_NIC_FUNCTION_SHIFT)
-				| reg;
-	status = ql_read_mpi_reg(qdev, register_to_read, &reg_val);
-	if (status != 0)
-		return 0xffffffff;
-
-	return reg_val;
-}
-
-/* Write a NIC register of the alternate function. */
-static int ql_write_other_func_reg(struct ql_adapter *qdev,
-					u32 reg, u32 reg_val)
-{
-	u32 register_to_read;
-	int status = 0;
-
-	register_to_read = MPI_NIC_REG_BLOCK
-				| MPI_NIC_READ
-				| (qdev->alt_func << MPI_NIC_FUNCTION_SHIFT)
-				| reg;
-	status = ql_write_mpi_reg(qdev, register_to_read, reg_val);
-
-	return status;
-}
-
-static int ql_wait_other_func_reg_rdy(struct ql_adapter *qdev, u32 reg,
-					u32 bit, u32 err_bit)
-{
-	u32 temp;
-	int count = 10;
-
-	while (count) {
-		temp = ql_read_other_func_reg(qdev, reg);
-
-		/* check for errors */
-		if (temp & err_bit)
-			return -1;
-		else if (temp & bit)
-			return 0;
-		mdelay(10);
-		count--;
-	}
-	return -1;
-}
-
-static int ql_read_other_func_serdes_reg(struct ql_adapter *qdev, u32 reg,
-							u32 *data)
-{
-	int status;
-
-	/* wait for reg to come ready */
-	status = ql_wait_other_func_reg_rdy(qdev, XG_SERDES_ADDR / 4,
-						XG_SERDES_ADDR_RDY, 0);
-	if (status)
-		goto exit;
-
-	/* set up for reg read */
-	ql_write_other_func_reg(qdev, XG_SERDES_ADDR/4, reg | PROC_ADDR_R);
-
-	/* wait for reg to come ready */
-	status = ql_wait_other_func_reg_rdy(qdev, XG_SERDES_ADDR / 4,
-						XG_SERDES_ADDR_RDY, 0);
-	if (status)
-		goto exit;
-
-	/* get the data */
-	*data = ql_read_other_func_reg(qdev, (XG_SERDES_DATA / 4));
-exit:
-	return status;
-}
-
-/* Read out the SERDES registers */
-static int ql_read_serdes_reg(struct ql_adapter *qdev, u32 reg, u32 *data)
-{
-	int status;
-
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev, XG_SERDES_ADDR, XG_SERDES_ADDR_RDY, 0);
-	if (status)
-		goto exit;
-
-	/* set up for reg read */
-	ql_write32(qdev, XG_SERDES_ADDR, reg | PROC_ADDR_R);
-
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev, XG_SERDES_ADDR, XG_SERDES_ADDR_RDY, 0);
-	if (status)
-		goto exit;
-
-	/* get the data */
-	*data = ql_read32(qdev, XG_SERDES_DATA);
-exit:
-	return status;
-}
-
-static void ql_get_both_serdes(struct ql_adapter *qdev, u32 addr,
-			u32 *direct_ptr, u32 *indirect_ptr,
-			unsigned int direct_valid, unsigned int indirect_valid)
-{
-	unsigned int status;
-
-	status = 1;
-	if (direct_valid)
-		status = ql_read_serdes_reg(qdev, addr, direct_ptr);
-	/* Dead fill any failures or invalids. */
-	if (status)
-		*direct_ptr = 0xDEADBEEF;
-
-	status = 1;
-	if (indirect_valid)
-		status = ql_read_other_func_serdes_reg(
-						qdev, addr, indirect_ptr);
-	/* Dead fill any failures or invalids. */
-	if (status)
-		*indirect_ptr = 0xDEADBEEF;
-}
-
-static int ql_get_serdes_regs(struct ql_adapter *qdev,
-				struct ql_mpi_coredump *mpi_coredump)
-{
-	int status;
-	unsigned int xfi_direct_valid, xfi_indirect_valid, xaui_direct_valid;
-	unsigned int xaui_indirect_valid, i;
-	u32 *direct_ptr, temp;
-	u32 *indirect_ptr;
-
-	xfi_direct_valid = xfi_indirect_valid = 0;
-	xaui_direct_valid = xaui_indirect_valid = 1;
-
-	/* The XAUI needs to be read out per port */
-	status = ql_read_other_func_serdes_reg(qdev,
-			XG_SERDES_XAUI_HSS_PCS_START, &temp);
-	if (status)
-		temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
-
-	if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
-				XG_SERDES_ADDR_XAUI_PWR_DOWN)
-		xaui_indirect_valid = 0;
-
-	status = ql_read_serdes_reg(qdev, XG_SERDES_XAUI_HSS_PCS_START, &temp);
-
-	if (status)
-		temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
-
-	if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
-				XG_SERDES_ADDR_XAUI_PWR_DOWN)
-		xaui_direct_valid = 0;
-
-	/*
-	 * The XFI register is shared, so we only need to read it from
-	 * one function and then check the bits.
-	 */
-	status = ql_read_serdes_reg(qdev, XG_SERDES_ADDR_STS, &temp);
-	if (status)
-		temp = 0;
-
-	if ((temp & XG_SERDES_ADDR_XFI1_PWR_UP) ==
-					XG_SERDES_ADDR_XFI1_PWR_UP) {
-		/* now see if I'm NIC 1 or NIC 2 */
-		if (qdev->func & 1)
-			/* I'm NIC 2, so the indirect (NIC1) xfi is up. */
-			xfi_indirect_valid = 1;
-		else
-			xfi_direct_valid = 1;
-	}
-	if ((temp & XG_SERDES_ADDR_XFI2_PWR_UP) ==
-					XG_SERDES_ADDR_XFI2_PWR_UP) {
-		/* now see if I'm NIC 1 or NIC 2 */
-		if (qdev->func & 1)
-			/* I'm NIC 2, so the indirect (NIC1) xfi is up. */
-			xfi_direct_valid = 1;
-		else
-			xfi_indirect_valid = 1;
-	}
-
-	/* Get XAUI_AN register block. */
-	if (qdev->func & 1) {
-		/* Function 2 is direct	*/
-		direct_ptr = mpi_coredump->serdes2_xaui_an;
-		indirect_ptr = mpi_coredump->serdes_xaui_an;
-	} else {
-		/* Function 1 is direct	*/
-		direct_ptr = mpi_coredump->serdes_xaui_an;
-		indirect_ptr = mpi_coredump->serdes2_xaui_an;
-	}
-
-	for (i = 0; i <= 0x000000034; i += 4, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xaui_direct_valid, xaui_indirect_valid);
-
-	/* Get XAUI_HSS_PCS register block. */
-	if (qdev->func & 1) {
-		direct_ptr =
-			mpi_coredump->serdes2_xaui_hss_pcs;
-		indirect_ptr =
-			mpi_coredump->serdes_xaui_hss_pcs;
-	} else {
-		direct_ptr =
-			mpi_coredump->serdes_xaui_hss_pcs;
-		indirect_ptr =
-			mpi_coredump->serdes2_xaui_hss_pcs;
-	}
-
-	for (i = 0x800; i <= 0x880; i += 4, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xaui_direct_valid, xaui_indirect_valid);
-
-	/* Get XAUI_XFI_AN register block. */
-	if (qdev->func & 1) {
-		direct_ptr = mpi_coredump->serdes2_xfi_an;
-		indirect_ptr = mpi_coredump->serdes_xfi_an;
-	} else {
-		direct_ptr = mpi_coredump->serdes_xfi_an;
-		indirect_ptr = mpi_coredump->serdes2_xfi_an;
-	}
-
-	for (i = 0x1000; i <= 0x1034; i += 4, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xfi_direct_valid, xfi_indirect_valid);
-
-	/* Get XAUI_XFI_TRAIN register block. */
-	if (qdev->func & 1) {
-		direct_ptr = mpi_coredump->serdes2_xfi_train;
-		indirect_ptr =
-			mpi_coredump->serdes_xfi_train;
-	} else {
-		direct_ptr = mpi_coredump->serdes_xfi_train;
-		indirect_ptr =
-			mpi_coredump->serdes2_xfi_train;
-	}
-
-	for (i = 0x1050; i <= 0x107c; i += 4, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xfi_direct_valid, xfi_indirect_valid);
-
-	/* Get XAUI_XFI_HSS_PCS register block. */
-	if (qdev->func & 1) {
-		direct_ptr =
-			mpi_coredump->serdes2_xfi_hss_pcs;
-		indirect_ptr =
-			mpi_coredump->serdes_xfi_hss_pcs;
-	} else {
-		direct_ptr =
-			mpi_coredump->serdes_xfi_hss_pcs;
-		indirect_ptr =
-			mpi_coredump->serdes2_xfi_hss_pcs;
-	}
-
-	for (i = 0x1800; i <= 0x1838; i += 4, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xfi_direct_valid, xfi_indirect_valid);
-
-	/* Get XAUI_XFI_HSS_TX register block. */
-	if (qdev->func & 1) {
-		direct_ptr =
-			mpi_coredump->serdes2_xfi_hss_tx;
-		indirect_ptr =
-			mpi_coredump->serdes_xfi_hss_tx;
-	} else {
-		direct_ptr = mpi_coredump->serdes_xfi_hss_tx;
-		indirect_ptr =
-			mpi_coredump->serdes2_xfi_hss_tx;
-	}
-	for (i = 0x1c00; i <= 0x1c1f; i++, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xfi_direct_valid, xfi_indirect_valid);
-
-	/* Get XAUI_XFI_HSS_RX register block. */
-	if (qdev->func & 1) {
-		direct_ptr =
-			mpi_coredump->serdes2_xfi_hss_rx;
-		indirect_ptr =
-			mpi_coredump->serdes_xfi_hss_rx;
-	} else {
-		direct_ptr = mpi_coredump->serdes_xfi_hss_rx;
-		indirect_ptr =
-			mpi_coredump->serdes2_xfi_hss_rx;
-	}
-
-	for (i = 0x1c40; i <= 0x1c5f; i++, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xfi_direct_valid, xfi_indirect_valid);
-
-	/* Get XAUI_XFI_HSS_PLL register block. */
-	if (qdev->func & 1) {
-		direct_ptr =
-			mpi_coredump->serdes2_xfi_hss_pll;
-		indirect_ptr =
-			mpi_coredump->serdes_xfi_hss_pll;
-	} else {
-		direct_ptr =
-			mpi_coredump->serdes_xfi_hss_pll;
-		indirect_ptr =
-			mpi_coredump->serdes2_xfi_hss_pll;
-	}
-	for (i = 0x1e00; i <= 0x1e1f; i++, direct_ptr++, indirect_ptr++)
-		ql_get_both_serdes(qdev, i, direct_ptr, indirect_ptr,
-					xfi_direct_valid, xfi_indirect_valid);
-	return 0;
-}
-
-static int ql_read_other_func_xgmac_reg(struct ql_adapter *qdev, u32 reg,
-							u32 *data)
-{
-	int status = 0;
-
-	/* wait for reg to come ready */
-	status = ql_wait_other_func_reg_rdy(qdev, XGMAC_ADDR / 4,
-						XGMAC_ADDR_RDY, XGMAC_ADDR_XME);
-	if (status)
-		goto exit;
-
-	/* set up for reg read */
-	ql_write_other_func_reg(qdev, XGMAC_ADDR / 4, reg | XGMAC_ADDR_R);
-
-	/* wait for reg to come ready */
-	status = ql_wait_other_func_reg_rdy(qdev, XGMAC_ADDR / 4,
-						XGMAC_ADDR_RDY, XGMAC_ADDR_XME);
-	if (status)
-		goto exit;
-
-	/* get the data */
-	*data = ql_read_other_func_reg(qdev, XGMAC_DATA / 4);
-exit:
-	return status;
-}
-
-/* Read the 400 xgmac control/statistics registers
- * skipping unused locations.
- */
-static int ql_get_xgmac_regs(struct ql_adapter *qdev, u32 *buf,
-					unsigned int other_function)
-{
-	int status = 0;
-	int i;
-
-	for (i = PAUSE_SRC_LO; i < XGMAC_REGISTER_END; i += 4, buf++) {
-		/* We're reading 400 xgmac registers, but we filter out
-		 * several locations that are non-responsive to reads.
-		 */
-		if ((i == 0x00000114) ||
-			(i == 0x00000118) ||
-			(i == 0x0000013c) ||
-			(i == 0x00000140) ||
-			(i > 0x00000150 && i < 0x000001fc) ||
-			(i > 0x00000278 && i < 0x000002a0) ||
-			(i > 0x000002c0 && i < 0x000002cf) ||
-			(i > 0x000002dc && i < 0x000002f0) ||
-			(i > 0x000003c8 && i < 0x00000400) ||
-			(i > 0x00000400 && i < 0x00000410) ||
-			(i > 0x00000410 && i < 0x00000420) ||
-			(i > 0x00000420 && i < 0x00000430) ||
-			(i > 0x00000430 && i < 0x00000440) ||
-			(i > 0x00000440 && i < 0x00000450) ||
-			(i > 0x00000450 && i < 0x00000500) ||
-			(i > 0x0000054c && i < 0x00000568) ||
-			(i > 0x000005c8 && i < 0x00000600)) {
-			/* Dead fill the skipped location. */
-			*buf = 0xdeadbeef;
-			continue;
-		}
-
-		if (other_function)
-			status = ql_read_other_func_xgmac_reg(qdev, i, buf);
-		else
-			status = ql_read_xgmac_reg(qdev, i, buf);
-
-		if (status)
-			*buf = 0xdeadbeef;
-	}
-	return status;
-}
-
-static int ql_get_ets_regs(struct ql_adapter *qdev, u32 *buf)
-{
-	int status = 0;
-	int i;
-
-	for (i = 0; i < 8; i++, buf++) {
-		ql_write32(qdev, NIC_ETS, i << 29 | 0x08000000);
-		*buf = ql_read32(qdev, NIC_ETS);
-	}
-
-	for (i = 0; i < 2; i++, buf++) {
-		ql_write32(qdev, CNA_ETS, i << 29 | 0x08000000);
-		*buf = ql_read32(qdev, CNA_ETS);
-	}
-
-	return status;
-}
-
-static void ql_get_intr_states(struct ql_adapter *qdev, u32 *buf)
-{
-	int i;
-
-	for (i = 0; i < qdev->rx_ring_count; i++, buf++) {
-		ql_write32(qdev, INTR_EN,
-				qdev->intr_context[i].intr_read_mask);
-		*buf = ql_read32(qdev, INTR_EN);
-	}
-}
-
-static int ql_get_cam_entries(struct ql_adapter *qdev, u32 *buf)
-{
-	int i, status;
-	u32 value[3];
-
-	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		return status;
-
-	for (i = 0; i < 16; i++) {
-		status = ql_get_mac_addr_reg(qdev,
-					MAC_ADDR_TYPE_CAM_MAC, i, value);
-		if (status) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Failed read of mac index register\n");
-			goto err;
-		}
-		*buf++ = value[0];	/* lower MAC address */
-		*buf++ = value[1];	/* upper MAC address */
-		*buf++ = value[2];	/* output */
-	}
-	for (i = 0; i < 32; i++) {
-		status = ql_get_mac_addr_reg(qdev,
-					MAC_ADDR_TYPE_MULTI_MAC, i, value);
-		if (status) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Failed read of mac index register\n");
-			goto err;
-		}
-		*buf++ = value[0];	/* lower Mcast address */
-		*buf++ = value[1];	/* upper Mcast address */
-	}
-err:
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-	return status;
-}
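-/* Note: the loop above emits 16 * 3 unicast words followed by
- * 32 * 2 multicast words, 112 words in total -- slightly less than
- * the cam_entries[] array, which is sized at 3 words per entry for
- * both types.
- */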
-
-static int ql_get_routing_entries(struct ql_adapter *qdev, u32 *buf)
-{
-	int status;
-	u32 value, i;
-
-	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
-	if (status)
-		return status;
-
-	for (i = 0; i < 16; i++) {
-		status = ql_get_routing_reg(qdev, i, &value);
-		if (status) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Failed read of routing index register\n");
-			goto err;
-		} else {
-			*buf++ = value;
-		}
-	}
-err:
-	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
-	return status;
-}
-
-/* Read the MPI Processor shadow registers */
-static int ql_get_mpi_shadow_regs(struct ql_adapter *qdev, u32 *buf)
-{
-	u32 i;
-	int status;
-
-	for (i = 0; i < MPI_CORE_SH_REGS_CNT; i++, buf++) {
-		status = ql_write_mpi_reg(qdev, RISC_124,
-				(SHADOW_OFFSET | i << SHADOW_REG_SHIFT));
-		if (status)
-			goto end;
-		status = ql_read_mpi_reg(qdev, RISC_127, buf);
-		if (status)
-			goto end;
-	}
-end:
-	return status;
-}
-
-/* Read the MPI Processor core registers */
-static int ql_get_mpi_regs(struct ql_adapter *qdev, u32 *buf,
-				u32 offset, u32 count)
-{
-	int i, status = 0;
-
-	for (i = 0; i < count; i++, buf++) {
-		status = ql_read_mpi_reg(qdev, offset + i, buf);
-		if (status)
-			return status;
-	}
-	return status;
-}
-
-/* Read the ASIC probe dump */
-static unsigned int *ql_get_probe(struct ql_adapter *qdev, u32 clock,
-					u32 valid, u32 *buf)
-{
-	u32 module, mux_sel, probe, lo_val, hi_val;
-
-	for (module = 0; module < PRB_MX_ADDR_MAX_MODS; module++) {
-		if (!((valid >> module) & 1))
-			continue;
-		for (mux_sel = 0; mux_sel < PRB_MX_ADDR_MAX_MUX; mux_sel++) {
-			probe = clock
-				| PRB_MX_ADDR_ARE
-				| mux_sel
-				| (module << PRB_MX_ADDR_MOD_SEL_SHIFT);
-			ql_write32(qdev, PRB_MX_ADDR, probe);
-			lo_val = ql_read32(qdev, PRB_MX_DATA);
-			if (mux_sel == 0) {
-				*buf = probe;
-				buf++;
-			}
-			probe |= PRB_MX_ADDR_UP;
-			ql_write32(qdev, PRB_MX_ADDR, probe);
-			hi_val = ql_read32(qdev, PRB_MX_DATA);
-			*buf = lo_val;
-			buf++;
-			*buf = hi_val;
-			buf++;
-		}
-	}
-	return buf;
-}
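-/* Buffer layout note: each enabled module contributes one probe
- * control word (emitted at mux_sel 0) followed by a lo/hi data pair
- * per mux, i.e. 1 + 2 * PRB_MX_ADDR_MAX_MUX words.
- */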
-
-static int ql_get_probe_dump(struct ql_adapter *qdev, unsigned int *buf)
-{
-	/* First we have to enable the probe mux */
-	ql_write_mpi_reg(qdev, MPI_TEST_FUNC_PRB_CTL, MPI_TEST_FUNC_PRB_EN);
-	buf = ql_get_probe(qdev, PRB_MX_ADDR_SYS_CLOCK,
-			PRB_MX_ADDR_VALID_SYS_MOD, buf);
-	buf = ql_get_probe(qdev, PRB_MX_ADDR_PCI_CLOCK,
-			PRB_MX_ADDR_VALID_PCI_MOD, buf);
-	buf = ql_get_probe(qdev, PRB_MX_ADDR_XGM_CLOCK,
-			PRB_MX_ADDR_VALID_XGM_MOD, buf);
-	buf = ql_get_probe(qdev, PRB_MX_ADDR_FC_CLOCK,
-			PRB_MX_ADDR_VALID_FC_MOD, buf);
-	return 0;
-}
-
-/* Read out the routing index registers */
-static int ql_get_routing_index_registers(struct ql_adapter *qdev, u32 *buf)
-{
-	int status;
-	u32 type, index, index_max;
-	u32 result_index;
-	u32 result_data;
-	u32 val;
-
-	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
-	if (status)
-		return status;
-
-	for (type = 0; type < 4; type++) {
-		if (type < 2)
-			index_max = 8;
-		else
-			index_max = 16;
-		for (index = 0; index < index_max; index++) {
-			val = RT_IDX_RS
-				| (type << RT_IDX_TYPE_SHIFT)
-				| (index << RT_IDX_IDX_SHIFT);
-			ql_write32(qdev, RT_IDX, val);
-			result_index = 0;
-			while ((result_index & RT_IDX_MR) == 0)
-				result_index = ql_read32(qdev, RT_IDX);
-			result_data = ql_read32(qdev, RT_DATA);
-			*buf = type;
-			buf++;
-			*buf = index;
-			buf++;
-			*buf = result_index;
-			buf++;
-			*buf = result_data;
-			buf++;
-		}
-	}
-	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
-	return status;
-}
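-/* Layout note: each probed index emits four words (type, index,
- * result index, result data), so the loops above produce
- * (8 + 8 + 16 + 16) * 4 = 192 words in the caller's buffer.
- */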
-
-/* Read out the MAC protocol registers */
-static void ql_get_mac_protocol_registers(struct ql_adapter *qdev, u32 *buf)
-{
-	u32 result_index, result_data;
-	u32 type;
-	u32 index;
-	u32 offset;
-	u32 val;
-	u32 initial_val = MAC_ADDR_RS;
-	u32 max_index;
-	u32 max_offset;
-
-	for (type = 0; type < MAC_ADDR_TYPE_COUNT; type++) {
-		switch (type) {
-		case 0: /* CAM */
-			initial_val |= MAC_ADDR_ADR;
-			max_index = MAC_ADDR_MAX_CAM_ENTRIES;
-			max_offset = MAC_ADDR_MAX_CAM_WCOUNT;
-			break;
-		case 1: /* Multicast MAC Address */
-			max_index = MAC_ADDR_MAX_CAM_WCOUNT;
-			max_offset = MAC_ADDR_MAX_CAM_WCOUNT;
-			break;
-		case 2: /* VLAN filter mask */
-		case 3: /* MC filter mask */
-			max_index = MAC_ADDR_MAX_CAM_WCOUNT;
-			max_offset = MAC_ADDR_MAX_CAM_WCOUNT;
-			break;
-		case 4: /* FC MAC addresses */
-			max_index = MAC_ADDR_MAX_FC_MAC_ENTRIES;
-			max_offset = MAC_ADDR_MAX_FC_MAC_WCOUNT;
-			break;
-		case 5: /* Mgmt MAC addresses */
-			max_index = MAC_ADDR_MAX_MGMT_MAC_ENTRIES;
-			max_offset = MAC_ADDR_MAX_MGMT_MAC_WCOUNT;
-			break;
-		case 6: /* Mgmt VLAN addresses */
-			max_index = MAC_ADDR_MAX_MGMT_VLAN_ENTRIES;
-			max_offset = MAC_ADDR_MAX_MGMT_VLAN_WCOUNT;
-			break;
-		case 7: /* Mgmt IPv4 address */
-			max_index = MAC_ADDR_MAX_MGMT_V4_ENTRIES;
-			max_offset = MAC_ADDR_MAX_MGMT_V4_WCOUNT;
-			break;
-		case 8: /* Mgmt IPv6 address */
-			max_index = MAC_ADDR_MAX_MGMT_V6_ENTRIES;
-			max_offset = MAC_ADDR_MAX_MGMT_V6_WCOUNT;
-			break;
-		case 9: /* Mgmt TCP/UDP Dest port */
-			max_index = MAC_ADDR_MAX_MGMT_TU_DP_ENTRIES;
-			max_offset = MAC_ADDR_MAX_MGMT_TU_DP_WCOUNT;
-			break;
-		default:
-			pr_err("Bad type!!! 0x%08x\n", type);
-			max_index = 0;
-			max_offset = 0;
-			break;
-		}
-		for (index = 0; index < max_index; index++) {
-			for (offset = 0; offset < max_offset; offset++) {
-				val = initial_val
-					| (type << MAC_ADDR_TYPE_SHIFT)
-					| (index << MAC_ADDR_IDX_SHIFT)
-					| (offset);
-				ql_write32(qdev, MAC_ADDR_IDX, val);
-				result_index = 0;
-				while ((result_index & MAC_ADDR_MR) == 0) {
-					result_index = ql_read32(qdev,
-								MAC_ADDR_IDX);
-				}
-				result_data = ql_read32(qdev, MAC_ADDR_DATA);
-				*buf = result_index;
-				buf++;
-				*buf = result_data;
-				buf++;
-			}
-		}
-	}
-}
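-/* Each (type, index, offset) probe above emits two words -- the
- * result index and the data word -- matching
- * MAC_ADDR_DUMP_WORDS_PER_ENTRY in qlge.h.
- */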
-
-static void ql_get_sem_registers(struct ql_adapter *qdev, u32 *buf)
-{
-	u32 func_num, reg, reg_val;
-	int status;
-
-	for (func_num = 0; func_num < MAX_SEMAPHORE_FUNCTIONS ; func_num++) {
-		reg = MPI_NIC_REG_BLOCK
-			| (func_num << MPI_NIC_FUNCTION_SHIFT)
-			| (SEM / 4);
-		status = ql_read_mpi_reg(qdev, reg, &reg_val);
-		*buf = reg_val;
-		/* if the read failed then dead fill the element. */
-		if (!status)
-			*buf = 0xdeadbeef;
-		buf++;
-	}
-}
-
-/* Create a coredump segment header */
-static void ql_build_coredump_seg_header(
-		struct mpi_coredump_segment_header *seg_hdr,
-		u32 seg_number, u32 seg_size, u8 *desc)
-{
-	memset(seg_hdr, 0, sizeof(struct mpi_coredump_segment_header));
-	seg_hdr->cookie = MPI_COREDUMP_COOKIE;
-	seg_hdr->segNum = seg_number;
-	seg_hdr->segSize = seg_size;
-	strncpy(seg_hdr->description, desc, (sizeof(seg_hdr->description)) - 1);
-}
-
-/*
- * This function should be called when a coredump / probedump
- * is to be extracted from the HBA. It is assumed there is a
- * qdev structure that contains the base address of the register
- * space for this function as well as a coredump structure that
- * will contain the dump.
- */
-int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump)
-{
-	int status;
-	int i;
-
-	if (!mpi_coredump) {
-		netif_err(qdev, drv, qdev->ndev, "No memory allocated\n");
-		return -EINVAL;
-	}
-
-	/* Try to get the spinlock, but don't worry if
-	 * it isn't available.  If the firmware died it
-	 * might be holding the sem.
-	 */
-	ql_sem_spinlock(qdev, SEM_PROC_REG_MASK);
-
-	status = ql_pause_mpi_risc(qdev);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed RISC pause. Status = 0x%.08x\n", status);
-		goto err;
-	}
-
-	/* Insert the global header */
-	memset(&(mpi_coredump->mpi_global_header), 0,
-		sizeof(struct mpi_coredump_global_header));
-	mpi_coredump->mpi_global_header.cookie = MPI_COREDUMP_COOKIE;
-	mpi_coredump->mpi_global_header.headerSize =
-		sizeof(struct mpi_coredump_global_header);
-	mpi_coredump->mpi_global_header.imageSize =
-		sizeof(struct ql_mpi_coredump);
-	strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump",
-		sizeof(mpi_coredump->mpi_global_header.idString));
-
-	/* Get generic NIC reg dump */
-	ql_build_coredump_seg_header(&mpi_coredump->nic_regs_seg_hdr,
-			NIC1_CONTROL_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->nic_regs), "NIC1 Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->nic2_regs_seg_hdr,
-			NIC2_CONTROL_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->nic2_regs), "NIC2 Registers");
-
-	/* Get XGMac registers. (Segment 18, Rev C. step 21) */
-	ql_build_coredump_seg_header(&mpi_coredump->xgmac1_seg_hdr,
-			NIC1_XGMAC_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->xgmac1), "NIC1 XGMac Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xgmac2_seg_hdr,
-			NIC2_XGMAC_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->xgmac2), "NIC2 XGMac Registers");
-
-	if (qdev->func & 1) {
-		/* Odd means our function is NIC 2 */
-		for (i = 0; i < NIC_REGS_DUMP_WORD_COUNT; i++)
-			mpi_coredump->nic2_regs[i] =
-					 ql_read32(qdev, i * sizeof(u32));
-
-		for (i = 0; i < NIC_REGS_DUMP_WORD_COUNT; i++)
-			mpi_coredump->nic_regs[i] =
-			ql_read_other_func_reg(qdev, (i * sizeof(u32)) / 4);
-
-		ql_get_xgmac_regs(qdev, &mpi_coredump->xgmac2[0], 0);
-		ql_get_xgmac_regs(qdev, &mpi_coredump->xgmac1[0], 1);
-	} else {
-		/* Even means our function is NIC 1 */
-		for (i = 0; i < NIC_REGS_DUMP_WORD_COUNT; i++)
-			mpi_coredump->nic_regs[i] =
-					ql_read32(qdev, i * sizeof(u32));
-		for (i = 0; i < NIC_REGS_DUMP_WORD_COUNT; i++)
-			mpi_coredump->nic2_regs[i] =
-			ql_read_other_func_reg(qdev, (i * sizeof(u32)) / 4);
-
-		ql_get_xgmac_regs(qdev, &mpi_coredump->xgmac1[0], 0);
-		ql_get_xgmac_regs(qdev, &mpi_coredump->xgmac2[0], 1);
-	}
-
-	/* Rev C. Step 20a */
-	ql_build_coredump_seg_header(&mpi_coredump->xaui_an_hdr,
-			XAUI_AN_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xaui_an),
-			"XAUI AN Registers");
-
-	/* Rev C. Step 20b */
-	ql_build_coredump_seg_header(&mpi_coredump->xaui_hss_pcs_hdr,
-			XAUI_HSS_PCS_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xaui_hss_pcs),
-			"XAUI HSS PCS Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi_an_hdr, XFI_AN_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xfi_an),
-			"XFI AN Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi_train_hdr,
-			XFI_TRAIN_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xfi_train),
-			"XFI TRAIN Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi_hss_pcs_hdr,
-			XFI_HSS_PCS_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xfi_hss_pcs),
-			"XFI HSS PCS Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi_hss_tx_hdr,
-			XFI_HSS_TX_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xfi_hss_tx),
-			"XFI HSS TX Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi_hss_rx_hdr,
-			XFI_HSS_RX_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xfi_hss_rx),
-			"XFI HSS RX Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi_hss_pll_hdr,
-			XFI_HSS_PLL_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes_xfi_hss_pll),
-			"XFI HSS PLL Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xaui2_an_hdr,
-			XAUI2_AN_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xaui_an),
-			"XAUI2 AN Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xaui2_hss_pcs_hdr,
-			XAUI2_HSS_PCS_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xaui_hss_pcs),
-			"XAUI2 HSS PCS Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi2_an_hdr,
-			XFI2_AN_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xfi_an),
-			"XFI2 AN Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi2_train_hdr,
-			XFI2_TRAIN_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xfi_train),
-			"XFI2 TRAIN Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi2_hss_pcs_hdr,
-			XFI2_HSS_PCS_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xfi_hss_pcs),
-			"XFI2 HSS PCS Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi2_hss_tx_hdr,
-			XFI2_HSS_TX_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xfi_hss_tx),
-			"XFI2 HSS TX Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi2_hss_rx_hdr,
-			XFI2_HSS_RX_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xfi_hss_rx),
-			"XFI2 HSS RX Registers");
-
-	ql_build_coredump_seg_header(&mpi_coredump->xfi2_hss_pll_hdr,
-			XFI2_HSS_PLL_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->serdes2_xfi_hss_pll),
-			"XFI2 HSS PLL Registers");
-
-	status = ql_get_serdes_regs(qdev, mpi_coredump);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed Dump of Serdes Registers. Status = 0x%.08x\n",
-			  status);
-		goto err;
-	}
-
-	ql_build_coredump_seg_header(&mpi_coredump->core_regs_seg_hdr,
-				CORE_SEG_NUM,
-				sizeof(mpi_coredump->core_regs_seg_hdr) +
-				sizeof(mpi_coredump->mpi_core_regs) +
-				sizeof(mpi_coredump->mpi_core_sh_regs),
-				"Core Registers");
-
-	/* Get the MPI Core Registers */
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->mpi_core_regs[0],
-				 MPI_CORE_REGS_ADDR, MPI_CORE_REGS_CNT);
-	if (status)
-		goto err;
-	/* Get the 16 MPI shadow registers */
-	status = ql_get_mpi_shadow_regs(qdev,
-					&mpi_coredump->mpi_core_sh_regs[0]);
-	if (status)
-		goto err;
-
-	/* Get the Test Logic Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->test_logic_regs_seg_hdr,
-				TEST_LOGIC_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->test_logic_regs),
-				"Test Logic Regs");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->test_logic_regs[0],
-				 TEST_REGS_ADDR, TEST_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the RMII Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->rmii_regs_seg_hdr,
-				RMII_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->rmii_regs),
-				"RMII Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->rmii_regs[0],
-				 RMII_REGS_ADDR, RMII_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the FCMAC1 Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->fcmac1_regs_seg_hdr,
-				FCMAC1_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->fcmac1_regs),
-				"FCMAC1 Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->fcmac1_regs[0],
-				 FCMAC1_REGS_ADDR, FCMAC_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the FCMAC2 Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->fcmac2_regs_seg_hdr,
-				FCMAC2_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->fcmac2_regs),
-				"FCMAC2 Registers");
-
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->fcmac2_regs[0],
-				 FCMAC2_REGS_ADDR, FCMAC_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the FC1 MBX Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->fc1_mbx_regs_seg_hdr,
-				FC1_MBOX_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->fc1_mbx_regs),
-				"FC1 MBox Regs");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->fc1_mbx_regs[0],
-				 FC1_MBX_REGS_ADDR, FC_MBX_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the IDE Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->ide_regs_seg_hdr,
-				IDE_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->ide_regs),
-				"IDE Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->ide_regs[0],
-				 IDE_REGS_ADDR, IDE_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the NIC1 MBX Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->nic1_mbx_regs_seg_hdr,
-				NIC1_MBOX_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->nic1_mbx_regs),
-				"NIC1 MBox Regs");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->nic1_mbx_regs[0],
-				 NIC1_MBX_REGS_ADDR, NIC_MBX_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the SMBus Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->smbus_regs_seg_hdr,
-				SMBUS_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->smbus_regs),
-				"SMBus Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->smbus_regs[0],
-				 SMBUS_REGS_ADDR, SMBUS_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the FC2 MBX Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->fc2_mbx_regs_seg_hdr,
-				FC2_MBOX_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->fc2_mbx_regs),
-				"FC2 MBox Regs");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->fc2_mbx_regs[0],
-				 FC2_MBX_REGS_ADDR, FC_MBX_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the NIC2 MBX Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->nic2_mbx_regs_seg_hdr,
-				NIC2_MBOX_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->nic2_mbx_regs),
-				"NIC2 MBox Regs");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->nic2_mbx_regs[0],
-				 NIC2_MBX_REGS_ADDR, NIC_MBX_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the I2C Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->i2c_regs_seg_hdr,
-				I2C_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->i2c_regs),
-				"I2C Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->i2c_regs[0],
-				 I2C_REGS_ADDR, I2C_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the MEMC Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->memc_regs_seg_hdr,
-				MEMC_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->memc_regs),
-				"MEMC Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->memc_regs[0],
-				 MEMC_REGS_ADDR, MEMC_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the PBus Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->pbus_regs_seg_hdr,
-				PBUS_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->pbus_regs),
-				"PBUS Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->pbus_regs[0],
-				 PBUS_REGS_ADDR, PBUS_REGS_CNT);
-	if (status)
-		goto err;
-
-	/* Get the MDE Registers */
-	ql_build_coredump_seg_header(&mpi_coredump->mde_regs_seg_hdr,
-				MDE_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->mde_regs),
-				"MDE Registers");
-	status = ql_get_mpi_regs(qdev, &mpi_coredump->mde_regs[0],
-				 MDE_REGS_ADDR, MDE_REGS_CNT);
-	if (status)
-		goto err;
-
-	ql_build_coredump_seg_header(&mpi_coredump->misc_nic_seg_hdr,
-				MISC_NIC_INFO_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->misc_nic_info),
-				"MISC NIC INFO");
-	mpi_coredump->misc_nic_info.rx_ring_count = qdev->rx_ring_count;
-	mpi_coredump->misc_nic_info.tx_ring_count = qdev->tx_ring_count;
-	mpi_coredump->misc_nic_info.intr_count = qdev->intr_count;
-	mpi_coredump->misc_nic_info.function = qdev->func;
-
-	/* Segment 31 */
-	/* Get indexed register values. */
-	ql_build_coredump_seg_header(&mpi_coredump->intr_states_seg_hdr,
-				INTR_STATES_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->intr_states),
-				"INTR States");
-	ql_get_intr_states(qdev, &mpi_coredump->intr_states[0]);
-
-	ql_build_coredump_seg_header(&mpi_coredump->cam_entries_seg_hdr,
-				CAM_ENTRIES_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->cam_entries),
-				"CAM Entries");
-	status = ql_get_cam_entries(qdev, &mpi_coredump->cam_entries[0]);
-	if (status)
-		goto err;
-
-	ql_build_coredump_seg_header(&mpi_coredump->nic_routing_words_seg_hdr,
-				ROUTING_WORDS_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->nic_routing_words),
-				"Routing Words");
-	status = ql_get_routing_entries(qdev,
-			 &mpi_coredump->nic_routing_words[0]);
-	if (status)
-		goto err;
-
-	/* Segment 34 (Rev C. step 23) */
-	ql_build_coredump_seg_header(&mpi_coredump->ets_seg_hdr,
-				ETS_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->ets),
-				"ETS Registers");
-	status = ql_get_ets_regs(qdev, &mpi_coredump->ets[0]);
-	if (status)
-		goto err;
-
-	ql_build_coredump_seg_header(&mpi_coredump->probe_dump_seg_hdr,
-				PROBE_DUMP_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->probe_dump),
-				"Probe Dump");
-	ql_get_probe_dump(qdev, &mpi_coredump->probe_dump[0]);
-
-	ql_build_coredump_seg_header(&mpi_coredump->routing_reg_seg_hdr,
-				ROUTING_INDEX_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->routing_regs),
-				"Routing Regs");
-	status = ql_get_routing_index_registers(qdev,
-					&mpi_coredump->routing_regs[0]);
-	if (status)
-		goto err;
-
-	ql_build_coredump_seg_header(&mpi_coredump->mac_prot_reg_seg_hdr,
-				MAC_PROTOCOL_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->mac_prot_regs),
-				"MAC Prot Regs");
-	ql_get_mac_protocol_registers(qdev, &mpi_coredump->mac_prot_regs[0]);
-
-	/* Get the per-function semaphore registers */
-	ql_build_coredump_seg_header(&mpi_coredump->sem_regs_seg_hdr,
-			SEM_REGS_SEG_NUM,
-			sizeof(struct mpi_coredump_segment_header) +
-			sizeof(mpi_coredump->sem_regs),	"Sem Registers");
-
-	ql_get_sem_registers(qdev, &mpi_coredump->sem_regs[0]);
-
-	/* Prevent the MPI restarting while we dump the memory. */
-	ql_write_mpi_reg(qdev, MPI_TEST_FUNC_RST_STS, MPI_TEST_FUNC_RST_FRC);
-
-	/* clear the pause */
-	status = ql_unpause_mpi_risc(qdev);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed RISC unpause. Status = 0x%.08x\n", status);
-		goto err;
-	}
-
-	/* Reset the RISC so we can dump RAM */
-	status = ql_hard_reset_mpi_risc(qdev);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed RISC reset. Status = 0x%.08x\n", status);
-		goto err;
-	}
-
-	ql_build_coredump_seg_header(&mpi_coredump->code_ram_seg_hdr,
-				WCS_RAM_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->code_ram),
-				"WCS RAM");
-	status = ql_dump_risc_ram_area(qdev, &mpi_coredump->code_ram[0],
-					CODE_RAM_ADDR, CODE_RAM_CNT);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed Dump of CODE RAM. Status = 0x%.08x\n",
-			  status);
-		goto err;
-	}
-
-	/* Insert the segment header */
-	ql_build_coredump_seg_header(&mpi_coredump->memc_ram_seg_hdr,
-				MEMC_RAM_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->memc_ram),
-				"MEMC RAM");
-	status = ql_dump_risc_ram_area(qdev, &mpi_coredump->memc_ram[0],
-					MEMC_RAM_ADDR, MEMC_RAM_CNT);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed Dump of MEMC RAM. Status = 0x%.08x\n",
-			  status);
-		goto err;
-	}
-err:
-	ql_sem_unlock(qdev, SEM_PROC_REG_MASK); /* does flush too */
-	return status;
-}
-
-static void ql_get_core_dump(struct ql_adapter *qdev)
-{
-	if (!ql_own_firmware(qdev)) {
-		netif_err(qdev, drv, qdev->ndev, "Don't own firmware!\n");
-		return;
-	}
-
-	if (!netif_running(qdev->ndev)) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Force Coredump can only be done from interface that is up\n");
-		return;
-	}
-	ql_queue_fw_error(qdev);
-}
-
-static void ql_gen_reg_dump(struct ql_adapter *qdev,
-			    struct ql_reg_dump *mpi_coredump)
-{
-	int i, status;
-
-	memset(&(mpi_coredump->mpi_global_header), 0,
-		sizeof(struct mpi_coredump_global_header));
-	mpi_coredump->mpi_global_header.cookie = MPI_COREDUMP_COOKIE;
-	mpi_coredump->mpi_global_header.headerSize =
-		sizeof(struct mpi_coredump_global_header);
-	mpi_coredump->mpi_global_header.imageSize =
-		sizeof(struct ql_reg_dump);
-	strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump",
-		sizeof(mpi_coredump->mpi_global_header.idString));
-
-	/* segment 30 */
-	ql_build_coredump_seg_header(&mpi_coredump->misc_nic_seg_hdr,
-				MISC_NIC_INFO_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->misc_nic_info),
-				"MISC NIC INFO");
-	mpi_coredump->misc_nic_info.rx_ring_count = qdev->rx_ring_count;
-	mpi_coredump->misc_nic_info.tx_ring_count = qdev->tx_ring_count;
-	mpi_coredump->misc_nic_info.intr_count = qdev->intr_count;
-	mpi_coredump->misc_nic_info.function = qdev->func;
-
-	/* Segment 16, Rev C. Step 18 */
-	ql_build_coredump_seg_header(&mpi_coredump->nic_regs_seg_hdr,
-				NIC1_CONTROL_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->nic_regs),
-				"NIC Registers");
-	/* Get generic reg dump */
-	for (i = 0; i < 64; i++)
-		mpi_coredump->nic_regs[i] = ql_read32(qdev, i * sizeof(u32));
-
-	/* Segment 31 */
-	/* Get indexed register values. */
-	ql_build_coredump_seg_header(&mpi_coredump->intr_states_seg_hdr,
-				INTR_STATES_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->intr_states),
-				"INTR States");
-	ql_get_intr_states(qdev, &mpi_coredump->intr_states[0]);
-
-	ql_build_coredump_seg_header(&mpi_coredump->cam_entries_seg_hdr,
-				CAM_ENTRIES_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->cam_entries),
-				"CAM Entries");
-	status = ql_get_cam_entries(qdev, &mpi_coredump->cam_entries[0]);
-	if (status)
-		return;
-
-	ql_build_coredump_seg_header(&mpi_coredump->nic_routing_words_seg_hdr,
-				ROUTING_WORDS_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->nic_routing_words),
-				"Routing Words");
-	status = ql_get_routing_entries(qdev,
-			 &mpi_coredump->nic_routing_words[0]);
-	if (status)
-		return;
-
-	/* Segment 34 (Rev C. step 23) */
-	ql_build_coredump_seg_header(&mpi_coredump->ets_seg_hdr,
-				ETS_SEG_NUM,
-				sizeof(struct mpi_coredump_segment_header)
-				+ sizeof(mpi_coredump->ets),
-				"ETS Registers");
-	status = ql_get_ets_regs(qdev, &mpi_coredump->ets[0]);
-	if (status)
-		return;
-}
-
-void ql_get_dump(struct ql_adapter *qdev, void *buff)
-{
-	/*
-	 * If force coredump is not set, take a complete core dump
-	 * straight to the user's buffer.  Otherwise, copy only a
-	 * snapshot of the general registers to the user's buffer and
-	 * spool the full core dump to the log file via the worker
-	 * thread.
-	 */
-
-	if (!test_bit(QL_FRC_COREDUMP, &qdev->flags)) {
-		if (!ql_core_dump(qdev, buff))
-			ql_soft_reset_mpi_risc(qdev);
-		else
-			netif_err(qdev, drv, qdev->ndev, "coredump failed!\n");
-	} else {
-		ql_gen_reg_dump(qdev, buff);
-		ql_get_core_dump(qdev);
-	}
-}
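-/* Caller note (an inference from the logic above, not a documented
- * contract): buff must hold a full struct ql_mpi_coredump when
- * QL_FRC_COREDUMP is clear, or a struct ql_reg_dump snapshot when
- * it is set.
- */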
-
-/* Coredump to messages log file using separate worker thread */
-void ql_mpi_core_to_log(struct work_struct *work)
-{
-	struct ql_adapter *qdev =
-		container_of(work, struct ql_adapter, mpi_core_to_log.work);
-	u32 *tmp, count;
-	int i;
-
-	count = sizeof(struct ql_mpi_coredump) / sizeof(u32);
-	tmp = (u32 *)qdev->mpi_coredump;
-	netif_printk(qdev, drv, KERN_DEBUG, qdev->ndev,
-		     "Core is dumping to log file!\n");
-
-	for (i = 0; i < count; i += 8) {
-		pr_err("%.08x: %.08x %.08x %.08x %.08x %.08x "
-			"%.08x %.08x %.08x\n", i,
-			tmp[i + 0],
-			tmp[i + 1],
-			tmp[i + 2],
-			tmp[i + 3],
-			tmp[i + 4],
-			tmp[i + 5],
-			tmp[i + 6],
-			tmp[i + 7]);
-		msleep(5);
-	}
-}
-
-#ifdef QL_REG_DUMP
-static void ql_dump_intr_states(struct ql_adapter *qdev)
-{
-	int i;
-	u32 value;
-	for (i = 0; i < qdev->intr_count; i++) {
-		ql_write32(qdev, INTR_EN, qdev->intr_context[i].intr_read_mask);
-		value = ql_read32(qdev, INTR_EN);
-		pr_err("%s: Interrupt %d is %s\n",
-		       qdev->ndev->name, i,
-		       (value & INTR_EN_EN ? "enabled" : "disabled"));
-	}
-}
-
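-/* Read one indirect XGMAC register via ql_read_xgmac_reg() and print it
- * with its symbolic name (stringified by the preprocessor).
- */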
-#define DUMP_XGMAC(qdev, reg)					\
-do {								\
-	u32 data;						\
-	ql_read_xgmac_reg(qdev, reg, &data);			\
-	pr_err("%s: %s = 0x%.08x\n", qdev->ndev->name, #reg, data); \
-} while (0)
-
-void ql_dump_xgmac_control_regs(struct ql_adapter *qdev)
-{
-	if (ql_sem_spinlock(qdev, qdev->xg_sem_mask)) {
-		pr_err("%s: Couldn't get xgmac sem\n", __func__);
-		return;
-	}
-	DUMP_XGMAC(qdev, PAUSE_SRC_LO);
-	DUMP_XGMAC(qdev, PAUSE_SRC_HI);
-	DUMP_XGMAC(qdev, GLOBAL_CFG);
-	DUMP_XGMAC(qdev, TX_CFG);
-	DUMP_XGMAC(qdev, RX_CFG);
-	DUMP_XGMAC(qdev, FLOW_CTL);
-	DUMP_XGMAC(qdev, PAUSE_OPCODE);
-	DUMP_XGMAC(qdev, PAUSE_TIMER);
-	DUMP_XGMAC(qdev, PAUSE_FRM_DEST_LO);
-	DUMP_XGMAC(qdev, PAUSE_FRM_DEST_HI);
-	DUMP_XGMAC(qdev, MAC_TX_PARAMS);
-	DUMP_XGMAC(qdev, MAC_RX_PARAMS);
-	DUMP_XGMAC(qdev, MAC_SYS_INT);
-	DUMP_XGMAC(qdev, MAC_SYS_INT_MASK);
-	DUMP_XGMAC(qdev, MAC_MGMT_INT);
-	DUMP_XGMAC(qdev, MAC_MGMT_IN_MASK);
-	DUMP_XGMAC(qdev, EXT_ARB_MODE);
-	ql_sem_unlock(qdev, qdev->xg_sem_mask);
-}
-
-static void ql_dump_ets_regs(struct ql_adapter *qdev)
-{
-}
-
-static void ql_dump_cam_entries(struct ql_adapter *qdev)
-{
-	int i;
-	u32 value[3];
-
-	i = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (i)
-		return;
-	for (i = 0; i < 4; i++) {
-		if (ql_get_mac_addr_reg(qdev, MAC_ADDR_TYPE_CAM_MAC, i, value)) {
-			pr_err("%s: Failed read of mac index register\n",
-			       __func__);
-			return;
-		} else {
-			if (value[0])
-				pr_err("%s: CAM index %d CAM Lookup Lower = 0x%.08x:%.08x, Output = 0x%.08x\n",
-				       qdev->ndev->name, i, value[1], value[0],
-				       value[2]);
-		}
-	}
-	for (i = 0; i < 32; i++) {
-		if (ql_get_mac_addr_reg
-		    (qdev, MAC_ADDR_TYPE_MULTI_MAC, i, value)) {
-			pr_err("%s: Failed read of mac index register\n",
-			       __func__);
-			return;
-		} else {
-			if (value[0])
-				pr_err("%s: MCAST index %d CAM Lookup Lower = 0x%.08x:%.08x\n",
-				       qdev->ndev->name, i, value[1], value[0]);
-		}
-	}
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-}
-
-void ql_dump_routing_entries(struct ql_adapter *qdev)
-{
-	int i;
-	u32 value;
-	i = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
-	if (i)
-		return;
-	for (i = 0; i < 16; i++) {
-		value = 0;
-		if (ql_get_routing_reg(qdev, i, &value)) {
-			pr_err("%s: Failed read of routing index register\n",
-			       __func__);
-			return;
-		} else {
-			if (value)
-				pr_err("%s: Routing Mask %d = 0x%.08x\n",
-				       qdev->ndev->name, i, value);
-		}
-	}
-	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
-}
-
-#define DUMP_REG(qdev, reg)			\
-	pr_err("%-32s= 0x%x\n", #reg, ql_read32(qdev, reg))
-
-void ql_dump_regs(struct ql_adapter *qdev)
-{
-	pr_err("reg dump for function #%d\n", qdev->func);
-	DUMP_REG(qdev, SYS);
-	DUMP_REG(qdev, RST_FO);
-	DUMP_REG(qdev, FSC);
-	DUMP_REG(qdev, CSR);
-	DUMP_REG(qdev, ICB_RID);
-	DUMP_REG(qdev, ICB_L);
-	DUMP_REG(qdev, ICB_H);
-	DUMP_REG(qdev, CFG);
-	DUMP_REG(qdev, BIOS_ADDR);
-	DUMP_REG(qdev, STS);
-	DUMP_REG(qdev, INTR_EN);
-	DUMP_REG(qdev, INTR_MASK);
-	DUMP_REG(qdev, ISR1);
-	DUMP_REG(qdev, ISR2);
-	DUMP_REG(qdev, ISR3);
-	DUMP_REG(qdev, ISR4);
-	DUMP_REG(qdev, REV_ID);
-	DUMP_REG(qdev, FRC_ECC_ERR);
-	DUMP_REG(qdev, ERR_STS);
-	DUMP_REG(qdev, RAM_DBG_ADDR);
-	DUMP_REG(qdev, RAM_DBG_DATA);
-	DUMP_REG(qdev, ECC_ERR_CNT);
-	DUMP_REG(qdev, SEM);
-	DUMP_REG(qdev, GPIO_1);
-	DUMP_REG(qdev, GPIO_2);
-	DUMP_REG(qdev, GPIO_3);
-	DUMP_REG(qdev, XGMAC_ADDR);
-	DUMP_REG(qdev, XGMAC_DATA);
-	DUMP_REG(qdev, NIC_ETS);
-	DUMP_REG(qdev, CNA_ETS);
-	DUMP_REG(qdev, FLASH_ADDR);
-	DUMP_REG(qdev, FLASH_DATA);
-	DUMP_REG(qdev, CQ_STOP);
-	DUMP_REG(qdev, PAGE_TBL_RID);
-	DUMP_REG(qdev, WQ_PAGE_TBL_LO);
-	DUMP_REG(qdev, WQ_PAGE_TBL_HI);
-	DUMP_REG(qdev, CQ_PAGE_TBL_LO);
-	DUMP_REG(qdev, CQ_PAGE_TBL_HI);
-	DUMP_REG(qdev, COS_DFLT_CQ1);
-	DUMP_REG(qdev, COS_DFLT_CQ2);
-	DUMP_REG(qdev, SPLT_HDR);
-	DUMP_REG(qdev, FC_PAUSE_THRES);
-	DUMP_REG(qdev, NIC_PAUSE_THRES);
-	DUMP_REG(qdev, FC_ETHERTYPE);
-	DUMP_REG(qdev, FC_RCV_CFG);
-	DUMP_REG(qdev, NIC_RCV_CFG);
-	DUMP_REG(qdev, FC_COS_TAGS);
-	DUMP_REG(qdev, NIC_COS_TAGS);
-	DUMP_REG(qdev, MGMT_RCV_CFG);
-	DUMP_REG(qdev, XG_SERDES_ADDR);
-	DUMP_REG(qdev, XG_SERDES_DATA);
-	DUMP_REG(qdev, PRB_MX_ADDR);
-	DUMP_REG(qdev, PRB_MX_DATA);
-	ql_dump_intr_states(qdev);
-	ql_dump_xgmac_control_regs(qdev);
-	ql_dump_ets_regs(qdev);
-	ql_dump_cam_entries(qdev);
-	ql_dump_routing_entries(qdev);
-}
-#endif
-
-#ifdef QL_STAT_DUMP
-
-#define DUMP_STAT(qdev, stat)	\
-	pr_err("%s = %ld\n", #stat, (unsigned long)qdev->nic_stats.stat)
-
-void ql_dump_stat(struct ql_adapter *qdev)
-{
-	pr_err("%s: Enter\n", __func__);
-	DUMP_STAT(qdev, tx_pkts);
-	DUMP_STAT(qdev, tx_bytes);
-	DUMP_STAT(qdev, tx_mcast_pkts);
-	DUMP_STAT(qdev, tx_bcast_pkts);
-	DUMP_STAT(qdev, tx_ucast_pkts);
-	DUMP_STAT(qdev, tx_ctl_pkts);
-	DUMP_STAT(qdev, tx_pause_pkts);
-	DUMP_STAT(qdev, tx_64_pkt);
-	DUMP_STAT(qdev, tx_65_to_127_pkt);
-	DUMP_STAT(qdev, tx_128_to_255_pkt);
-	DUMP_STAT(qdev, tx_256_511_pkt);
-	DUMP_STAT(qdev, tx_512_to_1023_pkt);
-	DUMP_STAT(qdev, tx_1024_to_1518_pkt);
-	DUMP_STAT(qdev, tx_1519_to_max_pkt);
-	DUMP_STAT(qdev, tx_undersize_pkt);
-	DUMP_STAT(qdev, tx_oversize_pkt);
-	DUMP_STAT(qdev, rx_bytes);
-	DUMP_STAT(qdev, rx_bytes_ok);
-	DUMP_STAT(qdev, rx_pkts);
-	DUMP_STAT(qdev, rx_pkts_ok);
-	DUMP_STAT(qdev, rx_bcast_pkts);
-	DUMP_STAT(qdev, rx_mcast_pkts);
-	DUMP_STAT(qdev, rx_ucast_pkts);
-	DUMP_STAT(qdev, rx_undersize_pkts);
-	DUMP_STAT(qdev, rx_oversize_pkts);
-	DUMP_STAT(qdev, rx_jabber_pkts);
-	DUMP_STAT(qdev, rx_undersize_fcerr_pkts);
-	DUMP_STAT(qdev, rx_drop_events);
-	DUMP_STAT(qdev, rx_fcerr_pkts);
-	DUMP_STAT(qdev, rx_align_err);
-	DUMP_STAT(qdev, rx_symbol_err);
-	DUMP_STAT(qdev, rx_mac_err);
-	DUMP_STAT(qdev, rx_ctl_pkts);
-	DUMP_STAT(qdev, rx_pause_pkts);
-	DUMP_STAT(qdev, rx_64_pkts);
-	DUMP_STAT(qdev, rx_65_to_127_pkts);
-	DUMP_STAT(qdev, rx_128_255_pkts);
-	DUMP_STAT(qdev, rx_256_511_pkts);
-	DUMP_STAT(qdev, rx_512_to_1023_pkts);
-	DUMP_STAT(qdev, rx_1024_to_1518_pkts);
-	DUMP_STAT(qdev, rx_1519_to_max_pkts);
-	DUMP_STAT(qdev, rx_len_err_pkts);
-}
-#endif
-
-#ifdef QL_DEV_DUMP
-
-#define DUMP_QDEV_FIELD(qdev, type, field)		\
-	pr_err("qdev->%-24s = " type "\n", #field, qdev->field)
-#define DUMP_QDEV_DMA_FIELD(qdev, field)		\
-	pr_err("qdev->%-24s = %llx\n", #field, (unsigned long long)qdev->field)
-#define DUMP_QDEV_ARRAY(qdev, type, array, index, field) \
-	pr_err("%s[%d].%s = " type "\n",		 \
-	       #array, index, #field, qdev->array[index].field);
-void ql_dump_qdev(struct ql_adapter *qdev)
-{
-	int i;
-	DUMP_QDEV_FIELD(qdev, "%lx", flags);
-	DUMP_QDEV_FIELD(qdev, "%p", vlgrp);
-	DUMP_QDEV_FIELD(qdev, "%p", pdev);
-	DUMP_QDEV_FIELD(qdev, "%p", ndev);
-	DUMP_QDEV_FIELD(qdev, "%d", chip_rev_id);
-	DUMP_QDEV_FIELD(qdev, "%p", reg_base);
-	DUMP_QDEV_FIELD(qdev, "%p", doorbell_area);
-	DUMP_QDEV_FIELD(qdev, "%d", doorbell_area_size);
-	DUMP_QDEV_FIELD(qdev, "%x", msg_enable);
-	DUMP_QDEV_FIELD(qdev, "%p", rx_ring_shadow_reg_area);
-	DUMP_QDEV_DMA_FIELD(qdev, rx_ring_shadow_reg_dma);
-	DUMP_QDEV_FIELD(qdev, "%p", tx_ring_shadow_reg_area);
-	DUMP_QDEV_DMA_FIELD(qdev, tx_ring_shadow_reg_dma);
-	DUMP_QDEV_FIELD(qdev, "%d", intr_count);
-	if (qdev->msi_x_entry)
-		for (i = 0; i < qdev->intr_count; i++) {
-			DUMP_QDEV_ARRAY(qdev, "%d", msi_x_entry, i, vector);
-			DUMP_QDEV_ARRAY(qdev, "%d", msi_x_entry, i, entry);
-		}
-	for (i = 0; i < qdev->intr_count; i++) {
-		DUMP_QDEV_ARRAY(qdev, "%p", intr_context, i, qdev);
-		DUMP_QDEV_ARRAY(qdev, "%d", intr_context, i, intr);
-		DUMP_QDEV_ARRAY(qdev, "%d", intr_context, i, hooked);
-		DUMP_QDEV_ARRAY(qdev, "0x%08x", intr_context, i, intr_en_mask);
-		DUMP_QDEV_ARRAY(qdev, "0x%08x", intr_context, i, intr_dis_mask);
-		DUMP_QDEV_ARRAY(qdev, "0x%08x", intr_context, i, intr_read_mask);
-	}
-	DUMP_QDEV_FIELD(qdev, "%d", tx_ring_count);
-	DUMP_QDEV_FIELD(qdev, "%d", rx_ring_count);
-	DUMP_QDEV_FIELD(qdev, "%d", ring_mem_size);
-	DUMP_QDEV_FIELD(qdev, "%p", ring_mem);
-	DUMP_QDEV_FIELD(qdev, "%d", intr_count);
-	DUMP_QDEV_FIELD(qdev, "%p", tx_ring);
-	DUMP_QDEV_FIELD(qdev, "%d", rss_ring_count);
-	DUMP_QDEV_FIELD(qdev, "%p", rx_ring);
-	DUMP_QDEV_FIELD(qdev, "%d", default_rx_queue);
-	DUMP_QDEV_FIELD(qdev, "0x%08x", xg_sem_mask);
-	DUMP_QDEV_FIELD(qdev, "0x%08x", port_link_up);
-	DUMP_QDEV_FIELD(qdev, "0x%08x", port_init);
-}
-#endif
-
-#ifdef QL_CB_DUMP
-void ql_dump_wqicb(struct wqicb *wqicb)
-{
-	pr_err("Dumping wqicb stuff...\n");
-	pr_err("wqicb->len = 0x%x\n", le16_to_cpu(wqicb->len));
-	pr_err("wqicb->flags = %x\n", le16_to_cpu(wqicb->flags));
-	pr_err("wqicb->cq_id_rss = %d\n",
-	       le16_to_cpu(wqicb->cq_id_rss));
-	pr_err("wqicb->rid = 0x%x\n", le16_to_cpu(wqicb->rid));
-	pr_err("wqicb->wq_addr = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(wqicb->addr));
-	pr_err("wqicb->wq_cnsmr_idx_addr = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(wqicb->cnsmr_idx_addr));
-}
-
-void ql_dump_tx_ring(struct tx_ring *tx_ring)
-{
-	if (tx_ring == NULL)
-		return;
-	pr_err("===================== Dumping tx_ring %d ===============\n",
-	       tx_ring->wq_id);
-	pr_err("tx_ring->base = %p\n", tx_ring->wq_base);
-	pr_err("tx_ring->base_dma = 0x%llx\n",
-	       (unsigned long long) tx_ring->wq_base_dma);
-	pr_err("tx_ring->cnsmr_idx_sh_reg, addr = 0x%p, value = %d\n",
-	       tx_ring->cnsmr_idx_sh_reg,
-	       tx_ring->cnsmr_idx_sh_reg
-			? ql_read_sh_reg(tx_ring->cnsmr_idx_sh_reg) : 0);
-	pr_err("tx_ring->size = %d\n", tx_ring->wq_size);
-	pr_err("tx_ring->len = %d\n", tx_ring->wq_len);
-	pr_err("tx_ring->prod_idx_db_reg = %p\n", tx_ring->prod_idx_db_reg);
-	pr_err("tx_ring->valid_db_reg = %p\n", tx_ring->valid_db_reg);
-	pr_err("tx_ring->prod_idx = %d\n", tx_ring->prod_idx);
-	pr_err("tx_ring->cq_id = %d\n", tx_ring->cq_id);
-	pr_err("tx_ring->wq_id = %d\n", tx_ring->wq_id);
-	pr_err("tx_ring->q = %p\n", tx_ring->q);
-	pr_err("tx_ring->tx_count = %d\n", atomic_read(&tx_ring->tx_count));
-}
-
-void ql_dump_ricb(struct ricb *ricb)
-{
-	int i;
-	pr_err("===================== Dumping ricb ===============\n");
-	pr_err("Dumping ricb stuff...\n");
-
-	pr_err("ricb->base_cq = %d\n", ricb->base_cq & 0x1f);
-	pr_err("ricb->flags = %s%s%s%s%s%s%s%s%s\n",
-	       ricb->base_cq & RSS_L4K ? "RSS_L4K " : "",
-	       ricb->flags & RSS_L6K ? "RSS_L6K " : "",
-	       ricb->flags & RSS_LI ? "RSS_LI " : "",
-	       ricb->flags & RSS_LB ? "RSS_LB " : "",
-	       ricb->flags & RSS_LM ? "RSS_LM " : "",
-	       ricb->flags & RSS_RI4 ? "RSS_RI4 " : "",
-	       ricb->flags & RSS_RT4 ? "RSS_RT4 " : "",
-	       ricb->flags & RSS_RI6 ? "RSS_RI6 " : "",
-	       ricb->flags & RSS_RT6 ? "RSS_RT6 " : "");
-	pr_err("ricb->mask = 0x%.04x\n", le16_to_cpu(ricb->mask));
-	for (i = 0; i < 16; i++)
-		pr_err("ricb->hash_cq_id[%d] = 0x%.08x\n", i,
-		       le32_to_cpu(ricb->hash_cq_id[i]));
-	for (i = 0; i < 10; i++)
-		pr_err("ricb->ipv6_hash_key[%d] = 0x%.08x\n", i,
-		       le32_to_cpu(ricb->ipv6_hash_key[i]));
-	for (i = 0; i < 4; i++)
-		pr_err("ricb->ipv4_hash_key[%d] = 0x%.08x\n", i,
-		       le32_to_cpu(ricb->ipv4_hash_key[i]));
-}
-
-void ql_dump_cqicb(struct cqicb *cqicb)
-{
-	pr_err("Dumping cqicb stuff...\n");
-
-	pr_err("cqicb->msix_vect = %d\n", cqicb->msix_vect);
-	pr_err("cqicb->flags = %x\n", cqicb->flags);
-	pr_err("cqicb->len = %d\n", le16_to_cpu(cqicb->len));
-	pr_err("cqicb->addr = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(cqicb->addr));
-	pr_err("cqicb->prod_idx_addr = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(cqicb->prod_idx_addr));
-	pr_err("cqicb->pkt_delay = 0x%.04x\n",
-	       le16_to_cpu(cqicb->pkt_delay));
-	pr_err("cqicb->irq_delay = 0x%.04x\n",
-	       le16_to_cpu(cqicb->irq_delay));
-	pr_err("cqicb->lbq_addr = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(cqicb->lbq_addr));
-	pr_err("cqicb->lbq_buf_size = 0x%.04x\n",
-	       le16_to_cpu(cqicb->lbq_buf_size));
-	pr_err("cqicb->lbq_len = 0x%.04x\n",
-	       le16_to_cpu(cqicb->lbq_len));
-	pr_err("cqicb->sbq_addr = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(cqicb->sbq_addr));
-	pr_err("cqicb->sbq_buf_size = 0x%.04x\n",
-	       le16_to_cpu(cqicb->sbq_buf_size));
-	pr_err("cqicb->sbq_len = 0x%.04x\n",
-	       le16_to_cpu(cqicb->sbq_len));
-}
-
-void ql_dump_rx_ring(struct rx_ring *rx_ring)
-{
-	if (rx_ring == NULL)
-		return;
-	pr_err("===================== Dumping rx_ring %d ===============\n",
-	       rx_ring->cq_id);
-	pr_err("Dumping rx_ring %d, type = %s%s%s\n",
-	       rx_ring->cq_id, rx_ring->type == DEFAULT_Q ? "DEFAULT" : "",
-	       rx_ring->type == TX_Q ? "OUTBOUND COMPLETIONS" : "",
-	       rx_ring->type == RX_Q ? "INBOUND_COMPLETIONS" : "");
-	pr_err("rx_ring->cqicb = %p\n", &rx_ring->cqicb);
-	pr_err("rx_ring->cq_base = %p\n", rx_ring->cq_base);
-	pr_err("rx_ring->cq_base_dma = %llx\n",
-	       (unsigned long long) rx_ring->cq_base_dma);
-	pr_err("rx_ring->cq_size = %d\n", rx_ring->cq_size);
-	pr_err("rx_ring->cq_len = %d\n", rx_ring->cq_len);
-	pr_err("rx_ring->prod_idx_sh_reg, addr = 0x%p, value = %d\n",
-	       rx_ring->prod_idx_sh_reg,
-	       rx_ring->prod_idx_sh_reg
-			? ql_read_sh_reg(rx_ring->prod_idx_sh_reg) : 0);
-	pr_err("rx_ring->prod_idx_sh_reg_dma = %llx\n",
-	       (unsigned long long) rx_ring->prod_idx_sh_reg_dma);
-	pr_err("rx_ring->cnsmr_idx_db_reg = %p\n",
-	       rx_ring->cnsmr_idx_db_reg);
-	pr_err("rx_ring->cnsmr_idx = %d\n", rx_ring->cnsmr_idx);
-	pr_err("rx_ring->curr_entry = %p\n", rx_ring->curr_entry);
-	pr_err("rx_ring->valid_db_reg = %p\n", rx_ring->valid_db_reg);
-
-	pr_err("rx_ring->lbq_base = %p\n", rx_ring->lbq_base);
-	pr_err("rx_ring->lbq_base_dma = %llx\n",
-	       (unsigned long long) rx_ring->lbq_base_dma);
-	pr_err("rx_ring->lbq_base_indirect = %p\n",
-	       rx_ring->lbq_base_indirect);
-	pr_err("rx_ring->lbq_base_indirect_dma = %llx\n",
-	       (unsigned long long) rx_ring->lbq_base_indirect_dma);
-	pr_err("rx_ring->lbq = %p\n", rx_ring->lbq);
-	pr_err("rx_ring->lbq_len = %d\n", rx_ring->lbq_len);
-	pr_err("rx_ring->lbq_size = %d\n", rx_ring->lbq_size);
-	pr_err("rx_ring->lbq_prod_idx_db_reg = %p\n",
-	       rx_ring->lbq_prod_idx_db_reg);
-	pr_err("rx_ring->lbq_prod_idx = %d\n", rx_ring->lbq_prod_idx);
-	pr_err("rx_ring->lbq_curr_idx = %d\n", rx_ring->lbq_curr_idx);
-	pr_err("rx_ring->lbq_clean_idx = %d\n", rx_ring->lbq_clean_idx);
-	pr_err("rx_ring->lbq_free_cnt = %d\n", rx_ring->lbq_free_cnt);
-	pr_err("rx_ring->lbq_buf_size = %d\n", rx_ring->lbq_buf_size);
-
-	pr_err("rx_ring->sbq_base = %p\n", rx_ring->sbq_base);
-	pr_err("rx_ring->sbq_base_dma = %llx\n",
-	       (unsigned long long) rx_ring->sbq_base_dma);
-	pr_err("rx_ring->sbq_base_indirect = %p\n",
-	       rx_ring->sbq_base_indirect);
-	pr_err("rx_ring->sbq_base_indirect_dma = %llx\n",
-	       (unsigned long long) rx_ring->sbq_base_indirect_dma);
-	pr_err("rx_ring->sbq = %p\n", rx_ring->sbq);
-	pr_err("rx_ring->sbq_len = %d\n", rx_ring->sbq_len);
-	pr_err("rx_ring->sbq_size = %d\n", rx_ring->sbq_size);
-	pr_err("rx_ring->sbq_prod_idx_db_reg addr = %p\n",
-	       rx_ring->sbq_prod_idx_db_reg);
-	pr_err("rx_ring->sbq_prod_idx = %d\n", rx_ring->sbq_prod_idx);
-	pr_err("rx_ring->sbq_curr_idx = %d\n", rx_ring->sbq_curr_idx);
-	pr_err("rx_ring->sbq_clean_idx = %d\n", rx_ring->sbq_clean_idx);
-	pr_err("rx_ring->sbq_free_cnt = %d\n", rx_ring->sbq_free_cnt);
-	pr_err("rx_ring->sbq_buf_size = %d\n", rx_ring->sbq_buf_size);
-	pr_err("rx_ring->cq_id = %d\n", rx_ring->cq_id);
-	pr_err("rx_ring->irq = %d\n", rx_ring->irq);
-	pr_err("rx_ring->cpu = %d\n", rx_ring->cpu);
-	pr_err("rx_ring->qdev = %p\n", rx_ring->qdev);
-}
-
-void ql_dump_hw_cb(struct ql_adapter *qdev, int size, u32 bit, u16 q_id)
-{
-	void *ptr;
-
-	pr_err("%s: Enter\n", __func__);
-
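-	/* The CFG "dump" bits (CFG_DRQ/DCQ/DR) ask the chip to DMA the
-	 * requested control block back into this scratch buffer.
-	 */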
-	ptr = kmalloc(size, GFP_ATOMIC);
-	if (ptr == NULL)
-		return;
-
-	if (ql_write_cfg(qdev, ptr, size, bit, q_id)) {
-		pr_err("%s: Failed to upload control block!\n", __func__);
-		goto fail_it;
-	}
-	switch (bit) {
-	case CFG_DRQ:
-		ql_dump_wqicb((struct wqicb *)ptr);
-		break;
-	case CFG_DCQ:
-		ql_dump_cqicb((struct cqicb *)ptr);
-		break;
-	case CFG_DR:
-		ql_dump_ricb((struct ricb *)ptr);
-		break;
-	default:
-		pr_err("%s: Invalid bit value = %x\n", __func__, bit);
-		break;
-	}
-fail_it:
-	kfree(ptr);
-}
-#endif
-
-#ifdef QL_OB_DUMP
-void ql_dump_tx_desc(struct tx_buf_desc *tbd)
-{
-	int i;
-
-	/* Dump the first three buffer descriptors of the IOCB. */
-	for (i = 0; i < 3; i++, tbd++) {
-		pr_err("tbd->addr  = 0x%llx\n",
-		       le64_to_cpu((u64) tbd->addr));
-		pr_err("tbd->len   = %d\n",
-		       le32_to_cpu(tbd->len & TX_DESC_LEN_MASK));
-		pr_err("tbd->flags = %s %s\n",
-		       tbd->len & TX_DESC_C ? "C" : ".",
-		       tbd->len & TX_DESC_E ? "E" : ".");
-	}
-}
-
-void ql_dump_ob_mac_iocb(struct ob_mac_iocb_req *ob_mac_iocb)
-{
-	struct ob_mac_tso_iocb_req *ob_mac_tso_iocb =
-	    (struct ob_mac_tso_iocb_req *)ob_mac_iocb;
-	struct tx_buf_desc *tbd;
-	u16 frame_len;
-
-	pr_err("%s\n", __func__);
-	pr_err("opcode         = %s\n",
-	       (ob_mac_iocb->opcode == OPCODE_OB_MAC_IOCB) ? "MAC" : "TSO");
-	pr_err("flags1          = %s %s %s %s %s\n",
-	       ob_mac_tso_iocb->flags1 & OB_MAC_TSO_IOCB_OI ? "OI" : "",
-	       ob_mac_tso_iocb->flags1 & OB_MAC_TSO_IOCB_I ? "I" : "",
-	       ob_mac_tso_iocb->flags1 & OB_MAC_TSO_IOCB_D ? "D" : "",
-	       ob_mac_tso_iocb->flags1 & OB_MAC_TSO_IOCB_IP4 ? "IP4" : "",
-	       ob_mac_tso_iocb->flags1 & OB_MAC_TSO_IOCB_IP6 ? "IP6" : "");
-	pr_err("flags2          = %s %s %s\n",
-	       ob_mac_tso_iocb->flags2 & OB_MAC_TSO_IOCB_LSO ? "LSO" : "",
-	       ob_mac_tso_iocb->flags2 & OB_MAC_TSO_IOCB_UC ? "UC" : "",
-	       ob_mac_tso_iocb->flags2 & OB_MAC_TSO_IOCB_TC ? "TC" : "");
-	pr_err("flags3          = %s %s %s\n",
-	       ob_mac_tso_iocb->flags3 & OB_MAC_TSO_IOCB_IC ? "IC" : "",
-	       ob_mac_tso_iocb->flags3 & OB_MAC_TSO_IOCB_DFP ? "DFP" : "",
-	       ob_mac_tso_iocb->flags3 & OB_MAC_TSO_IOCB_V ? "V" : "");
-	pr_err("tid = %x\n", ob_mac_iocb->tid);
-	pr_err("txq_idx = %d\n", ob_mac_iocb->txq_idx);
-	pr_err("vlan_tci      = %x\n", ob_mac_tso_iocb->vlan_tci);
-	if (ob_mac_iocb->opcode == OPCODE_OB_MAC_TSO_IOCB) {
-		pr_err("frame_len      = %d\n",
-		       le32_to_cpu(ob_mac_tso_iocb->frame_len));
-		pr_err("mss      = %d\n",
-		       le16_to_cpu(ob_mac_tso_iocb->mss));
-		pr_err("prot_hdr_len   = %d\n",
-		       le16_to_cpu(ob_mac_tso_iocb->total_hdrs_len));
-		pr_err("hdr_offset     = 0x%.04x\n",
-		       le16_to_cpu(ob_mac_tso_iocb->net_trans_offset));
-		frame_len = le32_to_cpu(ob_mac_tso_iocb->frame_len);
-	} else {
-		pr_err("frame_len      = %d\n",
-		       le16_to_cpu(ob_mac_iocb->frame_len));
-		frame_len = le16_to_cpu(ob_mac_iocb->frame_len);
-	}
-	tbd = &ob_mac_iocb->tbd[0];
-	ql_dump_tx_desc(tbd);
-}
-
-void ql_dump_ob_mac_rsp(struct ob_mac_iocb_rsp *ob_mac_rsp)
-{
-	pr_err("%s\n", __func__);
-	pr_err("opcode         = %d\n", ob_mac_rsp->opcode);
-	pr_err("flags          = %s %s %s %s %s %s %s\n",
-	       ob_mac_rsp->flags1 & OB_MAC_IOCB_RSP_OI ? "OI" : ".",
-	       ob_mac_rsp->flags1 & OB_MAC_IOCB_RSP_I ? "I" : ".",
-	       ob_mac_rsp->flags1 & OB_MAC_IOCB_RSP_E ? "E" : ".",
-	       ob_mac_rsp->flags1 & OB_MAC_IOCB_RSP_S ? "S" : ".",
-	       ob_mac_rsp->flags1 & OB_MAC_IOCB_RSP_L ? "L" : ".",
-	       ob_mac_rsp->flags1 & OB_MAC_IOCB_RSP_P ? "P" : ".",
-	       ob_mac_rsp->flags2 & OB_MAC_IOCB_RSP_B ? "B" : ".");
-	pr_err("tid = %x\n", ob_mac_rsp->tid);
-}
-#endif
-
-#ifdef QL_IB_DUMP
-void ql_dump_ib_mac_rsp(struct ib_mac_iocb_rsp *ib_mac_rsp)
-{
-	pr_err("%s\n", __func__);
-	pr_err("opcode         = 0x%x\n", ib_mac_rsp->opcode);
-	pr_err("flags1 = %s%s%s%s%s%s\n",
-	       ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_OI ? "OI " : "",
-	       ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_I ? "I " : "",
-	       ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_TE ? "TE " : "",
-	       ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_NU ? "NU " : "",
-	       ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_IE ? "IE " : "",
-	       ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_B ? "B " : "");
-
-	if (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK)
-		pr_err("%s%s%s Multicast\n",
-		       (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-		       IB_MAC_IOCB_RSP_M_HASH ? "Hash" : "",
-		       (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-		       IB_MAC_IOCB_RSP_M_REG ? "Registered" : "",
-		       (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-		       IB_MAC_IOCB_RSP_M_PROM ? "Promiscuous" : "");
-
-	pr_err("flags2 = %s%s%s%s%s\n",
-	       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_P) ? "P " : "",
-	       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V) ? "V " : "",
-	       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) ? "U " : "",
-	       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T) ? "T " : "",
-	       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_FO) ? "FO " : "");
-
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK)
-		pr_err("%s%s%s%s%s error\n",
-		       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) ==
-		       IB_MAC_IOCB_RSP_ERR_OVERSIZE ? "oversize" : "",
-		       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) ==
-		       IB_MAC_IOCB_RSP_ERR_UNDERSIZE ? "undersize" : "",
-		       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) ==
-		       IB_MAC_IOCB_RSP_ERR_PREAMBLE ? "preamble" : "",
-		       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) ==
-		       IB_MAC_IOCB_RSP_ERR_FRAME_LEN ? "frame length" : "",
-		       (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) ==
-		       IB_MAC_IOCB_RSP_ERR_CRC ? "CRC" : "");
-
-	pr_err("flags3 = %s%s\n",
-	       ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DS ? "DS " : "",
-	       ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL ? "DL " : "");
-
-	if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_RSS_MASK)
-		pr_err("RSS flags = %s%s%s%s\n",
-		       ((ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_RSS_MASK) ==
-			IB_MAC_IOCB_RSP_M_IPV4) ? "IPv4 RSS" : "",
-		       ((ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_RSS_MASK) ==
-			IB_MAC_IOCB_RSP_M_IPV6) ? "IPv6 RSS " : "",
-		       ((ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_RSS_MASK) ==
-			IB_MAC_IOCB_RSP_M_TCP_V4) ? "TCP/IPv4 RSS" : "",
-		       ((ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_RSS_MASK) ==
-			IB_MAC_IOCB_RSP_M_TCP_V6) ? "TCP/IPv6 RSS" : "");
-
-	pr_err("data_len	= %d\n",
-	       le32_to_cpu(ib_mac_rsp->data_len));
-	pr_err("data_addr    = 0x%llx\n",
-	       (unsigned long long) le64_to_cpu(ib_mac_rsp->data_addr));
-	if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_RSS_MASK)
-		pr_err("rss    = %x\n",
-		       le32_to_cpu(ib_mac_rsp->rss));
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V)
-		pr_err("vlan_id    = %x\n",
-		       le16_to_cpu(ib_mac_rsp->vlan_id));
-
-	pr_err("flags4 = %s%s%s\n",
-		ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV ? "HV " : "",
-		ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS ? "HS " : "",
-		ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HL ? "HL " : "");
-
-	if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV) {
-		pr_err("hdr length	= %d\n",
-		       le32_to_cpu(ib_mac_rsp->hdr_len));
-		pr_err("hdr addr    = 0x%llx\n",
-		       (unsigned long long) le64_to_cpu(ib_mac_rsp->hdr_addr));
-	}
-}
-#endif
-
-#ifdef QL_ALL_DUMP
-void ql_dump_all(struct ql_adapter *qdev)
-{
-	int i;
-
-	QL_DUMP_REGS(qdev);
-	QL_DUMP_QDEV(qdev);
-	for (i = 0; i < qdev->tx_ring_count; i++) {
-		QL_DUMP_TX_RING(&qdev->tx_ring[i]);
-		QL_DUMP_WQICB((struct wqicb *)&qdev->tx_ring[i]);
-	}
-	for (i = 0; i < qdev->rx_ring_count; i++) {
-		QL_DUMP_RX_RING(&qdev->rx_ring[i]);
-		QL_DUMP_CQICB((struct cqicb *)&qdev->rx_ring[i]);
-	}
-}
-#endif
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
deleted file mode 100644
index 5edbd53..0000000
--- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
+++ /dev/null
@@ -1,735 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/pagemap.h>
-#include <linux/sched.h>
-#include <linux/dmapool.h>
-#include <linux/mempool.h>
-#include <linux/spinlock.h>
-#include <linux/kthread.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/if_vlan.h>
-#include <linux/delay.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-#include "qlge.h"
-
-struct ql_stats {
-	char stat_string[ETH_GSTRING_LEN];
-	int sizeof_stat;
-	int stat_offset;
-};
-
-#define QL_SIZEOF(m) FIELD_SIZEOF(struct ql_adapter, m)
-#define QL_OFF(m) offsetof(struct ql_adapter, m)
-
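-/* Each entry records a counter's width and its offset inside
- * struct ql_adapter, so ql_get_ethtool_stats() can copy the values
- * out generically with a single loop.
- */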
-static const struct ql_stats ql_gstrings_stats[] = {
-	{"tx_pkts", QL_SIZEOF(nic_stats.tx_pkts), QL_OFF(nic_stats.tx_pkts)},
-	{"tx_bytes", QL_SIZEOF(nic_stats.tx_bytes), QL_OFF(nic_stats.tx_bytes)},
-	{"tx_mcast_pkts", QL_SIZEOF(nic_stats.tx_mcast_pkts),
-					QL_OFF(nic_stats.tx_mcast_pkts)},
-	{"tx_bcast_pkts", QL_SIZEOF(nic_stats.tx_bcast_pkts),
-					QL_OFF(nic_stats.tx_bcast_pkts)},
-	{"tx_ucast_pkts", QL_SIZEOF(nic_stats.tx_ucast_pkts),
-					QL_OFF(nic_stats.tx_ucast_pkts)},
-	{"tx_ctl_pkts", QL_SIZEOF(nic_stats.tx_ctl_pkts),
-					QL_OFF(nic_stats.tx_ctl_pkts)},
-	{"tx_pause_pkts", QL_SIZEOF(nic_stats.tx_pause_pkts),
-					QL_OFF(nic_stats.tx_pause_pkts)},
-	{"tx_64_pkts", QL_SIZEOF(nic_stats.tx_64_pkt),
-					QL_OFF(nic_stats.tx_64_pkt)},
-	{"tx_65_to_127_pkts", QL_SIZEOF(nic_stats.tx_65_to_127_pkt),
-					QL_OFF(nic_stats.tx_65_to_127_pkt)},
-	{"tx_128_to_255_pkts", QL_SIZEOF(nic_stats.tx_128_to_255_pkt),
-					QL_OFF(nic_stats.tx_128_to_255_pkt)},
-	{"tx_256_511_pkts", QL_SIZEOF(nic_stats.tx_256_511_pkt),
-					QL_OFF(nic_stats.tx_256_511_pkt)},
-	{"tx_512_to_1023_pkts",	QL_SIZEOF(nic_stats.tx_512_to_1023_pkt),
-					QL_OFF(nic_stats.tx_512_to_1023_pkt)},
-	{"tx_1024_to_1518_pkts", QL_SIZEOF(nic_stats.tx_1024_to_1518_pkt),
-					QL_OFF(nic_stats.tx_1024_to_1518_pkt)},
-	{"tx_1519_to_max_pkts",	QL_SIZEOF(nic_stats.tx_1519_to_max_pkt),
-					QL_OFF(nic_stats.tx_1519_to_max_pkt)},
-	{"tx_undersize_pkts", QL_SIZEOF(nic_stats.tx_undersize_pkt),
-					QL_OFF(nic_stats.tx_undersize_pkt)},
-	{"tx_oversize_pkts", QL_SIZEOF(nic_stats.tx_oversize_pkt),
-					QL_OFF(nic_stats.tx_oversize_pkt)},
-	{"rx_bytes", QL_SIZEOF(nic_stats.rx_bytes), QL_OFF(nic_stats.rx_bytes)},
-	{"rx_bytes_ok",	QL_SIZEOF(nic_stats.rx_bytes_ok),
-					QL_OFF(nic_stats.rx_bytes_ok)},
-	{"rx_pkts", QL_SIZEOF(nic_stats.rx_pkts), QL_OFF(nic_stats.rx_pkts)},
-	{"rx_pkts_ok", QL_SIZEOF(nic_stats.rx_pkts_ok),
-					QL_OFF(nic_stats.rx_pkts_ok)},
-	{"rx_bcast_pkts", QL_SIZEOF(nic_stats.rx_bcast_pkts),
-					QL_OFF(nic_stats.rx_bcast_pkts)},
-	{"rx_mcast_pkts", QL_SIZEOF(nic_stats.rx_mcast_pkts),
-					QL_OFF(nic_stats.rx_mcast_pkts)},
-	{"rx_ucast_pkts", QL_SIZEOF(nic_stats.rx_ucast_pkts),
-					QL_OFF(nic_stats.rx_ucast_pkts)},
-	{"rx_undersize_pkts", QL_SIZEOF(nic_stats.rx_undersize_pkts),
-					QL_OFF(nic_stats.rx_undersize_pkts)},
-	{"rx_oversize_pkts", QL_SIZEOF(nic_stats.rx_oversize_pkts),
-					QL_OFF(nic_stats.rx_oversize_pkts)},
-	{"rx_jabber_pkts", QL_SIZEOF(nic_stats.rx_jabber_pkts),
-					QL_OFF(nic_stats.rx_jabber_pkts)},
-	{"rx_undersize_fcerr_pkts",
-		QL_SIZEOF(nic_stats.rx_undersize_fcerr_pkts),
-				QL_OFF(nic_stats.rx_undersize_fcerr_pkts)},
-	{"rx_drop_events", QL_SIZEOF(nic_stats.rx_drop_events),
-					QL_OFF(nic_stats.rx_drop_events)},
-	{"rx_fcerr_pkts", QL_SIZEOF(nic_stats.rx_fcerr_pkts),
-					QL_OFF(nic_stats.rx_fcerr_pkts)},
-	{"rx_align_err", QL_SIZEOF(nic_stats.rx_align_err),
-					QL_OFF(nic_stats.rx_align_err)},
-	{"rx_symbol_err", QL_SIZEOF(nic_stats.rx_symbol_err),
-					QL_OFF(nic_stats.rx_symbol_err)},
-	{"rx_mac_err", QL_SIZEOF(nic_stats.rx_mac_err),
-					QL_OFF(nic_stats.rx_mac_err)},
-	{"rx_ctl_pkts",	QL_SIZEOF(nic_stats.rx_ctl_pkts),
-					QL_OFF(nic_stats.rx_ctl_pkts)},
-	{"rx_pause_pkts", QL_SIZEOF(nic_stats.rx_pause_pkts),
-					QL_OFF(nic_stats.rx_pause_pkts)},
-	{"rx_64_pkts", QL_SIZEOF(nic_stats.rx_64_pkts),
-					QL_OFF(nic_stats.rx_64_pkts)},
-	{"rx_65_to_127_pkts", QL_SIZEOF(nic_stats.rx_65_to_127_pkts),
-					QL_OFF(nic_stats.rx_65_to_127_pkts)},
-	{"rx_128_255_pkts", QL_SIZEOF(nic_stats.rx_128_255_pkts),
-					QL_OFF(nic_stats.rx_128_255_pkts)},
-	{"rx_256_511_pkts", QL_SIZEOF(nic_stats.rx_256_511_pkts),
-					QL_OFF(nic_stats.rx_256_511_pkts)},
-	{"rx_512_to_1023_pkts",	QL_SIZEOF(nic_stats.rx_512_to_1023_pkts),
-					QL_OFF(nic_stats.rx_512_to_1023_pkts)},
-	{"rx_1024_to_1518_pkts", QL_SIZEOF(nic_stats.rx_1024_to_1518_pkts),
-					QL_OFF(nic_stats.rx_1024_to_1518_pkts)},
-	{"rx_1519_to_max_pkts",	QL_SIZEOF(nic_stats.rx_1519_to_max_pkts),
-					QL_OFF(nic_stats.rx_1519_to_max_pkts)},
-	{"rx_len_err_pkts", QL_SIZEOF(nic_stats.rx_len_err_pkts),
-					QL_OFF(nic_stats.rx_len_err_pkts)},
-	{"rx_code_err",	QL_SIZEOF(nic_stats.rx_code_err),
-					QL_OFF(nic_stats.rx_code_err)},
-	{"rx_oversize_err", QL_SIZEOF(nic_stats.rx_oversize_err),
-					QL_OFF(nic_stats.rx_oversize_err)},
-	{"rx_undersize_err", QL_SIZEOF(nic_stats.rx_undersize_err),
-					QL_OFF(nic_stats.rx_undersize_err)},
-	{"rx_preamble_err", QL_SIZEOF(nic_stats.rx_preamble_err),
-					QL_OFF(nic_stats.rx_preamble_err)},
-	{"rx_frame_len_err", QL_SIZEOF(nic_stats.rx_frame_len_err),
-					QL_OFF(nic_stats.rx_frame_len_err)},
-	{"rx_crc_err", QL_SIZEOF(nic_stats.rx_crc_err),
-					QL_OFF(nic_stats.rx_crc_err)},
-	{"rx_err_count", QL_SIZEOF(nic_stats.rx_err_count),
-					QL_OFF(nic_stats.rx_err_count)},
-	{"tx_cbfc_pause_frames0", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames0),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames0)},
-	{"tx_cbfc_pause_frames1", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames1),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames1)},
-	{"tx_cbfc_pause_frames2", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames2),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames2)},
-	{"tx_cbfc_pause_frames3", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames3),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames3)},
-	{"tx_cbfc_pause_frames4", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames4),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames4)},
-	{"tx_cbfc_pause_frames5", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames5),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames5)},
-	{"tx_cbfc_pause_frames6", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames6),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames6)},
-	{"tx_cbfc_pause_frames7", QL_SIZEOF(nic_stats.tx_cbfc_pause_frames7),
-				QL_OFF(nic_stats.tx_cbfc_pause_frames7)},
-	{"rx_cbfc_pause_frames0", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames0),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames0)},
-	{"rx_cbfc_pause_frames1", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames1),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames1)},
-	{"rx_cbfc_pause_frames2", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames2),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames2)},
-	{"rx_cbfc_pause_frames3", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames3),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames3)},
-	{"rx_cbfc_pause_frames4", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames4),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames4)},
-	{"rx_cbfc_pause_frames5", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames5),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames5)},
-	{"rx_cbfc_pause_frames6", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames6),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames6)},
-	{"rx_cbfc_pause_frames7", QL_SIZEOF(nic_stats.rx_cbfc_pause_frames7),
-				QL_OFF(nic_stats.rx_cbfc_pause_frames7)},
-	{"rx_nic_fifo_drop", QL_SIZEOF(nic_stats.rx_nic_fifo_drop),
-					QL_OFF(nic_stats.rx_nic_fifo_drop)},
-};
-
-static const char ql_gstrings_test[][ETH_GSTRING_LEN] = {
-	"Loopback test  (offline)"
-};
-#define QLGE_TEST_LEN (sizeof(ql_gstrings_test) / ETH_GSTRING_LEN)
-#define QLGE_STATS_LEN ARRAY_SIZE(ql_gstrings_stats)
-#define QLGE_RCV_MAC_ERR_STATS	7
-
-static int ql_update_ring_coalescing(struct ql_adapter *qdev)
-{
-	int i, status = 0;
-	struct rx_ring *rx_ring;
-	struct cqicb *cqicb;
-
-	if (!netif_running(qdev->ndev))
-		return status;
-
-	/* Skip the default queue, and update the outbound handler
-	 * queues if they changed.
-	 */
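-	/* All outbound completion queues share one set of coalescing
-	 * parameters, so checking the first one is enough to detect a
-	 * change.
-	 */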
-	cqicb = (struct cqicb *)&qdev->rx_ring[qdev->rss_ring_count];
-	if (le16_to_cpu(cqicb->irq_delay) != qdev->tx_coalesce_usecs ||
-		le16_to_cpu(cqicb->pkt_delay) !=
-				qdev->tx_max_coalesced_frames) {
-		for (i = qdev->rss_ring_count; i < qdev->rx_ring_count; i++) {
-			rx_ring = &qdev->rx_ring[i];
-			cqicb = (struct cqicb *)rx_ring;
-			cqicb->irq_delay = cpu_to_le16(qdev->tx_coalesce_usecs);
-			cqicb->pkt_delay =
-			    cpu_to_le16(qdev->tx_max_coalesced_frames);
-			cqicb->flags = FLAGS_LI;
-			status = ql_write_cfg(qdev, cqicb, sizeof(*cqicb),
-						CFG_LCQ, rx_ring->cq_id);
-			if (status) {
-				netif_err(qdev, ifup, qdev->ndev,
-					  "Failed to load CQICB.\n");
-				goto exit;
-			}
-		}
-	}
-
-	/* Update the inbound (RSS) handler queues if they changed. */
-	cqicb = (struct cqicb *)&qdev->rx_ring[0];
-	if (le16_to_cpu(cqicb->irq_delay) != qdev->rx_coalesce_usecs ||
-		le16_to_cpu(cqicb->pkt_delay) !=
-					qdev->rx_max_coalesced_frames) {
-		for (i = 0; i < qdev->rss_ring_count; i++) {
-			rx_ring = &qdev->rx_ring[i];
-			cqicb = (struct cqicb *)rx_ring;
-			cqicb->irq_delay = cpu_to_le16(qdev->rx_coalesce_usecs);
-			cqicb->pkt_delay =
-			    cpu_to_le16(qdev->rx_max_coalesced_frames);
-			cqicb->flags = FLAGS_LI;
-			status = ql_write_cfg(qdev, cqicb, sizeof(*cqicb),
-						CFG_LCQ, rx_ring->cq_id);
-			if (status) {
-				netif_err(qdev, ifup, qdev->ndev,
-					  "Failed to load CQICB.\n");
-				goto exit;
-			}
-		}
-	}
-exit:
-	return status;
-}
-
-static void ql_update_stats(struct ql_adapter *qdev)
-{
-	u32 i;
-	u64 data;
-	u64 *iter = &qdev->nic_stats.tx_pkts;
-
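-	/* The XGMAC counters are read in register order and stored into
-	 * consecutive u64 fields of nic_stats, so iter simply walks the
-	 * structure as the register offset advances.
-	 */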
-	spin_lock(&qdev->stats_lock);
-	if (ql_sem_spinlock(qdev, qdev->xg_sem_mask)) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Couldn't get xgmac sem.\n");
-		goto quit;
-	}
-	/*
-	 * Get TX statistics.
-	 */
-	for (i = 0x200; i < 0x280; i += 8) {
-		if (ql_read_xgmac_reg64(qdev, i, &data)) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Error reading status register 0x%.04x.\n",
-				  i);
-			goto end;
-		} else
-			*iter = data;
-		iter++;
-	}
-
-	/*
-	 * Get RX statistics.
-	 */
-	for (i = 0x300; i < 0x3d0; i += 8) {
-		if (ql_read_xgmac_reg64(qdev, i, &data)) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Error reading status register 0x%.04x.\n",
-				  i);
-			goto end;
-		} else
-			*iter = data;
-		iter++;
-	}
-
-	/* Skip over the receive MAC error counters; they are maintained
-	 * elsewhere, not by this register walk.
-	 */
-	iter += QLGE_RCV_MAC_ERR_STATS;
-
-	/*
-	 * Get Per-priority TX pause frame counter statistics.
-	 */
-	for (i = 0x500; i < 0x540; i += 8) {
-		if (ql_read_xgmac_reg64(qdev, i, &data)) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Error reading status register 0x%.04x.\n",
-				  i);
-			goto end;
-		} else
-			*iter = data;
-		iter++;
-	}
-
-	/*
-	 * Get Per-priority RX pause frame counter statistics.
-	 */
-	for (i = 0x568; i < 0x5a8; i += 8) {
-		if (ql_read_xgmac_reg64(qdev, i, &data)) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "Error reading status register 0x%.04x.\n",
-				  i);
-			goto end;
-		} else
-			*iter = data;
-		iter++;
-	}
-
-	/*
-	 * Get RX NIC FIFO DROP statistics.
-	 */
-	if (ql_read_xgmac_reg64(qdev, 0x5b8, &data)) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Error reading status register 0x%.04x.\n", i);
-		goto end;
-	} else
-		*iter = data;
-end:
-	ql_sem_unlock(qdev, qdev->xg_sem_mask);
-quit:
-	spin_unlock(&qdev->stats_lock);
-
-	QL_DUMP_STAT(qdev);
-}
-
-static void ql_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
-{
-	int index;
-	switch (stringset) {
-	case ETH_SS_TEST:
-		memcpy(buf, *ql_gstrings_test, QLGE_TEST_LEN * ETH_GSTRING_LEN);
-		break;
-	case ETH_SS_STATS:
-		for (index = 0; index < QLGE_STATS_LEN; index++) {
-			memcpy(buf + index * ETH_GSTRING_LEN,
-				ql_gstrings_stats[index].stat_string,
-				ETH_GSTRING_LEN);
-		}
-		break;
-	}
-}
-
-static int ql_get_sset_count(struct net_device *dev, int sset)
-{
-	switch (sset) {
-	case ETH_SS_TEST:
-		return QLGE_TEST_LEN;
-	case ETH_SS_STATS:
-		return QLGE_STATS_LEN;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void
-ql_get_ethtool_stats(struct net_device *ndev,
-		     struct ethtool_stats *stats, u64 *data)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int index, length;
-
-	length = QLGE_STATS_LEN;
-	ql_update_stats(qdev);
-
-	for (index = 0; index < length; index++) {
-		char *p = (char *)qdev +
-			ql_gstrings_stats[index].stat_offset;
-		*data++ = (ql_gstrings_stats[index].sizeof_stat ==
-			sizeof(u64)) ? *(u64 *)p : (*(u32 *)p);
-	}
-}
-
-static int ql_get_link_ksettings(struct net_device *ndev,
-				 struct ethtool_link_ksettings *ecmd)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	u32 supported, advertising;
-
-	supported = SUPPORTED_10000baseT_Full;
-	advertising = ADVERTISED_10000baseT_Full;
-
-	if ((qdev->link_status & STS_LINK_TYPE_MASK) ==
-				STS_LINK_TYPE_10GBASET) {
-		supported |= (SUPPORTED_TP | SUPPORTED_Autoneg);
-		advertising |= (ADVERTISED_TP | ADVERTISED_Autoneg);
-		ecmd->base.port = PORT_TP;
-		ecmd->base.autoneg = AUTONEG_ENABLE;
-	} else {
-		supported |= SUPPORTED_FIBRE;
-		advertising |= ADVERTISED_FIBRE;
-		ecmd->base.port = PORT_FIBRE;
-	}
-
-	ecmd->base.speed = SPEED_10000;
-	ecmd->base.duplex = DUPLEX_FULL;
-
-	ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.advertising,
-						advertising);
-
-	return 0;
-}
-
-static void ql_get_drvinfo(struct net_device *ndev,
-			   struct ethtool_drvinfo *drvinfo)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	strlcpy(drvinfo->driver, qlge_driver_name, sizeof(drvinfo->driver));
-	strlcpy(drvinfo->version, qlge_driver_version,
-		sizeof(drvinfo->version));
-	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-		 "v%d.%d.%d",
-		 (qdev->fw_rev_id & 0x00ff0000) >> 16,
-		 (qdev->fw_rev_id & 0x0000ff00) >> 8,
-		 (qdev->fw_rev_id & 0x000000ff));
-	strlcpy(drvinfo->bus_info, pci_name(qdev->pdev),
-		sizeof(drvinfo->bus_info));
-}
-
-static void ql_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	unsigned short ssys_dev = qdev->pdev->subsystem_device;
-
-	/* WOL is only supported for mezz card. */
-	if (ssys_dev == QLGE_MEZZ_SSYS_ID_068 ||
-			ssys_dev == QLGE_MEZZ_SSYS_ID_180) {
-		wol->supported = WAKE_MAGIC;
-		wol->wolopts = qdev->wol;
-	}
-}
-
-static int ql_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	unsigned short ssys_dev = qdev->pdev->subsystem_device;
-
-	/* WOL is only supported for mezz card. */
-	if (ssys_dev != QLGE_MEZZ_SSYS_ID_068 &&
-			ssys_dev != QLGE_MEZZ_SSYS_ID_180) {
-		netif_info(qdev, drv, qdev->ndev,
-				"WOL is only supported for mezz card\n");
-		return -EOPNOTSUPP;
-	}
-	if (wol->wolopts & ~WAKE_MAGIC)
-		return -EINVAL;
-	qdev->wol = wol->wolopts;
-
-	netif_info(qdev, drv, qdev->ndev, "Set wol option 0x%x\n", qdev->wol);
-	return 0;
-}
-
-static int ql_set_phys_id(struct net_device *ndev,
-			  enum ethtool_phys_id_state state)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	switch (state) {
-	case ETHTOOL_ID_ACTIVE:
-		/* Save the current LED settings */
-		if (ql_mb_get_led_cfg(qdev))
-			return -EIO;
-
-		/* Start blinking */
-		ql_mb_set_led_cfg(qdev, QL_LED_BLINK);
-		return 0;
-
-	case ETHTOOL_ID_INACTIVE:
-		/* Restore LED settings */
-		if (ql_mb_set_led_cfg(qdev, qdev->led_config))
-			return -EIO;
-		return 0;
-
-	default:
-		return -EINVAL;
-	}
-}
-
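-/* Put the MAC into PCS loopback.  The pre-test carrier state is stashed
- * in the QL_LB_LINK_UP flag so ql_stop_loopback() can restore it.
- */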
-static int ql_start_loopback(struct ql_adapter *qdev)
-{
-	if (netif_carrier_ok(qdev->ndev)) {
-		set_bit(QL_LB_LINK_UP, &qdev->flags);
-		netif_carrier_off(qdev->ndev);
-	} else {
-		clear_bit(QL_LB_LINK_UP, &qdev->flags);
-	}
-	qdev->link_config |= CFG_LOOPBACK_PCS;
-	return ql_mb_set_port_cfg(qdev);
-}
-
-static void ql_stop_loopback(struct ql_adapter *qdev)
-{
-	qdev->link_config &= ~CFG_LOOPBACK_PCS;
-	ql_mb_set_port_cfg(qdev);
-	if (test_bit(QL_LB_LINK_UP, &qdev->flags)) {
-		netif_carrier_on(qdev->ndev);
-		clear_bit(QL_LB_LINK_UP, &qdev->flags);
-	}
-}
-
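-/* Build a recognizable test pattern: 0xFF filler in the first half,
- * 0xAA in the second half, plus 0xBE/0xAF marker bytes that
- * ql_check_lb_frame() verifies on receive.
- */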
-static void ql_create_lb_frame(struct sk_buff *skb,
-					unsigned int frame_size)
-{
-	memset(skb->data, 0xFF, frame_size);
-	frame_size &= ~1;
-	memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1);
-	memset(&skb->data[frame_size / 2 + 10], 0xBE, 1);
-	memset(&skb->data[frame_size / 2 + 12], 0xAF, 1);
-}
-
-void ql_check_lb_frame(struct ql_adapter *qdev,
-					struct sk_buff *skb)
-{
-	unsigned int frame_size = skb->len;
-
-	if ((*(skb->data + 3) == 0xFF) &&
-	    (*(skb->data + frame_size / 2 + 10) == 0xBE) &&
-	    (*(skb->data + frame_size / 2 + 12) == 0xAF)) {
-		atomic_dec(&qdev->lb_count);
-		return;
-	}
-}
-
-static int ql_run_loopback_test(struct ql_adapter *qdev)
-{
-	int i;
-	netdev_tx_t rc;
-	struct sk_buff *skb;
-	unsigned int size = SMALL_BUF_MAP_SIZE;
-
-	for (i = 0; i < 64; i++) {
-		skb = netdev_alloc_skb(qdev->ndev, size);
-		if (!skb)
-			return -ENOMEM;
-
-		skb->queue_mapping = 0;
-		skb_put(skb, size);
-		ql_create_lb_frame(skb, size);
-		rc = ql_lb_send(skb, qdev->ndev);
-		if (rc != NETDEV_TX_OK)
-			return -EPIPE;
-		atomic_inc(&qdev->lb_count);
-	}
-	/* Give queue time to settle before testing results. */
-	msleep(2);
-	ql_clean_lb_rx_ring(&qdev->rx_ring[0], 128);
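-	/* Each transmitted frame bumped lb_count and each frame verified
-	 * on receive decremented it, so a nonzero count means frames were
-	 * lost or corrupted.
-	 */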
-	return atomic_read(&qdev->lb_count) ? -EIO : 0;
-}
-
-static int ql_loopback_test(struct ql_adapter *qdev, u64 *data)
-{
-	*data = ql_start_loopback(qdev);
-	if (*data)
-		goto out;
-	*data = ql_run_loopback_test(qdev);
-out:
-	ql_stop_loopback(qdev);
-	return *data;
-}
-
-static void ql_self_test(struct net_device *ndev,
-				struct ethtool_test *eth_test, u64 *data)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	memset(data, 0, sizeof(u64) * QLGE_TEST_LEN);
-
-	if (netif_running(ndev)) {
-		set_bit(QL_SELFTEST, &qdev->flags);
-		if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
-			/* Offline tests */
-			if (ql_loopback_test(qdev, &data[0]))
-				eth_test->flags |= ETH_TEST_FL_FAILED;
-
-		} else {
-			/* Online tests */
-			data[0] = 0;
-		}
-		clear_bit(QL_SELFTEST, &qdev->flags);
-		/* Give link time to come up after
-		 * port configuration changes.
-		 */
-		msleep_interruptible(4 * 1000);
-	} else {
-		netif_err(qdev, drv, qdev->ndev,
-			  "is down, Loopback test will fail.\n");
-		eth_test->flags |= ETH_TEST_FL_FAILED;
-	}
-}
-
-static int ql_get_regs_len(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	if (!test_bit(QL_FRC_COREDUMP, &qdev->flags))
-		return sizeof(struct ql_mpi_coredump);
-	else
-		return sizeof(struct ql_reg_dump);
-}
-
-static void ql_get_regs(struct net_device *ndev,
-			struct ethtool_regs *regs, void *p)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	ql_get_dump(qdev, p);
-	qdev->core_is_dumped = 0;
-	if (!test_bit(QL_FRC_COREDUMP, &qdev->flags))
-		regs->len = sizeof(struct ql_mpi_coredump);
-	else
-		regs->len = sizeof(struct ql_reg_dump);
-}
-
-static int ql_get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
-{
-	struct ql_adapter *qdev = netdev_priv(dev);
-
-	c->rx_coalesce_usecs = qdev->rx_coalesce_usecs;
-	c->tx_coalesce_usecs = qdev->tx_coalesce_usecs;
-
-	/* This chip coalesces as follows:
-	 * If a packet arrives, hold off interrupts until
-	 * cqicb->int_delay expires, but if no other packets arrive don't
-	 * wait longer than cqicb->pkt_int_delay. But ethtool doesn't use a
-	 * timer to coalesce on a frame basis.  So, we have to take ethtool's
-	 * max_coalesced_frames value and convert it to a delay in microseconds.
-	 * We do this by using a basic throughput of 1,000,000 frames per
-	 * second @ (1024 bytes).  This means one frame per usec, so it's a
-	 * simple one-to-one ratio.
-	 */
-	c->rx_max_coalesced_frames = qdev->rx_max_coalesced_frames;
-	c->tx_max_coalesced_frames = qdev->tx_max_coalesced_frames;
-
-	return 0;
-}
-
-static int ql_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *c)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	/* Validate user parameters. */
-	if (c->rx_coalesce_usecs > qdev->rx_ring_size / 2)
-		return -EINVAL;
-	/* Don't wait more than 10 usec. */
-	if (c->rx_max_coalesced_frames > MAX_INTER_FRAME_WAIT)
-		return -EINVAL;
-	if (c->tx_coalesce_usecs > qdev->tx_ring_size / 2)
-		return -EINVAL;
-	if (c->tx_max_coalesced_frames > MAX_INTER_FRAME_WAIT)
-		return -EINVAL;
-
-	/* Verify a change took place before updating the hardware. */
-	if (qdev->rx_coalesce_usecs == c->rx_coalesce_usecs &&
-	    qdev->tx_coalesce_usecs == c->tx_coalesce_usecs &&
-	    qdev->rx_max_coalesced_frames == c->rx_max_coalesced_frames &&
-	    qdev->tx_max_coalesced_frames == c->tx_max_coalesced_frames)
-		return 0;
-
-	qdev->rx_coalesce_usecs = c->rx_coalesce_usecs;
-	qdev->tx_coalesce_usecs = c->tx_coalesce_usecs;
-	qdev->rx_max_coalesced_frames = c->rx_max_coalesced_frames;
-	qdev->tx_max_coalesced_frames = c->tx_max_coalesced_frames;
-
-	return ql_update_ring_coalescing(qdev);
-}
-
-static void ql_get_pauseparam(struct net_device *netdev,
-			struct ethtool_pauseparam *pause)
-{
-	struct ql_adapter *qdev = netdev_priv(netdev);
-
-	ql_mb_get_port_cfg(qdev);
-	if (qdev->link_config & CFG_PAUSE_STD) {
-		pause->rx_pause = 1;
-		pause->tx_pause = 1;
-	}
-}
-
-static int ql_set_pauseparam(struct net_device *netdev,
-			struct ethtool_pauseparam *pause)
-{
-	struct ql_adapter *qdev = netdev_priv(netdev);
-	int status = 0;
-
-	if ((pause->rx_pause) && (pause->tx_pause))
-		qdev->link_config |= CFG_PAUSE_STD;
-	else if (!pause->rx_pause && !pause->tx_pause)
-		qdev->link_config &= ~CFG_PAUSE_STD;
-	else
-		return -EINVAL;
-
-	status = ql_mb_set_port_cfg(qdev);
-	return status;
-}
-
-static u32 ql_get_msglevel(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	return qdev->msg_enable;
-}
-
-static void ql_set_msglevel(struct net_device *ndev, u32 value)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	qdev->msg_enable = value;
-}
-
-const struct ethtool_ops qlge_ethtool_ops = {
-	.get_drvinfo = ql_get_drvinfo,
-	.get_wol = ql_get_wol,
-	.set_wol = ql_set_wol,
-	.get_regs_len	= ql_get_regs_len,
-	.get_regs = ql_get_regs,
-	.get_msglevel = ql_get_msglevel,
-	.set_msglevel = ql_set_msglevel,
-	.get_link = ethtool_op_get_link,
-	.set_phys_id		 = ql_set_phys_id,
-	.self_test		 = ql_self_test,
-	.get_pauseparam		 = ql_get_pauseparam,
-	.set_pauseparam		 = ql_set_pauseparam,
-	.get_coalesce = ql_get_coalesce,
-	.set_coalesce = ql_set_coalesce,
-	.get_sset_count = ql_get_sset_count,
-	.get_strings = ql_get_strings,
-	.get_ethtool_stats = ql_get_ethtool_stats,
-	.get_link_ksettings = ql_get_link_ksettings,
-};
-
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
deleted file mode 100644
index 059ba94..0000000
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ /dev/null
@@ -1,5023 +0,0 @@
-/*
- * QLogic qlge NIC HBA Driver
- * Copyright (c)  2003-2008 QLogic Corporation
- * See LICENSE.qlge for copyright and licensing details.
- * Author:     Linux qlge network device driver by
- *                      Ron Mercer <ron.mercer@qlogic.com>
- */
-#include <linux/kernel.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/pagemap.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/dmapool.h>
-#include <linux/mempool.h>
-#include <linux/spinlock.h>
-#include <linux/kthread.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/skbuff.h>
-#include <linux/delay.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/prefetch.h>
-#include <net/ip6_checksum.h>
-
-#include "qlge.h"
-
-char qlge_driver_name[] = DRV_NAME;
-const char qlge_driver_version[] = DRV_VERSION;
-
-MODULE_AUTHOR("Ron Mercer <ron.mercer@qlogic.com>");
-MODULE_DESCRIPTION(DRV_STRING " ");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static const u32 default_msg =
-    NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK |
-/* NETIF_MSG_TIMER |	*/
-    NETIF_MSG_IFDOWN |
-    NETIF_MSG_IFUP |
-    NETIF_MSG_RX_ERR |
-    NETIF_MSG_TX_ERR |
-/*  NETIF_MSG_TX_QUEUED | */
-/*  NETIF_MSG_INTR | NETIF_MSG_TX_DONE | NETIF_MSG_RX_STATUS | */
-/* NETIF_MSG_PKTDATA | */
-    NETIF_MSG_HW | NETIF_MSG_WOL | 0;
-
-static int debug = -1;	/* defaults above */
-module_param(debug, int, 0664);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-#define MSIX_IRQ 0
-#define MSI_IRQ 1
-#define LEG_IRQ 2
-static int qlge_irq_type = MSIX_IRQ;
-module_param(qlge_irq_type, int, 0664);
-MODULE_PARM_DESC(qlge_irq_type, "0 = MSI-X, 1 = MSI, 2 = Legacy.");
-
-static int qlge_mpi_coredump;
-module_param(qlge_mpi_coredump, int, 0);
-MODULE_PARM_DESC(qlge_mpi_coredump,
-		"Option to enable MPI firmware dump. "
-		"Default is OFF - Do Not allocate memory. ");
-
-static int qlge_force_coredump;
-module_param(qlge_force_coredump, int, 0);
-MODULE_PARM_DESC(qlge_force_coredump,
-		"Option to allow force of firmware core dump. "
-		"Default is OFF - Do not allow.");
-
-static const struct pci_device_id qlge_pci_tbl[] = {
-	{PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, QLGE_DEVICE_ID_8012)},
-	{PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, QLGE_DEVICE_ID_8000)},
-	/* required last entry */
-	{0,}
-};
-
-MODULE_DEVICE_TABLE(pci, qlge_pci_tbl);
-
-static int ql_wol(struct ql_adapter *);
-static void qlge_set_multicast_list(struct net_device *);
-static int ql_adapter_down(struct ql_adapter *);
-static int ql_adapter_up(struct ql_adapter *);
-
-/* This hardware semaphore causes exclusive access to
- * resources shared between the NIC driver, MPI firmware,
- * FCOE firmware and the FC driver.
- */
-static int ql_sem_trylock(struct ql_adapter *qdev, u32 sem_mask)
-{
-	u32 sem_bits = 0;
-
-	switch (sem_mask) {
-	case SEM_XGMAC0_MASK:
-		sem_bits = SEM_SET << SEM_XGMAC0_SHIFT;
-		break;
-	case SEM_XGMAC1_MASK:
-		sem_bits = SEM_SET << SEM_XGMAC1_SHIFT;
-		break;
-	case SEM_ICB_MASK:
-		sem_bits = SEM_SET << SEM_ICB_SHIFT;
-		break;
-	case SEM_MAC_ADDR_MASK:
-		sem_bits = SEM_SET << SEM_MAC_ADDR_SHIFT;
-		break;
-	case SEM_FLASH_MASK:
-		sem_bits = SEM_SET << SEM_FLASH_SHIFT;
-		break;
-	case SEM_PROBE_MASK:
-		sem_bits = SEM_SET << SEM_PROBE_SHIFT;
-		break;
-	case SEM_RT_IDX_MASK:
-		sem_bits = SEM_SET << SEM_RT_IDX_SHIFT;
-		break;
-	case SEM_PROC_REG_MASK:
-		sem_bits = SEM_SET << SEM_PROC_REG_SHIFT;
-		break;
-	default:
-		netif_alert(qdev, probe, qdev->ndev, "bad Semaphore mask!.\n");
-		return -EINVAL;
-	}
-
-	ql_write32(qdev, SEM, sem_bits | sem_mask);
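-	/* The write only takes effect if the semaphore was free; reading
-	 * the bits back tells us whether we now own it (0 = acquired).
-	 */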
-	return !(ql_read32(qdev, SEM) & sem_bits);
-}
-
-int ql_sem_spinlock(struct ql_adapter *qdev, u32 sem_mask)
-{
-	unsigned int wait_count = 30;
-	do {
-		if (!ql_sem_trylock(qdev, sem_mask))
-			return 0;
-		udelay(100);
-	} while (--wait_count);
-	return -ETIMEDOUT;
-}
-
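-/* Release a semaphore taken with ql_sem_trylock()/ql_sem_spinlock().
- * Illustrative pairing, a sketch rather than code from this driver:
- *
- *	if (ql_sem_spinlock(qdev, SEM_RT_IDX_MASK))
- *		return;				(timed out)
- *	... touch the shared routing registers ...
- *	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
- */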
-void ql_sem_unlock(struct ql_adapter *qdev, u32 sem_mask)
-{
-	ql_write32(qdev, SEM, sem_mask);
-	ql_read32(qdev, SEM);	/* flush */
-}
-
-/* This function waits for a specific bit to come ready
- * in a given register.  It is used mostly by the initialize
- * process, but is also used in kernel thread API such as
- * netdev->set_multi, netdev->set_mac_address, netdev->vlan_rx_add_vid.
- */
-int ql_wait_reg_rdy(struct ql_adapter *qdev, u32 reg, u32 bit, u32 err_bit)
-{
-	u32 temp;
-	int count = UDELAY_COUNT;
-
-	while (count) {
-		temp = ql_read32(qdev, reg);
-
-		/* check for errors */
-		if (temp & err_bit) {
-			netif_alert(qdev, probe, qdev->ndev,
-				    "register 0x%.08x access error, value = 0x%.08x!.\n",
-				    reg, temp);
-			return -EIO;
-		} else if (temp & bit)
-			return 0;
-		udelay(UDELAY_DELAY);
-		count--;
-	}
-	netif_alert(qdev, probe, qdev->ndev,
-		    "Timed out waiting for reg %x to come ready.\n", reg);
-	return -ETIMEDOUT;
-}
-
-/* The CFG register is used to download TX and RX control blocks
- * to the chip. This function waits for an operation to complete.
- */
-static int ql_wait_cfg(struct ql_adapter *qdev, u32 bit)
-{
-	int count = UDELAY_COUNT;
-	u32 temp;
-
-	while (count) {
-		temp = ql_read32(qdev, CFG);
-		if (temp & CFG_LE)
-			return -EIO;
-		if (!(temp & bit))
-			return 0;
-		udelay(UDELAY_DELAY);
-		count--;
-	}
-	return -ETIMEDOUT;
-}
-
-/* Used to issue init control blocks to hw. Maps control block,
- * sets address, triggers download, waits for completion.
- */
-int ql_write_cfg(struct ql_adapter *qdev, void *ptr, int size, u32 bit,
-		 u16 q_id)
-{
-	u64 map;
-	int status = 0;
-	int direction;
-	u32 mask;
-	u32 value;
-
-	direction =
-	    (bit & (CFG_LRQ | CFG_LR | CFG_LCQ)) ? PCI_DMA_TODEVICE :
-	    PCI_DMA_FROMDEVICE;
-
-	map = pci_map_single(qdev->pdev, ptr, size, direction);
-	if (pci_dma_mapping_error(qdev->pdev, map)) {
-		netif_err(qdev, ifup, qdev->ndev, "Couldn't map DMA area.\n");
-		return -ENOMEM;
-	}
-
-	status = ql_sem_spinlock(qdev, SEM_ICB_MASK);
-	if (status) {
-		/* Undo the mapping; we never reach the unmap at exit. */
-		pci_unmap_single(qdev->pdev, map, size, direction);
-		return status;
-	}
-
-	status = ql_wait_cfg(qdev, bit);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Timed out waiting for CFG to come ready.\n");
-		goto exit;
-	}
-
-	ql_write32(qdev, ICB_L, (u32) map);
-	ql_write32(qdev, ICB_H, (u32) (map >> 32));
-
-	mask = CFG_Q_MASK | (bit << 16);
-	value = bit | (q_id << CFG_Q_SHIFT);
-	ql_write32(qdev, CFG, (mask | value));
-
-	/*
-	 * Wait for the bit to clear after signaling hw.
-	 */
-	status = ql_wait_cfg(qdev, bit);
-exit:
-	ql_sem_unlock(qdev, SEM_ICB_MASK);	/* does flush too */
-	pci_unmap_single(qdev->pdev, map, size, direction);
-	return status;
-}
-
-/* Get a specific MAC address from the CAM.  Used for debug and reg dump. */
-int ql_get_mac_addr_reg(struct ql_adapter *qdev, u32 type, u16 index,
-			u32 *value)
-{
-	u32 offset = 0;
-	int status;
-
-	switch (type) {
-	case MAC_ADDR_TYPE_MULTI_MAC:
-	case MAC_ADDR_TYPE_CAM_MAC:
-		{
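-			/* Each address is read back 32 bits at a time: two
-			 * words of MAC address, and for CAM entries a third
-			 * "output" word that carries the routing info.
-			 */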
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
-				   (index << MAC_ADDR_IDX_SHIFT) | /* index */
-				   MAC_ADDR_ADR | MAC_ADDR_RS | type); /* type */
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MR, 0);
-			if (status)
-				goto exit;
-			*value++ = ql_read32(qdev, MAC_ADDR_DATA);
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
-				   (index << MAC_ADDR_IDX_SHIFT) | /* index */
-				   MAC_ADDR_ADR | MAC_ADDR_RS | type); /* type */
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MR, 0);
-			if (status)
-				goto exit;
-			*value++ = ql_read32(qdev, MAC_ADDR_DATA);
-			if (type == MAC_ADDR_TYPE_CAM_MAC) {
-				status =
-				    ql_wait_reg_rdy(qdev,
-					MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-				if (status)
-					goto exit;
-				ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
-					   (index << MAC_ADDR_IDX_SHIFT) | /* index */
-					   MAC_ADDR_ADR | MAC_ADDR_RS | type); /* type */
-				status =
-				    ql_wait_reg_rdy(qdev, MAC_ADDR_IDX,
-						    MAC_ADDR_MR, 0);
-				if (status)
-					goto exit;
-				*value++ = ql_read32(qdev, MAC_ADDR_DATA);
-			}
-			break;
-		}
-	case MAC_ADDR_TYPE_VLAN:
-	case MAC_ADDR_TYPE_MULTI_FLTR:
-	default:
-		netif_crit(qdev, ifup, qdev->ndev,
-			   "Address type %d not yet supported.\n", type);
-		status = -EPERM;
-	}
-exit:
-	return status;
-}
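-
-/* Illustration: the caller's value buffer must hold two u32s for
- * MAC_ADDR_TYPE_MULTI_MAC (lower/upper address words) and three for
- * MAC_ADDR_TYPE_CAM_MAC, where value[2] returns the CAM output
- * (routing) word.
- */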
-
-/* Set up a MAC, multicast or VLAN address for the
- * inbound frame matching.
- */
-static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type,
-			       u16 index)
-{
-	u32 offset = 0;
-	int status = 0;
-
-	switch (type) {
-	case MAC_ADDR_TYPE_MULTI_MAC:
-		{
-			u32 upper = (addr[0] << 8) | addr[1];
-			u32 lower = (addr[2] << 24) | (addr[3] << 16) |
-					(addr[4] << 8) | (addr[5]);
-
-			status =
-				ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset++) |
-				(index << MAC_ADDR_IDX_SHIFT) |
-				type | MAC_ADDR_E);
-			ql_write32(qdev, MAC_ADDR_DATA, lower);
-			status =
-				ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset++) |
-				(index << MAC_ADDR_IDX_SHIFT) |
-				type | MAC_ADDR_E);
-
-			ql_write32(qdev, MAC_ADDR_DATA, upper);
-			status =
-				ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			break;
-		}
-	case MAC_ADDR_TYPE_CAM_MAC:
-		{
-			u32 cam_output;
-			u32 upper = (addr[0] << 8) | addr[1];
-			u32 lower =
-			    (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) |
-			    (addr[5]);
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
-				   (index << MAC_ADDR_IDX_SHIFT) | /* index */
-				   type);	/* type */
-			ql_write32(qdev, MAC_ADDR_DATA, lower);
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset++) | /* offset */
-				   (index << MAC_ADDR_IDX_SHIFT) | /* index */
-				   type);	/* type */
-			ql_write32(qdev, MAC_ADDR_DATA, upper);
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, (offset) |	/* offset */
-				   (index << MAC_ADDR_IDX_SHIFT) |	/* index */
-				   type);	/* type */
-			/* This field should also include the queue id
-			 * and possibly the function id.  Right now we hardcode
-			 * the route field to NIC core.
-			 */
-			cam_output = (CAM_OUT_ROUTE_NIC |
-				      (qdev->func << CAM_OUT_FUNC_SHIFT) |
-				      (0 << CAM_OUT_CQ_ID_SHIFT));
-			if (qdev->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)
-				cam_output |= CAM_OUT_RV;
-			/* route to NIC core */
-			ql_write32(qdev, MAC_ADDR_DATA, cam_output);
-			break;
-		}
-	case MAC_ADDR_TYPE_VLAN:
-		{
-			u32 enable_bit = *((u32 *) &addr[0]);
-			/* For VLAN, the addr actually holds a bit that
-			 * either enables or disables the vlan id we are
-			 * addressing. It's either MAC_ADDR_E on or off.
-			 * That's bit-27 we're talking about.
-			 */
-			status =
-			    ql_wait_reg_rdy(qdev,
-				MAC_ADDR_IDX, MAC_ADDR_MW, 0);
-			if (status)
-				goto exit;
-			ql_write32(qdev, MAC_ADDR_IDX, offset |	/* offset */
-				   (index << MAC_ADDR_IDX_SHIFT) |	/* index */
-				   type |	/* type */
-				   enable_bit);	/* enable/disable */
-			break;
-		}
-	case MAC_ADDR_TYPE_MULTI_FLTR:
-	default:
-		netif_crit(qdev, ifup, qdev->ndev,
-			   "Address type %d not yet supported.\n", type);
-		status = -EPERM;
-	}
-exit:
-	return status;
-}
-
-/* Set or clear MAC address in hardware. We sometimes
- * have to clear it to prevent wrong frame routing
- * especially in a bonding environment.
- */
-static int ql_set_mac_addr(struct ql_adapter *qdev, int set)
-{
-	int status;
-	char zero_mac_addr[ETH_ALEN];
-	char *addr;
-
-	if (set) {
-		addr = &qdev->current_mac_addr[0];
-		netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-			     "Set MAC addr %pM\n", addr);
-	} else {
-		eth_zero_addr(zero_mac_addr);
-		addr = &zero_mac_addr[0];
-		netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-			     "Clearing MAC address\n");
-	}
-	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		return status;
-	status = ql_set_mac_addr_reg(qdev, (u8 *) addr,
-			MAC_ADDR_TYPE_CAM_MAC, qdev->func * MAX_CQ);
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Failed to init MAC address.\n");
-	return status;
-}
-
-void ql_link_on(struct ql_adapter *qdev)
-{
-	netif_err(qdev, link, qdev->ndev, "Link is up.\n");
-	netif_carrier_on(qdev->ndev);
-	ql_set_mac_addr(qdev, 1);
-}
-
-void ql_link_off(struct ql_adapter *qdev)
-{
-	netif_err(qdev, link, qdev->ndev, "Link is down.\n");
-	netif_carrier_off(qdev->ndev);
-	ql_set_mac_addr(qdev, 0);
-}
-
-/* Get a specific frame routing value from the CAM.
- * Used for debug and reg dump.
- */
-int ql_get_routing_reg(struct ql_adapter *qdev, u32 index, u32 *value)
-{
-	int status = 0;
-
-	status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MW, 0);
-	if (status)
-		goto exit;
-
-	ql_write32(qdev, RT_IDX,
-		   RT_IDX_TYPE_NICQ | RT_IDX_RS | (index << RT_IDX_IDX_SHIFT));
-	status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MR, 0);
-	if (status)
-		goto exit;
-	*value = ql_read32(qdev, RT_DATA);
-exit:
-	return status;
-}
-
-/* The NIC function for this chip has 16 routing indexes.  Each one can be used
- * to route different frame types to various inbound queues.  We send broadcast/
- * multicast/error frames to the default queue for slow handling,
- * and CAM hit/RSS frames to the fast handling queues.
- */
-static int ql_set_routing_reg(struct ql_adapter *qdev, u32 index, u32 mask,
-			      int enable)
-{
-	int status = -EINVAL; /* Return error if no mask match. */
-	u32 value = 0;
-
-	switch (mask) {
-	case RT_IDX_CAM_HIT:
-		{
-			value = RT_IDX_DST_CAM_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_CAM_HIT_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case RT_IDX_VALID:	/* Promiscuous Mode frames. */
-		{
-			value = RT_IDX_DST_DFLT_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_PROMISCUOUS_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case RT_IDX_ERR:	/* Pass up MAC,IP,TCP/UDP error frames. */
-		{
-			value = RT_IDX_DST_DFLT_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_ALL_ERR_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case RT_IDX_IP_CSUM_ERR: /* Pass up IP CSUM error frames. */
-		{
-			value = RT_IDX_DST_DFLT_Q | /* dest */
-				RT_IDX_TYPE_NICQ | /* type */
-				(RT_IDX_IP_CSUM_ERR_SLOT <<
-				RT_IDX_IDX_SHIFT); /* index */
-			break;
-		}
-	case RT_IDX_TU_CSUM_ERR: /* Pass up TCP/UDP CSUM error frames. */
-		{
-			value = RT_IDX_DST_DFLT_Q | /* dest */
-				RT_IDX_TYPE_NICQ | /* type */
-				(RT_IDX_TCP_UDP_CSUM_ERR_SLOT <<
-				RT_IDX_IDX_SHIFT); /* index */
-			break;
-		}
-	case RT_IDX_BCAST:	/* Pass up Broadcast frames to default Q. */
-		{
-			value = RT_IDX_DST_DFLT_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_BCAST_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case RT_IDX_MCAST:	/* Pass up All Multicast frames. */
-		{
-			value = RT_IDX_DST_DFLT_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_ALLMULTI_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case RT_IDX_MCAST_MATCH:	/* Pass up matched Multicast frames. */
-		{
-			value = RT_IDX_DST_DFLT_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_MCAST_MATCH_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case RT_IDX_RSS_MATCH:	/* Pass up matched RSS frames. */
-		{
-			value = RT_IDX_DST_RSS |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (RT_IDX_RSS_MATCH_SLOT << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	case 0:		/* Clear the E-bit on an entry. */
-		{
-			value = RT_IDX_DST_DFLT_Q |	/* dest */
-			    RT_IDX_TYPE_NICQ |	/* type */
-			    (index << RT_IDX_IDX_SHIFT);/* index */
-			break;
-		}
-	default:
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Mask type %d not yet supported.\n", mask);
-		status = -EPERM;
-		goto exit;
-	}
-
-	if (value) {
-		status = ql_wait_reg_rdy(qdev, RT_IDX, RT_IDX_MW, 0);
-		if (status)
-			goto exit;
-		value |= (enable ? RT_IDX_E : 0);
-		ql_write32(qdev, RT_IDX, value);
-		ql_write32(qdev, RT_DATA, enable ? mask : 0);
-	}
-exit:
-	return status;
-}
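-
-/* Example (illustrative): steering all broadcast frames to the default
- * queue, and clearing that routing entry again later:
- *
- *	status = ql_set_routing_reg(qdev, RT_IDX_BCAST_SLOT,
- *				    RT_IDX_BCAST, 1);
- *	...
- *	status = ql_set_routing_reg(qdev, RT_IDX_BCAST_SLOT,
- *				    RT_IDX_BCAST, 0);
- */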
-
-static void ql_enable_interrupts(struct ql_adapter *qdev)
-{
-	ql_write32(qdev, INTR_EN, (INTR_EN_EI << 16) | INTR_EN_EI);
-}
-
-static void ql_disable_interrupts(struct ql_adapter *qdev)
-{
-	ql_write32(qdev, INTR_EN, (INTR_EN_EI << 16));
-}
-
-/* If we're running with multiple MSI-X vectors then we enable on the fly.
- * Otherwise, we may have multiple outstanding workers and don't want to
- * enable until the last one finishes. In this case, the irq_cnt gets
- * incremented every time we queue a worker and decremented every time
- * a worker finishes.  Once it hits zero we enable the interrupt.
- */
-u32 ql_enable_completion_interrupt(struct ql_adapter *qdev, u32 intr)
-{
-	u32 var = 0;
-	unsigned long hw_flags = 0;
-	struct intr_context *ctx = qdev->intr_context + intr;
-
-	if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags) && intr)) {
-		/* Always enable if we're MSIX multi interrupts and
-		 * it's not the default (zeroeth) interrupt.
-		 */
-		ql_write32(qdev, INTR_EN,
-			   ctx->intr_en_mask);
-		var = ql_read32(qdev, STS);
-		return var;
-	}
-
-	spin_lock_irqsave(&qdev->hw_lock, hw_flags);
-	if (atomic_dec_and_test(&ctx->irq_cnt)) {
-		ql_write32(qdev, INTR_EN,
-			   ctx->intr_en_mask);
-		var = ql_read32(qdev, STS);
-	}
-	spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
-	return var;
-}
-
-static u32 ql_disable_completion_interrupt(struct ql_adapter *qdev, u32 intr)
-{
-	u32 var = 0;
-	struct intr_context *ctx;
-
-	/* HW disables for us if we're MSIX multi interrupts and
-	 * it's not the default (zeroeth) interrupt.
-	 */
-	if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags) && intr))
-		return 0;
-
-	ctx = qdev->intr_context + intr;
-	spin_lock(&qdev->hw_lock);
-	if (!atomic_read(&ctx->irq_cnt)) {
-		ql_write32(qdev, INTR_EN,
-		ctx->intr_dis_mask);
-		var = ql_read32(qdev, STS);
-	}
-	atomic_inc(&ctx->irq_cnt);
-	spin_unlock(&qdev->hw_lock);
-	return var;
-}
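-
-/* Worked sequence for the INTx/MSI path (illustrative, not driver code):
- *
- *	ql_disable_completion_interrupt(qdev, 0); // irq_cnt 0->1, masks
- *	ql_disable_completion_interrupt(qdev, 0); // irq_cnt 1->2
- *	ql_enable_completion_interrupt(qdev, 0);  // irq_cnt 2->1, stays masked
- *	ql_enable_completion_interrupt(qdev, 0);  // irq_cnt 1->0, re-enables
- */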
-
-static void ql_enable_all_completion_interrupts(struct ql_adapter *qdev)
-{
-	int i;
-	for (i = 0; i < qdev->intr_count; i++) {
-		/* The enable call does an atomic_dec_and_test
-		 * and enables only if the result is zero.
-		 * So we precharge it here.
-		 */
-		if (unlikely(!test_bit(QL_MSIX_ENABLED, &qdev->flags) ||
-			i == 0))
-			atomic_set(&qdev->intr_context[i].irq_cnt, 1);
-		ql_enable_completion_interrupt(qdev, i);
-	}
-}
-
-static int ql_validate_flash(struct ql_adapter *qdev, u32 size, const char *str)
-{
-	int status, i;
-	u16 csum = 0;
-	__le16 *flash = (__le16 *)&qdev->flash;
-
-	status = strncmp((char *)&qdev->flash, str, 4);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev, "Invalid flash signature.\n");
-		return status;
-	}
-
-	for (i = 0; i < size; i++)
-		csum += le16_to_cpu(*flash++);
-
-	if (csum)
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Invalid flash checksum, csum = 0x%.04x.\n", csum);
-
-	return csum;
-}
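-
-/* The image is considered intact when all 16-bit words, including the
- * stored checksum word, sum to zero modulo 2^16.  Toy example (not real
- * flash data): a two-word image of 0x1234 and 0xedcc sums to 0x10000,
- * which truncates to 0 in the u16 accumulator, so it validates.
- */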
-
-static int ql_read_flash_word(struct ql_adapter *qdev, int offset, __le32 *data)
-{
-	int status = 0;
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev,
-			FLASH_ADDR, FLASH_ADDR_RDY, FLASH_ADDR_ERR);
-	if (status)
-		goto exit;
-	/* set up for reg read */
-	ql_write32(qdev, FLASH_ADDR, FLASH_ADDR_R | offset);
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev,
-			FLASH_ADDR, FLASH_ADDR_RDY, FLASH_ADDR_ERR);
-	if (status)
-		goto exit;
-	/* This data is stored on flash as an array of
-	 * __le32.  Since ql_read32() returns cpu endian
-	 * we need to swap it back.
-	 */
-	*data = cpu_to_le32(ql_read32(qdev, FLASH_DATA));
-exit:
-	return status;
-}
-
-static int ql_get_8000_flash_params(struct ql_adapter *qdev)
-{
-	u32 i, size;
-	int status;
-	__le32 *p = (__le32 *)&qdev->flash;
-	u32 offset;
-	u8 mac_addr[6];
-
-	/* Get flash offset for function and adjust
-	 * for dword access.
-	 */
-	if (!qdev->port)
-		offset = FUNC0_FLASH_OFFSET / sizeof(u32);
-	else
-		offset = FUNC1_FLASH_OFFSET / sizeof(u32);
-
-	if (ql_sem_spinlock(qdev, SEM_FLASH_MASK))
-		return -ETIMEDOUT;
-
-	size = sizeof(struct flash_params_8000) / sizeof(u32);
-	for (i = 0; i < size; i++, p++) {
-		status = ql_read_flash_word(qdev, i+offset, p);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Error reading flash.\n");
-			goto exit;
-		}
-	}
-
-	status = ql_validate_flash(qdev,
-			sizeof(struct flash_params_8000) / sizeof(u16),
-			"8000");
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev, "Invalid flash.\n");
-		status = -EINVAL;
-		goto exit;
-	}
-
-	/* Extract either manufacturer or BOFM modified
-	 * MAC address.
-	 */
-	if (qdev->flash.flash_params_8000.data_type1 == 2)
-		memcpy(mac_addr,
-			qdev->flash.flash_params_8000.mac_addr1,
-			qdev->ndev->addr_len);
-	else
-		memcpy(mac_addr,
-			qdev->flash.flash_params_8000.mac_addr,
-			qdev->ndev->addr_len);
-
-	if (!is_valid_ether_addr(mac_addr)) {
-		netif_err(qdev, ifup, qdev->ndev, "Invalid MAC address.\n");
-		status = -EINVAL;
-		goto exit;
-	}
-
-	memcpy(qdev->ndev->dev_addr,
-		mac_addr,
-		qdev->ndev->addr_len);
-
-exit:
-	ql_sem_unlock(qdev, SEM_FLASH_MASK);
-	return status;
-}
-
-static int ql_get_8012_flash_params(struct ql_adapter *qdev)
-{
-	int i;
-	int status;
-	__le32 *p = (__le32 *)&qdev->flash;
-	u32 offset = 0;
-	u32 size = sizeof(struct flash_params_8012) / sizeof(u32);
-
-	/* Second function's parameters follow the first
-	 * function's.
-	 */
-	if (qdev->port)
-		offset = size;
-
-	if (ql_sem_spinlock(qdev, SEM_FLASH_MASK))
-		return -ETIMEDOUT;
-
-	for (i = 0; i < size; i++, p++) {
-		status = ql_read_flash_word(qdev, i+offset, p);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Error reading flash.\n");
-			goto exit;
-		}
-	}
-
-	status = ql_validate_flash(qdev,
-			sizeof(struct flash_params_8012) / sizeof(u16),
-			"8012");
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev, "Invalid flash.\n");
-		status = -EINVAL;
-		goto exit;
-	}
-
-	if (!is_valid_ether_addr(qdev->flash.flash_params_8012.mac_addr)) {
-		status = -EINVAL;
-		goto exit;
-	}
-
-	memcpy(qdev->ndev->dev_addr,
-		qdev->flash.flash_params_8012.mac_addr,
-		qdev->ndev->addr_len);
-
-exit:
-	ql_sem_unlock(qdev, SEM_FLASH_MASK);
-	return status;
-}
-
-/* xgmac registers are located behind the xgmac_addr and xgmac_data
- * register pair.  Each read/write requires us to wait for the ready
- * bit before reading/writing the data.
- */
-static int ql_write_xgmac_reg(struct ql_adapter *qdev, u32 reg, u32 data)
-{
-	int status;
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev,
-			XGMAC_ADDR, XGMAC_ADDR_RDY, XGMAC_ADDR_XME);
-	if (status)
-		return status;
-	/* write the data to the data reg */
-	ql_write32(qdev, XGMAC_DATA, data);
-	/* trigger the write */
-	ql_write32(qdev, XGMAC_ADDR, reg);
-	return status;
-}
-
-/* xgmac registers are located behind the xgmac_addr and xgmac_data
- * register pair.  Each read/write requires us to wait for the ready
- * bit before reading/writing the data.
- */
-int ql_read_xgmac_reg(struct ql_adapter *qdev, u32 reg, u32 *data)
-{
-	int status = 0;
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev,
-			XGMAC_ADDR, XGMAC_ADDR_RDY, XGMAC_ADDR_XME);
-	if (status)
-		goto exit;
-	/* set up for reg read */
-	ql_write32(qdev, XGMAC_ADDR, reg | XGMAC_ADDR_R);
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev,
-			XGMAC_ADDR, XGMAC_ADDR_RDY, XGMAC_ADDR_XME);
-	if (status)
-		goto exit;
-	/* get the data */
-	*data = ql_read32(qdev, XGMAC_DATA);
-exit:
-	return status;
-}
-
-/* This is used for reading the 64-bit statistics regs. */
-int ql_read_xgmac_reg64(struct ql_adapter *qdev, u32 reg, u64 *data)
-{
-	int status = 0;
-	u32 hi = 0;
-	u32 lo = 0;
-
-	status = ql_read_xgmac_reg(qdev, reg, &lo);
-	if (status)
-		goto exit;
-
-	status = ql_read_xgmac_reg(qdev, reg + 4, &hi);
-	if (status)
-		goto exit;
-
-	*data = (u64) lo | ((u64) hi << 32);
-
-exit:
-	return status;
-}
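-
-/* Example caller (illustrative; the register offset here is only a
- * placeholder, not a real xgmac address):
- *
- *	u64 stat;
- *
- *	if (ql_read_xgmac_reg64(qdev, 0x200, &stat) == 0)
- *		; // low word read from 0x200, high word from 0x204
- */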
-
-static int ql_8000_port_initialize(struct ql_adapter *qdev)
-{
-	int status;
-	/*
-	 * Get MPI firmware version for driver banner
-	 * and ethtool info.
-	 */
-	status = ql_mb_about_fw(qdev);
-	if (status)
-		goto exit;
-	status = ql_mb_get_fw_state(qdev);
-	if (status)
-		goto exit;
-	/* Wake up a worker to get/set the TX/RX frame sizes. */
-	queue_delayed_work(qdev->workqueue, &qdev->mpi_port_cfg_work, 0);
-exit:
-	return status;
-}
-
-/* Take the MAC Core out of reset.
- * Enable statistics counting.
- * Take the transmitter/receiver out of reset.
- * This functionality may be done in the MPI firmware at a
- * later date.
- */
-static int ql_8012_port_initialize(struct ql_adapter *qdev)
-{
-	int status = 0;
-	u32 data;
-
-	if (ql_sem_trylock(qdev, qdev->xg_sem_mask)) {
-		/* Another function has the semaphore, so
-		 * wait for the port init bit to come ready.
-		 */
-		netif_info(qdev, link, qdev->ndev,
-			   "Another function has the semaphore, so wait for the port init bit to come ready.\n");
-		status = ql_wait_reg_rdy(qdev, STS, qdev->port_init, 0);
-		if (status) {
-			netif_crit(qdev, link, qdev->ndev,
-				   "Port initialize timed out.\n");
-		}
-		return status;
-	}
-
-	netif_info(qdev, link, qdev->ndev, "Got xgmac semaphore!\n");
-	/* Set the core reset. */
-	status = ql_read_xgmac_reg(qdev, GLOBAL_CFG, &data);
-	if (status)
-		goto end;
-	data |= GLOBAL_CFG_RESET;
-	status = ql_write_xgmac_reg(qdev, GLOBAL_CFG, data);
-	if (status)
-		goto end;
-
-	/* Clear the core reset and turn on jumbo for receiver. */
-	data &= ~GLOBAL_CFG_RESET;	/* Clear core reset. */
-	data |= GLOBAL_CFG_JUMBO;	/* Turn on jumbo. */
-	data |= GLOBAL_CFG_TX_STAT_EN;
-	data |= GLOBAL_CFG_RX_STAT_EN;
-	status = ql_write_xgmac_reg(qdev, GLOBAL_CFG, data);
-	if (status)
-		goto end;
-
-	/* Enable the transmitter and clear its reset. */
-	status = ql_read_xgmac_reg(qdev, TX_CFG, &data);
-	if (status)
-		goto end;
-	data &= ~TX_CFG_RESET;	/* Clear the TX MAC reset. */
-	data |= TX_CFG_EN;	/* Enable the transmitter. */
-	status = ql_write_xgmac_reg(qdev, TX_CFG, data);
-	if (status)
-		goto end;
-
-	/* Enable the receiver and clear its reset. */
-	status = ql_read_xgmac_reg(qdev, RX_CFG, &data);
-	if (status)
-		goto end;
-	data &= ~RX_CFG_RESET;	/* Clear the RX MAC reset. */
-	data |= RX_CFG_EN;	/* Enable the receiver. */
-	status = ql_write_xgmac_reg(qdev, RX_CFG, data);
-	if (status)
-		goto end;
-
-	/* Turn on jumbo. */
-	status =
-	    ql_write_xgmac_reg(qdev, MAC_TX_PARAMS, MAC_TX_PARAMS_JUMBO | (0x2580 << 16));
-	if (status)
-		goto end;
-	status =
-	    ql_write_xgmac_reg(qdev, MAC_RX_PARAMS, 0x2580);
-	if (status)
-		goto end;
-
-	/* Signal to the world that the port is enabled. */
-	ql_write32(qdev, STS, ((qdev->port_init << 16) | qdev->port_init));
-end:
-	ql_sem_unlock(qdev, qdev->xg_sem_mask);
-	return status;
-}
-
-static inline unsigned int ql_lbq_block_size(struct ql_adapter *qdev)
-{
-	return PAGE_SIZE << qdev->lbq_buf_order;
-}
-
-/* Get the next large buffer. */
-static struct bq_desc *ql_get_curr_lbuf(struct rx_ring *rx_ring)
-{
-	struct bq_desc *lbq_desc = &rx_ring->lbq[rx_ring->lbq_curr_idx];
-	rx_ring->lbq_curr_idx++;
-	if (rx_ring->lbq_curr_idx == rx_ring->lbq_len)
-		rx_ring->lbq_curr_idx = 0;
-	rx_ring->lbq_free_cnt++;
-	return lbq_desc;
-}
-
-static struct bq_desc *ql_get_curr_lchunk(struct ql_adapter *qdev,
-		struct rx_ring *rx_ring)
-{
-	struct bq_desc *lbq_desc = ql_get_curr_lbuf(rx_ring);
-
-	pci_dma_sync_single_for_cpu(qdev->pdev,
-					dma_unmap_addr(lbq_desc, mapaddr),
-				    rx_ring->lbq_buf_size,
-					PCI_DMA_FROMDEVICE);
-
-	/* If it's the last chunk of our master page then
-	 * we unmap it.
-	 */
-	if ((lbq_desc->p.pg_chunk.offset + rx_ring->lbq_buf_size)
-					== ql_lbq_block_size(qdev))
-		pci_unmap_page(qdev->pdev,
-				lbq_desc->p.pg_chunk.map,
-				ql_lbq_block_size(qdev),
-				PCI_DMA_FROMDEVICE);
-	return lbq_desc;
-}
-
-/* Get the next small buffer. */
-static struct bq_desc *ql_get_curr_sbuf(struct rx_ring *rx_ring)
-{
-	struct bq_desc *sbq_desc = &rx_ring->sbq[rx_ring->sbq_curr_idx];
-	rx_ring->sbq_curr_idx++;
-	if (rx_ring->sbq_curr_idx == rx_ring->sbq_len)
-		rx_ring->sbq_curr_idx = 0;
-	rx_ring->sbq_free_cnt++;
-	return sbq_desc;
-}
-
-/* Update an rx ring index. */
-static void ql_update_cq(struct rx_ring *rx_ring)
-{
-	rx_ring->cnsmr_idx++;
-	rx_ring->curr_entry++;
-	if (unlikely(rx_ring->cnsmr_idx == rx_ring->cq_len)) {
-		rx_ring->cnsmr_idx = 0;
-		rx_ring->curr_entry = rx_ring->cq_base;
-	}
-}
-
-static void ql_write_cq_idx(struct rx_ring *rx_ring)
-{
-	ql_write_db_reg(rx_ring->cnsmr_idx, rx_ring->cnsmr_idx_db_reg);
-}
-
-static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring,
-						struct bq_desc *lbq_desc)
-{
-	if (!rx_ring->pg_chunk.page) {
-		u64 map;
-		rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC,
-						qdev->lbq_buf_order);
-		if (unlikely(!rx_ring->pg_chunk.page)) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "page allocation failed.\n");
-			return -ENOMEM;
-		}
-		rx_ring->pg_chunk.offset = 0;
-		map = pci_map_page(qdev->pdev, rx_ring->pg_chunk.page,
-					0, ql_lbq_block_size(qdev),
-					PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(qdev->pdev, map)) {
-			__free_pages(rx_ring->pg_chunk.page,
-					qdev->lbq_buf_order);
-			rx_ring->pg_chunk.page = NULL;
-			netif_err(qdev, drv, qdev->ndev,
-				  "PCI mapping failed.\n");
-			return -ENOMEM;
-		}
-		rx_ring->pg_chunk.map = map;
-		rx_ring->pg_chunk.va = page_address(rx_ring->pg_chunk.page);
-	}
-
-	/* Copy the current master pg_chunk info
-	 * to the current descriptor.
-	 */
-	lbq_desc->p.pg_chunk = rx_ring->pg_chunk;
-
-	/* Adjust the master page chunk for next
-	 * buffer get.
-	 */
-	rx_ring->pg_chunk.offset += rx_ring->lbq_buf_size;
-	if (rx_ring->pg_chunk.offset == ql_lbq_block_size(qdev)) {
-		rx_ring->pg_chunk.page = NULL;
-		lbq_desc->p.pg_chunk.last_flag = 1;
-	} else {
-		rx_ring->pg_chunk.va += rx_ring->lbq_buf_size;
-		get_page(rx_ring->pg_chunk.page);
-		lbq_desc->p.pg_chunk.last_flag = 0;
-	}
-	return 0;
-}
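-
-/* Worked example of the chunk carving (illustrative numbers): with 4 KiB
- * pages, lbq_buf_order == 1 and lbq_buf_size == 2048, ql_lbq_block_size()
- * is 8192, so one master page supplies chunks at offsets 0, 2048, 4096
- * and 6144.  The offset-6144 chunk exhausts the block: pg_chunk.page is
- * set to NULL and last_flag is set, so the next call allocates a fresh
- * page.
- */
-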
-/* Process (refill) a large buffer queue. */
-static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
-{
-	u32 clean_idx = rx_ring->lbq_clean_idx;
-	u32 start_idx = clean_idx;
-	struct bq_desc *lbq_desc;
-	u64 map;
-	int i;
-
-	while (rx_ring->lbq_free_cnt > 32) {
-		for (i = (rx_ring->lbq_clean_idx % 16); i < 16; i++) {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "lbq: try cleaning clean_idx = %d.\n",
-				     clean_idx);
-			lbq_desc = &rx_ring->lbq[clean_idx];
-			if (ql_get_next_chunk(qdev, rx_ring, lbq_desc)) {
-				rx_ring->lbq_clean_idx = clean_idx;
-				netif_err(qdev, ifup, qdev->ndev,
-						"Could not get a page chunk, i=%d, clean_idx =%d .\n",
-						i, clean_idx);
-				return;
-			}
-
-			map = lbq_desc->p.pg_chunk.map +
-				lbq_desc->p.pg_chunk.offset;
-			dma_unmap_addr_set(lbq_desc, mapaddr, map);
-			dma_unmap_len_set(lbq_desc, maplen,
-					rx_ring->lbq_buf_size);
-			*lbq_desc->addr = cpu_to_le64(map);
-
-			pci_dma_sync_single_for_device(qdev->pdev, map,
-						rx_ring->lbq_buf_size,
-						PCI_DMA_FROMDEVICE);
-			clean_idx++;
-			if (clean_idx == rx_ring->lbq_len)
-				clean_idx = 0;
-		}
-
-		rx_ring->lbq_clean_idx = clean_idx;
-		rx_ring->lbq_prod_idx += 16;
-		if (rx_ring->lbq_prod_idx == rx_ring->lbq_len)
-			rx_ring->lbq_prod_idx = 0;
-		rx_ring->lbq_free_cnt -= 16;
-	}
-
-	if (start_idx != clean_idx) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "lbq: updating prod idx = %d.\n",
-			     rx_ring->lbq_prod_idx);
-		ql_write_db_reg(rx_ring->lbq_prod_idx,
-				rx_ring->lbq_prod_idx_db_reg);
-	}
-}
-
-/* Process (refill) a small buffer queue. */
-static void ql_update_sbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
-{
-	u32 clean_idx = rx_ring->sbq_clean_idx;
-	u32 start_idx = clean_idx;
-	struct bq_desc *sbq_desc;
-	u64 map;
-	int i;
-
-	while (rx_ring->sbq_free_cnt > 16) {
-		for (i = (rx_ring->sbq_clean_idx % 16); i < 16; i++) {
-			sbq_desc = &rx_ring->sbq[clean_idx];
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "sbq: try cleaning clean_idx = %d.\n",
-				     clean_idx);
-			if (sbq_desc->p.skb == NULL) {
-				netif_printk(qdev, rx_status, KERN_DEBUG,
-					     qdev->ndev,
-					     "sbq: getting new skb for index %d.\n",
-					     sbq_desc->index);
-				sbq_desc->p.skb =
-				    netdev_alloc_skb(qdev->ndev,
-						     SMALL_BUFFER_SIZE);
-				if (sbq_desc->p.skb == NULL) {
-					rx_ring->sbq_clean_idx = clean_idx;
-					return;
-				}
-				skb_reserve(sbq_desc->p.skb, QLGE_SB_PAD);
-				map = pci_map_single(qdev->pdev,
-						     sbq_desc->p.skb->data,
-						     rx_ring->sbq_buf_size,
-						     PCI_DMA_FROMDEVICE);
-				if (pci_dma_mapping_error(qdev->pdev, map)) {
-					netif_err(qdev, ifup, qdev->ndev,
-						  "PCI mapping failed.\n");
-					rx_ring->sbq_clean_idx = clean_idx;
-					dev_kfree_skb_any(sbq_desc->p.skb);
-					sbq_desc->p.skb = NULL;
-					return;
-				}
-				dma_unmap_addr_set(sbq_desc, mapaddr, map);
-				dma_unmap_len_set(sbq_desc, maplen,
-						  rx_ring->sbq_buf_size);
-				*sbq_desc->addr = cpu_to_le64(map);
-			}
-
-			clean_idx++;
-			if (clean_idx == rx_ring->sbq_len)
-				clean_idx = 0;
-		}
-		rx_ring->sbq_clean_idx = clean_idx;
-		rx_ring->sbq_prod_idx += 16;
-		if (rx_ring->sbq_prod_idx == rx_ring->sbq_len)
-			rx_ring->sbq_prod_idx = 0;
-		rx_ring->sbq_free_cnt -= 16;
-	}
-
-	if (start_idx != clean_idx) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "sbq: updating prod idx = %d.\n",
-			     rx_ring->sbq_prod_idx);
-		ql_write_db_reg(rx_ring->sbq_prod_idx,
-				rx_ring->sbq_prod_idx_db_reg);
-	}
-}
-
-static void ql_update_buffer_queues(struct ql_adapter *qdev,
-				    struct rx_ring *rx_ring)
-{
-	ql_update_sbq(qdev, rx_ring);
-	ql_update_lbq(qdev, rx_ring);
-}
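-
-/* Both refill paths work in batches of 16 descriptors per inner loop and
- * ring the producer doorbell at most once per call: the lbq refills while
- * more than 32 buffers are free, the sbq while more than 16 are free.
- */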
-
-/* Unmaps tx buffers.  Can be called from send() if a pci mapping
- * fails at some stage, or from the interrupt when a tx completes.
- */
-static void ql_unmap_send(struct ql_adapter *qdev,
-			  struct tx_ring_desc *tx_ring_desc, int mapped)
-{
-	int i;
-	for (i = 0; i < mapped; i++) {
-		if (i == 0 || (i == 7 && mapped > 7)) {
-			/*
-			 * Unmap the skb->data area, or the
-			 * external sglist (AKA the Outbound
-			 * Address List (OAL)).
-			 * If it's the zeroeth element, then it's
-			 * the skb->data area.  If it's the 7th
-			 * element and there are more than 6 frags,
-			 * then it's an OAL.
-			 */
-			if (i == 7) {
-				netif_printk(qdev, tx_done, KERN_DEBUG,
-					     qdev->ndev,
-					     "unmapping OAL area.\n");
-			}
-			pci_unmap_single(qdev->pdev,
-					 dma_unmap_addr(&tx_ring_desc->map[i],
-							mapaddr),
-					 dma_unmap_len(&tx_ring_desc->map[i],
-						       maplen),
-					 PCI_DMA_TODEVICE);
-		} else {
-			netif_printk(qdev, tx_done, KERN_DEBUG, qdev->ndev,
-				     "unmapping frag %d.\n", i);
-			pci_unmap_page(qdev->pdev,
-				       dma_unmap_addr(&tx_ring_desc->map[i],
-						      mapaddr),
-				       dma_unmap_len(&tx_ring_desc->map[i],
-						     maplen), PCI_DMA_TODEVICE);
-		}
-	}
-}
-
-/* Map the buffers for this transmit.  This will return
- * NETDEV_TX_BUSY or NETDEV_TX_OK based on success.
- */
-static int ql_map_send(struct ql_adapter *qdev,
-		       struct ob_mac_iocb_req *mac_iocb_ptr,
-		       struct sk_buff *skb, struct tx_ring_desc *tx_ring_desc)
-{
-	int len = skb_headlen(skb);
-	dma_addr_t map;
-	int frag_idx, err, map_idx = 0;
-	struct tx_buf_desc *tbd = mac_iocb_ptr->tbd;
-	int frag_cnt = skb_shinfo(skb)->nr_frags;
-
-	if (frag_cnt) {
-		netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev,
-			     "frag_cnt = %d.\n", frag_cnt);
-	}
-	/*
-	 * Map the skb buffer first.
-	 */
-	map = pci_map_single(qdev->pdev, skb->data, len, PCI_DMA_TODEVICE);
-
-	err = pci_dma_mapping_error(qdev->pdev, map);
-	if (err) {
-		netif_err(qdev, tx_queued, qdev->ndev,
-			  "PCI mapping failed with error: %d\n", err);
-
-		return NETDEV_TX_BUSY;
-	}
-
-	tbd->len = cpu_to_le32(len);
-	tbd->addr = cpu_to_le64(map);
-	dma_unmap_addr_set(&tx_ring_desc->map[map_idx], mapaddr, map);
-	dma_unmap_len_set(&tx_ring_desc->map[map_idx], maplen, len);
-	map_idx++;
-
-	/*
-	 * This loop fills the remainder of the 8 address descriptors
-	 * in the IOCB.  If there are more than 7 fragments, then the
-	 * eighth address desc will point to an external list (OAL).
-	 * When this happens, the remainder of the frags will be stored
-	 * in this list.
-	 */
-	for (frag_idx = 0; frag_idx < frag_cnt; frag_idx++, map_idx++) {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_idx];
-		tbd++;
-		if (frag_idx == 6 && frag_cnt > 7) {
-			/* Let's tack on an sglist.
-			 * Our control block will now
-			 * look like this:
-			 * iocb->seg[0] = skb->data
-			 * iocb->seg[1] = frag[0]
-			 * iocb->seg[2] = frag[1]
-			 * iocb->seg[3] = frag[2]
-			 * iocb->seg[4] = frag[3]
-			 * iocb->seg[5] = frag[4]
-			 * iocb->seg[6] = frag[5]
-			 * iocb->seg[7] = ptr to OAL (external sglist)
-			 * oal->seg[0] = frag[6]
-			 * oal->seg[1] = frag[7]
-			 * oal->seg[2] = frag[8]
-			 * oal->seg[3] = frag[9]
-			 * oal->seg[4] = frag[10]
-			 *      etc...
-			 */
-			/* Tack on the OAL in the eighth segment of IOCB. */
-			map = pci_map_single(qdev->pdev, &tx_ring_desc->oal,
-					     sizeof(struct oal),
-					     PCI_DMA_TODEVICE);
-			err = pci_dma_mapping_error(qdev->pdev, map);
-			if (err) {
-				netif_err(qdev, tx_queued, qdev->ndev,
-					  "PCI mapping outbound address list with error: %d\n",
-					  err);
-				goto map_error;
-			}
-
-			tbd->addr = cpu_to_le64(map);
-			/*
-			 * The length is the number of fragments
-			 * that remain to be mapped times the length
-			 * of our sglist (OAL).
-			 */
-			tbd->len =
-			    cpu_to_le32((sizeof(struct tx_buf_desc) *
-					 (frag_cnt - frag_idx)) | TX_DESC_C);
-			dma_unmap_addr_set(&tx_ring_desc->map[map_idx], mapaddr,
-					   map);
-			dma_unmap_len_set(&tx_ring_desc->map[map_idx], maplen,
-					  sizeof(struct oal));
-			tbd = (struct tx_buf_desc *)&tx_ring_desc->oal;
-			map_idx++;
-		}
-
-		map = skb_frag_dma_map(&qdev->pdev->dev, frag, 0, skb_frag_size(frag),
-				       DMA_TO_DEVICE);
-
-		err = dma_mapping_error(&qdev->pdev->dev, map);
-		if (err) {
-			netif_err(qdev, tx_queued, qdev->ndev,
-				  "PCI mapping frags failed with error: %d.\n",
-				  err);
-			goto map_error;
-		}
-
-		tbd->addr = cpu_to_le64(map);
-		tbd->len = cpu_to_le32(skb_frag_size(frag));
-		dma_unmap_addr_set(&tx_ring_desc->map[map_idx], mapaddr, map);
-		dma_unmap_len_set(&tx_ring_desc->map[map_idx], maplen,
-				  skb_frag_size(frag));
-	}
-	/* Save the number of segments we've mapped. */
-	tx_ring_desc->map_cnt = map_idx;
-	/* Terminate the last segment. */
-	tbd->len = cpu_to_le32(le32_to_cpu(tbd->len) | TX_DESC_E);
-	return NETDEV_TX_OK;
-
-map_error:
-	/*
-	 * If the first frag mapping failed, then i will be zero.
-	 * This causes the unmap of the skb->data area.  Otherwise
-	 * we pass in the number of frags that mapped successfully
-	 * so they can be unmapped.
-	 */
-	ql_unmap_send(qdev, tx_ring_desc, map_idx);
-	return NETDEV_TX_BUSY;
-}
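-
-/* Mapping layout illustration: for an skb with 10 fragments, map[0] is
- * the linear skb->data area, map[1..6] hold frags 0-5, map[7] is the OAL
- * itself and map[8..11] hold frags 6-9, so map_cnt ends up as 12.
- * ql_unmap_send() keys off i == 0 and (i == 7 && mapped > 7) to use
- * pci_unmap_single() for the linear/OAL entries and pci_unmap_page()
- * for the frags.
- */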
-
-/* Categorizing receive firmware frame errors */
-static void ql_categorize_rx_err(struct ql_adapter *qdev, u8 rx_err,
-				 struct rx_ring *rx_ring)
-{
-	struct nic_stats *stats = &qdev->nic_stats;
-
-	stats->rx_err_count++;
-	rx_ring->rx_errors++;
-
-	switch (rx_err & IB_MAC_IOCB_RSP_ERR_MASK) {
-	case IB_MAC_IOCB_RSP_ERR_CODE_ERR:
-		stats->rx_code_err++;
-		break;
-	case IB_MAC_IOCB_RSP_ERR_OVERSIZE:
-		stats->rx_oversize_err++;
-		break;
-	case IB_MAC_IOCB_RSP_ERR_UNDERSIZE:
-		stats->rx_undersize_err++;
-		break;
-	case IB_MAC_IOCB_RSP_ERR_PREAMBLE:
-		stats->rx_preamble_err++;
-		break;
-	case IB_MAC_IOCB_RSP_ERR_FRAME_LEN:
-		stats->rx_frame_len_err++;
-		break;
-	case IB_MAC_IOCB_RSP_ERR_CRC:
-		stats->rx_crc_err++;
-		/* fall through */
-	default:
-		break;
-	}
-}
-
-/**
- * ql_update_mac_hdr_len - helper routine to update the mac header length
- * based on vlan tags if present
- */
-static void ql_update_mac_hdr_len(struct ql_adapter *qdev,
-				  struct ib_mac_iocb_rsp *ib_mac_rsp,
-				  void *page, size_t *len)
-{
-	u16 *tags;
-
-	if (qdev->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)
-		return;
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V) {
-		tags = (u16 *)page;
-		/* Look for stacked vlan tags in ethertype field */
-		if (tags[6] == ETH_P_8021Q &&
-		    tags[8] == ETH_P_8021Q)
-			*len += 2 * VLAN_HLEN;
-		else
-			*len += VLAN_HLEN;
-	}
-}
-
-/* Process an inbound completion from an rx ring. */
-static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev,
-					struct rx_ring *rx_ring,
-					struct ib_mac_iocb_rsp *ib_mac_rsp,
-					u32 length,
-					u16 vlan_id)
-{
-	struct sk_buff *skb;
-	struct bq_desc *lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
-	struct napi_struct *napi = &rx_ring->napi;
-
-	/* Frame error, so drop the packet. */
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring);
-		put_page(lbq_desc->p.pg_chunk.page);
-		return;
-	}
-	napi->dev = qdev->ndev;
-
-	skb = napi_get_frags(napi);
-	if (!skb) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Couldn't get an skb, exiting.\n");
-		rx_ring->rx_dropped++;
-		put_page(lbq_desc->p.pg_chunk.page);
-		return;
-	}
-	prefetch(lbq_desc->p.pg_chunk.va);
-	__skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-			     lbq_desc->p.pg_chunk.page,
-			     lbq_desc->p.pg_chunk.offset,
-			     length);
-
-	skb->len += length;
-	skb->data_len += length;
-	skb->truesize += length;
-	skb_shinfo(skb)->nr_frags++;
-
-	rx_ring->rx_packets++;
-	rx_ring->rx_bytes += length;
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	skb_record_rx_queue(skb, rx_ring->cq_id);
-	if (vlan_id != 0xffff)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id);
-	napi_gro_frags(napi);
-}
-
-/* Process an inbound completion from an rx ring. */
-static void ql_process_mac_rx_page(struct ql_adapter *qdev,
-					struct rx_ring *rx_ring,
-					struct ib_mac_iocb_rsp *ib_mac_rsp,
-					u32 length,
-					u16 vlan_id)
-{
-	struct net_device *ndev = qdev->ndev;
-	struct sk_buff *skb = NULL;
-	void *addr;
-	struct bq_desc *lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
-	struct napi_struct *napi = &rx_ring->napi;
-	size_t hlen = ETH_HLEN;
-
-	skb = netdev_alloc_skb(ndev, length);
-	if (!skb) {
-		rx_ring->rx_dropped++;
-		put_page(lbq_desc->p.pg_chunk.page);
-		return;
-	}
-
-	addr = lbq_desc->p.pg_chunk.va;
-	prefetch(addr);
-
-	/* Frame error, so drop the packet. */
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring);
-		goto err_out;
-	}
-
-	/* Update the MAC header length*/
-	ql_update_mac_hdr_len(qdev, ib_mac_rsp, addr, &hlen);
-
-	/* The max framesize filter on this chip is set higher than
-	 * MTU since FCoE uses 2k frames.
-	 */
-	if (length > ndev->mtu + hlen) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Frame too long, dropping.\n");
-		rx_ring->rx_dropped++;
-		goto err_out;
-	}
-	skb_put_data(skb, addr, hlen);
-	netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-		     "%d bytes of headers and data in large. Chain page to new skb and pull tail.\n",
-		     length);
-	skb_fill_page_desc(skb, 0, lbq_desc->p.pg_chunk.page,
-				lbq_desc->p.pg_chunk.offset + hlen,
-				length - hlen);
-	skb->len += length - hlen;
-	skb->data_len += length - hlen;
-	skb->truesize += length - hlen;
-
-	rx_ring->rx_packets++;
-	rx_ring->rx_bytes += skb->len;
-	skb->protocol = eth_type_trans(skb, ndev);
-	skb_checksum_none_assert(skb);
-
-	if ((ndev->features & NETIF_F_RXCSUM) &&
-		!(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) {
-		/* TCP frame. */
-		if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T) {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "TCP checksum done!\n");
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-		} else if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) &&
-				(ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_V4)) {
-			/* Unfragmented ipv4 UDP frame. */
-			struct iphdr *iph =
-				(struct iphdr *)((u8 *)addr + hlen);
-			if (!(iph->frag_off &
-				htons(IP_MF|IP_OFFSET))) {
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				netif_printk(qdev, rx_status, KERN_DEBUG,
-					     qdev->ndev,
-					     "UDP checksum done!\n");
-			}
-		}
-	}
-
-	skb_record_rx_queue(skb, rx_ring->cq_id);
-	if (vlan_id != 0xffff)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id);
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-		napi_gro_receive(napi, skb);
-	else
-		netif_receive_skb(skb);
-	return;
-err_out:
-	dev_kfree_skb_any(skb);
-	put_page(lbq_desc->p.pg_chunk.page);
-}
-
-/* Process an inbound completion from an rx ring. */
-static void ql_process_mac_rx_skb(struct ql_adapter *qdev,
-					struct rx_ring *rx_ring,
-					struct ib_mac_iocb_rsp *ib_mac_rsp,
-					u32 length,
-					u16 vlan_id)
-{
-	struct net_device *ndev = qdev->ndev;
-	struct sk_buff *skb = NULL;
-	struct sk_buff *new_skb = NULL;
-	struct bq_desc *sbq_desc = ql_get_curr_sbuf(rx_ring);
-
-	skb = sbq_desc->p.skb;
-	/* Allocate new_skb and copy */
-	new_skb = netdev_alloc_skb(qdev->ndev, length + NET_IP_ALIGN);
-	if (new_skb == NULL) {
-		rx_ring->rx_dropped++;
-		return;
-	}
-	skb_reserve(new_skb, NET_IP_ALIGN);
-
-	pci_dma_sync_single_for_cpu(qdev->pdev,
-				    dma_unmap_addr(sbq_desc, mapaddr),
-				    dma_unmap_len(sbq_desc, maplen),
-				    PCI_DMA_FROMDEVICE);
-
-	skb_put_data(new_skb, skb->data, length);
-
-	pci_dma_sync_single_for_device(qdev->pdev,
-				       dma_unmap_addr(sbq_desc, mapaddr),
-				       dma_unmap_len(sbq_desc, maplen),
-				       PCI_DMA_FROMDEVICE);
-	skb = new_skb;
-
-	/* Frame error, so drop the packet. */
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring);
-		dev_kfree_skb_any(skb);
-		return;
-	}
-
-	/* loopback self test for ethtool */
-	if (test_bit(QL_SELFTEST, &qdev->flags)) {
-		ql_check_lb_frame(qdev, skb);
-		dev_kfree_skb_any(skb);
-		return;
-	}
-
-	/* The max framesize filter on this chip is set higher than
-	 * MTU since FCoE uses 2k frames.
-	 */
-	if (skb->len > ndev->mtu + ETH_HLEN) {
-		dev_kfree_skb_any(skb);
-		rx_ring->rx_dropped++;
-		return;
-	}
-
-	prefetch(skb->data);
-	if (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "%s Multicast.\n",
-			     (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-			     IB_MAC_IOCB_RSP_M_HASH ? "Hash" :
-			     (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-			     IB_MAC_IOCB_RSP_M_REG ? "Registered" :
-			     (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-			     IB_MAC_IOCB_RSP_M_PROM ? "Promiscuous" : "");
-	}
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_P)
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "Promiscuous Packet.\n");
-
-	rx_ring->rx_packets++;
-	rx_ring->rx_bytes += skb->len;
-	skb->protocol = eth_type_trans(skb, ndev);
-	skb_checksum_none_assert(skb);
-
-	/* If rx checksum is on, and there are no
-	 * csum or frame errors.
-	 */
-	if ((ndev->features & NETIF_F_RXCSUM) &&
-		!(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) {
-		/* TCP frame. */
-		if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T) {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "TCP checksum done!\n");
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-		} else if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) &&
-				(ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_V4)) {
-			/* Unfragmented ipv4 UDP frame. */
-			struct iphdr *iph = (struct iphdr *) skb->data;
-			if (!(iph->frag_off &
-				htons(IP_MF|IP_OFFSET))) {
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				netif_printk(qdev, rx_status, KERN_DEBUG,
-					     qdev->ndev,
-					     "UDP checksum done!\n");
-			}
-		}
-	}
-
-	skb_record_rx_queue(skb, rx_ring->cq_id);
-	if (vlan_id != 0xffff)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id);
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-		napi_gro_receive(&rx_ring->napi, skb);
-	else
-		netif_receive_skb(skb);
-}
-
-static void ql_realign_skb(struct sk_buff *skb, int len)
-{
-	void *temp_addr = skb->data;
-
-	/* Undo the skb_reserve(skb,32) we did before
-	 * giving to hardware, and realign data on
-	 * a 2-byte boundary.
-	 */
-	skb->data -= QLGE_SB_PAD - NET_IP_ALIGN;
-	skb->tail -= QLGE_SB_PAD - NET_IP_ALIGN;
-	memmove(skb->data, temp_addr, len);
-}
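-
-/* Example of the arithmetic (NET_IP_ALIGN is typically 2): with
- * QLGE_SB_PAD == 32, skb->data moves back by 30 bytes, leaving a 2-byte
- * pad so the IP header behind the 14-byte Ethernet header lands on a
- * 4-byte boundary.
- */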
-
-/*
- * This function builds an skb for the given inbound
- * completion.  It will be rewritten for readability in the near
- * future, but for now it works well.
- */
-static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
-				       struct rx_ring *rx_ring,
-				       struct ib_mac_iocb_rsp *ib_mac_rsp)
-{
-	struct bq_desc *lbq_desc;
-	struct bq_desc *sbq_desc;
-	struct sk_buff *skb = NULL;
-	u32 length = le32_to_cpu(ib_mac_rsp->data_len);
-	u32 hdr_len = le32_to_cpu(ib_mac_rsp->hdr_len);
-	size_t hlen = ETH_HLEN;
-
-	/*
-	 * Handle the header buffer if present.
-	 */
-	if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV &&
-	    ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "Header of %d bytes in small buffer.\n", hdr_len);
-		/*
-		 * Headers fit nicely into a small buffer.
-		 */
-		sbq_desc = ql_get_curr_sbuf(rx_ring);
-		pci_unmap_single(qdev->pdev,
-				dma_unmap_addr(sbq_desc, mapaddr),
-				dma_unmap_len(sbq_desc, maplen),
-				PCI_DMA_FROMDEVICE);
-		skb = sbq_desc->p.skb;
-		ql_realign_skb(skb, hdr_len);
-		skb_put(skb, hdr_len);
-		sbq_desc->p.skb = NULL;
-	}
-
-	/*
-	 * Handle the data buffer(s).
-	 */
-	if (unlikely(!length)) {	/* Is there data too? */
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "No Data buffer in this packet.\n");
-		return skb;
-	}
-
-	if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DS) {
-		if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS) {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "Headers in small, data of %d bytes in small, combine them.\n",
-				     length);
-			/*
-			 * Data is less than small buffer size so it's
-			 * stuffed in a small buffer.
-			 * For this case we append the data
-			 * from the "data" small buffer to the "header" small
-			 * buffer.
-			 */
-			sbq_desc = ql_get_curr_sbuf(rx_ring);
-			pci_dma_sync_single_for_cpu(qdev->pdev,
-						    dma_unmap_addr(sbq_desc,
-								   mapaddr),
-						    dma_unmap_len(sbq_desc,
-								  maplen),
-						    PCI_DMA_FROMDEVICE);
-			skb_put_data(skb, sbq_desc->p.skb->data, length);
-			pci_dma_sync_single_for_device(qdev->pdev,
-						       dma_unmap_addr(sbq_desc,
-								      mapaddr),
-						       dma_unmap_len(sbq_desc,
-								     maplen),
-						       PCI_DMA_FROMDEVICE);
-		} else {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "%d bytes in a single small buffer.\n",
-				     length);
-			sbq_desc = ql_get_curr_sbuf(rx_ring);
-			skb = sbq_desc->p.skb;
-			ql_realign_skb(skb, length);
-			skb_put(skb, length);
-			pci_unmap_single(qdev->pdev,
-					 dma_unmap_addr(sbq_desc,
-							mapaddr),
-					 dma_unmap_len(sbq_desc,
-						       maplen),
-					 PCI_DMA_FROMDEVICE);
-			sbq_desc->p.skb = NULL;
-		}
-	} else if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL) {
-		if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS) {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "Header in small, %d bytes in large. Chain large to small!\n",
-				     length);
-			/*
-			 * The data is in a single large buffer.  We
-			 * chain it to the header buffer's skb and let
-			 * it rip.
-			 */
-			lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "Chaining page at offset = %d, for %d bytes  to skb.\n",
-				     lbq_desc->p.pg_chunk.offset, length);
-			skb_fill_page_desc(skb, 0, lbq_desc->p.pg_chunk.page,
-						lbq_desc->p.pg_chunk.offset,
-						length);
-			skb->len += length;
-			skb->data_len += length;
-			skb->truesize += length;
-		} else {
-			/*
-			 * The headers and data are in a single large buffer. We
-			 * copy it to a new skb and let it go. This can happen with
-			 * jumbo mtu on a non-TCP/UDP frame.
-			 */
-			lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
-			skb = netdev_alloc_skb(qdev->ndev, length);
-			if (skb == NULL) {
-				netif_printk(qdev, probe, KERN_DEBUG, qdev->ndev,
-					     "No skb available, drop the packet.\n");
-				return NULL;
-			}
-			pci_unmap_page(qdev->pdev,
-				       dma_unmap_addr(lbq_desc,
-						      mapaddr),
-				       dma_unmap_len(lbq_desc, maplen),
-				       PCI_DMA_FROMDEVICE);
-			skb_reserve(skb, NET_IP_ALIGN);
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "%d bytes of headers and data in large. Chain page to new skb and pull tail.\n",
-				     length);
-			skb_fill_page_desc(skb, 0,
-						lbq_desc->p.pg_chunk.page,
-						lbq_desc->p.pg_chunk.offset,
-						length);
-			skb->len += length;
-			skb->data_len += length;
-			skb->truesize += length;
-			ql_update_mac_hdr_len(qdev, ib_mac_rsp,
-					      lbq_desc->p.pg_chunk.va,
-					      &hlen);
-			__pskb_pull_tail(skb, hlen);
-		}
-	} else {
-		/*
-		 * The data is in a chain of large buffers
-		 * pointed to by a small buffer.  We loop
-		 * through and chain them to our small header
-		 * buffer's skb.
-		 * frags:  There are 18 max frags and our small
-		 *         buffer will hold 32 of them. The thing is,
-		 *         we'll use 3 max for our 9000 byte jumbo
-		 *         frames.  If the MTU goes up we could
-		 *         eventually be in trouble.
-		 */
-		int size, i = 0;
-		sbq_desc = ql_get_curr_sbuf(rx_ring);
-		pci_unmap_single(qdev->pdev,
-				 dma_unmap_addr(sbq_desc, mapaddr),
-				 dma_unmap_len(sbq_desc, maplen),
-				 PCI_DMA_FROMDEVICE);
-		if (!(ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HS)) {
-			/*
-			 * This is a non-TCP/UDP IP frame, so
-			 * the headers aren't split into a small
-			 * buffer.  We have to use the small buffer
-			 * that contains our sg list as our skb to
-			 * send upstairs. Copy the sg list here to
-			 * a local buffer and use it to find the
-			 * pages to chain.
-			 */
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "%d bytes of headers & data in chain of large.\n",
-				     length);
-			skb = sbq_desc->p.skb;
-			sbq_desc->p.skb = NULL;
-			skb_reserve(skb, NET_IP_ALIGN);
-		}
-		do {
-			lbq_desc = ql_get_curr_lchunk(qdev, rx_ring);
-			size = (length < rx_ring->lbq_buf_size) ? length :
-				rx_ring->lbq_buf_size;
-
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "Adding page %d to skb for %d bytes.\n",
-				     i, size);
-			skb_fill_page_desc(skb, i,
-						lbq_desc->p.pg_chunk.page,
-						lbq_desc->p.pg_chunk.offset,
-						size);
-			skb->len += size;
-			skb->data_len += size;
-			skb->truesize += size;
-			length -= size;
-			i++;
-		} while (length > 0);
-		ql_update_mac_hdr_len(qdev, ib_mac_rsp, lbq_desc->p.pg_chunk.va,
-				      &hlen);
-		__pskb_pull_tail(skb, hlen);
-	}
-	return skb;
-}
-
-/* Process an inbound completion from an rx ring. */
-static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev,
-				   struct rx_ring *rx_ring,
-				   struct ib_mac_iocb_rsp *ib_mac_rsp,
-				   u16 vlan_id)
-{
-	struct net_device *ndev = qdev->ndev;
-	struct sk_buff *skb = NULL;
-
-	QL_DUMP_IB_MAC_RSP(ib_mac_rsp);
-
-	skb = ql_build_rx_skb(qdev, rx_ring, ib_mac_rsp);
-	if (unlikely(!skb)) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "No skb available, drop packet.\n");
-		rx_ring->rx_dropped++;
-		return;
-	}
-
-	/* Frame error, so drop the packet. */
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) {
-		ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring);
-		dev_kfree_skb_any(skb);
-		return;
-	}
-
-	/* The max framesize filter on this chip is set higher than
-	 * MTU since FCoE uses 2k frames.
-	 */
-	if (skb->len > ndev->mtu + ETH_HLEN) {
-		dev_kfree_skb_any(skb);
-		rx_ring->rx_dropped++;
-		return;
-	}
-
-	/* loopback self test for ethtool */
-	if (test_bit(QL_SELFTEST, &qdev->flags)) {
-		ql_check_lb_frame(qdev, skb);
-		dev_kfree_skb_any(skb);
-		return;
-	}
-
-	prefetch(skb->data);
-	if (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev, "%s Multicast.\n",
-			     (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-			     IB_MAC_IOCB_RSP_M_HASH ? "Hash" :
-			     (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-			     IB_MAC_IOCB_RSP_M_REG ? "Registered" :
-			     (ib_mac_rsp->flags1 & IB_MAC_IOCB_RSP_M_MASK) ==
-			     IB_MAC_IOCB_RSP_M_PROM ? "Promiscuous" : "");
-		rx_ring->rx_multicast++;
-	}
-	if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_P) {
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "Promiscuous Packet.\n");
-	}
-
-	skb->protocol = eth_type_trans(skb, ndev);
-	skb_checksum_none_assert(skb);
-
-	/* If rx checksum is on, and there are no
-	 * csum or frame errors.
-	 */
-	if ((ndev->features & NETIF_F_RXCSUM) &&
-		!(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) {
-		/* TCP frame. */
-		if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T) {
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "TCP checksum done!\n");
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-		} else if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_U) &&
-				(ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_V4)) {
-			/* Unfragmented ipv4 UDP frame. */
-			struct iphdr *iph = (struct iphdr *) skb->data;
-			if (!(iph->frag_off &
-				htons(IP_MF|IP_OFFSET))) {
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-					     "UDP checksum done!\n");
-			}
-		}
-	}
-
-	rx_ring->rx_packets++;
-	rx_ring->rx_bytes += skb->len;
-	skb_record_rx_queue(skb, rx_ring->cq_id);
-	if (vlan_id != 0xffff)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id);
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-		napi_gro_receive(&rx_ring->napi, skb);
-	else
-		netif_receive_skb(skb);
-}
-
-/* Process an inbound completion from an rx ring. */
-static unsigned long ql_process_mac_rx_intr(struct ql_adapter *qdev,
-					struct rx_ring *rx_ring,
-					struct ib_mac_iocb_rsp *ib_mac_rsp)
-{
-	u32 length = le32_to_cpu(ib_mac_rsp->data_len);
-	u16 vlan_id = ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V) &&
-			(qdev->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)) ?
-			((le16_to_cpu(ib_mac_rsp->vlan_id) &
-			IB_MAC_IOCB_RSP_VLAN_MASK)) : 0xffff;
-
-	QL_DUMP_IB_MAC_RSP(ib_mac_rsp);
-
-	if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV) {
-		/* The data and headers are split into
-		 * separate buffers.
-		 */
-		ql_process_mac_split_rx_intr(qdev, rx_ring, ib_mac_rsp,
-						vlan_id);
-	} else if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DS) {
-		/* The data fit in a single small buffer.
-		 * Allocate a new skb, copy the data and
-		 * return the buffer to the free pool.
-		 */
-		ql_process_mac_rx_skb(qdev, rx_ring, ib_mac_rsp,
-						length, vlan_id);
-	} else if ((ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL) &&
-		!(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK) &&
-		(ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_T)) {
-		/* TCP packet in a page chunk that's been checksummed.
-		 * Tack it on to our GRO skb and let it go.
-		 */
-		ql_process_mac_rx_gro_page(qdev, rx_ring, ib_mac_rsp,
-						length, vlan_id);
-	} else if (ib_mac_rsp->flags3 & IB_MAC_IOCB_RSP_DL) {
-		/* Non-TCP packet in a page chunk. Allocate an
-		 * skb, tack it on frags, and send it up.
-		 */
-		ql_process_mac_rx_page(qdev, rx_ring, ib_mac_rsp,
-						length, vlan_id);
-	} else {
-		/* Non-TCP/UDP large frames that span multiple buffers
-		 * can be processed correctly by the split frame logic.
-		 */
-		ql_process_mac_split_rx_intr(qdev, rx_ring, ib_mac_rsp,
-						vlan_id);
-	}
-
-	return (unsigned long)length;
-}
-
-/* Process an outbound completion from an rx ring. */
-static void ql_process_mac_tx_intr(struct ql_adapter *qdev,
-				   struct ob_mac_iocb_rsp *mac_rsp)
-{
-	struct tx_ring *tx_ring;
-	struct tx_ring_desc *tx_ring_desc;
-
-	QL_DUMP_OB_MAC_RSP(mac_rsp);
-	tx_ring = &qdev->tx_ring[mac_rsp->txq_idx];
-	tx_ring_desc = &tx_ring->q[mac_rsp->tid];
-	ql_unmap_send(qdev, tx_ring_desc, tx_ring_desc->map_cnt);
-	tx_ring->tx_bytes += (tx_ring_desc->skb)->len;
-	tx_ring->tx_packets++;
-	dev_kfree_skb(tx_ring_desc->skb);
-	tx_ring_desc->skb = NULL;
-
-	if (unlikely(mac_rsp->flags1 & (OB_MAC_IOCB_RSP_E |
-					OB_MAC_IOCB_RSP_S |
-					OB_MAC_IOCB_RSP_L |
-					OB_MAC_IOCB_RSP_P | OB_MAC_IOCB_RSP_B))) {
-		if (mac_rsp->flags1 & OB_MAC_IOCB_RSP_E) {
-			netif_warn(qdev, tx_done, qdev->ndev,
-				   "Total descriptor length did not match transfer length.\n");
-		}
-		if (mac_rsp->flags1 & OB_MAC_IOCB_RSP_S) {
-			netif_warn(qdev, tx_done, qdev->ndev,
-				   "Frame too short to be valid, not sent.\n");
-		}
-		if (mac_rsp->flags1 & OB_MAC_IOCB_RSP_L) {
-			netif_warn(qdev, tx_done, qdev->ndev,
-				   "Frame too long, but sent anyway.\n");
-		}
-		if (mac_rsp->flags1 & OB_MAC_IOCB_RSP_B) {
-			netif_warn(qdev, tx_done, qdev->ndev,
-				   "PCI backplane error. Frame not sent.\n");
-		}
-	}
-	atomic_inc(&tx_ring->tx_count);
-}
-
-/* Fire up a handler to reset the MPI processor. */
-void ql_queue_fw_error(struct ql_adapter *qdev)
-{
-	ql_link_off(qdev);
-	queue_delayed_work(qdev->workqueue, &qdev->mpi_reset_work, 0);
-}
-
-void ql_queue_asic_error(struct ql_adapter *qdev)
-{
-	ql_link_off(qdev);
-	ql_disable_interrupts(qdev);
-	/* Clear adapter up bit to signal the recovery
-	 * process that it shouldn't kill the reset worker
-	 * thread.
-	 */
-	clear_bit(QL_ADAPTER_UP, &qdev->flags);
-	/* Set asic recovery bit to indicate to the reset process that we
-	 * are in fatal error recovery rather than a normal close.
-	 */
-	set_bit(QL_ASIC_RECOVERY, &qdev->flags);
-	queue_delayed_work(qdev->workqueue, &qdev->asic_reset_work, 0);
-}
-
-static void ql_process_chip_ae_intr(struct ql_adapter *qdev,
-				    struct ib_ae_iocb_rsp *ib_ae_rsp)
-{
-	switch (ib_ae_rsp->event) {
-	case MGMT_ERR_EVENT:
-		netif_err(qdev, rx_err, qdev->ndev,
-			  "Management Processor Fatal Error.\n");
-		ql_queue_fw_error(qdev);
-		return;
-
-	case CAM_LOOKUP_ERR_EVENT:
-		netdev_err(qdev->ndev, "Multiple CAM hits lookup occurred.\n");
-		netdev_err(qdev->ndev, "This event shouldn't occur.\n");
-		ql_queue_asic_error(qdev);
-		return;
-
-	case SOFT_ECC_ERROR_EVENT:
-		netdev_err(qdev->ndev, "Soft ECC error detected.\n");
-		ql_queue_asic_error(qdev);
-		break;
-
-	case PCI_ERR_ANON_BUF_RD:
-		netdev_err(qdev->ndev,
-			   "PCI error occurred when reading anonymous buffers from rx_ring %d.\n",
-			   ib_ae_rsp->q_id);
-		ql_queue_asic_error(qdev);
-		break;
-
-	default:
-		netif_err(qdev, drv, qdev->ndev, "Unexpected event %d.\n",
-			  ib_ae_rsp->event);
-		ql_queue_asic_error(qdev);
-		break;
-	}
-}
-
-static int ql_clean_outbound_rx_ring(struct rx_ring *rx_ring)
-{
-	struct ql_adapter *qdev = rx_ring->qdev;
-	u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
-	struct ob_mac_iocb_rsp *net_rsp = NULL;
-	int count = 0;
-
-	struct tx_ring *tx_ring;
-	/* While there are entries in the completion queue. */
-	while (prod != rx_ring->cnsmr_idx) {
-
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "cq_id = %d, prod = %d, cnsmr = %d\n",
-			     rx_ring->cq_id, prod, rx_ring->cnsmr_idx);
-
-		net_rsp = (struct ob_mac_iocb_rsp *)rx_ring->curr_entry;
-		rmb();
-		switch (net_rsp->opcode) {
-
-		case OPCODE_OB_MAC_TSO_IOCB:
-		case OPCODE_OB_MAC_IOCB:
-			ql_process_mac_tx_intr(qdev, net_rsp);
-			break;
-		default:
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "Hit default case, not handled! dropping the packet, opcode = %x.\n",
-				     net_rsp->opcode);
-		}
-		count++;
-		ql_update_cq(rx_ring);
-		prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
-	}
-	if (!net_rsp)
-		return 0;
-	ql_write_cq_idx(rx_ring);
-	tx_ring = &qdev->tx_ring[net_rsp->txq_idx];
-	if (__netif_subqueue_stopped(qdev->ndev, tx_ring->wq_id)) {
-		if ((atomic_read(&tx_ring->tx_count) > (tx_ring->wq_len / 4)))
-			/*
-			 * The queue got stopped because the tx_ring was full.
-			 * Wake it up, because it's now at least 25% empty.
-			 */
-			netif_wake_subqueue(qdev->ndev, tx_ring->wq_id);
-	}
-
-	return count;
-}
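-
-/* Illustration of the wake threshold: with a 256-entry tx ring
- * (wq_len == 256) the stopped subqueue is only woken once more than 64
- * descriptors (wq_len / 4) are free again, which damps wake/stop
- * ping-ponging under load.
- */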
-
-static int ql_clean_inbound_rx_ring(struct rx_ring *rx_ring, int budget)
-{
-	struct ql_adapter *qdev = rx_ring->qdev;
-	u32 prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
-	struct ql_net_rsp_iocb *net_rsp;
-	int count = 0;
-
-	/* While there are entries in the completion queue. */
-	while (prod != rx_ring->cnsmr_idx) {
-
-		netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-			     "cq_id = %d, prod = %d, cnsmr = %d\n",
-			     rx_ring->cq_id, prod, rx_ring->cnsmr_idx);
-
-		net_rsp = rx_ring->curr_entry;
-		rmb();
-		switch (net_rsp->opcode) {
-		case OPCODE_IB_MAC_IOCB:
-			ql_process_mac_rx_intr(qdev, rx_ring,
-					       (struct ib_mac_iocb_rsp *)
-					       net_rsp);
-			break;
-
-		case OPCODE_IB_AE_IOCB:
-			ql_process_chip_ae_intr(qdev, (struct ib_ae_iocb_rsp *)
-						net_rsp);
-			break;
-		default:
-			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-				     "Hit default case, not handled! dropping the packet, opcode = %x.\n",
-				     net_rsp->opcode);
-			break;
-		}
-		count++;
-		ql_update_cq(rx_ring);
-		prod = ql_read_sh_reg(rx_ring->prod_idx_sh_reg);
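-		/* Honor the NAPI budget; any remaining completions are
-		 * picked up on the next poll.
-		 */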
-		if (count == budget)
-			break;
-	}
-	ql_update_buffer_queues(qdev, rx_ring);
-	ql_write_cq_idx(rx_ring);
-	return count;
-}
-
-static int ql_napi_poll_msix(struct napi_struct *napi, int budget)
-{
-	struct rx_ring *rx_ring = container_of(napi, struct rx_ring, napi);
-	struct ql_adapter *qdev = rx_ring->qdev;
-	struct rx_ring *trx_ring;
-	int i, work_done = 0;
-	struct intr_context *ctx = &qdev->intr_context[rx_ring->cq_id];
-
-	netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
-		     "Enter, NAPI POLL cq_id = %d.\n", rx_ring->cq_id);
-
-	/* Service the TX rings first.  They start
-	 * right after the RSS rings. */
-	for (i = qdev->rss_ring_count; i < qdev->rx_ring_count; i++) {
-		trx_ring = &qdev->rx_ring[i];
-		/* If this TX completion ring belongs to this vector and
-		 * it's not empty then service it.
-		 */
-		if ((ctx->irq_mask & (1 << trx_ring->cq_id)) &&
-			(ql_read_sh_reg(trx_ring->prod_idx_sh_reg) !=
-					trx_ring->cnsmr_idx)) {
-			netif_printk(qdev, intr, KERN_DEBUG, qdev->ndev,
-				     "%s: Servicing TX completion ring %d.\n",
-				     __func__, trx_ring->cq_id);
-			ql_clean_outbound_rx_ring(trx_ring);
-		}
-	}
-
-	/*
-	 * Now service the RSS ring if it's active.
-	 */
-	if (ql_read_sh_reg(rx_ring->prod_idx_sh_reg) !=
-					rx_ring->cnsmr_idx) {
-		netif_printk(qdev, intr, KERN_DEBUG, qdev->ndev,
-			     "%s: Servicing RX completion ring %d.\n",
-			     __func__, rx_ring->cq_id);
-		work_done = ql_clean_inbound_rx_ring(rx_ring, budget);
-	}
-
-	if (work_done < budget) {
-		napi_complete_done(napi, work_done);
-		ql_enable_completion_interrupt(qdev, rx_ring->irq);
-	}
-	return work_done;
-}
-
-static void qlge_vlan_mode(struct net_device *ndev, netdev_features_t features)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	if (features & NETIF_F_HW_VLAN_CTAG_RX) {
-		ql_write32(qdev, NIC_RCV_CFG, NIC_RCV_CFG_VLAN_MASK |
-				 NIC_RCV_CFG_VLAN_MATCH_AND_NON);
-	} else {
-		ql_write32(qdev, NIC_RCV_CFG, NIC_RCV_CFG_VLAN_MASK);
-	}
-}
-
-/**
- * qlge_update_hw_vlan_features - helper routine to reinitialize the adapter
- * when the requested features enable/disable hardware VLAN acceleration
- */
-static int qlge_update_hw_vlan_features(struct net_device *ndev,
-					netdev_features_t features)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int status = 0;
-	bool need_restart = netif_running(ndev);
-
-	if (need_restart) {
-		status = ql_adapter_down(qdev);
-		if (status) {
-			netif_err(qdev, link, qdev->ndev,
-				  "Failed to bring down the adapter\n");
-			return status;
-		}
-	}
-
-	/* Update the features with the recent change. */
-	ndev->features = features;
-
-	if (need_restart) {
-		status = ql_adapter_up(qdev);
-		if (status) {
-			netif_err(qdev, link, qdev->ndev,
-				  "Failed to bring up the adapter\n");
-			return status;
-		}
-	}
-
-	return status;
-}
-
-static int qlge_set_features(struct net_device *ndev,
-	netdev_features_t features)
-{
-	netdev_features_t changed = ndev->features ^ features;
-	int err;
-
-	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
-		/* Update the behavior of vlan accel in the adapter */
-		err = qlge_update_hw_vlan_features(ndev, features);
-		if (err)
-			return err;
-
-		qlge_vlan_mode(ndev, features);
-	}
-
-	return 0;
-}
-
-static int __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid)
-{
-	u32 enable_bit = MAC_ADDR_E;
-	int err;
-
-	err = ql_set_mac_addr_reg(qdev, (u8 *) &enable_bit,
-				  MAC_ADDR_TYPE_VLAN, vid);
-	if (err)
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Failed to init vlan address.\n");
-	return err;
-}
-
-static int qlge_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int status;
-	int err;
-
-	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		return status;
-
-	err = __qlge_vlan_rx_add_vid(qdev, vid);
-	set_bit(vid, qdev->active_vlans);
-
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-
-	return err;
-}
-
-static int __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid)
-{
-	u32 enable_bit = 0;
-	int err;
-
-	err = ql_set_mac_addr_reg(qdev, (u8 *) &enable_bit,
-				  MAC_ADDR_TYPE_VLAN, vid);
-	if (err)
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Failed to clear vlan address.\n");
-	return err;
-}
-
-static int qlge_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int status;
-	int err;
-
-	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		return status;
-
-	err = __qlge_vlan_rx_kill_vid(qdev, vid);
-	clear_bit(vid, qdev->active_vlans);
-
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-
-	return err;
-}
-
-static void qlge_restore_vlan(struct ql_adapter *qdev)
-{
-	int status;
-	u16 vid;
-
-	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		return;
-
-	for_each_set_bit(vid, qdev->active_vlans, VLAN_N_VID)
-		__qlge_vlan_rx_add_vid(qdev, vid);
-
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-}
-
-/* MSI-X Multiple Vector Interrupt Handler for inbound completions. */
-static irqreturn_t qlge_msix_rx_isr(int irq, void *dev_id)
-{
-	struct rx_ring *rx_ring = dev_id;
-	napi_schedule(&rx_ring->napi);
-	return IRQ_HANDLED;
-}
-
-/* This handles a fatal error, MPI activity, and the default
- * rx_ring in an MSI-X multiple vector environment.
- * In an MSI/Legacy environment it also processes the rest of
- * the rx_rings.
- */
-static irqreturn_t qlge_isr(int irq, void *dev_id)
-{
-	struct rx_ring *rx_ring = dev_id;
-	struct ql_adapter *qdev = rx_ring->qdev;
-	struct intr_context *intr_context = &qdev->intr_context[0];
-	u32 var;
-	int work_done = 0;
-
-	spin_lock(&qdev->hw_lock);
-	if (atomic_read(&qdev->intr_context[0].irq_cnt)) {
-		netif_printk(qdev, intr, KERN_DEBUG, qdev->ndev,
-			     "Shared Interrupt, Not ours!\n");
-		spin_unlock(&qdev->hw_lock);
-		return IRQ_NONE;
-	}
-	spin_unlock(&qdev->hw_lock);
-
-	var = ql_disable_completion_interrupt(qdev, intr_context->intr);
-
-	/*
-	 * Check for fatal error.
-	 */
-	if (var & STS_FE) {
-		ql_queue_asic_error(qdev);
-		netdev_err(qdev->ndev, "Got fatal error, STS = %x.\n", var);
-		var = ql_read32(qdev, ERR_STS);
-		netdev_err(qdev->ndev, "Resetting chip. "
-					"Error Status Register = 0x%x\n", var);
-		return IRQ_HANDLED;
-	}
-
-	/*
-	 * Check MPI processor activity.
-	 */
-	if ((var & STS_PI) &&
-		(ql_read32(qdev, INTR_MASK) & INTR_MASK_PI)) {
-		/*
-		 * We've got an async event or mailbox completion.
-		 * Handle it and clear the source of the interrupt.
-		 */
-		netif_err(qdev, intr, qdev->ndev,
-			  "Got MPI processor interrupt.\n");
-		ql_disable_completion_interrupt(qdev, intr_context->intr);
-		ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16));
-		queue_delayed_work_on(smp_processor_id(),
-				qdev->workqueue, &qdev->mpi_work, 0);
-		work_done++;
-	}
-
-	/*
-	 * Get the bit-mask that shows the active queues for this
-	 * pass.  Compare it to the queues that this irq services
-	 * and call napi if there's a match.
-	 */
-	var = ql_read32(qdev, ISR1);
-	if (var & intr_context->irq_mask) {
-		netif_info(qdev, intr, qdev->ndev,
-			   "Waking handler for rx_ring[0].\n");
-		ql_disable_completion_interrupt(qdev, intr_context->intr);
-		napi_schedule(&rx_ring->napi);
-		work_done++;
-	}
-	ql_enable_completion_interrupt(qdev, intr_context->intr);
-	return work_done ? IRQ_HANDLED : IRQ_NONE;
-}
-
-static int ql_tso(struct sk_buff *skb, struct ob_mac_tso_iocb_req *mac_iocb_ptr)
-{
-	if (skb_is_gso(skb)) {
-		int err;
-		__be16 l3_proto = vlan_get_protocol(skb);
-
-		err = skb_cow_head(skb, 0);
-		if (err < 0)
-			return err;
-
-		mac_iocb_ptr->opcode = OPCODE_OB_MAC_TSO_IOCB;
-		mac_iocb_ptr->flags3 |= OB_MAC_TSO_IOCB_IC;
-		mac_iocb_ptr->frame_len = cpu_to_le32((u32) skb->len);
-		mac_iocb_ptr->total_hdrs_len =
-		    cpu_to_le16(skb_transport_offset(skb) + tcp_hdrlen(skb));
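-		/* net_trans_offset packs both header offsets into one
-		 * 16-bit field: the network offset in the low bits and
-		 * the transport offset above OB_MAC_TRANSPORT_HDR_SHIFT.
-		 */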
-		mac_iocb_ptr->net_trans_offset =
-		    cpu_to_le16(skb_network_offset(skb) |
-				skb_transport_offset(skb)
-				<< OB_MAC_TRANSPORT_HDR_SHIFT);
-		mac_iocb_ptr->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
-		mac_iocb_ptr->flags2 |= OB_MAC_TSO_IOCB_LSO;
-		if (likely(l3_proto == htons(ETH_P_IP))) {
-			struct iphdr *iph = ip_hdr(skb);
-			iph->check = 0;
-			mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP4;
-			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
-								 iph->daddr, 0,
-								 IPPROTO_TCP,
-								 0);
-		} else if (l3_proto == htons(ETH_P_IPV6)) {
-			mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP6;
-			tcp_hdr(skb)->check =
-			    ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-					     &ipv6_hdr(skb)->daddr,
-					     0, IPPROTO_TCP, 0);
-		}
-		return 1;
-	}
-	return 0;
-}
-
-static void ql_hw_csum_setup(struct sk_buff *skb,
-			     struct ob_mac_tso_iocb_req *mac_iocb_ptr)
-{
-	int len;
-	struct iphdr *iph = ip_hdr(skb);
-	__sum16 *check;
-	mac_iocb_ptr->opcode = OPCODE_OB_MAC_TSO_IOCB;
-	mac_iocb_ptr->frame_len = cpu_to_le32((u32) skb->len);
-	mac_iocb_ptr->net_trans_offset =
-		cpu_to_le16(skb_network_offset(skb) |
-		skb_transport_offset(skb) << OB_MAC_TRANSPORT_HDR_SHIFT);
-
-	mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP4;
-	len = (ntohs(iph->tot_len) - (iph->ihl << 2));
-	if (likely(iph->protocol == IPPROTO_TCP)) {
-		check = &(tcp_hdr(skb)->check);
-		mac_iocb_ptr->flags2 |= OB_MAC_TSO_IOCB_TC;
-		mac_iocb_ptr->total_hdrs_len =
-		    cpu_to_le16(skb_transport_offset(skb) +
-				(tcp_hdr(skb)->doff << 2));
-	} else {
-		check = &(udp_hdr(skb)->check);
-		mac_iocb_ptr->flags2 |= OB_MAC_TSO_IOCB_UC;
-		mac_iocb_ptr->total_hdrs_len =
-		    cpu_to_le16(skb_transport_offset(skb) +
-				sizeof(struct udphdr));
-	}
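-	/* Seed the checksum field with just the pseudo-header sum;
-	 * the hardware is expected to fold in the payload checksum,
-	 * per the usual CHECKSUM_PARTIAL contract.
-	 */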
-	*check = ~csum_tcpudp_magic(iph->saddr,
-				    iph->daddr, len, iph->protocol, 0);
-}
-
-static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev)
-{
-	struct tx_ring_desc *tx_ring_desc;
-	struct ob_mac_iocb_req *mac_iocb_ptr;
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int tso;
-	struct tx_ring *tx_ring;
-	u32 tx_ring_idx = (u32) skb->queue_mapping;
-
-	tx_ring = &qdev->tx_ring[tx_ring_idx];
-
-	if (skb_padto(skb, ETH_ZLEN))
-		return NETDEV_TX_OK;
-
-	if (unlikely(atomic_read(&tx_ring->tx_count) < 2)) {
-		netif_info(qdev, tx_queued, qdev->ndev,
-			   "%s: BUG! shutting down tx queue %d due to lack of resources.\n",
-			   __func__, tx_ring_idx);
-		netif_stop_subqueue(ndev, tx_ring->wq_id);
-		tx_ring->tx_errors++;
-		return NETDEV_TX_BUSY;
-	}
-	tx_ring_desc = &tx_ring->q[tx_ring->prod_idx];
-	mac_iocb_ptr = tx_ring_desc->queue_entry;
-	memset((void *)mac_iocb_ptr, 0, sizeof(*mac_iocb_ptr));
-
-	mac_iocb_ptr->opcode = OPCODE_OB_MAC_IOCB;
-	mac_iocb_ptr->tid = tx_ring_desc->index;
-	/* We use the upper 32-bits to store the tx queue for this IO.
-	 * When we get the completion we can use it to establish the context.
-	 */
-	mac_iocb_ptr->txq_idx = tx_ring_idx;
-	tx_ring_desc->skb = skb;
-
-	mac_iocb_ptr->frame_len = cpu_to_le16((u16) skb->len);
-
-	if (skb_vlan_tag_present(skb)) {
-		netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev,
-			     "Adding a vlan tag %d.\n", skb_vlan_tag_get(skb));
-		mac_iocb_ptr->flags3 |= OB_MAC_IOCB_V;
-		mac_iocb_ptr->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
-	}
-	tso = ql_tso(skb, (struct ob_mac_tso_iocb_req *)mac_iocb_ptr);
-	if (tso < 0) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	} else if (unlikely(!tso) && (skb->ip_summed == CHECKSUM_PARTIAL)) {
-		ql_hw_csum_setup(skb,
-				 (struct ob_mac_tso_iocb_req *)mac_iocb_ptr);
-	}
-	if (ql_map_send(qdev, mac_iocb_ptr, skb, tx_ring_desc) !=
-			NETDEV_TX_OK) {
-		netif_err(qdev, tx_queued, qdev->ndev,
-			  "Could not map the segments.\n");
-		tx_ring->tx_errors++;
-		return NETDEV_TX_BUSY;
-	}
-	QL_DUMP_OB_MAC_IOCB(mac_iocb_ptr);
-	tx_ring->prod_idx++;
-	if (tx_ring->prod_idx == tx_ring->wq_len)
-		tx_ring->prod_idx = 0;
-	wmb();
-
-	ql_write_db_reg_relaxed(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
-	mmiowb();
-	netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev,
-		     "tx queued, slot %d, len %d\n",
-		     tx_ring->prod_idx, skb->len);
-
-	atomic_dec(&tx_ring->tx_count);
-
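-	/* Stop first, then re-check the free count: a completion may
-	 * have freed descriptors between the two reads, in which case
-	 * the queue is woken again immediately.
-	 */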
-	if (unlikely(atomic_read(&tx_ring->tx_count) < 2)) {
-		netif_stop_subqueue(ndev, tx_ring->wq_id);
-		if ((atomic_read(&tx_ring->tx_count) > (tx_ring->wq_len / 4)))
-			/*
-			 * The queue got stopped because the tx_ring was full.
-			 * Wake it up, because it's now at least 25% empty.
-			 */
-			netif_wake_subqueue(qdev->ndev, tx_ring->wq_id);
-	}
-	return NETDEV_TX_OK;
-}
-
-static void ql_free_shadow_space(struct ql_adapter *qdev)
-{
-	if (qdev->rx_ring_shadow_reg_area) {
-		pci_free_consistent(qdev->pdev,
-				    PAGE_SIZE,
-				    qdev->rx_ring_shadow_reg_area,
-				    qdev->rx_ring_shadow_reg_dma);
-		qdev->rx_ring_shadow_reg_area = NULL;
-	}
-	if (qdev->tx_ring_shadow_reg_area) {
-		pci_free_consistent(qdev->pdev,
-				    PAGE_SIZE,
-				    qdev->tx_ring_shadow_reg_area,
-				    qdev->tx_ring_shadow_reg_dma);
-		qdev->tx_ring_shadow_reg_area = NULL;
-	}
-}
-
-static int ql_alloc_shadow_space(struct ql_adapter *qdev)
-{
-	qdev->rx_ring_shadow_reg_area =
-		pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
-				      &qdev->rx_ring_shadow_reg_dma);
-	if (qdev->rx_ring_shadow_reg_area == NULL) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Allocation of RX shadow space failed.\n");
-		return -ENOMEM;
-	}
-
-	qdev->tx_ring_shadow_reg_area =
-		pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
-				      &qdev->tx_ring_shadow_reg_dma);
-	if (qdev->tx_ring_shadow_reg_area == NULL) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Allocation of TX shadow space failed.\n");
-		goto err_wqp_sh_area;
-	}
-	return 0;
-
-err_wqp_sh_area:
-	pci_free_consistent(qdev->pdev,
-			    PAGE_SIZE,
-			    qdev->rx_ring_shadow_reg_area,
-			    qdev->rx_ring_shadow_reg_dma);
-	return -ENOMEM;
-}
-
-static void ql_init_tx_ring(struct ql_adapter *qdev, struct tx_ring *tx_ring)
-{
-	struct tx_ring_desc *tx_ring_desc;
-	int i;
-	struct ob_mac_iocb_req *mac_iocb_ptr;
-
-	mac_iocb_ptr = tx_ring->wq_base;
-	tx_ring_desc = tx_ring->q;
-	for (i = 0; i < tx_ring->wq_len; i++) {
-		tx_ring_desc->index = i;
-		tx_ring_desc->skb = NULL;
-		tx_ring_desc->queue_entry = mac_iocb_ptr;
-		mac_iocb_ptr++;
-		tx_ring_desc++;
-	}
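-	/* tx_count tracks free descriptors: it starts at the full
-	 * ring length, is decremented by the send path and
-	 * incremented back as tx completions are processed.
-	 */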
-	atomic_set(&tx_ring->tx_count, tx_ring->wq_len);
-}
-
-static void ql_free_tx_resources(struct ql_adapter *qdev,
-				 struct tx_ring *tx_ring)
-{
-	if (tx_ring->wq_base) {
-		pci_free_consistent(qdev->pdev, tx_ring->wq_size,
-				    tx_ring->wq_base, tx_ring->wq_base_dma);
-		tx_ring->wq_base = NULL;
-	}
-	kfree(tx_ring->q);
-	tx_ring->q = NULL;
-}
-
-static int ql_alloc_tx_resources(struct ql_adapter *qdev,
-				 struct tx_ring *tx_ring)
-{
-	tx_ring->wq_base =
-	    pci_alloc_consistent(qdev->pdev, tx_ring->wq_size,
-				 &tx_ring->wq_base_dma);
-
-	if ((tx_ring->wq_base == NULL) ||
-	    tx_ring->wq_base_dma & WQ_ADDR_ALIGN)
-		goto pci_alloc_err;
-
-	tx_ring->q =
-	    kmalloc_array(tx_ring->wq_len, sizeof(struct tx_ring_desc),
-			  GFP_KERNEL);
-	if (tx_ring->q == NULL)
-		goto err;
-
-	return 0;
-err:
-	pci_free_consistent(qdev->pdev, tx_ring->wq_size,
-			    tx_ring->wq_base, tx_ring->wq_base_dma);
-	tx_ring->wq_base = NULL;
-pci_alloc_err:
-	netif_err(qdev, ifup, qdev->ndev, "tx_ring alloc failed.\n");
-	return -ENOMEM;
-}
-
-static void ql_free_lbq_buffers(struct ql_adapter *qdev, struct rx_ring *rx_ring)
-{
-	struct bq_desc *lbq_desc;
-	uint32_t curr_idx, clean_idx;
-
-	curr_idx = rx_ring->lbq_curr_idx;
-	clean_idx = rx_ring->lbq_clean_idx;
-	while (curr_idx != clean_idx) {
-		lbq_desc = &rx_ring->lbq[curr_idx];
-
-		if (lbq_desc->p.pg_chunk.last_flag) {
-			pci_unmap_page(qdev->pdev,
-				       lbq_desc->p.pg_chunk.map,
-				       ql_lbq_block_size(qdev),
-				       PCI_DMA_FROMDEVICE);
-			lbq_desc->p.pg_chunk.last_flag = 0;
-		}
-
-		put_page(lbq_desc->p.pg_chunk.page);
-		lbq_desc->p.pg_chunk.page = NULL;
-
-		if (++curr_idx == rx_ring->lbq_len)
-			curr_idx = 0;
-	}
-	if (rx_ring->pg_chunk.page) {
-		pci_unmap_page(qdev->pdev, rx_ring->pg_chunk.map,
-			ql_lbq_block_size(qdev), PCI_DMA_FROMDEVICE);
-		put_page(rx_ring->pg_chunk.page);
-		rx_ring->pg_chunk.page = NULL;
-	}
-}
-
-static void ql_free_sbq_buffers(struct ql_adapter *qdev, struct rx_ring *rx_ring)
-{
-	int i;
-	struct bq_desc *sbq_desc;
-
-	for (i = 0; i < rx_ring->sbq_len; i++) {
-		sbq_desc = &rx_ring->sbq[i];
-		if (sbq_desc == NULL) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "sbq_desc %d is NULL.\n", i);
-			return;
-		}
-		if (sbq_desc->p.skb) {
-			pci_unmap_single(qdev->pdev,
-					 dma_unmap_addr(sbq_desc, mapaddr),
-					 dma_unmap_len(sbq_desc, maplen),
-					 PCI_DMA_FROMDEVICE);
-			dev_kfree_skb(sbq_desc->p.skb);
-			sbq_desc->p.skb = NULL;
-		}
-	}
-}
-
-/* Free all large and small rx buffers associated
- * with the completion queues for this device.
- */
-static void ql_free_rx_buffers(struct ql_adapter *qdev)
-{
-	int i;
-	struct rx_ring *rx_ring;
-
-	for (i = 0; i < qdev->rx_ring_count; i++) {
-		rx_ring = &qdev->rx_ring[i];
-		if (rx_ring->lbq)
-			ql_free_lbq_buffers(qdev, rx_ring);
-		if (rx_ring->sbq)
-			ql_free_sbq_buffers(qdev, rx_ring);
-	}
-}
-
-static void ql_alloc_rx_buffers(struct ql_adapter *qdev)
-{
-	struct rx_ring *rx_ring;
-	int i;
-
-	for (i = 0; i < qdev->rx_ring_count; i++) {
-		rx_ring = &qdev->rx_ring[i];
-		if (rx_ring->type != TX_Q)
-			ql_update_buffer_queues(qdev, rx_ring);
-	}
-}
-
-static void ql_init_lbq_ring(struct ql_adapter *qdev,
-				struct rx_ring *rx_ring)
-{
-	int i;
-	struct bq_desc *lbq_desc;
-	__le64 *bq = rx_ring->lbq_base;
-
-	memset(rx_ring->lbq, 0, rx_ring->lbq_len * sizeof(struct bq_desc));
-	for (i = 0; i < rx_ring->lbq_len; i++) {
-		lbq_desc = &rx_ring->lbq[i];
-		memset(lbq_desc, 0, sizeof(*lbq_desc));
-		lbq_desc->index = i;
-		lbq_desc->addr = bq;
-		bq++;
-	}
-}
-
-static void ql_init_sbq_ring(struct ql_adapter *qdev,
-				struct rx_ring *rx_ring)
-{
-	int i;
-	struct bq_desc *sbq_desc;
-	__le64 *bq = rx_ring->sbq_base;
-
-	memset(rx_ring->sbq, 0, rx_ring->sbq_len * sizeof(struct bq_desc));
-	for (i = 0; i < rx_ring->sbq_len; i++) {
-		sbq_desc = &rx_ring->sbq[i];
-		memset(sbq_desc, 0, sizeof(*sbq_desc));
-		sbq_desc->index = i;
-		sbq_desc->addr = bq;
-		bq++;
-	}
-}
-
-static void ql_free_rx_resources(struct ql_adapter *qdev,
-				 struct rx_ring *rx_ring)
-{
-	/* Free the small buffer queue. */
-	if (rx_ring->sbq_base) {
-		pci_free_consistent(qdev->pdev,
-				    rx_ring->sbq_size,
-				    rx_ring->sbq_base, rx_ring->sbq_base_dma);
-		rx_ring->sbq_base = NULL;
-	}
-
-	/* Free the small buffer queue control blocks. */
-	kfree(rx_ring->sbq);
-	rx_ring->sbq = NULL;
-
-	/* Free the large buffer queue. */
-	if (rx_ring->lbq_base) {
-		pci_free_consistent(qdev->pdev,
-				    rx_ring->lbq_size,
-				    rx_ring->lbq_base, rx_ring->lbq_base_dma);
-		rx_ring->lbq_base = NULL;
-	}
-
-	/* Free the large buffer queue control blocks. */
-	kfree(rx_ring->lbq);
-	rx_ring->lbq = NULL;
-
-	/* Free the rx queue. */
-	if (rx_ring->cq_base) {
-		pci_free_consistent(qdev->pdev,
-				    rx_ring->cq_size,
-				    rx_ring->cq_base, rx_ring->cq_base_dma);
-		rx_ring->cq_base = NULL;
-	}
-}
-
-/* Allocate queues and buffers for this completion queue based
- * on the values in the parameter structure. */
-static int ql_alloc_rx_resources(struct ql_adapter *qdev,
-				 struct rx_ring *rx_ring)
-{
-	/*
-	 * Allocate the completion queue for this rx_ring.
-	 */
-	rx_ring->cq_base =
-	    pci_alloc_consistent(qdev->pdev, rx_ring->cq_size,
-				 &rx_ring->cq_base_dma);
-
-	if (rx_ring->cq_base == NULL) {
-		netif_err(qdev, ifup, qdev->ndev, "rx_ring alloc failed.\n");
-		return -ENOMEM;
-	}
-
-	if (rx_ring->sbq_len) {
-		/*
-		 * Allocate small buffer queue.
-		 */
-		rx_ring->sbq_base =
-		    pci_alloc_consistent(qdev->pdev, rx_ring->sbq_size,
-					 &rx_ring->sbq_base_dma);
-
-		if (rx_ring->sbq_base == NULL) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Small buffer queue allocation failed.\n");
-			goto err_mem;
-		}
-
-		/*
-		 * Allocate small buffer queue control blocks.
-		 */
-		rx_ring->sbq = kmalloc_array(rx_ring->sbq_len,
-					     sizeof(struct bq_desc),
-					     GFP_KERNEL);
-		if (rx_ring->sbq == NULL)
-			goto err_mem;
-
-		ql_init_sbq_ring(qdev, rx_ring);
-	}
-
-	if (rx_ring->lbq_len) {
-		/*
-		 * Allocate large buffer queue.
-		 */
-		rx_ring->lbq_base =
-		    pci_alloc_consistent(qdev->pdev, rx_ring->lbq_size,
-					 &rx_ring->lbq_base_dma);
-
-		if (rx_ring->lbq_base == NULL) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Large buffer queue allocation failed.\n");
-			goto err_mem;
-		}
-		/*
-		 * Allocate large buffer queue control blocks.
-		 */
-		rx_ring->lbq = kmalloc_array(rx_ring->lbq_len,
-					     sizeof(struct bq_desc),
-					     GFP_KERNEL);
-		if (rx_ring->lbq == NULL)
-			goto err_mem;
-
-		ql_init_lbq_ring(qdev, rx_ring);
-	}
-
-	return 0;
-
-err_mem:
-	ql_free_rx_resources(qdev, rx_ring);
-	return -ENOMEM;
-}
-
-static void ql_tx_ring_clean(struct ql_adapter *qdev)
-{
-	struct tx_ring *tx_ring;
-	struct tx_ring_desc *tx_ring_desc;
-	int i, j;
-
-	/*
-	 * Loop through all queues and free
-	 * any resources.
-	 */
-	for (j = 0; j < qdev->tx_ring_count; j++) {
-		tx_ring = &qdev->tx_ring[j];
-		for (i = 0; i < tx_ring->wq_len; i++) {
-			tx_ring_desc = &tx_ring->q[i];
-			if (tx_ring_desc && tx_ring_desc->skb) {
-				netif_err(qdev, ifdown, qdev->ndev,
-					  "Freeing lost SKB %p, from queue %d, index %d.\n",
-					  tx_ring_desc->skb, j,
-					  tx_ring_desc->index);
-				ql_unmap_send(qdev, tx_ring_desc,
-					      tx_ring_desc->map_cnt);
-				dev_kfree_skb(tx_ring_desc->skb);
-				tx_ring_desc->skb = NULL;
-			}
-		}
-	}
-}
-
-static void ql_free_mem_resources(struct ql_adapter *qdev)
-{
-	int i;
-
-	for (i = 0; i < qdev->tx_ring_count; i++)
-		ql_free_tx_resources(qdev, &qdev->tx_ring[i]);
-	for (i = 0; i < qdev->rx_ring_count; i++)
-		ql_free_rx_resources(qdev, &qdev->rx_ring[i]);
-	ql_free_shadow_space(qdev);
-}
-
-static int ql_alloc_mem_resources(struct ql_adapter *qdev)
-{
-	int i;
-
-	/* Allocate space for our shadow registers and such. */
-	if (ql_alloc_shadow_space(qdev))
-		return -ENOMEM;
-
-	for (i = 0; i < qdev->rx_ring_count; i++) {
-		if (ql_alloc_rx_resources(qdev, &qdev->rx_ring[i]) != 0) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "RX resource allocation failed.\n");
-			goto err_mem;
-		}
-	}
-	/* Allocate tx queue resources */
-	for (i = 0; i < qdev->tx_ring_count; i++) {
-		if (ql_alloc_tx_resources(qdev, &qdev->tx_ring[i]) != 0) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "TX resource allocation failed.\n");
-			goto err_mem;
-		}
-	}
-	return 0;
-
-err_mem:
-	ql_free_mem_resources(qdev);
-	return -ENOMEM;
-}
-
-/* Set up the rx ring control block and pass it to the chip.
- * The control block is defined as
- * "Completion Queue Initialization Control Block", or cqicb.
- */
-static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
-{
-	struct cqicb *cqicb = &rx_ring->cqicb;
-	void *shadow_reg = qdev->rx_ring_shadow_reg_area +
-		(rx_ring->cq_id * RX_RING_SHADOW_SPACE);
-	u64 shadow_reg_dma = qdev->rx_ring_shadow_reg_dma +
-		(rx_ring->cq_id * RX_RING_SHADOW_SPACE);
-	void __iomem *doorbell_area =
-	    qdev->doorbell_area + (DB_PAGE_SIZE * (128 + rx_ring->cq_id));
-	int err = 0;
-	u16 bq_len;
-	u64 tmp;
-	__le64 *base_indirect_ptr;
-	int page_entries;
-
-	/* Set up the shadow registers for this ring. */
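-	/* This ring's slice of the shadow page is carved up in order:
-	 * an 8-byte producer index, the lbq indirection entries, then
-	 * the sbq indirection entries (each sized by
-	 * MAX_DB_PAGES_PER_BQ).
-	 */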
-	rx_ring->prod_idx_sh_reg = shadow_reg;
-	rx_ring->prod_idx_sh_reg_dma = shadow_reg_dma;
-	*rx_ring->prod_idx_sh_reg = 0;
-	shadow_reg += sizeof(u64);
-	shadow_reg_dma += sizeof(u64);
-	rx_ring->lbq_base_indirect = shadow_reg;
-	rx_ring->lbq_base_indirect_dma = shadow_reg_dma;
-	shadow_reg += (sizeof(u64) * MAX_DB_PAGES_PER_BQ(rx_ring->lbq_len));
-	shadow_reg_dma += (sizeof(u64) * MAX_DB_PAGES_PER_BQ(rx_ring->lbq_len));
-	rx_ring->sbq_base_indirect = shadow_reg;
-	rx_ring->sbq_base_indirect_dma = shadow_reg_dma;
-
-	/* PCI doorbell mem area + 0x00 for consumer index register */
-	rx_ring->cnsmr_idx_db_reg = (u32 __iomem *) doorbell_area;
-	rx_ring->cnsmr_idx = 0;
-	rx_ring->curr_entry = rx_ring->cq_base;
-
-	/* PCI doorbell mem area + 0x04 for valid register */
-	rx_ring->valid_db_reg = doorbell_area + 0x04;
-
-	/* PCI doorbell mem area + 0x18 for large buffer consumer */
-	rx_ring->lbq_prod_idx_db_reg = (u32 __iomem *) (doorbell_area + 0x18);
-
-	/* PCI doorbell mem area + 0x1c */
-	rx_ring->sbq_prod_idx_db_reg = (u32 __iomem *) (doorbell_area + 0x1c);
-
-	memset((void *)cqicb, 0, sizeof(struct cqicb));
-	cqicb->msix_vect = rx_ring->irq;
-
-	bq_len = (rx_ring->cq_len == 65536) ? 0 : (u16) rx_ring->cq_len;
-	cqicb->len = cpu_to_le16(bq_len | LEN_V | LEN_CPP_CONT);
-
-	cqicb->addr = cpu_to_le64(rx_ring->cq_base_dma);
-
-	cqicb->prod_idx_addr = cpu_to_le64(rx_ring->prod_idx_sh_reg_dma);
-
-	/*
-	 * Set up the control block load flags.
-	 */
-	cqicb->flags = FLAGS_LC |	/* Load queue base address */
-	    FLAGS_LV |		/* Load MSI-X vector */
-	    FLAGS_LI;		/* Load irq delay values */
-	if (rx_ring->lbq_len) {
-		cqicb->flags |= FLAGS_LL;	/* Load lbq values */
-		tmp = (u64)rx_ring->lbq_base_dma;
-		base_indirect_ptr = rx_ring->lbq_base_indirect;
-		page_entries = 0;
-		do {
-			*base_indirect_ptr = cpu_to_le64(tmp);
-			tmp += DB_PAGE_SIZE;
-			base_indirect_ptr++;
-			page_entries++;
-		} while (page_entries < MAX_DB_PAGES_PER_BQ(rx_ring->lbq_len));
-		cqicb->lbq_addr =
-		    cpu_to_le64(rx_ring->lbq_base_indirect_dma);
-		bq_len = (rx_ring->lbq_buf_size == 65536) ? 0 :
-			(u16) rx_ring->lbq_buf_size;
-		cqicb->lbq_buf_size = cpu_to_le16(bq_len);
-		bq_len = (rx_ring->lbq_len == 65536) ? 0 :
-			(u16) rx_ring->lbq_len;
-		cqicb->lbq_len = cpu_to_le16(bq_len);
-		rx_ring->lbq_prod_idx = 0;
-		rx_ring->lbq_curr_idx = 0;
-		rx_ring->lbq_clean_idx = 0;
-		rx_ring->lbq_free_cnt = rx_ring->lbq_len;
-	}
-	if (rx_ring->sbq_len) {
-		cqicb->flags |= FLAGS_LS;	/* Load sbq values */
-		tmp = (u64)rx_ring->sbq_base_dma;
-		base_indirect_ptr = rx_ring->sbq_base_indirect;
-		page_entries = 0;
-		do {
-			*base_indirect_ptr = cpu_to_le64(tmp);
-			tmp += DB_PAGE_SIZE;
-			base_indirect_ptr++;
-			page_entries++;
-		} while (page_entries < MAX_DB_PAGES_PER_BQ(rx_ring->sbq_len));
-		cqicb->sbq_addr =
-		    cpu_to_le64(rx_ring->sbq_base_indirect_dma);
-		cqicb->sbq_buf_size =
-		    cpu_to_le16((u16)(rx_ring->sbq_buf_size));
-		bq_len = (rx_ring->sbq_len == 65536) ? 0 :
-			(u16) rx_ring->sbq_len;
-		cqicb->sbq_len = cpu_to_le16(bq_len);
-		rx_ring->sbq_prod_idx = 0;
-		rx_ring->sbq_curr_idx = 0;
-		rx_ring->sbq_clean_idx = 0;
-		rx_ring->sbq_free_cnt = rx_ring->sbq_len;
-	}
-	switch (rx_ring->type) {
-	case TX_Q:
-		cqicb->irq_delay = cpu_to_le16(qdev->tx_coalesce_usecs);
-		cqicb->pkt_delay = cpu_to_le16(qdev->tx_max_coalesced_frames);
-		break;
-	case RX_Q:
-		/* Inbound completion handling rx_rings run in
-		 * separate NAPI contexts.
-		 */
-		netif_napi_add(qdev->ndev, &rx_ring->napi, ql_napi_poll_msix,
-			       64);
-		cqicb->irq_delay = cpu_to_le16(qdev->rx_coalesce_usecs);
-		cqicb->pkt_delay = cpu_to_le16(qdev->rx_max_coalesced_frames);
-		break;
-	default:
-		netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-			     "Invalid rx_ring->type = %d.\n", rx_ring->type);
-	}
-	err = ql_write_cfg(qdev, cqicb, sizeof(struct cqicb),
-			   CFG_LCQ, rx_ring->cq_id);
-	if (err) {
-		netif_err(qdev, ifup, qdev->ndev, "Failed to load CQICB.\n");
-		return err;
-	}
-	return err;
-}
-
-static int ql_start_tx_ring(struct ql_adapter *qdev, struct tx_ring *tx_ring)
-{
-	struct wqicb *wqicb = (struct wqicb *)tx_ring;
-	void __iomem *doorbell_area =
-	    qdev->doorbell_area + (DB_PAGE_SIZE * tx_ring->wq_id);
-	void *shadow_reg = qdev->tx_ring_shadow_reg_area +
-	    (tx_ring->wq_id * sizeof(u64));
-	u64 shadow_reg_dma = qdev->tx_ring_shadow_reg_dma +
-	    (tx_ring->wq_id * sizeof(u64));
-	int err = 0;
-
-	/*
-	 * Assign doorbell registers for this tx_ring.
-	 */
-	/* TX PCI doorbell mem area for tx producer index */
-	tx_ring->prod_idx_db_reg = (u32 __iomem *) doorbell_area;
-	tx_ring->prod_idx = 0;
-	/* TX PCI doorbell mem area + 0x04 */
-	tx_ring->valid_db_reg = doorbell_area + 0x04;
-
-	/*
-	 * Assign shadow registers for this tx_ring.
-	 */
-	tx_ring->cnsmr_idx_sh_reg = shadow_reg;
-	tx_ring->cnsmr_idx_sh_reg_dma = shadow_reg_dma;
-
-	wqicb->len = cpu_to_le16(tx_ring->wq_len | Q_LEN_V | Q_LEN_CPP_CONT);
-	wqicb->flags = cpu_to_le16(Q_FLAGS_LC |
-				   Q_FLAGS_LB | Q_FLAGS_LI | Q_FLAGS_LO);
-	wqicb->cq_id_rss = cpu_to_le16(tx_ring->cq_id);
-	wqicb->rid = 0;
-	wqicb->addr = cpu_to_le64(tx_ring->wq_base_dma);
-
-	wqicb->cnsmr_idx_addr = cpu_to_le64(tx_ring->cnsmr_idx_sh_reg_dma);
-
-	ql_init_tx_ring(qdev, tx_ring);
-
-	err = ql_write_cfg(qdev, wqicb, sizeof(*wqicb), CFG_LRQ,
-			   (u16) tx_ring->wq_id);
-	if (err) {
-		netif_err(qdev, ifup, qdev->ndev, "Failed to load tx_ring.\n");
-		return err;
-	}
-	return err;
-}
-
-static void ql_disable_msix(struct ql_adapter *qdev)
-{
-	if (test_bit(QL_MSIX_ENABLED, &qdev->flags)) {
-		pci_disable_msix(qdev->pdev);
-		clear_bit(QL_MSIX_ENABLED, &qdev->flags);
-		kfree(qdev->msi_x_entry);
-		qdev->msi_x_entry = NULL;
-	} else if (test_bit(QL_MSI_ENABLED, &qdev->flags)) {
-		pci_disable_msi(qdev->pdev);
-		clear_bit(QL_MSI_ENABLED, &qdev->flags);
-	}
-}
-
-/* We start by trying to get the number of vectors
- * stored in qdev->intr_count. If we don't get that
- * many then we reduce the count and try again.
- */
-static void ql_enable_msix(struct ql_adapter *qdev)
-{
-	int i, err;
-
-	/* Get the MSIX vectors. */
-	if (qlge_irq_type == MSIX_IRQ) {
-		/* Try to alloc space for the msix struct,
-		 * if it fails then go to MSI/legacy.
-		 */
-		qdev->msi_x_entry = kcalloc(qdev->intr_count,
-					    sizeof(struct msix_entry),
-					    GFP_KERNEL);
-		if (!qdev->msi_x_entry) {
-			qlge_irq_type = MSI_IRQ;
-			goto msi;
-		}
-
-		for (i = 0; i < qdev->intr_count; i++)
-			qdev->msi_x_entry[i].entry = i;
-
-		err = pci_enable_msix_range(qdev->pdev, qdev->msi_x_entry,
-					    1, qdev->intr_count);
-		if (err < 0) {
-			kfree(qdev->msi_x_entry);
-			qdev->msi_x_entry = NULL;
-			netif_warn(qdev, ifup, qdev->ndev,
-				   "MSI-X Enable failed, trying MSI.\n");
-			qlge_irq_type = MSI_IRQ;
-		} else {
-			qdev->intr_count = err;
-			set_bit(QL_MSIX_ENABLED, &qdev->flags);
-			netif_info(qdev, ifup, qdev->ndev,
-				   "MSI-X Enabled, got %d vectors.\n",
-				   qdev->intr_count);
-			return;
-		}
-	}
-msi:
-	qdev->intr_count = 1;
-	if (qlge_irq_type == MSI_IRQ) {
-		if (!pci_enable_msi(qdev->pdev)) {
-			set_bit(QL_MSI_ENABLED, &qdev->flags);
-			netif_info(qdev, ifup, qdev->ndev,
-				   "Running with MSI interrupts.\n");
-			return;
-		}
-	}
-	qlge_irq_type = LEG_IRQ;
-	netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-		     "Running with legacy interrupts.\n");
-}
-
-/* Each vector services 1 RSS ring and 1 or more
- * TX completion rings.  This function loops through
- * the TX completion rings and assigns the vector that
- * will service it.  An example would be if there are
- * 2 vectors (so 2 RSS rings) and 8 TX completion rings.
- * This would mean that vector 0 would service RSS ring 0
- * and TX completion rings 0,1,2 and 3.  Vector 1 would
- * service RSS ring 1 and TX completion rings 4,5,6 and 7.
- */
-static void ql_set_tx_vect(struct ql_adapter *qdev)
-{
-	int i, j, vect;
-	u32 tx_rings_per_vector = qdev->tx_ring_count / qdev->intr_count;
-
-	if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags))) {
-		/* Assign irq vectors to TX rx_rings.*/
-		for (vect = 0, j = 0, i = qdev->rss_ring_count;
-					 i < qdev->rx_ring_count; i++) {
-			if (j == tx_rings_per_vector) {
-				vect++;
-				j = 0;
-			}
-			qdev->rx_ring[i].irq = vect;
-			j++;
-		}
-	} else {
-		/* For single vector all rings have an irq
-		 * of zero.
-		 */
-		for (i = 0; i < qdev->rx_ring_count; i++)
-			qdev->rx_ring[i].irq = 0;
-	}
-}
-
-/* Set the interrupt mask for this vector.  Each vector
- * will service 1 RSS ring and 1 or more TX completion
- * rings.  This function sets up a bit mask per vector
- * that indicates which rings it services.
- */
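-/* Continuing the example above (2 vectors, 8 TX completion rings):
- * vector 0's mask would cover cq_id 0 plus cq_ids 2-5 (bits 0 and
- * 2-5), and vector 1's mask cq_id 1 plus cq_ids 6-9.
- */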
-static void ql_set_irq_mask(struct ql_adapter *qdev, struct intr_context *ctx)
-{
-	int j, vect = ctx->intr;
-	u32 tx_rings_per_vector = qdev->tx_ring_count / qdev->intr_count;
-
-	if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags))) {
-		/* Add the RSS ring serviced by this vector
-		 * to the mask.
-		 */
-		ctx->irq_mask = (1 << qdev->rx_ring[vect].cq_id);
-		/* Add the TX ring(s) serviced by this vector
-		 * to the mask. */
-		for (j = 0; j < tx_rings_per_vector; j++) {
-			ctx->irq_mask |=
-			(1 << qdev->rx_ring[qdev->rss_ring_count +
-			(vect * tx_rings_per_vector) + j].cq_id);
-		}
-	} else {
-		/* For single vector we just shift each queue's
-		 * ID into the mask.
-		 */
-		for (j = 0; j < qdev->rx_ring_count; j++)
-			ctx->irq_mask |= (1 << qdev->rx_ring[j].cq_id);
-	}
-}
-
-/*
- * Here we build the intr_context structures based on
- * our rx_ring count and intr vector count.
- * The intr_context structure is used to hook each vector
- * to possibly different handlers.
- */
-static void ql_resolve_queues_to_irqs(struct ql_adapter *qdev)
-{
-	int i = 0;
-	struct intr_context *intr_context = &qdev->intr_context[0];
-
-	if (likely(test_bit(QL_MSIX_ENABLED, &qdev->flags))) {
-		/* Each rx_ring has its
-		 * own intr_context since we have separate
-		 * vectors for each queue.
-		 */
-		for (i = 0; i < qdev->intr_count; i++, intr_context++) {
-			qdev->rx_ring[i].irq = i;
-			intr_context->intr = i;
-			intr_context->qdev = qdev;
-			/* Set up this vector's bit-mask that indicates
-			 * which queues it services.
-			 */
-			ql_set_irq_mask(qdev, intr_context);
-			/*
-			 * We set up each vector's enable/disable/read bits so
-			 * there are no bit/mask calculations in the critical path.
-			 */
-			intr_context->intr_en_mask =
-			    INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK |
-			    INTR_EN_TYPE_ENABLE | INTR_EN_IHD_MASK | INTR_EN_IHD
-			    | i;
-			intr_context->intr_dis_mask =
-			    INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK |
-			    INTR_EN_TYPE_DISABLE | INTR_EN_IHD_MASK |
-			    INTR_EN_IHD | i;
-			intr_context->intr_read_mask =
-			    INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK |
-			    INTR_EN_TYPE_READ | INTR_EN_IHD_MASK | INTR_EN_IHD |
-			    i;
-			if (i == 0) {
-				/* The first vector/queue handles
-				 * broadcast/multicast, fatal errors,
-				 * and firmware events.  This in addition
-				 * to normal inbound NAPI processing.
-				 */
-				intr_context->handler = qlge_isr;
-				sprintf(intr_context->name, "%s-rx-%d",
-					qdev->ndev->name, i);
-			} else {
-				/*
-				 * Inbound queues handle unicast frames only.
-				 */
-				intr_context->handler = qlge_msix_rx_isr;
-				sprintf(intr_context->name, "%s-rx-%d",
-					qdev->ndev->name, i);
-			}
-		}
-	} else {
-		/*
-		 * All rx_rings use the same intr_context since
-		 * there is only one vector.
-		 */
-		intr_context->intr = 0;
-		intr_context->qdev = qdev;
-		/*
-		 * We set up each vector's enable/disable/read bits so
-		 * there are no bit/mask calculations in the critical path.
-		 */
-		intr_context->intr_en_mask =
-		    INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK | INTR_EN_TYPE_ENABLE;
-		intr_context->intr_dis_mask =
-		    INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK |
-		    INTR_EN_TYPE_DISABLE;
-		intr_context->intr_read_mask =
-		    INTR_EN_TYPE_MASK | INTR_EN_INTR_MASK | INTR_EN_TYPE_READ;
-		/*
-		 * Single interrupt means one handler for all rings.
-		 */
-		intr_context->handler = qlge_isr;
-		sprintf(intr_context->name, "%s-single_irq", qdev->ndev->name);
-		/* Set up this vector's bit-mask that indicates
-		 * which queues it services. In this case there is
-		 * a single vector so it will service all RSS and
-		 * TX completion rings.
-		 */
-		ql_set_irq_mask(qdev, intr_context);
-	}
-	/* Tell the TX completion rings which MSI-X vector
-	 * they will be using.
-	 */
-	ql_set_tx_vect(qdev);
-}
-
-static void ql_free_irq(struct ql_adapter *qdev)
-{
-	int i;
-	struct intr_context *intr_context = &qdev->intr_context[0];
-
-	for (i = 0; i < qdev->intr_count; i++, intr_context++) {
-		if (intr_context->hooked) {
-			if (test_bit(QL_MSIX_ENABLED, &qdev->flags)) {
-				free_irq(qdev->msi_x_entry[i].vector,
-					 &qdev->rx_ring[i]);
-			} else {
-				free_irq(qdev->pdev->irq, &qdev->rx_ring[0]);
-			}
-		}
-	}
-	ql_disable_msix(qdev);
-}
-
-static int ql_request_irq(struct ql_adapter *qdev)
-{
-	int i;
-	int status = 0;
-	struct pci_dev *pdev = qdev->pdev;
-	struct intr_context *intr_context = &qdev->intr_context[0];
-
-	ql_resolve_queues_to_irqs(qdev);
-
-	for (i = 0; i < qdev->intr_count; i++, intr_context++) {
-		atomic_set(&intr_context->irq_cnt, 0);
-		if (test_bit(QL_MSIX_ENABLED, &qdev->flags)) {
-			status = request_irq(qdev->msi_x_entry[i].vector,
-					     intr_context->handler,
-					     0,
-					     intr_context->name,
-					     &qdev->rx_ring[i]);
-			if (status) {
-				netif_err(qdev, ifup, qdev->ndev,
-					  "Failed request for MSIX interrupt %d.\n",
-					  i);
-				goto err_irq;
-			}
-		} else {
-			netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-				     "trying msi or legacy interrupts.\n");
-			netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-				     "%s: irq = %d.\n", __func__, pdev->irq);
-			netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-				     "%s: context->name = %s.\n", __func__,
-				     intr_context->name);
-			netif_printk(qdev, ifup, KERN_DEBUG, qdev->ndev,
-				     "%s: dev_id = 0x%p.\n", __func__,
-				     &qdev->rx_ring[0]);
-			status = request_irq(pdev->irq, qlge_isr,
-					     test_bit(QL_MSI_ENABLED,
-						      &qdev->flags) ?
-					     0 : IRQF_SHARED,
-					     intr_context->name,
-					     &qdev->rx_ring[0]);
-			if (status)
-				goto err_irq;
-
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Hooked intr %d, queue type %s, with name %s.\n",
-				  i,
-				  qdev->rx_ring[0].type == DEFAULT_Q ?
-				  "DEFAULT_Q" :
-				  qdev->rx_ring[0].type == TX_Q ? "TX_Q" :
-				  qdev->rx_ring[0].type == RX_Q ? "RX_Q" : "",
-				  intr_context->name);
-		}
-		intr_context->hooked = 1;
-	}
-	return status;
-err_irq:
-	netif_err(qdev, ifup, qdev->ndev, "Failed to get the interrupts!!!\n");
-	ql_free_irq(qdev);
-	return status;
-}
-
-static int ql_start_rss(struct ql_adapter *qdev)
-{
-	static const u8 init_hash_seed[] = {
-		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
-		0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
-		0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
-		0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
-		0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
-	};
-	struct ricb *ricb = &qdev->ricb;
-	int status = 0;
-	int i;
-	u8 *hash_id = (u8 *) ricb->hash_cq_id;
-
-	memset((void *)ricb, 0, sizeof(*ricb));
-
-	ricb->base_cq = RSS_L4K;
-	ricb->flags =
-		(RSS_L6K | RSS_LI | RSS_LB | RSS_LM | RSS_RT4 | RSS_RT6);
-	ricb->mask = cpu_to_le16((u16)(0x3ff));
-
-	/*
-	 * Fill out the Indirection Table.
-	 */
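-	/* Each of the 1024 entries maps a hash bucket to an RSS ring.
-	 * Note the & (rss_ring_count - 1) below acts as a modulo only
-	 * when the ring count is a power of two.
-	 */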
-	for (i = 0; i < 1024; i++)
-		hash_id[i] = (i & (qdev->rss_ring_count - 1));
-
-	memcpy((void *)&ricb->ipv6_hash_key[0], init_hash_seed, 40);
-	memcpy((void *)&ricb->ipv4_hash_key[0], init_hash_seed, 16);
-
-	status = ql_write_cfg(qdev, ricb, sizeof(*ricb), CFG_LR, 0);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev, "Failed to load RICB.\n");
-		return status;
-	}
-	return status;
-}
-
-static int ql_clear_routing_entries(struct ql_adapter *qdev)
-{
-	int i, status = 0;
-
-	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
-	if (status)
-		return status;
-	/* Clear all the entries in the routing table. */
-	for (i = 0; i < 16; i++) {
-		status = ql_set_routing_reg(qdev, i, 0, 0);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Failed to init routing register for CAM packets.\n");
-			break;
-		}
-	}
-	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
-	return status;
-}
-
-/* Initialize the frame-to-queue routing. */
-static int ql_route_initialize(struct ql_adapter *qdev)
-{
-	int status = 0;
-
-	/* Clear all the entries in the routing table. */
-	status = ql_clear_routing_entries(qdev);
-	if (status)
-		return status;
-
-	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
-	if (status)
-		return status;
-
-	status = ql_set_routing_reg(qdev, RT_IDX_IP_CSUM_ERR_SLOT,
-						RT_IDX_IP_CSUM_ERR, 1);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev,
-			"Failed to init routing register "
-			"for IP CSUM error packets.\n");
-		goto exit;
-	}
-	status = ql_set_routing_reg(qdev, RT_IDX_TCP_UDP_CSUM_ERR_SLOT,
-						RT_IDX_TU_CSUM_ERR, 1);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev,
-			"Failed to init routing register "
-			"for TCP/UDP CSUM error packets.\n");
-		goto exit;
-	}
-	status = ql_set_routing_reg(qdev, RT_IDX_BCAST_SLOT, RT_IDX_BCAST, 1);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Failed to init routing register for broadcast packets.\n");
-		goto exit;
-	}
-	/* If we have more than one inbound queue, then turn on RSS in the
-	 * routing block.
-	 */
-	if (qdev->rss_ring_count > 1) {
-		status = ql_set_routing_reg(qdev, RT_IDX_RSS_MATCH_SLOT,
-					RT_IDX_RSS_MATCH, 1);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Failed to init routing register for MATCH RSS packets.\n");
-			goto exit;
-		}
-	}
-
-	status = ql_set_routing_reg(qdev, RT_IDX_CAM_HIT_SLOT,
-				    RT_IDX_CAM_HIT, 1);
-	if (status)
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Failed to init routing register for CAM packets.\n");
-exit:
-	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
-	return status;
-}
-
-int ql_cam_route_initialize(struct ql_adapter *qdev)
-{
-	int status, set;
-
-	/* Check if the link is up and use that to
-	 * determine whether we are setting or clearing
-	 * the MAC address in the CAM.
-	 */
-	set = ql_read32(qdev, STS);
-	set &= qdev->port_link_up;
-	status = ql_set_mac_addr(qdev, set);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev, "Failed to init mac address.\n");
-		return status;
-	}
-
-	status = ql_route_initialize(qdev);
-	if (status)
-		netif_err(qdev, ifup, qdev->ndev, "Failed to init routing table.\n");
-
-	return status;
-}
-
-static int ql_adapter_initialize(struct ql_adapter *qdev)
-{
-	u32 value, mask;
-	int i;
-	int status = 0;
-
-	/*
-	 * Set up the System register to halt on errors.
-	 */
-	value = SYS_EFE | SYS_FAE;
-	mask = value << 16;
-	ql_write32(qdev, SYS, mask | value);
-
-	/* Set the default queue, and VLAN behavior. */
-	value = NIC_RCV_CFG_DFQ;
-	mask = NIC_RCV_CFG_DFQ_MASK;
-	if (qdev->ndev->features & NETIF_F_HW_VLAN_CTAG_RX) {
-		value |= NIC_RCV_CFG_RV;
-		mask |= (NIC_RCV_CFG_RV << 16);
-	}
-	ql_write32(qdev, NIC_RCV_CFG, (mask | value));
-
-	/* Set the MPI interrupt to enabled. */
-	ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16) | INTR_MASK_PI);
-
-	/* Enable the function, set pagesize, enable error checking. */
-	value = FSC_FE | FSC_EPC_INBOUND | FSC_EPC_OUTBOUND |
-	    FSC_EC | FSC_VM_PAGE_4K;
-	value |= SPLT_SETTING;
-
-	/* Set/clear header splitting. */
-	mask = FSC_VM_PAGESIZE_MASK |
-	    FSC_DBL_MASK | FSC_DBRST_MASK | (value << 16);
-	ql_write32(qdev, FSC, mask | value);
-
-	ql_write32(qdev, SPLT_HDR, SPLT_LEN);
-
-	/* Set RX packet routing to use the port/PCI function on which
-	 * the packet arrived, in addition to the usual frame routing.
-	 * This is helpful with bonding, where both interfaces can have
-	 * the same MAC address.
-	 */
-	ql_write32(qdev, RST_FO, RST_FO_RR_MASK | RST_FO_RR_RCV_FUNC_CQ);
-	/* Reroute all packets to our interface.
-	 * They may have been routed to MPI firmware
-	 * due to WOL.
-	 */
-	value = ql_read32(qdev, MGMT_RCV_CFG);
-	value &= ~MGMT_RCV_CFG_RM;
-	mask = 0xffff0000;
-
-	/* Sticky reg needs clearing due to WOL. */
-	ql_write32(qdev, MGMT_RCV_CFG, mask);
-	ql_write32(qdev, MGMT_RCV_CFG, mask | value);
-
-	/* Default WOL is enabled on Mezz cards */
-	if (qdev->pdev->subsystem_device == 0x0068 ||
-			qdev->pdev->subsystem_device == 0x0180)
-		qdev->wol = WAKE_MAGIC;
-
-	/* Start up the rx queues. */
-	for (i = 0; i < qdev->rx_ring_count; i++) {
-		status = ql_start_rx_ring(qdev, &qdev->rx_ring[i]);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Failed to start rx ring[%d].\n", i);
-			return status;
-		}
-	}
-
-	/* If there is more than one inbound completion queue
-	 * then download a RICB to configure RSS.
-	 */
-	if (qdev->rss_ring_count > 1) {
-		status = ql_start_rss(qdev);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev, "Failed to start RSS.\n");
-			return status;
-		}
-	}
-
-	/* Start up the tx queues. */
-	for (i = 0; i < qdev->tx_ring_count; i++) {
-		status = ql_start_tx_ring(qdev, &qdev->tx_ring[i]);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Failed to start tx ring[%d].\n", i);
-			return status;
-		}
-	}
-
-	/* Initialize the port and set the max framesize. */
-	status = qdev->nic_ops->port_initialize(qdev);
-	if (status)
-		netif_err(qdev, ifup, qdev->ndev, "Failed to start port.\n");
-
-	/* Set up the MAC address and frame routing filter. */
-	status = ql_cam_route_initialize(qdev);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Failed to init CAM/Routing tables.\n");
-		return status;
-	}
-
-	/* Start NAPI for the RSS queues. */
-	for (i = 0; i < qdev->rss_ring_count; i++)
-		napi_enable(&qdev->rx_ring[i].napi);
-
-	return status;
-}
-
-/* Issue soft reset to chip. */
-static int ql_adapter_reset(struct ql_adapter *qdev)
-{
-	u32 value;
-	int status = 0;
-	unsigned long end_jiffies;
-
-	/* Clear all the entries in the routing table. */
-	status = ql_clear_routing_entries(qdev);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev, "Failed to clear routing bits.\n");
-		return status;
-	}
-
-	/* If the recovery bit is set, skip the mailbox command and
-	 * clear the bit; otherwise we are in the normal reset process.
-	 */
-	if (!test_bit(QL_ASIC_RECOVERY, &qdev->flags)) {
-		/* Stop management traffic. */
-		ql_mb_set_mgmnt_traffic_ctl(qdev, MB_SET_MPI_TFK_STOP);
-
-		/* Wait for the NIC and MGMNT FIFOs to empty. */
-		ql_wait_fifo_empty(qdev);
-	} else {
-		clear_bit(QL_ASIC_RECOVERY, &qdev->flags);
-	}
-
-	ql_write32(qdev, RST_FO, (RST_FO_FR << 16) | RST_FO_FR);
-
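-	/* Poll for the chip to clear the function reset bit.  Since
-	 * usecs_to_jiffies() rounds up, the 30 usec request amounts
-	 * to at least one jiffy of polling.
-	 */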
-	end_jiffies = jiffies + usecs_to_jiffies(30);
-	do {
-		value = ql_read32(qdev, RST_FO);
-		if ((value & RST_FO_FR) == 0)
-			break;
-		cpu_relax();
-	} while (time_before(jiffies, end_jiffies));
-
-	if (value & RST_FO_FR) {
-		netif_err(qdev, ifdown, qdev->ndev,
-			  "ETIMEDOUT!!! errored out of resetting the chip!\n");
-		status = -ETIMEDOUT;
-	}
-
-	/* Resume management traffic. */
-	ql_mb_set_mgmnt_traffic_ctl(qdev, MB_SET_MPI_TFK_RESUME);
-	return status;
-}
-
-static void ql_display_dev_info(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	netif_info(qdev, probe, qdev->ndev,
-		   "Function #%d, Port %d, NIC Roll %d, NIC Rev = %d, "
-		   "XG Roll = %d, XG Rev = %d.\n",
-		   qdev->func,
-		   qdev->port,
-		   qdev->chip_rev_id & 0x0000000f,
-		   qdev->chip_rev_id >> 4 & 0x0000000f,
-		   qdev->chip_rev_id >> 8 & 0x0000000f,
-		   qdev->chip_rev_id >> 12 & 0x0000000f);
-	netif_info(qdev, probe, qdev->ndev,
-		   "MAC address %pM\n", ndev->dev_addr);
-}
-
-static int ql_wol(struct ql_adapter *qdev)
-{
-	int status = 0;
-	u32 wol = MB_WOL_DISABLE;
-
-	/* The CAM is still intact after a reset, but if we
-	 * are doing WOL, then we may need to program the
-	 * routing regs. We would also need to issue the mailbox
-	 * commands to instruct the MPI what to do per the ethtool
-	 * settings.
-	 */
-
-	if (qdev->wol & (WAKE_ARP | WAKE_MAGICSECURE | WAKE_PHY | WAKE_UCAST |
-			WAKE_MCAST | WAKE_BCAST)) {
-		netif_err(qdev, ifdown, qdev->ndev,
-			  "Unsupported WOL parameter. qdev->wol = 0x%x.\n",
-			  qdev->wol);
-		return -EINVAL;
-	}
-
-	if (qdev->wol & WAKE_MAGIC) {
-		status = ql_mb_wol_set_magic(qdev, 1);
-		if (status) {
-			netif_err(qdev, ifdown, qdev->ndev,
-				  "Failed to set magic packet on %s.\n",
-				  qdev->ndev->name);
-			return status;
-		}
-		netif_info(qdev, drv, qdev->ndev,
-			   "Enabled magic packet successfully on %s.\n",
-			   qdev->ndev->name);
-
-		wol |= MB_WOL_MAGIC_PKT;
-	}
-
-	if (qdev->wol) {
-		wol |= MB_WOL_MODE_ON;
-		status = ql_mb_wol_mode(qdev, wol);
-		netif_err(qdev, drv, qdev->ndev,
-			  "WOL %s (wol code 0x%x) on %s\n",
-			  (status == 0) ? "Successfully set" : "Failed",
-			  wol, qdev->ndev->name);
-	}
-
-	return status;
-}
-
-static void ql_cancel_all_work_sync(struct ql_adapter *qdev)
-{
-	/* Don't kill the reset worker thread if we
-	 * are in the process of recovery.
-	 */
-	if (test_bit(QL_ADAPTER_UP, &qdev->flags))
-		cancel_delayed_work_sync(&qdev->asic_reset_work);
-	cancel_delayed_work_sync(&qdev->mpi_reset_work);
-	cancel_delayed_work_sync(&qdev->mpi_work);
-	cancel_delayed_work_sync(&qdev->mpi_idc_work);
-	cancel_delayed_work_sync(&qdev->mpi_core_to_log);
-	cancel_delayed_work_sync(&qdev->mpi_port_cfg_work);
-}
-
-static int ql_adapter_down(struct ql_adapter *qdev)
-{
-	int i, status = 0;
-
-	ql_link_off(qdev);
-
-	ql_cancel_all_work_sync(qdev);
-
-	for (i = 0; i < qdev->rss_ring_count; i++)
-		napi_disable(&qdev->rx_ring[i].napi);
-
-	clear_bit(QL_ADAPTER_UP, &qdev->flags);
-
-	ql_disable_interrupts(qdev);
-
-	ql_tx_ring_clean(qdev);
-
-	/* Call netif_napi_del() from a common point. */
-	for (i = 0; i < qdev->rss_ring_count; i++)
-		netif_napi_del(&qdev->rx_ring[i].napi);
-
-	status = ql_adapter_reset(qdev);
-	if (status)
-		netif_err(qdev, ifdown, qdev->ndev, "reset(func #%d) FAILED!\n",
-			  qdev->func);
-	ql_free_rx_buffers(qdev);
-
-	return status;
-}
-
-static int ql_adapter_up(struct ql_adapter *qdev)
-{
-	int err = 0;
-
-	err = ql_adapter_initialize(qdev);
-	if (err) {
-		netif_info(qdev, ifup, qdev->ndev, "Unable to initialize adapter.\n");
-		goto err_init;
-	}
-	set_bit(QL_ADAPTER_UP, &qdev->flags);
-	ql_alloc_rx_buffers(qdev);
-	/* If the port is initialized and the
-	 * link is up, then turn on the carrier.
-	 */
-	if ((ql_read32(qdev, STS) & qdev->port_init) &&
-			(ql_read32(qdev, STS) & qdev->port_link_up))
-		ql_link_on(qdev);
-	/* Restore rx mode. */
-	clear_bit(QL_ALLMULTI, &qdev->flags);
-	clear_bit(QL_PROMISCUOUS, &qdev->flags);
-	qlge_set_multicast_list(qdev->ndev);
-
-	/* Restore vlan setting. */
-	qlge_restore_vlan(qdev);
-
-	ql_enable_interrupts(qdev);
-	ql_enable_all_completion_interrupts(qdev);
-	netif_tx_start_all_queues(qdev->ndev);
-
-	return 0;
-err_init:
-	ql_adapter_reset(qdev);
-	return err;
-}
-
-static void ql_release_adapter_resources(struct ql_adapter *qdev)
-{
-	ql_free_mem_resources(qdev);
-	ql_free_irq(qdev);
-}
-
-static int ql_get_adapter_resources(struct ql_adapter *qdev)
-{
-	int status = 0;
-
-	if (ql_alloc_mem_resources(qdev)) {
-		netif_err(qdev, ifup, qdev->ndev, "Unable to  allocate memory.\n");
-		return -ENOMEM;
-	}
-	status = ql_request_irq(qdev);
-	return status;
-}
-
-static int qlge_close(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	/* If we hit the pci_channel_io_perm_failure
-	 * condition, then we already
-	 * brought the adapter down.
-	 */
-	if (test_bit(QL_EEH_FATAL, &qdev->flags)) {
-		netif_err(qdev, drv, qdev->ndev, "EEH fatal did unload.\n");
-		clear_bit(QL_EEH_FATAL, &qdev->flags);
-		return 0;
-	}
-
-	/*
-	 * Wait for device to recover from a reset.
-	 * (Rarely happens, but possible.)
-	 */
-	while (!test_bit(QL_ADAPTER_UP, &qdev->flags))
-		msleep(1);
-	ql_adapter_down(qdev);
-	ql_release_adapter_resources(qdev);
-	return 0;
-}
-
-static int ql_configure_rings(struct ql_adapter *qdev)
-{
-	int i;
-	struct rx_ring *rx_ring;
-	struct tx_ring *tx_ring;
-	int cpu_cnt = min(MAX_CPUS, (int)num_online_cpus());
-	unsigned int lbq_buf_len = (qdev->ndev->mtu > 1500) ?
-		LARGE_BUFFER_MAX_SIZE : LARGE_BUFFER_MIN_SIZE;
-
-	qdev->lbq_buf_order = get_order(lbq_buf_len);
-
-	/* In a perfect world we have one RSS ring for each CPU
-	 * and each has its own vector.  To do that we ask for
-	 * cpu_cnt vectors.  ql_enable_msix() will adjust the
-	 * vector count to what we actually get.  We then
-	 * allocate an RSS ring for each.
-	 * Essentially, we are doing min(cpu_count, msix_vector_count).
-	 */
-	qdev->intr_count = cpu_cnt;
-	ql_enable_msix(qdev);
-	/* Adjust the RSS ring count to the actual vector count. */
-	qdev->rss_ring_count = qdev->intr_count;
-	qdev->tx_ring_count = cpu_cnt;
-	qdev->rx_ring_count = qdev->tx_ring_count + qdev->rss_ring_count;
-
-	for (i = 0; i < qdev->tx_ring_count; i++) {
-		tx_ring = &qdev->tx_ring[i];
-		memset((void *)tx_ring, 0, sizeof(*tx_ring));
-		tx_ring->qdev = qdev;
-		tx_ring->wq_id = i;
-		tx_ring->wq_len = qdev->tx_ring_size;
-		tx_ring->wq_size =
-		    tx_ring->wq_len * sizeof(struct ob_mac_iocb_req);
-
-		/*
-		 * The completion queue IDs for the tx rings start
-		 * immediately after the rss rings.
-		 */
-		tx_ring->cq_id = qdev->rss_ring_count + i;
-	}
-
-	for (i = 0; i < qdev->rx_ring_count; i++) {
-		rx_ring = &qdev->rx_ring[i];
-		memset((void *)rx_ring, 0, sizeof(*rx_ring));
-		rx_ring->qdev = qdev;
-		rx_ring->cq_id = i;
-		rx_ring->cpu = i % cpu_cnt;	/* CPU to run handler on. */
-		if (i < qdev->rss_ring_count) {
-			/*
-			 * Inbound (RSS) queues.
-			 */
-			rx_ring->cq_len = qdev->rx_ring_size;
-			rx_ring->cq_size =
-			    rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb);
-			rx_ring->lbq_len = NUM_LARGE_BUFFERS;
-			rx_ring->lbq_size =
-			    rx_ring->lbq_len * sizeof(__le64);
-			rx_ring->lbq_buf_size = (u16)lbq_buf_len;
-			rx_ring->sbq_len = NUM_SMALL_BUFFERS;
-			rx_ring->sbq_size =
-			    rx_ring->sbq_len * sizeof(__le64);
-			rx_ring->sbq_buf_size = SMALL_BUF_MAP_SIZE;
-			rx_ring->type = RX_Q;
-		} else {
-			/*
-			 * Outbound queue handles outbound completions only.
-			 */
-			/* outbound cq is same size as tx_ring it services. */
-			rx_ring->cq_len = qdev->tx_ring_size;
-			rx_ring->cq_size =
-			    rx_ring->cq_len * sizeof(struct ql_net_rsp_iocb);
-			rx_ring->lbq_len = 0;
-			rx_ring->lbq_size = 0;
-			rx_ring->lbq_buf_size = 0;
-			rx_ring->sbq_len = 0;
-			rx_ring->sbq_size = 0;
-			rx_ring->sbq_buf_size = 0;
-			rx_ring->type = TX_Q;
-		}
-	}
-	return 0;
-}
-
-static int qlge_open(struct net_device *ndev)
-{
-	int err = 0;
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	err = ql_adapter_reset(qdev);
-	if (err)
-		return err;
-
-	err = ql_configure_rings(qdev);
-	if (err)
-		return err;
-
-	err = ql_get_adapter_resources(qdev);
-	if (err)
-		goto error_up;
-
-	err = ql_adapter_up(qdev);
-	if (err)
-		goto error_up;
-
-	return err;
-
-error_up:
-	ql_release_adapter_resources(qdev);
-	return err;
-}
-
-static int ql_change_rx_buffers(struct ql_adapter *qdev)
-{
-	struct rx_ring *rx_ring;
-	int i, status;
-	u32 lbq_buf_len;
-
-	/* Wait for an outstanding reset to complete. */
-	if (!test_bit(QL_ADAPTER_UP, &qdev->flags)) {
-		int i = 4;
-
-		while (--i && !test_bit(QL_ADAPTER_UP, &qdev->flags)) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Waiting for adapter UP...\n");
-			ssleep(1);
-		}
-
-		if (!i) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Timed out waiting for adapter UP\n");
-			return -ETIMEDOUT;
-		}
-	}
-
-	status = ql_adapter_down(qdev);
-	if (status)
-		goto error;
-
-	/* Get the new rx buffer size. */
-	lbq_buf_len = (qdev->ndev->mtu > 1500) ?
-		LARGE_BUFFER_MAX_SIZE : LARGE_BUFFER_MIN_SIZE;
-	qdev->lbq_buf_order = get_order(lbq_buf_len);
-
-	for (i = 0; i < qdev->rss_ring_count; i++) {
-		rx_ring = &qdev->rx_ring[i];
-		/* Set the new size. */
-		rx_ring->lbq_buf_size = lbq_buf_len;
-	}
-
-	status = ql_adapter_up(qdev);
-	if (status)
-		goto error;
-
-	return status;
-error:
-	netif_alert(qdev, ifup, qdev->ndev,
-		    "Driver up/down cycle failed, closing device.\n");
-	set_bit(QL_ADAPTER_UP, &qdev->flags);
-	dev_close(qdev->ndev);
-	return status;
-}
-
-static int qlge_change_mtu(struct net_device *ndev, int new_mtu)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int status;
-
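-	/* Only transitions between the normal (1500) and jumbo (9000)
-	 * MTU are supported; anything else is rejected below.
-	 */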
-	if (ndev->mtu == 1500 && new_mtu == 9000) {
-		netif_err(qdev, ifup, qdev->ndev, "Changing to jumbo MTU.\n");
-	} else if (ndev->mtu == 9000 && new_mtu == 1500) {
-		netif_err(qdev, ifup, qdev->ndev, "Changing to normal MTU.\n");
-	} else {
-		return -EINVAL;
-	}
-
-	queue_delayed_work(qdev->workqueue,
-			&qdev->mpi_port_cfg_work, 3*HZ);
-
-	ndev->mtu = new_mtu;
-
-	if (!netif_running(qdev->ndev))
-		return 0;
-
-	status = ql_change_rx_buffers(qdev);
-	if (status) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Changing MTU failed.\n");
-	}
-
-	return status;
-}
-
-static struct net_device_stats *qlge_get_stats(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	struct rx_ring *rx_ring = &qdev->rx_ring[0];
-	struct tx_ring *tx_ring = &qdev->tx_ring[0];
-	unsigned long pkts, mcast, dropped, errors, bytes;
-	int i;
-
-	/* Get RX stats. */
-	pkts = mcast = dropped = errors = bytes = 0;
-	for (i = 0; i < qdev->rss_ring_count; i++, rx_ring++) {
-		pkts += rx_ring->rx_packets;
-		bytes += rx_ring->rx_bytes;
-		dropped += rx_ring->rx_dropped;
-		errors += rx_ring->rx_errors;
-		mcast += rx_ring->rx_multicast;
-	}
-	ndev->stats.rx_packets = pkts;
-	ndev->stats.rx_bytes = bytes;
-	ndev->stats.rx_dropped = dropped;
-	ndev->stats.rx_errors = errors;
-	ndev->stats.multicast = mcast;
-
-	/* Get TX stats. */
-	pkts = errors = bytes = 0;
-	for (i = 0; i < qdev->tx_ring_count; i++, tx_ring++) {
-		pkts += tx_ring->tx_packets;
-		bytes += tx_ring->tx_bytes;
-		errors += tx_ring->tx_errors;
-	}
-	ndev->stats.tx_packets = pkts;
-	ndev->stats.tx_bytes = bytes;
-	ndev->stats.tx_errors = errors;
-	return &ndev->stats;
-}
-
-static void qlge_set_multicast_list(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	struct netdev_hw_addr *ha;
-	int i, status;
-
-	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
-	if (status)
-		return;
-	/*
-	 * Set or clear promiscuous mode if a
-	 * transition is taking place.
-	 */
-	if (ndev->flags & IFF_PROMISC) {
-		if (!test_bit(QL_PROMISCUOUS, &qdev->flags)) {
-			if (ql_set_routing_reg
-			    (qdev, RT_IDX_PROMISCUOUS_SLOT, RT_IDX_VALID, 1)) {
-				netif_err(qdev, hw, qdev->ndev,
-					  "Failed to set promiscuous mode.\n");
-			} else {
-				set_bit(QL_PROMISCUOUS, &qdev->flags);
-			}
-		}
-	} else {
-		if (test_bit(QL_PROMISCUOUS, &qdev->flags)) {
-			if (ql_set_routing_reg
-			    (qdev, RT_IDX_PROMISCUOUS_SLOT, RT_IDX_VALID, 0)) {
-				netif_err(qdev, hw, qdev->ndev,
-					  "Failed to clear promiscuous mode.\n");
-			} else {
-				clear_bit(QL_PROMISCUOUS, &qdev->flags);
-			}
-		}
-	}
-
-	/*
-	 * Set or clear all multicast mode if a
-	 * transition is taking place.
-	 */
-	if ((ndev->flags & IFF_ALLMULTI) ||
-	    (netdev_mc_count(ndev) > MAX_MULTICAST_ENTRIES)) {
-		if (!test_bit(QL_ALLMULTI, &qdev->flags)) {
-			if (ql_set_routing_reg
-			    (qdev, RT_IDX_ALLMULTI_SLOT, RT_IDX_MCAST, 1)) {
-				netif_err(qdev, hw, qdev->ndev,
-					  "Failed to set all-multi mode.\n");
-			} else {
-				set_bit(QL_ALLMULTI, &qdev->flags);
-			}
-		}
-	} else {
-		if (test_bit(QL_ALLMULTI, &qdev->flags)) {
-			if (ql_set_routing_reg
-			    (qdev, RT_IDX_ALLMULTI_SLOT, RT_IDX_MCAST, 0)) {
-				netif_err(qdev, hw, qdev->ndev,
-					  "Failed to clear all-multi mode.\n");
-			} else {
-				clear_bit(QL_ALLMULTI, &qdev->flags);
-			}
-		}
-	}
-
-	if (!netdev_mc_empty(ndev)) {
-		status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-		if (status)
-			goto exit;
-		i = 0;
-		netdev_for_each_mc_addr(ha, ndev) {
-			if (ql_set_mac_addr_reg(qdev, (u8 *) ha->addr,
-						MAC_ADDR_TYPE_MULTI_MAC, i)) {
-				netif_err(qdev, hw, qdev->ndev,
-					  "Failed to loadmulticast address.\n");
-				ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-				goto exit;
-			}
-			i++;
-		}
-		ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-		if (ql_set_routing_reg
-		    (qdev, RT_IDX_MCAST_MATCH_SLOT, RT_IDX_MCAST_MATCH, 1)) {
-			netif_err(qdev, hw, qdev->ndev,
-				  "Failed to set multicast match mode.\n");
-		} else {
-			set_bit(QL_ALLMULTI, &qdev->flags);
-		}
-	}
-exit:
-	ql_sem_unlock(qdev, SEM_RT_IDX_MASK);
-}
-
-static int qlge_set_mac_address(struct net_device *ndev, void *p)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	struct sockaddr *addr = p;
-	int status;
-
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-	memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
-	/* Update local copy of current mac address. */
-	memcpy(qdev->current_mac_addr, ndev->dev_addr, ndev->addr_len);
-
-	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
-	if (status)
-		return status;
-	status = ql_set_mac_addr_reg(qdev, (u8 *) ndev->dev_addr,
-			MAC_ADDR_TYPE_CAM_MAC, qdev->func * MAX_CQ);
-	if (status)
-		netif_err(qdev, hw, qdev->ndev, "Failed to load MAC address.\n");
-	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
-	return status;
-}
-
-static void qlge_tx_timeout(struct net_device *ndev)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	ql_queue_asic_error(qdev);
-}
-
-static void ql_asic_reset_work(struct work_struct *work)
-{
-	struct ql_adapter *qdev =
-	    container_of(work, struct ql_adapter, asic_reset_work.work);
-	int status;
-	rtnl_lock();
-	status = ql_adapter_down(qdev);
-	if (status)
-		goto error;
-
-	status = ql_adapter_up(qdev);
-	if (status)
-		goto error;
-
-	/* Restore rx mode. */
-	clear_bit(QL_ALLMULTI, &qdev->flags);
-	clear_bit(QL_PROMISCUOUS, &qdev->flags);
-	qlge_set_multicast_list(qdev->ndev);
-
-	rtnl_unlock();
-	return;
-error:
-	netif_alert(qdev, ifup, qdev->ndev,
-		    "Driver up/down cycle failed, closing device\n");
-
-	set_bit(QL_ADAPTER_UP, &qdev->flags);
-	dev_close(qdev->ndev);
-	rtnl_unlock();
-}
-
-static const struct nic_operations qla8012_nic_ops = {
-	.get_flash		= ql_get_8012_flash_params,
-	.port_initialize	= ql_8012_port_initialize,
-};
-
-static const struct nic_operations qla8000_nic_ops = {
-	.get_flash		= ql_get_8000_flash_params,
-	.port_initialize	= ql_8000_port_initialize,
-};
-
-/* Find the pcie function number for the other NIC
- * on this chip.  Since both NIC functions share a
- * common firmware we have the lowest enabled function
- * do any common work.  Examples would be resetting
- * after a fatal firmware error, or doing a firmware
- * coredump.
- */
-static int ql_get_alt_pcie_func(struct ql_adapter *qdev)
-{
-	int status = 0;
-	u32 temp;
-	u32 nic_func1, nic_func2;
-
-	status = ql_read_mpi_reg(qdev, MPI_TEST_FUNC_PORT_CFG,
-			&temp);
-	if (status)
-		return status;
-
-	nic_func1 = ((temp >> MPI_TEST_NIC1_FUNC_SHIFT) &
-			MPI_TEST_NIC_FUNC_MASK);
-	nic_func2 = ((temp >> MPI_TEST_NIC2_FUNC_SHIFT) &
-			MPI_TEST_NIC_FUNC_MASK);
-
-	if (qdev->func == nic_func1)
-		qdev->alt_func = nic_func2;
-	else if (qdev->func == nic_func2)
-		qdev->alt_func = nic_func1;
-	else
-		status = -EIO;
-
-	return status;
-}
-
-static int ql_get_board_info(struct ql_adapter *qdev)
-{
-	int status;
-	qdev->func =
-	    (ql_read32(qdev, STS) & STS_FUNC_ID_MASK) >> STS_FUNC_ID_SHIFT;
-	if (qdev->func > 3)
-		return -EIO;
-
-	status = ql_get_alt_pcie_func(qdev);
-	if (status)
-		return status;
-
-	qdev->port = (qdev->func < qdev->alt_func) ? 0 : 1;
-	if (qdev->port) {
-		qdev->xg_sem_mask = SEM_XGMAC1_MASK;
-		qdev->port_link_up = STS_PL1;
-		qdev->port_init = STS_PI1;
-		qdev->mailbox_in = PROC_ADDR_MPI_RISC | PROC_ADDR_FUNC2_MBI;
-		qdev->mailbox_out = PROC_ADDR_MPI_RISC | PROC_ADDR_FUNC2_MBO;
-	} else {
-		qdev->xg_sem_mask = SEM_XGMAC0_MASK;
-		qdev->port_link_up = STS_PL0;
-		qdev->port_init = STS_PI0;
-		qdev->mailbox_in = PROC_ADDR_MPI_RISC | PROC_ADDR_FUNC0_MBI;
-		qdev->mailbox_out = PROC_ADDR_MPI_RISC | PROC_ADDR_FUNC0_MBO;
-	}
-	qdev->chip_rev_id = ql_read32(qdev, REV_ID);
-	qdev->device_id = qdev->pdev->device;
-	if (qdev->device_id == QLGE_DEVICE_ID_8012)
-		qdev->nic_ops = &qla8012_nic_ops;
-	else if (qdev->device_id == QLGE_DEVICE_ID_8000)
-		qdev->nic_ops = &qla8000_nic_ops;
-	return status;
-}
-
-static void ql_release_all(struct pci_dev *pdev)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	if (qdev->workqueue) {
-		destroy_workqueue(qdev->workqueue);
-		qdev->workqueue = NULL;
-	}
-
-	if (qdev->reg_base)
-		iounmap(qdev->reg_base);
-	if (qdev->doorbell_area)
-		iounmap(qdev->doorbell_area);
-	vfree(qdev->mpi_coredump);
-	pci_release_regions(pdev);
-}
-
-static int ql_init_device(struct pci_dev *pdev, struct net_device *ndev,
-			  int cards_found)
-{
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int err = 0;
-
-	memset((void *)qdev, 0, sizeof(*qdev));
-	err = pci_enable_device(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "PCI device enable failed.\n");
-		return err;
-	}
-
-	qdev->ndev = ndev;
-	qdev->pdev = pdev;
-	pci_set_drvdata(pdev, ndev);
-
-	/* Set PCIe read request size */
-	err = pcie_set_readrq(pdev, 4096);
-	if (err) {
-		dev_err(&pdev->dev, "Set readrq failed.\n");
-		goto err_out1;
-	}
-
-	err = pci_request_regions(pdev, DRV_NAME);
-	if (err) {
-		dev_err(&pdev->dev, "PCI region request failed.\n");
-		return err;
-	}
-
-	pci_set_master(pdev);
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		set_bit(QL_DMA64, &qdev->flags);
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	} else {
-		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (!err)
-		       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	}
-
-	if (err) {
-		dev_err(&pdev->dev, "No usable DMA configuration.\n");
-		goto err_out2;
-	}
-
-	/* Set PCIe reset type for EEH to fundamental. */
-	pdev->needs_freset = 1;
-	pci_save_state(pdev);
-	qdev->reg_base =
-	    ioremap_nocache(pci_resource_start(pdev, 1),
-			    pci_resource_len(pdev, 1));
-	if (!qdev->reg_base) {
-		dev_err(&pdev->dev, "Register mapping failed.\n");
-		err = -ENOMEM;
-		goto err_out2;
-	}
-
-	qdev->doorbell_area_size = pci_resource_len(pdev, 3);
-	qdev->doorbell_area =
-	    ioremap_nocache(pci_resource_start(pdev, 3),
-			    pci_resource_len(pdev, 3));
-	if (!qdev->doorbell_area) {
-		dev_err(&pdev->dev, "Doorbell register mapping failed.\n");
-		err = -ENOMEM;
-		goto err_out2;
-	}
-
-	err = ql_get_board_info(qdev);
-	if (err) {
-		dev_err(&pdev->dev, "Register access failed.\n");
-		err = -EIO;
-		goto err_out2;
-	}
-	qdev->msg_enable = netif_msg_init(debug, default_msg);
-	spin_lock_init(&qdev->hw_lock);
-	spin_lock_init(&qdev->stats_lock);
-
-	if (qlge_mpi_coredump) {
-		qdev->mpi_coredump =
-			vmalloc(sizeof(struct ql_mpi_coredump));
-		if (qdev->mpi_coredump == NULL) {
-			err = -ENOMEM;
-			goto err_out2;
-		}
-		if (qlge_force_coredump)
-			set_bit(QL_FRC_COREDUMP, &qdev->flags);
-	}
-	/* make sure the EEPROM is good */
-	err = qdev->nic_ops->get_flash(qdev);
-	if (err) {
-		dev_err(&pdev->dev, "Invalid FLASH.\n");
-		goto err_out2;
-	}
-
-	/* Keep local copy of current mac address. */
-	memcpy(qdev->current_mac_addr, ndev->dev_addr, ndev->addr_len);
-
-	/* Set up the default ring sizes. */
-	qdev->tx_ring_size = NUM_TX_RING_ENTRIES;
-	qdev->rx_ring_size = NUM_RX_RING_ENTRIES;
-
-	/* Set up the coalescing parameters. */
-	qdev->rx_coalesce_usecs = DFLT_COALESCE_WAIT;
-	qdev->tx_coalesce_usecs = DFLT_COALESCE_WAIT;
-	qdev->rx_max_coalesced_frames = DFLT_INTER_FRAME_WAIT;
-	qdev->tx_max_coalesced_frames = DFLT_INTER_FRAME_WAIT;
-
-	/*
-	 * Set up the operating parameters.
-	 */
-	qdev->workqueue = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM,
-						  ndev->name);
-	INIT_DELAYED_WORK(&qdev->asic_reset_work, ql_asic_reset_work);
-	INIT_DELAYED_WORK(&qdev->mpi_reset_work, ql_mpi_reset_work);
-	INIT_DELAYED_WORK(&qdev->mpi_work, ql_mpi_work);
-	INIT_DELAYED_WORK(&qdev->mpi_port_cfg_work, ql_mpi_port_cfg_work);
-	INIT_DELAYED_WORK(&qdev->mpi_idc_work, ql_mpi_idc_work);
-	INIT_DELAYED_WORK(&qdev->mpi_core_to_log, ql_mpi_core_to_log);
-	init_completion(&qdev->ide_completion);
-	mutex_init(&qdev->mpi_mutex);
-
-	if (!cards_found) {
-		dev_info(&pdev->dev, "%s\n", DRV_STRING);
-		dev_info(&pdev->dev, "Driver name: %s, Version: %s.\n",
-			 DRV_NAME, DRV_VERSION);
-	}
-	return 0;
-err_out2:
-	ql_release_all(pdev);
-err_out1:
-	pci_disable_device(pdev);
-	return err;
-}
-
-static const struct net_device_ops qlge_netdev_ops = {
-	.ndo_open		= qlge_open,
-	.ndo_stop		= qlge_close,
-	.ndo_start_xmit		= qlge_send,
-	.ndo_change_mtu		= qlge_change_mtu,
-	.ndo_get_stats		= qlge_get_stats,
-	.ndo_set_rx_mode	= qlge_set_multicast_list,
-	.ndo_set_mac_address	= qlge_set_mac_address,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_tx_timeout		= qlge_tx_timeout,
-	.ndo_set_features	= qlge_set_features,
-	.ndo_vlan_rx_add_vid	= qlge_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	= qlge_vlan_rx_kill_vid,
-};
-
-static void ql_timer(struct timer_list *t)
-{
-	struct ql_adapter *qdev = from_timer(qdev, t, timer);
-	u32 var = 0;
-
-	var = ql_read32(qdev, STS);
-	if (pci_channel_offline(qdev->pdev)) {
-		netif_err(qdev, ifup, qdev->ndev, "EEH STS = 0x%.08x.\n", var);
-		return;
-	}
-
-	mod_timer(&qdev->timer, jiffies + (5*HZ));
-}
-
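ql_timer() above is one half of a deferrable watchdog: it re-arms itself every five seconds unless the PCI channel has gone offline, and the matching timer_setup() call lives in qlge_probe() below. A condensed sketch of the whole pattern (my_dev/my_check/my_start are hypothetical names):

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	struct my_dev {
		struct timer_list timer;
	};

	static void my_check(struct timer_list *t)
	{
		struct my_dev *d = from_timer(d, t, timer);

		/* Poll hardware health here; returning without calling
		 * mod_timer() stops the watchdog. */
		mod_timer(&d->timer, jiffies + 5 * HZ);
	}

	static void my_start(struct my_dev *d)
	{
		/* TIMER_DEFERRABLE lets an idle CPU delay the callback to
		 * its next wakeup instead of waking just for this timer. */
		timer_setup(&d->timer, my_check, TIMER_DEFERRABLE);
		mod_timer(&d->timer, jiffies + 5 * HZ);
	}
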
-static int qlge_probe(struct pci_dev *pdev,
-		      const struct pci_device_id *pci_entry)
-{
-	struct net_device *ndev = NULL;
-	struct ql_adapter *qdev = NULL;
-	static int cards_found;
-	int err = 0;
-
-	ndev = alloc_etherdev_mq(sizeof(struct ql_adapter),
-			min(MAX_CPUS, netif_get_num_default_rss_queues()));
-	if (!ndev)
-		return -ENOMEM;
-
-	err = ql_init_device(pdev, ndev, cards_found);
-	if (err < 0) {
-		free_netdev(ndev);
-		return err;
-	}
-
-	qdev = netdev_priv(ndev);
-	SET_NETDEV_DEV(ndev, &pdev->dev);
-	ndev->hw_features = NETIF_F_SG |
-			    NETIF_F_IP_CSUM |
-			    NETIF_F_TSO |
-			    NETIF_F_TSO_ECN |
-			    NETIF_F_HW_VLAN_CTAG_TX |
-			    NETIF_F_HW_VLAN_CTAG_RX |
-			    NETIF_F_HW_VLAN_CTAG_FILTER |
-			    NETIF_F_RXCSUM;
-	ndev->features = ndev->hw_features;
-	ndev->vlan_features = ndev->hw_features;
-	/* vlan gets same features (except vlan filter) */
-	ndev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER |
-				 NETIF_F_HW_VLAN_CTAG_TX |
-				 NETIF_F_HW_VLAN_CTAG_RX);
-
-	if (test_bit(QL_DMA64, &qdev->flags))
-		ndev->features |= NETIF_F_HIGHDMA;
-
-	/*
-	 * Set up net_device structure.
-	 */
-	ndev->tx_queue_len = qdev->tx_ring_size;
-	ndev->irq = pdev->irq;
-
-	ndev->netdev_ops = &qlge_netdev_ops;
-	ndev->ethtool_ops = &qlge_ethtool_ops;
-	ndev->watchdog_timeo = 10 * HZ;
-
-	/* MTU range: this driver only supports 1500 or 9000, so this only
-	 * filters out values above or below, and we'll rely on
-	 * qlge_change_mtu to make sure only 1500 or 9000 are allowed
-	 */
-	ndev->min_mtu = ETH_DATA_LEN;
-	ndev->max_mtu = 9000;
-
-	err = register_netdev(ndev);
-	if (err) {
-		dev_err(&pdev->dev, "net device registration failed.\n");
-		ql_release_all(pdev);
-		pci_disable_device(pdev);
-		free_netdev(ndev);
-		return err;
-	}
-	/* Start up the timer to trigger EEH if
-	 * the bus goes dead
-	 */
-	timer_setup(&qdev->timer, ql_timer, TIMER_DEFERRABLE);
-	mod_timer(&qdev->timer, jiffies + (5*HZ));
-	ql_link_off(qdev);
-	ql_display_dev_info(ndev);
-	atomic_set(&qdev->lb_count, 0);
-	cards_found++;
-	return 0;
-}
-
-netdev_tx_t ql_lb_send(struct sk_buff *skb, struct net_device *ndev)
-{
-	return qlge_send(skb, ndev);
-}
-
-int ql_clean_lb_rx_ring(struct rx_ring *rx_ring, int budget)
-{
-	return ql_clean_inbound_rx_ring(rx_ring, budget);
-}
-
-static void qlge_remove(struct pci_dev *pdev)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	del_timer_sync(&qdev->timer);
-	ql_cancel_all_work_sync(qdev);
-	unregister_netdev(ndev);
-	ql_release_all(pdev);
-	pci_disable_device(pdev);
-	free_netdev(ndev);
-}
-
-/* Clean up resources without touching hardware. */
-static void ql_eeh_close(struct net_device *ndev)
-{
-	int i;
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	if (netif_carrier_ok(ndev)) {
-		netif_carrier_off(ndev);
-		netif_stop_queue(ndev);
-	}
-
-	/* Cancel all outstanding work. */
-	ql_cancel_all_work_sync(qdev);
-
-	for (i = 0; i < qdev->rss_ring_count; i++)
-		netif_napi_del(&qdev->rx_ring[i].napi);
-
-	clear_bit(QL_ADAPTER_UP, &qdev->flags);
-	ql_tx_ring_clean(qdev);
-	ql_free_rx_buffers(qdev);
-	ql_release_adapter_resources(qdev);
-}
-
-/*
- * This callback is called by the PCI subsystem whenever
- * a PCI bus error is detected.
- */
-static pci_ers_result_t qlge_io_error_detected(struct pci_dev *pdev,
-					       enum pci_channel_state state)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	switch (state) {
-	case pci_channel_io_normal:
-		return PCI_ERS_RESULT_CAN_RECOVER;
-	case pci_channel_io_frozen:
-		netif_device_detach(ndev);
-		del_timer_sync(&qdev->timer);
-		if (netif_running(ndev))
-			ql_eeh_close(ndev);
-		pci_disable_device(pdev);
-		return PCI_ERS_RESULT_NEED_RESET;
-	case pci_channel_io_perm_failure:
-		dev_err(&pdev->dev,
-			"%s: pci_channel_io_perm_failure.\n", __func__);
-		del_timer_sync(&qdev->timer);
-		ql_eeh_close(ndev);
-		set_bit(QL_EEH_FATAL, &qdev->flags);
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-
-	/* Request a slot reset. */
-	return PCI_ERS_RESULT_NEED_RESET;
-}
-
-/*
- * This callback is called after the PCI bus has been reset.
- * Basically, this tries to restart the card from scratch.
- * This is a shortened version of the device probe/discovery code;
- * it resembles the first half of the probe routine.
- */
-static pci_ers_result_t qlge_io_slot_reset(struct pci_dev *pdev)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-
-	pdev->error_state = pci_channel_io_normal;
-
-	pci_restore_state(pdev);
-	if (pci_enable_device(pdev)) {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Cannot re-enable PCI device after reset.\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-	pci_set_master(pdev);
-
-	if (ql_adapter_reset(qdev)) {
-		netif_err(qdev, drv, qdev->ndev, "reset FAILED!\n");
-		set_bit(QL_EEH_FATAL, &qdev->flags);
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-
-	return PCI_ERS_RESULT_RECOVERED;
-}
-
-static void qlge_io_resume(struct pci_dev *pdev)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int err = 0;
-
-	if (netif_running(ndev)) {
-		err = qlge_open(ndev);
-		if (err) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Device initialization failed after reset.\n");
-			return;
-		}
-	} else {
-		netif_err(qdev, ifup, qdev->ndev,
-			  "Device was not running prior to EEH.\n");
-	}
-	mod_timer(&qdev->timer, jiffies + (5*HZ));
-	netif_device_attach(ndev);
-}
-
-static const struct pci_error_handlers qlge_err_handler = {
-	.error_detected = qlge_io_error_detected,
-	.slot_reset = qlge_io_slot_reset,
-	.resume = qlge_io_resume,
-};
-
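The PCI error-recovery core invokes the handlers above in a fixed order: .error_detected when a channel error is reported, .slot_reset after the link has been reset (when the first callback returned PCI_ERS_RESULT_NEED_RESET), and .resume once recovery succeeds. A minimal skeleton of that contract (the my_* names are hypothetical):

	#include <linux/pci.h>

	static pci_ers_result_t my_error_detected(struct pci_dev *pdev,
						  enum pci_channel_state state)
	{
		/* Quiesce I/O; asking for NEED_RESET makes the core reset
		 * the slot and then call .slot_reset. */
		if (state == pci_channel_io_perm_failure)
			return PCI_ERS_RESULT_DISCONNECT;
		return PCI_ERS_RESULT_NEED_RESET;
	}

	static pci_ers_result_t my_slot_reset(struct pci_dev *pdev)
	{
		/* Bring the function back up from scratch. */
		if (pci_enable_device(pdev))
			return PCI_ERS_RESULT_DISCONNECT;
		pci_set_master(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}

	static void my_resume(struct pci_dev *pdev)
	{
		/* Restart traffic; recovery is complete. */
	}

	static const struct pci_error_handlers my_err_handler = {
		.error_detected	= my_error_detected,
		.slot_reset	= my_slot_reset,
		.resume		= my_resume,
	};
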
-static int qlge_suspend(struct pci_dev *pdev, pm_message_t state)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int err;
-
-	netif_device_detach(ndev);
-	del_timer_sync(&qdev->timer);
-
-	if (netif_running(ndev)) {
-		err = ql_adapter_down(qdev);
-		if (err)
-			return err;
-	}
-
-	ql_wol(qdev);
-	err = pci_save_state(pdev);
-	if (err)
-		return err;
-
-	pci_disable_device(pdev);
-
-	pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int qlge_resume(struct pci_dev *pdev)
-{
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct ql_adapter *qdev = netdev_priv(ndev);
-	int err;
-
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	err = pci_enable_device(pdev);
-	if (err) {
-		netif_err(qdev, ifup, qdev->ndev, "Cannot enable PCI device from suspend\n");
-		return err;
-	}
-	pci_set_master(pdev);
-
-	pci_enable_wake(pdev, PCI_D3hot, 0);
-	pci_enable_wake(pdev, PCI_D3cold, 0);
-
-	if (netif_running(ndev)) {
-		err = ql_adapter_up(qdev);
-		if (err)
-			return err;
-	}
-
-	mod_timer(&qdev->timer, jiffies + (5*HZ));
-	netif_device_attach(ndev);
-
-	return 0;
-}
-#endif /* CONFIG_PM */
-
-static void qlge_shutdown(struct pci_dev *pdev)
-{
-	qlge_suspend(pdev, PMSG_SUSPEND);
-}
-
-static struct pci_driver qlge_driver = {
-	.name = DRV_NAME,
-	.id_table = qlge_pci_tbl,
-	.probe = qlge_probe,
-	.remove = qlge_remove,
-#ifdef CONFIG_PM
-	.suspend = qlge_suspend,
-	.resume = qlge_resume,
-#endif
-	.shutdown = qlge_shutdown,
-	.err_handler = &qlge_err_handler
-};
-
-module_pci_driver(qlge_driver);
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c
deleted file mode 100644
index 957c729..0000000
--- a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c
+++ /dev/null
@@ -1,1285 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "qlge.h"
-
-int ql_unpause_mpi_risc(struct ql_adapter *qdev)
-{
-	u32 tmp;
-
-	/* Un-pause the RISC */
-	tmp = ql_read32(qdev, CSR);
-	if (!(tmp & CSR_RP))
-		return -EIO;
-
-	ql_write32(qdev, CSR, CSR_CMD_CLR_PAUSE);
-	return 0;
-}
-
-int ql_pause_mpi_risc(struct ql_adapter *qdev)
-{
-	u32 tmp;
-	int count = UDELAY_COUNT;
-
-	/* Pause the RISC */
-	ql_write32(qdev, CSR, CSR_CMD_SET_PAUSE);
-	do {
-		tmp = ql_read32(qdev, CSR);
-		if (tmp & CSR_RP)
-			break;
-		mdelay(UDELAY_DELAY);
-		count--;
-	} while (count);
-	return (count == 0) ? -ETIMEDOUT : 0;
-}
-
-int ql_hard_reset_mpi_risc(struct ql_adapter *qdev)
-{
-	u32 tmp;
-	int count = UDELAY_COUNT;
-
-	/* Reset the RISC */
-	ql_write32(qdev, CSR, CSR_CMD_SET_RST);
-	do {
-		tmp = ql_read32(qdev, CSR);
-		if (tmp & CSR_RR) {
-			ql_write32(qdev, CSR, CSR_CMD_CLR_RST);
-			break;
-		}
-		mdelay(UDELAY_DELAY);
-		count--;
-	} while (count);
-	return (count == 0) ? -ETIMEDOUT : 0;
-}
-
-int ql_read_mpi_reg(struct ql_adapter *qdev, u32 reg, u32 *data)
-{
-	int status;
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev, PROC_ADDR, PROC_ADDR_RDY, PROC_ADDR_ERR);
-	if (status)
-		goto exit;
-	/* set up for reg read */
-	ql_write32(qdev, PROC_ADDR, reg | PROC_ADDR_R);
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev, PROC_ADDR, PROC_ADDR_RDY, PROC_ADDR_ERR);
-	if (status)
-		goto exit;
-	/* get the data */
-	*data = ql_read32(qdev, PROC_DATA);
-exit:
-	return status;
-}
-
-int ql_write_mpi_reg(struct ql_adapter *qdev, u32 reg, u32 data)
-{
-	int status = 0;
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev, PROC_ADDR, PROC_ADDR_RDY, PROC_ADDR_ERR);
-	if (status)
-		goto exit;
-	/* write the data to the data reg */
-	ql_write32(qdev, PROC_DATA, data);
-	/* trigger the write */
-	ql_write32(qdev, PROC_ADDR, reg);
-	/* wait for reg to come ready */
-	status = ql_wait_reg_rdy(qdev, PROC_ADDR, PROC_ADDR_RDY, PROC_ADDR_ERR);
-	if (status)
-		goto exit;
-exit:
-	return status;
-}
-
-int ql_soft_reset_mpi_risc(struct ql_adapter *qdev)
-{
-	int status;
-	status = ql_write_mpi_reg(qdev, 0x00001010, 1);
-	return status;
-}
-
-/* Determine if we are in charge of the firmware. We are
- * if we are the lower of the 2 NIC pcie functions, or if
- * we are the higher function and the lower function
- * is not enabled.
- */
-int ql_own_firmware(struct ql_adapter *qdev)
-{
-	u32 temp;
-
-	/* If we are the lower of the 2 NIC functions
-	 * on the chip then we are responsible for
-	 * core dump and firmware reset after an error.
-	 */
-	if (qdev->func < qdev->alt_func)
-		return 1;
-
-	/* If we are the higher of the 2 NIC functions
-	 * on the chip and the lower function is not
-	 * enabled, then we are responsible for
-	 * core dump and firmware reset after an error.
-	 */
-	temp =  ql_read32(qdev, STS);
-	if (!(temp & (1 << (8 + qdev->alt_func))))
-		return 1;
-
-	return 0;
-}
-
-static int ql_get_mb_sts(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int i, status;
-
-	status = ql_sem_spinlock(qdev, SEM_PROC_REG_MASK);
-	if (status)
-		return -EBUSY;
-	for (i = 0; i < mbcp->out_count; i++) {
-		status =
-		    ql_read_mpi_reg(qdev, qdev->mailbox_out + i,
-				     &mbcp->mbox_out[i]);
-		if (status) {
-			netif_err(qdev, drv, qdev->ndev, "Failed mailbox read.\n");
-			break;
-		}
-	}
-	ql_sem_unlock(qdev, SEM_PROC_REG_MASK);	/* does flush too */
-	return status;
-}
-
-/* Wait for a single mailbox command to complete.
- * Returns zero on success.
- */
-static int ql_wait_mbx_cmd_cmplt(struct ql_adapter *qdev)
-{
-	int count = 100;
-	u32 value;
-
-	do {
-		value = ql_read32(qdev, STS);
-		if (value & STS_PI)
-			return 0;
-		mdelay(UDELAY_DELAY); /* 100ms */
-	} while (--count);
-	return -ETIMEDOUT;
-}
-
-/* Execute a single mailbox command.
- * Caller must hold PROC_ADDR semaphore.
- */
-static int ql_exec_mb_cmd(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int i, status;
-
-	/*
-	 * Make sure there's nothing pending.
-	 * This shouldn't happen.
-	 */
-	if (ql_read32(qdev, CSR) & CSR_HRI)
-		return -EIO;
-
-	status = ql_sem_spinlock(qdev, SEM_PROC_REG_MASK);
-	if (status)
-		return status;
-
-	/*
-	 * Fill the outbound mailboxes.
-	 */
-	for (i = 0; i < mbcp->in_count; i++) {
-		status = ql_write_mpi_reg(qdev, qdev->mailbox_in + i,
-						mbcp->mbox_in[i]);
-		if (status)
-			goto end;
-	}
-	/*
-	 * Wake up the MPI firmware.
-	 */
-	ql_write32(qdev, CSR, CSR_CMD_SET_H2R_INT);
-end:
-	ql_sem_unlock(qdev, SEM_PROC_REG_MASK);
-	return status;
-}
-
-/* We are being asked by firmware to accept
- * a change to the port.  This is only
- * a change to max frame sizes (Tx/Rx), pause
- * parameters, or loopback mode. We wake up a worker
- * to handler processing this since a mailbox command
- * will need to be sent to ACK the request.
- */
-static int ql_idc_req_aen(struct ql_adapter *qdev)
-{
-	int status;
-	struct mbox_params *mbcp = &qdev->idc_mbc;
-
-	netif_err(qdev, drv, qdev->ndev, "Enter!\n");
-	/* Get the status data and start up a thread to
-	 * handle the request.
-	 */
-	mbcp->out_count = 4;
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Could not read MPI, resetting ASIC!\n");
-		ql_queue_asic_error(qdev);
-	} else	{
-		/* Begin polled mode early so
-		 * we don't get another interrupt
-		 * when we leave mpi_worker.
-		 */
-		ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16));
-		queue_delayed_work(qdev->workqueue, &qdev->mpi_idc_work, 0);
-	}
-	return status;
-}
-
-/* Process an inter-device event completion.
- * If good, signal the caller's completion.
- */
-static int ql_idc_cmplt_aen(struct ql_adapter *qdev)
-{
-	int status;
-	struct mbox_params *mbcp = &qdev->idc_mbc;
-	mbcp->out_count = 4;
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Could not read MPI, resetting RISC!\n");
-		ql_queue_fw_error(qdev);
-	} else
-		/* Wake up the sleeping mpi_idc_work thread that is
-		 * waiting for this event.
-		 */
-		complete(&qdev->ide_completion);
-
-	return status;
-}
-
-static void ql_link_up(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-	mbcp->out_count = 2;
-
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "%s: Could not get mailbox status.\n", __func__);
-		return;
-	}
-
-	qdev->link_status = mbcp->mbox_out[1];
-	netif_err(qdev, drv, qdev->ndev, "Link Up.\n");
-
-	/* If we're coming back from an IDC event
-	 * then set up the CAM and frame routing.
-	 */
-	if (test_bit(QL_CAM_RT_SET, &qdev->flags)) {
-		status = ql_cam_route_initialize(qdev);
-		if (status) {
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Failed to init CAM/Routing tables.\n");
-			return;
-		} else
-			clear_bit(QL_CAM_RT_SET, &qdev->flags);
-	}
-
-	/* Queue up a worker to check the frame
-	 * size information, and fix it if it's not
-	 * to our liking.
-	 */
-	if (!test_bit(QL_PORT_CFG, &qdev->flags)) {
-		netif_err(qdev, drv, qdev->ndev, "Queue Port Config Worker!\n");
-		set_bit(QL_PORT_CFG, &qdev->flags);
-		/* Begin polled mode early so
-		 * we don't get another interrupt
-		 * when we leave mpi_worker dpc.
-		 */
-		ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16));
-		queue_delayed_work(qdev->workqueue,
-				&qdev->mpi_port_cfg_work, 0);
-	}
-
-	ql_link_on(qdev);
-}
-
-static void ql_link_down(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-
-	mbcp->out_count = 3;
-
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status)
-		netif_err(qdev, drv, qdev->ndev, "Link down AEN broken!\n");
-
-	ql_link_off(qdev);
-}
-
-static int ql_sfp_in(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-
-	mbcp->out_count = 5;
-
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status)
-		netif_err(qdev, drv, qdev->ndev, "SFP in AEN broken!\n");
-	else
-		netif_err(qdev, drv, qdev->ndev, "SFP insertion detected.\n");
-
-	return status;
-}
-
-static int ql_sfp_out(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-
-	mbcp->out_count = 1;
-
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status)
-		netif_err(qdev, drv, qdev->ndev, "SFP out AEN broken!\n");
-	else
-		netif_err(qdev, drv, qdev->ndev, "SFP removal detected.\n");
-
-	return status;
-}
-
-static int ql_aen_lost(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-
-	mbcp->out_count = 6;
-
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status)
-		netif_err(qdev, drv, qdev->ndev, "Lost AEN broken!\n");
-	else {
-		int i;
-		netif_err(qdev, drv, qdev->ndev, "Lost AEN detected.\n");
-		for (i = 0; i < mbcp->out_count; i++)
-			netif_err(qdev, drv, qdev->ndev, "mbox_out[%d] = 0x%.08x.\n",
-				  i, mbcp->mbox_out[i]);
-
-	}
-
-	return status;
-}
-
-static void ql_init_fw_done(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-
-	mbcp->out_count = 2;
-
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev, "Firmware did not initialize!\n");
-	} else {
-		netif_err(qdev, drv, qdev->ndev, "Firmware Revision  = 0x%.08x.\n",
-			  mbcp->mbox_out[1]);
-		qdev->fw_rev_id = mbcp->mbox_out[1];
-		status = ql_cam_route_initialize(qdev);
-		if (status)
-			netif_err(qdev, ifup, qdev->ndev,
-				  "Failed to init CAM/Routing tables.\n");
-	}
-}
-
-/* Process an async event and clear it unless it's an
- * error condition.
- * This can get called iteratively from the mpi_work thread
- * when events arrive via an interrupt.
- * It also gets called when a mailbox command is polling for
- * its completion. */
-static int ql_mpi_handler(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-	int orig_count = mbcp->out_count;
-
-	/* Just get mailbox zero for now. */
-	mbcp->out_count = 1;
-	status = ql_get_mb_sts(qdev, mbcp);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Could not read MPI, resetting ASIC!\n");
-		ql_queue_asic_error(qdev);
-		goto end;
-	}
-
-	switch (mbcp->mbox_out[0]) {
-
-	/* This case is only active when we arrive here
-	 * as a result of issuing a mailbox command to
-	 * the firmware.
-	 */
-	case MB_CMD_STS_INTRMDT:
-	case MB_CMD_STS_GOOD:
-	case MB_CMD_STS_INVLD_CMD:
-	case MB_CMD_STS_XFC_ERR:
-	case MB_CMD_STS_CSUM_ERR:
-	case MB_CMD_STS_ERR:
-	case MB_CMD_STS_PARAM_ERR:
-		/* We can only get mailbox status if we're polling from an
-		 * unfinished command.  Get the rest of the status data and
-		 * return back to the caller.
-		 * We only end up here when we're polling for a mailbox
-		 * command completion.
-		 */
-		mbcp->out_count = orig_count;
-		status = ql_get_mb_sts(qdev, mbcp);
-		return status;
-
-	/* We are being asked by firmware to accept
-	 * a change to the port.  This is only
-	 * a change to max frame sizes (Tx/Rx), pause
-	 * parameters, or loopback mode.
-	 */
-	case AEN_IDC_REQ:
-		status = ql_idc_req_aen(qdev);
-		break;
-
-	/* Process an inbound IDC event.
-	 * This will happen when we're trying to
-	 * change tx/rx max frame size, change pause
-	 * parameters or loopback mode.
-	 */
-	case AEN_IDC_CMPLT:
-	case AEN_IDC_EXT:
-		status = ql_idc_cmplt_aen(qdev);
-		break;
-
-	case AEN_LINK_UP:
-		ql_link_up(qdev, mbcp);
-		break;
-
-	case AEN_LINK_DOWN:
-		ql_link_down(qdev, mbcp);
-		break;
-
-	case AEN_FW_INIT_DONE:
-		/* If we're in the process of executing the firmware,
-		 * then convert the status to normal mailbox status.
-		 */
-		if (mbcp->mbox_in[0] == MB_CMD_EX_FW) {
-			mbcp->out_count = orig_count;
-			status = ql_get_mb_sts(qdev, mbcp);
-			mbcp->mbox_out[0] = MB_CMD_STS_GOOD;
-			return status;
-		}
-		ql_init_fw_done(qdev, mbcp);
-		break;
-
-	case AEN_AEN_SFP_IN:
-		ql_sfp_in(qdev, mbcp);
-		break;
-
-	case AEN_AEN_SFP_OUT:
-		ql_sfp_out(qdev, mbcp);
-		break;
-
-	/* This event can arrive at boot time or after an
-	 * MPI reset if the firmware failed to initialize.
-	 */
-	case AEN_FW_INIT_FAIL:
-		/* If we're in the process of executing the firmware,
-		 * then convert the status to normal mailbox status.
-		 */
-		if (mbcp->mbox_in[0] == MB_CMD_EX_FW) {
-			mbcp->out_count = orig_count;
-			status = ql_get_mb_sts(qdev, mbcp);
-			mbcp->mbox_out[0] = MB_CMD_STS_ERR;
-			return status;
-		}
-		netif_err(qdev, drv, qdev->ndev,
-			  "Firmware initialization failed.\n");
-		status = -EIO;
-		ql_queue_fw_error(qdev);
-		break;
-
-	case AEN_SYS_ERR:
-		netif_err(qdev, drv, qdev->ndev, "System Error.\n");
-		ql_queue_fw_error(qdev);
-		status = -EIO;
-		break;
-
-	case AEN_AEN_LOST:
-		ql_aen_lost(qdev, mbcp);
-		break;
-
-	case AEN_DCBX_CHG:
-		/* Need to support AEN 8110 */
-		break;
-	default:
-		netif_err(qdev, drv, qdev->ndev,
-			  "Unsupported AE %.08x.\n", mbcp->mbox_out[0]);
-		/* Clear the MPI firmware status. */
-	}
-end:
-	ql_write32(qdev, CSR, CSR_CMD_CLR_R2PCI_INT);
-	/* Restore the original mailbox count to
-	 * what the caller asked for.  This can get
-	 * changed when a mailbox command is waiting
-	 * for a response and an AEN arrives and
-	 * is handled.
-	 */
-	mbcp->out_count = orig_count;
-	return status;
-}
-
-/* Execute a single mailbox command.
- * mbcp is a pointer to an array of u32.  Each
- * element in the array contains the value for its
- * respective mailbox register.
- */
-static int ql_mailbox_command(struct ql_adapter *qdev, struct mbox_params *mbcp)
-{
-	int status;
-	unsigned long count;
-
-	mutex_lock(&qdev->mpi_mutex);
-
-	/* Begin polled mode for MPI */
-	ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16));
-
-	/* Load the mailbox registers and wake up MPI RISC. */
-	status = ql_exec_mb_cmd(qdev, mbcp);
-	if (status)
-		goto end;
-
-	/* If we're generating a system error, then there's nothing
-	 * to wait for.
-	 */
-	if (mbcp->mbox_in[0] == MB_CMD_MAKE_SYS_ERR)
-		goto end;
-
-	/* Wait for the command to complete. We loop
-	 * here because some AEN might arrive while
-	 * we're waiting for the mailbox command to
-	 * complete. If more than 5 seconds expire we can
-	 * assume something is wrong. */
-	count = jiffies + HZ * MAILBOX_TIMEOUT;
-	do {
-		/* Wait for the interrupt to come in. */
-		status = ql_wait_mbx_cmd_cmplt(qdev);
-		if (status)
-			continue;
-
-		/* Process the event.  If it's an AEN, it
-		 * will be handled in-line or a worker
-		 * will be spawned. If it's our completion
-		 * we will catch it below.
-		 */
-		status = ql_mpi_handler(qdev, mbcp);
-		if (status)
-			goto end;
-
-		/* It's either the completion for our mailbox
-		 * command complete or an AEN.  If it's our
-		 * completion then get out.
-		 */
-		if (((mbcp->mbox_out[0] & 0x0000f000) ==
-					MB_CMD_STS_GOOD) ||
-			((mbcp->mbox_out[0] & 0x0000f000) ==
-					MB_CMD_STS_INTRMDT))
-			goto done;
-	} while (time_before(jiffies, count));
-
-	netif_err(qdev, drv, qdev->ndev,
-		  "Timed out waiting for mailbox complete.\n");
-	status = -ETIMEDOUT;
-	goto end;
-
-done:
-
-	/* Now we can clear the interrupt condition
-	 * and look at our status.
-	 */
-	ql_write32(qdev, CSR, CSR_CMD_CLR_R2PCI_INT);
-
-	if (((mbcp->mbox_out[0] & 0x0000f000) !=
-					MB_CMD_STS_GOOD) &&
-		((mbcp->mbox_out[0] & 0x0000f000) !=
-					MB_CMD_STS_INTRMDT)) {
-		status = -EIO;
-	}
-end:
-	/* End polled mode for MPI */
-	ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16) | INTR_MASK_PI);
-	mutex_unlock(&qdev->mpi_mutex);
-	return status;
-}
-
-/* Get MPI firmware version. This will be used for
- * driver banner and for ethtool info.
- * Returns zero on success.
- */
-int ql_mb_about_fw(struct ql_adapter *qdev)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status = 0;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 1;
-	mbcp->out_count = 3;
-
-	mbcp->mbox_in[0] = MB_CMD_ABOUT_FW;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed about firmware command\n");
-		status = -EIO;
-	}
-
-	/* Store the firmware version */
-	qdev->fw_rev_id = mbcp->mbox_out[1];
-
-	return status;
-}
-
-/* Get functional state for MPI firmware.
- * Returns zero on success.
- */
-int ql_mb_get_fw_state(struct ql_adapter *qdev)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status = 0;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 1;
-	mbcp->out_count = 2;
-
-	mbcp->mbox_in[0] = MB_CMD_GET_FW_STATE;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed Get Firmware State.\n");
-		status = -EIO;
-	}
-
-	/* If bit zero is set in mbx 1 then the firmware is
-	 * running, but not initialized.  This should never
-	 * happen.
-	 */
-	if (mbcp->mbox_out[1] & 1) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Firmware waiting for initialization.\n");
-		status = -EIO;
-	}
-
-	return status;
-}
-
-/* Send an ACK mailbox command to the firmware to
- * let it continue with the change.
- */
-static int ql_mb_idc_ack(struct ql_adapter *qdev)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status = 0;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 5;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_IDC_ACK;
-	mbcp->mbox_in[1] = qdev->idc_mbc.mbox_out[1];
-	mbcp->mbox_in[2] = qdev->idc_mbc.mbox_out[2];
-	mbcp->mbox_in[3] = qdev->idc_mbc.mbox_out[3];
-	mbcp->mbox_in[4] = qdev->idc_mbc.mbox_out[4];
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev, "Failed IDC ACK send.\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-/* Set link settings and maximum frame size settings
- * for the current port.
- * Most likely will block.
- */
-int ql_mb_set_port_cfg(struct ql_adapter *qdev)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status = 0;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 3;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_SET_PORT_CFG;
-	mbcp->mbox_in[1] = qdev->link_config;
-	mbcp->mbox_in[2] = qdev->max_frame_size;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] == MB_CMD_STS_INTRMDT) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Port Config sent, wait for IDC.\n");
-	} else	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed Set Port Configuration.\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-static int ql_mb_dump_ram(struct ql_adapter *qdev, u64 req_dma, u32 addr,
-	u32 size)
-{
-	int status = 0;
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 9;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_DUMP_RISC_RAM;
-	mbcp->mbox_in[1] = LSW(addr);
-	mbcp->mbox_in[2] = MSW(req_dma);
-	mbcp->mbox_in[3] = LSW(req_dma);
-	mbcp->mbox_in[4] = MSW(size);
-	mbcp->mbox_in[5] = LSW(size);
-	mbcp->mbox_in[6] = MSW(MSD(req_dma));
-	mbcp->mbox_in[7] = LSW(MSD(req_dma));
-	mbcp->mbox_in[8] = MSW(addr);
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev, "Failed to dump risc RAM.\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-/* Issue a mailbox command to dump RISC RAM. */
-int ql_dump_risc_ram_area(struct ql_adapter *qdev, void *buf,
-		u32 ram_addr, int word_count)
-{
-	int status;
-	char *my_buf;
-	dma_addr_t buf_dma;
-
-	my_buf = pci_alloc_consistent(qdev->pdev, word_count * sizeof(u32),
-					&buf_dma);
-	if (!my_buf)
-		return -EIO;
-
-	status = ql_mb_dump_ram(qdev, buf_dma, ram_addr, word_count);
-	if (!status)
-		memcpy(buf, my_buf, word_count * sizeof(u32));
-
-	pci_free_consistent(qdev->pdev, word_count * sizeof(u32), my_buf,
-				buf_dma);
-	return status;
-}
-
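ql_dump_risc_ram_area() above still uses the legacy pci_alloc_consistent()/pci_free_consistent() wrappers. A hedged sketch of the same bounce-buffer pattern with the generic DMA API (the legacy wrappers implied GFP_ATOMIC; GFP_KERNEL should be safe here since the caller can sleep, and dump_ram_sketch is a hypothetical name, with the mailbox step elided):

	#include <linux/dma-mapping.h>
	#include <linux/pci.h>
	#include <linux/string.h>

	/* Sketch: coherent bounce buffer for a firmware RAM dump. The
	 * mailbox command that fills it is elided. */
	static int dump_ram_sketch(struct pci_dev *pdev, void *buf,
				   size_t word_count)
	{
		size_t size = word_count * sizeof(u32);
		dma_addr_t buf_dma;
		void *my_buf;
		int status = 0;

		my_buf = dma_alloc_coherent(&pdev->dev, size, &buf_dma,
					    GFP_KERNEL);
		if (!my_buf)
			return -ENOMEM;

		/* ... issue the dump mailbox command with buf_dma ... */
		memcpy(buf, my_buf, size);

		dma_free_coherent(&pdev->dev, size, my_buf, buf_dma);
		return status;
	}
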
-/* Get link settings and maximum frame size settings
- * for the current port.
- * Most likely will block.
- */
-int ql_mb_get_port_cfg(struct ql_adapter *qdev)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status = 0;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 1;
-	mbcp->out_count = 3;
-
-	mbcp->mbox_in[0] = MB_CMD_GET_PORT_CFG;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed Get Port Configuration.\n");
-		status = -EIO;
-	} else	{
-		netif_printk(qdev, drv, KERN_DEBUG, qdev->ndev,
-			     "Passed Get Port Configuration.\n");
-		qdev->link_config = mbcp->mbox_out[1];
-		qdev->max_frame_size = mbcp->mbox_out[2];
-	}
-	return status;
-}
-
-int ql_mb_wol_mode(struct ql_adapter *qdev, u32 wol)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 2;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_SET_WOL_MODE;
-	mbcp->mbox_in[1] = wol;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev, "Failed to set WOL mode.\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-int ql_mb_wol_set_magic(struct ql_adapter *qdev, u32 enable_wol)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status;
-	u8 *addr = qdev->ndev->dev_addr;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 8;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_SET_WOL_MAGIC;
-	if (enable_wol) {
-		mbcp->mbox_in[1] = (u32)addr[0];
-		mbcp->mbox_in[2] = (u32)addr[1];
-		mbcp->mbox_in[3] = (u32)addr[2];
-		mbcp->mbox_in[4] = (u32)addr[3];
-		mbcp->mbox_in[5] = (u32)addr[4];
-		mbcp->mbox_in[6] = (u32)addr[5];
-		mbcp->mbox_in[7] = 0;
-	} else {
-		mbcp->mbox_in[1] = 0;
-		mbcp->mbox_in[2] = 1;
-		mbcp->mbox_in[3] = 1;
-		mbcp->mbox_in[4] = 1;
-		mbcp->mbox_in[5] = 1;
-		mbcp->mbox_in[6] = 1;
-		mbcp->mbox_in[7] = 0;
-	}
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev, "Failed to set WOL mode.\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-/* IDC - Inter Device Communication...
- * Some firmware commands require the consent of the adjacent FCoE
- * function.  This function waits for the OK, or a
- * counter-request for a little more time.
- * The firmware will complete the request if the other
- * function doesn't respond.
- */
-static int ql_idc_wait(struct ql_adapter *qdev)
-{
-	int status = -ETIMEDOUT;
-	long wait_time = 1 * HZ;
-	struct mbox_params *mbcp = &qdev->idc_mbc;
-	do {
-		/* Wait here for the command to complete
-		 * via the IDC process.
-		 */
-		wait_time =
-			wait_for_completion_timeout(&qdev->ide_completion,
-							wait_time);
-		if (!wait_time) {
-			netif_err(qdev, drv, qdev->ndev, "IDC Timeout.\n");
-			break;
-		}
-		/* Now examine the response from the IDC process.
-		 * We might have a good completion or a request for
-		 * more wait time.
-		 */
-		if (mbcp->mbox_out[0] == AEN_IDC_EXT) {
-			netif_err(qdev, drv, qdev->ndev,
-				  "IDC Time Extension from function.\n");
-			wait_time += (mbcp->mbox_out[1] >> 8) & 0x0000000f;
-		} else if (mbcp->mbox_out[0] == AEN_IDC_CMPLT) {
-			netif_err(qdev, drv, qdev->ndev, "IDC Success.\n");
-			status = 0;
-			break;
-		} else {
-			netif_err(qdev, drv, qdev->ndev,
-				  "IDC: Invalid State 0x%.04x.\n",
-				  mbcp->mbox_out[0]);
-			status = -EIO;
-			break;
-		}
-	} while (wait_time);
-
-	return status;
-}
-
-int ql_mb_set_led_cfg(struct ql_adapter *qdev, u32 led_config)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 2;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_SET_LED_CFG;
-	mbcp->mbox_in[1] = led_config;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed to set LED Configuration.\n");
-		status = -EIO;
-	}
-
-	return status;
-}
-
-int ql_mb_get_led_cfg(struct ql_adapter *qdev)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 1;
-	mbcp->out_count = 2;
-
-	mbcp->mbox_in[0] = MB_CMD_GET_LED_CFG;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] != MB_CMD_STS_GOOD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed to get LED Configuration.\n");
-		status = -EIO;
-	} else
-		qdev->led_config = mbcp->mbox_out[1];
-
-	return status;
-}
-
-int ql_mb_set_mgmnt_traffic_ctl(struct ql_adapter *qdev, u32 control)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-
-	mbcp->in_count = 1;
-	mbcp->out_count = 2;
-
-	mbcp->mbox_in[0] = MB_CMD_SET_MGMNT_TFK_CTL;
-	mbcp->mbox_in[1] = control;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] == MB_CMD_STS_GOOD)
-		return status;
-
-	if (mbcp->mbox_out[0] == MB_CMD_STS_INVLD_CMD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Command not supported by firmware.\n");
-		status = -EINVAL;
-	} else if (mbcp->mbox_out[0] == MB_CMD_STS_ERR) {
-		/* This indicates that the firmware is
-		 * already in the state we are trying to
-		 * change it to.
-		 */
-		netif_err(qdev, drv, qdev->ndev,
-			  "Command parameters make no change.\n");
-	}
-	return status;
-}
-
-/* Returns a negative error code or the mailbox command status. */
-static int ql_mb_get_mgmnt_traffic_ctl(struct ql_adapter *qdev, u32 *control)
-{
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int status;
-
-	memset(mbcp, 0, sizeof(struct mbox_params));
-	*control = 0;
-
-	mbcp->in_count = 1;
-	mbcp->out_count = 1;
-
-	mbcp->mbox_in[0] = MB_CMD_GET_MGMNT_TFK_CTL;
-
-	status = ql_mailbox_command(qdev, mbcp);
-	if (status)
-		return status;
-
-	if (mbcp->mbox_out[0] == MB_CMD_STS_GOOD) {
-		*control = mbcp->mbox_out[1];
-		return status;
-	}
-
-	if (mbcp->mbox_out[0] == MB_CMD_STS_INVLD_CMD) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Command not supported by firmware.\n");
-		status = -EINVAL;
-	} else if (mbcp->mbox_out[0] == MB_CMD_STS_ERR) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Failed to get MPI traffic control.\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-int ql_wait_fifo_empty(struct ql_adapter *qdev)
-{
-	int count = 5;
-	u32 mgmnt_fifo_empty;
-	u32 nic_fifo_empty;
-
-	do {
-		nic_fifo_empty = ql_read32(qdev, STS) & STS_NFE;
-		ql_mb_get_mgmnt_traffic_ctl(qdev, &mgmnt_fifo_empty);
-		mgmnt_fifo_empty &= MB_GET_MPI_TFK_FIFO_EMPTY;
-		if (nic_fifo_empty && mgmnt_fifo_empty)
-			return 0;
-		msleep(100);
-	} while (count-- > 0);
-	return -ETIMEDOUT;
-}
-
-/* API called in work thread context to set new TX/RX
- * maximum frame size values to match MTU.
- */
-static int ql_set_port_cfg(struct ql_adapter *qdev)
-{
-	int status;
-	status = ql_mb_set_port_cfg(qdev);
-	if (status)
-		return status;
-	status = ql_idc_wait(qdev);
-	return status;
-}
-
-/* The following routines are worker threads that process
- * events that may sleep waiting for completion.
- */
-
-/* This thread gets the maximum TX and RX frame size values
- * from the firmware and, if necessary, changes them to match
- * the MTU setting.
- */
-void ql_mpi_port_cfg_work(struct work_struct *work)
-{
-	struct ql_adapter *qdev =
-	    container_of(work, struct ql_adapter, mpi_port_cfg_work.work);
-	int status;
-
-	status = ql_mb_get_port_cfg(qdev);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Bug: Failed to get port config data.\n");
-		goto err;
-	}
-
-	if (qdev->link_config & CFG_JUMBO_FRAME_SIZE &&
-			qdev->max_frame_size ==
-			CFG_DEFAULT_MAX_FRAME_SIZE)
-		goto end;
-
-	qdev->link_config |= CFG_JUMBO_FRAME_SIZE;
-	qdev->max_frame_size = CFG_DEFAULT_MAX_FRAME_SIZE;
-	status = ql_set_port_cfg(qdev);
-	if (status) {
-		netif_err(qdev, drv, qdev->ndev,
-			  "Bug: Failed to set port config data.\n");
-		goto err;
-	}
-end:
-	clear_bit(QL_PORT_CFG, &qdev->flags);
-	return;
-err:
-	ql_queue_fw_error(qdev);
-	goto end;
-}
-
-/* Process an inter-device request.  This is issued by
- * the firmware in response to another function requesting
- * a change to the port. We set a flag to indicate a change
- * has been made and then send a mailbox command ACKing
- * the change request.
- */
-void ql_mpi_idc_work(struct work_struct *work)
-{
-	struct ql_adapter *qdev =
-	    container_of(work, struct ql_adapter, mpi_idc_work.work);
-	int status;
-	struct mbox_params *mbcp = &qdev->idc_mbc;
-	u32 aen;
-	int timeout;
-
-	aen = mbcp->mbox_out[1] >> 16;
-	timeout = (mbcp->mbox_out[1] >> 8) & 0xf;
-
-	switch (aen) {
-	default:
-		netif_err(qdev, drv, qdev->ndev,
-			  "Bug: Unhandled IDC action.\n");
-		break;
-	case MB_CMD_PORT_RESET:
-	case MB_CMD_STOP_FW:
-		ql_link_off(qdev);
-		/* Fall through */
-	case MB_CMD_SET_PORT_CFG:
-		/* Signal the resulting link up AEN
-		 * that the frame routing and mac addr
-		 * need to be set.
-		 */
-		set_bit(QL_CAM_RT_SET, &qdev->flags);
-		/* Do ACK if required */
-		if (timeout) {
-			status = ql_mb_idc_ack(qdev);
-			if (status)
-				netif_err(qdev, drv, qdev->ndev,
-					  "Bug: No pending IDC!\n");
-		} else {
-			netif_printk(qdev, drv, KERN_DEBUG, qdev->ndev,
-				     "IDC ACK not required\n");
-			status = 0; /* success */
-		}
-		break;
-
-	/* These sub-commands issued by another (FCoE)
-	 * function are requesting to do an operation
-	 * on the shared resource (MPI environment).
-	 * We currently don't issue these so we just
-	 * ACK the request.
-	 */
-	case MB_CMD_IOP_RESTART_MPI:
-	case MB_CMD_IOP_PREP_LINK_DOWN:
-		/* Drop the link, reload the routing
-		 * table when link comes up.
-		 */
-		ql_link_off(qdev);
-		set_bit(QL_CAM_RT_SET, &qdev->flags);
-		/* Fall through. */
-	case MB_CMD_IOP_DVR_START:
-	case MB_CMD_IOP_FLASH_ACC:
-	case MB_CMD_IOP_CORE_DUMP_MPI:
-	case MB_CMD_IOP_PREP_UPDATE_MPI:
-	case MB_CMD_IOP_COMP_UPDATE_MPI:
-	case MB_CMD_IOP_NONE:	/*  an IDC without params */
-		/* Do ACK if required */
-		if (timeout) {
-			status = ql_mb_idc_ack(qdev);
-			if (status)
-				netif_err(qdev, drv, qdev->ndev,
-					  "Bug: No pending IDC!\n");
-		} else {
-			netif_printk(qdev, drv, KERN_DEBUG, qdev->ndev,
-				     "IDC ACK not required\n");
-			status = 0; /* success */
-		}
-		break;
-	}
-}
-
-void ql_mpi_work(struct work_struct *work)
-{
-	struct ql_adapter *qdev =
-	    container_of(work, struct ql_adapter, mpi_work.work);
-	struct mbox_params mbc;
-	struct mbox_params *mbcp = &mbc;
-	int err = 0;
-
-	mutex_lock(&qdev->mpi_mutex);
-	/* Begin polled mode for MPI */
-	ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16));
-
-	while (ql_read32(qdev, STS) & STS_PI) {
-		memset(mbcp, 0, sizeof(struct mbox_params));
-		mbcp->out_count = 1;
-		/* Don't continue if an async event
-		 * did not complete properly.
-		 */
-		err = ql_mpi_handler(qdev, mbcp);
-		if (err)
-			break;
-	}
-
-	/* End polled mode for MPI */
-	ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16) | INTR_MASK_PI);
-	mutex_unlock(&qdev->mpi_mutex);
-	ql_enable_completion_interrupt(qdev, 0);
-}
-
-void ql_mpi_reset_work(struct work_struct *work)
-{
-	struct ql_adapter *qdev =
-	    container_of(work, struct ql_adapter, mpi_reset_work.work);
-	cancel_delayed_work_sync(&qdev->mpi_work);
-	cancel_delayed_work_sync(&qdev->mpi_port_cfg_work);
-	cancel_delayed_work_sync(&qdev->mpi_idc_work);
-	/* If we're not the dominant NIC function,
-	 * then there is nothing to do.
-	 */
-	if (!ql_own_firmware(qdev)) {
-		netif_err(qdev, drv, qdev->ndev, "Don't own firmware!\n");
-		return;
-	}
-
-	if (qdev->mpi_coredump && !ql_core_dump(qdev, qdev->mpi_coredump)) {
-		netif_err(qdev, drv, qdev->ndev, "Core is dumped!\n");
-		qdev->core_is_dumped = 1;
-		queue_delayed_work(qdev->workqueue,
-			&qdev->mpi_core_to_log, 5 * HZ);
-	}
-	ql_soft_reset_mpi_risc(qdev);
-}
diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig
index f520071..09a678a 100644
--- a/drivers/net/ethernet/qualcomm/Kconfig
+++ b/drivers/net/ethernet/qualcomm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Qualcomm network device configuration
 #
diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile
index fc57ced..61d15e0 100644
--- a/drivers/net/ethernet/qualcomm/emac/Makefile
+++ b/drivers/net/ethernet/qualcomm/emac/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Qualcomm Technologies, Inc. EMAC Gigabit Ethernet driver
 #
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
index c8c6231..79e5007 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/ethtool.h>
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 031f6e6..bebe38d 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. EMAC Ethernet Controller MAC layer support
@@ -776,7 +768,7 @@
 			    8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */
 
 	ring_header->used = 0;
-	ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size,
+	ring_header->v_addr = dma_alloc_coherent(dev, ring_header->size,
 						 &ring_header->dma_addr,
 						 GFP_KERNEL);
 	if (!ring_header->v_addr)
@@ -1204,7 +1196,7 @@
 		if (tpbuf->skb) {
 			pkts_compl++;
 			bytes_compl += tpbuf->skb->len;
-			dev_kfree_skb_irq(tpbuf->skb);
+			dev_consume_skb_irq(tpbuf->skb);
 			tpbuf->skb = NULL;
 		}
 
@@ -1393,15 +1385,13 @@
 	}
 
 	for (i = 0; i < nr_frags; i++) {
-		struct skb_frag_struct *frag;
-
-		frag = &skb_shinfo(skb)->frags[i];
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
-		tpbuf->length = frag->size;
-		tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
-					       frag->page.p, frag->page_offset,
-					       tpbuf->length, DMA_TO_DEVICE);
+		tpbuf->length = skb_frag_size(frag);
+		tpbuf->dma_addr = skb_frag_dma_map(adpt->netdev->dev.parent,
+						   frag, 0, tpbuf->length,
+						   DMA_TO_DEVICE);
 		ret = dma_mapping_error(adpt->netdev->dev.parent,
 					tpbuf->dma_addr);
 		if (ret)
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
index 4beedb8..ae08bdd 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
@@ -1,13 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* EMAC DMA HW engine uses three rings:
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index 53dbf1e..5c94af7 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. EMAC PHY Controller driver.
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.h b/drivers/net/ethernet/qualcomm/emac/emac-phy.h
index c0c301c..7d703e5 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.h
@@ -1,13 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License version 2 and
-* only version 2 as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-* GNU General Public License for more details.
 */
 
 #ifndef _EMAC_PHY_H_
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
index 10de8d0..bd9ad32 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. FSM9900 EMAC SGMII Controller driver.
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
index 7116be4..b29148c 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. QDF2400 EMAC SGMII Controller driver.
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
index b9c0df7..65519ee 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. QDF2432 EMAC SGMII Controller driver.
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index c694e34..802ef81 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. EMAC SGMII Controller driver.
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
index 31ba21e..6daedda 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
@@ -1,13 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _EMAC_SGMII_H_
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 2a0cbc5..c84ab05 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver */
@@ -552,7 +544,6 @@
 				struct emac_adapter *adpt)
 {
 	struct net_device *netdev = adpt->netdev;
-	struct resource *res;
 	char maddr[ETH_ALEN];
 	int ret = 0;
 
@@ -564,22 +555,17 @@
 
 	/* Core 0 interrupt */
 	ret = platform_get_irq(pdev, 0);
-	if (ret < 0) {
-		dev_err(&pdev->dev,
-			"error: missing core0 irq resource (error=%i)\n", ret);
+	if (ret < 0)
 		return ret;
-	}
 	adpt->irq.irq = ret;
 
 	/* base register address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	adpt->base = devm_ioremap_resource(&pdev->dev, res);
+	adpt->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(adpt->base))
 		return PTR_ERR(adpt->base);
 
 	/* CSR register address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	adpt->csr = devm_ioremap_resource(&pdev->dev, res);
+	adpt->csr = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(adpt->csr))
 		return PTR_ERR(adpt->csr);
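
Two probe-path simplifications above: platform_get_irq() now logs its own
error message, so the duplicate dev_err() goes away, and
devm_platform_ioremap_resource(pdev, n) fuses the lookup of the n-th
IORESOURCE_MEM entry with the devm mapping.  Roughly what the helper does
internally (a sketch, not the exact kernel source):

	static void __iomem *emac_ioremap(struct platform_device *pdev,
					  unsigned int index)
	{
		struct resource *res;

		res = platform_get_resource(pdev, IORESOURCE_MEM, index);
		return devm_ioremap_resource(&pdev->dev, res);
	}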
 
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h
index d7c9f44..7e9e151 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac.h
@@ -1,13 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _EMAC_H_
diff --git a/drivers/net/ethernet/qualcomm/qca_7k.c b/drivers/net/ethernet/qualcomm/qca_7k.c
index 6c8543f..4292c89 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k.c
+++ b/drivers/net/ethernet/qualcomm/qca_7k.c
@@ -81,8 +81,8 @@
 	return ret;
 }
 
-int
-qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value)
+static int
+__qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value)
 {
 	__be16 tx_data[2];
 	struct spi_transfer transfer[2];
@@ -117,3 +117,33 @@
 
 	return ret;
 }
+
+int
+qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value, int retry)
+{
+	int ret, i = 0;
+	u16 confirmed;
+
+	do {
+		ret = __qcaspi_write_register(qca, reg, value);
+		if (ret)
+			return ret;
+
+		if (!retry)
+			return 0;
+
+		ret = qcaspi_read_register(qca, reg, &confirmed);
+		if (ret)
+			return ret;
+
+		ret = confirmed != value;
+		if (!ret)
+			return 0;
+
+		i++;
+		qca->stats.write_verify_failed++;
+
+	} while (i <= retry);
+
+	return ret;
+}
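
The new retry argument makes register writes optionally self-verifying:
the value is read back and compared up to retry times, and every mismatch
bumps stats.write_verify_failed.  Note the call sites further down:
SPI_REG_INTR_CAUSE is written with retry == 0, presumably because its
acknowledge bits are write-to-clear and a read-back could never match.
A hypothetical verified caller (the error handling is illustrative only):

	ret = qcaspi_write_register(qca, SPI_REG_INTR_ENABLE,
				    intr_enable, wr_verify);
	if (ret)
		qcaspi_spi_error(qca);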
diff --git a/drivers/net/ethernet/qualcomm/qca_7k.h b/drivers/net/ethernet/qualcomm/qca_7k.h
index 27124c2..356de8e 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k.h
+++ b/drivers/net/ethernet/qualcomm/qca_7k.h
@@ -66,6 +66,6 @@
 
 void qcaspi_spi_error(struct qcaspi *qca);
 int qcaspi_read_register(struct qcaspi *qca, u16 reg, u16 *result);
-int qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value);
+int qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value, int retry);
 
 #endif /* _QCA_7K_H */
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
index 51d89c8..702aa21 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -60,6 +60,8 @@
 	"Write buffer misses",
 	"Transmit ring full",
 	"SPI errors",
+	"Write verify errors",
+	"Buffer available errors",
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -124,35 +126,16 @@
 
 	return 0;
 }
-
-static int
-qcaspi_info_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, qcaspi_info_show, inode->i_private);
-}
-
-static const struct file_operations qcaspi_info_ops = {
-	.open = qcaspi_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(qcaspi_info);
 
 void
 qcaspi_init_device_debugfs(struct qcaspi *qca)
 {
-	struct dentry *device_root;
+	qca->device_root = debugfs_create_dir(dev_name(&qca->net_dev->dev),
+					      NULL);
 
-	device_root = debugfs_create_dir(dev_name(&qca->net_dev->dev), NULL);
-	qca->device_root = device_root;
-
-	if (IS_ERR(device_root) || !device_root) {
-		pr_warn("failed to create debugfs directory for %s\n",
-			dev_name(&qca->net_dev->dev));
-		return;
-	}
-	debugfs_create_file("info", S_IFREG | 0444, device_root, qca,
-			    &qcaspi_info_ops);
+	debugfs_create_file("info", S_IFREG | 0444, qca->device_root, qca,
+			    &qcaspi_info_fops);
 }
 
 void
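
DEFINE_SHOW_ATTRIBUTE(qcaspi_info) generates precisely the boilerplate the
hunk deletes, deriving both names from the _show suffix; the IS_ERR check
on debugfs_create_dir() is dropped because debugfs calls are expected to
tolerate error pointers rather than be checked by callers.  Simplified
expansion of the macro (see include/linux/seq_file.h for the real thing):

	static int qcaspi_info_open(struct inode *inode, struct file *file)
	{
		return single_open(file, qcaspi_info_show, inode->i_private);
	}

	static const struct file_operations qcaspi_info_fops = {
		.owner   = THIS_MODULE,
		.open    = qcaspi_info_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = single_release,
	};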
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 66b775d..baac016 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -69,6 +69,12 @@
 module_param(qcaspi_pluggable, int, 0);
 MODULE_PARM_DESC(qcaspi_pluggable, "Pluggable SPI connection (yes/no).");
 
+#define QCASPI_WRITE_VERIFY_MIN 0
+#define QCASPI_WRITE_VERIFY_MAX 3
+static int wr_verify = QCASPI_WRITE_VERIFY_MIN;
+module_param(wr_verify, int, 0);
+MODULE_PARM_DESC(wr_verify, "SPI register write verify trials. Use 0-3.");
+
 #define QCASPI_TX_TIMEOUT (1 * HZ)
 #define QCASPI_QCA7K_REBOOT_TIME_MS 1000
 
@@ -77,7 +83,7 @@
 {
 	*intr_cause = 0;
 
-	qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, 0);
+	qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, 0, wr_verify);
 	qcaspi_read_register(qca, SPI_REG_INTR_CAUSE, intr_cause);
 	netdev_dbg(qca->net_dev, "interrupts: 0x%04x\n", *intr_cause);
 }
@@ -90,8 +96,8 @@
 			   SPI_INT_RDBUF_ERR |
 			   SPI_INT_WRBUF_ERR);
 
-	qcaspi_write_register(qca, SPI_REG_INTR_CAUSE, intr_cause);
-	qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, intr_enable);
+	qcaspi_write_register(qca, SPI_REG_INTR_CAUSE, intr_cause, 0);
+	qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, intr_enable, wr_verify);
 	netdev_dbg(qca->net_dev, "acking int: 0x%04x\n", intr_cause);
 }
 
@@ -239,7 +245,7 @@
 
 	len = skb->len;
 
-	qcaspi_write_register(qca, SPI_REG_BFR_SIZE, len);
+	qcaspi_write_register(qca, SPI_REG_BFR_SIZE, len, wr_verify);
 	if (qca->legacy_mode)
 		qcaspi_tx_cmd(qca, QCA7K_SPI_WRITE | QCA7K_SPI_EXTERNAL);
 
@@ -283,6 +289,14 @@
 
 	qcaspi_read_register(qca, SPI_REG_WRBUF_SPC_AVA, &available);
 
+	if (available > QCASPI_HW_BUF_LEN) {
+		/* This can only happen due to interference on the SPI line.
+		 * So retry later ...
+		 */
+		qca->stats.buf_avail_err++;
+		return -1;
+	}
+
 	while (qca->txr.skb[qca->txr.head]) {
 		pkt_len = qca->txr.skb[qca->txr.head]->len + QCASPI_HW_PKT_LEN;
 
@@ -345,15 +359,22 @@
 
 	/* Read the packet size. */
 	qcaspi_read_register(qca, SPI_REG_RDBUF_BYTE_AVA, &available);
+
 	netdev_dbg(net_dev, "qcaspi_receive: SPI_REG_RDBUF_BYTE_AVA: Value: %08x\n",
 		   available);
 
-	if (available == 0) {
+	if (available > QCASPI_HW_BUF_LEN + QCASPI_HW_PKT_LEN) {
+		/* This can only happen due to interference on the SPI line.
+		 * So retry later ...
+		 */
+		qca->stats.buf_avail_err++;
+		return -1;
+	} else if (available == 0) {
 		netdev_dbg(net_dev, "qcaspi_receive called without any data being available!\n");
 		return -1;
 	}
 
-	qcaspi_write_register(qca, SPI_REG_BFR_SIZE, available);
+	qcaspi_write_register(qca, SPI_REG_BFR_SIZE, available, wr_verify);
 
 	if (qca->legacy_mode)
 		qcaspi_tx_cmd(qca, QCA7K_SPI_READ | QCA7K_SPI_EXTERNAL);
@@ -475,7 +496,6 @@
 	u16 signature = 0;
 	u16 spi_config;
 	u16 wrbuf_space = 0;
-	static u16 reset_count;
 
 	if (event == QCASPI_EVENT_CPUON) {
 		/* Read signature twice, if not valid
@@ -524,17 +544,17 @@
 		netdev_dbg(qca->net_dev, "sync: resetting device.\n");
 		qcaspi_read_register(qca, SPI_REG_SPI_CONFIG, &spi_config);
 		spi_config |= QCASPI_SLAVE_RESET_BIT;
-		qcaspi_write_register(qca, SPI_REG_SPI_CONFIG, spi_config);
+		qcaspi_write_register(qca, SPI_REG_SPI_CONFIG, spi_config, 0);
 
 		qca->sync = QCASPI_SYNC_RESET;
 		qca->stats.trig_reset++;
-		reset_count = 0;
+		qca->reset_count = 0;
 		break;
 	case QCASPI_SYNC_RESET:
-		reset_count++;
+		qca->reset_count++;
 		netdev_dbg(qca->net_dev, "sync: waiting for CPU on, count %u.\n",
-			   reset_count);
-		if (reset_count >= QCASPI_RESET_TIMEOUT) {
+			   qca->reset_count);
+		if (qca->reset_count >= QCASPI_RESET_TIMEOUT) {
 			/* reset did not seem to take place, try again */
 			qca->sync = QCASPI_SYNC_UNKNOWN;
 			qca->stats.reset_timeout++;
@@ -684,7 +704,7 @@
 
 	netif_stop_queue(dev);
 
-	qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, 0);
+	qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, 0, wr_verify);
 	free_irq(qca->spi_dev->irq, qca);
 
 	kthread_stop(qca->spi_thread);
@@ -816,8 +836,7 @@
 
 	kfree(qca->rx_buffer);
 	qca->buffer_size = 0;
-	if (qca->rx_skb)
-		dev_kfree_skb(qca->rx_skb);
+	dev_kfree_skb(qca->rx_skb);
 }
 
 static const struct net_device_ops qcaspi_netdev_ops = {
@@ -904,6 +923,13 @@
 		return -EINVAL;
 	}
 
+	if (wr_verify < QCASPI_WRITE_VERIFY_MIN ||
+	    wr_verify > QCASPI_WRITE_VERIFY_MAX) {
+		dev_err(&spi->dev, "Invalid write verify: %d\n",
+			wr_verify);
+		return -EINVAL;
+	}
+
 	dev_info(&spi->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n",
 		 QCASPI_DRV_VERSION,
 		 qcaspi_clkspeed,
@@ -938,7 +964,7 @@
 
 	mac = of_get_mac_address(spi->dev.of_node);
 
-	if (mac)
+	if (!IS_ERR(mac))
 		ether_addr_copy(qca->net_dev->dev_addr, mac);
 
 	if (!is_valid_ether_addr(qca->net_dev->dev_addr)) {
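
The IS_ERR() test reflects a change in of_get_mac_address(): it no longer
returns NULL on failure but an ERR_PTR (including -EPROBE_DEFER when the
address sits in a not-yet-ready nvmem cell), which the old "if (mac)" test
would have copied as if it were a valid address.  The convention, sketched
with placeholder names np/ndev:

	const void *mac = of_get_mac_address(np);

	if (!IS_ERR(mac))
		ether_addr_copy(ndev->dev_addr, mac);
	/* else fall back to a random or otherwise-provisioned address */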
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h
index fc0e987..d13a67e 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -73,6 +73,8 @@
 	u64 write_buf_miss;
 	u64 ring_full;
 	u64 spi_err;
+	u64 write_verify_failed;
+	u64 buf_avail_err;
 };
 
 struct qcaspi {
@@ -92,6 +94,7 @@
 
 	unsigned int intr_req;
 	unsigned int intr_svc;
+	u16 reset_count;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *device_root;
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index db6068c..0981068 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -285,8 +285,7 @@
 {
 	struct qcauart *qca = netdev_priv(dev);
 
-	if (qca->rx_skb)
-		dev_kfree_skb(qca->rx_skb);
+	dev_kfree_skb(qca->rx_skb);
 }
 
 static const struct net_device_ops qcauart_netdev_ops = {
@@ -351,7 +350,7 @@
 
 	mac = of_get_mac_address(serdev->dev.of_node);
 
-	if (mac)
+	if (!IS_ERR(mac))
 		ether_addr_copy(qca->net_dev->dev_addr, mac);
 
 	if (!is_valid_ether_addr(qca->net_dev->dev_addr)) {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
index 9bb06d2..9f92795 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/Kconfig
+++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # RMNET MAP driver
 #
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Makefile b/drivers/net/ethernet/qualcomm/rmnet/Makefile
index 01bddf2..8252e40 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/Makefile
+++ b/drivers/net/ethernet/qualcomm/rmnet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the RMNET module
 #
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 5f4e447..06de595 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * RMNET configuration engine
- *
  */
 
 #include <net/sock.h>
@@ -66,10 +57,10 @@
 	if (port->nr_rmnet_devs)
 		return -EINVAL;
 
-	kfree(port);
-
 	netdev_rx_handler_unregister(real_dev);
 
+	kfree(port);
+
 	/* release reference on real_dev */
 	dev_put(real_dev);
 
@@ -301,10 +292,13 @@
 	struct rmnet_port *port;
 	u16 mux_id;
 
+	if (!dev)
+		return -ENODEV;
+
 	real_dev = __dev_get_by_index(dev_net(dev),
 				      nla_get_u32(tb[IFLA_LINK]));
 
-	if (!real_dev || !dev || !rmnet_is_real_dev_registered(real_dev))
+	if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
 		return -ENODEV;
 
 	port = rmnet_get_port_rtnl(real_dev);
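
Two ordering fixes here: the first hunk frees "port" only after
netdev_rx_handler_unregister() has returned (the unregister path includes
a synchronize_net(), so no RX handler can still be reading the structure),
and the second moves the !dev check ahead of dev_net(dev), which
previously dereferenced dev before validating it.  The safe teardown
order, distilled from the hunk:

	netdev_rx_handler_unregister(real_dev);	/* waits out RX readers */
	kfree(port);				/* only now safe to free */
	dev_put(real_dev);			/* drop the device ref */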
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 34ac45a..cd0a6bc 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * RMNET Data configuration engine
- *
  */
 
 #include <linux/skbuff.h>
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 11167ab..1b74bc1 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * RMNET Data ingress/egress handler
- *
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
index 3537e4c..c4571dc 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013, 2016-2017 The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * RMNET Data ingress/egress handler
- *
  */
 
 #ifndef _RMNET_HANDLERS_H_
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 884f1f5..576501d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -1,17 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _RMNET_MAP_H_
 #define _RMNET_MAP_H_
+#include <linux/if_rmnet.h>
 
 struct rmnet_map_control_command {
 	u8  command_name;
@@ -39,30 +32,6 @@
 	RMNET_MAP_COMMAND_ENUM_LENGTH
 };
 
-struct rmnet_map_header {
-	u8  pad_len:6;
-	u8  reserved_bit:1;
-	u8  cd_bit:1;
-	u8  mux_id;
-	__be16 pkt_len;
-}  __aligned(1);
-
-struct rmnet_map_dl_csum_trailer {
-	u8  reserved1;
-	u8  valid:1;
-	u8  reserved2:7;
-	u16 csum_start_offset;
-	u16 csum_length;
-	__be16 csum_value;
-} __aligned(1);
-
-struct rmnet_map_ul_csum_header {
-	__be16 csum_start_offset;
-	u16 csum_insert_offset:14;
-	u16 udp_ip4_ind:1;
-	u16 csum_enabled:1;
-} __aligned(1);
-
 #define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
 				 (Y)->data)->mux_id)
 #define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index 3ee8ae9..beaee49 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/netdevice.h>
@@ -20,17 +12,12 @@
 				    struct rmnet_port *port,
 				    int enable)
 {
-	struct rmnet_map_control_command *cmd;
 	struct rmnet_endpoint *ep;
 	struct net_device *vnd;
-	u16 ip_family;
-	u16 fc_seq;
-	u32 qos_id;
 	u8 mux_id;
 	int r;
 
 	mux_id = RMNET_MAP_GET_MUX_ID(skb);
-	cmd = RMNET_MAP_GET_CMD_START(skb);
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
 		kfree_skb(skb);
@@ -45,10 +32,6 @@
 
 	vnd = ep->egress_dev;
 
-	ip_family = cmd->flow_control.ip_family;
-	fc_seq = ntohs(cmd->flow_control.flow_control_seq_num);
-	qos_id = ntohl(cmd->flow_control.qos_id);
-
 	/* Ignore the ip family and pass the sequence number for both v4 and v6
 	 * sequence. User space does not support creating dedicated flows for
 	 * the 2 protocols
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 57a9c31..21d3816 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -1,16 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * RMNET Data MAP protocol
- *
  */
 
 #include <linux/netdevice.h>
@@ -215,9 +206,9 @@
 	ul_header->csum_insert_offset = skb->csum_offset;
 	ul_header->csum_enabled = 1;
 	if (ip4h->protocol == IPPROTO_UDP)
-		ul_header->udp_ip4_ind = 1;
+		ul_header->udp_ind = 1;
 	else
-		ul_header->udp_ip4_ind = 0;
+		ul_header->udp_ind = 0;
 
 	/* Changing remaining fields to network order */
 	hdr++;
@@ -248,6 +239,7 @@
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr;
 	__be16 *hdr = (__be16 *)ul_header, offset;
 
 	offset = htons((__force u16)(skb_transport_header(skb) -
@@ -255,7 +247,11 @@
 	ul_header->csum_start_offset = offset;
 	ul_header->csum_insert_offset = skb->csum_offset;
 	ul_header->csum_enabled = 1;
-	ul_header->udp_ip4_ind = 0;
+
+	if (ip6h->nexthdr == IPPROTO_UDP)
+		ul_header->udp_ind = 1;
+	else
+		ul_header->udp_ind = 0;
 
 	/* Changing remaining fields to network order */
 	hdr++;
@@ -428,7 +424,7 @@
 	ul_header->csum_start_offset = 0;
 	ul_header->csum_insert_offset = 0;
 	ul_header->csum_enabled = 0;
-	ul_header->udp_ip4_ind = 0;
+	ul_header->udp_ind = 0;
 
 	priv->stats.csum_sw++;
 }
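
With the MAP structures moved to the shared <linux/if_rmnet.h>, the uplink
checksum flag is renamed from udp_ip4_ind to udp_ind and is now set for
UDP under either IP version, so IPv6/UDP flows get flagged correctly
instead of always being treated as TCP.  The header fill, distilled;
"ip_hdr" and "udp" are placeholders for the per-family header pointer and
IPPROTO_UDP test, and the real code additionally byte-swaps the bitfield
word afterwards:

	ul_header->csum_start_offset =
		htons((u16)(skb_transport_header(skb) - (u8 *)ip_hdr));
	ul_header->csum_insert_offset = skb->csum_offset;
	ul_header->csum_enabled = 1;
	ul_header->udp_ind = udp ? 1 : 0;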
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index b9cc4f8..e1337f1 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -1,13 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _RMNET_PRIVATE_H_
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index d11c16a..509dfc8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -1,17 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- *
  * RMNET Data virtual network driver
- *
  */
 
 #include <linux/etherdevice.h>
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index 71e4c32..54cbaf3 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * RMNET Data Virtual Network Device APIs
- *
  */
 
 #ifndef _RMNET_VND_H_
diff --git a/drivers/net/ethernet/rdc/Kconfig b/drivers/net/ethernet/rdc/Kconfig
index a9c4e99..76df60c 100644
--- a/drivers/net/ethernet/rdc/Kconfig
+++ b/drivers/net/ethernet/rdc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # RDC network device configuration
 #
diff --git a/drivers/net/ethernet/rdc/Makefile b/drivers/net/ethernet/rdc/Makefile
index 8d51fd2..8074654 100644
--- a/drivers/net/ethernet/rdc/Makefile
+++ b/drivers/net/ethernet/rdc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the RDC network device drivers.
 #
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index aa11b70..274e5b4 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * RDC R6040 Fast Ethernet MAC support
  *
@@ -5,21 +6,6 @@
  * Copyright (C) 2007
  *	Daniel Gimpelevich <daniel@gimpelevich.san-francisco.ca.us>
  * Copyright (C) 2007-2012 Florian Fainelli <f.fainelli@gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
 */
 
 #include <linux/kernel.h>
@@ -840,7 +826,7 @@
 	skb_tx_timestamp(skb);
 
 	/* Trigger the MAC to check the TX descriptor */
-	if (!skb->xmit_more || netif_queue_stopped(dev))
+	if (!netdev_xmit_more() || netif_queue_stopped(dev))
 		iowrite16(TM2TX, ioaddr + MTPR);
 	lp->tx_insert_ptr = descptr->vndescp;
 
@@ -1024,16 +1010,8 @@
 		return PTR_ERR(phydev);
 	}
 
-	/* mask with MAC supported features */
-	phydev->supported &= (SUPPORTED_10baseT_Half
-				| SUPPORTED_10baseT_Full
-				| SUPPORTED_100baseT_Half
-				| SUPPORTED_100baseT_Full
-				| SUPPORTED_Autoneg
-				| SUPPORTED_MII
-				| SUPPORTED_TP);
+	phy_set_max_speed(phydev, SPEED_100);
 
-	phydev->advertising = phydev->supported;
 	lp->old_link = 0;
 	lp->old_duplex = -1;
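
Two phylib-era updates in r6040: skb->xmit_more is gone in favor of the
netdev_xmit_more() helper, and phydev->supported/advertising are linkmode
bitmaps now, so the old SUPPORTED_* mask arithmetic no longer compiles;
phy_set_max_speed() trims the supported map and re-syncs advertising in
one call.  Sketch for a 10/100-only MAC (names are placeholders for the
driver's own connect path):

	phydev = phy_connect(dev, bus_id, r6040_adjust_link,
			     PHY_INTERFACE_MODE_MII);
	if (IS_ERR(phydev))
		return PTR_ERR(phydev);

	phy_set_max_speed(phydev, SPEED_100);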
 
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 44f6e48..4f910c4 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -691,7 +691,7 @@
 			}
 			bytes_compl += skb->len;
 			pkts_compl++;
-			dev_kfree_skb_irq(skb);
+			dev_consume_skb_irq(skb);
 		}
 
 		cp->tx_skb[tx_tail] = NULL;
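
dev_consume_skb_irq() and dev_kfree_skb_irq() free the buffer identically;
the difference is bookkeeping.  Consume marks a normally completed packet,
kfree marks a drop, and tools such as dropwatch trace only the latter, so
TX-completion paths that switch to consume stop polluting drop statistics.
The idiomatic split ("TxError" stands for driver-specific error bits):

	if (status & TxError)
		dev_kfree_skb_irq(skb);		/* counted as a drop */
	else
		dev_consume_skb_irq(skb);	/* normal completion */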
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index ffd68a7..55d0126 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -258,6 +258,7 @@
 	{0x126c, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 	{0x1743, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 	{0x021b, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
+	{0x16ec, 0xab06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 
 #ifdef CONFIG_SH_SECUREEDGE5410
 	/* Bogus 8139 silicon reports 8129 without external PROM :-( */
@@ -1661,7 +1662,7 @@
 
 	napi_disable(&tp->napi);
 	netif_stop_queue(dev);
-	synchronize_sched();
+	synchronize_rcu();
 
 	netdev_dbg(dev, "Transmit timeout, status %02x %04x %04x media %02x\n",
 		   RTL_R8(ChipCmd), RTL_R16(IntrStatus),
diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig
index 96d1b9c..5e0b9d2 100644
--- a/drivers/net/ethernet/realtek/Kconfig
+++ b/drivers/net/ethernet/realtek/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Realtek device configuration
 #
@@ -95,14 +96,19 @@
 	  old RX-reset behavior.  If unsure, say N.
 
 config R8169
-	tristate "Realtek 8169 gigabit ethernet support"
+	tristate "Realtek 8169/8168/8101/8125 ethernet support"
 	depends on PCI
 	select FW_LOADER
 	select CRC32
 	select PHYLIB
 	select REALTEK_PHY
 	---help---
-	  Say Y here if you have a Realtek 8169 PCI Gigabit Ethernet adapter.
+	  Say Y here if you have a Realtek Ethernet adapter belonging to
+	  the following families:
+	  RTL8169 Gigabit Ethernet
+	  RTL8168 Gigabit Ethernet
+	  RTL8101 Fast Ethernet
+	  RTL8125 2.5GBit Ethernet
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called r8169.  This is recommended.
diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile
index 71b1da3..d5304ba 100644
--- a/drivers/net/ethernet/realtek/Makefile
+++ b/drivers/net/ethernet/realtek/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Realtek network device drivers.
 #
@@ -5,4 +6,5 @@
 obj-$(CONFIG_8139CP) += 8139cp.o
 obj-$(CONFIG_8139TOO) += 8139too.o
 obj-$(CONFIG_ATP) += atp.o
+r8169-objs += r8169_main.o r8169_firmware.o
 obj-$(CONFIG_R8169) += r8169.o
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 7e011c1..58e0ca9 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -454,14 +454,14 @@
 {
 	struct net_local *lp = netdev_priv(dev);
 	long ioaddr = dev->base_addr;
-    int i;
+	int i;
 
 	/* Turn off the printer multiplexer on the 8012. */
 	for (i = 0; i < 8; i++)
 		outb(mux_8012[i], ioaddr + PAR_DATA);
 	write_reg_high(ioaddr, CMR1, CMR1h_RESET);
 
-    for (i = 0; i < 6; i++)
+	for (i = 0; i < 6; i++)
 		write_reg_byte(ioaddr, PAR0 + i, dev->dev_addr[i]);
 
 	write_reg_high(ioaddr, CMR2, lp->addr_mode);
@@ -471,18 +471,18 @@
 			   (read_nibble(ioaddr, CMR2_h) >> 3) & 0x0f);
 	}
 
-    write_reg(ioaddr, CMR2, CMR2_IRQOUT);
-    write_reg_high(ioaddr, CMR1, CMR1h_RxENABLE | CMR1h_TxENABLE);
+	write_reg(ioaddr, CMR2, CMR2_IRQOUT);
+	write_reg_high(ioaddr, CMR1, CMR1h_RxENABLE | CMR1h_TxENABLE);
 
 	/* Enable the interrupt line from the serial port. */
 	outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL);
 
 	/* Unmask the interesting interrupts. */
-    write_reg(ioaddr, IMR, ISR_RxOK | ISR_TxErr | ISR_TxOK);
-    write_reg_high(ioaddr, IMR, ISRh_RxErr);
+	write_reg(ioaddr, IMR, ISR_RxOK | ISR_TxErr | ISR_TxOK);
+	write_reg_high(ioaddr, IMR, ISRh_RxErr);
 
 	lp->tx_unit_busy = 0;
-    lp->pac_cnt_in_tx_buf = 0;
+	lp->pac_cnt_in_tx_buf = 0;
 	lp->saved_tx_size = 0;
 }
 
@@ -610,10 +610,12 @@
 	write_reg(ioaddr, CMR2, CMR2_NULL);
 	write_reg(ioaddr, IMR, 0);
 
-	if (net_debug > 5) printk(KERN_DEBUG "%s: In interrupt ", dev->name);
-    while (--boguscount > 0) {
+	if (net_debug > 5)
+		printk(KERN_DEBUG "%s: In interrupt ", dev->name);
+	while (--boguscount > 0) {
 		int status = read_nibble(ioaddr, ISR);
-		if (net_debug > 5) printk("loop status %02x..", status);
+		if (net_debug > 5)
+			printk("loop status %02x..", status);
 
 		if (status & (ISR_RxOK<<3)) {
 			handled = 1;
@@ -640,7 +642,8 @@
 			} while (--boguscount > 0);
 		} else if (status & ((ISR_TxErr + ISR_TxOK)<<3)) {
 			handled = 1;
-			if (net_debug > 6)  printk("handling Tx done..");
+			if (net_debug > 6)
+				printk("handling Tx done..");
 			/* Clear the Tx interrupt.  We should check for too many failures
 			   and reinitialize the adapter. */
 			write_reg(ioaddr, ISR, ISR_TxErr + ISR_TxOK);
@@ -680,7 +683,7 @@
 			break;
 		} else
 			break;
-    }
+	}
 
 	/* This following code fixes a rare (and very difficult to track down)
 	   problem where the adapter forgets its ethernet address. */
@@ -694,7 +697,7 @@
 	}
 
 	/* Tell the adapter that it can go back to using the output line as IRQ. */
-    write_reg(ioaddr, CMR2, CMR2_IRQOUT);
+	write_reg(ioaddr, CMR2, CMR2_IRQOUT);
 	/* Enable the physical interrupt line, which is sure to be low until.. */
 	outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL);
 	/* .. we enable the interrupt sources. */
diff --git a/drivers/net/ethernet/realtek/r8169_firmware.c b/drivers/net/ethernet/realtek/r8169_firmware.c
new file mode 100644
index 0000000..8f54a2c
--- /dev/null
+++ b/drivers/net/ethernet/realtek/r8169_firmware.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* r8169_firmware.c: RealTek 8169/8168/8101 ethernet driver.
+ *
+ * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw>
+ * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com>
+ * Copyright (c) a lot of people too. Please respect their work.
+ *
+ * See MAINTAINERS file for support contact information.
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+
+#include "r8169_firmware.h"
+
+enum rtl_fw_opcode {
+	PHY_READ		= 0x0,
+	PHY_DATA_OR		= 0x1,
+	PHY_DATA_AND		= 0x2,
+	PHY_BJMPN		= 0x3,
+	PHY_MDIO_CHG		= 0x4,
+	PHY_CLEAR_READCOUNT	= 0x7,
+	PHY_WRITE		= 0x8,
+	PHY_READCOUNT_EQ_SKIP	= 0x9,
+	PHY_COMP_EQ_SKIPN	= 0xa,
+	PHY_COMP_NEQ_SKIPN	= 0xb,
+	PHY_WRITE_PREVIOUS	= 0xc,
+	PHY_SKIPN		= 0xd,
+	PHY_DELAY_MS		= 0xe,
+};
+
+struct fw_info {
+	u32	magic;
+	char	version[RTL_VER_SIZE];
+	__le32	fw_start;
+	__le32	fw_len;
+	u8	chksum;
+} __packed;
+
+#define FW_OPCODE_SIZE	sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code))
+
+static bool rtl_fw_format_ok(struct rtl_fw *rtl_fw)
+{
+	const struct firmware *fw = rtl_fw->fw;
+	struct fw_info *fw_info = (struct fw_info *)fw->data;
+	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
+
+	if (fw->size < FW_OPCODE_SIZE)
+		return false;
+
+	if (!fw_info->magic) {
+		size_t i, size, start;
+		u8 checksum = 0;
+
+		if (fw->size < sizeof(*fw_info))
+			return false;
+
+		for (i = 0; i < fw->size; i++)
+			checksum += fw->data[i];
+		if (checksum != 0)
+			return false;
+
+		start = le32_to_cpu(fw_info->fw_start);
+		if (start > fw->size)
+			return false;
+
+		size = le32_to_cpu(fw_info->fw_len);
+		if (size > (fw->size - start) / FW_OPCODE_SIZE)
+			return false;
+
+		strscpy(rtl_fw->version, fw_info->version, RTL_VER_SIZE);
+
+		pa->code = (__le32 *)(fw->data + start);
+		pa->size = size;
+	} else {
+		if (fw->size % FW_OPCODE_SIZE)
+			return false;
+
+		strscpy(rtl_fw->version, rtl_fw->fw_name, RTL_VER_SIZE);
+
+		pa->code = (__le32 *)fw->data;
+		pa->size = fw->size / FW_OPCODE_SIZE;
+	}
+
+	return true;
+}
+
+static bool rtl_fw_data_ok(struct rtl_fw *rtl_fw)
+{
+	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
+	size_t index;
+
+	for (index = 0; index < pa->size; index++) {
+		u32 action = le32_to_cpu(pa->code[index]);
+		u32 regno = (action & 0x0fff0000) >> 16;
+
+		switch (action >> 28) {
+		case PHY_READ:
+		case PHY_DATA_OR:
+		case PHY_DATA_AND:
+		case PHY_MDIO_CHG:
+		case PHY_CLEAR_READCOUNT:
+		case PHY_WRITE:
+		case PHY_WRITE_PREVIOUS:
+		case PHY_DELAY_MS:
+			break;
+
+		case PHY_BJMPN:
+			if (regno > index)
+				goto out;
+			break;
+		case PHY_READCOUNT_EQ_SKIP:
+			if (index + 2 >= pa->size)
+				goto out;
+			break;
+		case PHY_COMP_EQ_SKIPN:
+		case PHY_COMP_NEQ_SKIPN:
+		case PHY_SKIPN:
+			if (index + 1 + regno >= pa->size)
+				goto out;
+			break;
+
+		default:
+			dev_err(rtl_fw->dev, "Invalid action 0x%08x\n", action);
+			return false;
+		}
+	}
+
+	return true;
+out:
+	dev_err(rtl_fw->dev, "Out of range of firmware\n");
+	return false;
+}
+
+void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
+{
+	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
+	rtl_fw_write_t fw_write = rtl_fw->phy_write;
+	rtl_fw_read_t fw_read = rtl_fw->phy_read;
+	int predata = 0, count = 0;
+	size_t index;
+
+	for (index = 0; index < pa->size; index++) {
+		u32 action = le32_to_cpu(pa->code[index]);
+		u32 data = action & 0x0000ffff;
+		u32 regno = (action & 0x0fff0000) >> 16;
+		enum rtl_fw_opcode opcode = action >> 28;
+
+		if (!action)
+			break;
+
+		switch (opcode) {
+		case PHY_READ:
+			predata = fw_read(tp, regno);
+			count++;
+			break;
+		case PHY_DATA_OR:
+			predata |= data;
+			break;
+		case PHY_DATA_AND:
+			predata &= data;
+			break;
+		case PHY_BJMPN:
+			index -= (regno + 1);
+			break;
+		case PHY_MDIO_CHG:
+			if (data == 0) {
+				fw_write = rtl_fw->phy_write;
+				fw_read = rtl_fw->phy_read;
+			} else if (data == 1) {
+				fw_write = rtl_fw->mac_mcu_write;
+				fw_read = rtl_fw->mac_mcu_read;
+			}
+
+			break;
+		case PHY_CLEAR_READCOUNT:
+			count = 0;
+			break;
+		case PHY_WRITE:
+			fw_write(tp, regno, data);
+			break;
+		case PHY_READCOUNT_EQ_SKIP:
+			if (count == data)
+				index++;
+			break;
+		case PHY_COMP_EQ_SKIPN:
+			if (predata == data)
+				index += regno;
+			break;
+		case PHY_COMP_NEQ_SKIPN:
+			if (predata != data)
+				index += regno;
+			break;
+		case PHY_WRITE_PREVIOUS:
+			fw_write(tp, regno, predata);
+			break;
+		case PHY_SKIPN:
+			index += regno;
+			break;
+		case PHY_DELAY_MS:
+			mdelay(data);
+			break;
+		}
+	}
+}
+
+void rtl_fw_release_firmware(struct rtl_fw *rtl_fw)
+{
+	release_firmware(rtl_fw->fw);
+}
+
+int rtl_fw_request_firmware(struct rtl_fw *rtl_fw)
+{
+	int rc;
+
+	rc = request_firmware(&rtl_fw->fw, rtl_fw->fw_name, rtl_fw->dev);
+	if (rc < 0)
+		goto out;
+
+	if (!rtl_fw_format_ok(rtl_fw) || !rtl_fw_data_ok(rtl_fw)) {
+		release_firmware(rtl_fw->fw);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	return 0;
+out:
+	dev_err(rtl_fw->dev, "Unable to load firmware %s (%d)\n",
+		rtl_fw->fw_name, rc);
+	return rc;
+}
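
Each entry consumed by rtl_fw_write_firmware() is a single 32-bit word:
the opcode lives in bits 31-28, a 12-bit register index in bits 27-16 and
a 16-bit immediate below that.  A worked decode (the value is illustrative,
not taken from a real firmware image):

	u32 action = 0x801f1234;
	enum rtl_fw_opcode opcode = action >> 28;	/* 0x8: PHY_WRITE */
	u32 regno = (action & 0x0fff0000) >> 16;	/* 0x01f */
	u32 data  = action & 0x0000ffff;		/* 0x1234 */

	/* i.e. "write 0x1234 to PHY register 0x1f" */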
diff --git a/drivers/net/ethernet/realtek/r8169_firmware.h b/drivers/net/ethernet/realtek/r8169_firmware.h
new file mode 100644
index 0000000..7dc348e
--- /dev/null
+++ b/drivers/net/ethernet/realtek/r8169_firmware.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* r8169_firmware.h: RealTek 8169/8168/8101 ethernet driver.
+ *
+ * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw>
+ * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com>
+ * Copyright (c) a lot of people too. Please respect their work.
+ *
+ * See MAINTAINERS file for support contact information.
+ */
+
+#include <linux/device.h>
+#include <linux/firmware.h>
+
+struct rtl8169_private;
+typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val);
+typedef int (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg);
+
+#define RTL_VER_SIZE		32
+
+struct rtl_fw {
+	rtl_fw_write_t phy_write;
+	rtl_fw_read_t phy_read;
+	rtl_fw_write_t mac_mcu_write;
+	rtl_fw_read_t mac_mcu_read;
+	const struct firmware *fw;
+	const char *fw_name;
+	struct device *dev;
+
+	char version[RTL_VER_SIZE];
+
+	struct rtl_fw_phy_action {
+		__le32 *code;
+		size_t size;
+	} phy_action;
+};
+
+int rtl_fw_request_firmware(struct rtl_fw *rtl_fw);
+void rtl_fw_release_firmware(struct rtl_fw *rtl_fw);
+void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169_main.c
similarity index 68%
rename from drivers/net/ethernet/realtek/r8169.c
rename to drivers/net/ethernet/realtek/r8169_main.c
index 4930e03..1d67eee 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * r8169.c: RealTek 8169/8168/8101 ethernet driver.
  *
@@ -26,11 +27,12 @@
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
-#include <linux/firmware.h>
 #include <linux/prefetch.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
 
+#include "r8169_firmware.h"
+
 #define MODULENAME "r8169"
 
 #define FIRMWARE_8168D_1	"rtl_nic/rtl8168d-1.fw"
@@ -52,20 +54,14 @@
 #define FIRMWARE_8168H_2	"rtl_nic/rtl8168h-2.fw"
 #define FIRMWARE_8107E_1	"rtl_nic/rtl8107e-1.fw"
 #define FIRMWARE_8107E_2	"rtl_nic/rtl8107e-2.fw"
+#define FIRMWARE_8125A_3	"rtl_nic/rtl8125a-3.fw"
 
 #define R8169_MSG_DEFAULT \
 	(NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN)
 
-#define TX_SLOTS_AVAIL(tp) \
-	(tp->dirty_tx + NUM_TX_DESC - tp->cur_tx)
-
-/* A skbuff with nr_frags needs nr_frags+1 entries in the tx queue */
-#define TX_FRAGS_READY_FOR(tp,nr_frags) \
-	(TX_SLOTS_AVAIL(tp) >= (nr_frags + 1))
-
 /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
    The RTL chips use a 64 element hash table based on the Ethernet CRC. */
-static const int multicast_filter_limit = 32;
+#define	MC_FILTER_LIMIT	32
 
 #define TX_DMA_BURST	7	/* Maximum PCI burst, '7' is unlimited */
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
@@ -77,7 +73,7 @@
 #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
 #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
 
-#define RTL8169_TX_TIMEOUT	(6*HZ)
+#define RTL_CFG_NO_GBIT	1
 
 /* write/read MMIO register */
 #define RTL_W8(tp, reg, val8)	writeb((val8), tp->mmio_addr + (reg))
@@ -88,7 +84,7 @@
 #define RTL_R32(tp, reg)		readl(tp->mmio_addr + (reg))
 
 enum mac_version {
-	RTL_GIGA_MAC_VER_01 = 0,
+	/* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
 	RTL_GIGA_MAC_VER_02,
 	RTL_GIGA_MAC_VER_03,
 	RTL_GIGA_MAC_VER_04,
@@ -139,7 +135,9 @@
 	RTL_GIGA_MAC_VER_49,
 	RTL_GIGA_MAC_VER_50,
 	RTL_GIGA_MAC_VER_51,
-	RTL_GIGA_MAC_NONE   = 0xff,
+	RTL_GIGA_MAC_VER_60,
+	RTL_GIGA_MAC_VER_61,
+	RTL_GIGA_MAC_NONE
 };
 
 #define JUMBO_1K	ETH_DATA_LEN
@@ -153,7 +151,6 @@
 	const char *fw_name;
 } rtl_chip_infos[] = {
 	/* PCI devices. */
-	[RTL_GIGA_MAC_VER_01] = {"RTL8169"				},
 	[RTL_GIGA_MAC_VER_02] = {"RTL8169s"				},
 	[RTL_GIGA_MAC_VER_03] = {"RTL8110s"				},
 	[RTL_GIGA_MAC_VER_04] = {"RTL8169sb/8110sb"			},
@@ -162,7 +159,7 @@
 	/* PCI-E devices. */
 	[RTL_GIGA_MAC_VER_07] = {"RTL8102e"				},
 	[RTL_GIGA_MAC_VER_08] = {"RTL8102e"				},
-	[RTL_GIGA_MAC_VER_09] = {"RTL8102e"				},
+	[RTL_GIGA_MAC_VER_09] = {"RTL8102e/RTL8103e"			},
 	[RTL_GIGA_MAC_VER_10] = {"RTL8101e"				},
 	[RTL_GIGA_MAC_VER_11] = {"RTL8168b/8111b"			},
 	[RTL_GIGA_MAC_VER_12] = {"RTL8168b/8111b"			},
@@ -195,9 +192,9 @@
 	[RTL_GIGA_MAC_VER_39] = {"RTL8106e",		FIRMWARE_8106E_1},
 	[RTL_GIGA_MAC_VER_40] = {"RTL8168g/8111g",	FIRMWARE_8168G_2},
 	[RTL_GIGA_MAC_VER_41] = {"RTL8168g/8111g"			},
-	[RTL_GIGA_MAC_VER_42] = {"RTL8168g/8111g",	FIRMWARE_8168G_3},
-	[RTL_GIGA_MAC_VER_43] = {"RTL8106e",		FIRMWARE_8106E_2},
-	[RTL_GIGA_MAC_VER_44] = {"RTL8411",		FIRMWARE_8411_2 },
+	[RTL_GIGA_MAC_VER_42] = {"RTL8168gu/8111gu",	FIRMWARE_8168G_3},
+	[RTL_GIGA_MAC_VER_43] = {"RTL8106eus",		FIRMWARE_8106E_2},
+	[RTL_GIGA_MAC_VER_44] = {"RTL8411b",		FIRMWARE_8411_2 },
 	[RTL_GIGA_MAC_VER_45] = {"RTL8168h/8111h",	FIRMWARE_8168H_1},
 	[RTL_GIGA_MAC_VER_46] = {"RTL8168h/8111h",	FIRMWARE_8168H_2},
 	[RTL_GIGA_MAC_VER_47] = {"RTL8107e",		FIRMWARE_8107E_1},
@@ -205,38 +202,35 @@
 	[RTL_GIGA_MAC_VER_49] = {"RTL8168ep/8111ep"			},
 	[RTL_GIGA_MAC_VER_50] = {"RTL8168ep/8111ep"			},
 	[RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep"			},
-};
-
-enum cfg_version {
-	RTL_CFG_0 = 0x00,
-	RTL_CFG_1,
-	RTL_CFG_2
+	[RTL_GIGA_MAC_VER_60] = {"RTL8125"				},
+	[RTL_GIGA_MAC_VER_61] = {"RTL8125",		FIRMWARE_8125A_3},
 };
 
 static const struct pci_device_id rtl8169_pci_tbl[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8129), 0, 0, RTL_CFG_0 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8136), 0, 0, RTL_CFG_2 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8161), 0, 0, RTL_CFG_1 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8167), 0, 0, RTL_CFG_0 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8168), 0, 0, RTL_CFG_1 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_NCUBE,	0x8168), 0, 0, RTL_CFG_1 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8169), 0, 0, RTL_CFG_0 },
-	{ PCI_VENDOR_ID_DLINK,			0x4300,
-		PCI_VENDOR_ID_DLINK, 0x4b10,		 0, 0, RTL_CFG_1 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4300), 0, 0, RTL_CFG_0 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4302), 0, 0, RTL_CFG_0 },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AT,		0xc107), 0, 0, RTL_CFG_0 },
-	{ PCI_DEVICE(0x16ec,			0x0116), 0, 0, RTL_CFG_0 },
-	{ PCI_VENDOR_ID_LINKSYS,		0x1032,
-		PCI_ANY_ID, 0x0024, 0, 0, RTL_CFG_0 },
-	{ 0x0001,				0x8168,
-		PCI_ANY_ID, 0x2410, 0, 0, RTL_CFG_2 },
-	{0,},
+	{ PCI_VDEVICE(REALTEK,	0x2502) },
+	{ PCI_VDEVICE(REALTEK,	0x2600) },
+	{ PCI_VDEVICE(REALTEK,	0x8129) },
+	{ PCI_VDEVICE(REALTEK,	0x8136), RTL_CFG_NO_GBIT },
+	{ PCI_VDEVICE(REALTEK,	0x8161) },
+	{ PCI_VDEVICE(REALTEK,	0x8167) },
+	{ PCI_VDEVICE(REALTEK,	0x8168) },
+	{ PCI_VDEVICE(NCUBE,	0x8168) },
+	{ PCI_VDEVICE(REALTEK,	0x8169) },
+	{ PCI_VENDOR_ID_DLINK,	0x4300,
+		PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0 },
+	{ PCI_VDEVICE(DLINK,	0x4300) },
+	{ PCI_VDEVICE(DLINK,	0x4302) },
+	{ PCI_VDEVICE(AT,	0xc107) },
+	{ PCI_VDEVICE(USR,	0x0116) },
+	{ PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024 },
+	{ 0x0001, 0x8168, PCI_ANY_ID, 0x2410 },
+	{ PCI_VDEVICE(REALTEK,	0x8125) },
+	{ PCI_VDEVICE(REALTEK,	0x3000) },
+	{}
 };
 
 MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
 
-static int use_dac = -1;
 static struct {
 	u32 msg_enable;
 } debug = { -1 };
@@ -283,7 +277,6 @@
 	Config3		= 0x54,
 	Config4		= 0x55,
 	Config5		= 0x56,
-	MultiIntr	= 0x5c,
 	PHYAR		= 0x60,
 	PHYstatus	= 0x6c,
 	RxMaxSize	= 0xda,
@@ -397,6 +390,19 @@
 #define EARLY_TALLY_EN			(1 << 16)
 };
 
+enum rtl8125_registers {
+	IntrMask_8125		= 0x38,
+	IntrStatus_8125		= 0x3c,
+	TxPoll_8125		= 0x90,
+	MAC0_BKP		= 0x19e0,
+};
+
+#define RX_VLAN_INNER_8125	BIT(22)
+#define RX_VLAN_OUTER_8125	BIT(23)
+#define RX_VLAN_8125		(RX_VLAN_INNER_8125 | RX_VLAN_OUTER_8125)
+
+#define RX_FETCH_DFLT_8125	(8 << 27)
+
 enum rtl_register_content {
 	/* InterruptStatusBits */
 	SYSErr		= 0x8000,
@@ -412,8 +418,6 @@
 	RxOK		= 0x0001,
 
 	/* RxStatusDesc */
-	RxBOVF	= (1 << 24),
-	RxFOVF	= (1 << 23),
 	RxRWT	= (1 << 22),
 	RxRES	= (1 << 21),
 	RxRUNT	= (1 << 20),
@@ -498,10 +502,7 @@
 	PCIDAC		= (1 << 4),
 	PCIMulRW	= (1 << 3),
 #define INTT_MASK	GENMASK(1, 0)
-	INTT_0		= 0x0000,	// 8168
-	INTT_1		= 0x0001,	// 8168
-	INTT_2		= 0x0002,	// 8168
-	INTT_3		= 0x0003,	// 8168
+#define CPCMD_MASK	(Normal_mode | RxVlan | RxChkSum | INTT_MASK)
 
 	/* rtl8169_PHYstatus */
 	TBI_Enable	= 0x80,
@@ -513,9 +514,6 @@
 	LinkStatus	= 0x02,
 	FullDup		= 0x01,
 
-	/* _TBICSRBit */
-	TBILinkOK	= 0x02000000,
-
 	/* ResetCounterCommand */
 	CounterReset	= 0x1,
 
@@ -559,11 +557,11 @@
 	TD1_GTSENV4	= (1 << 26),		/* Giant Send for IPv4 */
 	TD1_GTSENV6	= (1 << 25),		/* Giant Send for IPv6 */
 #define GTTCPHO_SHIFT			18
-#define GTTCPHO_MAX			0x7fU
+#define GTTCPHO_MAX			0x7f
 
 	/* Second doubleword. */
 #define TCPHO_SHIFT			18
-#define TCPHO_MAX			0x3ffU
+#define TCPHO_MAX			0x3ff
 #define TD1_MSS_SHIFT			18	/* MSS position (11 bits) */
 	TD1_IPv6_CS	= (1 << 28),		/* Calculate IPv6 checksum */
 	TD1_IPv4_CS	= (1 << 29),		/* Calculate IPv4 checksum */
@@ -588,7 +586,11 @@
 };
 
 #define RsvdMask	0x3fffc000
-#define CPCMD_QUIRK_MASK	(Normal_mode | RxVlan | RxChkSum | INTT_MASK)
+
+#define RTL_GSO_MAX_SIZE_V1	32000
+#define RTL_GSO_MAX_SEGS_V1	24
+#define RTL_GSO_MAX_SIZE_V2	64000
+#define RTL_GSO_MAX_SEGS_V2	64
 
 struct TxDesc {
 	__le32 opts1;
@@ -605,7 +607,6 @@
 struct ring_info {
 	struct sk_buff	*skb;
 	u32		len;
-	u8		__pad[sizeof(void *) - sizeof(u32)];
 };
 
 struct rtl8169_counters {
@@ -633,7 +634,6 @@
 
 enum rtl_flag {
 	RTL_FLAG_TASK_ENABLED = 0,
-	RTL_FLAG_TASK_SLOW_PENDING,
 	RTL_FLAG_TASK_RESET_PENDING,
 	RTL_FLAG_MAX
 };
@@ -648,9 +648,10 @@
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
 	struct net_device *dev;
+	struct phy_device *phydev;
 	struct napi_struct napi;
 	u32 msg_enable;
-	u16 mac_version;
+	enum mac_version mac_version;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
 	u32 dirty_tx;
@@ -660,63 +661,39 @@
 	struct RxDesc *RxDescArray;	/* 256-aligned Rx descriptor ring */
 	dma_addr_t TxPhyAddr;
 	dma_addr_t RxPhyAddr;
-	void *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
+	struct page *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
 	u16 cp_cmd;
-
-	u16 event_slow;
-	const struct rtl_coalesce_info *coalesce_info;
+	u32 irq_mask;
 	struct clk *clk;
 
-	struct mdio_ops {
-		void (*write)(struct rtl8169_private *, int, int);
-		int (*read)(struct rtl8169_private *, int);
-	} mdio_ops;
-
-	struct jumbo_ops {
-		void (*enable)(struct rtl8169_private *);
-		void (*disable)(struct rtl8169_private *);
-	} jumbo_ops;
-
-	void (*hw_start)(struct rtl8169_private *tp);
-	bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
-
 	struct {
 		DECLARE_BITMAP(flags, RTL_FLAG_MAX);
 		struct mutex mutex;
 		struct work_struct work;
 	} wk;
 
+	unsigned irq_enabled:1;
 	unsigned supports_gmii:1;
-	struct mii_bus *mii_bus;
+	unsigned aspm_manageable:1;
 	dma_addr_t counters_phys_addr;
 	struct rtl8169_counters *counters;
 	struct rtl8169_tc_offsets tc_offset;
 	u32 saved_wolopts;
 
-	struct rtl_fw {
-		const struct firmware *fw;
-
-#define RTL_VER_SIZE		32
-
-		char version[RTL_VER_SIZE];
-
-		struct rtl_fw_phy_action {
-			__le32 *code;
-			size_t size;
-		} phy_action;
-	} *rtl_fw;
-#define RTL_FIRMWARE_UNKNOWN	ERR_PTR(-EAGAIN)
+	const char *fw_name;
+	struct rtl_fw *rtl_fw;
 
 	u32 ocp_base;
 };
 
+typedef void (*rtl_generic_fct)(struct rtl8169_private *tp);
+
 MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
 MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
-module_param(use_dac, int, 0);
-MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot.");
 module_param_named(debug, debug.msg_enable, int, 0);
 MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 16=all)");
+MODULE_SOFTDEP("pre: realtek");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE(FIRMWARE_8168D_1);
 MODULE_FIRMWARE(FIRMWARE_8168D_2);
@@ -737,6 +714,7 @@
 MODULE_FIRMWARE(FIRMWARE_8168H_2);
 MODULE_FIRMWARE(FIRMWARE_8107E_1);
 MODULE_FIRMWARE(FIRMWARE_8107E_2);
+MODULE_FIRMWARE(FIRMWARE_8125A_3);
 
 static inline struct device *tp_to_dev(struct rtl8169_private *tp)
 {
@@ -753,12 +731,49 @@
 	mutex_unlock(&tp->wk.mutex);
 }
 
+static void rtl_lock_config_regs(struct rtl8169_private *tp)
+{
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+}
+
+static void rtl_unlock_config_regs(struct rtl8169_private *tp)
+{
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+}
+
 static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
 {
 	pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
 					   PCI_EXP_DEVCTL_READRQ, force);
 }
 
+static bool rtl_is_8125(struct rtl8169_private *tp)
+{
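+	/* RTL_GIGA_MAC_VER_60 and later are the RTL8125 family (see mac_info). */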
+	return tp->mac_version >= RTL_GIGA_MAC_VER_60;
+}
+
+static bool rtl_is_8168evl_up(struct rtl8169_private *tp)
+{
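+	/* 8168E-VL and later; VER_39 is excluded as an 8101-family chip. */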
+	return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
+	       tp->mac_version != RTL_GIGA_MAC_VER_39 &&
+	       tp->mac_version <= RTL_GIGA_MAC_VER_51;
+}
+
+static bool rtl_supports_eee(struct rtl8169_private *tp)
+{
+	return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
+	       tp->mac_version != RTL_GIGA_MAC_VER_37 &&
+	       tp->mac_version != RTL_GIGA_MAC_VER_39;
+}
+
+static void rtl_read_mac_from_reg(struct rtl8169_private *tp, u8 *mac, int reg)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		mac[i] = RTL_R8(tp, reg + i);
+}
+
 struct rtl_cond {
 	bool (*check)(struct rtl8169_private *);
 	const char *msg;
@@ -776,9 +791,9 @@
 	int i;
 
 	for (i = 0; i < n; i++) {
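+		/* Check first so an already-met condition returns without delaying. */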
-		delay(d);
 		if (c->check(tp) == high)
 			return true;
+		delay(d);
 	}
 	netif_err(tp, drv, tp->dev, "%s == %d (loop: %d, delay: %d).\n",
 		  c->msg, !high, n, d);
@@ -847,7 +862,7 @@
 	rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
 }
 
-static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
+static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
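+	/* Returns the 16-bit value, 0 on invalid reg, or -ETIMEDOUT on timeout. */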
 	if (rtl_ocp_reg_failure(tp, reg))
 		return 0;
@@ -855,7 +870,7 @@
 	RTL_W32(tp, GPHY_OCP, reg << 15);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
-		(RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
+		(RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT;
 }
 
 static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
@@ -876,6 +891,14 @@
 	return RTL_R32(tp, OCPDR);
 }
 
+static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
+				 u16 set)
+{
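+	/* Read-modify-write: clear the mask bits, then set the set bits. */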
+	u16 data = r8168_mac_ocp_read(tp, reg);
+
+	r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
+}
+
 #define OCP_STD_PHY_BASE	0xa400
 
 static void r8168g_mdio_write(struct rtl8169_private *tp, int reg, int value)
@@ -893,6 +916,9 @@
 
 static int r8168g_mdio_read(struct rtl8169_private *tp, int reg)
 {
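+	/* A read of page-select register 0x1f returns the current page. */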
+	if (reg == 0x1f)
+		return tp->ocp_base == OCP_STD_PHY_BASE ? 0 : tp->ocp_base >> 4;
+
 	if (tp->ocp_base != OCP_STD_PHY_BASE)
 		reg -= 0x10;
 
@@ -938,7 +964,7 @@
 	RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
 
 	value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
-		RTL_R32(tp, PHYAR) & 0xffff : ~0;
+		RTL_R32(tp, PHYAR) & 0xffff : -ETIMEDOUT;
 
 	/*
 	 * According to hardware specs a 20us delay is required after read
@@ -978,7 +1004,7 @@
 	RTL_W32(tp, EPHY_RXER_NUM, 0);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
-		RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
+		RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : -ETIMEDOUT;
 }
 
 #define R8168DP_1_MDIO_ACCESS_BIT	0x00020000
@@ -1006,6 +1032,10 @@
 {
 	int value;
 
+	/* Work around issue with chip reporting wrong PHY ID */
+	if (reg == MII_PHYSID2)
+		return 0xc912;
+
 	r8168dp_2_mdio_start(tp);
 
 	value = r8169_mdio_read(tp, reg);
@@ -1015,14 +1045,38 @@
 	return value;
 }
 
-static void rtl_writephy(struct rtl8169_private *tp, int location, u32 val)
+static void rtl_writephy(struct rtl8169_private *tp, int location, int val)
 {
-	tp->mdio_ops.write(tp, location, val);
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_27:
+		r8168dp_1_mdio_write(tp, location, val);
+		break;
+	case RTL_GIGA_MAC_VER_28:
+	case RTL_GIGA_MAC_VER_31:
+		r8168dp_2_mdio_write(tp, location, val);
+		break;
+	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
+		r8168g_mdio_write(tp, location, val);
+		break;
+	default:
+		r8169_mdio_write(tp, location, val);
+		break;
+	}
 }
 
 static int rtl_readphy(struct rtl8169_private *tp, int location)
 {
-	return tp->mdio_ops.read(tp, location);
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_27:
+		return r8168dp_1_mdio_read(tp, location);
+	case RTL_GIGA_MAC_VER_28:
+	case RTL_GIGA_MAC_VER_31:
+		return r8168dp_2_mdio_read(tp, location);
+	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
+		return r8168g_mdio_read(tp, location);
+	default:
+		return r8169_mdio_read(tp, location);
+	}
 }
 
 static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value)
@@ -1066,8 +1120,8 @@
 	return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
 }
 
-static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
-			  u32 val, int type)
+static void _rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
+			   u32 val, int type)
 {
 	BUG_ON((addr & 3) || (mask == 0));
 	RTL_W32(tp, ERIDR, val);
@@ -1076,7 +1130,13 @@
 	rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
 }
 
-static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
+static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
+			  u32 val)
+{
+	_rtl_eri_write(tp, addr, mask, val, ERIAR_EXGMAC);
+}
+
+static u32 _rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
 {
 	RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
 
@@ -1084,13 +1144,30 @@
 		RTL_R32(tp, ERIDR) : ~0;
 }
 
+static u32 rtl_eri_read(struct rtl8169_private *tp, int addr)
+{
+	return _rtl_eri_read(tp, addr, ERIAR_EXGMAC);
+}
+
 static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
-			 u32 m, int type)
+			 u32 m)
 {
 	u32 val;
 
-	val = rtl_eri_read(tp, addr, type);
-	rtl_eri_write(tp, addr, mask, (val & ~m) | p, type);
+	val = rtl_eri_read(tp, addr);
+	rtl_eri_write(tp, addr, mask, (val & ~m) | p);
+}
+
+static void rtl_eri_set_bits(struct rtl8169_private *tp, int addr, u32 mask,
+			     u32 p)
+{
+	rtl_w0w1_eri(tp, addr, mask, p, 0);
+}
+
+static void rtl_eri_clear_bits(struct rtl8169_private *tp, int addr, u32 mask,
+			       u32 m)
+{
+	rtl_w0w1_eri(tp, addr, mask, 0, m);
 }
 
 static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1102,24 +1179,7 @@
 
 static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 {
-	return rtl_eri_read(tp, reg, ERIAR_OOB);
-}
-
-static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
-{
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_27:
-	case RTL_GIGA_MAC_VER_28:
-	case RTL_GIGA_MAC_VER_31:
-		return r8168dp_ocp_read(tp, mask, reg);
-	case RTL_GIGA_MAC_VER_49:
-	case RTL_GIGA_MAC_VER_50:
-	case RTL_GIGA_MAC_VER_51:
-		return r8168ep_ocp_read(tp, mask, reg);
-	default:
-		BUG();
-		return ~0;
-	}
+	return _rtl_eri_read(tp, reg, ERIAR_OOB);
 }
 
 static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
@@ -1133,34 +1193,15 @@
 static void r8168ep_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
 			      u32 data)
 {
-	rtl_eri_write(tp, reg, ((u32)mask & 0x0f) << ERIAR_MASK_SHIFT,
-		      data, ERIAR_OOB);
+	_rtl_eri_write(tp, reg, ((u32)mask & 0x0f) << ERIAR_MASK_SHIFT,
+		       data, ERIAR_OOB);
 }
 
-static void ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg, u32 data)
+static void r8168dp_oob_notify(struct rtl8169_private *tp, u8 cmd)
 {
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_27:
-	case RTL_GIGA_MAC_VER_28:
-	case RTL_GIGA_MAC_VER_31:
-		r8168dp_ocp_write(tp, mask, reg, data);
-		break;
-	case RTL_GIGA_MAC_VER_49:
-	case RTL_GIGA_MAC_VER_50:
-	case RTL_GIGA_MAC_VER_51:
-		r8168ep_ocp_write(tp, mask, reg, data);
-		break;
-	default:
-		BUG();
-		break;
-	}
-}
+	rtl_eri_write(tp, 0xe8, ERIAR_MASK_0001, cmd);
 
-static void rtl8168_oob_notify(struct rtl8169_private *tp, u8 cmd)
-{
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_0001, cmd, ERIAR_EXGMAC);
-
-	ocp_write(tp, 0x1, 0x30, 0x00000001);
+	r8168dp_ocp_write(tp, 0x1, 0x30, 0x00000001);
 }
 
 #define OOB_CMD_RESET		0x00
@@ -1172,18 +1213,18 @@
 	return (tp->mac_version == RTL_GIGA_MAC_VER_31) ? 0xb8 : 0x10;
 }
 
-DECLARE_RTL_COND(rtl_ocp_read_cond)
+DECLARE_RTL_COND(rtl_dp_ocp_read_cond)
 {
 	u16 reg;
 
 	reg = rtl8168_get_ocp_reg(tp);
 
-	return ocp_read(tp, 0x0f, reg) & 0x00000800;
+	return r8168dp_ocp_read(tp, 0x0f, reg) & 0x00000800;
 }
 
 DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
 {
-	return ocp_read(tp, 0x0f, 0x124) & 0x00000001;
+	return r8168ep_ocp_read(tp, 0x0f, 0x124) & 0x00000001;
 }
 
 DECLARE_RTL_COND(rtl_ocp_tx_cond)
@@ -1201,14 +1242,15 @@
 
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
 {
-	rtl8168_oob_notify(tp, OOB_CMD_DRIVER_START);
-	rtl_msleep_loop_wait_high(tp, &rtl_ocp_read_cond, 10, 10);
+	r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
+	rtl_msleep_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10, 10);
 }
 
 static void rtl8168ep_driver_start(struct rtl8169_private *tp)
 {
-	ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
-	ocp_write(tp, 0x01, 0x30, ocp_read(tp, 0x01, 0x30) | 0x01);
+	r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
+	r8168ep_ocp_write(tp, 0x01, 0x30,
+			  r8168ep_ocp_read(tp, 0x01, 0x30) | 0x01);
 	rtl_msleep_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10, 10);
 }
 
@@ -1233,15 +1275,16 @@
 
 static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
 {
-	rtl8168_oob_notify(tp, OOB_CMD_DRIVER_STOP);
-	rtl_msleep_loop_wait_low(tp, &rtl_ocp_read_cond, 10, 10);
+	r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
+	rtl_msleep_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10, 10);
 }
 
 static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
 {
 	rtl8168ep_stop_cmac(tp);
-	ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
-	ocp_write(tp, 0x01, 0x30, ocp_read(tp, 0x01, 0x30) | 0x01);
+	r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
+	r8168ep_ocp_write(tp, 0x01, 0x30,
+			  r8168ep_ocp_read(tp, 0x01, 0x30) | 0x01);
 	rtl_msleep_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10, 10);
 }
 
@@ -1268,12 +1311,12 @@
 {
 	u16 reg = rtl8168_get_ocp_reg(tp);
 
-	return !!(ocp_read(tp, 0x0f, reg) & 0x00008000);
+	return !!(r8168dp_ocp_read(tp, 0x0f, reg) & 0x00008000);
 }
 
 static bool r8168ep_check_dash(struct rtl8169_private *tp)
 {
-	return !!(ocp_read(tp, 0x0f, 0x128) & 0x00000001);
+	return !!(r8168ep_ocp_read(tp, 0x0f, 0x128) & 0x00000001);
 }
 
 static bool r8168_check_dash(struct rtl8169_private *tp)
@@ -1292,19 +1335,10 @@
 	}
 }
 
-struct exgmac_reg {
-	u16 addr;
-	u16 mask;
-	u32 val;
-};
-
-static void rtl_write_exgmac_batch(struct rtl8169_private *tp,
-				   const struct exgmac_reg *r, int len)
+static void rtl_reset_packet_filter(struct rtl8169_private *tp)
 {
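+	/* Toggle BIT(0) of ERI register 0xdc to reset the packet filter. */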
-	while (len-- > 0) {
-		rtl_eri_write(tp, r->addr, r->mask, r->val, ERIAR_EXGMAC);
-		r++;
-	}
+	rtl_eri_clear_bits(tp, 0xdc, ERIAR_MASK_0001, BIT(0));
+	rtl_eri_set_bits(tp, 0xdc, ERIAR_MASK_0001, BIT(0));
 }
 
 DECLARE_RTL_COND(rtl_efusear_cond)
@@ -1320,48 +1354,56 @@
 		RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
 }
 
-static u16 rtl_get_events(struct rtl8169_private *tp)
+static u32 rtl_get_events(struct rtl8169_private *tp)
 {
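+	/* RTL8125 uses dedicated 32-bit interrupt status/mask registers. */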
-	return RTL_R16(tp, IntrStatus);
+	if (rtl_is_8125(tp))
+		return RTL_R32(tp, IntrStatus_8125);
+	else
+		return RTL_R16(tp, IntrStatus);
 }
 
-static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
+static void rtl_ack_events(struct rtl8169_private *tp, u32 bits)
 {
-	RTL_W16(tp, IntrStatus, bits);
-	mmiowb();
+	if (rtl_is_8125(tp))
+		RTL_W32(tp, IntrStatus_8125, bits);
+	else
+		RTL_W16(tp, IntrStatus, bits);
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
-	RTL_W16(tp, IntrMask, 0);
-	mmiowb();
-}
-
-static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
-{
-	RTL_W16(tp, IntrMask, bits);
+	if (rtl_is_8125(tp))
+		RTL_W32(tp, IntrMask_8125, 0);
+	else
+		RTL_W16(tp, IntrMask, 0);
+	tp->irq_enabled = 0;
 }
 
 #define RTL_EVENT_NAPI_RX	(RxOK | RxErr)
 #define RTL_EVENT_NAPI_TX	(TxOK | TxErr)
 #define RTL_EVENT_NAPI		(RTL_EVENT_NAPI_RX | RTL_EVENT_NAPI_TX)
 
-static void rtl_irq_enable_all(struct rtl8169_private *tp)
+static void rtl_irq_enable(struct rtl8169_private *tp)
 {
-	rtl_irq_enable(tp, RTL_EVENT_NAPI | tp->event_slow);
+	tp->irq_enabled = 1;
+	if (rtl_is_8125(tp))
+		RTL_W32(tp, IntrMask_8125, tp->irq_mask);
+	else
+		RTL_W16(tp, IntrMask, tp->irq_mask);
 }
 
 static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
 {
 	rtl_irq_disable(tp);
-	rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
+	rtl_ack_events(tp, 0xffffffff);
+	/* PCI commit: the read flushes posted MMIO writes */
 	RTL_R8(tp, ChipCmd);
 }
 
 static void rtl_link_chg_patch(struct rtl8169_private *tp)
 {
 	struct net_device *dev = tp->dev;
-	struct phy_device *phydev = dev->phydev;
+	struct phy_device *phydev = tp->phydev;
 
 	if (!netif_running(dev))
 		return;
@@ -1369,89 +1411,37 @@
 	if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_38) {
 		if (phydev->speed == SPEED_1000) {
-			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
-				      ERIAR_EXGMAC);
-			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011);
+			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005);
 		} else if (phydev->speed == SPEED_100) {
-			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
-				      ERIAR_EXGMAC);
-			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f);
+			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005);
 		} else {
-			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
-				      ERIAR_EXGMAC);
-			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x0000003f,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f);
+			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x0000003f);
 		}
-		/* Reset packet filter */
-		rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01,
-			     ERIAR_EXGMAC);
-		rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00,
-			     ERIAR_EXGMAC);
+		rtl_reset_packet_filter(tp);
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
 		   tp->mac_version == RTL_GIGA_MAC_VER_36) {
 		if (phydev->speed == SPEED_1000) {
-			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
-				      ERIAR_EXGMAC);
-			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011);
+			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005);
 		} else {
-			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
-				      ERIAR_EXGMAC);
-			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x0000003f,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f);
+			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x0000003f);
 		}
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
 		if (phydev->speed == SPEED_10) {
-			rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
-				      ERIAR_EXGMAC);
-			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02);
+			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060a);
 		} else {
-			rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000,
-				      ERIAR_EXGMAC);
+			rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
 		}
 	}
 }
 
 #define WAKE_ANY (WAKE_PHY | WAKE_MAGIC | WAKE_UCAST | WAKE_BCAST | WAKE_MCAST)
 
-static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
-{
-	u8 options;
-	u32 wolopts = 0;
-
-	options = RTL_R8(tp, Config1);
-	if (!(options & PMEnable))
-		return 0;
-
-	options = RTL_R8(tp, Config3);
-	if (options & LinkUp)
-		wolopts |= WAKE_PHY;
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
-		if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
-			wolopts |= WAKE_MAGIC;
-		break;
-	default:
-		if (options & MagicPacket)
-			wolopts |= WAKE_MAGIC;
-		break;
-	}
-
-	options = RTL_R8(tp, Config5);
-	if (options & UWF)
-		wolopts |= WAKE_UCAST;
-	if (options & BWF)
-		wolopts |= WAKE_BCAST;
-	if (options & MWF)
-		wolopts |= WAKE_MCAST;
-
-	return wolopts;
-}
-
 static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -1464,7 +1454,6 @@
 
 static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 {
-	unsigned int i, tmp;
 	static const struct {
 		u32 opt;
 		u16 reg;
@@ -1477,32 +1466,25 @@
 		{ WAKE_ANY,   Config5, LanWake },
 		{ WAKE_MAGIC, Config3, MagicPacket }
 	};
+	unsigned int i, tmp = ARRAY_SIZE(cfg);
 	u8 options;
 
-	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+	rtl_unlock_config_regs(tp);
 
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
-		tmp = ARRAY_SIZE(cfg) - 1;
+	if (rtl_is_8168evl_up(tp)) {
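+		/* Magic packet is configured via ERI 0x0dc, so skip the last cfg[] entry. */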
+		tmp--;
 		if (wolopts & WAKE_MAGIC)
-			rtl_w0w1_eri(tp,
-				     0x0dc,
-				     ERIAR_MASK_0100,
-				     MagicPacket_v2,
-				     0x0000,
-				     ERIAR_EXGMAC);
+			rtl_eri_set_bits(tp, 0x0dc, ERIAR_MASK_0100,
+					 MagicPacket_v2);
 		else
-			rtl_w0w1_eri(tp,
-				     0x0dc,
-				     ERIAR_MASK_0100,
-				     0x0000,
-				     MagicPacket_v2,
-				     ERIAR_EXGMAC);
-		break;
-	default:
-		tmp = ARRAY_SIZE(cfg);
-		break;
+			rtl_eri_clear_bits(tp, 0x0dc, ERIAR_MASK_0100,
+					   MagicPacket_v2);
+	} else if (rtl_is_8125(tp)) {
+		tmp--;
+		if (wolopts & WAKE_MAGIC)
+			r8168_mac_ocp_modify(tp, 0xc0b6, 0, BIT(0));
+		else
+			r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
 	}
 
 	for (i = 0; i < tmp; i++) {
@@ -1513,21 +1495,28 @@
 	}
 
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 		options = RTL_R8(tp, Config1) & ~PMEnable;
 		if (wolopts)
 			options |= PMEnable;
 		RTL_W8(tp, Config1, options);
 		break;
-	default:
+	case RTL_GIGA_MAC_VER_34:
+	case RTL_GIGA_MAC_VER_37:
+	case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_51:
 		options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
 		if (wolopts)
 			options |= PME_SIGNAL;
 		RTL_W8(tp, Config2, options);
 		break;
+	default:
+		break;
 	}
 
-	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+	rtl_lock_config_regs(tp);
+
+	device_set_wakeup_enable(tp_to_dev(tp), wolopts);
+	tp->dev->wol_enabled = wolopts ? 1 : 0;
 }
 
 static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -1549,18 +1538,11 @@
 
 	rtl_unlock_work(tp);
 
-	device_set_wakeup_enable(d, tp->saved_wolopts);
-
 	pm_runtime_put_noidle(d);
 
 	return 0;
 }
 
-static const char *rtl_lookup_firmware_name(struct rtl8169_private *tp)
-{
-	return rtl_chip_infos[tp->mac_version].fw_name;
-}
-
 static void rtl8169_get_drvinfo(struct net_device *dev,
 				struct ethtool_drvinfo *info)
 {
@@ -1570,7 +1552,7 @@
 	strlcpy(info->driver, MODULENAME, sizeof(info->driver));
 	strlcpy(info->bus_info, pci_name(tp->pci_dev), sizeof(info->bus_info));
 	BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version));
-	if (!IS_ERR_OR_NULL(rtl_fw))
+	if (rtl_fw)
 		strlcpy(info->fw_version, rtl_fw->version,
 			sizeof(info->fw_version));
 }
@@ -1609,6 +1591,13 @@
 	else
 		rx_config &= ~(AcceptErr | AcceptRunt);
 
+	if (rtl_is_8125(tp)) {
+		if (features & NETIF_F_HW_VLAN_CTAG_RX)
+			rx_config |= RX_VLAN_8125;
+		else
+			rx_config &= ~RX_VLAN_8125;
+	}
+
 	RTL_W32(tp, RxConfig, rx_config);
 
 	if (features & NETIF_F_RXCSUM)
@@ -1616,10 +1605,12 @@
 	else
 		tp->cp_cmd &= ~RxChkSum;
 
-	if (features & NETIF_F_HW_VLAN_CTAG_RX)
-		tp->cp_cmd |= RxVlan;
-	else
-		tp->cp_cmd &= ~RxVlan;
+	if (!rtl_is_8125(tp)) {
+		if (features & NETIF_F_HW_VLAN_CTAG_RX)
+			tp->cp_cmd |= RxVlan;
+		else
+			tp->cp_cmd &= ~RxVlan;
+	}
 
 	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 	RTL_R16(tp, CPlusCmd);
@@ -1730,11 +1721,13 @@
 
 static bool rtl8169_update_counters(struct rtl8169_private *tp)
 {
+	u8 val = RTL_R8(tp, ChipCmd);
+
 	/*
 	 * Some chips are unable to dump tally counters when the receiver
-	 * is disabled.
+	 * is disabled. If the register reads 0xff, the chip may be in
+	 * a PCI power-save state.
 	 */
-	if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
+	if (!(val & CmdRxEnb) || val == 0xff)
 		return true;
 
 	return rtl8169_do_counters(tp, CounterDump);
@@ -1886,18 +1879,16 @@
 static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct ethtool_link_ksettings ecmd;
 	const struct rtl_coalesce_info *ci;
-	int rc;
 
-	rc = phy_ethtool_get_link_ksettings(dev, &ecmd);
-	if (rc < 0)
-		return ERR_PTR(rc);
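+	/* Chip versions up to RTL_GIGA_MAC_VER_06 are the old 8169/8110 family. */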
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+		ci = rtl_coalesce_info_8169;
+	else
+		ci = rtl_coalesce_info_8168_8136;
 
-	for (ci = tp->coalesce_info; ci->speed != 0; ci++) {
-		if (ecmd.base.speed == ci->speed) {
+	for (; ci->speed; ci++) {
+		if (tp->phydev->speed == ci->speed)
 			return ci;
-		}
 	}
 
 	return ERR_PTR(-ELNRNG);
@@ -1918,6 +1909,9 @@
 	int i;
 	u16 w;
 
+	if (rtl_is_8125(tp))
+		return -EOPNOTSUPP;
+
 	memset(ec, 0, sizeof(*ec));
 
 	/* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */
@@ -1986,6 +1980,9 @@
 	u16 w = 0, cp01;
 	int i;
 
+	if (rtl_is_8125(tp))
+		return -EOPNOTSUPP;
+
 	scale = rtl_coalesce_choose_scale(dev,
 			max(p[0].usecs, p[1].usecs) * 1000, &cp01);
 	if (IS_ERR(scale))
@@ -2033,6 +2030,56 @@
 	return 0;
 }
 
+static int rtl8169_get_eee(struct net_device *dev, struct ethtool_eee *data)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	struct device *d = tp_to_dev(tp);
+	int ret;
+
+	if (!rtl_supports_eee(tp))
+		return -EOPNOTSUPP;
+
+	pm_runtime_get_noresume(d);
+
+	if (!pm_runtime_active(d)) {
+		ret = -EOPNOTSUPP;
+	} else {
+		ret = phy_ethtool_get_eee(tp->phydev, data);
+	}
+
+	pm_runtime_put_noidle(d);
+
+	return ret;
+}
+
+static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	struct device *d = tp_to_dev(tp);
+	int ret;
+
+	if (!rtl_supports_eee(tp))
+		return -EOPNOTSUPP;
+
+	pm_runtime_get_noresume(d);
+
+	if (!pm_runtime_active(d)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (dev->phydev->autoneg == AUTONEG_DISABLE ||
+	    dev->phydev->duplex != DUPLEX_FULL) {
+		ret = -EPROTONOSUPPORT;
+		goto out;
+	}
+
+	ret = phy_ethtool_set_eee(tp->phydev, data);
+out:
+	pm_runtime_put_noidle(d);
+	return ret;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_drvinfo		= rtl8169_get_drvinfo,
 	.get_regs_len		= rtl8169_get_regs_len,
@@ -2049,12 +2096,22 @@
 	.get_ethtool_stats	= rtl8169_get_ethtool_stats,
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.nway_reset		= phy_ethtool_nway_reset,
+	.get_eee		= rtl8169_get_eee,
+	.set_eee		= rtl8169_set_eee,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
 
-static void rtl8169_get_mac_version(struct rtl8169_private *tp,
-				    u8 default_version)
+static void rtl_enable_eee(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+	int supported = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
+
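+	/* Advertise every EEE mode the PHY reports as supported. */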
+	if (supported > 0)
+		phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, supported);
+}
+
+static void rtl8169_get_mac_version(struct rtl8169_private *tp)
 {
 	/*
 	 * The driver currently handles the 8168Bf and the 8168Be identically
@@ -2068,127 +2125,117 @@
 	 * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
 	 */
 	static const struct rtl_mac_info {
-		u32 mask;
-		u32 val;
-		int mac_version;
+		u16 mask;
+		u16 val;
+		u16 mac_version;
 	} mac_info[] = {
+		/* 8125 family. */
+		{ 0x7cf, 0x608,	RTL_GIGA_MAC_VER_60 },
+		{ 0x7c8, 0x608,	RTL_GIGA_MAC_VER_61 },
+
 		/* 8168EP family. */
-		{ 0x7cf00000, 0x50200000,	RTL_GIGA_MAC_VER_51 },
-		{ 0x7cf00000, 0x50100000,	RTL_GIGA_MAC_VER_50 },
-		{ 0x7cf00000, 0x50000000,	RTL_GIGA_MAC_VER_49 },
+		{ 0x7cf, 0x502,	RTL_GIGA_MAC_VER_51 },
+		{ 0x7cf, 0x501,	RTL_GIGA_MAC_VER_50 },
+		{ 0x7cf, 0x500,	RTL_GIGA_MAC_VER_49 },
 
 		/* 8168H family. */
-		{ 0x7cf00000, 0x54100000,	RTL_GIGA_MAC_VER_46 },
-		{ 0x7cf00000, 0x54000000,	RTL_GIGA_MAC_VER_45 },
+		{ 0x7cf, 0x541,	RTL_GIGA_MAC_VER_46 },
+		{ 0x7cf, 0x540,	RTL_GIGA_MAC_VER_45 },
 
 		/* 8168G family. */
-		{ 0x7cf00000, 0x5c800000,	RTL_GIGA_MAC_VER_44 },
-		{ 0x7cf00000, 0x50900000,	RTL_GIGA_MAC_VER_42 },
-		{ 0x7cf00000, 0x4c100000,	RTL_GIGA_MAC_VER_41 },
-		{ 0x7cf00000, 0x4c000000,	RTL_GIGA_MAC_VER_40 },
+		{ 0x7cf, 0x5c8,	RTL_GIGA_MAC_VER_44 },
+		{ 0x7cf, 0x509,	RTL_GIGA_MAC_VER_42 },
+		{ 0x7cf, 0x4c1,	RTL_GIGA_MAC_VER_41 },
+		{ 0x7cf, 0x4c0,	RTL_GIGA_MAC_VER_40 },
 
 		/* 8168F family. */
-		{ 0x7c800000, 0x48800000,	RTL_GIGA_MAC_VER_38 },
-		{ 0x7cf00000, 0x48100000,	RTL_GIGA_MAC_VER_36 },
-		{ 0x7cf00000, 0x48000000,	RTL_GIGA_MAC_VER_35 },
+		{ 0x7c8, 0x488,	RTL_GIGA_MAC_VER_38 },
+		{ 0x7cf, 0x481,	RTL_GIGA_MAC_VER_36 },
+		{ 0x7cf, 0x480,	RTL_GIGA_MAC_VER_35 },
 
 		/* 8168E family. */
-		{ 0x7c800000, 0x2c800000,	RTL_GIGA_MAC_VER_34 },
-		{ 0x7cf00000, 0x2c100000,	RTL_GIGA_MAC_VER_32 },
-		{ 0x7c800000, 0x2c000000,	RTL_GIGA_MAC_VER_33 },
+		{ 0x7c8, 0x2c8,	RTL_GIGA_MAC_VER_34 },
+		{ 0x7cf, 0x2c1,	RTL_GIGA_MAC_VER_32 },
+		{ 0x7c8, 0x2c0,	RTL_GIGA_MAC_VER_33 },
 
 		/* 8168D family. */
-		{ 0x7cf00000, 0x28100000,	RTL_GIGA_MAC_VER_25 },
-		{ 0x7c800000, 0x28000000,	RTL_GIGA_MAC_VER_26 },
+		{ 0x7cf, 0x281,	RTL_GIGA_MAC_VER_25 },
+		{ 0x7c8, 0x280,	RTL_GIGA_MAC_VER_26 },
 
 		/* 8168DP family. */
-		{ 0x7cf00000, 0x28800000,	RTL_GIGA_MAC_VER_27 },
-		{ 0x7cf00000, 0x28a00000,	RTL_GIGA_MAC_VER_28 },
-		{ 0x7cf00000, 0x28b00000,	RTL_GIGA_MAC_VER_31 },
+		{ 0x7cf, 0x288,	RTL_GIGA_MAC_VER_27 },
+		{ 0x7cf, 0x28a,	RTL_GIGA_MAC_VER_28 },
+		{ 0x7cf, 0x28b,	RTL_GIGA_MAC_VER_31 },
 
 		/* 8168C family. */
-		{ 0x7cf00000, 0x3c900000,	RTL_GIGA_MAC_VER_23 },
-		{ 0x7cf00000, 0x3c800000,	RTL_GIGA_MAC_VER_18 },
-		{ 0x7c800000, 0x3c800000,	RTL_GIGA_MAC_VER_24 },
-		{ 0x7cf00000, 0x3c000000,	RTL_GIGA_MAC_VER_19 },
-		{ 0x7cf00000, 0x3c200000,	RTL_GIGA_MAC_VER_20 },
-		{ 0x7cf00000, 0x3c300000,	RTL_GIGA_MAC_VER_21 },
-		{ 0x7c800000, 0x3c000000,	RTL_GIGA_MAC_VER_22 },
+		{ 0x7cf, 0x3c9,	RTL_GIGA_MAC_VER_23 },
+		{ 0x7cf, 0x3c8,	RTL_GIGA_MAC_VER_18 },
+		{ 0x7c8, 0x3c8,	RTL_GIGA_MAC_VER_24 },
+		{ 0x7cf, 0x3c0,	RTL_GIGA_MAC_VER_19 },
+		{ 0x7cf, 0x3c2,	RTL_GIGA_MAC_VER_20 },
+		{ 0x7cf, 0x3c3,	RTL_GIGA_MAC_VER_21 },
+		{ 0x7c8, 0x3c0,	RTL_GIGA_MAC_VER_22 },
 
 		/* 8168B family. */
-		{ 0x7cf00000, 0x38000000,	RTL_GIGA_MAC_VER_12 },
-		{ 0x7c800000, 0x38000000,	RTL_GIGA_MAC_VER_17 },
-		{ 0x7c800000, 0x30000000,	RTL_GIGA_MAC_VER_11 },
+		{ 0x7cf, 0x380,	RTL_GIGA_MAC_VER_12 },
+		{ 0x7c8, 0x380,	RTL_GIGA_MAC_VER_17 },
+		{ 0x7c8, 0x300,	RTL_GIGA_MAC_VER_11 },
 
 		/* 8101 family. */
-		{ 0x7c800000, 0x44800000,	RTL_GIGA_MAC_VER_39 },
-		{ 0x7c800000, 0x44000000,	RTL_GIGA_MAC_VER_37 },
-		{ 0x7cf00000, 0x40900000,	RTL_GIGA_MAC_VER_29 },
-		{ 0x7c800000, 0x40800000,	RTL_GIGA_MAC_VER_30 },
-		{ 0x7cf00000, 0x34900000,	RTL_GIGA_MAC_VER_08 },
-		{ 0x7cf00000, 0x24900000,	RTL_GIGA_MAC_VER_08 },
-		{ 0x7cf00000, 0x34800000,	RTL_GIGA_MAC_VER_07 },
-		{ 0x7cf00000, 0x24800000,	RTL_GIGA_MAC_VER_07 },
-		{ 0x7cf00000, 0x34000000,	RTL_GIGA_MAC_VER_13 },
-		{ 0x7cf00000, 0x34300000,	RTL_GIGA_MAC_VER_10 },
-		{ 0x7cf00000, 0x34200000,	RTL_GIGA_MAC_VER_16 },
-		{ 0x7c800000, 0x34800000,	RTL_GIGA_MAC_VER_09 },
-		{ 0x7c800000, 0x24800000,	RTL_GIGA_MAC_VER_09 },
-		{ 0x7c800000, 0x34000000,	RTL_GIGA_MAC_VER_16 },
+		{ 0x7c8, 0x448,	RTL_GIGA_MAC_VER_39 },
+		{ 0x7c8, 0x440,	RTL_GIGA_MAC_VER_37 },
+		{ 0x7cf, 0x409,	RTL_GIGA_MAC_VER_29 },
+		{ 0x7c8, 0x408,	RTL_GIGA_MAC_VER_30 },
+		{ 0x7cf, 0x349,	RTL_GIGA_MAC_VER_08 },
+		{ 0x7cf, 0x249,	RTL_GIGA_MAC_VER_08 },
+		{ 0x7cf, 0x348,	RTL_GIGA_MAC_VER_07 },
+		{ 0x7cf, 0x248,	RTL_GIGA_MAC_VER_07 },
+		{ 0x7cf, 0x340,	RTL_GIGA_MAC_VER_13 },
+		{ 0x7cf, 0x343,	RTL_GIGA_MAC_VER_10 },
+		{ 0x7cf, 0x342,	RTL_GIGA_MAC_VER_16 },
+		{ 0x7c8, 0x348,	RTL_GIGA_MAC_VER_09 },
+		{ 0x7c8, 0x248,	RTL_GIGA_MAC_VER_09 },
+		{ 0x7c8, 0x340,	RTL_GIGA_MAC_VER_16 },
 		/* FIXME: where did these entries come from ? -- FR */
-		{ 0xfc800000, 0x38800000,	RTL_GIGA_MAC_VER_15 },
-		{ 0xfc800000, 0x30800000,	RTL_GIGA_MAC_VER_14 },
+		{ 0xfc8, 0x388,	RTL_GIGA_MAC_VER_15 },
+		{ 0xfc8, 0x308,	RTL_GIGA_MAC_VER_14 },
 
 		/* 8110 family. */
-		{ 0xfc800000, 0x98000000,	RTL_GIGA_MAC_VER_06 },
-		{ 0xfc800000, 0x18000000,	RTL_GIGA_MAC_VER_05 },
-		{ 0xfc800000, 0x10000000,	RTL_GIGA_MAC_VER_04 },
-		{ 0xfc800000, 0x04000000,	RTL_GIGA_MAC_VER_03 },
-		{ 0xfc800000, 0x00800000,	RTL_GIGA_MAC_VER_02 },
-		{ 0xfc800000, 0x00000000,	RTL_GIGA_MAC_VER_01 },
+		{ 0xfc8, 0x980,	RTL_GIGA_MAC_VER_06 },
+		{ 0xfc8, 0x180,	RTL_GIGA_MAC_VER_05 },
+		{ 0xfc8, 0x100,	RTL_GIGA_MAC_VER_04 },
+		{ 0xfc8, 0x040,	RTL_GIGA_MAC_VER_03 },
+		{ 0xfc8, 0x008,	RTL_GIGA_MAC_VER_02 },
 
 		/* Catch-all */
-		{ 0x00000000, 0x00000000,	RTL_GIGA_MAC_NONE   }
+		{ 0x000, 0x000,	RTL_GIGA_MAC_NONE   }
 	};
 	const struct rtl_mac_info *p = mac_info;
-	u32 reg;
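+	/* The chip XID lives in bits 31:20 of TxConfig. */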
+	u16 reg = RTL_R32(tp, TxConfig) >> 20;
 
-	reg = RTL_R32(tp, TxConfig);
 	while ((reg & p->mask) != p->val)
 		p++;
 	tp->mac_version = p->mac_version;
 
 	if (tp->mac_version == RTL_GIGA_MAC_NONE) {
-		dev_notice(tp_to_dev(tp),
-			   "unknown MAC, using family default\n");
-		tp->mac_version = default_version;
-	} else if (tp->mac_version == RTL_GIGA_MAC_VER_42) {
-		tp->mac_version = tp->supports_gmii ?
-				  RTL_GIGA_MAC_VER_42 :
-				  RTL_GIGA_MAC_VER_43;
-	} else if (tp->mac_version == RTL_GIGA_MAC_VER_45) {
-		tp->mac_version = tp->supports_gmii ?
-				  RTL_GIGA_MAC_VER_45 :
-				  RTL_GIGA_MAC_VER_47;
-	} else if (tp->mac_version == RTL_GIGA_MAC_VER_46) {
-		tp->mac_version = tp->supports_gmii ?
-				  RTL_GIGA_MAC_VER_46 :
-				  RTL_GIGA_MAC_VER_48;
+		dev_err(tp_to_dev(tp), "unknown chip XID %03x\n", reg & 0xfcf);
+	} else if (!tp->supports_gmii) {
+		if (tp->mac_version == RTL_GIGA_MAC_VER_42)
+			tp->mac_version = RTL_GIGA_MAC_VER_43;
+		else if (tp->mac_version == RTL_GIGA_MAC_VER_45)
+			tp->mac_version = RTL_GIGA_MAC_VER_47;
+		else if (tp->mac_version == RTL_GIGA_MAC_VER_46)
+			tp->mac_version = RTL_GIGA_MAC_VER_48;
 	}
 }
 
-static void rtl8169_print_mac_version(struct rtl8169_private *tp)
-{
-	netif_dbg(tp, drv, tp->dev, "mac_version = 0x%02x\n", tp->mac_version);
-}
-
 struct phy_reg {
 	u16 reg;
 	u16 val;
 };
 
-static void rtl_writephy_batch(struct rtl8169_private *tp,
-			       const struct phy_reg *regs, int len)
+static void __rtl_writephy_batch(struct rtl8169_private *tp,
+				 const struct phy_reg *regs, int len)
 {
 	while (len-- > 0) {
 		rtl_writephy(tp, regs->reg, regs->val);
@@ -2196,258 +2243,22 @@
 	}
 }
 
-#define PHY_READ		0x00000000
-#define PHY_DATA_OR		0x10000000
-#define PHY_DATA_AND		0x20000000
-#define PHY_BJMPN		0x30000000
-#define PHY_MDIO_CHG		0x40000000
-#define PHY_CLEAR_READCOUNT	0x70000000
-#define PHY_WRITE		0x80000000
-#define PHY_READCOUNT_EQ_SKIP	0x90000000
-#define PHY_COMP_EQ_SKIPN	0xa0000000
-#define PHY_COMP_NEQ_SKIPN	0xb0000000
-#define PHY_WRITE_PREVIOUS	0xc0000000
-#define PHY_SKIPN		0xd0000000
-#define PHY_DELAY_MS		0xe0000000
-
-struct fw_info {
-	u32	magic;
-	char	version[RTL_VER_SIZE];
-	__le32	fw_start;
-	__le32	fw_len;
-	u8	chksum;
-} __packed;
-
-#define FW_OPCODE_SIZE	sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code))
-
-static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
-{
-	const struct firmware *fw = rtl_fw->fw;
-	struct fw_info *fw_info = (struct fw_info *)fw->data;
-	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
-	char *version = rtl_fw->version;
-	bool rc = false;
-
-	if (fw->size < FW_OPCODE_SIZE)
-		goto out;
-
-	if (!fw_info->magic) {
-		size_t i, size, start;
-		u8 checksum = 0;
-
-		if (fw->size < sizeof(*fw_info))
-			goto out;
-
-		for (i = 0; i < fw->size; i++)
-			checksum += fw->data[i];
-		if (checksum != 0)
-			goto out;
-
-		start = le32_to_cpu(fw_info->fw_start);
-		if (start > fw->size)
-			goto out;
-
-		size = le32_to_cpu(fw_info->fw_len);
-		if (size > (fw->size - start) / FW_OPCODE_SIZE)
-			goto out;
-
-		memcpy(version, fw_info->version, RTL_VER_SIZE);
-
-		pa->code = (__le32 *)(fw->data + start);
-		pa->size = size;
-	} else {
-		if (fw->size % FW_OPCODE_SIZE)
-			goto out;
-
-		strlcpy(version, rtl_lookup_firmware_name(tp), RTL_VER_SIZE);
-
-		pa->code = (__le32 *)fw->data;
-		pa->size = fw->size / FW_OPCODE_SIZE;
-	}
-	version[RTL_VER_SIZE - 1] = 0;
-
-	rc = true;
-out:
-	return rc;
-}
-
-static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev,
-			   struct rtl_fw_phy_action *pa)
-{
-	bool rc = false;
-	size_t index;
-
-	for (index = 0; index < pa->size; index++) {
-		u32 action = le32_to_cpu(pa->code[index]);
-		u32 regno = (action & 0x0fff0000) >> 16;
-
-		switch(action & 0xf0000000) {
-		case PHY_READ:
-		case PHY_DATA_OR:
-		case PHY_DATA_AND:
-		case PHY_MDIO_CHG:
-		case PHY_CLEAR_READCOUNT:
-		case PHY_WRITE:
-		case PHY_WRITE_PREVIOUS:
-		case PHY_DELAY_MS:
-			break;
-
-		case PHY_BJMPN:
-			if (regno > index) {
-				netif_err(tp, ifup, tp->dev,
-					  "Out of range of firmware\n");
-				goto out;
-			}
-			break;
-		case PHY_READCOUNT_EQ_SKIP:
-			if (index + 2 >= pa->size) {
-				netif_err(tp, ifup, tp->dev,
-					  "Out of range of firmware\n");
-				goto out;
-			}
-			break;
-		case PHY_COMP_EQ_SKIPN:
-		case PHY_COMP_NEQ_SKIPN:
-		case PHY_SKIPN:
-			if (index + 1 + regno >= pa->size) {
-				netif_err(tp, ifup, tp->dev,
-					  "Out of range of firmware\n");
-				goto out;
-			}
-			break;
-
-		default:
-			netif_err(tp, ifup, tp->dev,
-				  "Invalid action 0x%08x\n", action);
-			goto out;
-		}
-	}
-	rc = true;
-out:
-	return rc;
-}
-
-static int rtl_check_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
-{
-	struct net_device *dev = tp->dev;
-	int rc = -EINVAL;
-
-	if (!rtl_fw_format_ok(tp, rtl_fw)) {
-		netif_err(tp, ifup, dev, "invalid firmware\n");
-		goto out;
-	}
-
-	if (rtl_fw_data_ok(tp, dev, &rtl_fw->phy_action))
-		rc = 0;
-out:
-	return rc;
-}
-
-static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
-{
-	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
-	struct mdio_ops org, *ops = &tp->mdio_ops;
-	u32 predata, count;
-	size_t index;
-
-	predata = count = 0;
-	org.write = ops->write;
-	org.read = ops->read;
-
-	for (index = 0; index < pa->size; ) {
-		u32 action = le32_to_cpu(pa->code[index]);
-		u32 data = action & 0x0000ffff;
-		u32 regno = (action & 0x0fff0000) >> 16;
-
-		if (!action)
-			break;
-
-		switch(action & 0xf0000000) {
-		case PHY_READ:
-			predata = rtl_readphy(tp, regno);
-			count++;
-			index++;
-			break;
-		case PHY_DATA_OR:
-			predata |= data;
-			index++;
-			break;
-		case PHY_DATA_AND:
-			predata &= data;
-			index++;
-			break;
-		case PHY_BJMPN:
-			index -= regno;
-			break;
-		case PHY_MDIO_CHG:
-			if (data == 0) {
-				ops->write = org.write;
-				ops->read = org.read;
-			} else if (data == 1) {
-				ops->write = mac_mcu_write;
-				ops->read = mac_mcu_read;
-			}
-
-			index++;
-			break;
-		case PHY_CLEAR_READCOUNT:
-			count = 0;
-			index++;
-			break;
-		case PHY_WRITE:
-			rtl_writephy(tp, regno, data);
-			index++;
-			break;
-		case PHY_READCOUNT_EQ_SKIP:
-			index += (count == data) ? 2 : 1;
-			break;
-		case PHY_COMP_EQ_SKIPN:
-			if (predata == data)
-				index += regno;
-			index++;
-			break;
-		case PHY_COMP_NEQ_SKIPN:
-			if (predata != data)
-				index += regno;
-			index++;
-			break;
-		case PHY_WRITE_PREVIOUS:
-			rtl_writephy(tp, regno, predata);
-			index++;
-			break;
-		case PHY_SKIPN:
-			index += regno + 1;
-			break;
-		case PHY_DELAY_MS:
-			mdelay(data);
-			index++;
-			break;
-
-		default:
-			BUG();
-		}
-	}
-
-	ops->write = org.write;
-	ops->read = org.read;
-}
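+/* Derives the length with ARRAY_SIZE(), so the argument must be a real array. */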
+#define rtl_writephy_batch(tp, a) __rtl_writephy_batch(tp, a, ARRAY_SIZE(a))
 
 static void rtl_release_firmware(struct rtl8169_private *tp)
 {
-	if (!IS_ERR_OR_NULL(tp->rtl_fw)) {
-		release_firmware(tp->rtl_fw->fw);
+	if (tp->rtl_fw) {
+		rtl_fw_release_firmware(tp->rtl_fw);
 		kfree(tp->rtl_fw);
+		tp->rtl_fw = NULL;
 	}
-	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 }
 
 static void rtl_apply_firmware(struct rtl8169_private *tp)
 {
-	struct rtl_fw *rtl_fw = tp->rtl_fw;
-
-	/* TODO: release firmware once rtl_phy_write_fw signals failures. */
-	if (!IS_ERR_OR_NULL(rtl_fw))
-		rtl_phy_write_fw(tp, rtl_fw);
+	/* TODO: release firmware if rtl_fw_write_firmware signals failure. */
+	if (tp->rtl_fw)
+		rtl_fw_write_firmware(tp, tp->rtl_fw);
 }
 
 static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -2458,6 +2269,61 @@
 		rtl_apply_firmware(tp);
 }
 
+static void rtl8168_config_eee_mac(struct rtl8169_private *tp)
+{
+	/* Adjust EEE LED frequency */
+	if (tp->mac_version != RTL_GIGA_MAC_VER_38)
+		RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
+
+	rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_1111, 0x0003);
+}
+
+static void rtl8125_config_eee_mac(struct rtl8169_private *tp)
+{
+	r8168_mac_ocp_modify(tp, 0xe040, 0, BIT(1) | BIT(0));
+	r8168_mac_ocp_modify(tp, 0xeb62, 0, BIT(2) | BIT(1));
+}
+
+static void rtl8168f_config_eee_phy(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+
+	phy_write(phydev, 0x1f, 0x0007);
+	phy_write(phydev, 0x1e, 0x0020);
+	phy_set_bits(phydev, 0x15, BIT(8));
+
+	phy_write(phydev, 0x1f, 0x0005);
+	phy_write(phydev, 0x05, 0x8b85);
+	phy_set_bits(phydev, 0x06, BIT(13));
+
+	phy_write(phydev, 0x1f, 0x0000);
+}
+
+static void rtl8168g_config_eee_phy(struct rtl8169_private *tp)
+{
+	phy_modify_paged(tp->phydev, 0x0a43, 0x11, 0, BIT(4));
+}
+
+static void rtl8168h_config_eee_phy(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+
+	rtl8168g_config_eee_phy(tp);
+
+	phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200);
+	phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080);
+}
+
+static void rtl8125_config_eee_phy(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+
+	rtl8168h_config_eee_phy(tp);
+
+	phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000);
+	phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000);
+}
+
 static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
 {
 	static const struct phy_reg phy_reg_init[] = {
@@ -2522,7 +2388,7 @@
 		{ 0x00, 0x9200 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp)
@@ -2533,7 +2399,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8169scd_hw_phy_config_quirk(struct rtl8169_private *tp)
@@ -2591,7 +2457,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	rtl8169scd_hw_phy_config_quirk(tp);
 }
@@ -2646,7 +2512,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp)
@@ -2659,7 +2525,7 @@
 	rtl_writephy(tp, 0x1f, 0x0001);
 	rtl_patchphy(tp, 0x16, 1 << 0);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp)
@@ -2670,7 +2536,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp)
@@ -2683,7 +2549,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp)
@@ -2698,7 +2564,7 @@
 	rtl_patchphy(tp, 0x14, 1 << 5);
 	rtl_patchphy(tp, 0x0d, 1 << 5);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp)
@@ -2723,7 +2589,7 @@
 		{ 0x09, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	rtl_patchphy(tp, 0x14, 1 << 5);
 	rtl_patchphy(tp, 0x0d, 1 << 5);
@@ -2750,7 +2616,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	rtl_patchphy(tp, 0x16, 1 << 0);
 	rtl_patchphy(tp, 0x14, 1 << 5);
@@ -2772,7 +2638,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	rtl_patchphy(tp, 0x16, 1 << 0);
 	rtl_patchphy(tp, 0x14, 1 << 5);
@@ -2785,50 +2651,59 @@
 	rtl8168c_3_hw_phy_config(tp);
 }
 
+static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = {
+	/* Channel Estimation */
+	{ 0x1f, 0x0001 },
+	{ 0x06, 0x4064 },
+	{ 0x07, 0x2863 },
+	{ 0x08, 0x059c },
+	{ 0x09, 0x26b4 },
+	{ 0x0a, 0x6a19 },
+	{ 0x0b, 0xdcc8 },
+	{ 0x10, 0xf06d },
+	{ 0x14, 0x7f68 },
+	{ 0x18, 0x7fd9 },
+	{ 0x1c, 0xf0ff },
+	{ 0x1d, 0x3d9c },
+	{ 0x1f, 0x0003 },
+	{ 0x12, 0xf49f },
+	{ 0x13, 0x070b },
+	{ 0x1a, 0x05ad },
+	{ 0x14, 0x94c0 },
+
+	/*
+	 * Tx Error Issue
+	 * Enhance line driver power
+	 */
+	{ 0x1f, 0x0002 },
+	{ 0x06, 0x5561 },
+	{ 0x1f, 0x0005 },
+	{ 0x05, 0x8332 },
+	{ 0x06, 0x5561 },
+
+	/*
+	 * Cannot link at 1Gbps with a bad cable
+	 * Decrease SNR threshold from 21.07dB to 19.04dB
+	 */
+	{ 0x1f, 0x0001 },
+	{ 0x17, 0x0cc0 },
+
+	{ 0x1f, 0x0000 },
+	{ 0x0d, 0xf880 }
+};
+
+static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = {
+	{ 0x1f, 0x0002 },
+	{ 0x05, 0x669a },
+	{ 0x1f, 0x0005 },
+	{ 0x05, 0x8330 },
+	{ 0x06, 0x669a },
+	{ 0x1f, 0x0002 }
+};
+
 static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
 {
-	static const struct phy_reg phy_reg_init_0[] = {
-		/* Channel Estimation */
-		{ 0x1f, 0x0001 },
-		{ 0x06, 0x4064 },
-		{ 0x07, 0x2863 },
-		{ 0x08, 0x059c },
-		{ 0x09, 0x26b4 },
-		{ 0x0a, 0x6a19 },
-		{ 0x0b, 0xdcc8 },
-		{ 0x10, 0xf06d },
-		{ 0x14, 0x7f68 },
-		{ 0x18, 0x7fd9 },
-		{ 0x1c, 0xf0ff },
-		{ 0x1d, 0x3d9c },
-		{ 0x1f, 0x0003 },
-		{ 0x12, 0xf49f },
-		{ 0x13, 0x070b },
-		{ 0x1a, 0x05ad },
-		{ 0x14, 0x94c0 },
-
-		/*
-		 * Tx Error Issue
-		 * Enhance line driver power
-		 */
-		{ 0x1f, 0x0002 },
-		{ 0x06, 0x5561 },
-		{ 0x1f, 0x0005 },
-		{ 0x05, 0x8332 },
-		{ 0x06, 0x5561 },
-
-		/*
-		 * Can not link to 1Gbps with bad cable
-		 * Decrease SNR threshold form 21.07dB to 19.04dB
-		 */
-		{ 0x1f, 0x0001 },
-		{ 0x17, 0x0cc0 },
-
-		{ 0x1f, 0x0000 },
-		{ 0x0d, 0xf880 }
-	};
-
-	rtl_writephy_batch(tp, phy_reg_init_0, ARRAY_SIZE(phy_reg_init_0));
+	rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
 
 	/*
 	 * Rx Error Issue
@@ -2839,17 +2714,9 @@
 	rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00);
 
 	if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
-		static const struct phy_reg phy_reg_init[] = {
-			{ 0x1f, 0x0002 },
-			{ 0x05, 0x669a },
-			{ 0x1f, 0x0005 },
-			{ 0x05, 0x8330 },
-			{ 0x06, 0x669a },
-			{ 0x1f, 0x0002 }
-		};
 		int val;
 
-		rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+		rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
 
 		val = rtl_readphy(tp, 0x0d);
 
@@ -2875,7 +2742,7 @@
 			{ 0x06, 0x6662 }
 		};
 
-		rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+		rtl_writephy_batch(tp, phy_reg_init);
 	}
 
 	/* RSET couple improve */
@@ -2898,62 +2765,12 @@
 
 static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
 {
-	static const struct phy_reg phy_reg_init_0[] = {
-		/* Channel Estimation */
-		{ 0x1f, 0x0001 },
-		{ 0x06, 0x4064 },
-		{ 0x07, 0x2863 },
-		{ 0x08, 0x059c },
-		{ 0x09, 0x26b4 },
-		{ 0x0a, 0x6a19 },
-		{ 0x0b, 0xdcc8 },
-		{ 0x10, 0xf06d },
-		{ 0x14, 0x7f68 },
-		{ 0x18, 0x7fd9 },
-		{ 0x1c, 0xf0ff },
-		{ 0x1d, 0x3d9c },
-		{ 0x1f, 0x0003 },
-		{ 0x12, 0xf49f },
-		{ 0x13, 0x070b },
-		{ 0x1a, 0x05ad },
-		{ 0x14, 0x94c0 },
-
-		/*
-		 * Tx Error Issue
-		 * Enhance line driver power
-		 */
-		{ 0x1f, 0x0002 },
-		{ 0x06, 0x5561 },
-		{ 0x1f, 0x0005 },
-		{ 0x05, 0x8332 },
-		{ 0x06, 0x5561 },
-
-		/*
-		 * Can not link to 1Gbps with bad cable
-		 * Decrease SNR threshold form 21.07dB to 19.04dB
-		 */
-		{ 0x1f, 0x0001 },
-		{ 0x17, 0x0cc0 },
-
-		{ 0x1f, 0x0000 },
-		{ 0x0d, 0xf880 }
-	};
-
-	rtl_writephy_batch(tp, phy_reg_init_0, ARRAY_SIZE(phy_reg_init_0));
+	rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
 
 	if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
-		static const struct phy_reg phy_reg_init[] = {
-			{ 0x1f, 0x0002 },
-			{ 0x05, 0x669a },
-			{ 0x1f, 0x0005 },
-			{ 0x05, 0x8330 },
-			{ 0x06, 0x669a },
-
-			{ 0x1f, 0x0002 }
-		};
 		int val;
 
-		rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+		rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
 
 		val = rtl_readphy(tp, 0x0d);
 		if ((val & 0x00ff) != 0x006c) {
@@ -2978,7 +2795,7 @@
 			{ 0x06, 0x2642 }
 		};
 
-		rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+		rtl_writephy_batch(tp, phy_reg_init);
 	}
 
 	/* Fine tune PLL performance */
@@ -3056,7 +2873,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp)
@@ -3071,7 +2888,7 @@
 		{ 0x1f, 0x0000 }
 	};
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 	rtl_patchphy(tp, 0x0d, 1 << 5);
 }
 
@@ -3107,7 +2924,7 @@
 
 	rtl_apply_firmware(tp);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	/* DCO enable for 10M IDLE Power */
 	rtl_writephy(tp, 0x1f, 0x0007);
@@ -3155,14 +2972,11 @@
 		addr[2] | (addr[3] << 8),
 		addr[4] | (addr[5] << 8)
 	};
-	const struct exgmac_reg e[] = {
-		{ .addr = 0xe0, ERIAR_MASK_1111, .val = w[0] | (w[1] << 16) },
-		{ .addr = 0xe4, ERIAR_MASK_1111, .val = w[2] },
-		{ .addr = 0xf0, ERIAR_MASK_1111, .val = w[0] << 16 },
-		{ .addr = 0xf4, ERIAR_MASK_1111, .val = w[1] | (w[2] << 16) }
-	};
 
-	rtl_write_exgmac_batch(tp, e, ARRAY_SIZE(e));
+	rtl_eri_write(tp, 0xe0, ERIAR_MASK_1111, w[0] | (w[1] << 16));
+	rtl_eri_write(tp, 0xe4, ERIAR_MASK_1111, w[2]);
+	rtl_eri_write(tp, 0xf0, ERIAR_MASK_1111, w[0] << 16);
+	rtl_eri_write(tp, 0xf4, ERIAR_MASK_1111, w[1] | (w[2] << 16));
 }
 
 static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
@@ -3196,7 +3010,7 @@
 
 	rtl_apply_firmware(tp);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	/* For 4-corner performance improve */
 	rtl_writephy(tp, 0x1f, 0x0005);
@@ -3225,22 +3039,8 @@
 	rtl_w0w1_phy(tp, 0x06, 0x4000, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
 
-	/* EEE setting */
-	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_1111, 0x0003, 0x0000, ERIAR_EXGMAC);
-	rtl_writephy(tp, 0x1f, 0x0005);
-	rtl_writephy(tp, 0x05, 0x8b85);
-	rtl_w0w1_phy(tp, 0x06, 0x2000, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0004);
-	rtl_writephy(tp, 0x1f, 0x0007);
-	rtl_writephy(tp, 0x1e, 0x0020);
-	rtl_w0w1_phy(tp, 0x15, 0x0100, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0002);
-	rtl_writephy(tp, 0x1f, 0x0000);
-	rtl_writephy(tp, 0x0d, 0x0007);
-	rtl_writephy(tp, 0x0e, 0x003c);
-	rtl_writephy(tp, 0x0d, 0x4007);
-	rtl_writephy(tp, 0x0e, 0x0006);
-	rtl_writephy(tp, 0x0d, 0x0000);
+	rtl8168f_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 
 	/* Green feature */
 	rtl_writephy(tp, 0x1f, 0x0003);
@@ -3275,6 +3075,9 @@
 	rtl_writephy(tp, 0x05, 0x8b86);
 	rtl_w0w1_phy(tp, 0x06, 0x0001, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
+
+	rtl8168f_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp)
@@ -3316,7 +3119,7 @@
 
 	rtl_apply_firmware(tp);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	rtl8168f_hw_phy_config(tp);
 
@@ -3382,7 +3185,7 @@
 	rtl_w0w1_phy(tp, 0x06, 0x4000, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 
 	/* Modify green table for giga */
 	rtl_writephy(tp, 0x1f, 0x0005);
@@ -3408,22 +3211,6 @@
 	rtl_w0w1_phy(tp, 0x06, 0x8000, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
 
-	/* eee setting */
-	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x00, 0x03, ERIAR_EXGMAC);
-	rtl_writephy(tp, 0x1f, 0x0005);
-	rtl_writephy(tp, 0x05, 0x8b85);
-	rtl_w0w1_phy(tp, 0x06, 0x0000, 0x2000);
-	rtl_writephy(tp, 0x1f, 0x0004);
-	rtl_writephy(tp, 0x1f, 0x0007);
-	rtl_writephy(tp, 0x1e, 0x0020);
-	rtl_w0w1_phy(tp, 0x15, 0x0000, 0x0100);
-	rtl_writephy(tp, 0x1f, 0x0000);
-	rtl_writephy(tp, 0x0d, 0x0007);
-	rtl_writephy(tp, 0x0e, 0x003c);
-	rtl_writephy(tp, 0x0d, 0x4007);
-	rtl_writephy(tp, 0x0e, 0x0000);
-	rtl_writephy(tp, 0x0d, 0x0000);
-
 	/* Green feature */
 	rtl_writephy(tp, 0x1f, 0x0003);
 	rtl_w0w1_phy(tp, 0x19, 0x0000, 0x0001);
@@ -3431,52 +3218,57 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 }
 
+static void rtl8168g_disable_aldps(struct rtl8169_private *tp)
+{
+	phy_modify_paged(tp->phydev, 0x0a43, 0x10, BIT(2), 0);
+}
+
+static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+
+	phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0);
+	phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6));
+	phy_write(phydev, 0x1f, 0x0a43);
+	phy_write(phydev, 0x13, 0x8084);
+	phy_clear_bits(phydev, 0x14, BIT(14) | BIT(13));
+	phy_set_bits(phydev, 0x10, BIT(12) | BIT(1) | BIT(0));
+
+	phy_write(phydev, 0x1f, 0x0000);
+}
+
 static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
 {
+	int ret;
+
 	rtl_apply_firmware(tp);
 
-	rtl_writephy(tp, 0x1f, 0x0a46);
-	if (rtl_readphy(tp, 0x10) & 0x0100) {
-		rtl_writephy(tp, 0x1f, 0x0bcc);
-		rtl_w0w1_phy(tp, 0x12, 0x0000, 0x8000);
-	} else {
-		rtl_writephy(tp, 0x1f, 0x0bcc);
-		rtl_w0w1_phy(tp, 0x12, 0x8000, 0x0000);
-	}
+	ret = phy_read_paged(tp->phydev, 0x0a46, 0x10);
+	if (ret & BIT(8))
+		phy_modify_paged(tp->phydev, 0x0bcc, 0x12, BIT(15), 0);
+	else
+		phy_modify_paged(tp->phydev, 0x0bcc, 0x12, 0, BIT(15));
 
-	rtl_writephy(tp, 0x1f, 0x0a46);
-	if (rtl_readphy(tp, 0x13) & 0x0100) {
-		rtl_writephy(tp, 0x1f, 0x0c41);
-		rtl_w0w1_phy(tp, 0x15, 0x0002, 0x0000);
-	} else {
-		rtl_writephy(tp, 0x1f, 0x0c41);
-		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x0002);
-	}
+	ret = phy_read_paged(tp->phydev, 0x0a46, 0x13);
+	if (ret & BIT(8))
+		phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1));
+	else
+		phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0);
 
 	/* Enable PHY auto speed down */
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
 
-	rtl_writephy(tp, 0x1f, 0x0bcc);
-	rtl_w0w1_phy(tp, 0x14, 0x0100, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x00c0, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	rtl_writephy(tp, 0x13, 0x8084);
-	rtl_w0w1_phy(tp, 0x14, 0x0000, 0x6000);
-	rtl_w0w1_phy(tp, 0x10, 0x1003, 0x0000);
+	rtl8168g_phy_adjust_10m_aldps(tp);
 
 	/* EEE auto-fallback function */
-	rtl_writephy(tp, 0x1f, 0x0a4b);
-	rtl_w0w1_phy(tp, 0x11, 0x0004, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2));
 
 	/* Enable UC LPF tune function */
 	rtl_writephy(tp, 0x1f, 0x0a43);
 	rtl_writephy(tp, 0x13, 0x8012);
 	rtl_w0w1_phy(tp, 0x14, 0x8000, 0x0000);
 
-	rtl_writephy(tp, 0x1f, 0x0c42);
-	rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000);
+	phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14));
 
 	/* Improve SWR Efficiency */
 	rtl_writephy(tp, 0x1f, 0x0bcd);
@@ -3488,18 +3280,18 @@
 	rtl_writephy(tp, 0x14, 0x1065);
 	rtl_writephy(tp, 0x14, 0x9065);
 	rtl_writephy(tp, 0x14, 0x1065);
-
-	/* Check ALDPS bit, disable it if enabled */
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	if (rtl_readphy(tp, 0x10) & 0x0004)
-		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
-
 	rtl_writephy(tp, 0x1f, 0x0000);
+
+	rtl8168g_disable_aldps(tp);
+	rtl8168g_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp)
 {
 	rtl_apply_firmware(tp);
+	rtl8168g_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp)
@@ -3573,14 +3365,10 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 
 	/* enable GPHY 10M */
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x0800, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11));
 
 	/* SAR ADC performance */
-	rtl_writephy(tp, 0x1f, 0x0bca);
-	rtl_w0w1_phy(tp, 0x17, 0x4000, 0x3000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14));
 
 	rtl_writephy(tp, 0x1f, 0x0a43);
 	rtl_writephy(tp, 0x13, 0x803f);
@@ -3600,16 +3388,11 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 
 	/* disable phy pfm mode */
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0);
 
-	/* Check ALDPS bit, disable it if enabled */
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	if (rtl_readphy(tp, 0x10) & 0x0004)
-		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
-
-	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl8168g_disable_aldps(tp);
+	rtl8168h_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp)
@@ -3635,9 +3418,7 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 
 	/* enable GPHY 10M */
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x0800, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11));
 
 	r8168_mac_ocp_write(tp, 0xdd02, 0x807d);
 	data = r8168_mac_ocp_read(tp, 0xdd02);
@@ -3673,40 +3454,22 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 
 	/* disable phy pfm mode */
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0);
 
-	/* Check ALDPS bit, disable it if enabled */
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	if (rtl_readphy(tp, 0x10) & 0x0004)
-		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
-
-	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl8168g_disable_aldps(tp);
+	rtl8168g_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp)
 {
 	/* Enable PHY auto speed down */
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
 
-	/* patch 10M & ALDPS */
-	rtl_writephy(tp, 0x1f, 0x0bcc);
-	rtl_w0w1_phy(tp, 0x14, 0x0000, 0x0100);
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x00c0, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	rtl_writephy(tp, 0x13, 0x8084);
-	rtl_w0w1_phy(tp, 0x14, 0x0000, 0x6000);
-	rtl_w0w1_phy(tp, 0x10, 0x1003, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl8168g_phy_adjust_10m_aldps(tp);
 
 	/* Enable EEE auto-fallback function */
-	rtl_writephy(tp, 0x1f, 0x0a4b);
-	rtl_w0w1_phy(tp, 0x11, 0x0004, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2));
 
 	/* Enable UC LPF tune function */
 	rtl_writephy(tp, 0x1f, 0x0a43);
@@ -3715,30 +3478,16 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 
 	/* set rg_sel_sdm_rate */
-	rtl_writephy(tp, 0x1f, 0x0c42);
-	rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14));
 
-	/* Check ALDPS bit, disable it if enabled */
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	if (rtl_readphy(tp, 0x10) & 0x0004)
-		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
-
-	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl8168g_disable_aldps(tp);
+	rtl8168g_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp)
 {
-	/* patch 10M & ALDPS */
-	rtl_writephy(tp, 0x1f, 0x0bcc);
-	rtl_w0w1_phy(tp, 0x14, 0x0000, 0x0100);
-	rtl_writephy(tp, 0x1f, 0x0a44);
-	rtl_w0w1_phy(tp, 0x11, 0x00c0, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	rtl_writephy(tp, 0x13, 0x8084);
-	rtl_w0w1_phy(tp, 0x14, 0x0000, 0x6000);
-	rtl_w0w1_phy(tp, 0x10, 0x1003, 0x0000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl8168g_phy_adjust_10m_aldps(tp);
 
 	/* Enable UC LPF tune function */
 	rtl_writephy(tp, 0x1f, 0x0a43);
@@ -3747,9 +3496,7 @@
 	rtl_writephy(tp, 0x1f, 0x0000);
 
 	/* Set rg_sel_sdm_rate */
-	rtl_writephy(tp, 0x1f, 0x0c42);
-	rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000);
-	rtl_writephy(tp, 0x1f, 0x0000);
+	phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14));
 
 	/* Channel estimation parameters */
 	rtl_writephy(tp, 0x1f, 0x0a43);
@@ -3810,12 +3557,9 @@
 	rtl_writephy(tp, 0x14, 0x1065);
 	rtl_writephy(tp, 0x1f, 0x0000);
 
-	/* Check ALDPS bit, disable it if enabled */
-	rtl_writephy(tp, 0x1f, 0x0a43);
-	if (rtl_readphy(tp, 0x10) & 0x0004)
-		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
-
-	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl8168g_disable_aldps(tp);
+	rtl8168g_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl8102e_hw_phy_config(struct rtl8169_private *tp)
@@ -3832,7 +3576,7 @@
 	rtl_patchphy(tp, 0x19, 1 << 13);
 	rtl_patchphy(tp, 0x10, 1 << 15);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
@@ -3858,7 +3602,7 @@
 
 	rtl_apply_firmware(tp);
 
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_writephy_batch(tp, phy_reg_init);
 }
 
 static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
@@ -3871,7 +3615,7 @@
 	rtl_apply_firmware(tp);
 
 	/* EEE setting */
-	rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0004);
 	rtl_writephy(tp, 0x10, 0x401f);
 	rtl_writephy(tp, 0x19, 0x7030);
@@ -3894,141 +3638,202 @@
 
 	rtl_apply_firmware(tp);
 
-	rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+	rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
+	rtl_writephy_batch(tp, phy_reg_init);
 
-	rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
+}
+
+static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+
+	phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084);
+	phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
+	phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006);
+	phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
+	phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100);
+	phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000);
+	phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400);
+	phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff);
+	phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff);
+
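+	/* The raw writes below use the Realtek extended-page scheme that the
+	 * phy_*_paged() helpers above wrap: reg 0x1f selects the page, and on
+	 * page 0x0a43 reg 0x13 picks the extended address whose data port is
+	 * reg 0x14.
+	 */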
+	phy_write(phydev, 0x1f, 0x0a43);
+	phy_write(phydev, 0x13, 0x80ea);
+	phy_modify(phydev, 0x14, 0xff00, 0xc400);
+	phy_write(phydev, 0x13, 0x80eb);
+	phy_modify(phydev, 0x14, 0x0700, 0x0300);
+	phy_write(phydev, 0x13, 0x80f8);
+	phy_modify(phydev, 0x14, 0xff00, 0x1c00);
+	phy_write(phydev, 0x13, 0x80f1);
+	phy_modify(phydev, 0x14, 0xff00, 0x3000);
+	phy_write(phydev, 0x13, 0x80fe);
+	phy_modify(phydev, 0x14, 0xff00, 0xa500);
+	phy_write(phydev, 0x13, 0x8102);
+	phy_modify(phydev, 0x14, 0xff00, 0x5000);
+	phy_write(phydev, 0x13, 0x8105);
+	phy_modify(phydev, 0x14, 0xff00, 0x3300);
+	phy_write(phydev, 0x13, 0x8100);
+	phy_modify(phydev, 0x14, 0xff00, 0x7000);
+	phy_write(phydev, 0x13, 0x8104);
+	phy_modify(phydev, 0x14, 0xff00, 0xf000);
+	phy_write(phydev, 0x13, 0x8106);
+	phy_modify(phydev, 0x14, 0xff00, 0x6500);
+	phy_write(phydev, 0x13, 0x80dc);
+	phy_modify(phydev, 0x14, 0xff00, 0xed00);
+	phy_write(phydev, 0x13, 0x80df);
+	phy_set_bits(phydev, 0x14, BIT(8));
+	phy_write(phydev, 0x13, 0x80e1);
+	phy_clear_bits(phydev, 0x14, BIT(8));
+	phy_write(phydev, 0x1f, 0x0000);
+
+	phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038);
+	phy_write_paged(phydev, 0xa43, 0x13, 0x819f);
+	phy_write_paged(phydev, 0xa43, 0x14, 0xd0b6);
+
+	phy_write_paged(phydev, 0xbc3, 0x12, 0x5555);
+	phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00);
+	phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000);
+	phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+
+	rtl8125_config_eee_phy(tp);
+	rtl_enable_eee(tp);
+}
+
+static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp)
+{
+	struct phy_device *phydev = tp->phydev;
+	int i;
+
+	phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
+	phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff);
+	phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
+	phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000);
+	phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002);
+	phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044);
+	phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000);
+	phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000);
+	phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002);
+	phy_write_paged(phydev, 0xad4, 0x16, 0x00a8);
+	phy_write_paged(phydev, 0xac5, 0x16, 0x01ff);
+	phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030);
+
+	phy_write(phydev, 0x1f, 0x0b87);
+	phy_write(phydev, 0x16, 0x80a2);
+	phy_write(phydev, 0x17, 0x0153);
+	phy_write(phydev, 0x16, 0x809c);
+	phy_write(phydev, 0x17, 0x0153);
+	phy_write(phydev, 0x1f, 0x0000);
+
+	phy_write(phydev, 0x1f, 0x0a43);
+	phy_write(phydev, 0x13, 0x81B3);
+	phy_write(phydev, 0x14, 0x0043);
+	phy_write(phydev, 0x14, 0x00A7);
+	phy_write(phydev, 0x14, 0x00D6);
+	phy_write(phydev, 0x14, 0x00EC);
+	phy_write(phydev, 0x14, 0x00F6);
+	phy_write(phydev, 0x14, 0x00FB);
+	phy_write(phydev, 0x14, 0x00FD);
+	phy_write(phydev, 0x14, 0x00FF);
+	phy_write(phydev, 0x14, 0x00BB);
+	phy_write(phydev, 0x14, 0x0058);
+	phy_write(phydev, 0x14, 0x0029);
+	phy_write(phydev, 0x14, 0x0013);
+	phy_write(phydev, 0x14, 0x0009);
+	phy_write(phydev, 0x14, 0x0004);
+	phy_write(phydev, 0x14, 0x0002);
+	for (i = 0; i < 25; i++)
+		phy_write(phydev, 0x14, 0x0000);
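+	/* the loop finishes the 0x81B3 write sequence with 25 zero entries */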
+
+	phy_write(phydev, 0x13, 0x8257);
+	phy_write(phydev, 0x14, 0x020F);
+
+	phy_write(phydev, 0x13, 0x80EA);
+	phy_write(phydev, 0x14, 0x7843);
+	phy_write(phydev, 0x1f, 0x0000);
+
+	rtl_apply_firmware(tp);
+
+	phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000);
+
+	phy_write(phydev, 0x1f, 0x0a43);
+	phy_write(phydev, 0x13, 0x81a2);
+	phy_set_bits(phydev, 0x14, BIT(8));
+	phy_write(phydev, 0x1f, 0x0000);
+
+	phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00);
+	phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000);
+	phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020);
+	phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000);
+	phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000);
+	phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+
+	rtl8125_config_eee_phy(tp);
+	rtl_enable_eee(tp);
 }
 
 static void rtl_hw_phy_config(struct net_device *dev)
 {
+	static const rtl_generic_fct phy_configs[] = {
+		/* PCI devices. */
+		[RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
+		[RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
+		[RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
+		[RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config,
+		[RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config,
+		/* PCI-E devices. */
+		[RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config,
+		[RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config,
+		[RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config,
+		[RTL_GIGA_MAC_VER_10] = NULL,
+		[RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config,
+		[RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config,
+		[RTL_GIGA_MAC_VER_13] = NULL,
+		[RTL_GIGA_MAC_VER_14] = NULL,
+		[RTL_GIGA_MAC_VER_15] = NULL,
+		[RTL_GIGA_MAC_VER_16] = NULL,
+		[RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config,
+		[RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config,
+		[RTL_GIGA_MAC_VER_22] = rtl8168c_4_hw_phy_config,
+		[RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config,
+		[RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config,
+		[RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config,
+		[RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config,
+		[RTL_GIGA_MAC_VER_31] = NULL,
+		[RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config,
+		[RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config,
+		[RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config,
+		[RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_41] = NULL,
+		[RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
+		[RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config,
+		[RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config,
+	};
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	rtl8169_print_mac_version(tp);
-
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01:
-		break;
-	case RTL_GIGA_MAC_VER_02:
-	case RTL_GIGA_MAC_VER_03:
-		rtl8169s_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_04:
-		rtl8169sb_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_05:
-		rtl8169scd_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_06:
-		rtl8169sce_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_07:
-	case RTL_GIGA_MAC_VER_08:
-	case RTL_GIGA_MAC_VER_09:
-		rtl8102e_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_11:
-		rtl8168bb_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_12:
-		rtl8168bef_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_17:
-		rtl8168bef_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_18:
-		rtl8168cp_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_19:
-		rtl8168c_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_20:
-		rtl8168c_2_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_21:
-		rtl8168c_3_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_22:
-		rtl8168c_4_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_23:
-	case RTL_GIGA_MAC_VER_24:
-		rtl8168cp_2_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_25:
-		rtl8168d_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_26:
-		rtl8168d_2_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_27:
-		rtl8168d_3_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_28:
-		rtl8168d_4_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_29:
-	case RTL_GIGA_MAC_VER_30:
-		rtl8105e_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_31:
-		/* None. */
-		break;
-	case RTL_GIGA_MAC_VER_32:
-	case RTL_GIGA_MAC_VER_33:
-		rtl8168e_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_34:
-		rtl8168e_2_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_35:
-		rtl8168f_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_36:
-		rtl8168f_2_hw_phy_config(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_37:
-		rtl8402_hw_phy_config(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_38:
-		rtl8411_hw_phy_config(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_39:
-		rtl8106e_hw_phy_config(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_40:
-		rtl8168g_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_42:
-	case RTL_GIGA_MAC_VER_43:
-	case RTL_GIGA_MAC_VER_44:
-		rtl8168g_2_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_45:
-	case RTL_GIGA_MAC_VER_47:
-		rtl8168h_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_46:
-	case RTL_GIGA_MAC_VER_48:
-		rtl8168h_2_hw_phy_config(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_49:
-		rtl8168ep_1_hw_phy_config(tp);
-		break;
-	case RTL_GIGA_MAC_VER_50:
-	case RTL_GIGA_MAC_VER_51:
-		rtl8168ep_2_hw_phy_config(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_41:
-	default:
-		break;
-	}
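+	/* A NULL (or missing) entry, e.g. VER_31 or VER_41, means the chip
+	 * needs no PHY quirks; these match the empty cases of the old switch.
+	 */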
+	if (phy_configs[tp->mac_version])
+		phy_configs[tp->mac_version](tp);
 }
 
 static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
@@ -4037,55 +3842,29 @@
 		schedule_work(&tp->wk.work);
 }
 
-static bool rtl_tbi_enabled(struct rtl8169_private *tp)
-{
-	return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
-	       (RTL_R8(tp, PHYstatus) & TBI_Enable);
-}
-
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
 	rtl_hw_phy_config(dev);
 
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
-		netif_dbg(tp, drv, dev,
-			  "Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-		RTL_W8(tp, 0x82, 0x01);
-	}
-
-	pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
-
-	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+		pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
 		pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
-
-	if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
 		netif_dbg(tp, drv, dev,
 			  "Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
 		RTL_W8(tp, 0x82, 0x01);
-		netif_dbg(tp, drv, dev,
-			  "Set PHY Reg 0x0bh = 0x00h\n");
-		rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
 	}
 
 	/* We may have called phy_speed_down before */
-	phy_speed_up(dev->phydev);
+	phy_speed_up(tp->phydev);
 
-	genphy_soft_reset(dev->phydev);
-
-	/* It was reported that several chips end up with 10MBit/Half on a
-	 * 1GBit link after resuming from S3. For whatever reason the PHY on
-	 * these chips doesn't properly start a renegotiation when soft-reset.
-	 * Explicitly requesting a renegotiation fixes this.
-	 */
-	if (dev->phydev->autoneg == AUTONEG_ENABLE)
-		phy_restart_aneg(dev->phydev);
+	genphy_soft_reset(tp->phydev);
 }
 
 static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
 {
 	rtl_lock_work(tp);
 
-	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+	rtl_unlock_config_regs(tp);
 
 	RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
 	RTL_R32(tp, MAC4);
@@ -4096,7 +3875,7 @@
 	if (tp->mac_version == RTL_GIGA_MAC_VER_34)
 		rtl_rar_exgmac_set(tp, addr);
 
-	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+	rtl_lock_config_regs(tp);
 
 	rtl_unlock_work(tp);
 }
@@ -4123,35 +3902,12 @@
 
 static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+	struct rtl8169_private *tp = netdev_priv(dev);
+
 	if (!netif_running(dev))
 		return -ENODEV;
 
-	return phy_mii_ioctl(dev->phydev, ifr, cmd);
-}
-
-static void rtl_init_mdio_ops(struct rtl8169_private *tp)
-{
-	struct mdio_ops *ops = &tp->mdio_ops;
-
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_27:
-		ops->write	= r8168dp_1_mdio_write;
-		ops->read	= r8168dp_1_mdio_read;
-		break;
-	case RTL_GIGA_MAC_VER_28:
-	case RTL_GIGA_MAC_VER_31:
-		ops->write	= r8168dp_2_mdio_write;
-		ops->read	= r8168dp_2_mdio_read;
-		break;
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
-		ops->write	= r8168g_mdio_write;
-		ops->read	= r8168g_mdio_read;
-		break;
-	default:
-		ops->write	= r8169_mdio_write;
-		ops->read	= r8169_mdio_read;
-		break;
-	}
+	return phy_mii_ioctl(tp->phydev, ifr, cmd);
 }
 
 static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
@@ -4173,23 +3929,7 @@
 	}
 }
 
-static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
-{
-	struct phy_device *phydev;
-
-	if (!__rtl8169_get_wol(tp))
-		return false;
-
-	/* phydev may not be attached to netdevice */
-	phydev = mdiobus_get_phy(tp->mii_bus, 0);
-
-	phy_speed_down(phydev, false);
-	rtl_wol_suspend_quirk(tp);
-
-	return true;
-}
-
-static void r8168_pll_power_down(struct rtl8169_private *tp)
+static void rtl_pll_power_down(struct rtl8169_private *tp)
 {
 	if (r8168_check_dash(tp))
 		return;
@@ -4198,8 +3938,11 @@
 	    tp->mac_version == RTL_GIGA_MAC_VER_33)
 		rtl_ephy_write(tp, 0x19, 0xff64);
 
-	if (rtl_wol_pll_power_down(tp))
+	if (device_may_wakeup(tp_to_dev(tp))) {
+		phy_speed_down(tp->phydev, false);
+		rtl_wol_suspend_quirk(tp);
 		return;
+	}
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
@@ -4213,19 +3956,22 @@
 	case RTL_GIGA_MAC_VER_48:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
+	case RTL_GIGA_MAC_VER_60:
+	case RTL_GIGA_MAC_VER_61:
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
 	case RTL_GIGA_MAC_VER_40:
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_49:
-		rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
-			     0xfc000000, ERIAR_EXGMAC);
+		rtl_eri_clear_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
+	default:
+		break;
 	}
 }
 
-static void r8168_pll_power_up(struct rtl8169_private *tp)
+static void rtl_pll_power_up(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
@@ -4241,48 +3987,29 @@
 	case RTL_GIGA_MAC_VER_48:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
+	case RTL_GIGA_MAC_VER_60:
+	case RTL_GIGA_MAC_VER_61:
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
 		break;
 	case RTL_GIGA_MAC_VER_40:
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_49:
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
-		rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
-			     0x00000000, ERIAR_EXGMAC);
+		rtl_eri_set_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
+		break;
+	default:
 		break;
 	}
 
-	phy_resume(tp->dev->phydev);
+	phy_resume(tp->phydev);
 	/* give MAC/PHY some time to resume */
 	msleep(20);
 }
 
-static void rtl_pll_power_down(struct rtl8169_private *tp)
-{
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
-	case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
-		break;
-	default:
-		r8168_pll_power_down(tp);
-	}
-}
-
-static void rtl_pll_power_up(struct rtl8169_private *tp)
-{
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
-	case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
-		break;
-	default:
-		r8168_pll_power_up(tp);
-	}
-}
-
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
 		RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
 		break;
@@ -4294,6 +4021,10 @@
 	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
 		break;
+	case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
+		RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_VLAN_8125 |
+				      RX_DMA_BURST);
+		break;
 	default:
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
 		break;
@@ -4305,24 +4036,6 @@
 	tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0;
 }
 
-static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
-{
-	if (tp->jumbo_ops.enable) {
-		RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-		tp->jumbo_ops.enable(tp);
-		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-	}
-}
-
-static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
-{
-	if (tp->jumbo_ops.disable) {
-		RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-		tp->jumbo_ops.disable(tp);
-		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-	}
-}
-
 static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
@@ -4389,55 +4102,64 @@
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
-static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
+static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	struct jumbo_ops *ops = &tp->jumbo_ops;
-
+	rtl_unlock_config_regs(tp);
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_11:
-		ops->disable	= r8168b_0_hw_jumbo_disable;
-		ops->enable	= r8168b_0_hw_jumbo_enable;
+		r8168b_0_hw_jumbo_enable(tp);
 		break;
 	case RTL_GIGA_MAC_VER_12:
 	case RTL_GIGA_MAC_VER_17:
-		ops->disable	= r8168b_1_hw_jumbo_disable;
-		ops->enable	= r8168b_1_hw_jumbo_enable;
+		r8168b_1_hw_jumbo_enable(tp);
 		break;
-	case RTL_GIGA_MAC_VER_18: /* Wild guess. Needs info from Realtek. */
-	case RTL_GIGA_MAC_VER_19:
-	case RTL_GIGA_MAC_VER_20:
-	case RTL_GIGA_MAC_VER_21: /* Wild guess. Needs info from Realtek. */
-	case RTL_GIGA_MAC_VER_22:
-	case RTL_GIGA_MAC_VER_23:
-	case RTL_GIGA_MAC_VER_24:
-	case RTL_GIGA_MAC_VER_25:
-	case RTL_GIGA_MAC_VER_26:
-		ops->disable	= r8168c_hw_jumbo_disable;
-		ops->enable	= r8168c_hw_jumbo_enable;
+	case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26:
+		r8168c_hw_jumbo_enable(tp);
 		break;
-	case RTL_GIGA_MAC_VER_27:
-	case RTL_GIGA_MAC_VER_28:
-		ops->disable	= r8168dp_hw_jumbo_disable;
-		ops->enable	= r8168dp_hw_jumbo_enable;
+	case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28:
+		r8168dp_hw_jumbo_enable(tp);
 		break;
-	case RTL_GIGA_MAC_VER_31: /* Wild guess. Needs info from Realtek. */
-	case RTL_GIGA_MAC_VER_32:
-	case RTL_GIGA_MAC_VER_33:
-	case RTL_GIGA_MAC_VER_34:
-		ops->disable	= r8168e_hw_jumbo_disable;
-		ops->enable	= r8168e_hw_jumbo_enable;
+	case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_33:
+		r8168e_hw_jumbo_enable(tp);
 		break;
-
-	/*
-	 * No action needed for jumbo frames with 8169.
-	 * No jumbo for 810x at all.
-	 */
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
 	default:
-		ops->disable	= NULL;
-		ops->enable	= NULL;
 		break;
 	}
+	rtl_lock_config_regs(tp);
+}
+
+static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	rtl_unlock_config_regs(tp);
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_11:
+		r8168b_0_hw_jumbo_disable(tp);
+		break;
+	case RTL_GIGA_MAC_VER_12:
+	case RTL_GIGA_MAC_VER_17:
+		r8168b_1_hw_jumbo_disable(tp);
+		break;
+	case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26:
+		r8168c_hw_jumbo_disable(tp);
+		break;
+	case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28:
+		r8168dp_hw_jumbo_disable(tp);
+		break;
+	case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_34:
+		r8168e_hw_jumbo_disable(tp);
+		break;
+	default:
+		break;
+	}
+	rtl_lock_config_regs(tp);
+}
+
+static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu)
+{
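+	/* ETH_DATA_LEN is the standard 1500 byte Ethernet payload; any larger
+	 * MTU requires the chip's jumbo mode
+	 */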
+	if (mtu > ETH_DATA_LEN)
+		rtl_hw_jumbo_enable(tp);
+	else
+		rtl_hw_jumbo_disable(tp);
 }
 
 DECLARE_RTL_COND(rtl_chipcmd_cond)
@@ -4452,48 +4174,31 @@
 	rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
 
-static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
-{
-	struct rtl_fw *rtl_fw;
-	const char *name;
-	int rc = -ENOMEM;
-
-	name = rtl_lookup_firmware_name(tp);
-	if (!name)
-		goto out_no_firmware;
-
-	rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL);
-	if (!rtl_fw)
-		goto err_warn;
-
-	rc = request_firmware(&rtl_fw->fw, name, tp_to_dev(tp));
-	if (rc < 0)
-		goto err_free;
-
-	rc = rtl_check_firmware(tp, rtl_fw);
-	if (rc < 0)
-		goto err_release_firmware;
-
-	tp->rtl_fw = rtl_fw;
-out:
-	return;
-
-err_release_firmware:
-	release_firmware(rtl_fw->fw);
-err_free:
-	kfree(rtl_fw);
-err_warn:
-	netif_warn(tp, ifup, tp->dev, "unable to load firmware patch %s (%d)\n",
-		   name, rc);
-out_no_firmware:
-	tp->rtl_fw = NULL;
-	goto out;
-}
-
 static void rtl_request_firmware(struct rtl8169_private *tp)
 {
-	if (IS_ERR(tp->rtl_fw))
-		rtl_request_uncached_firmware(tp);
+	struct rtl_fw *rtl_fw;
+
+	/* firmware loaded already or no firmware available */
+	if (tp->rtl_fw || !tp->fw_name)
+		return;
+
+	rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL);
+	if (!rtl_fw) {
+		netif_warn(tp, ifup, tp->dev, "Unable to load firmware, out of memory\n");
+		return;
+	}
+
+	rtl_fw->phy_write = rtl_writephy;
+	rtl_fw->phy_read = rtl_readphy;
+	rtl_fw->mac_mcu_write = mac_mcu_write;
+	rtl_fw->mac_mcu_read = mac_mcu_read;
+	rtl_fw->fw_name = tp->fw_name;
+	rtl_fw->dev = tp_to_dev(tp);
+
+	if (rtl_fw_request_firmware(rtl_fw))
+		kfree(rtl_fw);
+	else
+		tp->rtl_fw = rtl_fw;
 }
 
 static void rtl_rx_close(struct rtl8169_private *tp)
@@ -4543,8 +4248,7 @@
 	u32 val = TX_DMA_BURST << TxDMAShift |
 		  InterFrameGap << TxInterFrameGapShift;
 
-	if (tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
-	    tp->mac_version != RTL_GIGA_MAC_VER_39)
+	if (rtl_is_8168evl_up(tp))
 		val |= TXCFG_AUTO_FIFO;
 
 	RTL_W32(tp, TxConfig, val);
@@ -4571,130 +4275,63 @@
 
 static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
 {
-	static const struct rtl_cfg2_info {
-		u32 mac_version;
-		u32 clk;
-		u32 val;
-	} cfg2_info [] = {
-		{ RTL_GIGA_MAC_VER_05, PCI_Clock_33MHz, 0x000fff00 }, // 8110SCd
-		{ RTL_GIGA_MAC_VER_05, PCI_Clock_66MHz, 0x000fffff },
-		{ RTL_GIGA_MAC_VER_06, PCI_Clock_33MHz, 0x00ffff00 }, // 8110SCe
-		{ RTL_GIGA_MAC_VER_06, PCI_Clock_66MHz, 0x00ffffff }
-	};
-	const struct rtl_cfg2_info *p = cfg2_info;
-	unsigned int i;
-	u32 clk;
+	u32 val;
 
-	clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
-	for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
-		if ((p->mac_version == mac_version) && (p->clk == clk)) {
-			RTL_W32(tp, 0x7c, p->val);
-			break;
-		}
-	}
+	if (tp->mac_version == RTL_GIGA_MAC_VER_05)
+		val = 0x000fff00;
+	else if (tp->mac_version == RTL_GIGA_MAC_VER_06)
+		val = 0x00ffff00;
+	else
+		return;
+
+	if (RTL_R8(tp, Config2) & PCI_Clock_66MHz)
+		val |= 0xff;
+
+	RTL_W32(tp, 0x7c, val);
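+	/* e.g. an 8110SCd (VER_05) on a 66MHz bus writes 0x000fffff here,
+	 * matching the old cfg2_info table
+	 */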
 }
 
 static void rtl_set_rx_mode(struct net_device *dev)
 {
+	u32 rx_mode = AcceptBroadcast | AcceptMyPhys | AcceptMulticast;
+	/* Multicast hash filter */
+	u32 mc_filter[2] = { 0xffffffff, 0xffffffff };
 	struct rtl8169_private *tp = netdev_priv(dev);
-	u32 mc_filter[2];	/* Multicast hash filter */
-	int rx_mode;
-	u32 tmp = 0;
+	u32 tmp;
 
 	if (dev->flags & IFF_PROMISC) {
 		/* Unconditionally log net taps. */
 		netif_notice(tp, link, dev, "Promiscuous mode enabled\n");
-		rx_mode =
-		    AcceptBroadcast | AcceptMulticast | AcceptMyPhys |
-		    AcceptAllPhys;
-		mc_filter[1] = mc_filter[0] = 0xffffffff;
-	} else if ((netdev_mc_count(dev) > multicast_filter_limit) ||
-		   (dev->flags & IFF_ALLMULTI)) {
-		/* Too many to filter perfectly -- accept all multicasts. */
-		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
-		mc_filter[1] = mc_filter[0] = 0xffffffff;
+		rx_mode |= AcceptAllPhys;
+	} else if (netdev_mc_count(dev) > MC_FILTER_LIMIT ||
+		   dev->flags & IFF_ALLMULTI ||
+		   tp->mac_version == RTL_GIGA_MAC_VER_35) {
+		/* accept all multicasts */
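+		/* (for VER_35 this also preserves the old unconditional
+		 * accept-all-multicast quirk)
+		 */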
+	} else if (netdev_mc_empty(dev)) {
+		rx_mode &= ~AcceptMulticast;
 	} else {
 		struct netdev_hw_addr *ha;
 
-		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
 		netdev_for_each_mc_addr(ha, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
-			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
-			rx_mode |= AcceptMulticast;
+			u32 bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
+			mc_filter[bit_nr >> 5] |= BIT(bit_nr & 31);
+		}
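+		/* ether_crc() returns a 32-bit CRC whose top 6 bits index one
+		 * of 64 filter bits; e.g. bit_nr 40 sets bit 8 of mc_filter[1]
+		 */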
+
+		if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
+			tmp = mc_filter[0];
+			mc_filter[0] = swab32(mc_filter[1]);
+			mc_filter[1] = swab32(tmp);
 		}
 	}
 
 	if (dev->features & NETIF_F_RXALL)
 		rx_mode |= (AcceptErr | AcceptRunt);
 
-	tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
-
-	if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
-		u32 data = mc_filter[0];
-
-		mc_filter[0] = swab32(mc_filter[1]);
-		mc_filter[1] = swab32(data);
-	}
-
-	if (tp->mac_version == RTL_GIGA_MAC_VER_35)
-		mc_filter[1] = mc_filter[0] = 0xffffffff;
-
 	RTL_W32(tp, MAR0 + 4, mc_filter[1]);
 	RTL_W32(tp, MAR0 + 0, mc_filter[0]);
 
-	RTL_W32(tp, RxConfig, tmp);
-}
-
-static void rtl_hw_start(struct  rtl8169_private *tp)
-{
-	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-
-	tp->hw_start(tp);
-
-	rtl_set_rx_max_size(tp);
-	rtl_set_rx_tx_desc_registers(tp);
-	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
-	/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
-	RTL_R8(tp, IntrMask);
-	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-	rtl_init_rxcfg(tp);
-	rtl_set_tx_config_registers(tp);
-
-	rtl_set_rx_mode(tp->dev);
-	/* no early-rx interrupts */
-	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
-	rtl_irq_enable_all(tp);
-}
-
-static void rtl_hw_start_8169(struct rtl8169_private *tp)
-{
-	if (tp->mac_version == RTL_GIGA_MAC_VER_05)
-		pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
-
-	RTL_W8(tp, EarlyTxThres, NoEarlyTx);
-
-	tp->cp_cmd |= PCIMulRW;
-
-	if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
-	    tp->mac_version == RTL_GIGA_MAC_VER_03) {
-		netif_dbg(tp, drv, tp->dev,
-			  "Set MAC Reg C+CR Offset 0xe0. Bit 3 and Bit 14 MUST be 1\n");
-		tp->cp_cmd |= (1 << 14);
-	}
-
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
-
-	rtl8169_set_magic_reg(tp, tp->mac_version);
-
-	/*
-	 * Undocumented corner. Supposedly:
-	 * (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
-	 */
-	RTL_W16(tp, IntrMitigate, 0x0000);
-
-	RTL_W32(tp, RxMissed, 0);
+	tmp = RTL_R32(tp, RxConfig);
+	RTL_W32(tp, RxConfig, (tmp & ~RX_CONFIG_ACCEPT_MASK) | rx_mode);
 }
 
 DECLARE_RTL_COND(rtl_csiar_cond)
@@ -4754,8 +4391,8 @@
 	u16 bits;
 };
 
-static void rtl_ephy_init(struct rtl8169_private *tp, const struct ephy_info *e,
-			  int len)
+static void __rtl_ephy_init(struct rtl8169_private *tp,
+			    const struct ephy_info *e, int len)
 {
 	u16 w;
 
@@ -4766,6 +4403,8 @@
 	}
 }
 
+#define rtl_ephy_init(tp, a) __rtl_ephy_init(tp, a, ARRAY_SIZE(a))
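+/* requires a true array so that ARRAY_SIZE() can supply the entry count,
+ * e.g. rtl_ephy_init(tp, e_info_8168cp)
+ */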
+
 static void rtl_disable_clock_request(struct rtl8169_private *tp)
 {
 	pcie_capability_clear_word(tp->pci_dev, PCI_EXP_LNKCTL,
@@ -4778,23 +4417,16 @@
 				 PCI_EXP_LNKCTL_CLKREQ_EN);
 }
 
-static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
+static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp)
 {
-	u8 data;
-
-	data = RTL_R8(tp, Config3);
-
-	if (enable)
-		data |= Rdy_to_L23;
-	else
-		data &= ~Rdy_to_L23;
-
-	RTL_W8(tp, Config3, data);
+	/* work around an issue when PCI reset occurs during L2/L3 state */
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Rdy_to_L23);
 }
 
 static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
 {
-	if (enable) {
+	/* Don't enable ASPM in the chip if the OS can't control ASPM */
+	if (enable && tp->aspm_manageable) {
 		RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
 		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
 	} else {
@@ -4805,25 +4437,33 @@
 	udelay(10);
 }
 
+static void rtl_set_fifo_size(struct rtl8169_private *tp, u16 rx_stat,
+			      u16 tx_stat, u16 rx_dyn, u16 tx_dyn)
+{
+	/* Usage of dynamic vs. static FIFO is controlled by bit
+	 * TXCFG_AUTO_FIFO. Exact meaning of FIFO values isn't known.
+	 */
+	rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, (rx_stat << 16) | rx_dyn);
+	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, (tx_stat << 16) | tx_dyn);
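+	/* e.g. rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06) produces the
+	 * 0x00100002 / 0x00100006 writes the old 8168e_2 code did directly
+	 */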
+}
+
+static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp,
+					  u8 low, u8 high)
+{
+	/* FIFO thresholds for pause flow control */
+	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, low);
+	rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, high);
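+	/* e.g. rtl8168g_set_pause_thresholds(tp, 0x38, 0x48) stands in for
+	 * the old direct writes of 0x38 to 0xcc and 0x48 to 0xd0
+	 */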
+}
+
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
-	tp->cp_cmd &= CPCMD_QUIRK_MASK;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
-
-	if (tp->dev->mtu <= ETH_DATA_LEN) {
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
-					 PCI_EXP_DEVCTL_NOSNOOP_EN);
-	}
 }
 
 static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
 {
 	rtl_hw_start_8168bb(tp);
 
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
@@ -4833,13 +4473,7 @@
 
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_disable_clock_request(tp);
-
-	tp->cp_cmd &= CPCMD_QUIRK_MASK;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -4854,7 +4488,7 @@
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_ephy_init(tp, e_info_8168cp, ARRAY_SIZE(e_info_8168cp));
+	rtl_ephy_init(tp, e_info_8168cp);
 
 	__rtl_hw_start_8168cp(tp);
 }
@@ -4864,12 +4498,6 @@
 	rtl_set_def_aspm_entry_latency(tp);
 
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-	tp->cp_cmd &= CPCMD_QUIRK_MASK;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
@@ -4880,14 +4508,6 @@
 
 	/* Magic. */
 	RTL_W8(tp, DBG_REG, 0x20);
-
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-	tp->cp_cmd &= CPCMD_QUIRK_MASK;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
@@ -4902,7 +4522,7 @@
 
 	RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
 
-	rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
+	rtl_ephy_init(tp, e_info_8168c_1);
 
 	__rtl_hw_start_8168cp(tp);
 }
@@ -4911,12 +4531,12 @@
 {
 	static const struct ephy_info e_info_8168c_2[] = {
 		{ 0x01, 0,	0x0001 },
-		{ 0x03, 0x0400,	0x0220 }
+		{ 0x03, 0x0400,	0x0020 }
 	};
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_ephy_init(tp, e_info_8168c_2, ARRAY_SIZE(e_info_8168c_2));
+	rtl_ephy_init(tp, e_info_8168c_2);
 
 	__rtl_hw_start_8168cp(tp);
 }
@@ -4939,13 +4559,8 @@
 
 	rtl_disable_clock_request(tp);
 
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-	tp->cp_cmd &= CPCMD_QUIRK_MASK;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 }
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
@@ -4955,8 +4570,6 @@
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
 	rtl_disable_clock_request(tp);
 }
 
@@ -4965,16 +4578,15 @@
 	static const struct ephy_info e_info_8168d_4[] = {
 		{ 0x0b, 0x0000,	0x0048 },
 		{ 0x19, 0x0020,	0x0050 },
-		{ 0x0c, 0x0100,	0x0020 }
+		{ 0x0c, 0x0100,	0x0020 },
+		{ 0x10, 0x0004,	0x0000 },
 	};
 
 	rtl_set_def_aspm_entry_latency(tp);
 
 	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
-	rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
+	rtl_ephy_init(tp, e_info_8168d_4);
 
 	rtl_enable_clock_request(tp);
 }
@@ -4999,12 +4611,7 @@
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
-
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
+	rtl_ephy_init(tp, e_info_8168e_1);
 
 	rtl_disable_clock_request(tp);
 
@@ -5019,33 +4626,28 @@
 {
 	static const struct ephy_info e_info_8168e_2[] = {
 		{ 0x09, 0x0000,	0x0080 },
-		{ 0x19, 0x0000,	0x0224 }
+		{ 0x19, 0x0000,	0x0224 },
+		{ 0x00, 0x0000,	0x0004 },
+		{ 0x0c, 0x3df0,	0x0200 },
 	};
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
+	rtl_ephy_init(tp, e_info_8168e_2);
 
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00100002, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x07ff0060, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
-
-	RTL_W8(tp, MaxTxPacketSize, EarlySize);
+	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
+	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
+	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050);
+	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x07ff0060);
+	rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_0001, BIT(4));
+	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00);
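+	/* rtl_w0w1_eri(tp, addr, mask, set, clear); rtl_eri_set_bits() and
+	 * rtl_eri_clear_bits() are its pure-set and pure-clear shorthands
+	 */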
 
 	rtl_disable_clock_request(tp);
 
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
-	/* Adjust EEE LED frequency */
-	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
+	rtl8168_config_eee_mac(tp);
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
@@ -5060,18 +4662,14 @@
 
 	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00100002, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0x1d0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
-
-	RTL_W8(tp, MaxTxPacketSize, EarlySize);
+	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
+	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
+	rtl_reset_packet_filter(tp);
+	rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_0001, BIT(4));
+	rtl_eri_set_bits(tp, 0x1d0, ERIAR_MASK_0001, BIT(4));
+	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050);
+	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060);
 
 	rtl_disable_clock_request(tp);
 
@@ -5079,6 +4677,8 @@
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
 	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
+
+	rtl8168_config_eee_mac(tp);
 }
 
 static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
@@ -5087,17 +4687,16 @@
 		{ 0x06, 0x00c0,	0x0020 },
 		{ 0x08, 0x0001,	0x0002 },
 		{ 0x09, 0x0000,	0x0080 },
-		{ 0x19, 0x0000,	0x0224 }
+		{ 0x19, 0x0000,	0x0224 },
+		{ 0x00, 0x0000,	0x0004 },
+		{ 0x0c, 0x3df0,	0x0200 },
 	};
 
 	rtl_hw_start_8168f(tp);
 
-	rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1));
+	rtl_ephy_init(tp, e_info_8168f_1);
 
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
-
-	/* Adjust EEE LED frequency */
-	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
+	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -5105,53 +4704,49 @@
 	static const struct ephy_info e_info_8168f_1[] = {
 		{ 0x06, 0x00c0,	0x0020 },
 		{ 0x0f, 0xffff,	0x5200 },
-		{ 0x1e, 0x0000,	0x4000 },
-		{ 0x19, 0x0000,	0x0224 }
+		{ 0x19, 0x0000,	0x0224 },
+		{ 0x00, 0x0000,	0x0004 },
+		{ 0x0c, 0x3df0,	0x0200 },
 	};
 
 	rtl_hw_start_8168f(tp);
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 
-	rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1));
+	rtl_ephy_init(tp, e_info_8168f_1);
 
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0x0000, ERIAR_EXGMAC);
+	rtl_eri_set_bits(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00);
 }
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 {
-	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
+	rtl_set_fifo_size(tp, 0x08, 0x10, 0x02, 0x06);
+	rtl8168g_set_pause_thresholds(tp, 0x38, 0x48);
 
 	rtl_set_def_aspm_entry_latency(tp);
 
 	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
+	rtl_reset_packet_filter(tp);
+	rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f);
 
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
-	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
 
-	/* Adjust EEE LED frequency */
-	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
+	rtl8168_config_eee_mac(tp);
 
-	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
+	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06);
+	rtl_eri_clear_bits(tp, 0x1b0, ERIAR_MASK_0011, BIT(12));
 
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 }
 
 static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
 {
 	static const struct ephy_info e_info_8168g_1[] = {
-		{ 0x00, 0x0000,	0x0008 },
-		{ 0x0c, 0x37d0,	0x0820 },
+		{ 0x00, 0x0008,	0x0000 },
+		{ 0x0c, 0x3ff0,	0x0820 },
 		{ 0x1e, 0x0000,	0x0001 },
 		{ 0x19, 0x8000,	0x0000 }
 	};
@@ -5160,17 +4755,22 @@
 
 	/* disable aspm and clock request before accessing ephy */
 	rtl_hw_aspm_clkreq_enable(tp, false);
-	rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
+	rtl_ephy_init(tp, e_info_8168g_1);
 	rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 {
 	static const struct ephy_info e_info_8168g_2[] = {
-		{ 0x00, 0x0000,	0x0008 },
-		{ 0x0c, 0x3df0,	0x0200 },
-		{ 0x19, 0xffff,	0xfc00 },
-		{ 0x1e, 0xffff,	0x20eb }
+		{ 0x00, 0x0008,	0x0000 },
+		{ 0x0c, 0x3ff0,	0x0820 },
+		{ 0x19, 0xffff,	0x7c00 },
+		{ 0x1e, 0xffff,	0x20eb },
+		{ 0x0d, 0xffff,	0x1666 },
+		{ 0x00, 0xffff,	0x10a3 },
+		{ 0x06, 0xffff,	0xf050 },
+		{ 0x04, 0x0000,	0x0010 },
+		{ 0x1d, 0x4000,	0x0000 },
 	};
 
 	rtl_hw_start_8168g(tp);
@@ -5178,79 +4778,215 @@
 	/* disable aspm and clock request before accessing ephy */
 	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
 	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
-	rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
+	rtl_ephy_init(tp, e_info_8168g_2);
 }
 
 static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
 {
 	static const struct ephy_info e_info_8411_2[] = {
-		{ 0x00, 0x0000,	0x0008 },
-		{ 0x0c, 0x3df0,	0x0200 },
-		{ 0x0f, 0xffff,	0x5200 },
-		{ 0x19, 0x0020,	0x0000 },
-		{ 0x1e, 0x0000,	0x2000 }
+		{ 0x00, 0x0008,	0x0000 },
+		{ 0x0c, 0x37d0,	0x0820 },
+		{ 0x1e, 0x0000,	0x0001 },
+		{ 0x19, 0x8021,	0x0000 },
+		{ 0x1e, 0x0000,	0x2000 },
+		{ 0x0d, 0x0100,	0x0200 },
+		{ 0x00, 0x0000,	0x0080 },
+		{ 0x06, 0x0000,	0x0010 },
+		{ 0x04, 0x0000,	0x0010 },
+		{ 0x1d, 0x0000,	0x4000 },
 	};
 
 	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before accessing ephy */
 	rtl_hw_aspm_clkreq_enable(tp, false);
-	rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
+	rtl_ephy_init(tp, e_info_8411_2);
+
+	/* The following Realtek-provided magic fixes an issue with the RX unit
+	 * getting confused after the PHY has been powered down.
+	 */
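+	/* Observed sequence: clear the patch slots at 0xFC28..0xFC36 and zero
+	 * 0xFC26 (apparently the enable flag), load the MCU code at 0xF800..,
+	 * then set 0xFC26 = 0x8000 and fill the slots with the patch entry
+	 * addresses.
+	 */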
+	r8168_mac_ocp_write(tp, 0xFC28, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC2A, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC2C, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC2E, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC30, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC32, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC34, 0x0000);
+	r8168_mac_ocp_write(tp, 0xFC36, 0x0000);
+	mdelay(3);
+	r8168_mac_ocp_write(tp, 0xFC26, 0x0000);
+
+	r8168_mac_ocp_write(tp, 0xF800, 0xE008);
+	r8168_mac_ocp_write(tp, 0xF802, 0xE00A);
+	r8168_mac_ocp_write(tp, 0xF804, 0xE00C);
+	r8168_mac_ocp_write(tp, 0xF806, 0xE00E);
+	r8168_mac_ocp_write(tp, 0xF808, 0xE027);
+	r8168_mac_ocp_write(tp, 0xF80A, 0xE04F);
+	r8168_mac_ocp_write(tp, 0xF80C, 0xE05E);
+	r8168_mac_ocp_write(tp, 0xF80E, 0xE065);
+	r8168_mac_ocp_write(tp, 0xF810, 0xC602);
+	r8168_mac_ocp_write(tp, 0xF812, 0xBE00);
+	r8168_mac_ocp_write(tp, 0xF814, 0x0000);
+	r8168_mac_ocp_write(tp, 0xF816, 0xC502);
+	r8168_mac_ocp_write(tp, 0xF818, 0xBD00);
+	r8168_mac_ocp_write(tp, 0xF81A, 0x074C);
+	r8168_mac_ocp_write(tp, 0xF81C, 0xC302);
+	r8168_mac_ocp_write(tp, 0xF81E, 0xBB00);
+	r8168_mac_ocp_write(tp, 0xF820, 0x080A);
+	r8168_mac_ocp_write(tp, 0xF822, 0x6420);
+	r8168_mac_ocp_write(tp, 0xF824, 0x48C2);
+	r8168_mac_ocp_write(tp, 0xF826, 0x8C20);
+	r8168_mac_ocp_write(tp, 0xF828, 0xC516);
+	r8168_mac_ocp_write(tp, 0xF82A, 0x64A4);
+	r8168_mac_ocp_write(tp, 0xF82C, 0x49C0);
+	r8168_mac_ocp_write(tp, 0xF82E, 0xF009);
+	r8168_mac_ocp_write(tp, 0xF830, 0x74A2);
+	r8168_mac_ocp_write(tp, 0xF832, 0x8CA5);
+	r8168_mac_ocp_write(tp, 0xF834, 0x74A0);
+	r8168_mac_ocp_write(tp, 0xF836, 0xC50E);
+	r8168_mac_ocp_write(tp, 0xF838, 0x9CA2);
+	r8168_mac_ocp_write(tp, 0xF83A, 0x1C11);
+	r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0);
+	r8168_mac_ocp_write(tp, 0xF83E, 0xE006);
+	r8168_mac_ocp_write(tp, 0xF840, 0x74F8);
+	r8168_mac_ocp_write(tp, 0xF842, 0x48C4);
+	r8168_mac_ocp_write(tp, 0xF844, 0x8CF8);
+	r8168_mac_ocp_write(tp, 0xF846, 0xC404);
+	r8168_mac_ocp_write(tp, 0xF848, 0xBC00);
+	r8168_mac_ocp_write(tp, 0xF84A, 0xC403);
+	r8168_mac_ocp_write(tp, 0xF84C, 0xBC00);
+	r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2);
+	r8168_mac_ocp_write(tp, 0xF850, 0x0C0A);
+	r8168_mac_ocp_write(tp, 0xF852, 0xE434);
+	r8168_mac_ocp_write(tp, 0xF854, 0xD3C0);
+	r8168_mac_ocp_write(tp, 0xF856, 0x49D9);
+	r8168_mac_ocp_write(tp, 0xF858, 0xF01F);
+	r8168_mac_ocp_write(tp, 0xF85A, 0xC526);
+	r8168_mac_ocp_write(tp, 0xF85C, 0x64A5);
+	r8168_mac_ocp_write(tp, 0xF85E, 0x1400);
+	r8168_mac_ocp_write(tp, 0xF860, 0xF007);
+	r8168_mac_ocp_write(tp, 0xF862, 0x0C01);
+	r8168_mac_ocp_write(tp, 0xF864, 0x8CA5);
+	r8168_mac_ocp_write(tp, 0xF866, 0x1C15);
+	r8168_mac_ocp_write(tp, 0xF868, 0xC51B);
+	r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0);
+	r8168_mac_ocp_write(tp, 0xF86C, 0xE013);
+	r8168_mac_ocp_write(tp, 0xF86E, 0xC519);
+	r8168_mac_ocp_write(tp, 0xF870, 0x74A0);
+	r8168_mac_ocp_write(tp, 0xF872, 0x48C4);
+	r8168_mac_ocp_write(tp, 0xF874, 0x8CA0);
+	r8168_mac_ocp_write(tp, 0xF876, 0xC516);
+	r8168_mac_ocp_write(tp, 0xF878, 0x74A4);
+	r8168_mac_ocp_write(tp, 0xF87A, 0x48C8);
+	r8168_mac_ocp_write(tp, 0xF87C, 0x48CA);
+	r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4);
+	r8168_mac_ocp_write(tp, 0xF880, 0xC512);
+	r8168_mac_ocp_write(tp, 0xF882, 0x1B00);
+	r8168_mac_ocp_write(tp, 0xF884, 0x9BA0);
+	r8168_mac_ocp_write(tp, 0xF886, 0x1B1C);
+	r8168_mac_ocp_write(tp, 0xF888, 0x483F);
+	r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2);
+	r8168_mac_ocp_write(tp, 0xF88C, 0x1B04);
+	r8168_mac_ocp_write(tp, 0xF88E, 0xC508);
+	r8168_mac_ocp_write(tp, 0xF890, 0x9BA0);
+	r8168_mac_ocp_write(tp, 0xF892, 0xC505);
+	r8168_mac_ocp_write(tp, 0xF894, 0xBD00);
+	r8168_mac_ocp_write(tp, 0xF896, 0xC502);
+	r8168_mac_ocp_write(tp, 0xF898, 0xBD00);
+	r8168_mac_ocp_write(tp, 0xF89A, 0x0300);
+	r8168_mac_ocp_write(tp, 0xF89C, 0x051E);
+	r8168_mac_ocp_write(tp, 0xF89E, 0xE434);
+	r8168_mac_ocp_write(tp, 0xF8A0, 0xE018);
+	r8168_mac_ocp_write(tp, 0xF8A2, 0xE092);
+	r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20);
+	r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0);
+	r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F);
+	r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4);
+	r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3);
+	r8168_mac_ocp_write(tp, 0xF8AE, 0xF007);
+	r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0);
+	r8168_mac_ocp_write(tp, 0xF8B2, 0xF103);
+	r8168_mac_ocp_write(tp, 0xF8B4, 0xC607);
+	r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00);
+	r8168_mac_ocp_write(tp, 0xF8B8, 0xC606);
+	r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00);
+	r8168_mac_ocp_write(tp, 0xF8BC, 0xC602);
+	r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00);
+	r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C);
+	r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28);
+	r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C);
+	r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00);
+	r8168_mac_ocp_write(tp, 0xF8C8, 0xC707);
+	r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00);
+	r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2);
+	r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1);
+	r8168_mac_ocp_write(tp, 0xF8D0, 0xC502);
+	r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00);
+	r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA);
+	r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0);
+	r8168_mac_ocp_write(tp, 0xF8D8, 0xC502);
+	r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00);
+	r8168_mac_ocp_write(tp, 0xF8DC, 0x0132);
+
+	r8168_mac_ocp_write(tp, 0xFC26, 0x8000);
+
+	r8168_mac_ocp_write(tp, 0xFC2A, 0x0743);
+	r8168_mac_ocp_write(tp, 0xFC2C, 0x0801);
+	r8168_mac_ocp_write(tp, 0xFC2E, 0x0BE9);
+	r8168_mac_ocp_write(tp, 0xFC30, 0x02FD);
+	r8168_mac_ocp_write(tp, 0xFC32, 0x0C25);
+	r8168_mac_ocp_write(tp, 0xFC34, 0x00A9);
+	r8168_mac_ocp_write(tp, 0xFC36, 0x012D);
+
 	rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 {
-	int rg_saw_cnt;
-	u32 data;
 	static const struct ephy_info e_info_8168h_1[] = {
 		{ 0x1e, 0x0800,	0x0001 },
 		{ 0x1d, 0x0000,	0x0800 },
 		{ 0x05, 0xffff,	0x2089 },
 		{ 0x06, 0xffff,	0x5881 },
-		{ 0x04, 0xffff,	0x154a },
+		{ 0x04, 0xffff,	0x854a },
 		{ 0x01, 0xffff,	0x068b }
 	};
+	int rg_saw_cnt;
 
 	/* disable aspm and clock request before accessing ephy */
 	rtl_hw_aspm_clkreq_enable(tp, false);
-	rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
+	rtl_ephy_init(tp, e_info_8168h_1);
 
-	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
+	rtl_set_fifo_size(tp, 0x08, 0x10, 0x02, 0x06);
+	rtl8168g_set_pause_thresholds(tp, 0x38, 0x48);
 
 	rtl_set_def_aspm_entry_latency(tp);
 
 	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
+	rtl_reset_packet_filter(tp);
 
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_1111, 0x0010, 0x00, ERIAR_EXGMAC);
+	rtl_eri_set_bits(tp, 0xdc, ERIAR_MASK_1111, BIT(4));
 
-	rtl_w0w1_eri(tp, 0xd4, ERIAR_MASK_1111, 0x1f00, 0x00, ERIAR_EXGMAC);
+	rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f00);
 
-	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
-	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
 
-	/* Adjust EEE LED frequency */
-	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
+	rtl8168_config_eee_mac(tp);
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 	RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
-	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
+	rtl_eri_clear_bits(tp, 0x1b0, ERIAR_MASK_0011, BIT(12));
 
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 
 	rtl_writephy(tp, 0x1f, 0x0c42);
 	rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff);
@@ -5260,31 +4996,13 @@
 
 		sw_cnt_1ms_ini = 16000000/rg_saw_cnt;
 		sw_cnt_1ms_ini &= 0x0fff;
-		data = r8168_mac_ocp_read(tp, 0xd412);
-		data &= ~0x0fff;
-		data |= sw_cnt_1ms_ini;
-		r8168_mac_ocp_write(tp, 0xd412, data);
+		r8168_mac_ocp_modify(tp, 0xd412, 0x0fff, sw_cnt_1ms_ini);
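+		/* e.g. a hypothetical rg_saw_cnt of 5000 gives 16000000/5000 =
+		 * 3200 (0xc80), fitting the 12-bit field masked by 0x0fff
+		 */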
 	}
 
-	data = r8168_mac_ocp_read(tp, 0xe056);
-	data &= ~0xf0;
-	data |= 0x70;
-	r8168_mac_ocp_write(tp, 0xe056, data);
-
-	data = r8168_mac_ocp_read(tp, 0xe052);
-	data &= ~0x6000;
-	data |= 0x8008;
-	r8168_mac_ocp_write(tp, 0xe052, data);
-
-	data = r8168_mac_ocp_read(tp, 0xe0d6);
-	data &= ~0x01ff;
-	data |= 0x017f;
-	r8168_mac_ocp_write(tp, 0xe0d6, data);
-
-	data = r8168_mac_ocp_read(tp, 0xd420);
-	data &= ~0x0fff;
-	data |= 0x047f;
-	r8168_mac_ocp_write(tp, 0xd420, data);
+	r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0070);
+	r8168_mac_ocp_modify(tp, 0xe052, 0x6000, 0x8008);
+	r8168_mac_ocp_modify(tp, 0xe0d6, 0x01ff, 0x017f);
+	r8168_mac_ocp_modify(tp, 0xd420, 0x0fff, 0x047f);
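+	/* r8168_mac_ocp_modify(tp, reg, clear, set) condenses each old
+	 * read / mask / or / write sequence into a single call
+	 */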
 
 	r8168_mac_ocp_write(tp, 0xe63e, 0x0001);
 	r8168_mac_ocp_write(tp, 0xe63e, 0x0000);
@@ -5298,36 +5016,31 @@
 {
 	rtl8168ep_stop_cmac(tp);
 
-	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
+	rtl_set_fifo_size(tp, 0x08, 0x10, 0x02, 0x06);
+	rtl8168g_set_pause_thresholds(tp, 0x2f, 0x5f);
 
 	rtl_set_def_aspm_entry_latency(tp);
 
 	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
+	rtl_reset_packet_filter(tp);
 
-	rtl_w0w1_eri(tp, 0xd4, ERIAR_MASK_1111, 0x1f80, 0x00, ERIAR_EXGMAC);
+	rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f80);
 
-	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
-	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
+	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
 
-	/* Adjust EEE LED frequency */
-	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
+	rtl8168_config_eee_mac(tp);
 
-	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
+	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06);
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 }
 
 static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
@@ -5342,7 +5055,7 @@
 
 	/* disable aspm and clock request before accessing ephy */
 	rtl_hw_aspm_clkreq_enable(tp, false);
-	rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
+	rtl_ephy_init(tp, e_info_8168ep_1);
 
 	rtl_hw_start_8168ep(tp);
 
@@ -5359,7 +5072,7 @@
 
 	/* disable aspm and clock request before accessing ephy */
 	rtl_hw_aspm_clkreq_enable(tp, false);
-	rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
+	rtl_ephy_init(tp, e_info_8168ep_2);
 
 	rtl_hw_start_8168ep(tp);
 
@@ -5371,161 +5084,29 @@
 
 static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 {
-	u32 data;
 	static const struct ephy_info e_info_8168ep_3[] = {
-		{ 0x00, 0xffff,	0x10a3 },
-		{ 0x19, 0xffff,	0x7c00 },
-		{ 0x1e, 0xffff,	0x20eb },
-		{ 0x0d, 0xffff,	0x1666 }
+		{ 0x00, 0x0000,	0x0080 },
+		{ 0x0d, 0x0100,	0x0200 },
+		{ 0x19, 0x8021,	0x0000 },
+		{ 0x1e, 0x0000,	0x2000 },
 	};
 
 	/* disable aspm and clock request before accessing ephy */
 	rtl_hw_aspm_clkreq_enable(tp, false);
-	rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
+	rtl_ephy_init(tp, e_info_8168ep_3);
 
 	rtl_hw_start_8168ep(tp);
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 	RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
-	data = r8168_mac_ocp_read(tp, 0xd3e2);
-	data &= 0xf000;
-	data |= 0x0271;
-	r8168_mac_ocp_write(tp, 0xd3e2, data);
-
-	data = r8168_mac_ocp_read(tp, 0xd3e4);
-	data &= 0xff00;
-	r8168_mac_ocp_write(tp, 0xd3e4, data);
-
-	data = r8168_mac_ocp_read(tp, 0xe860);
-	data |= 0x0080;
-	r8168_mac_ocp_write(tp, 0xe860, data);
+	r8168_mac_ocp_modify(tp, 0xd3e2, 0x0fff, 0x0271);
+	r8168_mac_ocp_modify(tp, 0xd3e4, 0x00ff, 0x0000);
+	r8168_mac_ocp_modify(tp, 0xe860, 0x0000, 0x0080);
 
 	rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
-static void rtl_hw_start_8168(struct rtl8169_private *tp)
-{
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
-	tp->cp_cmd &= ~INTT_MASK;
-	tp->cp_cmd |= PktCntrDisable | INTT_1;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
-
-	RTL_W16(tp, IntrMitigate, 0x5151);
-
-	/* Work around for RxFIFO overflow. */
-	if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
-		tp->event_slow |= RxFIFOOver | PCSTimeout;
-		tp->event_slow &= ~RxOverflow;
-	}
-
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_11:
-		rtl_hw_start_8168bb(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_12:
-	case RTL_GIGA_MAC_VER_17:
-		rtl_hw_start_8168bef(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_18:
-		rtl_hw_start_8168cp_1(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_19:
-		rtl_hw_start_8168c_1(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_20:
-		rtl_hw_start_8168c_2(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_21:
-		rtl_hw_start_8168c_3(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_22:
-		rtl_hw_start_8168c_4(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_23:
-		rtl_hw_start_8168cp_2(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_24:
-		rtl_hw_start_8168cp_3(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_25:
-	case RTL_GIGA_MAC_VER_26:
-	case RTL_GIGA_MAC_VER_27:
-		rtl_hw_start_8168d(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_28:
-		rtl_hw_start_8168d_4(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_31:
-		rtl_hw_start_8168dp(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_32:
-	case RTL_GIGA_MAC_VER_33:
-		rtl_hw_start_8168e_1(tp);
-		break;
-	case RTL_GIGA_MAC_VER_34:
-		rtl_hw_start_8168e_2(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_35:
-	case RTL_GIGA_MAC_VER_36:
-		rtl_hw_start_8168f_1(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_38:
-		rtl_hw_start_8411(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_40:
-	case RTL_GIGA_MAC_VER_41:
-		rtl_hw_start_8168g_1(tp);
-		break;
-	case RTL_GIGA_MAC_VER_42:
-		rtl_hw_start_8168g_2(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_44:
-		rtl_hw_start_8411_2(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_45:
-	case RTL_GIGA_MAC_VER_46:
-		rtl_hw_start_8168h_1(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_49:
-		rtl_hw_start_8168ep_1(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_50:
-		rtl_hw_start_8168ep_2(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_51:
-		rtl_hw_start_8168ep_3(tp);
-		break;
-
-	default:
-		netif_err(tp, drv, tp->dev,
-			  "unknown chipset (mac_version = %d)\n",
-			  tp->mac_version);
-		break;
-	}
-}
-
 static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 {
 	static const struct ephy_info e_info_8102e_1[] = {
@@ -5554,7 +5135,7 @@
 	if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
 		RTL_W8(tp, Config1, cfg1 & ~LEDS0);
 
-	rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
+	rtl_ephy_init(tp, e_info_8102e_1);
 }
 
 static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
@@ -5596,9 +5177,9 @@
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
 
-	rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
+	rtl_ephy_init(tp, e_info_8105e_1);
 
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 }
 
 static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
@@ -5621,19 +5202,17 @@
 
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
-	rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
+	rtl_ephy_init(tp, e_info_8402);
 
 	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
-	rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00, ERIAR_EXGMAC);
+	rtl_set_fifo_size(tp, 0x00, 0x00, 0x02, 0x06);
+	rtl_reset_packet_filter(tp);
+	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
+	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00);
 
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 }
 
 static void rtl_hw_start_8106(struct rtl8169_private *tp)
@@ -5647,72 +5226,277 @@
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 
-	rtl_pcie_state_l2l3_enable(tp, false);
+	rtl_pcie_state_l2l3_disable(tp);
 	rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
-static void rtl_hw_start_8101(struct rtl8169_private *tp)
+DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)
 {
-	if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
-		tp->event_slow &= ~RxFIFOOver;
+	return r8168_mac_ocp_read(tp, 0xe00e) & BIT(13);
+}
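
The DECLARE_RTL_COND() blocks define one-line predicates that the driver's
generic delay-loop helpers poll. A hedged sketch of the loop those helpers
are assumed to implement (generic names, not the driver's actual code):

	/* Re-check a predicate up to n times, sleeping delay_us in between;
	 * returns whether the condition was eventually met.
	 */
	static bool poll_cond(struct rtl8169_private *tp,
			      bool (*cond)(struct rtl8169_private *),
			      unsigned int delay_us, unsigned int n)
	{
		while (n--) {
			if (cond(tp))
				return true;
			udelay(delay_us);
		}
		return false;
	}
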
 
+static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
+{
+	rtl_pcie_state_l2l3_disable(tp);
+
+	RTL_W16(tp, 0x382, 0x221b);
+	RTL_W8(tp, 0x4500, 0);
+	RTL_W16(tp, 0x4800, 0);
+
+	/* disable UPS */
+	r8168_mac_ocp_modify(tp, 0xd40a, 0x0010, 0x0000);
+
+	RTL_W8(tp, Config1, RTL_R8(tp, Config1) & ~0x10);
+
+	r8168_mac_ocp_write(tp, 0xc140, 0xffff);
+	r8168_mac_ocp_write(tp, 0xc142, 0xffff);
+
+	r8168_mac_ocp_modify(tp, 0xd3e2, 0x0fff, 0x03a9);
+	r8168_mac_ocp_modify(tp, 0xd3e4, 0x00ff, 0x0000);
+	r8168_mac_ocp_modify(tp, 0xe860, 0x0000, 0x0080);
+
+	/* disable new tx descriptor format */
+	r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000);
+
+	r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400);
+	r8168_mac_ocp_modify(tp, 0xe63e, 0x0c30, 0x0020);
+	r8168_mac_ocp_modify(tp, 0xc0b4, 0x0000, 0x000c);
+	r8168_mac_ocp_modify(tp, 0xeb6a, 0x00ff, 0x0033);
+	r8168_mac_ocp_modify(tp, 0xeb50, 0x03e0, 0x0040);
+	r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030);
+	r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000);
+	r8168_mac_ocp_modify(tp, 0xe0c0, 0x4f0f, 0x4403);
+	r8168_mac_ocp_modify(tp, 0xe052, 0x0080, 0x0067);
+	r8168_mac_ocp_modify(tp, 0xc0ac, 0x0080, 0x1f00);
+	r8168_mac_ocp_modify(tp, 0xd430, 0x0fff, 0x047f);
+	r8168_mac_ocp_modify(tp, 0xe84c, 0x0000, 0x00c0);
+	r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000);
+	r8168_mac_ocp_modify(tp, 0xeb54, 0x0000, 0x0001);
+	udelay(1);
+	r8168_mac_ocp_modify(tp, 0xeb54, 0x0001, 0x0000);
+	RTL_W16(tp, 0x1880, RTL_R16(tp, 0x1880) & ~0x0030);
+
+	r8168_mac_ocp_write(tp, 0xe098, 0xc302);
+
+	rtl_udelay_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);
+
+	rtl8125_config_eee_mac(tp);
+
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+	udelay(10);
+}
+
+static void rtl_hw_start_8125_1(struct rtl8169_private *tp)
+{
+	static const struct ephy_info e_info_8125_1[] = {
+		{ 0x01, 0xffff, 0xa812 },
+		{ 0x09, 0xffff, 0x520c },
+		{ 0x04, 0xffff, 0xd000 },
+		{ 0x0d, 0xffff, 0xf702 },
+		{ 0x0a, 0xffff, 0x8653 },
+		{ 0x06, 0xffff, 0x001e },
+		{ 0x08, 0xffff, 0x3595 },
+		{ 0x20, 0xffff, 0x9455 },
+		{ 0x21, 0xffff, 0x99ff },
+		{ 0x02, 0xffff, 0x6046 },
+		{ 0x29, 0xffff, 0xfe00 },
+		{ 0x23, 0xffff, 0xab62 },
+
+		{ 0x41, 0xffff, 0xa80c },
+		{ 0x49, 0xffff, 0x520c },
+		{ 0x44, 0xffff, 0xd000 },
+		{ 0x4d, 0xffff, 0xf702 },
+		{ 0x4a, 0xffff, 0x8653 },
+		{ 0x46, 0xffff, 0x001e },
+		{ 0x48, 0xffff, 0x3595 },
+		{ 0x60, 0xffff, 0x9455 },
+		{ 0x61, 0xffff, 0x99ff },
+		{ 0x42, 0xffff, 0x6046 },
+		{ 0x69, 0xffff, 0xfe00 },
+		{ 0x63, 0xffff, 0xab62 },
+	};
+
+	rtl_set_def_aspm_entry_latency(tp);
+
+	/* disable aspm and clock request before access ephy */
+	rtl_hw_aspm_clkreq_enable(tp, false);
+	rtl_ephy_init(tp, e_info_8125_1);
+
+	rtl_hw_start_8125_common(tp);
+}
+
+static void rtl_hw_start_8125_2(struct rtl8169_private *tp)
+{
+	static const struct ephy_info e_info_8125_2[] = {
+		{ 0x04, 0xffff, 0xd000 },
+		{ 0x0a, 0xffff, 0x8653 },
+		{ 0x23, 0xffff, 0xab66 },
+		{ 0x20, 0xffff, 0x9455 },
+		{ 0x21, 0xffff, 0x99ff },
+		{ 0x29, 0xffff, 0xfe04 },
+
+		{ 0x44, 0xffff, 0xd000 },
+		{ 0x4a, 0xffff, 0x8653 },
+		{ 0x63, 0xffff, 0xab66 },
+		{ 0x60, 0xffff, 0x9455 },
+		{ 0x61, 0xffff, 0x99ff },
+		{ 0x69, 0xffff, 0xfe04 },
+	};
+
+	rtl_set_def_aspm_entry_latency(tp);
+
+	/* disable aspm and clock request before access ephy */
+	rtl_hw_aspm_clkreq_enable(tp, false);
+	rtl_ephy_init(tp, e_info_8125_2);
+
+	rtl_hw_start_8125_common(tp);
+}
+
+static void rtl_hw_config(struct rtl8169_private *tp)
+{
+	static const rtl_generic_fct hw_configs[] = {
+		[RTL_GIGA_MAC_VER_07] = rtl_hw_start_8102e_1,
+		[RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3,
+		[RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2,
+		[RTL_GIGA_MAC_VER_10] = NULL,
+		[RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168bb,
+		[RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168bef,
+		[RTL_GIGA_MAC_VER_13] = NULL,
+		[RTL_GIGA_MAC_VER_14] = NULL,
+		[RTL_GIGA_MAC_VER_15] = NULL,
+		[RTL_GIGA_MAC_VER_16] = NULL,
+		[RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168bef,
+		[RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
+		[RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1,
+		[RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2,
+		[RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3,
+		[RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4,
+		[RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2,
+		[RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3,
+		[RTL_GIGA_MAC_VER_25] = rtl_hw_start_8168d,
+		[RTL_GIGA_MAC_VER_26] = rtl_hw_start_8168d,
+		[RTL_GIGA_MAC_VER_27] = rtl_hw_start_8168d,
+		[RTL_GIGA_MAC_VER_28] = rtl_hw_start_8168d_4,
+		[RTL_GIGA_MAC_VER_29] = rtl_hw_start_8105e_1,
+		[RTL_GIGA_MAC_VER_30] = rtl_hw_start_8105e_2,
+		[RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168dp,
+		[RTL_GIGA_MAC_VER_32] = rtl_hw_start_8168e_1,
+		[RTL_GIGA_MAC_VER_33] = rtl_hw_start_8168e_1,
+		[RTL_GIGA_MAC_VER_34] = rtl_hw_start_8168e_2,
+		[RTL_GIGA_MAC_VER_35] = rtl_hw_start_8168f_1,
+		[RTL_GIGA_MAC_VER_36] = rtl_hw_start_8168f_1,
+		[RTL_GIGA_MAC_VER_37] = rtl_hw_start_8402,
+		[RTL_GIGA_MAC_VER_38] = rtl_hw_start_8411,
+		[RTL_GIGA_MAC_VER_39] = rtl_hw_start_8106,
+		[RTL_GIGA_MAC_VER_40] = rtl_hw_start_8168g_1,
+		[RTL_GIGA_MAC_VER_41] = rtl_hw_start_8168g_1,
+		[RTL_GIGA_MAC_VER_42] = rtl_hw_start_8168g_2,
+		[RTL_GIGA_MAC_VER_43] = rtl_hw_start_8168g_2,
+		[RTL_GIGA_MAC_VER_44] = rtl_hw_start_8411_2,
+		[RTL_GIGA_MAC_VER_45] = rtl_hw_start_8168h_1,
+		[RTL_GIGA_MAC_VER_46] = rtl_hw_start_8168h_1,
+		[RTL_GIGA_MAC_VER_47] = rtl_hw_start_8168h_1,
+		[RTL_GIGA_MAC_VER_48] = rtl_hw_start_8168h_1,
+		[RTL_GIGA_MAC_VER_49] = rtl_hw_start_8168ep_1,
+		[RTL_GIGA_MAC_VER_50] = rtl_hw_start_8168ep_2,
+		[RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3,
+		[RTL_GIGA_MAC_VER_60] = rtl_hw_start_8125_1,
+		[RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125_2,
+	};
+
+	if (hw_configs[tp->mac_version])
+		hw_configs[tp->mac_version](tp);
+}
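
The hw_configs[] table replaces the long per-version switch statements
removed in this patch. The same designated-initializer dispatch pattern in
isolation, as a standalone sketch with hypothetical names:

	enum mac_ver { VER_A, VER_B, VER_C, VER_CNT };
	typedef void (*cfg_fct)(void *priv);

	static void cfg_a(void *priv) { /* version-specific setup */ }

	static void hw_config(enum mac_ver v, void *priv)
	{
		/* entries not listed are zero-initialized (NULL) and skipped */
		static const cfg_fct cfgs[VER_CNT] = {
			[VER_A] = cfg_a,
		};

		if (cfgs[v])
			cfgs[v](priv);
	}
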
+
+static void rtl_hw_start_8125(struct rtl8169_private *tp)
+{
+	int i;
+
+	/* disable interrupt coalescing */
+	for (i = 0xa00; i < 0xb00; i += 4)
+		RTL_W32(tp, i, 0);
+
+	rtl_hw_config(tp);
+}
+
+static void rtl_hw_start_8168(struct rtl8169_private *tp)
+{
 	if (tp->mac_version == RTL_GIGA_MAC_VER_13 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_16)
 		pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
 					 PCI_EXP_DEVCTL_NOSNOOP_EN);
 
-	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
+	if (rtl_is_8168evl_up(tp))
+		RTL_W8(tp, MaxTxPacketSize, EarlySize);
+	else
+		RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	tp->cp_cmd &= CPCMD_QUIRK_MASK;
-	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+	rtl_hw_config(tp);
 
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_07:
-		rtl_hw_start_8102e_1(tp);
-		break;
+	/* disable interrupt coalescing */
+	RTL_W16(tp, IntrMitigate, 0x0000);
+}
 
-	case RTL_GIGA_MAC_VER_08:
-		rtl_hw_start_8102e_3(tp);
-		break;
+static void rtl_hw_start_8169(struct rtl8169_private *tp)
+{
+	if (tp->mac_version == RTL_GIGA_MAC_VER_05)
+		pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
 
-	case RTL_GIGA_MAC_VER_09:
-		rtl_hw_start_8102e_2(tp);
-		break;
+	RTL_W8(tp, EarlyTxThres, NoEarlyTx);
 
-	case RTL_GIGA_MAC_VER_29:
-		rtl_hw_start_8105e_1(tp);
-		break;
-	case RTL_GIGA_MAC_VER_30:
-		rtl_hw_start_8105e_2(tp);
-		break;
+	tp->cp_cmd |= PCIMulRW;
 
-	case RTL_GIGA_MAC_VER_37:
-		rtl_hw_start_8402(tp);
-		break;
-
-	case RTL_GIGA_MAC_VER_39:
-		rtl_hw_start_8106(tp);
-		break;
-	case RTL_GIGA_MAC_VER_43:
-		rtl_hw_start_8168g_2(tp);
-		break;
-	case RTL_GIGA_MAC_VER_47:
-	case RTL_GIGA_MAC_VER_48:
-		rtl_hw_start_8168h_1(tp);
-		break;
+	if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
+	    tp->mac_version == RTL_GIGA_MAC_VER_03) {
+		netif_dbg(tp, drv, tp->dev,
+			  "Set MAC Reg C+CR Offset 0xe0. Bit 3 and Bit 14 MUST be 1\n");
+		tp->cp_cmd |= (1 << 14);
 	}
 
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+
+	rtl8169_set_magic_reg(tp, tp->mac_version);
+
+	RTL_W32(tp, RxMissed, 0);
+
+	/* disable interrupt coalescing */
 	RTL_W16(tp, IntrMitigate, 0x0000);
 }
 
+static void rtl_hw_start(struct rtl8169_private *tp)
+{
+	rtl_unlock_config_regs(tp);
+
+	tp->cp_cmd &= CPCMD_MASK;
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+		rtl_hw_start_8169(tp);
+	else if (rtl_is_8125(tp))
+		rtl_hw_start_8125(tp);
+	else
+		rtl_hw_start_8168(tp);
+
+	rtl_set_rx_max_size(tp);
+	rtl_set_rx_tx_desc_registers(tp);
+	rtl_lock_config_regs(tp);
+
+	rtl_jumbo_config(tp, tp->dev->mtu);
+
+	/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
+	RTL_R16(tp, CPlusCmd);
+	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
+	rtl_init_rxcfg(tp);
+	rtl_set_tx_config_registers(tp);
+	rtl_set_rx_mode(tp->dev);
+	rtl_irq_enable(tp);
+}
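
The "Turned it into a PCI commit" comment relies on a standard MMIO idiom:
a read from the device cannot complete until all earlier posted writes have
reached it. A minimal sketch of that idiom (kernel context, <linux/io.h>;
the register offset is a placeholder):

	static void commit_cfg(void __iomem *base)
	{
		writew(0x5151, base + 0xe2);	/* posted; completes asynchronously */
		/* the read-back forces the posted write to finish before the
		 * CPU continues - the "PCI commit"
		 */
		readw(base + 0xe2);
	}
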
+
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	if (new_mtu > ETH_DATA_LEN)
-		rtl_hw_jumbo_enable(tp);
-	else
-		rtl_hw_jumbo_disable(tp);
+	rtl_jumbo_config(tp, new_mtu);
 
 	dev->mtu = new_mtu;
 	netdev_update_features(dev);
@@ -5726,17 +5510,6 @@
 	desc->opts1 &= ~cpu_to_le32(DescOwn | RsvdMask);
 }
 
-static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
-				     void **data_buff, struct RxDesc *desc)
-{
-	dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr),
-			 R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
-
-	kfree(*data_buff);
-	*data_buff = NULL;
-	rtl8169_make_unusable_by_asic(desc);
-}
-
 static inline void rtl8169_mark_to_asic(struct RxDesc *desc)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
@@ -5747,56 +5520,43 @@
 	desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE);
 }
 
-static inline void *rtl8169_align(void *data)
+static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
+					  struct RxDesc *desc)
 {
-	return (void *)ALIGN((long)data, 16);
-}
-
-static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
-					     struct RxDesc *desc)
-{
-	void *data;
-	dma_addr_t mapping;
 	struct device *d = tp_to_dev(tp);
 	int node = dev_to_node(d);
+	dma_addr_t mapping;
+	struct page *data;
 
-	data = kmalloc_node(R8169_RX_BUF_SIZE, GFP_KERNEL, node);
+	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
 	if (!data)
 		return NULL;
 
-	if (rtl8169_align(data) != data) {
-		kfree(data);
-		data = kmalloc_node(R8169_RX_BUF_SIZE + 15, GFP_KERNEL, node);
-		if (!data)
-			return NULL;
-	}
-
-	mapping = dma_map_single(d, rtl8169_align(data), R8169_RX_BUF_SIZE,
-				 DMA_FROM_DEVICE);
+	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(d, mapping))) {
 		if (net_ratelimit())
 			netif_err(tp, drv, tp->dev, "Failed to map RX DMA!\n");
-		goto err_out;
+		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
+		return NULL;
 	}
 
 	desc->addr = cpu_to_le64(mapping);
 	rtl8169_mark_to_asic(desc);
-	return data;
 
-err_out:
-	kfree(data);
-	return NULL;
+	return data;
 }
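
rtl8169_alloc_rx_data() now uses naturally aligned pages instead of the old
kmalloc-and-realign dance, and unwinds the allocation if the DMA mapping
fails. The alloc-map-unwind shape in isolation (hypothetical name, kernel
context):

	static struct page *rx_page_alloc(struct device *d, size_t size,
					  dma_addr_t *map)
	{
		unsigned int order = get_order(size);
		struct page *p = alloc_pages(GFP_KERNEL, order);

		if (!p)
			return NULL;

		*map = dma_map_page(d, p, 0, size, DMA_FROM_DEVICE);
		if (dma_mapping_error(d, *map)) {
			__free_pages(p, order);	/* unwind before failing */
			return NULL;
		}
		return p;
	}
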
 
 static void rtl8169_rx_clear(struct rtl8169_private *tp)
 {
 	unsigned int i;
 
-	for (i = 0; i < NUM_RX_DESC; i++) {
-		if (tp->Rx_databuff[i]) {
-			rtl8169_free_rx_databuff(tp, tp->Rx_databuff + i,
-					    tp->RxDescArray + i);
-		}
+	for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) {
+		dma_unmap_page(tp_to_dev(tp),
+			       le64_to_cpu(tp->RxDescArray[i].addr),
+			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
+		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
+		tp->Rx_databuff[i] = NULL;
+		rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
 	}
 }
 
@@ -5810,7 +5570,7 @@
 	unsigned int i;
 
 	for (i = 0; i < NUM_RX_DESC; i++) {
-		void *data;
+		struct page *data;
 
 		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
 		if (!data) {
@@ -5878,6 +5638,7 @@
 {
 	rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC);
 	tp->cur_tx = tp->dirty_tx = 0;
+	netdev_reset_queue(tp->dev);
 }
 
 static void rtl_reset_work(struct rtl8169_private *tp)
@@ -5887,7 +5648,7 @@
 
 	napi_disable(&tp->napi);
 	netif_stop_queue(dev);
-	synchronize_sched();
+	synchronize_rcu();
 
 	rtl8169_hw_reset(tp);
 
@@ -5909,6 +5670,16 @@
 	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 }
 
+static __le32 rtl8169_get_txd_opts1(u32 opts0, u32 len, unsigned int entry)
+{
+	u32 status = opts0 | len;
+
+	if (entry == NUM_TX_DESC - 1)
+		status |= RingEnd;
+
+	return cpu_to_le32(status);
+}
+
 static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
 			      u32 *opts)
 {
@@ -5921,7 +5692,7 @@
 	for (cur_frag = 0; cur_frag < info->nr_frags; cur_frag++) {
 		const skb_frag_t *frag = info->frags + cur_frag;
 		dma_addr_t mapping;
-		u32 status, len;
+		u32 len;
 		void *addr;
 
 		entry = (entry + 1) % NUM_TX_DESC;
@@ -5937,11 +5708,7 @@
 			goto err_out;
 		}
 
-		/* Anti gcc 2.95.3 bugware (sic) */
-		status = opts[0] | len |
-			(RingEnd * !((entry + 1) % NUM_TX_DESC));
-
-		txd->opts1 = cpu_to_le32(status);
+		txd->opts1 = rtl8169_get_txd_opts1(opts[0], len, entry);
 		txd->opts2 = cpu_to_le32(opts[1]);
 		txd->addr = cpu_to_le64(mapping);
 
@@ -5965,47 +5732,6 @@
 	return skb->len < ETH_ZLEN && tp->mac_version == RTL_GIGA_MAC_VER_34;
 }
 
-static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
-				      struct net_device *dev);
-/* r8169_csum_workaround()
- * The hw limites the value the transport offset. When the offset is out of the
- * range, calculate the checksum by sw.
- */
-static void r8169_csum_workaround(struct rtl8169_private *tp,
-				  struct sk_buff *skb)
-{
-	if (skb_shinfo(skb)->gso_size) {
-		netdev_features_t features = tp->dev->features;
-		struct sk_buff *segs, *nskb;
-
-		features &= ~(NETIF_F_SG | NETIF_F_IPV6_CSUM | NETIF_F_TSO6);
-		segs = skb_gso_segment(skb, features);
-		if (IS_ERR(segs) || !segs)
-			goto drop;
-
-		do {
-			nskb = segs;
-			segs = segs->next;
-			nskb->next = NULL;
-			rtl8169_start_xmit(nskb, tp->dev);
-		} while (segs);
-
-		dev_consume_skb_any(skb);
-	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		if (skb_checksum_help(skb) < 0)
-			goto drop;
-
-		rtl8169_start_xmit(skb, tp->dev);
-	} else {
-		struct net_device_stats *stats;
-
-drop:
-		stats = &tp->dev->stats;
-		stats->tx_dropped++;
-		dev_kfree_skb_any(skb);
-	}
-}
-
 /* msdn_giant_send_check()
  * According to the document of microsoft, the TCP Pseudo Header excludes the
  * packet length for IPv6 TCP large packets.
@@ -6029,8 +5755,7 @@
 	return ret;
 }
 
-static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp,
-				struct sk_buff *skb, u32 *opts)
+static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts)
 {
 	u32 mss = skb_shinfo(skb)->gso_size;
 
@@ -6047,8 +5772,6 @@
 		else
 			WARN_ON_ONCE(1);
 	}
-
-	return true;
 }
 
 static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
@@ -6058,13 +5781,6 @@
 	u32 mss = skb_shinfo(skb)->gso_size;
 
 	if (mss) {
-		if (transport_offset > GTTCPHO_MAX) {
-			netif_warn(tp, tx_err, tp->dev,
-				   "Invalid transport offset 0x%x for TSO\n",
-				   transport_offset);
-			return false;
-		}
-
 		switch (vlan_get_protocol(skb)) {
 		case htons(ETH_P_IP):
 			opts[0] |= TD1_GTSENV4;
@@ -6087,16 +5803,6 @@
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u8 ip_protocol;
 
-		if (unlikely(rtl_test_hw_pad_bug(tp, skb)))
-			return !(skb_checksum_help(skb) || eth_skb_pad(skb));
-
-		if (transport_offset > TCPHO_MAX) {
-			netif_warn(tp, tx_err, tp->dev,
-				   "Invalid transport offset 0x%x\n",
-				   transport_offset);
-			return false;
-		}
-
 		switch (vlan_get_protocol(skb)) {
 		case htons(ETH_P_IP):
 			opts[1] |= TD1_IPv4_CS;
@@ -6129,6 +5835,35 @@
 	return true;
 }
 
+static bool rtl_tx_slots_avail(struct rtl8169_private *tp,
+			       unsigned int nr_frags)
+{
+	unsigned int slots_avail = tp->dirty_tx + NUM_TX_DESC - tp->cur_tx;
+
+	/* A skbuff with nr_frags needs nr_frags+1 entries in the tx queue */
+	return slots_avail > nr_frags;
+}
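
cur_tx and dirty_tx are free-running u32 counters, so the subtraction above
stays correct across 2^32 wraparound. A standalone example of the same
arithmetic:

	#include <stdio.h>

	#define RING_SIZE 64U

	static unsigned int slots_avail(unsigned int dirty, unsigned int cur)
	{
		return dirty + RING_SIZE - cur;	/* well-defined modulo 2^32 */
	}

	int main(void)
	{
		unsigned int dirty = 0xfffffff0u;	/* consumer, not yet wrapped */
		unsigned int cur   = 0x00000008u;	/* producer, already wrapped */

		printf("%u\n", slots_avail(dirty, cur));	/* prints 40 */
		return 0;
	}
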
+
+/* RTL8102e and versions from RTL8168c onwards support csum_v2 */
+static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp)
+{
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
+	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static void rtl8169_doorbell(struct rtl8169_private *tp)
+{
+	if (rtl_is_8125(tp))
+		RTL_W16(tp, TxPoll_8125, BIT(0));
+	else
+		RTL_W8(tp, TxPoll, NPQ);
+}
+
 static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
@@ -6137,11 +5872,12 @@
 	struct TxDesc *txd = tp->TxDescArray + entry;
 	struct device *d = tp_to_dev(tp);
 	dma_addr_t mapping;
-	u32 status, len;
-	u32 opts[2];
+	u32 opts[2], len;
+	bool stop_queue;
+	bool door_bell;
 	int frags;
 
-	if (unlikely(!TX_FRAGS_READY_FOR(tp, skb_shinfo(skb)->nr_frags))) {
+	if (unlikely(!rtl_tx_slots_avail(tp, skb_shinfo(skb)->nr_frags))) {
 		netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n");
 		goto err_stop_0;
 	}
@@ -6149,12 +5885,14 @@
 	if (unlikely(le32_to_cpu(txd->opts1) & DescOwn))
 		goto err_stop_0;
 
-	opts[1] = cpu_to_le32(rtl8169_tx_vlan_tag(skb));
+	opts[1] = rtl8169_tx_vlan_tag(skb);
 	opts[0] = DescOwn;
 
-	if (!tp->tso_csum(tp, skb, opts)) {
-		r8169_csum_workaround(tp, skb);
-		return NETDEV_TX_OK;
+	if (rtl_chip_supports_csum_v2(tp)) {
+		if (!rtl8169_tso_csum_v2(tp, skb, opts))
+			goto err_dma_0;
+	} else {
+		rtl8169_tso_csum_v1(skb, opts);
 	}
 
 	len = skb_headlen(skb);
@@ -6185,25 +5923,29 @@
 	/* Force memory writes to complete before releasing descriptor */
 	dma_wmb();
 
-	/* Anti gcc 2.95.3 bugware (sic) */
-	status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
-	txd->opts1 = cpu_to_le32(status);
+	door_bell = __netdev_sent_queue(dev, skb->len, netdev_xmit_more());
+
+	txd->opts1 = rtl8169_get_txd_opts1(opts[0], len, entry);
 
 	/* Force all memory writes to complete before notifying device */
 	wmb();
 
 	tp->cur_tx += frags + 1;
 
-	RTL_W8(tp, TxPoll, NPQ);
-
-	mmiowb();
-
-	if (!TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS)) {
+	stop_queue = !rtl_tx_slots_avail(tp, MAX_SKB_FRAGS);
+	if (unlikely(stop_queue)) {
 		/* Avoid wrongly optimistic queue wake-up: rtl_tx thread must
 		 * not miss a ring update when it notices a stopped queue.
 		 */
 		smp_wmb();
 		netif_stop_queue(dev);
+		door_bell = true;
+	}
+
+	if (door_bell)
+		rtl8169_doorbell(tp);
+
+	if (unlikely(stop_queue)) {
 		/* Sync with rtl_tx:
 		 * - publish queue status and cur_tx ring index (write barrier)
 		 * - refresh dirty_tx ring index (read barrier).
@@ -6212,8 +5954,8 @@
 		 * can't.
 		 */
 		smp_mb();
-		if (TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS))
-			netif_wake_queue(dev);
+		if (rtl_tx_slots_avail(tp, MAX_SKB_FRAGS))
+			netif_start_queue(dev);
 	}
 
 	return NETDEV_TX_OK;
@@ -6231,6 +5973,39 @@
 	return NETDEV_TX_BUSY;
 }
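
__netdev_sent_queue() both feeds the byte-queue-limits accounting and
reports whether this skb ends the current transmit burst, which is what
lets the doorbell write be batched. A hedged sketch of just that decision
(kernel context; ring_doorbell() is a hypothetical MMIO kick):

	static void xmit_kick(struct net_device *dev, struct sk_buff *skb,
			      bool ring_full)
	{
		/* true when no more skbs are queued for this burst */
		bool kick = __netdev_sent_queue(dev, skb->len,
						netdev_xmit_more());

		if (unlikely(ring_full)) {
			netif_stop_queue(dev);
			kick = true;	/* no later xmit will ring it for us */
		}

		if (kick)
			ring_doorbell(dev);	/* hypothetical register write */
	}
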
 
+static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
+						struct net_device *dev,
+						netdev_features_t features)
+{
+	int transport_offset = skb_transport_offset(skb);
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	if (skb_is_gso(skb)) {
+		if (transport_offset > GTTCPHO_MAX &&
+		    rtl_chip_supports_csum_v2(tp))
+			features &= ~NETIF_F_ALL_TSO;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb->len < ETH_ZLEN) {
+			switch (tp->mac_version) {
+			case RTL_GIGA_MAC_VER_11:
+			case RTL_GIGA_MAC_VER_12:
+			case RTL_GIGA_MAC_VER_17:
+			case RTL_GIGA_MAC_VER_34:
+				features &= ~NETIF_F_CSUM_MASK;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (transport_offset > TCPHO_MAX &&
+		    rtl_chip_supports_csum_v2(tp))
+			features &= ~NETIF_F_CSUM_MASK;
+	}
+
+	return vlan_features_check(skb, features);
+}
+
 static void rtl8169_pcierr_interrupt(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -6263,22 +6038,13 @@
 		PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_REC_MASTER_ABORT |
 		PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT));
 
-	/* The infamous DAC f*ckup only happens at boot time */
-	if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
-		netif_info(tp, intr, dev, "disabling PCI DAC\n");
-		tp->cp_cmd &= ~PCIDAC;
-		RTL_W16(tp, CPlusCmd, tp->cp_cmd);
-		dev->features &= ~NETIF_F_HIGHDMA;
-	}
-
-	rtl8169_hw_reset(tp);
-
 	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 }
 
-static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
+static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
+		   int budget)
 {
-	unsigned int dirty_tx, tx_left;
+	unsigned int dirty_tx, tx_left, bytes_compl = 0, pkts_compl = 0;
 
 	dirty_tx = tp->dirty_tx;
 	smp_rmb();
@@ -6301,12 +6067,10 @@
 
 		rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
 				     tp->TxDescArray + entry);
-		if (status & LastFrag) {
-			u64_stats_update_begin(&tp->tx_stats.syncp);
-			tp->tx_stats.packets++;
-			tp->tx_stats.bytes += tx_skb->skb->len;
-			u64_stats_update_end(&tp->tx_stats.syncp);
-			dev_consume_skb_any(tx_skb->skb);
+		if (tx_skb->skb) {
+			pkts_compl++;
+			bytes_compl += tx_skb->skb->len;
+			napi_consume_skb(tx_skb->skb, budget);
 			tx_skb->skb = NULL;
 		}
 		dirty_tx++;
@@ -6314,6 +6078,13 @@
 	}
 
 	if (tp->dirty_tx != dirty_tx) {
+		netdev_completed_queue(dev, pkts_compl, bytes_compl);
+
+		u64_stats_update_begin(&tp->tx_stats.syncp);
+		tp->tx_stats.packets += pkts_compl;
+		tp->tx_stats.bytes += bytes_compl;
+		u64_stats_update_end(&tp->tx_stats.syncp);
+
 		tp->dirty_tx = dirty_tx;
 		/* Sync with rtl8169_start_xmit:
 		 * - publish dirty_tx ring index (write barrier)
@@ -6324,7 +6095,7 @@
 		 */
 		smp_mb();
 		if (netif_queue_stopped(dev) &&
-		    TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS)) {
+		    rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) {
 			netif_wake_queue(dev);
 		}
 		/*
@@ -6334,7 +6105,7 @@
 		 * it is slow enough). -- FR
 		 */
 		if (tp->cur_tx != dirty_tx)
-			RTL_W8(tp, TxPoll, NPQ);
+			rtl8169_doorbell(tp);
 	}
 }
 
@@ -6354,25 +6125,6 @@
 		skb_checksum_none_assert(skb);
 }
 
-static struct sk_buff *rtl8169_try_rx_copy(void *data,
-					   struct rtl8169_private *tp,
-					   int pkt_size,
-					   dma_addr_t addr)
-{
-	struct sk_buff *skb;
-	struct device *d = tp_to_dev(tp);
-
-	data = rtl8169_align(data);
-	dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
-	prefetch(data);
-	skb = napi_alloc_skb(&tp->napi, pkt_size);
-	if (skb)
-		skb_copy_to_linear_data(skb, data, pkt_size);
-	dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
-
-	return skb;
-}
-
 static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget)
 {
 	unsigned int cur_rx, rx_left;
@@ -6382,6 +6134,7 @@
 
 	for (rx_left = min(budget, NUM_RX_DESC); rx_left > 0; rx_left--, cur_rx++) {
 		unsigned int entry = cur_rx % NUM_RX_DESC;
+		const void *rx_buf = page_address(tp->Rx_databuff[entry]);
 		struct RxDesc *desc = tp->RxDescArray + entry;
 		u32 status;
 
@@ -6403,28 +6156,18 @@
 				dev->stats.rx_length_errors++;
 			if (status & RxCRC)
 				dev->stats.rx_crc_errors++;
-			/* RxFOVF is a reserved bit on later chip versions */
-			if (tp->mac_version == RTL_GIGA_MAC_VER_01 &&
-			    status & RxFOVF) {
-				rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
-				dev->stats.rx_fifo_errors++;
-			} else if (status & (RxRUNT | RxCRC) &&
-				   !(status & RxRWT) &&
-				   dev->features & NETIF_F_RXALL) {
+			if (status & (RxRUNT | RxCRC) && !(status & RxRWT) &&
+			    dev->features & NETIF_F_RXALL) {
 				goto process_pkt;
 			}
 		} else {
+			unsigned int pkt_size;
 			struct sk_buff *skb;
-			dma_addr_t addr;
-			int pkt_size;
 
 process_pkt:
-			addr = le64_to_cpu(desc->addr);
+			pkt_size = status & GENMASK(13, 0);
 			if (likely(!(dev->features & NETIF_F_RXFCS)))
-				pkt_size = (status & 0x00003fff) - 4;
-			else
-				pkt_size = status & 0x00003fff;
-
+				pkt_size -= ETH_FCS_LEN;
 			/*
 			 * The driver does not support incoming fragmented
 			 * frames. They are seen as a symptom of over-mtu
@@ -6436,15 +6179,25 @@
 				goto release_descriptor;
 			}
 
-			skb = rtl8169_try_rx_copy(tp->Rx_databuff[entry],
-						  tp, pkt_size, addr);
-			if (!skb) {
+			skb = napi_alloc_skb(&tp->napi, pkt_size);
+			if (unlikely(!skb)) {
 				dev->stats.rx_dropped++;
 				goto release_descriptor;
 			}
 
+			dma_sync_single_for_cpu(tp_to_dev(tp),
+						le64_to_cpu(desc->addr),
+						pkt_size, DMA_FROM_DEVICE);
+			prefetch(rx_buf);
+			skb_copy_to_linear_data(skb, rx_buf, pkt_size);
+			skb->tail += pkt_size;
+			skb->len = pkt_size;
+
+			dma_sync_single_for_device(tp_to_dev(tp),
+						   le64_to_cpu(desc->addr),
+						   pkt_size, DMA_FROM_DEVICE);
+
 			rtl8169_rx_csum(skb, status);
-			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
 			rtl8169_rx_vlan_tag(desc, skb);
@@ -6473,47 +6226,33 @@
 static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 {
 	struct rtl8169_private *tp = dev_instance;
-	u16 status = rtl_get_events(tp);
+	u32 status = rtl_get_events(tp);
 
-	if (status == 0xffff || !(status & (RTL_EVENT_NAPI | tp->event_slow)))
+	if (!tp->irq_enabled || (status & 0xffff) == 0xffff ||
+	    !(status & tp->irq_mask))
 		return IRQ_NONE;
 
+	if (unlikely(status & SYSErr)) {
+		rtl8169_pcierr_interrupt(tp->dev);
+		goto out;
+	}
+
+	if (status & LinkChg)
+		phy_mac_interrupt(tp->phydev);
+
+	if (unlikely(status & RxFIFOOver &&
+	    tp->mac_version == RTL_GIGA_MAC_VER_11)) {
+		netif_stop_queue(tp->dev);
+		/* XXX - Hack alert. See rtl_task(). */
+		set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
+	}
+
 	rtl_irq_disable(tp);
 	napi_schedule_irqoff(&tp->napi);
-
-	return IRQ_HANDLED;
-}
-
-/*
- * Workqueue context.
- */
-static void rtl_slow_event_work(struct rtl8169_private *tp)
-{
-	struct net_device *dev = tp->dev;
-	u16 status;
-
-	status = rtl_get_events(tp) & tp->event_slow;
+out:
 	rtl_ack_events(tp, status);
 
-	if (unlikely(status & RxFIFOOver)) {
-		switch (tp->mac_version) {
-		/* Work around for rx fifo overflow */
-		case RTL_GIGA_MAC_VER_11:
-			netif_stop_queue(dev);
-			/* XXX - Hack alert. See rtl_task(). */
-			set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
-		default:
-			break;
-		}
-	}
-
-	if (unlikely(status & SYSErr))
-		rtl8169_pcierr_interrupt(dev);
-
-	if (status & LinkChg)
-		phy_mac_interrupt(dev->phydev);
-
-	rtl_irq_enable_all(tp);
+	return IRQ_HANDLED;
 }
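
The slow-event workqueue is folded into the hard handler: cheap events
(link change) are handled inline, errors are deferred to the reset task,
and bulk rx/tx work is masked and left to NAPI. The general shape of such
a consolidated handler, with placeholder registers and a hypothetical
device structure:

	static irqreturn_t demo_isr(int irq, void *dev_id)
	{
		struct demo_priv *p = dev_id;
		u32 status = readl(p->base + DEMO_ISR);

		if (!(status & p->irq_mask))
			return IRQ_NONE;		/* shared line, not ours */

		if (status & DEMO_LINK)
			phy_mac_interrupt(p->phydev);	/* cheap, do inline */

		writel(0, p->base + DEMO_IMR);		/* mask */
		napi_schedule_irqoff(&p->napi);		/* heavy work in softirq */

		writel(status, p->base + DEMO_ISR);	/* ack what we observed */
		return IRQ_HANDLED;
	}
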
 
 static void rtl_task(struct work_struct *work)
@@ -6522,8 +6261,6 @@
 		int bitnr;
 		void (*action)(struct rtl8169_private *);
 	} rtl_work[] = {
-		/* XXX - keep rtl_slow_event_work() as first element. */
-		{ RTL_FLAG_TASK_SLOW_PENDING,	rtl_slow_event_work },
 		{ RTL_FLAG_TASK_RESET_PENDING,	rtl_reset_work },
 	};
 	struct rtl8169_private *tp =
@@ -6553,28 +6290,15 @@
 {
 	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
 	struct net_device *dev = tp->dev;
-	u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow;
 	int work_done;
-	u16 status;
-
-	status = rtl_get_events(tp);
-	rtl_ack_events(tp, status & ~tp->event_slow);
 
 	work_done = rtl_rx(dev, tp, (u32) budget);
 
-	rtl_tx(dev, tp);
-
-	if (status & tp->event_slow) {
-		enable_mask &= ~tp->event_slow;
-
-		rtl_schedule_task(tp, RTL_FLAG_TASK_SLOW_PENDING);
-	}
+	rtl_tx(dev, tp, budget);
 
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
-
-		rtl_irq_enable(tp, enable_mask);
-		mmiowb();
+		rtl_irq_enable(tp);
 	}
 
 	return work_done;
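
With the slow-event path gone, rtl8169_poll() reduces to the canonical NAPI
tail: interrupts may be re-armed only when the budget was not exhausted and
napi_complete_done() confirms the poll really finished. Sketched with the
same hypothetical device as above:

	static int demo_poll(struct napi_struct *napi, int budget)
	{
		struct demo_priv *p = container_of(napi, struct demo_priv, napi);
		int work_done = demo_rx(p, budget);	/* hypothetical rx loop */

		demo_tx_done(p, budget);		/* hypothetical tx reaping */

		/* napi_complete_done() returns false if NAPI was rescheduled */
		if (work_done < budget && napi_complete_done(napi, work_done))
			writel(p->irq_mask, p->base + DEMO_IMR);	/* unmask */

		return work_done;
	}
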
@@ -6603,12 +6327,12 @@
 	}
 
 	if (net_ratelimit())
-		phy_print_status(ndev->phydev);
+		phy_print_status(tp->phydev);
 }
 
 static int r8169_phy_connect(struct rtl8169_private *tp)
 {
-	struct phy_device *phydev = mdiobus_get_phy(tp->mii_bus, 0);
+	struct phy_device *phydev = tp->phydev;
 	phy_interface_t phy_mode;
 	int ret;
 
@@ -6623,8 +6347,7 @@
 	if (!tp->supports_gmii)
 		phy_set_max_speed(phydev, SPEED_100);
 
-	/* Ensure to advertise everything, incl. pause */
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	phy_attached_info(phydev);
 
@@ -6635,7 +6358,7 @@
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	phy_stop(dev->phydev);
+	phy_stop(tp->phydev);
 
 	napi_disable(&tp->napi);
 	netif_stop_queue(dev);
@@ -6649,7 +6372,7 @@
 	rtl8169_rx_missed(dev);
 
 	/* Give a racing hard_start_xmit a few cycles to complete. */
-	synchronize_sched();
+	synchronize_rcu();
 
 	rtl8169_tx_clear(tp);
 
@@ -6677,7 +6400,7 @@
 
 	cancel_work_sync(&tp->wk.work);
 
-	phy_disconnect(dev->phydev);
+	phy_disconnect(tp->phydev);
 
 	pci_free_irq(pdev, 0, tp);
 
@@ -6728,10 +6451,6 @@
 	if (retval < 0)
 		goto err_free_rx_1;
 
-	INIT_WORK(&tp->wk.work, rtl_task);
-
-	smp_mb();
-
 	rtl_request_firmware(tp);
 
 	retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp,
@@ -6758,7 +6477,7 @@
 	if (!rtl8169_init_counter_offsets(tp))
 		netif_warn(tp, hw, dev, "counter reset/update failed\n");
 
-	phy_start(dev->phydev);
+	phy_start(tp->phydev);
 	netif_start_queue(dev);
 
 	rtl_unlock_work(tp);
@@ -6820,7 +6539,7 @@
 	stats->multicast	= dev->stats.multicast;
 
 	/*
-	 * Fetch additonal counter values missing in stats collected by driver
+	 * Fetch additional counter values missing in stats collected by driver
 	 * from tally counters.
 	 */
 	if (pm_runtime_active(&pdev->dev))
@@ -6847,9 +6566,8 @@
 	if (!netif_running(dev))
 		return;
 
-	phy_stop(dev->phydev);
+	phy_stop(tp->phydev);
 	netif_device_detach(dev);
-	netif_stop_queue(dev);
 
 	rtl_lock_work(tp);
 	napi_disable(&tp->napi);
@@ -6865,8 +6583,7 @@
 
 static int rtl8169_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rtl8169_private *tp = netdev_priv(dev);
 
 	rtl8169_net_suspend(dev);
@@ -6884,22 +6601,22 @@
 	rtl_pll_power_up(tp);
 	rtl8169_init_phy(dev, tp);
 
-	phy_start(tp->dev->phydev);
+	phy_start(tp->phydev);
 
 	rtl_lock_work(tp);
 	napi_enable(&tp->napi);
 	set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
+	rtl_reset_work(tp);
 	rtl_unlock_work(tp);
-
-	rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 }
 
 static int rtl8169_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rtl8169_private *tp = netdev_priv(dev);
 
+	rtl_rar_set(tp, dev->dev_addr);
+
 	clk_prepare_enable(tp->clk);
 
 	if (netif_running(dev))
@@ -6910,8 +6627,7 @@
 
 static int rtl8169_runtime_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rtl8169_private *tp = netdev_priv(dev);
 
 	if (!tp->TxDescArray)
@@ -6932,9 +6648,9 @@
 
 static int rtl8169_runtime_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rtl8169_private *tp = netdev_priv(dev);
+
 	rtl_rar_set(tp, dev->dev_addr);
 
 	if (!tp->TxDescArray)
@@ -6951,8 +6667,7 @@
 
 static int rtl8169_runtime_idle(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 
 	if (!netif_running(dev) || !netif_carrier_ok(dev))
 		pm_schedule_suspend(device, 10000);
@@ -7032,7 +6747,7 @@
 	netif_napi_del(&tp->napi);
 
 	unregister_netdev(dev);
-	mdiobus_unregister(tp->mii_bus);
+	mdiobus_unregister(tp->phydev->mdio.bus);
 
 	rtl_release_firmware(tp);
 
@@ -7048,6 +6763,7 @@
 	.ndo_stop		= rtl8169_close,
 	.ndo_get_stats64	= rtl8169_get_stats64,
 	.ndo_start_xmit		= rtl8169_start_xmit,
+	.ndo_features_check	= rtl8169_features_check,
 	.ndo_tx_timeout		= rtl8169_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_change_mtu		= rtl8169_change_mtu,
@@ -7062,52 +6778,60 @@
 
 };
 
-static const struct rtl_cfg_info {
-	void (*hw_start)(struct rtl8169_private *tp);
-	u16 event_slow;
-	unsigned int has_gmii:1;
-	const struct rtl_coalesce_info *coalesce_info;
-	u8 default_ver;
-} rtl_cfg_infos [] = {
-	[RTL_CFG_0] = {
-		.hw_start	= rtl_hw_start_8169,
-		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver,
-		.has_gmii	= 1,
-		.coalesce_info	= rtl_coalesce_info_8169,
-		.default_ver	= RTL_GIGA_MAC_VER_01,
-	},
-	[RTL_CFG_1] = {
-		.hw_start	= rtl_hw_start_8168,
-		.event_slow	= SYSErr | LinkChg | RxOverflow,
-		.has_gmii	= 1,
-		.coalesce_info	= rtl_coalesce_info_8168_8136,
-		.default_ver	= RTL_GIGA_MAC_VER_11,
-	},
-	[RTL_CFG_2] = {
-		.hw_start	= rtl_hw_start_8101,
-		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver |
-				  PCSTimeout,
-		.coalesce_info	= rtl_coalesce_info_8168_8136,
-		.default_ver	= RTL_GIGA_MAC_VER_13,
-	}
-};
+static void rtl_set_irq_mask(struct rtl8169_private *tp)
+{
+	tp->irq_mask = RTL_EVENT_NAPI | LinkChg;
+
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
+		tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
+	else if (tp->mac_version == RTL_GIGA_MAC_VER_11)
+		/* special workaround needed */
+		tp->irq_mask |= RxFIFOOver;
+	else
+		tp->irq_mask |= RxOverflow;
+}
 
 static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
 	unsigned int flags;
 
-	if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
-		RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
+		rtl_unlock_config_regs(tp);
 		RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
-		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+		rtl_lock_config_regs(tp);
+		/* fall through */
+	case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_24:
 		flags = PCI_IRQ_LEGACY;
-	} else {
+		break;
+	default:
 		flags = PCI_IRQ_ALL_TYPES;
+		break;
 	}
 
 	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
 }
 
+static void rtl_read_mac_address(struct rtl8169_private *tp,
+				 u8 mac_addr[ETH_ALEN])
+{
+	/* Get MAC address */
+	if (rtl_is_8168evl_up(tp) && tp->mac_version != RTL_GIGA_MAC_VER_34) {
+		u32 value = rtl_eri_read(tp, 0xe0);
+
+		mac_addr[0] = (value >>  0) & 0xff;
+		mac_addr[1] = (value >>  8) & 0xff;
+		mac_addr[2] = (value >> 16) & 0xff;
+		mac_addr[3] = (value >> 24) & 0xff;
+
+		value = rtl_eri_read(tp, 0xe4);
+		mac_addr[4] = (value >>  0) & 0xff;
+		mac_addr[5] = (value >>  8) & 0xff;
+	} else if (rtl_is_8125(tp)) {
+		rtl_read_mac_from_reg(tp, mac_addr, MAC0_BKP);
+	}
+}
+
 DECLARE_RTL_COND(rtl_link_list_ready_cond)
 {
 	return RTL_R8(tp, MCU) & LINK_LIST_RDY;
@@ -7144,7 +6868,6 @@
 static int r8169_mdio_register(struct rtl8169_private *tp)
 {
 	struct pci_dev *pdev = tp->pci_dev;
-	struct phy_device *phydev;
 	struct mii_bus *new_bus;
 	int ret;
 
@@ -7156,8 +6879,7 @@
 	new_bus->priv = tp;
 	new_bus->parent = &pdev->dev;
 	new_bus->irq[0] = PHY_IGNORE_INTERRUPT;
-	snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x",
-		 PCI_DEVID(pdev->bus->number, pdev->devfn));
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev));
 
 	new_bus->read = r8169_mdio_read_reg;
 	new_bus->write = r8169_mdio_write_reg;
@@ -7166,24 +6888,20 @@
 	if (ret)
 		return ret;
 
-	phydev = mdiobus_get_phy(new_bus, 0);
-	if (!phydev) {
+	tp->phydev = mdiobus_get_phy(new_bus, 0);
+	if (!tp->phydev) {
 		mdiobus_unregister(new_bus);
 		return -ENODEV;
 	}
 
 	/* PHY will be woken up in rtl_open() */
-	phy_suspend(phydev);
-
-	tp->mii_bus = new_bus;
+	phy_suspend(tp->phydev);
 
 	return 0;
 }
 
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
-	u32 data;
-
 	tp->ocp_base = OCP_STD_PHY_BASE;
 
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
@@ -7198,53 +6916,58 @@
 	msleep(1);
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
-	data = r8168_mac_ocp_read(tp, 0xe8de);
-	data &= ~(1 << 14);
-	r8168_mac_ocp_write(tp, 0xe8de, data);
+	r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
 
 	if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
 		return;
 
-	data = r8168_mac_ocp_read(tp, 0xe8de);
-	data |= (1 << 15);
-	r8168_mac_ocp_write(tp, 0xe8de, data);
+	r8168_mac_ocp_modify(tp, 0xe8de, 0, BIT(15));
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
-		return;
+	rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
 }
 
-static void rtl_hw_init_8168ep(struct rtl8169_private *tp)
+static void rtl_hw_init_8125(struct rtl8169_private *tp)
 {
-	rtl8168ep_stop_cmac(tp);
-	rtl_hw_init_8168g(tp);
+	tp->ocp_base = OCP_STD_PHY_BASE;
+
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
+
+	if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
+		return;
+
+	RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+	msleep(1);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+
+	r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
+
+	if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
+		return;
+
+	r8168_mac_ocp_write(tp, 0xc0aa, 0x07d0);
+	r8168_mac_ocp_write(tp, 0xc0a6, 0x0150);
+	r8168_mac_ocp_write(tp, 0xc01e, 0x5555);
+
+	rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
 }
 
 static void rtl_hw_initialize(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
+		rtl8168ep_stop_cmac(tp);
+		/* fall through */
 	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
 		rtl_hw_init_8168g(tp);
 		break;
-	case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
-		rtl_hw_init_8168ep(tp);
+	case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
+		rtl_hw_init_8125(tp);
 		break;
 	default:
 		break;
 	}
 }
 
-/* Versions RTL8102e and from RTL8168c onwards support csum_v2 */
-static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp)
-{
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
-	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
-		return false;
-	default:
-		return true;
-	}
-}
-
 static int rtl_jumbo_max(struct rtl8169_private *tp)
 {
 	/* Non-GBit versions don't support jumbo frames */
@@ -7253,7 +6976,7 @@
 
 	switch (tp->mac_version) {
 	/* RTL8169 */
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 		return JUMBO_7K;
 	/* RTL8168b */
 	case RTL_GIGA_MAC_VER_11:
@@ -7273,12 +6996,61 @@
 	clk_disable_unprepare(data);
 }
 
+static int rtl_get_ether_clk(struct rtl8169_private *tp)
+{
+	struct device *d = tp_to_dev(tp);
+	struct clk *clk;
+	int rc;
+
+	clk = devm_clk_get(d, "ether_clk");
+	if (IS_ERR(clk)) {
+		rc = PTR_ERR(clk);
+		if (rc == -ENOENT)
+			/* clk-core allows NULL (for suspend / resume) */
+			rc = 0;
+		else if (rc != -EPROBE_DEFER)
+			dev_err(d, "failed to get clk: %d\n", rc);
+	} else {
+		tp->clk = clk;
+		rc = clk_prepare_enable(clk);
+		if (rc)
+			dev_err(d, "failed to enable clk: %d\n", rc);
+		else
+			rc = devm_add_action_or_reset(d, rtl_disable_clk, clk);
+	}
+
+	return rc;
+}
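
Since v5.1 the -ENOENT special case can also be expressed with
devm_clk_get_optional(), which returns NULL when the clock is absent, and
NULL is a valid argument to clk_prepare_enable(). A sketch of that variant
(hypothetical name; cleanup is left to the caller here):

	static int demo_get_ether_clk(struct device *d, struct clk **out)
	{
		struct clk *clk = devm_clk_get_optional(d, "ether_clk");

		if (IS_ERR(clk))
			return PTR_ERR(clk);	/* -EPROBE_DEFER passes through */

		*out = clk;
		return clk_prepare_enable(clk);	/* no-op for a NULL clk */
	}
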
+
+static void rtl_init_mac_address(struct rtl8169_private *tp)
+{
+	struct net_device *dev = tp->dev;
+	u8 *mac_addr = dev->dev_addr;
+	int rc;
+
+	rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr);
+	if (!rc)
+		goto done;
+
+	rtl_read_mac_address(tp, mac_addr);
+	if (is_valid_ether_addr(mac_addr))
+		goto done;
+
+	rtl_read_mac_from_reg(tp, mac_addr, MAC0);
+	if (is_valid_ether_addr(mac_addr))
+		goto done;
+
+	eth_hw_addr_random(dev);
+	dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n");
+done:
+	rtl_rar_set(tp, mac_addr);
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
 	struct rtl8169_private *tp;
 	struct net_device *dev;
-	int chipset, region, i;
+	int chipset, region;
 	int jumbo_max, rc;
 
 	dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
@@ -7291,33 +7063,19 @@
 	tp->dev = dev;
 	tp->pci_dev = pdev;
 	tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
-	tp->supports_gmii = cfg->has_gmii;
+	tp->supports_gmii = ent->driver_data == RTL_CFG_NO_GBIT ? 0 : 1;
 
 	/* Get the *optional* external "ether_clk" used on some boards */
-	tp->clk = devm_clk_get(&pdev->dev, "ether_clk");
-	if (IS_ERR(tp->clk)) {
-		rc = PTR_ERR(tp->clk);
-		if (rc == -ENOENT) {
-			/* clk-core allows NULL (for suspend / resume) */
-			tp->clk = NULL;
-		} else if (rc == -EPROBE_DEFER) {
-			return rc;
-		} else {
-			dev_err(&pdev->dev, "failed to get clk: %d\n", rc);
-			return rc;
-		}
-	} else {
-		rc = clk_prepare_enable(tp->clk);
-		if (rc) {
-			dev_err(&pdev->dev, "failed to enable clk: %d\n", rc);
-			return rc;
-		}
+	rc = rtl_get_ether_clk(tp);
+	if (rc)
+		return rc;
 
-		rc = devm_add_action_or_reset(&pdev->dev, rtl_disable_clk,
-					      tp->clk);
-		if (rc)
-			return rc;
-	}
+	/* Disable ASPM completely, as it causes devices to randomly stop
+	 * working and can fully hang the system for some PCIe device users.
+	 */
+	rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
+					  PCIE_LINK_STATE_L1);
+	tp->aspm_manageable = !rc;
 
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
 	rc = pcim_enable_device(pdev);
@@ -7350,54 +7108,27 @@
 
 	tp->mmio_addr = pcim_iomap_table(pdev)[region];
 
-	if (!pci_is_pcie(pdev))
-		dev_info(&pdev->dev, "not PCI Express\n");
-
 	/* Identify chip attached to board */
-	rtl8169_get_mac_version(tp, cfg->default_ver);
-
-	if (rtl_tbi_enabled(tp)) {
-		dev_err(&pdev->dev, "TBI fiber mode not supported\n");
+	rtl8169_get_mac_version(tp);
+	if (tp->mac_version == RTL_GIGA_MAC_NONE)
 		return -ENODEV;
-	}
 
 	tp->cp_cmd = RTL_R16(tp, CPlusCmd);
 
-	if ((sizeof(dma_addr_t) > 4) &&
-	    (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) &&
-			      tp->mac_version >= RTL_GIGA_MAC_VER_18)) &&
-	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
-	    !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
-
-		/* CPlusCmd Dual Access Cycle is only needed for non-PCIe */
-		if (!pci_is_pcie(pdev))
-			tp->cp_cmd |= PCIDAC;
+	if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 &&
+	    !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
 		dev->features |= NETIF_F_HIGHDMA;
-	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc < 0) {
-			dev_err(&pdev->dev, "DMA configuration failed\n");
-			return rc;
-		}
-	}
 
 	rtl_init_rxcfg(tp);
 
-	rtl_irq_disable(tp);
+	rtl8169_irq_mask_and_ack(tp);
 
 	rtl_hw_initialize(tp);
 
 	rtl_hw_reset(tp);
 
-	rtl_ack_events(tp, 0xffff);
-
 	pci_set_master(pdev);
 
-	rtl_init_mdio_ops(tp);
-	rtl_init_jumbo_ops(tp);
-
-	rtl8169_print_mac_version(tp);
-
 	chipset = tp->mac_version;
 
 	rc = rtl_alloc_irq(tp);
@@ -7406,39 +7137,20 @@
 		return rc;
 	}
 
-	tp->saved_wolopts = __rtl8169_get_wol(tp);
-
 	mutex_init(&tp->wk.mutex);
+	INIT_WORK(&tp->wk.work, rtl_task);
 	u64_stats_init(&tp->rx_stats.syncp);
 	u64_stats_init(&tp->tx_stats.syncp);
 
-	/* Get MAC address */
-	switch (tp->mac_version) {
-		u8 mac_addr[ETH_ALEN] __aligned(4);
-	case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38:
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
-		*(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC);
-		*(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
-
-		if (is_valid_ether_addr(mac_addr))
-			rtl_rar_set(tp, mac_addr);
-		break;
-	default:
-		break;
-	}
-	for (i = 0; i < ETH_ALEN; i++)
-		dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
+	rtl_init_mac_address(tp);
 
 	dev->ethtool_ops = &rtl8169_ethtool_ops;
-	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
 
 	netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT);
 
-	/* don't enable SG, IP_CSUM and TSO by default - it might not work
-	 * properly for all devices */
-	dev->features |= NETIF_F_RXCSUM |
-		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
-
+	dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
+		NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX |
+		NETIF_F_HW_VLAN_CTAG_RX;
 	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
 		NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX |
 		NETIF_F_HW_VLAN_CTAG_RX;
@@ -7446,8 +7158,10 @@
 		NETIF_F_HIGHDMA;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
-	tp->cp_cmd |= RxChkSum | RxVlan;
-
+	tp->cp_cmd |= RxChkSum;
+	/* RTL8125 uses register RxConfig for VLAN offloading config */
+	if (!rtl_is_8125(tp))
+		tp->cp_cmd |= RxVlan;
 	/*
 	 * Pretend we are using VLANs; This bypasses a nasty bug where
 	 * Interrupts stop flowing on high load on 8110SCd controllers.
@@ -7457,10 +7171,23 @@
 		dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
 
 	if (rtl_chip_supports_csum_v2(tp)) {
-		tp->tso_csum = rtl8169_tso_csum_v2;
 		dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
+		dev->features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
+		dev->gso_max_size = RTL_GSO_MAX_SIZE_V2;
+		dev->gso_max_segs = RTL_GSO_MAX_SEGS_V2;
 	} else {
-		tp->tso_csum = rtl8169_tso_csum_v1;
+		dev->gso_max_size = RTL_GSO_MAX_SIZE_V1;
+		dev->gso_max_segs = RTL_GSO_MAX_SEGS_V1;
+	}
+
+	/* RTL8168e-vl and one RTL8168c variant are known to have a
+	 * HW issue with TSO.
+	 */
+	if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
+	    tp->mac_version == RTL_GIGA_MAC_VER_22) {
+		dev->vlan_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG);
+		dev->hw_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG);
+		dev->features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG);
 	}
 
 	dev->hw_features |= NETIF_F_RXALL;
@@ -7471,11 +7198,9 @@
 	jumbo_max = rtl_jumbo_max(tp);
 	dev->max_mtu = jumbo_max;
 
-	tp->hw_start = cfg->hw_start;
-	tp->event_slow = cfg->event_slow;
-	tp->coalesce_info = cfg->coalesce_info;
+	rtl_set_irq_mask(tp);
 
-	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
+	tp->fw_name = rtl_chip_infos[chipset].fw_name;
 
 	tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
 					    &tp->counters_phys_addr,
@@ -7496,9 +7221,9 @@
 	if (rc)
 		goto err_mdio_unregister;
 
-	netif_info(tp, probe, dev, "%s, %pM, XID %08x, IRQ %d\n",
+	netif_info(tp, probe, dev, "%s, %pM, XID %03x, IRQ %d\n",
 		   rtl_chip_infos[chipset].name, dev->dev_addr,
-		   (u32)(RTL_R32(tp, TxConfig) & 0xfcf0f8ff),
+		   (RTL_R32(tp, TxConfig) >> 20) & 0xfcf,
 		   pci_irq_vector(pdev, 0));
 
 	if (jumbo_max > JUMBO_1K)
@@ -7516,7 +7241,7 @@
 	return 0;
 
 err_mdio_unregister:
-	mdiobus_unregister(tp->mii_bus);
+	mdiobus_unregister(tp->phydev->mdio.bus);
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 9b6bf55..9f88b5d 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -193,16 +193,12 @@
 	GECMR	= 0x05b0,
 	MAHR	= 0x05c0,
 	MALR	= 0x05c8,
-	TROCR	= 0x0700,	/* Undocumented? */
-	CDCR	= 0x0708,	/* Undocumented? */
-	LCCR	= 0x0710,	/* Undocumented? */
+	TROCR	= 0x0700,	/* R-Car Gen3 only */
 	CEFCR	= 0x0740,
 	FRECR	= 0x0748,
 	TSFRCR	= 0x0750,
 	TLFRCR	= 0x0758,
 	RFCR	= 0x0760,
-	CERCR	= 0x0768,	/* Undocumented? */
-	CEECR	= 0x0770,	/* Undocumented? */
 	MAFCR	= 0x0778,
 };
 
@@ -220,7 +216,6 @@
 	CCC_CSEL_HPB	= 0x00010000,
 	CCC_CSEL_ETH_TX	= 0x00020000,
 	CCC_CSEL_GMII_REF = 0x00030000,
-	CCC_BOC		= 0x00100000,	/* Undocumented? */
 	CCC_LBME	= 0x01000000,
 };
 
@@ -317,7 +312,7 @@
 
 /* SFO */
 enum SFO_BIT {
-	SFO_FPB		= 0x0000003F,
+	SFO_FBP		= 0x0000003F,
 };
 
 /* RTC */
@@ -959,7 +954,12 @@
 #define RX_QUEUE_OFFSET	4
 #define NUM_RX_QUEUE	2
 #define NUM_TX_QUEUE	2
-#define NUM_TX_DESC	2	/* TX descriptors per packet */
+
+#define RX_BUF_SZ	(2048 - ETH_FCS_LEN + sizeof(__sum16))
+
+/* TX descriptors per packet */
+#define NUM_TX_DESC_GEN2	2
+#define NUM_TX_DESC_GEN3	1
 
 struct ravb_tstamp_skb {
 	struct list_head list;
@@ -1020,7 +1020,6 @@
 	u32 dirty_rx[NUM_RX_QUEUE];	/* Producer ring indices */
 	u32 cur_tx[NUM_TX_QUEUE];
 	u32 dirty_tx[NUM_TX_QUEUE];
-	u32 rx_buf_sz;			/* Based on MTU+slack. */
 	struct napi_struct napi[NUM_RX_QUEUE];
 	struct work_struct work;
 	/* MII transceiver section. */
@@ -1029,7 +1028,6 @@
 	phy_interface_t phy_interface;
 	int msg_enable;
 	int speed;
-	int duplex;
 	int emac_irq;
 	enum ravb_chip_id chip_id;
 	int rx_irqs[NUM_RX_QUEUE];
@@ -1038,6 +1036,7 @@
 	unsigned no_avb_link:1;
 	unsigned avb_link_active_low:1;
 	unsigned wol_enabled:1;
+	int num_tx_desc;	/* TX descriptors per packet */
 };
 
 static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index d6f7539..3f165c1 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Renesas Ethernet AVB device driver
  *
- * Copyright (C) 2014-2015 Renesas Electronics Corporation
+ * Copyright (C) 2014-2019 Renesas Electronics Corporation
  * Copyright (C) 2015 Renesas Solutions Corp.
  * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com>
  *
@@ -82,13 +82,6 @@
 	return error;
 }
 
-static void ravb_set_duplex(struct net_device *ndev)
-{
-	struct ravb_private *priv = netdev_priv(ndev);
-
-	ravb_modify(ndev, ECMR, ECMR_DM, priv->duplex ? ECMR_DM : 0);
-}
-
 static void ravb_set_rate(struct net_device *ndev)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
@@ -118,7 +111,7 @@
  */
 static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac)
 {
-	if (mac) {
+	if (!IS_ERR(mac)) {
 		ether_addr_copy(ndev->dev_addr, mac);
 	} else {
 		u32 mahr = ravb_read(ndev, MAHR);
@@ -182,6 +175,7 @@
 {
 	struct ravb_private *priv = netdev_priv(ndev);
 	struct net_device_stats *stats = &priv->stats[q];
+	int num_tx_desc = priv->num_tx_desc;
 	struct ravb_tx_desc *desc;
 	int free_num = 0;
 	int entry;
@@ -191,7 +185,7 @@
 		bool txed;
 
 		entry = priv->dirty_tx[q] % (priv->num_tx_ring[q] *
-					     NUM_TX_DESC);
+					     num_tx_desc);
 		desc = &priv->tx_ring[q][entry];
 		txed = desc->die_dt == DT_FEMPTY;
 		if (free_txed_only && !txed)
@@ -200,12 +194,12 @@
 		dma_rmb();
 		size = le16_to_cpu(desc->ds_tagl) & TX_DS;
 		/* Free the original skb. */
-		if (priv->tx_skb[q][entry / NUM_TX_DESC]) {
+		if (priv->tx_skb[q][entry / num_tx_desc]) {
 			dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
 					 size, DMA_TO_DEVICE);
 			/* Last packet descriptor? */
-			if (entry % NUM_TX_DESC == NUM_TX_DESC - 1) {
-				entry /= NUM_TX_DESC;
+			if (entry % num_tx_desc == num_tx_desc - 1) {
+				entry /= num_tx_desc;
 				dev_kfree_skb_any(priv->tx_skb[q][entry]);
 				priv->tx_skb[q][entry] = NULL;
 				if (txed)
@@ -224,6 +218,7 @@
 static void ravb_ring_free(struct net_device *ndev, int q)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
+	int num_tx_desc = priv->num_tx_desc;
 	int ring_size;
 	int i;
 
@@ -235,7 +230,7 @@
 					       le32_to_cpu(desc->dptr)))
 				dma_unmap_single(ndev->dev.parent,
 						 le32_to_cpu(desc->dptr),
-						 priv->rx_buf_sz,
+						 RX_BUF_SZ,
 						 DMA_FROM_DEVICE);
 		}
 		ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -249,7 +244,7 @@
 		ravb_tx_free(ndev, q, false);
 
 		ring_size = sizeof(struct ravb_tx_desc) *
-			    (priv->num_tx_ring[q] * NUM_TX_DESC + 1);
+			    (priv->num_tx_ring[q] * num_tx_desc + 1);
 		dma_free_coherent(ndev->dev.parent, ring_size, priv->tx_ring[q],
 				  priv->tx_desc_dma[q]);
 		priv->tx_ring[q] = NULL;
@@ -278,12 +273,13 @@
 static void ravb_ring_format(struct net_device *ndev, int q)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
+	int num_tx_desc = priv->num_tx_desc;
 	struct ravb_ex_rx_desc *rx_desc;
 	struct ravb_tx_desc *tx_desc;
 	struct ravb_desc *desc;
 	int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
 	int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
-			   NUM_TX_DESC;
+			   num_tx_desc;
 	dma_addr_t dma_addr;
 	int i;
 
@@ -297,9 +293,9 @@
 	for (i = 0; i < priv->num_rx_ring[q]; i++) {
 		/* RX descriptor */
 		rx_desc = &priv->rx_ring[q][i];
-		rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+		rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
 		dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
-					  priv->rx_buf_sz,
+					  RX_BUF_SZ,
 					  DMA_FROM_DEVICE);
 		/* We just set the data size to 0 for a failed mapping which
 		 * should prevent DMA from happening...
@@ -318,8 +314,10 @@
 	for (i = 0, tx_desc = priv->tx_ring[q]; i < priv->num_tx_ring[q];
 	     i++, tx_desc++) {
 		tx_desc->die_dt = DT_EEMPTY;
-		tx_desc++;
-		tx_desc->die_dt = DT_EEMPTY;
+		if (num_tx_desc > 1) {
+			tx_desc++;
+			tx_desc->die_dt = DT_EEMPTY;
+		}
 	}
 	tx_desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]);
 	tx_desc->die_dt = DT_LINKFIX; /* type */
@@ -339,13 +337,11 @@
 static int ravb_ring_init(struct net_device *ndev, int q)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
+	int num_tx_desc = priv->num_tx_desc;
 	struct sk_buff *skb;
 	int ring_size;
 	int i;
 
-	priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
-		ETH_HLEN + VLAN_HLEN;
-
 	/* Allocate RX and TX skb rings */
 	priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
 				  sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -355,18 +351,20 @@
 		goto error;
 
 	for (i = 0; i < priv->num_rx_ring[q]; i++) {
-		skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
+		skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1);
 		if (!skb)
 			goto error;
 		ravb_set_buffer_align(skb);
 		priv->rx_skb[q][i] = skb;
 	}
 
-	/* Allocate rings for the aligned buffers */
-	priv->tx_align[q] = kmalloc(DPTR_ALIGN * priv->num_tx_ring[q] +
-				    DPTR_ALIGN - 1, GFP_KERNEL);
-	if (!priv->tx_align[q])
-		goto error;
+	if (num_tx_desc > 1) {
+		/* Allocate rings for the aligned buffers */
+		priv->tx_align[q] = kmalloc(DPTR_ALIGN * priv->num_tx_ring[q] +
+					    DPTR_ALIGN - 1, GFP_KERNEL);
+		if (!priv->tx_align[q])
+			goto error;
+	}
 
 	/* Allocate all RX descriptors. */
 	ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
@@ -380,7 +378,7 @@
 
 	/* Allocate all TX descriptors. */
 	ring_size = sizeof(struct ravb_tx_desc) *
-		    (priv->num_tx_ring[q] * NUM_TX_DESC + 1);
+		    (priv->num_tx_ring[q] * num_tx_desc + 1);
 	priv->tx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size,
 					      &priv->tx_desc_dma[q],
 					      GFP_KERNEL);
@@ -398,13 +396,11 @@
 /* E-MAC init function */
 static void ravb_emac_init(struct net_device *ndev)
 {
-	struct ravb_private *priv = netdev_priv(ndev);
-
 	/* Receive frame limit set register */
 	ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
 
 	/* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
-	ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) |
+	ravb_write(ndev, ECMR_ZPF | ECMR_DM |
 		   (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
 		   ECMR_TE | ECMR_RE, ECMR);
 
@@ -448,18 +444,12 @@
 	ravb_ring_format(ndev, RAVB_BE);
 	ravb_ring_format(ndev, RAVB_NC);
 
-#if defined(__LITTLE_ENDIAN)
-	ravb_modify(ndev, CCC, CCC_BOC, 0);
-#else
-	ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC);
-#endif
-
 	/* Set AVB RX */
 	ravb_write(ndev,
 		   RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR);
 
 	/* Set FIFO size */
-	ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00222200, TGC);
+	ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00112200, TGC);
 
 	/* Timestamp enable */
 	ravb_write(ndev, TCCR_TFEN, TCCR);
@@ -514,7 +504,10 @@
 			kfree(ts_skb);
 			if (tag == tfa_tag) {
 				skb_tstamp_tx(skb, &shhwtstamps);
+				dev_consume_skb_any(skb);
 				break;
+			} else {
+				dev_kfree_skb_any(skb);
 			}
 		}
 		ravb_modify(ndev, TCCR, TCCR_TFR, TCCR_TFR);
@@ -525,13 +518,15 @@
 {
 	u8 *hw_csum;
 
-	/* The hardware checksum is 2 bytes appended to packet data */
-	if (unlikely(skb->len < 2))
+	/* The hardware checksum is contained in sizeof(__sum16) (2) bytes
+	 * appended to packet data
+	 */
+	if (unlikely(skb->len < sizeof(__sum16)))
 		return;
-	hw_csum = skb_tail_pointer(skb) - 2;
+	hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
 	skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
 	skb->ip_summed = CHECKSUM_COMPLETE;
-	skb_trim(skb, skb->len - 2);
+	skb_trim(skb, skb->len - sizeof(__sum16));
 }
 
 /* Packet receive function for Ethernet AVB */
@@ -586,7 +581,7 @@
 			skb = priv->rx_skb[q][entry];
 			priv->rx_skb[q][entry] = NULL;
 			dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
-					 priv->rx_buf_sz,
+					 RX_BUF_SZ,
 					 DMA_FROM_DEVICE);
 			get_ts &= (q == RAVB_NC) ?
 					RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -619,11 +614,11 @@
 	for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
 		entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
 		desc = &priv->rx_ring[q][entry];
-		desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+		desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
 
 		if (!priv->rx_skb[q][entry]) {
 			skb = netdev_alloc_skb(ndev,
-					       priv->rx_buf_sz +
+					       RX_BUF_SZ +
 					       RAVB_ALIGN - 1);
 			if (!skb)
 				break;	/* Better luck next round. */
@@ -727,7 +722,6 @@
 
 	spin_lock(&priv->lock);
 	ravb_emac_interrupt_unlocked(ndev);
-	mmiowb();
 	spin_unlock(&priv->lock);
 	return IRQ_HANDLED;
 }
@@ -847,7 +841,6 @@
 		result = IRQ_HANDLED;
 	}
 
-	mmiowb();
 	spin_unlock(&priv->lock);
 	return result;
 }
@@ -880,7 +873,6 @@
 		result = IRQ_HANDLED;
 	}
 
-	mmiowb();
 	spin_unlock(&priv->lock);
 	return result;
 }
@@ -897,7 +889,6 @@
 	if (ravb_queue_interrupt(ndev, q))
 		result = IRQ_HANDLED;
 
-	mmiowb();
 	spin_unlock(&priv->lock);
 	return result;
 }
@@ -942,7 +933,6 @@
 			ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
 			ravb_tx_free(ndev, q, true);
 			netif_wake_subqueue(ndev, q);
-			mmiowb();
 			spin_unlock_irqrestore(&priv->lock, flags);
 		}
 	}
@@ -958,7 +948,6 @@
 		ravb_write(ndev, mask, RIE0);
 		ravb_write(ndev, mask, TIE);
 	}
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	/* Receive error message handling */
@@ -987,12 +976,6 @@
 		ravb_rcv_snd_disable(ndev);
 
 	if (phydev->link) {
-		if (phydev->duplex != priv->duplex) {
-			new_state = true;
-			priv->duplex = phydev->duplex;
-			ravb_set_duplex(ndev);
-		}
-
 		if (phydev->speed != priv->speed) {
 			new_state = true;
 			priv->speed = phydev->speed;
@@ -1007,14 +990,12 @@
 		new_state = true;
 		priv->link = 0;
 		priv->speed = 0;
-		priv->duplex = -1;
 	}
 
 	/* Enable TX and RX right over here, if E-MAC change is ignored */
 	if (priv->no_avb_link && phydev->link)
 		ravb_rcv_snd_enable(ndev);
 
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (new_state && netif_msg_link(priv))
@@ -1037,7 +1018,6 @@
 
 	priv->link = 0;
 	priv->speed = 0;
-	priv->duplex = -1;
 
 	/* Try connecting to PHY */
 	pn = of_parse_phandle(np, "phy-handle", 0);
@@ -1074,8 +1054,15 @@
 		netdev_info(ndev, "limited PHY to 100Mbit/s\n");
 	}
 
-	/* 10BASE is not supported */
-	phydev->supported &= ~PHY_10BT_FEATURES;
+	/* 10BASE, Pause and Asym Pause are not supported */
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_Pause_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_Asym_Pause_BIT);
+
+	/* Half Duplex is not supported */
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
 
 	phy_attached_info(phydev);
 
@@ -1485,6 +1472,7 @@
 static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
+	int num_tx_desc = priv->num_tx_desc;
 	u16 q = skb_get_queue_mapping(skb);
 	struct ravb_tstamp_skb *ts_skb;
 	struct ravb_tx_desc *desc;
@@ -1496,7 +1484,7 @@
 
 	spin_lock_irqsave(&priv->lock, flags);
 	if (priv->cur_tx[q] - priv->dirty_tx[q] > (priv->num_tx_ring[q] - 1) *
-	    NUM_TX_DESC) {
+	    num_tx_desc) {
 		netif_err(priv, tx_queued, ndev,
 			  "still transmitting with the full ring!\n");
 		netif_stop_subqueue(ndev, q);
@@ -1507,41 +1495,55 @@
 	if (skb_put_padto(skb, ETH_ZLEN))
 		goto exit;
 
-	entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
-	priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
+	entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * num_tx_desc);
+	priv->tx_skb[q][entry / num_tx_desc] = skb;
 
-	buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
-		 entry / NUM_TX_DESC * DPTR_ALIGN;
-	len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data;
-	/* Zero length DMA descriptors are problematic as they seem to
-	 * terminate DMA transfers. Avoid them by simply using a length of
-	 * DPTR_ALIGN (4) when skb data is aligned to DPTR_ALIGN.
-	 *
-	 * As skb is guaranteed to have at least ETH_ZLEN (60) bytes of
-	 * data by the call to skb_put_padto() above this is safe with
-	 * respect to both the length of the first DMA descriptor (len)
-	 * overflowing the available data and the length of the second DMA
-	 * descriptor (skb->len - len) being negative.
-	 */
-	if (len == 0)
-		len = DPTR_ALIGN;
+	if (num_tx_desc > 1) {
+		buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
+			 entry / num_tx_desc * DPTR_ALIGN;
+		len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data;
 
-	memcpy(buffer, skb->data, len);
-	dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE);
-	if (dma_mapping_error(ndev->dev.parent, dma_addr))
-		goto drop;
+		/* Zero length DMA descriptors are problematic as they seem
+		 * to terminate DMA transfers. Avoid them by simply using a
+		 * length of DPTR_ALIGN (4) when skb data is aligned to
+		 * DPTR_ALIGN.
+		 *
+		 * As skb is guaranteed to have at least ETH_ZLEN (60)
+		 * bytes of data by the call to skb_put_padto() above this
+		 * is safe with respect to both the length of the first DMA
+		 * descriptor (len) overflowing the available data and the
+		 * length of the second DMA descriptor (skb->len - len)
+		 * being negative.
+		 */
+		if (len == 0)
+			len = DPTR_ALIGN;
 
-	desc = &priv->tx_ring[q][entry];
-	desc->ds_tagl = cpu_to_le16(len);
-	desc->dptr = cpu_to_le32(dma_addr);
+		memcpy(buffer, skb->data, len);
+		dma_addr = dma_map_single(ndev->dev.parent, buffer, len,
+					  DMA_TO_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, dma_addr))
+			goto drop;
 
-	buffer = skb->data + len;
-	len = skb->len - len;
-	dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE);
-	if (dma_mapping_error(ndev->dev.parent, dma_addr))
-		goto unmap;
+		desc = &priv->tx_ring[q][entry];
+		desc->ds_tagl = cpu_to_le16(len);
+		desc->dptr = cpu_to_le32(dma_addr);
 
-	desc++;
+		buffer = skb->data + len;
+		len = skb->len - len;
+		dma_addr = dma_map_single(ndev->dev.parent, buffer, len,
+					  DMA_TO_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, dma_addr))
+			goto unmap;
+
+		desc++;
+	} else {
+		desc = &priv->tx_ring[q][entry];
+		len = skb->len;
+		dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb->len,
+					  DMA_TO_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, dma_addr))
+			goto drop;
+	}
 	desc->ds_tagl = cpu_to_le16(len);
 	desc->dptr = cpu_to_le32(dma_addr);
 
@@ -1549,12 +1551,14 @@
 	if (q == RAVB_NC) {
 		ts_skb = kmalloc(sizeof(*ts_skb), GFP_ATOMIC);
 		if (!ts_skb) {
-			desc--;
-			dma_unmap_single(ndev->dev.parent, dma_addr, len,
-					 DMA_TO_DEVICE);
+			if (num_tx_desc > 1) {
+				desc--;
+				dma_unmap_single(ndev->dev.parent, dma_addr,
+						 len, DMA_TO_DEVICE);
+			}
 			goto unmap;
 		}
-		ts_skb->skb = skb;
+		ts_skb->skb = skb_get(skb);
 		ts_skb->tag = priv->ts_skb_tag++;
 		priv->ts_skb_tag &= 0x3ff;
 		list_add_tail(&ts_skb->list, &priv->ts_skb_list);
@@ -1568,20 +1572,22 @@
 	skb_tx_timestamp(skb);
 	/* Descriptor type must be set after all the above writes */
 	dma_wmb();
-	desc->die_dt = DT_FEND;
-	desc--;
-	desc->die_dt = DT_FSTART;
-
+	if (num_tx_desc > 1) {
+		desc->die_dt = DT_FEND;
+		desc--;
+		desc->die_dt = DT_FSTART;
+	} else {
+		desc->die_dt = DT_FSINGLE;
+	}
 	ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q);
 
-	priv->cur_tx[q] += NUM_TX_DESC;
+	priv->cur_tx[q] += num_tx_desc;
 	if (priv->cur_tx[q] - priv->dirty_tx[q] >
-	    (priv->num_tx_ring[q] - 1) * NUM_TX_DESC &&
+	    (priv->num_tx_ring[q] - 1) * num_tx_desc &&
 	    !ravb_tx_free(ndev, q, true))
 		netif_stop_subqueue(ndev, q);
 
 exit:
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 	return NETDEV_TX_OK;
 
@@ -1590,13 +1596,12 @@
 			 le16_to_cpu(desc->ds_tagl), DMA_TO_DEVICE);
 drop:
 	dev_kfree_skb_any(skb);
-	priv->tx_skb[q][entry / NUM_TX_DESC] = NULL;
+	priv->tx_skb[q][entry / num_tx_desc] = NULL;
 	goto exit;
 }
 
 static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			     struct net_device *sb_dev,
-			     select_queue_fallback_t fallback)
+			     struct net_device *sb_dev)
 {
 	/* If skb needs TX timestamp, it is handled in network control queue */
 	return (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? RAVB_NC :
@@ -1613,17 +1618,10 @@
 	stats0 = &priv->stats[RAVB_BE];
 	stats1 = &priv->stats[RAVB_NC];
 
-	nstats->tx_dropped += ravb_read(ndev, TROCR);
-	ravb_write(ndev, 0, TROCR);	/* (write clear) */
-	nstats->collisions += ravb_read(ndev, CDCR);
-	ravb_write(ndev, 0, CDCR);	/* (write clear) */
-	nstats->tx_carrier_errors += ravb_read(ndev, LCCR);
-	ravb_write(ndev, 0, LCCR);	/* (write clear) */
-
-	nstats->tx_carrier_errors += ravb_read(ndev, CERCR);
-	ravb_write(ndev, 0, CERCR);	/* (write clear) */
-	nstats->tx_carrier_errors += ravb_read(ndev, CEECR);
-	ravb_write(ndev, 0, CEECR);	/* (write clear) */
+	if (priv->chip_id == RCAR_GEN3) {
+		nstats->tx_dropped += ravb_read(ndev, TROCR);
+		ravb_write(ndev, 0, TROCR);	/* (write clear) */
+	}
 
 	nstats->rx_packets = stats0->rx_packets + stats1->rx_packets;
 	nstats->tx_packets = stats0->tx_packets + stats1->tx_packets;
@@ -1653,7 +1651,6 @@
 	spin_lock_irqsave(&priv->lock, flags);
 	ravb_modify(ndev, ECMR, ECMR_PRM,
 		    ndev->flags & IFF_PROMISC ? ECMR_PRM : 0);
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
@@ -1683,6 +1680,7 @@
 	/* Clear the timestamp list */
 	list_for_each_entry_safe(ts_skb, ts_skb2, &priv->ts_skb_list, list) {
 		list_del(&ts_skb->list);
+		kfree_skb(ts_skb->skb);
 		kfree(ts_skb);
 	}
 
@@ -1800,10 +1798,15 @@
 
 static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
 {
-	if (netif_running(ndev))
-		return -EBUSY;
+	struct ravb_private *priv = netdev_priv(ndev);
 
 	ndev->mtu = new_mtu;
+
+	if (netif_running(ndev)) {
+		synchronize_irq(priv->emac_irq);
+		ravb_emac_init(ndev);
+	}
+
 	netdev_update_features(ndev);
 
 	return 0;
@@ -1950,6 +1953,13 @@
 	}
 }
 
+static const struct soc_device_attribute ravb_delay_mode_quirk_match[] = {
+	{ .soc_id = "r8a774c0" },
+	{ .soc_id = "r8a77990" },
+	{ .soc_id = "r8a77995" },
+	{ /* sentinel */ }
+};
+
 /* Set tx and rx clock internal delay modes */
 static void ravb_set_delay_mode(struct net_device *ndev)
 {
@@ -1961,8 +1971,12 @@
 		set |= APSR_DM_RDM;
 
 	if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
-	    priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID)
-		set |= APSR_DM_TDM;
+	    priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+		if (!WARN(soc_device_match(ravb_delay_mode_quirk_match),
+			  "phy-mode %s requires TX clock internal delay mode which is not supported by this hardware revision. Please update device tree",
+			  phy_modes(priv->phy_interface)))
+			set |= APSR_DM_TDM;
+	}
 
 	ravb_modify(ndev, APSR, APSR_DM, set);
 }
@@ -2076,6 +2090,9 @@
 	ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
 	ndev->min_mtu = ETH_MIN_MTU;
 
+	priv->num_tx_desc = chip_id == RCAR_GEN2 ?
+		NUM_TX_DESC_GEN2 : NUM_TX_DESC_GEN3;
+
 	/* Set function */
 	ndev->netdev_ops = &ravb_netdev_ops;
 	ndev->ethtool_ops = &ravb_ethtool_ops;
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index dce2a40..6984bd5 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -182,6 +182,13 @@
 	struct net_device *ndev = priv->ndev;
 	unsigned long flags;
 
+	/* Reject requests with unsupported flags */
+	if (req->flags & ~(PTP_ENABLE_FEATURE |
+			   PTP_RISING_EDGE |
+			   PTP_FALLING_EDGE |
+			   PTP_STRICT_FLAGS))
+		return -EOPNOTSUPP;
+
 	if (req->index)
 		return -EINVAL;
 
@@ -196,7 +203,6 @@
 		ravb_write(ndev, GIE_PTCS, GIE);
 	else
 		ravb_write(ndev, GID_PTCD, GID);
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
@@ -212,6 +218,10 @@
 	unsigned long flags;
 	int error = 0;
 
+	/* Reject requests with unsupported flags */
+	if (req->flags)
+		return -EOPNOTSUPP;
+
 	if (req->index)
 		return -EINVAL;
 
@@ -259,7 +269,6 @@
 		else
 			ravb_write(ndev, GID_PTMD0, GID);
 	}
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return error;
@@ -331,7 +340,6 @@
 	spin_lock_irqsave(&priv->lock, flags);
 	ravb_wait(ndev, GCCR, GCCR_TCR, GCCR_TCR_NOREQ);
 	ravb_modify(ndev, GCCR, GCCR_TCSS, GCCR_TCSS_ADJGPTP);
-	mmiowb();
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	priv->ptp.clock = ptp_clock_register(&priv->ptp.info, &pdev->dev);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f27a0dc..7ba35a0 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -555,7 +555,7 @@
 	sh_eth_write(ndev, 0, RDFFR);
 
 	/* Reset HW CRC register */
-	if (mdp->cd->hw_checksum)
+	if (mdp->cd->csmr)
 		sh_eth_write(ndev, 0, CSMR);
 
 	/* Select MII mode */
@@ -619,7 +619,8 @@
 	.no_trimd	= 1,
 	.no_ade		= 1,
 	.xdfar_rw	= 1,
-	.hw_checksum	= 1,
+	.csmr		= 1,
+	.rx_csum	= 1,
 	.tsu		= 1,
 	.no_tx_cntrs	= 1,
 };
@@ -668,7 +669,8 @@
 	.no_trimd	= 1,
 	.no_ade		= 1,
 	.xdfar_rw	= 1,
-	.hw_checksum	= 1,
+	.csmr		= 1,
+	.rx_csum	= 1,
 	.tsu		= 1,
 	.select_mii	= 1,
 	.magic		= 1,
@@ -793,7 +795,8 @@
 	.no_trimd	= 1,
 	.no_ade		= 1,
 	.xdfar_rw	= 1,
-	.hw_checksum	= 1,
+	.csmr		= 1,
+	.rx_csum	= 1,
 	.select_mii	= 1,
 	.magic		= 1,
 	.cexcr		= 1,
@@ -1045,7 +1048,8 @@
 	.no_ade		= 1,
 	.xdfar_rw	= 1,
 	.tsu		= 1,
-	.hw_checksum	= 1,
+	.csmr		= 1,
+	.rx_csum	= 1,
 	.select_mii	= 1,
 	.magic		= 1,
 	.cexcr		= 1,
@@ -1088,6 +1092,7 @@
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
 	.cexcr		= 1,
+	.rx_csum	= 1,
 	.dual_port	= 1,
 };
 
@@ -1532,8 +1537,9 @@
 	mdp->irq_enabled = true;
 	sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR);
 
-	/* PAUSE Prohibition */
+	/* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
 	sh_eth_write(ndev, ECMR_ZPF | (mdp->duplex ? ECMR_DM : 0) |
+		     (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
 		     ECMR_TE | ECMR_RE, ECMR);
 
 	if (mdp->cd->set_rate)
@@ -1588,10 +1594,27 @@
 	sh_eth_get_stats(ndev);
 	mdp->cd->soft_reset(ndev);
 
+	/* Set the RMII mode again if required */
+	if (mdp->cd->rmiimode)
+		sh_eth_write(ndev, 0x1, RMIIMODE);
+
 	/* Set MAC address again */
 	update_mac_address(ndev);
 }
 
+static void sh_eth_rx_csum(struct sk_buff *skb)
+{
+	u8 *hw_csum;
+
+	/* The hardware checksum is 2 bytes appended to packet data */
+	if (unlikely(skb->len < sizeof(__sum16)))
+		return;
+	hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
+	skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb_trim(skb, skb->len - sizeof(__sum16));
+}
+
 /* Packet receive function */
 static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 {
@@ -1633,7 +1656,7 @@
 		 * the RFS bits are from bit 25 to bit 16. So, the
 		 * driver needs right shifting by 16.
 		 */
-		if (mdp->cd->hw_checksum)
+		if (mdp->cd->csmr)
 			desc_status >>= 16;
 
 		skb = mdp->rx_skbuff[entry];
@@ -1666,6 +1689,8 @@
 					 DMA_FROM_DEVICE);
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, ndev);
+			if (ndev->features & NETIF_F_RXCSUM)
+				sh_eth_rx_csum(skb);
 			netif_receive_skb(skb);
 			ndev->stats.rx_packets++;
 			ndev->stats.rx_bytes += pkt_len;
@@ -1989,7 +2014,6 @@
 	if ((mdp->cd->no_psr || mdp->no_ether_link) && phydev->link)
 		sh_eth_rcv_snd_enable(ndev);
 
-	mmiowb();
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
 	if (new_state && netif_msg_link(mdp))
@@ -2173,7 +2197,7 @@
 	add_reg(MAFCR);
 	if (cd->rtrate)
 		add_reg(RTRATE);
-	if (cd->hw_checksum)
+	if (cd->csmr)
 		add_reg(CSMR);
 	if (cd->select_mii)
 		add_reg(RMII_MII);
@@ -2921,6 +2945,39 @@
 	spin_unlock_irqrestore(&mdp->lock, flags);
 }
 
+static void sh_eth_set_rx_csum(struct net_device *ndev, bool enable)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdp->lock, flags);
+
+	/* Disable TX and RX */
+	sh_eth_rcv_snd_disable(ndev);
+
+	/* Modify RX Checksum setting */
+	sh_eth_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0);
+
+	/* Enable TX and RX */
+	sh_eth_rcv_snd_enable(ndev);
+
+	spin_unlock_irqrestore(&mdp->lock, flags);
+}
+
+static int sh_eth_set_features(struct net_device *ndev,
+			       netdev_features_t features)
+{
+	netdev_features_t changed = ndev->features ^ features;
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	if (changed & NETIF_F_RXCSUM && mdp->cd->rx_csum)
+		sh_eth_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
+
+	ndev->features = features;
+
+	return 0;
+}
+
 static int sh_eth_get_vtag_index(struct sh_eth_private *mdp)
 {
 	if (!mdp->port)
@@ -3102,6 +3159,7 @@
 	.ndo_change_mtu		= sh_eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_features	= sh_eth_set_features,
 };
 
 static const struct net_device_ops sh_eth_netdev_ops_tsu = {
@@ -3117,6 +3175,7 @@
 	.ndo_change_mtu		= sh_eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_features	= sh_eth_set_features,
 };
 
 #ifdef CONFIG_OF
@@ -3125,16 +3184,20 @@
 	struct device_node *np = dev->of_node;
 	struct sh_eth_plat_data *pdata;
 	const char *mac_addr;
+	int ret;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return NULL;
 
-	pdata->phy_interface = of_get_phy_mode(np);
+	ret = of_get_phy_mode(np);
+	if (ret < 0)
+		return NULL;
+	pdata->phy_interface = ret;
 
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
-		memcpy(pdata->mac_addr, mac_addr, ETH_ALEN);
+	if (!IS_ERR(mac_addr))
+		ether_addr_copy(pdata->mac_addr, mac_addr);
 
 	pdata->no_ether_link =
 		of_property_read_bool(np, "renesas,no-ether-link");
@@ -3245,6 +3308,11 @@
 	ndev->max_mtu = 2000 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
 	ndev->min_mtu = ETH_MIN_MTU;
 
+	if (mdp->cd->rx_csum) {
+		ndev->features = NETIF_F_RXCSUM;
+		ndev->hw_features = NETIF_F_RXCSUM;
+	}
+
 	/* set function */
 	if (mdp->cd->tsu)
 		ndev->netdev_ops = &sh_eth_netdev_ops_tsu;
@@ -3294,7 +3362,7 @@
 			goto out_release;
 		}
 		mdp->port = port;
-		ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
+		ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 		/* Need to init only the first port of the two sharing a TSU */
 		if (port == 0) {
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 0c18650..8507263 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -499,7 +499,8 @@
 	unsigned no_ade:1;	/* E-DMAC DOES NOT have ADE bit in EESR */
 	unsigned no_xdfar:1;	/* E-DMAC DOES NOT have RDFAR/TDFAR */
 	unsigned xdfar_rw:1;	/* E-DMAC has writeable RDFAR/TDFAR */
-	unsigned hw_checksum:1;	/* E-DMAC has CSMR */
+	unsigned csmr:1;	/* E-DMAC has CSMR */
+	unsigned rx_csum:1;	/* EtherC has ECMR.RCSC */
 	unsigned select_mii:1;	/* EtherC has RMII_MII (MII select register) */
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
diff --git a/drivers/net/ethernet/rocker/Kconfig b/drivers/net/ethernet/rocker/Kconfig
index b9952ef..1083de9 100644
--- a/drivers/net/ethernet/rocker/Kconfig
+++ b/drivers/net/ethernet/rocker/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Rocker device configuration
 #
diff --git a/drivers/net/ethernet/rocker/Makefile b/drivers/net/ethernet/rocker/Makefile
index faa36ac..6e0a363 100644
--- a/drivers/net/ethernet/rocker/Makefile
+++ b/drivers/net/ethernet/rocker/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Rocker network device drivers.
 #
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 748fb12..6fad253 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/rocker/rocker.h - Rocker switch device driver
  * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _ROCKER_H
@@ -109,8 +105,6 @@
 	int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port,
 					  unsigned long brport_flags,
 					  struct switchdev_trans *trans);
-	int (*port_attr_bridge_flags_get)(const struct rocker_port *rocker_port,
-					  unsigned long *p_brport_flags);
 	int (*port_attr_bridge_flags_support_get)(const struct rocker_port *
 						  rocker_port,
 						  unsigned long *
diff --git a/drivers/net/ethernet/rocker/rocker_hw.h b/drivers/net/ethernet/rocker/rocker_hw.h
index 2adfe88..59f1f8b 100644
--- a/drivers/net/ethernet/rocker/rocker_hw.h
+++ b/drivers/net/ethernet/rocker/rocker_hw.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/rocker/rocker_hw.h - Rocker switch device driver
  * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _ROCKER_HW_H
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index aeafdb9..786b158 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/rocker/rocker.c - Rocker switch device driver
  * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -371,7 +367,7 @@
 static struct rocker_desc_info *
 rocker_desc_head_get(const struct rocker_dma_ring_info *info)
 {
-	static struct rocker_desc_info *desc_info;
+	struct rocker_desc_info *desc_info;
 	u32 head = __pos_inc(info->head, info->size);
 
 	desc_info = &info->desc_info[info->head];
@@ -402,7 +398,7 @@
 static struct rocker_desc_info *
 rocker_desc_tail_get(struct rocker_dma_ring_info *info)
 {
-	static struct rocker_desc_info *desc_info;
+	struct rocker_desc_info *desc_info;
 
 	if (info->tail == info->head)
 		return NULL; /* nothing to be done between head and tail */
@@ -1566,6 +1562,43 @@
 }
 
 static int
+rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port *
+						rocker_port,
+						unsigned long *
+						p_brport_flags_support)
+{
+	struct rocker_world_ops *wops = rocker_port->rocker->wops;
+
+	if (!wops->port_attr_bridge_flags_support_get)
+		return -EOPNOTSUPP;
+	return wops->port_attr_bridge_flags_support_get(rocker_port,
+							p_brport_flags_support);
+}
+
+static int
+rocker_world_port_attr_pre_bridge_flags_set(struct rocker_port *rocker_port,
+					    unsigned long brport_flags,
+					    struct switchdev_trans *trans)
+{
+	struct rocker_world_ops *wops = rocker_port->rocker->wops;
+	unsigned long brport_flags_s;
+	int err;
+
+	if (!wops->port_attr_bridge_flags_set)
+		return -EOPNOTSUPP;
+
+	err = rocker_world_port_attr_bridge_flags_support_get(rocker_port,
+							      &brport_flags_s);
+	if (err)
+		return err;
+
+	if (brport_flags & ~brport_flags_s)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
 rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
 					unsigned long brport_flags,
 					struct switchdev_trans *trans)
@@ -1583,31 +1616,6 @@
 }
 
 static int
-rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port,
-					unsigned long *p_brport_flags)
-{
-	struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
-	if (!wops->port_attr_bridge_flags_get)
-		return -EOPNOTSUPP;
-	return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags);
-}
-
-static int
-rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port *
-						rocker_port,
-						unsigned long *
-						p_brport_flags_support)
-{
-	struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
-	if (!wops->port_attr_bridge_flags_support_get)
-		return -EOPNOTSUPP;
-	return wops->port_attr_bridge_flags_support_get(rocker_port,
-							p_brport_flags_support);
-}
-
-static int
 rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port,
 					      u32 ageing_time,
 					      struct switchdev_trans *trans)
@@ -1632,9 +1640,6 @@
 {
 	struct rocker_world_ops *wops = rocker_port->rocker->wops;
 
-	if (netif_is_bridge_master(vlan->obj.orig_dev))
-		return -EOPNOTSUPP;
-
 	if (!wops->port_obj_vlan_add)
 		return -EOPNOTSUPP;
 
@@ -2029,6 +2034,18 @@
 			    err);
 }
 
+static int rocker_port_get_port_parent_id(struct net_device *dev,
+					  struct netdev_phys_item_id *ppid)
+{
+	const struct rocker_port *rocker_port = netdev_priv(dev);
+	const struct rocker *rocker = rocker_port->rocker;
+
+	ppid->id_len = sizeof(rocker->hw.id);
+	memcpy(&ppid->id, &rocker->hw.id, ppid->id_len);
+
+	return 0;
+}
+
 static const struct net_device_ops rocker_port_netdev_ops = {
 	.ndo_open			= rocker_port_open,
 	.ndo_stop			= rocker_port_stop,
@@ -2038,39 +2055,13 @@
 	.ndo_get_phys_port_name		= rocker_port_get_phys_port_name,
 	.ndo_change_proto_down		= rocker_port_change_proto_down,
 	.ndo_neigh_destroy		= rocker_port_neigh_destroy,
+	.ndo_get_port_parent_id		= rocker_port_get_port_parent_id,
 };
 
 /********************
  * swdev interface
  ********************/
 
-static int rocker_port_attr_get(struct net_device *dev,
-				struct switchdev_attr *attr)
-{
-	const struct rocker_port *rocker_port = netdev_priv(dev);
-	const struct rocker *rocker = rocker_port->rocker;
-	int err = 0;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(rocker->hw.id);
-		memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-		err = rocker_world_port_attr_bridge_flags_get(rocker_port,
-							      &attr->u.brport_flags);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		err = rocker_world_port_attr_bridge_flags_support_get(rocker_port,
-								      &attr->u.brport_flags_support);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return err;
-}
-
 static int rocker_port_attr_set(struct net_device *dev,
 				const struct switchdev_attr *attr,
 				struct switchdev_trans *trans)
@@ -2084,6 +2075,11 @@
 							   attr->u.stp_state,
 							   trans);
 		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		err = rocker_world_port_attr_pre_bridge_flags_set(rocker_port,
+							      attr->u.brport_flags,
+							      trans);
+		break;
 	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
 		err = rocker_world_port_attr_bridge_flags_set(rocker_port,
 							      attr->u.brport_flags,
@@ -2142,13 +2138,6 @@
 	return err;
 }
 
-static const struct switchdev_ops rocker_port_switchdev_ops = {
-	.switchdev_port_attr_get	= rocker_port_attr_get,
-	.switchdev_port_attr_set	= rocker_port_attr_set,
-	.switchdev_port_obj_add		= rocker_port_obj_add,
-	.switchdev_port_obj_del		= rocker_port_obj_del,
-};
-
 struct rocker_fib_event_work {
 	struct work_struct work;
 	union {
@@ -2200,6 +2189,9 @@
 	struct rocker_fib_event_work *fib_work;
 	struct fib_notifier_info *info = ptr;
 
+	if (!net_eq(info->net, &init_net))
+		return NOTIFY_DONE;
+
 	if (info->family != AF_INET)
 		return NOTIFY_DONE;
 
@@ -2214,6 +2206,21 @@
 	switch (event) {
 	case FIB_EVENT_ENTRY_ADD: /* fall through */
 	case FIB_EVENT_ENTRY_DEL:
+		if (info->family == AF_INET) {
+			struct fib_entry_notifier_info *fen_info = ptr;
+
+			if (fen_info->fi->fib_nh_is_v6) {
+				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
+				kfree(fib_work);
+				return notifier_from_errno(-EINVAL);
+			}
+			if (fen_info->fi->nh) {
+				NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+				kfree(fib_work);
+				return notifier_from_errno(-EINVAL);
+			}
+		}
+
 		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
 		/* Take reference on fib_info to prevent it from being
 		 * freed while work is queued. Release it afterwards.
@@ -2602,7 +2609,6 @@
 	rocker_port_dev_addr_init(rocker_port);
 	dev->netdev_ops = &rocker_port_netdev_ops;
 	dev->ethtool_ops = &rocker_port_ethtool_ops;
-	dev->switchdev_ops = &rocker_port_switchdev_ops;
 	netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
 			  NAPI_POLL_WEIGHT);
 	netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
@@ -2713,6 +2719,19 @@
 	return dev->netdev_ops == &rocker_port_netdev_ops;
 }
 
+static int
+rocker_switchdev_port_attr_set_event(struct net_device *netdev,
+		struct switchdev_notifier_port_attr_info *port_attr_info)
+{
+	int err;
+
+	err = rocker_port_attr_set(netdev, port_attr_info->attr,
+				   port_attr_info->trans);
+
+	port_attr_info->handled = true;
+	return notifier_from_errno(err);
+}
+
 struct rocker_switchdev_event_work {
 	struct work_struct work;
 	struct switchdev_notifier_fdb_info fdb_info;
@@ -2728,8 +2747,9 @@
 
 	info.addr = recv_info->addr;
 	info.vid = recv_info->vid;
+	info.offloaded = true;
 	call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
-				 rocker_port->dev, &info.info);
+				 rocker_port->dev, &info.info, NULL);
 }
 
 static void rocker_switchdev_event_work(struct work_struct *work)
@@ -2781,6 +2801,9 @@
 	if (!rocker_port_dev_check(dev))
 		return NOTIFY_DONE;
 
+	if (event == SWITCHDEV_PORT_ATTR_SET)
+		return rocker_switchdev_port_attr_set_event(dev, ptr);
+
 	rocker_port = netdev_priv(dev);
 	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
 	if (WARN_ON(!switchdev_work))
@@ -2796,6 +2819,11 @@
 		memcpy(&switchdev_work->fdb_info, ptr,
 		       sizeof(switchdev_work->fdb_info));
 		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+		if (unlikely(!switchdev_work->fdb_info.addr)) {
+			kfree(switchdev_work);
+			return NOTIFY_BAD;
+		}
+
 		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
 				fdb_info->addr);
 		/* Take a reference on the rocker device */
@@ -2811,12 +2839,56 @@
 	return NOTIFY_DONE;
 }
 
+static int
+rocker_switchdev_port_obj_event(unsigned long event, struct net_device *netdev,
+			struct switchdev_notifier_port_obj_info *port_obj_info)
+{
+	int err = -EOPNOTSUPP;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = rocker_port_obj_add(netdev, port_obj_info->obj,
+					  port_obj_info->trans);
+		break;
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = rocker_port_obj_del(netdev, port_obj_info->obj);
+		break;
+	}
+
+	port_obj_info->handled = true;
+	return notifier_from_errno(err);
+}
+
+static int rocker_switchdev_blocking_event(struct notifier_block *unused,
+					   unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+	if (!rocker_port_dev_check(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+	case SWITCHDEV_PORT_OBJ_DEL:
+		return rocker_switchdev_port_obj_event(event, dev, ptr);
+	case SWITCHDEV_PORT_ATTR_SET:
+		return rocker_switchdev_port_attr_set_event(dev, ptr);
+	}
+
+	return NOTIFY_DONE;
+}
+
 static struct notifier_block rocker_switchdev_notifier = {
 	.notifier_call = rocker_switchdev_event,
 };
 
+static struct notifier_block rocker_switchdev_blocking_notifier = {
+	.notifier_call = rocker_switchdev_blocking_event,
+};
+
 static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct notifier_block *nb;
 	struct rocker *rocker;
 	int err;
 
@@ -2932,6 +3004,13 @@
 		goto err_register_switchdev_notifier;
 	}
 
+	nb = &rocker_switchdev_blocking_notifier;
+	err = register_switchdev_blocking_notifier(nb);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register switchdev blocking notifier\n");
+		goto err_register_switchdev_blocking_notifier;
+	}
+
 	rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
 
 	dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
@@ -2939,6 +3018,8 @@
 
 	return 0;
 
+err_register_switchdev_blocking_notifier:
+	unregister_switchdev_notifier(&rocker_switchdev_notifier);
 err_register_switchdev_notifier:
 	unregister_fib_notifier(&rocker->fib_nb);
 err_register_fib_notifier:
@@ -2970,6 +3051,10 @@
 static void rocker_remove(struct pci_dev *pdev)
 {
 	struct rocker *rocker = pci_get_drvdata(pdev);
+	struct notifier_block *nb;
+
+	nb = &rocker_switchdev_blocking_notifier;
+	unregister_switchdev_blocking_notifier(nb);
 
 	unregister_switchdev_notifier(&rocker_switchdev_notifier);
 	unregister_fib_notifier(&rocker->fib_nb);
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 6473cc6..7072b24 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/rocker/rocker_ofdpa.c - Rocker switch OF-DPA-like
  *					        implementation
  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
  * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -22,6 +18,7 @@
 #include <net/neighbour.h>
 #include <net/switchdev.h>
 #include <net/ip_fib.h>
+#include <net/nexthop.h>
 #include <net/arp.h>
 
 #include "rocker.h"
@@ -1833,10 +1830,10 @@
 	rtnl_lock();
 	if (learned && removing)
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
-					 lw->ofdpa_port->dev, &info.info);
+					 lw->ofdpa_port->dev, &info.info, NULL);
 	else if (learned && !removing)
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
-					 lw->ofdpa_port->dev, &info.info);
+					 lw->ofdpa_port->dev, &info.info, NULL);
 	rtnl_unlock();
 
 	kfree(work);
@@ -2286,13 +2283,13 @@
 
 	/* XXX support ECMP */
 
-	nh = fi->fib_nh;
-	nh_on_port = (fi->fib_dev == ofdpa_port->dev);
-	has_gw = !!nh->nh_gw;
+	nh = fib_info_nh(fi, 0);
+	nh_on_port = (nh->fib_nh_dev == ofdpa_port->dev);
+	has_gw = !!nh->fib_nh_gw4;
 
 	if (has_gw && nh_on_port) {
 		err = ofdpa_port_ipv4_nh(ofdpa_port, flags,
-					 nh->nh_gw, &index);
+					 nh->fib_nh_gw4, &index);
 		if (err)
 			return err;
 
@@ -2512,16 +2509,6 @@
 }
 
 static int
-ofdpa_port_attr_bridge_flags_get(const struct rocker_port *rocker_port,
-				 unsigned long *p_brport_flags)
-{
-	const struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
-
-	*p_brport_flags = ofdpa_port->brport_flags;
-	return 0;
-}
-
-static int
 ofdpa_port_attr_bridge_flags_support_get(const struct rocker_port *
 					 rocker_port,
 					 unsigned long *
@@ -2747,11 +2734,13 @@
 {
 	struct ofdpa *ofdpa = rocker->wpriv;
 	struct ofdpa_port *ofdpa_port;
+	struct fib_nh *nh;
 	int err;
 
 	if (ofdpa->fib_aborted)
 		return 0;
-	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+	nh = fib_info_nh(fen_info->fi, 0);
+	ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
 	if (!ofdpa_port)
 		return 0;
 	err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
@@ -2759,7 +2748,7 @@
 				  fen_info->tb_id, 0);
 	if (err)
 		return err;
-	fen_info->fi->fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+	nh->fib_nh_flags |= RTNH_F_OFFLOAD;
 	return 0;
 }
 
@@ -2768,13 +2757,15 @@
 {
 	struct ofdpa *ofdpa = rocker->wpriv;
 	struct ofdpa_port *ofdpa_port;
+	struct fib_nh *nh;
 
 	if (ofdpa->fib_aborted)
 		return 0;
-	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+	nh = fib_info_nh(fen_info->fi, 0);
+	ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
 	if (!ofdpa_port)
 		return 0;
-	fen_info->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+	nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 	return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
 				   fen_info->dst_len, fen_info->fi,
 				   fen_info->tb_id, OFDPA_OP_FLAG_REMOVE);
@@ -2794,14 +2785,16 @@
 
 	spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags);
 	hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) {
+		struct fib_nh *nh;
+
 		if (flow_entry->key.tbl_id !=
 		    ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING)
 			continue;
-		ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev,
-						       rocker);
+		nh = fib_info_nh(flow_entry->fi, 0);
+		ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
 		if (!ofdpa_port)
 			continue;
-		flow_entry->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+		nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 		ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE,
 				   flow_entry);
 	}
@@ -2823,7 +2816,6 @@
 	.port_stop = ofdpa_port_stop,
 	.port_attr_stp_state_set = ofdpa_port_attr_stp_state_set,
 	.port_attr_bridge_flags_set = ofdpa_port_attr_bridge_flags_set,
-	.port_attr_bridge_flags_get = ofdpa_port_attr_bridge_flags_get,
 	.port_attr_bridge_flags_support_get = ofdpa_port_attr_bridge_flags_support_get,
 	.port_attr_bridge_ageing_time_set = ofdpa_port_attr_bridge_ageing_time_set,
 	.port_obj_vlan_add = ofdpa_port_obj_vlan_add,
diff --git a/drivers/net/ethernet/rocker/rocker_tlv.c b/drivers/net/ethernet/rocker/rocker_tlv.c
index 8185118..256447b 100644
--- a/drivers/net/ethernet/rocker/rocker_tlv.c
+++ b/drivers/net/ethernet/rocker/rocker_tlv.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * drivers/net/ethernet/rocker/rocker_tlv.c - Rocker switch device driver
  * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/types.h>
diff --git a/drivers/net/ethernet/rocker/rocker_tlv.h b/drivers/net/ethernet/rocker/rocker_tlv.h
index dfae3c9..fe876e5 100644
--- a/drivers/net/ethernet/rocker/rocker_tlv.h
+++ b/drivers/net/ethernet/rocker/rocker_tlv.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * drivers/net/ethernet/rocker/rocker_tlv.h - Rocker switch device driver
  * Copyright (c) 2014-2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _ROCKER_TLV_H
diff --git a/drivers/net/ethernet/samsung/Kconfig b/drivers/net/ethernet/samsung/Kconfig
index fbd5e06..e92a178 100644
--- a/drivers/net/ethernet/samsung/Kconfig
+++ b/drivers/net/ethernet/samsung/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Samsung Ethernet device configuration
 #
@@ -10,7 +11,7 @@
 	  say Y.
 
 	  Note that the answer to this question does not directly affect
-	  the kernel: saying N will just case the configurator to skip all
+	  the kernel: saying N will just cause the configurator to skip all
 	  the questions about Samsung chipsets. If you say Y, you will be asked
 	  for your specific chipset/driver in the following questions.
 
diff --git a/drivers/net/ethernet/samsung/Makefile b/drivers/net/ethernet/samsung/Makefile
index 1773c29..f94faec 100644
--- a/drivers/net/ethernet/samsung/Makefile
+++ b/drivers/net/ethernet/samsung/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Samsung Ethernet device drivers.
 #
diff --git a/drivers/net/ethernet/samsung/sxgbe/Makefile b/drivers/net/ethernet/samsung/sxgbe/Makefile
index 31e9685..b7e29d0 100644
--- a/drivers/net/ethernet/samsung/sxgbe/Makefile
+++ b/drivers/net/ethernet/samsung/sxgbe/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_SXGBE_ETH) += samsung-sxgbe.o
 samsung-sxgbe-objs:= sxgbe_platform.o sxgbe_main.o sxgbe_desc.o \
 		sxgbe_dma.o sxgbe_core.o sxgbe_mtl.o  sxgbe_mdio.o \
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
index c61f260..049dc6c 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __SXGBE_COMMON_H__
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c
index 58c3569..e96e2bd 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c
index 2686bb5..b33ebf2 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h
index 1860932..ede0827 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __SXGBE_DESC_H__
 #define __SXGBE_DESC_H__
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c
index bb9b5b8..243db04 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/delay.h>
 #include <linux/export.h>
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h
index 1607b54..e8f79a2 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __SXGBE_DMA_H__
 #define __SXGBE_DMA_H__
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index c9aad0e..0775b94 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index a9da1ad..c56fcbb 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -298,8 +295,8 @@
 	/* Stop Advertising 1000BASE Capability if interface is not GMII */
 	if ((phy_iface == PHY_INTERFACE_MODE_MII) ||
 	    (phy_iface == PHY_INTERFACE_MODE_RMII))
-		phydev->advertising &= ~(SUPPORTED_1000baseT_Half |
-					 SUPPORTED_1000baseT_Full);
+		phy_set_max_speed(phydev, SPEED_1000);
+
 	if (phydev->phy_id == 0) {
 		phy_disconnect(phydev);
 		return -ENODEV;
@@ -400,9 +397,9 @@
 	}
 
 	/* allocate memory for TX descriptors */
-	tx_ring->dma_tx = dma_zalloc_coherent(dev,
-					      tx_rsize * sizeof(struct sxgbe_tx_norm_desc),
-					      &tx_ring->dma_tx_phy, GFP_KERNEL);
+	tx_ring->dma_tx = dma_alloc_coherent(dev,
+					     tx_rsize * sizeof(struct sxgbe_tx_norm_desc),
+					     &tx_ring->dma_tx_phy, GFP_KERNEL);
 	if (!tx_ring->dma_tx)
 		return -ENOMEM;
 
@@ -479,9 +476,9 @@
 	rx_ring->queue_no = queue_no;
 
 	/* allocate memory for RX descriptors */
-	rx_ring->dma_rx = dma_zalloc_coherent(priv->device,
-					      rx_rsize * sizeof(struct sxgbe_rx_norm_desc),
-					      &rx_ring->dma_rx_phy, GFP_KERNEL);
+	rx_ring->dma_rx = dma_alloc_coherent(priv->device,
+					     rx_rsize * sizeof(struct sxgbe_rx_norm_desc),
+					     &rx_ring->dma_rx_phy, GFP_KERNEL);
 
 	if (rx_ring->dma_rx == NULL)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c
index 467ff70..b1e7f7a 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c
index 324681c..298a740 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h
index 7e4810c..e563452 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __SXGBE_MTL_H__
 #define __SXGBE_MTL_H__
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index fbd00cb..2412c87 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -81,7 +78,6 @@
 {
 	int ret;
 	int i, chan;
-	struct resource *res;
 	struct device *dev = &pdev->dev;
 	void __iomem *addr;
 	struct sxgbe_priv_data *priv = NULL;
@@ -91,8 +87,7 @@
 	struct device_node *node = dev->of_node;
 
 	/* Get memory resource */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	addr = devm_ioremap_resource(dev, res);
+	addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
 
@@ -124,7 +119,7 @@
 	}
 
 	/* Get MAC address if available (DT) */
-	if (mac)
+	if (!IS_ERR_OR_NULL(mac))
 		ether_addr_copy(priv->dev->dev_addr, mac);
 
 	/* Get the TX/RX IRQ numbers */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h
index 81437d9..4def84e 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* 10G controller driver for Samsung SoCs
  *
  * Copyright (C) 2013 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * Author: Siva Reddy Kallam <siva.kallam@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __SXGBE_REGMAP_H__
 #define __SXGBE_REGMAP_H__
diff --git a/drivers/net/ethernet/seeq/Kconfig b/drivers/net/ethernet/seeq/Kconfig
index 69c62d8..f3ac9cb 100644
--- a/drivers/net/ethernet/seeq/Kconfig
+++ b/drivers/net/ethernet/seeq/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # SEEQ device configuration
 #
diff --git a/drivers/net/ethernet/seeq/Makefile b/drivers/net/ethernet/seeq/Makefile
index 0488e99..02aad78 100644
--- a/drivers/net/ethernet/seeq/Makefile
+++ b/drivers/net/ethernet/seeq/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the SEEQ network device drivers
 #
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index d1bb73b..632a7c8 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/drivers/acorn/net/ether3.c
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * SEEQ nq8005 ethernet driver for Acorn/ANT Ether3 card
  *  for Acorn machines
  *
diff --git a/drivers/net/ethernet/seeq/ether3.h b/drivers/net/ethernet/seeq/ether3.h
index be19e5f..585dd51 100644
--- a/drivers/net/ethernet/seeq/ether3.h
+++ b/drivers/net/ethernet/seeq/ether3.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/drivers/acorn/net/ether3.h
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  network driver for Acorn/ANT Ether3 cards
  */
 
diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c
index 70cce63..276c7ca 100644
--- a/drivers/net/ethernet/seeq/sgiseeq.c
+++ b/drivers/net/ethernet/seeq/sgiseeq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * sgiseeq.c: Seeq8003 ethernet driver for SGI machines.
  *
@@ -735,6 +736,7 @@
 	}
 
 	platform_set_drvdata(pdev, dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
 	sp = netdev_priv(dev);
 
 	/* Make private data page aligned */
@@ -792,15 +794,16 @@
 		printk(KERN_ERR "Sgiseeq: Cannot register net device, "
 		       "aborting.\n");
 		err = -ENODEV;
-		goto err_out_free_page;
+		goto err_out_free_attrs;
 	}
 
 	printk(KERN_INFO "%s: %s %pM\n", dev->name, sgiseeqstr, dev->dev_addr);
 
 	return 0;
 
-err_out_free_page:
-	free_page((unsigned long) sp->srings);
+err_out_free_attrs:
+	dma_free_attrs(&pdev->dev, sizeof(*sp->srings), sp->srings,
+		       sp->srings_dma, DMA_ATTR_NON_CONSISTENT);
 err_out_free_dev:
 	free_netdev(dev);
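
Two fixes ride along in this sgiseeq hunk: SET_NETDEV_DEV() gives the netdev its platform device as parent (so it appears under /sys/class/net/*/device), and the error path now releases the ring memory with the same DMA API that allocated it. A minimal sketch of the pairing, assuming the rings were obtained with dma_alloc_attrs(..., DMA_ATTR_NON_CONSISTENT) as in this driver; names are illustrative:

    #include <linux/dma-mapping.h>

    static void *rings_alloc(struct device *dev, size_t sz, dma_addr_t *dma)
    {
    	return dma_alloc_attrs(dev, sz, dma, GFP_KERNEL,
    			       DMA_ATTR_NON_CONSISTENT);
    }

    static void rings_free(struct device *dev, size_t sz, void *cpu,
    		       dma_addr_t dma)
    {
    	/* Must mirror the allocator: freeing this buffer with
    	 * free_page() would be a bug.
    	 */
    	dma_free_attrs(dev, sz, cpu, dma, DMA_ATTR_NON_CONSISTENT);
    }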
 
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 2c03262..5f36774 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Solarflare device configuration
 #
diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h
index 41ad07d..1b59e9f 100644
--- a/drivers/net/ethernet/sfc/bitfield.h
+++ b/drivers/net/ethernet/sfc/bitfield.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_BITFIELD_H
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 7eeac3d..0ec13f5 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2012-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include "net_driver.h"
@@ -511,7 +508,7 @@
 					       struct device_attribute *attr,
 					       char *buf)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n",
 		       ((efx->mcdi->fn_flags) &
@@ -523,7 +520,7 @@
 					  struct device_attribute *attr,
 					  char *buf)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n",
 		       ((efx->mcdi->fn_flags) &
@@ -6041,23 +6038,33 @@
 	{ NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3, 0,   3, "sfc_exp_rom_cfg" },
 	{ NVRAM_PARTITION_TYPE_LICENSE,		   0,    0, "sfc_license" },
 	{ NVRAM_PARTITION_TYPE_PHY_MIN,		   0xff, 0, "sfc_phy_fw" },
+	{ NVRAM_PARTITION_TYPE_MUM_FIRMWARE,	   0,    0, "sfc_mumfw" },
+	{ NVRAM_PARTITION_TYPE_EXPANSION_UEFI,	   0,    0, "sfc_uefi" },
+	{ NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS, 0,    0, "sfc_dynamic_cfg_dflt" },
+	{ NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0,    0, "sfc_exp_rom_cfg_dflt" },
+	{ NVRAM_PARTITION_TYPE_STATUS,		   0,    0, "sfc_status" },
+	{ NVRAM_PARTITION_TYPE_BUNDLE,		   0,    0, "sfc_bundle" },
+	{ NVRAM_PARTITION_TYPE_BUNDLE_METADATA,	   0,    0, "sfc_bundle_metadata" },
 };
+#define EF10_NVRAM_PARTITION_COUNT	ARRAY_SIZE(efx_ef10_nvram_types)
 
 static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
 					struct efx_mcdi_mtd_partition *part,
-					unsigned int type)
+					unsigned int type,
+					unsigned long *found)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN);
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX);
 	const struct efx_ef10_nvram_type_info *info;
 	size_t size, erase_size, outlen;
+	int type_idx = 0;
 	bool protected;
 	int rc;
 
-	for (info = efx_ef10_nvram_types; ; info++) {
-		if (info ==
-		    efx_ef10_nvram_types + ARRAY_SIZE(efx_ef10_nvram_types))
+	for (type_idx = 0; ; type_idx++) {
+		if (type_idx == EF10_NVRAM_PARTITION_COUNT)
 			return -ENODEV;
+		info = efx_ef10_nvram_types + type_idx;
 		if ((type & ~info->type_mask) == info->type)
 			break;
 	}
@@ -6067,8 +6074,22 @@
 	rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected);
 	if (rc)
 		return rc;
+	if (protected &&
+	    (type != NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS &&
+	     type != NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS))
+		/* Hide protected partitions that don't provide defaults. */
+		return -ENODEV;
+
 	if (protected)
-		return -ENODEV; /* hide it */
+		/* Protected partitions are read only. */
+		erase_size = 0;
+
+	/* If we've already exposed a partition of this type, hide this
+	 * duplicate.  All operations on MTDs are keyed by the type anyway,
+	 * so we can't act on the duplicate.
+	 */
+	if (__test_and_set_bit(type_idx, found))
+		return -EEXIST;
 
 	part->nvram_type = type;
 
@@ -6091,6 +6112,9 @@
 	part->common.mtd.flags = MTD_CAP_NORFLASH;
 	part->common.mtd.size = size;
 	part->common.mtd.erasesize = erase_size;
+	/* Partitions with no erase size (e.g. sfc_status) are read-only */
+	if (!erase_size)
+		part->common.mtd.flags |= MTD_NO_ERASE;
 
 	return 0;
 }
@@ -6098,6 +6122,7 @@
 static int efx_ef10_mtd_probe(struct efx_nic *efx)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX);
+	DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT) = { 0 };
 	struct efx_mcdi_mtd_partition *parts;
 	size_t outlen, n_parts_total, i, n_parts;
 	unsigned int type;
@@ -6126,11 +6151,13 @@
 	for (i = 0; i < n_parts_total; i++) {
 		type = MCDI_ARRAY_DWORD(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID,
 					i);
-		rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type);
-		if (rc == 0)
-			n_parts++;
-		else if (rc != -ENODEV)
+		rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type,
+						  found);
+		if (rc == -EEXIST || rc == -ENODEV)
+			continue;
+		if (rc)
 			goto fail;
+		n_parts++;
 	}
 
 	rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
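
The found bitmap added to ef10.c lets the probe loop expose at most one MTD per partition type: the first hit claims the type's bit, later duplicates return -EEXIST, and the caller's rc == -EEXIST || rc == -ENODEV check treats that as skip-not-fail. A self-contained sketch of the idiom (illustrative names and sizes):

    #include <linux/types.h>
    #include <linux/bitops.h>
    #include <linux/errno.h>

    #define NTYPES 16			/* stands in for EF10_NVRAM_PARTITION_COUNT */
    static DECLARE_BITMAP(found_types, NTYPES);

    /* First caller for a given type index claims its bit and returns 0;
     * any later caller sees the bit already set and gets -EEXIST, so
     * only one device is exposed per type.
     */
    static int claim_type(unsigned int type_idx)
    {
    	if (__test_and_set_bit(type_idx, found_types))
    		return -EEXIST;
    	return 0;
    }

The non-atomic __test_and_set_bit() is sufficient here because the probe loop is single-threaded.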
diff --git a/drivers/net/ethernet/sfc/ef10_regs.h b/drivers/net/ethernet/sfc/ef10_regs.h
index 6a56778..154cfad 100644
--- a/drivers/net/ethernet/sfc/ef10_regs.h
+++ b/drivers/net/ethernet/sfc/ef10_regs.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2012-2017 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_EF10_REGS_H
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 3d76fd1..52bd43f 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 #include <linux/etherdevice.h>
 #include <linux/pci.h>
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h
index 2aa444e..cfe556d 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.h
+++ b/drivers/net/ethernet/sfc/ef10_sriov.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF10_SRIOV_H
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 3d0dd39..2fef740 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/module.h>
@@ -915,7 +912,7 @@
 
 void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
 {
-	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
+	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
 }
 
 static bool efx_default_channel_want_txqs(struct efx_channel *channel)
@@ -2520,7 +2517,7 @@
 static ssize_t
 show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", efx->phy_type);
 }
 static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
@@ -2529,7 +2526,7 @@
 static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
 
 	return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
@@ -2537,7 +2534,7 @@
 static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
 	bool enable = count > 0 && *buf != '0';
 
@@ -3167,7 +3164,7 @@
 {
 	u32 hash = efx_filter_spec_hash(spec);
 
-	WARN_ON(!spin_is_locked(&efx->rps_hash_lock));
+	lockdep_assert_held(&efx->rps_hash_lock);
 	if (!efx->rps_hash_table)
 		return NULL;
 	return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
@@ -3617,11 +3614,7 @@
 		netif_warn(efx, probe, efx->net_dev,
 			   "failed to create MTDs (%d)\n", rc);
 
-	rc = pci_enable_pcie_error_reporting(pci_dev);
-	if (rc && rc != -EINVAL)
-		netif_notice(efx, probe, efx->net_dev,
-			     "PCIE error reporting unavailable (%d).\n",
-			     rc);
+	(void)pci_enable_pcie_error_reporting(pci_dev);
 
 	if (efx->type->udp_tnl_push_ports)
 		efx->type->udp_tnl_push_ports(efx);
@@ -3661,7 +3654,7 @@
 
 static int efx_pm_freeze(struct device *dev)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 
 	rtnl_lock();
 
@@ -3682,7 +3675,7 @@
 static int efx_pm_thaw(struct device *dev)
 {
 	int rc;
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct efx_nic *efx = dev_get_drvdata(dev);
 
 	rtnl_lock();
 
@@ -3821,7 +3814,6 @@
 {
 	struct efx_nic *efx = pci_get_drvdata(pdev);
 	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
-	int rc;
 
 	if (pci_enable_device(pdev)) {
 		netif_err(efx, hw, efx->net_dev,
@@ -3829,13 +3821,6 @@
 		status =  PCI_ERS_RESULT_DISCONNECT;
 	}
 
-	rc = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (rc) {
-		netif_err(efx, hw, efx->net_dev,
-		"pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
-		/* Non-fatal error. Continue. */
-	}
-
 	return status;
 }
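
Besides the dev_get_drvdata() simplification in this file (a PCI device's drvdata is reachable straight from its struct device), efx.c swaps WARN_ON(!spin_is_locked(...)) for lockdep_assert_held(). The latter checks that the *current* context holds the lock, compiles away entirely without lockdep, and avoids spin_is_locked(), which can be hard-wired to 0 on uniprocessor builds. A minimal sketch with hypothetical names:

    #include <linux/spinlock.h>
    #include <linux/lockdep.h>
    #include <linux/kernel.h>

    static DEFINE_SPINLOCK(table_lock);
    static int table[8];

    static int *table_slot(unsigned int hash)
    {
    	/* Documents and (under CONFIG_PROVE_LOCKING) enforces the
    	 * locking contract; a no-op on production builds.
    	 */
    	lockdep_assert_held(&table_lock);
    	return &table[hash % ARRAY_SIZE(table)];
    }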
 
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 3f759eb..04fed7c 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_EFX_H
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index 6fa8242..3332cdf 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2007-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_ENUM_H
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 3143588..86b9658 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/netdevice.h>
@@ -539,7 +536,7 @@
 	/* We need rx buffers and interrupts. */
 	already_up = (efx->net_dev->flags & IFF_UP);
 	if (!already_up) {
-		rc = dev_open(efx->net_dev);
+		rc = dev_open(efx->net_dev, NULL);
 		if (rc) {
 			netif_err(efx, drv, efx->net_dev,
 				  "failed opening device.\n");
diff --git a/drivers/net/ethernet/sfc/falcon/Kconfig b/drivers/net/ethernet/sfc/falcon/Kconfig
index 6248e96..20e3619 100644
--- a/drivers/net/ethernet/sfc/falcon/Kconfig
+++ b/drivers/net/ethernet/sfc/falcon/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config SFC_FALCON
 	tristate "Solarflare SFC4000 support"
 	depends on PCI
diff --git a/drivers/net/ethernet/sfc/falcon/bitfield.h b/drivers/net/ethernet/sfc/falcon/bitfield.h
index 230fd77..5eb178d 100644
--- a/drivers/net/ethernet/sfc/falcon/bitfield.h
+++ b/drivers/net/ethernet/sfc/falcon/bitfield.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_BITFIELD_H
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 03e2455..eecc348 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/module.h>
@@ -2259,7 +2256,7 @@
 static ssize_t
 show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct ef4_nic *efx = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", efx->phy_type);
 }
 static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
@@ -3002,7 +2999,7 @@
 
 static int ef4_pm_freeze(struct device *dev)
 {
-	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct ef4_nic *efx = dev_get_drvdata(dev);
 
 	rtnl_lock();
 
@@ -3023,7 +3020,7 @@
 static int ef4_pm_thaw(struct device *dev)
 {
 	int rc;
-	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct ef4_nic *efx = dev_get_drvdata(dev);
 
 	rtnl_lock();
 
@@ -3160,7 +3157,6 @@
 {
 	struct ef4_nic *efx = pci_get_drvdata(pdev);
 	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
-	int rc;
 
 	if (pci_enable_device(pdev)) {
 		netif_err(efx, hw, efx->net_dev,
@@ -3168,13 +3164,6 @@
 		status =  PCI_ERS_RESULT_DISCONNECT;
 	}
 
-	rc = pci_cleanup_aer_uncorrect_error_status(pdev);
-	if (rc) {
-		netif_err(efx, hw, efx->net_dev,
-		"pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
-		/* Non-fatal error. Continue. */
-	}
-
 	return status;
 }
 
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
index a4e4d8e..d3b4646 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.h
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_EFX_H
diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
index 4824fcf..7e6277f 100644
--- a/drivers/net/ethernet/sfc/falcon/enum.h
+++ b/drivers/net/ethernet/sfc/falcon/enum.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2007-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_ENUM_H
diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c
index 1ccdb7a..08bd6a3 100644
--- a/drivers/net/ethernet/sfc/falcon/ethtool.c
+++ b/drivers/net/ethernet/sfc/falcon/ethtool.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/netdevice.h>
@@ -517,7 +514,7 @@
 	/* We need rx buffers and interrupts. */
 	already_up = (efx->net_dev->flags & IFF_UP);
 	if (!already_up) {
-		rc = dev_open(efx->net_dev);
+		rc = dev_open(efx->net_dev, NULL);
 		if (rc) {
 			netif_err(efx, drv, efx->net_dev,
 				  "failed opening device.\n");
diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c
index 6520d7b..3324a62 100644
--- a/drivers/net/ethernet/sfc/falcon/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/net/ethernet/sfc/falcon/falcon_boards.c b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
index dec83a2..605f486 100644
--- a/drivers/net/ethernet/sfc/falcon/falcon_boards.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2007-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/rtnetlink.h>
@@ -360,7 +357,7 @@
 static ssize_t show_phy_flash_cfg(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
-	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct ef4_nic *efx = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", !!(efx->phy_mode & PHY_MODE_SPECIAL));
 }
 
@@ -368,7 +365,7 @@
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
 {
-	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct ef4_nic *efx = dev_get_drvdata(dev);
 	enum ef4_phy_mode old_mode, new_mode;
 	int err;
 
@@ -457,13 +454,13 @@
 
 #if IS_ENABLED(CONFIG_SENSORS_LM90)
 	board->hwmon_client =
-		i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info);
+		i2c_new_client_device(&board->i2c_adap, &sfe4001_hwmon_info);
 #else
 	board->hwmon_client =
-		i2c_new_dummy(&board->i2c_adap, sfe4001_hwmon_info.addr);
+		i2c_new_dummy_device(&board->i2c_adap, sfe4001_hwmon_info.addr);
 #endif
-	if (!board->hwmon_client)
-		return -EIO;
+	if (IS_ERR(board->hwmon_client))
+		return PTR_ERR(board->hwmon_client);
 
 	/* Raise board/PHY high limit from 85 to 90 degrees Celsius */
 	rc = i2c_smbus_write_byte_data(board->hwmon_client,
@@ -471,9 +468,9 @@
 	if (rc)
 		goto fail_hwmon;
 
-	board->ioexp_client = i2c_new_dummy(&board->i2c_adap, PCA9539);
-	if (!board->ioexp_client) {
-		rc = -EIO;
+	board->ioexp_client = i2c_new_dummy_device(&board->i2c_adap, PCA9539);
+	if (IS_ERR(board->ioexp_client)) {
+		rc = PTR_ERR(board->ioexp_client);
 		goto fail_hwmon;
 	}
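
falcon_boards.c switches to the i2c_new_*_device() variants, which return ERR_PTR() on failure instead of NULL, so the board code can propagate the real error rather than guessing -EIO. A minimal sketch of the calling convention (hypothetical names):

    #include <linux/i2c.h>
    #include <linux/err.h>

    static int attach_expander(struct i2c_adapter *adap, u16 addr)
    {
    	struct i2c_client *client = i2c_new_dummy_device(adap, addr);

    	if (IS_ERR(client))
    		return PTR_ERR(client);	/* e.g. -ENODEV or -ENOMEM */

    	/* ... use client ... */
    	return 0;
    }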
 
diff --git a/drivers/net/ethernet/sfc/falcon/farch.c b/drivers/net/ethernet/sfc/falcon/farch.c
index 411a2f4..3321832 100644
--- a/drivers/net/ethernet/sfc/falcon/farch.c
+++ b/drivers/net/ethernet/sfc/falcon/farch.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/net/ethernet/sfc/falcon/farch_regs.h b/drivers/net/ethernet/sfc/falcon/farch_regs.h
index 8095f27..5b01f3f 100644
--- a/drivers/net/ethernet/sfc/falcon/farch_regs.h
+++ b/drivers/net/ethernet/sfc/falcon/farch_regs.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_FARCH_REGS_H
diff --git a/drivers/net/ethernet/sfc/falcon/filter.h b/drivers/net/ethernet/sfc/falcon/filter.h
index 647f6b2..bc6f5f5 100644
--- a/drivers/net/ethernet/sfc/falcon/filter.h
+++ b/drivers/net/ethernet/sfc/falcon/filter.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_FILTER_H
diff --git a/drivers/net/ethernet/sfc/falcon/io.h b/drivers/net/ethernet/sfc/falcon/io.h
index 7085ee1..bc23c80 100644
--- a/drivers/net/ethernet/sfc/falcon/io.h
+++ b/drivers/net/ethernet/sfc/falcon/io.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_IO_H
@@ -108,7 +105,6 @@
 	_ef4_writed(efx, value->u32[2], reg + 8);
 	_ef4_writed(efx, value->u32[3], reg + 12);
 #endif
-	mmiowb();
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
 }
 
@@ -130,7 +126,6 @@
 	__raw_writel((__force u32)value->u32[0], membase + addr);
 	__raw_writel((__force u32)value->u32[1], membase + addr + 4);
 #endif
-	mmiowb();
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
 }
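
The dropped mmiowb() calls here (and in sfc/io.h below) follow the tree-wide removal: since the mmiowb rework around v5.2, spin_unlock() itself issues the I/O write barrier on architectures that need one, so drivers no longer call it by hand. A hedged sketch of the resulting pattern:

    #include <linux/spinlock.h>
    #include <linux/io.h>

    static DEFINE_SPINLOCK(reg_lock);

    static void write_pair(void __iomem *base, u32 lo, u32 hi)
    {
    	unsigned long flags;

    	spin_lock_irqsave(&reg_lock, flags);
    	__raw_writel(lo, base);
    	__raw_writel(hi, base + 4);
    	/* No explicit mmiowb() needed: the unlock orders the MMIO
    	 * writes on architectures that require it.
    	 */
    	spin_unlock_irqrestore(&reg_lock, flags);
    }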
 
diff --git a/drivers/net/ethernet/sfc/falcon/mdio_10g.c b/drivers/net/ethernet/sfc/falcon/mdio_10g.c
index ee0713f..5402781 100644
--- a/drivers/net/ethernet/sfc/falcon/mdio_10g.c
+++ b/drivers/net/ethernet/sfc/falcon/mdio_10g.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2006-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 /*
  * Useful functions for working with MDIO clause 45 PHYs
diff --git a/drivers/net/ethernet/sfc/falcon/mdio_10g.h b/drivers/net/ethernet/sfc/falcon/mdio_10g.h
index 53cb5cc..de676bf 100644
--- a/drivers/net/ethernet/sfc/falcon/mdio_10g.h
+++ b/drivers/net/ethernet/sfc/falcon/mdio_10g.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2006-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_MDIO_10G_H
diff --git a/drivers/net/ethernet/sfc/falcon/mtd.c b/drivers/net/ethernet/sfc/falcon/mtd.c
index 2d67e46..15bd47b 100644
--- a/drivers/net/ethernet/sfc/falcon/mtd.c
+++ b/drivers/net/ethernet/sfc/falcon/mtd.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/module.h>
diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h
index 37a8bdf..a49ea2e 100644
--- a/drivers/net/ethernet/sfc/falcon/net_driver.h
+++ b/drivers/net/ethernet/sfc/falcon/net_driver.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 /* Common definitions for all Efx net driver code */
diff --git a/drivers/net/ethernet/sfc/falcon/nic.c b/drivers/net/ethernet/sfc/falcon/nic.c
index a8ecb33..156da31 100644
--- a/drivers/net/ethernet/sfc/falcon/nic.c
+++ b/drivers/net/ethernet/sfc/falcon/nic.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
@@ -33,8 +30,8 @@
 int ef4_nic_alloc_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer,
 			 unsigned int len, gfp_t gfp_flags)
 {
-	buffer->addr = dma_zalloc_coherent(&efx->pci_dev->dev, len,
-					   &buffer->dma_addr, gfp_flags);
+	buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len,
+					  &buffer->dma_addr, gfp_flags);
 	if (!buffer->addr)
 		return -ENOMEM;
 	buffer->len = len;
diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h
index 07c62dc..9f41347 100644
--- a/drivers/net/ethernet/sfc/falcon/nic.h
+++ b/drivers/net/ethernet/sfc/falcon/nic.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_NIC_H
diff --git a/drivers/net/ethernet/sfc/falcon/phy.h b/drivers/net/ethernet/sfc/falcon/phy.h
index 362141c..69bb548 100644
--- a/drivers/net/ethernet/sfc/falcon/phy.h
+++ b/drivers/net/ethernet/sfc/falcon/phy.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2007-2010 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_PHY_H
diff --git a/drivers/net/ethernet/sfc/falcon/qt202x_phy.c b/drivers/net/ethernet/sfc/falcon/qt202x_phy.c
index f5e0f18..21af67e 100644
--- a/drivers/net/ethernet/sfc/falcon/qt202x_phy.c
+++ b/drivers/net/ethernet/sfc/falcon/qt202x_phy.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 /*
  * Driver for AMCC QT202x SFP+ and XFP adapters; see www.amcc.com for details
diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c
index 02456ed..05ea352 100644
--- a/drivers/net/ethernet/sfc/falcon/rx.c
+++ b/drivers/net/ethernet/sfc/falcon/rx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/socket.h>
@@ -427,7 +424,6 @@
 		  unsigned int n_frags, u8 *eh)
 {
 	struct napi_struct *napi = &channel->napi_str;
-	gro_result_t gro_result;
 	struct ef4_nic *efx = channel->efx;
 	struct sk_buff *skb;
 
@@ -463,9 +459,7 @@
 
 	skb_record_rx_queue(skb, channel->rx_queue.core_index);
 
-	gro_result = napi_gro_frags(napi);
-	if (gro_result != GRO_DROP)
-		channel->irq_mod_score += 2;
+	napi_gro_frags(napi);
 }
 
 /* Allocate and construct an SKB around page fragments */
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c
index 55c0fbb..147677c 100644
--- a/drivers/net/ethernet/sfc/falcon/selftest.c
+++ b/drivers/net/ethernet/sfc/falcon/selftest.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.h b/drivers/net/ethernet/sfc/falcon/selftest.h
index be52a49..c0dbc63 100644
--- a/drivers/net/ethernet/sfc/falcon/selftest.h
+++ b/drivers/net/ethernet/sfc/falcon/selftest.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_SELFTEST_H
diff --git a/drivers/net/ethernet/sfc/falcon/tenxpress.c b/drivers/net/ethernet/sfc/falcon/tenxpress.c
index ff9b4e2..e27824e 100644
--- a/drivers/net/ethernet/sfc/falcon/tenxpress.c
+++ b/drivers/net/ethernet/sfc/falcon/tenxpress.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2007-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
index 3409bbf..f7306e9 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/pci.h>
@@ -321,7 +318,7 @@
 	netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
 
 	/* Pass off to hardware */
-	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+	if (!netdev_xmit_more() || netif_xmit_stopped(tx_queue->core_txq)) {
 		struct ef4_tx_queue *txq2 = ef4_tx_queue_partner(tx_queue);
 
 		/* There could be packets left on the partner queue if those
@@ -333,7 +330,7 @@
 
 		ef4_nic_push_buffers(tx_queue);
 	} else {
-		tx_queue->xmit_more_available = skb->xmit_more;
+		tx_queue->xmit_more_available = netdev_xmit_more();
 	}
 
 	tx_queue->tx_packets++;
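
This tx.c hunk is the skb->xmit_more conversion: the "more packets coming" hint now lives in per-CPU state and is read with netdev_xmit_more() from inside ndo_start_xmit. A hedged sketch of the doorbell-batching idiom, with foo_* names standing in:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    static void foo_push_descriptors(struct net_device *dev)
    {
    	/* write the hardware doorbell register here */
    }

    static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
    				  struct net_device *dev)
    {
    	struct netdev_queue *txq =
    		netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

    	/* ... fill hardware descriptors for skb ... */

    	/* Ring the doorbell only when the stack has nothing further
    	 * queued for us, or when the queue has just stopped.
    	 */
    	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
    		foo_push_descriptors(dev);

    	return NETDEV_TX_OK;
    }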
diff --git a/drivers/net/ethernet/sfc/falcon/tx.h b/drivers/net/ethernet/sfc/falcon/tx.h
index a607eb0..2a88c59 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.h
+++ b/drivers/net/ethernet/sfc/falcon/tx.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_TX_H
diff --git a/drivers/net/ethernet/sfc/falcon/txc43128_phy.c b/drivers/net/ethernet/sfc/falcon/txc43128_phy.c
index 3c55fd2..f350396 100644
--- a/drivers/net/ethernet/sfc/falcon/txc43128_phy.c
+++ b/drivers/net/ethernet/sfc/falcon/txc43128_phy.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2006-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 /*
diff --git a/drivers/net/ethernet/sfc/falcon/workarounds.h b/drivers/net/ethernet/sfc/falcon/workarounds.h
index 6af800b..e28c67f 100644
--- a/drivers/net/ethernet/sfc/falcon/workarounds.h
+++ b/drivers/net/ethernet/sfc/falcon/workarounds.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EF4_WORKAROUNDS_H
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index e045a5d..eedd32e 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/net/ethernet/sfc/farch_regs.h b/drivers/net/ethernet/sfc/farch_regs.h
index 7019a71..d138be4 100644
--- a/drivers/net/ethernet/sfc/farch_regs.h
+++ b/drivers/net/ethernet/sfc/farch_regs.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_FARCH_REGS_H
diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
index 59021ad..40b2af8 100644
--- a/drivers/net/ethernet/sfc/filter.h
+++ b/drivers/net/ethernet/sfc/filter.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_FILTER_H
diff --git a/drivers/net/ethernet/sfc/io.h b/drivers/net/ethernet/sfc/io.h
index 8956317..c3c011b 100644
--- a/drivers/net/ethernet/sfc/io.h
+++ b/drivers/net/ethernet/sfc/io.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_IO_H
@@ -120,7 +117,6 @@
 	_efx_writed(efx, value->u32[2], reg + 8);
 	_efx_writed(efx, value->u32[3], reg + 12);
 #endif
-	mmiowb();
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
 }
 
@@ -142,7 +138,6 @@
 	__raw_writel((__force u32)value->u32[0], membase + addr);
 	__raw_writel((__force u32)value->u32[1], membase + addr + 4);
 #endif
-	mmiowb();
 	spin_unlock_irqrestore(&efx->biu_lock, flags);
 }
 
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index dfad93f..2713300 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2008-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/delay.h>
@@ -2074,22 +2071,26 @@
 
 static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type)
 {
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN);
 	int rc;
 
 	MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type);
+	MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_START_V2_IN_FLAGS,
+			      NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT,
+			      1);
 
 	BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_START_OUT_LEN != 0);
 
 	rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf, sizeof(inbuf),
 			  NULL, 0, NULL);
+
 	return rc;
 }
 
 static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type,
 			       loff_t offset, u8 *buffer, size_t length)
 {
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_V2_LEN);
 	MCDI_DECLARE_BUF(outbuf,
 			 MC_CMD_NVRAM_READ_OUT_LEN(EFX_MCDI_NVRAM_LEN_MAX));
 	size_t outlen;
@@ -2098,6 +2099,8 @@
 	MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_TYPE, type);
 	MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_OFFSET, offset);
 	MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_LENGTH, length);
+	MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_V2_MODE,
+		       MC_CMD_NVRAM_READ_IN_V2_DEFAULT);
 
 	rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_READ, inbuf, sizeof(inbuf),
 			  outbuf, sizeof(outbuf), &outlen);
@@ -2147,15 +2150,51 @@
 
 static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
 {
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN);
-	int rc;
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN);
+	size_t outlen;
+	int rc, rc2;
 
 	MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type);
-
-	BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_FINISH_OUT_LEN != 0);
+	/* Always set this flag; older firmware simply ignores it. */
+	MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS,
+			      NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT,
+			      1);
 
 	rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf),
-			  NULL, 0, NULL);
+			  outbuf, sizeof(outbuf), &outlen);
+	if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) {
+		rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE);
+		if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS)
+			netif_err(efx, drv, efx->net_dev,
+				  "NVRAM update failed verification with code 0x%x\n",
+				  rc2);
+		switch (rc2) {
+		case MC_CMD_NVRAM_VERIFY_RC_SUCCESS:
+			break;
+		case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED:
+		case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED:
+		case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED:
+		case MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED:
+		case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED:
+			rc = -EIO;
+			break;
+		case MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT:
+		case MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST:
+			rc = -EINVAL;
+			break;
+		case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES:
+		case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS:
+		case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH:
+			rc = -EPERM;
+			break;
+		default:
+			netif_err(efx, drv, efx->net_dev,
+				  "Unknown response to NVRAM_UPDATE_FINISH\n");
+			rc = -EIO;
+		}
+	}
+
 	return rc;
 }
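
The rewritten update_finish path asks the firmware to report a verify result and folds that code into a conventional errno, keeping unknown codes distinct so future firmware fails safe. A standalone sketch of the mapping idiom; the enum values here are illustrative, not the real MC_CMD_NVRAM_VERIFY_RC_* constants:

    #include <linux/errno.h>

    enum fw_verify_rc {			/* illustrative codes only */
    	FW_VERIFY_OK,
    	FW_VERIFY_BAD_DIGEST,		/* integrity check failed */
    	FW_VERIFY_BAD_FORMAT,		/* malformed image */
    	FW_VERIFY_UNTRUSTED,		/* no trusted signature */
    };

    static int fw_verify_rc_to_errno(unsigned int rc)
    {
    	switch (rc) {
    	case FW_VERIFY_OK:
    		return 0;
    	case FW_VERIFY_BAD_DIGEST:
    		return -EIO;
    	case FW_VERIFY_BAD_FORMAT:
    		return -EINVAL;
    	case FW_VERIFY_UNTRUSTED:
    		return -EPERM;
    	default:
    		/* Unknown code from newer firmware: fail safe. */
    		return -EIO;
    	}
    }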
 
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index ebd9597..9081f84 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2008-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_MCDI_H
diff --git a/drivers/net/ethernet/sfc/mcdi_mon.c b/drivers/net/ethernet/sfc/mcdi_mon.c
index f177515..5954fcf 100644
--- a/drivers/net/ethernet/sfc/mcdi_mon.c
+++ b/drivers/net/ethernet/sfc/mcdi_mon.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2011-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index 3839eec..79d834a 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2009-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 
@@ -6784,6 +6781,14 @@
  * subset of the information stored in this partition.
  */
 #define          NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
+/* enum: Bundle image partition */
+#define          NVRAM_PARTITION_TYPE_BUNDLE 0x1e00
+/* enum: Bundle metadata partition that holds additional information related to
+ * a bundle update in TLV format
+ */
+#define          NVRAM_PARTITION_TYPE_BUNDLE_METADATA 0x1e01
+/* enum: Bundle update non-volatile log output partition */
+#define          NVRAM_PARTITION_TYPE_BUNDLE_LOG 0x1e02
 /* enum: Start of reserved value range (firmware may use for any purpose) */
 #define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
 /* enum: End of reserved value range (firmware may use for any purpose) */
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index 9382bb0..fb7cde4 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2009-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 /*
@@ -342,6 +339,7 @@
 		break;
 	default:
 		WARN_ON(1);
+		/* Fall through */
 	case MC_CMD_FCNTL_OFF:
 		link_state->fc = 0;
 		break;
diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c
index 4ac30b6..273c08e 100644
--- a/drivers/net/ethernet/sfc/mtd.c
+++ b/drivers/net/ethernet/sfc/mtd.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/module.h>
@@ -66,6 +63,9 @@
 
 		part->mtd.writesize = 1;
 
+		if (!(part->mtd.flags & MTD_NO_ERASE))
+			part->mtd.flags |= MTD_WRITEABLE;
+
 		part->mtd.owner = THIS_MODULE;
 		part->mtd.priv = efx;
 		part->mtd.name = part->name;
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 961b929..284a1b0 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 /* Common definitions for all Efx net driver code */
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index aa1945a..b0baa70 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
@@ -34,8 +31,8 @@
 int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer,
 			 unsigned int len, gfp_t gfp_flags)
 {
-	buffer->addr = dma_zalloc_coherent(&efx->pci_dev->dev, len,
-					   &buffer->dma_addr, gfp_flags);
+	buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len,
+					  &buffer->dma_addr, gfp_flags);
 	if (!buffer->addr)
 		return -ENOMEM;
 	buffer->len = len;
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 5cca055..1f7c571 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_NIC_H
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index f216615..af15a73 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2011-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 /* Theory of operation:
@@ -1534,7 +1531,8 @@
 	(void)efx_ptp_disable(efx);
 
 	cancel_work_sync(&efx->ptp_data->work);
-	cancel_work_sync(&efx->ptp_data->pps_work);
+	if (efx->ptp_data->pps_workwq)
+		cancel_work_sync(&efx->ptp_data->pps_work);
 
 	skb_queue_purge(&efx->ptp_data->rxq);
 	skb_queue_purge(&efx->ptp_data->txq);
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 396ff01..85ec07f 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/socket.h>
@@ -360,8 +357,7 @@
 		rc = efx_init_rx_buffers(rx_queue, atomic);
 		if (unlikely(rc)) {
 			/* Ensure that we don't leave the rx queue empty */
-			if (rx_queue->added_count == rx_queue->removed_count)
-				efx_schedule_slow_fill(rx_queue);
+			efx_schedule_slow_fill(rx_queue);
 			goto out;
 		}
 	} while ((space -= batch_size) >= batch_size);
@@ -416,7 +412,6 @@
 		  unsigned int n_frags, u8 *eh)
 {
 	struct napi_struct *napi = &channel->napi_str;
-	gro_result_t gro_result;
 	struct efx_nic *efx = channel->efx;
 	struct sk_buff *skb;
 
@@ -453,9 +448,7 @@
 
 	skb_record_rx_queue(skb, channel->rx_queue.core_index);
 
-	gro_result = napi_gro_frags(napi);
-	if (gro_result != GRO_DROP)
-		channel->irq_mod_score += 2;
+	napi_gro_frags(napi);
 }
 
 /* Allocate and construct an SKB around page fragments */
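
The gro_result_t bookkeeping disappears because the driver no longer feeds a GRO_DROP outcome into its interrupt-moderation score; napi_gro_frags() becomes fire-and-forget. A sketch of the frag-based GRO receive path, assuming hypothetical rx_page/rx_off/rx_len values produced by an rx ring:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void ex_rx_gro(struct napi_struct *napi, struct page *rx_page,
		      unsigned int rx_off, unsigned int rx_len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return;		/* a real driver would recycle the page */

	skb_fill_page_desc(skb, 0, rx_page, rx_off, rx_len);
	skb->len += rx_len;
	skb->data_len += rx_len;
	skb->truesize += PAGE_SIZE;	/* simplified accounting */

	/* Return value intentionally ignored, as in the hunk above. */
	napi_gro_frags(napi);
}
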
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index f693694..8474cf8 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h
index 32a4272..a355381 100644
--- a/drivers/net/ethernet/sfc/selftest.h
+++ b/drivers/net/ethernet/sfc/selftest.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_SELFTEST_H
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 65161f6..8149924 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index da7b94f..dfbdf05 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2010-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 #include <linux/pci.h>
 #include <linux/module.h>
diff --git a/drivers/net/ethernet/sfc/siena_sriov.h b/drivers/net/ethernet/sfc/siena_sriov.h
index d88d4da..e441c89 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.h
+++ b/drivers/net/ethernet/sfc/siena_sriov.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef SIENA_SRIOV_H
diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c
index 0b766fd..3f241e6 100644
--- a/drivers/net/ethernet/sfc/sriov.c
+++ b/drivers/net/ethernet/sfc/sriov.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2014-2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 #include <linux/module.h>
 #include "net_driver.h"
diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h
index 84c7984..747707b 100644
--- a/drivers/net/ethernet/sfc/sriov.h
+++ b/drivers/net/ethernet/sfc/sriov.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2014-2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_SRIOV_H
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index c3ad564..65e81ec 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/pci.h>
@@ -277,7 +274,7 @@
 
 		vaddr = kmap_atomic(skb_frag_page(f));
 
-		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
+		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f),
 					   skb_frag_size(f), copy_buf);
 		kunmap_atomic(vaddr);
 	}
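
skb_frag_t no longer exposes page_offset directly; skb_frag_off() pairs with skb_frag_size() as the supported accessors, which is what the two tx.c hunks above switch to. A sketch of the usual map-all-fragments loop under that API:

#include <linux/skbuff.h>
#include <linux/dma-mapping.h>

static int ex_map_frags(struct device *dev, struct sk_buff *skb,
			dma_addr_t *addrs)
{
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		/* skb_frag_off()/skb_frag_size() replace direct
		 * frag->page_offset / frag->size accesses.
		 */
		addrs[i] = skb_frag_dma_map(dev, frag, 0,
					    skb_frag_size(frag),
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dev, addrs[i]))
			return -ENOMEM;	/* caller unmaps 0..i-1 */
	}
	return 0;
}
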
@@ -471,15 +468,13 @@
 	if (IS_ERR(segments))
 		return PTR_ERR(segments);
 
-	dev_kfree_skb_any(skb);
+	dev_consume_skb_any(skb);
 	skb = segments;
 
 	while (skb) {
 		next = skb->next;
 		skb->next = NULL;
 
-		if (next)
-			skb->xmit_more = true;
 		efx_enqueue_skb(tx_queue, skb);
 		skb = next;
 	}
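
With skb->xmit_more gone from struct sk_buff, the software-GSO fallback stops marking all-but-the-last segment by hand and simply enqueues each one; the more-data hint is now derived per-CPU at transmit time. The surrounding pattern, sketched with a hypothetical enqueue callback (skb_gso_segment() lives in linux/netdevice.h in this kernel series):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int ex_sw_gso(struct sk_buff *skb, netdev_features_t features,
		     void (*enqueue)(struct sk_buff *skb))
{
	struct sk_buff *segments, *next;

	segments = skb_gso_segment(skb, features);
	if (IS_ERR(segments))
		return PTR_ERR(segments);

	/* The original skb is consumed, not dropped: its payload goes
	 * out as the segments below.
	 */
	dev_consume_skb_any(skb);

	for (skb = segments; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		enqueue(skb);
	}
	return 0;
}
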
@@ -506,7 +501,7 @@
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 {
 	unsigned int old_insert_count = tx_queue->insert_count;
-	bool xmit_more = skb->xmit_more;
+	bool xmit_more = netdev_xmit_more();
 	bool data_mapped = false;
 	unsigned int segments;
 	unsigned int skb_len;
@@ -533,7 +528,7 @@
 		if (rc)
 			goto err;
 #ifdef EFX_USE_PIO
-	} else if (skb_len <= efx_piobuf_size && !skb->xmit_more &&
+	} else if (skb_len <= efx_piobuf_size && !xmit_more &&
 		   efx_nic_may_tx_pio(tx_queue)) {
 		/* Use PIO for short packets with an empty queue. */
 		if (efx_enqueue_skb_pio(tx_queue, skb))
@@ -553,17 +548,14 @@
 	if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments)))
 		goto err;
 
-	/* Update BQL */
-	netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
-
 	efx_tx_maybe_stop_queue(tx_queue);
 
 	/* Pass off to hardware */
-	if (!xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+	if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more)) {
 		struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
 
-		/* There could be packets left on the partner queue if those
-		 * SKBs had skb->xmit_more set. If we do not push those they
+		/* There could be packets left on the partner queue if
+		 * xmit_more was set. If we do not push those they
 		 * could be left for a long time and cause a netdev watchdog.
 		 */
 		if (txq2->xmit_more_available)
@@ -571,7 +563,7 @@
 
 		efx_nic_push_buffers(tx_queue);
 	} else {
-		tx_queue->xmit_more_available = skb->xmit_more;
+		tx_queue->xmit_more_available = xmit_more;
 	}
 
 	if (segments) {
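
__netdev_tx_sent_queue() merges the BQL byte accounting with the doorbell decision: it returns true when the hardware must be kicked, either because no more packets are coming (xmit_more false) or because BQL just stopped the queue. Together with netdev_xmit_more(), which reads a per-CPU flag instead of skb state, that is the whole replacement for the old skb->xmit_more checks. The idiom in isolation, with a hypothetical push-to-hardware hook:

#include <linux/netdevice.h>

static void ex_tx_tail(struct netdev_queue *txq, unsigned int bytes,
		       bool xmit_more, void (*push)(void))
{
	/* Accounts 'bytes' against BQL and says whether to ring the
	 * doorbell now; deferring it is only safe while more packets
	 * are guaranteed to follow.
	 */
	if (__netdev_tx_sent_queue(txq, bytes, xmit_more))
		push();
}
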
diff --git a/drivers/net/ethernet/sfc/tx.h b/drivers/net/ethernet/sfc/tx.h
index 1cccc97..e04d5dd 100644
--- a/drivers/net/ethernet/sfc/tx.h
+++ b/drivers/net/ethernet/sfc/tx.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2006-2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_TX_H
diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c
index e0cbda9..898e5c6 100644
--- a/drivers/net/ethernet/sfc/tx_tso.c
+++ b/drivers/net/ethernet/sfc/tx_tso.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2015 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/pci.h>
diff --git a/drivers/net/ethernet/sfc/vfdi.h b/drivers/net/ethernet/sfc/vfdi.h
index f62901d..480b872 100644
--- a/drivers/net/ethernet/sfc/vfdi.h
+++ b/drivers/net/ethernet/sfc/vfdi.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2010-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 #ifndef _VFDI_H
 #define _VFDI_H
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index c67fa18..815be2d 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #ifndef EFX_WORKAROUNDS_H
diff --git a/drivers/net/ethernet/sgi/Kconfig b/drivers/net/ethernet/sgi/Kconfig
index fbbb21c..37f048e 100644
--- a/drivers/net/ethernet/sgi/Kconfig
+++ b/drivers/net/ethernet/sgi/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # SGI device configuration
 #
diff --git a/drivers/net/ethernet/sgi/Makefile b/drivers/net/ethernet/sgi/Makefile
index e5bedd2..68eefbc 100644
--- a/drivers/net/ethernet/sgi/Makefile
+++ b/drivers/net/ethernet/sgi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the SGI device drivers.
 #
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 3140999..deb636d 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -1,9 +1,5 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Driver for SGI's IOC3 based Ethernet cards as found in the PCI card.
+// SPDX-License-Identifier: GPL-2.0
+/* Driver for SGI's IOC3 based Ethernet cards as found in the PCI card.
  *
  * Copyright (C) 1999, 2000, 01, 03, 06 Ralf Baechle
  * Copyright (C) 1995, 1999, 2000, 2001 by Silicon Graphics, Inc.
@@ -15,11 +11,8 @@
  *
  * To do:
  *
- *  o Handle allocation failures in ioc3_alloc_skb() more gracefully.
- *  o Handle allocation failures in ioc3_init_rings().
  *  o Use prefetching for large packets.  What is a good lower limit for
  *    prefetching?
- *  o We're probably allocating a bit too much memory.
  *  o Use hardware checksums.
  *  o Convert to using a IOC3 meta driver.
  *  o Which PHYs might possibly be attached to the IOC3 in real live,
@@ -39,10 +32,10 @@
 #include <linux/crc32.h>
 #include <linux/mii.h>
 #include <linux/in.h>
+#include <linux/io.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
-#include <linux/dma-mapping.h>
 #include <linux/gfp.h>
 
 #ifdef CONFIG_SERIAL_8250
@@ -55,32 +48,52 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/skbuff.h>
+#include <linux/dma-direct.h>
+
 #include <net/ip.h>
 
 #include <asm/byteorder.h>
-#include <asm/io.h>
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
 #include <asm/sn/types.h>
 #include <asm/sn/ioc3.h>
 #include <asm/pci/bridge.h>
 
-/*
- * 64 RX buffers.  This is tunable in the range of 16 <= x < 512.  The
- * value must be a power of two.
+/* Number of RX buffers.  This is tunable in the range of 16 <= x < 512.
+ * The value must be a power of two.
  */
-#define RX_BUFFS 64
+#define RX_BUFFS		64
+#define RX_RING_ENTRIES		512		/* fixed in hardware */
+#define RX_RING_MASK		(RX_RING_ENTRIES - 1)
+#define RX_RING_SIZE		(RX_RING_ENTRIES * sizeof(u64))
 
-#define ETCSR_FD	((17<<ETCSR_IPGR2_SHIFT) | (11<<ETCSR_IPGR1_SHIFT) | 21)
-#define ETCSR_HD	((21<<ETCSR_IPGR2_SHIFT) | (21<<ETCSR_IPGR1_SHIFT) | 21)
+/* 128 TX buffers (not tunable) */
+#define TX_RING_ENTRIES		128
+#define TX_RING_MASK		(TX_RING_ENTRIES - 1)
+#define TX_RING_SIZE		(TX_RING_ENTRIES * sizeof(struct ioc3_etxd))
+
+/* IOC3 does dma transfers in 128 byte blocks */
+#define IOC3_DMA_XFER_LEN	128UL
+
+/* Every RX buffer starts with 8 byte descriptor data */
+#define RX_OFFSET		(sizeof(struct ioc3_erxbuf) + NET_IP_ALIGN)
+#define RX_BUF_SIZE		(13 * IOC3_DMA_XFER_LEN)
+
+#define ETCSR_FD   ((21 << ETCSR_IPGR2_SHIFT) | (21 << ETCSR_IPGR1_SHIFT) | 21)
+#define ETCSR_HD   ((17 << ETCSR_IPGR2_SHIFT) | (11 << ETCSR_IPGR1_SHIFT) | 21)
 
 /* Private per NIC data of the driver.  */
 struct ioc3_private {
-	struct ioc3 *regs;
+	struct ioc3_ethregs *regs;
+	struct ioc3 *all_regs;
+	struct device *dma_dev;
+	u32 *ssram;
 	unsigned long *rxr;		/* pointer to receiver ring */
 	struct ioc3_etxd *txr;
-	struct sk_buff *rx_skbs[512];
-	struct sk_buff *tx_skbs[128];
+	dma_addr_t rxr_dma;
+	dma_addr_t txr_dma;
+	struct sk_buff *rx_skbs[RX_RING_ENTRIES];
+	struct sk_buff *tx_skbs[TX_RING_ENTRIES];
 	int rx_ci;			/* RX consumer index */
 	int rx_pi;			/* RX producer index */
 	int tx_ci;			/* TX consumer index */
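
Both rings are power-of-two sized, so the new *_RING_MASK constants turn index advancement into a mask instead of a modulo, and the free-space computation stays correct across wraparound. The arithmetic, as a standalone sketch:

#define EX_RING_ENTRIES	512			/* must be a power of two */
#define EX_RING_MASK	(EX_RING_ENTRIES - 1)

static inline unsigned int ex_ring_next(unsigned int idx)
{
	return (idx + 1) & EX_RING_MASK;	/* cheap wraparound */
}

static inline unsigned int ex_ring_space(unsigned int pi, unsigned int ci)
{
	/* Unsigned subtraction handles pi having wrapped past ci. */
	return (ci - pi - 1) & EX_RING_MASK;
}
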
@@ -102,190 +115,138 @@
 static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static void ioc3_timeout(struct net_device *dev);
 static inline unsigned int ioc3_hash(const unsigned char *addr);
+static void ioc3_start(struct ioc3_private *ip);
 static inline void ioc3_stop(struct ioc3_private *ip);
 static void ioc3_init(struct net_device *dev);
+static int ioc3_alloc_rx_bufs(struct net_device *dev);
+static void ioc3_free_rx_bufs(struct ioc3_private *ip);
+static inline void ioc3_clean_tx_ring(struct ioc3_private *ip);
 
 static const char ioc3_str[] = "IOC3 Ethernet";
 static const struct ethtool_ops ioc3_ethtool_ops;
 
-/* We use this to acquire receive skb's that we can DMA directly into. */
-
-#define IOC3_CACHELINE	128UL
 
 static inline unsigned long aligned_rx_skb_addr(unsigned long addr)
 {
-	return (~addr + 1) & (IOC3_CACHELINE - 1UL);
+	return (~addr + 1) & (IOC3_DMA_XFER_LEN - 1UL);
 }
 
-static inline struct sk_buff * ioc3_alloc_skb(unsigned long length,
-	unsigned int gfp_mask)
+static inline int ioc3_alloc_skb(struct ioc3_private *ip, struct sk_buff **skb,
+				 struct ioc3_erxbuf **rxb, dma_addr_t *rxb_dma)
 {
-	struct sk_buff *skb;
+	struct sk_buff *new_skb;
+	dma_addr_t d;
+	int offset;
 
-	skb = alloc_skb(length + IOC3_CACHELINE - 1, gfp_mask);
-	if (likely(skb)) {
-		int offset = aligned_rx_skb_addr((unsigned long) skb->data);
-		if (offset)
-			skb_reserve(skb, offset);
+	new_skb = alloc_skb(RX_BUF_SIZE + IOC3_DMA_XFER_LEN - 1, GFP_ATOMIC);
+	if (!new_skb)
+		return -ENOMEM;
+
+	/* ensure buffer is aligned to IOC3_DMA_XFER_LEN */
+	offset = aligned_rx_skb_addr((unsigned long)new_skb->data);
+	if (offset)
+		skb_reserve(new_skb, offset);
+
+	d = dma_map_single(ip->dma_dev, new_skb->data,
+			   RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(ip->dma_dev, d)) {
+		dev_kfree_skb_any(new_skb);
+		return -ENOMEM;
 	}
+	*rxb_dma = d;
+	*rxb = (struct ioc3_erxbuf *)new_skb->data;
+	skb_reserve(new_skb, RX_OFFSET);
+	*skb = new_skb;
 
-	return skb;
+	return 0;
 }
 
-static inline unsigned long ioc3_map(void *ptr, unsigned long vdev)
+#ifdef CONFIG_PCI_XTALK_BRIDGE
+static inline unsigned long ioc3_map(dma_addr_t addr, unsigned long attr)
 {
-#ifdef CONFIG_SGI_IP27
-	vdev <<= 57;   /* Shift to PCI64_ATTR_VIRTUAL */
-
-	return vdev | (0xaUL << PCI64_ATTR_TARG_SHFT) | PCI64_ATTR_PREF |
-	       ((unsigned long)ptr & TO_PHYS_MASK);
-#else
-	return virt_to_bus(ptr);
-#endif
+	return (addr & ~PCI64_ATTR_BAR) | attr;
 }
 
-/* BEWARE: The IOC3 documentation documents the size of rx buffers as
-   1644 while it's actually 1664.  This one was nasty to track down ...  */
-#define RX_OFFSET		10
-#define RX_BUF_ALLOC_SIZE	(1664 + RX_OFFSET + IOC3_CACHELINE)
+#define ERBAR_VAL	(ERBAR_BARRIER_BIT << ERBAR_RXBARR_SHIFT)
+#else
+static inline unsigned long ioc3_map(dma_addr_t addr, unsigned long attr)
+{
+	return addr;
+}
 
-/* DMA barrier to separate cached and uncached accesses.  */
-#define BARRIER()							\
-	__asm__("sync" ::: "memory")
-
+#define ERBAR_VAL	0
+#endif
 
 #define IOC3_SIZE 0x100000
 
-/*
- * IOC3 is a big endian device
- *
- * Unorthodox but makes the users of these macros more readable - the pointer
- * to the IOC3's memory mapped registers is expected as struct ioc3 * ioc3
- * in the environment.
- */
-#define ioc3_r_mcr()		be32_to_cpu(ioc3->mcr)
-#define ioc3_w_mcr(v)		do { ioc3->mcr = cpu_to_be32(v); } while (0)
-#define ioc3_w_gpcr_s(v)	do { ioc3->gpcr_s = cpu_to_be32(v); } while (0)
-#define ioc3_r_emcr()		be32_to_cpu(ioc3->emcr)
-#define ioc3_w_emcr(v)		do { ioc3->emcr = cpu_to_be32(v); } while (0)
-#define ioc3_r_eisr()		be32_to_cpu(ioc3->eisr)
-#define ioc3_w_eisr(v)		do { ioc3->eisr = cpu_to_be32(v); } while (0)
-#define ioc3_r_eier()		be32_to_cpu(ioc3->eier)
-#define ioc3_w_eier(v)		do { ioc3->eier = cpu_to_be32(v); } while (0)
-#define ioc3_r_ercsr()		be32_to_cpu(ioc3->ercsr)
-#define ioc3_w_ercsr(v)		do { ioc3->ercsr = cpu_to_be32(v); } while (0)
-#define ioc3_r_erbr_h()		be32_to_cpu(ioc3->erbr_h)
-#define ioc3_w_erbr_h(v)	do { ioc3->erbr_h = cpu_to_be32(v); } while (0)
-#define ioc3_r_erbr_l()		be32_to_cpu(ioc3->erbr_l)
-#define ioc3_w_erbr_l(v)	do { ioc3->erbr_l = cpu_to_be32(v); } while (0)
-#define ioc3_r_erbar()		be32_to_cpu(ioc3->erbar)
-#define ioc3_w_erbar(v)		do { ioc3->erbar = cpu_to_be32(v); } while (0)
-#define ioc3_r_ercir()		be32_to_cpu(ioc3->ercir)
-#define ioc3_w_ercir(v)		do { ioc3->ercir = cpu_to_be32(v); } while (0)
-#define ioc3_r_erpir()		be32_to_cpu(ioc3->erpir)
-#define ioc3_w_erpir(v)		do { ioc3->erpir = cpu_to_be32(v); } while (0)
-#define ioc3_r_ertr()		be32_to_cpu(ioc3->ertr)
-#define ioc3_w_ertr(v)		do { ioc3->ertr = cpu_to_be32(v); } while (0)
-#define ioc3_r_etcsr()		be32_to_cpu(ioc3->etcsr)
-#define ioc3_w_etcsr(v)		do { ioc3->etcsr = cpu_to_be32(v); } while (0)
-#define ioc3_r_ersr()		be32_to_cpu(ioc3->ersr)
-#define ioc3_w_ersr(v)		do { ioc3->ersr = cpu_to_be32(v); } while (0)
-#define ioc3_r_etcdc()		be32_to_cpu(ioc3->etcdc)
-#define ioc3_w_etcdc(v)		do { ioc3->etcdc = cpu_to_be32(v); } while (0)
-#define ioc3_r_ebir()		be32_to_cpu(ioc3->ebir)
-#define ioc3_w_ebir(v)		do { ioc3->ebir = cpu_to_be32(v); } while (0)
-#define ioc3_r_etbr_h()		be32_to_cpu(ioc3->etbr_h)
-#define ioc3_w_etbr_h(v)	do { ioc3->etbr_h = cpu_to_be32(v); } while (0)
-#define ioc3_r_etbr_l()		be32_to_cpu(ioc3->etbr_l)
-#define ioc3_w_etbr_l(v)	do { ioc3->etbr_l = cpu_to_be32(v); } while (0)
-#define ioc3_r_etcir()		be32_to_cpu(ioc3->etcir)
-#define ioc3_w_etcir(v)		do { ioc3->etcir = cpu_to_be32(v); } while (0)
-#define ioc3_r_etpir()		be32_to_cpu(ioc3->etpir)
-#define ioc3_w_etpir(v)		do { ioc3->etpir = cpu_to_be32(v); } while (0)
-#define ioc3_r_emar_h()		be32_to_cpu(ioc3->emar_h)
-#define ioc3_w_emar_h(v)	do { ioc3->emar_h = cpu_to_be32(v); } while (0)
-#define ioc3_r_emar_l()		be32_to_cpu(ioc3->emar_l)
-#define ioc3_w_emar_l(v)	do { ioc3->emar_l = cpu_to_be32(v); } while (0)
-#define ioc3_r_ehar_h()		be32_to_cpu(ioc3->ehar_h)
-#define ioc3_w_ehar_h(v)	do { ioc3->ehar_h = cpu_to_be32(v); } while (0)
-#define ioc3_r_ehar_l()		be32_to_cpu(ioc3->ehar_l)
-#define ioc3_w_ehar_l(v)	do { ioc3->ehar_l = cpu_to_be32(v); } while (0)
-#define ioc3_r_micr()		be32_to_cpu(ioc3->micr)
-#define ioc3_w_micr(v)		do { ioc3->micr = cpu_to_be32(v); } while (0)
-#define ioc3_r_midr_r()		be32_to_cpu(ioc3->midr_r)
-#define ioc3_w_midr_r(v)	do { ioc3->midr_r = cpu_to_be32(v); } while (0)
-#define ioc3_r_midr_w()		be32_to_cpu(ioc3->midr_w)
-#define ioc3_w_midr_w(v)	do { ioc3->midr_w = cpu_to_be32(v); } while (0)
-
 static inline u32 mcr_pack(u32 pulse, u32 sample)
 {
 	return (pulse << 10) | (sample << 2);
 }
 
-static int nic_wait(struct ioc3 *ioc3)
+static int nic_wait(u32 __iomem *mcr)
 {
-	u32 mcr;
+	u32 m;
 
-        do {
-                mcr = ioc3_r_mcr();
-        } while (!(mcr & 2));
+	do {
+		m = readl(mcr);
+	} while (!(m & 2));
 
-        return mcr & 1;
+	return m & 1;
 }
 
-static int nic_reset(struct ioc3 *ioc3)
+static int nic_reset(u32 __iomem *mcr)
 {
-        int presence;
+	int presence;
 
-	ioc3_w_mcr(mcr_pack(500, 65));
-	presence = nic_wait(ioc3);
+	writel(mcr_pack(500, 65), mcr);
+	presence = nic_wait(mcr);
 
-	ioc3_w_mcr(mcr_pack(0, 500));
-	nic_wait(ioc3);
+	writel(mcr_pack(0, 500), mcr);
+	nic_wait(mcr);
 
-        return presence;
+	return presence;
 }
 
-static inline int nic_read_bit(struct ioc3 *ioc3)
+static inline int nic_read_bit(u32 __iomem *mcr)
 {
 	int result;
 
-	ioc3_w_mcr(mcr_pack(6, 13));
-	result = nic_wait(ioc3);
-	ioc3_w_mcr(mcr_pack(0, 100));
-	nic_wait(ioc3);
+	writel(mcr_pack(6, 13), mcr);
+	result = nic_wait(mcr);
+	writel(mcr_pack(0, 100), mcr);
+	nic_wait(mcr);
 
 	return result;
 }
 
-static inline void nic_write_bit(struct ioc3 *ioc3, int bit)
+static inline void nic_write_bit(u32 __iomem *mcr, int bit)
 {
 	if (bit)
-		ioc3_w_mcr(mcr_pack(6, 110));
+		writel(mcr_pack(6, 110), mcr);
 	else
-		ioc3_w_mcr(mcr_pack(80, 30));
+		writel(mcr_pack(80, 30), mcr);
 
-	nic_wait(ioc3);
+	nic_wait(mcr);
 }
 
-/*
- * Read a byte from an iButton device
+/* Read a byte from an iButton device
  */
-static u32 nic_read_byte(struct ioc3 *ioc3)
+static u32 nic_read_byte(u32 __iomem *mcr)
 {
 	u32 result = 0;
 	int i;
 
 	for (i = 0; i < 8; i++)
-		result = (result >> 1) | (nic_read_bit(ioc3) << 7);
+		result = (result >> 1) | (nic_read_bit(mcr) << 7);
 
 	return result;
 }
 
-/*
- * Write a byte to an iButton device
+/* Write a byte to an iButton device
  */
-static void nic_write_byte(struct ioc3 *ioc3, int byte)
+static void nic_write_byte(u32 __iomem *mcr, int byte)
 {
 	int i, bit;
 
@@ -293,26 +254,26 @@
 		bit = byte & 1;
 		byte >>= 1;
 
-		nic_write_bit(ioc3, bit);
+		nic_write_bit(mcr, bit);
 	}
 }
 
-static u64 nic_find(struct ioc3 *ioc3, int *last)
+static u64 nic_find(u32 __iomem *mcr, int *last)
 {
 	int a, b, index, disc;
 	u64 address = 0;
 
-	nic_reset(ioc3);
+	nic_reset(mcr);
 	/* Search ROM.  */
-	nic_write_byte(ioc3, 0xf0);
+	nic_write_byte(mcr, 0xf0);
 
 	/* Algorithm from ``Book of iButton Standards''.  */
 	for (index = 0, disc = 0; index < 64; index++) {
-		a = nic_read_bit(ioc3);
-		b = nic_read_bit(ioc3);
+		a = nic_read_bit(mcr);
+		b = nic_read_bit(mcr);
 
 		if (a && b) {
-			printk("NIC search failed (not fatal).\n");
+			pr_warn("NIC search failed (not fatal).\n");
 			*last = 0;
 			return 0;
 		}
@@ -323,16 +284,17 @@
 			} else if (index > *last) {
 				address &= ~(1UL << index);
 				disc = index;
-			} else if ((address & (1UL << index)) == 0)
+			} else if ((address & (1UL << index)) == 0) {
 				disc = index;
-			nic_write_bit(ioc3, address & (1UL << index));
+			}
+			nic_write_bit(mcr, address & (1UL << index));
 			continue;
 		} else {
 			if (a)
 				address |= 1UL << index;
 			else
 				address &= ~(1UL << index);
-			nic_write_bit(ioc3, a);
+			nic_write_bit(mcr, a);
 			continue;
 		}
 	}
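
nic_find() walks the Dallas 1-Wire Search ROM algorithm: each step reads a bit and its complement from all devices in parallel, then writes back the branch to follow. One step of that logic, sketched in terms of the nic_read_bit()/nic_write_bit() helpers defined above (the 'want' parameter is the branch chosen at a discrepancy):

static int ex_search_rom_step(u32 __iomem *mcr, int want)
{
	int bit = nic_read_bit(mcr);	/* wired-AND of all true bits */
	int cmp = nic_read_bit(mcr);	/* wired-AND of all complements */

	if (bit && cmp)
		return -1;		/* no device answered */
	if (bit != cmp)
		want = bit;		/* all devices agree on this bit */
	nic_write_bit(mcr, want);	/* deselect devices on the other branch */
	return want;
}
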
@@ -342,7 +304,7 @@
 	return address;
 }
 
-static int nic_init(struct ioc3 *ioc3)
+static int nic_init(u32 __iomem *mcr)
 {
 	const char *unknown = "unknown";
 	const char *type = unknown;
@@ -352,7 +314,8 @@
 
 	while (1) {
 		u64 reg;
-		reg = nic_find(ioc3, &save);
+
+		reg = nic_find(mcr, &save);
 
 		switch (reg & 0xff) {
 		case 0x91:
@@ -366,12 +329,12 @@
 			continue;
 		}
 
-		nic_reset(ioc3);
+		nic_reset(mcr);
 
 		/* Match ROM.  */
-		nic_write_byte(ioc3, 0x55);
+		nic_write_byte(mcr, 0x55);
 		for (i = 0; i < 8; i++)
-			nic_write_byte(ioc3, (reg >> (i << 3)) & 0xff);
+			nic_write_byte(mcr, (reg >> (i << 3)) & 0xff);
 
 		reg >>= 8; /* Shift out type.  */
 		for (i = 0; i < 6; i++) {
@@ -382,52 +345,50 @@
 		break;
 	}
 
-	printk("Found %s NIC", type);
+	pr_info("Found %s NIC", type);
 	if (type != unknown)
-		printk (" registration number %pM, CRC %02x", serial, crc);
-	printk(".\n");
+		pr_cont(" registration number %pM, CRC %02x", serial, crc);
+	pr_cont(".\n");
 
 	return 0;
 }
 
-/*
- * Read the NIC (Number-In-a-Can) device used to store the MAC address on
+/* Read the NIC (Number-In-a-Can) device used to store the MAC address on
  * SN0 / SN00 nodeboards and PCI cards.
  */
 static void ioc3_get_eaddr_nic(struct ioc3_private *ip)
 {
-	struct ioc3 *ioc3 = ip->regs;
-	u8 nic[14];
+	u32 __iomem *mcr = &ip->all_regs->mcr;
 	int tries = 2; /* There may be some problem with the battery?  */
+	u8 nic[14];
 	int i;
 
-	ioc3_w_gpcr_s(1 << 21);
+	writel(1 << 21, &ip->all_regs->gpcr_s);
 
 	while (tries--) {
-		if (!nic_init(ioc3))
+		if (!nic_init(mcr))
 			break;
 		udelay(500);
 	}
 
 	if (tries < 0) {
-		printk("Failed to read MAC address\n");
+		pr_err("Failed to read MAC address\n");
 		return;
 	}
 
 	/* Read Memory.  */
-	nic_write_byte(ioc3, 0xf0);
-	nic_write_byte(ioc3, 0x00);
-	nic_write_byte(ioc3, 0x00);
+	nic_write_byte(mcr, 0xf0);
+	nic_write_byte(mcr, 0x00);
+	nic_write_byte(mcr, 0x00);
 
 	for (i = 13; i >= 0; i--)
-		nic[i] = nic_read_byte(ioc3);
+		nic[i] = nic_read_byte(mcr);
 
 	for (i = 2; i < 8; i++)
 		ip->dev->dev_addr[i - 2] = nic[i];
 }
 
-/*
- * Ok, this is hosed by design.  It's necessary to know what machine the
+/* Ok, this is hosed by design.  It's necessary to know what machine the
  * NIC is in in order to know how to read the NIC address.  We also have
  * to know if it's a PCI card or a NIC in on the node board ...
  */
@@ -435,17 +396,21 @@
 {
 	ioc3_get_eaddr_nic(ip);
 
-	printk("Ethernet address is %pM.\n", ip->dev->dev_addr);
+	pr_info("Ethernet address is %pM.\n", ip->dev->dev_addr);
 }
 
 static void __ioc3_set_mac_address(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
 
-	ioc3_w_emar_h((dev->dev_addr[5] <<  8) | dev->dev_addr[4]);
-	ioc3_w_emar_l((dev->dev_addr[3] << 24) | (dev->dev_addr[2] << 16) |
-	              (dev->dev_addr[1] <<  8) | dev->dev_addr[0]);
+	writel((dev->dev_addr[5] <<  8) |
+	       dev->dev_addr[4],
+	       &ip->regs->emar_h);
+	writel((dev->dev_addr[3] << 24) |
+	       (dev->dev_addr[2] << 16) |
+	       (dev->dev_addr[1] <<  8) |
+	       dev->dev_addr[0],
+	       &ip->regs->emar_l);
 }
 
 static int ioc3_set_mac_address(struct net_device *dev, void *addr)
@@ -462,31 +427,35 @@
 	return 0;
 }
 
-/*
- * Caller must hold the ioc3_lock ever for MII readers.  This is also
+/* Caller must hold the ioc3_lock even for MII readers.  This is also
  * used to protect the transmitter side but it's low contention.
  */
 static int ioc3_mdio_read(struct net_device *dev, int phy, int reg)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
 
-	while (ioc3_r_micr() & MICR_BUSY);
-	ioc3_w_micr((phy << MICR_PHYADDR_SHIFT) | reg | MICR_READTRIG);
-	while (ioc3_r_micr() & MICR_BUSY);
+	while (readl(&regs->micr) & MICR_BUSY)
+		;
+	writel((phy << MICR_PHYADDR_SHIFT) | reg | MICR_READTRIG,
+	       &regs->micr);
+	while (readl(&regs->micr) & MICR_BUSY)
+		;
 
-	return ioc3_r_midr_r() & MIDR_DATA_MASK;
+	return readl(&regs->midr_r) & MIDR_DATA_MASK;
 }
 
 static void ioc3_mdio_write(struct net_device *dev, int phy, int reg, int data)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
 
-	while (ioc3_r_micr() & MICR_BUSY);
-	ioc3_w_midr_w(data);
-	ioc3_w_micr((phy << MICR_PHYADDR_SHIFT) | reg);
-	while (ioc3_r_micr() & MICR_BUSY);
+	while (readl(&regs->micr) & MICR_BUSY)
+		;
+	writel(data, &regs->midr_w);
+	writel((phy << MICR_PHYADDR_SHIFT) | reg, &regs->micr);
+	while (readl(&regs->micr) & MICR_BUSY)
+		;
 }
 
 static int ioc3_mii_init(struct ioc3_private *ip);
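
The old ioc3_r_*/ioc3_w_* macros hid both the big-endian conversion and a magic `ioc3` variable; readl()/writel() on typed __iomem pointers make every access explicit and sparse-checkable. The basic write-then-flush and busy-wait idioms, against a hypothetical register block:

#include <linux/io.h>

struct ex_regs {
	u32 ctrl;
	u32 status;
};

static void ex_kick(struct ex_regs __iomem *regs)
{
	writel(1, &regs->ctrl);		/* start the operation */
	readl(&regs->ctrl);		/* read back to flush the posted write */

	while (readl(&regs->status) & 1)
		;			/* busy-wait, as the MII code above does */
}
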
@@ -494,23 +463,22 @@
 static struct net_device_stats *ioc3_get_stats(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
 
-	dev->stats.collisions += (ioc3_r_etcdc() & ETCDC_COLLCNT_MASK);
+	dev->stats.collisions += readl(&regs->etcdc) & ETCDC_COLLCNT_MASK;
 	return &dev->stats;
 }
 
-static void ioc3_tcpudp_checksum(struct sk_buff *skb, uint32_t hwsum, int len)
+static void ioc3_tcpudp_checksum(struct sk_buff *skb, u32 hwsum, int len)
 {
 	struct ethhdr *eh = eth_hdr(skb);
-	uint32_t csum, ehsum;
 	unsigned int proto;
-	struct iphdr *ih;
-	uint16_t *ew;
 	unsigned char *cp;
+	struct iphdr *ih;
+	u32 csum, ehsum;
+	u16 *ew;
 
-	/*
-	 * Did hardware handle the checksum at all?  The cases we can handle
+	/* Did hardware handle the checksum at all?  The cases we can handle
 	 * are:
 	 *
 	 * - TCP and UDP checksums of IPv4 only.
@@ -526,7 +494,7 @@
 	if (eh->h_proto != htons(ETH_P_IP))
 		return;
 
-	ih = (struct iphdr *) ((char *)eh + ETH_HLEN);
+	ih = (struct iphdr *)((char *)eh + ETH_HLEN);
 	if (ip_is_fragment(ih))
 		return;
 
@@ -537,12 +505,12 @@
 	/* Same as tx - compute csum of pseudo header  */
 	csum = hwsum +
 	       (ih->tot_len - (ih->ihl << 2)) +
-	       htons((uint16_t)ih->protocol) +
+	       htons((u16)ih->protocol) +
 	       (ih->saddr >> 16) + (ih->saddr & 0xffff) +
 	       (ih->daddr >> 16) + (ih->daddr & 0xffff);
 
 	/* Sum up ethernet dest addr, src addr and protocol  */
-	ew = (uint16_t *) eh;
+	ew = (u16 *)eh;
 	ehsum = ew[0] + ew[1] + ew[2] + ew[3] + ew[4] + ew[5] + ew[6];
 
 	ehsum = (ehsum & 0xffff) + (ehsum >> 16);
@@ -551,14 +519,15 @@
 	csum += 0xffff ^ ehsum;
 
 	/* In the next step we also subtract the 1's complement
-	   checksum of the trailing ethernet CRC.  */
+	 * checksum of the trailing ethernet CRC.
+	 */
 	cp = (char *)eh + len;	/* points at trailing CRC */
 	if (len & 1) {
-		csum += 0xffff ^ (uint16_t) ((cp[1] << 8) | cp[0]);
-		csum += 0xffff ^ (uint16_t) ((cp[3] << 8) | cp[2]);
+		csum += 0xffff ^ (u16)((cp[1] << 8) | cp[0]);
+		csum += 0xffff ^ (u16)((cp[3] << 8) | cp[2]);
 	} else {
-		csum += 0xffff ^ (uint16_t) ((cp[0] << 8) | cp[1]);
-		csum += 0xffff ^ (uint16_t) ((cp[2] << 8) | cp[3]);
+		csum += 0xffff ^ (u16)((cp[0] << 8) | cp[1]);
+		csum += 0xffff ^ (u16)((cp[2] << 8) | cp[3]);
 	}
 
 	csum = (csum & 0xffff) + (csum >> 16);
@@ -572,10 +541,10 @@
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 	struct sk_buff *skb, *new_skb;
-	struct ioc3 *ioc3 = ip->regs;
 	int rx_entry, n_entry, len;
 	struct ioc3_erxbuf *rxb;
 	unsigned long *rxr;
+	dma_addr_t d;
 	u32 w0, err;
 
 	rxr = ip->rxr;		/* Ring base */
@@ -583,64 +552,67 @@
 	n_entry = ip->rx_pi;
 
 	skb = ip->rx_skbs[rx_entry];
-	rxb = (struct ioc3_erxbuf *) (skb->data - RX_OFFSET);
+	rxb = (struct ioc3_erxbuf *)(skb->data - RX_OFFSET);
 	w0 = be32_to_cpu(rxb->w0);
 
 	while (w0 & ERXBUF_V) {
 		err = be32_to_cpu(rxb->err);		/* It's valid ...  */
 		if (err & ERXBUF_GOODPKT) {
 			len = ((w0 >> ERXBUF_BYTECNT_SHIFT) & 0x7ff) - 4;
-			skb_trim(skb, len);
+			skb_put(skb, len);
 			skb->protocol = eth_type_trans(skb, dev);
 
-			new_skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
-			if (!new_skb) {
+			if (ioc3_alloc_skb(ip, &new_skb, &rxb, &d)) {
 				/* Ouch, drop packet and just recycle packet
-				   to keep the ring filled.  */
+				 * to keep the ring filled.
+				 */
 				dev->stats.rx_dropped++;
 				new_skb = skb;
+				d = rxr[rx_entry];
 				goto next;
 			}
 
 			if (likely(dev->features & NETIF_F_RXCSUM))
 				ioc3_tcpudp_checksum(skb,
-					w0 & ERXBUF_IPCKSUM_MASK, len);
+						     w0 & ERXBUF_IPCKSUM_MASK,
+						     len);
+
+			dma_unmap_single(ip->dma_dev, rxr[rx_entry],
+					 RX_BUF_SIZE, DMA_FROM_DEVICE);
 
 			netif_rx(skb);
 
 			ip->rx_skbs[rx_entry] = NULL;	/* Poison  */
 
-			/* Because we reserve afterwards. */
-			skb_put(new_skb, (1664 + RX_OFFSET));
-			rxb = (struct ioc3_erxbuf *) new_skb->data;
-			skb_reserve(new_skb, RX_OFFSET);
-
 			dev->stats.rx_packets++;		/* Statistics */
 			dev->stats.rx_bytes += len;
 		} else {
 			/* The frame is invalid and the skb never
-			   reached the network layer so we can just
-			   recycle it.  */
+			 * reached the network layer so we can just
+			 * recycle it.
+			 */
 			new_skb = skb;
+			d = rxr[rx_entry];
 			dev->stats.rx_errors++;
 		}
 		if (err & ERXBUF_CRCERR)	/* Statistics */
 			dev->stats.rx_crc_errors++;
 		if (err & ERXBUF_FRAMERR)
 			dev->stats.rx_frame_errors++;
+
 next:
 		ip->rx_skbs[n_entry] = new_skb;
-		rxr[n_entry] = cpu_to_be64(ioc3_map(rxb, 1));
+		rxr[n_entry] = cpu_to_be64(ioc3_map(d, PCI64_ATTR_BAR));
 		rxb->w0 = 0;				/* Clear valid flag */
-		n_entry = (n_entry + 1) & 511;		/* Update erpir */
+		n_entry = (n_entry + 1) & RX_RING_MASK;	/* Update erpir */
 
 		/* Now go on to the next ring entry.  */
-		rx_entry = (rx_entry + 1) & 511;
+		rx_entry = (rx_entry + 1) & RX_RING_MASK;
 		skb = ip->rx_skbs[rx_entry];
-		rxb = (struct ioc3_erxbuf *) (skb->data - RX_OFFSET);
+		rxb = (struct ioc3_erxbuf *)(skb->data - RX_OFFSET);
 		w0 = be32_to_cpu(rxb->w0);
 	}
-	ioc3_w_erpir((n_entry << 3) | ERPIR_ARM);
+	writel((n_entry << 3) | ERPIR_ARM, &ip->regs->erpir);
 	ip->rx_pi = n_entry;
 	ip->rx_ci = rx_entry;
 }
@@ -648,16 +620,16 @@
 static inline void ioc3_tx(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
+	struct ioc3_ethregs *regs = ip->regs;
 	unsigned long packets, bytes;
-	struct ioc3 *ioc3 = ip->regs;
 	int tx_entry, o_entry;
 	struct sk_buff *skb;
 	u32 etcir;
 
 	spin_lock(&ip->ioc3_lock);
-	etcir = ioc3_r_etcir();
+	etcir = readl(&regs->etcir);
 
-	tx_entry = (etcir >> 7) & 127;
+	tx_entry = (etcir >> 7) & TX_RING_MASK;
 	o_entry = ip->tx_ci;
 	packets = 0;
 	bytes = 0;
@@ -666,28 +638,27 @@
 		packets++;
 		skb = ip->tx_skbs[o_entry];
 		bytes += skb->len;
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		ip->tx_skbs[o_entry] = NULL;
 
-		o_entry = (o_entry + 1) & 127;		/* Next */
+		o_entry = (o_entry + 1) & TX_RING_MASK;	/* Next */
 
-		etcir = ioc3_r_etcir();			/* More pkts sent?  */
-		tx_entry = (etcir >> 7) & 127;
+		etcir = readl(&regs->etcir);		/* More pkts sent?  */
+		tx_entry = (etcir >> 7) & TX_RING_MASK;
 	}
 
 	dev->stats.tx_packets += packets;
 	dev->stats.tx_bytes += bytes;
 	ip->txqlen -= packets;
 
-	if (ip->txqlen < 128)
+	if (netif_queue_stopped(dev) && ip->txqlen < TX_RING_ENTRIES)
 		netif_wake_queue(dev);
 
 	ip->tx_ci = o_entry;
 	spin_unlock(&ip->ioc3_lock);
 }
 
-/*
- * Deal with fatal IOC3 errors.  This condition might be caused by a hard or
+/* Deal with fatal IOC3 errors.  This condition might be caused by hard or
+/* Deal with fatal IOC3 errors.  This condition might be caused by hard or
  * software problems, so we should try to recover
  * more gracefully if this ever happens.  In theory we might be flooded
  * with such error interrupts if something really goes wrong, so we might
@@ -696,25 +667,33 @@
 static void ioc3_error(struct net_device *dev, u32 eisr)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	unsigned char *iface = dev->name;
 
 	spin_lock(&ip->ioc3_lock);
 
 	if (eisr & EISR_RXOFLO)
-		printk(KERN_ERR "%s: RX overflow.\n", iface);
+		net_err_ratelimited("%s: RX overflow.\n", dev->name);
 	if (eisr & EISR_RXBUFOFLO)
-		printk(KERN_ERR "%s: RX buffer overflow.\n", iface);
+		net_err_ratelimited("%s: RX buffer overflow.\n", dev->name);
 	if (eisr & EISR_RXMEMERR)
-		printk(KERN_ERR "%s: RX PCI error.\n", iface);
+		net_err_ratelimited("%s: RX PCI error.\n", dev->name);
 	if (eisr & EISR_RXPARERR)
-		printk(KERN_ERR "%s: RX SSRAM parity error.\n", iface);
+		net_err_ratelimited("%s: RX SSRAM parity error.\n", dev->name);
 	if (eisr & EISR_TXBUFUFLO)
-		printk(KERN_ERR "%s: TX buffer underflow.\n", iface);
+		net_err_ratelimited("%s: TX buffer underflow.\n", dev->name);
 	if (eisr & EISR_TXMEMERR)
-		printk(KERN_ERR "%s: TX PCI error.\n", iface);
+		net_err_ratelimited("%s: TX PCI error.\n", dev->name);
 
 	ioc3_stop(ip);
+	ioc3_free_rx_bufs(ip);
+	ioc3_clean_tx_ring(ip);
+
 	ioc3_init(dev);
+	if (ioc3_alloc_rx_bufs(dev)) {
+		netdev_err(dev, "%s: rx buffer allocation failed\n", __func__);
+		spin_unlock(&ip->ioc3_lock);
+		return;
+	}
+	ioc3_start(ip);
 	ioc3_mii_init(ip);
 
 	netif_wake_queue(dev);
@@ -723,45 +702,45 @@
 }
 
 /* The interrupt handler does all of the Rx thread work and cleans up
-   after the Tx thread.  */
-static irqreturn_t ioc3_interrupt(int irq, void *_dev)
+ * after the Tx thread.
+ */
+static irqreturn_t ioc3_interrupt(int irq, void *dev_id)
 {
-	struct net_device *dev = (struct net_device *)_dev;
-	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
-	const u32 enabled = EISR_RXTIMERINT | EISR_RXOFLO | EISR_RXBUFOFLO |
-	                    EISR_RXMEMERR | EISR_RXPARERR | EISR_TXBUFUFLO |
-	                    EISR_TXEXPLICIT | EISR_TXMEMERR;
+	struct ioc3_private *ip = netdev_priv(dev_id);
+	struct ioc3_ethregs *regs = ip->regs;
 	u32 eisr;
 
-	eisr = ioc3_r_eisr() & enabled;
-
-	ioc3_w_eisr(eisr);
-	(void) ioc3_r_eisr();				/* Flush */
+	eisr = readl(&regs->eisr);
+	writel(eisr, &regs->eisr);
+	readl(&regs->eisr);				/* Flush */
 
 	if (eisr & (EISR_RXOFLO | EISR_RXBUFOFLO | EISR_RXMEMERR |
-	            EISR_RXPARERR | EISR_TXBUFUFLO | EISR_TXMEMERR))
-		ioc3_error(dev, eisr);
+		    EISR_RXPARERR | EISR_TXBUFUFLO | EISR_TXMEMERR))
+		ioc3_error(dev_id, eisr);
 	if (eisr & EISR_RXTIMERINT)
-		ioc3_rx(dev);
+		ioc3_rx(dev_id);
 	if (eisr & EISR_TXEXPLICIT)
-		ioc3_tx(dev);
+		ioc3_tx(dev_id);
 
 	return IRQ_HANDLED;
 }
 
 static inline void ioc3_setup_duplex(struct ioc3_private *ip)
 {
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
+
+	spin_lock_irq(&ip->ioc3_lock);
 
 	if (ip->mii.full_duplex) {
-		ioc3_w_etcsr(ETCSR_FD);
+		writel(ETCSR_FD, &regs->etcsr);
 		ip->emcr |= EMCR_DUPLEX;
 	} else {
-		ioc3_w_etcsr(ETCSR_HD);
+		writel(ETCSR_HD, &regs->etcsr);
 		ip->emcr &= ~EMCR_DUPLEX;
 	}
-	ioc3_w_emcr(ip->emcr);
+	writel(ip->emcr, &regs->emcr);
+
+	spin_unlock_irq(&ip->ioc3_lock);
 }
 
 static void ioc3_timer(struct timer_list *t)
@@ -772,12 +751,11 @@
 	mii_check_media(&ip->mii, 1, 0);
 	ioc3_setup_duplex(ip);
 
-	ip->ioc3_timer.expires = jiffies + ((12 * HZ)/10); /* 1.2s */
+	ip->ioc3_timer.expires = jiffies + ((12 * HZ) / 10); /* 1.2s */
 	add_timer(&ip->ioc3_timer);
 }
 
-/*
- * Try to find a PHY.  There is no apparent relation between the MII addresses
+/* Try to find a PHY.  There is no apparent relation between the MII addresses
  * in the SGI documentation and what we find in reality, so we simply probe
  * for the PHY.  It seems IOC3 PHYs usually live on address 31.  One of my
  * onboard IOC3s has the special oddity that probing doesn't seem to find it
@@ -786,8 +764,8 @@
  */
 static int ioc3_mii_init(struct ioc3_private *ip)
 {
-	int i, found = 0, res = 0;
 	int ioc3_phy_workaround = 1;
+	int i, found = 0, res = 0;
 	u16 word;
 
 	for (i = 0; i < 32; i++) {
@@ -800,9 +778,9 @@
 	}
 
 	if (!found) {
-		if (ioc3_phy_workaround)
+		if (ioc3_phy_workaround) {
 			i = 31;
-		else {
+		} else {
 			ip->mii.phy_id = -1;
 			res = -ENODEV;
 			goto out;
@@ -817,27 +795,27 @@
 
 static void ioc3_mii_start(struct ioc3_private *ip)
 {
-	ip->ioc3_timer.expires = jiffies + (12 * HZ)/10;  /* 1.2 sec. */
+	ip->ioc3_timer.expires = jiffies + (12 * HZ) / 10;  /* 1.2 sec. */
 	add_timer(&ip->ioc3_timer);
 }
 
-static inline void ioc3_clean_rx_ring(struct ioc3_private *ip)
+static inline void ioc3_tx_unmap(struct ioc3_private *ip, int entry)
 {
-	struct sk_buff *skb;
-	int i;
+	struct ioc3_etxd *desc;
+	u32 cmd, bufcnt, len;
 
-	for (i = ip->rx_ci; i & 15; i++) {
-		ip->rx_skbs[ip->rx_pi] = ip->rx_skbs[ip->rx_ci];
-		ip->rxr[ip->rx_pi++] = ip->rxr[ip->rx_ci++];
+	desc = &ip->txr[entry];
+	cmd = be32_to_cpu(desc->cmd);
+	bufcnt = be32_to_cpu(desc->bufcnt);
+	if (cmd & ETXD_B1V) {
+		len = (bufcnt & ETXD_B1CNT_MASK) >> ETXD_B1CNT_SHIFT;
+		dma_unmap_single(ip->dma_dev, be64_to_cpu(desc->p1),
+				 len, DMA_TO_DEVICE);
 	}
-	ip->rx_pi &= 511;
-	ip->rx_ci &= 511;
-
-	for (i = ip->rx_ci; i != ip->rx_pi; i = (i+1) & 511) {
-		struct ioc3_erxbuf *rxb;
-		skb = ip->rx_skbs[i];
-		rxb = (struct ioc3_erxbuf *) (skb->data - RX_OFFSET);
-		rxb->w0 = 0;
+	if (cmd & ETXD_B2V) {
+		len = (bufcnt & ETXD_B2CNT_MASK) >> ETXD_B2CNT_SHIFT;
+		dma_unmap_single(ip->dma_dev, be64_to_cpu(desc->p2),
+				 len, DMA_TO_DEVICE);
 	}
 }
 
@@ -846,9 +824,10 @@
 	struct sk_buff *skb;
 	int i;
 
-	for (i=0; i < 128; i++) {
+	for (i = 0; i < TX_RING_ENTRIES; i++) {
 		skb = ip->tx_skbs[i];
 		if (skb) {
+			ioc3_tx_unmap(ip, i);
 			ip->tx_skbs[i] = NULL;
 			dev_kfree_skb_any(skb);
 		}
@@ -858,179 +837,137 @@
 	ip->tx_ci = 0;
 }
 
-static void ioc3_free_rings(struct ioc3_private *ip)
+static void ioc3_free_rx_bufs(struct ioc3_private *ip)
 {
-	struct sk_buff *skb;
 	int rx_entry, n_entry;
+	struct sk_buff *skb;
 
-	if (ip->txr) {
-		ioc3_clean_tx_ring(ip);
-		free_pages((unsigned long)ip->txr, 2);
-		ip->txr = NULL;
-	}
+	n_entry = ip->rx_ci;
+	rx_entry = ip->rx_pi;
 
-	if (ip->rxr) {
-		n_entry = ip->rx_ci;
-		rx_entry = ip->rx_pi;
-
-		while (n_entry != rx_entry) {
-			skb = ip->rx_skbs[n_entry];
-			if (skb)
-				dev_kfree_skb_any(skb);
-
-			n_entry = (n_entry + 1) & 511;
+	while (n_entry != rx_entry) {
+		skb = ip->rx_skbs[n_entry];
+		if (skb) {
+			dma_unmap_single(ip->dma_dev,
+					 be64_to_cpu(ip->rxr[n_entry]),
+					 RX_BUF_SIZE, DMA_FROM_DEVICE);
+			dev_kfree_skb_any(skb);
 		}
-		free_page((unsigned long)ip->rxr);
-		ip->rxr = NULL;
+		n_entry = (n_entry + 1) & RX_RING_MASK;
 	}
 }
 
-static void ioc3_alloc_rings(struct net_device *dev)
+static int ioc3_alloc_rx_bufs(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 	struct ioc3_erxbuf *rxb;
-	unsigned long *rxr;
+	dma_addr_t d;
 	int i;
 
-	if (ip->rxr == NULL) {
-		/* Allocate and initialize rx ring.  4kb = 512 entries  */
-		ip->rxr = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
-		rxr = ip->rxr;
-		if (!rxr)
-			printk("ioc3_alloc_rings(): get_zeroed_page() failed!\n");
+	/* Now the rx buffers.  The RX ring may be larger but
+	 * we only allocate RX_BUFFS buffers for now.  Need to tune
+	 * this for performance and memory later.
+	 */
+	for (i = 0; i < RX_BUFFS; i++) {
+		if (ioc3_alloc_skb(ip, &ip->rx_skbs[i], &rxb, &d))
+			return -ENOMEM;
 
-		/* Now the rx buffers.  The RX ring may be larger but
-		   we only allocate 16 buffers for now.  Need to tune
-		   this for performance and memory later.  */
-		for (i = 0; i < RX_BUFFS; i++) {
-			struct sk_buff *skb;
-
-			skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
-			if (!skb) {
-				show_free_areas(0, NULL);
-				continue;
-			}
-
-			ip->rx_skbs[i] = skb;
-
-			/* Because we reserve afterwards. */
-			skb_put(skb, (1664 + RX_OFFSET));
-			rxb = (struct ioc3_erxbuf *) skb->data;
-			rxr[i] = cpu_to_be64(ioc3_map(rxb, 1));
-			skb_reserve(skb, RX_OFFSET);
-		}
-		ip->rx_ci = 0;
-		ip->rx_pi = RX_BUFFS;
+		rxb->w0 = 0;	/* Clear valid flag */
+		ip->rxr[i] = cpu_to_be64(ioc3_map(d, PCI64_ATTR_BAR));
 	}
+	ip->rx_ci = 0;
+	ip->rx_pi = RX_BUFFS;
 
-	if (ip->txr == NULL) {
-		/* Allocate and initialize tx rings.  16kb = 128 bufs.  */
-		ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2);
-		if (!ip->txr)
-			printk("ioc3_alloc_rings(): __get_free_pages() failed!\n");
-		ip->tx_pi = 0;
-		ip->tx_ci = 0;
-	}
-}
-
-static void ioc3_init_rings(struct net_device *dev)
-{
-	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
-	unsigned long ring;
-
-	ioc3_free_rings(ip);
-	ioc3_alloc_rings(dev);
-
-	ioc3_clean_rx_ring(ip);
-	ioc3_clean_tx_ring(ip);
-
-	/* Now the rx ring base, consume & produce registers.  */
-	ring = ioc3_map(ip->rxr, 0);
-	ioc3_w_erbr_h(ring >> 32);
-	ioc3_w_erbr_l(ring & 0xffffffff);
-	ioc3_w_ercir(ip->rx_ci << 3);
-	ioc3_w_erpir((ip->rx_pi << 3) | ERPIR_ARM);
-
-	ring = ioc3_map(ip->txr, 0);
-
-	ip->txqlen = 0;					/* nothing queued  */
-
-	/* Now the tx ring base, consume & produce registers.  */
-	ioc3_w_etbr_h(ring >> 32);
-	ioc3_w_etbr_l(ring & 0xffffffff);
-	ioc3_w_etpir(ip->tx_pi << 7);
-	ioc3_w_etcir(ip->tx_ci << 7);
-	(void) ioc3_r_etcir();				/* Flush */
+	return 0;
 }
 
 static inline void ioc3_ssram_disc(struct ioc3_private *ip)
 {
-	struct ioc3 *ioc3 = ip->regs;
-	volatile u32 *ssram0 = &ioc3->ssram[0x0000];
-	volatile u32 *ssram1 = &ioc3->ssram[0x4000];
-	unsigned int pattern = 0x5555;
+	struct ioc3_ethregs *regs = ip->regs;
+	u32 *ssram0 = &ip->ssram[0x0000];
+	u32 *ssram1 = &ip->ssram[0x4000];
+	u32 pattern = 0x5555;
 
 	/* Assume the larger size SSRAM and enable parity checking */
-	ioc3_w_emcr(ioc3_r_emcr() | (EMCR_BUFSIZ | EMCR_RAMPAR));
+	writel(readl(&regs->emcr) | (EMCR_BUFSIZ | EMCR_RAMPAR), &regs->emcr);
+	readl(&regs->emcr); /* Flush */
 
-	*ssram0 = pattern;
-	*ssram1 = ~pattern & IOC3_SSRAM_DM;
+	writel(pattern, ssram0);
+	writel(~pattern & IOC3_SSRAM_DM, ssram1);
 
-	if ((*ssram0 & IOC3_SSRAM_DM) != pattern ||
-	    (*ssram1 & IOC3_SSRAM_DM) != (~pattern & IOC3_SSRAM_DM)) {
+	if ((readl(ssram0) & IOC3_SSRAM_DM) != pattern ||
+	    (readl(ssram1) & IOC3_SSRAM_DM) != (~pattern & IOC3_SSRAM_DM)) {
 		/* set ssram size to 64 KB */
-		ip->emcr = EMCR_RAMPAR;
-		ioc3_w_emcr(ioc3_r_emcr() & ~EMCR_BUFSIZ);
-	} else
-		ip->emcr = EMCR_BUFSIZ | EMCR_RAMPAR;
+		ip->emcr |= EMCR_RAMPAR;
+		writel(readl(&regs->emcr) & ~EMCR_BUFSIZ, &regs->emcr);
+	} else {
+		ip->emcr |= EMCR_BUFSIZ | EMCR_RAMPAR;
+	}
 }
 
 static void ioc3_init(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
 
 	del_timer_sync(&ip->ioc3_timer);	/* Kill if running	*/
 
-	ioc3_w_emcr(EMCR_RST);			/* Reset		*/
-	(void) ioc3_r_emcr();			/* Flush WB		*/
+	writel(EMCR_RST, &regs->emcr);		/* Reset		*/
+	readl(&regs->emcr);			/* Flush WB		*/
 	udelay(4);				/* Give it time ...	*/
-	ioc3_w_emcr(0);
-	(void) ioc3_r_emcr();
+	writel(0, &regs->emcr);
+	readl(&regs->emcr);
 
 	/* Misc registers  */
-#ifdef CONFIG_SGI_IP27
-	ioc3_w_erbar(PCI64_ATTR_BAR >> 32);	/* Barrier on last store */
-#else
-	ioc3_w_erbar(0);			/* Let PCI API get it right */
-#endif
-	(void) ioc3_r_etcdc();			/* Clear on read */
-	ioc3_w_ercsr(15);			/* RX low watermark  */
-	ioc3_w_ertr(0);				/* Interrupt immediately */
+	writel(ERBAR_VAL, &regs->erbar);
+	readl(&regs->etcdc);			/* Clear on read */
+	writel(15, &regs->ercsr);		/* RX low watermark  */
+	writel(0, &regs->ertr);			/* Interrupt immediately */
 	__ioc3_set_mac_address(dev);
-	ioc3_w_ehar_h(ip->ehar_h);
-	ioc3_w_ehar_l(ip->ehar_l);
-	ioc3_w_ersr(42);			/* XXX should be random */
+	writel(ip->ehar_h, &regs->ehar_h);
+	writel(ip->ehar_l, &regs->ehar_l);
+	writel(42, &regs->ersr);		/* XXX should be random */
+}
 
-	ioc3_init_rings(dev);
+static void ioc3_start(struct ioc3_private *ip)
+{
+	struct ioc3_ethregs *regs = ip->regs;
+	unsigned long ring;
+
+	/* Now the rx ring base, consume & produce registers.  */
+	ring = ioc3_map(ip->rxr_dma, PCI64_ATTR_PREC);
+	writel(ring >> 32, &regs->erbr_h);
+	writel(ring & 0xffffffff, &regs->erbr_l);
+	writel(ip->rx_ci << 3, &regs->ercir);
+	writel((ip->rx_pi << 3) | ERPIR_ARM, &regs->erpir);
+
+	ring = ioc3_map(ip->txr_dma, PCI64_ATTR_PREC);
+
+	ip->txqlen = 0;					/* nothing queued  */
+
+	/* Now the tx ring base, consume & produce registers.  */
+	writel(ring >> 32, &regs->etbr_h);
+	writel(ring & 0xffffffff, &regs->etbr_l);
+	writel(ip->tx_pi << 7, &regs->etpir);
+	writel(ip->tx_ci << 7, &regs->etcir);
+	readl(&regs->etcir);				/* Flush */
 
 	ip->emcr |= ((RX_OFFSET / 2) << EMCR_RXOFF_SHIFT) | EMCR_TXDMAEN |
-	             EMCR_TXEN | EMCR_RXDMAEN | EMCR_RXEN | EMCR_PADEN;
-	ioc3_w_emcr(ip->emcr);
-	ioc3_w_eier(EISR_RXTIMERINT | EISR_RXOFLO | EISR_RXBUFOFLO |
-	            EISR_RXMEMERR | EISR_RXPARERR | EISR_TXBUFUFLO |
-	            EISR_TXEXPLICIT | EISR_TXMEMERR);
-	(void) ioc3_r_eier();
+		    EMCR_TXEN | EMCR_RXDMAEN | EMCR_RXEN | EMCR_PADEN;
+	writel(ip->emcr, &regs->emcr);
+	writel(EISR_RXTIMERINT | EISR_RXOFLO | EISR_RXBUFOFLO |
+	       EISR_RXMEMERR | EISR_RXPARERR | EISR_TXBUFUFLO |
+	       EISR_TXEXPLICIT | EISR_TXMEMERR, &regs->eier);
+	readl(&regs->eier);
 }
 
 static inline void ioc3_stop(struct ioc3_private *ip)
 {
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
 
-	ioc3_w_emcr(0);				/* Shutup */
-	ioc3_w_eier(0);				/* Disable interrupts */
-	(void) ioc3_r_eier();			/* Flush */
+	writel(0, &regs->emcr);			/* Shut up */
+	writel(0, &regs->eier);			/* Disable interrupts */
+	readl(&regs->eier);			/* Flush */
 }
 
 static int ioc3_open(struct net_device *dev)
@@ -1038,14 +975,20 @@
 	struct ioc3_private *ip = netdev_priv(dev);
 
 	if (request_irq(dev->irq, ioc3_interrupt, IRQF_SHARED, ioc3_str, dev)) {
-		printk(KERN_ERR "%s: Can't get irq %d\n", dev->name, dev->irq);
+		netdev_err(dev, "Can't get irq %d\n", dev->irq);
 
 		return -EAGAIN;
 	}
 
 	ip->ehar_h = 0;
 	ip->ehar_l = 0;
+
 	ioc3_init(dev);
+	if (ioc3_alloc_rx_bufs(dev)) {
+		netdev_err(dev, "%s: rx buffer allocation failed\n", __func__);
+		return -ENOMEM;
+	}
+	ioc3_start(ip);
 	ioc3_mii_start(ip);
 
 	netif_start_queue(dev);
@@ -1063,12 +1006,13 @@
 	ioc3_stop(ip);
 	free_irq(dev->irq, dev);
 
-	ioc3_free_rings(ip);
+	ioc3_free_rx_bufs(ip);
+	ioc3_clean_tx_ring(ip);
+
 	return 0;
 }
 
-/*
- * MENET cards have four IOC3 chips, which are attached to two sets of
+/* MENET cards have four IOC3 chips, which are attached to two sets of
  * PCI slot resources each: the primary connections are on slots
  * 0..3 and the secondaries are on 4..7
  *
@@ -1085,7 +1029,7 @@
 
 	if (dev) {
 		if (dev->vendor == PCI_VENDOR_ID_SGI &&
-			dev->device == PCI_DEVICE_ID_SGI_IOC3)
+		    dev->device == PCI_DEVICE_ID_SGI_IOC3)
 			ret = 1;
 		pci_dev_put(dev);
 	}
@@ -1095,15 +1039,14 @@
 
 static int ioc3_is_menet(struct pci_dev *pdev)
 {
-	return pdev->bus->parent == NULL &&
+	return !pdev->bus->parent &&
 	       ioc3_adjacent_is_ioc3(pdev, 0) &&
 	       ioc3_adjacent_is_ioc3(pdev, 1) &&
 	       ioc3_adjacent_is_ioc3(pdev, 2);
 }
 
 #ifdef CONFIG_SERIAL_8250
-/*
- * Note about serial ports and consoles:
+/* Note about serial ports and consoles:
  * For console output, everyone uses the IOC3 UARTA (offset 0x178)
  * connected to the master node (look in ip27_setup_console() and
  * ip27prom_console_write()).
@@ -1140,31 +1083,32 @@
 #define COSMISC_CONSTANT 6
 
 	struct uart_8250_port port = {
-	        .port = {
+		.port = {
 			.irq		= 0,
 			.flags		= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF,
 			.iotype		= UPIO_MEM,
 			.regshift	= 0,
 			.uartclk	= (22000000 << 1) / COSMISC_CONSTANT,
 
-			.membase	= (unsigned char __iomem *) uart,
-			.mapbase	= (unsigned long) uart,
-                }
+			.membase	= (unsigned char __iomem *)uart,
+			.mapbase	= (unsigned long)uart,
+		}
 	};
 	unsigned char lcr;
 
-	lcr = uart->iu_lcr;
-	uart->iu_lcr = lcr | UART_LCR_DLAB;
-	uart->iu_scr = COSMISC_CONSTANT,
-	uart->iu_lcr = lcr;
-	uart->iu_lcr;
+	lcr = readb(&uart->iu_lcr);
+	writeb(lcr | UART_LCR_DLAB, &uart->iu_lcr);
+	writeb(COSMISC_CONSTANT, &uart->iu_scr);
+	writeb(lcr, &uart->iu_lcr);
+	readb(&uart->iu_lcr);
 	serial8250_register_8250_port(&port);
 }
 
 static void ioc3_serial_probe(struct pci_dev *pdev, struct ioc3 *ioc3)
 {
-	/*
-	 * We need to recognice and treat the fourth MENET serial as it
+	u32 sio_iec;
+
+	/* We need to recognize and treat the fourth MENET serial as it
 	 * does not have an SuperIO chip attached to it, therefore attempting
 	 * to access it will result in bus errors.  We call something an
 	 * MENET if PCI slot 0, 1, 2 and 3 of a master PCI bus all have an IOC3
@@ -1175,33 +1119,34 @@
 	if (ioc3_is_menet(pdev) && PCI_SLOT(pdev->devfn) == 3)
 		return;
 
-	/*
-	 * Switch IOC3 to PIO mode.  It probably already was but let's be
+	/* Switch IOC3 to PIO mode.  It probably already was but let's be
 	 * paranoid
 	 */
-	ioc3->gpcr_s = GPCR_UARTA_MODESEL | GPCR_UARTB_MODESEL;
-	ioc3->gpcr_s;
-	ioc3->gppr_6 = 0;
-	ioc3->gppr_6;
-	ioc3->gppr_7 = 0;
-	ioc3->gppr_7;
-	ioc3->sscr_a = ioc3->sscr_a & ~SSCR_DMA_EN;
-	ioc3->sscr_a;
-	ioc3->sscr_b = ioc3->sscr_b & ~SSCR_DMA_EN;
-	ioc3->sscr_b;
+	writel(GPCR_UARTA_MODESEL | GPCR_UARTB_MODESEL, &ioc3->gpcr_s);
+	readl(&ioc3->gpcr_s);
+	writel(0, &ioc3->gppr[6]);
+	readl(&ioc3->gppr[6]);
+	writel(0, &ioc3->gppr[7]);
+	readl(&ioc3->gppr[7]);
+	writel(readl(&ioc3->port_a.sscr) & ~SSCR_DMA_EN, &ioc3->port_a.sscr);
+	readl(&ioc3->port_a.sscr);
+	writel(readl(&ioc3->port_b.sscr) & ~SSCR_DMA_EN, &ioc3->port_b.sscr);
+	readl(&ioc3->port_b.sscr);
 	/* Disable all SA/B interrupts except for SA/B_INT in SIO_IEC. */
-	ioc3->sio_iec &= ~ (SIO_IR_SA_TX_MT | SIO_IR_SA_RX_FULL |
-			    SIO_IR_SA_RX_HIGH | SIO_IR_SA_RX_TIMER |
-			    SIO_IR_SA_DELTA_DCD | SIO_IR_SA_DELTA_CTS |
-			    SIO_IR_SA_TX_EXPLICIT | SIO_IR_SA_MEMERR);
-	ioc3->sio_iec |= SIO_IR_SA_INT;
-	ioc3->sscr_a = 0;
-	ioc3->sio_iec &= ~ (SIO_IR_SB_TX_MT | SIO_IR_SB_RX_FULL |
-			    SIO_IR_SB_RX_HIGH | SIO_IR_SB_RX_TIMER |
-			    SIO_IR_SB_DELTA_DCD | SIO_IR_SB_DELTA_CTS |
-			    SIO_IR_SB_TX_EXPLICIT | SIO_IR_SB_MEMERR);
-	ioc3->sio_iec |= SIO_IR_SB_INT;
-	ioc3->sscr_b = 0;
+	sio_iec = readl(&ioc3->sio_iec);
+	sio_iec &= ~(SIO_IR_SA_TX_MT | SIO_IR_SA_RX_FULL |
+		     SIO_IR_SA_RX_HIGH | SIO_IR_SA_RX_TIMER |
+		     SIO_IR_SA_DELTA_DCD | SIO_IR_SA_DELTA_CTS |
+		     SIO_IR_SA_TX_EXPLICIT | SIO_IR_SA_MEMERR);
+	sio_iec |= SIO_IR_SA_INT;
+	sio_iec &= ~(SIO_IR_SB_TX_MT | SIO_IR_SB_RX_FULL |
+		     SIO_IR_SB_RX_HIGH | SIO_IR_SB_RX_TIMER |
+		     SIO_IR_SB_DELTA_DCD | SIO_IR_SB_DELTA_CTS |
+		     SIO_IR_SB_TX_EXPLICIT | SIO_IR_SB_MEMERR);
+	sio_iec |= SIO_IR_SB_INT;
+	writel(sio_iec, &ioc3->sio_iec);
+	writel(0, &ioc3->port_a.sscr);
+	writel(0, &ioc3->port_b.sscr);
 
 	ioc3_8250_register(&ioc3->sregs.uarta);
 	ioc3_8250_register(&ioc3->sregs.uartb);
@@ -1236,15 +1181,15 @@
 		pci_using_dac = 1;
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 		if (err < 0) {
-			printk(KERN_ERR "%s: Unable to obtain 64 bit DMA "
-			       "for consistent allocations\n", pci_name(pdev));
+			pr_err("%s: Unable to obtain 64 bit DMA for consistent allocations\n",
+			       pci_name(pdev));
 			goto out;
 		}
 	} else {
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			printk(KERN_ERR "%s: No usable DMA configuration, "
-			       "aborting.\n", pci_name(pdev));
+			pr_err("%s: No usable DMA configuration, aborting.\n",
+			       pci_name(pdev));
 			goto out;
 		}
 		pci_using_dac = 0;
@@ -1270,19 +1215,22 @@
 
 	ip = netdev_priv(dev);
 	ip->dev = dev;
+	ip->dma_dev = &pdev->dev;
 
 	dev->irq = pdev->irq;
 
 	ioc3_base = pci_resource_start(pdev, 0);
 	ioc3_size = pci_resource_len(pdev, 0);
-	ioc3 = (struct ioc3 *) ioremap(ioc3_base, ioc3_size);
+	ioc3 = (struct ioc3 *)ioremap(ioc3_base, ioc3_size);
 	if (!ioc3) {
-		printk(KERN_CRIT "ioc3eth(%s): ioremap failed, goodbye.\n",
+		pr_err("ioc3eth(%s): ioremap failed, goodbye.\n",
 		       pci_name(pdev));
 		err = -ENOMEM;
 		goto out_res;
 	}
-	ip->regs = ioc3;
+	ip->regs = &ioc3->eth;
+	ip->ssram = ioc3->ssram;
+	ip->all_regs = ioc3;
 
 #ifdef CONFIG_SERIAL_8250
 	ioc3_serial_probe(pdev, ioc3);
@@ -1292,6 +1240,26 @@
 	timer_setup(&ip->ioc3_timer, ioc3_timer, 0);
 
 	ioc3_stop(ip);
+
+	/* Allocate rx ring.  4kb = 512 entries, must be 4kb aligned */
+	ip->rxr = dma_direct_alloc_pages(ip->dma_dev, RX_RING_SIZE,
+					 &ip->rxr_dma, GFP_ATOMIC, 0);
+	if (!ip->rxr) {
+		pr_err("ioc3-eth: rx ring allocation failed\n");
+		err = -ENOMEM;
+		goto out_stop;
+	}
+
+	/* Allocate tx rings.  16kb = 128 bufs, must be 16kb aligned  */
+	ip->txr = dma_direct_alloc_pages(ip->dma_dev, TX_RING_SIZE,
+					 &ip->txr_dma,
+					 GFP_KERNEL | __GFP_ZERO, 0);
+	if (!ip->txr) {
+		pr_err("ioc3-eth: tx ring allocation failed\n");
+		err = -ENOMEM;
+		goto out_stop;
+	}
+
 	ioc3_init(dev);
 
 	ip->pdev = pdev;
@@ -1305,7 +1273,7 @@
 	ioc3_mii_init(ip);
 
 	if (ip->mii.phy_id == -1) {
-		printk(KERN_CRIT "ioc3-eth(%s): Didn't find a PHY, goodbye.\n",
+		pr_err("ioc3-eth(%s): Didn't find a PHY, goodbye.\n",
 		       pci_name(pdev));
 		err = -ENODEV;
 		goto out_stop;
@@ -1335,24 +1303,27 @@
 	vendor = (sw_physid1 << 12) | (sw_physid2 >> 4);
 	model  = (sw_physid2 >> 4) & 0x3f;
 	rev    = sw_physid2 & 0xf;
-	printk(KERN_INFO "%s: Using PHY %d, vendor 0x%x, model %d, "
-	       "rev %d.\n", dev->name, ip->mii.phy_id, vendor, model, rev);
-	printk(KERN_INFO "%s: IOC3 SSRAM has %d kbyte.\n", dev->name,
-	       ip->emcr & EMCR_BUFSIZ ? 128 : 64);
+	netdev_info(dev, "Using PHY %d, vendor 0x%x, model %d, rev %d.\n",
+		    ip->mii.phy_id, vendor, model, rev);
+	netdev_info(dev, "IOC3 SSRAM has %d kbyte.\n",
+		    ip->emcr & EMCR_BUFSIZ ? 128 : 64);
 
 	return 0;
 
 out_stop:
-	ioc3_stop(ip);
 	del_timer_sync(&ip->ioc3_timer);
-	ioc3_free_rings(ip);
+	if (ip->rxr)
+		dma_direct_free_pages(ip->dma_dev, RX_RING_SIZE, ip->rxr,
+				      ip->rxr_dma, 0);
+	if (ip->txr)
+		dma_direct_free_pages(ip->dma_dev, TX_RING_SIZE, ip->txr,
+				      ip->txr_dma, 0);
 out_res:
 	pci_release_regions(pdev);
 out_free:
 	free_netdev(dev);
 out_disable:
-	/*
-	 * We should call pci_disable_device(pdev); here if the IOC3 wasn't
+	/* We should call pci_disable_device(pdev); here if the IOC3 wasn't
 	 * such a weird device ...
 	 */
 out:
@@ -1363,16 +1334,19 @@
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
+
+	dma_direct_free_pages(ip->dma_dev, RX_RING_SIZE, ip->rxr,
+			      ip->rxr_dma, 0);
+	dma_direct_free_pages(ip->dma_dev, TX_RING_SIZE, ip->txr,
+			      ip->txr_dma, 0);
 
 	unregister_netdev(dev);
 	del_timer_sync(&ip->ioc3_timer);
 
-	iounmap(ioc3);
+	iounmap(ip->all_regs);
 	pci_release_regions(pdev);
 	free_netdev(dev);
-	/*
-	 * We should call pci_disable_device(pdev); here if the IOC3 wasn't
+	/* We should call pci_disable_device(pdev); here if the IOC3 wasn't
 	 * such a weird device ...
 	 */
 }
@@ -1392,16 +1366,14 @@
 
 static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	unsigned long data;
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
-	unsigned int len;
 	struct ioc3_etxd *desc;
-	uint32_t w0 = 0;
+	unsigned long data;
+	unsigned int len;
 	int produce;
+	u32 w0 = 0;
 
-	/*
-	 * IOC3 has a fairly simple minded checksumming hardware which simply
+	/* The IOC3 has fairly simple-minded checksumming hardware which simply
 	 * adds up the 1's complement checksum for the entire packet and
 	 * inserts it at an offset which can be specified in the descriptor
 	 * into the transmit packet.  This means we have to compensate for the
@@ -1412,25 +1384,23 @@
 		const struct iphdr *ih = ip_hdr(skb);
 		const int proto = ntohs(ih->protocol);
 		unsigned int csoff;
-		uint32_t csum, ehsum;
-		uint16_t *eh;
+		u32 csum, ehsum;
+		u16 *eh;
 
 		/* The MAC header.  skb->mac seems the logical approach
-		   to find the MAC header - except it's a NULL pointer ...  */
-		eh = (uint16_t *) skb->data;
+		 * to find the MAC header - except it's a NULL pointer ...
+		 */
+		eh = (u16 *)skb->data;
 
 		/* Sum up dest addr, src addr and protocol  */
 		ehsum = eh[0] + eh[1] + eh[2] + eh[3] + eh[4] + eh[5] + eh[6];
 
-		/* Fold ehsum.  can't use csum_fold which negates also ...  */
-		ehsum = (ehsum & 0xffff) + (ehsum >> 16);
-		ehsum = (ehsum & 0xffff) + (ehsum >> 16);
-
 		/* Skip IP header; it's sum is always zero and was
-		   already filled in by ip_output.c */
+		 * already filled in by ip_output.c
+		 */
 		csum = csum_tcpudp_nofold(ih->saddr, ih->daddr,
-		                          ih->tot_len - (ih->ihl << 2),
-		                          proto, 0xffff ^ ehsum);
+					  ih->tot_len - (ih->ihl << 2),
+					  proto, csum_fold(ehsum));
 
 		csum = (csum & 0xffff) + (csum >> 16);	/* Fold again */
 		csum = (csum & 0xffff) + (csum >> 16);
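
The switch to csum_fold() above works because csum_fold() both folds the 32-bit accumulator to 16 bits and complements it, which is exactly what the removed two-line fold plus the old `0xffff ^ ehsum` expression achieved together. A runnable userspace model of that operation (the input value is made up):

	#include <stdint.h>
	#include <stdio.h>

	/* Model of the kernel's csum_fold(): reduce a 32-bit one's-complement
	 * accumulator to 16 bits, then return its complement.
	 */
	static uint16_t fold_complement(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);	/* fold high half into low */
		sum = (sum & 0xffff) + (sum >> 16);	/* absorb a possible carry */
		return (uint16_t)~sum;
	}

	int main(void)
	{
		uint32_t ehsum = 0x0002fffd;	/* example MAC-header partial sum */

		printf("0x%04x\n", fold_complement(ehsum));	/* prints 0x0000 */
		return 0;
	}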
@@ -1450,7 +1420,7 @@
 
 	spin_lock_irq(&ip->ioc3_lock);
 
-	data = (unsigned long) skb->data;
+	data = (unsigned long)skb->data;
 	len = skb->len;
 
 	produce = ip->tx_pi;
@@ -1470,47 +1440,78 @@
 		unsigned long b2 = (data | 0x3fffUL) + 1UL;
 		unsigned long s1 = b2 - data;
 		unsigned long s2 = data + len - b2;
+		dma_addr_t d1, d2;
 
 		desc->cmd    = cpu_to_be32(len | ETXD_INTWHENDONE |
-		                           ETXD_B1V | ETXD_B2V | w0);
+					   ETXD_B1V | ETXD_B2V | w0);
 		desc->bufcnt = cpu_to_be32((s1 << ETXD_B1CNT_SHIFT) |
-		                           (s2 << ETXD_B2CNT_SHIFT));
-		desc->p1     = cpu_to_be64(ioc3_map(skb->data, 1));
-		desc->p2     = cpu_to_be64(ioc3_map((void *) b2, 1));
+					   (s2 << ETXD_B2CNT_SHIFT));
+		d1 = dma_map_single(ip->dma_dev, skb->data, s1, DMA_TO_DEVICE);
+		if (dma_mapping_error(ip->dma_dev, d1))
+			goto drop_packet;
+		d2 = dma_map_single(ip->dma_dev, (void *)b2, s2, DMA_TO_DEVICE);
+		if (dma_mapping_error(ip->dma_dev, d2)) {
+			dma_unmap_single(ip->dma_dev, d1, s1, DMA_TO_DEVICE);
+			goto drop_packet;
+		}
+		desc->p1     = cpu_to_be64(ioc3_map(d1, PCI64_ATTR_PREF));
+		desc->p2     = cpu_to_be64(ioc3_map(d2, PCI64_ATTR_PREF));
 	} else {
+		dma_addr_t d;
+
 		/* Normal sized packet that doesn't cross a page boundary. */
 		desc->cmd = cpu_to_be32(len | ETXD_INTWHENDONE | ETXD_B1V | w0);
 		desc->bufcnt = cpu_to_be32(len << ETXD_B1CNT_SHIFT);
-		desc->p1     = cpu_to_be64(ioc3_map(skb->data, 1));
+		d = dma_map_single(ip->dma_dev, skb->data, len, DMA_TO_DEVICE);
+		if (dma_mapping_error(ip->dma_dev, d))
+			goto drop_packet;
+		desc->p1     = cpu_to_be64(ioc3_map(d, PCI64_ATTR_PREF));
 	}
 
-	BARRIER();
+	mb(); /* make sure all descriptor changes are visible */
 
 	ip->tx_skbs[produce] = skb;			/* Remember skb */
-	produce = (produce + 1) & 127;
+	produce = (produce + 1) & TX_RING_MASK;
 	ip->tx_pi = produce;
-	ioc3_w_etpir(produce << 7);			/* Fire ... */
+	writel(produce << 7, &ip->regs->etpir);		/* Fire ... */
 
 	ip->txqlen++;
 
-	if (ip->txqlen >= 127)
+	if (ip->txqlen >= (TX_RING_ENTRIES - 1))
 		netif_stop_queue(dev);
 
 	spin_unlock_irq(&ip->ioc3_lock);
 
 	return NETDEV_TX_OK;
+
+drop_packet:
+	dev_kfree_skb_any(skb);
+	dev->stats.tx_dropped++;
+
+	spin_unlock_irq(&ip->ioc3_lock);
+
+	return NETDEV_TX_OK;
 }
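
The `(data | 0x3fffUL) + 1UL` arithmetic above finds the first address past the next 16 KiB boundary, which is how a TX buffer is split into the two descriptor segments s1 and s2. A runnable model with made-up addresses:

	#include <stdio.h>

	int main(void)
	{
		unsigned long data = 0x12343f00UL;	/* example buffer start */
		unsigned long len  = 0x300UL;		/* example buffer length */
		unsigned long b2 = (data | 0x3fffUL) + 1UL;	/* next 16 KiB boundary */
		unsigned long s1 = b2 - data;		/* bytes before the boundary */
		unsigned long s2 = data + len - b2;	/* bytes after the boundary */

		printf("seg1 %#lx..%#lx (%lu bytes)\n", data, b2 - 1, s1);
		printf("seg2 %#lx..%#lx (%lu bytes)\n", b2, data + len - 1, s2);
		return 0;
	}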
 
 static void ioc3_timeout(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 
-	printk(KERN_ERR "%s: transmit timed out, resetting\n", dev->name);
+	netdev_err(dev, "transmit timed out, resetting\n");
 
 	spin_lock_irq(&ip->ioc3_lock);
 
 	ioc3_stop(ip);
+	ioc3_free_rx_bufs(ip);
+	ioc3_clean_tx_ring(ip);
+
 	ioc3_init(dev);
+	if (ioc3_alloc_rx_bufs(dev)) {
+		netdev_err(dev, "%s: rx buffer allocation failed\n", __func__);
+		spin_unlock_irq(&ip->ioc3_lock);
+		return;
+	}
+	ioc3_start(ip);
 	ioc3_mii_init(ip);
 	ioc3_mii_start(ip);
 
@@ -1519,16 +1520,14 @@
 	netif_wake_queue(dev);
 }
 
-/*
- * Given a multicast ethernet address, this routine calculates the
+/* Given a multicast ethernet address, this routine calculates the
  * address's bit index in the logical address filter mask
  */
-
 static inline unsigned int ioc3_hash(const unsigned char *addr)
 {
 	unsigned int temp = 0;
-	u32 crc;
 	int bits;
+	u32 crc;
 
 	crc = ether_crc_le(ETH_ALEN, addr);
 
@@ -1542,8 +1541,8 @@
 	return temp;
 }
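
The loop between these two hunks is elided; in older ioc3-eth sources it bit-reverses the low six bits of the little-endian CRC to form the filter index, and nothing in this patch changes it. A runnable model, assuming that behavior:

	#include <stdio.h>

	/* Hash-index model: reverse the low six bits of the CRC so the
	 * result indexes one of 64 bits in the logical address filter.
	 */
	static unsigned int hash_index(unsigned int crc)
	{
		unsigned int temp = 0;
		int bits;

		for (bits = 6; --bits >= 0;) {
			temp <<= 1;
			temp |= crc & 1;
			crc >>= 1;
		}
		return temp;	/* 0..63 */
	}

	int main(void)
	{
		printf("%u\n", hash_index(0x2a));	/* 101010 -> 010101 = 21 */
		return 0;
	}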
 
-static void ioc3_get_drvinfo (struct net_device *dev,
-	struct ethtool_drvinfo *info)
+static void ioc3_get_drvinfo(struct net_device *dev,
+			     struct ethtool_drvinfo *info)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 
@@ -1623,27 +1622,28 @@
 
 static void ioc3_set_multicast_list(struct net_device *dev)
 {
-	struct netdev_hw_addr *ha;
 	struct ioc3_private *ip = netdev_priv(dev);
-	struct ioc3 *ioc3 = ip->regs;
+	struct ioc3_ethregs *regs = ip->regs;
+	struct netdev_hw_addr *ha;
 	u64 ehar = 0;
 
-	netif_stop_queue(dev);				/* Lock out others. */
+	spin_lock_irq(&ip->ioc3_lock);
 
 	if (dev->flags & IFF_PROMISC) {			/* Set promiscuous.  */
 		ip->emcr |= EMCR_PROMISC;
-		ioc3_w_emcr(ip->emcr);
-		(void) ioc3_r_emcr();
+		writel(ip->emcr, &regs->emcr);
+		readl(&regs->emcr);
 	} else {
 		ip->emcr &= ~EMCR_PROMISC;
-		ioc3_w_emcr(ip->emcr);			/* Clear promiscuous. */
-		(void) ioc3_r_emcr();
+		writel(ip->emcr, &regs->emcr);		/* Clear promiscuous. */
+		readl(&regs->emcr);
 
 		if ((dev->flags & IFF_ALLMULTI) ||
 		    (netdev_mc_count(dev) > 64)) {
 			/* Too many for hashing to make sense or we want all
-			   multicast packets anyway,  so skip computing all the
-			   hashes and just accept all packets.  */
+			 * multicast packets anyway,  so skip computing all the
+			 * hashes and just accept all packets.
+			 */
 			ip->ehar_h = 0xffffffff;
 			ip->ehar_l = 0xffffffff;
 		} else {
@@ -1653,11 +1653,11 @@
 			ip->ehar_h = ehar >> 32;
 			ip->ehar_l = ehar & 0xffffffff;
 		}
-		ioc3_w_ehar_h(ip->ehar_h);
-		ioc3_w_ehar_l(ip->ehar_l);
+		writel(ip->ehar_h, &regs->ehar_h);
+		writel(ip->ehar_l, &regs->ehar_l);
 	}
 
-	netif_wake_queue(dev);			/* Let us get going again. */
+	spin_unlock_irq(&ip->ioc3_lock);
 }
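
The elided netdev_for_each_mc_addr() loop in this function sets one `ehar` bit per multicast address, using ioc3_hash() for the bit index; the result is then split into the high and low filter registers as shown above. A runnable model of that assembly (the indices are made up):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int idx[] = { 3, 21, 60 };	/* hypothetical hash indices */
		uint64_t ehar = 0;
		unsigned int i;

		for (i = 0; i < sizeof(idx) / sizeof(idx[0]); i++)
			ehar |= 1ULL << idx[i];

		printf("ehar_h=%08x ehar_l=%08x\n",
		       (uint32_t)(ehar >> 32), (uint32_t)ehar);
		return 0;
	}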
 
 module_pci_driver(ioc3_driver);
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 703fbbe..539bc5d 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * meth.c -- O2 Builtin 10/100 Ethernet driver
  *
  * Copyright (C) 2001-2003 Ilya Volynets
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
  */
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
@@ -68,6 +64,8 @@
  * packets in and out, so there is place for a packet
  */
 struct meth_private {
+	struct platform_device *pdev;
+
 	/* in-memory copy of MAC Control register */
 	u64 mac_ctrl;
 
@@ -211,8 +209,8 @@
 static int meth_init_tx_ring(struct meth_private *priv)
 {
 	/* Init TX ring */
-	priv->tx_ring = dma_zalloc_coherent(NULL, TX_RING_BUFFER_SIZE,
-					    &priv->tx_ring_dma, GFP_ATOMIC);
+	priv->tx_ring = dma_alloc_coherent(&priv->pdev->dev,
+			TX_RING_BUFFER_SIZE, &priv->tx_ring_dma, GFP_ATOMIC);
 	if (!priv->tx_ring)
 		return -ENOMEM;
 
@@ -236,7 +234,7 @@
 		priv->rx_ring[i]=(rx_packet*)(priv->rx_skbs[i]->head);
 		/* I'll need to re-sync it after each RX */
 		priv->rx_ring_dmas[i] =
-			dma_map_single(NULL, priv->rx_ring[i],
+			dma_map_single(&priv->pdev->dev, priv->rx_ring[i],
 				       METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
 		mace->eth.rx_fifo = priv->rx_ring_dmas[i];
 	}
@@ -249,11 +247,10 @@
 
 	/* Remove any pending skb */
 	for (i = 0; i < TX_RING_ENTRIES; i++) {
-		if (priv->tx_skbs[i])
-			dev_kfree_skb(priv->tx_skbs[i]);
+		dev_kfree_skb(priv->tx_skbs[i]);
 		priv->tx_skbs[i] = NULL;
 	}
-	dma_free_coherent(NULL, TX_RING_BUFFER_SIZE, priv->tx_ring,
+	dma_free_coherent(&priv->pdev->dev, TX_RING_BUFFER_SIZE, priv->tx_ring,
 	                  priv->tx_ring_dma);
 }
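
The meth.c hunks in this file all make the same correction: the DMA API calls now receive the real `struct device` instead of NULL, which current kernels reject outright. A hedged sketch of the corrected pattern, including the error check such a mapping should have (the helper is ours, not the driver's):

	/* Sketch only: map an RX buffer against a real device and verify
	 * the mapping before handing the address to hardware.
	 */
	static dma_addr_t map_rx_buf(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t handle;

		handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
		if (dma_mapping_error(dev, handle))
			return DMA_MAPPING_ERROR;	/* caller must check */
		return handle;
	}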
 
@@ -263,7 +260,7 @@
 	int i;
 
 	for (i = 0; i < RX_RING_ENTRIES; i++) {
-		dma_unmap_single(NULL, priv->rx_ring_dmas[i],
+		dma_unmap_single(&priv->pdev->dev, priv->rx_ring_dmas[i],
 				 METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
 		priv->rx_ring[i] = 0;
 		priv->rx_ring_dmas[i] = 0;
@@ -393,7 +390,8 @@
 		fifo_rptr = (fifo_rptr - 1) & 0x0f;
 	}
 	while (priv->rx_write != fifo_rptr) {
-		dma_unmap_single(NULL, priv->rx_ring_dmas[priv->rx_write],
+		dma_unmap_single(&priv->pdev->dev,
+				 priv->rx_ring_dmas[priv->rx_write],
 				 METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
 		status = priv->rx_ring[priv->rx_write]->status.raw;
 #if MFE_DEBUG
@@ -454,7 +452,8 @@
 		priv->rx_ring[priv->rx_write] = (rx_packet*)skb->head;
 		priv->rx_ring[priv->rx_write]->status.raw = 0;
 		priv->rx_ring_dmas[priv->rx_write] =
-			dma_map_single(NULL, priv->rx_ring[priv->rx_write],
+			dma_map_single(&priv->pdev->dev,
+				       priv->rx_ring[priv->rx_write],
 				       METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
 		mace->eth.rx_fifo = priv->rx_ring_dmas[priv->rx_write];
 		ADVANCE_RX_PTR(priv->rx_write);
@@ -521,7 +520,7 @@
 			DPRINTK("RPTR points us here, but packet not done?\n");
 			break;
 		}
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		priv->tx_skbs[priv->tx_read] = NULL;
 		priv->tx_ring[priv->tx_read].header.raw = 0;
 		priv->tx_read = (priv->tx_read+1)&(TX_RING_ENTRIES-1);
@@ -637,7 +636,7 @@
 	}
 
 	/* first page */
-	catbuf = dma_map_single(NULL, buffer_data, buffer_len,
+	catbuf = dma_map_single(&priv->pdev->dev, buffer_data, buffer_len,
 				DMA_TO_DEVICE);
 	desc->data.cat_buf[0].form.start_addr = catbuf >> 3;
 	desc->data.cat_buf[0].form.len = buffer_len - 1;
@@ -663,12 +662,12 @@
 	}
 
 	/* first page */
-	catbuf1 = dma_map_single(NULL, buffer1_data, buffer1_len,
+	catbuf1 = dma_map_single(&priv->pdev->dev, buffer1_data, buffer1_len,
 				 DMA_TO_DEVICE);
 	desc->data.cat_buf[0].form.start_addr = catbuf1 >> 3;
 	desc->data.cat_buf[0].form.len = buffer1_len - 1;
 	/* second page */
-	catbuf2 = dma_map_single(NULL, buffer2_data, buffer2_len,
+	catbuf2 = dma_map_single(&priv->pdev->dev, buffer2_data, buffer2_len,
 				 DMA_TO_DEVICE);
 	desc->data.cat_buf[1].form.start_addr = catbuf2 >> 3;
 	desc->data.cat_buf[1].form.len = buffer2_len - 1;
@@ -840,6 +839,7 @@
 	memcpy(dev->dev_addr, o2meth_eaddr, ETH_ALEN);
 
 	priv = netdev_priv(dev);
+	priv->pdev = pdev;
 	spin_lock_init(&priv->meth_lock);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
diff --git a/drivers/net/ethernet/silan/Kconfig b/drivers/net/ethernet/silan/Kconfig
index ac982be..71929d1 100644
--- a/drivers/net/ethernet/silan/Kconfig
+++ b/drivers/net/ethernet/silan/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Silan device configuration
 #
diff --git a/drivers/net/ethernet/silan/Makefile b/drivers/net/ethernet/silan/Makefile
index 4ad3523..86f716f 100644
--- a/drivers/net/ethernet/silan/Makefile
+++ b/drivers/net/ethernet/silan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Silan network device drivers.
 #
diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c
index c07fd59..c7641a2 100644
--- a/drivers/net/ethernet/silan/sc92031.c
+++ b/drivers/net/ethernet/silan/sc92031.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*  Silan SC92031 PCI Fast Ethernet Adapter driver
  *
  *  Based on vendor drivers:
@@ -251,7 +252,6 @@
  * use of mdelay() at _sc92031_reset.
  * Functions prefixed with _sc92031_ must be called with the lock held;
  * functions prefixed with sc92031_ must be called without the lock held.
- * Use mmiowb() before unlocking if the hardware was written to.
  */
 
 /* Locking rules for the interrupt:
@@ -361,7 +361,6 @@
 	/* stop interrupts */
 	iowrite32(0, port_base + IntrMask);
 	_sc92031_dummy_read(port_base);
-	mmiowb();
 
 	/* wait for any concurrent interrupt/tasklet to finish */
 	synchronize_irq(priv->pdev->irq);
@@ -379,7 +378,6 @@
 	wmb();
 
 	iowrite32(IntrBits, port_base + IntrMask);
-	mmiowb();
 }
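
The mmiowb() deletions throughout this driver follow a tree-wide change: the spinlock implementation now provides that ordering itself, so an MMIO write done under a lock is ordered against the next lock holder without an explicit barrier. A sketch of the resulting pattern (the helper is ours; IntrMask is the driver's register):

	static void write_intr_mask_locked(spinlock_t *lock,
					   void __iomem *port_base, u32 val)
	{
		unsigned long flags;

		spin_lock_irqsave(lock, flags);
		iowrite32(val, port_base + IntrMask);
		spin_unlock_irqrestore(lock, flags);	/* no mmiowb() needed */
	}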
 
 static void _sc92031_disable_tx_rx(struct net_device *dev)
@@ -867,7 +865,6 @@
 	rmb();
 
 	iowrite32(intr_mask, port_base + IntrMask);
-	mmiowb();
 
 	spin_unlock(&priv->lock);
 }
@@ -901,7 +898,6 @@
 	rmb();
 
 	iowrite32(intr_mask, port_base + IntrMask);
-	mmiowb();
 
 	return IRQ_NONE;
 }
@@ -978,7 +974,6 @@
 	iowrite32(priv->tx_bufs_dma_addr + entry * TX_BUF_SIZE,
 			port_base + TxAddr0 + entry * 4);
 	iowrite32(tx_status, port_base + TxStatus0 + entry * 4);
-	mmiowb();
 
 	if (priv->tx_head - priv->tx_tail >= NUM_TX_DESC)
 		netif_stop_queue(dev);
@@ -1024,7 +1019,6 @@
 	spin_lock_bh(&priv->lock);
 
 	_sc92031_reset(dev);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 	sc92031_enable_interrupts(dev);
@@ -1060,7 +1054,6 @@
 
 	_sc92031_disable_tx_rx(dev);
 	_sc92031_tx_clear(dev);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 
@@ -1081,7 +1074,6 @@
 
 	_sc92031_set_mar(dev);
 	_sc92031_set_rx_config(dev);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 }
@@ -1098,7 +1090,6 @@
 	priv->tx_timeouts++;
 
 	_sc92031_reset(dev);
-	mmiowb();
 
 	spin_unlock(&priv->lock);
 
@@ -1140,7 +1131,6 @@
 
 	output_status = _sc92031_mii_read(port_base, MII_OutputStatus);
 	_sc92031_mii_scan(port_base);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 
@@ -1311,7 +1301,6 @@
 
 	priv->pm_config = pm_config;
 	iowrite32(pm_config, port_base + PMConfig);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 
@@ -1337,7 +1326,6 @@
 
 out:
 	_sc92031_mii_scan(port_base);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 
@@ -1530,7 +1518,6 @@
 
 	_sc92031_disable_tx_rx(dev);
 	_sc92031_tx_clear(dev);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 
@@ -1555,7 +1542,6 @@
 	spin_lock_bh(&priv->lock);
 
 	_sc92031_reset(dev);
-	mmiowb();
 
 	spin_unlock_bh(&priv->lock);
 	sc92031_enable_interrupts(dev);
diff --git a/drivers/net/ethernet/sis/Kconfig b/drivers/net/ethernet/sis/Kconfig
index 22ec98e..d848ab0 100644
--- a/drivers/net/ethernet/sis/Kconfig
+++ b/drivers/net/ethernet/sis/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Silicon Integrated Systems (SiS) device configuration
 #
diff --git a/drivers/net/ethernet/sis/Makefile b/drivers/net/ethernet/sis/Makefile
index 58d3ac1..853407b 100644
--- a/drivers/net/ethernet/sis/Makefile
+++ b/drivers/net/ethernet/sis/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for Silicon Integrated Systems (SiS) network device drivers.
 #
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index c2c5052..5b351be 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -714,7 +714,7 @@
 
 		sis190_unmap_tx_skb(tp->pci_dev, skb, txd);
 		tp->Tx_skbuff[entry] = NULL;
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 	}
 
 	if (tp->dirty_tx != dirty_tx) {
@@ -1142,7 +1142,7 @@
 		if (!poll_locked)
 			poll_locked++;
 
-		synchronize_sched();
+		synchronize_rcu();
 
 	} while (SIS_R32(IntrMask));
 
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 4bb89f7..85eaccb 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -191,6 +191,8 @@
 	unsigned int tx_full; /* The Tx queue is full. */
 	u8 host_bridge_rev;
 	u8 chipset_rev;
+	/* EEPROM data */
+	int eeprom_size;
 };
 
 MODULE_AUTHOR("Jim Huang <cmhuang@sis.com.tw>, Ollie Lho <ollie@sis.com.tw>");
@@ -262,7 +264,7 @@
 	/* check to see if we have sane EEPROM */
 	signature = (u16) read_eeprom(ioaddr, EEPROMSignature);
 	if (signature == 0xffff || signature == 0x0000) {
-		printk (KERN_WARNING "%s: Error EERPOM read %x\n",
+		printk (KERN_WARNING "%s: Error EEPROM read %x\n",
 			pci_name(pci_dev), signature);
 		return 0;
 	}
@@ -359,9 +361,9 @@
  *
  *	SiS962 or SiS963 model, use EEPROM to store MAC address. And EEPROM
  *	is shared by
- *	LAN and 1394. When access EEPROM, send EEREQ signal to hardware first
- *	and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be access
- *	by LAN, otherwise is not. After MAC address is read from EEPROM, send
+ *	LAN and 1394. When accessing EEPROM, send EEREQ signal to hardware first
+ *	and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be accessed
+ *	by LAN, otherwise it is not. After MAC address is read from EEPROM, send
  *	EEDONE signal to refuse EEPROM access by LAN.
  *	The EEPROM map of SiS962 or SiS963 is different to SiS900.
  *	The signature field in SiS962 or SiS963 spec is meaningless.
@@ -475,6 +477,8 @@
 	sis_priv->pci_dev = pci_dev;
 	spin_lock_init(&sis_priv->lock);
 
+	sis_priv->eeprom_size = 24;
+
 	pci_set_drvdata(pci_dev, net_dev);
 
 	ring_space = pci_alloc_consistent(pci_dev, TX_TOTAL_SIZE, &ring_dma);
@@ -730,10 +734,10 @@
 		status = mdio_read(net_dev, phy->phy_addr, MII_STATUS);
 
 		/* Link ON & Not select default PHY & not ghost PHY */
-		 if ((status & MII_STAT_LINK) && !default_phy &&
-					(phy->phy_types != UNKNOWN))
-		 	default_phy = phy;
-		 else {
+		if ((status & MII_STAT_LINK) && !default_phy &&
+		    (phy->phy_types != UNKNOWN)) {
+			default_phy = phy;
+		} else {
 			status = mdio_read(net_dev, phy->phy_addr, MII_CONTROL);
 			mdio_write(net_dev, phy->phy_addr, MII_CONTROL,
 				status | MII_CNTL_AUTO | MII_CNTL_ISOLATE);
@@ -741,7 +745,7 @@
 				phy_home = phy;
 			else if(phy->phy_types == LAN)
 				phy_lan = phy;
-		 }
+		}
 	}
 
 	if (!default_phy && phy_home)
@@ -882,7 +886,7 @@
  *	mdio_read - read MII PHY register
  *	@net_dev: the net device to read
  *	@phy_id: the phy address to read
- *	@location: the phy regiester id to read
+ *	@location: the phy register id to read
  *
  *	Read MII registers through MDIO and MDC
  *	using MDIO management frame structure and protocol(defined by ISO/IEC).
@@ -926,7 +930,7 @@
  *	mdio_write - write MII PHY register
  *	@net_dev: the net device to write
  *	@phy_id: the phy address to write
- *	@location: the phy regiester id to write
+ *	@location: the phy register id to write
  *	@value: the register value to write with
  *
  *	Write MII registers with @value through MDIO and MDC
@@ -1057,7 +1061,7 @@
 	sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
 
 	/* Enable all known interrupts by setting the interrupt mask. */
-	sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+	sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC);
 	sw32(cr, RxENA | sr32(cr));
 	sw32(ier, IE);
 
@@ -1101,7 +1105,7 @@
 		sw32(rfdr, w);
 
 		if (netif_msg_hw(sis_priv)) {
-			printk(KERN_DEBUG "%s: Receive Filter Addrss[%d]=%x\n",
+			printk(KERN_DEBUG "%s: Receive Filter Address[%d]=%x\n",
 			       net_dev->name, i, sr32(rfdr));
 		}
 	}
@@ -1148,7 +1152,7 @@
  *	@net_dev: the net device to initialize for
  *
  *	Initialize the Rx descriptor ring,
- *	and pre-allocate recevie buffers (socket buffer)
+ *	and pre-allocate receive buffers (socket buffer)
  */
 
 static void
@@ -1578,7 +1582,7 @@
 	sw32(txdp, sis_priv->tx_ring_dma);
 
 	/* Enable all known interrupts by setting the interrupt mask. */
-	sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+	sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC);
 }
 
 /**
@@ -1618,7 +1622,7 @@
 			spin_unlock_irqrestore(&sis_priv->lock, flags);
 			return NETDEV_TX_OK;
 	}
-	sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len);
+	sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len);
 	sw32(cr, TxENA | sr32(cr));
 
 	sis_priv->cur_tx ++;
@@ -1674,8 +1678,8 @@
 	do {
 		status = sr32(isr);
 
-		if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0)
-			/* nothing intresting happened */
+		if ((status & (HIBERR|TxURN|TxERR|TxDESC|RxORN|RxERR|RxOK)) == 0)
+			/* nothing interesting happened */
 			break;
 		handled = 1;
 
@@ -1684,7 +1688,7 @@
 			/* Rx interrupt */
 			sis900_rx(net_dev);
 
-		if (status & (TxURN | TxERR | TxIDLE))
+		if (status & (TxURN | TxERR | TxDESC))
 			/* Tx interrupt */
 			sis900_finish_xmit(net_dev);
 
@@ -1896,8 +1900,8 @@
 
 		if (tx_status & OWN) {
 			/* The packet is not transmitted yet (owned by hardware) !
-			 * Note: the interrupt is generated only when Tx Machine
-			 * is idle, so this is an almost impossible case */
+			 * Note: with the TxDESC ('descriptor') interrupt
+			 * this is an almost impossible condition */
 			break;
 		}
 
@@ -1927,7 +1931,7 @@
 		pci_unmap_single(sis_priv->pci_dev,
 			sis_priv->tx_ring[entry].bufptr, skb->len,
 			PCI_DMA_TODEVICE);
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		sis_priv->tx_skbuff[entry] = NULL;
 		sis_priv->tx_ring[entry].bufptr = 0;
 		sis_priv->tx_ring[entry].cmdsts = 0;
@@ -2122,6 +2126,68 @@
 	wol->supported = (WAKE_PHY | WAKE_MAGIC);
 }
 
+static int sis900_get_eeprom_len(struct net_device *dev)
+{
+	struct sis900_private *sis_priv = netdev_priv(dev);
+
+	return sis_priv->eeprom_size;
+}
+
+static int sis900_read_eeprom(struct net_device *net_dev, u8 *buf)
+{
+	struct sis900_private *sis_priv = netdev_priv(net_dev);
+	void __iomem *ioaddr = sis_priv->ioaddr;
+	int wait, ret = -EAGAIN;
+	u16 signature;
+	u16 *ebuf = (u16 *)buf;
+	int i;
+
+	if (sis_priv->chipset_rev == SIS96x_900_REV) {
+		sw32(mear, EEREQ);
+		for (wait = 0; wait < 2000; wait++) {
+			if (sr32(mear) & EEGNT) {
+				/* read 16 bits, and index by 16 bits */
+				for (i = 0; i < sis_priv->eeprom_size / 2; i++)
+					ebuf[i] = (u16)read_eeprom(ioaddr, i);
+				ret = 0;
+				break;
+			}
+			udelay(1);
+		}
+		sw32(mear, EEDONE);
+	} else {
+		signature = (u16)read_eeprom(ioaddr, EEPROMSignature);
+		if (signature != 0xffff && signature != 0x0000) {
+			/* read 16 bits, and index by 16 bits */
+			for (i = 0; i < sis_priv->eeprom_size / 2; i++)
+				ebuf[i] = (u16)read_eeprom(ioaddr, i);
+			ret = 0;
+		}
+	}
+	return ret;
+}
+
+#define SIS900_EEPROM_MAGIC	0xBABE
+static int sis900_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct sis900_private *sis_priv = netdev_priv(dev);
+	u8 *eebuf;
+	int res;
+
+	eebuf = kmalloc(sis_priv->eeprom_size, GFP_KERNEL);
+	if (!eebuf)
+		return -ENOMEM;
+
+	eeprom->magic = SIS900_EEPROM_MAGIC;
+	spin_lock_irq(&sis_priv->lock);
+	res = sis900_read_eeprom(dev, eebuf);
+	spin_unlock_irq(&sis_priv->lock);
+	if (!res)
+		memcpy(data, eebuf + eeprom->offset, eeprom->len);
+	kfree(eebuf);
+	return res;
+}
+
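+
Once these hooks are wired into sis900_ethtool_ops below, `ethtool -e <dev>` can dump the 24-byte EEPROM. A runnable userspace sketch of the same ETHTOOL_GEEPROM ioctl path (the interface name is an example):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	int main(void)
	{
		struct ethtool_eeprom *ee;
		struct ifreq ifr;
		int fd, i;

		fd = socket(AF_INET, SOCK_DGRAM, 0);
		if (fd < 0)
			return 1;

		ee = calloc(1, sizeof(*ee) + 24);
		if (!ee)
			return 1;
		ee->cmd = ETHTOOL_GEEPROM;
		ee->len = 24;			/* sis900's eeprom_size */

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* example name */
		ifr.ifr_data = (void *)ee;

		if (ioctl(fd, SIOCETHTOOL, &ifr) == 0) {
			for (i = 0; i < 24; i++)
				printf("%02x ", ee->data[i]);
			printf("\n");
		}
		free(ee);
		close(fd);
		return 0;
	}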
 static const struct ethtool_ops sis900_ethtool_ops = {
 	.get_drvinfo 	= sis900_get_drvinfo,
 	.get_msglevel	= sis900_get_msglevel,
@@ -2132,6 +2198,8 @@
 	.set_wol	= sis900_set_wol,
 	.get_link_ksettings = sis900_get_link_ksettings,
 	.set_link_ksettings = sis900_set_link_ksettings,
+	.get_eeprom_len = sis900_get_eeprom_len,
+	.get_eeprom = sis900_get_eeprom,
 };
 
 /**
@@ -2473,7 +2541,7 @@
 	sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
 
 	/* Enable all known interrupts by setting the interrupt mask. */
-	sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+	sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC);
 	sw32(cr, RxENA | sr32(cr));
 	sw32(ier, IE);
 
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index 3588202..9e1c375 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Western Digital/SMC network device configuration
 #
@@ -27,7 +28,7 @@
 	  option if you have a DELL laptop with the docking station, or
 	  another SMC9192/9194 based chipset.  Say Y if you want it compiled
 	  into the kernel, and read the file
-	  <file:Documentation/networking/smc9.txt>.
+	  <file:Documentation/networking/device_drivers/smsc/smc9.txt>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called smc9194.
@@ -43,12 +44,12 @@
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
 	  including the SMC91C94 and the SMC91C111. Say Y if you want it
 	  compiled into the kernel, and read the file
-	  <file:Documentation/networking/smc9.txt>.
+	  <file:Documentation/networking/device_drivers/smsc/smc9.txt>.
 
 	  This driver is also available as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want).
 	  The module will be called smc91x.  If you want to compile it as a
-	  module, say M here and read <file:Documentation/kbuild/modules.txt>.
+	  module, say M here and read <file:Documentation/kbuild/modules.rst>.
 
 config PCMCIA_SMC91C92
 	tristate "SMC 91Cxx PCMCIA support"
@@ -85,7 +86,7 @@
 
 	  This driver is also available as a module. The module will be
 	  called smc911x.  If you want to compile it as a module, say M
-	  here and read <file:Documentation/kbuild/modules.txt>
+	  here and read <file:Documentation/kbuild/modules.rst>
 
 config SMSC911X
 	tristate "SMSC LAN911x/LAN921x families embedded ethernet support"
@@ -120,6 +121,6 @@
 
 	  This driver is also available as a module. The module will be
 	  called smsc9420.  If you want to compile it as a module, say M
-	  here and read <file:Documentation/kbuild/modules.txt>
+	  here and read <file:Documentation/kbuild/modules.rst>
 
 endif # NET_VENDOR_SMSC
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index 15c62c1..be47d86 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -1037,7 +1037,7 @@
 		skb = ep->tx_skbuff[entry];
 		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
 				 skb->len, PCI_DMA_TODEVICE);
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		ep->tx_skbuff[entry] = NULL;
 	}
 
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index b1b53f6..8d88e40 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * smc911x.c
  * This is a driver for SMSC's LAN911{5,6,7,8} single-chip Ethernet devices.
@@ -6,19 +7,6 @@
  *	   Derived from the unified SMC91x driver by Nicolas Pitre
  *	   and the smsc911x.c reference driver by SMSC
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Arguments:
  *	 watchdog  = TX watchdog timeout
  *	 tx_fifo_kb = Size of TX FIFO in KB
@@ -513,7 +501,8 @@
  * now, or set the card to generate an interrupt when ready
  * for the packet.
  */
-static int smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	unsigned int free;
@@ -723,6 +712,7 @@
 					/* Found an external PHY */
 					break;
 			}
+			/* Else, fall through */
 		default:
 			/* Internal media only */
 			SMC_GET_PHY_ID1(lp, 1, id1);
@@ -1187,7 +1177,7 @@
 
 	DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "TX DMA irq handler\n");
 	BUG_ON(skb == NULL);
-	dma_unmap_single(NULL, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE);
+	dma_unmap_single(lp->dev, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE);
 	netif_trans_update(dev);
 	dev_kfree_skb_irq(skb);
 	lp->current_tx_skb = NULL;
@@ -1218,7 +1208,7 @@
 
 	DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
 	DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, "RX DMA irq handler\n");
-	dma_unmap_single(NULL, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE);
+	dma_unmap_single(lp->dev, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE);
 	BUG_ON(skb == NULL);
 	lp->current_rx_skb = NULL;
 	PRINT_PKT(skb->data, skb->len);
diff --git a/drivers/net/ethernet/smsc/smc911x.h b/drivers/net/ethernet/smsc/smc911x.h
index fa528ea..d4edcc0 100644
--- a/drivers/net/ethernet/smsc/smc911x.h
+++ b/drivers/net/ethernet/smsc/smc911x.h
@@ -1,21 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*------------------------------------------------------------------------
  . smc911x.h - macros for SMSC's LAN911{5,6,7,8} single-chip Ethernet device.
  .
  . Copyright (C) 2005 Sensoria Corp.
  . Derived from the unified SMC91x driver by Nicolas Pitre
  .
- . This program is free software; you can redistribute it and/or modify
- . it under the terms of the GNU General Public License as published by
- . the Free Software Foundation; either version 2 of the License, or
- . (at your option) any later version.
- .
- . This program is distributed in the hope that it will be useful,
- . but WITHOUT ANY WARRANTY; without even the implied warranty of
- . MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- . GNU General Public License for more details.
- .
- . You should have received a copy of the GNU General Public License
- . along with this program; if not, see <http://www.gnu.org/licenses/>.
  .
  . Information contained in this file was obtained from the LAN9118
  . manual from SMC.  To get a copy, if you really want one, you can find
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index b944828..3a67611 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * smc91x.c
  * This is a driver for SMSC's 91C9x/91C1xx single-chip Ethernet devices.
@@ -8,19 +9,6 @@
  * Copyright (C) 2003 Monta Vista Software, Inc.
  *	Unified SMC91x driver by Nicolas Pitre
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * Arguments:
  * 	io	= for the base address
  *	irq	= for the IRQ
@@ -390,8 +378,7 @@
 	pending_skb = lp->pending_tx_skb;
 	lp->pending_tx_skb = NULL;
 	spin_unlock_irq(&lp->lock);
-	if (pending_skb)
-		dev_kfree_skb(pending_skb);
+	dev_kfree_skb(pending_skb);
 
 	/* and tell the card to stay away from that nasty outside world */
 	SMC_SELECT_BANK(lp, 0);
@@ -638,7 +625,8 @@
  * now, or set the card to generate an interrupt when ready
  * for the packet.
  */
-static int smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct smc_local *lp = netdev_priv(dev);
 	void __iomem *ioaddr = lp->base;
@@ -2446,8 +2434,7 @@
 
 static int smc_drv_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
 	if (ndev) {
 		if (netif_running(ndev)) {
diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h
index a273522..387539a 100644
--- a/drivers/net/ethernet/smsc/smc91x.h
+++ b/drivers/net/ethernet/smsc/smc91x.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*------------------------------------------------------------------------
  . smc91x.h - macros for SMSC's 91C9x/91C1xx single-chip Ethernet device.
  .
@@ -7,18 +8,6 @@
  . Copyright (C) 2003 Monta Vista Software, Inc.
  .	Unified SMC91x driver by Nicolas Pitre
  .
- . This program is free software; you can redistribute it and/or modify
- . it under the terms of the GNU General Public License as published by
- . the Free Software Foundation; either version 2 of the License, or
- . (at your option) any later version.
- .
- . This program is distributed in the hope that it will be useful,
- . but WITHOUT ANY WARRANTY; without even the implied warranty of
- . MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- . GNU General Public License for more details.
- .
- . You should have received a copy of the GNU General Public License
- . along with this program; if not, see <http://www.gnu.org/licenses/>.
  .
  . Information contained in this file was obtained from the LAN91C111
  . manual from SMC.  To get a copy, if you really want one, you can find
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index f0afb88..38068fc 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /***************************************************************************
  *
  * Copyright (C) 2004-2008 SMSC
  * Copyright (C) 2005-2008 ARM
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  ***************************************************************************
  * Rewritten, heavily based on smsc911x simple driver by SMSC.
  * Partly uses io macros from smc91x.c by Nicolas Pitre
@@ -26,7 +14,6 @@
  *   LAN9210, LAN9211
  *   LAN9220, LAN9221
  *   LAN89218,LAN9250
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1048,10 +1035,10 @@
 
 	phy_attached_info(phydev);
 
+	phy_set_max_speed(phydev, SPEED_100);
+
 	/* mask with MAC supported features */
-	phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
-			      SUPPORTED_Asym_Pause);
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	pdata->last_duplex = -1;
 	pdata->last_carrier = -1;
@@ -1786,7 +1773,8 @@
 }
 
 /* Entry point for transmitting a packet */
-static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
 	unsigned int freespace;
diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h
index 8d75508..09b4638 100644
--- a/drivers/net/ethernet/smsc/smsc911x.h
+++ b/drivers/net/ethernet/smsc/smsc911x.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /***************************************************************************
  *
  * Copyright (C) 2004-2008 SMSC
  * Copyright (C) 2005-2008 ARM
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  ***************************************************************************/
 #ifndef __SMSC911X_H__
 #define __SMSC911X_H__
@@ -67,7 +55,7 @@
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define SMSC_ASSERT_MAC_LOCK(pdata) \
-		WARN_ON_SMP(!spin_is_locked(&pdata->mac_lock))
+		lockdep_assert_held(&pdata->mac_lock)
 #else
 #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0)
 #endif				/* CONFIG_DEBUG_SPINLOCK */
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 2fa3c1d..a6962a4 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
  /***************************************************************************
  *
  * Copyright (C) 2007,2008  SMSC
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  ***************************************************************************
  */
 
@@ -1135,10 +1123,10 @@
 		return PTR_ERR(phydev);
 	}
 
+	phy_set_max_speed(phydev, SPEED_100);
+
 	/* mask with MAC supported features */
-	phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
-			      SUPPORTED_Asym_Pause);
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	phy_attached_info(phydev);
 
diff --git a/drivers/net/ethernet/smsc/smsc9420.h b/drivers/net/ethernet/smsc/smsc9420.h
index c63c763..409e82b 100644
--- a/drivers/net/ethernet/smsc/smsc9420.h
+++ b/drivers/net/ethernet/smsc/smsc9420.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
  /***************************************************************************
  *
  * Copyright (C) 2007,2008  SMSC
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  ***************************************************************************
  */
 
diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig
index b80048c..95e99ba 100644
--- a/drivers/net/ethernet/socionext/Kconfig
+++ b/drivers/net/ethernet/socionext/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config NET_VENDOR_SOCIONEXT
 	bool "Socionext ethernet drivers"
 	default y
@@ -25,6 +26,7 @@
 	tristate "Socionext NETSEC ethernet support"
 	depends on (ARCH_SYNQUACER || COMPILE_TEST) && OF
 	select PHYLIB
+	select PAGE_POOL
 	select MII
 	---help---
 	  Enable to add support for the SocioNext NetSec Gigabit Ethernet
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index d2caeb9..f9e6744 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -9,8 +9,12 @@
 #include <linux/etherdevice.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 
 #include <net/tcp.h>
+#include <net/page_pool.h>
 #include <net/ip6_checksum.h>
 
 #define NETSEC_REG_SOFT_RST			0x104
@@ -234,33 +238,59 @@
 
 #define DESC_NUM	256
 
+#define NETSEC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#define NETSEC_RXBUF_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+			       NET_IP_ALIGN)
+#define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \
+				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 #define DESC_SZ	sizeof(struct netsec_de)
 
 #define NETSEC_F_NETSEC_VER_MAJOR_NUM(x)	((x) & 0xffff0000)
 
+#define NETSEC_XDP_PASS          0
+#define NETSEC_XDP_CONSUMED      BIT(0)
+#define NETSEC_XDP_TX            BIT(1)
+#define NETSEC_XDP_REDIR         BIT(2)
+#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR)
+
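
The NETSEC_RXBUF_HEADROOM/NETSEC_RX_BUF_NON_DATA arithmetic above decides how much of each page is left for packet data. A runnable model with typical values (NET_SKB_PAD, NET_IP_ALIGN and the aligned skb_shared_info size vary by architecture and config; these numbers are only illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_size = 4096;
		unsigned int headroom  = 256 + 2;	/* max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN */
		unsigned int shinfo    = 320;		/* ~SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */

		printf("payload room per page: %u bytes\n",
		       page_size - headroom - shinfo);	/* 3518 here */
		return 0;
	}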
 enum ring_id {
 	NETSEC_RING_TX = 0,
 	NETSEC_RING_RX
 };
 
+enum buf_type {
+	TYPE_NETSEC_SKB = 0,
+	TYPE_NETSEC_XDP_TX,
+	TYPE_NETSEC_XDP_NDO,
+};
+
 struct netsec_desc {
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
 	dma_addr_t dma_addr;
 	void *addr;
 	u16 len;
+	u8 buf_type;
 };
 
 struct netsec_desc_ring {
 	dma_addr_t desc_dma;
 	struct netsec_desc *desc;
 	void *vaddr;
-	u16 pkt_cnt;
 	u16 head, tail;
+	u16 xdp_xmit; /* netsec_xdp_xmit packets */
+	struct page_pool *page_pool;
+	struct xdp_rxq_info xdp_rxq;
+	spinlock_t lock; /* XDP tx queue locking */
 };
 
 struct netsec_priv {
 	struct netsec_desc_ring desc_ring[NETSEC_RING_MAX];
 	struct ethtool_coalesce et_coalesce;
+	struct bpf_prog *xdp_prog;
 	spinlock_t reglock; /* protect reg access */
 	struct napi_struct napi;
 	phy_interface_t phy_interface;
@@ -274,6 +304,7 @@
 	struct clk *clk;
 	u32 msg_enable;
 	u32 freq;
+	u32 phy_addr;
 	bool rx_cksum_offload_flag;
 };
 
@@ -431,9 +462,12 @@
 	return 0;
 }
 
+static int netsec_phy_read(struct mii_bus *bus, int phy_addr, int reg_addr);
+
 static int netsec_phy_write(struct mii_bus *bus,
 			    int phy_addr, int reg, u16 val)
 {
+	int status;
 	struct netsec_priv *priv = bus->priv;
 
 	if (netsec_mac_write(priv, GMAC_REG_GDR, val))
@@ -446,8 +480,19 @@
 			      GMAC_REG_SHIFT_CR_GAR)))
 		return -ETIMEDOUT;
 
-	return netsec_mac_wait_while_busy(priv, GMAC_REG_GAR,
-					  NETSEC_GMAC_GAR_REG_GB);
+	status = netsec_mac_wait_while_busy(priv, GMAC_REG_GAR,
+					    NETSEC_GMAC_GAR_REG_GB);
+
+	/* The Developerbox implements an RTL8211E PHY and there is
+	 * a compatibility problem with F_GMAC4.
+	 * The RTL8211E expects the MDC clock to keep toggling for several
+	 * clock cycles with MDIO high before entering the IDLE state.
+	 * To meet this requirement, the netsec driver issues a dummy
+	 * read (e.g. of the PHYID1 register, offset 0x2) right after a write.
+	 */
+	netsec_phy_read(bus, phy_addr, MII_PHYSID1);
+
+	return status;
 }
 
 static int netsec_phy_read(struct mii_bus *bus, int phy_addr, int reg_addr)
@@ -556,34 +601,10 @@
 
 /************* NETDEV_OPS FOLLOW *************/
 
-static struct sk_buff *netsec_alloc_skb(struct netsec_priv *priv,
-					struct netsec_desc *desc)
-{
-	struct sk_buff *skb;
-
-	if (device_get_dma_attr(priv->dev) == DEV_DMA_COHERENT) {
-		skb = netdev_alloc_skb_ip_align(priv->ndev, desc->len);
-	} else {
-		desc->len = L1_CACHE_ALIGN(desc->len);
-		skb = netdev_alloc_skb(priv->ndev, desc->len);
-	}
-	if (!skb)
-		return NULL;
-
-	desc->addr = skb->data;
-	desc->dma_addr = dma_map_single(priv->dev, desc->addr, desc->len,
-					DMA_FROM_DEVICE);
-	if (dma_mapping_error(priv->dev, desc->dma_addr)) {
-		dev_kfree_skb_any(skb);
-		return NULL;
-	}
-	return skb;
-}
 
 static void netsec_set_rx_de(struct netsec_priv *priv,
 			     struct netsec_desc_ring *dring, u16 idx,
-			     const struct netsec_desc *desc,
-			     struct sk_buff *skb)
+			     const struct netsec_desc *desc)
 {
 	struct netsec_de *de = dring->vaddr + DESC_SZ * idx;
 	u32 attr = (1 << NETSEC_RX_PKT_OWN_FIELD) |
@@ -602,219 +623,158 @@
 	dring->desc[idx].dma_addr = desc->dma_addr;
 	dring->desc[idx].addr = desc->addr;
 	dring->desc[idx].len = desc->len;
-	dring->desc[idx].skb = skb;
 }
 
-static struct sk_buff *netsec_get_rx_de(struct netsec_priv *priv,
-					struct netsec_desc_ring *dring,
-					u16 idx,
-					struct netsec_rx_pkt_info *rxpi,
-					struct netsec_desc *desc, u16 *len)
-{
-	struct netsec_de de = {};
-
-	memcpy(&de, dring->vaddr + DESC_SZ * idx, DESC_SZ);
-
-	*len = de.buf_len_info >> 16;
-
-	rxpi->err_flag = (de.attr >> NETSEC_RX_PKT_ER_FIELD) & 1;
-	rxpi->rx_cksum_result = (de.attr >> NETSEC_RX_PKT_CO_FIELD) & 3;
-	rxpi->err_code = (de.attr >> NETSEC_RX_PKT_ERR_FIELD) &
-							NETSEC_RX_PKT_ERR_MASK;
-	*desc = dring->desc[idx];
-	return desc->skb;
-}
-
-static struct sk_buff *netsec_get_rx_pkt_data(struct netsec_priv *priv,
-					      struct netsec_rx_pkt_info *rxpi,
-					      struct netsec_desc *desc,
-					      u16 *len)
-{
-	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-	struct sk_buff *tmp_skb, *skb = NULL;
-	struct netsec_desc td;
-	int tail;
-
-	*rxpi = (struct netsec_rx_pkt_info){};
-
-	td.len = priv->ndev->mtu + 22;
-
-	tmp_skb = netsec_alloc_skb(priv, &td);
-
-	tail = dring->tail;
-
-	if (!tmp_skb) {
-		netsec_set_rx_de(priv, dring, tail, &dring->desc[tail],
-				 dring->desc[tail].skb);
-	} else {
-		skb = netsec_get_rx_de(priv, dring, tail, rxpi, desc, len);
-		netsec_set_rx_de(priv, dring, tail, &td, tmp_skb);
-	}
-
-	/* move tail ahead */
-	dring->tail = (dring->tail + 1) % DESC_NUM;
-
-	return skb;
-}
-
-static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
+static bool netsec_clean_tx_dring(struct netsec_priv *priv)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
-	unsigned int pkts, bytes;
+	struct netsec_de *entry;
+	int tail = dring->tail;
+	unsigned int bytes;
+	int cnt = 0;
 
-	dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
+	spin_lock(&dring->lock);
 
-	if (dring->pkt_cnt < budget)
-		budget = dring->pkt_cnt;
-
-	pkts = 0;
 	bytes = 0;
+	entry = dring->vaddr + DESC_SZ * tail;
 
-	while (pkts < budget) {
+	while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) &&
+	       cnt < DESC_NUM) {
 		struct netsec_desc *desc;
-		struct netsec_de *entry;
-		int tail, eop;
+		int eop;
 
-		tail = dring->tail;
+		desc = &dring->desc[tail];
+		eop = (entry->attr >> NETSEC_TX_LAST) & 1;
+		dma_rmb();
 
+		/* if buf_type is either TYPE_NETSEC_SKB or
+		 * TYPE_NETSEC_XDP_NDO we mapped it
+		 */
+		if (desc->buf_type != TYPE_NETSEC_XDP_TX)
+			dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
+					 DMA_TO_DEVICE);
+
+		if (!eop)
+			goto next;
+
+		if (desc->buf_type == TYPE_NETSEC_SKB) {
+			bytes += desc->skb->len;
+			dev_kfree_skb(desc->skb);
+		} else {
+			xdp_return_frame(desc->xdpf);
+		}
+next:
+		/* clean up so netsec_uninit_pkt_dring() won't free the skb
+		 * again
+		 */
+		*desc = (struct netsec_desc){};
+
+		/* entry->attr is not going to be accessed by the NIC until
+		 * netsec_set_tx_de() is called. No need for a dma_wmb() here
+		 */
+		entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
 		/* move tail ahead */
 		dring->tail = (tail + 1) % DESC_NUM;
 
-		desc = &dring->desc[tail];
+		tail = dring->tail;
 		entry = dring->vaddr + DESC_SZ * tail;
-
-		eop = (entry->attr >> NETSEC_TX_LAST) & 1;
-
-		dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
-				 DMA_TO_DEVICE);
-		if (eop) {
-			pkts++;
-			bytes += desc->skb->len;
-			dev_kfree_skb(desc->skb);
-		}
-		*desc = (struct netsec_desc){};
+		cnt++;
 	}
-	dring->pkt_cnt -= budget;
 
-	priv->ndev->stats.tx_packets += budget;
+	spin_unlock(&dring->lock);
+
+	if (!cnt)
+		return false;
+
+	/* reading the register clears the irq */
+	netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
+
+	priv->ndev->stats.tx_packets += cnt;
 	priv->ndev->stats.tx_bytes += bytes;
 
-	netdev_completed_queue(priv->ndev, budget, bytes);
+	netdev_completed_queue(priv->ndev, cnt, bytes);
 
-	return budget;
+	return true;
 }
 
-static int netsec_process_tx(struct netsec_priv *priv, int budget)
+static void netsec_process_tx(struct netsec_priv *priv)
 {
 	struct net_device *ndev = priv->ndev;
-	int new, done = 0;
+	bool cleaned;
 
-	do {
-		new = netsec_clean_tx_dring(priv, budget);
-		done += new;
-		budget -= new;
-	} while (new);
+	cleaned = netsec_clean_tx_dring(priv);
 
-	if (done && netif_queue_stopped(ndev))
+	if (cleaned && netif_queue_stopped(ndev)) {
+		/* Make sure we update the value; anyone stopping the queue
+		 * after this will read the proper consumer idx
+		 */
+		smp_wmb();
 		netif_wake_queue(ndev);
-
-	return done;
+	}
 }
 
-static int netsec_process_rx(struct netsec_priv *priv, int budget)
+static void *netsec_alloc_rx_data(struct netsec_priv *priv,
+				  dma_addr_t *dma_handle, u16 *desc_len)
+{
+	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+	enum dma_data_direction dma_dir;
+	struct page *page;
+
+	page = page_pool_dev_alloc_pages(dring->page_pool);
+	if (!page)
+		return NULL;
+
+	/* We allocate the same buffer length for XDP and non-XDP cases.
+	 * The page_pool API maps the whole page; the driver then skips
+	 * the headroom reserved for network payloads and/or XDP.
+	 */
+	*dma_handle = page_pool_get_dma_addr(page) + NETSEC_RXBUF_HEADROOM;
+	/* Make sure the incoming payload fits in the page for XDP and non-XDP
+	 * cases and reserve enough space for headroom + skb_shared_info
+	 */
+	*desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
+	dma_dir = page_pool_get_dma_dir(dring->page_pool);
+	dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
+
+	return page_address(page);
+}
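
page_pool_dev_alloc_pages() and page_pool_get_dma_addr() above depend on a pool created during ring setup, which this excerpt does not show. A hedged sketch of what such a creation looks like, with illustrative field values:

	/* Sketch, assuming the elided setup: one pool per RX ring, with
	 * the pool doing the DMA mapping so pages can be recycled.
	 */
	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,
		.order		= 0,			/* one page per RX buffer */
		.pool_size	= DESC_NUM,
		.nid		= NUMA_NO_NODE,
		.dev		= priv->dev,
		.dma_dir	= DMA_FROM_DEVICE,	/* bidirectional once XDP_TX is in play */
	};

	dring->page_pool = page_pool_create(&pp_params);
	if (IS_ERR(dring->page_pool))
		return PTR_ERR(dring->page_pool);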
+
+static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-	struct net_device *ndev = priv->ndev;
-	struct netsec_rx_pkt_info rx_info;
-	int done = 0;
-	struct netsec_desc desc;
-	struct sk_buff *skb;
-	u16 len;
+	u16 idx = from;
 
-	while (done < budget) {
-		u16 idx = dring->tail;
-		struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
-
-		if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
-			/* reading the register clears the irq */
-			netsec_read(priv, NETSEC_REG_NRM_RX_PKTCNT);
-			break;
-		}
-
-		/* This  barrier is needed to keep us from reading
-		 * any other fields out of the netsec_de until we have
-		 * verified the descriptor has been written back
-		 */
-		dma_rmb();
-		done++;
-		skb = netsec_get_rx_pkt_data(priv, &rx_info, &desc, &len);
-		if (unlikely(!skb) || rx_info.err_flag) {
-			netif_err(priv, drv, priv->ndev,
-				  "%s: rx fail err(%d)\n",
-				  __func__, rx_info.err_code);
-			ndev->stats.rx_dropped++;
-			continue;
-		}
-
-		dma_unmap_single(priv->dev, desc.dma_addr, desc.len,
-				 DMA_FROM_DEVICE);
-		skb_put(skb, len);
-		skb->protocol = eth_type_trans(skb, priv->ndev);
-
-		if (priv->rx_cksum_offload_flag &&
-		    rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK)
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-		if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) {
-			ndev->stats.rx_packets++;
-			ndev->stats.rx_bytes += len;
-		}
+	while (num) {
+		netsec_set_rx_de(priv, dring, idx, &dring->desc[idx]);
+		idx++;
+		if (idx >= DESC_NUM)
+			idx = 0;
+		num--;
 	}
-
-	return done;
 }
 
-static int netsec_napi_poll(struct napi_struct *napi, int budget)
+static void netsec_xdp_ring_tx_db(struct netsec_priv *priv, u16 pkts)
 {
-	struct netsec_priv *priv;
-	int tx, rx, done, todo;
+	if (likely(pkts))
+		netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, pkts);
+}
 
-	priv = container_of(napi, struct netsec_priv, napi);
+static void netsec_finalize_xdp_rx(struct netsec_priv *priv, u32 xdp_res,
+				   u16 pkts)
+{
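+	/* Flush any redirected frames and ring the TX doorbell once per
+	 * NAPI poll rather than once per packet.
+	 */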
+	if (xdp_res & NETSEC_XDP_REDIR)
+		xdp_do_flush_map();
 
-	todo = budget;
-	do {
-		if (!todo)
-			break;
-
-		tx = netsec_process_tx(priv, todo);
-		todo -= tx;
-
-		if (!todo)
-			break;
-
-		rx = netsec_process_rx(priv, todo);
-		todo -= rx;
-	} while (rx || tx);
-
-	done = budget - todo;
-
-	if (done < budget && napi_complete_done(napi, done)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&priv->reglock, flags);
-		netsec_write(priv, NETSEC_REG_INTEN_SET,
-			     NETSEC_IRQ_RX | NETSEC_IRQ_TX);
-		spin_unlock_irqrestore(&priv->reglock, flags);
-	}
-
-	return done;
+	if (xdp_res & NETSEC_XDP_TX)
+		netsec_xdp_ring_tx_db(priv, pkts);
 }
 
 static void netsec_set_tx_de(struct netsec_priv *priv,
 			     struct netsec_desc_ring *dring,
 			     const struct netsec_tx_pkt_ctrl *tx_ctrl,
-			     const struct netsec_desc *desc,
-			     struct sk_buff *skb)
+			     const struct netsec_desc *desc, void *buf)
 {
 	int idx = dring->head;
 	struct netsec_de *de;
@@ -837,15 +797,317 @@
 	de->data_buf_addr_lw = lower_32_bits(desc->dma_addr);
 	de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len;
 	de->attr = attr;
-	dma_wmb();
 
 	dring->desc[idx] = *desc;
-	dring->desc[idx].skb = skb;
+	if (desc->buf_type == TYPE_NETSEC_SKB)
+		dring->desc[idx].skb = buf;
+	else if (desc->buf_type == TYPE_NETSEC_XDP_TX ||
+		 desc->buf_type == TYPE_NETSEC_XDP_NDO)
+		dring->desc[idx].xdpf = buf;
 
 	/* move head ahead */
 	dring->head = (dring->head + 1) % DESC_NUM;
 }
 
+/* The current driver only supports 1 Txq; this should run under spin_lock() */
+static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
+				struct xdp_frame *xdpf, bool is_ndo)
+{
+	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+	struct page *page = virt_to_page(xdpf->data);
+	struct netsec_tx_pkt_ctrl tx_ctrl = {};
+	struct netsec_desc tx_desc;
+	dma_addr_t dma_handle;
+	u16 filled;
+
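+	/* Count the descriptors currently in flight; the ring is circular,
+	 * so account for wrap-around when head has passed tail. At least
+	 * one slot is kept free so head never catches up with tail.
+	 */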
+	if (tx_ring->head >= tx_ring->tail)
+		filled = tx_ring->head - tx_ring->tail;
+	else
+		filled = tx_ring->head + DESC_NUM - tx_ring->tail;
+
+	if (DESC_NUM - filled <= 1)
+		return NETSEC_XDP_CONSUMED;
+
+	if (is_ndo) {
+		/* This is for ndo_xdp_xmit; the buffer needs mapping before
+		 * sending
+		 */
+		dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len,
+					    DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, dma_handle))
+			return NETSEC_XDP_CONSUMED;
+		tx_desc.buf_type = TYPE_NETSEC_XDP_NDO;
+	} else {
+		/* This is the device Rx buffer from page_pool. No need to
+		 * remap; just sync and send it
+		 */
+		struct netsec_desc_ring *rx_ring =
+			&priv->desc_ring[NETSEC_RING_RX];
+		enum dma_data_direction dma_dir =
+			page_pool_get_dma_dir(rx_ring->page_pool);
+
+		dma_handle = page_pool_get_dma_addr(page) +
+			NETSEC_RXBUF_HEADROOM;
+		dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len,
+					   dma_dir);
+		tx_desc.buf_type = TYPE_NETSEC_XDP_TX;
+	}
+
+	tx_desc.dma_addr = dma_handle;
+	tx_desc.addr = xdpf->data;
+	tx_desc.len = xdpf->len;
+
+	netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf);
+
+	return NETSEC_XDP_TX;
+}
+
+static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
+{
+	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+	u32 ret;
+
+	if (unlikely(!xdpf))
+		return NETSEC_XDP_CONSUMED;
+
+	spin_lock(&tx_ring->lock);
+	ret = netsec_xdp_queue_one(priv, xdpf, false);
+	spin_unlock(&tx_ring->lock);
+
+	return ret;
+}
+
+static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
+			  struct xdp_buff *xdp)
+{
+	u32 ret = NETSEC_XDP_PASS;
+	int err;
+	u32 act;
+
+	act = bpf_prog_run_xdp(prog, xdp);
+
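+	/* Translate the verdict into driver-private NETSEC_XDP_* flags;
+	 * TX doorbells and redirect flushes are batched per NAPI cycle in
+	 * netsec_finalize_xdp_rx().
+	 */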
+	switch (act) {
+	case XDP_PASS:
+		ret = NETSEC_XDP_PASS;
+		break;
+	case XDP_TX:
+		ret = netsec_xdp_xmit_back(priv, xdp);
+		if (ret != NETSEC_XDP_TX)
+			xdp_return_buff(xdp);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(priv->ndev, xdp, prog);
+		if (!err) {
+			ret = NETSEC_XDP_REDIR;
+		} else {
+			ret = NETSEC_XDP_CONSUMED;
+			xdp_return_buff(xdp);
+		}
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(priv->ndev, prog, act);
+		/* fall through -- handle aborts by dropping packet */
+	case XDP_DROP:
+		ret = NETSEC_XDP_CONSUMED;
+		xdp_return_buff(xdp);
+		break;
+	}
+
+	return ret;
+}
+
+static int netsec_process_rx(struct netsec_priv *priv, int budget)
+{
+	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+	struct net_device *ndev = priv->ndev;
+	struct netsec_rx_pkt_info rx_info;
+	enum dma_data_direction dma_dir;
+	struct bpf_prog *xdp_prog;
+	struct sk_buff *skb = NULL;
+	u16 xdp_xmit = 0;
+	u32 xdp_act = 0;
+	int done = 0;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(priv->xdp_prog);
+	dma_dir = page_pool_get_dma_dir(dring->page_pool);
+
+	while (done < budget) {
+		u16 idx = dring->tail;
+		struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
+		struct netsec_desc *desc = &dring->desc[idx];
+		struct page *page = virt_to_page(desc->addr);
+		u32 xdp_result = XDP_PASS;
+		u16 pkt_len, desc_len;
+		dma_addr_t dma_handle;
+		struct xdp_buff xdp;
+		void *buf_addr;
+
+		if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
+			/* reading the register clears the irq */
+			netsec_read(priv, NETSEC_REG_NRM_RX_PKTCNT);
+			break;
+		}
+
+		/* This barrier is needed to keep us from reading
+		 * any other fields out of the netsec_de until we have
+		 * verified the descriptor has been written back
+		 */
+		dma_rmb();
+		done++;
+
+		pkt_len = de->buf_len_info >> 16;
+		rx_info.err_code = (de->attr >> NETSEC_RX_PKT_ERR_FIELD) &
+			NETSEC_RX_PKT_ERR_MASK;
+		rx_info.err_flag = (de->attr >> NETSEC_RX_PKT_ER_FIELD) & 1;
+		if (rx_info.err_flag) {
+			netif_err(priv, drv, priv->ndev,
+				  "%s: rx fail err(%d)\n", __func__,
+				  rx_info.err_code);
+			ndev->stats.rx_dropped++;
+			dring->tail = (dring->tail + 1) % DESC_NUM;
+			/* reuse buffer page frag */
+			netsec_rx_fill(priv, idx, 1);
+			continue;
+		}
+		rx_info.rx_cksum_result =
+			(de->attr >> NETSEC_RX_PKT_CO_FIELD) & 3;
+
+		/* allocate a fresh buffer and map it to the hardware.
+		 * This will eventually replace the old buffer in the hardware
+		 */
+		buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+
+		if (unlikely(!buf_addr))
+			break;
+
+		dma_sync_single_for_cpu(priv->dev, desc->dma_addr, pkt_len,
+					dma_dir);
+		prefetch(desc->addr);
+
+		xdp.data_hard_start = desc->addr;
+		xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
+		xdp_set_data_meta_invalid(&xdp);
+		xdp.data_end = xdp.data + pkt_len;
+		xdp.rxq = &dring->xdp_rxq;
+
+		if (xdp_prog) {
+			xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);
+			if (xdp_result != NETSEC_XDP_PASS) {
+				xdp_act |= xdp_result;
+				if (xdp_result == NETSEC_XDP_TX)
+					xdp_xmit++;
+				goto next;
+			}
+		}
+		skb = build_skb(desc->addr, desc->len + NETSEC_RX_BUF_NON_DATA);
+
+		if (unlikely(!skb)) {
+			/* If building the skb fails, recycle_direct will
+			 * either unmap and free the page or refill the cache,
+			 * depending on the cache state. Since we already paid
+			 * the allocation cost, try to put the page back into
+			 * the cache.
+			 */
+			page_pool_recycle_direct(dring->page_pool, page);
+			netif_err(priv, drv, priv->ndev,
+				  "rx failed to build skb\n");
+			break;
+		}
+		page_pool_release_page(dring->page_pool, page);
+
+		skb_reserve(skb, xdp.data - xdp.data_hard_start);
+		skb_put(skb, xdp.data_end - xdp.data);
+		skb->protocol = eth_type_trans(skb, priv->ndev);
+
+		if (priv->rx_cksum_offload_flag &&
+		    rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+next:
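+		/* Count the frame as received for both the skb path and
+		 * frames successfully consumed by XDP (NETSEC_XDP_RX_OK).
+		 */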
+		if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
+		    xdp_result & NETSEC_XDP_RX_OK) {
+			ndev->stats.rx_packets++;
+			ndev->stats.rx_bytes += xdp.data_end - xdp.data;
+		}
+
+		/* Update the descriptor with fresh buffers */
+		desc->len = desc_len;
+		desc->dma_addr = dma_handle;
+		desc->addr = buf_addr;
+
+		netsec_rx_fill(priv, idx, 1);
+		dring->tail = (dring->tail + 1) % DESC_NUM;
+	}
+	netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
+
+	rcu_read_unlock();
+
+	return done;
+}
+
+static int netsec_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct netsec_priv *priv;
+	int done;
+
+	priv = container_of(napi, struct netsec_priv, napi);
+
+	netsec_process_tx(priv);
+	done = netsec_process_rx(priv, budget);
+
+	if (done < budget && napi_complete_done(napi, done)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&priv->reglock, flags);
+		netsec_write(priv, NETSEC_REG_INTEN_SET,
+			     NETSEC_IRQ_RX | NETSEC_IRQ_TX);
+		spin_unlock_irqrestore(&priv->reglock, flags);
+	}
+
+	return done;
+}
+
+static int netsec_desc_used(struct netsec_desc_ring *dring)
+{
+	int used;
+
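+	/* head and tail wrap at DESC_NUM, so handle the wrapped case
+	 * explicitly when computing the number of used descriptors.
+	 */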
+	if (dring->head >= dring->tail)
+		used = dring->head - dring->tail;
+	else
+		used = dring->head + DESC_NUM - dring->tail;
+
+	return used;
+}
+
+static int netsec_check_stop_tx(struct netsec_priv *priv, int used)
+{
+	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
+
+	/* keep tail from touching the queue */
+	if (DESC_NUM - used < 2) {
+		netif_stop_queue(priv->ndev);
+
+		/* Make sure we read the updated value in case
+		 * descriptors got freed
+		 */
+		smp_rmb();
+
+		used = netsec_desc_used(dring);
+		if (DESC_NUM - used < 2)
+			return NETDEV_TX_BUSY;
+
+		netif_wake_queue(priv->ndev);
+	}
+
+	return 0;
+}
+
 static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
 					    struct net_device *ndev)
 {
@@ -856,16 +1118,12 @@
 	u16 tso_seg_len = 0;
 	int filled;
 
-	/* differentiate between full/emtpy ring */
-	if (dring->head >= dring->tail)
-		filled = dring->head - dring->tail;
-	else
-		filled = dring->head + DESC_NUM - dring->tail;
-
-	if (DESC_NUM - filled < 2) { /* if less than 2 available */
-		netif_err(priv, drv, priv->ndev, "%s: TxQFull!\n", __func__);
-		netif_stop_queue(priv->ndev);
-		dma_wmb();
+	spin_lock_bh(&dring->lock);
+	filled = netsec_desc_used(dring);
+	if (netsec_check_stop_tx(priv, filled)) {
+		spin_unlock_bh(&dring->lock);
+		net_warn_ratelimited("%s %s Tx queue full\n",
+				     dev_name(priv->dev), ndev->name);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -896,6 +1154,7 @@
 	tx_desc.dma_addr = dma_map_single(priv->dev, skb->data,
 					  skb_headlen(skb), DMA_TO_DEVICE);
 	if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) {
+		spin_unlock_bh(&dring->lock);
 		netif_err(priv, drv, priv->ndev,
 			  "%s: DMA mapping failed\n", __func__);
 		ndev->stats.tx_dropped++;
@@ -904,11 +1163,13 @@
 	}
 	tx_desc.addr = skb->data;
 	tx_desc.len = skb_headlen(skb);
+	tx_desc.buf_type = TYPE_NETSEC_SKB;
 
 	skb_tx_timestamp(skb);
 	netdev_sent_queue(priv->ndev, skb->len);
 
 	netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb);
+	spin_unlock_bh(&dring->lock);
 	netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */
 
 	return NETDEV_TX_OK;
@@ -922,16 +1183,27 @@
 
 	if (!dring->vaddr || !dring->desc)
 		return;
-
 	for (idx = 0; idx < DESC_NUM; idx++) {
 		desc = &dring->desc[idx];
 		if (!desc->addr)
 			continue;
 
-		dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
-				 id == NETSEC_RING_RX ? DMA_FROM_DEVICE :
-							      DMA_TO_DEVICE);
-		dev_kfree_skb(desc->skb);
+		if (id == NETSEC_RING_RX) {
+			struct page *page = virt_to_page(desc->addr);
+
+			page_pool_put_page(dring->page_pool, page, false);
+		} else if (id == NETSEC_RING_TX) {
+			dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
+					 DMA_TO_DEVICE);
+			dev_kfree_skb(desc->skb);
+		}
+	}
+
+	/* Rx is currently using page_pool */
+	if (id == NETSEC_RING_RX) {
+		if (xdp_rxq_info_is_reg(&dring->xdp_rxq))
+			xdp_rxq_info_unreg(&dring->xdp_rxq);
+		page_pool_destroy(dring->page_pool);
 	}
 
 	memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM);
@@ -939,7 +1211,6 @@
 
 	dring->head = 0;
 	dring->tail = 0;
-	dring->pkt_cnt = 0;
 
 	if (id == NETSEC_RING_TX)
 		netdev_reset_queue(priv->ndev);
@@ -962,47 +1233,95 @@
 static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[id];
-	int ret = 0;
 
-	dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM,
-					   &dring->desc_dma, GFP_KERNEL);
-	if (!dring->vaddr) {
-		ret = -ENOMEM;
+	dring->vaddr = dma_alloc_coherent(priv->dev, DESC_SZ * DESC_NUM,
+					  &dring->desc_dma, GFP_KERNEL);
+	if (!dring->vaddr)
 		goto err;
-	}
 
 	dring->desc = kcalloc(DESC_NUM, sizeof(*dring->desc), GFP_KERNEL);
-	if (!dring->desc) {
-		ret = -ENOMEM;
+	if (!dring->desc)
 		goto err;
-	}
 
 	return 0;
 err:
 	netsec_free_dring(priv, id);
 
-	return ret;
+	return -ENOMEM;
+}
+
+static void netsec_setup_tx_dring(struct netsec_priv *priv)
+{
+	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
+	int i;
+
+	for (i = 0; i < DESC_NUM; i++) {
+		struct netsec_de *de;
+
+		de = dring->vaddr + (DESC_SZ * i);
+		/* de->attr is not going to be accessed by the NIC
+		 * until netsec_set_tx_de() is called.
+		 * No need for a dma_wmb() here
+		 */
+		de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
+	}
 }
 
 static int netsec_setup_rx_dring(struct netsec_priv *priv)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-	struct netsec_desc desc;
-	struct sk_buff *skb;
-	int n;
+	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+	struct page_pool_params pp_params = { 0 };
+	int i, err;
 
-	desc.len = priv->ndev->mtu + 22;
+	pp_params.order = 0;
+	/* internal DMA mapping in page_pool */
+	pp_params.flags = PP_FLAG_DMA_MAP;
+	pp_params.pool_size = DESC_NUM;
+	pp_params.nid = cpu_to_node(0);
+	pp_params.dev = priv->dev;
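+	/* RX buffers can be recycled straight into XDP_TX, in which case
+	 * the device both writes (RX) and reads (TX) them, hence the
+	 * bidirectional mapping when an XDP program is attached.
+	 */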
+	pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 
-	for (n = 0; n < DESC_NUM; n++) {
-		skb = netsec_alloc_skb(priv, &desc);
-		if (!skb) {
-			netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
-			return -ENOMEM;
-		}
-		netsec_set_rx_de(priv, dring, n, &desc, skb);
+	dring->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(dring->page_pool)) {
+		err = PTR_ERR(dring->page_pool);
+		dring->page_pool = NULL;
+		goto err_out;
 	}
 
+	err = xdp_rxq_info_reg(&dring->xdp_rxq, priv->ndev, 0);
+	if (err)
+		goto err_out;
+
+	err = xdp_rxq_info_reg_mem_model(&dring->xdp_rxq, MEM_TYPE_PAGE_POOL,
+					 dring->page_pool);
+	if (err)
+		goto err_out;
+
+	for (i = 0; i < DESC_NUM; i++) {
+		struct netsec_desc *desc = &dring->desc[i];
+		dma_addr_t dma_handle;
+		void *buf;
+		u16 len;
+
+		buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+
+		if (!buf) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		desc->dma_addr = dma_handle;
+		desc->addr = buf;
+		desc->len = len;
+	}
+
+	netsec_rx_fill(priv, 0, DESC_NUM);
+
 	return 0;
+
+err_out:
+	netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
+	return err;
 }
 
 static int netsec_netdev_load_ucode_region(struct netsec_priv *priv, u32 reg,
@@ -1276,6 +1595,7 @@
 
 	pm_runtime_get_sync(priv->dev);
 
+	netsec_setup_tx_dring(priv);
 	ret = netsec_setup_rx_dring(priv);
 	if (ret) {
 		netif_err(priv, probe, priv->ndev,
@@ -1346,11 +1666,11 @@
 	netsec_uninit_pkt_dring(priv, NETSEC_RING_TX);
 	netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
 
-	ret = netsec_reset_hardware(priv, false);
-
 	phy_stop(ndev->phydev);
 	phy_disconnect(ndev->phydev);
 
+	ret = netsec_reset_hardware(priv, false);
+
 	pm_runtime_put_sync(priv->dev);
 
 	return ret;
@@ -1360,6 +1680,9 @@
 {
 	struct netsec_priv *priv = netdev_priv(ndev);
 	int ret;
+	u16 data;
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(DESC_NUM);
 
 	ret = netsec_alloc_dring(priv, NETSEC_RING_TX);
 	if (ret)
@@ -1369,10 +1692,18 @@
 	if (ret)
 		goto err1;
 
+	/* set phy power down */
+	data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR) |
+		BMCR_PDOWN;
+	netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data);
+
 	ret = netsec_reset_hardware(priv, true);
 	if (ret)
 		goto err2;
 
+	spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock);
+	spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock);
+
 	return 0;
 err2:
 	netsec_free_dring(priv, NETSEC_RING_RX);
@@ -1405,6 +1736,81 @@
 	return phy_mii_ioctl(ndev->phydev, ifr, cmd);
 }
 
+static int netsec_xdp_xmit(struct net_device *ndev, int n,
+			   struct xdp_frame **frames, u32 flags)
+{
+	struct netsec_priv *priv = netdev_priv(ndev);
+	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+	int drops = 0;
+	int i;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
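+	/* Queue all frames under the TX ring lock; the doorbell is only
+	 * rung on XDP_XMIT_FLUSH, so the hardware sees one kick per batch.
+	 */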
+	spin_lock(&tx_ring->lock);
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = netsec_xdp_queue_one(priv, xdpf, true);
+		if (err != NETSEC_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		} else {
+			tx_ring->xdp_xmit++;
+		}
+	}
+	spin_unlock(&tx_ring->lock);
+
+	if (unlikely(flags & XDP_XMIT_FLUSH)) {
+		netsec_xdp_ring_tx_db(priv, tx_ring->xdp_xmit);
+		tx_ring->xdp_xmit = 0;
+	}
+
+	return n - drops;
+}
+
+static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog,
+			    struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = priv->ndev;
+	struct bpf_prog *old_prog;
+
+	/* For now, support only the usual MTU-sized frames */
+	if (prog && dev->mtu > 1500) {
+		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+		return -EOPNOTSUPP;
+	}
+
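+	/* Restart the interface so the RX ring is rebuilt with the DMA
+	 * direction the new program requires (see netsec_setup_rx_dring())
+	 */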
+	if (netif_running(dev))
+		netsec_netdev_stop(dev);
+
+	/* Detach old prog, if any */
+	old_prog = xchg(&priv->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (netif_running(dev))
+		netsec_netdev_open(dev);
+
+	return 0;
+}
+
+static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp)
+{
+	struct netsec_priv *priv = netdev_priv(ndev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return netsec_xdp_setup(priv, xdp->prog, xdp->extack);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops netsec_netdev_ops = {
 	.ndo_init		= netsec_netdev_init,
 	.ndo_uninit		= netsec_netdev_uninit,
@@ -1415,10 +1821,12 @@
 	.ndo_set_mac_address    = eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= netsec_netdev_ioctl,
+	.ndo_xdp_xmit		= netsec_xdp_xmit,
+	.ndo_bpf		= netsec_xdp,
 };
 
 static int netsec_of_probe(struct platform_device *pdev,
-			   struct netsec_priv *priv)
+			   struct netsec_priv *priv, u32 *phy_addr)
 {
 	priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
 	if (!priv->phy_np) {
@@ -1426,6 +1834,8 @@
 		return -EINVAL;
 	}
 
+	*phy_addr = of_mdio_parse_addr(&pdev->dev, priv->phy_np);
+
 	priv->clk = devm_clk_get(&pdev->dev, NULL); /* get by 'phy_ref_clk' */
 	if (IS_ERR(priv->clk)) {
 		dev_err(&pdev->dev, "phy_ref_clk not found\n");
@@ -1581,7 +1991,7 @@
 			   NETIF_MSG_LINK | NETIF_MSG_PROBE;
 
 	priv->phy_interface = device_get_phy_mode(&pdev->dev);
-	if (priv->phy_interface < 0) {
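+	/* device_get_phy_mode() returns a negative errno on failure, but
+	 * the phy_interface field may be an unsigned type, so cast before
+	 * checking for errors.
+	 */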
+	if ((int)priv->phy_interface < 0) {
 		dev_err(&pdev->dev, "missing required property 'phy-mode'\n");
 		ret = -ENODEV;
 		goto free_ndev;
@@ -1626,12 +2036,14 @@
 	}
 
 	if (dev_of_node(&pdev->dev))
-		ret = netsec_of_probe(pdev, priv);
+		ret = netsec_of_probe(pdev, priv, &phy_addr);
 	else
 		ret = netsec_acpi_probe(pdev, priv, &phy_addr);
 	if (ret)
 		goto free_ndev;
 
+	priv->phy_addr = phy_addr;
+
 	if (!priv->freq) {
 		dev_err(&pdev->dev, "missing PHY reference clock frequency\n");
 		ret = -ENODEV;
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index f27d67a..6e984d5 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -185,8 +185,8 @@
 				 NETIF_MSG_TX_ERR)
 
 /* Parameter for descriptor */
-#define AVE_NR_TXDESC		32	/* Tx descriptor */
-#define AVE_NR_RXDESC		64	/* Rx descriptor */
+#define AVE_NR_TXDESC		64	/* Tx descriptor */
+#define AVE_NR_RXDESC		256	/* Rx descriptor */
 
 #define AVE_DESC_OFS_CMDSTS	0
 #define AVE_DESC_OFS_ADDRL	4
@@ -262,6 +262,7 @@
 	struct regmap		*regmap;
 	unsigned int		pinmode_mask;
 	unsigned int		pinmode_val;
+	u32			wolopts;
 
 	/* stats */
 	struct ave_stats	stats_rx;
@@ -462,16 +463,7 @@
 	priv->pause_rx   = pause->rx_pause;
 	priv->pause_tx   = pause->tx_pause;
 
-	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-	if (pause->rx_pause)
-		phydev->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	if (pause->tx_pause)
-		phydev->advertising ^= ADVERTISED_Asym_Pause;
-
-	if (pause->autoneg) {
-		if (netif_running(ndev))
-			phy_start_aneg(phydev);
-	}
+	phy_set_asym_pause(phydev, pause->rx_pause, pause->tx_pause);
 
 	return 0;
 }
@@ -906,11 +898,11 @@
 
 	/* assert reset */
 	writel(AVE_GRR_RXFFR, priv->base + AVE_GRR);
-	usleep_range(40, 50);
+	udelay(50);
 
 	/* negate reset */
 	writel(0, priv->base + AVE_GRR);
-	usleep_range(10, 20);
+	udelay(20);
 
 	/* negate interrupt status */
 	writel(AVE_GI_RXOVF, priv->base + AVE_GISR);
@@ -1127,11 +1119,8 @@
 			rmt_adv |= LPA_PAUSE_CAP;
 		if (phydev->asym_pause)
 			rmt_adv |= LPA_PAUSE_ASYM;
-		if (phydev->advertising & ADVERTISED_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (phydev->advertising & ADVERTISED_Asym_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_ASYM;
 
+		lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising);
 		cap = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
 		if (cap & FLOW_CTRL_TX)
 			txcr |= AVE_TXCR_FLOCTR;
@@ -1222,14 +1211,17 @@
 
 	priv->phydev = phydev;
 
-	phy_ethtool_get_wol(phydev, &wol);
+	ave_ethtool_get_wol(ndev, &wol);
 	device_set_wakeup_capable(&ndev->dev, !!wol.supported);
 
-	if (!phy_interface_is_rgmii(phydev)) {
-		phydev->supported &= ~PHY_GBIT_FEATURES;
-		phydev->supported |= PHY_BASIC_FEATURES;
-	}
-	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	/* set wol initial state disabled */
+	wol.wolopts = 0;
+	ave_ethtool_set_wol(ndev, &wol);
+
+	if (!phy_interface_is_rgmii(phydev))
+		phy_set_max_speed(phydev, SPEED_100);
+
+	phy_support_asym_pause(phydev);
 
 	phy_attached_info(phydev);
 
@@ -1561,7 +1553,6 @@
 	struct ave_private *priv;
 	struct net_device *ndev;
 	struct device_node *np;
-	struct resource	*res;
 	const void *mac_addr;
 	void __iomem *base;
 	const char *name;
@@ -1575,19 +1566,16 @@
 
 	np = dev->of_node;
 	phy_mode = of_get_phy_mode(np);
-	if (phy_mode < 0) {
+	if ((int)phy_mode < 0) {
 		dev_err(dev, "phy-mode not found\n");
 		return -EINVAL;
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "IRQ not found\n");
+	if (irq < 0)
 		return irq;
-	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
@@ -1607,7 +1595,7 @@
 	ndev->max_mtu = AVE_MAX_ETHFRAME - (ETH_HLEN + ETH_FCS_LEN);
 
 	mac_addr = of_get_mac_address(np);
-	if (mac_addr)
+	if (!IS_ERR(mac_addr))
 		ether_addr_copy(ndev->dev_addr, mac_addr);
 
 	/* if the mac address is invalid, use random mac address */
@@ -1674,19 +1662,19 @@
 					       "socionext,syscon-phy-mode",
 					       1, 0, &args);
 	if (ret) {
-		netdev_err(ndev, "can't get syscon-phy-mode property\n");
+		dev_err(dev, "can't get syscon-phy-mode property\n");
 		goto out_free_netdev;
 	}
 	priv->regmap = syscon_node_to_regmap(args.np);
 	of_node_put(args.np);
 	if (IS_ERR(priv->regmap)) {
-		netdev_err(ndev, "can't map syscon-phy-mode\n");
+		dev_err(dev, "can't map syscon-phy-mode\n");
 		ret = PTR_ERR(priv->regmap);
 		goto out_free_netdev;
 	}
 	ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]);
 	if (ret) {
-		netdev_err(ndev, "invalid phy-mode setting\n");
+		dev_err(dev, "invalid phy-mode setting\n");
 		goto out_free_netdev;
 	}
 
@@ -1704,9 +1692,10 @@
 		 pdev->name, pdev->id);
 
 	/* Register as a NAPI supported driver */
-	netif_napi_add(ndev, &priv->napi_rx, ave_napi_poll_rx, priv->rx.ndesc);
+	netif_napi_add(ndev, &priv->napi_rx, ave_napi_poll_rx,
+		       NAPI_POLL_WEIGHT);
 	netif_tx_napi_add(ndev, &priv->napi_tx, ave_napi_poll_tx,
-			  priv->tx.ndesc);
+			  NAPI_POLL_WEIGHT);
 
 	platform_set_drvdata(pdev, ndev);
 
@@ -1749,6 +1738,58 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int ave_suspend(struct device *dev)
+{
+	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct ave_private *priv = netdev_priv(ndev);
+	int ret = 0;
+
+	if (netif_running(ndev)) {
+		ret = ave_stop(ndev);
+		netif_device_detach(ndev);
+	}
+
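+	/* Record the active WoL settings so ave_resume() can restore them
+	 * after the global reset.
+	 */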
+	ave_ethtool_get_wol(ndev, &wol);
+	priv->wolopts = wol.wolopts;
+
+	return ret;
+}
+
+static int ave_resume(struct device *dev)
+{
+	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct ave_private *priv = netdev_priv(ndev);
+	int ret = 0;
+
+	ave_global_reset(ndev);
+
+	ave_ethtool_get_wol(ndev, &wol);
+	wol.wolopts = priv->wolopts;
+	ave_ethtool_set_wol(ndev, &wol);
+
+	if (ndev->phydev) {
+		ret = phy_resume(ndev->phydev);
+		if (ret)
+			return ret;
+	}
+
+	if (netif_running(ndev)) {
+		ret = ave_open(ndev);
+		netif_device_attach(ndev);
+	}
+
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(ave_pm_ops, ave_suspend, ave_resume);
+#define AVE_PM_OPS	(&ave_pm_ops)
+#else
+#define AVE_PM_OPS	NULL
+#endif
+
 static int ave_pro4_get_pinmode(struct ave_private *priv,
 				phy_interface_t phy_mode, u32 arg)
 {
@@ -1923,10 +1964,12 @@
 	.remove = ave_remove,
 	.driver	= {
 		.name = "ave",
+		.pm   = AVE_PM_OPS,
 		.of_match_table	= of_ave_match,
 	},
 };
 module_platform_driver(ave_driver);
 
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
 MODULE_DESCRIPTION("Socionext UniPhier AVE ethernet driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/Kconfig b/drivers/net/ethernet/stmicro/Kconfig
index ecd7a5e..39ef863 100644
--- a/drivers/net/ethernet/stmicro/Kconfig
+++ b/drivers/net/ethernet/stmicro/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # STMicroelectronics device configuration
 #
diff --git a/drivers/net/ethernet/stmicro/Makefile b/drivers/net/ethernet/stmicro/Makefile
index 9b3bfdd..72fd1f6 100644
--- a/drivers/net/ethernet/stmicro/Makefile
+++ b/drivers/net/ethernet/stmicro/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the STMicroelectronics device drivers.
 #
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 324049e..338e25a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -1,8 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config STMMAC_ETH
-	tristate "STMicroelectronics 10/100/1000/EQOS Ethernet driver"
+	tristate "STMicroelectronics Multi-Gigabit Ethernet driver"
 	depends on HAS_IOMEM && HAS_DMA
 	select MII
-	select PHYLIB
+	select PAGE_POOL
+	select PHYLINK
 	select CRC32
 	imply PTP_1588_CLOCK
 	select RESET_CONTROLLER
@@ -12,6 +14,16 @@
 
 if STMMAC_ETH
 
+config STMMAC_SELFTESTS
+	bool "Support for STMMAC Selftests"
+	depends on INET
+	depends on STMMAC_ETH
+	default n
+	---help---
+	  This adds support for STMMAC Selftests using ethtool. Enable this
+	  feature if you are facing problems with your HW and submit the test
+	  results to the netdev Mailing List.
+
 config STMMAC_PLATFORM
 	tristate "STMMAC Platform bus support"
 	depends on STMMAC_ETH
@@ -30,7 +42,6 @@
 
 config DWMAC_DWC_QOS_ETH
 	tristate "Support for snps,dwc-qos-ethernet.txt DT binding."
-	select PHYLIB
 	select CRC32
 	select MII
 	depends on OF && HAS_DMA
@@ -75,6 +86,14 @@
 	---help---
 	  Support for NXP LPC18xx/43xx DWMAC Ethernet.
 
+config DWMAC_MEDIATEK
+	tristate "MediaTek MT27xx GMAC support"
+	depends on OF && (ARCH_MEDIATEK || COMPILE_TEST)
+	help
+	  Support for MediaTek GMAC Ethernet controller.
+
+	  This selects the MT2712 SoC support for the stmmac driver.
+
 config DWMAC_MESON
 	tristate "Amlogic Meson dwmac support"
 	default ARCH_MESON
@@ -97,6 +116,16 @@
 	  This selects the Oxford Semiconductor OXNASSoC glue layer support for
 	  the stmmac device driver. This driver is used for OX820.
 
+config DWMAC_QCOM_ETHQOS
+	tristate "Qualcomm ETHQOS support"
+	default ARCH_QCOM
+	depends on OF && (ARCH_QCOM || COMPILE_TEST)
+	help
+	  Support for the Qualcomm ETHQOS core.
+
+	  This selects the Qualcomm ETHQOS glue layer support for the
+	  stmmac device driver.
+
 config DWMAC_ROCKCHIP
 	tristate "Rockchip dwmac support"
 	default ARCH_ROCKCHIP
@@ -171,6 +200,7 @@
 config STMMAC_PCI
 	tristate "STMMAC PCI bus support"
 	depends on STMMAC_ETH && PCI
+	depends on COMMON_CLK
 	---help---
 	  This selects the platform specific bus support for the stmmac driver.
 	  This driver was tested on XLINX XC2V3000 FF1152AMT0221
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 99967a8..c59926d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -8,13 +8,17 @@
 	      stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \
 	      $(stmmac-y)
 
+stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
+
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
 obj-$(CONFIG_DWMAC_ANARION)	+= dwmac-anarion.o
 obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
+obj-$(CONFIG_DWMAC_MEDIATEK)	+= dwmac-mediatek.o
 obj-$(CONFIG_DWMAC_MESON)	+= dwmac-meson.o dwmac-meson8b.o
 obj-$(CONFIG_DWMAC_OXNAS)	+= dwmac-oxnas.o
+obj-$(CONFIG_DWMAC_QCOM_ETHQOS)	+= dwmac-qcom-ethqos.o
 obj-$(CONFIG_DWMAC_ROCKCHIP)	+= dwmac-rk.o
 obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)		+= dwmac-sti.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
index 8b50afc..cd478d2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright Altera Corporation (C) 2016. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  * Author: Tien Hock Loh <thloh@altera.com>
  */
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
index 2f58824..442812c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright Altera Corporation (C) 2016. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  * Author: Tien Hock Loh <thloh@altera.com>
  */
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index b9c9003..52971f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   Specialised functions for managing Chained mode
 
@@ -7,17 +8,6 @@
   descriptors in case of the DMA is configured to work in chained or
   in ring mode.
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 272b9ca..912bbb6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   STMMAC Common Header File
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -85,6 +75,7 @@
 	unsigned long rx_missed_cntr;
 	unsigned long rx_overflow_cntr;
 	unsigned long rx_vlan;
+	unsigned long rx_split_hdr_pkt_n;
 	/* Tx/Rx IRQ error info */
 	unsigned long tx_undeflow_irq;
 	unsigned long tx_process_stopped_irq;
@@ -256,12 +247,13 @@
 
 /* Max/Min RI Watchdog Timer count value */
 #define MAX_DMA_RIWT		0xff
-#define MIN_DMA_RIWT		0x20
+#define MIN_DMA_RIWT		0x10
 /* Tx coalesce parameters */
 #define STMMAC_COAL_TX_TIMER	1000
 #define STMMAC_MAX_COAL_TX_TICK	100000
 #define STMMAC_TX_MAX_FRAMES	256
-#define STMMAC_TX_FRAMES	25
+#define STMMAC_TX_FRAMES	1
+#define STMMAC_RX_FRAMES	25
 
 /* Packets types */
 enum packets_types {
@@ -335,6 +327,7 @@
 	/* 802.3az - Energy-Efficient Ethernet (EEE) */
 	unsigned int eee;
 	unsigned int av;
+	unsigned int hash_tb_sz;
 	unsigned int tsoen;
 	/* TX and RX csum */
 	unsigned int tx_coe;
@@ -361,6 +354,14 @@
 	unsigned int frpsel;
 	unsigned int frpbs;
 	unsigned int frpes;
+	unsigned int addr64;
+	unsigned int rssen;
+	unsigned int vlhash;
+	unsigned int sphen;
+	unsigned int vlins;
+	unsigned int dvlan;
+	unsigned int l3l4fnum;
+	unsigned int arpoffsel;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -388,6 +389,16 @@
 
 #define JUMBO_LEN		9000
 
+/* Receive Side Scaling */
+#define STMMAC_RSS_HASH_KEY_SIZE	40
+#define STMMAC_RSS_MAX_TABLE_SIZE	256
+
+/* VLAN */
+#define STMMAC_VLAN_NONE	0x0
+#define STMMAC_VLAN_REMOVE	0x1
+#define STMMAC_VLAN_INSERT	0x2
+#define STMMAC_VLAN_REPLACE	0x3
+
 extern const struct stmmac_desc_ops enh_desc_ops;
 extern const struct stmmac_desc_ops ndesc_ops;
 
@@ -402,8 +413,12 @@
 	u32 speed100;
 	u32 speed1000;
 	u32 speed2500;
-	u32 speed10000;
 	u32 duplex;
+	struct {
+		u32 speed2500;
+		u32 speed5000;
+		u32 speed10000;
+	} xgmii;
 };
 
 struct mii_regs {
@@ -424,12 +439,13 @@
 	const struct stmmac_mode_ops *mode;
 	const struct stmmac_hwtimestamp *ptp;
 	const struct stmmac_tc_ops *tc;
+	const struct stmmac_mmc_ops *mmc;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	void __iomem *pcsr;     /* vpointer to device CSRs */
-	int multicast_filter_bins;
-	int unicast_filter_entries;
-	int mcast_bits_log2;
+	unsigned int multicast_filter_bins;
+	unsigned int unicast_filter_entries;
+	unsigned int mcast_bits_log2;
 	unsigned int rx_csum;
 	unsigned int pcs;
 	unsigned int pmt;
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h
index 0c2432b..9f0b9a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs.h
@@ -1,18 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   Header File to describe the DMA descriptors and related definitions.
   This is for DWMAC100 and 1000 cores.
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -133,7 +123,7 @@
 #define	ETDES1_BUFFER2_SIZE_SHIFT	16
 
 /* Extended Receive descriptor definitions */
-#define	ERDES4_IP_PAYLOAD_TYPE_MASK	GENMASK(2, 6)
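+/* GENMASK() takes (high, low); the old (2, 6) ordering yielded an empty mask */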
+#define	ERDES4_IP_PAYLOAD_TYPE_MASK	GENMASK(6, 2)
 #define	ERDES4_IP_HDR_ERR		BIT(3)
 #define	ERDES4_IP_PAYLOAD_ERR		BIT(4)
 #define	ERDES4_IP_CSUM_BYPASSED		BIT(5)
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index 40d6356..40f7f2d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   Header File to describe Normal/enhanced descriptor functions used for RING
   and CHAINED modes.
@@ -8,17 +9,6 @@
   descriptors in case of the DMA is configured to work in chained or
   in ring mode.
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -29,11 +19,13 @@
 /* Specific functions used for Ring mode */
 
 /* Enhanced descriptors */
-static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end)
+static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end,
+					   int bfsize)
 {
-	p->des1 |= cpu_to_le32((BUF_SIZE_8KiB
-			<< ERDES1_BUFFER2_SIZE_SHIFT)
-		   & ERDES1_BUFFER2_SIZE_MASK);
+	if (bfsize == BUF_SIZE_16KiB)
+		p->des1 |= cpu_to_le32((BUF_SIZE_8KiB
+				<< ERDES1_BUFFER2_SIZE_SHIFT)
+			   & ERDES1_BUFFER2_SIZE_MASK);
 
 	if (end)
 		p->des1 |= cpu_to_le32(ERDES1_END_RING);
@@ -59,11 +51,15 @@
 }
 
 /* Normal descriptors */
-static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end)
+static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize)
 {
-	p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1)
-				<< RDES1_BUFFER2_SIZE_SHIFT)
-		    & RDES1_BUFFER2_SIZE_MASK);
+	if (bfsize >= BUF_SIZE_2KiB) {
+		int bfsize2;
+
+		bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1);
+		p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT)
+			    & RDES1_BUFFER2_SIZE_MASK);
+	}
 
 	if (end)
 		p->des1 |= cpu_to_le32(RDES1_END_RING);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index 85ce80c..527f933 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Adaptrum Anarion DWMAC glue layer
  *
  * Copyright (C) 2017, Adaptrum, Inc.
  * (Written by Alexandru Gagniuc <alex.g at adaptrum.com> for Adaptrum, Inc.)
- * Licensed under the GPLv2 or (at your option) any later version.
  */
 
 #include <linux/io.h>
@@ -62,12 +62,10 @@
 static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
 {
 	int phy_mode;
-	struct resource *res;
 	void __iomem *ctl_block;
 	struct anarion_gmac *gmac;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	ctl_block = devm_ioremap_resource(&pdev->dev, res);
+	ctl_block = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(ctl_block)) {
 		dev_err(&pdev->dev, "Cannot get reset region (%ld)!\n",
 			PTR_ERR(ctl_block));
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 3256e5c..dd9967a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Synopsys DWC Ethernet Quality-of-Service v4.10a linux driver
  *
  * Copyright (C) 2016 Joao Pinto <jpinto@synopsys.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
@@ -339,6 +333,9 @@
 	usleep_range(2000, 4000);
 	gpiod_set_value(eqos->reset, 0);
 
+	/* MDIO bus was already reset just above */
+	data->mdio_bus_data->needs_reset = false;
+
 	eqos->rst = devm_reset_control_get(&pdev->dev, "eqos");
 	if (IS_ERR(eqos->rst)) {
 		err = PTR_ERR(eqos->rst);
@@ -421,7 +418,6 @@
 	const struct dwc_eth_dwmac_data *data;
 	struct plat_stmmacenet_data *plat_dat;
 	struct stmmac_resources stmmac_res;
-	struct resource *res;
 	void *priv;
 	int ret;
 
@@ -434,17 +430,11 @@
 	 * resource initialization is done in the glue logic.
 	 */
 	stmmac_res.irq = platform_get_irq(pdev, 0);
-	if (stmmac_res.irq < 0) {
-		if (stmmac_res.irq != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"IRQ configuration information not found\n");
-
+	if (stmmac_res.irq < 0)
 		return stmmac_res.irq;
-	}
 	stmmac_res.wol_irq = stmmac_res.irq;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	stmmac_res.addr = devm_ioremap_resource(&pdev->dev, res);
+	stmmac_res.addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(stmmac_res.addr))
 		return PTR_ERR(stmmac_res.addr);
 
@@ -455,7 +445,11 @@
 	priv = data->probe(pdev, plat_dat, &stmmac_res);
 	if (IS_ERR(priv)) {
 		ret = PTR_ERR(priv);
-		dev_err(&pdev->dev, "failed to probe subdriver: %d\n", ret);
+
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to probe subdriver: %d\n",
+				ret);
+
 		goto remove_config;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 2c6d7c6..0d21082 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -191,7 +191,7 @@
 	struct device *dev = &gmac->pdev->dev;
 
 	gmac->phy_mode = of_get_phy_mode(dev->of_node);
-	if (gmac->phy_mode < 0) {
+	if ((int)gmac->phy_mode < 0) {
 		dev_err(dev, "missing phy mode property\n");
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
new file mode 100644
index 0000000..79f2ee3
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ */
+#include <linux/bitfield.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_net.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+/* Peri Configuration register for mt2712 */
+#define PERI_ETH_PHY_INTF_SEL	0x418
+#define PHY_INTF_MII		0
+#define PHY_INTF_RGMII		1
+#define PHY_INTF_RMII		4
+#define RMII_CLK_SRC_RXC	BIT(4)
+#define RMII_CLK_SRC_INTERNAL	BIT(5)
+
+#define PERI_ETH_DLY	0x428
+#define ETH_DLY_GTXC_INV	BIT(6)
+#define ETH_DLY_GTXC_ENABLE	BIT(5)
+#define ETH_DLY_GTXC_STAGES	GENMASK(4, 0)
+#define ETH_DLY_TXC_INV		BIT(20)
+#define ETH_DLY_TXC_ENABLE	BIT(19)
+#define ETH_DLY_TXC_STAGES	GENMASK(18, 14)
+#define ETH_DLY_RXC_INV		BIT(13)
+#define ETH_DLY_RXC_ENABLE	BIT(12)
+#define ETH_DLY_RXC_STAGES	GENMASK(11, 7)
+
+#define PERI_ETH_DLY_FINE	0x800
+#define ETH_RMII_DLY_TX_INV	BIT(2)
+#define ETH_FINE_DLY_GTXC	BIT(1)
+#define ETH_FINE_DLY_RXC	BIT(0)
+
+struct mac_delay_struct {
+	u32 tx_delay;
+	u32 rx_delay;
+	bool tx_inv;
+	bool rx_inv;
+};
+
+struct mediatek_dwmac_plat_data {
+	const struct mediatek_dwmac_variant *variant;
+	struct mac_delay_struct mac_delay;
+	struct clk_bulk_data *clks;
+	struct device_node *np;
+	struct regmap *peri_regmap;
+	struct device *dev;
+	int phy_mode;
+	bool rmii_rxc;
+};
+
+struct mediatek_dwmac_variant {
+	int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat);
+	int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat);
+
+	/* clock ids to be requested */
+	const char * const *clk_list;
+	int num_clks;
+
+	u32 dma_bit_mask;
+	u32 rx_delay_max;
+	u32 tx_delay_max;
+};
+
+/* list of clocks required for mac */
+static const char * const mt2712_dwmac_clk_l[] = {
+	"axi", "apb", "mac_main", "ptp_ref"
+};
+
+static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat)
+{
+	int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0;
+	u32 intf_val = 0;
+
+	/* select phy interface in top control domain */
+	switch (plat->phy_mode) {
+	case PHY_INTERFACE_MODE_MII:
+		intf_val |= PHY_INTF_MII;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		intf_val |= (PHY_INTF_RMII | rmii_rxc);
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+		intf_val |= PHY_INTF_RGMII;
+		break;
+	default:
+		dev_err(plat->dev, "phy interface not supported\n");
+		return -EINVAL;
+	}
+
+	regmap_write(plat->peri_regmap, PERI_ETH_PHY_INTF_SEL, intf_val);
+
+	return 0;
+}
+
+static void mt2712_delay_ps2stage(struct mediatek_dwmac_plat_data *plat)
+{
+	struct mac_delay_struct *mac_delay = &plat->mac_delay;
+
+	switch (plat->phy_mode) {
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_RMII:
+		/* 550ps per stage for MII/RMII */
+		mac_delay->tx_delay /= 550;
+		mac_delay->rx_delay /= 550;
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+		/* 170ps per stage for RGMII */
+		mac_delay->tx_delay /= 170;
+		mac_delay->rx_delay /= 170;
+		break;
+	default:
+		dev_err(plat->dev, "phy interface not supported\n");
+		break;
+	}
+}
+
+static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat)
+{
+	struct mac_delay_struct *mac_delay = &plat->mac_delay;
+	u32 delay_val = 0, fine_val = 0;
+
+	mt2712_delay_ps2stage(plat);
+
+	switch (plat->phy_mode) {
+	case PHY_INTERFACE_MODE_MII:
+		delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->tx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->tx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->tx_inv);
+
+		delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv);
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		/* The rmii reference clock comes from the external phy, and
+		 * the "rmii_rxc" property indicates which pin (TXC/RXC) the
+		 * reference clock is connected to. The reference clock is a
+		 * received signal, so rx_delay/rx_inv are used to adjust the
+		 * reference clock timing.
+		 */
+		if (plat->rmii_rxc) {
+			/* The rmii reference clock from outside is connected
+			 * to the RXC pin; the reference clock will be
+			 * adjusted by the RXC delay macro circuit.
+			 */
+			delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay);
+			delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay);
+			delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv);
+		} else {
+			/* The rmii reference clock from outside is connected
+			 * to the TXC pin; the reference clock will be
+			 * adjusted by the TXC delay macro circuit.
+			 */
+			delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay);
+			delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay);
+			delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv);
+		}
+		/* tx_inv inverts the tx clock inside the mac relative to the
+		 * reference clock from the external phy, and this bit is
+		 * located in the same register as the fine-tune bits
+		 */
+		if (mac_delay->tx_inv)
+			fine_val = ETH_RMII_DLY_TX_INV;
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+		fine_val = ETH_FINE_DLY_GTXC | ETH_FINE_DLY_RXC;
+
+		delay_val |= FIELD_PREP(ETH_DLY_GTXC_ENABLE, !!mac_delay->tx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_GTXC_STAGES, mac_delay->tx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_GTXC_INV, mac_delay->tx_inv);
+
+		delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay);
+		delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv);
+		break;
+	default:
+		dev_err(plat->dev, "phy interface not supported\n");
+		return -EINVAL;
+	}
+	regmap_write(plat->peri_regmap, PERI_ETH_DLY, delay_val);
+	regmap_write(plat->peri_regmap, PERI_ETH_DLY_FINE, fine_val);
+
+	return 0;
+}
+
+static const struct mediatek_dwmac_variant mt2712_gmac_variant = {
+		.dwmac_set_phy_interface = mt2712_set_interface,
+		.dwmac_set_delay = mt2712_set_delay,
+		.clk_list = mt2712_dwmac_clk_l,
+		.num_clks = ARRAY_SIZE(mt2712_dwmac_clk_l),
+		.dma_bit_mask = 33,
+		.rx_delay_max = 17600,
+		.tx_delay_max = 17600,
+};
+
+static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
+{
+	struct mac_delay_struct *mac_delay = &plat->mac_delay;
+	u32 tx_delay_ps, rx_delay_ps;
+
+	plat->peri_regmap = syscon_regmap_lookup_by_phandle(plat->np, "mediatek,pericfg");
+	if (IS_ERR(plat->peri_regmap)) {
+		dev_err(plat->dev, "Failed to get pericfg syscon\n");
+		return PTR_ERR(plat->peri_regmap);
+	}
+
+	plat->phy_mode = of_get_phy_mode(plat->np);
+	if (plat->phy_mode < 0) {
+		dev_err(plat->dev, "not find phy-mode\n");
+		return -EINVAL;
+	}
+
+	if (!of_property_read_u32(plat->np, "mediatek,tx-delay-ps", &tx_delay_ps)) {
+		if (tx_delay_ps < plat->variant->tx_delay_max) {
+			mac_delay->tx_delay = tx_delay_ps;
+		} else {
+			dev_err(plat->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
+			return -EINVAL;
+		}
+	}
+
+	if (!of_property_read_u32(plat->np, "mediatek,rx-delay-ps", &rx_delay_ps)) {
+		if (rx_delay_ps < plat->variant->rx_delay_max) {
+			mac_delay->rx_delay = rx_delay_ps;
+		} else {
+			dev_err(plat->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
+			return -EINVAL;
+		}
+	}
+
+	mac_delay->tx_inv = of_property_read_bool(plat->np, "mediatek,txc-inverse");
+	mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse");
+	plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc");
+
+	return 0;
+}
+
+static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat)
+{
+	const struct mediatek_dwmac_variant *variant = plat->variant;
+	int i, num = variant->num_clks;
+
+	plat->clks = devm_kcalloc(plat->dev, num, sizeof(*plat->clks), GFP_KERNEL);
+	if (!plat->clks)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++)
+		plat->clks[i].id = variant->clk_list[i];
+
+	return devm_clk_bulk_get(plat->dev, num, plat->clks);
+}
+
+static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
+{
+	struct mediatek_dwmac_plat_data *plat = priv;
+	const struct mediatek_dwmac_variant *variant = plat->variant;
+	int ret;
+
+	ret = dma_set_mask_and_coherent(plat->dev, DMA_BIT_MASK(variant->dma_bit_mask));
+	if (ret) {
+		dev_err(plat->dev, "No suitable DMA available, err = %d\n", ret);
+		return ret;
+	}
+
+	ret = variant->dwmac_set_phy_interface(plat);
+	if (ret) {
+		dev_err(plat->dev, "failed to set phy interface, err = %d\n", ret);
+		return ret;
+	}
+
+	ret = variant->dwmac_set_delay(plat);
+	if (ret) {
+		dev_err(plat->dev, "failed to set delay value, err = %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
+	if (ret) {
+		dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
+		return ret;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	return 0;
+}
+
+static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
+{
+	struct mediatek_dwmac_plat_data *plat = priv;
+	const struct mediatek_dwmac_variant *variant = plat->variant;
+
+	clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+}
+
+static int mediatek_dwmac_probe(struct platform_device *pdev)
+{
+	struct mediatek_dwmac_plat_data *priv_plat;
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	int ret;
+
+	priv_plat = devm_kzalloc(&pdev->dev, sizeof(*priv_plat), GFP_KERNEL);
+	if (!priv_plat)
+		return -ENOMEM;
+
+	priv_plat->variant = of_device_get_match_data(&pdev->dev);
+	if (!priv_plat->variant) {
+		dev_err(&pdev->dev, "Missing dwmac-mediatek variant\n");
+		return -EINVAL;
+	}
+
+	priv_plat->dev = &pdev->dev;
+	priv_plat->np = pdev->dev.of_node;
+
+	ret = mediatek_dwmac_config_dt(priv_plat);
+	if (ret)
+		return ret;
+
+	ret = mediatek_dwmac_clk_init(priv_plat);
+	if (ret)
+		return ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
+
+	plat_dat->interface = priv_plat->phy_mode;
+	plat_dat->has_gmac4 = 1;
+	plat_dat->has_gmac = 0;
+	plat_dat->pmt = 0;
+	plat_dat->riwt_off = 1;
+	plat_dat->maxmtu = ETH_DATA_LEN;
+	plat_dat->bsp_priv = priv_plat;
+	plat_dat->init = mediatek_dwmac_init;
+	plat_dat->exit = mediatek_dwmac_exit;
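+	/* Run the init callback once now so clocks, delays and the PHY
+	 * interface are configured before stmmac_dvr_probe() touches the
+	 * MAC.
+	 */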
+	mediatek_dwmac_init(pdev, priv_plat);
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret) {
+		stmmac_remove_config_dt(pdev, plat_dat);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id mediatek_dwmac_match[] = {
+	{ .compatible = "mediatek,mt2712-gmac",
+	  .data = &mt2712_gmac_variant },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, mediatek_dwmac_match);
+
+static struct platform_driver mediatek_dwmac_driver = {
+	.probe  = mediatek_dwmac_probe,
+	.remove = stmmac_pltfr_remove,
+	.driver = {
+		.name           = "dwmac-mediatek",
+		.pm		= &stmmac_pltfr_pm_ops,
+		.of_match_table = mediatek_dwmac_match,
+	},
+};
+module_platform_driver(mediatek_dwmac_driver);
+
+MODULE_AUTHOR("Biao Huang <biao.huang@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek DWMAC specific glue layer");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 7fdd176..bbc16b5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Amlogic Meson6 and Meson8 DWMAC glue layer
  *
  * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/device.h>
@@ -52,7 +46,6 @@
 	struct plat_stmmacenet_data *plat_dat;
 	struct stmmac_resources stmmac_res;
 	struct meson_dwmac *dwmac;
-	struct resource *res;
 	int ret;
 
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
@@ -69,8 +62,7 @@
 		goto err_remove_config_dt;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	dwmac->reg = devm_ioremap_resource(&pdev->dev, res);
+	dwmac->reg = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(dwmac->reg)) {
 		ret = PTR_ERR(dwmac->reg);
 		goto err_remove_config_dt;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index c597956..306da8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Amlogic Meson8b, Meson8m2 and GXBB DWMAC glue layer
  *
  * Copyright (C) 2016 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
@@ -314,7 +308,6 @@
 {
 	struct plat_stmmacenet_data *plat_dat;
 	struct stmmac_resources stmmac_res;
-	struct resource *res;
 	struct meson8b_dwmac *dwmac;
 	int ret;
 
@@ -338,8 +331,7 @@
 		ret = -EINVAL;
 		goto err_remove_config_dt;
 	}
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	dwmac->regs = devm_ioremap_resource(&pdev->dev, res);
+	dwmac->regs = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(dwmac->regs)) {
 		ret = PTR_ERR(dwmac->regs);
 		goto err_remove_config_dt;
@@ -347,7 +339,7 @@
 
 	dwmac->dev = &pdev->dev;
 	dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-	if (dwmac->phy_mode < 0) {
+	if ((int)dwmac->phy_mode < 0) {
 		dev_err(&pdev->dev, "missing phy-mode property\n");
 		ret = -EINVAL;
 		goto err_remove_config_dt;
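
The devm_platform_ioremap_resource() conversions in these glue drivers simply fold the former two-step lookup into one call; an equivalent open-coded form, mirroring the removed lines (sketch only):

#include <linux/platform_device.h>

static void __iomem *demo_ioremap(struct platform_device *pdev,
				  unsigned int index)
{
	struct resource *res;

	/* What devm_platform_ioremap_resource(pdev, index) does internally */
	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
	return devm_ioremap_resource(&pdev->dev, res);
}
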
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index 3dc7d27..8551ea8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Oxford Semiconductor OXNAS DWMAC glue layer
  *
@@ -5,13 +6,6 @@
  * Copyright (C) 2014 Daniel Golle <daniel@makrotopia.org>
  * Copyright (C) 2013 Ma Haijun <mahaijuns@gmail.com>
  * Copyright (C) 2012 John Crispin <blogic@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/device.h>
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
new file mode 100644
index 0000000..7ec8954
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018-19, Linaro Limited
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/phy.h>
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+#define RGMII_IO_MACRO_CONFIG		0x0
+#define SDCC_HC_REG_DLL_CONFIG		0x4
+#define SDCC_HC_REG_DDR_CONFIG		0xC
+#define SDCC_HC_REG_DLL_CONFIG2		0x10
+#define SDC4_STATUS			0x14
+#define SDCC_USR_CTL			0x18
+#define RGMII_IO_MACRO_CONFIG2		0x1C
+#define RGMII_IO_MACRO_DEBUG1		0x20
+#define EMAC_SYSTEM_LOW_POWER_DEBUG	0x28
+
+/* RGMII_IO_MACRO_CONFIG fields */
+#define RGMII_CONFIG_FUNC_CLK_EN		BIT(30)
+#define RGMII_CONFIG_POS_NEG_DATA_SEL		BIT(23)
+#define RGMII_CONFIG_GPIO_CFG_RX_INT		GENMASK(21, 20)
+#define RGMII_CONFIG_GPIO_CFG_TX_INT		GENMASK(19, 17)
+#define RGMII_CONFIG_MAX_SPD_PRG_9		GENMASK(16, 8)
+#define RGMII_CONFIG_MAX_SPD_PRG_2		GENMASK(7, 6)
+#define RGMII_CONFIG_INTF_SEL			GENMASK(5, 4)
+#define RGMII_CONFIG_BYPASS_TX_ID_EN		BIT(3)
+#define RGMII_CONFIG_LOOPBACK_EN		BIT(2)
+#define RGMII_CONFIG_PROG_SWAP			BIT(1)
+#define RGMII_CONFIG_DDR_MODE			BIT(0)
+
+/* SDCC_HC_REG_DLL_CONFIG fields */
+#define SDCC_DLL_CONFIG_DLL_RST			BIT(30)
+#define SDCC_DLL_CONFIG_PDN			BIT(29)
+#define SDCC_DLL_CONFIG_MCLK_FREQ		GENMASK(26, 24)
+#define SDCC_DLL_CONFIG_CDR_SELEXT		GENMASK(23, 20)
+#define SDCC_DLL_CONFIG_CDR_EXT_EN		BIT(19)
+#define SDCC_DLL_CONFIG_CK_OUT_EN		BIT(18)
+#define SDCC_DLL_CONFIG_CDR_EN			BIT(17)
+#define SDCC_DLL_CONFIG_DLL_EN			BIT(16)
+#define SDCC_DLL_MCLK_GATING_EN			BIT(5)
+#define SDCC_DLL_CDR_FINE_PHASE			GENMASK(3, 2)
+
+/* SDCC_HC_REG_DDR_CONFIG fields */
+#define SDCC_DDR_CONFIG_PRG_DLY_EN		BIT(31)
+#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY	GENMASK(26, 21)
+#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE	GENMASK(29, 27)
+#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN	BIT(30)
+#define SDCC_DDR_CONFIG_PRG_RCLK_DLY		GENMASK(8, 0)
+
+/* SDCC_HC_REG_DLL_CONFIG2 fields */
+#define SDCC_DLL_CONFIG2_DLL_CLOCK_DIS		BIT(21)
+#define SDCC_DLL_CONFIG2_MCLK_FREQ_CALC		GENMASK(17, 10)
+#define SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL	GENMASK(3, 2)
+#define SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW	BIT(1)
+#define SDCC_DLL_CONFIG2_DDR_CAL_EN		BIT(0)
+
+/* SDC4_STATUS bits */
+#define SDC4_STATUS_DLL_LOCK			BIT(7)
+
+/* RGMII_IO_MACRO_CONFIG2 fields */
+#define RGMII_CONFIG2_RSVD_CONFIG15		GENMASK(31, 17)
+#define RGMII_CONFIG2_RGMII_CLK_SEL_CFG		BIT(16)
+#define RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN	BIT(13)
+#define RGMII_CONFIG2_CLK_DIVIDE_SEL		BIT(12)
+#define RGMII_CONFIG2_RX_PROG_SWAP		BIT(7)
+#define RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL	BIT(6)
+#define RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN	BIT(5)
+
+struct ethqos_emac_por {
+	unsigned int offset;
+	unsigned int value;
+};
+
+struct qcom_ethqos {
+	struct platform_device *pdev;
+	void __iomem *rgmii_base;
+
+	unsigned int rgmii_clk_rate;
+	struct clk *rgmii_clk;
+	unsigned int speed;
+
+	const struct ethqos_emac_por *por;
+	unsigned int num_por;
+};
+
+static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset)
+{
+	return readl(ethqos->rgmii_base + offset);
+}
+
+static void rgmii_writel(struct qcom_ethqos *ethqos,
+			 int value, unsigned int offset)
+{
+	writel(value, ethqos->rgmii_base + offset);
+}
+
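+/* Read-modify-write helper: clear @mask at @offset, then OR in @val */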
+static void rgmii_updatel(struct qcom_ethqos *ethqos,
+			  int mask, int val, unsigned int offset)
+{
+	unsigned int temp;
+
+	temp = rgmii_readl(ethqos, offset);
+	temp = (temp & ~(mask)) | val;
+	rgmii_writel(ethqos, temp, offset);
+}
+
+static void rgmii_dump(struct qcom_ethqos *ethqos)
+{
+	dev_dbg(&ethqos->pdev->dev, "RGMII register dump\n");
+	dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_CONFIG: %x\n",
+		rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG));
+	dev_dbg(&ethqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG: %x\n",
+		rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG));
+	dev_dbg(&ethqos->pdev->dev, "SDCC_HC_REG_DDR_CONFIG: %x\n",
+		rgmii_readl(ethqos, SDCC_HC_REG_DDR_CONFIG));
+	dev_dbg(&ethqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG2: %x\n",
+		rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG2));
+	dev_dbg(&ethqos->pdev->dev, "SDC4_STATUS: %x\n",
+		rgmii_readl(ethqos, SDC4_STATUS));
+	dev_dbg(&ethqos->pdev->dev, "SDCC_USR_CTL: %x\n",
+		rgmii_readl(ethqos, SDCC_USR_CTL));
+	dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_CONFIG2: %x\n",
+		rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG2));
+	dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_DEBUG1: %x\n",
+		rgmii_readl(ethqos, RGMII_IO_MACRO_DEBUG1));
+	dev_dbg(&ethqos->pdev->dev, "EMAC_SYSTEM_LOW_POWER_DEBUG: %x\n",
+		rgmii_readl(ethqos, EMAC_SYSTEM_LOW_POWER_DEBUG));
+}
+
+/* Clock rates */
+#define RGMII_1000_NOM_CLK_FREQ			(250 * 1000 * 1000UL)
+#define RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ	 (50 * 1000 * 1000UL)
+#define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ	  (5 * 1000 * 1000UL)
+
+static void
+ethqos_update_rgmii_clk(struct qcom_ethqos *ethqos, unsigned int speed)
+{
+	switch (speed) {
+	case SPEED_1000:
+		ethqos->rgmii_clk_rate = RGMII_1000_NOM_CLK_FREQ;
+		break;
+
+	case SPEED_100:
+		ethqos->rgmii_clk_rate = RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ;
+		break;
+
+	case SPEED_10:
+		ethqos->rgmii_clk_rate = RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ;
+		break;
+	}
+
+	clk_set_rate(ethqos->rgmii_clk, ethqos->rgmii_clk_rate);
+}
+
+static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos)
+{
+	rgmii_updatel(ethqos, RGMII_CONFIG_FUNC_CLK_EN,
+		      RGMII_CONFIG_FUNC_CLK_EN, RGMII_IO_MACRO_CONFIG);
+}
+
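+/* Power-on-reset defaults for the EMAC v2.3.0 RGMII macro registers */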
+static const struct ethqos_emac_por emac_v2_3_0_por[] = {
+	{ .offset = RGMII_IO_MACRO_CONFIG,	.value = 0x00C01343 },
+	{ .offset = SDCC_HC_REG_DLL_CONFIG,	.value = 0x2004642C },
+	{ .offset = SDCC_HC_REG_DDR_CONFIG,	.value = 0x00000000 },
+	{ .offset = SDCC_HC_REG_DLL_CONFIG2,	.value = 0x00200000 },
+	{ .offset = SDCC_USR_CTL,		.value = 0x00010800 },
+	{ .offset = RGMII_IO_MACRO_CONFIG2,	.value = 0x00002060 },
+};
+
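+/* Hardware DLL bring-up: enable the CDR, cycle CK_OUT_EN with polled
+ * handshakes, then enable DDR calibration.
+ */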
+static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
+{
+	unsigned int val;
+	int retry = 1000;
+
+	/* Set CDR_EN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CDR_EN,
+		      SDCC_DLL_CONFIG_CDR_EN, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Set CDR_EXT_EN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CDR_EXT_EN,
+		      SDCC_DLL_CONFIG_CDR_EXT_EN, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Clear CK_OUT_EN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN,
+		      0, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Set DLL_EN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN,
+		      SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG);
+
+	rgmii_updatel(ethqos, SDCC_DLL_MCLK_GATING_EN,
+		      0, SDCC_HC_REG_DLL_CONFIG);
+
+	rgmii_updatel(ethqos, SDCC_DLL_CDR_FINE_PHASE,
+		      0, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Wait for CK_OUT_EN clear */
+	do {
+		val = rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG);
+		val &= SDCC_DLL_CONFIG_CK_OUT_EN;
+		if (!val)
+			break;
+		mdelay(1);
+		retry--;
+	} while (retry > 0);
+	if (!retry)
+		dev_err(&ethqos->pdev->dev, "Clear CK_OUT_EN timed out\n");
+
+	/* Set CK_OUT_EN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN,
+		      SDCC_DLL_CONFIG_CK_OUT_EN, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Wait for CK_OUT_EN set */
+	retry = 1000;
+	do {
+		val = rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG);
+		val &= SDCC_DLL_CONFIG_CK_OUT_EN;
+		if (val)
+			break;
+		mdelay(1);
+		retry--;
+	} while (retry > 0);
+	if (!retry)
+		dev_err(&ethqos->pdev->dev, "Set CK_OUT_EN timed out\n");
+
+	/* Set DDR_CAL_EN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN,
+		      SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_HC_REG_DLL_CONFIG2);
+
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS,
+		      0, SDCC_HC_REG_DLL_CONFIG2);
+
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_MCLK_FREQ_CALC,
+		dev_err(priv->device, "Failed to read from regmap field.\n");
+
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL,
+		      BIT(2), SDCC_HC_REG_DLL_CONFIG2);
+
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW,
+		      SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW,
+		      SDCC_HC_REG_DLL_CONFIG2);
+
+	return 0;
+}
+
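+/* Per-speed RGMII macro setup: gigabit keeps the internal RX delay
+ * (PRG_RCLK_DLY) with the TX delay enabled, while 100/10 Mbit/s bypass
+ * the TX delay and program the max-speed dividers instead.
+ */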
+static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
+{
+	/* Disable loopback mode */
+	rgmii_updatel(ethqos, RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN,
+		      0, RGMII_IO_MACRO_CONFIG2);
+
+	/* Select RGMII, write 0 to interface select */
+	rgmii_updatel(ethqos, RGMII_CONFIG_INTF_SEL,
+		      0, RGMII_IO_MACRO_CONFIG);
+
+	switch (ethqos->speed) {
+	case SPEED_1000:
+		rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE,
+			      RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN,
+			      0, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL,
+			      RGMII_CONFIG_POS_NEG_DATA_SEL,
+			      RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_PROG_SWAP,
+			      RGMII_CONFIG_PROG_SWAP, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
+			      RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
+			      RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+			      RGMII_CONFIG2_RX_PROG_SWAP,
+			      RGMII_IO_MACRO_CONFIG2);
+
+		/* Set PRG_RCLK_DLY to 57 for 1.8 ns delay */
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY,
+			      57, SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN,
+			      SDCC_DDR_CONFIG_PRG_DLY_EN,
+			      SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
+			      RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG);
+		break;
+
+	case SPEED_100:
+		rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE,
+			      RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN,
+			      RGMII_CONFIG_BYPASS_TX_ID_EN,
+			      RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL,
+			      0, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_PROG_SWAP,
+			      0, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
+			      RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
+			      RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2,
+			      BIT(6), RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		/* Write 0x5 to PRG_RCLK_DLY_CODE */
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE,
+			      (BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY,
+			      SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY,
+			      SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
+			      SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
+			      SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
+			      RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG);
+		break;
+
+	case SPEED_10:
+		rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE,
+			      RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN,
+			      RGMII_CONFIG_BYPASS_TX_ID_EN,
+			      RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL,
+			      0, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_PROG_SWAP,
+			      0, RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9,
+			      BIT(12) | GENMASK(9, 8),
+			      RGMII_IO_MACRO_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP,
+			      0, RGMII_IO_MACRO_CONFIG2);
+		/* Write 0x5 to PRG_RCLK_DLY_CODE */
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE,
+			      (BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY,
+			      SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY,
+			      SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
+			      SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN,
+			      SDCC_HC_REG_DDR_CONFIG);
+		rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN,
+			      RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG);
+		break;
+	default:
+		dev_err(&ethqos->pdev->dev,
+			"Invalid speed %d\n", ethqos->speed);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
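+/* Full macro (re)configuration: restore POR defaults, reset the DLL and
+ * wait for lock, then apply the speed-specific settings.
+ */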
+static int ethqos_configure(struct qcom_ethqos *ethqos)
+{
+	volatile unsigned int dll_lock;
+	unsigned int i, retry = 1000;
+
+	/* Reset to POR values and enable clk */
+	for (i = 0; i < ethqos->num_por; i++)
+		rgmii_writel(ethqos, ethqos->por[i].value,
+			     ethqos->por[i].offset);
+	ethqos_set_func_clk_en(ethqos);
+
+	/* Initialize the DLL first */
+
+	/* Set DLL_RST */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_RST,
+		      SDCC_DLL_CONFIG_DLL_RST, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Set PDN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN,
+		      SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG);
+
+	/* Clear DLL_RST */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_RST, 0,
+		      SDCC_HC_REG_DLL_CONFIG);
+
+	/* Clear PDN */
+	rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, 0,
+		      SDCC_HC_REG_DLL_CONFIG);
+
+	if (ethqos->speed != SPEED_100 && ethqos->speed != SPEED_10) {
+		/* Set DLL_EN */
+		rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN,
+			      SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG);
+
+		/* Set CK_OUT_EN */
+		rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN,
+			      SDCC_DLL_CONFIG_CK_OUT_EN,
+			      SDCC_HC_REG_DLL_CONFIG);
+
+		/* Set USR_CTL bit 26 with mask of 3 bits */
+		rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26), SDCC_USR_CTL);
+
+		/* wait for DLL LOCK */
+		do {
+			mdelay(1);
+			dll_lock = rgmii_readl(ethqos, SDC4_STATUS);
+			if (dll_lock & SDC4_STATUS_DLL_LOCK)
+				break;
+			retry--;
+		} while (retry > 0);
+		if (!retry)
+			dev_err(&ethqos->pdev->dev,
+				"Timeout while waiting for DLL lock\n");
+	}
+
+	if (ethqos->speed == SPEED_1000)
+		ethqos_dll_configure(ethqos);
+
+	ethqos_rgmii_macro_init(ethqos);
+
+	return 0;
+}
+
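+/* stmmac fix_mac_speed hook: retune the RGMII clock and macro on every
+ * link speed change.
+ */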
+static void ethqos_fix_mac_speed(void *priv, unsigned int speed)
+{
+	struct qcom_ethqos *ethqos = priv;
+
+	ethqos->speed = speed;
+	ethqos_update_rgmii_clk(ethqos, speed);
+	ethqos_configure(ethqos);
+}
+
+static int qcom_ethqos_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct qcom_ethqos *ethqos;
+	struct resource *res;
+	int ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat)) {
+		dev_err(&pdev->dev, "dt configuration failed\n");
+		return PTR_ERR(plat_dat);
+	}
+
+	ethqos = devm_kzalloc(&pdev->dev, sizeof(*ethqos), GFP_KERNEL);
+	if (!ethqos) {
+		ret = -ENOMEM;
+		goto err_mem;
+	}
+
+	ethqos->pdev = pdev;
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rgmii");
+	ethqos->rgmii_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ethqos->rgmii_base)) {
+		dev_err(&pdev->dev, "Can't get rgmii base\n");
+		ret = PTR_ERR(ethqos->rgmii_base);
+		goto err_mem;
+	}
+
+	ethqos->por = of_device_get_match_data(&pdev->dev);
+	/* Single variant so far; size the POR table directly. */
+	ethqos->num_por = ARRAY_SIZE(emac_v2_3_0_por);
+
+	ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii");
+	if (IS_ERR(ethqos->rgmii_clk)) {
+		ret = PTR_ERR(ethqos->rgmii_clk);
+		goto err_mem;
+	}
+
+	ret = clk_prepare_enable(ethqos->rgmii_clk);
+	if (ret)
+		goto err_mem;
+
+	ethqos->speed = SPEED_1000;
+	ethqos_update_rgmii_clk(ethqos, SPEED_1000);
+	ethqos_set_func_clk_en(ethqos);
+
+	plat_dat->bsp_priv = ethqos;
+	plat_dat->fix_mac_speed = ethqos_fix_mac_speed;
+	plat_dat->has_gmac4 = 1;
+	plat_dat->pmt = 1;
+	plat_dat->tso_en = of_property_read_bool(np, "snps,tso");
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_clk;
+
+	rgmii_dump(ethqos);
+
+	return ret;
+
+err_clk:
+	clk_disable_unprepare(ethqos->rgmii_clk);
+
+err_mem:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
+}
+
+static int qcom_ethqos_remove(struct platform_device *pdev)
+{
+	struct qcom_ethqos *ethqos;
+	int ret;
+
+	ethqos = get_stmmac_bsp_priv(&pdev->dev);
+	if (!ethqos)
+		return -ENODEV;
+
+	ret = stmmac_pltfr_remove(pdev);
+	clk_disable_unprepare(ethqos->rgmii_clk);
+
+	return ret;
+}
+
+static const struct of_device_id qcom_ethqos_match[] = {
+	{ .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_por},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, qcom_ethqos_match);
+
+static struct platform_driver qcom_ethqos_driver = {
+	.probe  = qcom_ethqos_probe,
+	.remove = qcom_ethqos_remove,
+	.driver = {
+		.name           = "qcom-ethqos",
+		.pm		= &stmmac_pltfr_pm_ops,
+		.of_match_table = of_match_ptr(qcom_ethqos_match),
+	},
+};
+module_platform_driver(qcom_ethqos_driver);
+
+MODULE_DESCRIPTION("Qualcomm ETHQOS driver");
+MODULE_LICENSE("GPL v2");
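
A note on the CK_OUT_EN handshakes in ethqos_dll_configure(): they are open-coded mdelay() loops. In a context that is allowed to sleep, the same wait could be expressed with readl_poll_timeout() from <linux/iopoll.h>; a hedged sketch, where the 1 ms poll interval and 1 s budget are assumptions and base is the remapped RGMII macro:

#include <linux/iopoll.h>

static int demo_wait_ck_out_en(void __iomem *base, bool set)
{
	u32 val;

	/* Poll SDCC_HC_REG_DLL_CONFIG until CK_OUT_EN matches @set */
	return readl_poll_timeout(base + SDCC_HC_REG_DLL_CONFIG, val,
				  !!(val & SDCC_DLL_CONFIG_CK_OUT_EN) == set,
				  1000, 1000000);
}
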
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 7b92336..e2e469c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /**
  * dwmac-rk.c - Rockchip RK3288 DWMAC specific glue layer
  *
  * Copyright (C) 2014 Chen-Zhi (Roger Chen)
  *
  * Chen-Zhi (Roger Chen)  <roger.chen@rock-chips.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/stmmac.h>
@@ -1203,10 +1194,8 @@
 	int ret;
 	struct device *dev = &bsp_priv->pdev->dev;
 
-	if (!ldo) {
-		dev_err(dev, "no regulator found\n");
-		return -1;
-	}
+	if (!ldo)
+		return 0;
 
 	if (enable) {
 		ret = regulator_enable(ldo);
@@ -1342,8 +1331,10 @@
 	}
 
 	ret = phy_power_on(bsp_priv, true);
-	if (ret)
+	if (ret) {
+		gmac_clk_enable(bsp_priv, false);
 		return ret;
+	}
 
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 5b3b06a..e0212d2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -1,21 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright Altera Corporation (C) 2014. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  * Adopted from dwmac-sti.c
  */
 
-#include <linux/mfd/syscon.h>
+#include <linux/mfd/altera-sysmgr.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_net.h>
@@ -38,9 +27,12 @@
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003
 #define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010
+#define SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000100
 
 #define SYSMGR_FPGAGRP_MODULE_REG  0x00000028
 #define SYSMGR_FPGAGRP_MODULE_EMAC 0x00000004
+#define SYSMGR_FPGAINTF_EMAC_REG	0x00000070
+#define SYSMGR_FPGAINTF_EMAC_BIT	0x1
 
 #define EMAC_SPLITTER_CTRL_REG			0x0
 #define EMAC_SPLITTER_CTRL_SPEED_MASK		0x3
@@ -48,8 +40,12 @@
 #define EMAC_SPLITTER_CTRL_SPEED_100		0x3
 #define EMAC_SPLITTER_CTRL_SPEED_1000		0x0
 
+struct socfpga_dwmac;
+struct socfpga_dwmac_ops {
+	int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv);
+};
+
 struct socfpga_dwmac {
-	int	interface;
 	u32	reg_offset;
 	u32	reg_shift;
 	struct	device *dev;
@@ -59,6 +55,7 @@
 	void __iomem *splitter_base;
 	bool f2h_ptp_ref_clk;
 	struct tse_pcs pcs;
+	const struct socfpga_dwmac_ops *ops;
 };
 
 static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
@@ -112,9 +109,8 @@
 	struct resource res_tse_pcs;
 	struct resource res_sgmii_adapter;
 
-	dwmac->interface = of_get_phy_mode(np);
-
-	sys_mgr_base_addr = syscon_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon");
+	sys_mgr_base_addr =
+		altr_sysmgr_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon");
 	if (IS_ERR(sys_mgr_base_addr)) {
 		dev_info(dev, "No sysmgr-syscon node found\n");
 		return PTR_ERR(sys_mgr_base_addr);
@@ -232,25 +228,44 @@
 	return ret;
 }
 
-static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
+static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac)
 {
-	struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
-	int phymode = dwmac->interface;
-	u32 reg_offset = dwmac->reg_offset;
-	u32 reg_shift = dwmac->reg_shift;
-	u32 ctrl, val, module;
+	struct net_device *ndev = dev_get_drvdata(dwmac->dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
 
+	return priv->plat->interface;
+}
+
+static int socfpga_set_phy_mode_common(int phymode, u32 *val)
+{
 	switch (phymode) {
 	case PHY_INTERFACE_MODE_RGMII:
 	case PHY_INTERFACE_MODE_RGMII_ID:
-		val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII;
+		*val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII;
 		break;
 	case PHY_INTERFACE_MODE_MII:
 	case PHY_INTERFACE_MODE_GMII:
 	case PHY_INTERFACE_MODE_SGMII:
-		val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
+		*val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		*val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII;
 		break;
 	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac)
+{
+	struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
+	int phymode = socfpga_get_plat_phymode(dwmac);
+	u32 reg_offset = dwmac->reg_offset;
+	u32 reg_shift = dwmac->reg_shift;
+	u32 ctrl, val, module;
+
+	if (socfpga_set_phy_mode_common(phymode, &val)) {
 		dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
 		return -EINVAL;
 	}
@@ -301,6 +316,62 @@
 	return 0;
 }
 
+static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac)
+{
+	struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr;
+	int phymode = socfpga_get_plat_phymode(dwmac);
+	u32 reg_offset = dwmac->reg_offset;
+	u32 reg_shift = dwmac->reg_shift;
+	u32 ctrl, val, module;
+
+	if (socfpga_set_phy_mode_common(phymode, &val))
+		return -EINVAL;
+
+	/* Overwrite val to GMII if the splitter core is enabled: phymode is
+	 * the actual PHY mode on the PHY hardware, but the PHY interface seen
+	 * by the EMAC core is GMII.
+	 */
+	if (dwmac->splitter_base)
+		val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
+
+	/* Assert reset to the enet controller before changing the phy mode */
+	reset_control_assert(dwmac->stmmac_ocp_rst);
+	reset_control_assert(dwmac->stmmac_rst);
+
+	regmap_read(sys_mgr_base_addr, reg_offset, &ctrl);
+	ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK);
+	ctrl |= val;
+
+	if (dwmac->f2h_ptp_ref_clk ||
+	    phymode == PHY_INTERFACE_MODE_MII ||
+	    phymode == PHY_INTERFACE_MODE_GMII ||
+	    phymode == PHY_INTERFACE_MODE_SGMII) {
+		ctrl |= SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK;
+		regmap_read(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG,
+			    &module);
+		module |= (SYSMGR_FPGAINTF_EMAC_BIT << reg_shift);
+		regmap_write(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG,
+			     module);
+	} else {
+		ctrl &= ~SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK;
+	}
+
+	regmap_write(sys_mgr_base_addr, reg_offset, ctrl);
+
+	/* Deassert reset so the PHY configuration is sampled by the enet
+	 * controller and operation starts in the requested mode.
+	 */
+	reset_control_deassert(dwmac->stmmac_ocp_rst);
+	reset_control_deassert(dwmac->stmmac_rst);
+	if (phymode == PHY_INTERFACE_MODE_SGMII) {
+		if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
+			dev_err(dwmac->dev, "Unable to initialize TSE PCS\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 static int socfpga_dwmac_probe(struct platform_device *pdev)
 {
 	struct plat_stmmacenet_data *plat_dat;
@@ -310,6 +381,13 @@
 	struct socfpga_dwmac	*dwmac;
 	struct net_device	*ndev;
 	struct stmmac_priv	*stpriv;
+	const struct socfpga_dwmac_ops *ops;
+
+	ops = device_get_match_data(&pdev->dev);
+	if (!ops) {
+		dev_err(&pdev->dev, "no of match data provided\n");
+		return -EINVAL;
+	}
 
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
 	if (ret)
@@ -340,6 +418,7 @@
 		goto err_remove_config_dt;
 	}
 
+	dwmac->ops = ops;
 	plat_dat->bsp_priv = dwmac;
 	plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed;
 
@@ -356,7 +435,7 @@
 	 */
 	dwmac->stmmac_rst = stpriv->plat->stmmac_rst;
 
-	ret = socfpga_dwmac_set_phy_mode(dwmac);
+	ret = ops->set_phy_mode(dwmac);
 	if (ret)
 		goto err_dvr_remove;
 
@@ -375,8 +454,9 @@
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct socfpga_dwmac *dwmac_priv = get_stmmac_bsp_priv(dev);
 
-	socfpga_dwmac_set_phy_mode(priv->plat->bsp_priv);
+	dwmac_priv->ops->set_phy_mode(priv->plat->bsp_priv);
 
 	/* Before the enet controller is suspended, the phy is suspended.
 	 * This causes the phy clock to be gated. The enet controller is
@@ -403,8 +483,17 @@
 static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend,
 					       socfpga_dwmac_resume);
 
+static const struct socfpga_dwmac_ops socfpga_gen5_ops = {
+	.set_phy_mode = socfpga_gen5_set_phy_mode,
+};
+
+static const struct socfpga_dwmac_ops socfpga_gen10_ops = {
+	.set_phy_mode = socfpga_gen10_set_phy_mode,
+};
+
 static const struct of_device_id socfpga_dwmac_match[] = {
-	{ .compatible = "altr,socfpga-stmmac" },
+	{ .compatible = "altr,socfpga-stmmac", .data = &socfpga_gen5_ops },
+	{ .compatible = "altr,socfpga-stmmac-a10-s10", .data = &socfpga_gen10_ops },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, socfpga_dwmac_match);
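
The gen5/gen10 split above is the usual per-variant ops pattern: each OF match entry carries an ops structure, and probe resolves it with device_get_match_data(). Reduced to a skeleton with hypothetical names:

#include <linux/platform_device.h>
#include <linux/property.h>

struct demo_ops {
	int (*set_phy_mode)(struct device *dev);
};

static int demo_probe(struct platform_device *pdev)
{
	const struct demo_ops *ops;

	/* Returns the .data pointer of the matched of_device_id entry */
	ops = device_get_match_data(&pdev->dev);
	if (!ops)
		return -EINVAL;

	return ops->set_phy_mode(&pdev->dev);
}
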
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 86e0e05..e9fd661 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * dwmac-sti.c - STMicroelectronics DWMAC Specific Glue layer
  *
  * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited
  * Author: Srinivas Kandagatla <srinivas.kandagatla@st.com>
  * Contributors: Giuseppe Cavallaro <peppe.cavallaro@st.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 7e2e79d..4ef041b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU
  *
  * Copyright (C) STMicroelectronics SA 2017
  * Author:  Alexandre Torgue <alexandre.torgue@st.com> for STMicroelectronics.
- * License terms:  GNU General Public License (GPL), version 2
- *
  */
 
 #include <linux/clk.h>
@@ -25,9 +24,24 @@
 
 #define SYSCFG_MCU_ETH_MASK		BIT(23)
 #define SYSCFG_MP1_ETH_MASK		GENMASK(23, 16)
+#define SYSCFG_PMCCLRR_OFFSET		0x40
 
 #define SYSCFG_PMCR_ETH_CLK_SEL		BIT(16)
 #define SYSCFG_PMCR_ETH_REF_CLK_SEL	BIT(17)
+
+/* Ethernet PHY interface selection in the SYSCFG configuration register
+ *------------------------------------------
+ * src	 |BIT(23)| BIT(22)| BIT(21)|BIT(20)|
+ *------------------------------------------
+ * MII   |   0	 |   0	  |   0    |   1   |
+ *------------------------------------------
+ * GMII  |   0	 |   0	  |   0    |   0   |
+ *------------------------------------------
+ * RGMII |   0	 |   0	  |   1	   |  n/a  |
+ *------------------------------------------
+ * RMII  |   1	 |   0	  |   0	   |  n/a  |
+ *------------------------------------------
+ */
 #define SYSCFG_PMCR_ETH_SEL_MII		BIT(20)
 #define SYSCFG_PMCR_ETH_SEL_RGMII	BIT(21)
 #define SYSCFG_PMCR_ETH_SEL_RMII	BIT(23)
@@ -35,14 +49,54 @@
 #define SYSCFG_MCU_ETH_SEL_MII		0
 #define SYSCFG_MCU_ETH_SEL_RMII		1
 
+/* STM32MP1 register definitions
+ *
+ * Below table summarizes the clock requirement and clock sources for
+ * supported phy interface modes.
+ * __________________________________________________________________________
+ *|PHY_MODE | Normal | PHY wo crystal|   PHY wo crystal   |No 125MHz from PHY|
+ *|         |        |      25MHz    |        50MHz       |                  |
+ * ---------------------------------------------------------------------------
+ *|  MII    |	 -   |     eth-ck    |	      n/a	  |	  n/a        |
+ *|         |        |		     |                    |		     |
+ * ---------------------------------------------------------------------------
+ *|  GMII   |	 -   |     eth-ck    |	      n/a	  |	  n/a        |
+ *|         |        |               |                    |		     |
+ * ---------------------------------------------------------------------------
+ *| RGMII   |	 -   |     eth-ck    |	      n/a	  |  eth-ck (no pin) |
+ *|         |        |               |                    |  st,eth-clk-sel  |
+ * ---------------------------------------------------------------------------
+ *| RMII    |	 -   |     eth-ck    |	    eth-ck        |	  n/a        |
+ *|         |        |		     | st,eth-ref-clk-sel |		     |
+ * ---------------------------------------------------------------------------
+ *
+ * BIT(17) : set this bit in RMII mode when the PHY has no 50MHz crystal
+ * BIT(16) : set this bit in GMII/RGMII mode when the 125MHz clock from the
+ * PHY is not used
+ *-----------------------------------------------------
+ * src	 |         BIT(17)       |       BIT(16)      |
+ *-----------------------------------------------------
+ * MII   |           n/a	 |         n/a        |
+ *-----------------------------------------------------
+ * GMII  |           n/a         |   st,eth-clk-sel   |
+ *-----------------------------------------------------
+ * RGMII |           n/a         |   st,eth-clk-sel   |
+ *-----------------------------------------------------
+ * RMII  |   st,eth-ref-clk-sel	 |         n/a        |
+ *-----------------------------------------------------
+ *
+ */
+
 struct stm32_dwmac {
 	struct clk *clk_tx;
 	struct clk *clk_rx;
 	struct clk *clk_eth_ck;
 	struct clk *clk_ethstp;
 	struct clk *syscfg_clk;
-	bool int_phyclk;	/* Clock from RCC to drive PHY */
-	u32 mode_reg;		/* MAC glue-logic mode register */
+	int eth_clk_sel_reg;
+	int eth_ref_clk_sel_reg;
+	int irq_pwr_wakeup;
+	u32 mode_reg;		 /* MAC glue-logic mode register */
 	struct regmap *regmap;
 	u32 speed;
 	const struct stm32_ops *ops;
@@ -102,7 +156,7 @@
 		if (ret)
 			return ret;
 
-		if (dwmac->int_phyclk) {
+		if (dwmac->clk_eth_ck) {
 			ret = clk_prepare_enable(dwmac->clk_eth_ck);
 			if (ret) {
 				clk_disable_unprepare(dwmac->syscfg_clk);
@@ -111,7 +165,7 @@
 		}
 	} else {
 		clk_disable_unprepare(dwmac->syscfg_clk);
-		if (dwmac->int_phyclk)
+		if (dwmac->clk_eth_ck)
 			clk_disable_unprepare(dwmac->clk_eth_ck);
 	}
 	return ret;
@@ -121,7 +175,7 @@
 {
 	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
 	u32 reg = dwmac->mode_reg;
-	int val;
+	int val, ret;
 
 	switch (plat_dat->interface) {
 	case PHY_INTERFACE_MODE_MII:
@@ -130,19 +184,22 @@
 		break;
 	case PHY_INTERFACE_MODE_GMII:
 		val = SYSCFG_PMCR_ETH_SEL_GMII;
-		if (dwmac->int_phyclk)
+		if (dwmac->eth_clk_sel_reg)
 			val |= SYSCFG_PMCR_ETH_CLK_SEL;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_GMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RMII:
 		val = SYSCFG_PMCR_ETH_SEL_RMII;
-		if (dwmac->int_phyclk)
+		if (dwmac->eth_ref_clk_sel_reg)
 			val |= SYSCFG_PMCR_ETH_REF_CLK_SEL;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
 		val = SYSCFG_PMCR_ETH_SEL_RGMII;
-		if (dwmac->int_phyclk)
+		if (dwmac->eth_clk_sel_reg)
 			val |= SYSCFG_PMCR_ETH_CLK_SEL;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
 		break;
@@ -153,6 +210,11 @@
 		return -EINVAL;
 	}
 
+	/* Need to update PMCCLRR (clear register) */
+	ret = regmap_write(dwmac->regmap, reg + SYSCFG_PMCCLRR_OFFSET,
+			   dwmac->ops->syscfg_eth_mask);
+
+	/* Update PMCSETR (set register) */
 	return regmap_update_bits(dwmac->regmap, reg,
 				 dwmac->ops->syscfg_eth_mask, val);
 }
@@ -180,7 +242,7 @@
 	}
 
 	return regmap_update_bits(dwmac->regmap, reg,
-				 dwmac->ops->syscfg_eth_mask, val);
+				 dwmac->ops->syscfg_eth_mask, val << 23);
 }
 
 static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac)
@@ -232,24 +294,29 @@
 static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 			       struct device *dev)
 {
+	struct platform_device *pdev = to_platform_device(dev);
 	struct device_node *np = dev->of_node;
+	int err = 0;
 
-	dwmac->int_phyclk = of_property_read_bool(np, "st,int-phyclk");
+	/* Gigabit Ethernet 125MHz clock selection. */
+	dwmac->eth_clk_sel_reg = of_property_read_bool(np, "st,eth-clk-sel");
 
-	/* Check if internal clk from RCC selected */
-	if (dwmac->int_phyclk) {
-		/*  Get ETH_CLK clocks */
-		dwmac->clk_eth_ck = devm_clk_get(dev, "eth-ck");
-		if (IS_ERR(dwmac->clk_eth_ck)) {
-			dev_err(dev, "No ETH CK clock provided...\n");
-			return PTR_ERR(dwmac->clk_eth_ck);
-		}
+	/* Ethernet 50Mhz RMII clock selection */
+	dwmac->eth_ref_clk_sel_reg =
+		of_property_read_bool(np, "st,eth-ref-clk-sel");
+
+	/*  Get ETH_CLK clocks */
+	dwmac->clk_eth_ck = devm_clk_get(dev, "eth-ck");
+	if (IS_ERR(dwmac->clk_eth_ck)) {
+		dev_warn(dev, "No phy clock provided...\n");
+		dwmac->clk_eth_ck = NULL;
 	}
 
 	/*  Clock used for low power mode */
 	dwmac->clk_ethstp = devm_clk_get(dev, "ethstp");
 	if (IS_ERR(dwmac->clk_ethstp)) {
-		dev_err(dev, "No ETH peripheral clock provided for CStop mode ...\n");
+		dev_err(dev,
+			"No ETH peripheral clock provided for CStop mode ...\n");
 		return PTR_ERR(dwmac->clk_ethstp);
 	}
 
@@ -260,7 +327,29 @@
 		return PTR_ERR(dwmac->syscfg_clk);
 	}
 
-	return 0;
+	/* Get the IRQ information early so we can still ask for a deferred
+	 * probe before going too far with resource allocation.
+	 */
+	dwmac->irq_pwr_wakeup = platform_get_irq_byname(pdev,
+							"stm32_pwr_wakeup");
+	if (dwmac->irq_pwr_wakeup == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
+	if (!dwmac->clk_eth_ck && dwmac->irq_pwr_wakeup >= 0) {
+		err = device_init_wakeup(&pdev->dev, true);
+		if (err) {
+			dev_err(&pdev->dev, "Failed to init wake up irq\n");
+			return err;
+		}
+		err = dev_pm_set_dedicated_wake_irq(&pdev->dev,
+						    dwmac->irq_pwr_wakeup);
+		if (err) {
+			dev_err(&pdev->dev, "Failed to set wake up irq\n");
+			device_init_wakeup(&pdev->dev, false);
+		}
+		device_set_wakeup_enable(&pdev->dev, false);
+	}
+	return err;
 }
 
 static int stm32_dwmac_probe(struct platform_device *pdev)
@@ -326,9 +415,15 @@
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	int ret = stmmac_dvr_remove(&pdev->dev);
+	struct stm32_dwmac *dwmac = priv->plat->bsp_priv;
 
 	stm32_dwmac_clk_disable(priv->plat->bsp_priv);
 
+	if (dwmac->irq_pwr_wakeup >= 0) {
+		dev_pm_clear_wake_irq(&pdev->dev);
+		device_init_wakeup(&pdev->dev, false);
+	}
+
 	return ret;
 }
 
@@ -342,7 +437,7 @@
 
 	clk_disable_unprepare(dwmac->clk_tx);
 	clk_disable_unprepare(dwmac->syscfg_clk);
-	if (dwmac->int_phyclk)
+	if (dwmac->clk_eth_ck)
 		clk_disable_unprepare(dwmac->clk_eth_ck);
 
 	return ret;
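
The PMCSETR/PMCCLRR pair touched above is a set/clear register idiom: the field is first zeroed through the clear register at +SYSCFG_PMCCLRR_OFFSET, then the new selector is programmed through the set register. As a standalone sketch (register names from the hunk, helper name hypothetical):

#include <linux/regmap.h>

static int demo_syscfg_set_field(struct regmap *map, u32 setr,
				 u32 mask, u32 val)
{
	int ret;

	/* Zero the whole field via the dedicated clear register */
	ret = regmap_write(map, setr + SYSCFG_PMCCLRR_OFFSET, mask);
	if (ret)
		return ret;

	/* Then program the new value through the set register */
	return regmap_update_bits(map, setr, mask, val);
}
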
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 0f660af..6e47be6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * dwmac-sun8i.c - Allwinner sun8i DWMAC specific glue layer
  *
  * Copyright (C) 2017 Corentin Labbe <clabbe.montjoie@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -147,6 +138,20 @@
 	.tx_delay_max = 7,
 };
 
+static const struct emac_variant emac_variant_h6 = {
+	.default_syscon_value = 0x50000,
+	.syscon_field = &sun8i_syscon_reg_field,
+	/* The "Internal PHY" of H6 is not on the die. It's on the
+	 * co-packaged AC200 chip instead.
+	 */
+	.soc_has_internal_phy = false,
+	.support_mii = true,
+	.support_rmii = true,
+	.support_rgmii = true,
+	.rx_delay_max = 31,
+	.tx_delay_max = 7,
+};
+
 #define EMAC_BASIC_CTL0 0x00
 #define EMAC_BASIC_CTL1 0x04
 #define EMAC_INT_STA    0x08
@@ -187,7 +192,7 @@
 
 /* Used in RX_CTL1*/
 #define EMAC_RX_MD              BIT(1)
-#define EMAC_RX_TH_MASK		GENMASK(4, 5)
+#define EMAC_RX_TH_MASK		GENMASK(5, 4)
 #define EMAC_RX_TH_32		0
 #define EMAC_RX_TH_64		(0x1 << 4)
 #define EMAC_RX_TH_96		(0x2 << 4)
@@ -198,7 +203,7 @@
 /* Used in TX_CTL1*/
 #define EMAC_TX_MD              BIT(1)
 #define EMAC_TX_NEXT_FRM        BIT(2)
-#define EMAC_TX_TH_MASK		GENMASK(8, 10)
+#define EMAC_TX_TH_MASK		GENMASK(10, 8)
 #define EMAC_TX_TH_64		0
 #define EMAC_TX_TH_128		(0x1 << 8)
 #define EMAC_TX_TH_192		(0x2 << 8)
@@ -284,18 +289,18 @@
 
 static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr,
 				    struct stmmac_dma_cfg *dma_cfg,
-				    u32 dma_rx_phy, u32 chan)
+				    dma_addr_t dma_rx_phy, u32 chan)
 {
 	/* Write RX descriptors address */
-	writel(dma_rx_phy, ioaddr + EMAC_RX_DESC_LIST);
+	writel(lower_32_bits(dma_rx_phy), ioaddr + EMAC_RX_DESC_LIST);
 }
 
 static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr,
 				    struct stmmac_dma_cfg *dma_cfg,
-				    u32 dma_tx_phy, u32 chan)
+				    dma_addr_t dma_tx_phy, u32 chan)
 {
 	/* Write TX descriptors address */
-	writel(dma_tx_phy, ioaddr + EMAC_TX_DESC_LIST);
+	writel(lower_32_bits(dma_tx_phy), ioaddr + EMAC_TX_DESC_LIST);
 }
 
 /* sun8i_dwmac_dump_regs() - Dump EMAC address space
@@ -646,7 +651,8 @@
 			}
 		}
 	} else {
-		netdev_info(dev, "Too many address, switching to promiscuous\n");
+		if (!(readl(ioaddr + EMAC_RX_FRM_FLT) & EMAC_FRM_FLT_RXALL))
+			netdev_info(dev, "Too many addresses, switching to promiscuous\n");
 		v = EMAC_FRM_FLT_RXALL;
 	}
 
@@ -868,7 +874,12 @@
 	int ret;
 	u32 reg, val;
 
-	regmap_field_read(gmac->regmap_field, &val);
+	ret = regmap_field_read(gmac->regmap_field, &val);
+	if (ret) {
+		dev_err(priv->device, "Fail to read from regmap field.\n");
+		return ret;
+	}
+
 	reg = gmac->variant->default_syscon_value;
 	if (reg != val)
 		dev_warn(priv->device,
@@ -893,6 +904,11 @@
 		 * address. No need to mask it again.
 		 */
 		reg |= 1 << H3_EPHY_ADDR_SHIFT;
+	} else {
+		/* For SoCs without internal PHY the PHY selection bit should be
+		 * set to 0 (external PHY).
+		 */
+		reg &= ~H3_EPHY_SELECT;
 	}
 
 	if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) {
@@ -986,6 +1002,18 @@
 		regulator_disable(gmac->regulator);
 }
 
+static void sun8i_dwmac_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + EMAC_BASIC_CTL0);
+
+	if (enable)
+		value |= EMAC_LOOPBACK;
+	else
+		value &= ~EMAC_LOOPBACK;
+
+	writel(value, ioaddr + EMAC_BASIC_CTL0);
+}
+
 static const struct stmmac_ops sun8i_dwmac_ops = {
 	.core_init = sun8i_dwmac_core_init,
 	.set_mac = sun8i_dwmac_set_mac,
@@ -995,6 +1023,7 @@
 	.flow_ctrl = sun8i_dwmac_flow_ctrl,
 	.set_umac_addr = sun8i_dwmac_set_umac_addr,
 	.get_umac_addr = sun8i_dwmac_get_umac_addr,
+	.set_mac_loopback = sun8i_dwmac_set_mac_loopback,
 };
 
 static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
@@ -1015,6 +1044,8 @@
 	mac->mac = &sun8i_dwmac_ops;
 	mac->dma = &sun8i_dwmac_dma_ops;
 
+	priv->dev->priv_flags |= IFF_UNICAST_FLT;
+
 	/* The loopback bit seems to be re-set when link change
 	 * Simply mask it each time
 	 * Speed 10/100/1000 are set in BIT(2)/BIT(3)
@@ -1147,7 +1178,10 @@
 		return ret;
 	}
 
-	plat_dat->interface = of_get_phy_mode(dev->of_node);
+	ret = of_get_phy_mode(dev->of_node);
+	if (ret < 0)
+		return -EINVAL;
+	plat_dat->interface = ret;
 
 	/* platform data specifying hardware features and callbacks.
 	 * hardware features were copied from Allwinner drivers.
@@ -1192,7 +1226,7 @@
 dwmac_mux:
 	sun8i_dwmac_unset_syscon(gmac);
 dwmac_exit:
-	sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+	stmmac_pltfr_remove(pdev);
 return ret;
 }
 
@@ -1207,6 +1241,8 @@
 		.data = &emac_variant_r40 },
 	{ .compatible = "allwinner,sun50i-a64-emac",
 		.data = &emac_variant_a64 },
+	{ .compatible = "allwinner,sun50i-h6-emac",
+		.data = &emac_variant_h6 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sun8i_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index d07520f..a299da3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * dwmac-sunxi.c - Allwinner sunxi DWMAC specific glue layer
  *
  * Copyright (C) 2013 Chen-Yu Tsai
  *
  * Chen-Yu Tsai  <wens@csie.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/stmmac.h>
@@ -59,7 +50,9 @@
 		gmac->clk_enabled = 1;
 	} else {
 		clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE);
-		clk_prepare(gmac->tx_clk);
+		ret = clk_prepare(gmac->tx_clk);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100.h b/drivers/net/ethernet/stmicro/stmmac/dwmac100.h
index e149848..35ab8d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   MAC 10/100 Header File
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 184ca13..b70d44a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -1,17 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -146,6 +136,7 @@
 #define GMAC_FRAME_FILTER_DAIF	0x00000008	/* DA Inverse Filtering */
 #define GMAC_FRAME_FILTER_PM	0x00000010	/* Pass all multicast */
 #define GMAC_FRAME_FILTER_DBF	0x00000020	/* Disable Broadcast frames */
+#define GMAC_FRAME_FILTER_PCF	0x00000080	/* Pass Control frames */
 #define GMAC_FRAME_FILTER_SAIF	0x00000100	/* Inverse Filtering */
 #define GMAC_FRAME_FILTER_SAF	0x00000200	/* Source Address Filter */
 #define GMAC_FRAME_FILTER_HPF	0x00000400	/* Hash or perfect Filter */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 0877bde..3d69da1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This is the driver for the GMAC on-chip Ethernet controller for ST SoCs.
   DWC Ether MAC 10/100/1000 Universal version 3.41a  has been used for
@@ -7,17 +8,6 @@
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -172,7 +162,7 @@
 	memset(mc_filter, 0, sizeof(mc_filter));
 
 	if (dev->flags & IFF_PROMISC) {
-		value = GMAC_FRAME_FILTER_PR;
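+		/* Promiscuous mode now also passes MAC control frames (PCF) */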
+		value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF;
 	} else if (dev->flags & IFF_ALLMULTI) {
 		value = GMAC_FRAME_FILTER_PM;	/* pass all multi */
 	} else if (!netdev_mc_empty(dev)) {
@@ -198,6 +188,7 @@
 		}
 	}
 
+	value |= GMAC_FRAME_FILTER_HPF;
 	dwmac1000_set_mchash(ioaddr, mc_filter, mcbitslog2);
 
 	/* Handle multiple unicast addresses (perfect filtering) */
@@ -216,6 +207,12 @@
 					    GMAC_ADDR_LOW(reg));
 			reg++;
 		}
+
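+		/* Zero any stale entries left over from a longer, earlier list */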
+		while (reg <= perfect_addr_number) {
+			writel(0, ioaddr + GMAC_ADDR_HIGH(reg));
+			writel(0, ioaddr + GMAC_ADDR_LOW(reg));
+			reg++;
+		}
 	}
 
 #ifdef FRAME_FILTER_DEBUG
@@ -499,6 +496,18 @@
 		x->mac_gmii_rx_proto_engine++;
 }
 
+static void dwmac1000_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + GMAC_CONTROL);
+
+	if (enable)
+		value |= GMAC_CONTROL_LM;
+	else
+		value &= ~GMAC_CONTROL_LM;
+
+	writel(value, ioaddr + GMAC_CONTROL);
+}
+
 const struct stmmac_ops dwmac1000_ops = {
 	.core_init = dwmac1000_core_init,
 	.set_mac = stmmac_set_mac,
@@ -518,6 +527,7 @@
 	.pcs_ctrl_ane = dwmac1000_ctrl_ane,
 	.pcs_rane = dwmac1000_rane,
 	.pcs_get_adv_lp = dwmac1000_get_adv_lp,
+	.set_mac_loopback = dwmac1000_set_mac_loopback,
 };
 
 int dwmac1000_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index aacc4aa..2bac49b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This is the driver for the GMAC on-chip Ethernet controller for ST SoCs.
   DWC Ether MAC 10/100/1000 Universal version 3.41a  has been used for
@@ -7,17 +8,6 @@
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -122,18 +112,18 @@
 
 static void dwmac1000_dma_init_rx(void __iomem *ioaddr,
 				  struct stmmac_dma_cfg *dma_cfg,
-				  u32 dma_rx_phy, u32 chan)
+				  dma_addr_t dma_rx_phy, u32 chan)
 {
 	/* RX descriptor base address list must be written into DMA CSR3 */
-	writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR);
+	writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR);
 }
 
 static void dwmac1000_dma_init_tx(void __iomem *ioaddr,
 				  struct stmmac_dma_cfg *dma_cfg,
-				  u32 dma_tx_phy, u32 chan)
+				  dma_addr_t dma_tx_phy, u32 chan)
 {
 	/* TX descriptor base address list must be written into DMA CSR4 */
-	writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR);
+	writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR);
 }
 
 static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index b735143..ebcad8d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This is the driver for the MAC 10/100 on-chip Ethernet controller
   currently tested on all the ST boards based on STb7109 and stx7200 SoCs.
@@ -9,17 +10,6 @@
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -160,6 +150,18 @@
 	return;
 }
 
+static void dwmac100_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + MAC_CONTROL);
+
+	if (enable)
+		value |= MAC_CONTROL_OM;
+	else
+		value &= ~MAC_CONTROL_OM;
+
+	writel(value, ioaddr + MAC_CONTROL);
+}
+
 const struct stmmac_ops dwmac100_ops = {
 	.core_init = dwmac100_core_init,
 	.set_mac = stmmac_set_mac,
@@ -171,6 +173,7 @@
 	.pmt = dwmac100_pmt,
 	.set_umac_addr = dwmac100_set_umac_addr,
 	.get_umac_addr = dwmac100_get_umac_addr,
+	.set_mac_loopback = dwmac100_set_mac_loopback,
 };
 
 int dwmac100_setup(struct stmmac_priv *priv)
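The new dwmac100_set_mac_loopback() above follows the read-modify-write idiom this driver uses for every MAC register toggle (the dwmac4 and xgmac variants later in this patch are identical in shape). A compile-and-run sketch of the idiom against a plain variable instead of MMIO, with readl()/writel() replaced by direct accesses and the bit position hypothetical:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAC_CONTROL_OM	(1u << 23)	/* hypothetical loopback bit */

static uint32_t mac_control;		/* stands in for the MMIO register */

static void set_mac_loopback(bool enable)
{
	uint32_t value = mac_control;		/* readl() */

	if (enable)
		value |= MAC_CONTROL_OM;
	else
		value &= ~MAC_CONTROL_OM;

	mac_control = value;			/* writel() */
}

int main(void)
{
	set_mac_loopback(true);
	printf("loopback on:  0x%08x\n", mac_control);
	set_mac_loopback(false);
	printf("loopback off: 0x%08x\n", mac_control);
	return 0;
}
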
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index 21dee25..8f0d9bc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This is the driver for the MAC 10/100 on-chip Ethernet controller
   currently tested on all the ST boards based on STb7109 and stx7200 SoCs.
@@ -9,17 +10,6 @@
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -41,18 +31,18 @@
 
 static void dwmac100_dma_init_rx(void __iomem *ioaddr,
 				 struct stmmac_dma_cfg *dma_cfg,
-				 u32 dma_rx_phy, u32 chan)
+				 dma_addr_t dma_rx_phy, u32 chan)
 {
 	/* RX descriptor base addr lists must be written into DMA CSR3 */
-	writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR);
+	writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR);
 }
 
 static void dwmac100_dma_init_tx(void __iomem *ioaddr,
 				 struct stmmac_dma_cfg *dma_cfg,
-				 u32 dma_tx_phy, u32 chan)
+				 dma_addr_t dma_tx_phy, u32 chan)
 {
 	/* TX descriptor base addr lists must be written into DMA CSR4 */
-	writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR);
+	writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR);
 }
 
 /* Store and Forward capability is not used at all.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index eb013d5..89a3420 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * DWMAC4 Header file.
  *
  * Copyright (C) 2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
@@ -18,9 +15,11 @@
 /*  MAC registers */
 #define GMAC_CONFIG			0x00000000
 #define GMAC_PACKET_FILTER		0x00000008
-#define GMAC_HASH_TAB_0_31		0x00000010
-#define GMAC_HASH_TAB_32_63		0x00000014
+#define GMAC_HASH_TAB(x)		(0x10 + (x) * 4)
+#define GMAC_VLAN_TAG			0x00000050
+#define GMAC_VLAN_HASH_TABLE		0x00000058
 #define GMAC_RX_FLOW_CTRL		0x00000090
+#define GMAC_VLAN_INCL			0x00000060
 #define GMAC_QX_TX_FLOW_CTRL(x)		(0x70 + x * 4)
 #define GMAC_TXQ_PRTY_MAP0		0x98
 #define GMAC_TXQ_PRTY_MAP1		0x9C
@@ -41,6 +40,7 @@
 #define GMAC_HW_FEATURE3		0x00000128
 #define GMAC_MDIO_ADDR			0x00000200
 #define GMAC_MDIO_DATA			0x00000204
+#define GMAC_ARP_ADDR			0x00000210
 #define GMAC_ADDR_HIGH(reg)		(0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)		(0x304 + reg * 8)
 
@@ -64,9 +64,24 @@
 #define GMAC_PACKET_FILTER_PR		BIT(0)
 #define GMAC_PACKET_FILTER_HMC		BIT(2)
 #define GMAC_PACKET_FILTER_PM		BIT(4)
+#define GMAC_PACKET_FILTER_PCF		BIT(7)
+#define GMAC_PACKET_FILTER_HPF		BIT(10)
+#define GMAC_PACKET_FILTER_VTFE		BIT(16)
 
 #define GMAC_MAX_PERFECT_ADDRESSES	128
 
+/* MAC VLAN */
+#define GMAC_VLAN_EDVLP			BIT(26)
+#define GMAC_VLAN_VTHM			BIT(25)
+#define GMAC_VLAN_DOVLTC		BIT(20)
+#define GMAC_VLAN_ESVL			BIT(18)
+#define GMAC_VLAN_ETV			BIT(16)
+#define GMAC_VLAN_VID			GENMASK(15, 0)
+#define GMAC_VLAN_VLTI			BIT(20)
+#define GMAC_VLAN_CSVL			BIT(19)
+#define GMAC_VLAN_VLC			GENMASK(17, 16)
+#define GMAC_VLAN_VLC_SHIFT		16
+
 /* MAC RX Queue Enable */
 #define GMAC_RX_QUEUE_CLEAR(queue)	~(GENMASK(1, 0) << ((queue) * 2))
 #define GMAC_RX_AV_QUEUE_ENABLE(queue)	BIT((queue) * 2)
@@ -151,6 +166,9 @@
 #define GMAC_DEBUG_RPESTS		BIT(0)
 
 /* MAC config */
+#define GMAC_CONFIG_ARPEN		BIT(31)
+#define GMAC_CONFIG_SARC		GENMASK(30, 28)
+#define GMAC_CONFIG_SARC_SHIFT		28
 #define GMAC_CONFIG_IPC			BIT(27)
 #define GMAC_CONFIG_2K			BIT(22)
 #define GMAC_CONFIG_ACS			BIT(20)
@@ -160,16 +178,19 @@
 #define GMAC_CONFIG_PS			BIT(15)
 #define GMAC_CONFIG_FES			BIT(14)
 #define GMAC_CONFIG_DM			BIT(13)
+#define GMAC_CONFIG_LM			BIT(12)
 #define GMAC_CONFIG_DCRS		BIT(9)
 #define GMAC_CONFIG_TE			BIT(1)
 #define GMAC_CONFIG_RE			BIT(0)
 
 /* MAC HW features0 bitmap */
+#define GMAC_HW_FEAT_SAVLANINS		BIT(27)
 #define GMAC_HW_FEAT_ADDMAC		BIT(18)
 #define GMAC_HW_FEAT_RXCOESEL		BIT(16)
 #define GMAC_HW_FEAT_TXCOSEL		BIT(14)
 #define GMAC_HW_FEAT_EEESEL		BIT(13)
 #define GMAC_HW_FEAT_TSSEL		BIT(12)
+#define GMAC_HW_FEAT_ARPOFFSEL		BIT(9)
 #define GMAC_HW_FEAT_MMCSEL		BIT(8)
 #define GMAC_HW_FEAT_MGKSEL		BIT(7)
 #define GMAC_HW_FEAT_RWKSEL		BIT(6)
@@ -181,6 +202,7 @@
 #define GMAC_HW_FEAT_MIISEL		BIT(0)
 
 /* MAC HW features1 bitmap */
+#define GMAC_HW_HASH_TB_SZ		GENMASK(25, 24)
 #define GMAC_HW_FEAT_AVSEL		BIT(20)
 #define GMAC_HW_TSOEN			BIT(18)
 #define GMAC_HW_TXFIFOSIZE		GENMASK(10, 6)
@@ -198,6 +220,7 @@
 #define GMAC_HW_FEAT_FRPES		GENMASK(14, 13)
 #define GMAC_HW_FEAT_FRPBS		GENMASK(12, 11)
 #define GMAC_HW_FEAT_FRPSEL		BIT(10)
+#define GMAC_HW_FEAT_DVLAN		BIT(5)
 
 /* MAC HW ADDR regs */
 #define GMAC_HI_DCS			GENMASK(18, 16)
@@ -352,7 +375,8 @@
 
 /* Default operating mode of the MAC */
 #define GMAC_CORE_INIT (GMAC_CONFIG_JD | GMAC_CONFIG_PS | \
-			GMAC_CONFIG_BE | GMAC_CONFIG_DCRS)
+			GMAC_CONFIG_BE | GMAC_CONFIG_DCRS | \
+			GMAC_CONFIG_JE)
 
 /* To dump the core regs excluding  the Address Registers */
 #define	GMAC_REG_NUM	132
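The VLAN defines added above pair a GENMASK() field with an explicit shift (GMAC_VLAN_VLC / GMAC_VLAN_VLC_SHIFT), the pattern dwmac4_enable_vlan() relies on later in this patch. A standalone sketch of inserting and extracting such a field, with 32-bit GENMASK()/BIT() spelled out for userspace:

#include <stdint.h>
#include <stdio.h>

#define BIT(n)		(1u << (n))
#define GENMASK(h, l)	(((~0u) << (l)) & (~0u >> (31 - (h))))

#define GMAC_VLAN_VLC		GENMASK(17, 16)
#define GMAC_VLAN_VLC_SHIFT	16

int main(void)
{
	uint32_t reg = 0;
	uint32_t type = 0x2;	/* hypothetical 2-bit tag-control value */

	/* Insert: clear the field, then OR in the shifted, masked value. */
	reg &= ~GMAC_VLAN_VLC;
	reg |= (type << GMAC_VLAN_VLC_SHIFT) & GMAC_VLAN_VLC;

	/* Extract: mask first, then shift back down. */
	printf("reg=0x%08x field=%u\n", (unsigned)reg,
	       (unsigned)((reg & GMAC_VLAN_VLC) >> GMAC_VLAN_VLC_SHIFT));
	return 0;
}
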
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 7e5d5db..66e60c7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the driver for the GMAC on-chip Ethernet controller for ST SoCs.
  * DWC Ether MAC version 4.00  has been used for developing this code.
@@ -6,10 +7,6 @@
  *
  * Copyright (C) 2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
@@ -28,15 +25,9 @@
 {
 	void __iomem *ioaddr = hw->pcsr;
 	u32 value = readl(ioaddr + GMAC_CONFIG);
-	int mtu = dev->mtu;
 
 	value |= GMAC_CORE_INIT;
 
-	if (mtu > 1500)
-		value |= GMAC_CONFIG_2K;
-	if (mtu > 2000)
-		value |= GMAC_CONFIG_JE;
-
 	if (hw->ps) {
 		value |= GMAC_CONFIG_TE;
 
@@ -88,6 +79,8 @@
 	u32 value;
 
 	base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
+	if (queue >= 4)
+		queue -= 4;
 
 	value = readl(ioaddr + base_register);
 
@@ -105,6 +98,8 @@
 	u32 value;
 
 	base_register = (queue < 4) ? GMAC_TXQ_PRTY_MAP0 : GMAC_TXQ_PRTY_MAP1;
+	if (queue >= 4)
+		queue -= 4;
 
 	value = readl(ioaddr + base_register);
 
@@ -192,6 +187,8 @@
 	default:
 		break;
 	}
+
+	writel(value, ioaddr + MTL_OPERATION_MODE);
 }
 
 static void dwmac4_set_mtl_tx_queue_weight(struct mac_device_info *hw,
@@ -401,57 +398,75 @@
 			      struct net_device *dev)
 {
 	void __iomem *ioaddr = (void __iomem *)dev->base_addr;
-	unsigned int value = 0;
+	int numhashregs = (hw->multicast_filter_bins >> 5);
+	int mcbitslog2 = hw->mcast_bits_log2;
+	unsigned int value;
+	u32 mc_filter[8];
+	int i;
 
+	memset(mc_filter, 0, sizeof(mc_filter));
+
+	value = readl(ioaddr + GMAC_PACKET_FILTER);
+	value &= ~GMAC_PACKET_FILTER_HMC;
+	value &= ~GMAC_PACKET_FILTER_HPF;
+	value &= ~GMAC_PACKET_FILTER_PCF;
+	value &= ~GMAC_PACKET_FILTER_PM;
+	value &= ~GMAC_PACKET_FILTER_PR;
 	if (dev->flags & IFF_PROMISC) {
-		value = GMAC_PACKET_FILTER_PR;
+		value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF;
 	} else if ((dev->flags & IFF_ALLMULTI) ||
-			(netdev_mc_count(dev) > HASH_TABLE_SIZE)) {
+		   (netdev_mc_count(dev) > hw->multicast_filter_bins)) {
 		/* Pass all multi */
-		value = GMAC_PACKET_FILTER_PM;
-		/* Set the 64 bits of the HASH tab. To be updated if taller
-		 * hash table is used
-		 */
-		writel(0xffffffff, ioaddr + GMAC_HASH_TAB_0_31);
-		writel(0xffffffff, ioaddr + GMAC_HASH_TAB_32_63);
+		value |= GMAC_PACKET_FILTER_PM;
+		/* Set all the bits of the HASH tab */
+		memset(mc_filter, 0xff, sizeof(mc_filter));
 	} else if (!netdev_mc_empty(dev)) {
-		u32 mc_filter[2];
 		struct netdev_hw_addr *ha;
 
 		/* Hash filter for multicast */
-		value = GMAC_PACKET_FILTER_HMC;
+		value |= GMAC_PACKET_FILTER_HMC;
 
-		memset(mc_filter, 0, sizeof(mc_filter));
 		netdev_for_each_mc_addr(ha, dev) {
-			/* The upper 6 bits of the calculated CRC are used to
-			 * index the content of the Hash Table Reg 0 and 1.
+			/* The upper n bits of the calculated CRC are used to
+			 * index the contents of the hash table. The number of
+			 * bits used depends on the hardware configuration
+			 * selected at core configuration time.
 			 */
-			int bit_nr =
-				(bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26);
-			/* The most significant bit determines the register
-			 * to use while the other 5 bits determines the bit
-			 * within the selected register
+			u32 bit_nr = bitrev32(~crc32_le(~0, ha->addr,
+					ETH_ALEN)) >> (32 - mcbitslog2);
+			/* The most significant bit determines the register to
+			 * use (H/L) while the other 5 bits determine the bit
+			 * within the register.
 			 */
-			mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1F));
+			mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1f));
 		}
-		writel(mc_filter[0], ioaddr + GMAC_HASH_TAB_0_31);
-		writel(mc_filter[1], ioaddr + GMAC_HASH_TAB_32_63);
 	}
 
+	for (i = 0; i < numhashregs; i++)
+		writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i));
+
+	value |= GMAC_PACKET_FILTER_HPF;
+
 	/* Handle multiple unicast addresses */
-	if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) {
+	if (netdev_uc_count(dev) > hw->unicast_filter_entries) {
 		/* Switch to promiscuous mode if more than 128 addrs
 		 * are required
 		 */
 		value |= GMAC_PACKET_FILTER_PR;
-	} else if (!netdev_uc_empty(dev)) {
-		int reg = 1;
+	} else {
 		struct netdev_hw_addr *ha;
+		int reg = 1;
 
 		netdev_for_each_uc_addr(ha, dev) {
 			dwmac4_set_umac_addr(hw, ha->addr, reg);
 			reg++;
 		}
+
+		while (reg < GMAC_MAX_PERFECT_ADDRESSES) {
+			writel(0, ioaddr + GMAC_ADDR_HIGH(reg));
+			writel(0, ioaddr + GMAC_ADDR_LOW(reg));
+			reg++;
+		}
 	}
 
 	writel(value, ioaddr + GMAC_PACKET_FILTER);
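The rewritten filter above derives the hash-bit index from a bit-reversed CRC-32 of the MAC address and keeps only the top mcbitslog2 bits; bit_nr >> 5 then selects the 32-bit hash register and bit_nr & 0x1f the bit within it. A self-contained sketch with crc32_le() and bitrev32() written out bitwise (the kernel versions are table-driven but compute the same values):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t crc32_le(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
	}
	return crc;
}

static uint32_t bitrev32(uint32_t x)
{
	uint32_t r = 0;

	for (int i = 0; i < 32; i++)
		r = (r << 1) | ((x >> i) & 1);
	return r;
}

int main(void)
{
	const uint8_t addr[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	int mcbitslog2 = 6;		/* 64-bin hash table */
	uint32_t mc_filter[8] = { 0 };
	uint32_t bit_nr;

	bit_nr = bitrev32(~crc32_le(~0, addr, 6)) >> (32 - mcbitslog2);
	mc_filter[bit_nr >> 5] |= 1u << (bit_nr & 0x1f);

	printf("bit %u -> GMAC_HASH_TAB(%u), bit %u\n", (unsigned)bit_nr,
	       (unsigned)(bit_nr >> 5), (unsigned)(bit_nr & 0x1f));
	return 0;
}
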
@@ -469,8 +484,9 @@
 	if (fc & FLOW_RX) {
 		pr_debug("\tReceive Flow-Control ON\n");
 		flow |= GMAC_RX_FLOW_CTRL_RFE;
-		writel(flow, ioaddr + GMAC_RX_FLOW_CTRL);
 	}
+	writel(flow, ioaddr + GMAC_RX_FLOW_CTRL);
+
 	if (fc & FLOW_TX) {
 		pr_debug("\tTransmit Flow-Control ON\n");
 
@@ -478,7 +494,7 @@
 			pr_debug("\tduplex mode: PAUSE %d\n", pause_time);
 
 		for (queue = 0; queue < tx_cnt; queue++) {
-			flow |= GMAC_TX_FLOW_CTRL_TFE;
+			flow = GMAC_TX_FLOW_CTRL_TFE;
 
 			if (duplex)
 				flow |=
@@ -486,6 +502,9 @@
 
 			writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue));
 		}
+	} else {
+		for (queue = 0; queue < tx_cnt; queue++)
+			writel(0, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue));
 	}
 }
 
@@ -701,6 +720,85 @@
 		x->mac_gmii_rx_proto_engine++;
 }
 
+static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + GMAC_CONFIG);
+
+	if (enable)
+		value |= GMAC_CONFIG_LM;
+	else
+		value &= ~GMAC_CONFIG_LM;
+
+	writel(value, ioaddr + GMAC_CONFIG);
+}
+
+static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash,
+				    bool is_double)
+{
+	void __iomem *ioaddr = hw->pcsr;
+
+	writel(hash, ioaddr + GMAC_VLAN_HASH_TABLE);
+
+	if (hash) {
+		u32 value = GMAC_VLAN_VTHM | GMAC_VLAN_ETV;
+		if (is_double) {
+			value |= GMAC_VLAN_EDVLP;
+			value |= GMAC_VLAN_ESVL;
+			value |= GMAC_VLAN_DOVLTC;
+		}
+
+		writel(value, ioaddr + GMAC_VLAN_TAG);
+	} else {
+		u32 value = readl(ioaddr + GMAC_VLAN_TAG);
+
+		value &= ~(GMAC_VLAN_VTHM | GMAC_VLAN_ETV);
+		value &= ~(GMAC_VLAN_EDVLP | GMAC_VLAN_ESVL);
+		value &= ~GMAC_VLAN_DOVLTC;
+		value &= ~GMAC_VLAN_VID;
+
+		writel(value, ioaddr + GMAC_VLAN_TAG);
+	}
+}
+
+static void dwmac4_sarc_configure(void __iomem *ioaddr, int val)
+{
+	u32 value = readl(ioaddr + GMAC_CONFIG);
+
+	value &= ~GMAC_CONFIG_SARC;
+	value |= val << GMAC_CONFIG_SARC_SHIFT;
+
+	writel(value, ioaddr + GMAC_CONFIG);
+}
+
+static void dwmac4_enable_vlan(struct mac_device_info *hw, u32 type)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + GMAC_VLAN_INCL);
+	value |= GMAC_VLAN_VLTI;
+	value |= GMAC_VLAN_CSVL; /* Only use SVLAN */
+	value &= ~GMAC_VLAN_VLC;
+	value |= (type << GMAC_VLAN_VLC_SHIFT) & GMAC_VLAN_VLC;
+	writel(value, ioaddr + GMAC_VLAN_INCL);
+}
+
+static void dwmac4_set_arp_offload(struct mac_device_info *hw, bool en,
+				   u32 addr)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	writel(addr, ioaddr + GMAC_ARP_ADDR);
+
+	value = readl(ioaddr + GMAC_CONFIG);
+	if (en)
+		value |= GMAC_CONFIG_ARPEN;
+	else
+		value &= ~GMAC_CONFIG_ARPEN;
+	writel(value, ioaddr + GMAC_CONFIG);
+}
+
 const struct stmmac_ops dwmac4_ops = {
 	.core_init = dwmac4_core_init,
 	.set_mac = stmmac_set_mac,
@@ -730,6 +828,11 @@
 	.pcs_get_adv_lp = dwmac4_get_adv_lp,
 	.debug = dwmac4_debug,
 	.set_filter = dwmac4_set_filter,
+	.set_mac_loopback = dwmac4_set_mac_loopback,
+	.update_vlan_hash = dwmac4_update_vlan_hash,
+	.sarc_configure = dwmac4_sarc_configure,
+	.enable_vlan = dwmac4_enable_vlan,
+	.set_arp_offload = dwmac4_set_arp_offload,
 };
 
 const struct stmmac_ops dwmac410_ops = {
@@ -761,6 +864,11 @@
 	.pcs_get_adv_lp = dwmac4_get_adv_lp,
 	.debug = dwmac4_debug,
 	.set_filter = dwmac4_set_filter,
+	.set_mac_loopback = dwmac4_set_mac_loopback,
+	.update_vlan_hash = dwmac4_update_vlan_hash,
+	.sarc_configure = dwmac4_sarc_configure,
+	.enable_vlan = dwmac4_enable_vlan,
+	.set_arp_offload = dwmac4_set_arp_offload,
 };
 
 const struct stmmac_ops dwmac510_ops = {
@@ -797,6 +905,11 @@
 	.safety_feat_dump = dwmac5_safety_feat_dump,
 	.rxp_config = dwmac5_rxp_config,
 	.flex_pps_config = dwmac5_flex_pps_config,
+	.set_mac_loopback = dwmac4_set_mac_loopback,
+	.update_vlan_hash = dwmac4_update_vlan_hash,
+	.sarc_configure = dwmac4_sarc_configure,
+	.enable_vlan = dwmac4_enable_vlan,
+	.set_arp_offload = dwmac4_set_arp_offload,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 20299f6..15eb1ab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This contains the functions to handle the descriptors for DesignWare databook
  * 4.xx.
  *
  * Copyright (C) 2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
@@ -241,15 +238,18 @@
 static int dwmac4_rx_check_timestamp(void *desc)
 {
 	struct dma_desc *p = (struct dma_desc *)desc;
+	unsigned int rdes0 = le32_to_cpu(p->des0);
+	unsigned int rdes1 = le32_to_cpu(p->des1);
+	unsigned int rdes3 = le32_to_cpu(p->des3);
 	u32 own, ctxt;
 	int ret = 1;
 
-	own = p->des3 & RDES3_OWN;
-	ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR)
+	own = rdes3 & RDES3_OWN;
+	ctxt = ((rdes3 & RDES3_CONTEXT_DESCRIPTOR)
 		>> RDES3_CONTEXT_DESCRIPTOR_SHIFT);
 
 	if (likely(!own && ctxt)) {
-		if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff))
+		if ((rdes0 == 0xffffffff) && (rdes1 == 0xffffffff))
 			/* Corrupted value */
 			ret = -EINVAL;
 		else
@@ -268,7 +268,7 @@
 	int ret = -EINVAL;
 
 	/* Get the status from normal w/b descriptor */
-	if (likely(p->des3 & TDES3_RS1V)) {
+	if (likely(le32_to_cpu(p->des3) & RDES3_RDES1_VALID)) {
 		if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) {
 			int i = 0;
 
@@ -293,7 +293,7 @@
 }
 
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
-				   int mode, int end)
+				   int mode, int end, int bfsize)
 {
 	dwmac4_set_rx_owner(p, disable_rx_ic);
 }
@@ -443,6 +443,55 @@
 	p->des3 = 0;
 }
 
+static void dwmac4_set_sarc(struct dma_desc *p, u32 sarc_type)
+{
+	sarc_type <<= TDES3_SA_INSERT_CTRL_SHIFT;
+
+	p->des3 |= cpu_to_le32(sarc_type & TDES3_SA_INSERT_CTRL_MASK);
+}
+
+static int set_16kib_bfsize(int mtu)
+{
+	int ret = 0;
+
+	if (unlikely(mtu >= BUF_SIZE_8KiB))
+		ret = BUF_SIZE_16KiB;
+	return ret;
+}
+
+static void dwmac4_set_vlan_tag(struct dma_desc *p, u16 tag, u16 inner_tag,
+				u32 inner_type)
+{
+	p->des0 = 0;
+	p->des1 = 0;
+	p->des2 = 0;
+	p->des3 = 0;
+
+	/* Inner VLAN */
+	if (inner_type) {
+		u32 des = inner_tag << TDES2_IVT_SHIFT;
+
+		des &= TDES2_IVT_MASK;
+		p->des2 = cpu_to_le32(des);
+
+		des = inner_type << TDES3_IVTIR_SHIFT;
+		des &= TDES3_IVTIR_MASK;
+		p->des3 = cpu_to_le32(des | TDES3_IVLTV);
+	}
+
+	/* Outer VLAN */
+	p->des3 |= cpu_to_le32(tag & TDES3_VLAN_TAG);
+	p->des3 |= cpu_to_le32(TDES3_VLTV);
+
+	p->des3 |= cpu_to_le32(TDES3_CONTEXT_TYPE);
+}
+
+static void dwmac4_set_vlan(struct dma_desc *p, u32 type)
+{
+	type <<= TDES2_VLAN_TAG_SHIFT;
+	p->des2 |= cpu_to_le32(type & TDES2_VLAN_TAG_MASK);
+}
+
 const struct stmmac_desc_ops dwmac4_desc_ops = {
 	.tx_status = dwmac4_wrback_get_tx_status,
 	.rx_status = dwmac4_wrback_get_rx_status,
@@ -467,6 +516,11 @@
 	.get_addr = dwmac4_get_addr,
 	.set_addr = dwmac4_set_addr,
 	.clear = dwmac4_clear,
+	.set_sarc = dwmac4_set_sarc,
+	.set_vlan_tag = dwmac4_set_vlan_tag,
+	.set_vlan = dwmac4_set_vlan,
 };
 
-const struct stmmac_mode_ops dwmac4_ring_mode_ops = { };
+const struct stmmac_mode_ops dwmac4_ring_mode_ops = {
+	.set_16kib_bfsize = set_16kib_bfsize,
+};
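dwmac4_set_vlan_tag() above builds a context descriptor: the inner tag and its insert/replace control live in TDES2/TDES3 bit-fields, the outer tag in the low 16 bits of TDES3, all stored little-endian via cpu_to_le32(). A host-endian sketch of the same packing, with field positions copied from dwmac4_descs.h and made-up tag values:

#include <stdint.h>
#include <stdio.h>

#define TDES2_IVT_SHIFT		16
#define TDES2_IVT_MASK		(0xffffu << 16)	/* GENMASK(31, 16) */
#define TDES3_IVTIR_SHIFT	18
#define TDES3_IVTIR_MASK	(0x3u << 18)	/* GENMASK(19, 18) */
#define TDES3_IVLTV		(1u << 17)
#define TDES3_VLAN_TAG		0xffffu		/* GENMASK(15, 0) */
#define TDES3_VLTV		(1u << 16)
#define TDES3_CONTEXT_TYPE	(1u << 30)

int main(void)
{
	uint16_t tag = 100, inner_tag = 200;	/* hypothetical VIDs */
	uint32_t inner_type = 0x2;		/* hypothetical "insert" control */
	uint32_t des2, des3;

	des2 = ((uint32_t)inner_tag << TDES2_IVT_SHIFT) & TDES2_IVT_MASK;
	des3 = ((inner_type << TDES3_IVTIR_SHIFT) & TDES3_IVTIR_MASK) |
	       TDES3_IVLTV;
	des3 |= (tag & TDES3_VLAN_TAG) | TDES3_VLTV | TDES3_CONTEXT_TYPE;

	printf("TDES2=0x%08x TDES3=0x%08x\n", des2, des3);
	return 0;
}
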
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
index 9736c50..0d7b3bb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Header File to describe the DMA descriptors and related definitions specific
  * for DesignWare databook 4.xx.
  *
  * Copyright (C) 2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
@@ -21,13 +18,21 @@
 /* TDES2 (read format) */
 #define TDES2_BUFFER1_SIZE_MASK		GENMASK(13, 0)
 #define TDES2_VLAN_TAG_MASK		GENMASK(15, 14)
+#define TDES2_VLAN_TAG_SHIFT		14
 #define TDES2_BUFFER2_SIZE_MASK		GENMASK(29, 16)
 #define TDES2_BUFFER2_SIZE_MASK_SHIFT	16
+#define TDES3_IVTIR_MASK		GENMASK(19, 18)
+#define TDES3_IVTIR_SHIFT		18
+#define TDES3_IVLTV			BIT(17)
 #define TDES2_TIMESTAMP_ENABLE		BIT(30)
+#define TDES2_IVT_MASK			GENMASK(31, 16)
+#define TDES2_IVT_SHIFT			16
 #define TDES2_INTERRUPT_ON_COMPLETION	BIT(31)
 
 /* TDES3 (read format) */
 #define TDES3_PACKET_SIZE_MASK		GENMASK(14, 0)
+#define TDES3_VLAN_TAG			GENMASK(15, 0)
+#define TDES3_VLTV			BIT(16)
 #define TDES3_CHECKSUM_INSERTION_MASK	GENMASK(17, 16)
 #define TDES3_CHECKSUM_INSERTION_SHIFT	16
 #define TDES3_TCP_PKT_PAYLOAD_MASK	GENMASK(17, 0)
@@ -35,6 +40,7 @@
 #define TDES3_HDR_LEN_SHIFT		19
 #define TDES3_SLOT_NUMBER_MASK		GENMASK(22, 19)
 #define TDES3_SA_INSERT_CTRL_MASK	GENMASK(25, 23)
+#define TDES3_SA_INSERT_CTRL_SHIFT	23
 #define TDES3_CRC_PAD_CTRL_MASK		GENMASK(27, 26)
 
 /* TDES3 (write back format) */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index edb6053..68c1579 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * This is the driver for the GMAC on-chip Ethernet controller for ST SoCs.
  * DWC Ether MAC version 4.xx  has been used for  developing this code.
@@ -6,10 +7,6 @@
  *
  * Copyright (C) 2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
@@ -73,7 +70,7 @@
 
 static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr,
 				    struct stmmac_dma_cfg *dma_cfg,
-				    u32 dma_rx_phy, u32 chan)
+				    dma_addr_t dma_rx_phy, u32 chan)
 {
 	u32 value;
 	u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
@@ -82,12 +79,12 @@
 	value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
 	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
-	writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
+	writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
 }
 
 static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
 				    struct stmmac_dma_cfg *dma_cfg,
-				    u32 dma_tx_phy, u32 chan)
+				    dma_addr_t dma_tx_phy, u32 chan)
 {
 	u32 value;
 	u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
@@ -100,7 +97,7 @@
 
 	writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
-	writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
+	writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
 }
 
 static void dwmac4_dma_init_channel(void __iomem *ioaddr,
@@ -336,7 +333,7 @@
 	dma_cap->mbps_10_100 = (hw_cap & GMAC_HW_FEAT_MIISEL);
 	dma_cap->mbps_1000 = (hw_cap & GMAC_HW_FEAT_GMIISEL) >> 1;
 	dma_cap->half_duplex = (hw_cap & GMAC_HW_FEAT_HDSEL) >> 2;
-	dma_cap->hash_filter = (hw_cap & GMAC_HW_FEAT_VLHASH) >> 4;
+	dma_cap->vlhash = (hw_cap & GMAC_HW_FEAT_VLHASH) >> 4;
 	dma_cap->multi_addr = (hw_cap & GMAC_HW_FEAT_ADDMAC) >> 18;
 	dma_cap->pcs = (hw_cap & GMAC_HW_FEAT_PCSSEL) >> 3;
 	dma_cap->sma_mdio = (hw_cap & GMAC_HW_FEAT_SMASEL) >> 5;
@@ -351,9 +348,12 @@
 	/* TX and RX csum */
 	dma_cap->tx_coe = (hw_cap & GMAC_HW_FEAT_TXCOSEL) >> 14;
 	dma_cap->rx_coe =  (hw_cap & GMAC_HW_FEAT_RXCOESEL) >> 16;
+	dma_cap->vlins = (hw_cap & GMAC_HW_FEAT_SAVLANINS) >> 27;
+	dma_cap->arpoffsel = (hw_cap & GMAC_HW_FEAT_ARPOFFSEL) >> 9;
 
 	/* MAC HW feature1 */
 	hw_cap = readl(ioaddr + GMAC_HW_FEATURE1);
+	dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24;
 	dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20;
 	dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18;
 	/* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by
@@ -387,6 +387,7 @@
 	dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13;
 	dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11;
 	dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10;
+	dma_cap->dvlan = (hw_cap & GMAC_HW_FEAT_DVLAN) >> 5;
 }
 
 /* Enable/disable TSO feature and set MSS */
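As the context above notes, the RX/TX FIFO sizes in GMAC_HW_FEATURE1 are encoded as log2(n / 128), so undoing the encoding is a single shift. A quick sketch of the decode, assuming a raw field value already extracted from the register:

#include <stdio.h>

int main(void)
{
	unsigned int field = 7;	/* hypothetical GMAC_HW_TXFIFOSIZE field */

	/* size = 128 * 2^field, i.e. the inverse of log2(n / 128) */
	printf("FIFO size: %u bytes\n", 128u << field);
	return 0;
}
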
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 22a4a6d..b66da02 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * DWMAC4 DMA Header file.
  *
- *
  * Copyright (C) 2007-2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 49f5687..f2a29a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2007-2015  STMicroelectronics Ltd
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
  * Author: Alexandre Torgue <alexandre.torgue@st.com>
  */
 
@@ -88,10 +85,6 @@
 
 	value &= ~DMA_CONTROL_SR;
 	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
-
-	value = readl(ioaddr + GMAC_CONFIG);
-	value &= ~GMAC_CONFIG_RE;
-	writel(value, ioaddr + GMAC_CONFIG);
 }
 
 void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
@@ -124,9 +117,9 @@
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
 			 struct stmmac_extra_stats *x, u32 chan)
 {
-	int ret = 0;
-
 	u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan));
+	u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+	int ret = 0;
 
 	/* ABNORMAL interrupts */
 	if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) {
@@ -151,16 +144,11 @@
 	if (likely(intr_status & DMA_CHAN_STATUS_NIS)) {
 		x->normal_irq_n++;
 		if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
-			u32 value;
-
-			value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
-			/* to schedule NAPI on real RIE event. */
-			if (likely(value & DMA_CHAN_INTR_ENA_RIE)) {
-				x->rx_normal_irq_n++;
-				ret |= handle_rx;
-			}
+			x->rx_normal_irq_n++;
+			ret |= handle_rx;
 		}
-		if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
+		if (likely(intr_status & (DMA_CHAN_STATUS_TI |
+					  DMA_CHAN_STATUS_TBU))) {
 			x->tx_normal_irq_n++;
 			ret |= handle_tx;
 		}
@@ -168,12 +156,7 @@
 			x->rx_early_irq++;
 	}
 
-	/* Clear the interrupt by writing a logic 1 to the chanX interrupt
-	 * status [21-0] expect reserved bits [5-3]
-	 */
-	writel((intr_status & 0x3fffc7),
-	       ioaddr + DMA_CHAN_STATUS(chan));
-
+	writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan));
 	return ret;
 }
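The interrupt handler above now acks exactly the bits that are both pending and enabled, instead of clearing a fixed constant; status bits that belong to a source this path does not service stay latched for whoever does. A table-top sketch of the two ack computations with made-up status/enable words:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t intr_status = 0x00000045;	/* hypothetical pending bits */
	uint32_t intr_en     = 0x00000041;	/* hypothetical enabled bits */

	/* Old behaviour: clear a fixed set of bits regardless of owner. */
	printf("old ack: 0x%08x\n", intr_status & 0x3fffc7);
	/* New behaviour: clear only what this handler is responsible for. */
	printf("new ack: 0x%08x\n", intr_status & intr_en);
	return 0;
}
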
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 3f4f313..e436fa1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -515,6 +515,7 @@
 
 	if (!enable) {
 		val |= PPSCMDx(index, 0x5);
+		val |= PPSEN0;
 		writel(val, ioaddr + MAC_PPS_CONTROL);
 		return 0;
 	}
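The one-line fix above matters because PPSCMDx() only queues the STOP command; without PPSEN0 the output remains in fixed-PPS mode and ignores commands. A sketch of how the 4-bit command is placed for a given output index, assuming the 8-bits-per-output layout that the XGMAC_PPSCMDx() macros added later in this patch also use:

#include <stdint.h>
#include <stdio.h>

#define PPS_MINIDX(x)	((x) * 8)
#define PPSCMDx(x, val)	(((uint32_t)(val) << PPS_MINIDX(x)) & \
			 (0xfu << PPS_MINIDX(x)))	/* 4-bit command */
#define PPSEN0		(1u << 4)

int main(void)
{
	int index = 1;			/* second PPS output */
	uint32_t val = 0;

	val |= PPSCMDx(index, 0x5);	/* 0x5 = stop pulse train */
	val |= PPSEN0;			/* select command mode, not fixed PPS */
	printf("MAC_PPS_CONTROL <- 0x%08x\n", val);
	return 0;
}
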
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 775db77..23fecf6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 // Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
 // stmmac Support for 5.xx Ethernet QoS cores
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index adc5400..292b880 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   DWMAC DMA Header file.
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 7516ca2..1bc25aa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -1,17 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 0a80fa2..9d08a93 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 /*
  * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
  * stmmac XGMAC definitions.
@@ -15,10 +15,14 @@
 /* MAC Registers */
 #define XGMAC_TX_CONFIG			0x00000000
 #define XGMAC_CONFIG_SS_OFF		29
-#define XGMAC_CONFIG_SS_MASK		GENMASK(30, 29)
+#define XGMAC_CONFIG_SS_MASK		GENMASK(31, 29)
 #define XGMAC_CONFIG_SS_10000		(0x0 << XGMAC_CONFIG_SS_OFF)
-#define XGMAC_CONFIG_SS_2500		(0x2 << XGMAC_CONFIG_SS_OFF)
-#define XGMAC_CONFIG_SS_1000		(0x3 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_2500_GMII	(0x2 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_1000_GMII	(0x3 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_100_MII		(0x4 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_5000		(0x5 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_2500		(0x6 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_10_MII		(0x7 << XGMAC_CONFIG_SS_OFF)
 #define XGMAC_CONFIG_SARC		GENMASK(22, 20)
 #define XGMAC_CONFIG_SARC_SHIFT		20
 #define XGMAC_CONFIG_JD			BIT(16)
@@ -28,7 +32,11 @@
 #define XGMAC_CONFIG_ARPEN		BIT(31)
 #define XGMAC_CONFIG_GPSL		GENMASK(29, 16)
 #define XGMAC_CONFIG_GPSL_SHIFT		16
+#define XGMAC_CONFIG_HDSMS		GENMASK(14, 12)
+#define XGMAC_CONFIG_HDSMS_SHIFT	12
+#define XGMAC_CONFIG_HDSMS_256		(0x2 << XGMAC_CONFIG_HDSMS_SHIFT)
 #define XGMAC_CONFIG_S2KP		BIT(11)
+#define XGMAC_CONFIG_LM			BIT(10)
 #define XGMAC_CONFIG_IPC		BIT(9)
 #define XGMAC_CONFIG_JE			BIT(8)
 #define XGMAC_CONFIG_WD			BIT(7)
@@ -36,13 +44,32 @@
 #define XGMAC_CONFIG_CST		BIT(2)
 #define XGMAC_CONFIG_ACS		BIT(1)
 #define XGMAC_CONFIG_RE			BIT(0)
-#define XGMAC_CORE_INIT_RX		0
+#define XGMAC_CORE_INIT_RX		(XGMAC_CONFIG_GPSLCE | XGMAC_CONFIG_WD | \
+					 (XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT))
 #define XGMAC_PACKET_FILTER		0x00000008
 #define XGMAC_FILTER_RA			BIT(31)
+#define XGMAC_FILTER_IPFE		BIT(20)
+#define XGMAC_FILTER_VTFE		BIT(16)
+#define XGMAC_FILTER_HPF		BIT(10)
+#define XGMAC_FILTER_PCF		BIT(7)
 #define XGMAC_FILTER_PM			BIT(4)
 #define XGMAC_FILTER_HMC		BIT(2)
 #define XGMAC_FILTER_PR			BIT(0)
 #define XGMAC_HASH_TABLE(x)		(0x00000010 + (x) * 4)
+#define XGMAC_MAX_HASH_TABLE		8
+#define XGMAC_VLAN_TAG			0x00000050
+#define XGMAC_VLAN_EDVLP		BIT(26)
+#define XGMAC_VLAN_VTHM			BIT(25)
+#define XGMAC_VLAN_DOVLTC		BIT(20)
+#define XGMAC_VLAN_ESVL			BIT(18)
+#define XGMAC_VLAN_ETV			BIT(16)
+#define XGMAC_VLAN_VID			GENMASK(15, 0)
+#define XGMAC_VLAN_HASH_TABLE		0x00000058
+#define XGMAC_VLAN_INCL			0x00000060
+#define XGMAC_VLAN_VLTI			BIT(20)
+#define XGMAC_VLAN_CSVL			BIT(19)
+#define XGMAC_VLAN_VLC			GENMASK(17, 16)
+#define XGMAC_VLAN_VLC_SHIFT		16
 #define XGMAC_RXQ_CTRL0			0x000000a0
 #define XGMAC_RXQEN(x)			GENMASK((x) * 2 + 1, (x) * 2)
 #define XGMAC_RXQEN_SHIFT(x)		((x) * 2)
@@ -51,12 +78,13 @@
 #define XGMAC_PSRQ(x)			GENMASK((x) * 8 + 7, (x) * 8)
 #define XGMAC_PSRQ_SHIFT(x)		((x) * 8)
 #define XGMAC_INT_STATUS		0x000000b0
+#define XGMAC_LPIIS			BIT(5)
 #define XGMAC_PMTIS			BIT(4)
 #define XGMAC_INT_EN			0x000000b4
 #define XGMAC_TSIE			BIT(12)
 #define XGMAC_LPIIE			BIT(5)
 #define XGMAC_PMTIE			BIT(4)
-#define XGMAC_INT_DEFAULT_EN		(XGMAC_LPIIE | XGMAC_PMTIE | XGMAC_TSIE)
+#define XGMAC_INT_DEFAULT_EN		(XGMAC_LPIIE | XGMAC_PMTIE)
 #define XGMAC_Qx_TX_FLOW_CTRL(x)	(0x00000070 + (x) * 4)
 #define XGMAC_PT			GENMASK(31, 16)
 #define XGMAC_PT_SHIFT			16
@@ -68,19 +96,37 @@
 #define XGMAC_RWKPKTEN			BIT(2)
 #define XGMAC_MGKPKTEN			BIT(1)
 #define XGMAC_PWRDWN			BIT(0)
+#define XGMAC_LPI_CTRL			0x000000d0
+#define XGMAC_TXCGE			BIT(21)
+#define XGMAC_LPITXA			BIT(19)
+#define XGMAC_PLS			BIT(17)
+#define XGMAC_LPITXEN			BIT(16)
+#define XGMAC_RLPIEX			BIT(3)
+#define XGMAC_RLPIEN			BIT(2)
+#define XGMAC_TLPIEX			BIT(1)
+#define XGMAC_TLPIEN			BIT(0)
+#define XGMAC_LPI_TIMER_CTRL		0x000000d4
 #define XGMAC_HW_FEATURE0		0x0000011c
 #define XGMAC_HWFEAT_SAVLANINS		BIT(27)
 #define XGMAC_HWFEAT_RXCOESEL		BIT(16)
 #define XGMAC_HWFEAT_TXCOESEL		BIT(14)
+#define XGMAC_HWFEAT_EEESEL		BIT(13)
 #define XGMAC_HWFEAT_TSSEL		BIT(12)
 #define XGMAC_HWFEAT_AVSEL		BIT(11)
 #define XGMAC_HWFEAT_RAVSEL		BIT(10)
 #define XGMAC_HWFEAT_ARPOFFSEL		BIT(9)
+#define XGMAC_HWFEAT_MMCSEL		BIT(8)
 #define XGMAC_HWFEAT_MGKSEL		BIT(7)
 #define XGMAC_HWFEAT_RWKSEL		BIT(6)
+#define XGMAC_HWFEAT_VLHASH		BIT(4)
 #define XGMAC_HWFEAT_GMIISEL		BIT(1)
 #define XGMAC_HW_FEATURE1		0x00000120
+#define XGMAC_HWFEAT_L3L4FNUM		GENMASK(30, 27)
+#define XGMAC_HWFEAT_HASHTBLSZ		GENMASK(25, 24)
+#define XGMAC_HWFEAT_RSSEN		BIT(20)
 #define XGMAC_HWFEAT_TSOEN		BIT(18)
+#define XGMAC_HWFEAT_SPHEN		BIT(17)
+#define XGMAC_HWFEAT_ADDR64		GENMASK(15, 14)
 #define XGMAC_HWFEAT_TXFIFOSIZE		GENMASK(10, 6)
 #define XGMAC_HWFEAT_RXFIFOSIZE		GENMASK(4, 0)
 #define XGMAC_HW_FEATURE2		0x00000124
@@ -89,23 +135,93 @@
 #define XGMAC_HWFEAT_RXCHCNT		GENMASK(15, 12)
 #define XGMAC_HWFEAT_TXQCNT		GENMASK(9, 6)
 #define XGMAC_HWFEAT_RXQCNT		GENMASK(3, 0)
+#define XGMAC_HW_FEATURE3		0x00000128
+#define XGMAC_HWFEAT_ASP		GENMASK(15, 14)
+#define XGMAC_HWFEAT_DVLAN		BIT(13)
+#define XGMAC_HWFEAT_FRPES		GENMASK(12, 11)
+#define XGMAC_HWFEAT_FRPPB		GENMASK(10, 9)
+#define XGMAC_HWFEAT_FRPSEL		BIT(3)
+#define XGMAC_MAC_DPP_FSM_INT_STATUS	0x00000150
+#define XGMAC_MAC_FSM_CONTROL		0x00000158
+#define XGMAC_PRTYEN			BIT(1)
+#define XGMAC_TMOUTEN			BIT(0)
 #define XGMAC_MDIO_ADDR			0x00000200
 #define XGMAC_MDIO_DATA			0x00000204
 #define XGMAC_MDIO_C22P			0x00000220
-#define XGMAC_ADDR0_HIGH		0x00000300
+#define XGMAC_ADDRx_HIGH(x)		(0x00000300 + (x) * 0x8)
+#define XGMAC_ADDR_MAX			32
 #define XGMAC_AE			BIT(31)
 #define XGMAC_DCS			GENMASK(19, 16)
 #define XGMAC_DCS_SHIFT			16
-#define XGMAC_ADDR0_LOW			0x00000304
+#define XGMAC_ADDRx_LOW(x)		(0x00000304 + (x) * 0x8)
+#define XGMAC_L3L4_ADDR_CTRL		0x00000c00
+#define XGMAC_IDDR			GENMASK(15, 8)
+#define XGMAC_IDDR_SHIFT		8
+#define XGMAC_IDDR_FNUM			4
+#define XGMAC_TT			BIT(1)
+#define XGMAC_XB			BIT(0)
+#define XGMAC_L3L4_DATA			0x00000c04
+#define XGMAC_L3L4_CTRL			0x0
+#define XGMAC_L4DPIM0			BIT(21)
+#define XGMAC_L4DPM0			BIT(20)
+#define XGMAC_L4SPIM0			BIT(19)
+#define XGMAC_L4SPM0			BIT(18)
+#define XGMAC_L4PEN0			BIT(16)
+#define XGMAC_L3HDBM0			GENMASK(15, 11)
+#define XGMAC_L3HSBM0			GENMASK(10, 6)
+#define XGMAC_L3DAIM0			BIT(5)
+#define XGMAC_L3DAM0			BIT(4)
+#define XGMAC_L3SAIM0			BIT(3)
+#define XGMAC_L3SAM0			BIT(2)
+#define XGMAC_L3PEN0			BIT(0)
+#define XGMAC_L4_ADDR			0x1
+#define XGMAC_L4DP0			GENMASK(31, 16)
+#define XGMAC_L4DP0_SHIFT		16
+#define XGMAC_L4SP0			GENMASK(15, 0)
+#define XGMAC_L3_ADDR0			0x4
+#define XGMAC_L3_ADDR1			0x5
+#define XGMAC_L3_ADDR2			0x6
+#define XMGAC_L3_ADDR3			0x7
 #define XGMAC_ARP_ADDR			0x00000c10
+#define XGMAC_RSS_CTRL			0x00000c80
+#define XGMAC_UDP4TE			BIT(3)
+#define XGMAC_TCP4TE			BIT(2)
+#define XGMAC_IP2TE			BIT(1)
+#define XGMAC_RSSE			BIT(0)
+#define XGMAC_RSS_ADDR			0x00000c88
+#define XGMAC_RSSIA_SHIFT		8
+#define XGMAC_ADDRT			BIT(2)
+#define XGMAC_CT			BIT(1)
+#define XGMAC_OB			BIT(0)
+#define XGMAC_RSS_DATA			0x00000c8c
 #define XGMAC_TIMESTAMP_STATUS		0x00000d20
 #define XGMAC_TXTSC			BIT(15)
 #define XGMAC_TXTIMESTAMP_NSEC		0x00000d30
 #define XGMAC_TXTSSTSLO			GENMASK(30, 0)
 #define XGMAC_TXTIMESTAMP_SEC		0x00000d34
+#define XGMAC_PPS_CONTROL		0x00000d70
+#define XGMAC_PPS_MAXIDX(x)		((((x) + 1) * 8) - 1)
+#define XGMAC_PPS_MINIDX(x)		((x) * 8)
+#define XGMAC_PPSx_MASK(x)		\
+	GENMASK(XGMAC_PPS_MAXIDX(x), XGMAC_PPS_MINIDX(x))
+#define XGMAC_TRGTMODSELx(x, val)	\
+	GENMASK(XGMAC_PPS_MAXIDX(x) - 1, XGMAC_PPS_MAXIDX(x) - 2) & \
+	((val) << (XGMAC_PPS_MAXIDX(x) - 2))
+#define XGMAC_PPSCMDx(x, val)		\
+	GENMASK(XGMAC_PPS_MINIDX(x) + 3, XGMAC_PPS_MINIDX(x)) & \
+	((val) << XGMAC_PPS_MINIDX(x))
+#define XGMAC_PPSCMD_START		0x2
+#define XGMAC_PPSCMD_STOP		0x5
+#define XGMAC_PPSEN0			BIT(4)
+#define XGMAC_PPSx_TARGET_TIME_SEC(x)	(0x00000d80 + (x) * 0x10)
+#define XGMAC_PPSx_TARGET_TIME_NSEC(x)	(0x00000d84 + (x) * 0x10)
+#define XGMAC_TRGTBUSY0			BIT(31)
+#define XGMAC_PPSx_INTERVAL(x)		(0x00000d88 + (x) * 0x10)
+#define XGMAC_PPSx_WIDTH(x)		(0x00000d8c + (x) * 0x10)
 
 /* MTL Registers */
 #define XGMAC_MTL_OPMODE		0x00001000
+#define XGMAC_FRPE			BIT(15)
 #define XGMAC_ETSALG			GENMASK(6, 5)
 #define XGMAC_WRR			(0x0 << 5)
 #define XGMAC_WFQ			(0x1 << 5)
@@ -114,16 +230,52 @@
 #define XGMAC_MTL_INT_STATUS		0x00001020
 #define XGMAC_MTL_RXQ_DMA_MAP0		0x00001030
 #define XGMAC_MTL_RXQ_DMA_MAP1		0x00001034
-#define XGMAC_QxMDMACH(x)		GENMASK((x) * 8 + 3, (x) * 8)
+#define XGMAC_QxMDMACH(x)		GENMASK((x) * 8 + 7, (x) * 8)
 #define XGMAC_QxMDMACH_SHIFT(x)		((x) * 8)
+#define XGMAC_QDDMACH			BIT(7)
+#define XGMAC_TC_PRTY_MAP0		0x00001040
+#define XGMAC_TC_PRTY_MAP1		0x00001044
+#define XGMAC_PSTC(x)			GENMASK((x) * 8 + 7, (x) * 8)
+#define XGMAC_PSTC_SHIFT(x)		((x) * 8)
+#define XGMAC_MTL_RXP_CONTROL_STATUS	0x000010a0
+#define XGMAC_RXPI			BIT(31)
+#define XGMAC_NPE			GENMASK(23, 16)
+#define XGMAC_NVE			GENMASK(7, 0)
+#define XGMAC_MTL_RXP_IACC_CTRL_ST	0x000010b0
+#define XGMAC_STARTBUSY			BIT(31)
+#define XGMAC_WRRDN			BIT(16)
+#define XGMAC_ADDR			GENMASK(9, 0)
+#define XGMAC_MTL_RXP_IACC_DATA		0x000010b4
+#define XGMAC_MTL_ECC_CONTROL		0x000010c0
+#define XGMAC_MTL_SAFETY_INT_STATUS	0x000010c4
+#define XGMAC_MEUIS			BIT(1)
+#define XGMAC_MECIS			BIT(0)
+#define XGMAC_MTL_ECC_INT_ENABLE	0x000010c8
+#define XGMAC_RPCEIE			BIT(12)
+#define XGMAC_ECEIE			BIT(8)
+#define XGMAC_RXCEIE			BIT(4)
+#define XGMAC_TXCEIE			BIT(0)
+#define XGMAC_MTL_ECC_INT_STATUS	0x000010cc
 #define XGMAC_MTL_TXQ_OPMODE(x)		(0x00001100 + (0x80 * (x)))
 #define XGMAC_TQS			GENMASK(25, 16)
 #define XGMAC_TQS_SHIFT			16
+#define XGMAC_Q2TCMAP			GENMASK(10, 8)
+#define XGMAC_Q2TCMAP_SHIFT		8
 #define XGMAC_TTC			GENMASK(6, 4)
 #define XGMAC_TTC_SHIFT			4
 #define XGMAC_TXQEN			GENMASK(3, 2)
 #define XGMAC_TXQEN_SHIFT		2
 #define XGMAC_TSF			BIT(1)
+#define XGMAC_MTL_TCx_ETS_CONTROL(x)	(0x00001110 + (0x80 * (x)))
+#define XGMAC_MTL_TCx_QUANTUM_WEIGHT(x)	(0x00001118 + (0x80 * (x)))
+#define XGMAC_MTL_TCx_SENDSLOPE(x)	(0x0000111c + (0x80 * (x)))
+#define XGMAC_MTL_TCx_HICREDIT(x)	(0x00001120 + (0x80 * (x)))
+#define XGMAC_MTL_TCx_LOCREDIT(x)	(0x00001124 + (0x80 * (x)))
+#define XGMAC_CC			BIT(3)
+#define XGMAC_TSA			GENMASK(1, 0)
+#define XGMAC_SP			(0x0 << 0)
+#define XGMAC_CBS			(0x1 << 0)
+#define XGMAC_ETS			(0x2 << 0)
 #define XGMAC_MTL_RXQ_OPMODE(x)		(0x00001140 + (0x80 * (x)))
 #define XGMAC_RQS			GENMASK(25, 16)
 #define XGMAC_RQS_SHIFT			16
@@ -131,12 +283,18 @@
 #define XGMAC_RSF			BIT(5)
 #define XGMAC_RTC			GENMASK(1, 0)
 #define XGMAC_RTC_SHIFT			0
+#define XGMAC_MTL_RXQ_FLOW_CONTROL(x)	(0x00001150 + (0x80 * (x)))
+#define XGMAC_RFD			GENMASK(31, 17)
+#define XGMAC_RFD_SHIFT			17
+#define XGMAC_RFA			GENMASK(15, 1)
+#define XGMAC_RFA_SHIFT			1
 #define XGMAC_MTL_QINTEN(x)		(0x00001170 + (0x80 * (x)))
 #define XGMAC_RXOIE			BIT(16)
 #define XGMAC_MTL_QINT_STATUS(x)	(0x00001174 + (0x80 * (x)))
 #define XGMAC_RXOVFIS			BIT(16)
 #define XGMAC_ABPSIS			BIT(1)
 #define XGMAC_TXUNFIS			BIT(0)
+#define XGMAC_MAC_REGSIZE		(XGMAC_MTL_QINT_STATUS(15) / 4)
 
 /* DMA Registers */
 #define XGMAC_DMA_MODE			0x00003000
@@ -149,6 +307,7 @@
 #define XGMAC_EN_LPI			BIT(15)
 #define XGMAC_LPI_XIT_PKT		BIT(14)
 #define XGMAC_AAL			BIT(12)
+#define XGMAC_EAME			BIT(11)
 #define XGMAC_BLEN			GENMASK(7, 1)
 #define XGMAC_BLEN256			BIT(7)
 #define XGMAC_BLEN128			BIT(6)
@@ -158,7 +317,22 @@
 #define XGMAC_BLEN8			BIT(2)
 #define XGMAC_BLEN4			BIT(1)
 #define XGMAC_UNDEF			BIT(0)
+#define XGMAC_TX_EDMA_CTRL		0x00003040
+#define XGMAC_TDPS			GENMASK(29, 0)
+#define XGMAC_RX_EDMA_CTRL		0x00003044
+#define XGMAC_RDPS			GENMASK(29, 0)
+#define XGMAC_DMA_SAFETY_INT_STATUS	0x00003064
+#define XGMAC_MCSIS			BIT(31)
+#define XGMAC_MSUIS			BIT(29)
+#define XGMAC_MSCIS			BIT(28)
+#define XGMAC_DEUIS			BIT(1)
+#define XGMAC_DECIS			BIT(0)
+#define XGMAC_DMA_ECC_INT_ENABLE	0x00003068
+#define XGMAC_DCEIE			BIT(1)
+#define XGMAC_TCEIE			BIT(0)
+#define XGMAC_DMA_ECC_INT_STATUS	0x0000306c
 #define XGMAC_DMA_CH_CONTROL(x)		(0x00003100 + (0x80 * (x)))
+#define XGMAC_SPH			BIT(24)
 #define XGMAC_PBLx8			BIT(16)
 #define XGMAC_DMA_CH_TX_CONTROL(x)	(0x00003104 + (0x80 * (x)))
 #define XGMAC_TxPBL			GENMASK(21, 16)
@@ -170,7 +344,9 @@
 #define XGMAC_RxPBL			GENMASK(21, 16)
 #define XGMAC_RxPBL_SHIFT		16
 #define XGMAC_RXST			BIT(0)
+#define XGMAC_DMA_CH_TxDESC_HADDR(x)	(0x00003110 + (0x80 * (x)))
 #define XGMAC_DMA_CH_TxDESC_LADDR(x)	(0x00003114 + (0x80 * (x)))
+#define XGMAC_DMA_CH_RxDESC_HADDR(x)	(0x00003118 + (0x80 * (x)))
 #define XGMAC_DMA_CH_RxDESC_LADDR(x)	(0x0000311c + (0x80 * (x)))
 #define XGMAC_DMA_CH_TxDESC_TAIL_LPTR(x)	(0x00003124 + (0x80 * (x)))
 #define XGMAC_DMA_CH_RxDESC_TAIL_LPTR(x)	(0x0000312c + (0x80 * (x)))
@@ -181,9 +357,10 @@
 #define XGMAC_AIE			BIT(14)
 #define XGMAC_RBUE			BIT(7)
 #define XGMAC_RIE			BIT(6)
+#define XGMAC_TBUE			BIT(2)
 #define XGMAC_TIE			BIT(0)
 #define XGMAC_DMA_INT_DEFAULT_EN	(XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \
-					XGMAC_RIE | XGMAC_TIE)
+					XGMAC_RIE | XGMAC_TBUE | XGMAC_TIE)
 #define XGMAC_DMA_CH_Rx_WATCHDOG(x)	(0x0000313c + (0x80 * (x)))
 #define XGMAC_RWT			GENMASK(7, 0)
 #define XGMAC_DMA_CH_STATUS(x)		(0x00003160 + (0x80 * (x)))
@@ -192,14 +369,20 @@
 #define XGMAC_FBE			BIT(12)
 #define XGMAC_RBU			BIT(7)
 #define XGMAC_RI			BIT(6)
+#define XGMAC_TBU			BIT(2)
 #define XGMAC_TPS			BIT(1)
 #define XGMAC_TI			BIT(0)
+#define XGMAC_REGSIZE			((0x0000317c + (0x80 * 15)) / 4)
 
 /* Descriptors */
+#define XGMAC_TDES2_IVT			GENMASK(31, 16)
+#define XGMAC_TDES2_IVT_SHIFT		16
 #define XGMAC_TDES2_IOC			BIT(31)
 #define XGMAC_TDES2_TTSE		BIT(30)
 #define XGMAC_TDES2_B2L			GENMASK(29, 16)
 #define XGMAC_TDES2_B2L_SHIFT		16
+#define XGMAC_TDES2_VTIR		GENMASK(15, 14)
+#define XGMAC_TDES2_VTIR_SHIFT		14
 #define XGMAC_TDES2_B1L			GENMASK(13, 0)
 #define XGMAC_TDES3_OWN			BIT(31)
 #define XGMAC_TDES3_CTXT		BIT(30)
@@ -208,18 +391,33 @@
 #define XGMAC_TDES3_CPC			GENMASK(27, 26)
 #define XGMAC_TDES3_CPC_SHIFT		26
 #define XGMAC_TDES3_TCMSSV		BIT(26)
+#define XGMAC_TDES3_SAIC		GENMASK(25, 23)
+#define XGMAC_TDES3_SAIC_SHIFT		23
 #define XGMAC_TDES3_THL			GENMASK(22, 19)
 #define XGMAC_TDES3_THL_SHIFT		19
+#define XGMAC_TDES3_IVTIR		GENMASK(19, 18)
+#define XGMAC_TDES3_IVTIR_SHIFT		18
 #define XGMAC_TDES3_TSE			BIT(18)
+#define XGMAC_TDES3_IVLTV		BIT(17)
 #define XGMAC_TDES3_CIC			GENMASK(17, 16)
 #define XGMAC_TDES3_CIC_SHIFT		16
 #define XGMAC_TDES3_TPL			GENMASK(17, 0)
+#define XGMAC_TDES3_VLTV		BIT(16)
+#define XGMAC_TDES3_VT			GENMASK(15, 0)
 #define XGMAC_TDES3_FL			GENMASK(14, 0)
+#define XGMAC_RDES2_HL			GENMASK(9, 0)
 #define XGMAC_RDES3_OWN			BIT(31)
 #define XGMAC_RDES3_CTXT		BIT(30)
 #define XGMAC_RDES3_IOC			BIT(30)
 #define XGMAC_RDES3_LD			BIT(28)
 #define XGMAC_RDES3_CDA			BIT(27)
+#define XGMAC_RDES3_RSV			BIT(26)
+#define XGMAC_RDES3_L34T		GENMASK(23, 20)
+#define XGMAC_RDES3_L34T_SHIFT		20
+#define XGMAC_L34T_IP4TCP		0x1
+#define XGMAC_L34T_IP4UDP		0x2
+#define XGMAC_L34T_IP6TCP		0x9
+#define XGMAC_L34T_IP6UDP		0xA
 #define XGMAC_RDES3_ES			BIT(15)
 #define XGMAC_RDES3_PL			GENMASK(13, 0)
 #define XGMAC_RDES3_TSD			BIT(6)
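The new RDES3_L34T field above tells software which L3/L4 combination the RSS engine hashed on. A sketch of decoding it from a write-back descriptor word (the descriptor value is invented):

#include <stdint.h>
#include <stdio.h>

#define XGMAC_RDES3_L34T	(0xfu << 20)	/* GENMASK(23, 20) */
#define XGMAC_RDES3_L34T_SHIFT	20
#define XGMAC_L34T_IP4TCP	0x1
#define XGMAC_L34T_IP4UDP	0x2
#define XGMAC_L34T_IP6TCP	0x9
#define XGMAC_L34T_IP6UDP	0xA

int main(void)
{
	uint32_t rdes3 = 0x00200000;	/* hypothetical write-back value */
	uint32_t l34t = (rdes3 & XGMAC_RDES3_L34T) >> XGMAC_RDES3_L34T_SHIFT;

	switch (l34t) {
	case XGMAC_L34T_IP4TCP: puts("IPv4/TCP"); break;
	case XGMAC_L34T_IP4UDP: puts("IPv4/UDP"); break;
	case XGMAC_L34T_IP6TCP: puts("IPv6/TCP"); break;
	case XGMAC_L34T_IP6UDP: puts("IPv6/UDP"); break;
	default: puts("not hashed on L4"); break;
	}
	return 0;
}
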
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index d182f82..070bd7d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -4,14 +4,17 @@
  * stmmac XGMAC support.
  */
 
+#include <linux/bitrev.h>
+#include <linux/crc32.h>
+#include <linux/iopoll.h>
 #include "stmmac.h"
+#include "stmmac_ptp.h"
 #include "dwxgmac2.h"
 
 static void dwxgmac2_core_init(struct mac_device_info *hw,
 			       struct net_device *dev)
 {
 	void __iomem *ioaddr = hw->pcsr;
-	int mtu = dev->mtu;
 	u32 tx, rx;
 
 	tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -20,23 +23,13 @@
 	tx |= XGMAC_CORE_INIT_TX;
 	rx |= XGMAC_CORE_INIT_RX;
 
-	if (mtu >= 9000) {
-		rx |= XGMAC_CONFIG_GPSLCE;
-		rx |= XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT;
-		rx |= XGMAC_CONFIG_WD;
-	} else if (mtu > 2000) {
-		rx |= XGMAC_CONFIG_JE;
-	} else if (mtu > 1500) {
-		rx |= XGMAC_CONFIG_S2KP;
-	}
-
 	if (hw->ps) {
 		tx |= XGMAC_CONFIG_TE;
 		tx &= ~hw->link.speed_mask;
 
 		switch (hw->ps) {
 		case SPEED_10000:
-			tx |= hw->link.speed10000;
+			tx |= hw->link.xgmii.speed10000;
 			break;
 		case SPEED_2500:
 			tx |= hw->link.speed2500;
@@ -106,6 +99,8 @@
 	u32 value, reg;
 
 	reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
+	if (queue >= 4)
+		queue -= 4;
 
 	value = readl(ioaddr + reg);
 	value &= ~XGMAC_PSRQ(queue);
@@ -114,6 +109,23 @@
 	writel(value, ioaddr + reg);
 }
 
+static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
+				   u32 queue)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value, reg;
+
+	reg = (queue < 4) ? XGMAC_TC_PRTY_MAP0 : XGMAC_TC_PRTY_MAP1;
+	if (queue >= 4)
+		queue -= 4;
+
+	value = readl(ioaddr + reg);
+	value &= ~XGMAC_PSTC(queue);
+	value |= (prio << XGMAC_PSTC_SHIFT(queue)) & XGMAC_PSTC(queue);
+
+	writel(value, ioaddr + reg);
+}
+
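Both priority helpers above (like the dwmac4 ones earlier in this patch) now rebase the queue index after picking the register: queues 0-3 live in one 32-bit register and 4-7 in the next, so the per-queue shift must come from the position within the selected register, not the absolute queue number. A sketch of the indexing fix:

#include <stdint.h>
#include <stdio.h>

#define PSRQ_SHIFT(q)	((q) * 8)	/* 8 bits per queue, 4 per register */

int main(void)
{
	for (uint32_t queue = 0; queue < 8; queue++) {
		int reg = (queue < 4) ? 0 : 1;	/* e.g. CTRL2 vs CTRL3 */
		uint32_t q = (queue >= 4) ? queue - 4 : queue;

		/* Without the rebase, queue 5 would need shift 40,
		 * overflowing a 32-bit register. */
		printf("queue %u -> reg %d, shift %u\n",
		       (unsigned)queue, reg, (unsigned)PSRQ_SHIFT(q));
	}
	return 0;
}
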
 static void dwxgmac2_prog_mtl_rx_algorithms(struct mac_device_info *hw,
 					    u32 rx_alg)
 {
@@ -140,7 +152,9 @@
 					    u32 tx_alg)
 {
 	void __iomem *ioaddr = hw->pcsr;
+	bool ets = true;
 	u32 value;
+	int i;
 
 	value = readl(ioaddr + XGMAC_MTL_OPMODE);
 	value &= ~XGMAC_ETSALG;
@@ -156,10 +170,28 @@
 		value |= XGMAC_DWRR;
 		break;
 	default:
+		ets = false;
 		break;
 	}
 
 	writel(value, ioaddr + XGMAC_MTL_OPMODE);
+
+	/* Set ETS if desired */
+	for (i = 0; i < MTL_MAX_TX_QUEUES; i++) {
+		value = readl(ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(i));
+		value &= ~XGMAC_TSA;
+		if (ets)
+			value |= XGMAC_ETS;
+		writel(value, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(i));
+	}
+}
+
+static void dwxgmac2_set_mtl_tx_queue_weight(struct mac_device_info *hw,
+					     u32 weight, u32 queue)
+{
+	void __iomem *ioaddr = hw->pcsr;
+
+	writel(weight, ioaddr + XGMAC_MTL_TCx_QUANTUM_WEIGHT(queue));
 }
 
 static void dwxgmac2_map_mtl_to_dma(struct mac_device_info *hw, u32 queue,
@@ -169,6 +201,8 @@
 	u32 value, reg;
 
 	reg = (queue < 4) ? XGMAC_MTL_RXQ_DMA_MAP0 : XGMAC_MTL_RXQ_DMA_MAP1;
+	if (queue >= 4)
+		queue -= 4;
 
 	value = readl(ioaddr + reg);
 	value &= ~XGMAC_QxMDMACH(queue);
@@ -177,11 +211,39 @@
 	writel(value, ioaddr + reg);
 }
 
+static void dwxgmac2_config_cbs(struct mac_device_info *hw,
+				u32 send_slope, u32 idle_slope,
+				u32 high_credit, u32 low_credit, u32 queue)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	writel(send_slope, ioaddr + XGMAC_MTL_TCx_SENDSLOPE(queue));
+	writel(idle_slope, ioaddr + XGMAC_MTL_TCx_QUANTUM_WEIGHT(queue));
+	writel(high_credit, ioaddr + XGMAC_MTL_TCx_HICREDIT(queue));
+	writel(low_credit, ioaddr + XGMAC_MTL_TCx_LOCREDIT(queue));
+
+	value = readl(ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue));
+	value &= ~XGMAC_TSA;
+	value |= XGMAC_CC | XGMAC_CBS;
+	writel(value, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue));
+}
+
+static void dwxgmac2_dump_regs(struct mac_device_info *hw, u32 *reg_space)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	int i;
+
+	for (i = 0; i < XGMAC_MAC_REGSIZE; i++)
+		reg_space[i] = readl(ioaddr + i * 4);
+}
+
 static int dwxgmac2_host_irq_status(struct mac_device_info *hw,
 				    struct stmmac_extra_stats *x)
 {
 	void __iomem *ioaddr = hw->pcsr;
 	u32 stat, en;
+	int ret = 0;
 
 	en = readl(ioaddr + XGMAC_INT_EN);
 	stat = readl(ioaddr + XGMAC_INT_STATUS);
@@ -193,7 +255,24 @@
 		readl(ioaddr + XGMAC_PMT);
 	}
 
-	return 0;
+	if (stat & XGMAC_LPIIS) {
+		u32 lpi = readl(ioaddr + XGMAC_LPI_CTRL);
+
+		if (lpi & XGMAC_TLPIEN) {
+			ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE;
+			x->irq_tx_path_in_lpi_mode_n++;
+		}
+		if (lpi & XGMAC_TLPIEX) {
+			ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE;
+			x->irq_tx_path_exit_lpi_mode_n++;
+		}
+		if (lpi & XGMAC_RLPIEN)
+			x->irq_rx_path_in_lpi_mode_n++;
+		if (lpi & XGMAC_RLPIEX)
+			x->irq_rx_path_exit_lpi_mode_n++;
+	}
+
+	return ret;
 }
 
 static int dwxgmac2_host_mtl_irq_status(struct mac_device_info *hw, u32 chan)
@@ -261,10 +340,10 @@
 	u32 value;
 
 	value = (addr[5] << 8) | addr[4];
-	writel(value | XGMAC_AE, ioaddr + XGMAC_ADDR0_HIGH);
+	writel(value | XGMAC_AE, ioaddr + XGMAC_ADDRx_HIGH(reg_n));
 
 	value = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0];
-	writel(value, ioaddr + XGMAC_ADDR0_LOW);
+	writel(value, ioaddr + XGMAC_ADDRx_LOW(reg_n));
 }
 
 static void dwxgmac2_get_umac_addr(struct mac_device_info *hw,
@@ -274,8 +353,8 @@
 	u32 hi_addr, lo_addr;
 
 	/* Read the MAC address from the hardware */
-	hi_addr = readl(ioaddr + XGMAC_ADDR0_HIGH);
-	lo_addr = readl(ioaddr + XGMAC_ADDR0_LOW);
+	hi_addr = readl(ioaddr + XGMAC_ADDRx_HIGH(reg_n));
+	lo_addr = readl(ioaddr + XGMAC_ADDRx_LOW(reg_n));
 
 	/* Extract the MAC address from the high and low words */
 	addr[0] = lo_addr & 0xff;
@@ -286,53 +365,1028 @@
 	addr[5] = (hi_addr >> 8) & 0xff;
 }
 
+static void dwxgmac2_set_eee_mode(struct mac_device_info *hw,
+				  bool en_tx_lpi_clockgating)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_LPI_CTRL);
+
+	value |= XGMAC_LPITXEN | XGMAC_LPITXA;
+	if (en_tx_lpi_clockgating)
+		value |= XGMAC_TXCGE;
+
+	writel(value, ioaddr + XGMAC_LPI_CTRL);
+}
+
+static void dwxgmac2_reset_eee_mode(struct mac_device_info *hw)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_LPI_CTRL);
+	value &= ~(XGMAC_LPITXEN | XGMAC_LPITXA | XGMAC_TXCGE);
+	writel(value, ioaddr + XGMAC_LPI_CTRL);
+}
+
+static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_LPI_CTRL);
+	if (link)
+		value |= XGMAC_PLS;
+	else
+		value &= ~XGMAC_PLS;
+	writel(value, ioaddr + XGMAC_LPI_CTRL);
+}
+
+static void dwxgmac2_set_eee_timer(struct mac_device_info *hw, int ls, int tw)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = (tw & 0xffff) | ((ls & 0x3ff) << 16);
+	writel(value, ioaddr + XGMAC_LPI_TIMER_CTRL);
+}
+
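dwxgmac2_set_eee_timer() above packs both EEE timers into one register: TW in the low 16 bits and LS in a 10-bit field starting at bit 16 (the units of the two timers are a hardware detail not shown here). A sketch of the packing with sample values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int ls = 1000;	/* hypothetical LS timer value */
	int tw = 30;	/* hypothetical TW timer value */
	uint32_t value = (tw & 0xffff) | ((ls & 0x3ff) << 16);

	printf("XGMAC_LPI_TIMER_CTRL <- 0x%08x\n", value);
	return 0;
}
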
+static void dwxgmac2_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits,
+				int mcbitslog2)
+{
+	int numhashregs, regs;
+
+	switch (mcbitslog2) {
+	case 6:
+		numhashregs = 2;
+		break;
+	case 7:
+		numhashregs = 4;
+		break;
+	case 8:
+		numhashregs = 8;
+		break;
+	default:
+		return;
+	}
+
+	for (regs = 0; regs < numhashregs; regs++)
+		writel(mcfilterbits[regs], ioaddr + XGMAC_HASH_TABLE(regs));
+}
+
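The switch in dwxgmac2_set_mchash() above maps hash width to register count: 64 bins need two 32-bit registers, 128 need four, 256 need eight, and anything else is rejected. Equivalently, numhashregs = 1 << (mcbitslog2 - 5), as this sketch checks:

#include <stdio.h>

int main(void)
{
	for (int mcbitslog2 = 6; mcbitslog2 <= 8; mcbitslog2++)
		printf("%d hash bits -> %d bins -> %d registers\n",
		       mcbitslog2, 1 << mcbitslog2, 1 << (mcbitslog2 - 5));
	return 0;
}
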
 static void dwxgmac2_set_filter(struct mac_device_info *hw,
 				struct net_device *dev)
 {
 	void __iomem *ioaddr = (void __iomem *)dev->base_addr;
-	u32 value = XGMAC_FILTER_RA;
+	u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
+	int mcbitslog2 = hw->mcast_bits_log2;
+	u32 mc_filter[8];
+	int i;
+
+	value &= ~(XGMAC_FILTER_PR | XGMAC_FILTER_HMC | XGMAC_FILTER_PM);
+	value |= XGMAC_FILTER_HPF;
+
+	memset(mc_filter, 0, sizeof(mc_filter));
 
 	if (dev->flags & IFF_PROMISC) {
 		value |= XGMAC_FILTER_PR;
+		value |= XGMAC_FILTER_PCF;
 	} else if ((dev->flags & IFF_ALLMULTI) ||
-		   (netdev_mc_count(dev) > HASH_TABLE_SIZE)) {
+		   (netdev_mc_count(dev) > hw->multicast_filter_bins)) {
 		value |= XGMAC_FILTER_PM;
-		writel(~0x0, ioaddr + XGMAC_HASH_TABLE(0));
-		writel(~0x0, ioaddr + XGMAC_HASH_TABLE(1));
+
+		for (i = 0; i < XGMAC_MAX_HASH_TABLE; i++)
+			writel(~0x0, ioaddr + XGMAC_HASH_TABLE(i));
+	} else if (!netdev_mc_empty(dev)) {
+		struct netdev_hw_addr *ha;
+
+		value |= XGMAC_FILTER_HMC;
+
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >>
+					(32 - mcbitslog2));
+			mc_filter[nr >> 5] |= (1 << (nr & 0x1F));
+		}
+	}
+
+	dwxgmac2_set_mchash(ioaddr, mc_filter, mcbitslog2);
+
+	/* Handle multiple unicast addresses */
+	if (netdev_uc_count(dev) > hw->unicast_filter_entries) {
+		value |= XGMAC_FILTER_PR;
+	} else {
+		struct netdev_hw_addr *ha;
+		int reg = 1;
+
+		netdev_for_each_uc_addr(ha, dev) {
+			dwxgmac2_set_umac_addr(hw, ha->addr, reg);
+			reg++;
+		}
+
+		for ( ; reg < XGMAC_ADDR_MAX; reg++) {
+			writel(0, ioaddr + XGMAC_ADDRx_HIGH(reg));
+			writel(0, ioaddr + XGMAC_ADDRx_LOW(reg));
+		}
 	}
 
 	writel(value, ioaddr + XGMAC_PACKET_FILTER);
 }
 
+static void dwxgmac2_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + XGMAC_RX_CONFIG);
+
+	if (enable)
+		value |= XGMAC_CONFIG_LM;
+	else
+		value &= ~XGMAC_CONFIG_LM;
+
+	writel(value, ioaddr + XGMAC_RX_CONFIG);
+}
+
+static int dwxgmac2_rss_write_reg(void __iomem *ioaddr, bool is_key, int idx,
+				  u32 val)
+{
+	u32 ctrl = 0;
+
+	writel(val, ioaddr + XGMAC_RSS_DATA);
+	ctrl |= idx << XGMAC_RSSIA_SHIFT;
+	ctrl |= is_key ? XGMAC_ADDRT : 0x0;
+	ctrl |= XGMAC_OB;
+	writel(ctrl, ioaddr + XGMAC_RSS_ADDR);
+
+	return readl_poll_timeout(ioaddr + XGMAC_RSS_ADDR, ctrl,
+				  !(ctrl & XGMAC_OB), 100, 10000);
+}
+
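dwxgmac2_rss_write_reg() above kicks an indirect write by setting the OB (operation busy) bit, then uses readl_poll_timeout() with a 100 us poll interval and 10 ms timeout to wait for hardware to clear it. A userspace sketch of the same poll-until-clear-or-timeout shape, where the fake "register" self-clears after a few reads:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define XGMAC_OB	(1u << 0)

static uint32_t fake_rss_addr = XGMAC_OB;
static int reads;

static uint32_t read_reg(void)
{
	if (++reads >= 3)			/* pretend hardware finished */
		fake_rss_addr &= ~XGMAC_OB;
	return fake_rss_addr;
}

/* Shape of readl_poll_timeout(): poll every sleep_us, up to timeout_us. */
static int poll_timeout(uint32_t *val, long sleep_us, long timeout_us)
{
	for (long waited = 0; ; waited += sleep_us) {
		*val = read_reg();
		if (!(*val & XGMAC_OB))
			return 0;
		if (waited >= timeout_us)
			return -ETIMEDOUT;
		/* a real implementation would usleep(sleep_us) here */
	}
}

int main(void)
{
	uint32_t ctrl;
	int ret = poll_timeout(&ctrl, 100, 10000);

	printf("ret=%d ctrl=0x%08x after %d reads\n", ret, ctrl, reads);
	return 0;
}
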
+static int dwxgmac2_rss_configure(struct mac_device_info *hw,
+				  struct stmmac_rss *cfg, u32 num_rxq)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value, *key;
+	int i, ret;
+
+	value = readl(ioaddr + XGMAC_RSS_CTRL);
+	if (!cfg || !cfg->enable) {
+		value &= ~XGMAC_RSSE;
+		writel(value, ioaddr + XGMAC_RSS_CTRL);
+		return 0;
+	}
+
+	key = (u32 *)cfg->key;
+	for (i = 0; i < (ARRAY_SIZE(cfg->key) / sizeof(u32)); i++) {
+		ret = dwxgmac2_rss_write_reg(ioaddr, true, i, key[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cfg->table); i++) {
+		ret = dwxgmac2_rss_write_reg(ioaddr, false, i, cfg->table[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < num_rxq; i++)
+		dwxgmac2_map_mtl_to_dma(hw, i, XGMAC_QDDMACH);
+
+	value |= XGMAC_UDP4TE | XGMAC_TCP4TE | XGMAC_IP2TE | XGMAC_RSSE;
+	writel(value, ioaddr + XGMAC_RSS_CTRL);
+	return 0;
+}
+
+static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash,
+				      bool is_double)
+{
+	void __iomem *ioaddr = hw->pcsr;
+
+	writel(hash, ioaddr + XGMAC_VLAN_HASH_TABLE);
+
+	if (hash) {
+		u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
+
+		value |= XGMAC_FILTER_VTFE;
+
+		writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+		value = XGMAC_VLAN_VTHM | XGMAC_VLAN_ETV;
+		if (is_double) {
+			value |= XGMAC_VLAN_EDVLP;
+			value |= XGMAC_VLAN_ESVL;
+			value |= XGMAC_VLAN_DOVLTC;
+		}
+
+		writel(value, ioaddr + XGMAC_VLAN_TAG);
+	} else {
+		u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
+
+		value &= ~XGMAC_FILTER_VTFE;
+
+		writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+		value = readl(ioaddr + XGMAC_VLAN_TAG);
+
+		value &= ~(XGMAC_VLAN_VTHM | XGMAC_VLAN_ETV);
+		value &= ~(XGMAC_VLAN_EDVLP | XGMAC_VLAN_ESVL);
+		value &= ~XGMAC_VLAN_DOVLTC;
+		value &= ~XGMAC_VLAN_VID;
+
+		writel(value, ioaddr + XGMAC_VLAN_TAG);
+	}
+}
+
+struct dwxgmac3_error_desc {
+	bool valid;
+	const char *desc;
+	const char *detailed_desc;
+};
+
+#define STAT_OFF(field)		offsetof(struct stmmac_safety_stats, field)
+
+static void dwxgmac3_log_error(struct net_device *ndev, u32 value, bool corr,
+			       const char *module_name,
+			       const struct dwxgmac3_error_desc *desc,
+			       unsigned long field_offset,
+			       struct stmmac_safety_stats *stats)
+{
+	unsigned long loc, mask;
+	u8 *bptr = (u8 *)stats;
+	unsigned long *ptr;
+
+	ptr = (unsigned long *)(bptr + field_offset);
+
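+	/* Walk every set bit of the status word: log the error and bump
+	 * the matching per-bit counter.
+	 */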
+	mask = value;
+	for_each_set_bit(loc, &mask, 32) {
+		netdev_err(ndev, "Found %s error in %s: '%s: %s'\n", corr ?
+				"correctable" : "uncorrectable", module_name,
+				desc[loc].desc, desc[loc].detailed_desc);
+
+		/* Update counters */
+		ptr[loc]++;
+	}
+}
+
+static const struct dwxgmac3_error_desc dwxgmac3_mac_errors[32] = {
+	{ true, "ATPES", "Application Transmit Interface Parity Check Error" },
+	{ true, "DPES", "Descriptor Cache Data Path Parity Check Error" },
+	{ true, "TPES", "TSO Data Path Parity Check Error" },
+	{ true, "TSOPES", "TSO Header Data Path Parity Check Error" },
+	{ true, "MTPES", "MTL Data Path Parity Check Error" },
+	{ true, "MTSPES", "MTL TX Status Data Path Parity Check Error" },
+	{ true, "MTBUPES", "MAC TBU Data Path Parity Check Error" },
+	{ true, "MTFCPES", "MAC TFC Data Path Parity Check Error" },
+	{ true, "ARPES", "Application Receive Interface Data Path Parity Check Error" },
+	{ true, "MRWCPES", "MTL RWC Data Path Parity Check Error" },
+	{ true, "MRRCPES", "MTL RCC Data Path Parity Check Error" },
+	{ true, "CWPES", "CSR Write Data Path Parity Check Error" },
+	{ true, "ASRPES", "AXI Slave Read Data Path Parity Check Error" },
+	{ true, "TTES", "TX FSM Timeout Error" },
+	{ true, "RTES", "RX FSM Timeout Error" },
+	{ true, "CTES", "CSR FSM Timeout Error" },
+	{ true, "ATES", "APP FSM Timeout Error" },
+	{ true, "PTES", "PTP FSM Timeout Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
+	{ true, "MSTTES", "Master Read/Write Timeout Error" },
+	{ true, "SLVTES", "Slave Read/Write Timeout Error" },
+	{ true, "ATITES", "Application Timeout on ATI Interface Error" },
+	{ true, "ARITES", "Application Timeout on ARI Interface Error" },
+	{ true, "FSMPES", "FSM State Parity Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 26 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 27 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 28 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 29 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 30 */
+	{ true, "CPI", "Control Register Parity Check Error" },
+};
+
+static void dwxgmac3_handle_mac_err(struct net_device *ndev,
+				    void __iomem *ioaddr, bool correctable,
+				    struct stmmac_safety_stats *stats)
+{
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_MAC_DPP_FSM_INT_STATUS);
+	writel(value, ioaddr + XGMAC_MAC_DPP_FSM_INT_STATUS);
+
+	dwxgmac3_log_error(ndev, value, correctable, "MAC",
+			   dwxgmac3_mac_errors, STAT_OFF(mac_errors), stats);
+}
+
+static const struct dwxgmac3_error_desc dwxgmac3_mtl_errors[32] = {
+	{ true, "TXCES", "MTL TX Memory Error" },
+	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },
+	{ true, "TXUES", "MTL TX Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
+	{ true, "RXCES", "MTL RX Memory Error" },
+	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },
+	{ true, "RXUES", "MTL RX Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
+	{ true, "ECES", "MTL EST Memory Error" },
+	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },
+	{ true, "EUES", "MTL EST Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
+	{ true, "RPCES", "MTL RX Parser Memory Error" },
+	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },
+	{ true, "RPUES", "MTL RX Parser Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 26 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 27 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 28 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 29 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 30 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
+};
+
+static void dwxgmac3_handle_mtl_err(struct net_device *ndev,
+				    void __iomem *ioaddr, bool correctable,
+				    struct stmmac_safety_stats *stats)
+{
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_MTL_ECC_INT_STATUS);
+	writel(value, ioaddr + XGMAC_MTL_ECC_INT_STATUS);
+
+	dwxgmac3_log_error(ndev, value, correctable, "MTL",
+			   dwxgmac3_mtl_errors, STAT_OFF(mtl_errors), stats);
+}
+
+static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32] = {
+	{ true, "TCES", "DMA TSO Memory Error" },
+	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },
+	{ true, "TUES", "DMA TSO Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
+	{ true, "DCES", "DMA DCACHE Memory Error" },
+	{ true, "DAMS", "DMA DCACHE Address Mismatch Error" },
+	{ true, "DUES", "DMA DCACHE Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 10 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 12 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 13 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 14 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 26 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 27 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 28 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 29 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 30 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
+};
+
+static void dwxgmac3_handle_dma_err(struct net_device *ndev,
+				    void __iomem *ioaddr, bool correctable,
+				    struct stmmac_safety_stats *stats)
+{
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_DMA_ECC_INT_STATUS);
+	writel(value, ioaddr + XGMAC_DMA_ECC_INT_STATUS);
+
+	dwxgmac3_log_error(ndev, value, correctable, "DMA",
+			   dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
+}
+
+static int dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
+{
+	u32 value;
+
+	if (!asp)
+		return -EINVAL;
+
+	/* 1. Enable Safety Features */
+	writel(0x0, ioaddr + XGMAC_MTL_ECC_CONTROL);
+
+	/* 2. Enable MTL Safety Interrupts */
+	value = readl(ioaddr + XGMAC_MTL_ECC_INT_ENABLE);
+	value |= XGMAC_RPCEIE; /* RX Parser Memory Correctable Error */
+	value |= XGMAC_ECEIE; /* EST Memory Correctable Error */
+	value |= XGMAC_RXCEIE; /* RX Memory Correctable Error */
+	value |= XGMAC_TXCEIE; /* TX Memory Correctable Error */
+	writel(value, ioaddr + XGMAC_MTL_ECC_INT_ENABLE);
+
+	/* 3. Enable DMA Safety Interrupts */
+	value = readl(ioaddr + XGMAC_DMA_ECC_INT_ENABLE);
+	value |= XGMAC_DCEIE; /* Descriptor Cache Memory Correctable Error */
+	value |= XGMAC_TCEIE; /* TSO Memory Correctable Error */
+	writel(value, ioaddr + XGMAC_DMA_ECC_INT_ENABLE);
+
+	/* Only the ECC Protection for External Memory feature is selected */
+	if (asp <= 0x1)
+		return 0;
+
+	/* 4. Enable Parity and Timeout for FSM */
+	value = readl(ioaddr + XGMAC_MAC_FSM_CONTROL);
+	value |= XGMAC_PRTYEN; /* FSM Parity Feature */
+	value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
+	writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
+
+	return 0;
+}
+
+static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
+					   void __iomem *ioaddr,
+					   unsigned int asp,
+					   struct stmmac_safety_stats *stats)
+{
+	bool err, corr;
+	u32 mtl, dma;
+	int ret = 0;
+
+	if (!asp)
+		return -EINVAL;
+
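+	/* Each handler logs and counts its events; ret is non-zero only
+	 * for uncorrectable errors so the caller can escalate.
+	 */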
+	mtl = readl(ioaddr + XGMAC_MTL_SAFETY_INT_STATUS);
+	dma = readl(ioaddr + XGMAC_DMA_SAFETY_INT_STATUS);
+
+	err = (mtl & XGMAC_MCSIS) || (dma & XGMAC_MCSIS);
+	corr = false;
+	if (err) {
+		dwxgmac3_handle_mac_err(ndev, ioaddr, corr, stats);
+		ret |= !corr;
+	}
+
+	err = (mtl & (XGMAC_MEUIS | XGMAC_MECIS)) ||
+	      (dma & (XGMAC_MSUIS | XGMAC_MSCIS));
+	corr = (mtl & XGMAC_MECIS) || (dma & XGMAC_MSCIS);
+	if (err) {
+		dwxgmac3_handle_mtl_err(ndev, ioaddr, corr, stats);
+		ret |= !corr;
+	}
+
+	err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
+	corr = dma & XGMAC_DECIS;
+	if (err) {
+		dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
+		ret |= !corr;
+	}
+
+	return ret;
+}
+
+static const struct dwxgmac3_error {
+	const struct dwxgmac3_error_desc *desc;
+} dwxgmac3_all_errors[] = {
+	{ dwxgmac3_mac_errors },
+	{ dwxgmac3_mtl_errors },
+	{ dwxgmac3_dma_errors },
+};
+
+static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
+				     int index, unsigned long *count,
+				     const char **desc)
+{
+	int module = index / 32, offset = index % 32;
+	unsigned long *ptr = (unsigned long *)stats;
+
+	if (module >= ARRAY_SIZE(dwxgmac3_all_errors))
+		return -EINVAL;
+	if (!dwxgmac3_all_errors[module].desc[offset].valid)
+		return -EINVAL;
+	if (count)
+		*count = *(ptr + index);
+	if (desc)
+		*desc = dwxgmac3_all_errors[module].desc[offset].desc;
+	return 0;
+}
+
+static int dwxgmac3_rxp_disable(void __iomem *ioaddr)
+{
+	u32 val = readl(ioaddr + XGMAC_MTL_OPMODE);
+
+	val &= ~XGMAC_FRPE;
+	writel(val, ioaddr + XGMAC_MTL_OPMODE);
+
+	return 0;
+}
+
+static void dwxgmac3_rxp_enable(void __iomem *ioaddr)
+{
+	u32 val;
+
+	val = readl(ioaddr + XGMAC_MTL_OPMODE);
+	val |= XGMAC_FRPE;
+	writel(val, ioaddr + XGMAC_MTL_OPMODE);
+}
+
+static int dwxgmac3_rxp_update_single_entry(void __iomem *ioaddr,
+					    struct stmmac_tc_entry *entry,
+					    int pos)
+{
+	int ret, i;
+
+	for (i = 0; i < (sizeof(entry->val) / sizeof(u32)); i++) {
+		int real_pos = pos * (sizeof(entry->val) / sizeof(u32)) + i;
+		u32 val;
+
+		/* Wait for ready */
+		ret = readl_poll_timeout(ioaddr + XGMAC_MTL_RXP_IACC_CTRL_ST,
+					 val, !(val & XGMAC_STARTBUSY), 1, 10000);
+		if (ret)
+			return ret;
+
+		/* Write data */
+		val = *((u32 *)&entry->val + i);
+		writel(val, ioaddr + XGMAC_MTL_RXP_IACC_DATA);
+
+		/* Write pos */
+		val = real_pos & XGMAC_ADDR;
+		writel(val, ioaddr + XGMAC_MTL_RXP_IACC_CTRL_ST);
+
+		/* Write OP */
+		val |= XGMAC_WRRDN;
+		writel(val, ioaddr + XGMAC_MTL_RXP_IACC_CTRL_ST);
+
+		/* Start Write */
+		val |= XGMAC_STARTBUSY;
+		writel(val, ioaddr + XGMAC_MTL_RXP_IACC_CTRL_ST);
+
+		/* Wait for done */
+		ret = readl_poll_timeout(ioaddr + XGMAC_MTL_RXP_IACC_CTRL_ST,
+					 val, !(val & XGMAC_STARTBUSY), 1, 10000);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static struct stmmac_tc_entry *
+dwxgmac3_rxp_get_next_entry(struct stmmac_tc_entry *entries,
+			    unsigned int count, u32 curr_prio)
+{
+	struct stmmac_tc_entry *entry;
+	u32 min_prio = ~0x0;
+	int i, min_prio_idx;
+	bool found = false;
+
+	for (i = count - 1; i >= 0; i--) {
+		entry = &entries[i];
+
+		/* Do not update unused entries */
+		if (!entry->in_use)
+			continue;
+		/* Do not update already updated entries (i.e. fragments) */
+		if (entry->in_hw)
+			continue;
+		/* Let last entry be updated last */
+		if (entry->is_last)
+			continue;
+		/* Do not return fragments */
+		if (entry->is_frag)
+			continue;
+		/* Check if we already checked this prio */
+		if (entry->prio < curr_prio)
+			continue;
+		/* Check if this is the minimum prio */
+		if (entry->prio < min_prio) {
+			min_prio = entry->prio;
+			min_prio_idx = i;
+			found = true;
+		}
+	}
+
+	if (found)
+		return &entries[min_prio_idx];
+	return NULL;
+}
+
+static int dwxgmac3_rxp_config(void __iomem *ioaddr,
+			       struct stmmac_tc_entry *entries,
+			       unsigned int count)
+{
+	struct stmmac_tc_entry *entry, *frag;
+	int i, ret, nve = 0;
+	u32 curr_prio = 0;
+	u32 old_val, val;
+
+	/* Force disable RX */
+	old_val = readl(ioaddr + XGMAC_RX_CONFIG);
+	val = old_val & ~XGMAC_CONFIG_RE;
+	writel(val, ioaddr + XGMAC_RX_CONFIG);
+
+	/* Disable RX Parser */
+	ret = dwxgmac3_rxp_disable(ioaddr);
+	if (ret)
+		goto re_enable;
+
+	/* Set all entries as NOT in HW */
+	for (i = 0; i < count; i++) {
+		entry = &entries[i];
+		entry->in_hw = false;
+	}
+
+	/* Update entries by reverse order */
+	while (1) {
+		entry = dwxgmac3_rxp_get_next_entry(entries, count, curr_prio);
+		if (!entry)
+			break;
+
+		curr_prio = entry->prio;
+		frag = entry->frag_ptr;
+
+		/* Set special fragment requirements */
+		if (frag) {
+			entry->val.af = 0;
+			entry->val.rf = 0;
+			entry->val.nc = 1;
+			entry->val.ok_index = nve + 2;
+		}
+
+		ret = dwxgmac3_rxp_update_single_entry(ioaddr, entry, nve);
+		if (ret)
+			goto re_enable;
+
+		entry->table_pos = nve++;
+		entry->in_hw = true;
+
+		if (frag && !frag->in_hw) {
+			ret = dwxgmac3_rxp_update_single_entry(ioaddr, frag, nve);
+			if (ret)
+				goto re_enable;
+			frag->table_pos = nve++;
+			frag->in_hw = true;
+		}
+	}
+
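+	/* Nothing was programmed: skip the all-pass entry and leave the
+	 * parser disabled.
+	 */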
+	if (!nve)
+		goto re_enable;
+
+	/* Update all pass entry */
+	for (i = 0; i < count; i++) {
+		entry = &entries[i];
+		if (!entry->is_last)
+			continue;
+
+		ret = dwxgmac3_rxp_update_single_entry(ioaddr, entry, nve);
+		if (ret)
+			goto re_enable;
+
+		entry->table_pos = nve++;
+	}
+
+	/* Assume the number of parsable entries == number of valid entries */
+	val = (nve << 16) & XGMAC_NPE;
+	val |= nve & XGMAC_NVE;
+	writel(val, ioaddr + XGMAC_MTL_RXP_CONTROL_STATUS);
+
+	/* Enable RX Parser */
+	dwxgmac3_rxp_enable(ioaddr);
+
+re_enable:
+	/* Re-enable RX */
+	writel(old_val, ioaddr + XGMAC_RX_CONFIG);
+	return ret;
+}
+
+static int dwxgmac2_get_mac_tx_timestamp(struct mac_device_info *hw, u64 *ts)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	if (readl_poll_timeout_atomic(ioaddr + XGMAC_TIMESTAMP_STATUS,
+				      value, value & XGMAC_TXTSC, 100, 10000))
+		return -EBUSY;
+
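+	/* Combine the nanoseconds and seconds snapshot registers into a
+	 * single nanoseconds value.
+	 */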
+	*ts = readl(ioaddr + XGMAC_TXTIMESTAMP_NSEC) & XGMAC_TXTSSTSLO;
+	*ts += readl(ioaddr + XGMAC_TXTIMESTAMP_SEC) * 1000000000ULL;
+	return 0;
+}
+
+static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index,
+				    struct stmmac_pps_cfg *cfg, bool enable,
+				    u32 sub_second_inc, u32 systime_flags)
+{
+	u32 tnsec = readl(ioaddr + XGMAC_PPSx_TARGET_TIME_NSEC(index));
+	u32 val = readl(ioaddr + XGMAC_PPS_CONTROL);
+	u64 period;
+
+	if (!cfg->available)
+		return -EINVAL;
+	if (tnsec & XGMAC_TRGTBUSY0)
+		return -EBUSY;
+	if (!sub_second_inc || !systime_flags)
+		return -EINVAL;
+
+	val &= ~XGMAC_PPSx_MASK(index);
+
+	if (!enable) {
+		val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_STOP);
+		writel(val, ioaddr + XGMAC_PPS_CONTROL);
+		return 0;
+	}
+
+	val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START);
+	val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START);
+	val |= XGMAC_PPSEN0;
+
+	writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index));
+
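+	/* In binary rollover mode (TSCTRLSSR clear) the subsecond register
+	 * ticks in ~0.465ns units, so scale the nanoseconds target.
+	 */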
+	if (!(systime_flags & PTP_TCR_TSCTRLSSR))
+		cfg->start.tv_nsec = (cfg->start.tv_nsec * 1000) / 465;
+	writel(cfg->start.tv_nsec, ioaddr + XGMAC_PPSx_TARGET_TIME_NSEC(index));
+
+	period = cfg->period.tv_sec * 1000000000;
+	period += cfg->period.tv_nsec;
+
+	do_div(period, sub_second_inc);
+
+	if (period <= 1)
+		return -EINVAL;
+
+	writel(period - 1, ioaddr + XGMAC_PPSx_INTERVAL(index));
+
+	period >>= 1;
+	if (period <= 1)
+		return -EINVAL;
+
+	writel(period - 1, ioaddr + XGMAC_PPSx_WIDTH(index));
+
+	/* Finally, activate it */
+	writel(val, ioaddr + XGMAC_PPS_CONTROL);
+	return 0;
+}
+
+static void dwxgmac2_sarc_configure(void __iomem *ioaddr, int val)
+{
+	u32 value = readl(ioaddr + XGMAC_TX_CONFIG);
+
+	value &= ~XGMAC_CONFIG_SARC;
+	value |= val << XGMAC_CONFIG_SARC_SHIFT;
+
+	writel(value, ioaddr + XGMAC_TX_CONFIG);
+}
+
+static void dwxgmac2_enable_vlan(struct mac_device_info *hw, u32 type)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_VLAN_INCL);
+	value |= XGMAC_VLAN_VLTI;
+	value |= XGMAC_VLAN_CSVL; /* Only use SVLAN */
+	value &= ~XGMAC_VLAN_VLC;
+	value |= (type << XGMAC_VLAN_VLC_SHIFT) & XGMAC_VLAN_VLC;
+	writel(value, ioaddr + XGMAC_VLAN_INCL);
+}
+
+static int dwxgmac2_filter_wait(struct mac_device_info *hw)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	if (readl_poll_timeout(ioaddr + XGMAC_L3L4_ADDR_CTRL, value,
+			       !(value & XGMAC_XB), 100, 10000))
+		return -EBUSY;
+	return 0;
+}
+
+static int dwxgmac2_filter_read(struct mac_device_info *hw, u32 filter_no,
+				u8 reg, u32 *data)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	ret = dwxgmac2_filter_wait(hw);
+	if (ret)
+		return ret;
+
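+	/* Indirect access: select the per-filter register, set TT for a
+	 * read transfer and XB to start, then wait for completion.
+	 */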
+	value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT;
+	value |= XGMAC_TT | XGMAC_XB;
+	writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL);
+
+	ret = dwxgmac2_filter_wait(hw);
+	if (ret)
+		return ret;
+
+	*data = readl(ioaddr + XGMAC_L3L4_DATA);
+	return 0;
+}
+
+static int dwxgmac2_filter_write(struct mac_device_info *hw, u32 filter_no,
+				 u8 reg, u32 data)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	ret = dwxgmac2_filter_wait(hw);
+	if (ret)
+		return ret;
+
+	writel(data, ioaddr + XGMAC_L3L4_DATA);
+
+	value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT;
+	value |= XGMAC_XB;
+	writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL);
+
+	return dwxgmac2_filter_wait(hw);
+}
+
+static int dwxgmac2_config_l3_filter(struct mac_device_info *hw, u32 filter_no,
+				     bool en, bool ipv6, bool sa, bool inv,
+				     u32 match)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	value = readl(ioaddr + XGMAC_PACKET_FILTER);
+	value |= XGMAC_FILTER_IPFE;
+	writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+	ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value);
+	if (ret)
+		return ret;
+
+	/* For IPv6, the SA and DA filters cannot both be active */
+	if (ipv6) {
+		value |= XGMAC_L3PEN0;
+		value &= ~(XGMAC_L3SAM0 | XGMAC_L3SAIM0);
+		value &= ~(XGMAC_L3DAM0 | XGMAC_L3DAIM0);
+		if (sa) {
+			value |= XGMAC_L3SAM0;
+			if (inv)
+				value |= XGMAC_L3SAIM0;
+		} else {
+			value |= XGMAC_L3DAM0;
+			if (inv)
+				value |= XGMAC_L3DAIM0;
+		}
+	} else {
+		value &= ~XGMAC_L3PEN0;
+		if (sa) {
+			value |= XGMAC_L3SAM0;
+			if (inv)
+				value |= XGMAC_L3SAIM0;
+		} else {
+			value |= XGMAC_L3DAM0;
+			if (inv)
+				value |= XGMAC_L3DAIM0;
+		}
+	}
+
+	ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value);
+	if (ret)
+		return ret;
+
+	if (sa) {
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR0, match);
+		if (ret)
+			return ret;
+	} else {
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR1, match);
+		if (ret)
+			return ret;
+	}
+
+	if (!en)
+		return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0);
+
+	return 0;
+}
+
+static int dwxgmac2_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
+				     bool en, bool udp, bool sa, bool inv,
+				     u32 match)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	int ret;
+
+	value = readl(ioaddr + XGMAC_PACKET_FILTER);
+	value |= XGMAC_FILTER_IPFE;
+	writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+	ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value);
+	if (ret)
+		return ret;
+
+	if (udp)
+		value |= XGMAC_L4PEN0;
+	else
+		value &= ~XGMAC_L4PEN0;
+
+	value &= ~(XGMAC_L4SPM0 | XGMAC_L4SPIM0);
+	value &= ~(XGMAC_L4DPM0 | XGMAC_L4DPIM0);
+	if (sa) {
+		value |= XGMAC_L4SPM0;
+		if (inv)
+			value |= XGMAC_L4SPIM0;
+	} else {
+		value |= XGMAC_L4DPM0;
+		if (inv)
+			value |= XGMAC_L4DPIM0;
+	}
+
+	ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value);
+	if (ret)
+		return ret;
+
+	if (sa) {
+		value = match & XGMAC_L4SP0;
+
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value);
+		if (ret)
+			return ret;
+	} else {
+		value = (match << XGMAC_L4DP0_SHIFT) & XGMAC_L4DP0;
+
+		ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value);
+		if (ret)
+			return ret;
+	}
+
+	if (!en)
+		return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0);
+
+	return 0;
+}
+
+static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en,
+				     u32 addr)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	writel(addr, ioaddr + XGMAC_ARP_ADDR);
+
+	value = readl(ioaddr + XGMAC_RX_CONFIG);
+	if (en)
+		value |= XGMAC_CONFIG_ARPEN;
+	else
+		value &= ~XGMAC_CONFIG_ARPEN;
+	writel(value, ioaddr + XGMAC_RX_CONFIG);
+}
+
 const struct stmmac_ops dwxgmac210_ops = {
 	.core_init = dwxgmac2_core_init,
 	.set_mac = dwxgmac2_set_mac,
 	.rx_ipc = dwxgmac2_rx_ipc,
 	.rx_queue_enable = dwxgmac2_rx_queue_enable,
 	.rx_queue_prio = dwxgmac2_rx_queue_prio,
-	.tx_queue_prio = NULL,
+	.tx_queue_prio = dwxgmac2_tx_queue_prio,
 	.rx_queue_routing = NULL,
 	.prog_mtl_rx_algorithms = dwxgmac2_prog_mtl_rx_algorithms,
 	.prog_mtl_tx_algorithms = dwxgmac2_prog_mtl_tx_algorithms,
-	.set_mtl_tx_queue_weight = NULL,
+	.set_mtl_tx_queue_weight = dwxgmac2_set_mtl_tx_queue_weight,
 	.map_mtl_to_dma = dwxgmac2_map_mtl_to_dma,
-	.config_cbs = NULL,
-	.dump_regs = NULL,
+	.config_cbs = dwxgmac2_config_cbs,
+	.dump_regs = dwxgmac2_dump_regs,
 	.host_irq_status = dwxgmac2_host_irq_status,
 	.host_mtl_irq_status = dwxgmac2_host_mtl_irq_status,
 	.flow_ctrl = dwxgmac2_flow_ctrl,
 	.pmt = dwxgmac2_pmt,
 	.set_umac_addr = dwxgmac2_set_umac_addr,
 	.get_umac_addr = dwxgmac2_get_umac_addr,
-	.set_eee_mode = NULL,
-	.reset_eee_mode = NULL,
-	.set_eee_timer = NULL,
-	.set_eee_pls = NULL,
+	.set_eee_mode = dwxgmac2_set_eee_mode,
+	.reset_eee_mode = dwxgmac2_reset_eee_mode,
+	.set_eee_timer = dwxgmac2_set_eee_timer,
+	.set_eee_pls = dwxgmac2_set_eee_pls,
 	.pcs_ctrl_ane = NULL,
 	.pcs_rane = NULL,
 	.pcs_get_adv_lp = NULL,
 	.debug = NULL,
 	.set_filter = dwxgmac2_set_filter,
+	.safety_feat_config = dwxgmac3_safety_feat_config,
+	.safety_feat_irq_status = dwxgmac3_safety_feat_irq_status,
+	.safety_feat_dump = dwxgmac3_safety_feat_dump,
+	.set_mac_loopback = dwxgmac2_set_mac_loopback,
+	.rss_configure = dwxgmac2_rss_configure,
+	.update_vlan_hash = dwxgmac2_update_vlan_hash,
+	.rxp_config = dwxgmac3_rxp_config,
+	.get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp,
+	.flex_pps_config = dwxgmac2_flex_pps_config,
+	.sarc_configure = dwxgmac2_sarc_configure,
+	.enable_vlan = dwxgmac2_enable_vlan,
+	.config_l3_filter = dwxgmac2_config_l3_filter,
+	.config_l4_filter = dwxgmac2_config_l4_filter,
+	.set_arp_offload = dwxgmac2_set_arp_offload,
 };
 
 int dwxgmac2_setup(struct stmmac_priv *priv)
@@ -351,11 +1405,13 @@
 		mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
 	mac->link.duplex = 0;
-	mac->link.speed10 = 0;
-	mac->link.speed100 = 0;
-	mac->link.speed1000 = XGMAC_CONFIG_SS_1000;
-	mac->link.speed2500 = XGMAC_CONFIG_SS_2500;
-	mac->link.speed10000 = XGMAC_CONFIG_SS_10000;
+	mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
+	mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
+	mac->link.speed1000 = XGMAC_CONFIG_SS_1000_GMII;
+	mac->link.speed2500 = XGMAC_CONFIG_SS_2500_GMII;
+	mac->link.xgmii.speed2500 = XGMAC_CONFIG_SS_2500;
+	mac->link.xgmii.speed5000 = XGMAC_CONFIG_SS_5000;
+	mac->link.xgmii.speed10000 = XGMAC_CONFIG_SS_10000;
 	mac->link.speed_mask = XGMAC_CONFIG_SS_MASK;
 
 	mac->mii.addr = XGMAC_MDIO_ADDR;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 1d858fd..bd5838c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -26,16 +26,17 @@
 				  struct dma_desc *p)
 {
 	unsigned int rdes3 = le32_to_cpu(p->des3);
-	int ret = good_frame;
 
 	if (unlikely(rdes3 & XGMAC_RDES3_OWN))
 		return dma_own;
-	if (likely(!(rdes3 & XGMAC_RDES3_LD)))
+	if (unlikely(rdes3 & XGMAC_RDES3_CTXT))
 		return discard_frame;
-	if (unlikely(rdes3 & XGMAC_RDES3_ES))
-		ret = discard_frame;
+	if (likely(!(rdes3 & XGMAC_RDES3_LD)))
+		return rx_not_ls;
+	if (unlikely((rdes3 & XGMAC_RDES3_ES) && (rdes3 & XGMAC_RDES3_LD)))
+		return discard_frame;
 
-	return ret;
+	return good_frame;
 }
 
 static int dwxgmac2_get_tx_len(struct dma_desc *p)
@@ -55,7 +56,7 @@
 
 static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
 {
-	p->des3 = cpu_to_le32(XGMAC_RDES3_OWN);
+	p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
 
 	if (!disable_rx_ic)
 		p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC);
@@ -98,11 +99,17 @@
 	unsigned int rdes3 = le32_to_cpu(p->des3);
 	bool desc_valid, ts_valid;
 
+	dma_rmb();
+
 	desc_valid = !(rdes3 & XGMAC_RDES3_OWN) && (rdes3 & XGMAC_RDES3_CTXT);
 	ts_valid = !(rdes3 & XGMAC_RDES3_TSD) && (rdes3 & XGMAC_RDES3_TSA);
 
-	if (likely(desc_valid && ts_valid))
+	if (likely(desc_valid && ts_valid)) {
+		if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff))
+			return -EINVAL;
 		return 0;
+	}
+
 	return -EINVAL;
 }
 
@@ -113,17 +120,14 @@
 	unsigned int rdes3 = le32_to_cpu(p->des3);
 	int ret = -EBUSY;
 
-	if (likely(rdes3 & XGMAC_RDES3_CDA)) {
+	if (likely(rdes3 & XGMAC_RDES3_CDA))
 		ret = dwxgmac2_rx_check_timestamp(next_desc);
-		if (ret)
-			return ret;
-	}
 
-	return ret;
+	return !ret;
 }
 
 static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
-				  int mode, int end)
+				  int mode, int end, int bfsize)
 {
 	dwxgmac2_set_rx_owner(p, disable_rx_ic);
 }
@@ -144,7 +148,7 @@
 
 	p->des2 |= cpu_to_le32(len & XGMAC_TDES2_B1L);
 
-	tdes3 = tot_pkt_len & XGMAC_TDES3_FL;
+	tdes3 |= tot_pkt_len & XGMAC_TDES3_FL;
 	if (is_fs)
 		tdes3 |= XGMAC_TDES3_FD;
 	else
@@ -242,8 +246,8 @@
 
 static void dwxgmac2_set_addr(struct dma_desc *p, dma_addr_t addr)
 {
-	p->des0 = cpu_to_le32(addr);
-	p->des1 = 0;
+	p->des0 = cpu_to_le32(lower_32_bits(addr));
+	p->des1 = cpu_to_le32(upper_32_bits(addr));
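+	/* Stage the value in RSS_DATA, then start an indirect write via
+	 * RSS_ADDR: index, key/table select and the OB busy bit.
+	 */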
 }
 
 static void dwxgmac2_clear(struct dma_desc *p)
@@ -254,6 +258,87 @@
 	p->des3 = 0;
 }
 
+static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash,
+				enum pkt_hash_types *type)
+{
+	unsigned int rdes3 = le32_to_cpu(p->des3);
+	u32 ptype;
+
+	if (rdes3 & XGMAC_RDES3_RSV) {
+		ptype = (rdes3 & XGMAC_RDES3_L34T) >> XGMAC_RDES3_L34T_SHIFT;
+
+		switch (ptype) {
+		case XGMAC_L34T_IP4TCP:
+		case XGMAC_L34T_IP4UDP:
+		case XGMAC_L34T_IP6TCP:
+		case XGMAC_L34T_IP6UDP:
+			*type = PKT_HASH_TYPE_L4;
+			break;
+		default:
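+	/* Program the hash key word by word, then the indirection table,
+	 * using the indirect-access helper.
+	 */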
+			*type = PKT_HASH_TYPE_L3;
+			break;
+		}
+
+		*hash = le32_to_cpu(p->des1);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len)
+{
+	if (le32_to_cpu(p->des3) & XGMAC_RDES3_L34T)
+		*len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL;
+	return 0;
+}
+
+static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
+{
+	p->des2 = cpu_to_le32(lower_32_bits(addr));
+	p->des3 = cpu_to_le32(upper_32_bits(addr));
+}
+
+static void dwxgmac2_set_sarc(struct dma_desc *p, u32 sarc_type)
+{
+	sarc_type <<= XGMAC_TDES3_SAIC_SHIFT;
+
+	p->des3 |= cpu_to_le32(sarc_type & XGMAC_TDES3_SAIC);
+}
+
+static void dwxgmac2_set_vlan_tag(struct dma_desc *p, u16 tag, u16 inner_tag,
+				  u32 inner_type)
+{
+	p->des0 = 0;
+	p->des1 = 0;
+	p->des2 = 0;
+	p->des3 = 0;
+
+	/* Inner VLAN */
+	if (inner_type) {
+		u32 des = inner_tag << XGMAC_TDES2_IVT_SHIFT;
+
+		des &= XGMAC_TDES2_IVT;
+		p->des2 = cpu_to_le32(des);
+
+		des = inner_type << XGMAC_TDES3_IVTIR_SHIFT;
+		des &= XGMAC_TDES3_IVTIR;
+		p->des3 = cpu_to_le32(des | XGMAC_TDES3_IVLTV);
+	}
+
+	/* Outer VLAN */
+	p->des3 |= cpu_to_le32(tag & XGMAC_TDES3_VT);
+	p->des3 |= cpu_to_le32(XGMAC_TDES3_VLTV);
+
+	p->des3 |= cpu_to_le32(XGMAC_TDES3_CTXT);
+}
+
+static void dwxgmac2_set_vlan(struct dma_desc *p, u32 type)
+{
+	type <<= XGMAC_TDES2_VTIR_SHIFT;
+	p->des2 |= cpu_to_le32(type & XGMAC_TDES2_VTIR);
+}
+
 const struct stmmac_desc_ops dwxgmac210_desc_ops = {
 	.tx_status = dwxgmac2_get_tx_status,
 	.rx_status = dwxgmac2_get_rx_status,
@@ -277,4 +362,10 @@
 	.get_addr = dwxgmac2_get_addr,
 	.set_addr = dwxgmac2_set_addr,
 	.clear = dwxgmac2_clear,
+	.get_rx_hash = dwxgmac2_get_rx_hash,
+	.get_rx_header_len = dwxgmac2_get_rx_header_len,
+	.set_sec_addr = dwxgmac2_set_sec_addr,
+	.set_sarc = dwxgmac2_set_sarc,
+	.set_vlan_tag = dwxgmac2_set_vlan_tag,
+	.set_vlan = dwxgmac2_set_vlan,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 2090903..f70ca53 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -27,7 +27,7 @@
 	if (dma_cfg->aal)
 		value |= XGMAC_AAL;
 
-	writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+	writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE);
 }
 
 static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
@@ -44,7 +44,7 @@
 
 static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr,
 				      struct stmmac_dma_cfg *dma_cfg,
-				      u32 dma_rx_phy, u32 chan)
+				      dma_addr_t phy, u32 chan)
 {
 	u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 	u32 value;
@@ -54,12 +54,13 @@
 	value |= (rxpbl << XGMAC_RxPBL_SHIFT) & XGMAC_RxPBL;
 	writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
 
-	writel(dma_rx_phy, ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan));
+	writel(upper_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan));
+	writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan));
 }
 
 static void dwxgmac2_dma_init_tx_chan(void __iomem *ioaddr,
 				      struct stmmac_dma_cfg *dma_cfg,
-				      u32 dma_tx_phy, u32 chan)
+				      dma_addr_t phy, u32 chan)
 {
 	u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
 	u32 value;
@@ -70,7 +71,8 @@
 	value |= XGMAC_OSP;
 	writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
 
-	writel(dma_tx_phy, ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan));
+	writel(upper_32_bits(phy), ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan));
+	writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan));
 }
 
 static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
@@ -91,11 +93,11 @@
 	value |= (axi->axi_rd_osr_lmt << XGMAC_RD_OSR_LMT_SHIFT) &
 		XGMAC_RD_OSR_LMT;
 
+	if (!axi->axi_fb)
+		value |= XGMAC_UNDEF;
+
 	value &= ~XGMAC_BLEN;
 	for (i = 0; i < AXI_BLEN; i++) {
-		if (axi->axi_blen[i])
-			value &= ~XGMAC_UNDEF;
-
 		switch (axi->axi_blen[i]) {
 		case 256:
 			value |= XGMAC_BLEN256;
@@ -122,6 +124,16 @@
 	}
 
 	writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+	writel(XGMAC_TDPS, ioaddr + XGMAC_TX_EDMA_CTRL);
+	writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL);
+}
+
+static void dwxgmac2_dma_dump_regs(void __iomem *ioaddr, u32 *reg_space)
+{
+	int i;
+
+	for (i = (XGMAC_DMA_MODE / 4); i < XGMAC_REGSIZE; i++)
+		reg_space[i] = readl(ioaddr + i * 4);
 }
 
 static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
@@ -147,6 +159,52 @@
 	value &= ~XGMAC_RQS;
 	value |= (rqs << XGMAC_RQS_SHIFT) & XGMAC_RQS;
 
+	if ((fifosz >= 4096) && (qmode != MTL_QUEUE_AVB)) {
+		u32 flow = readl(ioaddr + XGMAC_MTL_RXQ_FLOW_CONTROL(channel));
+		unsigned int rfd, rfa;
+
+		value |= XGMAC_EHFC;
+
+		/* Set Threshold for Activating Flow Control to min 2 frames,
+		 * i.e. 1500 * 2 = 3000 bytes.
+		 *
+		 * Set Threshold for Deactivating Flow Control to min 1 frame,
+		 * i.e. 1500 bytes.
+		 */
+		switch (fifosz) {
+		case 4096:
+			/* This violates the above formula because of the
+			 * FIFO size limit, so overflow may still occur.
+			 */
+			rfd = 0x03; /* Full-2.5K */
+			rfa = 0x01; /* Full-1.5K */
+			break;
+
+		case 8192:
+			rfd = 0x06; /* Full-4K */
+			rfa = 0x0a; /* Full-6K */
+			break;
+
+		case 16384:
+			rfd = 0x06; /* Full-4K */
+			rfa = 0x12; /* Full-10K */
+			break;
+
+		default:
+			rfd = 0x06; /* Full-4K */
+			rfa = 0x1e; /* Full-16K */
+			break;
+		}
+
+		flow &= ~XGMAC_RFD;
+		flow |= rfd << XGMAC_RFD_SHIFT;
+
+		flow &= ~XGMAC_RFA;
+		flow |= rfa << XGMAC_RFA_SHIFT;
+
+		writel(flow, ioaddr + XGMAC_MTL_RXQ_FLOW_CONTROL(channel));
+	}
+
 	writel(value, ioaddr + XGMAC_MTL_RXQ_OPMODE(channel));
 
 	/* Enable MTL RX overflow */
@@ -182,6 +240,9 @@
 			value |= 0x7 << XGMAC_TTC_SHIFT;
 	}
 
+	/* Use static TC to Queue mapping */
+	value |= (channel << XGMAC_Q2TCMAP_SHIFT) & XGMAC_Q2TCMAP;
+
 	value &= ~XGMAC_TXQEN;
 	if (qmode != MTL_QUEUE_AVB)
 		value |= 0x2 << XGMAC_TXQEN_SHIFT;
@@ -250,20 +311,21 @@
 	value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
 	value &= ~XGMAC_RXST;
 	writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
-
-	value = readl(ioaddr + XGMAC_RX_CONFIG);
-	value &= ~XGMAC_CONFIG_RE;
-	writel(value, ioaddr + XGMAC_RX_CONFIG);
 }
 
 static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
 				  struct stmmac_extra_stats *x, u32 chan)
 {
 	u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
+	u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
 	int ret = 0;
 
 	/* ABNORMAL interrupts */
 	if (unlikely(intr_status & XGMAC_AIS)) {
+		if (unlikely(intr_status & XGMAC_RBU)) {
+			x->rx_buf_unav_irq++;
+			ret |= handle_rx;
+		}
 		if (unlikely(intr_status & XGMAC_TPS)) {
 			x->tx_process_stopped_irq++;
 			ret |= tx_hard_error;
@@ -279,20 +341,17 @@
 		x->normal_irq_n++;
 
 		if (likely(intr_status & XGMAC_RI)) {
-			u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
-			if (likely(value & XGMAC_RIE)) {
-				x->rx_normal_irq_n++;
-				ret |= handle_rx;
-			}
+			x->rx_normal_irq_n++;
+			ret |= handle_rx;
 		}
-		if (likely(intr_status & XGMAC_TI)) {
+		if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
 			x->tx_normal_irq_n++;
 			ret |= handle_tx;
 		}
 	}
 
 	/* Clear interrupts */
-	writel(~0x0, ioaddr + XGMAC_DMA_CH_STATUS(chan));
+	writel(intr_en & intr_status, ioaddr + XGMAC_DMA_CH_STATUS(chan));
 
 	return ret;
 }
@@ -304,18 +363,44 @@
 
 	/*  MAC HW feature 0 */
 	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE0);
+	dma_cap->vlins = (hw_cap & XGMAC_HWFEAT_SAVLANINS) >> 27;
 	dma_cap->rx_coe = (hw_cap & XGMAC_HWFEAT_RXCOESEL) >> 16;
 	dma_cap->tx_coe = (hw_cap & XGMAC_HWFEAT_TXCOESEL) >> 14;
+	dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13;
 	dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12;
 	dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11;
-	dma_cap->av &= (hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10;
+	dma_cap->av &= !((hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10);
+	dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9;
+	dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8;
 	dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7;
 	dma_cap->pmt_remote_wake_up = (hw_cap & XGMAC_HWFEAT_RWKSEL) >> 6;
+	dma_cap->vlhash = (hw_cap & XGMAC_HWFEAT_VLHASH) >> 4;
 	dma_cap->mbps_1000 = (hw_cap & XGMAC_HWFEAT_GMIISEL) >> 1;
 
 	/* MAC HW feature 1 */
 	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
+	dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27;
+	dma_cap->hash_tb_sz = (hw_cap & XGMAC_HWFEAT_HASHTBLSZ) >> 24;
+	dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
 	dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
+	dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17;
+
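+	/* The address-width field is encoded; decode it to a width in
+	 * bits, falling back to 32 for unknown values.
+	 */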
+	dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14;
+	switch (dma_cap->addr64) {
+	case 0:
+		dma_cap->addr64 = 32;
+		break;
+	case 1:
+		dma_cap->addr64 = 40;
+		break;
+	case 2:
+		dma_cap->addr64 = 48;
+		break;
+	default:
+		dma_cap->addr64 = 32;
+		break;
+	}
+
 	dma_cap->tx_fifo_size =
 		128 << ((hw_cap & XGMAC_HWFEAT_TXFIFOSIZE) >> 6);
 	dma_cap->rx_fifo_size =
@@ -332,6 +417,14 @@
 		((hw_cap & XGMAC_HWFEAT_TXQCNT) >> 6) + 1;
 	dma_cap->number_rx_queues =
 		((hw_cap & XGMAC_HWFEAT_RXQCNT) >> 0) + 1;
+
+	/* MAC HW feature 3 */
+	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3);
+	dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14;
+	dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13;
+	dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11;
+	dma_cap->frpbs = (hw_cap & XGMAC_HWFEAT_FRPPB) >> 9;
+	dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3;
 }
 
 static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 nchan)
@@ -374,6 +467,23 @@
 	writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
 }
 
+static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
+{
+	u32 value = readl(ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
+	u32 flow = readl(ioaddr + XGMAC_RX_FLOW_CTRL);
+
+	value &= ~XGMAC_TXQEN;
+	if (qmode != MTL_QUEUE_AVB) {
+		value |= 0x2 << XGMAC_TXQEN_SHIFT;
+		writel(0, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(channel));
+	} else {
+		value |= 0x1 << XGMAC_TXQEN_SHIFT;
+		writel(flow & (~XGMAC_RFE), ioaddr + XGMAC_RX_FLOW_CTRL);
+	}
+
+	writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
+}
+
 static void dwxgmac2_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
 {
 	u32 value;
@@ -383,6 +493,22 @@
 	writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
 }
 
+static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan)
+{
+	u32 value = readl(ioaddr + XGMAC_RX_CONFIG);
+
+	value &= ~XGMAC_CONFIG_HDSMS;
+	value |= XGMAC_CONFIG_HDSMS_256; /* Segment max 256 bytes */
+	writel(value, ioaddr + XGMAC_RX_CONFIG);
+
+	value = readl(ioaddr + XGMAC_DMA_CH_CONTROL(chan));
+	if (en)
+		value |= XGMAC_SPH;
+	else
+		value &= ~XGMAC_SPH;
+	writel(value, ioaddr + XGMAC_DMA_CH_CONTROL(chan));
+}
+
 const struct stmmac_dma_ops dwxgmac210_dma_ops = {
 	.reset = dwxgmac2_dma_reset,
 	.init = dwxgmac2_dma_init,
@@ -390,7 +516,7 @@
 	.init_rx_chan = dwxgmac2_dma_init_rx_chan,
 	.init_tx_chan = dwxgmac2_dma_init_tx_chan,
 	.axi = dwxgmac2_dma_axi,
-	.dump_regs = NULL,
+	.dump_regs = dwxgmac2_dma_dump_regs,
 	.dma_rx_mode = dwxgmac2_dma_rx_mode,
 	.dma_tx_mode = dwxgmac2_dma_tx_mode,
 	.enable_dma_irq = dwxgmac2_enable_dma_irq,
@@ -407,5 +533,7 @@
 	.set_rx_tail_ptr = dwxgmac2_set_rx_tail_ptr,
 	.set_tx_tail_ptr = dwxgmac2_set_tx_tail_ptr,
 	.enable_tso = dwxgmac2_enable_tso,
+	.qmode = dwxgmac2_qmode,
 	.set_bfsize = dwxgmac2_set_bfsize,
+	.enable_sph = dwxgmac2_enable_sph,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 5ef91a7..d02cec2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This contains the functions to handle the enhanced descriptors.
 
   Copyright (C) 2007-2014  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -201,6 +191,11 @@
 	if (unlikely(rdes0 & RDES0_OWN))
 		return dma_own;
 
+	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
+		stats->rx_length_errors++;
+		return discard_frame;
+	}
+
 	if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
 		if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) {
 			x->rx_desc++;
@@ -231,9 +226,10 @@
 	 * It doesn't match with the information reported into the databook.
 	 * At any rate, we need to understand if the CSUM hw computation is ok
 	 * and report this info to the upper layers. */
-	ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR),
-				 !!(rdes0 & RDES0_FRAME_TYPE),
-				 !!(rdes0 & ERDES0_RX_MAC_ADDR));
+	if (likely(ret == good_frame))
+		ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR),
+					 !!(rdes0 & RDES0_FRAME_TYPE),
+					 !!(rdes0 & ERDES0_RX_MAC_ADDR));
 
 	if (unlikely(rdes0 & RDES0_DRIBBLING))
 		x->dribbling_bit++;
@@ -259,15 +255,19 @@
 }
 
 static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
-				  int mode, int end)
+				  int mode, int end, int bfsize)
 {
+	int bfsize1;
+
 	p->des0 |= cpu_to_le32(RDES0_OWN);
-	p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK);
+
+	bfsize1 = min(bfsize, BUF_SIZE_8KiB);
+	p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		ehn_desc_rx_set_on_chain(p);
 	else
-		ehn_desc_rx_set_on_ring(p, end);
+		ehn_desc_rx_set_on_ring(p, end, bfsize);
 
 	if (disable_rx_ic)
 		p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC);
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 357309a..3af2e50 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -81,6 +81,7 @@
 	const void *hwtimestamp;
 	const void *mode;
 	const void *tc;
+	const void *mmc;
 	int (*setup)(struct stmmac_priv *priv);
 	int (*quirks)(struct stmmac_priv *priv);
 } stmmac_hw[] = {
@@ -100,6 +101,7 @@
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
 		.tc = NULL,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac100_setup,
 		.quirks = stmmac_dwmac1_quirks,
 	}, {
@@ -117,6 +119,7 @@
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
 		.tc = NULL,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac1000_setup,
 		.quirks = stmmac_dwmac1_quirks,
 	}, {
@@ -133,7 +136,8 @@
 		.mac = &dwmac4_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
-		.tc = NULL,
+		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = stmmac_dwmac4_quirks,
 	}, {
@@ -150,7 +154,8 @@
 		.mac = &dwmac410_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
-		.tc = NULL,
+		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -167,7 +172,8 @@
 		.mac = &dwmac410_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
-		.tc = NULL,
+		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -185,6 +191,7 @@
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
 		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -194,14 +201,15 @@
 		.min_id = DWXGMAC_CORE_2_10,
 		.regs = {
 			.ptp_off = PTP_XGMAC_OFFSET,
-			.mmc_off = 0,
+			.mmc_off = MMC_XGMAC_OFFSET,
 		},
 		.desc = &dwxgmac210_desc_ops,
 		.dma = &dwxgmac210_dma_ops,
 		.mac = &dwxgmac210_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
-		.tc = NULL,
+		.tc = &dwmac510_tc_ops,
+		.mmc = &dwxgmac_mmc_ops,
 		.setup = dwxgmac2_setup,
 		.quirks = NULL,
 	},
@@ -267,6 +275,7 @@
 		mac->ptp = mac->ptp ? : entry->hwtimestamp;
 		mac->mode = mac->mode ? : entry->mode;
 		mac->tc = mac->tc ? : entry->tc;
+		mac->mmc = mac->mmc ? : entry->mmc;
 
 		priv->hw = mac;
 		priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 92b8944..9010d88 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 // Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
 // stmmac HW Interface Callbacks
 
@@ -6,6 +6,7 @@
 #define __STMMAC_HWIF_H__
 
 #include <linux/netdevice.h>
+#include <linux/stmmac.h>
 
 #define stmmac_do_void_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
@@ -33,7 +34,7 @@
 struct stmmac_desc_ops {
 	/* DMA RX descriptor ring initialization */
 	void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode,
-			int end);
+			int end, int bfsize);
 	/* DMA TX descriptor ring initialization */
 	void (*init_tx_desc)(struct dma_desc *p, int mode, int end);
 	/* Invoked by the xmit function to prepare the tx descriptor */
@@ -85,6 +86,15 @@
 	void (*set_addr)(struct dma_desc *p, dma_addr_t addr);
 	/* clear descriptor */
 	void (*clear)(struct dma_desc *p);
+	/* RSS */
+	int (*get_rx_hash)(struct dma_desc *p, u32 *hash,
+			   enum pkt_hash_types *type);
+	int (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
+	void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
+	void (*set_sarc)(struct dma_desc *p, u32 sarc_type);
+	void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag,
+			     u32 inner_type);
+	void (*set_vlan)(struct dma_desc *p, u32 type);
 };
 
 #define stmmac_init_rx_desc(__priv, __args...) \
@@ -135,6 +145,18 @@
 	stmmac_do_void_callback(__priv, desc, set_addr, __args)
 #define stmmac_clear_desc(__priv, __args...) \
 	stmmac_do_void_callback(__priv, desc, clear, __args)
+#define stmmac_get_rx_hash(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_rx_hash, __args)
+#define stmmac_get_rx_header_len(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_rx_header_len, __args)
+#define stmmac_set_desc_sec_addr(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_sec_addr, __args)
+#define stmmac_set_desc_sarc(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_sarc, __args)
+#define stmmac_set_desc_vlan_tag(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_vlan_tag, __args)
+#define stmmac_set_desc_vlan(__priv, __args...) \
+	stmmac_do_void_callback(__priv, desc, set_vlan, __args)
 
 struct stmmac_dma_cfg;
 struct dma_features;
@@ -149,10 +171,10 @@
 			  struct stmmac_dma_cfg *dma_cfg, u32 chan);
 	void (*init_rx_chan)(void __iomem *ioaddr,
 			     struct stmmac_dma_cfg *dma_cfg,
-			     u32 dma_rx_phy, u32 chan);
+			     dma_addr_t phy, u32 chan);
 	void (*init_tx_chan)(void __iomem *ioaddr,
 			     struct stmmac_dma_cfg *dma_cfg,
-			     u32 dma_tx_phy, u32 chan);
+			     dma_addr_t phy, u32 chan);
 	/* Configure the AXI Bus Mode Register */
 	void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
 	/* Dump DMA registers */
@@ -185,6 +207,7 @@
 	void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
 	void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode);
 	void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
+	void (*enable_sph)(void __iomem *ioaddr, bool en, u32 chan);
 };
 
 #define stmmac_reset(__priv, __args...) \
@@ -241,6 +264,8 @@
 	stmmac_do_void_callback(__priv, dma, qmode, __args)
 #define stmmac_set_dma_bfsize(__priv, __args...) \
 	stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
+#define stmmac_enable_sph(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, enable_sph, __args)
 
 struct mac_device_info;
 struct net_device;
@@ -248,6 +273,7 @@
 struct stmmac_safety_stats;
 struct stmmac_tc_entry;
 struct stmmac_pps_cfg;
+struct stmmac_rss;
 
 /* Helpers to program the MAC core */
 struct stmmac_ops {
@@ -324,6 +350,27 @@
 	int (*flex_pps_config)(void __iomem *ioaddr, int index,
 			       struct stmmac_pps_cfg *cfg, bool enable,
 			       u32 sub_second_inc, u32 systime_flags);
+	/* Loopback for selftests */
+	void (*set_mac_loopback)(void __iomem *ioaddr, bool enable);
+	/* RSS */
+	int (*rss_configure)(struct mac_device_info *hw,
+			     struct stmmac_rss *cfg, u32 num_rxq);
+	/* VLAN */
+	void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash,
+				 bool is_double);
+	void (*enable_vlan)(struct mac_device_info *hw, u32 type);
+	/* TX Timestamp */
+	int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
+	/* Source Address Insertion / Replacement */
+	void (*sarc_configure)(void __iomem *ioaddr, int val);
+	/* Filtering */
+	int (*config_l3_filter)(struct mac_device_info *hw, u32 filter_no,
+				bool en, bool ipv6, bool sa, bool inv,
+				u32 match);
+	int (*config_l4_filter)(struct mac_device_info *hw, u32 filter_no,
+				bool en, bool udp, bool sa, bool inv,
+				u32 match);
+	void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -392,6 +439,24 @@
 	stmmac_do_callback(__priv, mac, rxp_config, __args)
 #define stmmac_flex_pps_config(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, flex_pps_config, __args)
+#define stmmac_set_mac_loopback(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args)
+#define stmmac_rss_configure(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, rss_configure, __args)
+#define stmmac_update_vlan_hash(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args)
+#define stmmac_enable_vlan(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, enable_vlan, __args)
+#define stmmac_get_mac_tx_timestamp(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args)
+#define stmmac_sarc_configure(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, sarc_configure, __args)
+#define stmmac_config_l3_filter(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, config_l3_filter, __args)
+#define stmmac_config_l4_filter(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, config_l4_filter, __args)
+#define stmmac_set_arp_offload(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_arp_offload, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -448,6 +513,7 @@
 struct stmmac_priv;
 struct tc_cls_u32_offload;
 struct tc_cbs_qopt_offload;
+struct flow_cls_offload;
 
 struct stmmac_tc_ops {
 	int (*init)(struct stmmac_priv *priv);
@@ -455,6 +521,8 @@
 			     struct tc_cls_u32_offload *cls);
 	int (*setup_cbs)(struct stmmac_priv *priv,
 			 struct tc_cbs_qopt_offload *qopt);
+	int (*setup_cls)(struct stmmac_priv *priv,
+			 struct flow_cls_offload *cls);
 };
 
 #define stmmac_tc_init(__priv, __args...) \
@@ -463,6 +531,23 @@
 	stmmac_do_callback(__priv, tc, setup_cls_u32, __args)
 #define stmmac_tc_setup_cbs(__priv, __args...) \
 	stmmac_do_callback(__priv, tc, setup_cbs, __args)
+#define stmmac_tc_setup_cls(__priv, __args...) \
+	stmmac_do_callback(__priv, tc, setup_cls, __args)
+
+struct stmmac_counters;
+
+struct stmmac_mmc_ops {
+	void (*ctrl)(void __iomem *ioaddr, unsigned int mode);
+	void (*intr_all_mask)(void __iomem *ioaddr);
+	void (*read)(void __iomem *ioaddr, struct stmmac_counters *mmc);
+};
+
+#define stmmac_mmc_ctrl(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mmc, ctrl, __args)
+#define stmmac_mmc_intr_all_mask(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mmc, intr_all_mask, __args)
+#define stmmac_mmc_read(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mmc, read, __args)
 
 struct stmmac_regs_off {
 	u32 ptp_off;
@@ -482,6 +567,8 @@
 extern const struct stmmac_ops dwxgmac210_ops;
 extern const struct stmmac_dma_ops dwxgmac210_dma_ops;
 extern const struct stmmac_desc_ops dwxgmac210_desc_ops;
+extern const struct stmmac_mmc_ops dwmac_mmc_ops;
+extern const struct stmmac_mmc_ops dwxgmac_mmc_ops;
 
 #define GMAC_VERSION		0x00000020	/* GMAC CORE Version */
 #define GMAC4_VERSION		0x00000110	/* GMAC4+ CORE Version */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index c037326..a0c0592 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   MMC Header file
 
   Copyright (C) 2011  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -34,6 +24,7 @@
 
 #define MMC_GMAC4_OFFSET		0x700
 #define MMC_GMAC3_X_OFFSET		0x100
+#define MMC_XGMAC_OFFSET		0x800
 
 struct stmmac_counters {
 	unsigned int mmc_tx_octetcount_gb;
@@ -126,10 +117,14 @@
 	unsigned int mmc_rx_tcp_err_octets;
 	unsigned int mmc_rx_icmp_gd_octets;
 	unsigned int mmc_rx_icmp_err_octets;
-};
 
-void dwmac_mmc_ctrl(void __iomem *ioaddr, unsigned int mode);
-void dwmac_mmc_intr_all_mask(void __iomem *ioaddr);
-void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc);
+	/* FPE */
+	unsigned int mmc_tx_fpe_fragment_cntr;
+	unsigned int mmc_tx_hold_req_cntr;
+	unsigned int mmc_rx_packet_assembly_err_cntr;
+	unsigned int mmc_rx_packet_smd_err_cntr;
+	unsigned int mmc_rx_packet_assembly_ok_cntr;
+	unsigned int mmc_rx_fpe_fragment_cntr;
+};
 
 #endif /* __MMC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index e9b04c2..252cf48 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -1,25 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   DWMAC Management Counters
 
   Copyright (C) 2011  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include "hwif.h"
 #include "mmc.h"
 
 /* MAC Management Counters register offset */
@@ -128,7 +119,66 @@
 #define MMC_RX_ICMP_GD_OCTETS		0x180
 #define MMC_RX_ICMP_ERR_OCTETS		0x184
 
-void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
+/* XGMAC MMC Registers */
+#define MMC_XGMAC_TX_OCTET_GB		0x14
+#define MMC_XGMAC_TX_PKT_GB		0x1c
+#define MMC_XGMAC_TX_BROAD_PKT_G	0x24
+#define MMC_XGMAC_TX_MULTI_PKT_G	0x2c
+#define MMC_XGMAC_TX_64OCT_GB		0x34
+#define MMC_XGMAC_TX_65OCT_GB		0x3c
+#define MMC_XGMAC_TX_128OCT_GB		0x44
+#define MMC_XGMAC_TX_256OCT_GB		0x4c
+#define MMC_XGMAC_TX_512OCT_GB		0x54
+#define MMC_XGMAC_TX_1024OCT_GB		0x5c
+#define MMC_XGMAC_TX_UNI_PKT_GB		0x64
+#define MMC_XGMAC_TX_MULTI_PKT_GB	0x6c
+#define MMC_XGMAC_TX_BROAD_PKT_GB	0x74
+#define MMC_XGMAC_TX_UNDER		0x7c
+#define MMC_XGMAC_TX_OCTET_G		0x84
+#define MMC_XGMAC_TX_PKT_G		0x8c
+#define MMC_XGMAC_TX_PAUSE		0x94
+#define MMC_XGMAC_TX_VLAN_PKT_G		0x9c
+#define MMC_XGMAC_TX_LPI_USEC		0xa4
+#define MMC_XGMAC_TX_LPI_TRAN		0xa8
+
+#define MMC_XGMAC_RX_PKT_GB		0x100
+#define MMC_XGMAC_RX_OCTET_GB		0x108
+#define MMC_XGMAC_RX_OCTET_G		0x110
+#define MMC_XGMAC_RX_BROAD_PKT_G	0x118
+#define MMC_XGMAC_RX_MULTI_PKT_G	0x120
+#define MMC_XGMAC_RX_CRC_ERR		0x128
+#define MMC_XGMAC_RX_RUNT_ERR		0x130
+#define MMC_XGMAC_RX_JABBER_ERR		0x134
+#define MMC_XGMAC_RX_UNDER		0x138
+#define MMC_XGMAC_RX_OVER		0x13c
+#define MMC_XGMAC_RX_64OCT_GB		0x140
+#define MMC_XGMAC_RX_65OCT_GB		0x148
+#define MMC_XGMAC_RX_128OCT_GB		0x150
+#define MMC_XGMAC_RX_256OCT_GB		0x158
+#define MMC_XGMAC_RX_512OCT_GB		0x160
+#define MMC_XGMAC_RX_1024OCT_GB		0x168
+#define MMC_XGMAC_RX_UNI_PKT_G		0x170
+#define MMC_XGMAC_RX_LENGTH_ERR		0x178
+#define MMC_XGMAC_RX_RANGE		0x180
+#define MMC_XGMAC_RX_PAUSE		0x188
+#define MMC_XGMAC_RX_FIFOOVER_PKT	0x190
+#define MMC_XGMAC_RX_VLAN_PKT_GB	0x198
+#define MMC_XGMAC_RX_WATCHDOG_ERR	0x1a0
+#define MMC_XGMAC_RX_LPI_USEC		0x1a4
+#define MMC_XGMAC_RX_LPI_TRAN		0x1a8
+#define MMC_XGMAC_RX_DISCARD_PKT_GB	0x1ac
+#define MMC_XGMAC_RX_DISCARD_OCT_GB	0x1b4
+#define MMC_XGMAC_RX_ALIGN_ERR_PKT	0x1bc
+
+#define MMC_XGMAC_TX_FPE_FRAG		0x208
+#define MMC_XGMAC_TX_HOLD_REQ		0x20c
+#define MMC_XGMAC_RX_PKT_ASSEMBLY_ERR	0x228
+#define MMC_XGMAC_RX_PKT_SMD_ERR	0x22c
+#define MMC_XGMAC_RX_PKT_ASSEMBLY_OK	0x230
+#define MMC_XGMAC_RX_FPE_FRAG		0x234
+#define MMC_XGMAC_RX_IPC_INTR_MASK	0x25c
+
+static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
 {
 	u32 value = readl(mmcaddr + MMC_CNTRL);
 
@@ -141,7 +191,7 @@
 }
 
 /* To mask all interrupts. */
-void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
+static void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
 {
 	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK);
 	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK);
@@ -153,7 +203,7 @@
  * counter after a read. So all the fields of the mmc struct
  * have to be incremented.
  */
-void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
+static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
 {
 	mmc->mmc_tx_octetcount_gb += readl(mmcaddr + MMC_TX_OCTETCOUNT_GB);
 	mmc->mmc_tx_framecount_gb += readl(mmcaddr + MMC_TX_FRAMECOUNT_GB);
@@ -266,3 +316,144 @@
 	mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS);
 	mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS);
 }
+
+const struct stmmac_mmc_ops dwmac_mmc_ops = {
+	.ctrl = dwmac_mmc_ctrl,
+	.intr_all_mask = dwmac_mmc_intr_all_mask,
+	.read = dwmac_mmc_read,
+};
+
+static void dwxgmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
+{
+	u32 value = readl(mmcaddr + MMC_CNTRL);
+
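+	/* Only the low six bits of the requested mode are applied */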
+	value |= (mode & 0x3F);
+
+	writel(value, mmcaddr + MMC_CNTRL);
+}
+
+static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr)
+{
+	writel(0x0, mmcaddr + MMC_RX_INTR_MASK);
+	writel(0x0, mmcaddr + MMC_TX_INTR_MASK);
+	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK);
+}
+
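+/* XGMAC MMC counters are 64 bits wide, split across two consecutive
+ * 32-bit registers (low word at reg, high word at reg + 0x4).
+ */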
+static void dwxgmac_read_mmc_reg(void __iomem *addr, u32 reg, u32 *dest)
+{
+	u64 tmp = 0;
+
+	tmp += readl(addr + reg);
+	tmp += ((u64)readl(addr + reg + 0x4)) << 32;
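+	/* Saturate: the stmmac_counters fields are only 32 bits wide */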
+	if (tmp > GENMASK(31, 0))
+		*dest = ~0x0;
+	else
+		*dest = *dest + tmp;
+}
+
+/* This reads the MAC core counters (if actually supported).
+ * By default the MMC core is programmed to reset each
+ * counter after a read, so all the fields of the mmc struct
+ * have to be incremented.
+ */
+static void dwxgmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
+{
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_OCTET_GB,
+			     &mmc->mmc_tx_octetcount_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_PKT_GB,
+			     &mmc->mmc_tx_framecount_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_BROAD_PKT_G,
+			     &mmc->mmc_tx_broadcastframe_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_MULTI_PKT_G,
+			     &mmc->mmc_tx_multicastframe_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_64OCT_GB,
+			     &mmc->mmc_tx_64_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_65OCT_GB,
+			     &mmc->mmc_tx_65_to_127_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_128OCT_GB,
+			     &mmc->mmc_tx_128_to_255_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_256OCT_GB,
+			     &mmc->mmc_tx_256_to_511_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_512OCT_GB,
+			     &mmc->mmc_tx_512_to_1023_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_1024OCT_GB,
+			     &mmc->mmc_tx_1024_to_max_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_UNI_PKT_GB,
+			     &mmc->mmc_tx_unicast_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_MULTI_PKT_GB,
+			     &mmc->mmc_tx_multicast_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_BROAD_PKT_GB,
+			     &mmc->mmc_tx_broadcast_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_UNDER,
+			     &mmc->mmc_tx_underflow_error);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_OCTET_G,
+			     &mmc->mmc_tx_octetcount_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_PKT_G,
+			     &mmc->mmc_tx_framecount_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_PAUSE,
+			     &mmc->mmc_tx_pause_frame);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_TX_VLAN_PKT_G,
+			     &mmc->mmc_tx_vlan_frame_g);
+
+	/* MMC RX counter registers */
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_PKT_GB,
+			     &mmc->mmc_rx_framecount_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_OCTET_GB,
+			     &mmc->mmc_rx_octetcount_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_OCTET_G,
+			     &mmc->mmc_rx_octetcount_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_BROAD_PKT_G,
+			     &mmc->mmc_rx_broadcastframe_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_MULTI_PKT_G,
+			     &mmc->mmc_rx_multicastframe_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_CRC_ERR,
+			     &mmc->mmc_rx_crc_error);
+	mmc->mmc_rx_run_error += readl(mmcaddr + MMC_XGMAC_RX_RUNT_ERR);
+	mmc->mmc_rx_jabber_error += readl(mmcaddr + MMC_XGMAC_RX_JABBER_ERR);
+	mmc->mmc_rx_undersize_g += readl(mmcaddr + MMC_XGMAC_RX_UNDER);
+	mmc->mmc_rx_oversize_g += readl(mmcaddr + MMC_XGMAC_RX_OVER);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_64OCT_GB,
+			     &mmc->mmc_rx_64_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_65OCT_GB,
+			     &mmc->mmc_rx_65_to_127_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_128OCT_GB,
+			     &mmc->mmc_rx_128_to_255_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_256OCT_GB,
+			     &mmc->mmc_rx_256_to_511_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_512OCT_GB,
+			     &mmc->mmc_rx_512_to_1023_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_1024OCT_GB,
+			     &mmc->mmc_rx_1024_to_max_octets_gb);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_UNI_PKT_G,
+			     &mmc->mmc_rx_unicast_g);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_LENGTH_ERR,
+			     &mmc->mmc_rx_length_error);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_RANGE,
+			     &mmc->mmc_rx_autofrangetype);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_PAUSE,
+			     &mmc->mmc_rx_pause_frames);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_FIFOOVER_PKT,
+			     &mmc->mmc_rx_fifo_overflow);
+	dwxgmac_read_mmc_reg(mmcaddr, MMC_XGMAC_RX_VLAN_PKT_GB,
+			     &mmc->mmc_rx_vlan_frames_gb);
+	mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_XGMAC_RX_WATCHDOG_ERR);
+
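+	/* Frame Preemption (FPE) counters are plain 32-bit reads */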
+	mmc->mmc_tx_fpe_fragment_cntr += readl(mmcaddr + MMC_XGMAC_TX_FPE_FRAG);
+	mmc->mmc_tx_hold_req_cntr += readl(mmcaddr + MMC_XGMAC_TX_HOLD_REQ);
+	mmc->mmc_rx_packet_assembly_err_cntr +=
+		readl(mmcaddr + MMC_XGMAC_RX_PKT_ASSEMBLY_ERR);
+	mmc->mmc_rx_packet_smd_err_cntr +=
+		readl(mmcaddr + MMC_XGMAC_RX_PKT_SMD_ERR);
+	mmc->mmc_rx_packet_assembly_ok_cntr +=
+		readl(mmcaddr + MMC_XGMAC_RX_PKT_ASSEMBLY_OK);
+	mmc->mmc_rx_fpe_fragment_cntr +=
+		readl(mmcaddr + MMC_XGMAC_RX_FPE_FRAG);
+}
+
+const struct stmmac_mmc_ops dwxgmac_mmc_ops = {
+	.ctrl = dwxgmac_mmc_ctrl,
+	.intr_all_mask = dwxgmac_mmc_intr_all_mask,
+	.read = dwxgmac_mmc_read,
+};
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index de65bb2..f083360 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This contains the functions to handle the normal descriptors.
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -91,8 +81,6 @@
 		return dma_own;
 
 	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
-		pr_warn("%s: Oversized frame spanned multiple buffers\n",
-			__func__);
 		stats->rx_length_errors++;
 		return discard_frame;
 	}
@@ -135,15 +123,19 @@
 }
 
 static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
-			       int end)
+			       int end, int bfsize)
 {
+	int bfsize1;
+
 	p->des0 |= cpu_to_le32(RDES0_OWN);
-	p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK);
+
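+	/* Buffer 1 size is capped at the normal descriptor limit (2 KiB - 1) */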
+	bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
+	p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		ndesc_rx_set_on_chain(p, end);
 	else
-		ndesc_rx_set_on_ring(p, end);
+		ndesc_rx_set_on_ring(p, end, bfsize);
 
 	if (disable_rx_ic)
 		p->des1 |= cpu_to_le32(RDES1_DISABLE_IC);
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index bc83ced..14bd5e7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   Specialised functions for managing Ring mode
 
@@ -7,17 +8,6 @@
   descriptors in case of the DMA is configured to work in chained or
   in ring mode.
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -79,7 +69,8 @@
 
 		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
 		stmmac_prepare_tx_desc(priv, desc, 0, len, csum,
-				STMMAC_RING_MODE, 1, true, skb->len);
+				STMMAC_RING_MODE, 1, !skb_is_nonlinear(skb),
+				skb->len);
 	} else {
 		des2 = dma_map_single(priv->device, skb->data,
 				      nopaged_len, DMA_TO_DEVICE);
@@ -91,7 +82,8 @@
 		tx_q->tx_skbuff_dma[entry].is_jumbo = true;
 		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
 		stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum,
-				STMMAC_RING_MODE, 0, true, skb->len);
+				STMMAC_RING_MODE, 0, !skb_is_nonlinear(skb),
+				skb->len);
 	}
 
 	tx_q->cur_tx = entry;
@@ -111,10 +103,11 @@
 
 static void refill_desc3(void *priv_ptr, struct dma_desc *p)
 {
-	struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
+	struct stmmac_rx_queue *rx_q = priv_ptr;
+	struct stmmac_priv *priv = rx_q->priv_data;
 
 	/* Fill DES3 in case of RING mode */
-	if (priv->dma_buf_sz >= BUF_SIZE_8KiB)
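+	/* DES3 holds the second 8 KiB half of a 16 KiB buffer */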
+	if (priv->dma_buf_sz == BUF_SIZE_16KiB)
 		p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 63e1064..d993fc7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -1,17 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -23,12 +13,15 @@
 #define DRV_MODULE_VERSION	"Jan_2016"
 
 #include <linux/clk.h>
+#include <linux/if_vlan.h>
 #include <linux/stmmac.h>
-#include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/pci.h>
 #include "common.h"
 #include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
 #include <linux/reset.h>
+#include <net/page_pool.h>
 
 struct stmmac_resources {
 	void __iomem *addr;
@@ -63,26 +56,39 @@
 	u32 mss;
 };
 
+struct stmmac_rx_buffer {
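+	/* Pages come from page_pool; sec_page/sec_addr are used only
+	 * when Split Header (SPH) is enabled.
+	 */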
+	struct page *page;
+	struct page *sec_page;
+	dma_addr_t addr;
+	dma_addr_t sec_addr;
+};
+
 struct stmmac_rx_queue {
+	u32 rx_count_frames;
 	u32 queue_index;
+	struct page_pool *page_pool;
+	struct stmmac_rx_buffer *buf_pool;
 	struct stmmac_priv *priv_data;
 	struct dma_extended_desc *dma_erx;
 	struct dma_desc *dma_rx ____cacheline_aligned_in_smp;
-	struct sk_buff **rx_skbuff;
-	dma_addr_t *rx_skbuff_dma;
 	unsigned int cur_rx;
 	unsigned int dirty_rx;
 	u32 rx_zeroc_thresh;
 	dma_addr_t dma_rx_phy;
 	u32 rx_tail_addr;
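+	/* State saved when a received packet spans more than one NAPI poll */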
+	unsigned int state_saved;
+	struct {
+		struct sk_buff *skb;
+		unsigned int len;
+		unsigned int error;
+	} state;
 };
 
 struct stmmac_channel {
-	struct napi_struct napi ____cacheline_aligned_in_smp;
+	struct napi_struct rx_napi ____cacheline_aligned_in_smp;
+	struct napi_struct tx_napi ____cacheline_aligned_in_smp;
 	struct stmmac_priv *priv_data;
 	u32 index;
-	int has_rx;
-	int has_tx;
 };
 
 struct stmmac_tc_entry {
@@ -116,15 +122,34 @@
 	struct timespec64 period;
 };
 
+struct stmmac_rss {
+	int enable;
+	u8 key[STMMAC_RSS_HASH_KEY_SIZE];
+	u32 table[STMMAC_RSS_MAX_TABLE_SIZE];
+};
+
+#define STMMAC_FLOW_ACTION_DROP		BIT(0)
+struct stmmac_flow_entry {
+	unsigned long cookie;
+	unsigned long action;
+	u8 ip_proto;
+	int in_use;
+	int idx;
+	int is_l4;
+};
+
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
+	u32 rx_coal_frames;
 
 	int tx_coalesce;
 	int hwts_tx_en;
 	bool tx_path_in_lpi_mode;
 	bool tso;
+	int sph;
+	u32 sarc_type;
 
 	unsigned int dma_buf_sz;
 	unsigned int rx_copybreak;
@@ -147,14 +172,15 @@
 	/* Generic channel for NAPI */
 	struct stmmac_channel channel[STMMAC_CH_MAX];
 
-	bool oldlink;
 	int speed;
-	int oldduplex;
 	unsigned int flow_ctrl;
 	unsigned int pause;
 	struct mii_bus *mii;
 	int mii_irq[PHY_MAX_ADDR];
 
+	struct phylink_config phylink_config;
+	struct phylink *phylink;
+
 	struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp;
 	struct stmmac_safety_stats sstats;
 	struct plat_stmmacenet_data *plat;
@@ -174,6 +200,7 @@
 	unsigned int mode;
 	unsigned int chain_mode;
 	int extend_desc;
+	struct hwtstamp_config tstamp_config;
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_clock_ops;
 	unsigned int default_addend;
@@ -185,11 +212,10 @@
 	spinlock_t ptp_lock;
 	void __iomem *mmcaddr;
 	void __iomem *ptpaddr;
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dbgfs_dir;
-	struct dentry *dbgfs_rings_status;
-	struct dentry *dbgfs_dma_cap;
 #endif
 
 	unsigned long state;
@@ -200,9 +226,14 @@
 	unsigned int tc_entries_max;
 	unsigned int tc_off_max;
 	struct stmmac_tc_entry *tc_entries;
+	unsigned int flow_entries_max;
+	struct stmmac_flow_entry *flow_entries;
 
 	/* Pulse Per Second output */
 	struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
+
+	/* Receive Side Scaling */
+	struct stmmac_rss rss;
 };
 
 enum stmmac_state {
@@ -228,4 +259,26 @@
 void stmmac_disable_eee_mode(struct stmmac_priv *priv);
 bool stmmac_eee_init(struct stmmac_priv *priv);
 
+#if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
+void stmmac_selftest_run(struct net_device *dev,
+			 struct ethtool_test *etest, u64 *buf);
+void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data);
+int stmmac_selftest_get_count(struct stmmac_priv *priv);
+#else
+static inline void stmmac_selftest_run(struct net_device *dev,
+				       struct ethtool_test *etest, u64 *buf)
+{
+	/* Not enabled */
+}
+static inline void stmmac_selftest_get_strings(struct stmmac_priv *priv,
+					       u8 *data)
+{
+	/* Not enabled */
+}
+static inline int stmmac_selftest_get_count(struct stmmac_priv *priv)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_STMMAC_SELFTESTS */
+
 #endif /* __STMMAC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 5710864..1a76883 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   STMMAC Ethtool support
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -22,16 +12,18 @@
 #include <linux/ethtool.h>
 #include <linux/interrupt.h>
 #include <linux/mii.h>
-#include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/net_tstamp.h>
 #include <asm/io.h>
 
 #include "stmmac.h"
 #include "dwmac_dma.h"
+#include "dwxgmac2.h"
 
 #define REG_SPACE_SIZE	0x1060
 #define MAC100_ETHTOOL_NAME	"st_mac100"
 #define GMAC_ETHTOOL_NAME	"st_gmac"
+#define XGMAC_ETHTOOL_NAME	"st_xgmac"
 
 #define ETHTOOL_DMA_OFFSET	55
 
@@ -75,6 +67,7 @@
 	STMMAC_STAT(rx_missed_cntr),
 	STMMAC_STAT(rx_overflow_cntr),
 	STMMAC_STAT(rx_vlan),
+	STMMAC_STAT(rx_split_hdr_pkt_n),
 	/* Tx/Rx IRQ error info */
 	STMMAC_STAT(tx_undeflow_irq),
 	STMMAC_STAT(tx_process_stopped_irq),
@@ -253,6 +246,12 @@
 	STMMAC_MMC_STAT(mmc_rx_tcp_err_octets),
 	STMMAC_MMC_STAT(mmc_rx_icmp_gd_octets),
 	STMMAC_MMC_STAT(mmc_rx_icmp_err_octets),
+	STMMAC_MMC_STAT(mmc_tx_fpe_fragment_cntr),
+	STMMAC_MMC_STAT(mmc_tx_hold_req_cntr),
+	STMMAC_MMC_STAT(mmc_rx_packet_assembly_err_cntr),
+	STMMAC_MMC_STAT(mmc_rx_packet_smd_err_cntr),
+	STMMAC_MMC_STAT(mmc_rx_packet_assembly_ok_cntr),
+	STMMAC_MMC_STAT(mmc_rx_fpe_fragment_cntr),
 };
 #define STMMAC_MMC_STATS_LEN ARRAY_SIZE(stmmac_mmc)
 
@@ -263,6 +262,8 @@
 
 	if (priv->plat->has_gmac || priv->plat->has_gmac4)
 		strlcpy(info->driver, GMAC_ETHTOOL_NAME, sizeof(info->driver));
+	else if (priv->plat->has_xgmac)
+		strlcpy(info->driver, XGMAC_ETHTOOL_NAME, sizeof(info->driver));
 	else
 		strlcpy(info->driver, MAC100_ETHTOOL_NAME,
 			sizeof(info->driver));
@@ -274,7 +275,6 @@
 					     struct ethtool_link_ksettings *cmd)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	struct phy_device *phy = dev->phydev;
 
 	if (priv->hw->pcs & STMMAC_PCS_RGMII ||
 	    priv->hw->pcs & STMMAC_PCS_SGMII) {
@@ -353,18 +353,7 @@
 		return 0;
 	}
 
-	if (phy == NULL) {
-		pr_err("%s: %s: PHY is not registered\n",
-		       __func__, dev->name);
-		return -ENODEV;
-	}
-	if (!netif_running(dev)) {
-		pr_err("%s: interface is disabled: we cannot track "
-		"link speed / duplex setting\n", dev->name);
-		return -EBUSY;
-	}
-	phy_ethtool_ksettings_get(phy, cmd);
-	return 0;
+	return phylink_ethtool_ksettings_get(priv->phylink, cmd);
 }
 
 static int
@@ -372,8 +361,6 @@
 				  const struct ethtool_link_ksettings *cmd)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	struct phy_device *phy = dev->phydev;
-	int rc;
 
 	if (priv->hw->pcs & STMMAC_PCS_RGMII ||
 	    priv->hw->pcs & STMMAC_PCS_SGMII) {
@@ -397,9 +384,7 @@
 		return 0;
 	}
 
-	rc = phy_ethtool_ksettings_set(phy, cmd);
-
-	return rc;
+	return phylink_ethtool_ksettings_set(priv->phylink, cmd);
 }
 
 static u32 stmmac_ethtool_getmsglevel(struct net_device *dev)
@@ -424,23 +409,35 @@
 
 static int stmmac_ethtool_get_regs_len(struct net_device *dev)
 {
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	if (priv->plat->has_xgmac)
+		return XGMAC_REGSIZE * 4;
 	return REG_SPACE_SIZE;
 }
 
 static void stmmac_ethtool_gregs(struct net_device *dev,
 			  struct ethtool_regs *regs, void *space)
 {
-	u32 *reg_space = (u32 *) space;
-
 	struct stmmac_priv *priv = netdev_priv(dev);
-
-	memset(reg_space, 0x0, REG_SPACE_SIZE);
+	u32 *reg_space = (u32 *) space;
 
 	stmmac_dump_mac_regs(priv, priv->hw, reg_space);
 	stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);
-	/* Copy DMA registers to where ethtool expects them */
-	memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[DMA_BUS_MODE / 4],
-	       NUM_DWMAC1000_DMA_REGS * 4);
+
+	if (!priv->plat->has_xgmac) {
+		/* Copy DMA registers to where ethtool expects them */
+		memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
+		       &reg_space[DMA_BUS_MODE / 4],
+		       NUM_DWMAC1000_DMA_REGS * 4);
+	}
+}
+
+static int stmmac_nway_reset(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	return phylink_ethtool_nway_reset(priv->phylink);
 }
 
 static void
@@ -450,26 +447,13 @@
 	struct stmmac_priv *priv = netdev_priv(netdev);
 	struct rgmii_adv adv_lp;
 
-	pause->rx_pause = 0;
-	pause->tx_pause = 0;
-
 	if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) {
 		pause->autoneg = 1;
 		if (!adv_lp.pause)
 			return;
 	} else {
-		if (!(netdev->phydev->supported & SUPPORTED_Pause) ||
-		    !(netdev->phydev->supported & SUPPORTED_Asym_Pause))
-			return;
+		phylink_ethtool_get_pauseparam(priv->phylink, pause);
 	}
-
-	pause->autoneg = netdev->phydev->autoneg;
-
-	if (priv->flow_ctrl & FLOW_RX)
-		pause->rx_pause = 1;
-	if (priv->flow_ctrl & FLOW_TX)
-		pause->tx_pause = 1;
-
 }
 
 static int
@@ -477,37 +461,16 @@
 		      struct ethtool_pauseparam *pause)
 {
 	struct stmmac_priv *priv = netdev_priv(netdev);
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	struct phy_device *phy = netdev->phydev;
-	int new_pause = FLOW_OFF;
 	struct rgmii_adv adv_lp;
 
 	if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) {
 		pause->autoneg = 1;
 		if (!adv_lp.pause)
 			return -EOPNOTSUPP;
+		return 0;
 	} else {
-		if (!(phy->supported & SUPPORTED_Pause) ||
-		    !(phy->supported & SUPPORTED_Asym_Pause))
-			return -EOPNOTSUPP;
+		return phylink_ethtool_set_pauseparam(priv->phylink, pause);
 	}
-
-	if (pause->rx_pause)
-		new_pause |= FLOW_RX;
-	if (pause->tx_pause)
-		new_pause |= FLOW_TX;
-
-	priv->flow_ctrl = new_pause;
-	phy->autoneg = pause->autoneg;
-
-	if (phy->autoneg) {
-		if (netif_running(netdev))
-			return phy_start_aneg(phy);
-	}
-
-	stmmac_flow_ctrl(priv, priv->hw, phy->duplex, priv->flow_ctrl,
-			priv->pause, tx_cnt);
-	return 0;
 }
 
 static void stmmac_get_ethtool_stats(struct net_device *dev,
@@ -533,7 +496,7 @@
 	if (ret) {
 		/* If supported, for new GMAC chips expose the MMC counters */
 		if (priv->dma_cap.rmon) {
-			dwmac_mmc_read(priv->mmcaddr, &priv->mmc);
+			stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc);
 
 			for (i = 0; i < STMMAC_MMC_STATS_LEN; i++) {
 				char *p;
@@ -545,7 +508,7 @@
 			}
 		}
 		if (priv->eee_enabled) {
-			int val = phy_get_eee_err(dev->phydev);
+			int val = phylink_get_eee_err(priv->phylink);
 			if (val)
 				priv->xstats.phy_eee_wakeup_error_n = val;
 		}
@@ -585,6 +548,8 @@
 		}
 
 		return len;
+	case ETH_SS_TEST:
+		return stmmac_selftest_get_count(priv);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -621,6 +586,9 @@
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_TEST:
+		stmmac_selftest_get_strings(priv, p);
+		break;
 	default:
 		WARN_ON(1);
 		break;
@@ -685,40 +653,45 @@
 	edata->eee_active = priv->eee_active;
 	edata->tx_lpi_timer = priv->tx_lpi_timer;
 
-	return phy_ethtool_get_eee(dev->phydev, edata);
+	return phylink_ethtool_get_eee(priv->phylink, edata);
 }
 
 static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 				     struct ethtool_eee *edata)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	int ret;
 
-	priv->eee_enabled = edata->eee_enabled;
-
-	if (!priv->eee_enabled)
+	if (!edata->eee_enabled) {
 		stmmac_disable_eee_mode(priv);
-	else {
+	} else {
 		/* We are asking for enabling the EEE but it is safe
 		 * to verify all by invoking the eee_init function.
 		 * In case of failure it will return an error.
 		 */
-		priv->eee_enabled = stmmac_eee_init(priv);
-		if (!priv->eee_enabled)
+		edata->eee_enabled = stmmac_eee_init(priv);
+		if (!edata->eee_enabled)
 			return -EOPNOTSUPP;
-
-		/* Do not change tx_lpi_timer in case of failure */
-		priv->tx_lpi_timer = edata->tx_lpi_timer;
 	}
 
-	return phy_ethtool_set_eee(dev->phydev, edata);
+	ret = phylink_ethtool_set_eee(priv->phylink, edata);
+	if (ret)
+		return ret;
+
+	priv->eee_enabled = edata->eee_enabled;
+	priv->tx_lpi_timer = edata->tx_lpi_timer;
+	return 0;
 }
 
 static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv)
 {
 	unsigned long clk = clk_get_rate(priv->plat->stmmac_clk);
 
-	if (!clk)
-		return 0;
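+	/* Fall back to the platform reference clock when stmmac_clk is not set */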
+	if (!clk) {
+		clk = priv->plat->clk_ref_rate;
+		if (!clk)
+			return 0;
+	}
 
 	return (usec * (clk / 1000000)) / 256;
 }
@@ -727,8 +700,11 @@
 {
 	unsigned long clk = clk_get_rate(priv->plat->stmmac_clk);
 
-	if (!clk)
-		return 0;
+	if (!clk) {
+		clk = priv->plat->clk_ref_rate;
+		if (!clk)
+			return 0;
+	}
 
 	return (riwt * 256) / (clk / 1000000);
 }
@@ -741,8 +717,10 @@
 	ec->tx_coalesce_usecs = priv->tx_coal_timer;
 	ec->tx_max_coalesced_frames = priv->tx_coal_frames;
 
-	if (priv->use_riwt)
+	if (priv->use_riwt) {
+		ec->rx_max_coalesced_frames = priv->rx_coal_frames;
 		ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv);
+	}
 
 	return 0;
 }
@@ -755,7 +733,7 @@
 	unsigned int rx_riwt;
 
 	/* Check not supported parameters  */
-	if ((ec->rx_max_coalesced_frames) || (ec->rx_coalesce_usecs_irq) ||
+	if ((ec->rx_coalesce_usecs_irq) ||
 	    (ec->rx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) ||
 	    (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) ||
 	    (ec->pkt_rate_low) || (ec->rx_coalesce_usecs_low) ||
@@ -768,8 +746,15 @@
 	    (ec->tx_max_coalesced_frames_high) || (ec->rate_sample_interval))
 		return -EOPNOTSUPP;
 
-	if (ec->rx_coalesce_usecs == 0)
-		return -EINVAL;
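+	/* Program the RX watchdog only when the HW supports RIWT */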
+	if (priv->use_riwt && (ec->rx_coalesce_usecs > 0)) {
+		rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
+
+		if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
+			return -EINVAL;
+
+		priv->rx_riwt = rx_riwt;
+		stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+	}
 
 	if ((ec->tx_coalesce_usecs == 0) &&
 	    (ec->tx_max_coalesced_frames == 0))
@@ -779,22 +764,83 @@
 	    (ec->tx_max_coalesced_frames > STMMAC_TX_MAX_FRAMES))
 		return -EINVAL;
 
-	rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
-
-	if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
-		return -EINVAL;
-	else if (!priv->use_riwt)
-		return -EOPNOTSUPP;
-
 	/* Only copy relevant parameters, ignore all others. */
 	priv->tx_coal_frames = ec->tx_max_coalesced_frames;
 	priv->tx_coal_timer = ec->tx_coalesce_usecs;
-	priv->rx_riwt = rx_riwt;
-	stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+	priv->rx_coal_frames = ec->rx_max_coalesced_frames;
+	return 0;
+}
+
+static int stmmac_get_rxnfc(struct net_device *dev,
+			    struct ethtool_rxnfc *rxnfc, u32 *rule_locs)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_GRXRINGS:
+		rxnfc->data = priv->plat->rx_queues_to_use;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
 	return 0;
 }
 
+static u32 stmmac_get_rxfh_key_size(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	return sizeof(priv->rss.key);
+}
+
+static u32 stmmac_get_rxfh_indir_size(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	return ARRAY_SIZE(priv->rss.table);
+}
+
+static int stmmac_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+			   u8 *hfunc)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (indir) {
+		for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+			indir[i] = priv->rss.table[i];
+	}
+
+	if (key)
+		memcpy(key, priv->rss.key, sizeof(priv->rss.key));
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	return 0;
+}
+
+static int stmmac_set_rxfh(struct net_device *dev, const u32 *indir,
+			   const u8 *key, const u8 hfunc)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int i;
+
+	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+
+	if (indir) {
+		for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+			priv->rss.table[i] = indir[i];
+	}
+
+	if (key)
+		memcpy(priv->rss.key, key, sizeof(priv->rss.key));
+
+	return stmmac_rss_configure(priv, priv->hw, &priv->rss,
+				    priv->plat->rx_queues_to_use);
+}
+
 static int stmmac_get_ts_info(struct net_device *dev,
 			      struct ethtool_ts_info *info)
 {
@@ -875,9 +921,10 @@
 	.get_regs = stmmac_ethtool_gregs,
 	.get_regs_len = stmmac_ethtool_get_regs_len,
 	.get_link = ethtool_op_get_link,
-	.nway_reset = phy_ethtool_nway_reset,
+	.nway_reset = stmmac_nway_reset,
 	.get_pauseparam = stmmac_get_pauseparam,
 	.set_pauseparam = stmmac_set_pauseparam,
+	.self_test = stmmac_selftest_run,
 	.get_ethtool_stats = stmmac_get_ethtool_stats,
 	.get_strings = stmmac_get_strings,
 	.get_wol = stmmac_get_wol,
@@ -885,6 +932,11 @@
 	.get_eee = stmmac_ethtool_op_get_eee,
 	.set_eee = stmmac_ethtool_op_set_eee,
 	.get_sset_count	= stmmac_get_sset_count,
+	.get_rxnfc = stmmac_get_rxnfc,
+	.get_rxfh_key_size = stmmac_get_rxfh_key_size,
+	.get_rxfh_indir_size = stmmac_get_rxfh_indir_size,
+	.get_rxfh = stmmac_get_rxfh,
+	.set_rxfh = stmmac_set_rxfh,
 	.get_ts_info = stmmac_get_ts_info,
 	.get_coalesce = stmmac_get_coalesce,
 	.set_coalesce = stmmac_set_coalesce,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 8d9cc21..0201596 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   Copyright (C) 2013  Vayavya Labs Pvt Ltd
 
   This implements all the API for managing HW timestamp & PTP.
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Rayagond Kokatanur <rayagond@vayavyalabs.com>
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
@@ -122,7 +112,7 @@
 		 * programmed with (2^32 - <new_sec_value>)
 		 */
 		if (gmac4)
-			sec = (100000000ULL - sec);
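+			/* i.e. 2^32 - sec, via unsigned wrap-around */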
+			sec = -sec;
 
 		value = readl(ioaddr + PTP_TCR);
 		if (value & PTP_TCR_TSCTRLSSR)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 99ea5c4..f826365 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1,20 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This is the driver for the ST MAC 10/100/1000 on-chip Ethernet controllers.
   ST Ethernet IPs are built around a Synopsys IP Core.
 
 	Copyright(C) 2007-2011 STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 
@@ -45,6 +35,7 @@
 #include <linux/seq_file.h>
 #endif /* CONFIG_DEBUG_FS */
 #include <linux/net_tstamp.h>
+#include <linux/phylink.h>
 #include <net/pkt_cls.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
@@ -74,7 +65,7 @@
 #define STMMAC_TX_THRESH	(DMA_TX_SIZE / 4)
 #define STMMAC_RX_THRESH	(DMA_RX_SIZE / 4)
 
-static int flow_ctrl = FLOW_OFF;
+static int flow_ctrl = FLOW_AUTO;
 module_param(flow_ctrl, int, 0644);
 MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
 
@@ -114,7 +105,7 @@
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
 
 #ifdef CONFIG_DEBUG_FS
-static int stmmac_init_fs(struct net_device *dev);
+static void stmmac_init_fs(struct net_device *dev);
 static void stmmac_exit_fs(struct net_device *dev);
 #endif
 
@@ -155,7 +146,10 @@
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
 
-		napi_disable(&ch->napi);
+		if (queue < rx_queues_cnt)
+			napi_disable(&ch->rx_napi);
+		if (queue < tx_queues_cnt)
+			napi_disable(&ch->tx_napi);
 	}
 }
 
@@ -173,7 +167,10 @@
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
 
-		napi_enable(&ch->napi);
+		if (queue < rx_queues_cnt)
+			napi_enable(&ch->rx_napi);
+		if (queue < tx_queues_cnt)
+			napi_enable(&ch->tx_napi);
 	}
 }
 
@@ -322,21 +319,6 @@
 }
 
 /**
- * stmmac_hw_fix_mac_speed - callback for speed selection
- * @priv: driver private structure
- * Description: on some platforms (e.g. ST), some HW system configuration
- * registers have to be set according to the link speed negotiated.
- */
-static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv)
-{
-	struct net_device *ndev = priv->dev;
-	struct phy_device *phydev = ndev->phydev;
-
-	if (likely(priv->plat->fix_mac_speed))
-		priv->plat->fix_mac_speed(priv->plat->bsp_priv, phydev->speed);
-}
-
-/**
  * stmmac_enable_eee_mode - check and enter in LPI mode
  * @priv: driver private structure
  * Description: this function is to verify and enter in LPI mode in case of
@@ -399,14 +381,7 @@
  */
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
-	struct net_device *ndev = priv->dev;
-	int interface = priv->plat->interface;
-	bool ret = false;
-
-	if ((interface != PHY_INTERFACE_MODE_MII) &&
-	    (interface != PHY_INTERFACE_MODE_GMII) &&
-	    !phy_interface_mode_is_rgmii(interface))
-		goto out;
+	int tx_lpi_timer = priv->tx_lpi_timer;
 
 	/* Using PCS we cannot deal with the PHY registers at this stage
 	 * so we do not support extra features like EEE.
@@ -414,52 +389,35 @@
 	if ((priv->hw->pcs == STMMAC_PCS_RGMII) ||
 	    (priv->hw->pcs == STMMAC_PCS_TBI) ||
 	    (priv->hw->pcs == STMMAC_PCS_RTBI))
-		goto out;
+		return false;
 
-	/* MAC core supports the EEE feature. */
-	if (priv->dma_cap.eee) {
-		int tx_lpi_timer = priv->tx_lpi_timer;
+	/* Check if MAC core supports the EEE feature. */
+	if (!priv->dma_cap.eee)
+		return false;
 
-		/* Check if the PHY supports EEE */
-		if (phy_init_eee(ndev->phydev, 1)) {
-			/* To manage at run-time if the EEE cannot be supported
-			 * anymore (for example because the lp caps have been
-			 * changed).
-			 * In that case the driver disable own timers.
-			 */
-			mutex_lock(&priv->lock);
-			if (priv->eee_active) {
-				netdev_dbg(priv->dev, "disable EEE\n");
-				del_timer_sync(&priv->eee_ctrl_timer);
-				stmmac_set_eee_timer(priv, priv->hw, 0,
-						tx_lpi_timer);
-			}
-			priv->eee_active = 0;
-			mutex_unlock(&priv->lock);
-			goto out;
+	mutex_lock(&priv->lock);
+
+	/* Check if it needs to be deactivated */
+	if (!priv->eee_active) {
+		if (priv->eee_enabled) {
+			netdev_dbg(priv->dev, "disable EEE\n");
+			del_timer_sync(&priv->eee_ctrl_timer);
+			stmmac_set_eee_timer(priv, priv->hw, 0, tx_lpi_timer);
 		}
-		/* Activate the EEE and start timers */
-		mutex_lock(&priv->lock);
-		if (!priv->eee_active) {
-			priv->eee_active = 1;
-			timer_setup(&priv->eee_ctrl_timer,
-				    stmmac_eee_ctrl_timer, 0);
-			mod_timer(&priv->eee_ctrl_timer,
-				  STMMAC_LPI_T(eee_timer));
-
-			stmmac_set_eee_timer(priv, priv->hw,
-					STMMAC_DEFAULT_LIT_LS, tx_lpi_timer);
-		}
-		/* Set HW EEE according to the speed */
-		stmmac_set_eee_pls(priv, priv->hw, ndev->phydev->link);
-
-		ret = true;
 		mutex_unlock(&priv->lock);
-
-		netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
+		return false;
 	}
-out:
-	return ret;
+
+	if (priv->eee_active && !priv->eee_enabled) {
+		timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0);
+		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
+		stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
+				     tx_lpi_timer);
+	}
+
+	mutex_unlock(&priv->lock);
+	netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
+	return true;
 }
 
 /* stmmac_get_tx_hwtstamp - get HW TX timestamps
@@ -474,7 +432,8 @@
 				   struct dma_desc *p, struct sk_buff *skb)
 {
 	struct skb_shared_hwtstamps shhwtstamp;
-	u64 ns;
+	bool found = false;
+	u64 ns = 0;
 
 	if (!priv->hwts_tx_en)
 		return;
@@ -485,9 +444,13 @@
 
 	/* check tx tstamp status */
 	if (stmmac_get_tx_timestamp_status(priv, p)) {
-		/* get the valid tstamp */
 		stmmac_get_timestamp(priv, p, priv->adv_ts, &ns);
+		found = true;
+	} else if (!stmmac_get_mac_tx_timestamp(priv, priv->hw, &ns)) {
+		found = true;
+	}
 
+	if (found) {
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
@@ -495,8 +458,6 @@
 		/* pass tstamp to stack */
 		skb_tstamp_tx(skb, &shhwtstamp);
 	}
-
-	return;
 }
 
 /* stmmac_get_rx_hwtstamp - get HW RX timestamps
@@ -513,7 +474,7 @@
 {
 	struct skb_shared_hwtstamps *shhwtstamp = NULL;
 	struct dma_desc *desc = p;
-	u64 ns;
+	u64 ns = 0;
 
 	if (!priv->hwts_rx_en)
 		return;
@@ -534,7 +495,7 @@
 }
 
 /**
- *  stmmac_hwtstamp_ioctl - control hardware timestamping.
+ *  stmmac_hwtstamp_set - control hardware timestamping.
  *  @dev: device pointer.
  *  @ifr: An IOCTL specific structure, that can contain a pointer to
  *  a proprietary structure used to pass information to the driver.
@@ -544,7 +505,7 @@
  *  Return Value:
  *  0 on success and an appropriate -ve integer on failure.
  */
-static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct hwtstamp_config config;
@@ -558,8 +519,8 @@
 	u32 snap_type_sel = 0;
 	u32 ts_master_en = 0;
 	u32 ts_event_en = 0;
+	u32 sec_inc = 0;
 	u32 value = 0;
-	u32 sec_inc;
 	bool xmac;
 
 	xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
@@ -573,7 +534,7 @@
 	}
 
 	if (copy_from_user(&config, ifr->ifr_data,
-			   sizeof(struct hwtstamp_config)))
+			   sizeof(config)))
 		return -EFAULT;
 
 	netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n",
@@ -597,12 +558,13 @@
 		case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 			/* PTP v1, UDP, any kind of event packet */
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
-			/* take time stamp for all event messages */
-			if (xmac)
-				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
-			else
-				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
-
+			/* 'xmac' hardware can support Sync, Pdelay_Req and
+			 * Pdelay_Resp by setting bit 14 and bits 17/16 to 01.
+			 * This leaves Delay_Req timestamps out.
+			 * Enable all events *and* general purpose message
+			 * timestamping.
+			 */
+			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
 			break;
@@ -633,10 +595,7 @@
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
 			ptp_v2 = PTP_TCR_TSVER2ENA;
 			/* take time stamp for all event messages */
-			if (xmac)
-				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
-			else
-				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -669,12 +628,8 @@
 			/* PTP v2/802.AS1 any layer, any kind of event packet */
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 			ptp_v2 = PTP_TCR_TSVER2ENA;
-			/* take time stamp for all event messages */
-			if (xmac)
-				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
-			else
-				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
-
+			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			ts_event_en = PTP_TCR_TSEVNTENA;
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
 			ptp_over_ethernet = PTP_TCR_TSIPENA;
@@ -765,8 +720,31 @@
 				(u32)now.tv_sec, now.tv_nsec);
 	}
 
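+	/* Keep a copy so stmmac_hwtstamp_get() can report the active config */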
+	memcpy(&priv->tstamp_config, &config, sizeof(config));
+
 	return copy_to_user(ifr->ifr_data, &config,
-			    sizeof(struct hwtstamp_config)) ? -EFAULT : 0;
+			    sizeof(config)) ? -EFAULT : 0;
+}
+
+/**
+ *  stmmac_hwtstamp_get - read hardware timestamping.
+ *  @dev: device pointer.
+ *  @ifr: An IOCTL specific structure, that can contain a pointer to
+ *  a proprietary structure used to pass information to the driver.
+ *  Description:
+ *  This function obtains the current hardware timestamping settings
+ *  as requested.
+ */
+static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	struct hwtstamp_config *config = &priv->tstamp_config;
+
+	if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, config,
+			    sizeof(*config)) ? -EFAULT : 0;
 }
 
 /**
@@ -826,97 +804,173 @@
 			priv->pause, tx_cnt);
 }
 
-/**
- * stmmac_adjust_link - adjusts the link parameters
- * @dev: net device structure
- * Description: this is the helper called by the physical abstraction layer
- * drivers to communicate the phy link status. According the speed and duplex
- * this driver can invoke registered glue-logic as well.
- * It also invoke the eee initialization because it could happen when switch
- * on different networks (that are eee capable).
- */
-static void stmmac_adjust_link(struct net_device *dev)
+static void stmmac_validate(struct phylink_config *config,
+			    unsigned long *supported,
+			    struct phylink_link_state *state)
 {
-	struct stmmac_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
-	bool new_state = false;
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
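+	/* mac_supported: everything the MAC can do; mask: modes that must
+	 * be stripped from the request.
+	 */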
+	int tx_cnt = priv->plat->tx_queues_to_use;
+	int max_speed = priv->plat->max_speed;
 
-	if (!phydev)
-		return;
+	phylink_set(mac_supported, 10baseT_Half);
+	phylink_set(mac_supported, 10baseT_Full);
+	phylink_set(mac_supported, 100baseT_Half);
+	phylink_set(mac_supported, 100baseT_Full);
+	phylink_set(mac_supported, 1000baseT_Half);
+	phylink_set(mac_supported, 1000baseT_Full);
+	phylink_set(mac_supported, 1000baseKX_Full);
 
-	mutex_lock(&priv->lock);
+	phylink_set(mac_supported, Autoneg);
+	phylink_set(mac_supported, Pause);
+	phylink_set(mac_supported, Asym_Pause);
+	phylink_set_port_modes(mac_supported);
 
-	if (phydev->link) {
-		u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
-
-		/* Now we make sure that we can be in full duplex mode.
-		 * If not, we operate in half-duplex mode. */
-		if (phydev->duplex != priv->oldduplex) {
-			new_state = true;
-			if (!phydev->duplex)
-				ctrl &= ~priv->hw->link.duplex;
-			else
-				ctrl |= priv->hw->link.duplex;
-			priv->oldduplex = phydev->duplex;
+	/* Cut down 1G if asked to */
+	if ((max_speed > 0) && (max_speed < 1000)) {
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseX_Full);
+	} else if (priv->plat->has_xgmac) {
+		if (!max_speed || (max_speed >= 2500)) {
+			phylink_set(mac_supported, 2500baseT_Full);
+			phylink_set(mac_supported, 2500baseX_Full);
 		}
-		/* Flow Control operation */
-		if (phydev->pause)
-			stmmac_mac_flow_ctrl(priv, phydev->duplex);
-
-		if (phydev->speed != priv->speed) {
-			new_state = true;
-			ctrl &= ~priv->hw->link.speed_mask;
-			switch (phydev->speed) {
-			case SPEED_1000:
-				ctrl |= priv->hw->link.speed1000;
-				break;
-			case SPEED_100:
-				ctrl |= priv->hw->link.speed100;
-				break;
-			case SPEED_10:
-				ctrl |= priv->hw->link.speed10;
-				break;
-			default:
-				netif_warn(priv, link, priv->dev,
-					   "broken speed: %d\n", phydev->speed);
-				phydev->speed = SPEED_UNKNOWN;
-				break;
-			}
-			if (phydev->speed != SPEED_UNKNOWN)
-				stmmac_hw_fix_mac_speed(priv);
-			priv->speed = phydev->speed;
+		if (!max_speed || (max_speed >= 5000)) {
+			phylink_set(mac_supported, 5000baseT_Full);
 		}
-
-		writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
-
-		if (!priv->oldlink) {
-			new_state = true;
-			priv->oldlink = true;
+		if (!max_speed || (max_speed >= 10000)) {
+			phylink_set(mac_supported, 10000baseSR_Full);
+			phylink_set(mac_supported, 10000baseLR_Full);
+			phylink_set(mac_supported, 10000baseER_Full);
+			phylink_set(mac_supported, 10000baseLRM_Full);
+			phylink_set(mac_supported, 10000baseT_Full);
+			phylink_set(mac_supported, 10000baseKX4_Full);
+			phylink_set(mac_supported, 10000baseKR_Full);
 		}
-	} else if (priv->oldlink) {
-		new_state = true;
-		priv->oldlink = false;
-		priv->speed = SPEED_UNKNOWN;
-		priv->oldduplex = DUPLEX_UNKNOWN;
 	}
 
-	if (new_state && netif_msg_link(priv))
-		phy_print_status(phydev);
+	/* Half-duplex can only work with a single queue */
+	if (tx_cnt > 1) {
+		phylink_set(mask, 10baseT_Half);
+		phylink_set(mask, 100baseT_Half);
+		phylink_set(mask, 1000baseT_Half);
+	}
 
-	mutex_unlock(&priv->lock);
-
-	if (phydev->is_pseudo_fixed_link)
-		/* Stop PHY layer to call the hook to adjust the link in case
-		 * of a switch is attached to the stmmac driver.
-		 */
-		phydev->irq = PHY_IGNORE_INTERRUPT;
-	else
-		/* At this stage, init the EEE if supported.
-		 * Never called in case of fixed_link.
-		 */
-		priv->eee_enabled = stmmac_eee_init(priv);
+	bitmap_and(supported, supported, mac_supported,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_andnot(supported, supported, mask,
+		      __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mac_supported,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_andnot(state->advertising, state->advertising, mask,
+		      __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
+static int stmmac_mac_link_state(struct phylink_config *config,
+				 struct phylink_link_state *state)
+{
+	return -EOPNOTSUPP;
+}
+
+static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
+			      const struct phylink_link_state *state)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+	u32 ctrl;
+
+	ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
+	ctrl &= ~priv->hw->link.speed_mask;
+
+	if (state->interface == PHY_INTERFACE_MODE_USXGMII) {
+		switch (state->speed) {
+		case SPEED_10000:
+			ctrl |= priv->hw->link.xgmii.speed10000;
+			break;
+		case SPEED_5000:
+			ctrl |= priv->hw->link.xgmii.speed5000;
+			break;
+		case SPEED_2500:
+			ctrl |= priv->hw->link.xgmii.speed2500;
+			break;
+		default:
+			return;
+		}
+	} else {
+		switch (state->speed) {
+		case SPEED_2500:
+			ctrl |= priv->hw->link.speed2500;
+			break;
+		case SPEED_1000:
+			ctrl |= priv->hw->link.speed1000;
+			break;
+		case SPEED_100:
+			ctrl |= priv->hw->link.speed100;
+			break;
+		case SPEED_10:
+			ctrl |= priv->hw->link.speed10;
+			break;
+		default:
+			return;
+		}
+	}
+
+	priv->speed = state->speed;
+
+	if (priv->plat->fix_mac_speed)
+		priv->plat->fix_mac_speed(priv->plat->bsp_priv, state->speed);
+
+	if (!state->duplex)
+		ctrl &= ~priv->hw->link.duplex;
+	else
+		ctrl |= priv->hw->link.duplex;
+
+	/* Flow Control operation */
+	if (state->pause)
+		stmmac_mac_flow_ctrl(priv, state->duplex);
+
+	writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
+}
+
+static void stmmac_mac_an_restart(struct phylink_config *config)
+{
+	/* Not Supported */
+}
+
+static void stmmac_mac_link_down(struct phylink_config *config,
+				 unsigned int mode, phy_interface_t interface)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+
+	stmmac_mac_set(priv, priv->ioaddr, false);
+	priv->eee_active = false;
+	stmmac_eee_init(priv);
+	stmmac_set_eee_pls(priv, priv->hw, false);
+}
+
+static void stmmac_mac_link_up(struct phylink_config *config,
+			       unsigned int mode, phy_interface_t interface,
+			       struct phy_device *phy)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+
+	stmmac_mac_set(priv, priv->ioaddr, true);
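+	/* Re-evaluate EEE against the attached PHY on each link-up */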
+	if (phy && priv->dma_cap.eee) {
+		priv->eee_active = phy_init_eee(phy, 1) >= 0;
+		priv->eee_enabled = stmmac_eee_init(priv);
+		stmmac_set_eee_pls(priv, priv->hw, true);
+	}
+}
+
+static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
+	.validate = stmmac_validate,
+	.mac_link_state = stmmac_mac_link_state,
+	.mac_config = stmmac_mac_config,
+	.mac_an_restart = stmmac_mac_an_restart,
+	.mac_link_down = stmmac_mac_link_down,
+	.mac_link_up = stmmac_mac_link_up,
+};
+
 /**
  * stmmac_check_pcs_mode - verify if RGMII/SGMII is supported
  * @priv: driver private structure
@@ -953,76 +1007,48 @@
 static int stmmac_init_phy(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 tx_cnt = priv->plat->tx_queues_to_use;
-	struct phy_device *phydev;
-	char phy_id_fmt[MII_BUS_ID_SIZE + 3];
-	char bus_id[MII_BUS_ID_SIZE];
-	int interface = priv->plat->interface;
-	int max_speed = priv->plat->max_speed;
-	priv->oldlink = false;
-	priv->speed = SPEED_UNKNOWN;
-	priv->oldduplex = DUPLEX_UNKNOWN;
+	struct device_node *node;
+	int ret;
 
-	if (priv->plat->phy_node) {
-		phydev = of_phy_connect(dev, priv->plat->phy_node,
-					&stmmac_adjust_link, 0, interface);
-	} else {
-		snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x",
-			 priv->plat->bus_id);
+	node = priv->plat->phylink_node;
 
-		snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
-			 priv->plat->phy_addr);
-		netdev_dbg(priv->dev, "%s: trying to attach to %s\n", __func__,
-			   phy_id_fmt);
+	if (node)
+		ret = phylink_of_phy_connect(priv->phylink, node, 0);
 
-		phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link,
-				     interface);
-	}
+	/* Some DT bindings do not set up the PHY handle. Let's try to
+	 * parse it manually.
+	 */
+	if (!node || ret) {
+		int addr = priv->plat->phy_addr;
+		struct phy_device *phydev;
 
-	if (IS_ERR_OR_NULL(phydev)) {
-		netdev_err(priv->dev, "Could not attach to PHY\n");
-		if (!phydev)
+		phydev = mdiobus_get_phy(priv->mii, addr);
+		if (!phydev) {
+			netdev_err(priv->dev, "no phy at addr %d\n", addr);
 			return -ENODEV;
+		}
 
-		return PTR_ERR(phydev);
+		ret = phylink_connect_phy(priv->phylink, phydev);
 	}
 
-	/* Stop Advertising 1000BASE Capability if interface is not GMII */
-	if ((interface == PHY_INTERFACE_MODE_MII) ||
-	    (interface == PHY_INTERFACE_MODE_RMII) ||
-		(max_speed < 1000 && max_speed > 0))
-		phydev->advertising &= ~(SUPPORTED_1000baseT_Half |
-					 SUPPORTED_1000baseT_Full);
+	return ret;
+}
 
-	/*
-	 * Half-duplex mode not supported with multiqueue
-	 * half-duplex can only works with single queue
-	 */
-	if (tx_cnt > 1)
-		phydev->supported &= ~(SUPPORTED_1000baseT_Half |
-				       SUPPORTED_100baseT_Half |
-				       SUPPORTED_10baseT_Half);
+static int stmmac_phy_setup(struct stmmac_priv *priv)
+{
+	struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
+	int mode = priv->plat->phy_interface;
+	struct phylink *phylink;
 
-	/*
-	 * Broken HW is sometimes missing the pull-up resistor on the
-	 * MDIO line, which results in reads to non-existent devices returning
-	 * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent
-	 * device as well.
-	 * Note: phydev->phy_id is the result of reading the UID PHY registers.
-	 */
-	if (!priv->plat->phy_node && phydev->phy_id == 0) {
-		phy_disconnect(phydev);
-		return -ENODEV;
-	}
+	priv->phylink_config.dev = &priv->dev->dev;
+	priv->phylink_config.type = PHYLINK_NETDEV;
 
-	/* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
-	 * subsequent PHY polling, make sure we force a link transition if
-	 * we have a UP/DOWN/UP transition
-	 */
-	if (phydev->is_pseudo_fixed_link)
-		phydev->irq = PHY_POLL;
+	phylink = phylink_create(&priv->phylink_config, fwnode,
+				 mode, &stmmac_phylink_mac_ops);
+	if (IS_ERR(phylink))
+		return PTR_ERR(phylink);
 
-	phy_attached_info(phydev);
+	priv->phylink = phylink;
 	return 0;
 }
 
@@ -1111,11 +1137,13 @@
 		if (priv->extend_desc)
 			stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic,
 					priv->use_riwt, priv->mode,
-					(i == DMA_RX_SIZE - 1));
+					(i == DMA_RX_SIZE - 1),
+					priv->dma_buf_sz);
 		else
 			stmmac_init_rx_desc(priv, &rx_q->dma_rx[i],
 					priv->use_riwt, priv->mode,
-					(i == DMA_RX_SIZE - 1));
+					(i == DMA_RX_SIZE - 1),
+					priv->dma_buf_sz);
 }
 
 /**
@@ -1175,26 +1203,25 @@
 				  int i, gfp_t flags, u32 queue)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-	struct sk_buff *skb;
+	struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
 
-	skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags);
-	if (!skb) {
-		netdev_err(priv->dev,
-			   "%s: Rx init fails; skb is NULL\n", __func__);
+	buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
+	if (!buf->page)
 		return -ENOMEM;
-	}
-	rx_q->rx_skbuff[i] = skb;
-	rx_q->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
-						priv->dma_buf_sz,
-						DMA_FROM_DEVICE);
-	if (dma_mapping_error(priv->device, rx_q->rx_skbuff_dma[i])) {
-		netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
-		dev_kfree_skb_any(skb);
-		return -EINVAL;
+
+	if (priv->sph) {
+		buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+		if (!buf->sec_page)
+			return -ENOMEM;
+
+		buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+	} else {
+		buf->sec_page = NULL;
 	}
 
-	stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[i]);
-
+	buf->addr = page_pool_get_dma_addr(buf->page);
+	stmmac_set_desc_addr(priv, p, buf->addr);
 	if (priv->dma_buf_sz == BUF_SIZE_16KiB)
 		stmmac_init_desc3(priv, p);
 
@@ -1210,13 +1237,15 @@
 static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
 
-	if (rx_q->rx_skbuff[i]) {
-		dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[i],
-				 priv->dma_buf_sz, DMA_FROM_DEVICE);
-		dev_kfree_skb_any(rx_q->rx_skbuff[i]);
-	}
-	rx_q->rx_skbuff[i] = NULL;
+	if (buf->page)
+		page_pool_put_page(rx_q->page_pool, buf->page, false);
+	buf->page = NULL;
+
+	if (buf->sec_page)
+		page_pool_put_page(rx_q->page_pool, buf->sec_page, false);
+	buf->sec_page = NULL;
 }
 
 /**
@@ -1287,6 +1316,8 @@
 			  "(%s) dma_rx_phy=0x%08x\n", __func__,
 			  (u32)rx_q->dma_rx_phy);
 
+		stmmac_clear_rx_descriptors(priv, queue);
+
 		for (i = 0; i < DMA_RX_SIZE; i++) {
 			struct dma_desc *p;
 
@@ -1299,17 +1330,11 @@
 						     queue);
 			if (ret)
 				goto err_init_rx_buffers;
-
-			netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
-				  rx_q->rx_skbuff[i], rx_q->rx_skbuff[i]->data,
-				  (unsigned int)rx_q->rx_skbuff_dma[i]);
 		}
 
 		rx_q->cur_rx = 0;
 		rx_q->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
 
-		stmmac_clear_rx_descriptors(priv, queue);
-
 		/* Setup the chained descriptor addresses */
 		if (priv->mode == STMMAC_CHAIN_MODE) {
 			if (priv->extend_desc)
@@ -1476,8 +1501,11 @@
 					  sizeof(struct dma_extended_desc),
 					  rx_q->dma_erx, rx_q->dma_rx_phy);
 
-		kfree(rx_q->rx_skbuff_dma);
-		kfree(rx_q->rx_skbuff);
+		kfree(rx_q->buf_pool);
+		if (rx_q->page_pool) {
+			page_pool_request_shutdown(rx_q->page_pool);
+			page_pool_destroy(rx_q->page_pool);
+		}
 	}
 }
 
@@ -1529,39 +1557,45 @@
 	/* RX queues buffers and DMA */
 	for (queue = 0; queue < rx_count; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+		struct page_pool_params pp_params = { 0 };
+		unsigned int num_pages;
 
 		rx_q->queue_index = queue;
 		rx_q->priv_data = priv;
 
-		rx_q->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE,
-						    sizeof(dma_addr_t),
-						    GFP_KERNEL);
-		if (!rx_q->rx_skbuff_dma)
-			goto err_dma;
+		pp_params.flags = PP_FLAG_DMA_MAP;
+		pp_params.pool_size = DMA_RX_SIZE;
+		num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+		pp_params.order = ilog2(num_pages);
+		pp_params.nid = dev_to_node(priv->device);
+		pp_params.dev = priv->device;
+		pp_params.dma_dir = DMA_FROM_DEVICE;
 
-		rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE,
-						sizeof(struct sk_buff *),
-						GFP_KERNEL);
-		if (!rx_q->rx_skbuff)
+		rx_q->page_pool = page_pool_create(&pp_params);
+		if (IS_ERR(rx_q->page_pool)) {
+			ret = PTR_ERR(rx_q->page_pool);
+			rx_q->page_pool = NULL;
+			goto err_dma;
+		}
+
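		/*
		 * Worked example for the order computation above, assuming
		 * 4 KiB pages: a 1536-byte buffer gives num_pages = 1 and
		 * order 0 (one page per buffer), while a 9000-byte jumbo
		 * buffer gives num_pages = 3 but ilog2(3) = 1, i.e. an
		 * order-1 (8 KiB) allocation, since ilog2() rounds down
		 * for non-power-of-two page counts.
		 */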
+		rx_q->buf_pool = kcalloc(DMA_RX_SIZE, sizeof(*rx_q->buf_pool),
+					 GFP_KERNEL);
+		if (!rx_q->buf_pool)
 			goto err_dma;
 
 		if (priv->extend_desc) {
-			rx_q->dma_erx = dma_zalloc_coherent(priv->device,
-							    DMA_RX_SIZE *
-							    sizeof(struct
-							    dma_extended_desc),
-							    &rx_q->dma_rx_phy,
-							    GFP_KERNEL);
+			rx_q->dma_erx = dma_alloc_coherent(priv->device,
+							   DMA_RX_SIZE * sizeof(struct dma_extended_desc),
+							   &rx_q->dma_rx_phy,
+							   GFP_KERNEL);
 			if (!rx_q->dma_erx)
 				goto err_dma;
 
 		} else {
-			rx_q->dma_rx = dma_zalloc_coherent(priv->device,
-							   DMA_RX_SIZE *
-							   sizeof(struct
-							   dma_desc),
-							   &rx_q->dma_rx_phy,
-							   GFP_KERNEL);
+			rx_q->dma_rx = dma_alloc_coherent(priv->device,
+							  DMA_RX_SIZE * sizeof(struct dma_desc),
+							  &rx_q->dma_rx_phy,
+							  GFP_KERNEL);
 			if (!rx_q->dma_rx)
 				goto err_dma;
 		}
@@ -1596,34 +1630,30 @@
 		tx_q->queue_index = queue;
 		tx_q->priv_data = priv;
 
-		tx_q->tx_skbuff_dma = kmalloc_array(DMA_TX_SIZE,
-						    sizeof(*tx_q->tx_skbuff_dma),
-						    GFP_KERNEL);
+		tx_q->tx_skbuff_dma = kcalloc(DMA_TX_SIZE,
+					      sizeof(*tx_q->tx_skbuff_dma),
+					      GFP_KERNEL);
 		if (!tx_q->tx_skbuff_dma)
 			goto err_dma;
 
-		tx_q->tx_skbuff = kmalloc_array(DMA_TX_SIZE,
-						sizeof(struct sk_buff *),
-						GFP_KERNEL);
+		tx_q->tx_skbuff = kcalloc(DMA_TX_SIZE,
+					  sizeof(struct sk_buff *),
+					  GFP_KERNEL);
 		if (!tx_q->tx_skbuff)
 			goto err_dma;
 
 		if (priv->extend_desc) {
-			tx_q->dma_etx = dma_zalloc_coherent(priv->device,
-							    DMA_TX_SIZE *
-							    sizeof(struct
-							    dma_extended_desc),
-							    &tx_q->dma_tx_phy,
-							    GFP_KERNEL);
+			tx_q->dma_etx = dma_alloc_coherent(priv->device,
+							   DMA_TX_SIZE * sizeof(struct dma_extended_desc),
+							   &tx_q->dma_tx_phy,
+							   GFP_KERNEL);
 			if (!tx_q->dma_etx)
 				goto err_dma;
 		} else {
-			tx_q->dma_tx = dma_zalloc_coherent(priv->device,
-							   DMA_TX_SIZE *
-							   sizeof(struct
-								  dma_desc),
-							   &tx_q->dma_tx_phy,
-							   GFP_KERNEL);
+			tx_q->dma_tx = dma_alloc_coherent(priv->device,
+							  DMA_TX_SIZE * sizeof(struct dma_desc),
+							  &tx_q->dma_tx_phy,
+							  GFP_KERNEL);
 			if (!tx_q->dma_tx)
 				goto err_dma;
 		}
@@ -1944,6 +1974,10 @@
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 	}
 
+	/* We still have pending packets, let's reschedule the cleanup */
+	if (tx_q->dirty_tx != tx_q->cur_tx)
+		mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
+
 	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
 
 	return count;
@@ -2034,24 +2068,17 @@
 	int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
 						 &priv->xstats, chan);
 	struct stmmac_channel *ch = &priv->channel[chan];
-	bool needs_work = false;
 
-	if ((status & handle_rx) && ch->has_rx) {
-		needs_work = true;
-	} else {
-		status &= ~handle_rx;
+	if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
+		if (napi_schedule_prep(&ch->rx_napi)) {
+			stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+			__napi_schedule_irqoff(&ch->rx_napi);
+			status |= handle_tx;
+		}
 	}
 
-	if ((status & handle_tx) && ch->has_tx) {
-		needs_work = true;
-	} else {
-		status &= ~handle_tx;
-	}
-
-	if (needs_work && napi_schedule_prep(&ch->napi)) {
-		stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
-		__napi_schedule(&ch->napi);
-	}
+	if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use))
+		napi_schedule_irqoff(&ch->tx_napi);
 
 	return status;
 }
@@ -2113,10 +2140,10 @@
 	unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET |
 			    MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET;
 
-	dwmac_mmc_intr_all_mask(priv->mmcaddr);
+	stmmac_mmc_intr_all_mask(priv, priv->mmcaddr);
 
 	if (priv->dma_cap.rmon) {
-		dwmac_mmc_ctrl(priv->mmcaddr, mode);
+		stmmac_mmc_ctrl(priv, priv->mmcaddr, mode);
 		memset(&priv->mmc, 0, sizeof(struct stmmac_counters));
 	} else
 		netdev_info(priv->dev, "No MAC Management Counters available\n");
@@ -2149,8 +2176,8 @@
 		stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0);
 		if (!is_valid_ether_addr(priv->dev->dev_addr))
 			eth_hw_addr_random(priv->dev);
-		netdev_info(priv->dev, "device MAC address %pM\n",
-			    priv->dev->dev_addr);
+		dev_info(priv->device, "device MAC address %pM\n",
+			 priv->dev->dev_addr);
 	}
 }
 
@@ -2193,6 +2220,10 @@
 	if (priv->plat->axi)
 		stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
 
+	/* DMA CSR Channel configuration */
+	for (chan = 0; chan < dma_csr_ch; chan++)
+		stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
+
 	/* DMA RX Channel Configuration */
 	for (chan = 0; chan < rx_channels_count; chan++) {
 		rx_q = &priv->rx_queue[chan];
@@ -2218,10 +2249,6 @@
 				       tx_q->tx_tail_addr, chan);
 	}
 
-	/* DMA CSR Channel configuration */
-	for (chan = 0; chan < dma_csr_ch; chan++)
-		stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
-
 	return ret;
 }
 
@@ -2246,25 +2273,32 @@
 
 	ch = &priv->channel[tx_q->queue_index];
 
-	if (likely(napi_schedule_prep(&ch->napi)))
-		__napi_schedule(&ch->napi);
+	/*
+	 * If NAPI is already running we can miss some events. Let's rearm
+	 * the timer and try again.
+	 */
+	if (likely(napi_schedule_prep(&ch->tx_napi)))
+		__napi_schedule(&ch->tx_napi);
+	else
+		mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
 }
 
 /**
- * stmmac_init_tx_coalesce - init tx mitigation options.
+ * stmmac_init_coalesce - init mitigation options.
  * @priv: driver private structure
  * Description:
- * This inits the transmit coalesce parameters: i.e. timer rate,
+ * This inits the coalesce parameters, i.e. the timer rate,
  * timer handler and default threshold used for enabling the
  * interrupt on completion bit.
  */
-static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
+static void stmmac_init_coalesce(struct stmmac_priv *priv)
 {
 	u32 tx_channel_count = priv->plat->tx_queues_to_use;
 	u32 chan;
 
 	priv->tx_coal_frames = STMMAC_TX_FRAMES;
 	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
+	priv->rx_coal_frames = STMMAC_RX_FRAMES;
 
 	for (chan = 0; chan < tx_channel_count; chan++) {
 		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
@@ -2411,6 +2445,22 @@
 	}
 }
 
+static void stmmac_mac_config_rss(struct stmmac_priv *priv)
+{
+	if (!priv->dma_cap.rssen || !priv->plat->rss_en) {
+		priv->rss.enable = false;
+		return;
+	}
+
+	if (priv->dev->features & NETIF_F_RXHASH)
+		priv->rss.enable = true;
+	else
+		priv->rss.enable = false;
+
+	stmmac_rss_configure(priv, priv->hw, &priv->rss,
+			     priv->plat->rx_queues_to_use);
+}
+
 /**
  *  stmmac_mtl_configuration - Configure MTL
  *  @priv: driver private structure
@@ -2455,6 +2505,10 @@
 	/* Set RX routing */
 	if (rx_queues_count > 1)
 		stmmac_mac_config_rx_queues_routing(priv);
+
+	/* Receive Side Scaling */
+	if (rx_queues_count > 1)
+		stmmac_mac_config_rss(priv);
 }
 
 static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
@@ -2550,13 +2604,13 @@
 	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
 
 	if (priv->use_riwt) {
-		ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
+		ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt);
 		if (!ret)
-			priv->rx_riwt = MAX_DMA_RIWT;
+			priv->rx_riwt = MIN_DMA_RIWT;
 	}
 
 	if (priv->hw->pcs)
-		stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0);
+		stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
 
 	/* set TX and RX rings length */
 	stmmac_set_rings_length(priv);
@@ -2567,6 +2621,16 @@
 			stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
 	}
 
+	/* Enable Split Header */
+	if (priv->sph && priv->hw->rx_csum) {
+		for (chan = 0; chan < rx_cnt; chan++)
+			stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
+	}
+
+	/* VLAN Tag Insertion */
+	if (priv->dma_cap.vlins)
+		stmmac_enable_vlan(priv, priv->hw, STMMAC_VLAN_INSERT);
+
 	/* Start the ball rolling... */
 	stmmac_start_all_dma(priv);
 
@@ -2595,8 +2659,6 @@
 	u32 chan;
 	int ret;
 
-	stmmac_check_ether_addr(priv);
-
 	if (priv->hw->pcs != STMMAC_PCS_RGMII &&
 	    priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI) {
@@ -2636,10 +2698,9 @@
 		goto init_error;
 	}
 
-	stmmac_init_tx_coalesce(priv);
+	stmmac_init_coalesce(priv);
 
-	if (dev->phydev)
-		phy_start(dev->phydev);
+	phylink_start(priv->phylink);
 
 	/* Request the IRQ lines */
 	ret = request_irq(dev->irq, stmmac_interrupt,
@@ -2686,8 +2747,7 @@
 wolirq_error:
 	free_irq(dev->irq, dev);
 irq_error:
-	if (dev->phydev)
-		phy_stop(dev->phydev);
+	phylink_stop(priv->phylink);
 
 	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
 		del_timer_sync(&priv->tx_queue[chan].txtimer);
@@ -2696,9 +2756,7 @@
 init_error:
 	free_dma_desc_resources(priv);
 dma_desc_error:
-	if (dev->phydev)
-		phy_disconnect(dev->phydev);
-
+	phylink_disconnect_phy(priv->phylink);
 	return ret;
 }
 
@@ -2717,10 +2775,8 @@
 		del_timer_sync(&priv->eee_ctrl_timer);
 
 	/* Stop and disconnect the PHY */
-	if (dev->phydev) {
-		phy_stop(dev->phydev);
-		phy_disconnect(dev->phydev);
-	}
+	phylink_stop(priv->phylink);
+	phylink_disconnect_phy(priv->phylink);
 
 	stmmac_stop_all_queues(priv);
 
@@ -2752,6 +2808,33 @@
 	return 0;
 }
 
+static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb,
+			       struct stmmac_tx_queue *tx_q)
+{
+	u16 tag = 0x0, inner_tag = 0x0;
+	u32 inner_type = 0x0;
+	struct dma_desc *p;
+
+	if (!priv->dma_cap.vlins)
+		return false;
+	if (!skb_vlan_tag_present(skb))
+		return false;
+	if (skb->vlan_proto == htons(ETH_P_8021AD)) {
+		inner_tag = skb_vlan_tag_get(skb);
+		inner_type = STMMAC_VLAN_INSERT;
+	}
+
+	tag = skb_vlan_tag_get(skb);
+
+	p = tx_q->dma_tx + tx_q->cur_tx;
+	if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type))
+		return false;
+
+	stmmac_set_tx_owner(priv, p);
+	tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+	return true;
+}
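
/*
 * A minimal sketch of how a tag reaches the helper above, assuming the
 * standard VLAN offload path from <linux/if_vlan.h>; example_tag_skb()
 * is a hypothetical caller, not part of the driver.
 */
static void example_tag_skb(struct sk_buff *skb, u16 vid)
{
	/* Store the tag out-of-band in the skb metadata, not the payload */
	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);

	/* skb_vlan_tag_present(skb) is now true, so on xmit
	 * stmmac_vlan_insert() can program the tag into a descriptor.
	 */
}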
+
 /**
  *  stmmac_tso_allocator - allocate TSO payload descriptors
  *  @priv: driver private structure
@@ -2763,7 +2846,7 @@
  *  This function fills the current descriptor and requests new
  *  descriptors according to the remaining buffer length to fill
  */
-static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
+static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des,
 				 int total_len, bool last_segment, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
@@ -2774,11 +2857,18 @@
 	tmp_len = total_len;
 
 	while (tmp_len > 0) {
+		dma_addr_t curr_addr;
+
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 		WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
 		desc = tx_q->dma_tx + tx_q->cur_tx;
 
-		desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
+		curr_addr = des + (total_len - tmp_len);
+		if (priv->dma_cap.addr64 <= 32)
+			desc->des0 = cpu_to_le32(curr_addr);
+		else
+			stmmac_set_desc_addr(priv, desc, curr_addr);
+
 		buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
 			    TSO_MAX_BUFF_SIZE : tmp_len;
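		/*
		 * Worked example, assuming TSO_MAX_BUFF_SIZE is 16383
		 * (SZ_16K - 1): a 40000-byte payload is spread over three
		 * descriptors of 16383, 16383 and 7234 bytes, each pointing
		 * at des + (total_len - tmp_len) within the same mapping.
		 */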
 
@@ -2824,11 +2914,13 @@
 	struct stmmac_priv *priv = netdev_priv(dev);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	u32 queue = skb_get_queue_mapping(skb);
-	unsigned int first_entry, des;
 	struct stmmac_tx_queue *tx_q;
+	unsigned int first_entry;
 	int tmp_pay_len = 0;
 	u32 pay_len, mss;
 	u8 proto_hdr_len;
+	dma_addr_t des;
+	bool has_vlan;
 	int i;
 
 	tx_q = &priv->tx_queue[queue];
@@ -2870,12 +2962,18 @@
 			skb->data_len);
 	}
 
+	/* Check if VLAN can be inserted by HW */
+	has_vlan = stmmac_vlan_insert(priv, skb, tx_q);
+
 	first_entry = tx_q->cur_tx;
 	WARN_ON(tx_q->tx_skbuff[first_entry]);
 
 	desc = tx_q->dma_tx + first_entry;
 	first = desc;
 
+	if (has_vlan)
+		stmmac_set_desc_vlan(priv, first, STMMAC_VLAN_INSERT);
+
 	/* first descriptor: fill Headers on Buf1 */
 	des = dma_map_single(priv->device, skb->data, skb_headlen(skb),
 			     DMA_TO_DEVICE);
@@ -2885,14 +2983,21 @@
 	tx_q->tx_skbuff_dma[first_entry].buf = des;
 	tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
 
-	first->des0 = cpu_to_le32(des);
+	if (priv->dma_cap.addr64 <= 32) {
+		first->des0 = cpu_to_le32(des);
 
-	/* Fill start of payload in buff2 of first descriptor */
-	if (pay_len)
-		first->des1 = cpu_to_le32(des + proto_hdr_len);
+		/* Fill start of payload in buff2 of first descriptor */
+		if (pay_len)
+			first->des1 = cpu_to_le32(des + proto_hdr_len);
 
-	/* If needed take extra descriptors to fill the remaining payload */
-	tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+		/* If needed take extra descriptors to fill the remaining payload */
+		tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+	} else {
+		stmmac_set_desc_addr(priv, first, des);
+		tmp_pay_len = pay_len;
+		des += proto_hdr_len;
+		pay_len = 0;
+	}
 
 	stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
 
@@ -2919,6 +3024,19 @@
 	/* Only the last descriptor gets to point to the skb. */
 	tx_q->tx_skbuff[tx_q->cur_tx] = skb;
 
+	/* Manage tx mitigation */
+	tx_q->tx_count_frames += nfrags + 1;
+	if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+	    !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+	      priv->hwts_tx_en)) {
+		stmmac_tx_timer_arm(priv, queue);
+	} else {
+		desc = &tx_q->dma_tx[tx_q->cur_tx];
+		tx_q->tx_count_frames = 0;
+		stmmac_set_tx_ic(priv, desc);
+		priv->xstats.tx_set_ic_bit++;
+	}
+
 	/* We've used all descriptors we need for this skb, however,
 	 * advance cur_tx so that it references a fresh descriptor.
 	 * ndo_start_xmit will fill this descriptor the next time it's
@@ -2936,15 +3054,8 @@
 	priv->xstats.tx_tso_frames++;
 	priv->xstats.tx_tso_nfrags += nfrags;
 
-	/* Manage tx mitigation */
-	tx_q->tx_count_frames += nfrags + 1;
-	if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
-		stmmac_set_tx_ic(priv, desc);
-		priv->xstats.tx_set_ic_bit++;
-		tx_q->tx_count_frames = 0;
-	} else {
-		stmmac_tx_timer_arm(priv, queue);
-	}
+	if (priv->sarc_type)
+		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
 
 	skb_tx_timestamp(skb);
 
@@ -3019,15 +3130,19 @@
 	int i, csum_insertion = 0, is_jumbo = 0;
 	u32 queue = skb_get_queue_mapping(skb);
 	int nfrags = skb_shinfo(skb)->nr_frags;
-	int entry;
-	unsigned int first_entry;
 	struct dma_desc *desc, *first;
 	struct stmmac_tx_queue *tx_q;
+	unsigned int first_entry;
 	unsigned int enh_desc;
-	unsigned int des;
+	dma_addr_t des;
+	bool has_vlan;
+	int entry;
 
 	tx_q = &priv->tx_queue[queue];
 
+	if (priv->tx_path_in_lpi_mode)
+		stmmac_disable_eee_mode(priv);
+
 	/* Manage oversized TCP frames for GMAC4 device */
 	if (skb_is_gso(skb) && priv->tso) {
 		if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
@@ -3046,8 +3161,8 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	if (priv->tx_path_in_lpi_mode)
-		stmmac_disable_eee_mode(priv);
+	/* Check if VLAN can be inserted by HW */
+	has_vlan = stmmac_vlan_insert(priv, skb, tx_q);
 
 	entry = tx_q->cur_tx;
 	first_entry = entry;
@@ -3062,6 +3177,9 @@
 
 	first = desc;
 
+	if (has_vlan)
+		stmmac_set_desc_vlan(priv, first, STMMAC_VLAN_INSERT);
+
 	enh_desc = priv->plat->enh_desc;
 	/* To program the descriptors according to the size of the frame */
 	if (enh_desc)
@@ -3107,6 +3225,27 @@
 	/* Only the last descriptor gets to point to the skb. */
 	tx_q->tx_skbuff[entry] = skb;
 
+	/* According to the coalesce parameter, the IC bit for the latest
+	 * segment is reset and the timer is re-started to clean the tx status.
+	 * This approach takes care of the fragments: desc is the first
+	 * element in case of no SG.
+	 */
+	tx_q->tx_count_frames += nfrags + 1;
+	if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+	    !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+	      priv->hwts_tx_en)) {
+		stmmac_tx_timer_arm(priv, queue);
+	} else {
+		if (likely(priv->extend_desc))
+			desc = &tx_q->dma_etx[entry].basic;
+		else
+			desc = &tx_q->dma_tx[entry];
+
+		tx_q->tx_count_frames = 0;
+		stmmac_set_tx_ic(priv, desc);
+		priv->xstats.tx_set_ic_bit++;
+	}
+
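	/*
	 * Worked example of the mitigation above, assuming the default
	 * tx_coal_frames of 25 (STMMAC_TX_FRAMES): segments 1..24 leave the
	 * IC bit clear and only re-arm the coalesce timer; the 25th segment,
	 * or any frame needing a HW timestamp, gets IC set so its completion
	 * raises an interrupt, and the frame counter restarts from zero.
	 */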
 	/* We've used all descriptors we need for this skb, however,
 	 * advance cur_tx so that it references a fresh descriptor.
 	 * ndo_start_xmit will fill this descriptor the next time it's
@@ -3142,19 +3281,8 @@
 
 	dev->stats.tx_bytes += skb->len;
 
-	/* According to the coalesce parameter the IC bit for the latest
-	 * segment is reset and the timer re-started to clean the tx status.
-	 * This approach takes care about the fragments: desc is the first
-	 * element in case of no SG.
-	 */
-	tx_q->tx_count_frames += nfrags + 1;
-	if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
-		stmmac_set_tx_ic(priv, desc);
-		priv->xstats.tx_set_ic_bit++;
-		tx_q->tx_count_frames = 0;
-	} else {
-		stmmac_tx_timer_arm(priv, queue);
-	}
+	if (priv->sarc_type)
+		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
 
 	skb_tx_timestamp(skb);
 
@@ -3188,14 +3316,16 @@
 		stmmac_prepare_tx_desc(priv, first, 1, nopaged_len,
 				csum_insertion, priv->mode, 1, last_segment,
 				skb->len);
-
-		/* The own bit must be the latest setting done when prepare the
-		 * descriptor and then barrier is needed to make sure that
-		 * all is coherent before granting the DMA engine.
-		 */
-		wmb();
+	} else {
+		stmmac_set_tx_owner(priv, first);
 	}
 
+	/* The own bit must be the latest setting done when prepare the
+	 * descriptor and then barrier is needed to make sure that
+	 * all is coherent before granting the DMA engine.
+	 */
+	wmb();
+
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -3252,62 +3382,65 @@
 static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-	int dirty = stmmac_rx_dirty(priv, queue);
+	int len, dirty = stmmac_rx_dirty(priv, queue);
 	unsigned int entry = rx_q->dirty_rx;
 
-	int bfsize = priv->dma_buf_sz;
+	len = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
 
 	while (dirty-- > 0) {
+		struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry];
 		struct dma_desc *p;
+		bool use_rx_wd;
 
 		if (priv->extend_desc)
 			p = (struct dma_desc *)(rx_q->dma_erx + entry);
 		else
 			p = rx_q->dma_rx + entry;
 
-		if (likely(!rx_q->rx_skbuff[entry])) {
-			struct sk_buff *skb;
-
-			skb = netdev_alloc_skb_ip_align(priv->dev, bfsize);
-			if (unlikely(!skb)) {
-				/* so for a while no zero-copy! */
-				rx_q->rx_zeroc_thresh = STMMAC_RX_THRESH;
-				if (unlikely(net_ratelimit()))
-					dev_err(priv->device,
-						"fail to alloc skb entry %d\n",
-						entry);
+		if (!buf->page) {
+			buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
+			if (!buf->page)
 				break;
-			}
-
-			rx_q->rx_skbuff[entry] = skb;
-			rx_q->rx_skbuff_dma[entry] =
-			    dma_map_single(priv->device, skb->data, bfsize,
-					   DMA_FROM_DEVICE);
-			if (dma_mapping_error(priv->device,
-					      rx_q->rx_skbuff_dma[entry])) {
-				netdev_err(priv->dev, "Rx DMA map failed\n");
-				dev_kfree_skb(skb);
-				break;
-			}
-
-			stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[entry]);
-			stmmac_refill_desc3(priv, rx_q, p);
-
-			if (rx_q->rx_zeroc_thresh > 0)
-				rx_q->rx_zeroc_thresh--;
-
-			netif_dbg(priv, rx_status, priv->dev,
-				  "refill entry #%d\n", entry);
 		}
-		dma_wmb();
 
-		stmmac_set_rx_owner(priv, p, priv->use_riwt);
+		if (priv->sph && !buf->sec_page) {
+			buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+			if (!buf->sec_page)
+				break;
+
+			buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+
+			dma_sync_single_for_device(priv->device, buf->sec_addr,
+						   len, DMA_FROM_DEVICE);
+		}
+
+		buf->addr = page_pool_get_dma_addr(buf->page);
+
+		/* Sync whole allocation to device. This will invalidate old
+		 * data.
+		 */
+		dma_sync_single_for_device(priv->device, buf->addr, len,
+					   DMA_FROM_DEVICE);
+
+		stmmac_set_desc_addr(priv, p, buf->addr);
+		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+		stmmac_refill_desc3(priv, rx_q, p);
+
+		rx_q->rx_count_frames++;
+		rx_q->rx_count_frames += priv->rx_coal_frames;
+		if (rx_q->rx_count_frames > priv->rx_coal_frames)
+			rx_q->rx_count_frames = 0;
+		use_rx_wd = priv->use_riwt && rx_q->rx_count_frames;
 
 		dma_wmb();
+		stmmac_set_rx_owner(priv, p, use_rx_wd);
 
 		entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
 	}
 	rx_q->dirty_rx = entry;
+	rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+			    (rx_q->dirty_rx * sizeof(struct dma_desc));
+	stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue);
 }
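/*
 * A note on use_rx_wd above: on descriptor implementations where the last
 * argument of stmmac_set_rx_owner() suppresses the per-descriptor
 * completion interrupt, a set use_rx_wd defers RX mitigation to the RIWT
 * watchdog programmed via stmmac_rx_watchdog(), while a cleared one keeps
 * the immediate interrupt-on-completion behaviour.
 */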
 
 /**
@@ -3322,13 +3455,10 @@
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 	struct stmmac_channel *ch = &priv->channel[queue];
-	unsigned int entry = rx_q->cur_rx;
-	int coe = priv->hw->rx_csum;
-	unsigned int next_entry;
-	unsigned int count = 0;
-	bool xmac;
-
-	xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+	unsigned int count = 0, error = 0, len = 0;
+	int status = 0, coe = priv->hw->rx_csum;
+	unsigned int next_entry = rx_q->cur_rx;
+	struct sk_buff *skb = NULL;
 
 	if (netif_msg_rx_status(priv)) {
 		void *rx_head;
@@ -3342,9 +3472,32 @@
 		stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
 	}
 	while (count < limit) {
-		int status;
-		struct dma_desc *p;
-		struct dma_desc *np;
+		unsigned int hlen = 0, prev_len = 0;
+		enum pkt_hash_types hash_type;
+		struct stmmac_rx_buffer *buf;
+		struct dma_desc *np, *p;
+		unsigned int sec_len;
+		int entry;
+		u32 hash;
+
+		if (!count && rx_q->state_saved) {
+			skb = rx_q->state.skb;
+			error = rx_q->state.error;
+			len = rx_q->state.len;
+		} else {
+			rx_q->state_saved = false;
+			skb = NULL;
+			error = 0;
+			len = 0;
+		}
+
+		if (count >= limit)
+			break;
+
+read_again:
+		sec_len = 0;
+		entry = next_entry;
+		buf = &rx_q->buf_pool[entry];
 
 		if (priv->extend_desc)
 			p = (struct dma_desc *)(rx_q->dma_erx + entry);
@@ -3358,8 +3511,6 @@
 		if (unlikely(status & dma_own))
 			break;
 
-		count++;
-
 		rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE);
 		next_entry = rx_q->cur_rx;
 
@@ -3369,44 +3520,34 @@
 			np = rx_q->dma_rx + next_entry;
 
 		prefetch(np);
+		prefetch(page_address(buf->page));
 
 		if (priv->extend_desc)
 			stmmac_rx_extended_status(priv, &priv->dev->stats,
 					&priv->xstats, rx_q->dma_erx + entry);
 		if (unlikely(status == discard_frame)) {
-			priv->dev->stats.rx_errors++;
-			if (priv->hwts_rx_en && !priv->extend_desc) {
-				/* DESC2 & DESC3 will be overwritten by device
-				 * with timestamp value, hence reinitialize
-				 * them in stmmac_rx_refill() function so that
-				 * device can reuse it.
-				 */
-				dev_kfree_skb_any(rx_q->rx_skbuff[entry]);
-				rx_q->rx_skbuff[entry] = NULL;
-				dma_unmap_single(priv->device,
-						 rx_q->rx_skbuff_dma[entry],
-						 priv->dma_buf_sz,
-						 DMA_FROM_DEVICE);
-			}
+			page_pool_recycle_direct(rx_q->page_pool, buf->page);
+			buf->page = NULL;
+			error = 1;
+			if (!priv->hwts_rx_en)
+				priv->dev->stats.rx_errors++;
+		}
+
+		if (unlikely(error && (status & rx_not_ls)))
+			goto read_again;
+		if (unlikely(error)) {
+			dev_kfree_skb(skb);
+			count++;
+			continue;
+		}
+
+		/* Buffer is good. Go on. */
+
+		if (likely(status & rx_not_ls)) {
+			len += priv->dma_buf_sz;
 		} else {
-			struct sk_buff *skb;
-			int frame_len;
-			unsigned int des;
-
-			stmmac_get_desc_addr(priv, p, &des);
-			frame_len = stmmac_get_rx_frame_len(priv, p, coe);
-
-			/*  If frame length is greater than skb buffer size
-			 *  (preallocated during init) then the packet is
-			 *  ignored
-			 */
-			if (frame_len > priv->dma_buf_sz) {
-				netdev_err(priv->dev,
-					   "len %d larger than size (%d)\n",
-					   frame_len, priv->dma_buf_sz);
-				priv->dev->stats.rx_length_errors++;
-				break;
-			}
+			prev_len = len;
+			len = stmmac_get_rx_frame_len(priv, p, coe);
 
 			/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
 			 * Type frames (LLC/LLC-SNAP)
@@ -3417,89 +3558,99 @@
 			 */
 			if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
 			    unlikely(status != llc_snap))
-				frame_len -= ETH_FCS_LEN;
-
-			if (netif_msg_rx_status(priv)) {
-				netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n",
-					   p, entry, des);
-				netdev_dbg(priv->dev, "frame size %d, COE: %d\n",
-					   frame_len, status);
-			}
-
-			/* The zero-copy is always used for all the sizes
-			 * in case of GMAC4 because it needs
-			 * to refill the used descriptors, always.
-			 */
-			if (unlikely(!xmac &&
-				     ((frame_len < priv->rx_copybreak) ||
-				     stmmac_rx_threshold_count(rx_q)))) {
-				skb = netdev_alloc_skb_ip_align(priv->dev,
-								frame_len);
-				if (unlikely(!skb)) {
-					if (net_ratelimit())
-						dev_warn(priv->device,
-							 "packet dropped\n");
-					priv->dev->stats.rx_dropped++;
-					break;
-				}
-
-				dma_sync_single_for_cpu(priv->device,
-							rx_q->rx_skbuff_dma
-							[entry], frame_len,
-							DMA_FROM_DEVICE);
-				skb_copy_to_linear_data(skb,
-							rx_q->
-							rx_skbuff[entry]->data,
-							frame_len);
-
-				skb_put(skb, frame_len);
-				dma_sync_single_for_device(priv->device,
-							   rx_q->rx_skbuff_dma
-							   [entry], frame_len,
-							   DMA_FROM_DEVICE);
-			} else {
-				skb = rx_q->rx_skbuff[entry];
-				if (unlikely(!skb)) {
-					netdev_err(priv->dev,
-						   "%s: Inconsistent Rx chain\n",
-						   priv->dev->name);
-					priv->dev->stats.rx_dropped++;
-					break;
-				}
-				prefetch(skb->data - NET_IP_ALIGN);
-				rx_q->rx_skbuff[entry] = NULL;
-				rx_q->rx_zeroc_thresh++;
-
-				skb_put(skb, frame_len);
-				dma_unmap_single(priv->device,
-						 rx_q->rx_skbuff_dma[entry],
-						 priv->dma_buf_sz,
-						 DMA_FROM_DEVICE);
-			}
-
-			if (netif_msg_pktdata(priv)) {
-				netdev_dbg(priv->dev, "frame received (%dbytes)",
-					   frame_len);
-				print_pkt(skb->data, frame_len);
-			}
-
-			stmmac_get_rx_hwtstamp(priv, p, np, skb);
-
-			stmmac_rx_vlan(priv->dev, skb);
-
-			skb->protocol = eth_type_trans(skb, priv->dev);
-
-			if (unlikely(!coe))
-				skb_checksum_none_assert(skb);
-			else
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-			napi_gro_receive(&ch->napi, skb);
-
-			priv->dev->stats.rx_packets++;
-			priv->dev->stats.rx_bytes += frame_len;
+				len -= ETH_FCS_LEN;
 		}
-		entry = next_entry;
+
+		if (!skb) {
+			int ret = stmmac_get_rx_header_len(priv, p, &hlen);
+
+			if (priv->sph && !ret && (hlen > 0)) {
+				sec_len = len;
+				if (!(status & rx_not_ls))
+					sec_len = sec_len - hlen;
+				len = hlen;
+
+				prefetch(page_address(buf->sec_page));
+				priv->xstats.rx_split_hdr_pkt_n++;
+			}
+
+			skb = napi_alloc_skb(&ch->rx_napi, len);
+			if (!skb) {
+				priv->dev->stats.rx_dropped++;
+				count++;
+				continue;
+			}
+
+			dma_sync_single_for_cpu(priv->device, buf->addr, len,
+						DMA_FROM_DEVICE);
+			skb_copy_to_linear_data(skb, page_address(buf->page),
+						len);
+			skb_put(skb, len);
+
+			/* Data payload copied into SKB, page ready for recycle */
+			page_pool_recycle_direct(rx_q->page_pool, buf->page);
+			buf->page = NULL;
+		} else {
+			unsigned int buf_len = len - prev_len;
+
+			if (likely(status & rx_not_ls))
+				buf_len = priv->dma_buf_sz;
+
+			dma_sync_single_for_cpu(priv->device, buf->addr,
+						buf_len, DMA_FROM_DEVICE);
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+					buf->page, 0, buf_len,
+					priv->dma_buf_sz);
+
+			/* Data payload appended into SKB */
+			page_pool_release_page(rx_q->page_pool, buf->page);
+			buf->page = NULL;
+		}
+
+		if (sec_len > 0) {
+			dma_sync_single_for_cpu(priv->device, buf->sec_addr,
+						sec_len, DMA_FROM_DEVICE);
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+					buf->sec_page, 0, sec_len,
+					priv->dma_buf_sz);
+
+			len += sec_len;
+
+			/* Data payload appended into SKB */
+			page_pool_release_page(rx_q->page_pool, buf->sec_page);
+			buf->sec_page = NULL;
+		}
+
+		if (likely(status & rx_not_ls))
+			goto read_again;
+
+		/* Got entire packet into SKB. Finish it. */
+
+		stmmac_get_rx_hwtstamp(priv, p, np, skb);
+		stmmac_rx_vlan(priv->dev, skb);
+		skb->protocol = eth_type_trans(skb, priv->dev);
+
+		if (unlikely(!coe))
+			skb_checksum_none_assert(skb);
+		else
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
+			skb_set_hash(skb, hash, hash_type);
+
+		skb_record_rx_queue(skb, queue);
+		napi_gro_receive(&ch->rx_napi, skb);
+
+		priv->dev->stats.rx_packets++;
+		priv->dev->stats.rx_bytes += len;
+		count++;
+	}
+
+	if (status & rx_not_ls) {
+		rx_q->state_saved = true;
+		rx_q->state.skb = skb;
+		rx_q->state.error = error;
+		rx_q->state.len = len;
 	}
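	/*
	 * A short note on the save/restore above: a frame can span several
	 * descriptors, so when the NAPI budget runs out mid-frame the
	 * partially built skb, error flag and accumulated length are parked
	 * in rx_q->state and resumed from the state_saved branch at the top
	 * of the loop on the next poll.
	 */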
 
 	stmmac_rx_refill(priv, queue);
@@ -3509,40 +3660,46 @@
 	return count;
 }
 
-/**
- *  stmmac_poll - stmmac poll method (NAPI)
- *  @napi : pointer to the napi structure.
- *  @budget : maximum number of packets that the current CPU can receive from
- *	      all interfaces.
- *  Description :
- *  To look at the incoming frames and clear the tx resources.
- */
-static int stmmac_napi_poll(struct napi_struct *napi, int budget)
+static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 {
 	struct stmmac_channel *ch =
-		container_of(napi, struct stmmac_channel, napi);
+		container_of(napi, struct stmmac_channel, rx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
-	int work_done = 0, work_rem = budget;
 	u32 chan = ch->index;
+	int work_done;
 
 	priv->xstats.napi_poll++;
 
-	if (ch->has_tx) {
-		int done = stmmac_tx_clean(priv, work_rem, chan);
-
-		work_done += done;
-		work_rem -= done;
-	}
-
-	if (ch->has_rx) {
-		int done = stmmac_rx(priv, work_rem, chan);
-
-		work_done += done;
-		work_rem -= done;
-	}
-
+	work_done = stmmac_rx(priv, budget, chan);
 	if (work_done < budget && napi_complete_done(napi, work_done))
 		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+	return work_done;
+}
+
+static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct stmmac_channel *ch =
+		container_of(napi, struct stmmac_channel, tx_napi);
+	struct stmmac_priv *priv = ch->priv_data;
+	struct stmmac_tx_queue *tx_q;
+	u32 chan = ch->index;
+	int work_done;
+
+	priv->xstats.napi_poll++;
+
+	work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
+	work_done = min(work_done, budget);
+
+	if (work_done < budget)
+		napi_complete_done(napi, work_done);
+
+	/* Force transmission restart */
+	tx_q = &priv->tx_queue[chan];
+	if (tx_q->cur_tx != tx_q->dirty_tx) {
+		stmmac_enable_dma_transmission(priv, priv->ioaddr);
+		stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
+				       chan);
+	}
 
 	return work_done;
 }
@@ -3639,6 +3796,8 @@
 			       netdev_features_t features)
 {
 	struct stmmac_priv *priv = netdev_priv(netdev);
+	bool sph_en;
+	u32 chan;
 
 	/* Keep the COE Type in case of csum is supporting */
 	if (features & NETIF_F_RXCSUM)
@@ -3650,6 +3809,10 @@
 	 */
 	stmmac_rx_ipc(priv, priv->hw);
 
+	sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+	for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
+		stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
+
 	return 0;
 }
 
@@ -3757,6 +3920,7 @@
  */
 static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
+	struct stmmac_priv *priv = netdev_priv(dev);
 	int ret = -EOPNOTSUPP;
 
 	if (!netif_running(dev))
@@ -3766,12 +3930,13 @@
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		if (!dev->phydev)
-			return -EINVAL;
-		ret = phy_mii_ioctl(dev->phydev, rq, cmd);
+		ret = phylink_mii_ioctl(priv->phylink, rq, cmd);
 		break;
 	case SIOCSHWTSTAMP:
-		ret = stmmac_hwtstamp_ioctl(dev, rq);
+		ret = stmmac_hwtstamp_set(dev, rq);
+		break;
+	case SIOCGHWTSTAMP:
+		ret = stmmac_hwtstamp_get(dev, rq);
 		break;
 	default:
 		break;
@@ -3786,12 +3951,17 @@
 	struct stmmac_priv *priv = cb_priv;
 	int ret = -EOPNOTSUPP;
 
+	if (!tc_cls_can_offload_and_chain0(priv->dev, type_data))
+		return ret;
+
 	stmmac_disable_all_queues(priv);
 
 	switch (type) {
 	case TC_SETUP_CLSU32:
-		if (tc_cls_can_offload_and_chain0(priv->dev, type_data))
-			ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+		ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+		break;
+	case TC_SETUP_CLSFLOWER:
+		ret = stmmac_tc_setup_cls(priv, priv, type_data);
 		break;
 	default:
 		break;
@@ -3801,23 +3971,7 @@
 	return ret;
 }
 
-static int stmmac_setup_tc_block(struct stmmac_priv *priv,
-				 struct tc_block_offload *f)
-{
-	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	switch (f->command) {
-	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb,
-				priv, priv, f->extack);
-	case TC_BLOCK_UNBIND:
-		tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
+static LIST_HEAD(stmmac_block_cb_list);
 
 static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 			   void *type_data)
@@ -3826,7 +3980,10 @@
 
 	switch (type) {
 	case TC_SETUP_BLOCK:
-		return stmmac_setup_tc_block(priv, type_data);
+		return flow_block_cb_setup_simple(type_data,
+						  &stmmac_block_cb_list,
+						  stmmac_setup_tc_block_cb,
+						  priv, priv, true);
 	case TC_SETUP_QDISC_CBS:
 		return stmmac_tc_setup_cbs(priv, priv, type_data);
 	default:
@@ -3834,6 +3991,22 @@
 	}
 }
 
+static u16 stmmac_select_queue(struct net_device *dev, struct sk_buff *skb,
+			       struct net_device *sb_dev)
+{
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
+		/*
+		 * There is no way to determine the number of TSO
+		 * capable Queues. Let's always use Queue 0 because
+		 * if TSO is supported then at least this one will
+		 * be capable.
+		 */
+		return 0;
+	}
+
+	return netdev_pick_tx(dev, skb, NULL) % dev->real_num_tx_queues;
+}
+
 static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 {
 	struct stmmac_priv *priv = netdev_priv(ndev);
@@ -3878,7 +4051,7 @@
 	}
 }
 
-static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v)
+static int stmmac_rings_status_show(struct seq_file *seq, void *v)
 {
 	struct net_device *dev = seq->private;
 	struct stmmac_priv *priv = netdev_priv(dev);
@@ -3923,23 +4096,9 @@
 
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(stmmac_rings_status);
 
-static int stmmac_sysfs_ring_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, stmmac_sysfs_ring_read, inode->i_private);
-}
-
-/* Debugfs files, should appear in /sys/kernel/debug/stmmaceth/eth0 */
-
-static const struct file_operations stmmac_rings_status_fops = {
-	.owner = THIS_MODULE,
-	.open = stmmac_sysfs_ring_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
+static int stmmac_dma_cap_show(struct seq_file *seq, void *v)
 {
 	struct net_device *dev = seq->private;
 	struct stmmac_priv *priv = netdev_priv(dev);
@@ -4002,59 +4161,22 @@
 
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(stmmac_dma_cap);
 
-static int stmmac_sysfs_dma_cap_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, stmmac_sysfs_dma_cap_read, inode->i_private);
-}
-
-static const struct file_operations stmmac_dma_cap_fops = {
-	.owner = THIS_MODULE,
-	.open = stmmac_sysfs_dma_cap_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int stmmac_init_fs(struct net_device *dev)
+static void stmmac_init_fs(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 
 	/* Create per netdev entries */
 	priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
 
-	if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) {
-		netdev_err(priv->dev, "ERROR failed to create debugfs directory\n");
-
-		return -ENOMEM;
-	}
-
 	/* Entry to report DMA RX/TX rings */
-	priv->dbgfs_rings_status =
-		debugfs_create_file("descriptors_status", 0444,
-				    priv->dbgfs_dir, dev,
-				    &stmmac_rings_status_fops);
-
-	if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) {
-		netdev_err(priv->dev, "ERROR creating stmmac ring debugfs file\n");
-		debugfs_remove_recursive(priv->dbgfs_dir);
-
-		return -ENOMEM;
-	}
+	debugfs_create_file("descriptors_status", 0444, priv->dbgfs_dir, dev,
+			    &stmmac_rings_status_fops);
 
 	/* Entry to report the DMA HW features */
-	priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", 0444,
-						  priv->dbgfs_dir,
-						  dev, &stmmac_dma_cap_fops);
-
-	if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
-		netdev_err(priv->dev, "ERROR creating stmmac MMC debugfs file\n");
-		debugfs_remove_recursive(priv->dbgfs_dir);
-
-		return -ENOMEM;
-	}
-
-	return 0;
+	debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev,
+			    &stmmac_dma_cap_fops);
 }
 
 static void stmmac_exit_fs(struct net_device *dev)
@@ -4065,6 +4187,79 @@
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static u32 stmmac_vid_crc32_le(__le16 vid_le)
+{
+	unsigned char *data = (unsigned char *)&vid_le;
+	unsigned char data_byte = 0;
+	u32 crc = ~0x0;
+	u32 temp = 0;
+	int i, bits;
+
+	bits = get_bitmask_order(VLAN_VID_MASK);
+	for (i = 0; i < bits; i++) {
+		if ((i % 8) == 0)
+			data_byte = data[i / 8];
+
+		temp = ((crc & 1) ^ data_byte) & 1;
+		crc >>= 1;
+		data_byte >>= 1;
+
+		if (temp)
+			crc ^= 0xedb88320;
+	}
+
+	return crc;
+}
+
+static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double)
+{
+	u32 crc, hash = 0;
+	u16 vid;
+
+	for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
+		__le16 vid_le = cpu_to_le16(vid);
+		crc = bitrev32(~stmmac_vid_crc32_le(vid_le)) >> 28;
+		hash |= (1 << crc);
+	}
+
+	return stmmac_update_vlan_hash(priv, priv->hw, hash, is_double);
+}
+
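/*
 * Worked example for the hash above: each active VID is CRC-32'd with the
 * reflected polynomial 0xedb88320, the result is inverted and bit-reversed,
 * and the top four bits pick one of 16 filter bins, so an index of e.g. 9
 * contributes BIT(9) = 0x200 to the hash word passed to
 * stmmac_update_vlan_hash().
 */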
+static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid)
+{
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	bool is_double = false;
+	int ret;
+
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+	if (be16_to_cpu(proto) == ETH_P_8021AD)
+		is_double = true;
+
+	set_bit(vid, priv->active_vlans);
+	ret = stmmac_vlan_update(priv, is_double);
+	if (ret)
+		clear_bit(vid, priv->active_vlans);
+
+	return ret;
+}
+
+static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
+{
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	bool is_double = false;
+
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+	if (be16_to_cpu(proto) == ETH_P_8021AD)
+		is_double = true;
+
+	clear_bit(vid, priv->active_vlans);
+	return stmmac_vlan_update(priv, is_double);
+}
+
 static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_open = stmmac_open,
 	.ndo_start_xmit = stmmac_xmit,
@@ -4076,10 +4271,13 @@
 	.ndo_tx_timeout = stmmac_tx_timeout,
 	.ndo_do_ioctl = stmmac_ioctl,
 	.ndo_setup_tc = stmmac_setup_tc,
+	.ndo_select_queue = stmmac_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = stmmac_poll_controller,
 #endif
 	.ndo_set_mac_address = stmmac_set_mac_address,
+	.ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid,
 };
 
 static void stmmac_reset_subtask(struct stmmac_priv *priv)
@@ -4098,7 +4296,7 @@
 
 	set_bit(STMMAC_DOWN, &priv->state);
 	dev_close(priv->dev);
-	dev_open(priv->dev);
+	dev_open(priv->dev, NULL);
 	clear_bit(STMMAC_DOWN, &priv->state);
 	clear_bit(STMMAC_RESETING, &priv->state);
 	rtnl_unlock();
@@ -4148,6 +4346,12 @@
 		priv->plat->enh_desc = priv->dma_cap.enh_desc;
 		priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up;
 		priv->hw->pmt = priv->plat->pmt;
+		if (priv->dma_cap.hash_tb_sz) {
+			priv->hw->multicast_filter_bins =
+					(BIT(priv->dma_cap.hash_tb_sz) << 5);
+			priv->hw->mcast_bits_log2 =
+					ilog2(priv->hw->multicast_filter_bins);
+		}
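		/*
		 * Worked example: hash_tb_sz = 1 gives BIT(1) << 5 = 64
		 * filter bins (mcast_bits_log2 = 6), 2 gives 128 bins
		 * (log2 = 7) and 3 gives 256 bins (log2 = 8).
		 */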
 
 		/* TXCOE doesn't work in thresh DMA mode */
 		if (priv->plat->force_thresh_dma_mode)
@@ -4191,6 +4395,18 @@
 			return ret;
 	}
 
+	/* Rx Watchdog is available in the cores newer than 3.40.
+	 * In some cases, for example on buggy HW, this feature
+	 * has to be disabled; this can be done by passing the
+	 * riwt_off field from the platform.
+	 */
+	if (((priv->synopsys_id >= DWMAC_CORE_3_50) ||
+	    (priv->plat->has_xgmac)) && (!priv->plat->riwt_off)) {
+		priv->use_riwt = 1;
+		dev_info(priv->device,
+			 "Enable RX Mitigation via HW Watchdog Timer\n");
+	}
+
 	return 0;
 }
 
@@ -4210,12 +4426,11 @@
 {
 	struct net_device *ndev = NULL;
 	struct stmmac_priv *priv;
-	u32 queue, maxq;
-	int ret = 0;
+	u32 queue, rxq, maxq;
+	int i, ret = 0;
 
-	ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
-				  MTL_MAX_TX_QUEUES,
-				  MTL_MAX_RX_QUEUES);
+	ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv),
+				       MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES);
 	if (!ndev)
 		return -ENOMEM;
 
@@ -4235,7 +4450,7 @@
 	priv->wol_irq = res->wol_irq;
 	priv->lpi_irq = res->lpi_irq;
 
-	if (res->mac)
+	if (!IS_ERR_OR_NULL(res->mac))
 		memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN);
 
 	dev_set_drvdata(device, priv->dev);
@@ -4247,7 +4462,7 @@
 	priv->wq = create_singlethread_workqueue("stmmac_wq");
 	if (!priv->wq) {
 		dev_err(priv->device, "failed to create workqueue\n");
-		goto error_wq;
+		return -ENOMEM;
 	}
 
 	INIT_WORK(&priv->service_task, stmmac_service_task);
@@ -4273,6 +4488,8 @@
 	if (ret)
 		goto error_hw_init;
 
+	stmmac_check_ether_addr(priv);
+
 	/* Configure real RX and TX queues */
 	netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
 	netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
@@ -4292,20 +4509,62 @@
 		priv->tso = true;
 		dev_info(priv->device, "TSO feature enabled\n");
 	}
+
+	if (priv->dma_cap.sphen) {
+		ndev->hw_features |= NETIF_F_GRO;
+		priv->sph = true;
+		dev_info(priv->device, "SPH feature enabled\n");
+	}
+
+	if (priv->dma_cap.addr64) {
+		ret = dma_set_mask_and_coherent(device,
+				DMA_BIT_MASK(priv->dma_cap.addr64));
+		if (!ret) {
+			dev_info(priv->device, "Using %d bits DMA width\n",
+				 priv->dma_cap.addr64);
+		} else {
+			ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32));
+			if (ret) {
+				dev_err(priv->device, "Failed to set DMA Mask\n");
+				goto error_hw_init;
+			}
+
+			priv->dma_cap.addr64 = 32;
+		}
+	}
+
 	ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
 	ndev->watchdog_timeo = msecs_to_jiffies(watchdog);
 #ifdef STMMAC_VLAN_TAG_USED
 	/* Both mac100 and gmac support receive VLAN tag detection */
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX;
+	if (priv->dma_cap.vlhash) {
+		ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+		ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
+	}
+	if (priv->dma_cap.vlins) {
+		ndev->features |= NETIF_F_HW_VLAN_CTAG_TX;
+		if (priv->dma_cap.dvlan)
+			ndev->features |= NETIF_F_HW_VLAN_STAG_TX;
+	}
 #endif
 	priv->msg_enable = netif_msg_init(debug, default_msg_level);
 
+	/* Initialize RSS */
+	rxq = priv->plat->rx_queues_to_use;
+	netdev_rss_key_fill(priv->rss.key, sizeof(priv->rss.key));
+	for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+		priv->rss.table[i] = ethtool_rxfh_indir_default(i, rxq);
+
+	if (priv->dma_cap.rssen && priv->plat->rss_en)
+		ndev->features |= NETIF_F_RXHASH;
+
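	/*
	 * Worked example for the indirection table above:
	 * ethtool_rxfh_indir_default(i, n) is simply i % n, so with four RX
	 * queues the table cycles 0, 1, 2, 3, 0, 1, ... spreading flows
	 * evenly until the user overrides it with ethtool -X.
	 */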
 	/* MTU range: 46 - hw-specific max */
 	ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
-	if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
-		ndev->max_mtu = JUMBO_LEN;
-	else if (priv->plat->has_xgmac)
+	if (priv->plat->has_xgmac)
 		ndev->max_mtu = XGMAC_JUMBO_LEN;
+	else if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
+		ndev->max_mtu = JUMBO_LEN;
 	else
 		ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN);
 	/* Will not overwrite ndev->max_mtu if plat->maxmtu > ndev->max_mtu
@@ -4322,18 +4581,6 @@
 	if (flow_ctrl)
 		priv->flow_ctrl = FLOW_AUTO;	/* RX/TX pause on */
 
-	/* Rx Watchdog is available in the COREs newer than the 3.40.
-	 * In some case, for example on bugged HW this feature
-	 * has to be disable and this can be done by passing the
-	 * riwt_off field from the platform.
-	 */
-	if (((priv->synopsys_id >= DWMAC_CORE_3_50) ||
-	    (priv->plat->has_xgmac)) && (!priv->plat->riwt_off)) {
-		priv->use_riwt = 1;
-		dev_info(priv->device,
-			 "Enable RX Mitigation via HW Watchdog Timer\n");
-	}
-
 	/* Setup channels NAPI */
 	maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use);
 
@@ -4343,13 +4590,15 @@
 		ch->priv_data = priv;
 		ch->index = queue;
 
-		if (queue < priv->plat->rx_queues_to_use)
-			ch->has_rx = true;
-		if (queue < priv->plat->tx_queues_to_use)
-			ch->has_tx = true;
-
-		netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
-			       NAPI_POLL_WEIGHT);
+		if (queue < priv->plat->rx_queues_to_use) {
+			netif_napi_add(ndev, &ch->rx_napi, stmmac_napi_poll_rx,
+				       NAPI_POLL_WEIGHT);
+		}
+		if (queue < priv->plat->tx_queues_to_use) {
+			netif_tx_napi_add(ndev, &ch->tx_napi,
+					  stmmac_napi_poll_tx,
+					  NAPI_POLL_WEIGHT);
+		}
 	}
 
 	mutex_init(&priv->lock);
@@ -4360,10 +4609,10 @@
 	 * set the MDC clock dynamically according to the csr actual
 	 * clock input.
 	 */
-	if (!priv->plat->clk_csr)
-		stmmac_clk_csr_set(priv);
-	else
+	if (priv->plat->clk_csr >= 0)
 		priv->clk_csr = priv->plat->clk_csr;
+	else
+		stmmac_clk_csr_set(priv);
 
 	stmmac_check_pcs_mode(priv);
 
@@ -4380,6 +4629,12 @@
 		}
 	}
 
+	ret = stmmac_phy_setup(priv);
+	if (ret) {
+		netdev_err(ndev, "failed to setup phy (%d)\n", ret);
+		goto error_phy_setup;
+	}
+
 	ret = register_netdev(ndev);
 	if (ret) {
 		dev_err(priv->device, "%s: ERROR %i registering the device\n",
@@ -4388,15 +4643,14 @@
 	}
 
 #ifdef CONFIG_DEBUG_FS
-	ret = stmmac_init_fs(ndev);
-	if (ret < 0)
-		netdev_warn(priv->dev, "%s: failed debugFS registration\n",
-			    __func__);
+	stmmac_init_fs(ndev);
 #endif
 
 	return ret;
 
 error_netdev_register:
+	phylink_destroy(priv->phylink);
+error_phy_setup:
 	if (priv->hw->pcs != STMMAC_PCS_RGMII &&
 	    priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
@@ -4405,12 +4659,13 @@
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
 
-		netif_napi_del(&ch->napi);
+		if (queue < priv->plat->rx_queues_to_use)
+			netif_napi_del(&ch->rx_napi);
+		if (queue < priv->plat->tx_queues_to_use)
+			netif_napi_del(&ch->tx_napi);
 	}
 error_hw_init:
 	destroy_workqueue(priv->wq);
-error_wq:
-	free_netdev(ndev);
 
 	return ret;
 }
@@ -4437,6 +4692,7 @@
 	stmmac_mac_set(priv, priv->ioaddr, false);
 	netif_carrier_off(ndev);
 	unregister_netdev(ndev);
+	phylink_destroy(priv->phylink);
 	if (priv->plat->stmmac_rst)
 		reset_control_assert(priv->plat->stmmac_rst);
 	clk_disable_unprepare(priv->plat->pclk);
@@ -4447,7 +4703,6 @@
 		stmmac_mdio_unregister(ndev);
 	destroy_workqueue(priv->wq);
 	mutex_destroy(&priv->lock);
-	free_netdev(ndev);
 
 	return 0;
 }
@@ -4468,8 +4723,7 @@
 	if (!ndev || !netif_running(ndev))
 		return 0;
 
-	if (ndev->phydev)
-		phy_stop(ndev->phydev);
+	phylink_mac_change(priv->phylink, false);
 
 	mutex_lock(&priv->lock);
 
@@ -4486,17 +4740,23 @@
 		stmmac_pmt(priv, priv->hw, priv->wolopts);
 		priv->irq_wake = 1;
 	} else {
+		mutex_unlock(&priv->lock);
+		rtnl_lock();
+		phylink_stop(priv->phylink);
+		rtnl_unlock();
+		mutex_lock(&priv->lock);
+
 		stmmac_mac_set(priv, priv->ioaddr, false);
 		pinctrl_pm_select_sleep_state(priv->device);
 		/* Disable clock in case of PWM is off */
-		clk_disable(priv->plat->pclk);
-		clk_disable(priv->plat->stmmac_clk);
+		if (priv->plat->clk_ptp_ref)
+			clk_disable_unprepare(priv->plat->clk_ptp_ref);
+		clk_disable_unprepare(priv->plat->pclk);
+		clk_disable_unprepare(priv->plat->stmmac_clk);
 	}
 	mutex_unlock(&priv->lock);
 
-	priv->oldlink = false;
 	priv->speed = SPEED_UNKNOWN;
-	priv->oldduplex = DUPLEX_UNKNOWN;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(stmmac_suspend);
@@ -4555,8 +4815,10 @@
 	} else {
 		pinctrl_pm_select_default_state(priv->device);
 		/* enable the clk previously disabled */
-		clk_enable(priv->plat->stmmac_clk);
-		clk_enable(priv->plat->pclk);
+		clk_prepare_enable(priv->plat->stmmac_clk);
+		clk_prepare_enable(priv->plat->pclk);
+		if (priv->plat->clk_ptp_ref)
+			clk_prepare_enable(priv->plat->clk_ptp_ref);
 		/* reset the phy so that it's ready */
 		if (priv->mii)
 			stmmac_mdio_reset(priv->mii);
@@ -4571,7 +4833,7 @@
 	stmmac_clear_descriptors(priv);
 
 	stmmac_hw_setup(ndev, false);
-	stmmac_init_tx_coalesce(priv);
+	stmmac_init_coalesce(priv);
 	stmmac_set_rx_mode(ndev);
 
 	stmmac_enable_all_queues(priv);
@@ -4580,8 +4842,13 @@
 
 	mutex_unlock(&priv->lock);
 
-	if (ndev->phydev)
-		phy_start(ndev->phydev);
+	if (!device_may_wakeup(priv->device)) {
+		rtnl_lock();
+		phylink_start(priv->phylink);
+		rtnl_unlock();
+	}
+
+	phylink_mac_change(priv->phylink, true);
 
 	return 0;
 }
@@ -4638,16 +4905,8 @@
 {
 #ifdef CONFIG_DEBUG_FS
 	/* Create debugfs main directory if it doesn't exist yet */
-	if (!stmmac_fs_dir) {
+	if (!stmmac_fs_dir)
 		stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
-
-		if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) {
-			pr_err("ERROR %s, debugfs create directory failed\n",
-			       STMMAC_RESOURCE_NAME);
-
-			return -ENOMEM;
-		}
-	}
 #endif
 
 	return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index bdd3515..40c4263 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -1,32 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   STMMAC Ethernet Driver -- MDIO bus implementation
   Provides Bus interface for MII registers
 
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Carl Shaw <carl.shaw@st.com>
   Maintainer: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#include <linux/gpio/consumer.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/mii.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
+#include <linux/property.h>
 #include <linux/slab.h>
 
 #include "dwxgmac2.h"
@@ -34,11 +24,14 @@
 
 #define MII_BUSY 0x00000001
 #define MII_WRITE 0x00000002
+#define MII_DATA_MASK GENMASK(15, 0)
 
 /* GMAC4 defines */
 #define MII_GMAC4_GOC_SHIFT		2
+#define MII_GMAC4_REG_ADDR_SHIFT	16
 #define MII_GMAC4_WRITE			(1 << MII_GMAC4_GOC_SHIFT)
 #define MII_GMAC4_READ			(3 << MII_GMAC4_GOC_SHIFT)
+#define MII_GMAC4_C45E			BIT(1)
 
 /* XGMAC defines */
 #define MII_XGMAC_SADDR			BIT(18)
@@ -165,22 +158,34 @@
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	unsigned int mii_address = priv->hw->mii.addr;
 	unsigned int mii_data = priv->hw->mii.data;
-	u32 v;
-	int data;
 	u32 value = MII_BUSY;
+	int data = 0;
+	u32 v;
 
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
 	value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
 		& priv->hw->mii.clk_csr_mask;
-	if (priv->plat->has_gmac4)
+	if (priv->plat->has_gmac4) {
 		value |= MII_GMAC4_READ;
+		if (phyreg & MII_ADDR_C45) {
+			value |= MII_GMAC4_C45E;
+			value &= ~priv->hw->mii.reg_mask;
+			value |= ((phyreg >> MII_DEVADDR_C45_SHIFT) <<
+			       priv->hw->mii.reg_shift) &
+			       priv->hw->mii.reg_mask;
+
+			data |= (phyreg & MII_REGADDR_C45_MASK) <<
+				MII_GMAC4_REG_ADDR_SHIFT;
+		}
+	}
 
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
 			       100, 10000))
 		return -EBUSY;
 
+	writel(data, priv->ioaddr + mii_data);
 	writel(value, priv->ioaddr + mii_address);
 
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
@@ -188,7 +193,7 @@
 		return -EBUSY;
 
 	/* Read the data from the MII data register */
-	data = (int)readl(priv->ioaddr + mii_data);
+	data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK;
 
 	return data;
 }
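
/*
 * A minimal sketch of the Clause-45 encoding unpacked above, assuming the
 * generic flags from <linux/mdio.h>; example_c45_phyreg() is a hypothetical
 * helper, not part of the driver. The PHY core packs the device address and
 * the 16-bit register number into one phyreg value, which the read path
 * above splits between the address and data registers.
 */
static u32 example_c45_phyreg(int devad, u16 regnum)
{
	/* Bit 30 flags Clause 45, bits 20:16 carry the device address,
	 * bits 15:0 the register number.
	 */
	return MII_ADDR_C45 | (devad << MII_DEVADDR_C45_SHIFT) |
	       (regnum & MII_REGADDR_C45_MASK);
}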
@@ -208,8 +213,9 @@
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	unsigned int mii_address = priv->hw->mii.addr;
 	unsigned int mii_data = priv->hw->mii.data;
-	u32 v;
 	u32 value = MII_BUSY;
+	int data = phydata;
+	u32 v;
 
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
@@ -217,10 +223,21 @@
 
 	value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
 		& priv->hw->mii.clk_csr_mask;
-	if (priv->plat->has_gmac4)
+	if (priv->plat->has_gmac4) {
 		value |= MII_GMAC4_WRITE;
-	else
+		if (phyreg & MII_ADDR_C45) {
+			value |= MII_GMAC4_C45E;
+			value &= ~priv->hw->mii.reg_mask;
+			value |= ((phyreg >> MII_DEVADDR_C45_SHIFT) <<
+			       priv->hw->mii.reg_shift) &
+			       priv->hw->mii.reg_mask;
+
+			data |= (phyreg & MII_REGADDR_C45_MASK) <<
+				MII_GMAC4_REG_ADDR_SHIFT;
+		}
+	} else {
 		value |= MII_WRITE;
+	}
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
@@ -228,7 +245,7 @@
 		return -EBUSY;
 
 	/* Set the MII address register to write */
-	writel(phydata, priv->ioaddr + mii_data);
+	writel(data, priv->ioaddr + mii_data);
 	writel(value, priv->ioaddr + mii_address);
 
 	/* Wait until any existing MII operation is complete */
@@ -247,50 +264,35 @@
 	struct net_device *ndev = bus->priv;
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	unsigned int mii_address = priv->hw->mii.addr;
-	struct stmmac_mdio_bus_data *data = priv->plat->mdio_bus_data;
 
 #ifdef CONFIG_OF
 	if (priv->device->of_node) {
-		if (data->reset_gpio < 0) {
-			struct device_node *np = priv->device->of_node;
+		struct gpio_desc *reset_gpio;
+		u32 delays[3] = { 0, 0, 0 };
 
-			if (!np)
-				return 0;
+		reset_gpio = devm_gpiod_get_optional(priv->device,
+						     "snps,reset",
+						     GPIOD_OUT_LOW);
+		if (IS_ERR(reset_gpio))
+			return PTR_ERR(reset_gpio);
 
-			data->reset_gpio = of_get_named_gpio(np,
-						"snps,reset-gpio", 0);
-			if (data->reset_gpio < 0)
-				return 0;
+		device_property_read_u32_array(priv->device,
+					       "snps,reset-delays-us",
+					       delays, ARRAY_SIZE(delays));
 
-			data->active_low = of_property_read_bool(np,
-						"snps,reset-active-low");
-			of_property_read_u32_array(np,
-				"snps,reset-delays-us", data->delays, 3);
+		if (delays[0])
+			msleep(DIV_ROUND_UP(delays[0], 1000));
 
-			if (gpio_request(data->reset_gpio, "mdio-reset"))
-				return 0;
-		}
+		gpiod_set_value_cansleep(reset_gpio, 1);
+		if (delays[1])
+			msleep(DIV_ROUND_UP(delays[1], 1000));
 
-		gpio_direction_output(data->reset_gpio,
-				      data->active_low ? 1 : 0);
-		if (data->delays[0])
-			msleep(DIV_ROUND_UP(data->delays[0], 1000));
-
-		gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1);
-		if (data->delays[1])
-			msleep(DIV_ROUND_UP(data->delays[1], 1000));
-
-		gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0);
-		if (data->delays[2])
-			msleep(DIV_ROUND_UP(data->delays[2], 1000));
+		gpiod_set_value_cansleep(reset_gpio, 0);
+		if (delays[2])
+			msleep(DIV_ROUND_UP(delays[2], 1000));
 	}
 #endif
 
-	if (data->phy_reset) {
-		netdev_dbg(ndev, "stmmac_mdio_reset: calling phy_reset\n");
-		data->phy_reset(priv->plat->bsp_priv);
-	}
-
 	/* This is a workaround for problems with the STE101P PHY.
 	 * It doesn't complete its reset until at least one clock cycle
 	 * on MDC, so perform a dummy mdio read. To be updated for GMAC4
@@ -327,11 +329,6 @@
 	if (mdio_bus_data->irqs)
 		memcpy(new_bus->irq, mdio_bus_data->irqs, sizeof(new_bus->irq));
 
-#ifdef CONFIG_OF
-	if (priv->device->of_node)
-		mdio_bus_data->reset_gpio = -1;
-#endif
-
 	new_bus->name = "stmmac";
 
 	if (priv->plat->has_xgmac) {
@@ -351,7 +348,9 @@
 		max_addr = PHY_MAX_ADDR;
 	}
 
-	new_bus->reset = &stmmac_mdio_reset;
+	if (mdio_bus_data->needs_reset)
+		new_bus->reset = &stmmac_mdio_reset;
+
 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
 		 new_bus->name, priv->plat->bus_id);
 	new_bus->priv = ndev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index c54a50d..292045f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -1,24 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This contains the functions to handle the pci driver.
 
   Copyright (C) 2011-2012  Vayavya Labs Pvt Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Rayagond Kokatanur <rayagond@vayavyalabs.com>
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#include <linux/clk-provider.h>
 #include <linux/pci.h>
 #include <linux/dmi.h>
 
@@ -73,7 +64,7 @@
 	plat->has_gmac = 1;
 	plat->force_sf_dma_mode = 1;
 
-	plat->mdio_bus_data->phy_reset = NULL;
+	plat->mdio_bus_data->needs_reset = true;
 	plat->mdio_bus_data->phy_mask = 0;
 
 	/* Set default value for multicast hash bins */
@@ -118,6 +109,166 @@
 	.setup = stmmac_default_data,
 };
 
+static int intel_mgbe_common_data(struct pci_dev *pdev,
+				  struct plat_stmmacenet_data *plat)
+{
+	int i;
+
+	plat->clk_csr = 5;
+	plat->has_gmac = 0;
+	plat->has_gmac4 = 1;
+	plat->force_sf_dma_mode = 0;
+	plat->tso_en = 1;
+
+	plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
+
+	for (i = 0; i < plat->rx_queues_to_use; i++) {
+		plat->rx_queues_cfg[i].mode_to_use = MTL_QUEUE_DCB;
+		plat->rx_queues_cfg[i].chan = i;
+
+		/* Disable Priority config by default */
+		plat->rx_queues_cfg[i].use_prio = false;
+
+		/* Disable RX queues routing by default */
+		plat->rx_queues_cfg[i].pkt_route = 0x0;
+	}
+
+	for (i = 0; i < plat->tx_queues_to_use; i++) {
+		plat->tx_queues_cfg[i].mode_to_use = MTL_QUEUE_DCB;
+
+		/* Disable Priority config by default */
+		plat->tx_queues_cfg[i].use_prio = false;
+	}
+
+	/* FIFO size is 4096 bytes per TX/RX queue */
+	plat->tx_fifo_size = plat->tx_queues_to_use * 4096;
+	plat->rx_fifo_size = plat->rx_queues_to_use * 4096;
+
+	plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR;
+	plat->tx_queues_cfg[0].weight = 0x09;
+	plat->tx_queues_cfg[1].weight = 0x0A;
+	plat->tx_queues_cfg[2].weight = 0x0B;
+	plat->tx_queues_cfg[3].weight = 0x0C;
+	plat->tx_queues_cfg[4].weight = 0x0D;
+	plat->tx_queues_cfg[5].weight = 0x0E;
+	plat->tx_queues_cfg[6].weight = 0x0F;
+	plat->tx_queues_cfg[7].weight = 0x10;
+
+	plat->mdio_bus_data->phy_mask = 0;
+
+	plat->dma_cfg->pbl = 32;
+	plat->dma_cfg->pblx8 = true;
+	plat->dma_cfg->fixed_burst = 0;
+	plat->dma_cfg->mixed_burst = 0;
+	plat->dma_cfg->aal = 0;
+
+	plat->axi = devm_kzalloc(&pdev->dev, sizeof(*plat->axi),
+				 GFP_KERNEL);
+	if (!plat->axi)
+		return -ENOMEM;
+
+	plat->axi->axi_lpi_en = 0;
+	plat->axi->axi_xit_frm = 0;
+	plat->axi->axi_wr_osr_lmt = 1;
+	plat->axi->axi_rd_osr_lmt = 1;
+	plat->axi->axi_blen[0] = 4;
+	plat->axi->axi_blen[1] = 8;
+	plat->axi->axi_blen[2] = 16;
+
+	plat->ptp_max_adj = plat->clk_ptp_rate;
+
+	/* Set system clock */
+	plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev,
+						   "stmmac-clk", NULL, 0,
+						   plat->clk_ptp_rate);
+
+	if (IS_ERR(plat->stmmac_clk)) {
+		dev_warn(&pdev->dev, "Failed to register stmmac-clk\n");
+		plat->stmmac_clk = NULL;
+	}
+	clk_prepare_enable(plat->stmmac_clk);
+
+	/* Set default value for multicast hash bins */
+	plat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+	/* Set default value for unicast filter entries */
+	plat->unicast_filter_entries = 1;
+
+	/* Set the maxmtu to a default of JUMBO_LEN */
+	plat->maxmtu = JUMBO_LEN;
+
+	return 0;
+}
+
+static int ehl_common_data(struct pci_dev *pdev,
+			   struct plat_stmmacenet_data *plat)
+{
+	int ret;
+
+	plat->rx_queues_to_use = 8;
+	plat->tx_queues_to_use = 8;
+	plat->clk_ptp_rate = 200000000;
+	ret = intel_mgbe_common_data(pdev, plat);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int ehl_sgmii_data(struct pci_dev *pdev,
+			  struct plat_stmmacenet_data *plat)
+{
+	plat->bus_id = 1;
+	plat->phy_addr = 0;
+	plat->interface = PHY_INTERFACE_MODE_SGMII;
+	return ehl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info ehl_sgmii1g_pci_info = {
+	.setup = ehl_sgmii_data,
+};
+
+static int ehl_rgmii_data(struct pci_dev *pdev,
+			  struct plat_stmmacenet_data *plat)
+{
+	plat->bus_id = 1;
+	plat->phy_addr = 0;
+	plat->interface = PHY_INTERFACE_MODE_RGMII;
+	return ehl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info ehl_rgmii1g_pci_info = {
+	.setup = ehl_rgmii_data,
+};
+
+static int tgl_common_data(struct pci_dev *pdev,
+			   struct plat_stmmacenet_data *plat)
+{
+	int ret;
+
+	plat->rx_queues_to_use = 6;
+	plat->tx_queues_to_use = 4;
+	plat->clk_ptp_rate = 200000000;
+	ret = intel_mgbe_common_data(pdev, plat);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int tgl_sgmii_data(struct pci_dev *pdev,
+			  struct plat_stmmacenet_data *plat)
+{
+	plat->bus_id = 1;
+	plat->phy_addr = 0;
+	plat->interface = PHY_INTERFACE_MODE_SGMII;
+	return tgl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info tgl_sgmii1g_pci_info = {
+	.setup = tgl_sgmii_data,
+};
+
 static const struct stmmac_pci_func_data galileo_stmmac_func_data[] = {
 	{
 		.func = 6,
@@ -159,6 +310,12 @@
 		},
 		.driver_data = (void *)&galileo_stmmac_dmi_data,
 	},
+	/*
+	 * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+	 * The asset tag "6ES7647-0AA00-0YA2" is only for the IOT2020,
+	 * which has a single PCI network device, while the other asset
+	 * tags are for the IOT2040, which has two.
+	 */
 	{
 		.matches = {
 			DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
@@ -170,8 +327,6 @@
 	{
 		.matches = {
 			DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
-			DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
-					"6ES7647-0AA00-1YA2"),
 		},
 		.driver_data = (void *)&iot2040_stmmac_dmi_data,
 	},
@@ -204,7 +359,7 @@
 		ret = 1;
 	}
 
-	plat->bus_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
+	plat->bus_id = pci_dev_id(pdev);
 	plat->phy_addr = ret;
 	plat->interface = PHY_INTERFACE_MODE_RMII;
 
@@ -220,6 +375,75 @@
 	.setup = quark_default_data,
 };
 
+static int snps_gmac5_default_data(struct pci_dev *pdev,
+				   struct plat_stmmacenet_data *plat)
+{
+	int i;
+
+	plat->clk_csr = 5;
+	plat->has_gmac4 = 1;
+	plat->force_sf_dma_mode = 1;
+	plat->tso_en = 1;
+	plat->pmt = 1;
+
+	plat->mdio_bus_data->phy_mask = 0;
+
+	/* Set default value for multicast hash bins */
+	plat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+	/* Set default value for unicast filter entries */
+	plat->unicast_filter_entries = 1;
+
+	/* Set the maxmtu to a default of JUMBO_LEN */
+	plat->maxmtu = JUMBO_LEN;
+
+	/* Set default number of RX and TX queues to use */
+	plat->tx_queues_to_use = 4;
+	plat->rx_queues_to_use = 4;
+
+	plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR;
+	for (i = 0; i < plat->tx_queues_to_use; i++) {
+		plat->tx_queues_cfg[i].use_prio = false;
+		plat->tx_queues_cfg[i].mode_to_use = MTL_QUEUE_DCB;
+		plat->tx_queues_cfg[i].weight = 25;
+	}
+
+	plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
+	for (i = 0; i < plat->rx_queues_to_use; i++) {
+		plat->rx_queues_cfg[i].use_prio = false;
+		plat->rx_queues_cfg[i].mode_to_use = MTL_QUEUE_DCB;
+		plat->rx_queues_cfg[i].pkt_route = 0x0;
+		plat->rx_queues_cfg[i].chan = i;
+	}
+
+	plat->bus_id = 1;
+	plat->phy_addr = -1;
+	plat->interface = PHY_INTERFACE_MODE_GMII;
+
+	plat->dma_cfg->pbl = 32;
+	plat->dma_cfg->pblx8 = true;
+
+	/* Axi Configuration */
+	plat->axi = devm_kzalloc(&pdev->dev, sizeof(*plat->axi), GFP_KERNEL);
+	if (!plat->axi)
+		return -ENOMEM;
+
+	plat->axi->axi_wr_osr_lmt = 31;
+	plat->axi->axi_rd_osr_lmt = 31;
+
+	plat->axi->axi_fb = false;
+	plat->axi->axi_blen[0] = 4;
+	plat->axi->axi_blen[1] = 8;
+	plat->axi->axi_blen[2] = 16;
+	plat->axi->axi_blen[3] = 32;
+
+	return 0;
+}
+
+static const struct stmmac_pci_info snps_gmac5_pci_info = {
+	.setup = snps_gmac5_default_data,
+};
+
 /**
  * stmmac_pci_probe
  *
@@ -299,7 +523,22 @@
  */
 static void stmmac_pci_remove(struct pci_dev *pdev)
 {
+	struct net_device *ndev = dev_get_drvdata(&pdev->dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	int i;
+
 	stmmac_dvr_remove(&pdev->dev);
+
+	if (priv->plat->stmmac_clk)
+		clk_unregister_fixed_rate(priv->plat->stmmac_clk);
+
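+	/* Only the first BAR with a non-zero length was mapped at probe
+	 * time, so unmap just that one and stop.
+	 */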
+	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+		if (pci_resource_len(pdev, i) == 0)
+			continue;
+		pcim_iounmap_regions(pdev, BIT(i));
+		break;
+	}
+
 	pci_disable_device(pdev);
 }
 
@@ -345,6 +584,10 @@
 
 #define STMMAC_QUARK_ID  0x0937
 #define STMMAC_DEVICE_ID 0x1108
+#define STMMAC_EHL_RGMII1G_ID	0x4b30
+#define STMMAC_EHL_SGMII1G_ID	0x4b31
+#define STMMAC_TGL_SGMII1G_ID	0xa0ac
+#define STMMAC_GMAC5_ID		0x7102
 
 #define STMMAC_DEVICE(vendor_id, dev_id, info)	{	\
 	PCI_VDEVICE(vendor_id, dev_id),			\
@@ -355,6 +598,10 @@
 	STMMAC_DEVICE(STMMAC, STMMAC_DEVICE_ID, stmmac_pci_info),
 	STMMAC_DEVICE(STMICRO, PCI_DEVICE_ID_STMICRO_MAC, stmmac_pci_info),
 	STMMAC_DEVICE(INTEL, STMMAC_QUARK_ID, quark_pci_info),
+	STMMAC_DEVICE(INTEL, STMMAC_EHL_RGMII1G_ID, ehl_rgmii1g_pci_info),
+	STMMAC_DEVICE(INTEL, STMMAC_EHL_SGMII1G_ID, ehl_sgmii1g_pci_info),
+	STMMAC_DEVICE(INTEL, STMMAC_TGL_SGMII1G_ID, tgl_sgmii1g_pci_info),
+	STMMAC_DEVICE(SYNOPSYS, STMMAC_GMAC5_ID, snps_gmac5_pci_info),
 	{}
 };
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
index eba41c2..aefc121 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * stmmac_pcs.h: Physical Coding Sublayer Header File
  *
  * Copyright (C) 2016 STMicroelectronics (R&D) Limited
  * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __STMMAC_PCS_H__
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 2b800ce..170c3a0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   This contains the functions to handle the platform driver.
 
   Copyright (C) 2007-2011  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
@@ -33,6 +23,7 @@
 
 /**
  * dwmac1000_validate_mcast_bins - validates the number of Multicast filter bins
+ * @dev: struct device of the platform device
  * @mcast_bins: Multicast filtering bins
  * Description:
  * this function validates the number of Multicast filtering bins specified
@@ -43,7 +34,7 @@
  * invalid and will cause the filtering algorithm to use Multicast
  * promiscuous mode.
  */
-static int dwmac1000_validate_mcast_bins(int mcast_bins)
+static int dwmac1000_validate_mcast_bins(struct device *dev, int mcast_bins)
 {
 	int x = mcast_bins;
 
@@ -54,8 +45,8 @@
 		break;
 	default:
 		x = 0;
-		pr_info("Hash table entries set to unexpected value %d",
-			mcast_bins);
+		dev_info(dev, "Hash table entries set to unexpected value %d\n",
+			 mcast_bins);
 		break;
 	}
 	return x;
@@ -63,6 +54,7 @@
 
 /**
  * dwmac1000_validate_ucast_entries - validate the Unicast address entries
+ * @dev: struct device of the platform device
  * @ucast_entries: number of Unicast address entries
  * Description:
  * This function validates the number of Unicast address entries supported
@@ -72,7 +64,8 @@
  * selected, and defaults to 1 Unicast address if an unsupported
  * configuration is selected.
  */
-static int dwmac1000_validate_ucast_entries(int ucast_entries)
+static int dwmac1000_validate_ucast_entries(struct device *dev,
+					    int ucast_entries)
 {
 	int x = ucast_entries;
 
@@ -83,8 +76,8 @@
 		break;
 	default:
 		x = 1;
-		pr_info("Unicast table entries set to unexpected value %d\n",
-			ucast_entries);
+		dev_info(dev, "Unicast table entries set to unexpected value %d\n",
+			 ucast_entries);
 		break;
 	}
 	return x;
@@ -333,21 +326,6 @@
 		{},
 	};
 
-	/* If phy-handle property is passed from DT, use it as the PHY */
-	plat->phy_node = of_parse_phandle(np, "phy-handle", 0);
-	if (plat->phy_node)
-		dev_dbg(dev, "Found phy-handle subnode\n");
-
-	/* If phy-handle is not specified, check if we have a fixed-phy */
-	if (!plat->phy_node && of_phy_is_fixed_link(np)) {
-		if ((of_phy_register_fixed_link(np) < 0))
-			return -ENODEV;
-
-		dev_dbg(dev, "Found fixed-link subnode\n");
-		plat->phy_node = of_node_get(np);
-		mdio = false;
-	}
-
 	if (of_match_node(need_mdio_ids, np)) {
 		plat->mdio_node = of_get_child_by_name(np, "mdio");
 	} else {
@@ -367,14 +345,46 @@
 		mdio = true;
 	}
 
-	if (mdio)
+	if (mdio) {
 		plat->mdio_bus_data =
 			devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data),
 				     GFP_KERNEL);
+		if (!plat->mdio_bus_data)
+			return -ENOMEM;
+
+		plat->mdio_bus_data->needs_reset = true;
+	}
+
 	return 0;
 }
 
 /**
+ * stmmac_of_get_mac_mode - retrieves the interface of the MAC
+ * @np: device-tree node
+ * Description:
+ * Similar to `of_get_phy_mode()`, this function will retrieve (from
+ * the device-tree) the interface mode on the MAC side. This assumes
+ * that there is a mode converter in between the MAC & PHY
+ * (e.g. GMII-to-RGMII).
+ */
+static int stmmac_of_get_mac_mode(struct device_node *np)
+{
+	const char *pm;
+	int err, i;
+
+	err = of_property_read_string(np, "mac-mode", &pm);
+	if (err < 0)
+		return err;
+
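+	/* Match the "mac-mode" string case-insensitively against every
+	 * known PHY interface mode name, as of_get_phy_mode() does.
+	 */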
+	for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) {
+		if (!strcasecmp(pm, phy_modes(i)))
+			return i;
+	}
+
+	return -ENODEV;
+}
+
+/**
  * stmmac_probe_config_dt - parse device-tree driver parameters
  * @pdev: platform_device structure
  * @mac: MAC address to use
@@ -395,7 +405,27 @@
 		return ERR_PTR(-ENOMEM);
 
 	*mac = of_get_mac_address(np);
-	plat->interface = of_get_phy_mode(np);
+	if (IS_ERR(*mac)) {
+		if (PTR_ERR(*mac) == -EPROBE_DEFER)
+			return ERR_CAST(*mac);
+
+		*mac = NULL;
+	}
+
+	plat->phy_interface = of_get_phy_mode(np);
+	if (plat->phy_interface < 0)
+		return ERR_PTR(plat->phy_interface);
+
+	plat->interface = stmmac_of_get_mac_mode(np);
+	if (plat->interface < 0)
+		plat->interface = plat->phy_interface;
+
+	/* Some wrapper drivers still rely on phy_node. Let's save it until
+	 * they are converted to phylink.
+	 */
+	plat->phy_node = of_parse_phandle(np, "phy-handle", 0);
+
+	/* PHYLINK automatically parses the phy-handle property */
+	plat->phylink_node = np;
 
 	/* Get max speed of operation from device tree */
 	if (of_property_read_u32(np, "max-speed", &plat->max_speed))
@@ -408,6 +438,12 @@
 	/* Default to phy auto-detection */
 	plat->phy_addr = -1;
 
+	/* Default to getting clk_csr from stmmac_clk_csr_set(),
+	 * unless overridden by the "clk_csr" device-tree property.
+	 */
+	plat->clk_csr = -1;
+	of_property_read_u32(np, "clk_csr", &plat->clk_csr);
+
 	/* "snps,phy-addr" is not a standard property. Mark it as deprecated
 	 * and warn of its use. Remove this when phy node support is added.
 	 */
@@ -462,9 +498,9 @@
 		of_property_read_u32(np, "snps,perfect-filter-entries",
 				     &plat->unicast_filter_entries);
 		plat->unicast_filter_entries = dwmac1000_validate_ucast_entries(
-					       plat->unicast_filter_entries);
+				&pdev->dev, plat->unicast_filter_entries);
 		plat->multicast_filter_bins = dwmac1000_validate_mcast_bins(
-					      plat->multicast_filter_bins);
+				&pdev->dev, plat->multicast_filter_bins);
 		plat->has_gmac = 1;
 		plat->pmt = 1;
 	}
@@ -513,7 +549,8 @@
 	plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
 	if (plat->force_thresh_dma_mode) {
 		plat->force_sf_dma_mode = 0;
-		pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set.");
+		dev_warn(&pdev->dev,
+			 "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n");
 	}
 
 	of_property_read_u32(np, "snps,ps-speed", &plat->mac_port_sel_speed);
@@ -527,13 +564,15 @@
 	}
 
 	/* clock setup */
-	plat->stmmac_clk = devm_clk_get(&pdev->dev,
-					STMMAC_RESOURCE_NAME);
-	if (IS_ERR(plat->stmmac_clk)) {
-		dev_warn(&pdev->dev, "Cannot get CSR clock\n");
-		plat->stmmac_clk = NULL;
+	if (!of_device_is_compatible(np, "snps,dwc-qos-ethernet-4.10")) {
+		plat->stmmac_clk = devm_clk_get(&pdev->dev,
+						STMMAC_RESOURCE_NAME);
+		if (IS_ERR(plat->stmmac_clk)) {
+			dev_warn(&pdev->dev, "Cannot get CSR clock\n");
+			plat->stmmac_clk = NULL;
+		}
+		clk_prepare_enable(plat->stmmac_clk);
 	}
-	clk_prepare_enable(plat->stmmac_clk);
 
 	plat->pclk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(plat->pclk)) {
@@ -585,10 +624,6 @@
 void stmmac_remove_config_dt(struct platform_device *pdev,
 			     struct plat_stmmacenet_data *plat)
 {
-	struct device_node *np = pdev->dev.of_node;
-
-	if (of_phy_is_fixed_link(np))
-		of_phy_deregister_fixed_link(np);
 	of_node_put(plat->phy_node);
 	of_node_put(plat->mdio_node);
 }
@@ -618,13 +653,8 @@
 	 * probe if needed before we went too far with resource allocation.
 	 */
 	stmmac_res->irq = platform_get_irq_byname(pdev, "macirq");
-	if (stmmac_res->irq < 0) {
-		if (stmmac_res->irq != -EPROBE_DEFER) {
-			dev_err(&pdev->dev,
-				"MAC IRQ configuration information not found\n");
-		}
+	if (stmmac_res->irq < 0)
 		return stmmac_res->irq;
-	}
 
 	/* On some platforms e.g. SPEAr the wake up irq differs from the mac irq
 	 * The external wake up irq can be passed through the platform code
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index b72eb0d..3a4663b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -1,17 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*******************************************************************************
   Copyright (C) 2007-2009  STMicroelectronics Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 2293e21..0989e2b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*******************************************************************************
   PTP 1588 clock using the STMMAC.
 
   Copyright (C) 2013  Vayavya Labs Pvt Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Rayagond Kokatanur <rayagond@vayavyalabs.com>
 *******************************************************************************/
@@ -105,7 +95,7 @@
 	struct stmmac_priv *priv =
 	    container_of(ptp, struct stmmac_priv, ptp_clock_ops);
 	unsigned long flags;
-	u64 ns;
+	u64 ns = 0;
 
 	spin_lock_irqsave(&priv->ptp_lock, flags);
 	stmmac_get_systime(priv, priv->ptpaddr, &ns);
@@ -150,6 +140,10 @@
 
 	switch (rq->type) {
 	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
 		cfg = &priv->pps[rq->perout.index];
 
 		cfg->start.tv_sec = rq->perout.start.sec;
@@ -174,7 +168,7 @@
 /* structure describing a PTP hardware clock */
 static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.owner = THIS_MODULE,
-	.name = "stmmac_ptp_clock",
+	.name = "stmmac ptp",
 	.max_adj = 62500000,
 	.n_alarm = 0,
 	.n_ext_ts = 0,
@@ -204,6 +198,9 @@
 		priv->pps[i].available = true;
 	}
 
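+	/* Platform glue may override the default adjustment range; the
+	 * Intel PCI setup sets it to clk_ptp_rate.
+	 */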
+	if (priv->plat->ptp_max_adj)
+		stmmac_ptp_clock_ops.max_adj = priv->plat->ptp_max_adj;
+
 	stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num;
 
 	spin_lock_init(&priv->ptp_lock);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index ecccf89..7abb1d4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /******************************************************************************
   PTP Header file
 
   Copyright (C) 2013  Vayavya Labs Pvt Ltd
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
 
   Author: Rayagond Kokatanur <rayagond@vayavyalabs.com>
 ******************************************************************************/
@@ -59,9 +49,14 @@
 #define	PTP_TCR_TSEVNTENA	BIT(14)
 /* Enable Snapshot for Messages Relevant to Master */
 #define	PTP_TCR_TSMSTRENA	BIT(15)
-/* Select PTP packets for Taking Snapshots */
+/* Select PTP packets for Taking Snapshots.
+ * On gmac4 specifically:
+ * Enable SYNC, Pdelay_Req and Pdelay_Resp when TSEVNTENA is enabled,
+ * or
+ * enable SYNC, Follow_Up, Delay_Req, Delay_Resp, Pdelay_Req, Pdelay_Resp
+ * and Pdelay_Resp_Follow_Up when TSEVNTENA is disabled.
+ */
 #define	PTP_TCR_SNAPTYPSEL_1	BIT(16)
-#define	PTP_GMAC4_TCR_SNAPTYPSEL_1	GENMASK(17, 16)
 /* Enable MAC address for PTP Frame Filtering */
 #define	PTP_TCR_TSENMACADDR	BIT(18)
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
new file mode 100644
index 0000000..ac3f658
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -0,0 +1,1922 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates.
+ * stmmac Selftests Support
+ *
+ * Author: Jose Abreu <joabreu@synopsys.com>
+ */
+
+#include <linux/bitrev.h>
+#include <linux/completion.h>
+#include <linux/crc32.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/phy.h>
+#include <linux/udp.h>
+#include <net/pkt_cls.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include "stmmac.h"
+
+struct stmmachdr {
+	__be32 version;
+	__be64 magic;
+	u8 id;
+} __packed;
+
+#define STMMAC_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+			      sizeof(struct stmmachdr))
+#define STMMAC_TEST_PKT_MAGIC	0xdeadcafecafedeadULL
+#define STMMAC_LB_TIMEOUT	msecs_to_jiffies(200)
+
+struct stmmac_packet_attrs {
+	int vlan;
+	int vlan_id_in;
+	int vlan_id_out;
+	unsigned char *src;
+	unsigned char *dst;
+	u32 ip_src;
+	u32 ip_dst;
+	int tcp;
+	int sport;
+	int dport;
+	u32 exp_hash;
+	int dont_wait;
+	int timeout;
+	int size;
+	int max_size;
+	int remove_sa;
+	u8 id;
+	int sarc;
+	u16 queue_mapping;
+};
+
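+/* Monotonically increasing ID stamped into each test packet so the
+ * validators can tell whether a looped-back frame belongs to the
+ * request that is currently waiting on it.
+ */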
+static u8 stmmac_test_next_id;
+
+static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
+					       struct stmmac_packet_attrs *attr)
+{
+	struct sk_buff *skb = NULL;
+	struct udphdr *uhdr = NULL;
+	struct tcphdr *thdr = NULL;
+	struct stmmachdr *shdr;
+	struct ethhdr *ehdr;
+	struct iphdr *ihdr;
+	int iplen, size;
+
+	size = attr->size + STMMAC_TEST_PKT_SIZE;
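+	/* Each VLAN tag adds 4 bytes to the frame; double tagging adds 8 */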
+	if (attr->vlan) {
+		size += 4;
+		if (attr->vlan > 1)
+			size += 4;
+	}
+
+	if (attr->tcp)
+		size += sizeof(struct tcphdr);
+	else
+		size += sizeof(struct udphdr);
+
+	if (attr->max_size && (attr->max_size > size))
+		size = attr->max_size;
+
+	skb = netdev_alloc_skb_ip_align(priv->dev, size);
+	if (!skb)
+		return NULL;
+
+	prefetchw(skb->data);
+
+	if (attr->vlan > 1)
+		ehdr = skb_push(skb, ETH_HLEN + 8);
+	else if (attr->vlan)
+		ehdr = skb_push(skb, ETH_HLEN + 4);
+	else if (attr->remove_sa)
+		ehdr = skb_push(skb, ETH_HLEN - 6);
+	else
+		ehdr = skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	skb_set_network_header(skb, skb->len);
+	ihdr = skb_put(skb, sizeof(*ihdr));
+
+	skb_set_transport_header(skb, skb->len);
+	if (attr->tcp)
+		thdr = skb_put(skb, sizeof(*thdr));
+	else
+		uhdr = skb_put(skb, sizeof(*uhdr));
+
+	if (!attr->remove_sa)
+		eth_zero_addr(ehdr->h_source);
+	eth_zero_addr(ehdr->h_dest);
+	if (attr->src && !attr->remove_sa)
+		ether_addr_copy(ehdr->h_source, attr->src);
+	if (attr->dst)
+		ether_addr_copy(ehdr->h_dest, attr->dst);
+
+	if (!attr->remove_sa) {
+		ehdr->h_proto = htons(ETH_P_IP);
+	} else {
+		__be16 *ptr = (__be16 *)ehdr;
+
+		/* HACK: with the source address stripped, the EtherType
+		 * lands 6 bytes earlier, at the 4th 16-bit word.
+		 */
+		ptr[3] = htons(ETH_P_IP);
+	}
+
+	if (attr->vlan) {
+		__be16 *tag, *proto;
+
+		if (!attr->remove_sa) {
+			tag = (void *)ehdr + ETH_HLEN;
+			proto = (void *)ehdr + (2 * ETH_ALEN);
+		} else {
+			tag = (void *)ehdr + ETH_HLEN - 6;
+			proto = (void *)ehdr + ETH_ALEN;
+		}
+
+		proto[0] = htons(ETH_P_8021Q);
+		tag[0] = htons(attr->vlan_id_out);
+		tag[1] = htons(ETH_P_IP);
+		if (attr->vlan > 1) {
+			proto[0] = htons(ETH_P_8021AD);
+			tag[1] = htons(ETH_P_8021Q);
+			tag[2] = htons(attr->vlan_id_in);
+			tag[3] = htons(ETH_P_IP);
+		}
+	}
+
+	if (attr->tcp) {
+		thdr->source = htons(attr->sport);
+		thdr->dest = htons(attr->dport);
+		thdr->doff = sizeof(struct tcphdr) / 4;
+		thdr->check = 0;
+	} else {
+		uhdr->source = htons(attr->sport);
+		uhdr->dest = htons(attr->dport);
+		uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size);
+		if (attr->max_size)
+			uhdr->len = htons(attr->max_size -
+					  (sizeof(*ihdr) + sizeof(*ehdr)));
+		uhdr->check = 0;
+	}
+
+	ihdr->ihl = 5;
+	ihdr->ttl = 32;
+	ihdr->version = 4;
+	if (attr->tcp)
+		ihdr->protocol = IPPROTO_TCP;
+	else
+		ihdr->protocol = IPPROTO_UDP;
+	iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size;
+	if (attr->tcp)
+		iplen += sizeof(*thdr);
+	else
+		iplen += sizeof(*uhdr);
+
+	if (attr->max_size)
+		iplen = attr->max_size - sizeof(*ehdr);
+
+	ihdr->tot_len = htons(iplen);
+	ihdr->frag_off = 0;
+	ihdr->saddr = htonl(attr->ip_src);
+	ihdr->daddr = htonl(attr->ip_dst);
+	ihdr->tos = 0;
+	ihdr->id = 0;
+	ip_send_check(ihdr);
+
+	shdr = skb_put(skb, sizeof(*shdr));
+	shdr->version = 0;
+	shdr->magic = cpu_to_be64(STMMAC_TEST_PKT_MAGIC);
+	attr->id = stmmac_test_next_id;
+	shdr->id = stmmac_test_next_id++;
+
+	if (attr->size)
+		skb_put(skb, attr->size);
+	if (attr->max_size && (attr->max_size > skb->len))
+		skb_put(skb, attr->max_size - skb->len);
+
+	skb->csum = 0;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	if (attr->tcp) {
+		thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, ihdr->daddr, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct tcphdr, check);
+	} else {
+		udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr);
+	}
+
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = priv->dev;
+
+	return skb;
+}
+
+static struct sk_buff *stmmac_test_get_arp_skb(struct stmmac_priv *priv,
+					       struct stmmac_packet_attrs *attr)
+{
+	__be32 ip_src = htonl(attr->ip_src);
+	__be32 ip_dst = htonl(attr->ip_dst);
+	struct sk_buff *skb = NULL;
+
+	skb = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip_dst, priv->dev, ip_src,
+			 NULL, attr->src, attr->dst);
+	if (!skb)
+		return NULL;
+
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = priv->dev;
+
+	return skb;
+}
+
+struct stmmac_test_priv {
+	struct stmmac_packet_attrs *packet;
+	struct packet_type pt;
+	struct completion comp;
+	int double_vlan;
+	int vlan_id;
+	int ok;
+};
+
+static int stmmac_test_loopback_validate(struct sk_buff *skb,
+					 struct net_device *ndev,
+					 struct packet_type *pt,
+					 struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct stmmachdr *shdr;
+	struct ethhdr *ehdr;
+	struct udphdr *uhdr;
+	struct tcphdr *thdr;
+	struct iphdr *ihdr;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+
+	if (skb_linearize(skb))
+		goto out;
+	if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN))
+		goto out;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (tpriv->packet->dst) {
+		if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst))
+			goto out;
+	}
+	if (tpriv->packet->sarc) {
+		if (!ether_addr_equal(ehdr->h_source, ehdr->h_dest))
+			goto out;
+	} else if (tpriv->packet->src) {
+		if (!ether_addr_equal(ehdr->h_source, tpriv->packet->src))
+			goto out;
+	}
+
+	ihdr = ip_hdr(skb);
+	if (tpriv->double_vlan)
+		ihdr = (struct iphdr *)(skb_network_header(skb) + 4);
+
+	if (tpriv->packet->tcp) {
+		if (ihdr->protocol != IPPROTO_TCP)
+			goto out;
+
+		thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+		if (thdr->dest != htons(tpriv->packet->dport))
+			goto out;
+
+		shdr = (struct stmmachdr *)((u8 *)thdr + sizeof(*thdr));
+	} else {
+		if (ihdr->protocol != IPPROTO_UDP)
+			goto out;
+
+		uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+		if (uhdr->dest != htons(tpriv->packet->dport))
+			goto out;
+
+		shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr));
+	}
+
+	if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC))
+		goto out;
+	if (tpriv->packet->exp_hash && !skb->hash)
+		goto out;
+	if (tpriv->packet->id != shdr->id)
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int __stmmac_test_loopback(struct stmmac_priv *priv,
+				  struct stmmac_packet_attrs *attr)
+{
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_IP);
+	tpriv->pt.func = stmmac_test_loopback_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = attr;
+
+	if (!attr->dont_wait)
+		dev_add_pack(&tpriv->pt);
+
+	skb = stmmac_test_get_udp_skb(priv, attr);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	skb_set_queue_mapping(skb, attr->queue_mapping);
+	ret = dev_queue_xmit(skb);
+	if (ret)
+		goto cleanup;
+
+	if (attr->dont_wait)
+		goto cleanup;
+
+	if (!attr->timeout)
+		attr->timeout = STMMAC_LB_TIMEOUT;
+
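+	/* The validate callback completes tpriv->comp once a matching
+	 * frame loops back; otherwise report a timeout.
+	 */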
+	wait_for_completion_timeout(&tpriv->comp, attr->timeout);
+	ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+cleanup:
+	if (!attr->dont_wait)
+		dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int stmmac_test_mac_loopback(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+
+	attr.dst = priv->dev->dev_addr;
+	return __stmmac_test_loopback(priv, &attr);
+}
+
+static int stmmac_test_phy_loopback(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->dev->phydev)
+		return -EBUSY;
+
+	ret = phy_loopback(priv->dev->phydev, true);
+	if (ret)
+		return ret;
+
+	attr.dst = priv->dev->dev_addr;
+	ret = __stmmac_test_loopback(priv, &attr);
+
+	phy_loopback(priv->dev->phydev, false);
+	return ret;
+}
+
+static int stmmac_test_mmc(struct stmmac_priv *priv)
+{
+	struct stmmac_counters initial, final;
+	int ret;
+
+	memset(&initial, 0, sizeof(initial));
+	memset(&final, 0, sizeof(final));
+
+	if (!priv->dma_cap.rmon)
+		return -EOPNOTSUPP;
+
+	/* Save previous results into internal struct */
+	stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc);
+
+	ret = stmmac_test_mac_loopback(priv);
+	if (ret)
+		return ret;
+
+	/* These will be loopback results so no need to save them */
+	stmmac_mmc_read(priv, priv->mmcaddr, &final);
+
+	/*
+	 * The number of MMC counters available depends on HW configuration,
+	 * so we just use this one to validate the feature. Hopefully there
+	 * is no version without this counter.
+	 */
+	if (final.mmc_tx_framecount_g <= initial.mmc_tx_framecount_g)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int stmmac_test_eee(struct stmmac_priv *priv)
+{
+	struct stmmac_extra_stats *initial, *final;
+	int retries = 10;
+	int ret;
+
+	if (!priv->dma_cap.eee || !priv->eee_active)
+		return -EOPNOTSUPP;
+
+	initial = kzalloc(sizeof(*initial), GFP_KERNEL);
+	if (!initial)
+		return -ENOMEM;
+
+	final = kzalloc(sizeof(*final), GFP_KERNEL);
+	if (!final) {
+		ret = -ENOMEM;
+		goto out_free_initial;
+	}
+
+	memcpy(initial, &priv->xstats, sizeof(*initial));
+
+	ret = stmmac_test_mac_loopback(priv);
+	if (ret)
+		goto out_free_final;
+
+	/* We have no traffic on the line, so sooner or later it will enter LPI */
+	while (--retries) {
+		memcpy(final, &priv->xstats, sizeof(*final));
+
+		if (final->irq_tx_path_in_lpi_mode_n >
+		    initial->irq_tx_path_in_lpi_mode_n)
+			break;
+		msleep(100);
+	}
+
+	if (!retries) {
+		ret = -ETIMEDOUT;
+		goto out_free_final;
+	}
+
+	if (final->irq_tx_path_in_lpi_mode_n <=
+	    initial->irq_tx_path_in_lpi_mode_n) {
+		ret = -EINVAL;
+		goto out_free_final;
+	}
+
+	if (final->irq_tx_path_exit_lpi_mode_n <=
+	    initial->irq_tx_path_exit_lpi_mode_n) {
+		ret = -EINVAL;
+		goto out_free_final;
+	}
+
+out_free_final:
+	kfree(final);
+out_free_initial:
+	kfree(initial);
+	return ret;
+}
+
+static int stmmac_filter_check(struct stmmac_priv *priv)
+{
+	if (!(priv->dev->flags & IFF_PROMISC))
+		return 0;
+
+	netdev_warn(priv->dev, "Test can't be run in promiscuous mode!\n");
+	return -EOPNOTSUPP;
+}
+
+static bool stmmac_hash_check(struct stmmac_priv *priv, unsigned char *addr)
+{
+	int mc_offset = 32 - priv->hw->mcast_bits_log2;
+	struct netdev_hw_addr *ha;
+	u32 hash, hash_nr;
+
+	/* First compute the hash for desired addr */
+	hash = bitrev32(~crc32_le(~0, addr, 6)) >> mc_offset;
+	hash_nr = hash >> 5;
+	hash = 1 << (hash & 0x1f);
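+	/* hash_nr picks the 32-bit hash-table register and hash the bit
+	 * inside it, mirroring what the MAC does in hardware.
+	 */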
+
+	/* Now, check if it collides with any existing one */
+	netdev_for_each_mc_addr(ha, priv->dev) {
+		u32 nr = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)) >> mc_offset;
+		if (((nr >> 5) == hash_nr) && ((1 << (nr & 0x1f)) == hash))
+			return false;
+	}
+
+	/* No collisions, address is good to go */
+	return true;
+}
+
+static bool stmmac_perfect_check(struct stmmac_priv *priv, unsigned char *addr)
+{
+	struct netdev_hw_addr *ha;
+
+	/* Check if it collides with any existing one */
+	netdev_for_each_uc_addr(ha, priv->dev) {
+		if (!memcmp(ha->addr, addr, ETH_ALEN))
+			return false;
+	}
+
+	/* No collisions, address is good to go */
+	return true;
+}
+
+static int stmmac_test_hfilt(struct stmmac_priv *priv)
+{
+	unsigned char gd_addr[ETH_ALEN] = {0xf1, 0xee, 0xdd, 0xcc, 0xbb, 0xaa};
+	unsigned char bd_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff};
+	struct stmmac_packet_attrs attr = { };
+	int ret, tries = 256;
+
+	ret = stmmac_filter_check(priv);
+	if (ret)
+		return ret;
+
+	if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+		return -EOPNOTSUPP;
+
+	while (--tries) {
+		/* We only need to check the bd_addr for collisions */
+		bd_addr[ETH_ALEN - 1] = tries;
+		if (stmmac_hash_check(priv, bd_addr))
+			break;
+	}
+
+	if (!tries)
+		return -EOPNOTSUPP;
+
+	ret = dev_mc_add(priv->dev, gd_addr);
+	if (ret)
+		return ret;
+
+	attr.dst = gd_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = bd_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+cleanup:
+	dev_mc_del(priv->dev, gd_addr);
+	return ret;
+}
+
+static int stmmac_test_pfilt(struct stmmac_priv *priv)
+{
+	unsigned char gd_addr[ETH_ALEN] = {0xf0, 0x01, 0x44, 0x55, 0x66, 0x77};
+	unsigned char bd_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff};
+	struct stmmac_packet_attrs attr = { };
+	int ret, tries = 256;
+
+	if (stmmac_filter_check(priv))
+		return -EOPNOTSUPP;
+	if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries)
+		return -EOPNOTSUPP;
+
+	while (--tries) {
+		/* We only need to check the bd_addr for collisions */
+		bd_addr[ETH_ALEN - 1] = tries;
+		if (stmmac_perfect_check(priv, bd_addr))
+			break;
+	}
+
+	if (!tries)
+		return -EOPNOTSUPP;
+
+	ret = dev_uc_add(priv->dev, gd_addr);
+	if (ret)
+		return ret;
+
+	attr.dst = gd_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = bd_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+cleanup:
+	dev_uc_del(priv->dev, gd_addr);
+	return ret;
+}
+
+static int stmmac_test_mcfilt(struct stmmac_priv *priv)
+{
+	unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff};
+	unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff};
+	struct stmmac_packet_attrs attr = { };
+	int ret, tries = 256;
+
+	if (stmmac_filter_check(priv))
+		return -EOPNOTSUPP;
+	if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries)
+		return -EOPNOTSUPP;
+
+	while (--tries) {
+		/* We only need to check the mc_addr for collisions */
+		mc_addr[ETH_ALEN - 1] = tries;
+		if (stmmac_hash_check(priv, mc_addr))
+			break;
+	}
+
+	if (!tries)
+		return -EOPNOTSUPP;
+
+	ret = dev_uc_add(priv->dev, uc_addr);
+	if (ret)
+		return ret;
+
+	attr.dst = uc_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = mc_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+cleanup:
+	dev_uc_del(priv->dev, uc_addr);
+	return ret;
+}
+
+static int stmmac_test_ucfilt(struct stmmac_priv *priv)
+{
+	unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff};
+	unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff};
+	struct stmmac_packet_attrs attr = { };
+	int ret, tries = 256;
+
+	if (stmmac_filter_check(priv))
+		return -EOPNOTSUPP;
+	if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+		return -EOPNOTSUPP;
+
+	while (--tries) {
+		/* We only need to check the uc_addr for collisions */
+		uc_addr[ETH_ALEN - 1] = tries;
+		if (stmmac_perfect_check(priv, uc_addr))
+			break;
+	}
+
+	if (!tries)
+		return -EOPNOTSUPP;
+
+	ret = dev_mc_add(priv->dev, mc_addr);
+	if (ret)
+		return ret;
+
+	attr.dst = mc_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = uc_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+cleanup:
+	dev_mc_del(priv->dev, mc_addr);
+	return ret;
+}
+
+static int stmmac_test_flowctrl_validate(struct sk_buff *skb,
+					 struct net_device *ndev,
+					 struct packet_type *pt,
+					 struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct ethhdr *ehdr;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr))
+		goto out;
+	if (ehdr->h_proto != htons(ETH_P_PAUSE))
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int stmmac_test_flowctrl(struct stmmac_priv *priv)
+{
+	unsigned char paddr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x01};
+	struct phy_device *phydev = priv->dev->phydev;
+	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	struct stmmac_test_priv *tpriv;
+	unsigned int pkt_count;
+	int i, ret = 0;
+
+	if (!phydev || (!phydev->pause && !phydev->asym_pause))
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+	tpriv->pt.type = htons(ETH_P_PAUSE);
+	tpriv->pt.func = stmmac_test_flowctrl_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	dev_add_pack(&tpriv->pt);
+
+	/* Compute minimum number of packets to make FIFO full */
+	pkt_count = priv->plat->rx_fifo_size;
+	if (!pkt_count)
+		pkt_count = priv->dma_cap.rx_fifo_size;
+	pkt_count /= 1400;
+	pkt_count *= 2;
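+	/* At 1400 bytes per packet, twice the FIFO capacity is enough to
+	 * overflow it while the RX DMA is stopped below, forcing the MAC
+	 * to emit a PAUSE frame.
+	 */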
+
+	for (i = 0; i < rx_cnt; i++)
+		stmmac_stop_rx(priv, priv->ioaddr, i);
+
+	ret = dev_set_promiscuity(priv->dev, 1);
+	if (ret)
+		goto cleanup;
+
+	ret = dev_mc_add(priv->dev, paddr);
+	if (ret)
+		goto cleanup;
+
+	for (i = 0; i < pkt_count; i++) {
+		struct stmmac_packet_attrs attr = { };
+
+		attr.dst = priv->dev->dev_addr;
+		attr.dont_wait = true;
+		attr.size = 1400;
+
+		ret = __stmmac_test_loopback(priv, &attr);
+		if (ret)
+			goto cleanup;
+		if (tpriv->ok)
+			break;
+	}
+
+	/* Wait for some time in case RX Watchdog is enabled */
+	msleep(200);
+
+	for (i = 0; i < rx_cnt; i++) {
+		struct stmmac_channel *ch = &priv->channel[i];
+		u32 tail;
+
+		tail = priv->rx_queue[i].dma_rx_phy +
+			(DMA_RX_SIZE * sizeof(struct dma_desc));
+
+		stmmac_set_rx_tail_ptr(priv, priv->ioaddr, tail, i);
+		stmmac_start_rx(priv, priv->ioaddr, i);
+
+		local_bh_disable();
+		napi_reschedule(&ch->rx_napi);
+		local_bh_enable();
+	}
+
+	wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+	ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+cleanup:
+	dev_mc_del(priv->dev, paddr);
+	dev_set_promiscuity(priv->dev, -1);
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int stmmac_test_rss(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+
+	if (!priv->dma_cap.rssen || !priv->rss.enable)
+		return -EOPNOTSUPP;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.exp_hash = true;
+	attr.sport = 0x321;
+	attr.dport = 0x123;
+
+	return __stmmac_test_loopback(priv, &attr);
+}
+
+static int stmmac_test_vlan_validate(struct sk_buff *skb,
+				     struct net_device *ndev,
+				     struct packet_type *pt,
+				     struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct stmmachdr *shdr;
+	struct ethhdr *ehdr;
+	struct udphdr *uhdr;
+	struct iphdr *ihdr;
+	u16 proto;
+
+	proto = tpriv->double_vlan ? ETH_P_8021AD : ETH_P_8021Q;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+
+	if (skb_linearize(skb))
+		goto out;
+	if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN))
+		goto out;
+	if (tpriv->vlan_id) {
+		if (skb->vlan_proto != htons(proto))
+			goto out;
+		if (skb->vlan_tci != tpriv->vlan_id)
+			goto out;
+	}
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst))
+		goto out;
+
+	ihdr = ip_hdr(skb);
+	if (tpriv->double_vlan)
+		ihdr = (struct iphdr *)(skb_network_header(skb) + 4);
+	if (ihdr->protocol != IPPROTO_UDP)
+		goto out;
+
+	uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+	if (uhdr->dest != htons(tpriv->packet->dport))
+		goto out;
+
+	shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr));
+	if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC))
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0, i;
+
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_IP);
+	tpriv->pt.func = stmmac_test_vlan_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;
+
+	/*
+	 * As we use HASH filtering, false positives may appear. This is a
+	 * specially chosen ID so that adjacent IDs (+4) have different
+	 * HASH values.
+	 */
+	tpriv->vlan_id = 0x123;
+	dev_add_pack(&tpriv->pt);
+
+	ret = vlan_vid_add(priv->dev, htons(ETH_P_8021Q), tpriv->vlan_id);
+	if (ret)
+		goto cleanup;
+
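+	/* Only the first iteration sends the VID registered above; the
+	 * next three (VID + 1..3) must be dropped by the VLAN filter.
+	 */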
+	for (i = 0; i < 4; i++) {
+		attr.vlan = 1;
+		attr.vlan_id_out = tpriv->vlan_id + i;
+		attr.dst = priv->dev->dev_addr;
+		attr.sport = 9;
+		attr.dport = 9;
+
+		skb = stmmac_test_get_udp_skb(priv, &attr);
+		if (!skb) {
+			ret = -ENOMEM;
+			goto vlan_del;
+		}
+
+		skb_set_queue_mapping(skb, 0);
+		ret = dev_queue_xmit(skb);
+		if (ret)
+			goto vlan_del;
+
+		wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+		ret = tpriv->ok ? 0 : -ETIMEDOUT;
+		if (ret && !i) {
+			goto vlan_del;
+		} else if (!ret && i) {
+			ret = -EINVAL;
+			goto vlan_del;
+		} else {
+			ret = 0;
+		}
+
+		tpriv->ok = false;
+	}
+
+vlan_del:
+	vlan_vid_del(priv->dev, htons(ETH_P_8021Q), tpriv->vlan_id);
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0, i;
+
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	tpriv->double_vlan = true;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_8021Q);
+	tpriv->pt.func = stmmac_test_vlan_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;
+
+	/*
+	 * As we use HASH filtering, false positives may appear. This is a
+	 * specially chosen ID so that adjacent IDs (+4) have different
+	 * HASH values.
+	 */
+	tpriv->vlan_id = 0x123;
+	dev_add_pack(&tpriv->pt);
+
+	ret = vlan_vid_add(priv->dev, htons(ETH_P_8021AD), tpriv->vlan_id);
+	if (ret)
+		goto cleanup;
+
+	for (i = 0; i < 4; i++) {
+		attr.vlan = 2;
+		attr.vlan_id_out = tpriv->vlan_id + i;
+		attr.dst = priv->dev->dev_addr;
+		attr.sport = 9;
+		attr.dport = 9;
+
+		skb = stmmac_test_get_udp_skb(priv, &attr);
+		if (!skb) {
+			ret = -ENOMEM;
+			goto vlan_del;
+		}
+
+		skb_set_queue_mapping(skb, 0);
+		ret = dev_queue_xmit(skb);
+		if (ret)
+			goto vlan_del;
+
+		wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+		ret = tpriv->ok ? 0 : -ETIMEDOUT;
+		if (ret && !i) {
+			goto vlan_del;
+		} else if (!ret && i) {
+			ret = -EINVAL;
+			goto vlan_del;
+		} else {
+			ret = 0;
+		}
+
+		tpriv->ok = false;
+	}
+
+vlan_del:
+	vlan_vid_del(priv->dev, htons(ETH_P_8021AD), tpriv->vlan_id);
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int stmmac_test_rxp(struct stmmac_priv *priv)
+{
+	unsigned char addr[ETH_ALEN] = {0xde, 0xad, 0xbe, 0xef, 0x00, 0x00};
+	struct tc_cls_u32_offload cls_u32 = { };
+	struct stmmac_packet_attrs attr = { };
+	struct tc_action **actions, *act;
+	struct tc_u32_sel *sel;
+	struct tcf_exts *exts;
+	int ret, i, nk = 1;
+
+	if (!tc_can_offload(priv->dev))
+		return -EOPNOTSUPP;
+	if (!priv->dma_cap.frpsel)
+		return -EOPNOTSUPP;
+
+	sel = kzalloc(sizeof(*sel) + nk * sizeof(struct tc_u32_key), GFP_KERNEL);
+	if (!sel)
+		return -ENOMEM;
+
+	exts = kzalloc(sizeof(*exts), GFP_KERNEL);
+	if (!exts) {
+		ret = -ENOMEM;
+		goto cleanup_sel;
+	}
+
+	actions = kzalloc(nk * sizeof(*actions), GFP_KERNEL);
+	if (!actions) {
+		ret = -ENOMEM;
+		goto cleanup_exts;
+	}
+
+	act = kzalloc(nk * sizeof(*act), GFP_KERNEL);
+	if (!act) {
+		ret = -ENOMEM;
+		goto cleanup_actions;
+	}
+
+	cls_u32.command = TC_CLSU32_NEW_KNODE;
+	cls_u32.common.chain_index = 0;
+	cls_u32.common.protocol = htons(ETH_P_ALL);
+	cls_u32.knode.exts = exts;
+	cls_u32.knode.sel = sel;
+	cls_u32.knode.handle = 0x123;
+
+	exts->nr_actions = nk;
+	exts->actions = actions;
+	for (i = 0; i < nk; i++) {
+		struct tcf_gact *gact = to_gact(&act[i]);
+
+		actions[i] = &act[i];
+		gact->tcf_action = TC_ACT_SHOT;
+	}
+
+	sel->nkeys = nk;
+	sel->offshift = 0;
+	sel->keys[0].off = 6;
+	sel->keys[0].val = htonl(0xdeadbeef);
+	sel->keys[0].mask = ~0x0;
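+	/* Offset 6 is the start of the source MAC address, so this key
+	 * matches the 0xdeadbeef prefix of the test source set below.
+	 */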
+
+	ret = stmmac_tc_setup_cls_u32(priv, priv, &cls_u32);
+	if (ret)
+		goto cleanup_act;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.src = addr;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL; /* Shall NOT receive packet */
+
+	cls_u32.command = TC_CLSU32_DELETE_KNODE;
+	stmmac_tc_setup_cls_u32(priv, priv, &cls_u32);
+
+cleanup_act:
+	kfree(act);
+cleanup_actions:
+	kfree(actions);
+cleanup_exts:
+	kfree(exts);
+cleanup_sel:
+	kfree(sel);
+	return ret;
+}
+#else
+static int stmmac_test_rxp(struct stmmac_priv *priv)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static int stmmac_test_desc_sai(struct stmmac_priv *priv)
+{
+	unsigned char src[ETH_ALEN] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->dma_cap.vlins)
+		return -EOPNOTSUPP;
+
+	attr.remove_sa = true;
+	attr.sarc = true;
+	attr.src = src;
+	attr.dst = priv->dev->dev_addr;
+
+	priv->sarc_type = 0x1;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+
+	priv->sarc_type = 0x0;
+	return ret;
+}
+
+static int stmmac_test_desc_sar(struct stmmac_priv *priv)
+{
+	unsigned char src[ETH_ALEN] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->dma_cap.vlins)
+		return -EOPNOTSUPP;
+
+	attr.sarc = true;
+	attr.src = src;
+	attr.dst = priv->dev->dev_addr;
+
+	priv->sarc_type = 0x2;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+
+	priv->sarc_type = 0x0;
+	return ret;
+}
+
+static int stmmac_test_reg_sai(struct stmmac_priv *priv)
+{
+	unsigned char src[ETH_ALEN] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->dma_cap.vlins)
+		return -EOPNOTSUPP;
+
+	attr.remove_sa = true;
+	attr.sarc = true;
+	attr.src = src;
+	attr.dst = priv->dev->dev_addr;
+
+	if (stmmac_sarc_configure(priv, priv->ioaddr, 0x2))
+		return -EOPNOTSUPP;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+
+	stmmac_sarc_configure(priv, priv->ioaddr, 0x0);
+	return ret;
+}
+
+static int stmmac_test_reg_sar(struct stmmac_priv *priv)
+{
+	unsigned char src[ETH_ALEN] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->dma_cap.vlins)
+		return -EOPNOTSUPP;
+
+	attr.sarc = true;
+	attr.src = src;
+	attr.dst = priv->dev->dev_addr;
+
+	if (stmmac_sarc_configure(priv, priv->ioaddr, 0x3))
+		return -EOPNOTSUPP;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+
+	stmmac_sarc_configure(priv, priv->ioaddr, 0x0);
+	return ret;
+}
+
+static int stmmac_test_vlanoff_common(struct stmmac_priv *priv, bool svlan)
+{
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0;
+	u16 proto;
+
+	if (!priv->dma_cap.vlins)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	proto = svlan ? ETH_P_8021AD : ETH_P_8021Q;
+
+	tpriv->ok = false;
+	tpriv->double_vlan = svlan;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = svlan ? htons(ETH_P_8021Q) : htons(ETH_P_IP);
+	tpriv->pt.func = stmmac_test_vlan_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;
+	tpriv->vlan_id = 0x123;
+	dev_add_pack(&tpriv->pt);
+
+	ret = vlan_vid_add(priv->dev, htons(proto), tpriv->vlan_id);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = priv->dev->dev_addr;
+
+	skb = stmmac_test_get_udp_skb(priv, &attr);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto vlan_del;
+	}
+
+	__vlan_hwaccel_put_tag(skb, htons(proto), tpriv->vlan_id);
+	skb->protocol = htons(proto);
+
+	skb_set_queue_mapping(skb, 0);
+	ret = dev_queue_xmit(skb);
+	if (ret)
+		goto vlan_del;
+
+	wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+	ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+vlan_del:
+	vlan_vid_del(priv->dev, htons(proto), tpriv->vlan_id);
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int stmmac_test_vlanoff(struct stmmac_priv *priv)
+{
+	return stmmac_test_vlanoff_common(priv, false);
+}
+
+static int stmmac_test_svlanoff(struct stmmac_priv *priv)
+{
+	if (!priv->dma_cap.dvlan)
+		return -EOPNOTSUPP;
+	return stmmac_test_vlanoff_common(priv, true);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask)
+{
+	struct flow_dissector_key_ipv4_addrs key, mask;
+	unsigned long dummy_cookie = 0xdeadbeef;
+	struct stmmac_packet_attrs attr = { };
+	struct flow_dissector *dissector;
+	struct flow_cls_offload *cls;
+	struct flow_rule *rule;
+	int ret;
+
+	if (!tc_can_offload(priv->dev))
+		return -EOPNOTSUPP;
+	if (!priv->dma_cap.l3l4fnum)
+		return -EOPNOTSUPP;
+	if (priv->rss.enable)
+		stmmac_rss_configure(priv, priv->hw, NULL,
+				     priv->plat->rx_queues_to_use);
+
+	dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
+	if (!dissector) {
+		ret = -ENOMEM;
+		goto cleanup_rss;
+	}
+
+	dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+	dissector->offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = 0;
+
+	cls = kzalloc(sizeof(*cls), GFP_KERNEL);
+	if (!cls) {
+		ret = -ENOMEM;
+		goto cleanup_dissector;
+	}
+
+	cls->common.chain_index = 0;
+	cls->command = FLOW_CLS_REPLACE;
+	cls->cookie = dummy_cookie;
+
+	rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL);
+	if (!rule) {
+		ret = -ENOMEM;
+		goto cleanup_cls;
+	}
+
+	rule->match.dissector = dissector;
+	rule->match.key = (void *)&key;
+	rule->match.mask = (void *)&mask;
+
+	key.src = htonl(src);
+	key.dst = htonl(dst);
+	mask.src = src_mask;
+	mask.dst = dst_mask;
+
+	cls->rule = rule;
+
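+	/* A single DROP action: once the rule is offloaded, matching
+	 * frames must no longer reach the stack.
+	 */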
+	rule->action.entries[0].id = FLOW_ACTION_DROP;
+	rule->action.num_entries = 1;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.ip_dst = dst;
+	attr.ip_src = src;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup_rule;
+
+	ret = stmmac_tc_setup_cls(priv, priv, cls);
+	if (ret)
+		goto cleanup_rule;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+	cls->command = FLOW_CLS_DESTROY;
+	stmmac_tc_setup_cls(priv, priv, cls);
+cleanup_rule:
+	kfree(rule);
+cleanup_cls:
+	kfree(cls);
+cleanup_dissector:
+	kfree(dissector);
+cleanup_rss:
+	if (priv->rss.enable) {
+		stmmac_rss_configure(priv, priv->hw, &priv->rss,
+				     priv->plat->rx_queues_to_use);
+	}
+
+	return ret;
+}
+#else
+static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static int stmmac_test_l3filt_da(struct stmmac_priv *priv)
+{
+	u32 addr = 0x10203040;
+
+	return __stmmac_test_l3filt(priv, addr, 0, ~0, 0);
+}
+
+static int stmmac_test_l3filt_sa(struct stmmac_priv *priv)
+{
+	u32 addr = 0x10203040;
+
+	return __stmmac_test_l3filt(priv, 0, addr, 0, ~0);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask, bool udp)
+{
+	struct {
+		struct flow_dissector_key_basic bkey;
+		struct flow_dissector_key_ports key;
+	} __aligned(BITS_PER_LONG / 8) keys;
+	struct {
+		struct flow_dissector_key_basic bmask;
+		struct flow_dissector_key_ports mask;
+	} __aligned(BITS_PER_LONG / 8) masks;
+	unsigned long dummy_cookie = 0xdeadbeef;
+	struct stmmac_packet_attrs attr = { };
+	struct flow_dissector *dissector;
+	struct flow_cls_offload *cls;
+	struct flow_rule *rule;
+	int ret;
+
+	if (!tc_can_offload(priv->dev))
+		return -EOPNOTSUPP;
+	if (!priv->dma_cap.l3l4fnum)
+		return -EOPNOTSUPP;
+	if (priv->rss.enable)
+		stmmac_rss_configure(priv, priv->hw, NULL,
+				     priv->plat->rx_queues_to_use);
+
+	dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
+	if (!dissector) {
+		ret = -ENOMEM;
+		goto cleanup_rss;
+	}
+
+	dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_BASIC);
+	dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_PORTS);
+	dissector->offset[FLOW_DISSECTOR_KEY_BASIC] = 0;
+	dissector->offset[FLOW_DISSECTOR_KEY_PORTS] = offsetof(typeof(keys), key);
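+	/* These offsets must mirror the keys/masks struct layout above */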
+
+	cls = kzalloc(sizeof(*cls), GFP_KERNEL);
+	if (!cls) {
+		ret = -ENOMEM;
+		goto cleanup_dissector;
+	}
+
+	cls->common.chain_index = 0;
+	cls->command = FLOW_CLS_REPLACE;
+	cls->cookie = dummy_cookie;
+
+	rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL);
+	if (!rule) {
+		ret = -ENOMEM;
+		goto cleanup_cls;
+	}
+
+	rule->match.dissector = dissector;
+	rule->match.key = (void *)&keys;
+	rule->match.mask = (void *)&masks;
+
+	keys.bkey.ip_proto = udp ? IPPROTO_UDP : IPPROTO_TCP;
+	keys.key.src = htons(src);
+	keys.key.dst = htons(dst);
+	masks.mask.src = src_mask;
+	masks.mask.dst = dst_mask;
+
+	cls->rule = rule;
+
+	rule->action.entries[0].id = FLOW_ACTION_DROP;
+	rule->action.num_entries = 1;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.tcp = !udp;
+	attr.sport = src;
+	attr.dport = dst;
+	attr.ip_dst = 0;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup_rule;
+
+	ret = stmmac_tc_setup_cls(priv, priv, cls);
+	if (ret)
+		goto cleanup_rule;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = ret ? 0 : -EINVAL;
+
+	cls->command = FLOW_CLS_DESTROY;
+	stmmac_tc_setup_cls(priv, priv, cls);
+cleanup_rule:
+	kfree(rule);
+cleanup_cls:
+	kfree(cls);
+cleanup_dissector:
+	kfree(dissector);
+cleanup_rss:
+	if (priv->rss.enable) {
+		stmmac_rss_configure(priv, priv->hw, &priv->rss,
+				     priv->plat->rx_queues_to_use);
+	}
+
+	return ret;
+}
+#else
+static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
+				u32 dst_mask, u32 src_mask, bool udp)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static int stmmac_test_l4filt_da_tcp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, false);
+}
+
+static int stmmac_test_l4filt_sa_tcp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, false);
+}
+
+static int stmmac_test_l4filt_da_udp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, true);
+}
+
+static int stmmac_test_l4filt_sa_udp(struct stmmac_priv *priv)
+{
+	u16 dummy_port = 0x123;
+
+	return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, true);
+}
+
+static int stmmac_test_arp_validate(struct sk_buff *skb,
+				    struct net_device *ndev,
+				    struct packet_type *pt,
+				    struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct ethhdr *ehdr;
+	struct arphdr *ahdr;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->src))
+		goto out;
+
+	ahdr = arp_hdr(skb);
+	if (ahdr->ar_op != htons(ARPOP_REPLY))
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int stmmac_test_arpoffload(struct stmmac_priv *priv)
+{
+	unsigned char src[ETH_ALEN] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
+	unsigned char dst[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	struct stmmac_packet_attrs attr = { };
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	u32 ip_addr = 0xdeadcafe;
+	u32 ip_src = 0xdeadbeef;
+	int ret;
+
+	if (!priv->dma_cap.arpoffsel)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_ARP);
+	tpriv->pt.func = stmmac_test_arp_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = &attr;
+	dev_add_pack(&tpriv->pt);
+
+	attr.src = src;
+	attr.ip_src = ip_src;
+	attr.dst = dst;
+	attr.ip_dst = ip_addr;
+
+	skb = stmmac_test_get_arp_skb(priv, &attr);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
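+	/* With ARP offload enabled the MAC itself answers the request.
+	 * Promiscuous mode lets the RX path deliver the reply, which is
+	 * addressed to the fake 'src' MAC rather than to the device.
+	 */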
+	ret = stmmac_set_arp_offload(priv, priv->hw, true, ip_addr);
+	if (ret)
+		goto cleanup;
+
+	ret = dev_set_promiscuity(priv->dev, 1);
+	if (ret)
+		goto cleanup;
+
+	skb_set_queue_mapping(skb, 0);
+	ret = dev_queue_xmit(skb);
+	if (ret)
+		goto cleanup_promisc;
+
+	wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+	ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+cleanup_promisc:
+	dev_set_promiscuity(priv->dev, -1);
+cleanup:
+	stmmac_set_arp_offload(priv, priv->hw, false, 0x0);
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int __stmmac_test_jumbo(struct stmmac_priv *priv, u16 queue)
+{
+	struct stmmac_packet_attrs attr = { };
+	int size = priv->dma_buf_sz;
+
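+	/* Leave room for the FCS so the frame still fits one DMA buffer */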
+	attr.dst = priv->dev->dev_addr;
+	attr.max_size = size - ETH_FCS_LEN;
+	attr.queue_mapping = queue;
+
+	return __stmmac_test_loopback(priv, &attr);
+}
+
+static int stmmac_test_jumbo(struct stmmac_priv *priv)
+{
+	return __stmmac_test_jumbo(priv, 0);
+}
+
+static int stmmac_test_mjumbo(struct stmmac_priv *priv)
+{
+	u32 chan, tx_cnt = priv->plat->tx_queues_to_use;
+	int ret;
+
+	if (tx_cnt <= 1)
+		return -EOPNOTSUPP;
+
+	for (chan = 0; chan < tx_cnt; chan++) {
+		ret = __stmmac_test_jumbo(priv, chan);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int stmmac_test_sph(struct stmmac_priv *priv)
+{
+	unsigned long cnt_end, cnt_start = priv->xstats.rx_split_hdr_pkt_n;
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->sph)
+		return -EOPNOTSUPP;
+
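+	/* Split Header places L3/L4 headers and payload in separate
+	 * buffers; the counter must advance for UDP and TCP alike.
+	 */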
+	/* Check for UDP first */
+	attr.dst = priv->dev->dev_addr;
+	attr.tcp = false;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		return ret;
+
+	cnt_end = priv->xstats.rx_split_hdr_pkt_n;
+	if (cnt_end <= cnt_start)
+		return -EINVAL;
+
+	/* Check for TCP now */
+	cnt_start = cnt_end;
+
+	attr.dst = priv->dev->dev_addr;
+	attr.tcp = true;
+
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		return ret;
+
+	cnt_end = priv->xstats.rx_split_hdr_pkt_n;
+	if (cnt_end <= cnt_start)
+		return -EINVAL;
+
+	return 0;
+}
+
+#define STMMAC_LOOPBACK_NONE	0
+#define STMMAC_LOOPBACK_MAC	1
+#define STMMAC_LOOPBACK_PHY	2
+
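+/* Names are space-padded so the ethtool self-test report aligns */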
+static const struct stmmac_test {
+	char name[ETH_GSTRING_LEN];
+	int lb;
+	int (*fn)(struct stmmac_priv *priv);
+} stmmac_selftests[] = {
+	{
+		.name = "MAC Loopback         ",
+		.lb = STMMAC_LOOPBACK_MAC,
+		.fn = stmmac_test_mac_loopback,
+	}, {
+		.name = "PHY Loopback         ",
+		.lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */
+		.fn = stmmac_test_phy_loopback,
+	}, {
+		.name = "MMC Counters         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_mmc,
+	}, {
+		.name = "EEE                  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_eee,
+	}, {
+		.name = "Hash Filter MC       ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_hfilt,
+	}, {
+		.name = "Perfect Filter UC    ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_pfilt,
+	}, {
+		.name = "MC Filter            ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_mcfilt,
+	}, {
+		.name = "UC Filter            ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_ucfilt,
+	}, {
+		.name = "Flow Control         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_flowctrl,
+	}, {
+		.name = "RSS                  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_rss,
+	}, {
+		.name = "VLAN Filtering       ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_vlanfilt,
+	}, {
+		.name = "Double VLAN Filtering",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_dvlanfilt,
+	}, {
+		.name = "Flexible RX Parser   ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_rxp,
+	}, {
+		.name = "SA Insertion (desc)  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_desc_sai,
+	}, {
+		.name = "SA Replacement (desc)",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_desc_sar,
+	}, {
+		.name = "SA Insertion (reg)  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_reg_sai,
+	}, {
+		.name = "SA Replacement (reg)",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_reg_sar,
+	}, {
+		.name = "VLAN TX Insertion   ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_vlanoff,
+	}, {
+		.name = "SVLAN TX Insertion  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_svlanoff,
+	}, {
+		.name = "L3 DA Filtering     ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l3filt_da,
+	}, {
+		.name = "L3 SA Filtering     ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l3filt_sa,
+	}, {
+		.name = "L4 DA TCP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_da_tcp,
+	}, {
+		.name = "L4 SA TCP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_sa_tcp,
+	}, {
+		.name = "L4 DA UDP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_da_udp,
+	}, {
+		.name = "L4 SA UDP Filtering ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_l4filt_sa_udp,
+	}, {
+		.name = "ARP Offload         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_arpoffload,
+	}, {
+		.name = "Jumbo Frame         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_jumbo,
+	}, {
+		.name = "Multichannel Jumbo  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_mjumbo,
+	}, {
+		.name = "Split Header        ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_sph,
+	},
+};
+
+void stmmac_selftest_run(struct net_device *dev,
+			 struct ethtool_test *etest, u64 *buf)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int count = stmmac_selftest_get_count(priv);
+	int carrier = netif_carrier_ok(dev);
+	int i, ret;
+
+	memset(buf, 0, sizeof(*buf) * count);
+	stmmac_test_next_id = 0;
+
+	if (etest->flags != ETH_TEST_FL_OFFLINE) {
+		netdev_err(priv->dev, "Only offline tests are supported\n");
+		etest->flags |= ETH_TEST_FL_FAILED;
+		return;
+	} else if (!carrier) {
+		netdev_err(priv->dev, "You need valid Link to execute tests\n");
+		etest->flags |= ETH_TEST_FL_FAILED;
+		return;
+	}
+
+	/* We don't want extra traffic */
+	netif_carrier_off(dev);
+
+	/* Wait for queues to drain */
+	msleep(200);
+
+	for (i = 0; i < count; i++) {
+		ret = 0;
+
+		switch (stmmac_selftests[i].lb) {
+		case STMMAC_LOOPBACK_PHY:
+			ret = -EOPNOTSUPP;
+			if (dev->phydev)
+				ret = phy_loopback(dev->phydev, true);
+			if (!ret)
+				break;
+			/* fall through */
+		case STMMAC_LOOPBACK_MAC:
+			ret = stmmac_set_mac_loopback(priv, priv->ioaddr, true);
+			break;
+		case STMMAC_LOOPBACK_NONE:
+			break;
+		default:
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		/*
+		 * First tests will always be MAC / PHY loopback. If any of
+		 * them is not supported we abort early.
+		 */
+		if (ret) {
+			netdev_err(priv->dev, "Loopback is not supported\n");
+			etest->flags |= ETH_TEST_FL_FAILED;
+			break;
+		}
+
+		ret = stmmac_selftests[i].fn(priv);
+		if (ret && (ret != -EOPNOTSUPP))
+			etest->flags |= ETH_TEST_FL_FAILED;
+		buf[i] = ret;
+
+		switch (stmmac_selftests[i].lb) {
+		case STMMAC_LOOPBACK_PHY:
+			ret = -EOPNOTSUPP;
+			if (dev->phydev)
+				ret = phy_loopback(dev->phydev, false);
+			if (!ret)
+				break;
+			/* fall through */
+		case STMMAC_LOOPBACK_MAC:
+			stmmac_set_mac_loopback(priv, priv->ioaddr, false);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Restart everything */
+	if (carrier)
+		netif_carrier_on(dev);
+}
+
+void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	for (i = 0; i < stmmac_selftest_get_count(priv); i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1,
+			 stmmac_selftests[i].name);
+		p += ETH_GSTRING_LEN;
+	}
+}
+
+int stmmac_selftest_get_count(struct stmmac_priv *priv)
+{
+	return ARRAY_SIZE(stmmac_selftests);
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 531294f..f9a9a9d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -37,7 +37,7 @@
 		entry = &priv->tc_entries[i];
 		if (!entry->in_use && !first && free)
 			first = entry;
-		if (entry->handle == loc && !free)
+		if ((entry->handle == loc) && !free && !entry->is_frag)
 			dup = entry;
 	}
 
@@ -94,7 +94,7 @@
 	struct stmmac_tc_entry *entry, *frag = NULL;
 	struct tc_u32_sel *sel = cls->knode.sel;
 	u32 off, data, mask, real_off, rem;
-	u32 prio = cls->common.prio;
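+	/* The TC core passes prio down-shifted by 16 bits; restore the
+	 * full value here.
+	 */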
+	u32 prio = cls->common.prio << 16;
 	int ret;
 
 	/* Only 1 match per entry */
@@ -242,9 +242,27 @@
 {
 	struct dma_features *dma_cap = &priv->dma_cap;
 	unsigned int count;
+	int i;
 
+	if (dma_cap->l3l4fnum) {
+		priv->flow_entries_max = dma_cap->l3l4fnum;
+		priv->flow_entries = devm_kcalloc(priv->device,
+						  dma_cap->l3l4fnum,
+						  sizeof(*priv->flow_entries),
+						  GFP_KERNEL);
+		if (!priv->flow_entries)
+			return -ENOMEM;
+
+		for (i = 0; i < priv->flow_entries_max; i++)
+			priv->flow_entries[i].idx = i;
+
+		dev_info(priv->device, "Enabled Flow TC (entries=%d)\n",
+			 priv->flow_entries_max);
+	}
+
+	/* Fail silently as we can still use remaining features, e.g. CBS */
 	if (!dma_cap->frpsel)
-		return -EINVAL;
+		return 0;
 
 	switch (dma_cap->frpbs) {
 	case 0x0:
@@ -301,6 +319,8 @@
 	/* Queue 0 is not AVB capable */
 	if (queue <= 0 || queue >= tx_queues_count)
 		return -EINVAL;
+	if (!priv->dma_cap.av)
+		return -EOPNOTSUPP;
 	if (priv->speed != SPEED_100 && priv->speed != SPEED_1000)
 		return -EOPNOTSUPP;
 
@@ -347,8 +367,235 @@
 	return 0;
 }
 
+static int tc_parse_flow_actions(struct stmmac_priv *priv,
+				 struct flow_action *action,
+				 struct stmmac_flow_entry *entry)
+{
+	struct flow_action_entry *act;
+	int i;
+
+	if (!flow_action_has_entries(action))
+		return -EINVAL;
+
+	flow_action_for_each(i, act, action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
+			entry->action |= STMMAC_FLOW_ACTION_DROP;
+			return 0;
+		default:
+			break;
+		}
+	}
+
+	/* Nothing to do, maybe an inverse filter? */
+	return 0;
+}
+
+static int tc_add_basic_flow(struct stmmac_priv *priv,
+			     struct flow_cls_offload *cls,
+			     struct stmmac_flow_entry *entry)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_match_basic match;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC))
+		return -EINVAL;
+
+	flow_rule_match_basic(rule, &match);
+	entry->ip_proto = match.key->ip_proto;
+	return 0;
+}
+
+static int tc_add_ip4_flow(struct stmmac_priv *priv,
+			   struct flow_cls_offload *cls,
+			   struct stmmac_flow_entry *entry)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	bool inv = entry->action & STMMAC_FLOW_ACTION_DROP;
+	struct flow_match_ipv4_addrs match;
+	u32 hw_match;
+	int ret;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS))
+		return -EINVAL;
+
+	flow_rule_match_ipv4_addrs(rule, &match);
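+	/* A zero masked address means no filter is requested for it */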
+	hw_match = ntohl(match.key->src) & ntohl(match.mask->src);
+	if (hw_match) {
+		ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true,
+					      false, true, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	hw_match = ntohl(match.key->dst) & ntohl(match.mask->dst);
+	if (hw_match) {
+		ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true,
+					      false, false, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int tc_add_ports_flow(struct stmmac_priv *priv,
+			     struct flow_cls_offload *cls,
+			     struct stmmac_flow_entry *entry)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	bool inv = entry->action & STMMAC_FLOW_ACTION_DROP;
+	struct flow_match_ports match;
+	u32 hw_match;
+	bool is_udp;
+	int ret;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS))
+		return -EINVAL;
+
+	switch (entry->ip_proto) {
+	case IPPROTO_TCP:
+		is_udp = false;
+		break;
+	case IPPROTO_UDP:
+		is_udp = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	flow_rule_match_ports(rule, &match);
+
+	hw_match = ntohs(match.key->src) & ntohs(match.mask->src);
+	if (hw_match) {
+		ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true,
+					      is_udp, true, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	hw_match = ntohs(match.key->dst) & ntohs(match.mask->dst);
+	if (hw_match) {
+		ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true,
+					      is_udp, false, inv, hw_match);
+		if (ret)
+			return ret;
+	}
+
+	entry->is_l4 = true;
+	return 0;
+}
+
+static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv,
+					      struct flow_cls_offload *cls,
+					      bool get_free)
+{
+	int i;
+
+	for (i = 0; i < priv->flow_entries_max; i++) {
+		struct stmmac_flow_entry *entry = &priv->flow_entries[i];
+
+		if (entry->cookie == cls->cookie)
+			return entry;
+		if (get_free && !entry->in_use)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct {
+	int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls,
+		  struct stmmac_flow_entry *entry);
+} tc_flow_parsers[] = {
+	{ .fn = tc_add_basic_flow },
+	{ .fn = tc_add_ip4_flow },
+	{ .fn = tc_add_ports_flow },
+};
+
+static int tc_add_flow(struct stmmac_priv *priv,
+		       struct flow_cls_offload *cls)
+{
+	struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false);
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	int i, ret;
+
+	if (!entry) {
+		entry = tc_find_flow(priv, cls, true);
+		if (!entry)
+			return -ENOENT;
+	}
+
+	ret = tc_parse_flow_actions(priv, &rule->action, entry);
+	if (ret)
+		return ret;
+
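+	/* Each parser programs the part of the rule it understands; the
+	 * entry is kept only if at least one of them succeeded.
+	 */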
+	for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) {
+		ret = tc_flow_parsers[i].fn(priv, cls, entry);
+		if (!ret) {
+			entry->in_use = true;
+			continue;
+		}
+	}
+
+	if (!entry->in_use)
+		return -EINVAL;
+
+	entry->cookie = cls->cookie;
+	return 0;
+}
+
+static int tc_del_flow(struct stmmac_priv *priv,
+		       struct flow_cls_offload *cls)
+{
+	struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false);
+	int ret;
+
+	if (!entry || !entry->in_use)
+		return -ENOENT;
+
+	if (entry->is_l4) {
+		ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, false,
+					      false, false, false, 0);
+	} else {
+		ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, false,
+					      false, false, false, 0);
+	}
+
+	entry->in_use = false;
+	entry->cookie = 0;
+	entry->is_l4 = false;
+	return ret;
+}
+
+static int tc_setup_cls(struct stmmac_priv *priv,
+			struct flow_cls_offload *cls)
+{
+	int ret = 0;
+
+	switch (cls->command) {
+	case FLOW_CLS_REPLACE:
+		ret = tc_add_flow(priv, cls);
+		break;
+	case FLOW_CLS_DESTROY:
+		ret = tc_del_flow(priv, cls);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
 const struct stmmac_tc_ops dwmac510_tc_ops = {
 	.init = tc_init,
 	.setup_cls_u32 = tc_setup_cls_u32,
 	.setup_cbs = tc_setup_cbs,
+	.setup_cls = tc_setup_cls,
 };
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 9020b08..c91876f 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1,22 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0+
 /* cassini.c: Sun Microsystems Cassini(+) ethernet driver.
  *
  * Copyright (C) 2004 Sun Microsystems Inc.
  * Copyright (C) 2003 Adrian Sun (asun@darksunrising.com)
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * This driver uses the sungem driver (c) David Miller
  * (davem@redhat.com) as its basis.
  *
@@ -1911,7 +1898,7 @@
 		cp->net_stats[ring].tx_packets++;
 		cp->net_stats[ring].tx_bytes += skb->len;
 		spin_unlock(&cp->stat_lock[ring]);
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 	}
 	cp->tx_old[ring] = entry;
 
@@ -2047,7 +2034,7 @@
 
 		__skb_frag_set_page(frag, page->buffer);
 		__skb_frag_ref(frag);
-		frag->page_offset = off;
+		skb_frag_off_set(frag, off);
 		skb_frag_size_set(frag, hlen - swivel);
 
 		/* any more data? */
@@ -2071,7 +2058,7 @@
 
 			__skb_frag_set_page(frag, page->buffer);
 			__skb_frag_ref(frag);
-			frag->page_offset = 0;
+			skb_frag_off_set(frag, 0);
 			skb_frag_size_set(frag, hlen);
 			RX_USED_ADD(page, hlen + cp->crc_size);
 		}
@@ -2829,7 +2816,7 @@
 		mapping = skb_frag_dma_map(&cp->pdev->dev, fragp, 0, len,
 					   DMA_TO_DEVICE);
 
-		tabort = cas_calc_tabort(cp, fragp->page_offset, len);
+		tabort = cas_calc_tabort(cp, skb_frag_off(fragp), len);
 		if (unlikely(tabort)) {
 			void *addr;
 
@@ -2840,7 +2827,7 @@
 
 			addr = cas_page_map(skb_frag_page(fragp));
 			memcpy(tx_tiny_buf(cp, ring, entry),
-			       addr + fragp->page_offset + len - tabort,
+			       addr + skb_frag_off(fragp) + len - tabort,
 			       tabort);
 			cas_page_unmap(addr);
 			mapping = tx_tiny_map(cp, ring, entry, tentry);
diff --git a/drivers/net/ethernet/sun/cassini.h b/drivers/net/ethernet/sun/cassini.h
index 13f3860..ae5f05f 100644
--- a/drivers/net/ethernet/sun/cassini.h
+++ b/drivers/net/ethernet/sun/cassini.h
@@ -1,23 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0+ */
 /* $Id: cassini.h,v 1.16 2004/08/17 21:15:16 zaumen Exp $
  * cassini.h: Definitions for Sun Microsystems Cassini(+) ethernet driver.
  *
  * Copyright (C) 2004 Sun Microsystems Inc.
  * Copyright (c) 2003 Adrian Sun (asun@darksunrising.com)
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
  * vendor id: 0x108E (Sun Microsystems, Inc.)
  * device id: 0xabba (Cassini)
  * revision ids: 0x01 = Cassini
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index d42f47f..01ea0d6 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -101,8 +101,7 @@
 }
 
 static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    struct net_device *sb_dev,
-			    select_queue_fallback_t fallback)
+			    struct net_device *sb_dev)
 {
 	struct vnet_port *port = netdev_priv(dev);
 
@@ -113,7 +112,7 @@
 }
 
 /* Wrappers to common functions */
-static int vsw_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t vsw_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	return sunvnet_start_xmit_common(skb, dev, vsw_tx_port_find);
 }
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 9319d84..f5fd1f3 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -1217,8 +1217,6 @@
 
 	spin_lock_irqsave(&np->lock, flags);
 
-	err = -EINVAL;
-
 	err = mii_read(np, np->phy_addr, MII_BMSR);
 	if (err < 0)
 		goto out;
@@ -6697,7 +6695,7 @@
 
 		len = skb_frag_size(frag);
 		mapping = np->ops->map_page(np->device, skb_frag_page(frag),
-					    frag->page_offset, len,
+					    skb_frag_off(frag), len,
 					    DMA_TO_DEVICE);
 
 		rp->tx_buffs[prod].skb = NULL;
@@ -7464,6 +7462,7 @@
 					class = CLASS_CODE_USER_PROG4;
 					break;
 				default:
+					class = CLASS_CODE_UNRECOG;
 					break;
 				}
 				ret = tcam_user_ip_class_set(np, class, 0,
@@ -8100,6 +8099,8 @@
 		start += 3;
 
 		prop_len = niu_pci_eeprom_read(np, start + 4);
+		if (prop_len < 0)
+			return prop_len;
 		err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64);
 		if (err < 0)
 			return err;
@@ -8144,8 +8145,12 @@
 			netif_printk(np, probe, KERN_DEBUG, np->dev,
 				     "VPD_SCAN: Reading in property [%s] len[%d]\n",
 				     namebuf, prop_len);
-			for (i = 0; i < prop_len; i++)
-				*prop_buf++ = niu_pci_eeprom_read(np, off + i);
+			for (i = 0; i < prop_len; i++) {
+				err = niu_pci_eeprom_read(np, off + i);
+				if (err >= 0)
+					*prop_buf = err;
+				++prop_buf;
+			}
 		}
 
 		start += len;
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index f047b27..e9b757b 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -781,7 +781,7 @@
 
 		DTX(("skb(%p) ", skb));
 		bp->tx_skbs[elem] = NULL;
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 
 		elem = NEXT_TX(elem);
 	}
@@ -950,7 +950,8 @@
 }
 
 /* Put a packet on the wire. */
-static int bigmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+bigmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bigmac *bp = netdev_priv(dev);
 	int len, entry;
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index b9221fc..3e76311 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -2760,7 +2760,7 @@
 	void __iomem *p = pci_map_rom(pdev, &size);
 
 	if (p) {
-			int found;
+		int found;
 
 		found = readb(p) == 0x55 &&
 			readb(p + 1) == 0xaa &&
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 06da2f5..d007dfe 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -1962,7 +1962,7 @@
 			this = &txbase[elem];
 		}
 
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		dev->stats.tx_packets++;
 	}
 	hp->tx_old = elem;
@@ -2691,7 +2691,7 @@
 	sbus_dp = op->dev.parent->of_node;
 
 	/* We can match PCI devices too, do not accept those here. */
-	if (strcmp(sbus_dp->name, "sbus") && strcmp(sbus_dp->name, "sbi"))
+	if (!of_node_name_eq(sbus_dp, "sbus") && !of_node_name_eq(sbus_dp, "sbi"))
 		return err;
 
 	if (is_qfe) {
@@ -2999,7 +2999,7 @@
 	/* Now make sure pci_dev cookie is there. */
 #ifdef CONFIG_SPARC
 	dp = pci_device_to_OF_node(pdev);
-	strcpy(prom_name, dp->name);
+	snprintf(prom_name, sizeof(prom_name), "%pOFn", dp);
 #else
 	if (is_quattro_p(pdev))
 		strcpy(prom_name, "SUNW,qfe");
diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c
index 7fe0d5e..1468fa0 100644
--- a/drivers/net/ethernet/sun/sunqe.c
+++ b/drivers/net/ethernet/sun/sunqe.c
@@ -570,7 +570,7 @@
 }
 
 /* Get a packet queued to go onto the wire. */
-static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct sunqe *qep = netdev_priv(dev);
 	struct sunqe_buffers *qbufs = qep->buffers;
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 12539b3..96b883f 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -234,8 +234,7 @@
 }
 
 static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     struct net_device *sb_dev,
-			     select_queue_fallback_t fallback)
+			     struct net_device *sb_dev)
 {
 	struct vnet *vp = netdev_priv(dev);
 	struct vnet_port *port = __tx_port_find(vp, skb);
@@ -247,7 +246,7 @@
 }
 
 /* Wrappers to common functions */
-static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	return sunvnet_start_xmit_common(skb, dev, vnet_tx_port_find);
 }
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index d8f4c3f..8b94d9a 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1088,7 +1088,7 @@
 			vaddr = kmap_atomic(skb_frag_page(f));
 			blen = skb_frag_size(f);
 			blen += 8 - (blen & 7);
-			err = ldc_map_single(lp, vaddr + f->page_offset,
+			err = ldc_map_single(lp, vaddr + skb_frag_off(f),
 					     blen, cookies + nc, ncookies - nc,
 					     map_perm);
 			kunmap_atomic(vaddr);
@@ -1124,7 +1124,7 @@
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 
-		docopy |= f->page_offset & 7;
+		docopy |= skb_frag_off(f) & 7;
 	}
 	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
 	    skb_tailroom(skb) < pad ||
@@ -1216,9 +1216,10 @@
 	return skb;
 }
 
-static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
-				struct vnet_port *(*vnet_tx_port)
-				(struct sk_buff *, struct net_device *))
+static netdev_tx_t
+vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
+		     struct vnet_port *(*vnet_tx_port)
+		     (struct sk_buff *, struct net_device *))
 {
 	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
@@ -1321,9 +1322,10 @@
 	return NETDEV_TX_OK;
 }
 
-int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
-			      struct vnet_port *(*vnet_tx_port)
-			      (struct sk_buff *, struct net_device *))
+netdev_tx_t
+sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
+			  struct vnet_port *(*vnet_tx_port)
+			  (struct sk_buff *, struct net_device *))
 {
 	struct vnet_port *port = NULL;
 	struct vio_dring_state *dr;
@@ -1530,8 +1532,7 @@
 	else if (port)
 		del_timer(&port->clean_timer);
 	rcu_read_unlock();
-	if (skb)
-		dev_kfree_skb(skb);
+	dev_kfree_skb(skb);
 	vnet_free_skbs(freeskbs);
 	dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h
index 1ea0b01..2b808d2 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.h
+++ b/drivers/net/ethernet/sun/sunvnet_common.h
@@ -136,9 +136,10 @@
 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp);
 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p);
 void sunvnet_tx_timeout_common(struct net_device *dev);
-int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
-			   struct vnet_port *(*vnet_tx_port)
-			   (struct sk_buff *, struct net_device *));
+netdev_tx_t
+sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
+			  struct vnet_port *(*vnet_tx_port)
+			  (struct sk_buff *, struct net_device *));
 #ifdef CONFIG_NET_POLL_CONTROLLER
 void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp);
 #endif
diff --git a/drivers/net/ethernet/synopsys/Kconfig b/drivers/net/ethernet/synopsys/Kconfig
index a950388..9e19977 100644
--- a/drivers/net/ethernet/synopsys/Kconfig
+++ b/drivers/net/ethernet/synopsys/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Synopsys network device configuration
 #
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
index 031cf9c..8c4195a 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
@@ -503,7 +503,7 @@
 	struct xlgmac_desc_data *desc_data;
 	unsigned int offset, datalen, len;
 	struct xlgmac_pkt_info *pkt_info;
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	unsigned int tso, vlan;
 	dma_addr_t skb_dma;
 	unsigned int i;
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
index 99d86e3..bf6c1c6 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
@@ -995,7 +995,7 @@
 	smp_wmb();
 
 	ring->cur = cur_index + 1;
-	if (!pkt_info->skb->xmit_more ||
+	if (!netdev_xmit_more() ||
 	    netif_xmit_stopped(netdev_get_tx_queue(pdata->netdev,
 						   channel->queue_index)))
 		xlgmac_tx_start_xmit(channel, ring);
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
index 1f8e960..a1f5a1e 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
@@ -116,7 +116,7 @@
 			       struct sk_buff *skb,
 			       struct xlgmac_pkt_info *pkt_info)
 {
-	struct skb_frag_struct *frag;
+	skb_frag_t *frag;
 	unsigned int context_desc;
 	unsigned int len;
 	unsigned int i;
diff --git a/drivers/net/ethernet/tehuti/Kconfig b/drivers/net/ethernet/tehuti/Kconfig
index b17f0ca..8ad1526 100644
--- a/drivers/net/ethernet/tehuti/Kconfig
+++ b/drivers/net/ethernet/tehuti/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Tehuti network device configuration
 #
diff --git a/drivers/net/ethernet/tehuti/Makefile b/drivers/net/ethernet/tehuti/Makefile
index f995421..13a0ddd 100644
--- a/drivers/net/ethernet/tehuti/Makefile
+++ b/drivers/net/ethernet/tehuti/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Tehuti network device drivers.
 #
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index dc966dd..0f8a924 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Tehuti Networks(R) Network Driver
  * ethtool interface implementation
  * Copyright (C) 2007 Tehuti Networks Ltd. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 /*
@@ -1505,7 +1501,7 @@
 	bdx_tx_db_inc_wptr(db);
 
 	for (i = 0; i < nr_frags; i++) {
-		const struct skb_frag_struct *frag;
+		const skb_frag_t *frag;
 
 		frag = &skb_shinfo(skb)->frags[i];
 		db->wptr->len = skb_frag_size(frag);
@@ -1739,7 +1735,7 @@
 		tx_level -= db->rptr->len;	/* '-' because len is negative */
 
 		/* now should come skb pointer - free it */
-		dev_kfree_skb_irq(db->rptr->addr.skb);
+		dev_consume_skb_irq(db->rptr->addr.skb);
 		bdx_tx_db_inc_rptr(db);
 	}
 
diff --git a/drivers/net/ethernet/tehuti/tehuti.h b/drivers/net/ethernet/tehuti/tehuti.h
index 8e7b4c9..5fc03c8 100644
--- a/drivers/net/ethernet/tehuti/tehuti.h
+++ b/drivers/net/ethernet/tehuti/tehuti.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Tehuti Networks(R) Network Driver
  * Copyright (C) 2007 Tehuti Networks Ltd. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef _TEHUTI_H
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index f932923..834afca 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # TI device configuration
 #
@@ -20,7 +21,6 @@
 	tristate "TI DaVinci EMAC Support"
 	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 ) || COMPILE_TEST
 	select TI_DAVINCI_MDIO
-	select TI_DAVINCI_CPDMA
 	select PHYLIB
 	---help---
 	  This driver supports TI's DaVinci Ethernet.
@@ -38,35 +38,19 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called davinci_mdio.  This is recommended.
 
-config TI_DAVINCI_CPDMA
-	tristate "TI DaVinci CPDMA Support"
-	depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
-	select GENERIC_ALLOCATOR
-	---help---
-	  This driver supports TI's DaVinci CPDMA dma engine.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called davinci_cpdma.  This is recommended.
-
 config TI_CPSW_PHY_SEL
-	bool
+	bool "TI CPSW Phy mode Selection (DEPRECATED)"
+	default n
 	---help---
 	  This driver supports selecting the mode of the phy connected to
-	  the CPSW.
-
-config TI_CPSW_ALE
-	tristate "TI CPSW ALE Support"
-	---help---
-	  This driver supports TI's CPSW ALE module.
+	  the CPSW. DEPRECATED: use PHY_TI_GMII_SEL.
 
 config TI_CPSW
 	tristate "TI CPSW Switch Support"
 	depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
-	select TI_DAVINCI_CPDMA
 	select TI_DAVINCI_MDIO
-	select TI_CPSW_PHY_SEL
-	select TI_CPSW_ALE
 	select MFD_SYSCON
+	select PAGE_POOL
 	select REGMAP
 	---help---
 	  This driver supports TI's CPSW Ethernet Switch.
@@ -77,6 +61,7 @@
 config TI_CPTS
 	bool "TI Common Platform Time Sync (CPTS) Support"
 	depends on TI_CPSW || TI_KEYSTONE_NETCP || COMPILE_TEST
+	depends on COMMON_CLK
 	depends on POSIX_TIMERS
 	---help---
 	  This driver supports the Common Platform Time Sync unit of
@@ -94,7 +79,6 @@
 
 config TI_KEYSTONE_NETCP
 	tristate "TI Keystone NETCP Core Support"
-	select TI_CPSW_ALE
 	select TI_DAVINCI_MDIO
 	depends on OF
 	depends on KEYSTONE_NAVIGATOR_DMA && KEYSTONE_NAVIGATOR_QMSS
@@ -121,7 +105,8 @@
 
 	  Devices currently supported by this driver are Compaq Netelligent,
 	  Compaq NetFlex and Olicom cards.  Please read the file
-	  <file:Documentation/networking/tlan.txt> for more details.
+	  <file:Documentation/networking/device_drivers/ti/tlan.txt>
+	  for more details.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called tlan.
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index 0be551d..ed12e1e 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -8,16 +8,15 @@
 
 obj-$(CONFIG_TLAN) += tlan.o
 obj-$(CONFIG_CPMAC) += cpmac.o
-obj-$(CONFIG_TI_DAVINCI_EMAC) += davinci_emac.o
+obj-$(CONFIG_TI_DAVINCI_EMAC) += ti_davinci_emac.o
+ti_davinci_emac-y := davinci_emac.o davinci_cpdma.o
 obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
-obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
-obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
 obj-$(CONFIG_TI_CPTS_MOD) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
-ti_cpsw-y := cpsw.o
+ti_cpsw-y := cpsw.o davinci_cpdma.o cpsw_ale.o cpsw_priv.o cpsw_sl.o cpsw_ethtool.o
 
 obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o
-keystone_netcp-y := netcp_core.o
+keystone_netcp-y := netcp_core.o cpsw_ale.o
 obj-$(CONFIG_TI_KEYSTONE_NETCP_ETHSS) += keystone_netcp_ethss.o
-keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o
+keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale.o
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 9b8a30b..3a655a4 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Copyright (C) 2006, 2007 Eugene Konev
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include <linux/module.h>
@@ -608,7 +596,7 @@
 			netdev_dbg(dev, "sent 0x%p, len=%d\n",
 				   desc->skb, desc->skb->len);
 
-		dev_kfree_skb_irq(desc->skb);
+		dev_consume_skb_irq(desc->skb);
 		desc->skb = NULL;
 		if (__netif_subqueue_stopped(dev, queue))
 			netif_wake_subqueue(dev, queue);
@@ -991,7 +979,6 @@
 	cpmac_hw_start(dev);
 
 	napi_enable(&priv->napi);
-	dev->phydev->state = PHY_CHANGELINK;
 	phy_start(dev->phydev);
 
 	return 0;
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index 38d1cc5..bfa81bb 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -1,14 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 396e1cd..4e184ee 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -1,17 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Texas Instruments Ethernet Switch Driver
  *
  * Copyright (C) 2013 Texas Instruments
  *
  * Module Author: Mugunthan V N <mugunthanvnm@ti.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/platform_device.h>
@@ -78,7 +71,7 @@
 	case PHY_INTERFACE_MODE_MII:
 		mode = AM33XX_GMII_SEL_MODE_MII;
 		break;
-	};
+	}
 
 	mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6);
 	mask |= BIT(slave + 4);
@@ -133,7 +126,7 @@
 	case PHY_INTERFACE_MODE_MII:
 		mode = AM33XX_GMII_SEL_MODE_MII;
 		break;
-	};
+	}
 
 	switch (slave) {
 	case 0:
@@ -158,9 +151,9 @@
 }
 
 static struct platform_driver cpsw_phy_sel_driver;
-static int match(struct device *dev, void *data)
+static int match(struct device *dev, const void *data)
 {
-	struct device_node *node = (struct device_node *)data;
+	const struct device_node *node = (const struct device_node *)data;
 	return dev->of_node == node &&
 		dev->driver == &cpsw_phy_sel_driver.driver;
 }
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 832bce0..f298d71 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Texas Instruments Ethernet Switch Driver
  *
  * Copyright (C) 2012 Texas Instruments
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
@@ -26,6 +19,7 @@
 #include <linux/netdevice.h>
 #include <linux/net_tstamp.h>
 #include <linux/phy.h>
+#include <linux/phy/phy.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
@@ -37,144 +31,23 @@
 #include <linux/if_vlan.h>
 #include <linux/kmemleak.h>
 #include <linux/sys_soc.h>
+#include <net/page_pool.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/filter.h>
 
 #include <linux/pinctrl/consumer.h>
 #include <net/pkt_cls.h>
 
 #include "cpsw.h"
 #include "cpsw_ale.h"
+#include "cpsw_priv.h"
+#include "cpsw_sl.h"
 #include "cpts.h"
 #include "davinci_cpdma.h"
 
 #include <net/pkt_sched.h>
 
-#define CPSW_DEBUG	(NETIF_MSG_HW		| NETIF_MSG_WOL		| \
-			 NETIF_MSG_DRV		| NETIF_MSG_LINK	| \
-			 NETIF_MSG_IFUP		| NETIF_MSG_INTR	| \
-			 NETIF_MSG_PROBE	| NETIF_MSG_TIMER	| \
-			 NETIF_MSG_IFDOWN	| NETIF_MSG_RX_ERR	| \
-			 NETIF_MSG_TX_ERR	| NETIF_MSG_TX_DONE	| \
-			 NETIF_MSG_PKTDATA	| NETIF_MSG_TX_QUEUED	| \
-			 NETIF_MSG_RX_STATUS)
-
-#define cpsw_info(priv, type, format, ...)		\
-do {								\
-	if (netif_msg_##type(priv) && net_ratelimit())		\
-		dev_info(priv->dev, format, ## __VA_ARGS__);	\
-} while (0)
-
-#define cpsw_err(priv, type, format, ...)		\
-do {								\
-	if (netif_msg_##type(priv) && net_ratelimit())		\
-		dev_err(priv->dev, format, ## __VA_ARGS__);	\
-} while (0)
-
-#define cpsw_dbg(priv, type, format, ...)		\
-do {								\
-	if (netif_msg_##type(priv) && net_ratelimit())		\
-		dev_dbg(priv->dev, format, ## __VA_ARGS__);	\
-} while (0)
-
-#define cpsw_notice(priv, type, format, ...)		\
-do {								\
-	if (netif_msg_##type(priv) && net_ratelimit())		\
-		dev_notice(priv->dev, format, ## __VA_ARGS__);	\
-} while (0)
-
-#define ALE_ALL_PORTS		0x7
-
-#define CPSW_MAJOR_VERSION(reg)		(reg >> 8 & 0x7)
-#define CPSW_MINOR_VERSION(reg)		(reg & 0xff)
-#define CPSW_RTL_VERSION(reg)		((reg >> 11) & 0x1f)
-
-#define CPSW_VERSION_1		0x19010a
-#define CPSW_VERSION_2		0x19010c
-#define CPSW_VERSION_3		0x19010f
-#define CPSW_VERSION_4		0x190112
-
-#define HOST_PORT_NUM		0
-#define CPSW_ALE_PORTS_NUM	3
-#define SLIVER_SIZE		0x40
-
-#define CPSW1_HOST_PORT_OFFSET	0x028
-#define CPSW1_SLAVE_OFFSET	0x050
-#define CPSW1_SLAVE_SIZE	0x040
-#define CPSW1_CPDMA_OFFSET	0x100
-#define CPSW1_STATERAM_OFFSET	0x200
-#define CPSW1_HW_STATS		0x400
-#define CPSW1_CPTS_OFFSET	0x500
-#define CPSW1_ALE_OFFSET	0x600
-#define CPSW1_SLIVER_OFFSET	0x700
-
-#define CPSW2_HOST_PORT_OFFSET	0x108
-#define CPSW2_SLAVE_OFFSET	0x200
-#define CPSW2_SLAVE_SIZE	0x100
-#define CPSW2_CPDMA_OFFSET	0x800
-#define CPSW2_HW_STATS		0x900
-#define CPSW2_STATERAM_OFFSET	0xa00
-#define CPSW2_CPTS_OFFSET	0xc00
-#define CPSW2_ALE_OFFSET	0xd00
-#define CPSW2_SLIVER_OFFSET	0xd80
-#define CPSW2_BD_OFFSET		0x2000
-
-#define CPDMA_RXTHRESH		0x0c0
-#define CPDMA_RXFREE		0x0e0
-#define CPDMA_TXHDP		0x00
-#define CPDMA_RXHDP		0x20
-#define CPDMA_TXCP		0x40
-#define CPDMA_RXCP		0x60
-
-#define CPSW_POLL_WEIGHT	64
-#define CPSW_RX_VLAN_ENCAP_HDR_SIZE		4
-#define CPSW_MIN_PACKET_SIZE	(VLAN_ETH_ZLEN)
-#define CPSW_MAX_PACKET_SIZE	(VLAN_ETH_FRAME_LEN +\
-				 ETH_FCS_LEN +\
-				 CPSW_RX_VLAN_ENCAP_HDR_SIZE)
-
-#define RX_PRIORITY_MAPPING	0x76543210
-#define TX_PRIORITY_MAPPING	0x33221100
-#define CPDMA_TX_PRIORITY_MAP	0x76543210
-
-#define CPSW_VLAN_AWARE		BIT(1)
-#define CPSW_RX_VLAN_ENCAP	BIT(2)
-#define CPSW_ALE_VLAN_AWARE	1
-
-#define CPSW_FIFO_NORMAL_MODE		(0 << 16)
-#define CPSW_FIFO_DUAL_MAC_MODE		(1 << 16)
-#define CPSW_FIFO_RATE_LIMIT_MODE	(2 << 16)
-
-#define CPSW_INTPACEEN		(0x3f << 16)
-#define CPSW_INTPRESCALE_MASK	(0x7FF << 0)
-#define CPSW_CMINTMAX_CNT	63
-#define CPSW_CMINTMIN_CNT	2
-#define CPSW_CMINTMAX_INTVL	(1000 / CPSW_CMINTMIN_CNT)
-#define CPSW_CMINTMIN_INTVL	((1000 / CPSW_CMINTMAX_CNT) + 1)
-
-#define cpsw_slave_index(cpsw, priv)				\
-		((cpsw->data.dual_emac) ? priv->emac_port :	\
-		cpsw->data.active_slave)
-#define IRQ_NUM			2
-#define CPSW_MAX_QUEUES		8
-#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
-#define CPSW_FIFO_QUEUE_TYPE_SHIFT	16
-#define CPSW_FIFO_SHAPE_EN_SHIFT	16
-#define CPSW_FIFO_RATE_EN_SHIFT		20
-#define CPSW_TC_NUM			4
-#define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
-#define CPSW_PCT_MASK			0x7f
-
-#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
-#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
-#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT	16
-#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT	8
-#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK	GENMASK(1, 0)
-enum {
-	CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
-	CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
-	CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
-	CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
-};
-
 static int debug_level;
 module_param(debug_level, int, 0);
 MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
@@ -191,366 +64,10 @@
 module_param(descs_pool_size, int, 0444);
 MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
 
-struct cpsw_wr_regs {
-	u32	id_ver;
-	u32	soft_reset;
-	u32	control;
-	u32	int_control;
-	u32	rx_thresh_en;
-	u32	rx_en;
-	u32	tx_en;
-	u32	misc_en;
-	u32	mem_allign1[8];
-	u32	rx_thresh_stat;
-	u32	rx_stat;
-	u32	tx_stat;
-	u32	misc_stat;
-	u32	mem_allign2[8];
-	u32	rx_imax;
-	u32	tx_imax;
+/* The buf includes headroom compatible with both skb and xdpf */
+#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN)
+#define CPSW_HEADROOM  ALIGN(CPSW_HEADROOM_NA, sizeof(long))
 
-};
-
-struct cpsw_ss_regs {
-	u32	id_ver;
-	u32	control;
-	u32	soft_reset;
-	u32	stat_port_en;
-	u32	ptype;
-	u32	soft_idle;
-	u32	thru_rate;
-	u32	gap_thresh;
-	u32	tx_start_wds;
-	u32	flow_control;
-	u32	vlan_ltype;
-	u32	ts_ltype;
-	u32	dlr_ltype;
-};
-
-/* CPSW_PORT_V1 */
-#define CPSW1_MAX_BLKS      0x00 /* Maximum FIFO Blocks */
-#define CPSW1_BLK_CNT       0x04 /* FIFO Block Usage Count (Read Only) */
-#define CPSW1_TX_IN_CTL     0x08 /* Transmit FIFO Control */
-#define CPSW1_PORT_VLAN     0x0c /* VLAN Register */
-#define CPSW1_TX_PRI_MAP    0x10 /* Tx Header Priority to Switch Pri Mapping */
-#define CPSW1_TS_CTL        0x14 /* Time Sync Control */
-#define CPSW1_TS_SEQ_LTYPE  0x18 /* Time Sync Sequence ID Offset and Msg Type */
-#define CPSW1_TS_VLAN       0x1c /* Time Sync VLAN1 and VLAN2 */
-
-/* CPSW_PORT_V2 */
-#define CPSW2_CONTROL       0x00 /* Control Register */
-#define CPSW2_MAX_BLKS      0x08 /* Maximum FIFO Blocks */
-#define CPSW2_BLK_CNT       0x0c /* FIFO Block Usage Count (Read Only) */
-#define CPSW2_TX_IN_CTL     0x10 /* Transmit FIFO Control */
-#define CPSW2_PORT_VLAN     0x14 /* VLAN Register */
-#define CPSW2_TX_PRI_MAP    0x18 /* Tx Header Priority to Switch Pri Mapping */
-#define CPSW2_TS_SEQ_MTYPE  0x1c /* Time Sync Sequence ID Offset and Msg Type */
-
-/* CPSW_PORT_V1 and V2 */
-#define SA_LO               0x20 /* CPGMAC_SL Source Address Low */
-#define SA_HI               0x24 /* CPGMAC_SL Source Address High */
-#define SEND_PERCENT        0x28 /* Transmit Queue Send Percentages */
-
-/* CPSW_PORT_V2 only */
-#define RX_DSCP_PRI_MAP0    0x30 /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP1    0x34 /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP2    0x38 /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP3    0x3c /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP4    0x40 /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP5    0x44 /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP6    0x48 /* Rx DSCP Priority to Rx Packet Mapping */
-#define RX_DSCP_PRI_MAP7    0x4c /* Rx DSCP Priority to Rx Packet Mapping */
-
-/* Bit definitions for the CPSW2_CONTROL register */
-#define PASS_PRI_TAGGED     BIT(24) /* Pass Priority Tagged */
-#define VLAN_LTYPE2_EN      BIT(21) /* VLAN LTYPE 2 enable */
-#define VLAN_LTYPE1_EN      BIT(20) /* VLAN LTYPE 1 enable */
-#define DSCP_PRI_EN         BIT(16) /* DSCP Priority Enable */
-#define TS_107              BIT(15) /* Tyme Sync Dest IP Address 107 */
-#define TS_320              BIT(14) /* Time Sync Dest Port 320 enable */
-#define TS_319              BIT(13) /* Time Sync Dest Port 319 enable */
-#define TS_132              BIT(12) /* Time Sync Dest IP Addr 132 enable */
-#define TS_131              BIT(11) /* Time Sync Dest IP Addr 131 enable */
-#define TS_130              BIT(10) /* Time Sync Dest IP Addr 130 enable */
-#define TS_129              BIT(9)  /* Time Sync Dest IP Addr 129 enable */
-#define TS_TTL_NONZERO      BIT(8)  /* Time Sync Time To Live Non-zero enable */
-#define TS_ANNEX_F_EN       BIT(6)  /* Time Sync Annex F enable */
-#define TS_ANNEX_D_EN       BIT(4)  /* Time Sync Annex D enable */
-#define TS_LTYPE2_EN        BIT(3)  /* Time Sync LTYPE 2 enable */
-#define TS_LTYPE1_EN        BIT(2)  /* Time Sync LTYPE 1 enable */
-#define TS_TX_EN            BIT(1)  /* Time Sync Transmit Enable */
-#define TS_RX_EN            BIT(0)  /* Time Sync Receive Enable */
-
-#define CTRL_V2_TS_BITS \
-	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
-	 TS_TTL_NONZERO  | TS_ANNEX_D_EN | TS_LTYPE1_EN)
-
-#define CTRL_V2_ALL_TS_MASK (CTRL_V2_TS_BITS | TS_TX_EN | TS_RX_EN)
-#define CTRL_V2_TX_TS_BITS  (CTRL_V2_TS_BITS | TS_TX_EN)
-#define CTRL_V2_RX_TS_BITS  (CTRL_V2_TS_BITS | TS_RX_EN)
-
-
-#define CTRL_V3_TS_BITS \
-	(TS_107 | TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
-	 TS_TTL_NONZERO | TS_ANNEX_F_EN | TS_ANNEX_D_EN |\
-	 TS_LTYPE1_EN)
-
-#define CTRL_V3_ALL_TS_MASK (CTRL_V3_TS_BITS | TS_TX_EN | TS_RX_EN)
-#define CTRL_V3_TX_TS_BITS  (CTRL_V3_TS_BITS | TS_TX_EN)
-#define CTRL_V3_RX_TS_BITS  (CTRL_V3_TS_BITS | TS_RX_EN)
-
-/* Bit definitions for the CPSW2_TS_SEQ_MTYPE register */
-#define TS_SEQ_ID_OFFSET_SHIFT   (16)    /* Time Sync Sequence ID Offset */
-#define TS_SEQ_ID_OFFSET_MASK    (0x3f)
-#define TS_MSG_TYPE_EN_SHIFT     (0)     /* Time Sync Message Type Enable */
-#define TS_MSG_TYPE_EN_MASK      (0xffff)
-
-/* The PTP event messages - Sync, Delay_Req, Pdelay_Req, and Pdelay_Resp. */
-#define EVENT_MSG_BITS ((1<<0) | (1<<1) | (1<<2) | (1<<3))
-
-/* Bit definitions for the CPSW1_TS_CTL register */
-#define CPSW_V1_TS_RX_EN		BIT(0)
-#define CPSW_V1_TS_TX_EN		BIT(4)
-#define CPSW_V1_MSG_TYPE_OFS		16
-
-/* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */
-#define CPSW_V1_SEQ_ID_OFS_SHIFT	16
-
-#define CPSW_MAX_BLKS_TX		15
-#define CPSW_MAX_BLKS_TX_SHIFT		4
-#define CPSW_MAX_BLKS_RX		5
-
-struct cpsw_host_regs {
-	u32	max_blks;
-	u32	blk_cnt;
-	u32	tx_in_ctl;
-	u32	port_vlan;
-	u32	tx_pri_map;
-	u32	cpdma_tx_pri_map;
-	u32	cpdma_rx_chan_map;
-};
-
-struct cpsw_sliver_regs {
-	u32	id_ver;
-	u32	mac_control;
-	u32	mac_status;
-	u32	soft_reset;
-	u32	rx_maxlen;
-	u32	__reserved_0;
-	u32	rx_pause;
-	u32	tx_pause;
-	u32	__reserved_1;
-	u32	rx_pri_map;
-};
-
-struct cpsw_hw_stats {
-	u32	rxgoodframes;
-	u32	rxbroadcastframes;
-	u32	rxmulticastframes;
-	u32	rxpauseframes;
-	u32	rxcrcerrors;
-	u32	rxaligncodeerrors;
-	u32	rxoversizedframes;
-	u32	rxjabberframes;
-	u32	rxundersizedframes;
-	u32	rxfragments;
-	u32	__pad_0[2];
-	u32	rxoctets;
-	u32	txgoodframes;
-	u32	txbroadcastframes;
-	u32	txmulticastframes;
-	u32	txpauseframes;
-	u32	txdeferredframes;
-	u32	txcollisionframes;
-	u32	txsinglecollframes;
-	u32	txmultcollframes;
-	u32	txexcessivecollisions;
-	u32	txlatecollisions;
-	u32	txunderrun;
-	u32	txcarriersenseerrors;
-	u32	txoctets;
-	u32	octetframes64;
-	u32	octetframes65t127;
-	u32	octetframes128t255;
-	u32	octetframes256t511;
-	u32	octetframes512t1023;
-	u32	octetframes1024tup;
-	u32	netoctets;
-	u32	rxsofoverruns;
-	u32	rxmofoverruns;
-	u32	rxdmaoverruns;
-};
-
-struct cpsw_slave_data {
-	struct device_node *phy_node;
-	char		phy_id[MII_BUS_ID_SIZE];
-	int		phy_if;
-	u8		mac_addr[ETH_ALEN];
-	u16		dual_emac_res_vlan;	/* Reserved VLAN for DualEMAC */
-};
-
-struct cpsw_platform_data {
-	struct cpsw_slave_data	*slave_data;
-	u32	ss_reg_ofs;	/* Subsystem control register offset */
-	u32	channels;	/* number of cpdma channels (symmetric) */
-	u32	slaves;		/* number of slave cpgmac ports */
-	u32	active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
-	u32	ale_entries;	/* ale table size */
-	u32	bd_ram_size;  /* buffer descriptor ram size */
-	u32	mac_control;	/* Mac control register */
-	u16	default_vlan;	/* Def VLAN for ALE lookup in VLAN aware mode */
-	bool	dual_emac;	/* Enable Dual EMAC mode */
-};
-
-struct cpsw_slave {
-	void __iomem			*regs;
-	struct cpsw_sliver_regs __iomem	*sliver;
-	int				slave_num;
-	u32				mac_control;
-	struct cpsw_slave_data		*data;
-	struct phy_device		*phy;
-	struct net_device		*ndev;
-	u32				port_vlan;
-};
-
-static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
-{
-	return readl_relaxed(slave->regs + offset);
-}
-
-static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset)
-{
-	writel_relaxed(val, slave->regs + offset);
-}
-
-struct cpsw_vector {
-	struct cpdma_chan *ch;
-	int budget;
-};
-
-struct cpsw_common {
-	struct device			*dev;
-	struct cpsw_platform_data	data;
-	struct napi_struct		napi_rx;
-	struct napi_struct		napi_tx;
-	struct cpsw_ss_regs __iomem	*regs;
-	struct cpsw_wr_regs __iomem	*wr_regs;
-	u8 __iomem			*hw_stats;
-	struct cpsw_host_regs __iomem	*host_port_regs;
-	u32				version;
-	u32				coal_intvl;
-	u32				bus_freq_mhz;
-	int				rx_packet_max;
-	struct cpsw_slave		*slaves;
-	struct cpdma_ctlr		*dma;
-	struct cpsw_vector		txv[CPSW_MAX_QUEUES];
-	struct cpsw_vector		rxv[CPSW_MAX_QUEUES];
-	struct cpsw_ale			*ale;
-	bool				quirk_irq;
-	bool				rx_irq_disabled;
-	bool				tx_irq_disabled;
-	u32 irqs_table[IRQ_NUM];
-	struct cpts			*cpts;
-	int				rx_ch_num, tx_ch_num;
-	int				speed;
-	int				usage_count;
-};
-
-struct cpsw_priv {
-	struct net_device		*ndev;
-	struct device			*dev;
-	u32				msg_enable;
-	u8				mac_addr[ETH_ALEN];
-	bool				rx_pause;
-	bool				tx_pause;
-	bool				mqprio_hw;
-	int				fifo_bw[CPSW_TC_NUM];
-	int				shp_cfg_speed;
-	u32 emac_port;
-	struct cpsw_common *cpsw;
-};
-
-struct cpsw_stats {
-	char stat_string[ETH_GSTRING_LEN];
-	int type;
-	int sizeof_stat;
-	int stat_offset;
-};
-
-enum {
-	CPSW_STATS,
-	CPDMA_RX_STATS,
-	CPDMA_TX_STATS,
-};
-
-#define CPSW_STAT(m)		CPSW_STATS,				\
-				sizeof(((struct cpsw_hw_stats *)0)->m), \
-				offsetof(struct cpsw_hw_stats, m)
-#define CPDMA_RX_STAT(m)	CPDMA_RX_STATS,				   \
-				sizeof(((struct cpdma_chan_stats *)0)->m), \
-				offsetof(struct cpdma_chan_stats, m)
-#define CPDMA_TX_STAT(m)	CPDMA_TX_STATS,				   \
-				sizeof(((struct cpdma_chan_stats *)0)->m), \
-				offsetof(struct cpdma_chan_stats, m)
-
-static const struct cpsw_stats cpsw_gstrings_stats[] = {
-	{ "Good Rx Frames", CPSW_STAT(rxgoodframes) },
-	{ "Broadcast Rx Frames", CPSW_STAT(rxbroadcastframes) },
-	{ "Multicast Rx Frames", CPSW_STAT(rxmulticastframes) },
-	{ "Pause Rx Frames", CPSW_STAT(rxpauseframes) },
-	{ "Rx CRC Errors", CPSW_STAT(rxcrcerrors) },
-	{ "Rx Align/Code Errors", CPSW_STAT(rxaligncodeerrors) },
-	{ "Oversize Rx Frames", CPSW_STAT(rxoversizedframes) },
-	{ "Rx Jabbers", CPSW_STAT(rxjabberframes) },
-	{ "Undersize (Short) Rx Frames", CPSW_STAT(rxundersizedframes) },
-	{ "Rx Fragments", CPSW_STAT(rxfragments) },
-	{ "Rx Octets", CPSW_STAT(rxoctets) },
-	{ "Good Tx Frames", CPSW_STAT(txgoodframes) },
-	{ "Broadcast Tx Frames", CPSW_STAT(txbroadcastframes) },
-	{ "Multicast Tx Frames", CPSW_STAT(txmulticastframes) },
-	{ "Pause Tx Frames", CPSW_STAT(txpauseframes) },
-	{ "Deferred Tx Frames", CPSW_STAT(txdeferredframes) },
-	{ "Collisions", CPSW_STAT(txcollisionframes) },
-	{ "Single Collision Tx Frames", CPSW_STAT(txsinglecollframes) },
-	{ "Multiple Collision Tx Frames", CPSW_STAT(txmultcollframes) },
-	{ "Excessive Collisions", CPSW_STAT(txexcessivecollisions) },
-	{ "Late Collisions", CPSW_STAT(txlatecollisions) },
-	{ "Tx Underrun", CPSW_STAT(txunderrun) },
-	{ "Carrier Sense Errors", CPSW_STAT(txcarriersenseerrors) },
-	{ "Tx Octets", CPSW_STAT(txoctets) },
-	{ "Rx + Tx 64 Octet Frames", CPSW_STAT(octetframes64) },
-	{ "Rx + Tx 65-127 Octet Frames", CPSW_STAT(octetframes65t127) },
-	{ "Rx + Tx 128-255 Octet Frames", CPSW_STAT(octetframes128t255) },
-	{ "Rx + Tx 256-511 Octet Frames", CPSW_STAT(octetframes256t511) },
-	{ "Rx + Tx 512-1023 Octet Frames", CPSW_STAT(octetframes512t1023) },
-	{ "Rx + Tx 1024-Up Octet Frames", CPSW_STAT(octetframes1024tup) },
-	{ "Net Octets", CPSW_STAT(netoctets) },
-	{ "Rx Start of Frame Overruns", CPSW_STAT(rxsofoverruns) },
-	{ "Rx Middle of Frame Overruns", CPSW_STAT(rxmofoverruns) },
-	{ "Rx DMA Overruns", CPSW_STAT(rxdmaoverruns) },
-};
-
-static const struct cpsw_stats cpsw_gstrings_ch_stats[] = {
-	{ "head_enqueue", CPDMA_RX_STAT(head_enqueue) },
-	{ "tail_enqueue", CPDMA_RX_STAT(tail_enqueue) },
-	{ "pad_enqueue", CPDMA_RX_STAT(pad_enqueue) },
-	{ "misqueued", CPDMA_RX_STAT(misqueued) },
-	{ "desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) },
-	{ "pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) },
-	{ "runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) },
-	{ "runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) },
-	{ "empty_dequeue", CPDMA_RX_STAT(empty_dequeue) },
-	{ "busy_dequeue", CPDMA_RX_STAT(busy_dequeue) },
-	{ "good_dequeue", CPDMA_RX_STAT(good_dequeue) },
-	{ "requeue", CPDMA_RX_STAT(requeue) },
-	{ "teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) },
-};
-
-#define CPSW_STATS_COMMON_LEN	ARRAY_SIZE(cpsw_gstrings_stats)
-#define CPSW_STATS_CH_LEN	ARRAY_SIZE(cpsw_gstrings_ch_stats)
-
-#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
-#define napi_to_cpsw(napi)	container_of(napi, struct cpsw_common, napi)
 #define for_each_slave(priv, func, arg...)				\
 	do {								\
 		struct cpsw_slave *slave;				\
@@ -565,27 +82,13 @@
 				(func)(slave++, ##arg);			\
 	} while (0)
 
-static inline int cpsw_get_slave_port(u32 slave_num)
-{
-	return slave_num + 1;
-}
+#define CPSW_XMETA_OFFSET	ALIGN(sizeof(struct xdp_frame), sizeof(long))
 
-static void cpsw_add_mcast(struct cpsw_priv *priv, u8 *addr)
-{
-	struct cpsw_common *cpsw = priv->cpsw;
+#define CPSW_XDP_CONSUMED		1
+#define CPSW_XDP_PASS			0
 
-	if (cpsw->data.dual_emac) {
-		struct cpsw_slave *slave = cpsw->slaves + priv->emac_port;
-		int slave_port = cpsw_get_slave_port(slave->slave_num);
-
-		cpsw_ale_add_mcast(cpsw->ale, addr,
-				   1 << slave_port | ALE_PORT_HOST,
-				   ALE_VLAN, slave->port_vlan, 0);
-		return;
-	}
-
-	cpsw_ale_add_mcast(cpsw->ale, addr, ALE_ALL_PORTS, 0, 0, 0);
-}
+static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
+				    __be16 proto, u16 vid);
 
 static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
 {
@@ -642,6 +145,7 @@
 
 			/* Clear all mcast from ALE */
 			cpsw_ale_flush_multicast(ale, ALE_ALL_PORTS, -1);
+			__hw_addr_ref_unsync_dev(&ndev->mc, ndev, NULL);
 
 			/* Flood All Unicast Packets to Host port */
 			cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1);
@@ -662,21 +166,157 @@
 	}
 }
 
+/**
+ * cpsw_set_mc - add a multicast entry to the ALE table, or delete it,
+ * depending on the @add flag
+ * @ndev: device to sync
+ * @addr: address to be added or deleted
+ * @vid: vlan id; if vid < 0, set/unset the address for the real device
+ * @add: add the address if the flag is set, remove it otherwise
+ */
+static int cpsw_set_mc(struct net_device *ndev, const u8 *addr,
+		       int vid, int add)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int mask, flags, ret;
+
+	if (vid < 0) {
+		if (cpsw->data.dual_emac)
+			vid = cpsw->slaves[priv->emac_port].port_vlan;
+		else
+			vid = 0;
+	}
+
+	mask = cpsw->data.dual_emac ? ALE_PORT_HOST : ALE_ALL_PORTS;
+	flags = vid ? ALE_VLAN : 0;
+
+	if (add)
+		ret = cpsw_ale_add_mcast(cpsw->ale, addr, mask, flags, vid, 0);
+	else
+		ret = cpsw_ale_del_mcast(cpsw->ale, addr, 0, flags, vid);
+
+	return ret;
+}
+
+static int cpsw_update_vlan_mc(struct net_device *vdev, int vid, void *ctx)
+{
+	struct addr_sync_ctx *sync_ctx = ctx;
+	struct netdev_hw_addr *ha;
+	int found = 0, ret = 0;
+
+	if (!vdev || !(vdev->flags & IFF_UP))
+		return 0;
+
+	/* vlan address is relevant if its sync_cnt != 0 */
+	netdev_for_each_mc_addr(ha, vdev) {
+		if (ether_addr_equal(ha->addr, sync_ctx->addr)) {
+			found = ha->sync_cnt;
+			break;
+		}
+	}
+
+	if (found)
+		sync_ctx->consumed++;
+
+	if (sync_ctx->flush) {
+		if (!found)
+			cpsw_set_mc(sync_ctx->ndev, sync_ctx->addr, vid, 0);
+		return 0;
+	}
+
+	if (found)
+		ret = cpsw_set_mc(sync_ctx->ndev, sync_ctx->addr, vid, 1);
+
+	return ret;
+}
+
+static int cpsw_add_mc_addr(struct net_device *ndev, const u8 *addr, int num)
+{
+	struct addr_sync_ctx sync_ctx;
+	int ret;
+
+	sync_ctx.consumed = 0;
+	sync_ctx.addr = addr;
+	sync_ctx.ndev = ndev;
+	sync_ctx.flush = 0;
+
+	ret = vlan_for_each(ndev, cpsw_update_vlan_mc, &sync_ctx);
+	if (sync_ctx.consumed < num && !ret)
+		ret = cpsw_set_mc(ndev, addr, -1, 1);
+
+	return ret;
+}
+
+static int cpsw_del_mc_addr(struct net_device *ndev, const u8 *addr, int num)
+{
+	struct addr_sync_ctx sync_ctx;
+
+	sync_ctx.consumed = 0;
+	sync_ctx.addr = addr;
+	sync_ctx.ndev = ndev;
+	sync_ctx.flush = 1;
+
+	vlan_for_each(ndev, cpsw_update_vlan_mc, &sync_ctx);
+	if (sync_ctx.consumed == num)
+		cpsw_set_mc(ndev, addr, -1, 0);
+
+	return 0;
+}
+
+static int cpsw_purge_vlan_mc(struct net_device *vdev, int vid, void *ctx)
+{
+	struct addr_sync_ctx *sync_ctx = ctx;
+	struct netdev_hw_addr *ha;
+	int found = 0;
+
+	if (!vdev || !(vdev->flags & IFF_UP))
+		return 0;
+
+	/* vlan address is relevant if its sync_cnt != 0 */
+	netdev_for_each_mc_addr(ha, vdev) {
+		if (ether_addr_equal(ha->addr, sync_ctx->addr)) {
+			found = ha->sync_cnt;
+			break;
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	sync_ctx->consumed++;
+	cpsw_set_mc(sync_ctx->ndev, sync_ctx->addr, vid, 0);
+	return 0;
+}
+
+static int cpsw_purge_all_mc(struct net_device *ndev, const u8 *addr, int num)
+{
+	struct addr_sync_ctx sync_ctx;
+
+	sync_ctx.addr = addr;
+	sync_ctx.ndev = ndev;
+	sync_ctx.consumed = 0;
+
+	vlan_for_each(ndev, cpsw_purge_vlan_mc, &sync_ctx);
+	if (sync_ctx.consumed < num)
+		cpsw_set_mc(ndev, addr, -1, 0);
+
+	return 0;
+}
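/* cpsw_add_mc_addr()/cpsw_del_mc_addr() above are the sync/unsync
 * callbacks later handed to __hw_addr_ref_sync_dev(), and
 * cpsw_purge_all_mc() is used with __hw_addr_ref_unsync_dev() on
 * ndo_stop: "num" is the address's total reference count across the
 * real device and its vlans, while "consumed" counts the references
 * found on vlan interfaces.  Anything not consumed by a vlan is
 * attributed to the real device, which is what the vid = -1 calls
 * handle.
 */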
+
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct cpsw_common *cpsw = priv->cpsw;
-	int vid;
+	int slave_port = -1;
 
 	if (cpsw->data.dual_emac)
-		vid = cpsw->slaves[priv->emac_port].port_vlan;
-	else
-		vid = cpsw->data.default_vlan;
+		slave_port = priv->emac_port + 1;
 
 	if (ndev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
 		cpsw_set_promiscious(ndev, true);
-		cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI);
+		cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI, slave_port);
 		return;
 	} else {
 		/* Disable promiscuous mode */
@@ -684,22 +324,15 @@
 	}
 
 	/* Restore allmulti on vlans if necessary */
-	cpsw_ale_set_allmulti(cpsw->ale, priv->ndev->flags & IFF_ALLMULTI);
+	cpsw_ale_set_allmulti(cpsw->ale,
+			      ndev->flags & IFF_ALLMULTI, slave_port);
 
-	/* Clear all mcast from ALE */
-	cpsw_ale_flush_multicast(cpsw->ale, ALE_ALL_PORTS, vid);
-
-	if (!netdev_mc_empty(ndev)) {
-		struct netdev_hw_addr *ha;
-
-		/* program multicast address list into ALE register */
-		netdev_for_each_mc_addr(ha, ndev) {
-			cpsw_add_mcast(priv, ha->addr);
-		}
-	}
+	/* add/remove mcast address either for real netdev or for vlan */
+	__hw_addr_ref_sync_dev(&ndev->mc, ndev, cpsw_add_mc_addr,
+			       cpsw_del_mc_addr);
 }
 
-static void cpsw_intr_enable(struct cpsw_common *cpsw)
+void cpsw_intr_enable(struct cpsw_common *cpsw)
 {
 	writel_relaxed(0xFF, &cpsw->wr_regs->tx_en);
 	writel_relaxed(0xFF, &cpsw->wr_regs->rx_en);
@@ -708,7 +341,7 @@
 	return;
 }
 
-static void cpsw_intr_disable(struct cpsw_common *cpsw)
+void cpsw_intr_disable(struct cpsw_common *cpsw)
 {
 	writel_relaxed(0, &cpsw->wr_regs->tx_en);
 	writel_relaxed(0, &cpsw->wr_regs->rx_en);
@@ -717,24 +350,58 @@
 	return;
 }
 
-static void cpsw_tx_handler(void *token, int len, int status)
+static int cpsw_is_xdpf_handle(void *handle)
 {
+	return (unsigned long)handle & BIT(0);
+}
+
+static void *cpsw_xdpf_to_handle(struct xdp_frame *xdpf)
+{
+	return (void *)((unsigned long)xdpf | BIT(0));
+}
+
+static struct xdp_frame *cpsw_handle_to_xdpf(void *handle)
+{
+	return (struct xdp_frame *)((unsigned long)handle & ~BIT(0));
+}
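The cpdma completion token can now be either an sk_buff or an xdp_frame, and the low bit of the pointer tells the two apart; this is safe because an xdp_frame is at least long-aligned, so bit 0 of a valid pointer is always clear. A minimal standalone sketch of the same tagging idiom, in plain userspace C with hypothetical names:

#include <assert.h>
#include <stdint.h>

struct frame { long payload; };			/* stands in for struct xdp_frame */

static void *tag(struct frame *f)		/* cf. cpsw_xdpf_to_handle() */
{
	return (void *)((uintptr_t)f | 1UL);	/* alignment keeps bit 0 free */
}

static int is_tagged(const void *h)		/* cf. cpsw_is_xdpf_handle() */
{
	return (uintptr_t)h & 1UL;
}

static struct frame *untag(void *h)		/* cf. cpsw_handle_to_xdpf() */
{
	return (struct frame *)((uintptr_t)h & ~1UL);
}

int main(void)
{
	struct frame f = { 0 };
	void *handle = tag(&f);

	assert(is_tagged(handle));
	assert(untag(handle) == &f);
	return 0;
}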
+
+struct __aligned(sizeof(long)) cpsw_meta_xdp {
+	struct net_device *ndev;
+	int ch;
+};
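/* Resulting rx buffer layout (illustrative): convert_to_xdp_frame()
 * writes the struct xdp_frame at the very start of the page, the
 * cpsw_meta_xdp sits immediately behind it at CPSW_XMETA_OFFSET, and
 * the packet data starts at CPSW_HEADROOM:
 *
 *   pa = page_address(page)
 *   [xdp_frame][cpsw_meta_xdp]...headroom...[packet data]
 *   ^          ^                            ^
 *   pa         pa + CPSW_XMETA_OFFSET       pa + CPSW_HEADROOM
 */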
+
+void cpsw_tx_handler(void *token, int len, int status)
+{
+	struct cpsw_meta_xdp	*xmeta;
+	struct xdp_frame	*xdpf;
+	struct net_device	*ndev;
 	struct netdev_queue	*txq;
-	struct sk_buff		*skb = token;
-	struct net_device	*ndev = skb->dev;
-	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
+	struct sk_buff		*skb;
+	int			ch;
+
+	if (cpsw_is_xdpf_handle(token)) {
+		xdpf = cpsw_handle_to_xdpf(token);
+		xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
+		ndev = xmeta->ndev;
+		ch = xmeta->ch;
+		xdp_return_frame(xdpf);
+	} else {
+		skb = token;
+		ndev = skb->dev;
+		ch = skb_get_queue_mapping(skb);
+		cpts_tx_timestamp(ndev_to_cpsw(ndev)->cpts, skb);
+		dev_kfree_skb_any(skb);
+	}
 
 	/* Check whether the queue is stopped due to stalled tx dma; if the
 	 * queue is stopped then start the queue as we have free desc for tx
 	 */
-	txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
+	txq = netdev_get_tx_queue(ndev, ch);
 	if (unlikely(netif_tx_queue_stopped(txq)))
 		netif_tx_wake_queue(txq);
 
-	cpts_tx_timestamp(cpsw->cpts, skb);
 	ndev->stats.tx_packets++;
 	ndev->stats.tx_bytes += len;
-	dev_kfree_skb_any(skb);
 }
 
 static void cpsw_rx_vlan_encap(struct sk_buff *skb)
@@ -780,23 +447,252 @@
 	}
 }
 
-static void cpsw_rx_handler(void *token, int len, int status)
+static int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf,
+			     struct page *page)
 {
-	struct cpdma_chan	*ch;
-	struct sk_buff		*skb = token;
-	struct sk_buff		*new_skb;
-	struct net_device	*ndev = skb->dev;
-	int			ret = 0, port;
-	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_meta_xdp *xmeta;
+	struct cpdma_chan *txch;
+	dma_addr_t dma;
+	int ret, port;
 
-	if (cpsw->data.dual_emac) {
-		port = CPDMA_RX_SOURCE_PORT(status);
-		if (port) {
-			ndev = cpsw->slaves[--port].ndev;
-			skb->dev = ndev;
+	xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
+	xmeta->ndev = priv->ndev;
+	xmeta->ch = 0;
+	txch = cpsw->txv[0].ch;
+
+	port = priv->emac_port + cpsw->data.dual_emac;
+	if (page) {
+		dma = page_pool_get_dma_addr(page);
+		dma += xdpf->headroom + sizeof(struct xdp_frame);
+		ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf),
+					       dma, xdpf->len, port);
+	} else {
+		if (sizeof(*xmeta) > xdpf->headroom) {
+			xdp_return_frame_rx_napi(xdpf);
+			return -EINVAL;
+		}
+
+		ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf),
+					xdpf->data, xdpf->len, port);
+	}
+
+	if (ret) {
+		priv->ndev->stats.tx_dropped++;
+		xdp_return_frame_rx_napi(xdpf);
+	}
+
+	return ret;
+}
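/* Note the two submit paths above: page-backed frames (XDP_TX of a
 * local rx buffer) reuse the page pool's existing DMA mapping via
 * cpdma_chan_submit_mapped(), while frames coming in through
 * ndo_xdp_xmit() have no backing rx page here and go through
 * cpdma_chan_submit(), which performs the DMA mapping itself.
 */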
+
+static int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
+			struct page *page)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct net_device *ndev = priv->ndev;
+	int ret = CPSW_XDP_CONSUMED;
+	struct xdp_frame *xdpf;
+	struct bpf_prog *prog;
+	u32 act;
+
+	rcu_read_lock();
+
+	prog = READ_ONCE(priv->xdp_prog);
+	if (!prog) {
+		ret = CPSW_XDP_PASS;
+		goto out;
+	}
+
+	act = bpf_prog_run_xdp(prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		ret = CPSW_XDP_PASS;
+		break;
+	case XDP_TX:
+		xdpf = convert_to_xdp_frame(xdp);
+		if (unlikely(!xdpf))
+			goto drop;
+
+		cpsw_xdp_tx_frame(priv, xdpf, page);
+		break;
+	case XDP_REDIRECT:
+		if (xdp_do_redirect(ndev, xdp, prog))
+			goto drop;
+
+		/* Have to flush here, per packet, instead of doing it in bulk
+		 * at the end of the napi handler. The RX devices on this
+		 * particular hardware share a common queue, so the
+		 * incoming device might change per packet.
+		 */
+		xdp_do_flush_map();
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(ndev, prog, act);
+		/* fall through -- handle aborts by dropping packet */
+	case XDP_DROP:
+		goto drop;
+	}
+out:
+	rcu_read_unlock();
+	return ret;
+drop:
+	rcu_read_unlock();
+	page_pool_recycle_direct(cpsw->page_pool[ch], page);
+	return ret;
+}
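cpsw_run_xdp() implements the standard XDP verdict handling, so any generic XDP program exercises these paths. A minimal sketch of such a program, assuming libbpf-style section annotations (build and attach steps omitted):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_or_drop(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* drop anything shorter than an Ethernet header, pass the rest */
	if (data + 14 > data_end)
		return XDP_DROP;	/* exercises the drop/recycle path */

	return XDP_PASS;		/* packet continues to build_skb() */
}

char _license[] SEC("license") = "GPL";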
+
+static unsigned int cpsw_rxbuf_total_len(unsigned int len)
+{
+	len += CPSW_HEADROOM;
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	return SKB_DATA_ALIGN(len);
+}
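A worked example of the sizing above, with illustrative numbers only (the real CPSW_HEADROOM and skb_shared_info sizes depend on the architecture and kernel config):

/* assume rx_packet_max = 1522, CPSW_HEADROOM = 256 and
 * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) = 320 on a 64-bit
 * build with 64-byte cache lines:
 *
 *   cpsw_rxbuf_total_len(1522) = SKB_DATA_ALIGN(1522 + 256 + 320)
 *                              = SKB_DATA_ALIGN(2098) = 2112
 *
 * which is the size later handed to build_skb(), so headroom, payload
 * and the trailing skb_shared_info share a single page pool buffer.
 */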
+
+static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw,
+					       int size)
+{
+	struct page_pool_params pp_params;
+	struct page_pool *pool;
+
+	pp_params.order = 0;
+	pp_params.flags = PP_FLAG_DMA_MAP;
+	pp_params.pool_size = size;
+	pp_params.nid = NUMA_NO_NODE;
+	pp_params.dma_dir = DMA_BIDIRECTIONAL;
+	pp_params.dev = cpsw->dev;
+
+	pool = page_pool_create(&pp_params);
+	if (IS_ERR(pool))
+		dev_err(cpsw->dev, "cannot create rx page pool\n");
+
+	return pool;
+}
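/* PP_FLAG_DMA_MAP makes the page pool map pages at allocation time, so
 * the rx path only reads the address back via page_pool_get_dma_addr();
 * dma_dir is DMA_BIDIRECTIONAL because an XDP_TX frame is transmitted
 * straight out of the same rx page.
 */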
+
+static int cpsw_ndev_create_xdp_rxq(struct cpsw_priv *priv, int ch)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct xdp_rxq_info *rxq;
+	struct page_pool *pool;
+	int ret;
+
+	pool = cpsw->page_pool[ch];
+	rxq = &priv->xdp_rxq[ch];
+
+	ret = xdp_rxq_info_reg(rxq, priv->ndev, ch);
+	if (ret)
+		return ret;
+
+	ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
+	if (ret)
+		xdp_rxq_info_unreg(rxq);
+
+	return ret;
+}
+
+static void cpsw_ndev_destroy_xdp_rxq(struct cpsw_priv *priv, int ch)
+{
+	struct xdp_rxq_info *rxq = &priv->xdp_rxq[ch];
+
+	if (!xdp_rxq_info_is_reg(rxq))
+		return;
+
+	xdp_rxq_info_unreg(rxq);
+}
+
+static int cpsw_create_rx_pool(struct cpsw_common *cpsw, int ch)
+{
+	struct page_pool *pool;
+	int ret = 0, pool_size;
+
+	pool_size = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
+	pool = cpsw_create_page_pool(cpsw, pool_size);
+	if (IS_ERR(pool))
+		ret = PTR_ERR(pool);
+	else
+		cpsw->page_pool[ch] = pool;
+
+	return ret;
+}
+
+void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw)
+{
+	struct net_device *ndev;
+	int i, ch;
+
+	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			ndev = cpsw->slaves[i].ndev;
+			if (!ndev)
+				continue;
+
+			cpsw_ndev_destroy_xdp_rxq(netdev_priv(ndev), ch);
+		}
+
+		page_pool_destroy(cpsw->page_pool[ch]);
+		cpsw->page_pool[ch] = NULL;
+	}
+}
+
+int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw)
+{
+	struct net_device *ndev;
+	int i, ch, ret;
+
+	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+		ret = cpsw_create_rx_pool(cpsw, ch);
+		if (ret)
+			goto err_cleanup;
+
+		/* using the same page pool is allowed as rx handlers are
+		 * never running simultaneously for both ndevs
+		 */
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			ndev = cpsw->slaves[i].ndev;
+			if (!ndev)
+				continue;
+
+			ret = cpsw_ndev_create_xdp_rxq(netdev_priv(ndev), ch);
+			if (ret)
+				goto err_cleanup;
 		}
 	}
 
+	return 0;
+
+err_cleanup:
+	cpsw_destroy_xdp_rxqs(cpsw);
+
+	return ret;
+}
+
+static void cpsw_rx_handler(void *token, int len, int status)
+{
+	struct page		*new_page, *page = token;
+	void			*pa = page_address(page);
+	struct cpsw_meta_xdp	*xmeta = pa + CPSW_XMETA_OFFSET;
+	struct cpsw_common	*cpsw = ndev_to_cpsw(xmeta->ndev);
+	int			pkt_size = cpsw->rx_packet_max;
+	int			ret = 0, port, ch = xmeta->ch;
+	int			headroom = CPSW_HEADROOM;
+	struct net_device	*ndev = xmeta->ndev;
+	struct cpsw_priv	*priv;
+	struct page_pool	*pool;
+	struct sk_buff		*skb;
+	struct xdp_buff		xdp;
+	dma_addr_t		dma;
+
+	if (cpsw->data.dual_emac && status >= 0) {
+		port = CPDMA_RX_SOURCE_PORT(status);
+		if (port)
+			ndev = cpsw->slaves[--port].ndev;
+	}
+
+	priv = netdev_priv(ndev);
+	pool = cpsw->page_pool[ch];
 	if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
 		/* In dual emac mode check for all interfaces */
 		if (cpsw->data.dual_emac && cpsw->usage_count &&
@@ -805,52 +701,93 @@
 			 * is already down and the other interface is up
 			 * and running, instead of freeing, which results
 			 * in reducing the number of rx descriptors in the
-			 * DMA engine, requeue skb back to cpdma.
+			 * DMA engine, requeue the page back to cpdma.
 			 */
-			new_skb = skb;
+			new_page = page;
 			goto requeue;
 		}
 
-		/* the interface is going down, skbs are purged */
-		dev_kfree_skb_any(skb);
+		/* the interface is going down, pages are purged */
+		page_pool_recycle_direct(pool, page);
 		return;
 	}
 
-	new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
-	if (new_skb) {
-		skb_copy_queue_mapping(new_skb, skb);
-		skb_put(skb, len);
-		if (status & CPDMA_RX_VLAN_ENCAP)
-			cpsw_rx_vlan_encap(skb);
-		cpts_rx_timestamp(cpsw->cpts, skb);
-		skb->protocol = eth_type_trans(skb, ndev);
-		netif_receive_skb(skb);
-		ndev->stats.rx_bytes += len;
-		ndev->stats.rx_packets++;
-		kmemleak_not_leak(new_skb);
-	} else {
+	new_page = page_pool_dev_alloc_pages(pool);
+	if (unlikely(!new_page)) {
+		new_page = page;
 		ndev->stats.rx_dropped++;
-		new_skb = skb;
+		goto requeue;
 	}
 
+	if (priv->xdp_prog) {
+		if (status & CPDMA_RX_VLAN_ENCAP) {
+			xdp.data = pa + CPSW_HEADROOM +
+				   CPSW_RX_VLAN_ENCAP_HDR_SIZE;
+			xdp.data_end = xdp.data + len -
+				       CPSW_RX_VLAN_ENCAP_HDR_SIZE;
+		} else {
+			xdp.data = pa + CPSW_HEADROOM;
+			xdp.data_end = xdp.data + len;
+		}
+
+		xdp_set_data_meta_invalid(&xdp);
+
+		xdp.data_hard_start = pa;
+		xdp.rxq = &priv->xdp_rxq[ch];
+
+		ret = cpsw_run_xdp(priv, ch, &xdp, page);
+		if (ret != CPSW_XDP_PASS)
+			goto requeue;
+
+		/* XDP prog might have changed packet data and boundaries */
+		len = xdp.data_end - xdp.data;
+		headroom = xdp.data - xdp.data_hard_start;
+
+		/* XDP prog can modify vlan tag, so can't use encap header */
+		status &= ~CPDMA_RX_VLAN_ENCAP;
+	}
+
+	/* pass skb to netstack if no XDP prog or returned XDP_PASS */
+	skb = build_skb(pa, cpsw_rxbuf_total_len(pkt_size));
+	if (!skb) {
+		ndev->stats.rx_dropped++;
+		page_pool_recycle_direct(pool, page);
+		goto requeue;
+	}
+
+	skb_reserve(skb, headroom);
+	skb_put(skb, len);
+	skb->dev = ndev;
+	if (status & CPDMA_RX_VLAN_ENCAP)
+		cpsw_rx_vlan_encap(skb);
+	if (priv->rx_ts_enabled)
+		cpts_rx_timestamp(cpsw->cpts, skb);
+	skb->protocol = eth_type_trans(skb, ndev);
+
+	/* unmap page as the netstack does not recycle page pool pages */
+	page_pool_release_page(pool, page);
+	netif_receive_skb(skb);
+
+	ndev->stats.rx_bytes += len;
+	ndev->stats.rx_packets++;
+
 requeue:
-	if (netif_dormant(ndev)) {
-		dev_kfree_skb_any(new_skb);
-		return;
-	}
+	xmeta = page_address(new_page) + CPSW_XMETA_OFFSET;
+	xmeta->ndev = ndev;
+	xmeta->ch = ch;
 
-	ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch;
-	ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
-				skb_tailroom(new_skb), 0);
-	if (WARN_ON(ret < 0))
-		dev_kfree_skb_any(new_skb);
+	dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+	ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
+				       pkt_size, 0);
+	if (ret < 0) {
+		WARN_ON(ret == -ENOMEM);
+		page_pool_recycle_direct(pool, new_page);
+	}
 }
 
-static void cpsw_split_res(struct net_device *ndev)
+void cpsw_split_res(struct cpsw_common *cpsw)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
 	u32 consumed_rate = 0, bigest_rate = 0;
-	struct cpsw_common *cpsw = priv->cpsw;
 	struct cpsw_vector *txv = cpsw->txv;
 	int i, ch_weight, rlim_ch_num = 0;
 	int budget, bigest_rate_ch = 0;
@@ -1130,30 +1067,33 @@
 	slave_port = cpsw_get_slave_port(slave->slave_num);
 
 	if (phy->link) {
-		mac_control = cpsw->data.mac_control;
+		mac_control = CPSW_SL_CTL_GMII_EN;
+
+		if (phy->speed == 1000)
+			mac_control |= CPSW_SL_CTL_GIG;
+		if (phy->duplex)
+			mac_control |= CPSW_SL_CTL_FULLDUPLEX;
+
+		/* set speed_in input in case RMII mode is used in 100Mbps */
+		if (phy->speed == 100)
+			mac_control |= CPSW_SL_CTL_IFCTL_A;
+		/* in band mode only works in 10Mbps RGMII mode */
+		else if ((phy->speed == 10) && phy_interface_is_rgmii(phy))
+			mac_control |= CPSW_SL_CTL_EXT_EN; /* In Band mode */
+
+		if (priv->rx_pause)
+			mac_control |= CPSW_SL_CTL_RX_FLOW_EN;
+
+		if (priv->tx_pause)
+			mac_control |= CPSW_SL_CTL_TX_FLOW_EN;
+
+		if (mac_control != slave->mac_control)
+			cpsw_sl_ctl_set(slave->mac_sl, mac_control);
 
 		/* enable forwarding */
 		cpsw_ale_control_set(cpsw->ale, slave_port,
 				     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
-		if (phy->speed == 1000)
-			mac_control |= BIT(7);	/* GIGABITEN	*/
-		if (phy->duplex)
-			mac_control |= BIT(0);	/* FULLDUPLEXEN	*/
-
-		/* set speed_in input in case RMII mode is used in 100Mbps */
-		if (phy->speed == 100)
-			mac_control |= BIT(15);
-		/* in band mode only works in 10Mbps RGMII mode */
-		else if ((phy->speed == 10) && phy_interface_is_rgmii(phy))
-			mac_control |= BIT(18); /* In Band mode */
-
-		if (priv->rx_pause)
-			mac_control |= BIT(3);
-
-		if (priv->tx_pause)
-			mac_control |= BIT(4);
-
 		*link = true;
 
 		if (priv->shp_cfg_speed &&
@@ -1166,12 +1106,14 @@
 		/* disable forwarding */
 		cpsw_ale_control_set(cpsw->ale, slave_port,
 				     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
+
+		cpsw_sl_wait_for_idle(slave->mac_sl, 100);
+
+		cpsw_sl_ctl_reset(slave->mac_sl);
 	}
 
-	if (mac_control != slave->mac_control) {
+	if (mac_control != slave->mac_control)
 		phy_print_status(phy);
-		writel_relaxed(mac_control, &slave->sliver->mac_control);
-	}
 
 	slave->mac_control = mac_control;
 }
@@ -1224,7 +1166,7 @@
 
 	if (link) {
 		if (cpsw_need_resplit(cpsw))
-			cpsw_split_res(ndev);
+			cpsw_split_res(cpsw);
 
 		netif_carrier_on(ndev);
 		if (netif_running(ndev))
@@ -1235,167 +1177,6 @@
 	}
 }
 
-static int cpsw_get_coalesce(struct net_device *ndev,
-				struct ethtool_coalesce *coal)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	coal->rx_coalesce_usecs = cpsw->coal_intvl;
-	return 0;
-}
-
-static int cpsw_set_coalesce(struct net_device *ndev,
-				struct ethtool_coalesce *coal)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	u32 int_ctrl;
-	u32 num_interrupts = 0;
-	u32 prescale = 0;
-	u32 addnl_dvdr = 1;
-	u32 coal_intvl = 0;
-	struct cpsw_common *cpsw = priv->cpsw;
-
-	coal_intvl = coal->rx_coalesce_usecs;
-
-	int_ctrl =  readl(&cpsw->wr_regs->int_control);
-	prescale = cpsw->bus_freq_mhz * 4;
-
-	if (!coal->rx_coalesce_usecs) {
-		int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN);
-		goto update_return;
-	}
-
-	if (coal_intvl < CPSW_CMINTMIN_INTVL)
-		coal_intvl = CPSW_CMINTMIN_INTVL;
-
-	if (coal_intvl > CPSW_CMINTMAX_INTVL) {
-		/* Interrupt pacer works with 4us Pulse, we can
-		 * throttle further by dilating the 4us pulse.
-		 */
-		addnl_dvdr = CPSW_INTPRESCALE_MASK / prescale;
-
-		if (addnl_dvdr > 1) {
-			prescale *= addnl_dvdr;
-			if (coal_intvl > (CPSW_CMINTMAX_INTVL * addnl_dvdr))
-				coal_intvl = (CPSW_CMINTMAX_INTVL
-						* addnl_dvdr);
-		} else {
-			addnl_dvdr = 1;
-			coal_intvl = CPSW_CMINTMAX_INTVL;
-		}
-	}
-
-	num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
-	writel(num_interrupts, &cpsw->wr_regs->rx_imax);
-	writel(num_interrupts, &cpsw->wr_regs->tx_imax);
-
-	int_ctrl |= CPSW_INTPACEEN;
-	int_ctrl &= (~CPSW_INTPRESCALE_MASK);
-	int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK);
-
-update_return:
-	writel(int_ctrl, &cpsw->wr_regs->int_control);
-
-	cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl);
-	cpsw->coal_intvl = coal_intvl;
-
-	return 0;
-}
-
-static int cpsw_get_sset_count(struct net_device *ndev, int sset)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	switch (sset) {
-	case ETH_SS_STATS:
-		return (CPSW_STATS_COMMON_LEN +
-		       (cpsw->rx_ch_num + cpsw->tx_ch_num) *
-		       CPSW_STATS_CH_LEN);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir)
-{
-	int ch_stats_len;
-	int line;
-	int i;
-
-	ch_stats_len = CPSW_STATS_CH_LEN * ch_num;
-	for (i = 0; i < ch_stats_len; i++) {
-		line = i % CPSW_STATS_CH_LEN;
-		snprintf(*p, ETH_GSTRING_LEN,
-			 "%s DMA chan %ld: %s", rx_dir ? "Rx" : "Tx",
-			 (long)(i / CPSW_STATS_CH_LEN),
-			 cpsw_gstrings_ch_stats[line].stat_string);
-		*p += ETH_GSTRING_LEN;
-	}
-}
-
-static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-	u8 *p = data;
-	int i;
-
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) {
-			memcpy(p, cpsw_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-
-		cpsw_add_ch_strings(&p, cpsw->rx_ch_num, 1);
-		cpsw_add_ch_strings(&p, cpsw->tx_ch_num, 0);
-		break;
-	}
-}
-
-static void cpsw_get_ethtool_stats(struct net_device *ndev,
-				    struct ethtool_stats *stats, u64 *data)
-{
-	u8 *p;
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-	struct cpdma_chan_stats ch_stats;
-	int i, l, ch;
-
-	/* Collect Davinci CPDMA stats for Rx and Tx Channel */
-	for (l = 0; l < CPSW_STATS_COMMON_LEN; l++)
-		data[l] = readl(cpsw->hw_stats +
-				cpsw_gstrings_stats[l].stat_offset);
-
-	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
-		cpdma_chan_get_stats(cpsw->rxv[ch].ch, &ch_stats);
-		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
-			p = (u8 *)&ch_stats +
-				cpsw_gstrings_ch_stats[i].stat_offset;
-			data[l] = *(u32 *)p;
-		}
-	}
-
-	for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
-		cpdma_chan_get_stats(cpsw->txv[ch].ch, &ch_stats);
-		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
-			p = (u8 *)&ch_stats +
-				cpsw_gstrings_ch_stats[i].stat_offset;
-			data[l] = *(u32 *)p;
-		}
-	}
-}
-
-static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
-					struct sk_buff *skb,
-					struct cpdma_chan *txch)
-{
-	struct cpsw_common *cpsw = priv->cpsw;
-
-	skb_tx_timestamp(skb);
-	return cpdma_chan_submit(txch, skb, skb->data, skb->len,
-				 priv->emac_port + cpsw->data.dual_emac);
-}
-
 static inline void cpsw_add_dual_emac_def_ale_entries(
 		struct cpsw_priv *priv, struct cpsw_slave *slave,
 		u32 slave_port)
@@ -1410,7 +1191,7 @@
 	cpsw_ale_add_vlan(cpsw->ale, slave->port_vlan, port_mask,
 			  port_mask, port_mask, 0);
 	cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
-			   port_mask, ALE_VLAN, slave->port_vlan, 0);
+			   ALE_PORT_HOST, ALE_VLAN, slave->port_vlan, 0);
 	cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
 			   HOST_PORT_NUM, ALE_VLAN |
 			   ALE_SECURE, slave->port_vlan);
@@ -1418,24 +1199,18 @@
 			     ALE_PORT_DROP_UNKNOWN_VLAN, 1);
 }
 
-static void soft_reset_slave(struct cpsw_slave *slave)
-{
-	char name[32];
-
-	snprintf(name, sizeof(name), "slave-%d", slave->slave_num);
-	soft_reset(name, &slave->sliver->soft_reset);
-}
-
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
 	u32 slave_port;
 	struct phy_device *phy;
 	struct cpsw_common *cpsw = priv->cpsw;
 
-	soft_reset_slave(slave);
+	cpsw_sl_reset(slave->mac_sl, 100);
+	cpsw_sl_ctl_reset(slave->mac_sl);
 
 	/* setup priority mapping */
-	writel_relaxed(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map);
+	cpsw_sl_reg_write(slave->mac_sl, CPSW_SL_RX_PRI_MAP,
+			  RX_PRIORITY_MAPPING);
 
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
@@ -1461,7 +1236,8 @@
 	}
 
 	/* setup max packet size, and mac address */
-	writel_relaxed(cpsw->rx_packet_max, &slave->sliver->rx_maxlen);
+	cpsw_sl_reg_write(slave->mac_sl, CPSW_SL_RX_MAXLEN,
+			  cpsw->rx_packet_max);
 	cpsw_set_slave_mac(slave, priv);
 
 	slave->mac_control = 0;	/* no link yet */
@@ -1502,7 +1278,12 @@
 	phy_start(slave->phy);
 
 	/* Configure GMII_SEL register */
-	cpsw_phy_sel(cpsw->dev, slave->phy->interface, slave->slave_num);
+	if (!IS_ERR(slave->data->ifphy))
+		phy_set_mode_ext(slave->data->ifphy, PHY_MODE_ETHERNET,
+				 slave->data->phy_if);
+	else
+		cpsw_phy_sel(cpsw->dev, slave->phy->interface,
+			     slave->slave_num);
 }
 
 static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
@@ -1567,36 +1348,42 @@
 	}
 }
 
-static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
+int cpsw_fill_rx_channels(struct cpsw_priv *priv)
 {
 	struct cpsw_common *cpsw = priv->cpsw;
-	struct sk_buff *skb;
+	struct cpsw_meta_xdp *xmeta;
+	struct page_pool *pool;
+	struct page *page;
 	int ch_buf_num;
 	int ch, i, ret;
+	dma_addr_t dma;
 
 	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+		pool = cpsw->page_pool[ch];
 		ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
 		for (i = 0; i < ch_buf_num; i++) {
-			skb = __netdev_alloc_skb_ip_align(priv->ndev,
-							  cpsw->rx_packet_max,
-							  GFP_KERNEL);
-			if (!skb) {
-				cpsw_err(priv, ifup, "cannot allocate skb\n");
+			page = page_pool_dev_alloc_pages(pool);
+			if (!page) {
+				cpsw_err(priv, ifup, "allocate rx page err\n");
 				return -ENOMEM;
 			}
 
-			skb_set_queue_mapping(skb, ch);
-			ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb,
-						skb->data, skb_tailroom(skb),
-						0);
+			xmeta = page_address(page) + CPSW_XMETA_OFFSET;
+			xmeta->ndev = priv->ndev;
+			xmeta->ch = ch;
+
+			dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM;
+			ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch,
+							    page, dma,
+							    cpsw->rx_packet_max,
+							    0);
 			if (ret < 0) {
 				cpsw_err(priv, ifup,
-					 "cannot submit skb to channel %d rx, error %d\n",
+					 "cannot submit page to channel %d rx, error %d\n",
 					 ch, ret);
-				kfree_skb(skb);
+				page_pool_recycle_direct(pool, page);
 				return ret;
 			}
-			kmemleak_not_leak(skb);
 		}
 
 		cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n",
@@ -1619,7 +1406,8 @@
 	slave->phy = NULL;
 	cpsw_ale_control_set(cpsw->ale, slave_port,
 			     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
-	soft_reset_slave(slave);
+	cpsw_sl_reset(slave->mac_sl, 100);
+	cpsw_sl_ctl_reset(slave->mac_sl);
 }
 
 static int cpsw_tc_to_fifo(int tc, int num_tc)
@@ -1837,9 +1625,23 @@
 	slave_write(slave, tx_prio_map, tx_prio_rg);
 }
 
+static int cpsw_restore_vlans(struct net_device *vdev, int vid, void *arg)
+{
+	struct cpsw_priv *priv = arg;
+
+	if (!vdev)
+		return 0;
+
+	cpsw_ndo_vlan_rx_add_vid(priv->ndev, 0, vid);
+	return 0;
+}
+
 /* restore resources after port reset */
 static void cpsw_restore(struct cpsw_priv *priv)
 {
+	/* restore vlan configurations */
+	vlan_for_each(priv->ndev, cpsw_restore_vlans, priv);
+
 	/* restore MQPRIO offload */
 	for_each_slave(priv, cpsw_mqprio_resume, priv);
 
@@ -1917,6 +1719,13 @@
 			enable_irq(cpsw->irqs_table[0]);
 		}
 
+		/* create rxqs for both interfaces in dual mac as they use the
+		 * same pool and must be destroyed together when unused.
+		 */
+		ret = cpsw_create_xdp_rxqs(cpsw);
+		if (ret < 0)
+			goto err_cleanup;
+
 		ret = cpsw_fill_rx_channels(priv);
 		if (ret < 0)
 			goto err_cleanup;
@@ -1943,7 +1752,11 @@
 	return 0;
 
 err_cleanup:
-	cpdma_ctlr_stop(cpsw->dma);
+	if (!cpsw->usage_count) {
+		cpdma_ctlr_stop(cpsw->dma);
+		cpsw_destroy_xdp_rxqs(cpsw);
+	}
+
 	for_each_slave(priv, cpsw_slave_stop, cpsw);
 	pm_runtime_put_sync(cpsw->dev);
 	netif_carrier_off(priv->ndev);
@@ -1956,6 +1769,7 @@
 	struct cpsw_common *cpsw = priv->cpsw;
 
 	cpsw_info(priv, ifdown, "shutting down cpsw device\n");
+	__hw_addr_ref_unsync_dev(&ndev->mc, ndev, cpsw_purge_all_mc);
 	netif_tx_stop_all_queues(priv->ndev);
 	netif_carrier_off(priv->ndev);
 
@@ -1966,11 +1780,12 @@
 		cpsw_intr_disable(cpsw);
 		cpdma_ctlr_stop(cpsw->dma);
 		cpsw_ale_stop(cpsw->ale);
+		cpsw_destroy_xdp_rxqs(cpsw);
 	}
 	for_each_slave(priv, cpsw_slave_stop, cpsw);
 
 	if (cpsw_need_resplit(cpsw))
-		cpsw_split_res(ndev);
+		cpsw_split_res(cpsw);
 
 	cpsw->usage_count--;
 	pm_runtime_put_sync(cpsw->dev);
@@ -1994,7 +1809,7 @@
 	}
 
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-	    cpts_is_tx_enabled(cpts) && cpts_can_timestamp(cpts, skb))
+	    priv->tx_ts_enabled && cpts_can_timestamp(cpts, skb))
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	q_idx = skb_get_queue_mapping(skb);
@@ -2003,7 +1818,9 @@
 
 	txch = cpsw->txv[q_idx].ch;
 	txq = netdev_get_tx_queue(ndev, q_idx);
-	ret = cpsw_tx_packet_submit(priv, skb, txch);
+	skb_tx_timestamp(skb);
+	ret = cpdma_chan_submit(txch, skb, skb->data, skb->len,
+				priv->emac_port + cpsw->data.dual_emac);
 	if (unlikely(ret != 0)) {
 		cpsw_err(priv, tx_err, "desc submit failed\n");
 		goto fail;
@@ -2038,13 +1855,13 @@
 
 #if IS_ENABLED(CONFIG_TI_CPTS)
 
-static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
+static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
 {
+	struct cpsw_common *cpsw = priv->cpsw;
 	struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
 	u32 ts_en, seq_id;
 
-	if (!cpts_is_tx_enabled(cpsw->cpts) &&
-	    !cpts_is_rx_enabled(cpsw->cpts)) {
+	if (!priv->tx_ts_enabled && !priv->rx_ts_enabled) {
 		slave_write(slave, 0, CPSW1_TS_CTL);
 		return;
 	}
@@ -2052,10 +1869,10 @@
 	seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
 	ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-	if (cpts_is_tx_enabled(cpsw->cpts))
+	if (priv->tx_ts_enabled)
 		ts_en |= CPSW_V1_TS_TX_EN;
 
-	if (cpts_is_rx_enabled(cpsw->cpts))
+	if (priv->rx_ts_enabled)
 		ts_en |= CPSW_V1_TS_RX_EN;
 
 	slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -2075,20 +1892,20 @@
 	case CPSW_VERSION_2:
 		ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-		if (cpts_is_tx_enabled(cpsw->cpts))
+		if (priv->tx_ts_enabled)
 			ctrl |= CTRL_V2_TX_TS_BITS;
 
-		if (cpts_is_rx_enabled(cpsw->cpts))
+		if (priv->rx_ts_enabled)
 			ctrl |= CTRL_V2_RX_TS_BITS;
 		break;
 	case CPSW_VERSION_3:
 	default:
 		ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-		if (cpts_is_tx_enabled(cpsw->cpts))
+		if (priv->tx_ts_enabled)
 			ctrl |= CTRL_V3_TX_TS_BITS;
 
-		if (cpts_is_rx_enabled(cpsw->cpts))
+		if (priv->rx_ts_enabled)
 			ctrl |= CTRL_V3_RX_TS_BITS;
 		break;
 	}
@@ -2098,6 +1915,7 @@
 	slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE);
 	slave_write(slave, ctrl, CPSW2_CONTROL);
 	writel_relaxed(ETH_P_1588, &cpsw->regs->ts_ltype);
+	writel_relaxed(ETH_P_8021Q, &cpsw->regs->vlan_ltype);
 }
 
 static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
@@ -2105,7 +1923,6 @@
 	struct cpsw_priv *priv = netdev_priv(dev);
 	struct hwtstamp_config cfg;
 	struct cpsw_common *cpsw = priv->cpsw;
-	struct cpts *cpts = cpsw->cpts;
 
 	if (cpsw->version != CPSW_VERSION_1 &&
 	    cpsw->version != CPSW_VERSION_2 &&
@@ -2124,7 +1941,7 @@
 
 	switch (cfg.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		cpts_rx_enable(cpts, 0);
+		priv->rx_ts_enabled = 0;
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_NTP_ALL:
@@ -2132,7 +1949,7 @@
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-		cpts_rx_enable(cpts, HWTSTAMP_FILTER_PTP_V1_L4_EVENT);
+		priv->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
@@ -2144,18 +1961,18 @@
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		cpts_rx_enable(cpts, HWTSTAMP_FILTER_PTP_V2_EVENT);
+		priv->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		break;
 	default:
 		return -ERANGE;
 	}
 
-	cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
+	priv->tx_ts_enabled = cfg.tx_type == HWTSTAMP_TX_ON;
 
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
-		cpsw_hwtstamp_v1(cpsw);
+		cpsw_hwtstamp_v1(priv);
 		break;
 	case CPSW_VERSION_2:
 	case CPSW_VERSION_3:
@@ -2171,7 +1988,7 @@
 static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 {
 	struct cpsw_common *cpsw = ndev_to_cpsw(dev);
-	struct cpts *cpts = cpsw->cpts;
+	struct cpsw_priv *priv = netdev_priv(dev);
 	struct hwtstamp_config cfg;
 
 	if (cpsw->version != CPSW_VERSION_1 &&
@@ -2180,10 +1997,8 @@
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
-	cfg.tx_type = cpts_is_tx_enabled(cpts) ?
-		      HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-	cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
-			 cpts->rx_enable : HWTSTAMP_FILTER_NONE);
+	cfg.tx_type = priv->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	cfg.rx_filter = priv->rx_ts_enabled;
 
 	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
 }
@@ -2276,33 +2091,24 @@
 	return 0;
 }
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void cpsw_ndo_poll_controller(struct net_device *ndev)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	cpsw_intr_disable(cpsw);
-	cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw);
-	cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw);
-	cpsw_intr_enable(cpsw);
-}
-#endif
-
 static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv,
 				unsigned short vid)
 {
 	int ret;
 	int unreg_mcast_mask = 0;
+	int mcast_mask;
 	u32 port_mask;
 	struct cpsw_common *cpsw = priv->cpsw;
 
 	if (cpsw->data.dual_emac) {
 		port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST;
 
+		mcast_mask = ALE_PORT_HOST;
 		if (priv->ndev->flags & IFF_ALLMULTI)
-			unreg_mcast_mask = port_mask;
+			unreg_mcast_mask = mcast_mask;
 	} else {
 		port_mask = ALE_ALL_PORTS;
+		mcast_mask = port_mask;
 
 		if (priv->ndev->flags & IFF_ALLMULTI)
 			unreg_mcast_mask = ALE_ALL_PORTS;
@@ -2321,7 +2127,7 @@
 		goto clean_vid;
 
 	ret = cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
-				 port_mask, ALE_VLAN, vid, 0);
+				 mcast_mask, ALE_VLAN, vid, 0);
 	if (ret != 0)
 		goto clean_vlan_ucast;
 	return 0;
@@ -2403,6 +2209,7 @@
 				  HOST_PORT_NUM, ALE_VLAN, vid);
 	ret |= cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
 				  0, ALE_VLAN, vid);
+	ret |= cpsw_ale_flush_multicast(cpsw->ale, 0, vid);
 err:
 	pm_runtime_put(cpsw->dev);
 	return ret;
@@ -2455,7 +2262,7 @@
 		netdev_get_tx_queue(slave->ndev, queue)->tx_maxrate = rate;
 	}
 
-	cpsw_split_res(ndev);
+	cpsw_split_res(cpsw);
 	return ret;
 }
 
@@ -2531,6 +2338,76 @@
 	}
 }
 
+static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf)
+{
+	struct bpf_prog *prog = bpf->prog;
+
+	if (!priv->xdpi.prog && !prog)
+		return 0;
+
+	if (!xdp_attachment_flags_ok(&priv->xdpi, bpf))
+		return -EBUSY;
+
+	WRITE_ONCE(priv->xdp_prog, prog);
+
+	xdp_attachment_setup(&priv->xdpi, bpf);
+
+	return 0;
+}
+
+static int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return cpsw_xdp_prog_setup(priv, bpf);
+
+	case XDP_QUERY_PROG:
+		return xdp_attachment_query(&priv->xdpi, bpf);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n,
+			     struct xdp_frame **frames, u32 flags)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct xdp_frame *xdpf;
+	int i, drops = 0;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		xdpf = frames[i];
+		if (xdpf->len < CPSW_MIN_PACKET_SIZE) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+			continue;
+		}
+
+		if (cpsw_xdp_tx_frame(priv, xdpf, NULL))
+			drops++;
+	}
+
+	return n - drops;
+}
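/* Per the ndo_xdp_xmit() contract, the return value is the number of
 * frames accepted; runts shorter than CPSW_MIN_PACKET_SIZE are freed
 * and counted as drops rather than padded.
 */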
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void cpsw_ndo_poll_controller(struct net_device *ndev)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	cpsw_intr_disable(cpsw);
+	cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw);
+	cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw);
+	cpsw_intr_enable(cpsw);
+}
+#endif
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -2547,27 +2424,10 @@
 	.ndo_vlan_rx_add_vid	= cpsw_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= cpsw_ndo_vlan_rx_kill_vid,
 	.ndo_setup_tc           = cpsw_ndo_setup_tc,
+	.ndo_bpf		= cpsw_ndo_bpf,
+	.ndo_xdp_xmit		= cpsw_ndo_xdp_xmit,
 };
 
-static int cpsw_get_regs_len(struct net_device *ndev)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
-}
-
-static void cpsw_get_regs(struct net_device *ndev,
-			  struct ethtool_regs *regs, void *p)
-{
-	u32 *reg = p;
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	/* update CPSW IP version */
-	regs->version = cpsw->version;
-
-	cpsw_ale_dump(cpsw->ale, reg);
-}
-
 static void cpsw_get_drvinfo(struct net_device *ndev,
 			     struct ethtool_drvinfo *info)
 {
@@ -2579,119 +2439,6 @@
 	strlcpy(info->bus_info, pdev->name, sizeof(info->bus_info));
 }
 
-static u32 cpsw_get_msglevel(struct net_device *ndev)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	return priv->msg_enable;
-}
-
-static void cpsw_set_msglevel(struct net_device *ndev, u32 value)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	priv->msg_enable = value;
-}
-
-#if IS_ENABLED(CONFIG_TI_CPTS)
-static int cpsw_get_ts_info(struct net_device *ndev,
-			    struct ethtool_ts_info *info)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	info->so_timestamping =
-		SOF_TIMESTAMPING_TX_HARDWARE |
-		SOF_TIMESTAMPING_TX_SOFTWARE |
-		SOF_TIMESTAMPING_RX_HARDWARE |
-		SOF_TIMESTAMPING_RX_SOFTWARE |
-		SOF_TIMESTAMPING_SOFTWARE |
-		SOF_TIMESTAMPING_RAW_HARDWARE;
-	info->phc_index = cpsw->cpts->phc_index;
-	info->tx_types =
-		(1 << HWTSTAMP_TX_OFF) |
-		(1 << HWTSTAMP_TX_ON);
-	info->rx_filters =
-		(1 << HWTSTAMP_FILTER_NONE) |
-		(1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
-		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
-	return 0;
-}
-#else
-static int cpsw_get_ts_info(struct net_device *ndev,
-			    struct ethtool_ts_info *info)
-{
-	info->so_timestamping =
-		SOF_TIMESTAMPING_TX_SOFTWARE |
-		SOF_TIMESTAMPING_RX_SOFTWARE |
-		SOF_TIMESTAMPING_SOFTWARE;
-	info->phc_index = -1;
-	info->tx_types = 0;
-	info->rx_filters = 0;
-	return 0;
-}
-#endif
-
-static int cpsw_get_link_ksettings(struct net_device *ndev,
-				   struct ethtool_link_ksettings *ecmd)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	if (!cpsw->slaves[slave_no].phy)
-		return -EOPNOTSUPP;
-
-	phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy, ecmd);
-	return 0;
-}
-
-static int cpsw_set_link_ksettings(struct net_device *ndev,
-				   const struct ethtool_link_ksettings *ecmd)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy,
-						 ecmd);
-	else
-		return -EOPNOTSUPP;
-}
-
-static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	wol->supported = 0;
-	wol->wolopts = 0;
-
-	if (cpsw->slaves[slave_no].phy)
-		phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol);
-}
-
-static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol);
-	else
-		return -EOPNOTSUPP;
-}
-
-static void cpsw_get_pauseparam(struct net_device *ndev,
-				struct ethtool_pauseparam *pause)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-
-	pause->autoneg = AUTONEG_DISABLE;
-	pause->rx_pause = priv->rx_pause ? true : false;
-	pause->tx_pause = priv->tx_pause ? true : false;
-}
-
 static int cpsw_set_pauseparam(struct net_device *ndev,
 			       struct ethtool_pauseparam *pause)
 {
@@ -2705,316 +2452,10 @@
 	return 0;
 }
 
-static int cpsw_ethtool_op_begin(struct net_device *ndev)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int ret;
-
-	ret = pm_runtime_get_sync(cpsw->dev);
-	if (ret < 0) {
-		cpsw_err(priv, drv, "ethtool begin failed %d\n", ret);
-		pm_runtime_put_noidle(cpsw->dev);
-	}
-
-	return ret;
-}
-
-static void cpsw_ethtool_op_complete(struct net_device *ndev)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	int ret;
-
-	ret = pm_runtime_put(priv->cpsw->dev);
-	if (ret < 0)
-		cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
-}
-
-static void cpsw_get_channels(struct net_device *ndev,
-			      struct ethtool_channels *ch)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-
-	ch->max_rx = cpsw->quirk_irq ? 1 : CPSW_MAX_QUEUES;
-	ch->max_tx = cpsw->quirk_irq ? 1 : CPSW_MAX_QUEUES;
-	ch->max_combined = 0;
-	ch->max_other = 0;
-	ch->other_count = 0;
-	ch->rx_count = cpsw->rx_ch_num;
-	ch->tx_count = cpsw->tx_ch_num;
-	ch->combined_count = 0;
-}
-
-static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
-				  struct ethtool_channels *ch)
-{
-	if (cpsw->quirk_irq) {
-		dev_err(cpsw->dev, "Maximum one tx/rx queue is allowed");
-		return -EOPNOTSUPP;
-	}
-
-	if (ch->combined_count)
-		return -EINVAL;
-
-	/* verify we have at least one channel in each direction */
-	if (!ch->rx_count || !ch->tx_count)
-		return -EINVAL;
-
-	if (ch->rx_count > cpsw->data.channels ||
-	    ch->tx_count > cpsw->data.channels)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
-{
-	struct cpsw_common *cpsw = priv->cpsw;
-	void (*handler)(void *, int, int);
-	struct netdev_queue *queue;
-	struct cpsw_vector *vec;
-	int ret, *ch, vch;
-
-	if (rx) {
-		ch = &cpsw->rx_ch_num;
-		vec = cpsw->rxv;
-		handler = cpsw_rx_handler;
-	} else {
-		ch = &cpsw->tx_ch_num;
-		vec = cpsw->txv;
-		handler = cpsw_tx_handler;
-	}
-
-	while (*ch < ch_num) {
-		vch = rx ? *ch : 7 - *ch;
-		vec[*ch].ch = cpdma_chan_create(cpsw->dma, vch, handler, rx);
-		queue = netdev_get_tx_queue(priv->ndev, *ch);
-		queue->tx_maxrate = 0;
-
-		if (IS_ERR(vec[*ch].ch))
-			return PTR_ERR(vec[*ch].ch);
-
-		if (!vec[*ch].ch)
-			return -EINVAL;
-
-		cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
-			  (rx ? "rx" : "tx"));
-		(*ch)++;
-	}
-
-	while (*ch > ch_num) {
-		(*ch)--;
-
-		ret = cpdma_chan_destroy(vec[*ch].ch);
-		if (ret)
-			return ret;
-
-		cpsw_info(priv, ifup, "destroyed %d %s channel\n", *ch,
-			  (rx ? "rx" : "tx"));
-	}
-
-	return 0;
-}
-
-static int cpsw_update_channels(struct cpsw_priv *priv,
-				struct ethtool_channels *ch)
-{
-	int ret;
-
-	ret = cpsw_update_channels_res(priv, ch->rx_count, 1);
-	if (ret)
-		return ret;
-
-	ret = cpsw_update_channels_res(priv, ch->tx_count, 0);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static void cpsw_suspend_data_pass(struct net_device *ndev)
-{
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-	struct cpsw_slave *slave;
-	int i;
-
-	/* Disable NAPI scheduling */
-	cpsw_intr_disable(cpsw);
-
-	/* Stop all transmit queues for every network device.
-	 * Disable re-using rx descriptors with dormant_on.
-	 */
-	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
-		if (!(slave->ndev && netif_running(slave->ndev)))
-			continue;
-
-		netif_tx_stop_all_queues(slave->ndev);
-		netif_dormant_on(slave->ndev);
-	}
-
-	/* Handle rest of tx packets and stop cpdma channels */
-	cpdma_ctlr_stop(cpsw->dma);
-}
-
-static int cpsw_resume_data_pass(struct net_device *ndev)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	struct cpsw_slave *slave;
-	int i, ret;
-
-	/* Allow rx packets handling */
-	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++)
-		if (slave->ndev && netif_running(slave->ndev))
-			netif_dormant_off(slave->ndev);
-
-	/* After this receive is started */
-	if (cpsw->usage_count) {
-		ret = cpsw_fill_rx_channels(priv);
-		if (ret)
-			return ret;
-
-		cpdma_ctlr_start(cpsw->dma);
-		cpsw_intr_enable(cpsw);
-	}
-
-	/* Resume transmit for every affected interface */
-	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++)
-		if (slave->ndev && netif_running(slave->ndev))
-			netif_tx_start_all_queues(slave->ndev);
-
-	return 0;
-}
-
 static int cpsw_set_channels(struct net_device *ndev,
 			     struct ethtool_channels *chs)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	struct cpsw_slave *slave;
-	int i, ret;
-
-	ret = cpsw_check_ch_settings(cpsw, chs);
-	if (ret < 0)
-		return ret;
-
-	cpsw_suspend_data_pass(ndev);
-	ret = cpsw_update_channels(priv, chs);
-	if (ret)
-		goto err;
-
-	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
-		if (!(slave->ndev && netif_running(slave->ndev)))
-			continue;
-
-		/* Inform stack about new count of queues */
-		ret = netif_set_real_num_tx_queues(slave->ndev,
-						   cpsw->tx_ch_num);
-		if (ret) {
-			dev_err(priv->dev, "cannot set real number of tx queues\n");
-			goto err;
-		}
-
-		ret = netif_set_real_num_rx_queues(slave->ndev,
-						   cpsw->rx_ch_num);
-		if (ret) {
-			dev_err(priv->dev, "cannot set real number of rx queues\n");
-			goto err;
-		}
-	}
-
-	if (cpsw->usage_count)
-		cpsw_split_res(ndev);
-
-	ret = cpsw_resume_data_pass(ndev);
-	if (!ret)
-		return 0;
-err:
-	dev_err(priv->dev, "cannot update channels number, closing device\n");
-	dev_close(ndev);
-	return ret;
-}
-
-static int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_get_eee(cpsw->slaves[slave_no].phy, edata);
-	else
-		return -EOPNOTSUPP;
-}
-
-static int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_set_eee(cpsw->slaves[slave_no].phy, edata);
-	else
-		return -EOPNOTSUPP;
-}
-
-static int cpsw_nway_reset(struct net_device *ndev)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int slave_no = cpsw_slave_index(cpsw, priv);
-
-	if (cpsw->slaves[slave_no].phy)
-		return genphy_restart_aneg(cpsw->slaves[slave_no].phy);
-	else
-		return -EOPNOTSUPP;
-}
-
-static void cpsw_get_ringparam(struct net_device *ndev,
-			       struct ethtool_ringparam *ering)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-
-	/* not supported */
-	ering->tx_max_pending = 0;
-	ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma);
-	ering->rx_max_pending = descs_pool_size - CPSW_MAX_QUEUES;
-	ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma);
-}
-
-static int cpsw_set_ringparam(struct net_device *ndev,
-			      struct ethtool_ringparam *ering)
-{
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_common *cpsw = priv->cpsw;
-	int ret;
-
-	/* ignore ering->tx_pending - only rx_pending adjustment is supported */
-
-	if (ering->rx_mini_pending || ering->rx_jumbo_pending ||
-	    ering->rx_pending < CPSW_MAX_QUEUES ||
-	    ering->rx_pending > (descs_pool_size - CPSW_MAX_QUEUES))
-		return -EINVAL;
-
-	if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma))
-		return 0;
-
-	cpsw_suspend_data_pass(ndev);
-
-	cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
-
-	if (cpsw->usage_count)
-		cpdma_chan_split_pool(cpsw->dma);
-
-	ret = cpsw_resume_data_pass(ndev);
-	if (!ret)
-		return 0;
-
-	dev_err(&ndev->dev, "cannot set ring params, closing device\n");
-	dev_close(ndev);
-	return ret;
+	return cpsw_set_channels_common(ndev, chs, cpsw_rx_handler);
 }
 
 static const struct ethtool_ops cpsw_ethtool_ops = {
@@ -3047,19 +2488,6 @@
 	.set_ringparam = cpsw_set_ringparam,
 };
 
-static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
-			    u32 slave_reg_ofs, u32 sliver_reg_ofs)
-{
-	void __iomem		*regs = cpsw->regs;
-	int			slave_num = slave->slave_num;
-	struct cpsw_slave_data	*data = cpsw->data.slave_data + slave_num;
-
-	slave->data	= data;
-	slave->regs	= regs + slave_reg_ofs;
-	slave->sliver	= regs + sliver_reg_ofs;
-	slave->port_vlan = data->dual_emac_res_vlan;
-}
-
 static int cpsw_probe_dt(struct cpsw_platform_data *data,
 			 struct platform_device *pdev)
 {
@@ -3132,9 +2560,20 @@
 		const __be32 *parp;
 
 		/* This is no slave child node, continue */
-		if (strcmp(slave_node->name, "slave"))
+		if (!of_node_name_eq(slave_node, "slave"))
 			continue;
 
+		slave_data->ifphy = devm_of_phy_get(&pdev->dev, slave_node,
+						    NULL);
+		if (!IS_ENABLED(CONFIG_TI_CPSW_PHY_SEL) &&
+		    IS_ERR(slave_data->ifphy)) {
+			ret = PTR_ERR(slave_data->ifphy);
+			dev_err(&pdev->dev,
+				"%d: Error retrieving port phy: %d\n", i, ret);
+			goto err_node_put;
+		}
+
+		slave_data->slave_node = slave_node;
 		slave_data->phy_node = of_parse_phandle(slave_node,
 							"phy-handle", 0);
 		parp = of_get_property(slave_node, "phy_id", &lenp);
@@ -3150,7 +2589,7 @@
 			if (ret) {
 				if (ret != -EPROBE_DEFER)
 					dev_err(&pdev->dev, "failed to register fixed-link phy: %d\n", ret);
-				return ret;
+				goto err_node_put;
 			}
 			slave_data->phy_node = of_node_get(slave_node);
 		} else if (parp) {
@@ -3168,7 +2607,8 @@
 			of_node_put(mdio_node);
 			if (!mdio) {
 				dev_err(&pdev->dev, "Missing mdio platform device\n");
-				return -EINVAL;
+				ret = -EINVAL;
+				goto err_node_put;
 			}
 			snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
 				 PHY_ID_FMT, mdio->name, phyid);
@@ -3183,18 +2623,19 @@
 		if (slave_data->phy_if < 0) {
 			dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
 				i);
-			return slave_data->phy_if;
+			ret = slave_data->phy_if;
+			goto err_node_put;
 		}
 
 no_phy_slave:
 		mac_addr = of_get_mac_address(slave_node);
-		if (mac_addr) {
-			memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
+		if (!IS_ERR(mac_addr)) {
+			ether_addr_copy(slave_data->mac_addr, mac_addr);
 		} else {
 			ret = ti_cm_get_macid(&pdev->dev, i,
 					      slave_data->mac_addr);
 			if (ret)
-				return ret;
+				goto err_node_put;
 		}
 		if (data->dual_emac) {
 			if (of_property_read_u32(slave_node, "dual_emac_res_vlan",
@@ -3209,17 +2650,22 @@
 		}
 
 		i++;
-		if (i == data->slaves)
-			break;
+		if (i == data->slaves) {
+			ret = 0;
+			goto err_node_put;
+		}
 	}
 
 	return 0;
+
+err_node_put:
+	of_node_put(slave_node);
+	return ret;
 }
 
 static void cpsw_remove_dt(struct platform_device *pdev)
 {
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct cpsw_common *cpsw = platform_get_drvdata(pdev);
 	struct cpsw_platform_data *data = &cpsw->data;
 	struct device_node *node = pdev->dev.of_node;
 	struct device_node *slave_node;
@@ -3228,7 +2674,7 @@
 	for_each_available_child_of_node(node, slave_node) {
 		struct cpsw_slave_data *slave_data = &data->slave_data[i];
 
-		if (strcmp(slave_node->name, "slave"))
+		if (!of_node_name_eq(slave_node, "slave"))
 			continue;
 
 		if (of_phy_is_fixed_link(slave_node))
@@ -3237,8 +2683,10 @@
 		of_node_put(slave_data->phy_node);
 
 		i++;
-		if (i == data->slaves)
+		if (i == data->slaves) {
+			of_node_put(slave_node);
 			break;
+		}
 	}
 
 	of_platform_depopulate(&pdev->dev);
@@ -3252,7 +2700,8 @@
 	struct cpsw_priv		*priv_sl2;
 	int ret = 0;
 
-	ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
+	ndev = devm_alloc_etherdev_mqs(cpsw->dev, sizeof(struct cpsw_priv),
+				       CPSW_MAX_QUEUES, CPSW_MAX_QUEUES);
 	if (!ndev) {
 		dev_err(cpsw->dev, "cpsw: error allocating net_device\n");
 		return -ENOMEM;
@@ -3285,12 +2734,10 @@
 
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, cpsw->dev);
+	ndev->dev.of_node = cpsw->slaves[1].data->slave_node;
 	ret = register_netdev(ndev);
-	if (ret) {
+	if (ret)
 		dev_err(cpsw->dev, "cpsw: error registering net device\n");
-		free_netdev(ndev);
-		ret = -ENODEV;
-	}
 
 	return ret;
 }
@@ -3311,63 +2758,74 @@
 
 static int cpsw_probe(struct platform_device *pdev)
 {
+	struct device			*dev = &pdev->dev;
 	struct clk			*clk;
 	struct cpsw_platform_data	*data;
 	struct net_device		*ndev;
 	struct cpsw_priv		*priv;
-	struct cpdma_params		dma_params;
-	struct cpsw_ale_params		ale_params;
 	void __iomem			*ss_regs;
-	void __iomem			*cpts_regs;
-	struct resource			*res, *ss_res;
+	struct resource			*ss_res;
 	struct gpio_descs		*mode;
-	u32 slave_offset, sliver_offset, slave_size;
 	const struct soc_device_attribute *soc;
 	struct cpsw_common		*cpsw;
-	int ret = 0, i, ch;
+	int ret = 0, ch;
 	int irq;
 
-	cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
+	cpsw = devm_kzalloc(dev, sizeof(struct cpsw_common), GFP_KERNEL);
 	if (!cpsw)
 		return -ENOMEM;
 
-	cpsw->dev = &pdev->dev;
+	platform_set_drvdata(pdev, cpsw);
+	cpsw->dev = dev;
 
-	ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
-	if (!ndev) {
-		dev_err(&pdev->dev, "error allocating net_device\n");
-		return -ENOMEM;
-	}
-
-	platform_set_drvdata(pdev, ndev);
-	priv = netdev_priv(ndev);
-	priv->cpsw = cpsw;
-	priv->ndev = ndev;
-	priv->dev  = &ndev->dev;
-	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-	cpsw->rx_packet_max = max(rx_packet_max, 128);
-
-	mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
+	mode = devm_gpiod_get_array_optional(dev, "mode", GPIOD_OUT_LOW);
 	if (IS_ERR(mode)) {
 		ret = PTR_ERR(mode);
-		dev_err(&pdev->dev, "gpio request failed, ret %d\n", ret);
-		goto clean_ndev_ret;
+		dev_err(dev, "gpio request failed, ret %d\n", ret);
+		return ret;
 	}
 
+	clk = devm_clk_get(dev, "fck");
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		dev_err(dev, "fck clock not found: %d\n", ret);
+		return ret;
+	}
+	cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
+
+	ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ss_regs = devm_ioremap_resource(dev, ss_res);
+	if (IS_ERR(ss_regs))
+		return PTR_ERR(ss_regs);
+	cpsw->regs = ss_regs;
+
+	cpsw->wr_regs = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(cpsw->wr_regs))
+		return PTR_ERR(cpsw->wr_regs);
+
+	/* RX IRQ */
+	irq = platform_get_irq(pdev, 1);
+	if (irq < 0)
+		return irq;
+	cpsw->irqs_table[0] = irq;
+
+	/* TX IRQ */
+	irq = platform_get_irq(pdev, 2);
+	if (irq < 0)
+		return irq;
+	cpsw->irqs_table[1] = irq;
+
 	/*
 	 * This may be required here for child devices.
 	 */
-	pm_runtime_enable(&pdev->dev);
-
-	/* Select default pin state */
-	pinctrl_pm_select_default_state(&pdev->dev);
+	pm_runtime_enable(dev);
 
 	/* Need to enable clocks with runtime PM api to access module
 	 * registers
 	 */
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&pdev->dev);
+		pm_runtime_put_noidle(dev);
 		goto clean_runtime_disable_ret;
 	}
 
@@ -3375,169 +2833,70 @@
 	if (ret)
 		goto clean_dt_ret;
 
+	soc = soc_device_match(cpsw_soc_devices);
+	if (soc)
+		cpsw->quirk_irq = 1;
+
 	data = &cpsw->data;
-	cpsw->rx_ch_num = 1;
-	cpsw->tx_ch_num = 1;
-
-	if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
-		memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
-		dev_info(&pdev->dev, "Detected MACID = %pM\n", priv->mac_addr);
-	} else {
-		eth_random_addr(priv->mac_addr);
-		dev_info(&pdev->dev, "Random MACID = %pM\n", priv->mac_addr);
-	}
-
-	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
-
-	cpsw->slaves = devm_kcalloc(&pdev->dev,
+	cpsw->slaves = devm_kcalloc(dev,
 				    data->slaves, sizeof(struct cpsw_slave),
 				    GFP_KERNEL);
 	if (!cpsw->slaves) {
 		ret = -ENOMEM;
 		goto clean_dt_ret;
 	}
-	for (i = 0; i < data->slaves; i++)
-		cpsw->slaves[i].slave_num = i;
 
-	cpsw->slaves[0].ndev = ndev;
-	priv->emac_port = 0;
+	cpsw->rx_packet_max = max(rx_packet_max, CPSW_MAX_PACKET_SIZE);
+	cpsw->descs_pool_size = descs_pool_size;
 
-	clk = devm_clk_get(&pdev->dev, "fck");
-	if (IS_ERR(clk)) {
-		dev_err(priv->dev, "fck is not found\n");
-		ret = -ENODEV;
+	ret = cpsw_init_common(cpsw, ss_regs, ale_ageout,
+			       ss_res->start + CPSW2_BD_OFFSET,
+			       descs_pool_size);
+	if (ret)
 		goto clean_dt_ret;
-	}
-	cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
-
-	ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ss_regs = devm_ioremap_resource(&pdev->dev, ss_res);
-	if (IS_ERR(ss_regs)) {
-		ret = PTR_ERR(ss_regs);
-		goto clean_dt_ret;
-	}
-	cpsw->regs = ss_regs;
-
-	cpsw->version = readl(&cpsw->regs->id_ver);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(cpsw->wr_regs)) {
-		ret = PTR_ERR(cpsw->wr_regs);
-		goto clean_dt_ret;
-	}
-
-	memset(&dma_params, 0, sizeof(dma_params));
-	memset(&ale_params, 0, sizeof(ale_params));
-
-	switch (cpsw->version) {
-	case CPSW_VERSION_1:
-		cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-		cpts_regs		= ss_regs + CPSW1_CPTS_OFFSET;
-		cpsw->hw_stats	     = ss_regs + CPSW1_HW_STATS;
-		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
-		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
-		ale_params.ale_regs  = ss_regs + CPSW1_ALE_OFFSET;
-		slave_offset         = CPSW1_SLAVE_OFFSET;
-		slave_size           = CPSW1_SLAVE_SIZE;
-		sliver_offset        = CPSW1_SLIVER_OFFSET;
-		dma_params.desc_mem_phys = 0;
-		break;
-	case CPSW_VERSION_2:
-	case CPSW_VERSION_3:
-	case CPSW_VERSION_4:
-		cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-		cpts_regs		= ss_regs + CPSW2_CPTS_OFFSET;
-		cpsw->hw_stats	     = ss_regs + CPSW2_HW_STATS;
-		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
-		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
-		ale_params.ale_regs  = ss_regs + CPSW2_ALE_OFFSET;
-		slave_offset         = CPSW2_SLAVE_OFFSET;
-		slave_size           = CPSW2_SLAVE_SIZE;
-		sliver_offset        = CPSW2_SLIVER_OFFSET;
-		dma_params.desc_mem_phys =
-			(u32 __force) ss_res->start + CPSW2_BD_OFFSET;
-		break;
-	default:
-		dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version);
-		ret = -ENODEV;
-		goto clean_dt_ret;
-	}
-	for (i = 0; i < cpsw->data.slaves; i++) {
-		struct cpsw_slave *slave = &cpsw->slaves[i];
-
-		cpsw_slave_init(slave, cpsw, slave_offset, sliver_offset);
-		slave_offset  += slave_size;
-		sliver_offset += SLIVER_SIZE;
-	}
-
-	dma_params.dev		= &pdev->dev;
-	dma_params.rxthresh	= dma_params.dmaregs + CPDMA_RXTHRESH;
-	dma_params.rxfree	= dma_params.dmaregs + CPDMA_RXFREE;
-	dma_params.rxhdp	= dma_params.txhdp + CPDMA_RXHDP;
-	dma_params.txcp		= dma_params.txhdp + CPDMA_TXCP;
-	dma_params.rxcp		= dma_params.txhdp + CPDMA_RXCP;
-
-	dma_params.num_chan		= data->channels;
-	dma_params.has_soft_reset	= true;
-	dma_params.min_packet_size	= CPSW_MIN_PACKET_SIZE;
-	dma_params.desc_mem_size	= data->bd_ram_size;
-	dma_params.desc_align		= 16;
-	dma_params.has_ext_regs		= true;
-	dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
-	dma_params.bus_freq_mhz		= cpsw->bus_freq_mhz;
-	dma_params.descs_pool_size	= descs_pool_size;
-
-	cpsw->dma = cpdma_ctlr_create(&dma_params);
-	if (!cpsw->dma) {
-		dev_err(priv->dev, "error initializing dma\n");
-		ret = -ENOMEM;
-		goto clean_dt_ret;
-	}
-
-	soc = soc_device_match(cpsw_soc_devices);
-	if (soc)
-		cpsw->quirk_irq = 1;
 
 	ch = cpsw->quirk_irq ? 0 : 7;
 	cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, ch, cpsw_tx_handler, 0);
 	if (IS_ERR(cpsw->txv[0].ch)) {
-		dev_err(priv->dev, "error initializing tx dma channel\n");
+		dev_err(dev, "error initializing tx dma channel\n");
 		ret = PTR_ERR(cpsw->txv[0].ch);
-		goto clean_dma_ret;
+		goto clean_cpts;
 	}
 
 	cpsw->rxv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
 	if (IS_ERR(cpsw->rxv[0].ch)) {
-		dev_err(priv->dev, "error initializing rx dma channel\n");
+		dev_err(dev, "error initializing rx dma channel\n");
 		ret = PTR_ERR(cpsw->rxv[0].ch);
-		goto clean_dma_ret;
+		goto clean_cpts;
+	}
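+	/* split the NAPI poll budget across the newly created rx/tx channels */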
+	cpsw_split_res(cpsw);
+
+	/* setup netdev */
+	ndev = devm_alloc_etherdev_mqs(dev, sizeof(struct cpsw_priv),
+				       CPSW_MAX_QUEUES, CPSW_MAX_QUEUES);
+	if (!ndev) {
+		dev_err(dev, "error allocating net_device\n");
+		ret = -ENOMEM;
+		goto clean_cpts;
 	}
 
-	ale_params.dev			= &pdev->dev;
-	ale_params.ale_ageout		= ale_ageout;
-	ale_params.ale_entries		= data->ale_entries;
-	ale_params.ale_ports		= CPSW_ALE_PORTS_NUM;
+	priv = netdev_priv(ndev);
+	priv->cpsw = cpsw;
+	priv->ndev = ndev;
+	priv->dev  = dev;
+	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
+	priv->emac_port = 0;
 
-	cpsw->ale = cpsw_ale_create(&ale_params);
-	if (!cpsw->ale) {
-		dev_err(priv->dev, "error initializing ale engine\n");
-		ret = -ENODEV;
-		goto clean_dma_ret;
+	if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
+		memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
+		dev_info(dev, "Detected MACID = %pM\n", priv->mac_addr);
+	} else {
+		eth_random_addr(priv->mac_addr);
+		dev_info(dev, "Random MACID = %pM\n", priv->mac_addr);
 	}
 
-	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
-	if (IS_ERR(cpsw->cpts)) {
-		ret = PTR_ERR(cpsw->cpts);
-		goto clean_dma_ret;
-	}
+	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 
-	ndev->irq = platform_get_irq(pdev, 1);
-	if (ndev->irq < 0) {
-		dev_err(priv->dev, "error getting irq resource\n");
-		ret = ndev->irq;
-		goto clean_dma_ret;
-	}
+	cpsw->slaves[0].ndev = ndev;
 
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
 
@@ -3549,15 +2908,15 @@
 	netif_tx_napi_add(ndev, &cpsw->napi_tx,
 			  cpsw->quirk_irq ? cpsw_tx_poll : cpsw_tx_mq_poll,
 			  CPSW_POLL_WEIGHT);
-	cpsw_split_res(ndev);
 
 	/* register the network device */
-	SET_NETDEV_DEV(ndev, &pdev->dev);
+	SET_NETDEV_DEV(ndev, dev);
+	ndev->dev.of_node = cpsw->slaves[0].data->slave_node;
 	ret = register_netdev(ndev);
 	if (ret) {
-		dev_err(priv->dev, "error registering net device\n");
+		dev_err(dev, "error registering net device\n");
 		ret = -ENODEV;
-		goto clean_dma_ret;
+		goto clean_cpts;
 	}
 
 	if (cpsw->data.dual_emac) {
@@ -3575,40 +2934,24 @@
 	 * If anyone wants to implement support for those, make sure to
 	 * first request and append them to irqs_table array.
 	 */
-
-	/* RX IRQ */
-	irq = platform_get_irq(pdev, 1);
-	if (irq < 0) {
-		ret = irq;
-		goto clean_dma_ret;
+	ret = devm_request_irq(dev, cpsw->irqs_table[0], cpsw_rx_interrupt,
+			       0, dev_name(dev), cpsw);
+	if (ret < 0) {
+		dev_err(dev, "error attaching irq (%d)\n", ret);
+		goto clean_unregister_netdev_ret;
 	}
 
-	cpsw->irqs_table[0] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
+	ret = devm_request_irq(dev, cpsw->irqs_table[1], cpsw_tx_interrupt,
 			       0, dev_name(&pdev->dev), cpsw);
 	if (ret < 0) {
-		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-		goto clean_dma_ret;
-	}
-
-	/* TX IRQ */
-	irq = platform_get_irq(pdev, 2);
-	if (irq < 0) {
-		ret = irq;
-		goto clean_dma_ret;
-	}
-
-	cpsw->irqs_table[1] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
-			       0, dev_name(&pdev->dev), cpsw);
-	if (ret < 0) {
-		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-		goto clean_dma_ret;
+		dev_err(dev, "error attaching irq (%d)\n", ret);
+		goto clean_unregister_netdev_ret;
 	}
 
 	cpsw_notice(priv, probe,
 		    "initialized device (regs %pa, irq %d, pool size %d)\n",
-		    &ss_res->start, ndev->irq, dma_params.descs_pool_size);
+		    &ss_res->start, cpsw->irqs_table[0], descs_pool_size);
 
 	pm_runtime_put(&pdev->dev);
 
@@ -3616,23 +2959,21 @@
 
 clean_unregister_netdev_ret:
 	unregister_netdev(ndev);
-clean_dma_ret:
+clean_cpts:
+	cpts_release(cpsw->cpts);
 	cpdma_ctlr_destroy(cpsw->dma);
 clean_dt_ret:
 	cpsw_remove_dt(pdev);
 	pm_runtime_put_sync(&pdev->dev);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
-clean_ndev_ret:
-	free_netdev(priv->ndev);
 	return ret;
 }
 
 static int cpsw_remove(struct platform_device *pdev)
 {
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
-	int ret;
+	struct cpsw_common *cpsw = platform_get_drvdata(pdev);
+	int i, ret;
 
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
@@ -3640,39 +2981,28 @@
 		return ret;
 	}
 
-	if (cpsw->data.dual_emac)
-		unregister_netdev(cpsw->slaves[1].ndev);
-	unregister_netdev(ndev);
+	for (i = 0; i < cpsw->data.slaves; i++)
+		if (cpsw->slaves[i].ndev)
+			unregister_netdev(cpsw->slaves[i].ndev);
 
 	cpts_release(cpsw->cpts);
 	cpdma_ctlr_destroy(cpsw->dma);
 	cpsw_remove_dt(pdev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	if (cpsw->data.dual_emac)
-		free_netdev(cpsw->slaves[1].ndev);
-	free_netdev(ndev);
 	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
 static int cpsw_suspend(struct device *dev)
 {
-	struct platform_device	*pdev = to_platform_device(dev);
-	struct net_device	*ndev = platform_get_drvdata(pdev);
-	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
+	struct cpsw_common *cpsw = dev_get_drvdata(dev);
+	int i;
 
-	if (cpsw->data.dual_emac) {
-		int i;
-
-		for (i = 0; i < cpsw->data.slaves; i++) {
+	for (i = 0; i < cpsw->data.slaves; i++)
+		if (cpsw->slaves[i].ndev)
 			if (netif_running(cpsw->slaves[i].ndev))
 				cpsw_ndo_stop(cpsw->slaves[i].ndev);
-		}
-	} else {
-		if (netif_running(ndev))
-			cpsw_ndo_stop(ndev);
-	}
 
 	/* Select sleep pin state */
 	pinctrl_pm_select_sleep_state(dev);
@@ -3682,26 +3012,20 @@
 
 static int cpsw_resume(struct device *dev)
 {
-	struct platform_device	*pdev = to_platform_device(dev);
-	struct net_device	*ndev = platform_get_drvdata(pdev);
-	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
+	struct cpsw_common *cpsw = dev_get_drvdata(dev);
+	int i;
 
 	/* Select default pin state */
 	pinctrl_pm_select_default_state(dev);
 
 	/* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */
 	rtnl_lock();
-	if (cpsw->data.dual_emac) {
-		int i;
 
-		for (i = 0; i < cpsw->data.slaves; i++) {
+	for (i = 0; i < cpsw->data.slaves; i++)
+		if (cpsw->slaves[i].ndev)
 			if (netif_running(cpsw->slaves[i].ndev))
 				cpsw_ndo_open(cpsw->slaves[i].ndev);
-		}
-	} else {
-		if (netif_running(ndev))
-			cpsw_ndo_open(ndev);
-	}
+
 	rtnl_unlock();
 
 	return 0;
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index cf111db..35d602f 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -1,15 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Texas Instruments Ethernet Switch Driver
  *
  * Copyright (C) 2013 Texas Instruments
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #ifndef __CPSW_H__
 #define __CPSW_H__
@@ -21,7 +14,13 @@
 			 ((mac)[2] << 16) | ((mac)[3] << 24))
 #define mac_lo(mac)	(((mac)[4] << 0) | ((mac)[5] << 8))
 
+#if IS_ENABLED(CONFIG_TI_CPSW_PHY_SEL)
 void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave);
+#else
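+/* fall back to an empty stub when the phy-sel driver is not built */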
+static inline
+void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave)
+{}
+#endif
 int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr);
 
 #endif /* __CPSW_H__ */
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 5766225..84025dc 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Texas Instruments N-Port Ethernet Switch Address Lookup Engine
  *
  * Copyright (C) 2012 Texas Instruments
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -136,7 +129,7 @@
 		addr[i] = cpsw_ale_get_field(ale_entry, 40 - 8*i, 8);
 }
 
-static inline void cpsw_ale_set_addr(u32 *ale_entry, u8 *addr)
+static inline void cpsw_ale_set_addr(u32 *ale_entry, const u8 *addr)
 {
 	int i;
 
@@ -175,7 +168,7 @@
 	return idx;
 }
 
-static int cpsw_ale_match_addr(struct cpsw_ale *ale, u8 *addr, u16 vid)
+static int cpsw_ale_match_addr(struct cpsw_ale *ale, const u8 *addr, u16 vid)
 {
 	u32 ale_entry[ALE_ENTRY_WORDS];
 	int type, idx;
@@ -287,6 +280,9 @@
 		if (cpsw_ale_get_mcast(ale_entry)) {
 			u8 addr[6];
 
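+			/* SUPER entries are host-managed; skip them so they survive the flush */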
+			if (cpsw_ale_get_super(ale_entry))
+				continue;
+
 			cpsw_ale_get_addr(ale_entry, addr);
 			if (!is_broadcast_ether_addr(addr))
 				cpsw_ale_flush_mcast(ale, ale_entry, port_mask);
@@ -296,7 +292,6 @@
 	}
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_flush_multicast);
 
 static inline void cpsw_ale_set_vlan_entry_type(u32 *ale_entry,
 						int flags, u16 vid)
@@ -309,7 +304,7 @@
 	}
 }
 
-int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port,
+int cpsw_ale_add_ucast(struct cpsw_ale *ale, const u8 *addr, int port,
 		       int flags, u16 vid)
 {
 	u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
@@ -334,9 +329,8 @@
 	cpsw_ale_write(ale, idx, ale_entry);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_add_ucast);
 
-int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port,
+int cpsw_ale_del_ucast(struct cpsw_ale *ale, const u8 *addr, int port,
 		       int flags, u16 vid)
 {
 	u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
@@ -350,9 +344,8 @@
 	cpsw_ale_write(ale, idx, ale_entry);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_del_ucast);
 
-int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
+int cpsw_ale_add_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask,
 		       int flags, u16 vid, int mcast_state)
 {
 	u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
@@ -365,7 +358,7 @@
 	cpsw_ale_set_vlan_entry_type(ale_entry, flags, vid);
 
 	cpsw_ale_set_addr(ale_entry, addr);
-	cpsw_ale_set_super(ale_entry, (flags & ALE_BLOCKED) ? 1 : 0);
+	cpsw_ale_set_super(ale_entry, (flags & ALE_SUPER) ? 1 : 0);
 	cpsw_ale_set_mcast_state(ale_entry, mcast_state);
 
 	mask = cpsw_ale_get_port_mask(ale_entry,
@@ -384,9 +377,8 @@
 	cpsw_ale_write(ale, idx, ale_entry);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_add_mcast);
 
-int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
+int cpsw_ale_del_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask,
 		       int flags, u16 vid)
 {
 	u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
@@ -407,7 +399,6 @@
 	cpsw_ale_write(ale, idx, ale_entry);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_del_mcast);
 
 /* ALE NetCP NU switch specific vlan functions */
 static void cpsw_ale_set_vlan_mcast(struct cpsw_ale *ale, u32 *ale_entry,
@@ -458,7 +449,6 @@
 	cpsw_ale_write(ale, idx, ale_entry);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_add_vlan);
 
 int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask)
 {
@@ -480,40 +470,39 @@
 	cpsw_ale_write(ale, idx, ale_entry);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_del_vlan);
 
-void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti)
+void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port)
 {
 	u32 ale_entry[ALE_ENTRY_WORDS];
-	int type, idx;
 	int unreg_mcast = 0;
-
-	/* Only bother doing the work if the setting is actually changing */
-	if (ale->allmulti == allmulti)
-		return;
-
-	/* Remember the new setting to check against next time */
-	ale->allmulti = allmulti;
+	int type, idx;
 
 	for (idx = 0; idx < ale->params.ale_entries; idx++) {
+		int vlan_members;
+
 		cpsw_ale_read(ale, idx, ale_entry);
 		type = cpsw_ale_get_entry_type(ale_entry);
 		if (type != ALE_TYPE_VLAN)
 			continue;
+		vlan_members =
+			cpsw_ale_get_vlan_member_list(ale_entry,
+						      ale->vlan_field_bits);
+
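+		/* port == -1 means all ports; otherwise only VLANs this port is a member of are updated */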
+		if (port != -1 && !(vlan_members & BIT(port)))
+			continue;
 
 		unreg_mcast =
 			cpsw_ale_get_vlan_unreg_mcast(ale_entry,
 						      ale->vlan_field_bits);
 		if (allmulti)
-			unreg_mcast |= 1;
+			unreg_mcast |= ALE_PORT_HOST;
 		else
-			unreg_mcast &= ~1;
+			unreg_mcast &= ~ALE_PORT_HOST;
 		cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast,
 					      ale->vlan_field_bits);
 		cpsw_ale_write(ale, idx, ale_entry);
 	}
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_set_allmulti);
 
 struct ale_control_info {
 	const char	*name;
@@ -739,7 +728,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_control_set);
 
 int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control)
 {
@@ -763,7 +751,6 @@
 	tmp = readl_relaxed(ale->params.ale_regs + offset) >> shift;
 	return tmp & BITMASK(info->bits);
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_control_get);
 
 static void cpsw_ale_timer(struct timer_list *t)
 {
@@ -788,14 +775,12 @@
 		add_timer(&ale->timer);
 	}
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_start);
 
 void cpsw_ale_stop(struct cpsw_ale *ale)
 {
 	del_timer_sync(&ale->timer);
 	cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0);
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_stop);
 
 struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params)
 {
@@ -879,7 +864,6 @@
 
 	return ale;
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_create);
 
 void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data)
 {
@@ -890,8 +874,3 @@
 		data += ALE_ENTRY_WORDS;
 	}
 }
-EXPORT_SYMBOL_GPL(cpsw_ale_dump);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("TI CPSW ALE driver");
-MODULE_AUTHOR("Texas Instruments");
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index d4fe901..370df25 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -1,16 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Texas Instruments N-Port Ethernet Switch Address Lookup Engine APIs
  *
  * Copyright (C) 2012 Texas Instruments
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #ifndef __TI_CPSW_ALE_H__
 #define __TI_CPSW_ALE_H__
@@ -37,7 +30,6 @@
 	struct cpsw_ale_params	params;
 	struct timer_list	timer;
 	unsigned long		ageout;
-	int			allmulti;
 	u32			version;
 	/* These bits are different on NetCP NU Switch ALE */
 	u32			port_mask_bits;
@@ -105,18 +97,18 @@
 void cpsw_ale_stop(struct cpsw_ale *ale);
 
 int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid);
-int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port,
+int cpsw_ale_add_ucast(struct cpsw_ale *ale, const u8 *addr, int port,
 		       int flags, u16 vid);
-int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port,
+int cpsw_ale_del_ucast(struct cpsw_ale *ale, const u8 *addr, int port,
 		       int flags, u16 vid);
-int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
+int cpsw_ale_add_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask,
 		       int flags, u16 vid, int mcast_state);
-int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
+int cpsw_ale_del_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask,
 		       int flags, u16 vid);
 int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag,
 			int reg_mcast, int unreg_mcast);
 int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port);
-void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti);
+void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port);
 
 int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control);
 int cpsw_ale_control_set(struct cpsw_ale *ale, int port,
diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c
new file mode 100644
index 0000000..31248a6
--- /dev/null
+++ b/drivers/net/ethernet/ti/cpsw_ethtool.c
@@ -0,0 +1,746 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Texas Instruments Ethernet Switch Driver ethtool intf
+ *
+ * Copyright (C) 2019 Texas Instruments
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/kmemleak.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/pm_runtime.h>
+#include <linux/skbuff.h>
+
+#include "cpsw.h"
+#include "cpts.h"
+#include "cpsw_ale.h"
+#include "cpsw_priv.h"
+#include "davinci_cpdma.h"
+
+struct cpsw_hw_stats {
+	u32	rxgoodframes;
+	u32	rxbroadcastframes;
+	u32	rxmulticastframes;
+	u32	rxpauseframes;
+	u32	rxcrcerrors;
+	u32	rxaligncodeerrors;
+	u32	rxoversizedframes;
+	u32	rxjabberframes;
+	u32	rxundersizedframes;
+	u32	rxfragments;
+	u32	__pad_0[2];
+	u32	rxoctets;
+	u32	txgoodframes;
+	u32	txbroadcastframes;
+	u32	txmulticastframes;
+	u32	txpauseframes;
+	u32	txdeferredframes;
+	u32	txcollisionframes;
+	u32	txsinglecollframes;
+	u32	txmultcollframes;
+	u32	txexcessivecollisions;
+	u32	txlatecollisions;
+	u32	txunderrun;
+	u32	txcarriersenseerrors;
+	u32	txoctets;
+	u32	octetframes64;
+	u32	octetframes65t127;
+	u32	octetframes128t255;
+	u32	octetframes256t511;
+	u32	octetframes512t1023;
+	u32	octetframes1024tup;
+	u32	netoctets;
+	u32	rxsofoverruns;
+	u32	rxmofoverruns;
+	u32	rxdmaoverruns;
+};
+
+struct cpsw_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int type;
+	int sizeof_stat;
+	int stat_offset;
+};
+
+enum {
+	CPSW_STATS,
+	CPDMA_RX_STATS,
+	CPDMA_TX_STATS,
+};
+
+#define CPSW_STAT(m)		CPSW_STATS,				\
+				FIELD_SIZEOF(struct cpsw_hw_stats, m), \
+				offsetof(struct cpsw_hw_stats, m)
+#define CPDMA_RX_STAT(m)	CPDMA_RX_STATS,				   \
+				FIELD_SIZEOF(struct cpdma_chan_stats, m), \
+				offsetof(struct cpdma_chan_stats, m)
+#define CPDMA_TX_STAT(m)	CPDMA_TX_STATS,				   \
+				FIELD_SIZEOF(struct cpdma_chan_stats, m), \
+				offsetof(struct cpdma_chan_stats, m)
+
+static const struct cpsw_stats cpsw_gstrings_stats[] = {
+	{ "Good Rx Frames", CPSW_STAT(rxgoodframes) },
+	{ "Broadcast Rx Frames", CPSW_STAT(rxbroadcastframes) },
+	{ "Multicast Rx Frames", CPSW_STAT(rxmulticastframes) },
+	{ "Pause Rx Frames", CPSW_STAT(rxpauseframes) },
+	{ "Rx CRC Errors", CPSW_STAT(rxcrcerrors) },
+	{ "Rx Align/Code Errors", CPSW_STAT(rxaligncodeerrors) },
+	{ "Oversize Rx Frames", CPSW_STAT(rxoversizedframes) },
+	{ "Rx Jabbers", CPSW_STAT(rxjabberframes) },
+	{ "Undersize (Short) Rx Frames", CPSW_STAT(rxundersizedframes) },
+	{ "Rx Fragments", CPSW_STAT(rxfragments) },
+	{ "Rx Octets", CPSW_STAT(rxoctets) },
+	{ "Good Tx Frames", CPSW_STAT(txgoodframes) },
+	{ "Broadcast Tx Frames", CPSW_STAT(txbroadcastframes) },
+	{ "Multicast Tx Frames", CPSW_STAT(txmulticastframes) },
+	{ "Pause Tx Frames", CPSW_STAT(txpauseframes) },
+	{ "Deferred Tx Frames", CPSW_STAT(txdeferredframes) },
+	{ "Collisions", CPSW_STAT(txcollisionframes) },
+	{ "Single Collision Tx Frames", CPSW_STAT(txsinglecollframes) },
+	{ "Multiple Collision Tx Frames", CPSW_STAT(txmultcollframes) },
+	{ "Excessive Collisions", CPSW_STAT(txexcessivecollisions) },
+	{ "Late Collisions", CPSW_STAT(txlatecollisions) },
+	{ "Tx Underrun", CPSW_STAT(txunderrun) },
+	{ "Carrier Sense Errors", CPSW_STAT(txcarriersenseerrors) },
+	{ "Tx Octets", CPSW_STAT(txoctets) },
+	{ "Rx + Tx 64 Octet Frames", CPSW_STAT(octetframes64) },
+	{ "Rx + Tx 65-127 Octet Frames", CPSW_STAT(octetframes65t127) },
+	{ "Rx + Tx 128-255 Octet Frames", CPSW_STAT(octetframes128t255) },
+	{ "Rx + Tx 256-511 Octet Frames", CPSW_STAT(octetframes256t511) },
+	{ "Rx + Tx 512-1023 Octet Frames", CPSW_STAT(octetframes512t1023) },
+	{ "Rx + Tx 1024-Up Octet Frames", CPSW_STAT(octetframes1024tup) },
+	{ "Net Octets", CPSW_STAT(netoctets) },
+	{ "Rx Start of Frame Overruns", CPSW_STAT(rxsofoverruns) },
+	{ "Rx Middle of Frame Overruns", CPSW_STAT(rxmofoverruns) },
+	{ "Rx DMA Overruns", CPSW_STAT(rxdmaoverruns) },
+};
+
+static const struct cpsw_stats cpsw_gstrings_ch_stats[] = {
+	{ "head_enqueue", CPDMA_RX_STAT(head_enqueue) },
+	{ "tail_enqueue", CPDMA_RX_STAT(tail_enqueue) },
+	{ "pad_enqueue", CPDMA_RX_STAT(pad_enqueue) },
+	{ "misqueued", CPDMA_RX_STAT(misqueued) },
+	{ "desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) },
+	{ "pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) },
+	{ "runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) },
+	{ "runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) },
+	{ "empty_dequeue", CPDMA_RX_STAT(empty_dequeue) },
+	{ "busy_dequeue", CPDMA_RX_STAT(busy_dequeue) },
+	{ "good_dequeue", CPDMA_RX_STAT(good_dequeue) },
+	{ "requeue", CPDMA_RX_STAT(requeue) },
+	{ "teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) },
+};
+
+#define CPSW_STATS_COMMON_LEN	ARRAY_SIZE(cpsw_gstrings_stats)
+#define CPSW_STATS_CH_LEN	ARRAY_SIZE(cpsw_gstrings_ch_stats)
+
+u32 cpsw_get_msglevel(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+
+	return priv->msg_enable;
+}
+
+void cpsw_set_msglevel(struct net_device *ndev, u32 value)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+
+	priv->msg_enable = value;
+}
+
+int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	coal->rx_coalesce_usecs = cpsw->coal_intvl;
+	return 0;
+}
+
+int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	u32 int_ctrl;
+	u32 num_interrupts = 0;
+	u32 prescale = 0;
+	u32 addnl_dvdr = 1;
+	u32 coal_intvl = 0;
+	struct cpsw_common *cpsw = priv->cpsw;
+
+	coal_intvl = coal->rx_coalesce_usecs;
+
+	int_ctrl =  readl(&cpsw->wr_regs->int_control);
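+	/* the pacer prescaler counts bus clocks per 4us pacing pulse */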
+	prescale = cpsw->bus_freq_mhz * 4;
+
+	if (!coal->rx_coalesce_usecs) {
+		int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN);
+		goto update_return;
+	}
+
+	if (coal_intvl < CPSW_CMINTMIN_INTVL)
+		coal_intvl = CPSW_CMINTMIN_INTVL;
+
+	if (coal_intvl > CPSW_CMINTMAX_INTVL) {
+		/* Interrupt pacer works with a 4us pulse; we can
+		 * throttle further by dilating the 4us pulse.
+		 */
+		addnl_dvdr = CPSW_INTPRESCALE_MASK / prescale;
+
+		if (addnl_dvdr > 1) {
+			prescale *= addnl_dvdr;
+			if (coal_intvl > (CPSW_CMINTMAX_INTVL * addnl_dvdr))
+				coal_intvl = (CPSW_CMINTMAX_INTVL
+						* addnl_dvdr);
+		} else {
+			addnl_dvdr = 1;
+			coal_intvl = CPSW_CMINTMAX_INTVL;
+		}
+	}
+
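+	/* rx_imax/tx_imax limit interrupts per millisecond; addnl_dvdr
+	 * compensates for the dilated 4us pulse
+	 */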
+	num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
+	writel(num_interrupts, &cpsw->wr_regs->rx_imax);
+	writel(num_interrupts, &cpsw->wr_regs->tx_imax);
+
+	int_ctrl |= CPSW_INTPACEEN;
+	int_ctrl &= (~CPSW_INTPRESCALE_MASK);
+	int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK);
+
+update_return:
+	writel(int_ctrl, &cpsw->wr_regs->int_control);
+
+	cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl);
+	cpsw->coal_intvl = coal_intvl;
+
+	return 0;
+}
+
+int cpsw_get_sset_count(struct net_device *ndev, int sset)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		return (CPSW_STATS_COMMON_LEN +
+		       (cpsw->rx_ch_num + cpsw->tx_ch_num) *
+		       CPSW_STATS_CH_LEN);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir)
+{
+	int ch_stats_len;
+	int line;
+	int i;
+
+	ch_stats_len = CPSW_STATS_CH_LEN * ch_num;
+	for (i = 0; i < ch_stats_len; i++) {
+		line = i % CPSW_STATS_CH_LEN;
+		snprintf(*p, ETH_GSTRING_LEN,
+			 "%s DMA chan %ld: %s", rx_dir ? "Rx" : "Tx",
+			 (long)(i / CPSW_STATS_CH_LEN),
+			 cpsw_gstrings_ch_stats[line].stat_string);
+		*p += ETH_GSTRING_LEN;
+	}
+}
+
+void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) {
+			memcpy(p, cpsw_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
+		cpsw_add_ch_strings(&p, cpsw->rx_ch_num, 1);
+		cpsw_add_ch_strings(&p, cpsw->tx_ch_num, 0);
+		break;
+	}
+}
+
+void cpsw_get_ethtool_stats(struct net_device *ndev,
+			    struct ethtool_stats *stats, u64 *data)
+{
+	u8 *p;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct cpdma_chan_stats ch_stats;
+	int i, l, ch;
+
+	/* Read the CPSW hw stats, then the Davinci CPDMA stats of each Rx and Tx channel */
+	for (l = 0; l < CPSW_STATS_COMMON_LEN; l++)
+		data[l] = readl(cpsw->hw_stats +
+				cpsw_gstrings_stats[l].stat_offset);
+
+	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+		cpdma_chan_get_stats(cpsw->rxv[ch].ch, &ch_stats);
+		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
+			p = (u8 *)&ch_stats +
+				cpsw_gstrings_ch_stats[i].stat_offset;
+			data[l] = *(u32 *)p;
+		}
+	}
+
+	for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
+		cpdma_chan_get_stats(cpsw->txv[ch].ch, &ch_stats);
+		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
+			p = (u8 *)&ch_stats +
+				cpsw_gstrings_ch_stats[i].stat_offset;
+			data[l] = *(u32 *)p;
+		}
+	}
+}
+
+void cpsw_get_pauseparam(struct net_device *ndev,
+			 struct ethtool_pauseparam *pause)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+
+	pause->autoneg = AUTONEG_DISABLE;
+	pause->rx_pause = priv->rx_pause ? true : false;
+	pause->tx_pause = priv->tx_pause ? true : false;
+}
+
+void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (cpsw->slaves[slave_no].phy)
+		phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol);
+}
+
+int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol);
+	else
+		return -EOPNOTSUPP;
+}
+
+int cpsw_get_regs_len(struct net_device *ndev)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
+}
+
+void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p)
+{
+	u32 *reg = p;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	/* update CPSW IP version */
+	regs->version = cpsw->version;
+
+	cpsw_ale_dump(cpsw->ale, reg);
+}
+
+int cpsw_ethtool_op_begin(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int ret;
+
+	ret = pm_runtime_get_sync(cpsw->dev);
+	if (ret < 0) {
+		cpsw_err(priv, drv, "ethtool begin failed %d\n", ret);
+		pm_runtime_put_noidle(cpsw->dev);
+	}
+
+	return ret;
+}
+
+void cpsw_ethtool_op_complete(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	ret = pm_runtime_put(priv->cpsw->dev);
+	if (ret < 0)
+		cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
+}
+
+void cpsw_get_channels(struct net_device *ndev, struct ethtool_channels *ch)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	ch->max_rx = cpsw->quirk_irq ? 1 : CPSW_MAX_QUEUES;
+	ch->max_tx = cpsw->quirk_irq ? 1 : CPSW_MAX_QUEUES;
+	ch->max_combined = 0;
+	ch->max_other = 0;
+	ch->other_count = 0;
+	ch->rx_count = cpsw->rx_ch_num;
+	ch->tx_count = cpsw->tx_ch_num;
+	ch->combined_count = 0;
+}
+
+int cpsw_get_link_ksettings(struct net_device *ndev,
+			    struct ethtool_link_ksettings *ecmd)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (!cpsw->slaves[slave_no].phy)
+		return -EOPNOTSUPP;
+
+	phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy, ecmd);
+	return 0;
+}
+
+int cpsw_set_link_ksettings(struct net_device *ndev,
+			    const struct ethtool_link_ksettings *ecmd)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (!cpsw->slaves[slave_no].phy)
+		return -EOPNOTSUPP;
+
+	return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy, ecmd);
+}
+
+int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_get_eee(cpsw->slaves[slave_no].phy, edata);
+	else
+		return -EOPNOTSUPP;
+}
+
+int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_set_eee(cpsw->slaves[slave_no].phy, edata);
+	else
+		return -EOPNOTSUPP;
+}
+
+int cpsw_nway_reset(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
+
+	if (cpsw->slaves[slave_no].phy)
+		return genphy_restart_aneg(cpsw->slaves[slave_no].phy);
+	else
+		return -EOPNOTSUPP;
+}
+
+static void cpsw_suspend_data_pass(struct net_device *ndev)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	int i;
+
+	/* Disable NAPI scheduling */
+	cpsw_intr_disable(cpsw);
+
+	/* Stop all transmit queues for every network device */
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		ndev = cpsw->slaves[i].ndev;
+		if (!(ndev && netif_running(ndev)))
+			continue;
+
+		netif_tx_stop_all_queues(ndev);
+
+		/* Barrier, so that the stopped queue is visible to other CPUs */
+		smp_mb__after_atomic();
+	}
+
+	/* Handle rest of tx packets and stop cpdma channels */
+	cpdma_ctlr_stop(cpsw->dma);
+}
+
+static int cpsw_resume_data_pass(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int i, ret;
+
+	/* After this, reception is restarted */
+	if (cpsw->usage_count) {
+		ret = cpsw_fill_rx_channels(priv);
+		if (ret)
+			return ret;
+
+		cpdma_ctlr_start(cpsw->dma);
+		cpsw_intr_enable(cpsw);
+	}
+
+	/* Resume transmit for every affected interface */
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		ndev = cpsw->slaves[i].ndev;
+		if (ndev && netif_running(ndev))
+			netif_tx_start_all_queues(ndev);
+	}
+
+	return 0;
+}
+
+static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
+				  struct ethtool_channels *ch)
+{
+	if (cpsw->quirk_irq) {
+		dev_err(cpsw->dev, "Maximum one tx/rx queue is allowed\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (ch->combined_count)
+		return -EINVAL;
+
+	/* verify we have at least one channel in each direction */
+	if (!ch->rx_count || !ch->tx_count)
+		return -EINVAL;
+
+	if (ch->rx_count > cpsw->data.channels ||
+	    ch->tx_count > cpsw->data.channels)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx,
+				    cpdma_handler_fn rx_handler)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	void (*handler)(void *, int, int);
+	struct netdev_queue *queue;
+	struct cpsw_vector *vec;
+	int ret, *ch, vch;
+
+	if (rx) {
+		ch = &cpsw->rx_ch_num;
+		vec = cpsw->rxv;
+		handler = rx_handler;
+	} else {
+		ch = &cpsw->tx_ch_num;
+		vec = cpsw->txv;
+		handler = cpsw_tx_handler;
+	}
+
+	while (*ch < ch_num) {
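+		/* rx vectors take hw channels 0..n upwards; tx vectors map in
+		 * reverse (queue 0 -> hw channel 7), as at probe time
+		 */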
+		vch = rx ? *ch : 7 - *ch;
+		vec[*ch].ch = cpdma_chan_create(cpsw->dma, vch, handler, rx);
+		queue = netdev_get_tx_queue(priv->ndev, *ch);
+		queue->tx_maxrate = 0;
+
+		if (IS_ERR(vec[*ch].ch))
+			return PTR_ERR(vec[*ch].ch);
+
+		if (!vec[*ch].ch)
+			return -EINVAL;
+
+		cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
+			  (rx ? "rx" : "tx"));
+		(*ch)++;
+	}
+
+	while (*ch > ch_num) {
+		(*ch)--;
+
+		ret = cpdma_chan_destroy(vec[*ch].ch);
+		if (ret)
+			return ret;
+
+		cpsw_info(priv, ifup, "destroyed %d %s channel\n", *ch,
+			  (rx ? "rx" : "tx"));
+	}
+
+	return 0;
+}
+
+static void cpsw_fail(struct cpsw_common *cpsw)
+{
+	struct net_device *ndev;
+	int i;
+
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		ndev = cpsw->slaves[i].ndev;
+		if (ndev)
+			dev_close(ndev);
+	}
+}
+
+int cpsw_set_channels_common(struct net_device *ndev,
+			     struct ethtool_channels *chs,
+			     cpdma_handler_fn rx_handler)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct net_device *sl_ndev;
+	int i, new_pools, ret;
+
+	ret = cpsw_check_ch_settings(cpsw, chs);
+	if (ret < 0)
+		return ret;
+
+	cpsw_suspend_data_pass(ndev);
+
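+	/* XDP rx queues and their page pools are per rx channel, so they
+	 * must be recreated when the rx channel count changes on a busy device
+	 */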
+	new_pools = (chs->rx_count != cpsw->rx_ch_num) && cpsw->usage_count;
+
+	ret = cpsw_update_channels_res(priv, chs->rx_count, 1, rx_handler);
+	if (ret)
+		goto err;
+
+	ret = cpsw_update_channels_res(priv, chs->tx_count, 0, rx_handler);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		sl_ndev = cpsw->slaves[i].ndev;
+		if (!(sl_ndev && netif_running(sl_ndev)))
+			continue;
+
+		/* Inform stack about new count of queues */
+		ret = netif_set_real_num_tx_queues(sl_ndev, cpsw->tx_ch_num);
+		if (ret) {
+			dev_err(priv->dev, "cannot set real number of tx queues\n");
+			goto err;
+		}
+
+		ret = netif_set_real_num_rx_queues(sl_ndev, cpsw->rx_ch_num);
+		if (ret) {
+			dev_err(priv->dev, "cannot set real number of rx queues\n");
+			goto err;
+		}
+	}
+
+	cpsw_split_res(cpsw);
+
+	if (new_pools) {
+		cpsw_destroy_xdp_rxqs(cpsw);
+		ret = cpsw_create_xdp_rxqs(cpsw);
+		if (ret)
+			goto err;
+	}
+
+	ret = cpsw_resume_data_pass(ndev);
+	if (!ret)
+		return 0;
+err:
+	dev_err(priv->dev, "cannot update channels number, closing device\n");
+	cpsw_fail(cpsw);
+	return ret;
+}
+
+void cpsw_get_ringparam(struct net_device *ndev,
+			struct ethtool_ringparam *ering)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+
+	/* changing tx_pending is not supported, only the limits are reported */
+	ering->tx_max_pending = cpsw->descs_pool_size - CPSW_MAX_QUEUES;
+	ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma);
+	ering->rx_max_pending = cpsw->descs_pool_size - CPSW_MAX_QUEUES;
+	ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma);
+}
+
+int cpsw_set_ringparam(struct net_device *ndev,
+		       struct ethtool_ringparam *ering)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	int descs_num, ret;
+
+	/* ignore ering->tx_pending - only rx_pending adjustment is supported */
+
+	if (ering->rx_mini_pending || ering->rx_jumbo_pending ||
+	    ering->rx_pending < CPSW_MAX_QUEUES ||
+	    ering->rx_pending > (cpsw->descs_pool_size - CPSW_MAX_QUEUES))
+		return -EINVAL;
+
+	descs_num = cpdma_get_num_rx_descs(cpsw->dma);
+	if (ering->rx_pending == descs_num)
+		return 0;
+
+	cpsw_suspend_data_pass(ndev);
+
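+	/* re-split the descriptor pool; on failure resume traffic with the old value */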
+	ret = cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
+	if (ret) {
+		if (cpsw_resume_data_pass(ndev))
+			goto err;
+
+		return ret;
+	}
+
+	if (cpsw->usage_count) {
+		cpsw_destroy_xdp_rxqs(cpsw);
+		ret = cpsw_create_xdp_rxqs(cpsw);
+		if (ret)
+			goto err;
+	}
+
+	ret = cpsw_resume_data_pass(ndev);
+	if (!ret)
+		return 0;
+err:
+	cpdma_set_num_rx_descs(cpsw->dma, descs_num);
+	dev_err(cpsw->dev, "cannot set ring params, closing device\n");
+	cpsw_fail(cpsw);
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_TI_CPTS)
+int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_TX_SOFTWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RX_SOFTWARE |
+		SOF_TIMESTAMPING_SOFTWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->phc_index = cpsw->cpts->phc_index;
+	info->tx_types =
+		(1 << HWTSTAMP_TX_OFF) |
+		(1 << HWTSTAMP_TX_ON);
+	info->rx_filters =
+		(1 << HWTSTAMP_FILTER_NONE) |
+		(1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+	return 0;
+}
+#else
+int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info)
+{
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_SOFTWARE |
+		SOF_TIMESTAMPING_RX_SOFTWARE |
+		SOF_TIMESTAMPING_SOFTWARE;
+	info->phc_index = -1;
+	info->tx_types = 0;
+	info->rx_filters = 0;
+	return 0;
+}
+#endif
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
new file mode 100644
index 0000000..476d050
--- /dev/null
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Texas Instruments Ethernet Switch Driver
+ *
+ * Copyright (C) 2019 Texas Instruments
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+
+#include "cpts.h"
+#include "cpsw_ale.h"
+#include "cpsw_priv.h"
+#include "cpsw_sl.h"
+#include "davinci_cpdma.h"
+
+int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs,
+		     int ale_ageout, phys_addr_t desc_mem_phys,
+		     int descs_pool_size)
+{
+	u32 slave_offset, sliver_offset, slave_size;
+	struct cpsw_ale_params ale_params;
+	struct cpsw_platform_data *data;
+	struct cpdma_params dma_params;
+	struct device *dev = cpsw->dev;
+	void __iomem *cpts_regs;
+	int ret = 0, i;
+
+	data = &cpsw->data;
+	cpsw->rx_ch_num = 1;
+	cpsw->tx_ch_num = 1;
+
+	cpsw->version = readl(&cpsw->regs->id_ver);
+
+	memset(&dma_params, 0, sizeof(dma_params));
+	memset(&ale_params, 0, sizeof(ale_params));
+
+	switch (cpsw->version) {
+	case CPSW_VERSION_1:
+		cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
+		cpts_regs	     = ss_regs + CPSW1_CPTS_OFFSET;
+		cpsw->hw_stats	     = ss_regs + CPSW1_HW_STATS;
+		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
+		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
+		ale_params.ale_regs  = ss_regs + CPSW1_ALE_OFFSET;
+		slave_offset         = CPSW1_SLAVE_OFFSET;
+		slave_size           = CPSW1_SLAVE_SIZE;
+		sliver_offset        = CPSW1_SLIVER_OFFSET;
+		dma_params.desc_mem_phys = 0;
+		break;
+	case CPSW_VERSION_2:
+	case CPSW_VERSION_3:
+	case CPSW_VERSION_4:
+		cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
+		cpts_regs	     = ss_regs + CPSW2_CPTS_OFFSET;
+		cpsw->hw_stats	     = ss_regs + CPSW2_HW_STATS;
+		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
+		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
+		ale_params.ale_regs  = ss_regs + CPSW2_ALE_OFFSET;
+		slave_offset         = CPSW2_SLAVE_OFFSET;
+		slave_size           = CPSW2_SLAVE_SIZE;
+		sliver_offset        = CPSW2_SLIVER_OFFSET;
+		dma_params.desc_mem_phys = desc_mem_phys;
+		break;
+	default:
+		dev_err(dev, "unknown version 0x%08x\n", cpsw->version);
+		return -ENODEV;
+	}
+
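+	/* carve the per-port slave and sliver register windows out of the subsystem mapping */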
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		struct cpsw_slave *slave = &cpsw->slaves[i];
+		void __iomem		*regs = cpsw->regs;
+
+		slave->slave_num = i;
+		slave->data	= &cpsw->data.slave_data[i];
+		slave->regs	= regs + slave_offset;
+		slave->port_vlan = slave->data->dual_emac_res_vlan;
+		slave->mac_sl = cpsw_sl_get("cpsw", dev, regs + sliver_offset);
+		if (IS_ERR(slave->mac_sl))
+			return PTR_ERR(slave->mac_sl);
+
+		slave_offset  += slave_size;
+		sliver_offset += SLIVER_SIZE;
+	}
+
+	ale_params.dev			= dev;
+	ale_params.ale_ageout		= ale_ageout;
+	ale_params.ale_entries		= data->ale_entries;
+	ale_params.ale_ports		= CPSW_ALE_PORTS_NUM;
+
+	cpsw->ale = cpsw_ale_create(&ale_params);
+	if (!cpsw->ale) {
+		dev_err(dev, "error initializing ale engine\n");
+		return -ENODEV;
+	}
+
+	dma_params.dev		= dev;
+	dma_params.rxthresh	= dma_params.dmaregs + CPDMA_RXTHRESH;
+	dma_params.rxfree	= dma_params.dmaregs + CPDMA_RXFREE;
+	dma_params.rxhdp	= dma_params.txhdp + CPDMA_RXHDP;
+	dma_params.txcp		= dma_params.txhdp + CPDMA_TXCP;
+	dma_params.rxcp		= dma_params.txhdp + CPDMA_RXCP;
+
+	dma_params.num_chan		= data->channels;
+	dma_params.has_soft_reset	= true;
+	dma_params.min_packet_size	= CPSW_MIN_PACKET_SIZE;
+	dma_params.desc_mem_size	= data->bd_ram_size;
+	dma_params.desc_align		= 16;
+	dma_params.has_ext_regs		= true;
+	dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
+	dma_params.bus_freq_mhz		= cpsw->bus_freq_mhz;
+	dma_params.descs_pool_size	= descs_pool_size;
+
+	cpsw->dma = cpdma_ctlr_create(&dma_params);
+	if (!cpsw->dma) {
+		dev_err(dev, "error initializing dma\n");
+		return -ENOMEM;
+	}
+
+	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
+	if (IS_ERR(cpsw->cpts)) {
+		ret = PTR_ERR(cpsw->cpts);
+		cpdma_ctlr_destroy(cpsw->dma);
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
new file mode 100644
index 0000000..362c5a9
--- /dev/null
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Texas Instruments Ethernet Switch Driver
+ */
+
+#ifndef DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_
+#define DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_
+
+#include "davinci_cpdma.h"
+
+#define CPSW_DEBUG	(NETIF_MSG_HW		| NETIF_MSG_WOL		| \
+			 NETIF_MSG_DRV		| NETIF_MSG_LINK	| \
+			 NETIF_MSG_IFUP		| NETIF_MSG_INTR	| \
+			 NETIF_MSG_PROBE	| NETIF_MSG_TIMER	| \
+			 NETIF_MSG_IFDOWN	| NETIF_MSG_RX_ERR	| \
+			 NETIF_MSG_TX_ERR	| NETIF_MSG_TX_DONE	| \
+			 NETIF_MSG_PKTDATA	| NETIF_MSG_TX_QUEUED	| \
+			 NETIF_MSG_RX_STATUS)
+
+#define cpsw_info(priv, type, format, ...)		\
+do {								\
+	if (netif_msg_##type(priv) && net_ratelimit())		\
+		dev_info(priv->dev, format, ## __VA_ARGS__);	\
+} while (0)
+
+#define cpsw_err(priv, type, format, ...)		\
+do {								\
+	if (netif_msg_##type(priv) && net_ratelimit())		\
+		dev_err(priv->dev, format, ## __VA_ARGS__);	\
+} while (0)
+
+#define cpsw_dbg(priv, type, format, ...)		\
+do {								\
+	if (netif_msg_##type(priv) && net_ratelimit())		\
+		dev_dbg(priv->dev, format, ## __VA_ARGS__);	\
+} while (0)
+
+#define cpsw_notice(priv, type, format, ...)		\
+do {								\
+	if (netif_msg_##type(priv) && net_ratelimit())		\
+		dev_notice(priv->dev, format, ## __VA_ARGS__);	\
+} while (0)
+
+#define ALE_ALL_PORTS		0x7
+
+#define CPSW_MAJOR_VERSION(reg)		(reg >> 8 & 0x7)
+#define CPSW_MINOR_VERSION(reg)		(reg & 0xff)
+#define CPSW_RTL_VERSION(reg)		((reg >> 11) & 0x1f)
+
+#define CPSW_VERSION_1		0x19010a
+#define CPSW_VERSION_2		0x19010c
+#define CPSW_VERSION_3		0x19010f
+#define CPSW_VERSION_4		0x190112
+
+#define HOST_PORT_NUM		0
+#define CPSW_ALE_PORTS_NUM	3
+#define SLIVER_SIZE		0x40
+
+#define CPSW1_HOST_PORT_OFFSET	0x028
+#define CPSW1_SLAVE_OFFSET	0x050
+#define CPSW1_SLAVE_SIZE	0x040
+#define CPSW1_CPDMA_OFFSET	0x100
+#define CPSW1_STATERAM_OFFSET	0x200
+#define CPSW1_HW_STATS		0x400
+#define CPSW1_CPTS_OFFSET	0x500
+#define CPSW1_ALE_OFFSET	0x600
+#define CPSW1_SLIVER_OFFSET	0x700
+
+#define CPSW2_HOST_PORT_OFFSET	0x108
+#define CPSW2_SLAVE_OFFSET	0x200
+#define CPSW2_SLAVE_SIZE	0x100
+#define CPSW2_CPDMA_OFFSET	0x800
+#define CPSW2_HW_STATS		0x900
+#define CPSW2_STATERAM_OFFSET	0xa00
+#define CPSW2_CPTS_OFFSET	0xc00
+#define CPSW2_ALE_OFFSET	0xd00
+#define CPSW2_SLIVER_OFFSET	0xd80
+#define CPSW2_BD_OFFSET		0x2000
+
+#define CPDMA_RXTHRESH		0x0c0
+#define CPDMA_RXFREE		0x0e0
+#define CPDMA_TXHDP		0x00
+#define CPDMA_RXHDP		0x20
+#define CPDMA_TXCP		0x40
+#define CPDMA_RXCP		0x60
+
+#define CPSW_POLL_WEIGHT	64
+#define CPSW_RX_VLAN_ENCAP_HDR_SIZE		4
+#define CPSW_MIN_PACKET_SIZE	(VLAN_ETH_ZLEN)
+#define CPSW_MAX_PACKET_SIZE	(VLAN_ETH_FRAME_LEN +\
+				 ETH_FCS_LEN +\
+				 CPSW_RX_VLAN_ENCAP_HDR_SIZE)
+
+#define RX_PRIORITY_MAPPING	0x76543210
+#define TX_PRIORITY_MAPPING	0x33221100
+#define CPDMA_TX_PRIORITY_MAP	0x76543210
+
+#define CPSW_VLAN_AWARE		BIT(1)
+#define CPSW_RX_VLAN_ENCAP	BIT(2)
+#define CPSW_ALE_VLAN_AWARE	1
+
+#define CPSW_FIFO_NORMAL_MODE		(0 << 16)
+#define CPSW_FIFO_DUAL_MAC_MODE		(1 << 16)
+#define CPSW_FIFO_RATE_LIMIT_MODE	(2 << 16)
+
+#define CPSW_INTPACEEN		(0x3f << 16)
+#define CPSW_INTPRESCALE_MASK	(0x7FF << 0)
+#define CPSW_CMINTMAX_CNT	63
+#define CPSW_CMINTMIN_CNT	2
+#define CPSW_CMINTMAX_INTVL	(1000 / CPSW_CMINTMIN_CNT)
+#define CPSW_CMINTMIN_INTVL	((1000 / CPSW_CMINTMAX_CNT) + 1)
+
+#define IRQ_NUM			2
+#define CPSW_MAX_QUEUES		8
+#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_FIFO_QUEUE_TYPE_SHIFT	16
+#define CPSW_FIFO_SHAPE_EN_SHIFT	16
+#define CPSW_FIFO_RATE_EN_SHIFT		20
+#define CPSW_TC_NUM			4
+#define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
+#define CPSW_PCT_MASK			0x7f
+
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
+#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT	16
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT	8
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK	GENMASK(1, 0)
+enum {
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
+};
+
+struct cpsw_wr_regs {
+	u32	id_ver;
+	u32	soft_reset;
+	u32	control;
+	u32	int_control;
+	u32	rx_thresh_en;
+	u32	rx_en;
+	u32	tx_en;
+	u32	misc_en;
+	u32	mem_allign1[8];
+	u32	rx_thresh_stat;
+	u32	rx_stat;
+	u32	tx_stat;
+	u32	misc_stat;
+	u32	mem_allign2[8];
+	u32	rx_imax;
+	u32	tx_imax;
+
+};
+
+struct cpsw_ss_regs {
+	u32	id_ver;
+	u32	control;
+	u32	soft_reset;
+	u32	stat_port_en;
+	u32	ptype;
+	u32	soft_idle;
+	u32	thru_rate;
+	u32	gap_thresh;
+	u32	tx_start_wds;
+	u32	flow_control;
+	u32	vlan_ltype;
+	u32	ts_ltype;
+	u32	dlr_ltype;
+};
+
+/* CPSW_PORT_V1 */
+#define CPSW1_MAX_BLKS      0x00 /* Maximum FIFO Blocks */
+#define CPSW1_BLK_CNT       0x04 /* FIFO Block Usage Count (Read Only) */
+#define CPSW1_TX_IN_CTL     0x08 /* Transmit FIFO Control */
+#define CPSW1_PORT_VLAN     0x0c /* VLAN Register */
+#define CPSW1_TX_PRI_MAP    0x10 /* Tx Header Priority to Switch Pri Mapping */
+#define CPSW1_TS_CTL        0x14 /* Time Sync Control */
+#define CPSW1_TS_SEQ_LTYPE  0x18 /* Time Sync Sequence ID Offset and Msg Type */
+#define CPSW1_TS_VLAN       0x1c /* Time Sync VLAN1 and VLAN2 */
+
+/* CPSW_PORT_V2 */
+#define CPSW2_CONTROL       0x00 /* Control Register */
+#define CPSW2_MAX_BLKS      0x08 /* Maximum FIFO Blocks */
+#define CPSW2_BLK_CNT       0x0c /* FIFO Block Usage Count (Read Only) */
+#define CPSW2_TX_IN_CTL     0x10 /* Transmit FIFO Control */
+#define CPSW2_PORT_VLAN     0x14 /* VLAN Register */
+#define CPSW2_TX_PRI_MAP    0x18 /* Tx Header Priority to Switch Pri Mapping */
+#define CPSW2_TS_SEQ_MTYPE  0x1c /* Time Sync Sequence ID Offset and Msg Type */
+
+/* CPSW_PORT_V1 and V2 */
+#define SA_LO               0x20 /* CPGMAC_SL Source Address Low */
+#define SA_HI               0x24 /* CPGMAC_SL Source Address High */
+#define SEND_PERCENT        0x28 /* Transmit Queue Send Percentages */
+
+/* CPSW_PORT_V2 only */
+#define RX_DSCP_PRI_MAP0    0x30 /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP1    0x34 /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP2    0x38 /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP3    0x3c /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP4    0x40 /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP5    0x44 /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP6    0x48 /* Rx DSCP Priority to Rx Packet Mapping */
+#define RX_DSCP_PRI_MAP7    0x4c /* Rx DSCP Priority to Rx Packet Mapping */
+
+/* Bit definitions for the CPSW2_CONTROL register */
+#define PASS_PRI_TAGGED     BIT(24) /* Pass Priority Tagged */
+#define VLAN_LTYPE2_EN      BIT(21) /* VLAN LTYPE 2 enable */
+#define VLAN_LTYPE1_EN      BIT(20) /* VLAN LTYPE 1 enable */
+#define DSCP_PRI_EN         BIT(16) /* DSCP Priority Enable */
+#define TS_107              BIT(15) /* Time Sync Dest IP Address 107 */
+#define TS_320              BIT(14) /* Time Sync Dest Port 320 enable */
+#define TS_319              BIT(13) /* Time Sync Dest Port 319 enable */
+#define TS_132              BIT(12) /* Time Sync Dest IP Addr 132 enable */
+#define TS_131              BIT(11) /* Time Sync Dest IP Addr 131 enable */
+#define TS_130              BIT(10) /* Time Sync Dest IP Addr 130 enable */
+#define TS_129              BIT(9)  /* Time Sync Dest IP Addr 129 enable */
+#define TS_TTL_NONZERO      BIT(8)  /* Time Sync Time To Live Non-zero enable */
+#define TS_ANNEX_F_EN       BIT(6)  /* Time Sync Annex F enable */
+#define TS_ANNEX_D_EN       BIT(4)  /* Time Sync Annex D enable */
+#define TS_LTYPE2_EN        BIT(3)  /* Time Sync LTYPE 2 enable */
+#define TS_LTYPE1_EN        BIT(2)  /* Time Sync LTYPE 1 enable */
+#define TS_TX_EN            BIT(1)  /* Time Sync Transmit Enable */
+#define TS_RX_EN            BIT(0)  /* Time Sync Receive Enable */
+
+#define CTRL_V2_TS_BITS \
+	(TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+	 TS_TTL_NONZERO  | TS_ANNEX_D_EN | TS_LTYPE1_EN | VLAN_LTYPE1_EN)
+
+#define CTRL_V2_ALL_TS_MASK (CTRL_V2_TS_BITS | TS_TX_EN | TS_RX_EN)
+#define CTRL_V2_TX_TS_BITS  (CTRL_V2_TS_BITS | TS_TX_EN)
+#define CTRL_V2_RX_TS_BITS  (CTRL_V2_TS_BITS | TS_RX_EN)
+
+#define CTRL_V3_TS_BITS \
+	(TS_107 | TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+	 TS_TTL_NONZERO | TS_ANNEX_F_EN | TS_ANNEX_D_EN |\
+	 TS_LTYPE1_EN | VLAN_LTYPE1_EN)
+
+#define CTRL_V3_ALL_TS_MASK (CTRL_V3_TS_BITS | TS_TX_EN | TS_RX_EN)
+#define CTRL_V3_TX_TS_BITS  (CTRL_V3_TS_BITS | TS_TX_EN)
+#define CTRL_V3_RX_TS_BITS  (CTRL_V3_TS_BITS | TS_RX_EN)
+
+/* Bit definitions for the CPSW2_TS_SEQ_MTYPE register */
+#define TS_SEQ_ID_OFFSET_SHIFT   (16)    /* Time Sync Sequence ID Offset */
+#define TS_SEQ_ID_OFFSET_MASK    (0x3f)
+#define TS_MSG_TYPE_EN_SHIFT     (0)     /* Time Sync Message Type Enable */
+#define TS_MSG_TYPE_EN_MASK      (0xffff)
+
+/* The PTP event messages - Sync, Delay_Req, Pdelay_Req, and Pdelay_Resp. */
+#define EVENT_MSG_BITS ((1<<0) | (1<<1) | (1<<2) | (1<<3))
+
+/* Bit definitions for the CPSW1_TS_CTL register */
+#define CPSW_V1_TS_RX_EN		BIT(0)
+#define CPSW_V1_TS_TX_EN		BIT(4)
+#define CPSW_V1_MSG_TYPE_OFS		16
+
+/* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */
+#define CPSW_V1_SEQ_ID_OFS_SHIFT	16
+
+#define CPSW_MAX_BLKS_TX		15
+#define CPSW_MAX_BLKS_TX_SHIFT		4
+#define CPSW_MAX_BLKS_RX		5
+
+struct cpsw_host_regs {
+	u32	max_blks;
+	u32	blk_cnt;
+	u32	tx_in_ctl;
+	u32	port_vlan;
+	u32	tx_pri_map;
+	u32	cpdma_tx_pri_map;
+	u32	cpdma_rx_chan_map;
+};
+
+struct cpsw_slave_data {
+	struct device_node *slave_node;
+	struct device_node *phy_node;
+	char		phy_id[MII_BUS_ID_SIZE];
+	int		phy_if;
+	u8		mac_addr[ETH_ALEN];
+	u16		dual_emac_res_vlan;	/* Reserved VLAN for DualEMAC */
+	struct phy	*ifphy;
+};
+
+struct cpsw_platform_data {
+	struct cpsw_slave_data	*slave_data;
+	u32	ss_reg_ofs;	/* Subsystem control register offset */
+	u32	channels;	/* number of cpdma channels (symmetric) */
+	u32	slaves;		/* number of slave cpgmac ports */
+	u32	active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
+	u32	ale_entries;	/* ale table size */
+	u32	bd_ram_size;  /* buffer descriptor ram size */
+	u32	mac_control;	/* Mac control register */
+	u16	default_vlan;	/* Def VLAN for ALE lookup in VLAN aware mode */
+	bool	dual_emac;	/* Enable Dual EMAC mode */
+};
+
+struct cpsw_slave {
+	void __iomem			*regs;
+	int				slave_num;
+	u32				mac_control;
+	struct cpsw_slave_data		*data;
+	struct phy_device		*phy;
+	struct net_device		*ndev;
+	u32				port_vlan;
+	struct cpsw_sl			*mac_sl;
+};
+
+static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
+{
+	return readl_relaxed(slave->regs + offset);
+}
+
+static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset)
+{
+	writel_relaxed(val, slave->regs + offset);
+}
+
+struct cpsw_vector {
+	struct cpdma_chan *ch;
+	int budget;
+};
+
+struct cpsw_common {
+	struct device			*dev;
+	struct cpsw_platform_data	data;
+	struct napi_struct		napi_rx;
+	struct napi_struct		napi_tx;
+	struct cpsw_ss_regs __iomem	*regs;
+	struct cpsw_wr_regs __iomem	*wr_regs;
+	u8 __iomem			*hw_stats;
+	struct cpsw_host_regs __iomem	*host_port_regs;
+	u32				version;
+	u32				coal_intvl;
+	u32				bus_freq_mhz;
+	int				rx_packet_max;
+	int				descs_pool_size;
+	struct cpsw_slave		*slaves;
+	struct cpdma_ctlr		*dma;
+	struct cpsw_vector		txv[CPSW_MAX_QUEUES];
+	struct cpsw_vector		rxv[CPSW_MAX_QUEUES];
+	struct cpsw_ale			*ale;
+	bool				quirk_irq;
+	bool				rx_irq_disabled;
+	bool				tx_irq_disabled;
+	u32 irqs_table[IRQ_NUM];
+	struct cpts			*cpts;
+	int				rx_ch_num, tx_ch_num;
+	int				speed;
+	int				usage_count;
+	struct page_pool		*page_pool[CPSW_MAX_QUEUES];
+};
+
+struct cpsw_priv {
+	struct net_device		*ndev;
+	struct device			*dev;
+	u32				msg_enable;
+	u8				mac_addr[ETH_ALEN];
+	bool				rx_pause;
+	bool				tx_pause;
+	bool				mqprio_hw;
+	int				fifo_bw[CPSW_TC_NUM];
+	int				shp_cfg_speed;
+	int				tx_ts_enabled;
+	int				rx_ts_enabled;
+	struct bpf_prog			*xdp_prog;
+	struct xdp_rxq_info		xdp_rxq[CPSW_MAX_QUEUES];
+	struct xdp_attachment_info	xdpi;
+
+	u32 emac_port;
+	struct cpsw_common *cpsw;
+};
+
+#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
+#define napi_to_cpsw(napi)	container_of(napi, struct cpsw_common, napi)
+
+#define cpsw_slave_index(cpsw, priv)				\
+		((cpsw->data.dual_emac) ? priv->emac_port :	\
+		cpsw->data.active_slave)
+
+static inline int cpsw_get_slave_port(u32 slave_num)
+{
+	return slave_num + 1;
+}
+
+struct addr_sync_ctx {
+	struct net_device *ndev;
+	const u8 *addr;		/* address to be synched */
+	int consumed;		/* number of address instances */
+	int flush;		/* flush flag */
+};
+
+int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs,
+		     int ale_ageout, phys_addr_t desc_mem_phys,
+		     int descs_pool_size);
+void cpsw_split_res(struct cpsw_common *cpsw);
+int cpsw_fill_rx_channels(struct cpsw_priv *priv);
+void cpsw_intr_enable(struct cpsw_common *cpsw);
+void cpsw_intr_disable(struct cpsw_common *cpsw);
+void cpsw_tx_handler(void *token, int len, int status);
+int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw);
+void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw);
+
+/* ethtool */
+u32 cpsw_get_msglevel(struct net_device *ndev);
+void cpsw_set_msglevel(struct net_device *ndev, u32 value);
+int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal);
+int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal);
+int cpsw_get_sset_count(struct net_device *ndev, int sset);
+void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data);
+void cpsw_get_ethtool_stats(struct net_device *ndev,
+			    struct ethtool_stats *stats, u64 *data);
+void cpsw_get_pauseparam(struct net_device *ndev,
+			 struct ethtool_pauseparam *pause);
+void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol);
+int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol);
+int cpsw_get_regs_len(struct net_device *ndev);
+void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p);
+int cpsw_ethtool_op_begin(struct net_device *ndev);
+void cpsw_ethtool_op_complete(struct net_device *ndev);
+void cpsw_get_channels(struct net_device *ndev, struct ethtool_channels *ch);
+int cpsw_get_link_ksettings(struct net_device *ndev,
+			    struct ethtool_link_ksettings *ecmd);
+int cpsw_set_link_ksettings(struct net_device *ndev,
+			    const struct ethtool_link_ksettings *ecmd);
+int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata);
+int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata);
+int cpsw_nway_reset(struct net_device *ndev);
+void cpsw_get_ringparam(struct net_device *ndev,
+			struct ethtool_ringparam *ering);
+int cpsw_set_ringparam(struct net_device *ndev,
+		       struct ethtool_ringparam *ering);
+int cpsw_set_channels_common(struct net_device *ndev,
+			     struct ethtool_channels *chs,
+			     cpdma_handler_fn rx_handler);
+int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info);
+
+#endif /* DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_ */
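A minimal sketch of how the message macros and version accessors above fit together at probe time, assuming a populated struct cpsw_priv *priv and an id_ver value already read from the switch (cpsw_print_version is a hypothetical helper, not part of this patch):

	/* Log the decoded id_ver fields, gated on netif_msg_probe() and
	 * rate-limited, exactly as cpsw_notice() arranges above.
	 */
	static void cpsw_print_version(struct cpsw_priv *priv, u32 id_ver)
	{
		cpsw_notice(priv, probe, "CPSW version %d.%d (%d)\n",
			    CPSW_MAJOR_VERSION(id_ver),
			    CPSW_MINOR_VERSION(id_ver),
			    CPSW_RTL_VERSION(id_ver));
	}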
diff --git a/drivers/net/ethernet/ti/cpsw_sl.c b/drivers/net/ethernet/ti/cpsw_sl.c
new file mode 100644
index 0000000..0c7531c
--- /dev/null
+++ b/drivers/net/ethernet/ti/cpsw_sl.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Texas Instruments Ethernet Switch media-access-controller (MAC) submodule/
+ * Ethernet MAC Sliver (CPGMAC_SL)
+ *
+ * Copyright (C) 2019 Texas Instruments
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+
+#include "cpsw_sl.h"
+
+#define CPSW_SL_REG_NOTUSED U16_MAX
+
+static const u16 cpsw_sl_reg_map_cpsw[] = {
+	[CPSW_SL_IDVER] = 0x00,
+	[CPSW_SL_MACCONTROL] = 0x04,
+	[CPSW_SL_MACSTATUS] = 0x08,
+	[CPSW_SL_SOFT_RESET] = 0x0c,
+	[CPSW_SL_RX_MAXLEN] = 0x10,
+	[CPSW_SL_BOFFTEST] = 0x14,
+	[CPSW_SL_RX_PAUSE] = 0x18,
+	[CPSW_SL_TX_PAUSE] = 0x1c,
+	[CPSW_SL_EMCONTROL] = 0x20,
+	[CPSW_SL_RX_PRI_MAP] = 0x24,
+	[CPSW_SL_TX_GAP] = 0x28,
+};
+
+static const u16 cpsw_sl_reg_map_66ak2hk[] = {
+	[CPSW_SL_IDVER] = 0x00,
+	[CPSW_SL_MACCONTROL] = 0x04,
+	[CPSW_SL_MACSTATUS] = 0x08,
+	[CPSW_SL_SOFT_RESET] = 0x0c,
+	[CPSW_SL_RX_MAXLEN] = 0x10,
+	[CPSW_SL_BOFFTEST] = CPSW_SL_REG_NOTUSED,
+	[CPSW_SL_RX_PAUSE] = 0x18,
+	[CPSW_SL_TX_PAUSE] = 0x1c,
+	[CPSW_SL_EMCONTROL] = 0x20,
+	[CPSW_SL_RX_PRI_MAP] = 0x24,
+	[CPSW_SL_TX_GAP] = CPSW_SL_REG_NOTUSED,
+};
+
+static const u16 cpsw_sl_reg_map_66ak2x_xgbe[] = {
+	[CPSW_SL_IDVER] = 0x00,
+	[CPSW_SL_MACCONTROL] = 0x04,
+	[CPSW_SL_MACSTATUS] = 0x08,
+	[CPSW_SL_SOFT_RESET] = 0x0c,
+	[CPSW_SL_RX_MAXLEN] = 0x10,
+	[CPSW_SL_BOFFTEST] = CPSW_SL_REG_NOTUSED,
+	[CPSW_SL_RX_PAUSE] = 0x18,
+	[CPSW_SL_TX_PAUSE] = 0x1c,
+	[CPSW_SL_EMCONTROL] = 0x20,
+	[CPSW_SL_RX_PRI_MAP] = CPSW_SL_REG_NOTUSED,
+	[CPSW_SL_TX_GAP] = 0x28,
+};
+
+static const u16 cpsw_sl_reg_map_66ak2elg_am65[] = {
+	[CPSW_SL_IDVER] = CPSW_SL_REG_NOTUSED,
+	[CPSW_SL_MACCONTROL] = 0x00,
+	[CPSW_SL_MACSTATUS] = 0x04,
+	[CPSW_SL_SOFT_RESET] = 0x08,
+	[CPSW_SL_RX_MAXLEN] = CPSW_SL_REG_NOTUSED,
+	[CPSW_SL_BOFFTEST] = 0x0c,
+	[CPSW_SL_RX_PAUSE] = 0x10,
+	[CPSW_SL_TX_PAUSE] = 0x40,
+	[CPSW_SL_EMCONTROL] = 0x70,
+	[CPSW_SL_RX_PRI_MAP] = CPSW_SL_REG_NOTUSED,
+	[CPSW_SL_TX_GAP] = 0x74,
+};
+
+#define CPSW_SL_SOFT_RESET_BIT		BIT(0)
+
+#define CPSW_SL_STATUS_PN_IDLE		BIT(31)
+#define CPSW_SL_AM65_STATUS_PN_E_IDLE	BIT(30)
+#define CPSW_SL_AM65_STATUS_PN_P_IDLE	BIT(29)
+#define CPSW_SL_AM65_STATUS_PN_TX_IDLE	BIT(28)
+
+#define CPSW_SL_STATUS_IDLE_MASK_BASE (CPSW_SL_STATUS_PN_IDLE)
+
+#define CPSW_SL_STATUS_IDLE_MASK_K3 \
+	(CPSW_SL_STATUS_IDLE_MASK_BASE | CPSW_SL_AM65_STATUS_PN_E_IDLE | \
+	 CPSW_SL_AM65_STATUS_PN_P_IDLE | CPSW_SL_AM65_STATUS_PN_TX_IDLE)
+
+#define CPSW_SL_CTL_FUNC_BASE \
+	(CPSW_SL_CTL_FULLDUPLEX |\
+	CPSW_SL_CTL_LOOPBACK |\
+	CPSW_SL_CTL_RX_FLOW_EN |\
+	CPSW_SL_CTL_TX_FLOW_EN |\
+	CPSW_SL_CTL_GMII_EN |\
+	CPSW_SL_CTL_TX_PACE |\
+	CPSW_SL_CTL_GIG |\
+	CPSW_SL_CTL_CMD_IDLE |\
+	CPSW_SL_CTL_IFCTL_A |\
+	CPSW_SL_CTL_IFCTL_B |\
+	CPSW_SL_CTL_GIG_FORCE |\
+	CPSW_SL_CTL_EXT_EN |\
+	CPSW_SL_CTL_RX_CEF_EN |\
+	CPSW_SL_CTL_RX_CSF_EN |\
+	CPSW_SL_CTL_RX_CMF_EN)
+
+struct cpsw_sl {
+	struct device *dev;
+	void __iomem *sl_base;
+	const u16 *regs;
+	u32 control_features;
+	u32 idle_mask;
+};
+
+struct cpsw_sl_dev_id {
+	const char *device_id;
+	const u16 *regs;
+	const u32 control_features;
+	const u32 regs_offset;
+	const u32 idle_mask;
+};
+
+static const struct cpsw_sl_dev_id cpsw_sl_id_match[] = {
+	{
+		.device_id = "cpsw",
+		.regs = cpsw_sl_reg_map_cpsw,
+		.control_features = CPSW_SL_CTL_FUNC_BASE |
+				    CPSW_SL_CTL_MTEST |
+				    CPSW_SL_CTL_TX_SHORT_GAP_EN |
+				    CPSW_SL_CTL_TX_SG_LIM_EN,
+		.idle_mask = CPSW_SL_STATUS_IDLE_MASK_BASE,
+	},
+	{
+		.device_id = "66ak2hk",
+		.regs = cpsw_sl_reg_map_66ak2hk,
+		.control_features = CPSW_SL_CTL_FUNC_BASE |
+				    CPSW_SL_CTL_TX_SHORT_GAP_EN,
+		.idle_mask = CPSW_SL_STATUS_IDLE_MASK_BASE,
+	},
+	{
+		.device_id = "66ak2x_xgbe",
+		.regs = cpsw_sl_reg_map_66ak2x_xgbe,
+		.control_features = CPSW_SL_CTL_FUNC_BASE |
+				    CPSW_SL_CTL_XGIG |
+				    CPSW_SL_CTL_TX_SHORT_GAP_EN |
+				    CPSW_SL_CTL_CRC_TYPE |
+				    CPSW_SL_CTL_XGMII_EN,
+		.idle_mask = CPSW_SL_STATUS_IDLE_MASK_BASE,
+	},
+	{
+		.device_id = "66ak2el",
+		.regs = cpsw_sl_reg_map_66ak2elg_am65,
+		.regs_offset = 0x330,
+		.control_features = CPSW_SL_CTL_FUNC_BASE |
+				    CPSW_SL_CTL_MTEST |
+				    CPSW_SL_CTL_TX_SHORT_GAP_EN |
+				    CPSW_SL_CTL_CRC_TYPE |
+				    CPSW_SL_CTL_EXT_EN_RX_FLO |
+				    CPSW_SL_CTL_EXT_EN_TX_FLO |
+				    CPSW_SL_CTL_TX_SG_LIM_EN,
+		.idle_mask = CPSW_SL_STATUS_IDLE_MASK_BASE,
+	},
+	{
+		.device_id = "66ak2g",
+		.regs = cpsw_sl_reg_map_66ak2elg_am65,
+		.regs_offset = 0x330,
+		.control_features = CPSW_SL_CTL_FUNC_BASE |
+				    CPSW_SL_CTL_MTEST |
+				    CPSW_SL_CTL_CRC_TYPE |
+				    CPSW_SL_CTL_EXT_EN_RX_FLO |
+				    CPSW_SL_CTL_EXT_EN_TX_FLO,
+	},
+	{
+		.device_id = "am65",
+		.regs = cpsw_sl_reg_map_66ak2elg_am65,
+		.regs_offset = 0x330,
+		.control_features = CPSW_SL_CTL_FUNC_BASE |
+				    CPSW_SL_CTL_MTEST |
+				    CPSW_SL_CTL_XGIG |
+				    CPSW_SL_CTL_TX_SHORT_GAP_EN |
+				    CPSW_SL_CTL_CRC_TYPE |
+				    CPSW_SL_CTL_XGMII_EN |
+				    CPSW_SL_CTL_EXT_EN_RX_FLO |
+				    CPSW_SL_CTL_EXT_EN_TX_FLO |
+				    CPSW_SL_CTL_TX_SG_LIM_EN |
+				    CPSW_SL_CTL_EXT_EN_XGIG,
+		.idle_mask = CPSW_SL_STATUS_IDLE_MASK_K3,
+	},
+	{ },
+};
+
+u32 cpsw_sl_reg_read(struct cpsw_sl *sl, enum cpsw_sl_regs reg)
+{
+	u32 val;
+
+	if (sl->regs[reg] == CPSW_SL_REG_NOTUSED) {
+		dev_err(sl->dev, "cpsw_sl: unsupported read reg: %d\n", reg);
+		return 0;
+	}
+
+	val = readl(sl->sl_base + sl->regs[reg]);
+	dev_dbg(sl->dev, "cpsw_sl: reg: %04X r 0x%08X\n", sl->regs[reg], val);
+	return val;
+}
+
+void cpsw_sl_reg_write(struct cpsw_sl *sl, enum cpsw_sl_regs reg, u32 val)
+{
+	if (sl->regs[reg] == CPSW_SL_REG_NOTUSED) {
+		dev_err(sl->dev, "cpsw_sl: unsupported write reg: %d\n", reg);
+		return;
+	}
+
+	dev_dbg(sl->dev, "cpsw_sl: reg: %04X w 0x%08X\n", sl->regs[reg], val);
+	writel(val, sl->sl_base + sl->regs[reg]);
+}
+
+static const struct cpsw_sl_dev_id *cpsw_sl_match_id(
+		const struct cpsw_sl_dev_id *id,
+		const char *device_id)
+{
+	if (!id || !device_id)
+		return NULL;
+
+	while (id->device_id) {
+		if (strcmp(device_id, id->device_id) == 0)
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
+struct cpsw_sl *cpsw_sl_get(const char *device_id, struct device *dev,
+			    void __iomem *sl_base)
+{
+	const struct cpsw_sl_dev_id *sl_dev_id;
+	struct cpsw_sl *sl;
+
+	sl = devm_kzalloc(dev, sizeof(struct cpsw_sl), GFP_KERNEL);
+	if (!sl)
+		return ERR_PTR(-ENOMEM);
+	sl->dev = dev;
+	sl->sl_base = sl_base;
+
+	sl_dev_id = cpsw_sl_match_id(cpsw_sl_id_match, device_id);
+	if (!sl_dev_id) {
+		dev_err(sl->dev, "cpsw_sl: dev_id %s not found.\n", device_id);
+		return ERR_PTR(-EINVAL);
+	}
+	sl->regs = sl_dev_id->regs;
+	sl->control_features = sl_dev_id->control_features;
+	sl->idle_mask = sl_dev_id->idle_mask;
+	sl->sl_base += sl_dev_id->regs_offset;
+
+	return sl;
+}
+
+void cpsw_sl_reset(struct cpsw_sl *sl, unsigned long tmo)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(tmo);
+
+	/* Set the soft reset bit */
+	cpsw_sl_reg_write(sl, CPSW_SL_SOFT_RESET, CPSW_SL_SOFT_RESET_BIT);
+
+	/* Wait for the bit to clear */
+	do {
+		usleep_range(100, 200);
+	} while ((cpsw_sl_reg_read(sl, CPSW_SL_SOFT_RESET) &
+		  CPSW_SL_SOFT_RESET_BIT) &&
+		  time_after(timeout, jiffies));
+
+	if (cpsw_sl_reg_read(sl, CPSW_SL_SOFT_RESET) & CPSW_SL_SOFT_RESET_BIT)
+		dev_err(sl->dev, "cpsw_sl failed to soft-reset.\n");
+}
+
+u32 cpsw_sl_ctl_set(struct cpsw_sl *sl, u32 ctl_funcs)
+{
+	u32 val;
+
+	if (ctl_funcs & ~sl->control_features) {
+		dev_err(sl->dev, "cpsw_sl: unsupported func 0x%08X\n",
+			ctl_funcs & (~sl->control_features));
+		return -EINVAL;
+	}
+
+	val = cpsw_sl_reg_read(sl, CPSW_SL_MACCONTROL);
+	val |= ctl_funcs;
+	cpsw_sl_reg_write(sl, CPSW_SL_MACCONTROL, val);
+
+	return 0;
+}
+
+u32 cpsw_sl_ctl_clr(struct cpsw_sl *sl, u32 ctl_funcs)
+{
+	u32 val;
+
+	if (ctl_funcs & ~sl->control_features) {
+		dev_err(sl->dev, "cpsw_sl: unsupported func 0x%08X\n",
+			ctl_funcs & (~sl->control_features));
+		return -EINVAL;
+	}
+
+	val = cpsw_sl_reg_read(sl, CPSW_SL_MACCONTROL);
+	val &= ~ctl_funcs;
+	cpsw_sl_reg_write(sl, CPSW_SL_MACCONTROL, val);
+
+	return 0;
+}
+
+void cpsw_sl_ctl_reset(struct cpsw_sl *sl)
+{
+	cpsw_sl_reg_write(sl, CPSW_SL_MACCONTROL, 0);
+}
+
+int cpsw_sl_wait_for_idle(struct cpsw_sl *sl, unsigned long tmo)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(tmo);
+
+	do {
+		usleep_range(100, 200);
+	} while (!(cpsw_sl_reg_read(sl, CPSW_SL_MACSTATUS) &
+		  sl->idle_mask) && time_after(timeout, jiffies));
+
+	if (!(cpsw_sl_reg_read(sl, CPSW_SL_MACSTATUS) & sl->idle_mask)) {
+		dev_err(sl->dev, "cpsw_sl failed to reach idle state.\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/ti/cpsw_sl.h b/drivers/net/ethernet/ti/cpsw_sl.h
new file mode 100644
index 0000000..a6d06a5
--- /dev/null
+++ b/drivers/net/ethernet/ti/cpsw_sl.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Texas Instruments Ethernet Switch media-access-controller (MAC) submodule/
+ * Ethernet MAC Sliver (CPGMAC_SL) APIs
+ *
+ * Copyright (C) 2019 Texas Instruments
+ *
+ */
+
+#ifndef __TI_CPSW_SL_H__
+#define __TI_CPSW_SL_H__
+
+#include <linux/device.h>
+
+enum cpsw_sl_regs {
+	CPSW_SL_IDVER,
+	CPSW_SL_MACCONTROL,
+	CPSW_SL_MACSTATUS,
+	CPSW_SL_SOFT_RESET,
+	CPSW_SL_RX_MAXLEN,
+	CPSW_SL_BOFFTEST,
+	CPSW_SL_RX_PAUSE,
+	CPSW_SL_TX_PAUSE,
+	CPSW_SL_EMCONTROL,
+	CPSW_SL_RX_PRI_MAP,
+	CPSW_SL_TX_GAP,
+};
+
+enum {
+	CPSW_SL_CTL_FULLDUPLEX = BIT(0), /* Full Duplex mode */
+	CPSW_SL_CTL_LOOPBACK = BIT(1), /* Loop Back Mode */
+	CPSW_SL_CTL_MTEST = BIT(2), /* Manufacturing Test mode */
+	CPSW_SL_CTL_RX_FLOW_EN = BIT(3), /* Receive Flow Control Enable */
+	CPSW_SL_CTL_TX_FLOW_EN = BIT(4), /* Transmit Flow Control Enable */
+	CPSW_SL_CTL_GMII_EN = BIT(5), /* GMII Enable */
+	CPSW_SL_CTL_TX_PACE = BIT(6), /* Transmit Pacing Enable */
+	CPSW_SL_CTL_GIG = BIT(7), /* Gigabit Mode */
+	CPSW_SL_CTL_XGIG = BIT(8), /* 10 Gigabit Mode */
+	CPSW_SL_CTL_TX_SHORT_GAP_EN = BIT(10), /* Transmit Short Gap Enable */
+	CPSW_SL_CTL_CMD_IDLE = BIT(11), /* Command Idle */
+	CPSW_SL_CTL_CRC_TYPE = BIT(12), /* Port CRC Type */
+	CPSW_SL_CTL_XGMII_EN = BIT(13), /* XGMII Enable */
+	CPSW_SL_CTL_IFCTL_A = BIT(15), /* Interface Control A */
+	CPSW_SL_CTL_IFCTL_B = BIT(16), /* Interface Control B */
+	CPSW_SL_CTL_GIG_FORCE = BIT(17), /* Gigabit Mode Force */
+	CPSW_SL_CTL_EXT_EN = BIT(18), /* External Control Enable */
+	CPSW_SL_CTL_EXT_EN_RX_FLO = BIT(19), /* Ext RX Flow Control Enable */
+	CPSW_SL_CTL_EXT_EN_TX_FLO = BIT(20), /* Ext TX Flow Control Enable */
+	CPSW_SL_CTL_TX_SG_LIM_EN = BIT(21), /* TX Short Gap Limit Enable */
+	CPSW_SL_CTL_RX_CEF_EN = BIT(22), /* RX Copy Error Frames Enable */
+	CPSW_SL_CTL_RX_CSF_EN = BIT(23), /* RX Copy Short Frames Enable */
+	CPSW_SL_CTL_RX_CMF_EN = BIT(24), /* RX Copy MAC Control Frames Enable */
+	CPSW_SL_CTL_EXT_EN_XGIG = BIT(25),  /* Ext XGIG Control En, k3 only */
+
+	CPSW_SL_CTL_FUNCS_COUNT
+};
+
+struct cpsw_sl;
+
+struct cpsw_sl *cpsw_sl_get(const char *device_id, struct device *dev,
+			    void __iomem *sl_base);
+
+void cpsw_sl_reset(struct cpsw_sl *sl, unsigned long tmo);
+
+u32 cpsw_sl_ctl_set(struct cpsw_sl *sl, u32 ctl_funcs);
+u32 cpsw_sl_ctl_clr(struct cpsw_sl *sl, u32 ctl_funcs);
+void cpsw_sl_ctl_reset(struct cpsw_sl *sl);
+int cpsw_sl_wait_for_idle(struct cpsw_sl *sl, unsigned long tmo);
+
+u32 cpsw_sl_reg_read(struct cpsw_sl *sl, enum cpsw_sl_regs reg);
+void cpsw_sl_reg_write(struct cpsw_sl *sl, enum cpsw_sl_regs reg, u32 val);
+
+#endif /* __TI_CPSW_SL_H__ */
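Taken together, the API above is meant to be driven as a get/reset/configure sequence. A hedged sketch of a port bring-up, assuming an already-mapped sliver base and an illustrative 100 ms reset budget (example_sliver_up is not part of this patch):

	static int example_sliver_up(struct device *dev, void __iomem *sl_base)
	{
		struct cpsw_sl *sl;

		/* "cpsw" selects the classic register map in cpsw_sl_id_match */
		sl = cpsw_sl_get("cpsw", dev, sl_base);
		if (IS_ERR(sl))
			return PTR_ERR(sl);

		cpsw_sl_reset(sl, 100);		/* soft-reset, 100 ms timeout */
		cpsw_sl_ctl_reset(sl);		/* start from MACCONTROL == 0 */

		/* Both bits are in CPSW_SL_CTL_FUNC_BASE, so every entry in
		 * cpsw_sl_id_match accepts them.
		 */
		cpsw_sl_ctl_set(sl, CPSW_SL_CTL_GMII_EN | CPSW_SL_CTL_FULLDUPLEX);
		return 0;
	}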
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index b96b93c..6113642 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -1,22 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * TI Common Platform Time Sync
  *
  * Copyright (C) 2012 Richard Cochran <richardcochran@gmail.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/if.h>
 #include <linux/hrtimer.h>
@@ -86,6 +75,25 @@
 	return removed ? 0 : -1;
 }
 
+static void cpts_purge_txq(struct cpts *cpts)
+{
+	struct cpts_skb_cb_data *skb_cb;
+	struct sk_buff *skb, *tmp;
+	int removed = 0;
+
+	skb_queue_walk_safe(&cpts->txq, skb, tmp) {
+		skb_cb = (struct cpts_skb_cb_data *)skb->cb;
+		if (time_after(jiffies, skb_cb->tmo)) {
+			__skb_unlink(skb, &cpts->txq);
+			dev_consume_skb_any(skb);
+			++removed;
+		}
+	}
+
+	if (removed)
+		dev_dbg(cpts->dev, "txq cleaned up %d\n", removed);
+}
+
 static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
 {
 	struct sk_buff *skb, *tmp;
@@ -119,9 +127,7 @@
 
 		if (time_after(jiffies, skb_cb->tmo)) {
 			/* timeout any expired skbs over 1s */
-			dev_dbg(cpts->dev,
-				"expiring tx timestamp mtype %u seqid %04x\n",
-				mtype, seqid);
+			dev_dbg(cpts->dev, "expiring tx timestamp from txq\n");
 			__skb_unlink(skb, &cpts->txq);
 			dev_consume_skb_any(skb);
 		}
@@ -294,8 +300,11 @@
 	spin_lock_irqsave(&cpts->lock, flags);
 	ts = ns_to_timespec64(timecounter_read(&cpts->tc));
 
-	if (!skb_queue_empty(&cpts->txq))
-		delay = CPTS_SKB_TX_WORK_TIMEOUT;
+	if (!skb_queue_empty(&cpts->txq)) {
+		cpts_purge_txq(cpts);
+		if (!skb_queue_empty(&cpts->txq))
+			delay = CPTS_SKB_TX_WORK_TIMEOUT;
+	}
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
 	pr_debug("cpts overflow check at %lld.%09ld\n",
@@ -410,8 +419,6 @@
 	u64 ns;
 	struct skb_shared_hwtstamps *ssh;
 
-	if (!cpts->rx_enable)
-		return;
 	ns = cpts_find_ts(cpts, skb, CPTS_EV_RX);
 	if (!ns)
 		return;
@@ -526,6 +533,82 @@
 		 freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
 }
 
+static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
+{
+	struct device_node *refclk_np;
+	const char **parent_names;
+	unsigned int num_parents;
+	struct clk_hw *clk_hw;
+	int ret = -EINVAL;
+	u32 *mux_table;
+
+	refclk_np = of_get_child_by_name(node, "cpts-refclk-mux");
+	if (!refclk_np)
+		/* refclk selection is not supported on all SoCs */
+		return 0;
+
+	num_parents = of_clk_get_parent_count(refclk_np);
+	if (num_parents < 1) {
+		dev_err(cpts->dev, "mux-clock %s must have parents\n",
+			refclk_np->name);
+		goto mux_fail;
+	}
+
+	parent_names = devm_kzalloc(cpts->dev, (sizeof(char *) * num_parents),
+				    GFP_KERNEL);
+
+	mux_table = devm_kzalloc(cpts->dev, sizeof(*mux_table) * num_parents,
+				 GFP_KERNEL);
+	if (!mux_table || !parent_names) {
+		ret = -ENOMEM;
+		goto mux_fail;
+	}
+
+	of_clk_parent_fill(refclk_np, parent_names, num_parents);
+
+	ret = of_property_read_variable_u32_array(refclk_np, "ti,mux-tbl",
+						  mux_table,
+						  num_parents, num_parents);
+	if (ret < 0)
+		goto mux_fail;
+
+	clk_hw = clk_hw_register_mux_table(cpts->dev, refclk_np->name,
+					   parent_names, num_parents,
+					   0,
+					   &cpts->reg->rftclk_sel, 0, 0x1F,
+					   0, mux_table, NULL);
+	if (IS_ERR(clk_hw)) {
+		ret = PTR_ERR(clk_hw);
+		goto mux_fail;
+	}
+
+	ret = devm_add_action_or_reset(cpts->dev,
+				       (void(*)(void *))clk_hw_unregister_mux,
+				       clk_hw);
+	if (ret) {
+		dev_err(cpts->dev, "failed to add clkmux unregister action: %d\n", ret);
+		goto mux_fail;
+	}
+
+	ret = of_clk_add_hw_provider(refclk_np, of_clk_hw_simple_get, clk_hw);
+	if (ret)
+		goto mux_fail;
+
+	ret = devm_add_action_or_reset(cpts->dev,
+				       (void(*)(void *))of_clk_del_provider,
+				       refclk_np);
+	if (ret) {
+		dev_err(cpts->dev, "failed to add clkmux provider unregister action: %d\n",
+			ret);
+		goto mux_fail;
+	}
+
+	return ret;
+
+mux_fail:
+	of_node_put(refclk_np);
+	return ret;
+}
+
 static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
 {
 	int ret = -EINVAL;
@@ -541,7 +624,7 @@
 	    (!cpts->cc.mult && cpts->cc.shift))
 		goto of_error;
 
-	return 0;
+	return cpts_of_mux_clk_setup(cpts, node);
 
 of_error:
 	dev_err(cpts->dev, "CPTS: Missing property in the DT.\n");
@@ -566,13 +649,20 @@
 	if (ret)
 		return ERR_PTR(ret);
 
-	cpts->refclk = devm_clk_get(dev, "cpts");
+	cpts->refclk = devm_get_clk_from_child(dev, node, "cpts");
+	if (IS_ERR(cpts->refclk))
+		/* try get clk from dev node for compatibility */
+		cpts->refclk = devm_clk_get(dev, "cpts");
+
 	if (IS_ERR(cpts->refclk)) {
-		dev_err(dev, "Failed to get cpts refclk\n");
+		dev_err(dev, "Failed to get cpts refclk %ld\n",
+			PTR_ERR(cpts->refclk));
 		return ERR_CAST(cpts->refclk);
 	}
 
-	clk_prepare(cpts->refclk);
+	ret = clk_prepare(cpts->refclk);
+	if (ret)
+		return ERR_PTR(ret);
 
 	cpts->cc.read = cpts_systim_read;
 	cpts->cc.mask = CLOCKSOURCE_MASK(32);
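Note that the hunk above also removes the rx_enable check from cpts_rx_timestamp(), so gating moves to the caller; the netcp_ethss.c changes later in this patch take exactly this shape:

	/* Caller-side contract after this patch (field name per netcp_ethss.c) */
	if (gbe_dev->rx_ts_enabled)
		cpts_rx_timestamp(gbe_dev->cpts, p_info->skb);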
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 73d73fa..bb997c1 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * TI Common Platform Time Sync
  *
  * Copyright (C) 2012 Richard Cochran <richardcochran@gmail.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #ifndef _TI_CPTS_H_
 #define _TI_CPTS_H_
@@ -36,7 +24,7 @@
 struct cpsw_cpts {
 	u32 idver;                /* Identification and version */
 	u32 control;              /* Time sync control */
-	u32 res1;
+	u32 rftclk_sel;		  /* Reference Clock Select Register */
 	u32 ts_push;              /* Time stamp event push */
 	u32 ts_load_val;          /* Time stamp load value */
 	u32 ts_load_en;           /* Time stamp load enable */
@@ -136,26 +124,6 @@
 			 struct device_node *node);
 void cpts_release(struct cpts *cpts);
 
-static inline void cpts_rx_enable(struct cpts *cpts, int enable)
-{
-	cpts->rx_enable = enable;
-}
-
-static inline bool cpts_is_rx_enabled(struct cpts *cpts)
-{
-	return !!cpts->rx_enable;
-}
-
-static inline void cpts_tx_enable(struct cpts *cpts, int enable)
-{
-	cpts->tx_enable = enable;
-}
-
-static inline bool cpts_is_tx_enabled(struct cpts *cpts)
-{
-	return !!cpts->tx_enable;
-}
-
 static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 	unsigned int class = ptp_classify_raw(skb);
@@ -197,24 +165,6 @@
 {
 }
 
-static inline void cpts_rx_enable(struct cpts *cpts, int enable)
-{
-}
-
-static inline bool cpts_is_rx_enabled(struct cpts *cpts)
-{
-	return false;
-}
-
-static inline void cpts_tx_enable(struct cpts *cpts, int enable)
-{
-}
-
-static inline bool cpts_is_tx_enabled(struct cpts *cpts)
-{
-	return false;
-}
-
 static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 	return false;
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 4236dcd..37ba708 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1,16 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Texas Instruments CPDMA Driver
  *
  * Copyright (C) 2010 Texas Instruments
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
@@ -141,6 +134,15 @@
 #define ACCESS_RW	(ACCESS_RO | ACCESS_WO)
 };
 
+struct submit_info {
+	struct cpdma_chan *chan;
+	int directed;
+	void *token;
+	void *data_virt;
+	dma_addr_t data_dma;
+	int len;
+};
+
 static struct cpdma_control_info controls[] = {
 	[CPDMA_TX_RLIM]		  = {CPDMA_DMACONTROL,	8,  0xffff, ACCESS_RW},
 	[CPDMA_CMD_IDLE]	  = {CPDMA_DMACONTROL,	3,  1,      ACCESS_WO},
@@ -183,6 +185,8 @@
 				 (directed << CPDMA_TO_PORT_SHIFT));	\
 	} while (0)
 
+#define CPDMA_DMA_EXT_MAP		BIT(16)
+
 static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr)
 {
 	struct cpdma_desc_pool *pool = ctlr->pool;
@@ -527,7 +531,6 @@
 		ctlr->num_chan = CPDMA_MAX_CHANNELS;
 	return ctlr;
 }
-EXPORT_SYMBOL_GPL(cpdma_ctlr_create);
 
 int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
 {
@@ -588,7 +591,6 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_ctlr_start);
 
 int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr)
 {
@@ -621,7 +623,6 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_ctlr_stop);
 
 int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
 {
@@ -639,7 +640,6 @@
 	cpdma_desc_pool_destroy(ctlr);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
 
 int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable)
 {
@@ -660,25 +660,21 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_ctlr_int_ctrl);
 
 void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value)
 {
 	dma_reg_write(ctlr, CPDMA_MACEOIVECTOR, value);
 }
-EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi);
 
 u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr)
 {
 	return dma_reg_read(ctlr, CPDMA_RXINTSTATMASKED);
 }
-EXPORT_SYMBOL_GPL(cpdma_ctrl_rxchs_state);
 
 u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr)
 {
 	return dma_reg_read(ctlr, CPDMA_TXINTSTATMASKED);
 }
-EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state);
 
 static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
 				 int rx, int desc_num,
@@ -726,7 +722,7 @@
  * cpdma_chan_split_pool - Splits ctrl pool between all channels.
  * Has to be called under ctlr lock
  */
-int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
 {
 	int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
 	int free_rx_num = 0, free_tx_num = 0;
@@ -774,7 +770,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_split_pool);
 
 
 /* cpdma_chan_set_weight - set weight of a channel in percentage.
@@ -807,7 +802,6 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_set_weight);
 
 /* cpdma_chan_get_min_rate - get minimum allowed rate for channel
  * Should be called before cpdma_chan_set_rate.
@@ -822,7 +816,6 @@
 
 	return DIV_ROUND_UP(divident, divisor);
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_get_min_rate);
 
 /* cpdma_chan_set_rate - limits bandwidth for transmit channel.
  * The bandwidth limited channels have to be in order beginning from lowest.
@@ -867,7 +860,6 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_set_rate);
 
 u32 cpdma_chan_get_rate(struct cpdma_chan *ch)
 {
@@ -880,7 +872,6 @@
 
 	return rate;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_get_rate);
 
 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
 				     cpdma_handler_fn handler, int rx_type)
@@ -940,7 +931,6 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return chan;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_create);
 
 int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan)
 {
@@ -953,7 +943,6 @@
 
 	return desc_num;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_get_rx_buf_num);
 
 int cpdma_chan_destroy(struct cpdma_chan *chan)
 {
@@ -975,7 +964,6 @@
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_destroy);
 
 int cpdma_chan_get_stats(struct cpdma_chan *chan,
 			 struct cpdma_chan_stats *stats)
@@ -988,7 +976,6 @@
 	spin_unlock_irqrestore(&chan->lock, flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_get_stats);
 
 static void __cpdma_chan_submit(struct cpdma_chan *chan,
 				struct cpdma_desc __iomem *desc)
@@ -1026,34 +1013,26 @@
 	}
 }
 
-int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
-		      int len, int directed)
+static int cpdma_chan_submit_si(struct submit_info *si)
 {
+	struct cpdma_chan		*chan = si->chan;
 	struct cpdma_ctlr		*ctlr = chan->ctlr;
+	int				len = si->len;
+	int				swlen = len;
 	struct cpdma_desc __iomem	*desc;
 	dma_addr_t			buffer;
-	unsigned long			flags;
 	u32				mode;
-	int				ret = 0;
-
-	spin_lock_irqsave(&chan->lock, flags);
-
-	if (chan->state == CPDMA_STATE_TEARDOWN) {
-		ret = -EINVAL;
-		goto unlock_ret;
-	}
+	int				ret;
 
 	if (chan->count >= chan->desc_num)	{
 		chan->stats.desc_alloc_fail++;
-		ret = -ENOMEM;
-		goto unlock_ret;
+		return -ENOMEM;
 	}
 
 	desc = cpdma_desc_alloc(ctlr->pool);
 	if (!desc) {
 		chan->stats.desc_alloc_fail++;
-		ret = -ENOMEM;
-		goto unlock_ret;
+		return -ENOMEM;
 	}
 
 	if (len < ctlr->params.min_packet_size) {
@@ -1061,16 +1040,21 @@
 		chan->stats.runt_transmit_buff++;
 	}
 
-	buffer = dma_map_single(ctlr->dev, data, len, chan->dir);
-	ret = dma_mapping_error(ctlr->dev, buffer);
-	if (ret) {
-		cpdma_desc_free(ctlr->pool, desc, 1);
-		ret = -EINVAL;
-		goto unlock_ret;
-	}
-
 	mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
-	cpdma_desc_to_port(chan, mode, directed);
+	cpdma_desc_to_port(chan, mode, si->directed);
+
+	if (si->data_dma) {
+		buffer = si->data_dma;
+		dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir);
+		swlen |= CPDMA_DMA_EXT_MAP;
+	} else {
+		buffer = dma_map_single(ctlr->dev, si->data_virt, len, chan->dir);
+		ret = dma_mapping_error(ctlr->dev, buffer);
+		if (ret) {
+			cpdma_desc_free(ctlr->pool, desc, 1);
+			return -EINVAL;
+		}
+	}
 
 	/* Relaxed IO accessors can be used here as there is read barrier
 	 * at the end of write sequence.
@@ -1079,9 +1063,9 @@
 	writel_relaxed(buffer, &desc->hw_buffer);
 	writel_relaxed(len, &desc->hw_len);
 	writel_relaxed(mode | len, &desc->hw_mode);
-	writel_relaxed((uintptr_t)token, &desc->sw_token);
+	writel_relaxed((uintptr_t)si->token, &desc->sw_token);
 	writel_relaxed(buffer, &desc->sw_buffer);
-	writel_relaxed(len, &desc->sw_len);
+	writel_relaxed(swlen, &desc->sw_len);
 	desc_read(desc, sw_len);
 
 	__cpdma_chan_submit(chan, desc);
@@ -1090,12 +1074,108 @@
 		chan_write(chan, rxfree, 1);
 
 	chan->count++;
+	return 0;
+}
 
-unlock_ret:
+int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data,
+			   int len, int directed)
+{
+	struct submit_info si;
+	unsigned long flags;
+	int ret;
+
+	si.chan = chan;
+	si.token = token;
+	si.data_virt = data;
+	si.data_dma = 0;
+	si.len = len;
+	si.directed = directed;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->state == CPDMA_STATE_TEARDOWN) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -EINVAL;
+	}
+
+	ret = cpdma_chan_submit_si(&si);
 	spin_unlock_irqrestore(&chan->lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_submit);
+
+int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token,
+				  dma_addr_t data, int len, int directed)
+{
+	struct submit_info si;
+	unsigned long flags;
+	int ret;
+
+	si.chan = chan;
+	si.token = token;
+	si.data_virt = NULL;
+	si.data_dma = data;
+	si.len = len;
+	si.directed = directed;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->state == CPDMA_STATE_TEARDOWN) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -EINVAL;
+	}
+
+	ret = cpdma_chan_submit_si(&si);
+	spin_unlock_irqrestore(&chan->lock, flags);
+	return ret;
+}
+
+int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
+		      int len, int directed)
+{
+	struct submit_info si;
+	unsigned long flags;
+	int ret;
+
+	si.chan = chan;
+	si.token = token;
+	si.data_virt = data;
+	si.data_dma = 0;
+	si.len = len;
+	si.directed = directed;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->state != CPDMA_STATE_ACTIVE) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -EINVAL;
+	}
+
+	ret = cpdma_chan_submit_si(&si);
+	spin_unlock_irqrestore(&chan->lock, flags);
+	return ret;
+}
+
+int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token,
+			     dma_addr_t data, int len, int directed)
+{
+	struct submit_info si;
+	unsigned long flags;
+	int ret;
+
+	si.chan = chan;
+	si.token = token;
+	si.data_virt = NULL;
+	si.data_dma = data;
+	si.len = len;
+	si.directed = directed;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->state != CPDMA_STATE_ACTIVE) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -EINVAL;
+	}
+
+	ret = cpdma_chan_submit_si(&si);
+	spin_unlock_irqrestore(&chan->lock, flags);
+	return ret;
+}
 
 bool cpdma_check_free_tx_desc(struct cpdma_chan *chan)
 {
@@ -1110,7 +1190,6 @@
 	spin_unlock_irqrestore(&chan->lock, flags);
 	return free_tx_desc;
 }
-EXPORT_SYMBOL_GPL(cpdma_check_free_tx_desc);
 
 static void __cpdma_chan_free(struct cpdma_chan *chan,
 			      struct cpdma_desc __iomem *desc,
@@ -1123,10 +1202,17 @@
 	uintptr_t			token;
 
 	token      = desc_read(desc, sw_token);
-	buff_dma   = desc_read(desc, sw_buffer);
 	origlen    = desc_read(desc, sw_len);
 
-	dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir);
+	buff_dma   = desc_read(desc, sw_buffer);
+	if (origlen & CPDMA_DMA_EXT_MAP) {
+		origlen &= ~CPDMA_DMA_EXT_MAP;
+		dma_sync_single_for_cpu(ctlr->dev, buff_dma, origlen,
+					chan->dir);
+	} else {
+		dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir);
+	}
+
 	cpdma_desc_free(pool, desc, 1);
 	(*chan->handler)((void *)token, outlen, status);
 }
@@ -1204,7 +1290,6 @@
 	}
 	return used;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_process);
 
 int cpdma_chan_start(struct cpdma_chan *chan)
 {
@@ -1224,7 +1309,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_start);
 
 int cpdma_chan_stop(struct cpdma_chan *chan)
 {
@@ -1287,7 +1371,6 @@
 	spin_unlock_irqrestore(&chan->lock, flags);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpdma_chan_stop);
 
 int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable)
 {
@@ -1329,25 +1412,34 @@
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(cpdma_control_set);
 
 int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr)
 {
 	return ctlr->num_rx_desc;
 }
-EXPORT_SYMBOL_GPL(cpdma_get_num_rx_descs);
 
 int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr)
 {
 	return ctlr->num_tx_desc;
 }
-EXPORT_SYMBOL_GPL(cpdma_get_num_tx_descs);
 
-void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc)
+int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc)
 {
+	unsigned long flags;
+	int temp, ret;
+
+	spin_lock_irqsave(&ctlr->lock, flags);
+
+	temp = ctlr->num_rx_desc;
 	ctlr->num_rx_desc = num_rx_desc;
 	ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc;
-}
-EXPORT_SYMBOL_GPL(cpdma_set_num_rx_descs);
+	ret = cpdma_chan_split_pool(ctlr);
+	if (ret) {
+		ctlr->num_rx_desc = temp;
+		ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc;
+	}
 
-MODULE_LICENSE("GPL");
+	spin_unlock_irqrestore(&ctlr->lock, flags);
+
+	return ret;
+}
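The new *_submit_mapped() entry points let the caller keep ownership of the DMA mapping: the descriptor's sw_len is tagged with CPDMA_DMA_EXT_MAP, and completion then only syncs the buffer for the CPU instead of unmapping it. A hedged usage sketch with a caller-managed single mapping (dev, rxch, token and buf are placeholders, not from this patch):

	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	int ret;

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* CPDMA will dma_sync_single_for_device()/..._for_cpu() around the
	 * transfer; the caller must eventually unmap dma itself.
	 */
	ret = cpdma_chan_idle_submit_mapped(rxch, token, dma, len, 0);
	if (ret)
		dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);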
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index d399af5..d3cfe23 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -1,16 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Texas Instruments CPDMA Driver
  *
  * Copyright (C) 2010 Texas Instruments
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #ifndef __DAVINCI_CPDMA_H__
 #define __DAVINCI_CPDMA_H__
@@ -34,8 +27,8 @@
 	int			num_chan;
 	bool			has_soft_reset;
 	int			min_packet_size;
-	u32			desc_mem_phys;
-	u32			desc_hw_addr;
+	dma_addr_t		desc_mem_phys;
+	dma_addr_t		desc_hw_addr;
 	int			desc_mem_size;
 	int			desc_align;
 	u32			bus_freq_mhz;
@@ -84,8 +77,14 @@
 
 int cpdma_chan_get_stats(struct cpdma_chan *chan,
 			 struct cpdma_chan_stats *stats);
+int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token,
+			     dma_addr_t data, int len, int directed);
 int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
 		      int len, int directed);
+int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token,
+				  dma_addr_t data, int len, int directed);
+int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data,
+			   int len, int directed);
 int cpdma_chan_process(struct cpdma_chan *chan, int quota);
 
 int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable);
@@ -117,8 +116,7 @@
 int cpdma_control_get(struct cpdma_ctlr *ctlr, int control);
 int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value);
 int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr);
-void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc);
+int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc);
 int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr);
-int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr);
 
 #endif
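With cpdma_chan_split_pool() now static, callers can no longer re-split the descriptor pool themselves; cpdma_set_num_rx_descs() does it internally and restores the previous rx/tx split on failure, so checking its new int return is enough. A sketch of the assumed ethtool-style caller:

	/* e.g. from a set_ringparam handler; cpsw->dma is an assumption here */
	ret = cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
	if (ret)
		return ret;	/* previous descriptor split was restored */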
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index f270bee..ae27be8 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * DaVinci Ethernet Medium Access Controller
  *
@@ -6,21 +7,6 @@
  * Copyright (C) 2009 Texas Instruments.
  *
  * ---------------------------------------------------------------------------
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- * ---------------------------------------------------------------------------
  * History:
  * 0-5 A number of folks worked on this driver in bits and pieces but the major
  *     contribution came from Suraj Iyer and Anant Gole
@@ -1385,7 +1371,7 @@
 		return -EOPNOTSUPP;
 }
 
-static int match_first_device(struct device *dev, void *data)
+static int match_first_device(struct device *dev, const void *data)
 {
 	if (dev->parent && dev->parent->of_node)
 		return of_device_is_compatible(dev->parent->of_node,
@@ -1442,8 +1428,8 @@
 		if (!skb)
 			break;
 
-		ret = cpdma_chan_submit(priv->rxchan, skb, skb->data,
-					skb_tailroom(skb), 0);
+		ret = cpdma_chan_idle_submit(priv->rxchan, skb, skb->data,
+					     skb_tailroom(skb), 0);
 		if (WARN_ON(ret < 0))
 			break;
 	}
@@ -1714,7 +1700,7 @@
 
 	if (!is_valid_ether_addr(pdata->mac_addr)) {
 		mac_addr = of_get_mac_address(np);
-		if (mac_addr)
+		if (!IS_ERR(mac_addr))
 			ether_addr_copy(pdata->mac_addr, mac_addr);
 	}
 
@@ -1912,11 +1898,11 @@
 		ether_addr_copy(ndev->dev_addr, priv->mac_addr);
 
 	if (!is_valid_ether_addr(priv->mac_addr)) {
-		/* Use random MAC if none passed */
+		/* Use random MAC if still none obtained. */
 		eth_hw_addr_random(ndev);
 		memcpy(priv->mac_addr, ndev->dev_addr, ndev->addr_len);
 		dev_warn(&pdev->dev, "using random MAC addr: %pM\n",
-							priv->mac_addr);
+			 priv->mac_addr);
 	}
 
 	ndev->netdev_ops = &emac_netdev_ops;
@@ -2002,8 +1988,7 @@
 
 static int davinci_emac_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
 	if (netif_running(ndev))
 		emac_dev_stop(ndev);
@@ -2013,8 +1998,7 @@
 
 static int davinci_emac_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
 	if (netif_running(ndev))
 		emac_dev_open(ndev);
@@ -2027,7 +2011,6 @@
 	.resume		= davinci_emac_resume,
 };
 
-#if IS_ENABLED(CONFIG_OF)
 static const struct emac_platform_data am3517_emac_data = {
 	.version		= EMAC_VERSION_2,
 	.hw_ram_addr		= 0x01e20000,
@@ -2044,14 +2027,13 @@
 	{},
 };
 MODULE_DEVICE_TABLE(of, davinci_emac_of_match);
-#endif
 
 /* davinci_emac_driver: EMAC platform driver structure */
 static struct platform_driver davinci_emac_driver = {
 	.driver = {
 		.name	 = "davinci_emac",
 		.pm	 = &davinci_emac_pm_ops,
-		.of_match_table = of_match_ptr(davinci_emac_of_match),
+		.of_match_table = davinci_emac_of_match,
 	},
 	.probe = davinci_emac_probe,
 	.remove = davinci_emac_remove,
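The IS_ERR() test on of_get_mac_address() matches its newer ERR_PTR-returning signature: the call can fail (for instance while an nvmem-provided address is not yet available), and a plain NULL check would mistake the error pointer for a valid address. The resulting pattern, as used here and in netcp_core.c:

	mac_addr = of_get_mac_address(np);
	if (!IS_ERR(mac_addr))
		ether_addr_copy(pdata->mac_addr, mac_addr);
	/* else fall back, e.g. to a random address as these drivers do */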
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index a98aeda..38b7f6d 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * DaVinci MDIO Module driver
  *
@@ -7,22 +8,6 @@
  *
  * Copyright (C) 2009 Texas Instruments.
  *
- * ---------------------------------------------------------------------------
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- * ---------------------------------------------------------------------------
  */
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -140,7 +125,7 @@
 static void davinci_mdio_enable(struct davinci_mdio_data *data)
 {
 	/* set enable and clock divider */
-	__raw_writel(data->clk_div | CONTROL_ENABLE, &data->regs->control);
+	writel(data->clk_div | CONTROL_ENABLE, &data->regs->control);
 }
 
 static int davinci_mdio_reset(struct mii_bus *bus)
@@ -159,7 +144,7 @@
 	msleep(PHY_MAX_ADDR * data->access_time);
 
 	/* dump hardware version info */
-	ver = __raw_readl(&data->regs->version);
+	ver = readl(&data->regs->version);
 	dev_info(data->dev,
 		 "davinci mdio revision %d.%d, bus freq %ld\n",
 		 (ver >> 8) & 0xff, ver & 0xff,
@@ -169,7 +154,7 @@
 		goto done;
 
 	/* get phy mask from the alive register */
-	phy_mask = __raw_readl(&data->regs->alive);
+	phy_mask = readl(&data->regs->alive);
 	if (phy_mask) {
 		/* restrict mdio bus to live phys only */
 		dev_info(data->dev, "detected phy mask %x\n", ~phy_mask);
@@ -196,11 +181,11 @@
 	u32 reg;
 
 	while (time_after(timeout, jiffies)) {
-		reg = __raw_readl(&regs->user[0].access);
+		reg = readl(&regs->user[0].access);
 		if ((reg & USERACCESS_GO) == 0)
 			return 0;
 
-		reg = __raw_readl(&regs->control);
+		reg = readl(&regs->control);
 		if ((reg & CONTROL_IDLE) == 0) {
 			usleep_range(100, 200);
 			continue;
@@ -216,7 +201,7 @@
 		return -EAGAIN;
 	}
 
-	reg = __raw_readl(&regs->user[0].access);
+	reg = readl(&regs->user[0].access);
 	if ((reg & USERACCESS_GO) == 0)
 		return 0;
 
@@ -263,7 +248,7 @@
 		if (ret < 0)
 			break;
 
-		__raw_writel(reg, &data->regs->user[0].access);
+		writel(reg, &data->regs->user[0].access);
 
 		ret = wait_for_user_access(data);
 		if (ret == -EAGAIN)
@@ -271,7 +256,7 @@
 		if (ret < 0)
 			break;
 
-		reg = __raw_readl(&data->regs->user[0].access);
+		reg = readl(&data->regs->user[0].access);
 		ret = (reg & USERACCESS_ACK) ? (reg & USERACCESS_DATA) : -EIO;
 		break;
 	}
@@ -307,7 +292,7 @@
 		if (ret < 0)
 			break;
 
-		__raw_writel(reg, &data->regs->user[0].access);
+		writel(reg, &data->regs->user[0].access);
 
 		ret = wait_for_user_access(data);
 		if (ret == -EAGAIN)
@@ -412,9 +397,9 @@
 	data->dev = dev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	data->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(data->regs))
-		return PTR_ERR(data->regs);
+	data->regs = devm_ioremap(dev, res->start, resource_size(res));
+	if (!data->regs)
+		return -ENOMEM;
 
 	davinci_mdio_init_clk(data);
 
@@ -472,9 +457,9 @@
 	u32 ctrl;
 
 	/* shutdown the scan state machine */
-	ctrl = __raw_readl(&data->regs->control);
+	ctrl = readl(&data->regs->control);
 	ctrl &= ~CONTROL_ENABLE;
-	__raw_writel(ctrl, &data->regs->control);
+	writel(ctrl, &data->regs->control);
 	wait_for_idle(data);
 
 	return 0;
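The readl()/writel() conversions above are more than cosmetic: unlike the __raw_*() accessors they replace, readl()/writel() are defined as little-endian and include the ordering barriers expected around MMIO, e.g.:

	/* ordered, little-endian access to the MDIO registers */
	writel(data->clk_div | CONTROL_ENABLE, &data->regs->control);
	reg = readl(&data->regs->user[0].access);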
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index c4ffdf4..43d5cd5 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * NetCP driver local header
  *
@@ -8,15 +9,6 @@
  *		Santosh Shilimkar <santosh.shilimkar@ti.com>
  *		Wingman Kwok <w-kwok2@ti.com>
  *		Murali Karicheri <m-karicheri2@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #ifndef __NETCP_H__
 #define __NETCP_H__
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index a1d335a..1b2702f 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Keystone NetCP Core driver
  *
@@ -8,15 +9,6 @@
  *		Santosh Shilimkar <santosh.shilimkar@ti.com>
  *		Murali Karicheri <m-karicheri2@ti.com>
  *		Wingman Kwok <w-kwok2@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/io.h>
@@ -225,17 +217,6 @@
 	return 0;
 }
 
-static const char *netcp_node_name(struct device_node *node)
-{
-	const char *name;
-
-	if (of_property_read_string(node, "label", &name) < 0)
-		name = node->name;
-	if (!name)
-		name = "unknown";
-	return name;
-}
-
 /* Module management routines */
 static int netcp_register_interface(struct netcp_intf *netcp)
 {
@@ -267,8 +248,13 @@
 	}
 
 	for_each_available_child_of_node(devices, child) {
-		const char *name = netcp_node_name(child);
+		const char *name;
+		char node_name[32];
 
+		if (of_property_read_string(child, "label", &name) < 0) {
+			snprintf(node_name, sizeof(node_name), "%pOFn", child);
+			name = node_name;
+		}
 		if (!strcasecmp(module->name, name))
 			break;
 	}
@@ -1130,7 +1116,7 @@
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		struct page *page = skb_frag_page(frag);
-		u32 page_offset = frag->page_offset;
+		u32 page_offset = skb_frag_off(frag);
 		u32 buf_len = skb_frag_size(frag);
 		dma_addr_t desc_dma;
 		u32 desc_dma_32;
@@ -2051,7 +2037,7 @@
 		devm_release_mem_region(dev, res.start, size);
 	} else {
 		mac_addr = of_get_mac_address(node_interface);
-		if (mac_addr)
+		if (!IS_ERR(mac_addr))
 			ether_addr_copy(ndev->dev_addr, mac_addr);
 		else
 			eth_random_addr(ndev->dev_addr);
@@ -2209,8 +2195,8 @@
 	for_each_available_child_of_node(interfaces, child) {
 		ret = netcp_create_interface(netcp_device, child);
 		if (ret) {
-			dev_err(dev, "could not create interface(%s)\n",
-				child->name);
+			dev_err(dev, "could not create interface(%pOFn)\n",
+				child);
 			goto probe_quit_interface;
 		}
 	}
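Dropping netcp_node_name() open-codes the same "label property, else node name" fallback using the %pOFn format specifier, which prints a device_node's name without reaching into node->name directly:

	/* inline replacement for the removed helper */
	if (of_property_read_string(child, "label", &name) < 0) {
		snprintf(node_name, sizeof(node_name), "%pOFn", child);
		name = node_name;
	}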
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 72b98e2..2c1fac3 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Keystone GBE and XGBE subsystem code
  *
@@ -7,15 +8,6 @@
  *		Cyril Chemparathy <cyril@ti.com>
  *		Santosh Shilimkar <santosh.shilimkar@ti.com>
  *		Wingman Kwok <w-kwok2@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/io.h>
@@ -763,6 +755,8 @@
 
 	int                             cpts_registered;
 	struct cpts                     *cpts;
+	int				rx_ts_enabled;
+	int				tx_ts_enabled;
 };
 
 struct gbe_intf {
@@ -2564,7 +2558,7 @@
 	struct gbe_priv *gbe_dev = gbe_intf->gbe_dev;
 
 	if (!(skb_shinfo(p_info->skb)->tx_flags & SKBTX_HW_TSTAMP) ||
-	    !cpts_is_tx_enabled(gbe_dev->cpts))
+	    !gbe_dev->tx_ts_enabled)
 		return 0;
 
 	/* If phy has the txtstamp api, assume it will do it.
@@ -2598,7 +2592,9 @@
 		return 0;
 	}
 
-	cpts_rx_timestamp(gbe_dev->cpts, p_info->skb);
+	if (gbe_dev->rx_ts_enabled)
+		cpts_rx_timestamp(gbe_dev->cpts, p_info->skb);
+
 	p_info->rxtstamp_complete = true;
 
 	return 0;
@@ -2614,10 +2610,8 @@
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
-	cfg.tx_type = cpts_is_tx_enabled(cpts) ?
-		      HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-	cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
-			 cpts->rx_enable : HWTSTAMP_FILTER_NONE);
+	cfg.tx_type = gbe_dev->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	cfg.rx_filter = gbe_dev->rx_ts_enabled;
 
 	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
 }
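/* Editor's note: a hedged sketch of the pattern adopted above. The
 * timestamping state moves out of the shared CPTS core and into
 * per-device fields (tx_ts_enabled/rx_ts_enabled, as named in this
 * patch), so SIOCGHWTSTAMP only reads driver-local state.
 */
static void example_fill_hwtstamp(struct gbe_priv *gbe_dev,
				  struct hwtstamp_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));
	cfg->tx_type = gbe_dev->tx_ts_enabled ?
		       HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
	/* rx_ts_enabled stores the active HWTSTAMP_FILTER_* value */
	cfg->rx_filter = gbe_dev->rx_ts_enabled;
}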
@@ -2628,8 +2622,8 @@
 	struct gbe_slave *slave = gbe_intf->slave;
 	u32 ts_en, seq_id, ctl;
 
-	if (!cpts_is_rx_enabled(gbe_dev->cpts) &&
-	    !cpts_is_tx_enabled(gbe_dev->cpts)) {
+	if (!gbe_dev->rx_ts_enabled &&
+	    !gbe_dev->tx_ts_enabled) {
 		writel(0, GBE_REG_ADDR(slave, port_regs, ts_ctl));
 		return;
 	}
@@ -2641,10 +2635,10 @@
 		(slave->ts_ctl.uni ?  TS_UNI_EN :
 			slave->ts_ctl.maddr_map << TS_CTL_MADDR_SHIFT);
 
-	if (cpts_is_tx_enabled(gbe_dev->cpts))
+	if (gbe_dev->tx_ts_enabled)
 		ts_en |= (TS_TX_ANX_ALL_EN | TS_TX_VLAN_LT1_EN);
 
-	if (cpts_is_rx_enabled(gbe_dev->cpts))
+	if (gbe_dev->rx_ts_enabled)
 		ts_en |= (TS_RX_ANX_ALL_EN | TS_RX_VLAN_LT1_EN);
 
 	writel(ts_en,  GBE_REG_ADDR(slave, port_regs, ts_ctl));
@@ -2670,10 +2664,10 @@
 
 	switch (cfg.tx_type) {
 	case HWTSTAMP_TX_OFF:
-		cpts_tx_enable(cpts, 0);
+		gbe_dev->tx_ts_enabled = 0;
 		break;
 	case HWTSTAMP_TX_ON:
-		cpts_tx_enable(cpts, 1);
+		gbe_dev->tx_ts_enabled = 1;
 		break;
 	default:
 		return -ERANGE;
@@ -2681,12 +2675,12 @@
 
 	switch (cfg.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		cpts_rx_enable(cpts, 0);
+		gbe_dev->rx_ts_enabled = HWTSTAMP_FILTER_NONE;
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-		cpts_rx_enable(cpts, HWTSTAMP_FILTER_PTP_V1_L4_EVENT);
+		gbe_dev->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
@@ -2698,7 +2692,7 @@
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		cpts_rx_enable(cpts, HWTSTAMP_FILTER_PTP_V2_EVENT);
+		gbe_dev->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		break;
 	default:
@@ -3137,15 +3131,15 @@
 	for_each_child_of_node(node, port) {
 		slave = devm_kzalloc(dev, sizeof(*slave), GFP_KERNEL);
 		if (!slave) {
-			dev_err(dev, "memory alloc failed for secondary port(%s), skipping...\n",
-				port->name);
+			dev_err(dev, "memory alloc failed for secondary port(%pOFn), skipping...\n",
+				port);
 			continue;
 		}
 
 		if (init_slave(gbe_dev, slave, port)) {
 			dev_err(dev,
-				"Failed to initialize secondary port(%s), skipping...\n",
-				port->name);
+				"Failed to initialize secondary port(%pOFn), skipping...\n",
+				port);
 			devm_kfree(dev, slave);
 			continue;
 		}
@@ -3239,8 +3233,8 @@
 	ret = of_address_to_resource(node, XGBE_SS_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't xlate xgbe of node(%s) ss address at %d\n",
-			node->name, XGBE_SS_REG_INDEX);
+			"Can't xlate xgbe of node(%pOFn) ss address at %d\n",
+			node, XGBE_SS_REG_INDEX);
 		return ret;
 	}
 
@@ -3254,8 +3248,8 @@
 	ret = of_address_to_resource(node, XGBE_SM_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't xlate xgbe of node(%s) sm address at %d\n",
-			node->name, XGBE_SM_REG_INDEX);
+			"Can't xlate xgbe of node(%pOFn) sm address at %d\n",
+			node, XGBE_SM_REG_INDEX);
 		return ret;
 	}
 
@@ -3269,8 +3263,8 @@
 	ret = of_address_to_resource(node, XGBE_SERDES_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't xlate xgbe serdes of node(%s) address at %d\n",
-			node->name, XGBE_SERDES_REG_INDEX);
+			"Can't xlate xgbe serdes of node(%pOFn) address at %d\n",
+			node, XGBE_SERDES_REG_INDEX);
 		return ret;
 	}
 
@@ -3347,8 +3341,8 @@
 	ret = of_address_to_resource(node, GBE_SS_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't translate of node(%s) of gbe ss address at %d\n",
-			node->name, GBE_SS_REG_INDEX);
+			"Can't translate of node(%pOFn) of gbe ss address at %d\n",
+			node, GBE_SS_REG_INDEX);
 		return ret;
 	}
 
@@ -3372,8 +3366,8 @@
 	ret = of_address_to_resource(node, GBE_SGMII34_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't translate of gbe node(%s) address at index %d\n",
-			node->name, GBE_SGMII34_REG_INDEX);
+			"Can't translate of gbe node(%pOFn) address at index %d\n",
+			node, GBE_SGMII34_REG_INDEX);
 		return ret;
 	}
 
@@ -3388,8 +3382,8 @@
 	ret = of_address_to_resource(node, GBE_SM_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't translate of gbe node(%s) address at index %d\n",
-			node->name, GBE_SM_REG_INDEX);
+			"Can't translate of gbe node(%pOFn) address at index %d\n",
+			node, GBE_SM_REG_INDEX);
 		return ret;
 	}
 
@@ -3498,8 +3492,8 @@
 	ret = of_address_to_resource(node, GBENU_SM_REG_INDEX, &res);
 	if (ret) {
 		dev_err(gbe_dev->dev,
-			"Can't translate of gbenu node(%s) addr at index %d\n",
-			node->name, GBENU_SM_REG_INDEX);
+			"Can't translate of gbenu node(%pOFn) addr at index %d\n",
+			node, GBENU_SM_REG_INDEX);
 		return ret;
 	}
 
@@ -3560,7 +3554,7 @@
 static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 		     struct device_node *node, void **inst_priv)
 {
-	struct device_node *interfaces, *interface;
+	struct device_node *interfaces, *interface, *cpts_node;
 	struct device_node *secondary_ports;
 	struct cpsw_ale_params ale_params;
 	struct gbe_priv *gbe_dev;
@@ -3621,7 +3615,7 @@
 		return -EINVAL;
 	}
 
-	if (!strcmp(node->name, "gbe")) {
+	if (of_node_name_eq(node, "gbe")) {
 		ret = get_gbe_resource_version(gbe_dev, node);
 		if (ret)
 			return ret;
@@ -3635,14 +3629,14 @@
 		else
 			ret = -ENODEV;
 
-	} else if (!strcmp(node->name, "xgbe")) {
+	} else if (of_node_name_eq(node, "xgbe")) {
 		ret = set_xgbe_ethss10_priv(gbe_dev, node);
 		if (ret)
 			return ret;
 		ret = netcp_xgbe_serdes_init(gbe_dev->xgbe_serdes_regs,
 					     gbe_dev->ss_regs);
 	} else {
-		dev_err(dev, "unknown GBE node(%s)\n", node->name);
+		dev_err(dev, "unknown GBE node(%pOFn)\n", node);
 		ret = -ENODEV;
 	}
 
@@ -3655,20 +3649,24 @@
 
 	ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device,
 				gbe_dev->dma_chan_name, gbe_dev->tx_queue_id);
-	if (ret)
+	if (ret) {
+		of_node_put(interfaces);
 		return ret;
+	}
 
 	ret = netcp_txpipe_open(&gbe_dev->tx_pipe);
-	if (ret)
+	if (ret) {
+		of_node_put(interfaces);
 		return ret;
+	}
 
 	/* Create network interfaces */
 	INIT_LIST_HEAD(&gbe_dev->gbe_intf_head);
 	for_each_child_of_node(interfaces, interface) {
 		ret = of_property_read_u32(interface, "slave-port", &slave_num);
 		if (ret) {
-			dev_err(dev, "missing slave-port parameter, skipping interface configuration for %s\n",
-				interface->name);
+			dev_err(dev, "missing slave-port parameter, skipping interface configuration for %pOFn\n",
+				interface);
 			continue;
 		}
 		gbe_dev->num_slaves++;
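/* Editor's note: illustrative sketch of the refcount rule behind the
 * of_node_put() additions above: of_get_child_by_name() returns a node
 * with an elevated refcount, so every early-exit path must drop it.
 * example_setup() is a hypothetical stand-in for the failing calls.
 */
static int example_parse_interfaces(struct device_node *node)
{
	struct device_node *interfaces;
	int ret;

	interfaces = of_get_child_by_name(node, "netcp-interfaces");
	if (!interfaces)
		return -ENODEV;

	ret = example_setup();
	if (ret) {
		of_node_put(interfaces);	/* don't leak the reference */
		return ret;
	}

	of_node_put(interfaces);
	return 0;
}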
@@ -3715,7 +3713,12 @@
 		dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n");
 	}
 
-	gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, node);
+	cpts_node = of_get_child_by_name(node, "cpts");
+	if (!cpts_node)
+		cpts_node = of_node_get(node);
+
+	gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, cpts_node);
+	of_node_put(cpts_node);
 	if (IS_ENABLED(CONFIG_TI_CPTS) && IS_ERR(gbe_dev->cpts)) {
 		ret = PTR_ERR(gbe_dev->cpts);
 		goto free_sec_ports;
diff --git a/drivers/net/ethernet/ti/netcp_sgmii.c b/drivers/net/ethernet/ti/netcp_sgmii.c
index 5d8419f..f7cf56d 100644
--- a/drivers/net/ethernet/ti/netcp_sgmii.c
+++ b/drivers/net/ethernet/ti/netcp_sgmii.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SGMII module initialisation
  *
@@ -6,14 +7,6 @@
  *		Sandeep Paulraj <s-paulraj@ti.com>
  *		Wingman Kwok <w-kwok2@ti.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include "netcp.h"
diff --git a/drivers/net/ethernet/ti/netcp_xgbepcsr.c b/drivers/net/ethernet/ti/netcp_xgbepcsr.c
index 33571ac..112778a 100644
--- a/drivers/net/ethernet/ti/netcp_xgbepcsr.c
+++ b/drivers/net/ethernet/ti/netcp_xgbepcsr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * XGE PCSR module initialisation
  *
@@ -5,14 +6,6 @@
  * Authors:	Sandeep Nair <sandeep_n@ti.com>
  *		WingMan Kwok <w-kwok2@ti.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 #include "netcp.h"
 
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 93d1428..78f0f2d 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -69,7 +69,9 @@
 MODULE_DESCRIPTION("Driver for TI ThunderLAN based ethernet PCI adapters");
 MODULE_LICENSE("GPL");
 
-/* Turn on debugging. See Documentation/networking/tlan.txt for details */
+/* Turn on debugging.
+ * See Documentation/networking/device_drivers/ti/tlan.txt for details
+ */
 static  int		debug;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "ThunderLAN debug mask");
@@ -853,7 +855,6 @@
 		       dev->name);
 		return -ENOMEM;
 	}
-	memset(priv->dma_storage, 0, dma_size);
 	priv->rx_list = (struct tlan_list *)
 		ALIGN((unsigned long)priv->dma_storage, 8);
 	priv->rx_list_dma = ALIGN(priv->dma_storage_dma, 8);
diff --git a/drivers/net/ethernet/toshiba/Kconfig b/drivers/net/ethernet/toshiba/Kconfig
index 6f1d5b6..9ccdf03 100644
--- a/drivers/net/ethernet/toshiba/Kconfig
+++ b/drivers/net/ethernet/toshiba/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Toshiba network device configuration
 #
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 88d74ae..9d9f8ac 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  PS3 gelic network driver.
  *
@@ -10,20 +11,6 @@
  *
  * Authors : Utz Bacher <utz.bacher@de.ibm.com>
  *           Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #undef DEBUG
@@ -845,9 +832,9 @@
  * @skb: packet to send out
  * @netdev: interface device structure
  *
- * returns 0 on success, <0 on failure
+ * returns NETDEV_TX_OK on success, NETDEV_TX_BUSY on failure
  */
-int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
+netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct gelic_card *card = netdev_card(netdev);
 	struct gelic_descr *descr;
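/* Editor's note: a minimal sketch (hypothetical helpers) of the
 * netdev_tx_t convention behind the prototype change above. An
 * .ndo_start_xmit handler returns NETDEV_TX_OK once it owns the skb,
 * or NETDEV_TX_BUSY only after stopping its queue.
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	if (example_tx_ring_full(ndev)) {	/* hypothetical */
		netif_stop_queue(ndev);
		return NETDEV_TX_BUSY;		/* core requeues the skb */
	}

	example_hw_send(ndev, skb);		/* hypothetical */
	return NETDEV_TX_OK;			/* skb is consumed */
}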
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
index 003d045..0510335 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  *  PS3 Platform gelic network driver.
  *
@@ -10,20 +11,6 @@
  *
  * Authors : Utz Bacher <utz.bacher@de.ibm.com>
  *           Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #ifndef _GELIC_NET_H
 #define _GELIC_NET_H
@@ -314,7 +301,7 @@
 	 */
 	unsigned int irq;
 	struct gelic_descr *tx_top, *rx_top;
-	struct gelic_descr descr[0]; /* must be the last */
+	struct gelic_descr descr[]; /* must be the last */
 };
 
 struct gelic_port {
@@ -370,7 +357,7 @@
 void gelic_card_down(struct gelic_card *card);
 int gelic_net_open(struct net_device *netdev);
 int gelic_net_stop(struct net_device *netdev);
-int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
+netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
 void gelic_net_set_multi(struct net_device *netdev);
 void gelic_net_tx_timeout(struct net_device *netdev);
 int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card);
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
index 302079e..2db546b 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  PS3 gelic network driver.
  *
  * Copyright (C) 2007 Sony Computer Entertainment Inc.
  * Copyright 2007 Sony Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #undef DEBUG
 
@@ -1094,7 +1082,7 @@
 	struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
 	struct iw_point *enc = &data->encoding;
 	unsigned long irqflag;
-	unsigned int key_index, index_specified;
+	unsigned int key_index;
 	int ret = 0;
 
 	pr_debug("%s: <-\n", __func__);
@@ -1105,13 +1093,10 @@
 		return -EINVAL;
 
 	spin_lock_irqsave(&wl->lock, irqflag);
-	if (key_index) {
-		index_specified = 1;
+	if (key_index)
 		key_index--;
-	} else {
-		index_specified = 0;
+	else
 		key_index = wl->current_key;
-	}
 
 	if (wl->group_cipher_method == GELIC_WL_CIPHER_WEP) {
 		switch (wl->auth_method) {
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h
index 11f443d..4041d94 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  PS3 gelic network driver.
  *
  * Copyright (C) 2007 Sony Computer Entertainment Inc.
  * Copyright 2007 Sony Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation version 2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #ifndef _GELIC_WIRELESS_H
 #define _GELIC_WIRELESS_H
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index d925b82..538e708 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Network device driver for Cell Processor-Based Blade and Celleb platform
  *
@@ -6,20 +7,6 @@
  *
  * Authors : Utz Bacher <utz.bacher@de.ibm.com>
  *           Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/compiler.h>
@@ -801,6 +788,7 @@
 			/* fallthrough, if we release the descriptors
 			 * brutally (then we don't care about
 			 * SPIDER_NET_DESCR_CARDOWNED) */
+			/* Fall through */
 
 		case SPIDER_NET_DESCR_RESPONSE_ERROR:
 		case SPIDER_NET_DESCR_PROTECTION_ERROR:
@@ -880,9 +868,9 @@
  * @skb: packet to send out
  * @netdev: interface device structure
  *
- * returns 0 on success, !0 on failure
+ * returns NETDEV_TX_OK on success, NETDEV_TX_BUSY on failure
  */
-static int
+static netdev_tx_t
 spider_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	int cnt;
@@ -2323,11 +2311,9 @@
 {
 	struct net_device *netdev;
 	struct spider_net_card *card;
-	size_t alloc_size;
 
-	alloc_size = sizeof(struct spider_net_card) +
-	   (tx_descriptors + rx_descriptors) * sizeof(struct spider_net_descr);
-	netdev = alloc_etherdev(alloc_size);
+	netdev = alloc_etherdev(struct_size(card, darray,
+					    tx_descriptors + rx_descriptors));
 	if (!netdev)
 		return NULL;
 
diff --git a/drivers/net/ethernet/toshiba/spider_net.h b/drivers/net/ethernet/toshiba/spider_net.h
index 9b6af08..c0c68cb 100644
--- a/drivers/net/ethernet/toshiba/spider_net.h
+++ b/drivers/net/ethernet/toshiba/spider_net.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Network device driver for Cell Processor-Based Blade and Celleb platform
  *
@@ -6,20 +7,6 @@
  *
  * Authors : Utz Bacher <utz.bacher@de.ibm.com>
  *           Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _SPIDER_NET_H
diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c
index 16bd036..54f655a 100644
--- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c
+++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Network device driver for Cell Processor-Based Blade
  *
@@ -5,20 +6,6 @@
  *
  * Authors : Utz Bacher <utz.bacher@de.ibm.com>
  *           Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index cce9c9e..12466a7 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -474,7 +474,8 @@
 /* Index to functions, as function prototypes. */
 
 static int	tc35815_open(struct net_device *dev);
-static int	tc35815_send_packet(struct sk_buff *skb, struct net_device *dev);
+static netdev_tx_t	tc35815_send_packet(struct sk_buff *skb,
+					    struct net_device *dev);
 static irqreturn_t	tc35815_interrupt(int irq, void *dev_id);
 static int	tc35815_rx(struct net_device *dev, int limit);
 static int	tc35815_poll(struct napi_struct *napi, int budget);
@@ -606,9 +607,9 @@
 
 static int tc_mii_probe(struct net_device *dev)
 {
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 	struct tc35815_local *lp = netdev_priv(dev);
 	struct phy_device *phydev;
-	u32 dropmask;
 
 	phydev = phy_find_first(lp->mii_bus);
 	if (!phydev) {
@@ -628,18 +629,23 @@
 	phy_attached_info(phydev);
 
 	/* mask with MAC supported features */
-	phydev->supported &= PHY_BASIC_FEATURES;
-	dropmask = 0;
-	if (options.speed == 10)
-		dropmask |= SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full;
-	else if (options.speed == 100)
-		dropmask |= SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full;
-	if (options.duplex == 1)
-		dropmask |= SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Full;
-	else if (options.duplex == 2)
-		dropmask |= SUPPORTED_10baseT_Half | SUPPORTED_100baseT_Half;
-	phydev->supported &= ~dropmask;
-	phydev->advertising = phydev->supported;
+	phy_set_max_speed(phydev, SPEED_100);
+	if (options.speed == 10) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask);
+	} else if (options.speed == 100) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, mask);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, mask);
+	}
+	if (options.duplex == 1) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, mask);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask);
+	} else if (options.duplex == 2) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, mask);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask);
+	}
+	linkmode_andnot(phydev->supported, phydev->supported, mask);
+	linkmode_copy(phydev->advertising, phydev->supported);
 
 	lp->link = 0;
 	lp->speed = 0;
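/* Editor's note: a hedged sketch of the linkmode bitmap API used above.
 * phydev->supported and ->advertising became bitmaps in v4.20, so the
 * old u32 dropmask is rebuilt with linkmode helpers: collect the modes
 * to remove in a local mask, then clear them with linkmode_andnot().
 */
static void example_drop_100m_modes(struct phy_device *phydev)
{
	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };

	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask);
	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask);
	linkmode_andnot(phydev->supported, phydev->supported, mask);
	linkmode_copy(phydev->advertising, phydev->supported);
}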
@@ -688,10 +694,10 @@
  * should provide a "tc35815-mac" device with a MAC address in its
  * platform_data.
  */
-static int tc35815_mac_match(struct device *dev, void *data)
+static int tc35815_mac_match(struct device *dev, const void *data)
 {
 	struct platform_device *plat_dev = to_platform_device(dev);
-	struct pci_dev *pci_dev = data;
+	const struct pci_dev *pci_dev = data;
 	unsigned int id = pci_dev->irq;
 	return !strcmp(plat_dev->name, "tc35815-mac") && plat_dev->id == id;
 }
@@ -1248,7 +1254,8 @@
  * invariant will hold if you make sure that the netif_*_queue()
  * calls are done at the proper times.
  */
-static int tc35815_send_packet(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t
+tc35815_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tc35815_local *lp = netdev_priv(dev);
 	struct TxFD *txfd;
@@ -1497,7 +1504,7 @@
 			pci_unmap_single(lp->pci_dev,
 					 lp->rx_skbs[cur_bd].skb_dma,
 					 RX_BUF_SIZE, PCI_DMA_FROMDEVICE);
-			if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN)
+			if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN != 0)
 				memmove(skb->data, skb->data - NET_IP_ALIGN,
 					pkt_len);
 			data = skb_put(skb, pkt_len);
diff --git a/drivers/net/ethernet/tundra/Kconfig b/drivers/net/ethernet/tundra/Kconfig
index 81d845e..5c909df 100644
--- a/drivers/net/ethernet/tundra/Kconfig
+++ b/drivers/net/ethernet/tundra/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Tundra network device configuration
 #
diff --git a/drivers/net/ethernet/tundra/Makefile b/drivers/net/ethernet/tundra/Makefile
index 439f693..78fee6b 100644
--- a/drivers/net/ethernet/tundra/Makefile
+++ b/drivers/net/ethernet/tundra/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Tundra network device drivers.
 #
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index edcd1e6..c62f474 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*******************************************************************************
 
   Copyright(c) 2006 Tundra Semiconductor Corporation.
 
-  This program is free software; you can redistribute it and/or modify it
-  under the terms of the GNU General Public License as published by the Free
-  Software Foundation; either version 2 of the License, or (at your option)
-  any later version.
-
-  This program is distributed in the hope that it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc., 59
-  Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
 *******************************************************************************/
 
@@ -383,9 +371,10 @@
 static void tsi108_stat_carry(struct net_device *dev)
 {
 	struct tsi108_prv_data *data = netdev_priv(dev);
+	unsigned long flags;
 	u32 carry1, carry2;
 
-	spin_lock_irq(&data->misclock);
+	spin_lock_irqsave(&data->misclock, flags);
 
 	carry1 = TSI_READ(TSI108_STAT_CARRY1);
 	carry2 = TSI_READ(TSI108_STAT_CARRY2);
@@ -453,7 +442,7 @@
 			      TSI108_STAT_TXPAUSEDROP_CARRY,
 			      &data->tx_pause_drop);
 
-	spin_unlock_irq(&data->misclock);
+	spin_unlock_irqrestore(&data->misclock, flags);
 }
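/* Editor's note: illustrative sketch of the irqsave conversion above.
 * spin_lock_irqsave() records the current IRQ state in flags, so the
 * critical section is safe even when the caller already runs with
 * interrupts disabled; plain spin_unlock_irq() would unconditionally
 * re-enable them.
 */
static void example_carry_update(spinlock_t *lock, u32 *counter, u32 delta)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	*counter += delta;
	spin_unlock_irqrestore(lock, flags);	/* restores saved state */
}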
 
 /* Read a stat counter atomically with respect to carries.
@@ -1311,13 +1300,13 @@
 		       data->id, dev->irq, dev->name);
 	}
 
-	data->rxring = dma_zalloc_coherent(&data->pdev->dev, rxring_size,
-			&data->rxdma, GFP_KERNEL);
+	data->rxring = dma_alloc_coherent(&data->pdev->dev, rxring_size,
+					  &data->rxdma, GFP_KERNEL);
 	if (!data->rxring)
 		return -ENOMEM;
 
-	data->txring = dma_zalloc_coherent(&data->pdev->dev, txring_size,
-			&data->txdma, GFP_KERNEL);
+	data->txring = dma_alloc_coherent(&data->pdev->dev, txring_size,
+					  &data->txdma, GFP_KERNEL);
 	if (!data->txring) {
 		dma_free_coherent(&data->pdev->dev, rxring_size, data->rxring,
 				    data->rxdma);
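/* Editor's note: background for the conversion above. Since the v5.0
 * cycle, dma_alloc_coherent() always returns zeroed memory, so the
 * dma_zalloc_coherent() wrapper was removed and callers converted 1:1.
 */
static void *example_alloc_ring(struct device *dev, size_t size,
				dma_addr_t *handle)
{
	/* already zeroed; no separate memset() required */
	return dma_alloc_coherent(dev, size, handle, GFP_KERNEL);
}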
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.h b/drivers/net/ethernet/tundra/tsi108_eth.h
index 4a03c59..00980fd 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.h
+++ b/drivers/net/ethernet/tundra/tsi108_eth.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * (C) Copyright 2005 Tundra Semiconductor Corp.
  * Kong Lai, <kong.lai@tundra.com>.
  *
  * See file CREDITS for list of people who contributed to this
  * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig
index d3d0947..a962097 100644
--- a/drivers/net/ethernet/via/Kconfig
+++ b/drivers/net/ethernet/via/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # VIA device configuration
 #
diff --git a/drivers/net/ethernet/via/Makefile b/drivers/net/ethernet/via/Makefile
index 46c5d4a..4ca40f9 100644
--- a/drivers/net/ethernet/via/Makefile
+++ b/drivers/net/ethernet/via/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the VIA device drivers.
 #
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 3394924..ed12dbd 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -571,7 +571,6 @@
 	if (rp->quirks & rqStatusWBRace)
 		iowrite8(mask >> 16, ioaddr + IntrStatus2);
 	iowrite16(mask, ioaddr + IntrStatus);
-	mmiowb();
 }
 
 /*
@@ -863,7 +862,6 @@
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
 		iowrite16(enable_mask, ioaddr + IntrEnable);
-		mmiowb();
 	}
 	return work_done;
 }
@@ -1129,15 +1127,13 @@
 	const struct of_device_id *match;
 	const u32 *quirks;
 	int irq;
-	struct resource *res;
 	void __iomem *ioaddr;
 
 	match = of_match_device(rhine_of_tbl, &pdev->dev);
 	if (!match)
 		return -EINVAL;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ioaddr = devm_ioremap_resource(&pdev->dev, res);
+	ioaddr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ioaddr))
 		return PTR_ERR(ioaddr);
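/* Editor's note: a minimal sketch of the helper adopted above.
 * devm_platform_ioremap_resource() (added in v5.1) combines
 * platform_get_resource() and devm_ioremap_resource() in one call.
 */
static int example_map_regs(struct platform_device *pdev)
{
	void __iomem *base;

	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);
	/* ... use base ... */
	return 0;
}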
 
@@ -1893,7 +1889,6 @@
 static void rhine_irq_disable(struct rhine_private *rp)
 {
 	iowrite16(0x0000, rp->base + IntrEnable);
-	mmiowb();
 }
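/* Editor's note: background for the mmiowb() removals in this file and
 * in the WIZnet drivers below. Since v5.2, spin_unlock() implies the
 * MMIO write barrier on architectures that need one ("mmiowb
 * tracking"), so explicit mmiowb() calls became redundant and were
 * deleted tree-wide. A sketch of the resulting pattern:
 */
static void example_locked_mmio(void __iomem *ioaddr, spinlock_t *lock)
{
	spin_lock(lock);
	iowrite16(0x0000, ioaddr);
	spin_unlock(lock);	/* orders the MMIO write where required */
}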
 
 /* The interrupt handler does all of the Rx thread work and cleans up
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index ef9538e..346e441 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * This code is derived from the VIA reference driver (copyright message
  * below) provided to Red Hat by VIA Networking Technologies, Inc. for
@@ -24,22 +25,11 @@
  * Copyright (c) 1996, 2003 VIA Networking Technologies, Inc.
  * All rights reserved.
  *
- * This software may be redistributed and/or modified under
- * the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
  * Author: Chuang Liang-Shing, AJ Jiang
  *
  * Date: Jan 24, 2003
  *
  * MODULE_LICENSE("GPL");
- *
  */
 
 #include <linux/module.h>
@@ -1740,7 +1730,7 @@
 		dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
 				 le16_to_cpu(pktlen), DMA_TO_DEVICE);
 	}
-	dev_kfree_skb_irq(skb);
+	dev_consume_skb_irq(skb);
 	tdinfo->skb = NULL;
 }
 
@@ -3605,7 +3595,7 @@
 	"tx_jumbo",
 	"rx_mac_control_frames",
 	"tx_mac_control_frames",
-	"rx_frame_alignement_errors",
+	"rx_frame_alignment_errors",
 	"rx_long_ok",
 	"rx_long_err",
 	"tx_sqe_errors",
diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h
index 9453bfa..cdfe780 100644
--- a/drivers/net/ethernet/via/via-velocity.h
+++ b/drivers/net/ethernet/via/via-velocity.h
@@ -1,17 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Copyright (c) 1996, 2003 VIA Networking Technologies, Inc.
  * All rights reserved.
  *
- * This software may be redistributed and/or modified under
- * the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
  * File: via-velocity.h
  *
  * Purpose: Header file to define driver's private structures.
@@ -1518,7 +1509,7 @@
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(vptr->netdev);
 	if (in_dev != NULL) {
-		ifa = (struct in_ifaddr *) in_dev->ifa_list;
+		ifa = rcu_dereference(in_dev->ifa_list);
 		if (ifa != NULL) {
 			memcpy(vptr->ip_addr, &ifa->ifa_address, 4);
 			res = 0;
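/* Editor's note: hedged sketch of the RCU annotation fix above.
 * in_dev->ifa_list is an __rcu pointer, so readers under
 * rcu_read_lock() must load it with rcu_dereference() instead of a
 * plain cast, keeping sparse and lockdep checking intact.
 */
static bool example_first_ipv4_addr(struct net_device *ndev, __be32 *out)
{
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	bool found = false;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(ndev);
	if (in_dev) {
		ifa = rcu_dereference(in_dev->ifa_list);
		if (ifa) {
			*out = ifa->ifa_address;
			found = true;
		}
	}
	rcu_read_unlock();
	return found;
}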
diff --git a/drivers/net/ethernet/wiznet/Kconfig b/drivers/net/ethernet/wiznet/Kconfig
index 1981e88..0422775 100644
--- a/drivers/net/ethernet/wiznet/Kconfig
+++ b/drivers/net/ethernet/wiznet/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # WIZnet devices configuration
 #
diff --git a/drivers/net/ethernet/wiznet/Makefile b/drivers/net/ethernet/wiznet/Makefile
index 1e05e1a..78104f0 100644
--- a/drivers/net/ethernet/wiznet/Makefile
+++ b/drivers/net/ethernet/wiznet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_WIZNET_W5100) += w5100.o
 obj-$(CONFIG_WIZNET_W5100_SPI) += w5100-spi.o
 obj-$(CONFIG_WIZNET_W5300) += w5300.o
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
index 93a2d3c..2b4126d 100644
--- a/drivers/net/ethernet/wiznet/w5100-spi.c
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Ethernet driver for the WIZnet W5100/W5200/W5500 chip.
  *
  * Copyright (C) 2016 Akinobu Mita <akinobu.mita@gmail.com>
  *
- * Licensed under the GPL-2 or later.
- *
  * Datasheet:
  * http://www.wiznet.co.kr/wp-content/uploads/wiznethome/Chip/W5100/Document/W5100_Datasheet_v1.2.6.pdf
  * http://wiznethome.cafe24.com/wp-content/uploads/wiznethome/Chip/W5200/Documents/W5200_DS_V140E.pdf
@@ -16,6 +15,7 @@
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/of_net.h>
+#include <linux/of_device.h>
 #include <linux/spi/spi.h>
 
 #include "w5100.h"
@@ -410,14 +410,32 @@
 	.init = w5500_spi_init,
 };
 
+static const struct of_device_id w5100_of_match[] = {
+	{ .compatible = "wiznet,w5100", .data = (const void*)W5100, },
+	{ .compatible = "wiznet,w5200", .data = (const void*)W5200, },
+	{ .compatible = "wiznet,w5500", .data = (const void*)W5500, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, w5100_of_match);
+
 static int w5100_spi_probe(struct spi_device *spi)
 {
-	const struct spi_device_id *id = spi_get_device_id(spi);
+	const struct of_device_id *of_id;
 	const struct w5100_ops *ops;
+	kernel_ulong_t driver_data;
 	int priv_size;
 	const void *mac = of_get_mac_address(spi->dev.of_node);
 
-	switch (id->driver_data) {
+	if (spi->dev.of_node) {
+		of_id = of_match_device(w5100_of_match, &spi->dev);
+		if (!of_id)
+			return -ENODEV;
+		driver_data = (kernel_ulong_t)of_id->data;
+	} else {
+		driver_data = spi_get_device_id(spi)->driver_data;
+	}
+
+	switch (driver_data) {
 	case W5100:
 		ops = &w5100_spi_ops;
 		priv_size = 0;
@@ -454,6 +472,7 @@
 	.driver		= {
 		.name	= "w5100",
 		.pm	= &w5100_pm_ops,
+		.of_match_table = w5100_of_match,
 	},
 	.probe		= w5100_spi_probe,
 	.remove		= w5100_spi_remove,
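/* Editor's note: a sketch of the dual-path probe pattern added above:
 * prefer OF match data when a device-tree node exists, otherwise fall
 * back to the legacy spi_device_id table so board files keep working.
 */
static int example_get_chip_id(struct spi_device *spi,
			       const struct of_device_id *table,
			       kernel_ulong_t *id)
{
	const struct of_device_id *of_id;

	if (spi->dev.of_node) {
		of_id = of_match_device(table, &spi->dev);
		if (!of_id)
			return -ENODEV;
		*id = (kernel_ulong_t)of_id->data;
	} else {
		*id = spi_get_device_id(spi)->driver_data;
	}
	return 0;
}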
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index d8ba512..bede1ff 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Ethernet driver for the WIZnet W5100 chip.
  *
  * Copyright (C) 2006-2008 WIZnet Co.,Ltd.
  * Copyright (C) 2012 Mike Sinkovsky <msink@permonline.ru>
- *
- * Licensed under the GPL-2 or later.
  */
 
 #include <linux/kernel.h>
@@ -219,7 +218,6 @@
 static inline int w5100_write_direct(struct net_device *ndev, u32 addr, u8 data)
 {
 	__w5100_write_direct(ndev, addr, data);
-	mmiowb();
 
 	return 0;
 }
@@ -236,7 +234,6 @@
 {
 	__w5100_write_direct(ndev, addr, data >> 8);
 	__w5100_write_direct(ndev, addr + 1, data);
-	mmiowb();
 
 	return 0;
 }
@@ -260,8 +257,6 @@
 	for (i = 0; i < len; i++, addr++)
 		__w5100_write_direct(ndev, addr, *buf++);
 
-	mmiowb();
-
 	return 0;
 }
 
@@ -375,7 +370,6 @@
 	for (i = 0; i < len; i++)
 		*buf++ = w5100_read_direct(ndev, W5100_IDM_DR);
 
-	mmiowb();
 	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
 
 	return 0;
@@ -394,7 +388,6 @@
 	for (i = 0; i < len; i++)
 		__w5100_write_direct(ndev, W5100_IDM_DR, *buf++);
 
-	mmiowb();
 	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
 
 	return 0;
@@ -1164,7 +1157,7 @@
 	INIT_WORK(&priv->setrx_work, w5100_setrx_work);
 	INIT_WORK(&priv->restart_work, w5100_restart_work);
 
-	if (mac_addr)
+	if (!IS_ERR_OR_NULL(mac_addr))
 		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
 	else
 		eth_hw_addr_random(ndev);
diff --git a/drivers/net/ethernet/wiznet/w5100.h b/drivers/net/ethernet/wiznet/w5100.h
index 17983a3..5d3d4b5 100644
--- a/drivers/net/ethernet/wiznet/w5100.h
+++ b/drivers/net/ethernet/wiznet/w5100.h
@@ -1,10 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * Ethernet driver for the WIZnet W5100 chip.
  *
  * Copyright (C) 2006-2008 WIZnet Co.,Ltd.
  * Copyright (C) 2012 Mike Sinkovsky <msink@permonline.ru>
- *
- * Licensed under the GPL-2 or later.
  */
 
 enum {
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index 80fdbff..6ba2747 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -1,11 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Ethernet driver for the WIZnet W5300 chip.
  *
  * Copyright (C) 2008-2009 WIZnet Co.,Ltd.
  * Copyright (C) 2011 Taehun Kim <kth3321 <at> gmail.com>
  * Copyright (C) 2012 Mike Sinkovsky <msink@permonline.ru>
- *
- * Licensed under the GPL-2 or later.
  */
 
 #include <linux/kernel.h>
@@ -141,7 +140,6 @@
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5300_write_direct(priv, W5300_IDM_AR, addr);
-	mmiowb();
 	data = w5300_read_direct(priv, W5300_IDM_DR);
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
 
@@ -154,9 +152,7 @@
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5300_write_direct(priv, W5300_IDM_AR, addr);
-	mmiowb();
 	w5300_write_direct(priv, W5300_IDM_DR, data);
-	mmiowb();
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
 }
 
@@ -192,7 +188,6 @@
 	unsigned long timeout = jiffies + msecs_to_jiffies(100);
 
 	w5300_write(priv, W5300_S0_CR, cmd);
-	mmiowb();
 
 	while (w5300_read(priv, W5300_S0_CR) != 0) {
 		if (time_after(jiffies, timeout))
@@ -241,18 +236,15 @@
 	w5300_write(priv, W5300_SHARH,
 		      ndev->dev_addr[4] << 8 |
 		      ndev->dev_addr[5]);
-	mmiowb();
 }
 
 static void w5300_hw_reset(struct w5300_priv *priv)
 {
 	w5300_write_direct(priv, W5300_MR, MR_RST);
-	mmiowb();
 	mdelay(5);
 	w5300_write_direct(priv, W5300_MR, priv->indirect ?
 				 MR_WDF(7) | MR_PB | MR_IND :
 				 MR_WDF(7) | MR_PB);
-	mmiowb();
 	w5300_write(priv, W5300_IMR, 0);
 	w5300_write_macaddr(priv);
 
@@ -264,24 +256,20 @@
 	w5300_write32(priv, W5300_TMSRL, 64 << 24);
 	w5300_write32(priv, W5300_TMSRH, 0);
 	w5300_write(priv, W5300_MTYPE, 0x00ff);
-	mmiowb();
 }
 
 static void w5300_hw_start(struct w5300_priv *priv)
 {
 	w5300_write(priv, W5300_S0_MR, priv->promisc ?
 			  S0_MR_MACRAW : S0_MR_MACRAW_MF);
-	mmiowb();
 	w5300_command(priv, S0_CR_OPEN);
 	w5300_write(priv, W5300_S0_IMR, S0_IR_RECV | S0_IR_SENDOK);
 	w5300_write(priv, W5300_IMR, IR_S0);
-	mmiowb();
 }
 
 static void w5300_hw_close(struct w5300_priv *priv)
 {
 	w5300_write(priv, W5300_IMR, 0);
-	mmiowb();
 	w5300_command(priv, S0_CR_CLOSE);
 }
 
@@ -372,7 +360,6 @@
 	netif_stop_queue(ndev);
 
 	w5300_write_frame(priv, skb->data, skb->len);
-	mmiowb();
 	ndev->stats.tx_packets++;
 	ndev->stats.tx_bytes += skb->len;
 	dev_kfree_skb(skb);
@@ -419,7 +406,6 @@
 	if (rx_count < budget) {
 		napi_complete_done(napi, rx_count);
 		w5300_write(priv, W5300_IMR, IR_S0);
-		mmiowb();
 	}
 
 	return rx_count;
@@ -434,7 +420,6 @@
 	if (!ir)
 		return IRQ_NONE;
 	w5300_write(priv, W5300_S0_IR, ir);
-	mmiowb();
 
 	if (ir & S0_IR_SENDOK) {
 		netif_dbg(priv, tx_done, ndev, "tx done\n");
@@ -444,7 +429,6 @@
 	if (ir & S0_IR_RECV) {
 		if (napi_schedule_prep(&priv->napi)) {
 			w5300_write(priv, W5300_IMR, 0);
-			mmiowb();
 			__napi_schedule(&priv->napi);
 		}
 	}
@@ -661,8 +645,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int w5300_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct w5300_priv *priv = netdev_priv(ndev);
 
 	if (netif_running(ndev)) {
@@ -676,8 +659,7 @@
 
 static int w5300_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct w5300_priv *priv = netdev_priv(ndev);
 
 	if (!netif_running(ndev)) {
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index da4ec57..8d994ce 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Xilinx device configuration
 #
@@ -5,7 +6,7 @@
 config NET_VENDOR_XILINX
 	bool "Xilinx devices"
 	default y
-	depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
+	depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS || X86 || ARM || COMPILE_TEST
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -25,16 +26,15 @@
 
 config XILINX_AXI_EMAC
 	tristate "Xilinx 10/100/1000 AXI Ethernet support"
-	depends on MICROBLAZE
-	select PHYLIB
+	depends on MICROBLAZE || X86 || ARM || COMPILE_TEST
+	select PHYLINK
 	---help---
 	  This driver supports the 10/100/1000 Ethernet from Xilinx for the
 	  AXI bus interface used in Xilinx Virtex FPGAs.
 
 config XILINX_LL_TEMAC
 	tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver"
-	depends on (PPC || MICROBLAZE)
-	depends on !64BIT || BROKEN
+	depends on PPC || MICROBLAZE || X86 || COMPILE_TEST
 	select PHYLIB
 	---help---
 	  This driver supports the Xilinx 10/100/1000 LocalLink TEMAC
diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 1075752..276292b 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -334,6 +334,9 @@
 
 	/* Connection to PHY device */
 	struct device_node *phy_node;
+	/* For non-device-tree devices */
+	char phy_name[MII_BUS_ID_SIZE + 3];
+	phy_interface_t phy_interface;
 
 	/* MDIO bus data */
 	struct mii_bus *mii_bus;	/* MII bus reference */
@@ -344,8 +347,10 @@
 #ifdef CONFIG_PPC_DCR
 	dcr_host_t sdma_dcrs;
 #endif
-	u32 (*dma_in)(struct temac_local *, int);
-	void (*dma_out)(struct temac_local *, int, u32);
+	u32 (*temac_ior)(struct temac_local *lp, int offset);
+	void (*temac_iow)(struct temac_local *lp, int offset, u32 value);
+	u32 (*dma_in)(struct temac_local *lp, int reg);
+	void (*dma_out)(struct temac_local *lp, int reg, u32 value);
 
 	int tx_irq;
 	int rx_irq;
@@ -353,7 +358,10 @@
 
 	struct sk_buff **rx_skb;
 	spinlock_t rx_lock;
-	struct mutex indirect_mutex;
+	/* For synchronization of indirect register access.  Must be a
+	 * lock shared between interfaces in the same TEMAC block.
+	 */
+	spinlock_t *indirect_lock;
 	u32 options;			/* Current options word */
 	int last_link;
 	unsigned int temac_features;
@@ -367,18 +375,25 @@
 	int tx_bd_next;
 	int tx_bd_tail;
 	int rx_bd_ci;
+
+	/* DMA channel control setup */
+	u32 tx_chnl_ctrl;
+	u32 rx_chnl_ctrl;
 };
 
+/* Wrappers for temac_ior()/temac_iow() function pointers above */
+#define temac_ior(lp, o) ((lp)->temac_ior(lp, o))
+#define temac_iow(lp, o, v) ((lp)->temac_iow(lp, o, v))
+
 /* xilinx_temac.c */
-u32 temac_ior(struct temac_local *lp, int offset);
-void temac_iow(struct temac_local *lp, int offset, u32 value);
 int temac_indirect_busywait(struct temac_local *lp);
 u32 temac_indirect_in32(struct temac_local *lp, int reg);
+u32 temac_indirect_in32_locked(struct temac_local *lp, int reg);
 void temac_indirect_out32(struct temac_local *lp, int reg, u32 value);
-
+void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value);
 
 /* xilinx_temac_mdio.c */
-int temac_mdio_setup(struct temac_local *lp, struct device_node *np);
+int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev);
 void temac_mdio_teardown(struct temac_local *lp);
 
 #endif /* XILINX_LL_TEMAC_H */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 60abc92..21c1b43 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for Xilinx TEMAC Ethernet device
  *
@@ -21,7 +22,6 @@
  *
  * TODO:
  * - Factor out locallink DMA code into separate driver
- * - Fix multicast assignment.
  * - Fix support for hardware checksumming.
  * - Testing.  Lots and lots of testing.
  *
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
+#include <linux/if_ether.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
@@ -51,6 +52,8 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
+#include <linux/processor.h>
+#include <linux/platform_data/xilinx-ll-temac.h>
 
 #include "ll_temac.h"
 
@@ -61,81 +64,170 @@
  * Low level register access functions
  */
 
-u32 temac_ior(struct temac_local *lp, int offset)
+static u32 _temac_ior_be(struct temac_local *lp, int offset)
 {
-	return in_be32(lp->regs + offset);
+	return ioread32be(lp->regs + offset);
 }
 
-void temac_iow(struct temac_local *lp, int offset, u32 value)
+static void _temac_iow_be(struct temac_local *lp, int offset, u32 value)
 {
-	out_be32(lp->regs + offset, value);
+	return iowrite32be(value, lp->regs + offset);
 }
 
+static u32 _temac_ior_le(struct temac_local *lp, int offset)
+{
+	return ioread32(lp->regs + offset);
+}
+
+static void _temac_iow_le(struct temac_local *lp, int offset, u32 value)
+{
+	return iowrite32(value, lp->regs + offset);
+}
+
+static bool hard_acs_rdy(struct temac_local *lp)
+{
+	return temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK;
+}
+
+static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout)
+{
+	ktime_t cur = ktime_get();
+
+	return hard_acs_rdy(lp) || ktime_after(cur, timeout);
+}
+
+/* Poll for maximum 20 ms.  This is similar to the 2 jiffies @ 100 Hz
+ * that was used before, and should cover MDIO bus speed down to 3200
+ * Hz.
+ */
+#define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC)
+
+/**
+ * temac_indirect_busywait - Wait for current indirect register access
+ * to complete.
+ */
 int temac_indirect_busywait(struct temac_local *lp)
 {
-	unsigned long end = jiffies + 2;
+	ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS);
 
-	while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) {
-		if (time_before_eq(end, jiffies)) {
-			WARN_ON(1);
-			return -ETIMEDOUT;
-		}
-		msleep(1);
-	}
-	return 0;
+	spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout));
+	if (WARN_ON(!hard_acs_rdy(lp)))
+		return -ETIMEDOUT;
+	else
+		return 0;
 }
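/* Editor's note: illustrative recap of the poll loop above.
 * spin_until_cond() (linux/processor.h) busy-waits with cpu_relax()
 * until the condition holds; the ktime deadline bounds the spin,
 * replacing the old jiffies + msleep() loop.
 */
static int example_wait_ready(struct temac_local *lp)
{
	ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS);

	spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout));
	return hard_acs_rdy(lp) ? 0 : -ETIMEDOUT;
}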
 
 /**
- * temac_indirect_in32
- *
- * lp->indirect_mutex must be held when calling this function
+ * temac_indirect_in32 - Indirect register read access.  This function
+ * must be called without lp->indirect_lock being held.
  */
 u32 temac_indirect_in32(struct temac_local *lp, int reg)
 {
-	u32 val;
+	unsigned long flags;
+	int val;
 
-	if (temac_indirect_busywait(lp))
-		return -ETIMEDOUT;
-	temac_iow(lp, XTE_CTL0_OFFSET, reg);
-	if (temac_indirect_busywait(lp))
-		return -ETIMEDOUT;
-	val = temac_ior(lp, XTE_LSW0_OFFSET);
-
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	val = temac_indirect_in32_locked(lp, reg);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 	return val;
 }
 
 /**
- * temac_indirect_out32
- *
- * lp->indirect_mutex must be held when calling this function
+ * temac_indirect_in32_locked - Indirect register read access.  This
+ * function must be called with lp->indirect_lock being held.  Use
+ * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
+ * repeated lock/unlock and to ensure uninterrupted access to indirect
+ * registers.
+ */
+u32 temac_indirect_in32_locked(struct temac_local *lp, int reg)
+{
+	/* This initial wait should normally not spin, as we always
+	 * try to wait for indirect access to complete before
+	 * releasing the indirect_lock.
+	 */
+	if (WARN_ON(temac_indirect_busywait(lp)))
+		return -ETIMEDOUT;
+	/* Initiate read from indirect register */
+	temac_iow(lp, XTE_CTL0_OFFSET, reg);
+	/* Wait for indirect register access to complete.  We really
+	 * should not see timeouts, and could even end up causing
+	 * problems for the following indirect access, so let's make a bit
+	 * of WARN noise.
+	 */
+	if (WARN_ON(temac_indirect_busywait(lp)))
+		return -ETIMEDOUT;
+	/* Value is ready now */
+	return temac_ior(lp, XTE_LSW0_OFFSET);
+}
+
+/**
+ * temac_indirect_out32 - Indirect register write access.  This function
+ * must be called without lp->indirect_lock being held.
  */
 void temac_indirect_out32(struct temac_local *lp, int reg, u32 value)
 {
-	if (temac_indirect_busywait(lp))
+	unsigned long flags;
+
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, reg, value);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
+}
+
+/**
+ * temac_indirect_out32_locked - Indirect register write access.  This
+ * function must be called with lp->indirect_lock being held.  Use
+ * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
+ * repeated lock/unlock and to ensure uninterrupted access to indirect
+ * registers.
+ */
+void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value)
+{
+	/* As in temac_indirect_in32_locked(), we should normally not
+	 * spin here.  And if it happens, we actually end up silently
+	 * ignoring the write request.  Ouch.
+	 */
+	if (WARN_ON(temac_indirect_busywait(lp)))
 		return;
+	/* Initiate write to indirect register */
 	temac_iow(lp, XTE_LSW0_OFFSET, value);
 	temac_iow(lp, XTE_CTL0_OFFSET, CNTLREG_WRITE_ENABLE_MASK | reg);
-	temac_indirect_busywait(lp);
+	/* As in temac_indirect_in32_locked(), we should not see timeouts
+	 * here.  And if it happens, we continue before the write has
+	 * completed.  Not good.
+	 */
+	WARN_ON(temac_indirect_busywait(lp));
 }
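/* Editor's note: a hedged usage sketch for the _locked variants above:
 * take lp->indirect_lock once, issue several indirect accesses, then
 * release, instead of locking and unlocking per register.
 */
static void example_batched_write(struct temac_local *lp, u32 lsw, u32 msw)
{
	unsigned long flags;

	spin_lock_irqsave(lp->indirect_lock, flags);
	temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET, lsw);
	temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET, msw);
	spin_unlock_irqrestore(lp->indirect_lock, flags);
}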
 
 /**
- * temac_dma_in32 - Memory mapped DMA read, this function expects a
- * register input that is based on DCR word addresses which
- * are then converted to memory mapped byte addresses
+ * temac_dma_in32_* - Memory mapped DMA read, these functions expect a
+ * register input that is based on DCR word addresses which are then
+ * converted to memory mapped byte addresses.  To be assigned to
+ * lp->dma_in.
  */
-static u32 temac_dma_in32(struct temac_local *lp, int reg)
+static u32 temac_dma_in32_be(struct temac_local *lp, int reg)
 {
-	return in_be32(lp->sdma_regs + (reg << 2));
+	return ioread32be(lp->sdma_regs + (reg << 2));
+}
+
+static u32 temac_dma_in32_le(struct temac_local *lp, int reg)
+{
+	return ioread32(lp->sdma_regs + (reg << 2));
 }
 
 /**
- * temac_dma_out32 - Memory mapped DMA read, this function expects a
- * register input that is based on DCR word addresses which
- * are then converted to memory mapped byte addresses
+ * temac_dma_out32_* - Memory mapped DMA write, these functions expect
+ * a register input that is based on DCR word addresses which are then
+ * converted to memory mapped byte addresses.  To be assigned to
+ * lp->dma_out.
  */
-static void temac_dma_out32(struct temac_local *lp, int reg, u32 value)
+static void temac_dma_out32_be(struct temac_local *lp, int reg, u32 value)
 {
-	out_be32(lp->sdma_regs + (reg << 2), value);
+	iowrite32be(value, lp->sdma_regs + (reg << 2));
+}
+
+static void temac_dma_out32_le(struct temac_local *lp, int reg, u32 value)
+{
+	iowrite32(value, lp->sdma_regs + (reg << 2));
 }
 
 /* DMA register access functions can be DCR based or memory mapped.
@@ -187,7 +279,7 @@
 
 /*
  * temac_dcr_setup - This is a stub for when DCR is not supported,
- * such as with MicroBlaze
+ * such as with MicroBlaze and x86
  */
 static int temac_dcr_setup(struct temac_local *lp, struct platform_device *op,
 				struct device_node *np)
@@ -225,7 +317,6 @@
 		dma_free_coherent(ndev->dev.parent,
 				sizeof(*lp->tx_bd_v) * TX_BD_NUM,
 				lp->tx_bd_v, lp->tx_bd_p);
-	kfree(lp->rx_skb);
 }
 
 /**
@@ -235,34 +326,36 @@
 {
 	struct temac_local *lp = netdev_priv(ndev);
 	struct sk_buff *skb;
+	dma_addr_t skb_dma_addr;
 	int i;
 
-	lp->rx_skb = kcalloc(RX_BD_NUM, sizeof(*lp->rx_skb), GFP_KERNEL);
+	lp->rx_skb = devm_kcalloc(&ndev->dev, RX_BD_NUM, sizeof(*lp->rx_skb),
+				  GFP_KERNEL);
 	if (!lp->rx_skb)
 		goto out;
 
 	/* allocate the tx and rx ring buffer descriptors. */
 	/* returns a virtual address and a physical address. */
-	lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
-					  sizeof(*lp->tx_bd_v) * TX_BD_NUM,
-					  &lp->tx_bd_p, GFP_KERNEL);
+	lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+					 sizeof(*lp->tx_bd_v) * TX_BD_NUM,
+					 &lp->tx_bd_p, GFP_KERNEL);
 	if (!lp->tx_bd_v)
 		goto out;
 
-	lp->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
-					  sizeof(*lp->rx_bd_v) * RX_BD_NUM,
-					  &lp->rx_bd_p, GFP_KERNEL);
+	lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+					 sizeof(*lp->rx_bd_v) * RX_BD_NUM,
+					 &lp->rx_bd_p, GFP_KERNEL);
 	if (!lp->rx_bd_v)
 		goto out;
 
 	for (i = 0; i < TX_BD_NUM; i++) {
-		lp->tx_bd_v[i].next = lp->tx_bd_p +
-				sizeof(*lp->tx_bd_v) * ((i + 1) % TX_BD_NUM);
+		lp->tx_bd_v[i].next = cpu_to_be32(lp->tx_bd_p
+				+ sizeof(*lp->tx_bd_v) * ((i + 1) % TX_BD_NUM));
 	}
 
 	for (i = 0; i < RX_BD_NUM; i++) {
-		lp->rx_bd_v[i].next = lp->rx_bd_p +
-				sizeof(*lp->rx_bd_v) * ((i + 1) % RX_BD_NUM);
+		lp->rx_bd_v[i].next = cpu_to_be32(lp->rx_bd_p
+				+ sizeof(*lp->rx_bd_v) * ((i + 1) % RX_BD_NUM));
 
 		skb = netdev_alloc_skb_ip_align(ndev,
 						XTE_MAX_JUMBO_FRAME_SIZE);
@@ -271,31 +364,23 @@
 
 		lp->rx_skb[i] = skb;
 		/* returns physical address of skb->data */
-		lp->rx_bd_v[i].phys = dma_map_single(ndev->dev.parent,
-						     skb->data,
-						     XTE_MAX_JUMBO_FRAME_SIZE,
-						     DMA_FROM_DEVICE);
-		lp->rx_bd_v[i].len = XTE_MAX_JUMBO_FRAME_SIZE;
-		lp->rx_bd_v[i].app0 = STS_CTRL_APP0_IRQONEND;
+		skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
+					      XTE_MAX_JUMBO_FRAME_SIZE,
+					      DMA_FROM_DEVICE);
+		lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr);
+		lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
+		lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
 	}
 
-	lp->dma_out(lp, TX_CHNL_CTRL, 0x10220400 |
-					  CHNL_CTRL_IRQ_EN |
-					  CHNL_CTRL_IRQ_DLY_EN |
-					  CHNL_CTRL_IRQ_COAL_EN);
-	/* 0x10220483 */
-	/* 0x00100483 */
-	lp->dma_out(lp, RX_CHNL_CTRL, 0xff070000 |
-					  CHNL_CTRL_IRQ_EN |
-					  CHNL_CTRL_IRQ_DLY_EN |
-					  CHNL_CTRL_IRQ_COAL_EN |
-					  CHNL_CTRL_IRQ_IOE);
-	/* 0xff010283 */
-
-	lp->dma_out(lp, RX_CURDESC_PTR,  lp->rx_bd_p);
-	lp->dma_out(lp, RX_TAILDESC_PTR,
-		       lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
-	lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
+	/* Configure DMA channel (irq setup) */
+	lp->dma_out(lp, TX_CHNL_CTRL, lp->tx_chnl_ctrl |
+		    0x00000400 | /* use 1-bit wide counters; currently not used */
+		    CHNL_CTRL_IRQ_EN | CHNL_CTRL_IRQ_ERR_EN |
+		    CHNL_CTRL_IRQ_DLY_EN | CHNL_CTRL_IRQ_COAL_EN);
+	lp->dma_out(lp, RX_CHNL_CTRL, lp->rx_chnl_ctrl |
+		    CHNL_CTRL_IRQ_IOE |
+		    CHNL_CTRL_IRQ_EN | CHNL_CTRL_IRQ_ERR_EN |
+		    CHNL_CTRL_IRQ_DLY_EN | CHNL_CTRL_IRQ_COAL_EN);
 
 	/* Init descriptor indexes */
 	lp->tx_bd_ci = 0;
@@ -303,6 +388,15 @@
 	lp->tx_bd_tail = 0;
 	lp->rx_bd_ci = 0;
 
+	/* Enable RX DMA transfers */
+	wmb();
+	lp->dma_out(lp, RX_CURDESC_PTR,  lp->rx_bd_p);
+	lp->dma_out(lp, RX_TAILDESC_PTR,
+		       lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+
+	/* Prepare for TX DMA transfer */
+	lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
+
 	return 0;
 
 out:
@@ -317,25 +411,26 @@
 static void temac_do_set_mac_address(struct net_device *ndev)
 {
 	struct temac_local *lp = netdev_priv(ndev);
+	unsigned long flags;
 
 	/* set up unicast MAC address filter set its mac address */
-	mutex_lock(&lp->indirect_mutex);
-	temac_indirect_out32(lp, XTE_UAW0_OFFSET,
-			     (ndev->dev_addr[0]) |
-			     (ndev->dev_addr[1] << 8) |
-			     (ndev->dev_addr[2] << 16) |
-			     (ndev->dev_addr[3] << 24));
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET,
+				    (ndev->dev_addr[0]) |
+				    (ndev->dev_addr[1] << 8) |
+				    (ndev->dev_addr[2] << 16) |
+				    (ndev->dev_addr[3] << 24));
 	/* There are reserved bits in EUAW1,
 	 * so don't affect them.  Set MAC bits [47:32] in EUAW1. */
-	temac_indirect_out32(lp, XTE_UAW1_OFFSET,
-			     (ndev->dev_addr[4] & 0x000000ff) |
-			     (ndev->dev_addr[5] << 8));
-	mutex_unlock(&lp->indirect_mutex);
+	temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET,
+				    (ndev->dev_addr[4] & 0x000000ff) |
+				    (ndev->dev_addr[5] << 8));
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 }
 
 static int temac_init_mac_address(struct net_device *ndev, const void *address)
 {
-	memcpy(ndev->dev_addr, address, ETH_ALEN);
+	ether_addr_copy(ndev->dev_addr, address);
 	if (!is_valid_ether_addr(ndev->dev_addr))
 		eth_hw_addr_random(ndev);
 	temac_do_set_mac_address(ndev);
@@ -356,49 +451,58 @@
 static void temac_set_multicast_list(struct net_device *ndev)
 {
 	struct temac_local *lp = netdev_priv(ndev);
-	u32 multi_addr_msw, multi_addr_lsw, val;
-	int i;
+	u32 multi_addr_msw, multi_addr_lsw;
+	int i = 0;
+	unsigned long flags;
+	bool promisc_mode_disabled = false;
 
-	mutex_lock(&lp->indirect_mutex);
-	if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) ||
-	    netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM) {
-		/*
-		 *	We must make the kernel realise we had to move
-		 *	into promisc mode or we start all out war on
-		 *	the cable. If it was a promisc request the
-		 *	flag is already set. If not we assert it.
-		 */
-		ndev->flags |= IFF_PROMISC;
+	if (ndev->flags & (IFF_PROMISC | IFF_ALLMULTI) ||
+	    (netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM)) {
 		temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK);
 		dev_info(&ndev->dev, "Promiscuous mode enabled.\n");
-	} else if (!netdev_mc_empty(ndev)) {
+		return;
+	}
+
+	spin_lock_irqsave(lp->indirect_lock, flags);
+
+	if (!netdev_mc_empty(ndev)) {
 		struct netdev_hw_addr *ha;
 
-		i = 0;
 		netdev_for_each_mc_addr(ha, ndev) {
-			if (i >= MULTICAST_CAM_TABLE_NUM)
+			if (WARN_ON(i >= MULTICAST_CAM_TABLE_NUM))
 				break;
 			multi_addr_msw = ((ha->addr[3] << 24) |
 					  (ha->addr[2] << 16) |
 					  (ha->addr[1] << 8) |
 					  (ha->addr[0]));
-			temac_indirect_out32(lp, XTE_MAW0_OFFSET,
-					     multi_addr_msw);
+			temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET,
+						    multi_addr_msw);
 			multi_addr_lsw = ((ha->addr[5] << 8) |
 					  (ha->addr[4]) | (i << 16));
-			temac_indirect_out32(lp, XTE_MAW1_OFFSET,
-					     multi_addr_lsw);
+			temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET,
+						    multi_addr_lsw);
 			i++;
 		}
-	} else {
-		val = temac_indirect_in32(lp, XTE_AFM_OFFSET);
-		temac_indirect_out32(lp, XTE_AFM_OFFSET,
-				     val & ~XTE_AFM_EPPRM_MASK);
-		temac_indirect_out32(lp, XTE_MAW0_OFFSET, 0);
-		temac_indirect_out32(lp, XTE_MAW1_OFFSET, 0);
-		dev_info(&ndev->dev, "Promiscuous mode disabled.\n");
 	}
-	mutex_unlock(&lp->indirect_mutex);
+
+	/* Clear any remaining/unused address table entries */
+	while (i < MULTICAST_CAM_TABLE_NUM) {
+		temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, 0);
+		temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, i << 16);
+		i++;
+	}
+
+	/* Enable address filter block if currently disabled */
+	if (temac_indirect_in32_locked(lp, XTE_AFM_OFFSET)
+	    & XTE_AFM_EPPRM_MASK) {
+		temac_indirect_out32_locked(lp, XTE_AFM_OFFSET, 0);
+		promisc_mode_disabled = true;
+	}
+
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
+
+	if (promisc_mode_disabled)
+		dev_info(&ndev->dev, "Promiscuous mode disabled.\n");
 }
 
 static struct temac_option {
@@ -489,17 +593,19 @@
 	struct temac_local *lp = netdev_priv(ndev);
 	struct temac_option *tp = &temac_options[0];
 	int reg;
+	unsigned long flags;
 
-	mutex_lock(&lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
 	while (tp->opt) {
-		reg = temac_indirect_in32(lp, tp->reg) & ~tp->m_or;
-		if (options & tp->opt)
+		reg = temac_indirect_in32_locked(lp, tp->reg) & ~tp->m_or;
+		if (options & tp->opt) {
 			reg |= tp->m_or;
-		temac_indirect_out32(lp, tp->reg, reg);
+			temac_indirect_out32_locked(lp, tp->reg, reg);
+		}
 		tp++;
 	}
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 	lp->options |= options;
-	mutex_unlock(&lp->indirect_mutex);
 
 	return 0;
 }
@@ -510,6 +616,7 @@
 	struct temac_local *lp = netdev_priv(ndev);
 	u32 timeout;
 	u32 val;
+	unsigned long flags;
 
 	/* Perform a software reset */
 
@@ -518,7 +625,6 @@
 
 	dev_dbg(&ndev->dev, "%s()\n", __func__);
 
-	mutex_lock(&lp->indirect_mutex);
 	/* Reset the receiver and wait for it to finish reset */
 	temac_indirect_out32(lp, XTE_RXC1_OFFSET, XTE_RXC1_RXRST_MASK);
 	timeout = 1000;
@@ -544,8 +650,11 @@
 	}
 
 	/* Disable the receiver */
-	val = temac_indirect_in32(lp, XTE_RXC1_OFFSET);
-	temac_indirect_out32(lp, XTE_RXC1_OFFSET, val & ~XTE_RXC1_RXEN_MASK);
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	val = temac_indirect_in32_locked(lp, XTE_RXC1_OFFSET);
+	temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET,
+				    val & ~XTE_RXC1_RXEN_MASK);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	/* Reset Local Link (DMA) */
 	lp->dma_out(lp, DMA_CONTROL_REG, DMA_CONTROL_RST);
@@ -565,12 +674,12 @@
 				"temac_device_reset descriptor allocation failed\n");
 	}
 
-	temac_indirect_out32(lp, XTE_RXC0_OFFSET, 0);
-	temac_indirect_out32(lp, XTE_RXC1_OFFSET, 0);
-	temac_indirect_out32(lp, XTE_TXC_OFFSET, 0);
-	temac_indirect_out32(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK);
-
-	mutex_unlock(&lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, XTE_RXC0_OFFSET, 0);
+	temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, 0);
+	temac_indirect_out32_locked(lp, XTE_TXC_OFFSET, 0);
+	temac_indirect_out32_locked(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	/* Sync default options with HW
 	 * but leave receiver and transmitter disabled.  */
@@ -594,13 +703,14 @@
 	struct phy_device *phy = ndev->phydev;
 	u32 mii_speed;
 	int link_state;
+	unsigned long flags;
 
 	/* hash together the state values to decide if something has changed */
 	link_state = phy->speed | (phy->duplex << 1) | phy->link;
 
-	mutex_lock(&lp->indirect_mutex);
 	if (lp->last_link != link_state) {
-		mii_speed = temac_indirect_in32(lp, XTE_EMCFG_OFFSET);
+		spin_lock_irqsave(lp->indirect_lock, flags);
+		mii_speed = temac_indirect_in32_locked(lp, XTE_EMCFG_OFFSET);
 		mii_speed &= ~XTE_EMCFG_LINKSPD_MASK;
 
 		switch (phy->speed) {
@@ -610,27 +720,57 @@
 		}
 
 		/* Write new speed setting out to TEMAC */
-		temac_indirect_out32(lp, XTE_EMCFG_OFFSET, mii_speed);
+		temac_indirect_out32_locked(lp, XTE_EMCFG_OFFSET, mii_speed);
+		spin_unlock_irqrestore(lp->indirect_lock, flags);
+
 		lp->last_link = link_state;
 		phy_print_status(phy);
 	}
-	mutex_unlock(&lp->indirect_mutex);
 }
 
+#ifdef CONFIG_64BIT
+
+static void ptr_to_txbd(void *p, struct cdmac_bd *bd)
+{
+	bd->app3 = (u32)(((u64)p) >> 32);
+	bd->app4 = (u32)((u64)p & 0xFFFFFFFF);
+}
+
+static void *ptr_from_txbd(struct cdmac_bd *bd)
+{
+	return (void *)(((u64)(bd->app3) << 32) | bd->app4);
+}
+
+#else
+
+static void ptr_to_txbd(void *p, struct cdmac_bd *bd)
+{
+	bd->app4 = (u32)p;
+}
+
+static void *ptr_from_txbd(struct cdmac_bd *bd)
+{
+	return (void *)(bd->app4);
+}
+
+#endif
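
On 64-bit kernels a pointer no longer fits in the 32-bit app4 word, so the skb pointer is split across app3 (upper half) and app4 (lower half); 32-bit builds use app4 alone.  A hedged round-trip sketch, where cur_p and skb stand for a descriptor and its socket buffer:

	ptr_to_txbd((void *)skb, cur_p);	/* app3/app4 <- pointer halves */
	WARN_ON(ptr_from_txbd(cur_p) != (void *)skb);	/* round trip is loss-free */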
+
 static void temac_start_xmit_done(struct net_device *ndev)
 {
 	struct temac_local *lp = netdev_priv(ndev);
 	struct cdmac_bd *cur_p;
 	unsigned int stat = 0;
+	struct sk_buff *skb;
 
 	cur_p = &lp->tx_bd_v[lp->tx_bd_ci];
-	stat = cur_p->app0;
+	stat = be32_to_cpu(cur_p->app0);
 
 	while (stat & STS_CTRL_APP0_CMPLT) {
-		dma_unmap_single(ndev->dev.parent, cur_p->phys, cur_p->len,
-				 DMA_TO_DEVICE);
-		if (cur_p->app4)
-			dev_kfree_skb_irq((struct sk_buff *)cur_p->app4);
+		dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
+				 be32_to_cpu(cur_p->len), DMA_TO_DEVICE);
+		skb = (struct sk_buff *)ptr_from_txbd(cur_p);
+		if (skb)
+			dev_consume_skb_irq(skb);
 		cur_p->app0 = 0;
 		cur_p->app1 = 0;
 		cur_p->app2 = 0;
@@ -638,14 +778,14 @@
 		cur_p->app4 = 0;
 
 		ndev->stats.tx_packets++;
-		ndev->stats.tx_bytes += cur_p->len;
+		ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);
 
 		lp->tx_bd_ci++;
 		if (lp->tx_bd_ci >= TX_BD_NUM)
 			lp->tx_bd_ci = 0;
 
 		cur_p = &lp->tx_bd_v[lp->tx_bd_ci];
-		stat = cur_p->app0;
+		stat = be32_to_cpu(cur_p->app0);
 	}
 
 	netif_wake_queue(ndev);
@@ -674,11 +814,12 @@
 	return 0;
 }
 
-static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t
+temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct temac_local *lp = netdev_priv(ndev);
 	struct cdmac_bd *cur_p;
-	dma_addr_t start_p, tail_p;
+	dma_addr_t start_p, tail_p, skb_dma_addr;
 	int ii;
 	unsigned long num_frag;
 	skb_frag_t *frag;
@@ -688,7 +829,7 @@
 	start_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
 	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
 
-	if (temac_check_tx_bd_space(lp, num_frag)) {
+	if (temac_check_tx_bd_space(lp, num_frag + 1)) {
 		if (!netif_queue_stopped(ndev))
 			netif_stop_queue(ndev);
 		return NETDEV_TX_BUSY;
@@ -699,16 +840,18 @@
 		unsigned int csum_start_off = skb_checksum_start_offset(skb);
 		unsigned int csum_index_off = csum_start_off + skb->csum_offset;
 
-		cur_p->app0 |= 1; /* TX Checksum Enabled */
-		cur_p->app1 = (csum_start_off << 16) | csum_index_off;
+		cur_p->app0 |= cpu_to_be32(0x000001); /* TX Checksum Enabled */
+		cur_p->app1 = cpu_to_be32((csum_start_off << 16)
+					  | csum_index_off);
 		cur_p->app2 = 0;  /* initial checksum seed */
 	}
 
-	cur_p->app0 |= STS_CTRL_APP0_SOP;
-	cur_p->len = skb_headlen(skb);
-	cur_p->phys = dma_map_single(ndev->dev.parent, skb->data,
-				     skb_headlen(skb), DMA_TO_DEVICE);
-	cur_p->app4 = (unsigned long)skb;
+	cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_SOP);
+	skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
+				      skb_headlen(skb), DMA_TO_DEVICE);
+	cur_p->len = cpu_to_be32(skb_headlen(skb));
+	cur_p->phys = cpu_to_be32(skb_dma_addr);
+	ptr_to_txbd((void *)skb, cur_p);
 
 	for (ii = 0; ii < num_frag; ii++) {
 		lp->tx_bd_tail++;
@@ -716,14 +859,16 @@
 			lp->tx_bd_tail = 0;
 
 		cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
-		cur_p->phys = dma_map_single(ndev->dev.parent,
-					     skb_frag_address(frag),
-					     skb_frag_size(frag), DMA_TO_DEVICE);
-		cur_p->len = skb_frag_size(frag);
+		skb_dma_addr = dma_map_single(ndev->dev.parent,
+					      skb_frag_address(frag),
+					      skb_frag_size(frag),
+					      DMA_TO_DEVICE);
+		cur_p->phys = cpu_to_be32(skb_dma_addr);
+		cur_p->len = cpu_to_be32(skb_frag_size(frag));
 		cur_p->app0 = 0;
 		frag++;
 	}
-	cur_p->app0 |= STS_CTRL_APP0_EOP;
+	cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP);
 
 	tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
 	lp->tx_bd_tail++;
@@ -733,6 +878,7 @@
 	skb_tx_timestamp(skb);
 
 	/* Kick off the transfer */
+	wmb();
 	lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
 
 	return NETDEV_TX_OK;
@@ -745,7 +891,7 @@
 	struct sk_buff *skb, *new_skb;
 	unsigned int bdstat;
 	struct cdmac_bd *cur_p;
-	dma_addr_t tail_p;
+	dma_addr_t tail_p, skb_dma_addr;
 	int length;
 	unsigned long flags;
 
@@ -754,14 +900,14 @@
 	tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
 	cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
 
-	bdstat = cur_p->app0;
+	bdstat = be32_to_cpu(cur_p->app0);
 	while ((bdstat & STS_CTRL_APP0_CMPLT)) {
 
 		skb = lp->rx_skb[lp->rx_bd_ci];
-		length = cur_p->app4 & 0x3FFF;
+		length = be32_to_cpu(cur_p->app4) & 0x3FFF;
 
-		dma_unmap_single(ndev->dev.parent, cur_p->phys, length,
-				 DMA_FROM_DEVICE);
+		dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
+				 XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
 
 		skb_put(skb, length);
 		skb->protocol = eth_type_trans(skb, ndev);
@@ -772,7 +918,12 @@
 		    (skb->protocol == htons(ETH_P_IP)) &&
 		    (skb->len > 64)) {
 
-			skb->csum = cur_p->app3 & 0xFFFF;
+			/* Convert from device endianness (be32) to CPU
+			 * endianness, and if necessary swap the bytes
+			 * (back) for proper IP checksum byte order
+			 * (be16).
+			 */
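+			/* Worked example: a reported sum of 0x1234 sits
+			 * in app3 as the big-endian bytes 00 00 12 34;
+			 * be32_to_cpu() yields 0x1234 in CPU order, and
+			 * htons() lays it back out as bytes 12 34, the
+			 * on-wire order used by IP checksums.
+			 */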
+			skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF);
 			skb->ip_summed = CHECKSUM_COMPLETE;
 		}
 
@@ -789,11 +940,12 @@
 			return;
 		}
 
-		cur_p->app0 = STS_CTRL_APP0_IRQONEND;
-		cur_p->phys = dma_map_single(ndev->dev.parent, new_skb->data,
-					     XTE_MAX_JUMBO_FRAME_SIZE,
-					     DMA_FROM_DEVICE);
-		cur_p->len = XTE_MAX_JUMBO_FRAME_SIZE;
+		cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
+		skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data,
+					      XTE_MAX_JUMBO_FRAME_SIZE,
+					      DMA_FROM_DEVICE);
+		cur_p->phys = cpu_to_be32(skb_dma_addr);
+		cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
 		lp->rx_skb[lp->rx_bd_ci] = new_skb;
 
 		lp->rx_bd_ci++;
@@ -801,7 +953,7 @@
 			lp->rx_bd_ci = 0;
 
 		cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
-		bdstat = cur_p->app0;
+		bdstat = be32_to_cpu(cur_p->app0);
 	}
 	lp->dma_out(lp, RX_TAILDESC_PTR, tail_p);
 
@@ -819,8 +971,10 @@
 
 	if (status & (IRQ_COAL | IRQ_DLY))
 		temac_start_xmit_done(lp->ndev);
-	if (status & 0x080)
-		dev_err(&ndev->dev, "DMA error 0x%x\n", status);
+	if (status & (IRQ_ERR | IRQ_DMAERR))
+		dev_err_ratelimited(&ndev->dev,
+				    "TX error 0x%x TX_CHNL_STS=0x%08x\n",
+				    status, lp->dma_in(lp, TX_CHNL_STS));
 
 	return IRQ_HANDLED;
 }
@@ -837,6 +991,10 @@
 
 	if (status & (IRQ_COAL | IRQ_DLY))
 		ll_temac_recv(lp->ndev);
+	if (status & (IRQ_ERR | IRQ_DMAERR))
+		dev_err_ratelimited(&ndev->dev,
+				    "RX error 0x%x RX_CHNL_STS=0x%08x\n",
+				    status, lp->dma_in(lp, RX_CHNL_STS));
 
 	return IRQ_HANDLED;
 }
@@ -856,7 +1014,14 @@
 			dev_err(lp->dev, "of_phy_connect() failed\n");
 			return -ENODEV;
 		}
-
+		phy_start(phydev);
+	} else if (strlen(lp->phy_name) > 0) {
+		phydev = phy_connect(lp->ndev, lp->phy_name, temac_adjust_link,
+				     lp->phy_interface);
+		if (IS_ERR(phydev)) {
+			dev_err(lp->dev, "phy_connect() failed\n");
+			return PTR_ERR(phydev);
+		}
 		phy_start(phydev);
 	}
 
@@ -930,6 +1095,7 @@
 	.ndo_open = temac_open,
 	.ndo_stop = temac_stop,
 	.ndo_start_xmit = temac_start_xmit,
+	.ndo_set_rx_mode = temac_set_multicast_list,
 	.ndo_set_mac_address = temac_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_do_ioctl = temac_ioctl,
@@ -976,23 +1142,25 @@
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
-static int temac_of_probe(struct platform_device *op)
+static int temac_probe(struct platform_device *pdev)
 {
-	struct device_node *np;
+	struct ll_temac_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np;
 	struct temac_local *lp;
 	struct net_device *ndev;
+	struct resource *res;
 	const void *addr;
 	__be32 *p;
+	bool little_endian;
 	int rc = 0;
 
 	/* Init network device structure */
-	ndev = alloc_etherdev(sizeof(*lp));
+	ndev = devm_alloc_etherdev(&pdev->dev, sizeof(*lp));
 	if (!ndev)
 		return -ENOMEM;
 
-	platform_set_drvdata(op, ndev);
-	SET_NETDEV_DEV(ndev, &op->dev);
-	ndev->flags &= ~IFF_MULTICAST;  /* clear multicast */
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
 	ndev->features = NETIF_F_SG;
 	ndev->netdev_ops = &temac_netdev_ops;
 	ndev->ethtool_ops = &temac_ethtool_ops;
@@ -1013,89 +1181,196 @@
 	/* setup temac private info structure */
 	lp = netdev_priv(ndev);
 	lp->ndev = ndev;
-	lp->dev = &op->dev;
+	lp->dev = &pdev->dev;
 	lp->options = XTE_OPTION_DEFAULTS;
 	spin_lock_init(&lp->rx_lock);
-	mutex_init(&lp->indirect_mutex);
+
+	/* Set up spinlock for synchronization of indirect register access */
+	if (pdata) {
+		if (!pdata->indirect_lock) {
+			dev_err(&pdev->dev,
+				"indirect_lock missing in platform_data\n");
+			return -EINVAL;
+		}
+		lp->indirect_lock = pdata->indirect_lock;
+	} else {
+		lp->indirect_lock = devm_kmalloc(&pdev->dev,
+						 sizeof(*lp->indirect_lock),
+						 GFP_KERNEL);
+		if (!lp->indirect_lock)
+			return -ENOMEM;
+		spin_lock_init(lp->indirect_lock);
+	}
 
 	/* map device registers */
-	lp->regs = of_iomap(op->dev.of_node, 0);
-	if (!lp->regs) {
-		dev_err(&op->dev, "could not map temac regs.\n");
-		rc = -ENOMEM;
-		goto nodev;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	lp->regs = devm_ioremap_nocache(&pdev->dev, res->start,
+					resource_size(res));
+	if (!lp->regs) {
+		dev_err(&pdev->dev, "could not map TEMAC registers\n");
+		return -ENOMEM;
+	}
+
+	/* Select register access functions with the specified
+	 * endianness mode.  Default for OF devices is big-endian.
+	 */
+	little_endian = false;
+	if (temac_np) {
+		if (of_get_property(temac_np, "little-endian", NULL))
+			little_endian = true;
+	} else if (pdata) {
+		little_endian = pdata->reg_little_endian;
+	}
+	if (little_endian) {
+		lp->temac_ior = _temac_ior_le;
+		lp->temac_iow = _temac_iow_le;
+	} else {
+		lp->temac_ior = _temac_ior_be;
+		lp->temac_iow = _temac_iow_be;
 	}
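
The _temac_ior_*/_temac_iow_* accessors selected here are defined earlier in ll_temac_main.c, outside this hunk; a sketch of their presumed shape, mirroring the DMA accessors above:

	static u32 _temac_ior_be(struct temac_local *lp, int offset)
	{
		return ioread32be(lp->regs + offset);
	}

	static void _temac_iow_le(struct temac_local *lp, int offset, u32 value)
	{
		iowrite32(value, lp->regs + offset);
	}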
 
 	/* Setup checksum offload, but default to off if not specified */
 	lp->temac_features = 0;
-	p = (__be32 *)of_get_property(op->dev.of_node, "xlnx,txcsum", NULL);
-	if (p && be32_to_cpu(*p)) {
-		lp->temac_features |= TEMAC_FEATURE_TX_CSUM;
+	if (temac_np) {
+		p = (__be32 *)of_get_property(temac_np, "xlnx,txcsum", NULL);
+		if (p && be32_to_cpu(*p))
+			lp->temac_features |= TEMAC_FEATURE_TX_CSUM;
+		p = (__be32 *)of_get_property(temac_np, "xlnx,rxcsum", NULL);
+		if (p && be32_to_cpu(*p))
+			lp->temac_features |= TEMAC_FEATURE_RX_CSUM;
+	} else if (pdata) {
+		if (pdata->txcsum)
+			lp->temac_features |= TEMAC_FEATURE_TX_CSUM;
+		if (pdata->rxcsum)
+			lp->temac_features |= TEMAC_FEATURE_RX_CSUM;
+	}
+	if (lp->temac_features & TEMAC_FEATURE_TX_CSUM)
 		/* Can checksum TCP/UDP over IPv4. */
 		ndev->features |= NETIF_F_IP_CSUM;
-	}
-	p = (__be32 *)of_get_property(op->dev.of_node, "xlnx,rxcsum", NULL);
-	if (p && be32_to_cpu(*p))
-		lp->temac_features |= TEMAC_FEATURE_RX_CSUM;
 
-	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
-	np = of_parse_phandle(op->dev.of_node, "llink-connected", 0);
-	if (!np) {
-		dev_err(&op->dev, "could not find DMA node\n");
-		rc = -ENODEV;
-		goto err_iounmap;
-	}
-
-	/* Setup the DMA register accesses, could be DCR or memory mapped */
-	if (temac_dcr_setup(lp, op, np)) {
-
-		/* no DCR in the device tree, try non-DCR */
-		lp->sdma_regs = of_iomap(np, 0);
-		if (lp->sdma_regs) {
-			lp->dma_in = temac_dma_in32;
-			lp->dma_out = temac_dma_out32;
-			dev_dbg(&op->dev, "MEM base: %p\n", lp->sdma_regs);
-		} else {
-			dev_err(&op->dev, "unable to map DMA registers\n");
-			of_node_put(np);
-			goto err_iounmap;
+	/* Setup LocalLink DMA */
+	if (temac_np) {
+		/* Find the DMA node, map the DMA registers, and
+		 * decode the DMA IRQs.
+		 */
+		dma_np = of_parse_phandle(temac_np, "llink-connected", 0);
+		if (!dma_np) {
+			dev_err(&pdev->dev, "could not find DMA node\n");
+			return -ENODEV;
 		}
+
+		/* Setup the DMA register accesses, could be DCR or
+		 * memory mapped.
+		 */
+		if (temac_dcr_setup(lp, pdev, dma_np)) {
+			/* no DCR in the device tree, try non-DCR */
+			lp->sdma_regs = devm_of_iomap(&pdev->dev, dma_np, 0,
+						      NULL);
+			if (IS_ERR(lp->sdma_regs)) {
+				dev_err(&pdev->dev,
+					"unable to map DMA registers\n");
+				of_node_put(dma_np);
+				return PTR_ERR(lp->sdma_regs);
+			}
+			if (of_get_property(dma_np, "little-endian", NULL)) {
+				lp->dma_in = temac_dma_in32_le;
+				lp->dma_out = temac_dma_out32_le;
+			} else {
+				lp->dma_in = temac_dma_in32_be;
+				lp->dma_out = temac_dma_out32_be;
+			}
+			dev_dbg(&pdev->dev, "MEM base: %p\n", lp->sdma_regs);
+		}
+
+		/* Get DMA RX and TX interrupts */
+		lp->rx_irq = irq_of_parse_and_map(dma_np, 0);
+		lp->tx_irq = irq_of_parse_and_map(dma_np, 1);
+
+		/* Use defaults for IRQ delay/coalescing setup.  These
+		 * are configuration values, so they do not belong in
+		 * the device tree.
+		 */
+		lp->tx_chnl_ctrl = 0x10220000;
+		lp->rx_chnl_ctrl = 0xff070000;
+
+		/* Finished with the DMA node; drop the reference */
+		of_node_put(dma_np);
+	} else if (pdata) {
+		/* 2nd memory resource specifies DMA registers */
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		lp->sdma_regs = devm_ioremap_nocache(&pdev->dev, res->start,
+						     resource_size(res));
+		if (!lp->sdma_regs) {
+			dev_err(&pdev->dev,
+				"could not map DMA registers\n");
+			return -ENOMEM;
+		}
+		if (pdata->dma_little_endian) {
+			lp->dma_in = temac_dma_in32_le;
+			lp->dma_out = temac_dma_out32_le;
+		} else {
+			lp->dma_in = temac_dma_in32_be;
+			lp->dma_out = temac_dma_out32_be;
+		}
+
+		/* Get DMA RX and TX interrupts */
+		lp->rx_irq = platform_get_irq(pdev, 0);
+		lp->tx_irq = platform_get_irq(pdev, 1);
+
+		/* IRQ delay/coalescing setup */
+		if (pdata->tx_irq_timeout || pdata->tx_irq_count)
+			lp->tx_chnl_ctrl = (pdata->tx_irq_timeout << 24) |
+				(pdata->tx_irq_count << 16);
+		else
+			lp->tx_chnl_ctrl = 0x10220000;
+		if (pdata->rx_irq_timeout || pdata->rx_irq_count)
+			lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) |
+				(pdata->rx_irq_count << 16);
+		else
+			lp->rx_chnl_ctrl = 0xff070000;
 	}
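
Both channel-control words pack the IRQ delay timeout into bits 31:24 and the coalescing count into bits 23:16, so the platform-data path above reproduces the 0x10220000 TX default with, for example:

	/* (0x10 << 24) | (0x22 << 16) == 0x10220000 */
	pdata->tx_irq_timeout = 0x10;
	pdata->tx_irq_count = 0x22;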
 
-	lp->rx_irq = irq_of_parse_and_map(np, 0);
-	lp->tx_irq = irq_of_parse_and_map(np, 1);
-
-	of_node_put(np); /* Finished with the DMA node; drop the reference */
-
-	if (!lp->rx_irq || !lp->tx_irq) {
-		dev_err(&op->dev, "could not determine irqs\n");
-		rc = -ENOMEM;
-		goto err_iounmap_2;
+	/* Handle errors from the DMA RX and TX interrupts obtained above */
+	if (lp->rx_irq < 0) {
+		if (lp->rx_irq != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "could not get DMA RX irq\n");
+		return lp->rx_irq;
+	}
+	if (lp->tx_irq < 0) {
+		if (lp->tx_irq != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "could not get DMA TX irq\n");
+		return lp->tx_irq;
 	}
 
-
-	/* Retrieve the MAC address */
-	addr = of_get_mac_address(op->dev.of_node);
-	if (!addr) {
-		dev_err(&op->dev, "could not find MAC address\n");
-		rc = -ENODEV;
-		goto err_iounmap_2;
+	if (temac_np) {
+		/* Retrieve the MAC address */
+		addr = of_get_mac_address(temac_np);
+		if (IS_ERR(addr)) {
+			dev_err(&pdev->dev, "could not find MAC address\n");
+			return -ENODEV;
+		}
+		temac_init_mac_address(ndev, addr);
+	} else if (pdata) {
+		temac_init_mac_address(ndev, pdata->mac_addr);
 	}
-	temac_init_mac_address(ndev, addr);
 
-	rc = temac_mdio_setup(lp, op->dev.of_node);
+	rc = temac_mdio_setup(lp, pdev);
 	if (rc)
-		dev_warn(&op->dev, "error registering MDIO bus\n");
+		dev_warn(&pdev->dev, "error registering MDIO bus\n");
 
-	lp->phy_node = of_parse_phandle(op->dev.of_node, "phy-handle", 0);
-	if (lp->phy_node)
-		dev_dbg(lp->dev, "using PHY node %pOF (%p)\n", np, np);
+	if (temac_np) {
+		lp->phy_node = of_parse_phandle(temac_np, "phy-handle", 0);
+		if (lp->phy_node)
+			dev_dbg(lp->dev, "using PHY node %pOF\n", temac_np);
+	} else if (pdata) {
+		snprintf(lp->phy_name, sizeof(lp->phy_name),
+			 PHY_ID_FMT, lp->mii_bus->id, pdata->phy_addr);
+		lp->phy_interface = pdata->phy_interface;
+	}
 
 	/* Add the device attributes */
 	rc = sysfs_create_group(&lp->dev->kobj, &temac_attr_group);
 	if (rc) {
 		dev_err(lp->dev, "Error creating sysfs files\n");
-		goto err_iounmap_2;
+		goto err_sysfs_create;
 	}
 
 	rc = register_netdev(lp->ndev);
@@ -1106,33 +1381,25 @@
 
 	return 0;
 
- err_register_ndev:
+err_register_ndev:
 	sysfs_remove_group(&lp->dev->kobj, &temac_attr_group);
- err_iounmap_2:
-	if (lp->sdma_regs)
-		iounmap(lp->sdma_regs);
- err_iounmap:
-	iounmap(lp->regs);
- nodev:
-	free_netdev(ndev);
-	ndev = NULL;
+err_sysfs_create:
+	if (lp->phy_node)
+		of_node_put(lp->phy_node);
+	temac_mdio_teardown(lp);
 	return rc;
 }
 
-static int temac_of_remove(struct platform_device *op)
+static int temac_remove(struct platform_device *pdev)
 {
-	struct net_device *ndev = platform_get_drvdata(op);
+	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct temac_local *lp = netdev_priv(ndev);
 
-	temac_mdio_teardown(lp);
 	unregister_netdev(ndev);
 	sysfs_remove_group(&lp->dev->kobj, &temac_attr_group);
-	of_node_put(lp->phy_node);
-	lp->phy_node = NULL;
-	iounmap(lp->regs);
-	if (lp->sdma_regs)
-		iounmap(lp->sdma_regs);
-	free_netdev(ndev);
+	if (lp->phy_node)
+		of_node_put(lp->phy_node);
+	temac_mdio_teardown(lp);
 	return 0;
 }
 
@@ -1145,16 +1412,16 @@
 };
 MODULE_DEVICE_TABLE(of, temac_of_match);
 
-static struct platform_driver temac_of_driver = {
-	.probe = temac_of_probe,
-	.remove = temac_of_remove,
+static struct platform_driver temac_driver = {
+	.probe = temac_probe,
+	.remove = temac_remove,
 	.driver = {
 		.name = "xilinx_temac",
 		.of_match_table = temac_of_match,
 	},
 };
 
-module_platform_driver(temac_of_driver);
+module_platform_driver(temac_driver);
 
 MODULE_DESCRIPTION("Xilinx LL_TEMAC Ethernet driver");
 MODULE_AUTHOR("Yoshio Kashiwagi");
diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c
index f5e83ac..6fd2dea 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_mdio.c
@@ -14,6 +14,7 @@
 #include <linux/of_address.h>
 #include <linux/slab.h>
 #include <linux/of_mdio.h>
+#include <linux/platform_data/xilinx-ll-temac.h>
 
 #include "ll_temac.h"
 
@@ -24,14 +25,15 @@
 {
 	struct temac_local *lp = bus->priv;
 	u32 rc;
+	unsigned long flags;
 
 	/* Write the PHY address to the MIIM Access Initiator register.
 	 * When the transfer completes, the PHY register value will appear
 	 * in the LSW0 register */
-	mutex_lock(&lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
 	temac_iow(lp, XTE_LSW0_OFFSET, (phy_id << 5) | reg);
-	rc = temac_indirect_in32(lp, XTE_MIIMAI_OFFSET);
-	mutex_unlock(&lp->indirect_mutex);
+	rc = temac_indirect_in32_locked(lp, XTE_MIIMAI_OFFSET);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	dev_dbg(lp->dev, "temac_mdio_read(phy_id=%i, reg=%x) == %x\n",
 		phy_id, reg, rc);
@@ -42,6 +44,7 @@
 static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
 {
 	struct temac_local *lp = bus->priv;
+	unsigned long flags;
 
 	dev_dbg(lp->dev, "temac_mdio_write(phy_id=%i, reg=%x, val=%x)\n",
 		phy_id, reg, val);
@@ -49,25 +52,34 @@
 	/* First write the desired value into the write data register
 	 * and then write the address into the access initiator register
 	 */
-	mutex_lock(&lp->indirect_mutex);
-	temac_indirect_out32(lp, XTE_MGTDR_OFFSET, val);
-	temac_indirect_out32(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg);
-	mutex_unlock(&lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, XTE_MGTDR_OFFSET, val);
+	temac_indirect_out32_locked(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	return 0;
 }
 
-int temac_mdio_setup(struct temac_local *lp, struct device_node *np)
+int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev)
 {
+	struct ll_temac_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct device_node *np = dev_of_node(&pdev->dev);
 	struct mii_bus *bus;
 	u32 bus_hz;
 	int clk_div;
 	int rc;
 	struct resource res;
 
+	/* Get MDIO bus frequency (if specified) */
+	bus_hz = 0;
+	if (np)
+		of_property_read_u32(np, "clock-frequency", &bus_hz);
+	else if (pdata)
+		bus_hz = pdata->mdio_clk_freq;
+
 	/* Calculate a reasonable divisor for the clock rate */
 	clk_div = 0x3f; /* worst-case default setting */
-	if (of_property_read_u32(np, "clock-frequency", &bus_hz) == 0) {
+	if (bus_hz != 0) {
 		clk_div = bus_hz / (2500 * 1000 * 2) - 1;
 		if (clk_div < 1)
 			clk_div = 1;
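
The divisor targets the 2.5 MHz MDC ceiling; rearranging the computation above gives MDC = bus_hz / (2 * (clk_div + 1)), a formula inferred from this code rather than stated by it.  A worked example:

	/* bus_hz  = 100000000 (100 MHz host bus)                    */
	/* clk_div = 100000000 / (2500 * 1000 * 2) - 1 = 19          */
	/* MDC     = 100000000 / (2 * (19 + 1)) = 2500000 = 2.5 MHz  */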
@@ -77,17 +89,21 @@
 
 	/* Enable the MDIO bus by asserting the enable bit and writing
 	 * in the clock config */
-	mutex_lock(&lp->indirect_mutex);
 	temac_indirect_out32(lp, XTE_MC_OFFSET, 1 << 6 | clk_div);
-	mutex_unlock(&lp->indirect_mutex);
 
-	bus = mdiobus_alloc();
+	bus = devm_mdiobus_alloc(&pdev->dev);
 	if (!bus)
 		return -ENOMEM;
 
-	of_address_to_resource(np, 0, &res);
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%.8llx",
-		 (unsigned long long)res.start);
+	if (np) {
+		of_address_to_resource(np, 0, &res);
+		snprintf(bus->id, MII_BUS_ID_SIZE, "%.8llx",
+			 (unsigned long long)res.start);
+	} else if (pdata) {
+		snprintf(bus->id, MII_BUS_ID_SIZE, "%.8llx",
+			 pdata->mdio_bus_id);
+	}
+
 	bus->priv = lp;
 	bus->name = "Xilinx TEMAC MDIO";
 	bus->read = temac_mdio_read;
@@ -98,23 +114,14 @@
 
 	rc = of_mdiobus_register(bus, np);
 	if (rc)
-		goto err_register;
+		return rc;
 
-	mutex_lock(&lp->indirect_mutex);
 	dev_dbg(lp->dev, "MDIO bus registered;  MC:%x\n",
 		temac_indirect_in32(lp, XTE_MC_OFFSET));
-	mutex_unlock(&lp->indirect_mutex);
 	return 0;
-
- err_register:
-	mdiobus_free(bus);
-	return rc;
 }
 
 void temac_mdio_teardown(struct temac_local *lp)
 {
 	mdiobus_unregister(lp->mii_bus);
-	mdiobus_free(lp->mii_bus);
-	lp->mii_bus = NULL;
 }
-
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index c337400..2dacfc8 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/if_vlan.h>
+#include <linux/phylink.h>
 
 /* Packet size info */
 #define XAE_HDR_SIZE			14 /* Size of Ethernet header */
@@ -83,6 +84,8 @@
 #define XAXIDMA_CR_RUNSTOP_MASK	0x00000001 /* Start/stop DMA channel */
 #define XAXIDMA_CR_RESET_MASK	0x00000004 /* Reset DMA engine */
 
+#define XAXIDMA_SR_HALT_MASK	0x00000001 /* Indicates DMA channel halted */
+
 #define XAXIDMA_BD_NDESC_OFFSET		0x00 /* Next descriptor pointer */
 #define XAXIDMA_BD_BUFA_OFFSET		0x08 /* Buffer address */
 #define XAXIDMA_BD_CTRL_LEN_OFFSET	0x18 /* Control/buffer length */
@@ -356,9 +359,6 @@
  * @app2:         MM2S/S2MM User Application Field 2.
  * @app3:         MM2S/S2MM User Application Field 3.
  * @app4:         MM2S/S2MM User Application Field 4.
- * @sw_id_offset: MM2S/S2MM Sw ID
- * @reserved5:    Reserved and not used
- * @reserved6:    Reserved and not used
  */
 struct axidma_bd {
 	u32 next;	/* Physical address of next buffer descriptor */
@@ -373,11 +373,9 @@
 	u32 app1;	/* TX start << 16 | insert */
 	u32 app2;	/* TX csum seed */
 	u32 app3;
-	u32 app4;
-	u32 sw_id_offset;
-	u32 reserved5;
-	u32 reserved6;
-};
+	u32 app4;   /* Last field used by HW */
+	struct sk_buff *skb;
+} __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT);
 
 /**
  * struct axienet_local - axienet private per device data
@@ -385,6 +383,7 @@
  * @dev:	Pointer to device structure
  * @phy_node:	Pointer to device node structure
  * @mii_bus:	Pointer to MII bus structure
+ * @regs_start: Resource start for axienet device addresses
  * @regs:	Base address for the axienet_local device address space
  * @dma_regs:	Base address for the axidma device address space
  * @dma_err_tasklet: Tasklet structure to process Axi DMA errors
@@ -422,10 +421,17 @@
 	/* Connection to PHY device */
 	struct device_node *phy_node;
 
+	struct phylink *phylink;
+	struct phylink_config phylink_config;
+
+	/* Clock for AXI bus */
+	struct clk *clk;
+
 	/* MDIO bus data */
 	struct mii_bus *mii_bus;	/* MII bus reference */
 
 	/* IO registers, dma functions and IRQs */
+	resource_size_t regs_start;
 	void __iomem *regs;
 	void __iomem *dma_regs;
 
@@ -433,17 +439,19 @@
 
 	int tx_irq;
 	int rx_irq;
+	int eth_irq;
 	phy_interface_t phy_mode;
 
 	u32 options;			/* Current options word */
-	u32 last_link;
 	u32 features;
 
 	/* Buffer descriptors */
 	struct axidma_bd *tx_bd_v;
 	dma_addr_t tx_bd_p;
+	u32 tx_bd_num;
 	struct axidma_bd *rx_bd_v;
 	dma_addr_t rx_bd_p;
+	u32 rx_bd_num;
 	u32 tx_bd_ci;
 	u32 tx_bd_tail;
 	u32 rx_bd_ci;
@@ -481,7 +489,12 @@
  */
 static inline u32 axienet_ior(struct axienet_local *lp, off_t offset)
 {
-	return in_be32(lp->regs + offset);
+	return ioread32(lp->regs + offset);
+}
+
+static inline u32 axinet_ior_read_mcr(struct axienet_local *lp)
+{
+	return axienet_ior(lp, XAE_MDIO_MCR_OFFSET);
 }
 
 /**
@@ -496,12 +509,13 @@
 static inline void axienet_iow(struct axienet_local *lp, off_t offset,
 			       u32 value)
 {
-	out_be32((lp->regs + offset), value);
+	iowrite32(value, lp->regs + offset);
 }
 
 /* Function prototypes visible in xilinx_axienet_mdio.c for other files */
-int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np);
-int axienet_mdio_wait_until_ready(struct axienet_local *lp);
+int axienet_mdio_enable(struct axienet_local *lp);
+void axienet_mdio_disable(struct axienet_local *lp);
+int axienet_mdio_setup(struct axienet_local *lp);
 void axienet_mdio_teardown(struct axienet_local *lp);
 
 #endif /* XILINX_AXI_ENET_H */
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index f24f48f..676006f 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Xilinx Axi Ethernet device driver
  *
@@ -6,6 +7,7 @@
  * Copyright (c) 2008-2009 Secret Lab Technologies Ltd.
  * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu>
  * Copyright (c) 2010 - 2011 PetaLogix
+ * Copyright (c) 2019 SED Systems, a division of Calian Ltd.
  * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved.
  *
  * This is a driver for the Xilinx Axi Ethernet which is used in the Virtex6
@@ -20,6 +22,7 @@
  *  - Add support for extended VLAN support.
  */
 
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/module.h>
@@ -37,16 +40,18 @@
 
 #include "xilinx_axienet.h"
 
-/* Descriptors defines for Tx and Rx DMA - 2^n for the best performance */
-#define TX_BD_NUM		64
-#define RX_BD_NUM		128
+/* Descriptors defines for Tx and Rx DMA */
+#define TX_BD_NUM_DEFAULT		64
+#define RX_BD_NUM_DEFAULT		1024
+#define TX_BD_NUM_MAX			4096
+#define RX_BD_NUM_MAX			4096
 
 /* Must be shorter than length of ethtool_drvinfo.driver field to fit */
 #define DRIVER_NAME		"xaxienet"
 #define DRIVER_DESCRIPTION	"Xilinx Axi Ethernet driver"
 #define DRIVER_VERSION		"1.00a"
 
-#define AXIENET_REGS_N		32
+#define AXIENET_REGS_N		40
 
 /* Match table for of_platform binding */
 static const struct of_device_id axienet_of_match[] = {
@@ -124,7 +129,7 @@
  */
 static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg)
 {
-	return in_be32(lp->dma_regs + reg);
+	return ioread32(lp->dma_regs + reg);
 }
 
 /**
@@ -139,7 +144,7 @@
 static inline void axienet_dma_out32(struct axienet_local *lp,
 				     off_t reg, u32 value)
 {
-	out_be32((lp->dma_regs + reg), value);
+	iowrite32(value, lp->dma_regs + reg);
 }
 
 /**
@@ -155,22 +160,21 @@
 	int i;
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	for (i = 0; i < RX_BD_NUM; i++) {
+	for (i = 0; i < lp->rx_bd_num; i++) {
 		dma_unmap_single(ndev->dev.parent, lp->rx_bd_v[i].phys,
 				 lp->max_frm_size, DMA_FROM_DEVICE);
-		dev_kfree_skb((struct sk_buff *)
-			      (lp->rx_bd_v[i].sw_id_offset));
+		dev_kfree_skb(lp->rx_bd_v[i].skb);
 	}
 
 	if (lp->rx_bd_v) {
 		dma_free_coherent(ndev->dev.parent,
-				  sizeof(*lp->rx_bd_v) * RX_BD_NUM,
+				  sizeof(*lp->rx_bd_v) * lp->rx_bd_num,
 				  lp->rx_bd_v,
 				  lp->rx_bd_p);
 	}
 	if (lp->tx_bd_v) {
 		dma_free_coherent(ndev->dev.parent,
-				  sizeof(*lp->tx_bd_v) * TX_BD_NUM,
+				  sizeof(*lp->tx_bd_v) * lp->tx_bd_num,
 				  lp->tx_bd_v,
 				  lp->tx_bd_p);
 	}
@@ -199,34 +203,34 @@
 	lp->rx_bd_ci = 0;
 
 	/* Allocate the Tx and Rx buffer descriptors. */
-	lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
-					  sizeof(*lp->tx_bd_v) * TX_BD_NUM,
-					  &lp->tx_bd_p, GFP_KERNEL);
+	lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+					 sizeof(*lp->tx_bd_v) * lp->tx_bd_num,
+					 &lp->tx_bd_p, GFP_KERNEL);
 	if (!lp->tx_bd_v)
 		goto out;
 
-	lp->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
-					  sizeof(*lp->rx_bd_v) * RX_BD_NUM,
-					  &lp->rx_bd_p, GFP_KERNEL);
+	lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+					 sizeof(*lp->rx_bd_v) * lp->rx_bd_num,
+					 &lp->rx_bd_p, GFP_KERNEL);
 	if (!lp->rx_bd_v)
 		goto out;
 
-	for (i = 0; i < TX_BD_NUM; i++) {
+	for (i = 0; i < lp->tx_bd_num; i++) {
 		lp->tx_bd_v[i].next = lp->tx_bd_p +
 				      sizeof(*lp->tx_bd_v) *
-				      ((i + 1) % TX_BD_NUM);
+				      ((i + 1) % lp->tx_bd_num);
 	}
 
-	for (i = 0; i < RX_BD_NUM; i++) {
+	for (i = 0; i < lp->rx_bd_num; i++) {
 		lp->rx_bd_v[i].next = lp->rx_bd_p +
 				      sizeof(*lp->rx_bd_v) *
-				      ((i + 1) % RX_BD_NUM);
+				      ((i + 1) % lp->rx_bd_num);
 
 		skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size);
 		if (!skb)
 			goto out;
 
-		lp->rx_bd_v[i].sw_id_offset = (u32) skb;
+		lp->rx_bd_v[i].skb = skb;
 		lp->rx_bd_v[i].phys = dma_map_single(ndev->dev.parent,
 						     skb->data,
 						     lp->max_frm_size,
@@ -268,7 +272,7 @@
 	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
 			  cr | XAXIDMA_CR_RUNSTOP_MASK);
 	axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
-			  (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+			  (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
 
 	/* Write to the RS (Run-stop) bit in the Tx channel control register.
 	 * Tx channel is now ready to run. But only after we write to the
@@ -433,17 +437,20 @@
 	lp->options |= options;
 }
 
-static void __axienet_device_reset(struct axienet_local *lp, off_t offset)
+static void __axienet_device_reset(struct axienet_local *lp)
 {
 	u32 timeout;
 	/* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset
 	 * process of Axi DMA takes a while to complete as all pending
 	 * commands/transfers will be flushed or completed during this
 	 * reset process.
+	 * Note that even though both TX and RX have their own reset register,
+	 * they both reset the entire DMA core, so only one needs to be used.
 	 */
-	axienet_dma_out32(lp, offset, XAXIDMA_CR_RESET_MASK);
+	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK);
 	timeout = DELAY_OF_ONE_MILLISEC;
-	while (axienet_dma_in32(lp, offset) & XAXIDMA_CR_RESET_MASK) {
+	while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) &
+				XAXIDMA_CR_RESET_MASK) {
 		udelay(1);
 		if (--timeout == 0) {
 			netdev_err(lp->ndev, "%s: DMA reset timeout!\n",
@@ -469,8 +476,7 @@
 	u32 axienet_status;
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	__axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET);
-	__axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET);
+	__axienet_device_reset(lp);
 
 	lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE;
 	lp->options |= XAE_OPTION_VLAN;
@@ -497,6 +503,8 @@
 	axienet_status = axienet_ior(lp, XAE_IP_OFFSET);
 	if (axienet_status & XAE_INT_RXRJECT_MASK)
 		axienet_iow(lp, XAE_IS_OFFSET, XAE_INT_RXRJECT_MASK);
+	axienet_iow(lp, XAE_IE_OFFSET, lp->eth_irq > 0 ?
+		    XAE_INT_RECV_ERROR_MASK : 0);
 
 	axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK);
 
@@ -513,63 +521,6 @@
 }
 
 /**
- * axienet_adjust_link - Adjust the PHY link speed/duplex.
- * @ndev:	Pointer to the net_device structure
- *
- * This function is called to change the speed and duplex setting after
- * auto negotiation is done by the PHY. This is the function that gets
- * registered with the PHY interface through the "of_phy_connect" call.
- */
-static void axienet_adjust_link(struct net_device *ndev)
-{
-	u32 emmc_reg;
-	u32 link_state;
-	u32 setspeed = 1;
-	struct axienet_local *lp = netdev_priv(ndev);
-	struct phy_device *phy = ndev->phydev;
-
-	link_state = phy->speed | (phy->duplex << 1) | phy->link;
-	if (lp->last_link != link_state) {
-		if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) {
-			if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX)
-				setspeed = 0;
-		} else {
-			if ((phy->speed == SPEED_1000) &&
-			    (lp->phy_mode == PHY_INTERFACE_MODE_MII))
-				setspeed = 0;
-		}
-
-		if (setspeed == 1) {
-			emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
-			emmc_reg &= ~XAE_EMMC_LINKSPEED_MASK;
-
-			switch (phy->speed) {
-			case SPEED_1000:
-				emmc_reg |= XAE_EMMC_LINKSPD_1000;
-				break;
-			case SPEED_100:
-				emmc_reg |= XAE_EMMC_LINKSPD_100;
-				break;
-			case SPEED_10:
-				emmc_reg |= XAE_EMMC_LINKSPD_10;
-				break;
-			default:
-				dev_err(&ndev->dev, "Speed other than 10, 100 "
-					"or 1Gbps is not supported\n");
-				break;
-			}
-
-			axienet_iow(lp, XAE_EMMC_OFFSET, emmc_reg);
-			lp->last_link = link_state;
-			phy_print_status(phy);
-		} else {
-			netdev_err(ndev,
-				   "Error setting Axi Ethernet mac speed\n");
-		}
-	}
-}
-
-/**
  * axienet_start_xmit_done - Invoked once a transmit is completed by the
  * Axi DMA Tx channel.
  * @ndev:	Pointer to the net_device structure
@@ -594,26 +545,31 @@
 		dma_unmap_single(ndev->dev.parent, cur_p->phys,
 				(cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
 				DMA_TO_DEVICE);
-		if (cur_p->app4)
-			dev_kfree_skb_irq((struct sk_buff *)cur_p->app4);
+		if (cur_p->skb)
+			dev_consume_skb_irq(cur_p->skb);
 		/*cur_p->phys = 0;*/
 		cur_p->app0 = 0;
 		cur_p->app1 = 0;
 		cur_p->app2 = 0;
 		cur_p->app4 = 0;
 		cur_p->status = 0;
+		cur_p->skb = NULL;
 
 		size += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
 		packets++;
 
-		++lp->tx_bd_ci;
-		lp->tx_bd_ci %= TX_BD_NUM;
+		if (++lp->tx_bd_ci >= lp->tx_bd_num)
+			lp->tx_bd_ci = 0;
 		cur_p = &lp->tx_bd_v[lp->tx_bd_ci];
 		status = cur_p->status;
 	}
 
 	ndev->stats.tx_packets += packets;
 	ndev->stats.tx_bytes += size;
+
+	/* Matches barrier in axienet_start_xmit */
+	smp_mb();
+
 	netif_wake_queue(ndev);
 }
 
@@ -634,7 +590,7 @@
 					    int num_frag)
 {
 	struct axidma_bd *cur_p;
-	cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % TX_BD_NUM];
+	cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
 	if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
 		return NETDEV_TX_BUSY;
 	return 0;
@@ -653,7 +609,8 @@
  * start the transmission. Additionally if checksum offloading is supported,
  * it populates AXI Stream Control fields with appropriate values.
  */
-static int axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t
+axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	u32 ii;
 	u32 num_frag;
@@ -668,9 +625,19 @@
 	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
 
 	if (axienet_check_tx_bd_space(lp, num_frag)) {
-		if (!netif_queue_stopped(ndev))
-			netif_stop_queue(ndev);
-		return NETDEV_TX_BUSY;
+		if (netif_queue_stopped(ndev))
+			return NETDEV_TX_BUSY;
+
+		netif_stop_queue(ndev);
+
+		/* Matches barrier in axienet_start_xmit_done */
+		smp_mb();
+
+		/* Space might have just been freed - check again */
+		if (axienet_check_tx_bd_space(lp, num_frag))
+			return NETDEV_TX_BUSY;
+
+		netif_wake_queue(ndev);
 	}
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -693,8 +660,8 @@
 				     skb_headlen(skb), DMA_TO_DEVICE);
 
 	for (ii = 0; ii < num_frag; ii++) {
-		++lp->tx_bd_tail;
-		lp->tx_bd_tail %= TX_BD_NUM;
+		if (++lp->tx_bd_tail >= lp->tx_bd_num)
+			lp->tx_bd_tail = 0;
 		cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
 		frag = &skb_shinfo(skb)->frags[ii];
 		cur_p->phys = dma_map_single(ndev->dev.parent,
@@ -705,13 +672,13 @@
 	}
 
 	cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK;
-	cur_p->app4 = (unsigned long)skb;
+	cur_p->skb = skb;
 
 	tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
 	/* Start the transfer */
 	axienet_dma_out32(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p);
-	++lp->tx_bd_tail;
-	lp->tx_bd_tail %= TX_BD_NUM;
+	if (++lp->tx_bd_tail >= lp->tx_bd_num)
+		lp->tx_bd_tail = 0;
 
 	return NETDEV_TX_OK;
 }
@@ -740,13 +707,15 @@
 
 	while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
 		tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
-		skb = (struct sk_buff *) (cur_p->sw_id_offset);
-		length = cur_p->app4 & 0x0000FFFF;
 
 		dma_unmap_single(ndev->dev.parent, cur_p->phys,
 				 lp->max_frm_size,
 				 DMA_FROM_DEVICE);
 
+		skb = cur_p->skb;
+		cur_p->skb = NULL;
+		length = cur_p->app4 & 0x0000FFFF;
+
 		skb_put(skb, length);
 		skb->protocol = eth_type_trans(skb, ndev);
 		/*skb_checksum_none_assert(skb);*/
@@ -781,10 +750,10 @@
 					     DMA_FROM_DEVICE);
 		cur_p->cntrl = lp->max_frm_size;
 		cur_p->status = 0;
-		cur_p->sw_id_offset = (u32) new_skb;
+		cur_p->skb = new_skb;
 
-		++lp->rx_bd_ci;
-		lp->rx_bd_ci %= RX_BD_NUM;
+		if (++lp->rx_bd_ci >= lp->rx_bd_num)
+			lp->rx_bd_ci = 0;
 		cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
 	}
 
@@ -800,7 +769,7 @@
  * @irq:	irq number
  * @_ndev:	net_device pointer
  *
- * Return: IRQ_HANDLED for all cases.
+ * Return: IRQ_HANDLED if device generated a TX interrupt, IRQ_NONE otherwise.
  *
  * This is the Axi DMA Tx done Isr. It invokes "axienet_start_xmit_done"
  * to complete the BD processing.
@@ -819,7 +788,7 @@
 		goto out;
 	}
 	if (!(status & XAXIDMA_IRQ_ALL_MASK))
-		dev_err(&ndev->dev, "No interrupts asserted in Tx path\n");
+		return IRQ_NONE;
 	if (status & XAXIDMA_IRQ_ERROR_MASK) {
 		dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status);
 		dev_err(&ndev->dev, "Current BD is at: 0x%x\n",
@@ -849,7 +818,7 @@
  * @irq:	irq number
  * @_ndev:	net_device pointer
  *
- * Return: IRQ_HANDLED for all cases.
+ * Return: IRQ_HANDLED if device generated a RX interrupt, IRQ_NONE otherwise.
  *
  * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD
  * processing.
@@ -868,7 +837,7 @@
 		goto out;
 	}
 	if (!(status & XAXIDMA_IRQ_ALL_MASK))
-		dev_err(&ndev->dev, "No interrupts asserted in Rx path\n");
+		return IRQ_NONE;
 	if (status & XAXIDMA_IRQ_ERROR_MASK) {
 		dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status);
 		dev_err(&ndev->dev, "Current BD is at: 0x%x\n",
@@ -893,6 +862,35 @@
 	return IRQ_HANDLED;
 }
 
+/**
+ * axienet_eth_irq - Ethernet core Isr.
+ * @irq:	irq number
+ * @_ndev:	net_device pointer
+ *
+ * Return: IRQ_HANDLED if device generated a core interrupt, IRQ_NONE otherwise.
+ *
+ * Handle miscellaneous conditions indicated by Ethernet core IRQ.
+ */
+static irqreturn_t axienet_eth_irq(int irq, void *_ndev)
+{
+	struct net_device *ndev = _ndev;
+	struct axienet_local *lp = netdev_priv(ndev);
+	unsigned int pending;
+
+	pending = axienet_ior(lp, XAE_IP_OFFSET);
+	if (!pending)
+		return IRQ_NONE;
+
+	if (pending & XAE_INT_RXFIFOOVR_MASK)
+		ndev->stats.rx_missed_errors++;
+
+	if (pending & XAE_INT_RXRJECT_MASK)
+		ndev->stats.rx_frame_errors++;
+
+	axienet_iow(lp, XAE_IS_OFFSET, pending);
+	return IRQ_HANDLED;
+}
+
 static void axienet_dma_err_handler(unsigned long data);
 
 /**
@@ -902,67 +900,72 @@
  * Return: 0, on success.
  *	    non-zero error value on failure
  *
- * This is the driver open routine. It calls phy_start to start the PHY device.
+ * This is the driver open routine. It calls phylink_start to start the
+ * PHY device.
  * It also allocates interrupt service routines, enables the interrupt lines
  * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer
  * descriptors are initialized.
  */
 static int axienet_open(struct net_device *ndev)
 {
-	int ret, mdio_mcreg;
+	int ret;
 	struct axienet_local *lp = netdev_priv(ndev);
-	struct phy_device *phydev = NULL;
 
 	dev_dbg(&ndev->dev, "axienet_open()\n");
 
-	mdio_mcreg = axienet_ior(lp, XAE_MDIO_MC_OFFSET);
-	ret = axienet_mdio_wait_until_ready(lp);
-	if (ret < 0)
-		return ret;
 	/* Disable the MDIO interface till Axi Ethernet Reset is completed.
 	 * When we do an Axi Ethernet reset, it resets the complete core
-	 * including the MDIO. If MDIO is not disabled when the reset
-	 * process is started, MDIO will be broken afterwards.
+	 * including the MDIO. MDIO must be disabled before resetting
+	 * and re-enabled afterwards.
+	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
 	 */
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET,
-		    (mdio_mcreg & (~XAE_MDIO_MC_MDIOEN_MASK)));
+	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_mdio_disable(lp);
 	axienet_device_reset(ndev);
-	/* Enable the MDIO */
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg);
-	ret = axienet_mdio_wait_until_ready(lp);
+	ret = axienet_mdio_enable(lp);
+	mutex_unlock(&lp->mii_bus->mdio_lock);
 	if (ret < 0)
 		return ret;
 
-	if (lp->phy_node) {
-		phydev = of_phy_connect(lp->ndev, lp->phy_node,
-					axienet_adjust_link, 0, lp->phy_mode);
-
-		if (!phydev)
-			dev_err(lp->dev, "of_phy_connect() failed\n");
-		else
-			phy_start(phydev);
+	ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0);
+	if (ret) {
+		dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret);
+		return ret;
 	}
 
+	phylink_start(lp->phylink);
+
 	/* Enable tasklets for Axi DMA error handling */
 	tasklet_init(&lp->dma_err_tasklet, axienet_dma_err_handler,
 		     (unsigned long) lp);
 
 	/* Enable interrupts for Axi DMA Tx */
-	ret = request_irq(lp->tx_irq, axienet_tx_irq, 0, ndev->name, ndev);
+	ret = request_irq(lp->tx_irq, axienet_tx_irq, IRQF_SHARED,
+			  ndev->name, ndev);
 	if (ret)
 		goto err_tx_irq;
 	/* Enable interrupts for Axi DMA Rx */
-	ret = request_irq(lp->rx_irq, axienet_rx_irq, 0, ndev->name, ndev);
+	ret = request_irq(lp->rx_irq, axienet_rx_irq, IRQF_SHARED,
+			  ndev->name, ndev);
 	if (ret)
 		goto err_rx_irq;
+	/* Enable interrupts for Axi Ethernet core (if defined) */
+	if (lp->eth_irq > 0) {
+		ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED,
+				  ndev->name, ndev);
+		if (ret)
+			goto err_eth_irq;
+	}
 
 	return 0;
 
+err_eth_irq:
+	free_irq(lp->rx_irq, ndev);
 err_rx_irq:
 	free_irq(lp->tx_irq, ndev);
 err_tx_irq:
-	if (phydev)
-		phy_disconnect(phydev);
+	phylink_stop(lp->phylink);
+	phylink_disconnect_phy(lp->phylink);
 	tasklet_kill(&lp->dma_err_tasklet);
 	dev_err(lp->dev, "request_irq() failed\n");
 	return ret;
@@ -974,34 +977,61 @@
  *
  * Return: 0, on success.
  *
- * This is the driver stop routine. It calls phy_disconnect to stop the PHY
+ * This is the driver stop routine. It calls phylink_disconnect to stop the PHY
  * device. It also removes the interrupt handlers and disables the interrupts.
  * The Axi DMA Tx/Rx BDs are released.
  */
 static int axienet_stop(struct net_device *ndev)
 {
-	u32 cr;
+	u32 cr, sr;
+	int count;
 	struct axienet_local *lp = netdev_priv(ndev);
 
 	dev_dbg(&ndev->dev, "axienet_close()\n");
 
-	cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
-			  cr & (~XAXIDMA_CR_RUNSTOP_MASK));
-	cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET,
-			  cr & (~XAXIDMA_CR_RUNSTOP_MASK));
+	phylink_stop(lp->phylink);
+	phylink_disconnect_phy(lp->phylink);
+
 	axienet_setoptions(ndev, lp->options &
 			   ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
 
+	cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
+	cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
+	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
+
+	cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
+	cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
+	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
+
+	axienet_iow(lp, XAE_IE_OFFSET, 0);
+
+	/* Give DMAs a chance to halt gracefully */
+	sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
+	for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) {
+		msleep(20);
+		sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
+	}
+
+	sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
+	for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) {
+		msleep(20);
+		sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
+	}
+
+	/* Do a reset to ensure DMA is really stopped */
+	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_mdio_disable(lp);
+	__axienet_device_reset(lp);
+	axienet_mdio_enable(lp);
+	mutex_unlock(&lp->mii_bus->mdio_lock);
+
 	tasklet_kill(&lp->dma_err_tasklet);
 
+	if (lp->eth_irq > 0)
+		free_irq(lp->eth_irq, ndev);
 	free_irq(lp->tx_irq, ndev);
 	free_irq(lp->rx_irq, ndev);
 
-	if (ndev->phydev)
-		phy_disconnect(ndev->phydev);
-
 	axienet_dma_bd_release(ndev);
 	return 0;
 }
@@ -1149,6 +1179,48 @@
 	data[29] = axienet_ior(lp, XAE_FMI_OFFSET);
 	data[30] = axienet_ior(lp, XAE_AF0_OFFSET);
 	data[31] = axienet_ior(lp, XAE_AF1_OFFSET);
+	data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
+	data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
+	data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET);
+	data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET);
+	data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
+	data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
+	data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET);
+	data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET);
+}
+
+static void axienet_ethtools_get_ringparam(struct net_device *ndev,
+					   struct ethtool_ringparam *ering)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	ering->rx_max_pending = RX_BD_NUM_MAX;
+	ering->rx_mini_max_pending = 0;
+	ering->rx_jumbo_max_pending = 0;
+	ering->tx_max_pending = TX_BD_NUM_MAX;
+	ering->rx_pending = lp->rx_bd_num;
+	ering->rx_mini_pending = 0;
+	ering->rx_jumbo_pending = 0;
+	ering->tx_pending = lp->tx_bd_num;
+}
+
+static int axienet_ethtools_set_ringparam(struct net_device *ndev,
+					  struct ethtool_ringparam *ering)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	if (ering->rx_pending > RX_BD_NUM_MAX ||
+	    ering->rx_mini_pending ||
+	    ering->rx_jumbo_pending ||
+	    ering->tx_pending > TX_BD_NUM_MAX)
+		return -EINVAL;
+
+	if (netif_running(ndev))
+		return -EBUSY;
+
+	lp->rx_bd_num = ering->rx_pending;
+	lp->tx_bd_num = ering->tx_pending;
+	return 0;
 }
 
 /**
@@ -1164,12 +1236,9 @@
 axienet_ethtools_get_pauseparam(struct net_device *ndev,
 				struct ethtool_pauseparam *epauseparm)
 {
-	u32 regval;
 	struct axienet_local *lp = netdev_priv(ndev);
-	epauseparm->autoneg  = 0;
-	regval = axienet_ior(lp, XAE_FCC_OFFSET);
-	epauseparm->tx_pause = regval & XAE_FCC_FCTX_MASK;
-	epauseparm->rx_pause = regval & XAE_FCC_FCRX_MASK;
+
+	phylink_ethtool_get_pauseparam(lp->phylink, epauseparm);
 }
 
 /**
@@ -1188,27 +1257,9 @@
 axienet_ethtools_set_pauseparam(struct net_device *ndev,
 				struct ethtool_pauseparam *epauseparm)
 {
-	u32 regval = 0;
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	if (netif_running(ndev)) {
-		netdev_err(ndev,
-			   "Please stop netif before applying configuration\n");
-		return -EFAULT;
-	}
-
-	regval = axienet_ior(lp, XAE_FCC_OFFSET);
-	if (epauseparm->tx_pause)
-		regval |= XAE_FCC_FCTX_MASK;
-	else
-		regval &= ~XAE_FCC_FCTX_MASK;
-	if (epauseparm->rx_pause)
-		regval |= XAE_FCC_FCRX_MASK;
-	else
-		regval &= ~XAE_FCC_FCRX_MASK;
-	axienet_iow(lp, XAE_FCC_OFFSET, regval);
-
-	return 0;
+	return phylink_ethtool_set_pauseparam(lp->phylink, epauseparm);
 }
 
 /**
@@ -1287,17 +1338,170 @@
 	return 0;
 }
 
+static int
+axienet_ethtools_get_link_ksettings(struct net_device *ndev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_get(lp->phylink, cmd);
+}
+
+static int
+axienet_ethtools_set_link_ksettings(struct net_device *ndev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_set(lp->phylink, cmd);
+}
+
 static const struct ethtool_ops axienet_ethtool_ops = {
 	.get_drvinfo    = axienet_ethtools_get_drvinfo,
 	.get_regs_len   = axienet_ethtools_get_regs_len,
 	.get_regs       = axienet_ethtools_get_regs,
 	.get_link       = ethtool_op_get_link,
+	.get_ringparam	= axienet_ethtools_get_ringparam,
+	.set_ringparam	= axienet_ethtools_set_ringparam,
 	.get_pauseparam = axienet_ethtools_get_pauseparam,
 	.set_pauseparam = axienet_ethtools_set_pauseparam,
 	.get_coalesce   = axienet_ethtools_get_coalesce,
 	.set_coalesce   = axienet_ethtools_set_coalesce,
-	.get_link_ksettings = phy_ethtool_get_link_ksettings,
-	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_link_ksettings = axienet_ethtools_get_link_ksettings,
+	.set_link_ksettings = axienet_ethtools_set_link_ksettings,
+};
+
+static void axienet_validate(struct phylink_config *config,
+			     unsigned long *supported,
+			     struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	/* Only support the mode we are configured for */
+	if (state->interface != PHY_INTERFACE_MODE_NA &&
+	    state->interface != lp->phy_mode) {
+		netdev_warn(ndev, "Cannot use PHY mode %s, supported: %s\n",
+			    phy_modes(state->interface),
+			    phy_modes(lp->phy_mode));
+		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		return;
+	}
+
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+
+	phylink_set(mask, Asym_Pause);
+	phylink_set(mask, Pause);
+	phylink_set(mask, 1000baseX_Full);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Full);
+	phylink_set(mask, 1000baseT_Full);
+
+	bitmap_and(supported, supported, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
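+/* Report the link state currently programmed into the MAC: speed is read
+ * back from the EMMC register and pause settings from the FCC register.
+ * No autoneg status is read back, so an_complete stays 0 and duplex is
+ * always reported as full.
+ */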
+static int axienet_mac_link_state(struct phylink_config *config,
+				  struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	u32 emmc_reg, fcc_reg;
+
+	state->interface = lp->phy_mode;
+
+	emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
+	if (emmc_reg & XAE_EMMC_LINKSPD_1000)
+		state->speed = SPEED_1000;
+	else if (emmc_reg & XAE_EMMC_LINKSPD_100)
+		state->speed = SPEED_100;
+	else
+		state->speed = SPEED_10;
+
+	state->pause = 0;
+	fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET);
+	if (fcc_reg & XAE_FCC_FCTX_MASK)
+		state->pause |= MLO_PAUSE_TX;
+	if (fcc_reg & XAE_FCC_FCRX_MASK)
+		state->pause |= MLO_PAUSE_RX;
+
+	state->an_complete = 0;
+	state->duplex = DUPLEX_FULL;
+
+	return 1;
+}
+
+static void axienet_mac_an_restart(struct phylink_config *config)
+{
+	/* Unsupported, do nothing */
+}
+
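+/* Apply the resolved link settings to the MAC: the link speed is written
+ * to the EMMC register, the pause configuration to the FCC register.
+ */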
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+			       const struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	u32 emmc_reg, fcc_reg;
+
+	emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
+	emmc_reg &= ~XAE_EMMC_LINKSPEED_MASK;
+
+	switch (state->speed) {
+	case SPEED_1000:
+		emmc_reg |= XAE_EMMC_LINKSPD_1000;
+		break;
+	case SPEED_100:
+		emmc_reg |= XAE_EMMC_LINKSPD_100;
+		break;
+	case SPEED_10:
+		emmc_reg |= XAE_EMMC_LINKSPD_10;
+		break;
+	default:
+		dev_err(&ndev->dev,
+			"Speed other than 10, 100 or 1Gbps is not supported\n");
+		break;
+	}
+
+	axienet_iow(lp, XAE_EMMC_OFFSET, emmc_reg);
+
+	fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET);
+	if (state->pause & MLO_PAUSE_TX)
+		fcc_reg |= XAE_FCC_FCTX_MASK;
+	else
+		fcc_reg &= ~XAE_FCC_FCTX_MASK;
+	if (state->pause & MLO_PAUSE_RX)
+		fcc_reg |= XAE_FCC_FCRX_MASK;
+	else
+		fcc_reg &= ~XAE_FCC_FCRX_MASK;
+	axienet_iow(lp, XAE_FCC_OFFSET, fcc_reg);
+}
+
+static void axienet_mac_link_down(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface)
+{
+	/* nothing meaningful to do */
+}
+
+static void axienet_mac_link_up(struct phylink_config *config,
+				unsigned int mode,
+				phy_interface_t interface,
+				struct phy_device *phy)
+{
+	/* nothing meaningful to do */
+}
+
+static const struct phylink_mac_ops axienet_phylink_ops = {
+	.validate = axienet_validate,
+	.mac_link_state = axienet_mac_link_state,
+	.mac_an_restart = axienet_mac_an_restart,
+	.mac_config = axienet_mac_config,
+	.mac_link_down = axienet_mac_link_down,
+	.mac_link_up = axienet_mac_link_up,
 };
 
 /**
@@ -1311,38 +1515,33 @@
 {
 	u32 axienet_status;
 	u32 cr, i;
-	int mdio_mcreg;
 	struct axienet_local *lp = (struct axienet_local *) data;
 	struct net_device *ndev = lp->ndev;
 	struct axidma_bd *cur_p;
 
 	axienet_setoptions(ndev, lp->options &
 			   ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
-	mdio_mcreg = axienet_ior(lp, XAE_MDIO_MC_OFFSET);
-	axienet_mdio_wait_until_ready(lp);
 	/* Disable the MDIO interface till Axi Ethernet Reset is completed.
 	 * When we do an Axi Ethernet reset, it resets the complete core
-	 * including the MDIO. So if MDIO is not disabled when the reset
-	 * process is started, MDIO will be broken afterwards.
+	 * including the MDIO. MDIO must be disabled before resetting
+	 * and re-enabled afterwards.
+	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
 	 */
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg &
-		    ~XAE_MDIO_MC_MDIOEN_MASK));
+	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_mdio_disable(lp);
+	__axienet_device_reset(lp);
+	axienet_mdio_enable(lp);
+	mutex_unlock(&lp->mii_bus->mdio_lock);
 
-	__axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET);
-	__axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET);
-
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg);
-	axienet_mdio_wait_until_ready(lp);
-
-	for (i = 0; i < TX_BD_NUM; i++) {
+	for (i = 0; i < lp->tx_bd_num; i++) {
 		cur_p = &lp->tx_bd_v[i];
 		if (cur_p->phys)
 			dma_unmap_single(ndev->dev.parent, cur_p->phys,
 					 (cur_p->cntrl &
 					  XAXIDMA_BD_CTRL_LENGTH_MASK),
 					 DMA_TO_DEVICE);
-		if (cur_p->app4)
-			dev_kfree_skb_irq((struct sk_buff *) cur_p->app4);
+		if (cur_p->skb)
+			dev_kfree_skb_irq(cur_p->skb);
 		cur_p->phys = 0;
 		cur_p->cntrl = 0;
 		cur_p->status = 0;
@@ -1351,10 +1550,10 @@
 		cur_p->app2 = 0;
 		cur_p->app3 = 0;
 		cur_p->app4 = 0;
-		cur_p->sw_id_offset = 0;
+		cur_p->skb = NULL;
 	}
 
-	for (i = 0; i < RX_BD_NUM; i++) {
+	for (i = 0; i < lp->rx_bd_num; i++) {
 		cur_p = &lp->rx_bd_v[i];
 		cur_p->status = 0;
 		cur_p->app0 = 0;
@@ -1402,7 +1601,7 @@
 	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
 			  cr | XAXIDMA_CR_RUNSTOP_MASK);
 	axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
-			  (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+			  (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
 
 	/* Write to the RS (Run-stop) bit in the Tx channel control register.
 	 * Tx channel is now ready to run. But only after we write to the
@@ -1420,6 +1619,8 @@
 	axienet_status = axienet_ior(lp, XAE_IP_OFFSET);
 	if (axienet_status & XAE_INT_RXRJECT_MASK)
 		axienet_iow(lp, XAE_IS_OFFSET, XAE_INT_RXRJECT_MASK);
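+	/* Enable receive-error interrupts only if the core IRQ is wired up */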
+	axienet_iow(lp, XAE_IE_OFFSET, lp->eth_irq > 0 ?
+		    XAE_INT_RECV_ERROR_MASK : 0);
 	axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK);
 
 	/* Sync default options with HW but leave receiver and
@@ -1451,7 +1652,7 @@
 	struct axienet_local *lp;
 	struct net_device *ndev;
 	const void *mac_addr;
-	struct resource *ethres, dmares;
+	struct resource *ethres;
 	u32 value;
 
 	ndev = alloc_etherdev(sizeof(*lp));
@@ -1474,6 +1675,8 @@
 	lp->ndev = ndev;
 	lp->dev = &pdev->dev;
 	lp->options = XAE_OPTION_DEFAULTS;
+	lp->rx_bd_num = RX_BD_NUM_DEFAULT;
+	lp->tx_bd_num = TX_BD_NUM_DEFAULT;
 	/* Map device registers */
 	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
@@ -1482,6 +1685,7 @@
 		ret = PTR_ERR(lp->regs);
 		goto free_netdev;
 	}
+	lp->regs_start = ethres->start;
 
 	/* Setup checksum offload, but default to off if not specified */
 	lp->features = 0;
@@ -1558,7 +1762,7 @@
 		}
 	} else {
 		lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-		if (lp->phy_mode < 0) {
+		if ((int)lp->phy_mode < 0) {
 			ret = -EINVAL;
 			goto free_netdev;
 		}
@@ -1566,36 +1770,56 @@
 
 	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
 	np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
-	if (!np) {
-		dev_err(&pdev->dev, "could not find DMA node\n");
-		ret = -ENODEV;
-		goto free_netdev;
+	if (np) {
+		struct resource dmares;
+
+		ret = of_address_to_resource(np, 0, &dmares);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"unable to get DMA resource\n");
+			of_node_put(np);
+			goto free_netdev;
+		}
+		lp->dma_regs = devm_ioremap_resource(&pdev->dev,
+						     &dmares);
+		lp->rx_irq = irq_of_parse_and_map(np, 1);
+		lp->tx_irq = irq_of_parse_and_map(np, 0);
+		of_node_put(np);
+		lp->eth_irq = platform_get_irq(pdev, 0);
+	} else {
+		/* Check for these resources directly on the Ethernet node. */
+		struct resource *res = platform_get_resource(pdev,
+							     IORESOURCE_MEM, 1);
+		if (!res) {
+			dev_err(&pdev->dev, "unable to get DMA memory resource\n");
+			ret = -ENODEV;
+			goto free_netdev;
+		}
+		lp->dma_regs = devm_ioremap_resource(&pdev->dev, res);
+		lp->rx_irq = platform_get_irq(pdev, 1);
+		lp->tx_irq = platform_get_irq(pdev, 0);
+		lp->eth_irq = platform_get_irq(pdev, 2);
 	}
-	ret = of_address_to_resource(np, 0, &dmares);
-	if (ret) {
-		dev_err(&pdev->dev, "unable to get DMA resource\n");
-		goto free_netdev;
-	}
-	lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares);
 	if (IS_ERR(lp->dma_regs)) {
 		dev_err(&pdev->dev, "could not map DMA regs\n");
 		ret = PTR_ERR(lp->dma_regs);
 		goto free_netdev;
 	}
-	lp->rx_irq = irq_of_parse_and_map(np, 1);
-	lp->tx_irq = irq_of_parse_and_map(np, 0);
-	of_node_put(np);
 	if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) {
 		dev_err(&pdev->dev, "could not determine irqs\n");
 		ret = -ENOMEM;
 		goto free_netdev;
 	}
 
+	/* Check for Ethernet core IRQ (optional) */
+	if (lp->eth_irq <= 0)
+		dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
+
 	/* Retrieve the MAC address */
 	mac_addr = of_get_mac_address(pdev->dev.of_node);
-	if (!mac_addr) {
-		dev_err(&pdev->dev, "could not find MAC address\n");
-		goto free_netdev;
+	if (IS_ERR(mac_addr)) {
+		dev_warn(&pdev->dev, "could not find MAC address property: %ld\n",
+			 PTR_ERR(mac_addr));
+		mac_addr = NULL;
 	}
 	axienet_set_mac_address(ndev, mac_addr);
 
@@ -1604,9 +1828,36 @@
 
 	lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
 	if (lp->phy_node) {
-		ret = axienet_mdio_setup(lp, pdev->dev.of_node);
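+		/* Optional AXI bus clock; when available, its rate is used
+		 * to derive the MDIO clock divisor in axienet_mdio_enable().
+		 */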
+		lp->clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(lp->clk)) {
+			dev_warn(&pdev->dev, "Failed to get clock: %ld\n",
+				 PTR_ERR(lp->clk));
+			lp->clk = NULL;
+		} else {
+			ret = clk_prepare_enable(lp->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "Unable to enable clock: %d\n",
+					ret);
+				goto free_netdev;
+			}
+		}
+
+		ret = axienet_mdio_setup(lp);
 		if (ret)
-			dev_warn(&pdev->dev, "error registering MDIO bus\n");
+			dev_warn(&pdev->dev,
+				 "error registering MDIO bus: %d\n", ret);
+	}
+
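+	/* Hand PHY management over to phylink; the MAC callbacks above are
+	 * invoked as the link comes up and goes down.
+	 */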
+	lp->phylink_config.dev = &ndev->dev;
+	lp->phylink_config.type = PHYLINK_NETDEV;
+
+	lp->phylink = phylink_create(&lp->phylink_config, pdev->dev.fwnode,
+				     lp->phy_mode,
+				     &axienet_phylink_ops);
+	if (IS_ERR(lp->phylink)) {
+		ret = PTR_ERR(lp->phylink);
+		dev_err(&pdev->dev, "phylink_create error (%i)\n", ret);
+		goto free_netdev;
 	}
 
 	ret = register_netdev(lp->ndev);
@@ -1628,9 +1879,16 @@
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	axienet_mdio_teardown(lp);
 	unregister_netdev(ndev);
 
+	if (lp->phylink)
+		phylink_destroy(lp->phylink);
+
+	axienet_mdio_teardown(lp);
+
+	if (lp->clk)
+		clk_disable_unprepare(lp->clk);
+
 	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
 
@@ -1639,9 +1897,23 @@
 	return 0;
 }
 
+static void axienet_shutdown(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+
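+	/* RTNL must be held across netif_device_detach() and dev_close() */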
+	rtnl_lock();
+	netif_device_detach(ndev);
+
+	if (netif_running(ndev))
+		dev_close(ndev);
+
+	rtnl_unlock();
+}
+
 static struct platform_driver axienet_driver = {
 	.probe = axienet_probe,
 	.remove = axienet_remove,
+	.shutdown = axienet_shutdown,
 	.driver = {
 		 .name = "xilinx_axienet",
 		 .of_match_table = axienet_of_match,
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 757a3b3..435ed30 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
@@ -5,31 +5,29 @@
  * Copyright (c) 2009 Secret Lab Technologies, Ltd.
  * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu>
  * Copyright (c) 2010 - 2011 PetaLogix
+ * Copyright (c) 2019 SED Systems, a division of Calian Ltd.
  * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved.
  */
 
+#include <linux/clk.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/jiffies.h>
+#include <linux/iopoll.h>
 
 #include "xilinx_axienet.h"
 
 #define MAX_MDIO_FREQ		2500000 /* 2.5 MHz */
-#define DEFAULT_CLOCK_DIVISOR	XAE_MDIO_DIV_DFT
+#define DEFAULT_HOST_CLOCK	150000000 /* 150 MHz */
 
 /* Wait till MDIO interface is ready to accept a new transaction.*/
-int axienet_mdio_wait_until_ready(struct axienet_local *lp)
+static int axienet_mdio_wait_until_ready(struct axienet_local *lp)
 {
-	unsigned long end = jiffies + 2;
-	while (!(axienet_ior(lp, XAE_MDIO_MCR_OFFSET) &
-		 XAE_MDIO_MCR_READY_MASK)) {
-		if (time_before_eq(end, jiffies)) {
-			WARN_ON(1);
-			return -ETIMEDOUT;
-		}
-		udelay(1);
-	}
-	return 0;
+	u32 val;
+
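+	/* Poll the MDIO master control register roughly once a microsecond,
+	 * timing out after 20 ms.
+	 */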
+	return readx_poll_timeout(axinet_ior_read_mcr, lp,
+				  val, val & XAE_MDIO_MCR_READY_MASK,
+				  1, 20000);
 }
 
 /**
@@ -116,23 +114,42 @@
 }
 
 /**
- * axienet_mdio_setup - MDIO setup function
+ * axienet_mdio_enable - MDIO hardware setup function
  * @lp:		Pointer to axienet local data structure.
- * @np:		Pointer to device node
  *
- * Return:	0 on success, -ETIMEDOUT on a timeout, -ENOMEM when
- *		mdiobus_alloc (to allocate memory for mii bus structure) fails.
+ * Return:	0 on success, -ETIMEDOUT on a timeout.
  *
  * Sets up the MDIO interface by initializing the MDIO clock and enabling the
- * MDIO interface in hardware. Register the MDIO interface.
+ * MDIO interface in hardware.
  **/
-int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
+int axienet_mdio_enable(struct axienet_local *lp)
 {
-	int ret;
 	u32 clk_div, host_clock;
-	struct mii_bus *bus;
-	struct resource res;
-	struct device_node *np1;
+
+	if (lp->clk) {
+		host_clock = clk_get_rate(lp->clk);
+	} else {
+		struct device_node *np1;
+
+		/* Legacy fallback: detect CPU clock frequency and use as AXI
+		 * bus clock frequency. This only works on certain platforms.
+		 */
+		np1 = of_find_node_by_name(NULL, "cpu");
+		if (!np1) {
+			netdev_warn(lp->ndev, "Could not find CPU device node.\n");
+			host_clock = DEFAULT_HOST_CLOCK;
+		} else {
+			int ret = of_property_read_u32(np1, "clock-frequency",
+						       &host_clock);
+			if (ret) {
+				netdev_warn(lp->ndev, "CPU clock-frequency property not found.\n");
+				host_clock = DEFAULT_HOST_CLOCK;
+			}
+			of_node_put(np1);
+		}
+		netdev_info(lp->ndev, "Setting assumed host clock to %u\n",
+			    host_clock);
+	}
 
 	/* clk_div can be calculated by deriving it from the equation:
 	 * fMDIO = fHOST / ((1 + clk_div) * 2)
@@ -159,25 +176,6 @@
 	 * "clock-frequency" from the CPU
 	 */
 
-	np1 = of_find_node_by_name(NULL, "cpu");
-	if (!np1) {
-		netdev_warn(lp->ndev, "Could not find CPU device node.\n");
-		netdev_warn(lp->ndev,
-			    "Setting MDIO clock divisor to default %d\n",
-			    DEFAULT_CLOCK_DIVISOR);
-		clk_div = DEFAULT_CLOCK_DIVISOR;
-		goto issue;
-	}
-	if (of_property_read_u32(np1, "clock-frequency", &host_clock)) {
-		netdev_warn(lp->ndev, "clock-frequency property not found.\n");
-		netdev_warn(lp->ndev,
-			    "Setting MDIO clock divisor to default %d\n",
-			    DEFAULT_CLOCK_DIVISOR);
-		clk_div = DEFAULT_CLOCK_DIVISOR;
-		of_node_put(np1);
-		goto issue;
-	}
-
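+	/* e.g. a 150 MHz host clock gives clk_div = 150e6 / (2.5e6 * 2) - 1
+	 * = 29, so fMDIO = 150e6 / ((1 + 29) * 2) = 2.5 MHz exactly.
+	 */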
 	clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1;
 	/* If there is any remainder from the division of
 	 * fHOST / (MAX_MDIO_FREQ * 2), then we need to add
@@ -190,12 +188,39 @@
 		   "Setting MDIO clock divisor to %u/%u Hz host clock.\n",
 		   clk_div, host_clock);
 
-	of_node_put(np1);
-issue:
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET,
-		    (((u32) clk_div) | XAE_MDIO_MC_MDIOEN_MASK));
+	axienet_iow(lp, XAE_MDIO_MC_OFFSET, clk_div | XAE_MDIO_MC_MDIOEN_MASK);
 
-	ret = axienet_mdio_wait_until_ready(lp);
+	return axienet_mdio_wait_until_ready(lp);
+}
+
+/**
+ * axienet_mdio_disable - MDIO hardware disable function
+ * @lp:		Pointer to axienet local data structure.
+ *
+ * Disable the MDIO interface in hardware.
+ **/
+void axienet_mdio_disable(struct axienet_local *lp)
+{
+	axienet_iow(lp, XAE_MDIO_MC_OFFSET, 0);
+}
+
+/**
+ * axienet_mdio_setup - MDIO setup function
+ * @lp:		Pointer to axienet local data structure.
+ *
+ * Return:	0 on success, -ETIMEDOUT on a timeout, -ENOMEM when
+ *		mdiobus_alloc (to allocate memory for mii bus structure) fails.
+ *
+ * Sets up the MDIO interface by initializing the MDIO clock and enabling the
+ * MDIO interface in hardware. Register the MDIO interface.
+ **/
+int axienet_mdio_setup(struct axienet_local *lp)
+{
+	struct device_node *mdio_node;
+	struct mii_bus *bus;
+	int ret;
+
+	ret = axienet_mdio_enable(lp);
 	if (ret < 0)
 		return ret;
 
@@ -203,10 +228,8 @@
 	if (!bus)
 		return -ENOMEM;
 
-	np1 = of_get_parent(lp->phy_node);
-	of_address_to_resource(np1, 0, &res);
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%.8llx",
-		 (unsigned long long) res.start);
+	snprintf(bus->id, MII_BUS_ID_SIZE, "axienet-%.8llx",
+		 (unsigned long long)lp->regs_start);
 
 	bus->priv = lp;
 	bus->name = "Xilinx Axi Ethernet MDIO";
@@ -215,7 +238,9 @@
 	bus->parent = lp->dev;
 	lp->mii_bus = bus;
 
-	ret = of_mdiobus_register(bus, np1);
+	mdio_node = of_get_child_by_name(lp->dev->of_node, "mdio");
+	ret = of_mdiobus_register(bus, mdio_node);
+	of_node_put(mdio_node);
 	if (ret) {
 		mdiobus_free(bus);
 		lp->mii_bus = NULL;
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 42f1f51..0de52e7 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Xilinx EmacLite Linux driver for the Xilinx Ethernet MAC Lite device.
  *
@@ -5,11 +6,6 @@
  * driver from John Williams <john.williams@xilinx.com>.
  *
  * 2007 - 2013 (c) Xilinx, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
  */
 
 #include <linux/module.h>
@@ -17,6 +13,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
+#include <linux/ethtool.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/of_address.h>
@@ -26,6 +23,7 @@
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/interrupt.h>
+#include <linux/iopoll.h>
 
 #define DRIVER_NAME "xilinx_emaclite"
 
@@ -581,7 +579,7 @@
 		return;
 
 	dev->stats.tx_bytes += lp->deferred_skb->len;
-	dev_kfree_skb_irq(lp->deferred_skb);
+	dev_consume_skb_irq(lp->deferred_skb);
 	lp->deferred_skb = NULL;
 	netif_trans_update(dev); /* prevent tx timeout */
 	netif_wake_queue(dev);
@@ -713,20 +711,15 @@
 
 static int xemaclite_mdio_wait(struct net_local *lp)
 {
-	unsigned long end = jiffies + 2;
+	u32 val;
 
 	/* wait for the MDIO interface to not be busy or timeout
 	 * after some time.
 	 */
-	while (xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) &
-			XEL_MDIOCTRL_MDIOSTS_MASK) {
-		if (time_before_eq(end, jiffies)) {
-			WARN_ON(1);
-			return -ETIMEDOUT;
-		}
-		msleep(1);
-	}
-	return 0;
+	return readx_poll_timeout(xemaclite_readl,
+				  lp->base_addr + XEL_MDIOCTRL_OFFSET,
+				  val, !(val & XEL_MDIOCTRL_MDIOSTS_MASK),
+				  1000, 20000);
 }
 
 /**
@@ -941,8 +934,7 @@
 		}
 
 		/* EmacLite doesn't support giga-bit speeds */
-		lp->phy_dev->supported &= (PHY_BASIC_FEATURES);
-		lp->phy_dev->advertising = lp->phy_dev->supported;
+		phy_set_max_speed(lp->phy_dev, SPEED_100);
 
 		/* Don't advertise 1000BASE-T Full/Half duplex speeds */
 		phy_write(lp->phy_dev, MII_CTRL1000, 0);
@@ -1020,9 +1012,10 @@
  * deferred and the Tx queue is stopped so that the deferred socket buffer can
  * be transmitted when the Emaclite device is free to transmit data.
  *
- * Return:	0, always.
+ * Return:	NETDEV_TX_OK, always.
  */
-static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev)
+static netdev_tx_t
+xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev)
 {
 	struct net_local *lp = netdev_priv(dev);
 	struct sk_buff *new_skb;
@@ -1044,7 +1037,7 @@
 		/* Take the time stamp now, since we can't do this in an ISR. */
 		skb_tx_timestamp(new_skb);
 		spin_unlock_irqrestore(&lp->reset_lock, flags);
-		return 0;
+		return NETDEV_TX_OK;
 	}
 	spin_unlock_irqrestore(&lp->reset_lock, flags);
 
@@ -1053,7 +1046,7 @@
 	dev->stats.tx_bytes += len;
 	dev_consume_skb_any(new_skb);
 
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 /**
@@ -1078,6 +1071,27 @@
 	return (bool)*p;
 }
 
+/**
+ * xemaclite_ethtools_get_drvinfo - Get various Axi Emac Lite driver info
+ * @ndev:       Pointer to net_device structure
+ * @ed:         Pointer to ethtool_drvinfo structure
+ *
+ * This implements ethtool command for getting the driver information.
+ * Issue "ethtool -i ethX" under linux prompt to execute this function.
+ */
+static void xemaclite_ethtools_get_drvinfo(struct net_device *ndev,
+					   struct ethtool_drvinfo *ed)
+{
+	strlcpy(ed->driver, DRIVER_NAME, sizeof(ed->driver));
+}
+
+static const struct ethtool_ops xemaclite_ethtool_ops = {
+	.get_drvinfo    = xemaclite_ethtools_get_drvinfo,
+	.get_link       = ethtool_op_get_link,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+};
+
 static const struct net_device_ops xemaclite_netdev_ops;
 
 /**
@@ -1143,9 +1157,9 @@
 	lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong");
 	mac_address = of_get_mac_address(ofdev->dev.of_node);
 
-	if (mac_address) {
+	if (!IS_ERR(mac_address)) {
 		/* Set the MAC address. */
-		memcpy(ndev->dev_addr, mac_address, ETH_ALEN);
+		ether_addr_copy(ndev->dev_addr, mac_address);
 	} else {
 		dev_warn(dev, "No MAC address found, using random\n");
 		eth_hw_addr_random(ndev);
@@ -1164,6 +1178,7 @@
 	dev_info(dev, "MAC address is now %pM\n", ndev->dev_addr);
 
 	ndev->netdev_ops = &xemaclite_netdev_ops;
+	ndev->ethtool_ops = &xemaclite_ethtool_ops;
 	ndev->flags &= ~IFF_MULTICAST;
 	ndev->watchdog_timeo = TX_TIMEOUT;
 
@@ -1229,12 +1244,29 @@
 }
 #endif
 
+/* Ioctl MII Interface */
+static int xemaclite_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	if (!dev->phydev || !netif_running(dev))
+		return -EINVAL;
+
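+	/* Forward the standard MII ioctls to phylib; anything else is
+	 * unsupported.
+	 */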
+	switch (cmd) {
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSMIIREG:
+		return phy_mii_ioctl(dev->phydev, rq, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops xemaclite_netdev_ops = {
 	.ndo_open		= xemaclite_open,
 	.ndo_stop		= xemaclite_close,
 	.ndo_start_xmit		= xemaclite_send,
 	.ndo_set_mac_address	= xemaclite_set_mac_address,
 	.ndo_tx_timeout		= xemaclite_tx_timeout,
+	.ndo_do_ioctl		= xemaclite_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = xemaclite_poll_controller,
 #endif
diff --git a/drivers/net/ethernet/xircom/Kconfig b/drivers/net/ethernet/xircom/Kconfig
index d6208a4..ad53900 100644
--- a/drivers/net/ethernet/xircom/Kconfig
+++ b/drivers/net/ethernet/xircom/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Xircom network device configuration
 #
diff --git a/drivers/net/ethernet/xircom/Makefile b/drivers/net/ethernet/xircom/Makefile
index 3b7aebd..07667fe 100644
--- a/drivers/net/ethernet/xircom/Makefile
+++ b/drivers/net/ethernet/xircom/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Xircom network device drivers.
 #
diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig
index af3432f..cd0a8f4 100644
--- a/drivers/net/ethernet/xscale/Kconfig
+++ b/drivers/net/ethernet/xscale/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Intel XScale IXP device configuration
 #
@@ -12,7 +13,7 @@
 
 	  Note that the answer to this question does not directly affect the
 	  kernel: saying N will just cause the configurator to skip all
-	  the questions about XSacle IXP devices. If you say Y, you will be
+	  the questions about XScale IXP devices. If you say Y, you will be
 	  asked for your specific card in the following questions.
 
 if NET_VENDOR_XSCALE
diff --git a/drivers/net/ethernet/xscale/Makefile b/drivers/net/ethernet/xscale/Makefile
index abc3b03..794a519 100644
--- a/drivers/net/ethernet/xscale/Makefile
+++ b/drivers/net/ethernet/xscale/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the Intel XScale IXP device drivers.
 #
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index aee55c0..6fc04ff 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Intel IXP4xx Ethernet driver for Linux
  *
  * Copyright (C) 2007 Krzysztof Halasa <khc@pm.waw.pl>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
  * Ethernet port config (0x00 is not present on IXP42X):
  *
  * logical port		0x00		0x10		0x20
@@ -16,7 +13,6 @@
  * RX-free queue	26		27		28
  * TX-done queue is always 31, per-port RX and TX-ready queues are configurable
  *
- *
  * Queue entries:
  * bits 0 -> 1	- NPE ID (RX and TX-done)
  * bits 0 -> 2	- priority (TX, per 802.1D)
@@ -31,14 +27,15 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/net_tstamp.h>
+#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
 #include <linux/ptp_classify.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <mach/ixp46x_ts.h>
-#include <mach/npe.h>
-#include <mach/qmgr.h>
+#include <linux/soc/ixp4xx/npe.h>
+#include <linux/soc/ixp4xx/qmgr.h>
 
 #define DEBUG_DESC		0
 #define DEBUG_RX		0
@@ -139,7 +136,7 @@
 #ifdef __ARMEB__
 typedef struct sk_buff buffer_t;
 #define free_buffer dev_kfree_skb
-#define free_buffer_irq dev_kfree_skb_irq
+#define free_buffer_irq dev_consume_skb_irq
 #else
 typedef void buffer_t;
 #define free_buffer kfree
@@ -1497,6 +1494,15 @@
 static int __init eth_init_module(void)
 {
 	int err;
+
+	/*
+	 * FIXME: we bail out on device tree boot but this really needs
+	 * to be fixed in a nicer way: this registers the MDIO bus before
+	 * even matching the driver infrastructure, we should only probe
+	 * detected hardware.
+	 */
+	if (of_have_populated_dt())
+		return -ENODEV;
 	if ((err = ixp4xx_mdio_register()))
 		return err;
 	return platform_driver_register(&ixp4xx_eth_driver);